summaryrefslogtreecommitdiff
path: root/tools/perf/scripts/python/bin
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf/scripts/python/bin')
0 files changed, 0 insertions, 0 deletions
f0d99079f6bbacdcfc58fe08'>diff
path: root/Documentation
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation')
-rw-r--r--Documentation/.gitignore1
-rw-r--r--Documentation/.renames.txt1191
-rw-r--r--Documentation/ABI/README17
-rw-r--r--Documentation/ABI/obsolete/automount-tracefs-debugfs20
-rw-r--r--Documentation/ABI/obsolete/o2cb11
-rw-r--r--Documentation/ABI/obsolete/procfs-i8k10
-rw-r--r--Documentation/ABI/obsolete/sysfs-bus-iio174
-rw-r--r--Documentation/ABI/obsolete/sysfs-cpuidle9
-rw-r--r--Documentation/ABI/obsolete/sysfs-driver-hid-roccat-pyra5
-rw-r--r--Documentation/ABI/obsolete/sysfs-driver-intel_pmc_bxt22
-rw-r--r--Documentation/ABI/obsolete/sysfs-driver-samsung-laptop10
-rw-r--r--Documentation/ABI/obsolete/sysfs-gpio20
-rw-r--r--Documentation/ABI/obsolete/sysfs-kernel-fadump_enabled9
-rw-r--r--Documentation/ABI/obsolete/sysfs-kernel-fadump_registered10
-rw-r--r--Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem10
-rw-r--r--Documentation/ABI/obsolete/sysfs-platform-ideapad-laptop8
-rw-r--r--Documentation/ABI/obsolete/sysfs-selinux-user12
-rw-r--r--Documentation/ABI/removed/devfs1
-rw-r--r--Documentation/ABI/removed/o2cb4
-rw-r--r--Documentation/ABI/removed/raw13941
-rw-r--r--Documentation/ABI/removed/sysfs-bus-nfit2
-rw-r--r--Documentation/ABI/removed/sysfs-class-cxl (renamed from Documentation/ABI/testing/sysfs-class-cxl)106
-rw-r--r--Documentation/ABI/removed/sysfs-class-rfkill6
-rw-r--r--Documentation/ABI/removed/sysfs-firmware-efi-vars12
-rw-r--r--Documentation/ABI/removed/sysfs-kernel-fadump_release_opalcore9
-rw-r--r--Documentation/ABI/removed/sysfs-kernel-uids (renamed from Documentation/ABI/testing/sysfs-kernel-uids)6
-rw-r--r--Documentation/ABI/removed/sysfs-mce37
-rw-r--r--Documentation/ABI/removed/sysfs-selinux-checkreqprot26
-rw-r--r--Documentation/ABI/removed/sysfs-selinux-disable29
-rw-r--r--Documentation/ABI/removed/video13941
-rw-r--r--Documentation/ABI/stable/firewire-cdev69
-rw-r--r--Documentation/ABI/stable/o2cb6
-rw-r--r--Documentation/ABI/stable/procfs-audit_loginuid27
-rw-r--r--Documentation/ABI/stable/sysfs-acpi-pmprofile42
-rw-r--r--Documentation/ABI/stable/sysfs-block881
-rw-r--r--Documentation/ABI/stable/sysfs-bus-firewire3
-rw-r--r--Documentation/ABI/stable/sysfs-bus-fsl-mc19
-rw-r--r--Documentation/ABI/stable/sysfs-bus-mhi44
-rw-r--r--Documentation/ABI/stable/sysfs-bus-nvmem51
-rw-r--r--Documentation/ABI/stable/sysfs-bus-usb6
-rw-r--r--Documentation/ABI/stable/sysfs-bus-vmbus61
-rw-r--r--Documentation/ABI/stable/sysfs-bus-w13
-rw-r--r--Documentation/ABI/stable/sysfs-bus-xen-backend2
-rw-r--r--Documentation/ABI/stable/sysfs-class-backlight15
-rw-r--r--Documentation/ABI/stable/sysfs-class-bluetooth9
-rw-r--r--Documentation/ABI/stable/sysfs-class-infiniband230
-rw-r--r--Documentation/ABI/stable/sysfs-class-rfkill27
-rw-r--r--Documentation/ABI/stable/sysfs-class-tpm143
-rw-r--r--Documentation/ABI/stable/sysfs-devices12
-rw-r--r--Documentation/ABI/stable/sysfs-devices-node151
-rw-r--r--Documentation/ABI/stable/sysfs-devices-system-cpu92
-rw-r--r--Documentation/ABI/stable/sysfs-driver-aspeed-vuart11
-rw-r--r--Documentation/ABI/stable/sysfs-driver-dma-idxd361
-rw-r--r--Documentation/ABI/stable/sysfs-driver-dma-ioatdma10
-rw-r--r--Documentation/ABI/stable/sysfs-driver-firmware-zynqmp256
-rw-r--r--Documentation/ABI/stable/sysfs-driver-ib_srp3
-rw-r--r--Documentation/ABI/stable/sysfs-driver-misc-cp50025
-rw-r--r--Documentation/ABI/stable/sysfs-driver-mlxreg-io781
-rw-r--r--Documentation/ABI/stable/sysfs-driver-speakup395
-rw-r--r--Documentation/ABI/stable/sysfs-driver-w1_ds243813
-rw-r--r--Documentation/ABI/stable/sysfs-driver-w1_ds28e044
-rw-r--r--Documentation/ABI/stable/sysfs-driver-w1_ds28ea002
-rw-r--r--Documentation/ABI/stable/sysfs-firmware-efi-vars75
-rw-r--r--Documentation/ABI/stable/sysfs-firmware-opal-dump5
-rw-r--r--Documentation/ABI/stable/sysfs-firmware-opal-elog2
-rw-r--r--Documentation/ABI/stable/sysfs-hypervisor-xen16
-rw-r--r--Documentation/ABI/stable/sysfs-kernel-notes5
-rw-r--r--Documentation/ABI/stable/sysfs-kernel-time-aux-clocks5
-rw-r--r--Documentation/ABI/stable/sysfs-module25
-rw-r--r--Documentation/ABI/stable/sysfs-platform-wmi-bmof7
-rw-r--r--Documentation/ABI/stable/vdso19
-rw-r--r--Documentation/ABI/testing/configfs-acpi34
-rw-r--r--Documentation/ABI/testing/configfs-iio2
-rw-r--r--Documentation/ABI/testing/configfs-most241
-rw-r--r--Documentation/ABI/testing/configfs-spear-pcie-gadget36
-rw-r--r--Documentation/ABI/testing/configfs-tsm-report145
-rw-r--r--Documentation/ABI/testing/configfs-usb-gadget96
-rw-r--r--Documentation/ABI/testing/configfs-usb-gadget-acm7
-rw-r--r--Documentation/ABI/testing/configfs-usb-gadget-ecm12
-rw-r--r--Documentation/ABI/testing/configfs-usb-gadget-eem10
-rw-r--r--Documentation/ABI/testing/configfs-usb-gadget-ffs12
-rw-r--r--Documentation/ABI/testing/configfs-usb-gadget-hid10
-rw-r--r--Documentation/ABI/testing/configfs-usb-gadget-loopback6
-rw-r--r--Documentation/ABI/testing/configfs-usb-gadget-mass-storage24
-rw-r--r--Documentation/ABI/testing/configfs-usb-gadget-midi14
-rw-r--r--Documentation/ABI/testing/configfs-usb-gadget-midi254
-rw-r--r--Documentation/ABI/testing/configfs-usb-gadget-printer6
-rw-r--r--Documentation/ABI/testing/configfs-usb-gadget-rndis16
-rw-r--r--Documentation/ABI/testing/configfs-usb-gadget-sourcesink18
-rw-r--r--Documentation/ABI/testing/configfs-usb-gadget-subset10
-rw-r--r--Documentation/ABI/testing/configfs-usb-gadget-uac143
-rw-r--r--Documentation/ABI/testing/configfs-usb-gadget-uac251
-rw-r--r--Documentation/ABI/testing/configfs-usb-gadget-uvc342
-rw-r--r--Documentation/ABI/testing/debugfs-alienware-wmi64
-rw-r--r--Documentation/ABI/testing/debugfs-amd-iommu131
-rw-r--r--Documentation/ABI/testing/debugfs-cec-error-inj6
-rw-r--r--Documentation/ABI/testing/debugfs-cros-ec78
-rw-r--r--Documentation/ABI/testing/debugfs-cxl157
-rw-r--r--Documentation/ABI/testing/debugfs-dell-wmi-ddv34
-rw-r--r--Documentation/ABI/testing/debugfs-driver-dcc127
-rw-r--r--Documentation/ABI/testing/debugfs-driver-genwqe2
-rw-r--r--Documentation/ABI/testing/debugfs-driver-habanalabs323
-rw-r--r--Documentation/ABI/testing/debugfs-driver-qat109
-rw-r--r--Documentation/ABI/testing/debugfs-driver-qat_telemetry259
-rw-r--r--Documentation/ABI/testing/debugfs-dwc-pcie157
-rw-r--r--Documentation/ABI/testing/debugfs-ec11
-rw-r--r--Documentation/ABI/testing/debugfs-hisi-hpre193
-rw-r--r--Documentation/ABI/testing/debugfs-hisi-migration25
-rw-r--r--Documentation/ABI/testing/debugfs-hisi-sec166
-rw-r--r--Documentation/ABI/testing/debugfs-hisi-zip167
-rw-r--r--Documentation/ABI/testing/debugfs-hyperv23
-rw-r--r--Documentation/ABI/testing/debugfs-iio-ad946739
-rw-r--r--Documentation/ABI/testing/debugfs-iio-backend20
-rw-r--r--Documentation/ABI/testing/debugfs-intel-iommu276
-rw-r--r--Documentation/ABI/testing/debugfs-moxtet35
-rw-r--r--Documentation/ABI/testing/debugfs-msi-wmi-platform14
-rw-r--r--Documentation/ABI/testing/debugfs-pcie-ptm70
-rw-r--r--Documentation/ABI/testing/debugfs-pfo-nx-crypto28
-rw-r--r--Documentation/ABI/testing/debugfs-pktcdvd19
-rw-r--r--Documentation/ABI/testing/debugfs-scmi70
-rw-r--r--Documentation/ABI/testing/debugfs-scmi-raw208
-rw-r--r--Documentation/ABI/testing/debugfs-tpmi40
-rw-r--r--Documentation/ABI/testing/debugfs-vfio31
-rw-r--r--Documentation/ABI/testing/debugfs-wilco-ec45
-rw-r--r--Documentation/ABI/testing/dell-smbios-wmi34
-rw-r--r--Documentation/ABI/testing/dev-kmsg53
-rw-r--r--Documentation/ABI/testing/evm58
-rw-r--r--Documentation/ABI/testing/gpio-cdev18
-rw-r--r--Documentation/ABI/testing/ima_policy117
-rw-r--r--Documentation/ABI/testing/procfs-attr-current20
-rw-r--r--Documentation/ABI/testing/procfs-attr-exec20
-rw-r--r--Documentation/ABI/testing/procfs-attr-prev19
-rw-r--r--Documentation/ABI/testing/procfs-diskstats49
-rw-r--r--Documentation/ABI/testing/procfs-smaps_rollup51
-rw-r--r--Documentation/ABI/testing/pstore24
-rw-r--r--Documentation/ABI/testing/rtc-cdev8
-rw-r--r--Documentation/ABI/testing/securityfs-secrets-coco51
-rw-r--r--Documentation/ABI/testing/sysfs-amd-pmc13
-rw-r--r--Documentation/ABI/testing/sysfs-amd-pmf13
-rw-r--r--Documentation/ABI/testing/sysfs-ata13
-rw-r--r--Documentation/ABI/testing/sysfs-block273
-rw-r--r--Documentation/ABI/testing/sysfs-block-device69
-rw-r--r--Documentation/ABI/testing/sysfs-block-rnbd64
-rw-r--r--Documentation/ABI/testing/sysfs-block-zram68
-rw-r--r--Documentation/ABI/testing/sysfs-bus-acpi37
-rw-r--r--Documentation/ABI/testing/sysfs-bus-auxiliary9
-rw-r--r--Documentation/ABI/testing/sysfs-bus-bcma2
-rw-r--r--Documentation/ABI/testing/sysfs-bus-cdx121
-rw-r--r--Documentation/ABI/testing/sysfs-bus-coreboot45
-rw-r--r--Documentation/ABI/testing/sysfs-bus-coresight-devices-cti247
-rw-r--r--Documentation/ABI/testing/sysfs-bus-coresight-devices-dummy-source21
-rw-r--r--Documentation/ABI/testing/sysfs-bus-coresight-devices-etb1027
-rw-r--r--Documentation/ABI/testing/sysfs-bus-coresight-devices-etm3x28
-rw-r--r--Documentation/ABI/testing/sysfs-bus-coresight-devices-etm4x279
-rw-r--r--Documentation/ABI/testing/sysfs-bus-coresight-devices-funnel6
-rw-r--r--Documentation/ABI/testing/sysfs-bus-coresight-devices-stm8
-rw-r--r--Documentation/ABI/testing/sysfs-bus-coresight-devices-tmc42
-rw-r--r--Documentation/ABI/testing/sysfs-bus-coresight-devices-tpdm280
-rw-r--r--Documentation/ABI/testing/sysfs-bus-coresight-devices-trbe20
-rw-r--r--Documentation/ABI/testing/sysfs-bus-coresight-devices-ultra_smb31
-rw-r--r--Documentation/ABI/testing/sysfs-bus-counter477
-rw-r--r--Documentation/ABI/testing/sysfs-bus-css37
-rw-r--r--Documentation/ABI/testing/sysfs-bus-cxl617
-rw-r--r--Documentation/ABI/testing/sysfs-bus-dax153
-rw-r--r--Documentation/ABI/testing/sysfs-bus-dfl17
-rw-r--r--Documentation/ABI/testing/sysfs-bus-dfl-devices-emif25
-rw-r--r--Documentation/ABI/testing/sysfs-bus-dfl-devices-n3000-nios47
-rw-r--r--Documentation/ABI/testing/sysfs-bus-event_source-devices24
-rw-r--r--Documentation/ABI/testing/sysfs-bus-event_source-devices-caps24
-rw-r--r--Documentation/ABI/testing/sysfs-bus-event_source-devices-dfl_fme104
-rw-r--r--Documentation/ABI/testing/sysfs-bus-event_source-devices-dsa30
-rw-r--r--Documentation/ABI/testing/sysfs-bus-event_source-devices-events12
-rw-r--r--Documentation/ABI/testing/sysfs-bus-event_source-devices-format7
-rw-r--r--Documentation/ABI/testing/sysfs-bus-event_source-devices-hisi_ptt113
-rw-r--r--Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x753
-rw-r--r--Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci199
-rw-r--r--Documentation/ABI/testing/sysfs-bus-event_source-devices-iommu37
-rw-r--r--Documentation/ABI/testing/sysfs-bus-event_source-devices-uncore13
-rw-r--r--Documentation/ABI/testing/sysfs-bus-event_source-devices-vpa-dtl25
-rw-r--r--Documentation/ABI/testing/sysfs-bus-event_source-devices-vpa-pmu25
-rw-r--r--Documentation/ABI/testing/sysfs-bus-fcoe70
-rw-r--r--Documentation/ABI/testing/sysfs-bus-fsi26
-rw-r--r--Documentation/ABI/testing/sysfs-bus-fsi-devices-sbefifo10
-rw-r--r--Documentation/ABI/testing/sysfs-bus-fsl-mc12
-rw-r--r--Documentation/ABI/testing/sysfs-bus-i2c-devices-fsa948026
-rw-r--r--Documentation/ABI/testing/sysfs-bus-i2c-devices-hm635212
-rw-r--r--Documentation/ABI/testing/sysfs-bus-i2c-devices-m24lr100
-rw-r--r--Documentation/ABI/testing/sysfs-bus-i2c-devices-pca954x23
-rw-r--r--Documentation/ABI/testing/sysfs-bus-i2c-devices-turris-omnia-mcu113
-rw-r--r--Documentation/ABI/testing/sysfs-bus-i3c163
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio1085
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-accel-adxl3727
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-ad9739a19
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-adc-ad-sigma-delta23
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-adc-ad413020
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-adc-ad719227
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-adc-ad7280a13
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-adc-envelope-detector5
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-adc-hi84355
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-adc-max1141013
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-adc-max961117
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-adc-mcp356453
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-adc-mt636078
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-adc-pac19349
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-adc-stm323
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-bno05581
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-cdc-ad774611
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-chemical-sgp4017
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-chemical-sunrise-co238
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-counter-104-quad-8117
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-cros-ec22
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-dac61
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-dac-ad576631
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-dac-ltc268855
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-dfsdm-adc-stm3210
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-distance-srf0817
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-dma-buffer19
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-filter-admv881818
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-frequency-ad95232
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-frequency-adf437111
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-frequency-admv101338
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-frequency-admv101423
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-health-afe440x22
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-humidity10
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-humidity-hdc100x9
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-impedance-analyzer-ad593335
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-ina2xx-adc9
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-inv_icm4260018
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-light-isl290186
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-light-lm3533-als11
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-light-tsl277213
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-lptimer-stm3257
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-magnetometer-hmc584319
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-proximity24
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-proximity-as393511
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-resolver-ad2s121027
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-sps3028
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-sx931010
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-sx932429
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-thermocouple18
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-timer-stm32150
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-vf6102
-rw-r--r--Documentation/ABI/testing/sysfs-bus-intel_th-devices-gth11
-rw-r--r--Documentation/ABI/testing/sysfs-bus-intel_th-devices-msc23
-rw-r--r--Documentation/ABI/testing/sysfs-bus-intel_th-output-devices6
-rw-r--r--Documentation/ABI/testing/sysfs-bus-mdio101
-rw-r--r--Documentation/ABI/testing/sysfs-bus-mei30
-rw-r--r--Documentation/ABI/testing/sysfs-bus-most299
-rw-r--r--Documentation/ABI/testing/sysfs-bus-moxtet-devices17
-rw-r--r--Documentation/ABI/testing/sysfs-bus-nfit61
-rw-r--r--Documentation/ABI/testing/sysfs-bus-nvdimm59
-rw-r--r--Documentation/ABI/testing/sysfs-bus-optee-devices17
-rw-r--r--Documentation/ABI/testing/sysfs-bus-papr-pmem75
-rw-r--r--Documentation/ABI/testing/sysfs-bus-pci304
-rw-r--r--Documentation/ABI/testing/sysfs-bus-pci-devices-aer163
-rw-r--r--Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats122
-rw-r--r--Documentation/ABI/testing/sysfs-bus-pci-devices-avs8
-rw-r--r--Documentation/ABI/testing/sysfs-bus-pci-devices-catpt17
-rw-r--r--Documentation/ABI/testing/sysfs-bus-pci-devices-cciss44
-rw-r--r--Documentation/ABI/testing/sysfs-bus-pci-devices-pvpanic26
-rw-r--r--Documentation/ABI/testing/sysfs-bus-pci-drivers-ehci_hcd4
-rw-r--r--Documentation/ABI/testing/sysfs-bus-pci-drivers-xhci_hcd62
-rw-r--r--Documentation/ABI/testing/sysfs-bus-peci16
-rw-r--r--Documentation/ABI/testing/sysfs-bus-platform36
-rw-r--r--Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro325
-rw-r--r--Documentation/ABI/testing/sysfs-bus-platform-devices-occ-hwmon13
-rw-r--r--Documentation/ABI/testing/sysfs-bus-platform-drivers-amd_x3d_vcache12
-rw-r--r--Documentation/ABI/testing/sysfs-bus-platform-onboard-usb-dev9
-rw-r--r--Documentation/ABI/testing/sysfs-bus-rapidio55
-rw-r--r--Documentation/ABI/testing/sysfs-bus-rbd37
-rw-r--r--Documentation/ABI/testing/sysfs-bus-siox25
-rw-r--r--Documentation/ABI/testing/sysfs-bus-soundwire-master23
-rw-r--r--Documentation/ABI/testing/sysfs-bus-soundwire-slave109
-rw-r--r--Documentation/ABI/testing/sysfs-bus-spi-devices-spi-nor40
-rw-r--r--Documentation/ABI/testing/sysfs-bus-surface_aggregator-tabletsw57
-rw-r--r--Documentation/ABI/testing/sysfs-bus-thunderbolt271
-rw-r--r--Documentation/ABI/testing/sysfs-bus-umc28
-rw-r--r--Documentation/ABI/testing/sysfs-bus-usb446
-rw-r--r--Documentation/ABI/testing/sysfs-bus-usb-devices-usbsevseg29
-rw-r--r--Documentation/ABI/testing/sysfs-bus-vdpa57
-rw-r--r--Documentation/ABI/testing/sysfs-bus-vfio-mdev10
-rw-r--r--Documentation/ABI/testing/sysfs-bus-wmi81
-rw-r--r--Documentation/ABI/testing/sysfs-c2port7
-rw-r--r--Documentation/ABI/testing/sysfs-class2
-rw-r--r--Documentation/ABI/testing/sysfs-class-backlight186
-rw-r--r--Documentation/ABI/testing/sysfs-class-backlight-adp552031
-rw-r--r--Documentation/ABI/testing/sysfs-class-backlight-adp886054
-rw-r--r--Documentation/ABI/testing/sysfs-class-backlight-driver-adp887056
-rw-r--r--Documentation/ABI/testing/sysfs-class-backlight-driver-lm353332
-rw-r--r--Documentation/ABI/testing/sysfs-class-bdi97
-rw-r--r--Documentation/ABI/testing/sysfs-class-chromeos53
-rw-r--r--Documentation/ABI/testing/sysfs-class-chromeos-driver-cros-ec-lightbar74
-rw-r--r--Documentation/ABI/testing/sysfs-class-chromeos-driver-cros-ec-vbc6
-rw-r--r--Documentation/ABI/testing/sysfs-class-devfreq95
-rw-r--r--Documentation/ABI/testing/sysfs-class-devfreq-event12
-rw-r--r--Documentation/ABI/testing/sysfs-class-devlink130
-rw-r--r--Documentation/ABI/testing/sysfs-class-drm8
-rw-r--r--Documentation/ABI/testing/sysfs-class-extcon46
-rw-r--r--Documentation/ABI/testing/sysfs-class-fc27
-rw-r--r--Documentation/ABI/testing/sysfs-class-fc_host23
-rw-r--r--Documentation/ABI/testing/sysfs-class-fc_remote_ports23
-rw-r--r--Documentation/ABI/testing/sysfs-class-firmware77
-rw-r--r--Documentation/ABI/testing/sysfs-class-firmware-attributes494
-rw-r--r--Documentation/ABI/testing/sysfs-class-fpga-manager5
-rw-r--r--Documentation/ABI/testing/sysfs-class-gnss4
-rw-r--r--Documentation/ABI/testing/sysfs-class-hwmon1075
-rw-r--r--Documentation/ABI/testing/sysfs-class-intel_pmt119
-rw-r--r--Documentation/ABI/testing/sysfs-class-intel_pmt-features134
-rw-r--r--Documentation/ABI/testing/sysfs-class-led34
-rw-r--r--Documentation/ABI/testing/sysfs-class-led-driver-aw200xx5
-rw-r--r--Documentation/ABI/testing/sysfs-class-led-driver-lm353352
-rw-r--r--Documentation/ABI/testing/sysfs-class-led-driver-sc27xx22
-rw-r--r--Documentation/ABI/testing/sysfs-class-led-driver-turris-omnia28
-rw-r--r--Documentation/ABI/testing/sysfs-class-led-flash27
-rw-r--r--Documentation/ABI/testing/sysfs-class-led-multicolor24
-rw-r--r--Documentation/ABI/testing/sysfs-class-led-trigger-netdev147
-rw-r--r--Documentation/ABI/testing/sysfs-class-led-trigger-pattern64
-rw-r--r--Documentation/ABI/testing/sysfs-class-led-trigger-tty62
-rw-r--r--Documentation/ABI/testing/sysfs-class-led-trigger-usbport1
-rw-r--r--Documentation/ABI/testing/sysfs-class-leds-gt683r12
-rw-r--r--Documentation/ABI/testing/sysfs-class-mei50
-rw-r--r--Documentation/ABI/testing/sysfs-class-mic178
-rw-r--r--Documentation/ABI/testing/sysfs-class-mic.txt166
-rw-r--r--Documentation/ABI/testing/sysfs-class-mtd2
-rw-r--r--Documentation/ABI/testing/sysfs-class-mux2
-rw-r--r--Documentation/ABI/testing/sysfs-class-net89
-rw-r--r--Documentation/ABI/testing/sysfs-class-net-batman-adv30
-rw-r--r--Documentation/ABI/testing/sysfs-class-net-cdc_ncm6
-rw-r--r--Documentation/ABI/testing/sysfs-class-net-dsa13
-rw-r--r--Documentation/ABI/testing/sysfs-class-net-mesh108
-rw-r--r--Documentation/ABI/testing/sysfs-class-net-peak_usb19
-rw-r--r--Documentation/ABI/testing/sysfs-class-net-phydev51
-rw-r--r--Documentation/ABI/testing/sysfs-class-net-qmi30
-rw-r--r--Documentation/ABI/testing/sysfs-class-net-queues47
-rw-r--r--Documentation/ABI/testing/sysfs-class-net-statistics60
-rw-r--r--Documentation/ABI/testing/sysfs-class-ocxl20
-rw-r--r--Documentation/ABI/testing/sysfs-class-pktcdvd93
-rw-r--r--Documentation/ABI/testing/sysfs-class-platform-profile48
-rw-r--r--Documentation/ABI/testing/sysfs-class-power756
-rw-r--r--Documentation/ABI/testing/sysfs-class-power-gaokun27
-rw-r--r--Documentation/ABI/testing/sysfs-class-power-ltc4162l82
-rw-r--r--Documentation/ABI/testing/sysfs-class-power-max1720x32
-rw-r--r--Documentation/ABI/testing/sysfs-class-power-mp26299
-rw-r--r--Documentation/ABI/testing/sysfs-class-power-rt946719
-rw-r--r--Documentation/ABI/testing/sysfs-class-power-rt947132
-rw-r--r--Documentation/ABI/testing/sysfs-class-power-surface15
-rw-r--r--Documentation/ABI/testing/sysfs-class-power-twl403033
-rw-r--r--Documentation/ABI/testing/sysfs-class-power-wilco40
-rw-r--r--Documentation/ABI/testing/sysfs-class-powercap4
-rw-r--r--Documentation/ABI/testing/sysfs-class-pwm22
-rw-r--r--Documentation/ABI/testing/sysfs-class-rapidio52
-rw-r--r--Documentation/ABI/testing/sysfs-class-rc44
-rw-r--r--Documentation/ABI/testing/sysfs-class-rc-nuvoton2
-rw-r--r--Documentation/ABI/testing/sysfs-class-regulator117
-rw-r--r--Documentation/ABI/testing/sysfs-class-remoteproc68
-rw-r--r--Documentation/ABI/testing/sysfs-class-rnbd-client133
-rw-r--r--Documentation/ABI/testing/sysfs-class-rnbd-server58
-rw-r--r--Documentation/ABI/testing/sysfs-class-rtc-rtc0-device-rtc_calibration1
-rw-r--r--Documentation/ABI/testing/sysfs-class-rtrs-client146
-rw-r--r--Documentation/ABI/testing/sysfs-class-rtrs-server53
-rw-r--r--Documentation/ABI/testing/sysfs-class-scsi_host7
-rw-r--r--Documentation/ABI/testing/sysfs-class-spi-eeprom19
-rw-r--r--Documentation/ABI/testing/sysfs-class-switchtec2
-rw-r--r--Documentation/ABI/testing/sysfs-class-tee15
-rw-r--r--Documentation/ABI/testing/sysfs-class-thermal259
-rw-r--r--Documentation/ABI/testing/sysfs-class-typec228
-rw-r--r--Documentation/ABI/testing/sysfs-class-usb_power_delivery256
-rw-r--r--Documentation/ABI/testing/sysfs-class-usb_role6
-rw-r--r--Documentation/ABI/testing/sysfs-class-uwb_rc159
-rw-r--r--Documentation/ABI/testing/sysfs-class-uwb_rc-wusbhc57
-rw-r--r--Documentation/ABI/testing/sysfs-class-vduse33
-rw-r--r--Documentation/ABI/testing/sysfs-class-wakeup76
-rw-r--r--Documentation/ABI/testing/sysfs-class-watchdog82
-rw-r--r--Documentation/ABI/testing/sysfs-dev7
-rw-r--r--Documentation/ABI/testing/sysfs-devices-consumer9
-rw-r--r--Documentation/ABI/testing/sysfs-devices-mapping62
-rw-r--r--Documentation/ABI/testing/sysfs-devices-memory75
-rw-r--r--Documentation/ABI/testing/sysfs-devices-online2
-rw-r--r--Documentation/ABI/testing/sysfs-devices-physical_location42
-rw-r--r--Documentation/ABI/testing/sysfs-devices-platform-ACPI-TAD6
-rw-r--r--Documentation/ABI/testing/sysfs-devices-platform-_UDC_-gadget10
-rw-r--r--Documentation/ABI/testing/sysfs-devices-platform-docg310
-rw-r--r--Documentation/ABI/testing/sysfs-devices-platform-dock10
-rw-r--r--Documentation/ABI/testing/sysfs-devices-platform-ipmi52
-rw-r--r--Documentation/ABI/testing/sysfs-devices-platform-kunpeng_hccs126
-rw-r--r--Documentation/ABI/testing/sysfs-devices-platform-sh_mobile_lcdc_fb8
-rw-r--r--Documentation/ABI/testing/sysfs-devices-platform-soc-ipa114
-rw-r--r--Documentation/ABI/testing/sysfs-devices-platform-stratix10-rsu174
-rw-r--r--Documentation/ABI/testing/sysfs-devices-power79
-rw-r--r--Documentation/ABI/testing/sysfs-devices-removable20
-rw-r--r--Documentation/ABI/testing/sysfs-devices-soc51
-rw-r--r--Documentation/ABI/testing/sysfs-devices-software_node10
-rw-r--r--Documentation/ABI/testing/sysfs-devices-state_synced29
-rw-r--r--Documentation/ABI/testing/sysfs-devices-supplier9
-rw-r--r--Documentation/ABI/testing/sysfs-devices-system-cpu474
-rw-r--r--Documentation/ABI/testing/sysfs-devices-system-ibm-rtl6
-rw-r--r--Documentation/ABI/testing/sysfs-devices-vfio-dev8
-rw-r--r--Documentation/ABI/testing/sysfs-devices-virtual-misc-tdx_guest63
-rw-r--r--Documentation/ABI/testing/sysfs-devices-waiting_for_supplier17
-rw-r--r--Documentation/ABI/testing/sysfs-devices-xenbus41
-rw-r--r--Documentation/ABI/testing/sysfs-driver-altera-cvp2
-rw-r--r--Documentation/ABI/testing/sysfs-driver-amd-sfh13
-rw-r--r--Documentation/ABI/testing/sysfs-driver-aspeed-uart-routing27
-rw-r--r--Documentation/ABI/testing/sysfs-driver-bd9571mwv-regulator6
-rw-r--r--Documentation/ABI/testing/sysfs-driver-ccp105
-rw-r--r--Documentation/ABI/testing/sysfs-driver-chromeos-acpi152
-rw-r--r--Documentation/ABI/testing/sysfs-driver-eud9
-rw-r--r--Documentation/ABI/testing/sysfs-driver-framer-pef22568
-rw-r--r--Documentation/ABI/testing/sysfs-driver-ge-achc15
-rw-r--r--Documentation/ABI/testing/sysfs-driver-genwqe11
-rw-r--r--Documentation/ABI/testing/sysfs-driver-habanalabs265
-rw-r--r--Documentation/ABI/testing/sysfs-driver-hid12
-rw-r--r--Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd13
-rw-r--r--Documentation/ABI/testing/sysfs-driver-hid-corsair-void38
-rw-r--r--Documentation/ABI/testing/sysfs-driver-hid-lenovo10
-rw-r--r--Documentation/ABI/testing/sysfs-driver-hid-logitech-lg4ff18
-rw-r--r--Documentation/ABI/testing/sysfs-driver-hid-ntrig13
-rw-r--r--Documentation/ABI/testing/sysfs-driver-hid-roccat-kone21
-rw-r--r--Documentation/ABI/testing/sysfs-driver-hid-wiimote12
-rw-r--r--Documentation/ABI/testing/sysfs-driver-input-cros-ec-keyb6
-rw-r--r--Documentation/ABI/testing/sysfs-driver-input-exc300026
-rw-r--r--Documentation/ABI/testing/sysfs-driver-intc_sar54
-rw-r--r--Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon93
-rw-r--r--Documentation/ABI/testing/sysfs-driver-intel-m10-bmc36
-rw-r--r--Documentation/ABI/testing/sysfs-driver-intel-m10-bmc-sec-update61
-rw-r--r--Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon198
-rw-r--r--Documentation/ABI/testing/sysfs-driver-intel_sdsi90
-rw-r--r--Documentation/ABI/testing/sysfs-driver-jz4780-efuse20
-rw-r--r--Documentation/ABI/testing/sysfs-driver-panfrost-profiling10
-rw-r--r--Documentation/ABI/testing/sysfs-driver-panthor-profiling10
-rw-r--r--Documentation/ABI/testing/sysfs-driver-pciback19
-rw-r--r--Documentation/ABI/testing/sysfs-driver-ppi2
-rw-r--r--Documentation/ABI/testing/sysfs-driver-qaic18
-rw-r--r--Documentation/ABI/testing/sysfs-driver-qat167
-rw-r--r--Documentation/ABI/testing/sysfs-driver-qat_ras41
-rw-r--r--Documentation/ABI/testing/sysfs-driver-qat_rl226
-rw-r--r--Documentation/ABI/testing/sysfs-driver-samsung-laptop18
-rw-r--r--Documentation/ABI/testing/sysfs-driver-spi-intel20
-rw-r--r--Documentation/ABI/testing/sysfs-driver-st2
-rw-r--r--Documentation/ABI/testing/sysfs-driver-tegra-fuse2
-rw-r--r--Documentation/ABI/testing/sysfs-driver-toshiba_acpi26
-rw-r--r--Documentation/ABI/testing/sysfs-driver-toshiba_haps2
-rw-r--r--Documentation/ABI/testing/sysfs-driver-typec-displayport25
-rw-r--r--Documentation/ABI/testing/sysfs-driver-uacce57
-rw-r--r--Documentation/ABI/testing/sysfs-driver-ucsi-ccg6
-rw-r--r--Documentation/ABI/testing/sysfs-driver-ufs969
-rw-r--r--Documentation/ABI/testing/sysfs-driver-w1_ds28e173
-rw-r--r--Documentation/ABI/testing/sysfs-driver-w1_therm190
-rw-r--r--Documentation/ABI/testing/sysfs-driver-wacom6
-rw-r--r--Documentation/ABI/testing/sysfs-driver-xdata49
-rw-r--r--Documentation/ABI/testing/sysfs-driver-xen-blkback19
-rw-r--r--Documentation/ABI/testing/sysfs-driver-xen-blkfront11
-rw-r--r--Documentation/ABI/testing/sysfs-driver-xilinx-tmr-manager16
-rw-r--r--Documentation/ABI/testing/sysfs-driver-zynqmp-fpga73
-rw-r--r--Documentation/ABI/testing/sysfs-edac-ecs74
-rw-r--r--Documentation/ABI/testing/sysfs-edac-memory-repair206
-rw-r--r--Documentation/ABI/testing/sysfs-edac-scrub85
-rw-r--r--Documentation/ABI/testing/sysfs-firmware-acpi307
-rw-r--r--Documentation/ABI/testing/sysfs-firmware-dmi-entries52
-rw-r--r--Documentation/ABI/testing/sysfs-firmware-efi15
-rw-r--r--Documentation/ABI/testing/sysfs-firmware-efi-esrt44
-rw-r--r--Documentation/ABI/testing/sysfs-firmware-efi-runtime-map14
-rw-r--r--Documentation/ABI/testing/sysfs-firmware-gsmi2
-rw-r--r--Documentation/ABI/testing/sysfs-firmware-initrd8
-rw-r--r--Documentation/ABI/testing/sysfs-firmware-lefi-boardinfo35
-rw-r--r--Documentation/ABI/testing/sysfs-firmware-memmap20
-rw-r--r--Documentation/ABI/testing/sysfs-firmware-opal-powercap4
-rw-r--r--Documentation/ABI/testing/sysfs-firmware-opal-psr4
-rw-r--r--Documentation/ABI/testing/sysfs-firmware-opal-sensor-groups21
-rw-r--r--Documentation/ABI/testing/sysfs-firmware-papr-energy-scale-info29
-rw-r--r--Documentation/ABI/testing/sysfs-firmware-qemu_fw_cfg25
-rw-r--r--Documentation/ABI/testing/sysfs-firmware-sfi15
-rw-r--r--Documentation/ABI/testing/sysfs-firmware-sgi_uv160
-rw-r--r--Documentation/ABI/testing/sysfs-firmware-turris-mox-rwtm28
-rw-r--r--Documentation/ABI/testing/sysfs-fs-erofs45
-rw-r--r--Documentation/ABI/testing/sysfs-fs-ext413
-rw-r--r--Documentation/ABI/testing/sysfs-fs-f2fs908
-rw-r--r--Documentation/ABI/testing/sysfs-fs-ubifs35
-rw-r--r--Documentation/ABI/testing/sysfs-fs-virtiofs11
-rw-r--r--Documentation/ABI/testing/sysfs-fs-xfs28
-rw-r--r--Documentation/ABI/testing/sysfs-hypervisor-xen13
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-address_bits10
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-boot_params23
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-btf25
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-cpu_byteorder12
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-dmabuf-buffers24
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-fadump59
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-fscaps2
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-hardlockup_count7
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-iommu_groups38
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-livepatch39
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-mm-cma44
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-mm-damon546
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-mm-hugepages12
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-mm-ksm15
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-mm-memory-tiers25
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-mm-mempolicy4
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-mm-mempolicy-weighted-interleave54
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-mm-numa28
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-mm-transparent-hugepage18
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-oops_count6
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-rcu_stall_count6
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-reboot40
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-slab228
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-softlockup_count7
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-vmcoreinfo2
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-warn_count6
-rw-r--r--Documentation/ABI/testing/sysfs-mce97
-rw-r--r--Documentation/ABI/testing/sysfs-memory-page-offline4
-rw-r--r--Documentation/ABI/testing/sysfs-module39
-rw-r--r--Documentation/ABI/testing/sysfs-nvmem-cells21
-rw-r--r--Documentation/ABI/testing/sysfs-ocfs212
-rw-r--r--Documentation/ABI/testing/sysfs-platform-alienware-wmi14
-rw-r--r--Documentation/ABI/testing/sysfs-platform-asus-laptop23
-rw-r--r--Documentation/ABI/testing/sysfs-platform-asus-wmi176
-rw-r--r--Documentation/ABI/testing/sysfs-platform-at9110
-rw-r--r--Documentation/ABI/testing/sysfs-platform-brcmstb-memc15
-rw-r--r--Documentation/ABI/testing/sysfs-platform-chipidea-usb-otg8
-rw-r--r--Documentation/ABI/testing/sysfs-platform-chipidea-usb26
-rw-r--r--Documentation/ABI/testing/sysfs-platform-dell-laptop20
-rw-r--r--Documentation/ABI/testing/sysfs-platform-dell-privacy-wmi71
-rw-r--r--Documentation/ABI/testing/sysfs-platform-dell-smbios6
-rw-r--r--Documentation/ABI/testing/sysfs-platform-dell-wmi-ddv9
-rw-r--r--Documentation/ABI/testing/sysfs-platform-dfl-fme223
-rw-r--r--Documentation/ABI/testing/sysfs-platform-dfl-port85
-rw-r--r--Documentation/ABI/testing/sysfs-platform-dptf125
-rw-r--r--Documentation/ABI/testing/sysfs-platform-eeepc-laptop14
-rw-r--r--Documentation/ABI/testing/sysfs-platform-hidma2
-rw-r--r--Documentation/ABI/testing/sysfs-platform-hidma-mgmt20
-rw-r--r--Documentation/ABI/testing/sysfs-platform-i2c-demux-pinctrl8
-rw-r--r--Documentation/ABI/testing/sysfs-platform-ideapad-laptop26
-rw-r--r--Documentation/ABI/testing/sysfs-platform-intel-ifs52
-rw-r--r--Documentation/ABI/testing/sysfs-platform-intel-pmc22
-rw-r--r--Documentation/ABI/testing/sysfs-platform-intel-wmi-sbl-fw-update13
-rw-r--r--Documentation/ABI/testing/sysfs-platform-intel-wmi-thunderbolt5
-rw-r--r--Documentation/ABI/testing/sysfs-platform-kim5
-rw-r--r--Documentation/ABI/testing/sysfs-platform-lg-laptop1
-rw-r--r--Documentation/ABI/testing/sysfs-platform-mellanox-bootctl162
-rw-r--r--Documentation/ABI/testing/sysfs-platform-mellanox-pmc64
-rw-r--r--Documentation/ABI/testing/sysfs-platform-oxp25
-rw-r--r--Documentation/ABI/testing/sysfs-platform-phy-rcar-gen3-usb210
-rw-r--r--Documentation/ABI/testing/sysfs-platform-power-on-reason12
-rw-r--r--Documentation/ABI/testing/sysfs-platform-renesas_usb310
-rw-r--r--Documentation/ABI/testing/sysfs-platform-silicom30
-rw-r--r--Documentation/ABI/testing/sysfs-platform-sst-atom17
-rw-r--r--Documentation/ABI/testing/sysfs-platform-usbip-vudc11
-rw-r--r--Documentation/ABI/testing/sysfs-platform-wilco-ec58
-rw-r--r--Documentation/ABI/testing/sysfs-platform_profile40
-rw-r--r--Documentation/ABI/testing/sysfs-power203
-rw-r--r--Documentation/ABI/testing/sysfs-pps-gen43
-rw-r--r--Documentation/ABI/testing/sysfs-pps-gen-tio6
-rw-r--r--Documentation/ABI/testing/sysfs-profiling2
-rw-r--r--Documentation/ABI/testing/sysfs-ptp48
-rw-r--r--Documentation/ABI/testing/sysfs-secvar130
-rw-r--r--Documentation/ABI/testing/sysfs-timecard293
-rw-r--r--Documentation/ABI/testing/sysfs-tty52
-rw-r--r--Documentation/ABI/testing/sysfs-uevent28
-rw-r--r--Documentation/ABI/testing/sysfs-wusb_cbaf100
-rw-r--r--Documentation/ABI/testing/usb-charger-uevent54
-rw-r--r--Documentation/ABI/testing/usb-uevent27
-rw-r--r--Documentation/DMA-API-HOWTO.txt969
-rw-r--r--Documentation/DMA-API.txt768
-rw-r--r--Documentation/DMA-ISA-LPC.txt152
-rw-r--r--Documentation/DMA-attributes.txt158
-rw-r--r--Documentation/EDID/1024x768.S44
-rw-r--r--Documentation/EDID/1280x1024.S44
-rw-r--r--Documentation/EDID/1600x1200.S44
-rw-r--r--Documentation/EDID/1680x1050.S44
-rw-r--r--Documentation/EDID/1920x1080.S44
-rw-r--r--Documentation/EDID/800x600.S41
-rw-r--r--Documentation/EDID/HOWTO.txt58
-rw-r--r--Documentation/EDID/Makefile26
-rw-r--r--Documentation/EDID/edid.S272
-rw-r--r--Documentation/EDID/hex1
-rw-r--r--Documentation/IPMI.txt746
-rw-r--r--Documentation/IRQ-affinity.txt70
-rw-r--r--Documentation/IRQ-domain.txt269
-rw-r--r--Documentation/IRQ.txt24
-rw-r--r--Documentation/Intel-IOMMU.txt114
-rw-r--r--Documentation/Kconfig28
-rw-r--r--Documentation/Makefile128
-rw-r--r--Documentation/PCI/MSI-HOWTO.txt270
-rw-r--r--Documentation/PCI/PCIEBUS-HOWTO.txt198
-rw-r--r--Documentation/PCI/acpi-info.rst192
-rw-r--r--Documentation/PCI/acpi-info.txt187
-rw-r--r--Documentation/PCI/boot-interrupts.rst159
-rw-r--r--Documentation/PCI/controller/index.rst10
-rw-r--r--Documentation/PCI/controller/rcar-pcie-firmware.rst32
-rw-r--r--Documentation/PCI/endpoint/function/binding/pci-ntb.rst38
-rw-r--r--Documentation/PCI/endpoint/function/binding/pci-test.rst26
-rw-r--r--Documentation/PCI/endpoint/function/binding/pci-test.txt19
-rw-r--r--Documentation/PCI/endpoint/index.rst21
-rw-r--r--Documentation/PCI/endpoint/pci-endpoint-cfs.rst138
-rw-r--r--Documentation/PCI/endpoint/pci-endpoint-cfs.txt105
-rw-r--r--Documentation/PCI/endpoint/pci-endpoint.rst259
-rw-r--r--Documentation/PCI/endpoint/pci-endpoint.txt215
-rw-r--r--Documentation/PCI/endpoint/pci-ntb-function.rst348
-rw-r--r--Documentation/PCI/endpoint/pci-ntb-howto.rst158
-rw-r--r--Documentation/PCI/endpoint/pci-nvme-function.rst13
-rw-r--r--Documentation/PCI/endpoint/pci-test-function.rst103
-rw-r--r--Documentation/PCI/endpoint/pci-test-function.txt87
-rw-r--r--Documentation/PCI/endpoint/pci-test-howto.rst220
-rw-r--r--Documentation/PCI/endpoint/pci-test-howto.txt206
-rw-r--r--Documentation/PCI/endpoint/pci-vntb-function.rst129
-rw-r--r--Documentation/PCI/endpoint/pci-vntb-howto.rst169
-rw-r--r--Documentation/PCI/index.rst22
-rw-r--r--Documentation/PCI/msi-howto.rst297
-rw-r--r--Documentation/PCI/pci-error-recovery.rst457
-rw-r--r--Documentation/PCI/pci-error-recovery.txt413
-rw-r--r--Documentation/PCI/pci-iov-howto.rst171
-rw-r--r--Documentation/PCI/pci-iov-howto.txt147
-rw-r--r--Documentation/PCI/pci.rst578
-rw-r--r--Documentation/PCI/pci.txt636
-rw-r--r--Documentation/PCI/pcieaer-howto.rst257
-rw-r--r--Documentation/PCI/pcieaer-howto.txt267
-rw-r--r--Documentation/PCI/pciebus-howto.rst228
-rw-r--r--Documentation/PCI/sysfs-pci.rst138
-rw-r--r--Documentation/PCI/tph.rst132
-rw-r--r--Documentation/RCU/Design/Data-Structures/BigTreeClassicRCUBH.svg499
-rw-r--r--Documentation/RCU/Design/Data-Structures/BigTreeClassicRCUBHdyntick.svg695
-rw-r--r--Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntick.svg741
-rw-r--r--Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntickCB.svg834
-rw-r--r--Documentation/RCU/Design/Data-Structures/Data-Structures.html1455
-rw-r--r--Documentation/RCU/Design/Data-Structures/Data-Structures.rst1196
-rw-r--r--Documentation/RCU/Design/Data-Structures/blkd_task.svg676
-rw-r--r--Documentation/RCU/Design/Expedited-Grace-Periods/ExpSchedFlow.svg18
-rw-r--r--Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html666
-rw-r--r--Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst521
-rw-r--r--Documentation/RCU/Design/Expedited-Grace-Periods/Funnel0.svg4
-rw-r--r--Documentation/RCU/Design/Expedited-Grace-Periods/Funnel1.svg4
-rw-r--r--Documentation/RCU/Design/Expedited-Grace-Periods/Funnel2.svg4
-rw-r--r--Documentation/RCU/Design/Expedited-Grace-Periods/Funnel3.svg4
-rw-r--r--Documentation/RCU/Design/Expedited-Grace-Periods/Funnel4.svg4
-rw-r--r--Documentation/RCU/Design/Expedited-Grace-Periods/Funnel5.svg4
-rw-r--r--Documentation/RCU/Design/Expedited-Grace-Periods/Funnel6.svg4
-rw-r--r--Documentation/RCU/Design/Expedited-Grace-Periods/Funnel7.svg4
-rw-r--r--Documentation/RCU/Design/Expedited-Grace-Periods/Funnel8.svg4
-rw-r--r--Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Diagram.html9
-rw-r--r--Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.html705
-rw-r--r--Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst648
-rw-r--r--Documentation/RCU/Design/Memory-Ordering/TreeRCU-callback-invocation.svg2
-rw-r--r--Documentation/RCU/Design/Memory-Ordering/TreeRCU-callback-registry.svg9
-rw-r--r--Documentation/RCU/Design/Memory-Ordering/TreeRCU-dyntick.svg8
-rw-r--r--Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp-fqs.svg12
-rw-r--r--Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp.svg31
-rw-r--r--Documentation/RCU/Design/Memory-Ordering/TreeRCU-hotplug.svg8
-rw-r--r--Documentation/RCU/Design/Memory-Ordering/TreeRCU-qs.svg8
-rw-r--r--Documentation/RCU/Design/Requirements/GPpartitionReaders1.svg36
-rw-r--r--Documentation/RCU/Design/Requirements/ReadersPartitionGP1.svg62
-rw-r--r--Documentation/RCU/Design/Requirements/Requirements.html3254
-rw-r--r--Documentation/RCU/Design/Requirements/Requirements.rst2865
-rw-r--r--Documentation/RCU/NMI-RCU.rst123
-rw-r--r--Documentation/RCU/NMI-RCU.txt120
-rw-r--r--Documentation/RCU/RTFP.txt112
-rw-r--r--Documentation/RCU/UP.rst152
-rw-r--r--Documentation/RCU/UP.txt135
-rw-r--r--Documentation/RCU/arrayRCU.txt153
-rw-r--r--Documentation/RCU/checklist.rst554
-rw-r--r--Documentation/RCU/checklist.txt486
-rw-r--r--Documentation/RCU/index.rst37
-rw-r--r--Documentation/RCU/listRCU.rst511
-rw-r--r--Documentation/RCU/listRCU.txt315
-rw-r--r--Documentation/RCU/lockdep-splat.rst115
-rw-r--r--Documentation/RCU/lockdep-splat.txt110
-rw-r--r--Documentation/RCU/lockdep.rst119
-rw-r--r--Documentation/RCU/lockdep.txt102
-rw-r--r--Documentation/RCU/rcu.rst93
-rw-r--r--Documentation/RCU/rcu.txt89
-rw-r--r--Documentation/RCU/rcu_dereference.rst502
-rw-r--r--Documentation/RCU/rcu_dereference.txt353
-rw-r--r--Documentation/RCU/rcubarrier.rst377
-rw-r--r--Documentation/RCU/rcubarrier.txt326
-rw-r--r--Documentation/RCU/rculist_nulls.rst215
-rw-r--r--Documentation/RCU/rculist_nulls.txt172
-rw-r--r--Documentation/RCU/rcuref.rst158
-rw-r--r--Documentation/RCU/rcuref.txt132
-rw-r--r--Documentation/RCU/stallwarn.rst491
-rw-r--r--Documentation/RCU/stallwarn.txt315
-rw-r--r--Documentation/RCU/torture.rst374
-rw-r--r--Documentation/RCU/torture.txt314
-rw-r--r--Documentation/RCU/whatisRCU.rst1412
-rw-r--r--Documentation/RCU/whatisRCU.txt1057
-rw-r--r--Documentation/accel/amdxdna/amdnpu.rst281
-rw-r--r--Documentation/accel/amdxdna/index.rst11
-rw-r--r--Documentation/accel/index.rst20
-rw-r--r--Documentation/accel/introduction.rst110
-rw-r--r--Documentation/accel/qaic/aic080.rst14
-rw-r--r--Documentation/accel/qaic/aic100.rst517
-rw-r--r--Documentation/accel/qaic/index.rst14
-rw-r--r--Documentation/accel/qaic/qaic.rst209
-rw-r--r--Documentation/accel/rocket/index.rst19
-rw-r--r--Documentation/accelerators/ocxl.rst176
-rw-r--r--Documentation/accounting/cgroupstats.rst31
-rw-r--r--Documentation/accounting/cgroupstats.txt27
-rw-r--r--Documentation/accounting/delay-accounting.rst214
-rw-r--r--Documentation/accounting/delay-accounting.txt117
-rw-r--r--Documentation/accounting/index.rst14
-rw-r--r--Documentation/accounting/psi.rst188
-rw-r--r--Documentation/accounting/psi.txt73
-rw-r--r--Documentation/accounting/taskstats-struct.rst199
-rw-r--r--Documentation/accounting/taskstats-struct.txt180
-rw-r--r--Documentation/accounting/taskstats.rst180
-rw-r--r--Documentation/accounting/taskstats.txt181
-rw-r--r--Documentation/acpi/DSD-properties-rules.txt97
-rw-r--r--Documentation/acpi/acpi-lid.txt96
-rw-r--r--Documentation/acpi/aml-debugger.txt66
-rw-r--r--Documentation/acpi/apei/einj.txt177
-rw-r--r--Documentation/acpi/apei/output_format.txt147
-rw-r--r--Documentation/acpi/cppc_sysfs.txt69
-rw-r--r--Documentation/acpi/debug.txt148
-rw-r--r--Documentation/acpi/dsd/data-node-references.txt89
-rw-r--r--Documentation/acpi/dsd/graph.txt174
-rw-r--r--Documentation/acpi/dsdt-override.txt7
-rw-r--r--Documentation/acpi/enumeration.txt426
-rw-r--r--Documentation/acpi/gpio-properties.txt223
-rw-r--r--Documentation/acpi/i2c-muxes.txt58
-rw-r--r--Documentation/acpi/initrd_table_override.txt107
-rw-r--r--Documentation/acpi/linuxized-acpica.txt262
-rw-r--r--Documentation/acpi/lpit.txt25
-rw-r--r--Documentation/acpi/method-customizing.txt73
-rw-r--r--Documentation/acpi/method-tracing.txt192
-rw-r--r--Documentation/acpi/namespace.txt388
-rw-r--r--Documentation/acpi/osi.txt187
-rw-r--r--Documentation/acpi/scan_handlers.txt77
-rw-r--r--Documentation/acpi/ssdt-overlays.txt172
-rw-r--r--Documentation/acpi/video_extension.txt106
-rw-r--r--Documentation/admin-guide/LSM/LoadPin.rst14
-rw-r--r--Documentation/admin-guide/LSM/SELinux.rst13
-rw-r--r--Documentation/admin-guide/LSM/SafeSetID.rst118
-rw-r--r--Documentation/admin-guide/LSM/Smack.rst4
-rw-r--r--Documentation/admin-guide/LSM/Yama.rst7
-rw-r--r--Documentation/admin-guide/LSM/apparmor.rst7
-rw-r--r--Documentation/admin-guide/LSM/index.rst16
-rw-r--r--Documentation/admin-guide/LSM/ipe.rst824
-rw-r--r--Documentation/admin-guide/LSM/landlock.rst158
-rw-r--r--Documentation/admin-guide/LSM/tomoyo.rst35
-rw-r--r--Documentation/admin-guide/RAS/address-translation.rst24
-rw-r--r--Documentation/admin-guide/RAS/error-decoding.rst21
-rw-r--r--Documentation/admin-guide/RAS/index.rst7
-rw-r--r--Documentation/admin-guide/RAS/main.rst1223
-rw-r--r--Documentation/admin-guide/README.rst218
-rw-r--r--Documentation/admin-guide/abi-obsolete-files.rst7
-rw-r--r--Documentation/admin-guide/abi-obsolete.rst13
-rw-r--r--Documentation/admin-guide/abi-removed-files.rst7
-rw-r--r--Documentation/admin-guide/abi-removed.rst7
-rw-r--r--Documentation/admin-guide/abi-stable-files.rst7
-rw-r--r--Documentation/admin-guide/abi-stable.rst16
-rw-r--r--Documentation/admin-guide/abi-testing-files.rst7
-rw-r--r--Documentation/admin-guide/abi-testing.rst22
-rw-r--r--Documentation/admin-guide/abi.rst29
-rw-r--r--Documentation/admin-guide/acpi/cppc_sysfs.rst78
-rw-r--r--Documentation/admin-guide/acpi/fan_performance_states.rst90
-rw-r--r--Documentation/admin-guide/acpi/index.rst14
-rw-r--r--Documentation/admin-guide/acpi/initrd_table_override.rst115
-rw-r--r--Documentation/admin-guide/acpi/ssdt-overlays.rst181
-rw-r--r--Documentation/admin-guide/aoe/aoe.rst150
-rw-r--r--Documentation/admin-guide/aoe/autoload.sh (renamed from Documentation/aoe/autoload.sh)0
-rw-r--r--Documentation/admin-guide/aoe/examples.rst23
-rw-r--r--Documentation/admin-guide/aoe/index.rst17
-rw-r--r--Documentation/admin-guide/aoe/status.sh (renamed from Documentation/aoe/status.sh)0
-rw-r--r--Documentation/admin-guide/aoe/todo.rst17
-rw-r--r--Documentation/admin-guide/aoe/udev-install.sh (renamed from Documentation/aoe/udev-install.sh)0
-rw-r--r--Documentation/admin-guide/aoe/udev.txt (renamed from Documentation/aoe/udev.txt)8
-rw-r--r--Documentation/admin-guide/auxdisplay/cfag12864b.rst98
-rw-r--r--Documentation/admin-guide/auxdisplay/index.rst16
-rw-r--r--Documentation/admin-guide/auxdisplay/ks0108.rst50
-rw-r--r--Documentation/admin-guide/bcache.rst36
-rw-r--r--Documentation/admin-guide/binderfs.rst87
-rw-r--r--Documentation/admin-guide/binfmt-misc.rst12
-rw-r--r--Documentation/admin-guide/blockdev/drbd/DRBD-8.3-data-packets.svg (renamed from Documentation/blockdev/drbd/DRBD-8.3-data-packets.svg)0
-rw-r--r--Documentation/admin-guide/blockdev/drbd/DRBD-data-packets.svg (renamed from Documentation/blockdev/drbd/DRBD-data-packets.svg)0
-rw-r--r--Documentation/admin-guide/blockdev/drbd/conn-states-8.dot (renamed from Documentation/blockdev/drbd/conn-states-8.dot)0
-rw-r--r--Documentation/admin-guide/blockdev/drbd/data-structure-v9.rst42
-rw-r--r--Documentation/admin-guide/blockdev/drbd/disk-states-8.dot (renamed from Documentation/blockdev/drbd/disk-states-8.dot)0
-rw-r--r--Documentation/admin-guide/blockdev/drbd/drbd-connection-state-overview.dot (renamed from Documentation/blockdev/drbd/drbd-connection-state-overview.dot)0
-rw-r--r--Documentation/admin-guide/blockdev/drbd/figures.rst30
-rw-r--r--Documentation/admin-guide/blockdev/drbd/index.rst19
-rw-r--r--Documentation/admin-guide/blockdev/drbd/peer-states-8.dot8
-rw-r--r--Documentation/admin-guide/blockdev/floppy.rst255
-rw-r--r--Documentation/admin-guide/blockdev/index.rst17
-rw-r--r--Documentation/admin-guide/blockdev/nbd.rst31
-rw-r--r--Documentation/admin-guide/blockdev/paride.rst207
-rw-r--r--Documentation/admin-guide/blockdev/ramdisk.rst153
-rw-r--r--Documentation/admin-guide/blockdev/zoned_loop.rst169
-rw-r--r--Documentation/admin-guide/blockdev/zram.rst569
-rw-r--r--Documentation/admin-guide/bootconfig.rst327
-rw-r--r--Documentation/admin-guide/braille-console.rst4
-rw-r--r--Documentation/admin-guide/btmrvl.rst (renamed from Documentation/btmrvl.txt)0
-rw-r--r--Documentation/admin-guide/bug-bisect.rst219
-rw-r--r--Documentation/admin-guide/bug-hunting.rst83
-rw-r--r--Documentation/admin-guide/cgroup-v1/blkio-controller.rst301
-rw-r--r--Documentation/admin-guide/cgroup-v1/cgroups.rst697
-rw-r--r--Documentation/admin-guide/cgroup-v1/cpuacct.rst50
-rw-r--r--Documentation/admin-guide/cgroup-v1/cpusets.rst884
-rw-r--r--Documentation/admin-guide/cgroup-v1/devices.rst132
-rw-r--r--Documentation/admin-guide/cgroup-v1/freezer-subsystem.rst131
-rw-r--r--Documentation/admin-guide/cgroup-v1/hugetlb.rst139
-rw-r--r--Documentation/admin-guide/cgroup-v1/index.rst31
-rw-r--r--Documentation/admin-guide/cgroup-v1/memcg_test.rst344
-rw-r--r--Documentation/admin-guide/cgroup-v1/memory.rst964
-rw-r--r--Documentation/admin-guide/cgroup-v1/misc.rst4
-rw-r--r--Documentation/admin-guide/cgroup-v1/net_cls.rst44
-rw-r--r--Documentation/admin-guide/cgroup-v1/net_prio.rst57
-rw-r--r--Documentation/admin-guide/cgroup-v1/pids.rst93
-rw-r--r--Documentation/admin-guide/cgroup-v1/rdma.rst117
-rw-r--r--Documentation/admin-guide/cgroup-v2.rst1491
-rw-r--r--Documentation/admin-guide/cifs/authors.rst69
-rw-r--r--Documentation/admin-guide/cifs/changes.rst9
-rw-r--r--Documentation/admin-guide/cifs/index.rst21
-rw-r--r--Documentation/admin-guide/cifs/introduction.rst53
-rw-r--r--Documentation/admin-guide/cifs/todo.rst133
-rw-r--r--Documentation/admin-guide/cifs/usage.rst864
-rwxr-xr-xDocumentation/admin-guide/cifs/winucase_convert.pl (renamed from Documentation/filesystems/cifs/winucase_convert.pl)2
-rw-r--r--Documentation/admin-guide/clearing-warn-once.rst9
-rw-r--r--Documentation/admin-guide/conf.py10
-rw-r--r--Documentation/admin-guide/cpu-load.rst117
-rw-r--r--Documentation/admin-guide/cputopology.rst101
-rw-r--r--Documentation/admin-guide/dell_rbu.rst128
-rw-r--r--Documentation/admin-guide/device-mapper/cache-policies.rst131
-rw-r--r--Documentation/admin-guide/device-mapper/cache.rst337
-rw-r--r--Documentation/admin-guide/device-mapper/delay.rst54
-rw-r--r--Documentation/admin-guide/device-mapper/dm-clone.rst333
-rw-r--r--Documentation/admin-guide/device-mapper/dm-crypt.rst212
-rw-r--r--Documentation/admin-guide/device-mapper/dm-dust.rst305
-rw-r--r--Documentation/admin-guide/device-mapper/dm-ebs.rst51
-rw-r--r--Documentation/admin-guide/device-mapper/dm-flakey.rst88
-rw-r--r--Documentation/admin-guide/device-mapper/dm-ima.rst715
-rw-r--r--Documentation/admin-guide/device-mapper/dm-init.rst133
-rw-r--r--Documentation/admin-guide/device-mapper/dm-integrity.rst308
-rw-r--r--Documentation/admin-guide/device-mapper/dm-io.rst75
-rw-r--r--Documentation/admin-guide/device-mapper/dm-log.rst57
-rw-r--r--Documentation/admin-guide/device-mapper/dm-pcache.rst202
-rw-r--r--Documentation/admin-guide/device-mapper/dm-queue-length.rst48
-rw-r--r--Documentation/admin-guide/device-mapper/dm-raid.rst423
-rw-r--r--Documentation/admin-guide/device-mapper/dm-service-time.rst101
-rw-r--r--Documentation/admin-guide/device-mapper/dm-uevent.rst110
-rw-r--r--Documentation/admin-guide/device-mapper/dm-zoned.rst194
-rw-r--r--Documentation/admin-guide/device-mapper/era.rst116
-rw-r--r--Documentation/admin-guide/device-mapper/index.rst49
-rw-r--r--Documentation/admin-guide/device-mapper/kcopyd.rst47
-rw-r--r--Documentation/admin-guide/device-mapper/linear.rst63
-rw-r--r--Documentation/admin-guide/device-mapper/log-writes.rst145
-rw-r--r--Documentation/admin-guide/device-mapper/persistent-data.rst88
-rw-r--r--Documentation/admin-guide/device-mapper/snapshot.rst196
-rw-r--r--Documentation/admin-guide/device-mapper/statistics.rst225
-rw-r--r--Documentation/admin-guide/device-mapper/striped.rst61
-rw-r--r--Documentation/admin-guide/device-mapper/switch.rst141
-rw-r--r--Documentation/admin-guide/device-mapper/thin-provisioning.rst427
-rw-r--r--Documentation/admin-guide/device-mapper/unstriped.rst135
-rw-r--r--Documentation/admin-guide/device-mapper/vdo-design.rst633
-rw-r--r--Documentation/admin-guide/device-mapper/vdo.rst413
-rw-r--r--Documentation/admin-guide/device-mapper/verity.rst265
-rw-r--r--Documentation/admin-guide/device-mapper/writecache.rst114
-rw-r--r--Documentation/admin-guide/device-mapper/zero.rst37
-rw-r--r--Documentation/admin-guide/devices.rst10
-rw-r--r--Documentation/admin-guide/devices.txt80
-rw-r--r--Documentation/admin-guide/dynamic-debug-howto.rst285
-rw-r--r--Documentation/admin-guide/edid.rst27
-rw-r--r--Documentation/admin-guide/efi-stub.rst100
-rw-r--r--Documentation/admin-guide/ext4.rst111
-rw-r--r--Documentation/admin-guide/features.rst3
-rw-r--r--Documentation/admin-guide/filesystem-monitoring.rst78
-rw-r--r--Documentation/admin-guide/gpio/gpio-aggregator.rst218
-rw-r--r--Documentation/admin-guide/gpio/gpio-mockup.rst59
-rw-r--r--Documentation/admin-guide/gpio/gpio-sim.rst137
-rw-r--r--Documentation/admin-guide/gpio/gpio-virtuser.rst177
-rw-r--r--Documentation/admin-guide/gpio/index.rst21
-rw-r--r--Documentation/admin-guide/gpio/obsolete.rst13
-rw-r--r--Documentation/admin-guide/hw-vuln/attack_vector_controls.rst236
-rw-r--r--Documentation/admin-guide/hw-vuln/core-scheduling.rst226
-rw-r--r--Documentation/admin-guide/hw-vuln/cross-thread-rsb.rst91
-rw-r--r--Documentation/admin-guide/hw-vuln/gather_data_sampling.rst109
-rw-r--r--Documentation/admin-guide/hw-vuln/index.rst29
-rw-r--r--Documentation/admin-guide/hw-vuln/indirect-target-selection.rst168
-rw-r--r--Documentation/admin-guide/hw-vuln/l1d_flush.rst69
-rw-r--r--Documentation/admin-guide/hw-vuln/l1tf.rst (renamed from Documentation/admin-guide/l1tf.rst)11
-rw-r--r--Documentation/admin-guide/hw-vuln/mds.rst303
-rw-r--r--Documentation/admin-guide/hw-vuln/multihit.rst167
-rw-r--r--Documentation/admin-guide/hw-vuln/old_microcode.rst21
-rw-r--r--Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst269
-rw-r--r--Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst96
-rw-r--r--Documentation/admin-guide/hw-vuln/rsb.rst268
-rw-r--r--Documentation/admin-guide/hw-vuln/special-register-buffer-data-sampling.rst150
-rw-r--r--Documentation/admin-guide/hw-vuln/spectre.rst725
-rw-r--r--Documentation/admin-guide/hw-vuln/srso.rst242
-rw-r--r--Documentation/admin-guide/hw-vuln/tsx_async_abort.rst270
-rw-r--r--Documentation/admin-guide/hw-vuln/vmscape.rst110
-rw-r--r--Documentation/admin-guide/hw_random.rst104
-rw-r--r--Documentation/admin-guide/index.rst169
-rw-r--r--Documentation/admin-guide/init.rst76
-rw-r--r--Documentation/admin-guide/initrd.rst2
-rw-r--r--Documentation/admin-guide/iostats.rst187
-rw-r--r--Documentation/admin-guide/jfs.rst66
-rw-r--r--Documentation/admin-guide/kdump/gdbmacros.txt323
-rw-r--r--Documentation/admin-guide/kdump/index.rst20
-rw-r--r--Documentation/admin-guide/kdump/kdump.rst612
-rw-r--r--Documentation/admin-guide/kdump/vmcoreinfo.rst596
-rw-r--r--Documentation/admin-guide/kernel-parameters.rst115
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt5668
-rw-r--r--Documentation/admin-guide/kernel-per-CPU-kthreads.rst325
-rw-r--r--Documentation/admin-guide/laptops/alienware-wmi.rst127
-rw-r--r--Documentation/admin-guide/laptops/asus-laptop.rst271
-rw-r--r--Documentation/admin-guide/laptops/disk-shock-protection.rst151
-rw-r--r--Documentation/admin-guide/laptops/index.rst19
-rw-r--r--Documentation/admin-guide/laptops/laptop-mode.rst770
-rw-r--r--Documentation/admin-guide/laptops/lg-laptop.rst (renamed from Documentation/laptops/lg-laptop.rst)15
-rw-r--r--Documentation/admin-guide/laptops/samsung-galaxybook.rst174
-rw-r--r--Documentation/admin-guide/laptops/sony-laptop.rst174
-rw-r--r--Documentation/admin-guide/laptops/sonypi.rst158
-rw-r--r--Documentation/admin-guide/laptops/thinkpad-acpi.rst1691
-rw-r--r--Documentation/admin-guide/laptops/toshiba_haps.rst87
-rw-r--r--Documentation/admin-guide/lcd-panel-cgram.rst27
-rw-r--r--Documentation/admin-guide/ldm.rst (renamed from Documentation/ldm.txt)0
-rw-r--r--Documentation/admin-guide/lockup-watchdogs.rst83
-rw-r--r--Documentation/admin-guide/md.rst93
-rw-r--r--Documentation/admin-guide/media/au0828-cardlist.rst (renamed from Documentation/media/v4l-drivers/au0828-cardlist.rst)2
-rw-r--r--Documentation/admin-guide/media/avermedia.rst94
-rw-r--r--Documentation/admin-guide/media/bt8xx.rst157
-rw-r--r--Documentation/admin-guide/media/bttv-cardlist.rst (renamed from Documentation/media/v4l-drivers/bttv-cardlist.rst)4
-rw-r--r--Documentation/admin-guide/media/bttv.rst (renamed from Documentation/media/v4l-drivers/bttv.rst)450
-rw-r--r--Documentation/admin-guide/media/building.rst357
-rw-r--r--Documentation/admin-guide/media/c3-isp.dot26
-rw-r--r--Documentation/admin-guide/media/c3-isp.rst101
-rw-r--r--Documentation/admin-guide/media/cafe_ccic.rst (renamed from Documentation/media/v4l-drivers/cafe_ccic.rst)2
-rw-r--r--Documentation/admin-guide/media/cardlist.rst29
-rw-r--r--Documentation/admin-guide/media/cec.rst467
-rw-r--r--Documentation/admin-guide/media/ci.rst77
-rw-r--r--Documentation/admin-guide/media/cx18-cardlist.rst17
-rw-r--r--Documentation/admin-guide/media/cx231xx-cardlist.rst99
-rw-r--r--Documentation/admin-guide/media/cx23885-cardlist.rst (renamed from Documentation/media/v4l-drivers/cx23885-cardlist.rst)8
-rw-r--r--Documentation/admin-guide/media/cx88-cardlist.rst (renamed from Documentation/media/v4l-drivers/cx88-cardlist.rst)8
-rw-r--r--Documentation/admin-guide/media/cx88.rst58
-rw-r--r--Documentation/admin-guide/media/dvb-drivers.rst15
-rw-r--r--Documentation/admin-guide/media/dvb-usb-a800-cardlist.rst16
-rw-r--r--Documentation/admin-guide/media/dvb-usb-af9005-cardlist.rst20
-rw-r--r--Documentation/admin-guide/media/dvb-usb-af9015-cardlist.rst80
-rw-r--r--Documentation/admin-guide/media/dvb-usb-af9035-cardlist.rst74
-rw-r--r--Documentation/admin-guide/media/dvb-usb-anysee-cardlist.rst16
-rw-r--r--Documentation/admin-guide/media/dvb-usb-au6610-cardlist.rst16
-rw-r--r--Documentation/admin-guide/media/dvb-usb-az6007-cardlist.rst20
-rw-r--r--Documentation/admin-guide/media/dvb-usb-az6027-cardlist.rst24
-rw-r--r--Documentation/admin-guide/media/dvb-usb-ce6230-cardlist.rst18
-rw-r--r--Documentation/admin-guide/media/dvb-usb-cinergyT2-cardlist.rst16
-rw-r--r--Documentation/admin-guide/media/dvb-usb-cxusb-cardlist.rst40
-rw-r--r--Documentation/admin-guide/media/dvb-usb-dib0700-cardlist.rst162
-rw-r--r--Documentation/admin-guide/media/dvb-usb-dibusb-mb-cardlist.rst42
-rw-r--r--Documentation/admin-guide/media/dvb-usb-dibusb-mc-cardlist.rst30
-rw-r--r--Documentation/admin-guide/media/dvb-usb-digitv-cardlist.rst16
-rw-r--r--Documentation/admin-guide/media/dvb-usb-dtt200u-cardlist.rst22
-rw-r--r--Documentation/admin-guide/media/dvb-usb-dtv5100-cardlist.rst16
-rw-r--r--Documentation/admin-guide/media/dvb-usb-dvbsky-cardlist.rst42
-rw-r--r--Documentation/admin-guide/media/dvb-usb-dw2102-cardlist.rst56
-rw-r--r--Documentation/admin-guide/media/dvb-usb-ec168-cardlist.rst16
-rw-r--r--Documentation/admin-guide/media/dvb-usb-gl861-cardlist.rst20
-rw-r--r--Documentation/admin-guide/media/dvb-usb-gp8psk-cardlist.rst22
-rw-r--r--Documentation/admin-guide/media/dvb-usb-lmedm04-cardlist.rst20
-rw-r--r--Documentation/admin-guide/media/dvb-usb-m920x-cardlist.rst26
-rw-r--r--Documentation/admin-guide/media/dvb-usb-mxl111sf-cardlist.rst36
-rw-r--r--Documentation/admin-guide/media/dvb-usb-nova-t-usb2-cardlist.rst16
-rw-r--r--Documentation/admin-guide/media/dvb-usb-opera1-cardlist.rst16
-rw-r--r--Documentation/admin-guide/media/dvb-usb-pctv452e-cardlist.rst20
-rw-r--r--Documentation/admin-guide/media/dvb-usb-rtl28xxu-cardlist.rst80
-rw-r--r--Documentation/admin-guide/media/dvb-usb-technisat-usb2-cardlist.rst16
-rw-r--r--Documentation/admin-guide/media/dvb-usb-ttusb2-cardlist.rst24
-rw-r--r--Documentation/admin-guide/media/dvb-usb-umt-010-cardlist.rst16
-rw-r--r--Documentation/admin-guide/media/dvb-usb-vp702x-cardlist.rst16
-rw-r--r--Documentation/admin-guide/media/dvb-usb-vp7045-cardlist.rst18
-rw-r--r--Documentation/admin-guide/media/dvb-usb-zd1301-cardlist.rst16
-rw-r--r--Documentation/admin-guide/media/dvb.rst12
-rw-r--r--Documentation/admin-guide/media/dvb_intro.rst616
-rw-r--r--Documentation/admin-guide/media/dvb_references.rst29
-rw-r--r--Documentation/admin-guide/media/em28xx-cardlist.rst (renamed from Documentation/media/v4l-drivers/em28xx-cardlist.rst)26
-rw-r--r--Documentation/admin-guide/media/faq.rst216
-rw-r--r--Documentation/admin-guide/media/fimc.rst (renamed from Documentation/media/v4l-drivers/fimc.rst)32
-rw-r--r--Documentation/admin-guide/media/frontend-cardlist.rst226
-rw-r--r--Documentation/admin-guide/media/gspca-cardlist.rst (renamed from Documentation/media/v4l-drivers/gspca-cardlist.rst)4
-rw-r--r--Documentation/admin-guide/media/i2c-cardlist.rst287
-rw-r--r--Documentation/admin-guide/media/imx.rst714
-rw-r--r--Documentation/admin-guide/media/imx6q-sabreauto.dot51
-rw-r--r--Documentation/admin-guide/media/imx6q-sabresd.dot56
-rw-r--r--Documentation/admin-guide/media/imx7.rst221
-rw-r--r--Documentation/admin-guide/media/index.rst61
-rw-r--r--Documentation/admin-guide/media/intro.rst27
-rw-r--r--Documentation/admin-guide/media/ipu3.rst596
-rw-r--r--Documentation/admin-guide/media/ipu3_rcb.svg331
-rw-r--r--Documentation/admin-guide/media/ipu6-isys.rst161
-rw-r--r--Documentation/admin-guide/media/ipu6_isys_graph.svg548
-rw-r--r--Documentation/admin-guide/media/ivtv-cardlist.rst (renamed from Documentation/media/v4l-drivers/ivtv-cardlist.rst)4
-rw-r--r--Documentation/admin-guide/media/ivtv.rst (renamed from Documentation/media/v4l-drivers/ivtv.rst)5
-rw-r--r--Documentation/admin-guide/media/lmedm04.rst (renamed from Documentation/media/dvb-drivers/lmedm04.rst)2
-rw-r--r--Documentation/admin-guide/media/mgb4.rst385
-rw-r--r--Documentation/admin-guide/media/misc-cardlist.rst28
-rw-r--r--Documentation/admin-guide/media/omap3isp.rst92
-rw-r--r--Documentation/admin-guide/media/opera-firmware.rst (renamed from Documentation/media/dvb-drivers/opera-firmware.rst)2
-rw-r--r--Documentation/admin-guide/media/other-usb-cardlist.rst78
-rw-r--r--Documentation/admin-guide/media/pci-cardlist.rst108
-rw-r--r--Documentation/admin-guide/media/philips.rst (renamed from Documentation/media/v4l-drivers/philips.rst)2
-rw-r--r--Documentation/admin-guide/media/platform-cardlist.rst89
-rw-r--r--Documentation/admin-guide/media/qcom_camss.rst (renamed from Documentation/media/v4l-drivers/qcom_camss.rst)10
-rw-r--r--Documentation/admin-guide/media/qcom_camss_8x96_graph.dot (renamed from Documentation/media/v4l-drivers/qcom_camss_8x96_graph.dot)2
-rw-r--r--Documentation/admin-guide/media/qcom_camss_graph.dot (renamed from Documentation/media/v4l-drivers/qcom_camss_graph.dot)2
-rw-r--r--Documentation/admin-guide/media/radio-cardlist.rst44
-rw-r--r--Documentation/admin-guide/media/raspberrypi-pisp-be.dot20
-rw-r--r--Documentation/admin-guide/media/raspberrypi-pisp-be.rst109
-rw-r--r--Documentation/admin-guide/media/raspberrypi-rp1-cfe.dot27
-rw-r--r--Documentation/admin-guide/media/raspberrypi-rp1-cfe.rst78
-rw-r--r--Documentation/admin-guide/media/rcar-fdp1.rst (renamed from Documentation/media/v4l-drivers/rcar-fdp1.rst)2
-rw-r--r--Documentation/admin-guide/media/remote-controller.rst76
-rw-r--r--Documentation/admin-guide/media/rkisp1.dot18
-rw-r--r--Documentation/admin-guide/media/rkisp1.rst204
-rw-r--r--Documentation/admin-guide/media/saa7134-cardlist.rst (renamed from Documentation/media/v4l-drivers/saa7134-cardlist.rst)4
-rw-r--r--Documentation/admin-guide/media/saa7134.rst89
-rw-r--r--Documentation/admin-guide/media/saa7164-cardlist.rst (renamed from Documentation/media/v4l-drivers/saa7164-cardlist.rst)4
-rw-r--r--Documentation/admin-guide/media/si470x.rst (renamed from Documentation/media/v4l-drivers/si470x.rst)2
-rw-r--r--Documentation/admin-guide/media/si4713.rst (renamed from Documentation/media/v4l-drivers/si4713.rst)8
-rw-r--r--Documentation/admin-guide/media/si476x.rst (renamed from Documentation/media/v4l-drivers/si476x.rst)4
-rw-r--r--Documentation/admin-guide/media/siano-cardlist.rst56
-rw-r--r--Documentation/admin-guide/media/starfive_camss.rst72
-rw-r--r--Documentation/admin-guide/media/starfive_camss_graph.dot12
-rw-r--r--Documentation/admin-guide/media/technisat.rst (renamed from Documentation/media/dvb-drivers/technisat.rst)2
-rw-r--r--Documentation/admin-guide/media/ttusb-dec.rst (renamed from Documentation/media/dvb-drivers/ttusb-dec.rst)2
-rw-r--r--Documentation/admin-guide/media/tuner-cardlist.rst (renamed from Documentation/media/v4l-drivers/tuner-cardlist.rst)4
-rw-r--r--Documentation/admin-guide/media/usb-cardlist.rst149
-rw-r--r--Documentation/admin-guide/media/v4l-drivers.rst37
-rw-r--r--Documentation/admin-guide/media/vimc.dot26
-rw-r--r--Documentation/admin-guide/media/vimc.rst110
-rw-r--r--Documentation/admin-guide/media/visl.rst185
-rw-r--r--Documentation/admin-guide/media/vivid.rst (renamed from Documentation/media/v4l-drivers/vivid.rst)276
-rw-r--r--Documentation/admin-guide/media/zoran-cardlist.rst51
-rw-r--r--Documentation/admin-guide/mm/cma_debugfs.rst31
-rw-r--r--Documentation/admin-guide/mm/concepts.rst68
-rw-r--r--Documentation/admin-guide/mm/damon/index.rst17
-rw-r--r--Documentation/admin-guide/mm/damon/lru_sort.rst294
-rw-r--r--Documentation/admin-guide/mm/damon/reclaim.rst301
-rw-r--r--Documentation/admin-guide/mm/damon/start.rst178
-rw-r--r--Documentation/admin-guide/mm/damon/stat.rst69
-rw-r--r--Documentation/admin-guide/mm/damon/usage.rst671
-rw-r--r--Documentation/admin-guide/mm/hugetlbpage.rst126
-rw-r--r--Documentation/admin-guide/mm/idle_page_tracking.rst9
-rw-r--r--Documentation/admin-guide/mm/index.rst21
-rw-r--r--Documentation/admin-guide/mm/kho.rst115
-rw-r--r--Documentation/admin-guide/mm/ksm.rst146
-rw-r--r--Documentation/admin-guide/mm/memory-hotplug.rst910
-rw-r--r--Documentation/admin-guide/mm/multigen_lru.rst163
-rw-r--r--Documentation/admin-guide/mm/nommu-mmap.rst (renamed from Documentation/nommu-mmap.txt)0
-rw-r--r--Documentation/admin-guide/mm/numa_memory_policy.rst58
-rw-r--r--Documentation/admin-guide/mm/numaperf.rst176
-rw-r--r--Documentation/admin-guide/mm/pagemap.rst224
-rw-r--r--Documentation/admin-guide/mm/shrinker_debugfs.rst133
-rw-r--r--Documentation/admin-guide/mm/slab.rst469
-rw-r--r--Documentation/admin-guide/mm/soft-dirty.rst2
-rw-r--r--Documentation/admin-guide/mm/swap_numa.rst (renamed from Documentation/vm/swap_numa.rst)2
-rw-r--r--Documentation/admin-guide/mm/transhuge.rst468
-rw-r--r--Documentation/admin-guide/mm/userfaultfd.rst387
-rw-r--r--Documentation/admin-guide/mm/zswap.rst138
-rw-r--r--Documentation/admin-guide/module-signing.rst21
-rw-r--r--Documentation/admin-guide/mono.rst4
-rw-r--r--Documentation/admin-guide/namespaces/compatibility-list.rst43
-rw-r--r--Documentation/admin-guide/namespaces/index.rst11
-rw-r--r--Documentation/admin-guide/namespaces/resource-control.rst18
-rw-r--r--Documentation/admin-guide/nfs/index.rst14
-rw-r--r--Documentation/admin-guide/nfs/nfs-client.rst144
-rw-r--r--Documentation/admin-guide/nfs/nfs-idmapper.rst78
-rw-r--r--Documentation/admin-guide/nfs/nfs-rdma.rst292
-rw-r--r--Documentation/admin-guide/nfs/nfsd-admin-interfaces.rst40
-rw-r--r--Documentation/admin-guide/nfs/nfsroot.rst364
-rw-r--r--Documentation/admin-guide/nfs/pnfs-block-server.rst42
-rw-r--r--Documentation/admin-guide/nfs/pnfs-scsi-server.rst24
-rw-r--r--Documentation/admin-guide/numastat.rst55
-rw-r--r--Documentation/admin-guide/nvme-multipath.rst72
-rw-r--r--Documentation/admin-guide/perf-security.rst325
-rw-r--r--Documentation/admin-guide/perf/alibaba_pmu.rst105
-rw-r--r--Documentation/admin-guide/perf/ampere_cspmu.rst29
-rw-r--r--Documentation/admin-guide/perf/arm-ccn.rst61
-rw-r--r--Documentation/admin-guide/perf/arm-cmn.rst65
-rw-r--r--Documentation/admin-guide/perf/arm-ni.rst17
-rw-r--r--Documentation/admin-guide/perf/arm_dsu_pmu.rst29
-rw-r--r--Documentation/admin-guide/perf/cxl.rst68
-rw-r--r--Documentation/admin-guide/perf/dwc_pcie_pmu.rst94
-rw-r--r--Documentation/admin-guide/perf/fujitsu_uncore_pmu.rst115
-rw-r--r--Documentation/admin-guide/perf/hisi-pcie-pmu.rst148
-rw-r--r--Documentation/admin-guide/perf/hisi-pmu.rst177
-rw-r--r--Documentation/admin-guide/perf/hns3-pmu.rst136
-rw-r--r--Documentation/admin-guide/perf/imx-ddr.rst100
-rw-r--r--Documentation/admin-guide/perf/index.rst32
-rw-r--r--Documentation/admin-guide/perf/meson-ddr-pmu.rst70
-rw-r--r--Documentation/admin-guide/perf/mrvl-odyssey-ddr-pmu.rst80
-rw-r--r--Documentation/admin-guide/perf/mrvl-odyssey-tad-pmu.rst37
-rw-r--r--Documentation/admin-guide/perf/mrvl-pem-pmu.rst56
-rw-r--r--Documentation/admin-guide/perf/nvidia-pmu.rst333
-rw-r--r--Documentation/admin-guide/perf/qcom_l2_pmu.rst39
-rw-r--r--Documentation/admin-guide/perf/qcom_l3_pmu.rst26
-rw-r--r--Documentation/admin-guide/perf/starfive_starlink_pmu.rst46
-rw-r--r--Documentation/admin-guide/perf/thunderx2-pmu.rst44
-rw-r--r--Documentation/admin-guide/perf/xgene-pmu.rst49
-rw-r--r--Documentation/admin-guide/pm/amd-pstate.rst802
-rw-r--r--Documentation/admin-guide/pm/cpufreq.rst86
-rw-r--r--Documentation/admin-guide/pm/cpufreq_drivers.rst274
-rw-r--r--Documentation/admin-guide/pm/cpuidle.rst657
-rw-r--r--Documentation/admin-guide/pm/index.rst2
-rw-r--r--Documentation/admin-guide/pm/intel-speed-select.rst939
-rw-r--r--Documentation/admin-guide/pm/intel_epb.rst41
-rw-r--r--Documentation/admin-guide/pm/intel_idle.rst316
-rw-r--r--Documentation/admin-guide/pm/intel_pstate.rst284
-rw-r--r--Documentation/admin-guide/pm/intel_uncore_frequency_scaling.rst184
-rw-r--r--Documentation/admin-guide/pm/sleep-states.rst84
-rw-r--r--Documentation/admin-guide/pm/strategies.rst8
-rw-r--r--Documentation/admin-guide/pm/suspend-flows.rst270
-rw-r--r--Documentation/admin-guide/pm/system-wide.rst3
-rw-r--r--Documentation/admin-guide/pm/working-state.rst9
-rw-r--r--Documentation/admin-guide/pnp.rst285
-rw-r--r--Documentation/admin-guide/pstore-blk.rst234
-rw-r--r--Documentation/admin-guide/quickly-build-trimmed-linux.rst1097
-rw-r--r--Documentation/admin-guide/ramoops.rst35
-rw-r--r--Documentation/admin-guide/rapidio.rst (renamed from Documentation/driver-api/rapidio.rst)0
-rw-r--r--Documentation/admin-guide/ras.rst1210
-rw-r--r--Documentation/admin-guide/reporting-bugs.rst182
-rw-r--r--Documentation/admin-guide/reporting-issues.rst1764
-rw-r--r--Documentation/admin-guide/reporting-regressions.rst451
-rw-r--r--Documentation/admin-guide/rtc.rst140
-rw-r--r--Documentation/admin-guide/security-bugs.rst89
-rw-r--r--Documentation/admin-guide/serial-console.rst42
-rw-r--r--Documentation/admin-guide/spkguide.txt1625
-rw-r--r--Documentation/admin-guide/svga.rst250
-rw-r--r--Documentation/admin-guide/syscall-user-dispatch.rst99
-rw-r--r--Documentation/admin-guide/sysctl/abi.rst34
-rw-r--r--Documentation/admin-guide/sysctl/fs.rst374
-rw-r--r--Documentation/admin-guide/sysctl/index.rst104
-rw-r--r--Documentation/admin-guide/sysctl/kernel.rst1718
-rw-r--r--Documentation/admin-guide/sysctl/net.rst519
-rw-r--r--Documentation/admin-guide/sysctl/sunrpc.rst25
-rw-r--r--Documentation/admin-guide/sysctl/user.rst84
-rw-r--r--Documentation/admin-guide/sysctl/vm.rst1125
-rw-r--r--Documentation/admin-guide/sysrq.rst96
-rw-r--r--Documentation/admin-guide/tainted-kernels.rst194
-rw-r--r--Documentation/admin-guide/thermal/index.rst8
-rw-r--r--Documentation/admin-guide/thermal/intel_powerclamp.rst345
-rw-r--r--Documentation/admin-guide/thunderbolt.rst157
-rw-r--r--Documentation/admin-guide/ufs.rst68
-rw-r--r--Documentation/admin-guide/unicode.rst13
-rw-r--r--Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst2222
-rw-r--r--Documentation/admin-guide/video-output.rst34
-rw-r--r--Documentation/admin-guide/workload-tracing.rst606
-rw-r--r--Documentation/admin-guide/xfs.rst551
-rw-r--r--Documentation/aoe/aoe.txt143
-rw-r--r--Documentation/aoe/todo.txt14
-rw-r--r--Documentation/arch/arc/arc.rst85
-rw-r--r--Documentation/arch/arc/features.rst3
-rw-r--r--Documentation/arch/arc/index.rst17
-rw-r--r--Documentation/arch/arm/arm.rst212
-rw-r--r--Documentation/arch/arm/booting.rst237
-rw-r--r--Documentation/arch/arm/cluster-pm-race-avoidance.rst533
-rw-r--r--Documentation/arch/arm/features.rst3
-rw-r--r--Documentation/arch/arm/firmware.rst72
-rw-r--r--Documentation/arch/arm/google/chromebook-boot-flow.rst69
-rw-r--r--Documentation/arch/arm/index.rst85
-rw-r--r--Documentation/arch/arm/interrupts.rst169
-rw-r--r--Documentation/arch/arm/ixp4xx.rst173
-rw-r--r--Documentation/arch/arm/kernel_mode_neon.rst124
-rw-r--r--Documentation/arch/arm/kernel_user_helpers.rst268
-rw-r--r--Documentation/arch/arm/keystone/knav-qmss.rst60
-rw-r--r--Documentation/arch/arm/keystone/overview.rst74
-rw-r--r--Documentation/arch/arm/marvell.rst527
-rw-r--r--Documentation/arch/arm/mem_alignment.rst63
-rw-r--r--Documentation/arch/arm/memory.rst103
-rw-r--r--Documentation/arch/arm/microchip.rst230
-rw-r--r--Documentation/arch/arm/netwinder.rst85
-rw-r--r--Documentation/arch/arm/nwfpe/index.rst13
-rw-r--r--Documentation/arch/arm/nwfpe/netwinder-fpe.rst162
-rw-r--r--Documentation/arch/arm/nwfpe/notes.rst32
-rw-r--r--Documentation/arch/arm/nwfpe/nwfpe.rst74
-rw-r--r--Documentation/arch/arm/nwfpe/todo.rst72
-rw-r--r--Documentation/arch/arm/omap/dss.rst372
-rw-r--r--Documentation/arch/arm/omap/index.rst12
-rw-r--r--Documentation/arch/arm/omap/omap.rst18
-rw-r--r--Documentation/arch/arm/omap/omap_pm.rst165
-rw-r--r--Documentation/arch/arm/porting.rst137
-rw-r--r--Documentation/arch/arm/pxa/mfp.rst288
-rw-r--r--Documentation/arch/arm/sa1100/assabet.rst301
-rw-r--r--Documentation/arch/arm/sa1100/cerf.rst35
-rw-r--r--Documentation/arch/arm/sa1100/index.rst13
-rw-r--r--Documentation/arch/arm/sa1100/lart.rst15
-rw-r--r--Documentation/arch/arm/sa1100/serial_uart.rst51
-rw-r--r--Documentation/arch/arm/samsung/bootloader-interface.rst81
-rwxr-xr-xDocumentation/arch/arm/samsung/clksrc-change-registers.awk (renamed from Documentation/arm/Samsung/clksrc-change-registers.awk)0
-rw-r--r--Documentation/arch/arm/samsung/gpio.rst32
-rw-r--r--Documentation/arch/arm/samsung/index.rst12
-rw-r--r--Documentation/arch/arm/samsung/overview.rst76
-rw-r--r--Documentation/arch/arm/setup.rst108
-rw-r--r--Documentation/arch/arm/spear/overview.rst66
-rw-r--r--Documentation/arch/arm/sti/overview.rst32
-rw-r--r--Documentation/arch/arm/sti/stih407-overview.rst19
-rw-r--r--Documentation/arch/arm/sti/stih418-overview.rst21
-rw-r--r--Documentation/arch/arm/stm32/overview.rst (renamed from Documentation/arm/stm32/overview.rst)0
-rw-r--r--Documentation/arch/arm/stm32/stm32-dma-mdma-chaining.rst415
-rw-r--r--Documentation/arch/arm/stm32/stm32f429-overview.rst (renamed from Documentation/arm/stm32/stm32f429-overview.rst)5
-rw-r--r--Documentation/arch/arm/stm32/stm32f746-overview.rst (renamed from Documentation/arm/stm32/stm32f746-overview.rst)7
-rw-r--r--Documentation/arch/arm/stm32/stm32f769-overview.rst (renamed from Documentation/arm/stm32/stm32f769-overview.rst)7
-rw-r--r--Documentation/arch/arm/stm32/stm32h743-overview.rst (renamed from Documentation/arm/stm32/stm32h743-overview.rst)7
-rw-r--r--Documentation/arch/arm/stm32/stm32h750-overview.rst34
-rw-r--r--Documentation/arch/arm/stm32/stm32mp13-overview.rst37
-rw-r--r--Documentation/arch/arm/stm32/stm32mp151-overview.rst36
-rw-r--r--Documentation/arch/arm/stm32/stm32mp157-overview.rst (renamed from Documentation/arm/stm32/stm32mp157-overview.rst)1
-rw-r--r--Documentation/arch/arm/sunxi.rst170
-rw-r--r--Documentation/arch/arm/sunxi/clocks.rst57
-rw-r--r--Documentation/arch/arm/swp_emulation.rst27
-rw-r--r--Documentation/arch/arm/tcm.rst161
-rw-r--r--Documentation/arch/arm/uefi.rst72
-rw-r--r--Documentation/arch/arm/vfp/release-notes.rst57
-rw-r--r--Documentation/arch/arm/vlocks.rst212
-rw-r--r--Documentation/arch/arm64/acpi_object_usage.rst809
-rw-r--r--Documentation/arch/arm64/amu.rst119
-rw-r--r--Documentation/arch/arm64/arm-acpi.rst575
-rw-r--r--Documentation/arch/arm64/arm-cca.rst69
-rw-r--r--Documentation/arch/arm64/asymmetric-32bit.rst163
-rw-r--r--Documentation/arch/arm64/booting.rst608
-rw-r--r--Documentation/arch/arm64/cpu-feature-registers.rst419
-rw-r--r--Documentation/arch/arm64/cpu-hotplug.rst79
-rw-r--r--Documentation/arch/arm64/elf_hwcaps.rst452
-rw-r--r--Documentation/arch/arm64/features.rst3
-rw-r--r--Documentation/arch/arm64/gcs.rst227
-rw-r--r--Documentation/arch/arm64/hugetlbpage.rst43
-rw-r--r--Documentation/arch/arm64/index.rst42
-rw-r--r--Documentation/arch/arm64/kasan-offsets.sh26
-rw-r--r--Documentation/arch/arm64/kdump.rst92
-rw-r--r--Documentation/arch/arm64/legacy_instructions.rst68
-rw-r--r--Documentation/arch/arm64/memory-tagging-extension.rst375
-rw-r--r--Documentation/arch/arm64/memory.rst100
-rw-r--r--Documentation/arch/arm64/mops.rst44
-rw-r--r--Documentation/arch/arm64/perf.rst238
-rw-r--r--Documentation/arch/arm64/pointer-authentication.rst142
-rw-r--r--Documentation/arch/arm64/ptdump.rst94
-rw-r--r--Documentation/arch/arm64/silicon-errata.rst307
-rw-r--r--Documentation/arch/arm64/sme.rst461
-rw-r--r--Documentation/arch/arm64/sve.rst614
-rw-r--r--Documentation/arch/arm64/tagged-address-abi.rst179
-rw-r--r--Documentation/arch/arm64/tagged-pointers.rst89
-rw-r--r--Documentation/arch/index.rst27
-rw-r--r--Documentation/arch/loongarch/booting.rst42
-rw-r--r--Documentation/arch/loongarch/features.rst3
-rw-r--r--Documentation/arch/loongarch/index.rst22
-rw-r--r--Documentation/arch/loongarch/introduction.rst390
-rw-r--r--Documentation/arch/loongarch/irq-chip-model.rst256
-rw-r--r--Documentation/arch/m68k/buddha-driver.rst209
-rw-r--r--Documentation/arch/m68k/features.rst3
-rw-r--r--Documentation/arch/m68k/index.rst20
-rw-r--r--Documentation/arch/m68k/kernel-options.rst911
-rw-r--r--Documentation/arch/mips/booting.rst28
-rw-r--r--Documentation/arch/mips/features.rst3
-rw-r--r--Documentation/arch/mips/index.rst21
-rw-r--r--Documentation/arch/mips/ingenic-tcu.rst71
-rw-r--r--Documentation/arch/nios2/features.rst3
-rw-r--r--Documentation/arch/nios2/index.rst12
-rw-r--r--Documentation/arch/nios2/nios2.rst24
-rw-r--r--Documentation/arch/openrisc/features.rst3
-rw-r--r--Documentation/arch/openrisc/index.rst20
-rw-r--r--Documentation/arch/openrisc/openrisc_port.rst127
-rw-r--r--Documentation/arch/openrisc/todo.rst15
-rw-r--r--Documentation/arch/parisc/debugging.rst46
-rw-r--r--Documentation/arch/parisc/features.rst3
-rw-r--r--Documentation/arch/parisc/index.rst20
-rw-r--r--Documentation/arch/parisc/registers.rst154
-rw-r--r--Documentation/arch/powerpc/associativity.rst105
-rw-r--r--Documentation/arch/powerpc/booting.rst110
-rw-r--r--Documentation/arch/powerpc/bootwrapper.rst131
-rw-r--r--Documentation/arch/powerpc/cpu_families.rst219
-rw-r--r--Documentation/arch/powerpc/cpu_features.rst60
-rw-r--r--Documentation/arch/powerpc/dawr-power9.rst101
-rw-r--r--Documentation/arch/powerpc/dexcr.rst195
-rw-r--r--Documentation/arch/powerpc/dscr.rst87
-rw-r--r--Documentation/arch/powerpc/eeh-pci-error-recovery.rst335
-rw-r--r--Documentation/arch/powerpc/elf_hwcaps.rst232
-rw-r--r--Documentation/arch/powerpc/elfnote.rst41
-rw-r--r--Documentation/arch/powerpc/features.rst3
-rw-r--r--Documentation/arch/powerpc/firmware-assisted-dump.rst396
-rw-r--r--Documentation/arch/powerpc/htm.rst104
-rw-r--r--Documentation/arch/powerpc/hvcs.rst581
-rw-r--r--Documentation/arch/powerpc/imc.rst199
-rw-r--r--Documentation/arch/powerpc/index.rst49
-rw-r--r--Documentation/arch/powerpc/isa-versions.rst101
-rw-r--r--Documentation/arch/powerpc/kasan.txt58
-rw-r--r--Documentation/arch/powerpc/kaslr-booke32.rst42
-rw-r--r--Documentation/arch/powerpc/kvm-nested.rst656
-rw-r--r--Documentation/arch/powerpc/mpc52xx.rst43
-rw-r--r--Documentation/arch/powerpc/papr_hcalls.rst313
-rw-r--r--Documentation/arch/powerpc/pci_iov_resource_on_powernv.rst312
-rw-r--r--Documentation/arch/powerpc/pmu-ebb.rst138
-rw-r--r--Documentation/arch/powerpc/ptrace.rst157
-rw-r--r--Documentation/arch/powerpc/qe_firmware.rst296
-rw-r--r--Documentation/arch/powerpc/syscall64-abi.rst153
-rw-r--r--Documentation/arch/powerpc/transactional_memory.rst274
-rw-r--r--Documentation/arch/powerpc/ultravisor.rst1117
-rw-r--r--Documentation/arch/powerpc/vas-api.rst305
-rw-r--r--Documentation/arch/powerpc/vcpudispatch_stats.rst75
-rw-r--r--Documentation/arch/powerpc/vmemmap_dedup.rst101
-rw-r--r--Documentation/arch/powerpc/vpa-dtl.rst156
-rw-r--r--Documentation/arch/riscv/acpi.rst10
-rw-r--r--Documentation/arch/riscv/boot-image-header.rst59
-rw-r--r--Documentation/arch/riscv/boot.rst169
-rw-r--r--Documentation/arch/riscv/cmodx.rst130
-rw-r--r--Documentation/arch/riscv/features.rst3
-rw-r--r--Documentation/arch/riscv/hwprobe.rst372
-rw-r--r--Documentation/arch/riscv/index.rst25
-rw-r--r--Documentation/arch/riscv/patch-acceptance.rst59
-rw-r--r--Documentation/arch/riscv/uabi.rst86
-rw-r--r--Documentation/arch/riscv/vector.rst140
-rw-r--r--Documentation/arch/riscv/vm-layout.rst136
-rw-r--r--Documentation/arch/s390/3270.ChangeLog (renamed from Documentation/s390/3270.ChangeLog)2
-rw-r--r--Documentation/arch/s390/3270.rst298
-rw-r--r--Documentation/arch/s390/cds.rst530
-rw-r--r--Documentation/arch/s390/common_io.rst140
-rw-r--r--Documentation/arch/s390/config3270.sh (renamed from Documentation/s390/config3270.sh)0
-rw-r--r--Documentation/arch/s390/driver-model.rst307
-rw-r--r--Documentation/arch/s390/features.rst3
-rw-r--r--Documentation/arch/s390/index.rst31
-rw-r--r--Documentation/arch/s390/mm.rst111
-rw-r--r--Documentation/arch/s390/monreader.rst212
-rw-r--r--Documentation/arch/s390/pci.rst133
-rw-r--r--Documentation/arch/s390/qeth.rst64
-rw-r--r--Documentation/arch/s390/s390dbf.rst478
-rw-r--r--Documentation/arch/s390/text_files.rst11
-rw-r--r--Documentation/arch/s390/vfio-ap-locking.rst115
-rw-r--r--Documentation/arch/s390/vfio-ap.rst1129
-rw-r--r--Documentation/arch/s390/vfio-ccw.rst445
-rw-r--r--Documentation/arch/s390/zfcpdump.rst50
-rw-r--r--Documentation/arch/sh/booting.rst12
-rw-r--r--Documentation/arch/sh/features.rst3
-rw-r--r--Documentation/arch/sh/index.rst50
-rw-r--r--Documentation/arch/sh/new-machine.rst277
-rw-r--r--Documentation/arch/sh/register-banks.rst40
-rw-r--r--Documentation/arch/sparc/adi.rst286
-rw-r--r--Documentation/arch/sparc/console.rst9
-rw-r--r--Documentation/arch/sparc/features.rst3
-rw-r--r--Documentation/arch/sparc/index.rst13
-rw-r--r--Documentation/arch/sparc/oradax/dax-hv-api.txt (renamed from Documentation/sparc/oradax/dax-hv-api.txt)46
-rw-r--r--Documentation/arch/sparc/oradax/oracle-dax.rst445
-rw-r--r--Documentation/arch/x86/amd-debugging.rst368
-rw-r--r--Documentation/arch/x86/amd-hfi.rst133
-rw-r--r--Documentation/arch/x86/amd-memory-encryption.rst278
-rw-r--r--Documentation/arch/x86/amd_hsmp.rst192
-rw-r--r--Documentation/arch/x86/boot.rst1442
-rw-r--r--Documentation/arch/x86/booting-dt.rst21
-rw-r--r--Documentation/arch/x86/buslock.rst133
-rw-r--r--Documentation/arch/x86/cpuinfo.rst202
-rw-r--r--Documentation/arch/x86/earlyprintk.rst151
-rw-r--r--Documentation/arch/x86/elf_auxvec.rst53
-rw-r--r--Documentation/arch/x86/entry_64.rst110
-rw-r--r--Documentation/arch/x86/exception-tables.rst357
-rw-r--r--Documentation/arch/x86/features.rst3
-rw-r--r--Documentation/arch/x86/i386/IO-APIC.rst123
-rw-r--r--Documentation/arch/x86/i386/index.rst10
-rw-r--r--Documentation/arch/x86/ifs.rst2
-rw-r--r--Documentation/arch/x86/index.rst46
-rw-r--r--Documentation/arch/x86/intel-hfi.rst72
-rw-r--r--Documentation/arch/x86/intel_txt.rst (renamed from Documentation/intel_txt.txt)0
-rw-r--r--Documentation/arch/x86/iommu.rst151
-rw-r--r--Documentation/arch/x86/kernel-stacks.rst152
-rw-r--r--Documentation/arch/x86/mds.rst209
-rw-r--r--Documentation/arch/x86/microcode.rst240
-rw-r--r--Documentation/arch/x86/mtrr.rst354
-rw-r--r--Documentation/arch/x86/orc-unwinder.rst182
-rw-r--r--Documentation/arch/x86/pat.rst240
-rw-r--r--Documentation/arch/x86/pti.rst193
-rw-r--r--Documentation/arch/x86/resume.svg4
-rw-r--r--Documentation/arch/x86/sgx.rst302
-rw-r--r--Documentation/arch/x86/shstk.rst179
-rw-r--r--Documentation/arch/x86/suspend.svg4
-rw-r--r--Documentation/arch/x86/sva.rst286
-rw-r--r--Documentation/arch/x86/tdx.rst446
-rw-r--r--Documentation/arch/x86/tlb.rst83
-rw-r--r--Documentation/arch/x86/topology.rst421
-rw-r--r--Documentation/arch/x86/tsx_async_abort.rst117
-rw-r--r--Documentation/arch/x86/usb-legacy-support.rst41
-rw-r--r--Documentation/arch/x86/x86_64/5level-paging.rst58
-rw-r--r--Documentation/arch/x86/x86_64/cpu-hotplug-spec.rst24
-rw-r--r--Documentation/arch/x86/x86_64/fake-numa-for-cpusets.rst78
-rw-r--r--Documentation/arch/x86/x86_64/fred.rst96
-rw-r--r--Documentation/arch/x86/x86_64/fsgs.rst199
-rw-r--r--Documentation/arch/x86/x86_64/index.rst17
-rw-r--r--Documentation/arch/x86/x86_64/machinecheck.rst33
-rw-r--r--Documentation/arch/x86/x86_64/mm.rst180
-rw-r--r--Documentation/arch/x86/x86_64/uefi.rst75
-rw-r--r--Documentation/arch/x86/xstate.rst174
-rw-r--r--Documentation/arch/x86/zero-page.rst47
-rw-r--r--Documentation/arch/xtensa/atomctl.rst51
-rw-r--r--Documentation/arch/xtensa/booting.rst22
-rw-r--r--Documentation/arch/xtensa/features.rst3
-rw-r--r--Documentation/arch/xtensa/index.rst14
-rw-r--r--Documentation/arch/xtensa/mmu.rst198
-rw-r--r--Documentation/arm/Booting218
-rw-r--r--Documentation/arm/IXP4xx172
-rw-r--r--Documentation/arm/Interrupts167
-rw-r--r--Documentation/arm/Marvell/README395
-rw-r--r--Documentation/arm/Microchip/README169
-rw-r--r--Documentation/arm/Netwinder78
-rw-r--r--Documentation/arm/OMAP/DSS362
-rw-r--r--Documentation/arm/OMAP/README11
-rw-r--r--Documentation/arm/OMAP/omap_pm154
-rw-r--r--Documentation/arm/Porting135
-rw-r--r--Documentation/arm/README204
-rw-r--r--Documentation/arm/SA1100/ADSBitsy43
-rw-r--r--Documentation/arm/SA1100/Assabet300
-rw-r--r--Documentation/arm/SA1100/Brutus66
-rw-r--r--Documentation/arm/SA1100/CERF29
-rw-r--r--Documentation/arm/SA1100/FreeBird21
-rw-r--r--Documentation/arm/SA1100/GraphicsClient98
-rw-r--r--Documentation/arm/SA1100/GraphicsMaster53
-rw-r--r--Documentation/arm/SA1100/HUW_WEBPANEL17
-rw-r--r--Documentation/arm/SA1100/Itsy39
-rw-r--r--Documentation/arm/SA1100/LART14
-rw-r--r--Documentation/arm/SA1100/PLEB11
-rw-r--r--Documentation/arm/SA1100/Pangolin23
-rw-r--r--Documentation/arm/SA1100/Tifon7
-rw-r--r--Documentation/arm/SA1100/Yopy2
-rw-r--r--Documentation/arm/SA1100/empeg2
-rw-r--r--Documentation/arm/SA1100/nanoEngine11
-rw-r--r--Documentation/arm/SA1100/serial_UART47
-rw-r--r--Documentation/arm/SH-Mobile/.gitignore1
-rw-r--r--Documentation/arm/SPEAr/overview.txt63
-rw-r--r--Documentation/arm/Samsung-S3C24XX/CPUfreq.txt75
-rw-r--r--Documentation/arm/Samsung-S3C24XX/EB2410ITX.txt58
-rw-r--r--Documentation/arm/Samsung-S3C24XX/GPIO.txt171
-rw-r--r--Documentation/arm/Samsung-S3C24XX/H1940.txt40
-rw-r--r--Documentation/arm/Samsung-S3C24XX/NAND.txt30
-rw-r--r--Documentation/arm/Samsung-S3C24XX/Overview.txt318
-rw-r--r--Documentation/arm/Samsung-S3C24XX/S3C2412.txt120
-rw-r--r--Documentation/arm/Samsung-S3C24XX/S3C2413.txt21
-rw-r--r--Documentation/arm/Samsung-S3C24XX/SMDK2440.txt56
-rw-r--r--Documentation/arm/Samsung-S3C24XX/Suspend.txt137
-rw-r--r--Documentation/arm/Samsung-S3C24XX/USB-Host.txt93
-rw-r--r--Documentation/arm/Samsung/Bootloader-interface.txt68
-rw-r--r--Documentation/arm/Samsung/GPIO.txt40
-rw-r--r--Documentation/arm/Samsung/Overview.txt86
-rw-r--r--Documentation/arm/Setup129
-rw-r--r--Documentation/arm/VFP/release-notes.txt55
-rw-r--r--Documentation/arm/cluster-pm-race-avoidance.txt498
-rw-r--r--Documentation/arm/firmware.txt70
-rw-r--r--Documentation/arm/kernel_mode_neon.txt121
-rw-r--r--Documentation/arm/kernel_user_helpers.txt267
-rw-r--r--Documentation/arm/keystone/Overview.txt55
-rw-r--r--Documentation/arm/keystone/knav-qmss.txt56
-rw-r--r--Documentation/arm/mem_alignment58
-rw-r--r--Documentation/arm/memory.txt88
-rw-r--r--Documentation/arm/nwfpe/NOTES29
-rw-r--r--Documentation/arm/nwfpe/README70
-rw-r--r--Documentation/arm/nwfpe/README.FPE156
-rw-r--r--Documentation/arm/nwfpe/TODO67
-rw-r--r--Documentation/arm/pxa/mfp.txt286
-rw-r--r--Documentation/arm/sti/overview.txt33
-rw-r--r--Documentation/arm/sti/stih407-overview.txt18
-rw-r--r--Documentation/arm/sti/stih415-overview.txt12
-rw-r--r--Documentation/arm/sti/stih416-overview.txt12
-rw-r--r--Documentation/arm/sti/stih418-overview.txt20
-rw-r--r--Documentation/arm/sunxi/README102
-rw-r--r--Documentation/arm/sunxi/clocks.txt56
-rw-r--r--Documentation/arm/swp_emulation27
-rw-r--r--Documentation/arm/tcm.txt155
-rw-r--r--Documentation/arm/uefi.txt60
-rw-r--r--Documentation/arm/vlocks.txt211
-rw-r--r--Documentation/arm64/acpi_object_usage.txt622
-rw-r--r--Documentation/arm64/arm-acpi.txt519
-rw-r--r--Documentation/arm64/booting.txt253
-rw-r--r--Documentation/arm64/cpu-feature-registers.txt272
-rw-r--r--Documentation/arm64/elf_hwcaps.txt184
-rw-r--r--Documentation/arm64/hugetlbpage.txt38
-rw-r--r--Documentation/arm64/legacy_instructions.txt57
-rw-r--r--Documentation/arm64/memory.txt97
-rw-r--r--Documentation/arm64/silicon-errata.txt80
-rw-r--r--Documentation/arm64/sve.txt508
-rw-r--r--Documentation/arm64/tagged-pointers.txt66
-rw-r--r--Documentation/atomic_bitops.txt16
-rw-r--r--Documentation/atomic_t.txt163
-rw-r--r--Documentation/auxdisplay/cfag12864b105
-rw-r--r--Documentation/auxdisplay/ks010855
-rw-r--r--Documentation/auxdisplay/lcd-panel-cgram.txt24
-rw-r--r--Documentation/backlight/lp855x-driver.txt66
-rw-r--r--Documentation/block/bfq-iosched.rst606
-rw-r--r--Documentation/block/bfq-iosched.txt561
-rw-r--r--Documentation/block/biodoc.txt1165
-rw-r--r--Documentation/block/biovecs.rst151
-rw-r--r--Documentation/block/biovecs.txt119
-rw-r--r--Documentation/block/blk-mq.rst153
-rw-r--r--Documentation/block/capability.txt15
-rw-r--r--Documentation/block/cfq-iosched.txt291
-rw-r--r--Documentation/block/cmdline-partition.rst56
-rw-r--r--Documentation/block/cmdline-partition.txt46
-rw-r--r--Documentation/block/data-integrity.rst248
-rw-r--r--Documentation/block/data-integrity.txt281
-rw-r--r--Documentation/block/deadline-iosched.rst72
-rw-r--r--Documentation/block/deadline-iosched.txt75
-rw-r--r--Documentation/block/index.rst24
-rw-r--r--Documentation/block/inline-encryption.rst550
-rw-r--r--Documentation/block/ioprio.rst178
-rw-r--r--Documentation/block/ioprio.txt183
-rw-r--r--Documentation/block/kyber-iosched.rst15
-rw-r--r--Documentation/block/kyber-iosched.txt14
-rw-r--r--Documentation/block/null_blk.rst151
-rw-r--r--Documentation/block/null_blk.txt94
-rw-r--r--Documentation/block/pr.rst119
-rw-r--r--Documentation/block/pr.txt119
-rw-r--r--Documentation/block/queue-sysfs.txt197
-rw-r--r--Documentation/block/request.txt88
-rw-r--r--Documentation/block/stat.rst103
-rw-r--r--Documentation/block/stat.txt86
-rw-r--r--Documentation/block/switching-sched.rst35
-rw-r--r--Documentation/block/switching-sched.txt37
-rw-r--r--Documentation/block/ublk.rst441
-rw-r--r--Documentation/block/writeback_cache_control.rst95
-rw-r--r--Documentation/block/writeback_cache_control.txt86
-rw-r--r--Documentation/blockdev/drbd/README.txt16
-rw-r--r--Documentation/blockdev/drbd/data-structure-v9.txt38
-rw-r--r--Documentation/blockdev/drbd/node-states-8.dot14
-rw-r--r--Documentation/blockdev/floppy.txt245
-rw-r--r--Documentation/blockdev/nbd.txt31
-rw-r--r--Documentation/blockdev/paride.txt417
-rw-r--r--Documentation/blockdev/ramdisk.txt174
-rw-r--r--Documentation/blockdev/zram.txt271
-rw-r--r--Documentation/bpf/bpf_design_QA.rst212
-rw-r--r--Documentation/bpf/bpf_devel_QA.rst189
-rw-r--r--Documentation/bpf/bpf_iterators.rst589
-rw-r--r--Documentation/bpf/bpf_licensing.rst92
-rw-r--r--Documentation/bpf/bpf_prog_run.rst117
-rw-r--r--Documentation/bpf/btf.rst1210
-rw-r--r--Documentation/bpf/clang-notes.rst36
-rw-r--r--Documentation/bpf/classic_vs_extended.rst376
-rw-r--r--Documentation/bpf/cpumasks.rst384
-rw-r--r--Documentation/bpf/drgn.rst213
-rw-r--r--Documentation/bpf/faq.rst11
-rw-r--r--Documentation/bpf/fs_kfuncs.rst21
-rw-r--r--Documentation/bpf/graph_ds_impl.rst267
-rw-r--r--Documentation/bpf/helpers.rst7
-rw-r--r--Documentation/bpf/index.rst51
-rw-r--r--Documentation/bpf/kfuncs.rst712
-rw-r--r--Documentation/bpf/libbpf/index.rst33
-rw-r--r--Documentation/bpf/libbpf/libbpf_build.rst37
-rw-r--r--Documentation/bpf/libbpf/libbpf_naming_convention.rst193
-rw-r--r--Documentation/bpf/libbpf/libbpf_overview.rst236
-rw-r--r--Documentation/bpf/libbpf/program_types.rst235
-rw-r--r--Documentation/bpf/linux-notes.rst84
-rw-r--r--Documentation/bpf/llvm_reloc.rst546
-rw-r--r--Documentation/bpf/map_array.rst262
-rw-r--r--Documentation/bpf/map_bloom_filter.rst174
-rw-r--r--Documentation/bpf/map_cgroup_storage.rst169
-rw-r--r--Documentation/bpf/map_cgrp_storage.rst109
-rw-r--r--Documentation/bpf/map_cpumap.rst177
-rw-r--r--Documentation/bpf/map_devmap.rst238
-rw-r--r--Documentation/bpf/map_hash.rst265
-rw-r--r--Documentation/bpf/map_lpm_trie.rst197
-rw-r--r--Documentation/bpf/map_lru_hash_update.dot172
-rw-r--r--Documentation/bpf/map_of_maps.rst130
-rw-r--r--Documentation/bpf/map_queue_stack.rst146
-rw-r--r--Documentation/bpf/map_sk_storage.rst159
-rw-r--r--Documentation/bpf/map_sockmap.rst498
-rw-r--r--Documentation/bpf/map_xskmap.rst192
-rw-r--r--Documentation/bpf/maps.rst82
-rw-r--r--Documentation/bpf/other.rst10
-rw-r--r--Documentation/bpf/prog_cgroup_sockopt.rst162
-rw-r--r--Documentation/bpf/prog_cgroup_sysctl.rst125
-rw-r--r--Documentation/bpf/prog_flow_dissector.rst147
-rw-r--r--Documentation/bpf/prog_lsm.rst143
-rw-r--r--Documentation/bpf/prog_sk_lookup.rst98
-rw-r--r--Documentation/bpf/programs.rst12
-rw-r--r--Documentation/bpf/redirect.rst81
-rw-r--r--Documentation/bpf/ringbuf.rst206
-rw-r--r--Documentation/bpf/s390.rst205
-rw-r--r--Documentation/bpf/standardization/abi.rst28
-rw-r--r--Documentation/bpf/standardization/index.rst18
-rw-r--r--Documentation/bpf/standardization/instruction-set.rst790
-rw-r--r--Documentation/bpf/syscall_api.rst11
-rw-r--r--Documentation/bpf/test_debug.rst9
-rw-r--r--Documentation/bpf/verifier.rst560
-rw-r--r--Documentation/bt8xxgpio.txt62
-rw-r--r--Documentation/bus-devices/ti-gpmc.txt122
-rw-r--r--Documentation/bus-virt-phys-mapping.txt220
-rw-r--r--Documentation/cdrom/Makefile21
-rw-r--r--Documentation/cdrom/cdrom-standard.rst1047
-rw-r--r--Documentation/cdrom/cdrom-standard.tex1026
-rw-r--r--Documentation/cdrom/ide-cd534
-rw-r--r--Documentation/cdrom/index.rst17
-rw-r--r--Documentation/cdrom/packet-writing.txt132
-rw-r--r--Documentation/cgroup-v1/blkio-controller.txt375
-rw-r--r--Documentation/cgroup-v1/cgroups.txt677
-rw-r--r--Documentation/cgroup-v1/cpuacct.txt49
-rw-r--r--Documentation/cgroup-v1/cpusets.txt839
-rw-r--r--Documentation/cgroup-v1/devices.txt116
-rw-r--r--Documentation/cgroup-v1/freezer-subsystem.txt123
-rw-r--r--Documentation/cgroup-v1/hugetlb.txt45
-rw-r--r--Documentation/cgroup-v1/memcg_test.txt280
-rw-r--r--Documentation/cgroup-v1/memory.txt891
-rw-r--r--Documentation/cgroup-v1/net_cls.txt39
-rw-r--r--Documentation/cgroup-v1/net_prio.txt55
-rw-r--r--Documentation/cgroup-v1/pids.txt85
-rw-r--r--Documentation/cgroup-v1/rdma.txt109
-rw-r--r--Documentation/clearing-warn-once.txt7
-rw-r--r--Documentation/cma/debugfs.txt21
-rw-r--r--Documentation/conf.py824
-rw-r--r--Documentation/connector/connector.txt196
-rw-r--r--Documentation/console/console.txt145
-rw-r--r--Documentation/core-api/asm-annotations.rst222
-rw-r--r--Documentation/core-api/assoc_array.rst6
-rw-r--r--Documentation/core-api/atomic_ops.rst664
-rw-r--r--Documentation/core-api/cachetlb.rst113
-rw-r--r--Documentation/core-api/cgroup.rst9
-rw-r--r--Documentation/core-api/circular-buffers.rst2
-rw-r--r--Documentation/core-api/cleanup.rst8
-rw-r--r--Documentation/core-api/conf.py10
-rw-r--r--Documentation/core-api/cpu_hotplug.rst651
-rw-r--r--Documentation/core-api/debugging-via-ohci1394.rst185
-rw-r--r--Documentation/core-api/dma-api-howto.rst935
-rw-r--r--Documentation/core-api/dma-api.rst868
-rw-r--r--Documentation/core-api/dma-attributes.rst150
-rw-r--r--Documentation/core-api/dma-isa-lpc.rst152
-rw-r--r--Documentation/core-api/entry.rst279
-rw-r--r--Documentation/core-api/flexible-arrays.rst130
-rw-r--r--Documentation/core-api/floating-point.rst78
-rw-r--r--Documentation/core-api/folio_queue.rst209
-rw-r--r--Documentation/core-api/genalloc.rst28
-rw-r--r--Documentation/core-api/generic-radix-tree.rst12
-rw-r--r--Documentation/core-api/genericirq.rst58
-rw-r--r--Documentation/core-api/gfp_mask-from-fs-io.rst20
-rw-r--r--Documentation/core-api/idr.rst35
-rw-r--r--Documentation/core-api/index.rst129
-rw-r--r--Documentation/core-api/irq/concepts.rst25
-rw-r--r--Documentation/core-api/irq/index.rst11
-rw-r--r--Documentation/core-api/irq/irq-affinity.rst70
-rw-r--r--Documentation/core-api/irq/irq-domain.rst320
-rw-r--r--Documentation/core-api/irq/irqflags-tracing.rst (renamed from Documentation/irqflags-tracing.txt)0
-rw-r--r--Documentation/core-api/kernel-api.rst139
-rw-r--r--Documentation/core-api/kho/bindings/kho.yaml43
-rw-r--r--Documentation/core-api/kho/bindings/memblock/memblock.yaml39
-rw-r--r--Documentation/core-api/kho/bindings/memblock/reserve-mem.yaml40
-rw-r--r--Documentation/core-api/kho/bindings/sub-fdt.yaml27
-rw-r--r--Documentation/core-api/kho/concepts.rst74
-rw-r--r--Documentation/core-api/kho/fdt.rst80
-rw-r--r--Documentation/core-api/kho/index.rst13
-rw-r--r--Documentation/core-api/kobject.rst434
-rw-r--r--Documentation/core-api/kref.rst328
-rw-r--r--Documentation/core-api/list.rst776
-rw-r--r--Documentation/core-api/local_ops.rst2
-rw-r--r--Documentation/core-api/maple_tree.rst221
-rw-r--r--Documentation/core-api/memory-allocation.rst118
-rw-r--r--Documentation/core-api/memory-hotplug.rst94
-rw-r--r--Documentation/core-api/min_heap.rst302
-rw-r--r--Documentation/core-api/mm-api.rst102
-rw-r--r--Documentation/core-api/netlink.rst102
-rw-r--r--Documentation/core-api/packing.rst345
-rw-r--r--Documentation/core-api/padata.rst178
-rw-r--r--Documentation/core-api/parser.rst17
-rw-r--r--Documentation/core-api/pin_user_pages.rst286
-rw-r--r--Documentation/core-api/printk-basics.rst112
-rw-r--r--Documentation/core-api/printk-formats.rst280
-rw-r--r--Documentation/core-api/printk-index.rst137
-rw-r--r--Documentation/core-api/protection-keys.rst120
-rw-r--r--Documentation/core-api/rbtree.rst429
-rw-r--r--Documentation/core-api/real-time/architecture-porting.rst109
-rw-r--r--Documentation/core-api/real-time/differences.rst242
-rw-r--r--Documentation/core-api/real-time/index.rst16
-rw-r--r--Documentation/core-api/real-time/theory.rst116
-rw-r--r--Documentation/core-api/refcount-vs-atomic.rst89
-rw-r--r--Documentation/core-api/swiotlb.rst321
-rw-r--r--Documentation/core-api/symbol-namespaces.rst168
-rw-r--r--Documentation/core-api/this_cpu_ops.rst347
-rw-r--r--Documentation/core-api/timekeeping.rst21
-rw-r--r--Documentation/core-api/unaligned-memory-access.rst265
-rw-r--r--Documentation/core-api/union_find.rst106
-rw-r--r--Documentation/core-api/watch_queue.rst343
-rw-r--r--Documentation/core-api/workqueue.rst461
-rw-r--r--Documentation/core-api/wrappers/atomic_bitops.rst18
-rw-r--r--Documentation/core-api/wrappers/atomic_t.rst19
-rw-r--r--Documentation/core-api/wrappers/memory-barriers.rst18
-rw-r--r--Documentation/core-api/xarray.rst379
-rw-r--r--Documentation/cpu-freq/amd-powernow.txt38
-rw-r--r--Documentation/cpu-freq/core.rst113
-rw-r--r--Documentation/cpu-freq/core.txt120
-rw-r--r--Documentation/cpu-freq/cpu-drivers.rst285
-rw-r--r--Documentation/cpu-freq/cpu-drivers.txt295
-rw-r--r--Documentation/cpu-freq/cpufreq-nforce2.txt19
-rw-r--r--Documentation/cpu-freq/cpufreq-stats.rst136
-rw-r--r--Documentation/cpu-freq/cpufreq-stats.txt127
-rw-r--r--Documentation/cpu-freq/index.rst36
-rw-r--r--Documentation/cpu-freq/index.txt56
-rw-r--r--Documentation/cpu-freq/pcc-cpufreq.txt207
-rw-r--r--Documentation/cpu-load.txt114
-rw-r--r--Documentation/cpuidle/core.txt23
-rw-r--r--Documentation/cpuidle/driver.txt37
-rw-r--r--Documentation/cpuidle/governor.txt28
-rw-r--r--Documentation/cpuidle/sysfs.txt98
-rw-r--r--Documentation/cputopology.txt159
-rw-r--r--Documentation/crc32.txt189
-rw-r--r--Documentation/crypto/api-aead.rst3
-rw-r--r--Documentation/crypto/api-akcipher.rst7
-rw-r--r--Documentation/crypto/api-digest.rst3
-rw-r--r--Documentation/crypto/api-intro.rst262
-rw-r--r--Documentation/crypto/api-intro.txt250
-rw-r--r--Documentation/crypto/api-kpp.rst3
-rw-r--r--Documentation/crypto/api-rng.rst3
-rw-r--r--Documentation/crypto/api-samples.rst177
-rw-r--r--Documentation/crypto/api-sig.rst18
-rw-r--r--Documentation/crypto/api-skcipher.rst36
-rw-r--r--Documentation/crypto/api.rst15
-rw-r--r--Documentation/crypto/architecture.rst43
-rw-r--r--Documentation/crypto/asymmetric-keys.rst424
-rw-r--r--Documentation/crypto/asymmetric-keys.txt429
-rw-r--r--Documentation/crypto/async-tx-api.rst270
-rw-r--r--Documentation/crypto/async-tx-api.txt225
-rw-r--r--Documentation/crypto/conf.py10
-rw-r--r--Documentation/crypto/crypto_engine.rst106
-rw-r--r--Documentation/crypto/descore-readme.rst414
-rw-r--r--Documentation/crypto/descore-readme.txt352
-rw-r--r--Documentation/crypto/devel-algos.rst71
-rw-r--r--Documentation/crypto/device_drivers/index.rst9
-rw-r--r--Documentation/crypto/device_drivers/octeontx2.rst25
-rw-r--r--Documentation/crypto/index.rst18
-rw-r--r--Documentation/crypto/krb5.rst262
-rw-r--r--Documentation/crypto/userspace-if.rst39
-rw-r--r--Documentation/debugging-modules.txt22
-rw-r--r--Documentation/debugging-via-ohci1394.txt185
-rw-r--r--Documentation/dell_rbu.txt128
-rw-r--r--Documentation/dev-tools/autofdo.rst168
-rw-r--r--Documentation/dev-tools/checkpatch.rst1259
-rw-r--r--Documentation/dev-tools/checkuapi.rst477
-rw-r--r--Documentation/dev-tools/clang-format.rst184
-rw-r--r--Documentation/dev-tools/coccinelle.rst96
-rw-r--r--Documentation/dev-tools/conf.py10
-rw-r--r--Documentation/dev-tools/gcov.rst43
-rw-r--r--Documentation/dev-tools/gdb-kernel-debugging.rst175
-rw-r--r--Documentation/dev-tools/gpio-sloppy-logic-analyzer.rst93
-rw-r--r--Documentation/dev-tools/index.rst26
-rw-r--r--Documentation/dev-tools/kasan.rst656
-rw-r--r--Documentation/dev-tools/kcov.rst229
-rw-r--r--Documentation/dev-tools/kcsan.rst377
-rw-r--r--Documentation/dev-tools/kfence.rst340
-rw-r--r--Documentation/dev-tools/kmemleak.rst53
-rw-r--r--Documentation/dev-tools/kmsan.rst435
-rw-r--r--Documentation/dev-tools/kselftest.rst310
-rw-r--r--Documentation/dev-tools/ktap.rst314
-rw-r--r--Documentation/dev-tools/kunit/api/clk.rst10
-rw-r--r--Documentation/dev-tools/kunit/api/functionredirection.rst162
-rw-r--r--Documentation/dev-tools/kunit/api/index.rst48
-rw-r--r--Documentation/dev-tools/kunit/api/of.rst13
-rw-r--r--Documentation/dev-tools/kunit/api/platformdevice.rst10
-rw-r--r--Documentation/dev-tools/kunit/api/resource.rst22
-rw-r--r--Documentation/dev-tools/kunit/api/test.rst10
-rw-r--r--Documentation/dev-tools/kunit/architecture.rst196
-rw-r--r--Documentation/dev-tools/kunit/faq.rst104
-rw-r--r--Documentation/dev-tools/kunit/index.rst109
-rw-r--r--Documentation/dev-tools/kunit/kunit_suitememorydiagram.svg81
-rw-r--r--Documentation/dev-tools/kunit/run_manual.rst100
-rw-r--r--Documentation/dev-tools/kunit/run_wrapper.rst337
-rw-r--r--Documentation/dev-tools/kunit/running_tips.rst448
-rw-r--r--Documentation/dev-tools/kunit/start.rst309
-rw-r--r--Documentation/dev-tools/kunit/style.rst213
-rw-r--r--Documentation/dev-tools/kunit/usage.rst1214
-rw-r--r--Documentation/dev-tools/lkmm/docs/access-marking.rst11
-rw-r--r--Documentation/dev-tools/lkmm/docs/cheatsheet.rst11
-rw-r--r--Documentation/dev-tools/lkmm/docs/control-dependencies.rst11
-rw-r--r--Documentation/dev-tools/lkmm/docs/explanation.rst11
-rw-r--r--Documentation/dev-tools/lkmm/docs/glossary.rst11
-rw-r--r--Documentation/dev-tools/lkmm/docs/herd-representation.rst11
-rw-r--r--Documentation/dev-tools/lkmm/docs/index.rst21
-rw-r--r--Documentation/dev-tools/lkmm/docs/litmus-tests.rst11
-rw-r--r--Documentation/dev-tools/lkmm/docs/locking.rst11
-rw-r--r--Documentation/dev-tools/lkmm/docs/ordering.rst11
-rw-r--r--Documentation/dev-tools/lkmm/docs/readme.rst11
-rw-r--r--Documentation/dev-tools/lkmm/docs/recipes.rst11
-rw-r--r--Documentation/dev-tools/lkmm/docs/references.rst11
-rw-r--r--Documentation/dev-tools/lkmm/docs/simple.rst11
-rw-r--r--Documentation/dev-tools/lkmm/index.rst15
-rw-r--r--Documentation/dev-tools/lkmm/readme.rst11
-rw-r--r--Documentation/dev-tools/propeller.rst162
-rw-r--r--Documentation/dev-tools/sparse.rst13
-rw-r--r--Documentation/dev-tools/testing-devices.rst47
-rw-r--r--Documentation/dev-tools/testing-overview.rst182
-rw-r--r--Documentation/dev-tools/ubsan.rst35
-rw-r--r--Documentation/device-mapper/cache-policies.txt121
-rw-r--r--Documentation/device-mapper/cache.txt308
-rw-r--r--Documentation/device-mapper/delay.txt28
-rw-r--r--Documentation/device-mapper/dm-crypt.txt162
-rw-r--r--Documentation/device-mapper/dm-flakey.txt57
-rw-r--r--Documentation/device-mapper/dm-integrity.txt203
-rw-r--r--Documentation/device-mapper/dm-io.txt75
-rw-r--r--Documentation/device-mapper/dm-log.txt54
-rw-r--r--Documentation/device-mapper/dm-queue-length.txt39
-rw-r--r--Documentation/device-mapper/dm-raid.txt354
-rw-r--r--Documentation/device-mapper/dm-service-time.txt91
-rw-r--r--Documentation/device-mapper/dm-uevent.txt97
-rw-r--r--Documentation/device-mapper/dm-zoned.txt144
-rw-r--r--Documentation/device-mapper/era.txt108
-rw-r--r--Documentation/device-mapper/kcopyd.txt47
-rw-r--r--Documentation/device-mapper/linear.txt61
-rw-r--r--Documentation/device-mapper/log-writes.txt140
-rw-r--r--Documentation/device-mapper/persistent-data.txt84
-rw-r--r--Documentation/device-mapper/snapshot.txt176
-rw-r--r--Documentation/device-mapper/statistics.txt223
-rw-r--r--Documentation/device-mapper/striped.txt57
-rw-r--r--Documentation/device-mapper/switch.txt138
-rw-r--r--Documentation/device-mapper/thin-provisioning.txt411
-rw-r--r--Documentation/device-mapper/unstriped.txt124
-rw-r--r--Documentation/device-mapper/verity.txt219
-rw-r--r--Documentation/device-mapper/writecache.txt70
-rw-r--r--Documentation/device-mapper/zero.txt37
-rw-r--r--Documentation/devicetree/bindings/.gitignore9
-rw-r--r--Documentation/devicetree/bindings/.yamllint44
-rw-r--r--Documentation/devicetree/bindings/ABI.rst42
-rw-r--r--Documentation/devicetree/bindings/ABI.txt39
-rw-r--r--Documentation/devicetree/bindings/Makefile85
-rw-r--r--Documentation/devicetree/bindings/access-controllers/access-controllers.yaml84
-rw-r--r--Documentation/devicetree/bindings/arc/archs-pct.txt17
-rw-r--r--Documentation/devicetree/bindings/arc/snps,archs-pct.yaml33
-rw-r--r--Documentation/devicetree/bindings/arm/actions.txt56
-rw-r--r--Documentation/devicetree/bindings/arm/actions.yaml53
-rw-r--r--Documentation/devicetree/bindings/arm/airoha,en7581-chip-scu.yaml42
-rw-r--r--Documentation/devicetree/bindings/arm/airoha.yaml32
-rw-r--r--Documentation/devicetree/bindings/arm/al,alpine.txt16
-rw-r--r--Documentation/devicetree/bindings/arm/altera.txt14
-rw-r--r--Documentation/devicetree/bindings/arm/altera.yaml69
-rw-r--r--Documentation/devicetree/bindings/arm/altera/socfpga-clk-manager.txt11
-rw-r--r--Documentation/devicetree/bindings/arm/altera/socfpga-clk-manager.yaml133
-rw-r--r--Documentation/devicetree/bindings/arm/altera/socfpga-sdram-controller.txt12
-rw-r--r--Documentation/devicetree/bindings/arm/altera/socfpga-sdram-edac.txt15
-rw-r--r--Documentation/devicetree/bindings/arm/altera/socfpga-system.txt13
-rw-r--r--Documentation/devicetree/bindings/arm/amazon,al.yaml35
-rw-r--r--Documentation/devicetree/bindings/arm/amd,pensando.yaml26
-rw-r--r--Documentation/devicetree/bindings/arm/amlogic,scpi.txt20
-rw-r--r--Documentation/devicetree/bindings/arm/amlogic.txt138
-rw-r--r--Documentation/devicetree/bindings/arm/amlogic.yaml277
-rw-r--r--Documentation/devicetree/bindings/arm/amlogic/amlogic,meson-gx-ao-secure.yaml62
-rw-r--r--Documentation/devicetree/bindings/arm/amlogic/amlogic,meson-mx-secbus2.yaml42
-rw-r--r--Documentation/devicetree/bindings/arm/amlogic/analog-top.txt20
-rw-r--r--Documentation/devicetree/bindings/arm/amlogic/assist.txt17
-rw-r--r--Documentation/devicetree/bindings/arm/amlogic/bootrom.txt17
-rw-r--r--Documentation/devicetree/bindings/arm/amlogic/pmu.txt18
-rw-r--r--Documentation/devicetree/bindings/arm/amlogic/smp-sram.txt32
-rw-r--r--Documentation/devicetree/bindings/arm/apm/scu.txt17
-rw-r--r--Documentation/devicetree/bindings/arm/apple.yaml352
-rw-r--r--Documentation/devicetree/bindings/arm/apple/apple,pmgr.yaml147
-rw-r--r--Documentation/devicetree/bindings/arm/arm,cci-400.yaml211
-rw-r--r--Documentation/devicetree/bindings/arm/arm,coresight-catu.yaml104
-rw-r--r--Documentation/devicetree/bindings/arm/arm,coresight-cpu-debug.yaml81
-rw-r--r--Documentation/devicetree/bindings/arm/arm,coresight-cti.yaml352
-rw-r--r--Documentation/devicetree/bindings/arm/arm,coresight-dummy-sink.yaml77
-rw-r--r--Documentation/devicetree/bindings/arm/arm,coresight-dummy-source.yaml81
-rw-r--r--Documentation/devicetree/bindings/arm/arm,coresight-dynamic-funnel.yaml133
-rw-r--r--Documentation/devicetree/bindings/arm/arm,coresight-dynamic-replicator.yaml133
-rw-r--r--Documentation/devicetree/bindings/arm/arm,coresight-etb10.yaml99
-rw-r--r--Documentation/devicetree/bindings/arm/arm,coresight-etm.yaml163
-rw-r--r--Documentation/devicetree/bindings/arm/arm,coresight-static-funnel.yaml97
-rw-r--r--Documentation/devicetree/bindings/arm/arm,coresight-static-replicator.yaml128
-rw-r--r--Documentation/devicetree/bindings/arm/arm,coresight-stm.yaml104
-rw-r--r--Documentation/devicetree/bindings/arm/arm,coresight-tmc.yaml167
-rw-r--r--Documentation/devicetree/bindings/arm/arm,coresight-tpiu.yaml98
-rw-r--r--Documentation/devicetree/bindings/arm/arm,corstone1000.yaml45
-rw-r--r--Documentation/devicetree/bindings/arm/arm,embedded-trace-extension.yaml77
-rw-r--r--Documentation/devicetree/bindings/arm/arm,integrator.yaml49
-rw-r--r--Documentation/devicetree/bindings/arm/arm,juno-fpga-apb-regs.yaml61
-rw-r--r--Documentation/devicetree/bindings/arm/arm,morello.yaml35
-rw-r--r--Documentation/devicetree/bindings/arm/arm,realview.yaml88
-rw-r--r--Documentation/devicetree/bindings/arm/arm,scmi.txt179
-rw-r--r--Documentation/devicetree/bindings/arm/arm,scpi.txt219
-rw-r--r--Documentation/devicetree/bindings/arm/arm,scu.yaml46
-rw-r--r--Documentation/devicetree/bindings/arm/arm,trace-buffer-extension.yaml50
-rw-r--r--Documentation/devicetree/bindings/arm/arm,versatile-sysreg.yaml35
-rw-r--r--Documentation/devicetree/bindings/arm/arm,versatile.yaml49
-rw-r--r--Documentation/devicetree/bindings/arm/arm,vexpress-juno.yaml231
-rw-r--r--Documentation/devicetree/bindings/arm/arm-boards237
-rw-r--r--Documentation/devicetree/bindings/arm/arm-dsu-pmu.txt27
-rw-r--r--Documentation/devicetree/bindings/arm/armadeus.txt6
-rw-r--r--Documentation/devicetree/bindings/arm/aspeed/aspeed,sbc.yaml37
-rw-r--r--Documentation/devicetree/bindings/arm/aspeed/aspeed.yaml112
-rw-r--r--Documentation/devicetree/bindings/arm/atmel,sama5d2-secumod.yaml49
-rw-r--r--Documentation/devicetree/bindings/arm/atmel-at91.txt72
-rw-r--r--Documentation/devicetree/bindings/arm/atmel-at91.yaml294
-rw-r--r--Documentation/devicetree/bindings/arm/atmel-sysregs.txt159
-rw-r--r--Documentation/devicetree/bindings/arm/axentia.txt28
-rw-r--r--Documentation/devicetree/bindings/arm/axiado.yaml23
-rw-r--r--Documentation/devicetree/bindings/arm/axis.txt29
-rw-r--r--Documentation/devicetree/bindings/arm/axis.yaml36
-rw-r--r--Documentation/devicetree/bindings/arm/axxia.txt12
-rw-r--r--Documentation/devicetree/bindings/arm/axxia.yaml23
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/bcm2835.yaml66
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/brcm,bcm11351-cpu-method.txt36
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/brcm,bcm11351.txt10
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/brcm,bcm11351.yaml23
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/brcm,bcm21664.txt15
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/brcm,bcm21664.yaml23
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/brcm,bcm23550-cpu-method.txt36
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/brcm,bcm23550.txt15
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/brcm,bcm23550.yaml23
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/brcm,bcm2835.txt63
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/brcm,bcm4708.txt15
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/brcm,bcm4708.yaml114
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/brcm,bcm53573.yaml39
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/brcm,bcm63138.txt4
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/brcm,bcmbca.yaml153
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/brcm,brcmstb.txt11
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/brcm,cygnus.txt31
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/brcm,cygnus.yaml31
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/brcm,hr2.txt14
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/brcm,hr2.yaml30
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/brcm,ns2.txt9
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/brcm,ns2.yaml25
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/brcm,nsp-cpu-method.txt39
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/brcm,nsp.txt34
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/brcm,nsp.yaml83
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/brcm,stingray.txt12
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/brcm,stingray.yaml26
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/brcm,vulcan-soc.txt10
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/brcm,vulcan-soc.yaml24
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/raspberrypi,bcm2835-firmware.txt14
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/raspberrypi,bcm2835-firmware.yaml167
-rw-r--r--Documentation/devicetree/bindings/arm/bhf.txt6
-rw-r--r--Documentation/devicetree/bindings/arm/bitmain.yaml23
-rw-r--r--Documentation/devicetree/bindings/arm/blaize.yaml40
-rw-r--r--Documentation/devicetree/bindings/arm/calxeda.txt15
-rw-r--r--Documentation/devicetree/bindings/arm/calxeda.yaml24
-rw-r--r--Documentation/devicetree/bindings/arm/calxeda/hb-sregs.yaml49
-rw-r--r--Documentation/devicetree/bindings/arm/calxeda/l2ecc.txt15
-rw-r--r--Documentation/devicetree/bindings/arm/calxeda/l2ecc.yaml42
-rw-r--r--Documentation/devicetree/bindings/arm/cavium,thunder-88xx.yaml19
-rw-r--r--Documentation/devicetree/bindings/arm/cavium-thunder.txt10
-rw-r--r--Documentation/devicetree/bindings/arm/cavium-thunder2.txt8
-rw-r--r--Documentation/devicetree/bindings/arm/cci-control-port.yaml38
-rw-r--r--Documentation/devicetree/bindings/arm/cci.txt224
-rw-r--r--Documentation/devicetree/bindings/arm/cirrus/cirrus,ep9301.yaml38
-rw-r--r--Documentation/devicetree/bindings/arm/cix.yaml26
-rw-r--r--Documentation/devicetree/bindings/arm/compulab-boards.txt25
-rw-r--r--Documentation/devicetree/bindings/arm/coresight-cpu-debug.txt49
-rw-r--r--Documentation/devicetree/bindings/arm/coresight.txt317
-rw-r--r--Documentation/devicetree/bindings/arm/cpu-enable-method/al,alpine-smp10
-rw-r--r--Documentation/devicetree/bindings/arm/cpus.txt490
-rw-r--r--Documentation/devicetree/bindings/arm/cpus.yaml701
-rw-r--r--Documentation/devicetree/bindings/arm/davinci.txt25
-rw-r--r--Documentation/devicetree/bindings/arm/digicolor.txt6
-rw-r--r--Documentation/devicetree/bindings/arm/digicolor.yaml20
-rw-r--r--Documentation/devicetree/bindings/arm/firmware/linaro,optee-tz.txt31
-rw-r--r--Documentation/devicetree/bindings/arm/firmware/linaro,optee-tz.yaml67
-rw-r--r--Documentation/devicetree/bindings/arm/firmware/tlm,trusted-foundations.txt20
-rw-r--r--Documentation/devicetree/bindings/arm/firmware/tlm,trusted-foundations.yaml46
-rw-r--r--Documentation/devicetree/bindings/arm/freescale/fsl,imx51-m4if.yaml41
-rw-r--r--Documentation/devicetree/bindings/arm/freescale/fsl,imx7ulp-pm.yaml50
-rw-r--r--Documentation/devicetree/bindings/arm/freescale/fsl,imx7ulp-sim.yaml40
-rw-r--r--Documentation/devicetree/bindings/arm/freescale/fsl,layerscape-dcfg.txt19
-rw-r--r--Documentation/devicetree/bindings/arm/freescale/fsl,layerscape-scfg.txt19
-rw-r--r--Documentation/devicetree/bindings/arm/freescale/fsl,scu.txt183
-rw-r--r--Documentation/devicetree/bindings/arm/freescale/fsl,vf610-mscm-cpucfg.txt14
-rw-r--r--Documentation/devicetree/bindings/arm/freescale/fsl,vf610-mscm-ir.txt30
-rw-r--r--Documentation/devicetree/bindings/arm/freescale/m4if.txt12
-rw-r--r--Documentation/devicetree/bindings/arm/freescale/tigerp.txt12
-rw-r--r--Documentation/devicetree/bindings/arm/fsl.txt229
-rw-r--r--Documentation/devicetree/bindings/arm/fsl.yaml1784
-rw-r--r--Documentation/devicetree/bindings/arm/fw-cfg.txt38
-rw-r--r--Documentation/devicetree/bindings/arm/gemini.txt108
-rw-r--r--Documentation/devicetree/bindings/arm/gemini.yaml95
-rw-r--r--Documentation/devicetree/bindings/arm/google.yaml54
-rw-r--r--Documentation/devicetree/bindings/arm/hisilicon/controller/cpuctrl.yaml74
-rw-r--r--Documentation/devicetree/bindings/arm/hisilicon/controller/hi3798cv200-perictrl.yaml64
-rw-r--r--Documentation/devicetree/bindings/arm/hisilicon/controller/hi6220-domain-ctrl.yaml68
-rw-r--r--Documentation/devicetree/bindings/arm/hisilicon/controller/hip04-bootwrapper.yaml35
-rw-r--r--Documentation/devicetree/bindings/arm/hisilicon/controller/hip04-fabric.yaml27
-rw-r--r--Documentation/devicetree/bindings/arm/hisilicon/controller/pctrl.yaml34
-rw-r--r--Documentation/devicetree/bindings/arm/hisilicon/controller/sysctrl.yaml149
-rw-r--r--Documentation/devicetree/bindings/arm/hisilicon/hi3519-sysctrl.txt14
-rw-r--r--Documentation/devicetree/bindings/arm/hisilicon/hisilicon-low-pin-count.txt33
-rw-r--r--Documentation/devicetree/bindings/arm/hisilicon/hisilicon.txt319
-rw-r--r--Documentation/devicetree/bindings/arm/hisilicon/hisilicon.yaml70
-rw-r--r--Documentation/devicetree/bindings/arm/hisilicon/low-pin-count.yaml61
-rw-r--r--Documentation/devicetree/bindings/arm/hpe,gxp.yaml27
-rw-r--r--Documentation/devicetree/bindings/arm/i2se.txt22
-rw-r--r--Documentation/devicetree/bindings/arm/idle-states.txt699
-rw-r--r--Documentation/devicetree/bindings/arm/intel,keembay.yaml24
-rw-r--r--Documentation/devicetree/bindings/arm/intel,socfpga.yaml33
-rw-r--r--Documentation/devicetree/bindings/arm/intel-ixp4xx.yaml45
-rw-r--r--Documentation/devicetree/bindings/arm/juno,scpi.txt26
-rw-r--r--Documentation/devicetree/bindings/arm/keystone/keystone.txt42
-rw-r--r--Documentation/devicetree/bindings/arm/keystone/ti,k3-sci-common.yaml46
-rw-r--r--Documentation/devicetree/bindings/arm/keystone/ti,sci.txt85
-rw-r--r--Documentation/devicetree/bindings/arm/keystone/ti,sci.yaml124
-rw-r--r--Documentation/devicetree/bindings/arm/l2c2x0.txt114
-rw-r--r--Documentation/devicetree/bindings/arm/linux,dummy-virt.yaml20
-rw-r--r--Documentation/devicetree/bindings/arm/marvell,berlin.yaml45
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/98dx3236.txt23
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/ap806-system-controller.txt138
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/ap80x-system-controller.txt146
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/armada-370-xp.txt24
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/armada-375.txt9
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/armada-37xx.txt50
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/armada-37xx.yaml49
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/armada-38x.txt27
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/armada-38x.yaml70
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/armada-39x.txt31
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/armada-7k-8k.txt24
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/armada-7k-8k.yaml128
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/armada-8kp.txt15
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/cp110-system-controller.txt52
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/kirkwood.txt27
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/marvell,ac5.yaml32
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/marvell,armada-370-xp.yaml78
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/marvell,armada375.yaml21
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/marvell,armada390.yaml32
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/marvell,dove.txt22
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/marvell,dove.yaml35
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/marvell,kirkwood.txt105
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/marvell,kirkwood.yaml266
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/marvell,orion5x.txt25
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/marvell,orion5x.yaml37
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek.txt79
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek.yaml458
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,apmixedsys.txt28
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,audsys.txt33
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,audsys.yaml167
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,bdpsys.txt24
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,ethsys.txt26
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,hifsys.txt26
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,imgsys.txt26
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,infracfg.txt35
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,jpgdecsys.txt22
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,mcucfg.txt22
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,mfgcfg.txt22
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.txt26
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.yaml145
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-pcie-mirror.yaml42
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-wed.yaml105
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7986-wed-pcie.yaml43
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,pciesys.txt24
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.txt34
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,sgmiisys.txt22
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,ssusbsys.txt24
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,topckgen.txt28
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,vdecsys.txt26
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,vencltsys.txt22
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,vencsys.txt24
-rw-r--r--Documentation/devicetree/bindings/arm/microchip,sparx5.yaml67
-rw-r--r--Documentation/devicetree/bindings/arm/moxart.txt12
-rw-r--r--Documentation/devicetree/bindings/arm/moxart.yaml20
-rw-r--r--Documentation/devicetree/bindings/arm/mrvl/feroceon.txt16
-rw-r--r--Documentation/devicetree/bindings/arm/mrvl/mrvl.txt14
-rw-r--r--Documentation/devicetree/bindings/arm/mrvl/mrvl.yaml46
-rw-r--r--Documentation/devicetree/bindings/arm/mrvl/tauros2.txt17
-rw-r--r--Documentation/devicetree/bindings/arm/msm/qcom,idle-state.txt84
-rw-r--r--Documentation/devicetree/bindings/arm/msm/qcom,kpss-acc.txt49
-rw-r--r--Documentation/devicetree/bindings/arm/msm/qcom,kpss-gcc.txt44
-rw-r--r--Documentation/devicetree/bindings/arm/msm/qcom,llcc.txt41
-rw-r--r--Documentation/devicetree/bindings/arm/msm/qcom,saw2.txt57
-rw-r--r--Documentation/devicetree/bindings/arm/msm/ssbi.txt18
-rw-r--r--Documentation/devicetree/bindings/arm/mstar/mstar,l3bridge.yaml44
-rw-r--r--Documentation/devicetree/bindings/arm/mstar/mstar,smpctrl.yaml40
-rw-r--r--Documentation/devicetree/bindings/arm/mstar/mstar.yaml46
-rw-r--r--Documentation/devicetree/bindings/arm/npcm/npcm.txt6
-rw-r--r--Documentation/devicetree/bindings/arm/nspire.txt14
-rw-r--r--Documentation/devicetree/bindings/arm/nuvoton/nuvoton,ma35d1.yaml30
-rw-r--r--Documentation/devicetree/bindings/arm/nuvoton/nuvoton,npcm.yaml36
-rw-r--r--Documentation/devicetree/bindings/arm/nvidia,tegra194-ccplex.yaml71
-rw-r--r--Documentation/devicetree/bindings/arm/nxp/lpc32xx.txt8
-rw-r--r--Documentation/devicetree/bindings/arm/nxp/lpc32xx.yaml26
-rw-r--r--Documentation/devicetree/bindings/arm/olimex.txt10
-rw-r--r--Documentation/devicetree/bindings/arm/omap/crossbar.txt2
-rw-r--r--Documentation/devicetree/bindings/arm/omap/ctrl.txt2
-rw-r--r--Documentation/devicetree/bindings/arm/omap/mpu.txt2
-rw-r--r--Documentation/devicetree/bindings/arm/omap/omap.txt101
-rw-r--r--Documentation/devicetree/bindings/arm/omap/prcm.txt7
-rw-r--r--Documentation/devicetree/bindings/arm/omap/prm-inst.txt31
-rw-r--r--Documentation/devicetree/bindings/arm/oxnas.txt14
-rw-r--r--Documentation/devicetree/bindings/arm/picoxcell.txt24
-rw-r--r--Documentation/devicetree/bindings/arm/pmu.txt70
-rw-r--r--Documentation/devicetree/bindings/arm/pmu.yaml133
-rw-r--r--Documentation/devicetree/bindings/arm/primecell.txt46
-rw-r--r--Documentation/devicetree/bindings/arm/primecell.yaml39
-rw-r--r--Documentation/devicetree/bindings/arm/psci.txt111
-rw-r--r--Documentation/devicetree/bindings/arm/psci.yaml264
-rw-r--r--Documentation/devicetree/bindings/arm/qcom,coresight-ctcu.yaml88
-rw-r--r--Documentation/devicetree/bindings/arm/qcom,coresight-remote-etm.yaml55
-rw-r--r--Documentation/devicetree/bindings/arm/qcom,coresight-tnoc.yaml113
-rw-r--r--Documentation/devicetree/bindings/arm/qcom,coresight-tpda.yaml128
-rw-r--r--Documentation/devicetree/bindings/arm/qcom,coresight-tpdm.yaml150
-rw-r--r--Documentation/devicetree/bindings/arm/qcom-soc.yaml70
-rw-r--r--Documentation/devicetree/bindings/arm/qcom.txt57
-rw-r--r--Documentation/devicetree/bindings/arm/qcom.yaml1226
-rw-r--r--Documentation/devicetree/bindings/arm/rda.yaml24
-rw-r--r--Documentation/devicetree/bindings/arm/realtek.txt22
-rw-r--r--Documentation/devicetree/bindings/arm/realtek.yaml60
-rw-r--r--Documentation/devicetree/bindings/arm/rockchip.txt240
-rw-r--r--Documentation/devicetree/bindings/arm/rockchip.yaml1282
-rw-r--r--Documentation/devicetree/bindings/arm/rockchip/pmu.txt16
-rw-r--r--Documentation/devicetree/bindings/arm/rockchip/pmu.yaml77
-rw-r--r--Documentation/devicetree/bindings/arm/rtsm-dcscb.txt19
-rw-r--r--Documentation/devicetree/bindings/arm/samsung/exynos-chipid.txt12
-rw-r--r--Documentation/devicetree/bindings/arm/samsung/pmu.txt72
-rw-r--r--Documentation/devicetree/bindings/arm/samsung/samsung-boards.txt83
-rw-r--r--Documentation/devicetree/bindings/arm/samsung/samsung-boards.yaml271
-rw-r--r--Documentation/devicetree/bindings/arm/samsung/samsung-secure-firmware.yaml33
-rw-r--r--Documentation/devicetree/bindings/arm/samsung/samsung-soc.yaml40
-rw-r--r--Documentation/devicetree/bindings/arm/samsung/sysreg.txt19
-rw-r--r--Documentation/devicetree/bindings/arm/scu.txt28
-rw-r--r--Documentation/devicetree/bindings/arm/shmobile.txt169
-rw-r--r--Documentation/devicetree/bindings/arm/sirf.txt11
-rw-r--r--Documentation/devicetree/bindings/arm/socionext/milbeaut.yaml25
-rw-r--r--Documentation/devicetree/bindings/arm/socionext/synquacer.yaml28
-rw-r--r--Documentation/devicetree/bindings/arm/socionext/uniphier.yaml70
-rw-r--r--Documentation/devicetree/bindings/arm/sp810.txt46
-rw-r--r--Documentation/devicetree/bindings/arm/sp810.yaml80
-rw-r--r--Documentation/devicetree/bindings/arm/spe-pmu.txt20
-rw-r--r--Documentation/devicetree/bindings/arm/spear-misc.txt9
-rw-r--r--Documentation/devicetree/bindings/arm/spear.txt26
-rw-r--r--Documentation/devicetree/bindings/arm/spear.yaml28
-rw-r--r--Documentation/devicetree/bindings/arm/sprd.txt14
-rw-r--r--Documentation/devicetree/bindings/arm/sprd/sprd.yaml45
-rw-r--r--Documentation/devicetree/bindings/arm/ste-u300.txt46
-rw-r--r--Documentation/devicetree/bindings/arm/sti.txt23
-rw-r--r--Documentation/devicetree/bindings/arm/sti.yaml29
-rw-r--r--Documentation/devicetree/bindings/arm/stm32/st,mlahb.yaml71
-rw-r--r--Documentation/devicetree/bindings/arm/stm32/st,stm32-syscon.yaml76
-rw-r--r--Documentation/devicetree/bindings/arm/stm32/stm32-syscon.txt14
-rw-r--r--Documentation/devicetree/bindings/arm/stm32/stm32.txt10
-rw-r--r--Documentation/devicetree/bindings/arm/stm32/stm32.yaml218
-rw-r--r--Documentation/devicetree/bindings/arm/sunplus,sp7021.yaml29
-rw-r--r--Documentation/devicetree/bindings/arm/sunxi.txt21
-rw-r--r--Documentation/devicetree/bindings/arm/sunxi.yaml1114
-rw-r--r--Documentation/devicetree/bindings/arm/sunxi/allwinner,sun4i-a10-mbus.yaml168
-rw-r--r--Documentation/devicetree/bindings/arm/sunxi/allwinner,sun6i-a31-cpuconfig.yaml38
-rw-r--r--Documentation/devicetree/bindings/arm/sunxi/allwinner,sun9i-a80-prcm.yaml33
-rw-r--r--Documentation/devicetree/bindings/arm/sunxi/smp-sram.txt44
-rw-r--r--Documentation/devicetree/bindings/arm/swir.txt12
-rw-r--r--Documentation/devicetree/bindings/arm/syna.txt105
-rw-r--r--Documentation/devicetree/bindings/arm/technologic.txt23
-rw-r--r--Documentation/devicetree/bindings/arm/tegra.txt65
-rw-r--r--Documentation/devicetree/bindings/arm/tegra.yaml267
-rw-r--r--Documentation/devicetree/bindings/arm/tegra/nvidia,nvec.txt21
-rw-r--r--Documentation/devicetree/bindings/arm/tegra/nvidia,tegra-ccplex-cluster.yaml50
-rw-r--r--Documentation/devicetree/bindings/arm/tegra/nvidia,tegra186-pmc.txt129
-rw-r--r--Documentation/devicetree/bindings/arm/tegra/nvidia,tegra186-pmc.yaml217
-rw-r--r--Documentation/devicetree/bindings/arm/tegra/nvidia,tegra194-axi2apb.yaml40
-rw-r--r--Documentation/devicetree/bindings/arm/tegra/nvidia,tegra194-cbb.yaml97
-rw-r--r--Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-ahb.txt17
-rw-r--r--Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-emc.txt100
-rw-r--r--Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-flowctrl.txt18
-rw-r--r--Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.txt300
-rw-r--r--Documentation/devicetree/bindings/arm/tegra/nvidia,tegra234-cbb.yaml74
-rw-r--r--Documentation/devicetree/bindings/arm/tegra/nvidia,tegra30-actmon.txt32
-rw-r--r--Documentation/devicetree/bindings/arm/tesla.yaml27
-rw-r--r--Documentation/devicetree/bindings/arm/ti/k3.txt23
-rw-r--r--Documentation/devicetree/bindings/arm/ti/k3.yaml215
-rw-r--r--Documentation/devicetree/bindings/arm/ti/nspire.yaml27
-rw-r--r--Documentation/devicetree/bindings/arm/ti/omap.yaml186
-rw-r--r--Documentation/devicetree/bindings/arm/ti/ti,davinci.yaml29
-rw-r--r--Documentation/devicetree/bindings/arm/ti/ti,keystone.yaml42
-rw-r--r--Documentation/devicetree/bindings/arm/topology.txt475
-rw-r--r--Documentation/devicetree/bindings/arm/toshiba.yaml26
-rw-r--r--Documentation/devicetree/bindings/arm/uniphier/cache-uniphier.txt60
-rw-r--r--Documentation/devicetree/bindings/arm/ux500.yaml73
-rw-r--r--Documentation/devicetree/bindings/arm/ux500/boards.txt4
-rw-r--r--Documentation/devicetree/bindings/arm/versatile-sysreg.txt10
-rw-r--r--Documentation/devicetree/bindings/arm/vexpress-config.yaml285
-rw-r--r--Documentation/devicetree/bindings/arm/vexpress-sysreg.txt103
-rw-r--r--Documentation/devicetree/bindings/arm/vexpress-sysreg.yaml96
-rw-r--r--Documentation/devicetree/bindings/arm/vexpress.txt229
-rw-r--r--Documentation/devicetree/bindings/arm/vt8500.txt22
-rw-r--r--Documentation/devicetree/bindings/arm/vt8500.yaml29
-rw-r--r--Documentation/devicetree/bindings/arm/xen.txt16
-rw-r--r--Documentation/devicetree/bindings/arm/xilinx.txt83
-rw-r--r--Documentation/devicetree/bindings/arm/zte,sysctrl.txt30
-rw-r--r--Documentation/devicetree/bindings/arm/zte.txt14
-rw-r--r--Documentation/devicetree/bindings/ata/ahci-ceva.txt59
-rw-r--r--Documentation/devicetree/bindings/ata/ahci-common.yaml123
-rw-r--r--Documentation/devicetree/bindings/ata/ahci-da850.txt18
-rw-r--r--Documentation/devicetree/bindings/ata/ahci-dm816.txt21
-rw-r--r--Documentation/devicetree/bindings/ata/ahci-fsl-qoriq.txt21
-rw-r--r--Documentation/devicetree/bindings/ata/ahci-mtk.txt51
-rw-r--r--Documentation/devicetree/bindings/ata/ahci-platform.txt91
-rw-r--r--Documentation/devicetree/bindings/ata/ahci-platform.yaml208
-rw-r--r--Documentation/devicetree/bindings/ata/ahci-st.txt35
-rw-r--r--Documentation/devicetree/bindings/ata/allwinner,sun4i-a10-ahci.yaml47
-rw-r--r--Documentation/devicetree/bindings/ata/allwinner,sun8i-r40-ahci.yaml67
-rw-r--r--Documentation/devicetree/bindings/ata/apm,xgene-ahci.yaml65
-rw-r--r--Documentation/devicetree/bindings/ata/apm-xgene.txt77
-rw-r--r--Documentation/devicetree/bindings/ata/arasan,cf-spear1340.yaml70
-rw-r--r--Documentation/devicetree/bindings/ata/ata-generic.yaml58
-rw-r--r--Documentation/devicetree/bindings/ata/atmel-at91_cf.txt19
-rw-r--r--Documentation/devicetree/bindings/ata/baikal,bt1-ahci.yaml115
-rw-r--r--Documentation/devicetree/bindings/ata/brcm,sata-brcm.txt38
-rw-r--r--Documentation/devicetree/bindings/ata/brcm,sata-brcm.yaml87
-rw-r--r--Documentation/devicetree/bindings/ata/cavium,ebt3000-compact-flash.yaml59
-rw-r--r--Documentation/devicetree/bindings/ata/cavium-compact-flash.txt30
-rw-r--r--Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml187
-rw-r--r--Documentation/devicetree/bindings/ata/cirrus,ep9312-pata.yaml42
-rw-r--r--Documentation/devicetree/bindings/ata/cortina,gemini-sata-bridge.txt55
-rw-r--r--Documentation/devicetree/bindings/ata/cortina,gemini-sata-bridge.yaml107
-rw-r--r--Documentation/devicetree/bindings/ata/exynos-sata.txt30
-rw-r--r--Documentation/devicetree/bindings/ata/faraday,ftide010.txt38
-rw-r--r--Documentation/devicetree/bindings/ata/faraday,ftide010.yaml91
-rw-r--r--Documentation/devicetree/bindings/ata/fsl,ahci.yaml64
-rw-r--r--Documentation/devicetree/bindings/ata/fsl,imx-pata.yaml42
-rw-r--r--Documentation/devicetree/bindings/ata/fsl,pq-sata.yaml60
-rw-r--r--Documentation/devicetree/bindings/ata/fsl-sata.txt28
-rw-r--r--Documentation/devicetree/bindings/ata/imx-pata.txt16
-rw-r--r--Documentation/devicetree/bindings/ata/imx-sata.txt37
-rw-r--r--Documentation/devicetree/bindings/ata/imx-sata.yaml133
-rw-r--r--Documentation/devicetree/bindings/ata/intel,ixp4xx-compact-flash.yaml62
-rw-r--r--Documentation/devicetree/bindings/ata/marvell,orion-sata.yaml83
-rw-r--r--Documentation/devicetree/bindings/ata/marvell.txt22
-rw-r--r--Documentation/devicetree/bindings/ata/mediatek,mtk-ahci.yaml98
-rw-r--r--Documentation/devicetree/bindings/ata/nvidia,tegra-ahci.yaml175
-rw-r--r--Documentation/devicetree/bindings/ata/nvidia,tegra124-ahci.txt44
-rw-r--r--Documentation/devicetree/bindings/ata/pata-arasan.txt37
-rw-r--r--Documentation/devicetree/bindings/ata/pata-common.yaml53
-rw-r--r--Documentation/devicetree/bindings/ata/qcom-sata.txt48
-rw-r--r--Documentation/devicetree/bindings/ata/renesas,rcar-sata.yaml84
-rw-r--r--Documentation/devicetree/bindings/ata/rockchip,dwc-ahci.yaml130
-rw-r--r--Documentation/devicetree/bindings/ata/sata-common.yaml57
-rw-r--r--Documentation/devicetree/bindings/ata/sata_highbank.txt44
-rw-r--r--Documentation/devicetree/bindings/ata/sata_highbank.yaml95
-rw-r--r--Documentation/devicetree/bindings/ata/sata_rcar.txt33
-rw-r--r--Documentation/devicetree/bindings/ata/snps,dwc-ahci-common.yaml106
-rw-r--r--Documentation/devicetree/bindings/ata/snps,dwc-ahci.yaml80
-rw-r--r--Documentation/devicetree/bindings/ata/st,ahci.yaml72
-rw-r--r--Documentation/devicetree/bindings/ata/ti,da850-ahci.yaml39
-rw-r--r--Documentation/devicetree/bindings/ata/ti,dm816-ahci.yaml43
-rw-r--r--Documentation/devicetree/bindings/auxdisplay/arm,versatile-lcd.yaml44
-rw-r--r--Documentation/devicetree/bindings/auxdisplay/arm-charlcd.txt18
-rw-r--r--Documentation/devicetree/bindings/auxdisplay/gpio-7-segment.yaml55
-rw-r--r--Documentation/devicetree/bindings/auxdisplay/hit,hd44780.txt45
-rw-r--r--Documentation/devicetree/bindings/auxdisplay/hit,hd44780.yaml127
-rw-r--r--Documentation/devicetree/bindings/auxdisplay/holtek,ht16k33.yaml104
-rw-r--r--Documentation/devicetree/bindings/auxdisplay/img,ascii-lcd.yaml55
-rw-r--r--Documentation/devicetree/bindings/auxdisplay/img-ascii-lcd.txt17
-rw-r--r--Documentation/devicetree/bindings/auxdisplay/maxim,max6959.yaml44
-rw-r--r--Documentation/devicetree/bindings/auxdisplay/modtronix,lcd2s.yaml58
-rw-r--r--Documentation/devicetree/bindings/board/fsl,bcsr.yaml32
-rw-r--r--Documentation/devicetree/bindings/board/fsl,fpga-qixis-i2c.yaml70
-rw-r--r--Documentation/devicetree/bindings/board/fsl,fpga-qixis.yaml81
-rw-r--r--Documentation/devicetree/bindings/board/fsl-board.txt111
-rw-r--r--Documentation/devicetree/bindings/bus/allwinner,sun50i-a64-de2.yaml88
-rw-r--r--Documentation/devicetree/bindings/bus/allwinner,sun8i-a23-rsb.yaml82
-rw-r--r--Documentation/devicetree/bindings/bus/arm,integrator-ap-lm.yaml83
-rw-r--r--Documentation/devicetree/bindings/bus/aspeed,ast2600-ahbc.yaml37
-rw-r--r--Documentation/devicetree/bindings/bus/baikal,bt1-apb.yaml90
-rw-r--r--Documentation/devicetree/bindings/bus/baikal,bt1-axi.yaml107
-rw-r--r--Documentation/devicetree/bindings/bus/brcm,gisb-arb.txt33
-rw-r--r--Documentation/devicetree/bindings/bus/brcm,gisb-arb.yaml67
-rw-r--r--Documentation/devicetree/bindings/bus/fsl,imx8mp-aipstz.yaml104
-rw-r--r--Documentation/devicetree/bindings/bus/fsl,imx8qxp-pixel-link-msi-bus.yaml232
-rw-r--r--Documentation/devicetree/bindings/bus/fsl,spba-bus.yaml68
-rw-r--r--Documentation/devicetree/bindings/bus/imx-weim.txt82
-rw-r--r--Documentation/devicetree/bindings/bus/microsoft,vmbus.yaml66
-rw-r--r--Documentation/devicetree/bindings/bus/moxtet.txt46
-rw-r--r--Documentation/devicetree/bindings/bus/mti,mips-cdmm.yaml37
-rw-r--r--Documentation/devicetree/bindings/bus/nvidia,tegra210-aconnect.txt44
-rw-r--r--Documentation/devicetree/bindings/bus/nvidia,tegra210-aconnect.yaml84
-rw-r--r--Documentation/devicetree/bindings/bus/palmbus.yaml80
-rw-r--r--Documentation/devicetree/bindings/bus/qcom,ebi2.txt138
-rw-r--r--Documentation/devicetree/bindings/bus/qcom,ssbi.yaml63
-rw-r--r--Documentation/devicetree/bindings/bus/qcom,ssc-block-bus.yaml144
-rw-r--r--Documentation/devicetree/bindings/bus/renesas,bsc.txt46
-rw-r--r--Documentation/devicetree/bindings/bus/renesas,bsc.yaml74
-rw-r--r--Documentation/devicetree/bindings/bus/simple-pm-bus.txt44
-rw-r--r--Documentation/devicetree/bindings/bus/simple-pm-bus.yaml77
-rw-r--r--Documentation/devicetree/bindings/bus/socionext,uniphier-system-bus.yaml103
-rw-r--r--Documentation/devicetree/bindings/bus/st,stm32-etzpc.yaml96
-rw-r--r--Documentation/devicetree/bindings/bus/st,stm32mp25-rifsc.yaml105
-rw-r--r--Documentation/devicetree/bindings/bus/sun50i-de2-bus.txt37
-rw-r--r--Documentation/devicetree/bindings/bus/sunxi-rsb.txt47
-rw-r--r--Documentation/devicetree/bindings/bus/ti-sysc.txt135
-rw-r--r--Documentation/devicetree/bindings/bus/ti-sysc.yaml215
-rw-r--r--Documentation/devicetree/bindings/bus/uniphier-system-bus.txt66
-rw-r--r--Documentation/devicetree/bindings/bus/xlnx,versal-net-cdx.yaml82
-rw-r--r--Documentation/devicetree/bindings/c6x/clocks.txt40
-rw-r--r--Documentation/devicetree/bindings/c6x/dscr.txt127
-rw-r--r--Documentation/devicetree/bindings/c6x/emifa.txt62
-rw-r--r--Documentation/devicetree/bindings/c6x/soc.txt28
-rw-r--r--Documentation/devicetree/bindings/cache/andestech,ax45mp-cache.yaml103
-rw-r--r--Documentation/devicetree/bindings/cache/baikal,bt1-l2-ctl.yaml63
-rw-r--r--Documentation/devicetree/bindings/cache/freescale-l2cache.txt55
-rw-r--r--Documentation/devicetree/bindings/cache/l2c2x0.yaml241
-rw-r--r--Documentation/devicetree/bindings/cache/marvell,kirkwood-cache.yaml45
-rw-r--r--Documentation/devicetree/bindings/cache/marvell,tauros2-cache.yaml39
-rw-r--r--Documentation/devicetree/bindings/cache/qcom,llcc.yaml317
-rw-r--r--Documentation/devicetree/bindings/cache/sifive,ccache0.yaml212
-rw-r--r--Documentation/devicetree/bindings/cache/socionext,uniphier-system-cache.yaml101
-rw-r--r--Documentation/devicetree/bindings/cache/starfive,jh8100-starlink-cache.yaml66
-rw-r--r--Documentation/devicetree/bindings/chosen.txt137
-rw-r--r--Documentation/devicetree/bindings/chrome/google,cros-ec-typec.yaml66
-rw-r--r--Documentation/devicetree/bindings/clock/actions,owl-cmu.txt51
-rw-r--r--Documentation/devicetree/bindings/clock/actions,owl-cmu.yaml60
-rw-r--r--Documentation/devicetree/bindings/clock/adi,axi-clkgen.yaml72
-rw-r--r--Documentation/devicetree/bindings/clock/airoha,en7523-scu.yaml101
-rw-r--r--Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-ahb-clk.yaml108
-rw-r--r--Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-apb0-clk.yaml50
-rw-r--r--Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-apb1-clk.yaml52
-rw-r--r--Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-axi-clk.yaml61
-rw-r--r--Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-ccu.yaml153
-rw-r--r--Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-cpu-clk.yaml52
-rw-r--r--Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-display-clk.yaml57
-rw-r--r--Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-gates-clk.yaml152
-rw-r--r--Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-mbus-clk.yaml63
-rw-r--r--Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-mmc-clk.yaml87
-rw-r--r--Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-mod0-clk.yaml80
-rw-r--r--Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-mod1-clk.yaml57
-rw-r--r--Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-osc-clk.yaml51
-rw-r--r--Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-pll1-clk.yaml71
-rw-r--r--Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-pll3-clk.yaml50
-rw-r--r--Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-pll5-clk.yaml53
-rw-r--r--Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-pll6-clk.yaml53
-rw-r--r--Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-tcon-ch0-clk.yaml77
-rw-r--r--Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-usb-clk.yaml166
-rw-r--r--Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-ve-clk.yaml55
-rw-r--r--Documentation/devicetree/bindings/clock/allwinner,sun55i-a523-ccu.yaml136
-rw-r--r--Documentation/devicetree/bindings/clock/allwinner,sun5i-a13-ahb-clk.yaml52
-rw-r--r--Documentation/devicetree/bindings/clock/allwinner,sun6i-a31-pll6-clk.yaml53
-rw-r--r--Documentation/devicetree/bindings/clock/allwinner,sun7i-a20-gmac-clk.yaml51
-rw-r--r--Documentation/devicetree/bindings/clock/allwinner,sun7i-a20-out-clk.yaml52
-rw-r--r--Documentation/devicetree/bindings/clock/allwinner,sun8i-a83t-de2-clk.yaml80
-rw-r--r--Documentation/devicetree/bindings/clock/allwinner,sun8i-h3-bus-gates-clk.yaml103
-rw-r--r--Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-ahb-clk.yaml52
-rw-r--r--Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-apb0-clk.yaml63
-rw-r--r--Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-cpus-clk.yaml52
-rw-r--r--Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-de-clks.yaml67
-rw-r--r--Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-gt-clk.yaml52
-rw-r--r--Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-mmc-config-clk.yaml68
-rw-r--r--Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-pll4-clk.yaml50
-rw-r--r--Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-usb-clks.yaml59
-rw-r--r--Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-usb-mod-clk.yaml60
-rw-r--r--Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-usb-phy-clk.yaml60
-rw-r--r--Documentation/devicetree/bindings/clock/alphascale,acc.txt114
-rw-r--r--Documentation/devicetree/bindings/clock/alphascale,asm9260-clock-controller.yaml49
-rw-r--r--Documentation/devicetree/bindings/clock/altr_socfpga.txt30
-rw-r--r--Documentation/devicetree/bindings/clock/amlogic,a1-peripherals-clkc.yaml78
-rw-r--r--Documentation/devicetree/bindings/clock/amlogic,a1-pll-clkc.yaml64
-rw-r--r--Documentation/devicetree/bindings/clock/amlogic,axg-audio-clkc.txt56
-rw-r--r--Documentation/devicetree/bindings/clock/amlogic,axg-audio-clkc.yaml201
-rw-r--r--Documentation/devicetree/bindings/clock/amlogic,c3-peripherals-clkc.yaml120
-rw-r--r--Documentation/devicetree/bindings/clock/amlogic,c3-pll-clkc.yaml62
-rw-r--r--Documentation/devicetree/bindings/clock/amlogic,gxbb-aoclkc.txt55
-rw-r--r--Documentation/devicetree/bindings/clock/amlogic,gxbb-aoclkc.yaml85
-rw-r--r--Documentation/devicetree/bindings/clock/amlogic,gxbb-clkc.txt45
-rw-r--r--Documentation/devicetree/bindings/clock/amlogic,gxbb-clkc.yaml37
-rw-r--r--Documentation/devicetree/bindings/clock/amlogic,meson8-clkc.yaml45
-rw-r--r--Documentation/devicetree/bindings/clock/amlogic,meson8-ddr-clkc.yaml50
-rw-r--r--Documentation/devicetree/bindings/clock/amlogic,meson8b-clkc.txt49
-rw-r--r--Documentation/devicetree/bindings/clock/amlogic,s4-peripherals-clkc.yaml96
-rw-r--r--Documentation/devicetree/bindings/clock/amlogic,s4-pll-clkc.yaml49
-rw-r--r--Documentation/devicetree/bindings/clock/apm,xgene-device-clock.yaml80
-rw-r--r--Documentation/devicetree/bindings/clock/apm,xgene-socpll-clock.yaml50
-rw-r--r--Documentation/devicetree/bindings/clock/apple,nco.yaml68
-rw-r--r--Documentation/devicetree/bindings/clock/arm,syscon-icst.yaml110
-rw-r--r--Documentation/devicetree/bindings/clock/arm-integrator.txt34
-rw-r--r--Documentation/devicetree/bindings/clock/arm-syscon-icst.txt70
-rw-r--r--Documentation/devicetree/bindings/clock/armada3700-periph-clock.txt70
-rw-r--r--Documentation/devicetree/bindings/clock/armada3700-tbg-clock.txt27
-rw-r--r--Documentation/devicetree/bindings/clock/artpec6.txt41
-rw-r--r--Documentation/devicetree/bindings/clock/at91-clock.txt62
-rw-r--r--Documentation/devicetree/bindings/clock/atmel,at91rm9200-pmc.yaml162
-rw-r--r--Documentation/devicetree/bindings/clock/atmel,at91sam9x5-sckc.yaml73
-rw-r--r--Documentation/devicetree/bindings/clock/axi-clkgen.txt25
-rw-r--r--Documentation/devicetree/bindings/clock/axis,artpec6-clkctrl.yaml55
-rw-r--r--Documentation/devicetree/bindings/clock/axis,artpec8-clock.yaml213
-rw-r--r--Documentation/devicetree/bindings/clock/baikal,bt1-ccu-div.yaml196
-rw-r--r--Documentation/devicetree/bindings/clock/baikal,bt1-ccu-pll.yaml131
-rw-r--r--Documentation/devicetree/bindings/clock/bitmain,bm1880-clk.yaml64
-rw-r--r--Documentation/devicetree/bindings/clock/brcm,bcm2711-dvp.yaml47
-rw-r--r--Documentation/devicetree/bindings/clock/brcm,bcm2835-aux-clock.txt31
-rw-r--r--Documentation/devicetree/bindings/clock/brcm,bcm2835-aux-clock.yaml47
-rw-r--r--Documentation/devicetree/bindings/clock/brcm,bcm2835-cprman.txt58
-rw-r--r--Documentation/devicetree/bindings/clock/brcm,bcm2835-cprman.yaml59
-rw-r--r--Documentation/devicetree/bindings/clock/brcm,bcm53573-ilp.txt36
-rw-r--r--Documentation/devicetree/bindings/clock/brcm,bcm53573-ilp.yaml46
-rw-r--r--Documentation/devicetree/bindings/clock/brcm,bcm63268-timer-clocks.yaml40
-rw-r--r--Documentation/devicetree/bindings/clock/brcm,bcm63xx-clocks.yaml44
-rw-r--r--Documentation/devicetree/bindings/clock/brcm,iproc-clocks.txt313
-rw-r--r--Documentation/devicetree/bindings/clock/brcm,iproc-clocks.yaml417
-rw-r--r--Documentation/devicetree/bindings/clock/brcm,kona-ccu.txt138
-rw-r--r--Documentation/devicetree/bindings/clock/brcm,kona-ccu.yaml181
-rw-r--r--Documentation/devicetree/bindings/clock/calxeda.txt17
-rw-r--r--Documentation/devicetree/bindings/clock/calxeda.yaml82
-rw-r--r--Documentation/devicetree/bindings/clock/canaan,k210-clk.yaml55
-rw-r--r--Documentation/devicetree/bindings/clock/cirrus,cs2000-cp.yaml90
-rw-r--r--Documentation/devicetree/bindings/clock/cirrus,ep7209-clk.yaml47
-rw-r--r--Documentation/devicetree/bindings/clock/cirrus,lochnagar.yaml78
-rw-r--r--Documentation/devicetree/bindings/clock/clk-exynos-audss.txt103
-rw-r--r--Documentation/devicetree/bindings/clock/clk-s5pv210-audss.txt53
-rw-r--r--Documentation/devicetree/bindings/clock/clock-bindings.txt172
-rw-r--r--Documentation/devicetree/bindings/clock/clps711x-clock.txt19
-rw-r--r--Documentation/devicetree/bindings/clock/cs2000-cp.txt22
-rw-r--r--Documentation/devicetree/bindings/clock/csr,atlas7-car.txt55
-rw-r--r--Documentation/devicetree/bindings/clock/dove-divider-clock.txt28
-rw-r--r--Documentation/devicetree/bindings/clock/efm32-clock.txt11
-rw-r--r--Documentation/devicetree/bindings/clock/emev2-clock.txt98
-rw-r--r--Documentation/devicetree/bindings/clock/exynos3250-clock.txt57
-rw-r--r--Documentation/devicetree/bindings/clock/exynos4-clock.txt86
-rw-r--r--Documentation/devicetree/bindings/clock/exynos5250-clock.txt41
-rw-r--r--Documentation/devicetree/bindings/clock/exynos5260-clock.txt190
-rw-r--r--Documentation/devicetree/bindings/clock/exynos5410-clock.txt50
-rw-r--r--Documentation/devicetree/bindings/clock/exynos5420-clock.txt42
-rw-r--r--Documentation/devicetree/bindings/clock/exynos5433-clock.txt484
-rw-r--r--Documentation/devicetree/bindings/clock/exynos7-clock.txt108
-rw-r--r--Documentation/devicetree/bindings/clock/fixed-clock.txt23
-rw-r--r--Documentation/devicetree/bindings/clock/fixed-clock.yaml53
-rw-r--r--Documentation/devicetree/bindings/clock/fixed-factor-clock.txt28
-rw-r--r--Documentation/devicetree/bindings/clock/fixed-factor-clock.yaml63
-rw-r--r--Documentation/devicetree/bindings/clock/fixed-mmio-clock.yaml47
-rw-r--r--Documentation/devicetree/bindings/clock/fsl,flexspi-clock.yaml55
-rw-r--r--Documentation/devicetree/bindings/clock/fsl,imx8-acm.yaml282
-rw-r--r--Documentation/devicetree/bindings/clock/fsl,imx8m-anatop.yaml51
-rw-r--r--Documentation/devicetree/bindings/clock/fsl,imx93-anatop.yaml42
-rw-r--r--Documentation/devicetree/bindings/clock/fsl,plldig.yaml58
-rw-r--r--Documentation/devicetree/bindings/clock/fsl,qoriq-clock-legacy.yaml84
-rw-r--r--Documentation/devicetree/bindings/clock/fsl,qoriq-clock.yaml207
-rw-r--r--Documentation/devicetree/bindings/clock/fsl,sai-clock.yaml55
-rw-r--r--Documentation/devicetree/bindings/clock/fsl,scu-clk.yaml43
-rw-r--r--Documentation/devicetree/bindings/clock/fsl,vf610-ccm.yaml58
-rw-r--r--Documentation/devicetree/bindings/clock/fujitsu,mb86s70-crg11.txt26
-rw-r--r--Documentation/devicetree/bindings/clock/gated-fixed-clock.yaml49
-rw-r--r--Documentation/devicetree/bindings/clock/google,gs101-clock.yaml181
-rw-r--r--Documentation/devicetree/bindings/clock/gpio-gate-clock.txt21
-rw-r--r--Documentation/devicetree/bindings/clock/gpio-gate-clock.yaml42
-rw-r--r--Documentation/devicetree/bindings/clock/gpio-mux-clock.txt19
-rw-r--r--Documentation/devicetree/bindings/clock/gpio-mux-clock.yaml45
-rw-r--r--Documentation/devicetree/bindings/clock/hi3620-clock.txt20
-rw-r--r--Documentation/devicetree/bindings/clock/hi6220-clock.txt2
-rw-r--r--Documentation/devicetree/bindings/clock/hisilicon,hi3559av100-clock.yaml59
-rw-r--r--Documentation/devicetree/bindings/clock/hix5hd2-clock.txt30
-rw-r--r--Documentation/devicetree/bindings/clock/idt,versaclock5.txt91
-rw-r--r--Documentation/devicetree/bindings/clock/idt,versaclock5.yaml195
-rw-r--r--Documentation/devicetree/bindings/clock/img,pistachio-clk.yaml136
-rw-r--r--Documentation/devicetree/bindings/clock/imx1-clock.txt26
-rw-r--r--Documentation/devicetree/bindings/clock/imx1-clock.yaml42
-rw-r--r--Documentation/devicetree/bindings/clock/imx21-clock.txt27
-rw-r--r--Documentation/devicetree/bindings/clock/imx21-clock.yaml42
-rw-r--r--Documentation/devicetree/bindings/clock/imx23-clock.txt70
-rw-r--r--Documentation/devicetree/bindings/clock/imx23-clock.yaml85
-rw-r--r--Documentation/devicetree/bindings/clock/imx25-clock.txt160
-rw-r--r--Documentation/devicetree/bindings/clock/imx25-clock.yaml178
-rw-r--r--Documentation/devicetree/bindings/clock/imx27-clock.txt27
-rw-r--r--Documentation/devicetree/bindings/clock/imx27-clock.yaml46
-rw-r--r--Documentation/devicetree/bindings/clock/imx28-clock.txt93
-rw-r--r--Documentation/devicetree/bindings/clock/imx28-clock.yaml108
-rw-r--r--Documentation/devicetree/bindings/clock/imx31-clock.txt90
-rw-r--r--Documentation/devicetree/bindings/clock/imx31-clock.yaml112
-rw-r--r--Documentation/devicetree/bindings/clock/imx35-clock.txt114
-rw-r--r--Documentation/devicetree/bindings/clock/imx35-clock.yaml131
-rw-r--r--Documentation/devicetree/bindings/clock/imx5-clock.txt28
-rw-r--r--Documentation/devicetree/bindings/clock/imx5-clock.yaml58
-rw-r--r--Documentation/devicetree/bindings/clock/imx6q-clock.txt38
-rw-r--r--Documentation/devicetree/bindings/clock/imx6q-clock.yaml74
-rw-r--r--Documentation/devicetree/bindings/clock/imx6sl-clock.txt10
-rw-r--r--Documentation/devicetree/bindings/clock/imx6sl-clock.yaml50
-rw-r--r--Documentation/devicetree/bindings/clock/imx6sll-clock.txt36
-rw-r--r--Documentation/devicetree/bindings/clock/imx6sll-clock.yaml68
-rw-r--r--Documentation/devicetree/bindings/clock/imx6sx-clock.txt13
-rw-r--r--Documentation/devicetree/bindings/clock/imx6sx-clock.yaml72
-rw-r--r--Documentation/devicetree/bindings/clock/imx6ul-clock.txt13
-rw-r--r--Documentation/devicetree/bindings/clock/imx6ul-clock.yaml68
-rw-r--r--Documentation/devicetree/bindings/clock/imx7d-clock.txt13
-rw-r--r--Documentation/devicetree/bindings/clock/imx7d-clock.yaml64
-rw-r--r--Documentation/devicetree/bindings/clock/imx7ulp-pcc-clock.yaml110
-rw-r--r--Documentation/devicetree/bindings/clock/imx7ulp-scg-clock.yaml88
-rw-r--r--Documentation/devicetree/bindings/clock/imx8m-clock.yaml133
-rw-r--r--Documentation/devicetree/bindings/clock/imx8mp-audiomix.yaml84
-rw-r--r--Documentation/devicetree/bindings/clock/imx8qxp-lpcg.yaml103
-rw-r--r--Documentation/devicetree/bindings/clock/imx8ulp-cgc-clock.yaml43
-rw-r--r--Documentation/devicetree/bindings/clock/imx8ulp-pcc-clock.yaml50
-rw-r--r--Documentation/devicetree/bindings/clock/imx93-clock.yaml63
-rw-r--r--Documentation/devicetree/bindings/clock/imxrt1050-clock.yaml59
-rw-r--r--Documentation/devicetree/bindings/clock/ingenic,cgu.txt56
-rw-r--r--Documentation/devicetree/bindings/clock/ingenic,cgu.yaml132
-rw-r--r--Documentation/devicetree/bindings/clock/intc_stratix10.txt20
-rw-r--r--Documentation/devicetree/bindings/clock/intel,agilex.yaml46
-rw-r--r--Documentation/devicetree/bindings/clock/intel,agilex5-clkmgr.yaml40
-rw-r--r--Documentation/devicetree/bindings/clock/intel,cgu-lgm.yaml46
-rw-r--r--Documentation/devicetree/bindings/clock/intel,easic-n5x.yaml46
-rw-r--r--Documentation/devicetree/bindings/clock/intel,stratix10.yaml35
-rw-r--r--Documentation/devicetree/bindings/clock/keystone-gate.txt2
-rw-r--r--Documentation/devicetree/bindings/clock/keystone-pll.txt4
-rw-r--r--Documentation/devicetree/bindings/clock/loongson,ls1x-clk.yaml45
-rw-r--r--Documentation/devicetree/bindings/clock/loongson,ls2k-clk.yaml77
-rw-r--r--Documentation/devicetree/bindings/clock/lpc1850-ccu.txt77
-rw-r--r--Documentation/devicetree/bindings/clock/lpc1850-cgu.txt131
-rw-r--r--Documentation/devicetree/bindings/clock/lpc1850-creg-clk.txt52
-rw-r--r--Documentation/devicetree/bindings/clock/lsi,axm5516-clks.txt29
-rw-r--r--Documentation/devicetree/bindings/clock/lsi,axm5516-clks.yaml43
-rw-r--r--Documentation/devicetree/bindings/clock/lsi,nspire-cx-clock.yaml33
-rw-r--r--Documentation/devicetree/bindings/clock/marvell,armada-370-corediv-clock.yaml52
-rw-r--r--Documentation/devicetree/bindings/clock/marvell,armada-3700-periph-clock.yaml96
-rw-r--r--Documentation/devicetree/bindings/clock/marvell,armada-3700-tbg-clock.yaml54
-rw-r--r--Documentation/devicetree/bindings/clock/marvell,armada-3700-uart-clock.yaml59
-rw-r--r--Documentation/devicetree/bindings/clock/marvell,armada-xp-cpu-clock.yaml44
-rw-r--r--Documentation/devicetree/bindings/clock/marvell,berlin.txt31
-rw-r--r--Documentation/devicetree/bindings/clock/marvell,berlin2-clk.yaml51
-rw-r--r--Documentation/devicetree/bindings/clock/marvell,dove-divider-clock.yaml50
-rw-r--r--Documentation/devicetree/bindings/clock/marvell,mmp2-audio-clock.yaml75
-rw-r--r--Documentation/devicetree/bindings/clock/marvell,mmp2-clock.yaml69
-rw-r--r--Documentation/devicetree/bindings/clock/marvell,mmp2.txt21
-rw-r--r--Documentation/devicetree/bindings/clock/marvell,mvebu-core-clock.yaml94
-rw-r--r--Documentation/devicetree/bindings/clock/marvell,pxa1908.yaml66
-rw-r--r--Documentation/devicetree/bindings/clock/marvell-armada-370-gating-clock.yaml227
-rw-r--r--Documentation/devicetree/bindings/clock/maxim,max77686.txt114
-rw-r--r--Documentation/devicetree/bindings/clock/maxim,max9485.txt59
-rw-r--r--Documentation/devicetree/bindings/clock/maxim,max9485.yaml82
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,apmixedsys.yaml66
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,ethsys.yaml55
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,infracfg.yaml87
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,mt2701-hifsys.yaml50
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,mt6795-clock.yaml66
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,mt7621-sysc.yaml80
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,mt7622-pciesys.yaml47
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,mt7622-ssusbsys.yaml45
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,mt7988-ethwarp.yaml52
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,mt7988-xfi-pll.yaml48
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,mt8186-clock.yaml56
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,mt8186-fhctl.yaml58
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,mt8186-sys-clock.yaml57
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,mt8188-clock.yaml93
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,mt8188-sys-clock.yaml58
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,mt8192-clock.yaml191
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,mt8192-sys-clock.yaml68
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,mt8195-clock.yaml238
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,mt8195-sys-clock.yaml76
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,mt8196-clock.yaml112
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,mt8196-sys-clock.yaml107
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,mt8365-clock.yaml42
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,mt8365-sys-clock.yaml47
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,mtmips-sysc.yaml77
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,pericfg.yaml72
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,syscon.yaml112
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,topckgen.yaml67
-rw-r--r--Documentation/devicetree/bindings/clock/microchip,lan966x-gck.yaml71
-rw-r--r--Documentation/devicetree/bindings/clock/microchip,mpfs-ccc.yaml80
-rw-r--r--Documentation/devicetree/bindings/clock/microchip,mpfs-clkcfg.yaml80
-rw-r--r--Documentation/devicetree/bindings/clock/microchip,pic32.txt39
-rw-r--r--Documentation/devicetree/bindings/clock/microchip,pic32mzda-clk.yaml45
-rw-r--r--Documentation/devicetree/bindings/clock/microchip,sparx5-dpll.yaml52
-rw-r--r--Documentation/devicetree/bindings/clock/milbeaut-clock.yaml50
-rw-r--r--Documentation/devicetree/bindings/clock/moxa,moxart-clock.txt48
-rw-r--r--Documentation/devicetree/bindings/clock/moxa,moxart-clock.yaml38
-rw-r--r--Documentation/devicetree/bindings/clock/mstar,msc313-cpupll.yaml45
-rw-r--r--Documentation/devicetree/bindings/clock/mstar,msc313-mpll.yaml46
-rw-r--r--Documentation/devicetree/bindings/clock/mvebu-core-clock.txt86
-rw-r--r--Documentation/devicetree/bindings/clock/mvebu-corediv-clock.txt23
-rw-r--r--Documentation/devicetree/bindings/clock/mvebu-cpu-clock.txt23
-rw-r--r--Documentation/devicetree/bindings/clock/mvebu-gated-clock.txt205
-rw-r--r--Documentation/devicetree/bindings/clock/nspire-clock.txt24
-rw-r--r--Documentation/devicetree/bindings/clock/nuvoton,ma35d1-clk.yaml63
-rw-r--r--Documentation/devicetree/bindings/clock/nuvoton,npcm750-clk.txt100
-rw-r--r--Documentation/devicetree/bindings/clock/nuvoton,npcm750-clk.yaml66
-rw-r--r--Documentation/devicetree/bindings/clock/nuvoton,npcm845-clk.yaml49
-rw-r--r--Documentation/devicetree/bindings/clock/nvidia,tegra114-car.txt63
-rw-r--r--Documentation/devicetree/bindings/clock/nvidia,tegra124-car.txt107
-rw-r--r--Documentation/devicetree/bindings/clock/nvidia,tegra124-car.yaml110
-rw-r--r--Documentation/devicetree/bindings/clock/nvidia,tegra124-dfll.txt83
-rw-r--r--Documentation/devicetree/bindings/clock/nvidia,tegra20-car.txt63
-rw-r--r--Documentation/devicetree/bindings/clock/nvidia,tegra20-car.yaml99
-rw-r--r--Documentation/devicetree/bindings/clock/nvidia,tegra210-car.txt56
-rw-r--r--Documentation/devicetree/bindings/clock/nvidia,tegra30-car.txt63
-rw-r--r--Documentation/devicetree/bindings/clock/nxp,imx95-blk-ctl.yaml60
-rw-r--r--Documentation/devicetree/bindings/clock/nxp,imx95-display-master-csr.yaml64
-rw-r--r--Documentation/devicetree/bindings/clock/nxp,lpc1850-ccu.yaml104
-rw-r--r--Documentation/devicetree/bindings/clock/nxp,lpc1850-cgu.yaml99
-rw-r--r--Documentation/devicetree/bindings/clock/nxp,lpc3220-clk.txt30
-rw-r--r--Documentation/devicetree/bindings/clock/nxp,lpc3220-clk.yaml51
-rw-r--r--Documentation/devicetree/bindings/clock/nxp,lpc3220-usb-clk.txt22
-rw-r--r--Documentation/devicetree/bindings/clock/nxp,lpc3220-usb-clk.yaml35
-rw-r--r--Documentation/devicetree/bindings/clock/oxnas,stdclk.txt28
-rw-r--r--Documentation/devicetree/bindings/clock/pistachio-clock.txt123
-rw-r--r--Documentation/devicetree/bindings/clock/prima2-clock.txt73
-rw-r--r--Documentation/devicetree/bindings/clock/pwm-clock.txt26
-rw-r--r--Documentation/devicetree/bindings/clock/pwm-clock.yaml45
-rw-r--r--Documentation/devicetree/bindings/clock/qca,ath79-pll.txt33
-rw-r--r--Documentation/devicetree/bindings/clock/qca,ath79-pll.yaml70
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,a53pll.txt22
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,a53pll.yaml70
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,a7pll.yaml51
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,aoncc-sm8250.yaml61
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,audiocc-sm8250.yaml61
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,camcc-sm8250.yaml74
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,camcc.txt18
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,dispcc-sc8280xp.yaml85
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,dispcc-sm6125.yaml106
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,dispcc-sm6350.yaml74
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,dispcc-sm8x50.yaml123
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,dispcc.txt19
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-apq8064.yaml99
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-apq8084.yaml87
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-ipq4019.yaml54
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-ipq6018.yaml58
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-ipq8064.yaml82
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-ipq8074.yaml55
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-mdm9607.yaml43
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-mdm9615.yaml50
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-msm8660.yaml55
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-msm8909.yaml63
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-msm8916.yaml67
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-msm8953.yaml79
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-msm8974.yaml62
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-msm8976.yaml84
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-msm8994.yaml57
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-msm8996.yaml69
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-msm8998.yaml64
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-qcm2290.yaml55
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-qcs404.yaml59
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-sc7180.yaml67
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-sc7280.yaml86
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-sc8180x.yaml66
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-sc8280xp.yaml122
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-sdm660.yaml62
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-sdm845.yaml101
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-sdx55.yaml59
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-sdx65.yaml63
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-sm6115.yaml55
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-sm6125.yaml55
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-sm6350.yaml59
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-sm8150.yaml57
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-sm8250.yaml60
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-sm8350.yaml79
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-sm8450.yaml74
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc.txt57
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc.yaml41
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,glymur-dispcc.yaml98
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,glymur-gcc.yaml121
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gpucc-sdm660.yaml64
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gpucc.yaml96
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,hfpll.txt60
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,hfpll.yaml69
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,ipq5018-gcc.yaml64
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,ipq5332-gcc.yaml83
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,ipq5424-apss-clk.yaml55
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,ipq9574-cmn-pll.yaml79
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,ipq9574-gcc.yaml66
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,ipq9574-nsscc.yaml98
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,kpss-acc-v1.yaml72
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,kpss-gcc.yaml88
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,krait-cc.txt34
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,krait-cc.yaml43
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,lcc.txt22
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,lcc.yaml120
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,milos-camcc.yaml51
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,milos-dispcc.yaml63
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,milos-gcc.yaml62
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,milos-videocc.yaml53
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,mmcc.txt28
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,mmcc.yaml346
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,msm8996-apcc.yaml55
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,msm8996-cbf.yaml55
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,msm8998-gpucc.yaml56
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,q6sstopcc.yaml43
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,qca8k-nsscc.yaml86
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,qcm2290-dispcc.yaml75
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,qcm2290-gpucc.yaml77
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,qcs404-turingcc.yaml47
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,qcs615-dispcc.yaml55
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,qcs615-gcc.yaml59
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,qcs615-gpucc.yaml49
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,qcs615-videocc.yaml47
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,qcs8300-gcc.yaml66
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,qdu1000-ecpricc.yaml68
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,qdu1000-gcc.yaml53
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,rpmcc.txt49
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,rpmcc.yaml166
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,rpmh-clk.txt22
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml73
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sa8775p-camcc.yaml81
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sa8775p-dispcc.yaml79
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sa8775p-gcc.yaml85
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sa8775p-videocc.yaml63
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sar2130p-gcc.yaml65
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sc7180-camcc.yaml62
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sc7180-dispcc.yaml74
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sc7180-lpasscorecc.yaml107
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sc7280-camcc.yaml61
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sc7280-dispcc.yaml82
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sc7280-lpasscc.yaml72
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sc7280-lpasscorecc.yaml202
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sc8180x-camcc.yaml67
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sc8280xp-lpasscc.yaml67
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sdm845-camcc.yaml59
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sdm845-dispcc.yaml89
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sdm845-lpasscc.yaml47
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sdx75-gcc.yaml66
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm4450-camcc.yaml51
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm4450-dispcc.yaml59
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm4450-gcc.yaml56
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm6115-dispcc.yaml57
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm6115-gpucc.yaml58
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm6115-lpasscc.yaml46
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm6125-gpucc.yaml64
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm6350-camcc.yaml54
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm6375-dispcc.yaml54
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm6375-gcc.yaml52
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm6375-gpucc.yaml75
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm7150-camcc.yaml60
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm7150-dispcc.yaml75
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm7150-gcc.yaml53
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm7150-videocc.yaml58
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm8150-camcc.yaml65
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm8350-videocc.yaml70
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm8450-camcc.yaml95
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm8450-dispcc.yaml86
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm8450-gpucc.yaml75
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm8450-videocc.yaml88
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm8550-dispcc.yaml101
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm8550-gcc.yaml63
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm8550-tcsr.yaml67
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm8650-gcc.yaml66
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm8750-gcc.yaml62
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,spmi-clkdiv.txt59
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,spmi-clkdiv.yaml71
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,videocc.txt19
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,videocc.yaml166
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,x1e80100-camcc.yaml75
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,x1e80100-gcc.yaml77
-rw-r--r--Documentation/devicetree/bindings/clock/qoriq-clock.txt204
-rw-r--r--Documentation/devicetree/bindings/clock/raspberrypi,rp1-clocks.yaml58
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,5p35023.yaml90
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,9series.yaml113
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,cpg-clocks.yaml245
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,cpg-div6-clock.yaml72
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,cpg-div6-clocks.txt40
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,cpg-mssr.txt97
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,cpg-mssr.yaml153
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,cpg-mstp-clocks.txt60
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,cpg-mstp-clocks.yaml82
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,emev2-smu.yaml140
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,h8300-div-clock.txt24
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,h8s2678-pll-clock.txt23
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,r8a73a4-cpg-clocks.txt33
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,r8a7740-cpg-clocks.txt41
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,r8a7778-cpg-clocks.txt47
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,r8a7779-cpg-clocks.txt49
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,r9a06g032-sysctrl.txt43
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,r9a06g032-sysctrl.yaml73
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,r9a08g045-vbattb.yaml84
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,rcar-gen2-cpg-clocks.txt60
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.txt55
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.yaml101
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,rz-cpg-clocks.txt53
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,rzg2l-cpg.yaml89
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,rzv2h-cpg.yaml84
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,sh73a0-cpg-clocks.txt35
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,versaclock7.yaml64
-rw-r--r--Documentation/devicetree/bindings/clock/riscv,rpmi-clock.yaml64
-rw-r--r--Documentation/devicetree/bindings/clock/riscv,rpmi-mpxy-clock.yaml64
-rw-r--r--Documentation/devicetree/bindings/clock/rockchip,px30-cru.txt65
-rw-r--r--Documentation/devicetree/bindings/clock/rockchip,px30-cru.yaml119
-rw-r--r--Documentation/devicetree/bindings/clock/rockchip,rk3036-cru.txt56
-rw-r--r--Documentation/devicetree/bindings/clock/rockchip,rk3036-cru.yaml72
-rw-r--r--Documentation/devicetree/bindings/clock/rockchip,rk3128-cru.txt58
-rw-r--r--Documentation/devicetree/bindings/clock/rockchip,rk3128-cru.yaml76
-rw-r--r--Documentation/devicetree/bindings/clock/rockchip,rk3188-cru.txt61
-rw-r--r--Documentation/devicetree/bindings/clock/rockchip,rk3188-cru.yaml78
-rw-r--r--Documentation/devicetree/bindings/clock/rockchip,rk3228-cru.txt58
-rw-r--r--Documentation/devicetree/bindings/clock/rockchip,rk3228-cru.yaml74
-rw-r--r--Documentation/devicetree/bindings/clock/rockchip,rk3288-cru.txt61
-rw-r--r--Documentation/devicetree/bindings/clock/rockchip,rk3288-cru.yaml85
-rw-r--r--Documentation/devicetree/bindings/clock/rockchip,rk3308-cru.yaml76
-rw-r--r--Documentation/devicetree/bindings/clock/rockchip,rk3328-cru.txt58
-rw-r--r--Documentation/devicetree/bindings/clock/rockchip,rk3328-cru.yaml74
-rw-r--r--Documentation/devicetree/bindings/clock/rockchip,rk3368-cru.txt61
-rw-r--r--Documentation/devicetree/bindings/clock/rockchip,rk3368-cru.yaml78
-rw-r--r--Documentation/devicetree/bindings/clock/rockchip,rk3399-cru.txt68
-rw-r--r--Documentation/devicetree/bindings/clock/rockchip,rk3399-cru.yaml83
-rw-r--r--Documentation/devicetree/bindings/clock/rockchip,rk3528-cru.yaml64
-rw-r--r--Documentation/devicetree/bindings/clock/rockchip,rk3562-cru.yaml55
-rw-r--r--Documentation/devicetree/bindings/clock/rockchip,rk3568-cru.yaml73
-rw-r--r--Documentation/devicetree/bindings/clock/rockchip,rk3576-cru.yaml56
-rw-r--r--Documentation/devicetree/bindings/clock/rockchip,rk3588-cru.yaml67
-rw-r--r--Documentation/devicetree/bindings/clock/rockchip,rv1108-cru.txt59
-rw-r--r--Documentation/devicetree/bindings/clock/rockchip,rv1108-cru.yaml75
-rw-r--r--Documentation/devicetree/bindings/clock/rockchip,rv1126-cru.yaml62
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,exynos-audss-clock.yaml80
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,exynos-clock.yaml60
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,exynos-ext-clock.yaml46
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,exynos2200-cmu.yaml247
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,exynos4412-isp-clock.yaml63
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,exynos5260-clock.yaml382
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,exynos5410-clock.yaml66
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,exynos5433-clock.yaml524
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,exynos7-clock.yaml272
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,exynos7870-cmu.yaml227
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,exynos7885-clock.yaml193
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,exynos850-clock.yaml353
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,exynos8895-clock.yaml239
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,exynos990-clock.yaml164
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,exynosautov9-clock.yaml282
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,exynosautov920-clock.yaml256
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,s2mps11.txt49
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,s2mps11.yaml45
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,s3c2410-clock.txt49
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,s3c2412-clock.txt49
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,s3c2443-clock.txt55
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,s3c6400-clock.yaml57
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,s3c64xx-clock.txt76
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,s5pv210-audss-clock.yaml78
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,s5pv210-clock.txt77
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,s5pv210-clock.yaml79
-rw-r--r--Documentation/devicetree/bindings/clock/sifive/fu540-prci.yaml59
-rw-r--r--Documentation/devicetree/bindings/clock/sifive/fu740-prci.yaml64
-rw-r--r--Documentation/devicetree/bindings/clock/silabs,si514.txt24
-rw-r--r--Documentation/devicetree/bindings/clock/silabs,si5341.yaml223
-rw-r--r--Documentation/devicetree/bindings/clock/silabs,si5351.txt126
-rw-r--r--Documentation/devicetree/bindings/clock/silabs,si5351.yaml265
-rw-r--r--Documentation/devicetree/bindings/clock/silabs,si544.txt25
-rw-r--r--Documentation/devicetree/bindings/clock/silabs,si544.yaml54
-rw-r--r--Documentation/devicetree/bindings/clock/silabs,si570.txt39
-rw-r--r--Documentation/devicetree/bindings/clock/silabs,si570.yaml80
-rw-r--r--Documentation/devicetree/bindings/clock/skyworks,si521xx.yaml59
-rw-r--r--Documentation/devicetree/bindings/clock/socionext,uniphier-clock.yaml67
-rw-r--r--Documentation/devicetree/bindings/clock/sophgo,cv1800-clk.yaml55
-rw-r--r--Documentation/devicetree/bindings/clock/sophgo,sg2042-clkgen.yaml61
-rw-r--r--Documentation/devicetree/bindings/clock/sophgo,sg2042-pll.yaml53
-rw-r--r--Documentation/devicetree/bindings/clock/sophgo,sg2042-rpgate.yaml49
-rw-r--r--Documentation/devicetree/bindings/clock/sophgo,sg2044-clk.yaml99
-rw-r--r--Documentation/devicetree/bindings/clock/spacemit,k1-pll.yaml50
-rw-r--r--Documentation/devicetree/bindings/clock/sprd,sc9860-clk.yaml159
-rw-r--r--Documentation/devicetree/bindings/clock/sprd,sc9863a-clk.yaml102
-rw-r--r--Documentation/devicetree/bindings/clock/sprd,ums512-clk.yaml71
-rw-r--r--Documentation/devicetree/bindings/clock/sprd.txt63
-rw-r--r--Documentation/devicetree/bindings/clock/st,stm32-rcc.txt132
-rw-r--r--Documentation/devicetree/bindings/clock/st,stm32-rcc.yaml144
-rw-r--r--Documentation/devicetree/bindings/clock/st,stm32h7-rcc.txt71
-rw-r--r--Documentation/devicetree/bindings/clock/st,stm32mp1-rcc.txt60
-rw-r--r--Documentation/devicetree/bindings/clock/st,stm32mp1-rcc.yaml131
-rw-r--r--Documentation/devicetree/bindings/clock/st,stm32mp21-rcc.yaml199
-rw-r--r--Documentation/devicetree/bindings/clock/st,stm32mp25-rcc.yaml219
-rw-r--r--Documentation/devicetree/bindings/clock/st/st,clkgen-pll.txt3
-rw-r--r--Documentation/devicetree/bindings/clock/st/st,flexgen.txt9
-rw-r--r--Documentation/devicetree/bindings/clock/st/st,quadfs.txt3
-rw-r--r--Documentation/devicetree/bindings/clock/starfive,jh7100-audclk.yaml57
-rw-r--r--Documentation/devicetree/bindings/clock/starfive,jh7100-clkgen.yaml56
-rw-r--r--Documentation/devicetree/bindings/clock/starfive,jh7110-aoncrg.yaml107
-rw-r--r--Documentation/devicetree/bindings/clock/starfive,jh7110-ispcrg.yaml87
-rw-r--r--Documentation/devicetree/bindings/clock/starfive,jh7110-pll.yaml46
-rw-r--r--Documentation/devicetree/bindings/clock/starfive,jh7110-stgcrg.yaml82
-rw-r--r--Documentation/devicetree/bindings/clock/starfive,jh7110-syscrg.yaml118
-rw-r--r--Documentation/devicetree/bindings/clock/starfive,jh7110-voutcrg.yaml90
-rw-r--r--Documentation/devicetree/bindings/clock/ste-u300-syscon-clock.txt80
-rw-r--r--Documentation/devicetree/bindings/clock/stericsson,abx500.txt20
-rw-r--r--Documentation/devicetree/bindings/clock/stericsson,u8500-clks.yaml178
-rw-r--r--Documentation/devicetree/bindings/clock/sun8i-de2.txt33
-rw-r--r--Documentation/devicetree/bindings/clock/sun9i-de.txt28
-rw-r--r--Documentation/devicetree/bindings/clock/sun9i-usb.txt24
-rw-r--r--Documentation/devicetree/bindings/clock/sunplus,sp7021-clkc.yaml52
-rw-r--r--Documentation/devicetree/bindings/clock/sunxi-ccu.txt61
-rw-r--r--Documentation/devicetree/bindings/clock/sunxi.txt225
-rw-r--r--Documentation/devicetree/bindings/clock/tango4-clock.txt23
-rw-r--r--Documentation/devicetree/bindings/clock/tesla,fsd-clock.yaml198
-rw-r--r--Documentation/devicetree/bindings/clock/thead,th1520-clk-ap.yaml64
-rw-r--r--Documentation/devicetree/bindings/clock/ti,am62-audio-refclk.yaml43
-rw-r--r--Documentation/devicetree/bindings/clock/ti,am654-ehrpwm-tbclk.yaml39
-rw-r--r--Documentation/devicetree/bindings/clock/ti,cdce706.txt2
-rw-r--r--Documentation/devicetree/bindings/clock/ti,cdce925.txt49
-rw-r--r--Documentation/devicetree/bindings/clock/ti,cdce925.yaml103
-rw-r--r--Documentation/devicetree/bindings/clock/ti,clkctrl.yaml65
-rw-r--r--Documentation/devicetree/bindings/clock/ti,lmk04832.yaml209
-rw-r--r--Documentation/devicetree/bindings/clock/ti,sci-clk.txt36
-rw-r--r--Documentation/devicetree/bindings/clock/ti,sci-clk.yaml49
-rw-r--r--Documentation/devicetree/bindings/clock/ti-clkctrl.txt56
-rw-r--r--Documentation/devicetree/bindings/clock/ti-keystone-pllctrl.txt20
-rw-r--r--Documentation/devicetree/bindings/clock/ti/adpll.txt2
-rw-r--r--Documentation/devicetree/bindings/clock/ti/apll.txt2
-rw-r--r--Documentation/devicetree/bindings/clock/ti/autoidle.txt39
-rw-r--r--Documentation/devicetree/bindings/clock/ti/clockdomain.txt5
-rw-r--r--Documentation/devicetree/bindings/clock/ti/composite.txt54
-rw-r--r--Documentation/devicetree/bindings/clock/ti/davinci/pll.txt2
-rw-r--r--Documentation/devicetree/bindings/clock/ti/davinci/psc.txt2
-rw-r--r--Documentation/devicetree/bindings/clock/ti/divider.txt117
-rw-r--r--Documentation/devicetree/bindings/clock/ti/dpll.txt22
-rw-r--r--Documentation/devicetree/bindings/clock/ti/dra7-atl.txt6
-rw-r--r--Documentation/devicetree/bindings/clock/ti/fapll.txt2
-rw-r--r--Documentation/devicetree/bindings/clock/ti/fixed-factor-clock.txt43
-rw-r--r--Documentation/devicetree/bindings/clock/ti/gate.txt106
-rw-r--r--Documentation/devicetree/bindings/clock/ti/interface.txt56
-rw-r--r--Documentation/devicetree/bindings/clock/ti/mux.txt79
-rw-r--r--Documentation/devicetree/bindings/clock/ti/ti,autoidle.yaml34
-rw-r--r--Documentation/devicetree/bindings/clock/ti/ti,clksel.yaml51
-rw-r--r--Documentation/devicetree/bindings/clock/ti/ti,composite-clock.yaml82
-rw-r--r--Documentation/devicetree/bindings/clock/ti/ti,divider-clock.yaml179
-rw-r--r--Documentation/devicetree/bindings/clock/ti/ti,fixed-factor-clock.yaml76
-rw-r--r--Documentation/devicetree/bindings/clock/ti/ti,gate-clock.yaml125
-rw-r--r--Documentation/devicetree/bindings/clock/ti/ti,interface-clock.yaml71
-rw-r--r--Documentation/devicetree/bindings/clock/ti/ti,mux-clock.yaml125
-rw-r--r--Documentation/devicetree/bindings/clock/toshiba,tmpv770x-pipllct.yaml57
-rw-r--r--Documentation/devicetree/bindings/clock/toshiba,tmpv770x-pismu.yaml52
-rw-r--r--Documentation/devicetree/bindings/clock/uniphier-clock.txt132
-rw-r--r--Documentation/devicetree/bindings/clock/ux500.txt64
-rw-r--r--Documentation/devicetree/bindings/clock/vf610-clock.txt41
-rw-r--r--Documentation/devicetree/bindings/clock/xgene.txt131
-rw-r--r--Documentation/devicetree/bindings/clock/xlnx,clocking-wizard.yaml84
-rw-r--r--Documentation/devicetree/bindings/clock/xlnx,vcu.yaml59
-rw-r--r--Documentation/devicetree/bindings/clock/xlnx,versal-clk.yaml146
-rw-r--r--Documentation/devicetree/bindings/clock/zx296702-clk.txt34
-rw-r--r--Documentation/devicetree/bindings/clock/zx296718-clk.txt37
-rw-r--r--Documentation/devicetree/bindings/common-properties.txt17
-rw-r--r--Documentation/devicetree/bindings/connector/gocontroll,moduline-module-slot.yaml88
-rw-r--r--Documentation/devicetree/bindings/connector/samsung,usb-connector-11pin.txt49
-rw-r--r--Documentation/devicetree/bindings/connector/usb-connector.txt119
-rw-r--r--Documentation/devicetree/bindings/connector/usb-connector.yaml531
-rw-r--r--Documentation/devicetree/bindings/counter/fsl,ftm-quaddec.yaml36
-rw-r--r--Documentation/devicetree/bindings/counter/interrupt-counter.yaml62
-rw-r--r--Documentation/devicetree/bindings/counter/ti,am62-ecap-capture.yaml61
-rw-r--r--Documentation/devicetree/bindings/counter/ti-eqep.yaml67
-rw-r--r--Documentation/devicetree/bindings/cpu/cpu-capacity.txt (renamed from Documentation/devicetree/bindings/arm/cpu-capacity.txt)22
-rw-r--r--Documentation/devicetree/bindings/cpu/idle-states.yaml928
-rw-r--r--Documentation/devicetree/bindings/cpufreq/airoha,en7581-cpufreq.yaml55
-rw-r--r--Documentation/devicetree/bindings/cpufreq/apple,cluster-cpufreq.yaml128
-rw-r--r--Documentation/devicetree/bindings/cpufreq/brcm,stb-avs-cpu-freq.txt2
-rw-r--r--Documentation/devicetree/bindings/cpufreq/cpufreq-dt.txt60
-rw-r--r--Documentation/devicetree/bindings/cpufreq/cpufreq-mediatek-hw.yaml70
-rw-r--r--Documentation/devicetree/bindings/cpufreq/cpufreq-mediatek.txt243
-rw-r--r--Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml406
-rw-r--r--Documentation/devicetree/bindings/cpufreq/cpufreq-st.txt6
-rw-r--r--Documentation/devicetree/bindings/cpufreq/imx-cpufreq-dt.txt37
-rw-r--r--Documentation/devicetree/bindings/cpufreq/mediatek,mt8196-cpufreq-hw.yaml82
-rw-r--r--Documentation/devicetree/bindings/cpufreq/nvidia,tegra124-cpufreq.txt6
-rw-r--r--Documentation/devicetree/bindings/cpufreq/nvidia,tegra20-cpufreq.txt56
-rw-r--r--Documentation/devicetree/bindings/cpufreq/qcom-cpufreq-nvmem.yaml211
-rw-r--r--Documentation/devicetree/bindings/cpufreq/qemu,virtual-cpufreq.yaml48
-rw-r--r--Documentation/devicetree/bindings/cpufreq/ti-cpufreq.txt128
-rw-r--r--Documentation/devicetree/bindings/crypto/allwinner,sun4i-a10-crypto.yaml95
-rw-r--r--Documentation/devicetree/bindings/crypto/allwinner,sun8i-ce.yaml103
-rw-r--r--Documentation/devicetree/bindings/crypto/allwinner,sun8i-ss.yaml60
-rw-r--r--Documentation/devicetree/bindings/crypto/amd,ccp-seattle-v1a.yaml38
-rw-r--r--Documentation/devicetree/bindings/crypto/amd-ccp.txt17
-rw-r--r--Documentation/devicetree/bindings/crypto/amlogic,gxl-crypto.yaml52
-rw-r--r--Documentation/devicetree/bindings/crypto/arm,cryptocell.yaml53
-rw-r--r--Documentation/devicetree/bindings/crypto/arm-cryptocell.txt21
-rw-r--r--Documentation/devicetree/bindings/crypto/artpec6-crypto.txt16
-rw-r--r--Documentation/devicetree/bindings/crypto/aspeed,ast2500-hace.yaml53
-rw-r--r--Documentation/devicetree/bindings/crypto/aspeed,ast2600-acry.yaml49
-rw-r--r--Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-aes.yaml72
-rw-r--r--Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-sha.yaml66
-rw-r--r--Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-tdes.yaml70
-rw-r--r--Documentation/devicetree/bindings/crypto/atmel-crypto.txt81
-rw-r--r--Documentation/devicetree/bindings/crypto/axis,artpec6-crypto.yaml39
-rw-r--r--Documentation/devicetree/bindings/crypto/brcm,spu-crypto.txt22
-rw-r--r--Documentation/devicetree/bindings/crypto/brcm,spum-crypto.yaml44
-rw-r--r--Documentation/devicetree/bindings/crypto/cortina,sl3516-crypto.yaml50
-rw-r--r--Documentation/devicetree/bindings/crypto/fsl,sec-v4.0-mon.yaml167
-rw-r--r--Documentation/devicetree/bindings/crypto/fsl,sec-v4.0.yaml318
-rw-r--r--Documentation/devicetree/bindings/crypto/fsl,sec2.0.yaml144
-rw-r--r--Documentation/devicetree/bindings/crypto/fsl-dcp.txt16
-rw-r--r--Documentation/devicetree/bindings/crypto/fsl-dcp.yaml56
-rw-r--r--Documentation/devicetree/bindings/crypto/fsl-imx-sahara.txt15
-rw-r--r--Documentation/devicetree/bindings/crypto/fsl-imx-sahara.yaml74
-rw-r--r--Documentation/devicetree/bindings/crypto/fsl-imx-scc.txt21
-rw-r--r--Documentation/devicetree/bindings/crypto/fsl-imx-scc.yaml54
-rw-r--r--Documentation/devicetree/bindings/crypto/fsl-sec2.txt65
-rw-r--r--Documentation/devicetree/bindings/crypto/fsl-sec4.txt553
-rw-r--r--Documentation/devicetree/bindings/crypto/fsl-sec6.txt157
-rw-r--r--Documentation/devicetree/bindings/crypto/hisilicon,hip06-sec.yaml134
-rw-r--r--Documentation/devicetree/bindings/crypto/hisilicon,hip07-sec.txt67
-rw-r--r--Documentation/devicetree/bindings/crypto/img,hash-accelerator.yaml69
-rw-r--r--Documentation/devicetree/bindings/crypto/img-hash.txt27
-rw-r--r--Documentation/devicetree/bindings/crypto/inside-secure,safexcel-eip93.yaml67
-rw-r--r--Documentation/devicetree/bindings/crypto/inside-secure,safexcel.yaml88
-rw-r--r--Documentation/devicetree/bindings/crypto/inside-secure-safexcel.txt40
-rw-r--r--Documentation/devicetree/bindings/crypto/intel,ixp4xx-crypto.yaml56
-rw-r--r--Documentation/devicetree/bindings/crypto/intel,keembay-ocs-aes.yaml45
-rw-r--r--Documentation/devicetree/bindings/crypto/intel,keembay-ocs-ecc.yaml47
-rw-r--r--Documentation/devicetree/bindings/crypto/intel,keembay-ocs-hcu.yaml46
-rw-r--r--Documentation/devicetree/bindings/crypto/marvell,orion-crypto.yaml133
-rw-r--r--Documentation/devicetree/bindings/crypto/marvell-cesa.txt44
-rw-r--r--Documentation/devicetree/bindings/crypto/mediatek-crypto.txt25
-rw-r--r--Documentation/devicetree/bindings/crypto/mv_cesa.txt32
-rw-r--r--Documentation/devicetree/bindings/crypto/nvidia,tegra234-se-aes.yaml52
-rw-r--r--Documentation/devicetree/bindings/crypto/nvidia,tegra234-se-hash.yaml52
-rw-r--r--Documentation/devicetree/bindings/crypto/omap-aes.txt31
-rw-r--r--Documentation/devicetree/bindings/crypto/omap-des.txt30
-rw-r--r--Documentation/devicetree/bindings/crypto/omap-sham.txt28
-rw-r--r--Documentation/devicetree/bindings/crypto/picochip-spacc.txt21
-rw-r--r--Documentation/devicetree/bindings/crypto/qcom,inline-crypto-engine.yaml49
-rw-r--r--Documentation/devicetree/bindings/crypto/qcom,prng.txt19
-rw-r--r--Documentation/devicetree/bindings/crypto/qcom,prng.yaml69
-rw-r--r--Documentation/devicetree/bindings/crypto/qcom-qce.txt25
-rw-r--r--Documentation/devicetree/bindings/crypto/qcom-qce.yaml177
-rw-r--r--Documentation/devicetree/bindings/crypto/rockchip,rk3288-crypto.yaml127
-rw-r--r--Documentation/devicetree/bindings/crypto/rockchip-crypto.txt28
-rw-r--r--Documentation/devicetree/bindings/crypto/samsung-slimsss.yaml45
-rw-r--r--Documentation/devicetree/bindings/crypto/samsung-sss.txt32
-rw-r--r--Documentation/devicetree/bindings/crypto/samsung-sss.yaml57
-rw-r--r--Documentation/devicetree/bindings/crypto/st,stm32-crc.txt16
-rw-r--r--Documentation/devicetree/bindings/crypto/st,stm32-crc.yaml38
-rw-r--r--Documentation/devicetree/bindings/crypto/st,stm32-cryp.txt19
-rw-r--r--Documentation/devicetree/bindings/crypto/st,stm32-cryp.yaml74
-rw-r--r--Documentation/devicetree/bindings/crypto/st,stm32-hash.txt30
-rw-r--r--Documentation/devicetree/bindings/crypto/st,stm32-hash.yaml94
-rw-r--r--Documentation/devicetree/bindings/crypto/starfive,jh7110-crypto.yaml96
-rw-r--r--Documentation/devicetree/bindings/crypto/sun4i-ss.txt23
-rw-r--r--Documentation/devicetree/bindings/crypto/ti,am62l-dthev2.yaml50
-rw-r--r--Documentation/devicetree/bindings/crypto/ti,omap-sham.yaml56
-rw-r--r--Documentation/devicetree/bindings/crypto/ti,omap2-aes.yaml58
-rw-r--r--Documentation/devicetree/bindings/crypto/ti,omap4-des.yaml65
-rw-r--r--Documentation/devicetree/bindings/crypto/ti,sa2ul.yaml98
-rw-r--r--Documentation/devicetree/bindings/crypto/xlnx,versal-trng.yaml35
-rw-r--r--Documentation/devicetree/bindings/crypto/xlnx,zynqmp-aes.yaml37
-rw-r--r--Documentation/devicetree/bindings/csky/pmu.txt38
-rw-r--r--Documentation/devicetree/bindings/devfreq/event/exynos-nocp.txt26
-rw-r--r--Documentation/devicetree/bindings/devfreq/event/exynos-ppmu.txt147
-rw-r--r--Documentation/devicetree/bindings/devfreq/event/rockchip,dfi.yaml74
-rw-r--r--Documentation/devicetree/bindings/devfreq/event/rockchip-dfi.txt18
-rw-r--r--Documentation/devicetree/bindings/devfreq/event/samsung,exynos-nocp.yaml48
-rw-r--r--Documentation/devicetree/bindings/devfreq/event/samsung,exynos-ppmu.yaml169
-rw-r--r--Documentation/devicetree/bindings/devfreq/exynos-bus.txt423
-rw-r--r--Documentation/devicetree/bindings/devfreq/nvidia,tegra30-actmon.yaml126
-rw-r--r--Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt213
-rw-r--r--Documentation/devicetree/bindings/display/allwinner,sun4i-a10-display-backend.yaml272
-rw-r--r--Documentation/devicetree/bindings/display/allwinner,sun4i-a10-display-engine.yaml117
-rw-r--r--Documentation/devicetree/bindings/display/allwinner,sun4i-a10-display-frontend.yaml125
-rw-r--r--Documentation/devicetree/bindings/display/allwinner,sun4i-a10-hdmi.yaml170
-rw-r--r--Documentation/devicetree/bindings/display/allwinner,sun4i-a10-tcon.yaml677
-rw-r--r--Documentation/devicetree/bindings/display/allwinner,sun4i-a10-tv-encoder.yaml56
-rw-r--r--Documentation/devicetree/bindings/display/allwinner,sun6i-a31-drc.yaml125
-rw-r--r--Documentation/devicetree/bindings/display/allwinner,sun6i-a31-mipi-dsi.yaml136
-rw-r--r--Documentation/devicetree/bindings/display/allwinner,sun8i-a83t-de2-mixer.yaml140
-rw-r--r--Documentation/devicetree/bindings/display/allwinner,sun8i-a83t-dw-hdmi.yaml254
-rw-r--r--Documentation/devicetree/bindings/display/allwinner,sun8i-a83t-hdmi-phy.yaml115
-rw-r--r--Documentation/devicetree/bindings/display/allwinner,sun8i-r40-tcon-top.yaml329
-rw-r--r--Documentation/devicetree/bindings/display/allwinner,sun9i-a80-deu.yaml120
-rw-r--r--Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.txt115
-rw-r--r--Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.yaml157
-rw-r--r--Documentation/devicetree/bindings/display/amlogic,meson-g12a-dw-mipi-dsi.yaml118
-rw-r--r--Documentation/devicetree/bindings/display/amlogic,meson-vpu.txt116
-rw-r--r--Documentation/devicetree/bindings/display/amlogic,meson-vpu.yaml150
-rw-r--r--Documentation/devicetree/bindings/display/apple,h7-display-pipe-mipi.yaml83
-rw-r--r--Documentation/devicetree/bindings/display/apple,h7-display-pipe.yaml88
-rw-r--r--Documentation/devicetree/bindings/display/arm,hdlcd.txt79
-rw-r--r--Documentation/devicetree/bindings/display/arm,hdlcd.yaml89
-rw-r--r--Documentation/devicetree/bindings/display/arm,komeda.yaml131
-rw-r--r--Documentation/devicetree/bindings/display/arm,malidp.txt65
-rw-r--r--Documentation/devicetree/bindings/display/arm,malidp.yaml119
-rw-r--r--Documentation/devicetree/bindings/display/arm,pl11x.txt109
-rw-r--r--Documentation/devicetree/bindings/display/arm,pl11x.yaml173
-rw-r--r--Documentation/devicetree/bindings/display/atmel,lcdc-display.yaml103
-rw-r--r--Documentation/devicetree/bindings/display/atmel,lcdc.txt88
-rw-r--r--Documentation/devicetree/bindings/display/atmel,lcdc.yaml70
-rw-r--r--Documentation/devicetree/bindings/display/atmel/atmel,hlcdc-display-controller.yaml63
-rw-r--r--Documentation/devicetree/bindings/display/atmel/hlcdc-dc.txt75
-rw-r--r--Documentation/devicetree/bindings/display/brcm,bcm-vc4.txt174
-rw-r--r--Documentation/devicetree/bindings/display/brcm,bcm2711-hdmi.yaml143
-rw-r--r--Documentation/devicetree/bindings/display/brcm,bcm2835-dpi.yaml61
-rw-r--r--Documentation/devicetree/bindings/display/brcm,bcm2835-dsi0.yaml85
-rw-r--r--Documentation/devicetree/bindings/display/brcm,bcm2835-hdmi.yaml83
-rw-r--r--Documentation/devicetree/bindings/display/brcm,bcm2835-hvs.yaml56
-rw-r--r--Documentation/devicetree/bindings/display/brcm,bcm2835-pixelvalve0.yaml48
-rw-r--r--Documentation/devicetree/bindings/display/brcm,bcm2835-txp.yaml40
-rw-r--r--Documentation/devicetree/bindings/display/brcm,bcm2835-v3d.yaml45
-rw-r--r--Documentation/devicetree/bindings/display/brcm,bcm2835-vc4.yaml36
-rw-r--r--Documentation/devicetree/bindings/display/brcm,bcm2835-vec.yaml49
-rw-r--r--Documentation/devicetree/bindings/display/bridge/adi,adv7123.txt50
-rw-r--r--Documentation/devicetree/bindings/display/bridge/adi,adv7511.txt142
-rw-r--r--Documentation/devicetree/bindings/display/bridge/adi,adv7511.yaml237
-rw-r--r--Documentation/devicetree/bindings/display/bridge/adi,adv7533.yaml188
-rw-r--r--Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml187
-rw-r--r--Documentation/devicetree/bindings/display/bridge/analogix,anx7814.yaml109
-rw-r--r--Documentation/devicetree/bindings/display/bridge/analogix,dp.yaml64
-rw-r--r--Documentation/devicetree/bindings/display/bridge/analogix_dp.txt51
-rw-r--r--Documentation/devicetree/bindings/display/bridge/anx6345.yaml94
-rw-r--r--Documentation/devicetree/bindings/display/bridge/anx7814.txt38
-rw-r--r--Documentation/devicetree/bindings/display/bridge/cdns,dsi.txt133
-rw-r--r--Documentation/devicetree/bindings/display/bridge/cdns,dsi.yaml180
-rw-r--r--Documentation/devicetree/bindings/display/bridge/cdns,mhdp8546.yaml164
-rw-r--r--Documentation/devicetree/bindings/display/bridge/chipone,icn6211.yaml123
-rw-r--r--Documentation/devicetree/bindings/display/bridge/chrontel,ch7033.yaml77
-rw-r--r--Documentation/devicetree/bindings/display/bridge/dumb-vga-dac.txt50
-rw-r--r--Documentation/devicetree/bindings/display/bridge/dw_hdmi.txt33
-rw-r--r--Documentation/devicetree/bindings/display/bridge/dw_mipi_dsi.txt32
-rw-r--r--Documentation/devicetree/bindings/display/bridge/fsl,imx8mp-hdmi-tx.yaml102
-rw-r--r--Documentation/devicetree/bindings/display/bridge/fsl,imx8qxp-ldb.yaml173
-rw-r--r--Documentation/devicetree/bindings/display/bridge/fsl,imx8qxp-pixel-combiner.yaml144
-rw-r--r--Documentation/devicetree/bindings/display/bridge/fsl,imx8qxp-pixel-link.yaml144
-rw-r--r--Documentation/devicetree/bindings/display/bridge/fsl,imx8qxp-pxl2dpi.yaml108
-rw-r--r--Documentation/devicetree/bindings/display/bridge/fsl,imx93-mipi-dsi.yaml115
-rw-r--r--Documentation/devicetree/bindings/display/bridge/fsl,ldb.yaml122
-rw-r--r--Documentation/devicetree/bindings/display/bridge/google,cros-ec-anx7688.yaml80
-rw-r--r--Documentation/devicetree/bindings/display/bridge/ingenic,jz4780-hdmi.yaml84
-rw-r--r--Documentation/devicetree/bindings/display/bridge/intel,keembay-dsi.yaml93
-rw-r--r--Documentation/devicetree/bindings/display/bridge/ite,it6263.yaml251
-rw-r--r--Documentation/devicetree/bindings/display/bridge/ite,it6505.yaml156
-rw-r--r--Documentation/devicetree/bindings/display/bridge/ite,it66121.yaml132
-rw-r--r--Documentation/devicetree/bindings/display/bridge/lontium,lt8912b.yaml124
-rw-r--r--Documentation/devicetree/bindings/display/bridge/lontium,lt9211.yaml117
-rw-r--r--Documentation/devicetree/bindings/display/bridge/lontium,lt9611.yaml124
-rw-r--r--Documentation/devicetree/bindings/display/bridge/lvds-codec.yaml188
-rw-r--r--Documentation/devicetree/bindings/display/bridge/lvds-transmitter.txt70
-rw-r--r--Documentation/devicetree/bindings/display/bridge/megachips,stdp2690-ge-b850v3-fw.yaml111
-rw-r--r--Documentation/devicetree/bindings/display/bridge/megachips-stdpxxxx-ge-b850v3-fw.txt91
-rw-r--r--Documentation/devicetree/bindings/display/bridge/microchip,sam9x75-lvds.yaml55
-rw-r--r--Documentation/devicetree/bindings/display/bridge/nwl-dsi.yaml218
-rw-r--r--Documentation/devicetree/bindings/display/bridge/nxp,ptn3460.yaml106
-rw-r--r--Documentation/devicetree/bindings/display/bridge/nxp,tda998x.yaml116
-rw-r--r--Documentation/devicetree/bindings/display/bridge/parade,ps8622.yaml115
-rw-r--r--Documentation/devicetree/bindings/display/bridge/ps8622.txt31
-rw-r--r--Documentation/devicetree/bindings/display/bridge/ps8640.yaml121
-rw-r--r--Documentation/devicetree/bindings/display/bridge/ptn3460.txt39
-rw-r--r--Documentation/devicetree/bindings/display/bridge/renesas,dsi-csi2-tx.yaml120
-rw-r--r--Documentation/devicetree/bindings/display/bridge/renesas,dsi.yaml248
-rw-r--r--Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.txt83
-rw-r--r--Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.yaml130
-rw-r--r--Documentation/devicetree/bindings/display/bridge/renesas,lvds.txt70
-rw-r--r--Documentation/devicetree/bindings/display/bridge/renesas,lvds.yaml235
-rw-r--r--Documentation/devicetree/bindings/display/bridge/samsung,mipi-dsim.yaml309
-rw-r--r--Documentation/devicetree/bindings/display/bridge/sii902x.txt35
-rw-r--r--Documentation/devicetree/bindings/display/bridge/sii9234.txt49
-rw-r--r--Documentation/devicetree/bindings/display/bridge/sil,sii8620.yaml108
-rw-r--r--Documentation/devicetree/bindings/display/bridge/sil,sii9022.yaml147
-rw-r--r--Documentation/devicetree/bindings/display/bridge/sil,sii9234.yaml110
-rw-r--r--Documentation/devicetree/bindings/display/bridge/sil-sii8620.txt33
-rw-r--r--Documentation/devicetree/bindings/display/bridge/simple-bridge.yaml93
-rw-r--r--Documentation/devicetree/bindings/display/bridge/snps,dw-mipi-dsi.yaml61
-rw-r--r--Documentation/devicetree/bindings/display/bridge/solomon,ssd2825.yaml141
-rw-r--r--Documentation/devicetree/bindings/display/bridge/synopsys,dw-hdmi.yaml66
-rw-r--r--Documentation/devicetree/bindings/display/bridge/tda998x.txt54
-rw-r--r--Documentation/devicetree/bindings/display/bridge/thine,thc63lvd1024.txt60
-rw-r--r--Documentation/devicetree/bindings/display/bridge/thine,thc63lvd1024.yaml115
-rw-r--r--Documentation/devicetree/bindings/display/bridge/thine,thc63lvdm83d.txt50
-rw-r--r--Documentation/devicetree/bindings/display/bridge/ti,dlpc3433.yaml117
-rw-r--r--Documentation/devicetree/bindings/display/bridge/ti,sn65dsi83.yaml191
-rw-r--r--Documentation/devicetree/bindings/display/bridge/ti,sn65dsi86.txt87
-rw-r--r--Documentation/devicetree/bindings/display/bridge/ti,sn65dsi86.yaml273
-rw-r--r--Documentation/devicetree/bindings/display/bridge/ti,tdp158.yaml58
-rw-r--r--Documentation/devicetree/bindings/display/bridge/ti,tfp410.txt46
-rw-r--r--Documentation/devicetree/bindings/display/bridge/ti,tfp410.yaml119
-rw-r--r--Documentation/devicetree/bindings/display/bridge/ti,ths813x.txt51
-rw-r--r--Documentation/devicetree/bindings/display/bridge/toshiba,tc358762.yaml87
-rw-r--r--Documentation/devicetree/bindings/display/bridge/toshiba,tc358764.txt35
-rw-r--r--Documentation/devicetree/bindings/display/bridge/toshiba,tc358764.yaml89
-rw-r--r--Documentation/devicetree/bindings/display/bridge/toshiba,tc358767.txt53
-rw-r--r--Documentation/devicetree/bindings/display/bridge/toshiba,tc358767.yaml204
-rw-r--r--Documentation/devicetree/bindings/display/bridge/toshiba,tc358768.yaml131
-rw-r--r--Documentation/devicetree/bindings/display/bridge/toshiba,tc358775.yaml233
-rw-r--r--Documentation/devicetree/bindings/display/bridge/waveshare,dsi2dpi.yaml103
-rw-r--r--Documentation/devicetree/bindings/display/cirrus,clps711x-fb.txt4
-rw-r--r--Documentation/devicetree/bindings/display/connector/analog-tv-connector.txt25
-rw-r--r--Documentation/devicetree/bindings/display/connector/analog-tv-connector.yaml53
-rw-r--r--Documentation/devicetree/bindings/display/connector/dp-connector.yaml105
-rw-r--r--Documentation/devicetree/bindings/display/connector/dvi-connector.txt36
-rw-r--r--Documentation/devicetree/bindings/display/connector/dvi-connector.yaml71
-rw-r--r--Documentation/devicetree/bindings/display/connector/hdmi-connector.txt30
-rw-r--r--Documentation/devicetree/bindings/display/connector/hdmi-connector.yaml68
-rw-r--r--Documentation/devicetree/bindings/display/connector/vga-connector.txt36
-rw-r--r--Documentation/devicetree/bindings/display/connector/vga-connector.yaml47
-rw-r--r--Documentation/devicetree/bindings/display/dp-aux-bus.yaml37
-rw-r--r--Documentation/devicetree/bindings/display/dsi-controller.yaml93
-rw-r--r--Documentation/devicetree/bindings/display/elgin,jg10309-01.yaml54
-rw-r--r--Documentation/devicetree/bindings/display/etnaviv/etnaviv-drm.txt36
-rw-r--r--Documentation/devicetree/bindings/display/exynos/exynos-mic.txt51
-rw-r--r--Documentation/devicetree/bindings/display/exynos/exynos5433-decon.txt60
-rw-r--r--Documentation/devicetree/bindings/display/exynos/exynos7-decon.txt65
-rw-r--r--Documentation/devicetree/bindings/display/exynos/exynos_dp.txt112
-rw-r--r--Documentation/devicetree/bindings/display/exynos/exynos_dsim.txt90
-rw-r--r--Documentation/devicetree/bindings/display/exynos/exynos_hdmi.txt64
-rw-r--r--Documentation/devicetree/bindings/display/exynos/exynos_hdmiddc.txt15
-rw-r--r--Documentation/devicetree/bindings/display/exynos/exynos_hdmiphy.txt15
-rw-r--r--Documentation/devicetree/bindings/display/exynos/exynos_mixer.txt26
-rw-r--r--Documentation/devicetree/bindings/display/exynos/samsung-fimd.txt107
-rw-r--r--Documentation/devicetree/bindings/display/faraday,tve200.txt54
-rw-r--r--Documentation/devicetree/bindings/display/faraday,tve200.yaml68
-rw-r--r--Documentation/devicetree/bindings/display/fsl,dcu.txt34
-rw-r--r--Documentation/devicetree/bindings/display/fsl,lcdif.yaml218
-rw-r--r--Documentation/devicetree/bindings/display/fsl,ls1021a-dcu.yaml71
-rw-r--r--Documentation/devicetree/bindings/display/fsl,tcon.txt17
-rw-r--r--Documentation/devicetree/bindings/display/fsl,vf610-tcon.yaml43
-rw-r--r--Documentation/devicetree/bindings/display/himax,hx8357.yaml78
-rw-r--r--Documentation/devicetree/bindings/display/ht16k33.txt40
-rw-r--r--Documentation/devicetree/bindings/display/ilitek,ili9341.txt27
-rw-r--r--Documentation/devicetree/bindings/display/ilitek,ili9486.yaml69
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx-display-subsystem.yaml36
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx-fb.txt57
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx-lcdc.yaml146
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx-parallel-display.yaml74
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx6-hdmi.yaml118
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx6q-ipu.yaml97
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx6q-ldb.yaml193
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx6qp-pre.yaml55
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx6qp-prg.yaml54
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8mp-hdmi-pvi.yaml84
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-axi-performance-counter.yaml57
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-blit-engine.yaml204
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-blitblend.yaml41
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-clut.yaml44
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-command-sequencer.yaml67
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-constframe.yaml44
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-display-engine.yaml152
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-dither.yaml45
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-extdst.yaml72
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-fetchunit.yaml141
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-filter.yaml43
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-framegen.yaml64
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-gammacor.yaml32
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-layerblend.yaml39
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-matrix.yaml44
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-pixel-engine.yaml250
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-rop.yaml43
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-safety.yaml34
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-scaling-engine.yaml83
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-signature.yaml53
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-store.yaml96
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-tcon.yaml45
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc.yaml236
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl-imx-drm.txt162
-rw-r--r--Documentation/devicetree/bindings/display/imx/hdmi.txt65
-rw-r--r--Documentation/devicetree/bindings/display/imx/ldb.txt147
-rw-r--r--Documentation/devicetree/bindings/display/imx/nxp,imx8mq-dcss.yaml107
-rw-r--r--Documentation/devicetree/bindings/display/ingenic,ipu.yaml64
-rw-r--r--Documentation/devicetree/bindings/display/ingenic,lcd.yaml128
-rw-r--r--Documentation/devicetree/bindings/display/intel,keembay-display.yaml72
-rw-r--r--Documentation/devicetree/bindings/display/intel,keembay-msscam.yaml43
-rw-r--r--Documentation/devicetree/bindings/display/lvds-data-mapping.yaml115
-rw-r--r--Documentation/devicetree/bindings/display/lvds-dual-ports.yaml63
-rw-r--r--Documentation/devicetree/bindings/display/lvds.yaml31
-rw-r--r--Documentation/devicetree/bindings/display/mayqueen,pixpaper.yaml63
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,aal.yaml137
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,ccorr.yaml109
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,cec.yaml52
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,color.yaml122
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,disp.txt205
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,dither.yaml109
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,dp.yaml138
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,dpi.txt35
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,dpi.yaml153
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,dsc.yaml107
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.txt62
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.yaml151
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,ethdr.yaml208
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,gamma.yaml115
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,hdmi-ddc.yaml58
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,hdmi.txt148
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,hdmi.yaml136
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,merge.yaml137
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,mt8195-hdmi-ddc.yaml41
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,mt8195-hdmi.yaml151
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,od.yaml97
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,ovl-2l.yaml113
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,ovl.yaml133
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,padding.yaml87
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,postmask.yaml104
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,rdma.yaml144
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,split.yaml114
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,ufoe.yaml107
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,wdma.yaml89
-rw-r--r--Documentation/devicetree/bindings/display/msm/dp-controller.yaml357
-rw-r--r--Documentation/devicetree/bindings/display/msm/dpu-common.yaml56
-rw-r--r--Documentation/devicetree/bindings/display/msm/dpu.txt131
-rw-r--r--Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml482
-rw-r--r--Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml101
-rw-r--r--Documentation/devicetree/bindings/display/msm/dsi-phy-14nm.yaml79
-rw-r--r--Documentation/devicetree/bindings/display/msm/dsi-phy-20nm.yaml70
-rw-r--r--Documentation/devicetree/bindings/display/msm/dsi-phy-28nm.yaml75
-rw-r--r--Documentation/devicetree/bindings/display/msm/dsi-phy-7nm.yaml79
-rw-r--r--Documentation/devicetree/bindings/display/msm/dsi-phy-common.yaml41
-rw-r--r--Documentation/devicetree/bindings/display/msm/dsi.txt244
-rw-r--r--Documentation/devicetree/bindings/display/msm/edp.txt56
-rw-r--r--Documentation/devicetree/bindings/display/msm/gmu.yaml364
-rw-r--r--Documentation/devicetree/bindings/display/msm/gpu.txt38
-rw-r--r--Documentation/devicetree/bindings/display/msm/gpu.yaml535
-rw-r--r--Documentation/devicetree/bindings/display/msm/hdmi.txt99
-rw-r--r--Documentation/devicetree/bindings/display/msm/hdmi.yaml241
-rw-r--r--Documentation/devicetree/bindings/display/msm/mdp4.txt112
-rw-r--r--Documentation/devicetree/bindings/display/msm/mdp4.yaml131
-rw-r--r--Documentation/devicetree/bindings/display/msm/mdp5.txt158
-rw-r--r--Documentation/devicetree/bindings/display/msm/mdss-common.yaml106
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,mdp5.yaml157
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,mdss.yaml226
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,msm8998-dpu.yaml101
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,msm8998-mdss.yaml278
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,qcm2290-dpu.yaml90
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml215
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sa8775p-mdss.yaml464
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sar2130p-mdss.yaml443
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sc7180-dpu.yaml125
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sc7180-mdss.yaml322
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sc7280-dpu.yaml114
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sc7280-mdss.yaml444
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sc8180x-dpu.yaml103
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sc8180x-mdss.yaml359
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sc8280xp-mdss.yaml155
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sdm670-mdss.yaml292
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sdm845-dpu.yaml98
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sdm845-mdss.yaml288
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sm6115-dpu.yaml93
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sm6115-mdss.yaml203
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sm6125-mdss.yaml223
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sm6150-dpu.yaml108
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sm6150-mdss.yaml245
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sm6350-mdss.yaml225
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sm6375-mdss.yaml218
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sm7150-dpu.yaml143
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sm7150-mdss.yaml464
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sm8150-dpu.yaml94
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sm8150-mdss.yaml347
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sm8250-mdss.yaml350
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sm8350-mdss.yaml244
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sm8450-mdss.yaml364
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sm8550-mdss.yaml353
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sm8650-dpu.yaml131
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sm8650-mdss.yaml341
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sm8750-mdss.yaml474
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,x1e80100-mdss.yaml255
-rw-r--r--Documentation/devicetree/bindings/display/mxsfb.txt86
-rw-r--r--Documentation/devicetree/bindings/display/panel/abt,y030xx067a.yaml61
-rw-r--r--Documentation/devicetree/bindings/display/panel/advantech,idk-1110wr.yaml84
-rw-r--r--Documentation/devicetree/bindings/display/panel/advantech,idk-2121wr.yaml112
-rw-r--r--Documentation/devicetree/bindings/display/panel/ampire,am-480272h3tmqw-t01h.txt26
-rw-r--r--Documentation/devicetree/bindings/display/panel/ampire,am800480r3tmqwa1h.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/anbernic,rg35xx-plus-panel.yaml67
-rw-r--r--Documentation/devicetree/bindings/display/panel/apple,summit.yaml58
-rw-r--r--Documentation/devicetree/bindings/display/panel/arm,rtsm-display.yaml27
-rw-r--r--Documentation/devicetree/bindings/display/panel/arm,versatile-tft-panel.txt31
-rw-r--r--Documentation/devicetree/bindings/display/panel/arm,versatile-tft-panel.yaml51
-rw-r--r--Documentation/devicetree/bindings/display/panel/armadeus,st0700-adapt.yaml33
-rw-r--r--Documentation/devicetree/bindings/display/panel/asus,z00t-tm5p5-nt35596.yaml59
-rw-r--r--Documentation/devicetree/bindings/display/panel/auo,a030jtn01.yaml60
-rw-r--r--Documentation/devicetree/bindings/display/panel/auo,b080uan01.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/auo,b101aw03.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/auo,b101ean01.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/auo,b101xtn01.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/auo,b116xw03.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/auo,b133htn01.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/auo,b133xtn01.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/auo,g070vvn01.txt29
-rw-r--r--Documentation/devicetree/bindings/display/panel/auo,g104sn02.txt12
-rw-r--r--Documentation/devicetree/bindings/display/panel/auo,g133han01.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/auo,g185han01.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/auo,p320hvn03.txt8
-rw-r--r--Documentation/devicetree/bindings/display/panel/auo,t215hvn01.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/avic,tm070ddh03.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/bananapi,s070wv20-ct16.yaml31
-rw-r--r--Documentation/devicetree/bindings/display/panel/boe,bf060y8m-aj0.yaml83
-rw-r--r--Documentation/devicetree/bindings/display/panel/boe,himax8279d.yaml61
-rw-r--r--Documentation/devicetree/bindings/display/panel/boe,hv070wsa-100.txt28
-rw-r--r--Documentation/devicetree/bindings/display/panel/boe,nv101wxmn51.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/boe,td4320.yaml65
-rw-r--r--Documentation/devicetree/bindings/display/panel/boe,th101mb31ig002-28a.yaml75
-rw-r--r--Documentation/devicetree/bindings/display/panel/boe,tv080wum-nl0.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/boe,tv101wum-ll2.yaml63
-rw-r--r--Documentation/devicetree/bindings/display/panel/boe,tv101wum-nl6.yaml93
-rw-r--r--Documentation/devicetree/bindings/display/panel/chunghwa,claa070wp03xg.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/chunghwa,claa101wa01a.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/chunghwa,claa101wb03.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/dataimage,scf0700c48ggu18.txt8
-rw-r--r--Documentation/devicetree/bindings/display/panel/display-timing.txt124
-rw-r--r--Documentation/devicetree/bindings/display/panel/display-timings.yaml76
-rw-r--r--Documentation/devicetree/bindings/display/panel/dlc,dlc0700yzg-1.txt13
-rw-r--r--Documentation/devicetree/bindings/display/panel/dlc,dlc0700yzg-1.yaml31
-rw-r--r--Documentation/devicetree/bindings/display/panel/ebbg,ft8719.yaml74
-rw-r--r--Documentation/devicetree/bindings/display/panel/edt,et-series.txt39
-rw-r--r--Documentation/devicetree/bindings/display/panel/elida,kd35t133.yaml61
-rw-r--r--Documentation/devicetree/bindings/display/panel/fascontek,fs035vg158.yaml59
-rw-r--r--Documentation/devicetree/bindings/display/panel/feixin,k101-im2ba02.yaml58
-rw-r--r--Documentation/devicetree/bindings/display/panel/feiyang,fy07024di26a30d.yaml65
-rw-r--r--Documentation/devicetree/bindings/display/panel/focaltech,gpt3.yaml56
-rw-r--r--Documentation/devicetree/bindings/display/panel/foxlink,fl500wvr00-a0t.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/giantplus,gpg482739qs5.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/hannstar,hsd070pww1.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/hannstar,hsd100pxn1.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/himax,hx8279.yaml75
-rw-r--r--Documentation/devicetree/bindings/display/panel/himax,hx83102.yaml83
-rw-r--r--Documentation/devicetree/bindings/display/panel/himax,hx83112a.yaml76
-rw-r--r--Documentation/devicetree/bindings/display/panel/himax,hx83112b.yaml73
-rw-r--r--Documentation/devicetree/bindings/display/panel/himax,hx8394.yaml94
-rw-r--r--Documentation/devicetree/bindings/display/panel/hit,tx23d38vm0caa.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/hydis,hv101hd1.yaml60
-rw-r--r--Documentation/devicetree/bindings/display/panel/ilitek,ili9163.yaml68
-rw-r--r--Documentation/devicetree/bindings/display/panel/ilitek,ili9322.txt49
-rw-r--r--Documentation/devicetree/bindings/display/panel/ilitek,ili9322.yaml71
-rw-r--r--Documentation/devicetree/bindings/display/panel/ilitek,ili9341.yaml100
-rw-r--r--Documentation/devicetree/bindings/display/panel/ilitek,ili9805.yaml64
-rw-r--r--Documentation/devicetree/bindings/display/panel/ilitek,ili9806e.yaml64
-rw-r--r--Documentation/devicetree/bindings/display/panel/ilitek,ili9881c.txt20
-rw-r--r--Documentation/devicetree/bindings/display/panel/ilitek,ili9881c.yaml61
-rw-r--r--Documentation/devicetree/bindings/display/panel/innolux,at043tn24.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/innolux,at070tn92.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/innolux,ee101ia-01d.yaml50
-rw-r--r--Documentation/devicetree/bindings/display/panel/innolux,ej030na.yaml61
-rw-r--r--Documentation/devicetree/bindings/display/panel/innolux,g070y2-l01.txt12
-rw-r--r--Documentation/devicetree/bindings/display/panel/innolux,g101ice-l01.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/innolux,g121i1-l01.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/innolux,g121x1-l03.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/innolux,n116bge.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/innolux,n156bge-l21.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/innolux,p079zca.txt22
-rw-r--r--Documentation/devicetree/bindings/display/panel/innolux,p097pfg.txt24
-rw-r--r--Documentation/devicetree/bindings/display/panel/innolux,p097pfg.yaml58
-rw-r--r--Documentation/devicetree/bindings/display/panel/innolux,p120zdg-bf1.txt22
-rw-r--r--Documentation/devicetree/bindings/display/panel/innolux,zj070na-01p.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/jadard,jd9365da-h3.yaml75
-rw-r--r--Documentation/devicetree/bindings/display/panel/jdi,lpm102a188a.yaml96
-rw-r--r--Documentation/devicetree/bindings/display/panel/jdi,lt070me05000.txt31
-rw-r--r--Documentation/devicetree/bindings/display/panel/jdi,lt070me05000.yaml74
-rw-r--r--Documentation/devicetree/bindings/display/panel/kingdisplay,kd035g6-54nt.yaml65
-rw-r--r--Documentation/devicetree/bindings/display/panel/kingdisplay,kd097d04.txt22
-rw-r--r--Documentation/devicetree/bindings/display/panel/koe,tx31d200vm0baa.txt25
-rw-r--r--Documentation/devicetree/bindings/display/panel/kyo,tcg121xglp.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/leadtek,ltk035c5444t.yaml60
-rw-r--r--Documentation/devicetree/bindings/display/panel/leadtek,ltk050h3146w.yaml56
-rw-r--r--Documentation/devicetree/bindings/display/panel/leadtek,ltk500hd1829.yaml56
-rw-r--r--Documentation/devicetree/bindings/display/panel/lg,lb070wv8.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/lg,ld070wx3-sl01.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/lg,lg4573.txt19
-rw-r--r--Documentation/devicetree/bindings/display/panel/lg,lg4573.yaml46
-rw-r--r--Documentation/devicetree/bindings/display/panel/lg,lh500wx1-sd03.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/lg,lp079qx1-sp0v.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/lg,lp097qx1-spa1.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/lg,lp120up1.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/lg,lp129qe.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/lg,sw43408.yaml64
-rw-r--r--Documentation/devicetree/bindings/display/panel/lgphilips,lb035q02.txt33
-rw-r--r--Documentation/devicetree/bindings/display/panel/lgphilips,lb035q02.yaml62
-rw-r--r--Documentation/devicetree/bindings/display/panel/logicpd,type28.yaml42
-rw-r--r--Documentation/devicetree/bindings/display/panel/mantix,mlaf057we51-x.yaml76
-rw-r--r--Documentation/devicetree/bindings/display/panel/mitsubishi,aa070mc01.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/mitsubishi,aa104xd12.txt47
-rw-r--r--Documentation/devicetree/bindings/display/panel/mitsubishi,aa104xd12.yaml92
-rw-r--r--Documentation/devicetree/bindings/display/panel/mitsubishi,aa121td01.txt47
-rw-r--r--Documentation/devicetree/bindings/display/panel/mitsubishi,aa121td01.yaml89
-rw-r--r--Documentation/devicetree/bindings/display/panel/nec,nl12880b20-05.txt8
-rw-r--r--Documentation/devicetree/bindings/display/panel/nec,nl4827hc19-05b.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/nec,nl8048hl11.yaml65
-rw-r--r--Documentation/devicetree/bindings/display/panel/netron-dy,e231732.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/newhaven,nhd-4.3-480272ef-atxl.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/newvision,nv3051d.yaml64
-rw-r--r--Documentation/devicetree/bindings/display/panel/nlt,nl192108ac18-02d.txt8
-rw-r--r--Documentation/devicetree/bindings/display/panel/novatek,nt35510.yaml62
-rw-r--r--Documentation/devicetree/bindings/display/panel/novatek,nt35950.yaml109
-rw-r--r--Documentation/devicetree/bindings/display/panel/novatek,nt36523.yaml108
-rw-r--r--Documentation/devicetree/bindings/display/panel/novatek,nt36672a.yaml90
-rw-r--r--Documentation/devicetree/bindings/display/panel/novatek,nt36672e.yaml66
-rw-r--r--Documentation/devicetree/bindings/display/panel/novatek,nt37801.yaml69
-rw-r--r--Documentation/devicetree/bindings/display/panel/nvd,9128.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/okaya,rs800480t-7x0gp.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/olimex,lcd-olinuxino-43-ts.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/olimex,lcd-olinuxino.yaml72
-rw-r--r--Documentation/devicetree/bindings/display/panel/ontat,yx700wv03.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/orisetech,otm8009a.txt23
-rw-r--r--Documentation/devicetree/bindings/display/panel/orisetech,otm8009a.yaml52
-rw-r--r--Documentation/devicetree/bindings/display/panel/ortustech,com43h4m85ulc.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/panasonic,vvx10f004b00.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/panasonic,vvx10f034n00.txt20
-rw-r--r--Documentation/devicetree/bindings/display/panel/panel-common-dual.yaml47
-rw-r--r--Documentation/devicetree/bindings/display/panel/panel-common.txt101
-rw-r--r--Documentation/devicetree/bindings/display/panel/panel-common.yaml167
-rw-r--r--Documentation/devicetree/bindings/display/panel/panel-dpi.txt50
-rw-r--r--Documentation/devicetree/bindings/display/panel/panel-dpi.yaml71
-rw-r--r--Documentation/devicetree/bindings/display/panel/panel-dsi-cm.txt29
-rw-r--r--Documentation/devicetree/bindings/display/panel/panel-dsi-cm.yaml86
-rw-r--r--Documentation/devicetree/bindings/display/panel/panel-edp-legacy.yaml117
-rw-r--r--Documentation/devicetree/bindings/display/panel/panel-edp.yaml188
-rw-r--r--Documentation/devicetree/bindings/display/panel/panel-lvds.txt121
-rw-r--r--Documentation/devicetree/bindings/display/panel/panel-lvds.yaml75
-rw-r--r--Documentation/devicetree/bindings/display/panel/panel-mipi-dbi-spi.yaml171
-rw-r--r--Documentation/devicetree/bindings/display/panel/panel-simple-dsi.yaml118
-rw-r--r--Documentation/devicetree/bindings/display/panel/panel-simple-lvds-dual-ports.yaml106
-rw-r--r--Documentation/devicetree/bindings/display/panel/panel-simple.yaml379
-rw-r--r--Documentation/devicetree/bindings/display/panel/panel-timing.yaml214
-rw-r--r--Documentation/devicetree/bindings/display/panel/panel.txt4
-rw-r--r--Documentation/devicetree/bindings/display/panel/pda,91-00156-a0.yaml31
-rw-r--r--Documentation/devicetree/bindings/display/panel/powertip,hx8238a.yaml29
-rw-r--r--Documentation/devicetree/bindings/display/panel/powertip,st7272.yaml29
-rw-r--r--Documentation/devicetree/bindings/display/panel/qiaodian,qd43003c0-40.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/raspberrypi,7inch-touchscreen.txt49
-rw-r--r--Documentation/devicetree/bindings/display/panel/raspberrypi,7inch-touchscreen.yaml71
-rw-r--r--Documentation/devicetree/bindings/display/panel/raydium,rm67191.yaml79
-rw-r--r--Documentation/devicetree/bindings/display/panel/raydium,rm67200.yaml71
-rw-r--r--Documentation/devicetree/bindings/display/panel/raydium,rm68200.txt25
-rw-r--r--Documentation/devicetree/bindings/display/panel/raydium,rm68200.yaml56
-rw-r--r--Documentation/devicetree/bindings/display/panel/raydium,rm692e5.yaml75
-rw-r--r--Documentation/devicetree/bindings/display/panel/raydium,rm69380.yaml90
-rw-r--r--Documentation/devicetree/bindings/display/panel/renesas,r61307.yaml94
-rw-r--r--Documentation/devicetree/bindings/display/panel/renesas,r69328.yaml73
-rw-r--r--Documentation/devicetree/bindings/display/panel/rocktech,jh057n00900.yaml78
-rw-r--r--Documentation/devicetree/bindings/display/panel/rocktech,rk070er9427.txt25
-rw-r--r--Documentation/devicetree/bindings/display/panel/ronbo,rb070d30.yaml51
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,amoled-mipi-dsi.yaml77
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,ams495qa01.yaml60
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,ams581vf01.yaml79
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,ams639rq08.yaml80
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,atna33xc20.yaml114
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,ld9040.txt66
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,ld9040.yaml107
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,lms380kf01.yaml99
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,lms397kf04.yaml90
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,lsn122dl01-c01.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,ltn101nt05.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,ltn140at29-301.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,s6d16d0.yaml58
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,s6d27a1.yaml99
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,s6d7aa0.yaml71
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,s6e3ha2.txt31
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,s6e3ha8.yaml75
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,s6e63j0x03.txt24
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,s6e63m0.yaml69
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,s6e88a0-ams427ap24.yaml65
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,s6e88a0-ams452ef01.yaml61
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,s6e8aa0.txt56
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,s6e8aa0.yaml97
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,s6e8aa5x01-ams561ra01.yaml55
-rw-r--r--Documentation/devicetree/bindings/display/panel/seiko,43wvf1g.txt23
-rw-r--r--Documentation/devicetree/bindings/display/panel/seiko,43wvf1g.yaml52
-rw-r--r--Documentation/devicetree/bindings/display/panel/sgd,gktw70sdae4se.txt41
-rw-r--r--Documentation/devicetree/bindings/display/panel/sgd,gktw70sdae4se.yaml83
-rw-r--r--Documentation/devicetree/bindings/display/panel/sharp,lq035q7db03.txt12
-rw-r--r--Documentation/devicetree/bindings/display/panel/sharp,lq101k1ly04.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/sharp,lq101r1sx01.txt49
-rw-r--r--Documentation/devicetree/bindings/display/panel/sharp,lq101r1sx01.yaml94
-rw-r--r--Documentation/devicetree/bindings/display/panel/sharp,lq123p1jx31.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/sharp,lq150x1lg11.txt36
-rw-r--r--Documentation/devicetree/bindings/display/panel/sharp,lq150x1lg11.yaml58
-rw-r--r--Documentation/devicetree/bindings/display/panel/sharp,ls037v7dw01.txt43
-rw-r--r--Documentation/devicetree/bindings/display/panel/sharp,ls037v7dw01.yaml68
-rw-r--r--Documentation/devicetree/bindings/display/panel/sharp,ls043t1le01.txt22
-rw-r--r--Documentation/devicetree/bindings/display/panel/sharp,ls043t1le01.yaml53
-rw-r--r--Documentation/devicetree/bindings/display/panel/sharp,ls060t1sx01.yaml58
-rw-r--r--Documentation/devicetree/bindings/display/panel/shelly,sca07010-bfn-lnn.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/simple-panel.txt28
-rw-r--r--Documentation/devicetree/bindings/display/panel/sitronix,st7701.yaml144
-rw-r--r--Documentation/devicetree/bindings/display/panel/sitronix,st7789v.txt37
-rw-r--r--Documentation/devicetree/bindings/display/panel/sitronix,st7789v.yaml78
-rw-r--r--Documentation/devicetree/bindings/display/panel/sony,acx424akp.yaml59
-rw-r--r--Documentation/devicetree/bindings/display/panel/sony,acx565akm.txt30
-rw-r--r--Documentation/devicetree/bindings/display/panel/sony,acx565akm.yaml57
-rw-r--r--Documentation/devicetree/bindings/display/panel/sony,td4353-jdi.yaml85
-rw-r--r--Documentation/devicetree/bindings/display/panel/sony,tulip-truly-nt35521.yaml73
-rw-r--r--Documentation/devicetree/bindings/display/panel/starry,kr122ea0sra.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/startek,kd070fhfid015.yaml69
-rw-r--r--Documentation/devicetree/bindings/display/panel/startek,startek-kd050c.txt4
-rw-r--r--Documentation/devicetree/bindings/display/panel/startek,startek-kd050c.yaml33
-rw-r--r--Documentation/devicetree/bindings/display/panel/synaptics,r63353.yaml63
-rw-r--r--Documentation/devicetree/bindings/display/panel/tfc,s9700rtwv43tr-01b.yaml33
-rw-r--r--Documentation/devicetree/bindings/display/panel/ti,nspire.yaml36
-rw-r--r--Documentation/devicetree/bindings/display/panel/tianma,tm070jdhg30.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/tianma,tm070rvhg71.txt29
-rw-r--r--Documentation/devicetree/bindings/display/panel/toshiba,lt089ac29000.txt8
-rw-r--r--Documentation/devicetree/bindings/display/panel/tpk,f07a-0102.txt8
-rw-r--r--Documentation/devicetree/bindings/display/panel/tpk,f10a-0102.txt8
-rw-r--r--Documentation/devicetree/bindings/display/panel/tpo,td.yaml67
-rw-r--r--Documentation/devicetree/bindings/display/panel/tpo,td028ttec1.txt30
-rw-r--r--Documentation/devicetree/bindings/display/panel/tpo,td043mtea1.txt33
-rw-r--r--Documentation/devicetree/bindings/display/panel/tpo,tpg110.txt47
-rw-r--r--Documentation/devicetree/bindings/display/panel/tpo,tpg110.yaml105
-rw-r--r--Documentation/devicetree/bindings/display/panel/truly,nt35597-2K-display.yaml97
-rw-r--r--Documentation/devicetree/bindings/display/panel/urt,umsh-8596md.txt16
-rw-r--r--Documentation/devicetree/bindings/display/panel/visionox,g2647fb105.yaml79
-rw-r--r--Documentation/devicetree/bindings/display/panel/visionox,r66451.yaml59
-rw-r--r--Documentation/devicetree/bindings/display/panel/visionox,rm69299.yaml70
-rw-r--r--Documentation/devicetree/bindings/display/panel/visionox,rm692e5.yaml77
-rw-r--r--Documentation/devicetree/bindings/display/panel/visionox,vtdr6130.yaml63
-rw-r--r--Documentation/devicetree/bindings/display/panel/winstar,wf35ltiacd.txt48
-rw-r--r--Documentation/devicetree/bindings/display/panel/xinpeng,xpp055c272.yaml61
-rw-r--r--Documentation/devicetree/bindings/display/renesas,cmm.yaml67
-rw-r--r--Documentation/devicetree/bindings/display/renesas,du.txt114
-rw-r--r--Documentation/devicetree/bindings/display/renesas,du.yaml950
-rw-r--r--Documentation/devicetree/bindings/display/renesas,rzg2l-du.yaml173
-rw-r--r--Documentation/devicetree/bindings/display/renesas,shmobile-lcdc.yaml130
-rw-r--r--Documentation/devicetree/bindings/display/rockchip/analogix_dp-rockchip.txt98
-rw-r--r--Documentation/devicetree/bindings/display/rockchip/cdn-dp-rockchip.txt74
-rw-r--r--Documentation/devicetree/bindings/display/rockchip/dw_hdmi-rockchip.txt63
-rw-r--r--Documentation/devicetree/bindings/display/rockchip/dw_mipi_dsi_rockchip.txt68
-rw-r--r--Documentation/devicetree/bindings/display/rockchip/inno_hdmi-rockchip.txt49
-rw-r--r--Documentation/devicetree/bindings/display/rockchip/rockchip,analogix-dp.yaml124
-rw-r--r--Documentation/devicetree/bindings/display/rockchip/rockchip,dw-dp.yaml150
-rw-r--r--Documentation/devicetree/bindings/display/rockchip/rockchip,dw-hdmi.yaml184
-rw-r--r--Documentation/devicetree/bindings/display/rockchip/rockchip,dw-mipi-dsi.yaml166
-rw-r--r--Documentation/devicetree/bindings/display/rockchip/rockchip,inno-hdmi.yaml146
-rw-r--r--Documentation/devicetree/bindings/display/rockchip/rockchip,lvds.yaml170
-rw-r--r--Documentation/devicetree/bindings/display/rockchip/rockchip,rk3066-hdmi.yaml129
-rw-r--r--Documentation/devicetree/bindings/display/rockchip/rockchip,rk3399-cdn-dp.yaml170
-rw-r--r--Documentation/devicetree/bindings/display/rockchip/rockchip,rk3588-dw-hdmi-qp.yaml189
-rw-r--r--Documentation/devicetree/bindings/display/rockchip/rockchip,rk3588-mipi-dsi2.yaml121
-rw-r--r--Documentation/devicetree/bindings/display/rockchip/rockchip-drm.txt19
-rw-r--r--Documentation/devicetree/bindings/display/rockchip/rockchip-drm.yaml42
-rw-r--r--Documentation/devicetree/bindings/display/rockchip/rockchip-lvds.txt99
-rw-r--r--Documentation/devicetree/bindings/display/rockchip/rockchip-vop.txt69
-rw-r--r--Documentation/devicetree/bindings/display/rockchip/rockchip-vop.yaml126
-rw-r--r--Documentation/devicetree/bindings/display/rockchip/rockchip-vop2.yaml315
-rw-r--r--Documentation/devicetree/bindings/display/samsung/samsung,exynos-hdmi-ddc.yaml41
-rw-r--r--Documentation/devicetree/bindings/display/samsung/samsung,exynos-hdmi.yaml226
-rw-r--r--Documentation/devicetree/bindings/display/samsung/samsung,exynos-mixer.yaml142
-rw-r--r--Documentation/devicetree/bindings/display/samsung/samsung,exynos5-dp.yaml163
-rw-r--r--Documentation/devicetree/bindings/display/samsung/samsung,exynos5433-decon.yaml145
-rw-r--r--Documentation/devicetree/bindings/display/samsung/samsung,exynos5433-mic.yaml93
-rw-r--r--Documentation/devicetree/bindings/display/samsung/samsung,exynos7-decon.yaml142
-rw-r--r--Documentation/devicetree/bindings/display/samsung/samsung,fimd.yaml196
-rw-r--r--Documentation/devicetree/bindings/display/sharp,ls010b7dh04.yaml92
-rw-r--r--Documentation/devicetree/bindings/display/simple-framebuffer-sunxi.txt36
-rw-r--r--Documentation/devicetree/bindings/display/simple-framebuffer.txt91
-rw-r--r--Documentation/devicetree/bindings/display/simple-framebuffer.yaml213
-rw-r--r--Documentation/devicetree/bindings/display/sitronix,st7567.yaml68
-rw-r--r--Documentation/devicetree/bindings/display/sitronix,st7571.yaml78
-rw-r--r--Documentation/devicetree/bindings/display/sitronix,st7586.txt22
-rw-r--r--Documentation/devicetree/bindings/display/sitronix,st7586.yaml61
-rw-r--r--Documentation/devicetree/bindings/display/sitronix,st7735r.txt35
-rw-r--r--Documentation/devicetree/bindings/display/sitronix,st7735r.yaml72
-rw-r--r--Documentation/devicetree/bindings/display/solomon,ssd-common.yaml42
-rw-r--r--Documentation/devicetree/bindings/display/solomon,ssd1307fb.yaml269
-rw-r--r--Documentation/devicetree/bindings/display/solomon,ssd132x.yaml89
-rw-r--r--Documentation/devicetree/bindings/display/solomon,ssd133x.yaml45
-rw-r--r--Documentation/devicetree/bindings/display/sprd/sprd,display-subsystem.yaml65
-rw-r--r--Documentation/devicetree/bindings/display/sprd/sprd,sharkl3-dpu.yaml77
-rw-r--r--Documentation/devicetree/bindings/display/sprd/sprd,sharkl3-dsi-host.yaml88
-rw-r--r--Documentation/devicetree/bindings/display/ssd1307fb.txt51
-rw-r--r--Documentation/devicetree/bindings/display/st,stm32-dsi.yaml136
-rw-r--r--Documentation/devicetree/bindings/display/st,stm32-ltdc.txt141
-rw-r--r--Documentation/devicetree/bindings/display/st,stm32-ltdc.yaml128
-rw-r--r--Documentation/devicetree/bindings/display/st,stm32mp25-lvds.yaml130
-rw-r--r--Documentation/devicetree/bindings/display/ste,mcde.yaml168
-rw-r--r--Documentation/devicetree/bindings/display/sunxi/sun4i-drm.txt620
-rw-r--r--Documentation/devicetree/bindings/display/sunxi/sun6i-dsi.txt93
-rw-r--r--Documentation/devicetree/bindings/display/tegra/nvidia,tegra114-mipi.txt41
-rw-r--r--Documentation/devicetree/bindings/display/tegra/nvidia,tegra114-mipi.yaml75
-rw-r--r--Documentation/devicetree/bindings/display/tegra/nvidia,tegra124-dpaux.yaml151
-rw-r--r--Documentation/devicetree/bindings/display/tegra/nvidia,tegra124-sor.yaml197
-rw-r--r--Documentation/devicetree/bindings/display/tegra/nvidia,tegra124-vic.yaml72
-rw-r--r--Documentation/devicetree/bindings/display/tegra/nvidia,tegra186-dc.yaml85
-rw-r--r--Documentation/devicetree/bindings/display/tegra/nvidia,tegra186-display.yaml308
-rw-r--r--Documentation/devicetree/bindings/display/tegra/nvidia,tegra186-dsi-padctl.yaml45
-rw-r--r--Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-dc.yaml182
-rw-r--r--Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-dsi.yaml158
-rw-r--r--Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-epp.yaml69
-rw-r--r--Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-gr2d.yaml73
-rw-r--r--Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-gr3d.yaml213
-rw-r--r--Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-hdmi.yaml125
-rw-r--r--Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt416
-rw-r--r--Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.yaml441
-rw-r--r--Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-isp.yaml67
-rw-r--r--Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-mpe.yaml70
-rw-r--r--Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-tvo.yaml57
-rw-r--r--Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-vi.yaml218
-rw-r--r--Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-vip.yaml41
-rw-r--r--Documentation/devicetree/bindings/display/tegra/nvidia,tegra210-csi.yaml55
-rw-r--r--Documentation/devicetree/bindings/display/ti/ti,am625-oldi.yaml79
-rw-r--r--Documentation/devicetree/bindings/display/ti/ti,am65x-dss.yaml349
-rw-r--r--Documentation/devicetree/bindings/display/ti/ti,j721e-dss.yaml208
-rw-r--r--Documentation/devicetree/bindings/display/ti/ti,k2g-dss.yaml105
-rw-r--r--Documentation/devicetree/bindings/display/ti/ti,opa362.txt38
-rw-r--r--Documentation/devicetree/bindings/display/tilcdc/tfp410.txt21
-rw-r--r--Documentation/devicetree/bindings/display/tilcdc/tilcdc.txt8
-rw-r--r--Documentation/devicetree/bindings/display/xlnx/xlnx,zynqmp-dpsub.yaml242
-rw-r--r--Documentation/devicetree/bindings/display/xylon,logicvc-display.yaml301
-rw-r--r--Documentation/devicetree/bindings/display/zte,vou.txt120
-rw-r--r--Documentation/devicetree/bindings/dma/adi,axi-dmac.txt61
-rw-r--r--Documentation/devicetree/bindings/dma/adi,axi-dmac.yaml129
-rw-r--r--Documentation/devicetree/bindings/dma/allwinner,sun4i-a10-dma.yaml57
-rw-r--r--Documentation/devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml99
-rw-r--r--Documentation/devicetree/bindings/dma/allwinner,sun6i-a31-dma.yaml62
-rw-r--r--Documentation/devicetree/bindings/dma/altr,msgdma.yaml63
-rw-r--r--Documentation/devicetree/bindings/dma/apple,admac.yaml96
-rw-r--r--Documentation/devicetree/bindings/dma/arm,dma-350.yaml44
-rw-r--r--Documentation/devicetree/bindings/dma/arm,pl330.yaml92
-rw-r--r--Documentation/devicetree/bindings/dma/arm-pl08x.txt59
-rw-r--r--Documentation/devicetree/bindings/dma/arm-pl08x.yaml140
-rw-r--r--Documentation/devicetree/bindings/dma/arm-pl330.txt45
-rw-r--r--Documentation/devicetree/bindings/dma/atmel,at91sam9g45-dma.yaml68
-rw-r--r--Documentation/devicetree/bindings/dma/atmel,sama5d4-dma.yaml82
-rw-r--r--Documentation/devicetree/bindings/dma/atmel-dma.txt42
-rw-r--r--Documentation/devicetree/bindings/dma/atmel-xdma.txt54
-rw-r--r--Documentation/devicetree/bindings/dma/brcm,bcm2835-dma.txt83
-rw-r--r--Documentation/devicetree/bindings/dma/brcm,bcm2835-dma.yaml102
-rw-r--r--Documentation/devicetree/bindings/dma/brcm,iproc-sba.txt29
-rw-r--r--Documentation/devicetree/bindings/dma/brcm,iproc-sba.yaml41
-rw-r--r--Documentation/devicetree/bindings/dma/cirrus,ep9301-dma-m2m.yaml84
-rw-r--r--Documentation/devicetree/bindings/dma/cirrus,ep9301-dma-m2p.yaml144
-rw-r--r--Documentation/devicetree/bindings/dma/dma-common.yaml52
-rw-r--r--Documentation/devicetree/bindings/dma/dma-controller.yaml22
-rw-r--r--Documentation/devicetree/bindings/dma/dma-router.yaml43
-rw-r--r--Documentation/devicetree/bindings/dma/dma.txt110
-rw-r--r--Documentation/devicetree/bindings/dma/fsl,edma.yaml344
-rw-r--r--Documentation/devicetree/bindings/dma/fsl,elo-dma.yaml137
-rw-r--r--Documentation/devicetree/bindings/dma/fsl,elo3-dma.yaml125
-rw-r--r--Documentation/devicetree/bindings/dma/fsl,eloplus-dma.yaml132
-rw-r--r--Documentation/devicetree/bindings/dma/fsl,imx-dma.yaml70
-rw-r--r--Documentation/devicetree/bindings/dma/fsl,imx-sdma.yaml151
-rw-r--r--Documentation/devicetree/bindings/dma/fsl,mxs-dma.yaml134
-rw-r--r--Documentation/devicetree/bindings/dma/fsl-edma.txt75
-rw-r--r--Documentation/devicetree/bindings/dma/fsl-imx-dma.txt48
-rw-r--r--Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt113
-rw-r--r--Documentation/devicetree/bindings/dma/fsl-mxs-dma.txt60
-rw-r--r--Documentation/devicetree/bindings/dma/fsl-qdma.yaml135
-rw-r--r--Documentation/devicetree/bindings/dma/ingenic,dma.yaml99
-rw-r--r--Documentation/devicetree/bindings/dma/intel,ldma.yaml116
-rw-r--r--Documentation/devicetree/bindings/dma/jz4780-dma.txt61
-rw-r--r--Documentation/devicetree/bindings/dma/k3dma.txt4
-rw-r--r--Documentation/devicetree/bindings/dma/loongson,ls1b-apbdma.yaml65
-rw-r--r--Documentation/devicetree/bindings/dma/loongson,ls2x-apbdma.yaml62
-rw-r--r--Documentation/devicetree/bindings/dma/lpc1850-dmamux.txt54
-rw-r--r--Documentation/devicetree/bindings/dma/marvell,mmp-dma.yaml72
-rw-r--r--Documentation/devicetree/bindings/dma/marvell,orion-xor.yaml84
-rw-r--r--Documentation/devicetree/bindings/dma/marvell,xor-v2.yaml61
-rw-r--r--Documentation/devicetree/bindings/dma/mediatek,mt7622-hsdma.yaml63
-rw-r--r--Documentation/devicetree/bindings/dma/mediatek,uart-dma.yaml124
-rw-r--r--Documentation/devicetree/bindings/dma/milbeaut-m10v-hdmac.txt32
-rw-r--r--Documentation/devicetree/bindings/dma/milbeaut-m10v-xdmac.txt24
-rw-r--r--Documentation/devicetree/bindings/dma/mmp-dma.txt79
-rw-r--r--Documentation/devicetree/bindings/dma/moxa,moxart-dma.txt4
-rw-r--r--Documentation/devicetree/bindings/dma/mtk-hsdma.txt33
-rw-r--r--Documentation/devicetree/bindings/dma/mv-xor-v2.txt28
-rw-r--r--Documentation/devicetree/bindings/dma/mv-xor.txt40
-rw-r--r--Documentation/devicetree/bindings/dma/nvidia,tegra186-gpc-dma.yaml118
-rw-r--r--Documentation/devicetree/bindings/dma/nvidia,tegra20-apbdma.txt44
-rw-r--r--Documentation/devicetree/bindings/dma/nvidia,tegra20-apbdma.yaml100
-rw-r--r--Documentation/devicetree/bindings/dma/nvidia,tegra210-adma.txt54
-rw-r--r--Documentation/devicetree/bindings/dma/nvidia,tegra210-adma.yaml158
-rw-r--r--Documentation/devicetree/bindings/dma/nxp,lpc3220-dmamux.yaml49
-rw-r--r--Documentation/devicetree/bindings/dma/owl-dma.txt47
-rw-r--r--Documentation/devicetree/bindings/dma/owl-dma.yaml80
-rw-r--r--Documentation/devicetree/bindings/dma/qcom,adm.yaml99
-rw-r--r--Documentation/devicetree/bindings/dma/qcom,bam-dma.yaml121
-rw-r--r--Documentation/devicetree/bindings/dma/qcom,gpi.yaml121
-rw-r--r--Documentation/devicetree/bindings/dma/qcom_adm.txt61
-rw-r--r--Documentation/devicetree/bindings/dma/qcom_bam_dma.txt50
-rw-r--r--Documentation/devicetree/bindings/dma/qcom_hidma_mgmt.txt95
-rw-r--r--Documentation/devicetree/bindings/dma/renesas,nbpfaxi.txt (renamed from Documentation/devicetree/bindings/dma/nbpfaxi.txt)0
-rw-r--r--Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt113
-rw-r--r--Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml175
-rw-r--r--Documentation/devicetree/bindings/dma/renesas,rz-dmac.yaml234
-rw-r--r--Documentation/devicetree/bindings/dma/renesas,rzn1-dmamux.yaml51
-rw-r--r--Documentation/devicetree/bindings/dma/renesas,usb-dmac.txt50
-rw-r--r--Documentation/devicetree/bindings/dma/renesas,usb-dmac.yaml104
-rw-r--r--Documentation/devicetree/bindings/dma/shdma.txt84
-rw-r--r--Documentation/devicetree/bindings/dma/sifive,fu540-c000-pdma.yaml80
-rw-r--r--Documentation/devicetree/bindings/dma/sirfsoc-dma.txt44
-rw-r--r--Documentation/devicetree/bindings/dma/snps,dma-spear1340.yaml185
-rw-r--r--Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.txt39
-rw-r--r--Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.yaml157
-rw-r--r--Documentation/devicetree/bindings/dma/snps-dma.txt67
-rw-r--r--Documentation/devicetree/bindings/dma/socionext,uniphier-mio-dmac.yaml63
-rw-r--r--Documentation/devicetree/bindings/dma/socionext,uniphier-xdmac.yaml62
-rw-r--r--Documentation/devicetree/bindings/dma/sophgo,cv1800b-dmamux.yaml51
-rw-r--r--Documentation/devicetree/bindings/dma/spacemit,k1-pdma.yaml68
-rw-r--r--Documentation/devicetree/bindings/dma/sprd,sc9860-dma.yaml92
-rw-r--r--Documentation/devicetree/bindings/dma/sprd-dma.txt41
-rw-r--r--Documentation/devicetree/bindings/dma/ste-coh901318.txt32
-rw-r--r--Documentation/devicetree/bindings/dma/ste-dma40.txt138
-rw-r--r--Documentation/devicetree/bindings/dma/stericsson,dma40.yaml179
-rw-r--r--Documentation/devicetree/bindings/dma/stm32-dma.txt83
-rw-r--r--Documentation/devicetree/bindings/dma/stm32-dmamux.txt84
-rw-r--r--Documentation/devicetree/bindings/dma/stm32-mdma.txt94
-rw-r--r--Documentation/devicetree/bindings/dma/stm32/st,stm32-dma.yaml120
-rw-r--r--Documentation/devicetree/bindings/dma/stm32/st,stm32-dma3.yaml141
-rw-r--r--Documentation/devicetree/bindings/dma/stm32/st,stm32-dmamux.yaml67
-rw-r--r--Documentation/devicetree/bindings/dma/stm32/st,stm32-mdma.yaml106
-rw-r--r--Documentation/devicetree/bindings/dma/sun4i-dma.txt45
-rw-r--r--Documentation/devicetree/bindings/dma/sun6i-dma.txt76
-rw-r--r--Documentation/devicetree/bindings/dma/ti-dma-crossbar.txt4
-rw-r--r--Documentation/devicetree/bindings/dma/ti-edma.txt14
-rw-r--r--Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml266
-rw-r--r--Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml195
-rw-r--r--Documentation/devicetree/bindings/dma/ti/k3-udma.yaml193
-rw-r--r--Documentation/devicetree/bindings/dma/xilinx/xilinx_dma.txt54
-rw-r--r--Documentation/devicetree/bindings/dma/xilinx/xlnx,zynqmp-dma-1.0.yaml89
-rw-r--r--Documentation/devicetree/bindings/dma/xilinx/xlnx,zynqmp-dpdma.yaml74
-rw-r--r--Documentation/devicetree/bindings/dma/xilinx/zynqmp_dma.txt26
-rw-r--r--Documentation/devicetree/bindings/dma/zxdma.txt38
-rw-r--r--Documentation/devicetree/bindings/dpll/dpll-device.yaml76
-rw-r--r--Documentation/devicetree/bindings/dpll/dpll-pin.yaml45
-rw-r--r--Documentation/devicetree/bindings/dpll/microchip,zl30731.yaml115
-rw-r--r--Documentation/devicetree/bindings/dsp/fsl,dsp.yaml249
-rw-r--r--Documentation/devicetree/bindings/dsp/mediatek,mt8186-dsp.yaml93
-rw-r--r--Documentation/devicetree/bindings/dsp/mediatek,mt8195-dsp.yaml105
-rw-r--r--Documentation/devicetree/bindings/dts-coding-style.rst210
-rw-r--r--Documentation/devicetree/bindings/dvfs/performance-domain.yaml79
-rw-r--r--Documentation/devicetree/bindings/edac/altr,socfpga-ecc-manager.yaml324
-rw-r--r--Documentation/devicetree/bindings/edac/amazon,al-mc-edac.yaml66
-rw-r--r--Documentation/devicetree/bindings/edac/apm,xgene-edac.yaml203
-rw-r--r--Documentation/devicetree/bindings/edac/apm-xgene-edac.txt112
-rw-r--r--Documentation/devicetree/bindings/edac/aspeed,ast2400-sdram-edac.yaml48
-rw-r--r--Documentation/devicetree/bindings/edac/dmc-520.yaml61
-rw-r--r--Documentation/devicetree/bindings/edac/socfpga-eccmgr.txt268
-rw-r--r--Documentation/devicetree/bindings/eeprom/at24.txt84
-rw-r--r--Documentation/devicetree/bindings/eeprom/at24.yaml231
-rw-r--r--Documentation/devicetree/bindings/eeprom/at25.txt42
-rw-r--r--Documentation/devicetree/bindings/eeprom/at25.yaml157
-rw-r--r--Documentation/devicetree/bindings/eeprom/microchip,93lc46b.yaml70
-rw-r--r--Documentation/devicetree/bindings/eeprom/st,m24lr.yaml52
-rw-r--r--Documentation/devicetree/bindings/embedded-controller/acer,aspire1-ec.yaml60
-rw-r--r--Documentation/devicetree/bindings/embedded-controller/google,cros-ec.yaml326
-rw-r--r--Documentation/devicetree/bindings/embedded-controller/gw,gsc.yaml193
-rw-r--r--Documentation/devicetree/bindings/embedded-controller/huawei,gaokun3-ec.yaml124
-rw-r--r--Documentation/devicetree/bindings/embedded-controller/kontron,sl28cpld.yaml158
-rw-r--r--Documentation/devicetree/bindings/embedded-controller/lenovo,thinkpad-t14s-ec.yaml50
-rw-r--r--Documentation/devicetree/bindings/embedded-controller/lenovo,yoga-c630-ec.yaml83
-rw-r--r--Documentation/devicetree/bindings/embedded-controller/microsoft,surface-sam.yaml47
-rw-r--r--Documentation/devicetree/bindings/example-schema.yaml279
-rw-r--r--Documentation/devicetree/bindings/extcon/extcon-arizona.txt76
-rw-r--r--Documentation/devicetree/bindings/extcon/extcon-ptn5150.yaml73
-rw-r--r--Documentation/devicetree/bindings/extcon/extcon-rt8973a.txt23
-rw-r--r--Documentation/devicetree/bindings/extcon/extcon-sm5502.txt21
-rw-r--r--Documentation/devicetree/bindings/extcon/extcon-usb-gpio.txt21
-rw-r--r--Documentation/devicetree/bindings/extcon/extcon-usbc-cros-ec.txt24
-rw-r--r--Documentation/devicetree/bindings/extcon/extcon-usbc-cros-ec.yaml55
-rw-r--r--Documentation/devicetree/bindings/extcon/extcon-usbc-tusb320.yaml43
-rw-r--r--Documentation/devicetree/bindings/extcon/fcs,fsa880.yaml52
-rw-r--r--Documentation/devicetree/bindings/extcon/linux,extcon-usb-gpio.yaml43
-rw-r--r--Documentation/devicetree/bindings/extcon/maxim,max14526.yaml80
-rw-r--r--Documentation/devicetree/bindings/extcon/maxim,max77843.yaml41
-rw-r--r--Documentation/devicetree/bindings/extcon/qcom,pm8941-misc.txt41
-rw-r--r--Documentation/devicetree/bindings/extcon/qcom,pm8941-misc.yaml66
-rw-r--r--Documentation/devicetree/bindings/extcon/richtek,rt8973a-muic.yaml49
-rw-r--r--Documentation/devicetree/bindings/extcon/siliconmitus,sm5502-muic.yaml57
-rw-r--r--Documentation/devicetree/bindings/extcon/wlf,arizona.yaml126
-rw-r--r--Documentation/devicetree/bindings/firmware/amlogic,meson-gxbb-sm.yaml39
-rw-r--r--Documentation/devicetree/bindings/firmware/arm,scmi.yaml589
-rw-r--r--Documentation/devicetree/bindings/firmware/arm,scpi.yaml249
-rw-r--r--Documentation/devicetree/bindings/firmware/brcm,kona-smc.yaml39
-rw-r--r--Documentation/devicetree/bindings/firmware/cznic,turris-mox-rwtm.txt19
-rw-r--r--Documentation/devicetree/bindings/firmware/cznic,turris-omnia-mcu.yaml86
-rw-r--r--Documentation/devicetree/bindings/firmware/fsl,scu.yaml227
-rw-r--r--Documentation/devicetree/bindings/firmware/google,gs101-acpm-ipc.yaml85
-rw-r--r--Documentation/devicetree/bindings/firmware/intel,ixp4xx-network-processing-engine.yaml94
-rw-r--r--Documentation/devicetree/bindings/firmware/intel,stratix10-svc.yaml93
-rw-r--r--Documentation/devicetree/bindings/firmware/meson/meson_sm.txt15
-rw-r--r--Documentation/devicetree/bindings/firmware/nvidia,tegra186-bpmp.txt108
-rw-r--r--Documentation/devicetree/bindings/firmware/nvidia,tegra186-bpmp.yaml216
-rw-r--r--Documentation/devicetree/bindings/firmware/nvidia,tegra210-bpmp.txt35
-rw-r--r--Documentation/devicetree/bindings/firmware/nxp,imx95-scmi-pinctrl.yaml53
-rw-r--r--Documentation/devicetree/bindings/firmware/nxp,imx95-scmi.yaml72
-rw-r--r--Documentation/devicetree/bindings/firmware/qcom,scm.txt42
-rw-r--r--Documentation/devicetree/bindings/firmware/qcom,scm.yaml242
-rw-r--r--Documentation/devicetree/bindings/firmware/qemu,fw-cfg-mmio.yaml54
-rw-r--r--Documentation/devicetree/bindings/firmware/thead,th1520-aon.yaml60
-rw-r--r--Documentation/devicetree/bindings/firmware/xilinx/xlnx,zynqmp-firmware.txt82
-rw-r--r--Documentation/devicetree/bindings/firmware/xilinx/xlnx,zynqmp-firmware.yaml177
-rw-r--r--Documentation/devicetree/bindings/fpga/altera-fpga2sdram-bridge.txt16
-rw-r--r--Documentation/devicetree/bindings/fpga/altera-freeze-bridge.txt23
-rw-r--r--Documentation/devicetree/bindings/fpga/altera-hps2fpga-bridge.txt39
-rw-r--r--Documentation/devicetree/bindings/fpga/altera-passive-serial.txt29
-rw-r--r--Documentation/devicetree/bindings/fpga/altr,fpga-passive-serial.yaml74
-rw-r--r--Documentation/devicetree/bindings/fpga/altr,freeze-bridge-controller.yaml41
-rw-r--r--Documentation/devicetree/bindings/fpga/altr,socfpga-fpga2sdram-bridge.yaml33
-rw-r--r--Documentation/devicetree/bindings/fpga/altr,socfpga-hps2fpga-bridge.yaml49
-rw-r--r--Documentation/devicetree/bindings/fpga/fpga-bridge.yaml30
-rw-r--r--Documentation/devicetree/bindings/fpga/fpga-region.txt497
-rw-r--r--Documentation/devicetree/bindings/fpga/fpga-region.yaml359
-rw-r--r--Documentation/devicetree/bindings/fpga/intel,stratix10-soc-fpga-mgr.yaml36
-rw-r--r--Documentation/devicetree/bindings/fpga/lattice,sysconfig.yaml81
-rw-r--r--Documentation/devicetree/bindings/fpga/microchip,mpf-spi-fpga-mgr.yaml45
-rw-r--r--Documentation/devicetree/bindings/fpga/xilinx-pr-decoupler.txt36
-rw-r--r--Documentation/devicetree/bindings/fpga/xilinx-slave-serial.txt43
-rw-r--r--Documentation/devicetree/bindings/fpga/xilinx-zynq-fpga-mgr.txt19
-rw-r--r--Documentation/devicetree/bindings/fpga/xilinx-zynq-fpga-mgr.yaml52
-rw-r--r--Documentation/devicetree/bindings/fpga/xlnx,fpga-selectmap.yaml86
-rw-r--r--Documentation/devicetree/bindings/fpga/xlnx,fpga-slave-serial.yaml80
-rw-r--r--Documentation/devicetree/bindings/fpga/xlnx,pr-decoupler.yaml67
-rw-r--r--Documentation/devicetree/bindings/fpga/xlnx,versal-fpga.yaml33
-rw-r--r--Documentation/devicetree/bindings/fpga/xlnx,zynqmp-pcap-fpga.yaml36
-rw-r--r--Documentation/devicetree/bindings/fsi/aspeed,ast2400-cf-fsi-master.yaml81
-rw-r--r--Documentation/devicetree/bindings/fsi/aspeed,ast2600-fsi-master.yaml121
-rw-r--r--Documentation/devicetree/bindings/fsi/fsi-controller.yaml66
-rw-r--r--Documentation/devicetree/bindings/fsi/fsi-master-ast-cf.txt36
-rw-r--r--Documentation/devicetree/bindings/fsi/fsi-master-gpio.txt28
-rw-r--r--Documentation/devicetree/bindings/fsi/fsi-master-gpio.yaml63
-rw-r--r--Documentation/devicetree/bindings/fsi/ibm,fsi2spi.yaml66
-rw-r--r--Documentation/devicetree/bindings/fsi/ibm,i2cr-fsi-master.yaml44
-rw-r--r--Documentation/devicetree/bindings/fsi/ibm,p9-fsi-controller.yaml45
-rw-r--r--Documentation/devicetree/bindings/fsi/ibm,p9-occ.yaml40
-rw-r--r--Documentation/devicetree/bindings/fsi/ibm,p9-sbefifo.yaml46
-rw-r--r--Documentation/devicetree/bindings/fsi/ibm,p9-scom.yaml38
-rw-r--r--Documentation/devicetree/bindings/fuse/nvidia,tegra20-fuse.txt40
-rw-r--r--Documentation/devicetree/bindings/fuse/nvidia,tegra20-fuse.yaml88
-rw-r--r--Documentation/devicetree/bindings/gnss/brcm,bcm4751.yaml70
-rw-r--r--Documentation/devicetree/bindings/gnss/gnss-common.yaml50
-rw-r--r--Documentation/devicetree/bindings/gnss/gnss.txt36
-rw-r--r--Documentation/devicetree/bindings/gnss/mediatek.yaml60
-rw-r--r--Documentation/devicetree/bindings/gnss/sirfstar.txt45
-rw-r--r--Documentation/devicetree/bindings/gnss/sirfstar.yaml77
-rw-r--r--Documentation/devicetree/bindings/gnss/u-blox,neo-6m.yaml69
-rw-r--r--Documentation/devicetree/bindings/gnss/u-blox.txt44
-rw-r--r--Documentation/devicetree/bindings/goldfish/pipe.txt2
-rw-r--r--Documentation/devicetree/bindings/gpio/8xxx_gpio.txt72
-rw-r--r--Documentation/devicetree/bindings/gpio/abilis,tb10x-gpio.txt35
-rw-r--r--Documentation/devicetree/bindings/gpio/abilis,tb10x-gpio.yaml63
-rw-r--r--Documentation/devicetree/bindings/gpio/adi,ds4520-gpio.yaml51
-rw-r--r--Documentation/devicetree/bindings/gpio/airoha,en7523-gpio.yaml66
-rw-r--r--Documentation/devicetree/bindings/gpio/altr-pio-1.0.yaml75
-rw-r--r--Documentation/devicetree/bindings/gpio/apm,xgene-gpio-sb.yaml94
-rw-r--r--Documentation/devicetree/bindings/gpio/apple,smc-gpio.yaml29
-rw-r--r--Documentation/devicetree/bindings/gpio/aspeed,ast2400-gpio.yaml169
-rw-r--r--Documentation/devicetree/bindings/gpio/aspeed,sgpio.yaml87
-rw-r--r--Documentation/devicetree/bindings/gpio/atmel,at91rm9200-gpio.yaml81
-rw-r--r--Documentation/devicetree/bindings/gpio/blaize,blzp1600-gpio.yaml77
-rw-r--r--Documentation/devicetree/bindings/gpio/brcm,bcm6345-gpio.txt46
-rw-r--r--Documentation/devicetree/bindings/gpio/brcm,bcm63xx-gpio.yaml72
-rw-r--r--Documentation/devicetree/bindings/gpio/brcm,brcmstb-gpio.txt83
-rw-r--r--Documentation/devicetree/bindings/gpio/brcm,brcmstb-gpio.yaml111
-rw-r--r--Documentation/devicetree/bindings/gpio/brcm,kona-gpio.txt52
-rw-r--r--Documentation/devicetree/bindings/gpio/brcm,kona-gpio.yaml100
-rw-r--r--Documentation/devicetree/bindings/gpio/brcm,xgs-iproc-gpio.yaml70
-rw-r--r--Documentation/devicetree/bindings/gpio/cavium,octeon-3860-gpio.yaml62
-rw-r--r--Documentation/devicetree/bindings/gpio/cavium-octeon-gpio.txt49
-rw-r--r--Documentation/devicetree/bindings/gpio/cdns,gpio.yaml84
-rw-r--r--Documentation/devicetree/bindings/gpio/cirrus,clps711x-mctrl-gpio.txt17
-rw-r--r--Documentation/devicetree/bindings/gpio/cirrus,clps711x-mctrl-gpio.yaml49
-rw-r--r--Documentation/devicetree/bindings/gpio/delta,tn48m-gpio.yaml39
-rw-r--r--Documentation/devicetree/bindings/gpio/exar,xra1403.yaml75
-rw-r--r--Documentation/devicetree/bindings/gpio/fairchild,74hc595.yaml85
-rw-r--r--Documentation/devicetree/bindings/gpio/faraday,ftgpio010.txt27
-rw-r--r--Documentation/devicetree/bindings/gpio/faraday,ftgpio010.yaml65
-rw-r--r--Documentation/devicetree/bindings/gpio/fsl,imx8qxp-sc-gpio.yaml39
-rw-r--r--Documentation/devicetree/bindings/gpio/fsl,qoriq-gpio.yaml95
-rw-r--r--Documentation/devicetree/bindings/gpio/fsl-imx-gpio.txt35
-rw-r--r--Documentation/devicetree/bindings/gpio/fsl-imx-gpio.yaml114
-rw-r--r--Documentation/devicetree/bindings/gpio/fujitsu,mb86s70-gpio.txt20
-rw-r--r--Documentation/devicetree/bindings/gpio/fujitsu,mb86s70-gpio.yaml50
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-74x164.txt27
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-74xx-mmio.txt30
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-altera.txt43
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-aspeed.txt36
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-ath79.txt37
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-atlas7.txt50
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-axp209.txt75
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-clps711x.txt28
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-consumer-common.yaml64
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-davinci.txt147
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-davinci.yaml187
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-delay.yaml79
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-dsp-keystone.txt39
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-eic-sprd.txt97
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-ep9301.yaml157
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-latch.yaml94
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-lp3943.txt37
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-max3191x.txt59
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-max732x.txt58
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-max77620.txt25
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-mm-lantiq.txt38
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-mmio.yaml162
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-mpc8xxx.txt39
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-mvebu.txt95
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-mvebu.yaml156
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-mxs.txt88
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-mxs.yaml205
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-nmk.txt31
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-omap.txt39
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-palmas.txt27
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-pca953x.txt86
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-pca95xx.yaml231
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-pcf857x.txt69
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-pisosr.txt34
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-rda.yaml50
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-samsung.txt41
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-sprd.txt28
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-stericsson-coh901.txt7
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-stmpe.txt18
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-stp-xway.txt42
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-stp-xway.yaml99
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-tpic2810.txt16
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-ts4800.txt20
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-ts4900.txt30
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-twl4030.txt29
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-uniphier.txt51
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-vf610.txt57
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-vf610.yaml135
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-virtio.yaml59
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-xgene-sb.txt64
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-xgene.txt22
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-xilinx.txt46
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-xlp.txt49
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-xra1403.txt46
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-zevio.txt16
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-zynq.txt34
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-zynq.yaml116
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio.txt65
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio_atmel.txt31
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio_lpc32xx.txt43
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio_oxnas.txt47
-rw-r--r--Documentation/devicetree/bindings/gpio/hisilicon,ascend910-gpio.yaml56
-rw-r--r--Documentation/devicetree/bindings/gpio/ibm,ppc4xx-gpio.txt24
-rw-r--r--Documentation/devicetree/bindings/gpio/idt,32434-gpio.yaml67
-rw-r--r--Documentation/devicetree/bindings/gpio/intel,ixp4xx-gpio.yaml73
-rw-r--r--Documentation/devicetree/bindings/gpio/kontron,sl28cpld-gpio.yaml54
-rw-r--r--Documentation/devicetree/bindings/gpio/lacie,netxbig-gpio-ext.yaml60
-rw-r--r--Documentation/devicetree/bindings/gpio/lantiq,gpio-mm-lantiq.yaml54
-rw-r--r--Documentation/devicetree/bindings/gpio/loongson,ls-gpio.yaml168
-rw-r--r--Documentation/devicetree/bindings/gpio/lsi,zevio-gpio.yaml43
-rw-r--r--Documentation/devicetree/bindings/gpio/maxim,max31910.yaml104
-rw-r--r--Documentation/devicetree/bindings/gpio/maxim,max7360-gpio.yaml83
-rw-r--r--Documentation/devicetree/bindings/gpio/maxim,max77759-gpio.yaml44
-rw-r--r--Documentation/devicetree/bindings/gpio/mediatek,mt7621-gpio.txt35
-rw-r--r--Documentation/devicetree/bindings/gpio/mediatek,mt7621-gpio.yaml72
-rw-r--r--Documentation/devicetree/bindings/gpio/microchip,mpfs-gpio.yaml96
-rw-r--r--Documentation/devicetree/bindings/gpio/microchip,pic32-gpio.txt49
-rw-r--r--Documentation/devicetree/bindings/gpio/microchip,pic32mzda-gpio.yaml71
-rw-r--r--Documentation/devicetree/bindings/gpio/mrvl-gpio.txt48
-rw-r--r--Documentation/devicetree/bindings/gpio/mrvl-gpio.yaml172
-rw-r--r--Documentation/devicetree/bindings/gpio/mstar,msc313-gpio.yaml61
-rw-r--r--Documentation/devicetree/bindings/gpio/netxbig-gpio-ext.txt22
-rw-r--r--Documentation/devicetree/bindings/gpio/ni,169445-nand-gpio.txt38
-rw-r--r--Documentation/devicetree/bindings/gpio/nintendo,hollywood-gpio.txt26
-rw-r--r--Documentation/devicetree/bindings/gpio/nuvoton,sgpio.yaml87
-rw-r--r--Documentation/devicetree/bindings/gpio/nvidia,tegra186-gpio.txt165
-rw-r--r--Documentation/devicetree/bindings/gpio/nvidia,tegra186-gpio.yaml219
-rw-r--r--Documentation/devicetree/bindings/gpio/nvidia,tegra20-gpio.txt40
-rw-r--r--Documentation/devicetree/bindings/gpio/nvidia,tegra20-gpio.yaml110
-rw-r--r--Documentation/devicetree/bindings/gpio/nxp,lpc1850-gpio.txt39
-rw-r--r--Documentation/devicetree/bindings/gpio/nxp,lpc1850-gpio.yaml78
-rw-r--r--Documentation/devicetree/bindings/gpio/nxp,lpc3220-gpio.yaml50
-rw-r--r--Documentation/devicetree/bindings/gpio/nxp,pcf8575.yaml144
-rw-r--r--Documentation/devicetree/bindings/gpio/pisosr-gpio.yaml67
-rw-r--r--Documentation/devicetree/bindings/gpio/pl061-gpio.txt10
-rw-r--r--Documentation/devicetree/bindings/gpio/pl061-gpio.yaml69
-rw-r--r--Documentation/devicetree/bindings/gpio/qca,ar7100-gpio.yaml60
-rw-r--r--Documentation/devicetree/bindings/gpio/qcom,wcd934x-gpio.yaml47
-rw-r--r--Documentation/devicetree/bindings/gpio/raspberrypi,firmware-gpio.txt30
-rw-r--r--Documentation/devicetree/bindings/gpio/realtek,otto-gpio.yaml108
-rw-r--r--Documentation/devicetree/bindings/gpio/realtek,rtd-gpio.yaml69
-rw-r--r--Documentation/devicetree/bindings/gpio/renesas,em-gio.yaml70
-rw-r--r--Documentation/devicetree/bindings/gpio/renesas,gpio-rcar.txt91
-rw-r--r--Documentation/devicetree/bindings/gpio/renesas,rcar-gpio.yaml153
-rw-r--r--Documentation/devicetree/bindings/gpio/rockchip,gpio-bank.yaml99
-rw-r--r--Documentation/devicetree/bindings/gpio/rockchip,rk3328-grf-gpio.txt32
-rw-r--r--Documentation/devicetree/bindings/gpio/sifive,gpio.yaml93
-rw-r--r--Documentation/devicetree/bindings/gpio/snps,creg-gpio.txt21
-rw-r--r--Documentation/devicetree/bindings/gpio/snps,dw-apb-gpio.yaml145
-rw-r--r--Documentation/devicetree/bindings/gpio/snps-dwapb-gpio.txt65
-rw-r--r--Documentation/devicetree/bindings/gpio/socionext,uniphier-gpio.yaml101
-rw-r--r--Documentation/devicetree/bindings/gpio/spacemit,k1-gpio.yaml96
-rw-r--r--Documentation/devicetree/bindings/gpio/spear_spics.txt49
-rw-r--r--Documentation/devicetree/bindings/gpio/sprd,gpio-eic.yaml124
-rw-r--r--Documentation/devicetree/bindings/gpio/sprd,gpio.yaml75
-rw-r--r--Documentation/devicetree/bindings/gpio/st,nomadik-gpio.yaml96
-rw-r--r--Documentation/devicetree/bindings/gpio/st,spear-spics-gpio.yaml82
-rw-r--r--Documentation/devicetree/bindings/gpio/st,stmpe-gpio.yaml57
-rw-r--r--Documentation/devicetree/bindings/gpio/ti,keystone-dsp-gpio.yaml65
-rw-r--r--Documentation/devicetree/bindings/gpio/ti,omap-gpio.yaml108
-rw-r--r--Documentation/devicetree/bindings/gpio/ti,twl4030-gpio.yaml61
-rw-r--r--Documentation/devicetree/bindings/gpio/toshiba,gpio-visconti.yaml69
-rw-r--r--Documentation/devicetree/bindings/gpio/trivial-gpio.yaml110
-rw-r--r--Documentation/devicetree/bindings/gpio/wd,mbl-gpio.txt38
-rw-r--r--Documentation/devicetree/bindings/gpio/x-powers,axp209-gpio.yaml63
-rw-r--r--Documentation/devicetree/bindings/gpio/xlnx,gpio-xilinx.yaml155
-rw-r--r--Documentation/devicetree/bindings/gpio/xlnx,zynqmp-gpio-modepin.yaml46
-rw-r--r--Documentation/devicetree/bindings/gpio/xylon,logicvc-gpio.yaml71
-rw-r--r--Documentation/devicetree/bindings/gpio/zx296702-gpio.txt24
-rw-r--r--Documentation/devicetree/bindings/gpu/apple,agx.yaml100
-rw-r--r--Documentation/devicetree/bindings/gpu/arm,mali-bifrost.yaml334
-rw-r--r--Documentation/devicetree/bindings/gpu/arm,mali-midgard.txt87
-rw-r--r--Documentation/devicetree/bindings/gpu/arm,mali-midgard.yaml207
-rw-r--r--Documentation/devicetree/bindings/gpu/arm,mali-utgard.txt112
-rw-r--r--Documentation/devicetree/bindings/gpu/arm,mali-utgard.yaml185
-rw-r--r--Documentation/devicetree/bindings/gpu/arm,mali-valhall-csf.yaml147
-rw-r--r--Documentation/devicetree/bindings/gpu/aspeed,ast2400-gfx.yaml63
-rw-r--r--Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.txt28
-rw-r--r--Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.yaml138
-rw-r--r--Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra210-nvdec.yaml106
-rw-r--r--Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra210-nvenc.yaml135
-rw-r--r--Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra210-nvjpg.yaml94
-rw-r--r--Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra234-nvdec.yaml156
-rw-r--r--Documentation/devicetree/bindings/gpu/img,powervr-rogue.yaml164
-rw-r--r--Documentation/devicetree/bindings/gpu/img,powervr-sgx.yaml138
-rw-r--r--Documentation/devicetree/bindings/gpu/nvidia,gk20a.txt90
-rw-r--r--Documentation/devicetree/bindings/gpu/nvidia,gk20a.yaml171
-rw-r--r--Documentation/devicetree/bindings/gpu/samsung-g2d.txt27
-rw-r--r--Documentation/devicetree/bindings/gpu/samsung-g2d.yaml86
-rw-r--r--Documentation/devicetree/bindings/gpu/samsung-rotator.txt27
-rw-r--r--Documentation/devicetree/bindings/gpu/samsung-rotator.yaml56
-rw-r--r--Documentation/devicetree/bindings/gpu/samsung-scaler.txt27
-rw-r--r--Documentation/devicetree/bindings/gpu/samsung-scaler.yaml94
-rw-r--r--Documentation/devicetree/bindings/gpu/vivante,gc.yaml76
-rw-r--r--Documentation/devicetree/bindings/graph.txt129
-rw-r--r--Documentation/devicetree/bindings/h8300/cpu.txt13
-rw-r--r--Documentation/devicetree/bindings/hwinfo/loongson,ls2k-chipid.yaml38
-rw-r--r--Documentation/devicetree/bindings/hwinfo/renesas,prr.yaml37
-rw-r--r--Documentation/devicetree/bindings/hwinfo/samsung,exynos-chipid.yaml58
-rw-r--r--Documentation/devicetree/bindings/hwinfo/samsung,s5pv210-chipid.yaml30
-rw-r--r--Documentation/devicetree/bindings/hwinfo/ti,k3-socinfo.yaml40
-rw-r--r--Documentation/devicetree/bindings/hwinfo/via,vt8500-scc-id.yaml37
-rw-r--r--Documentation/devicetree/bindings/hwlock/allwinner,sun6i-a31-hwspinlock.yaml53
-rw-r--r--Documentation/devicetree/bindings/hwlock/omap-hwspinlock.txt26
-rw-r--r--Documentation/devicetree/bindings/hwlock/qcom-hwspinlock.txt39
-rw-r--r--Documentation/devicetree/bindings/hwlock/qcom-hwspinlock.yaml55
-rw-r--r--Documentation/devicetree/bindings/hwlock/sirf,hwspinlock.txt28
-rw-r--r--Documentation/devicetree/bindings/hwlock/sprd,hwspinlock-r3p0.yaml50
-rw-r--r--Documentation/devicetree/bindings/hwlock/sprd-hwspinlock.txt23
-rw-r--r--Documentation/devicetree/bindings/hwlock/st,stm32-hwspinlock.yaml49
-rw-r--r--Documentation/devicetree/bindings/hwlock/ti,omap-hwspinlock.yaml46
-rw-r--r--Documentation/devicetree/bindings/hwmon/adc128d818.txt38
-rw-r--r--Documentation/devicetree/bindings/hwmon/adi,ad741x.yaml38
-rw-r--r--Documentation/devicetree/bindings/hwmon/adi,adm1177.yaml70
-rw-r--r--Documentation/devicetree/bindings/hwmon/adi,adm1266.yaml51
-rw-r--r--Documentation/devicetree/bindings/hwmon/adi,adm1275.yaml134
-rw-r--r--Documentation/devicetree/bindings/hwmon/adi,axi-fan-control.yaml63
-rw-r--r--Documentation/devicetree/bindings/hwmon/adi,ltc2945.yaml52
-rw-r--r--Documentation/devicetree/bindings/hwmon/adi,ltc2947.yaml101
-rw-r--r--Documentation/devicetree/bindings/hwmon/adi,ltc2991.yaml127
-rw-r--r--Documentation/devicetree/bindings/hwmon/adi,ltc2992.yaml81
-rw-r--r--Documentation/devicetree/bindings/hwmon/adi,ltc4282.yaml159
-rw-r--r--Documentation/devicetree/bindings/hwmon/adi,max31760.yaml42
-rw-r--r--Documentation/devicetree/bindings/hwmon/adi,max31827.yaml120
-rw-r--r--Documentation/devicetree/bindings/hwmon/ads1015.txt73
-rw-r--r--Documentation/devicetree/bindings/hwmon/ads7828.txt25
-rw-r--r--Documentation/devicetree/bindings/hwmon/adt7475.yaml141
-rw-r--r--Documentation/devicetree/bindings/hwmon/amd,sbrmi.yaml53
-rw-r--r--Documentation/devicetree/bindings/hwmon/amd,sbtsi.yaml54
-rw-r--r--Documentation/devicetree/bindings/hwmon/amphenol,chipcap2.yaml77
-rw-r--r--Documentation/devicetree/bindings/hwmon/aspeed,g6-pwm-tach.yaml71
-rw-r--r--Documentation/devicetree/bindings/hwmon/aspeed-pwm-tacho.txt2
-rw-r--r--Documentation/devicetree/bindings/hwmon/baikal,bt1-pvt.yaml105
-rw-r--r--Documentation/devicetree/bindings/hwmon/cirrus,lochnagar.yaml35
-rw-r--r--Documentation/devicetree/bindings/hwmon/fan-common.yaml79
-rw-r--r--Documentation/devicetree/bindings/hwmon/g762.txt47
-rw-r--r--Documentation/devicetree/bindings/hwmon/gmt,g762.yaml95
-rw-r--r--Documentation/devicetree/bindings/hwmon/gpio-fan.txt40
-rw-r--r--Documentation/devicetree/bindings/hwmon/gpio-fan.yaml63
-rw-r--r--Documentation/devicetree/bindings/hwmon/hpe,gxp-fan-ctrl.yaml45
-rw-r--r--Documentation/devicetree/bindings/hwmon/hwmon-common.yaml19
-rw-r--r--Documentation/devicetree/bindings/hwmon/ibm,cffps1.txt21
-rw-r--r--Documentation/devicetree/bindings/hwmon/ibm,occ-hwmon.yaml39
-rw-r--r--Documentation/devicetree/bindings/hwmon/ibm,opal-sensor.yaml37
-rw-r--r--Documentation/devicetree/bindings/hwmon/ibmpowernv.txt23
-rw-r--r--Documentation/devicetree/bindings/hwmon/iio-hwmon.yaml37
-rw-r--r--Documentation/devicetree/bindings/hwmon/ina2xx.txt24
-rw-r--r--Documentation/devicetree/bindings/hwmon/ina3221.txt44
-rw-r--r--Documentation/devicetree/bindings/hwmon/jc42.txt46
-rw-r--r--Documentation/devicetree/bindings/hwmon/jedec,jc42.yaml78
-rw-r--r--Documentation/devicetree/bindings/hwmon/kontron,sl28cpld-hwmon.yaml28
-rw-r--r--Documentation/devicetree/bindings/hwmon/lantiq,cputemp.yaml30
-rw-r--r--Documentation/devicetree/bindings/hwmon/lltc,ltc2978.yaml103
-rw-r--r--Documentation/devicetree/bindings/hwmon/lltc,ltc4151.yaml44
-rw-r--r--Documentation/devicetree/bindings/hwmon/lltc,ltc4286.yaml49
-rw-r--r--Documentation/devicetree/bindings/hwmon/lm70.txt22
-rw-r--r--Documentation/devicetree/bindings/hwmon/lm75.yaml105
-rw-r--r--Documentation/devicetree/bindings/hwmon/lm87.txt30
-rw-r--r--Documentation/devicetree/bindings/hwmon/lm90.txt50
-rw-r--r--Documentation/devicetree/bindings/hwmon/ltc2978.txt48
-rw-r--r--Documentation/devicetree/bindings/hwmon/ltc4151.txt18
-rw-r--r--Documentation/devicetree/bindings/hwmon/ltq-cputemp.txt10
-rw-r--r--Documentation/devicetree/bindings/hwmon/max1619.txt12
-rw-r--r--Documentation/devicetree/bindings/hwmon/max6650.txt28
-rw-r--r--Documentation/devicetree/bindings/hwmon/maxim,max20730.yaml65
-rw-r--r--Documentation/devicetree/bindings/hwmon/maxim,max31790.yaml70
-rw-r--r--Documentation/devicetree/bindings/hwmon/maxim,max6639.yaml91
-rw-r--r--Documentation/devicetree/bindings/hwmon/maxim,max6650.yaml69
-rw-r--r--Documentation/devicetree/bindings/hwmon/mcp3021.txt21
-rw-r--r--Documentation/devicetree/bindings/hwmon/microchip,emc2305.yaml111
-rw-r--r--Documentation/devicetree/bindings/hwmon/microchip,lan966x.yaml53
-rw-r--r--Documentation/devicetree/bindings/hwmon/microchip,mcp3021.yaml43
-rw-r--r--Documentation/devicetree/bindings/hwmon/microchip,sparx5-temp.yaml44
-rw-r--r--Documentation/devicetree/bindings/hwmon/moortec,mr75203.yaml164
-rw-r--r--Documentation/devicetree/bindings/hwmon/national,lm90.yaml241
-rw-r--r--Documentation/devicetree/bindings/hwmon/npcm750-pwm-fan.txt8
-rw-r--r--Documentation/devicetree/bindings/hwmon/ntc-thermistor.yaml141
-rw-r--r--Documentation/devicetree/bindings/hwmon/ntc_thermistor.txt43
-rw-r--r--Documentation/devicetree/bindings/hwmon/nuvoton,nct6775.yaml57
-rw-r--r--Documentation/devicetree/bindings/hwmon/nuvoton,nct7363.yaml65
-rw-r--r--Documentation/devicetree/bindings/hwmon/nuvoton,nct7802.yaml144
-rw-r--r--Documentation/devicetree/bindings/hwmon/nxp,mc34vr500.yaml36
-rw-r--r--Documentation/devicetree/bindings/hwmon/pmbus/adi,adp1050.yaml60
-rw-r--r--Documentation/devicetree/bindings/hwmon/pmbus/adi,lt3074.yaml50
-rw-r--r--Documentation/devicetree/bindings/hwmon/pmbus/infineon,tda38640.yaml77
-rw-r--r--Documentation/devicetree/bindings/hwmon/pmbus/isil,isl68137.yaml151
-rw-r--r--Documentation/devicetree/bindings/hwmon/pmbus/mps,mp2975.yaml75
-rw-r--r--Documentation/devicetree/bindings/hwmon/pmbus/mps,mpq8785.yaml74
-rw-r--r--Documentation/devicetree/bindings/hwmon/pmbus/ti,lm25066.yaml69
-rw-r--r--Documentation/devicetree/bindings/hwmon/pmbus/ti,tps25990.yaml83
-rw-r--r--Documentation/devicetree/bindings/hwmon/pmbus/ti,ucd90320.yaml62
-rw-r--r--Documentation/devicetree/bindings/hwmon/pmbus/vicor,pli1209bc.yaml62
-rw-r--r--Documentation/devicetree/bindings/hwmon/pwm-fan.txt37
-rw-r--r--Documentation/devicetree/bindings/hwmon/pwm-fan.yaml116
-rw-r--r--Documentation/devicetree/bindings/hwmon/renesas,isl28022.yaml64
-rw-r--r--Documentation/devicetree/bindings/hwmon/sensirion,sht15.yaml43
-rw-r--r--Documentation/devicetree/bindings/hwmon/sensirion,shtc1.yaml61
-rw-r--r--Documentation/devicetree/bindings/hwmon/sht15.txt19
-rw-r--r--Documentation/devicetree/bindings/hwmon/sophgo,sg2042-hwmon-mcu.yaml47
-rw-r--r--Documentation/devicetree/bindings/hwmon/st,stts751.yaml41
-rw-r--r--Documentation/devicetree/bindings/hwmon/starfive,jh71x0-temp.yaml70
-rw-r--r--Documentation/devicetree/bindings/hwmon/stts751.txt15
-rw-r--r--Documentation/devicetree/bindings/hwmon/syna,as370.yaml32
-rw-r--r--Documentation/devicetree/bindings/hwmon/ti,adc128d818.yaml62
-rw-r--r--Documentation/devicetree/bindings/hwmon/ti,ads7828.yaml56
-rw-r--r--Documentation/devicetree/bindings/hwmon/ti,amc6821.yaml108
-rw-r--r--Documentation/devicetree/bindings/hwmon/ti,ina2xx.yaml172
-rw-r--r--Documentation/devicetree/bindings/hwmon/ti,ina3221.yaml121
-rw-r--r--Documentation/devicetree/bindings/hwmon/ti,lm87.yaml70
-rw-r--r--Documentation/devicetree/bindings/hwmon/ti,tmp102.yaml56
-rw-r--r--Documentation/devicetree/bindings/hwmon/ti,tmp108.yaml66
-rw-r--r--Documentation/devicetree/bindings/hwmon/ti,tmp401.yaml104
-rw-r--r--Documentation/devicetree/bindings/hwmon/ti,tmp421.yaml109
-rw-r--r--Documentation/devicetree/bindings/hwmon/ti,tmp464.yaml113
-rw-r--r--Documentation/devicetree/bindings/hwmon/ti,tmp513.yaml93
-rw-r--r--Documentation/devicetree/bindings/hwmon/ti,tps23861.yaml53
-rw-r--r--Documentation/devicetree/bindings/hwmon/tmp108.txt14
-rw-r--r--Documentation/devicetree/bindings/hwmon/vexpress.txt2
-rw-r--r--Documentation/devicetree/bindings/hwmon/winbond,w83781d.yaml40
-rw-r--r--Documentation/devicetree/bindings/i2c/allwinner,sun6i-a31-p2wi.yaml63
-rw-r--r--Documentation/devicetree/bindings/i2c/amlogic,meson6-i2c.yaml58
-rw-r--r--Documentation/devicetree/bindings/i2c/apple,i2c.yaml77
-rw-r--r--Documentation/devicetree/bindings/i2c/arm,i2c-versatile.yaml29
-rw-r--r--Documentation/devicetree/bindings/i2c/aspeed,i2c.yaml68
-rw-r--r--Documentation/devicetree/bindings/i2c/atmel,at91sam-i2c.yaml142
-rw-r--r--Documentation/devicetree/bindings/i2c/brcm,bcm2835-i2c.txt20
-rw-r--r--Documentation/devicetree/bindings/i2c/brcm,bcm2835-i2c.yaml54
-rw-r--r--Documentation/devicetree/bindings/i2c/brcm,brcmstb-i2c.yaml96
-rw-r--r--Documentation/devicetree/bindings/i2c/brcm,iproc-i2c.txt37
-rw-r--r--Documentation/devicetree/bindings/i2c/brcm,iproc-i2c.yaml71
-rw-r--r--Documentation/devicetree/bindings/i2c/brcm,kona-i2c.txt35
-rw-r--r--Documentation/devicetree/bindings/i2c/brcm,kona-i2c.yaml59
-rw-r--r--Documentation/devicetree/bindings/i2c/cdns,i2c-r1p10.yaml75
-rw-r--r--Documentation/devicetree/bindings/i2c/google,cros-ec-i2c-tunnel.yaml66
-rw-r--r--Documentation/devicetree/bindings/i2c/hisilicon,ascend910-i2c.yaml73
-rw-r--r--Documentation/devicetree/bindings/i2c/hisilicon,hix5hd2-i2c.yaml51
-rw-r--r--Documentation/devicetree/bindings/i2c/hpe,gxp-i2c.yaml59
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-arb-gpio-challenge.txt82
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-arb-gpio-challenge.yaml135
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-arb.txt35
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-aspeed.txt48
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-at91.txt63
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-atr.yaml34
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-brcmstb.txt26
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-cadence.txt28
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-cros-ec-tunnel.txt39
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-davinci.txt4
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-demux-pinctrl.txt135
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-demux-pinctrl.yaml173
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-designware.txt64
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-efm32.txt33
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-emev2.txt22
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-exynos5.txt53
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-exynos5.yaml151
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-fsi.txt40
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-gate.txt41
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-gate.yaml38
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-gpio.txt46
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-gpio.yaml99
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-hix5hd2.txt24
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.txt19
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.yaml85
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-imx.txt49
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-imx.yaml127
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-iop3xx.txt20
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-jz4780.txt31
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-lpc2k.txt33
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-meson.txt30
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-mpc.txt62
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-mpc.yaml101
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-mt65xx.yaml136
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-mtk.txt43
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-mux-gpio.txt81
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-mux-gpio.yaml107
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-mux-gpmux.txt99
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-mux-gpmux.yaml123
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-mux-ltc4306.txt8
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-mux-pca954x.txt72
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-mux-pca954x.yaml174
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-mux-pinctrl.txt93
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-mux-pinctrl.yaml103
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-mux-reg.txt4
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-mux.txt73
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-mux.yaml87
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-mv64xxx.txt64
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-mxs.txt25
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-mxs.yaml54
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-nomadik.txt23
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-ocores.txt69
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-omap.txt35
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-owl.txt27
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-owl.yaml62
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-pnx.txt34
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-pxa-pci-ce4100.txt4
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-pxa.txt31
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-pxa.yaml74
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-rcar.txt61
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-riic.txt29
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-rk3x.txt68
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-rk3x.yaml148
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-s3c2410.txt58
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-sh_mobile.txt60
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-sirf.txt19
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-sprd.txt31
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-st-ddci2c.txt15
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-st.txt41
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-stm32.txt56
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-sunxi-p2wi.txt41
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-synquacer.txt29
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-uniphier-f.txt25
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-uniphier.txt25
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-versatile.txt10
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-virtio.yaml51
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-vt8500.txt24
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-xiic.txt25
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-xlp9xx.txt22
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-zx2967.txt22
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c.txt94
-rw-r--r--Documentation/devicetree/bindings/i2c/ibm,i2c-fsi.yaml76
-rw-r--r--Documentation/devicetree/bindings/i2c/ingenic,i2c.yaml90
-rw-r--r--Documentation/devicetree/bindings/i2c/loongson,ls2x-i2c.yaml51
-rw-r--r--Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml142
-rw-r--r--Documentation/devicetree/bindings/i2c/mediatek,mt7621-i2c.yaml61
-rw-r--r--Documentation/devicetree/bindings/i2c/microchip,corei2c.yaml58
-rw-r--r--Documentation/devicetree/bindings/i2c/nuvoton,npcm7xx-i2c.yaml77
-rw-r--r--Documentation/devicetree/bindings/i2c/nvidia,tegra186-bpmp-i2c.txt42
-rw-r--r--Documentation/devicetree/bindings/i2c/nvidia,tegra186-bpmp-i2c.yaml44
-rw-r--r--Documentation/devicetree/bindings/i2c/nvidia,tegra20-i2c.txt74
-rw-r--r--Documentation/devicetree/bindings/i2c/nvidia,tegra20-i2c.yaml224
-rw-r--r--Documentation/devicetree/bindings/i2c/nxp,lpc1788-i2c.yaml54
-rw-r--r--Documentation/devicetree/bindings/i2c/nxp,pca9541.txt29
-rw-r--r--Documentation/devicetree/bindings/i2c/nxp,pca9541.yaml56
-rw-r--r--Documentation/devicetree/bindings/i2c/nxp,pnx-i2c.yaml49
-rw-r--r--Documentation/devicetree/bindings/i2c/opencores,i2c-ocores.yaml114
-rw-r--r--Documentation/devicetree/bindings/i2c/qcom,i2c-cci.yaml363
-rw-r--r--Documentation/devicetree/bindings/i2c/qcom,i2c-geni-qcom.yaml147
-rw-r--r--Documentation/devicetree/bindings/i2c/qcom,i2c-qup.txt40
-rw-r--r--Documentation/devicetree/bindings/i2c/qcom,i2c-qup.yaml103
-rw-r--r--Documentation/devicetree/bindings/i2c/realtek,rtl9301-i2c.yaml105
-rw-r--r--Documentation/devicetree/bindings/i2c/renesas,iic-emev2.yaml54
-rw-r--r--Documentation/devicetree/bindings/i2c/renesas,rcar-i2c.yaml166
-rw-r--r--Documentation/devicetree/bindings/i2c/renesas,riic.yaml162
-rw-r--r--Documentation/devicetree/bindings/i2c/renesas,rmobile-iic.yaml149
-rw-r--r--Documentation/devicetree/bindings/i2c/renesas,rzv2m.yaml80
-rw-r--r--Documentation/devicetree/bindings/i2c/samsung,s3c2410-i2c.yaml163
-rw-r--r--Documentation/devicetree/bindings/i2c/snps,designware-i2c.yaml170
-rw-r--r--Documentation/devicetree/bindings/i2c/socionext,synquacer-i2c.yaml58
-rw-r--r--Documentation/devicetree/bindings/i2c/socionext,uniphier-fi2c.yaml55
-rw-r--r--Documentation/devicetree/bindings/i2c/socionext,uniphier-i2c.yaml55
-rw-r--r--Documentation/devicetree/bindings/i2c/spacemit,k1-i2c.yaml64
-rw-r--r--Documentation/devicetree/bindings/i2c/sprd,sc9860-i2c.yaml65
-rw-r--r--Documentation/devicetree/bindings/i2c/st,nomadik-i2c.yaml153
-rw-r--r--Documentation/devicetree/bindings/i2c/st,sti-i2c.yaml71
-rw-r--r--Documentation/devicetree/bindings/i2c/st,stm32-i2c.yaml193
-rw-r--r--Documentation/devicetree/bindings/i2c/ti,omap4-i2c.yaml96
-rw-r--r--Documentation/devicetree/bindings/i2c/tsd,mule-i2c-mux.yaml69
-rw-r--r--Documentation/devicetree/bindings/i2c/wm,wm8505-i2c.yaml47
-rw-r--r--Documentation/devicetree/bindings/i2c/xlnx,xps-iic-2.00.a.yaml60
-rw-r--r--Documentation/devicetree/bindings/i3c/adi,i3c-master.yaml72
-rw-r--r--Documentation/devicetree/bindings/i3c/aspeed,ast2600-i3c.yaml72
-rw-r--r--Documentation/devicetree/bindings/i3c/cdns,i3c-master.yaml65
-rw-r--r--Documentation/devicetree/bindings/i3c/i3c.yaml196
-rw-r--r--Documentation/devicetree/bindings/i3c/mipi-i3c-hci.yaml52
-rw-r--r--Documentation/devicetree/bindings/i3c/renesas,i3c.yaml185
-rw-r--r--Documentation/devicetree/bindings/i3c/silvaco,i3c-master.yaml95
-rw-r--r--Documentation/devicetree/bindings/i3c/snps,dw-i3c-master.yaml64
-rw-r--r--Documentation/devicetree/bindings/iio/accel/adi,adis16201.yaml56
-rw-r--r--Documentation/devicetree/bindings/iio/accel/adi,adis16240.yaml54
-rw-r--r--Documentation/devicetree/bindings/iio/accel/adi,adxl313.yaml89
-rw-r--r--Documentation/devicetree/bindings/iio/accel/adi,adxl345.yaml90
-rw-r--r--Documentation/devicetree/bindings/iio/accel/adi,adxl355.yaml89
-rw-r--r--Documentation/devicetree/bindings/iio/accel/adi,adxl367.yaml80
-rw-r--r--Documentation/devicetree/bindings/iio/accel/adi,adxl372.yaml67
-rw-r--r--Documentation/devicetree/bindings/iio/accel/adi,adxl380.yaml92
-rw-r--r--Documentation/devicetree/bindings/iio/accel/adxl345.txt39
-rw-r--r--Documentation/devicetree/bindings/iio/accel/adxl372.txt33
-rw-r--r--Documentation/devicetree/bindings/iio/accel/bma180.txt26
-rw-r--r--Documentation/devicetree/bindings/iio/accel/bosch,bma220.yaml51
-rw-r--r--Documentation/devicetree/bindings/iio/accel/bosch,bma255.yaml119
-rw-r--r--Documentation/devicetree/bindings/iio/accel/bosch,bma400.yaml55
-rw-r--r--Documentation/devicetree/bindings/iio/accel/bosch,bmi088.yaml71
-rw-r--r--Documentation/devicetree/bindings/iio/accel/dmard06.txt19
-rw-r--r--Documentation/devicetree/bindings/iio/accel/fsl,mma7455.yaml82
-rw-r--r--Documentation/devicetree/bindings/iio/accel/fsl,mma8452.yaml65
-rw-r--r--Documentation/devicetree/bindings/iio/accel/kionix,kx022a.yaml72
-rw-r--r--Documentation/devicetree/bindings/iio/accel/kionix,kxcjk1013.yaml54
-rw-r--r--Documentation/devicetree/bindings/iio/accel/kionix,kxsd9.txt22
-rw-r--r--Documentation/devicetree/bindings/iio/accel/kionix,kxsd9.yaml69
-rw-r--r--Documentation/devicetree/bindings/iio/accel/lis302.txt6
-rw-r--r--Documentation/devicetree/bindings/iio/accel/memsensing,msa311.yaml52
-rw-r--r--Documentation/devicetree/bindings/iio/accel/mma8452.txt31
-rw-r--r--Documentation/devicetree/bindings/iio/accel/murata,sca3300.yaml48
-rw-r--r--Documentation/devicetree/bindings/iio/accel/nxp,fxls8962af.yaml95
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adc.yaml87
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad4000.yaml246
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad4030.yaml110
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad4080.yaml96
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad4130.yaml262
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad4170-4.yaml554
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad4695.yaml268
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad4851.yaml155
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad7091r5.yaml127
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad7124.yaml182
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad7173.yaml483
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad7191.yaml149
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad7192.yaml257
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad7280a.yaml78
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad7291.yaml46
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad7292.yaml110
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad7298.yaml50
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad7380.yaml317
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad7405.yaml60
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad7476.yaml226
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml428
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad7625.yaml176
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad7768-1.yaml191
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad7779.yaml152
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad7780.yaml99
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad7923.yaml81
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad7944.yaml213
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad7949.yaml103
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad799x.yaml73
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad9467.yaml80
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ade9000.yaml95
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,axi-adc.yaml140
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,max11410.yaml177
-rw-r--r--Documentation/devicetree/bindings/iio/adc/allwinner,sun20i-d1-gpadc.yaml96
-rw-r--r--Documentation/devicetree/bindings/iio/adc/allwinner,sun8i-a33-ths.yaml43
-rw-r--r--Documentation/devicetree/bindings/iio/adc/amlogic,meson-saradc.txt37
-rw-r--r--Documentation/devicetree/bindings/iio/adc/amlogic,meson-saradc.yaml153
-rw-r--r--Documentation/devicetree/bindings/iio/adc/aspeed,ast2400-adc.yaml56
-rw-r--r--Documentation/devicetree/bindings/iio/adc/aspeed,ast2600-adc.yaml93
-rw-r--r--Documentation/devicetree/bindings/iio/adc/aspeed_adc.txt22
-rw-r--r--Documentation/devicetree/bindings/iio/adc/at91-sama5d2_adc.txt50
-rw-r--r--Documentation/devicetree/bindings/iio/adc/at91_adc.txt83
-rw-r--r--Documentation/devicetree/bindings/iio/adc/atmel,sama5d2-adc.yaml100
-rw-r--r--Documentation/devicetree/bindings/iio/adc/atmel,sama9260-adc.yaml121
-rw-r--r--Documentation/devicetree/bindings/iio/adc/avia-hx711.txt24
-rw-r--r--Documentation/devicetree/bindings/iio/adc/avia-hx711.yaml67
-rw-r--r--Documentation/devicetree/bindings/iio/adc/axp20x_adc.txt48
-rw-r--r--Documentation/devicetree/bindings/iio/adc/berlin2_adc.txt19
-rw-r--r--Documentation/devicetree/bindings/iio/adc/brcm,iproc-static-adc.txt40
-rw-r--r--Documentation/devicetree/bindings/iio/adc/brcm,iproc-static-adc.yaml65
-rw-r--r--Documentation/devicetree/bindings/iio/adc/cc10001_adc.txt22
-rw-r--r--Documentation/devicetree/bindings/iio/adc/cirrus,ep9301-adc.yaml47
-rw-r--r--Documentation/devicetree/bindings/iio/adc/cosmic,10001-adc.yaml59
-rw-r--r--Documentation/devicetree/bindings/iio/adc/cpcap-adc.txt17
-rw-r--r--Documentation/devicetree/bindings/iio/adc/da9150-gpadc.txt16
-rw-r--r--Documentation/devicetree/bindings/iio/adc/dlg,da9150-gpadc.yaml35
-rw-r--r--Documentation/devicetree/bindings/iio/adc/envelope-detector.txt54
-rw-r--r--Documentation/devicetree/bindings/iio/adc/envelope-detector.yaml86
-rw-r--r--Documentation/devicetree/bindings/iio/adc/fsl,imx25-gcq.txt57
-rw-r--r--Documentation/devicetree/bindings/iio/adc/fsl,imx25-gcq.yaml131
-rw-r--r--Documentation/devicetree/bindings/iio/adc/fsl,imx7d-adc.yaml62
-rw-r--r--Documentation/devicetree/bindings/iio/adc/fsl,vf610-adc.yaml88
-rw-r--r--Documentation/devicetree/bindings/iio/adc/gehc,pmc-adc.yaml86
-rw-r--r--Documentation/devicetree/bindings/iio/adc/hi8435.txt21
-rw-r--r--Documentation/devicetree/bindings/iio/adc/holt,hi8435.yaml51
-rw-r--r--Documentation/devicetree/bindings/iio/adc/imx7d-adc.txt22
-rw-r--r--Documentation/devicetree/bindings/iio/adc/ingenic,adc.yaml91
-rw-r--r--Documentation/devicetree/bindings/iio/adc/lltc,ltc2496.yaml46
-rw-r--r--Documentation/devicetree/bindings/iio/adc/lltc,ltc2497.yaml60
-rw-r--r--Documentation/devicetree/bindings/iio/adc/lpc1850-adc.txt20
-rw-r--r--Documentation/devicetree/bindings/iio/adc/ltc2497.txt13
-rw-r--r--Documentation/devicetree/bindings/iio/adc/marvell,berlin2-adc.yaml50
-rw-r--r--Documentation/devicetree/bindings/iio/adc/max1027-adc.txt20
-rw-r--r--Documentation/devicetree/bindings/iio/adc/max11100.txt18
-rw-r--r--Documentation/devicetree/bindings/iio/adc/max1118.txt21
-rw-r--r--Documentation/devicetree/bindings/iio/adc/max1363.txt63
-rw-r--r--Documentation/devicetree/bindings/iio/adc/max9611.txt27
-rw-r--r--Documentation/devicetree/bindings/iio/adc/maxim,max1027.yaml68
-rw-r--r--Documentation/devicetree/bindings/iio/adc/maxim,max11100.yaml52
-rw-r--r--Documentation/devicetree/bindings/iio/adc/maxim,max1118.yaml64
-rw-r--r--Documentation/devicetree/bindings/iio/adc/maxim,max11205.yaml69
-rw-r--r--Documentation/devicetree/bindings/iio/adc/maxim,max1238.yaml81
-rw-r--r--Documentation/devicetree/bindings/iio/adc/maxim,max1241.yaml68
-rw-r--r--Documentation/devicetree/bindings/iio/adc/maxim,max1363.yaml52
-rw-r--r--Documentation/devicetree/bindings/iio/adc/maxim,max34408.yaml139
-rw-r--r--Documentation/devicetree/bindings/iio/adc/maxim,max9611.yaml50
-rw-r--r--Documentation/devicetree/bindings/iio/adc/mcp320x.txt57
-rw-r--r--Documentation/devicetree/bindings/iio/adc/mcp3422.txt19
-rw-r--r--Documentation/devicetree/bindings/iio/adc/mcp3911.txt30
-rw-r--r--Documentation/devicetree/bindings/iio/adc/mediatek,mt2701-auxadc.yaml86
-rw-r--r--Documentation/devicetree/bindings/iio/adc/mediatek,mt6359-auxadc.yaml35
-rw-r--r--Documentation/devicetree/bindings/iio/adc/mediatek,mt6360-adc.yaml31
-rw-r--r--Documentation/devicetree/bindings/iio/adc/microchip,mcp3201.yaml79
-rw-r--r--Documentation/devicetree/bindings/iio/adc/microchip,mcp3564.yaml205
-rw-r--r--Documentation/devicetree/bindings/iio/adc/microchip,mcp3911.yaml93
-rw-r--r--Documentation/devicetree/bindings/iio/adc/microchip,pac1921.yaml71
-rw-r--r--Documentation/devicetree/bindings/iio/adc/microchip,pac1934.yaml120
-rw-r--r--Documentation/devicetree/bindings/iio/adc/motorola,cpcap-adc.yaml53
-rw-r--r--Documentation/devicetree/bindings/iio/adc/mt6577_auxadc.txt31
-rw-r--r--Documentation/devicetree/bindings/iio/adc/nuvoton,nau7802.yaml50
-rw-r--r--Documentation/devicetree/bindings/iio/adc/nuvoton,nct7201.yaml70
-rw-r--r--Documentation/devicetree/bindings/iio/adc/nuvoton,npcm750-adc.yaml67
-rw-r--r--Documentation/devicetree/bindings/iio/adc/nuvoton-nau7802.txt18
-rw-r--r--Documentation/devicetree/bindings/iio/adc/nxp,imx8qxp-adc.yaml85
-rw-r--r--Documentation/devicetree/bindings/iio/adc/nxp,imx93-adc.yaml86
-rw-r--r--Documentation/devicetree/bindings/iio/adc/nxp,lpc1850-adc.yaml61
-rw-r--r--Documentation/devicetree/bindings/iio/adc/nxp,lpc3220-adc.yaml53
-rw-r--r--Documentation/devicetree/bindings/iio/adc/palmas-gpadc.txt48
-rw-r--r--Documentation/devicetree/bindings/iio/adc/qcom,pm8018-adc.yaml166
-rw-r--r--Documentation/devicetree/bindings/iio/adc/qcom,pm8xxx-xoadc.txt157
-rw-r--r--Documentation/devicetree/bindings/iio/adc/qcom,spmi-iadc.txt46
-rw-r--r--Documentation/devicetree/bindings/iio/adc/qcom,spmi-iadc.yaml66
-rw-r--r--Documentation/devicetree/bindings/iio/adc/qcom,spmi-rradc.yaml51
-rw-r--r--Documentation/devicetree/bindings/iio/adc/qcom,spmi-vadc.txt168
-rw-r--r--Documentation/devicetree/bindings/iio/adc/qcom,spmi-vadc.yaml308
-rw-r--r--Documentation/devicetree/bindings/iio/adc/renesas,gyroadc.txt98
-rw-r--r--Documentation/devicetree/bindings/iio/adc/renesas,rcar-gyroadc.yaml141
-rw-r--r--Documentation/devicetree/bindings/iio/adc/renesas,rzg2l-adc.yaml170
-rw-r--r--Documentation/devicetree/bindings/iio/adc/richtek,rtq6056.yaml63
-rw-r--r--Documentation/devicetree/bindings/iio/adc/rockchip-saradc.txt37
-rw-r--r--Documentation/devicetree/bindings/iio/adc/rockchip-saradc.yaml91
-rw-r--r--Documentation/devicetree/bindings/iio/adc/rohm,bd79104.yaml76
-rw-r--r--Documentation/devicetree/bindings/iio/adc/rohm,bd79112.yaml104
-rw-r--r--Documentation/devicetree/bindings/iio/adc/rohm,bd79124.yaml114
-rw-r--r--Documentation/devicetree/bindings/iio/adc/samsung,exynos-adc.txt103
-rw-r--r--Documentation/devicetree/bindings/iio/adc/samsung,exynos-adc.yaml167
-rw-r--r--Documentation/devicetree/bindings/iio/adc/sigma-delta-modulator.txt13
-rw-r--r--Documentation/devicetree/bindings/iio/adc/sigma-delta-modulator.yaml58
-rw-r--r--Documentation/devicetree/bindings/iio/adc/sophgo,cv1800b-saradc.yaml83
-rw-r--r--Documentation/devicetree/bindings/iio/adc/sprd,sc2720-adc.yaml103
-rw-r--r--Documentation/devicetree/bindings/iio/adc/sprd,sc27xx-adc.txt40
-rw-r--r--Documentation/devicetree/bindings/iio/adc/st,spear600-adc.yaml69
-rw-r--r--Documentation/devicetree/bindings/iio/adc/st,stm32-adc.txt140
-rw-r--r--Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml614
-rw-r--r--Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.txt135
-rw-r--r--Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.yaml429
-rw-r--r--Documentation/devicetree/bindings/iio/adc/st,stmpe-adc.yaml42
-rw-r--r--Documentation/devicetree/bindings/iio/adc/ti,adc081c.yaml55
-rw-r--r--Documentation/devicetree/bindings/iio/adc/ti,adc0832.yaml57
-rw-r--r--Documentation/devicetree/bindings/iio/adc/ti,adc084s021.yaml59
-rw-r--r--Documentation/devicetree/bindings/iio/adc/ti,adc108s102.yaml49
-rw-r--r--Documentation/devicetree/bindings/iio/adc/ti,adc12138.yaml87
-rw-r--r--Documentation/devicetree/bindings/iio/adc/ti,adc128s052.yaml59
-rw-r--r--Documentation/devicetree/bindings/iio/adc/ti,adc161s626.yaml52
-rw-r--r--Documentation/devicetree/bindings/iio/adc/ti,ads1015.yaml131
-rw-r--r--Documentation/devicetree/bindings/iio/adc/ti,ads1100.yaml46
-rw-r--r--Documentation/devicetree/bindings/iio/adc/ti,ads1119.yaml155
-rw-r--r--Documentation/devicetree/bindings/iio/adc/ti,ads124s08.yaml53
-rw-r--r--Documentation/devicetree/bindings/iio/adc/ti,ads1298.yaml79
-rw-r--r--Documentation/devicetree/bindings/iio/adc/ti,ads131e08.yaml182
-rw-r--r--Documentation/devicetree/bindings/iio/adc/ti,ads7138.yaml63
-rw-r--r--Documentation/devicetree/bindings/iio/adc/ti,ads7924.yaml110
-rw-r--r--Documentation/devicetree/bindings/iio/adc/ti,ads7950.yaml65
-rw-r--r--Documentation/devicetree/bindings/iio/adc/ti,ads8344.yaml52
-rw-r--r--Documentation/devicetree/bindings/iio/adc/ti,ads8688.yaml50
-rw-r--r--Documentation/devicetree/bindings/iio/adc/ti,am3359-adc.yaml75
-rw-r--r--Documentation/devicetree/bindings/iio/adc/ti,lmp92064.yaml70
-rw-r--r--Documentation/devicetree/bindings/iio/adc/ti,palmas-gpadc.yaml70
-rw-r--r--Documentation/devicetree/bindings/iio/adc/ti,tlc4541.yaml53
-rw-r--r--Documentation/devicetree/bindings/iio/adc/ti,tsc2046.yaml119
-rw-r--r--Documentation/devicetree/bindings/iio/adc/ti,twl4030-madc.yaml48
-rw-r--r--Documentation/devicetree/bindings/iio/adc/ti,twl6030-gpadc.yaml43
-rw-r--r--Documentation/devicetree/bindings/iio/adc/ti-adc0832.txt19
-rw-r--r--Documentation/devicetree/bindings/iio/adc/ti-adc084s021.txt19
-rw-r--r--Documentation/devicetree/bindings/iio/adc/ti-adc108s102.txt18
-rw-r--r--Documentation/devicetree/bindings/iio/adc/ti-adc12138.txt37
-rw-r--r--Documentation/devicetree/bindings/iio/adc/ti-adc128s052.txt18
-rw-r--r--Documentation/devicetree/bindings/iio/adc/ti-adc161s626.txt18
-rw-r--r--Documentation/devicetree/bindings/iio/adc/ti-ads7950.txt23
-rw-r--r--Documentation/devicetree/bindings/iio/adc/ti-ads8688.txt20
-rw-r--r--Documentation/devicetree/bindings/iio/adc/twl4030-madc.txt24
-rw-r--r--Documentation/devicetree/bindings/iio/adc/vf610-adc.txt36
-rw-r--r--Documentation/devicetree/bindings/iio/adc/x-powers,axp209-adc.yaml86
-rw-r--r--Documentation/devicetree/bindings/iio/adc/xilinx-xadc.txt49
-rw-r--r--Documentation/devicetree/bindings/iio/adc/xlnx,zynqmp-ams.yaml236
-rw-r--r--Documentation/devicetree/bindings/iio/addac/adi,ad74115.yaml371
-rw-r--r--Documentation/devicetree/bindings/iio/addac/adi,ad74413r.yaml172
-rw-r--r--Documentation/devicetree/bindings/iio/afe/current-sense-amplifier.txt26
-rw-r--r--Documentation/devicetree/bindings/iio/afe/current-sense-amplifier.yaml58
-rw-r--r--Documentation/devicetree/bindings/iio/afe/current-sense-shunt.txt41
-rw-r--r--Documentation/devicetree/bindings/iio/afe/current-sense-shunt.yaml68
-rw-r--r--Documentation/devicetree/bindings/iio/afe/temperature-sense-rtd.yaml101
-rw-r--r--Documentation/devicetree/bindings/iio/afe/temperature-transducer.yaml114
-rw-r--r--Documentation/devicetree/bindings/iio/afe/voltage-divider.txt53
-rw-r--r--Documentation/devicetree/bindings/iio/afe/voltage-divider.yaml97
-rw-r--r--Documentation/devicetree/bindings/iio/amplifiers/adi,ada4250.yaml51
-rw-r--r--Documentation/devicetree/bindings/iio/amplifiers/adi,hmc425a.yaml101
-rw-r--r--Documentation/devicetree/bindings/iio/cdc/adi,ad7150.yaml69
-rw-r--r--Documentation/devicetree/bindings/iio/cdc/adi,ad7746.yaml77
-rw-r--r--Documentation/devicetree/bindings/iio/chemical/ams,ccs811.yaml53
-rw-r--r--Documentation/devicetree/bindings/iio/chemical/aosong,ags02ma.yaml47
-rw-r--r--Documentation/devicetree/bindings/iio/chemical/atlas,ec-sm.txt21
-rw-r--r--Documentation/devicetree/bindings/iio/chemical/atlas,orp-sm.txt21
-rw-r--r--Documentation/devicetree/bindings/iio/chemical/atlas,ph-sm.txt21
-rw-r--r--Documentation/devicetree/bindings/iio/chemical/atlas,sensor.yaml61
-rw-r--r--Documentation/devicetree/bindings/iio/chemical/bosch,bme680.yaml62
-rw-r--r--Documentation/devicetree/bindings/iio/chemical/plantower,pms7003.yaml52
-rw-r--r--Documentation/devicetree/bindings/iio/chemical/sciosense,ens160.yaml70
-rw-r--r--Documentation/devicetree/bindings/iio/chemical/senseair,sunrise.yaml55
-rw-r--r--Documentation/devicetree/bindings/iio/chemical/sensirion,scd30.yaml68
-rw-r--r--Documentation/devicetree/bindings/iio/chemical/sensirion,scd4x.yaml46
-rw-r--r--Documentation/devicetree/bindings/iio/chemical/sensirion,sps30.yaml46
-rw-r--r--Documentation/devicetree/bindings/iio/chemical/winsen,mhz19b.yaml33
-rw-r--r--Documentation/devicetree/bindings/iio/common.yaml37
-rw-r--r--Documentation/devicetree/bindings/iio/counter/stm32-lptimer-cnt.txt27
-rw-r--r--Documentation/devicetree/bindings/iio/dac/ad5592r.txt155
-rw-r--r--Documentation/devicetree/bindings/iio/dac/ad5755.txt124
-rw-r--r--Documentation/devicetree/bindings/iio/dac/ad5758.txt83
-rw-r--r--Documentation/devicetree/bindings/iio/dac/ad7303.txt23
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ad3530r.yaml100
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ad3552r.yaml248
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ad5064.yaml267
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ad5360.yaml78
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ad5380.yaml72
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ad5421.yaml52
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ad5449.yaml96
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ad5504.yaml50
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ad5592r.yaml204
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ad5624r.yaml48
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ad5686.yaml58
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ad5696.yaml58
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ad5755.yaml169
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ad5758.yaml140
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ad5761.yaml59
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ad5764.yaml61
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ad5766.yaml65
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ad5770r.yaml196
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ad5791.yaml97
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ad7293.yaml63
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ad7303.yaml50
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ad8460.yaml164
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ad8801.yaml59
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ad9739a.yaml95
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,axi-dac.yaml125
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ltc2664.yaml181
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ltc2672.yaml160
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ltc2688.yaml147
-rw-r--r--Documentation/devicetree/bindings/iio/dac/dac.yaml50
-rw-r--r--Documentation/devicetree/bindings/iio/dac/dpot-dac.txt41
-rw-r--r--Documentation/devicetree/bindings/iio/dac/dpot-dac.yaml64
-rw-r--r--Documentation/devicetree/bindings/iio/dac/ds4424.txt20
-rw-r--r--Documentation/devicetree/bindings/iio/dac/fsl,vf610-dac.yaml55
-rw-r--r--Documentation/devicetree/bindings/iio/dac/lltc,ltc1660.yaml51
-rw-r--r--Documentation/devicetree/bindings/iio/dac/lltc,ltc2632.yaml77
-rw-r--r--Documentation/devicetree/bindings/iio/dac/lpc1850-dac.txt19
-rw-r--r--Documentation/devicetree/bindings/iio/dac/ltc1660.txt21
-rw-r--r--Documentation/devicetree/bindings/iio/dac/ltc2632.txt37
-rw-r--r--Documentation/devicetree/bindings/iio/dac/max5821.txt14
-rw-r--r--Documentation/devicetree/bindings/iio/dac/maxim,ds4424.yaml45
-rw-r--r--Documentation/devicetree/bindings/iio/dac/maxim,max5522.yaml49
-rw-r--r--Documentation/devicetree/bindings/iio/dac/maxim,max5821.yaml44
-rw-r--r--Documentation/devicetree/bindings/iio/dac/mcp4725.txt35
-rw-r--r--Documentation/devicetree/bindings/iio/dac/microchip,mcp4725.yaml90
-rw-r--r--Documentation/devicetree/bindings/iio/dac/microchip,mcp4728.yaml49
-rw-r--r--Documentation/devicetree/bindings/iio/dac/microchip,mcp4821.yaml86
-rw-r--r--Documentation/devicetree/bindings/iio/dac/microchip,mcp4922.yaml48
-rw-r--r--Documentation/devicetree/bindings/iio/dac/nxp,lpc1850-dac.yaml58
-rw-r--r--Documentation/devicetree/bindings/iio/dac/rohm,bd79703.yaml81
-rw-r--r--Documentation/devicetree/bindings/iio/dac/st,stm32-dac.txt63
-rw-r--r--Documentation/devicetree/bindings/iio/dac/st,stm32-dac.yaml114
-rw-r--r--Documentation/devicetree/bindings/iio/dac/ti,dac082s085.yaml73
-rw-r--r--Documentation/devicetree/bindings/iio/dac/ti,dac5571.txt24
-rw-r--r--Documentation/devicetree/bindings/iio/dac/ti,dac5571.yaml54
-rw-r--r--Documentation/devicetree/bindings/iio/dac/ti,dac7311.yaml50
-rw-r--r--Documentation/devicetree/bindings/iio/dac/ti,dac7512.txt20
-rw-r--r--Documentation/devicetree/bindings/iio/dac/ti,dac7512.yaml42
-rw-r--r--Documentation/devicetree/bindings/iio/dac/ti,dac7612.yaml54
-rw-r--r--Documentation/devicetree/bindings/iio/dac/ti-dac082s085.txt34
-rw-r--r--Documentation/devicetree/bindings/iio/dac/vf610-dac.txt20
-rw-r--r--Documentation/devicetree/bindings/iio/filter/adi,admv8818.yaml86
-rw-r--r--Documentation/devicetree/bindings/iio/frequency/adf4350.txt86
-rw-r--r--Documentation/devicetree/bindings/iio/frequency/adf4371.yaml70
-rw-r--r--Documentation/devicetree/bindings/iio/frequency/adi,adf4350.yaml199
-rw-r--r--Documentation/devicetree/bindings/iio/frequency/adi,adf4377.yaml102
-rw-r--r--Documentation/devicetree/bindings/iio/frequency/adi,admfm2000.yaml127
-rw-r--r--Documentation/devicetree/bindings/iio/frequency/adi,admv1013.yaml154
-rw-r--r--Documentation/devicetree/bindings/iio/frequency/adi,admv1014.yaml145
-rw-r--r--Documentation/devicetree/bindings/iio/frequency/adi,admv4420.yaml59
-rw-r--r--Documentation/devicetree/bindings/iio/frequency/adi,adrf6780.yaml134
-rw-r--r--Documentation/devicetree/bindings/iio/gyroscope/adi,adxrs290.yaml62
-rw-r--r--Documentation/devicetree/bindings/iio/gyroscope/bosch,bmg160.yaml57
-rw-r--r--Documentation/devicetree/bindings/iio/gyroscope/invensense,itg3200.yaml59
-rw-r--r--Documentation/devicetree/bindings/iio/gyroscope/invensense,mpu3050.txt45
-rw-r--r--Documentation/devicetree/bindings/iio/gyroscope/invensense,mpu3050.yaml70
-rw-r--r--Documentation/devicetree/bindings/iio/gyroscope/nxp,fxas21002c.yaml98
-rw-r--r--Documentation/devicetree/bindings/iio/health/afe4403.txt33
-rw-r--r--Documentation/devicetree/bindings/iio/health/afe4404.txt29
-rw-r--r--Documentation/devicetree/bindings/iio/health/max30100.txt28
-rw-r--r--Documentation/devicetree/bindings/iio/health/max30102.txt33
-rw-r--r--Documentation/devicetree/bindings/iio/health/maxim,max30100.yaml51
-rw-r--r--Documentation/devicetree/bindings/iio/health/maxim,max30102.yaml76
-rw-r--r--Documentation/devicetree/bindings/iio/health/ti,afe4403.yaml56
-rw-r--r--Documentation/devicetree/bindings/iio/health/ti,afe4404.yaml52
-rw-r--r--Documentation/devicetree/bindings/iio/humidity/dht11.txt14
-rw-r--r--Documentation/devicetree/bindings/iio/humidity/dht11.yaml41
-rw-r--r--Documentation/devicetree/bindings/iio/humidity/hdc100x.txt17
-rw-r--r--Documentation/devicetree/bindings/iio/humidity/hts221.txt30
-rw-r--r--Documentation/devicetree/bindings/iio/humidity/htu21.txt13
-rw-r--r--Documentation/devicetree/bindings/iio/humidity/sciosense,ens210.yaml55
-rw-r--r--Documentation/devicetree/bindings/iio/humidity/st,hts221.yaml54
-rw-r--r--Documentation/devicetree/bindings/iio/humidity/ti,hdc2010.yaml49
-rw-r--r--Documentation/devicetree/bindings/iio/humidity/ti,hdc3020.yaml63
-rw-r--r--Documentation/devicetree/bindings/iio/iio-bindings.txt97
-rw-r--r--Documentation/devicetree/bindings/iio/impedance-analyzer/adi,ad5933.yaml59
-rw-r--r--Documentation/devicetree/bindings/iio/imu/adi,adis16460.yaml61
-rw-r--r--Documentation/devicetree/bindings/iio/imu/adi,adis16475.yaml165
-rw-r--r--Documentation/devicetree/bindings/iio/imu/adi,adis16480.yaml144
-rw-r--r--Documentation/devicetree/bindings/iio/imu/adi,adis16550.yaml74
-rw-r--r--Documentation/devicetree/bindings/iio/imu/bmi160.txt35
-rw-r--r--Documentation/devicetree/bindings/iio/imu/bosch,bmi160.yaml99
-rw-r--r--Documentation/devicetree/bindings/iio/imu/bosch,bmi270.yaml80
-rw-r--r--Documentation/devicetree/bindings/iio/imu/bosch,bmi323.yaml78
-rw-r--r--Documentation/devicetree/bindings/iio/imu/bosch,bno055.yaml59
-rw-r--r--Documentation/devicetree/bindings/iio/imu/bosch,smi240.yaml51
-rw-r--r--Documentation/devicetree/bindings/iio/imu/inv_mpu6050.txt60
-rw-r--r--Documentation/devicetree/bindings/iio/imu/invensense,icm42600.yaml110
-rw-r--r--Documentation/devicetree/bindings/iio/imu/invensense,mpu6050.yaml122
-rw-r--r--Documentation/devicetree/bindings/iio/imu/nxp,fxos8700.yaml79
-rw-r--r--Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml125
-rw-r--r--Documentation/devicetree/bindings/iio/imu/st_lsm6dsx.txt38
-rw-r--r--Documentation/devicetree/bindings/iio/light/adux1020.yaml49
-rw-r--r--Documentation/devicetree/bindings/iio/light/ams,as73211.yaml57
-rw-r--r--Documentation/devicetree/bindings/iio/light/amstaos,tsl2563.yaml50
-rw-r--r--Documentation/devicetree/bindings/iio/light/amstaos,tsl2591.yaml50
-rw-r--r--Documentation/devicetree/bindings/iio/light/apds9300.txt21
-rw-r--r--Documentation/devicetree/bindings/iio/light/apds9960.txt21
-rw-r--r--Documentation/devicetree/bindings/iio/light/avago,apds9300.yaml54
-rw-r--r--Documentation/devicetree/bindings/iio/light/bh1750.txt18
-rw-r--r--Documentation/devicetree/bindings/iio/light/bh1750.yaml51
-rw-r--r--Documentation/devicetree/bindings/iio/light/brcm,apds9160.yaml78
-rw-r--r--Documentation/devicetree/bindings/iio/light/capella,cm3605.yaml78
-rw-r--r--Documentation/devicetree/bindings/iio/light/capella,cm36651.yaml48
-rw-r--r--Documentation/devicetree/bindings/iio/light/cm3605.txt41
-rw-r--r--Documentation/devicetree/bindings/iio/light/cm36651.txt26
-rw-r--r--Documentation/devicetree/bindings/iio/light/dynaimage,al3010.yaml47
-rw-r--r--Documentation/devicetree/bindings/iio/light/dynaimage,al3320a.yaml45
-rw-r--r--Documentation/devicetree/bindings/iio/light/gp2ap020a00f.txt21
-rw-r--r--Documentation/devicetree/bindings/iio/light/isl29018.txt27
-rw-r--r--Documentation/devicetree/bindings/iio/light/isl29018.yaml58
-rw-r--r--Documentation/devicetree/bindings/iio/light/isl29501.txt13
-rw-r--r--Documentation/devicetree/bindings/iio/light/liteon,ltr390.yaml56
-rw-r--r--Documentation/devicetree/bindings/iio/light/liteon,ltr501.yaml58
-rw-r--r--Documentation/devicetree/bindings/iio/light/liteon,ltrf216a.yaml51
-rw-r--r--Documentation/devicetree/bindings/iio/light/maxim,max44009.yaml45
-rw-r--r--Documentation/devicetree/bindings/iio/light/noa1305.yaml46
-rw-r--r--Documentation/devicetree/bindings/iio/light/opt3001.txt25
-rw-r--r--Documentation/devicetree/bindings/iio/light/rohm,bh1745.yaml53
-rw-r--r--Documentation/devicetree/bindings/iio/light/rohm,bu27034anuc.yaml45
-rw-r--r--Documentation/devicetree/bindings/iio/light/sharp,gp2ap002.yaml87
-rw-r--r--Documentation/devicetree/bindings/iio/light/sharp,gp2ap020a00f.yaml48
-rw-r--r--Documentation/devicetree/bindings/iio/light/st,uvis25.yaml42
-rw-r--r--Documentation/devicetree/bindings/iio/light/st,vl6180.yaml44
-rw-r--r--Documentation/devicetree/bindings/iio/light/stk33xx.yaml62
-rw-r--r--Documentation/devicetree/bindings/iio/light/ti,opt3001.yaml49
-rw-r--r--Documentation/devicetree/bindings/iio/light/ti,opt4001.yaml68
-rw-r--r--Documentation/devicetree/bindings/iio/light/ti,opt4060.yaml51
-rw-r--r--Documentation/devicetree/bindings/iio/light/tsl2563.txt19
-rw-r--r--Documentation/devicetree/bindings/iio/light/tsl2583.txt25
-rw-r--r--Documentation/devicetree/bindings/iio/light/tsl2583.yaml48
-rw-r--r--Documentation/devicetree/bindings/iio/light/tsl2772.txt42
-rw-r--r--Documentation/devicetree/bindings/iio/light/tsl2772.yaml84
-rw-r--r--Documentation/devicetree/bindings/iio/light/upisemi,us5182.yaml78
-rw-r--r--Documentation/devicetree/bindings/iio/light/us5182d.txt45
-rw-r--r--Documentation/devicetree/bindings/iio/light/uvis25.txt22
-rw-r--r--Documentation/devicetree/bindings/iio/light/vishay,vcnl4000.yaml54
-rw-r--r--Documentation/devicetree/bindings/iio/light/vishay,vcnl4035.yaml45
-rw-r--r--Documentation/devicetree/bindings/iio/light/vishay,veml6030.yaml107
-rw-r--r--Documentation/devicetree/bindings/iio/light/vishay,veml6046x00.yaml51
-rw-r--r--Documentation/devicetree/bindings/iio/light/vishay,veml6075.yaml64
-rw-r--r--Documentation/devicetree/bindings/iio/light/vl6180.txt15
-rw-r--r--Documentation/devicetree/bindings/iio/magnetometer/ak8974.txt29
-rw-r--r--Documentation/devicetree/bindings/iio/magnetometer/ak8975.txt30
-rw-r--r--Documentation/devicetree/bindings/iio/magnetometer/allegromicro,als31300.yaml46
-rw-r--r--Documentation/devicetree/bindings/iio/magnetometer/asahi-kasei,ak8974.yaml57
-rw-r--r--Documentation/devicetree/bindings/iio/magnetometer/asahi-kasei,ak8975.yaml96
-rw-r--r--Documentation/devicetree/bindings/iio/magnetometer/bmc150_magn.txt21
-rw-r--r--Documentation/devicetree/bindings/iio/magnetometer/bosch,bmc150_magn.yaml61
-rw-r--r--Documentation/devicetree/bindings/iio/magnetometer/fsl,mag3110.yaml48
-rw-r--r--Documentation/devicetree/bindings/iio/magnetometer/hmc5843.txt21
-rw-r--r--Documentation/devicetree/bindings/iio/magnetometer/honeywell,hmc5843.yaml43
-rw-r--r--Documentation/devicetree/bindings/iio/magnetometer/infineon,tlv493d-a1b6.yaml45
-rw-r--r--Documentation/devicetree/bindings/iio/magnetometer/mmc35240.txt13
-rw-r--r--Documentation/devicetree/bindings/iio/magnetometer/pni,rm3100.yaml42
-rw-r--r--Documentation/devicetree/bindings/iio/magnetometer/silabs,si7210.yaml48
-rw-r--r--Documentation/devicetree/bindings/iio/magnetometer/ti,tmag5273.yaml75
-rw-r--r--Documentation/devicetree/bindings/iio/magnetometer/voltafield,af8133j.yaml59
-rw-r--r--Documentation/devicetree/bindings/iio/magnetometer/yamaha,yas530.yaml112
-rw-r--r--Documentation/devicetree/bindings/iio/mount-matrix.txt203
-rw-r--r--Documentation/devicetree/bindings/iio/multiplexer/io-channel-mux.txt39
-rw-r--r--Documentation/devicetree/bindings/iio/multiplexer/io-channel-mux.yaml79
-rw-r--r--Documentation/devicetree/bindings/iio/potentiometer/ad5272.txt27
-rw-r--r--Documentation/devicetree/bindings/iio/potentiometer/adi,ad5272.yaml51
-rw-r--r--Documentation/devicetree/bindings/iio/potentiometer/ds1803.txt21
-rw-r--r--Documentation/devicetree/bindings/iio/potentiometer/max5432.yaml44
-rw-r--r--Documentation/devicetree/bindings/iio/potentiometer/max5481.txt23
-rw-r--r--Documentation/devicetree/bindings/iio/potentiometer/mcp4131.txt84
-rw-r--r--Documentation/devicetree/bindings/iio/potentiometer/microchip,mcp41010.yaml49
-rw-r--r--Documentation/devicetree/bindings/iio/potentiometer/microchip,mcp4131.yaml104
-rw-r--r--Documentation/devicetree/bindings/iio/potentiometer/microchip,mcp4531.yaml116
-rw-r--r--Documentation/devicetree/bindings/iio/potentiometer/renesas,x9250.yaml78
-rw-r--r--Documentation/devicetree/bindings/iio/potentiostat/lmp91000.txt33
-rw-r--r--Documentation/devicetree/bindings/iio/potentiostat/ti,lmp91000.yaml68
-rw-r--r--Documentation/devicetree/bindings/iio/pressure/asc,dlhl60d.yaml53
-rw-r--r--Documentation/devicetree/bindings/iio/pressure/bmp085.txt27
-rw-r--r--Documentation/devicetree/bindings/iio/pressure/bmp085.yaml123
-rw-r--r--Documentation/devicetree/bindings/iio/pressure/honeywell,hsc030pa.yaml145
-rw-r--r--Documentation/devicetree/bindings/iio/pressure/honeywell,mprls0025pa.yaml158
-rw-r--r--Documentation/devicetree/bindings/iio/pressure/hoperf,hp03.yaml47
-rw-r--r--Documentation/devicetree/bindings/iio/pressure/hp03.txt17
-rw-r--r--Documentation/devicetree/bindings/iio/pressure/invensense,icp10100.yaml52
-rw-r--r--Documentation/devicetree/bindings/iio/pressure/meas,ms5611.yaml61
-rw-r--r--Documentation/devicetree/bindings/iio/pressure/ms5611.txt19
-rw-r--r--Documentation/devicetree/bindings/iio/pressure/ms5637.txt17
-rw-r--r--Documentation/devicetree/bindings/iio/pressure/murata,zpa2326.yaml65
-rw-r--r--Documentation/devicetree/bindings/iio/pressure/rohm,bm1390.yaml52
-rw-r--r--Documentation/devicetree/bindings/iio/pressure/sensirion,sdp500.yaml46
-rw-r--r--Documentation/devicetree/bindings/iio/pressure/zpa2326.txt29
-rw-r--r--Documentation/devicetree/bindings/iio/proximity/ams,as3935.yaml74
-rw-r--r--Documentation/devicetree/bindings/iio/proximity/as3935.txt34
-rw-r--r--Documentation/devicetree/bindings/iio/proximity/awinic,aw96103.yaml61
-rw-r--r--Documentation/devicetree/bindings/iio/proximity/devantech-srf04.txt28
-rw-r--r--Documentation/devicetree/bindings/iio/proximity/devantech-srf04.yaml86
-rw-r--r--Documentation/devicetree/bindings/iio/proximity/google,cros-ec-mkbp-proximity.yaml35
-rw-r--r--Documentation/devicetree/bindings/iio/proximity/maxbotix,mb1232.yaml60
-rw-r--r--Documentation/devicetree/bindings/iio/proximity/murata,irsd200.yaml60
-rw-r--r--Documentation/devicetree/bindings/iio/proximity/nicera,d3323aa.yaml62
-rw-r--r--Documentation/devicetree/bindings/iio/proximity/parallax-ping.yaml53
-rw-r--r--Documentation/devicetree/bindings/iio/proximity/semtech,sx9310.yaml131
-rw-r--r--Documentation/devicetree/bindings/iio/proximity/semtech,sx9324.yaml203
-rw-r--r--Documentation/devicetree/bindings/iio/proximity/semtech,sx9360.yaml98
-rw-r--r--Documentation/devicetree/bindings/iio/proximity/semtech,sx9500.yaml50
-rw-r--r--Documentation/devicetree/bindings/iio/proximity/st,vl53l0x.yaml47
-rw-r--r--Documentation/devicetree/bindings/iio/proximity/sx9500.txt23
-rw-r--r--Documentation/devicetree/bindings/iio/proximity/tyhx,hx9023s.yaml93
-rw-r--r--Documentation/devicetree/bindings/iio/proximity/vishay,vcnl3020.yaml64
-rw-r--r--Documentation/devicetree/bindings/iio/proximity/vl53l0x.txt12
-rw-r--r--Documentation/devicetree/bindings/iio/resolver/adi,ad2s1210.yaml177
-rw-r--r--Documentation/devicetree/bindings/iio/resolver/adi,ad2s90.yaml63
-rw-r--r--Documentation/devicetree/bindings/iio/samsung,sensorhub-rinato.yaml73
-rw-r--r--Documentation/devicetree/bindings/iio/sensorhub.txt24
-rw-r--r--Documentation/devicetree/bindings/iio/st,st-sensors.yaml202
-rw-r--r--Documentation/devicetree/bindings/iio/st-sensors.txt77
-rw-r--r--Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml560
-rw-r--r--Documentation/devicetree/bindings/iio/temperature/maxim,max31855k.yaml76
-rw-r--r--Documentation/devicetree/bindings/iio/temperature/maxim,max31856.yaml56
-rw-r--r--Documentation/devicetree/bindings/iio/temperature/maxim,max31865.yaml54
-rw-r--r--Documentation/devicetree/bindings/iio/temperature/maxim_thermocouple.txt21
-rw-r--r--Documentation/devicetree/bindings/iio/temperature/melexis,mlx90614.yaml52
-rw-r--r--Documentation/devicetree/bindings/iio/temperature/melexis,mlx90632.yaml74
-rw-r--r--Documentation/devicetree/bindings/iio/temperature/microchip,mcp9600.yaml120
-rw-r--r--Documentation/devicetree/bindings/iio/temperature/mlx90614.txt24
-rw-r--r--Documentation/devicetree/bindings/iio/temperature/mlx90632.txt28
-rw-r--r--Documentation/devicetree/bindings/iio/temperature/ti,tmp006.yaml48
-rw-r--r--Documentation/devicetree/bindings/iio/temperature/ti,tmp007.yaml57
-rw-r--r--Documentation/devicetree/bindings/iio/temperature/ti,tmp117.yaml51
-rw-r--r--Documentation/devicetree/bindings/iio/temperature/tmp007.txt33
-rw-r--r--Documentation/devicetree/bindings/iio/temperature/tsys01.txt19
-rw-r--r--Documentation/devicetree/bindings/iio/timer/stm32-lptimer-trigger.txt23
-rw-r--r--Documentation/devicetree/bindings/iio/timer/stm32-timer-trigger.txt25
-rw-r--r--Documentation/devicetree/bindings/incomplete-devices.yaml285
-rw-r--r--Documentation/devicetree/bindings/index.rst10
-rw-r--r--Documentation/devicetree/bindings/infiniband/hisilicon-hns-roce.txt2
-rw-r--r--Documentation/devicetree/bindings/input/adafruit,seesaw-gamepad.yaml63
-rw-r--r--Documentation/devicetree/bindings/input/adc-joystick.yaml127
-rw-r--r--Documentation/devicetree/bindings/input/adc-keys.txt49
-rw-r--r--Documentation/devicetree/bindings/input/adc-keys.yaml103
-rw-r--r--Documentation/devicetree/bindings/input/adi,adp5588.yaml139
-rw-r--r--Documentation/devicetree/bindings/input/allwinner,sun4i-a10-lradc-keys.yaml120
-rw-r--r--Documentation/devicetree/bindings/input/ariel-pwrbutton.yaml58
-rw-r--r--Documentation/devicetree/bindings/input/atmel,captouch.txt36
-rw-r--r--Documentation/devicetree/bindings/input/atmel,captouch.yaml59
-rw-r--r--Documentation/devicetree/bindings/input/atmel,maxtouch.txt41
-rw-r--r--Documentation/devicetree/bindings/input/atmel,maxtouch.yaml121
-rw-r--r--Documentation/devicetree/bindings/input/awinic,aw86927.yaml48
-rw-r--r--Documentation/devicetree/bindings/input/azoteq,iqs7222.yaml1148
-rw-r--r--Documentation/devicetree/bindings/input/cap11xx.txt78
-rw-r--r--Documentation/devicetree/bindings/input/cirrus,cs40l50.yaml68
-rw-r--r--Documentation/devicetree/bindings/input/cirrus,ep9307-keypad.yaml87
-rw-r--r--Documentation/devicetree/bindings/input/cros-ec-keyb.txt72
-rw-r--r--Documentation/devicetree/bindings/input/cypress,cyapa.txt42
-rw-r--r--Documentation/devicetree/bindings/input/cypress,cyapa.yaml49
-rw-r--r--Documentation/devicetree/bindings/input/cypress,tm2-touchkey.txt25
-rw-r--r--Documentation/devicetree/bindings/input/cypress,tm2-touchkey.yaml73
-rw-r--r--Documentation/devicetree/bindings/input/cypress-sf.yaml61
-rw-r--r--Documentation/devicetree/bindings/input/da9062-onkey.txt47
-rw-r--r--Documentation/devicetree/bindings/input/dlg,da7280.yaml248
-rw-r--r--Documentation/devicetree/bindings/input/dlg,da9062-onkey.yaml38
-rw-r--r--Documentation/devicetree/bindings/input/elan,ekth3000.yaml81
-rw-r--r--Documentation/devicetree/bindings/input/elan,ekth6915.yaml84
-rw-r--r--Documentation/devicetree/bindings/input/elan_i2c.txt33
-rw-r--r--Documentation/devicetree/bindings/input/elants_i2c.txt34
-rw-r--r--Documentation/devicetree/bindings/input/fsl,mpr121-touchkey.yaml90
-rw-r--r--Documentation/devicetree/bindings/input/fsl,scu-key.yaml42
-rw-r--r--Documentation/devicetree/bindings/input/fsl-mma8450.txt12
-rw-r--r--Documentation/devicetree/bindings/input/goodix,gt7375p.yaml89
-rw-r--r--Documentation/devicetree/bindings/input/goodix,gt7986u-spifw.yaml69
-rw-r--r--Documentation/devicetree/bindings/input/google,cros-ec-keyb.yaml140
-rw-r--r--Documentation/devicetree/bindings/input/gpio-beeper.txt13
-rw-r--r--Documentation/devicetree/bindings/input/gpio-beeper.yaml33
-rw-r--r--Documentation/devicetree/bindings/input/gpio-keys-polled.txt45
-rw-r--r--Documentation/devicetree/bindings/input/gpio-keys.txt58
-rw-r--r--Documentation/devicetree/bindings/input/gpio-keys.yaml181
-rw-r--r--Documentation/devicetree/bindings/input/gpio-matrix-keypad.txt49
-rw-r--r--Documentation/devicetree/bindings/input/gpio-matrix-keypad.yaml103
-rw-r--r--Documentation/devicetree/bindings/input/gpio-mouse.txt32
-rw-r--r--Documentation/devicetree/bindings/input/gpio-mouse.yaml68
-rw-r--r--Documentation/devicetree/bindings/input/gpio-vibrator.yaml39
-rw-r--r--Documentation/devicetree/bindings/input/hid-over-i2c.txt44
-rw-r--r--Documentation/devicetree/bindings/input/hid-over-i2c.yaml85
-rw-r--r--Documentation/devicetree/bindings/input/ibm,op-panel.yaml50
-rw-r--r--Documentation/devicetree/bindings/input/ilitek,ili2901.yaml66
-rw-r--r--Documentation/devicetree/bindings/input/ilitek,ili9882t.yaml67
-rw-r--r--Documentation/devicetree/bindings/input/imx-keypad.txt53
-rw-r--r--Documentation/devicetree/bindings/input/imx-keypad.yaml85
-rw-r--r--Documentation/devicetree/bindings/input/input-reset.txt2
-rw-r--r--Documentation/devicetree/bindings/input/input.yaml64
-rw-r--r--Documentation/devicetree/bindings/input/iqs269a.yaml648
-rw-r--r--Documentation/devicetree/bindings/input/iqs626a.yaml878
-rw-r--r--Documentation/devicetree/bindings/input/iqs62x-keys.yaml132
-rw-r--r--Documentation/devicetree/bindings/input/keys.txt8
-rw-r--r--Documentation/devicetree/bindings/input/lpc32xx-key.txt31
-rw-r--r--Documentation/devicetree/bindings/input/matrix-keymap.txt28
-rw-r--r--Documentation/devicetree/bindings/input/matrix-keymap.yaml48
-rw-r--r--Documentation/devicetree/bindings/input/max77650-onkey.yaml38
-rw-r--r--Documentation/devicetree/bindings/input/mediatek,mt6779-keypad.yaml86
-rw-r--r--Documentation/devicetree/bindings/input/mediatek,pmic-keys.yaml95
-rw-r--r--Documentation/devicetree/bindings/input/microchip,cap11xx.yaml226
-rw-r--r--Documentation/devicetree/bindings/input/microchip,qt1050.txt78
-rw-r--r--Documentation/devicetree/bindings/input/mpr121-touchkey.txt30
-rw-r--r--Documentation/devicetree/bindings/input/mtk-pmic-keys.txt43
-rw-r--r--Documentation/devicetree/bindings/input/nxp,lpc3220-key.yaml61
-rw-r--r--Documentation/devicetree/bindings/input/pine64,pinephone-keyboard.yaml66
-rw-r--r--Documentation/devicetree/bindings/input/pwm-beeper.txt24
-rw-r--r--Documentation/devicetree/bindings/input/pwm-beeper.yaml41
-rw-r--r--Documentation/devicetree/bindings/input/pwm-vibrator.txt66
-rw-r--r--Documentation/devicetree/bindings/input/pwm-vibrator.yaml59
-rw-r--r--Documentation/devicetree/bindings/input/qcom,pm8921-keypad.yaml89
-rw-r--r--Documentation/devicetree/bindings/input/qcom,pm8921-pwrkey.yaml75
-rw-r--r--Documentation/devicetree/bindings/input/qcom,pm8941-pwrkey.txt53
-rw-r--r--Documentation/devicetree/bindings/input/qcom,pm8941-pwrkey.yaml65
-rw-r--r--Documentation/devicetree/bindings/input/qcom,pm8xxx-keypad.txt90
-rw-r--r--Documentation/devicetree/bindings/input/qcom,pm8xxx-pwrkey.txt46
-rw-r--r--Documentation/devicetree/bindings/input/qcom,pm8xxx-vib.txt23
-rw-r--r--Documentation/devicetree/bindings/input/qcom,pm8xxx-vib.yaml47
-rw-r--r--Documentation/devicetree/bindings/input/regulator-haptic.txt21
-rw-r--r--Documentation/devicetree/bindings/input/regulator-haptic.yaml43
-rw-r--r--Documentation/devicetree/bindings/input/rmi4/rmi_2d_sensor.txt56
-rw-r--r--Documentation/devicetree/bindings/input/rmi4/rmi_f01.txt39
-rw-r--r--Documentation/devicetree/bindings/input/rmi4/rmi_i2c.txt61
-rw-r--r--Documentation/devicetree/bindings/input/rmi4/rmi_spi.txt56
-rw-r--r--Documentation/devicetree/bindings/input/rotary-encoder.txt50
-rw-r--r--Documentation/devicetree/bindings/input/rotary-encoder.yaml90
-rw-r--r--Documentation/devicetree/bindings/input/samsung,s3c6410-keypad.yaml121
-rw-r--r--Documentation/devicetree/bindings/input/samsung-keypad.txt77
-rw-r--r--Documentation/devicetree/bindings/input/snvs-pwrkey.txt1
-rw-r--r--Documentation/devicetree/bindings/input/sprd,sc27xx-vibra.txt23
-rw-r--r--Documentation/devicetree/bindings/input/sprd,sc27xx-vibrator.yaml31
-rw-r--r--Documentation/devicetree/bindings/input/stmpe-keypad.txt41
-rw-r--r--Documentation/devicetree/bindings/input/sun4i-lradc-keys.txt62
-rw-r--r--Documentation/devicetree/bindings/input/syna,rmi4.yaml293
-rw-r--r--Documentation/devicetree/bindings/input/tca8418_keypad.txt10
-rw-r--r--Documentation/devicetree/bindings/input/ti,drv260x.txt50
-rw-r--r--Documentation/devicetree/bindings/input/ti,drv260x.yaml109
-rw-r--r--Documentation/devicetree/bindings/input/ti,drv2665.txt17
-rw-r--r--Documentation/devicetree/bindings/input/ti,drv2667.txt17
-rw-r--r--Documentation/devicetree/bindings/input/ti,drv266x.yaml50
-rw-r--r--Documentation/devicetree/bindings/input/ti,nspire-keypad.txt60
-rw-r--r--Documentation/devicetree/bindings/input/ti,nspire-keypad.yaml74
-rw-r--r--Documentation/devicetree/bindings/input/ti,tca8418.yaml61
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/ad7879.txt71
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/adi,ad7879.yaml150
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/ads7846.txt93
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/apple,z2-multitouch.yaml70
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/auo_pixcir_ts.txt6
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/azoteq,iqs5xx.yaml75
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/azoteq,iqs7211.yaml769
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/bu21013.txt28
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/chipone,icn8318.yaml62
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/chipone_icn8318.txt44
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/colibri-vf50-ts.txt34
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/cypress,cy8ctma140.yaml72
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/cypress,cy8ctma340.yaml148
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/cypress,tt21000.yaml111
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/cyttsp.txt93
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.txt63
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml133
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/eeti,exc3000.yaml94
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/eeti.txt30
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/egalax-ts.txt18
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/ektf2127.txt25
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/elan,ektf2127.yaml58
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/elan,elants_i2c.yaml74
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/exc3000.txt26
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/fsl,imx6ul-tsc.yaml111
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/goodix,gt9916.yaml96
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/goodix.txt43
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/goodix.yaml81
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/himax,hx83112b.yaml64
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/himax,hx852es.yaml81
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/hycon,hy46xx.yaml119
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/hynitron,cst816x.yaml65
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/hynitron,cstxxx.yaml65
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/ilitek_ts_i2c.yaml76
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/imagis,ist3038c.yaml109
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/imx6ul_tsc.txt38
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/lpc32xx-tsc.txt16
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/max11801-ts.txt17
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/maxim,max11801.yaml46
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/melfas,mms114.yaml96
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/mms114.txt41
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/mstar,msg2638.yaml75
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/neonode,zforce.yaml72
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/novatek,nvt-ts.yaml62
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/nxp,lpc3220-tsc.yaml43
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/pixcir,pixcir_ts.yaml68
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/pixcir_i2c_ts.txt31
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/resistive-adc-touch.txt30
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/resistive-adc-touch.yaml86
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/rohm,bu21013.yaml95
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/samsung,s6sy761.txt32
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/samsung,s6sy761.yaml54
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/semtech,sx8654.yaml52
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/silead,gsl1680.yaml91
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/silead_gsl1680.txt44
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/sitronix,st1232.yaml79
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/sitronix-st1232.txt24
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/st,stmfts.txt41
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/st,stmfts.yaml72
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/stmpe.txt43
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/sx8654.txt15
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/ti,ads7843.yaml183
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/ti,am3359-tsc.yaml76
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/ti,tsc2005.yaml128
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/ti,tsc2007.yaml77
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/ti-tsc-adc.txt83
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/toradex,vf50-touchscreen.yaml77
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/touchscreen.txt39
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/touchscreen.yaml219
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/tsc2005.txt64
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/tsc2007.txt39
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/zeitec,zet6223.yaml62
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/zet6223.txt30
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/zforce_ts.txt34
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/zinitix,bt400.yaml125
-rw-r--r--Documentation/devicetree/bindings/input/twl4030-pwrbutton.txt21
-rw-r--r--Documentation/devicetree/bindings/input/zii,rave-sp-pwrbutton.txt22
-rw-r--r--Documentation/devicetree/bindings/input/zii,rave-sp-pwrbutton.yaml36
-rw-r--r--Documentation/devicetree/bindings/interconnect/fsl,imx8m-noc.yaml104
-rw-r--r--Documentation/devicetree/bindings/interconnect/interconnect.txt86
-rw-r--r--Documentation/devicetree/bindings/interconnect/mediatek,cci.yaml147
-rw-r--r--Documentation/devicetree/bindings/interconnect/mediatek,mt8183-emi.yaml51
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,bcm-voter.yaml65
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,glymur-rpmh.yaml172
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,milos-rpmh.yaml136
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,msm8939.yaml77
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,msm8953.yaml100
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,msm8974.yaml62
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,msm8996.yaml126
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,msm8998-bwmon.yaml144
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,osm-l3.yaml89
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,qcm2290.yaml93
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,qcs615-rpmh.yaml73
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,qcs8300-rpmh.yaml72
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,qdu1000-rpmh.yaml70
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,rpm-common.yaml28
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,rpm.yaml50
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,rpmh-common.yaml43
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,rpmh.yaml145
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,sa8775p-rpmh.yaml50
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,sar2130p-rpmh.yaml117
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,sc7280-rpmh.yaml124
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,sc8280xp-rpmh.yaml49
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,sdm660.yaml108
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,sdx75-rpmh.yaml92
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,sm6115.yaml152
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,sm6350-rpmh.yaml82
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,sm7150-rpmh.yaml84
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,sm8450-rpmh.yaml124
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,sm8550-rpmh.yaml139
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,sm8650-rpmh.yaml136
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,sm8750-rpmh.yaml136
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,x1e80100-rpmh.yaml83
-rw-r--r--Documentation/devicetree/bindings/interconnect/samsung,exynos-bus.yaml317
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/abilis,tb10x-ictl.txt37
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/abilis,tb10x-ictl.yaml54
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/actions,owl-sirq.yaml65
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/al,alpine-msix.txt25
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/al,alpine-msix.yaml49
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/allwinner,sun4i-a10-ic.yaml47
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/allwinner,sun4i-ic.txt18
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/allwinner,sun6i-a31-r-intc.yaml67
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/allwinner,sun7i-a20-sc-nmi.yaml65
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/allwinner,sunxi-nmi.txt29
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/altr,msi-controller.yaml65
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/amazon,al-fic.yaml46
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt34
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.yaml91
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/andestech,ativic32.txt19
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/andestech,plicsw.yaml54
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/apm,xgene1-msi.yaml54
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml133
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/apple,aic2.yaml144
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.txt173
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml314
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/arm,gic-v5-iwb.yaml78
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/arm,gic-v5.yaml267
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt171
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/arm,gic.yaml244
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/arm,nvic.txt36
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/arm,nvic.yaml62
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/arm,versatile-fpga-irq.txt36
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/arm,versatile-fpga-irq.yaml61
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/arm,vic.txt41
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/arm,vic.yaml81
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/aspeed,ast2400-i2c-ic.txt25
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/aspeed,ast2400-i2c-ic.yaml46
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/aspeed,ast2400-vic.txt23
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/aspeed,ast2400-vic.yaml62
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/aspeed,ast2500-scu-ic.yaml52
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/aspeed,ast2700-intc.yaml86
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/atmel,aic.txt40
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/atmel,aic.yaml90
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/brcm,bcm2712-msix.yaml60
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/brcm,bcm2835-armctrl-ic.txt131
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/brcm,bcm2835-armctrl-ic.yaml162
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/brcm,bcm2836-l1-intc.txt37
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/brcm,bcm2836-l1-intc.yaml51
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/brcm,bcm3380-l2-intc.txt39
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/brcm,bcm6345-l1-intc.txt55
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/brcm,bcm6345-l1-intc.yaml81
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/brcm,bcm7038-l1-intc.txt50
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/brcm,bcm7038-l1-intc.yaml91
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/brcm,bcm7120-l2-intc.txt88
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/brcm,bcm7120-l2-intc.yaml152
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/brcm,l2-intc.txt28
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/brcm,l2-intc.yaml72
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/cdns,xtensa-mx.txt18
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/cdns,xtensa-pic.txt25
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/cdns,xtensa-pic.yaml50
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/chrp,open-pic.yaml78
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/cirrus,clps711x-intc.txt41
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/cirrus,ep7209-intc.yaml71
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/cnxt,cx92755-ic.yaml47
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/csky,apb-intc.txt62
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/csky,apb-intc.yaml54
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/csky,mpintc.txt40
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/csky,mpintc.yaml43
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/digicolor-ic.txt21
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/econet,en751221-intc.yaml78
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/ezchip,nps400-ic.txt17
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/ezchip,nps400-ic.yaml34
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/faraday,ftintc010.txt25
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/faraday,ftintc010.yaml51
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/fsl,icoll.yaml45
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/fsl,imx8qxp-dc-intc.yaml318
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/fsl,intmux.yaml72
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/fsl,irqsteer.yaml114
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/fsl,ls-extirq.yaml137
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/fsl,ls-msi.yaml79
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/fsl,ls-scfg-msi.txt30
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/fsl,mpic-msi.yaml161
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/fsl,mu-msi.yaml95
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/fsl,tzic.yaml48
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/fsl,vf610-mscm-ir.yaml63
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/google,goldfish-pic.txt30
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/google,goldfish-pic.yaml47
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/hisilicon,mbigen-v2.txt84
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/hisilicon,mbigen-v2.yaml76
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/idt,32434-pic.yaml52
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/img,meta-intc.txt82
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/img,pdc-intc.txt105
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/img,pdc-intc.yaml79
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/ingenic,intc.txt28
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/ingenic,intc.yaml66
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/intel,ce4100-ioapic.txt26
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/intel,ce4100-ioapic.yaml60
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/intel,ce4100-lapic.yaml71
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/intel,ixp4xx-interrupt.yaml56
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/interrupts.txt20
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/jcore,aic.txt26
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/jcore,aic.yaml43
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/kontron,sl28cpld-intc.yaml54
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/loongson,cpu-interrupt-controller.yaml34
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/loongson,eiointc.yaml59
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/loongson,htpic.yaml61
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/loongson,htvec.yaml57
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/loongson,liointc.yaml125
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/loongson,ls1x-intc.yaml51
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/loongson,pch-msi.yaml62
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/loongson,pch-pic.yaml57
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/lsi,zevio-intc.txt18
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/lsi,zevio-intc.yaml43
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/marvell,ap806-gicp.yaml50
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/marvell,ap806-sei.yaml58
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/marvell,armada-370-xp-mpic.txt38
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/marvell,armada-8k-pic.txt25
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/marvell,armada-8k-pic.yaml52
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/marvell,cp110-icu.yaml101
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/marvell,gicp.txt27
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/marvell,icu.txt112
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/marvell,mpic.yaml63
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/marvell,odmi-controller.txt42
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/marvell,odmi-controller.yaml54
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/marvell,orion-bridge-intc.yaml52
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/marvell,orion-intc.txt48
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/marvell,sei.txt36
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/mediatek,cirq.txt33
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/mediatek,mt6577-sysirq.yaml86
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/mediatek,mtk-cirq.yaml68
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/mediatek,sysirq.txt38
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/microchip,lan966x-oic.yaml54
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/microchip,pic32-evic.txt67
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/microchip,pic32mzda-evic.yaml60
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/microchip,sama7g5-eic.yaml73
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/mips-gic.txt67
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/mrvl,intc.txt60
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/mrvl,intc.yaml132
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/mscc,ocelot-icpu-intr.txt21
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/mscc,ocelot-icpu-intr.yaml64
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/msi-controller.yaml46
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/msi.txt2
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/mstar,mst-intc.yaml64
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/mti,cpu-interrupt-controller.yaml46
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/mti,gic.yaml146
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/nuvoton,wpcm450-aic.yaml39
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/nvidia,tegra20-ictlr.txt41
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/nvidia,tegra20-ictlr.yaml82
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/nxp,lpc3220-mic.txt58
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/nxp,lpc3220-mic.yaml68
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/open-pic.txt97
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/opencores,or1k-pic.txt23
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/opencores,or1k-pic.yaml38
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/openrisc,ompic.txt22
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/openrisc,ompic.yaml45
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/qca,ar7100-cpu-intc.yaml61
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/qca,ar7100-misc-intc.yaml52
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/qca,ath79-cpu-intc.txt44
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/qca,ath79-misc-intc.txt45
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/qcom,mpm.yaml118
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/qcom,pdc.txt72
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/qcom,pdc.yaml106
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/ralink,rt2880-intc.yaml54
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/rda,8810pl-intc.yaml43
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/realtek,rtl-intc.yaml105
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/renesas,h8300h-intc.txt22
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/renesas,h8s-intc.txt22
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/renesas,intc-irqpin.txt62
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/renesas,intc-irqpin.yaml107
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.txt47
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml94
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/renesas,rza1-irqc.yaml80
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/renesas,rzg2l-irqc.yaml282
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/renesas,rzv2h-icu.yaml280
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/riscv,aplic.yaml180
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/riscv,cpu-intc.txt52
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/riscv,cpu-intc.yaml73
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/riscv,imsics.yaml172
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/riscv,rpmi-mpxy-system-msi.yaml67
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/riscv,rpmi-system-msi.yaml74
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/samsung,exynos4210-combiner.txt50
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/samsung,exynos4210-combiner.yaml96
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/samsung,s3c24xx-irq.txt53
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/sifive,plic-1.0.0.txt58
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/sifive,plic-1.0.0.yaml179
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/sigma,smp8642-intc.txt48
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/snps,arc700-intc.txt24
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/snps,arc700-intc.yaml42
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/snps,archs-idu-intc.txt36
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/snps,archs-idu-intc.yaml48
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/snps,archs-intc.txt22
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/snps,archs-intc.yaml48
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/snps,dw-apb-ictl.txt31
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/snps,dw-apb-ictl.yaml64
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/socionext,synquacer-exiu.txt31
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/socionext,synquacer-exiu.yaml53
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/socionext,uniphier-aidet.txt32
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/socionext,uniphier-aidet.yaml62
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/sophgo,sg2042-msi.yaml63
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/st,spear300-shirq.yaml67
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/st,spear3xx-shirq.txt44
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/st,sti-irq-syscfg.txt35
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/st,stih407-irq-syscfg.yaml65
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.txt25
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.yaml116
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/starfive,jh8100-intc.yaml61
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/sunplus,sp7021-intc.yaml62
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/technologic,ts4800-irqc.yaml49
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/technologic,ts4800.txt14
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/thead,c900-aclint-mswi.yaml44
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/thead,c900-aclint-sswi.yaml104
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/ti,c64x+megamod-pic.txt103
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/ti,cp-intc.txt27
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/ti,cp-intc.yaml50
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/ti,keystone-irq.txt36
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/ti,keystone-irq.yaml63
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/ti,omap-intc-irq.txt28
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/ti,omap-intc-irq.yaml52
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/ti,omap2-intc.txt27
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/ti,omap4-wugen-mpu31
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/ti,omap4-wugen-mpu.yaml55
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/ti,pruss-intc.yaml163
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.yaml119
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/ti,sci-intr.yaml107
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/via,vt8500-intc.txt16
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/via,vt8500-intc.yaml76
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/xlnx,intc.yaml82
-rw-r--r--Documentation/devicetree/bindings/iommu/allwinner,sun50i-h6-iommu.yaml66
-rw-r--r--Documentation/devicetree/bindings/iommu/apple,dart.yaml92
-rw-r--r--Documentation/devicetree/bindings/iommu/apple,sart.yaml59
-rw-r--r--Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt77
-rw-r--r--Documentation/devicetree/bindings/iommu/arm,smmu-v3.yaml95
-rw-r--r--Documentation/devicetree/bindings/iommu/arm,smmu.txt139
-rw-r--r--Documentation/devicetree/bindings/iommu/arm,smmu.yaml710
-rw-r--r--Documentation/devicetree/bindings/iommu/iommu.txt24
-rw-r--r--Documentation/devicetree/bindings/iommu/mediatek,iommu.txt77
-rw-r--r--Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml234
-rw-r--r--Documentation/devicetree/bindings/iommu/msm,iommu-v0.txt64
-rw-r--r--Documentation/devicetree/bindings/iommu/nvidia,tegra20-gart.txt14
-rw-r--r--Documentation/devicetree/bindings/iommu/qcom,apq8064-iommu.yaml78
-rw-r--r--Documentation/devicetree/bindings/iommu/qcom,iommu.txt121
-rw-r--r--Documentation/devicetree/bindings/iommu/qcom,iommu.yaml128
-rw-r--r--Documentation/devicetree/bindings/iommu/qcom,tbu.yaml69
-rw-r--r--Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt70
-rw-r--r--Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.yaml141
-rw-r--r--Documentation/devicetree/bindings/iommu/riscv,iommu.yaml147
-rw-r--r--Documentation/devicetree/bindings/iommu/rockchip,iommu.txt38
-rw-r--r--Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml89
-rw-r--r--Documentation/devicetree/bindings/iommu/samsung,sysmmu.txt67
-rw-r--r--Documentation/devicetree/bindings/iommu/samsung,sysmmu.yaml99
-rw-r--r--Documentation/devicetree/bindings/iommu/sprd,iommu.yaml57
-rw-r--r--Documentation/devicetree/bindings/iommu/xen,grant-dma.yaml39
-rw-r--r--Documentation/devicetree/bindings/ipmi/aspeed,ast2400-ibt-bmc.txt3
-rw-r--r--Documentation/devicetree/bindings/ipmi/aspeed,ast2400-kcs-bmc.yaml109
-rw-r--r--Documentation/devicetree/bindings/ipmi/aspeed-kcs-bmc.txt25
-rw-r--r--Documentation/devicetree/bindings/ipmi/ipmb-dev.yaml56
-rw-r--r--Documentation/devicetree/bindings/ipmi/ipmi-ipmb.yaml67
-rw-r--r--Documentation/devicetree/bindings/ipmi/ipmi-smic.txt25
-rw-r--r--Documentation/devicetree/bindings/ipmi/ipmi-smic.yaml63
-rw-r--r--Documentation/devicetree/bindings/ipmi/npcm7xx-kcs-bmc.txt7
-rw-r--r--Documentation/devicetree/bindings/ipmi/ssif-bmc.yaml38
-rw-r--r--Documentation/devicetree/bindings/leds/allwinner,sun50i-a100-ledc.yaml137
-rw-r--r--Documentation/devicetree/bindings/leds/ams,as3645a.txt79
-rw-r--r--Documentation/devicetree/bindings/leds/ams,as3645a.yaml130
-rw-r--r--Documentation/devicetree/bindings/leds/awinic,aw200xx.yaml173
-rw-r--r--Documentation/devicetree/bindings/leds/backlight/apple,dwi-bl.yaml57
-rw-r--r--Documentation/devicetree/bindings/leds/backlight/common.yaml53
-rw-r--r--Documentation/devicetree/bindings/leds/backlight/gpio-backlight.txt16
-rw-r--r--Documentation/devicetree/bindings/leds/backlight/gpio-backlight.yaml41
-rw-r--r--Documentation/devicetree/bindings/leds/backlight/kinetic,ktd253.yaml49
-rw-r--r--Documentation/devicetree/bindings/leds/backlight/kinetic,ktd2801.yaml46
-rw-r--r--Documentation/devicetree/bindings/leds/backlight/kinetic,ktz8866.yaml86
-rw-r--r--Documentation/devicetree/bindings/leds/backlight/led-backlight.yaml44
-rw-r--r--Documentation/devicetree/bindings/leds/backlight/lm3630a-backlight.yaml142
-rw-r--r--Documentation/devicetree/bindings/leds/backlight/lp855x-backlight.yaml149
-rw-r--r--Documentation/devicetree/bindings/leds/backlight/lp855x.txt72
-rw-r--r--Documentation/devicetree/bindings/leds/backlight/max8925-backlight.txt10
-rw-r--r--Documentation/devicetree/bindings/leds/backlight/mediatek,mt6370-backlight.yaml121
-rw-r--r--Documentation/devicetree/bindings/leds/backlight/mps,mp3309c.yaml71
-rw-r--r--Documentation/devicetree/bindings/leds/backlight/pm8941-wled.txt42
-rw-r--r--Documentation/devicetree/bindings/leds/backlight/pwm-backlight.txt61
-rw-r--r--Documentation/devicetree/bindings/leds/backlight/pwm-backlight.yaml91
-rw-r--r--Documentation/devicetree/bindings/leds/backlight/qcom-wled.yaml263
-rw-r--r--Documentation/devicetree/bindings/leds/backlight/richtek,rt4831-backlight.yaml67
-rw-r--r--Documentation/devicetree/bindings/leds/backlight/ti,lm3509.yaml136
-rw-r--r--Documentation/devicetree/bindings/leds/backlight/ti,lp8864.yaml80
-rw-r--r--Documentation/devicetree/bindings/leds/backlight/zii,rave-sp-backlight.txt23
-rw-r--r--Documentation/devicetree/bindings/leds/backlight/zii,rave-sp-backlight.yaml36
-rw-r--r--Documentation/devicetree/bindings/leds/common.txt116
-rw-r--r--Documentation/devicetree/bindings/leds/common.yaml317
-rw-r--r--Documentation/devicetree/bindings/leds/cznic,turris-omnia-leds.yaml99
-rw-r--r--Documentation/devicetree/bindings/leds/irled/gpio-ir-tx.txt14
-rw-r--r--Documentation/devicetree/bindings/leds/irled/gpio-ir-tx.yaml36
-rw-r--r--Documentation/devicetree/bindings/leds/irled/ir-spi-led.yaml61
-rw-r--r--Documentation/devicetree/bindings/leds/irled/pwm-ir-tx.txt13
-rw-r--r--Documentation/devicetree/bindings/leds/irled/pwm-ir-tx.yaml37
-rw-r--r--Documentation/devicetree/bindings/leds/irled/spi-ir-led.txt29
-rw-r--r--Documentation/devicetree/bindings/leds/issi,is31fl319x.yaml193
-rw-r--r--Documentation/devicetree/bindings/leds/kinetic,ktd202x.yaml171
-rw-r--r--Documentation/devicetree/bindings/leds/kinetic,ktd2692.yaml87
-rw-r--r--Documentation/devicetree/bindings/leds/leds-aat1290.txt73
-rw-r--r--Documentation/devicetree/bindings/leds/leds-an30259a.txt43
-rw-r--r--Documentation/devicetree/bindings/leds/leds-aw2013.yaml104
-rw-r--r--Documentation/devicetree/bindings/leds/leds-bcm63138.yaml102
-rw-r--r--Documentation/devicetree/bindings/leds/leds-bcm6328.txt319
-rw-r--r--Documentation/devicetree/bindings/leds/leds-bcm6328.yaml400
-rw-r--r--Documentation/devicetree/bindings/leds/leds-bcm6358.txt4
-rw-r--r--Documentation/devicetree/bindings/leds/leds-class-multicolor.yaml41
-rw-r--r--Documentation/devicetree/bindings/leds/leds-consumer.yaml67
-rw-r--r--Documentation/devicetree/bindings/leds/leds-cr0014114.txt26
-rw-r--r--Documentation/devicetree/bindings/leds/leds-el15203000.txt69
-rw-r--r--Documentation/devicetree/bindings/leds/leds-gpio.txt66
-rw-r--r--Documentation/devicetree/bindings/leds/leds-gpio.yaml85
-rw-r--r--Documentation/devicetree/bindings/leds/leds-group-multicolor.yaml61
-rw-r--r--Documentation/devicetree/bindings/leds/leds-is31fl319x.txt59
-rw-r--r--Documentation/devicetree/bindings/leds/leds-ktd2692.txt50
-rw-r--r--Documentation/devicetree/bindings/leds/leds-lgm.yaml135
-rw-r--r--Documentation/devicetree/bindings/leds/leds-lm3532.txt105
-rw-r--r--Documentation/devicetree/bindings/leds/leds-lm3601x.txt14
-rw-r--r--Documentation/devicetree/bindings/leds/leds-lm36274.txt85
-rw-r--r--Documentation/devicetree/bindings/leds/leds-lm3692x.txt52
-rw-r--r--Documentation/devicetree/bindings/leds/leds-lm3697.txt73
-rw-r--r--Documentation/devicetree/bindings/leds/leds-lp50xx.yaml164
-rw-r--r--Documentation/devicetree/bindings/leds/leds-lp55xx.txt228
-rw-r--r--Documentation/devicetree/bindings/leds/leds-lp55xx.yaml292
-rw-r--r--Documentation/devicetree/bindings/leds/leds-lp8860.txt45
-rw-r--r--Documentation/devicetree/bindings/leds/leds-lt3593.txt11
-rw-r--r--Documentation/devicetree/bindings/leds/leds-max77650.yaml49
-rw-r--r--Documentation/devicetree/bindings/leds/leds-mt6323.txt60
-rw-r--r--Documentation/devicetree/bindings/leds/leds-mt6360.yaml191
-rw-r--r--Documentation/devicetree/bindings/leds/leds-pca9532.txt49
-rw-r--r--Documentation/devicetree/bindings/leds/leds-pca955x.txt88
-rw-r--r--Documentation/devicetree/bindings/leds/leds-pm8058.txt67
-rw-r--r--Documentation/devicetree/bindings/leds/leds-pwm-multicolor.yaml86
-rw-r--r--Documentation/devicetree/bindings/leds/leds-pwm.txt50
-rw-r--r--Documentation/devicetree/bindings/leds/leds-pwm.yaml71
-rw-r--r--Documentation/devicetree/bindings/leds/leds-qcom-lpg.yaml302
-rw-r--r--Documentation/devicetree/bindings/leds/leds-rt4505.yaml58
-rw-r--r--Documentation/devicetree/bindings/leds/leds-sc27xx-bltc.txt41
-rw-r--r--Documentation/devicetree/bindings/leds/leds-sgm3140.yaml65
-rw-r--r--Documentation/devicetree/bindings/leds/leds-spi-byte.txt44
-rw-r--r--Documentation/devicetree/bindings/leds/leds-tlc591xx.txt40
-rw-r--r--Documentation/devicetree/bindings/leds/leds-trigger-pattern.txt49
-rw-r--r--Documentation/devicetree/bindings/leds/maxim,max77693.yaml105
-rw-r--r--Documentation/devicetree/bindings/leds/mediatek,mt6370-flashlight.yaml41
-rw-r--r--Documentation/devicetree/bindings/leds/mediatek,mt6370-indicator.yaml80
-rw-r--r--Documentation/devicetree/bindings/leds/nxp,pca953x.yaml94
-rw-r--r--Documentation/devicetree/bindings/leds/nxp,pca955x.yaml158
-rw-r--r--Documentation/devicetree/bindings/leds/nxp,pca963x.yaml140
-rw-r--r--Documentation/devicetree/bindings/leds/nxp,pca995x.yaml83
-rw-r--r--Documentation/devicetree/bindings/leds/onnn,ncp5623.yaml98
-rw-r--r--Documentation/devicetree/bindings/leds/panasonic,an30259a.yaml84
-rw-r--r--Documentation/devicetree/bindings/leds/pca963x.txt52
-rw-r--r--Documentation/devicetree/bindings/leds/qcom,pm8058-led.yaml57
-rw-r--r--Documentation/devicetree/bindings/leds/qcom,spmi-flash-led.yaml122
-rw-r--r--Documentation/devicetree/bindings/leds/register-bit-led.txt94
-rw-r--r--Documentation/devicetree/bindings/leds/register-bit-led.yaml95
-rw-r--r--Documentation/devicetree/bindings/leds/regulator-led.yaml55
-rw-r--r--Documentation/devicetree/bindings/leds/richtek,rt8515.yaml111
-rw-r--r--Documentation/devicetree/bindings/leds/rohm,bd2606mvv.yaml81
-rw-r--r--Documentation/devicetree/bindings/leds/rohm,bd71828-leds.yaml42
-rw-r--r--Documentation/devicetree/bindings/leds/silergy,sy7802.yaml100
-rw-r--r--Documentation/devicetree/bindings/leds/skyworks,aat1290.yaml95
-rw-r--r--Documentation/devicetree/bindings/leds/sprd,sc2731-bltc.yaml53
-rw-r--r--Documentation/devicetree/bindings/leds/st,led1202.yaml132
-rw-r--r--Documentation/devicetree/bindings/leds/tca6507.txt49
-rw-r--r--Documentation/devicetree/bindings/leds/ti,lp8860.yaml90
-rw-r--r--Documentation/devicetree/bindings/leds/ti,tca6507.yaml135
-rw-r--r--Documentation/devicetree/bindings/leds/ti,tlc59116.yaml90
-rw-r--r--Documentation/devicetree/bindings/leds/ti,tps61310.yaml120
-rw-r--r--Documentation/devicetree/bindings/leds/ti.lm36922.yaml110
-rw-r--r--Documentation/devicetree/bindings/leds/trigger-source.yaml26
-rw-r--r--Documentation/devicetree/bindings/loongarch/cpus.yaml61
-rw-r--r--Documentation/devicetree/bindings/loongarch/loongson.yaml34
-rw-r--r--Documentation/devicetree/bindings/lpddr2/lpddr2-timings.txt52
-rw-r--r--Documentation/devicetree/bindings/lpddr2/lpddr2.txt102
-rw-r--r--Documentation/devicetree/bindings/mailbox/allwinner,sun6i-a31-msgbox.yaml80
-rw-r--r--Documentation/devicetree/bindings/mailbox/amlogic,meson-gxbb-mhu.yaml53
-rw-r--r--Documentation/devicetree/bindings/mailbox/apple,mailbox.yaml96
-rw-r--r--Documentation/devicetree/bindings/mailbox/arm,mhu.yaml169
-rw-r--r--Documentation/devicetree/bindings/mailbox/arm,mhuv2.yaml208
-rw-r--r--Documentation/devicetree/bindings/mailbox/arm,mhuv3.yaml224
-rw-r--r--Documentation/devicetree/bindings/mailbox/arm-mhu.txt43
-rw-r--r--Documentation/devicetree/bindings/mailbox/aspeed,ast2700-mailbox.yaml68
-rw-r--r--Documentation/devicetree/bindings/mailbox/brcm,bcm2835-mbox.txt26
-rw-r--r--Documentation/devicetree/bindings/mailbox/brcm,bcm2835-mbox.yaml40
-rw-r--r--Documentation/devicetree/bindings/mailbox/brcm,bcm74110-mbox.yaml64
-rw-r--r--Documentation/devicetree/bindings/mailbox/brcm,iproc-flexrm-mbox.txt59
-rw-r--r--Documentation/devicetree/bindings/mailbox/brcm,iproc-flexrm-mbox.yaml63
-rw-r--r--Documentation/devicetree/bindings/mailbox/brcm,iproc-pdc-mbox.txt25
-rw-r--r--Documentation/devicetree/bindings/mailbox/brcm,iproc-pdc-mbox.yaml66
-rw-r--r--Documentation/devicetree/bindings/mailbox/cix,sky1-mbox.yaml77
-rw-r--r--Documentation/devicetree/bindings/mailbox/fsl,mu.txt54
-rw-r--r--Documentation/devicetree/bindings/mailbox/fsl,mu.yaml197
-rw-r--r--Documentation/devicetree/bindings/mailbox/google,gs101-mbox.yaml69
-rw-r--r--Documentation/devicetree/bindings/mailbox/marvell,armada-3700-rwtm-mailbox.yaml42
-rw-r--r--Documentation/devicetree/bindings/mailbox/mediatek,gce-mailbox.yaml83
-rw-r--r--Documentation/devicetree/bindings/mailbox/mediatek,gce-props.yaml52
-rw-r--r--Documentation/devicetree/bindings/mailbox/mediatek,mt8196-gpueb-mbox.yaml64
-rw-r--r--Documentation/devicetree/bindings/mailbox/meson-mhu.txt34
-rw-r--r--Documentation/devicetree/bindings/mailbox/microchip,mpfs-mailbox.yaml57
-rw-r--r--Documentation/devicetree/bindings/mailbox/microchip,sbi-ipc.yaml123
-rw-r--r--Documentation/devicetree/bindings/mailbox/mtk,adsp-mbox.yaml57
-rw-r--r--Documentation/devicetree/bindings/mailbox/mtk-gce.txt57
-rw-r--r--Documentation/devicetree/bindings/mailbox/nvidia,tegra186-hsp.txt52
-rw-r--r--Documentation/devicetree/bindings/mailbox/nvidia,tegra186-hsp.yaml137
-rw-r--r--Documentation/devicetree/bindings/mailbox/omap-mailbox.txt139
-rw-r--r--Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.txt67
-rw-r--r--Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml291
-rw-r--r--Documentation/devicetree/bindings/mailbox/qcom,cpucp-mbox.yaml54
-rw-r--r--Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml88
-rw-r--r--Documentation/devicetree/bindings/mailbox/riscv,rpmi-shmem-mbox.yaml124
-rw-r--r--Documentation/devicetree/bindings/mailbox/riscv,sbi-mpxy-mbox.yaml51
-rw-r--r--Documentation/devicetree/bindings/mailbox/rockchip,rk3368-mailbox.yaml56
-rw-r--r--Documentation/devicetree/bindings/mailbox/rockchip-mailbox.txt32
-rw-r--r--Documentation/devicetree/bindings/mailbox/sophgo,cv1800b-mailbox.yaml60
-rw-r--r--Documentation/devicetree/bindings/mailbox/sprd-mailbox.yaml69
-rw-r--r--Documentation/devicetree/bindings/mailbox/st,sti-mailbox.yaml53
-rw-r--r--Documentation/devicetree/bindings/mailbox/st,stm32-ipcc.yaml76
-rw-r--r--Documentation/devicetree/bindings/mailbox/sti-mailbox.txt51
-rw-r--r--Documentation/devicetree/bindings/mailbox/stm32-ipcc.txt47
-rw-r--r--Documentation/devicetree/bindings/mailbox/thead,th1520-mbox.yaml89
-rw-r--r--Documentation/devicetree/bindings/mailbox/ti,omap-mailbox.yaml295
-rw-r--r--Documentation/devicetree/bindings/mailbox/ti,secure-proxy.txt50
-rw-r--r--Documentation/devicetree/bindings/mailbox/ti,secure-proxy.yaml79
-rw-r--r--Documentation/devicetree/bindings/mailbox/xlnx,zynqmp-ipi-mailbox.yaml253
-rw-r--r--Documentation/devicetree/bindings/media/allegro,al5e.yaml105
-rw-r--r--Documentation/devicetree/bindings/media/allwinner,sun4i-a10-csi.yaml129
-rw-r--r--Documentation/devicetree/bindings/media/allwinner,sun4i-a10-ir.yaml78
-rw-r--r--Documentation/devicetree/bindings/media/allwinner,sun4i-a10-video-engine.yaml94
-rw-r--r--Documentation/devicetree/bindings/media/allwinner,sun50i-h6-vpu-g2.yaml68
-rw-r--r--Documentation/devicetree/bindings/media/allwinner,sun6i-a31-csi.yaml143
-rw-r--r--Documentation/devicetree/bindings/media/allwinner,sun6i-a31-isp.yaml101
-rw-r--r--Documentation/devicetree/bindings/media/allwinner,sun6i-a31-mipi-csi2.yaml137
-rw-r--r--Documentation/devicetree/bindings/media/allwinner,sun8i-a83t-de2-rotate.yaml70
-rw-r--r--Documentation/devicetree/bindings/media/allwinner,sun8i-a83t-mipi-csi2.yaml125
-rw-r--r--Documentation/devicetree/bindings/media/allwinner,sun8i-h3-deinterlace.yaml83
-rw-r--r--Documentation/devicetree/bindings/media/amlogic,axg-ge2d.yaml47
-rw-r--r--Documentation/devicetree/bindings/media/amlogic,c3-isp.yaml88
-rw-r--r--Documentation/devicetree/bindings/media/amlogic,c3-mipi-adapter.yaml111
-rw-r--r--Documentation/devicetree/bindings/media/amlogic,c3-mipi-csi2.yaml127
-rw-r--r--Documentation/devicetree/bindings/media/amlogic,gx-vdec.yaml141
-rw-r--r--Documentation/devicetree/bindings/media/amlogic,meson-ir-tx.yaml59
-rw-r--r--Documentation/devicetree/bindings/media/amlogic,meson6-ir.yaml48
-rw-r--r--Documentation/devicetree/bindings/media/amphion,vpu.yaml179
-rw-r--r--Documentation/devicetree/bindings/media/aspeed,video-engine.yaml70
-rw-r--r--Documentation/devicetree/bindings/media/atmel,isc.yaml113
-rw-r--r--Documentation/devicetree/bindings/media/atmel-isc.txt65
-rw-r--r--Documentation/devicetree/bindings/media/atmel-isi.txt2
-rw-r--r--Documentation/devicetree/bindings/media/brcm,bcm2835-unicam.yaml127
-rw-r--r--Documentation/devicetree/bindings/media/cdns,csi2rx.txt100
-rw-r--r--Documentation/devicetree/bindings/media/cdns,csi2rx.yaml210
-rw-r--r--Documentation/devicetree/bindings/media/cdns,csi2tx.txt3
-rw-r--r--Documentation/devicetree/bindings/media/cec-gpio.txt42
-rw-r--r--Documentation/devicetree/bindings/media/cec.txt8
-rw-r--r--Documentation/devicetree/bindings/media/cec/amlogic,meson-gx-ao-cec.yaml92
-rw-r--r--Documentation/devicetree/bindings/media/cec/cec-common.yaml28
-rw-r--r--Documentation/devicetree/bindings/media/cec/cec-gpio.yaml74
-rw-r--r--Documentation/devicetree/bindings/media/cec/nvidia,tegra114-cec.yaml63
-rw-r--r--Documentation/devicetree/bindings/media/cec/samsung,s5p-cec.yaml66
-rw-r--r--Documentation/devicetree/bindings/media/cec/st,stih-cec.yaml66
-rw-r--r--Documentation/devicetree/bindings/media/cec/st,stm32-cec.yaml57
-rw-r--r--Documentation/devicetree/bindings/media/cedrus.txt54
-rw-r--r--Documentation/devicetree/bindings/media/cnm,wave521c.yaml61
-rw-r--r--Documentation/devicetree/bindings/media/coda.txt31
-rw-r--r--Documentation/devicetree/bindings/media/coda.yaml107
-rw-r--r--Documentation/devicetree/bindings/media/exynos-fimc-lite.txt16
-rw-r--r--Documentation/devicetree/bindings/media/exynos-jpeg-codec.txt16
-rw-r--r--Documentation/devicetree/bindings/media/exynos4-fimc-is.txt50
-rw-r--r--Documentation/devicetree/bindings/media/exynos5-gsc.txt38
-rw-r--r--Documentation/devicetree/bindings/media/fsl,imx-capture-subsystem.yaml37
-rw-r--r--Documentation/devicetree/bindings/media/fsl,imx6-mipi-csi2.yaml143
-rw-r--r--Documentation/devicetree/bindings/media/fsl,imx6q-vdoa.yaml42
-rw-r--r--Documentation/devicetree/bindings/media/fsl,imx6ull-pxp.yaml87
-rw-r--r--Documentation/devicetree/bindings/media/fsl,imx8qm-isi.yaml117
-rw-r--r--Documentation/devicetree/bindings/media/fsl,imx8qxp-isi.yaml106
-rw-r--r--Documentation/devicetree/bindings/media/fsl-pxp.txt26
-rw-r--r--Documentation/devicetree/bindings/media/fsl-vdoa.txt21
-rw-r--r--Documentation/devicetree/bindings/media/gpio-ir-receiver.txt17
-rw-r--r--Documentation/devicetree/bindings/media/gpio-ir-receiver.yaml43
-rw-r--r--Documentation/devicetree/bindings/media/hix5hd2-ir.txt3
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ad5820.txt19
-rw-r--r--Documentation/devicetree/bindings/media/i2c/adi,ad5820.yaml56
-rw-r--r--Documentation/devicetree/bindings/media/i2c/adi,adp1653.txt (renamed from Documentation/devicetree/bindings/media/i2c/adp1653.txt)0
-rw-r--r--Documentation/devicetree/bindings/media/i2c/adi,adv7180.yaml189
-rw-r--r--Documentation/devicetree/bindings/media/i2c/adi,adv7343.txt (renamed from Documentation/devicetree/bindings/media/i2c/adv7343.txt)0
-rw-r--r--Documentation/devicetree/bindings/media/i2c/adi,adv748x.yaml212
-rw-r--r--Documentation/devicetree/bindings/media/i2c/adi,adv7604.yaml160
-rw-r--r--Documentation/devicetree/bindings/media/i2c/adv7180.txt49
-rw-r--r--Documentation/devicetree/bindings/media/i2c/adv748x.txt107
-rw-r--r--Documentation/devicetree/bindings/media/i2c/adv7604.txt88
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ak7375.txt8
-rw-r--r--Documentation/devicetree/bindings/media/i2c/alliedvision,alvium-csi2.yaml81
-rw-r--r--Documentation/devicetree/bindings/media/i2c/aptina,mt9p031.yaml109
-rw-r--r--Documentation/devicetree/bindings/media/i2c/aptina,mt9v032.txt (renamed from Documentation/devicetree/bindings/media/i2c/mt9v032.txt)0
-rw-r--r--Documentation/devicetree/bindings/media/i2c/aptina,mt9v111.txt46
-rw-r--r--Documentation/devicetree/bindings/media/i2c/aptina,mt9v111.yaml75
-rw-r--r--Documentation/devicetree/bindings/media/i2c/asahi-kasei,ak7375.yaml54
-rw-r--r--Documentation/devicetree/bindings/media/i2c/chrontel,ch7322.yaml68
-rw-r--r--Documentation/devicetree/bindings/media/i2c/dongwoon,dw9714.txt9
-rw-r--r--Documentation/devicetree/bindings/media/i2c/dongwoon,dw9714.yaml47
-rw-r--r--Documentation/devicetree/bindings/media/i2c/dongwoon,dw9768.yaml97
-rw-r--r--Documentation/devicetree/bindings/media/i2c/dongwoon,dw9807-vcm.txt9
-rw-r--r--Documentation/devicetree/bindings/media/i2c/dongwoon,dw9807-vcm.yaml41
-rw-r--r--Documentation/devicetree/bindings/media/i2c/galaxycore,gc0308.yaml108
-rw-r--r--Documentation/devicetree/bindings/media/i2c/galaxycore,gc05a2.yaml112
-rw-r--r--Documentation/devicetree/bindings/media/i2c/galaxycore,gc08a3.yaml112
-rw-r--r--Documentation/devicetree/bindings/media/i2c/galaxycore,gc2145.yaml113
-rw-r--r--Documentation/devicetree/bindings/media/i2c/hynix,hi846.yaml119
-rw-r--r--Documentation/devicetree/bindings/media/i2c/imi,rdacm2x-gmsl.yaml137
-rw-r--r--Documentation/devicetree/bindings/media/i2c/imx274.txt33
-rw-r--r--Documentation/devicetree/bindings/media/i2c/isil,isl79987.yaml113
-rw-r--r--Documentation/devicetree/bindings/media/i2c/maxim,max2175.txt (renamed from Documentation/devicetree/bindings/media/i2c/max2175.txt)0
-rw-r--r--Documentation/devicetree/bindings/media/i2c/maxim,max9286.yaml416
-rw-r--r--Documentation/devicetree/bindings/media/i2c/maxim,max96712.yaml121
-rw-r--r--Documentation/devicetree/bindings/media/i2c/maxim,max96714.yaml174
-rw-r--r--Documentation/devicetree/bindings/media/i2c/maxim,max96717.yaml157
-rw-r--r--Documentation/devicetree/bindings/media/i2c/melexis,mlx90640.txt20
-rw-r--r--Documentation/devicetree/bindings/media/i2c/micron,mt9m111.txt37
-rw-r--r--Documentation/devicetree/bindings/media/i2c/mipi-ccs.yaml133
-rw-r--r--Documentation/devicetree/bindings/media/i2c/mt9m111.txt32
-rw-r--r--Documentation/devicetree/bindings/media/i2c/mt9p031.txt40
-rw-r--r--Documentation/devicetree/bindings/media/i2c/nokia,smia.txt68
-rw-r--r--Documentation/devicetree/bindings/media/i2c/nxp,tda1997x.txt (renamed from Documentation/devicetree/bindings/media/i2c/tda1997x.txt)0
-rw-r--r--Documentation/devicetree/bindings/media/i2c/onnn,ar0521.yaml112
-rw-r--r--Documentation/devicetree/bindings/media/i2c/onnn,mt9m001.txt38
-rw-r--r--Documentation/devicetree/bindings/media/i2c/onnn,mt9m114.yaml123
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ov2659.txt38
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ov2680.txt46
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ov2685.txt41
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ov5640.txt92
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ov5645.txt54
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ov5647.txt35
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ov5695.txt41
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ov7251.txt52
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ov772x.txt40
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,og01a1b.yaml107
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,og0ve1b.yaml97
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov02a10.yaml147
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov02e10.yaml152
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov08x40.yaml120
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov2640.txt (renamed from Documentation/devicetree/bindings/media/i2c/ov2640.txt)0
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov2659.txt47
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov2680.yaml112
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov2685.yaml102
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov2735.yaml108
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov4689.yaml130
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov5640.yaml149
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov5642.yaml141
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov5645.yaml108
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov5647.yaml75
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov5648.yaml98
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov5670.yaml93
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov5675.yaml122
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov5693.yaml141
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov6211.yaml96
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov64a40.yaml103
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov7251.yaml113
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov7670.txt (renamed from Documentation/devicetree/bindings/media/i2c/ov7670.txt)0
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov772x.yaml134
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov7740.txt (renamed from Documentation/devicetree/bindings/media/i2c/ov7740.txt)0
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov8856.yaml131
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov8858.yaml108
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov8865.yaml108
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov9282.yaml95
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov9650.txt (renamed from Documentation/devicetree/bindings/media/i2c/ov9650.txt)0
-rw-r--r--Documentation/devicetree/bindings/media/i2c/rda,rda5807.yaml67
-rw-r--r--Documentation/devicetree/bindings/media/i2c/samsung,s5k5baf.yaml105
-rw-r--r--Documentation/devicetree/bindings/media/i2c/samsung,s5k6a3.yaml102
-rw-r--r--Documentation/devicetree/bindings/media/i2c/sony,imx214.yaml128
-rw-r--r--Documentation/devicetree/bindings/media/i2c/sony,imx219.yaml112
-rw-r--r--Documentation/devicetree/bindings/media/i2c/sony,imx258.yaml135
-rw-r--r--Documentation/devicetree/bindings/media/i2c/sony,imx274.yaml76
-rw-r--r--Documentation/devicetree/bindings/media/i2c/sony,imx283.yaml107
-rw-r--r--Documentation/devicetree/bindings/media/i2c/sony,imx290.yaml146
-rw-r--r--Documentation/devicetree/bindings/media/i2c/sony,imx296.yaml106
-rw-r--r--Documentation/devicetree/bindings/media/i2c/sony,imx334.yaml82
-rw-r--r--Documentation/devicetree/bindings/media/i2c/sony,imx335.yaml100
-rw-r--r--Documentation/devicetree/bindings/media/i2c/sony,imx412.yaml94
-rw-r--r--Documentation/devicetree/bindings/media/i2c/sony,imx415.yaml115
-rw-r--r--Documentation/devicetree/bindings/media/i2c/st,st-mipid02.yaml175
-rw-r--r--Documentation/devicetree/bindings/media/i2c/st,st-vgxy61.yaml113
-rw-r--r--Documentation/devicetree/bindings/media/i2c/st,vd55g1.yaml133
-rw-r--r--Documentation/devicetree/bindings/media/i2c/st,vd56g3.yaml139
-rw-r--r--Documentation/devicetree/bindings/media/i2c/techwell,tw9900.yaml137
-rw-r--r--Documentation/devicetree/bindings/media/i2c/thine,thp7312.yaml223
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ti,ds90ub913.yaml133
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ti,ds90ub953.yaml147
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ti,ds90ub960.yaml441
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ti,ths8200.txt (renamed from Documentation/devicetree/bindings/media/i2c/ths8200.txt)0
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ti,tvp514x.txt (renamed from Documentation/devicetree/bindings/media/i2c/tvp514x.txt)0
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ti,tvp5150.txt157
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ti,tvp7002.txt (renamed from Documentation/devicetree/bindings/media/i2c/tvp7002.txt)0
-rw-r--r--Documentation/devicetree/bindings/media/i2c/toshiba,et8ek8.txt8
-rw-r--r--Documentation/devicetree/bindings/media/i2c/toshiba,tc358743.txt (renamed from Documentation/devicetree/bindings/media/i2c/tc358743.txt)0
-rw-r--r--Documentation/devicetree/bindings/media/i2c/toshiba,tc358746.yaml180
-rw-r--r--Documentation/devicetree/bindings/media/i2c/tvp5150.txt45
-rw-r--r--Documentation/devicetree/bindings/media/img,e5010-jpeg-enc.yaml75
-rw-r--r--Documentation/devicetree/bindings/media/imx.txt53
-rw-r--r--Documentation/devicetree/bindings/media/marvell,mmp2-ccic.yaml95
-rw-r--r--Documentation/devicetree/bindings/media/mediatek,mdp3-fg.yaml65
-rw-r--r--Documentation/devicetree/bindings/media/mediatek,mdp3-hdr.yaml65
-rw-r--r--Documentation/devicetree/bindings/media/mediatek,mdp3-rdma.yaml168
-rw-r--r--Documentation/devicetree/bindings/media/mediatek,mdp3-rsz.yaml82
-rw-r--r--Documentation/devicetree/bindings/media/mediatek,mdp3-stitch.yaml65
-rw-r--r--Documentation/devicetree/bindings/media/mediatek,mdp3-tcc.yaml66
-rw-r--r--Documentation/devicetree/bindings/media/mediatek,mdp3-tdshp.yaml65
-rw-r--r--Documentation/devicetree/bindings/media/mediatek,mdp3-wrot.yaml90
-rw-r--r--Documentation/devicetree/bindings/media/mediatek,mt7622-cir.yaml55
-rw-r--r--Documentation/devicetree/bindings/media/mediatek,mt8173-vpu.yaml74
-rw-r--r--Documentation/devicetree/bindings/media/mediatek,mt8195-jpegdec.yaml161
-rw-r--r--Documentation/devicetree/bindings/media/mediatek,mt8195-jpegenc.yaml140
-rw-r--r--Documentation/devicetree/bindings/media/mediatek,vcodec-decoder.yaml198
-rw-r--r--Documentation/devicetree/bindings/media/mediatek,vcodec-encoder.yaml179
-rw-r--r--Documentation/devicetree/bindings/media/mediatek,vcodec-subdev-decoder.yaml285
-rw-r--r--Documentation/devicetree/bindings/media/mediatek-jpeg-decoder.txt38
-rw-r--r--Documentation/devicetree/bindings/media/mediatek-jpeg-decoder.yaml82
-rw-r--r--Documentation/devicetree/bindings/media/mediatek-jpeg-encoder.yaml75
-rw-r--r--Documentation/devicetree/bindings/media/mediatek-mdp.txt13
-rw-r--r--Documentation/devicetree/bindings/media/mediatek-vcodec.txt108
-rw-r--r--Documentation/devicetree/bindings/media/mediatek-vpu.txt31
-rw-r--r--Documentation/devicetree/bindings/media/meson-ao-cec.txt28
-rw-r--r--Documentation/devicetree/bindings/media/meson-ir.txt20
-rw-r--r--Documentation/devicetree/bindings/media/microchip,csi2dc.yaml199
-rw-r--r--Documentation/devicetree/bindings/media/microchip,sama5d4-vdec.yaml46
-rw-r--r--Documentation/devicetree/bindings/media/microchip,xisc.yaml129
-rw-r--r--Documentation/devicetree/bindings/media/mtk-cir.txt28
-rw-r--r--Documentation/devicetree/bindings/media/nokia,n900-ir20
-rw-r--r--Documentation/devicetree/bindings/media/nuvoton,npcm-ece.yaml43
-rw-r--r--Documentation/devicetree/bindings/media/nuvoton,npcm-vcd.yaml72
-rw-r--r--Documentation/devicetree/bindings/media/nvidia,tegra-vde.txt62
-rw-r--r--Documentation/devicetree/bindings/media/nvidia,tegra-vde.yaml119
-rw-r--r--Documentation/devicetree/bindings/media/nxp,dw100.yaml69
-rw-r--r--Documentation/devicetree/bindings/media/nxp,imx-mipi-csi2.yaml233
-rw-r--r--Documentation/devicetree/bindings/media/nxp,imx7-csi.yaml87
-rw-r--r--Documentation/devicetree/bindings/media/nxp,imx8-isi.yaml179
-rw-r--r--Documentation/devicetree/bindings/media/nxp,imx8-jpeg.yaml122
-rw-r--r--Documentation/devicetree/bindings/media/nxp,imx8mq-mipi-csi2.yaml202
-rw-r--r--Documentation/devicetree/bindings/media/nxp,imx8mq-vpu.yaml68
-rw-r--r--Documentation/devicetree/bindings/media/qcom,camss.txt229
-rw-r--r--Documentation/devicetree/bindings/media/qcom,msm8916-camss.yaml253
-rw-r--r--Documentation/devicetree/bindings/media/qcom,msm8916-venus.yaml86
-rw-r--r--Documentation/devicetree/bindings/media/qcom,msm8953-camss.yaml337
-rw-r--r--Documentation/devicetree/bindings/media/qcom,msm8996-camss.yaml386
-rw-r--r--Documentation/devicetree/bindings/media/qcom,msm8996-venus.yaml157
-rw-r--r--Documentation/devicetree/bindings/media/qcom,qcm2290-camss.yaml243
-rw-r--r--Documentation/devicetree/bindings/media/qcom,qcm2290-venus.yaml130
-rw-r--r--Documentation/devicetree/bindings/media/qcom,qcs8300-camss.yaml336
-rw-r--r--Documentation/devicetree/bindings/media/qcom,sa8775p-camss.yaml361
-rw-r--r--Documentation/devicetree/bindings/media/qcom,sc7180-venus.yaml122
-rw-r--r--Documentation/devicetree/bindings/media/qcom,sc7280-camss.yaml425
-rw-r--r--Documentation/devicetree/bindings/media/qcom,sc7280-venus.yaml131
-rw-r--r--Documentation/devicetree/bindings/media/qcom,sc8280xp-camss.yaml532
-rw-r--r--Documentation/devicetree/bindings/media/qcom,sdm660-camss.yaml404
-rw-r--r--Documentation/devicetree/bindings/media/qcom,sdm660-venus.yaml159
-rw-r--r--Documentation/devicetree/bindings/media/qcom,sdm670-camss.yaml318
-rw-r--r--Documentation/devicetree/bindings/media/qcom,sdm845-camss.yaml379
-rw-r--r--Documentation/devicetree/bindings/media/qcom,sdm845-venus-v2.yaml122
-rw-r--r--Documentation/devicetree/bindings/media/qcom,sdm845-venus.yaml131
-rw-r--r--Documentation/devicetree/bindings/media/qcom,sm8250-camss.yaml492
-rw-r--r--Documentation/devicetree/bindings/media/qcom,sm8250-venus.yaml135
-rw-r--r--Documentation/devicetree/bindings/media/qcom,sm8550-camss.yaml597
-rw-r--r--Documentation/devicetree/bindings/media/qcom,sm8550-iris.yaml199
-rw-r--r--Documentation/devicetree/bindings/media/qcom,sm8750-iris.yaml186
-rw-r--r--Documentation/devicetree/bindings/media/qcom,venus-common.yaml73
-rw-r--r--Documentation/devicetree/bindings/media/qcom,venus.txt108
-rw-r--r--Documentation/devicetree/bindings/media/qcom,x1e80100-camss.yaml367
-rw-r--r--Documentation/devicetree/bindings/media/raspberrypi,pispbe.yaml63
-rw-r--r--Documentation/devicetree/bindings/media/raspberrypi,rp1-cfe.yaml93
-rw-r--r--Documentation/devicetree/bindings/media/rc.txt118
-rw-r--r--Documentation/devicetree/bindings/media/rc.yaml164
-rw-r--r--Documentation/devicetree/bindings/media/rcar_vin.txt214
-rw-r--r--Documentation/devicetree/bindings/media/renesas,ceu.txt86
-rw-r--r--Documentation/devicetree/bindings/media/renesas,ceu.yaml82
-rw-r--r--Documentation/devicetree/bindings/media/renesas,csi2.yaml170
-rw-r--r--Documentation/devicetree/bindings/media/renesas,drif.txt177
-rw-r--r--Documentation/devicetree/bindings/media/renesas,drif.yaml269
-rw-r--r--Documentation/devicetree/bindings/media/renesas,fcp.txt33
-rw-r--r--Documentation/devicetree/bindings/media/renesas,fcp.yaml107
-rw-r--r--Documentation/devicetree/bindings/media/renesas,fdp1.txt37
-rw-r--r--Documentation/devicetree/bindings/media/renesas,fdp1.yaml69
-rw-r--r--Documentation/devicetree/bindings/media/renesas,imr.yaml67
-rw-r--r--Documentation/devicetree/bindings/media/renesas,isp.yaml246
-rw-r--r--Documentation/devicetree/bindings/media/renesas,jpu.txt25
-rw-r--r--Documentation/devicetree/bindings/media/renesas,jpu.yaml65
-rw-r--r--Documentation/devicetree/bindings/media/renesas,rcar-csi2.txt101
-rw-r--r--Documentation/devicetree/bindings/media/renesas,rzg2l-cru.yaml227
-rw-r--r--Documentation/devicetree/bindings/media/renesas,rzg2l-csi2.yaml182
-rw-r--r--Documentation/devicetree/bindings/media/renesas,vin.yaml361
-rw-r--r--Documentation/devicetree/bindings/media/renesas,vsp1.txt30
-rw-r--r--Documentation/devicetree/bindings/media/renesas,vsp1.yaml134
-rw-r--r--Documentation/devicetree/bindings/media/rockchip,rk3568-vepu.yaml69
-rw-r--r--Documentation/devicetree/bindings/media/rockchip,vdec.yaml157
-rw-r--r--Documentation/devicetree/bindings/media/rockchip-isp1.yaml341
-rw-r--r--Documentation/devicetree/bindings/media/rockchip-rga.txt33
-rw-r--r--Documentation/devicetree/bindings/media/rockchip-rga.yaml86
-rw-r--r--Documentation/devicetree/bindings/media/rockchip-vpu.txt29
-rw-r--r--Documentation/devicetree/bindings/media/rockchip-vpu.yaml108
-rw-r--r--Documentation/devicetree/bindings/media/s5p-cec.txt36
-rw-r--r--Documentation/devicetree/bindings/media/s5p-mfc.txt76
-rw-r--r--Documentation/devicetree/bindings/media/samsung,exynos4210-csis.yaml170
-rw-r--r--Documentation/devicetree/bindings/media/samsung,exynos4210-fimc.yaml152
-rw-r--r--Documentation/devicetree/bindings/media/samsung,exynos4212-fimc-is.yaml226
-rw-r--r--Documentation/devicetree/bindings/media/samsung,exynos4212-fimc-lite.yaml63
-rw-r--r--Documentation/devicetree/bindings/media/samsung,exynos5250-gsc.yaml109
-rw-r--r--Documentation/devicetree/bindings/media/samsung,fimc.yaml278
-rw-r--r--Documentation/devicetree/bindings/media/samsung,s5c73m3.yaml165
-rw-r--r--Documentation/devicetree/bindings/media/samsung,s5p-mfc.yaml195
-rw-r--r--Documentation/devicetree/bindings/media/samsung,s5pv210-jpeg.yaml123
-rw-r--r--Documentation/devicetree/bindings/media/samsung-fimc.txt209
-rw-r--r--Documentation/devicetree/bindings/media/samsung-mipi-csis.txt81
-rw-r--r--Documentation/devicetree/bindings/media/samsung-s5c73m3.txt97
-rw-r--r--Documentation/devicetree/bindings/media/samsung-s5k5baf.txt58
-rw-r--r--Documentation/devicetree/bindings/media/samsung-s5k6a3.txt33
-rw-r--r--Documentation/devicetree/bindings/media/sh_mobile_ceu.txt17
-rw-r--r--Documentation/devicetree/bindings/media/silabs,si470x.yaml48
-rw-r--r--Documentation/devicetree/bindings/media/snps,dw-hdmi-rx.yaml132
-rw-r--r--Documentation/devicetree/bindings/media/spi/sony-cxd2880.txt4
-rw-r--r--Documentation/devicetree/bindings/media/st,stm32-cec.txt19
-rw-r--r--Documentation/devicetree/bindings/media/st,stm32-dcmi.txt45
-rw-r--r--Documentation/devicetree/bindings/media/st,stm32-dcmi.yaml122
-rw-r--r--Documentation/devicetree/bindings/media/st,stm32-dcmipp.yaml130
-rw-r--r--Documentation/devicetree/bindings/media/st,stm32-dma2d.yaml71
-rw-r--r--Documentation/devicetree/bindings/media/st,stm32mp25-csi.yaml124
-rw-r--r--Documentation/devicetree/bindings/media/st,stm32mp25-video-codec.yaml53
-rw-r--r--Documentation/devicetree/bindings/media/starfive,jh7110-camss.yaml180
-rw-r--r--Documentation/devicetree/bindings/media/stih-cec.txt27
-rw-r--r--Documentation/devicetree/bindings/media/sunxi-ir.txt28
-rw-r--r--Documentation/devicetree/bindings/media/tango-ir.txt21
-rw-r--r--Documentation/devicetree/bindings/media/tegra-cec.txt27
-rw-r--r--Documentation/devicetree/bindings/media/ti,cal.yaml177
-rw-r--r--Documentation/devicetree/bindings/media/ti,j721e-csi2rx-shim.yaml100
-rw-r--r--Documentation/devicetree/bindings/media/ti,vpe.yaml64
-rw-r--r--Documentation/devicetree/bindings/media/ti-cal.txt72
-rw-r--r--Documentation/devicetree/bindings/media/video-interface-devices.yaml414
-rw-r--r--Documentation/devicetree/bindings/media/video-interfaces.txt276
-rw-r--r--Documentation/devicetree/bindings/media/video-interfaces.yaml241
-rw-r--r--Documentation/devicetree/bindings/media/video-mux.txt60
-rw-r--r--Documentation/devicetree/bindings/media/video-mux.yaml106
-rw-r--r--Documentation/devicetree/bindings/media/xilinx/video.txt2
-rw-r--r--Documentation/devicetree/bindings/media/xilinx/xlnx,csi2rxss.yaml209
-rw-r--r--Documentation/devicetree/bindings/media/zx-irdec.txt14
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/arm,pl172.txt127
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/arm,pl172.yaml222
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/arm,pl35x-smc.yaml156
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/ath79-ddr-controller.txt35
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/atmel,ebi.txt1
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/brcm,brcmstb-memc-ddr.yaml70
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/brcm,dpfe-cpu.txt27
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/brcm,dpfe-cpu.yaml48
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/calxeda-ddr-ctrlr.txt16
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/calxeda-ddr-ctrlr.yaml42
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/canaan,k210-sram.yaml52
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/ddr/jedec,lpddr-channel.yaml146
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/ddr/jedec,lpddr-props.yaml74
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/ddr/jedec,lpddr2-timings.yaml135
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/ddr/jedec,lpddr2.yaml204
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/ddr/jedec,lpddr3-timings.yaml157
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/ddr/jedec,lpddr3.yaml243
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/ddr/jedec,lpddr4.yaml35
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/ddr/jedec,lpddr5.yaml46
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/exynos-srom.txt79
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/exynos-srom.yaml93
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/fsl/ddr.txt29
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/fsl/fsl,ddr.yaml106
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/fsl/fsl,ifc.yaml135
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/fsl/fsl,imx-weim-peripherals.yaml31
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/fsl/fsl,imx-weim.yaml201
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/fsl/ifc.txt82
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/fsl/imx8m-ddrc.yaml73
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/fsl/mmdc.yaml53
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/ingenic,jz4780-nemc.txt75
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/ingenic,nemc-peripherals.yaml46
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/ingenic,nemc.yaml95
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/intel,ixp4xx-expansion-bus-controller.yaml107
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/intel,ixp4xx-expansion-peripheral-props.yaml80
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/marvell,mvebu-sdram-controller.yaml31
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/mc-peripheral-props.yaml44
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/mediatek,mt7621-memc.yaml32
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.txt42
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.yaml185
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.txt44
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.yaml144
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/mvebu-sdram-controller.txt21
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/nuvoton,npcm-memory-controller.yaml50
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/nvidia,tegra124-emc.txt374
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/nvidia,tegra124-emc.yaml550
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/nvidia,tegra124-mc.yaml157
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/nvidia,tegra186-mc.yaml356
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/nvidia,tegra20-emc.yaml249
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/nvidia,tegra20-mc.txt26
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/nvidia,tegra20-mc.yaml79
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/nvidia,tegra210-emc.yaml93
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/nvidia,tegra30-emc.yaml355
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/nvidia,tegra30-mc.txt123
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/nvidia,tegra30-mc.yaml172
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/omap-gpmc.txt157
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/qca,ath79-ddr-controller.yaml54
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/qcom,ebi2-peripheral-props.yaml91
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/qcom,ebi2.yaml156
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/renesas,dbsc.yaml56
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/renesas,h8300-bsc.txt12
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/renesas,rpc-if.yaml149
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/renesas,rzg3e-xspi.yaml142
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/renesas-memory-controllers.txt44
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/rockchip,rk3399-dmc.yaml384
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/samsung,exynos4210-srom-peripheral-props.yaml35
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/samsung,exynos5422-dmc.yaml139
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/samsung,s5pv210-dmc.yaml33
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/snps,dw-umctl2-ddrc.yaml117
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/st,stm32-fmc2-ebi-props.yaml144
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/st,stm32-fmc2-ebi.yaml128
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/st,stm32mp25-omm.yaml226
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/starfive,jh7110-dmc.yaml74
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/synopsys.txt15
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/ti,da8xx-ddrctl.yaml35
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/ti,gpmc-child.yaml252
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/ti,gpmc.yaml190
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/ti-da8xx-ddrctl.txt20
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/ti/emif.txt2
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/xlnx,versal-ddrmc-edac.yaml57
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/xlnx,versal-net-ddrmc5.yaml41
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/xlnx,zynq-ddrc-a05.yaml37
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/xlnx,zynqmp-ocmc-1.0.yaml45
-rw-r--r--Documentation/devicetree/bindings/mfd/ab8500.txt159
-rw-r--r--Documentation/devicetree/bindings/mfd/ac100.txt50
-rw-r--r--Documentation/devicetree/bindings/mfd/act8945a.txt82
-rw-r--r--Documentation/devicetree/bindings/mfd/actions,atc260x.yaml183
-rw-r--r--Documentation/devicetree/bindings/mfd/adi,adp5585.yaml313
-rw-r--r--Documentation/devicetree/bindings/mfd/adi,max77541.yaml68
-rw-r--r--Documentation/devicetree/bindings/mfd/airoha,en7581-gpio-sysctl.yaml90
-rw-r--r--Documentation/devicetree/bindings/mfd/allwinner,sun4i-a10-ts.yaml76
-rw-r--r--Documentation/devicetree/bindings/mfd/allwinner,sun6i-a31-prcm.yaml222
-rw-r--r--Documentation/devicetree/bindings/mfd/allwinner,sun8i-a23-prcm.yaml168
-rw-r--r--Documentation/devicetree/bindings/mfd/ampere,smpro.yaml42
-rw-r--r--Documentation/devicetree/bindings/mfd/ams,as3711.yaml223
-rw-r--r--Documentation/devicetree/bindings/mfd/apple,smc.yaml84
-rw-r--r--Documentation/devicetree/bindings/mfd/arizona.txt101
-rw-r--r--Documentation/devicetree/bindings/mfd/arm,dev-platforms-syscon.yaml67
-rw-r--r--Documentation/devicetree/bindings/mfd/as3711.txt73
-rw-r--r--Documentation/devicetree/bindings/mfd/aspeed,ast2x00-scu.yaml166
-rw-r--r--Documentation/devicetree/bindings/mfd/aspeed-gfx.txt17
-rw-r--r--Documentation/devicetree/bindings/mfd/aspeed-lpc.txt199
-rw-r--r--Documentation/devicetree/bindings/mfd/aspeed-lpc.yaml202
-rw-r--r--Documentation/devicetree/bindings/mfd/aspeed-scu.txt24
-rw-r--r--Documentation/devicetree/bindings/mfd/atmel,at91sam9260-gpbr.yaml45
-rw-r--r--Documentation/devicetree/bindings/mfd/atmel,at91sam9260-matrix.yaml52
-rw-r--r--Documentation/devicetree/bindings/mfd/atmel,hlcdc.yaml99
-rw-r--r--Documentation/devicetree/bindings/mfd/atmel,sama5d2-flexcom.yaml98
-rw-r--r--Documentation/devicetree/bindings/mfd/atmel-flexcom.txt63
-rw-r--r--Documentation/devicetree/bindings/mfd/atmel-gpbr.txt15
-rw-r--r--Documentation/devicetree/bindings/mfd/atmel-hlcdc.txt54
-rw-r--r--Documentation/devicetree/bindings/mfd/atmel-matrix.txt24
-rw-r--r--Documentation/devicetree/bindings/mfd/atmel-smc.txt2
-rw-r--r--Documentation/devicetree/bindings/mfd/atmel-tcb.txt56
-rw-r--r--Documentation/devicetree/bindings/mfd/atmel-usart.txt85
-rw-r--r--Documentation/devicetree/bindings/mfd/axp20x.txt262
-rw-r--r--Documentation/devicetree/bindings/mfd/bd9571mwv.txt69
-rw-r--r--Documentation/devicetree/bindings/mfd/brcm,bcm59056.txt39
-rw-r--r--Documentation/devicetree/bindings/mfd/brcm,bcm59056.yaml76
-rw-r--r--Documentation/devicetree/bindings/mfd/brcm,bcm6318-gpio-sysctl.yaml177
-rw-r--r--Documentation/devicetree/bindings/mfd/brcm,bcm63268-gpio-sysctl.yaml194
-rw-r--r--Documentation/devicetree/bindings/mfd/brcm,bcm6328-gpio-sysctl.yaml162
-rw-r--r--Documentation/devicetree/bindings/mfd/brcm,bcm6358-gpio-sysctl.yaml130
-rw-r--r--Documentation/devicetree/bindings/mfd/brcm,bcm6362-gpio-sysctl.yaml236
-rw-r--r--Documentation/devicetree/bindings/mfd/brcm,bcm6368-gpio-sysctl.yaml246
-rw-r--r--Documentation/devicetree/bindings/mfd/brcm,cru.yaml108
-rw-r--r--Documentation/devicetree/bindings/mfd/brcm,iproc-cdru.txt16
-rw-r--r--Documentation/devicetree/bindings/mfd/brcm,iproc-mhb.txt18
-rw-r--r--Documentation/devicetree/bindings/mfd/brcm,misc.yaml60
-rw-r--r--Documentation/devicetree/bindings/mfd/brcm,twd.yaml69
-rw-r--r--Documentation/devicetree/bindings/mfd/canaan,k210-sysctl.yaml109
-rw-r--r--Documentation/devicetree/bindings/mfd/cirrus,lochnagar.yaml365
-rw-r--r--Documentation/devicetree/bindings/mfd/cirrus,madera.yaml297
-rw-r--r--Documentation/devicetree/bindings/mfd/cros-ec.txt73
-rw-r--r--Documentation/devicetree/bindings/mfd/da9062.txt109
-rw-r--r--Documentation/devicetree/bindings/mfd/da9063.txt111
-rw-r--r--Documentation/devicetree/bindings/mfd/delta,tn48m-cpld.yaml90
-rw-r--r--Documentation/devicetree/bindings/mfd/dlg,da9063.yaml327
-rw-r--r--Documentation/devicetree/bindings/mfd/ene-kb3930.yaml55
-rw-r--r--Documentation/devicetree/bindings/mfd/ene-kb930.yaml65
-rw-r--r--Documentation/devicetree/bindings/mfd/fsl,mc13xxx.yaml288
-rw-r--r--Documentation/devicetree/bindings/mfd/fsl,mcu-mpc8349emitx.yaml53
-rw-r--r--Documentation/devicetree/bindings/mfd/hisilicon,hi6421-spmi-pmic.yaml133
-rw-r--r--Documentation/devicetree/bindings/mfd/iqs62x.yaml170
-rw-r--r--Documentation/devicetree/bindings/mfd/khadas,mcu.yaml44
-rw-r--r--Documentation/devicetree/bindings/mfd/lp3943.txt2
-rw-r--r--Documentation/devicetree/bindings/mfd/lp873x.txt67
-rw-r--r--Documentation/devicetree/bindings/mfd/lp87565.txt43
-rw-r--r--Documentation/devicetree/bindings/mfd/madera.txt102
-rw-r--r--Documentation/devicetree/bindings/mfd/marvell,88pm886-a1.yaml80
-rw-r--r--Documentation/devicetree/bindings/mfd/max14577.txt145
-rw-r--r--Documentation/devicetree/bindings/mfd/max77620.txt9
-rw-r--r--Documentation/devicetree/bindings/mfd/max77650.yaml149
-rw-r--r--Documentation/devicetree/bindings/mfd/max77686.txt26
-rw-r--r--Documentation/devicetree/bindings/mfd/max77693.txt193
-rw-r--r--Documentation/devicetree/bindings/mfd/max77802.txt25
-rw-r--r--Documentation/devicetree/bindings/mfd/max8925.txt64
-rw-r--r--Documentation/devicetree/bindings/mfd/max8998.txt117
-rw-r--r--Documentation/devicetree/bindings/mfd/maxim,max14577.yaml196
-rw-r--r--Documentation/devicetree/bindings/mfd/maxim,max5970.yaml156
-rw-r--r--Documentation/devicetree/bindings/mfd/maxim,max7360.yaml191
-rw-r--r--Documentation/devicetree/bindings/mfd/maxim,max77686.yaml132
-rw-r--r--Documentation/devicetree/bindings/mfd/maxim,max77693.yaml191
-rw-r--r--Documentation/devicetree/bindings/mfd/maxim,max77705.yaml158
-rw-r--r--Documentation/devicetree/bindings/mfd/maxim,max77714.yaml68
-rw-r--r--Documentation/devicetree/bindings/mfd/maxim,max77759.yaml99
-rw-r--r--Documentation/devicetree/bindings/mfd/maxim,max77802.yaml194
-rw-r--r--Documentation/devicetree/bindings/mfd/maxim,max77843.yaml145
-rw-r--r--Documentation/devicetree/bindings/mfd/maxim,max8925.yaml145
-rw-r--r--Documentation/devicetree/bindings/mfd/maxim,max8998.yaml324
-rw-r--r--Documentation/devicetree/bindings/mfd/mc13xxx.txt156
-rw-r--r--Documentation/devicetree/bindings/mfd/mediatek,mt6357.yaml135
-rw-r--r--Documentation/devicetree/bindings/mfd/mediatek,mt6360.yaml247
-rw-r--r--Documentation/devicetree/bindings/mfd/mediatek,mt6370.yaml282
-rw-r--r--Documentation/devicetree/bindings/mfd/mediatek,mt6397.yaml600
-rw-r--r--Documentation/devicetree/bindings/mfd/mediatek,mt8195-scpsys.yaml71
-rw-r--r--Documentation/devicetree/bindings/mfd/mfd.txt15
-rw-r--r--Documentation/devicetree/bindings/mfd/motorola-cpcap.txt22
-rw-r--r--Documentation/devicetree/bindings/mfd/mps,mp2629.yaml64
-rw-r--r--Documentation/devicetree/bindings/mfd/mscc,ocelot.yaml163
-rw-r--r--Documentation/devicetree/bindings/mfd/mt6397.txt83
-rw-r--r--Documentation/devicetree/bindings/mfd/mxs-lradc.txt45
-rw-r--r--Documentation/devicetree/bindings/mfd/mxs-lradc.yaml134
-rw-r--r--Documentation/devicetree/bindings/mfd/netronix,ntxec.yaml65
-rw-r--r--Documentation/devicetree/bindings/mfd/nxp,bbnsm.yaml101
-rw-r--r--Documentation/devicetree/bindings/mfd/nxp,lpc1850-creg.yaml148
-rw-r--r--Documentation/devicetree/bindings/mfd/omap-usb-host.txt8
-rw-r--r--Documentation/devicetree/bindings/mfd/qcom,pm8008.yaml156
-rw-r--r--Documentation/devicetree/bindings/mfd/qcom,spmi-pmic.txt78
-rw-r--r--Documentation/devicetree/bindings/mfd/qcom,spmi-pmic.yaml357
-rw-r--r--Documentation/devicetree/bindings/mfd/qcom,tcsr.txt22
-rw-r--r--Documentation/devicetree/bindings/mfd/qcom,tcsr.yaml77
-rw-r--r--Documentation/devicetree/bindings/mfd/qcom-pm8xxx.txt99
-rw-r--r--Documentation/devicetree/bindings/mfd/qcom-pm8xxx.yaml107
-rw-r--r--Documentation/devicetree/bindings/mfd/qcom-rpm.txt279
-rw-r--r--Documentation/devicetree/bindings/mfd/qnap,ts433-mcu.yaml46
-rw-r--r--Documentation/devicetree/bindings/mfd/richtek,rt4831.yaml90
-rw-r--r--Documentation/devicetree/bindings/mfd/richtek,rt5033.yaml138
-rw-r--r--Documentation/devicetree/bindings/mfd/richtek,rt5120.yaml178
-rw-r--r--Documentation/devicetree/bindings/mfd/ricoh,rn5t618.yaml111
-rw-r--r--Documentation/devicetree/bindings/mfd/rk808.txt233
-rw-r--r--Documentation/devicetree/bindings/mfd/rn5t618.txt43
-rw-r--r--Documentation/devicetree/bindings/mfd/rockchip,rk805.yaml223
-rw-r--r--Documentation/devicetree/bindings/mfd/rockchip,rk806.yaml429
-rw-r--r--Documentation/devicetree/bindings/mfd/rockchip,rk808.yaml261
-rw-r--r--Documentation/devicetree/bindings/mfd/rockchip,rk816.yaml274
-rw-r--r--Documentation/devicetree/bindings/mfd/rockchip,rk817.yaml393
-rw-r--r--Documentation/devicetree/bindings/mfd/rockchip,rk818.yaml286
-rw-r--r--Documentation/devicetree/bindings/mfd/rohm,bd71815-pmic.yaml201
-rw-r--r--Documentation/devicetree/bindings/mfd/rohm,bd71828-pmic.yaml212
-rw-r--r--Documentation/devicetree/bindings/mfd/rohm,bd71837-pmic.txt63
-rw-r--r--Documentation/devicetree/bindings/mfd/rohm,bd71837-pmic.yaml244
-rw-r--r--Documentation/devicetree/bindings/mfd/rohm,bd71847-pmic.yaml228
-rw-r--r--Documentation/devicetree/bindings/mfd/rohm,bd9571mwv.yaml127
-rw-r--r--Documentation/devicetree/bindings/mfd/rohm,bd9576-pmic.yaml123
-rw-r--r--Documentation/devicetree/bindings/mfd/rohm,bd96801-pmic.yaml175
-rw-r--r--Documentation/devicetree/bindings/mfd/rohm,bd96802-pmic.yaml101
-rw-r--r--Documentation/devicetree/bindings/mfd/samsung,exynos5433-lpass.txt72
-rw-r--r--Documentation/devicetree/bindings/mfd/samsung,exynos5433-lpass.yaml117
-rw-r--r--Documentation/devicetree/bindings/mfd/samsung,s2dos05.yaml99
-rw-r--r--Documentation/devicetree/bindings/mfd/samsung,s2mpa01.yaml91
-rw-r--r--Documentation/devicetree/bindings/mfd/samsung,s2mps11.yaml307
-rw-r--r--Documentation/devicetree/bindings/mfd/samsung,s5m8767.yaml296
-rw-r--r--Documentation/devicetree/bindings/mfd/samsung,sec-core.txt86
-rw-r--r--Documentation/devicetree/bindings/mfd/silergy,sy7636a.yaml83
-rw-r--r--Documentation/devicetree/bindings/mfd/spacemit,p1.yaml86
-rw-r--r--Documentation/devicetree/bindings/mfd/sprd,sc2731.yaml244
-rw-r--r--Documentation/devicetree/bindings/mfd/sprd,sc27xx-pmic.txt40
-rw-r--r--Documentation/devicetree/bindings/mfd/sprd,ums512-glbreg.yaml73
-rw-r--r--Documentation/devicetree/bindings/mfd/st,stm32-lptimer.yaml177
-rw-r--r--Documentation/devicetree/bindings/mfd/st,stm32-timers.yaml190
-rw-r--r--Documentation/devicetree/bindings/mfd/st,stmfx.yaml122
-rw-r--r--Documentation/devicetree/bindings/mfd/st,stmpe.yaml297
-rw-r--r--Documentation/devicetree/bindings/mfd/st,stpmic1.yaml340
-rw-r--r--Documentation/devicetree/bindings/mfd/stericsson,ab8500.yaml522
-rw-r--r--Documentation/devicetree/bindings/mfd/stericsson,db8500-prcmu.yaml299
-rw-r--r--Documentation/devicetree/bindings/mfd/stm32-lptimer.txt48
-rw-r--r--Documentation/devicetree/bindings/mfd/stm32-timers.txt66
-rw-r--r--Documentation/devicetree/bindings/mfd/stmpe.txt28
-rw-r--r--Documentation/devicetree/bindings/mfd/sun4i-gpadc.txt59
-rw-r--r--Documentation/devicetree/bindings/mfd/sun6i-prcm.txt59
-rw-r--r--Documentation/devicetree/bindings/mfd/syscon-common.yaml71
-rw-r--r--Documentation/devicetree/bindings/mfd/syscon.txt32
-rw-r--r--Documentation/devicetree/bindings/mfd/syscon.yaml263
-rw-r--r--Documentation/devicetree/bindings/mfd/ti,am3359-tscadc.yaml85
-rw-r--r--Documentation/devicetree/bindings/mfd/ti,bq25703a.yaml117
-rw-r--r--Documentation/devicetree/bindings/mfd/ti,lp8732.yaml112
-rw-r--r--Documentation/devicetree/bindings/mfd/ti,lp87524-q1.yaml117
-rw-r--r--Documentation/devicetree/bindings/mfd/ti,lp87561-q1.yaml88
-rw-r--r--Documentation/devicetree/bindings/mfd/ti,lp87565-q1.yaml106
-rw-r--r--Documentation/devicetree/bindings/mfd/ti,nspire-misc.yaml51
-rw-r--r--Documentation/devicetree/bindings/mfd/ti,tps65086.yaml125
-rw-r--r--Documentation/devicetree/bindings/mfd/ti,tps65910.yaml318
-rw-r--r--Documentation/devicetree/bindings/mfd/ti,tps6594.yaml196
-rw-r--r--Documentation/devicetree/bindings/mfd/ti,twl.yaml557
-rw-r--r--Documentation/devicetree/bindings/mfd/ti-lmu.txt112
-rw-r--r--Documentation/devicetree/bindings/mfd/tps6105x.txt47
-rw-r--r--Documentation/devicetree/bindings/mfd/tps65086.txt54
-rw-r--r--Documentation/devicetree/bindings/mfd/tps65910.txt205
-rw-r--r--Documentation/devicetree/bindings/mfd/twl-familly.txt46
-rw-r--r--Documentation/devicetree/bindings/mfd/twl4030-audio.txt46
-rw-r--r--Documentation/devicetree/bindings/mfd/twl4030-power.txt48
-rw-r--r--Documentation/devicetree/bindings/mfd/twl6040.txt2
-rw-r--r--Documentation/devicetree/bindings/mfd/wlf,arizona.yaml281
-rw-r--r--Documentation/devicetree/bindings/mfd/x-powers,ac100.yaml116
-rw-r--r--Documentation/devicetree/bindings/mfd/x-powers,axp152.yaml421
-rw-r--r--Documentation/devicetree/bindings/mfd/xylon,logicvc.yaml63
-rw-r--r--Documentation/devicetree/bindings/mfd/zii,rave-sp.txt39
-rw-r--r--Documentation/devicetree/bindings/mfd/zii,rave-sp.yaml63
-rw-r--r--Documentation/devicetree/bindings/mips/brcm/brcm,bmips.txt8
-rw-r--r--Documentation/devicetree/bindings/mips/brcm/soc.yaml120
-rw-r--r--Documentation/devicetree/bindings/mips/cpu_irq.txt47
-rw-r--r--Documentation/devicetree/bindings/mips/cpus.yaml118
-rw-r--r--Documentation/devicetree/bindings/mips/econet.yaml26
-rw-r--r--Documentation/devicetree/bindings/mips/ingenic/devices.yaml58
-rw-r--r--Documentation/devicetree/bindings/mips/lantiq/lantiq,cgu.yaml32
-rw-r--r--Documentation/devicetree/bindings/mips/lantiq/lantiq,dma-xway.yaml32
-rw-r--r--Documentation/devicetree/bindings/mips/lantiq/lantiq,ebu.yaml32
-rw-r--r--Documentation/devicetree/bindings/mips/lantiq/lantiq,pmu.yaml32
-rw-r--r--Documentation/devicetree/bindings/mips/lantiq/rcu-gphy.txt36
-rw-r--r--Documentation/devicetree/bindings/mips/lantiq/rcu.txt20
-rw-r--r--Documentation/devicetree/bindings/mips/loongson/devices.yaml56
-rw-r--r--Documentation/devicetree/bindings/mips/loongson/ls2k-reset.yaml38
-rw-r--r--Documentation/devicetree/bindings/mips/loongson/rs780e-acpi.yaml42
-rw-r--r--Documentation/devicetree/bindings/mips/mobileye.yaml37
-rw-r--r--Documentation/devicetree/bindings/mips/mscc.txt19
-rw-r--r--Documentation/devicetree/bindings/mips/mti,mips-cm.yaml57
-rw-r--r--Documentation/devicetree/bindings/mips/ralink.txt18
-rw-r--r--Documentation/devicetree/bindings/mips/ralink.yaml88
-rw-r--r--Documentation/devicetree/bindings/mips/realtek-rtl.yaml28
-rw-r--r--Documentation/devicetree/bindings/misc/allwinner,syscon.txt20
-rw-r--r--Documentation/devicetree/bindings/misc/aspeed,ast2400-cvic.yaml60
-rw-r--r--Documentation/devicetree/bindings/misc/aspeed,cvic.txt35
-rw-r--r--Documentation/devicetree/bindings/misc/atmel-ssc.txt50
-rw-r--r--Documentation/devicetree/bindings/misc/brcm,kona-smc.txt15
-rw-r--r--Documentation/devicetree/bindings/misc/eeprom-93xx46.txt25
-rw-r--r--Documentation/devicetree/bindings/misc/fsl,dpaa2-console.yaml26
-rw-r--r--Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt158
-rw-r--r--Documentation/devicetree/bindings/misc/fsl,qoriq-mc.yaml187
-rw-r--r--Documentation/devicetree/bindings/misc/ge-achc.txt26
-rw-r--r--Documentation/devicetree/bindings/misc/ge-achc.yaml65
-rw-r--r--Documentation/devicetree/bindings/misc/idt,89hpesx.yaml72
-rw-r--r--Documentation/devicetree/bindings/misc/idt_89hpesx.txt44
-rw-r--r--Documentation/devicetree/bindings/misc/intel,ixp4xx-ahb-queue-manager.yaml51
-rw-r--r--Documentation/devicetree/bindings/misc/lwn,bk4-spi.yaml54
-rw-r--r--Documentation/devicetree/bindings/misc/lwn-bk4.txt26
-rw-r--r--Documentation/devicetree/bindings/misc/nvidia,tegra186-misc.txt12
-rw-r--r--Documentation/devicetree/bindings/misc/nvidia,tegra186-misc.yaml44
-rw-r--r--Documentation/devicetree/bindings/misc/nvidia,tegra20-apbmisc.txt14
-rw-r--r--Documentation/devicetree/bindings/misc/nvidia,tegra20-apbmisc.yaml51
-rw-r--r--Documentation/devicetree/bindings/misc/olpc,xo1.75-ec.yaml56
-rw-r--r--Documentation/devicetree/bindings/misc/pci1de4,1.yaml137
-rw-r--r--Documentation/devicetree/bindings/misc/pvpanic-mmio.txt29
-rw-r--r--Documentation/devicetree/bindings/misc/qcom,fastrpc.yaml149
-rw-r--r--Documentation/devicetree/bindings/misc/qemu,vcpu-stall-detector.yaml57
-rw-r--r--Documentation/devicetree/bindings/misc/ti,fpc202.yaml94
-rw-r--r--Documentation/devicetree/bindings/misc/ti,j721e-esm.yaml53
-rw-r--r--Documentation/devicetree/bindings/misc/xlnx,sd-fec.yaml140
-rw-r--r--Documentation/devicetree/bindings/misc/xlnx,tmr-inject.yaml47
-rw-r--r--Documentation/devicetree/bindings/misc/xlnx,tmr-manager.yaml47
-rw-r--r--Documentation/devicetree/bindings/mmc/allwinner,sun4i-a10-mmc.yaml111
-rw-r--r--Documentation/devicetree/bindings/mmc/amlogic,meson-gx-mmc.yaml79
-rw-r--r--Documentation/devicetree/bindings/mmc/amlogic,meson-gx.txt35
-rw-r--r--Documentation/devicetree/bindings/mmc/amlogic,meson-mx-sdhc.yaml70
-rw-r--r--Documentation/devicetree/bindings/mmc/amlogic,meson-mx-sdio.txt54
-rw-r--r--Documentation/devicetree/bindings/mmc/amlogic,meson-mx-sdio.yaml97
-rw-r--r--Documentation/devicetree/bindings/mmc/arasan,sdhci.txt78
-rw-r--r--Documentation/devicetree/bindings/mmc/arasan,sdhci.yaml336
-rw-r--r--Documentation/devicetree/bindings/mmc/arm,pl18x.yaml251
-rw-r--r--Documentation/devicetree/bindings/mmc/aspeed,sdhci.yaml107
-rw-r--r--Documentation/devicetree/bindings/mmc/atmel,hsmci.yaml106
-rw-r--r--Documentation/devicetree/bindings/mmc/atmel,sama5d2-sdhci.yaml93
-rw-r--r--Documentation/devicetree/bindings/mmc/atmel-hsmci.txt73
-rw-r--r--Documentation/devicetree/bindings/mmc/brcm,bcm2835-sdhost.txt23
-rw-r--r--Documentation/devicetree/bindings/mmc/brcm,bcm2835-sdhost.yaml54
-rw-r--r--Documentation/devicetree/bindings/mmc/brcm,iproc-sdhci.yaml64
-rw-r--r--Documentation/devicetree/bindings/mmc/brcm,kona-sdhci.txt21
-rw-r--r--Documentation/devicetree/bindings/mmc/brcm,kona-sdhci.yaml48
-rw-r--r--Documentation/devicetree/bindings/mmc/brcm,sdhci-brcmstb.txt38
-rw-r--r--Documentation/devicetree/bindings/mmc/brcm,sdhci-brcmstb.yaml117
-rw-r--r--Documentation/devicetree/bindings/mmc/brcm,sdhci-iproc.txt35
-rw-r--r--Documentation/devicetree/bindings/mmc/cdns,sdhci.yaml158
-rw-r--r--Documentation/devicetree/bindings/mmc/exynos-dw-mshc.txt92
-rw-r--r--Documentation/devicetree/bindings/mmc/fsl,esdhc.yaml107
-rw-r--r--Documentation/devicetree/bindings/mmc/fsl-esdhc.txt51
-rw-r--r--Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.txt54
-rw-r--r--Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.yaml216
-rw-r--r--Documentation/devicetree/bindings/mmc/fsl-imx-mmc.txt23
-rw-r--r--Documentation/devicetree/bindings/mmc/fsl-imx-mmc.yaml65
-rw-r--r--Documentation/devicetree/bindings/mmc/fujitsu,sdhci-fujitsu.yaml66
-rw-r--r--Documentation/devicetree/bindings/mmc/hi3798cv200-dw-mshc.txt40
-rw-r--r--Documentation/devicetree/bindings/mmc/hisilicon,hi3798cv200-dw-mshc.yaml97
-rw-r--r--Documentation/devicetree/bindings/mmc/img-dw-mshc.txt28
-rw-r--r--Documentation/devicetree/bindings/mmc/ingenic,mmc.yaml107
-rw-r--r--Documentation/devicetree/bindings/mmc/jz4740.txt39
-rw-r--r--Documentation/devicetree/bindings/mmc/k3-dw-mshc.txt2
-rw-r--r--Documentation/devicetree/bindings/mmc/litex,mmc.yaml78
-rw-r--r--Documentation/devicetree/bindings/mmc/loongson,ls2k0500-mmc.yaml112
-rw-r--r--Documentation/devicetree/bindings/mmc/marvell,dove-sdhci.yaml44
-rw-r--r--Documentation/devicetree/bindings/mmc/marvell,orion-sdio.yaml44
-rw-r--r--Documentation/devicetree/bindings/mmc/marvell,xenon-sdhci.txt172
-rw-r--r--Documentation/devicetree/bindings/mmc/marvell,xenon-sdhci.yaml260
-rw-r--r--Documentation/devicetree/bindings/mmc/microchip,dw-sparx5-sdhci.yaml67
-rw-r--r--Documentation/devicetree/bindings/mmc/microchip,sdhci-pic32.txt29
-rw-r--r--Documentation/devicetree/bindings/mmc/microchip,sdhci-pic32.yaml66
-rw-r--r--Documentation/devicetree/bindings/mmc/mmc-card.txt30
-rw-r--r--Documentation/devicetree/bindings/mmc/mmc-card.yaml100
-rw-r--r--Documentation/devicetree/bindings/mmc/mmc-controller-common.yaml365
-rw-r--r--Documentation/devicetree/bindings/mmc/mmc-controller.yaml49
-rw-r--r--Documentation/devicetree/bindings/mmc/mmc-pwrseq-emmc.txt25
-rw-r--r--Documentation/devicetree/bindings/mmc/mmc-pwrseq-emmc.yaml48
-rw-r--r--Documentation/devicetree/bindings/mmc/mmc-pwrseq-sd8787.txt16
-rw-r--r--Documentation/devicetree/bindings/mmc/mmc-pwrseq-sd8787.yaml43
-rw-r--r--Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.txt31
-rw-r--r--Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.yaml64
-rw-r--r--Documentation/devicetree/bindings/mmc/mmc-slot.yaml48
-rw-r--r--Documentation/devicetree/bindings/mmc/mmc-spi-slot.txt29
-rw-r--r--Documentation/devicetree/bindings/mmc/mmc-spi-slot.yaml77
-rw-r--r--Documentation/devicetree/bindings/mmc/mmc.txt174
-rw-r--r--Documentation/devicetree/bindings/mmc/mmci.txt72
-rw-r--r--Documentation/devicetree/bindings/mmc/mtk-sd.txt70
-rw-r--r--Documentation/devicetree/bindings/mmc/mtk-sd.yaml403
-rw-r--r--Documentation/devicetree/bindings/mmc/mxs-mmc.txt27
-rw-r--r--Documentation/devicetree/bindings/mmc/mxs-mmc.yaml63
-rw-r--r--Documentation/devicetree/bindings/mmc/npcm,sdhci.yaml45
-rw-r--r--Documentation/devicetree/bindings/mmc/nuvoton,ma35d1-sdhci.yaml87
-rw-r--r--Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.txt112
-rw-r--r--Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.yaml313
-rw-r--r--Documentation/devicetree/bindings/mmc/orion-sdio.txt16
-rw-r--r--Documentation/devicetree/bindings/mmc/owl-mmc.yaml67
-rw-r--r--Documentation/devicetree/bindings/mmc/pxa-mmc.txt2
-rw-r--r--Documentation/devicetree/bindings/mmc/renesas,mmcif.txt52
-rw-r--r--Documentation/devicetree/bindings/mmc/renesas,mmcif.yaml135
-rw-r--r--Documentation/devicetree/bindings/mmc/renesas,sdhi.yaml314
-rw-r--r--Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.txt49
-rw-r--r--Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.yaml129
-rw-r--r--Documentation/devicetree/bindings/mmc/samsung,exynos-dw-mshc.yaml171
-rw-r--r--Documentation/devicetree/bindings/mmc/samsung,s3c6410-sdhci.yaml81
-rw-r--r--Documentation/devicetree/bindings/mmc/samsung,s3cmci.txt42
-rw-r--r--Documentation/devicetree/bindings/mmc/samsung-sdhci.txt32
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-am654.yaml242
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-atmel.txt21
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-cadence.txt80
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-common.yaml32
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-dove.txt14
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-fujitsu.txt32
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-milbeaut.txt30
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-msm.txt63
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-msm.yaml275
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-of-dwcmshc.txt20
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-omap.txt22
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-pxa.txt50
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-pxa.yaml149
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-sirf.txt18
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-sprd.txt41
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci.txt13
-rw-r--r--Documentation/devicetree/bindings/mmc/snps,dwcmshc-sdhci.yaml149
-rw-r--r--Documentation/devicetree/bindings/mmc/socfpga-dw-mshc.txt23
-rw-r--r--Documentation/devicetree/bindings/mmc/socionext,uniphier-sd.yaml117
-rw-r--r--Documentation/devicetree/bindings/mmc/spacemit,sdhci.yaml53
-rw-r--r--Documentation/devicetree/bindings/mmc/sprd,sdhci-r11.yaml112
-rw-r--r--Documentation/devicetree/bindings/mmc/starfive,jh7110-mmc.yaml75
-rw-r--r--Documentation/devicetree/bindings/mmc/sunplus,mmc.yaml61
-rw-r--r--Documentation/devicetree/bindings/mmc/sunxi-mmc.txt52
-rw-r--r--Documentation/devicetree/bindings/mmc/synopsys-dw-mshc-common.yaml66
-rw-r--r--Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt141
-rw-r--r--Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.yaml105
-rw-r--r--Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt8
-rw-r--r--Documentation/devicetree/bindings/mmc/ti-omap.txt28
-rw-r--r--Documentation/devicetree/bindings/mmc/tmio_mmc.txt118
-rw-r--r--Documentation/devicetree/bindings/mmc/uniphier-sd.txt55
-rw-r--r--Documentation/devicetree/bindings/mmc/vt8500-sdmmc.txt23
-rw-r--r--Documentation/devicetree/bindings/mmc/wm,wm8505-sdhc.yaml66
-rw-r--r--Documentation/devicetree/bindings/mmc/zx-dw-mshc.txt31
-rw-r--r--Documentation/devicetree/bindings/mtd/allwinner,sun4i-a10-nand.yaml111
-rw-r--r--Documentation/devicetree/bindings/mtd/amlogic,meson-nand.yaml125
-rw-r--r--Documentation/devicetree/bindings/mtd/arasan,nand-controller.yaml58
-rw-r--r--Documentation/devicetree/bindings/mtd/arm,pl353-nand-r2p1.yaml52
-rw-r--r--Documentation/devicetree/bindings/mtd/arm-versatile.txt26
-rw-r--r--Documentation/devicetree/bindings/mtd/aspeed-smc.txt51
-rw-r--r--Documentation/devicetree/bindings/mtd/atmel,dataflash.yaml55
-rw-r--r--Documentation/devicetree/bindings/mtd/atmel-dataflash.txt17
-rw-r--r--Documentation/devicetree/bindings/mtd/atmel-nand.txt18
-rw-r--r--Documentation/devicetree/bindings/mtd/atmel-quadspi.txt31
-rw-r--r--Documentation/devicetree/bindings/mtd/brcm,brcmnand.txt183
-rw-r--r--Documentation/devicetree/bindings/mtd/brcm,brcmnand.yaml279
-rw-r--r--Documentation/devicetree/bindings/mtd/cadence-quadspi.txt61
-rw-r--r--Documentation/devicetree/bindings/mtd/cdns,hp-nfc.yaml81
-rw-r--r--Documentation/devicetree/bindings/mtd/common.txt15
-rw-r--r--Documentation/devicetree/bindings/mtd/cortina,gemini-flash.txt24
-rw-r--r--Documentation/devicetree/bindings/mtd/davinci-nand.txt94
-rw-r--r--Documentation/devicetree/bindings/mtd/denali,nand.yaml153
-rw-r--r--Documentation/devicetree/bindings/mtd/denali-nand.txt40
-rw-r--r--Documentation/devicetree/bindings/mtd/elm.txt16
-rw-r--r--Documentation/devicetree/bindings/mtd/flctl-nand.txt2
-rw-r--r--Documentation/devicetree/bindings/mtd/fsl,vf610-nfc.yaml89
-rw-r--r--Documentation/devicetree/bindings/mtd/fsl-quadspi.txt65
-rw-r--r--Documentation/devicetree/bindings/mtd/fsl-upm-nand.txt12
-rw-r--r--Documentation/devicetree/bindings/mtd/fsmc-nand.txt6
-rw-r--r--Documentation/devicetree/bindings/mtd/gpio-control-nand.txt2
-rw-r--r--Documentation/devicetree/bindings/mtd/gpmc-nand.txt147
-rw-r--r--Documentation/devicetree/bindings/mtd/gpmc-nor.txt98
-rw-r--r--Documentation/devicetree/bindings/mtd/gpmc-onenand.txt48
-rw-r--r--Documentation/devicetree/bindings/mtd/gpmi-nand.txt75
-rw-r--r--Documentation/devicetree/bindings/mtd/gpmi-nand.yaml197
-rw-r--r--Documentation/devicetree/bindings/mtd/hisi504-nand.txt4
-rw-r--r--Documentation/devicetree/bindings/mtd/hisilicon,fmc-spi-nor.txt2
-rw-r--r--Documentation/devicetree/bindings/mtd/ingenic,jz4780-nand.txt86
-rw-r--r--Documentation/devicetree/bindings/mtd/ingenic,nand.yaml139
-rw-r--r--Documentation/devicetree/bindings/mtd/intel,lgm-ebunand.yaml90
-rw-r--r--Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt91
-rw-r--r--Documentation/devicetree/bindings/mtd/jedec,spi-nor.yaml127
-rw-r--r--Documentation/devicetree/bindings/mtd/loongson,ls1b-nand-controller.yaml122
-rw-r--r--Documentation/devicetree/bindings/mtd/lpc32xx-mlc.txt2
-rw-r--r--Documentation/devicetree/bindings/mtd/lpc32xx-slc.txt2
-rw-r--r--Documentation/devicetree/bindings/mtd/marvell,nand-controller.yaml227
-rw-r--r--Documentation/devicetree/bindings/mtd/marvell-nand.txt126
-rw-r--r--Documentation/devicetree/bindings/mtd/mediatek,mtk-nfc.yaml154
-rw-r--r--Documentation/devicetree/bindings/mtd/mediatek,nand-ecc-engine.yaml63
-rw-r--r--Documentation/devicetree/bindings/mtd/microchip,mchp48l640.yaml47
-rw-r--r--Documentation/devicetree/bindings/mtd/mtd-physmap.txt96
-rw-r--r--Documentation/devicetree/bindings/mtd/mtd-physmap.yaml226
-rw-r--r--Documentation/devicetree/bindings/mtd/mtd.yaml117
-rw-r--r--Documentation/devicetree/bindings/mtd/mtk-nand.txt176
-rw-r--r--Documentation/devicetree/bindings/mtd/mtk-quadspi.txt49
-rw-r--r--Documentation/devicetree/bindings/mtd/mxc-nand.txt19
-rw-r--r--Documentation/devicetree/bindings/mtd/mxc-nand.yaml44
-rw-r--r--Documentation/devicetree/bindings/mtd/mxic-nand.txt36
-rw-r--r--Documentation/devicetree/bindings/mtd/mxicy,nand-ecc-engine.yaml77
-rw-r--r--Documentation/devicetree/bindings/mtd/nand-chip.yaml74
-rw-r--r--Documentation/devicetree/bindings/mtd/nand-controller.yaml71
-rw-r--r--Documentation/devicetree/bindings/mtd/nand-macronix.txt27
-rw-r--r--Documentation/devicetree/bindings/mtd/nand.txt75
-rw-r--r--Documentation/devicetree/bindings/mtd/nuvoton,ma35d1-nand.yaml95
-rw-r--r--Documentation/devicetree/bindings/mtd/nvidia-tegra20-nand.txt8
-rw-r--r--Documentation/devicetree/bindings/mtd/nxp,lpc1773-spifi.yaml74
-rw-r--r--Documentation/devicetree/bindings/mtd/nxp-spifi.txt58
-rw-r--r--Documentation/devicetree/bindings/mtd/orion-nand.txt2
-rw-r--r--Documentation/devicetree/bindings/mtd/oxnas-nand.txt41
-rw-r--r--Documentation/devicetree/bindings/mtd/partition.txt157
-rw-r--r--Documentation/devicetree/bindings/mtd/partitions/arm,arm-firmware-suite.yaml30
-rw-r--r--Documentation/devicetree/bindings/mtd/partitions/binman.yaml53
-rw-r--r--Documentation/devicetree/bindings/mtd/partitions/brcm,bcm4908-partitions.yaml72
-rw-r--r--Documentation/devicetree/bindings/mtd/partitions/brcm,bcm947xx-cfe-partitions.txt42
-rw-r--r--Documentation/devicetree/bindings/mtd/partitions/brcm,bcm947xx-cfe-partitions.yaml50
-rw-r--r--Documentation/devicetree/bindings/mtd/partitions/brcm,bcm963xx-cfe-nor-partitions.txt24
-rw-r--r--Documentation/devicetree/bindings/mtd/partitions/brcm,bcm963xx-imagetag.txt45
-rw-r--r--Documentation/devicetree/bindings/mtd/partitions/brcm,trx.txt5
-rw-r--r--Documentation/devicetree/bindings/mtd/partitions/fixed-partitions.yaml185
-rw-r--r--Documentation/devicetree/bindings/mtd/partitions/linksys,ns-partitions.yaml76
-rw-r--r--Documentation/devicetree/bindings/mtd/partitions/linux,ubi.yaml75
-rw-r--r--Documentation/devicetree/bindings/mtd/partitions/nvmem-cells.yaml100
-rw-r--r--Documentation/devicetree/bindings/mtd/partitions/partition.yaml141
-rw-r--r--Documentation/devicetree/bindings/mtd/partitions/partitions.yaml42
-rw-r--r--Documentation/devicetree/bindings/mtd/partitions/qcom,smem-part.yaml62
-rw-r--r--Documentation/devicetree/bindings/mtd/partitions/redboot-fis.yaml48
-rw-r--r--Documentation/devicetree/bindings/mtd/partitions/seama.yaml44
-rw-r--r--Documentation/devicetree/bindings/mtd/partitions/tplink,safeloader-partitions.yaml57
-rw-r--r--Documentation/devicetree/bindings/mtd/partitions/u-boot.yaml56
-rw-r--r--Documentation/devicetree/bindings/mtd/partitions/ubi-volume.yaml40
-rw-r--r--Documentation/devicetree/bindings/mtd/qcom,nandc.yaml241
-rw-r--r--Documentation/devicetree/bindings/mtd/qcom_nandc.txt138
-rw-r--r--Documentation/devicetree/bindings/mtd/raw-nand-chip.yaml111
-rw-r--r--Documentation/devicetree/bindings/mtd/realtek,rtl9301-ecc.yaml41
-rw-r--r--Documentation/devicetree/bindings/mtd/renesas-nandc.yaml66
-rw-r--r--Documentation/devicetree/bindings/mtd/rockchip,nand-controller.yaml165
-rw-r--r--Documentation/devicetree/bindings/mtd/samsung,s5pv210-onenand.yaml65
-rw-r--r--Documentation/devicetree/bindings/mtd/samsung-s3c2410.txt56
-rw-r--r--Documentation/devicetree/bindings/mtd/spi-nand.txt5
-rw-r--r--Documentation/devicetree/bindings/mtd/spi-nand.yaml28
-rw-r--r--Documentation/devicetree/bindings/mtd/st,stm32-fmc2-nand.yaml158
-rw-r--r--Documentation/devicetree/bindings/mtd/stm32-quadspi.txt43
-rw-r--r--Documentation/devicetree/bindings/mtd/sunxi-nand.txt48
-rw-r--r--Documentation/devicetree/bindings/mtd/tango-nand.txt38
-rw-r--r--Documentation/devicetree/bindings/mtd/technologic,nand.yaml45
-rw-r--r--Documentation/devicetree/bindings/mtd/ti,am654-hbmc.yaml71
-rw-r--r--Documentation/devicetree/bindings/mtd/ti,davinci-nand.yaml124
-rw-r--r--Documentation/devicetree/bindings/mtd/ti,elm.yaml72
-rw-r--r--Documentation/devicetree/bindings/mtd/ti,gpmc-nand.yaml126
-rw-r--r--Documentation/devicetree/bindings/mtd/ti,gpmc-onenand.yaml84
-rw-r--r--Documentation/devicetree/bindings/mtd/vf610-nfc.txt59
-rw-r--r--Documentation/devicetree/bindings/mux/adi,adg792a.txt2
-rw-r--r--Documentation/devicetree/bindings/mux/adi,adgs1408.txt2
-rw-r--r--Documentation/devicetree/bindings/mux/gpio-mux.txt69
-rw-r--r--Documentation/devicetree/bindings/mux/gpio-mux.yaml103
-rw-r--r--Documentation/devicetree/bindings/mux/mmio-mux.txt60
-rw-r--r--Documentation/devicetree/bindings/mux/mux-consumer.yaml67
-rw-r--r--Documentation/devicetree/bindings/mux/mux-controller.txt157
-rw-r--r--Documentation/devicetree/bindings/mux/mux-controller.yaml206
-rw-r--r--Documentation/devicetree/bindings/mux/reg-mux.yaml146
-rw-r--r--Documentation/devicetree/bindings/nds32/andestech-boards40
-rw-r--r--Documentation/devicetree/bindings/nds32/atl2c.txt28
-rw-r--r--Documentation/devicetree/bindings/nds32/cpus.txt38
-rw-r--r--Documentation/devicetree/bindings/net/actions,owl-emac.yaml96
-rw-r--r--Documentation/devicetree/bindings/net/adi,adin.yaml84
-rw-r--r--Documentation/devicetree/bindings/net/adi,adin1110.yaml81
-rw-r--r--Documentation/devicetree/bindings/net/aeonsemi,as21xxx.yaml122
-rw-r--r--Documentation/devicetree/bindings/net/airoha,an7583-mdio.yaml59
-rw-r--r--Documentation/devicetree/bindings/net/airoha,en7581-eth.yaml166
-rw-r--r--Documentation/devicetree/bindings/net/airoha,en7581-npu.yaml98
-rw-r--r--Documentation/devicetree/bindings/net/airoha,en8811h.yaml56
-rw-r--r--Documentation/devicetree/bindings/net/allwinner,sun4i-a10-emac.yaml67
-rw-r--r--Documentation/devicetree/bindings/net/allwinner,sun4i-a10-mdio.yaml68
-rw-r--r--Documentation/devicetree/bindings/net/allwinner,sun4i-emac.txt19
-rw-r--r--Documentation/devicetree/bindings/net/allwinner,sun4i-mdio.txt27
-rw-r--r--Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.txt27
-rw-r--r--Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.yaml66
-rw-r--r--Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml417
-rw-r--r--Documentation/devicetree/bindings/net/altera_tse.txt114
-rw-r--r--Documentation/devicetree/bindings/net/altr,gmii-to-sgmii-2.0.yaml49
-rw-r--r--Documentation/devicetree/bindings/net/altr,socfpga-stmmac.yaml178
-rw-r--r--Documentation/devicetree/bindings/net/altr,tse.yaml168
-rw-r--r--Documentation/devicetree/bindings/net/amd-xgbe.txt5
-rw-r--r--Documentation/devicetree/bindings/net/amlogic,g12a-mdio-mux.yaml80
-rw-r--r--Documentation/devicetree/bindings/net/amlogic,gxl-mdio-mux.yaml64
-rw-r--r--Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml182
-rw-r--r--Documentation/devicetree/bindings/net/apm,xgene-enet.yaml115
-rw-r--r--Documentation/devicetree/bindings/net/apm,xgene-mdio-rgmii.yaml54
-rw-r--r--Documentation/devicetree/bindings/net/apm-xgene-enet.txt91
-rw-r--r--Documentation/devicetree/bindings/net/apm-xgene-mdio.txt37
-rw-r--r--Documentation/devicetree/bindings/net/arc_emac.txt46
-rw-r--r--Documentation/devicetree/bindings/net/asix,ax88178.yaml70
-rw-r--r--Documentation/devicetree/bindings/net/asix,ax88796c.yaml74
-rw-r--r--Documentation/devicetree/bindings/net/aspeed,ast2600-mdio.yaml53
-rw-r--r--Documentation/devicetree/bindings/net/bluetooth.txt5
-rw-r--r--Documentation/devicetree/bindings/net/bluetooth/amlogic,w155s2-bt.yaml63
-rw-r--r--Documentation/devicetree/bindings/net/bluetooth/bluetooth-controller.yaml29
-rw-r--r--Documentation/devicetree/bindings/net/bluetooth/brcm,bcm4377-bluetooth.yaml82
-rw-r--r--Documentation/devicetree/bindings/net/bluetooth/brcm,bluetooth.yaml163
-rw-r--r--Documentation/devicetree/bindings/net/bluetooth/marvell,88w8897.yaml49
-rw-r--r--Documentation/devicetree/bindings/net/bluetooth/mediatek,bluetooth.txt80
-rw-r--r--Documentation/devicetree/bindings/net/bluetooth/mediatek,mt7622-bluetooth.yaml51
-rw-r--r--Documentation/devicetree/bindings/net/bluetooth/mediatek,mt7921s-bluetooth.yaml55
-rw-r--r--Documentation/devicetree/bindings/net/bluetooth/nokia,h4p-bluetooth.txt (renamed from Documentation/devicetree/bindings/net/nokia-bluetooth.txt)0
-rw-r--r--Documentation/devicetree/bindings/net/bluetooth/nxp,88w8987-bt.yaml109
-rw-r--r--Documentation/devicetree/bindings/net/bluetooth/qualcomm-bluetooth.yaml259
-rw-r--r--Documentation/devicetree/bindings/net/bluetooth/realtek,bluetooth.yaml68
-rw-r--r--Documentation/devicetree/bindings/net/bluetooth/ti,bluetooth.yaml95
-rw-r--r--Documentation/devicetree/bindings/net/brcm,amac.txt30
-rw-r--r--Documentation/devicetree/bindings/net/brcm,amac.yaml88
-rw-r--r--Documentation/devicetree/bindings/net/brcm,asp-v2.0.yaml158
-rw-r--r--Documentation/devicetree/bindings/net/brcm,bcm4908-enet.yaml55
-rw-r--r--Documentation/devicetree/bindings/net/brcm,bcm6368-mdio-mux.yaml52
-rw-r--r--Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt132
-rw-r--r--Documentation/devicetree/bindings/net/brcm,bcmgenet.txt124
-rw-r--r--Documentation/devicetree/bindings/net/brcm,bcmgenet.yaml143
-rw-r--r--Documentation/devicetree/bindings/net/brcm,iproc-mdio.txt23
-rw-r--r--Documentation/devicetree/bindings/net/brcm,iproc-mdio.yaml38
-rw-r--r--Documentation/devicetree/bindings/net/brcm,mdio-mux-iproc.txt62
-rw-r--r--Documentation/devicetree/bindings/net/brcm,mdio-mux-iproc.yaml80
-rw-r--r--Documentation/devicetree/bindings/net/brcm,systemport.txt33
-rw-r--r--Documentation/devicetree/bindings/net/brcm,systemport.yaml86
-rw-r--r--Documentation/devicetree/bindings/net/brcm,unimac-mdio.txt43
-rw-r--r--Documentation/devicetree/bindings/net/brcm,unimac-mdio.yaml89
-rw-r--r--Documentation/devicetree/bindings/net/broadcom-bluetooth.txt35
-rw-r--r--Documentation/devicetree/bindings/net/btusb.txt7
-rw-r--r--Documentation/devicetree/bindings/net/calxeda-xgmac.txt18
-rw-r--r--Documentation/devicetree/bindings/net/calxeda-xgmac.yaml49
-rw-r--r--Documentation/devicetree/bindings/net/can/allwinner,sun4i-a10-can.yaml80
-rw-r--r--Documentation/devicetree/bindings/net/can/atmel,at91sam9263-can.yaml58
-rw-r--r--Documentation/devicetree/bindings/net/can/atmel-can.txt14
-rw-r--r--Documentation/devicetree/bindings/net/can/bosch,c_can.yaml116
-rw-r--r--Documentation/devicetree/bindings/net/can/bosch,m_can.yaml175
-rw-r--r--Documentation/devicetree/bindings/net/can/c_can.txt65
-rw-r--r--Documentation/devicetree/bindings/net/can/can-controller.yaml27
-rw-r--r--Documentation/devicetree/bindings/net/can/can-transceiver.txt24
-rw-r--r--Documentation/devicetree/bindings/net/can/can-transceiver.yaml20
-rw-r--r--Documentation/devicetree/bindings/net/can/cc770.txt2
-rw-r--r--Documentation/devicetree/bindings/net/can/ctu,ctucanfd.yaml66
-rw-r--r--Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml225
-rw-r--r--Documentation/devicetree/bindings/net/can/fsl-flexcan.txt35
-rw-r--r--Documentation/devicetree/bindings/net/can/m_can.txt75
-rw-r--r--Documentation/devicetree/bindings/net/can/microchip,mcp2510.yaml70
-rw-r--r--Documentation/devicetree/bindings/net/can/microchip,mcp251x.txt24
-rw-r--r--Documentation/devicetree/bindings/net/can/microchip,mcp251xfd.yaml81
-rw-r--r--Documentation/devicetree/bindings/net/can/microchip,mpfs-can.yaml51
-rw-r--r--Documentation/devicetree/bindings/net/can/nxp,sja1000.yaml134
-rw-r--r--Documentation/devicetree/bindings/net/can/rcar_can.txt77
-rw-r--r--Documentation/devicetree/bindings/net/can/rcar_canfd.txt100
-rw-r--r--Documentation/devicetree/bindings/net/can/renesas,rcar-can.yaml139
-rw-r--r--Documentation/devicetree/bindings/net/can/renesas,rcar-canfd.yaml299
-rw-r--r--Documentation/devicetree/bindings/net/can/rockchip,rk3568v2-canfd.yaml74
-rw-r--r--Documentation/devicetree/bindings/net/can/sja1000.txt58
-rw-r--r--Documentation/devicetree/bindings/net/can/st,stm32-bxcan.yaml96
-rw-r--r--Documentation/devicetree/bindings/net/can/sun4i_can.txt36
-rw-r--r--Documentation/devicetree/bindings/net/can/ti,tcan4x5x.yaml199
-rw-r--r--Documentation/devicetree/bindings/net/can/xilinx,can.yaml169
-rw-r--r--Documentation/devicetree/bindings/net/can/xilinx_can.txt60
-rw-r--r--Documentation/devicetree/bindings/net/cdns,macb.yaml239
-rw-r--r--Documentation/devicetree/bindings/net/cirrus,ep9301-eth.yaml59
-rw-r--r--Documentation/devicetree/bindings/net/cortina,gemini-ethernet.txt92
-rw-r--r--Documentation/devicetree/bindings/net/cortina,gemini-ethernet.yaml138
-rw-r--r--Documentation/devicetree/bindings/net/cpsw-phy-sel.txt2
-rw-r--r--Documentation/devicetree/bindings/net/cpsw.txt14
-rw-r--r--Documentation/devicetree/bindings/net/davicom,dm9000.yaml59
-rw-r--r--Documentation/devicetree/bindings/net/davicom,dm9051.yaml62
-rw-r--r--Documentation/devicetree/bindings/net/davicom-dm9000.txt27
-rw-r--r--Documentation/devicetree/bindings/net/davinci-mdio.txt36
-rw-r--r--Documentation/devicetree/bindings/net/davinci_emac.txt3
-rw-r--r--Documentation/devicetree/bindings/net/dsa/arrow,xrs700x.yaml74
-rw-r--r--Documentation/devicetree/bindings/net/dsa/b53.txt145
-rw-r--r--Documentation/devicetree/bindings/net/dsa/brcm,b53.yaml276
-rw-r--r--Documentation/devicetree/bindings/net/dsa/brcm,sf2.yaml167
-rw-r--r--Documentation/devicetree/bindings/net/dsa/dsa-port.yaml81
-rw-r--r--Documentation/devicetree/bindings/net/dsa/dsa.txt406
-rw-r--r--Documentation/devicetree/bindings/net/dsa/dsa.yaml55
-rw-r--r--Documentation/devicetree/bindings/net/dsa/hirschmann,hellcreek.yaml132
-rw-r--r--Documentation/devicetree/bindings/net/dsa/ksz.txt72
-rw-r--r--Documentation/devicetree/bindings/net/dsa/lan9303.txt2
-rw-r--r--Documentation/devicetree/bindings/net/dsa/lantiq,gswip.yaml202
-rw-r--r--Documentation/devicetree/bindings/net/dsa/lantiq-gswip.txt143
-rw-r--r--Documentation/devicetree/bindings/net/dsa/marvell,mv88e6060.yaml88
-rw-r--r--Documentation/devicetree/bindings/net/dsa/marvell,mv88e6xxx.yaml337
-rw-r--r--Documentation/devicetree/bindings/net/dsa/marvell.txt106
-rw-r--r--Documentation/devicetree/bindings/net/dsa/mediatek,mt7530.yaml847
-rw-r--r--Documentation/devicetree/bindings/net/dsa/micrel,ks8995.yaml135
-rw-r--r--Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml264
-rw-r--r--Documentation/devicetree/bindings/net/dsa/microchip,lan937x.yaml193
-rw-r--r--Documentation/devicetree/bindings/net/dsa/mscc,ocelot.yaml260
-rw-r--r--Documentation/devicetree/bindings/net/dsa/mt7530.txt92
-rw-r--r--Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml209
-rw-r--r--Documentation/devicetree/bindings/net/dsa/qca,ar9331.yaml161
-rw-r--r--Documentation/devicetree/bindings/net/dsa/qca8k.txt110
-rw-r--r--Documentation/devicetree/bindings/net/dsa/qca8k.yaml321
-rw-r--r--Documentation/devicetree/bindings/net/dsa/realtek-smi.txt153
-rw-r--r--Documentation/devicetree/bindings/net/dsa/realtek.yaml390
-rw-r--r--Documentation/devicetree/bindings/net/dsa/renesas,rzn1-a5psw.yaml152
-rw-r--r--Documentation/devicetree/bindings/net/dsa/vitesse,vsc73xx.txt81
-rw-r--r--Documentation/devicetree/bindings/net/dsa/vitesse,vsc73xx.yaml194
-rw-r--r--Documentation/devicetree/bindings/net/dwmac-sun8i.txt200
-rw-r--r--Documentation/devicetree/bindings/net/emac_rockchip.txt52
-rw-r--r--Documentation/devicetree/bindings/net/engleder,tsnep.yaml119
-rw-r--r--Documentation/devicetree/bindings/net/ethernet-controller.yaml355
-rw-r--r--Documentation/devicetree/bindings/net/ethernet-phy-package.yaml52
-rw-r--r--Documentation/devicetree/bindings/net/ethernet-phy.yaml317
-rw-r--r--Documentation/devicetree/bindings/net/ethernet-switch-port.yaml26
-rw-r--r--Documentation/devicetree/bindings/net/ethernet-switch.yaml85
-rw-r--r--Documentation/devicetree/bindings/net/ethernet.txt67
-rw-r--r--Documentation/devicetree/bindings/net/faraday,ftgmac100.yaml120
-rw-r--r--Documentation/devicetree/bindings/net/fixed-link.txt55
-rw-r--r--Documentation/devicetree/bindings/net/fsl,cpm-enet.yaml59
-rw-r--r--Documentation/devicetree/bindings/net/fsl,cpm-mdio.yaml55
-rw-r--r--Documentation/devicetree/bindings/net/fsl,enetc-ierb.yaml38
-rw-r--r--Documentation/devicetree/bindings/net/fsl,enetc-mdio.yaml60
-rw-r--r--Documentation/devicetree/bindings/net/fsl,enetc.yaml88
-rw-r--r--Documentation/devicetree/bindings/net/fsl,fec.yaml277
-rw-r--r--Documentation/devicetree/bindings/net/fsl,fman-dtsec.yaml167
-rw-r--r--Documentation/devicetree/bindings/net/fsl,fman-mdio.yaml123
-rw-r--r--Documentation/devicetree/bindings/net/fsl,fman-muram.yaml40
-rw-r--r--Documentation/devicetree/bindings/net/fsl,fman-port.yaml75
-rw-r--r--Documentation/devicetree/bindings/net/fsl,fman.yaml210
-rw-r--r--Documentation/devicetree/bindings/net/fsl,gianfar-mdio.yaml112
-rw-r--r--Documentation/devicetree/bindings/net/fsl,gianfar.yaml248
-rw-r--r--Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml56
-rw-r--r--Documentation/devicetree/bindings/net/fsl-fec.txt85
-rw-r--r--Documentation/devicetree/bindings/net/fsl-fman.txt634
-rw-r--r--Documentation/devicetree/bindings/net/fsl-tsec-phy.txt82
-rw-r--r--Documentation/devicetree/bindings/net/ftgmac100.txt34
-rw-r--r--Documentation/devicetree/bindings/net/gpmc-eth.txt97
-rw-r--r--Documentation/devicetree/bindings/net/hisilicon-femac.txt4
-rw-r--r--Documentation/devicetree/bindings/net/hisilicon-hip04-net.txt17
-rw-r--r--Documentation/devicetree/bindings/net/hisilicon-hix5hd2-gmac.txt4
-rw-r--r--Documentation/devicetree/bindings/net/icplus-ip101ag.txt19
-rw-r--r--Documentation/devicetree/bindings/net/idt,3243x-emac.yaml73
-rw-r--r--Documentation/devicetree/bindings/net/ieee802154/at86rf230.txt27
-rw-r--r--Documentation/devicetree/bindings/net/ieee802154/atmel,at86rf233.yaml66
-rw-r--r--Documentation/devicetree/bindings/net/ieee802154/ca8210.txt2
-rw-r--r--Documentation/devicetree/bindings/net/ingenic,mac.yaml77
-rw-r--r--Documentation/devicetree/bindings/net/intel,dwmac-plat.yaml137
-rw-r--r--Documentation/devicetree/bindings/net/intel,ixp46x-ptp-timer.yaml54
-rw-r--r--Documentation/devicetree/bindings/net/intel,ixp4xx-ethernet.yaml111
-rw-r--r--Documentation/devicetree/bindings/net/intel,ixp4xx-hss.yaml123
-rw-r--r--Documentation/devicetree/bindings/net/keystone-netcp.txt54
-rw-r--r--Documentation/devicetree/bindings/net/lantiq,etop-xway.yaml68
-rw-r--r--Documentation/devicetree/bindings/net/lantiq,pef2256.yaml213
-rw-r--r--Documentation/devicetree/bindings/net/lantiq,xrx200-net.txt21
-rw-r--r--Documentation/devicetree/bindings/net/lantiq,xrx200-net.yaml58
-rw-r--r--Documentation/devicetree/bindings/net/litex,liteeth.yaml97
-rw-r--r--Documentation/devicetree/bindings/net/loongson,ls1b-gmac.yaml114
-rw-r--r--Documentation/devicetree/bindings/net/loongson,ls1c-emac.yaml113
-rw-r--r--Documentation/devicetree/bindings/net/lpc-eth.txt23
-rw-r--r--Documentation/devicetree/bindings/net/macb.txt49
-rw-r--r--Documentation/devicetree/bindings/net/marvell,aquantia.yaml122
-rw-r--r--Documentation/devicetree/bindings/net/marvell,armada-370-neta.yaml79
-rw-r--r--Documentation/devicetree/bindings/net/marvell,armada-380-neta-bm.yaml60
-rw-r--r--Documentation/devicetree/bindings/net/marvell,dfx-server.yaml62
-rw-r--r--Documentation/devicetree/bindings/net/marvell,mvusb.yaml69
-rw-r--r--Documentation/devicetree/bindings/net/marvell,orion-mdio.yaml60
-rw-r--r--Documentation/devicetree/bindings/net/marvell,pp2.yaml305
-rw-r--r--Documentation/devicetree/bindings/net/marvell,prestera.txt47
-rw-r--r--Documentation/devicetree/bindings/net/marvell,prestera.yaml91
-rw-r--r--Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt49
-rw-r--r--Documentation/devicetree/bindings/net/marvell-neta-bm.txt47
-rw-r--r--Documentation/devicetree/bindings/net/marvell-orion-mdio.txt54
-rw-r--r--Documentation/devicetree/bindings/net/marvell-pp2.txt139
-rw-r--r--Documentation/devicetree/bindings/net/marvell-pxa168.txt4
-rw-r--r--Documentation/devicetree/bindings/net/maxim,ds26522.txt13
-rw-r--r--Documentation/devicetree/bindings/net/maxim,ds26522.yaml40
-rw-r--r--Documentation/devicetree/bindings/net/maxlinear,gpy2xx.yaml48
-rw-r--r--Documentation/devicetree/bindings/net/mctp-i2c-controller.yaml92
-rw-r--r--Documentation/devicetree/bindings/net/mdio-gpio.txt26
-rw-r--r--Documentation/devicetree/bindings/net/mdio-gpio.yaml57
-rw-r--r--Documentation/devicetree/bindings/net/mdio-mux-gpio.txt119
-rw-r--r--Documentation/devicetree/bindings/net/mdio-mux-gpio.yaml103
-rw-r--r--Documentation/devicetree/bindings/net/mdio-mux-mmioreg.txt75
-rw-r--r--Documentation/devicetree/bindings/net/mdio-mux-mmioreg.yaml78
-rw-r--r--Documentation/devicetree/bindings/net/mdio-mux-multiplexer.yaml82
-rw-r--r--Documentation/devicetree/bindings/net/mdio-mux.txt129
-rw-r--r--Documentation/devicetree/bindings/net/mdio-mux.yaml41
-rw-r--r--Documentation/devicetree/bindings/net/mdio.txt38
-rw-r--r--Documentation/devicetree/bindings/net/mdio.yaml116
-rw-r--r--Documentation/devicetree/bindings/net/mediatek,mt7620-gsw.txt24
-rw-r--r--Documentation/devicetree/bindings/net/mediatek,net.yaml627
-rw-r--r--Documentation/devicetree/bindings/net/mediatek,star-emac.yaml107
-rw-r--r--Documentation/devicetree/bindings/net/mediatek-bluetooth.txt35
-rw-r--r--Documentation/devicetree/bindings/net/mediatek-dwmac.yaml190
-rw-r--r--Documentation/devicetree/bindings/net/mediatek-net.txt87
-rw-r--r--Documentation/devicetree/bindings/net/meson-dwmac.txt71
-rw-r--r--Documentation/devicetree/bindings/net/micrel,ks8851.yaml98
-rw-r--r--Documentation/devicetree/bindings/net/micrel-ks8851.txt18
-rw-r--r--Documentation/devicetree/bindings/net/micrel-ks8995.txt20
-rw-r--r--Documentation/devicetree/bindings/net/micrel-ksz90x1.txt131
-rw-r--r--Documentation/devicetree/bindings/net/micrel.txt12
-rw-r--r--Documentation/devicetree/bindings/net/microchip,enc28j60.txt3
-rw-r--r--Documentation/devicetree/bindings/net/microchip,lan78xx.txt5
-rw-r--r--Documentation/devicetree/bindings/net/microchip,lan8650.yaml74
-rw-r--r--Documentation/devicetree/bindings/net/microchip,lan95xx.yaml67
-rw-r--r--Documentation/devicetree/bindings/net/microchip,lan966x-switch.yaml171
-rw-r--r--Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml269
-rw-r--r--Documentation/devicetree/bindings/net/motorcomm,yt8xxx.yaml151
-rw-r--r--Documentation/devicetree/bindings/net/mscc,miim.yaml71
-rw-r--r--Documentation/devicetree/bindings/net/mscc,vsc7514-switch.yaml223
-rw-r--r--Documentation/devicetree/bindings/net/mscc-miim.txt26
-rw-r--r--Documentation/devicetree/bindings/net/mscc-ocelot.txt79
-rw-r--r--Documentation/devicetree/bindings/net/mscc-phy-vsc8531.txt7
-rw-r--r--Documentation/devicetree/bindings/net/network-class.yaml46
-rw-r--r--Documentation/devicetree/bindings/net/nfc/marvell,nci.yaml170
-rw-r--r--Documentation/devicetree/bindings/net/nfc/nfcmrvl.txt84
-rw-r--r--Documentation/devicetree/bindings/net/nfc/nxp,nci.yaml63
-rw-r--r--Documentation/devicetree/bindings/net/nfc/nxp,pn532.yaml65
-rw-r--r--Documentation/devicetree/bindings/net/nfc/nxp,pn544.yaml58
-rw-r--r--Documentation/devicetree/bindings/net/nfc/nxp-nci.txt33
-rw-r--r--Documentation/devicetree/bindings/net/nfc/pn533-i2c.txt29
-rw-r--r--Documentation/devicetree/bindings/net/nfc/pn544.txt33
-rw-r--r--Documentation/devicetree/bindings/net/nfc/s3fwrn5.txt25
-rw-r--r--Documentation/devicetree/bindings/net/nfc/samsung,s3fwrn5.yaml99
-rw-r--r--Documentation/devicetree/bindings/net/nfc/st,st-nci.yaml105
-rw-r--r--Documentation/devicetree/bindings/net/nfc/st,st21nfca.yaml64
-rw-r--r--Documentation/devicetree/bindings/net/nfc/st,st95hf.yaml58
-rw-r--r--Documentation/devicetree/bindings/net/nfc/st-nci-i2c.txt38
-rw-r--r--Documentation/devicetree/bindings/net/nfc/st-nci-spi.txt36
-rw-r--r--Documentation/devicetree/bindings/net/nfc/st21nfca.txt37
-rw-r--r--Documentation/devicetree/bindings/net/nfc/st95hf.txt45
-rw-r--r--Documentation/devicetree/bindings/net/nfc/ti,trf7970a.yaml106
-rw-r--r--Documentation/devicetree/bindings/net/nfc/trf7970a.txt43
-rw-r--r--Documentation/devicetree/bindings/net/nixge.txt72
-rw-r--r--Documentation/devicetree/bindings/net/nvidia,tegra234-mgbe.yaml162
-rw-r--r--Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml111
-rw-r--r--Documentation/devicetree/bindings/net/nxp,lpc-eth.yaml48
-rw-r--r--Documentation/devicetree/bindings/net/nxp,lpc1850-dwmac.txt20
-rw-r--r--Documentation/devicetree/bindings/net/nxp,lpc1850-dwmac.yaml85
-rw-r--r--Documentation/devicetree/bindings/net/nxp,netc-blk-ctrl.yaml104
-rw-r--r--Documentation/devicetree/bindings/net/nxp,s32-dwmac.yaml105
-rw-r--r--Documentation/devicetree/bindings/net/nxp,tja11xx.yaml127
-rw-r--r--Documentation/devicetree/bindings/net/oxnas-dwmac.txt38
-rw-r--r--Documentation/devicetree/bindings/net/pcs/fsl,lynx-pcs.yaml40
-rw-r--r--Documentation/devicetree/bindings/net/pcs/mediatek,sgmiisys.yaml102
-rw-r--r--Documentation/devicetree/bindings/net/pcs/renesas,rzn1-miic.yaml252
-rw-r--r--Documentation/devicetree/bindings/net/pcs/snps,dw-xpcs.yaml136
-rw-r--r--Documentation/devicetree/bindings/net/phy.txt74
-rw-r--r--Documentation/devicetree/bindings/net/pse-pd/microchip,pd692x0.yaml192
-rw-r--r--Documentation/devicetree/bindings/net/pse-pd/podl-pse-regulator.yaml40
-rw-r--r--Documentation/devicetree/bindings/net/pse-pd/pse-controller.yaml128
-rw-r--r--Documentation/devicetree/bindings/net/pse-pd/skyworks,si3474.yaml144
-rw-r--r--Documentation/devicetree/bindings/net/pse-pd/ti,tps23881.yaml126
-rw-r--r--Documentation/devicetree/bindings/net/qca,ar71xx.yaml194
-rw-r--r--Documentation/devicetree/bindings/net/qca,ar803x.yaml177
-rw-r--r--Documentation/devicetree/bindings/net/qca,qca7000.txt85
-rw-r--r--Documentation/devicetree/bindings/net/qca,qca7000.yaml109
-rw-r--r--Documentation/devicetree/bindings/net/qca,qca808x.yaml54
-rw-r--r--Documentation/devicetree/bindings/net/qcom,bam-dmux.yaml92
-rw-r--r--Documentation/devicetree/bindings/net/qcom,ethqos.yaml136
-rw-r--r--Documentation/devicetree/bindings/net/qcom,ipa.yaml279
-rw-r--r--Documentation/devicetree/bindings/net/qcom,ipq4019-mdio.yaml119
-rw-r--r--Documentation/devicetree/bindings/net/qcom,ipq8064-mdio.yaml61
-rw-r--r--Documentation/devicetree/bindings/net/qcom,ipq9574-ppe.yaml533
-rw-r--r--Documentation/devicetree/bindings/net/qcom,qca807x.yaml184
-rw-r--r--Documentation/devicetree/bindings/net/qcom-emac.txt2
-rw-r--r--Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt55
-rw-r--r--Documentation/devicetree/bindings/net/ralink,rt2880-net.txt59
-rw-r--r--Documentation/devicetree/bindings/net/ralink,rt3050-esw.txt30
-rw-r--r--Documentation/devicetree/bindings/net/realtek,rtl82xx.yaml83
-rw-r--r--Documentation/devicetree/bindings/net/realtek,rtl9301-mdio.yaml86
-rw-r--r--Documentation/devicetree/bindings/net/realtek,rtl9301-switch.yaml175
-rw-r--r--Documentation/devicetree/bindings/net/renesas,ether.yaml131
-rw-r--r--Documentation/devicetree/bindings/net/renesas,etheravb.yaml375
-rw-r--r--Documentation/devicetree/bindings/net/renesas,ethertsn.yaml130
-rw-r--r--Documentation/devicetree/bindings/net/renesas,r8a779f0-ether-switch.yaml262
-rw-r--r--Documentation/devicetree/bindings/net/renesas,ravb.txt130
-rw-r--r--Documentation/devicetree/bindings/net/renesas,rzn1-gmac.yaml75
-rw-r--r--Documentation/devicetree/bindings/net/renesas,rzv2h-gbeth.yaml303
-rw-r--r--Documentation/devicetree/bindings/net/rfkill-gpio.yaml56
-rw-r--r--Documentation/devicetree/bindings/net/rockchip,emac.yaml115
-rw-r--r--Documentation/devicetree/bindings/net/rockchip-dwmac.txt76
-rw-r--r--Documentation/devicetree/bindings/net/rockchip-dwmac.yaml183
-rw-r--r--Documentation/devicetree/bindings/net/samsung-sxgbe.txt8
-rw-r--r--Documentation/devicetree/bindings/net/sff,sfp.txt85
-rw-r--r--Documentation/devicetree/bindings/net/sff,sfp.yaml143
-rw-r--r--Documentation/devicetree/bindings/net/sh_eth.txt69
-rw-r--r--Documentation/devicetree/bindings/net/smsc,lan9115.yaml108
-rw-r--r--Documentation/devicetree/bindings/net/smsc,lan91c111.yaml61
-rw-r--r--Documentation/devicetree/bindings/net/smsc-lan87xx.txt4
-rw-r--r--Documentation/devicetree/bindings/net/smsc-lan91c111.txt17
-rw-r--r--Documentation/devicetree/bindings/net/smsc911x.txt43
-rw-r--r--Documentation/devicetree/bindings/net/snps,dwc-qos-ethernet.txt7
-rw-r--r--Documentation/devicetree/bindings/net/snps,dwmac.yaml732
-rw-r--r--Documentation/devicetree/bindings/net/socfpga-dwmac.txt51
-rw-r--r--Documentation/devicetree/bindings/net/socionext,synquacer-netsec.yaml73
-rw-r--r--Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt64
-rw-r--r--Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml142
-rw-r--r--Documentation/devicetree/bindings/net/socionext-netsec.txt53
-rw-r--r--Documentation/devicetree/bindings/net/sophgo,cv1800b-dwmac.yaml114
-rw-r--r--Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml132
-rw-r--r--Documentation/devicetree/bindings/net/spacemit,k1-emac.yaml81
-rw-r--r--Documentation/devicetree/bindings/net/starfive,jh7110-dwmac.yaml190
-rw-r--r--Documentation/devicetree/bindings/net/sti-dwmac.txt5
-rw-r--r--Documentation/devicetree/bindings/net/stm32-dwmac.txt45
-rw-r--r--Documentation/devicetree/bindings/net/stm32-dwmac.yaml219
-rw-r--r--Documentation/devicetree/bindings/net/stmmac.txt179
-rw-r--r--Documentation/devicetree/bindings/net/sunplus,sp7021-emac.yaml143
-rw-r--r--Documentation/devicetree/bindings/net/tesla,fsd-ethqos.yaml118
-rw-r--r--Documentation/devicetree/bindings/net/thead,th1520-gmac.yaml112
-rw-r--r--Documentation/devicetree/bindings/net/ti,cc1352p7.yaml58
-rw-r--r--Documentation/devicetree/bindings/net/ti,cpsw-switch.yaml242
-rw-r--r--Documentation/devicetree/bindings/net/ti,davinci-mdio.yaml80
-rw-r--r--Documentation/devicetree/bindings/net/ti,dp83822.yaml147
-rw-r--r--Documentation/devicetree/bindings/net/ti,dp83867.txt48
-rw-r--r--Documentation/devicetree/bindings/net/ti,dp83867.yaml141
-rw-r--r--Documentation/devicetree/bindings/net/ti,dp83869.yaml100
-rw-r--r--Documentation/devicetree/bindings/net/ti,icss-iep.yaml58
-rw-r--r--Documentation/devicetree/bindings/net/ti,icssg-prueth.yaml233
-rw-r--r--Documentation/devicetree/bindings/net/ti,icssm-prueth.yaml233
-rw-r--r--Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml319
-rw-r--r--Documentation/devicetree/bindings/net/ti,k3-am654-cpts.yaml153
-rw-r--r--Documentation/devicetree/bindings/net/ti,pruss-ecap.yaml32
-rw-r--r--Documentation/devicetree/bindings/net/ti-bluetooth.txt61
-rw-r--r--Documentation/devicetree/bindings/net/toshiba,visconti-dwmac.yaml92
-rw-r--r--Documentation/devicetree/bindings/net/vertexcom-mse102x.yaml71
-rw-r--r--Documentation/devicetree/bindings/net/via,vt8500-rhine.yaml41
-rw-r--r--Documentation/devicetree/bindings/net/via-rhine.txt17
-rw-r--r--Documentation/devicetree/bindings/net/wireless/brcm,bcm4329-fmac.yaml165
-rw-r--r--Documentation/devicetree/bindings/net/wireless/brcm,bcm43xx-fmac.txt38
-rw-r--r--Documentation/devicetree/bindings/net/wireless/esp,esp8089.txt30
-rw-r--r--Documentation/devicetree/bindings/net/wireless/esp,esp8089.yaml43
-rw-r--r--Documentation/devicetree/bindings/net/wireless/ieee80211.txt24
-rw-r--r--Documentation/devicetree/bindings/net/wireless/ieee80211.yaml44
-rw-r--r--Documentation/devicetree/bindings/net/wireless/marvell,sd8787.yaml92
-rw-r--r--Documentation/devicetree/bindings/net/wireless/marvell-8xxx.txt68
-rw-r--r--Documentation/devicetree/bindings/net/wireless/mediatek,mt76.txt32
-rw-r--r--Documentation/devicetree/bindings/net/wireless/mediatek,mt76.yaml324
-rw-r--r--Documentation/devicetree/bindings/net/wireless/microchip,wilc1000.yaml99
-rw-r--r--Documentation/devicetree/bindings/net/wireless/qca,ath9k.txt48
-rw-r--r--Documentation/devicetree/bindings/net/wireless/qca,ath9k.yaml106
-rw-r--r--Documentation/devicetree/bindings/net/wireless/qcom,ath10k.txt158
-rw-r--r--Documentation/devicetree/bindings/net/wireless/qcom,ath10k.yaml394
-rw-r--r--Documentation/devicetree/bindings/net/wireless/qcom,ath11k-pci.yaml146
-rw-r--r--Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml469
-rw-r--r--Documentation/devicetree/bindings/net/wireless/qcom,ath12k-wsi.yaml210
-rw-r--r--Documentation/devicetree/bindings/net/wireless/qcom,ath12k.yaml104
-rw-r--r--Documentation/devicetree/bindings/net/wireless/qcom,ipq5332-wifi.yaml315
-rw-r--r--Documentation/devicetree/bindings/net/wireless/ralink,rt2880.yaml49
-rw-r--r--Documentation/devicetree/bindings/net/wireless/realtek,rtl8188e.yaml50
-rw-r--r--Documentation/devicetree/bindings/net/wireless/silabs,wfx.yaml129
-rw-r--r--Documentation/devicetree/bindings/net/wireless/ti,wl1251.txt29
-rw-r--r--Documentation/devicetree/bindings/net/wireless/ti,wlcore,spi.txt57
-rw-r--r--Documentation/devicetree/bindings/net/wireless/ti,wlcore.txt45
-rw-r--r--Documentation/devicetree/bindings/net/wireless/ti,wlcore.yaml142
-rw-r--r--Documentation/devicetree/bindings/net/wireless/wireless-controller.yaml23
-rw-r--r--Documentation/devicetree/bindings/net/wiznet,w5x00.txt50
-rw-r--r--Documentation/devicetree/bindings/net/xilinx_axienet.txt55
-rw-r--r--Documentation/devicetree/bindings/net/xilinx_gmii2rgmii.txt35
-rw-r--r--Documentation/devicetree/bindings/net/xlnx,axi-ethernet.yaml200
-rw-r--r--Documentation/devicetree/bindings/net/xlnx,emaclite.yaml68
-rw-r--r--Documentation/devicetree/bindings/net/xlnx,gmii-to-rgmii.yaml60
-rw-r--r--Documentation/devicetree/bindings/nios2/nios2.txt2
-rw-r--r--Documentation/devicetree/bindings/npu/rockchip,rk3588-rknn-core.yaml112
-rw-r--r--Documentation/devicetree/bindings/numa.txt275
-rw-r--r--Documentation/devicetree/bindings/nvme/apple,nvme-ans.yaml119
-rw-r--r--Documentation/devicetree/bindings/nvmem/airoha,an8855-efuse.yaml123
-rw-r--r--Documentation/devicetree/bindings/nvmem/allwinner,sun4i-a10-sid.yaml57
-rw-r--r--Documentation/devicetree/bindings/nvmem/allwinner,sunxi-sid.txt28
-rw-r--r--Documentation/devicetree/bindings/nvmem/amlogic,meson-gxbb-efuse.yaml61
-rw-r--r--Documentation/devicetree/bindings/nvmem/amlogic,meson6-efuse.yaml58
-rw-r--r--Documentation/devicetree/bindings/nvmem/amlogic-efuse.txt39
-rw-r--r--Documentation/devicetree/bindings/nvmem/amlogic-meson-mx-efuse.txt22
-rw-r--r--Documentation/devicetree/bindings/nvmem/apple,efuses.yaml51
-rw-r--r--Documentation/devicetree/bindings/nvmem/apple,spmi-nvmem.yaml54
-rw-r--r--Documentation/devicetree/bindings/nvmem/brcm,nvram.yaml73
-rw-r--r--Documentation/devicetree/bindings/nvmem/fsl,layerscape-sfp.yaml63
-rw-r--r--Documentation/devicetree/bindings/nvmem/fsl,scu-ocotp.yaml56
-rw-r--r--Documentation/devicetree/bindings/nvmem/fsl,t1023-sfp.yaml37
-rw-r--r--Documentation/devicetree/bindings/nvmem/fsl,vf610-ocotp.yaml47
-rw-r--r--Documentation/devicetree/bindings/nvmem/imx-iim.txt22
-rw-r--r--Documentation/devicetree/bindings/nvmem/imx-iim.yaml59
-rw-r--r--Documentation/devicetree/bindings/nvmem/imx-ocotp.txt43
-rw-r--r--Documentation/devicetree/bindings/nvmem/imx-ocotp.yaml89
-rw-r--r--Documentation/devicetree/bindings/nvmem/ingenic,jz4780-efuse.yaml45
-rw-r--r--Documentation/devicetree/bindings/nvmem/layouts/fixed-cell.yaml55
-rw-r--r--Documentation/devicetree/bindings/nvmem/layouts/fixed-layout.yaml62
-rw-r--r--Documentation/devicetree/bindings/nvmem/layouts/kontron,sl28-vpd.yaml69
-rw-r--r--Documentation/devicetree/bindings/nvmem/layouts/nvmem-layout.yaml32
-rw-r--r--Documentation/devicetree/bindings/nvmem/layouts/onie,tlv-layout.yaml147
-rw-r--r--Documentation/devicetree/bindings/nvmem/layouts/u-boot,env.yaml132
-rw-r--r--Documentation/devicetree/bindings/nvmem/lpc1857-eeprom.txt28
-rw-r--r--Documentation/devicetree/bindings/nvmem/maxim,max77759-nvmem.yaml32
-rw-r--r--Documentation/devicetree/bindings/nvmem/mediatek,efuse.yaml107
-rw-r--r--Documentation/devicetree/bindings/nvmem/microchip,lan9662-otpc.yaml45
-rw-r--r--Documentation/devicetree/bindings/nvmem/microchip,sama7g5-otpc.yaml51
-rw-r--r--Documentation/devicetree/bindings/nvmem/mtk-efuse.txt39
-rw-r--r--Documentation/devicetree/bindings/nvmem/mxs-ocotp.txt24
-rw-r--r--Documentation/devicetree/bindings/nvmem/mxs-ocotp.yaml49
-rw-r--r--Documentation/devicetree/bindings/nvmem/nintendo-otp.yaml44
-rw-r--r--Documentation/devicetree/bindings/nvmem/nvmem-consumer.yaml45
-rw-r--r--Documentation/devicetree/bindings/nvmem/nvmem-deprecated-cells.yaml28
-rw-r--r--Documentation/devicetree/bindings/nvmem/nvmem-provider.yaml18
-rw-r--r--Documentation/devicetree/bindings/nvmem/nvmem.txt81
-rw-r--r--Documentation/devicetree/bindings/nvmem/nvmem.yaml92
-rw-r--r--Documentation/devicetree/bindings/nvmem/nxp,lpc1857-eeprom.yaml61
-rw-r--r--Documentation/devicetree/bindings/nvmem/nxp,s32g-ocotp-nvmem.yaml45
-rw-r--r--Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml136
-rw-r--r--Documentation/devicetree/bindings/nvmem/qcom,sec-qfprom.yaml56
-rw-r--r--Documentation/devicetree/bindings/nvmem/qcom,spmi-sdam.yaml58
-rw-r--r--Documentation/devicetree/bindings/nvmem/qfprom.txt35
-rw-r--r--Documentation/devicetree/bindings/nvmem/renesas,rcar-efuse.yaml68
-rw-r--r--Documentation/devicetree/bindings/nvmem/renesas,rcar-otp.yaml43
-rw-r--r--Documentation/devicetree/bindings/nvmem/rmem.yaml54
-rw-r--r--Documentation/devicetree/bindings/nvmem/rockchip,otp.yaml148
-rw-r--r--Documentation/devicetree/bindings/nvmem/rockchip-efuse.txt54
-rw-r--r--Documentation/devicetree/bindings/nvmem/rockchip-efuse.yaml73
-rw-r--r--Documentation/devicetree/bindings/nvmem/sc27xx-efuse.txt52
-rw-r--r--Documentation/devicetree/bindings/nvmem/snvs-lpgpr.txt21
-rw-r--r--Documentation/devicetree/bindings/nvmem/snvs-lpgpr.yaml41
-rw-r--r--Documentation/devicetree/bindings/nvmem/socionext,uniphier-efuse.yaml84
-rw-r--r--Documentation/devicetree/bindings/nvmem/sprd,sc2731-efuse.yaml39
-rw-r--r--Documentation/devicetree/bindings/nvmem/sprd,ums312-efuse.yaml61
-rw-r--r--Documentation/devicetree/bindings/nvmem/st,stm32-romem.yaml73
-rw-r--r--Documentation/devicetree/bindings/nvmem/sunplus,sp7021-ocotp.yaml79
-rw-r--r--Documentation/devicetree/bindings/nvmem/uniphier-efuse.txt49
-rw-r--r--Documentation/devicetree/bindings/nvmem/vf610-ocotp.txt19
-rw-r--r--Documentation/devicetree/bindings/nvmem/xlnx,zynqmp-nvmem.yaml42
-rw-r--r--Documentation/devicetree/bindings/nvmem/zii,rave-sp-eeprom.txt40
-rw-r--r--Documentation/devicetree/bindings/nvmem/zii,rave-sp-eeprom.yaml54
-rw-r--r--Documentation/devicetree/bindings/opp/allwinner,sun50i-h6-operating-points.yaml135
-rw-r--r--Documentation/devicetree/bindings/opp/kryo-cpufreq.txt680
-rw-r--r--Documentation/devicetree/bindings/opp/operating-points-v2-ti-cpu.yaml110
-rw-r--r--Documentation/devicetree/bindings/opp/opp-v1.yaml67
-rw-r--r--Documentation/devicetree/bindings/opp/opp-v2-base.yaml247
-rw-r--r--Documentation/devicetree/bindings/opp/opp-v2-kryo-cpu.yaml309
-rw-r--r--Documentation/devicetree/bindings/opp/opp-v2-qcom-adreno.yaml96
-rw-r--r--Documentation/devicetree/bindings/opp/opp-v2-qcom-level.yaml63
-rw-r--r--Documentation/devicetree/bindings/opp/opp-v2.yaml475
-rw-r--r--Documentation/devicetree/bindings/opp/opp.txt545
-rw-r--r--Documentation/devicetree/bindings/opp/ti,omap-opp-supply.yaml101
-rw-r--r--Documentation/devicetree/bindings/opp/ti-omap5-opp-supply.txt63
-rw-r--r--Documentation/devicetree/bindings/pci/83xx-512x-pci.txt40
-rw-r--r--Documentation/devicetree/bindings/pci/aardvark-pci.txt55
-rw-r--r--Documentation/devicetree/bindings/pci/altera-pcie-msi.txt27
-rw-r--r--Documentation/devicetree/bindings/pci/altera-pcie.txt48
-rw-r--r--Documentation/devicetree/bindings/pci/altr,pcie-root-port.yaml124
-rw-r--r--Documentation/devicetree/bindings/pci/amazon,al-alpine-v3-pcie.yaml71
-rw-r--r--Documentation/devicetree/bindings/pci/amd,versal2-mdb-host.yaml143
-rw-r--r--Documentation/devicetree/bindings/pci/amlogic,axg-pcie.yaml134
-rw-r--r--Documentation/devicetree/bindings/pci/apm,xgene-pcie.yaml84
-rw-r--r--Documentation/devicetree/bindings/pci/apple,pcie.yaml194
-rw-r--r--Documentation/devicetree/bindings/pci/arm,juno-r1-pcie.txt10
-rw-r--r--Documentation/devicetree/bindings/pci/axis,artpec6-pcie.txt50
-rw-r--r--Documentation/devicetree/bindings/pci/axis,artpec6-pcie.yaml118
-rw-r--r--Documentation/devicetree/bindings/pci/baikal,bt1-pcie.yaml168
-rw-r--r--Documentation/devicetree/bindings/pci/brcm,iproc-pcie.txt133
-rw-r--r--Documentation/devicetree/bindings/pci/brcm,iproc-pcie.yaml174
-rw-r--r--Documentation/devicetree/bindings/pci/brcm,stb-pcie.yaml237
-rw-r--r--Documentation/devicetree/bindings/pci/cdns,cdns-pcie-ep.txt27
-rw-r--r--Documentation/devicetree/bindings/pci/cdns,cdns-pcie-ep.yaml50
-rw-r--r--Documentation/devicetree/bindings/pci/cdns,cdns-pcie-host.txt66
-rw-r--r--Documentation/devicetree/bindings/pci/cdns,cdns-pcie-host.yaml72
-rw-r--r--Documentation/devicetree/bindings/pci/cdns-pcie-ep.yaml24
-rw-r--r--Documentation/devicetree/bindings/pci/cdns-pcie-host.yaml37
-rw-r--r--Documentation/devicetree/bindings/pci/cdns-pcie.yaml25
-rw-r--r--Documentation/devicetree/bindings/pci/designware-pcie-ecam.txt42
-rw-r--r--Documentation/devicetree/bindings/pci/designware-pcie.txt70
-rw-r--r--Documentation/devicetree/bindings/pci/faraday,ftpci100.txt135
-rw-r--r--Documentation/devicetree/bindings/pci/faraday,ftpci100.yaml174
-rw-r--r--Documentation/devicetree/bindings/pci/fsl,imx6q-pcie-common.yaml255
-rw-r--r--Documentation/devicetree/bindings/pci/fsl,imx6q-pcie-ep.yaml195
-rw-r--r--Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt79
-rw-r--r--Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml256
-rw-r--r--Documentation/devicetree/bindings/pci/fsl,layerscape-pcie-ep.yaml99
-rw-r--r--Documentation/devicetree/bindings/pci/fsl,layerscape-pcie.yaml182
-rw-r--r--Documentation/devicetree/bindings/pci/fsl,mpc8xxx-pci.yaml113
-rw-r--r--Documentation/devicetree/bindings/pci/fsl,pci.txt27
-rw-r--r--Documentation/devicetree/bindings/pci/hisilicon,kirin-pcie.yaml181
-rw-r--r--Documentation/devicetree/bindings/pci/hisilicon-histb-pcie.txt2
-rw-r--r--Documentation/devicetree/bindings/pci/hisilicon-pcie.txt85
-rw-r--r--Documentation/devicetree/bindings/pci/host-generic-pci.txt101
-rw-r--r--Documentation/devicetree/bindings/pci/host-generic-pci.yaml181
-rw-r--r--Documentation/devicetree/bindings/pci/intel,ixp4xx-pci.yaml100
-rw-r--r--Documentation/devicetree/bindings/pci/intel,keembay-pcie-ep.yaml69
-rw-r--r--Documentation/devicetree/bindings/pci/intel,keembay-pcie.yaml97
-rw-r--r--Documentation/devicetree/bindings/pci/intel-gw-pcie.yaml117
-rw-r--r--Documentation/devicetree/bindings/pci/kirin-pcie.txt50
-rw-r--r--Documentation/devicetree/bindings/pci/layerscape-pci.txt60
-rw-r--r--Documentation/devicetree/bindings/pci/loongson.yaml63
-rw-r--r--Documentation/devicetree/bindings/pci/marvell,armada-3700-pcie.yaml103
-rw-r--r--Documentation/devicetree/bindings/pci/marvell,armada8k-pcie.yaml100
-rw-r--r--Documentation/devicetree/bindings/pci/marvell,kirkwood-pcie.yaml280
-rw-r--r--Documentation/devicetree/bindings/pci/mbvl,gpex40-pcie.yaml173
-rw-r--r--Documentation/devicetree/bindings/pci/mediatek,mt7621-pcie.yaml177
-rw-r--r--Documentation/devicetree/bindings/pci/mediatek-pcie-cfg.yaml39
-rw-r--r--Documentation/devicetree/bindings/pci/mediatek-pcie-gen3.yaml353
-rw-r--r--Documentation/devicetree/bindings/pci/mediatek-pcie.txt216
-rw-r--r--Documentation/devicetree/bindings/pci/microchip,pcie-host.yaml97
-rw-r--r--Documentation/devicetree/bindings/pci/mobiveil-pcie.txt70
-rw-r--r--Documentation/devicetree/bindings/pci/mvebu-pci.txt294
-rw-r--r--Documentation/devicetree/bindings/pci/nvidia,tegra194-pcie-ep.yaml319
-rw-r--r--Documentation/devicetree/bindings/pci/nvidia,tegra194-pcie.yaml380
-rw-r--r--Documentation/devicetree/bindings/pci/nvidia,tegra20-pcie.txt20
-rw-r--r--Documentation/devicetree/bindings/pci/pci-armada8k.txt40
-rw-r--r--Documentation/devicetree/bindings/pci/pci-ep-bus.yaml58
-rw-r--r--Documentation/devicetree/bindings/pci/pci-ep.yaml127
-rw-r--r--Documentation/devicetree/bindings/pci/pci-iommu.txt171
-rw-r--r--Documentation/devicetree/bindings/pci/pci-keystone.txt63
-rw-r--r--Documentation/devicetree/bindings/pci/pci-msi.txt220
-rw-r--r--Documentation/devicetree/bindings/pci/pci-rcar-gen2.txt83
-rw-r--r--Documentation/devicetree/bindings/pci/pci-thunder-ecam.txt30
-rw-r--r--Documentation/devicetree/bindings/pci/pci-thunder-pem.txt43
-rw-r--r--Documentation/devicetree/bindings/pci/pci.txt26
-rw-r--r--Documentation/devicetree/bindings/pci/plda,xpressrich3-axi-common.yaml81
-rw-r--r--Documentation/devicetree/bindings/pci/plda,xpressrich3-axi.txt12
-rw-r--r--Documentation/devicetree/bindings/pci/qcom,pcie-common.yaml135
-rw-r--r--Documentation/devicetree/bindings/pci/qcom,pcie-ep.yaml332
-rw-r--r--Documentation/devicetree/bindings/pci/qcom,pcie-sa8255p.yaml122
-rw-r--r--Documentation/devicetree/bindings/pci/qcom,pcie-sa8775p.yaml180
-rw-r--r--Documentation/devicetree/bindings/pci/qcom,pcie-sc7280.yaml190
-rw-r--r--Documentation/devicetree/bindings/pci/qcom,pcie-sc8180x.yaml168
-rw-r--r--Documentation/devicetree/bindings/pci/qcom,pcie-sc8280xp.yaml177
-rw-r--r--Documentation/devicetree/bindings/pci/qcom,pcie-sm8150.yaml160
-rw-r--r--Documentation/devicetree/bindings/pci/qcom,pcie-sm8250.yaml176
-rw-r--r--Documentation/devicetree/bindings/pci/qcom,pcie-sm8350.yaml165
-rw-r--r--Documentation/devicetree/bindings/pci/qcom,pcie-sm8450.yaml180
-rw-r--r--Documentation/devicetree/bindings/pci/qcom,pcie-sm8550.yaml177
-rw-r--r--Documentation/devicetree/bindings/pci/qcom,pcie-x1e80100.yaml168
-rw-r--r--Documentation/devicetree/bindings/pci/qcom,pcie.txt290
-rw-r--r--Documentation/devicetree/bindings/pci/qcom,pcie.yaml782
-rw-r--r--Documentation/devicetree/bindings/pci/rcar-gen4-pci-ep.yaml118
-rw-r--r--Documentation/devicetree/bindings/pci/rcar-gen4-pci-host.yaml130
-rw-r--r--Documentation/devicetree/bindings/pci/rcar-pci-ep.yaml93
-rw-r--r--Documentation/devicetree/bindings/pci/rcar-pci-host.yaml139
-rw-r--r--Documentation/devicetree/bindings/pci/rcar-pci.txt66
-rw-r--r--Documentation/devicetree/bindings/pci/renesas,pci-rcar-gen2.yaml190
-rw-r--r--Documentation/devicetree/bindings/pci/rockchip,rk3399-pcie-common.yaml69
-rw-r--r--Documentation/devicetree/bindings/pci/rockchip,rk3399-pcie-ep.yaml70
-rw-r--r--Documentation/devicetree/bindings/pci/rockchip,rk3399-pcie.yaml133
-rw-r--r--Documentation/devicetree/bindings/pci/rockchip-dw-pcie-common.yaml132
-rw-r--r--Documentation/devicetree/bindings/pci/rockchip-dw-pcie-ep.yaml95
-rw-r--r--Documentation/devicetree/bindings/pci/rockchip-dw-pcie.yaml178
-rw-r--r--Documentation/devicetree/bindings/pci/rockchip-pcie-ep.txt62
-rw-r--r--Documentation/devicetree/bindings/pci/rockchip-pcie-host.txt135
-rw-r--r--Documentation/devicetree/bindings/pci/samsung,exynos-pcie.yaml119
-rw-r--r--Documentation/devicetree/bindings/pci/samsung,exynos5440-pcie.txt58
-rw-r--r--Documentation/devicetree/bindings/pci/sifive,fu740-pcie.yaml120
-rw-r--r--Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml267
-rw-r--r--Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml203
-rw-r--r--Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml245
-rw-r--r--Documentation/devicetree/bindings/pci/socionext,uniphier-pcie-ep.yaml134
-rw-r--r--Documentation/devicetree/bindings/pci/socionext,uniphier-pcie.yaml121
-rw-r--r--Documentation/devicetree/bindings/pci/sophgo,sg2042-pcie-host.yaml64
-rw-r--r--Documentation/devicetree/bindings/pci/sophgo,sg2044-pcie.yaml122
-rw-r--r--Documentation/devicetree/bindings/pci/spear13xx-pcie.txt14
-rw-r--r--Documentation/devicetree/bindings/pci/st,spear1340-pcie.yaml45
-rw-r--r--Documentation/devicetree/bindings/pci/st,stm32-pcie-common.yaml33
-rw-r--r--Documentation/devicetree/bindings/pci/st,stm32-pcie-ep.yaml73
-rw-r--r--Documentation/devicetree/bindings/pci/st,stm32-pcie-host.yaml112
-rw-r--r--Documentation/devicetree/bindings/pci/starfive,jh7110-pcie.yaml127
-rw-r--r--Documentation/devicetree/bindings/pci/tango-pcie.txt29
-rw-r--r--Documentation/devicetree/bindings/pci/ti,am65-pci-ep.yaml76
-rw-r--r--Documentation/devicetree/bindings/pci/ti,am65-pci-host.yaml148
-rw-r--r--Documentation/devicetree/bindings/pci/ti,j721e-pci-ep.yaml143
-rw-r--r--Documentation/devicetree/bindings/pci/ti,j721e-pci-host.yaml206
-rw-r--r--Documentation/devicetree/bindings/pci/ti-pci.txt15
-rw-r--r--Documentation/devicetree/bindings/pci/toshiba,visconti-pcie.yaml111
-rw-r--r--Documentation/devicetree/bindings/pci/v3,v360epc-pci.yaml100
-rw-r--r--Documentation/devicetree/bindings/pci/v3-v360epc-pci.txt76
-rw-r--r--Documentation/devicetree/bindings/pci/versatile.txt59
-rw-r--r--Documentation/devicetree/bindings/pci/versatile.yaml94
-rw-r--r--Documentation/devicetree/bindings/pci/xgene-pci-msi.txt68
-rw-r--r--Documentation/devicetree/bindings/pci/xgene-pci.txt50
-rw-r--r--Documentation/devicetree/bindings/pci/xilinx-nwl-pcie.txt70
-rw-r--r--Documentation/devicetree/bindings/pci/xilinx-pcie.txt88
-rw-r--r--Documentation/devicetree/bindings/pci/xilinx-versal-cpm.yaml137
-rw-r--r--Documentation/devicetree/bindings/pci/xlnx,axi-pcie-host.yaml88
-rw-r--r--Documentation/devicetree/bindings/pci/xlnx,nwl-pcie.yaml156
-rw-r--r--Documentation/devicetree/bindings/pci/xlnx,xdma-host.yaml146
-rw-r--r--Documentation/devicetree/bindings/peci/nuvoton,npcm-peci.yaml56
-rw-r--r--Documentation/devicetree/bindings/peci/peci-aspeed.yaml72
-rw-r--r--Documentation/devicetree/bindings/peci/peci-controller.yaml33
-rw-r--r--Documentation/devicetree/bindings/perf/amlogic,g12-ddr-pmu.yaml54
-rw-r--r--Documentation/devicetree/bindings/perf/apm,xgene-pmu.yaml142
-rw-r--r--Documentation/devicetree/bindings/perf/apm-xgene-pmu.txt112
-rw-r--r--Documentation/devicetree/bindings/perf/arm,ccn.yaml40
-rw-r--r--Documentation/devicetree/bindings/perf/arm,cmn.yaml70
-rw-r--r--Documentation/devicetree/bindings/perf/arm,coresight-pmu.yaml39
-rw-r--r--Documentation/devicetree/bindings/perf/arm,dsu-pmu.yaml44
-rw-r--r--Documentation/devicetree/bindings/perf/arm,ni.yaml30
-rw-r--r--Documentation/devicetree/bindings/perf/arm,smmu-v3-pmcg.yaml70
-rw-r--r--Documentation/devicetree/bindings/perf/arm-ccn.txt22
-rw-r--r--Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml62
-rw-r--r--Documentation/devicetree/bindings/perf/marvell-cn10k-ddr.yaml37
-rw-r--r--Documentation/devicetree/bindings/perf/marvell-cn10k-tad.yaml63
-rw-r--r--Documentation/devicetree/bindings/perf/riscv,pmu.yaml160
-rw-r--r--Documentation/devicetree/bindings/perf/spe-pmu.yaml40
-rw-r--r--Documentation/devicetree/bindings/perf/starfive,jh8100-starlink-pmu.yaml46
-rw-r--r--Documentation/devicetree/bindings/phy/airoha,en7581-pcie-phy.yaml69
-rw-r--r--Documentation/devicetree/bindings/phy/allwinner,sun4i-a10-usb-phy.yaml107
-rw-r--r--Documentation/devicetree/bindings/phy/allwinner,sun50i-a64-usb-phy.yaml116
-rw-r--r--Documentation/devicetree/bindings/phy/allwinner,sun50i-h6-usb-phy.yaml107
-rw-r--r--Documentation/devicetree/bindings/phy/allwinner,sun50i-h6-usb3-phy.yaml49
-rw-r--r--Documentation/devicetree/bindings/phy/allwinner,sun5i-a13-usb-phy.yaml95
-rw-r--r--Documentation/devicetree/bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml84
-rw-r--r--Documentation/devicetree/bindings/phy/allwinner,sun6i-a31-usb-phy.yaml121
-rw-r--r--Documentation/devicetree/bindings/phy/allwinner,sun8i-a23-usb-phy.yaml104
-rw-r--r--Documentation/devicetree/bindings/phy/allwinner,sun8i-a83t-usb-phy.yaml124
-rw-r--r--Documentation/devicetree/bindings/phy/allwinner,sun8i-h3-usb-phy.yaml167
-rw-r--r--Documentation/devicetree/bindings/phy/allwinner,sun8i-r40-usb-phy.yaml121
-rw-r--r--Documentation/devicetree/bindings/phy/allwinner,sun8i-v3s-usb-phy.yaml88
-rw-r--r--Documentation/devicetree/bindings/phy/allwinner,sun9i-a80-usb-phy.yaml132
-rw-r--r--Documentation/devicetree/bindings/phy/allwinner,suniv-f1c100s-usb-phy.yaml83
-rw-r--r--Documentation/devicetree/bindings/phy/amlogic,axg-mipi-dphy.yaml70
-rw-r--r--Documentation/devicetree/bindings/phy/amlogic,g12a-mipi-dphy-analog.yaml23
-rw-r--r--Documentation/devicetree/bindings/phy/amlogic,g12a-usb2-phy.yaml81
-rw-r--r--Documentation/devicetree/bindings/phy/amlogic,g12a-usb3-pcie-phy.yaml64
-rw-r--r--Documentation/devicetree/bindings/phy/amlogic,meson-axg-mipi-pcie-analog.yaml23
-rw-r--r--Documentation/devicetree/bindings/phy/amlogic,meson-axg-pcie.yaml52
-rw-r--r--Documentation/devicetree/bindings/phy/amlogic,meson-gxl-usb2-phy.yaml56
-rw-r--r--Documentation/devicetree/bindings/phy/amlogic,meson8-hdmi-tx-phy.yaml65
-rw-r--r--Documentation/devicetree/bindings/phy/amlogic,meson8b-usb2-phy.yaml64
-rw-r--r--Documentation/devicetree/bindings/phy/apm,xgene-phy.yaml169
-rw-r--r--Documentation/devicetree/bindings/phy/apm-xgene-phy.txt76
-rw-r--r--Documentation/devicetree/bindings/phy/bcm-ns-usb2-phy.txt21
-rw-r--r--Documentation/devicetree/bindings/phy/bcm-ns-usb2-phy.yaml59
-rw-r--r--Documentation/devicetree/bindings/phy/bcm-ns-usb3-phy.txt34
-rw-r--r--Documentation/devicetree/bindings/phy/bcm-ns-usb3-phy.yaml62
-rw-r--r--Documentation/devicetree/bindings/phy/berlin-sata-phy.txt36
-rw-r--r--Documentation/devicetree/bindings/phy/berlin-usb-phy.txt16
-rw-r--r--Documentation/devicetree/bindings/phy/brcm,bcm63xx-usbh-phy.yaml78
-rw-r--r--Documentation/devicetree/bindings/phy/brcm,brcmstb-usb-phy.txt43
-rw-r--r--Documentation/devicetree/bindings/phy/brcm,brcmstb-usb-phy.yaml199
-rw-r--r--Documentation/devicetree/bindings/phy/brcm,cygnus-pcie-phy.txt47
-rw-r--r--Documentation/devicetree/bindings/phy/brcm,cygnus-pcie-phy.yaml77
-rw-r--r--Documentation/devicetree/bindings/phy/brcm,kona-usb2-phy.txt15
-rw-r--r--Documentation/devicetree/bindings/phy/brcm,kona-usb2-phy.yaml36
-rw-r--r--Documentation/devicetree/bindings/phy/brcm,mdio-mux-bus-pci.txt27
-rw-r--r--Documentation/devicetree/bindings/phy/brcm,ns2-drd-phy.txt30
-rw-r--r--Documentation/devicetree/bindings/phy/brcm,ns2-drd-phy.yaml62
-rw-r--r--Documentation/devicetree/bindings/phy/brcm,ns2-pcie-phy.yaml41
-rw-r--r--Documentation/devicetree/bindings/phy/brcm,sata-phy.yaml144
-rw-r--r--Documentation/devicetree/bindings/phy/brcm,sr-pcie-phy.txt41
-rw-r--r--Documentation/devicetree/bindings/phy/brcm,sr-pcie-phy.yaml46
-rw-r--r--Documentation/devicetree/bindings/phy/brcm,sr-usb-combo-phy.yaml65
-rw-r--r--Documentation/devicetree/bindings/phy/brcm-sata-phy.txt57
-rw-r--r--Documentation/devicetree/bindings/phy/calxeda-combophy.txt17
-rw-r--r--Documentation/devicetree/bindings/phy/calxeda-combophy.yaml50
-rw-r--r--Documentation/devicetree/bindings/phy/cdns,dphy-rx.yaml42
-rw-r--r--Documentation/devicetree/bindings/phy/cdns,dphy.yaml57
-rw-r--r--Documentation/devicetree/bindings/phy/cdns,salvo-phy.yaml58
-rw-r--r--Documentation/devicetree/bindings/phy/dm816x-phy.txt24
-rw-r--r--Documentation/devicetree/bindings/phy/fsl,imx8-pcie-phy.yaml102
-rw-r--r--Documentation/devicetree/bindings/phy/fsl,imx8mp-hdmi-phy.yaml62
-rw-r--r--Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml158
-rw-r--r--Documentation/devicetree/bindings/phy/fsl,imx8qm-hsio.yaml164
-rw-r--r--Documentation/devicetree/bindings/phy/fsl,imx8qm-lvds-phy.yaml61
-rw-r--r--Documentation/devicetree/bindings/phy/fsl,lynx-28g.yaml40
-rw-r--r--Documentation/devicetree/bindings/phy/fsl,mxs-usbphy.yaml146
-rw-r--r--Documentation/devicetree/bindings/phy/hisilicon,hi3660-usb3.yaml51
-rw-r--r--Documentation/devicetree/bindings/phy/hisilicon,hi3670-usb3.yaml64
-rw-r--r--Documentation/devicetree/bindings/phy/hisilicon,hi3798cv200-combphy.yaml56
-rw-r--r--Documentation/devicetree/bindings/phy/hisilicon,hi6220-usb-phy.yaml35
-rw-r--r--Documentation/devicetree/bindings/phy/hisilicon,hix5hd2-sata-phy.yaml48
-rw-r--r--Documentation/devicetree/bindings/phy/hisilicon,inno-usb2-phy.yaml93
-rw-r--r--Documentation/devicetree/bindings/phy/hisilicon,phy-hi3670-pcie.yaml82
-rw-r--r--Documentation/devicetree/bindings/phy/hix5hd2-phy.txt22
-rw-r--r--Documentation/devicetree/bindings/phy/img,pistachio-usb-phy.yaml62
-rw-r--r--Documentation/devicetree/bindings/phy/ingenic,phy-usb.yaml58
-rw-r--r--Documentation/devicetree/bindings/phy/intel,combo-phy.yaml109
-rw-r--r--Documentation/devicetree/bindings/phy/intel,keembay-phy-usb.yaml44
-rw-r--r--Documentation/devicetree/bindings/phy/intel,lgm-emmc-phy.yaml75
-rw-r--r--Documentation/devicetree/bindings/phy/intel,lgm-usb-phy.yaml58
-rw-r--r--Documentation/devicetree/bindings/phy/keystone-usb-phy.txt19
-rw-r--r--Documentation/devicetree/bindings/phy/lantiq,ase-usb2-phy.yaml71
-rw-r--r--Documentation/devicetree/bindings/phy/lantiq,vrx200-pcie-phy.yaml95
-rw-r--r--Documentation/devicetree/bindings/phy/marvell,armada-3700-utmi-phy.yaml57
-rw-r--r--Documentation/devicetree/bindings/phy/marvell,armada-375-usb-cluster.yaml40
-rw-r--r--Documentation/devicetree/bindings/phy/marvell,armada-380-comphy.yaml83
-rw-r--r--Documentation/devicetree/bindings/phy/marvell,armada-cp110-utmi-phy.yaml115
-rw-r--r--Documentation/devicetree/bindings/phy/marvell,berlin2-sata-phy.yaml76
-rw-r--r--Documentation/devicetree/bindings/phy/marvell,berlin2-usb-phy.yaml42
-rw-r--r--Documentation/devicetree/bindings/phy/marvell,comphy-cp110.yaml167
-rw-r--r--Documentation/devicetree/bindings/phy/marvell,mmp2-usb-phy.yaml37
-rw-r--r--Documentation/devicetree/bindings/phy/marvell,mmp3-hsic-phy.yaml37
-rw-r--r--Documentation/devicetree/bindings/phy/marvell,mmp3-usb-phy.yaml42
-rw-r--r--Documentation/devicetree/bindings/phy/marvell,mvebu-sata-phy.yaml47
-rw-r--r--Documentation/devicetree/bindings/phy/marvell,pxa1928-usb-phy.yaml47
-rw-r--r--Documentation/devicetree/bindings/phy/mediatek,dsi-phy.yaml99
-rw-r--r--Documentation/devicetree/bindings/phy/mediatek,hdmi-phy.yaml96
-rw-r--r--Documentation/devicetree/bindings/phy/mediatek,mt7621-pci-phy.yaml41
-rw-r--r--Documentation/devicetree/bindings/phy/mediatek,mt7628-usbphy.yaml74
-rw-r--r--Documentation/devicetree/bindings/phy/mediatek,mt7988-xfi-tphy.yaml80
-rw-r--r--Documentation/devicetree/bindings/phy/mediatek,mt8365-csi-rx.yaml79
-rw-r--r--Documentation/devicetree/bindings/phy/mediatek,pcie-phy.yaml75
-rw-r--r--Documentation/devicetree/bindings/phy/mediatek,tphy.yaml333
-rw-r--r--Documentation/devicetree/bindings/phy/mediatek,ufs-phy.yaml69
-rw-r--r--Documentation/devicetree/bindings/phy/mediatek,xsphy.yaml215
-rw-r--r--Documentation/devicetree/bindings/phy/meson-gxl-usb2-phy.txt21
-rw-r--r--Documentation/devicetree/bindings/phy/meson-gxl-usb3-phy.txt31
-rw-r--r--Documentation/devicetree/bindings/phy/meson8b-usb2-phy.txt28
-rw-r--r--Documentation/devicetree/bindings/phy/microchip,lan966x-serdes.yaml59
-rw-r--r--Documentation/devicetree/bindings/phy/microchip,sparx5-serdes.yaml115
-rw-r--r--Documentation/devicetree/bindings/phy/mixel,mipi-dsi-phy.yaml93
-rw-r--r--Documentation/devicetree/bindings/phy/motorola,cpcap-usb-phy.yaml107
-rw-r--r--Documentation/devicetree/bindings/phy/motorola,mapphone-mdm6600.yaml81
-rw-r--r--Documentation/devicetree/bindings/phy/mscc,vsc7514-serdes.yaml56
-rw-r--r--Documentation/devicetree/bindings/phy/mxs-usb-phy.txt31
-rw-r--r--Documentation/devicetree/bindings/phy/nuvoton,ma35d1-usb2-phy.yaml45
-rw-r--r--Documentation/devicetree/bindings/phy/nvidia,tegra124-xusb-padctl.txt733
-rw-r--r--Documentation/devicetree/bindings/phy/nvidia,tegra124-xusb-padctl.yaml654
-rw-r--r--Documentation/devicetree/bindings/phy/nvidia,tegra186-xusb-padctl.yaml544
-rw-r--r--Documentation/devicetree/bindings/phy/nvidia,tegra194-xusb-padctl.yaml632
-rw-r--r--Documentation/devicetree/bindings/phy/nvidia,tegra20-usb-phy.txt74
-rw-r--r--Documentation/devicetree/bindings/phy/nvidia,tegra20-usb-phy.yaml373
-rw-r--r--Documentation/devicetree/bindings/phy/nvidia,tegra210-xusb-padctl.yaml786
-rw-r--r--Documentation/devicetree/bindings/phy/nxp,ptn3222.yaml55
-rw-r--r--Documentation/devicetree/bindings/phy/phy-ath79-usb.txt18
-rw-r--r--Documentation/devicetree/bindings/phy/phy-bindings.txt2
-rw-r--r--Documentation/devicetree/bindings/phy/phy-cadence-dp.txt30
-rw-r--r--Documentation/devicetree/bindings/phy/phy-cadence-sierra.yaml166
-rw-r--r--Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml216
-rw-r--r--Documentation/devicetree/bindings/phy/phy-cpcap-usb.txt40
-rw-r--r--Documentation/devicetree/bindings/phy/phy-da8xx-usb.txt40
-rw-r--r--Documentation/devicetree/bindings/phy/phy-hi3798cv200-combphy.txt59
-rw-r--r--Documentation/devicetree/bindings/phy/phy-hi6220-usb.txt16
-rw-r--r--Documentation/devicetree/bindings/phy/phy-hisi-inno-usb2.txt71
-rw-r--r--Documentation/devicetree/bindings/phy/phy-lantiq-rcu-usb2.txt40
-rw-r--r--Documentation/devicetree/bindings/phy/phy-lpc18xx-usb-otg.txt26
-rw-r--r--Documentation/devicetree/bindings/phy/phy-mapphone-mdm6600.txt29
-rw-r--r--Documentation/devicetree/bindings/phy/phy-mtk-tphy.txt150
-rw-r--r--Documentation/devicetree/bindings/phy/phy-mtk-xsphy.txt109
-rw-r--r--Documentation/devicetree/bindings/phy/phy-mvebu-comphy.txt43
-rw-r--r--Documentation/devicetree/bindings/phy/phy-mvebu.txt42
-rw-r--r--Documentation/devicetree/bindings/phy/phy-ocelot-serdes.txt43
-rw-r--r--Documentation/devicetree/bindings/phy/phy-rockchip-inno-hdmi.txt43
-rw-r--r--Documentation/devicetree/bindings/phy/phy-rockchip-inno-usb2.txt78
-rw-r--r--Documentation/devicetree/bindings/phy/phy-rockchip-naneng-combphy.yaml153
-rw-r--r--Documentation/devicetree/bindings/phy/phy-rockchip-typec.txt84
-rw-r--r--Documentation/devicetree/bindings/phy/phy-rockchip-usbdp.yaml152
-rw-r--r--Documentation/devicetree/bindings/phy/phy-stih407-usb.txt24
-rw-r--r--Documentation/devicetree/bindings/phy/phy-stih41x-usb.txt24
-rw-r--r--Documentation/devicetree/bindings/phy/phy-stm32-usbphyc.txt73
-rw-r--r--Documentation/devicetree/bindings/phy/phy-stm32-usbphyc.yaml286
-rw-r--r--Documentation/devicetree/bindings/phy/phy-tegra194-p2u.yaml53
-rw-r--r--Documentation/devicetree/bindings/phy/pistachio-usb-phy.txt29
-rw-r--r--Documentation/devicetree/bindings/phy/pxa1928-usb-phy.txt18
-rw-r--r--Documentation/devicetree/bindings/phy/qca,ar7100-usb-phy.yaml49
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,edp-phy.yaml87
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,hdmi-phy-other.yaml125
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,hdmi-phy-qmp.yaml95
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,ipq5332-uniphy-pcie-phy.yaml109
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,ipq5332-usb-hsphy.yaml61
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,ipq806x-usb-phy-hs.yaml56
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,ipq806x-usb-phy-ss.yaml74
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,ipq8074-qmp-pcie-phy.yaml102
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,m31-eusb2-phy.yaml79
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-pcie-phy.yaml189
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,msm8998-qmp-pcie-phy.yaml97
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,msm8998-qmp-usb3-phy.yaml186
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,pcie2-phy.yaml86
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,qusb2-phy.yaml205
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,sa8775p-dwmac-sgmii-phy.yaml60
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,sata-phy.yaml55
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml303
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-ufs-phy.yaml183
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml198
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb43dp-phy.yaml203
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,snps-eusb2-phy.yaml88
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,snps-eusb2-repeater.yaml84
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,usb-8x16-phy.txt76
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,usb-hs-28nm.yaml90
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,usb-hs-phy.txt84
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,usb-hs-phy.yaml116
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,usb-hsic-phy.txt65
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,usb-hsic-phy.yaml67
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,usb-snps-femto-v2.yaml184
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,usb-ss.yaml83
-rw-r--r--Documentation/devicetree/bindings/phy/qcom-apq8064-sata-phy.txt24
-rw-r--r--Documentation/devicetree/bindings/phy/qcom-dwc3-usb-phy.txt37
-rw-r--r--Documentation/devicetree/bindings/phy/qcom-ipq806x-sata-phy.txt23
-rw-r--r--Documentation/devicetree/bindings/phy/qcom-qmp-phy.txt137
-rw-r--r--Documentation/devicetree/bindings/phy/qcom-qusb2-phy.txt67
-rw-r--r--Documentation/devicetree/bindings/phy/qcom-usb-ipq4019-phy.yaml52
-rw-r--r--Documentation/devicetree/bindings/phy/ralink-usb-phy.txt23
-rw-r--r--Documentation/devicetree/bindings/phy/rcar-gen2-phy.txt62
-rw-r--r--Documentation/devicetree/bindings/phy/rcar-gen3-phy-pcie.txt24
-rw-r--r--Documentation/devicetree/bindings/phy/rcar-gen3-phy-usb2.txt53
-rw-r--r--Documentation/devicetree/bindings/phy/rcar-gen3-phy-usb3.txt50
-rw-r--r--Documentation/devicetree/bindings/phy/realtek,usb2phy.yaml175
-rw-r--r--Documentation/devicetree/bindings/phy/realtek,usb3phy.yaml107
-rw-r--r--Documentation/devicetree/bindings/phy/renesas,r8a779f0-ether-serdes.yaml54
-rw-r--r--Documentation/devicetree/bindings/phy/renesas,rcar-gen2-usb-phy.yaml123
-rw-r--r--Documentation/devicetree/bindings/phy/renesas,rcar-gen3-pcie-phy.yaml53
-rw-r--r--Documentation/devicetree/bindings/phy/renesas,usb2-phy.yaml161
-rw-r--r--Documentation/devicetree/bindings/phy/renesas,usb3-phy.yaml78
-rw-r--r--Documentation/devicetree/bindings/phy/rockchip,inno-usb2phy.yaml248
-rw-r--r--Documentation/devicetree/bindings/phy/rockchip,pcie3-phy.yaml116
-rw-r--r--Documentation/devicetree/bindings/phy/rockchip,px30-dsi-dphy.yaml72
-rw-r--r--Documentation/devicetree/bindings/phy/rockchip,rk3228-hdmi-phy.yaml97
-rw-r--r--Documentation/devicetree/bindings/phy/rockchip,rk3288-dp-phy.yaml41
-rw-r--r--Documentation/devicetree/bindings/phy/rockchip,rk3399-emmc-phy.yaml64
-rw-r--r--Documentation/devicetree/bindings/phy/rockchip,rk3399-pcie-phy.yaml45
-rw-r--r--Documentation/devicetree/bindings/phy/rockchip,rk3399-typec-phy.yaml116
-rw-r--r--Documentation/devicetree/bindings/phy/rockchip,rk3588-hdptx-phy.yaml120
-rw-r--r--Documentation/devicetree/bindings/phy/rockchip,rk3588-mipi-dcphy.yaml87
-rw-r--r--Documentation/devicetree/bindings/phy/rockchip-dp-phy.txt26
-rw-r--r--Documentation/devicetree/bindings/phy/rockchip-emmc-phy.txt34
-rw-r--r--Documentation/devicetree/bindings/phy/rockchip-inno-csi-dphy.yaml141
-rw-r--r--Documentation/devicetree/bindings/phy/rockchip-mipi-dphy-rx0.yaml73
-rw-r--r--Documentation/devicetree/bindings/phy/rockchip-pcie-phy.txt36
-rw-r--r--Documentation/devicetree/bindings/phy/rockchip-usb-phy.txt52
-rw-r--r--Documentation/devicetree/bindings/phy/rockchip-usb-phy.yaml78
-rw-r--r--Documentation/devicetree/bindings/phy/samsung,dp-video-phy.yaml40
-rw-r--r--Documentation/devicetree/bindings/phy/samsung,exynos-hdmi-phy.yaml43
-rw-r--r--Documentation/devicetree/bindings/phy/samsung,exynos-pcie-phy.yaml51
-rw-r--r--Documentation/devicetree/bindings/phy/samsung,exynos2200-eusb2-phy.yaml80
-rw-r--r--Documentation/devicetree/bindings/phy/samsung,exynos5250-sata-phy.yaml64
-rw-r--r--Documentation/devicetree/bindings/phy/samsung,mipi-video-phy.yaml133
-rw-r--r--Documentation/devicetree/bindings/phy/samsung,ufs-phy.yaml114
-rw-r--r--Documentation/devicetree/bindings/phy/samsung,usb2-phy.yaml102
-rw-r--r--Documentation/devicetree/bindings/phy/samsung,usb3-drd-phy.yaml253
-rw-r--r--Documentation/devicetree/bindings/phy/samsung-phy.txt209
-rw-r--r--Documentation/devicetree/bindings/phy/socionext,uniphier-ahci-phy.yaml132
-rw-r--r--Documentation/devicetree/bindings/phy/socionext,uniphier-pcie-phy.yaml105
-rw-r--r--Documentation/devicetree/bindings/phy/socionext,uniphier-usb2-phy.yaml83
-rw-r--r--Documentation/devicetree/bindings/phy/socionext,uniphier-usb3hs-phy.yaml163
-rw-r--r--Documentation/devicetree/bindings/phy/socionext,uniphier-usb3ss-phy.yaml146
-rw-r--r--Documentation/devicetree/bindings/phy/sophgo,cv1800b-usb2-phy.yaml54
-rw-r--r--Documentation/devicetree/bindings/phy/st,spear1310-miphy.yaml53
-rw-r--r--Documentation/devicetree/bindings/phy/st,stih407-usb2-phy.yaml63
-rw-r--r--Documentation/devicetree/bindings/phy/st,stm32mp25-combophy.yaml119
-rw-r--r--Documentation/devicetree/bindings/phy/st-spear-miphy.txt15
-rw-r--r--Documentation/devicetree/bindings/phy/starfive,jh7110-dphy-rx.yaml71
-rw-r--r--Documentation/devicetree/bindings/phy/starfive,jh7110-dphy-tx.yaml68
-rw-r--r--Documentation/devicetree/bindings/phy/starfive,jh7110-pcie-phy.yaml58
-rw-r--r--Documentation/devicetree/bindings/phy/starfive,jh7110-usb-phy.yaml50
-rw-r--r--Documentation/devicetree/bindings/phy/sun4i-usb-phy.txt66
-rw-r--r--Documentation/devicetree/bindings/phy/sun9i-usb-phy.txt37
-rw-r--r--Documentation/devicetree/bindings/phy/sunplus,sp7021-usb2-phy.yaml73
-rw-r--r--Documentation/devicetree/bindings/phy/ti,da830-usb-phy.yaml53
-rw-r--r--Documentation/devicetree/bindings/phy/ti,dm8168-usb-phy.yaml58
-rw-r--r--Documentation/devicetree/bindings/phy/ti,keystone-usbphy.yaml37
-rw-r--r--Documentation/devicetree/bindings/phy/ti,omap-usb2.yaml80
-rw-r--r--Documentation/devicetree/bindings/phy/ti,phy-am654-serdes.yaml98
-rw-r--r--Documentation/devicetree/bindings/phy/ti,phy-gmii-sel.yaml169
-rw-r--r--Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml259
-rw-r--r--Documentation/devicetree/bindings/phy/ti,tcan104x-can.yaml70
-rw-r--r--Documentation/devicetree/bindings/phy/ti-phy.txt38
-rw-r--r--Documentation/devicetree/bindings/phy/transmit-amplitude.yaml103
-rw-r--r--Documentation/devicetree/bindings/phy/uniphier-pcie-phy.txt31
-rw-r--r--Documentation/devicetree/bindings/phy/uniphier-usb2-phy.txt45
-rw-r--r--Documentation/devicetree/bindings/phy/uniphier-usb3-hsphy.txt69
-rw-r--r--Documentation/devicetree/bindings/phy/uniphier-usb3-ssphy.txt57
-rw-r--r--Documentation/devicetree/bindings/phy/xlnx,zynqmp-psgtr.yaml105
-rw-r--r--Documentation/devicetree/bindings/pinctrl/actions,s500-pinctrl.yaml242
-rw-r--r--Documentation/devicetree/bindings/pinctrl/actions,s700-pinctrl.txt170
-rw-r--r--Documentation/devicetree/bindings/pinctrl/airoha,en7581-pinctrl.yaml403
-rw-r--r--Documentation/devicetree/bindings/pinctrl/allwinner,sun4i-a10-pinctrl.yaml315
-rw-r--r--Documentation/devicetree/bindings/pinctrl/allwinner,sun55i-a523-pinctrl.yaml175
-rw-r--r--Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt148
-rw-r--r--Documentation/devicetree/bindings/pinctrl/amlogic,meson-pinctrl-a1.yaml73
-rw-r--r--Documentation/devicetree/bindings/pinctrl/amlogic,meson-pinctrl-common.yaml64
-rw-r--r--Documentation/devicetree/bindings/pinctrl/amlogic,meson-pinctrl-g12a-aobus.yaml71
-rw-r--r--Documentation/devicetree/bindings/pinctrl/amlogic,meson-pinctrl-g12a-periphs.yaml75
-rw-r--r--Documentation/devicetree/bindings/pinctrl/amlogic,meson8-pinctrl-aobus.yaml80
-rw-r--r--Documentation/devicetree/bindings/pinctrl/amlogic,meson8-pinctrl-cbus.yaml82
-rw-r--r--Documentation/devicetree/bindings/pinctrl/amlogic,pinctrl-a4.yaml139
-rw-r--r--Documentation/devicetree/bindings/pinctrl/apple,pinctrl.yaml135
-rw-r--r--Documentation/devicetree/bindings/pinctrl/aspeed,ast2400-pinctrl.yaml223
-rw-r--r--Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.yaml253
-rw-r--r--Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml535
-rw-r--r--Documentation/devicetree/bindings/pinctrl/atmel,at91-pinctrl.txt151
-rw-r--r--Documentation/devicetree/bindings/pinctrl/atmel,at91-pio4-pinctrl.txt14
-rw-r--r--Documentation/devicetree/bindings/pinctrl/atmel,at91rm9200-pinctrl.yaml184
-rw-r--r--Documentation/devicetree/bindings/pinctrl/awinic,aw9523-pinctrl.yaml139
-rw-r--r--Documentation/devicetree/bindings/pinctrl/bitmain,bm1880-pinctrl.txt126
-rw-r--r--Documentation/devicetree/bindings/pinctrl/brcm,bcm11351-pinctrl.txt461
-rw-r--r--Documentation/devicetree/bindings/pinctrl/brcm,bcm11351-pinctrl.yaml259
-rw-r--r--Documentation/devicetree/bindings/pinctrl/brcm,bcm21664-pinctrl.yaml152
-rw-r--r--Documentation/devicetree/bindings/pinctrl/brcm,bcm2712c0-pinctrl.yaml137
-rw-r--r--Documentation/devicetree/bindings/pinctrl/brcm,bcm2835-gpio.txt92
-rw-r--r--Documentation/devicetree/bindings/pinctrl/brcm,bcm2835-gpio.yaml120
-rw-r--r--Documentation/devicetree/bindings/pinctrl/brcm,bcm4708-pinmux.txt57
-rw-r--r--Documentation/devicetree/bindings/pinctrl/brcm,bcm4908-pinctrl.yaml73
-rw-r--r--Documentation/devicetree/bindings/pinctrl/brcm,bcm6318-pinctrl.yaml151
-rw-r--r--Documentation/devicetree/bindings/pinctrl/brcm,bcm63268-pinctrl.yaml172
-rw-r--r--Documentation/devicetree/bindings/pinctrl/brcm,bcm6328-pinctrl.yaml135
-rw-r--r--Documentation/devicetree/bindings/pinctrl/brcm,bcm6358-pinctrl.yaml97
-rw-r--r--Documentation/devicetree/bindings/pinctrl/brcm,bcm6362-pinctrl.yaml214
-rw-r--r--Documentation/devicetree/bindings/pinctrl/brcm,bcm6368-pinctrl.yaml225
-rw-r--r--Documentation/devicetree/bindings/pinctrl/brcm,iproc-gpio.txt123
-rw-r--r--Documentation/devicetree/bindings/pinctrl/brcm,iproc-gpio.yaml111
-rw-r--r--Documentation/devicetree/bindings/pinctrl/brcm,ns-pinmux.yaml90
-rw-r--r--Documentation/devicetree/bindings/pinctrl/brcm,ns2-pinmux.txt2
-rw-r--r--Documentation/devicetree/bindings/pinctrl/brcm,nsp-pinmux.txt2
-rw-r--r--Documentation/devicetree/bindings/pinctrl/canaan,k210-fpioa.yaml180
-rw-r--r--Documentation/devicetree/bindings/pinctrl/canaan,k230-pinctrl.yaml127
-rw-r--r--Documentation/devicetree/bindings/pinctrl/cirrus,lochnagar.yaml186
-rw-r--r--Documentation/devicetree/bindings/pinctrl/cirrus,madera-pinctrl.txt99
-rw-r--r--Documentation/devicetree/bindings/pinctrl/cirrus,madera.yaml114
-rw-r--r--Documentation/devicetree/bindings/pinctrl/cypress,cy8c95x0.yaml158
-rw-r--r--Documentation/devicetree/bindings/pinctrl/eswin,eic7700-pinctrl.yaml156
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx35-pinctrl.txt33
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx35-pinctrl.yaml184
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx51-pinctrl.txt32
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx53-pinctrl.txt32
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx6dl-pinctrl.txt38
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx6q-pinctrl.txt38
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx6sl-pinctrl.txt39
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx6sll-pinctrl.txt40
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx6sx-pinctrl.txt36
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx6ul-pinctrl.txt37
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx7d-pinctrl.txt87
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx7d-pinctrl.yaml113
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx7ulp-iomuxc1.yaml99
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx7ulp-pinctrl.txt61
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx8m-pinctrl.yaml90
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx8mq-pinctrl.txt36
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx8ulp-pinctrl.yaml82
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx9-pinctrl.yaml87
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imxrt1050.yaml79
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imxrt1170.yaml77
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,mxs-pinctrl.txt127
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,scu-pinctrl.yaml74
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,vf610-iomuxc.yaml83
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,vf610-pinctrl.txt41
-rw-r--r--Documentation/devicetree/bindings/pinctrl/ingenic,pinctrl.txt72
-rw-r--r--Documentation/devicetree/bindings/pinctrl/ingenic,pinctrl.yaml191
-rw-r--r--Documentation/devicetree/bindings/pinctrl/intel,lgm-io.yaml76
-rw-r--r--Documentation/devicetree/bindings/pinctrl/intel,pinctrl-keembay.yaml136
-rw-r--r--Documentation/devicetree/bindings/pinctrl/lantiq,pinctrl-xway.txt35
-rw-r--r--Documentation/devicetree/bindings/pinctrl/loongson,ls2k-pinctrl.yaml123
-rw-r--r--Documentation/devicetree/bindings/pinctrl/marvell,ac5-pinctrl.yaml73
-rw-r--r--Documentation/devicetree/bindings/pinctrl/marvell,armada-37xx-pinctrl.txt32
-rw-r--r--Documentation/devicetree/bindings/pinctrl/marvell,kirkwood-pinctrl.txt44
-rw-r--r--Documentation/devicetree/bindings/pinctrl/mediatek,mt65xx-pinctrl.yaml181
-rw-r--r--Documentation/devicetree/bindings/pinctrl/mediatek,mt6779-pinctrl.yaml248
-rw-r--r--Documentation/devicetree/bindings/pinctrl/mediatek,mt6795-pinctrl.yaml228
-rw-r--r--Documentation/devicetree/bindings/pinctrl/mediatek,mt6893-pinctrl.yaml193
-rw-r--r--Documentation/devicetree/bindings/pinctrl/mediatek,mt7620-pinctrl.yaml298
-rw-r--r--Documentation/devicetree/bindings/pinctrl/mediatek,mt7621-pinctrl.yaml261
-rw-r--r--Documentation/devicetree/bindings/pinctrl/mediatek,mt7622-pinctrl.yaml401
-rw-r--r--Documentation/devicetree/bindings/pinctrl/mediatek,mt76x8-pinctrl.yaml450
-rw-r--r--Documentation/devicetree/bindings/pinctrl/mediatek,mt7981-pinctrl.yaml480
-rw-r--r--Documentation/devicetree/bindings/pinctrl/mediatek,mt7986-pinctrl.yaml462
-rw-r--r--Documentation/devicetree/bindings/pinctrl/mediatek,mt7988-pinctrl.yaml575
-rw-r--r--Documentation/devicetree/bindings/pinctrl/mediatek,mt8183-pinctrl.yaml239
-rw-r--r--Documentation/devicetree/bindings/pinctrl/mediatek,mt8186-pinctrl.yaml275
-rw-r--r--Documentation/devicetree/bindings/pinctrl/mediatek,mt8188-pinctrl.yaml232
-rw-r--r--Documentation/devicetree/bindings/pinctrl/mediatek,mt8189-pinctrl.yaml213
-rw-r--r--Documentation/devicetree/bindings/pinctrl/mediatek,mt8192-pinctrl.yaml184
-rw-r--r--Documentation/devicetree/bindings/pinctrl/mediatek,mt8195-pinctrl.yaml286
-rw-r--r--Documentation/devicetree/bindings/pinctrl/mediatek,mt8196-pinctrl.yaml236
-rw-r--r--Documentation/devicetree/bindings/pinctrl/mediatek,mt8365-pinctrl.yaml230
-rw-r--r--Documentation/devicetree/bindings/pinctrl/meson,pinctrl.txt82
-rw-r--r--Documentation/devicetree/bindings/pinctrl/microchip,mcp23s08.yaml161
-rw-r--r--Documentation/devicetree/bindings/pinctrl/microchip,pic32-pinctrl.txt2
-rw-r--r--Documentation/devicetree/bindings/pinctrl/microchip,sparx5-sgpio.yaml168
-rw-r--r--Documentation/devicetree/bindings/pinctrl/mscc,ocelot-pinctrl.txt39
-rw-r--r--Documentation/devicetree/bindings/pinctrl/mscc,ocelot-pinctrl.yaml127
-rw-r--r--Documentation/devicetree/bindings/pinctrl/nuvoton,ma35d1-pinctrl.yaml178
-rw-r--r--Documentation/devicetree/bindings/pinctrl/nuvoton,npcm7xx-pinctrl.txt2
-rw-r--r--Documentation/devicetree/bindings/pinctrl/nuvoton,npcm845-pinctrl.yaml215
-rw-r--r--Documentation/devicetree/bindings/pinctrl/nuvoton,wpcm450-pinctrl.yaml162
-rw-r--r--Documentation/devicetree/bindings/pinctrl/nvidia,tegra-pinmux-common.yaml178
-rw-r--r--Documentation/devicetree/bindings/pinctrl/nvidia,tegra114-pinmux.txt131
-rw-r--r--Documentation/devicetree/bindings/pinctrl/nvidia,tegra114-pinmux.yaml153
-rw-r--r--Documentation/devicetree/bindings/pinctrl/nvidia,tegra124-dpaux-padctl.txt59
-rw-r--r--Documentation/devicetree/bindings/pinctrl/nvidia,tegra124-pinmux.txt153
-rw-r--r--Documentation/devicetree/bindings/pinctrl/nvidia,tegra124-pinmux.yaml174
-rw-r--r--Documentation/devicetree/bindings/pinctrl/nvidia,tegra186-pinmux.yaml285
-rw-r--r--Documentation/devicetree/bindings/pinctrl/nvidia,tegra194-pinmux.yaml282
-rw-r--r--Documentation/devicetree/bindings/pinctrl/nvidia,tegra20-pinmux.txt143
-rw-r--r--Documentation/devicetree/bindings/pinctrl/nvidia,tegra20-pinmux.yaml110
-rw-r--r--Documentation/devicetree/bindings/pinctrl/nvidia,tegra210-pinmux.txt166
-rw-r--r--Documentation/devicetree/bindings/pinctrl/nvidia,tegra210-pinmux.yaml140
-rw-r--r--Documentation/devicetree/bindings/pinctrl/nvidia,tegra234-pinmux-aon.yaml81
-rw-r--r--Documentation/devicetree/bindings/pinctrl/nvidia,tegra234-pinmux-common.yaml52
-rw-r--r--Documentation/devicetree/bindings/pinctrl/nvidia,tegra234-pinmux.yaml142
-rw-r--r--Documentation/devicetree/bindings/pinctrl/nvidia,tegra30-pinmux.txt144
-rw-r--r--Documentation/devicetree/bindings/pinctrl/nvidia,tegra30-pinmux.yaml174
-rw-r--r--Documentation/devicetree/bindings/pinctrl/nxp,lpc1850-scu.txt71
-rw-r--r--Documentation/devicetree/bindings/pinctrl/nxp,lpc1850-scu.yaml79
-rw-r--r--Documentation/devicetree/bindings/pinctrl/nxp,s32g2-siul2-pinctrl.yaml123
-rw-r--r--Documentation/devicetree/bindings/pinctrl/oxnas,pinctrl.txt56
-rw-r--r--Documentation/devicetree/bindings/pinctrl/pincfg-node.yaml156
-rw-r--r--Documentation/devicetree/bindings/pinctrl/pinctrl-aspeed.txt172
-rw-r--r--Documentation/devicetree/bindings/pinctrl/pinctrl-atlas7.txt109
-rw-r--r--Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt234
-rw-r--r--Documentation/devicetree/bindings/pinctrl/pinctrl-max77620.txt2
-rw-r--r--Documentation/devicetree/bindings/pinctrl/pinctrl-mcp23s08.txt146
-rw-r--r--Documentation/devicetree/bindings/pinctrl/pinctrl-mt65xx.txt153
-rw-r--r--Documentation/devicetree/bindings/pinctrl/pinctrl-mt7622.txt359
-rw-r--r--Documentation/devicetree/bindings/pinctrl/pinctrl-rk805.txt4
-rw-r--r--Documentation/devicetree/bindings/pinctrl/pinctrl-single.txt255
-rw-r--r--Documentation/devicetree/bindings/pinctrl/pinctrl-single.yaml219
-rw-r--r--Documentation/devicetree/bindings/pinctrl/pinctrl-sx150x.txt72
-rw-r--r--Documentation/devicetree/bindings/pinctrl/pinctrl-zx.txt84
-rw-r--r--Documentation/devicetree/bindings/pinctrl/pinctrl.yaml45
-rw-r--r--Documentation/devicetree/bindings/pinctrl/pinmux-node.yaml133
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,apq8064-pinctrl.txt95
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,apq8064-pinctrl.yaml110
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,apq8084-pinctrl.txt188
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,apq8084-pinctrl.yaml129
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,glymur-tlmm.yaml133
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,ipq4019-pinctrl.txt84
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,ipq4019-pinctrl.yaml103
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,ipq5018-tlmm.yaml123
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,ipq5332-tlmm.yaml118
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,ipq5424-tlmm.yaml114
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,ipq6018-pinctrl.yaml118
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,ipq8064-pinctrl.txt101
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,ipq8064-pinctrl.yaml108
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,ipq8074-pinctrl.txt181
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,ipq8074-pinctrl.yaml121
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,ipq9574-tlmm.yaml114
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,lpass-lpi-common.yaml75
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,mdm9607-tlmm.yaml119
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,mdm9615-pinctrl.txt161
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,mdm9615-pinctrl.yaml106
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,milos-tlmm.yaml133
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,msm8226-pinctrl.yaml101
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,msm8660-pinctrl.txt96
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,msm8660-pinctrl.yaml110
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,msm8909-tlmm.yaml138
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,msm8916-pinctrl.txt195
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,msm8916-pinctrl.yaml152
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,msm8917-pinctrl.yaml160
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,msm8953-pinctrl.yaml136
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,msm8960-pinctrl.txt190
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,msm8960-pinctrl.yaml150
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,msm8974-pinctrl.txt121
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,msm8974-pinctrl.yaml165
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,msm8976-pinctrl.yaml122
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,msm8994-pinctrl.txt186
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,msm8994-pinctrl.yaml148
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,msm8996-pinctrl.txt208
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,msm8996-pinctrl.yaml168
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,msm8998-pinctrl.txt193
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,msm8998-pinctrl.yaml157
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.txt252
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.yaml617
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,pmic-mpp.txt181
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,pmic-mpp.yaml200
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,qcm2290-tlmm.yaml122
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,qcs404-pinctrl.txt199
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,qcs404-pinctrl.yaml162
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,qcs615-tlmm.yaml124
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,qcs8300-tlmm.yaml118
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,qdu1000-tlmm.yaml119
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sa8775p-tlmm.yaml131
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sar2130p-tlmm.yaml138
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sc7180-pinctrl.yaml144
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sc7280-lpass-lpi-pinctrl.yaml113
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sc7280-pinctrl.yaml130
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sc8180x-tlmm.yaml138
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sc8280xp-lpass-lpi-pinctrl.yaml114
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sc8280xp-tlmm.yaml137
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sdm630-pinctrl.yaml173
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sdm660-lpass-lpi-pinctrl.yaml109
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sdm660-pinctrl.txt191
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sdm670-tlmm.yaml112
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sdm845-pinctrl.txt176
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sdm845-pinctrl.yaml155
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sdx55-pinctrl.yaml122
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sdx65-tlmm.yaml153
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sdx75-tlmm.yaml131
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sm4250-lpass-lpi-pinctrl.yaml118
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sm4450-tlmm.yaml135
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sm6115-lpass-lpi-pinctrl.yaml95
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sm6115-tlmm.yaml138
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sm6125-tlmm.yaml137
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sm6350-tlmm.yaml148
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sm6375-tlmm.yaml142
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sm7150-tlmm.yaml146
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sm8150-pinctrl.yaml159
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sm8250-lpass-lpi-pinctrl.yaml120
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sm8250-pinctrl.yaml120
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sm8350-lpass-lpi-pinctrl.yaml102
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sm8350-tlmm.yaml135
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sm8450-lpass-lpi-pinctrl.yaml123
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sm8450-tlmm.yaml134
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sm8550-lpass-lpi-pinctrl.yaml113
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sm8550-tlmm.yaml148
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sm8650-lpass-lpi-pinctrl.yaml111
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sm8650-tlmm.yaml141
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sm8750-tlmm.yaml138
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,tlmm-common.yaml101
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,x1e80100-tlmm.yaml137
-rw-r--r--Documentation/devicetree/bindings/pinctrl/ralink,rt2880-pinctrl.yaml141
-rw-r--r--Documentation/devicetree/bindings/pinctrl/ralink,rt305x-pinctrl.yaml206
-rw-r--r--Documentation/devicetree/bindings/pinctrl/ralink,rt3352-pinctrl.yaml243
-rw-r--r--Documentation/devicetree/bindings/pinctrl/ralink,rt3883-pinctrl.yaml261
-rw-r--r--Documentation/devicetree/bindings/pinctrl/ralink,rt5350-pinctrl.yaml206
-rw-r--r--Documentation/devicetree/bindings/pinctrl/raspberrypi,rp1-gpio.yaml231
-rw-r--r--Documentation/devicetree/bindings/pinctrl/realtek,rtd1315e-pinctrl.yaml188
-rw-r--r--Documentation/devicetree/bindings/pinctrl/realtek,rtd1319d-pinctrl.yaml187
-rw-r--r--Documentation/devicetree/bindings/pinctrl/realtek,rtd1619b-pinctrl.yaml186
-rw-r--r--Documentation/devicetree/bindings/pinctrl/renesas,pfc-pinctrl.txt184
-rw-r--r--Documentation/devicetree/bindings/pinctrl/renesas,pfc.yaml196
-rw-r--r--Documentation/devicetree/bindings/pinctrl/renesas,r9a09g077-pinctrl.yaml172
-rw-r--r--Documentation/devicetree/bindings/pinctrl/renesas,rza1-pinctrl.txt223
-rw-r--r--Documentation/devicetree/bindings/pinctrl/renesas,rza1-ports.yaml190
-rw-r--r--Documentation/devicetree/bindings/pinctrl/renesas,rza2-pinctrl.yaml98
-rw-r--r--Documentation/devicetree/bindings/pinctrl/renesas,rzg2l-pinctrl.yaml226
-rw-r--r--Documentation/devicetree/bindings/pinctrl/renesas,rzg2l-poeg.yaml86
-rw-r--r--Documentation/devicetree/bindings/pinctrl/renesas,rzn1-pinctrl.txt153
-rw-r--r--Documentation/devicetree/bindings/pinctrl/renesas,rzn1-pinctrl.yaml126
-rw-r--r--Documentation/devicetree/bindings/pinctrl/renesas,rzv2m-pinctrl.yaml167
-rw-r--r--Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.txt168
-rw-r--r--Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.yaml193
-rw-r--r--Documentation/devicetree/bindings/pinctrl/samsung,pinctrl-gpio-bank.yaml52
-rw-r--r--Documentation/devicetree/bindings/pinctrl/samsung,pinctrl-pins-cfg.yaml80
-rw-r--r--Documentation/devicetree/bindings/pinctrl/samsung,pinctrl-wakeup-interrupt.yaml114
-rw-r--r--Documentation/devicetree/bindings/pinctrl/samsung,pinctrl.yaml413
-rw-r--r--Documentation/devicetree/bindings/pinctrl/samsung-pinctrl.txt380
-rw-r--r--Documentation/devicetree/bindings/pinctrl/semtech,sx1501q.yaml251
-rw-r--r--Documentation/devicetree/bindings/pinctrl/socionext,uniphier-pinctrl.txt27
-rw-r--r--Documentation/devicetree/bindings/pinctrl/socionext,uniphier-pinctrl.yaml84
-rw-r--r--Documentation/devicetree/bindings/pinctrl/sophgo,cv1800-pinctrl.yaml122
-rw-r--r--Documentation/devicetree/bindings/pinctrl/sophgo,sg2042-pinctrl.yaml129
-rw-r--r--Documentation/devicetree/bindings/pinctrl/spacemit,k1-pinctrl.yaml142
-rw-r--r--Documentation/devicetree/bindings/pinctrl/sprd,pinctrl.txt2
-rw-r--r--Documentation/devicetree/bindings/pinctrl/st,stm32-hdp.yaml187
-rw-r--r--Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.txt205
-rw-r--r--Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml314
-rw-r--r--Documentation/devicetree/bindings/pinctrl/starfive,jh7100-pinctrl.yaml307
-rw-r--r--Documentation/devicetree/bindings/pinctrl/starfive,jh7110-aon-pinctrl.yaml124
-rw-r--r--Documentation/devicetree/bindings/pinctrl/starfive,jh7110-sys-pinctrl.yaml142
-rw-r--r--Documentation/devicetree/bindings/pinctrl/sunplus,sp7021-pinctrl.yaml377
-rw-r--r--Documentation/devicetree/bindings/pinctrl/thead,th1520-pinctrl.yaml176
-rw-r--r--Documentation/devicetree/bindings/pinctrl/ti,omap-pinctrl.txt13
-rw-r--r--Documentation/devicetree/bindings/pinctrl/toshiba,visconti-pinctrl.yaml96
-rw-r--r--Documentation/devicetree/bindings/pinctrl/xlnx,pinctrl-zynq.yaml210
-rw-r--r--Documentation/devicetree/bindings/pinctrl/xlnx,versal-pinctrl.yaml278
-rw-r--r--Documentation/devicetree/bindings/pinctrl/xlnx,zynq-pinctrl.txt105
-rw-r--r--Documentation/devicetree/bindings/pinctrl/xlnx,zynqmp-pinctrl.yaml353
-rw-r--r--Documentation/devicetree/bindings/pmem/pmem-region.txt65
-rw-r--r--Documentation/devicetree/bindings/pmem/pmem-region.yaml48
-rw-r--r--Documentation/devicetree/bindings/power/allwinner,sun20i-d1-ppu.yaml57
-rw-r--r--Documentation/devicetree/bindings/power/allwinner,sun50i-h6-prcm-ppu.yaml42
-rw-r--r--Documentation/devicetree/bindings/power/amlogic,meson-ee-pwrc.yaml184
-rw-r--r--Documentation/devicetree/bindings/power/amlogic,meson-gx-pwrc.txt10
-rw-r--r--Documentation/devicetree/bindings/power/amlogic,meson-sec-pwrc.yaml49
-rw-r--r--Documentation/devicetree/bindings/power/apple,pmgr-pwrstate.yaml90
-rw-r--r--Documentation/devicetree/bindings/power/avs/qcom,cpr.yaml161
-rw-r--r--Documentation/devicetree/bindings/power/brcm,bcm-pmb.yaml51
-rw-r--r--Documentation/devicetree/bindings/power/brcm,bcm63xx-power.yaml44
-rw-r--r--Documentation/devicetree/bindings/power/domain-idle-state.txt33
-rw-r--r--Documentation/devicetree/bindings/power/domain-idle-state.yaml81
-rw-r--r--Documentation/devicetree/bindings/power/fsl,imx-gpc.txt91
-rw-r--r--Documentation/devicetree/bindings/power/fsl,imx-gpc.yaml148
-rw-r--r--Documentation/devicetree/bindings/power/fsl,imx-gpcv2.txt71
-rw-r--r--Documentation/devicetree/bindings/power/fsl,imx-gpcv2.yaml144
-rw-r--r--Documentation/devicetree/bindings/power/fsl,scu-pd.yaml42
-rw-r--r--Documentation/devicetree/bindings/power/mediatek,power-controller.yaml264
-rw-r--r--Documentation/devicetree/bindings/power/mti,mips-cpc.txt8
-rw-r--r--Documentation/devicetree/bindings/power/mti,mips-cpc.yaml37
-rw-r--r--Documentation/devicetree/bindings/power/pd-samsung.txt45
-rw-r--r--Documentation/devicetree/bindings/power/pd-samsung.yaml68
-rw-r--r--Documentation/devicetree/bindings/power/power-domain.yaml135
-rw-r--r--Documentation/devicetree/bindings/power/power_domain.txt97
-rw-r--r--Documentation/devicetree/bindings/power/qcom,kpss-acc-v2.yaml44
-rw-r--r--Documentation/devicetree/bindings/power/qcom,rpmpd.yaml217
-rw-r--r--Documentation/devicetree/bindings/power/raspberrypi,bcm2835-power.yaml42
-rw-r--r--Documentation/devicetree/bindings/power/renesas,apmu.txt35
-rw-r--r--Documentation/devicetree/bindings/power/renesas,apmu.yaml57
-rw-r--r--Documentation/devicetree/bindings/power/renesas,rcar-sysc.txt60
-rw-r--r--Documentation/devicetree/bindings/power/renesas,rcar-sysc.yaml71
-rw-r--r--Documentation/devicetree/bindings/power/renesas,sysc-rmobile.txt100
-rw-r--r--Documentation/devicetree/bindings/power/renesas,sysc-rmobile.yaml121
-rw-r--r--Documentation/devicetree/bindings/power/reset/apple,smc-reboot.yaml40
-rw-r--r--Documentation/devicetree/bindings/power/reset/atmel,at91sam9260-shdwc.yaml82
-rw-r--r--Documentation/devicetree/bindings/power/reset/atmel,sama5d2-shdwc.yaml122
-rw-r--r--Documentation/devicetree/bindings/power/reset/brcm,bcm21664-resetmgr.txt14
-rw-r--r--Documentation/devicetree/bindings/power/reset/brcm,bcm21664-resetmgr.yaml31
-rw-r--r--Documentation/devicetree/bindings/power/reset/gpio-poweroff.txt39
-rw-r--r--Documentation/devicetree/bindings/power/reset/gpio-poweroff.yaml65
-rw-r--r--Documentation/devicetree/bindings/power/reset/gpio-restart.txt54
-rw-r--r--Documentation/devicetree/bindings/power/reset/gpio-restart.yaml79
-rw-r--r--Documentation/devicetree/bindings/power/reset/ltc2952-poweroff.txt4
-rw-r--r--Documentation/devicetree/bindings/power/reset/msm-poweroff.txt17
-rw-r--r--Documentation/devicetree/bindings/power/reset/nvmem-reboot-mode.yaml54
-rw-r--r--Documentation/devicetree/bindings/power/reset/ocelot-reset.txt9
-rw-r--r--Documentation/devicetree/bindings/power/reset/qcom,pon.txt48
-rw-r--r--Documentation/devicetree/bindings/power/reset/qcom,pon.yaml154
-rw-r--r--Documentation/devicetree/bindings/power/reset/qcom,pshold.yaml35
-rw-r--r--Documentation/devicetree/bindings/power/reset/reboot-mode.txt25
-rw-r--r--Documentation/devicetree/bindings/power/reset/reboot-mode.yaml49
-rw-r--r--Documentation/devicetree/bindings/power/reset/regulator-poweroff.yaml37
-rw-r--r--Documentation/devicetree/bindings/power/reset/restart-handler.yaml30
-rw-r--r--Documentation/devicetree/bindings/power/reset/syscon-poweroff.txt30
-rw-r--r--Documentation/devicetree/bindings/power/reset/syscon-poweroff.yaml66
-rw-r--r--Documentation/devicetree/bindings/power/reset/syscon-reboot-mode.txt35
-rw-r--r--Documentation/devicetree/bindings/power/reset/syscon-reboot-mode.yaml57
-rw-r--r--Documentation/devicetree/bindings/power/reset/syscon-reboot.txt23
-rw-r--r--Documentation/devicetree/bindings/power/reset/syscon-reboot.yaml98
-rw-r--r--Documentation/devicetree/bindings/power/reset/toradex,smarc-ec.yaml52
-rw-r--r--Documentation/devicetree/bindings/power/reset/xlnx,zynqmp-power.yaml82
-rw-r--r--Documentation/devicetree/bindings/power/rockchip,power-controller.yaml260
-rw-r--r--Documentation/devicetree/bindings/power/rockchip-io-domain.txt135
-rw-r--r--Documentation/devicetree/bindings/power/rockchip-io-domain.yaml414
-rw-r--r--Documentation/devicetree/bindings/power/starfive,jh7110-pmu.yaml45
-rw-r--r--Documentation/devicetree/bindings/power/supply/ab8500/btemp.txt16
-rw-r--r--Documentation/devicetree/bindings/power/supply/ab8500/chargalg.txt16
-rw-r--r--Documentation/devicetree/bindings/power/supply/ab8500/charger.txt25
-rw-r--r--Documentation/devicetree/bindings/power/supply/ab8500/fg.txt58
-rw-r--r--Documentation/devicetree/bindings/power/supply/act8945a-charger.txt44
-rw-r--r--Documentation/devicetree/bindings/power/supply/adc-battery.yaml70
-rw-r--r--Documentation/devicetree/bindings/power/supply/axp20x_ac_power.txt22
-rw-r--r--Documentation/devicetree/bindings/power/supply/axp20x_battery.txt20
-rw-r--r--Documentation/devicetree/bindings/power/supply/axp20x_usb_power.txt40
-rw-r--r--Documentation/devicetree/bindings/power/supply/battery.txt57
-rw-r--r--Documentation/devicetree/bindings/power/supply/battery.yaml184
-rw-r--r--Documentation/devicetree/bindings/power/supply/bq2415x.txt47
-rw-r--r--Documentation/devicetree/bindings/power/supply/bq2415x.yaml97
-rw-r--r--Documentation/devicetree/bindings/power/supply/bq24190.txt51
-rw-r--r--Documentation/devicetree/bindings/power/supply/bq24190.yaml100
-rw-r--r--Documentation/devicetree/bindings/power/supply/bq24257.txt62
-rw-r--r--Documentation/devicetree/bindings/power/supply/bq24257.yaml124
-rw-r--r--Documentation/devicetree/bindings/power/supply/bq24735.yaml89
-rw-r--r--Documentation/devicetree/bindings/power/supply/bq2515x.yaml92
-rw-r--r--Documentation/devicetree/bindings/power/supply/bq256xx.yaml126
-rw-r--r--Documentation/devicetree/bindings/power/supply/bq25890.txt49
-rw-r--r--Documentation/devicetree/bindings/power/supply/bq25890.yaml127
-rw-r--r--Documentation/devicetree/bindings/power/supply/bq25980.yaml112
-rw-r--r--Documentation/devicetree/bindings/power/supply/bq27xxx.txt56
-rw-r--r--Documentation/devicetree/bindings/power/supply/bq27xxx.yaml125
-rw-r--r--Documentation/devicetree/bindings/power/supply/charger-manager.txt81
-rw-r--r--Documentation/devicetree/bindings/power/supply/charger-manager.yaml217
-rw-r--r--Documentation/devicetree/bindings/power/supply/cpcap-battery.txt31
-rw-r--r--Documentation/devicetree/bindings/power/supply/cpcap-battery.yaml87
-rw-r--r--Documentation/devicetree/bindings/power/supply/cpcap-charger.txt37
-rw-r--r--Documentation/devicetree/bindings/power/supply/cpcap-charger.yaml106
-rw-r--r--Documentation/devicetree/bindings/power/supply/cw2015_battery.yaml77
-rw-r--r--Documentation/devicetree/bindings/power/supply/da9150-charger.txt26
-rw-r--r--Documentation/devicetree/bindings/power/supply/da9150-fg.txt23
-rw-r--r--Documentation/devicetree/bindings/power/supply/dlg,da9150-charger.yaml52
-rw-r--r--Documentation/devicetree/bindings/power/supply/dlg,da9150-fuel-gauge.yaml51
-rw-r--r--Documentation/devicetree/bindings/power/supply/gpio-charger.txt27
-rw-r--r--Documentation/devicetree/bindings/power/supply/gpio-charger.yaml100
-rw-r--r--Documentation/devicetree/bindings/power/supply/ingenic,battery.yaml61
-rw-r--r--Documentation/devicetree/bindings/power/supply/isp1704.txt17
-rw-r--r--Documentation/devicetree/bindings/power/supply/isp1704.yaml42
-rw-r--r--Documentation/devicetree/bindings/power/supply/lego,ev3-battery.yaml55
-rw-r--r--Documentation/devicetree/bindings/power/supply/lego_ev3_battery.txt21
-rw-r--r--Documentation/devicetree/bindings/power/supply/lltc,lt3651-charger.yaml44
-rw-r--r--Documentation/devicetree/bindings/power/supply/lltc,ltc294x.yaml66
-rw-r--r--Documentation/devicetree/bindings/power/supply/lp8727_charger.txt43
-rw-r--r--Documentation/devicetree/bindings/power/supply/ltc2941.txt28
-rw-r--r--Documentation/devicetree/bindings/power/supply/ltc3651-charger.txt27
-rw-r--r--Documentation/devicetree/bindings/power/supply/ltc4162-l.yaml73
-rw-r--r--Documentation/devicetree/bindings/power/supply/max17042_battery.txt31
-rw-r--r--Documentation/devicetree/bindings/power/supply/max77650-charger.yaml37
-rw-r--r--Documentation/devicetree/bindings/power/supply/max8903-charger.txt24
-rw-r--r--Documentation/devicetree/bindings/power/supply/max8925_battery.txt18
-rw-r--r--Documentation/devicetree/bindings/power/supply/maxim,ds2760.txt26
-rw-r--r--Documentation/devicetree/bindings/power/supply/maxim,ds2760.yaml43
-rw-r--r--Documentation/devicetree/bindings/power/supply/maxim,max14577.yaml84
-rw-r--r--Documentation/devicetree/bindings/power/supply/maxim,max14656.txt23
-rw-r--r--Documentation/devicetree/bindings/power/supply/maxim,max14656.yaml45
-rw-r--r--Documentation/devicetree/bindings/power/supply/maxim,max17040.yaml128
-rw-r--r--Documentation/devicetree/bindings/power/supply/maxim,max17042.yaml84
-rw-r--r--Documentation/devicetree/bindings/power/supply/maxim,max17201.yaml58
-rw-r--r--Documentation/devicetree/bindings/power/supply/maxim,max77693.yaml70
-rw-r--r--Documentation/devicetree/bindings/power/supply/maxim,max77705.yaml50
-rw-r--r--Documentation/devicetree/bindings/power/supply/maxim,max77976.yaml44
-rw-r--r--Documentation/devicetree/bindings/power/supply/maxim,max8903.yaml67
-rw-r--r--Documentation/devicetree/bindings/power/supply/maxim,max8971.yaml68
-rw-r--r--Documentation/devicetree/bindings/power/supply/mediatek,mt6370-charger.yaml96
-rw-r--r--Documentation/devicetree/bindings/power/supply/microchip,ucs1002.yaml51
-rw-r--r--Documentation/devicetree/bindings/power/supply/mitsumi,mm8013.yaml38
-rw-r--r--Documentation/devicetree/bindings/power/supply/mt6360_charger.yaml48
-rw-r--r--Documentation/devicetree/bindings/power/supply/nokia,n900-battery.yaml49
-rw-r--r--Documentation/devicetree/bindings/power/supply/olpc-battery.yaml27
-rw-r--r--Documentation/devicetree/bindings/power/supply/olpc_battery.txt5
-rw-r--r--Documentation/devicetree/bindings/power/supply/pegatron,chagall-ec.yaml49
-rw-r--r--Documentation/devicetree/bindings/power/supply/power-supply.yaml27
-rw-r--r--Documentation/devicetree/bindings/power/supply/power_supply.txt23
-rw-r--r--Documentation/devicetree/bindings/power/supply/qcom,coincell-charger.txt48
-rw-r--r--Documentation/devicetree/bindings/power/supply/qcom,pm8916-bms-vm.yaml83
-rw-r--r--Documentation/devicetree/bindings/power/supply/qcom,pm8916-lbc.yaml128
-rw-r--r--Documentation/devicetree/bindings/power/supply/qcom,pm8941-charger.yaml178
-rw-r--r--Documentation/devicetree/bindings/power/supply/qcom,pm8941-coincell.yaml67
-rw-r--r--Documentation/devicetree/bindings/power/supply/qcom,pmi8998-charger.yaml82
-rw-r--r--Documentation/devicetree/bindings/power/supply/qcom_smbb.txt150
-rw-r--r--Documentation/devicetree/bindings/power/supply/richtek,rt5033-battery.yaml54
-rw-r--r--Documentation/devicetree/bindings/power/supply/richtek,rt5033-charger.yaml67
-rw-r--r--Documentation/devicetree/bindings/power/supply/richtek,rt9455.yaml90
-rw-r--r--Documentation/devicetree/bindings/power/supply/richtek,rt9467.yaml82
-rw-r--r--Documentation/devicetree/bindings/power/supply/richtek,rt9471.yaml73
-rw-r--r--Documentation/devicetree/bindings/power/supply/rohm,bd99954.yaml164
-rw-r--r--Documentation/devicetree/bindings/power/supply/rt9455_charger.txt46
-rw-r--r--Documentation/devicetree/bindings/power/supply/rx51-battery.txt25
-rw-r--r--Documentation/devicetree/bindings/power/supply/samsung,battery.yaml56
-rw-r--r--Documentation/devicetree/bindings/power/supply/sbs,sbs-battery.yaml84
-rw-r--r--Documentation/devicetree/bindings/power/supply/sbs,sbs-charger.yaml55
-rw-r--r--Documentation/devicetree/bindings/power/supply/sbs,sbs-manager.txt66
-rw-r--r--Documentation/devicetree/bindings/power/supply/sbs,sbs-manager.yaml111
-rw-r--r--Documentation/devicetree/bindings/power/supply/sbs_sbs-battery.txt27
-rw-r--r--Documentation/devicetree/bindings/power/supply/sbs_sbs-charger.txt21
-rw-r--r--Documentation/devicetree/bindings/power/supply/sc2731-charger.yaml33
-rw-r--r--Documentation/devicetree/bindings/power/supply/sc2731_charger.txt40
-rw-r--r--Documentation/devicetree/bindings/power/supply/sc27xx-fg.yaml73
-rw-r--r--Documentation/devicetree/bindings/power/supply/st,stc3117.yaml74
-rw-r--r--Documentation/devicetree/bindings/power/supply/stericsson,ab8500-btemp.yaml75
-rw-r--r--Documentation/devicetree/bindings/power/supply/stericsson,ab8500-chargalg.yaml39
-rw-r--r--Documentation/devicetree/bindings/power/supply/stericsson,ab8500-charger.yaml125
-rw-r--r--Documentation/devicetree/bindings/power/supply/stericsson,ab8500-fg.yaml79
-rw-r--r--Documentation/devicetree/bindings/power/supply/summit,smb347-charger.yaml181
-rw-r--r--Documentation/devicetree/bindings/power/supply/ti,bq24735.txt39
-rw-r--r--Documentation/devicetree/bindings/power/supply/ti,lp8727.yaml90
-rw-r--r--Documentation/devicetree/bindings/power/supply/ti,twl6030-charger.yaml48
-rw-r--r--Documentation/devicetree/bindings/power/supply/tps65090-charger.yaml36
-rw-r--r--Documentation/devicetree/bindings/power/supply/tps65090.txt17
-rw-r--r--Documentation/devicetree/bindings/power/supply/tps65217-charger.yaml43
-rw-r--r--Documentation/devicetree/bindings/power/supply/tps65217_charger.txt17
-rw-r--r--Documentation/devicetree/bindings/power/supply/twl-charger.txt30
-rw-r--r--Documentation/devicetree/bindings/power/supply/twl4030-charger.yaml68
-rw-r--r--Documentation/devicetree/bindings/power/supply/x-powers,axp20x-ac-power-supply.yaml35
-rw-r--r--Documentation/devicetree/bindings/power/supply/x-powers,axp20x-battery-power-supply.yaml51
-rw-r--r--Documentation/devicetree/bindings/power/supply/x-powers,axp20x-usb-power-supply.yaml104
-rw-r--r--Documentation/devicetree/bindings/power/wakeup-source.txt33
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/cache_sram.txt20
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/cpus.txt4
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/dcsr.txt4
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/dma.txt204
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/l2cache.txt61
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/mcu-mpc8349emitx.txt17
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/mpc5200.txt2
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/mpic.txt231
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/msi-pic.txt111
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/pmc.txt63
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/pmc.yaml152
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/raideng.txt2
-rw-r--r--Documentation/devicetree/bindings/powerpc/nintendo/gamecube.txt2
-rw-r--r--Documentation/devicetree/bindings/powerpc/nintendo/wii.txt16
-rw-r--r--Documentation/devicetree/bindings/powerpc/opal/power-mgt.txt2
-rw-r--r--Documentation/devicetree/bindings/powerpc/sleep.yaml47
-rw-r--r--Documentation/devicetree/bindings/pps/pps-gpio.txt23
-rw-r--r--Documentation/devicetree/bindings/pps/pps-gpio.yaml49
-rw-r--r--Documentation/devicetree/bindings/property-units.txt42
-rw-r--r--Documentation/devicetree/bindings/ptp/fsl,ptp.yaml156
-rw-r--r--Documentation/devicetree/bindings/ptp/nxp,ptp-netc.yaml63
-rw-r--r--Documentation/devicetree/bindings/ptp/ptp-idt82p33.yaml45
-rw-r--r--Documentation/devicetree/bindings/ptp/ptp-idtcm.yaml69
-rw-r--r--Documentation/devicetree/bindings/ptp/ptp-ines.txt35
-rw-r--r--Documentation/devicetree/bindings/ptp/ptp-qoriq.txt78
-rw-r--r--Documentation/devicetree/bindings/ptp/timestamper.txt42
-rw-r--r--Documentation/devicetree/bindings/pwm/adi,axi-pwmgen.yaml57
-rw-r--r--Documentation/devicetree/bindings/pwm/airoha,en7581-pwm.yaml34
-rw-r--r--Documentation/devicetree/bindings/pwm/allwinner,sun4i-a10-pwm.yaml116
-rw-r--r--Documentation/devicetree/bindings/pwm/apple,s5l-fpwm.yaml52
-rw-r--r--Documentation/devicetree/bindings/pwm/argon40,fan-hat.yaml48
-rw-r--r--Documentation/devicetree/bindings/pwm/atmel,at91sam-pwm.yaml52
-rw-r--r--Documentation/devicetree/bindings/pwm/atmel,hlcdc-pwm.yaml35
-rw-r--r--Documentation/devicetree/bindings/pwm/atmel-hlcdc-pwm.txt29
-rw-r--r--Documentation/devicetree/bindings/pwm/atmel-pwm.txt34
-rw-r--r--Documentation/devicetree/bindings/pwm/atmel-tcb-pwm.txt16
-rw-r--r--Documentation/devicetree/bindings/pwm/brcm,bcm7038-pwm.txt20
-rw-r--r--Documentation/devicetree/bindings/pwm/brcm,bcm7038-pwm.yaml42
-rw-r--r--Documentation/devicetree/bindings/pwm/brcm,iproc-pwm.txt21
-rw-r--r--Documentation/devicetree/bindings/pwm/brcm,iproc-pwm.yaml45
-rw-r--r--Documentation/devicetree/bindings/pwm/brcm,kona-pwm.txt21
-rw-r--r--Documentation/devicetree/bindings/pwm/brcm,kona-pwm.yaml51
-rw-r--r--Documentation/devicetree/bindings/pwm/cirrus,ep9301-pwm.yaml53
-rw-r--r--Documentation/devicetree/bindings/pwm/clk-pwm.yaml46
-rw-r--r--Documentation/devicetree/bindings/pwm/fsl,vf610-ftm-pwm.yaml97
-rw-r--r--Documentation/devicetree/bindings/pwm/google,cros-ec-pwm.txt23
-rw-r--r--Documentation/devicetree/bindings/pwm/google,cros-ec-pwm.yaml57
-rw-r--r--Documentation/devicetree/bindings/pwm/img-pwm.txt2
-rw-r--r--Documentation/devicetree/bindings/pwm/imx-pwm.txt27
-rw-r--r--Documentation/devicetree/bindings/pwm/imx-pwm.yaml86
-rw-r--r--Documentation/devicetree/bindings/pwm/imx-tpm-pwm.yaml66
-rw-r--r--Documentation/devicetree/bindings/pwm/ingenic,jz47xx-pwm.txt25
-rw-r--r--Documentation/devicetree/bindings/pwm/intel,keembay-pwm.yaml46
-rw-r--r--Documentation/devicetree/bindings/pwm/intel,lgm-pwm.yaml47
-rw-r--r--Documentation/devicetree/bindings/pwm/iqs620a-pwm.yaml34
-rw-r--r--Documentation/devicetree/bindings/pwm/kontron,sl28cpld-pwm.yaml35
-rw-r--r--Documentation/devicetree/bindings/pwm/loongson,ls7a-pwm.yaml67
-rw-r--r--Documentation/devicetree/bindings/pwm/lpc1850-sct-pwm.txt20
-rw-r--r--Documentation/devicetree/bindings/pwm/lpc32xx-pwm.txt17
-rw-r--r--Documentation/devicetree/bindings/pwm/marvell,berlin-pwm.yaml44
-rw-r--r--Documentation/devicetree/bindings/pwm/marvell,pxa-pwm.yaml71
-rw-r--r--Documentation/devicetree/bindings/pwm/mediatek,mt2712-pwm.yaml100
-rw-r--r--Documentation/devicetree/bindings/pwm/mediatek,pwm-disp.yaml81
-rw-r--r--Documentation/devicetree/bindings/pwm/microchip,corepwm.yaml83
-rw-r--r--Documentation/devicetree/bindings/pwm/mxs-pwm.txt17
-rw-r--r--Documentation/devicetree/bindings/pwm/mxs-pwm.yaml53
-rw-r--r--Documentation/devicetree/bindings/pwm/nvidia,tegra20-pwm.txt76
-rw-r--r--Documentation/devicetree/bindings/pwm/nvidia,tegra20-pwm.yaml95
-rw-r--r--Documentation/devicetree/bindings/pwm/nxp,lpc1850-sct-pwm.yaml54
-rw-r--r--Documentation/devicetree/bindings/pwm/nxp,lpc3220-pwm.yaml44
-rw-r--r--Documentation/devicetree/bindings/pwm/nxp,mc33xs2410.yaml118
-rw-r--r--Documentation/devicetree/bindings/pwm/nxp,pca9685-pwm.txt2
-rw-r--r--Documentation/devicetree/bindings/pwm/opencores,pwm.yaml56
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-amlogic.yaml181
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-bcm2835.txt30
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-bcm2835.yaml42
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-berlin.txt17
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-fsl-ftm.txt55
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-gpio.yaml46
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-hibvt.txt4
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-lp3943.txt2
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-mediatek.txt40
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-meson.txt26
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-mtk-disp.txt43
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-nexus-node.yaml65
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-omap-dmtimer.txt22
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-rockchip.txt27
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-rockchip.yaml108
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-samsung.txt51
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-samsung.yaml130
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-sifive.yaml72
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-stm32-lp.txt27
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-stm32.txt35
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-sun4i.txt24
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-tiecap.txt51
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-tiecap.yaml63
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-tiehrpwm.txt49
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-tiehrpwm.yaml64
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-zx.txt22
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm.txt11
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm.yaml36
-rw-r--r--Documentation/devicetree/bindings/pwm/pxa-pwm.txt30
-rw-r--r--Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt39
-rw-r--r--Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.yaml99
-rw-r--r--Documentation/devicetree/bindings/pwm/renesas,rzg2l-gpt.yaml378
-rw-r--r--Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.txt35
-rw-r--r--Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.yaml94
-rw-r--r--Documentation/devicetree/bindings/pwm/snps,dw-apb-timers-pwm2.yaml67
-rw-r--r--Documentation/devicetree/bindings/pwm/sophgo,sg2042-pwm.yaml60
-rw-r--r--Documentation/devicetree/bindings/pwm/spear-pwm.txt2
-rw-r--r--Documentation/devicetree/bindings/pwm/sprd,ums512-pwm.yaml66
-rw-r--r--Documentation/devicetree/bindings/pwm/st,stmpe-pwm.txt2
-rw-r--r--Documentation/devicetree/bindings/pwm/sunplus,sp7021-pwm.yaml42
-rw-r--r--Documentation/devicetree/bindings/pwm/ti,omap-dmtimer-pwm.yaml59
-rw-r--r--Documentation/devicetree/bindings/pwm/ti,twl-pwm.txt17
-rw-r--r--Documentation/devicetree/bindings/pwm/ti,twl-pwmled.txt17
-rw-r--r--Documentation/devicetree/bindings/pwm/toshiba,pwm-visconti.yaml45
-rw-r--r--Documentation/devicetree/bindings/pwm/via,vt8500-pwm.yaml43
-rw-r--r--Documentation/devicetree/bindings/pwm/vt8500-pwm.txt18
-rw-r--r--Documentation/devicetree/bindings/regulator/act8865-regulator.txt94
-rw-r--r--Documentation/devicetree/bindings/regulator/act8945a-regulator.txt79
-rw-r--r--Documentation/devicetree/bindings/regulator/active-semi,act8600.yaml139
-rw-r--r--Documentation/devicetree/bindings/regulator/active-semi,act8846.yaml167
-rw-r--r--Documentation/devicetree/bindings/regulator/active-semi,act8865.yaml158
-rw-r--r--Documentation/devicetree/bindings/regulator/active-semi,act8945a.yaml271
-rw-r--r--Documentation/devicetree/bindings/regulator/adi,adp5055-regulator.yaml157
-rw-r--r--Documentation/devicetree/bindings/regulator/adi,max77503-regulator.yaml50
-rw-r--r--Documentation/devicetree/bindings/regulator/adi,max77541-regulator.yaml38
-rw-r--r--Documentation/devicetree/bindings/regulator/adi,max77857.yaml86
-rw-r--r--Documentation/devicetree/bindings/regulator/allwinner,sun20i-d1-system-ldos.yaml37
-rw-r--r--Documentation/devicetree/bindings/regulator/anatop-regulator.txt40
-rw-r--r--Documentation/devicetree/bindings/regulator/anatop-regulator.yaml93
-rw-r--r--Documentation/devicetree/bindings/regulator/arizona-regulator.txt17
-rw-r--r--Documentation/devicetree/bindings/regulator/awinic,aw37503.yaml78
-rw-r--r--Documentation/devicetree/bindings/regulator/brcm,bcm59054.yaml56
-rw-r--r--Documentation/devicetree/bindings/regulator/brcm,bcm59056.yaml51
-rw-r--r--Documentation/devicetree/bindings/regulator/da9210.txt29
-rw-r--r--Documentation/devicetree/bindings/regulator/da9211.txt4
-rw-r--r--Documentation/devicetree/bindings/regulator/dlg,da9121.yaml213
-rw-r--r--Documentation/devicetree/bindings/regulator/dlg,da9210.yaml52
-rw-r--r--Documentation/devicetree/bindings/regulator/dlg,slg51000.yaml132
-rw-r--r--Documentation/devicetree/bindings/regulator/fan53555.txt23
-rw-r--r--Documentation/devicetree/bindings/regulator/fcs,fan53555.yaml73
-rw-r--r--Documentation/devicetree/bindings/regulator/fixed-regulator.txt35
-rw-r--r--Documentation/devicetree/bindings/regulator/fixed-regulator.yaml160
-rw-r--r--Documentation/devicetree/bindings/regulator/google,cros-ec-regulator.yaml54
-rw-r--r--Documentation/devicetree/bindings/regulator/gpio-regulator.txt43
-rw-r--r--Documentation/devicetree/bindings/regulator/gpio-regulator.yaml122
-rw-r--r--Documentation/devicetree/bindings/regulator/infineon,ir38060.yaml45
-rw-r--r--Documentation/devicetree/bindings/regulator/lltc,ltc3676.yaml167
-rw-r--r--Documentation/devicetree/bindings/regulator/lp872x.txt4
-rw-r--r--Documentation/devicetree/bindings/regulator/ltc3676.txt94
-rw-r--r--Documentation/devicetree/bindings/regulator/max77650-regulator.yaml35
-rw-r--r--Documentation/devicetree/bindings/regulator/max77686.txt71
-rw-r--r--Documentation/devicetree/bindings/regulator/max77802.txt111
-rw-r--r--Documentation/devicetree/bindings/regulator/max8660.txt47
-rw-r--r--Documentation/devicetree/bindings/regulator/max8660.yaml73
-rw-r--r--Documentation/devicetree/bindings/regulator/max8893.yaml88
-rw-r--r--Documentation/devicetree/bindings/regulator/max8952.txt52
-rw-r--r--Documentation/devicetree/bindings/regulator/max8973-regulator.txt52
-rw-r--r--Documentation/devicetree/bindings/regulator/max8997-regulator.txt144
-rw-r--r--Documentation/devicetree/bindings/regulator/maxim,max14577.yaml78
-rw-r--r--Documentation/devicetree/bindings/regulator/maxim,max20086.yaml107
-rw-r--r--Documentation/devicetree/bindings/regulator/maxim,max20411.yaml58
-rw-r--r--Documentation/devicetree/bindings/regulator/maxim,max77686.yaml83
-rw-r--r--Documentation/devicetree/bindings/regulator/maxim,max77693.yaml60
-rw-r--r--Documentation/devicetree/bindings/regulator/maxim,max77802.yaml86
-rw-r--r--Documentation/devicetree/bindings/regulator/maxim,max77826.yaml67
-rw-r--r--Documentation/devicetree/bindings/regulator/maxim,max77838.yaml68
-rw-r--r--Documentation/devicetree/bindings/regulator/maxim,max77843.yaml65
-rw-r--r--Documentation/devicetree/bindings/regulator/maxim,max8952.yaml109
-rw-r--r--Documentation/devicetree/bindings/regulator/maxim,max8973.yaml141
-rw-r--r--Documentation/devicetree/bindings/regulator/maxim,max8997.yaml446
-rw-r--r--Documentation/devicetree/bindings/regulator/mediatek,mt6331-regulator.yaml280
-rw-r--r--Documentation/devicetree/bindings/regulator/mediatek,mt6332-regulator.yaml119
-rw-r--r--Documentation/devicetree/bindings/regulator/mediatek,mt6357-regulator.yaml284
-rw-r--r--Documentation/devicetree/bindings/regulator/mediatek,mt6358-regulator.yaml250
-rw-r--r--Documentation/devicetree/bindings/regulator/mediatek,mt6397-regulator.yaml238
-rw-r--r--Documentation/devicetree/bindings/regulator/mediatek,mt6873-dvfsrc-regulator.yaml45
-rw-r--r--Documentation/devicetree/bindings/regulator/microchip,mcp16502.yaml200
-rw-r--r--Documentation/devicetree/bindings/regulator/mp8859.txt22
-rw-r--r--Documentation/devicetree/bindings/regulator/mps,mp5416.yaml78
-rw-r--r--Documentation/devicetree/bindings/regulator/mps,mp886x.yaml63
-rw-r--r--Documentation/devicetree/bindings/regulator/mps,mpq2286.yaml59
-rw-r--r--Documentation/devicetree/bindings/regulator/mps,mpq7920.yaml117
-rw-r--r--Documentation/devicetree/bindings/regulator/mps,mpq7932.yaml68
-rw-r--r--Documentation/devicetree/bindings/regulator/mt6315-regulator.yaml68
-rw-r--r--Documentation/devicetree/bindings/regulator/mt6359-regulator.yaml385
-rw-r--r--Documentation/devicetree/bindings/regulator/mt6360-regulator.yaml107
-rw-r--r--Documentation/devicetree/bindings/regulator/mt6397-regulator.txt217
-rw-r--r--Documentation/devicetree/bindings/regulator/nvidia,tegra-regulators-coupling.txt65
-rw-r--r--Documentation/devicetree/bindings/regulator/nxp,pca9450-regulator.yaml249
-rw-r--r--Documentation/devicetree/bindings/regulator/nxp,pf0900.yaml163
-rw-r--r--Documentation/devicetree/bindings/regulator/nxp,pf5300.yaml54
-rw-r--r--Documentation/devicetree/bindings/regulator/nxp,pf8x00-regulator.yaml190
-rw-r--r--Documentation/devicetree/bindings/regulator/onnn,fan53880.yaml87
-rw-r--r--Documentation/devicetree/bindings/regulator/pfuze100.txt394
-rw-r--r--Documentation/devicetree/bindings/regulator/pfuze100.yaml199
-rw-r--r--Documentation/devicetree/bindings/regulator/pv88060.txt2
-rw-r--r--Documentation/devicetree/bindings/regulator/pwm-regulator.txt92
-rw-r--r--Documentation/devicetree/bindings/regulator/pwm-regulator.yaml127
-rw-r--r--Documentation/devicetree/bindings/regulator/qcom,qca6390-pmu.yaml244
-rw-r--r--Documentation/devicetree/bindings/regulator/qcom,rpm-regulator.yaml128
-rw-r--r--Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt160
-rw-r--r--Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml558
-rw-r--r--Documentation/devicetree/bindings/regulator/qcom,sdm845-refgen-regulator.yaml60
-rw-r--r--Documentation/devicetree/bindings/regulator/qcom,smd-rpm-regulator.txt286
-rw-r--r--Documentation/devicetree/bindings/regulator/qcom,smd-rpm-regulator.yaml145
-rw-r--r--Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt268
-rw-r--r--Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.yaml439
-rw-r--r--Documentation/devicetree/bindings/regulator/qcom,usb-vbus-regulator.yaml58
-rw-r--r--Documentation/devicetree/bindings/regulator/qcom-labibb-regulator.yaml106
-rw-r--r--Documentation/devicetree/bindings/regulator/raspberrypi,7inch-touchscreen-panel-regulator-v2.yaml61
-rw-r--r--Documentation/devicetree/bindings/regulator/raspberrypi,7inch-touchscreen-panel-regulator.yaml44
-rw-r--r--Documentation/devicetree/bindings/regulator/regulator-max77620.txt2
-rw-r--r--Documentation/devicetree/bindings/regulator/regulator-output.yaml39
-rw-r--r--Documentation/devicetree/bindings/regulator/regulator.txt134
-rw-r--r--Documentation/devicetree/bindings/regulator/regulator.yaml307
-rw-r--r--Documentation/devicetree/bindings/regulator/renesas,raa215300.yaml85
-rw-r--r--Documentation/devicetree/bindings/regulator/richtek,rt4801-regulator.yaml89
-rw-r--r--Documentation/devicetree/bindings/regulator/richtek,rt4803.yaml68
-rw-r--r--Documentation/devicetree/bindings/regulator/richtek,rt4831-regulator.yaml36
-rw-r--r--Documentation/devicetree/bindings/regulator/richtek,rt5133.yaml178
-rw-r--r--Documentation/devicetree/bindings/regulator/richtek,rt5190a-regulator.yaml141
-rw-r--r--Documentation/devicetree/bindings/regulator/richtek,rt5739.yaml73
-rw-r--r--Documentation/devicetree/bindings/regulator/richtek,rt5759-regulator.yaml90
-rw-r--r--Documentation/devicetree/bindings/regulator/richtek,rt6160-regulator.yaml61
-rw-r--r--Documentation/devicetree/bindings/regulator/richtek,rt6190.yaml79
-rw-r--r--Documentation/devicetree/bindings/regulator/richtek,rt6245-regulator.yaml88
-rw-r--r--Documentation/devicetree/bindings/regulator/richtek,rtmv20-regulator.yaml160
-rw-r--r--Documentation/devicetree/bindings/regulator/richtek,rtq2134-regulator.yaml106
-rw-r--r--Documentation/devicetree/bindings/regulator/richtek,rtq2208.yaml207
-rw-r--r--Documentation/devicetree/bindings/regulator/richtek,rtq6752-regulator.yaml77
-rw-r--r--Documentation/devicetree/bindings/regulator/rohm,bd71815-regulator.yaml117
-rw-r--r--Documentation/devicetree/bindings/regulator/rohm,bd71828-regulator.yaml105
-rw-r--r--Documentation/devicetree/bindings/regulator/rohm,bd71837-regulator.txt124
-rw-r--r--Documentation/devicetree/bindings/regulator/rohm,bd71837-regulator.yaml161
-rw-r--r--Documentation/devicetree/bindings/regulator/rohm,bd71847-regulator.yaml156
-rw-r--r--Documentation/devicetree/bindings/regulator/rohm,bd9576-regulator.yaml40
-rw-r--r--Documentation/devicetree/bindings/regulator/rohm,bd96801-regulator.yaml63
-rw-r--r--Documentation/devicetree/bindings/regulator/rohm,bd96802-regulator.yaml44
-rw-r--r--Documentation/devicetree/bindings/regulator/samsung,s2mpa01.txt79
-rw-r--r--Documentation/devicetree/bindings/regulator/samsung,s2mpa01.yaml62
-rw-r--r--Documentation/devicetree/bindings/regulator/samsung,s2mps11.txt102
-rw-r--r--Documentation/devicetree/bindings/regulator/samsung,s2mps11.yaml44
-rw-r--r--Documentation/devicetree/bindings/regulator/samsung,s2mps13.yaml44
-rw-r--r--Documentation/devicetree/bindings/regulator/samsung,s2mps14.yaml61
-rw-r--r--Documentation/devicetree/bindings/regulator/samsung,s2mps15.yaml44
-rw-r--r--Documentation/devicetree/bindings/regulator/samsung,s2mpu02.yaml44
-rw-r--r--Documentation/devicetree/bindings/regulator/samsung,s2mpu05.yaml47
-rw-r--r--Documentation/devicetree/bindings/regulator/samsung,s5m8767.txt145
-rw-r--r--Documentation/devicetree/bindings/regulator/samsung,s5m8767.yaml100
-rw-r--r--Documentation/devicetree/bindings/regulator/silergy,sy8106a.yaml52
-rw-r--r--Documentation/devicetree/bindings/regulator/silergy,sy8824x.yaml45
-rw-r--r--Documentation/devicetree/bindings/regulator/silergy,sy8827n.yaml47
-rw-r--r--Documentation/devicetree/bindings/regulator/siliconmitus,sm5703-regulator.yaml49
-rw-r--r--Documentation/devicetree/bindings/regulator/socionext,uniphier-regulator.yaml99
-rw-r--r--Documentation/devicetree/bindings/regulator/sprd,sc2731-regulator.txt43
-rw-r--r--Documentation/devicetree/bindings/regulator/sprd,sc2731-regulator.yaml46
-rw-r--r--Documentation/devicetree/bindings/regulator/st,stm32-booster.yaml47
-rw-r--r--Documentation/devicetree/bindings/regulator/st,stm32-vrefbuf.txt20
-rw-r--r--Documentation/devicetree/bindings/regulator/st,stm32-vrefbuf.yaml57
-rw-r--r--Documentation/devicetree/bindings/regulator/st,stm32mp1-pwr-reg.yaml68
-rw-r--r--Documentation/devicetree/bindings/regulator/st,stpmic1-regulator.txt68
-rw-r--r--Documentation/devicetree/bindings/regulator/sy8106a-regulator.txt23
-rw-r--r--Documentation/devicetree/bindings/regulator/ti,tps62360.yaml98
-rw-r--r--Documentation/devicetree/bindings/regulator/ti,tps62864.yaml63
-rw-r--r--Documentation/devicetree/bindings/regulator/ti,tps62870.yaml52
-rw-r--r--Documentation/devicetree/bindings/regulator/ti,tps65132.yaml87
-rw-r--r--Documentation/devicetree/bindings/regulator/ti,tps65219.yaml198
-rw-r--r--Documentation/devicetree/bindings/regulator/tps62360-regulator.txt44
-rw-r--r--Documentation/devicetree/bindings/regulator/tps65132-regulator.txt46
-rw-r--r--Documentation/devicetree/bindings/regulator/tps65218.txt9
-rw-r--r--Documentation/devicetree/bindings/regulator/twl-regulator.txt73
-rw-r--r--Documentation/devicetree/bindings/regulator/uniphier-regulator.txt57
-rw-r--r--Documentation/devicetree/bindings/regulator/vctrl-regulator.yaml80
-rw-r--r--Documentation/devicetree/bindings/regulator/vctrl.txt49
-rw-r--r--Documentation/devicetree/bindings/regulator/vexpress.txt2
-rw-r--r--Documentation/devicetree/bindings/regulator/vqmmc-ipq4019-regulator.yaml43
-rw-r--r--Documentation/devicetree/bindings/regulator/wlf,arizona.yaml41
-rw-r--r--Documentation/devicetree/bindings/remoteproc/amlogic,meson-mx-ao-arc.yaml87
-rw-r--r--Documentation/devicetree/bindings/remoteproc/fsl,imx-rproc.yaml150
-rw-r--r--Documentation/devicetree/bindings/remoteproc/imx-rproc.txt33
-rw-r--r--Documentation/devicetree/bindings/remoteproc/ingenic,vpu.yaml77
-rw-r--r--Documentation/devicetree/bindings/remoteproc/mtk,scp.yaml262
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,adsp-pil.txt126
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,adsp.txt139
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,adsp.yaml220
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,glink-edge.yaml97
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,glink-rpm-edge.yaml98
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,milos-pas.yaml198
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,msm8916-mss-pil.yaml345
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,msm8996-mss-pil.yaml392
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,pas-common.yaml88
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,pil-info.yaml46
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,q6v5.txt97
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,qcs404-cdsp-pil.yaml164
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,qcs404-pas.yaml95
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,rpm-proc.yaml171
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,sa8775p-pas.yaml176
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,sc7180-mss-pil.yaml247
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,sc7180-pas.yaml186
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,sc7280-adsp-pil.yaml195
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,sc7280-mss-pil.yaml268
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,sc7280-wpss-pil.yaml211
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,sc8280xp-pas.yaml148
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,sdm845-adsp-pil.yaml163
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,sdx55-pas.yaml110
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,sm6115-pas.yaml157
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,sm6350-pas.yaml168
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,sm6375-pas.yaml145
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,sm8150-pas.yaml196
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,sm8350-pas.yaml192
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,sm8550-pas.yaml289
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,smd-edge.yaml118
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,wcnss-pil.txt158
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,wcnss-pil.yaml321
-rw-r--r--Documentation/devicetree/bindings/remoteproc/renesas,rcar-rproc.yaml65
-rw-r--r--Documentation/devicetree/bindings/remoteproc/st,stm32-rproc.yaml196
-rw-r--r--Documentation/devicetree/bindings/remoteproc/ti,davinci-rproc.txt3
-rw-r--r--Documentation/devicetree/bindings/remoteproc/ti,k3-dsp-rproc.yaml211
-rw-r--r--Documentation/devicetree/bindings/remoteproc/ti,k3-m4f-rproc.yaml125
-rw-r--r--Documentation/devicetree/bindings/remoteproc/ti,k3-r5f-rproc.yaml347
-rw-r--r--Documentation/devicetree/bindings/remoteproc/ti,keystone-rproc.txt4
-rw-r--r--Documentation/devicetree/bindings/remoteproc/ti,omap-remoteproc.yaml331
-rw-r--r--Documentation/devicetree/bindings/remoteproc/ti,pru-consumer.yaml60
-rw-r--r--Documentation/devicetree/bindings/remoteproc/ti,pru-rproc.yaml243
-rw-r--r--Documentation/devicetree/bindings/remoteproc/xlnx,zynqmp-r5fss.yaml371
-rw-r--r--Documentation/devicetree/bindings/reserved-memory/google,open-dice.yaml46
-rw-r--r--Documentation/devicetree/bindings/reserved-memory/nvidia,tegra210-emc-table.yaml31
-rw-r--r--Documentation/devicetree/bindings/reserved-memory/nvidia,tegra264-bpmp-shmem.yaml48
-rw-r--r--Documentation/devicetree/bindings/reserved-memory/phram.yaml47
-rw-r--r--Documentation/devicetree/bindings/reserved-memory/qcom,cmd-db.txt37
-rw-r--r--Documentation/devicetree/bindings/reserved-memory/qcom,cmd-db.yaml46
-rw-r--r--Documentation/devicetree/bindings/reserved-memory/qcom,rmtfs-mem.txt51
-rw-r--r--Documentation/devicetree/bindings/reserved-memory/qcom,rmtfs-mem.yaml66
-rw-r--r--Documentation/devicetree/bindings/reserved-memory/ramoops.txt51
-rw-r--r--Documentation/devicetree/bindings/reserved-memory/ramoops.yaml144
-rw-r--r--Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt137
-rw-r--r--Documentation/devicetree/bindings/reserved-memory/xen,shared-memory.txt24
-rw-r--r--Documentation/devicetree/bindings/reset/allwinner,sun6i-a31-clock-reset.yaml68
-rw-r--r--Documentation/devicetree/bindings/reset/allwinner,sunxi-clock-reset.txt21
-rw-r--r--Documentation/devicetree/bindings/reset/altr,rst-mgr.yaml55
-rw-r--r--Documentation/devicetree/bindings/reset/amlogic,meson-axg-audio-arb.txt21
-rw-r--r--Documentation/devicetree/bindings/reset/amlogic,meson-axg-audio-arb.yaml56
-rw-r--r--Documentation/devicetree/bindings/reset/amlogic,meson-reset.txt19
-rw-r--r--Documentation/devicetree/bindings/reset/amlogic,meson-reset.yaml49
-rw-r--r--Documentation/devicetree/bindings/reset/ath79-reset.txt20
-rw-r--r--Documentation/devicetree/bindings/reset/atmel,at91sam9260-reset.yaml75
-rw-r--r--Documentation/devicetree/bindings/reset/berlin,reset.txt23
-rw-r--r--Documentation/devicetree/bindings/reset/bitmain,bm1880-reset.yaml36
-rw-r--r--Documentation/devicetree/bindings/reset/brcm,bcm4908-misc-pcie-reset.yaml39
-rw-r--r--Documentation/devicetree/bindings/reset/brcm,bcm6345-reset.yaml39
-rw-r--r--Documentation/devicetree/bindings/reset/brcm,bcm7216-pcie-sata-rescal.yaml41
-rw-r--r--Documentation/devicetree/bindings/reset/brcm,brcmstb-reset.yaml48
-rw-r--r--Documentation/devicetree/bindings/reset/canaan,k210-rst.yaml40
-rw-r--r--Documentation/devicetree/bindings/reset/canaan,k230-rst.yaml39
-rw-r--r--Documentation/devicetree/bindings/reset/delta,tn48m-reset.yaml35
-rw-r--r--Documentation/devicetree/bindings/reset/fsl,imx-src.txt49
-rw-r--r--Documentation/devicetree/bindings/reset/fsl,imx-src.yaml70
-rw-r--r--Documentation/devicetree/bindings/reset/fsl,imx7-src.txt47
-rw-r--r--Documentation/devicetree/bindings/reset/fsl,imx7-src.yaml65
-rw-r--r--Documentation/devicetree/bindings/reset/hisilicon,hi3660-reset.txt43
-rw-r--r--Documentation/devicetree/bindings/reset/hisilicon,hi3660-reset.yaml58
-rw-r--r--Documentation/devicetree/bindings/reset/hisilicon,hi6220-reset.txt36
-rw-r--r--Documentation/devicetree/bindings/reset/intel,rcu-gw.yaml65
-rw-r--r--Documentation/devicetree/bindings/reset/lantiq,reset.txt30
-rw-r--r--Documentation/devicetree/bindings/reset/lantiq,reset.yaml49
-rw-r--r--Documentation/devicetree/bindings/reset/marvell,berlin2-reset.yaml38
-rw-r--r--Documentation/devicetree/bindings/reset/microchip,rst.yaml59
-rw-r--r--Documentation/devicetree/bindings/reset/nuvoton,ma35d1-reset.yaml46
-rw-r--r--Documentation/devicetree/bindings/reset/nuvoton,npcm750-reset.yaml76
-rw-r--r--Documentation/devicetree/bindings/reset/nxp,lpc1850-rgu.txt83
-rw-r--r--Documentation/devicetree/bindings/reset/nxp,lpc1850-rgu.yaml101
-rw-r--r--Documentation/devicetree/bindings/reset/oxnas,reset.txt32
-rw-r--r--Documentation/devicetree/bindings/reset/qca,ar7100-reset.yaml40
-rw-r--r--Documentation/devicetree/bindings/reset/qcom,aoss-reset.txt52
-rw-r--r--Documentation/devicetree/bindings/reset/qcom,aoss-reset.yaml52
-rw-r--r--Documentation/devicetree/bindings/reset/qcom,pdc-global.txt52
-rw-r--r--Documentation/devicetree/bindings/reset/qcom,pdc-global.yaml51
-rw-r--r--Documentation/devicetree/bindings/reset/renesas,rst.txt46
-rw-r--r--Documentation/devicetree/bindings/reset/renesas,rst.yaml70
-rw-r--r--Documentation/devicetree/bindings/reset/renesas,rzg2l-usbphy-ctrl.yaml77
-rw-r--r--Documentation/devicetree/bindings/reset/renesas,rzv2h-usb2phy-reset.yaml61
-rw-r--r--Documentation/devicetree/bindings/reset/sirf,rstc.txt42
-rw-r--r--Documentation/devicetree/bindings/reset/snps,axs10x-reset.txt33
-rw-r--r--Documentation/devicetree/bindings/reset/snps,axs10x-reset.yaml48
-rw-r--r--Documentation/devicetree/bindings/reset/snps,dw-reset.yaml39
-rw-r--r--Documentation/devicetree/bindings/reset/socfpga-reset.txt15
-rw-r--r--Documentation/devicetree/bindings/reset/socionext,uniphier-glue-reset.yaml110
-rw-r--r--Documentation/devicetree/bindings/reset/socionext,uniphier-reset.yaml72
-rw-r--r--Documentation/devicetree/bindings/reset/sophgo,sg2042-reset.yaml42
-rw-r--r--Documentation/devicetree/bindings/reset/st,sti-picophyreset.txt42
-rw-r--r--Documentation/devicetree/bindings/reset/st,sti-powerdown.txt45
-rw-r--r--Documentation/devicetree/bindings/reset/st,stih407-picophyreset.yaml47
-rw-r--r--Documentation/devicetree/bindings/reset/st,stih407-powerdown.yaml49
-rw-r--r--Documentation/devicetree/bindings/reset/st,stm32-rcc.txt2
-rw-r--r--Documentation/devicetree/bindings/reset/st,stm32mp1-rcc.txt2
-rw-r--r--Documentation/devicetree/bindings/reset/starfive,jh7100-reset.yaml38
-rw-r--r--Documentation/devicetree/bindings/reset/sunplus,reset.yaml38
-rw-r--r--Documentation/devicetree/bindings/reset/thead,th1520-reset.yaml44
-rw-r--r--Documentation/devicetree/bindings/reset/ti,sci-reset.txt62
-rw-r--r--Documentation/devicetree/bindings/reset/ti,sci-reset.yaml51
-rw-r--r--Documentation/devicetree/bindings/reset/ti,tps380x-reset.yaml49
-rw-r--r--Documentation/devicetree/bindings/reset/ti-syscon-reset.txt2
-rw-r--r--Documentation/devicetree/bindings/reset/uniphier-reset.txt176
-rw-r--r--Documentation/devicetree/bindings/reset/xlnx,zynqmp-reset.yaml53
-rw-r--r--Documentation/devicetree/bindings/reset/zte,zx2967-reset.txt20
-rw-r--r--Documentation/devicetree/bindings/riscv/andes.yaml25
-rw-r--r--Documentation/devicetree/bindings/riscv/canaan.yaml47
-rw-r--r--Documentation/devicetree/bindings/riscv/cpus.txt162
-rw-r--r--Documentation/devicetree/bindings/riscv/cpus.yaml231
-rw-r--r--Documentation/devicetree/bindings/riscv/eswin.yaml29
-rw-r--r--Documentation/devicetree/bindings/riscv/extensions.yaml893
-rw-r--r--Documentation/devicetree/bindings/riscv/microchip.yaml52
-rw-r--r--Documentation/devicetree/bindings/riscv/sifive.yaml35
-rw-r--r--Documentation/devicetree/bindings/riscv/spacemit.yaml30
-rw-r--r--Documentation/devicetree/bindings/riscv/starfive.yaml40
-rw-r--r--Documentation/devicetree/bindings/riscv/sunxi.yaml74
-rw-r--r--Documentation/devicetree/bindings/riscv/thead.yaml33
-rw-r--r--Documentation/devicetree/bindings/rng/SUNW,n2-rng.yaml50
-rw-r--r--Documentation/devicetree/bindings/rng/airoha,en7581-trng.yaml38
-rw-r--r--Documentation/devicetree/bindings/rng/amlogic,meson-rng.txt21
-rw-r--r--Documentation/devicetree/bindings/rng/amlogic,meson-rng.yaml43
-rw-r--r--Documentation/devicetree/bindings/rng/apm,rng.txt17
-rw-r--r--Documentation/devicetree/bindings/rng/apm,x-gene-rng.yaml47
-rw-r--r--Documentation/devicetree/bindings/rng/arm-cctrng.yaml52
-rw-r--r--Documentation/devicetree/bindings/rng/atmel,at91-trng.yaml56
-rw-r--r--Documentation/devicetree/bindings/rng/atmel-trng.txt16
-rw-r--r--Documentation/devicetree/bindings/rng/brcm,bcm2835.txt40
-rw-r--r--Documentation/devicetree/bindings/rng/brcm,bcm2835.yaml82
-rw-r--r--Documentation/devicetree/bindings/rng/brcm,bcm74110-rng.yaml35
-rw-r--r--Documentation/devicetree/bindings/rng/brcm,iproc-rng200.txt14
-rw-r--r--Documentation/devicetree/bindings/rng/brcm,iproc-rng200.yaml36
-rw-r--r--Documentation/devicetree/bindings/rng/hisi-rng.txt12
-rw-r--r--Documentation/devicetree/bindings/rng/hisi-rng.yaml32
-rw-r--r--Documentation/devicetree/bindings/rng/imx-rng.txt20
-rw-r--r--Documentation/devicetree/bindings/rng/imx-rng.yaml52
-rw-r--r--Documentation/devicetree/bindings/rng/ingenic,rng.yaml36
-rw-r--r--Documentation/devicetree/bindings/rng/ingenic,trng.yaml43
-rw-r--r--Documentation/devicetree/bindings/rng/inside-secure,safexcel-eip76.yaml86
-rw-r--r--Documentation/devicetree/bindings/rng/intel,ixp46x-rng.yaml35
-rw-r--r--Documentation/devicetree/bindings/rng/ks-sa-rng.txt21
-rw-r--r--Documentation/devicetree/bindings/rng/microsoft,vmgenid.yaml49
-rw-r--r--Documentation/devicetree/bindings/rng/mtk-rng.txt20
-rw-r--r--Documentation/devicetree/bindings/rng/mtk-rng.yaml55
-rw-r--r--Documentation/devicetree/bindings/rng/nuvoton,npcm-rng.yaml37
-rw-r--r--Documentation/devicetree/bindings/rng/omap_rng.txt38
-rw-r--r--Documentation/devicetree/bindings/rng/rockchip,rk3568-rng.yaml61
-rw-r--r--Documentation/devicetree/bindings/rng/rockchip,rk3588-rng.yaml60
-rw-r--r--Documentation/devicetree/bindings/rng/samsung,exynos4-rng.txt19
-rw-r--r--Documentation/devicetree/bindings/rng/samsung,exynos4-rng.yaml45
-rw-r--r--Documentation/devicetree/bindings/rng/samsung,exynos5250-trng.yaml76
-rw-r--r--Documentation/devicetree/bindings/rng/silex-insight,ba431-rng.yaml36
-rw-r--r--Documentation/devicetree/bindings/rng/sparc_sun_oracle_rng.txt30
-rw-r--r--Documentation/devicetree/bindings/rng/st,rng.txt15
-rw-r--r--Documentation/devicetree/bindings/rng/st,rng.yaml35
-rw-r--r--Documentation/devicetree/bindings/rng/st,stm32-rng.txt25
-rw-r--r--Documentation/devicetree/bindings/rng/st,stm32-rng.yaml97
-rw-r--r--Documentation/devicetree/bindings/rng/starfive,jh7110-trng.yaml59
-rw-r--r--Documentation/devicetree/bindings/rng/ti,keystone-rng.yaml50
-rw-r--r--Documentation/devicetree/bindings/rng/ti,omap-rom-rng.yaml41
-rw-r--r--Documentation/devicetree/bindings/rng/timeriomem_rng.txt25
-rw-r--r--Documentation/devicetree/bindings/rng/timeriomem_rng.yaml48
-rw-r--r--Documentation/devicetree/bindings/rng/xiphera,xip8001b-trng.yaml33
-rw-r--r--Documentation/devicetree/bindings/rtc/abracon,abx80x.txt30
-rw-r--r--Documentation/devicetree/bindings/rtc/abracon,abx80x.yaml98
-rw-r--r--Documentation/devicetree/bindings/rtc/adi,max31335.yaml72
-rw-r--r--Documentation/devicetree/bindings/rtc/allwinner,sun4i-a10-rtc.yaml43
-rw-r--r--Documentation/devicetree/bindings/rtc/allwinner,sun6i-a31-rtc.yaml197
-rw-r--r--Documentation/devicetree/bindings/rtc/alphascale,asm9260-rtc.txt19
-rw-r--r--Documentation/devicetree/bindings/rtc/alphascale,asm9260-rtc.yaml50
-rw-r--r--Documentation/devicetree/bindings/rtc/amlogic,a4-rtc.yaml68
-rw-r--r--Documentation/devicetree/bindings/rtc/amlogic,meson-vrtc.yaml44
-rw-r--r--Documentation/devicetree/bindings/rtc/amlogic,meson6-rtc.yaml63
-rw-r--r--Documentation/devicetree/bindings/rtc/apm,xgene-rtc.yaml45
-rw-r--r--Documentation/devicetree/bindings/rtc/arm,pl031.yaml58
-rw-r--r--Documentation/devicetree/bindings/rtc/armada-380-rtc.txt24
-rw-r--r--Documentation/devicetree/bindings/rtc/atmel,at91rm9200-rtc.txt17
-rw-r--r--Documentation/devicetree/bindings/rtc/atmel,at91rm9200-rtc.yaml56
-rw-r--r--Documentation/devicetree/bindings/rtc/atmel,at91sam9-rtc.txt23
-rw-r--r--Documentation/devicetree/bindings/rtc/atmel,at91sam9260-rtt.yaml72
-rw-r--r--Documentation/devicetree/bindings/rtc/brcm,brcmstb-waketimer.txt20
-rw-r--r--Documentation/devicetree/bindings/rtc/brcm,brcmstb-waketimer.yaml51
-rw-r--r--Documentation/devicetree/bindings/rtc/cdns,rtc.txt25
-rw-r--r--Documentation/devicetree/bindings/rtc/cirrus,ep9301-rtc.yaml42
-rw-r--r--Documentation/devicetree/bindings/rtc/digicolor-rtc.txt17
-rw-r--r--Documentation/devicetree/bindings/rtc/dw-apb.txt32
-rw-r--r--Documentation/devicetree/bindings/rtc/epson,rtc7301.txt16
-rw-r--r--Documentation/devicetree/bindings/rtc/epson,rtc7301.yaml51
-rw-r--r--Documentation/devicetree/bindings/rtc/epson,rx8900.txt22
-rw-r--r--Documentation/devicetree/bindings/rtc/epson,rx8900.yaml52
-rw-r--r--Documentation/devicetree/bindings/rtc/faraday,ftrtc010.txt28
-rw-r--r--Documentation/devicetree/bindings/rtc/faraday,ftrtc010.yaml58
-rw-r--r--Documentation/devicetree/bindings/rtc/fsl,ls-ftm-alarm.yaml73
-rw-r--r--Documentation/devicetree/bindings/rtc/fsl,scu-rtc.yaml31
-rw-r--r--Documentation/devicetree/bindings/rtc/fsl,stmp3xxx-rtc.yaml51
-rw-r--r--Documentation/devicetree/bindings/rtc/google,goldfish-rtc.txt17
-rw-r--r--Documentation/devicetree/bindings/rtc/haoyu,hym8563.txt30
-rw-r--r--Documentation/devicetree/bindings/rtc/haoyu,hym8563.yaml56
-rw-r--r--Documentation/devicetree/bindings/rtc/imxdi-rtc.txt20
-rw-r--r--Documentation/devicetree/bindings/rtc/imxdi-rtc.yaml43
-rw-r--r--Documentation/devicetree/bindings/rtc/ingenic,jz4740-rtc.txt37
-rw-r--r--Documentation/devicetree/bindings/rtc/ingenic,rtc.yaml114
-rw-r--r--Documentation/devicetree/bindings/rtc/intersil,isl12022.yaml64
-rw-r--r--Documentation/devicetree/bindings/rtc/isil,isl12057.txt74
-rw-r--r--Documentation/devicetree/bindings/rtc/isil,isl1208.yaml100
-rw-r--r--Documentation/devicetree/bindings/rtc/isil,isl1219.txt29
-rw-r--r--Documentation/devicetree/bindings/rtc/loongson,rtc.yaml57
-rw-r--r--Documentation/devicetree/bindings/rtc/lpc32xx-rtc.txt15
-rw-r--r--Documentation/devicetree/bindings/rtc/marvell,armada-380-rtc.yaml51
-rw-r--r--Documentation/devicetree/bindings/rtc/marvell,pxa-rtc.yaml40
-rw-r--r--Documentation/devicetree/bindings/rtc/maxim,ds1742.txt12
-rw-r--r--Documentation/devicetree/bindings/rtc/maxim,ds3231.txt38
-rw-r--r--Documentation/devicetree/bindings/rtc/maxim,mcp795.txt11
-rw-r--r--Documentation/devicetree/bindings/rtc/mediatek,mt2712-rtc.yaml39
-rw-r--r--Documentation/devicetree/bindings/rtc/mediatek,mt7622-rtc.yaml52
-rw-r--r--Documentation/devicetree/bindings/rtc/microchip,mpfs-rtc.yaml69
-rw-r--r--Documentation/devicetree/bindings/rtc/microcrystal,rv3028.yaml57
-rw-r--r--Documentation/devicetree/bindings/rtc/microcrystal,rv3032.yaml66
-rw-r--r--Documentation/devicetree/bindings/rtc/moxa,moxart-rtc.txt12
-rw-r--r--Documentation/devicetree/bindings/rtc/mstar,msc313-rtc.yaml49
-rw-r--r--Documentation/devicetree/bindings/rtc/mstar,ssd202d-rtc.yaml35
-rw-r--r--Documentation/devicetree/bindings/rtc/nuvoton,ma35d1-rtc.yaml48
-rw-r--r--Documentation/devicetree/bindings/rtc/nuvoton,nct3018y.yaml45
-rw-r--r--Documentation/devicetree/bindings/rtc/nvidia,tegra20-rtc.txt24
-rw-r--r--Documentation/devicetree/bindings/rtc/nvidia,tegra20-rtc.yaml62
-rw-r--r--Documentation/devicetree/bindings/rtc/nxp,lpc1788-rtc.txt21
-rw-r--r--Documentation/devicetree/bindings/rtc/nxp,lpc1788-rtc.yaml63
-rw-r--r--Documentation/devicetree/bindings/rtc/nxp,lpc3220-rtc.yaml49
-rw-r--r--Documentation/devicetree/bindings/rtc/nxp,pcf2123.yaml47
-rw-r--r--Documentation/devicetree/bindings/rtc/nxp,pcf2127.yaml56
-rw-r--r--Documentation/devicetree/bindings/rtc/nxp,pcf85063.yaml113
-rw-r--r--Documentation/devicetree/bindings/rtc/nxp,pcf8523.yaml45
-rw-r--r--Documentation/devicetree/bindings/rtc/nxp,pcf85363.yaml60
-rw-r--r--Documentation/devicetree/bindings/rtc/nxp,pcf8563.yaml56
-rw-r--r--Documentation/devicetree/bindings/rtc/nxp,rtc-2123.txt16
-rw-r--r--Documentation/devicetree/bindings/rtc/nxp,s32g-rtc.yaml72
-rw-r--r--Documentation/devicetree/bindings/rtc/orion-rtc.txt18
-rw-r--r--Documentation/devicetree/bindings/rtc/pcf85363.txt17
-rw-r--r--Documentation/devicetree/bindings/rtc/pcf8563.txt26
-rw-r--r--Documentation/devicetree/bindings/rtc/pxa-rtc.txt14
-rw-r--r--Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml98
-rw-r--r--Documentation/devicetree/bindings/rtc/renesas,rz-rtca3.yaml84
-rw-r--r--Documentation/devicetree/bindings/rtc/renesas,rzn1-rtc.yaml74
-rw-r--r--Documentation/devicetree/bindings/rtc/renesas,sh-rtc.yaml77
-rw-r--r--Documentation/devicetree/bindings/rtc/rtc-cmos.txt2
-rw-r--r--Documentation/devicetree/bindings/rtc/rtc-ds1307.txt44
-rw-r--r--Documentation/devicetree/bindings/rtc/rtc-ds1307.yaml102
-rw-r--r--Documentation/devicetree/bindings/rtc/rtc-m41t80.txt30
-rw-r--r--Documentation/devicetree/bindings/rtc/rtc-mt7622.txt21
-rw-r--r--Documentation/devicetree/bindings/rtc/rtc-mxc.txt26
-rw-r--r--Documentation/devicetree/bindings/rtc/rtc-mxc.yaml61
-rw-r--r--Documentation/devicetree/bindings/rtc/rtc-mxc_v2.txt17
-rw-r--r--Documentation/devicetree/bindings/rtc/rtc-mxc_v2.yaml46
-rw-r--r--Documentation/devicetree/bindings/rtc/rtc-sh.txt28
-rw-r--r--Documentation/devicetree/bindings/rtc/rtc.yaml71
-rw-r--r--Documentation/devicetree/bindings/rtc/s3c-rtc.txt31
-rw-r--r--Documentation/devicetree/bindings/rtc/s3c-rtc.yaml62
-rw-r--r--Documentation/devicetree/bindings/rtc/sa1100-rtc.txt17
-rw-r--r--Documentation/devicetree/bindings/rtc/sa1100-rtc.yaml57
-rw-r--r--Documentation/devicetree/bindings/rtc/sirf,prima2-sysrtc.txt13
-rw-r--r--Documentation/devicetree/bindings/rtc/snvs-rtc.txt1
-rw-r--r--Documentation/devicetree/bindings/rtc/sophgo,cv1800b-rtc.yaml86
-rw-r--r--Documentation/devicetree/bindings/rtc/spear-rtc.txt15
-rw-r--r--Documentation/devicetree/bindings/rtc/sprd,sc2731-rtc.yaml33
-rw-r--r--Documentation/devicetree/bindings/rtc/sprd,sc27xx-rtc.txt26
-rw-r--r--Documentation/devicetree/bindings/rtc/st,m41t80.yaml73
-rw-r--r--Documentation/devicetree/bindings/rtc/st,m48t86.yaml38
-rw-r--r--Documentation/devicetree/bindings/rtc/st,stm32-rtc.txt61
-rw-r--r--Documentation/devicetree/bindings/rtc/st,stm32-rtc.yaml172
-rw-r--r--Documentation/devicetree/bindings/rtc/stericsson,coh901331.txt16
-rw-r--r--Documentation/devicetree/bindings/rtc/stmp3xxx-rtc.txt21
-rw-r--r--Documentation/devicetree/bindings/rtc/sun6i-rtc.txt27
-rw-r--r--Documentation/devicetree/bindings/rtc/sunplus,sp7021-rtc.yaml56
-rw-r--r--Documentation/devicetree/bindings/rtc/sunxi-rtc.txt17
-rw-r--r--Documentation/devicetree/bindings/rtc/ti,bq32000.yaml49
-rw-r--r--Documentation/devicetree/bindings/rtc/ti,bq32k.txt18
-rw-r--r--Documentation/devicetree/bindings/rtc/ti,k3-rtc.yaml62
-rw-r--r--Documentation/devicetree/bindings/rtc/trivial-rtc.yaml105
-rw-r--r--Documentation/devicetree/bindings/rtc/twl-rtc.txt11
-rw-r--r--Documentation/devicetree/bindings/rtc/via,vt8500-rtc.txt15
-rw-r--r--Documentation/devicetree/bindings/rtc/xgene-rtc.txt28
-rw-r--r--Documentation/devicetree/bindings/rtc/xlnx,zynqmp-rtc.yaml84
-rw-r--r--Documentation/devicetree/bindings/rtc/xlnx-rtc.txt25
-rw-r--r--Documentation/devicetree/bindings/security/tpm/ibmvtpm.txt41
-rw-r--r--Documentation/devicetree/bindings/security/tpm/st33zp24-i2c.txt34
-rw-r--r--Documentation/devicetree/bindings/security/tpm/st33zp24-spi.txt32
-rw-r--r--Documentation/devicetree/bindings/security/tpm/tpm-i2c.txt26
-rw-r--r--Documentation/devicetree/bindings/security/tpm/tpm_tis_mmio.txt25
-rw-r--r--Documentation/devicetree/bindings/security/tpm/tpm_tis_spi.txt23
-rw-r--r--Documentation/devicetree/bindings/serial/8250.txt74
-rw-r--r--Documentation/devicetree/bindings/serial/8250.yaml335
-rw-r--r--Documentation/devicetree/bindings/serial/8250_omap.yaml126
-rw-r--r--Documentation/devicetree/bindings/serial/actions,owl-uart.txt16
-rw-r--r--Documentation/devicetree/bindings/serial/actions,owl-uart.yaml48
-rw-r--r--Documentation/devicetree/bindings/serial/altera_jtaguart.txt5
-rw-r--r--Documentation/devicetree/bindings/serial/altera_uart.txt8
-rw-r--r--Documentation/devicetree/bindings/serial/altr,juart-1.0.yaml19
-rw-r--r--Documentation/devicetree/bindings/serial/altr,uart-1.0.yaml25
-rw-r--r--Documentation/devicetree/bindings/serial/amlogic,meson-uart.txt38
-rw-r--r--Documentation/devicetree/bindings/serial/amlogic,meson-uart.yaml105
-rw-r--r--Documentation/devicetree/bindings/serial/arc-uart.txt25
-rw-r--r--Documentation/devicetree/bindings/serial/arm,dcc.yaml30
-rw-r--r--Documentation/devicetree/bindings/serial/arm,mps2-uart.txt19
-rw-r--r--Documentation/devicetree/bindings/serial/arm,mps2-uart.yaml46
-rw-r--r--Documentation/devicetree/bindings/serial/arm,sbsa-uart.yaml38
-rw-r--r--Documentation/devicetree/bindings/serial/arm_sbsa_uart.txt10
-rw-r--r--Documentation/devicetree/bindings/serial/atmel,at91-usart.yaml200
-rw-r--r--Documentation/devicetree/bindings/serial/brcm,bcm2835-aux-uart.txt18
-rw-r--r--Documentation/devicetree/bindings/serial/brcm,bcm2835-aux-uart.yaml46
-rw-r--r--Documentation/devicetree/bindings/serial/brcm,bcm6345-uart.txt36
-rw-r--r--Documentation/devicetree/bindings/serial/brcm,bcm6345-uart.yaml47
-rw-r--r--Documentation/devicetree/bindings/serial/brcm,bcm7271-uart.yaml95
-rw-r--r--Documentation/devicetree/bindings/serial/cavium-uart.txt19
-rw-r--r--Documentation/devicetree/bindings/serial/cdns,uart.txt22
-rw-r--r--Documentation/devicetree/bindings/serial/cdns,uart.yaml82
-rw-r--r--Documentation/devicetree/bindings/serial/cirrus,clps711x-uart.txt31
-rw-r--r--Documentation/devicetree/bindings/serial/cirrus,ep7209-uart.yaml56
-rw-r--r--Documentation/devicetree/bindings/serial/cnxt,cx92755-usart.yaml48
-rw-r--r--Documentation/devicetree/bindings/serial/digicolor-usart.txt27
-rw-r--r--Documentation/devicetree/bindings/serial/efm32-uart.txt20
-rw-r--r--Documentation/devicetree/bindings/serial/esp,esp32-acm.yaml42
-rw-r--r--Documentation/devicetree/bindings/serial/esp,esp32-uart.yaml51
-rw-r--r--Documentation/devicetree/bindings/serial/fsl,s32-linflexuart.yaml50
-rw-r--r--Documentation/devicetree/bindings/serial/fsl-imx-uart.txt36
-rw-r--r--Documentation/devicetree/bindings/serial/fsl-imx-uart.yaml153
-rw-r--r--Documentation/devicetree/bindings/serial/fsl-lpuart.txt35
-rw-r--r--Documentation/devicetree/bindings/serial/fsl-lpuart.yaml98
-rw-r--r--Documentation/devicetree/bindings/serial/fsl-mxs-auart.txt53
-rw-r--r--Documentation/devicetree/bindings/serial/fsl-mxs-auart.yaml94
-rw-r--r--Documentation/devicetree/bindings/serial/ingenic,uart.txt27
-rw-r--r--Documentation/devicetree/bindings/serial/ingenic,uart.yaml103
-rw-r--r--Documentation/devicetree/bindings/serial/lantiq,asc.yaml56
-rw-r--r--Documentation/devicetree/bindings/serial/lantiq_asc.txt16
-rw-r--r--Documentation/devicetree/bindings/serial/litex,liteuart.yaml40
-rw-r--r--Documentation/devicetree/bindings/serial/marvell,armada-3700-uart.yaml102
-rw-r--r--Documentation/devicetree/bindings/serial/maxim,max310x.txt48
-rw-r--r--Documentation/devicetree/bindings/serial/maxim,max310x.yaml74
-rw-r--r--Documentation/devicetree/bindings/serial/mediatek,uart.yaml126
-rw-r--r--Documentation/devicetree/bindings/serial/microchip,pic32-uart.txt29
-rw-r--r--Documentation/devicetree/bindings/serial/microchip,pic32mzda-uart.yaml53
-rw-r--r--Documentation/devicetree/bindings/serial/mrvl,pxa-ssp.txt64
-rw-r--r--Documentation/devicetree/bindings/serial/mrvl-serial.txt4
-rw-r--r--Documentation/devicetree/bindings/serial/mtk-uart.txt42
-rw-r--r--Documentation/devicetree/bindings/serial/mvebu-uart.txt53
-rw-r--r--Documentation/devicetree/bindings/serial/nuvoton,ma35d1-serial.yaml48
-rw-r--r--Documentation/devicetree/bindings/serial/nvidia,tegra194-tcu.yaml61
-rw-r--r--Documentation/devicetree/bindings/serial/nvidia,tegra20-hsuart.txt36
-rw-r--r--Documentation/devicetree/bindings/serial/nvidia,tegra20-hsuart.yaml125
-rw-r--r--Documentation/devicetree/bindings/serial/nvidia,tegra264-utc.yaml73
-rw-r--r--Documentation/devicetree/bindings/serial/nxp,lpc1850-uart.txt28
-rw-r--r--Documentation/devicetree/bindings/serial/nxp,lpc3220-hsuart.yaml39
-rw-r--r--Documentation/devicetree/bindings/serial/nxp,sc16is7xx.txt66
-rw-r--r--Documentation/devicetree/bindings/serial/nxp,sc16is7xx.yaml133
-rw-r--r--Documentation/devicetree/bindings/serial/nxp-lpc32xx-hsuart.txt14
-rw-r--r--Documentation/devicetree/bindings/serial/omap_serial.txt36
-rw-r--r--Documentation/devicetree/bindings/serial/pl011.txt51
-rw-r--r--Documentation/devicetree/bindings/serial/pl011.yaml130
-rw-r--r--Documentation/devicetree/bindings/serial/qca,ar9330-uart.txt31
-rw-r--r--Documentation/devicetree/bindings/serial/qca,ar9330-uart.yaml50
-rw-r--r--Documentation/devicetree/bindings/serial/qcom,msm-uart.txt25
-rw-r--r--Documentation/devicetree/bindings/serial/qcom,msm-uart.yaml56
-rw-r--r--Documentation/devicetree/bindings/serial/qcom,msm-uartdm.txt81
-rw-r--r--Documentation/devicetree/bindings/serial/qcom,msm-uartdm.yaml125
-rw-r--r--Documentation/devicetree/bindings/serial/qcom,sa8255p-geni-uart.yaml69
-rw-r--r--Documentation/devicetree/bindings/serial/qcom,serial-geni-qcom.yaml87
-rw-r--r--Documentation/devicetree/bindings/serial/rda,8810pl-uart.yaml46
-rw-r--r--Documentation/devicetree/bindings/serial/renesas,em-uart.yaml74
-rw-r--r--Documentation/devicetree/bindings/serial/renesas,hscif.yaml158
-rw-r--r--Documentation/devicetree/bindings/serial/renesas,rsci.yaml85
-rw-r--r--Documentation/devicetree/bindings/serial/renesas,rzn1-uart.txt10
-rw-r--r--Documentation/devicetree/bindings/serial/renesas,sci-serial.txt140
-rw-r--r--Documentation/devicetree/bindings/serial/renesas,sci.yaml109
-rw-r--r--Documentation/devicetree/bindings/serial/renesas,scif.yaml284
-rw-r--r--Documentation/devicetree/bindings/serial/renesas,scifa.yaml112
-rw-r--r--Documentation/devicetree/bindings/serial/renesas,scifb.yaml103
-rw-r--r--Documentation/devicetree/bindings/serial/rs485.txt32
-rw-r--r--Documentation/devicetree/bindings/serial/rs485.yaml65
-rw-r--r--Documentation/devicetree/bindings/serial/samsung_uart.txt58
-rw-r--r--Documentation/devicetree/bindings/serial/samsung_uart.yaml225
-rw-r--r--Documentation/devicetree/bindings/serial/serial-peripheral-props.yaml41
-rw-r--r--Documentation/devicetree/bindings/serial/serial.txt56
-rw-r--r--Documentation/devicetree/bindings/serial/serial.yaml133
-rw-r--r--Documentation/devicetree/bindings/serial/sifive-serial.yaml65
-rw-r--r--Documentation/devicetree/bindings/serial/sirf-uart.txt34
-rw-r--r--Documentation/devicetree/bindings/serial/slave-device.txt45
-rw-r--r--Documentation/devicetree/bindings/serial/snps,arc-uart.yaml51
-rw-r--r--Documentation/devicetree/bindings/serial/snps-dw-apb-uart.txt76
-rw-r--r--Documentation/devicetree/bindings/serial/snps-dw-apb-uart.yaml192
-rw-r--r--Documentation/devicetree/bindings/serial/socionext,milbeaut-usio-uart.yaml56
-rw-r--r--Documentation/devicetree/bindings/serial/socionext,uniphier-uart.yaml51
-rw-r--r--Documentation/devicetree/bindings/serial/sprd-uart.txt19
-rw-r--r--Documentation/devicetree/bindings/serial/sprd-uart.yaml82
-rw-r--r--Documentation/devicetree/bindings/serial/st,asc.yaml55
-rw-r--r--Documentation/devicetree/bindings/serial/st,stm32-uart.yaml133
-rw-r--r--Documentation/devicetree/bindings/serial/st,stm32-usart.txt51
-rw-r--r--Documentation/devicetree/bindings/serial/st-asc.txt18
-rw-r--r--Documentation/devicetree/bindings/serial/sunplus,sp7021-uart.yaml56
-rw-r--r--Documentation/devicetree/bindings/serial/uniphier-uart.txt22
-rw-r--r--Documentation/devicetree/bindings/serial/via,vt8500-uart.yaml46
-rw-r--r--Documentation/devicetree/bindings/serial/vt8500-uart.txt27
-rw-r--r--Documentation/devicetree/bindings/serial/xlnx,opb-uartlite.txt23
-rw-r--r--Documentation/devicetree/bindings/serial/xlnx,opb-uartlite.yaml88
-rw-r--r--Documentation/devicetree/bindings/serio/allwinner,sun4i-a10-ps2.yaml51
-rw-r--r--Documentation/devicetree/bindings/serio/allwinner,sun4i-ps2.txt22
-rw-r--r--Documentation/devicetree/bindings/serio/arm,pl050.yaml67
-rw-r--r--Documentation/devicetree/bindings/serio/ps2-gpio.txt23
-rw-r--r--Documentation/devicetree/bindings/serio/ps2-gpio.yaml64
-rw-r--r--Documentation/devicetree/bindings/sifive/sifive-blocks-ip-versioning.txt38
-rw-r--r--Documentation/devicetree/bindings/siox/eckelmann,siox-gpio.txt19
-rw-r--r--Documentation/devicetree/bindings/siox/eckelmann,siox-gpio.yaml48
-rw-r--r--Documentation/devicetree/bindings/slimbus/bus.txt50
-rw-r--r--Documentation/devicetree/bindings/slimbus/qcom,slim-ngd.yaml120
-rw-r--r--Documentation/devicetree/bindings/slimbus/slim-ngd-qcom-ctrl.txt84
-rw-r--r--Documentation/devicetree/bindings/slimbus/slim-qcom-ctrl.txt39
-rw-r--r--Documentation/devicetree/bindings/slimbus/slimbus.yaml90
-rw-r--r--Documentation/devicetree/bindings/soc/altera/altr,sys-mgr.yaml51
-rw-r--r--Documentation/devicetree/bindings/soc/amlogic/amlogic,canvas.txt29
-rw-r--r--Documentation/devicetree/bindings/soc/amlogic/amlogic,canvas.yaml50
-rw-r--r--Documentation/devicetree/bindings/soc/amlogic/amlogic,meson-gx-clk-measure.yaml42
-rw-r--r--Documentation/devicetree/bindings/soc/amlogic/amlogic,meson-gx-hhi-sysctrl.yaml207
-rw-r--r--Documentation/devicetree/bindings/soc/aspeed/uart-routing.yaml56
-rw-r--r--Documentation/devicetree/bindings/soc/bcm/brcm,bcm23550-cdc.yaml35
-rw-r--r--Documentation/devicetree/bindings/soc/bcm/brcm,bcm2711-avs-monitor.yaml44
-rw-r--r--Documentation/devicetree/bindings/soc/bcm/brcm,bcm2835-pm.yaml86
-rw-r--r--Documentation/devicetree/bindings/soc/bcm/brcm,bcm2835-vchiq.txt16
-rw-r--r--Documentation/devicetree/bindings/soc/bcm/brcm,bcm2835-vchiq.yaml53
-rw-r--r--Documentation/devicetree/bindings/soc/bcm/raspberrypi,bcm2835-power.txt47
-rw-r--r--Documentation/devicetree/bindings/soc/cirrus/cirrus,ep9301-syscon.yaml94
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/bman-portals.txt56
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/bman.txt137
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/cpm_qe/fsl,cpm1-scc-qmc.yaml193
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/cpm_qe/fsl,cpm1-tsa.yaml205
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/cpm_qe/fsl,qe-firmware.yaml48
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/cpm_qe/fsl,qe-ic.yaml47
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/cpm_qe/fsl,qe-muram.yaml71
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/cpm_qe/fsl,qe-si.yaml40
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/cpm_qe/fsl,qe-siram.yaml39
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/cpm_qe/fsl,qe-tsa.yaml210
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/cpm_qe/fsl,qe-ucc-qmc.yaml197
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/cpm_qe/fsl,qe.yaml148
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/cpm_qe/fsl,ucc-hdlc.yaml140
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/cpm_qe/network.txt130
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/cpm_qe/qe.txt167
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/fsl,bman-portal.yaml52
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/fsl,bman.yaml83
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/fsl,imx23-digctl.yaml53
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/fsl,layerscape-dcfg.yaml67
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/fsl,layerscape-scfg.yaml60
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/fsl,ls1028a-reset.yaml56
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/fsl,qman-fqd.yaml69
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/fsl,qman-portal.yaml112
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/fsl,qman.yaml93
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/fsl,rcpm.yaml87
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/fsl,vf610-src.yaml47
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/qman-portals.txt134
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/qman.txt187
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/rcpm.txt63
-rw-r--r--Documentation/devicetree/bindings/soc/google/google,gs101-pmu-intr-gen.yaml35
-rw-r--r--Documentation/devicetree/bindings/soc/hisilicon/hisilicon,hi3660-usb3-otg-bc.yaml46
-rw-r--r--Documentation/devicetree/bindings/soc/imx/fsl,aips-bus.yaml52
-rw-r--r--Documentation/devicetree/bindings/soc/imx/fsl,imx-anatop.yaml146
-rw-r--r--Documentation/devicetree/bindings/soc/imx/fsl,imx-iomuxc-gpr.yaml74
-rw-r--r--Documentation/devicetree/bindings/soc/imx/fsl,imx8mm-disp-blk-ctrl.yaml94
-rw-r--r--Documentation/devicetree/bindings/soc/imx/fsl,imx8mm-vpu-blk-ctrl.yaml164
-rw-r--r--Documentation/devicetree/bindings/soc/imx/fsl,imx8mn-disp-blk-ctrl.yaml97
-rw-r--r--Documentation/devicetree/bindings/soc/imx/fsl,imx8mp-hdmi-blk-ctrl.yaml99
-rw-r--r--Documentation/devicetree/bindings/soc/imx/fsl,imx8mp-hsio-blk-ctrl.yaml92
-rw-r--r--Documentation/devicetree/bindings/soc/imx/fsl,imx8mp-media-blk-ctrl.yaml169
-rw-r--r--Documentation/devicetree/bindings/soc/imx/fsl,imx8mq-vpu-blk-ctrl.yaml71
-rw-r--r--Documentation/devicetree/bindings/soc/imx/fsl,imx93-media-blk-ctrl.yaml115
-rw-r--r--Documentation/devicetree/bindings/soc/imx/fsl,imx93-src.yaml97
-rw-r--r--Documentation/devicetree/bindings/soc/imx/imx8m-soc.yaml86
-rw-r--r--Documentation/devicetree/bindings/soc/intel/intel,hps-copy-engine.yaml51
-rw-r--r--Documentation/devicetree/bindings/soc/intel/intel,lgm-syscon.yaml57
-rw-r--r--Documentation/devicetree/bindings/soc/litex/litex,soc-controller.yaml40
-rw-r--r--Documentation/devicetree/bindings/soc/loongson/loongson,ls2k-pmc.yaml83
-rw-r--r--Documentation/devicetree/bindings/soc/mediatek/devapc.yaml61
-rw-r--r--Documentation/devicetree/bindings/soc/mediatek/mediatek,ccorr.yaml68
-rw-r--r--Documentation/devicetree/bindings/soc/mediatek/mediatek,mt7986-wo-ccif.yaml52
-rw-r--r--Documentation/devicetree/bindings/soc/mediatek/mediatek,mt8183-dvfsrc.yaml84
-rw-r--r--Documentation/devicetree/bindings/soc/mediatek/mediatek,mutex.yaml123
-rw-r--r--Documentation/devicetree/bindings/soc/mediatek/mediatek,pwrap.yaml167
-rw-r--r--Documentation/devicetree/bindings/soc/mediatek/mediatek,wdma.yaml81
-rw-r--r--Documentation/devicetree/bindings/soc/mediatek/mtk-svs.yaml94
-rw-r--r--Documentation/devicetree/bindings/soc/mediatek/pwrap.txt66
-rw-r--r--Documentation/devicetree/bindings/soc/mediatek/scpsys.txt14
-rw-r--r--Documentation/devicetree/bindings/soc/microchip/atmel,at91rm9200-tcb.yaml220
-rw-r--r--Documentation/devicetree/bindings/soc/microchip/microchip,mpfs-sys-controller.yaml50
-rw-r--r--Documentation/devicetree/bindings/soc/microchip/microchip,sparx5-cpu-syscon.yaml49
-rw-r--r--Documentation/devicetree/bindings/soc/mobileye/mobileye,eyeq5-olb.yaml352
-rw-r--r--Documentation/devicetree/bindings/soc/nuvoton/nuvoton,gfxi.yaml39
-rw-r--r--Documentation/devicetree/bindings/soc/nuvoton/nuvoton,npcm-gcr.yaml50
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,aoss-qmp.yaml121
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,apr-services.yaml53
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,apr.txt84
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,apr.yaml211
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,dcc.yaml45
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,eud.yaml79
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,geni-se.txt94
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,geni-se.yaml170
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,glink.txt89
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,gsbi.txt87
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,gsbi.yaml132
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,msm8976-ramp-controller.yaml36
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,pbs.yaml46
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,pmic-glink.yaml126
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,rpm-master-stats.yaml71
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,rpm.yaml116
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,rpmh-rsc.yaml289
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,sa8255p-geni-se-qup.yaml107
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,saw2.yaml120
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,se-common-props.yaml26
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,smd-rpm.txt59
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,smd-rpm.yaml152
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,smd.txt98
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,smd.yaml69
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,smem.txt57
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,smem.yaml98
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,smp2p.txt110
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,smp2p.yaml145
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,smsm.txt104
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,smsm.yaml150
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,wcnss.txt124
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,wcnss.yaml137
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom-stats.yaml67
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/rpmh-rsc.txt137
-rw-r--r--Documentation/devicetree/bindings/soc/renesas/renesas,r9a09g011-sys.yaml43
-rw-r--r--Documentation/devicetree/bindings/soc/renesas/renesas,r9a09g057-sys.yaml55
-rw-r--r--Documentation/devicetree/bindings/soc/renesas/renesas,rzg2l-sysc.yaml64
-rw-r--r--Documentation/devicetree/bindings/soc/renesas/renesas,rzv2m-pwc.yaml56
-rw-r--r--Documentation/devicetree/bindings/soc/renesas/renesas-soc.yaml73
-rw-r--r--Documentation/devicetree/bindings/soc/renesas/renesas.yaml630
-rw-r--r--Documentation/devicetree/bindings/soc/rockchip/grf.txt49
-rw-r--r--Documentation/devicetree/bindings/soc/rockchip/grf.yaml383
-rw-r--r--Documentation/devicetree/bindings/soc/rockchip/power_domain.txt132
-rw-r--r--Documentation/devicetree/bindings/soc/samsung/exynos-pmu.yaml233
-rw-r--r--Documentation/devicetree/bindings/soc/samsung/exynos-usi.yaml207
-rw-r--r--Documentation/devicetree/bindings/soc/samsung/samsung,exynos-sysreg.yaml110
-rw-r--r--Documentation/devicetree/bindings/soc/socionext/socionext,uniphier-adamv.yaml50
-rw-r--r--Documentation/devicetree/bindings/soc/socionext/socionext,uniphier-ahci-glue.yaml77
-rw-r--r--Documentation/devicetree/bindings/soc/socionext/socionext,uniphier-dwc3-glue.yaml106
-rw-r--r--Documentation/devicetree/bindings/soc/socionext/socionext,uniphier-mioctrl.yaml65
-rw-r--r--Documentation/devicetree/bindings/soc/socionext/socionext,uniphier-perictrl.yaml64
-rw-r--r--Documentation/devicetree/bindings/soc/socionext/socionext,uniphier-sdctrl.yaml61
-rw-r--r--Documentation/devicetree/bindings/soc/socionext/socionext,uniphier-soc-glue-debug.yaml68
-rw-r--r--Documentation/devicetree/bindings/soc/socionext/socionext,uniphier-soc-glue.yaml114
-rw-r--r--Documentation/devicetree/bindings/soc/socionext/socionext,uniphier-sysctrl.yaml104
-rw-r--r--Documentation/devicetree/bindings/soc/sophgo/sophgo,sg2044-top-syscon.yaml49
-rw-r--r--Documentation/devicetree/bindings/soc/sophgo/sophgo.yaml52
-rw-r--r--Documentation/devicetree/bindings/soc/spacemit/spacemit,k1-syscon.yaml95
-rw-r--r--Documentation/devicetree/bindings/soc/sprd/sprd,sc9863a-glbregs.yaml55
-rw-r--r--Documentation/devicetree/bindings/soc/starfive/starfive,jh7110-syscon.yaml93
-rw-r--r--Documentation/devicetree/bindings/soc/sti/st,sti-syscon.yaml55
-rw-r--r--Documentation/devicetree/bindings/soc/tegra/nvidia,nvec.yaml84
-rw-r--r--Documentation/devicetree/bindings/soc/tegra/nvidia,tegra20-ahb.yaml40
-rw-r--r--Documentation/devicetree/bindings/soc/tegra/nvidia,tegra20-flowctrl.yaml41
-rw-r--r--Documentation/devicetree/bindings/soc/tegra/nvidia,tegra20-pmc.yaml416
-rw-r--r--Documentation/devicetree/bindings/soc/ti/k3-ringacc.yaml96
-rw-r--r--Documentation/devicetree/bindings/soc/ti/sci-pm-domain.txt58
-rw-r--r--Documentation/devicetree/bindings/soc/ti/sci-pm-domain.yaml59
-rw-r--r--Documentation/devicetree/bindings/soc/ti/ti,am654-serdes-ctrl.yaml43
-rw-r--r--Documentation/devicetree/bindings/soc/ti/ti,j721e-system-controller.yaml137
-rw-r--r--Documentation/devicetree/bindings/soc/ti/ti,j784s4-bist.yaml63
-rw-r--r--Documentation/devicetree/bindings/soc/ti/ti,pruss.yaml560
-rw-r--r--Documentation/devicetree/bindings/soc/ti/wkup-m3-ipc.yaml175
-rw-r--r--Documentation/devicetree/bindings/soc/ti/wkup_m3_ipc.txt57
-rw-r--r--Documentation/devicetree/bindings/soc/xilinx/xilinx.yaml280
-rw-r--r--Documentation/devicetree/bindings/soc/xilinx/xlnx,vcu-settings.yaml43
-rw-r--r--Documentation/devicetree/bindings/soc/xilinx/xlnx,vcu.txt31
-rw-r--r--Documentation/devicetree/bindings/soc/zte/pd-2967xx.txt19
-rw-r--r--Documentation/devicetree/bindings/sound/adi,adau1372.yaml69
-rw-r--r--Documentation/devicetree/bindings/sound/adi,adau1373.yaml111
-rw-r--r--Documentation/devicetree/bindings/sound/adi,adau17x1.txt32
-rw-r--r--Documentation/devicetree/bindings/sound/adi,adau17x1.yaml52
-rw-r--r--Documentation/devicetree/bindings/sound/adi,adau1977.txt54
-rw-r--r--Documentation/devicetree/bindings/sound/adi,adau1977.yaml94
-rw-r--r--Documentation/devicetree/bindings/sound/adi,adau7002.txt19
-rw-r--r--Documentation/devicetree/bindings/sound/adi,adau7002.yaml40
-rw-r--r--Documentation/devicetree/bindings/sound/adi,adau7118.yaml88
-rw-r--r--Documentation/devicetree/bindings/sound/adi,axi-i2s.txt7
-rw-r--r--Documentation/devicetree/bindings/sound/adi,max98363.yaml60
-rw-r--r--Documentation/devicetree/bindings/sound/adi,max98388.yaml79
-rw-r--r--Documentation/devicetree/bindings/sound/adi,max98396.yaml141
-rw-r--r--Documentation/devicetree/bindings/sound/adi,ssm2518.yaml49
-rw-r--r--Documentation/devicetree/bindings/sound/adi,ssm3515.yaml49
-rw-r--r--Documentation/devicetree/bindings/sound/ak4104.txt25
-rw-r--r--Documentation/devicetree/bindings/sound/ak4118.txt22
-rw-r--r--Documentation/devicetree/bindings/sound/ak4458.txt23
-rw-r--r--Documentation/devicetree/bindings/sound/ak4554.txt11
-rw-r--r--Documentation/devicetree/bindings/sound/ak4613.txt27
-rw-r--r--Documentation/devicetree/bindings/sound/ak4642.txt37
-rw-r--r--Documentation/devicetree/bindings/sound/ak5558.txt22
-rw-r--r--Documentation/devicetree/bindings/sound/alc5623.txt25
-rw-r--r--Documentation/devicetree/bindings/sound/alc5632.txt43
-rw-r--r--Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-codec.yaml346
-rw-r--r--Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-i2s.yaml147
-rw-r--r--Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml126
-rw-r--r--Documentation/devicetree/bindings/sound/allwinner,sun50i-a64-codec-analog.yaml44
-rw-r--r--Documentation/devicetree/bindings/sound/allwinner,sun50i-h6-dmic.yaml87
-rw-r--r--Documentation/devicetree/bindings/sound/allwinner,sun8i-a23-codec-analog.yaml41
-rw-r--r--Documentation/devicetree/bindings/sound/allwinner,sun8i-a33-codec.yaml68
-rw-r--r--Documentation/devicetree/bindings/sound/amlogic,aiu.yaml118
-rw-r--r--Documentation/devicetree/bindings/sound/amlogic,axg-fifo.txt23
-rw-r--r--Documentation/devicetree/bindings/sound/amlogic,axg-fifo.yaml112
-rw-r--r--Documentation/devicetree/bindings/sound/amlogic,axg-pdm.txt24
-rw-r--r--Documentation/devicetree/bindings/sound/amlogic,axg-pdm.yaml82
-rw-r--r--Documentation/devicetree/bindings/sound/amlogic,axg-sound-card.txt124
-rw-r--r--Documentation/devicetree/bindings/sound/amlogic,axg-sound-card.yaml181
-rw-r--r--Documentation/devicetree/bindings/sound/amlogic,axg-spdifin.yaml86
-rw-r--r--Documentation/devicetree/bindings/sound/amlogic,axg-spdifout.txt20
-rw-r--r--Documentation/devicetree/bindings/sound/amlogic,axg-spdifout.yaml79
-rw-r--r--Documentation/devicetree/bindings/sound/amlogic,axg-tdm-formatters.txt28
-rw-r--r--Documentation/devicetree/bindings/sound/amlogic,axg-tdm-formatters.yaml88
-rw-r--r--Documentation/devicetree/bindings/sound/amlogic,axg-tdm-iface.txt22
-rw-r--r--Documentation/devicetree/bindings/sound/amlogic,axg-tdm-iface.yaml55
-rw-r--r--Documentation/devicetree/bindings/sound/amlogic,g12a-toacodec.yaml56
-rw-r--r--Documentation/devicetree/bindings/sound/amlogic,g12a-tohdmitx.yaml54
-rw-r--r--Documentation/devicetree/bindings/sound/amlogic,gx-sound-card.yaml113
-rw-r--r--Documentation/devicetree/bindings/sound/amlogic,t9015.yaml70
-rw-r--r--Documentation/devicetree/bindings/sound/apple,mca.yaml140
-rw-r--r--Documentation/devicetree/bindings/sound/arm,pl041.yaml62
-rw-r--r--Documentation/devicetree/bindings/sound/arndale.txt24
-rw-r--r--Documentation/devicetree/bindings/sound/asahi-kasei,ak4104.yaml49
-rw-r--r--Documentation/devicetree/bindings/sound/asahi-kasei,ak4375.yaml60
-rw-r--r--Documentation/devicetree/bindings/sound/asahi-kasei,ak4458.yaml77
-rw-r--r--Documentation/devicetree/bindings/sound/asahi-kasei,ak4554.yaml27
-rw-r--r--Documentation/devicetree/bindings/sound/asahi-kasei,ak4613.yaml59
-rw-r--r--Documentation/devicetree/bindings/sound/asahi-kasei,ak4619.yaml62
-rw-r--r--Documentation/devicetree/bindings/sound/asahi-kasei,ak4642.yaml59
-rw-r--r--Documentation/devicetree/bindings/sound/asahi-kasei,ak5558.yaml48
-rw-r--r--Documentation/devicetree/bindings/sound/atmel,asoc-wm8904.yaml84
-rw-r--r--Documentation/devicetree/bindings/sound/atmel,at91-ssc.yaml109
-rw-r--r--Documentation/devicetree/bindings/sound/atmel,at91sam9g20ek-wm8731.yaml72
-rw-r--r--Documentation/devicetree/bindings/sound/atmel,sam9x5-wm8731-audio.yaml76
-rw-r--r--Documentation/devicetree/bindings/sound/atmel,sama5d2-classd.yaml105
-rw-r--r--Documentation/devicetree/bindings/sound/atmel,sama5d2-i2s.yaml85
-rw-r--r--Documentation/devicetree/bindings/sound/atmel,sama5d2-pdmic.yaml98
-rw-r--r--Documentation/devicetree/bindings/sound/atmel-at91sam9g20ek-wm8731-audio.txt26
-rw-r--r--Documentation/devicetree/bindings/sound/atmel-classd.txt55
-rw-r--r--Documentation/devicetree/bindings/sound/atmel-i2s.txt46
-rw-r--r--Documentation/devicetree/bindings/sound/atmel-pdmic.txt55
-rw-r--r--Documentation/devicetree/bindings/sound/atmel-sam9x5-wm8731-audio.txt35
-rw-r--r--Documentation/devicetree/bindings/sound/atmel-wm8904.txt55
-rw-r--r--Documentation/devicetree/bindings/sound/audio-graph-card.txt132
-rw-r--r--Documentation/devicetree/bindings/sound/audio-graph-card.yaml57
-rw-r--r--Documentation/devicetree/bindings/sound/audio-graph-card2.yaml49
-rw-r--r--Documentation/devicetree/bindings/sound/audio-graph-port.yaml131
-rw-r--r--Documentation/devicetree/bindings/sound/audio-graph-scu-card.txt123
-rw-r--r--Documentation/devicetree/bindings/sound/audio-graph.yaml53
-rw-r--r--Documentation/devicetree/bindings/sound/audio-iio-aux.yaml64
-rw-r--r--Documentation/devicetree/bindings/sound/awinic,aw8738.yaml54
-rw-r--r--Documentation/devicetree/bindings/sound/awinic,aw87390.yaml58
-rw-r--r--Documentation/devicetree/bindings/sound/awinic,aw88395.yaml85
-rw-r--r--Documentation/devicetree/bindings/sound/axentia,tse850-pcm5142.txt2
-rw-r--r--Documentation/devicetree/bindings/sound/brcm,bcm2835-i2s.txt24
-rw-r--r--Documentation/devicetree/bindings/sound/brcm,bcm2835-i2s.yaml51
-rw-r--r--Documentation/devicetree/bindings/sound/brcm,bcm63xx-audio.txt29
-rw-r--r--Documentation/devicetree/bindings/sound/bt-sco.txt13
-rw-r--r--Documentation/devicetree/bindings/sound/cirrus,cs35l41.yaml215
-rw-r--r--Documentation/devicetree/bindings/sound/cirrus,cs35l45.yaml159
-rw-r--r--Documentation/devicetree/bindings/sound/cirrus,cs4234.yaml74
-rw-r--r--Documentation/devicetree/bindings/sound/cirrus,cs4270.yaml59
-rw-r--r--Documentation/devicetree/bindings/sound/cirrus,cs4271.yaml101
-rw-r--r--Documentation/devicetree/bindings/sound/cirrus,cs42l42.yaml226
-rw-r--r--Documentation/devicetree/bindings/sound/cirrus,cs42l43.yaml314
-rw-r--r--Documentation/devicetree/bindings/sound/cirrus,cs42l51.yaml85
-rw-r--r--Documentation/devicetree/bindings/sound/cirrus,cs42l84.yaml56
-rw-r--r--Documentation/devicetree/bindings/sound/cirrus,cs42xx8.yaml85
-rw-r--r--Documentation/devicetree/bindings/sound/cirrus,cs48l32.yaml195
-rw-r--r--Documentation/devicetree/bindings/sound/cirrus,cs530x.yaml85
-rw-r--r--Documentation/devicetree/bindings/sound/cirrus,ep9301-i2s.yaml82
-rw-r--r--Documentation/devicetree/bindings/sound/cirrus,lochnagar.yaml55
-rw-r--r--Documentation/devicetree/bindings/sound/cirrus,madera.yaml118
-rw-r--r--Documentation/devicetree/bindings/sound/component-common.yaml21
-rw-r--r--Documentation/devicetree/bindings/sound/cs35l35.txt2
-rw-r--r--Documentation/devicetree/bindings/sound/cs35l36.txt168
-rw-r--r--Documentation/devicetree/bindings/sound/cs4270.txt21
-rw-r--r--Documentation/devicetree/bindings/sound/cs4271.txt57
-rw-r--r--Documentation/devicetree/bindings/sound/cs42l42.txt107
-rw-r--r--Documentation/devicetree/bindings/sound/cs42l51.txt17
-rw-r--r--Documentation/devicetree/bindings/sound/cs42l73.txt2
-rw-r--r--Documentation/devicetree/bindings/sound/cs42xx8.txt28
-rw-r--r--Documentation/devicetree/bindings/sound/cs4341.txt22
-rw-r--r--Documentation/devicetree/bindings/sound/cs53l30.txt2
-rw-r--r--Documentation/devicetree/bindings/sound/da7213.txt41
-rw-r--r--Documentation/devicetree/bindings/sound/da7219.txt112
-rw-r--r--Documentation/devicetree/bindings/sound/da9055.txt2
-rw-r--r--Documentation/devicetree/bindings/sound/dai-common.yaml18
-rw-r--r--Documentation/devicetree/bindings/sound/dai-params.yaml37
-rw-r--r--Documentation/devicetree/bindings/sound/davinci-mcasp-audio.txt60
-rw-r--r--Documentation/devicetree/bindings/sound/davinci-mcasp-audio.yaml202
-rw-r--r--Documentation/devicetree/bindings/sound/davinci-mcbsp.txt50
-rw-r--r--Documentation/devicetree/bindings/sound/davinci-mcbsp.yaml113
-rw-r--r--Documentation/devicetree/bindings/sound/designware-i2s.txt35
-rw-r--r--Documentation/devicetree/bindings/sound/dialog,da7219.yaml238
-rw-r--r--Documentation/devicetree/bindings/sound/dlg,da7213.yaml103
-rw-r--r--Documentation/devicetree/bindings/sound/dmic-codec.yaml58
-rw-r--r--Documentation/devicetree/bindings/sound/dmic.txt20
-rw-r--r--Documentation/devicetree/bindings/sound/es8328.txt38
-rw-r--r--Documentation/devicetree/bindings/sound/everest,es7134.txt15
-rw-r--r--Documentation/devicetree/bindings/sound/everest,es71x4.yaml62
-rw-r--r--Documentation/devicetree/bindings/sound/everest,es7241.txt28
-rw-r--r--Documentation/devicetree/bindings/sound/everest,es7241.yaml68
-rw-r--r--Documentation/devicetree/bindings/sound/everest,es8316.yaml78
-rw-r--r--Documentation/devicetree/bindings/sound/everest,es8326.yaml120
-rw-r--r--Documentation/devicetree/bindings/sound/everest,es8328.yaml86
-rw-r--r--Documentation/devicetree/bindings/sound/everest,es8375.yaml71
-rw-r--r--Documentation/devicetree/bindings/sound/everest,es8389.yaml50
-rw-r--r--Documentation/devicetree/bindings/sound/foursemi,fs2105s.yaml101
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,asrc.txt66
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,aud2htx.yaml66
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,audmix.yaml142
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,easrc.yaml109
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,esai.txt64
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,esai.yaml136
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,imx-asrc.yaml189
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,imx-audio-es8328.yaml111
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,imx95-cm7-sof.yaml64
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,micfil.yaml96
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,mqs.yaml117
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,mxs-audio-sgtl5000.yaml81
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,qmc-audio.yaml147
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,rpmsg.yaml134
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,sai.yaml262
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,saif.yaml83
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,sgtl5000.yaml113
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,sof-cpu.yaml27
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,spdif.txt64
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,spdif.yaml160
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,ssi.txt87
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,ssi.yaml194
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,xcvr.yaml160
-rw-r--r--Documentation/devicetree/bindings/sound/fsl-asoc-card.txt94
-rw-r--r--Documentation/devicetree/bindings/sound/fsl-asoc-card.yaml242
-rw-r--r--Documentation/devicetree/bindings/sound/fsl-sai.txt80
-rw-r--r--Documentation/devicetree/bindings/sound/google,chv3-codec.yaml31
-rw-r--r--Documentation/devicetree/bindings/sound/google,chv3-i2s.yaml44
-rw-r--r--Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml78
-rw-r--r--Documentation/devicetree/bindings/sound/google,sc7180-trogdor.yaml137
-rw-r--r--Documentation/devicetree/bindings/sound/google,sc7280-herobrine.yaml182
-rw-r--r--Documentation/devicetree/bindings/sound/gtm601.txt13
-rw-r--r--Documentation/devicetree/bindings/sound/hdmi.txt16
-rw-r--r--Documentation/devicetree/bindings/sound/ics43432.txt17
-rw-r--r--Documentation/devicetree/bindings/sound/imx-audio-card.yaml131
-rw-r--r--Documentation/devicetree/bindings/sound/imx-audio-es8328.txt60
-rw-r--r--Documentation/devicetree/bindings/sound/imx-audio-hdmi.yaml55
-rw-r--r--Documentation/devicetree/bindings/sound/imx-audio-sgtl5000.txt56
-rw-r--r--Documentation/devicetree/bindings/sound/imx-audio-spdif.txt36
-rw-r--r--Documentation/devicetree/bindings/sound/imx-audmux.txt28
-rw-r--r--Documentation/devicetree/bindings/sound/imx-audmux.yaml119
-rw-r--r--Documentation/devicetree/bindings/sound/infineon,peb2466.yaml91
-rw-r--r--Documentation/devicetree/bindings/sound/ingenic,aic.yaml90
-rw-r--r--Documentation/devicetree/bindings/sound/ingenic,codec.yaml63
-rw-r--r--Documentation/devicetree/bindings/sound/ingenic,jz4740-i2s.txt23
-rw-r--r--Documentation/devicetree/bindings/sound/inno-rk3036.txt20
-rw-r--r--Documentation/devicetree/bindings/sound/intel,keembay-i2s.yaml90
-rw-r--r--Documentation/devicetree/bindings/sound/invensense,ics43432.yaml51
-rw-r--r--Documentation/devicetree/bindings/sound/irondevice,sma1303.yaml48
-rw-r--r--Documentation/devicetree/bindings/sound/irondevice,sma1307.yaml53
-rw-r--r--Documentation/devicetree/bindings/sound/linux,bt-sco.yaml41
-rw-r--r--Documentation/devicetree/bindings/sound/linux,spdif.yaml42
-rw-r--r--Documentation/devicetree/bindings/sound/loongson,ls-audio-card.yaml70
-rw-r--r--Documentation/devicetree/bindings/sound/loongson,ls1b-ac97.yaml68
-rw-r--r--Documentation/devicetree/bindings/sound/loongson,ls2k1000-i2s.yaml68
-rw-r--r--Documentation/devicetree/bindings/sound/marvell,mmp-sspa.yaml105
-rw-r--r--Documentation/devicetree/bindings/sound/max98090.txt59
-rw-r--r--Documentation/devicetree/bindings/sound/max98095.txt22
-rw-r--r--Documentation/devicetree/bindings/sound/max98357a.txt18
-rw-r--r--Documentation/devicetree/bindings/sound/max98371.txt17
-rw-r--r--Documentation/devicetree/bindings/sound/max98504.txt44
-rw-r--r--Documentation/devicetree/bindings/sound/max9867.txt17
-rw-r--r--Documentation/devicetree/bindings/sound/max9892x.txt41
-rw-r--r--Documentation/devicetree/bindings/sound/maxim,max9759.txt18
-rw-r--r--Documentation/devicetree/bindings/sound/maxim,max9759.yaml45
-rw-r--r--Documentation/devicetree/bindings/sound/maxim,max98088.txt23
-rw-r--r--Documentation/devicetree/bindings/sound/maxim,max98088.yaml47
-rw-r--r--Documentation/devicetree/bindings/sound/maxim,max98090.yaml84
-rw-r--r--Documentation/devicetree/bindings/sound/maxim,max98095.yaml54
-rw-r--r--Documentation/devicetree/bindings/sound/maxim,max98357a.yaml52
-rw-r--r--Documentation/devicetree/bindings/sound/maxim,max98371.yaml42
-rw-r--r--Documentation/devicetree/bindings/sound/maxim,max98390.yaml60
-rw-r--r--Documentation/devicetree/bindings/sound/maxim,max98504.yaml86
-rw-r--r--Documentation/devicetree/bindings/sound/maxim,max98520.yaml35
-rw-r--r--Documentation/devicetree/bindings/sound/maxim,max9867.yaml60
-rw-r--r--Documentation/devicetree/bindings/sound/maxim,max98925.yaml98
-rw-r--r--Documentation/devicetree/bindings/sound/mediatek,mt2701-audio.yaml116
-rw-r--r--Documentation/devicetree/bindings/sound/mediatek,mt2701-wm8960.yaml54
-rw-r--r--Documentation/devicetree/bindings/sound/mediatek,mt7986-afe.yaml160
-rw-r--r--Documentation/devicetree/bindings/sound/mediatek,mt7986-wm8960.yaml67
-rw-r--r--Documentation/devicetree/bindings/sound/mediatek,mt8173-afe-pcm.yaml98
-rw-r--r--Documentation/devicetree/bindings/sound/mediatek,mt8183-audio.yaml228
-rw-r--r--Documentation/devicetree/bindings/sound/mediatek,mt8183_da7219.yaml49
-rw-r--r--Documentation/devicetree/bindings/sound/mediatek,mt8183_mt6358_ts3a227.yaml59
-rw-r--r--Documentation/devicetree/bindings/sound/mediatek,mt8188-afe.yaml241
-rw-r--r--Documentation/devicetree/bindings/sound/mediatek,mt8188-mt6359.yaml136
-rw-r--r--Documentation/devicetree/bindings/sound/mediatek,mt8365-afe.yaml130
-rw-r--r--Documentation/devicetree/bindings/sound/mediatek,mt8365-mt6357.yaml107
-rw-r--r--Documentation/devicetree/bindings/sound/microchip,sama7g5-i2smcc.yaml115
-rw-r--r--Documentation/devicetree/bindings/sound/microchip,sama7g5-pdmc.yaml105
-rw-r--r--Documentation/devicetree/bindings/sound/microchip,sama7g5-spdifrx.yaml76
-rw-r--r--Documentation/devicetree/bindings/sound/microchip,sama7g5-spdiftx.yaml78
-rw-r--r--Documentation/devicetree/bindings/sound/mscc,zl38060.yaml72
-rw-r--r--Documentation/devicetree/bindings/sound/mt2701-afe-pcm.txt146
-rw-r--r--Documentation/devicetree/bindings/sound/mt2701-wm8960.txt24
-rw-r--r--Documentation/devicetree/bindings/sound/mt6358.txt26
-rw-r--r--Documentation/devicetree/bindings/sound/mt6359.yaml61
-rw-r--r--Documentation/devicetree/bindings/sound/mt8186-afe-pcm.yaml180
-rw-r--r--Documentation/devicetree/bindings/sound/mt8186-mt6366-da7219-max98357.yaml197
-rw-r--r--Documentation/devicetree/bindings/sound/mt8186-mt6366-rt1019-rt5682s.yaml201
-rw-r--r--Documentation/devicetree/bindings/sound/mt8192-afe-pcm.yaml105
-rw-r--r--Documentation/devicetree/bindings/sound/mt8192-mt6359-rt1015-rt5682.yaml203
-rw-r--r--Documentation/devicetree/bindings/sound/mt8195-afe-pcm.yaml200
-rw-r--r--Documentation/devicetree/bindings/sound/mt8195-mt6359.yaml202
-rw-r--r--Documentation/devicetree/bindings/sound/mtk-afe-pcm.txt45
-rw-r--r--Documentation/devicetree/bindings/sound/mtk-btcvsd-snd.txt24
-rw-r--r--Documentation/devicetree/bindings/sound/mvebu-audio.txt14
-rw-r--r--Documentation/devicetree/bindings/sound/mxs-audio-sgtl5000.txt42
-rw-r--r--Documentation/devicetree/bindings/sound/mxs-saif.txt41
-rw-r--r--Documentation/devicetree/bindings/sound/name-prefix.txt24
-rw-r--r--Documentation/devicetree/bindings/sound/nau8540.txt16
-rw-r--r--Documentation/devicetree/bindings/sound/nau8810.txt16
-rw-r--r--Documentation/devicetree/bindings/sound/nau8822.txt16
-rw-r--r--Documentation/devicetree/bindings/sound/nau8824.txt88
-rw-r--r--Documentation/devicetree/bindings/sound/nau8825.txt105
-rw-r--r--Documentation/devicetree/bindings/sound/neofidelity,ntp8835.yaml73
-rw-r--r--Documentation/devicetree/bindings/sound/neofidelity,ntp8918.yaml72
-rw-r--r--Documentation/devicetree/bindings/sound/nuvoton,nau8315.yaml44
-rw-r--r--Documentation/devicetree/bindings/sound/nuvoton,nau8325.yaml80
-rw-r--r--Documentation/devicetree/bindings/sound/nuvoton,nau8540.yaml40
-rw-r--r--Documentation/devicetree/bindings/sound/nuvoton,nau8810.yaml45
-rw-r--r--Documentation/devicetree/bindings/sound/nuvoton,nau8821.yaml148
-rw-r--r--Documentation/devicetree/bindings/sound/nuvoton,nau8822.yaml58
-rw-r--r--Documentation/devicetree/bindings/sound/nuvoton,nau8824.yaml190
-rw-r--r--Documentation/devicetree/bindings/sound/nuvoton,nau8825.yaml253
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra-audio-alc5632.txt48
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra-audio-alc5632.yaml74
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra-audio-common.yaml87
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra-audio-graph-card.yaml200
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra-audio-max9808x.yaml90
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra-audio-max98090.txt53
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra-audio-max98090.yaml97
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra-audio-rt5631.yaml85
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra-audio-rt5640.txt52
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra-audio-rt5640.yaml84
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra-audio-rt5677.txt67
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra-audio-rt5677.yaml100
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra-audio-sgtl5000.txt42
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra-audio-sgtl5000.yaml67
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra-audio-trimslice.txt21
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra-audio-trimslice.yaml33
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra-audio-wm8753.txt40
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra-audio-wm8753.yaml79
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra-audio-wm8903.txt60
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra-audio-wm8903.yaml93
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra-audio-wm9712.txt60
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra-audio-wm9712.yaml76
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra186-asrc.yaml83
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra186-dspk.yaml101
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra20-ac97.txt36
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra20-ac97.yaml82
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra20-das.txt12
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra20-das.yaml36
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra20-i2s.txt30
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra20-i2s.yaml77
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra20-spdif.yaml88
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra210-admaif.yaml146
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra210-adx.yaml79
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra210-ahub.yaml197
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra210-amx.yaml81
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra210-dmic.yaml100
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra210-i2s.yaml117
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra210-mbdrc.yaml48
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra210-mixer.yaml76
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra210-mvc.yaml78
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra210-ope.yaml88
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra210-peq.yaml49
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra210-sfc.yaml75
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra30-hda.txt30
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra30-hda.yaml198
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra30-i2s.txt27
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra30-i2s.yaml67
-rw-r--r--Documentation/devicetree/bindings/sound/nxp,lpc3220-i2s.yaml73
-rw-r--r--Documentation/devicetree/bindings/sound/nxp,tfa9879.yaml44
-rw-r--r--Documentation/devicetree/bindings/sound/nxp,tfa989x.yaml99
-rw-r--r--Documentation/devicetree/bindings/sound/nxp,uda1342.yaml42
-rw-r--r--Documentation/devicetree/bindings/sound/omap-mcpdm.txt20
-rw-r--r--Documentation/devicetree/bindings/sound/omap-twl4030.txt62
-rw-r--r--Documentation/devicetree/bindings/sound/option,gtm601.yaml42
-rw-r--r--Documentation/devicetree/bindings/sound/pcm3060.txt6
-rw-r--r--Documentation/devicetree/bindings/sound/pcm512x.txt52
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,apq8016-sbc-sndcard.yaml205
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,apq8016-sbc.txt89
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,apq8096.txt120
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,lpass-cpu.txt54
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,lpass-cpu.yaml290
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,lpass-rx-macro.yaml138
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,lpass-tx-macro.yaml161
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,lpass-va-macro.yaml165
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,lpass-wsa-macro.yaml121
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,msm8916-wcd-analog.txt100
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,msm8916-wcd-digital-codec.yaml55
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,msm8916-wcd-digital.txt20
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,pm4125-codec.yaml134
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,pm4125-sdw.yaml79
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,pm8916-wcd-analog-codec.yaml153
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,q6adm-routing.yaml39
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,q6adm.txt39
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,q6adm.yaml51
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,q6afe.txt178
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,q6afe.yaml81
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,q6apm-dai.yaml34
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,q6apm-lpass-dais.yaml35
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,q6apm.yaml68
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,q6asm-dais.yaml96
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,q6asm.txt39
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,q6asm.yaml68
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,q6core.txt21
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,q6core.yaml39
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,q6dsp-lpass-clocks.yaml41
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,q6dsp-lpass-ports.yaml164
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,q6prm.yaml50
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,q6usb.yaml55
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,sdm845.txt80
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,sm8250.yaml214
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,wcd9335.txt123
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,wcd9335.yaml156
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,wcd934x.yaml239
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,wcd937x-sdw.yaml127
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,wcd937x.yaml82
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,wcd938x-sdw.yaml70
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,wcd938x.yaml84
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,wcd939x-sdw.yaml69
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,wcd939x.yaml99
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,wcd93xx-common.yaml95
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,wsa881x.yaml71
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,wsa883x.yaml98
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,wsa8840.yaml83
-rw-r--r--Documentation/devicetree/bindings/sound/realtek,alc203.yaml36
-rw-r--r--Documentation/devicetree/bindings/sound/realtek,alc5623.yaml54
-rw-r--r--Documentation/devicetree/bindings/sound/realtek,alc5632.yaml63
-rw-r--r--Documentation/devicetree/bindings/sound/realtek,rt1015.yaml41
-rw-r--r--Documentation/devicetree/bindings/sound/realtek,rt1015p.yaml43
-rw-r--r--Documentation/devicetree/bindings/sound/realtek,rt1016.yaml40
-rw-r--r--Documentation/devicetree/bindings/sound/realtek,rt1019.yaml35
-rw-r--r--Documentation/devicetree/bindings/sound/realtek,rt5514.yaml70
-rw-r--r--Documentation/devicetree/bindings/sound/realtek,rt5616.yaml61
-rw-r--r--Documentation/devicetree/bindings/sound/realtek,rt5631.yaml67
-rw-r--r--Documentation/devicetree/bindings/sound/realtek,rt5640.yaml146
-rw-r--r--Documentation/devicetree/bindings/sound/realtek,rt5645.yaml131
-rw-r--r--Documentation/devicetree/bindings/sound/realtek,rt5659.yaml129
-rw-r--r--Documentation/devicetree/bindings/sound/realtek,rt5677.yaml135
-rw-r--r--Documentation/devicetree/bindings/sound/realtek,rt5682.yaml156
-rw-r--r--Documentation/devicetree/bindings/sound/realtek,rt5682s.yaml150
-rw-r--r--Documentation/devicetree/bindings/sound/renesas,fsi.txt31
-rw-r--r--Documentation/devicetree/bindings/sound/renesas,fsi.yaml87
-rw-r--r--Documentation/devicetree/bindings/sound/renesas,idt821034.yaml75
-rw-r--r--Documentation/devicetree/bindings/sound/renesas,rsnd.txt498
-rw-r--r--Documentation/devicetree/bindings/sound/renesas,rsnd.yaml541
-rw-r--r--Documentation/devicetree/bindings/sound/renesas,rz-ssi.yaml113
-rw-r--r--Documentation/devicetree/bindings/sound/richtek,rt9120.yaml62
-rw-r--r--Documentation/devicetree/bindings/sound/richtek,rt9123.yaml63
-rw-r--r--Documentation/devicetree/bindings/sound/richtek,rt9123p.yaml48
-rw-r--r--Documentation/devicetree/bindings/sound/richtek,rtq9128.yaml61
-rw-r--r--Documentation/devicetree/bindings/sound/rockchip,i2s-tdm.yaml192
-rw-r--r--Documentation/devicetree/bindings/sound/rockchip,pdm.txt41
-rw-r--r--Documentation/devicetree/bindings/sound/rockchip,pdm.yaml123
-rw-r--r--Documentation/devicetree/bindings/sound/rockchip,rk3036-codec.yaml58
-rw-r--r--Documentation/devicetree/bindings/sound/rockchip,rk3288-hdmi-analog.txt2
-rw-r--r--Documentation/devicetree/bindings/sound/rockchip,rk3308-codec.yaml102
-rw-r--r--Documentation/devicetree/bindings/sound/rockchip,rk3328-codec.yaml74
-rw-r--r--Documentation/devicetree/bindings/sound/rockchip,rk3576-sai.yaml144
-rw-r--r--Documentation/devicetree/bindings/sound/rockchip-i2s.txt49
-rw-r--r--Documentation/devicetree/bindings/sound/rockchip-i2s.yaml140
-rw-r--r--Documentation/devicetree/bindings/sound/rockchip-max98090.txt27
-rw-r--r--Documentation/devicetree/bindings/sound/rockchip-spdif.txt45
-rw-r--r--Documentation/devicetree/bindings/sound/rockchip-spdif.yaml109
-rw-r--r--Documentation/devicetree/bindings/sound/rohm,bd28623.txt29
-rw-r--r--Documentation/devicetree/bindings/sound/rohm,bd28623.yaml70
-rw-r--r--Documentation/devicetree/bindings/sound/rt1011.txt42
-rw-r--r--Documentation/devicetree/bindings/sound/rt1308.txt17
-rw-r--r--Documentation/devicetree/bindings/sound/rt5514.txt37
-rw-r--r--Documentation/devicetree/bindings/sound/rt5616.txt32
-rw-r--r--Documentation/devicetree/bindings/sound/rt5631.txt48
-rw-r--r--Documentation/devicetree/bindings/sound/rt5640.txt94
-rw-r--r--Documentation/devicetree/bindings/sound/rt5645.txt72
-rw-r--r--Documentation/devicetree/bindings/sound/rt5651.txt5
-rw-r--r--Documentation/devicetree/bindings/sound/rt5659.txt78
-rw-r--r--Documentation/devicetree/bindings/sound/rt5663.txt8
-rw-r--r--Documentation/devicetree/bindings/sound/rt5665.txt2
-rw-r--r--Documentation/devicetree/bindings/sound/rt5668.txt2
-rw-r--r--Documentation/devicetree/bindings/sound/rt5677.txt78
-rw-r--r--Documentation/devicetree/bindings/sound/rt5682.txt50
-rw-r--r--Documentation/devicetree/bindings/sound/samsung,aries-wm8994.yaml153
-rw-r--r--Documentation/devicetree/bindings/sound/samsung,arndale.yaml45
-rw-r--r--Documentation/devicetree/bindings/sound/samsung,midas-audio.yaml145
-rw-r--r--Documentation/devicetree/bindings/sound/samsung,odroid.txt54
-rw-r--r--Documentation/devicetree/bindings/sound/samsung,odroid.yaml92
-rw-r--r--Documentation/devicetree/bindings/sound/samsung,smdk-wm8994.txt14
-rw-r--r--Documentation/devicetree/bindings/sound/samsung,smdk5250.yaml38
-rw-r--r--Documentation/devicetree/bindings/sound/samsung,snow.yaml76
-rw-r--r--Documentation/devicetree/bindings/sound/samsung,tm2-audio.txt42
-rw-r--r--Documentation/devicetree/bindings/sound/samsung,tm2.yaml83
-rw-r--r--Documentation/devicetree/bindings/sound/samsung-i2s.txt84
-rw-r--r--Documentation/devicetree/bindings/sound/samsung-i2s.yaml162
-rw-r--r--Documentation/devicetree/bindings/sound/serial-midi.yaml54
-rw-r--r--Documentation/devicetree/bindings/sound/sgtl5000.txt51
-rw-r--r--Documentation/devicetree/bindings/sound/simple-amplifier.txt12
-rw-r--r--Documentation/devicetree/bindings/sound/simple-audio-amplifier.yaml45
-rw-r--r--Documentation/devicetree/bindings/sound/simple-audio-mux.yaml51
-rw-r--r--Documentation/devicetree/bindings/sound/simple-card.txt212
-rw-r--r--Documentation/devicetree/bindings/sound/simple-card.yaml571
-rw-r--r--Documentation/devicetree/bindings/sound/simple-scu-card.txt94
-rw-r--r--Documentation/devicetree/bindings/sound/sirf-audio-codec.txt17
-rw-r--r--Documentation/devicetree/bindings/sound/sirf-usp.txt27
-rw-r--r--Documentation/devicetree/bindings/sound/snow.txt31
-rw-r--r--Documentation/devicetree/bindings/sound/snps,designware-i2s.yaml197
-rw-r--r--Documentation/devicetree/bindings/sound/socionext,uniphier-aio.yaml102
-rw-r--r--Documentation/devicetree/bindings/sound/socionext,uniphier-evea.yaml75
-rw-r--r--Documentation/devicetree/bindings/sound/sound-card-common.yaml34
-rw-r--r--Documentation/devicetree/bindings/sound/sound-dai.yaml20
-rw-r--r--Documentation/devicetree/bindings/sound/spdif-receiver.txt10
-rw-r--r--Documentation/devicetree/bindings/sound/spdif-transmitter.txt10
-rw-r--r--Documentation/devicetree/bindings/sound/sprd,pcm-platform.yaml56
-rw-r--r--Documentation/devicetree/bindings/sound/sprd,sc9860-mcdt.yaml47
-rw-r--r--Documentation/devicetree/bindings/sound/ssm2518.txt20
-rw-r--r--Documentation/devicetree/bindings/sound/st,sta350.txt2
-rw-r--r--Documentation/devicetree/bindings/sound/st,sti-asoc-card.txt2
-rw-r--r--Documentation/devicetree/bindings/sound/st,stm32-adfsdm.txt63
-rw-r--r--Documentation/devicetree/bindings/sound/st,stm32-i2s.txt62
-rw-r--r--Documentation/devicetree/bindings/sound/st,stm32-i2s.yaml139
-rw-r--r--Documentation/devicetree/bindings/sound/st,stm32-sai.txt107
-rw-r--r--Documentation/devicetree/bindings/sound/st,stm32-sai.yaml227
-rw-r--r--Documentation/devicetree/bindings/sound/st,stm32-spdifrx.txt56
-rw-r--r--Documentation/devicetree/bindings/sound/st,stm32-spdifrx.yaml91
-rw-r--r--Documentation/devicetree/bindings/sound/starfive,jh7110-pwmdac.yaml76
-rw-r--r--Documentation/devicetree/bindings/sound/starfive,jh7110-tdm.yaml98
-rw-r--r--Documentation/devicetree/bindings/sound/sun4i-codec.txt94
-rw-r--r--Documentation/devicetree/bindings/sound/sun4i-i2s.txt45
-rw-r--r--Documentation/devicetree/bindings/sound/sun50i-codec-analog.txt12
-rw-r--r--Documentation/devicetree/bindings/sound/sun8i-a33-codec.txt63
-rw-r--r--Documentation/devicetree/bindings/sound/sun8i-codec-analog.txt17
-rw-r--r--Documentation/devicetree/bindings/sound/sunxi,sun4i-spdif.txt42
-rw-r--r--Documentation/devicetree/bindings/sound/tas2552.txt2
-rw-r--r--Documentation/devicetree/bindings/sound/tas571x.txt48
-rw-r--r--Documentation/devicetree/bindings/sound/tas5720.txt8
-rw-r--r--Documentation/devicetree/bindings/sound/tdm-slot.txt4
-rw-r--r--Documentation/devicetree/bindings/sound/test-component.yaml33
-rw-r--r--Documentation/devicetree/bindings/sound/tfa9879.txt23
-rw-r--r--Documentation/devicetree/bindings/sound/ti,j721e-cpb-audio.yaml139
-rw-r--r--Documentation/devicetree/bindings/sound/ti,j721e-cpb-ivi-audio.yaml145
-rw-r--r--Documentation/devicetree/bindings/sound/ti,omap-twl4030.yaml98
-rw-r--r--Documentation/devicetree/bindings/sound/ti,omap4-mcpdm.yaml73
-rw-r--r--Documentation/devicetree/bindings/sound/ti,pcm1681.txt15
-rw-r--r--Documentation/devicetree/bindings/sound/ti,pcm1681.yaml43
-rw-r--r--Documentation/devicetree/bindings/sound/ti,pcm1754.yaml55
-rw-r--r--Documentation/devicetree/bindings/sound/ti,pcm3168a.txt48
-rw-r--r--Documentation/devicetree/bindings/sound/ti,pcm3168a.yaml108
-rw-r--r--Documentation/devicetree/bindings/sound/ti,pcm512x.yaml101
-rw-r--r--Documentation/devicetree/bindings/sound/ti,pcm6240.yaml179
-rw-r--r--Documentation/devicetree/bindings/sound/ti,src4xxx.yaml48
-rw-r--r--Documentation/devicetree/bindings/sound/ti,tas2562.yaml83
-rw-r--r--Documentation/devicetree/bindings/sound/ti,tas2770.yaml90
-rw-r--r--Documentation/devicetree/bindings/sound/ti,tas2781.yaml218
-rw-r--r--Documentation/devicetree/bindings/sound/ti,tas27xx.yaml85
-rw-r--r--Documentation/devicetree/bindings/sound/ti,tas57xx.yaml137
-rw-r--r--Documentation/devicetree/bindings/sound/ti,tas5805m.yaml57
-rw-r--r--Documentation/devicetree/bindings/sound/ti,tas6424.txt2
-rw-r--r--Documentation/devicetree/bindings/sound/ti,tlv320adc3xxx.yaml164
-rw-r--r--Documentation/devicetree/bindings/sound/ti,tlv320adcx140.yaml209
-rw-r--r--Documentation/devicetree/bindings/sound/ti,tlv320aic32x4.yaml101
-rw-r--r--Documentation/devicetree/bindings/sound/ti,tlv320aic3x.yaml166
-rw-r--r--Documentation/devicetree/bindings/sound/ti,tlv320dac3100.yaml127
-rw-r--r--Documentation/devicetree/bindings/sound/ti,tpa6130a2.yaml55
-rw-r--r--Documentation/devicetree/bindings/sound/ti,ts3a227e.yaml94
-rw-r--r--Documentation/devicetree/bindings/sound/ti,twl4030-audio.yaml90
-rw-r--r--Documentation/devicetree/bindings/sound/tlv320aic31xx.txt72
-rw-r--r--Documentation/devicetree/bindings/sound/tlv320aic32x4.txt41
-rw-r--r--Documentation/devicetree/bindings/sound/tlv320aic3x.txt80
-rw-r--r--Documentation/devicetree/bindings/sound/tpa6130a2.txt27
-rw-r--r--Documentation/devicetree/bindings/sound/ts3a227e.txt30
-rw-r--r--Documentation/devicetree/bindings/sound/uda1334.txt17
-rw-r--r--Documentation/devicetree/bindings/sound/uniphier,aio.txt45
-rw-r--r--Documentation/devicetree/bindings/sound/uniphier,evea.txt26
-rw-r--r--Documentation/devicetree/bindings/sound/wlf,arizona.txt53
-rw-r--r--Documentation/devicetree/bindings/sound/wlf,arizona.yaml119
-rw-r--r--Documentation/devicetree/bindings/sound/wlf,wm8510.yaml41
-rw-r--r--Documentation/devicetree/bindings/sound/wlf,wm8523.yaml40
-rw-r--r--Documentation/devicetree/bindings/sound/wlf,wm8524.yaml40
-rw-r--r--Documentation/devicetree/bindings/sound/wlf,wm8580.yaml42
-rw-r--r--Documentation/devicetree/bindings/sound/wlf,wm8711.yaml40
-rw-r--r--Documentation/devicetree/bindings/sound/wlf,wm8728.yaml40
-rw-r--r--Documentation/devicetree/bindings/sound/wlf,wm8731.yaml99
-rw-r--r--Documentation/devicetree/bindings/sound/wlf,wm8737.yaml40
-rw-r--r--Documentation/devicetree/bindings/sound/wlf,wm8750.yaml42
-rw-r--r--Documentation/devicetree/bindings/sound/wlf,wm8753.yaml62
-rw-r--r--Documentation/devicetree/bindings/sound/wlf,wm8776.yaml41
-rw-r--r--Documentation/devicetree/bindings/sound/wlf,wm8782.yaml47
-rw-r--r--Documentation/devicetree/bindings/sound/wlf,wm8804.yaml58
-rw-r--r--Documentation/devicetree/bindings/sound/wlf,wm8903.yaml116
-rw-r--r--Documentation/devicetree/bindings/sound/wlf,wm8904.yaml203
-rw-r--r--Documentation/devicetree/bindings/sound/wlf,wm8940.yaml60
-rw-r--r--Documentation/devicetree/bindings/sound/wlf,wm8960.yaml134
-rw-r--r--Documentation/devicetree/bindings/sound/wlf,wm8961.yaml43
-rw-r--r--Documentation/devicetree/bindings/sound/wlf,wm8962.yaml124
-rw-r--r--Documentation/devicetree/bindings/sound/wlf,wm8974.txt15
-rw-r--r--Documentation/devicetree/bindings/sound/wlf,wm8974.yaml41
-rw-r--r--Documentation/devicetree/bindings/sound/wlf,wm8978.yaml61
-rw-r--r--Documentation/devicetree/bindings/sound/wlf,wm8994.yaml194
-rw-r--r--Documentation/devicetree/bindings/sound/wm8510.txt18
-rw-r--r--Documentation/devicetree/bindings/sound/wm8523.txt16
-rw-r--r--Documentation/devicetree/bindings/sound/wm8524.txt16
-rw-r--r--Documentation/devicetree/bindings/sound/wm8580.txt16
-rw-r--r--Documentation/devicetree/bindings/sound/wm8711.txt18
-rw-r--r--Documentation/devicetree/bindings/sound/wm8728.txt18
-rw-r--r--Documentation/devicetree/bindings/sound/wm8731.txt27
-rw-r--r--Documentation/devicetree/bindings/sound/wm8737.txt18
-rw-r--r--Documentation/devicetree/bindings/sound/wm8750.txt18
-rw-r--r--Documentation/devicetree/bindings/sound/wm8753.txt40
-rw-r--r--Documentation/devicetree/bindings/sound/wm8776.txt18
-rw-r--r--Documentation/devicetree/bindings/sound/wm8782.txt17
-rw-r--r--Documentation/devicetree/bindings/sound/wm8804.txt25
-rw-r--r--Documentation/devicetree/bindings/sound/wm8903.txt82
-rw-r--r--Documentation/devicetree/bindings/sound/wm8904.txt33
-rw-r--r--Documentation/devicetree/bindings/sound/wm8960.txt31
-rw-r--r--Documentation/devicetree/bindings/sound/wm8962.txt39
-rw-r--r--Documentation/devicetree/bindings/sound/wm8994.txt83
-rw-r--r--Documentation/devicetree/bindings/sound/xlnx,audio-formatter.yaml72
-rw-r--r--Documentation/devicetree/bindings/sound/xlnx,i2s.yaml65
-rw-r--r--Documentation/devicetree/bindings/sound/xlnx,spdif.yaml77
-rw-r--r--Documentation/devicetree/bindings/sound/xmos,xvf3500.yaml63
-rw-r--r--Documentation/devicetree/bindings/sound/zte,tdm.txt30
-rw-r--r--Documentation/devicetree/bindings/sound/zte,zx-aud96p22.txt24
-rw-r--r--Documentation/devicetree/bindings/sound/zte,zx-i2s.txt45
-rw-r--r--Documentation/devicetree/bindings/sound/zte,zx-spdif.txt27
-rw-r--r--Documentation/devicetree/bindings/soundwire/qcom,soundwire.yaml285
-rw-r--r--Documentation/devicetree/bindings/soundwire/soundwire-controller.yaml86
-rw-r--r--Documentation/devicetree/bindings/spi/adi,axi-spi-engine.txt31
-rw-r--r--Documentation/devicetree/bindings/spi/adi,axi-spi-engine.yaml90
-rw-r--r--Documentation/devicetree/bindings/spi/airoha,en7581-snand.yaml65
-rw-r--r--Documentation/devicetree/bindings/spi/allwinner,sun4i-a10-spi.yaml84
-rw-r--r--Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml121
-rw-r--r--Documentation/devicetree/bindings/spi/amlogic,a1-spifc.yaml44
-rw-r--r--Documentation/devicetree/bindings/spi/amlogic,a4-spifc.yaml82
-rw-r--r--Documentation/devicetree/bindings/spi/amlogic,a4-spisg.yaml59
-rw-r--r--Documentation/devicetree/bindings/spi/amlogic,meson-gx-spicc.yaml116
-rw-r--r--Documentation/devicetree/bindings/spi/amlogic,meson6-spifc.yaml54
-rw-r--r--Documentation/devicetree/bindings/spi/apple,spi.yaml66
-rw-r--r--Documentation/devicetree/bindings/spi/arm,pl022-peripheral-props.yaml61
-rw-r--r--Documentation/devicetree/bindings/spi/aspeed,ast2600-fmc.yaml82
-rw-r--r--Documentation/devicetree/bindings/spi/atmel,at91rm9200-spi.yaml93
-rw-r--r--Documentation/devicetree/bindings/spi/atmel,quadspi.yaml102
-rw-r--r--Documentation/devicetree/bindings/spi/brcm,bcm2835-aux-spi.txt38
-rw-r--r--Documentation/devicetree/bindings/spi/brcm,bcm2835-aux-spi.yaml53
-rw-r--r--Documentation/devicetree/bindings/spi/brcm,bcm2835-spi.txt22
-rw-r--r--Documentation/devicetree/bindings/spi/brcm,bcm2835-spi.yaml50
-rw-r--r--Documentation/devicetree/bindings/spi/brcm,bcm63xx-hsspi.yaml134
-rw-r--r--Documentation/devicetree/bindings/spi/brcm,bcm63xx-spi.yaml71
-rw-r--r--Documentation/devicetree/bindings/spi/brcm,spi-bcm-qspi.txt233
-rw-r--r--Documentation/devicetree/bindings/spi/brcm,spi-bcm-qspi.yaml196
-rw-r--r--Documentation/devicetree/bindings/spi/cdns,qspi-nor-peripheral-props.yaml42
-rw-r--r--Documentation/devicetree/bindings/spi/cdns,qspi-nor.yaml198
-rw-r--r--Documentation/devicetree/bindings/spi/cdns,xspi.yaml101
-rw-r--r--Documentation/devicetree/bindings/spi/cirrus,ep9301-spi.yaml70
-rw-r--r--Documentation/devicetree/bindings/spi/efm32-spi.txt39
-rw-r--r--Documentation/devicetree/bindings/spi/fsl,dspi-peripheral-props.yaml30
-rw-r--r--Documentation/devicetree/bindings/spi/fsl,dspi.yaml134
-rw-r--r--Documentation/devicetree/bindings/spi/fsl,espi.yaml65
-rw-r--r--Documentation/devicetree/bindings/spi/fsl,spi-fsl-qspi.yaml96
-rw-r--r--Documentation/devicetree/bindings/spi/fsl,spi.yaml74
-rw-r--r--Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt52
-rw-r--r--Documentation/devicetree/bindings/spi/fsl-imx-cspi.yaml106
-rw-r--r--Documentation/devicetree/bindings/spi/fsl-spi.txt58
-rw-r--r--Documentation/devicetree/bindings/spi/hpe,gxp-spifi.yaml56
-rw-r--r--Documentation/devicetree/bindings/spi/ibm,spi-fsi.yaml55
-rw-r--r--Documentation/devicetree/bindings/spi/ingenic,spi.yaml75
-rw-r--r--Documentation/devicetree/bindings/spi/loongson,ls2k-spi.yaml46
-rw-r--r--Documentation/devicetree/bindings/spi/marvell,armada-3700-spi.yaml55
-rw-r--r--Documentation/devicetree/bindings/spi/marvell,mmp2-ssp.yaml85
-rw-r--r--Documentation/devicetree/bindings/spi/marvell,orion-spi.yaml102
-rw-r--r--Documentation/devicetree/bindings/spi/mediatek,spi-mt65xx.yaml121
-rw-r--r--Documentation/devicetree/bindings/spi/mediatek,spi-mtk-nor.yaml99
-rw-r--r--Documentation/devicetree/bindings/spi/mediatek,spi-mtk-snfi.yaml124
-rw-r--r--Documentation/devicetree/bindings/spi/mediatek,spi-slave-mt27xx.yaml58
-rw-r--r--Documentation/devicetree/bindings/spi/microchip,mpfs-spi.yaml86
-rw-r--r--Documentation/devicetree/bindings/spi/mikrotik,rb4xx-spi.yaml38
-rw-r--r--Documentation/devicetree/bindings/spi/mxicy,mx25f0a-spi.yaml65
-rw-r--r--Documentation/devicetree/bindings/spi/mxs-spi.txt26
-rw-r--r--Documentation/devicetree/bindings/spi/mxs-spi.yaml59
-rw-r--r--Documentation/devicetree/bindings/spi/nuvoton,npcm-fiu.txt58
-rw-r--r--Documentation/devicetree/bindings/spi/nuvoton,npcm-pspi.txt36
-rw-r--r--Documentation/devicetree/bindings/spi/nuvoton,wpcm450-fiu.yaml61
-rw-r--r--Documentation/devicetree/bindings/spi/nvidia,tegra114-spi.txt41
-rw-r--r--Documentation/devicetree/bindings/spi/nvidia,tegra114-spi.yaml100
-rw-r--r--Documentation/devicetree/bindings/spi/nvidia,tegra20-sflash.txt37
-rw-r--r--Documentation/devicetree/bindings/spi/nvidia,tegra20-sflash.yaml81
-rw-r--r--Documentation/devicetree/bindings/spi/nvidia,tegra20-slink.txt37
-rw-r--r--Documentation/devicetree/bindings/spi/nvidia,tegra20-slink.yaml90
-rw-r--r--Documentation/devicetree/bindings/spi/nvidia,tegra210-quad-peripheral-props.yaml32
-rw-r--r--Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml111
-rw-r--r--Documentation/devicetree/bindings/spi/nxp,lpc3220-spi.yaml44
-rw-r--r--Documentation/devicetree/bindings/spi/nxp,sc18is.yaml51
-rw-r--r--Documentation/devicetree/bindings/spi/omap-spi.txt47
-rw-r--r--Documentation/devicetree/bindings/spi/omap-spi.yaml117
-rw-r--r--Documentation/devicetree/bindings/spi/qca,ar934x-spi.yaml43
-rw-r--r--Documentation/devicetree/bindings/spi/qcom,spi-geni-qcom.txt39
-rw-r--r--Documentation/devicetree/bindings/spi/qcom,spi-geni-qcom.yaml119
-rw-r--r--Documentation/devicetree/bindings/spi/qcom,spi-qcom-qspi.txt36
-rw-r--r--Documentation/devicetree/bindings/spi/qcom,spi-qcom-qspi.yaml100
-rw-r--r--Documentation/devicetree/bindings/spi/qcom,spi-qpic-snand.yaml87
-rw-r--r--Documentation/devicetree/bindings/spi/qcom,spi-qup.txt103
-rw-r--r--Documentation/devicetree/bindings/spi/qcom,spi-qup.yaml94
-rw-r--r--Documentation/devicetree/bindings/spi/ralink,mt7621-spi.yaml61
-rw-r--r--Documentation/devicetree/bindings/spi/realtek,rtl-spi.yaml41
-rw-r--r--Documentation/devicetree/bindings/spi/realtek,rtl9301-snand.yaml62
-rw-r--r--Documentation/devicetree/bindings/spi/renesas,hspi.yaml58
-rw-r--r--Documentation/devicetree/bindings/spi/renesas,rspi.yaml155
-rw-r--r--Documentation/devicetree/bindings/spi/renesas,rzv2h-rspi.yaml96
-rw-r--r--Documentation/devicetree/bindings/spi/renesas,rzv2m-csi.yaml79
-rw-r--r--Documentation/devicetree/bindings/spi/renesas,sh-msiof.yaml198
-rw-r--r--Documentation/devicetree/bindings/spi/rockchip-sfc.yaml93
-rw-r--r--Documentation/devicetree/bindings/spi/samsung,spi-peripheral-props.yaml33
-rw-r--r--Documentation/devicetree/bindings/spi/samsung,spi.yaml194
-rw-r--r--Documentation/devicetree/bindings/spi/sh-hspi.txt26
-rw-r--r--Documentation/devicetree/bindings/spi/sh-msiof.txt103
-rw-r--r--Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.txt32
-rw-r--r--Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml204
-rw-r--r--Documentation/devicetree/bindings/spi/socionext,f-ospi.yaml57
-rw-r--r--Documentation/devicetree/bindings/spi/socionext,synquacer-spi.yaml73
-rw-r--r--Documentation/devicetree/bindings/spi/socionext,uniphier-spi.yaml56
-rw-r--r--Documentation/devicetree/bindings/spi/spi-armada-3700.txt25
-rw-r--r--Documentation/devicetree/bindings/spi/spi-bcm63xx-hsspi.txt33
-rw-r--r--Documentation/devicetree/bindings/spi/spi-bcm63xx.txt33
-rw-r--r--Documentation/devicetree/bindings/spi/spi-bus.txt112
-rw-r--r--Documentation/devicetree/bindings/spi/spi-cadence.txt30
-rw-r--r--Documentation/devicetree/bindings/spi/spi-cadence.yaml86
-rw-r--r--Documentation/devicetree/bindings/spi/spi-controller.yaml216
-rw-r--r--Documentation/devicetree/bindings/spi/spi-davinci.txt8
-rw-r--r--Documentation/devicetree/bindings/spi/spi-dw.txt24
-rw-r--r--Documentation/devicetree/bindings/spi/spi-fsl-dspi.txt59
-rw-r--r--Documentation/devicetree/bindings/spi/spi-fsl-lpspi.txt19
-rw-r--r--Documentation/devicetree/bindings/spi/spi-fsl-lpspi.yaml103
-rw-r--r--Documentation/devicetree/bindings/spi/spi-gpio.txt43
-rw-r--r--Documentation/devicetree/bindings/spi/spi-gpio.yaml74
-rw-r--r--Documentation/devicetree/bindings/spi/spi-lantiq-ssc.txt21
-rw-r--r--Documentation/devicetree/bindings/spi/spi-meson.txt55
-rw-r--r--Documentation/devicetree/bindings/spi/spi-mt65xx.txt60
-rw-r--r--Documentation/devicetree/bindings/spi/spi-mux.yaml87
-rw-r--r--Documentation/devicetree/bindings/spi/spi-nxp-fspi.yaml97
-rw-r--r--Documentation/devicetree/bindings/spi/spi-orion.txt79
-rw-r--r--Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml131
-rw-r--r--Documentation/devicetree/bindings/spi/spi-pl022.yaml111
-rw-r--r--Documentation/devicetree/bindings/spi/spi-pxa2xx.txt24
-rw-r--r--Documentation/devicetree/bindings/spi/spi-rockchip.txt58
-rw-r--r--Documentation/devicetree/bindings/spi/spi-rockchip.yaml119
-rw-r--r--Documentation/devicetree/bindings/spi/spi-rspi.txt72
-rw-r--r--Documentation/devicetree/bindings/spi/spi-samsung.txt122
-rw-r--r--Documentation/devicetree/bindings/spi/spi-sc18is602.txt23
-rw-r--r--Documentation/devicetree/bindings/spi/spi-sg2044-nor.yaml54
-rw-r--r--Documentation/devicetree/bindings/spi/spi-sifive.yaml87
-rw-r--r--Documentation/devicetree/bindings/spi/spi-sirf.txt42
-rw-r--r--Documentation/devicetree/bindings/spi/spi-slave-mt27xx.txt32
-rw-r--r--Documentation/devicetree/bindings/spi/spi-sprd-adi.txt58
-rw-r--r--Documentation/devicetree/bindings/spi/spi-sprd.txt26
-rw-r--r--Documentation/devicetree/bindings/spi/spi-st-ssc.txt40
-rw-r--r--Documentation/devicetree/bindings/spi/spi-stm32-qspi.txt44
-rw-r--r--Documentation/devicetree/bindings/spi/spi-stm32.txt59
-rw-r--r--Documentation/devicetree/bindings/spi/spi-sun4i.txt23
-rw-r--r--Documentation/devicetree/bindings/spi/spi-sun6i.txt44
-rw-r--r--Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml78
-rw-r--r--Documentation/devicetree/bindings/spi/spi-uniphier.txt22
-rw-r--r--Documentation/devicetree/bindings/spi/spi-xilinx.txt21
-rw-r--r--Documentation/devicetree/bindings/spi/spi-xilinx.yaml55
-rw-r--r--Documentation/devicetree/bindings/spi/spi-xlp.txt38
-rw-r--r--Documentation/devicetree/bindings/spi/spi-zynqmp-qspi.txt25
-rw-r--r--Documentation/devicetree/bindings/spi/spi-zynqmp-qspi.yaml81
-rw-r--r--Documentation/devicetree/bindings/spi/spi_atmel.txt36
-rw-r--r--Documentation/devicetree/bindings/spi/spi_pl022.txt70
-rw-r--r--Documentation/devicetree/bindings/spi/sprd,sc9860-spi.yaml72
-rw-r--r--Documentation/devicetree/bindings/spi/sprd,spi-adi.yaml103
-rw-r--r--Documentation/devicetree/bindings/spi/st,ssc-spi.yaml61
-rw-r--r--Documentation/devicetree/bindings/spi/st,stm32-qspi.yaml89
-rw-r--r--Documentation/devicetree/bindings/spi/st,stm32-spi.yaml130
-rw-r--r--Documentation/devicetree/bindings/spi/st,stm32mp25-ospi.yaml106
-rw-r--r--Documentation/devicetree/bindings/spi/ti,qspi.yaml96
-rw-r--r--Documentation/devicetree/bindings/spi/ti_qspi.txt53
-rw-r--r--Documentation/devicetree/bindings/spi/xlnx,zynq-qspi.yaml59
-rw-r--r--Documentation/devicetree/bindings/spmi/apple,spmi.yaml57
-rw-r--r--Documentation/devicetree/bindings/spmi/hisilicon,hisi-spmi-controller.yaml70
-rw-r--r--Documentation/devicetree/bindings/spmi/mtk,spmi-mtk-pmif.yaml82
-rw-r--r--Documentation/devicetree/bindings/spmi/qcom,spmi-pmic-arb.txt65
-rw-r--r--Documentation/devicetree/bindings/spmi/qcom,spmi-pmic-arb.yaml128
-rw-r--r--Documentation/devicetree/bindings/spmi/qcom,x1e80100-spmi-pmic-arb.yaml140
-rw-r--r--Documentation/devicetree/bindings/spmi/spmi.txt41
-rw-r--r--Documentation/devicetree/bindings/spmi/spmi.yaml75
-rw-r--r--Documentation/devicetree/bindings/sram/allwinner,sun4i-a10-system-control.yaml163
-rw-r--r--Documentation/devicetree/bindings/sram/qcom,imem.yaml99
-rw-r--r--Documentation/devicetree/bindings/sram/qcom,ocmem.yaml123
-rw-r--r--Documentation/devicetree/bindings/sram/renesas,smp-sram.txt27
-rw-r--r--Documentation/devicetree/bindings/sram/rockchip-pmu-sram.txt16
-rw-r--r--Documentation/devicetree/bindings/sram/rockchip-smp-sram.txt30
-rw-r--r--Documentation/devicetree/bindings/sram/samsung-sram.txt38
-rw-r--r--Documentation/devicetree/bindings/sram/sram.txt80
-rw-r--r--Documentation/devicetree/bindings/sram/sram.yaml306
-rw-r--r--Documentation/devicetree/bindings/sram/sunxi-sram.txt103
-rw-r--r--Documentation/devicetree/bindings/staging/iio/adc/lpc32xx-adc.txt16
-rw-r--r--Documentation/devicetree/bindings/staging/iio/adc/spear-adc.txt24
-rw-r--r--Documentation/devicetree/bindings/submitting-patches.rst114
-rw-r--r--Documentation/devicetree/bindings/submitting-patches.txt74
-rw-r--r--Documentation/devicetree/bindings/thermal/airoha,en7581-thermal.yaml48
-rw-r--r--Documentation/devicetree/bindings/thermal/allwinner,sun8i-a83t-ths.yaml179
-rw-r--r--Documentation/devicetree/bindings/thermal/amazon,al-thermal.txt33
-rw-r--r--Documentation/devicetree/bindings/thermal/amlogic,thermal.yaml65
-rw-r--r--Documentation/devicetree/bindings/thermal/armada-thermal.txt41
-rw-r--r--Documentation/devicetree/bindings/thermal/brcm,avs-ro-thermal.yaml48
-rw-r--r--Documentation/devicetree/bindings/thermal/brcm,avs-tmon.txt19
-rw-r--r--Documentation/devicetree/bindings/thermal/brcm,avs-tmon.yaml56
-rw-r--r--Documentation/devicetree/bindings/thermal/brcm,bcm2835-thermal.txt41
-rw-r--r--Documentation/devicetree/bindings/thermal/brcm,bcm2835-thermal.yaml47
-rw-r--r--Documentation/devicetree/bindings/thermal/brcm,ns-thermal.txt37
-rw-r--r--Documentation/devicetree/bindings/thermal/brcm,ns-thermal.yaml60
-rw-r--r--Documentation/devicetree/bindings/thermal/brcm,sr-thermal.txt105
-rw-r--r--Documentation/devicetree/bindings/thermal/da9062-thermal.txt36
-rw-r--r--Documentation/devicetree/bindings/thermal/dlg,da9062-thermal.yaml35
-rw-r--r--Documentation/devicetree/bindings/thermal/exynos-thermal.txt106
-rw-r--r--Documentation/devicetree/bindings/thermal/fsl,scu-thermal.yaml39
-rw-r--r--Documentation/devicetree/bindings/thermal/generic-adc-thermal.yaml85
-rw-r--r--Documentation/devicetree/bindings/thermal/hisilicon,tsensor.yaml57
-rw-r--r--Documentation/devicetree/bindings/thermal/hisilicon-thermal.txt32
-rw-r--r--Documentation/devicetree/bindings/thermal/imx-thermal.txt61
-rw-r--r--Documentation/devicetree/bindings/thermal/imx-thermal.yaml114
-rw-r--r--Documentation/devicetree/bindings/thermal/imx8mm-thermal.yaml72
-rw-r--r--Documentation/devicetree/bindings/thermal/loongson,ls2k-thermal.yaml71
-rw-r--r--Documentation/devicetree/bindings/thermal/marvell,armada-ap806-thermal.yaml46
-rw-r--r--Documentation/devicetree/bindings/thermal/marvell,armada370-thermal.yaml37
-rw-r--r--Documentation/devicetree/bindings/thermal/max77620_thermal.txt6
-rw-r--r--Documentation/devicetree/bindings/thermal/mediatek,lvts-thermal.yaml148
-rw-r--r--Documentation/devicetree/bindings/thermal/mediatek,thermal.yaml106
-rw-r--r--Documentation/devicetree/bindings/thermal/mediatek-thermal.txt47
-rw-r--r--Documentation/devicetree/bindings/thermal/nvidia,tegra124-soctherm.txt184
-rw-r--r--Documentation/devicetree/bindings/thermal/nvidia,tegra124-soctherm.yaml388
-rw-r--r--Documentation/devicetree/bindings/thermal/nvidia,tegra186-bpmp-thermal.txt32
-rw-r--r--Documentation/devicetree/bindings/thermal/nvidia,tegra186-bpmp-thermal.yaml34
-rw-r--r--Documentation/devicetree/bindings/thermal/nvidia,tegra30-tsensor.yaml70
-rw-r--r--Documentation/devicetree/bindings/thermal/qcom,spmi-temp-alarm.yaml84
-rw-r--r--Documentation/devicetree/bindings/thermal/qcom-lmh.yaml88
-rw-r--r--Documentation/devicetree/bindings/thermal/qcom-spmi-adc-tm-hc.yaml149
-rw-r--r--Documentation/devicetree/bindings/thermal/qcom-spmi-adc-tm5.yaml263
-rw-r--r--Documentation/devicetree/bindings/thermal/qcom-spmi-temp-alarm.txt51
-rw-r--r--Documentation/devicetree/bindings/thermal/qcom-tsens.txt41
-rw-r--r--Documentation/devicetree/bindings/thermal/qcom-tsens.yaml434
-rw-r--r--Documentation/devicetree/bindings/thermal/qoriq-thermal.txt70
-rw-r--r--Documentation/devicetree/bindings/thermal/qoriq-thermal.yaml118
-rw-r--r--Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.txt59
-rw-r--r--Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.yaml153
-rw-r--r--Documentation/devicetree/bindings/thermal/rcar-thermal.txt76
-rw-r--r--Documentation/devicetree/bindings/thermal/rcar-thermal.yaml155
-rw-r--r--Documentation/devicetree/bindings/thermal/renesas,r9a08g045-tsu.yaml93
-rw-r--r--Documentation/devicetree/bindings/thermal/renesas,r9a09g047-tsu.yaml87
-rw-r--r--Documentation/devicetree/bindings/thermal/rockchip-thermal.txt84
-rw-r--r--Documentation/devicetree/bindings/thermal/rockchip-thermal.yaml178
-rw-r--r--Documentation/devicetree/bindings/thermal/rzg2l-thermal.yaml79
-rw-r--r--Documentation/devicetree/bindings/thermal/samsung,exynos-thermal.yaml185
-rw-r--r--Documentation/devicetree/bindings/thermal/socionext,uniphier-thermal.yaml55
-rw-r--r--Documentation/devicetree/bindings/thermal/sprd-thermal.yaml112
-rw-r--r--Documentation/devicetree/bindings/thermal/st,stih407-thermal.yaml58
-rw-r--r--Documentation/devicetree/bindings/thermal/st,stm32-thermal.yaml80
-rw-r--r--Documentation/devicetree/bindings/thermal/st-thermal.txt32
-rw-r--r--Documentation/devicetree/bindings/thermal/stm32-thermal.txt61
-rw-r--r--Documentation/devicetree/bindings/thermal/tango-thermal.txt17
-rw-r--r--Documentation/devicetree/bindings/thermal/thermal-cooling-devices.yaml122
-rw-r--r--Documentation/devicetree/bindings/thermal/thermal-generic-adc.txt89
-rw-r--r--Documentation/devicetree/bindings/thermal/thermal-idle.yaml152
-rw-r--r--Documentation/devicetree/bindings/thermal/thermal-sensor.yaml77
-rw-r--r--Documentation/devicetree/bindings/thermal/thermal-zones.yaml359
-rw-r--r--Documentation/devicetree/bindings/thermal/thermal.txt586
-rw-r--r--Documentation/devicetree/bindings/thermal/ti,am654-thermal.yaml57
-rw-r--r--Documentation/devicetree/bindings/thermal/ti,j72xx-thermal.yaml97
-rw-r--r--Documentation/devicetree/bindings/thermal/uniphier-thermal.txt65
-rw-r--r--Documentation/devicetree/bindings/thermal/zx2967-thermal.txt116
-rw-r--r--Documentation/devicetree/bindings/timer/actions,owl-timer.txt21
-rw-r--r--Documentation/devicetree/bindings/timer/actions,owl-timer.yaml107
-rw-r--r--Documentation/devicetree/bindings/timer/allwinner,sun4i-a10-timer.yaml101
-rw-r--r--Documentation/devicetree/bindings/timer/allwinner,sun4i-timer.txt17
-rw-r--r--Documentation/devicetree/bindings/timer/allwinner,sun5i-a13-hstimer.txt26
-rw-r--r--Documentation/devicetree/bindings/timer/allwinner,sun5i-a13-hstimer.yaml77
-rw-r--r--Documentation/devicetree/bindings/timer/altr,timer-1.0.txt18
-rw-r--r--Documentation/devicetree/bindings/timer/altr,timer-1.0.yaml39
-rw-r--r--Documentation/devicetree/bindings/timer/amlogic,meson6-timer.txt15
-rw-r--r--Documentation/devicetree/bindings/timer/amlogic,meson6-timer.yaml54
-rw-r--r--Documentation/devicetree/bindings/timer/andestech,atcpit100-timer.txt33
-rw-r--r--Documentation/devicetree/bindings/timer/andestech,plmt0.yaml53
-rw-r--r--Documentation/devicetree/bindings/timer/arm,arch_timer.txt112
-rw-r--r--Documentation/devicetree/bindings/timer/arm,arch_timer.yaml128
-rw-r--r--Documentation/devicetree/bindings/timer/arm,arch_timer_mmio.yaml123
-rw-r--r--Documentation/devicetree/bindings/timer/arm,armv7m-systick.txt26
-rw-r--r--Documentation/devicetree/bindings/timer/arm,armv7m-systick.yaml54
-rw-r--r--Documentation/devicetree/bindings/timer/arm,global_timer.txt27
-rw-r--r--Documentation/devicetree/bindings/timer/arm,global_timer.yaml48
-rw-r--r--Documentation/devicetree/bindings/timer/arm,mps2-timer.txt28
-rw-r--r--Documentation/devicetree/bindings/timer/arm,mps2-timer.yaml49
-rw-r--r--Documentation/devicetree/bindings/timer/arm,sp804.txt29
-rw-r--r--Documentation/devicetree/bindings/timer/arm,sp804.yaml97
-rw-r--r--Documentation/devicetree/bindings/timer/arm,twd-timer.yaml56
-rw-r--r--Documentation/devicetree/bindings/timer/arm,twd.txt53
-rw-r--r--Documentation/devicetree/bindings/timer/brcm,bcm2835-system-timer.txt22
-rw-r--r--Documentation/devicetree/bindings/timer/brcm,bcm2835-system-timer.yaml50
-rw-r--r--Documentation/devicetree/bindings/timer/brcm,bcmbca-timer.yaml40
-rw-r--r--Documentation/devicetree/bindings/timer/brcm,kona-timer.txt25
-rw-r--r--Documentation/devicetree/bindings/timer/brcm,kona-timer.yaml52
-rw-r--r--Documentation/devicetree/bindings/timer/cadence,ttc-timer.txt21
-rw-r--r--Documentation/devicetree/bindings/timer/cdns,ttc.yaml72
-rw-r--r--Documentation/devicetree/bindings/timer/cirrus,clps711x-timer.txt29
-rw-r--r--Documentation/devicetree/bindings/timer/cirrus,clps711x-timer.yaml45
-rw-r--r--Documentation/devicetree/bindings/timer/cirrus,ep9301-timer.yaml49
-rw-r--r--Documentation/devicetree/bindings/timer/cnxt,cx92755-timer.yaml49
-rw-r--r--Documentation/devicetree/bindings/timer/csky,gx6605s-timer.txt42
-rw-r--r--Documentation/devicetree/bindings/timer/csky,gx6605s-timer.yaml40
-rw-r--r--Documentation/devicetree/bindings/timer/csky,mptimer.txt42
-rw-r--r--Documentation/devicetree/bindings/timer/csky,mptimer.yaml46
-rw-r--r--Documentation/devicetree/bindings/timer/digicolor-timer.txt18
-rw-r--r--Documentation/devicetree/bindings/timer/econet,en751221-timer.yaml80
-rw-r--r--Documentation/devicetree/bindings/timer/energymicro,efm32-timer.txt23
-rw-r--r--Documentation/devicetree/bindings/timer/ezchip,nps400-timer.yaml45
-rw-r--r--Documentation/devicetree/bindings/timer/ezchip,nps400-timer0.txt17
-rw-r--r--Documentation/devicetree/bindings/timer/ezchip,nps400-timer1.txt15
-rw-r--r--Documentation/devicetree/bindings/timer/faraday,fttmr010.txt37
-rw-r--r--Documentation/devicetree/bindings/timer/faraday,fttmr010.yaml89
-rw-r--r--Documentation/devicetree/bindings/timer/fsl,ftm-timer.txt31
-rw-r--r--Documentation/devicetree/bindings/timer/fsl,ftm-timer.yaml62
-rw-r--r--Documentation/devicetree/bindings/timer/fsl,gtm.txt30
-rw-r--r--Documentation/devicetree/bindings/timer/fsl,gtm.yaml83
-rw-r--r--Documentation/devicetree/bindings/timer/fsl,imxgpt.txt18
-rw-r--r--Documentation/devicetree/bindings/timer/fsl,imxgpt.yaml108
-rw-r--r--Documentation/devicetree/bindings/timer/fsl,timrot.yaml48
-rw-r--r--Documentation/devicetree/bindings/timer/fsl,vf610-pit.yaml59
-rw-r--r--Documentation/devicetree/bindings/timer/hpe,gxp-timer.yaml47
-rw-r--r--Documentation/devicetree/bindings/timer/img,pistachio-gptimer.txt28
-rw-r--r--Documentation/devicetree/bindings/timer/img,pistachio-gptimer.yaml69
-rw-r--r--Documentation/devicetree/bindings/timer/ingenic,sysost.yaml63
-rw-r--r--Documentation/devicetree/bindings/timer/ingenic,tcu.yaml302
-rw-r--r--Documentation/devicetree/bindings/timer/intel,ixp4xx-timer.yaml43
-rw-r--r--Documentation/devicetree/bindings/timer/jcore,pit.txt24
-rw-r--r--Documentation/devicetree/bindings/timer/jcore,pit.yaml43
-rw-r--r--Documentation/devicetree/bindings/timer/loongson,ls1x-pwmtimer.yaml48
-rw-r--r--Documentation/devicetree/bindings/timer/lsi,zevio-timer.txt33
-rw-r--r--Documentation/devicetree/bindings/timer/lsi,zevio-timer.yaml56
-rw-r--r--Documentation/devicetree/bindings/timer/marvell,armada-370-timer.yaml88
-rw-r--r--Documentation/devicetree/bindings/timer/marvell,armada-370-xp-timer.txt44
-rw-r--r--Documentation/devicetree/bindings/timer/marvell,orion-timer.txt16
-rw-r--r--Documentation/devicetree/bindings/timer/marvell,orion-timer.yaml43
-rw-r--r--Documentation/devicetree/bindings/timer/mediatek,mtk-timer.txt31
-rw-r--r--Documentation/devicetree/bindings/timer/mediatek,timer.yaml87
-rw-r--r--Documentation/devicetree/bindings/timer/mrvl,mmp-timer.txt13
-rw-r--r--Documentation/devicetree/bindings/timer/mrvl,mmp-timer.yaml46
-rw-r--r--Documentation/devicetree/bindings/timer/mstar,msc313e-timer.yaml46
-rw-r--r--Documentation/devicetree/bindings/timer/nuvoton,npcm7xx-timer.txt21
-rw-r--r--Documentation/devicetree/bindings/timer/nuvoton,npcm7xx-timer.yaml54
-rw-r--r--Documentation/devicetree/bindings/timer/nvidia,tegra-timer.yaml150
-rw-r--r--Documentation/devicetree/bindings/timer/nvidia,tegra186-timer.yaml109
-rw-r--r--Documentation/devicetree/bindings/timer/nvidia,tegra20-timer.txt24
-rw-r--r--Documentation/devicetree/bindings/timer/nvidia,tegra30-timer.txt28
-rw-r--r--Documentation/devicetree/bindings/timer/nxp,lpc3220-timer.txt26
-rw-r--r--Documentation/devicetree/bindings/timer/nxp,lpc3220-timer.yaml55
-rw-r--r--Documentation/devicetree/bindings/timer/nxp,s32g2-stm.yaml64
-rw-r--r--Documentation/devicetree/bindings/timer/nxp,sysctr-timer.yaml65
-rw-r--r--Documentation/devicetree/bindings/timer/nxp,tpm-timer.txt28
-rw-r--r--Documentation/devicetree/bindings/timer/nxp,tpm-timer.yaml65
-rw-r--r--Documentation/devicetree/bindings/timer/oxsemi,rps-timer.txt17
-rw-r--r--Documentation/devicetree/bindings/timer/qcom,msm-timer.txt47
-rw-r--r--Documentation/devicetree/bindings/timer/ralink,cevt-systick.yaml38
-rw-r--r--Documentation/devicetree/bindings/timer/ralink,rt2880-timer.yaml44
-rw-r--r--Documentation/devicetree/bindings/timer/rda,8810pl-timer.yaml47
-rw-r--r--Documentation/devicetree/bindings/timer/realtek,otto-timer.yaml63
-rw-r--r--Documentation/devicetree/bindings/timer/renesas,16bit-timer.txt25
-rw-r--r--Documentation/devicetree/bindings/timer/renesas,8bit-timer.txt25
-rw-r--r--Documentation/devicetree/bindings/timer/renesas,cmt.txt85
-rw-r--r--Documentation/devicetree/bindings/timer/renesas,cmt.yaml206
-rw-r--r--Documentation/devicetree/bindings/timer/renesas,em-sti.yaml46
-rw-r--r--Documentation/devicetree/bindings/timer/renesas,mtu2.txt42
-rw-r--r--Documentation/devicetree/bindings/timer/renesas,mtu2.yaml76
-rw-r--r--Documentation/devicetree/bindings/timer/renesas,ostm.txt31
-rw-r--r--Documentation/devicetree/bindings/timer/renesas,ostm.yaml79
-rw-r--r--Documentation/devicetree/bindings/timer/renesas,rz-mtu3.yaml306
-rw-r--r--Documentation/devicetree/bindings/timer/renesas,tmu.txt46
-rw-r--r--Documentation/devicetree/bindings/timer/renesas,tmu.yaml136
-rw-r--r--Documentation/devicetree/bindings/timer/renesas,tpu.txt21
-rw-r--r--Documentation/devicetree/bindings/timer/riscv,timer.yaml52
-rw-r--r--Documentation/devicetree/bindings/timer/rockchip,rk-timer.txt26
-rw-r--r--Documentation/devicetree/bindings/timer/rockchip,rk-timer.yaml66
-rw-r--r--Documentation/devicetree/bindings/timer/samsung,exynos4210-mct.txt88
-rw-r--r--Documentation/devicetree/bindings/timer/samsung,exynos4210-mct.yaml247
-rw-r--r--Documentation/devicetree/bindings/timer/sifive,clint.yaml105
-rw-r--r--Documentation/devicetree/bindings/timer/snps,arc-timer.txt27
-rw-r--r--Documentation/devicetree/bindings/timer/snps,arc-timer.yaml45
-rw-r--r--Documentation/devicetree/bindings/timer/snps,archs-gfrc.txt14
-rw-r--r--Documentation/devicetree/bindings/timer/snps,archs-gfrc.yaml30
-rw-r--r--Documentation/devicetree/bindings/timer/snps,archs-rtc.txt14
-rw-r--r--Documentation/devicetree/bindings/timer/snps,archs-rtc.yaml30
-rw-r--r--Documentation/devicetree/bindings/timer/snps,dw-apb-timer.yaml84
-rw-r--r--Documentation/devicetree/bindings/timer/socionext,milbeaut-timer.yaml40
-rw-r--r--Documentation/devicetree/bindings/timer/sprd,sc9860-timer.yaml68
-rw-r--r--Documentation/devicetree/bindings/timer/spreadtrum,sprd-timer.txt20
-rw-r--r--Documentation/devicetree/bindings/timer/st,nomadik-mtu.yaml58
-rw-r--r--Documentation/devicetree/bindings/timer/st,spear-timer.txt16
-rw-r--r--Documentation/devicetree/bindings/timer/st,spear-timer.yaml36
-rw-r--r--Documentation/devicetree/bindings/timer/st,stm32-timer.txt22
-rw-r--r--Documentation/devicetree/bindings/timer/st,stm32-timer.yaml48
-rw-r--r--Documentation/devicetree/bindings/timer/stericsson-u300-apptimer.txt18
-rw-r--r--Documentation/devicetree/bindings/timer/thead,c900-aclint-mtimer.yaml51
-rw-r--r--Documentation/devicetree/bindings/timer/ti,c64x+timer64.txt25
-rw-r--r--Documentation/devicetree/bindings/timer/ti,da830-timer.yaml68
-rw-r--r--Documentation/devicetree/bindings/timer/ti,davinci-timer.txt37
-rw-r--r--Documentation/devicetree/bindings/timer/ti,keystone-timer.txt29
-rw-r--r--Documentation/devicetree/bindings/timer/ti,keystone-timer.yaml63
-rw-r--r--Documentation/devicetree/bindings/timer/ti,timer-dm.yaml159
-rw-r--r--Documentation/devicetree/bindings/timer/ti,timer.txt44
-rw-r--r--Documentation/devicetree/bindings/timer/via,vt8500-timer.txt15
-rw-r--r--Documentation/devicetree/bindings/timer/via,vt8500-timer.yaml51
-rw-r--r--Documentation/devicetree/bindings/timer/xlnx,xps-timer.yaml92
-rw-r--r--Documentation/devicetree/bindings/timestamp/hardware-timestamps-common.yaml29
-rw-r--r--Documentation/devicetree/bindings/timestamp/hte-consumer.yaml39
-rw-r--r--Documentation/devicetree/bindings/timestamp/nvidia,tegra194-hte.yaml140
-rw-r--r--Documentation/devicetree/bindings/tpm/google,cr50.yaml65
-rw-r--r--Documentation/devicetree/bindings/tpm/ibm,vtpm.yaml104
-rw-r--r--Documentation/devicetree/bindings/tpm/microsoft,ftpm.yaml47
-rw-r--r--Documentation/devicetree/bindings/tpm/tcg,tpm-tis-i2c.yaml91
-rw-r--r--Documentation/devicetree/bindings/tpm/tcg,tpm-tis-mmio.yaml49
-rw-r--r--Documentation/devicetree/bindings/tpm/tcg,tpm_tis-spi.yaml76
-rw-r--r--Documentation/devicetree/bindings/tpm/tpm-common.yaml87
-rw-r--r--Documentation/devicetree/bindings/trigger-source/adi,util-sigma-delta-spi.yaml49
-rw-r--r--Documentation/devicetree/bindings/trigger-source/gpio-trigger.yaml40
-rw-r--r--Documentation/devicetree/bindings/trigger-source/pwm-trigger.yaml37
-rw-r--r--Documentation/devicetree/bindings/trivial-devices.txt190
-rw-r--r--Documentation/devicetree/bindings/trivial-devices.yaml490
-rw-r--r--Documentation/devicetree/bindings/ufs/cdns,ufshc.yaml73
-rw-r--r--Documentation/devicetree/bindings/ufs/hisilicon,ufs.yaml90
-rw-r--r--Documentation/devicetree/bindings/ufs/mediatek,ufs.yaml101
-rw-r--r--Documentation/devicetree/bindings/ufs/qcom,sc7180-ufshc.yaml167
-rw-r--r--Documentation/devicetree/bindings/ufs/qcom,sm8650-ufshc.yaml178
-rw-r--r--Documentation/devicetree/bindings/ufs/qcom,ufs-common.yaml67
-rw-r--r--Documentation/devicetree/bindings/ufs/qcom,ufs.yaml243
-rw-r--r--Documentation/devicetree/bindings/ufs/renesas,ufs.yaml73
-rw-r--r--Documentation/devicetree/bindings/ufs/rockchip,rk3576-ufshc.yaml105
-rw-r--r--Documentation/devicetree/bindings/ufs/samsung,exynos-ufs.yaml130
-rw-r--r--Documentation/devicetree/bindings/ufs/snps,tc-dwc-g210.yaml51
-rw-r--r--Documentation/devicetree/bindings/ufs/sprd,ums9620-ufs.yaml79
-rw-r--r--Documentation/devicetree/bindings/ufs/tc-dwc-g210-pltfrm.txt26
-rw-r--r--Documentation/devicetree/bindings/ufs/ti,j721e-ufs.yaml90
-rw-r--r--Documentation/devicetree/bindings/ufs/ufs-common.yaml129
-rw-r--r--Documentation/devicetree/bindings/ufs/ufs-hisi.txt41
-rw-r--r--Documentation/devicetree/bindings/ufs/ufs-qcom.txt60
-rw-r--r--Documentation/devicetree/bindings/ufs/ufshcd-pltfrm.txt73
-rw-r--r--Documentation/devicetree/bindings/usb/allwinner,sun4i-a10-musb.txt28
-rw-r--r--Documentation/devicetree/bindings/usb/allwinner,sun4i-a10-musb.yaml108
-rw-r--r--Documentation/devicetree/bindings/usb/am33xx-usb.txt7
-rw-r--r--Documentation/devicetree/bindings/usb/amlogic,dwc3.txt42
-rw-r--r--Documentation/devicetree/bindings/usb/amlogic,meson-g12a-usb-ctrl.yaml237
-rw-r--r--Documentation/devicetree/bindings/usb/analogix,anx7411.yaml70
-rw-r--r--Documentation/devicetree/bindings/usb/aspeed,ast2600-udc.yaml52
-rw-r--r--Documentation/devicetree/bindings/usb/aspeed,usb-vhub.yaml139
-rw-r--r--Documentation/devicetree/bindings/usb/atmel-usb.txt64
-rw-r--r--Documentation/devicetree/bindings/usb/brcm,bcm3384-usb.txt11
-rw-r--r--Documentation/devicetree/bindings/usb/brcm,bcm7445-ehci.yaml59
-rw-r--r--Documentation/devicetree/bindings/usb/brcm,bdc.txt29
-rw-r--r--Documentation/devicetree/bindings/usb/brcm,bdc.yaml50
-rw-r--r--Documentation/devicetree/bindings/usb/brcm,usb-pinmap.yaml73
-rw-r--r--Documentation/devicetree/bindings/usb/cdns,usb3.yaml111
-rw-r--r--Documentation/devicetree/bindings/usb/chipidea,usb2-common.yaml203
-rw-r--r--Documentation/devicetree/bindings/usb/chipidea,usb2-imx.yaml309
-rw-r--r--Documentation/devicetree/bindings/usb/ci-hdrc-usb2.txt113
-rw-r--r--Documentation/devicetree/bindings/usb/ci-hdrc-usb2.yaml121
-rw-r--r--Documentation/devicetree/bindings/usb/cypress,cypd4226.yaml93
-rw-r--r--Documentation/devicetree/bindings/usb/cypress,hx3.yaml89
-rw-r--r--Documentation/devicetree/bindings/usb/da8xx-usb.txt5
-rw-r--r--Documentation/devicetree/bindings/usb/dwc2.txt54
-rw-r--r--Documentation/devicetree/bindings/usb/dwc2.yaml205
-rw-r--r--Documentation/devicetree/bindings/usb/dwc3-cavium.txt2
-rw-r--r--Documentation/devicetree/bindings/usb/dwc3-st.txt6
-rw-r--r--Documentation/devicetree/bindings/usb/dwc3-xilinx.txt32
-rw-r--r--Documentation/devicetree/bindings/usb/dwc3-xilinx.yaml136
-rw-r--r--Documentation/devicetree/bindings/usb/dwc3.txt118
-rw-r--r--Documentation/devicetree/bindings/usb/ehci-mv.txt23
-rw-r--r--Documentation/devicetree/bindings/usb/ehci-omap.txt31
-rw-r--r--Documentation/devicetree/bindings/usb/ehci-orion.txt22
-rw-r--r--Documentation/devicetree/bindings/usb/ehci-st.txt2
-rw-r--r--Documentation/devicetree/bindings/usb/exynos-usb.txt121
-rw-r--r--Documentation/devicetree/bindings/usb/faraday,fotg210.txt35
-rw-r--r--Documentation/devicetree/bindings/usb/faraday,fotg210.yaml78
-rw-r--r--Documentation/devicetree/bindings/usb/fcs,fsa4480.yaml107
-rw-r--r--Documentation/devicetree/bindings/usb/fcs,fusb302.txt41
-rw-r--r--Documentation/devicetree/bindings/usb/fcs,fusb302.yaml67
-rw-r--r--Documentation/devicetree/bindings/usb/fsl,imx8mp-dwc3.yaml135
-rw-r--r--Documentation/devicetree/bindings/usb/fsl,imx8mq-dwc3.yaml48
-rw-r--r--Documentation/devicetree/bindings/usb/fsl,imx8qm-cdns3.yaml103
-rw-r--r--Documentation/devicetree/bindings/usb/fsl,ls1028a.yaml52
-rw-r--r--Documentation/devicetree/bindings/usb/fsl,usb2.yaml95
-rw-r--r--Documentation/devicetree/bindings/usb/fsl,usbmisc.yaml92
-rw-r--r--Documentation/devicetree/bindings/usb/fsl-usb.txt81
-rw-r--r--Documentation/devicetree/bindings/usb/generic-ehci.yaml189
-rw-r--r--Documentation/devicetree/bindings/usb/generic-ohci.yaml182
-rw-r--r--Documentation/devicetree/bindings/usb/generic-xhci.yaml86
-rw-r--r--Documentation/devicetree/bindings/usb/generic.txt46
-rw-r--r--Documentation/devicetree/bindings/usb/genesys,gl850g.yaml113
-rw-r--r--Documentation/devicetree/bindings/usb/gpio-sbu-mux.yaml115
-rw-r--r--Documentation/devicetree/bindings/usb/hisilicon,hi3798mv200-dwc3.yaml99
-rw-r--r--Documentation/devicetree/bindings/usb/ingenic,musb.yaml79
-rw-r--r--Documentation/devicetree/bindings/usb/intel,ixp4xx-udc.yaml39
-rw-r--r--Documentation/devicetree/bindings/usb/intel,keembay-dwc3.yaml77
-rw-r--r--Documentation/devicetree/bindings/usb/isp1301.txt24
-rw-r--r--Documentation/devicetree/bindings/usb/ite,it5205.yaml72
-rw-r--r--Documentation/devicetree/bindings/usb/keystone-usb.txt54
-rw-r--r--Documentation/devicetree/bindings/usb/lpc32xx-udc.txt28
-rw-r--r--Documentation/devicetree/bindings/usb/marvell,pxau2o-ehci.yaml62
-rw-r--r--Documentation/devicetree/bindings/usb/maxim,max33359.yaml81
-rw-r--r--Documentation/devicetree/bindings/usb/maxim,max3420-udc.yaml67
-rw-r--r--Documentation/devicetree/bindings/usb/mediatek,mt6360-tcpc.yaml100
-rw-r--r--Documentation/devicetree/bindings/usb/mediatek,mt6370-tcpc.yaml36
-rw-r--r--Documentation/devicetree/bindings/usb/mediatek,mtk-xhci.txt120
-rw-r--r--Documentation/devicetree/bindings/usb/mediatek,mtk-xhci.yaml231
-rw-r--r--Documentation/devicetree/bindings/usb/mediatek,mtu3.txt98
-rw-r--r--Documentation/devicetree/bindings/usb/mediatek,mtu3.yaml354
-rw-r--r--Documentation/devicetree/bindings/usb/mediatek,musb.yaml115
-rw-r--r--Documentation/devicetree/bindings/usb/microchip,mpfs-musb.yaml69
-rw-r--r--Documentation/devicetree/bindings/usb/microchip,usb2514.yaml90
-rw-r--r--Documentation/devicetree/bindings/usb/microchip,usb5744.yaml110
-rw-r--r--Documentation/devicetree/bindings/usb/msm-hsusb.txt110
-rw-r--r--Documentation/devicetree/bindings/usb/npcm7xx-usb.txt18
-rw-r--r--Documentation/devicetree/bindings/usb/nvidia,tegra-xudc.yaml211
-rw-r--r--Documentation/devicetree/bindings/usb/nvidia,tegra124-xusb.txt120
-rw-r--r--Documentation/devicetree/bindings/usb/nvidia,tegra124-xusb.yaml200
-rw-r--r--Documentation/devicetree/bindings/usb/nvidia,tegra186-xusb.yaml171
-rw-r--r--Documentation/devicetree/bindings/usb/nvidia,tegra194-xusb.yaml175
-rw-r--r--Documentation/devicetree/bindings/usb/nvidia,tegra20-ehci.txt23
-rw-r--r--Documentation/devicetree/bindings/usb/nvidia,tegra210-xusb.yaml195
-rw-r--r--Documentation/devicetree/bindings/usb/nvidia,tegra234-xusb.yaml159
-rw-r--r--Documentation/devicetree/bindings/usb/nxp,isp1760.yaml67
-rw-r--r--Documentation/devicetree/bindings/usb/nxp,lpc3220-udc.yaml50
-rw-r--r--Documentation/devicetree/bindings/usb/nxp,ptn36502.yaml92
-rw-r--r--Documentation/devicetree/bindings/usb/nxp,ptn5110.yaml74
-rw-r--r--Documentation/devicetree/bindings/usb/ohci-nxp.txt24
-rw-r--r--Documentation/devicetree/bindings/usb/ohci-omap3.txt15
-rw-r--r--Documentation/devicetree/bindings/usb/ohci-st.txt2
-rw-r--r--Documentation/devicetree/bindings/usb/omap-usb.txt2
-rw-r--r--Documentation/devicetree/bindings/usb/onnn,nb7vpq904m.yaml138
-rw-r--r--Documentation/devicetree/bindings/usb/parade,ps5511.yaml108
-rw-r--r--Documentation/devicetree/bindings/usb/parade,ps8830.yaml143
-rw-r--r--Documentation/devicetree/bindings/usb/pxa-usb.txt2
-rw-r--r--Documentation/devicetree/bindings/usb/qcom,dwc3.txt103
-rw-r--r--Documentation/devicetree/bindings/usb/qcom,dwc3.yaml641
-rw-r--r--Documentation/devicetree/bindings/usb/qcom,pmic-typec.yaml249
-rw-r--r--Documentation/devicetree/bindings/usb/qcom,snps-dwc3.yaml625
-rw-r--r--Documentation/devicetree/bindings/usb/qcom,wcd939x-usbss.yaml100
-rw-r--r--Documentation/devicetree/bindings/usb/realtek,rtd-dwc3.yaml80
-rw-r--r--Documentation/devicetree/bindings/usb/realtek,rtd-type-c.yaml82
-rw-r--r--Documentation/devicetree/bindings/usb/realtek,rts5411.yaml92
-rw-r--r--Documentation/devicetree/bindings/usb/renesas,rzg3e-xhci.yaml87
-rw-r--r--Documentation/devicetree/bindings/usb/renesas,rzn1-usbf.yaml68
-rw-r--r--Documentation/devicetree/bindings/usb/renesas,rzv2m-usb3drd.yaml129
-rw-r--r--Documentation/devicetree/bindings/usb/renesas,usb-xhci.yaml119
-rw-r--r--Documentation/devicetree/bindings/usb/renesas,usb3-peri.yaml150
-rw-r--r--Documentation/devicetree/bindings/usb/renesas,usbhs.yaml183
-rw-r--r--Documentation/devicetree/bindings/usb/renesas_usb3.txt40
-rw-r--r--Documentation/devicetree/bindings/usb/renesas_usbhs.txt53
-rw-r--r--Documentation/devicetree/bindings/usb/richtek,rt1711h.txt29
-rw-r--r--Documentation/devicetree/bindings/usb/richtek,rt1711h.yaml103
-rw-r--r--Documentation/devicetree/bindings/usb/richtek,rt1719.yaml85
-rw-r--r--Documentation/devicetree/bindings/usb/rockchip,dwc3.txt56
-rw-r--r--Documentation/devicetree/bindings/usb/rockchip,dwc3.yaml181
-rw-r--r--Documentation/devicetree/bindings/usb/rockchip,rk3399-dwc3.yaml115
-rw-r--r--Documentation/devicetree/bindings/usb/s3c2410-usb.txt22
-rw-r--r--Documentation/devicetree/bindings/usb/samsung,exynos-dwc3.yaml223
-rw-r--r--Documentation/devicetree/bindings/usb/samsung,exynos-usb2.yaml107
-rw-r--r--Documentation/devicetree/bindings/usb/samsung-hsotg.txt38
-rw-r--r--Documentation/devicetree/bindings/usb/smsc,usb3503.yaml159
-rw-r--r--Documentation/devicetree/bindings/usb/snps,dwc3-common.yaml432
-rw-r--r--Documentation/devicetree/bindings/usb/snps,dwc3.yaml106
-rw-r--r--Documentation/devicetree/bindings/usb/spacemit,k1-dwc3.yaml121
-rw-r--r--Documentation/devicetree/bindings/usb/spear-usb.txt35
-rw-r--r--Documentation/devicetree/bindings/usb/st,stusb160x.yaml91
-rw-r--r--Documentation/devicetree/bindings/usb/st,typec-stm32g0.yaml91
-rw-r--r--Documentation/devicetree/bindings/usb/starfive,jh7110-usb.yaml115
-rw-r--r--Documentation/devicetree/bindings/usb/ti,am62-usb.yaml107
-rw-r--r--Documentation/devicetree/bindings/usb/ti,hd3ss3220.yaml81
-rw-r--r--Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml112
-rw-r--r--Documentation/devicetree/bindings/usb/ti,keystone-dwc3.yaml92
-rw-r--r--Documentation/devicetree/bindings/usb/ti,tps6598x.yaml159
-rw-r--r--Documentation/devicetree/bindings/usb/ti,tusb1046.yaml49
-rw-r--r--Documentation/devicetree/bindings/usb/ti,tusb73x0-pci.yaml55
-rw-r--r--Documentation/devicetree/bindings/usb/ti,twl4030-usb.yaml74
-rw-r--r--Documentation/devicetree/bindings/usb/ti,twl6030-usb.yaml48
-rw-r--r--Documentation/devicetree/bindings/usb/ti,usb8020b.yaml69
-rw-r--r--Documentation/devicetree/bindings/usb/ti,usb8041.yaml69
-rw-r--r--Documentation/devicetree/bindings/usb/twlxxxx-usb.txt43
-rw-r--r--Documentation/devicetree/bindings/usb/typec-tcpci.txt49
-rw-r--r--Documentation/devicetree/bindings/usb/udc-xilinx.txt18
-rw-r--r--Documentation/devicetree/bindings/usb/usb-device.txt102
-rw-r--r--Documentation/devicetree/bindings/usb/usb-device.yaml126
-rw-r--r--Documentation/devicetree/bindings/usb/usb-drd.yaml79
-rw-r--r--Documentation/devicetree/bindings/usb/usb-ehci.txt46
-rw-r--r--Documentation/devicetree/bindings/usb/usb-hcd.txt9
-rw-r--r--Documentation/devicetree/bindings/usb/usb-hcd.yaml52
-rw-r--r--Documentation/devicetree/bindings/usb/usb-hub.yaml84
-rw-r--r--Documentation/devicetree/bindings/usb/usb-nop-xceiv.txt43
-rw-r--r--Documentation/devicetree/bindings/usb/usb-nop-xceiv.yaml73
-rw-r--r--Documentation/devicetree/bindings/usb/usb-ohci.txt35
-rw-r--r--Documentation/devicetree/bindings/usb/usb-switch.yaml80
-rw-r--r--Documentation/devicetree/bindings/usb/usb-uhci.txt18
-rw-r--r--Documentation/devicetree/bindings/usb/usb-uhci.yaml75
-rw-r--r--Documentation/devicetree/bindings/usb/usb-xhci.txt54
-rw-r--r--Documentation/devicetree/bindings/usb/usb-xhci.yaml48
-rw-r--r--Documentation/devicetree/bindings/usb/usb.yaml64
-rw-r--r--Documentation/devicetree/bindings/usb/usb251xb.txt84
-rw-r--r--Documentation/devicetree/bindings/usb/usb251xb.yaml278
-rw-r--r--Documentation/devicetree/bindings/usb/usb3503.txt39
-rw-r--r--Documentation/devicetree/bindings/usb/usbmisc-imx.txt17
-rw-r--r--Documentation/devicetree/bindings/usb/vialab,vl817.yaml70
-rw-r--r--Documentation/devicetree/bindings/usb/willsemi,wusb3801.yaml76
-rw-r--r--Documentation/devicetree/bindings/usb/xlnx,usb2.yaml47
-rw-r--r--Documentation/devicetree/bindings/vendor-prefixes.txt442
-rw-r--r--Documentation/devicetree/bindings/vendor-prefixes.yaml1887
-rw-r--r--Documentation/devicetree/bindings/virtio/mmio.txt17
-rw-r--r--Documentation/devicetree/bindings/virtio/mmio.yaml67
-rw-r--r--Documentation/devicetree/bindings/virtio/pci-iommu.yaml99
-rw-r--r--Documentation/devicetree/bindings/virtio/virtio-device.yaml41
-rw-r--r--Documentation/devicetree/bindings/w1/amd,axi-1wire-host.yaml44
-rw-r--r--Documentation/devicetree/bindings/w1/fsl-imx-owire.txt18
-rw-r--r--Documentation/devicetree/bindings/w1/fsl-imx-owire.yaml48
-rw-r--r--Documentation/devicetree/bindings/w1/maxim,ds2482.yaml46
-rw-r--r--Documentation/devicetree/bindings/w1/w1-gpio.txt27
-rw-r--r--Documentation/devicetree/bindings/w1/w1-gpio.yaml43
-rw-r--r--Documentation/devicetree/bindings/w1/w1-uart.yaml59
-rw-r--r--Documentation/devicetree/bindings/watchdog/airoha,en7581-wdt.yaml47
-rw-r--r--Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml98
-rw-r--r--Documentation/devicetree/bindings/watchdog/alphascale,asm9260-wdt.yaml70
-rw-r--r--Documentation/devicetree/bindings/watchdog/alphascale-asm9260.txt35
-rw-r--r--Documentation/devicetree/bindings/watchdog/amlogic,meson-gxbb-wdt.yaml50
-rw-r--r--Documentation/devicetree/bindings/watchdog/amlogic,meson6-wdt.yaml50
-rw-r--r--Documentation/devicetree/bindings/watchdog/apple,wdt.yaml63
-rw-r--r--Documentation/devicetree/bindings/watchdog/arm,sbsa-gwdt.yaml50
-rw-r--r--Documentation/devicetree/bindings/watchdog/arm,sp805.txt32
-rw-r--r--Documentation/devicetree/bindings/watchdog/arm,sp805.yaml75
-rw-r--r--Documentation/devicetree/bindings/watchdog/arm,twd-wdt.yaml50
-rw-r--r--Documentation/devicetree/bindings/watchdog/arm-smc-wdt.yaml39
-rw-r--r--Documentation/devicetree/bindings/watchdog/armada-37xx-wdt.txt23
-rw-r--r--Documentation/devicetree/bindings/watchdog/aspeed,ast2400-wdt.yaml142
-rw-r--r--Documentation/devicetree/bindings/watchdog/aspeed-wdt.txt56
-rw-r--r--Documentation/devicetree/bindings/watchdog/atmel,at91rm9200-wdt.yaml33
-rw-r--r--Documentation/devicetree/bindings/watchdog/atmel,at91sam9-wdt.yaml127
-rw-r--r--Documentation/devicetree/bindings/watchdog/atmel,sama5d4-wdt.yaml81
-rw-r--r--Documentation/devicetree/bindings/watchdog/atmel-at91rm9200-wdt.txt9
-rw-r--r--Documentation/devicetree/bindings/watchdog/atmel-sama5d4-wdt.txt34
-rw-r--r--Documentation/devicetree/bindings/watchdog/atmel-wdt.txt51
-rw-r--r--Documentation/devicetree/bindings/watchdog/brcm,bcm2835-pm-wdog.txt18
-rw-r--r--Documentation/devicetree/bindings/watchdog/brcm,bcm7038-wdt.txt19
-rw-r--r--Documentation/devicetree/bindings/watchdog/brcm,bcm7038-wdt.yaml43
-rw-r--r--Documentation/devicetree/bindings/watchdog/brcm,kona-wdt.txt15
-rw-r--r--Documentation/devicetree/bindings/watchdog/brcm,kona-wdt.yaml41
-rw-r--r--Documentation/devicetree/bindings/watchdog/cadence-wdt.txt23
-rw-r--r--Documentation/devicetree/bindings/watchdog/cdns,wdt-r1p2.yaml62
-rw-r--r--Documentation/devicetree/bindings/watchdog/cirrus,ep9301-wdt.yaml42
-rw-r--r--Documentation/devicetree/bindings/watchdog/cnxt,cx92755-wdt.yaml45
-rw-r--r--Documentation/devicetree/bindings/watchdog/da9062-wdt.txt23
-rw-r--r--Documentation/devicetree/bindings/watchdog/davinci-wdt.txt24
-rw-r--r--Documentation/devicetree/bindings/watchdog/digicolor-wdt.txt25
-rw-r--r--Documentation/devicetree/bindings/watchdog/dlg,da9062-watchdog.yaml54
-rw-r--r--Documentation/devicetree/bindings/watchdog/dw_wdt.txt24
-rw-r--r--Documentation/devicetree/bindings/watchdog/faraday,ftwdt010.txt22
-rw-r--r--Documentation/devicetree/bindings/watchdog/faraday,ftwdt010.yaml67
-rw-r--r--Documentation/devicetree/bindings/watchdog/fsl,scu-wdt.yaml37
-rw-r--r--Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.txt24
-rw-r--r--Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.yaml128
-rw-r--r--Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.yaml67
-rw-r--r--Documentation/devicetree/bindings/watchdog/gpio-wdt.txt28
-rw-r--r--Documentation/devicetree/bindings/watchdog/img,pdc-wdt.yaml55
-rw-r--r--Documentation/devicetree/bindings/watchdog/imgpdc-wdt.txt19
-rw-r--r--Documentation/devicetree/bindings/watchdog/ingenic,jz4740-wdt.txt17
-rw-r--r--Documentation/devicetree/bindings/watchdog/intel,keembay-wdt.yaml60
-rw-r--r--Documentation/devicetree/bindings/watchdog/kontron,sl28cpld-wdt.yaml40
-rw-r--r--Documentation/devicetree/bindings/watchdog/linux,wdt-gpio.yaml68
-rw-r--r--Documentation/devicetree/bindings/watchdog/loongson,ls1x-wdt.yaml42
-rw-r--r--Documentation/devicetree/bindings/watchdog/lpc18xx-wdt.txt19
-rw-r--r--Documentation/devicetree/bindings/watchdog/marvell,armada-3700-wdt.yaml41
-rw-r--r--Documentation/devicetree/bindings/watchdog/marvell,cn10624-wdt.yaml83
-rw-r--r--Documentation/devicetree/bindings/watchdog/maxim,max63xx.yaml45
-rw-r--r--Documentation/devicetree/bindings/watchdog/mediatek,mt7621-wdt.yaml40
-rw-r--r--Documentation/devicetree/bindings/watchdog/mediatek,mtk-wdt.yaml90
-rw-r--r--Documentation/devicetree/bindings/watchdog/meson-gxbb-wdt.txt16
-rw-r--r--Documentation/devicetree/bindings/watchdog/meson-wdt.txt21
-rw-r--r--Documentation/devicetree/bindings/watchdog/moxa,moxart-watchdog.txt15
-rw-r--r--Documentation/devicetree/bindings/watchdog/mstar,msc313e-wdt.yaml40
-rw-r--r--Documentation/devicetree/bindings/watchdog/mt7621-wdt.txt12
-rw-r--r--Documentation/devicetree/bindings/watchdog/mtk-wdt.txt23
-rw-r--r--Documentation/devicetree/bindings/watchdog/nuvoton,npcm-wdt.txt28
-rw-r--r--Documentation/devicetree/bindings/watchdog/nuvoton,npcm750-wdt.yaml60
-rw-r--r--Documentation/devicetree/bindings/watchdog/nxp,lpc1850-wwdt.yaml52
-rw-r--r--Documentation/devicetree/bindings/watchdog/nxp,pnx4008-wdt.yaml37
-rw-r--r--Documentation/devicetree/bindings/watchdog/nxp,s32g2-swt.yaml54
-rw-r--r--Documentation/devicetree/bindings/watchdog/of-xilinx-wdt.txt26
-rw-r--r--Documentation/devicetree/bindings/watchdog/pnx4008-wdt.txt17
-rw-r--r--Documentation/devicetree/bindings/watchdog/qca,ar7130-wdt.yaml33
-rw-r--r--Documentation/devicetree/bindings/watchdog/qca-ar7130-wdt.txt13
-rw-r--r--Documentation/devicetree/bindings/watchdog/qcom,pm8916-wdt.yaml56
-rw-r--r--Documentation/devicetree/bindings/watchdog/qcom-wdt.txt28
-rw-r--r--Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml149
-rw-r--r--Documentation/devicetree/bindings/watchdog/ralink,rt2880-wdt.yaml46
-rw-r--r--Documentation/devicetree/bindings/watchdog/realtek,otto-wdt.yaml90
-rw-r--r--Documentation/devicetree/bindings/watchdog/realtek,rtd119x.txt17
-rw-r--r--Documentation/devicetree/bindings/watchdog/realtek,rtd1295-watchdog.yaml38
-rw-r--r--Documentation/devicetree/bindings/watchdog/renesas,wdt.yaml240
-rw-r--r--Documentation/devicetree/bindings/watchdog/renesas-wdt.txt46
-rw-r--r--Documentation/devicetree/bindings/watchdog/rt2880-wdt.txt18
-rw-r--r--Documentation/devicetree/bindings/watchdog/samsung-wdt.txt35
-rw-r--r--Documentation/devicetree/bindings/watchdog/samsung-wdt.yaml132
-rw-r--r--Documentation/devicetree/bindings/watchdog/sbsa-gwdt.txt31
-rw-r--r--Documentation/devicetree/bindings/watchdog/sigma,smp8642-wdt.txt18
-rw-r--r--Documentation/devicetree/bindings/watchdog/sirfsoc_wdt.txt18
-rw-r--r--Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml110
-rw-r--r--Documentation/devicetree/bindings/watchdog/socionext,uniphier-wdt.yaml30
-rw-r--r--Documentation/devicetree/bindings/watchdog/sprd,sp9860-wdt.yaml64
-rw-r--r--Documentation/devicetree/bindings/watchdog/sprd-wdt.txt19
-rw-r--r--Documentation/devicetree/bindings/watchdog/st,stm32-iwdg.txt26
-rw-r--r--Documentation/devicetree/bindings/watchdog/st,stm32-iwdg.yaml64
-rw-r--r--Documentation/devicetree/bindings/watchdog/starfive,jh7100-wdt.yaml93
-rw-r--r--Documentation/devicetree/bindings/watchdog/stericsson-coh901327.txt19
-rw-r--r--Documentation/devicetree/bindings/watchdog/sunplus,sp7021-wdt.yaml47
-rw-r--r--Documentation/devicetree/bindings/watchdog/sunxi-wdt.txt20
-rw-r--r--Documentation/devicetree/bindings/watchdog/technologic,ts7200-wdt.yaml45
-rw-r--r--Documentation/devicetree/bindings/watchdog/ti,davinci-wdt.yaml55
-rw-r--r--Documentation/devicetree/bindings/watchdog/ti,rti-wdt.yaml87
-rw-r--r--Documentation/devicetree/bindings/watchdog/toshiba,visconti-wdt.yaml48
-rw-r--r--Documentation/devicetree/bindings/watchdog/twl4030-wdt.txt10
-rw-r--r--Documentation/devicetree/bindings/watchdog/uniphier-wdt.txt20
-rw-r--r--Documentation/devicetree/bindings/watchdog/watchdog.yaml32
-rw-r--r--Documentation/devicetree/bindings/watchdog/xlnx,versal-wwdt.yaml50
-rw-r--r--Documentation/devicetree/bindings/watchdog/xlnx,xps-timebase-wdt.yaml68
-rw-r--r--Documentation/devicetree/bindings/watchdog/zii,rave-sp-wdt.txt39
-rw-r--r--Documentation/devicetree/bindings/watchdog/zii,rave-sp-wdt.yaml47
-rw-r--r--Documentation/devicetree/bindings/watchdog/zii,rave-wdt.yaml49
-rw-r--r--Documentation/devicetree/bindings/watchdog/ziirave-wdt.txt19
-rw-r--r--Documentation/devicetree/bindings/watchdog/zte,zx2967-wdt.txt32
-rw-r--r--Documentation/devicetree/bindings/writing-bindings.rst132
-rw-r--r--Documentation/devicetree/bindings/writing-schema.rst257
-rw-r--r--Documentation/devicetree/bindings/xilinx.txt169
-rw-r--r--Documentation/devicetree/booting-without-of.txt1553
-rw-r--r--Documentation/devicetree/changesets.rst37
-rw-r--r--Documentation/devicetree/changesets.txt31
-rw-r--r--Documentation/devicetree/dynamic-resolution-notes.rst27
-rw-r--r--Documentation/devicetree/dynamic-resolution-notes.txt24
-rw-r--r--Documentation/devicetree/index.rst30
-rw-r--r--Documentation/devicetree/kernel-api.rst57
-rw-r--r--Documentation/devicetree/of_unittest.rst224
-rw-r--r--Documentation/devicetree/of_unittest.txt197
-rw-r--r--Documentation/devicetree/overlay-notes.rst150
-rw-r--r--Documentation/devicetree/overlay-notes.txt139
-rw-r--r--Documentation/devicetree/todo.txt10
-rw-r--r--Documentation/devicetree/usage-model.rst420
-rw-r--r--Documentation/devicetree/usage-model.txt415
-rw-r--r--Documentation/digsig.txt101
-rw-r--r--Documentation/doc-guide/checktransupdate.rst54
-rw-r--r--Documentation/doc-guide/conf.py10
-rw-r--r--Documentation/doc-guide/contributing.rst300
-rw-r--r--Documentation/doc-guide/index.rst9
-rw-r--r--Documentation/doc-guide/kernel-doc.rst156
-rw-r--r--Documentation/doc-guide/maintainer-profile.rst52
-rw-r--r--Documentation/doc-guide/parse-headers.rst6
-rw-r--r--Documentation/doc-guide/sphinx.rst254
-rw-r--r--Documentation/docutils.conf2
-rw-r--r--Documentation/dontdiff262
-rw-r--r--Documentation/driver-api/80211/cfg80211.rst392
-rw-r--r--Documentation/driver-api/80211/conf.py10
-rw-r--r--Documentation/driver-api/80211/mac80211-advanced.rst162
-rw-r--r--Documentation/driver-api/80211/mac80211.rst145
-rw-r--r--Documentation/driver-api/acpi/index.rst9
-rw-r--r--Documentation/driver-api/acpi/linuxized-acpica.rst279
-rw-r--r--Documentation/driver-api/acpi/scan_handlers.rst83
-rw-r--r--Documentation/driver-api/aperture.rst13
-rw-r--r--Documentation/driver-api/auxiliary_bus.rst49
-rw-r--r--Documentation/driver-api/backlight/lp855x-driver.rst81
-rw-r--r--Documentation/driver-api/basics.rst53
-rw-r--r--Documentation/driver-api/clk.rst11
-rw-r--r--Documentation/driver-api/coco/index.rst12
-rw-r--r--Documentation/driver-api/coco/measurement-registers.rst12
-rw-r--r--Documentation/driver-api/component.rst19
-rw-r--r--Documentation/driver-api/conf.py10
-rw-r--r--Documentation/driver-api/connector.rst157
-rw-r--r--Documentation/driver-api/console.rst152
-rw-r--r--Documentation/driver-api/crypto/iaa/iaa-crypto.rst849
-rw-r--r--Documentation/driver-api/crypto/iaa/index.rst20
-rw-r--r--Documentation/driver-api/crypto/index.rst20
-rw-r--r--Documentation/driver-api/cxl/allocation/dax.rst60
-rw-r--r--Documentation/driver-api/cxl/allocation/hugepages.rst32
-rw-r--r--Documentation/driver-api/cxl/allocation/page-allocator.rst85
-rw-r--r--Documentation/driver-api/cxl/allocation/reclaim.rst51
-rw-r--r--Documentation/driver-api/cxl/conventions.rst182
-rw-r--r--Documentation/driver-api/cxl/devices/device-types.rst165
-rw-r--r--Documentation/driver-api/cxl/index.rst54
-rw-r--r--Documentation/driver-api/cxl/linux/access-coordinates.rst178
-rw-r--r--Documentation/driver-api/cxl/linux/cxl-driver.rst630
-rw-r--r--Documentation/driver-api/cxl/linux/dax-driver.rst43
-rw-r--r--Documentation/driver-api/cxl/linux/early-boot.rst137
-rw-r--r--Documentation/driver-api/cxl/linux/example-configurations/hb-interleave.rst314
-rw-r--r--Documentation/driver-api/cxl/linux/example-configurations/intra-hb-interleave.rst291
-rw-r--r--Documentation/driver-api/cxl/linux/example-configurations/multi-interleave.rst401
-rw-r--r--Documentation/driver-api/cxl/linux/example-configurations/single-device.rst246
-rw-r--r--Documentation/driver-api/cxl/linux/memory-hotplug.rst78
-rw-r--r--Documentation/driver-api/cxl/linux/overview.rst103
-rw-r--r--Documentation/driver-api/cxl/maturity-map.rst202
-rw-r--r--Documentation/driver-api/cxl/platform/acpi.rst76
-rw-r--r--Documentation/driver-api/cxl/platform/acpi/cedt.rst62
-rw-r--r--Documentation/driver-api/cxl/platform/acpi/dsdt.rst28
-rw-r--r--Documentation/driver-api/cxl/platform/acpi/hmat.rst32
-rw-r--r--Documentation/driver-api/cxl/platform/acpi/slit.rst21
-rw-r--r--Documentation/driver-api/cxl/platform/acpi/srat.rst71
-rw-r--r--Documentation/driver-api/cxl/platform/bios-and-efi.rst262
-rw-r--r--Documentation/driver-api/cxl/platform/cdat.rst118
-rw-r--r--Documentation/driver-api/cxl/platform/example-configs.rst13
-rw-r--r--Documentation/driver-api/cxl/platform/example-configurations/flexible.rst296
-rw-r--r--Documentation/driver-api/cxl/platform/example-configurations/hb-interleave.rst107
-rw-r--r--Documentation/driver-api/cxl/platform/example-configurations/multi-dev-per-hb.rst90
-rw-r--r--Documentation/driver-api/cxl/platform/example-configurations/one-dev-per-hb.rst136
-rw-r--r--Documentation/driver-api/cxl/theory-of-operation.rst415
-rw-r--r--Documentation/driver-api/devfreq.rst30
-rw-r--r--Documentation/driver-api/device-io.rst440
-rw-r--r--Documentation/driver-api/device_connection.rst43
-rw-r--r--Documentation/driver-api/device_link.rst97
-rw-r--r--Documentation/driver-api/dma-buf.rst264
-rw-r--r--Documentation/driver-api/dmaengine/client.rst123
-rw-r--r--Documentation/driver-api/dmaengine/dmatest.rst134
-rw-r--r--Documentation/driver-api/dmaengine/index.rst6
-rw-r--r--Documentation/driver-api/dmaengine/provider.rst167
-rw-r--r--Documentation/driver-api/dpll.rst631
-rw-r--r--Documentation/driver-api/driver-model/binding.rst98
-rw-r--r--Documentation/driver-api/driver-model/bus.rst146
-rw-r--r--Documentation/driver-api/driver-model/design-patterns.rst116
-rw-r--r--Documentation/driver-api/driver-model/device.rst120
-rw-r--r--Documentation/driver-api/driver-model/devres.rst468
-rw-r--r--Documentation/driver-api/driver-model/driver.rst286
-rw-r--r--Documentation/driver-api/driver-model/index.rst23
-rw-r--r--Documentation/driver-api/driver-model/overview.rst124
-rw-r--r--Documentation/driver-api/driver-model/platform.rst247
-rw-r--r--Documentation/driver-api/driver-model/porting.rst448
-rw-r--r--Documentation/driver-api/early-userspace/buffer-format.rst132
-rw-r--r--Documentation/driver-api/early-userspace/early_userspace_support.rst154
-rw-r--r--Documentation/driver-api/early-userspace/index.rst18
-rw-r--r--Documentation/driver-api/edac.rst120
-rw-r--r--Documentation/driver-api/eisa.rst230
-rw-r--r--Documentation/driver-api/extcon.rst255
-rw-r--r--Documentation/driver-api/firewire.rst6
-rw-r--r--Documentation/driver-api/firmware/built-in-fw.rst2
-rw-r--r--Documentation/driver-api/firmware/core.rst1
-rw-r--r--Documentation/driver-api/firmware/direct-fs-lookup.rst2
-rw-r--r--Documentation/driver-api/firmware/efi/index.rst11
-rw-r--r--Documentation/driver-api/firmware/fallback-mechanisms.rst104
-rw-r--r--Documentation/driver-api/firmware/firmware-usage-guidelines.rst49
-rw-r--r--Documentation/driver-api/firmware/firmware_cache.rst2
-rw-r--r--Documentation/driver-api/firmware/fw_search_path.rst9
-rw-r--r--Documentation/driver-api/firmware/fw_upload.rst127
-rw-r--r--Documentation/driver-api/firmware/index.rst2
-rw-r--r--Documentation/driver-api/firmware/lookup-order.rst2
-rw-r--r--Documentation/driver-api/firmware/other_interfaces.rst36
-rw-r--r--Documentation/driver-api/firmware/request_firmware.rst7
-rw-r--r--Documentation/driver-api/fpga/fpga-bridge.rst17
-rw-r--r--Documentation/driver-api/fpga/fpga-mgr.rst81
-rw-r--r--Documentation/driver-api/fpga/fpga-programming.rst18
-rw-r--r--Documentation/driver-api/fpga/fpga-region.rst37
-rw-r--r--Documentation/driver-api/generic-counter.rst573
-rw-r--r--Documentation/driver-api/gpio/board.rst110
-rw-r--r--Documentation/driver-api/gpio/bt8xxgpio.rst62
-rw-r--r--Documentation/driver-api/gpio/consumer.rst65
-rw-r--r--Documentation/driver-api/gpio/driver.rst681
-rw-r--r--Documentation/driver-api/gpio/drivers-on-gpio.rst28
-rw-r--r--Documentation/driver-api/gpio/index.rst6
-rw-r--r--Documentation/driver-api/gpio/intro.rst22
-rw-r--r--Documentation/driver-api/gpio/legacy-boards.rst298
-rw-r--r--Documentation/driver-api/gpio/legacy.rst770
-rw-r--r--Documentation/driver-api/gpio/using-gpio.rst50
-rw-r--r--Documentation/driver-api/hsi.rst4
-rw-r--r--Documentation/driver-api/hte/hte.rst79
-rw-r--r--Documentation/driver-api/hte/index.rst22
-rw-r--r--Documentation/driver-api/hte/tegra-hte.rst47
-rw-r--r--Documentation/driver-api/i3c/device-driver-api.rst9
-rw-r--r--Documentation/driver-api/i3c/index.rst11
-rw-r--r--Documentation/driver-api/i3c/master-driver-api.rst9
-rw-r--r--Documentation/driver-api/i3c/protocol.rst203
-rw-r--r--Documentation/driver-api/iio/buffers.rst27
-rw-r--r--Documentation/driver-api/iio/core.rst42
-rw-r--r--Documentation/driver-api/iio/hw-consumer.rst5
-rw-r--r--Documentation/driver-api/iio/triggered-buffers.rst2
-rw-r--r--Documentation/driver-api/iio/triggers.rst8
-rw-r--r--Documentation/driver-api/index.rst159
-rw-r--r--Documentation/driver-api/infiniband.rst124
-rw-r--r--Documentation/driver-api/infrastructure.rst29
-rw-r--r--Documentation/driver-api/input.rst16
-rw-r--r--Documentation/driver-api/interconnect.rst140
-rw-r--r--Documentation/driver-api/io-mapping.rst91
-rw-r--r--Documentation/driver-api/io_ordering.rst (renamed from Documentation/io_ordering.txt)0
-rw-r--r--Documentation/driver-api/ioctl.rst253
-rw-r--r--Documentation/driver-api/ipmb.rst109
-rw-r--r--Documentation/driver-api/ipmi.rst807
-rw-r--r--Documentation/driver-api/isa.rst122
-rw-r--r--Documentation/driver-api/libata.rst92
-rw-r--r--Documentation/driver-api/mailbox.rst (renamed from Documentation/mailbox.txt)0
-rw-r--r--Documentation/driver-api/md/index.rst12
-rw-r--r--Documentation/driver-api/md/md-cluster.rst385
-rw-r--r--Documentation/driver-api/md/raid5-cache.rst111
-rw-r--r--Documentation/driver-api/md/raid5-ppl.rst47
-rw-r--r--Documentation/driver-api/media/camera-sensor.rst158
-rw-r--r--Documentation/driver-api/media/cec-core.rst (renamed from Documentation/media/kapi/cec-core.rst)122
-rw-r--r--Documentation/driver-api/media/drivers/bttv-devel.rst116
-rw-r--r--Documentation/driver-api/media/drivers/ccs/ccs-regs.asc1041
-rw-r--r--Documentation/driver-api/media/drivers/ccs/ccs.rst82
-rwxr-xr-xDocumentation/driver-api/media/drivers/ccs/mk-ccs-regs480
-rw-r--r--Documentation/driver-api/media/drivers/contributors.rst (renamed from Documentation/media/dvb-drivers/contributors.rst)2
-rw-r--r--Documentation/driver-api/media/drivers/cx2341x-devel.rst3685
-rw-r--r--Documentation/driver-api/media/drivers/cx88-devel.rst113
-rw-r--r--Documentation/driver-api/media/drivers/dvb-usb.rst (renamed from Documentation/media/dvb-drivers/dvb-usb.rst)4
-rw-r--r--Documentation/driver-api/media/drivers/fimc-devel.rst33
-rw-r--r--Documentation/driver-api/media/drivers/frontends.rst (renamed from Documentation/media/dvb-drivers/frontends.rst)2
-rw-r--r--Documentation/driver-api/media/drivers/index.rst41
-rw-r--r--Documentation/driver-api/media/drivers/ipu6.rst190
-rw-r--r--Documentation/driver-api/media/drivers/pvrusb2.rst (renamed from Documentation/media/v4l-drivers/pvrusb2.rst)4
-rw-r--r--Documentation/driver-api/media/drivers/pxa_camera.rst (renamed from Documentation/media/v4l-drivers/pxa_camera.rst)6
-rw-r--r--Documentation/driver-api/media/drivers/radiotrack.rst (renamed from Documentation/media/v4l-drivers/radiotrack.rst)2
-rw-r--r--Documentation/driver-api/media/drivers/rkisp1.rst43
-rw-r--r--Documentation/driver-api/media/drivers/saa7134-devel.rst67
-rw-r--r--Documentation/driver-api/media/drivers/sh_mobile_ceu_camera.rst (renamed from Documentation/media/v4l-drivers/sh_mobile_ceu_camera.rst)4
-rw-r--r--Documentation/driver-api/media/drivers/tuners.rst (renamed from Documentation/media/v4l-drivers/tuners.rst)4
-rw-r--r--Documentation/driver-api/media/drivers/vidtv.rst513
-rw-r--r--Documentation/driver-api/media/drivers/vimc-devel.rst15
-rw-r--r--Documentation/driver-api/media/drivers/zoran.rst (renamed from Documentation/media/v4l-drivers/zoran.rst)32
-rw-r--r--Documentation/driver-api/media/dtv-ca.rst (renamed from Documentation/media/kapi/dtv-ca.rst)2
-rw-r--r--Documentation/driver-api/media/dtv-common.rst (renamed from Documentation/media/kapi/dtv-common.rst)11
-rw-r--r--Documentation/driver-api/media/dtv-core.rst (renamed from Documentation/media/kapi/dtv-core.rst)8
-rw-r--r--Documentation/driver-api/media/dtv-demux.rst (renamed from Documentation/media/kapi/dtv-demux.rst)4
-rw-r--r--Documentation/driver-api/media/dtv-frontend.rst (renamed from Documentation/media/kapi/dtv-frontend.rst)30
-rw-r--r--Documentation/driver-api/media/dtv-net.rst6
-rw-r--r--Documentation/driver-api/media/index.rst54
-rw-r--r--Documentation/driver-api/media/maintainer-entry-profile.rst206
-rw-r--r--Documentation/driver-api/media/mc-core.rst341
-rw-r--r--Documentation/driver-api/media/rc-core.rst (renamed from Documentation/media/kapi/rc-core.rst)2
-rw-r--r--Documentation/driver-api/media/tx-rx.rst144
-rw-r--r--Documentation/driver-api/media/v4l2-async.rst5
-rw-r--r--Documentation/driver-api/media/v4l2-cci.rst5
-rw-r--r--Documentation/driver-api/media/v4l2-common.rst (renamed from Documentation/media/kapi/v4l2-common.rst)2
-rw-r--r--Documentation/driver-api/media/v4l2-controls.rst (renamed from Documentation/media/kapi/v4l2-controls.rst)240
-rw-r--r--Documentation/driver-api/media/v4l2-core.rst (renamed from Documentation/media/kapi/v4l2-core.rst)6
-rw-r--r--Documentation/driver-api/media/v4l2-dev.rst (renamed from Documentation/media/kapi/v4l2-dev.rst)25
-rw-r--r--Documentation/driver-api/media/v4l2-device.rst (renamed from Documentation/media/kapi/v4l2-device.rst)10
-rw-r--r--Documentation/driver-api/media/v4l2-dv-timings.rst6
-rw-r--r--Documentation/driver-api/media/v4l2-event.rst (renamed from Documentation/media/kapi/v4l2-event.rst)13
-rw-r--r--Documentation/driver-api/media/v4l2-fh.rst136
-rw-r--r--Documentation/driver-api/media/v4l2-flash-led-class.rst (renamed from Documentation/media/kapi/v4l2-flash-led-class.rst)2
-rw-r--r--Documentation/driver-api/media/v4l2-fwnode.rst5
-rw-r--r--Documentation/driver-api/media/v4l2-intro.rst (renamed from Documentation/media/kapi/v4l2-intro.rst)4
-rw-r--r--Documentation/driver-api/media/v4l2-jpeg.rst10
-rw-r--r--Documentation/driver-api/media/v4l2-mc.rst (renamed from Documentation/media/kapi/v4l2-mc.rst)2
-rw-r--r--Documentation/driver-api/media/v4l2-mediabus.rst (renamed from Documentation/media/kapi/v4l2-mediabus.rst)2
-rw-r--r--Documentation/driver-api/media/v4l2-mem2mem.rst (renamed from Documentation/media/kapi/v4l2-mem2mem.rst)2
-rw-r--r--Documentation/driver-api/media/v4l2-rect.rst6
-rw-r--r--Documentation/driver-api/media/v4l2-subdev.rst644
-rw-r--r--Documentation/driver-api/media/v4l2-tuner.rst (renamed from Documentation/media/kapi/v4l2-tuner.rst)2
-rw-r--r--Documentation/driver-api/media/v4l2-tveeprom.rst (renamed from Documentation/media/kapi/v4l2-tveeprom.rst)2
-rw-r--r--Documentation/driver-api/media/v4l2-videobuf2.rst (renamed from Documentation/media/kapi/v4l2-videobuf2.rst)2
-rw-r--r--Documentation/driver-api/mei/hdcp.rst32
-rw-r--r--Documentation/driver-api/mei/iamt.rst101
-rw-r--r--Documentation/driver-api/mei/index.rst18
-rw-r--r--Documentation/driver-api/mei/mei-client-bus.rst168
-rw-r--r--Documentation/driver-api/mei/mei.rst213
-rw-r--r--Documentation/driver-api/mei/nfc.rst28
-rw-r--r--Documentation/driver-api/memory-devices/index.rst18
-rw-r--r--Documentation/driver-api/memory-devices/ti-emif.rst64
-rw-r--r--Documentation/driver-api/memory-devices/ti-gpmc.rst179
-rw-r--r--Documentation/driver-api/men-chameleon-bus.rst187
-rw-r--r--Documentation/driver-api/miscellaneous.rst5
-rw-r--r--Documentation/driver-api/mmc/index.rst14
-rw-r--r--Documentation/driver-api/mmc/mmc-async-req.rst98
-rw-r--r--Documentation/driver-api/mmc/mmc-dev-attrs.rst91
-rw-r--r--Documentation/driver-api/mmc/mmc-dev-parts.rst41
-rw-r--r--Documentation/driver-api/mmc/mmc-test.rst299
-rw-r--r--Documentation/driver-api/mmc/mmc-tools.rst37
-rw-r--r--Documentation/driver-api/mtd/index.rst12
-rw-r--r--Documentation/driver-api/mtd/nand_ecc.rst763
-rw-r--r--Documentation/driver-api/mtd/spi-intel.rst90
-rw-r--r--Documentation/driver-api/mtd/spi-nor.rst205
-rw-r--r--Documentation/driver-api/mtdnand.rst9
-rw-r--r--Documentation/driver-api/nfc/index.rst11
-rw-r--r--Documentation/driver-api/nfc/nfc-hci.rst311
-rw-r--r--Documentation/driver-api/nfc/nfc-pn544.rst34
-rw-r--r--Documentation/driver-api/ntb.rst263
-rw-r--r--Documentation/driver-api/nvdimm/btt.rst285
-rw-r--r--Documentation/driver-api/nvdimm/firmware-activate.rst86
-rw-r--r--Documentation/driver-api/nvdimm/index.rst13
-rw-r--r--Documentation/driver-api/nvdimm/nvdimm.rst657
-rw-r--r--Documentation/driver-api/nvdimm/security.rst143
-rw-r--r--Documentation/driver-api/nvmem.rst202
-rw-r--r--Documentation/driver-api/parport-lowlevel.rst (renamed from Documentation/parport-lowlevel.txt)0
-rw-r--r--Documentation/driver-api/pci/index.rst5
-rw-r--r--Documentation/driver-api/pci/p2pdma.rst24
-rw-r--r--Documentation/driver-api/pci/pci.rst11
-rw-r--r--Documentation/driver-api/phy/index.rst18
-rw-r--r--Documentation/driver-api/phy/phy.rst223
-rw-r--r--Documentation/driver-api/phy/samsung-usb2.rst137
-rw-r--r--Documentation/driver-api/pin-control.rst1510
-rw-r--r--Documentation/driver-api/pinctl.rst1439
-rw-r--r--Documentation/driver-api/pldmfw/driver-ops.rst56
-rw-r--r--Documentation/driver-api/pldmfw/file-format.rst203
-rw-r--r--Documentation/driver-api/pldmfw/index.rst72
-rw-r--r--Documentation/driver-api/pm/conf.py10
-rw-r--r--Documentation/driver-api/pm/cpuidle.rst279
-rw-r--r--Documentation/driver-api/pm/devices.rst251
-rw-r--r--Documentation/driver-api/pm/index.rst9
-rw-r--r--Documentation/driver-api/pm/notifiers.rst8
-rw-r--r--Documentation/driver-api/pm/types.rst2
-rw-r--r--Documentation/driver-api/pps.rst311
-rw-r--r--Documentation/driver-api/ptp.rst137
-rw-r--r--Documentation/driver-api/pwm.rst191
-rw-r--r--Documentation/driver-api/pwrseq.rst95
-rw-r--r--Documentation/driver-api/rapidio/index.rst15
-rw-r--r--Documentation/driver-api/rapidio/mport_cdev.rst110
-rw-r--r--Documentation/driver-api/rapidio/rapidio.rst362
-rw-r--r--Documentation/driver-api/rapidio/rio_cm.rst135
-rw-r--r--Documentation/driver-api/rapidio/sysfs.rst7
-rw-r--r--Documentation/driver-api/rapidio/tsi721.rst112
-rw-r--r--Documentation/driver-api/regulator.rst4
-rw-r--r--Documentation/driver-api/reset.rst221
-rw-r--r--Documentation/driver-api/rfkill.rst (renamed from Documentation/rfkill.txt)0
-rw-r--r--Documentation/driver-api/s390-drivers.rst4
-rw-r--r--Documentation/driver-api/scsi.rst26
-rw-r--r--Documentation/driver-api/serial/driver.rst105
-rw-r--r--Documentation/driver-api/serial/index.rst27
-rw-r--r--Documentation/driver-api/serial/serial-iso7816.rst90
-rw-r--r--Documentation/driver-api/serial/serial-rs485.rst135
-rw-r--r--Documentation/driver-api/sm501.rst (renamed from Documentation/SM501.txt)0
-rw-r--r--Documentation/driver-api/sound.rst54
-rw-r--r--Documentation/driver-api/soundwire/bra.rst336
-rw-r--r--Documentation/driver-api/soundwire/bra_cadence.rst66
-rw-r--r--Documentation/driver-api/soundwire/index.rst4
-rw-r--r--Documentation/driver-api/soundwire/locking.rst4
-rw-r--r--Documentation/driver-api/soundwire/stream.rst187
-rw-r--r--Documentation/driver-api/soundwire/summary.rst15
-rw-r--r--Documentation/driver-api/spi.rst6
-rw-r--r--Documentation/driver-api/surface_aggregator/client-api.rst38
-rw-r--r--Documentation/driver-api/surface_aggregator/client.rst397
-rw-r--r--Documentation/driver-api/surface_aggregator/clients/cdev.rst204
-rw-r--r--Documentation/driver-api/surface_aggregator/clients/dtx.rst718
-rw-r--r--Documentation/driver-api/surface_aggregator/clients/index.rst23
-rw-r--r--Documentation/driver-api/surface_aggregator/clients/san.rst44
-rw-r--r--Documentation/driver-api/surface_aggregator/index.rst21
-rw-r--r--Documentation/driver-api/surface_aggregator/internal-api.rst67
-rw-r--r--Documentation/driver-api/surface_aggregator/internal.rst578
-rw-r--r--Documentation/driver-api/surface_aggregator/overview.rst79
-rw-r--r--Documentation/driver-api/surface_aggregator/ssh.rst346
-rw-r--r--Documentation/driver-api/switchtec.rst102
-rw-r--r--Documentation/driver-api/sync_file.rst (renamed from Documentation/sync_file.txt)0
-rw-r--r--Documentation/driver-api/target.rst16
-rw-r--r--Documentation/driver-api/tee.rst66
-rw-r--r--Documentation/driver-api/thermal/cpu-cooling-api.rst107
-rw-r--r--Documentation/driver-api/thermal/cpu-idle-cooling.rst199
-rw-r--r--Documentation/driver-api/thermal/exynos_thermal.rst90
-rw-r--r--Documentation/driver-api/thermal/exynos_thermal_emulation.rst61
-rw-r--r--Documentation/driver-api/thermal/index.rst19
-rw-r--r--Documentation/driver-api/thermal/intel_dptf.rst411
-rw-r--r--Documentation/driver-api/thermal/nouveau_thermal.rst96
-rw-r--r--Documentation/driver-api/thermal/power_allocator.rst281
-rw-r--r--Documentation/driver-api/thermal/sysfs-api.rst433
-rw-r--r--Documentation/driver-api/thermal/x86_pkg_temperature_thermal.rst55
-rw-r--r--Documentation/driver-api/tty/console.rst45
-rw-r--r--Documentation/driver-api/tty/index.rst75
-rw-r--r--Documentation/driver-api/tty/moxa-smartio.rst197
-rw-r--r--Documentation/driver-api/tty/n_gsm.rst192
-rw-r--r--Documentation/driver-api/tty/n_tty.rst22
-rw-r--r--Documentation/driver-api/tty/tty_buffer.rst49
-rw-r--r--Documentation/driver-api/tty/tty_driver.rst130
-rw-r--r--Documentation/driver-api/tty/tty_internals.rst31
-rw-r--r--Documentation/driver-api/tty/tty_ioctl.rst10
-rw-r--r--Documentation/driver-api/tty/tty_ldisc.rst85
-rw-r--r--Documentation/driver-api/tty/tty_port.rst71
-rw-r--r--Documentation/driver-api/tty/tty_struct.rst81
-rw-r--r--Documentation/driver-api/uio-howto.rst13
-rw-r--r--Documentation/driver-api/usb/URB.rst4
-rw-r--r--Documentation/driver-api/usb/anchors.rst11
-rw-r--r--Documentation/driver-api/usb/bulk-streams.rst4
-rw-r--r--Documentation/driver-api/usb/callbacks.rst6
-rw-r--r--Documentation/driver-api/usb/dma.rst54
-rw-r--r--Documentation/driver-api/usb/dwc3.rst2
-rw-r--r--Documentation/driver-api/usb/error-codes.rst3
-rw-r--r--Documentation/driver-api/usb/gadget.rst10
-rw-r--r--Documentation/driver-api/usb/hotplug.rst4
-rw-r--r--Documentation/driver-api/usb/index.rst2
-rw-r--r--Documentation/driver-api/usb/power-management.rst16
-rw-r--r--Documentation/driver-api/usb/typec.rst1
-rw-r--r--Documentation/driver-api/usb/typec_bus.rst54
-rw-r--r--Documentation/driver-api/usb/usb.rst30
-rw-r--r--Documentation/driver-api/usb/usb3-debug-port.rst2
-rw-r--r--Documentation/driver-api/usb/writing_musb_glue_layer.rst10
-rw-r--r--Documentation/driver-api/usb/writing_usb_driver.rst56
-rw-r--r--Documentation/driver-api/vfio-mediated-device.rst273
-rw-r--r--Documentation/driver-api/vfio-pci-device-specific-driver-acceptance.rst35
-rw-r--r--Documentation/driver-api/vfio.rst707
-rw-r--r--Documentation/driver-api/virtio/index.rst11
-rw-r--r--Documentation/driver-api/virtio/virtio.rst145
-rw-r--r--Documentation/driver-api/virtio/writing_virtio_drivers.rst196
-rw-r--r--Documentation/driver-api/vme.rst4
-rw-r--r--Documentation/driver-api/w1.rst3
-rw-r--r--Documentation/driver-api/wbrf.rst78
-rw-r--r--Documentation/driver-api/wmi.rst20
-rw-r--r--Documentation/driver-api/xilinx/eemi.rst40
-rw-r--r--Documentation/driver-api/xilinx/index.rst16
-rw-r--r--Documentation/driver-api/xillybus.rst379
-rw-r--r--Documentation/driver-api/zorro.rst (renamed from Documentation/zorro.txt)0
-rw-r--r--Documentation/driver-model/binding.txt98
-rw-r--r--Documentation/driver-model/bus.txt143
-rw-r--r--Documentation/driver-model/class.txt147
-rw-r--r--Documentation/driver-model/design-patterns.txt116
-rw-r--r--Documentation/driver-model/device.txt106
-rw-r--r--Documentation/driver-model/devres.txt398
-rw-r--r--Documentation/driver-model/driver.txt215
-rw-r--r--Documentation/driver-model/overview.txt123
-rw-r--r--Documentation/driver-model/platform.txt244
-rw-r--r--Documentation/driver-model/porting.txt447
-rw-r--r--Documentation/early-userspace/README151
-rw-r--r--Documentation/early-userspace/buffer-format.txt112
-rw-r--r--Documentation/edac/features.rst103
-rw-r--r--Documentation/edac/index.rst12
-rw-r--r--Documentation/edac/memory_repair.rst152
-rw-r--r--Documentation/edac/scrub.rst342
-rw-r--r--Documentation/efi-stub.txt100
-rw-r--r--Documentation/eisa.txt230
-rw-r--r--Documentation/extcon/intel-int3496.txt27
-rw-r--r--Documentation/fault-injection/fault-injection.rst611
-rw-r--r--Documentation/fault-injection/fault-injection.txt435
-rw-r--r--Documentation/fault-injection/index.rst20
-rw-r--r--Documentation/fault-injection/notifier-error-inject.rst98
-rw-r--r--Documentation/fault-injection/notifier-error-inject.txt94
-rw-r--r--Documentation/fault-injection/nvme-fault-injection.rst178
-rw-r--r--Documentation/fault-injection/nvme-fault-injection.txt116
-rw-r--r--Documentation/fault-injection/provoke-crashes.rst57
-rw-r--r--Documentation/fault-injection/provoke-crashes.txt38
-rw-r--r--Documentation/fb/api.rst307
-rw-r--r--Documentation/fb/api.txt306
-rw-r--r--Documentation/fb/arkfb.rst68
-rw-r--r--Documentation/fb/arkfb.txt68
-rw-r--r--Documentation/fb/aty128fb.rst73
-rw-r--r--Documentation/fb/aty128fb.txt72
-rw-r--r--Documentation/fb/cirrusfb.rst94
-rw-r--r--Documentation/fb/cirrusfb.txt97
-rw-r--r--Documentation/fb/cmap_xfbdev.rst56
-rw-r--r--Documentation/fb/cmap_xfbdev.txt53
-rw-r--r--Documentation/fb/deferred_io.rst79
-rw-r--r--Documentation/fb/deferred_io.txt75
-rw-r--r--Documentation/fb/efifb.rst71
-rw-r--r--Documentation/fb/efifb.txt37
-rw-r--r--Documentation/fb/ep93xx-fb.rst136
-rw-r--r--Documentation/fb/ep93xx-fb.txt135
-rw-r--r--Documentation/fb/fbcon.rst354
-rw-r--r--Documentation/fb/fbcon.txt339
-rw-r--r--Documentation/fb/framebuffer.rst353
-rw-r--r--Documentation/fb/framebuffer.txt343
-rw-r--r--Documentation/fb/gxfb.rst52
-rw-r--r--Documentation/fb/gxfb.txt52
-rw-r--r--Documentation/fb/index.rst59
-rw-r--r--Documentation/fb/intel810.rst287
-rw-r--r--Documentation/fb/intel810.txt278
-rw-r--r--Documentation/fb/intelfb.txt149
-rw-r--r--Documentation/fb/internals.rst86
-rw-r--r--Documentation/fb/internals.txt82
-rw-r--r--Documentation/fb/lxfb.rst52
-rw-r--r--Documentation/fb/lxfb.txt52
-rw-r--r--Documentation/fb/matroxfb.rst438
-rw-r--r--Documentation/fb/matroxfb.txt413
-rw-r--r--Documentation/fb/metronomefb.rst38
-rw-r--r--Documentation/fb/metronomefb.txt36
-rw-r--r--Documentation/fb/modedb.rst182
-rw-r--r--Documentation/fb/modedb.txt151
-rw-r--r--Documentation/fb/pvr2fb.rst66
-rw-r--r--Documentation/fb/pvr2fb.txt65
-rw-r--r--Documentation/fb/pxafb.rst173
-rw-r--r--Documentation/fb/pxafb.txt142
-rw-r--r--Documentation/fb/s3fb.rst82
-rw-r--r--Documentation/fb/s3fb.txt82
-rw-r--r--Documentation/fb/sa1100fb.rst37
-rw-r--r--Documentation/fb/sa1100fb.txt39
-rw-r--r--Documentation/fb/sh7760fb.rst130
-rw-r--r--Documentation/fb/sh7760fb.txt131
-rw-r--r--Documentation/fb/sisfb.rst160
-rw-r--r--Documentation/fb/sisfb.txt158
-rw-r--r--Documentation/fb/sm501.rst15
-rw-r--r--Documentation/fb/sm501.txt10
-rw-r--r--Documentation/fb/sm712fb.rst35
-rw-r--r--Documentation/fb/sm712fb.txt31
-rw-r--r--Documentation/fb/sstfb.rst204
-rw-r--r--Documentation/fb/sstfb.txt174
-rw-r--r--Documentation/fb/tgafb.rst71
-rw-r--r--Documentation/fb/tgafb.txt69
-rw-r--r--Documentation/fb/tridentfb.rst78
-rw-r--r--Documentation/fb/tridentfb.txt70
-rw-r--r--Documentation/fb/udlfb.rst169
-rw-r--r--Documentation/fb/udlfb.txt159
-rw-r--r--Documentation/fb/uvesafb.rst188
-rw-r--r--Documentation/fb/uvesafb.txt184
-rw-r--r--Documentation/fb/vesafb.rst190
-rw-r--r--Documentation/fb/vesafb.txt181
-rw-r--r--Documentation/fb/viafb.rst297
-rw-r--r--Documentation/fb/viafb.txt252
-rw-r--r--Documentation/fb/vt8623fb.rst64
-rw-r--r--Documentation/fb/vt8623fb.txt64
-rw-r--r--Documentation/features/arch-support.txt1
-rw-r--r--Documentation/features/core/cBPF-JIT/arch-support.txt7
-rw-r--r--Documentation/features/core/eBPF-JIT/arch-support.txt13
-rw-r--r--Documentation/features/core/generic-idle-thread/arch-support.txt7
-rw-r--r--Documentation/features/core/jump-labels/arch-support.txt17
-rw-r--r--Documentation/features/core/mseal_sys_mappings/arch-support.txt30
-rw-r--r--Documentation/features/core/thread-info-in-task/arch-support.txt30
-rw-r--r--Documentation/features/core/tracehook/arch-support.txt11
-rw-r--r--Documentation/features/debug/KASAN/arch-support.txt17
-rw-r--r--Documentation/features/debug/debug-vm-pgtable/arch-support.txt30
-rw-r--r--Documentation/features/debug/gcov-profile-all/arch-support.txt15
-rw-r--r--Documentation/features/debug/kcov/arch-support.txt30
-rw-r--r--Documentation/features/debug/kgdb/arch-support.txt11
-rw-r--r--Documentation/features/debug/kmemleak/arch-support.txt30
-rw-r--r--Documentation/features/debug/kprobes-on-ftrace/arch-support.txt11
-rw-r--r--Documentation/features/debug/kprobes/arch-support.txt9
-rw-r--r--Documentation/features/debug/kretprobes/arch-support.txt11
-rw-r--r--Documentation/features/debug/optprobes/arch-support.txt7
-rw-r--r--Documentation/features/debug/stackprotector/arch-support.txt11
-rw-r--r--Documentation/features/debug/uprobes/arch-support.txt9
-rw-r--r--Documentation/features/debug/user-ret-profiler/arch-support.txt7
-rw-r--r--Documentation/features/io/dma-contiguous/arch-support.txt9
-rwxr-xr-xDocumentation/features/list-arch.sh19
-rw-r--r--Documentation/features/locking/cmpxchg-local/arch-support.txt7
-rw-r--r--Documentation/features/locking/lockdep/arch-support.txt11
-rw-r--r--Documentation/features/locking/queued-rwlocks/arch-support.txt13
-rw-r--r--Documentation/features/locking/queued-spinlocks/arch-support.txt15
-rw-r--r--Documentation/features/locking/rwsem-optimized/arch-support.txt33
-rw-r--r--Documentation/features/perf/kprobes-event/arch-support.txt15
-rw-r--r--Documentation/features/perf/perf-regs/arch-support.txt11
-rw-r--r--Documentation/features/perf/perf-stackdump/arch-support.txt11
-rw-r--r--Documentation/features/sched/membarrier-sync-core/arch-support.txt31
-rw-r--r--Documentation/features/sched/numa-balancing/arch-support.txt9
-rwxr-xr-xDocumentation/features/scripts/features-refresh.sh2
-rw-r--r--Documentation/features/seccomp/seccomp-filter/arch-support.txt15
-rw-r--r--Documentation/features/time/arch-tick-broadcast/arch-support.txt9
-rw-r--r--Documentation/features/time/clockevents/arch-support.txt13
-rw-r--r--Documentation/features/time/context-tracking/arch-support.txt17
-rw-r--r--Documentation/features/time/irq-time-acct/arch-support.txt9
-rw-r--r--Documentation/features/time/modern-timekeeping/arch-support.txt33
-rw-r--r--Documentation/features/time/virt-cpuacct/arch-support.txt9
-rw-r--r--Documentation/features/vm/ELF-ASLR/arch-support.txt10
-rw-r--r--Documentation/features/vm/PG_uncached/arch-support.txt33
-rw-r--r--Documentation/features/vm/THP/arch-support.txt9
-rw-r--r--Documentation/features/vm/TLB/arch-support.txt11
-rw-r--r--Documentation/features/vm/huge-vmap/arch-support.txt13
-rw-r--r--Documentation/features/vm/ioremap_prot/arch-support.txt13
-rw-r--r--Documentation/features/vm/numa-memblock/arch-support.txt33
-rw-r--r--Documentation/features/vm/pte_special/arch-support.txt13
-rw-r--r--Documentation/filesystems/9p.rst277
-rw-r--r--Documentation/filesystems/9p.txt161
-rw-r--r--Documentation/filesystems/Locking588
-rw-r--r--Documentation/filesystems/adfs.rst108
-rw-r--r--Documentation/filesystems/adfs.txt75
-rw-r--r--Documentation/filesystems/affs.rst250
-rw-r--r--Documentation/filesystems/affs.txt222
-rw-r--r--Documentation/filesystems/afs.rst251
-rw-r--r--Documentation/filesystems/afs.txt258
-rw-r--r--Documentation/filesystems/api-summary.rst147
-rw-r--r--Documentation/filesystems/autofs-mount-control.rst410
-rw-r--r--Documentation/filesystems/autofs-mount-control.txt406
-rw-r--r--Documentation/filesystems/autofs.rst580
-rw-r--r--Documentation/filesystems/autofs.txt529
-rw-r--r--Documentation/filesystems/automount-support.rst98
-rw-r--r--Documentation/filesystems/automount-support.txt93
-rw-r--r--Documentation/filesystems/befs.rst128
-rw-r--r--Documentation/filesystems/befs.txt117
-rw-r--r--Documentation/filesystems/bfs.rst60
-rw-r--r--Documentation/filesystems/bfs.txt57
-rw-r--r--Documentation/filesystems/btrfs.rst43
-rw-r--r--Documentation/filesystems/btrfs.txt31
-rw-r--r--Documentation/filesystems/buffer.rst12
-rw-r--r--Documentation/filesystems/caching/backend-api.rst479
-rw-r--r--Documentation/filesystems/caching/backend-api.txt726
-rw-r--r--Documentation/filesystems/caching/cachefiles.rst662
-rw-r--r--Documentation/filesystems/caching/cachefiles.txt501
-rw-r--r--Documentation/filesystems/caching/fscache.rst348
-rw-r--r--Documentation/filesystems/caching/fscache.txt448
-rw-r--r--Documentation/filesystems/caching/index.rst12
-rw-r--r--Documentation/filesystems/caching/netfs-api.rst452
-rw-r--r--Documentation/filesystems/caching/netfs-api.txt910
-rw-r--r--Documentation/filesystems/caching/object.txt320
-rw-r--r--Documentation/filesystems/caching/operations.txt213
-rw-r--r--Documentation/filesystems/ceph.rst221
-rw-r--r--Documentation/filesystems/ceph.txt170
-rw-r--r--Documentation/filesystems/cifs/AUTHORS63
-rw-r--r--Documentation/filesystems/cifs/CHANGES4
-rw-r--r--Documentation/filesystems/cifs/README743
-rw-r--r--Documentation/filesystems/cifs/TODO104
-rw-r--r--Documentation/filesystems/cifs/cifs.txt35
-rw-r--r--Documentation/filesystems/coda.rst1670
-rw-r--r--Documentation/filesystems/coda.txt1673
-rw-r--r--Documentation/filesystems/conf.py10
-rw-r--r--Documentation/filesystems/configfs.rst487
-rw-r--r--Documentation/filesystems/configfs/configfs.txt508
-rw-r--r--Documentation/filesystems/cramfs.rst123
-rw-r--r--Documentation/filesystems/cramfs.txt118
-rw-r--r--Documentation/filesystems/dax.rst305
-rw-r--r--Documentation/filesystems/dax.txt132
-rw-r--r--Documentation/filesystems/debugfs.rst243
-rw-r--r--Documentation/filesystems/debugfs.txt231
-rw-r--r--Documentation/filesystems/devpts.rst36
-rw-r--r--Documentation/filesystems/devpts.txt26
-rw-r--r--Documentation/filesystems/directory-locking135
-rw-r--r--Documentation/filesystems/directory-locking.rst286
-rw-r--r--Documentation/filesystems/dlmfs.rst140
-rw-r--r--Documentation/filesystems/dlmfs.txt130
-rw-r--r--Documentation/filesystems/dnotify.rst75
-rw-r--r--Documentation/filesystems/dnotify.txt70
-rw-r--r--Documentation/filesystems/ecryptfs.rst86
-rw-r--r--Documentation/filesystems/ecryptfs.txt77
-rw-r--r--Documentation/filesystems/efivarfs.rst43
-rw-r--r--Documentation/filesystems/efivarfs.txt23
-rw-r--r--Documentation/filesystems/erofs.rst369
-rw-r--r--Documentation/filesystems/exofs.txt185
-rw-r--r--Documentation/filesystems/ext2.rst398
-rw-r--r--Documentation/filesystems/ext2.txt382
-rw-r--r--Documentation/filesystems/ext3.rst14
-rw-r--r--Documentation/filesystems/ext3.txt12
-rw-r--r--Documentation/filesystems/ext4/about.rst2
-rw-r--r--Documentation/filesystems/ext4/atomic_writes.rst225
-rw-r--r--Documentation/filesystems/ext4/attributes.rst68
-rw-r--r--Documentation/filesystems/ext4/bigalloc.rst32
-rw-r--r--Documentation/filesystems/ext4/bitmaps.rst13
-rw-r--r--Documentation/filesystems/ext4/blockgroup.rst55
-rw-r--r--Documentation/filesystems/ext4/blockmap.rst2
-rw-r--r--Documentation/filesystems/ext4/blocks.rst6
-rw-r--r--Documentation/filesystems/ext4/checksums.rst26
-rw-r--r--Documentation/filesystems/ext4/directory.rst187
-rw-r--r--Documentation/filesystems/ext4/dynamic.rst10
-rw-r--r--Documentation/filesystems/ext4/eainode.rst10
-rw-r--r--Documentation/filesystems/ext4/globals.rst14
-rw-r--r--Documentation/filesystems/ext4/group_descr.rst125
-rw-r--r--Documentation/filesystems/ext4/ifork.rst60
-rw-r--r--Documentation/filesystems/ext4/index.rst10
-rw-r--r--Documentation/filesystems/ext4/inlinedata.rst8
-rw-r--r--Documentation/filesystems/ext4/inode_table.rst9
-rw-r--r--Documentation/filesystems/ext4/inodes.rst324
-rw-r--r--Documentation/filesystems/ext4/journal.rst374
-rw-r--r--Documentation/filesystems/ext4/mmp.rst36
-rw-r--r--Documentation/filesystems/ext4/orphan.rst42
-rw-r--r--Documentation/filesystems/ext4/overview.rst22
-rw-r--r--Documentation/filesystems/ext4/special_inodes.rst19
-rw-r--r--Documentation/filesystems/ext4/super.rst576
-rw-r--r--Documentation/filesystems/ext4/verity.rst44
-rw-r--r--Documentation/filesystems/f2fs.rst1028
-rw-r--r--Documentation/filesystems/f2fs.txt716
-rw-r--r--Documentation/filesystems/fiemap.rst217
-rw-r--r--Documentation/filesystems/fiemap.txt229
-rw-r--r--Documentation/filesystems/files.rst123
-rw-r--r--Documentation/filesystems/files.txt123
-rw-r--r--Documentation/filesystems/fscrypt.rst1483
-rw-r--r--Documentation/filesystems/fsverity.rst912
-rw-r--r--Documentation/filesystems/fuse-io.txt38
-rw-r--r--Documentation/filesystems/fuse.txt423
-rw-r--r--Documentation/filesystems/fuse/fuse-io-uring.rst99
-rw-r--r--Documentation/filesystems/fuse/fuse-io.rst45
-rw-r--r--Documentation/filesystems/fuse/fuse-passthrough.rst133
-rw-r--r--Documentation/filesystems/fuse/fuse.rst440
-rw-r--r--Documentation/filesystems/fuse/index.rst14
-rw-r--r--Documentation/filesystems/gfs2-glocks.rst249
-rw-r--r--Documentation/filesystems/gfs2-glocks.txt232
-rw-r--r--Documentation/filesystems/gfs2-uevents.rst112
-rw-r--r--Documentation/filesystems/gfs2-uevents.txt100
-rw-r--r--Documentation/filesystems/gfs2.rst52
-rw-r--r--Documentation/filesystems/gfs2.txt45
-rw-r--r--Documentation/filesystems/hfs.rst87
-rw-r--r--Documentation/filesystems/hfs.txt82
-rw-r--r--Documentation/filesystems/hfsplus.rst61
-rw-r--r--Documentation/filesystems/hfsplus.txt59
-rw-r--r--Documentation/filesystems/hpfs.rst353
-rw-r--r--Documentation/filesystems/hpfs.txt296
-rw-r--r--Documentation/filesystems/idmappings.rst1039
-rw-r--r--Documentation/filesystems/index.rst459
-rw-r--r--Documentation/filesystems/inotify.rst90
-rw-r--r--Documentation/filesystems/inotify.txt79
-rw-r--r--Documentation/filesystems/iomap/design.rst459
-rw-r--r--Documentation/filesystems/iomap/index.rst13
-rw-r--r--Documentation/filesystems/iomap/operations.rst743
-rw-r--r--Documentation/filesystems/iomap/porting.rst120
-rw-r--r--Documentation/filesystems/isofs.rst64
-rw-r--r--Documentation/filesystems/isofs.txt48
-rw-r--r--Documentation/filesystems/jfs.txt52
-rw-r--r--Documentation/filesystems/journalling.rst213
-rw-r--r--Documentation/filesystems/locking.rst669
-rw-r--r--Documentation/filesystems/locks.rst65
-rw-r--r--Documentation/filesystems/locks.txt68
-rw-r--r--Documentation/filesystems/mandatory-locking.txt171
-rw-r--r--Documentation/filesystems/mount_api.rst811
-rw-r--r--Documentation/filesystems/multigrain-ts.rst125
-rw-r--r--Documentation/filesystems/netfs_library.rst1051
-rw-r--r--Documentation/filesystems/nfs/Exporting160
-rw-r--r--Documentation/filesystems/nfs/client-identifier.rst216
-rw-r--r--Documentation/filesystems/nfs/exporting.rst240
-rw-r--r--Documentation/filesystems/nfs/fault_injection.txt69
-rw-r--r--Documentation/filesystems/nfs/idmapper.txt75
-rw-r--r--Documentation/filesystems/nfs/index.rst17
-rw-r--r--Documentation/filesystems/nfs/knfsd-stats.rst122
-rw-r--r--Documentation/filesystems/nfs/knfsd-stats.txt123
-rw-r--r--Documentation/filesystems/nfs/localio.rst357
-rw-r--r--Documentation/filesystems/nfs/nfs-rdma.txt274
-rw-r--r--Documentation/filesystems/nfs/nfs.txt136
-rw-r--r--Documentation/filesystems/nfs/nfs41-server.rst256
-rw-r--r--Documentation/filesystems/nfs/nfs41-server.txt173
-rw-r--r--Documentation/filesystems/nfs/nfsd-admin-interfaces.txt41
-rw-r--r--Documentation/filesystems/nfs/nfsroot.txt355
-rw-r--r--Documentation/filesystems/nfs/pnfs-block-server.txt37
-rw-r--r--Documentation/filesystems/nfs/pnfs-scsi-server.txt23
-rw-r--r--Documentation/filesystems/nfs/pnfs.rst78
-rw-r--r--Documentation/filesystems/nfs/pnfs.txt73
-rw-r--r--Documentation/filesystems/nfs/reexport.rst117
-rw-r--r--Documentation/filesystems/nfs/rpc-cache.rst220
-rw-r--r--Documentation/filesystems/nfs/rpc-cache.txt202
-rw-r--r--Documentation/filesystems/nfs/rpc-server-gss.rst93
-rw-r--r--Documentation/filesystems/nfs/rpc-server-gss.txt91
-rw-r--r--Documentation/filesystems/nilfs2.rst286
-rw-r--r--Documentation/filesystems/nilfs2.txt276
-rw-r--r--Documentation/filesystems/ntfs.txt451
-rw-r--r--Documentation/filesystems/ntfs3.rst123
-rw-r--r--Documentation/filesystems/ocfs2-online-filecheck.rst99
-rw-r--r--Documentation/filesystems/ocfs2-online-filecheck.txt94
-rw-r--r--Documentation/filesystems/ocfs2.rst117
-rw-r--r--Documentation/filesystems/ocfs2.txt106
-rw-r--r--Documentation/filesystems/omfs.rst112
-rw-r--r--Documentation/filesystems/omfs.txt106
-rw-r--r--Documentation/filesystems/orangefs.rst556
-rw-r--r--Documentation/filesystems/orangefs.txt529
-rw-r--r--Documentation/filesystems/overlayfs.rst833
-rw-r--r--Documentation/filesystems/overlayfs.txt483
-rw-r--r--Documentation/filesystems/path-lookup.md1297
-rw-r--r--Documentation/filesystems/path-lookup.rst1390
-rw-r--r--Documentation/filesystems/path-lookup.txt4
-rw-r--r--Documentation/filesystems/porting640
-rw-r--r--Documentation/filesystems/porting.rst1311
-rw-r--r--Documentation/filesystems/proc.rst2453
-rw-r--r--Documentation/filesystems/proc.txt1967
-rw-r--r--Documentation/filesystems/propagate_umount.txt484
-rw-r--r--Documentation/filesystems/qnx6.rst196
-rw-r--r--Documentation/filesystems/qnx6.txt174
-rw-r--r--Documentation/filesystems/quota.rst85
-rw-r--r--Documentation/filesystems/quota.txt68
-rw-r--r--Documentation/filesystems/ramfs-rootfs-initramfs.rst368
-rw-r--r--Documentation/filesystems/ramfs-rootfs-initramfs.txt359
-rw-r--r--Documentation/filesystems/relay.rst491
-rw-r--r--Documentation/filesystems/relay.txt494
-rw-r--r--Documentation/filesystems/resctrl.rst1848
-rw-r--r--Documentation/filesystems/romfs.rst194
-rw-r--r--Documentation/filesystems/romfs.txt186
-rw-r--r--Documentation/filesystems/seq_file.rst396
-rw-r--r--Documentation/filesystems/seq_file.txt359
-rw-r--r--Documentation/filesystems/sharedsubtree.rst994
-rw-r--r--Documentation/filesystems/sharedsubtree.txt939
-rw-r--r--Documentation/filesystems/smb/cifsroot.rst105
-rw-r--r--Documentation/filesystems/smb/index.rst11
-rw-r--r--Documentation/filesystems/smb/ksmbd.rst186
-rw-r--r--Documentation/filesystems/smb/smbdirect.rst103
-rw-r--r--Documentation/filesystems/splice.rst22
-rw-r--r--Documentation/filesystems/spufs.txt521
-rw-r--r--Documentation/filesystems/spufs/index.rst13
-rw-r--r--Documentation/filesystems/spufs/spu_create.rst131
-rw-r--r--Documentation/filesystems/spufs/spu_run.rst138
-rw-r--r--Documentation/filesystems/spufs/spufs.rst273
-rw-r--r--Documentation/filesystems/squashfs.rst323
-rw-r--r--Documentation/filesystems/squashfs.txt259
-rw-r--r--Documentation/filesystems/sysfs-pci.txt131
-rw-r--r--Documentation/filesystems/sysfs-tagging.txt42
-rw-r--r--Documentation/filesystems/sysfs.rst435
-rw-r--r--Documentation/filesystems/sysfs.txt385
-rw-r--r--Documentation/filesystems/sysv-fs.txt197
-rw-r--r--Documentation/filesystems/tmpfs.rst276
-rw-r--r--Documentation/filesystems/tmpfs.txt149
-rw-r--r--Documentation/filesystems/ubifs-authentication.md426
-rw-r--r--Documentation/filesystems/ubifs-authentication.rst448
-rw-r--r--Documentation/filesystems/ubifs.rst137
-rw-r--r--Documentation/filesystems/ubifs.txt126
-rw-r--r--Documentation/filesystems/udf.rst75
-rw-r--r--Documentation/filesystems/udf.txt66
-rw-r--r--Documentation/filesystems/ufs.txt60
-rw-r--r--Documentation/filesystems/vfat.rst387
-rw-r--r--Documentation/filesystems/vfat.txt347
-rw-r--r--Documentation/filesystems/vfs.rst1551
-rw-r--r--Documentation/filesystems/vfs.txt1261
-rw-r--r--Documentation/filesystems/virtiofs.rst76
-rw-r--r--Documentation/filesystems/xfs-delayed-logging-design.txt793
-rw-r--r--Documentation/filesystems/xfs-self-describing-metadata.txt350
-rw-r--r--Documentation/filesystems/xfs.txt469
-rw-r--r--Documentation/filesystems/xfs/index.rst14
-rw-r--r--Documentation/filesystems/xfs/xfs-delayed-logging-design.rst1087
-rw-r--r--Documentation/filesystems/xfs/xfs-maintainer-entry-profile.rst194
-rw-r--r--Documentation/filesystems/xfs/xfs-online-fsck-design.rst5503
-rw-r--r--Documentation/filesystems/xfs/xfs-self-describing-metadata.rst353
-rw-r--r--Documentation/filesystems/zonefs.rst485
-rw-r--r--Documentation/firmware-guide/acpi/DSD-properties-rules.rst103
-rw-r--r--Documentation/firmware-guide/acpi/acpi-lid.rst114
-rw-r--r--Documentation/firmware-guide/acpi/aml-debugger.rst75
-rw-r--r--Documentation/firmware-guide/acpi/apei/einj.rst271
-rw-r--r--Documentation/firmware-guide/acpi/apei/output_format.rst150
-rw-r--r--Documentation/firmware-guide/acpi/chromeos-acpi-device.rst362
-rw-r--r--Documentation/firmware-guide/acpi/debug.rst132
-rw-r--r--Documentation/firmware-guide/acpi/dsd/data-node-references.rst94
-rw-r--r--Documentation/firmware-guide/acpi/dsd/graph.rst170
-rw-r--r--Documentation/firmware-guide/acpi/dsd/leds.rst102
-rw-r--r--Documentation/firmware-guide/acpi/dsd/phy.rst201
-rw-r--r--Documentation/firmware-guide/acpi/enumeration.rst733
-rw-r--r--Documentation/firmware-guide/acpi/extcon-intel-int3496.rst33
-rw-r--r--Documentation/firmware-guide/acpi/gpio-properties.rst336
-rw-r--r--Documentation/firmware-guide/acpi/i2c-muxes.rst61
-rw-r--r--Documentation/firmware-guide/acpi/index.rst31
-rw-r--r--Documentation/firmware-guide/acpi/intel-pmc-mux.rst153
-rw-r--r--Documentation/firmware-guide/acpi/lpit.rst33
-rw-r--r--Documentation/firmware-guide/acpi/method-tracing.rst238
-rw-r--r--Documentation/firmware-guide/acpi/namespace.rst400
-rw-r--r--Documentation/firmware-guide/acpi/non-d0-probe.rst78
-rw-r--r--Documentation/firmware-guide/acpi/osi.rst187
-rw-r--r--Documentation/firmware-guide/acpi/video_extension.rst121
-rw-r--r--Documentation/firmware-guide/index.rst13
-rw-r--r--Documentation/flexible-arrays.txt123
-rw-r--r--Documentation/fmc/API.txt47
-rw-r--r--Documentation/fmc/FMC-and-SDB.txt88
-rw-r--r--Documentation/fmc/carrier.txt311
-rw-r--r--Documentation/fmc/fmc-chardev.txt64
-rw-r--r--Documentation/fmc/fmc-fakedev.txt36
-rw-r--r--Documentation/fmc/fmc-trivial.txt17
-rw-r--r--Documentation/fmc/fmc-write-eeprom.txt98
-rw-r--r--Documentation/fmc/identifiers.txt168
-rw-r--r--Documentation/fmc/mezzanine.txt123
-rw-r--r--Documentation/fmc/parameters.txt56
-rw-r--r--Documentation/fpga/dfl.rst687
-rw-r--r--Documentation/fpga/dfl.txt285
-rw-r--r--Documentation/fpga/index.rst17
-rw-r--r--Documentation/futex-requeue-pi.txt132
-rw-r--r--Documentation/gcc-plugins.txt93
-rw-r--r--Documentation/gpio/sysfs.txt156
-rw-r--r--Documentation/gpu/afbc.rst235
-rw-r--r--Documentation/gpu/amdgpu.rst129
-rw-r--r--Documentation/gpu/amdgpu/amd-hardware-list-info.rst23
-rw-r--r--Documentation/gpu/amdgpu/amdgpu-glossary.rst237
-rw-r--r--Documentation/gpu/amdgpu/apu-asic-info-table.csv18
-rw-r--r--Documentation/gpu/amdgpu/debugfs.rst210
-rw-r--r--Documentation/gpu/amdgpu/debugging.rst105
-rw-r--r--Documentation/gpu/amdgpu/dgpu-asic-info-table.csv30
-rw-r--r--Documentation/gpu/amdgpu/display/config_example.svg414
-rw-r--r--Documentation/gpu/amdgpu/display/dc-arch-overview.svg731
-rw-r--r--Documentation/gpu/amdgpu/display/dc-components.svg732
-rw-r--r--Documentation/gpu/amdgpu/display/dc-debug.rst305
-rw-r--r--Documentation/gpu/amdgpu/display/dc-glossary.rst231
-rw-r--r--Documentation/gpu/amdgpu/display/dc_pipeline_overview.svg1125
-rw-r--r--Documentation/gpu/amdgpu/display/dcn-blocks.rst57
-rw-r--r--Documentation/gpu/amdgpu/display/dcn-overview.rst232
-rw-r--r--Documentation/gpu/amdgpu/display/dcn2_cm_drm_current.svg1370
-rw-r--r--Documentation/gpu/amdgpu/display/dcn3_cm_drm_current.svg1529
-rw-r--r--Documentation/gpu/amdgpu/display/display-contributing.rst168
-rw-r--r--Documentation/gpu/amdgpu/display/display-manager.rst180
-rw-r--r--Documentation/gpu/amdgpu/display/global_sync_vblank.svg485
-rw-r--r--Documentation/gpu/amdgpu/display/index.rst97
-rw-r--r--Documentation/gpu/amdgpu/display/mpo-cursor.svg435
-rw-r--r--Documentation/gpu/amdgpu/display/mpo-overview.rst242
-rw-r--r--Documentation/gpu/amdgpu/display/multi-display-hdcp-mpo-less-pipe-ex.svg220
-rw-r--r--Documentation/gpu/amdgpu/display/multi-display-hdcp-mpo.svg171
-rw-r--r--Documentation/gpu/amdgpu/display/pipeline_4k_no_split.svg958
-rw-r--r--Documentation/gpu/amdgpu/display/pipeline_4k_split.svg1062
-rw-r--r--Documentation/gpu/amdgpu/display/programming-model-dcn.rst162
-rw-r--r--Documentation/gpu/amdgpu/display/single-display-mpo-multi-video.svg339
-rw-r--r--Documentation/gpu/amdgpu/display/single-display-mpo.svg266
-rw-r--r--Documentation/gpu/amdgpu/display/trace-groups-table.csv29
-rw-r--r--Documentation/gpu/amdgpu/driver-core.rst213
-rw-r--r--Documentation/gpu/amdgpu/driver-misc.rst130
-rw-r--r--Documentation/gpu/amdgpu/flashing.rst33
-rw-r--r--Documentation/gpu/amdgpu/gc/index.rst52
-rw-r--r--Documentation/gpu/amdgpu/gc/mes.rst38
-rw-r--r--Documentation/gpu/amdgpu/index.rst24
-rw-r--r--Documentation/gpu/amdgpu/module-parameters.rst7
-rw-r--r--Documentation/gpu/amdgpu/pipe_and_queue_abstraction.svg1279
-rw-r--r--Documentation/gpu/amdgpu/process-isolation.rst59
-rw-r--r--Documentation/gpu/amdgpu/ras.rst62
-rw-r--r--Documentation/gpu/amdgpu/thermal.rst170
-rw-r--r--Documentation/gpu/amdgpu/userq.rst203
-rw-r--r--Documentation/gpu/amdgpu/xgmi.rst5
-rw-r--r--Documentation/gpu/automated_testing.rst171
-rw-r--r--Documentation/gpu/backlight.rst12
-rw-r--r--Documentation/gpu/conf.py10
-rw-r--r--Documentation/gpu/dp-mst/topology-figure-1.dot52
-rw-r--r--Documentation/gpu/dp-mst/topology-figure-2.dot56
-rw-r--r--Documentation/gpu/dp-mst/topology-figure-3.dot59
-rw-r--r--Documentation/gpu/driver-uapi.rst34
-rw-r--r--Documentation/gpu/drivers.rst14
-rw-r--r--Documentation/gpu/drm-client.rst6
-rw-r--r--Documentation/gpu/drm-compute.rst54
-rw-r--r--Documentation/gpu/drm-internals.rst159
-rw-r--r--Documentation/gpu/drm-kms-helpers.rst242
-rw-r--r--Documentation/gpu/drm-kms.rst265
-rw-r--r--Documentation/gpu/drm-mm.rst315
-rw-r--r--Documentation/gpu/drm-uapi.rst506
-rw-r--r--Documentation/gpu/drm-usage-stats.rst217
-rw-r--r--Documentation/gpu/drm-vm-bind-async.rst309
-rw-r--r--Documentation/gpu/drm-vm-bind-locking.rst582
-rw-r--r--Documentation/gpu/i915.rst401
-rw-r--r--Documentation/gpu/imagination/index.rst13
-rw-r--r--Documentation/gpu/imagination/uapi.rst171
-rw-r--r--Documentation/gpu/implementation_guidelines.rst10
-rw-r--r--Documentation/gpu/index.rst13
-rw-r--r--Documentation/gpu/introduction.rst78
-rw-r--r--Documentation/gpu/kms-properties.csv3
-rw-r--r--Documentation/gpu/komeda-kms.rst488
-rw-r--r--Documentation/gpu/mcde.rst8
-rw-r--r--Documentation/gpu/meson.rst6
-rw-r--r--Documentation/gpu/msm-crash-dump.rst4
-rw-r--r--Documentation/gpu/msm-preemption.rst99
-rw-r--r--Documentation/gpu/nouveau.rst32
-rw-r--r--Documentation/gpu/nova/core/devinit.rst61
-rw-r--r--Documentation/gpu/nova/core/falcon.rst158
-rw-r--r--Documentation/gpu/nova/core/fwsec.rst181
-rw-r--r--Documentation/gpu/nova/core/guidelines.rst24
-rw-r--r--Documentation/gpu/nova/core/todo.rst429
-rw-r--r--Documentation/gpu/nova/core/vbios.rst181
-rw-r--r--Documentation/gpu/nova/guidelines.rst69
-rw-r--r--Documentation/gpu/nova/index.rst34
-rw-r--r--Documentation/gpu/panfrost.rst49
-rw-r--r--Documentation/gpu/panthor.rst56
-rw-r--r--Documentation/gpu/pl111.rst8
-rw-r--r--Documentation/gpu/rfc/gpusvm.rst118
-rw-r--r--Documentation/gpu/rfc/i915_gem_lmem.rst22
-rw-r--r--Documentation/gpu/rfc/i915_scheduler.rst152
-rw-r--r--Documentation/gpu/rfc/i915_small_bar.h189
-rw-r--r--Documentation/gpu/rfc/i915_small_bar.rst47
-rw-r--r--Documentation/gpu/rfc/i915_vm_bind.h290
-rw-r--r--Documentation/gpu/rfc/i915_vm_bind.rst245
-rw-r--r--Documentation/gpu/rfc/index.rst37
-rw-r--r--Documentation/gpu/tinydrm.rst42
-rw-r--r--Documentation/gpu/todo.rst826
-rw-r--r--Documentation/gpu/vc4.rst19
-rw-r--r--Documentation/gpu/vgaarbiter.rst16
-rw-r--r--Documentation/gpu/vkms.rst196
-rw-r--r--Documentation/gpu/xe/index.rst30
-rw-r--r--Documentation/gpu/xe/xe-drm-usage-stats.rst10
-rw-r--r--Documentation/gpu/xe/xe_configfs.rst16
-rw-r--r--Documentation/gpu/xe/xe_cs.rst8
-rw-r--r--Documentation/gpu/xe/xe_debugging.rst7
-rw-r--r--Documentation/gpu/xe/xe_devcoredump.rst14
-rw-r--r--Documentation/gpu/xe/xe_device.rst10
-rw-r--r--Documentation/gpu/xe/xe_firmware.rst43
-rw-r--r--Documentation/gpu/xe/xe_gt_freq.rst14
-rw-r--r--Documentation/gpu/xe/xe_gt_mcr.rst13
-rw-r--r--Documentation/gpu/xe/xe_map.rst8
-rw-r--r--Documentation/gpu/xe/xe_migrate.rst8
-rw-r--r--Documentation/gpu/xe/xe_mm.rst29
-rw-r--r--Documentation/gpu/xe/xe_pcode.rst23
-rw-r--r--Documentation/gpu/xe/xe_pm.rst14
-rw-r--r--Documentation/gpu/xe/xe_rtp.rst20
-rw-r--r--Documentation/gpu/xe/xe_tile.rst14
-rw-r--r--Documentation/gpu/xe/xe_wa.rst14
-rw-r--r--Documentation/gpu/zynqmp.rst147
-rw-r--r--Documentation/hid/amd-sfh-hid.rst145
-rw-r--r--Documentation/hid/hid-alps.rst180
-rw-r--r--Documentation/hid/hid-alps.txt139
-rw-r--r--Documentation/hid/hid-bpf.rst527
-rw-r--r--Documentation/hid/hid-sensor.rst242
-rw-r--r--Documentation/hid/hid-sensor.txt224
-rw-r--r--Documentation/hid/hid-transport.rst359
-rw-r--r--Documentation/hid/hid-transport.txt317
-rw-r--r--Documentation/hid/hiddev.rst251
-rw-r--r--Documentation/hid/hiddev.txt205
-rw-r--r--Documentation/hid/hidintro.rst524
-rw-r--r--Documentation/hid/hidraw.rst180
-rw-r--r--Documentation/hid/hidraw.txt119
-rw-r--r--Documentation/hid/hidreport-parsing.rst49
-rw-r--r--Documentation/hid/index.rst22
-rw-r--r--Documentation/hid/intel-ish-hid.rst581
-rw-r--r--Documentation/hid/intel-ish-hid.txt454
-rw-r--r--Documentation/hid/intel-thc-hid.rst596
-rw-r--r--Documentation/hid/uhid.rst193
-rw-r--r--Documentation/hid/uhid.txt187
-rw-r--r--Documentation/highuid.txt80
-rw-r--r--Documentation/hw_random.txt105
-rw-r--r--Documentation/hwmon/ab850022
-rw-r--r--Documentation/hwmon/abituguru92
-rw-r--r--Documentation/hwmon/abituguru-datasheet312
-rw-r--r--Documentation/hwmon/abituguru-datasheet.rst336
-rw-r--r--Documentation/hwmon/abituguru.rst113
-rw-r--r--Documentation/hwmon/abituguru365
-rw-r--r--Documentation/hwmon/abituguru3.rst75
-rw-r--r--Documentation/hwmon/abx50028
-rw-r--r--Documentation/hwmon/acbel-fsg032.rst80
-rw-r--r--Documentation/hwmon/acpi_power_meter51
-rw-r--r--Documentation/hwmon/acpi_power_meter.rst77
-rw-r--r--Documentation/hwmon/ad731425
-rw-r--r--Documentation/hwmon/ad7314.rst34
-rw-r--r--Documentation/hwmon/adc128d81847
-rw-r--r--Documentation/hwmon/adc128d818.rst50
-rw-r--r--Documentation/hwmon/adm1021113
-rw-r--r--Documentation/hwmon/adm102551
-rw-r--r--Documentation/hwmon/adm1025.rst60
-rw-r--r--Documentation/hwmon/adm102693
-rw-r--r--Documentation/hwmon/adm1026.rst101
-rw-r--r--Documentation/hwmon/adm103135
-rw-r--r--Documentation/hwmon/adm1031.rst43
-rw-r--r--Documentation/hwmon/adm1177.rst36
-rw-r--r--Documentation/hwmon/adm1266.rst37
-rw-r--r--Documentation/hwmon/adm1275119
-rw-r--r--Documentation/hwmon/adm1275.rst172
-rw-r--r--Documentation/hwmon/adm9240177
-rw-r--r--Documentation/hwmon/adm9240.rst201
-rw-r--r--Documentation/hwmon/adp1050.rst123
-rw-r--r--Documentation/hwmon/ads101576
-rw-r--r--Documentation/hwmon/ads782858
-rw-r--r--Documentation/hwmon/ads7828.rst65
-rw-r--r--Documentation/hwmon/adt741073
-rw-r--r--Documentation/hwmon/adt7410.rst94
-rw-r--r--Documentation/hwmon/adt741142
-rw-r--r--Documentation/hwmon/adt7411.rst50
-rw-r--r--Documentation/hwmon/adt746267
-rw-r--r--Documentation/hwmon/adt7462.rst70
-rw-r--r--Documentation/hwmon/adt747090
-rw-r--r--Documentation/hwmon/adt7470.rst94
-rw-r--r--Documentation/hwmon/adt7475126
-rw-r--r--Documentation/hwmon/adt7475.rst157
-rw-r--r--Documentation/hwmon/aht10.rst56
-rw-r--r--Documentation/hwmon/amc6821102
-rw-r--r--Documentation/hwmon/amc6821.rst113
-rw-r--r--Documentation/hwmon/aquacomputer_d5next.rst117
-rw-r--r--Documentation/hwmon/asb10072
-rw-r--r--Documentation/hwmon/asb100.rst73
-rw-r--r--Documentation/hwmon/asc7621296
-rw-r--r--Documentation/hwmon/asc7621.rst326
-rw-r--r--Documentation/hwmon/aspeed-g6-pwm-tach.rst26
-rw-r--r--Documentation/hwmon/aspeed-pwm-tacho22
-rw-r--r--Documentation/hwmon/aspeed-pwm-tacho.rst24
-rw-r--r--Documentation/hwmon/asus_ec_sensors.rst87
-rw-r--r--Documentation/hwmon/asus_rog_ryujin.rst47
-rw-r--r--Documentation/hwmon/asus_wmi_sensors.rst78
-rw-r--r--Documentation/hwmon/bcm54140.rst45
-rw-r--r--Documentation/hwmon/bel-pfe.rst112
-rw-r--r--Documentation/hwmon/bpa-rs600.rst74
-rw-r--r--Documentation/hwmon/bt1-pvt.rst117
-rw-r--r--Documentation/hwmon/cgbc-hwmon.rst63
-rw-r--r--Documentation/hwmon/chipcap2.rst73
-rw-r--r--Documentation/hwmon/coretemp181
-rw-r--r--Documentation/hwmon/coretemp.rst195
-rw-r--r--Documentation/hwmon/corsair-cpro.rst49
-rw-r--r--Documentation/hwmon/corsair-psu.rst101
-rw-r--r--Documentation/hwmon/cros_ec_hwmon.rst31
-rw-r--r--Documentation/hwmon/crps.rst97
-rw-r--r--Documentation/hwmon/da905261
-rw-r--r--Documentation/hwmon/da9052.rst78
-rw-r--r--Documentation/hwmon/da905547
-rw-r--r--Documentation/hwmon/da9055.rst57
-rw-r--r--Documentation/hwmon/dell-smm-hwmon.rst407
-rw-r--r--Documentation/hwmon/dme1737328
-rw-r--r--Documentation/hwmon/dme1737.rst364
-rw-r--r--Documentation/hwmon/dps920ab.rst73
-rw-r--r--Documentation/hwmon/drivetemp.rst70
-rw-r--r--Documentation/hwmon/ds1621187
-rw-r--r--Documentation/hwmon/ds1621.rst217
-rw-r--r--Documentation/hwmon/ds62034
-rw-r--r--Documentation/hwmon/ds620.rst38
-rw-r--r--Documentation/hwmon/emc140359
-rw-r--r--Documentation/hwmon/emc1403.rst91
-rw-r--r--Documentation/hwmon/emc210333
-rw-r--r--Documentation/hwmon/emc2103.rst37
-rw-r--r--Documentation/hwmon/emc2305.rst36
-rw-r--r--Documentation/hwmon/emc6w20142
-rw-r--r--Documentation/hwmon/emc6w201.rst47
-rw-r--r--Documentation/hwmon/f71805f167
-rw-r--r--Documentation/hwmon/f71805f.rst181
-rw-r--r--Documentation/hwmon/f71882fg138
-rw-r--r--Documentation/hwmon/f71882fg.rst193
-rw-r--r--Documentation/hwmon/fam15h_power102
-rw-r--r--Documentation/hwmon/fam15h_power.rst131
-rw-r--r--Documentation/hwmon/fsp-3y.rst28
-rw-r--r--Documentation/hwmon/ftsteutates27
-rw-r--r--Documentation/hwmon/ftsteutates.rst42
-rw-r--r--Documentation/hwmon/g760a36
-rw-r--r--Documentation/hwmon/g760a.rst40
-rw-r--r--Documentation/hwmon/g76265
-rw-r--r--Documentation/hwmon/g762.rst74
-rw-r--r--Documentation/hwmon/gigabyte_waterforce.rst47
-rw-r--r--Documentation/hwmon/gl518sm73
-rw-r--r--Documentation/hwmon/gl518sm.rst80
-rw-r--r--Documentation/hwmon/gpd-fan.rst78
-rw-r--r--Documentation/hwmon/gsc-hwmon.rst53
-rw-r--r--Documentation/hwmon/gxp-fan-ctrl.rst28
-rw-r--r--Documentation/hwmon/hih613037
-rw-r--r--Documentation/hwmon/hih6130.rst45
-rw-r--r--Documentation/hwmon/hp-wmi-sensors.rst140
-rw-r--r--Documentation/hwmon/hs3001.rst37
-rw-r--r--Documentation/hwmon/htu31.rst37
-rw-r--r--Documentation/hwmon/hwmon-kernel-api.rst381
-rw-r--r--Documentation/hwmon/hwmon-kernel-api.txt333
-rw-r--r--Documentation/hwmon/ibm-cffps54
-rw-r--r--Documentation/hwmon/ibm-cffps.rst57
-rw-r--r--Documentation/hwmon/ibmaem38
-rw-r--r--Documentation/hwmon/ibmaem.rst44
-rw-r--r--Documentation/hwmon/ibmpowernv80
-rw-r--r--Documentation/hwmon/ibmpowernv.rst87
-rw-r--r--Documentation/hwmon/ina20993
-rw-r--r--Documentation/hwmon/ina209.rst99
-rw-r--r--Documentation/hwmon/ina233.rst75
-rw-r--r--Documentation/hwmon/ina238.rst109
-rw-r--r--Documentation/hwmon/ina2xx64
-rw-r--r--Documentation/hwmon/ina2xx.rst163
-rw-r--r--Documentation/hwmon/ina322137
-rw-r--r--Documentation/hwmon/ina3221.rst74
-rw-r--r--Documentation/hwmon/index.rst285
-rw-r--r--Documentation/hwmon/inspur-ipsps1.rst79
-rw-r--r--Documentation/hwmon/intel-m10-bmc-hwmon.rst78
-rw-r--r--Documentation/hwmon/ir3522187
-rw-r--r--Documentation/hwmon/ir35221.rst92
-rw-r--r--Documentation/hwmon/ir36021.rst63
-rw-r--r--Documentation/hwmon/ir38064.rst90
-rw-r--r--Documentation/hwmon/isl28022.rst64
-rw-r--r--Documentation/hwmon/isl68137.rst625
-rw-r--r--Documentation/hwmon/it87274
-rw-r--r--Documentation/hwmon/it87.rst385
-rw-r--r--Documentation/hwmon/jc42103
-rw-r--r--Documentation/hwmon/jc42.rst152
-rw-r--r--Documentation/hwmon/k10temp77
-rw-r--r--Documentation/hwmon/k10temp.rst134
-rw-r--r--Documentation/hwmon/k8temp55
-rw-r--r--Documentation/hwmon/k8temp.rst62
-rw-r--r--Documentation/hwmon/kbatt.rst60
-rw-r--r--Documentation/hwmon/kfan.rst39
-rw-r--r--Documentation/hwmon/lan966x.rst40
-rw-r--r--Documentation/hwmon/lineage-pem77
-rw-r--r--Documentation/hwmon/lineage-pem.rst85
-rw-r--r--Documentation/hwmon/lm25066107
-rw-r--r--Documentation/hwmon/lm25066.rst139
-rw-r--r--Documentation/hwmon/lm6377
-rw-r--r--Documentation/hwmon/lm63.rst95
-rw-r--r--Documentation/hwmon/lm7051
-rw-r--r--Documentation/hwmon/lm70.rst69
-rw-r--r--Documentation/hwmon/lm7390
-rw-r--r--Documentation/hwmon/lm73.rst98
-rw-r--r--Documentation/hwmon/lm7593
-rw-r--r--Documentation/hwmon/lm75.rst180
-rw-r--r--Documentation/hwmon/lm7738
-rw-r--r--Documentation/hwmon/lm77.rst45
-rw-r--r--Documentation/hwmon/lm7868
-rw-r--r--Documentation/hwmon/lm78.rst80
-rw-r--r--Documentation/hwmon/lm8063
-rw-r--r--Documentation/hwmon/lm80.rst74
-rw-r--r--Documentation/hwmon/lm8385
-rw-r--r--Documentation/hwmon/lm83.rst97
-rw-r--r--Documentation/hwmon/lm85230
-rw-r--r--Documentation/hwmon/lm85.rst286
-rw-r--r--Documentation/hwmon/lm8777
-rw-r--r--Documentation/hwmon/lm87.rst86
-rw-r--r--Documentation/hwmon/lm90275
-rw-r--r--Documentation/hwmon/lm90.rst684
-rw-r--r--Documentation/hwmon/lm9235
-rw-r--r--Documentation/hwmon/lm92.rst48
-rw-r--r--Documentation/hwmon/lm93309
-rw-r--r--Documentation/hwmon/lm93.rst312
-rw-r--r--Documentation/hwmon/lm9523441
-rw-r--r--Documentation/hwmon/lm95234.rst48
-rw-r--r--Documentation/hwmon/lm9524541
-rw-r--r--Documentation/hwmon/lm95245.rst48
-rw-r--r--Documentation/hwmon/lochnagar.rst83
-rw-r--r--Documentation/hwmon/lt3074.rst72
-rw-r--r--Documentation/hwmon/lt7182s.rst92
-rw-r--r--Documentation/hwmon/ltc294584
-rw-r--r--Documentation/hwmon/ltc2945.rst92
-rw-r--r--Documentation/hwmon/ltc2947.rst100
-rw-r--r--Documentation/hwmon/ltc2978216
-rw-r--r--Documentation/hwmon/ltc2978.rst491
-rw-r--r--Documentation/hwmon/ltc299049
-rw-r--r--Documentation/hwmon/ltc2990.rst62
-rw-r--r--Documentation/hwmon/ltc2991.rst43
-rw-r--r--Documentation/hwmon/ltc2992.rst56
-rw-r--r--Documentation/hwmon/ltc381561
-rw-r--r--Documentation/hwmon/ltc3815.rst67
-rw-r--r--Documentation/hwmon/ltc415147
-rw-r--r--Documentation/hwmon/ltc4151.rst55
-rw-r--r--Documentation/hwmon/ltc421551
-rw-r--r--Documentation/hwmon/ltc4215.rst59
-rw-r--r--Documentation/hwmon/ltc4245102
-rw-r--r--Documentation/hwmon/ltc4245.rst111
-rw-r--r--Documentation/hwmon/ltc426056
-rw-r--r--Documentation/hwmon/ltc4260.rst64
-rw-r--r--Documentation/hwmon/ltc426163
-rw-r--r--Documentation/hwmon/ltc4261.rst71
-rw-r--r--Documentation/hwmon/ltc4282.rst133
-rw-r--r--Documentation/hwmon/ltc4286.rst95
-rw-r--r--Documentation/hwmon/max127.rst45
-rw-r--r--Documentation/hwmon/max15301.rst95
-rw-r--r--Documentation/hwmon/max1606466
-rw-r--r--Documentation/hwmon/max16064.rst75
-rw-r--r--Documentation/hwmon/max16065105
-rw-r--r--Documentation/hwmon/max16065.rst127
-rw-r--r--Documentation/hwmon/max161929
-rw-r--r--Documentation/hwmon/max1619.rst29
-rw-r--r--Documentation/hwmon/max16601.rst145
-rw-r--r--Documentation/hwmon/max166860
-rw-r--r--Documentation/hwmon/max1668.rst70
-rw-r--r--Documentation/hwmon/max19760
-rw-r--r--Documentation/hwmon/max197.rst70
-rw-r--r--Documentation/hwmon/max20730.rst82
-rw-r--r--Documentation/hwmon/max2075177
-rw-r--r--Documentation/hwmon/max20751.rst84
-rw-r--r--Documentation/hwmon/max3172234
-rw-r--r--Documentation/hwmon/max31722.rst46
-rw-r--r--Documentation/hwmon/max31730.rst44
-rw-r--r--Documentation/hwmon/max31760.rst77
-rw-r--r--Documentation/hwmon/max3178560
-rw-r--r--Documentation/hwmon/max31785.rst66
-rw-r--r--Documentation/hwmon/max3179037
-rw-r--r--Documentation/hwmon/max31790.rst45
-rw-r--r--Documentation/hwmon/max31827.rst144
-rw-r--r--Documentation/hwmon/max34440135
-rw-r--r--Documentation/hwmon/max34440.rst209
-rw-r--r--Documentation/hwmon/max6620.rst46
-rw-r--r--Documentation/hwmon/max663949
-rw-r--r--Documentation/hwmon/max6639.rst55
-rw-r--r--Documentation/hwmon/max664221
-rw-r--r--Documentation/hwmon/max665065
-rw-r--r--Documentation/hwmon/max6650.rst74
-rw-r--r--Documentation/hwmon/max669758
-rw-r--r--Documentation/hwmon/max6697.rst91
-rw-r--r--Documentation/hwmon/max77705.rst39
-rw-r--r--Documentation/hwmon/max868875
-rw-r--r--Documentation/hwmon/max8688.rst85
-rw-r--r--Documentation/hwmon/mc13783-adc74
-rw-r--r--Documentation/hwmon/mc13783-adc.rst89
-rw-r--r--Documentation/hwmon/mc33xs2410_hwmon.rst34
-rw-r--r--Documentation/hwmon/mc34vr500.rst32
-rw-r--r--Documentation/hwmon/mcp302129
-rw-r--r--Documentation/hwmon/mcp3021.rst38
-rw-r--r--Documentation/hwmon/menf21bmc50
-rw-r--r--Documentation/hwmon/menf21bmc.rst55
-rw-r--r--Documentation/hwmon/mlxreg-fan60
-rw-r--r--Documentation/hwmon/mlxreg-fan.rst70
-rw-r--r--Documentation/hwmon/mp2856.rst98
-rw-r--r--Documentation/hwmon/mp2869.rst175
-rw-r--r--Documentation/hwmon/mp2888.rst113
-rw-r--r--Documentation/hwmon/mp2891.rst179
-rw-r--r--Documentation/hwmon/mp29502.rst93
-rw-r--r--Documentation/hwmon/mp2975.rst128
-rw-r--r--Documentation/hwmon/mp2993.rst150
-rw-r--r--Documentation/hwmon/mp5023.rst84
-rw-r--r--Documentation/hwmon/mp5920.rst91
-rw-r--r--Documentation/hwmon/mp5990.rst106
-rw-r--r--Documentation/hwmon/mp9941.rst92
-rw-r--r--Documentation/hwmon/mpq8785.rst109
-rw-r--r--Documentation/hwmon/nct668357
-rw-r--r--Documentation/hwmon/nct6683.rst71
-rw-r--r--Documentation/hwmon/nct6775212
-rw-r--r--Documentation/hwmon/nct6775.rst289
-rw-r--r--Documentation/hwmon/nct7363.rst35
-rw-r--r--Documentation/hwmon/nct780231
-rw-r--r--Documentation/hwmon/nct7802.rst38
-rw-r--r--Documentation/hwmon/nct790460
-rw-r--r--Documentation/hwmon/nct7904.rst67
-rw-r--r--Documentation/hwmon/npcm750-pwm-fan22
-rw-r--r--Documentation/hwmon/npcm750-pwm-fan.rst26
-rw-r--r--Documentation/hwmon/nsa32053
-rw-r--r--Documentation/hwmon/nsa320.rst64
-rw-r--r--Documentation/hwmon/ntc_thermistor100
-rw-r--r--Documentation/hwmon/ntc_thermistor.rst111
-rw-r--r--Documentation/hwmon/nzxt-kraken2.rst42
-rw-r--r--Documentation/hwmon/nzxt-kraken3.rst77
-rw-r--r--Documentation/hwmon/nzxt-smart2.rst62
-rw-r--r--Documentation/hwmon/occ.rst153
-rw-r--r--Documentation/hwmon/pc87360184
-rw-r--r--Documentation/hwmon/pc87360.rst198
-rw-r--r--Documentation/hwmon/pc8742759
-rw-r--r--Documentation/hwmon/pc87427.rst63
-rw-r--r--Documentation/hwmon/pcf859190
-rw-r--r--Documentation/hwmon/pcf8591.rst98
-rw-r--r--Documentation/hwmon/peci-cputemp.rst90
-rw-r--r--Documentation/hwmon/peci-dimmtemp.rst57
-rw-r--r--Documentation/hwmon/pim4328.rst105
-rw-r--r--Documentation/hwmon/pli1209bc.rst75
-rw-r--r--Documentation/hwmon/pm6764tr.rst32
-rw-r--r--Documentation/hwmon/pmbus216
-rw-r--r--Documentation/hwmon/pmbus-core283
-rw-r--r--Documentation/hwmon/pmbus-core.rst410
-rw-r--r--Documentation/hwmon/pmbus.rst277
-rw-r--r--Documentation/hwmon/powerz.rst30
-rw-r--r--Documentation/hwmon/powr122045
-rw-r--r--Documentation/hwmon/powr1220.rst53
-rw-r--r--Documentation/hwmon/pt5161l.rst42
-rw-r--r--Documentation/hwmon/pwm-fan17
-rw-r--r--Documentation/hwmon/pwm-fan.rst32
-rw-r--r--Documentation/hwmon/pxe1610.rst107
-rw-r--r--Documentation/hwmon/q54sj108a2.rst54
-rw-r--r--Documentation/hwmon/qnap-mcu-hwmon.rst27
-rw-r--r--Documentation/hwmon/raspberrypi-hwmon22
-rw-r--r--Documentation/hwmon/raspberrypi-hwmon.rst25
-rw-r--r--Documentation/hwmon/sa67.rst41
-rw-r--r--Documentation/hwmon/sbrmi.rst79
-rw-r--r--Documentation/hwmon/sbtsi_temp.rst42
-rw-r--r--Documentation/hwmon/sch562727
-rw-r--r--Documentation/hwmon/sch5627.rst45
-rw-r--r--Documentation/hwmon/sch563634
-rw-r--r--Documentation/hwmon/sch5636.rst37
-rw-r--r--Documentation/hwmon/scpi-hwmon33
-rw-r--r--Documentation/hwmon/scpi-hwmon.rst36
-rw-r--r--Documentation/hwmon/sfctemp.rst33
-rw-r--r--Documentation/hwmon/sg2042-mcu.rst78
-rw-r--r--Documentation/hwmon/sht1573
-rw-r--r--Documentation/hwmon/sht15.rst83
-rw-r--r--Documentation/hwmon/sht2150
-rw-r--r--Documentation/hwmon/sht21.rst72
-rw-r--r--Documentation/hwmon/sht3x76
-rw-r--r--Documentation/hwmon/sht3x.rst112
-rw-r--r--Documentation/hwmon/sht4x.rst59
-rw-r--r--Documentation/hwmon/shtc143
-rw-r--r--Documentation/hwmon/shtc1.rst67
-rw-r--r--Documentation/hwmon/sis5595106
-rw-r--r--Documentation/hwmon/sis5595.rst123
-rw-r--r--Documentation/hwmon/sl28cpld.rst36
-rw-r--r--Documentation/hwmon/smm665157
-rw-r--r--Documentation/hwmon/smpro-hwmon.rst102
-rw-r--r--Documentation/hwmon/smsc47b397163
-rw-r--r--Documentation/hwmon/smsc47b397.rst197
-rw-r--r--Documentation/hwmon/smsc47m163
-rw-r--r--Documentation/hwmon/smsc47m1.rst86
-rw-r--r--Documentation/hwmon/smsc47m192103
-rw-r--r--Documentation/hwmon/smsc47m192.rst116
-rw-r--r--Documentation/hwmon/sparx5-temp.rst33
-rw-r--r--Documentation/hwmon/spd5118.rst63
-rw-r--r--Documentation/hwmon/stpddc60.rst90
-rw-r--r--Documentation/hwmon/submitting-patches145
-rw-r--r--Documentation/hwmon/submitting-patches.rst149
-rw-r--r--Documentation/hwmon/surface_fan.rst25
-rw-r--r--Documentation/hwmon/sy7636a-hwmon.rst26
-rw-r--r--Documentation/hwmon/sysfs-interface809
-rw-r--r--Documentation/hwmon/sysfs-interface.rst656
-rw-r--r--Documentation/hwmon/tc65431
-rw-r--r--Documentation/hwmon/tc654.rst34
-rw-r--r--Documentation/hwmon/tc7420
-rw-r--r--Documentation/hwmon/tc74.rst23
-rw-r--r--Documentation/hwmon/thmc5074
-rw-r--r--Documentation/hwmon/thmc50.rst89
-rw-r--r--Documentation/hwmon/tmp10226
-rw-r--r--Documentation/hwmon/tmp102.rst31
-rw-r--r--Documentation/hwmon/tmp10328
-rw-r--r--Documentation/hwmon/tmp103.rst33
-rw-r--r--Documentation/hwmon/tmp10836
-rw-r--r--Documentation/hwmon/tmp108.rst49
-rw-r--r--Documentation/hwmon/tmp40167
-rw-r--r--Documentation/hwmon/tmp401.rst80
-rw-r--r--Documentation/hwmon/tmp42144
-rw-r--r--Documentation/hwmon/tmp421.rst76
-rw-r--r--Documentation/hwmon/tmp464.rst73
-rw-r--r--Documentation/hwmon/tmp513.rst103
-rw-r--r--Documentation/hwmon/tps23861.rst41
-rw-r--r--Documentation/hwmon/tps25990.rst147
-rw-r--r--Documentation/hwmon/tps4042264
-rw-r--r--Documentation/hwmon/tps40422.rst73
-rw-r--r--Documentation/hwmon/tps53679.rst195
-rw-r--r--Documentation/hwmon/tps546d24.rst35
-rw-r--r--Documentation/hwmon/twl4030-madc-hwmon45
-rw-r--r--Documentation/hwmon/twl4030-madc-hwmon.rst49
-rw-r--r--Documentation/hwmon/ucd9000118
-rw-r--r--Documentation/hwmon/ucd9000.rst137
-rw-r--r--Documentation/hwmon/ucd9200112
-rw-r--r--Documentation/hwmon/ucd9200.rst124
-rw-r--r--Documentation/hwmon/userspace-tools40
-rw-r--r--Documentation/hwmon/userspace-tools.rst43
-rw-r--r--Documentation/hwmon/vexpress34
-rw-r--r--Documentation/hwmon/vexpress.rst41
-rw-r--r--Documentation/hwmon/via686a78
-rw-r--r--Documentation/hwmon/via686a.rst84
-rw-r--r--Documentation/hwmon/vt1211206
-rw-r--r--Documentation/hwmon/vt1211.rst226
-rw-r--r--Documentation/hwmon/w83627ehf190
-rw-r--r--Documentation/hwmon/w83627ehf.rst248
-rw-r--r--Documentation/hwmon/w83627hf115
-rw-r--r--Documentation/hwmon/w83627hf.rst124
-rw-r--r--Documentation/hwmon/w83773g33
-rw-r--r--Documentation/hwmon/w83773g.rst35
-rw-r--r--Documentation/hwmon/w83781d453
-rw-r--r--Documentation/hwmon/w83781d.rst513
-rw-r--r--Documentation/hwmon/w83791d161
-rw-r--r--Documentation/hwmon/w83791d.rst180
-rw-r--r--Documentation/hwmon/w83792d181
-rw-r--r--Documentation/hwmon/w83792d.rst199
-rw-r--r--Documentation/hwmon/w83793106
-rw-r--r--Documentation/hwmon/w83793.rst113
-rw-r--r--Documentation/hwmon/w83795127
-rw-r--r--Documentation/hwmon/w83795.rst142
-rw-r--r--Documentation/hwmon/w83l785ts40
-rw-r--r--Documentation/hwmon/w83l785ts.rst45
-rw-r--r--Documentation/hwmon/w83l786ng54
-rw-r--r--Documentation/hwmon/w83l786ng.rst66
-rw-r--r--Documentation/hwmon/wm831x37
-rw-r--r--Documentation/hwmon/wm831x.rst40
-rw-r--r--Documentation/hwmon/wm835026
-rw-r--r--Documentation/hwmon/wm8350.rst30
-rw-r--r--Documentation/hwmon/xdp710.rst83
-rw-r--r--Documentation/hwmon/xdpe12284.rst106
-rw-r--r--Documentation/hwmon/xdpe152c4.rst118
-rw-r--r--Documentation/hwmon/xgene-hwmon30
-rw-r--r--Documentation/hwmon/xgene-hwmon.rst36
-rw-r--r--Documentation/hwmon/zl6100160
-rw-r--r--Documentation/hwmon/zl6100.rst259
-rw-r--r--Documentation/hwspinlock.txt404
-rw-r--r--Documentation/i2c/DMA-considerations71
-rw-r--r--Documentation/i2c/busses/i2c-ali153542
-rw-r--r--Documentation/i2c/busses/i2c-ali1535.rst45
-rw-r--r--Documentation/i2c/busses/i2c-ali156327
-rw-r--r--Documentation/i2c/busses/i2c-ali1563.rst30
-rw-r--r--Documentation/i2c/busses/i2c-ali15x3112
-rw-r--r--Documentation/i2c/busses/i2c-ali15x3.rst122
-rw-r--r--Documentation/i2c/busses/i2c-amd-mp2.rst25
-rw-r--r--Documentation/i2c/busses/i2c-amd75625
-rw-r--r--Documentation/i2c/busses/i2c-amd756.rst29
-rw-r--r--Documentation/i2c/busses/i2c-amd811141
-rw-r--r--Documentation/i2c/busses/i2c-amd8111.rst43
-rw-r--r--Documentation/i2c/busses/i2c-diolan-u2c26
-rw-r--r--Documentation/i2c/busses/i2c-diolan-u2c.rst29
-rw-r--r--Documentation/i2c/busses/i2c-i801170
-rw-r--r--Documentation/i2c/busses/i2c-i801.rst191
-rw-r--r--Documentation/i2c/busses/i2c-ismt36
-rw-r--r--Documentation/i2c/busses/i2c-ismt.rst44
-rw-r--r--Documentation/i2c/busses/i2c-mlxcpld51
-rw-r--r--Documentation/i2c/busses/i2c-mlxcpld.rst57
-rw-r--r--Documentation/i2c/busses/i2c-nforce250
-rw-r--r--Documentation/i2c/busses/i2c-nforce2.rst53
-rw-r--r--Documentation/i2c/busses/i2c-nvidia-gpu18
-rw-r--r--Documentation/i2c/busses/i2c-nvidia-gpu.rst20
-rw-r--r--Documentation/i2c/busses/i2c-ocores68
-rw-r--r--Documentation/i2c/busses/i2c-ocores.rst70
-rw-r--r--Documentation/i2c/busses/i2c-parport178
-rw-r--r--Documentation/i2c/busses/i2c-parport-light22
-rw-r--r--Documentation/i2c/busses/i2c-parport.rst190
-rw-r--r--Documentation/i2c/busses/i2c-pca-isa23
-rw-r--r--Documentation/i2c/busses/i2c-pca-isa.rst26
-rw-r--r--Documentation/i2c/busses/i2c-piix4110
-rw-r--r--Documentation/i2c/busses/i2c-piix4.rst174
-rw-r--r--Documentation/i2c/busses/i2c-sis559559
-rw-r--r--Documentation/i2c/busses/i2c-sis5595.rst68
-rw-r--r--Documentation/i2c/busses/i2c-sis63058
-rw-r--r--Documentation/i2c/busses/i2c-sis630.rst63
-rw-r--r--Documentation/i2c/busses/i2c-sis96x73
-rw-r--r--Documentation/i2c/busses/i2c-sis96x.rst82
-rw-r--r--Documentation/i2c/busses/i2c-taos-evm46
-rw-r--r--Documentation/i2c/busses/i2c-taos-evm.rst48
-rw-r--r--Documentation/i2c/busses/i2c-via34
-rw-r--r--Documentation/i2c/busses/i2c-via.rst40
-rw-r--r--Documentation/i2c/busses/i2c-viapro73
-rw-r--r--Documentation/i2c/busses/i2c-viapro.rst77
-rw-r--r--Documentation/i2c/busses/index.rst32
-rw-r--r--Documentation/i2c/busses/scx200_acb32
-rw-r--r--Documentation/i2c/busses/scx200_acb.rst37
-rw-r--r--Documentation/i2c/dev-interface213
-rw-r--r--Documentation/i2c/dev-interface.rst221
-rw-r--r--Documentation/i2c/dma-considerations.rst71
-rw-r--r--Documentation/i2c/fault-codes124
-rw-r--r--Documentation/i2c/fault-codes.rst135
-rw-r--r--Documentation/i2c/functionality148
-rw-r--r--Documentation/i2c/functionality.rst156
-rw-r--r--Documentation/i2c/gpio-fault-injection81
-rw-r--r--Documentation/i2c/gpio-fault-injection.rst136
-rw-r--r--Documentation/i2c/i2c-address-translators.rst96
-rw-r--r--Documentation/i2c/i2c-protocol88
-rw-r--r--Documentation/i2c/i2c-protocol.rst96
-rw-r--r--Documentation/i2c/i2c-stub64
-rw-r--r--Documentation/i2c/i2c-stub.rst66
-rw-r--r--Documentation/i2c/i2c-sysfs.rst387
-rw-r--r--Documentation/i2c/i2c-topology376
-rw-r--r--Documentation/i2c/i2c-topology.rst412
-rw-r--r--Documentation/i2c/i2c_bus.svg1342
-rw-r--r--Documentation/i2c/index.rst75
-rw-r--r--Documentation/i2c/instantiating-devices248
-rw-r--r--Documentation/i2c/instantiating-devices.rst272
-rw-r--r--Documentation/i2c/muxes/i2c-mux-gpio83
-rw-r--r--Documentation/i2c/muxes/i2c-mux-gpio.rst85
-rw-r--r--Documentation/i2c/old-module-parameters44
-rw-r--r--Documentation/i2c/old-module-parameters.rst55
-rw-r--r--Documentation/i2c/slave-eeprom-backend14
-rw-r--r--Documentation/i2c/slave-eeprom-backend.rst26
-rw-r--r--Documentation/i2c/slave-interface193
-rw-r--r--Documentation/i2c/slave-interface.rst201
-rw-r--r--Documentation/i2c/slave-testunit-backend.rst235
-rw-r--r--Documentation/i2c/smbus-protocol283
-rw-r--r--Documentation/i2c/smbus-protocol.rst324
-rw-r--r--Documentation/i2c/summary43
-rw-r--r--Documentation/i2c/summary.rst82
-rw-r--r--Documentation/i2c/ten-bit-addresses28
-rw-r--r--Documentation/i2c/ten-bit-addresses.rst33
-rw-r--r--Documentation/i2c/upgrading-clients279
-rw-r--r--Documentation/i2c/writing-clients403
-rw-r--r--Documentation/i2c/writing-clients.rst403
-rw-r--r--Documentation/ia64/IRQ-redir.txt69
-rw-r--r--Documentation/ia64/README43
-rw-r--r--Documentation/ia64/aliasing.txt221
-rw-r--r--Documentation/ia64/efirtc.txt128
-rw-r--r--Documentation/ia64/err_inject.txt1068
-rw-r--r--Documentation/ia64/fsys.txt286
-rw-r--r--Documentation/ia64/mca.txt194
-rw-r--r--Documentation/ia64/serial.txt151
-rw-r--r--Documentation/ia64/xen.txt183
-rw-r--r--Documentation/ide/ChangeLog.ide-cd.1994-2004268
-rw-r--r--Documentation/ide/ChangeLog.ide-floppy.1996-200263
-rw-r--r--Documentation/ide/ChangeLog.ide-tape.1995-2002257
-rw-r--r--Documentation/ide/ide-tape.txt65
-rw-r--r--Documentation/ide/ide.txt256
-rw-r--r--Documentation/ide/warm-plug-howto.txt18
-rw-r--r--Documentation/iio/ad3552r.rst73
-rw-r--r--Documentation/iio/ad4000.rst220
-rw-r--r--Documentation/iio/ad4030.rst180
-rw-r--r--Documentation/iio/ad4695.rst269
-rw-r--r--Documentation/iio/ad7191.rst119
-rw-r--r--Documentation/iio/ad7380.rst212
-rw-r--r--Documentation/iio/ad7606.rst189
-rw-r--r--Documentation/iio/ad7625.rst91
-rw-r--r--Documentation/iio/ad7944.rst178
-rw-r--r--Documentation/iio/ade9000.rst268
-rw-r--r--Documentation/iio/adis16475.rst384
-rw-r--r--Documentation/iio/adis16480.rst446
-rw-r--r--Documentation/iio/adis16550.rst376
-rw-r--r--Documentation/iio/adxl313.rst293
-rw-r--r--Documentation/iio/adxl345.rst443
-rw-r--r--Documentation/iio/adxl380.rst233
-rw-r--r--Documentation/iio/bno055.rst51
-rw-r--r--Documentation/iio/ep93xx_adc.rst40
-rw-r--r--Documentation/iio/ep93xx_adc.txt29
-rw-r--r--Documentation/iio/iio_adc.rst305
-rw-r--r--Documentation/iio/iio_configfs.rst102
-rw-r--r--Documentation/iio/iio_configfs.txt93
-rw-r--r--Documentation/iio/iio_devbuf.rst152
-rw-r--r--Documentation/iio/iio_dmabuf_api.rst54
-rw-r--r--Documentation/iio/iio_tools.rst27
-rw-r--r--Documentation/iio/index.rst40
-rw-r--r--Documentation/iio/opt4060.rst61
-rw-r--r--Documentation/images/COPYING-logo21
-rw-r--r--Documentation/images/logo.gif (renamed from Documentation/logo.gif)bin16335 -> 16335 bytes
-rw-r--r--Documentation/images/logo.svg2040
-rw-r--r--Documentation/index.rst140
-rw-r--r--Documentation/infiniband/core_locking.rst116
-rw-r--r--Documentation/infiniband/core_locking.txt112
-rw-r--r--Documentation/infiniband/index.rst24
-rw-r--r--Documentation/infiniband/ipoib.rst115
-rw-r--r--Documentation/infiniband/ipoib.txt105
-rw-r--r--Documentation/infiniband/opa_vnic.rst159
-rw-r--r--Documentation/infiniband/opa_vnic.txt153
-rw-r--r--Documentation/infiniband/sysfs.rst6
-rw-r--r--Documentation/infiniband/sysfs.txt4
-rw-r--r--Documentation/infiniband/tag_matching.rst69
-rw-r--r--Documentation/infiniband/tag_matching.txt64
-rw-r--r--Documentation/infiniband/ucaps.rst71
-rw-r--r--Documentation/infiniband/user_mad.rst166
-rw-r--r--Documentation/infiniband/user_mad.txt153
-rw-r--r--Documentation/infiniband/user_verbs.rst75
-rw-r--r--Documentation/infiniband/user_verbs.txt69
-rw-r--r--Documentation/input/conf.py10
-rw-r--r--Documentation/input/devices/amijoy.rst125
-rw-r--r--Documentation/input/devices/atarikbd.rst4
-rw-r--r--Documentation/input/devices/edt-ft5x06.rst21
-rw-r--r--Documentation/input/devices/elantech.rst2
-rw-r--r--Documentation/input/devices/iforce-protocol.rst2
-rw-r--r--Documentation/input/devices/ntrig.rst2
-rw-r--r--Documentation/input/devices/pxrc.rst7
-rw-r--r--Documentation/input/devices/rotary-encoder.rst8
-rw-r--r--Documentation/input/devices/xpad.rst12
-rw-r--r--Documentation/input/event-codes.rst68
-rw-r--r--Documentation/input/ff.rst6
-rw-r--r--Documentation/input/gamepad.rst21
-rw-r--r--Documentation/input/gameport-programming.rst35
-rw-r--r--Documentation/input/index.rst6
-rw-r--r--Documentation/input/input-programming.rst87
-rw-r--r--Documentation/input/input.rst12
-rw-r--r--Documentation/input/input_kapi.rst5
-rw-r--r--Documentation/input/input_uapi.rst5
-rw-r--r--Documentation/input/joydev/index.rst5
-rw-r--r--Documentation/input/joydev/joystick-api.rst16
-rw-r--r--Documentation/input/joydev/joystick.rst27
-rw-r--r--Documentation/input/multi-touch-protocol.rst14
-rw-r--r--Documentation/input/notifier.rst3
-rw-r--r--Documentation/input/uinput.rst8
-rw-r--r--Documentation/io-mapping.txt97
-rw-r--r--Documentation/ioctl/botching-up-ioctls.txt224
-rw-r--r--Documentation/ioctl/cdrom.txt967
-rw-r--r--Documentation/ioctl/hdio.txt1071
-rw-r--r--Documentation/ioctl/ioctl-decoding.txt24
-rw-r--r--Documentation/ioctl/ioctl-number.txt349
-rw-r--r--Documentation/iostats.txt193
-rw-r--r--Documentation/isa.txt122
-rw-r--r--Documentation/isapnp.txt15
-rw-r--r--Documentation/isdn/CREDITS70
-rw-r--r--Documentation/isdn/HiSax.cert96
-rw-r--r--Documentation/isdn/INTERFACE759
-rw-r--r--Documentation/isdn/INTERFACE.CAPI355
-rw-r--r--Documentation/isdn/INTERFACE.fax163
-rw-r--r--Documentation/isdn/README599
-rw-r--r--Documentation/isdn/README.FAQ26
-rw-r--r--Documentation/isdn/README.HiSax659
-rw-r--r--Documentation/isdn/README.audio138
-rw-r--r--Documentation/isdn/README.avmb1187
-rw-r--r--Documentation/isdn/README.concap259
-rw-r--r--Documentation/isdn/README.diversion127
-rw-r--r--Documentation/isdn/README.fax45
-rw-r--r--Documentation/isdn/README.gigaset423
-rw-r--r--Documentation/isdn/README.hfc-pci41
-rw-r--r--Documentation/isdn/README.hysdn195
-rw-r--r--Documentation/isdn/README.mISDN6
-rw-r--r--Documentation/isdn/README.syncppp58
-rw-r--r--Documentation/isdn/README.x25184
-rw-r--r--Documentation/isdn/credits.rst73
-rw-r--r--Documentation/isdn/index.rst21
-rw-r--r--Documentation/isdn/interface_capi.rst336
-rw-r--r--Documentation/isdn/m_isdn.rst9
-rw-r--r--Documentation/isdn/syncPPP.FAQ224
-rw-r--r--Documentation/kbuild/Kconfig.recursion-issue-016
-rw-r--r--Documentation/kbuild/Kconfig.recursion-issue-024
-rw-r--r--Documentation/kbuild/bash-completion.rst65
-rw-r--r--Documentation/kbuild/gcc-plugins.rst133
-rw-r--r--Documentation/kbuild/gendwarfksyms.rst395
-rw-r--r--Documentation/kbuild/headers_install.rst44
-rw-r--r--Documentation/kbuild/headers_install.txt50
-rw-r--r--Documentation/kbuild/index.rst33
-rw-r--r--Documentation/kbuild/issues.rst15
-rw-r--r--Documentation/kbuild/kbuild.rst343
-rw-r--r--Documentation/kbuild/kbuild.txt253
-rw-r--r--Documentation/kbuild/kconfig-language.rst811
-rw-r--r--Documentation/kbuild/kconfig-language.txt669
-rw-r--r--Documentation/kbuild/kconfig-macro-language.rst247
-rw-r--r--Documentation/kbuild/kconfig-macro-language.txt242
-rw-r--r--Documentation/kbuild/kconfig.rst295
-rw-r--r--Documentation/kbuild/kconfig.txt272
-rw-r--r--Documentation/kbuild/llvm.rst225
-rw-r--r--Documentation/kbuild/makefiles.rst1698
-rw-r--r--Documentation/kbuild/makefiles.txt1402
-rw-r--r--Documentation/kbuild/modules.rst512
-rw-r--r--Documentation/kbuild/modules.txt541
-rw-r--r--Documentation/kbuild/reproducible-builds.rst140
-rw-r--r--Documentation/kdump/gdbmacros.txt264
-rw-r--r--Documentation/kdump/kdump.txt509
-rw-r--r--Documentation/kernel-hacking/conf.py10
-rw-r--r--Documentation/kernel-hacking/false-sharing.rst206
-rw-r--r--Documentation/kernel-hacking/hacking.rst73
-rw-r--r--Documentation/kernel-hacking/index.rst1
-rw-r--r--Documentation/kernel-hacking/locking.rst236
-rw-r--r--Documentation/kernel-per-CPU-kthreads.txt356
-rw-r--r--Documentation/kobject.txt430
-rw-r--r--Documentation/kprobes.txt800
-rw-r--r--Documentation/kref.txt319
-rw-r--r--Documentation/laptops/asus-laptop.txt257
-rw-r--r--Documentation/laptops/disk-shock-protection.txt149
-rw-r--r--Documentation/laptops/laptop-mode.txt782
-rw-r--r--Documentation/laptops/sony-laptop.txt144
-rw-r--r--Documentation/laptops/sonypi.txt152
-rw-r--r--Documentation/laptops/thinkpad-acpi.txt1487
-rw-r--r--Documentation/laptops/toshiba_haps.txt76
-rw-r--r--Documentation/leds/index.rst32
-rw-r--r--Documentation/leds/leds-blinkm.rst107
-rw-r--r--Documentation/leds/leds-blinkm.txt80
-rw-r--r--Documentation/leds/leds-cht-wcove.rst38
-rw-r--r--Documentation/leds/leds-class-flash.rst90
-rw-r--r--Documentation/leds/leds-class-flash.txt73
-rw-r--r--Documentation/leds/leds-class-multicolor.rst106
-rw-r--r--Documentation/leds/leds-class.rst260
-rw-r--r--Documentation/leds/leds-class.txt122
-rw-r--r--Documentation/leds/leds-el15203000.rst140
-rw-r--r--Documentation/leds/leds-lm3556.rst137
-rw-r--r--Documentation/leds/leds-lm3556.txt85
-rw-r--r--Documentation/leds/leds-lp3944.rst59
-rw-r--r--Documentation/leds/leds-lp3944.txt50
-rw-r--r--Documentation/leds/leds-lp5521.rst115
-rw-r--r--Documentation/leds/leds-lp5521.txt101
-rw-r--r--Documentation/leds/leds-lp5523.rst147
-rw-r--r--Documentation/leds/leds-lp5523.txt130
-rw-r--r--Documentation/leds/leds-lp5562.rst137
-rw-r--r--Documentation/leds/leds-lp5562.txt120
-rw-r--r--Documentation/leds/leds-lp55xx.rst224
-rw-r--r--Documentation/leds/leds-lp55xx.txt194
-rw-r--r--Documentation/leds/leds-mlxcpld.rst118
-rw-r--r--Documentation/leds/leds-mlxcpld.txt110
-rw-r--r--Documentation/leds/leds-mt6370-rgb.rst64
-rw-r--r--Documentation/leds/leds-qcom-lpg.rst78
-rw-r--r--Documentation/leds/leds-sc27xx.rst27
-rw-r--r--Documentation/leds/leds-st1202.rst34
-rw-r--r--Documentation/leds/ledtrig-oneshot.rst44
-rw-r--r--Documentation/leds/ledtrig-oneshot.txt43
-rw-r--r--Documentation/leds/ledtrig-transient.rst160
-rw-r--r--Documentation/leds/ledtrig-transient.txt152
-rw-r--r--Documentation/leds/ledtrig-usbport.rst46
-rw-r--r--Documentation/leds/ledtrig-usbport.txt41
-rw-r--r--Documentation/leds/uleds.rst37
-rw-r--r--Documentation/leds/uleds.txt36
-rw-r--r--Documentation/leds/well-known-leds.txt111
-rw-r--r--Documentation/lightnvm/pblk.txt21
-rw-r--r--Documentation/litmus-tests/README80
-rw-r--r--Documentation/litmus-tests/atomic/Atomic-RMW+mb__after_atomic-is-stronger-than-acquire.litmus32
-rw-r--r--Documentation/litmus-tests/atomic/Atomic-RMW-ops-are-atomic-WRT-atomic_set.litmus25
-rw-r--r--Documentation/litmus-tests/atomic/cmpxchg-fail-ordered-1.litmus35
-rw-r--r--Documentation/litmus-tests/atomic/cmpxchg-fail-ordered-2.litmus30
-rw-r--r--Documentation/litmus-tests/atomic/cmpxchg-fail-unordered-1.litmus34
-rw-r--r--Documentation/litmus-tests/atomic/cmpxchg-fail-unordered-2.litmus30
-rw-r--r--Documentation/litmus-tests/locking/DCL-broken.litmus54
-rw-r--r--Documentation/litmus-tests/locking/DCL-fixed.litmus55
-rw-r--r--Documentation/litmus-tests/locking/RM-broken.litmus41
-rw-r--r--Documentation/litmus-tests/locking/RM-fixed.litmus41
-rw-r--r--Documentation/litmus-tests/rcu/RCU+sync+free.litmus42
-rw-r--r--Documentation/litmus-tests/rcu/RCU+sync+read.litmus37
-rw-r--r--Documentation/livepatch/api.rst30
-rw-r--r--Documentation/livepatch/callbacks.rst133
-rw-r--r--Documentation/livepatch/callbacks.txt605
-rw-r--r--Documentation/livepatch/cumulative-patches.rst102
-rw-r--r--Documentation/livepatch/index.rst24
-rw-r--r--Documentation/livepatch/livepatch.rst448
-rw-r--r--Documentation/livepatch/livepatch.txt467
-rw-r--r--Documentation/livepatch/module-elf-format.rst305
-rw-r--r--Documentation/livepatch/module-elf-format.txt323
-rw-r--r--Documentation/livepatch/reliable-stacktrace.rst309
-rw-r--r--Documentation/livepatch/shadow-vars.rst226
-rw-r--r--Documentation/livepatch/shadow-vars.txt209
-rw-r--r--Documentation/livepatch/system-state.rst167
-rw-r--r--Documentation/locking/futex-requeue-pi.rst132
-rw-r--r--Documentation/locking/hwspinlock.rst441
-rw-r--r--Documentation/locking/index.rst33
-rw-r--r--Documentation/locking/lockdep-design.rst663
-rw-r--r--Documentation/locking/lockdep-design.txt333
-rw-r--r--Documentation/locking/lockstat.rst204
-rw-r--r--Documentation/locking/lockstat.txt183
-rw-r--r--Documentation/locking/locktorture.rst169
-rw-r--r--Documentation/locking/locktorture.txt145
-rw-r--r--Documentation/locking/locktypes.rst555
-rw-r--r--Documentation/locking/mutex-design.rst170
-rw-r--r--Documentation/locking/mutex-design.txt142
-rw-r--r--Documentation/locking/percpu-rw-semaphore.rst28
-rw-r--r--Documentation/locking/pi-futex.rst122
-rw-r--r--Documentation/locking/preempt-locking.rst144
-rw-r--r--Documentation/locking/robust-futex-ABI.rst184
-rw-r--r--Documentation/locking/robust-futexes.rst221
-rw-r--r--Documentation/locking/rt-mutex-design.rst574
-rw-r--r--Documentation/locking/rt-mutex-design.txt559
-rw-r--r--Documentation/locking/rt-mutex.rst77
-rw-r--r--Documentation/locking/rt-mutex.txt73
-rw-r--r--Documentation/locking/seqlock.rst241
-rw-r--r--Documentation/locking/spinlocks.rst165
-rw-r--r--Documentation/locking/spinlocks.txt167
-rw-r--r--Documentation/locking/ww-mutex-design.rst393
-rw-r--r--Documentation/locking/ww-mutex-design.txt383
-rw-r--r--Documentation/lockup-watchdogs.txt83
-rw-r--r--Documentation/logo.txt13
-rw-r--r--Documentation/lsm.txt201
-rw-r--r--Documentation/lzo.txt171
-rw-r--r--Documentation/m68k/README.buddha210
-rw-r--r--Documentation/m68k/kernel-options.txt884
-rw-r--r--Documentation/maintainer/conf.py10
-rw-r--r--Documentation/maintainer/configure-git.rst28
-rw-r--r--Documentation/maintainer/feature-and-driver-maintainers.rst166
-rw-r--r--Documentation/maintainer/index.rst5
-rw-r--r--Documentation/maintainer/maintainer-entry-profile.rst114
-rw-r--r--Documentation/maintainer/messy-diffstat.rst96
-rw-r--r--Documentation/maintainer/modifying-patches.rst50
-rw-r--r--Documentation/maintainer/pull-requests.rst10
-rw-r--r--Documentation/maintainer/rebasing-and-merging.rst222
-rw-r--r--Documentation/md/md-cluster.txt325
-rw-r--r--Documentation/md/raid5-cache.txt109
-rw-r--r--Documentation/md/raid5-ppl.txt45
-rw-r--r--Documentation/media/.gitignore3
-rw-r--r--Documentation/media/Makefile67
-rw-r--r--Documentation/media/audio.h.rst.exceptions17
-rw-r--r--Documentation/media/cec-drivers/index.rst34
-rw-r--r--Documentation/media/cec-drivers/pulse8-cec.rst11
-rw-r--r--Documentation/media/conf.py10
-rw-r--r--Documentation/media/dvb-drivers/avermedia.rst267
-rw-r--r--Documentation/media/dvb-drivers/bt8xx.rst122
-rw-r--r--Documentation/media/dvb-drivers/cards.rst144
-rw-r--r--Documentation/media/dvb-drivers/ci.rst229
-rw-r--r--Documentation/media/dvb-drivers/faq.rst167
-rw-r--r--Documentation/media/dvb-drivers/index.rst45
-rw-r--r--Documentation/media/dvb-drivers/intro.rst21
-rw-r--r--Documentation/media/dvb-drivers/udev.rst61
-rw-r--r--Documentation/media/index.rst24
-rw-r--r--Documentation/media/intro.rst46
-rw-r--r--Documentation/media/kapi/csi2.rst72
-rw-r--r--Documentation/media/kapi/dtv-net.rst4
-rw-r--r--Documentation/media/kapi/mc-core.rst266
-rw-r--r--Documentation/media/kapi/v4l2-async.rst3
-rw-r--r--Documentation/media/kapi/v4l2-clocks.rst29
-rw-r--r--Documentation/media/kapi/v4l2-dv-timings.rst4
-rw-r--r--Documentation/media/kapi/v4l2-fh.rst139
-rw-r--r--Documentation/media/kapi/v4l2-fwnode.rst3
-rw-r--r--Documentation/media/kapi/v4l2-rect.rst4
-rw-r--r--Documentation/media/kapi/v4l2-subdev.rst442
-rw-r--r--Documentation/media/kapi/v4l2-videobuf.rst404
-rw-r--r--Documentation/media/media_kapi.rst38
-rw-r--r--Documentation/media/media_uapi.rst33
-rw-r--r--Documentation/media/uapi/cec/cec-func-close.rst47
-rw-r--r--Documentation/media/uapi/cec/cec-header.rst10
-rw-r--r--Documentation/media/uapi/dvb/audio-bilingual-channel-select.rst59
-rw-r--r--Documentation/media/uapi/dvb/audio-channel-select.rst59
-rw-r--r--Documentation/media/uapi/dvb/audio-clear-buffer.rst48
-rw-r--r--Documentation/media/uapi/dvb/audio-continue.rst49
-rw-r--r--Documentation/media/uapi/dvb/audio-fclose.rst56
-rw-r--r--Documentation/media/uapi/dvb/audio-fopen.rst108
-rw-r--r--Documentation/media/uapi/dvb/audio-fwrite.rst84
-rw-r--r--Documentation/media/uapi/dvb/audio-get-capabilities.rst56
-rw-r--r--Documentation/media/uapi/dvb/audio-get-status.rst56
-rw-r--r--Documentation/media/uapi/dvb/audio-pause.rst50
-rw-r--r--Documentation/media/uapi/dvb/audio-play.rst49
-rw-r--r--Documentation/media/uapi/dvb/audio-select-source.rst58
-rw-r--r--Documentation/media/uapi/dvb/audio-set-av-sync.rst60
-rw-r--r--Documentation/media/uapi/dvb/audio-set-bypass-mode.rst63
-rw-r--r--Documentation/media/uapi/dvb/audio-set-id.rst60
-rw-r--r--Documentation/media/uapi/dvb/audio-set-mixer.rst54
-rw-r--r--Documentation/media/uapi/dvb/audio-set-mute.rst64
-rw-r--r--Documentation/media/uapi/dvb/audio-set-streamtype.rst70
-rw-r--r--Documentation/media/uapi/dvb/audio-stop.rst49
-rw-r--r--Documentation/media/uapi/dvb/audio.rst27
-rw-r--r--Documentation/media/uapi/dvb/audio_data_types.rst116
-rw-r--r--Documentation/media/uapi/dvb/audio_function_calls.rst30
-rw-r--r--Documentation/media/uapi/dvb/ca-set-descr.rst46
-rw-r--r--Documentation/media/uapi/dvb/ca_data_types.rst9
-rw-r--r--Documentation/media/uapi/dvb/dmx-add-pid.rst49
-rw-r--r--Documentation/media/uapi/dvb/dmx-munmap.rst54
-rw-r--r--Documentation/media/uapi/dvb/dmx-set-buffer-size.rst50
-rw-r--r--Documentation/media/uapi/dvb/dmx-stop.rst45
-rw-r--r--Documentation/media/uapi/dvb/dmx_types.rst9
-rw-r--r--Documentation/media/uapi/dvb/examples.rst16
-rw-r--r--Documentation/media/uapi/dvb/fe-diseqc-recv-slave-reply.rst48
-rw-r--r--Documentation/media/uapi/dvb/fe-diseqc-send-master-cmd.rst49
-rw-r--r--Documentation/media/uapi/dvb/frontend-header.rst4
-rw-r--r--Documentation/media/uapi/dvb/headers.rst21
-rw-r--r--Documentation/media/uapi/dvb/intro.rst183
-rw-r--r--Documentation/media/uapi/dvb/legacy_dvb_apis.rst32
-rw-r--r--Documentation/media/uapi/dvb/net-remove-if.rst48
-rw-r--r--Documentation/media/uapi/dvb/net-types.rst9
-rw-r--r--Documentation/media/uapi/dvb/net.rst41
-rw-r--r--Documentation/media/uapi/dvb/video-clear-buffer.rst56
-rw-r--r--Documentation/media/uapi/dvb/video-command.rst98
-rw-r--r--Documentation/media/uapi/dvb/video-continue.rst59
-rw-r--r--Documentation/media/uapi/dvb/video-fast-forward.rst76
-rw-r--r--Documentation/media/uapi/dvb/video-fclose.rst55
-rw-r--r--Documentation/media/uapi/dvb/video-fopen.rst115
-rw-r--r--Documentation/media/uapi/dvb/video-freeze.rst63
-rw-r--r--Documentation/media/uapi/dvb/video-fwrite.rst83
-rw-r--r--Documentation/media/uapi/dvb/video-get-capabilities.rst63
-rw-r--r--Documentation/media/uapi/dvb/video-get-event.rst107
-rw-r--r--Documentation/media/uapi/dvb/video-get-frame-count.rst67
-rw-r--r--Documentation/media/uapi/dvb/video-get-pts.rst71
-rw-r--r--Documentation/media/uapi/dvb/video-get-size.rst71
-rw-r--r--Documentation/media/uapi/dvb/video-get-status.rst73
-rw-r--r--Documentation/media/uapi/dvb/video-play.rst59
-rw-r--r--Documentation/media/uapi/dvb/video-select-source.rst77
-rw-r--r--Documentation/media/uapi/dvb/video-set-blank.rst66
-rw-r--r--Documentation/media/uapi/dvb/video-set-display-format.rst62
-rw-r--r--Documentation/media/uapi/dvb/video-set-format.rst85
-rw-r--r--Documentation/media/uapi/dvb/video-set-streamtype.rst63
-rw-r--r--Documentation/media/uapi/dvb/video-slowmotion.rst76
-rw-r--r--Documentation/media/uapi/dvb/video-stillpicture.rst63
-rw-r--r--Documentation/media/uapi/dvb/video-stop.rst76
-rw-r--r--Documentation/media/uapi/dvb/video-try-command.rst68
-rw-r--r--Documentation/media/uapi/dvb/video.rst36
-rw-r--r--Documentation/media/uapi/dvb/video_function_calls.rst35
-rw-r--r--Documentation/media/uapi/dvb/video_types.rst248
-rw-r--r--Documentation/media/uapi/mediactl/media-func-close.rst47
-rw-r--r--Documentation/media/uapi/mediactl/media-header.rst10
-rw-r--r--Documentation/media/uapi/rc/lirc-dev-intro.rst133
-rw-r--r--Documentation/media/uapi/rc/lirc-dev.rst14
-rw-r--r--Documentation/media/uapi/rc/lirc-header.rst10
-rw-r--r--Documentation/media/uapi/rc/lirc-set-rec-timeout-reports.rst49
-rw-r--r--Documentation/media/uapi/v4l/async.rst9
-rw-r--r--Documentation/media/uapi/v4l/audio.rst97
-rw-r--r--Documentation/media/uapi/v4l/buffer.rst809
-rw-r--r--Documentation/media/uapi/v4l/capture-example.rst13
-rw-r--r--Documentation/media/uapi/v4l/common.rst46
-rw-r--r--Documentation/media/uapi/v4l/depth-formats.rst16
-rw-r--r--Documentation/media/uapi/v4l/dev-codec.rst36
-rw-r--r--Documentation/media/uapi/v4l/dev-effect.rst21
-rw-r--r--Documentation/media/uapi/v4l/dev-meta.rst60
-rw-r--r--Documentation/media/uapi/v4l/dev-subdev.rst456
-rw-r--r--Documentation/media/uapi/v4l/dev-teletext.rst34
-rw-r--r--Documentation/media/uapi/v4l/devices.rst28
-rw-r--r--Documentation/media/uapi/v4l/dv-timings.rst38
-rw-r--r--Documentation/media/uapi/v4l/extended-controls.rst4059
-rw-r--r--Documentation/media/uapi/v4l/func-munmap.rst58
-rw-r--r--Documentation/media/uapi/v4l/func-poll.rst117
-rw-r--r--Documentation/media/uapi/v4l/func-read.rst133
-rw-r--r--Documentation/media/uapi/v4l/func-select.rst120
-rw-r--r--Documentation/media/uapi/v4l/libv4l.rst13
-rw-r--r--Documentation/media/uapi/v4l/meta-formats.rst18
-rw-r--r--Documentation/media/uapi/v4l/open.rst158
-rw-r--r--Documentation/media/uapi/v4l/pixfmt-compressed.rst120
-rw-r--r--Documentation/media/uapi/v4l/pixfmt-grey.rst44
-rw-r--r--Documentation/media/uapi/v4l/pixfmt-m420.rst126
-rw-r--r--Documentation/media/uapi/v4l/pixfmt-meta-d4xx.rst210
-rw-r--r--Documentation/media/uapi/v4l/pixfmt-meta-uvc.rst51
-rw-r--r--Documentation/media/uapi/v4l/pixfmt-meta-vsp1-hgo.rst168
-rw-r--r--Documentation/media/uapi/v4l/pixfmt-meta-vsp1-hgt.rst120
-rw-r--r--Documentation/media/uapi/v4l/pixfmt-nv12.rst129
-rw-r--r--Documentation/media/uapi/v4l/pixfmt-nv12m.rst144
-rw-r--r--Documentation/media/uapi/v4l/pixfmt-nv12mt.rst60
-rw-r--r--Documentation/media/uapi/v4l/pixfmt-nv16.rst153
-rw-r--r--Documentation/media/uapi/v4l/pixfmt-nv16m.rst157
-rw-r--r--Documentation/media/uapi/v4l/pixfmt-nv24.rst95
-rw-r--r--Documentation/media/uapi/v4l/pixfmt-packed-rgb.rst863
-rw-r--r--Documentation/media/uapi/v4l/pixfmt-packed-yuv.rst198
-rw-r--r--Documentation/media/uapi/v4l/pixfmt-rgb.rst23
-rw-r--r--Documentation/media/uapi/v4l/pixfmt-srggb14p.rst127
-rw-r--r--Documentation/media/uapi/v4l/pixfmt-uyvy.rst110
-rw-r--r--Documentation/media/uapi/v4l/pixfmt-v4l2-mplane.rst111
-rw-r--r--Documentation/media/uapi/v4l/pixfmt-v4l2.rst150
-rw-r--r--Documentation/media/uapi/v4l/pixfmt-vyuy.rst108
-rw-r--r--Documentation/media/uapi/v4l/pixfmt-y10.rst65
-rw-r--r--Documentation/media/uapi/v4l/pixfmt-y10b.rst33
-rw-r--r--Documentation/media/uapi/v4l/pixfmt-y10p.rst33
-rw-r--r--Documentation/media/uapi/v4l/pixfmt-y12.rst65
-rw-r--r--Documentation/media/uapi/v4l/pixfmt-y16-be.rst69
-rw-r--r--Documentation/media/uapi/v4l/pixfmt-y16.rst69
-rw-r--r--Documentation/media/uapi/v4l/pixfmt-y41p.rst151
-rw-r--r--Documentation/media/uapi/v4l/pixfmt-yuv410.rst127
-rw-r--r--Documentation/media/uapi/v4l/pixfmt-yuv411p.rst115
-rw-r--r--Documentation/media/uapi/v4l/pixfmt-yuv420.rst143
-rw-r--r--Documentation/media/uapi/v4l/pixfmt-yuv420m.rst152
-rw-r--r--Documentation/media/uapi/v4l/pixfmt-yuv422m.rst141
-rw-r--r--Documentation/media/uapi/v4l/pixfmt-yuv422p.rst129
-rw-r--r--Documentation/media/uapi/v4l/pixfmt-yuv444m.rst141
-rw-r--r--Documentation/media/uapi/v4l/pixfmt-yuyv.rst118
-rw-r--r--Documentation/media/uapi/v4l/pixfmt-yvyu.rst108
-rw-r--r--Documentation/media/uapi/v4l/rw.rst47
-rw-r--r--Documentation/media/uapi/v4l/video.rst68
-rw-r--r--Documentation/media/uapi/v4l/videodev.rst9
-rw-r--r--Documentation/media/uapi/v4l/vidioc-decoder-cmd.rst210
-rw-r--r--Documentation/media/uapi/v4l/vidioc-encoder-cmd.rst161
-rw-r--r--Documentation/media/uapi/v4l/vidioc-enum-fmt.rst130
-rw-r--r--Documentation/media/uapi/v4l/vidioc-g-ext-ctrls.rst413
-rw-r--r--Documentation/media/uapi/v4l/vidioc-g-parm.rst264
-rw-r--r--Documentation/media/uapi/v4l/vidioc-g-std.rst74
-rw-r--r--Documentation/media/uapi/v4l/vidioc-prepare-buf.rst61
-rw-r--r--Documentation/media/uapi/v4l/vidioc-reqbufs.rst145
-rw-r--r--Documentation/media/uapi/v4l/vidioc-subdev-enum-frame-size.rst118
-rw-r--r--Documentation/media/uapi/v4l/vidioc-subdev-enum-mbus-code.rst91
-rw-r--r--Documentation/media/uapi/v4l/vidioc-subdev-g-crop.rst118
-rw-r--r--Documentation/media/uapi/v4l/vidioc-subdev-g-frame-interval.rst110
-rw-r--r--Documentation/media/uapi/v4l/vidioc-subdev-g-selection.rst118
-rw-r--r--Documentation/media/uapi/v4l/yuv-formats.rst56
-rw-r--r--Documentation/media/v4l-drivers/cardlist.rst18
-rw-r--r--Documentation/media/v4l-drivers/cpia2.rst193
-rw-r--r--Documentation/media/v4l-drivers/cx18.rst37
-rw-r--r--Documentation/media/v4l-drivers/cx2341x.rst3858
-rw-r--r--Documentation/media/v4l-drivers/cx88.rst163
-rw-r--r--Documentation/media/v4l-drivers/davinci-vpbe.rst95
-rw-r--r--Documentation/media/v4l-drivers/imx.rst617
-rw-r--r--Documentation/media/v4l-drivers/index.rst67
-rw-r--r--Documentation/media/v4l-drivers/meye.rst132
-rw-r--r--Documentation/media/v4l-drivers/omap3isp.rst282
-rw-r--r--Documentation/media/v4l-drivers/omap4_camera.rst60
-rw-r--r--Documentation/media/v4l-drivers/saa7134.rst113
-rw-r--r--Documentation/media/v4l-drivers/soc-camera.rst169
-rw-r--r--Documentation/media/v4l-drivers/tm6000-cardlist.rst81
-rw-r--r--Documentation/media/v4l-drivers/usbvision-cardlist.rst281
-rw-r--r--Documentation/media/v4l-drivers/v4l-with-ir.rst73
-rw-r--r--Documentation/media/v4l-drivers/zr364xx.rst108
-rw-r--r--Documentation/media/video.h.rst.exceptions37
-rw-r--r--Documentation/memory-barriers.txt687
-rw-r--r--Documentation/memory-devices/ti-emif.txt57
-rw-r--r--Documentation/men-chameleon-bus.txt175
-rw-r--r--Documentation/mhi/index.rst18
-rw-r--r--Documentation/mhi/mhi.rst218
-rw-r--r--Documentation/mhi/topology.rst60
-rw-r--r--Documentation/mic/mic_overview.txt81
-rw-r--r--Documentation/mic/scif_overview.txt98
-rw-r--r--Documentation/mips/AU1xxx_IDE.README115
-rw-r--r--Documentation/misc-devices/ad525x_dpot.rst57
-rw-r--r--Documentation/misc-devices/ad525x_dpot.txt57
-rw-r--r--Documentation/misc-devices/amd-sbi.rst99
-rw-r--r--Documentation/misc-devices/apds990x.rst128
-rw-r--r--Documentation/misc-devices/apds990x.txt111
-rw-r--r--Documentation/misc-devices/bh1770glc.rst135
-rw-r--r--Documentation/misc-devices/bh1770glc.txt116
-rw-r--r--Documentation/misc-devices/c2port.rst94
-rw-r--r--Documentation/misc-devices/c2port.txt90
-rw-r--r--Documentation/misc-devices/dw-xdata-pcie.rst64
-rw-r--r--Documentation/misc-devices/eeprom96
-rw-r--r--Documentation/misc-devices/ibmvmc.rst1
-rw-r--r--Documentation/misc-devices/ics932s40131
-rw-r--r--Documentation/misc-devices/ics932s401.rst36
-rw-r--r--Documentation/misc-devices/index.rst31
-rw-r--r--Documentation/misc-devices/isl2900362
-rw-r--r--Documentation/misc-devices/isl29003.rst75
-rw-r--r--Documentation/misc-devices/lis3lv02d93
-rw-r--r--Documentation/misc-devices/lis3lv02d.rst99
-rw-r--r--Documentation/misc-devices/max6875110
-rw-r--r--Documentation/misc-devices/max6875.rst136
-rw-r--r--Documentation/misc-devices/mei/mei-client-bus.txt141
-rw-r--r--Documentation/misc-devices/mei/mei.txt266
-rw-r--r--Documentation/misc-devices/mrvl_cn10k_dpi.rst52
-rw-r--r--Documentation/misc-devices/oxsemi-tornado.rst131
-rw-r--r--Documentation/misc-devices/pci-endpoint-test.rst56
-rw-r--r--Documentation/misc-devices/pci-endpoint-test.txt41
-rw-r--r--Documentation/misc-devices/spear-pcie-gadget.rst170
-rw-r--r--Documentation/misc-devices/spear-pcie-gadget.txt130
-rw-r--r--Documentation/misc-devices/tps6594-pfsm.rst87
-rw-r--r--Documentation/misc-devices/uacce.rst176
-rw-r--r--Documentation/misc-devices/xilinx_sdfec.rst292
-rw-r--r--Documentation/mm/active_mm.rst95
-rw-r--r--Documentation/mm/allocation-profiling.rst104
-rw-r--r--Documentation/mm/arch_pgtable_helpers.rst250
-rw-r--r--Documentation/mm/balance.rst100
-rw-r--r--Documentation/mm/bootmem.rst5
-rw-r--r--Documentation/mm/damon/api.rst20
-rw-r--r--Documentation/mm/damon/design.rst818
-rw-r--r--Documentation/mm/damon/faq.rst27
-rw-r--r--Documentation/mm/damon/index.rst45
-rw-r--r--Documentation/mm/damon/maintainer-profile.rst103
-rw-r--r--Documentation/mm/damon/monitoring_intervals_tuning_example.rst247
-rw-r--r--Documentation/mm/free_page_reporting.rst38
-rw-r--r--Documentation/mm/highmem.rst213
-rw-r--r--Documentation/mm/hmm.rst441
-rw-r--r--Documentation/mm/hugetlbfs_reserv.rst595
-rw-r--r--Documentation/mm/hwpoison.rst182
-rw-r--r--Documentation/mm/index.rst65
-rw-r--r--Documentation/mm/ksm.rst85
-rw-r--r--Documentation/mm/memory-model.rst175
-rw-r--r--Documentation/mm/mmu_notifier.rst97
-rw-r--r--Documentation/mm/multigen_lru.rst269
-rw-r--r--Documentation/mm/numa.rst148
-rw-r--r--Documentation/mm/oom.rst5
-rw-r--r--Documentation/mm/overcommit-accounting.rst85
-rw-r--r--Documentation/mm/page_allocation.rst5
-rw-r--r--Documentation/mm/page_cache.rst15
-rw-r--r--Documentation/mm/page_frags.rst43
-rw-r--r--Documentation/mm/page_migration.rst207
-rw-r--r--Documentation/mm/page_owner.rst235
-rw-r--r--Documentation/mm/page_reclaim.rst5
-rw-r--r--Documentation/mm/page_table_check.rst80
-rw-r--r--Documentation/mm/page_tables.rst281
-rw-r--r--Documentation/mm/physical_memory.rst635
-rw-r--r--Documentation/mm/process_addrs.rst909
-rw-r--r--Documentation/mm/remap_file_pages.rst31
-rw-r--r--Documentation/mm/shmfs.rst5
-rw-r--r--Documentation/mm/slab.rst12
-rw-r--r--Documentation/mm/split_page_table_lock.rst107
-rw-r--r--Documentation/mm/swap-table.rst69
-rw-r--r--Documentation/mm/swap.rst5
-rw-r--r--Documentation/mm/transhuge.rst192
-rw-r--r--Documentation/mm/unevictable-lru.rst559
-rw-r--r--Documentation/mm/vmalloc.rst5
-rw-r--r--Documentation/mm/vmalloced-kernel-stacks.rst153
-rw-r--r--Documentation/mm/vmemmap_dedup.rst231
-rw-r--r--Documentation/mm/zsmalloc.rst269
-rw-r--r--Documentation/mmc/mmc-async-req.txt87
-rw-r--r--Documentation/mmc/mmc-dev-attrs.txt77
-rw-r--r--Documentation/mmc/mmc-dev-parts.txt40
-rw-r--r--Documentation/mmc/mmc-tools.txt34
-rw-r--r--Documentation/mtd/intel-spi.txt88
-rw-r--r--Documentation/mtd/nand_ecc.txt714
-rw-r--r--Documentation/mtd/spi-nor.txt65
-rw-r--r--Documentation/namespaces/compatibility-list.txt39
-rw-r--r--Documentation/namespaces/resource-control.txt14
-rw-r--r--Documentation/netlabel/cipso_ipv4.rst56
-rw-r--r--Documentation/netlabel/cipso_ipv4.txt49
-rw-r--r--Documentation/netlabel/draft_ietf.rst5
-rw-r--r--Documentation/netlabel/index.rst21
-rw-r--r--Documentation/netlabel/introduction.rst52
-rw-r--r--Documentation/netlabel/introduction.txt46
-rw-r--r--Documentation/netlabel/lsm_interface.rst53
-rw-r--r--Documentation/netlabel/lsm_interface.txt47
-rw-r--r--Documentation/netlink/genetlink-c.yaml406
-rw-r--r--Documentation/netlink/genetlink-legacy.yaml473
-rw-r--r--Documentation/netlink/genetlink.yaml353
-rw-r--r--Documentation/netlink/netlink-raw.yaml517
-rw-r--r--Documentation/netlink/specs/binder.yaml93
-rw-r--r--Documentation/netlink/specs/conntrack.yaml642
-rw-r--r--Documentation/netlink/specs/devlink.yaml2327
-rw-r--r--Documentation/netlink/specs/dpll.yaml672
-rw-r--r--Documentation/netlink/specs/ethtool.yaml2764
-rw-r--r--Documentation/netlink/specs/fou.yaml132
-rw-r--r--Documentation/netlink/specs/handshake.yaml132
-rw-r--r--Documentation/netlink/specs/index.rst13
-rw-r--r--Documentation/netlink/specs/lockd.yaml45
-rw-r--r--Documentation/netlink/specs/mptcp_pm.yaml395
-rw-r--r--Documentation/netlink/specs/net_shaper.yaml363
-rw-r--r--Documentation/netlink/specs/netdev.yaml784
-rw-r--r--Documentation/netlink/specs/nfsd.yaml224
-rw-r--r--Documentation/netlink/specs/nftables.yaml1532
-rw-r--r--Documentation/netlink/specs/nl80211.yaml1933
-rw-r--r--Documentation/netlink/specs/nlctrl.yaml208
-rw-r--r--Documentation/netlink/specs/ovpn.yaml508
-rw-r--r--Documentation/netlink/specs/ovs_datapath.yaml160
-rw-r--r--Documentation/netlink/specs/ovs_flow.yaml1002
-rw-r--r--Documentation/netlink/specs/ovs_vport.yaml172
-rw-r--r--Documentation/netlink/specs/psp.yaml187
-rw-r--r--Documentation/netlink/specs/rt-addr.yaml194
-rw-r--r--Documentation/netlink/specs/rt-link.yaml2521
-rw-r--r--Documentation/netlink/specs/rt-neigh.yaml453
-rw-r--r--Documentation/netlink/specs/rt-route.yaml324
-rw-r--r--Documentation/netlink/specs/rt-rule.yaml273
-rw-r--r--Documentation/netlink/specs/tc.yaml4210
-rw-r--r--Documentation/netlink/specs/tcp_metrics.yaml169
-rw-r--r--Documentation/netlink/specs/team.yaml206
-rw-r--r--Documentation/networking/3c509.txt213
-rw-r--r--Documentation/networking/6lowpan.rst53
-rw-r--r--Documentation/networking/6lowpan.txt50
-rw-r--r--Documentation/networking/6pack.rst191
-rw-r--r--Documentation/networking/6pack.txt175
-rw-r--r--Documentation/networking/LICENSE.qla3xxx46
-rw-r--r--Documentation/networking/LICENSE.qlcnic288
-rw-r--r--Documentation/networking/LICENSE.qlge288
-rw-r--r--Documentation/networking/PLIP.txt215
-rw-r--r--Documentation/networking/README.ipw2100293
-rw-r--r--Documentation/networking/README.ipw2200472
-rw-r--r--Documentation/networking/README.sb1000207
-rw-r--r--Documentation/networking/af_xdp.rst617
-rw-r--r--Documentation/networking/altera_tse.txt263
-rw-r--r--Documentation/networking/arcnet-hardware.rst3234
-rw-r--r--Documentation/networking/arcnet-hardware.txt3133
-rw-r--r--Documentation/networking/arcnet.rst594
-rw-r--r--Documentation/networking/arcnet.txt556
-rw-r--r--Documentation/networking/atm.rst14
-rw-r--r--Documentation/networking/atm.txt8
-rw-r--r--Documentation/networking/ax25.rst16
-rw-r--r--Documentation/networking/ax25.txt10
-rw-r--r--Documentation/networking/bareudp.rst59
-rw-r--r--Documentation/networking/batman-adv.rst122
-rw-r--r--Documentation/networking/baycom.txt158
-rw-r--r--Documentation/networking/bonding.rst2917
-rw-r--r--Documentation/networking/bonding.txt2827
-rw-r--r--Documentation/networking/bridge.rst334
-rw-r--r--Documentation/networking/caif/Linux-CAIF.txt175
-rw-r--r--Documentation/networking/caif/README109
-rw-r--r--Documentation/networking/caif/caif.rst138
-rw-r--r--Documentation/networking/caif/index.rst12
-rw-r--r--Documentation/networking/caif/linux_caif.rst195
-rw-r--r--Documentation/networking/caif/spi_porting.txt208
-rw-r--r--Documentation/networking/can.rst162
-rw-r--r--Documentation/networking/can_ucan_protocol.rst6
-rw-r--r--Documentation/networking/cdc_mbim.rst355
-rw-r--r--Documentation/networking/cdc_mbim.txt339
-rw-r--r--Documentation/networking/checksum-offloads.rst143
-rw-r--r--Documentation/networking/checksum-offloads.txt122
-rw-r--r--Documentation/networking/conf.py10
-rw-r--r--Documentation/networking/cops.txt63
-rw-r--r--Documentation/networking/cs89x0.txt624
-rw-r--r--Documentation/networking/cxacru.txt100
-rw-r--r--Documentation/networking/cxgb.txt352
-rw-r--r--Documentation/networking/dccp.txt207
-rw-r--r--Documentation/networking/dctcp.rst52
-rw-r--r--Documentation/networking/dctcp.txt44
-rw-r--r--Documentation/networking/de4x5.txt178
-rw-r--r--Documentation/networking/decnet.txt232
-rw-r--r--Documentation/networking/defza.txt57
-rw-r--r--Documentation/networking/device_drivers/atm/cxacru-cf.py (renamed from Documentation/networking/cxacru-cf.py)0
-rw-r--r--Documentation/networking/device_drivers/atm/cxacru.rst120
-rw-r--r--Documentation/networking/device_drivers/atm/fore200e.rst66
-rw-r--r--Documentation/networking/device_drivers/atm/index.rst20
-rw-r--r--Documentation/networking/device_drivers/atm/iphase.rst193
-rw-r--r--Documentation/networking/device_drivers/can/can327.rst331
-rw-r--r--Documentation/networking/device_drivers/can/ctu/ctucanfd-driver.rst638
-rw-r--r--Documentation/networking/device_drivers/can/ctu/fsm_txt_buffer_user.svg151
-rw-r--r--Documentation/networking/device_drivers/can/freescale/flexcan.rst54
-rw-r--r--Documentation/networking/device_drivers/can/index.rst22
-rw-r--r--Documentation/networking/device_drivers/cellular/index.rst18
-rw-r--r--Documentation/networking/device_drivers/cellular/qualcomm/rmnet.rst196
-rw-r--r--Documentation/networking/device_drivers/ethernet/3com/3c509.rst249
-rw-r--r--Documentation/networking/device_drivers/ethernet/3com/vortex.rst459
-rw-r--r--Documentation/networking/device_drivers/ethernet/altera/altera_tse.rst286
-rw-r--r--Documentation/networking/device_drivers/ethernet/amazon/ena.rst471
-rw-r--r--Documentation/networking/device_drivers/ethernet/amd/pds_core.rst139
-rw-r--r--Documentation/networking/device_drivers/ethernet/amd/pds_vdpa.rst85
-rw-r--r--Documentation/networking/device_drivers/ethernet/amd/pds_vfio_pci.rst79
-rw-r--r--Documentation/networking/device_drivers/ethernet/aquantia/atlantic.rst556
-rw-r--r--Documentation/networking/device_drivers/ethernet/chelsio/cxgb.rst393
-rw-r--r--Documentation/networking/device_drivers/ethernet/cirrus/cs89x0.rst647
-rw-r--r--Documentation/networking/device_drivers/ethernet/davicom/dm9000.rst171
-rw-r--r--Documentation/networking/device_drivers/ethernet/dec/dmfe.rst71
-rw-r--r--Documentation/networking/device_drivers/ethernet/dlink/dl2k.rst314
-rw-r--r--Documentation/networking/device_drivers/ethernet/freescale/dpaa.rst269
-rw-r--r--Documentation/networking/device_drivers/ethernet/freescale/dpaa2/dpio-driver.rst (renamed from Documentation/networking/dpaa2/dpio-driver.rst)25
-rw-r--r--Documentation/networking/device_drivers/ethernet/freescale/dpaa2/ethernet-driver.rst (renamed from Documentation/networking/dpaa2/ethernet-driver.rst)3
-rw-r--r--Documentation/networking/device_drivers/ethernet/freescale/dpaa2/index.rst12
-rw-r--r--Documentation/networking/device_drivers/ethernet/freescale/dpaa2/mac-phy-support.rst194
-rw-r--r--Documentation/networking/device_drivers/ethernet/freescale/dpaa2/overview.rst406
-rw-r--r--Documentation/networking/device_drivers/ethernet/freescale/dpaa2/switch-driver.rst217
-rw-r--r--Documentation/networking/device_drivers/ethernet/freescale/gianfar.rst51
-rw-r--r--Documentation/networking/device_drivers/ethernet/google/gve.rst175
-rw-r--r--Documentation/networking/device_drivers/ethernet/huawei/hinic.rst128
-rw-r--r--Documentation/networking/device_drivers/ethernet/huawei/hinic3.rst137
-rw-r--r--Documentation/networking/device_drivers/ethernet/index.rst72
-rw-r--r--Documentation/networking/device_drivers/ethernet/intel/e100.rst (renamed from Documentation/networking/e100.rst)26
-rw-r--r--Documentation/networking/device_drivers/ethernet/intel/e1000.rst (renamed from Documentation/networking/e1000.rst)20
-rw-r--r--Documentation/networking/device_drivers/ethernet/intel/e1000e.rst (renamed from Documentation/networking/e1000e.rst)20
-rw-r--r--Documentation/networking/device_drivers/ethernet/intel/fm10k.rst (renamed from Documentation/networking/fm10k.rst)18
-rw-r--r--Documentation/networking/device_drivers/ethernet/intel/i40e.rst778
-rw-r--r--Documentation/networking/device_drivers/ethernet/intel/iavf.rst326
-rw-r--r--Documentation/networking/device_drivers/ethernet/intel/ice.rst1219
-rw-r--r--Documentation/networking/device_drivers/ethernet/intel/idpf.rst160
-rw-r--r--Documentation/networking/device_drivers/ethernet/intel/igb.rst (renamed from Documentation/networking/igb.rst)39
-rw-r--r--Documentation/networking/device_drivers/ethernet/intel/igbvf.rst (renamed from Documentation/networking/igbvf.rst)14
-rw-r--r--Documentation/networking/device_drivers/ethernet/intel/ixgbe.rst552
-rw-r--r--Documentation/networking/device_drivers/ethernet/intel/ixgbevf.rst (renamed from Documentation/networking/ixgbevf.rst)12
-rw-r--r--Documentation/networking/device_drivers/ethernet/marvell/octeon_ep.rst41
-rw-r--r--Documentation/networking/device_drivers/ethernet/marvell/octeon_ep_vf.rst24
-rw-r--r--Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst433
-rw-r--r--Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst1380
-rw-r--r--Documentation/networking/device_drivers/ethernet/mellanox/mlx5/index.rst25
-rw-r--r--Documentation/networking/device_drivers/ethernet/mellanox/mlx5/kconfig.rst171
-rw-r--r--Documentation/networking/device_drivers/ethernet/mellanox/mlx5/switchdev.rst281
-rw-r--r--Documentation/networking/device_drivers/ethernet/mellanox/mlx5/tracepoints.rst229
-rw-r--r--Documentation/networking/device_drivers/ethernet/meta/fbnic.rst192
-rw-r--r--Documentation/networking/device_drivers/ethernet/microsoft/netvsc.rst120
-rw-r--r--Documentation/networking/device_drivers/ethernet/neterion/s2io.rst196
-rw-r--r--Documentation/networking/device_drivers/ethernet/netronome/nfp.rst374
-rw-r--r--Documentation/networking/device_drivers/ethernet/pensando/ionic.rst306
-rw-r--r--Documentation/networking/device_drivers/ethernet/pensando/ionic_rdma.rst52
-rw-r--r--Documentation/networking/device_drivers/ethernet/qualcomm/ppe/ppe.rst194
-rw-r--r--Documentation/networking/device_drivers/ethernet/smsc/smc9.rst48
-rw-r--r--Documentation/networking/device_drivers/ethernet/stmicro/stmmac.rst700
-rw-r--r--Documentation/networking/device_drivers/ethernet/ti/am65_nuss_cpsw_switchdev.rst143
-rw-r--r--Documentation/networking/device_drivers/ethernet/ti/cpsw.rst587
-rw-r--r--Documentation/networking/device_drivers/ethernet/ti/cpsw_switchdev.rst242
-rw-r--r--Documentation/networking/device_drivers/ethernet/ti/icssg_prueth.rst56
-rw-r--r--Documentation/networking/device_drivers/ethernet/ti/tlan.rst140
-rw-r--r--Documentation/networking/device_drivers/ethernet/wangxun/ngbe.rst14
-rw-r--r--Documentation/networking/device_drivers/ethernet/wangxun/ngbevf.rst16
-rw-r--r--Documentation/networking/device_drivers/ethernet/wangxun/txgbe.rst20
-rw-r--r--Documentation/networking/device_drivers/ethernet/wangxun/txgbevf.rst16
-rw-r--r--Documentation/networking/device_drivers/fddi/defza.rst63
-rw-r--r--Documentation/networking/device_drivers/fddi/index.rst19
-rw-r--r--Documentation/networking/device_drivers/fddi/skfp.rst253
-rw-r--r--Documentation/networking/device_drivers/hamradio/baycom.rst174
-rw-r--r--Documentation/networking/device_drivers/hamradio/index.rst19
-rw-r--r--Documentation/networking/device_drivers/hamradio/z8530drv.rst686
-rw-r--r--Documentation/networking/device_drivers/index.rst25
-rw-r--r--Documentation/networking/device_drivers/wifi/index.rst19
-rw-r--r--Documentation/networking/device_drivers/wifi/intel/ipw2100.rst323
-rw-r--r--Documentation/networking/device_drivers/wifi/intel/ipw2200.rst526
-rw-r--r--Documentation/networking/device_drivers/wwan/index.rst19
-rw-r--r--Documentation/networking/device_drivers/wwan/iosm.rst96
-rw-r--r--Documentation/networking/device_drivers/wwan/t7xx.rst214
-rw-r--r--Documentation/networking/devlink-params-bnxt.txt18
-rw-r--r--Documentation/networking/devlink-params.txt42
-rw-r--r--Documentation/networking/devlink/am65-nuss-cpsw-switch.rst26
-rw-r--r--Documentation/networking/devlink/bnxt.rst84
-rw-r--r--Documentation/networking/devlink/devlink-dpipe.rst252
-rw-r--r--Documentation/networking/devlink/devlink-eswitch-attr.rst76
-rw-r--r--Documentation/networking/devlink/devlink-flash.rst121
-rw-r--r--Documentation/networking/devlink/devlink-health.rst138
-rw-r--r--Documentation/networking/devlink/devlink-info.rst224
-rw-r--r--Documentation/networking/devlink/devlink-linecard.rst122
-rw-r--r--Documentation/networking/devlink/devlink-params.rst153
-rw-r--r--Documentation/networking/devlink/devlink-port.rst484
-rw-r--r--Documentation/networking/devlink/devlink-region.rst83
-rw-r--r--Documentation/networking/devlink/devlink-reload.rst90
-rw-r--r--Documentation/networking/devlink/devlink-resource.rst76
-rw-r--r--Documentation/networking/devlink/devlink-selftests.rst38
-rw-r--r--Documentation/networking/devlink/devlink-trap.rst640
-rw-r--r--Documentation/networking/devlink/etas_es58x.rst36
-rw-r--r--Documentation/networking/devlink/hns3.rst30
-rw-r--r--Documentation/networking/devlink/i40e.rst59
-rw-r--r--Documentation/networking/devlink/ice.rst487
-rw-r--r--Documentation/networking/devlink/index.rst103
-rw-r--r--Documentation/networking/devlink/ionic.rst29
-rw-r--r--Documentation/networking/devlink/iosm.rst162
-rw-r--r--Documentation/networking/devlink/ixgbe.rst171
-rw-r--r--Documentation/networking/devlink/kvaser_pciefd.rst24
-rw-r--r--Documentation/networking/devlink/kvaser_usb.rst33
-rw-r--r--Documentation/networking/devlink/mlx4.rst56
-rw-r--r--Documentation/networking/devlink/mlx5.rst407
-rw-r--r--Documentation/networking/devlink/mlxsw.rst105
-rw-r--r--Documentation/networking/devlink/mv88e6xxx.rst28
-rw-r--r--Documentation/networking/devlink/netdevsim.rst99
-rw-r--r--Documentation/networking/devlink/nfp.rst68
-rw-r--r--Documentation/networking/devlink/octeontx2.rst79
-rw-r--r--Documentation/networking/devlink/prestera.rst141
-rw-r--r--Documentation/networking/devlink/qed.rst26
-rw-r--r--Documentation/networking/devlink/sfc.rst71
-rw-r--r--Documentation/networking/devlink/ti-cpsw-switch.rst31
-rw-r--r--Documentation/networking/devlink/zl3073x.rst65
-rw-r--r--Documentation/networking/devmem.rst419
-rw-r--r--Documentation/networking/diagnostic/index.rst17
-rw-r--r--Documentation/networking/diagnostic/twisted_pair_layer1_diagnostics.rst784
-rw-r--r--Documentation/networking/dl2k.txt282
-rw-r--r--Documentation/networking/dm9000.txt167
-rw-r--r--Documentation/networking/dmfe.txt66
-rw-r--r--Documentation/networking/dns_resolver.rst153
-rw-r--r--Documentation/networking/dns_resolver.txt157
-rw-r--r--Documentation/networking/dpaa.txt260
-rw-r--r--Documentation/networking/dpaa2/index.rst10
-rw-r--r--Documentation/networking/dpaa2/overview.rst405
-rw-r--r--Documentation/networking/driver.rst127
-rw-r--r--Documentation/networking/driver.txt93
-rw-r--r--Documentation/networking/dsa/b53.rst183
-rw-r--r--Documentation/networking/dsa/bcm_sf2.rst115
-rw-r--r--Documentation/networking/dsa/bcm_sf2.txt114
-rw-r--r--Documentation/networking/dsa/configuration.rst458
-rw-r--r--Documentation/networking/dsa/dsa.rst1133
-rw-r--r--Documentation/networking/dsa/dsa.txt601
-rw-r--r--Documentation/networking/dsa/index.rst13
-rw-r--r--Documentation/networking/dsa/lan9303.rst37
-rw-r--r--Documentation/networking/dsa/lan9303.txt37
-rw-r--r--Documentation/networking/dsa/sja1105.rst445
-rw-r--r--Documentation/networking/ena.txt305
-rw-r--r--Documentation/networking/eql.rst373
-rw-r--r--Documentation/networking/eql.txt528
-rw-r--r--Documentation/networking/ethtool-netlink.rst2571
-rw-r--r--Documentation/networking/fib_trie.rst149
-rw-r--r--Documentation/networking/fib_trie.txt145
-rw-r--r--Documentation/networking/filter.rst685
-rw-r--r--Documentation/networking/filter.txt1542
-rw-r--r--Documentation/networking/fore200e.txt64
-rw-r--r--Documentation/networking/framerelay.txt39
-rw-r--r--Documentation/networking/gen_stats.rst129
-rw-r--r--Documentation/networking/gen_stats.txt119
-rw-r--r--Documentation/networking/generic-hdlc.rst170
-rw-r--r--Documentation/networking/generic-hdlc.txt132
-rw-r--r--Documentation/networking/generic_netlink.rst9
-rw-r--r--Documentation/networking/generic_netlink.txt3
-rw-r--r--Documentation/networking/gianfar.txt42
-rw-r--r--Documentation/networking/gtp.rst251
-rw-r--r--Documentation/networking/gtp.txt230
-rw-r--r--Documentation/networking/hinic.txt125
-rw-r--r--Documentation/networking/i40e.rst770
-rw-r--r--Documentation/networking/iavf.rst281
-rw-r--r--Documentation/networking/ice.rst45
-rw-r--r--Documentation/networking/ieee802154.rst184
-rw-r--r--Documentation/networking/ieee802154.txt177
-rw-r--r--Documentation/networking/ila.rst296
-rw-r--r--Documentation/networking/ila.txt285
-rw-r--r--Documentation/networking/index.rst142
-rw-r--r--Documentation/networking/ioam6-sysctl.rst26
-rw-r--r--Documentation/networking/iou-zcrx.rst202
-rw-r--r--Documentation/networking/ip-sysctl.rst3730
-rw-r--r--Documentation/networking/ip-sysctl.txt2203
-rw-r--r--Documentation/networking/ip_dynaddr.rst40
-rw-r--r--Documentation/networking/ip_dynaddr.txt29
-rw-r--r--Documentation/networking/ipddp.txt73
-rw-r--r--Documentation/networking/iphase.txt158
-rw-r--r--Documentation/networking/ipsec.rst46
-rw-r--r--Documentation/networking/ipsec.txt38
-rw-r--r--Documentation/networking/ipv6.rst78
-rw-r--r--Documentation/networking/ipv6.txt72
-rw-r--r--Documentation/networking/ipvlan.rst189
-rw-r--r--Documentation/networking/ipvlan.txt146
-rw-r--r--Documentation/networking/ipvs-sysctl.rst332
-rw-r--r--Documentation/networking/ipvs-sysctl.txt294
-rw-r--r--Documentation/networking/iso15765-2.rst386
-rw-r--r--Documentation/networking/ixgb.rst467
-rw-r--r--Documentation/networking/ixgbe.rst527
-rw-r--r--Documentation/networking/j1939.rst1135
-rw-r--r--Documentation/networking/kapi.rst33
-rw-r--r--Documentation/networking/kcm.rst290
-rw-r--r--Documentation/networking/kcm.txt285
-rw-r--r--Documentation/networking/l2tp.rst786
-rw-r--r--Documentation/networking/l2tp.txt345
-rw-r--r--Documentation/networking/lapb-module.rst305
-rw-r--r--Documentation/networking/lapb-module.txt263
-rw-r--r--Documentation/networking/ltpc.txt131
-rw-r--r--Documentation/networking/mac80211-injection.rst106
-rw-r--r--Documentation/networking/mac80211-injection.txt97
-rw-r--r--Documentation/networking/mac80211_hwsim/README68
-rw-r--r--Documentation/networking/mac80211_hwsim/mac80211_hwsim.rst80
-rw-r--r--Documentation/networking/mctp.rst320
-rw-r--r--Documentation/networking/mpls-sysctl.rst57
-rw-r--r--Documentation/networking/mpls-sysctl.txt48
-rw-r--r--Documentation/networking/mptcp-sysctl.rst153
-rw-r--r--Documentation/networking/mptcp.rst156
-rw-r--r--Documentation/networking/msg_zerocopy.rst21
-rw-r--r--Documentation/networking/multi-pf-netdev.rst174
-rw-r--r--Documentation/networking/multiqueue.rst78
-rw-r--r--Documentation/networking/multiqueue.txt79
-rw-r--r--Documentation/networking/napi.rst457
-rw-r--r--Documentation/networking/net_cachelines/index.rst16
-rw-r--r--Documentation/networking/net_cachelines/inet_connection_sock.rst51
-rw-r--r--Documentation/networking/net_cachelines/inet_sock.rst46
-rw-r--r--Documentation/networking/net_cachelines/net_device.rst193
-rw-r--r--Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst162
-rw-r--r--Documentation/networking/net_cachelines/snmp.rst141
-rw-r--r--Documentation/networking/net_cachelines/tcp_sock.rst170
-rw-r--r--Documentation/networking/net_dim.rst218
-rw-r--r--Documentation/networking/net_dim.txt174
-rw-r--r--Documentation/networking/net_failover.rst117
-rw-r--r--Documentation/networking/netconsole.rst465
-rw-r--r--Documentation/networking/netconsole.txt210
-rw-r--r--Documentation/networking/netdev-FAQ.rst259
-rw-r--r--Documentation/networking/netdev-features.rst195
-rw-r--r--Documentation/networking/netdev-features.txt181
-rw-r--r--Documentation/networking/netdevices.rst418
-rw-r--r--Documentation/networking/netdevices.txt104
-rw-r--r--Documentation/networking/netfilter-sysctl.rst17
-rw-r--r--Documentation/networking/netfilter-sysctl.txt10
-rw-r--r--Documentation/networking/netif-msg.rst95
-rw-r--r--Documentation/networking/netif-msg.txt79
-rw-r--r--Documentation/networking/netmem.rst98
-rw-r--r--Documentation/networking/netvsc.txt84
-rw-r--r--Documentation/networking/nexthop-group-resilient.rst293
-rw-r--r--Documentation/networking/nf_conntrack-sysctl.rst231
-rw-r--r--Documentation/networking/nf_conntrack-sysctl.txt163
-rw-r--r--Documentation/networking/nf_flowtable.rst235
-rw-r--r--Documentation/networking/nf_flowtable.txt112
-rw-r--r--Documentation/networking/nfc.rst130
-rw-r--r--Documentation/networking/nfc.txt128
-rw-r--r--Documentation/networking/oa-tc6-framework.rst497
-rw-r--r--Documentation/networking/openvswitch.rst251
-rw-r--r--Documentation/networking/openvswitch.txt248
-rw-r--r--Documentation/networking/operstates.rst187
-rw-r--r--Documentation/networking/operstates.txt162
-rw-r--r--Documentation/networking/packet_mmap.rst1084
-rw-r--r--Documentation/networking/packet_mmap.txt1061
-rw-r--r--Documentation/networking/page_pool.rst199
-rw-r--r--Documentation/networking/phonet.rst230
-rw-r--r--Documentation/networking/phonet.txt214
-rw-r--r--Documentation/networking/phy-link-topology.rst121
-rw-r--r--Documentation/networking/phy.rst564
-rw-r--r--Documentation/networking/phy.txt427
-rw-r--r--Documentation/networking/pktgen.rst422
-rw-r--r--Documentation/networking/pktgen.txt400
-rw-r--r--Documentation/networking/plip.rst222
-rw-r--r--Documentation/networking/ppp_generic.rst456
-rw-r--r--Documentation/networking/ppp_generic.txt426
-rw-r--r--Documentation/networking/proc_net_tcp.rst57
-rw-r--r--Documentation/networking/proc_net_tcp.txt48
-rw-r--r--Documentation/networking/pse-pd/index.rst10
-rw-r--r--Documentation/networking/pse-pd/introduction.rst73
-rw-r--r--Documentation/networking/pse-pd/pse-pi.rst301
-rw-r--r--Documentation/networking/psp.rst183
-rw-r--r--Documentation/networking/radiotap-headers.rst159
-rw-r--r--Documentation/networking/radiotap-headers.txt152
-rw-r--r--Documentation/networking/ray_cs.txt150
-rw-r--r--Documentation/networking/rds.rst448
-rw-r--r--Documentation/networking/rds.txt423
-rw-r--r--Documentation/networking/regulatory.rst209
-rw-r--r--Documentation/networking/regulatory.txt204
-rw-r--r--Documentation/networking/representors.rst262
-rw-r--r--Documentation/networking/rmnet.txt82
-rw-r--r--Documentation/networking/rxrpc.rst1162
-rw-r--r--Documentation/networking/rxrpc.txt1179
-rw-r--r--Documentation/networking/s2io.txt141
-rw-r--r--Documentation/networking/scaling.rst589
-rw-r--r--Documentation/networking/scaling.txt484
-rw-r--r--Documentation/networking/sctp.rst42
-rw-r--r--Documentation/networking/sctp.txt35
-rw-r--r--Documentation/networking/secid.rst20
-rw-r--r--Documentation/networking/secid.txt14
-rw-r--r--Documentation/networking/seg6-sysctl.rst39
-rw-r--r--Documentation/networking/seg6-sysctl.txt18
-rw-r--r--Documentation/networking/segmentation-offloads.rst190
-rw-r--r--Documentation/networking/segmentation-offloads.txt170
-rw-r--r--Documentation/networking/sfp-phylink.rst405
-rw-r--r--Documentation/networking/skbuff.rst37
-rw-r--r--Documentation/networking/skfp.txt220
-rw-r--r--Documentation/networking/smc-sysctl.rst73
-rw-r--r--Documentation/networking/smc9.txt42
-rw-r--r--Documentation/networking/snmp_counter.rst1789
-rw-r--r--Documentation/networking/spider_net.txt204
-rw-r--r--Documentation/networking/sriov.rst25
-rw-r--r--Documentation/networking/statistics.rst236
-rw-r--r--Documentation/networking/stmmac.txt401
-rw-r--r--Documentation/networking/strparser.rst247
-rw-r--r--Documentation/networking/strparser.txt207
-rw-r--r--Documentation/networking/switchdev.rst564
-rw-r--r--Documentation/networking/switchdev.txt394
-rw-r--r--Documentation/networking/sysfs-tagging.rst48
-rw-r--r--Documentation/networking/tc-actions-env-rules.rst29
-rw-r--r--Documentation/networking/tc-actions-env-rules.txt24
-rw-r--r--Documentation/networking/tc-queue-filters.rst37
-rw-r--r--Documentation/networking/tcp-thin.rst52
-rw-r--r--Documentation/networking/tcp-thin.txt47
-rw-r--r--Documentation/networking/tcp_ao.rst453
-rw-r--r--Documentation/networking/team.rst8
-rw-r--r--Documentation/networking/team.txt2
-rw-r--r--Documentation/networking/ti-cpsw.txt541
-rw-r--r--Documentation/networking/timestamping.rst851
-rw-r--r--Documentation/networking/timestamping.txt538
-rw-r--r--Documentation/networking/tipc.rst215
-rw-r--r--Documentation/networking/tlan.txt117
-rw-r--r--Documentation/networking/tls-handshake.rst222
-rw-r--r--Documentation/networking/tls-offload-layers.svg1
-rw-r--r--Documentation/networking/tls-offload-reorder-bad.svg1
-rw-r--r--Documentation/networking/tls-offload-reorder-good.svg1
-rw-r--r--Documentation/networking/tls-offload.rst540
-rw-r--r--Documentation/networking/tls.rst326
-rw-r--r--Documentation/networking/tls.txt197
-rw-r--r--Documentation/networking/tproxy.rst109
-rw-r--r--Documentation/networking/tproxy.txt104
-rw-r--r--Documentation/networking/tuntap.rst259
-rw-r--r--Documentation/networking/tuntap.txt227
-rw-r--r--Documentation/networking/udplite.rst291
-rw-r--r--Documentation/networking/udplite.txt278
-rw-r--r--Documentation/networking/vortex.txt448
-rw-r--r--Documentation/networking/vrf.rst464
-rw-r--r--Documentation/networking/vrf.txt404
-rw-r--r--Documentation/networking/vxge.txt93
-rw-r--r--Documentation/networking/vxlan.rst88
-rw-r--r--Documentation/networking/vxlan.txt51
-rw-r--r--Documentation/networking/x25-iface.rst81
-rw-r--r--Documentation/networking/x25-iface.txt123
-rw-r--r--Documentation/networking/x25.rst46
-rw-r--r--Documentation/networking/x25.txt44
-rw-r--r--Documentation/networking/xdp-rx-metadata.rst161
-rw-r--r--Documentation/networking/xfrm_device.rst202
-rw-r--r--Documentation/networking/xfrm_device.txt139
-rw-r--r--Documentation/networking/xfrm_proc.rst119
-rw-r--r--Documentation/networking/xfrm_proc.txt82
-rw-r--r--Documentation/networking/xfrm_sync.rst189
-rw-r--r--Documentation/networking/xfrm_sync.txt169
-rw-r--r--Documentation/networking/xfrm_sysctl.rst11
-rw-r--r--Documentation/networking/xfrm_sysctl.txt4
-rw-r--r--Documentation/networking/xsk-tx-metadata.rst147
-rw-r--r--Documentation/networking/z8530book.rst256
-rw-r--r--Documentation/networking/z8530drv.txt657
-rw-r--r--Documentation/nfc/nfc-hci.txt290
-rw-r--r--Documentation/nfc/nfc-pn544.txt32
-rw-r--r--Documentation/nios2/README23
-rw-r--r--Documentation/ntb.txt230
-rw-r--r--Documentation/numastat.txt30
-rw-r--r--Documentation/nvdimm/btt.txt273
-rw-r--r--Documentation/nvdimm/maintainer-entry-profile.rst60
-rw-r--r--Documentation/nvdimm/nvdimm.txt815
-rw-r--r--Documentation/nvme/feature-and-quirk-policy.rst77
-rw-r--r--Documentation/nvme/index.rst12
-rw-r--r--Documentation/nvme/nvme-pci-endpoint-target.rst368
-rw-r--r--Documentation/nvmem/nvmem.txt183
-rw-r--r--Documentation/openrisc/README110
-rw-r--r--Documentation/openrisc/TODO12
-rw-r--r--Documentation/padata.txt163
-rw-r--r--Documentation/parisc/debugging39
-rw-r--r--Documentation/parisc/registers129
-rw-r--r--Documentation/pcmcia/devicetable.rst37
-rw-r--r--Documentation/pcmcia/devicetable.txt33
-rw-r--r--Documentation/pcmcia/driver-changes.rst160
-rw-r--r--Documentation/pcmcia/driver-changes.txt149
-rw-r--r--Documentation/pcmcia/driver.rst30
-rw-r--r--Documentation/pcmcia/driver.txt30
-rw-r--r--Documentation/pcmcia/index.rst20
-rw-r--r--Documentation/pcmcia/locking.rst133
-rw-r--r--Documentation/pcmcia/locking.txt118
-rw-r--r--Documentation/peci/index.rst16
-rw-r--r--Documentation/peci/peci.rst51
-rw-r--r--Documentation/percpu-rw-semaphore.txt28
-rw-r--r--Documentation/perf/arm-ccn.txt59
-rw-r--r--Documentation/perf/arm_dsu_pmu.txt28
-rw-r--r--Documentation/perf/hisi-pmu.txt53
-rw-r--r--Documentation/perf/qcom_l2_pmu.txt38
-rw-r--r--Documentation/perf/qcom_l3_pmu.txt25
-rw-r--r--Documentation/perf/xgene-pmu.txt48
-rw-r--r--Documentation/phy.txt197
-rw-r--r--Documentation/phy/samsung-usb2.txt135
-rw-r--r--Documentation/pi-futex.txt122
-rw-r--r--Documentation/platform/x86-laptop-drivers.txt18
-rw-r--r--Documentation/pnp.txt292
-rw-r--r--Documentation/power/apm-acpi.rst36
-rw-r--r--Documentation/power/apm-acpi.txt32
-rw-r--r--Documentation/power/basic-pm-debugging.rst269
-rw-r--r--Documentation/power/basic-pm-debugging.txt254
-rw-r--r--Documentation/power/charger-manager.rst205
-rw-r--r--Documentation/power/charger-manager.txt200
-rw-r--r--Documentation/power/drivers-testing.rst52
-rw-r--r--Documentation/power/drivers-testing.txt46
-rw-r--r--Documentation/power/energy-model.rst419
-rw-r--r--Documentation/power/freezing-of-tasks.rst256
-rw-r--r--Documentation/power/freezing-of-tasks.txt231
-rw-r--r--Documentation/power/index.rst46
-rw-r--r--Documentation/power/interface.txt77
-rw-r--r--Documentation/power/opp.rst381
-rw-r--r--Documentation/power/opp.txt342
-rw-r--r--Documentation/power/pci.rst1132
-rw-r--r--Documentation/power/pci.txt1094
-rw-r--r--Documentation/power/pm_qos_interface.rst211
-rw-r--r--Documentation/power/pm_qos_interface.txt212
-rw-r--r--Documentation/power/power_supply_class.rst288
-rw-r--r--Documentation/power/power_supply_class.txt231
-rw-r--r--Documentation/power/powercap/dtpm.rst212
-rw-r--r--Documentation/power/powercap/powercap.rst262
-rw-r--r--Documentation/power/powercap/powercap.txt236
-rw-r--r--Documentation/power/regulator/consumer.rst257
-rw-r--r--Documentation/power/regulator/consumer.txt218
-rw-r--r--Documentation/power/regulator/design.rst38
-rw-r--r--Documentation/power/regulator/design.txt33
-rw-r--r--Documentation/power/regulator/machine.rst97
-rw-r--r--Documentation/power/regulator/machine.txt96
-rw-r--r--Documentation/power/regulator/overview.rst178
-rw-r--r--Documentation/power/regulator/overview.txt171
-rw-r--r--Documentation/power/regulator/regulator.rst32
-rw-r--r--Documentation/power/regulator/regulator.txt30
-rw-r--r--Documentation/power/runtime_pm.rst972
-rw-r--r--Documentation/power/runtime_pm.txt928
-rw-r--r--Documentation/power/s2ram.rst87
-rw-r--r--Documentation/power/s2ram.txt85
-rw-r--r--Documentation/power/suspend-and-cpuhotplug.rst287
-rw-r--r--Documentation/power/suspend-and-cpuhotplug.txt274
-rw-r--r--Documentation/power/suspend-and-interrupts.rst137
-rw-r--r--Documentation/power/suspend-and-interrupts.txt135
-rw-r--r--Documentation/power/swsusp-and-swap-files.rst63
-rw-r--r--Documentation/power/swsusp-and-swap-files.txt60
-rw-r--r--Documentation/power/swsusp-dmcrypt.rst140
-rw-r--r--Documentation/power/swsusp-dmcrypt.txt138
-rw-r--r--Documentation/power/swsusp.rst503
-rw-r--r--Documentation/power/swsusp.txt446
-rw-r--r--Documentation/power/tricks.rst29
-rw-r--r--Documentation/power/tricks.txt27
-rw-r--r--Documentation/power/userland-swsusp.rst193
-rw-r--r--Documentation/power/userland-swsusp.txt170
-rw-r--r--Documentation/power/video.rst213
-rw-r--r--Documentation/power/video.txt185
-rw-r--r--Documentation/powerpc/DAWR-POWER9.txt58
-rw-r--r--Documentation/powerpc/bootwrapper.txt141
-rw-r--r--Documentation/powerpc/cpu_families.txt221
-rw-r--r--Documentation/powerpc/cpu_features.txt56
-rw-r--r--Documentation/powerpc/cxl.txt449
-rw-r--r--Documentation/powerpc/cxlflash.txt429
-rw-r--r--Documentation/powerpc/dscr.txt83
-rw-r--r--Documentation/powerpc/eeh-pci-error-recovery.txt334
-rw-r--r--Documentation/powerpc/firmware-assisted-dump.txt277
-rw-r--r--Documentation/powerpc/hvcs.txt567
-rw-r--r--Documentation/powerpc/mpc52xx.txt39
-rw-r--r--Documentation/powerpc/pci_iov_resource_on_powernv.txt301
-rw-r--r--Documentation/powerpc/pmu-ebb.txt137
-rw-r--r--Documentation/powerpc/ptrace.txt151
-rw-r--r--Documentation/powerpc/qe_firmware.txt295
-rw-r--r--Documentation/powerpc/syscall64-abi.txt105
-rw-r--r--Documentation/powerpc/transactional_memory.txt244
-rw-r--r--Documentation/pps/pps.txt239
-rw-r--r--Documentation/preempt-locking.txt145
-rw-r--r--Documentation/process/1.Intro.rst14
-rw-r--r--Documentation/process/2.Process.rst132
-rw-r--r--Documentation/process/3.Early-stage.rst13
-rw-r--r--Documentation/process/4.Coding.rst23
-rw-r--r--Documentation/process/5.Posting.rst85
-rw-r--r--Documentation/process/6.Followthrough.rst7
-rw-r--r--Documentation/process/7.AdvancedTopics.rst26
-rw-r--r--Documentation/process/8.Conclusion.rst29
-rw-r--r--Documentation/process/adding-syscalls.rst113
-rw-r--r--Documentation/process/applying-patches.rst149
-rw-r--r--Documentation/process/backporting.rst604
-rw-r--r--Documentation/process/botching-up-ioctls.rst225
-rw-r--r--Documentation/process/changes.rst199
-rw-r--r--Documentation/process/clang-format.rst184
-rw-r--r--Documentation/process/code-of-conduct-interpretation.rst122
-rw-r--r--Documentation/process/coding-style.rst347
-rw-r--r--Documentation/process/conf.py10
-rw-r--r--Documentation/process/contribution-maturity-model.rst109
-rw-r--r--Documentation/process/cve.rst121
-rw-r--r--Documentation/process/debugging/driver_development_debugging_guide.rst235
-rw-r--r--Documentation/process/debugging/gdb-kernel-debugging.rst175
-rw-r--r--Documentation/process/debugging/index.rst80
-rw-r--r--Documentation/process/debugging/kgdb.rst (renamed from Documentation/dev-tools/kgdb.rst)129
-rw-r--r--Documentation/process/debugging/media_specific_debugging_guide.rst180
-rw-r--r--Documentation/process/debugging/userspace_debugging_guide.rst280
-rw-r--r--Documentation/process/deprecated.rst319
-rw-r--r--Documentation/process/development-process.rst19
-rw-r--r--Documentation/process/email-clients.rst115
-rw-r--r--Documentation/process/embargoed-hardware-issues.rst358
-rw-r--r--Documentation/process/handling-regressions.rst796
-rw-r--r--Documentation/process/howto.rst131
-rw-r--r--Documentation/process/index.rst91
-rw-r--r--Documentation/process/kernel-docs.rst745
-rw-r--r--Documentation/process/kernel-driver-statement.rst2
-rw-r--r--Documentation/process/kernel-enforcement-statement.rst4
-rw-r--r--Documentation/process/license-rules.rst129
-rw-r--r--Documentation/process/magic-number.rst161
-rw-r--r--Documentation/process/maintainer-handbooks.rst22
-rw-r--r--Documentation/process/maintainer-kvm-x86.rst390
-rw-r--r--Documentation/process/maintainer-netdev.rst552
-rw-r--r--Documentation/process/maintainer-pgp-guide.rst515
-rw-r--r--Documentation/process/maintainer-soc-clean-dts.rst26
-rw-r--r--Documentation/process/maintainer-soc.rst209
-rw-r--r--Documentation/process/maintainer-tip.rst848
-rw-r--r--Documentation/process/maintainers.rst1
-rw-r--r--Documentation/process/management-style.rst7
-rw-r--r--Documentation/process/programming-language.rst29
-rw-r--r--Documentation/process/researcher-guidelines.rst170
-rw-r--r--Documentation/process/security-bugs.rst116
-rw-r--r--Documentation/process/stable-api-nonsense.rst15
-rw-r--r--Documentation/process/stable-kernel-rules.rst281
-rw-r--r--Documentation/process/submit-checklist.rst177
-rw-r--r--Documentation/process/submitting-drivers.rst191
-rw-r--r--Documentation/process/submitting-patches.rst764
-rw-r--r--Documentation/process/volatile-considered-harmful.rst4
-rw-r--r--Documentation/pti/pti_intel_mid.txt99
-rw-r--r--Documentation/ptp/ptp.txt86
-rw-r--r--Documentation/pwm.txt158
-rw-r--r--Documentation/rapidio/mport_cdev.txt107
-rw-r--r--Documentation/rapidio/rapidio.txt351
-rw-r--r--Documentation/rapidio/rio_cm.txt119
-rw-r--r--Documentation/rapidio/sysfs.txt3
-rw-r--r--Documentation/rapidio/tsi721.txt97
-rw-r--r--Documentation/rbtree.txt429
-rw-r--r--Documentation/remoteproc.txt355
-rw-r--r--Documentation/riscv/pmu.txt249
-rw-r--r--Documentation/robust-futex-ABI.txt186
-rw-r--r--Documentation/robust-futexes.txt222
-rw-r--r--Documentation/rpmsg.txt341
-rw-r--r--Documentation/rtc.txt140
-rw-r--r--Documentation/rust/arch-support.rst25
-rw-r--r--Documentation/rust/coding-guidelines.rst412
-rw-r--r--Documentation/rust/general-information.rst161
-rw-r--r--Documentation/rust/index.rst67
-rw-r--r--Documentation/rust/quick-start.rst366
-rw-r--r--Documentation/rust/testing.rst236
-rw-r--r--Documentation/s390/3270.txt271
-rw-r--r--Documentation/s390/CommonIO125
-rw-r--r--Documentation/s390/DASD73
-rw-r--r--Documentation/s390/Debugging390.txt2142
-rw-r--r--Documentation/s390/cds.txt472
-rw-r--r--Documentation/s390/driver-model.txt287
-rw-r--r--Documentation/s390/monreader.txt197
-rw-r--r--Documentation/s390/qeth.txt50
-rw-r--r--Documentation/s390/s390dbf.txt667
-rw-r--r--Documentation/s390/vfio-ap.txt837
-rw-r--r--Documentation/s390/vfio-ccw.txt300
-rw-r--r--Documentation/s390/zfcpdump.txt48
-rw-r--r--Documentation/scheduler/completion.rst293
-rw-r--r--Documentation/scheduler/completion.txt291
-rw-r--r--Documentation/scheduler/index.rst34
-rw-r--r--Documentation/scheduler/membarrier.rst39
-rw-r--r--Documentation/scheduler/sched-arch.rst72
-rw-r--r--Documentation/scheduler/sched-arch.txt74
-rw-r--r--Documentation/scheduler/sched-bwc.rst246
-rw-r--r--Documentation/scheduler/sched-bwc.txt122
-rw-r--r--Documentation/scheduler/sched-capacity.rst442
-rw-r--r--Documentation/scheduler/sched-deadline.rst919
-rw-r--r--Documentation/scheduler/sched-deadline.txt871
-rw-r--r--Documentation/scheduler/sched-debug.rst54
-rw-r--r--Documentation/scheduler/sched-design-CFS.rst256
-rw-r--r--Documentation/scheduler/sched-design-CFS.txt242
-rw-r--r--Documentation/scheduler/sched-domains.rst80
-rw-r--r--Documentation/scheduler/sched-domains.txt77
-rw-r--r--Documentation/scheduler/sched-eevdf.rst43
-rw-r--r--Documentation/scheduler/sched-energy.rst404
-rw-r--r--Documentation/scheduler/sched-ext.rst367
-rw-r--r--Documentation/scheduler/sched-nice-design.rst112
-rw-r--r--Documentation/scheduler/sched-nice-design.txt108
-rw-r--r--Documentation/scheduler/sched-pelt.c3
-rw-r--r--Documentation/scheduler/sched-rt-group.rst190
-rw-r--r--Documentation/scheduler/sched-rt-group.txt183
-rw-r--r--Documentation/scheduler/sched-stats.rst205
-rw-r--r--Documentation/scheduler/sched-stats.txt154
-rw-r--r--Documentation/scheduler/sched-util-clamp.rst741
-rw-r--r--Documentation/scheduler/schedutil.rst172
-rw-r--r--Documentation/scheduler/text_files.rst5
-rw-r--r--Documentation/scsi/53c700.rst134
-rw-r--r--Documentation/scsi/53c700.txt135
-rw-r--r--Documentation/scsi/BusLogic.rst581
-rw-r--r--Documentation/scsi/BusLogic.txt566
-rw-r--r--Documentation/scsi/ChangeLog.lpfc40
-rw-r--r--Documentation/scsi/ChangeLog.megaraid10
-rw-r--r--Documentation/scsi/ChangeLog.megaraid_sas4
-rw-r--r--Documentation/scsi/ChangeLog.ncr53c8xx16
-rw-r--r--Documentation/scsi/ChangeLog.sym53c8xx14
-rw-r--r--Documentation/scsi/ChangeLog.sym53c8xx_210
-rw-r--r--Documentation/scsi/FlashPoint.rst176
-rw-r--r--Documentation/scsi/FlashPoint.txt163
-rw-r--r--Documentation/scsi/LICENSE.qla2xxx290
-rw-r--r--Documentation/scsi/LICENSE.qla4xxx289
-rw-r--r--Documentation/scsi/NinjaSCSI.rst164
-rw-r--r--Documentation/scsi/NinjaSCSI.txt128
-rw-r--r--Documentation/scsi/aacraid.rst177
-rw-r--r--Documentation/scsi/aacraid.txt150
-rw-r--r--Documentation/scsi/advansys.rst272
-rw-r--r--Documentation/scsi/advansys.txt243
-rw-r--r--Documentation/scsi/aha152x.rst204
-rw-r--r--Documentation/scsi/aha152x.txt183
-rw-r--r--Documentation/scsi/aic79xx.rst593
-rw-r--r--Documentation/scsi/aic79xx.txt497
-rw-r--r--Documentation/scsi/aic7xxx.rst458
-rw-r--r--Documentation/scsi/aic7xxx.txt394
-rw-r--r--Documentation/scsi/arcmsr_spec.rst908
-rw-r--r--Documentation/scsi/arcmsr_spec.txt574
-rw-r--r--Documentation/scsi/bfa.rst92
-rw-r--r--Documentation/scsi/bfa.txt82
-rw-r--r--Documentation/scsi/bnx2fc.rst81
-rw-r--r--Documentation/scsi/bnx2fc.txt75
-rw-r--r--Documentation/scsi/cxgb3i.rst90
-rw-r--r--Documentation/scsi/cxgb3i.txt84
-rw-r--r--Documentation/scsi/dc395x.rst114
-rw-r--r--Documentation/scsi/dc395x.txt102
-rw-r--r--Documentation/scsi/dpti.rst92
-rw-r--r--Documentation/scsi/dpti.txt83
-rw-r--r--Documentation/scsi/g_NCR5380.rst93
-rw-r--r--Documentation/scsi/g_NCR5380.txt68
-rw-r--r--Documentation/scsi/hpsa.rst129
-rw-r--r--Documentation/scsi/hpsa.txt130
-rw-r--r--Documentation/scsi/hptiop.rst215
-rw-r--r--Documentation/scsi/hptiop.txt184
-rw-r--r--Documentation/scsi/index.rst78
-rw-r--r--Documentation/scsi/libsas.rst458
-rw-r--r--Documentation/scsi/libsas.txt395
-rw-r--r--Documentation/scsi/link_power_management_policy.rst25
-rw-r--r--Documentation/scsi/link_power_management_policy.txt19
-rw-r--r--Documentation/scsi/lpfc.rst81
-rw-r--r--Documentation/scsi/lpfc.txt83
-rw-r--r--Documentation/scsi/megaraid.rst77
-rw-r--r--Documentation/scsi/megaraid.txt70
-rw-r--r--Documentation/scsi/ncr53c8xx.rst2169
-rw-r--r--Documentation/scsi/ncr53c8xx.txt1824
-rw-r--r--Documentation/scsi/osd.txt197
-rw-r--r--Documentation/scsi/osst.txt218
-rw-r--r--Documentation/scsi/ppa.rst18
-rw-r--r--Documentation/scsi/ppa.txt14
-rw-r--r--Documentation/scsi/qlogicfas.rst87
-rw-r--r--Documentation/scsi/qlogicfas.txt78
-rw-r--r--Documentation/scsi/scsi-changer.rst184
-rw-r--r--Documentation/scsi/scsi-changer.txt180
-rw-r--r--Documentation/scsi/scsi-generic.rst107
-rw-r--r--Documentation/scsi/scsi-generic.txt101
-rw-r--r--Documentation/scsi/scsi-parameters.rst108
-rw-r--r--Documentation/scsi/scsi-parameters.txt112
-rw-r--r--Documentation/scsi/scsi.rst44
-rw-r--r--Documentation/scsi/scsi.txt44
-rw-r--r--Documentation/scsi/scsi_eh.rst485
-rw-r--r--Documentation/scsi/scsi_eh.txt475
-rw-r--r--Documentation/scsi/scsi_fc_transport.rst593
-rw-r--r--Documentation/scsi/scsi_fc_transport.txt496
-rw-r--r--Documentation/scsi/scsi_mid_low_api.rst1222
-rw-r--r--Documentation/scsi/scsi_mid_low_api.txt1281
-rw-r--r--Documentation/scsi/scsi_transport_srp/Makefile7
-rw-r--r--Documentation/scsi/scsi_transport_srp/figures.rst6
-rw-r--r--Documentation/scsi/sd-parameters.rst27
-rw-r--r--Documentation/scsi/sd-parameters.txt22
-rw-r--r--Documentation/scsi/smartpqi.rst78
-rw-r--r--Documentation/scsi/smartpqi.txt80
-rw-r--r--Documentation/scsi/st.rst678
-rw-r--r--Documentation/scsi/st.txt592
-rw-r--r--Documentation/scsi/sym53c500_cs.rst29
-rw-r--r--Documentation/scsi/sym53c500_cs.txt23
-rw-r--r--Documentation/scsi/sym53c8xx_2.rst1209
-rw-r--r--Documentation/scsi/sym53c8xx_2.txt1048
-rw-r--r--Documentation/scsi/tcm_qla2xxx.rst36
-rw-r--r--Documentation/scsi/tcm_qla2xxx.txt22
-rw-r--r--Documentation/scsi/ufs.rst210
-rw-r--r--Documentation/scsi/ufs.txt153
-rw-r--r--Documentation/scsi/wd719x.rst24
-rw-r--r--Documentation/scsi/wd719x.txt21
-rw-r--r--Documentation/security/IMA-templates.rst27
-rw-r--r--Documentation/security/LSM-sctp.rst175
-rw-r--r--Documentation/security/LSM.rst14
-rw-r--r--Documentation/security/SCTP.rst344
-rw-r--r--Documentation/security/SELinux-sctp.rst158
-rw-r--r--Documentation/security/credentials.rst26
-rw-r--r--Documentation/security/digsig.rst101
-rw-r--r--Documentation/security/index.rst13
-rw-r--r--Documentation/security/ipe.rst446
-rw-r--r--Documentation/security/keys/core.rst176
-rw-r--r--Documentation/security/keys/request-key.rst52
-rw-r--r--Documentation/security/keys/trusted-encrypted.rst370
-rw-r--r--Documentation/security/landlock.rst140
-rw-r--r--Documentation/security/lsm-development.rst17
-rw-r--r--Documentation/security/lsm.rst131
-rw-r--r--Documentation/security/sak.rst (renamed from Documentation/SAK.txt)0
-rw-r--r--Documentation/security/secrets/coco.rst103
-rw-r--r--Documentation/security/secrets/index.rst9
-rw-r--r--Documentation/security/self-protection.rst5
-rw-r--r--Documentation/security/siphash.rst199
-rw-r--r--Documentation/security/snp-tdx-threat-model.rst253
-rw-r--r--Documentation/security/tpm/index.rst6
-rw-r--r--Documentation/security/tpm/tpm-security.rst216
-rw-r--r--Documentation/security/tpm/tpm_event_log.rst55
-rw-r--r--Documentation/security/tpm/tpm_ffa_crb.rst65
-rw-r--r--Documentation/security/tpm/tpm_ftpm_tee.rst27
-rw-r--r--Documentation/security/tpm/tpm_tis.rst46
-rw-r--r--Documentation/security/tpm/xen-tpmfront.rst124
-rw-r--r--Documentation/security/tpm/xen-tpmfront.txt113
-rw-r--r--Documentation/serial/README.cycladesZ8
-rw-r--r--Documentation/serial/driver486
-rw-r--r--Documentation/serial/moxa-smartio523
-rw-r--r--Documentation/serial/n_gsm.txt96
-rw-r--r--Documentation/serial/rocket.txt189
-rw-r--r--Documentation/serial/serial-iso7816.txt83
-rw-r--r--Documentation/serial/serial-rs485.txt95
-rw-r--r--Documentation/serial/tty.txt313
-rw-r--r--Documentation/sgi-ioc4.txt49
-rw-r--r--Documentation/sh/conf.py10
-rw-r--r--Documentation/sh/index.rst59
-rw-r--r--Documentation/sh/new-machine.txt278
-rw-r--r--Documentation/sh/register-banks.txt33
-rw-r--r--Documentation/siphash.txt189
-rw-r--r--Documentation/smsc_ece1099.txt60
-rw-r--r--Documentation/sound/alsa-configuration.rst175
-rw-r--r--Documentation/sound/cards/audigy-mixer.rst260
-rw-r--r--Documentation/sound/cards/emu-mixer.rst226
-rw-r--r--Documentation/sound/cards/index.rst2
-rw-r--r--Documentation/sound/cards/maya44.rst2
-rw-r--r--Documentation/sound/cards/pcmtest.rst120
-rw-r--r--Documentation/sound/cards/sb-live-mixer.rst49
-rw-r--r--Documentation/sound/codecs/cs35l56.rst306
-rw-r--r--Documentation/sound/codecs/index.rst9
-rw-r--r--Documentation/sound/conf.py10
-rw-r--r--Documentation/sound/designs/compress-accel.rst134
-rw-r--r--Documentation/sound/designs/compress-offload.rst84
-rw-r--r--Documentation/sound/designs/control-names.rst2
-rw-r--r--Documentation/sound/designs/index.rst3
-rw-r--r--Documentation/sound/designs/jack-controls.rst2
-rw-r--r--Documentation/sound/designs/jack-injection.rst166
-rw-r--r--Documentation/sound/designs/midi-2.0.rst584
-rw-r--r--Documentation/sound/designs/powersave.rst6
-rw-r--r--Documentation/sound/designs/procfile.rst2
-rw-r--r--Documentation/sound/designs/seq-oss.rst2
-rw-r--r--Documentation/sound/designs/timestamping.rst2
-rw-r--r--Documentation/sound/designs/tracepoints.rst22
-rw-r--r--Documentation/sound/hd-audio/controls.rst2
-rw-r--r--Documentation/sound/hd-audio/index.rst2
-rw-r--r--Documentation/sound/hd-audio/intel-multi-link.rst312
-rw-r--r--Documentation/sound/hd-audio/models.rst17
-rw-r--r--Documentation/sound/hd-audio/notes.rst52
-rw-r--r--Documentation/sound/hd-audio/realtek-pc-beep.rst129
-rw-r--r--Documentation/sound/index.rst12
-rw-r--r--Documentation/sound/kernel-api/alsa-driver-api.rst3
-rw-r--r--Documentation/sound/kernel-api/writing-an-alsa-driver.rst1481
-rw-r--r--Documentation/sound/soc/clocking.rst12
-rw-r--r--Documentation/sound/soc/codec-to-codec.rst21
-rw-r--r--Documentation/sound/soc/codec.rst12
-rw-r--r--Documentation/sound/soc/dai.rst10
-rw-r--r--Documentation/sound/soc/dapm-graph.svg375
-rw-r--r--Documentation/sound/soc/dapm.rst172
-rw-r--r--Documentation/sound/soc/dpcm.rst47
-rw-r--r--Documentation/sound/soc/index.rst1
-rw-r--r--Documentation/sound/soc/machine.rst26
-rw-r--r--Documentation/sound/soc/platform.rst4
-rw-r--r--Documentation/sound/soc/usb.rst482
-rw-r--r--Documentation/sound/utimers.rst126
-rw-r--r--Documentation/sparc/adi.txt278
-rw-r--r--Documentation/sparc/console.txt9
-rw-r--r--Documentation/sparc/oradax/oracle-dax.txt429
-rw-r--r--Documentation/speculation.txt90
-rw-r--r--Documentation/sphinx-static/custom.css153
-rw-r--r--Documentation/sphinx-static/theme_overrides.css31
-rw-r--r--Documentation/sphinx-static/theme_rtd_colors.css37
-rw-r--r--Documentation/sphinx/automarkup.py306
-rw-r--r--Documentation/sphinx/cdomain.py165
-rw-r--r--Documentation/sphinx/kernel_abi.py173
-rw-r--r--Documentation/sphinx/kernel_feat.py135
-rwxr-xr-xDocumentation/sphinx/kernel_include.py531
-rw-r--r--Documentation/sphinx/kerneldoc-preamble.sty234
-rw-r--r--Documentation/sphinx/kerneldoc.py271
-rw-r--r--Documentation/sphinx/kfigure.py214
-rw-r--r--Documentation/sphinx/load_config.py36
-rwxr-xr-xDocumentation/sphinx/maintainers_include.py197
-rw-r--r--Documentation/sphinx/min_requirements.txt11
-rw-r--r--Documentation/sphinx/parallel-wrapper.sh33
-rwxr-xr-xDocumentation/sphinx/parse-headers.pl401
-rwxr-xr-xDocumentation/sphinx/parser_yaml.py123
-rw-r--r--Documentation/sphinx/requirements.txt7
-rwxr-xr-xDocumentation/sphinx/rstFlatTable.py25
-rw-r--r--Documentation/sphinx/templates/kernel-toc.html19
-rw-r--r--Documentation/sphinx/templates/translations.html15
-rw-r--r--Documentation/sphinx/translations.py99
-rw-r--r--Documentation/spi/butterfly68
-rw-r--r--Documentation/spi/butterfly.rst74
-rw-r--r--Documentation/spi/index.rst21
-rw-r--r--Documentation/spi/pxa2xx235
-rw-r--r--Documentation/spi/spi-lm70llp79
-rw-r--r--Documentation/spi/spi-lm70llp.rst84
-rw-r--r--Documentation/spi/spi-sc18is60236
-rw-r--r--Documentation/spi/spi-sc18is602.rst39
-rw-r--r--Documentation/spi/spi-summary625
-rw-r--r--Documentation/spi/spi-summary.rst714
-rw-r--r--Documentation/spi/spidev149
-rw-r--r--Documentation/spi/spidev.rst191
-rw-r--r--Documentation/staging/crc32.rst189
-rw-r--r--Documentation/staging/index.rst16
-rw-r--r--Documentation/staging/lzo.rst202
-rw-r--r--Documentation/staging/magic-number.rst84
-rw-r--r--Documentation/staging/remoteproc.rst360
-rw-r--r--Documentation/staging/rpmsg.rst299
-rw-r--r--Documentation/staging/speculation.rst91
-rw-r--r--Documentation/staging/static-keys.rst328
-rw-r--r--Documentation/staging/xz.rst98
-rw-r--r--Documentation/static-keys.txt331
-rw-r--r--Documentation/subsystem-apis.rst93
-rw-r--r--Documentation/sunrpc/xdr/nfs4_1.x186
-rw-r--r--Documentation/svga.txt249
-rw-r--r--Documentation/switchtec.txt102
-rw-r--r--Documentation/sysctl/README76
-rw-r--r--Documentation/sysctl/abi.txt54
-rw-r--r--Documentation/sysctl/fs.txt366
-rw-r--r--Documentation/sysctl/kernel.txt1122
-rw-r--r--Documentation/sysctl/net.txt407
-rw-r--r--Documentation/sysctl/sunrpc.txt20
-rw-r--r--Documentation/sysctl/user.txt66
-rw-r--r--Documentation/sysctl/vm.txt913
-rw-r--r--Documentation/target/index.rst19
-rw-r--r--Documentation/target/scripts.rst5
-rwxr-xr-xDocumentation/target/tcm_mod_builder.py664
-rw-r--r--Documentation/target/tcm_mod_builder.rst149
-rw-r--r--Documentation/target/tcm_mod_builder.txt145
-rw-r--r--Documentation/target/tcmu-design.rst405
-rw-r--r--Documentation/target/tcmu-design.txt381
-rw-r--r--Documentation/tee.txt127
-rw-r--r--Documentation/tee/amd-tee.rst90
-rw-r--r--Documentation/tee/index.rst21
-rw-r--r--Documentation/tee/op-tee.rst166
-rw-r--r--Documentation/tee/qtee.rst96
-rw-r--r--Documentation/tee/tee.rst22
-rw-r--r--Documentation/tee/ts-tee.rst71
-rw-r--r--Documentation/thermal/cpu-cooling-api.txt92
-rw-r--r--Documentation/thermal/exynos_thermal77
-rw-r--r--Documentation/thermal/exynos_thermal_emulation53
-rw-r--r--Documentation/thermal/intel_powerclamp.txt317
-rw-r--r--Documentation/thermal/nouveau_thermal82
-rw-r--r--Documentation/thermal/power_allocator.txt247
-rw-r--r--Documentation/thermal/sysfs-api.txt636
-rw-r--r--Documentation/thermal/x86_pkg_temperature_thermal47
-rw-r--r--Documentation/this_cpu_ops.txt339
-rw-r--r--Documentation/timers/NO_HZ.txt318
-rw-r--r--Documentation/timers/delay_sleep_functions.rst121
-rw-r--r--Documentation/timers/highres.rst250
-rw-r--r--Documentation/timers/highres.txt249
-rw-r--r--Documentation/timers/hpet.rst30
-rw-r--r--Documentation/timers/hpet.txt28
-rw-r--r--Documentation/timers/hrtimers.rst173
-rw-r--r--Documentation/timers/hrtimers.txt178
-rw-r--r--Documentation/timers/index.rst22
-rw-r--r--Documentation/timers/no_hz.rst317
-rw-r--r--Documentation/timers/timekeeping.rst180
-rw-r--r--Documentation/timers/timekeeping.txt179
-rw-r--r--Documentation/timers/timers-howto.txt105
-rw-r--r--Documentation/tools/index.rst21
-rw-r--r--Documentation/tools/rtla/Makefile53
-rw-r--r--Documentation/tools/rtla/common_appendix.rst24
-rw-r--r--Documentation/tools/rtla/common_hist_options.rst23
-rw-r--r--Documentation/tools/rtla/common_options.rst119
-rw-r--r--Documentation/tools/rtla/common_osnoise_description.rst8
-rw-r--r--Documentation/tools/rtla/common_osnoise_options.rst39
-rw-r--r--Documentation/tools/rtla/common_timerlat_aa.rst7
-rw-r--r--Documentation/tools/rtla/common_timerlat_description.rst18
-rw-r--r--Documentation/tools/rtla/common_timerlat_options.rst67
-rw-r--r--Documentation/tools/rtla/common_top_options.rst3
-rw-r--r--Documentation/tools/rtla/index.rst27
-rw-r--r--Documentation/tools/rtla/rtla-hwnoise.rst109
-rw-r--r--Documentation/tools/rtla/rtla-osnoise-hist.rst68
-rw-r--r--Documentation/tools/rtla/rtla-osnoise-top.rst63
-rw-r--r--Documentation/tools/rtla/rtla-osnoise.rst59
-rw-r--r--Documentation/tools/rtla/rtla-timerlat-hist.rst113
-rw-r--r--Documentation/tools/rtla/rtla-timerlat-top.rst136
-rw-r--r--Documentation/tools/rtla/rtla-timerlat.rst54
-rw-r--r--Documentation/tools/rtla/rtla.rst48
-rw-r--r--Documentation/tools/rv/Makefile52
-rw-r--r--Documentation/tools/rv/common_appendix.rst16
-rw-r--r--Documentation/tools/rv/common_ikm.rst21
-rw-r--r--Documentation/tools/rv/index.rst25
-rw-r--r--Documentation/tools/rv/rv-list.rst43
-rw-r--r--Documentation/tools/rv/rv-mon-sched.rst69
-rw-r--r--Documentation/tools/rv/rv-mon-wip.rst44
-rw-r--r--Documentation/tools/rv/rv-mon-wwnr.rst43
-rw-r--r--Documentation/tools/rv/rv-mon.rst55
-rw-r--r--Documentation/tools/rv/rv.rst63
-rw-r--r--Documentation/trace/boottime-trace.rst301
-rw-r--r--Documentation/trace/coresight-cpu-debug.txt187
-rw-r--r--Documentation/trace/coresight.txt430
-rw-r--r--Documentation/trace/coresight/coresight-config.rst294
-rw-r--r--Documentation/trace/coresight/coresight-cpu-debug.rst193
-rw-r--r--Documentation/trace/coresight/coresight-dummy.rst32
-rw-r--r--Documentation/trace/coresight/coresight-ect.rst226
-rw-r--r--Documentation/trace/coresight/coresight-etm4x-reference.rst827
-rw-r--r--Documentation/trace/coresight/coresight-perf.rst189
-rw-r--r--Documentation/trace/coresight/coresight-tpda.rst52
-rw-r--r--Documentation/trace/coresight/coresight-tpdm.rst45
-rw-r--r--Documentation/trace/coresight/coresight-trbe.rst38
-rw-r--r--Documentation/trace/coresight/coresight.rst686
-rw-r--r--Documentation/trace/coresight/index.rst9
-rw-r--r--Documentation/trace/coresight/panic.rst362
-rw-r--r--Documentation/trace/coresight/ultrasoc-smb.rst83
-rw-r--r--Documentation/trace/debugging.rst161
-rw-r--r--Documentation/trace/eprobetrace.rst269
-rw-r--r--Documentation/trace/events-kmem.rst2
-rw-r--r--Documentation/trace/events-msr.rst6
-rw-r--r--Documentation/trace/events-nmi.rst6
-rw-r--r--Documentation/trace/events-power.rst21
-rw-r--r--Documentation/trace/events.rst676
-rw-r--r--Documentation/trace/fprobe.rst198
-rw-r--r--Documentation/trace/fprobetrace.rst251
-rw-r--r--Documentation/trace/ftrace-design.rst32
-rw-r--r--Documentation/trace/ftrace-uses.rst106
-rw-r--r--Documentation/trace/ftrace.rst554
-rw-r--r--Documentation/trace/hisi-ptt.rst304
-rw-r--r--Documentation/trace/histogram-design.rst2118
-rw-r--r--Documentation/trace/histogram.rst930
-rw-r--r--Documentation/trace/hwlat_detector.rst15
-rw-r--r--Documentation/trace/index.rst98
-rw-r--r--Documentation/trace/intel_th.rst32
-rw-r--r--Documentation/trace/kprobes.rst789
-rw-r--r--Documentation/trace/kprobetrace.rst133
-rw-r--r--Documentation/trace/mmiotrace.rst22
-rw-r--r--Documentation/trace/osnoise-tracer.rst180
-rw-r--r--Documentation/trace/postprocess/decode_msr.py4
-rw-r--r--Documentation/trace/postprocess/trace-pagealloc-postprocess.pl6
-rw-r--r--Documentation/trace/postprocess/trace-vmscan-postprocess.pl55
-rw-r--r--Documentation/trace/ring-buffer-design.rst983
-rw-r--r--Documentation/trace/ring-buffer-design.txt955
-rw-r--r--Documentation/trace/ring-buffer-map.rst106
-rw-r--r--Documentation/trace/rv/da_monitor_instrumentation.rst171
-rw-r--r--Documentation/trace/rv/deterministic_automata.rst184
-rw-r--r--Documentation/trace/rv/index.rst17
-rw-r--r--Documentation/trace/rv/linear_temporal_logic.rst134
-rw-r--r--Documentation/trace/rv/monitor_rtapp.rst133
-rw-r--r--Documentation/trace/rv/monitor_sched.rst402
-rw-r--r--Documentation/trace/rv/monitor_synthesis.rst271
-rw-r--r--Documentation/trace/rv/monitor_wip.rst55
-rw-r--r--Documentation/trace/rv/monitor_wwnr.rst45
-rw-r--r--Documentation/trace/rv/runtime-verification.rst231
-rw-r--r--Documentation/trace/stm.rst4
-rw-r--r--Documentation/trace/timerlat-tracer.rst260
-rw-r--r--Documentation/trace/tracepoint-analysis.rst8
-rw-r--r--Documentation/trace/tracepoints.rst44
-rw-r--r--Documentation/trace/uprobetracer.rst55
-rw-r--r--Documentation/trace/user_events.rst304
-rw-r--r--Documentation/translations/index.rst46
-rw-r--r--Documentation/translations/it_IT/RCU/index.rst19
-rw-r--r--Documentation/translations/it_IT/RCU/torture.rst369
-rw-r--r--Documentation/translations/it_IT/admin-guide/README.rst12
-rw-r--r--Documentation/translations/it_IT/admin-guide/kernel-parameters.rst12
-rw-r--r--Documentation/translations/it_IT/arch/riscv/patch-acceptance.rst60
-rw-r--r--Documentation/translations/it_IT/core-api/index.rst30
-rw-r--r--Documentation/translations/it_IT/core-api/memory-allocation.rst13
-rw-r--r--Documentation/translations/it_IT/core-api/symbol-namespaces.rst163
-rw-r--r--Documentation/translations/it_IT/dev-tools/clang-format.rst197
-rw-r--r--Documentation/translations/it_IT/dev-tools/index.rst17
-rw-r--r--Documentation/translations/it_IT/devicetree/bindings/submitting-patches.rst11
-rw-r--r--Documentation/translations/it_IT/disclaimer-ita.rst13
-rw-r--r--Documentation/translations/it_IT/doc-guide/index.rst6
-rw-r--r--Documentation/translations/it_IT/doc-guide/kernel-doc.rst115
-rw-r--r--Documentation/translations/it_IT/doc-guide/parse-headers.rst9
-rw-r--r--Documentation/translations/it_IT/doc-guide/sphinx.rst118
-rw-r--r--Documentation/translations/it_IT/i2c/i2c-protocol.rst99
-rw-r--r--Documentation/translations/it_IT/i2c/index.rst46
-rw-r--r--Documentation/translations/it_IT/i2c/summary.rst84
-rw-r--r--Documentation/translations/it_IT/index.rst169
-rw-r--r--Documentation/translations/it_IT/kernel-hacking/hacking.rst57
-rw-r--r--Documentation/translations/it_IT/kernel-hacking/locking.rst243
-rw-r--r--Documentation/translations/it_IT/locking/index.rst20
-rw-r--r--Documentation/translations/it_IT/locking/lockdep-design.rst678
-rw-r--r--Documentation/translations/it_IT/locking/lockstat.rst230
-rw-r--r--Documentation/translations/it_IT/locking/locktorture.rst181
-rw-r--r--Documentation/translations/it_IT/locking/locktypes.rst547
-rw-r--r--Documentation/translations/it_IT/maintainer/configure-git.rst10
-rw-r--r--Documentation/translations/it_IT/process/1.Intro.rst297
-rw-r--r--Documentation/translations/it_IT/process/2.Process.rst530
-rw-r--r--Documentation/translations/it_IT/process/3.Early-stage.rst242
-rw-r--r--Documentation/translations/it_IT/process/4.Coding.rst450
-rw-r--r--Documentation/translations/it_IT/process/5.Posting.rst381
-rw-r--r--Documentation/translations/it_IT/process/6.Followthrough.rst247
-rw-r--r--Documentation/translations/it_IT/process/7.AdvancedTopics.rst210
-rw-r--r--Documentation/translations/it_IT/process/8.Conclusion.rst84
-rw-r--r--Documentation/translations/it_IT/process/adding-syscalls.rst643
-rw-r--r--Documentation/translations/it_IT/process/applying-patches.rst15
-rw-r--r--Documentation/translations/it_IT/process/botching-up-ioctls.rst249
-rw-r--r--Documentation/translations/it_IT/process/changes.rst553
-rw-r--r--Documentation/translations/it_IT/process/code-of-conduct.rst12
-rw-r--r--Documentation/translations/it_IT/process/coding-style.rst1230
-rw-r--r--Documentation/translations/it_IT/process/deprecated.rst409
-rw-r--r--Documentation/translations/it_IT/process/development-process.rst32
-rw-r--r--Documentation/translations/it_IT/process/email-clients.rst376
-rw-r--r--Documentation/translations/it_IT/process/howto.rst642
-rw-r--r--Documentation/translations/it_IT/process/index.rst107
-rw-r--r--Documentation/translations/it_IT/process/kernel-docs.rst18
-rw-r--r--Documentation/translations/it_IT/process/kernel-driver-statement.rst211
-rw-r--r--Documentation/translations/it_IT/process/kernel-enforcement-statement.rst175
-rw-r--r--Documentation/translations/it_IT/process/license-rules.rst500
-rw-r--r--Documentation/translations/it_IT/process/maintainer-handbooks.rst24
-rw-r--r--Documentation/translations/it_IT/process/maintainer-pgp-guide.rst941
-rw-r--r--Documentation/translations/it_IT/process/maintainer-tip.rst10
-rw-r--r--Documentation/translations/it_IT/process/maintainers.rst13
-rw-r--r--Documentation/translations/it_IT/process/management-style.rst295
-rw-r--r--Documentation/translations/it_IT/process/programming-language.rst68
-rw-r--r--Documentation/translations/it_IT/process/security-bugs.rst12
-rw-r--r--Documentation/translations/it_IT/process/stable-api-nonsense.rst209
-rw-r--r--Documentation/translations/it_IT/process/stable-kernel-rules.rst248
-rw-r--r--Documentation/translations/it_IT/process/submit-checklist.rst142
-rw-r--r--Documentation/translations/it_IT/process/submitting-patches.rst930
-rw-r--r--Documentation/translations/it_IT/process/volatile-considered-harmful.rst134
-rw-r--r--Documentation/translations/it_IT/staging/index.rst13
-rw-r--r--Documentation/translations/it_IT/staging/magic-number.rst90
-rw-r--r--Documentation/translations/it_IT/subsystem-apis.rst47
-rw-r--r--Documentation/translations/ja_JP/SubmitChecklist111
-rw-r--r--Documentation/translations/ja_JP/SubmittingPatches68
-rw-r--r--Documentation/translations/ja_JP/disclaimer-ja_JP.rst24
-rw-r--r--Documentation/translations/ja_JP/howto.rst654
-rw-r--r--Documentation/translations/ja_JP/index.rst16
-rw-r--r--Documentation/translations/ja_JP/process/howto.rst629
-rw-r--r--Documentation/translations/ja_JP/process/submit-checklist.rst163
-rw-r--r--Documentation/translations/ko_KR/core-api/wrappers/memory-barriers.rst18
-rw-r--r--Documentation/translations/ko_KR/howto.rst632
-rw-r--r--Documentation/translations/ko_KR/index.rst17
-rw-r--r--Documentation/translations/ko_KR/memory-barriers.txt626
-rw-r--r--Documentation/translations/ko_KR/process/howto.rst619
-rw-r--r--Documentation/translations/sp_SP/disclaimer-sp.rst9
-rw-r--r--Documentation/translations/sp_SP/index.rst81
-rw-r--r--Documentation/translations/sp_SP/memory-barriers.txt3134
-rw-r--r--Documentation/translations/sp_SP/process/1.Intro.rst302
-rw-r--r--Documentation/translations/sp_SP/process/2.Process.rst543
-rw-r--r--Documentation/translations/sp_SP/process/3.Early-stage.rst241
-rw-r--r--Documentation/translations/sp_SP/process/4.Coding.rst470
-rw-r--r--Documentation/translations/sp_SP/process/5.Posting.rst391
-rw-r--r--Documentation/translations/sp_SP/process/6.Followthrough.rst230
-rw-r--r--Documentation/translations/sp_SP/process/7.AdvancedTopics.rst214
-rw-r--r--Documentation/translations/sp_SP/process/8.Conclusion.rst82
-rw-r--r--Documentation/translations/sp_SP/process/adding-syscalls.rst632
-rw-r--r--Documentation/translations/sp_SP/process/code-of-conduct.rst97
-rw-r--r--Documentation/translations/sp_SP/process/coding-style.rst1315
-rw-r--r--Documentation/translations/sp_SP/process/contribution-maturity-model.rst120
-rw-r--r--Documentation/translations/sp_SP/process/deprecated.rst381
-rw-r--r--Documentation/translations/sp_SP/process/development-process.rst32
-rw-r--r--Documentation/translations/sp_SP/process/email-clients.rst374
-rw-r--r--Documentation/translations/sp_SP/process/embargoed-hardware-issues.rst341
-rw-r--r--Documentation/translations/sp_SP/process/handling-regressions.rst797
-rw-r--r--Documentation/translations/sp_SP/process/howto.rst617
-rw-r--r--Documentation/translations/sp_SP/process/index.rst32
-rw-r--r--Documentation/translations/sp_SP/process/kernel-docs.rst186
-rw-r--r--Documentation/translations/sp_SP/process/kernel-enforcement-statement.rst174
-rw-r--r--Documentation/translations/sp_SP/process/magic-number.rst89
-rw-r--r--Documentation/translations/sp_SP/process/maintainer-kvm-x86.rst465
-rw-r--r--Documentation/translations/sp_SP/process/management-style.rst299
-rw-r--r--Documentation/translations/sp_SP/process/programming-language.rst53
-rw-r--r--Documentation/translations/sp_SP/process/researcher-guidelines.rst150
-rw-r--r--Documentation/translations/sp_SP/process/security-bugs.rst103
-rw-r--r--Documentation/translations/sp_SP/process/submit-checklist.rst134
-rw-r--r--Documentation/translations/sp_SP/process/submitting-patches.rst919
-rw-r--r--Documentation/translations/sp_SP/scheduler/index.rst10
-rw-r--r--Documentation/translations/sp_SP/scheduler/sched-bwc.rst287
-rw-r--r--Documentation/translations/sp_SP/scheduler/sched-design-CFS.rst277
-rw-r--r--Documentation/translations/sp_SP/scheduler/sched-eevdf.rst58
-rw-r--r--Documentation/translations/sp_SP/wrappers/memory-barriers.rst19
-rw-r--r--Documentation/translations/zh_CN/HOWTO534
-rw-r--r--Documentation/translations/zh_CN/IRQ.txt39
-rw-r--r--Documentation/translations/zh_CN/PCI/acpi-info.rst139
-rw-r--r--Documentation/translations/zh_CN/PCI/index.rst34
-rw-r--r--Documentation/translations/zh_CN/PCI/msi-howto.rst244
-rw-r--r--Documentation/translations/zh_CN/PCI/pci-iov-howto.rst169
-rw-r--r--Documentation/translations/zh_CN/PCI/pci.rst514
-rw-r--r--Documentation/translations/zh_CN/PCI/pciebus-howto.rst192
-rw-r--r--Documentation/translations/zh_CN/PCI/sysfs-pci.rst126
-rw-r--r--Documentation/translations/zh_CN/SecurityBugs50
-rw-r--r--Documentation/translations/zh_CN/SubmittingDrivers164
-rw-r--r--Documentation/translations/zh_CN/SubmittingPatches412
-rw-r--r--Documentation/translations/zh_CN/accounting/delay-accounting.rst112
-rw-r--r--Documentation/translations/zh_CN/accounting/index.rst25
-rw-r--r--Documentation/translations/zh_CN/accounting/psi.rst155
-rw-r--r--Documentation/translations/zh_CN/accounting/taskstats.rst145
-rw-r--r--Documentation/translations/zh_CN/admin-guide/README.rst291
-rw-r--r--Documentation/translations/zh_CN/admin-guide/bootconfig.rst293
-rw-r--r--Documentation/translations/zh_CN/admin-guide/bug-bisect.rst81
-rw-r--r--Documentation/translations/zh_CN/admin-guide/bug-hunting.rst331
-rw-r--r--Documentation/translations/zh_CN/admin-guide/clearing-warn-once.rst9
-rw-r--r--Documentation/translations/zh_CN/admin-guide/cpu-load.rst105
-rw-r--r--Documentation/translations/zh_CN/admin-guide/cputopology.rst96
-rw-r--r--Documentation/translations/zh_CN/admin-guide/index.rst132
-rw-r--r--Documentation/translations/zh_CN/admin-guide/init.rst54
-rw-r--r--Documentation/translations/zh_CN/admin-guide/lockup-watchdogs.rst66
-rw-r--r--Documentation/translations/zh_CN/admin-guide/mm/damon/index.rst29
-rw-r--r--Documentation/translations/zh_CN/admin-guide/mm/damon/lru_sort.rst263
-rw-r--r--Documentation/translations/zh_CN/admin-guide/mm/damon/reclaim.rst228
-rw-r--r--Documentation/translations/zh_CN/admin-guide/mm/damon/start.rst124
-rw-r--r--Documentation/translations/zh_CN/admin-guide/mm/damon/usage.rst345
-rw-r--r--Documentation/translations/zh_CN/admin-guide/mm/index.rst49
-rw-r--r--Documentation/translations/zh_CN/admin-guide/mm/ksm.rst198
-rw-r--r--Documentation/translations/zh_CN/admin-guide/numastat.rst48
-rw-r--r--Documentation/translations/zh_CN/admin-guide/reporting-issues.rst1368
-rw-r--r--Documentation/translations/zh_CN/admin-guide/reporting-regressions.rst370
-rw-r--r--Documentation/translations/zh_CN/admin-guide/sysrq.rst280
-rw-r--r--Documentation/translations/zh_CN/admin-guide/tainted-kernels.rst157
-rw-r--r--Documentation/translations/zh_CN/admin-guide/unicode.rst170
-rw-r--r--Documentation/translations/zh_CN/arch/arm/Booting175
-rw-r--r--Documentation/translations/zh_CN/arch/arm/kernel_user_helpers.txt284
-rw-r--r--Documentation/translations/zh_CN/arch/arm64/amu.rst100
-rw-r--r--Documentation/translations/zh_CN/arch/arm64/booting.txt246
-rw-r--r--Documentation/translations/zh_CN/arch/arm64/elf_hwcaps.rst240
-rw-r--r--Documentation/translations/zh_CN/arch/arm64/hugetlbpage.rst45
-rw-r--r--Documentation/translations/zh_CN/arch/arm64/index.rst19
-rw-r--r--Documentation/translations/zh_CN/arch/arm64/legacy_instructions.txt72
-rw-r--r--Documentation/translations/zh_CN/arch/arm64/memory.txt114
-rw-r--r--Documentation/translations/zh_CN/arch/arm64/perf.rst86
-rw-r--r--Documentation/translations/zh_CN/arch/arm64/silicon-errata.txt74
-rw-r--r--Documentation/translations/zh_CN/arch/arm64/tagged-pointers.txt52
-rw-r--r--Documentation/translations/zh_CN/arch/index.rst28
-rw-r--r--Documentation/translations/zh_CN/arch/loongarch/booting.rst48
-rw-r--r--Documentation/translations/zh_CN/arch/loongarch/features.rst8
-rw-r--r--Documentation/translations/zh_CN/arch/loongarch/index.rst27
-rw-r--r--Documentation/translations/zh_CN/arch/loongarch/introduction.rst353
-rw-r--r--Documentation/translations/zh_CN/arch/loongarch/irq-chip-model.rst244
-rw-r--r--Documentation/translations/zh_CN/arch/mips/booting.rst34
-rw-r--r--Documentation/translations/zh_CN/arch/mips/features.rst13
-rw-r--r--Documentation/translations/zh_CN/arch/mips/index.rst29
-rw-r--r--Documentation/translations/zh_CN/arch/mips/ingenic-tcu.rst72
-rw-r--r--Documentation/translations/zh_CN/arch/openrisc/index.rst32
-rw-r--r--Documentation/translations/zh_CN/arch/openrisc/openrisc_port.rst127
-rw-r--r--Documentation/translations/zh_CN/arch/openrisc/todo.rst23
-rw-r--r--Documentation/translations/zh_CN/arch/parisc/debugging.rst45
-rw-r--r--Documentation/translations/zh_CN/arch/parisc/index.rst31
-rw-r--r--Documentation/translations/zh_CN/arch/parisc/registers.rst156
-rw-r--r--Documentation/translations/zh_CN/arch/riscv/boot-image-header.rst69
-rw-r--r--Documentation/translations/zh_CN/arch/riscv/boot.rst155
-rw-r--r--Documentation/translations/zh_CN/arch/riscv/index.rst31
-rw-r--r--Documentation/translations/zh_CN/arch/riscv/patch-acceptance.rst33
-rw-r--r--Documentation/translations/zh_CN/arch/riscv/vm-layout.rst104
-rw-r--r--Documentation/translations/zh_CN/arm/Booting175
-rw-r--r--Documentation/translations/zh_CN/arm/kernel_user_helpers.txt284
-rw-r--r--Documentation/translations/zh_CN/arm64/booting.txt246
-rw-r--r--Documentation/translations/zh_CN/arm64/legacy_instructions.txt72
-rw-r--r--Documentation/translations/zh_CN/arm64/memory.txt114
-rw-r--r--Documentation/translations/zh_CN/arm64/silicon-errata.txt74
-rw-r--r--Documentation/translations/zh_CN/arm64/tagged-pointers.txt52
-rw-r--r--Documentation/translations/zh_CN/basic_profiling.txt71
-rw-r--r--Documentation/translations/zh_CN/coding-style.rst950
-rw-r--r--Documentation/translations/zh_CN/core-api/assoc_array.rst473
-rw-r--r--Documentation/translations/zh_CN/core-api/boot-time-mm.rst49
-rw-r--r--Documentation/translations/zh_CN/core-api/cachetlb.rst333
-rw-r--r--Documentation/translations/zh_CN/core-api/circular-buffers.rst210
-rw-r--r--Documentation/translations/zh_CN/core-api/cpu_hotplug.rst661
-rw-r--r--Documentation/translations/zh_CN/core-api/errseq.rst145
-rw-r--r--Documentation/translations/zh_CN/core-api/genalloc.rst109
-rw-r--r--Documentation/translations/zh_CN/core-api/generic-radix-tree.rst23
-rw-r--r--Documentation/translations/zh_CN/core-api/genericirq.rst409
-rw-r--r--Documentation/translations/zh_CN/core-api/gfp_mask-from-fs-io.rst66
-rw-r--r--Documentation/translations/zh_CN/core-api/idr.rst80
-rw-r--r--Documentation/translations/zh_CN/core-api/index.rst148
-rw-r--r--Documentation/translations/zh_CN/core-api/irq/concepts.rst26
-rw-r--r--Documentation/translations/zh_CN/core-api/irq/index.rst22
-rw-r--r--Documentation/translations/zh_CN/core-api/irq/irq-affinity.rst78
-rw-r--r--Documentation/translations/zh_CN/core-api/irq/irq-domain.rst237
-rw-r--r--Documentation/translations/zh_CN/core-api/irq/irqflags-tracing.rst47
-rw-r--r--Documentation/translations/zh_CN/core-api/kernel-api.rst378
-rw-r--r--Documentation/translations/zh_CN/core-api/kobject.rst379
-rw-r--r--Documentation/translations/zh_CN/core-api/kref.rst311
-rw-r--r--Documentation/translations/zh_CN/core-api/local_ops.rst196
-rw-r--r--Documentation/translations/zh_CN/core-api/memory-allocation.rst138
-rw-r--r--Documentation/translations/zh_CN/core-api/memory-hotplug.rst119
-rw-r--r--Documentation/translations/zh_CN/core-api/mm-api.rst131
-rw-r--r--Documentation/translations/zh_CN/core-api/packing.rst160
-rw-r--r--Documentation/translations/zh_CN/core-api/padata.rst161
-rw-r--r--Documentation/translations/zh_CN/core-api/printk-basics.rst111
-rw-r--r--Documentation/translations/zh_CN/core-api/printk-formats.rst597
-rw-r--r--Documentation/translations/zh_CN/core-api/protection-keys.rst99
-rw-r--r--Documentation/translations/zh_CN/core-api/rbtree.rst391
-rw-r--r--Documentation/translations/zh_CN/core-api/refcount-vs-atomic.rst156
-rw-r--r--Documentation/translations/zh_CN/core-api/symbol-namespaces.rst133
-rw-r--r--Documentation/translations/zh_CN/core-api/this_cpu_ops.rst285
-rw-r--r--Documentation/translations/zh_CN/core-api/unaligned-memory-access.rst229
-rw-r--r--Documentation/translations/zh_CN/core-api/union_find.rst92
-rw-r--r--Documentation/translations/zh_CN/core-api/watch_queue.rst313
-rw-r--r--Documentation/translations/zh_CN/core-api/workqueue.rst696
-rw-r--r--Documentation/translations/zh_CN/core-api/xarray.rst373
-rw-r--r--Documentation/translations/zh_CN/cpu-freq/core.rst109
-rw-r--r--Documentation/translations/zh_CN/cpu-freq/cpu-drivers.rst256
-rw-r--r--Documentation/translations/zh_CN/cpu-freq/cpufreq-stats.rst133
-rw-r--r--Documentation/translations/zh_CN/cpu-freq/index.rst47
-rw-r--r--Documentation/translations/zh_CN/dev-tools/gcov.rst264
-rw-r--r--Documentation/translations/zh_CN/dev-tools/gdb-kernel-debugging.rst167
-rw-r--r--Documentation/translations/zh_CN/dev-tools/index.rst41
-rw-r--r--Documentation/translations/zh_CN/dev-tools/kasan.rst476
-rw-r--r--Documentation/translations/zh_CN/dev-tools/kcov.rst359
-rw-r--r--Documentation/translations/zh_CN/dev-tools/kcsan.rst320
-rw-r--r--Documentation/translations/zh_CN/dev-tools/kmemleak.rst229
-rw-r--r--Documentation/translations/zh_CN/dev-tools/kmsan.rst392
-rw-r--r--Documentation/translations/zh_CN/dev-tools/sparse.rst110
-rw-r--r--Documentation/translations/zh_CN/dev-tools/testing-overview.rst163
-rw-r--r--Documentation/translations/zh_CN/dev-tools/ubsan.rst88
-rw-r--r--Documentation/translations/zh_CN/devicetree/changesets.rst37
-rw-r--r--Documentation/translations/zh_CN/devicetree/dynamic-resolution-notes.rst31
-rw-r--r--Documentation/translations/zh_CN/devicetree/index.rst45
-rw-r--r--Documentation/translations/zh_CN/devicetree/kernel-api.rst58
-rw-r--r--Documentation/translations/zh_CN/devicetree/of_unittest.rst189
-rw-r--r--Documentation/translations/zh_CN/devicetree/overlay-notes.rst140
-rw-r--r--Documentation/translations/zh_CN/devicetree/usage-model.rst330
-rw-r--r--Documentation/translations/zh_CN/disclaimer-zh_CN.rst7
-rw-r--r--Documentation/translations/zh_CN/doc-guide/checktransupdate.rst55
-rw-r--r--Documentation/translations/zh_CN/doc-guide/contributing.rst238
-rw-r--r--Documentation/translations/zh_CN/doc-guide/index.rst28
-rw-r--r--Documentation/translations/zh_CN/doc-guide/kernel-doc.rst499
-rw-r--r--Documentation/translations/zh_CN/doc-guide/maintainer-profile.rst43
-rw-r--r--Documentation/translations/zh_CN/doc-guide/parse-headers.rst187
-rw-r--r--Documentation/translations/zh_CN/doc-guide/sphinx.rst412
-rw-r--r--Documentation/translations/zh_CN/driver-api/gpio/index.rst66
-rw-r--r--Documentation/translations/zh_CN/driver-api/index.rst129
-rw-r--r--Documentation/translations/zh_CN/driver-api/io_ordering.rst60
-rw-r--r--Documentation/translations/zh_CN/driver-api/phy/index.rst20
-rw-r--r--Documentation/translations/zh_CN/driver-api/phy/phy.rst212
-rw-r--r--Documentation/translations/zh_CN/email-clients.txt210
-rw-r--r--Documentation/translations/zh_CN/filesystems/debugfs.rst221
-rw-r--r--Documentation/translations/zh_CN/filesystems/index.rst29
-rw-r--r--Documentation/translations/zh_CN/filesystems/sysfs.txt23
-rw-r--r--Documentation/translations/zh_CN/filesystems/tmpfs.rst146
-rw-r--r--Documentation/translations/zh_CN/filesystems/virtiofs.rst58
-rw-r--r--Documentation/translations/zh_CN/glossary.rst37
-rw-r--r--Documentation/translations/zh_CN/gpio.txt650
-rw-r--r--Documentation/translations/zh_CN/how-to.rst473
-rw-r--r--Documentation/translations/zh_CN/iio/ep93xx_adc.rst48
-rw-r--r--Documentation/translations/zh_CN/iio/iio_configfs.rst106
-rw-r--r--Documentation/translations/zh_CN/iio/index.rst22
-rw-r--r--Documentation/translations/zh_CN/index.rst149
-rw-r--r--Documentation/translations/zh_CN/infiniband/core_locking.rst115
-rw-r--r--Documentation/translations/zh_CN/infiniband/index.rst40
-rw-r--r--Documentation/translations/zh_CN/infiniband/ipoib.rst111
-rw-r--r--Documentation/translations/zh_CN/infiniband/opa_vnic.rst156
-rw-r--r--Documentation/translations/zh_CN/infiniband/sysfs.rst21
-rw-r--r--Documentation/translations/zh_CN/infiniband/tag_matching.rst63
-rw-r--r--Documentation/translations/zh_CN/infiniband/user_mad.rst164
-rw-r--r--Documentation/translations/zh_CN/infiniband/user_verbs.rst72
-rw-r--r--Documentation/translations/zh_CN/io_ordering.txt67
-rw-r--r--Documentation/translations/zh_CN/kbuild/gcc-plugins.rst126
-rw-r--r--Documentation/translations/zh_CN/kbuild/headers_install.rst39
-rw-r--r--Documentation/translations/zh_CN/kbuild/index.rst36
-rw-r--r--Documentation/translations/zh_CN/kbuild/kbuild.rst304
-rw-r--r--Documentation/translations/zh_CN/kbuild/kconfig.rst259
-rw-r--r--Documentation/translations/zh_CN/kbuild/llvm.rst203
-rw-r--r--Documentation/translations/zh_CN/kbuild/reproducible-builds.rst114
-rw-r--r--Documentation/translations/zh_CN/kernel-hacking/hacking.rst707
-rw-r--r--Documentation/translations/zh_CN/kernel-hacking/index.rst22
-rw-r--r--Documentation/translations/zh_CN/locking/index.rst43
-rw-r--r--Documentation/translations/zh_CN/locking/mutex-design.rst145
-rw-r--r--Documentation/translations/zh_CN/locking/spinlocks.rst149
-rw-r--r--Documentation/translations/zh_CN/magic-number.txt153
-rw-r--r--Documentation/translations/zh_CN/maintainer/configure-git.rst62
-rw-r--r--Documentation/translations/zh_CN/maintainer/index.rst21
-rw-r--r--Documentation/translations/zh_CN/maintainer/maintainer-entry-profile.rst92
-rw-r--r--Documentation/translations/zh_CN/maintainer/modifying-patches.rst51
-rw-r--r--Documentation/translations/zh_CN/maintainer/pull-requests.rst148
-rw-r--r--Documentation/translations/zh_CN/maintainer/rebasing-and-merging.rst165
-rw-r--r--Documentation/translations/zh_CN/mm/active_mm.rst90
-rw-r--r--Documentation/translations/zh_CN/mm/balance.rst81
-rw-r--r--Documentation/translations/zh_CN/mm/damon/api.rst32
-rw-r--r--Documentation/translations/zh_CN/mm/damon/design.rst140
-rw-r--r--Documentation/translations/zh_CN/mm/damon/faq.rst31
-rw-r--r--Documentation/translations/zh_CN/mm/damon/index.rst32
-rw-r--r--Documentation/translations/zh_CN/mm/free_page_reporting.rst38
-rw-r--r--Documentation/translations/zh_CN/mm/highmem.rst151
-rw-r--r--Documentation/translations/zh_CN/mm/hmm.rst355
-rw-r--r--Documentation/translations/zh_CN/mm/hugetlbfs_reserv.rst437
-rw-r--r--Documentation/translations/zh_CN/mm/hwpoison.rst166
-rw-r--r--Documentation/translations/zh_CN/mm/index.rst69
-rw-r--r--Documentation/translations/zh_CN/mm/ksm.rst70
-rw-r--r--Documentation/translations/zh_CN/mm/memory-model.rst135
-rw-r--r--Documentation/translations/zh_CN/mm/mmu_notifier.rst97
-rw-r--r--Documentation/translations/zh_CN/mm/numa.rst101
-rw-r--r--Documentation/translations/zh_CN/mm/overcommit-accounting.rst85
-rw-r--r--Documentation/translations/zh_CN/mm/page_frags.rst38
-rw-r--r--Documentation/translations/zh_CN/mm/page_migration.rst228
-rw-r--r--Documentation/translations/zh_CN/mm/page_owner.rst216
-rw-r--r--Documentation/translations/zh_CN/mm/page_table_check.rst69
-rw-r--r--Documentation/translations/zh_CN/mm/page_tables.rst221
-rw-r--r--Documentation/translations/zh_CN/mm/physical_memory.rst356
-rw-r--r--Documentation/translations/zh_CN/mm/remap_file_pages.rst32
-rw-r--r--Documentation/translations/zh_CN/mm/split_page_table_lock.rst96
-rw-r--r--Documentation/translations/zh_CN/mm/vmalloced-kernel-stacks.rst133
-rw-r--r--Documentation/translations/zh_CN/mm/zsmalloc.rst78
-rw-r--r--Documentation/translations/zh_CN/networking/alias.rst56
-rw-r--r--Documentation/translations/zh_CN/networking/index.rst160
-rw-r--r--Documentation/translations/zh_CN/networking/msg_zerocopy.rst223
-rw-r--r--Documentation/translations/zh_CN/networking/napi.rst362
-rw-r--r--Documentation/translations/zh_CN/networking/netif-msg.rst92
-rw-r--r--Documentation/translations/zh_CN/networking/netmem.rst92
-rw-r--r--Documentation/translations/zh_CN/networking/vxlan.rst85
-rw-r--r--Documentation/translations/zh_CN/networking/xfrm_proc.rst126
-rw-r--r--Documentation/translations/zh_CN/oops-tracing.txt212
-rw-r--r--Documentation/translations/zh_CN/peci/index.rst26
-rw-r--r--Documentation/translations/zh_CN/peci/peci.rst54
-rw-r--r--Documentation/translations/zh_CN/power/energy-model.rst210
-rw-r--r--Documentation/translations/zh_CN/power/index.rst56
-rw-r--r--Documentation/translations/zh_CN/power/opp.rst341
-rw-r--r--Documentation/translations/zh_CN/process/1.Intro.rst195
-rw-r--r--Documentation/translations/zh_CN/process/2.Process.rst364
-rw-r--r--Documentation/translations/zh_CN/process/3.Early-stage.rst168
-rw-r--r--Documentation/translations/zh_CN/process/4.Coding.rst293
-rw-r--r--Documentation/translations/zh_CN/process/5.Posting.rst253
-rw-r--r--Documentation/translations/zh_CN/process/6.Followthrough.rst157
-rw-r--r--Documentation/translations/zh_CN/process/7.AdvancedTopics.rst147
-rw-r--r--Documentation/translations/zh_CN/process/8.Conclusion.rst69
-rw-r--r--Documentation/translations/zh_CN/process/code-of-conduct-interpretation.rst108
-rw-r--r--Documentation/translations/zh_CN/process/code-of-conduct.rst72
-rw-r--r--Documentation/translations/zh_CN/process/coding-style.rst1072
-rw-r--r--Documentation/translations/zh_CN/process/cve.rst89
-rw-r--r--Documentation/translations/zh_CN/process/development-process.rst25
-rw-r--r--Documentation/translations/zh_CN/process/email-clients.rst334
-rw-r--r--Documentation/translations/zh_CN/process/embargoed-hardware-issues.rst228
-rw-r--r--Documentation/translations/zh_CN/process/howto.rst495
-rw-r--r--Documentation/translations/zh_CN/process/index.rst102
-rw-r--r--Documentation/translations/zh_CN/process/kernel-driver-statement.rst199
-rw-r--r--Documentation/translations/zh_CN/process/kernel-enforcement-statement.rst151
-rw-r--r--Documentation/translations/zh_CN/process/license-rules.rst370
-rw-r--r--Documentation/translations/zh_CN/process/magic-number.rst82
-rw-r--r--Documentation/translations/zh_CN/process/maintainer-pgp-guide.rst789
-rw-r--r--Documentation/translations/zh_CN/process/management-style.rst207
-rw-r--r--Documentation/translations/zh_CN/process/programming-language.rst53
-rw-r--r--Documentation/translations/zh_CN/process/researcher-guidelines.rst129
-rw-r--r--Documentation/translations/zh_CN/process/security-bugs.rst84
-rw-r--r--Documentation/translations/zh_CN/process/stable-api-nonsense.rst155
-rw-r--r--Documentation/translations/zh_CN/process/stable-kernel-rules.rst64
-rw-r--r--Documentation/translations/zh_CN/process/submit-checklist.rst110
-rw-r--r--Documentation/translations/zh_CN/process/submitting-patches.rst658
-rw-r--r--Documentation/translations/zh_CN/process/volatile-considered-harmful.rst106
-rw-r--r--Documentation/translations/zh_CN/rust/arch-support.rst27
-rw-r--r--Documentation/translations/zh_CN/rust/coding-guidelines.rst204
-rw-r--r--Documentation/translations/zh_CN/rust/general-information.rst75
-rw-r--r--Documentation/translations/zh_CN/rust/index.rst28
-rw-r--r--Documentation/translations/zh_CN/rust/quick-start.rst229
-rw-r--r--Documentation/translations/zh_CN/scheduler/completion.rst256
-rw-r--r--Documentation/translations/zh_CN/scheduler/index.rst45
-rw-r--r--Documentation/translations/zh_CN/scheduler/sched-arch.rst71
-rw-r--r--Documentation/translations/zh_CN/scheduler/sched-bwc.rst204
-rw-r--r--Documentation/translations/zh_CN/scheduler/sched-capacity.rst390
-rw-r--r--Documentation/translations/zh_CN/scheduler/sched-debug.rst51
-rw-r--r--Documentation/translations/zh_CN/scheduler/sched-design-CFS.rst205
-rw-r--r--Documentation/translations/zh_CN/scheduler/sched-domains.rst72
-rw-r--r--Documentation/translations/zh_CN/scheduler/sched-energy.rst351
-rw-r--r--Documentation/translations/zh_CN/scheduler/sched-nice-design.rst99
-rw-r--r--Documentation/translations/zh_CN/scheduler/sched-stats.rst156
-rw-r--r--Documentation/translations/zh_CN/scheduler/schedutil.rst164
-rw-r--r--Documentation/translations/zh_CN/security/IMA-templates.rst97
-rw-r--r--Documentation/translations/zh_CN/security/credentials.rst479
-rw-r--r--Documentation/translations/zh_CN/security/digsig.rst103
-rw-r--r--Documentation/translations/zh_CN/security/index.rst34
-rw-r--r--Documentation/translations/zh_CN/security/keys/index.rst22
-rw-r--r--Documentation/translations/zh_CN/security/landlock.rst123
-rw-r--r--Documentation/translations/zh_CN/security/lsm.rst95
-rw-r--r--Documentation/translations/zh_CN/security/sak.rst86
-rw-r--r--Documentation/translations/zh_CN/security/secrets/index.rst17
-rw-r--r--Documentation/translations/zh_CN/security/self-protection.rst271
-rw-r--r--Documentation/translations/zh_CN/security/siphash.rst195
-rw-r--r--Documentation/translations/zh_CN/security/snp-tdx-threat-model.rst209
-rw-r--r--Documentation/translations/zh_CN/security/tpm/index.rst20
-rw-r--r--Documentation/translations/zh_CN/security/tpm/tpm-security.rst151
-rw-r--r--Documentation/translations/zh_CN/security/tpm/tpm_event_log.rst49
-rw-r--r--Documentation/translations/zh_CN/security/tpm/tpm_ftpm_tee.rst31
-rw-r--r--Documentation/translations/zh_CN/security/tpm/tpm_tis.rst43
-rw-r--r--Documentation/translations/zh_CN/security/tpm/tpm_vtpm_proxy.rst51
-rw-r--r--Documentation/translations/zh_CN/security/tpm/xen-tpmfront.rst114
-rw-r--r--Documentation/translations/zh_CN/sound/hd-audio/controls.rst102
-rw-r--r--Documentation/translations/zh_CN/sound/hd-audio/index.rst14
-rw-r--r--Documentation/translations/zh_CN/sound/index.rst22
-rw-r--r--Documentation/translations/zh_CN/sparse.txt95
-rw-r--r--Documentation/translations/zh_CN/stable_api_nonsense.txt157
-rw-r--r--Documentation/translations/zh_CN/stable_kernel_rules.txt66
-rw-r--r--Documentation/translations/zh_CN/staging/index.rst26
-rw-r--r--Documentation/translations/zh_CN/staging/speculation.rst85
-rw-r--r--Documentation/translations/zh_CN/staging/xz.rst100
-rw-r--r--Documentation/translations/zh_CN/subsystem-apis.rst110
-rw-r--r--Documentation/translations/zh_CN/userspace-api/accelerators/ocxl.rst168
-rw-r--r--Documentation/translations/zh_CN/userspace-api/ebpf/index.rst22
-rw-r--r--Documentation/translations/zh_CN/userspace-api/ebpf/syscall.rst29
-rw-r--r--Documentation/translations/zh_CN/userspace-api/futex2.rst80
-rw-r--r--Documentation/translations/zh_CN/userspace-api/index.rst47
-rw-r--r--Documentation/translations/zh_CN/userspace-api/no_new_privs.rst57
-rw-r--r--Documentation/translations/zh_CN/userspace-api/seccomp_filter.rst293
-rw-r--r--Documentation/translations/zh_CN/userspace-api/sysfs-platform_profile.rst40
-rw-r--r--Documentation/translations/zh_CN/video4linux/omap3isp.txt4
-rw-r--r--Documentation/translations/zh_CN/video4linux/v4l2-framework.txt38
-rw-r--r--Documentation/translations/zh_CN/virt/acrn/cpuid.rst56
-rw-r--r--Documentation/translations/zh_CN/virt/acrn/index.rst25
-rw-r--r--Documentation/translations/zh_CN/virt/acrn/introduction.rst52
-rw-r--r--Documentation/translations/zh_CN/virt/acrn/io-request.rst99
-rw-r--r--Documentation/translations/zh_CN/virt/guest-halt-polling.rst87
-rw-r--r--Documentation/translations/zh_CN/virt/index.rst38
-rw-r--r--Documentation/translations/zh_CN/virt/ne_overview.rst88
-rw-r--r--Documentation/translations/zh_CN/virt/paravirt_ops.rst41
-rw-r--r--Documentation/translations/zh_CN/volatile-considered-harmful.txt113
-rw-r--r--Documentation/translations/zh_TW/IRQ.txt41
-rw-r--r--Documentation/translations/zh_TW/admin-guide/README.rst295
-rw-r--r--Documentation/translations/zh_TW/admin-guide/bootconfig.rst294
-rw-r--r--Documentation/translations/zh_TW/admin-guide/bug-bisect.rst85
-rw-r--r--Documentation/translations/zh_TW/admin-guide/bug-hunting.rst334
-rw-r--r--Documentation/translations/zh_TW/admin-guide/clearing-warn-once.rst16
-rw-r--r--Documentation/translations/zh_TW/admin-guide/cpu-load.rst112
-rw-r--r--Documentation/translations/zh_TW/admin-guide/cputopology.rst97
-rw-r--r--Documentation/translations/zh_TW/admin-guide/index.rst136
-rw-r--r--Documentation/translations/zh_TW/admin-guide/init.rst58
-rw-r--r--Documentation/translations/zh_TW/admin-guide/lockup-watchdogs.rst67
-rw-r--r--Documentation/translations/zh_TW/admin-guide/mm/damon/index.rst30
-rw-r--r--Documentation/translations/zh_TW/admin-guide/mm/damon/lru_sort.rst264
-rw-r--r--Documentation/translations/zh_TW/admin-guide/mm/damon/reclaim.rst229
-rw-r--r--Documentation/translations/zh_TW/admin-guide/mm/damon/start.rst125
-rw-r--r--Documentation/translations/zh_TW/admin-guide/mm/damon/usage.rst346
-rw-r--r--Documentation/translations/zh_TW/admin-guide/mm/index.rst50
-rw-r--r--Documentation/translations/zh_TW/admin-guide/mm/ksm.rst199
-rw-r--r--Documentation/translations/zh_TW/admin-guide/reporting-issues.rst1370
-rw-r--r--Documentation/translations/zh_TW/admin-guide/reporting-regressions.rst371
-rw-r--r--Documentation/translations/zh_TW/admin-guide/security-bugs.rst78
-rw-r--r--Documentation/translations/zh_TW/admin-guide/sysrq.rst281
-rw-r--r--Documentation/translations/zh_TW/admin-guide/tainted-kernels.rst161
-rw-r--r--Documentation/translations/zh_TW/admin-guide/unicode.rst174
-rw-r--r--Documentation/translations/zh_TW/arch/arm/Booting176
-rw-r--r--Documentation/translations/zh_TW/arch/arm/kernel_user_helpers.txt285
-rw-r--r--Documentation/translations/zh_TW/arch/arm64/amu.rst104
-rw-r--r--Documentation/translations/zh_TW/arch/arm64/booting.txt251
-rw-r--r--Documentation/translations/zh_TW/arch/arm64/elf_hwcaps.rst244
-rw-r--r--Documentation/translations/zh_TW/arch/arm64/hugetlbpage.rst49
-rw-r--r--Documentation/translations/zh_TW/arch/arm64/index.rst23
-rw-r--r--Documentation/translations/zh_TW/arch/arm64/legacy_instructions.txt77
-rw-r--r--Documentation/translations/zh_TW/arch/arm64/memory.txt119
-rw-r--r--Documentation/translations/zh_TW/arch/arm64/perf.rst88
-rw-r--r--Documentation/translations/zh_TW/arch/arm64/silicon-errata.txt79
-rw-r--r--Documentation/translations/zh_TW/arch/arm64/tagged-pointers.txt57
-rw-r--r--Documentation/translations/zh_TW/arch/index.rst29
-rw-r--r--Documentation/translations/zh_TW/arch/loongarch/booting.rst49
-rw-r--r--Documentation/translations/zh_TW/arch/loongarch/features.rst9
-rw-r--r--Documentation/translations/zh_TW/arch/loongarch/index.rst28
-rw-r--r--Documentation/translations/zh_TW/arch/loongarch/introduction.rst354
-rw-r--r--Documentation/translations/zh_TW/arch/loongarch/irq-chip-model.rst158
-rw-r--r--Documentation/translations/zh_TW/arch/mips/booting.rst35
-rw-r--r--Documentation/translations/zh_TW/arch/mips/features.rst14
-rw-r--r--Documentation/translations/zh_TW/arch/mips/index.rst30
-rw-r--r--Documentation/translations/zh_TW/arch/mips/ingenic-tcu.rst73
-rw-r--r--Documentation/translations/zh_TW/arch/openrisc/index.rst33
-rw-r--r--Documentation/translations/zh_TW/arch/openrisc/openrisc_port.rst128
-rw-r--r--Documentation/translations/zh_TW/arch/openrisc/todo.rst24
-rw-r--r--Documentation/translations/zh_TW/arch/parisc/debugging.rst46
-rw-r--r--Documentation/translations/zh_TW/arch/parisc/index.rst32
-rw-r--r--Documentation/translations/zh_TW/arch/parisc/registers.rst157
-rw-r--r--Documentation/translations/zh_TW/cpu-freq/core.rst110
-rw-r--r--Documentation/translations/zh_TW/cpu-freq/cpu-drivers.rst257
-rw-r--r--Documentation/translations/zh_TW/cpu-freq/cpufreq-stats.rst134
-rw-r--r--Documentation/translations/zh_TW/cpu-freq/index.rst48
-rw-r--r--Documentation/translations/zh_TW/dev-tools/gcov.rst265
-rw-r--r--Documentation/translations/zh_TW/dev-tools/gdb-kernel-debugging.rst164
-rw-r--r--Documentation/translations/zh_TW/dev-tools/index.rst43
-rw-r--r--Documentation/translations/zh_TW/dev-tools/kasan.rst460
-rw-r--r--Documentation/translations/zh_TW/dev-tools/sparse.rst91
-rw-r--r--Documentation/translations/zh_TW/dev-tools/testing-overview.rst162
-rw-r--r--Documentation/translations/zh_TW/disclaimer-zh_TW.rst11
-rw-r--r--Documentation/translations/zh_TW/filesystems/debugfs.rst223
-rw-r--r--Documentation/translations/zh_TW/filesystems/index.rst31
-rw-r--r--Documentation/translations/zh_TW/filesystems/sysfs.txt377
-rw-r--r--Documentation/translations/zh_TW/filesystems/tmpfs.rst147
-rw-r--r--Documentation/translations/zh_TW/filesystems/virtiofs.rst60
-rw-r--r--Documentation/translations/zh_TW/index.rst134
-rw-r--r--Documentation/translations/zh_TW/io_ordering.txt68
-rw-r--r--Documentation/translations/zh_TW/process/1.Intro.rst199
-rw-r--r--Documentation/translations/zh_TW/process/2.Process.rst369
-rw-r--r--Documentation/translations/zh_TW/process/3.Early-stage.rst172
-rw-r--r--Documentation/translations/zh_TW/process/4.Coding.rst297
-rw-r--r--Documentation/translations/zh_TW/process/5.Posting.rst246
-rw-r--r--Documentation/translations/zh_TW/process/6.Followthrough.rst156
-rw-r--r--Documentation/translations/zh_TW/process/7.AdvancedTopics.rst137
-rw-r--r--Documentation/translations/zh_TW/process/8.Conclusion.rst73
-rw-r--r--Documentation/translations/zh_TW/process/code-of-conduct-interpretation.rst112
-rw-r--r--Documentation/translations/zh_TW/process/code-of-conduct.rst76
-rw-r--r--Documentation/translations/zh_TW/process/coding-style.rst1087
-rw-r--r--Documentation/translations/zh_TW/process/development-process.rst30
-rw-r--r--Documentation/translations/zh_TW/process/email-clients.rst329
-rw-r--r--Documentation/translations/zh_TW/process/embargoed-hardware-issues.rst232
-rw-r--r--Documentation/translations/zh_TW/process/howto.rst499
-rw-r--r--Documentation/translations/zh_TW/process/index.rst67
-rw-r--r--Documentation/translations/zh_TW/process/kernel-driver-statement.rst203
-rw-r--r--Documentation/translations/zh_TW/process/kernel-enforcement-statement.rst155
-rw-r--r--Documentation/translations/zh_TW/process/license-rules.rst372
-rw-r--r--Documentation/translations/zh_TW/process/magic-number.rst76
-rw-r--r--Documentation/translations/zh_TW/process/management-style.rst211
-rw-r--r--Documentation/translations/zh_TW/process/programming-language.rst75
-rw-r--r--Documentation/translations/zh_TW/process/stable-api-nonsense.rst159
-rw-r--r--Documentation/translations/zh_TW/process/stable-kernel-rules.rst68
-rw-r--r--Documentation/translations/zh_TW/process/submit-checklist.rst114
-rw-r--r--Documentation/translations/zh_TW/process/submitting-patches.rst659
-rw-r--r--Documentation/translations/zh_TW/process/volatile-considered-harmful.rst110
-rw-r--r--Documentation/unaligned-memory-access.txt265
-rw-r--r--Documentation/usb/CREDITS8
-rw-r--r--Documentation/usb/WUSB-Design-overview.txt439
-rw-r--r--Documentation/usb/acm.rst132
-rw-r--r--Documentation/usb/acm.txt128
-rw-r--r--Documentation/usb/authorization.rst129
-rw-r--r--Documentation/usb/authorization.txt123
-rw-r--r--Documentation/usb/chipidea.rst134
-rw-r--r--Documentation/usb/chipidea.txt96
-rw-r--r--Documentation/usb/dwc3.rst53
-rw-r--r--Documentation/usb/dwc3.txt45
-rw-r--r--Documentation/usb/ehci.rst230
-rw-r--r--Documentation/usb/ehci.txt214
-rw-r--r--Documentation/usb/functionfs-desc.rst39
-rw-r--r--Documentation/usb/functionfs.rst106
-rw-r--r--Documentation/usb/functionfs.txt67
-rw-r--r--Documentation/usb/gadget-testing.rst1152
-rw-r--r--Documentation/usb/gadget-testing.txt819
-rw-r--r--Documentation/usb/gadget_configfs.rst399
-rw-r--r--Documentation/usb/gadget_configfs.txt384
-rw-r--r--Documentation/usb/gadget_hid.rst457
-rw-r--r--Documentation/usb/gadget_hid.txt452
-rw-r--r--Documentation/usb/gadget_multi.rst165
-rw-r--r--Documentation/usb/gadget_multi.txt150
-rw-r--r--Documentation/usb/gadget_printer.rst523
-rw-r--r--Documentation/usb/gadget_printer.txt510
-rw-r--r--Documentation/usb/gadget_serial.rst289
-rw-r--r--Documentation/usb/gadget_serial.txt286
-rw-r--r--Documentation/usb/gadget_uvc.rst380
-rw-r--r--Documentation/usb/index.rst40
-rw-r--r--Documentation/usb/iuu_phoenix.rst94
-rw-r--r--Documentation/usb/iuu_phoenix.txt84
-rw-r--r--Documentation/usb/linux.inf2
-rw-r--r--Documentation/usb/mass-storage.rst243
-rw-r--r--Documentation/usb/mass-storage.txt225
-rw-r--r--Documentation/usb/misc_usbsevseg.rst51
-rw-r--r--Documentation/usb/misc_usbsevseg.txt46
-rw-r--r--Documentation/usb/mtouchusb.rst84
-rw-r--r--Documentation/usb/mtouchusb.txt72
-rw-r--r--Documentation/usb/ohci.rst35
-rw-r--r--Documentation/usb/ohci.txt32
-rw-r--r--Documentation/usb/raw-gadget.rst91
-rw-r--r--Documentation/usb/rio.txt138
-rw-r--r--Documentation/usb/text_files.rst23
-rw-r--r--Documentation/usb/usb-help.rst17
-rw-r--r--Documentation/usb/usb-help.txt16
-rw-r--r--Documentation/usb/usb-serial.rst537
-rw-r--r--Documentation/usb/usb-serial.txt486
-rw-r--r--Documentation/usb/usbip_protocol.rst452
-rw-r--r--Documentation/usb/usbip_protocol.txt357
-rw-r--r--Documentation/usb/usbmon.rst375
-rw-r--r--Documentation/usb/usbmon.txt355
-rw-r--r--Documentation/usb/wusb-cbaf130
-rw-r--r--Documentation/userspace-api/ELF.rst34
-rw-r--r--Documentation/userspace-api/accelerators/ocxl.rst179
-rw-r--r--Documentation/userspace-api/check_exec.rst144
-rw-r--r--Documentation/userspace-api/conf.py10
-rw-r--r--Documentation/userspace-api/dcdbas.rst (renamed from Documentation/dcdbas.txt)0
-rw-r--r--Documentation/userspace-api/dma-buf-alloc-exchange.rst389
-rw-r--r--Documentation/userspace-api/dma-buf-heaps.rst28
-rw-r--r--Documentation/userspace-api/ebpf/index.rst17
-rw-r--r--Documentation/userspace-api/ebpf/syscall.rst24
-rw-r--r--Documentation/userspace-api/futex2.rst86
-rw-r--r--Documentation/userspace-api/fwctl/fwctl-cxl.rst142
-rw-r--r--Documentation/userspace-api/fwctl/fwctl.rst286
-rw-r--r--Documentation/userspace-api/fwctl/index.rst14
-rw-r--r--Documentation/userspace-api/fwctl/pds_fwctl.rst46
-rw-r--r--Documentation/userspace-api/gpio/chardev.rst116
-rw-r--r--Documentation/userspace-api/gpio/chardev_v1.rst131
-rw-r--r--Documentation/userspace-api/gpio/error-codes.rst79
-rw-r--r--Documentation/userspace-api/gpio/gpio-get-chipinfo-ioctl.rst41
-rw-r--r--Documentation/userspace-api/gpio/gpio-get-lineevent-ioctl.rst84
-rw-r--r--Documentation/userspace-api/gpio/gpio-get-linehandle-ioctl.rst125
-rw-r--r--Documentation/userspace-api/gpio/gpio-get-lineinfo-ioctl.rst54
-rw-r--r--Documentation/userspace-api/gpio/gpio-get-lineinfo-unwatch-ioctl.rst49
-rw-r--r--Documentation/userspace-api/gpio/gpio-get-lineinfo-watch-ioctl.rst74
-rw-r--r--Documentation/userspace-api/gpio/gpio-handle-get-line-values-ioctl.rst63
-rw-r--r--Documentation/userspace-api/gpio/gpio-handle-set-config-ioctl.rst66
-rw-r--r--Documentation/userspace-api/gpio/gpio-handle-set-line-values-ioctl.rst55
-rw-r--r--Documentation/userspace-api/gpio/gpio-lineevent-data-read.rst89
-rw-r--r--Documentation/userspace-api/gpio/gpio-lineinfo-changed-read.rst87
-rw-r--r--Documentation/userspace-api/gpio/gpio-v2-get-line-ioctl.rst152
-rw-r--r--Documentation/userspace-api/gpio/gpio-v2-get-lineinfo-ioctl.rst50
-rw-r--r--Documentation/userspace-api/gpio/gpio-v2-get-lineinfo-watch-ioctl.rst67
-rw-r--r--Documentation/userspace-api/gpio/gpio-v2-line-event-read.rst88
-rw-r--r--Documentation/userspace-api/gpio/gpio-v2-line-get-values-ioctl.rst58
-rw-r--r--Documentation/userspace-api/gpio/gpio-v2-line-set-config-ioctl.rst61
-rw-r--r--Documentation/userspace-api/gpio/gpio-v2-line-set-values-ioctl.rst54
-rw-r--r--Documentation/userspace-api/gpio/gpio-v2-lineinfo-changed-read.rst81
-rw-r--r--Documentation/userspace-api/gpio/index.rst18
-rw-r--r--Documentation/userspace-api/gpio/obsolete.rst11
-rw-r--r--Documentation/userspace-api/gpio/sysfs.rst171
-rw-r--r--Documentation/userspace-api/index.rst55
-rw-r--r--Documentation/userspace-api/ioctl/cdrom.rst1242
-rw-r--r--Documentation/userspace-api/ioctl/hdio.rst547
-rw-r--r--Documentation/userspace-api/ioctl/index.rst15
-rw-r--r--Documentation/userspace-api/ioctl/ioctl-decoding.rst31
-rw-r--r--Documentation/userspace-api/ioctl/ioctl-number.rst414
-rw-r--r--Documentation/userspace-api/iommufd.rst384
-rw-r--r--Documentation/userspace-api/isapnp.rst15
-rw-r--r--Documentation/userspace-api/landlock.rst710
-rw-r--r--Documentation/userspace-api/lsm.rst73
-rw-r--r--Documentation/userspace-api/media/cec/cec-api.rst (renamed from Documentation/media/uapi/cec/cec-api.rst)12
-rw-r--r--Documentation/userspace-api/media/cec/cec-func-close.rst43
-rw-r--r--Documentation/userspace-api/media/cec/cec-func-ioctl.rst (renamed from Documentation/media/uapi/cec/cec-func-ioctl.rst)13
-rw-r--r--Documentation/userspace-api/media/cec/cec-func-open.rst (renamed from Documentation/media/uapi/cec/cec-func-open.rst)16
-rw-r--r--Documentation/userspace-api/media/cec/cec-func-poll.rst (renamed from Documentation/media/uapi/cec/cec-func-poll.rst)16
-rw-r--r--Documentation/userspace-api/media/cec/cec-funcs.rst (renamed from Documentation/media/uapi/cec/cec-funcs.rst)3
-rw-r--r--Documentation/userspace-api/media/cec/cec-header.rst11
-rw-r--r--Documentation/userspace-api/media/cec/cec-intro.rst (renamed from Documentation/media/uapi/cec/cec-intro.rst)2
-rw-r--r--Documentation/userspace-api/media/cec/cec-ioc-adap-g-caps.rst (renamed from Documentation/media/uapi/cec/cec-ioc-adap-g-caps.rst)33
-rw-r--r--Documentation/userspace-api/media/cec/cec-ioc-adap-g-conn-info.rst106
-rw-r--r--Documentation/userspace-api/media/cec/cec-ioc-adap-g-log-addrs.rst (renamed from Documentation/media/uapi/cec/cec-ioc-adap-g-log-addrs.rst)34
-rw-r--r--Documentation/userspace-api/media/cec/cec-ioc-adap-g-phys-addr.rst (renamed from Documentation/media/uapi/cec/cec-ioc-adap-g-phys-addr.rst)17
-rw-r--r--Documentation/userspace-api/media/cec/cec-ioc-dqevent.rst (renamed from Documentation/media/uapi/cec/cec-ioc-dqevent.rst)55
-rw-r--r--Documentation/userspace-api/media/cec/cec-ioc-g-mode.rst (renamed from Documentation/media/uapi/cec/cec-ioc-g-mode.rst)29
-rw-r--r--Documentation/userspace-api/media/cec/cec-ioc-receive.rst (renamed from Documentation/media/uapi/cec/cec-ioc-receive.rst)105
-rw-r--r--Documentation/userspace-api/media/cec/cec-pin-error-inj.rst (renamed from Documentation/media/uapi/cec/cec-pin-error-inj.rst)48
-rw-r--r--Documentation/userspace-api/media/cec/cec.h.rst.exceptions (renamed from Documentation/media/cec.h.rst.exceptions)95
-rw-r--r--Documentation/userspace-api/media/conf_nitpick.py (renamed from Documentation/media/conf_nitpick.py)4
-rw-r--r--Documentation/userspace-api/media/drivers/aspeed-video.rst65
-rw-r--r--Documentation/userspace-api/media/drivers/camera-sensor.rst106
-rw-r--r--Documentation/userspace-api/media/drivers/ccs.rst114
-rw-r--r--Documentation/userspace-api/media/drivers/cx2341x-uapi.rst177
-rw-r--r--Documentation/userspace-api/media/drivers/dw100.rst84
-rw-r--r--Documentation/userspace-api/media/drivers/imx-uapi.rst125
-rw-r--r--Documentation/userspace-api/media/drivers/index.rst40
-rw-r--r--Documentation/userspace-api/media/drivers/max2175.rst (renamed from Documentation/media/v4l-drivers/max2175.rst)4
-rw-r--r--Documentation/userspace-api/media/drivers/npcm-video.rst66
-rw-r--r--Documentation/userspace-api/media/drivers/omap3isp-uapi.rst208
-rw-r--r--Documentation/userspace-api/media/drivers/thp7312.rst39
-rw-r--r--Documentation/userspace-api/media/drivers/uvcvideo.rst (renamed from Documentation/media/v4l-drivers/uvcvideo.rst)68
-rw-r--r--Documentation/userspace-api/media/drivers/vgxy61.rst25
-rw-r--r--Documentation/userspace-api/media/dvb/ca-fclose.rst (renamed from Documentation/media/uapi/dvb/ca-fclose.rst)9
-rw-r--r--Documentation/userspace-api/media/dvb/ca-fopen.rst (renamed from Documentation/media/uapi/dvb/ca-fopen.rst)9
-rw-r--r--Documentation/userspace-api/media/dvb/ca-get-cap.rst (renamed from Documentation/media/uapi/dvb/ca-get-cap.rst)10
-rw-r--r--Documentation/userspace-api/media/dvb/ca-get-descr-info.rst (renamed from Documentation/media/uapi/dvb/ca-get-descr-info.rst)11
-rw-r--r--Documentation/userspace-api/media/dvb/ca-get-msg.rst (renamed from Documentation/media/uapi/dvb/ca-get-msg.rst)12
-rw-r--r--Documentation/userspace-api/media/dvb/ca-get-slot-info.rst (renamed from Documentation/media/uapi/dvb/ca-get-slot-info.rst)11
-rw-r--r--Documentation/userspace-api/media/dvb/ca-reset.rst (renamed from Documentation/media/uapi/dvb/ca-reset.rst)11
-rw-r--r--Documentation/userspace-api/media/dvb/ca-send-msg.rst (renamed from Documentation/media/uapi/dvb/ca-send-msg.rst)11
-rw-r--r--Documentation/userspace-api/media/dvb/ca-set-descr.rst46
-rw-r--r--Documentation/userspace-api/media/dvb/ca.h.rst.exceptions (renamed from Documentation/media/ca.h.rst.exceptions)2
-rw-r--r--Documentation/userspace-api/media/dvb/ca.rst (renamed from Documentation/media/uapi/dvb/ca.rst)5
-rw-r--r--Documentation/userspace-api/media/dvb/ca_data_types.rst9
-rw-r--r--Documentation/userspace-api/media/dvb/ca_function_calls.rst (renamed from Documentation/media/uapi/dvb/ca_function_calls.rst)2
-rw-r--r--Documentation/userspace-api/media/dvb/ca_high_level.rst157
-rw-r--r--Documentation/userspace-api/media/dvb/demux.rst (renamed from Documentation/media/uapi/dvb/demux.rst)4
-rw-r--r--Documentation/userspace-api/media/dvb/dmx-add-pid.rst47
-rw-r--r--Documentation/userspace-api/media/dvb/dmx-expbuf.rst (renamed from Documentation/media/uapi/dvb/dmx-expbuf.rst)13
-rw-r--r--Documentation/userspace-api/media/dvb/dmx-fclose.rst (renamed from Documentation/media/uapi/dvb/dmx-fclose.rst)11
-rw-r--r--Documentation/userspace-api/media/dvb/dmx-fopen.rst (renamed from Documentation/media/uapi/dvb/dmx-fopen.rst)9
-rw-r--r--Documentation/userspace-api/media/dvb/dmx-fread.rst (renamed from Documentation/media/uapi/dvb/dmx-fread.rst)11
-rw-r--r--Documentation/userspace-api/media/dvb/dmx-fwrite.rst (renamed from Documentation/media/uapi/dvb/dmx-fwrite.rst)8
-rw-r--r--Documentation/userspace-api/media/dvb/dmx-get-pes-pids.rst (renamed from Documentation/media/uapi/dvb/dmx-get-pes-pids.rst)14
-rw-r--r--Documentation/userspace-api/media/dvb/dmx-get-stc.rst (renamed from Documentation/media/uapi/dvb/dmx-get-stc.rst)14
-rw-r--r--Documentation/userspace-api/media/dvb/dmx-mmap.rst (renamed from Documentation/media/uapi/dvb/dmx-mmap.rst)17
-rw-r--r--Documentation/userspace-api/media/dvb/dmx-munmap.rst52
-rw-r--r--Documentation/userspace-api/media/dvb/dmx-qbuf.rst (renamed from Documentation/media/uapi/dvb/dmx-qbuf.rst)23
-rw-r--r--Documentation/userspace-api/media/dvb/dmx-querybuf.rst (renamed from Documentation/media/uapi/dvb/dmx-querybuf.rst)11
-rw-r--r--Documentation/userspace-api/media/dvb/dmx-remove-pid.rst (renamed from Documentation/media/uapi/dvb/dmx-remove-pid.rst)12
-rw-r--r--Documentation/userspace-api/media/dvb/dmx-reqbufs.rst (renamed from Documentation/media/uapi/dvb/dmx-reqbufs.rst)11
-rw-r--r--Documentation/userspace-api/media/dvb/dmx-set-buffer-size.rst48
-rw-r--r--Documentation/userspace-api/media/dvb/dmx-set-filter.rst (renamed from Documentation/media/uapi/dvb/dmx-set-filter.rst)16
-rw-r--r--Documentation/userspace-api/media/dvb/dmx-set-pes-filter.rst (renamed from Documentation/media/uapi/dvb/dmx-set-pes-filter.rst)15
-rw-r--r--Documentation/userspace-api/media/dvb/dmx-start.rst (renamed from Documentation/media/uapi/dvb/dmx-start.rst)13
-rw-r--r--Documentation/userspace-api/media/dvb/dmx-stop.rst44
-rw-r--r--Documentation/userspace-api/media/dvb/dmx.h.rst.exceptions (renamed from Documentation/media/dmx.h.rst.exceptions)2
-rw-r--r--Documentation/userspace-api/media/dvb/dmx_fcalls.rst (renamed from Documentation/media/uapi/dvb/dmx_fcalls.rst)2
-rw-r--r--Documentation/userspace-api/media/dvb/dmx_types.rst9
-rw-r--r--Documentation/userspace-api/media/dvb/dvb-fe-read-status.rst (renamed from Documentation/media/uapi/dvb/dvb-fe-read-status.rst)2
-rw-r--r--Documentation/userspace-api/media/dvb/dvb-frontend-event.rst (renamed from Documentation/media/uapi/dvb/dvb-frontend-event.rst)2
-rw-r--r--Documentation/userspace-api/media/dvb/dvb-frontend-parameters.rst (renamed from Documentation/media/uapi/dvb/dvb-frontend-parameters.rst)2
-rw-r--r--Documentation/userspace-api/media/dvb/dvbapi.rst (renamed from Documentation/media/uapi/dvb/dvbapi.rst)10
-rw-r--r--Documentation/userspace-api/media/dvb/dvbproperty.rst (renamed from Documentation/media/uapi/dvb/dvbproperty.rst)4
-rw-r--r--Documentation/userspace-api/media/dvb/dvbstb.svg (renamed from Documentation/media/uapi/dvb/dvbstb.svg)3
-rw-r--r--Documentation/userspace-api/media/dvb/examples.rst16
-rw-r--r--Documentation/userspace-api/media/dvb/fe-bandwidth-t.rst (renamed from Documentation/media/uapi/dvb/fe-bandwidth-t.rst)2
-rw-r--r--Documentation/userspace-api/media/dvb/fe-diseqc-recv-slave-reply.rst47
-rw-r--r--Documentation/userspace-api/media/dvb/fe-diseqc-reset-overload.rst (renamed from Documentation/media/uapi/dvb/fe-diseqc-reset-overload.rst)11
-rw-r--r--Documentation/userspace-api/media/dvb/fe-diseqc-send-burst.rst (renamed from Documentation/media/uapi/dvb/fe-diseqc-send-burst.rst)14
-rw-r--r--Documentation/userspace-api/media/dvb/fe-diseqc-send-master-cmd.rst48
-rw-r--r--Documentation/userspace-api/media/dvb/fe-dishnetwork-send-legacy-cmd.rst (renamed from Documentation/media/uapi/dvb/fe-dishnetwork-send-legacy-cmd.rst)12
-rw-r--r--Documentation/userspace-api/media/dvb/fe-enable-high-lnb-voltage.rst (renamed from Documentation/media/uapi/dvb/fe-enable-high-lnb-voltage.rst)12
-rw-r--r--Documentation/userspace-api/media/dvb/fe-get-event.rst (renamed from Documentation/media/uapi/dvb/fe-get-event.rst)14
-rw-r--r--Documentation/userspace-api/media/dvb/fe-get-frontend.rst (renamed from Documentation/media/uapi/dvb/fe-get-frontend.rst)14
-rw-r--r--Documentation/userspace-api/media/dvb/fe-get-info.rst (renamed from Documentation/media/uapi/dvb/fe-get-info.rst)16
-rw-r--r--Documentation/userspace-api/media/dvb/fe-get-property.rst (renamed from Documentation/media/uapi/dvb/fe-get-property.rst)17
-rw-r--r--Documentation/userspace-api/media/dvb/fe-read-ber.rst (renamed from Documentation/media/uapi/dvb/fe-read-ber.rst)11
-rw-r--r--Documentation/userspace-api/media/dvb/fe-read-signal-strength.rst (renamed from Documentation/media/uapi/dvb/fe-read-signal-strength.rst)11
-rw-r--r--Documentation/userspace-api/media/dvb/fe-read-snr.rst (renamed from Documentation/media/uapi/dvb/fe-read-snr.rst)11
-rw-r--r--Documentation/userspace-api/media/dvb/fe-read-status.rst (renamed from Documentation/media/uapi/dvb/fe-read-status.rst)13
-rw-r--r--Documentation/userspace-api/media/dvb/fe-read-uncorrected-blocks.rst (renamed from Documentation/media/uapi/dvb/fe-read-uncorrected-blocks.rst)11
-rw-r--r--Documentation/userspace-api/media/dvb/fe-set-frontend-tune-mode.rst (renamed from Documentation/media/uapi/dvb/fe-set-frontend-tune-mode.rst)12
-rw-r--r--Documentation/userspace-api/media/dvb/fe-set-frontend.rst (renamed from Documentation/media/uapi/dvb/fe-set-frontend.rst)13
-rw-r--r--Documentation/userspace-api/media/dvb/fe-set-tone.rst (renamed from Documentation/media/uapi/dvb/fe-set-tone.rst)14
-rw-r--r--Documentation/userspace-api/media/dvb/fe-set-voltage.rst (renamed from Documentation/media/uapi/dvb/fe-set-voltage.rst)14
-rw-r--r--Documentation/userspace-api/media/dvb/fe-type-t.rst (renamed from Documentation/media/uapi/dvb/fe-type-t.rst)4
-rw-r--r--Documentation/userspace-api/media/dvb/fe_property_parameters.rst (renamed from Documentation/media/uapi/dvb/fe_property_parameters.rst)50
-rw-r--r--Documentation/userspace-api/media/dvb/frontend-header.rst6
-rw-r--r--Documentation/userspace-api/media/dvb/frontend-property-cable-systems.rst (renamed from Documentation/media/uapi/dvb/frontend-property-cable-systems.rst)2
-rw-r--r--Documentation/userspace-api/media/dvb/frontend-property-satellite-systems.rst (renamed from Documentation/media/uapi/dvb/frontend-property-satellite-systems.rst)2
-rw-r--r--Documentation/userspace-api/media/dvb/frontend-property-terrestrial-systems.rst (renamed from Documentation/media/uapi/dvb/frontend-property-terrestrial-systems.rst)4
-rw-r--r--Documentation/userspace-api/media/dvb/frontend-stat-properties.rst (renamed from Documentation/media/uapi/dvb/frontend-stat-properties.rst)2
-rw-r--r--Documentation/userspace-api/media/dvb/frontend.h.rst.exceptions (renamed from Documentation/media/frontend.h.rst.exceptions)34
-rw-r--r--Documentation/userspace-api/media/dvb/frontend.rst (renamed from Documentation/media/uapi/dvb/frontend.rst)2
-rw-r--r--Documentation/userspace-api/media/dvb/frontend_f_close.rst (renamed from Documentation/media/uapi/dvb/frontend_f_close.rst)10
-rw-r--r--Documentation/userspace-api/media/dvb/frontend_f_open.rst (renamed from Documentation/media/uapi/dvb/frontend_f_open.rst)14
-rw-r--r--Documentation/userspace-api/media/dvb/frontend_fcalls.rst (renamed from Documentation/media/uapi/dvb/frontend_fcalls.rst)2
-rw-r--r--Documentation/userspace-api/media/dvb/frontend_legacy_api.rst (renamed from Documentation/media/uapi/dvb/frontend_legacy_api.rst)2
-rw-r--r--Documentation/userspace-api/media/dvb/frontend_legacy_dvbv3_api.rst (renamed from Documentation/media/uapi/dvb/frontend_legacy_dvbv3_api.rst)2
-rw-r--r--Documentation/userspace-api/media/dvb/headers.rst25
-rw-r--r--Documentation/userspace-api/media/dvb/intro.rst183
-rw-r--r--Documentation/userspace-api/media/dvb/legacy_dvb_apis.rst26
-rw-r--r--Documentation/userspace-api/media/dvb/legacy_dvb_audio.rst1642
-rw-r--r--Documentation/userspace-api/media/dvb/legacy_dvb_decoder_api.rst61
-rw-r--r--Documentation/userspace-api/media/dvb/legacy_dvb_osd.rst883
-rw-r--r--Documentation/userspace-api/media/dvb/legacy_dvb_video.rst2430
-rw-r--r--Documentation/userspace-api/media/dvb/net-add-if.rst (renamed from Documentation/media/uapi/dvb/net-add-if.rst)11
-rw-r--r--Documentation/userspace-api/media/dvb/net-get-if.rst (renamed from Documentation/media/uapi/dvb/net-get-if.rst)12
-rw-r--r--Documentation/userspace-api/media/dvb/net-remove-if.rst46
-rw-r--r--Documentation/userspace-api/media/dvb/net-types.rst9
-rw-r--r--Documentation/userspace-api/media/dvb/net.h.rst.exceptions (renamed from Documentation/media/net.h.rst.exceptions)2
-rw-r--r--Documentation/userspace-api/media/dvb/net.rst41
-rw-r--r--Documentation/userspace-api/media/dvb/query-dvb-frontend-info.rst (renamed from Documentation/media/uapi/dvb/query-dvb-frontend-info.rst)2
-rw-r--r--Documentation/userspace-api/media/fdl-appendix.rst (renamed from Documentation/media/uapi/fdl-appendix.rst)68
-rw-r--r--Documentation/userspace-api/media/gen-errors.rst (renamed from Documentation/media/uapi/gen-errors.rst)6
-rw-r--r--Documentation/userspace-api/media/glossary.rst217
-rw-r--r--Documentation/userspace-api/media/index.rst67
-rw-r--r--Documentation/userspace-api/media/intro.rst46
-rw-r--r--Documentation/userspace-api/media/mediactl/media-controller-intro.rst (renamed from Documentation/media/uapi/mediactl/media-controller-intro.rst)2
-rw-r--r--Documentation/userspace-api/media/mediactl/media-controller-model.rst (renamed from Documentation/media/uapi/mediactl/media-controller-model.rst)8
-rw-r--r--Documentation/userspace-api/media/mediactl/media-controller.rst (renamed from Documentation/media/uapi/mediactl/media-controller.rst)10
-rw-r--r--Documentation/userspace-api/media/mediactl/media-func-close.rst43
-rw-r--r--Documentation/userspace-api/media/mediactl/media-func-ioctl.rst (renamed from Documentation/media/uapi/mediactl/media-func-ioctl.rst)12
-rw-r--r--Documentation/userspace-api/media/mediactl/media-func-open.rst (renamed from Documentation/media/uapi/mediactl/media-func-open.rst)12
-rw-r--r--Documentation/userspace-api/media/mediactl/media-funcs.rst (renamed from Documentation/media/uapi/mediactl/media-funcs.rst)2
-rw-r--r--Documentation/userspace-api/media/mediactl/media-header.rst11
-rw-r--r--Documentation/userspace-api/media/mediactl/media-ioc-device-info.rst (renamed from Documentation/media/uapi/mediactl/media-ioc-device-info.rst)17
-rw-r--r--Documentation/userspace-api/media/mediactl/media-ioc-enum-entities.rst (renamed from Documentation/media/uapi/mediactl/media-ioc-enum-entities.rst)39
-rw-r--r--Documentation/userspace-api/media/mediactl/media-ioc-enum-links.rst (renamed from Documentation/media/uapi/mediactl/media-ioc-enum-links.rst)26
-rw-r--r--Documentation/userspace-api/media/mediactl/media-ioc-g-topology.rst (renamed from Documentation/media/uapi/mediactl/media-ioc-g-topology.rst)28
-rw-r--r--Documentation/userspace-api/media/mediactl/media-ioc-request-alloc.rst (renamed from Documentation/media/uapi/mediactl/media-ioc-request-alloc.rst)13
-rw-r--r--Documentation/userspace-api/media/mediactl/media-ioc-setup-link.rst (renamed from Documentation/media/uapi/mediactl/media-ioc-setup-link.rst)12
-rw-r--r--Documentation/userspace-api/media/mediactl/media-request-ioc-queue.rst (renamed from Documentation/media/uapi/mediactl/media-request-ioc-queue.rst)9
-rw-r--r--Documentation/userspace-api/media/mediactl/media-request-ioc-reinit.rst (renamed from Documentation/media/uapi/mediactl/media-request-ioc-reinit.rst)10
-rw-r--r--Documentation/userspace-api/media/mediactl/media-types.rst (renamed from Documentation/media/uapi/mediactl/media-types.rst)43
-rw-r--r--Documentation/userspace-api/media/mediactl/media.h.rst.exceptions (renamed from Documentation/media/media.h.rst.exceptions)2
-rw-r--r--Documentation/userspace-api/media/mediactl/request-api.rst (renamed from Documentation/media/uapi/mediactl/request-api.rst)17
-rw-r--r--Documentation/userspace-api/media/mediactl/request-func-close.rst (renamed from Documentation/media/uapi/mediactl/request-func-close.rst)10
-rw-r--r--Documentation/userspace-api/media/mediactl/request-func-ioctl.rst (renamed from Documentation/media/uapi/mediactl/request-func-ioctl.rst)10
-rw-r--r--Documentation/userspace-api/media/mediactl/request-func-poll.rst (renamed from Documentation/media/uapi/mediactl/request-func-poll.rst)14
-rw-r--r--Documentation/userspace-api/media/rc/keytable.c.rst (renamed from Documentation/media/uapi/rc/keytable.c.rst)2
-rw-r--r--Documentation/userspace-api/media/rc/lirc-dev-intro.rst176
-rw-r--r--Documentation/userspace-api/media/rc/lirc-dev.rst14
-rw-r--r--Documentation/userspace-api/media/rc/lirc-func.rst (renamed from Documentation/media/uapi/rc/lirc-func.rst)3
-rw-r--r--Documentation/userspace-api/media/rc/lirc-get-features.rst (renamed from Documentation/media/uapi/rc/lirc-get-features.rst)29
-rw-r--r--Documentation/userspace-api/media/rc/lirc-get-rec-mode.rst (renamed from Documentation/media/uapi/rc/lirc-get-rec-mode.rst)14
-rw-r--r--Documentation/userspace-api/media/rc/lirc-get-rec-resolution.rst (renamed from Documentation/media/uapi/rc/lirc-get-rec-resolution.rst)10
-rw-r--r--Documentation/userspace-api/media/rc/lirc-get-send-mode.rst (renamed from Documentation/media/uapi/rc/lirc-get-send-mode.rst)16
-rw-r--r--Documentation/userspace-api/media/rc/lirc-get-timeout.rst (renamed from Documentation/media/uapi/rc/lirc-get-timeout.rst)15
-rw-r--r--Documentation/userspace-api/media/rc/lirc-header.rst12
-rw-r--r--Documentation/userspace-api/media/rc/lirc-read.rst (renamed from Documentation/media/uapi/rc/lirc-read.rst)15
-rw-r--r--Documentation/userspace-api/media/rc/lirc-set-measure-carrier-mode.rst (renamed from Documentation/media/uapi/rc/lirc-set-measure-carrier-mode.rst)10
-rw-r--r--Documentation/userspace-api/media/rc/lirc-set-rec-carrier-range.rst (renamed from Documentation/media/uapi/rc/lirc-set-rec-carrier-range.rst)8
-rw-r--r--Documentation/userspace-api/media/rc/lirc-set-rec-carrier.rst (renamed from Documentation/media/uapi/rc/lirc-set-rec-carrier.rst)10
-rw-r--r--Documentation/userspace-api/media/rc/lirc-set-rec-timeout.rst (renamed from Documentation/media/uapi/rc/lirc-set-rec-timeout.rst)15
-rw-r--r--Documentation/userspace-api/media/rc/lirc-set-send-carrier.rst (renamed from Documentation/media/uapi/rc/lirc-set-send-carrier.rst)10
-rw-r--r--Documentation/userspace-api/media/rc/lirc-set-send-duty-cycle.rst (renamed from Documentation/media/uapi/rc/lirc-set-send-duty-cycle.rst)12
-rw-r--r--Documentation/userspace-api/media/rc/lirc-set-transmitter-mask.rst (renamed from Documentation/media/uapi/rc/lirc-set-transmitter-mask.rst)10
-rw-r--r--Documentation/userspace-api/media/rc/lirc-set-wideband-receiver.rst (renamed from Documentation/media/uapi/rc/lirc-set-wideband-receiver.rst)12
-rw-r--r--Documentation/userspace-api/media/rc/lirc-write.rst (renamed from Documentation/media/uapi/rc/lirc-write.rst)12
-rw-r--r--Documentation/userspace-api/media/rc/lirc.h.rst.exceptions (renamed from Documentation/media/lirc.h.rst.exceptions)15
-rw-r--r--Documentation/userspace-api/media/rc/rc-intro.rst (renamed from Documentation/media/uapi/rc/rc-intro.rst)2
-rw-r--r--Documentation/userspace-api/media/rc/rc-protos.rst454
-rw-r--r--Documentation/userspace-api/media/rc/rc-sysfs-nodes.rst (renamed from Documentation/media/uapi/rc/rc-sysfs-nodes.rst)4
-rw-r--r--Documentation/userspace-api/media/rc/rc-table-change.rst (renamed from Documentation/media/uapi/rc/rc-table-change.rst)2
-rw-r--r--Documentation/userspace-api/media/rc/rc-tables.rst (renamed from Documentation/media/uapi/rc/rc-tables.rst)44
-rw-r--r--Documentation/userspace-api/media/rc/remote_controllers.rst (renamed from Documentation/media/uapi/rc/remote_controllers.rst)11
-rw-r--r--Documentation/userspace-api/media/typical_media_device.svg (renamed from Documentation/media/typical_media_device.svg)1
-rw-r--r--Documentation/userspace-api/media/v4l/app-pri.rst (renamed from Documentation/media/uapi/v4l/app-pri.rst)4
-rw-r--r--Documentation/userspace-api/media/v4l/audio.rst97
-rw-r--r--Documentation/userspace-api/media/v4l/bayer.svg (renamed from Documentation/media/uapi/v4l/bayer.svg)1
-rw-r--r--Documentation/userspace-api/media/v4l/biblio.rst (renamed from Documentation/media/uapi/v4l/biblio.rst)80
-rw-r--r--Documentation/userspace-api/media/v4l/buffer.rst801
-rw-r--r--Documentation/userspace-api/media/v4l/capture-example.rst13
-rw-r--r--Documentation/userspace-api/media/v4l/capture.c.rst (renamed from Documentation/media/uapi/v4l/capture.c.rst)60
-rw-r--r--Documentation/userspace-api/media/v4l/colorspaces-defs.rst (renamed from Documentation/media/uapi/v4l/colorspaces-defs.rst)15
-rw-r--r--Documentation/userspace-api/media/v4l/colorspaces-details.rst (renamed from Documentation/media/uapi/v4l/colorspaces-details.rst)47
-rw-r--r--Documentation/userspace-api/media/v4l/colorspaces.rst (renamed from Documentation/media/uapi/v4l/colorspaces.rst)6
-rw-r--r--Documentation/userspace-api/media/v4l/common-defs.rst (renamed from Documentation/media/uapi/v4l/common-defs.rst)2
-rw-r--r--Documentation/userspace-api/media/v4l/common.rst60
-rw-r--r--Documentation/userspace-api/media/v4l/compat.rst (renamed from Documentation/media/uapi/v4l/compat.rst)2
-rw-r--r--Documentation/userspace-api/media/v4l/constraints.svg (renamed from Documentation/media/uapi/v4l/constraints.svg)1
-rw-r--r--Documentation/userspace-api/media/v4l/control.rst (renamed from Documentation/media/uapi/v4l/control.rst)42
-rw-r--r--Documentation/userspace-api/media/v4l/crop.rst (renamed from Documentation/media/uapi/v4l/crop.rst)2
-rw-r--r--Documentation/userspace-api/media/v4l/crop.svg (renamed from Documentation/media/uapi/v4l/crop.svg)3
-rw-r--r--Documentation/userspace-api/media/v4l/depth-formats.rst17
-rw-r--r--Documentation/userspace-api/media/v4l/dev-capture.rst (renamed from Documentation/media/uapi/v4l/dev-capture.rst)11
-rw-r--r--Documentation/userspace-api/media/v4l/dev-decoder.rst1127
-rw-r--r--Documentation/userspace-api/media/v4l/dev-encoder.rst729
-rw-r--r--Documentation/userspace-api/media/v4l/dev-event.rst (renamed from Documentation/media/uapi/v4l/dev-event.rst)2
-rw-r--r--Documentation/userspace-api/media/v4l/dev-mem2mem.rst43
-rw-r--r--Documentation/userspace-api/media/v4l/dev-meta.rst88
-rw-r--r--Documentation/userspace-api/media/v4l/dev-osd.rst (renamed from Documentation/media/uapi/v4l/dev-osd.rst)4
-rw-r--r--Documentation/userspace-api/media/v4l/dev-output.rst (renamed from Documentation/media/uapi/v4l/dev-output.rst)9
-rw-r--r--Documentation/userspace-api/media/v4l/dev-overlay.rst (renamed from Documentation/media/uapi/v4l/dev-overlay.rst)21
-rw-r--r--Documentation/userspace-api/media/v4l/dev-radio.rst (renamed from Documentation/media/uapi/v4l/dev-radio.rst)2
-rw-r--r--Documentation/userspace-api/media/v4l/dev-raw-vbi.rst (renamed from Documentation/media/uapi/v4l/dev-raw-vbi.rst)27
-rw-r--r--Documentation/userspace-api/media/v4l/dev-rds.rst (renamed from Documentation/media/uapi/v4l/dev-rds.rst)18
-rw-r--r--Documentation/userspace-api/media/v4l/dev-sdr.rst (renamed from Documentation/media/uapi/v4l/dev-sdr.rst)8
-rw-r--r--Documentation/userspace-api/media/v4l/dev-sliced-vbi.rst (renamed from Documentation/media/uapi/v4l/dev-sliced-vbi.rst)116
-rw-r--r--Documentation/userspace-api/media/v4l/dev-stateless-decoder.rst424
-rw-r--r--Documentation/userspace-api/media/v4l/dev-subdev.rst693
-rw-r--r--Documentation/userspace-api/media/v4l/dev-touch.rst (renamed from Documentation/media/uapi/v4l/dev-touch.rst)2
-rw-r--r--Documentation/userspace-api/media/v4l/devices.rst26
-rw-r--r--Documentation/userspace-api/media/v4l/diff-v4l.rst (renamed from Documentation/media/uapi/v4l/diff-v4l.rst)49
-rw-r--r--Documentation/userspace-api/media/v4l/dmabuf.rst (renamed from Documentation/media/uapi/v4l/dmabuf.rst)10
-rw-r--r--Documentation/userspace-api/media/v4l/dv-timings.rst59
-rw-r--r--Documentation/userspace-api/media/v4l/ext-ctrls-camera.rst674
-rw-r--r--Documentation/userspace-api/media/v4l/ext-ctrls-codec-stateless.rst4168
-rw-r--r--Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst2674
-rw-r--r--Documentation/userspace-api/media/v4l/ext-ctrls-colorimetry.rst93
-rw-r--r--Documentation/userspace-api/media/v4l/ext-ctrls-detect.rst64
-rw-r--r--Documentation/userspace-api/media/v4l/ext-ctrls-dv.rst159
-rw-r--r--Documentation/userspace-api/media/v4l/ext-ctrls-flash.rst188
-rw-r--r--Documentation/userspace-api/media/v4l/ext-ctrls-fm-rx.rst83
-rw-r--r--Documentation/userspace-api/media/v4l/ext-ctrls-fm-tx.rst174
-rw-r--r--Documentation/userspace-api/media/v4l/ext-ctrls-image-process.rst57
-rw-r--r--Documentation/userspace-api/media/v4l/ext-ctrls-image-source.rst94
-rw-r--r--Documentation/userspace-api/media/v4l/ext-ctrls-jpeg.rst105
-rw-r--r--Documentation/userspace-api/media/v4l/ext-ctrls-rf-tuner.rst89
-rw-r--r--Documentation/userspace-api/media/v4l/extended-controls.rst173
-rw-r--r--Documentation/userspace-api/media/v4l/field-order.rst (renamed from Documentation/media/uapi/v4l/field-order.rst)39
-rw-r--r--Documentation/userspace-api/media/v4l/fieldseq_bt.svg (renamed from Documentation/media/uapi/v4l/fieldseq_bt.svg)5
-rw-r--r--Documentation/userspace-api/media/v4l/fieldseq_tb.svg (renamed from Documentation/media/uapi/v4l/fieldseq_tb.svg)5
-rw-r--r--Documentation/userspace-api/media/v4l/format.rst (renamed from Documentation/media/uapi/v4l/format.rst)11
-rw-r--r--Documentation/userspace-api/media/v4l/fourcc.rst (renamed from Documentation/media/v4l-drivers/fourcc.rst)2
-rw-r--r--Documentation/userspace-api/media/v4l/func-close.rst (renamed from Documentation/media/uapi/v4l/func-close.rst)10
-rw-r--r--Documentation/userspace-api/media/v4l/func-ioctl.rst (renamed from Documentation/media/uapi/v4l/func-ioctl.rst)12
-rw-r--r--Documentation/userspace-api/media/v4l/func-mmap.rst (renamed from Documentation/media/uapi/v4l/func-mmap.rst)20
-rw-r--r--Documentation/userspace-api/media/v4l/func-munmap.rst54
-rw-r--r--Documentation/userspace-api/media/v4l/func-open.rst (renamed from Documentation/media/uapi/v4l/func-open.rst)20
-rw-r--r--Documentation/userspace-api/media/v4l/func-poll.rst113
-rw-r--r--Documentation/userspace-api/media/v4l/func-read.rst130
-rw-r--r--Documentation/userspace-api/media/v4l/func-select.rst116
-rw-r--r--Documentation/userspace-api/media/v4l/func-write.rst (renamed from Documentation/media/uapi/v4l/func-write.rst)15
-rw-r--r--Documentation/userspace-api/media/v4l/hist-v4l2.rst (renamed from Documentation/media/uapi/v4l/hist-v4l2.rst)321
-rw-r--r--Documentation/userspace-api/media/v4l/hsv-formats.rst (renamed from Documentation/media/uapi/v4l/hsv-formats.rst)2
-rw-r--r--Documentation/userspace-api/media/v4l/io.rst (renamed from Documentation/media/uapi/v4l/io.rst)12
-rw-r--r--Documentation/userspace-api/media/v4l/libv4l-introduction.rst (renamed from Documentation/media/uapi/v4l/libv4l-introduction.rst)36
-rw-r--r--Documentation/userspace-api/media/v4l/libv4l.rst13
-rw-r--r--Documentation/userspace-api/media/v4l/meta-formats.rst26
-rw-r--r--Documentation/userspace-api/media/v4l/metafmt-c3-isp.rst86
-rw-r--r--Documentation/userspace-api/media/v4l/metafmt-d4xx.rst252
-rw-r--r--Documentation/userspace-api/media/v4l/metafmt-generic.rst340
-rw-r--r--Documentation/userspace-api/media/v4l/metafmt-intel-ipu3.rst81
-rw-r--r--Documentation/userspace-api/media/v4l/metafmt-pisp-be.rst56
-rw-r--r--Documentation/userspace-api/media/v4l/metafmt-pisp-fe.rst39
-rw-r--r--Documentation/userspace-api/media/v4l/metafmt-rkisp1.rst89
-rw-r--r--Documentation/userspace-api/media/v4l/metafmt-uvc-msxu-1-5.rst23
-rw-r--r--Documentation/userspace-api/media/v4l/metafmt-uvc.rst53
-rw-r--r--Documentation/userspace-api/media/v4l/metafmt-vivid.rst36
-rw-r--r--Documentation/userspace-api/media/v4l/metafmt-vsp1-hgo.rst168
-rw-r--r--Documentation/userspace-api/media/v4l/metafmt-vsp1-hgt.rst129
-rw-r--r--Documentation/userspace-api/media/v4l/mmap.rst (renamed from Documentation/media/uapi/v4l/mmap.rst)46
-rw-r--r--Documentation/userspace-api/media/v4l/mt2110t.svg315
-rw-r--r--Documentation/userspace-api/media/v4l/nv12mt.svg (renamed from Documentation/media/uapi/v4l/nv12mt.svg)1
-rw-r--r--Documentation/userspace-api/media/v4l/nv12mt_example.svg (renamed from Documentation/media/uapi/v4l/nv12mt_example.svg)1
-rw-r--r--Documentation/userspace-api/media/v4l/open.rst238
-rw-r--r--Documentation/userspace-api/media/v4l/pipeline.dot (renamed from Documentation/media/uapi/v4l/pipeline.dot)2
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-bayer.rst34
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-cnf4.rst31
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-compressed.rst279
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-indexed.rst (renamed from Documentation/media/uapi/v4l/pixfmt-indexed.rst)2
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-intro.rst (renamed from Documentation/media/uapi/v4l/pixfmt-intro.rst)2
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-inzi.rst (renamed from Documentation/media/uapi/v4l/pixfmt-inzi.rst)2
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-m420.rst71
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-packed-hsv.rst (renamed from Documentation/media/uapi/v4l/pixfmt-packed-hsv.rst)4
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-packed-yuv.rst482
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-rawnn-cru.rst143
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-reserved.rst (renamed from Documentation/media/uapi/v4l/pixfmt-reserved.rst)101
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-rgb.rst1278
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-sdr-cs08.rst (renamed from Documentation/media/uapi/v4l/pixfmt-sdr-cs08.rst)2
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-sdr-cs14le.rst (renamed from Documentation/media/uapi/v4l/pixfmt-sdr-cs14le.rst)2
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-sdr-cu08.rst (renamed from Documentation/media/uapi/v4l/pixfmt-sdr-cu08.rst)2
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-sdr-cu16le.rst (renamed from Documentation/media/uapi/v4l/pixfmt-sdr-cu16le.rst)2
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-sdr-pcu16be.rst (renamed from Documentation/media/uapi/v4l/pixfmt-sdr-pcu16be.rst)2
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-sdr-pcu18be.rst (renamed from Documentation/media/uapi/v4l/pixfmt-sdr-pcu18be.rst)2
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-sdr-pcu20be.rst (renamed from Documentation/media/uapi/v4l/pixfmt-sdr-pcu20be.rst)3
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-sdr-ru12le.rst (renamed from Documentation/media/uapi/v4l/pixfmt-sdr-ru12le.rst)2
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-srggb10-ipu3.rst (renamed from Documentation/media/uapi/v4l/pixfmt-srggb10-ipu3.rst)14
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-srggb10.rst (renamed from Documentation/media/uapi/v4l/pixfmt-srggb10.rst)2
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-srggb10alaw8.rst (renamed from Documentation/media/uapi/v4l/pixfmt-srggb10alaw8.rst)2
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-srggb10dpcm8.rst (renamed from Documentation/media/uapi/v4l/pixfmt-srggb10dpcm8.rst)2
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-srggb10p.rst (renamed from Documentation/media/uapi/v4l/pixfmt-srggb10p.rst)4
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-srggb12.rst (renamed from Documentation/media/uapi/v4l/pixfmt-srggb12.rst)2
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-srggb12p.rst (renamed from Documentation/media/uapi/v4l/pixfmt-srggb12p.rst)13
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-srggb14.rst77
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-srggb14p.rst147
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-srggb16.rst (renamed from Documentation/media/uapi/v4l/pixfmt-srggb16.rst)4
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-srggb8-pisp-comp.rst74
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-srggb8.rst (renamed from Documentation/media/uapi/v4l/pixfmt-srggb8.rst)5
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-tch-td08.rst (renamed from Documentation/media/uapi/v4l/pixfmt-tch-td08.rst)2
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-tch-td16.rst (renamed from Documentation/media/uapi/v4l/pixfmt-tch-td16.rst)36
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-tch-tu08.rst (renamed from Documentation/media/uapi/v4l/pixfmt-tch-tu08.rst)2
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-tch-tu16.rst (renamed from Documentation/media/uapi/v4l/pixfmt-tch-tu16.rst)36
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-uv8.rst (renamed from Documentation/media/uapi/v4l/pixfmt-uv8.rst)2
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-v4l2-mplane.rst123
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-v4l2.rst240
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-y12i.rst (renamed from Documentation/media/uapi/v4l/pixfmt-y12i.rst)2
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-y16i.rst73
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-y8i.rst (renamed from Documentation/media/uapi/v4l/pixfmt-y8i.rst)2
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-yuv-luma.rst215
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-yuv-planar.rst1455
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-z16.rst (renamed from Documentation/media/uapi/v4l/pixfmt-z16.rst)2
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt.rst (renamed from Documentation/media/uapi/v4l/pixfmt.rst)3
-rw-r--r--Documentation/userspace-api/media/v4l/planar-apis.rst (renamed from Documentation/media/uapi/v4l/planar-apis.rst)2
-rw-r--r--Documentation/userspace-api/media/v4l/querycap.rst (renamed from Documentation/media/uapi/v4l/querycap.rst)2
-rw-r--r--Documentation/userspace-api/media/v4l/rw.rst47
-rw-r--r--Documentation/userspace-api/media/v4l/sdr-formats.rst (renamed from Documentation/media/uapi/v4l/sdr-formats.rst)2
-rw-r--r--Documentation/userspace-api/media/v4l/selection-api-configuration.rst (renamed from Documentation/media/uapi/v4l/selection-api-configuration.rst)4
-rw-r--r--Documentation/userspace-api/media/v4l/selection-api-examples.rst (renamed from Documentation/media/uapi/v4l/selection-api-examples.rst)2
-rw-r--r--Documentation/userspace-api/media/v4l/selection-api-intro.rst (renamed from Documentation/media/uapi/v4l/selection-api-intro.rst)2
-rw-r--r--Documentation/userspace-api/media/v4l/selection-api-targets.rst (renamed from Documentation/media/uapi/v4l/selection-api-targets.rst)2
-rw-r--r--Documentation/userspace-api/media/v4l/selection-api-vs-crop-api.rst (renamed from Documentation/media/uapi/v4l/selection-api-vs-crop-api.rst)2
-rw-r--r--Documentation/userspace-api/media/v4l/selection-api.rst (renamed from Documentation/media/uapi/v4l/selection-api.rst)2
-rw-r--r--Documentation/userspace-api/media/v4l/selection.svg (renamed from Documentation/media/uapi/v4l/selection.svg)1
-rw-r--r--Documentation/userspace-api/media/v4l/selections-common.rst (renamed from Documentation/media/uapi/v4l/selections-common.rst)2
-rw-r--r--Documentation/userspace-api/media/v4l/standard.rst (renamed from Documentation/media/uapi/v4l/standard.rst)2
-rw-r--r--Documentation/userspace-api/media/v4l/streaming-par.rst (renamed from Documentation/media/uapi/v4l/streaming-par.rst)7
-rw-r--r--Documentation/userspace-api/media/v4l/subdev-formats.rst (renamed from Documentation/media/uapi/v4l/subdev-formats.rst)1210
-rw-r--r--Documentation/userspace-api/media/v4l/subdev-image-processing-crop.svg (renamed from Documentation/media/uapi/v4l/subdev-image-processing-crop.svg)1
-rw-r--r--Documentation/userspace-api/media/v4l/subdev-image-processing-full.svg (renamed from Documentation/media/uapi/v4l/subdev-image-processing-full.svg)1
-rw-r--r--Documentation/userspace-api/media/v4l/subdev-image-processing-scaling-multi-source.svg (renamed from Documentation/media/uapi/v4l/subdev-image-processing-scaling-multi-source.svg)1
-rw-r--r--Documentation/userspace-api/media/v4l/tch-formats.rst (renamed from Documentation/media/uapi/v4l/tch-formats.rst)2
-rw-r--r--Documentation/userspace-api/media/v4l/tuner.rst (renamed from Documentation/media/uapi/v4l/tuner.rst)6
-rw-r--r--Documentation/userspace-api/media/v4l/user-func.rst (renamed from Documentation/media/uapi/v4l/user-func.rst)6
-rw-r--r--Documentation/userspace-api/media/v4l/userp.rst (renamed from Documentation/media/uapi/v4l/userp.rst)21
-rw-r--r--Documentation/userspace-api/media/v4l/v4l2-selection-flags.rst (renamed from Documentation/media/uapi/v4l/v4l2-selection-flags.rst)16
-rw-r--r--Documentation/userspace-api/media/v4l/v4l2-selection-targets.rst (renamed from Documentation/media/uapi/v4l/v4l2-selection-targets.rst)25
-rw-r--r--Documentation/userspace-api/media/v4l/v4l2.rst (renamed from Documentation/media/uapi/v4l/v4l2.rst)25
-rw-r--r--Documentation/userspace-api/media/v4l/v4l2grab-example.rst (renamed from Documentation/media/uapi/v4l/v4l2grab-example.rst)2
-rw-r--r--Documentation/userspace-api/media/v4l/v4l2grab.c.rst (renamed from Documentation/media/uapi/v4l/v4l2grab.c.rst)12
-rw-r--r--Documentation/userspace-api/media/v4l/vbi_525.svg (renamed from Documentation/media/uapi/v4l/vbi_525.svg)9
-rw-r--r--Documentation/userspace-api/media/v4l/vbi_625.svg (renamed from Documentation/media/uapi/v4l/vbi_625.svg)9
-rw-r--r--Documentation/userspace-api/media/v4l/vbi_hsync.svg (renamed from Documentation/media/uapi/v4l/vbi_hsync.svg)9
-rw-r--r--Documentation/userspace-api/media/v4l/video.rst68
-rw-r--r--Documentation/userspace-api/media/v4l/videodev.rst11
-rw-r--r--Documentation/userspace-api/media/v4l/videodev2.h.rst.exceptions (renamed from Documentation/media/videodev2.h.rst.exceptions)66
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-create-bufs.rst (renamed from Documentation/media/uapi/v4l/vidioc-create-bufs.rst)29
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-cropcap.rst (renamed from Documentation/media/uapi/v4l/vidioc-cropcap.rst)19
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-dbg-g-chip-info.rst (renamed from Documentation/media/uapi/v4l/vidioc-dbg-g-chip-info.rst)33
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-dbg-g-register.rst (renamed from Documentation/media/uapi/v4l/vidioc-dbg-g-register.rst)36
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-decoder-cmd.rst218
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-dqevent.rst (renamed from Documentation/media/uapi/v4l/vidioc-dqevent.rst)115
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-dv-timings-cap.rst (renamed from Documentation/media/uapi/v4l/vidioc-dv-timings-cap.rst)39
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-encoder-cmd.rst169
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-enum-dv-timings.rst (renamed from Documentation/media/uapi/v4l/vidioc-enum-dv-timings.rst)20
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-enum-fmt.rst266
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-enum-frameintervals.rst (renamed from Documentation/media/uapi/v4l/vidioc-enum-frameintervals.rst)43
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-enum-framesizes.rst (renamed from Documentation/media/uapi/v4l/vidioc-enum-framesizes.rst)44
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-enum-freq-bands.rst (renamed from Documentation/media/uapi/v4l/vidioc-enum-freq-bands.rst)18
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-enumaudio.rst (renamed from Documentation/media/uapi/v4l/vidioc-enumaudio.rst)12
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-enumaudioout.rst (renamed from Documentation/media/uapi/v4l/vidioc-enumaudioout.rst)12
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-enuminput.rst (renamed from Documentation/media/uapi/v4l/vidioc-enuminput.rst)24
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-enumoutput.rst (renamed from Documentation/media/uapi/v4l/vidioc-enumoutput.rst)21
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-enumstd.rst (renamed from Documentation/media/uapi/v4l/vidioc-enumstd.rst)31
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-expbuf.rst (renamed from Documentation/media/uapi/v4l/vidioc-expbuf.rst)18
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-audio.rst (renamed from Documentation/media/uapi/v4l/vidioc-g-audio.rst)26
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-audioout.rst (renamed from Documentation/media/uapi/v4l/vidioc-g-audioout.rst)20
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-crop.rst (renamed from Documentation/media/uapi/v4l/vidioc-g-crop.rst)20
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-ctrl.rst (renamed from Documentation/media/uapi/v4l/vidioc-g-ctrl.rst)23
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-dv-timings.rst (renamed from Documentation/media/uapi/v4l/vidioc-g-dv-timings.rst)70
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-edid.rst (renamed from Documentation/media/uapi/v4l/vidioc-g-edid.rst)29
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-enc-index.rst (renamed from Documentation/media/uapi/v4l/vidioc-g-enc-index.rst)21
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-ext-ctrls.rst553
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-fbuf.rst (renamed from Documentation/media/uapi/v4l/vidioc-g-fbuf.rst)80
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-fmt.rst (renamed from Documentation/media/uapi/v4l/vidioc-g-fmt.rst)55
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-frequency.rst (renamed from Documentation/media/uapi/v4l/vidioc-g-frequency.rst)20
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-input.rst (renamed from Documentation/media/uapi/v4l/vidioc-g-input.rst)17
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-jpegcomp.rst (renamed from Documentation/media/uapi/v4l/vidioc-g-jpegcomp.rst)23
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-modulator.rst (renamed from Documentation/media/uapi/v4l/vidioc-g-modulator.rst)24
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-output.rst (renamed from Documentation/media/uapi/v4l/vidioc-g-output.rst)19
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-parm.rst269
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-priority.rst (renamed from Documentation/media/uapi/v4l/vidioc-g-priority.rst)20
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-selection.rst (renamed from Documentation/media/uapi/v4l/vidioc-g-selection.rst)19
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-sliced-vbi-cap.rst (renamed from Documentation/media/uapi/v4l/vidioc-g-sliced-vbi-cap.rst)41
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-std.rst82
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-tuner.rst (renamed from Documentation/media/uapi/v4l/vidioc-g-tuner.rst)31
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-log-status.rst (renamed from Documentation/media/uapi/v4l/vidioc-log-status.rst)12
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-overlay.rst (renamed from Documentation/media/uapi/v4l/vidioc-overlay.rst)12
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-prepare-buf.rst56
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-qbuf.rst (renamed from Documentation/media/uapi/v4l/vidioc-qbuf.rst)43
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-query-dv-timings.rst (renamed from Documentation/media/uapi/v4l/vidioc-query-dv-timings.rst)17
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-querybuf.rst (renamed from Documentation/media/uapi/v4l/vidioc-querybuf.rst)12
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-querycap.rst (renamed from Documentation/media/uapi/v4l/vidioc-querycap.rst)51
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-queryctrl.rst (renamed from Documentation/media/uapi/v4l/vidioc-queryctrl.rst)220
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-querystd.rst (renamed from Documentation/media/uapi/v4l/vidioc-querystd.rst)17
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-remove-bufs.rst86
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-reqbufs.rst207
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-s-hw-freq-seek.rst (renamed from Documentation/media/uapi/v4l/vidioc-s-hw-freq-seek.rst)15
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-streamon.rst (renamed from Documentation/media/uapi/v4l/vidioc-streamon.rst)19
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-subdev-enum-frame-interval.rst (renamed from Documentation/media/uapi/v4l/vidioc-subdev-enum-frame-interval.rst)28
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-subdev-enum-frame-size.rst131
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-subdev-enum-mbus-code.rst163
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-subdev-g-client-cap.rst103
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-subdev-g-crop.rst127
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-subdev-g-fmt.rst (renamed from Documentation/media/uapi/v4l/vidioc-subdev-g-fmt.rst)41
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-subdev-g-frame-interval.rst126
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-subdev-g-routing.rst164
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-subdev-g-selection.rst125
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-subdev-querycap.rst104
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-subscribe-event.rst (renamed from Documentation/media/uapi/v4l/vidioc-subscribe-event.rst)23
-rw-r--r--Documentation/userspace-api/media/v4l/yuv-formats.rst274
-rw-r--r--Documentation/userspace-api/mfd_noexec.rst86
-rw-r--r--Documentation/userspace-api/mseal.rst209
-rw-r--r--Documentation/userspace-api/netlink/c-code-gen.rst109
-rw-r--r--Documentation/userspace-api/netlink/genetlink-legacy.rst292
-rw-r--r--Documentation/userspace-api/netlink/index.rst21
-rw-r--r--Documentation/userspace-api/netlink/intro-specs.rst159
-rw-r--r--Documentation/userspace-api/netlink/intro.rst687
-rw-r--r--Documentation/userspace-api/netlink/netlink-raw.rst194
-rw-r--r--Documentation/userspace-api/netlink/specs.rst467
-rw-r--r--Documentation/userspace-api/ntsync.rst385
-rw-r--r--Documentation/userspace-api/perf_ring_buffer.rst830
-rw-r--r--Documentation/userspace-api/seccomp_filter.rst108
-rw-r--r--Documentation/userspace-api/spec_ctrl.rst46
-rw-r--r--Documentation/userspace-api/sysfs-platform_profile.rst80
-rw-r--r--Documentation/userspace-api/tee.rst39
-rw-r--r--Documentation/userspace-api/vduse.rst233
-rw-r--r--Documentation/vfio-mediated-device.txt414
-rw-r--r--Documentation/vfio.txt520
-rw-r--r--Documentation/video-output.txt34
-rw-r--r--Documentation/virt/acrn/cpuid.rst46
-rw-r--r--Documentation/virt/acrn/index.rst12
-rw-r--r--Documentation/virt/acrn/introduction.rst43
-rw-r--r--Documentation/virt/acrn/io-request.rst97
-rw-r--r--Documentation/virt/coco/sev-guest.rst243
-rw-r--r--Documentation/virt/coco/tdx-guest.rst52
-rw-r--r--Documentation/virt/guest-halt-polling.rst84
-rw-r--r--Documentation/virt/hyperv/clocks.rst82
-rw-r--r--Documentation/virt/hyperv/coco.rst260
-rw-r--r--Documentation/virt/hyperv/hibernation.rst336
-rw-r--r--Documentation/virt/hyperv/index.rst15
-rw-r--r--Documentation/virt/hyperv/overview.rst207
-rw-r--r--Documentation/virt/hyperv/vmbus.rst346
-rw-r--r--Documentation/virt/hyperv/vpci.rst316
-rw-r--r--Documentation/virt/index.rst25
-rw-r--r--Documentation/virt/kvm/api.rst9277
-rw-r--r--Documentation/virt/kvm/arm/fw-pseudo-registers.rst151
-rw-r--r--Documentation/virt/kvm/arm/hyp-abi.rst78
-rw-r--r--Documentation/virt/kvm/arm/hypercalls.rst203
-rw-r--r--Documentation/virt/kvm/arm/index.rst15
-rw-r--r--Documentation/virt/kvm/arm/ptp_kvm.rst35
-rw-r--r--Documentation/virt/kvm/arm/pvtime.rst82
-rw-r--r--Documentation/virt/kvm/arm/vcpu-features.rst48
-rw-r--r--Documentation/virt/kvm/devices/README (renamed from Documentation/virtual/kvm/devices/README)0
-rw-r--r--Documentation/virt/kvm/devices/arm-vgic-its.rst215
-rw-r--r--Documentation/virt/kvm/devices/arm-vgic-v3.rst375
-rw-r--r--Documentation/virt/kvm/devices/arm-vgic.rst156
-rw-r--r--Documentation/virt/kvm/devices/index.rst19
-rw-r--r--Documentation/virt/kvm/devices/mpic.rst58
-rw-r--r--Documentation/virt/kvm/devices/s390_flic.rst170
-rw-r--r--Documentation/virt/kvm/devices/vcpu.rst293
-rw-r--r--Documentation/virt/kvm/devices/vfio.rst61
-rw-r--r--Documentation/virt/kvm/devices/vm.rst402
-rw-r--r--Documentation/virt/kvm/devices/xics.rst92
-rw-r--r--Documentation/virt/kvm/devices/xive.rst247
-rw-r--r--Documentation/virt/kvm/halt-polling.rst153
-rw-r--r--Documentation/virt/kvm/index.rst22
-rw-r--r--Documentation/virt/kvm/locking.rst318
-rw-r--r--Documentation/virt/kvm/loongarch/hypercalls.rst89
-rw-r--r--Documentation/virt/kvm/loongarch/index.rst10
-rw-r--r--Documentation/virt/kvm/ppc-pv.rst222
-rw-r--r--Documentation/virt/kvm/review-checklist.rst122
-rw-r--r--Documentation/virt/kvm/s390/index.rst13
-rw-r--r--Documentation/virt/kvm/s390/s390-diag.rst140
-rw-r--r--Documentation/virt/kvm/s390/s390-pv-boot.rst84
-rw-r--r--Documentation/virt/kvm/s390/s390-pv-dump.rst64
-rw-r--r--Documentation/virt/kvm/s390/s390-pv.rst116
-rw-r--r--Documentation/virt/kvm/vcpu-requests.rst (renamed from Documentation/virtual/kvm/vcpu-requests.rst)59
-rw-r--r--Documentation/virt/kvm/x86/amd-memory-encryption.rst613
-rw-r--r--Documentation/virt/kvm/x86/cpuid.rst124
-rw-r--r--Documentation/virt/kvm/x86/errata.rst80
-rw-r--r--Documentation/virt/kvm/x86/hypercalls.rst192
-rw-r--r--Documentation/virt/kvm/x86/index.rst19
-rw-r--r--Documentation/virt/kvm/x86/intel-tdx.rst268
-rw-r--r--Documentation/virt/kvm/x86/mmu.rst509
-rw-r--r--Documentation/virt/kvm/x86/msr.rst390
-rw-r--r--Documentation/virt/kvm/x86/nested-vmx.rst244
-rw-r--r--Documentation/virt/kvm/x86/running-nested-guests.rst278
-rw-r--r--Documentation/virt/kvm/x86/timekeeping.rst645
-rw-r--r--Documentation/virt/ne_overview.rst100
-rw-r--r--Documentation/virt/paravirt_ops.rst35
-rw-r--r--Documentation/virt/uml/user_mode_linux_howto_v2.rst1240
-rw-r--r--Documentation/virtual/kvm/amd-memory-encryption.rst247
-rw-r--r--Documentation/virtual/kvm/api.txt4897
-rw-r--r--Documentation/virtual/kvm/arm/hyp-abi.txt53
-rw-r--r--Documentation/virtual/kvm/arm/psci.txt30
-rw-r--r--Documentation/virtual/kvm/cpuid.txt83
-rw-r--r--Documentation/virtual/kvm/devices/arm-vgic-its.txt181
-rw-r--r--Documentation/virtual/kvm/devices/arm-vgic-v3.txt251
-rw-r--r--Documentation/virtual/kvm/devices/arm-vgic.txt127
-rw-r--r--Documentation/virtual/kvm/devices/mpic.txt53
-rw-r--r--Documentation/virtual/kvm/devices/s390_flic.txt163
-rw-r--r--Documentation/virtual/kvm/devices/vcpu.txt62
-rw-r--r--Documentation/virtual/kvm/devices/vfio.txt36
-rw-r--r--Documentation/virtual/kvm/devices/vm.txt269
-rw-r--r--Documentation/virtual/kvm/devices/xics.txt66
-rw-r--r--Documentation/virtual/kvm/halt-polling.txt127
-rw-r--r--Documentation/virtual/kvm/hypercalls.txt143
-rw-r--r--Documentation/virtual/kvm/locking.txt217
-rw-r--r--Documentation/virtual/kvm/mmu.txt469
-rw-r--r--Documentation/virtual/kvm/msr.txt275
-rw-r--r--Documentation/virtual/kvm/nested-vmx.txt240
-rw-r--r--Documentation/virtual/kvm/ppc-pv.txt212
-rw-r--r--Documentation/virtual/kvm/review-checklist.txt38
-rw-r--r--Documentation/virtual/kvm/s390-diag.txt82
-rw-r--r--Documentation/virtual/kvm/timekeeping.txt612
-rw-r--r--Documentation/virtual/paravirt_ops.txt32
-rw-r--r--Documentation/virtual/uml/UserModeLinux-HOWTO.txt4589
-rw-r--r--Documentation/vm/.gitignore2
-rw-r--r--Documentation/vm/active_mm.rst91
-rw-r--r--Documentation/vm/balance.rst102
-rw-r--r--Documentation/vm/cleancache.rst296
-rw-r--r--Documentation/vm/conf.py10
-rw-r--r--Documentation/vm/frontswap.rst293
-rw-r--r--Documentation/vm/highmem.rst147
-rw-r--r--Documentation/vm/hmm.rst386
-rw-r--r--Documentation/vm/hugetlbfs_reserv.rst595
-rw-r--r--Documentation/vm/hwpoison.rst186
-rw-r--r--Documentation/vm/index.rst50
-rw-r--r--Documentation/vm/ksm.rst87
-rw-r--r--Documentation/vm/mmu_notifier.rst99
-rw-r--r--Documentation/vm/numa.rst150
-rw-r--r--Documentation/vm/overcommit-accounting.rst87
-rw-r--r--Documentation/vm/page_frags.rst45
-rw-r--r--Documentation/vm/page_migration.rst257
-rw-r--r--Documentation/vm/page_owner.rst90
-rw-r--r--Documentation/vm/remap_file_pages.rst33
-rw-r--r--Documentation/vm/slub.rst367
-rw-r--r--Documentation/vm/split_page_table_lock.rst100
-rw-r--r--Documentation/vm/transhuge.rst197
-rw-r--r--Documentation/vm/unevictable-lru.rst614
-rw-r--r--Documentation/vm/z3fold.rst30
-rw-r--r--Documentation/vm/zsmalloc.rst82
-rw-r--r--Documentation/vm/zswap.rst135
-rw-r--r--Documentation/w1/index.rst21
-rw-r--r--Documentation/w1/masters/ds248231
-rw-r--r--Documentation/w1/masters/ds2482.rst39
-rw-r--r--Documentation/w1/masters/ds249068
-rw-r--r--Documentation/w1/masters/ds2490.rst72
-rw-r--r--Documentation/w1/masters/index.rst15
-rw-r--r--Documentation/w1/masters/mxc-w112
-rw-r--r--Documentation/w1/masters/mxc-w1.rst17
-rw-r--r--Documentation/w1/masters/omap-hdq52
-rw-r--r--Documentation/w1/masters/omap-hdq.rst54
-rw-r--r--Documentation/w1/masters/w1-gpio44
-rw-r--r--Documentation/w1/masters/w1-gpio.rst47
-rw-r--r--Documentation/w1/masters/w1-uart.rst54
-rw-r--r--Documentation/w1/slaves/index.rst16
-rw-r--r--Documentation/w1/slaves/w1_ds240625
-rw-r--r--Documentation/w1/slaves/w1_ds2406.rst27
-rw-r--r--Documentation/w1/slaves/w1_ds241350
-rw-r--r--Documentation/w1/slaves/w1_ds2413.rst59
-rw-r--r--Documentation/w1/slaves/w1_ds242347
-rw-r--r--Documentation/w1/slaves/w1_ds2423.rst54
-rw-r--r--Documentation/w1/slaves/w1_ds243863
-rw-r--r--Documentation/w1/slaves/w1_ds2438.rst86
-rw-r--r--Documentation/w1/slaves/w1_ds28e0436
-rw-r--r--Documentation/w1/slaves/w1_ds28e04.rst41
-rw-r--r--Documentation/w1/slaves/w1_ds28e1768
-rw-r--r--Documentation/w1/slaves/w1_ds28e17.rst72
-rw-r--r--Documentation/w1/slaves/w1_therm67
-rw-r--r--Documentation/w1/slaves/w1_therm.rst144
-rw-r--r--Documentation/w1/w1-generic.rst133
-rw-r--r--Documentation/w1/w1-netlink.rst202
-rw-r--r--Documentation/w1/w1.generic121
-rw-r--r--Documentation/w1/w1.netlink189
-rw-r--r--Documentation/watchdog/convert_drivers_to_kernel_api.rst218
-rw-r--r--Documentation/watchdog/convert_drivers_to_kernel_api.txt218
-rw-r--r--Documentation/watchdog/hpwdt.rst77
-rw-r--r--Documentation/watchdog/hpwdt.txt66
-rw-r--r--Documentation/watchdog/index.rst25
-rw-r--r--Documentation/watchdog/mlx-wdt.rst66
-rw-r--r--Documentation/watchdog/pcwd-watchdog.rst71
-rw-r--r--Documentation/watchdog/pcwd-watchdog.txt66
-rw-r--r--Documentation/watchdog/watchdog-api.rst271
-rw-r--r--Documentation/watchdog/watchdog-api.txt237
-rw-r--r--Documentation/watchdog/watchdog-kernel-api.rst350
-rw-r--r--Documentation/watchdog/watchdog-kernel-api.txt309
-rw-r--r--Documentation/watchdog/watchdog-parameters.rst730
-rw-r--r--Documentation/watchdog/watchdog-parameters.txt410
-rw-r--r--Documentation/watchdog/watchdog-pm.rst22
-rw-r--r--Documentation/watchdog/wdt.rst63
-rw-r--r--Documentation/watchdog/wdt.txt50
-rw-r--r--Documentation/wimax/README.i2400m260
-rw-r--r--Documentation/wimax/README.wimax81
-rw-r--r--Documentation/wmi/acpi-interface.rst106
-rw-r--r--Documentation/wmi/devices/alienware-wmi.rst322
-rw-r--r--Documentation/wmi/devices/dell-wmi-ddv.rst319
-rw-r--r--Documentation/wmi/devices/index.rst22
-rw-r--r--Documentation/wmi/devices/lenovo-wmi-gamezone.rst203
-rw-r--r--Documentation/wmi/devices/lenovo-wmi-other.rst108
-rw-r--r--Documentation/wmi/devices/msi-wmi-platform.rst198
-rw-r--r--Documentation/wmi/devices/wmi-bmof.rst25
-rw-r--r--Documentation/wmi/driver-development-guide.rst191
-rw-r--r--Documentation/wmi/index.rst20
-rw-r--r--Documentation/x86/amd-memory-encryption.txt90
-rw-r--r--Documentation/x86/boot.txt1160
-rw-r--r--Documentation/x86/earlyprintk.txt141
-rw-r--r--Documentation/x86/entry_64.txt104
-rw-r--r--Documentation/x86/exception-tables.txt327
-rw-r--r--Documentation/x86/i386/IO-APIC.txt119
-rw-r--r--Documentation/x86/intel_mpx.txt244
-rw-r--r--Documentation/x86/intel_rdt_ui.txt1118
-rw-r--r--Documentation/x86/kernel-stacks141
-rw-r--r--Documentation/x86/microcode.txt136
-rw-r--r--Documentation/x86/mtrr.txt329
-rw-r--r--Documentation/x86/orc-unwinder.txt179
-rw-r--r--Documentation/x86/pat.txt230
-rw-r--r--Documentation/x86/protection-keys.txt90
-rw-r--r--Documentation/x86/pti.txt186
-rw-r--r--Documentation/x86/tlb.txt75
-rw-r--r--Documentation/x86/topology.txt217
-rw-r--r--Documentation/x86/usb-legacy-support.txt44
-rw-r--r--Documentation/x86/x86_64/5level-paging.txt61
-rw-r--r--Documentation/x86/x86_64/boot-options.txt281
-rw-r--r--Documentation/x86/x86_64/cpu-hotplug-spec21
-rw-r--r--Documentation/x86/x86_64/fake-numa-for-cpusets67
-rw-r--r--Documentation/x86/x86_64/machinecheck83
-rw-r--r--Documentation/x86/x86_64/mm.txt153
-rw-r--r--Documentation/x86/x86_64/uefi.txt42
-rw-r--r--Documentation/x86/zero-page.txt40
-rw-r--r--Documentation/xilinx/eemi.txt67
-rw-r--r--Documentation/xillybus.txt379
-rw-r--r--Documentation/xtensa/atomctl.txt44
-rw-r--r--Documentation/xtensa/mmu.txt189
-rw-r--r--Documentation/xz.txt127
14163 files changed, 1294802 insertions, 471758 deletions
diff --git a/Documentation/.gitignore b/Documentation/.gitignore
index e74fec8693b2..d6dc7c9b8e25 100644
--- a/Documentation/.gitignore
+++ b/Documentation/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
output
*.pyc
diff --git a/Documentation/.renames.txt b/Documentation/.renames.txt
new file mode 100644
index 000000000000..c0bd5d3dc8b9
--- /dev/null
+++ b/Documentation/.renames.txt
@@ -0,0 +1,1191 @@
+80211/cfg80211 driver-api/80211/cfg80211
+80211/index driver-api/80211/index
+80211/introduction driver-api/80211/introduction
+80211/mac80211 driver-api/80211/mac80211
+80211/mac80211-advanced driver-api/80211/mac80211-advanced
+EDID/howto admin-guide/edid
+PCI/picebus-howto PCI/pciebus-howto
+RAS/address-translation admin-guide/RAS/address-translation
+RAS/error-decoding admin-guide/RAS/error-decoding
+RAS/ras admin-guide/RAS/error-decoding
+accelerators/ocxl userspace-api/accelerators/ocxl
+admin-guide/gpio/sysfs userspace-api/gpio/sysfs
+admin-guide/l1tf admin-guide/hw-vuln/l1tf
+admin-guide/media/v4l-with-ir admin-guide/media/remote-controller
+admin-guide/ras admin-guide/RAS/main
+admin-guide/security-bugs process/security-bugs
+aoe/aoe admin-guide/aoe/aoe
+aoe/examples admin-guide/aoe/examples
+aoe/index admin-guide/aoe/index
+aoe/todo admin-guide/aoe/todo
+arc/arc arch/arc/arc
+arc/features arch/arc/features
+arc/index arch/arc/index
+arch/x86/resctrl filesystems/resctrl
+arm/arm arch/arm/arm
+arm/booting arch/arm/booting
+arm/cluster-pm-race-avoidance arch/arm/cluster-pm-race-avoidance
+arm/features arch/arm/features
+arm/firmware arch/arm/firmware
+arm/google/chromebook-boot-flow arch/arm/google/chromebook-boot-flow
+arm/index arch/arm/index
+arm/interrupts arch/arm/interrupts
+arm/ixp4xx arch/arm/ixp4xx
+arm/kernel_mode_neon arch/arm/kernel_mode_neon
+arm/kernel_user_helpers arch/arm/kernel_user_helpers
+arm/keystone/knav-qmss arch/arm/keystone/knav-qmss
+arm/keystone/overview arch/arm/keystone/overview
+arm/marvel arch/arm/marvell
+arm/marvell arch/arm/marvell
+arm/mem_alignment arch/arm/mem_alignment
+arm/memory arch/arm/memory
+arm/microchip arch/arm/microchip
+arm/netwinder arch/arm/netwinder
+arm/nwfpe/index arch/arm/nwfpe/index
+arm/nwfpe/netwinder-fpe arch/arm/nwfpe/netwinder-fpe
+arm/nwfpe/notes arch/arm/nwfpe/notes
+arm/nwfpe/nwfpe arch/arm/nwfpe/nwfpe
+arm/nwfpe/todo arch/arm/nwfpe/todo
+arm/omap/dss arch/arm/omap/dss
+arm/omap/index arch/arm/omap/index
+arm/omap/omap arch/arm/omap/omap
+arm/omap/omap_pm arch/arm/omap/omap_pm
+arm/porting arch/arm/porting
+arm/pxa/mfp arch/arm/pxa/mfp
+arm/sa1100/assabet arch/arm/sa1100/assabet
+arm/sa1100/cerf arch/arm/sa1100/cerf
+arm/sa1100/index arch/arm/sa1100/index
+arm/sa1100/lart arch/arm/sa1100/lart
+arm/sa1100/serial_uart arch/arm/sa1100/serial_uart
+arm/samsung/bootloader-interface arch/arm/samsung/bootloader-interface
+arm/samsung/gpio arch/arm/samsung/gpio
+arm/samsung/index arch/arm/samsung/index
+arm/samsung/overview arch/arm/samsung/overview
+arm/setup arch/arm/setup
+arm/spear/overview arch/arm/spear/overview
+arm/sti/overview arch/arm/sti/overview
+arm/sti/stih407-overview arch/arm/sti/stih407-overview
+arm/sti/stih418-overview arch/arm/sti/stih418-overview
+arm/stm32/overview arch/arm/stm32/overview
+arm/stm32/stm32-dma-mdma-chaining arch/arm/stm32/stm32-dma-mdma-chaining
+arm/stm32/stm32f429-overview arch/arm/stm32/stm32f429-overview
+arm/stm32/stm32f746-overview arch/arm/stm32/stm32f746-overview
+arm/stm32/stm32f769-overview arch/arm/stm32/stm32f769-overview
+arm/stm32/stm32h743-overview arch/arm/stm32/stm32h743-overview
+arm/stm32/stm32h750-overview arch/arm/stm32/stm32h750-overview
+arm/stm32/stm32mp13-overview arch/arm/stm32/stm32mp13-overview
+arm/stm32/stm32mp151-overview arch/arm/stm32/stm32mp151-overview
+arm/stm32/stm32mp157-overview arch/arm/stm32/stm32mp157-overview
+arm/sunxi arch/arm/sunxi
+arm/sunxi/clocks arch/arm/sunxi/clocks
+arm/swp_emulation arch/arm/swp_emulation
+arm/tcm arch/arm/tcm
+arm/uefi arch/arm/uefi
+arm/vfp/release-notes arch/arm/vfp/release-notes
+arm/vlocks arch/arm/vlocks
+arm64/acpi_object_usage arch/arm64/acpi_object_usage
+arm64/amu arch/arm64/amu
+arm64/arm-acpi arch/arm64/arm-acpi
+arm64/asymmetric-32bit arch/arm64/asymmetric-32bit
+arm64/booting arch/arm64/booting
+arm64/cpu-feature-registers arch/arm64/cpu-feature-registers
+arm64/elf_hwcaps arch/arm64/elf_hwcaps
+arm64/features arch/arm64/features
+arm64/hugetlbpage arch/arm64/hugetlbpage
+arm64/index arch/arm64/index
+arm64/legacy_instructions arch/arm64/legacy_instructions
+arm64/memory arch/arm64/memory
+arm64/memory-tagging-extension arch/arm64/memory-tagging-extension
+arm64/perf arch/arm64/perf
+arm64/pointer-authentication arch/arm64/pointer-authentication
+arm64/silicon-errata arch/arm64/silicon-errata
+arm64/sme arch/arm64/sme
+arm64/sve arch/arm64/sve
+arm64/tagged-address-abi arch/arm64/tagged-address-abi
+arm64/tagged-pointers arch/arm64/tagged-pointers
+asm-annotations core-api/asm-annotations
+auxdisplay/lcd-panel-cgram admin-guide/lcd-panel-cgram
+backlight/lp855x-driver driver-api/backlight/lp855x-driver
+blockdev/drbd/data-structure-v9 admin-guide/blockdev/drbd/data-structure-v9
+blockdev/drbd/figures admin-guide/blockdev/drbd/figures
+blockdev/drbd/index admin-guide/blockdev/drbd/index
+blockdev/floppy admin-guide/blockdev/floppy
+blockdev/index admin-guide/blockdev/index
+blockdev/nbd admin-guide/blockdev/nbd
+blockdev/paride admin-guide/blockdev/paride
+blockdev/ramdisk admin-guide/blockdev/ramdisk
+blockdev/zram admin-guide/blockdev/zram
+bpf/README bpf/index
+bpf/bpf_lsm bpf/prog_lsm
+bpf/instruction-set bpf/standardization/instruction-set
+bpf/libbpf/libbpf bpf/libbpf/index
+bpf/standardization/linux-notes bpf/linux-notes
+bus-devices/ti-gpmc driver-api/memory-devices/ti-gpmc
+cgroup-v1/blkio-controller admin-guide/cgroup-v1/blkio-controller
+cgroup-v1/cgroups admin-guide/cgroup-v1/cgroups
+cgroup-v1/cpuacct admin-guide/cgroup-v1/cpuacct
+cgroup-v1/cpusets admin-guide/cgroup-v1/cpusets
+cgroup-v1/devices admin-guide/cgroup-v1/devices
+cgroup-v1/freezer-subsystem admin-guide/cgroup-v1/freezer-subsystem
+cgroup-v1/hugetlb admin-guide/cgroup-v1/hugetlb
+cgroup-v1/index admin-guide/cgroup-v1/index
+cgroup-v1/memcg_test admin-guide/cgroup-v1/memcg_test
+cgroup-v1/memory admin-guide/cgroup-v1/memory
+cgroup-v1/net_cls admin-guide/cgroup-v1/net_cls
+cgroup-v1/net_prio admin-guide/cgroup-v1/net_prio
+cgroup-v1/pids admin-guide/cgroup-v1/pids
+cgroup-v1/rdma admin-guide/cgroup-v1/rdma
+cma/debugfs admin-guide/mm/cma_debugfs
+connector/connector driver-api/connector
+console/console driver-api/console
+core-api/gcc-plugins kbuild/gcc-plugins
+core-api/ioctl driver-api/ioctl
+core-api/memory-hotplug-notifier core-api/memory-hotplug
+dev-tools/gdb-kernel-debugging process/debugging/gdb-kernel-debugging
+dev-tools/kgdb process/debugging/kgdb
+dev-tools/tools dev-tools/index
+development-process/1.Intro process/1.Intro
+development-process/2.Process process/2.Process
+development-process/3.Early-stage process/3.Early-stage
+development-process/4.Coding process/4.Coding
+development-process/5.Posting process/5.Posting
+development-process/6.Followthrough process/6.Followthrough
+development-process/7.AdvancedTopics process/7.AdvancedTopics
+development-process/8.Conclusion process/8.Conclusion
+development-process/development-process process/development-process
+development-process/index process/index
+device-mapper/cache admin-guide/device-mapper/cache
+device-mapper/cache-policies admin-guide/device-mapper/cache-policies
+device-mapper/delay admin-guide/device-mapper/delay
+device-mapper/dm-crypt admin-guide/device-mapper/dm-crypt
+device-mapper/dm-flakey admin-guide/device-mapper/dm-flakey
+device-mapper/dm-init admin-guide/device-mapper/dm-init
+device-mapper/dm-integrity admin-guide/device-mapper/dm-integrity
+device-mapper/dm-io admin-guide/device-mapper/dm-io
+device-mapper/dm-log admin-guide/device-mapper/dm-log
+device-mapper/dm-queue-length admin-guide/device-mapper/dm-queue-length
+device-mapper/dm-raid admin-guide/device-mapper/dm-raid
+device-mapper/dm-service-time admin-guide/device-mapper/dm-service-time
+device-mapper/dm-uevent admin-guide/device-mapper/dm-uevent
+device-mapper/dm-zoned admin-guide/device-mapper/dm-zoned
+device-mapper/era admin-guide/device-mapper/era
+device-mapper/index admin-guide/device-mapper/index
+device-mapper/kcopyd admin-guide/device-mapper/kcopyd
+device-mapper/linear admin-guide/device-mapper/linear
+device-mapper/log-writes admin-guide/device-mapper/log-writes
+device-mapper/persistent-data admin-guide/device-mapper/persistent-data
+device-mapper/snapshot admin-guide/device-mapper/snapshot
+device-mapper/statistics admin-guide/device-mapper/statistics
+device-mapper/striped admin-guide/device-mapper/striped
+device-mapper/switch admin-guide/device-mapper/switch
+device-mapper/thin-provisioning admin-guide/device-mapper/thin-provisioning
+device-mapper/unstriped admin-guide/device-mapper/unstriped
+device-mapper/verity admin-guide/device-mapper/verity
+device-mapper/writecache admin-guide/device-mapper/writecache
+device-mapper/zero admin-guide/device-mapper/zero
+devicetree/writing-schema devicetree/bindings/writing-schema
+driver-api/bt8xxgpio driver-api/gpio/bt8xxgpio
+driver-api/cxl/access-coordinates driver-api/cxl/linux/access-coordinates
+driver-api/cxl/memory-devices driver-api/cxl/theory-of-operation
+driver-api/dcdbas userspace-api/dcdbas
+driver-api/dell_rbu admin-guide/dell_rbu
+driver-api/edid admin-guide/edid
+driver-api/gpio driver-api/gpio/index
+driver-api/hte/tegra194-hte driver-api/hte/tegra-hte
+driver-api/isapnp userspace-api/isapnp
+driver-api/media/drivers/v4l-drivers/zoran driver-api/media/drivers/zoran
+driver-api/mtd/intel-spi driver-api/mtd/spi-intel
+driver-api/pci driver-api/pci/pci
+driver-api/pinctl driver-api/pin-control
+driver-api/rapidio admin-guide/rapidio
+driver-api/serial/moxa-smartio driver-api/tty/moxa-smartio
+driver-api/serial/n_gsm driver-api/tty/n_gsm
+driver-api/serial/tty driver-api/tty/tty_ldisc
+driver-api/thermal/intel_powerclamp admin-guide/thermal/intel_powerclamp
+driver-api/usb driver-api/usb/usb
+driver-model/binding driver-api/driver-model/binding
+driver-model/bus driver-api/driver-model/bus
+driver-model/design-patterns driver-api/driver-model/design-patterns
+driver-model/device driver-api/driver-model/device
+driver-model/devres driver-api/driver-model/devres
+driver-model/driver driver-api/driver-model/driver
+driver-model/index driver-api/driver-model/index
+driver-model/overview driver-api/driver-model/overview
+driver-model/platform driver-api/driver-model/platform
+driver-model/porting driver-api/driver-model/porting
+early-userspace/buffer-format driver-api/early-userspace/buffer-format
+early-userspace/early_userspace_support driver-api/early-userspace/early_userspace_support
+early-userspace/index driver-api/early-userspace/index
+errseq core-api/errseq
+filesystems/binderfs admin-guide/binderfs
+filesystems/cifs/cifsd filesystems/smb/ksmbd
+filesystems/cifs/cifsroot filesystems/smb/cifsroot
+filesystems/cifs/index filesystems/smb/index
+filesystems/cifs/ksmbd filesystems/smb/ksmbd
+filesystems/ext4/ext4 admin-guide/ext4
+filesystems/ext4/ondisk/about filesystems/ext4/about
+filesystems/ext4/ondisk/allocators filesystems/ext4/allocators
+filesystems/ext4/ondisk/attributes filesystems/ext4/attributes
+filesystems/ext4/ondisk/bigalloc filesystems/ext4/bigalloc
+filesystems/ext4/ondisk/bitmaps filesystems/ext4/bitmaps
+filesystems/ext4/ondisk/blockgroup filesystems/ext4/blockgroup
+filesystems/ext4/ondisk/blockmap filesystems/ext4/blockmap
+filesystems/ext4/ondisk/blocks filesystems/ext4/blocks
+filesystems/ext4/ondisk/checksums filesystems/ext4/checksums
+filesystems/ext4/ondisk/directory filesystems/ext4/directory
+filesystems/ext4/ondisk/dynamic filesystems/ext4/dynamic
+filesystems/ext4/ondisk/eainode filesystems/ext4/eainode
+filesystems/ext4/ondisk/globals filesystems/ext4/globals
+filesystems/ext4/ondisk/group_descr filesystems/ext4/group_descr
+filesystems/ext4/ondisk/ifork filesystems/ext4/ifork
+filesystems/ext4/ondisk/inlinedata filesystems/ext4/inlinedata
+filesystems/ext4/ondisk/inodes filesystems/ext4/inodes
+filesystems/ext4/ondisk/journal filesystems/ext4/journal
+filesystems/ext4/ondisk/mmp filesystems/ext4/mmp
+filesystems/ext4/ondisk/overview filesystems/ext4/overview
+filesystems/ext4/ondisk/special_inodes filesystems/ext4/special_inodes
+filesystems/ext4/ondisk/super filesystems/ext4/super
+filesystems/sysfs-pci PCI/sysfs-pci
+filesystems/sysfs-tagging networking/sysfs-tagging
+filesystems/xfs-delayed-logging-design filesystems/xfs/xfs-delayed-logging-design
+filesystems/xfs-maintainer-entry-profile filesystems/xfs/xfs-maintainer-entry-profile
+filesystems/xfs-online-fsck-design filesystems/xfs/xfs-online-fsck-design
+filesystems/xfs-self-describing-metadata filesystems/xfs/xfs-self-describing-metadata
+gpio/index admin-guide/gpio/index
+gpio/sysfs userspace-api/gpio/sysfs
+gpu/amdgpu gpu/amdgpu/index
+hte/hte driver-api/hte/hte
+hte/index driver-api/hte/index
+hte/tegra194-hte driver-api/hte/tegra-hte
+input/alps input/devices/alps
+input/amijoy input/devices/amijoy
+input/appletouch input/devices/appletouch
+input/atarikbd input/devices/atarikbd
+input/bcm5974 input/devices/bcm5974
+input/cma3000_d0x input/devices/cma3000_d0x
+input/cs461x input/devices/cs461x
+input/edt-ft5x06 input/devices/edt-ft5x06
+input/elantech input/devices/elantech
+input/iforce-protocol input/devices/iforce-protocol
+input/joystick input/joydev/joystick
+input/joystick-api input/joydev/joystick-api
+input/joystick-parport input/devices/joystick-parport
+input/ntrig input/devices/ntrig
+input/rotary-encoder input/devices/rotary-encoder
+input/sentelic input/devices/sentelic
+input/walkera0701 input/devices/walkera0701
+input/xpad input/devices/xpad
+input/yealink input/devices/yealink
+interconnect/interconnect driver-api/interconnect
+ioctl/botching-up-ioctls process/botching-up-ioctls
+ioctl/cdrom userspace-api/ioctl/cdrom
+ioctl/hdio userspace-api/ioctl/hdio
+ioctl/index userspace-api/ioctl/index
+ioctl/ioctl-decoding userspace-api/ioctl/ioctl-decoding
+ioctl/ioctl-number userspace-api/ioctl/ioctl-number
+kbuild/namespaces core-api/symbol-namespaces
+kdump/index admin-guide/kdump/index
+kdump/kdump admin-guide/kdump/kdump
+kdump/vmcoreinfo admin-guide/kdump/vmcoreinfo
+kernel-documentation doc-guide/kernel-doc
+laptops/asus-laptop admin-guide/laptops/asus-laptop
+laptops/disk-shock-protection admin-guide/laptops/disk-shock-protection
+laptops/index admin-guide/laptops/index
+laptops/laptop-mode admin-guide/laptops/laptop-mode
+laptops/lg-laptop admin-guide/laptops/lg-laptop
+laptops/sony-laptop admin-guide/laptops/sony-laptop
+laptops/sonypi admin-guide/laptops/sonypi
+laptops/thinkpad-acpi admin-guide/laptops/thinkpad-acpi
+laptops/toshiba_haps admin-guide/laptops/toshiba_haps
+loongarch/booting arch/loongarch/booting
+loongarch/features arch/loongarch/features
+loongarch/index arch/loongarch/index
+loongarch/introduction arch/loongarch/introduction
+loongarch/irq-chip-model arch/loongarch/irq-chip-model
+m68k/buddha-driver arch/m68k/buddha-driver
+m68k/features arch/m68k/features
+m68k/index arch/m68k/index
+m68k/kernel-options arch/m68k/kernel-options
+md/index driver-api/md/index
+md/md-cluster driver-api/md/md-cluster
+md/raid5-cache driver-api/md/raid5-cache
+md/raid5-ppl driver-api/md/raid5-ppl
+media/dvb-drivers/avermedia admin-guide/media/avermedia
+media/dvb-drivers/bt8xx admin-guide/media/bt8xx
+media/dvb-drivers/ci admin-guide/media/ci
+media/dvb-drivers/contributors driver-api/media/drivers/contributors
+media/dvb-drivers/dvb-usb driver-api/media/drivers/dvb-usb
+media/dvb-drivers/faq admin-guide/media/faq
+media/dvb-drivers/frontends driver-api/media/drivers/frontends
+media/dvb-drivers/index driver-api/media/drivers/index
+media/dvb-drivers/lmedm04 admin-guide/media/lmedm04
+media/dvb-drivers/opera-firmware admin-guide/media/opera-firmware
+media/dvb-drivers/technisat admin-guide/media/technisat
+media/dvb-drivers/ttusb-dec admin-guide/media/ttusb-dec
+media/intro userspace-api/media/intro
+media/kapi/cec-core driver-api/media/cec-core
+media/kapi/dtv-ca driver-api/media/dtv-ca
+media/kapi/dtv-common driver-api/media/dtv-common
+media/kapi/dtv-core driver-api/media/dtv-core
+media/kapi/dtv-demux driver-api/media/dtv-demux
+media/kapi/dtv-frontend driver-api/media/dtv-frontend
+media/kapi/dtv-net driver-api/media/dtv-net
+media/kapi/mc-core driver-api/media/mc-core
+media/kapi/rc-core driver-api/media/rc-core
+media/kapi/v4l2-async driver-api/media/v4l2-async
+media/kapi/v4l2-common driver-api/media/v4l2-common
+media/kapi/v4l2-controls driver-api/media/v4l2-controls
+media/kapi/v4l2-core driver-api/media/v4l2-core
+media/kapi/v4l2-dev driver-api/media/v4l2-dev
+media/kapi/v4l2-device driver-api/media/v4l2-device
+media/kapi/v4l2-dv-timings driver-api/media/v4l2-dv-timings
+media/kapi/v4l2-event driver-api/media/v4l2-event
+media/kapi/v4l2-fh driver-api/media/v4l2-fh
+media/kapi/v4l2-flash-led-class driver-api/media/v4l2-flash-led-class
+media/kapi/v4l2-fwnode driver-api/media/v4l2-fwnode
+media/kapi/v4l2-intro driver-api/media/v4l2-intro
+media/kapi/v4l2-mc driver-api/media/v4l2-mc
+media/kapi/v4l2-mediabus driver-api/media/v4l2-mediabus
+media/kapi/v4l2-mem2mem driver-api/media/v4l2-mem2mem
+media/kapi/v4l2-rect driver-api/media/v4l2-rect
+media/kapi/v4l2-subdev driver-api/media/v4l2-subdev
+media/kapi/v4l2-tuner driver-api/media/v4l2-tuner
+media/kapi/v4l2-tveeprom driver-api/media/v4l2-tveeprom
+media/kapi/v4l2-videobuf2 driver-api/media/v4l2-videobuf2
+media/media_kapi driver-api/media/index
+media/media_uapi userspace-api/media/index
+media/uapi/cec/cec-api userspace-api/media/cec/cec-api
+media/uapi/cec/cec-func-close userspace-api/media/cec/cec-func-close
+media/uapi/cec/cec-func-ioctl userspace-api/media/cec/cec-func-ioctl
+media/uapi/cec/cec-func-open userspace-api/media/cec/cec-func-open
+media/uapi/cec/cec-func-poll userspace-api/media/cec/cec-func-poll
+media/uapi/cec/cec-funcs userspace-api/media/cec/cec-funcs
+media/uapi/cec/cec-header userspace-api/media/cec/cec-header
+media/uapi/cec/cec-intro userspace-api/media/cec/cec-intro
+media/uapi/cec/cec-ioc-adap-g-caps userspace-api/media/cec/cec-ioc-adap-g-caps
+media/uapi/cec/cec-ioc-adap-g-conn-info userspace-api/media/cec/cec-ioc-adap-g-conn-info
+media/uapi/cec/cec-ioc-adap-g-log-addrs userspace-api/media/cec/cec-ioc-adap-g-log-addrs
+media/uapi/cec/cec-ioc-adap-g-phys-addr userspace-api/media/cec/cec-ioc-adap-g-phys-addr
+media/uapi/cec/cec-ioc-dqevent userspace-api/media/cec/cec-ioc-dqevent
+media/uapi/cec/cec-ioc-g-mode userspace-api/media/cec/cec-ioc-g-mode
+media/uapi/cec/cec-ioc-receive userspace-api/media/cec/cec-ioc-receive
+media/uapi/cec/cec-pin-error-inj userspace-api/media/cec/cec-pin-error-inj
+media/uapi/dvb/ca userspace-api/media/dvb/ca
+media/uapi/dvb/ca-fclose userspace-api/media/dvb/ca-fclose
+media/uapi/dvb/ca-fopen userspace-api/media/dvb/ca-fopen
+media/uapi/dvb/ca-get-cap userspace-api/media/dvb/ca-get-cap
+media/uapi/dvb/ca-get-descr-info userspace-api/media/dvb/ca-get-descr-info
+media/uapi/dvb/ca-get-msg userspace-api/media/dvb/ca-get-msg
+media/uapi/dvb/ca-get-slot-info userspace-api/media/dvb/ca-get-slot-info
+media/uapi/dvb/ca-reset userspace-api/media/dvb/ca-reset
+media/uapi/dvb/ca-send-msg userspace-api/media/dvb/ca-send-msg
+media/uapi/dvb/ca-set-descr userspace-api/media/dvb/ca-set-descr
+media/uapi/dvb/ca_data_types userspace-api/media/dvb/ca_data_types
+media/uapi/dvb/ca_function_calls userspace-api/media/dvb/ca_function_calls
+media/uapi/dvb/ca_high_level userspace-api/media/dvb/ca_high_level
+media/uapi/dvb/demux userspace-api/media/dvb/demux
+media/uapi/dvb/dmx-add-pid userspace-api/media/dvb/dmx-add-pid
+media/uapi/dvb/dmx-expbuf userspace-api/media/dvb/dmx-expbuf
+media/uapi/dvb/dmx-fclose userspace-api/media/dvb/dmx-fclose
+media/uapi/dvb/dmx-fopen userspace-api/media/dvb/dmx-fopen
+media/uapi/dvb/dmx-fread userspace-api/media/dvb/dmx-fread
+media/uapi/dvb/dmx-fwrite userspace-api/media/dvb/dmx-fwrite
+media/uapi/dvb/dmx-get-pes-pids userspace-api/media/dvb/dmx-get-pes-pids
+media/uapi/dvb/dmx-get-stc userspace-api/media/dvb/dmx-get-stc
+media/uapi/dvb/dmx-mmap userspace-api/media/dvb/dmx-mmap
+media/uapi/dvb/dmx-munmap userspace-api/media/dvb/dmx-munmap
+media/uapi/dvb/dmx-qbuf userspace-api/media/dvb/dmx-qbuf
+media/uapi/dvb/dmx-querybuf userspace-api/media/dvb/dmx-querybuf
+media/uapi/dvb/dmx-remove-pid userspace-api/media/dvb/dmx-remove-pid
+media/uapi/dvb/dmx-reqbufs userspace-api/media/dvb/dmx-reqbufs
+media/uapi/dvb/dmx-set-buffer-size userspace-api/media/dvb/dmx-set-buffer-size
+media/uapi/dvb/dmx-set-filter userspace-api/media/dvb/dmx-set-filter
+media/uapi/dvb/dmx-set-pes-filter userspace-api/media/dvb/dmx-set-pes-filter
+media/uapi/dvb/dmx-start userspace-api/media/dvb/dmx-start
+media/uapi/dvb/dmx-stop userspace-api/media/dvb/dmx-stop
+media/uapi/dvb/dmx_fcalls userspace-api/media/dvb/dmx_fcalls
+media/uapi/dvb/dmx_types userspace-api/media/dvb/dmx_types
+media/uapi/dvb/dvb-fe-read-status userspace-api/media/dvb/dvb-fe-read-status
+media/uapi/dvb/dvb-frontend-event userspace-api/media/dvb/dvb-frontend-event
+media/uapi/dvb/dvb-frontend-parameters userspace-api/media/dvb/dvb-frontend-parameters
+media/uapi/dvb/dvbapi userspace-api/media/dvb/dvbapi
+media/uapi/dvb/dvbproperty userspace-api/media/dvb/dvbproperty
+media/uapi/dvb/examples userspace-api/media/dvb/examples
+media/uapi/dvb/fe-bandwidth-t userspace-api/media/dvb/fe-bandwidth-t
+media/uapi/dvb/fe-diseqc-recv-slave-reply userspace-api/media/dvb/fe-diseqc-recv-slave-reply
+media/uapi/dvb/fe-diseqc-reset-overload userspace-api/media/dvb/fe-diseqc-reset-overload
+media/uapi/dvb/fe-diseqc-send-burst userspace-api/media/dvb/fe-diseqc-send-burst
+media/uapi/dvb/fe-diseqc-send-master-cmd userspace-api/media/dvb/fe-diseqc-send-master-cmd
+media/uapi/dvb/fe-dishnetwork-send-legacy-cmd userspace-api/media/dvb/fe-dishnetwork-send-legacy-cmd
+media/uapi/dvb/fe-enable-high-lnb-voltage userspace-api/media/dvb/fe-enable-high-lnb-voltage
+media/uapi/dvb/fe-get-event userspace-api/media/dvb/fe-get-event
+media/uapi/dvb/fe-get-frontend userspace-api/media/dvb/fe-get-frontend
+media/uapi/dvb/fe-get-info userspace-api/media/dvb/fe-get-info
+media/uapi/dvb/fe-get-property userspace-api/media/dvb/fe-get-property
+media/uapi/dvb/fe-read-ber userspace-api/media/dvb/fe-read-ber
+media/uapi/dvb/fe-read-signal-strength userspace-api/media/dvb/fe-read-signal-strength
+media/uapi/dvb/fe-read-snr userspace-api/media/dvb/fe-read-snr
+media/uapi/dvb/fe-read-status userspace-api/media/dvb/fe-read-status
+media/uapi/dvb/fe-read-uncorrected-blocks userspace-api/media/dvb/fe-read-uncorrected-blocks
+media/uapi/dvb/fe-set-frontend userspace-api/media/dvb/fe-set-frontend
+media/uapi/dvb/fe-set-frontend-tune-mode userspace-api/media/dvb/fe-set-frontend-tune-mode
+media/uapi/dvb/fe-set-tone userspace-api/media/dvb/fe-set-tone
+media/uapi/dvb/fe-set-voltage userspace-api/media/dvb/fe-set-voltage
+media/uapi/dvb/fe-type-t userspace-api/media/dvb/fe-type-t
+media/uapi/dvb/fe_property_parameters userspace-api/media/dvb/fe_property_parameters
+media/uapi/dvb/frontend userspace-api/media/dvb/frontend
+media/uapi/dvb/frontend-header userspace-api/media/dvb/frontend-header
+media/uapi/dvb/frontend-property-cable-systems userspace-api/media/dvb/frontend-property-cable-systems
+media/uapi/dvb/frontend-property-satellite-systems userspace-api/media/dvb/frontend-property-satellite-systems
+media/uapi/dvb/frontend-property-terrestrial-systems userspace-api/media/dvb/frontend-property-terrestrial-systems
+media/uapi/dvb/frontend-stat-properties userspace-api/media/dvb/frontend-stat-properties
+media/uapi/dvb/frontend_f_close userspace-api/media/dvb/frontend_f_close
+media/uapi/dvb/frontend_f_open userspace-api/media/dvb/frontend_f_open
+media/uapi/dvb/frontend_fcalls userspace-api/media/dvb/frontend_fcalls
+media/uapi/dvb/frontend_legacy_api userspace-api/media/dvb/frontend_legacy_api
+media/uapi/dvb/frontend_legacy_dvbv3_api userspace-api/media/dvb/frontend_legacy_dvbv3_api
+media/uapi/dvb/headers userspace-api/media/dvb/headers
+media/uapi/dvb/intro userspace-api/media/dvb/intro
+media/uapi/dvb/legacy_dvb_apis userspace-api/media/dvb/legacy_dvb_apis
+media/uapi/dvb/net userspace-api/media/dvb/net
+media/uapi/dvb/net-add-if userspace-api/media/dvb/net-add-if
+media/uapi/dvb/net-get-if userspace-api/media/dvb/net-get-if
+media/uapi/dvb/net-remove-if userspace-api/media/dvb/net-remove-if
+media/uapi/dvb/net-types userspace-api/media/dvb/net-types
+media/uapi/dvb/query-dvb-frontend-info userspace-api/media/dvb/query-dvb-frontend-info
+media/uapi/fdl-appendix userspace-api/media/fdl-appendix
+media/uapi/gen-errors userspace-api/media/gen-errors
+media/uapi/mediactl/media-controller userspace-api/media/mediactl/media-controller
+media/uapi/mediactl/media-controller-intro userspace-api/media/mediactl/media-controller-intro
+media/uapi/mediactl/media-controller-model userspace-api/media/mediactl/media-controller-model
+media/uapi/mediactl/media-func-close userspace-api/media/mediactl/media-func-close
+media/uapi/mediactl/media-func-ioctl userspace-api/media/mediactl/media-func-ioctl
+media/uapi/mediactl/media-func-open userspace-api/media/mediactl/media-func-open
+media/uapi/mediactl/media-funcs userspace-api/media/mediactl/media-funcs
+media/uapi/mediactl/media-header userspace-api/media/mediactl/media-header
+media/uapi/mediactl/media-ioc-device-info userspace-api/media/mediactl/media-ioc-device-info
+media/uapi/mediactl/media-ioc-enum-entities userspace-api/media/mediactl/media-ioc-enum-entities
+media/uapi/mediactl/media-ioc-enum-links userspace-api/media/mediactl/media-ioc-enum-links
+media/uapi/mediactl/media-ioc-g-topology userspace-api/media/mediactl/media-ioc-g-topology
+media/uapi/mediactl/media-ioc-request-alloc userspace-api/media/mediactl/media-ioc-request-alloc
+media/uapi/mediactl/media-ioc-setup-link userspace-api/media/mediactl/media-ioc-setup-link
+media/uapi/mediactl/media-request-ioc-queue userspace-api/media/mediactl/media-request-ioc-queue
+media/uapi/mediactl/media-request-ioc-reinit userspace-api/media/mediactl/media-request-ioc-reinit
+media/uapi/mediactl/media-types userspace-api/media/mediactl/media-types
+media/uapi/mediactl/request-api userspace-api/media/mediactl/request-api
+media/uapi/mediactl/request-func-close userspace-api/media/mediactl/request-func-close
+media/uapi/mediactl/request-func-ioctl userspace-api/media/mediactl/request-func-ioctl
+media/uapi/mediactl/request-func-poll userspace-api/media/mediactl/request-func-poll
+media/uapi/rc/keytable.c userspace-api/media/rc/keytable.c
+media/uapi/rc/lirc-dev userspace-api/media/rc/lirc-dev
+media/uapi/rc/lirc-dev-intro userspace-api/media/rc/lirc-dev-intro
+media/uapi/rc/lirc-func userspace-api/media/rc/lirc-func
+media/uapi/rc/lirc-get-features userspace-api/media/rc/lirc-get-features
+media/uapi/rc/lirc-get-rec-mode userspace-api/media/rc/lirc-get-rec-mode
+media/uapi/rc/lirc-get-rec-resolution userspace-api/media/rc/lirc-get-rec-resolution
+media/uapi/rc/lirc-get-send-mode userspace-api/media/rc/lirc-get-send-mode
+media/uapi/rc/lirc-get-timeout userspace-api/media/rc/lirc-get-timeout
+media/uapi/rc/lirc-header userspace-api/media/rc/lirc-header
+media/uapi/rc/lirc-read userspace-api/media/rc/lirc-read
+media/uapi/rc/lirc-set-measure-carrier-mode userspace-api/media/rc/lirc-set-measure-carrier-mode
+media/uapi/rc/lirc-set-rec-carrier userspace-api/media/rc/lirc-set-rec-carrier
+media/uapi/rc/lirc-set-rec-carrier-range userspace-api/media/rc/lirc-set-rec-carrier-range
+media/uapi/rc/lirc-set-rec-timeout userspace-api/media/rc/lirc-set-rec-timeout
+media/uapi/rc/lirc-set-send-carrier userspace-api/media/rc/lirc-set-send-carrier
+media/uapi/rc/lirc-set-send-duty-cycle userspace-api/media/rc/lirc-set-send-duty-cycle
+media/uapi/rc/lirc-set-transmitter-mask userspace-api/media/rc/lirc-set-transmitter-mask
+media/uapi/rc/lirc-set-wideband-receiver userspace-api/media/rc/lirc-set-wideband-receiver
+media/uapi/rc/lirc-write userspace-api/media/rc/lirc-write
+media/uapi/rc/rc-intro userspace-api/media/rc/rc-intro
+media/uapi/rc/rc-protos userspace-api/media/rc/rc-protos
+media/uapi/rc/rc-sysfs-nodes userspace-api/media/rc/rc-sysfs-nodes
+media/uapi/rc/rc-table-change userspace-api/media/rc/rc-table-change
+media/uapi/rc/rc-tables userspace-api/media/rc/rc-tables
+media/uapi/rc/remote_controllers userspace-api/media/rc/remote_controllers
+media/uapi/v4l/app-pri userspace-api/media/v4l/app-pri
+media/uapi/v4l/audio userspace-api/media/v4l/audio
+media/uapi/v4l/biblio userspace-api/media/v4l/biblio
+media/uapi/v4l/buffer userspace-api/media/v4l/buffer
+media/uapi/v4l/capture-example userspace-api/media/v4l/capture-example
+media/uapi/v4l/capture.c userspace-api/media/v4l/capture.c
+media/uapi/v4l/colorspaces userspace-api/media/v4l/colorspaces
+media/uapi/v4l/colorspaces-defs userspace-api/media/v4l/colorspaces-defs
+media/uapi/v4l/colorspaces-details userspace-api/media/v4l/colorspaces-details
+media/uapi/v4l/common userspace-api/media/v4l/common
+media/uapi/v4l/common-defs userspace-api/media/v4l/common-defs
+media/uapi/v4l/compat userspace-api/media/v4l/compat
+media/uapi/v4l/control userspace-api/media/v4l/control
+media/uapi/v4l/crop userspace-api/media/v4l/crop
+media/uapi/v4l/depth-formats userspace-api/media/v4l/depth-formats
+media/uapi/v4l/dev-capture userspace-api/media/v4l/dev-capture
+media/uapi/v4l/dev-codec userspace-api/media/v4l/dev-mem2mem
+media/uapi/v4l/dev-decoder userspace-api/media/v4l/dev-decoder
+media/uapi/v4l/dev-event userspace-api/media/v4l/dev-event
+media/uapi/v4l/dev-mem2mem userspace-api/media/v4l/dev-mem2mem
+media/uapi/v4l/dev-meta userspace-api/media/v4l/dev-meta
+media/uapi/v4l/dev-osd userspace-api/media/v4l/dev-osd
+media/uapi/v4l/dev-output userspace-api/media/v4l/dev-output
+media/uapi/v4l/dev-overlay userspace-api/media/v4l/dev-overlay
+media/uapi/v4l/dev-radio userspace-api/media/v4l/dev-radio
+media/uapi/v4l/dev-raw-vbi userspace-api/media/v4l/dev-raw-vbi
+media/uapi/v4l/dev-rds userspace-api/media/v4l/dev-rds
+media/uapi/v4l/dev-sdr userspace-api/media/v4l/dev-sdr
+media/uapi/v4l/dev-sliced-vbi userspace-api/media/v4l/dev-sliced-vbi
+media/uapi/v4l/dev-stateless-decoder userspace-api/media/v4l/dev-stateless-decoder
+media/uapi/v4l/dev-subdev userspace-api/media/v4l/dev-subdev
+media/uapi/v4l/dev-touch userspace-api/media/v4l/dev-touch
+media/uapi/v4l/devices userspace-api/media/v4l/devices
+media/uapi/v4l/diff-v4l userspace-api/media/v4l/diff-v4l
+media/uapi/v4l/dmabuf userspace-api/media/v4l/dmabuf
+media/uapi/v4l/dv-timings userspace-api/media/v4l/dv-timings
+media/uapi/v4l/ext-ctrls-camera userspace-api/media/v4l/ext-ctrls-camera
+media/uapi/v4l/ext-ctrls-codec userspace-api/media/v4l/ext-ctrls-codec
+media/uapi/v4l/ext-ctrls-detect userspace-api/media/v4l/ext-ctrls-detect
+media/uapi/v4l/ext-ctrls-dv userspace-api/media/v4l/ext-ctrls-dv
+media/uapi/v4l/ext-ctrls-flash userspace-api/media/v4l/ext-ctrls-flash
+media/uapi/v4l/ext-ctrls-fm-rx userspace-api/media/v4l/ext-ctrls-fm-rx
+media/uapi/v4l/ext-ctrls-fm-tx userspace-api/media/v4l/ext-ctrls-fm-tx
+media/uapi/v4l/ext-ctrls-image-process userspace-api/media/v4l/ext-ctrls-image-process
+media/uapi/v4l/ext-ctrls-image-source userspace-api/media/v4l/ext-ctrls-image-source
+media/uapi/v4l/ext-ctrls-jpeg userspace-api/media/v4l/ext-ctrls-jpeg
+media/uapi/v4l/ext-ctrls-rf-tuner userspace-api/media/v4l/ext-ctrls-rf-tuner
+media/uapi/v4l/extended-controls userspace-api/media/v4l/extended-controls
+media/uapi/v4l/field-order userspace-api/media/v4l/field-order
+media/uapi/v4l/format userspace-api/media/v4l/format
+media/uapi/v4l/func-close userspace-api/media/v4l/func-close
+media/uapi/v4l/func-ioctl userspace-api/media/v4l/func-ioctl
+media/uapi/v4l/func-mmap userspace-api/media/v4l/func-mmap
+media/uapi/v4l/func-munmap userspace-api/media/v4l/func-munmap
+media/uapi/v4l/func-open userspace-api/media/v4l/func-open
+media/uapi/v4l/func-poll userspace-api/media/v4l/func-poll
+media/uapi/v4l/func-read userspace-api/media/v4l/func-read
+media/uapi/v4l/func-select userspace-api/media/v4l/func-select
+media/uapi/v4l/func-write userspace-api/media/v4l/func-write
+media/uapi/v4l/hist-v4l2 userspace-api/media/v4l/hist-v4l2
+media/uapi/v4l/hsv-formats userspace-api/media/v4l/hsv-formats
+media/uapi/v4l/io userspace-api/media/v4l/io
+media/uapi/v4l/libv4l userspace-api/media/v4l/libv4l
+media/uapi/v4l/libv4l-introduction userspace-api/media/v4l/libv4l-introduction
+media/uapi/v4l/meta-formats userspace-api/media/v4l/meta-formats
+media/uapi/v4l/mmap userspace-api/media/v4l/mmap
+media/uapi/v4l/open userspace-api/media/v4l/open
+media/uapi/v4l/pixfmt userspace-api/media/v4l/pixfmt
+media/uapi/v4l/pixfmt-002 userspace-api/media/v4l/pixfmt-v4l2
+media/uapi/v4l/pixfmt-003 userspace-api/media/v4l/pixfmt-v4l2-mplane
+media/uapi/v4l/pixfmt-004 userspace-api/media/v4l/pixfmt-intro
+media/uapi/v4l/pixfmt-006 userspace-api/media/v4l/colorspaces-defs
+media/uapi/v4l/pixfmt-007 userspace-api/media/v4l/colorspaces-details
+media/uapi/v4l/pixfmt-013 userspace-api/media/v4l/pixfmt-compressed
+media/uapi/v4l/pixfmt-bayer userspace-api/media/v4l/pixfmt-bayer
+media/uapi/v4l/pixfmt-cnf4 userspace-api/media/v4l/pixfmt-cnf4
+media/uapi/v4l/pixfmt-compressed userspace-api/media/v4l/pixfmt-compressed
+media/uapi/v4l/pixfmt-indexed userspace-api/media/v4l/pixfmt-indexed
+media/uapi/v4l/pixfmt-intro userspace-api/media/v4l/pixfmt-intro
+media/uapi/v4l/pixfmt-inzi userspace-api/media/v4l/pixfmt-inzi
+media/uapi/v4l/pixfmt-m420 userspace-api/media/v4l/pixfmt-m420
+media/uapi/v4l/pixfmt-meta-d4xx userspace-api/media/v4l/metafmt-d4xx
+media/uapi/v4l/pixfmt-meta-intel-ipu3 userspace-api/media/v4l/metafmt-intel-ipu3
+media/uapi/v4l/pixfmt-meta-uvc userspace-api/media/v4l/metafmt-uvc
+media/uapi/v4l/pixfmt-meta-vivid userspace-api/media/v4l/metafmt-vivid
+media/uapi/v4l/pixfmt-meta-vsp1-hgo userspace-api/media/v4l/metafmt-vsp1-hgo
+media/uapi/v4l/pixfmt-meta-vsp1-hgt userspace-api/media/v4l/metafmt-vsp1-hgt
+media/uapi/v4l/pixfmt-packed-hsv userspace-api/media/v4l/pixfmt-packed-hsv
+media/uapi/v4l/pixfmt-packed-yuv userspace-api/media/v4l/pixfmt-packed-yuv
+media/uapi/v4l/pixfmt-reserved userspace-api/media/v4l/pixfmt-reserved
+media/uapi/v4l/pixfmt-rgb userspace-api/media/v4l/pixfmt-rgb
+media/uapi/v4l/pixfmt-sbggr16 userspace-api/media/v4l/pixfmt-srggb16
+media/uapi/v4l/pixfmt-sdr-cs08 userspace-api/media/v4l/pixfmt-sdr-cs08
+media/uapi/v4l/pixfmt-sdr-cs14le userspace-api/media/v4l/pixfmt-sdr-cs14le
+media/uapi/v4l/pixfmt-sdr-cu08 userspace-api/media/v4l/pixfmt-sdr-cu08
+media/uapi/v4l/pixfmt-sdr-cu16le userspace-api/media/v4l/pixfmt-sdr-cu16le
+media/uapi/v4l/pixfmt-sdr-pcu16be userspace-api/media/v4l/pixfmt-sdr-pcu16be
+media/uapi/v4l/pixfmt-sdr-pcu18be userspace-api/media/v4l/pixfmt-sdr-pcu18be
+media/uapi/v4l/pixfmt-sdr-pcu20be userspace-api/media/v4l/pixfmt-sdr-pcu20be
+media/uapi/v4l/pixfmt-sdr-ru12le userspace-api/media/v4l/pixfmt-sdr-ru12le
+media/uapi/v4l/pixfmt-srggb10 userspace-api/media/v4l/pixfmt-srggb10
+media/uapi/v4l/pixfmt-srggb10-ipu3 userspace-api/media/v4l/pixfmt-srggb10-ipu3
+media/uapi/v4l/pixfmt-srggb10alaw8 userspace-api/media/v4l/pixfmt-srggb10alaw8
+media/uapi/v4l/pixfmt-srggb10dpcm8 userspace-api/media/v4l/pixfmt-srggb10dpcm8
+media/uapi/v4l/pixfmt-srggb10p userspace-api/media/v4l/pixfmt-srggb10p
+media/uapi/v4l/pixfmt-srggb12 userspace-api/media/v4l/pixfmt-srggb12
+media/uapi/v4l/pixfmt-srggb12p userspace-api/media/v4l/pixfmt-srggb12p
+media/uapi/v4l/pixfmt-srggb14 userspace-api/media/v4l/pixfmt-srggb14
+media/uapi/v4l/pixfmt-srggb14p userspace-api/media/v4l/pixfmt-srggb14p
+media/uapi/v4l/pixfmt-srggb16 userspace-api/media/v4l/pixfmt-srggb16
+media/uapi/v4l/pixfmt-srggb8 userspace-api/media/v4l/pixfmt-srggb8
+media/uapi/v4l/pixfmt-tch-td08 userspace-api/media/v4l/pixfmt-tch-td08
+media/uapi/v4l/pixfmt-tch-td16 userspace-api/media/v4l/pixfmt-tch-td16
+media/uapi/v4l/pixfmt-tch-tu08 userspace-api/media/v4l/pixfmt-tch-tu08
+media/uapi/v4l/pixfmt-tch-tu16 userspace-api/media/v4l/pixfmt-tch-tu16
+media/uapi/v4l/pixfmt-uv8 userspace-api/media/v4l/pixfmt-uv8
+media/uapi/v4l/pixfmt-v4l2 userspace-api/media/v4l/pixfmt-v4l2
+media/uapi/v4l/pixfmt-v4l2-mplane userspace-api/media/v4l/pixfmt-v4l2-mplane
+media/uapi/v4l/pixfmt-y12i userspace-api/media/v4l/pixfmt-y12i
+media/uapi/v4l/pixfmt-y8i userspace-api/media/v4l/pixfmt-y8i
+media/uapi/v4l/pixfmt-z16 userspace-api/media/v4l/pixfmt-z16
+media/uapi/v4l/planar-apis userspace-api/media/v4l/planar-apis
+media/uapi/v4l/querycap userspace-api/media/v4l/querycap
+media/uapi/v4l/rw userspace-api/media/v4l/rw
+media/uapi/v4l/sdr-formats userspace-api/media/v4l/sdr-formats
+media/uapi/v4l/selection-api userspace-api/media/v4l/selection-api
+media/uapi/v4l/selection-api-002 userspace-api/media/v4l/selection-api-intro
+media/uapi/v4l/selection-api-003 userspace-api/media/v4l/selection-api-targets
+media/uapi/v4l/selection-api-004 userspace-api/media/v4l/selection-api-configuration
+media/uapi/v4l/selection-api-005 userspace-api/media/v4l/selection-api-vs-crop-api
+media/uapi/v4l/selection-api-006 userspace-api/media/v4l/selection-api-examples
+media/uapi/v4l/selection-api-configuration userspace-api/media/v4l/selection-api-configuration
+media/uapi/v4l/selection-api-examples userspace-api/media/v4l/selection-api-examples
+media/uapi/v4l/selection-api-intro userspace-api/media/v4l/selection-api-intro
+media/uapi/v4l/selection-api-targets userspace-api/media/v4l/selection-api-targets
+media/uapi/v4l/selection-api-vs-crop-api userspace-api/media/v4l/selection-api-vs-crop-api
+media/uapi/v4l/selections-common userspace-api/media/v4l/selections-common
+media/uapi/v4l/standard userspace-api/media/v4l/standard
+media/uapi/v4l/streaming-par userspace-api/media/v4l/streaming-par
+media/uapi/v4l/subdev-formats userspace-api/media/v4l/subdev-formats
+media/uapi/v4l/tch-formats userspace-api/media/v4l/tch-formats
+media/uapi/v4l/tuner userspace-api/media/v4l/tuner
+media/uapi/v4l/user-func userspace-api/media/v4l/user-func
+media/uapi/v4l/userp userspace-api/media/v4l/userp
+media/uapi/v4l/v4l2 userspace-api/media/v4l/v4l2
+media/uapi/v4l/v4l2-selection-flags userspace-api/media/v4l/v4l2-selection-flags
+media/uapi/v4l/v4l2-selection-targets userspace-api/media/v4l/v4l2-selection-targets
+media/uapi/v4l/v4l2grab-example userspace-api/media/v4l/v4l2grab-example
+media/uapi/v4l/v4l2grab.c userspace-api/media/v4l/v4l2grab.c
+media/uapi/v4l/video userspace-api/media/v4l/video
+media/uapi/v4l/videodev userspace-api/media/v4l/videodev
+media/uapi/v4l/vidioc-create-bufs userspace-api/media/v4l/vidioc-create-bufs
+media/uapi/v4l/vidioc-cropcap userspace-api/media/v4l/vidioc-cropcap
+media/uapi/v4l/vidioc-dbg-g-chip-info userspace-api/media/v4l/vidioc-dbg-g-chip-info
+media/uapi/v4l/vidioc-dbg-g-register userspace-api/media/v4l/vidioc-dbg-g-register
+media/uapi/v4l/vidioc-decoder-cmd userspace-api/media/v4l/vidioc-decoder-cmd
+media/uapi/v4l/vidioc-dqevent userspace-api/media/v4l/vidioc-dqevent
+media/uapi/v4l/vidioc-dv-timings-cap userspace-api/media/v4l/vidioc-dv-timings-cap
+media/uapi/v4l/vidioc-encoder-cmd userspace-api/media/v4l/vidioc-encoder-cmd
+media/uapi/v4l/vidioc-enum-dv-timings userspace-api/media/v4l/vidioc-enum-dv-timings
+media/uapi/v4l/vidioc-enum-fmt userspace-api/media/v4l/vidioc-enum-fmt
+media/uapi/v4l/vidioc-enum-frameintervals userspace-api/media/v4l/vidioc-enum-frameintervals
+media/uapi/v4l/vidioc-enum-framesizes userspace-api/media/v4l/vidioc-enum-framesizes
+media/uapi/v4l/vidioc-enum-freq-bands userspace-api/media/v4l/vidioc-enum-freq-bands
+media/uapi/v4l/vidioc-enumaudio userspace-api/media/v4l/vidioc-enumaudio
+media/uapi/v4l/vidioc-enumaudioout userspace-api/media/v4l/vidioc-enumaudioout
+media/uapi/v4l/vidioc-enuminput userspace-api/media/v4l/vidioc-enuminput
+media/uapi/v4l/vidioc-enumoutput userspace-api/media/v4l/vidioc-enumoutput
+media/uapi/v4l/vidioc-enumstd userspace-api/media/v4l/vidioc-enumstd
+media/uapi/v4l/vidioc-expbuf userspace-api/media/v4l/vidioc-expbuf
+media/uapi/v4l/vidioc-g-audio userspace-api/media/v4l/vidioc-g-audio
+media/uapi/v4l/vidioc-g-audioout userspace-api/media/v4l/vidioc-g-audioout
+media/uapi/v4l/vidioc-g-crop userspace-api/media/v4l/vidioc-g-crop
+media/uapi/v4l/vidioc-g-ctrl userspace-api/media/v4l/vidioc-g-ctrl
+media/uapi/v4l/vidioc-g-dv-timings userspace-api/media/v4l/vidioc-g-dv-timings
+media/uapi/v4l/vidioc-g-edid userspace-api/media/v4l/vidioc-g-edid
+media/uapi/v4l/vidioc-g-enc-index userspace-api/media/v4l/vidioc-g-enc-index
+media/uapi/v4l/vidioc-g-ext-ctrls userspace-api/media/v4l/vidioc-g-ext-ctrls
+media/uapi/v4l/vidioc-g-fbuf userspace-api/media/v4l/vidioc-g-fbuf
+media/uapi/v4l/vidioc-g-fmt userspace-api/media/v4l/vidioc-g-fmt
+media/uapi/v4l/vidioc-g-frequency userspace-api/media/v4l/vidioc-g-frequency
+media/uapi/v4l/vidioc-g-input userspace-api/media/v4l/vidioc-g-input
+media/uapi/v4l/vidioc-g-jpegcomp userspace-api/media/v4l/vidioc-g-jpegcomp
+media/uapi/v4l/vidioc-g-modulator userspace-api/media/v4l/vidioc-g-modulator
+media/uapi/v4l/vidioc-g-output userspace-api/media/v4l/vidioc-g-output
+media/uapi/v4l/vidioc-g-parm userspace-api/media/v4l/vidioc-g-parm
+media/uapi/v4l/vidioc-g-priority userspace-api/media/v4l/vidioc-g-priority
+media/uapi/v4l/vidioc-g-selection userspace-api/media/v4l/vidioc-g-selection
+media/uapi/v4l/vidioc-g-sliced-vbi-cap userspace-api/media/v4l/vidioc-g-sliced-vbi-cap
+media/uapi/v4l/vidioc-g-std userspace-api/media/v4l/vidioc-g-std
+media/uapi/v4l/vidioc-g-tuner userspace-api/media/v4l/vidioc-g-tuner
+media/uapi/v4l/vidioc-log-status userspace-api/media/v4l/vidioc-log-status
+media/uapi/v4l/vidioc-overlay userspace-api/media/v4l/vidioc-overlay
+media/uapi/v4l/vidioc-prepare-buf userspace-api/media/v4l/vidioc-prepare-buf
+media/uapi/v4l/vidioc-qbuf userspace-api/media/v4l/vidioc-qbuf
+media/uapi/v4l/vidioc-query-dv-timings userspace-api/media/v4l/vidioc-query-dv-timings
+media/uapi/v4l/vidioc-querybuf userspace-api/media/v4l/vidioc-querybuf
+media/uapi/v4l/vidioc-querycap userspace-api/media/v4l/vidioc-querycap
+media/uapi/v4l/vidioc-queryctrl userspace-api/media/v4l/vidioc-queryctrl
+media/uapi/v4l/vidioc-querystd userspace-api/media/v4l/vidioc-querystd
+media/uapi/v4l/vidioc-reqbufs userspace-api/media/v4l/vidioc-reqbufs
+media/uapi/v4l/vidioc-s-hw-freq-seek userspace-api/media/v4l/vidioc-s-hw-freq-seek
+media/uapi/v4l/vidioc-streamon userspace-api/media/v4l/vidioc-streamon
+media/uapi/v4l/vidioc-subdev-enum-frame-interval userspace-api/media/v4l/vidioc-subdev-enum-frame-interval
+media/uapi/v4l/vidioc-subdev-enum-frame-size userspace-api/media/v4l/vidioc-subdev-enum-frame-size
+media/uapi/v4l/vidioc-subdev-enum-mbus-code userspace-api/media/v4l/vidioc-subdev-enum-mbus-code
+media/uapi/v4l/vidioc-subdev-g-crop userspace-api/media/v4l/vidioc-subdev-g-crop
+media/uapi/v4l/vidioc-subdev-g-fmt userspace-api/media/v4l/vidioc-subdev-g-fmt
+media/uapi/v4l/vidioc-subdev-g-frame-interval userspace-api/media/v4l/vidioc-subdev-g-frame-interval
+media/uapi/v4l/vidioc-subdev-g-selection userspace-api/media/v4l/vidioc-subdev-g-selection
+media/uapi/v4l/vidioc-subscribe-event userspace-api/media/v4l/vidioc-subscribe-event
+media/uapi/v4l/yuv-formats userspace-api/media/v4l/yuv-formats
+media/v4l-drivers/au0828-cardlist admin-guide/media/au0828-cardlist
+media/v4l-drivers/bttv admin-guide/media/bttv
+media/v4l-drivers/bttv-cardlist admin-guide/media/bttv-cardlist
+media/v4l-drivers/bttv-devel driver-api/media/drivers/bttv-devel
+media/v4l-drivers/cafe_ccic admin-guide/media/cafe_ccic
+media/v4l-drivers/cardlist admin-guide/media/cardlist
+media/v4l-drivers/cx2341x driver-api/media/drivers/cx2341x-devel
+media/v4l-drivers/cx2341x-devel driver-api/media/drivers/cx2341x-devel
+media/v4l-drivers/cx2341x-uapi userspace-api/media/drivers/cx2341x-uapi
+media/v4l-drivers/cx23885-cardlist admin-guide/media/cx23885-cardlist
+media/v4l-drivers/cx88 admin-guide/media/cx88
+media/v4l-drivers/cx88-cardlist admin-guide/media/cx88-cardlist
+media/v4l-drivers/cx88-devel driver-api/media/drivers/cx88-devel
+media/v4l-drivers/em28xx-cardlist admin-guide/media/em28xx-cardlist
+media/v4l-drivers/fimc admin-guide/media/fimc
+media/v4l-drivers/fimc-devel driver-api/media/drivers/fimc-devel
+media/v4l-drivers/fourcc userspace-api/media/v4l/fourcc
+media/v4l-drivers/gspca-cardlist admin-guide/media/gspca-cardlist
+media/v4l-drivers/imx admin-guide/media/imx
+media/v4l-drivers/imx-uapi userspace-api/media/drivers/imx-uapi
+media/v4l-drivers/imx7 admin-guide/media/imx7
+media/v4l-drivers/index userspace-api/media/drivers/index
+media/v4l-drivers/ipu3 admin-guide/media/ipu3
+media/v4l-drivers/ivtv admin-guide/media/ivtv
+media/v4l-drivers/ivtv-cardlist admin-guide/media/ivtv-cardlist
+media/v4l-drivers/max2175 userspace-api/media/drivers/max2175
+media/v4l-drivers/omap3isp admin-guide/media/omap3isp
+media/v4l-drivers/omap3isp-uapi userspace-api/media/drivers/omap3isp-uapi
+media/v4l-drivers/philips admin-guide/media/philips
+media/v4l-drivers/pvrusb2 driver-api/media/drivers/pvrusb2
+media/v4l-drivers/pxa_camera driver-api/media/drivers/pxa_camera
+media/v4l-drivers/qcom_camss admin-guide/media/qcom_camss
+media/v4l-drivers/radiotrack driver-api/media/drivers/radiotrack
+media/v4l-drivers/rcar-fdp1 admin-guide/media/rcar-fdp1
+media/v4l-drivers/saa7134 admin-guide/media/saa7134
+media/v4l-drivers/saa7134-cardlist admin-guide/media/saa7134-cardlist
+media/v4l-drivers/saa7134-devel driver-api/media/drivers/saa7134-devel
+media/v4l-drivers/saa7164-cardlist admin-guide/media/saa7164-cardlist
+media/v4l-drivers/sh_mobile_ceu_camera driver-api/media/drivers/sh_mobile_ceu_camera
+media/v4l-drivers/si470x admin-guide/media/si470x
+media/v4l-drivers/si4713 admin-guide/media/si4713
+media/v4l-drivers/si476x admin-guide/media/si476x
+media/v4l-drivers/tuner-cardlist admin-guide/media/tuner-cardlist
+media/v4l-drivers/tuners driver-api/media/drivers/tuners
+media/v4l-drivers/uvcvideo userspace-api/media/drivers/uvcvideo
+media/v4l-drivers/v4l-with-ir admin-guide/media/remote-controller
+media/v4l-drivers/vimc admin-guide/media/vimc
+media/v4l-drivers/vimc-devel driver-api/media/drivers/vimc-devel
+media/v4l-drivers/vivid admin-guide/media/vivid
+media/v4l-drivers/zoran driver-api/media/drivers/zoran
+memory-devices/ti-emif driver-api/memory-devices/ti-emif
+mips/booting arch/mips/booting
+mips/features arch/mips/features
+mips/index arch/mips/index
+mips/ingenic-tcu arch/mips/ingenic-tcu
+mm/slub admin-guide/mm/slab
+mmc/index driver-api/mmc/index
+mmc/mmc-async-req driver-api/mmc/mmc-async-req
+mmc/mmc-dev-attrs driver-api/mmc/mmc-dev-attrs
+mmc/mmc-dev-parts driver-api/mmc/mmc-dev-parts
+mmc/mmc-tools driver-api/mmc/mmc-tools
+mtd/index driver-api/mtd/index
+mtd/intel-spi driver-api/mtd/spi-intel
+mtd/nand_ecc driver-api/mtd/nand_ecc
+mtd/spi-nor driver-api/mtd/spi-nor
+namespaces/compatibility-list admin-guide/namespaces/compatibility-list
+namespaces/index admin-guide/namespaces/index
+namespaces/resource-control admin-guide/namespaces/resource-control
+networking/altera_tse networking/device_drivers/ethernet/altera/altera_tse
+networking/baycom networking/device_drivers/hamradio/baycom
+networking/bpf_flow_dissector bpf/prog_flow_dissector
+networking/cxacru networking/device_drivers/atm/cxacru
+networking/defza networking/device_drivers/fddi/defza
+networking/device_drivers/3com/3c509 networking/device_drivers/ethernet/3com/3c509
+networking/device_drivers/3com/vortex networking/device_drivers/ethernet/3com/vortex
+networking/device_drivers/amazon/ena networking/device_drivers/ethernet/amazon/ena
+networking/device_drivers/aquantia/atlantic networking/device_drivers/ethernet/aquantia/atlantic
+networking/device_drivers/chelsio/cxgb networking/device_drivers/ethernet/chelsio/cxgb
+networking/device_drivers/cirrus/cs89x0 networking/device_drivers/ethernet/cirrus/cs89x0
+networking/device_drivers/davicom/dm9000 networking/device_drivers/ethernet/davicom/dm9000
+networking/device_drivers/dec/dmfe networking/device_drivers/ethernet/dec/dmfe
+networking/device_drivers/dlink/dl2k networking/device_drivers/ethernet/dlink/dl2k
+networking/device_drivers/freescale/dpaa networking/device_drivers/ethernet/freescale/dpaa
+networking/device_drivers/freescale/dpaa2/dpio-driver networking/device_drivers/ethernet/freescale/dpaa2/dpio-driver
+networking/device_drivers/freescale/dpaa2/ethernet-driver networking/device_drivers/ethernet/freescale/dpaa2/ethernet-driver
+networking/device_drivers/freescale/dpaa2/index networking/device_drivers/ethernet/freescale/dpaa2/index
+networking/device_drivers/freescale/dpaa2/mac-phy-support networking/device_drivers/ethernet/freescale/dpaa2/mac-phy-support
+networking/device_drivers/freescale/dpaa2/overview networking/device_drivers/ethernet/freescale/dpaa2/overview
+networking/device_drivers/freescale/gianfar networking/device_drivers/ethernet/freescale/gianfar
+networking/device_drivers/google/gve networking/device_drivers/ethernet/google/gve
+networking/device_drivers/intel/e100 networking/device_drivers/ethernet/intel/e100
+networking/device_drivers/intel/e1000 networking/device_drivers/ethernet/intel/e1000
+networking/device_drivers/intel/e1000e networking/device_drivers/ethernet/intel/e1000e
+networking/device_drivers/intel/fm10k networking/device_drivers/ethernet/intel/fm10k
+networking/device_drivers/intel/i40e networking/device_drivers/ethernet/intel/i40e
+networking/device_drivers/intel/iavf networking/device_drivers/ethernet/intel/iavf
+networking/device_drivers/intel/ice networking/device_drivers/ethernet/intel/ice
+networking/device_drivers/intel/igb networking/device_drivers/ethernet/intel/igb
+networking/device_drivers/intel/igbvf networking/device_drivers/ethernet/intel/igbvf
+networking/device_drivers/intel/ipw2100 networking/device_drivers/wifi/intel/ipw2100
+networking/device_drivers/intel/ipw2200 networking/device_drivers/wifi/intel/ipw2200
+networking/device_drivers/intel/ixgbe networking/device_drivers/ethernet/intel/ixgbe
+networking/device_drivers/intel/ixgbevf networking/device_drivers/ethernet/intel/ixgbevf
+networking/device_drivers/marvell/octeontx2 networking/device_drivers/ethernet/marvell/octeontx2
+networking/device_drivers/microsoft/netvsc networking/device_drivers/ethernet/microsoft/netvsc
+networking/device_drivers/neterion/s2io networking/device_drivers/ethernet/neterion/s2io
+networking/device_drivers/netronome/nfp networking/device_drivers/ethernet/netronome/nfp
+networking/device_drivers/pensando/ionic networking/device_drivers/ethernet/pensando/ionic
+networking/device_drivers/qualcomm/rmnet networking/device_drivers/cellular/qualcomm/rmnet
+networking/device_drivers/smsc/smc9 networking/device_drivers/ethernet/smsc/smc9
+networking/device_drivers/stmicro/stmmac networking/device_drivers/ethernet/stmicro/stmmac
+networking/device_drivers/ti/cpsw networking/device_drivers/ethernet/ti/cpsw
+networking/device_drivers/ti/cpsw_switchdev networking/device_drivers/ethernet/ti/cpsw_switchdev
+networking/device_drivers/ti/tlan networking/device_drivers/ethernet/ti/tlan
+networking/devlink-trap networking/devlink/devlink-trap
+networking/dpaa2/dpio-driver networking/device_drivers/ethernet/freescale/dpaa2/dpio-driver
+networking/dpaa2/ethernet-driver networking/device_drivers/ethernet/freescale/dpaa2/ethernet-driver
+networking/dpaa2/index networking/device_drivers/ethernet/freescale/dpaa2/index
+networking/dpaa2/overview networking/device_drivers/ethernet/freescale/dpaa2/overview
+networking/e100 networking/device_drivers/ethernet/intel/e100
+networking/e1000 networking/device_drivers/ethernet/intel/e1000
+networking/e1000e networking/device_drivers/ethernet/intel/e1000e
+networking/fm10k networking/device_drivers/ethernet/intel/fm10k
+networking/fore200e networking/device_drivers/atm/fore200e
+networking/hinic networking/device_drivers/ethernet/huawei/hinic
+networking/i40e networking/device_drivers/ethernet/intel/i40e
+networking/iavf networking/device_drivers/ethernet/intel/iavf
+networking/ice networking/device_drivers/ethernet/intel/ice
+networking/igb networking/device_drivers/ethernet/intel/igb
+networking/igbvf networking/device_drivers/ethernet/intel/igbvf
+networking/iphase networking/device_drivers/atm/iphase
+networking/ixgbe networking/device_drivers/ethernet/intel/ixgbe
+networking/ixgbevf networking/device_drivers/ethernet/intel/ixgbevf
+networking/netdev-FAQ process/maintainer-netdev
+networking/skfp networking/device_drivers/fddi/skfp
+networking/z8530drv networking/device_drivers/hamradio/z8530drv
+nfc/index driver-api/nfc/index
+nfc/nfc-hci driver-api/nfc/nfc-hci
+nfc/nfc-pn544 driver-api/nfc/nfc-pn544
+nios2/features arch/nios2/features
+nios2/index arch/nios2/index
+nios2/nios2 arch/nios2/nios2
+nvdimm/btt driver-api/nvdimm/btt
+nvdimm/index driver-api/nvdimm/index
+nvdimm/nvdimm driver-api/nvdimm/nvdimm
+nvdimm/security driver-api/nvdimm/security
+nvmem/nvmem driver-api/nvmem
+openrisc/features arch/openrisc/features
+openrisc/index arch/openrisc/index
+openrisc/openrisc_port arch/openrisc/openrisc_port
+openrisc/todo arch/openrisc/todo
+parisc/debugging arch/parisc/debugging
+parisc/features arch/parisc/features
+parisc/index arch/parisc/index
+parisc/registers arch/parisc/registers
+perf/arm-ccn admin-guide/perf/arm-ccn
+perf/arm_dsu_pmu admin-guide/perf/arm_dsu_pmu
+perf/hisi-pmu admin-guide/perf/hisi-pmu
+perf/index admin-guide/perf/index
+perf/qcom_l2_pmu admin-guide/perf/qcom_l2_pmu
+perf/qcom_l3_pmu admin-guide/perf/qcom_l3_pmu
+perf/thunderx2-pmu admin-guide/perf/thunderx2-pmu
+perf/xgene-pmu admin-guide/perf/xgene-pmu
+phy/samsung-usb2 driver-api/phy/samsung-usb2
+powerpc/associativity arch/powerpc/associativity
+powerpc/booting arch/powerpc/booting
+powerpc/bootwrapper arch/powerpc/bootwrapper
+powerpc/cpu_families arch/powerpc/cpu_families
+powerpc/cpu_features arch/powerpc/cpu_features
+powerpc/dawr-power9 arch/powerpc/dawr-power9
+powerpc/dexcr arch/powerpc/dexcr
+powerpc/dscr arch/powerpc/dscr
+powerpc/eeh-pci-error-recovery arch/powerpc/eeh-pci-error-recovery
+powerpc/elf_hwcaps arch/powerpc/elf_hwcaps
+powerpc/elfnote arch/powerpc/elfnote
+powerpc/features arch/powerpc/features
+powerpc/firmware-assisted-dump arch/powerpc/firmware-assisted-dump
+powerpc/hvcs arch/powerpc/hvcs
+powerpc/imc arch/powerpc/imc
+powerpc/index arch/powerpc/index
+powerpc/isa-versions arch/powerpc/isa-versions
+powerpc/kaslr-booke32 arch/powerpc/kaslr-booke32
+powerpc/mpc52xx arch/powerpc/mpc52xx
+powerpc/papr_hcalls arch/powerpc/papr_hcalls
+powerpc/pci_iov_resource_on_powernv arch/powerpc/pci_iov_resource_on_powernv
+powerpc/pmu-ebb arch/powerpc/pmu-ebb
+powerpc/ptrace arch/powerpc/ptrace
+powerpc/qe_firmware arch/powerpc/qe_firmware
+powerpc/syscall64-abi arch/powerpc/syscall64-abi
+powerpc/transactional_memory arch/powerpc/transactional_memory
+powerpc/ultravisor arch/powerpc/ultravisor
+powerpc/vas-api arch/powerpc/vas-api
+powerpc/vcpudispatch_stats arch/powerpc/vcpudispatch_stats
+powerpc/vmemmap_dedup arch/powerpc/vmemmap_dedup
+process/clang-format dev-tools/clang-format
+process/magic-number staging/magic-number
+process/unaligned-memory-access core-api/unaligned-memory-access
+rapidio/index driver-api/rapidio/index
+rapidio/mport_cdev driver-api/rapidio/mport_cdev
+rapidio/rapidio driver-api/rapidio/rapidio
+rapidio/rio_cm driver-api/rapidio/rio_cm
+rapidio/sysfs driver-api/rapidio/sysfs
+rapidio/tsi721 driver-api/rapidio/tsi721
+riscv/acpi arch/riscv/acpi
+riscv/boot arch/riscv/boot
+riscv/boot-image-header arch/riscv/boot-image-header
+riscv/features arch/riscv/features
+riscv/hwprobe arch/riscv/hwprobe
+riscv/index arch/riscv/index
+riscv/patch-acceptance arch/riscv/patch-acceptance
+riscv/uabi arch/riscv/uabi
+riscv/vector arch/riscv/vector
+riscv/vm-layout arch/riscv/vm-layout
+s390/3270 arch/s390/3270
+s390/cds arch/s390/cds
+s390/common_io arch/s390/common_io
+s390/driver-model arch/s390/driver-model
+s390/features arch/s390/features
+s390/index arch/s390/index
+s390/monreader arch/s390/monreader
+s390/pci arch/s390/pci
+s390/qeth arch/s390/qeth
+s390/s390dbf arch/s390/s390dbf
+s390/text_files arch/s390/text_files
+s390/vfio-ap arch/s390/vfio-ap
+s390/vfio-ap-locking arch/s390/vfio-ap-locking
+s390/vfio-ccw arch/s390/vfio-ccw
+s390/zfcpdump arch/s390/zfcpdump
+security/LSM security/lsm-development
+security/LSM-sctp security/SCTP
+serial/driver driver-api/serial/driver
+serial/index driver-api/serial/index
+serial/moxa-smartio driver-api/tty/moxa-smartio
+serial/n_gsm driver-api/tty/n_gsm
+serial/serial-iso7816 driver-api/serial/serial-iso7816
+serial/serial-rs485 driver-api/serial/serial-rs485
+serial/tty driver-api/tty/tty_ldisc
+sh/booting arch/sh/booting
+sh/features arch/sh/features
+sh/index arch/sh/index
+sh/new-machine arch/sh/new-machine
+sh/register-banks arch/sh/register-banks
+sparc/adi arch/sparc/adi
+sparc/console arch/sparc/console
+sparc/features arch/sparc/features
+sparc/index arch/sparc/index
+sparc/oradax/oracle-dax arch/sparc/oradax/oracle-dax
+staging/kprobes trace/kprobes
+sysctl/abi admin-guide/sysctl/abi
+sysctl/fs admin-guide/sysctl/fs
+sysctl/index admin-guide/sysctl/index
+sysctl/kernel admin-guide/sysctl/kernel
+sysctl/net admin-guide/sysctl/net
+sysctl/sunrpc admin-guide/sysctl/sunrpc
+sysctl/user admin-guide/sysctl/user
+sysctl/vm admin-guide/sysctl/vm
+thermal/cpu-cooling-api driver-api/thermal/cpu-cooling-api
+thermal/exynos_thermal driver-api/thermal/exynos_thermal
+thermal/exynos_thermal_emulation driver-api/thermal/exynos_thermal_emulation
+thermal/index driver-api/thermal/index
+thermal/intel_powerclamp admin-guide/thermal/intel_powerclamp
+thermal/nouveau_thermal driver-api/thermal/nouveau_thermal
+thermal/power_allocator driver-api/thermal/power_allocator
+thermal/sysfs-api driver-api/thermal/sysfs-api
+thermal/x86_pkg_temperature_thermal driver-api/thermal/x86_pkg_temperature_thermal
+tpm/index security/tpm/index
+tpm/tpm_vtpm_proxy security/tpm/tpm_vtpm_proxy
+trace/coresight trace/coresight/coresight
+trace/coresight-cpu-debug trace/coresight/coresight-cpu-debug
+trace/rv/da_monitor_synthesis trace/rv/monitor_synthesis
+translations/it_IT/admin-guide/security-bugs translations/it_IT/process/security-bugs
+translations/it_IT/process/clang-format translations/it_IT/dev-tools/clang-format
+translations/it_IT/process/magic-number translations/it_IT/staging/magic-number
+translations/it_IT/riscv/patch-acceptance translations/it_IT/arch/riscv/patch-acceptance
+translations/ja_JP/howto translations/ja_JP/process/howto
+translations/ko_KR/howto translations/ko_KR/process/howto
+translations/sp_SP/howto translations/sp_SP/process/howto
+translations/sp_SP/submitting-patches translations/sp_SP/process/submitting-patches
+translations/zh_CN/admin-guide/security-bugs translations/zh_CN/process/security-bugs
+translations/zh_CN/arch translations/zh_CN/arch/index
+translations/zh_CN/arm64/amu translations/zh_CN/arch/arm64/amu
+translations/zh_CN/arm64/elf_hwcaps translations/zh_CN/arch/arm64/elf_hwcaps
+translations/zh_CN/arm64/hugetlbpage translations/zh_CN/arch/arm64/hugetlbpage
+translations/zh_CN/arm64/index translations/zh_CN/arch/arm64/index
+translations/zh_CN/arm64/perf translations/zh_CN/arch/arm64/perf
+translations/zh_CN/coding-style translations/zh_CN/process/coding-style
+translations/zh_CN/loongarch/booting translations/zh_CN/arch/loongarch/booting
+translations/zh_CN/loongarch/features translations/zh_CN/arch/loongarch/features
+translations/zh_CN/loongarch/index translations/zh_CN/arch/loongarch/index
+translations/zh_CN/loongarch/introduction translations/zh_CN/arch/loongarch/introduction
+translations/zh_CN/loongarch/irq-chip-model translations/zh_CN/arch/loongarch/irq-chip-model
+translations/zh_CN/mips/booting translations/zh_CN/arch/mips/booting
+translations/zh_CN/mips/features translations/zh_CN/arch/mips/features
+translations/zh_CN/mips/index translations/zh_CN/arch/mips/index
+translations/zh_CN/mips/ingenic-tcu translations/zh_CN/arch/mips/ingenic-tcu
+translations/zh_CN/openrisc/index translations/zh_CN/arch/openrisc/index
+translations/zh_CN/openrisc/openrisc_port translations/zh_CN/arch/openrisc/openrisc_port
+translations/zh_CN/openrisc/todo translations/zh_CN/arch/openrisc/todo
+translations/zh_CN/parisc/debugging translations/zh_CN/arch/parisc/debugging
+translations/zh_CN/parisc/index translations/zh_CN/arch/parisc/index
+translations/zh_CN/parisc/registers translations/zh_CN/arch/parisc/registers
+translations/zh_CN/riscv/boot-image-header translations/zh_CN/arch/riscv/boot-image-header
+translations/zh_CN/riscv/index translations/zh_CN/arch/riscv/index
+translations/zh_CN/riscv/patch-acceptance translations/zh_CN/arch/riscv/patch-acceptance
+translations/zh_CN/riscv/vm-layout translations/zh_CN/arch/riscv/vm-layout
+translations/zh_CN/vm/active_mm translations/zh_CN/mm/active_mm
+translations/zh_CN/vm/balance translations/zh_CN/mm/balance
+translations/zh_CN/vm/damon/api translations/zh_CN/mm/damon/api
+translations/zh_CN/vm/damon/design translations/zh_CN/mm/damon/design
+translations/zh_CN/vm/damon/faq translations/zh_CN/mm/damon/faq
+translations/zh_CN/vm/damon/index translations/zh_CN/mm/damon/index
+translations/zh_CN/vm/free_page_reporting translations/zh_CN/mm/free_page_reporting
+translations/zh_CN/vm/highmem translations/zh_CN/mm/highmem
+translations/zh_CN/vm/hmm translations/zh_CN/mm/hmm
+translations/zh_CN/vm/hugetlbfs_reserv translations/zh_CN/mm/hugetlbfs_reserv
+translations/zh_CN/vm/hwpoison translations/zh_CN/mm/hwpoison
+translations/zh_CN/vm/index translations/zh_CN/mm/index
+translations/zh_CN/vm/ksm translations/zh_CN/mm/ksm
+translations/zh_CN/vm/memory-model translations/zh_CN/mm/memory-model
+translations/zh_CN/vm/mmu_notifier translations/zh_CN/mm/mmu_notifier
+translations/zh_CN/vm/numa translations/zh_CN/mm/numa
+translations/zh_CN/vm/overcommit-accounting translations/zh_CN/mm/overcommit-accounting
+translations/zh_CN/vm/page_frags translations/zh_CN/mm/page_frags
+translations/zh_CN/vm/page_owner translations/zh_CN/mm/page_owner
+translations/zh_CN/vm/page_table_check translations/zh_CN/mm/page_table_check
+translations/zh_CN/vm/remap_file_pages translations/zh_CN/mm/remap_file_pages
+translations/zh_CN/vm/split_page_table_lock translations/zh_CN/mm/split_page_table_lock
+translations/zh_CN/vm/zsmalloc translations/zh_CN/mm/zsmalloc
+translations/zh_TW/arm64/amu translations/zh_TW/arch/arm64/amu
+translations/zh_TW/arm64/elf_hwcaps translations/zh_TW/arch/arm64/elf_hwcaps
+translations/zh_TW/arm64/hugetlbpage translations/zh_TW/arch/arm64/hugetlbpage
+translations/zh_TW/arm64/index translations/zh_TW/arch/arm64/index
+translations/zh_TW/arm64/perf translations/zh_TW/arch/arm64/perf
+tty/device_drivers/oxsemi-tornado misc-devices/oxsemi-tornado
+tty/index driver-api/tty/index
+tty/n_tty driver-api/tty/n_tty
+tty/tty_buffer driver-api/tty/tty_buffer
+tty/tty_driver driver-api/tty/tty_driver
+tty/tty_internals driver-api/tty/tty_internals
+tty/tty_ldisc driver-api/tty/tty_ldisc
+tty/tty_port driver-api/tty/tty_port
+tty/tty_struct driver-api/tty/tty_struct
+usb/typec driver-api/usb/typec
+usb/usb3-debug-port driver-api/usb/usb3-debug-port
+userspace-api/media/drivers/st-vgxy61 userspace-api/media/drivers/vgxy61
+userspace-api/media/v4l/pixfmt-meta-d4xx userspace-api/media/v4l/metafmt-d4xx
+userspace-api/media/v4l/pixfmt-meta-intel-ipu3 userspace-api/media/v4l/metafmt-intel-ipu3
+userspace-api/media/v4l/pixfmt-meta-rkisp1 userspace-api/media/v4l/metafmt-rkisp1
+userspace-api/media/v4l/pixfmt-meta-uvc userspace-api/media/v4l/metafmt-uvc
+userspace-api/media/v4l/pixfmt-meta-vivid userspace-api/media/v4l/metafmt-vivid
+userspace-api/media/v4l/pixfmt-meta-vsp1-hgo userspace-api/media/v4l/metafmt-vsp1-hgo
+userspace-api/media/v4l/pixfmt-meta-vsp1-hgt userspace-api/media/v4l/metafmt-vsp1-hgt
+virt/coco/sevguest virt/coco/sev-guest
+virt/kvm/amd-memory-encryption virt/kvm/x86/amd-memory-encryption
+virt/kvm/arm/psci virt/kvm/arm/fw-pseudo-registers
+virt/kvm/cpuid virt/kvm/x86/cpuid
+virt/kvm/hypercalls virt/kvm/x86/hypercalls
+virt/kvm/mmu virt/kvm/x86/mmu
+virt/kvm/msr virt/kvm/x86/msr
+virt/kvm/nested-vmx virt/kvm/x86/nested-vmx
+virt/kvm/running-nested-guests virt/kvm/x86/running-nested-guests
+virt/kvm/s390-diag virt/kvm/s390/s390-diag
+virt/kvm/s390-pv virt/kvm/s390/s390-pv
+virt/kvm/s390-pv-boot virt/kvm/s390/s390-pv-boot
+virt/kvm/timekeeping virt/kvm/x86/timekeeping
+virt/kvm/x86/halt-polling virt/kvm/halt-polling
+virtual/index virt/index
+virtual/kvm/amd-memory-encryption virt/kvm/x86/amd-memory-encryption
+virtual/kvm/cpuid virt/kvm/x86/cpuid
+virtual/kvm/index virt/kvm/index
+virtual/kvm/vcpu-requests virt/kvm/vcpu-requests
+virtual/paravirt_ops virt/paravirt_ops
+vm/active_mm mm/active_mm
+vm/arch_pgtable_helpers mm/arch_pgtable_helpers
+vm/balance mm/balance
+vm/bootmem mm/bootmem
+vm/damon/api mm/damon/api
+vm/damon/design mm/damon/design
+vm/damon/faq mm/damon/faq
+vm/damon/index mm/damon/index
+vm/free_page_reporting mm/free_page_reporting
+vm/highmem mm/highmem
+vm/hmm mm/hmm
+vm/hugetlbfs_reserv mm/hugetlbfs_reserv
+vm/hugetlbpage admin-guide/mm/hugetlbpage
+vm/hwpoison mm/hwpoison
+vm/idle_page_tracking admin-guide/mm/idle_page_tracking
+vm/index mm/index
+vm/ksm mm/ksm
+vm/memory-model mm/memory-model
+vm/mmu_notifier mm/mmu_notifier
+vm/numa mm/numa
+vm/numa_memory_policy admin-guide/mm/numa_memory_policy
+vm/oom mm/oom
+vm/overcommit-accounting mm/overcommit-accounting
+vm/page_allocation mm/page_allocation
+vm/page_cache mm/page_cache
+vm/page_frags mm/page_frags
+vm/page_migration mm/page_migration
+vm/page_owner mm/page_owner
+vm/page_reclaim mm/page_reclaim
+vm/page_table_check mm/page_table_check
+vm/page_tables mm/page_tables
+vm/pagemap admin-guide/mm/pagemap
+vm/physical_memory mm/physical_memory
+vm/process_addrs mm/process_addrs
+vm/remap_file_pages mm/remap_file_pages
+vm/shmfs mm/shmfs
+vm/slab mm/slab
+vm/slub admin-guide/mm/slab
+vm/soft-dirty admin-guide/mm/soft-dirty
+vm/split_page_table_lock mm/split_page_table_lock
+vm/swap mm/swap
+vm/swap_numa admin-guide/mm/swap_numa
+vm/transhuge mm/transhuge
+vm/unevictable-lru mm/unevictable-lru
+vm/userfaultfd admin-guide/mm/userfaultfd
+vm/vmalloc mm/vmalloc
+vm/vmalloced-kernel-stacks mm/vmalloced-kernel-stacks
+vm/vmemmap_dedup mm/vmemmap_dedup
+vm/zsmalloc mm/zsmalloc
+vm/zswap admin-guide/mm/zswap
+watch_queue core-api/watch_queue
+x86/amd-memory-encryption arch/x86/amd-memory-encryption
+x86/amd_hsmp arch/x86/amd_hsmp
+x86/boot arch/x86/boot
+x86/booting-dt arch/x86/booting-dt
+x86/buslock arch/x86/buslock
+x86/cpuinfo arch/x86/cpuinfo
+x86/earlyprintk arch/x86/earlyprintk
+x86/elf_auxvec arch/x86/elf_auxvec
+x86/entry_64 arch/x86/entry_64
+x86/exception-tables arch/x86/exception-tables
+x86/features arch/x86/features
+x86/i386/IO-APIC arch/x86/i386/IO-APIC
+x86/i386/index arch/x86/i386/index
+x86/ifs arch/x86/ifs
+x86/index arch/x86/index
+x86/intel-hfi arch/x86/intel-hfi
+x86/intel_txt arch/x86/intel_txt
+x86/iommu arch/x86/iommu
+x86/kernel-stacks arch/x86/kernel-stacks
+x86/mds arch/x86/mds
+x86/microcode arch/x86/microcode
+x86/mtrr arch/x86/mtrr
+x86/orc-unwinder arch/x86/orc-unwinder
+x86/pat arch/x86/pat
+x86/protection-keys core-api/protection-keys
+x86/pti arch/x86/pti
+x86/resctrl filesystems/resctrl
+x86/resctrl_ui filesystems/resctrl
+x86/sgx arch/x86/sgx
+x86/sva arch/x86/sva
+x86/tdx arch/x86/tdx
+x86/tlb arch/x86/tlb
+x86/topology arch/x86/topology
+x86/tsx_async_abort arch/x86/tsx_async_abort
+x86/usb-legacy-support arch/x86/usb-legacy-support
+x86/x86_64/5level-paging arch/x86/x86_64/5level-paging
+x86/x86_64/cpu-hotplug-spec arch/x86/x86_64/cpu-hotplug-spec
+x86/x86_64/fake-numa-for-cpusets arch/x86/x86_64/fake-numa-for-cpusets
+x86/x86_64/fsgs arch/x86/x86_64/fsgs
+x86/x86_64/index arch/x86/x86_64/index
+x86/x86_64/machinecheck arch/x86/x86_64/machinecheck
+x86/x86_64/mm arch/x86/x86_64/mm
+x86/x86_64/uefi arch/x86/x86_64/uefi
+x86/xstate arch/x86/xstate
+x86/zero-page arch/x86/zero-page
+xilinx/eemi driver-api/xilinx/eemi
+xilinx/index driver-api/xilinx/index
+xtensa/atomctl arch/xtensa/atomctl
+xtensa/booting arch/xtensa/booting
+xtensa/features arch/xtensa/features
+xtensa/index arch/xtensa/index
+xtensa/mmu arch/xtensa/mmu
diff --git a/Documentation/ABI/README b/Documentation/ABI/README
index 3121029dce21..315fffe1f831 100644
--- a/Documentation/ABI/README
+++ b/Documentation/ABI/README
@@ -1,4 +1,5 @@
-This directory attempts to document the ABI between the Linux kernel and
+This part of the documentation inside Documentation/ABI directory
+attempts to document the ABI between the Linux kernel and
userspace, and the relative stability of these interfaces. Due to the
everchanging nature of Linux, and the differing maturity levels, these
interfaces should be used by userspace programs in different ways.
@@ -32,7 +33,7 @@ The different levels of stability are:
layout of the files below for details on how to do this.)
obsolete/
- This directory documents interfaces that are still remaining in
+ This directory documents interfaces that are still remaining in
the kernel, but are marked to be removed at some later point in
time. The description of the interface will document the reason
why it is obsolete and when it can be expected to be removed.
@@ -45,7 +46,9 @@ Every file in these directories will contain the following information:
What: Short description of the interface
Date: Date created
-KernelVersion: Kernel version this feature first showed up in.
+KernelVersion: (Optional) Kernel version this feature first showed up in.
+ Note: git history often provides more accurate version
+ info, so this field may be omitted.
Contact: Primary contact for this interface (may be a mailing list)
Description: Long description of the interface and how to use it.
Users: All users of this interface who wish to be notified when
@@ -58,6 +61,14 @@ Users: All users of this interface who wish to be notified when
be changed further.
+Note:
+ The fields should be use a simple notation, compatible with ReST markup.
+ Also, the file **should not** have a top-level index, like::
+
+ ===
+ foo
+ ===
+
How things move between levels:
Interfaces in stable may move to obsolete, as long as the proper
diff --git a/Documentation/ABI/obsolete/automount-tracefs-debugfs b/Documentation/ABI/obsolete/automount-tracefs-debugfs
new file mode 100644
index 000000000000..a5196ec78cb5
--- /dev/null
+++ b/Documentation/ABI/obsolete/automount-tracefs-debugfs
@@ -0,0 +1,20 @@
+What: /sys/kernel/debug/tracing
+Date: May 2008
+KernelVersion: 2.6.27
+Contact: linux-trace-kernel@vger.kernel.org
+Description:
+
+ The ftrace was first added to the kernel, its interface was placed
+ into the debugfs file system under the "tracing" directory. Access
+ to the files were in /sys/kernel/debug/tracing. As systems wanted
+ access to the tracing interface without having to enable debugfs, a
+ new interface was created called "tracefs". This was a stand alone
+ file system and was usually mounted in /sys/kernel/tracing.
+
+ To allow older tooling to continue to operate, when mounting
+ debugfs, the tracefs file system would automatically get mounted in
+ the "tracing" directory of debugfs. The tracefs interface was added
+ in January 2015 in the v4.1 kernel.
+
+ All tooling should now be using tracefs directly and the "tracing"
+ directory in debugfs should be removed by January 2030.
diff --git a/Documentation/ABI/obsolete/o2cb b/Documentation/ABI/obsolete/o2cb
new file mode 100644
index 000000000000..8f39b596731d
--- /dev/null
+++ b/Documentation/ABI/obsolete/o2cb
@@ -0,0 +1,11 @@
+What: /sys/o2cb
+Date: Dec 2005
+KernelVersion: 2.6.16
+Contact: ocfs2-devel@lists.linux.dev
+Description: Ocfs2-tools looks at 'interface-revision' for versioning
+ information. Each logmask/ file controls a set of debug prints
+ and can be written into with the strings "allow", "deny", or
+ "off". Reading the file returns the current state.
+ Was renamed to /sys/fs/u2cb/
+Users: ocfs2-tools. It's sufficient to mail proposed changes to
+ ocfs2-devel@lists.linux.dev.
diff --git a/Documentation/ABI/obsolete/procfs-i8k b/Documentation/ABI/obsolete/procfs-i8k
new file mode 100644
index 000000000000..32df4d5bdd15
--- /dev/null
+++ b/Documentation/ABI/obsolete/procfs-i8k
@@ -0,0 +1,10 @@
+What: /proc/i8k
+Date: November 2001
+KernelVersion: 2.4.14
+Contact: Pali Rohár <pali@kernel.org>
+Description: Legacy interface for getting/setting sensor information like
+ fan speed, temperature, serial number, hotkey status etc
+ on Dell Laptops.
+ Since the driver is now using the standard hwmon sysfs interface,
+ the procfs interface is deprecated.
+Users: https://github.com/vitorafsr/i8kutils
diff --git a/Documentation/ABI/obsolete/sysfs-bus-iio b/Documentation/ABI/obsolete/sysfs-bus-iio
new file mode 100644
index 000000000000..a13523561958
--- /dev/null
+++ b/Documentation/ABI/obsolete/sysfs-bus-iio
@@ -0,0 +1,174 @@
+What: /sys/bus/iio/devices/iio:deviceX/buffer/length
+KernelVersion: 2.6.35
+Contact: linux-iio@vger.kernel.org
+Description:
+ Number of scans contained by the buffer.
+
+ Since Kernel 5.11, multiple buffers are supported.
+ so, it is better to use, instead:
+
+ /sys/bus/iio/devices/iio:deviceX/bufferY/length
+
+What: /sys/bus/iio/devices/iio:deviceX/buffer/enable
+KernelVersion: 2.6.35
+Contact: linux-iio@vger.kernel.org
+Description:
+ Actually start the buffer capture up. Will start trigger
+ if first device and appropriate.
+
+ Since Kernel 5.11, multiple buffers are supported.
+ so, it is better to use, instead:
+
+ /sys/bus/iio/devices/iio:deviceX/bufferY/enable
+
+What: /sys/bus/iio/devices/iio:deviceX/scan_elements
+KernelVersion: 2.6.37
+Contact: linux-iio@vger.kernel.org
+Description:
+ Directory containing interfaces for elements that will be
+ captured for a single triggered sample set in the buffer.
+
+ Since kernel 5.11 the scan_elements attributes are merged into
+ the bufferY directory, to be configurable per buffer.
+
+What: /sys/.../iio:deviceX/scan_elements/in_accel_x_en
+What: /sys/.../iio:deviceX/scan_elements/in_accel_y_en
+What: /sys/.../iio:deviceX/scan_elements/in_accel_z_en
+What: /sys/.../iio:deviceX/scan_elements/in_anglvel_x_en
+What: /sys/.../iio:deviceX/scan_elements/in_anglvel_y_en
+What: /sys/.../iio:deviceX/scan_elements/in_anglvel_z_en
+What: /sys/.../iio:deviceX/scan_elements/in_magn_x_en
+What: /sys/.../iio:deviceX/scan_elements/in_magn_y_en
+What: /sys/.../iio:deviceX/scan_elements/in_magn_z_en
+What: /sys/.../iio:deviceX/scan_elements/in_rot_from_north_magnetic_en
+What: /sys/.../iio:deviceX/scan_elements/in_rot_from_north_true_en
+What: /sys/.../iio:deviceX/scan_elements/in_rot_from_north_magnetic_tilt_comp_en
+What: /sys/.../iio:deviceX/scan_elements/in_rot_from_north_true_tilt_comp_en
+What: /sys/.../iio:deviceX/scan_elements/in_timestamp_en
+What: /sys/.../iio:deviceX/scan_elements/in_voltageY_supply_en
+What: /sys/.../iio:deviceX/scan_elements/in_voltageY_en
+What: /sys/.../iio:deviceX/scan_elements/in_voltageY-voltageZ_en
+What: /sys/.../iio:deviceX/scan_elements/in_incli_x_en
+What: /sys/.../iio:deviceX/scan_elements/in_incli_y_en
+What: /sys/.../iio:deviceX/scan_elements/in_pressureY_en
+What: /sys/.../iio:deviceX/scan_elements/in_pressure_en
+What: /sys/.../iio:deviceX/scan_elements/in_rot_quaternion_en
+What: /sys/.../iio:deviceX/scan_elements/in_proximity_en
+KernelVersion: 2.6.37
+Contact: linux-iio@vger.kernel.org
+Description:
+ Scan element control for triggered data capture.
+
+ Since kernel 5.11 the scan_elements attributes are merged into
+ the bufferY directory, to be configurable per buffer.
+
+What: /sys/.../iio:deviceX/scan_elements/in_accel_type
+What: /sys/.../iio:deviceX/scan_elements/in_anglvel_type
+What: /sys/.../iio:deviceX/scan_elements/in_magn_type
+What: /sys/.../iio:deviceX/scan_elements/in_incli_type
+What: /sys/.../iio:deviceX/scan_elements/in_voltageY_type
+What: /sys/.../iio:deviceX/scan_elements/in_voltage_type
+What: /sys/.../iio:deviceX/scan_elements/in_voltageY_supply_type
+What: /sys/.../iio:deviceX/scan_elements/in_timestamp_type
+What: /sys/.../iio:deviceX/scan_elements/in_pressureY_type
+What: /sys/.../iio:deviceX/scan_elements/in_pressure_type
+What: /sys/.../iio:deviceX/scan_elements/in_rot_quaternion_type
+What: /sys/.../iio:deviceX/scan_elements/in_proximity_type
+KernelVersion: 2.6.37
+Contact: linux-iio@vger.kernel.org
+Description:
+ Description of the scan element data storage within the buffer
+ and hence the form in which it is read from user-space.
+ Form is [be|le]:[s|u]bits/storagebits[>>shift].
+ be or le specifies big or little endian. s or u specifies if
+ signed (2's complement) or unsigned. bits is the number of bits
+ of data and storagebits is the space (after padding) that it
+ occupies in the buffer. shift if specified, is the shift that
+ needs to be applied prior to masking out unused bits. Some
+ devices put their data in the middle of the transferred elements
+ with additional information on both sides. Note that some
+ devices will have additional information in the unused bits
+ so to get a clean value, the bits value must be used to mask
+ the buffer output value appropriately. The storagebits value
+ also specifies the data alignment. So s48/64>>2 will be a
+ signed 48 bit integer stored in a 64 bit location aligned to
+ a 64 bit boundary. To obtain the clean value, shift right 2
+ and apply a mask to zero the top 16 bits of the result.
+ For other storage combinations this attribute will be extended
+ appropriately.
+
+ Since kernel 5.11 the scan_elements attributes are merged into
+ the bufferY directory, to be configurable per buffer.
+
+What: /sys/.../iio:deviceX/scan_elements/in_voltageY_index
+What: /sys/.../iio:deviceX/scan_elements/in_voltageY_supply_index
+What: /sys/.../iio:deviceX/scan_elements/in_accel_x_index
+What: /sys/.../iio:deviceX/scan_elements/in_accel_y_index
+What: /sys/.../iio:deviceX/scan_elements/in_accel_z_index
+What: /sys/.../iio:deviceX/scan_elements/in_anglvel_x_index
+What: /sys/.../iio:deviceX/scan_elements/in_anglvel_y_index
+What: /sys/.../iio:deviceX/scan_elements/in_anglvel_z_index
+What: /sys/.../iio:deviceX/scan_elements/in_magn_x_index
+What: /sys/.../iio:deviceX/scan_elements/in_magn_y_index
+What: /sys/.../iio:deviceX/scan_elements/in_magn_z_index
+What: /sys/.../iio:deviceX/scan_elements/in_rot_from_north_magnetic_index
+What: /sys/.../iio:deviceX/scan_elements/in_rot_from_north_true_index
+What: /sys/.../iio:deviceX/scan_elements/in_rot_from_north_magnetic_tilt_comp_index
+What: /sys/.../iio:deviceX/scan_elements/in_rot_from_north_true_tilt_comp_index
+What: /sys/.../iio:deviceX/scan_elements/in_incli_x_index
+What: /sys/.../iio:deviceX/scan_elements/in_incli_y_index
+What: /sys/.../iio:deviceX/scan_elements/in_timestamp_index
+What: /sys/.../iio:deviceX/scan_elements/in_pressureY_index
+What: /sys/.../iio:deviceX/scan_elements/in_pressure_index
+What: /sys/.../iio:deviceX/scan_elements/in_rot_quaternion_index
+What: /sys/.../iio:deviceX/scan_elements/in_proximity_index
+KernelVersion: 2.6.37
+Description:
+ A single positive integer specifying the position of this
+ scan element in the buffer. Note these are not dependent on
+ what is enabled and may not be contiguous. Thus for user-space
+ to establish the full layout these must be used in conjunction
+ with all _en attributes to establish which channels are present,
+ and the relevant _type attributes to establish the data storage
+ format.
+
+ Since kernel 5.11 the scan_elements attributes are merged into
+ the bufferY directory, to be configurable per buffer.
+
+What: /sys/bus/iio/devices/iio:deviceX/buffer/watermark
+KernelVersion: 4.2
+Contact: linux-iio@vger.kernel.org
+Description:
+ A single positive integer specifying the maximum number of scan
+ elements to wait for.
+
+ Poll will block until the watermark is reached.
+
+ Blocking read will wait until the minimum between the requested
+ read amount or the low water mark is available.
+
+ Non-blocking read will retrieve the available samples from the
+ buffer even if there are less samples then watermark level. This
+ allows the application to block on poll with a timeout and read
+ the available samples after the timeout expires and thus have a
+ maximum delay guarantee.
+
+ Since Kernel 5.11, multiple buffers are supported.
+ so, it is better to use, instead:
+
+ /sys/bus/iio/devices/iio:deviceX/bufferY/watermark
+
+What: /sys/bus/iio/devices/iio:deviceX/buffer/data_available
+KernelVersion: 4.16
+Contact: linux-iio@vger.kernel.org
+Description:
+ A read-only value indicating the bytes of data available in the
+ buffer. In the case of an output buffer, this indicates the
+ amount of empty space available to write data to. In the case of
+ an input buffer, this indicates the amount of data available for
+ reading.
+
+ Since Kernel 5.11, multiple buffers are supported.
+ so, it is better to use, instead:
+
+ /sys/bus/iio/devices/iio:deviceX/bufferY/data_available
diff --git a/Documentation/ABI/obsolete/sysfs-cpuidle b/Documentation/ABI/obsolete/sysfs-cpuidle
new file mode 100644
index 000000000000..972cc11d3434
--- /dev/null
+++ b/Documentation/ABI/obsolete/sysfs-cpuidle
@@ -0,0 +1,9 @@
+What: /sys/devices/system/cpu/cpuidle/current_governor_ro
+Date: April, 2020
+Contact: linux-pm@vger.kernel.org
+Description:
+ current_governor_ro shows current using cpuidle governor, but read only.
+ with the update that cpuidle governor can be changed at runtime in default,
+ both current_governor and current_governor_ro co-exist under
+ /sys/devices/system/cpu/cpuidle/ file, it's duplicate so make
+ current_governor_ro obsolete.
diff --git a/Documentation/ABI/obsolete/sysfs-driver-hid-roccat-pyra b/Documentation/ABI/obsolete/sysfs-driver-hid-roccat-pyra
index 16020b31ae64..66545c587a64 100644
--- a/Documentation/ABI/obsolete/sysfs-driver-hid-roccat-pyra
+++ b/Documentation/ABI/obsolete/sysfs-driver-hid-roccat-pyra
@@ -5,12 +5,15 @@ Description: It is possible to switch the cpi setting of the mouse with the
press of a button.
When read, this file returns the raw number of the actual cpi
setting reported by the mouse. This number has to be further
- processed to receive the real dpi value.
+ processed to receive the real dpi value:
+ ===== ====
VALUE DPI
+ ===== ====
1 400
2 800
4 1600
+ ===== ====
This file is readonly.
Has never been used. If bookkeeping is done, it's done in userland tools.
diff --git a/Documentation/ABI/obsolete/sysfs-driver-intel_pmc_bxt b/Documentation/ABI/obsolete/sysfs-driver-intel_pmc_bxt
new file mode 100644
index 000000000000..39d5659f388b
--- /dev/null
+++ b/Documentation/ABI/obsolete/sysfs-driver-intel_pmc_bxt
@@ -0,0 +1,22 @@
+These files allow sending arbitrary IPC commands to the PMC/SCU which
+may be dangerous. These will be removed eventually and should not be
+used in any new applications.
+
+What: /sys/bus/platform/devices/INT34D2:00/simplecmd
+Date: Jun 2015
+KernelVersion: 4.1
+Contact: Mika Westerberg <mika.westerberg@linux.intel.com>
+Description: This interface allows userspace to send an arbitrary
+ IPC command to the PMC/SCU.
+
+ Format: %d %d where first number is command and
+ second number is subcommand.
+
+What: /sys/bus/platform/devices/INT34D2:00/northpeak
+Date: Jun 2015
+KernelVersion: 4.1
+Contact: Mika Westerberg <mika.westerberg@linux.intel.com>
+Description: This interface allows userspace to enable and disable
+ Northpeak through the PMC/SCU.
+
+ Format: %u.
diff --git a/Documentation/ABI/obsolete/sysfs-driver-samsung-laptop b/Documentation/ABI/obsolete/sysfs-driver-samsung-laptop
new file mode 100644
index 000000000000..204c3f3a1d78
--- /dev/null
+++ b/Documentation/ABI/obsolete/sysfs-driver-samsung-laptop
@@ -0,0 +1,10 @@
+What: /sys/devices/platform/samsung/battery_life_extender
+Date: December 1, 2011
+KernelVersion: 3.3
+Contact: Corentin Chary <corentin.chary@gmail.com>
+Description: Max battery charge level can be modified, battery cycle
+ life can be extended by reducing the max battery charge
+ level.
+
+ - 0 means normal battery mode (100% charge)
+ - 1 means battery life extender mode (80% charge)
diff --git a/Documentation/ABI/obsolete/sysfs-gpio b/Documentation/ABI/obsolete/sysfs-gpio
index 40d41ea1a3f5..0d3f12c4dcbd 100644
--- a/Documentation/ABI/obsolete/sysfs-gpio
+++ b/Documentation/ABI/obsolete/sysfs-gpio
@@ -11,20 +11,30 @@ Description:
Kernel code may export it for complete or partial access.
GPIOs are identified as they are inside the kernel, using integers in
- the range 0..INT_MAX. See Documentation/gpio for more information.
+ the range 0..INT_MAX. See Documentation/admin-guide/gpio for more information.
+
+ ::
/sys/class/gpio
/export ... asks the kernel to export a GPIO to userspace
/unexport ... to return a GPIO to the kernel
/gpioN ... for each exported GPIO #N OR
- /<LINE-NAME> ... for a properly named GPIO line
/value ... always readable, writes fail for input GPIOs
/direction ... r/w as: in, out (default low); write: high, low
/edge ... r/w as: none, falling, rising, both
+ /active_low ... r/w as: 0, 1
/gpiochipN ... for each gpiochip; #N is its first GPIO
/base ... (r/o) same as N
- /label ... (r/o) descriptive, not necessarily unique
+ /label ... (r/o) descriptive chip name
/ngpio ... (r/o) number of GPIOs; numbered N to N + (ngpio - 1)
+ /gpio<OFFSET>
+ /value ... always readable, writes fail for input GPIOs
+ /direction ... r/w as: in, out (default low); write: high, low
+ /chipX ... for each gpiochip; #X is the gpio device ID
+ /export ... asks the kernel to export a GPIO at HW offset X to userspace
+ /unexport ... to return a GPIO at HW offset X to the kernel
+ /label ... (r/o) descriptive chip name
+ /ngpio ... (r/o) number of GPIOs exposed by the chip
- This ABI is deprecated and will be removed after 2020. It is
- replaced with the GPIO character device.
+ This ABI is obsoleted by Documentation/ABI/testing/gpio-cdev and will be
+ removed after 2020.
diff --git a/Documentation/ABI/obsolete/sysfs-kernel-fadump_enabled b/Documentation/ABI/obsolete/sysfs-kernel-fadump_enabled
new file mode 100644
index 000000000000..e9c2de8b3688
--- /dev/null
+++ b/Documentation/ABI/obsolete/sysfs-kernel-fadump_enabled
@@ -0,0 +1,9 @@
+This ABI is renamed and moved to a new location /sys/kernel/fadump/enabled.
+
+What: /sys/kernel/fadump_enabled
+Date: Feb 2012
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: read only
+ Primarily used to identify whether the FADump is enabled in
+ the kernel or not.
+User: Kdump service
diff --git a/Documentation/ABI/obsolete/sysfs-kernel-fadump_registered b/Documentation/ABI/obsolete/sysfs-kernel-fadump_registered
new file mode 100644
index 000000000000..dae880b1a5d5
--- /dev/null
+++ b/Documentation/ABI/obsolete/sysfs-kernel-fadump_registered
@@ -0,0 +1,10 @@
+This ABI is renamed and moved to a new location /sys/kernel/fadump/registered.
+
+What: /sys/kernel/fadump_registered
+Date: Feb 2012
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: read/write
+ Helps to control the dump collect feature from userspace.
+ Setting 1 to this file enables the system to collect the
+ dump and 0 to disable it.
+User: Kdump service
diff --git a/Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem b/Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem
new file mode 100644
index 000000000000..ca2396edb5f1
--- /dev/null
+++ b/Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem
@@ -0,0 +1,10 @@
+This ABI is renamed and moved to a new location /sys/kernel/fadump/release_mem.
+
+What: /sys/kernel/fadump_release_mem
+Date: Feb 2012
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: write only
+ This is a special sysfs file and only available when
+ the system is booted to capture the vmcore using FADump.
+ It is used to release the memory reserved by FADump to
+ save the crash dump.
diff --git a/Documentation/ABI/obsolete/sysfs-platform-ideapad-laptop b/Documentation/ABI/obsolete/sysfs-platform-ideapad-laptop
new file mode 100644
index 000000000000..c1dbd19c679c
--- /dev/null
+++ b/Documentation/ABI/obsolete/sysfs-platform-ideapad-laptop
@@ -0,0 +1,8 @@
+What: /sys/bus/platform/devices/VPC2004:*/conservation_mode
+Date: Aug 2017
+KernelVersion: 4.14
+Contact: platform-driver-x86@vger.kernel.org
+Description:
+ Controls whether the conservation mode is enabled or not.
+ This feature limits the maximum battery charge percentage to
+ around 50-60% in order to prolong the lifetime of the battery.
diff --git a/Documentation/ABI/obsolete/sysfs-selinux-user b/Documentation/ABI/obsolete/sysfs-selinux-user
new file mode 100644
index 000000000000..8ab7557f283f
--- /dev/null
+++ b/Documentation/ABI/obsolete/sysfs-selinux-user
@@ -0,0 +1,12 @@
+What: /sys/fs/selinux/user
+Date: April 2005 (predates git)
+KernelVersion: 2.6.12-rc2 (predates git)
+Contact: selinux@vger.kernel.org
+Description:
+
+ The selinuxfs "user" node allows userspace to request a list
+ of security contexts that can be reached for a given SELinux
+ user from a given starting context. This was used by libselinux
+ when various login-style programs requested contexts for
+ users, but libselinux stopped using it in 2020.
+ Kernel support will be removed no sooner than Dec 2025.
diff --git a/Documentation/ABI/removed/devfs b/Documentation/ABI/removed/devfs
index 0020c49933c4..24fb35adf277 100644
--- a/Documentation/ABI/removed/devfs
+++ b/Documentation/ABI/removed/devfs
@@ -5,6 +5,7 @@ Description:
devfs has been unmaintained for a number of years, has unfixable
races, contains a naming policy within the kernel that is
against the LSB, and can be replaced by using udev.
+
The files fs/devfs/*, include/linux/devfs_fs*.h were removed,
along with the assorted devfs function calls throughout the
kernel tree.
diff --git a/Documentation/ABI/removed/o2cb b/Documentation/ABI/removed/o2cb
index 20c91adca6d4..61cff238fbe8 100644
--- a/Documentation/ABI/removed/o2cb
+++ b/Documentation/ABI/removed/o2cb
@@ -1,10 +1,10 @@
What: /sys/o2cb symlink
Date: May 2011
KernelVersion: 3.0
-Contact: ocfs2-devel@oss.oracle.com
+Contact: ocfs2-devel@lists.linux.dev
Description: This is a symlink: /sys/o2cb to /sys/fs/o2cb. The symlink is
removed when new versions of ocfs2-tools which know to look
in /sys/fs/o2cb are sufficiently prevalent. Don't code new
software to look here, it should try /sys/fs/o2cb instead.
Users: ocfs2-tools. It's sufficient to mail proposed changes to
- ocfs2-devel@oss.oracle.com.
+ ocfs2-devel@lists.linux.dev.
diff --git a/Documentation/ABI/removed/raw1394 b/Documentation/ABI/removed/raw1394
index ec333e676322..9ec7ec493920 100644
--- a/Documentation/ABI/removed/raw1394
+++ b/Documentation/ABI/removed/raw1394
@@ -7,6 +7,7 @@ Description:
to implement sensible device security policies, and its low level
of abstraction that required userspace clients to duplicate much
of the kernel's ieee1394 core functionality.
+
Replaced by /dev/fw*, i.e. the <linux/firewire-cdev.h> ABI of
firewire-core.
diff --git a/Documentation/ABI/removed/sysfs-bus-nfit b/Documentation/ABI/removed/sysfs-bus-nfit
index ae8c1ca53828..277437005def 100644
--- a/Documentation/ABI/removed/sysfs-bus-nfit
+++ b/Documentation/ABI/removed/sysfs-bus-nfit
@@ -1,7 +1,7 @@
What: /sys/bus/nd/devices/regionX/nfit/ecc_unit_size
Date: Aug, 2017
KernelVersion: v4.14 (Removed v4.18)
-Contact: linux-nvdimm@lists.01.org
+Contact: nvdimm@lists.linux.dev
Description:
(RO) Size of a write request to a DIMM that will not incur a
read-modify-write cycle at the memory controller.
diff --git a/Documentation/ABI/testing/sysfs-class-cxl b/Documentation/ABI/removed/sysfs-class-cxl
index bbbabffc682a..266c413b96e8 100644
--- a/Documentation/ABI/testing/sysfs-class-cxl
+++ b/Documentation/ABI/removed/sysfs-class-cxl
@@ -1,13 +1,15 @@
-Note: Attributes that are shared between devices are stored in the directory
-pointed to by the symlink device/.
-Example: The real path of the attribute /sys/class/cxl/afu0.0s/irqs_max is
+The cxl driver was removed in 6.15.
+
+Please note that attributes that are shared between devices are stored in
+the directory pointed to by the symlink device/.
+For example, the real path of the attribute /sys/class/cxl/afu0.0s/irqs_max is
/sys/class/cxl/afu0.0s/device/irqs_max, i.e. /sys/class/cxl/afu0.0/irqs_max.
Slave contexts (eg. /sys/class/cxl/afu0.0s):
What: /sys/class/cxl/<afu>/afu_err_buf
-Date: September 2014
+Date: September 2014, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
AFU Error Buffer contents. The contents of this file are
@@ -18,7 +20,7 @@ Description: read only
What: /sys/class/cxl/<afu>/irqs_max
-Date: September 2014
+Date: September 2014, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read/write
Decimal value of maximum number of interrupts that can be
@@ -29,7 +31,7 @@ Description: read/write
Users: https://github.com/ibm-capi/libcxl
What: /sys/class/cxl/<afu>/irqs_min
-Date: September 2014
+Date: September 2014, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
Decimal value of the minimum number of interrupts that
@@ -39,15 +41,15 @@ Description: read only
Users: https://github.com/ibm-capi/libcxl
What: /sys/class/cxl/<afu>/mmio_size
-Date: September 2014
+Date: September 2014, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
- Decimal value of the size of the MMIO space that may be mmaped
+ Decimal value of the size of the MMIO space that may be mmapped
by userspace.
Users: https://github.com/ibm-capi/libcxl
What: /sys/class/cxl/<afu>/modes_supported
-Date: September 2014
+Date: September 2014, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
List of the modes this AFU supports. One per line.
@@ -55,7 +57,7 @@ Description: read only
Users: https://github.com/ibm-capi/libcxl
What: /sys/class/cxl/<afu>/mode
-Date: September 2014
+Date: September 2014, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read/write
The current mode the AFU is using. Will be one of the modes
@@ -65,22 +67,27 @@ Users: https://github.com/ibm-capi/libcxl
What: /sys/class/cxl/<afu>/prefault_mode
-Date: September 2014
+Date: September 2014, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read/write
Set the mode for prefaulting in segments into the segment table
when performing the START_WORK ioctl. Only applicable when
running under hashed page table mmu.
Possible values:
- none: No prefaulting (default)
- work_element_descriptor: Treat the work element
- descriptor as an effective address and
- prefault what it points to.
- all: all segments process calling START_WORK maps.
+
+ ======================= ======================================
+ none No prefaulting (default)
+ work_element_descriptor Treat the work element
+ descriptor as an effective address and
+ prefault what it points to.
+ all all segments process calling
+ START_WORK maps.
+ ======================= ======================================
+
Users: https://github.com/ibm-capi/libcxl
What: /sys/class/cxl/<afu>/reset
-Date: September 2014
+Date: September 2014, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: write only
Writing 1 here will reset the AFU provided there are not
@@ -88,18 +95,18 @@ Description: write only
Users: https://github.com/ibm-capi/libcxl
What: /sys/class/cxl/<afu>/api_version
-Date: September 2014
+Date: September 2014, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
Decimal value of the current version of the kernel/user API.
Users: https://github.com/ibm-capi/libcxl
What: /sys/class/cxl/<afu>/api_version_compatible
-Date: September 2014
+Date: September 2014, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
- Decimal value of the the lowest version of the userspace API
- this this kernel supports.
+ Decimal value of the lowest version of the userspace API
+ this kernel supports.
Users: https://github.com/ibm-capi/libcxl
@@ -109,7 +116,7 @@ An AFU may optionally export one or more PCIe like configuration records, known
as AFU configuration records, which will show up here (if present).
What: /sys/class/cxl/<afu>/cr<config num>/vendor
-Date: February 2015
+Date: February 2015, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
Hexadecimal value of the vendor ID found in this AFU
@@ -117,7 +124,7 @@ Description: read only
Users: https://github.com/ibm-capi/libcxl
What: /sys/class/cxl/<afu>/cr<config num>/device
-Date: February 2015
+Date: February 2015, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
Hexadecimal value of the device ID found in this AFU
@@ -125,7 +132,7 @@ Description: read only
Users: https://github.com/ibm-capi/libcxl
What: /sys/class/cxl/<afu>/cr<config num>/class
-Date: February 2015
+Date: February 2015, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
Hexadecimal value of the class code found in this AFU
@@ -133,7 +140,7 @@ Description: read only
Users: https://github.com/ibm-capi/libcxl
What: /sys/class/cxl/<afu>/cr<config num>/config
-Date: February 2015
+Date: February 2015, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
This binary file provides raw access to the AFU configuration
@@ -147,24 +154,25 @@ Users: https://github.com/ibm-capi/libcxl
Master contexts (eg. /sys/class/cxl/afu0.0m)
What: /sys/class/cxl/<afu>m/mmio_size
-Date: September 2014
+Date: September 2014, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
- Decimal value of the size of the MMIO space that may be mmaped
+ Decimal value of the size of the MMIO space that may be mmapped
by userspace. This includes all slave contexts space also.
Users: https://github.com/ibm-capi/libcxl
What: /sys/class/cxl/<afu>m/pp_mmio_len
-Date: September 2014
+Date: September 2014, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
Decimal value of the Per Process MMIO space length.
Users: https://github.com/ibm-capi/libcxl
-What: /sys/class/cxl/<afu>m/pp_mmio_off (not in a guest)
-Date: September 2014
+What: /sys/class/cxl/<afu>m/pp_mmio_off
+Date: September 2014, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
+ (not in a guest)
Decimal value of the Per Process MMIO space offset.
Users: https://github.com/ibm-capi/libcxl
@@ -172,46 +180,50 @@ Users: https://github.com/ibm-capi/libcxl
Card info (eg. /sys/class/cxl/card0)
What: /sys/class/cxl/<card>/caia_version
-Date: September 2014
+Date: September 2014, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
Identifies the CAIA Version the card implements.
Users: https://github.com/ibm-capi/libcxl
What: /sys/class/cxl/<card>/psl_revision
-Date: September 2014
+Date: September 2014, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
Identifies the revision level of the PSL.
Users: https://github.com/ibm-capi/libcxl
-What: /sys/class/cxl/<card>/base_image (not in a guest)
-Date: September 2014
+What: /sys/class/cxl/<card>/base_image
+Date: September 2014, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
+ (not in a guest)
Identifies the revision level of the base image for devices
that support loadable PSLs. For FPGAs this field identifies
the image contained in the on-adapter flash which is loaded
during the initial program load.
Users: https://github.com/ibm-capi/libcxl
-What: /sys/class/cxl/<card>/image_loaded (not in a guest)
-Date: September 2014
+What: /sys/class/cxl/<card>/image_loaded
+Date: September 2014, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
+ (not in a guest)
Will return "user" or "factory" depending on the image loaded
onto the card.
Users: https://github.com/ibm-capi/libcxl
-What: /sys/class/cxl/<card>/load_image_on_perst (not in a guest)
-Date: December 2014
+What: /sys/class/cxl/<card>/load_image_on_perst
+Date: December 2014, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read/write
+ (not in a guest)
Valid entries are "none", "user", and "factory".
"none" means PERST will not cause image to be loaded to the
card. A power cycle is required to load the image.
"none" could be useful for debugging because the trace arrays
are preserved.
+
"user" and "factory" means PERST will cause either the user or
user or factory image to be loaded.
Default is to reload on PERST whichever image the card has
@@ -219,7 +231,7 @@ Description: read/write
Users: https://github.com/ibm-capi/libcxl
What: /sys/class/cxl/<card>/reset
-Date: October 2014
+Date: October 2014, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: write only
Writing 1 will issue a PERST to card provided there are no
@@ -229,18 +241,22 @@ Description: write only
contexts on the card AFUs.
Users: https://github.com/ibm-capi/libcxl
-What: /sys/class/cxl/<card>/perst_reloads_same_image (not in a guest)
-Date: July 2015
+What: /sys/class/cxl/<card>/perst_reloads_same_image
+Date: July 2015, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read/write
+ (not in a guest)
Trust that when an image is reloaded via PERST, it will not
have changed.
- 0 = don't trust, the image may be different (default)
- 1 = trust that the image will not change.
+
+ == =================================================
+ 0 don't trust, the image may be different (default)
+ 1 trust that the image will not change.
+ == =================================================
Users: https://github.com/ibm-capi/libcxl
What: /sys/class/cxl/<card>/psl_timebase_synced
-Date: March 2016
+Date: March 2016, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
Returns 1 if the psl timebase register is synchronized
@@ -248,7 +264,7 @@ Description: read only
Users: https://github.com/ibm-capi/libcxl
What: /sys/class/cxl/<card>/tunneled_ops_supported
-Date: May 2018
+Date: May 2018, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
Returns 1 if tunneled operations are supported in capi mode,
diff --git a/Documentation/ABI/removed/sysfs-class-rfkill b/Documentation/ABI/removed/sysfs-class-rfkill
index 3ce6231f20b2..20cb688af173 100644
--- a/Documentation/ABI/removed/sysfs-class-rfkill
+++ b/Documentation/ABI/removed/sysfs-class-rfkill
@@ -1,13 +1,13 @@
rfkill - radio frequency (RF) connector kill switch support
-For details to this subsystem look at Documentation/rfkill.txt.
+For details to this subsystem look at Documentation/driver-api/rfkill.rst.
What: /sys/class/rfkill/rfkill[0-9]+/claim
Date: 09-Jul-2007
-KernelVersion v2.6.22
+KernelVersion: v2.6.22
Contact: linux-wireless@vger.kernel.org
Description: This file was deprecated because there no longer was a way to
claim just control over a single rfkill instance.
This file was scheduled to be removed in 2012, and was removed
in 2016.
-Values: 0: Kernel handles events
+Values: 0: Kernel handles events
diff --git a/Documentation/ABI/removed/sysfs-firmware-efi-vars b/Documentation/ABI/removed/sysfs-firmware-efi-vars
new file mode 100644
index 000000000000..8d97368b149b
--- /dev/null
+++ b/Documentation/ABI/removed/sysfs-firmware-efi-vars
@@ -0,0 +1,12 @@
+What: /sys/firmware/efi/vars
+Date: April 2004, removed March 2023
+Description:
+ This directory exposed interfaces for interacting with
+ EFI variables. For more information on EFI variables,
+ see 'Variable Services' in the UEFI specification
+ (section 7.2 in specification version 2.3 Errata D).
+
+ The 'efivars' sysfs interface was removed in March of 2023,
+ after being considered deprecated no later than September
+ of 2020. Its functionality has been replaced by the
+ 'efivarfs' filesystem.
diff --git a/Documentation/ABI/removed/sysfs-kernel-fadump_release_opalcore b/Documentation/ABI/removed/sysfs-kernel-fadump_release_opalcore
new file mode 100644
index 000000000000..a8d46cd0f4e6
--- /dev/null
+++ b/Documentation/ABI/removed/sysfs-kernel-fadump_release_opalcore
@@ -0,0 +1,9 @@
+This ABI is moved to /sys/firmware/opal/mpipl/release_core.
+
+What: /sys/kernel/fadump_release_opalcore
+Date: Sep 2019
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: write only
+ The sysfs file is available when the system is booted to
+ collect the dump on OPAL based machine. It used to release
+ the memory used to collect the opalcore.
diff --git a/Documentation/ABI/testing/sysfs-kernel-uids b/Documentation/ABI/removed/sysfs-kernel-uids
index 28f14695a852..85a90b86ce1e 100644
--- a/Documentation/ABI/testing/sysfs-kernel-uids
+++ b/Documentation/ABI/removed/sysfs-kernel-uids
@@ -1,14 +1,14 @@
What: /sys/kernel/uids/<uid>/cpu_shares
-Date: December 2007
+Date: December 2007, finally removed in kernel v2.6.34-rc1
Contact: Dhaval Giani <dhaval@linux.vnet.ibm.com>
Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Description:
The /sys/kernel/uids/<uid>/cpu_shares tunable is used
to set the cpu bandwidth a user is allowed. This is a
- propotional value. What that means is that if there
+ proportional value. What that means is that if there
are two users logged in, each with an equal number of
shares, then they will get equal CPU bandwidth. Another
example would be, if User A has shares = 1024 and user
B has shares = 2048, User B will get twice the CPU
bandwidth user A will. For more details refer
- Documentation/scheduler/sched-design-CFS.txt
+ Documentation/scheduler/sched-design-CFS.rst
diff --git a/Documentation/ABI/removed/sysfs-mce b/Documentation/ABI/removed/sysfs-mce
new file mode 100644
index 000000000000..ef5dd2a80918
--- /dev/null
+++ b/Documentation/ABI/removed/sysfs-mce
@@ -0,0 +1,37 @@
+What: /sys/devices/system/machinecheck/machinecheckX/tolerant
+Contact: Borislav Petkov <bp@suse.de>
+Date: Dec, 2021
+Description:
+ Unused and obsolete after the advent of recoverable machine
+ checks (see last sentence below) and those are present since
+ 2010 (Nehalem).
+
+ Original description:
+
+ The entries appear for each CPU, but they are truly shared
+ between all CPUs.
+
+ Tolerance level. When a machine check exception occurs for a
+ non corrected machine check the kernel can take different
+ actions.
+
+ Since machine check exceptions can happen any time it is
+ sometimes risky for the kernel to kill a process because it
+ defies normal kernel locking rules. The tolerance level
+ configures how hard the kernel tries to recover even at some
+ risk of deadlock. Higher tolerant values trade potentially
+ better uptime with the risk of a crash or even corruption
+ (for tolerant >= 3).
+
+ == ===========================================================
+ 0 always panic on uncorrected errors, log corrected errors
+ 1 panic or SIGBUS on uncorrected errors, log corrected errors
+ 2 SIGBUS or log uncorrected errors, log corrected errors
+ 3 never panic or SIGBUS, log all errors (for testing only)
+ == ===========================================================
+
+ Default: 1
+
+ Note this only makes a difference if the CPU allows recovery
+ from a machine check exception. Current x86 CPUs generally
+ do not.
diff --git a/Documentation/ABI/removed/sysfs-selinux-checkreqprot b/Documentation/ABI/removed/sysfs-selinux-checkreqprot
new file mode 100644
index 000000000000..f599a0a87e8b
--- /dev/null
+++ b/Documentation/ABI/removed/sysfs-selinux-checkreqprot
@@ -0,0 +1,26 @@
+What: /sys/fs/selinux/checkreqprot
+Date: April 2005 (predates git)
+KernelVersion: 2.6.12-rc2 (predates git)
+Contact: selinux@vger.kernel.org
+Description:
+
+ REMOVAL UPDATE: The SELinux checkreqprot functionality was removed in
+ March 2023, the original deprecation notice is shown below.
+
+ The selinuxfs "checkreqprot" node allows SELinux to be configured
+ to check the protection requested by userspace for mmap/mprotect
+ calls instead of the actual protection applied by the kernel.
+ This was a compatibility mechanism for legacy userspace and
+ for the READ_IMPLIES_EXEC personality flag. However, if set to
+ 1, it weakens security by allowing mappings to be made executable
+ without authorization by policy. The default value of checkreqprot
+ at boot was changed starting in Linux v4.4 to 0 (i.e. check the
+ actual protection), and Android and Linux distributions have been
+ explicitly writing a "0" to /sys/fs/selinux/checkreqprot during
+ initialization for some time. Support for setting checkreqprot to 1
+ will be removed no sooner than June 2021, at which point the kernel
+ will always cease using checkreqprot internally and will always
+ check the actual protections being applied upon mmap/mprotect calls.
+ The checkreqprot selinuxfs node will remain for backward compatibility
+ but will discard writes of the "0" value and will reject writes of the
+ "1" value when this mechanism is removed.
diff --git a/Documentation/ABI/removed/sysfs-selinux-disable b/Documentation/ABI/removed/sysfs-selinux-disable
new file mode 100644
index 000000000000..cb783c64cab3
--- /dev/null
+++ b/Documentation/ABI/removed/sysfs-selinux-disable
@@ -0,0 +1,29 @@
+What: /sys/fs/selinux/disable
+Date: April 2005 (predates git)
+KernelVersion: 2.6.12-rc2 (predates git)
+Contact: selinux@vger.kernel.org
+Description:
+
+ REMOVAL UPDATE: The SELinux runtime disable functionality was removed
+ in March 2023, the original deprecation notice is shown below.
+
+ The selinuxfs "disable" node allows SELinux to be disabled at runtime
+ prior to a policy being loaded into the kernel. If disabled via this
+ mechanism, SELinux will remain disabled until the system is rebooted.
+
+ The preferred method of disabling SELinux is via the "selinux=0" boot
+ parameter, but the selinuxfs "disable" node was created to make it
+ easier for systems with primitive bootloaders that did not allow for
+ easy modification of the kernel command line. Unfortunately, allowing
+ for SELinux to be disabled at runtime makes it difficult to secure the
+ kernel's LSM hooks using the "__ro_after_init" feature.
+
+ Thankfully, the need for the SELinux runtime disable appears to be
+ gone, the default Kconfig configuration disables this selinuxfs node,
+ and only one of the major distributions, Fedora, supports disabling
+ SELinux at runtime. Fedora is in the process of removing the
+ selinuxfs "disable" node and once that is complete we will start the
+ slow process of removing this code from the kernel.
+
+ More information on /sys/fs/selinux/disable can be found under the
+ CONFIG_SECURITY_SELINUX_DISABLE Kconfig option.
diff --git a/Documentation/ABI/removed/video1394 b/Documentation/ABI/removed/video1394
index c39c25aee77b..1905d35a6619 100644
--- a/Documentation/ABI/removed/video1394
+++ b/Documentation/ABI/removed/video1394
@@ -8,6 +8,7 @@ Description:
performance issues in its first generation. Any video1394 user had
to use raw1394 + libraw1394 too because video1394 did not provide
asynchronous I/O for device discovery and configuration.
+
Replaced by /dev/fw*, i.e. the <linux/firewire-cdev.h> ABI of
firewire-core.
diff --git a/Documentation/ABI/stable/firewire-cdev b/Documentation/ABI/stable/firewire-cdev
index f72ed653878a..261f85b13154 100644
--- a/Documentation/ABI/stable/firewire-cdev
+++ b/Documentation/ABI/stable/firewire-cdev
@@ -14,13 +14,17 @@ Description:
Each /dev/fw* is associated with one IEEE 1394 node, which can
be remote or local nodes. Operations on a /dev/fw* file have
different scope:
+
- The 1394 node which is associated with the file:
+
- Asynchronous request transmission
- Get the Configuration ROM
- Query node ID
- Query maximum speed of the path between this node
and local node
+
- The 1394 bus (i.e. "card") to which the node is attached to:
+
- Isochronous stream transmission and reception
- Asynchronous stream transmission and reception
- Asynchronous broadcast request transmission
@@ -31,7 +35,9 @@ Description:
manager
- Query cycle time
- Bus reset initiation, bus reset event reception
+
- All 1394 buses:
+
- Allocation of IEEE 1212 address ranges on the local
link layers, reception of inbound requests to such
an address range, asynchronous response transmission
@@ -43,6 +49,7 @@ Description:
userland implement different access permission models, some
operations are restricted to /dev/fw* files that are associated
with a local node:
+
- Addition of descriptors or directories to the local
nodes' Configuration ROM
- PHY packet transmission and reception
@@ -55,50 +62,50 @@ Description:
The following file operations are supported:
open(2)
- Currently the only useful flags are O_RDWR.
+ Currently the only useful flags are O_RDWR.
ioctl(2)
- Initiate various actions. Some take immediate effect, others
- are performed asynchronously while or after the ioctl returns.
- See the inline documentation in <linux/firewire-cdev.h> for
- descriptions of all ioctls.
+ Initiate various actions. Some take immediate effect, others
+ are performed asynchronously while or after the ioctl returns.
+ See the inline documentation in <linux/firewire-cdev.h> for
+ descriptions of all ioctls.
poll(2), select(2), epoll_wait(2) etc.
- Watch for events to become available to be read.
+ Watch for events to become available to be read.
read(2)
- Receive various events. There are solicited events like
- outbound asynchronous transaction completion or isochronous
- buffer completion, and unsolicited events such as bus resets,
- request reception, or PHY packet reception. Always use a read
- buffer which is large enough to receive the largest event that
- could ever arrive. See <linux/firewire-cdev.h> for descriptions
- of all event types and for which ioctls affect reception of
- events.
+ Receive various events. There are solicited events like
+ outbound asynchronous transaction completion or isochronous
+ buffer completion, and unsolicited events such as bus resets,
+ request reception, or PHY packet reception. Always use a read
+ buffer which is large enough to receive the largest event that
+ could ever arrive. See <linux/firewire-cdev.h> for descriptions
+ of all event types and for which ioctls affect reception of
+ events.
mmap(2)
- Allocate a DMA buffer for isochronous reception or transmission
- and map it into the process address space. The arguments should
- be used as follows: addr = NULL, length = the desired buffer
- size, i.e. number of packets times size of largest packet,
- prot = at least PROT_READ for reception and at least PROT_WRITE
- for transmission, flags = MAP_SHARED, fd = the handle to the
- /dev/fw*, offset = 0.
+ Allocate a DMA buffer for isochronous reception or transmission
+ and map it into the process address space. The arguments should
+ be used as follows: addr = NULL, length = the desired buffer
+ size, i.e. number of packets times size of largest packet,
+ prot = at least PROT_READ for reception and at least PROT_WRITE
+ for transmission, flags = MAP_SHARED, fd = the handle to the
+ /dev/fw*, offset = 0.
Isochronous reception works in packet-per-buffer fashion except
for multichannel reception which works in buffer-fill mode.
munmap(2)
- Unmap the isochronous I/O buffer from the process address space.
+ Unmap the isochronous I/O buffer from the process address space.
close(2)
- Besides stopping and freeing I/O contexts that were associated
- with the file descriptor, back out any changes to the local
- nodes' Configuration ROM. Deallocate isochronous channels and
- bandwidth at the IRM that were marked for kernel-assisted
- re- and deallocation.
-
-Users: libraw1394
- libdc1394
- libhinawa
+ Besides stopping and freeing I/O contexts that were associated
+ with the file descriptor, back out any changes to the local
+ nodes' Configuration ROM. Deallocate isochronous channels and
+ bandwidth at the IRM that were marked for kernel-assisted
+ re- and deallocation.
+
+Users: libraw1394;
+ libdc1394;
+ libhinawa;
tools like linux-firewire-utils, fwhack, ...
diff --git a/Documentation/ABI/stable/o2cb b/Documentation/ABI/stable/o2cb
index 5eb1545e0b8d..3a83b5c54e93 100644
--- a/Documentation/ABI/stable/o2cb
+++ b/Documentation/ABI/stable/o2cb
@@ -1,10 +1,10 @@
-What: /sys/fs/o2cb/ (was /sys/o2cb)
+What: /sys/fs/o2cb/
Date: Dec 2005
KernelVersion: 2.6.16
-Contact: ocfs2-devel@oss.oracle.com
+Contact: ocfs2-devel@lists.linux.dev
Description: Ocfs2-tools looks at 'interface-revision' for versioning
information. Each logmask/ file controls a set of debug prints
and can be written into with the strings "allow", "deny", or
"off". Reading the file returns the current state.
Users: ocfs2-tools. It's sufficient to mail proposed changes to
- ocfs2-devel@oss.oracle.com.
+ ocfs2-devel@lists.linux.dev.
diff --git a/Documentation/ABI/stable/procfs-audit_loginuid b/Documentation/ABI/stable/procfs-audit_loginuid
new file mode 100644
index 000000000000..cda405178391
--- /dev/null
+++ b/Documentation/ABI/stable/procfs-audit_loginuid
@@ -0,0 +1,27 @@
+What: Audit Login UID
+Date: 2005-02-01
+KernelVersion: 2.6.11-rc2 1e2d1492e178 ("[PATCH] audit: handle loginuid through proc")
+Contact: linux-audit@redhat.com
+Users: audit and login applications
+Description:
+ The /proc/$pid/loginuid pseudofile is written to set and
+ read to get the audit login UID of process $pid as a
+ decimal unsigned int (%u, u32). If it is unset,
+ permissions are not needed to set it. The accessor must
+ have CAP_AUDIT_CONTROL in the initial user namespace to
+ write it if it has been set. It cannot be written again
+ if AUDIT_FEATURE_LOGINUID_IMMUTABLE is enabled. It
+ cannot be unset if AUDIT_FEATURE_ONLY_UNSET_LOGINUID is
+ enabled.
+
+What: Audit Login Session ID
+Date: 2008-03-13
+KernelVersion: 2.6.25-rc7 1e0bd7550ea9 ("[PATCH] export sessionid alongside the loginuid in procfs")
+Contact: linux-audit@redhat.com
+Users: audit and login applications
+Description:
+ The /proc/$pid/sessionid pseudofile is read to get the
+ audit login session ID of process $pid as a decimal
+ unsigned int (%u, u32). It is set automatically,
+ serially assigned with each new login.
+
diff --git a/Documentation/ABI/stable/sysfs-acpi-pmprofile b/Documentation/ABI/stable/sysfs-acpi-pmprofile
index 964c7a8afb26..cd55e421d921 100644
--- a/Documentation/ABI/stable/sysfs-acpi-pmprofile
+++ b/Documentation/ABI/stable/sysfs-acpi-pmprofile
@@ -1,22 +1,28 @@
-What: /sys/firmware/acpi/pm_profile
+What: /sys/firmware/acpi/pm_profile
Date: 03-Nov-2011
KernelVersion: v3.2
Contact: linux-acpi@vger.kernel.org
-Description: The ACPI pm_profile sysfs interface exports the platform
- power management (and performance) requirement expectations
- as provided by BIOS. The integer value is directly passed as
- retrieved from the FADT ACPI table.
-Values: For possible values see ACPI specification:
- 5.2.9 Fixed ACPI Description Table (FADT)
- Field: Preferred_PM_Profile
+Description: The ACPI pm_profile sysfs interface exposes the preferred
+ power management (and performance) profile of the platform
+ as provided in the ACPI FADT Preferred_PM_Profile field.
- Currently these values are defined by spec:
- 0 Unspecified
- 1 Desktop
- 2 Mobile
- 3 Workstation
- 4 Enterprise Server
- 5 SOHO Server
- 6 Appliance PC
- 7 Performance Server
- >7 Reserved
+ The integer value is directly passed as retrieved from the FADT.
+
+Values: For the possible values refer to the Preferred_PM_Profile field
+ definition in Table 5.9 "FADT Format", Section 5.2.9 "Fixed ACPI
+ Description Table (FADT)" of the ACPI specification.
+
+ As of ACPI 6.5, the following values are defined:
+
+ == =================
+ 0 Unspecified
+ 1 Desktop
+ 2 Mobile
+ 3 Workstation
+ 4 Enterprise Server
+ 5 SOHO Server
+ 6 Appliance PC
+ 7 Performance Server
+ 8 Tablet
+ >8 Reserved
+ == =================
diff --git a/Documentation/ABI/stable/sysfs-block b/Documentation/ABI/stable/sysfs-block
new file mode 100644
index 000000000000..0ed10aeff86b
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-block
@@ -0,0 +1,881 @@
+What: /sys/block/<disk>/alignment_offset
+Date: April 2009
+Contact: Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+ Storage devices may report a physical block size that is
+ bigger than the logical block size (for instance a drive
+ with 4KB physical sectors exposing 512-byte logical
+ blocks to the operating system). This parameter
+ indicates how many bytes the beginning of the device is
+ offset from the disk's natural alignment.
+
+
+What: /sys/block/<disk>/discard_alignment
+Date: May 2011
+Contact: Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+ Devices that support discard functionality may
+ internally allocate space in units that are bigger than
+ the exported logical block size. The discard_alignment
+ parameter indicates how many bytes the beginning of the
+ device is offset from the internal allocation unit's
+ natural alignment.
+
+What: /sys/block/<disk>/atomic_write_max_bytes
+Date: February 2024
+Contact: Himanshu Madhani <himanshu.madhani@oracle.com>
+Description:
+ [RO] This parameter specifies the maximum atomic write
+ size reported by the device. This parameter is relevant
+ for merging of writes, where a merged atomic write
+ operation must not exceed this number of bytes.
+ This parameter may be greater than the value in
+ atomic_write_unit_max_bytes as
+ atomic_write_unit_max_bytes will be rounded down to a
+ power-of-two and atomic_write_unit_max_bytes may also be
+ limited by some other queue limits, such as max_segments.
+ This parameter - along with atomic_write_unit_min_bytes
+ and atomic_write_unit_max_bytes - will not be larger than
+ max_hw_sectors_kb, but may be larger than max_sectors_kb.
+
+
+What: /sys/block/<disk>/atomic_write_unit_min_bytes
+Date: February 2024
+Contact: Himanshu Madhani <himanshu.madhani@oracle.com>
+Description:
+ [RO] This parameter specifies the smallest block which can
+ be written atomically with an atomic write operation. All
+ atomic write operations must begin at a
+ atomic_write_unit_min boundary and must be multiples of
+ atomic_write_unit_min. This value must be a power-of-two.
+
+
+What: /sys/block/<disk>/atomic_write_unit_max_bytes
+Date: February 2024
+Contact: Himanshu Madhani <himanshu.madhani@oracle.com>
+Description:
+ [RO] This parameter defines the largest block which can be
+ written atomically with an atomic write operation. This
+ value must be a multiple of atomic_write_unit_min and must
+ be a power-of-two. This value will not be larger than
+ atomic_write_max_bytes.
+
+
+What: /sys/block/<disk>/atomic_write_boundary_bytes
+Date: February 2024
+Contact: Himanshu Madhani <himanshu.madhani@oracle.com>
+Description:
+ [RO] A device may need to internally split an atomic write I/O
+ which straddles a given logical block address boundary. This
+ parameter specifies the size in bytes of the atomic boundary if
+ one is reported by the device. This value must be a
+ power-of-two and at least the size as in
+ atomic_write_unit_max_bytes.
+ Any attempt to merge atomic write I/Os must not result in a
+ merged I/O which crosses this boundary (if any).
+
+
+What: /sys/block/<disk>/diskseq
+Date: February 2021
+Contact: Matteo Croce <teknoraver@meta.com>
+Description:
+ The /sys/block/<disk>/diskseq files reports the disk
+ sequence number, which is a monotonically increasing
+ number assigned to every drive.
+ Some devices, like the loop device, refresh such number
+ every time the backing file is changed.
+ The value type is 64 bit unsigned.
+
+
+What: /sys/block/<disk>/inflight
+Date: October 2009
+Contact: Jens Axboe <axboe@kernel.dk>, Nikanth Karthikesan <knikanth@suse.de>
+Description:
+ Reports the number of I/O requests currently in progress
+ (pending / in flight) in a device driver. This can be less
+ than the number of requests queued in the block device queue.
+ The report contains 2 fields: one for read requests
+ and one for write requests.
+ The value type is unsigned int.
+ Cf. Documentation/block/stat.rst which contains a single value for
+ requests in flight.
+ This is related to /sys/block/<disk>/queue/nr_requests
+ and for SCSI device also its queue_depth.
+
+
+What: /sys/block/<disk>/integrity/device_is_integrity_capable
+Date: July 2014
+Contact: Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+ Indicates whether a storage device is capable of storing
+ integrity metadata. Set if the device is T10 PI-capable.
+ This flag is set to 1 if the storage media is formatted
+ with T10 Protection Information. If the storage media is
+ not formatted with T10 Protection Information, this flag
+ is set to 0.
+
+
+What: /sys/block/<disk>/integrity/format
+Date: June 2008
+Contact: Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+ Metadata format for integrity capable block device.
+ E.g. T10-DIF-TYPE1-CRC.
+ This field describes the type of T10 Protection Information
+ that the block device can send and receive.
+ If the device can store application integrity metadata but
+ no T10 Protection Information profile is used, this field
+ contains "nop".
+ If the device does not support integrity metadata, this
+ field contains "none".
+
+
+What: /sys/block/<disk>/integrity/protection_interval_bytes
+Date: July 2015
+Contact: Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+ Describes the number of data bytes which are protected
+ by one integrity tuple. Typically the device's logical
+ block size.
+
+
+What: /sys/block/<disk>/integrity/read_verify
+Date: June 2008
+Contact: Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+ Indicates whether the block layer should verify the
+ integrity of read requests serviced by devices that
+ support sending integrity metadata.
+
+
+What: /sys/block/<disk>/integrity/tag_size
+Date: June 2008
+Contact: Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+ Number of bytes of integrity tag space available per
+ protection_interval_bytes, which is typically
+ the device's logical block size.
+ This field describes the size of the application tag
+ if the storage device is formatted with T10 Protection
+ Information and permits use of the application tag.
+ The tag_size is reported in bytes and indicates the
+ space available for adding an opaque tag to each block
+ (protection_interval_bytes).
+ If the device does not support T10 Protection Information
+ (even if the device provides application integrity
+ metadata space), this field is set to 0.
+
+
+What: /sys/block/<disk>/integrity/write_generate
+Date: June 2008
+Contact: Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+ Indicates whether the block layer should automatically
+ generate checksums for write requests bound for
+ devices that support receiving integrity metadata.
+
+
+What: /sys/block/<disk>/partscan
+Date: May 2024
+Contact: Christoph Hellwig <hch@lst.de>
+Description:
+ The /sys/block/<disk>/partscan files reports if partition
+ scanning is enabled for the disk. It returns "1" if partition
+ scanning is enabled, or "0" if not. The value type is a 32-bit
+ unsigned integer, but only "0" and "1" are valid values.
+
+
+What: /sys/block/<disk>/<partition>/alignment_offset
+Date: April 2009
+Contact: Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+ Storage devices may report a physical block size that is
+ bigger than the logical block size (for instance a drive
+ with 4KB physical sectors exposing 512-byte logical
+ blocks to the operating system). This parameter
+ indicates how many bytes the beginning of the partition
+ is offset from the disk's natural alignment.
+
+
+What: /sys/block/<disk>/<partition>/discard_alignment
+Date: May 2011
+Contact: Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+ Devices that support discard functionality may
+ internally allocate space in units that are bigger than
+ the exported logical block size. The discard_alignment
+ parameter indicates how many bytes the beginning of the
+ partition is offset from the internal allocation unit's
+ natural alignment.
+
+
+What: /sys/block/<disk>/<partition>/stat
+Date: February 2008
+Contact: Jerome Marchand <jmarchan@redhat.com>
+Description:
+ The /sys/block/<disk>/<partition>/stat files display the
+ I/O statistics of partition <partition>. The format is the
+ same as the format of /sys/block/<disk>/stat.
+
+
+What: /sys/block/<disk>/queue/add_random
+Date: June 2010
+Contact: linux-block@vger.kernel.org
+Description:
+ [RW] This file allows to turn off the disk entropy contribution.
+ Default value of this file is '1'(on).
+
+
+What: /sys/block/<disk>/queue/chunk_sectors
+Date: September 2016
+Contact: Hannes Reinecke <hare@suse.com>
+Description:
+ [RO] chunk_sectors has different meaning depending on the type
+ of the disk. For a RAID device (dm-raid), chunk_sectors
+ indicates the size in 512B sectors of the RAID volume stripe
+ segment. For a zoned block device, either host-aware or
+ host-managed, chunk_sectors indicates the size in 512B sectors
+ of the zones of the device, with the eventual exception of the
+ last zone of the device which may be smaller.
+
+
+What: /sys/block/<disk>/queue/crypto/
+Date: February 2022
+Contact: linux-block@vger.kernel.org
+Description:
+ The presence of this subdirectory of /sys/block/<disk>/queue/
+ indicates that the device supports inline encryption. This
+ subdirectory contains files which describe the inline encryption
+ capabilities of the device. For more information about inline
+ encryption, refer to Documentation/block/inline-encryption.rst.
+
+
+What: /sys/block/<disk>/queue/crypto/hw_wrapped_keys
+Date: February 2025
+Contact: linux-block@vger.kernel.org
+Description:
+ [RO] The presence of this file indicates that the device
+ supports hardware-wrapped inline encryption keys, i.e. key blobs
+ that can only be unwrapped and used by dedicated hardware. For
+ more information about hardware-wrapped inline encryption keys,
+ see Documentation/block/inline-encryption.rst.
+
+
+What: /sys/block/<disk>/queue/crypto/max_dun_bits
+Date: February 2022
+Contact: linux-block@vger.kernel.org
+Description:
+ [RO] This file shows the maximum length, in bits, of data unit
+ numbers accepted by the device in inline encryption requests.
+
+
+What: /sys/block/<disk>/queue/crypto/modes/<mode>
+Date: February 2022
+Contact: linux-block@vger.kernel.org
+Description:
+ [RO] For each crypto mode (i.e., encryption/decryption
+ algorithm) the device supports with inline encryption, a file
+ will exist at this location. It will contain a hexadecimal
+ number that is a bitmask of the supported data unit sizes, in
+ bytes, for that crypto mode.
+
+ Currently, the crypto modes that may be supported are:
+
+ * AES-256-XTS
+ * AES-128-CBC-ESSIV
+ * Adiantum
+
+ For example, if a device supports AES-256-XTS inline encryption
+ with data unit sizes of 512 and 4096 bytes, the file
+ /sys/block/<disk>/queue/crypto/modes/AES-256-XTS will exist and
+ will contain "0x1200".
+
+
+What: /sys/block/<disk>/queue/crypto/num_keyslots
+Date: February 2022
+Contact: linux-block@vger.kernel.org
+Description:
+ [RO] This file shows the number of keyslots the device has for
+ use with inline encryption.
+
+
+What: /sys/block/<disk>/queue/crypto/raw_keys
+Date: February 2025
+Contact: linux-block@vger.kernel.org
+Description:
+ [RO] The presence of this file indicates that the device
+ supports raw inline encryption keys, i.e. keys that are managed
+ in raw, plaintext form in software.
+
+
+What: /sys/block/<disk>/queue/dax
+Date: June 2016
+Contact: linux-block@vger.kernel.org
+Description:
+ [RO] This file indicates whether the device supports Direct
+ Access (DAX), used by CPU-addressable storage to bypass the
+ pagecache. It shows '1' if true, '0' if not.
+
+
+What: /sys/block/<disk>/queue/discard_granularity
+Date: May 2011
+Contact: Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+ [RO] Devices that support discard functionality may internally
+ allocate space using units that are bigger than the logical
+ block size. The discard_granularity parameter indicates the size
+ of the internal allocation unit in bytes if reported by the
+ device. Otherwise the discard_granularity will be set to match
+ the device's physical block size. A discard_granularity of 0
+ means that the device does not support discard functionality.
+
+
+What: /sys/block/<disk>/queue/discard_max_bytes
+Date: May 2011
+Contact: Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+ [RW] While discard_max_hw_bytes is the hardware limit for the
+ device, this setting is the software limit. Some devices exhibit
+ large latencies when large discards are issued, setting this
+ value lower will make Linux issue smaller discards and
+ potentially help reduce latencies induced by large discard
+ operations.
+
+
+What: /sys/block/<disk>/queue/discard_max_hw_bytes
+Date: July 2015
+Contact: linux-block@vger.kernel.org
+Description:
+ [RO] Devices that support discard functionality may have
+ internal limits on the number of bytes that can be trimmed or
+ unmapped in a single operation. The `discard_max_hw_bytes`
+ parameter is set by the device driver to the maximum number of
+ bytes that can be discarded in a single operation. Discard
+ requests issued to the device must not exceed this limit. A
+ `discard_max_hw_bytes` value of 0 means that the device does not
+ support discard functionality.
+
+
+What: /sys/block/<disk>/queue/discard_zeroes_data
+Date: May 2011
+Contact: Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+ [RO] Will always return 0. Don't rely on any specific behavior
+ for discards, and don't read this file.
+
+
+What: /sys/block/<disk>/queue/dma_alignment
+Date: May 2022
+Contact: linux-block@vger.kernel.org
+Description:
+ Reports the alignment that user space addresses must have to be
+ used for raw block device access with O_DIRECT and other driver
+ specific passthrough mechanisms.
+
+
+What: /sys/block/<disk>/queue/fua
+Date: May 2018
+Contact: linux-block@vger.kernel.org
+Description:
+ [RO] Whether or not the block driver supports the FUA flag for
+ write requests. FUA stands for Force Unit Access. If the FUA
+ flag is set that means that write requests must bypass the
+ volatile cache of the storage device.
+
+
+What: /sys/block/<disk>/queue/hw_sector_size
+Date: January 2008
+Contact: linux-block@vger.kernel.org
+Description:
+ [RO] This is the hardware sector size of the device, in bytes.
+
+
+What: /sys/block/<disk>/queue/independent_access_ranges/
+Date: October 2021
+Contact: linux-block@vger.kernel.org
+Description:
+ [RO] The presence of this sub-directory of the
+ /sys/block/xxx/queue/ directory indicates that the device is
+ capable of executing requests targeting different sector ranges
+ in parallel. For instance, single LUN multi-actuator hard-disks
+ will have an independent_access_ranges directory if the device
+ correctly advertises the sector ranges of its actuators.
+
+ The independent_access_ranges directory contains one directory
+ per access range, with each range described using the sector
+ (RO) attribute file to indicate the first sector of the range
+ and the nr_sectors (RO) attribute file to indicate the total
+ number of sectors in the range starting from the first sector of
+ the range. For example, a dual-actuator hard-disk will have the
+ following independent_access_ranges entries.::
+
+ $ tree /sys/block/<disk>/queue/independent_access_ranges/
+ /sys/block/<disk>/queue/independent_access_ranges/
+ |-- 0
+ | |-- nr_sectors
+ | `-- sector
+ `-- 1
+ |-- nr_sectors
+ `-- sector
+
+ The sector and nr_sectors attributes use 512B sector unit,
+ regardless of the actual block size of the device. Independent
+ access ranges do not overlap and include all sectors within the
+ device capacity. The access ranges are numbered in increasing
+ order of the range start sector, that is, the sector attribute
+ of range 0 always has the value 0.
+
+
+What: /sys/block/<disk>/queue/io_poll
+Date: November 2015
+Contact: linux-block@vger.kernel.org
+Description:
+ [RW] When read, this file shows whether polling is enabled (1)
+ or disabled (0). Writing '0' to this file will disable polling
+ for this device. Writing any non-zero value will enable this
+ feature.
+
+
+What: /sys/block/<disk>/queue/io_poll_delay
+Date: November 2016
+Contact: linux-block@vger.kernel.org
+Description:
+ [RW] This was used to control what kind of polling will be
+ performed. It is now fixed to -1, which is classic polling.
+ In this mode, the CPU will repeatedly ask for completions
+ without giving up any time.
+ <deprecated>
+
+
+What: /sys/block/<disk>/queue/io_timeout
+Date: November 2018
+Contact: Weiping Zhang <zhangweiping@didiglobal.com>
+Description:
+ [RW] io_timeout is the request timeout in milliseconds. If a
+ request does not complete in this time then the block driver
+ timeout handler is invoked. That timeout handler can decide to
+ retry the request, to fail it or to start a device recovery
+ strategy.
+
+
+What: /sys/block/<disk>/queue/iostats
+Date: January 2009
+Contact: linux-block@vger.kernel.org
+Description:
+ [RW] This file is used to control (on/off) the iostats
+ accounting of the disk.
+
+What: /sys/block/<disk>/queue/iostats_passthrough
+Date: October 2024
+Contact: linux-block@vger.kernel.org
+Description:
+ [RW] This file is used to control (on/off) the iostats
+ accounting of the disk for passthrough commands.
+
+
+What: /sys/block/<disk>/queue/logical_block_size
+Date: May 2009
+Contact: Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+ [RO] This is the smallest unit the storage device can address.
+ It is typically 512 bytes.
+
+
+What: /sys/block/<disk>/queue/max_active_zones
+Date: July 2020
+Contact: Niklas Cassel <niklas.cassel@wdc.com>
+Description:
+ [RO] For zoned block devices (zoned attribute indicating
+ "host-managed" or "host-aware"), the sum of zones belonging to
+ any of the zone states: EXPLICIT OPEN, IMPLICIT OPEN or CLOSED,
+ is limited by this value. If this value is 0, there is no limit.
+
+ If the host attempts to exceed this limit, the driver should
+ report this error with BLK_STS_ZONE_ACTIVE_RESOURCE, which user
+ space may see as the EOVERFLOW errno.
+
+
+What: /sys/block/<disk>/queue/max_discard_segments
+Date: February 2017
+Contact: linux-block@vger.kernel.org
+Description:
+ [RO] The maximum number of DMA scatter/gather entries in a
+ discard request.
+
+
+What: /sys/block/<disk>/queue/max_hw_sectors_kb
+Date: September 2004
+Contact: linux-block@vger.kernel.org
+Description:
+ [RO] This is the maximum number of kilobytes supported in a
+ single data transfer.
+
+
+What: /sys/block/<disk>/queue/max_integrity_segments
+Date: September 2010
+Contact: linux-block@vger.kernel.org
+Description:
+ [RO] Maximum number of elements in a DMA scatter/gather list
+ with integrity data that will be submitted by the block layer
+ core to the associated block driver.
+
+
+What: /sys/block/<disk>/queue/max_open_zones
+Date: July 2020
+Contact: Niklas Cassel <niklas.cassel@wdc.com>
+Description:
+ [RO] For zoned block devices (zoned attribute indicating
+ "host-managed" or "host-aware"), the sum of zones belonging to
+ any of the zone states: EXPLICIT OPEN or IMPLICIT OPEN, is
+ limited by this value. If this value is 0, there is no limit.
+
+
+What: /sys/block/<disk>/queue/max_sectors_kb
+Date: September 2004
+Contact: linux-block@vger.kernel.org
+Description:
+ [RW] This is the maximum number of kilobytes that the block
+ layer will allow for a filesystem request. Must be smaller than
+ or equal to the maximum size allowed by the hardware. Write 0
+ to use default kernel settings.
+
+
+What: /sys/block/<disk>/queue/max_segment_size
+Date: March 2010
+Contact: linux-block@vger.kernel.org
+Description:
+ [RO] Maximum size in bytes of a single element in a DMA
+ scatter/gather list.
+
+What: /sys/block/<disk>/queue/max_write_streams
+Date: November 2024
+Contact: linux-block@vger.kernel.org
+Description:
+ [RO] Maximum number of write streams supported, 0 if not
+ supported. If supported, valid values are 1 through
+ max_write_streams, inclusive.
+
+What: /sys/block/<disk>/queue/write_stream_granularity
+Date: November 2024
+Contact: linux-block@vger.kernel.org
+Description:
+ [RO] Granularity of a write stream in bytes. The granularity
+ of a write stream is the size that should be discarded or
+ overwritten together to avoid write amplification in the device.
+
+What: /sys/block/<disk>/queue/max_segments
+Date: March 2010
+Contact: linux-block@vger.kernel.org
+Description:
+ [RO] Maximum number of elements in a DMA scatter/gather list
+ that is submitted to the associated block driver.
+
+
+What: /sys/block/<disk>/queue/minimum_io_size
+Date: April 2009
+Contact: Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+ [RO] Storage devices may report a granularity or preferred
+ minimum I/O size which is the smallest request the device can
+ perform without incurring a performance penalty. For disk
+ drives this is often the physical block size. For RAID arrays
+ it is often the stripe chunk size. A properly aligned multiple
+ of minimum_io_size is the preferred request size for workloads
+ where a high number of I/O operations is desired.
+
+
+What: /sys/block/<disk>/queue/nomerges
+Date: January 2010
+Contact: linux-block@vger.kernel.org
+Description:
+ [RW] Standard I/O elevator operations include attempts to merge
+ contiguous I/Os. For known random I/O loads these attempts will
+ always fail and result in extra cycles being spent in the
+ kernel. This allows one to turn off this behavior on one of two
+ ways: When set to 1, complex merge checks are disabled, but the
+ simple one-shot merges with the previous I/O request are
+ enabled. When set to 2, all merge tries are disabled. The
+ default value is 0 - which enables all types of merge tries.
+
+
+What: /sys/block/<disk>/queue/nr_requests
+Date: July 2003
+Contact: linux-block@vger.kernel.org
+Description:
+ [RW] This controls how many requests may be allocated in the
+ block layer. Noted this value only represents the quantity for a
+ single blk_mq_tags instance. The actual number for the entire
+ device depends on the hardware queue count, whether elevator is
+ enabled, and whether tags are shared.
+
+
+What: /sys/block/<disk>/queue/nr_zones
+Date: November 2018
+Contact: Damien Le Moal <damien.lemoal@wdc.com>
+Description:
+ [RO] nr_zones indicates the total number of zones of a zoned
+ block device ("host-aware" or "host-managed" zone model). For
+ regular block devices, the value is always 0.
+
+
+What: /sys/block/<disk>/queue/optimal_io_size
+Date: April 2009
+Contact: Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+ [RO] Storage devices may report an optimal I/O size, which is
+ the device's preferred unit for sustained I/O. This is rarely
+ reported for disk drives. For RAID arrays it is usually the
+ stripe width or the internal track size. A properly aligned
+ multiple of optimal_io_size is the preferred request size for
+ workloads where sustained throughput is desired. If no optimal
+ I/O size is reported this file contains 0.
+
+
+What: /sys/block/<disk>/queue/physical_block_size
+Date: May 2009
+Contact: Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+ [RO] This is the smallest unit a physical storage device can
+ write atomically. It is usually the same as the logical block
+ size but may be bigger. One example is SATA drives with 4KB
+ sectors that expose a 512-byte logical block size to the
+ operating system. For stacked block devices the
+ physical_block_size variable contains the maximum
+ physical_block_size of the component devices.
+
+
+What: /sys/block/<disk>/queue/read_ahead_kb
+Date: May 2004
+Contact: linux-block@vger.kernel.org
+Description:
+ [RW] Maximum number of kilobytes to read-ahead for filesystems
+ on this block device.
+
+ For MADV_HUGEPAGE, the readahead size may exceed this setting
+ since its granularity is based on the hugepage size.
+
+
+What: /sys/block/<disk>/queue/rotational
+Date: January 2009
+Contact: linux-block@vger.kernel.org
+Description:
+ [RW] This file is used to stat if the device is of rotational
+ type or non-rotational type.
+
+
+What: /sys/block/<disk>/queue/rq_affinity
+Date: September 2008
+Contact: linux-block@vger.kernel.org
+Description:
+ [RW] If this option is '1', the block layer will migrate request
+ completions to the cpu "group" that originally submitted the
+ request. For some workloads this provides a significant
+ reduction in CPU cycles due to caching effects.
+
+ For storage configurations that need to maximize distribution of
+ completion processing setting this option to '2' forces the
+ completion to run on the requesting cpu (bypassing the "group"
+ aggregation logic).
+
+
+What: /sys/block/<disk>/queue/scheduler
+Date: October 2004
+Contact: linux-block@vger.kernel.org
+Description:
+ [RW] When read, this file will display the current and available
+ IO schedulers for this block device. The currently active IO
+ scheduler will be enclosed in [] brackets. Writing an IO
+ scheduler name to this file will switch control of this block
+ device to that new IO scheduler. Note that writing an IO
+ scheduler name to this file will attempt to load that IO
+ scheduler module, if it isn't already present in the system.
+
+
+What: /sys/block/<disk>/queue/stable_writes
+Date: September 2020
+Contact: linux-block@vger.kernel.org
+Description:
+ [RW] This file will contain '1' if memory must not be modified
+ while it is being used in a write request to this device. When
+ this is the case and the kernel is performing writeback of a
+ page, the kernel will wait for writeback to complete before
+ allowing the page to be modified again, rather than allowing
+ immediate modification as is normally the case. This
+ restriction arises when the device accesses the memory multiple
+ times where the same data must be seen every time -- for
+ example, once to calculate a checksum and once to actually write
+ the data. If no such restriction exists, this file will contain
+ '0'. This file is writable for testing purposes.
+
+What: /sys/block/<disk>/queue/virt_boundary_mask
+Date: April 2021
+Contact: linux-block@vger.kernel.org
+Description:
+ [RO] This file shows the I/O segment memory alignment mask for
+ the block device. I/O requests to this device will be split
+ between segments wherever either the memory address of the end
+ of the previous segment or the memory address of the beginning
+ of the current segment is not aligned to virt_boundary_mask + 1
+ bytes.
+
+
+What: /sys/block/<disk>/queue/wbt_lat_usec
+Date: November 2016
+Contact: linux-block@vger.kernel.org
+Description:
+ [RW] If the device is registered for writeback throttling, then
+ this file shows the target minimum read latency. If this latency
+ is exceeded in a given window of time (see curr_win_nsec), then
+ the writeback throttling will start scaling back writes. Writing
+ a value of '0' to this file disables the feature. Writing a
+ value of '-1' to this file resets the value to the default
+ setting.
+
+
+What: /sys/block/<disk>/queue/write_cache
+Date: April 2016
+Contact: linux-block@vger.kernel.org
+Description:
+ [RW] When read, this file will display whether the device has
+ write back caching enabled or not. It will return "write back"
+ for the former case, and "write through" for the latter. Writing
+ to this file can change the kernels view of the device, but it
+ doesn't alter the device state. This means that it might not be
+ safe to toggle the setting from "write back" to "write through",
+ since that will also eliminate cache flushes issued by the
+ kernel.
+
+
+What: /sys/block/<disk>/queue/write_same_max_bytes
+Date: January 2012
+Contact: Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+ [RO] Some devices support a write same operation in which a
+ single data block can be written to a range of several
+ contiguous blocks on storage. This can be used to wipe areas on
+ disk or to initialize drives in a RAID configuration.
+ write_same_max_bytes indicates how many bytes can be written in
+ a single write same command. If write_same_max_bytes is 0, write
+ same is not supported by the device.
+
+
+What: /sys/block/<disk>/queue/write_zeroes_max_bytes
+Date: November 2016
+Contact: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
+Description:
+ [RO] Devices that support write zeroes operation in which a
+ single request can be issued to zero out the range of contiguous
+ blocks on storage without having any payload in the request.
+ This can be used to optimize writing zeroes to the devices.
+ write_zeroes_max_bytes indicates how many bytes can be written
+ in a single write zeroes command. If write_zeroes_max_bytes is
+ 0, write zeroes is not supported by the device.
+
+
+What: /sys/block/<disk>/queue/write_zeroes_unmap_max_hw_bytes
+Date: January 2025
+Contact: Zhang Yi <yi.zhang@huawei.com>
+Description:
+ [RO] This file indicates whether a device supports zeroing data
+ in a specified block range without incurring the cost of
+ physically writing zeroes to the media for each individual
+ block. If this parameter is set to write_zeroes_max_bytes, the
+ device implements a zeroing operation which opportunistically
+ avoids writing zeroes to media while still guaranteeing that
+ subsequent reads from the specified block range will return
+ zeroed data. This operation is a best-effort optimization, a
+ device may fall back to physically writing zeroes to the media
+ due to other factors such as misalignment or being asked to
+ clear a block range smaller than the device's internal
+ allocation unit. If this parameter is set to 0, the device may
+ have to write each logical block media during a zeroing
+ operation.
+
+
+What: /sys/block/<disk>/queue/write_zeroes_unmap_max_bytes
+Date: January 2025
+Contact: Zhang Yi <yi.zhang@huawei.com>
+Description:
+ [RW] While write_zeroes_unmap_max_hw_bytes is the hardware limit
+ for the device, this setting is the software limit. Since the
+ unmap write zeroes operation is a best-effort optimization, some
+ devices may still physically writing zeroes to media. So the
+ speed of this operation is not guaranteed. Writing a value of
+ '0' to this file disables this operation. Otherwise, this
+ parameter should be equal to write_zeroes_unmap_max_hw_bytes.
+
+
+What: /sys/block/<disk>/queue/zone_append_max_bytes
+Date: May 2020
+Contact: linux-block@vger.kernel.org
+Description:
+ [RO] This is the maximum number of bytes that can be written to
+ a sequential zone of a zoned block device using a zone append
+ write operation (REQ_OP_ZONE_APPEND). This value is always 0 for
+ regular block devices.
+
+
+What: /sys/block/<disk>/queue/zone_write_granularity
+Date: January 2021
+Contact: linux-block@vger.kernel.org
+Description:
+ [RO] This indicates the alignment constraint, in bytes, for
+ write operations in sequential zones of zoned block devices
+ (devices with a zoned attributed that reports "host-managed" or
+ "host-aware"). This value is always 0 for regular block devices.
+
+
+What: /sys/block/<disk>/queue/zoned
+Date: September 2016
+Contact: Damien Le Moal <damien.lemoal@wdc.com>
+Description:
+ [RO] zoned indicates if the device is a zoned block device and
+ the zone model of the device if it is indeed zoned. The
+ possible values indicated by zoned are "none" for regular block
+ devices and "host-aware" or "host-managed" for zoned block
+ devices. The characteristics of host-aware and host-managed
+ zoned block devices are described in the ZBC (Zoned Block
+ Commands) and ZAC (Zoned Device ATA Command Set) standards.
+ These standards also define the "drive-managed" zone model.
+ However, since drive-managed zoned block devices do not support
+ zone commands, they will be treated as regular block devices and
+ zoned will report "none".
+
+
+What: /sys/block/<disk>/hidden
+Date: March 2023
+Contact: linux-block@vger.kernel.org
+Description:
+ [RO] the block device is hidden. it doesn’t produce events, and
+ can’t be opened from userspace or using blkdev_get*.
+ Used for the underlying components of multipath devices.
+
+
+What: /sys/block/<disk>/stat
+Date: February 2008
+Contact: Jerome Marchand <jmarchan@redhat.com>
+Description:
+ The /sys/block/<disk>/stat files displays the I/O
+ statistics of disk <disk>. They contain 11 fields:
+
+ == ==============================================
+ 1 reads completed successfully
+ 2 reads merged
+ 3 sectors read
+ 4 time spent reading (ms)
+ 5 writes completed
+ 6 writes merged
+ 7 sectors written
+ 8 time spent writing (ms)
+ 9 I/Os currently in progress
+ 10 time spent doing I/Os (ms)
+ 11 weighted time spent doing I/Os (ms)
+ 12 discards completed
+ 13 discards merged
+ 14 sectors discarded
+ 15 time spent discarding (ms)
+ 16 flush requests completed
+ 17 time spent flushing (ms)
+ == ==============================================
+
+ For more details refer Documentation/admin-guide/iostats.rst
diff --git a/Documentation/ABI/stable/sysfs-bus-firewire b/Documentation/ABI/stable/sysfs-bus-firewire
index 41e5a0cd1e3e..9ac9eddb82ef 100644
--- a/Documentation/ABI/stable/sysfs-bus-firewire
+++ b/Documentation/ABI/stable/sysfs-bus-firewire
@@ -47,6 +47,7 @@ Description:
IEEE 1394 node device attribute.
Read-only and immutable.
Values: 1: The sysfs entry represents a local node (a controller card).
+
0: The sysfs entry represents a remote node.
@@ -125,7 +126,9 @@ Description:
Read-only attribute, immutable during the target's lifetime.
Format, as exposed by firewire-sbp2 since 2.6.22, May 2007:
Colon-separated hexadecimal string representations of
+
u64 EUI-64 : u24 directory_ID : u16 LUN
+
without 0x prefixes, without whitespace. The former sbp2 driver
(removed in 2.6.37 after being superseded by firewire-sbp2) used
a somewhat shorter format which was not as close to SAM.
diff --git a/Documentation/ABI/stable/sysfs-bus-fsl-mc b/Documentation/ABI/stable/sysfs-bus-fsl-mc
new file mode 100644
index 000000000000..58f06c7eeed7
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-bus-fsl-mc
@@ -0,0 +1,19 @@
+What: /sys/bus/fsl-mc/rescan
+Date: January 2021
+KernelVersion: 5.12
+Contact: Ioana Ciornei <ioana.ciornei@nxp.com>
+Description: Writing a non-zero value to this attribute will
+ force a rescan of fsl-mc bus in the system and
+ synchronize the objects under fsl-mc bus and the
+ Management Complex firmware.
+Users: Userspace drivers and management tools
+
+What: /sys/bus/fsl-mc/autorescan
+Date: January 2021
+KernelVersion: 5.12
+Contact: Ioana Ciornei <ioana.ciornei@nxp.com>
+Description: Writing a zero value to this attribute will
+ disable the DPRC IRQs on which automatic rescan
+ of the fsl-mc bus is performed. A non-zero value
+ will enable the DPRC IRQs.
+Users: Userspace drivers and management tools
diff --git a/Documentation/ABI/stable/sysfs-bus-mhi b/Documentation/ABI/stable/sysfs-bus-mhi
new file mode 100644
index 000000000000..8b9698fa0beb
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-bus-mhi
@@ -0,0 +1,44 @@
+What: /sys/bus/mhi/devices/.../serialnumber
+Date: Sept 2020
+KernelVersion: 5.10
+Contact: mhi@lists.linux.dev
+Description: The file holds the serial number of the client device obtained
+ using a BHI (Boot Host Interface) register read after at least
+ one attempt to power up the device has been done. If read
+ without having the device power on at least once, the file will
+ read all 0's.
+Users: Any userspace application or clients interested in device info.
+
+What: /sys/bus/mhi/devices/.../oem_pk_hash
+Date: Sept 2020
+KernelVersion: 5.10
+Contact: mhi@lists.linux.dev
+Description: The file holds the OEM PK Hash value of the endpoint device
+ obtained using a BHI (Boot Host Interface) register read after
+ at least one attempt to power up the device has been done. If
+ read without having the device power on at least once, the file
+ will read all 0's.
+Users: Any userspace application or clients interested in device info.
+
+What: /sys/bus/mhi/devices/.../soc_reset
+Date: April 2022
+KernelVersion: 5.19
+Contact: mhi@lists.linux.dev
+Description: Initiates a SoC reset on the MHI controller. A SoC reset is
+ a reset of last resort, and will require a complete re-init.
+ This can be useful as a method of recovery if the device is
+ non-responsive, or as a means of loading new firmware as a
+ system administration task.
+
+What: /sys/bus/mhi/devices/.../trigger_edl
+Date: April 2024
+KernelVersion: 6.10
+Contact: mhi@lists.linux.dev
+Description: Writing a non-zero value to this file will force devices to
+ enter EDL (Emergency Download) mode. This entry only exists for
+ devices capable of entering the EDL mode using the standard EDL
+ triggering mechanism defined in the MHI spec v1.2. Once in EDL
+ mode, the flash programmer image can be downloaded to the
+ device to enter the flash programmer execution environment.
+ This can be useful if user wants to use QDL (Qualcomm Download,
+ which is used to download firmware over EDL) to update firmware.
diff --git a/Documentation/ABI/stable/sysfs-bus-nvmem b/Documentation/ABI/stable/sysfs-bus-nvmem
index 5923ab4620c5..0ae8cb074acf 100644
--- a/Documentation/ABI/stable/sysfs-bus-nvmem
+++ b/Documentation/ABI/stable/sysfs-bus-nvmem
@@ -1,19 +1,50 @@
+What: /sys/bus/nvmem/devices/.../force_ro
+Date: June 2024
+KernelVersion: 6.11
+Contact: Marek Vasut <marex@denx.de>
+Description:
+ This read/write attribute allows users to set read-write
+ devices as read-only and back to read-write from userspace.
+ This can be used to unlock and relock write-protection of
+ devices which are generally locked, except during sporadic
+ programming operation.
+ Read returns '0' or '1' for read-write or read-only modes
+ respectively.
+ Write parses one of 'YyTt1NnFf0', or [oO][NnFf] for "on"
+ and "off", i.e. what kstrtobool() supports.
+ Note: This file is only present if CONFIG_NVMEM_SYSFS
+ is enabled.
+
What: /sys/bus/nvmem/devices/.../nvmem
Date: July 2015
-KernelVersion: 4.2
+KernelVersion: 4.2
Contact: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Description:
This file allows user to read/write the raw NVMEM contents.
Permissions for write to this file depends on the nvmem
provider configuration.
+ Note: This file is only present if CONFIG_NVMEM_SYSFS
+ is enabled
+
+ ex::
- ex:
- hexdump /sys/bus/nvmem/devices/qfprom0/nvmem
+ hexdump /sys/bus/nvmem/devices/qfprom0/nvmem
- 0000000 0000 0000 0000 0000 0000 0000 0000 0000
- *
- 00000a0 db10 2240 0000 e000 0c00 0c00 0000 0c00
- 0000000 0000 0000 0000 0000 0000 0000 0000 0000
- ...
- *
- 0001000
+ 0000000 0000 0000 0000 0000 0000 0000 0000 0000
+ *
+ 00000a0 db10 2240 0000 e000 0c00 0c00 0000 0c00
+ 0000000 0000 0000 0000 0000 0000 0000 0000 0000
+ ...
+ *
+ 0001000
+
+What: /sys/bus/nvmem/devices/.../type
+Date: November 2018
+KernelVersion: 5.0
+Contact: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Description:
+ This read-only attribute allows user to read the NVMEM
+ device type. Supported types are "Unknown", "EEPROM",
+ "OTP", "Battery backed", "FRAM".
+ Note: This file is only present if CONFIG_NVMEM_SYSFS
+ is enabled.
diff --git a/Documentation/ABI/stable/sysfs-bus-usb b/Documentation/ABI/stable/sysfs-bus-usb
index b832eeff9999..cad4bc232520 100644
--- a/Documentation/ABI/stable/sysfs-bus-usb
+++ b/Documentation/ABI/stable/sysfs-bus-usb
@@ -50,8 +50,10 @@ Description:
Tools can use this file and the connected_duration file to
compute the percentage of time that a device has been active.
- For example,
- echo $((100 * `cat active_duration` / `cat connected_duration`))
+ For example::
+
+ echo $((100 * `cat active_duration` / `cat connected_duration`))
+
will give an integer percentage. Note that this does not
account for counter wrap.
Users:
diff --git a/Documentation/ABI/stable/sysfs-bus-vmbus b/Documentation/ABI/stable/sysfs-bus-vmbus
index 3fed8fdb873d..3066feae1d8d 100644
--- a/Documentation/ABI/stable/sysfs-bus-vmbus
+++ b/Documentation/ABI/stable/sysfs-bus-vmbus
@@ -1,3 +1,10 @@
+What: /sys/bus/vmbus/hibernation
+Date: Jan 2021
+KernelVersion: 5.12
+Contact: Dexuan Cui <decui@microsoft.com>
+Description: Whether the host supports hibernation for the VM.
+Users: Daemon that sets up swap partition/file for hibernation.
+
What: /sys/bus/vmbus/devices/<UUID>/id
Date: Jul 2009
KernelVersion: 2.6.31
@@ -54,14 +61,7 @@ Date: September. 2017
KernelVersion: 4.14
Contact: Stephen Hemminger <sthemmin@microsoft.com>
Description: Directory for per-channel information
- NN is the VMBUS relid associtated with the channel.
-
-What: /sys/bus/vmbus/devices/<UUID>/channels/<N>/cpu
-Date: September. 2017
-KernelVersion: 4.14
-Contact: Stephen Hemminger <sthemmin@microsoft.com>
-Description: VCPU (sub)channel is affinitized to
-Users: tools/hv/lsvmbus and other debugging tools
+ NN is the VMBUS relid associated with the channel.
What: /sys/bus/vmbus/devices/<UUID>/channels/<N>/cpu
Date: September. 2017
@@ -81,7 +81,9 @@ What: /sys/bus/vmbus/devices/<UUID>/channels/<N>/latency
Date: September. 2017
KernelVersion: 4.14
Contact: Stephen Hemminger <sthemmin@microsoft.com>
-Description: Channel signaling latency
+Description: Channel signaling latency. This file is available only for
+ performance critical channels (storage, network, etc.) that use
+ the monitor page mechanism.
Users: Debugging tools
What: /sys/bus/vmbus/devices/<UUID>/channels/<N>/out_mask
@@ -95,7 +97,9 @@ What: /sys/bus/vmbus/devices/<UUID>/channels/<N>/pending
Date: September. 2017
KernelVersion: 4.14
Contact: Stephen Hemminger <sthemmin@microsoft.com>
-Description: Channel interrupt pending state
+Description: Channel interrupt pending state. This file is available only for
+ performance critical channels (storage, network, etc.) that use
+ the monitor page mechanism.
Users: Debugging tools
What: /sys/bus/vmbus/devices/<UUID>/channels/<N>/read_avail
@@ -137,7 +141,9 @@ What: /sys/bus/vmbus/devices/<UUID>/channels/<N>/monitor_id
Date: January. 2018
KernelVersion: 4.16
Contact: Stephen Hemminger <sthemmin@microsoft.com>
-Description: Monitor bit associated with channel
+Description: Monitor bit associated with channel. This file is available only
+ for performance critical channels (storage, network, etc.) that
+ use the monitor page mechanism.
Users: Debugging tools and userspace drivers
What: /sys/bus/vmbus/devices/<UUID>/channels/<N>/ring
@@ -146,3 +152,36 @@ KernelVersion: 4.16
Contact: Stephen Hemminger <sthemmin@microsoft.com>
Description: Binary file created by uio_hv_generic for ring buffer
Users: Userspace drivers
+
+What: /sys/bus/vmbus/devices/<UUID>/channels/<N>/intr_in_full
+Date: February 2019
+KernelVersion: 5.0
+Contact: Michael Kelley <mikelley@microsoft.com>
+Description: Number of guest to host interrupts caused by the inbound ring
+ buffer transitioning from full to not full while a packet is
+ waiting for buffer space to become available
+Users: Debugging tools
+
+What: /sys/bus/vmbus/devices/<UUID>/channels/<N>/intr_out_empty
+Date: February 2019
+KernelVersion: 5.0
+Contact: Michael Kelley <mikelley@microsoft.com>
+Description: Number of guest to host interrupts caused by the outbound ring
+ buffer transitioning from empty to not empty
+Users: Debugging tools
+
+What: /sys/bus/vmbus/devices/<UUID>/channels/<N>/out_full_first
+Date: February 2019
+KernelVersion: 5.0
+Contact: Michael Kelley <mikelley@microsoft.com>
+Description: Number of write operations that were the first to encounter an
+ outbound ring buffer full condition
+Users: Debugging tools
+
+What: /sys/bus/vmbus/devices/<UUID>/channels/<N>/out_full_total
+Date: February 2019
+KernelVersion: 5.0
+Contact: Michael Kelley <mikelley@microsoft.com>
+Description: Total number of write operations that encountered an outbound
+ ring buffer full condition
+Users: Debugging tools
diff --git a/Documentation/ABI/stable/sysfs-bus-w1 b/Documentation/ABI/stable/sysfs-bus-w1
index 140d85b4ae92..5cd5e872bcae 100644
--- a/Documentation/ABI/stable/sysfs-bus-w1
+++ b/Documentation/ABI/stable/sysfs-bus-w1
@@ -6,6 +6,7 @@ Description: Bus scanning interval, microseconds component.
control systems are attached/generate presence for as short as
100 ms - hence the tens-to-hundreds milliseconds scan intervals
are required.
- see Documentation/w1/w1.generic for detailed information.
+
+ see Documentation/w1/w1-generic.rst for detailed information.
Users: any user space application which wants to know bus scanning
interval
diff --git a/Documentation/ABI/stable/sysfs-bus-xen-backend b/Documentation/ABI/stable/sysfs-bus-xen-backend
index e8b60bd766f7..480a89edfa05 100644
--- a/Documentation/ABI/stable/sysfs-bus-xen-backend
+++ b/Documentation/ABI/stable/sysfs-bus-xen-backend
@@ -19,7 +19,7 @@ Date: April 2011
KernelVersion: 3.0
Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Description:
- The major:minor number (in hexidecimal) of the
+ The major:minor number (in hexadecimal) of the
physical device providing the storage for this backend
block device.
diff --git a/Documentation/ABI/stable/sysfs-class-backlight b/Documentation/ABI/stable/sysfs-class-backlight
index 70302f370e7e..40b8c46b95b2 100644
--- a/Documentation/ABI/stable/sysfs-class-backlight
+++ b/Documentation/ABI/stable/sysfs-class-backlight
@@ -3,9 +3,11 @@ Date: April 2005
KernelVersion: 2.6.12
Contact: Richard Purdie <rpurdie@rpsys.net>
Description:
- Control BACKLIGHT power, values are FB_BLANK_* from fb.h
- - FB_BLANK_UNBLANK (0) : power on.
- - FB_BLANK_POWERDOWN (4) : power off
+ Control BACKLIGHT power, values are compatible with
+ FB_BLANK_* from fb.h
+
+ - 0 (FB_BLANK_UNBLANK) : power on.
+ - 4 (FB_BLANK_POWERDOWN) : power off
Users: HAL
What: /sys/class/backlight/<backlight>/brightness
@@ -24,7 +26,12 @@ Date: March 2006
KernelVersion: 2.6.17
Contact: Richard Purdie <rpurdie@rpsys.net>
Description:
- Show the actual brightness by querying the hardware.
+ Show the actual brightness by querying the hardware. Due
+ to implementation differences in hardware this may not
+ match the value in 'brightness'. For example some hardware
+ may treat blanking differently or have custom power saving
+ features. Userspace should generally use the values in
+ 'brightness' to make decisions.
Users: HAL
What: /sys/class/backlight/<backlight>/max_brightness
diff --git a/Documentation/ABI/stable/sysfs-class-bluetooth b/Documentation/ABI/stable/sysfs-class-bluetooth
new file mode 100644
index 000000000000..36be02471174
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-class-bluetooth
@@ -0,0 +1,9 @@
+What: /sys/class/bluetooth/hci<index>/reset
+Date: 14-Jan-2025
+KernelVersion: 6.13
+Contact: linux-bluetooth@vger.kernel.org
+Description: This write-only attribute allows users to trigger the vendor reset
+ method on the Bluetooth device when arbitrary data is written.
+ The reset may or may not be done through the device transport
+ (e.g., UART/USB), and can also be done through an out-of-band
+ approach such as GPIO.
diff --git a/Documentation/ABI/stable/sysfs-class-infiniband b/Documentation/ABI/stable/sysfs-class-infiniband
index 17211ceb9bf4..694f23a03a28 100644
--- a/Documentation/ABI/stable/sysfs-class-infiniband
+++ b/Documentation/ABI/stable/sysfs-class-infiniband
@@ -8,12 +8,14 @@ Date: Apr, 2005
KernelVersion: v2.6.12
Contact: linux-rdma@vger.kernel.org
Description:
+ =============== ===========================================
node_type: (RO) Node type (CA, RNIC, usNIC, usNIC UDP,
switch or router)
node_guid: (RO) Node GUID
sys_image_guid: (RO) System image GUID
+ =============== ===========================================
What: /sys/class/infiniband/<device>/node_desc
@@ -47,6 +49,7 @@ KernelVersion: v2.6.12
Contact: linux-rdma@vger.kernel.org
Description:
+ =============== ===============================================
lid: (RO) Port LID
rate: (RO) Port data rate (active width * active
@@ -66,8 +69,9 @@ Description:
cap_mask: (RO) Port capability mask. 2 bits here are
settable- IsCommunicationManagementSupported
- (set when CM module is loaded) and IsSM (set via
- open of issmN file).
+ (set when CM module is loaded) and IsSM (set
+ via open of issmN file).
+ =============== ===============================================
What: /sys/class/infiniband/<device>/ports/<port-num>/link_layer
@@ -103,8 +107,7 @@ Date: Apr, 2005
KernelVersion: v2.6.12
Contact: linux-rdma@vger.kernel.org
Description:
- Errors info:
- -----------
+ **Errors info**:
symbol_error: (RO) Total number of minor link errors detected on
one or more physical lanes.
@@ -142,8 +145,7 @@ Description:
intervention. It can also indicate hardware issues or extremely
poor link signal integrity
- Data info:
- ---------
+ **Data info**:
port_xmit_data: (RO) Total number of data octets, divided by 4
(lanes), transmitted on all VLs. This is 64 bit counter
@@ -176,8 +178,7 @@ Description:
transmitted on all VLs from the port. This may include multicast
packets with errors.
- Misc info:
- ---------
+ **Misc info**:
port_xmit_discards: (RO) Total number of outbound packets
discarded by the port because the port is down or congested.
@@ -231,10 +232,10 @@ Description: The RoCE type of the associated GID resides at index <gid-index>.
or "RoCE v2" for RoCE v2 based GIDs.
-What: /sys/class/infiniband_mad/umadN/ibdev
-What: /sys/class/infiniband_mad/umadN/port
-What: /sys/class/infiniband_mad/issmN/ibdev
-What: /sys/class/infiniband_mad/issmN/port
+What: /sys/class/infiniband_mad/umad<N>/ibdev
+What: /sys/class/infiniband_mad/umad<N>/port
+What: /sys/class/infiniband_mad/issm<N>/ibdev
+What: /sys/class/infiniband_mad/issm<N>/port
Date: Apr, 2005
KernelVersion: v2.6.12
Contact: linux-rdma@vger.kernel.org
@@ -244,9 +245,11 @@ Description:
two umad devices and two issm devices, while a switch will have
one device of each type (for switch port 0).
+ ======= =====================================
ibdev: (RO) Show Infiniband (IB) device name
port: (RO) Display port number
+ ======= =====================================
What: /sys/class/infiniband_mad/abi_version
@@ -258,33 +261,18 @@ Description:
userspace ABI compatibility of umad & issm devices.
-What: /sys/class/infiniband_cm/ucmN/ibdev
-Date: Oct, 2005
-KernelVersion: v2.6.14
-Contact: linux-rdma@vger.kernel.org
-Description:
- (RO) Display Infiniband (IB) device name
-
-
-What: /sys/class/infiniband_cm/abi_version
-Date: Oct, 2005
-KernelVersion: v2.6.14
-Contact: linux-rdma@vger.kernel.org
-Description:
- (RO) Value is incremented if any changes are made that break
- userspace ABI compatibility of ucm devices.
-
-
-What: /sys/class/infiniband_verbs/uverbsN/ibdev
-What: /sys/class/infiniband_verbs/uverbsN/abi_version
+What: /sys/class/infiniband_verbs/uverbs<N>/ibdev
+What: /sys/class/infiniband_verbs/uverbs<N>/abi_version
Date: Sept, 2005
KernelVersion: v2.6.14
Contact: linux-rdma@vger.kernel.org
Description:
+ =============== ===========================================
ibdev: (RO) Display Infiniband (IB) device name
abi_version: (RO) Show ABI version of IB device specific
interfaces.
+ =============== ===========================================
What: /sys/class/infiniband_verbs/abi_version
@@ -306,31 +294,14 @@ Date: Apr, 2005
KernelVersion: v2.6.12
Contact: linux-rdma@vger.kernel.org
Description:
+ =============== ================================================
hw_rev: (RO) Hardware revision number
hca_type: (RO) Host Channel Adapter type: MT23108, MT25208
(MT23108 compat mode), MT25208 or MT25204
board_id: (RO) Manufacturing board ID
-
-
-sysfs interface for Chelsio T3 RDMA Driver (cxgb3)
---------------------------------------------------
-
-What: /sys/class/infiniband/cxgb3_X/hw_rev
-What: /sys/class/infiniband/cxgb3_X/hca_type
-What: /sys/class/infiniband/cxgb3_X/board_id
-Date: Feb, 2007
-KernelVersion: v2.6.21
-Contact: linux-rdma@vger.kernel.org
-Description:
- hw_rev: (RO) Hardware revision number
-
- hca_type: (RO) HCA type. Here it is a driver short name.
- It should normally match the name in its bus
- driver structure (e.g. pci_driver::name).
-
- board_id: (RO) Manufacturing board id
+ =============== ================================================
sysfs interface for Mellanox ConnectX HCA IB driver (mlx4)
@@ -343,11 +314,13 @@ Date: Sep, 2007
KernelVersion: v2.6.24
Contact: linux-rdma@vger.kernel.org
Description:
+ =============== ===============================
hw_rev: (RO) Hardware revision number
hca_type: (RO) Host channel adapter type
board_id: (RO) Manufacturing board ID
+ =============== ===============================
What: /sys/class/infiniband/mlx4_X/iov/ports/<port-num>/gids/<n>
@@ -373,6 +346,7 @@ Description:
example, ports/1/pkeys/10 contains the value at index 10 in port
1's P_Key table.
+ ======================= ==========================================
gids/<n>: (RO) The physical port gids n = 0..127
admin_guids/<n>: (RW) Allows examining or changing the
@@ -382,7 +356,7 @@ Description:
pkeys/<n>: (RO) Displays the contents of the physical
key table n = 0..126
- mcgs/: (RO) Muticast group table
+ mcgs/: (RO) Multicast group table
<m>/gid_idx/0: (RO) Display the GID mapping m = 1..2
@@ -401,6 +375,7 @@ Description:
guest, whenever it uses its pkey index
1, will actually be using the real pkey
index 10.
+ ======================= ==========================================
What: /sys/class/infiniband/mlx4_X/iov/<pci-slot-num>/ports/<m>/smi_enabled
@@ -412,34 +387,19 @@ Description:
Enabling QP0 on VFs for selected VF/port. By default, no VFs are
enabled for QP0 operation.
- smi_enabled: (RO) Indicates whether smi is currently enabled
- for the indicated VF/port
+ ================= ==== ===========================================
+ smi_enabled: (RO) Indicates whether smi is currently enabled
+ for the indicated VF/port
- enable_smi_admin:(RW) Used by the admin to request that smi
- capability be enabled or disabled for the
- indicated VF/port. 0 = disable, 1 = enable.
+ enable_smi_admin: (RW) Used by the admin to request that smi
+ capability be enabled or disabled for the
+ indicated VF/port. 0 = disable, 1 = enable.
+ ================= ==== ===========================================
The requested enablement will occur at the next reset of the VF
(e.g. driver restart on the VM which owns the VF).
-sysfs interface for NetEffect RNIC Low-Level iWARP driver (nes)
----------------------------------------------------------------
-
-What: /sys/class/infiniband/nesX/hw_rev
-What: /sys/class/infiniband/nesX/hca_type
-What: /sys/class/infiniband/nesX/board_id
-Date: Feb, 2008
-KernelVersion: v2.6.25
-Contact: linux-rdma@vger.kernel.org
-Description:
- hw_rev: (RO) Hardware revision number
-
- hca_type: (RO) Host Channel Adapter type (NEX020)
-
- board_id: (RO) Manufacturing board id
-
-
sysfs interface for Chelsio T4/T5 RDMA driver (cxgb4)
-----------------------------------------------------
@@ -451,6 +411,7 @@ KernelVersion: v2.6.35
Contact: linux-rdma@vger.kernel.org
Description:
+ =============== =============================================
hw_rev: (RO) Hardware revision number
hca_type: (RO) Driver short name. Should normally match
@@ -459,6 +420,7 @@ Description:
board_id: (RO) Manufacturing board id. (Vendor + device
information)
+ =============== =============================================
sysfs interface for Intel IB driver qib
@@ -479,6 +441,7 @@ Date: May, 2010
KernelVersion: v2.6.35
Contact: linux-rdma@vger.kernel.org
Description:
+ =============== ======================================================
version: (RO) Display version information of installed software
and drivers.
@@ -505,9 +468,10 @@ Description:
chip_reset: (WO) Reset the chip if possible by writing
"reset" to this file. Only allowed if no user
contexts are open that use chip resources.
+ =============== ======================================================
-What: /sys/class/infiniband/qibX/ports/N/sl2vl/[0-15]
+What: /sys/class/infiniband/qibX/ports/<N>/sl2vl/[0-15]
Date: May, 2010
KernelVersion: v2.6.35
Contact: linux-rdma@vger.kernel.org
@@ -516,34 +480,37 @@ Description:
the Service Level (SL). Listing the SL files returns the Virtual
Lane (VL) as programmed by the SL.
-What: /sys/class/infiniband/qibX/ports/N/CCMgtA/cc_settings_bin
-What: /sys/class/infiniband/qibX/ports/N/CCMgtA/cc_table_bin
+What: /sys/class/infiniband/qibX/ports/<N>/CCMgtA/cc_settings_bin
+What: /sys/class/infiniband/qibX/ports/<N>/CCMgtA/cc_table_bin
Date: May, 2010
KernelVersion: v2.6.35
Contact: linux-rdma@vger.kernel.org
Description:
Per-port congestion control. Both are binary attributes.
- cc_table_bin: (RO) Congestion control table size followed by
+ =============== ================================================
+ cc_table_bin (RO) Congestion control table size followed by
table entries.
- cc_settings_bin:(RO) Congestion settings: port control, control
+ cc_settings_bin (RO) Congestion settings: port control, control
map and an array of 16 entries for the
congestion entries - increase, timer, event log
trigger threshold and the minimum injection rate
delay.
+ =============== ================================================
-What: /sys/class/infiniband/qibX/ports/N/linkstate/loopback
-What: /sys/class/infiniband/qibX/ports/N/linkstate/led_override
-What: /sys/class/infiniband/qibX/ports/N/linkstate/hrtbt_enable
-What: /sys/class/infiniband/qibX/ports/N/linkstate/status
-What: /sys/class/infiniband/qibX/ports/N/linkstate/status_str
+What: /sys/class/infiniband/qibX/ports/<N>/linkstate/loopback
+What: /sys/class/infiniband/qibX/ports/<N>/linkstate/led_override
+What: /sys/class/infiniband/qibX/ports/<N>/linkstate/hrtbt_enable
+What: /sys/class/infiniband/qibX/ports/<N>/linkstate/status
+What: /sys/class/infiniband/qibX/ports/<N>/linkstate/status_str
Date: May, 2010
KernelVersion: v2.6.35
Contact: linux-rdma@vger.kernel.org
Description:
[to be documented]
+ =============== ===============================================
loopback: (WO)
led_override: (WO)
hrtbt_enable: (RW)
@@ -554,17 +521,18 @@ Description:
errors. Possible states are- "Initted",
"Present", "IB_link_up", "IB_configured" or
"Fatal_Hardware_Error".
-
-What: /sys/class/infiniband/qibX/ports/N/diag_counters/rc_resends
-What: /sys/class/infiniband/qibX/ports/N/diag_counters/seq_naks
-What: /sys/class/infiniband/qibX/ports/N/diag_counters/rdma_seq
-What: /sys/class/infiniband/qibX/ports/N/diag_counters/rnr_naks
-What: /sys/class/infiniband/qibX/ports/N/diag_counters/other_naks
-What: /sys/class/infiniband/qibX/ports/N/diag_counters/rc_timeouts
-What: /sys/class/infiniband/qibX/ports/N/diag_counters/look_pkts
-What: /sys/class/infiniband/qibX/ports/N/diag_counters/pkt_drops
-What: /sys/class/infiniband/qibX/ports/N/diag_counters/dma_wait
-What: /sys/class/infiniband/qibX/ports/N/diag_counters/unaligned
+ =============== ===============================================
+
+What: /sys/class/infiniband/qibX/ports/<N>/diag_counters/rc_resends
+What: /sys/class/infiniband/qibX/ports/<N>/diag_counters/seq_naks
+What: /sys/class/infiniband/qibX/ports/<N>/diag_counters/rdma_seq
+What: /sys/class/infiniband/qibX/ports/<N>/diag_counters/rnr_naks
+What: /sys/class/infiniband/qibX/ports/<N>/diag_counters/other_naks
+What: /sys/class/infiniband/qibX/ports/<N>/diag_counters/rc_timeouts
+What: /sys/class/infiniband/qibX/ports/<N>/diag_counters/look_pkts
+What: /sys/class/infiniband/qibX/ports/<N>/diag_counters/pkt_drops
+What: /sys/class/infiniband/qibX/ports/<N>/diag_counters/dma_wait
+What: /sys/class/infiniband/qibX/ports/<N>/diag_counters/unaligned
Date: May, 2010
KernelVersion: v2.6.35
Contact: linux-rdma@vger.kernel.org
@@ -602,6 +570,7 @@ Contact: Christian Benvenuti <benve@cisco.com>,
linux-rdma@vger.kernel.org
Description:
+ =============== ===============================================
board_id: (RO) Manufacturing board id
config: (RO) Report the configuration for this PF
@@ -614,6 +583,7 @@ Description:
iface: (RO) Shows which network interface this usNIC
entry is associated to (visible with ifconfig).
+ =============== ===============================================
What: /sys/class/infiniband/usnic_X/qpn/summary
What: /sys/class/infiniband/usnic_X/qpn/context
@@ -658,6 +628,7 @@ Date: May, 2016
KernelVersion: v4.6
Contact: linux-rdma@vger.kernel.org
Description:
+ =============== =============================================
hw_rev: (RO) Hardware revision number
board_id: (RO) Manufacturing board id
@@ -676,73 +647,80 @@ Description:
available.
tempsense: (RO) Thermal sense information
+ =============== =============================================
-What: /sys/class/infiniband/hfi1_X/ports/N/CCMgtA/cc_settings_bin
-What: /sys/class/infiniband/hfi1_X/ports/N/CCMgtA/cc_table_bin
-What: /sys/class/infiniband/hfi1_X/ports/N/CCMgtA/cc_prescan
+What: /sys/class/infiniband/hfi1_X/ports/<N>/CCMgtA/cc_settings_bin
+What: /sys/class/infiniband/hfi1_X/ports/<N>/CCMgtA/cc_table_bin
+What: /sys/class/infiniband/hfi1_X/ports/<N>/CCMgtA/cc_prescan
Date: May, 2016
KernelVersion: v4.6
Contact: linux-rdma@vger.kernel.org
Description:
Per-port congestion control.
- cc_table_bin: (RO) CCA tables used by PSM2 Congestion control
+ =============== ================================================
+ cc_table_bin (RO) CCA tables used by PSM2 Congestion control
table size followed by table entries. Binary
attribute.
- cc_settings_bin:(RO) Congestion settings: port control, control
+ cc_settings_bin (RO) Congestion settings: port control, control
map and an array of 16 entries for the
congestion entries - increase, timer, event log
trigger threshold and the minimum injection rate
delay. Binary attribute.
- cc_prescan: (RW) enable prescanning for faster BECN
+ cc_prescan (RW) enable prescanning for faster BECN
response. Write "on" to enable and "off" to
disable.
+ =============== ================================================
-What: /sys/class/infiniband/hfi1_X/ports/N/sc2vl/[0-31]
-What: /sys/class/infiniband/hfi1_X/ports/N/sl2sc/[0-31]
-What: /sys/class/infiniband/hfi1_X/ports/N/vl2mtu/[0-15]
+What: /sys/class/infiniband/hfi1_X/ports/<N>/sc2vl/[0-31]
+What: /sys/class/infiniband/hfi1_X/ports/<N>/sl2sc/[0-31]
+What: /sys/class/infiniband/hfi1_X/ports/<N>/vl2mtu/[0-15]
Date: May, 2016
KernelVersion: v4.6
Contact: linux-rdma@vger.kernel.org
Description:
+ =============== ===================================================
sc2vl/: (RO) 32 files (0 - 31) used to translate sl->vl
sl2sc/: (RO) 32 files (0 - 31) used to translate sl->sc
vl2mtu/: (RO) 16 files (0 - 15) used to determine MTU for vl
+ =============== ===================================================
-What: /sys/class/infiniband/hfi1_X/sdma_N/cpu_list
-What: /sys/class/infiniband/hfi1_X/sdma_N/vl
+What: /sys/class/infiniband/hfi1_X/sdma_<N>/cpu_list
+What: /sys/class/infiniband/hfi1_X/sdma_<N>/vl
Date: Sept, 2016
KernelVersion: v4.8
Contact: linux-rdma@vger.kernel.org
Description:
sdma<N>/ contains one directory per sdma engine (0 - 15)
+ =============== ==============================================
cpu_list: (RW) List of cpus for user-process to sdma
engine assignment.
vl: (RO) Displays the virtual lane (vl) the sdma
engine maps to.
+ =============== ==============================================
This interface gives the user control on the affinity settings
for the device. As an example, to set an sdma engine irq
affinity and thread affinity of a user processes to use the
sdma engine, which is "near" in terms of NUMA configuration, or
- physical cpu location, the user will do:
+ physical cpu location, the user will do::
- echo "3" > /proc/irq/<N>/smp_affinity_list
- echo "4-7" > /sys/devices/.../sdma3/cpu_list
- cat /sys/devices/.../sdma3/vl
- 0
- echo "8" > /proc/irq/<M>/smp_affinity_list
- echo "9-12" > /sys/devices/.../sdma4/cpu_list
- cat /sys/devices/.../sdma4/vl
- 1
+ echo "3" > /proc/irq/<N>/smp_affinity_list
+ echo "4-7" > /sys/devices/.../sdma3/cpu_list
+ cat /sys/devices/.../sdma3/vl
+ 0
+ echo "8" > /proc/irq/<M>/smp_affinity_list
+ echo "9-12" > /sys/devices/.../sdma4/cpu_list
+ cat /sys/devices/.../sdma4/vl
+ 1
to make sure that when a process runs on cpus 4,5,6, or 7, and
uses vl=0, then sdma engine 3 is selected by the driver, and
@@ -753,24 +731,6 @@ Description:
is the irq number of "sdma3", and M is irq number of "sdma4" in
the /proc/interrupts file.
-
-sysfs interface for Intel(R) X722 iWARP i40iw driver
-----------------------------------------------------
-
-What: /sys/class/infiniband/i40iwX/hw_rev
-What: /sys/class/infiniband/i40iwX/hca_type
-What: /sys/class/infiniband/i40iwX/board_id
-Date: Jan, 2016
-KernelVersion: v4.10
-Contact: linux-rdma@vger.kernel.org
-Description:
- hw_rev: (RO) Hardware revision number
-
- hca_type: (RO) Show HCA type (I40IW)
-
- board_id: (RO) I40IW board ID
-
-
sysfs interface for QLogic qedr NIC Driver
------------------------------------------
@@ -781,9 +741,11 @@ KernelVersion: v4.10
Contact: linux-rdma@vger.kernel.org
Description:
+ =============== ==== ========================
hw_rev: (RO) Hardware revision number
hca_type: (RO) Display HCA type
+ =============== ==== ========================
sysfs interface for VMware Paravirtual RDMA driver
@@ -797,11 +759,13 @@ KernelVersion: v4.10
Contact: linux-rdma@vger.kernel.org
Description:
+ =============== ==== =====================================
hw_rev: (RO) Hardware revision number
hca_type: (RO) Host channel adapter type
board_id: (RO) Display PVRDMA manufacturing board ID
+ =============== ==== =====================================
sysfs interface for Broadcom NetXtreme-E RoCE driver
@@ -813,6 +777,8 @@ Date: Feb, 2017
KernelVersion: v4.11
Contact: linux-rdma@vger.kernel.org
Description:
+ =============== ==== =========================
hw_rev: (RO) Hardware revision number
hca_type: (RO) Host channel adapter type
+ =============== ==== =========================
diff --git a/Documentation/ABI/stable/sysfs-class-rfkill b/Documentation/ABI/stable/sysfs-class-rfkill
index 80151a409d67..67b605e3dd16 100644
--- a/Documentation/ABI/stable/sysfs-class-rfkill
+++ b/Documentation/ABI/stable/sysfs-class-rfkill
@@ -1,8 +1,8 @@
rfkill - radio frequency (RF) connector kill switch support
-For details to this subsystem look at Documentation/rfkill.txt.
+For details to this subsystem look at Documentation/driver-api/rfkill.rst.
-For the deprecated /sys/class/rfkill/*/claim knobs of this interface look in
+For the deprecated ``/sys/class/rfkill/*/claim`` knobs of this interface look in
Documentation/ABI/removed/sysfs-class-rfkill.
What: /sys/class/rfkill
@@ -16,7 +16,7 @@ Description: The rfkill class subsystem folder.
What: /sys/class/rfkill/rfkill[0-9]+/name
Date: 09-Jul-2007
-KernelVersion v2.6.22
+KernelVersion: v2.6.22
Contact: linux-wireless@vger.kernel.org
Description: Name assigned by driver to this key (interface or driver name).
Values: arbitrary string.
@@ -24,7 +24,7 @@ Values: arbitrary string.
What: /sys/class/rfkill/rfkill[0-9]+/type
Date: 09-Jul-2007
-KernelVersion v2.6.22
+KernelVersion: v2.6.22
Contact: linux-wireless@vger.kernel.org
Description: Driver type string ("wlan", "bluetooth", etc).
Values: See include/linux/rfkill.h.
@@ -32,18 +32,19 @@ Values: See include/linux/rfkill.h.
What: /sys/class/rfkill/rfkill[0-9]+/persistent
Date: 09-Jul-2007
-KernelVersion v2.6.22
+KernelVersion: v2.6.22
Contact: linux-wireless@vger.kernel.org
Description: Whether the soft blocked state is initialised from non-volatile
storage at startup.
-Values: A numeric value.
- 0: false
- 1: true
+Values: A numeric value:
+
+ - 0: false
+ - 1: true
What: /sys/class/rfkill/rfkill[0-9]+/state
Date: 09-Jul-2007
-KernelVersion v2.6.22
+KernelVersion: v2.6.22
Contact: linux-wireless@vger.kernel.org
Description: Current state of the transmitter.
This file was scheduled to be removed in 2014, but due to its
@@ -54,6 +55,7 @@ Description: Current state of the transmitter.
through this interface. There will likely be another attempt to
remove it in the future.
Values: A numeric value.
+
0: RFKILL_STATE_SOFT_BLOCKED
transmitter is turned off by software
1: RFKILL_STATE_UNBLOCKED
@@ -65,10 +67,11 @@ Values: A numeric value.
What: /sys/class/rfkill/rfkill[0-9]+/hard
Date: 12-March-2010
-KernelVersion v2.6.34
+KernelVersion: v2.6.34
Contact: linux-wireless@vger.kernel.org
Description: Current hardblock state. This file is read only.
Values: A numeric value.
+
0: inactive
The transmitter is (potentially) active.
1: active
@@ -78,11 +81,13 @@ Values: A numeric value.
What: /sys/class/rfkill/rfkill[0-9]+/soft
Date: 12-March-2010
-KernelVersion v2.6.34
+KernelVersion: v2.6.34
Contact: linux-wireless@vger.kernel.org
Description: Current softblock state. This file is read and write.
Values: A numeric value.
+
0: inactive
The transmitter is (potentially) active.
+
1: active
The transmitter is turned off by software.
diff --git a/Documentation/ABI/stable/sysfs-class-tpm b/Documentation/ABI/stable/sysfs-class-tpm
index c0e23830f56a..411d5895bed4 100644
--- a/Documentation/ABI/stable/sysfs-class-tpm
+++ b/Documentation/ABI/stable/sysfs-class-tpm
@@ -1,7 +1,7 @@
What: /sys/class/tpm/tpmX/device/
Date: April 2005
KernelVersion: 2.6.12
-Contact: tpmdd-devel@lists.sf.net
+Contact: linux-integrity@vger.kernel.org
Description: The device/ directory under a specific TPM instance exposes
the properties of that TPM chip
@@ -9,7 +9,7 @@ Description: The device/ directory under a specific TPM instance exposes
What: /sys/class/tpm/tpmX/device/active
Date: April 2006
KernelVersion: 2.6.17
-Contact: tpmdd-devel@lists.sf.net
+Contact: linux-integrity@vger.kernel.org
Description: The "active" property prints a '1' if the TPM chip is accepting
commands. An inactive TPM chip still contains all the state of
an active chip (Storage Root Key, NVRAM, etc), and can be
@@ -21,7 +21,7 @@ Description: The "active" property prints a '1' if the TPM chip is accepting
What: /sys/class/tpm/tpmX/device/cancel
Date: June 2005
KernelVersion: 2.6.13
-Contact: tpmdd-devel@lists.sf.net
+Contact: linux-integrity@vger.kernel.org
Description: The "cancel" property allows you to cancel the currently
pending TPM command. Writing any value to cancel will call the
TPM vendor specific cancel operation.
@@ -29,14 +29,14 @@ Description: The "cancel" property allows you to cancel the currently
What: /sys/class/tpm/tpmX/device/caps
Date: April 2005
KernelVersion: 2.6.12
-Contact: tpmdd-devel@lists.sf.net
+Contact: linux-integrity@vger.kernel.org
Description: The "caps" property contains TPM manufacturer and version info.
- Example output:
+ Example output::
- Manufacturer: 0x53544d20
- TCG version: 1.2
- Firmware version: 8.16
+ Manufacturer: 0x53544d20
+ TCG version: 1.2
+ Firmware version: 8.16
Manufacturer is a hex dump of the 4 byte manufacturer info
space in a TPM. TCG version shows the TCG TPM spec level that
@@ -46,7 +46,7 @@ Description: The "caps" property contains TPM manufacturer and version info.
What: /sys/class/tpm/tpmX/device/durations
Date: March 2011
KernelVersion: 3.1
-Contact: tpmdd-devel@lists.sf.net
+Contact: linux-integrity@vger.kernel.org
Description: The "durations" property shows the 3 vendor-specific values
used to wait for a short, medium and long TPM command. All
TPM commands are categorized as short, medium or long in
@@ -54,9 +54,9 @@ Description: The "durations" property shows the 3 vendor-specific values
any longer than necessary before starting to poll for a
result.
- Example output:
+ Example output::
- 3015000 4508000 180995000 [original]
+ 3015000 4508000 180995000 [original]
Here the short, medium and long durations are displayed in
usecs. "[original]" indicates that the values are displayed
@@ -69,7 +69,7 @@ Description: The "durations" property shows the 3 vendor-specific values
What: /sys/class/tpm/tpmX/device/enabled
Date: April 2006
KernelVersion: 2.6.17
-Contact: tpmdd-devel@lists.sf.net
+Contact: linux-integrity@vger.kernel.org
Description: The "enabled" property prints a '1' if the TPM chip is enabled,
meaning that it should be visible to the OS. This property
may be visible but produce a '0' after some operation that
@@ -78,7 +78,7 @@ Description: The "enabled" property prints a '1' if the TPM chip is enabled,
What: /sys/class/tpm/tpmX/device/owned
Date: April 2006
KernelVersion: 2.6.17
-Contact: tpmdd-devel@lists.sf.net
+Contact: linux-integrity@vger.kernel.org
Description: The "owned" property produces a '1' if the TPM_TakeOwnership
ordinal has been executed successfully in the chip. A '0'
indicates that ownership hasn't been taken.
@@ -86,20 +86,20 @@ Description: The "owned" property produces a '1' if the TPM_TakeOwnership
What: /sys/class/tpm/tpmX/device/pcrs
Date: April 2005
KernelVersion: 2.6.12
-Contact: tpmdd-devel@lists.sf.net
+Contact: linux-integrity@vger.kernel.org
Description: The "pcrs" property will dump the current value of all Platform
Configuration Registers in the TPM. Note that since these
values may be constantly changing, the output is only valid
for a snapshot in time.
- Example output:
+ Example output::
- PCR-00: 3A 3F 78 0F 11 A4 B4 99 69 FC AA 80 CD 6E 39 57 C3 3B 22 75
- PCR-01: 3A 3F 78 0F 11 A4 B4 99 69 FC AA 80 CD 6E 39 57 C3 3B 22 75
- PCR-02: 3A 3F 78 0F 11 A4 B4 99 69 FC AA 80 CD 6E 39 57 C3 3B 22 75
- PCR-03: 3A 3F 78 0F 11 A4 B4 99 69 FC AA 80 CD 6E 39 57 C3 3B 22 75
- PCR-04: 3A 3F 78 0F 11 A4 B4 99 69 FC AA 80 CD 6E 39 57 C3 3B 22 75
- ...
+ PCR-00: 3A 3F 78 0F 11 A4 B4 99 69 FC AA 80 CD 6E 39 57 C3 3B 22 75
+ PCR-01: 3A 3F 78 0F 11 A4 B4 99 69 FC AA 80 CD 6E 39 57 C3 3B 22 75
+ PCR-02: 3A 3F 78 0F 11 A4 B4 99 69 FC AA 80 CD 6E 39 57 C3 3B 22 75
+ PCR-03: 3A 3F 78 0F 11 A4 B4 99 69 FC AA 80 CD 6E 39 57 C3 3B 22 75
+ PCR-04: 3A 3F 78 0F 11 A4 B4 99 69 FC AA 80 CD 6E 39 57 C3 3B 22 75
+ ...
The number of PCRs and hex bytes needed to represent a PCR
value will vary depending on TPM chip version. For TPM 1.1 and
@@ -109,7 +109,7 @@ Description: The "pcrs" property will dump the current value of all Platform
What: /sys/class/tpm/tpmX/device/pubek
Date: April 2005
KernelVersion: 2.6.12
-Contact: tpmdd-devel@lists.sf.net
+Contact: linux-integrity@vger.kernel.org
Description: The "pubek" property will return the TPM's public endorsement
key if possible. If the TPM has had ownership established and
is version 1.2, the pubek will not be available without the
@@ -119,49 +119,49 @@ Description: The "pubek" property will return the TPM's public endorsement
ated at TPM manufacture time and exists for the life of the
chip.
- Example output:
-
- Algorithm: 00 00 00 01
- Encscheme: 00 03
- Sigscheme: 00 01
- Parameters: 00 00 08 00 00 00 00 02 00 00 00 00
- Modulus length: 256
- Modulus:
- B4 76 41 82 C9 20 2C 10 18 40 BC 8B E5 44 4C 6C
- 3A B2 92 0C A4 9B 2A 83 EB 5C 12 85 04 48 A0 B6
- 1E E4 81 84 CE B2 F2 45 1C F0 85 99 61 02 4D EB
- 86 C4 F7 F3 29 60 52 93 6B B2 E5 AB 8B A9 09 E3
- D7 0E 7D CA 41 BF 43 07 65 86 3C 8C 13 7A D0 8B
- 82 5E 96 0B F8 1F 5F 34 06 DA A2 52 C1 A9 D5 26
- 0F F4 04 4B D9 3F 2D F2 AC 2F 74 64 1F 8B CD 3E
- 1E 30 38 6C 70 63 69 AB E2 50 DF 49 05 2E E1 8D
- 6F 78 44 DA 57 43 69 EE 76 6C 38 8A E9 8E A3 F0
- A7 1F 3C A8 D0 12 15 3E CA 0E BD FA 24 CD 33 C6
- 47 AE A4 18 83 8E 22 39 75 93 86 E6 FD 66 48 B6
- 10 AD 94 14 65 F9 6A 17 78 BD 16 53 84 30 BF 70
- E0 DC 65 FD 3C C6 B0 1E BF B9 C1 B5 6C EF B1 3A
- F8 28 05 83 62 26 11 DC B4 6B 5A 97 FF 32 26 B6
- F7 02 71 CF 15 AE 16 DD D1 C1 8E A8 CF 9B 50 7B
- C3 91 FF 44 1E CF 7C 39 FE 17 77 21 20 BD CE 9B
-
- Possible values:
-
- Algorithm: TPM_ALG_RSA (1)
- Encscheme: TPM_ES_RSAESPKCSv15 (2)
+ Example output::
+
+ Algorithm: 00 00 00 01
+ Encscheme: 00 03
+ Sigscheme: 00 01
+ Parameters: 00 00 08 00 00 00 00 02 00 00 00 00
+ Modulus length: 256
+ Modulus:
+ B4 76 41 82 C9 20 2C 10 18 40 BC 8B E5 44 4C 6C
+ 3A B2 92 0C A4 9B 2A 83 EB 5C 12 85 04 48 A0 B6
+ 1E E4 81 84 CE B2 F2 45 1C F0 85 99 61 02 4D EB
+ 86 C4 F7 F3 29 60 52 93 6B B2 E5 AB 8B A9 09 E3
+ D7 0E 7D CA 41 BF 43 07 65 86 3C 8C 13 7A D0 8B
+ 82 5E 96 0B F8 1F 5F 34 06 DA A2 52 C1 A9 D5 26
+ 0F F4 04 4B D9 3F 2D F2 AC 2F 74 64 1F 8B CD 3E
+ 1E 30 38 6C 70 63 69 AB E2 50 DF 49 05 2E E1 8D
+ 6F 78 44 DA 57 43 69 EE 76 6C 38 8A E9 8E A3 F0
+ A7 1F 3C A8 D0 12 15 3E CA 0E BD FA 24 CD 33 C6
+ 47 AE A4 18 83 8E 22 39 75 93 86 E6 FD 66 48 B6
+ 10 AD 94 14 65 F9 6A 17 78 BD 16 53 84 30 BF 70
+ E0 DC 65 FD 3C C6 B0 1E BF B9 C1 B5 6C EF B1 3A
+ F8 28 05 83 62 26 11 DC B4 6B 5A 97 FF 32 26 B6
+ F7 02 71 CF 15 AE 16 DD D1 C1 8E A8 CF 9B 50 7B
+ C3 91 FF 44 1E CF 7C 39 FE 17 77 21 20 BD CE 9B
+
+ Possible values::
+
+ Algorithm: TPM_ALG_RSA (1)
+ Encscheme: TPM_ES_RSAESPKCSv15 (2)
TPM_ES_RSAESOAEP_SHA1_MGF1 (3)
- Sigscheme: TPM_SS_NONE (1)
- Parameters, a byte string of 3 u32 values:
+ Sigscheme: TPM_SS_NONE (1)
+ Parameters, a byte string of 3 u32 values:
Key Length (bits): 00 00 08 00 (2048)
Num primes: 00 00 00 02 (2)
Exponent Size: 00 00 00 00 (0 means the
default exp)
- Modulus Length: 256 (bytes)
- Modulus: The 256 byte Endorsement Key modulus
+ Modulus Length: 256 (bytes)
+ Modulus: The 256 byte Endorsement Key modulus
What: /sys/class/tpm/tpmX/device/temp_deactivated
Date: April 2006
KernelVersion: 2.6.17
-Contact: tpmdd-devel@lists.sf.net
+Contact: linux-integrity@vger.kernel.org
Description: The "temp_deactivated" property returns a '1' if the chip has
been temporarily deactivated, usually until the next power
cycle. Whether a warm boot (reboot) will clear a TPM chip
@@ -170,16 +170,41 @@ Description: The "temp_deactivated" property returns a '1' if the chip has
What: /sys/class/tpm/tpmX/device/timeouts
Date: March 2011
KernelVersion: 3.1
-Contact: tpmdd-devel@lists.sf.net
+Contact: linux-integrity@vger.kernel.org
Description: The "timeouts" property shows the 4 vendor-specific values
for the TPM's interface spec timeouts. The use of these
timeouts is defined by the TPM interface spec that the chip
conforms to.
- Example output:
+ Example output::
- 750000 750000 750000 750000 [original]
+ 750000 750000 750000 750000 [original]
The four timeout values are shown in usecs, with a trailing
"[original]" or "[adjusted]" depending on whether the values
were scaled by the driver to be reported in usec from msecs.
+
+What: /sys/class/tpm/tpmX/tpm_version_major
+Date: October 2019
+KernelVersion: 5.5
+Contact: linux-integrity@vger.kernel.org
+Description: The "tpm_version_major" property shows the TCG spec major version
+ implemented by the TPM device.
+
+ Example output::
+
+ 2
+
+What: /sys/class/tpm/tpmX/pcr-<H>/<N>
+Date: March 2021
+KernelVersion: 5.12
+Contact: linux-integrity@vger.kernel.org
+Description: produces output in compact hex representation for PCR
+ number N from hash bank H. N is the numeric value of
+ the PCR number and H is the crypto string
+ representation of the hash
+
+ Example output::
+
+ cat /sys/class/tpm/tpm0/pcr-sha256/7
+ 2ED93F199692DC6788EFA6A1FE74514AB9760B2A6CEEAEF6C808C13E4ABB0D42
diff --git a/Documentation/ABI/stable/sysfs-devices b/Documentation/ABI/stable/sysfs-devices
index 4404bd9b96c1..98a8ef99ac5f 100644
--- a/Documentation/ABI/stable/sysfs-devices
+++ b/Documentation/ABI/stable/sysfs-devices
@@ -1,5 +1,6 @@
-# Note: This documents additional properties of any device beyond what
-# is documented in Documentation/admin-guide/sysfs-rules.rst
+Note:
+ This documents additional properties of any device beyond what
+ is documented in Documentation/admin-guide/sysfs-rules.rst
What: /sys/devices/*/of_node
Date: February 2015
@@ -22,3 +23,10 @@ Contact: Device Tree mailing list <devicetree@vger.kernel.org>
Description:
If CONFIG_OF is enabled, then this file is present. When
read, it returns full name of the device node.
+
+What: /sys/devices/*/dev
+Date: Jun 2006
+Contact: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Description:
+ Major and minor numbers of the character device corresponding
+ to the device (in <major>:<minor> format).
diff --git a/Documentation/ABI/stable/sysfs-devices-node b/Documentation/ABI/stable/sysfs-devices-node
index 3e90e1f3bf0a..2d0e023f22a7 100644
--- a/Documentation/ABI/stable/sysfs-devices-node
+++ b/Documentation/ABI/stable/sysfs-devices-node
@@ -54,14 +54,14 @@ Date: October 2002
Contact: Linux Memory Management list <linux-mm@kvack.org>
Description:
Provides information about the node's distribution and memory
- utilization. Similar to /proc/meminfo, see Documentation/filesystems/proc.txt
+ utilization. Similar to /proc/meminfo, see Documentation/filesystems/proc.rst
What: /sys/devices/system/node/nodeX/numastat
Date: October 2002
Contact: Linux Memory Management list <linux-mm@kvack.org>
Description:
The node's hit/miss statistics, in units of pages.
- See Documentation/numastat.txt
+ See Documentation/admin-guide/numastat.rst
What: /sys/devices/system/node/nodeX/distance
Date: October 2002
@@ -90,4 +90,149 @@ Date: December 2009
Contact: Lee Schermerhorn <lee.schermerhorn@hp.com>
Description:
The node's huge page size control/query attributes.
- See Documentation/admin-guide/mm/hugetlbpage.rst \ No newline at end of file
+ See Documentation/admin-guide/mm/hugetlbpage.rst
+
+What: /sys/devices/system/node/nodeX/accessY/
+Date: December 2018
+Contact: Keith Busch <keith.busch@intel.com>
+Description:
+ The node's relationship to other nodes for access class "Y".
+
+What: /sys/devices/system/node/nodeX/accessY/initiators/
+Date: December 2018
+Contact: Keith Busch <keith.busch@intel.com>
+Description:
+ The directory containing symlinks to memory initiator
+ nodes that have class "Y" access to this target node's
+ memory. CPUs and other memory initiators in nodes not in
+ the list accessing this node's memory may have different
+ performance.
+
+What: /sys/devices/system/node/nodeX/accessY/targets/
+Date: December 2018
+Contact: Keith Busch <keith.busch@intel.com>
+Description:
+ The directory containing symlinks to memory targets that
+ this initiator node has class "Y" access.
+
+What: /sys/devices/system/node/nodeX/accessY/initiators/read_bandwidth
+Date: December 2018
+Contact: Keith Busch <keith.busch@intel.com>
+Description:
+ This node's read bandwidth in MB/s when accessed from
+ nodes found in this access class's linked initiators.
+
+What: /sys/devices/system/node/nodeX/accessY/initiators/read_latency
+Date: December 2018
+Contact: Keith Busch <keith.busch@intel.com>
+Description:
+ This node's read latency in nanoseconds when accessed
+ from nodes found in this access class's linked initiators.
+
+What: /sys/devices/system/node/nodeX/accessY/initiators/write_bandwidth
+Date: December 2018
+Contact: Keith Busch <keith.busch@intel.com>
+Description:
+ This node's write bandwidth in MB/s when accessed from
+ found in this access class's linked initiators.
+
+What: /sys/devices/system/node/nodeX/accessY/initiators/write_latency
+Date: December 2018
+Contact: Keith Busch <keith.busch@intel.com>
+Description:
+ This node's write latency in nanoseconds when access
+ from nodes found in this class's linked initiators.
+
+What: /sys/devices/system/node/nodeX/memory_side_cache/indexY/
+Date: December 2018
+Contact: Keith Busch <keith.busch@intel.com>
+Description:
+ The directory containing attributes for the memory-side cache
+ level 'Y'.
+
+What: /sys/devices/system/node/nodeX/memory_side_cache/indexY/indexing
+Date: December 2018
+Contact: Keith Busch <keith.busch@intel.com>
+Description:
+ The caches associativity indexing: 0 for direct mapped,
+ non-zero if indexed.
+
+What: /sys/devices/system/node/nodeX/memory_side_cache/indexY/line_size
+Date: December 2018
+Contact: Keith Busch <keith.busch@intel.com>
+Description:
+ The number of bytes accessed from the next cache level on a
+ cache miss.
+
+What: /sys/devices/system/node/nodeX/memory_side_cache/indexY/size
+Date: December 2018
+Contact: Keith Busch <keith.busch@intel.com>
+Description:
+ The size of this memory side cache in bytes.
+
+What: /sys/devices/system/node/nodeX/memory_side_cache/indexY/write_policy
+Date: December 2018
+Contact: Keith Busch <keith.busch@intel.com>
+Description:
+ The cache write policy: 0 for write-back, 1 for write-through,
+ other or unknown.
+
+What: /sys/devices/system/node/nodeX/memory_side_cache/indexY/address_mode
+Date: March 2025
+Contact: Dave Jiang <dave.jiang@intel.com>
+Description:
+ The address mode: 0 for reserved, 1 for extended-linear.
+
+What: /sys/devices/system/node/nodeX/x86/sgx_total_bytes
+Date: November 2021
+Contact: Jarkko Sakkinen <jarkko@kernel.org>
+Description:
+ The total amount of SGX physical memory in bytes.
+
+What: /sys/devices/system/node/nodeX/memory_failure/total
+Date: January 2023
+Contact: Jiaqi Yan <jiaqiyan@google.com>
+Description:
+ The total number of raw poisoned pages (pages containing
+ corrupted data due to memory errors) on a NUMA node.
+
+What: /sys/devices/system/node/nodeX/memory_failure/ignored
+Date: January 2023
+Contact: Jiaqi Yan <jiaqiyan@google.com>
+Description:
+ Of the raw poisoned pages on a NUMA node, how many pages are
+ ignored by memory error recovery attempt, usually because
+ support for this type of pages is unavailable, and kernel
+ gives up the recovery.
+
+What: /sys/devices/system/node/nodeX/memory_failure/failed
+Date: January 2023
+Contact: Jiaqi Yan <jiaqiyan@google.com>
+Description:
+ Of the raw poisoned pages on a NUMA node, how many pages are
+ failed by memory error recovery attempt. This usually means
+ a key recovery operation failed.
+
+What: /sys/devices/system/node/nodeX/memory_failure/delayed
+Date: January 2023
+Contact: Jiaqi Yan <jiaqiyan@google.com>
+Description:
+ Of the raw poisoned pages on a NUMA node, how many pages are
+ delayed by memory error recovery attempt. Delayed poisoned
+ pages usually will be retried by kernel.
+
+What: /sys/devices/system/node/nodeX/memory_failure/recovered
+Date: January 2023
+Contact: Jiaqi Yan <jiaqiyan@google.com>
+Description:
+ Of the raw poisoned pages on a NUMA node, how many pages are
+ recovered by memory error recovery attempt.
+
+What: /sys/devices/system/node/nodeX/reclaim
+Date: June 2025
+Contact: Linux Memory Management list <linux-mm@kvack.org>
+Description:
+ Perform user-triggered proactive reclaim on a NUMA node.
+ This interface is equivalent to the memcg variant.
+
+ See Documentation/admin-guide/cgroup-v2.rst
diff --git a/Documentation/ABI/stable/sysfs-devices-system-cpu b/Documentation/ABI/stable/sysfs-devices-system-cpu
index 33c133e2a631..cf78bd99f6c8 100644
--- a/Documentation/ABI/stable/sysfs-devices-system-cpu
+++ b/Documentation/ABI/stable/sysfs-devices-system-cpu
@@ -23,3 +23,95 @@ Description: Default value for the Data Stream Control Register (DSCR) on
here).
If set by a process it will be inherited by child processes.
Values: 64 bit unsigned integer (bit field)
+
+What: /sys/devices/system/cpu/cpuX/topology/die_id
+Description: the CPU die ID of cpuX. Typically it is the hardware platform's
+ identifier (rather than the kernel's). The actual value is
+ architecture and platform dependent.
+Values: integer
+
+What: /sys/devices/system/cpu/cpuX/topology/core_id
+Description: the CPU core ID of cpuX. Typically it is the hardware platform's
+ identifier (rather than the kernel's). The actual value is
+ architecture and platform dependent.
+Values: integer
+
+What: /sys/devices/system/cpu/cpuX/topology/cluster_id
+Description: the cluster ID of cpuX. Typically it is the hardware platform's
+ identifier (rather than the kernel's). The actual value is
+ architecture and platform dependent.
+Values: integer
+
+What: /sys/devices/system/cpu/cpuX/topology/book_id
+Description: the book ID of cpuX. Typically it is the hardware platform's
+ identifier (rather than the kernel's). The actual value is
+ architecture and platform dependent. it's only used on s390.
+Values: integer
+
+What: /sys/devices/system/cpu/cpuX/topology/drawer_id
+Description: the drawer ID of cpuX. Typically it is the hardware platform's
+ identifier (rather than the kernel's). The actual value is
+ architecture and platform dependent. it's only used on s390.
+Values: integer
+
+What: /sys/devices/system/cpu/cpuX/topology/core_cpus
+Description: internal kernel map of CPUs within the same core.
+ (deprecated name: "thread_siblings")
+Values: hexadecimal bitmask.
+
+What: /sys/devices/system/cpu/cpuX/topology/core_cpus_list
+Description: human-readable list of CPUs within the same core.
+ The format is like 0-3, 8-11, 14,17.
+ (deprecated name: "thread_siblings_list").
+Values: decimal list.
+
+What: /sys/devices/system/cpu/cpuX/topology/package_cpus
+Description: internal kernel map of the CPUs sharing the same physical_package_id.
+ (deprecated name: "core_siblings").
+Values: hexadecimal bitmask.
+
+What: /sys/devices/system/cpu/cpuX/topology/package_cpus_list
+Description: human-readable list of CPUs sharing the same physical_package_id.
+ The format is like 0-3, 8-11, 14,17.
+ (deprecated name: "core_siblings_list")
+Values: decimal list.
+
+What: /sys/devices/system/cpu/cpuX/topology/die_cpus
+Description: internal kernel map of CPUs within the same die.
+Values: hexadecimal bitmask.
+
+What: /sys/devices/system/cpu/cpuX/topology/die_cpus_list
+Description: human-readable list of CPUs within the same die.
+ The format is like 0-3, 8-11, 14,17.
+Values: decimal list.
+
+What: /sys/devices/system/cpu/cpuX/topology/cluster_cpus
+Description: internal kernel map of CPUs within the same cluster.
+Values: hexadecimal bitmask.
+
+What: /sys/devices/system/cpu/cpuX/topology/cluster_cpus_list
+Description: human-readable list of CPUs within the same cluster.
+ The format is like 0-3, 8-11, 14,17.
+Values: decimal list.
+
+What: /sys/devices/system/cpu/cpuX/topology/book_siblings
+Description: internal kernel map of cpuX's hardware threads within the same
+ book_id. it's only used on s390.
+Values: hexadecimal bitmask.
+
+What: /sys/devices/system/cpu/cpuX/topology/book_siblings_list
+Description: human-readable list of cpuX's hardware threads within the same
+ book_id.
+ The format is like 0-3, 8-11, 14,17. it's only used on s390.
+Values: decimal list.
+
+What: /sys/devices/system/cpu/cpuX/topology/drawer_siblings
+Description: internal kernel map of cpuX's hardware threads within the same
+ drawer_id. it's only used on s390.
+Values: hexadecimal bitmask.
+
+What: /sys/devices/system/cpu/cpuX/topology/drawer_siblings_list
+Description: human-readable list of cpuX's hardware threads within the same
+ drawer_id.
+ The format is like 0-3, 8-11, 14,17. it's only used on s390.
+Values: decimal list.
diff --git a/Documentation/ABI/stable/sysfs-driver-aspeed-vuart b/Documentation/ABI/stable/sysfs-driver-aspeed-vuart
index 8062953ce77b..950cafc9443a 100644
--- a/Documentation/ABI/stable/sysfs-driver-aspeed-vuart
+++ b/Documentation/ABI/stable/sysfs-driver-aspeed-vuart
@@ -6,10 +6,19 @@ Description: Configures which IO port the host side of the UART
Users: OpenBMC. Proposed changes should be mailed to
openbmc@lists.ozlabs.org
-What: /sys/bus/platform/drivers/aspeed-vuart*/sirq
+What: /sys/bus/platform/drivers/aspeed-vuart/*/sirq
Date: April 2017
Contact: Jeremy Kerr <jk@ozlabs.org>
Description: Configures which interrupt number the host side of
the UART will appear on the host <-> BMC LPC bus.
Users: OpenBMC. Proposed changes should be mailed to
openbmc@lists.ozlabs.org
+
+What: /sys/bus/platform/drivers/aspeed-vuart/*/sirq_polarity
+Date: July 2019
+Contact: Oskar Senft <osk@google.com>
+Description: Configures the polarity of the serial interrupt to the
+ host via the BMC LPC bus.
+ Set to 0 for active-low or 1 for active-high.
+Users: OpenBMC. Proposed changes should be mailed to
+ openbmc@lists.ozlabs.org
diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd
new file mode 100644
index 000000000000..4a355e6747ae
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd
@@ -0,0 +1,361 @@
+What: /sys/bus/dsa/devices/dsa<m>/version
+Date: Apr 15, 2020
+KernelVersion: 5.8.0
+Contact: dmaengine@vger.kernel.org
+Description: The hardware version number.
+
+What: /sys/bus/dsa/devices/dsa<m>/cdev_major
+Date: Oct 25, 2019
+KernelVersion: 5.6.0
+Contact: dmaengine@vger.kernel.org
+Description: The major number that the character device driver assigned to
+ this device.
+
+What: /sys/bus/dsa/devices/dsa<m>/errors
+Date: Oct 25, 2019
+KernelVersion: 5.6.0
+Contact: dmaengine@vger.kernel.org
+Description: The error information for this device.
+
+What: /sys/bus/dsa/devices/dsa<m>/max_batch_size
+Date: Oct 25, 2019
+KernelVersion: 5.6.0
+Contact: dmaengine@vger.kernel.org
+Description: The largest number of work descriptors in a batch.
+ It's not visible when the device does not support batch.
+
+What: /sys/bus/dsa/devices/dsa<m>/max_work_queues_size
+Date: Oct 25, 2019
+KernelVersion: 5.6.0
+Contact: dmaengine@vger.kernel.org
+Description: The maximum work queue size supported by this device.
+
+What: /sys/bus/dsa/devices/dsa<m>/max_engines
+Date: Oct 25, 2019
+KernelVersion: 5.6.0
+Contact: dmaengine@vger.kernel.org
+Description: The maximum number of engines supported by this device.
+
+What: /sys/bus/dsa/devices/dsa<m>/max_groups
+Date: Oct 25, 2019
+KernelVersion: 5.6.0
+Contact: dmaengine@vger.kernel.org
+Description: The maximum number of groups can be created under this device.
+
+What: /sys/bus/dsa/devices/dsa<m>/max_read_buffers
+Date: Dec 10, 2021
+KernelVersion: 5.17.0
+Contact: dmaengine@vger.kernel.org
+Description: The total number of read buffers supported by this device.
+ The read buffers represent resources within the DSA
+ implementation, and these resources are allocated by engines to
+ support operations. See DSA spec v1.2 9.2.4 Total Read Buffers.
+ It's not visible when the device does not support Read Buffer
+ allocation control.
+
+What: /sys/bus/dsa/devices/dsa<m>/max_transfer_size
+Date: Oct 25, 2019
+KernelVersion: 5.6.0
+Contact: dmaengine@vger.kernel.org
+Description: The number of bytes to be read from the source address to
+ perform the operation. The maximum transfer size is dependent on
+ the workqueue the descriptor was submitted to.
+
+What: /sys/bus/dsa/devices/dsa<m>/max_work_queues
+Date: Oct 25, 2019
+KernelVersion: 5.6.0
+Contact: dmaengine@vger.kernel.org
+Description: The maximum work queue number that this device supports.
+
+What: /sys/bus/dsa/devices/dsa<m>/numa_node
+Date: Oct 25, 2019
+KernelVersion: 5.6.0
+Contact: dmaengine@vger.kernel.org
+Description: The numa node number for this device.
+
+What: /sys/bus/dsa/devices/dsa<m>/op_cap
+Date: Oct 25, 2019
+KernelVersion: 5.6.0
+Contact: dmaengine@vger.kernel.org
+Description: The operation capability bit mask specify the operation types
+ supported by the this device.
+
+What: /sys/bus/dsa/devices/dsa<m>/pasid_enabled
+Date: Oct 27, 2020
+KernelVersion: 5.11.0
+Contact: dmaengine@vger.kernel.org
+Description: To indicate if user PASID (process address space identifier) is
+ enabled or not for this device.
+
+What: /sys/bus/dsa/devices/dsa<m>/state
+Date: Oct 25, 2019
+KernelVersion: 5.6.0
+Contact: dmaengine@vger.kernel.org
+Description: The state information of this device. It can be either enabled
+ or disabled.
+
+What: /sys/bus/dsa/devices/dsa<m>/group<m>.<n>
+Date: Oct 25, 2019
+KernelVersion: 5.6.0
+Contact: dmaengine@vger.kernel.org
+Description: The assigned group under this device.
+
+What: /sys/bus/dsa/devices/dsa<m>/engine<m>.<n>
+Date: Oct 25, 2019
+KernelVersion: 5.6.0
+Contact: dmaengine@vger.kernel.org
+Description: The assigned engine under this device.
+
+What: /sys/bus/dsa/devices/dsa<m>/wq<m>.<n>
+Date: Oct 25, 2019
+KernelVersion: 5.6.0
+Contact: dmaengine@vger.kernel.org
+Description: The assigned work queue under this device.
+
+What: /sys/bus/dsa/devices/dsa<m>/configurable
+Date: Oct 25, 2019
+KernelVersion: 5.6.0
+Contact: dmaengine@vger.kernel.org
+Description: To indicate if this device is configurable or not.
+
+What: /sys/bus/dsa/devices/dsa<m>/read_buffer_limit
+Date: Dec 10, 2021
+KernelVersion: 5.17.0
+Contact: dmaengine@vger.kernel.org
+Description: The maximum number of read buffers that may be in use at
+ one time by operations that access low bandwidth memory in the
+ device. See DSA spec v1.2 9.2.8 GENCFG on Global Read Buffer Limit.
+ It's not visible when the device does not support Read Buffer
+ allocation control.
+
+What: /sys/bus/dsa/devices/dsa<m>/cmd_status
+Date: Aug 28, 2020
+KernelVersion: 5.10.0
+Contact: dmaengine@vger.kernel.org
+Description: The last executed device administrative command's status/error.
+ Also last configuration error overloaded.
+ Writing to it will clear the status.
+
+What: /sys/bus/dsa/devices/dsa<m>/iaa_cap
+Date: Sept 14, 2022
+KernelVersion: 6.0.0
+Contact: dmaengine@vger.kernel.org
+Description: IAA (IAX) capability mask. Exported to user space for application
+ consumption. This attribute should only be visible on IAA devices
+ that are version 2 or later.
+
+What: /sys/bus/dsa/devices/dsa<m>/event_log_size
+Date: Sept 14, 2022
+KernelVersion: 6.4.0
+Contact: dmaengine@vger.kernel.org
+Description: The event log size to be configured. Default is 64 entries and
+ occupies 4k size if the evl entry is 64 bytes. It's visible
+ only on platforms that support the capability.
+
+What: /sys/bus/dsa/devices/wq<m>.<n>/block_on_fault
+Date: Oct 27, 2020
+KernelVersion: 5.11.0
+Contact: dmaengine@vger.kernel.org
+Description: To indicate block on fault is allowed or not for the work queue
+ to support on demand paging.
+
+What: /sys/bus/dsa/devices/wq<m>.<n>/group_id
+Date: Oct 25, 2019
+KernelVersion: 5.6.0
+Contact: dmaengine@vger.kernel.org
+Description: The group id that this work queue belongs to.
+
+What: /sys/bus/dsa/devices/wq<m>.<n>/size
+Date: Oct 25, 2019
+KernelVersion: 5.6.0
+Contact: dmaengine@vger.kernel.org
+Description: The work queue size for this work queue.
+
+What: /sys/bus/dsa/devices/wq<m>.<n>/type
+Date: Oct 25, 2019
+KernelVersion: 5.6.0
+Contact: dmaengine@vger.kernel.org
+Description: The type of this work queue, it can be "kernel" type for work
+ queue usages in the kernel space or "user" type for work queue
+ usages by applications in user space.
+
+What: /sys/bus/dsa/devices/wq<m>.<n>/cdev_minor
+Date: Oct 25, 2019
+KernelVersion: 5.6.0
+Contact: dmaengine@vger.kernel.org
+Description: The minor number assigned to this work queue by the character
+ device driver.
+
+What: /sys/bus/dsa/devices/wq<m>.<n>/mode
+Date: Oct 25, 2019
+KernelVersion: 5.6.0
+Contact: dmaengine@vger.kernel.org
+Description: The work queue mode type for this work queue.
+
+What: /sys/bus/dsa/devices/wq<m>.<n>/priority
+Date: Oct 25, 2019
+KernelVersion: 5.6.0
+Contact: dmaengine@vger.kernel.org
+Description: The priority value of this work queue, it is a value relative to
+ other work queue in the same group to control quality of service
+ for dispatching work from multiple workqueues in the same group.
+
+What: /sys/bus/dsa/devices/wq<m>.<n>/state
+Date: Oct 25, 2019
+KernelVersion: 5.6.0
+Contact: dmaengine@vger.kernel.org
+Description: The current state of the work queue.
+
+What: /sys/bus/dsa/devices/wq<m>.<n>/threshold
+Date: Oct 25, 2019
+KernelVersion: 5.6.0
+Contact: dmaengine@vger.kernel.org
+Description: The number of entries in this work queue that may be filled
+ via a limited portal.
+
+What: /sys/bus/dsa/devices/wq<m>.<n>/max_transfer_size
+Date: Aug 28, 2020
+KernelVersion: 5.10.0
+Contact: dmaengine@vger.kernel.org
+Description: The max transfer sized for this workqueue. Cannot exceed device
+ max transfer size. Configurable parameter.
+
+What: /sys/bus/dsa/devices/wq<m>.<n>/max_batch_size
+Date: Aug 28, 2020
+KernelVersion: 5.10.0
+Contact: dmaengine@vger.kernel.org
+Description: The max batch size for this workqueue. Cannot exceed device
+ max batch size. Configurable parameter.
+ It's not visible when the device does not support batch.
+
+What: /sys/bus/dsa/devices/wq<m>.<n>/ats_disable
+Date: Nov 13, 2020
+KernelVersion: 5.11.0
+Contact: dmaengine@vger.kernel.org
+Description: Indicate whether ATS disable is turned on for the workqueue.
+ 0 indicates ATS is on, and 1 indicates ATS is off for the workqueue.
+
+What: /sys/bus/dsa/devices/wq<m>.<n>/prs_disable
+Date: Sept 14, 2022
+KernelVersion: 6.4.0
+Contact: dmaengine@vger.kernel.org
+Description: Controls whether PRS disable is turned on for the workqueue.
+ 0 indicates PRS is on, and 1 indicates PRS is off for the
+ workqueue. This option overrides block_on_fault attribute
+ if set. It's visible only on platforms that support the
+ capability.
+
+What: /sys/bus/dsa/devices/wq<m>.<n>/occupancy
+Date: May 25, 2021
+KernelVersion: 5.14.0
+Contact: dmaengine@vger.kernel.org
+Description: Show the current number of entries in this WQ if WQ Occupancy
+ Support bit WQ capabilities is 1.
+
+What: /sys/bus/dsa/devices/wq<m>.<n>/enqcmds_retries
+Date: Oct 29, 2021
+KernelVersion: 5.17.0
+Contact: dmaengine@vger.kernel.org
+Description: Indicate the number of retires for an enqcmds submission on a sharedwq.
+ A max value to set attribute is capped at 64.
+
+What: /sys/bus/dsa/devices/wq<m>.<n>/op_config
+Date: Sept 14, 2022
+KernelVersion: 6.0.0
+Contact: dmaengine@vger.kernel.org
+Description: Shows the operation capability bits displayed in bitmap format
+ presented by %*pb printk() output format specifier.
+ The attribute can be configured when the WQ is disabled in
+ order to configure the WQ to accept specific bits that
+ correlates to the operations allowed. It's visible only
+ on platforms that support the capability.
+
+What: /sys/bus/dsa/devices/wq<m>.<n>/driver_name
+Date: Sept 8, 2023
+KernelVersion: 6.7.0
+Contact: dmaengine@vger.kernel.org
+Description: Name of driver to be bounded to the wq.
+
+What: /sys/bus/dsa/devices/engine<m>.<n>/group_id
+Date: Oct 25, 2019
+KernelVersion: 5.6.0
+Contact: dmaengine@vger.kernel.org
+Description: The group that this engine belongs to.
+
+What: /sys/bus/dsa/devices/group<m>.<n>/use_read_buffer_limit
+Date: Dec 10, 2021
+KernelVersion: 5.17.0
+Contact: dmaengine@vger.kernel.org
+Description: Enable the use of global read buffer limit for the group. See DSA
+ spec v1.2 9.2.18 GRPCFG Use Global Read Buffer Limit.
+ It's not visible when the device does not support Read Buffer
+ allocation control.
+
+What: /sys/bus/dsa/devices/group<m>.<n>/read_buffers_allowed
+Date: Dec 10, 2021
+KernelVersion: 5.17.0
+Contact: dmaengine@vger.kernel.org
+Description: Indicates max number of read buffers that may be in use at one time
+ by all engines in the group. See DSA spec v1.2 9.2.18 GRPCFG Read
+ Buffers Allowed.
+ It's not visible when the device does not support Read Buffer
+ allocation control.
+
+What: /sys/bus/dsa/devices/group<m>.<n>/read_buffers_reserved
+Date: Dec 10, 2021
+KernelVersion: 5.17.0
+Contact: dmaengine@vger.kernel.org
+Description: Indicates the number of Read Buffers reserved for the use of
+ engines in the group. See DSA spec v1.2 9.2.18 GRPCFG Read Buffers
+ Reserved.
+ It's not visible when the device does not support Read Buffer
+ allocation control.
+
+What: /sys/bus/dsa/devices/group<m>.<n>/desc_progress_limit
+Date: Sept 14, 2022
+KernelVersion: 6.0.0
+Contact: dmaengine@vger.kernel.org
+Description: Allows control of the number of work descriptors that can be
+ concurrently processed by an engine in the group as a fraction
+ of the Maximum Work Descriptors in Progress value specified in
+ the ENGCAP register. The acceptable values are 0 (default),
+ 1 (1/2 of max value), 2 (1/4 of the max value), and 3 (1/8 of
+ the max value). It's visible only on platforms that support
+ the capability.
+
+What: /sys/bus/dsa/devices/group<m>.<n>/batch_progress_limit
+Date: Sept 14, 2022
+KernelVersion: 6.0.0
+Contact: dmaengine@vger.kernel.org
+Description: Allows control of the number of batch descriptors that can be
+ concurrently processed by an engine in the group as a fraction
+ of the Maximum Batch Descriptors in Progress value specified in
+ the ENGCAP register. The acceptable values are 0 (default),
+ 1 (1/2 of max value), 2 (1/4 of the max value), and 3 (1/8 of
+ the max value). It's visible only on platforms that support
+ the capability.
+
+What: /sys/bus/dsa/devices/wq<m>.<n>/dsa<x>\!wq<m>.<n>/file<y>/cr_faults
+Date: Sept 14, 2022
+KernelVersion: 6.4.0
+Contact: dmaengine@vger.kernel.org
+Description: Show the number of Completion Record (CR) faults this application
+ has caused.
+
+What: /sys/bus/dsa/devices/wq<m>.<n>/dsa<x>\!wq<m>.<n>/file<y>/cr_fault_failures
+Date: Sept 14, 2022
+KernelVersion: 6.4.0
+Contact: dmaengine@vger.kernel.org
+Description: Show the number of Completion Record (CR) faults failures that this
+ application has caused. The failure counter is incremented when the
+ driver cannot fault in the address for the CR. Typically this is caused
+ by a bad address programmed in the submitted descriptor or a malicious
+ submitter is using bad CR address on purpose.
+
+What: /sys/bus/dsa/devices/wq<m>.<n>/dsa<x>\!wq<m>.<n>/file<y>/pid
+Date: Sept 14, 2022
+KernelVersion: 6.4.0
+Contact: dmaengine@vger.kernel.org
+Description: Show the process id of the application that opened the file. This is
+ helpful information for a monitor daemon that wants to kill the
+ application that opened the file.
diff --git a/Documentation/ABI/stable/sysfs-driver-dma-ioatdma b/Documentation/ABI/stable/sysfs-driver-dma-ioatdma
index 420c1d09e42f..3a4e2cd0ddcc 100644
--- a/Documentation/ABI/stable/sysfs-driver-dma-ioatdma
+++ b/Documentation/ABI/stable/sysfs-driver-dma-ioatdma
@@ -1,29 +1,29 @@
-What: sys/devices/pciXXXX:XX/0000:XX:XX.X/dma/dma<n>chan<n>/quickdata/cap
+What: /sys/devices/pciXXXX:XX/0000:XX:XX.X/dma/dma<n>chan<n>/quickdata/cap
Date: December 3, 2009
KernelVersion: 2.6.32
Contact: dmaengine@vger.kernel.org
Description: Capabilities the DMA supports.Currently there are DMA_PQ, DMA_PQ_VAL,
DMA_XOR,DMA_XOR_VAL,DMA_INTERRUPT.
-What: sys/devices/pciXXXX:XX/0000:XX:XX.X/dma/dma<n>chan<n>/quickdata/ring_active
+What: /sys/devices/pciXXXX:XX/0000:XX:XX.X/dma/dma<n>chan<n>/quickdata/ring_active
Date: December 3, 2009
KernelVersion: 2.6.32
Contact: dmaengine@vger.kernel.org
Description: The number of descriptors active in the ring.
-What: sys/devices/pciXXXX:XX/0000:XX:XX.X/dma/dma<n>chan<n>/quickdata/ring_size
+What: /sys/devices/pciXXXX:XX/0000:XX:XX.X/dma/dma<n>chan<n>/quickdata/ring_size
Date: December 3, 2009
KernelVersion: 2.6.32
Contact: dmaengine@vger.kernel.org
Description: Descriptor ring size, total number of descriptors available.
-What: sys/devices/pciXXXX:XX/0000:XX:XX.X/dma/dma<n>chan<n>/quickdata/version
+What: /sys/devices/pciXXXX:XX/0000:XX:XX.X/dma/dma<n>chan<n>/quickdata/version
Date: December 3, 2009
KernelVersion: 2.6.32
Contact: dmaengine@vger.kernel.org
Description: Version of ioatdma device.
-What: sys/devices/pciXXXX:XX/0000:XX:XX.X/dma/dma<n>chan<n>/quickdata/intr_coalesce
+What: /sys/devices/pciXXXX:XX/0000:XX:XX.X/dma/dma<n>chan<n>/quickdata/intr_coalesce
Date: August 8, 2017
KernelVersion: 4.14
Contact: dmaengine@vger.kernel.org
diff --git a/Documentation/ABI/stable/sysfs-driver-firmware-zynqmp b/Documentation/ABI/stable/sysfs-driver-firmware-zynqmp
new file mode 100644
index 000000000000..c3fec3c835af
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-driver-firmware-zynqmp
@@ -0,0 +1,256 @@
+What: /sys/devices/platform/firmware\:zynqmp-firmware/ggs*
+Date: March 2020
+KernelVersion: 5.6
+Contact: "Jolly Shah" <jollys@xilinx.com>
+Description:
+ Read/Write PMU global general storage register value,
+ GLOBAL_GEN_STORAGE{0:3}.
+ Global general storage register that can be used
+ by system to pass information between masters.
+
+ The register is reset during system or power-on
+ resets. Three registers are used by the FSBL and
+ other Xilinx software products: GLOBAL_GEN_STORAGE{4:6}.
+
+ Usage::
+
+ # cat /sys/devices/platform/firmware\:zynqmp-firmware/ggs0
+ # echo <value> > /sys/devices/platform/firmware\:zynqmp-firmware/ggs0
+
+ Example::
+
+ # cat /sys/devices/platform/firmware\:zynqmp-firmware/ggs0
+ # echo 0x1234ABCD > /sys/devices/platform/firmware\:zynqmp-firmware/ggs0
+
+Users: Xilinx
+
+What: /sys/devices/platform/firmware\:zynqmp-firmware/pggs*
+Date: March 2020
+KernelVersion: 5.6
+Contact: "Jolly Shah" <jollys@xilinx.com>
+Description:
+ Read/Write PMU persistent global general storage register
+ value, PERS_GLOB_GEN_STORAGE{0:3}.
+ Persistent global general storage register that
+ can be used by system to pass information between
+ masters.
+
+ This register is only reset by the power-on reset
+ and maintains its value through a system reset.
+ Four registers are used by the FSBL and other Xilinx
+ software products: PERS_GLOB_GEN_STORAGE{4:7}.
+ Register is reset only by a POR reset.
+
+ Usage::
+
+ # cat /sys/devices/platform/firmware\:zynqmp-firmware/pggs0
+ # echo <value> > /sys/devices/platform/firmware\:zynqmp-firmware/pggs0
+
+ Example::
+
+ # cat /sys/devices/platform/firmware\:zynqmp-firmware/pggs0
+ # echo 0x1234ABCD > /sys/devices/platform/firmware\:zynqmp-firmware/pggs0
+
+Users: Xilinx
+
+What: /sys/devices/platform/firmware\:zynqmp-firmware/shutdown_scope
+Date: March 2020
+KernelVersion: 5.6
+Contact: "Jolly Shah" <jollys@xilinx.com>
+Description:
+ This sysfs interface allows to set the shutdown scope for the
+ next shutdown request. When the next shutdown is performed, the
+ platform specific portion of PSCI-system_off can use the chosen
+ shutdown scope.
+
+ Following are available shutdown scopes(subtypes):
+
+ subsystem:
+ Only the APU along with all of its peripherals
+ not used by other processing units will be
+ shut down. This may result in the FPD power
+ domain being shut down provided that no other
+ processing unit uses FPD peripherals or DRAM.
+ ps_only:
+ The complete PS will be shut down, including the
+ RPU, PMU, etc. Only the PL domain (FPGA)
+ remains untouched.
+ system:
+ The complete system/device is shut down.
+
+ Usage::
+
+ # cat /sys/devices/platform/firmware\:zynqmp-firmware/shutdown_scope
+ # echo <scope> > /sys/devices/platform/firmware\:zynqmp-firmware/shutdown_scope
+
+ Example::
+
+ # cat /sys/devices/platform/firmware\:zynqmp-firmware/shutdown_scope
+ # echo "subsystem" > /sys/devices/platform/firmware\:zynqmp-firmware/shutdown_scope
+
+Users: Xilinx
+
+What: /sys/devices/platform/firmware\:zynqmp-firmware/health_status
+Date: March 2020
+KernelVersion: 5.6
+Contact: "Jolly Shah" <jollys@xilinx.com>
+Description:
+ This sysfs interface allows to set the health status. If PMUFW
+ is compiled with CHECK_HEALTHY_BOOT, it will check the healthy
+ bit on FPD WDT expiration. If healthy bit is set by a user
+ application running in Linux, PMUFW will do APU only restart. If
+ healthy bit is not set during FPD WDT expiration, PMUFW will do
+ system restart.
+
+ Usage:
+
+ Set healthy bit::
+
+ # echo 1 > /sys/devices/platform/firmware\:zynqmp-firmware/health_status
+
+ Unset healthy bit::
+
+ # echo 0 > /sys/devices/platform/firmware\:zynqmp-firmware/health_status
+
+Users: Xilinx
+
+What: /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
+Date: Feb 2022
+KernelVersion: 5.18
+Contact: "Ronak Jain" <ronak.jain@xilinx.com>
+Description:
+ This sysfs interface allows user to configure features at
+ runtime. The user can enable or disable features running at
+ firmware as well as the user can configure the parameters of
+ the features at runtime. The supported features are over
+ temperature and external watchdog. Here, the external watchdog
+ is completely different than the /dev/watchdog as the external
+ watchdog is running on the firmware and it is used to monitor
+ the health of firmware not APU(Linux). Also, the external
+ watchdog is interfaced outside of the zynqmp soc.
+
+ The supported config ids are for the feature configuration is,
+ 1. PM_FEATURE_OVERTEMP_STATUS = 1, the user can enable or
+ disable the over temperature feature.
+ 2. PM_FEATURE_OVERTEMP_VALUE = 2, the user can configure the
+ over temperature limit in Degree Celsius.
+ 3. PM_FEATURE_EXTWDT_STATUS = 3, the user can enable or disable
+ the external watchdog feature.
+ 4. PM_FEATURE_EXTWDT_VALUE = 4, the user can configure the
+ external watchdog feature.
+
+ Usage:
+
+ Select over temperature config ID to enable/disable feature
+ # echo 1 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
+
+ Check over temperature config ID is selected or not
+ # cat /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
+ The expected result is 1.
+
+ Select over temperature config ID to configure OT limit
+ # echo 2 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
+
+ Check over temperature config ID is selected or not
+ # cat /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
+ The expected result is 2.
+
+ Select external watchdog config ID to enable/disable feature
+ # echo 3 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
+
+ Check external watchdog config ID is selected or not
+ # cat /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
+ The expected result is 3.
+
+ Select external watchdog config ID to configure time interval
+ # echo 4 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
+
+ Check external watchdog config ID is selected or not
+ # cat /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
+ The expected result is 4.
+
+Users: Xilinx
+
+What: /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
+Date: Feb 2022
+KernelVersion: 5.18
+Contact: "Ronak Jain" <ronak.jain@xilinx.com>
+Description:
+ This sysfs interface allows to configure features at runtime.
+ The user can enable or disable features running at firmware.
+ Also, the user can configure the parameters of the features
+ at runtime. The supported features are over temperature and
+ external watchdog. Here, the external watchdog is completely
+ different than the /dev/watchdog as the external watchdog is
+ running on the firmware and it is used to monitor the health
+ of firmware not APU(Linux). Also, the external watchdog is
+ interfaced outside of the zynqmp soc.
+
+ By default the features are disabled in the firmware. The user
+ can enable features by querying appropriate config id of the
+ features.
+
+ The default limit for the over temperature is 90 Degree Celsius.
+ The default timer interval for the external watchdog is 570ms.
+
+ The supported config ids are for the feature configuration is,
+ 1. PM_FEATURE_OVERTEMP_STATUS = 1, the user can enable or
+ disable the over temperature feature.
+ 2. PM_FEATURE_OVERTEMP_VALUE = 2, the user can configure the
+ over temperature limit in Degree Celsius.
+ 3. PM_FEATURE_EXTWDT_STATUS = 3, the user can enable or disable
+ the external watchdog feature.
+ 4. PM_FEATURE_EXTWDT_VALUE = 4, the user can configure the
+ external watchdog feature.
+
+ Usage:
+
+ Enable over temperature feature
+ # echo 1 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
+ # echo 1 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
+
+ Check whether the over temperature feature is enabled or not
+ # cat /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
+ The expected result is 1.
+
+ Disable over temperature feature
+ # echo 1 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
+ # echo 0 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
+
+ Check whether the over temperature feature is disabled or not
+ # cat /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
+ The expected result is 0.
+
+ Configure over temperature limit to 50 Degree Celsius
+ # echo 2 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
+ # echo 50 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
+
+ Check whether the over temperature limit is configured or not
+ # cat /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
+ The expected result is 50.
+
+ Enable external watchdog feature
+ # echo 3 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
+ # echo 1 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
+
+ Check whether the external watchdog feature is enabled or not
+ # cat /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
+ The expected result is 1.
+
+ Disable external watchdog feature
+ # echo 3 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
+ # echo 0 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
+
+ Check whether the external watchdog feature is disabled or not
+ # cat /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
+ The expected result is 0.
+
+ Configure external watchdog timer interval to 500ms
+ # echo 4 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
+ # echo 500 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
+
+ Check whether the external watchdog timer interval is configured or not
+ # cat /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
+ The expected result is 500.
+
+Users: Xilinx
diff --git a/Documentation/ABI/stable/sysfs-driver-ib_srp b/Documentation/ABI/stable/sysfs-driver-ib_srp
index 7049a2b50359..bada15a329f7 100644
--- a/Documentation/ABI/stable/sysfs-driver-ib_srp
+++ b/Documentation/ABI/stable/sysfs-driver-ib_srp
@@ -6,6 +6,7 @@ Description: Interface for making ib_srp connect to a new target.
One can request ib_srp to connect to a new target by writing
a comma-separated list of login parameters to this sysfs
attribute. The supported parameters are:
+
* id_ext, a 16-digit hexadecimal number specifying the eight
byte identifier extension in the 16-byte SRP target port
identifier. The target port identifier is sent by ib_srp
@@ -67,6 +68,8 @@ Description: Interface for making ib_srp connect to a new target.
initiator is allowed to queue per SCSI host. The default
value for this parameter is 62. The lowest supported value
is 2.
+ * max_it_iu_size, a decimal number specifying the maximum
+ initiator to target information unit length.
What: /sys/class/infiniband_srp/srp-<hca>-<port_number>/ibdev
Date: January 2, 2006
diff --git a/Documentation/ABI/stable/sysfs-driver-misc-cp500 b/Documentation/ABI/stable/sysfs-driver-misc-cp500
new file mode 100644
index 000000000000..525bd18a2db4
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-driver-misc-cp500
@@ -0,0 +1,25 @@
+What: /sys/devices/pciXXXX:XX/0000:XX:XX.X/0000:XX:XX.X/version
+Date: June 2024
+KernelVersion: 6.11
+Contact: Gerhard Engleder <eg@keba.com>
+Description: Version of the FPGA configuration bitstream as printable string.
+ This file is read only.
+Users: KEBA
+
+What: /sys/devices/pciXXXX:XX/0000:XX:XX.X/0000:XX:XX.X/keep_cfg
+Date: June 2024
+KernelVersion: 6.11
+Contact: Gerhard Engleder <eg@keba.com>
+Description: Flag which signals if FPGA shall keep or reload configuration
+ bitstream on reset. Normal FPGA behavior and default is to keep
+ configuration bitstream and to only reset the configured logic.
+
+ Reloading configuration on reset enables an update of the
+ configuration bitstream with a simple reboot. Otherwise it is
+ necessary to power cycle the device to reload the new
+ configuration bitstream.
+
+ This file is read/write. The values are as follows:
+ 1 = keep configuration bitstream on reset, default
+ 0 = reload configuration bitstream on reset
+Users: KEBA
diff --git a/Documentation/ABI/stable/sysfs-driver-mlxreg-io b/Documentation/ABI/stable/sysfs-driver-mlxreg-io
index d9d117d457e1..f59461111221 100644
--- a/Documentation/ABI/stable/sysfs-driver-mlxreg-io
+++ b/Documentation/ABI/stable/sysfs-driver-mlxreg-io
@@ -1,30 +1,54 @@
-What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/
- asic_health
-
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/asic_health
Date: June 2018
KernelVersion: 4.19
-Contact: Vadim Pasternak <vadimpmellanox.com>
+Contact: Vadim Pasternak <vadimp@nvidia.com>
Description: This file shows ASIC health status. The possible values are:
0 - health failed, 2 - health OK, 3 - ASIC in booting state.
The files are read only.
-What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/
- cpld1_version
- cpld2_version
-
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/cpld1_version
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/cpld2_version
Date: June 2018
KernelVersion: 4.19
-Contact: Vadim Pasternak <vadimpmellanox.com>
+Contact: Vadim Pasternak <vadimp@nvidia.com>
Description: These files show with which CPLD versions have been burned
on carrier and switch boards.
The files are read only.
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/fan_dir
+Date: December 2018
+KernelVersion: 5.0
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: This file shows the system fans direction:
+ forward direction - relevant bit is set 0;
+ reversed direction - relevant bit is set 1.
+
+ The files are read only.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/cpld3_version
+Date: November 2018
+KernelVersion: 5.0
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: These files show with which CPLD versions have been burned
+ on LED or Gearbox board.
+
+ The files are read only.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/jtag_enable
+Date: November 2018
+KernelVersion: 5.0
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: These files enable and disable the access to the JTAG domain.
+ By default access to the JTAG domain is disabled.
+
+ The file is read/write.
+
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/select_iio
Date: June 2018
KernelVersion: 4.19
-Contact: Vadim Pasternak <vadimpmellanox.com>
+Contact: Vadim Pasternak <vadimp@nvidia.com>
Description: This file allows iio devices selection.
Attribute select_iio can be written with 0 or with 1. It
@@ -38,7 +62,7 @@ What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/psu1_on
/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/pwr_down
Date: June 2018
KernelVersion: 4.19
-Contact: Vadim Pasternak <vadimpmellanox.com>
+Contact: Vadim Pasternak <vadimp@nvidia.com>
Description: These files allow asserting system power cycling, switching
power supply units on and off and system's main power domain
shutdown.
@@ -54,19 +78,18 @@ Description: These files allow asserting system power cycling, switching
The files are write only.
-What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/
- reset_aux_pwr_or_ref
- reset_asic_thermal
- reset_hotswap_or_halt
- reset_hotswap_or_wd
- reset_fw_reset
- reset_long_pb
- reset_main_pwr_fail
- reset_short_pb
- reset_sw_reset
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_aux_pwr_or_ref
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_asic_thermal
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_hotswap_or_halt
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_hotswap_or_wd
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_fw_reset
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_long_pb
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_main_pwr_fail
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_short_pb
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_sw_reset
Date: June 2018
KernelVersion: 4.19
-Contact: Vadim Pasternak <vadimpmellanox.com>
+Contact: Vadim Pasternak <vadimp@nvidia.com>
Description: These files show the system reset cause, as following: power
auxiliary outage or power refresh, ASIC thermal shutdown, halt,
hotswap, watchdog, firmware reset, long press power button,
@@ -76,3 +99,717 @@ Description: These files show the system reset cause, as following: power
reset cause.
The files are read only.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_comex_pwr_fail
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_from_comex
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_system
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_voltmon_upgrade_fail
+Date: November 2018
+KernelVersion: 5.0
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: These files show the system reset cause, as following: ComEx
+ power fail, reset from ComEx, system platform reset, reset
+ due to voltage monitor devices upgrade failure,
+ Value 1 in file means this is reset cause, 0 - otherwise.
+ Only one bit could be 1 at the same time, representing only
+ the last reset cause.
+
+ The files are read only.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/cpld4_version
+Date: November 2018
+KernelVersion: 5.0
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: These files show with which CPLD versions have been burned
+ on LED board.
+
+ The files are read only.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_comex_thermal
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_comex_wd
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_from_asic
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_reload_bios
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_sff_wd
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_swb_wd
+Date: June 2019
+KernelVersion: 5.3
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: These files show the system reset cause, as following:
+ COMEX thermal shutdown; wathchdog power off or reset was derived
+ by one of the next components: COMEX, switch board or by Small Form
+ Factor mezzanine, reset requested from ASIC, reset caused by BIOS
+ reload. Value 1 in file means this is reset cause, 0 - otherwise.
+ Only one of the above causes could be 1 at the same time, representing
+ only last reset cause.
+
+ The files are read only.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/config1
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/config2
+Date: January 2020
+KernelVersion: 5.6
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: These files show system static topology identification
+ like system's static I2C topology, number and type of FPGA
+ devices within the system and so on.
+
+ The files are read only.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_ac_pwr_fail
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_platform
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_soc
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_sw_pwr_off
+Date: January 2020
+KernelVersion: 5.6
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: These files show the system reset causes, as following: reset
+ due to AC power failure, reset invoked from software by
+ assertion reset signal through CPLD. reset caused by signal
+ asserted by SOC through ACPI register, reset invoked from
+ software by assertion power off signal through CPLD.
+
+ The files are read only.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/pcie_asic_reset_dis
+Date: January 2020
+KernelVersion: 5.6
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: This file allows to retain ASIC up during PCIe root complex
+ reset, when attribute is set 1.
+
+ The file is read/write.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/vpd_wp
+Date: January 2020
+KernelVersion: 5.6
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: This file allows to overwrite system VPD hardware write
+ protection when attribute is set 1.
+
+ The file is read/write.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/voltreg_update_status
+Date: January 2020
+KernelVersion: 5.6
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: This file exposes the configuration update status of burnable
+ voltage regulator devices. The status values are as following:
+ 0 - OK; 1 - CRC failure; 2 = I2C failure; 3 - in progress.
+
+ The file is read only.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/ufm_version
+Date: January 2020
+KernelVersion: 5.6
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: This file exposes the firmware version of burnable voltage
+ regulator devices.
+
+ The file is read only.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/cpld1_pn
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/cpld2_pn
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/cpld3_pn
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/cpld4_pn
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/cpld1_version_min
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/cpld2_version_min
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/cpld3_version_min
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/cpld4_version_min
+Date: July 2020
+KernelVersion: 5.9
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: These files show with which CPLD part numbers and minor
+ versions have been burned CPLD devices equipped on a
+ system.
+
+ The files are read only.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/bios_active_image
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/bios_auth_fail
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/bios_upgrade_fail
+Date: October 2021
+KernelVersion: 5.16
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: The files represent BIOS statuses:
+
+ bios_active_image: location of current active BIOS image:
+ 0: Top, 1: Bottom.
+ The reported value should correspond to value expected by OS
+ in case of BIOS safe mode is 0. This bit is related to Intel
+ top-swap feature of DualBios on the same flash.
+
+ bios_auth_fail: BIOS upgrade is failed because provided BIOS
+ image is not signed correctly.
+
+ bios_upgrade_fail: BIOS upgrade is failed by some other
+ reason not because authentication. For example due to
+ physical SPI flash problem.
+
+ The files are read only.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/lc1_enable
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/lc2_enable
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/lc3_enable
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/lc4_enable
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/lc5_enable
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/lc6_enable
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/lc7_enable
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/lc8_enable
+Date: October 2021
+KernelVersion: 5.16
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: These files allow line cards enable state control.
+ Expected behavior:
+ When lc{n}_enable is written 1, related line card is released
+ from the reset state, when 0 - is hold in reset state.
+
+ The files are read/write.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/lc1_pwr
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/lc2_pwr
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/lc3_pwr
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/lc4_pwr
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/lc5_pwr
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/lc6_pwr
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/lc7_pwr
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/lc8_pwr
+Date: October 2021
+KernelVersion: 5.16
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: These files switching line cards power on and off.
+ Expected behavior:
+ When lc{n}_pwr is written 1, related line card is powered
+ on, when written 0 - powered off.
+
+ The files are read/write.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/lc1_rst_mask
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/lc2_rst_mask
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/lc3_rst_mask
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/lc4_rst_mask
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/lc5_rst_mask
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/lc6_rst_mask
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/lc7_rst_mask
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/lc8_rst_mask
+Date: October 2021
+KernelVersion: 5.16
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: These files clear line card reset bit enforced by ASIC, when it
+ sets it due to some abnormal ASIC behavior.
+ Expected behavior:
+ When lc{n}_rst_mask is written 1, related line card reset bit
+ is cleared, when written 0 - no effect.
+
+ The files are write only.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/os_started
+Date: October 2021
+KernelVersion: 5.16
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: This file, when written 1, indicates to programmable devices
+ that OS is taking control over it.
+
+ The file is read/write.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/pm_mgmt_en
+Date: October 2021
+KernelVersion: 5.16
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: This file assigns power management control ownership.
+ When power management control is provided by hardware, hardware
+ will automatically power off one or more line previously
+ powered line cards in case system power budget is getting
+ insufficient. It could be in case when some of power units lost
+ power good state.
+ When pm_mgmt_en is written 1, power management control by
+ software is enabled, 0 - power management control by hardware.
+ Note that for any setting of pm_mgmt_en attribute hardware will
+ not allow to power on any new line card in case system power
+ budget is insufficient.
+ Same in case software will try to power on several line cards
+ at once - hardware will power line cards while system has
+ enough power budget.
+ Default is 0.
+
+ The file is read/write.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/psu3_on
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/psu4_on
+Date: October 2021
+KernelVersion: 5.16
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: These files switching power supply units on and off.
+ Expected behavior:
+ When psu3_on or psu4_on is written 1, related unit will be
+ disconnected from the power source, when written 0 - connected.
+
+ The files are write only.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/shutdown_unlock
+Date: October 2021
+KernelVersion: 5.16
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: This file allows to unlock ASIC after thermal shutdown event.
+ When system thermal shutdown is enforced by ASIC, ASIC is
+ getting locked and after system boot it will not be available.
+ Software can decide to unlock it by setting this attribute to
+ 1 and then perform system power cycle by setting pwr_cycle
+ attribute to 1 (power cycle of main power domain).
+ Before setting shutdown_unlock to 1 it is recommended to
+ validate that system reboot cause is reset_asic_thermal or
+ reset_thermal_spc_or_pciesw.
+ In case shutdown_unlock is not set 1, the only way to release
+ ASIC from locking - is full system power cycle through the
+ external power distribution unit.
+ Default is 1.
+
+ The file is read/write.
+
+What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/i2c-*/*-0032/mlxreg-io.*/hwmon/hwmon*/cpld1_pn
+What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/i2c-*/*-0032/mlxreg-io.*/hwmon/hwmon*/cpld1_version
+What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/i2c-*/*-0032/mlxreg-io.*/hwmon/hwmon*/cpld1_version_min
+Date: October 2021
+KernelVersion: 5.16
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: These files show with which CPLD major and minor versions
+ and part number has been burned CPLD device on line card.
+
+ The files are read only.
+
+What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/i2c-*/*-0032/mlxreg-io.*/hwmon/hwmon*/fpga1_pn
+What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/i2c-*/*-0032/mlxreg-io.*/hwmon/hwmon*/fpga1_version
+What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/i2c-*/*-0032/mlxreg-io.*/hwmon/hwmon*/fpga1_version_min
+Date: October 2021
+KernelVersion: 5.16
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: These files show with which FPGA major and minor versions
+ and part number has been burned FPGA device on line card.
+
+ The files are read only.
+
+What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/i2c-*/*-0032/mlxreg-io.*/hwmon/hwmon*/vpd_wp
+Date: October 2021
+KernelVersion: 5.16
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: This file allow to overwrite line card VPD hardware write
+ protection mode. When attribute is set 1 - write protection is
+ disabled, when 0 - enabled.
+ Default is 0.
+ If the system is in locked-down mode writing this file will not
+ be allowed.
+ The purpose if this file is to allow line card VPD burning
+ during production flow.
+
+ The file is read/write.
+
+What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/i2c-*/*-0032/mlxreg-io.*/hwmon/hwmon*/reset_aux_pwr_or_ref
+What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/i2c-*/*-0032/mlxreg-io.*/hwmon/hwmon*/reset_dc_dc_pwr_fail
+What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/i2c-*/*-0032/mlxreg-io.*/hwmon/hwmon*/reset_fpga_not_done
+What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/i2c-*/*-0032/mlxreg-io.*/hwmon/hwmon*/reset_from_chassis
+What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/i2c-*/*-0032/mlxreg-io.*/hwmon/hwmon*/reset_line_card
+What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/i2c-*/*-0032/mlxreg-io.*/hwmon/hwmon*/reset_pwr_off_from_chassis
+Date: October 2021
+KernelVersion: 5.16
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: These files show the line reset cause, as following: power
+ auxiliary outage or power refresh, DC-to-DC power failure, FPGA reset
+ failed, line card reset failed, power off from chassis.
+ Value 1 in file means this is reset cause, 0 - otherwise. Only one of
+ the above causes could be 1 at the same time, representing only last
+ reset cause.
+
+ The files are read only.
+
+What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/i2c-*/*-0032/mlxreg-io.*/hwmon/hwmon*/cpld_upgrade_en
+What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/i2c-*/*-0032/mlxreg-io.*/hwmon/hwmon*/fpga_upgrade_en
+Date: October 2021
+KernelVersion: 5.16
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: These files allow CPLD and FPGA burning. Value 1 in file means burning
+ is enabled, 0 - otherwise.
+ If the system is in locked-down mode writing these files will
+ not be allowed.
+ The purpose of these files to allow line card CPLD and FPGA
+ upgrade through the JTAG daisy-chain.
+
+ The files are read/write.
+
+What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/i2c-*/*-0032/mlxreg-io.*/hwmon/hwmon*/qsfp_pwr_en
+What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/i2c-*/*-0032/mlxreg-io.*/hwmon/hwmon*/pwr_en
+Date: October 2021
+KernelVersion: 5.16
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: These files allow to power on/off all QSFP ports and whole line card.
+ The attributes are set 1 for power on, 0 - for power off.
+
+ The files are read/write.
+
+What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/i2c-*/*-0032/mlxreg-io.*/hwmon/hwmon*/agb_spi_burn_en
+What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/i2c-*/*-0032/mlxreg-io.*/hwmon/hwmon*/fpga_spi_burn_en
+Date: October 2021
+KernelVersion: 5.16
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: These files allow gearboxes and FPGA SPI flash burning.
+ The attributes are set 1 to enable burning, 0 - to disable.
+ If the system is in locked-down mode writing these files will
+ not be allowed.
+ The purpose of these files to allow line card Gearboxes and FPGA
+ burning during production flow.
+
+ The file is read/write.
+
+What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/i2c-*/*-0032/mlxreg-io.*/hwmon/hwmon*/max_power
+What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/i2c-*/*-0032/mlxreg-io.*/hwmon/hwmon*/config
+Date: October 2021
+KernelVersion: 5.16
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: These files provide the maximum powered required for line card
+ feeding and line card configuration Id.
+
+ The files are read only.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/phy_reset
+Date: May 2022
+KernelVersion: 5.19
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: This file allows to reset PHY 88E1548 when attribute is set 0
+ due to some abnormal PHY behavior.
+ Expected behavior:
+ When phy_reset is written 1, all PHY 88E1548 are released
+ from the reset state, when 0 - are hold in reset state.
+
+ The files are read/write.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/mac_reset
+Date: May 2022
+KernelVersion: 5.19
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: This file allows to reset ASIC MT52132 when attribute is set 0
+ due to some abnormal ASIC behavior.
+ Expected behavior:
+ When mac_reset is written 1, the ASIC MT52132 is released
+ from the reset state, when 0 - is hold in reset state.
+
+ The files are read/write.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/qsfp_pwr_good
+Date: May 2022
+KernelVersion: 5.19
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: This file shows QSFP ports power status. The value is set to 0
+ when one of any QSFP ports is plugged. The value is set to 1 when
+ there are no any QSFP ports are plugged.
+ The possible values are:
+ 0 - Power good, 1 - Not power good.
+
+ The files are read only.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/asic2_health
+Date: July 2022
+KernelVersion: 5.20
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: This file shows 2-nd ASIC health status. The possible values are:
+ 0 - health failed, 2 - health OK, 3 - ASIC in booting state.
+
+ The file is read only.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/asic_reset
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/asic2_reset
+Date: July 2022
+KernelVersion: 5.20
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: These files allow to each of ASICs by writing 1.
+
+ The files are write only.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/comm_chnl_ready
+Date: July 2022
+KernelVersion: 5.20
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: This file is used to indicate remote end (for example BMC) that system
+ host CPU is ready for sending telemetry data to remote end.
+ For indication the file should be written 1.
+
+ The file is write only.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/config3
+Date: January 2020
+KernelVersion: 5.6
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: The file indicates COME module hardware configuration.
+ The value is pushed by hardware through GPIO pins.
+ The purpose is to expose some minor BOM changes for the same system SKU.
+
+ The file is read only.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_pwr_converter_fail
+Date: February 2023
+KernelVersion: 6.3
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: This file shows the system reset cause due to power converter
+ devices failure.
+ Value 1 in file means this is reset cause, 0 - otherwise.
+
+ The file is read only.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/erot1_ap_reset
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/erot2_ap_reset
+Date: February 2023
+KernelVersion: 6.3
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: These files aim to monitor the status of the External Root of Trust (EROT)
+ processor's RESET output to the Application Processor (AP).
+ By reading this file, could be determined if the EROT has invalidated or
+ revoked AP Firmware, at which point it will hold the AP in RESET until a
+ valid firmware is loaded. This protects the AP from running an
+ unauthorized firmware. In the normal flow, the AP reset should be released
+ after the EROT validates the integrity of the FW, and it should be done so
+ as quickly as possible so that the AP boots before the CPU starts to
+ communicate to each ASIC.
+
+ The files are read only.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/erot1_recovery
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/erot2_recovery
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/erot1_reset
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/erot2_reset
+Date: February 2023
+KernelVersion: 6.3
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: These files aim to perform External Root of Trust (EROT) recovery
+ sequence after EROT device failure.
+ These EROT devices protect ASICs from unauthorized access and in normal
+ flow their reset should be released with system power – earliest power
+ up stage, so that EROTs can begin boot and authentication process before
+ CPU starts to communicate to ASICs.
+ Issuing a reset to the EROT while asserting the recovery signal will cause
+ the EROT Application Processor to enter recovery mode so that the EROT FW
+ can be updated/recovered.
+ For reset/recovery the related file should be toggled by 1/0.
+
+ The files are read/write.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/erot1_wp
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/erot2_wp
+Date: February 2023
+KernelVersion: 6.3
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: These files allow access to External Root of Trust (EROT) for reset
+ and recovery sequence after EROT device failure.
+ Default is 0 (programming disabled).
+ If the system is in locked-down mode writing this file will not be allowed.
+
+ The files are read/write.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/spi_chnl_select
+Date: February 2023
+KernelVersion: 6.3
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: This file allows SPI chip selection for External Root of Trust (EROT)
+ device Out-of-Band recovery.
+ File can be written with 0 or with 1. It selects which EROT can be accessed
+ through SPI device.
+
+ The file is read/write.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/asic_pg_fail
+Date: February 2023
+KernelVersion: 6.3
+Contact: Vadim Pasternak vadimp@nvidia.com
+Description: This file shows ASIC Power Good status.
+ Value 1 in file means ASIC Power Good failed, 0 - otherwise.
+
+ The file is read only.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/clk_brd1_boot_fail
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/clk_brd2_boot_fail
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/clk_brd_fail
+Date: February 2023
+KernelVersion: 6.3
+Contact: Vadim Pasternak vadimp@nvidia.com
+Description: These files are related to clock boards status in system.
+ - clk_brd1_boot_fail: warning about 1-st clock board failed to boot from CI.
+ - clk_brd2_boot_fail: warning about 2-nd clock board failed to boot from CI.
+ - clk_brd_fail: error about common clock board boot failure.
+
+ The files are read only.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/clk_brd_prog_en
+Date: February 2023
+KernelVersion: 6.3
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: This file enables programming of clock boards.
+ Default is 0 (programming disabled).
+ If the system is in locked-down mode writing this file will not be allowed.
+
+ The file is read/write.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/pwr_converter_prog_en
+Date: February 2023
+KernelVersion: 6.3
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: This file enables programming of power converters.
+ Default is 0 (programming disabled).
+ If the system is in locked-down mode writing this file will not be allowed.
+
+ The file is read/write.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_ac_ok_fail
+Date: February 2023
+KernelVersion: 6.3
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: This file shows the system reset cause due to AC power failure.
+ Value 1 in file means this is reset cause, 0 - otherwise.
+
+ The file is read only.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/cpld5_pn
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/cpld5_version
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/cpld5_version_min
+Date: August 2023
+KernelVersion: 6.6
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: These files show with which CPLD part numbers, version and minor
+ versions have been burned the 5-th CPLD device equipped on a
+ system.
+
+ The files are read only.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/jtag_cap
+Date: August 2023
+KernelVersion: 6.6
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: This file indicates the available method of CPLD/FPGA devices
+ field update through the JTAG chain:
+
+ b00 - field update through LPC bus register memory space.
+ b01 - Reserved.
+ b10 - Reserved.
+ b11 - field update through CPU GPIOs bit-banging.
+
+ The file is read only.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/lid_open
+Date: August 2023
+KernelVersion: 6.6
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: 1 - indicates that system lid is opened, otherwise 0.
+
+ The file is read only.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_long_pwr_pb
+Date: August 2023
+KernelVersion: 6.6
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: This file if set 1 indicates that system has been reset by
+ long press of power button.
+
+ The file is read only.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_swb_dc_dc_pwr_fail
+Date: August 2023
+KernelVersion: 6.6
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: This file shows 1 in case the system reset happened due to the
+ failure of any DC-DC power converter devices equipped on the
+ switch board.
+
+ The file is read only.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/global_wp_request
+Date: May 2025
+KernelVersion: 6.16
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: This file when written 1 activates request to allow access to
+ the write protected flashes. Such request can be performed only
+ for system equipped with BMC (Board Management Controller),
+ which can grant access to protected flashes. In case BMC allows
+ access - it will respond with "global_wp_response". BMC decides
+ regarding time window of granted access. After granted window is
+ expired, BMC will change value back to 0.
+ Default value is 0.
+
+ The file is read/write.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/global_wp_response
+Date: May 2025
+KernelVersion: 6.16
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: This file, when set 1, indicates that access to protected
+ flashes have been granted to host CPU by BMC.
+ Default value is 0.
+
+ The file is read only.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/shutdown_unlock
+Date: May 2025
+KernelVersion: 6.16
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: When ASICs are getting overheated, system protection
+ hardware mechanism enforces system reboot. After system
+ reboot ASICs come up in locked state. To unlock ASICs,
+ this file should be written 1
+ Default value is 0.
+
+ The file is read/write.
+
+What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/*-00**/mlxreg-io.*/hwmon/hwmon*/boot_progress
+Date: May 2025
+KernelVersion: 6.16
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: These files show the Data Process Unit board boot progress
+ state. Valid states are:
+ - 4 : OS starting.
+ - 5 : OS running.
+ - 6 : Low-Power Standby.
+
+ The file is read only.
+
+What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/*-00**/mlxreg-io.*/hwmon/hwmon*/dpu_id
+Date: May 2025
+KernelVersion: 6.16
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: This file shows hardware Id of Data Process Unit board.
+
+ The file is read only.
+
+What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/*-00**/mlxreg-io.*/hwmon/hwmon*/reset_aux_pwr_or_reload
+What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/*-00**/mlxreg-io.*/hwmon/hwmon*/reset_dpu_thermal
+What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/*-00**/mlxreg-io.*/hwmon/hwmon*/reset_from_main_board
+Date: May 2025
+KernelVersion: 6.16
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: These files expose the cause of the most recent reset of the Data
+ Processing Unit (DPU) board. The possible causes are:
+ - Power auxiliary outage or power reload.
+ - Thermal shutdown.
+ - Reset request from the main board.
+ Value 1 in file means this is reset cause, 0 - otherwise. Only one of
+ the above causes could be 1 at the same time, representing only last
+ reset cause.
+
+ The files are read only.
+
+What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/*-00**/mlxreg-io.*/hwmon/hwmon*/perst_rst
+What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/*-00**/mlxreg-io.*/hwmon/hwmon*/phy_rst
+What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/*-00**/mlxreg-io.*/hwmon/hwmon*/tpm_rst
+What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/*-00**/mlxreg-io.*/hwmon/hwmon*/usbphy_rst
+Date: May 2025
+KernelVersion: 6.16
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: These files allow to reset hardware components of Data Process
+ Unit board. Respectively PCI, Ethernet PHY, TPM and USB PHY
+ resets.
+ Default values for all the attributes is 1. Writing 0 will
+ cause reset of the related component.
+
+ The files are read/write.
+
+What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/*-00**/mlxreg-io.*/hwmon/hwmon*/ufm_upgrade
+Date: May 2025
+KernelVersion: 6.16
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: These files show status of Unified Fabric Manager upgrade.
+ state. 0 - means upgrade is done, 1 - otherwise.
+
+ The file is read only.
diff --git a/Documentation/ABI/stable/sysfs-driver-speakup b/Documentation/ABI/stable/sysfs-driver-speakup
new file mode 100644
index 000000000000..bcb6831aa114
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-driver-speakup
@@ -0,0 +1,395 @@
+What: /sys/accessibility/speakup/attrib_bleep
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Beeps the PC speaker when there is an attribute change such as
+ foreground or background color when using speakup review
+ commands. One = on, zero = off.
+
+What: /sys/accessibility/speakup/bell_pos
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: This works much like a typewriter bell. If for example 72 is
+ echoed to bell_pos, it will beep the PC speaker when typing on
+ a line past character 72.
+
+What: /sys/accessibility/speakup/bleeps
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: This controls whether one hears beeps through the PC speaker
+ when using speakup's review commands.
+ TODO: what values does it accept?
+
+What: /sys/accessibility/speakup/bleep_time
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: This controls the duration of the PC speaker beeps speakup
+ produces.
+ TODO: What are the units? Jiffies?
+
+What: /sys/accessibility/speakup/cursor_time
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: This controls cursor delay when using arrow keys. When a
+ connection is very slow, with the default setting, when moving
+ with the arrows, or backspacing etc. speakup says the incorrect
+ characters. Set this to a higher value to adjust for the delay
+ and better synchronisation between cursor position and speech.
+
+What: /sys/accessibility/speakup/cur_phonetic
+KernelVersion: 6.2
+Contact: speakup@linux-speakup.org
+Description: This allows speakup to speak letters phoneticaly when arrowing through
+ a word letter by letter. This doesn't affect the spelling when typing
+ the characters. When cur_phonetic=1, speakup will speak characters
+ phoneticaly when arrowing over a letter. When cur_phonetic=0, speakup
+ will speak letters as normally.
+
+What: /sys/accessibility/speakup/delimiters
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Delimit a word from speakup.
+ TODO: add more info
+
+What: /sys/accessibility/speakup/ex_num
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: TODO:
+
+What: /sys/accessibility/speakup/key_echo
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Controls if speakup speaks keys when they are typed. One = on,
+ zero = off or don't echo keys.
+
+What: /sys/accessibility/speakup/keymap
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Speakup keymap remaps keys to Speakup functions.
+ It uses a binary
+ format. A special program called genmap is needed to compile a
+ textual keymap into the binary format which is then loaded into
+ /sys/accessibility/speakup/keymap.
+
+What: /sys/accessibility/speakup/no_interrupt
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Controls if typing interrupts output from speakup. With
+ no_interrupt set to zero, typing on the keyboard will interrupt
+ speakup if for example
+ the say screen command is used before the
+ entire screen is read.
+
+ With no_interrupt set to one, if the say
+ screen command is used, and one then types on the keyboard,
+ speakup will continue to say the whole screen regardless until
+ it finishes.
+
+What: /sys/accessibility/speakup/punc_all
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: This is a list of all the punctuation speakup should speak when
+ punc_level is set to four.
+
+What: /sys/accessibility/speakup/punc_level
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Controls the level of punctuation spoken as the screen is
+ displayed, not reviewed. Levels range from zero no punctuation,
+ to four, all punctuation. One corresponds to punc_some, two
+ corresponds to punc_most, and three as well as four both
+ correspond to punc_all. Some hardware synthesizers may have
+ different levels each corresponding to three and four for
+ punc_level. Also note that if punc_level is set to zero, and
+ key_echo is set to one, typed punctuation is still spoken as it
+ is typed.
+
+What: /sys/accessibility/speakup/punc_most
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: This is a list of all the punctuation speakup should speak when
+ punc_level is set to two.
+
+What: /sys/accessibility/speakup/punc_some
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: This is a list of all the punctuation speakup should speak when
+ punc_level is set to one.
+
+What: /sys/accessibility/speakup/reading_punc
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Almost the same as punc_level, the differences being that
+ reading_punc controls the level of punctuation when reviewing
+ the screen with speakup's screen review commands. The other
+ difference is that reading_punc set to three speaks punc_all,
+ and reading_punc set to four speaks all punctuation, including
+ spaces.
+
+What: /sys/accessibility/speakup/repeats
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: A list of characters speakup repeats. Normally, when there are
+ more than three characters in a row, speakup
+ just reads three of
+ those characters. For example, "......" would be read as dot,
+ dot, dot. If a . is added to the list of characters in repeats,
+ "......" would be read as dot, dot, dot, times six.
+
+What: /sys/accessibility/speakup/say_control
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: If set to one, speakup speaks shift, alt and control when those
+ keys are pressed. If say_control is set to zero, shift, ctrl,
+ and alt are not spoken when they are pressed.
+
+What: /sys/accessibility/speakup/say_word_ctl
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: TODO:
+
+What: /sys/accessibility/speakup/silent
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: TODO:
+
+What: /sys/accessibility/speakup/spell_delay
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: This controls how fast a word is spelled
+ when speakup's say word
+ review command is pressed twice quickly to speak the current
+ word being reviewed. Zero just speaks the letters one after
+ another, while values one through four
+ seem to introduce more of
+ a pause between the spelling of each letter by speakup.
+
+What: /sys/accessibility/speakup/synth
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Gets or sets the synthesizer driver currently in use. Reading
+ synth returns the synthesizer driver currently in use. Writing
+ synth switches to the given synthesizer driver, provided it is
+ either built into the kernel, or already loaded as a module.
+
+What: /sys/accessibility/speakup/synth_direct
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Sends whatever is written to synth_direct
+ directly to the speech synthesizer in use, bypassing speakup.
+ This could be used to make the synthesizer speak
+ a string, or to
+ send control sequences to the synthesizer to change how the
+ synthesizer behaves.
+
+What: /sys/accessibility/speakup/version
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Reading version returns the version of speakup, and the version
+ of the synthesizer driver currently in use.
+
+What: /sys/accessibility/speakup/i18n/announcements
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: This file contains various general announcements, most of which
+ cannot be categorized. You will find messages such as "You
+ killed Speakup", "I'm alive", "leaving help", "parked",
+ "unparked", and others. You will also find the names of the
+ screen edges and cursor tracking modes here.
+
+What: /sys/accessibility/speakup/i18n/chartab
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: TODO
+
+What: /sys/accessibility/speakup/i18n/ctl_keys
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Here, you will find names of control keys. These are used with
+ Speakup's say_control feature.
+
+What: /sys/accessibility/speakup/i18n/function_names
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Here, you will find a list of names for Speakup functions.
+ These are used by the help system. For example, suppose that
+ you have activated help mode, and you pressed
+ keypad 3. Speakup
+ says: "keypad 3 is character, say next."
+ The message "character, say next" names a Speakup function, and
+ it comes from this function_names file.
+
+What: /sys/accessibility/speakup/i18n/states
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: This file contains names for key states.
+ Again, these are part of the help system. For instance, if you
+ had pressed speakup + keypad 3, you would hear:
+ "speakup keypad 3 is go to bottom edge."
+
+ The speakup key is depressed, so the name of the key state is
+ speakup.
+
+ This part of the message comes from the states collection.
+
+What: /sys/accessibility/speakup/i18n/characters
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Through this sys entry, Speakup gives you the ability to change
+ how Speakup pronounces a given character. You could, for
+ example, change how some punctuation characters are spoken. You
+ can even change how Speakup will pronounce certain letters. For
+ further details see '12. Changing the Pronunciation of
+ Characters' in Speakup User's Guide (file spkguide.txt in
+ source).
+
+What: /sys/accessibility/speakup/i18n/colors
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: When you use the "say attributes" function, Speakup says the
+ name of the foreground and background colors. These names come
+ from the i18n/colors file.
+
+What: /sys/accessibility/speakup/i18n/formatted
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: This group of messages contains embedded formatting codes, to
+ specify the type and width of displayed data. If you change
+ these, you must preserve all of the formatting codes, and they
+ must appear in the order used by the default messages.
+
+What: /sys/accessibility/speakup/i18n/key_names
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Again, key_names is used by Speakup's help system. In the
+ previous example, Speakup said that you pressed "keypad 3."
+ This name came from the key_names file.
+
+What: /sys/accessibility/speakup/<synth-name>/
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: In `/sys/accessibility/speakup` is a directory corresponding to
+ the synthesizer driver currently in use (E.G) `soft` for the
+ soft driver. This directory contains files which control the
+ speech synthesizer itself,
+ as opposed to controlling the speakup
+ screen reader. The parameters in this directory have the same
+ names and functions across all
+ supported synthesizers. The range
+ of values for freq, pitch, rate, and vol is the same for all
+ supported synthesizers, with the given range being internally
+ mapped by the driver to more or less fit the range of values
+ supported for a given parameter by the individual synthesizer.
+ Below is a description of values and parameters for soft
+ synthesizer, which is currently the most commonly used.
+
+What: /sys/accessibility/speakup/<synth-name>/caps_start
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: This is the string that is sent to the synthesizer to cause it
+ to start speaking uppercase letters. For the soft synthesizer
+ and most others, this causes the pitch of the voice to rise
+ above the currently set pitch.
+
+What: /sys/accessibility/speakup/<synth-name>/caps_stop
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: This is the string sent to the synthesizer to cause it to stop
+ speaking uppercase letters. In the case of the soft synthesizer
+ and most others, this returns the pitch of the voice
+ down to the
+ currently set pitch.
+
+What: /sys/accessibility/speakup/<synth-name>/delay_time
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: TODO:
+
+What: /sys/accessibility/speakup/<synth-name>/direct
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Controls if punctuation is spoken by speakup, or by the
+ synthesizer.
+
+ For example, speakup speaks ">" as "greater", while
+ the espeak synthesizer used by the soft driver speaks "greater
+ than". Zero lets speakup speak the punctuation. One lets the
+ synthesizer itself speak punctuation.
+
+What: /sys/accessibility/speakup/<synth-name>/freq
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Gets or sets the frequency of the speech synthesizer. Range is
+ 0-9.
+
+What: /sys/accessibility/speakup/<synth-name>/flush_time
+KernelVersion: 5.12
+Contact: speakup@linux-speakup.org
+Description: Gets or sets the timeout to wait for the synthesizer flush to
+ complete. This can be used when the cable gets faulty and flush
+ notifications are getting lost.
+
+What: /sys/accessibility/speakup/<synth-name>/full_time
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: TODO:
+
+What: /sys/accessibility/speakup/<synth-name>/jiffy_delta
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: This controls how many jiffys the kernel gives to the
+ synthesizer. Setting this too high can make a system unstable,
+ or even crash it.
+
+What: /sys/accessibility/speakup/<synth-name>/pitch
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Gets or sets the pitch of the synthesizer. The range is 0-9.
+
+What: /sys/accessibility/speakup/<synth-name>/inflection
+KernelVersion: 5.8
+Contact: speakup@linux-speakup.org
+Description: Gets or sets the inflection of the synthesizer, i.e. the pitch
+ range. The range is 0-9.
+
+What: /sys/accessibility/speakup/<synth-name>/punct
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Gets or sets the amount of punctuation spoken by the
+ synthesizer. The range for the soft driver seems to be 0-2.
+ TODO: How is this related to speakup's punc_level, or
+ reading_punc.
+
+What: /sys/accessibility/speakup/<synth-name>/rate
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Gets or sets the rate of the synthesizer. Range is from zero
+ slowest, to nine fastest.
+
+What: /sys/accessibility/speakup/<synth-name>/tone
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Gets or sets the tone of the speech synthesizer. The range for
+ the soft driver seems to be 0-2. This seems to make no
+ difference if using espeak and the espeakup connector.
+ TODO: does espeakup support different tonalities?
+
+What: /sys/accessibility/speakup/<synth-name>/trigger_time
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: TODO:
+
+What: /sys/accessibility/speakup/<synth-name>/voice
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Gets or sets the voice used by the synthesizer if the
+ synthesizer can speak in more than one voice. The range for the
+ soft driver is 0-7. Note that while espeak supports multiple
+ voices, this parameter will not set the voice when the espeakup
+ connector is used between speakup and espeak.
+
+What: /sys/accessibility/speakup/<synth-name>/vol
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Gets or sets the volume of the speech synthesizer. Range is 0-9,
+ with zero being the softest, and nine being the loudest.
+
diff --git a/Documentation/ABI/stable/sysfs-driver-w1_ds2438 b/Documentation/ABI/stable/sysfs-driver-w1_ds2438
new file mode 100644
index 000000000000..d2e7681cc287
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-driver-w1_ds2438
@@ -0,0 +1,13 @@
+What: /sys/bus/w1/devices/.../page1
+Date: April 2021
+Contact: Luiz Sampaio <sampaio.ime@gmail.com>
+Description: read the contents of the page1 of the DS2438
+ see Documentation/w1/slaves/w1_ds2438.rst for detailed information
+Users: any user space application which wants to communicate with DS2438
+
+What: /sys/bus/w1/devices/.../offset
+Date: April 2021
+Contact: Luiz Sampaio <sampaio.ime@gmail.com>
+Description: write the contents to the offset register of the DS2438
+ see Documentation/w1/slaves/w1_ds2438.rst for detailed information
+Users: any user space application which wants to communicate with DS2438
diff --git a/Documentation/ABI/stable/sysfs-driver-w1_ds28e04 b/Documentation/ABI/stable/sysfs-driver-w1_ds28e04
index 26579ee868c9..3e1c1fa8d54d 100644
--- a/Documentation/ABI/stable/sysfs-driver-w1_ds28e04
+++ b/Documentation/ABI/stable/sysfs-driver-w1_ds28e04
@@ -2,7 +2,7 @@ What: /sys/bus/w1/devices/.../pio
Date: May 2012
Contact: Markus Franke <franm@hrz.tu-chemnitz.de>
Description: read/write the contents of the two PIO's of the DS28E04-100
- see Documentation/w1/slaves/w1_ds28e04 for detailed information
+ see Documentation/w1/slaves/w1_ds28e04.rst for detailed information
Users: any user space application which wants to communicate with DS28E04-100
@@ -11,5 +11,5 @@ What: /sys/bus/w1/devices/.../eeprom
Date: May 2012
Contact: Markus Franke <franm@hrz.tu-chemnitz.de>
Description: read/write the contents of the EEPROM memory of the DS28E04-100
- see Documentation/w1/slaves/w1_ds28e04 for detailed information
+ see Documentation/w1/slaves/w1_ds28e04.rst for detailed information
Users: any user space application which wants to communicate with DS28E04-100
diff --git a/Documentation/ABI/stable/sysfs-driver-w1_ds28ea00 b/Documentation/ABI/stable/sysfs-driver-w1_ds28ea00
index e928def14f28..534e63731a49 100644
--- a/Documentation/ABI/stable/sysfs-driver-w1_ds28ea00
+++ b/Documentation/ABI/stable/sysfs-driver-w1_ds28ea00
@@ -2,5 +2,5 @@ What: /sys/bus/w1/devices/.../w1_seq
Date: Apr 2015
Contact: Matt Campbell <mattrcampbell@gmail.com>
Description: Support for the DS28EA00 chain sequence function
- see Documentation/w1/slaves/w1_therm for detailed information
+ see Documentation/w1/slaves/w1_therm.rst for detailed information
Users: any user space application which wants to communicate with DS28EA00
diff --git a/Documentation/ABI/stable/sysfs-firmware-efi-vars b/Documentation/ABI/stable/sysfs-firmware-efi-vars
deleted file mode 100644
index 5def20b9019e..000000000000
--- a/Documentation/ABI/stable/sysfs-firmware-efi-vars
+++ /dev/null
@@ -1,75 +0,0 @@
-What: /sys/firmware/efi/vars
-Date: April 2004
-Contact: Matt Domsch <Matt_Domsch@dell.com>
-Description:
- This directory exposes interfaces for interactive with
- EFI variables. For more information on EFI variables,
- see 'Variable Services' in the UEFI specification
- (section 7.2 in specification version 2.3 Errata D).
-
- In summary, EFI variables are named, and are classified
- into separate namespaces through the use of a vendor
- GUID. They also have an arbitrary binary value
- associated with them.
-
- The efivars module enumerates these variables and
- creates a separate directory for each one found. Each
- directory has a name of the form "<key>-<vendor guid>"
- and contains the following files:
-
- attributes: A read-only text file enumerating the
- EFI variable flags. Potential values
- include:
-
- EFI_VARIABLE_NON_VOLATILE
- EFI_VARIABLE_BOOTSERVICE_ACCESS
- EFI_VARIABLE_RUNTIME_ACCESS
- EFI_VARIABLE_HARDWARE_ERROR_RECORD
- EFI_VARIABLE_AUTHENTICATED_WRITE_ACCESS
-
- See the EFI documentation for an
- explanation of each of these variables.
-
- data: A read-only binary file that can be read
- to attain the value of the EFI variable
-
- guid: The vendor GUID of the variable. This
- should always match the GUID in the
- variable's name.
-
- raw_var: A binary file that can be read to obtain
- a structure that contains everything
- there is to know about the variable.
- For structure definition see "struct
- efi_variable" in the kernel sources.
-
- This file can also be written to in
- order to update the value of a variable.
- For this to work however, all fields of
- the "struct efi_variable" passed must
- match byte for byte with the structure
- read out of the file, save for the value
- portion.
-
- **Note** the efi_variable structure
- read/written with this file contains a
- 'long' type that may change widths
- depending on your underlying
- architecture.
-
- size: As ASCII representation of the size of
- the variable's value.
-
-
- In addition, two other magic binary files are provided
- in the top-level directory and are used for adding and
- removing variables:
-
- new_var: Takes a "struct efi_variable" and
- instructs the EFI firmware to create a
- new variable.
-
- del_var: Takes a "struct efi_variable" and
- instructs the EFI firmware to remove any
- variable that has a matching vendor GUID
- and variable key name.
diff --git a/Documentation/ABI/stable/sysfs-firmware-opal-dump b/Documentation/ABI/stable/sysfs-firmware-opal-dump
index 32fe7f5c4880..1f74f45327ba 100644
--- a/Documentation/ABI/stable/sysfs-firmware-opal-dump
+++ b/Documentation/ABI/stable/sysfs-firmware-opal-dump
@@ -7,6 +7,7 @@ Description:
This is only for the powerpc/powernv platform.
+ =============== ===============================================
initiate_dump: When '1' is written to it,
we will initiate a dump.
Read this file for supported commands.
@@ -19,8 +20,11 @@ Description:
and ID of the dump, use the id and type files.
Do not rely on any particular size of dump
type or dump id.
+ =============== ===============================================
Each dump has the following files:
+
+ =============== ===============================================
id: An ASCII representation of the dump ID
in hex (e.g. '0x01')
type: An ASCII representation of the type of
@@ -39,3 +43,4 @@ Description:
inaccessible.
Reading this file will get a list of
supported actions.
+ =============== ===============================================
diff --git a/Documentation/ABI/stable/sysfs-firmware-opal-elog b/Documentation/ABI/stable/sysfs-firmware-opal-elog
index 2536434d49d0..7c8a61a2d005 100644
--- a/Documentation/ABI/stable/sysfs-firmware-opal-elog
+++ b/Documentation/ABI/stable/sysfs-firmware-opal-elog
@@ -38,6 +38,7 @@ Description:
For each log entry (directory), there are the following
files:
+ ============== ================================================
id: An ASCII representation of the ID of the
error log, in hex - e.g. "0x01".
@@ -58,3 +59,4 @@ Description:
entry will be removed from sysfs.
Reading this file will list the supported
operations (currently just acknowledge).
+ ============== ================================================
diff --git a/Documentation/ABI/stable/sysfs-hypervisor-xen b/Documentation/ABI/stable/sysfs-hypervisor-xen
index 3cf5cdfcd9a8..be9ca9981bb1 100644
--- a/Documentation/ABI/stable/sysfs-hypervisor-xen
+++ b/Documentation/ABI/stable/sysfs-hypervisor-xen
@@ -33,6 +33,8 @@ Description: If running under Xen:
Space separated list of supported guest system types. Each type
is in the format: <class>-<major>.<minor>-<arch>
With:
+
+ ======== ============================================
<class>: "xen" -- x86: paravirtualized, arm: standard
"hvm" -- x86 only: fully virtualized
<major>: major guest interface version
@@ -43,6 +45,7 @@ Description: If running under Xen:
"x86_64": 64 bit x86 guest
"armv7l": 32 bit arm guest
"aarch64": 64 bit arm guest
+ ======== ============================================
What: /sys/hypervisor/properties/changeset
Date: March 2009
@@ -117,3 +120,16 @@ Contact: xen-devel@lists.xenproject.org
Description: If running under Xen:
The Xen version is in the format <major>.<minor><extra>
This is the <minor> part of it.
+
+What: /sys/hypervisor/start_flags/*
+Date: March 2023
+KernelVersion: 6.3.0
+Contact: xen-devel@lists.xenproject.org
+Description: If running under Xen:
+ All bits in Xen's start-flags are represented as
+ boolean files, returning '1' if set, '0' otherwise.
+ This takes the place of the defunct /proc/xen/capabilities,
+ which would contain "control_d" on dom0, and be empty
+ otherwise. This flag is now exposed as "initdomain" in
+ addition to the "privileged" flag; all other possible flags
+ are accessible as "unknownXX".
diff --git a/Documentation/ABI/stable/sysfs-kernel-notes b/Documentation/ABI/stable/sysfs-kernel-notes
new file mode 100644
index 000000000000..2c76ee9e67f7
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-kernel-notes
@@ -0,0 +1,5 @@
+What: /sys/kernel/notes
+Date: July 2009
+Contact: <linux-kernel@vger.kernel.org>
+Description: The /sys/kernel/notes file contains the binary representation
+ of the running vmlinux's .notes section.
diff --git a/Documentation/ABI/stable/sysfs-kernel-time-aux-clocks b/Documentation/ABI/stable/sysfs-kernel-time-aux-clocks
new file mode 100644
index 000000000000..825508f42af6
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-kernel-time-aux-clocks
@@ -0,0 +1,5 @@
+What: /sys/kernel/time/aux_clocks/<ID>/enable
+Date: May 2025
+Contact: Thomas Gleixner <tglx@linutronix.de>
+Description:
+ Controls the enablement of auxiliary clock timekeepers.
diff --git a/Documentation/ABI/stable/sysfs-module b/Documentation/ABI/stable/sysfs-module
index 6272ae5fb366..41b1f16e8795 100644
--- a/Documentation/ABI/stable/sysfs-module
+++ b/Documentation/ABI/stable/sysfs-module
@@ -1,8 +1,7 @@
-What: /sys/module
-Description:
- The /sys/module tree consists of the following structure:
+The /sys/module tree consists of the following structure:
- /sys/module/MODULENAME
+What: /sys/module/<MODULENAME>
+Description:
The name of the module that is in the kernel. This
module name will always show up if the module is loaded as a
dynamic module. If it is built directly into the kernel, it
@@ -12,7 +11,8 @@ Description:
Note: The conditions of creation in the built-in case are not
by design and may be removed in the future.
- /sys/module/MODULENAME/parameters
+What: /sys/module/<MODULENAME>/parameters
+Description:
This directory contains individual files that are each
individual parameters of the module that are able to be
changed at runtime. See the individual module
@@ -25,10 +25,23 @@ Description:
individual driver documentation for details as to the
stability of the different parameters.
- /sys/module/MODULENAME/refcnt
+What: /sys/module/<MODULENAME>/refcnt
+Description:
If the module is able to be unloaded from the kernel, this file
will contain the current reference count of the module.
Note: If the module is built into the kernel, or if the
CONFIG_MODULE_UNLOAD kernel configuration value is not enabled,
this file will not be present.
+
+What: /sys/module/<MODULENAME>/srcversion
+Date: Jun 2005
+Description:
+ If the module source has MODULE_VERSION, this file will contain
+ the checksum of the source code.
+
+What: /sys/module/<MODULENAME>/version
+Date: Jun 2005
+Description:
+ If the module source has MODULE_VERSION, this file will contain
+ the version of the source code.
diff --git a/Documentation/ABI/stable/sysfs-platform-wmi-bmof b/Documentation/ABI/stable/sysfs-platform-wmi-bmof
new file mode 100644
index 000000000000..2881244e3f09
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-platform-wmi-bmof
@@ -0,0 +1,7 @@
+What: /sys/bus/wmi/devices/05901221-D566-11D1-B2F0-00A0C9062910[-X]/bmof
+Date: Jun 2017
+KernelVersion: 4.13
+Description:
+ Binary MOF metadata used to describe the details of available ACPI WMI interfaces.
+
+ See Documentation/wmi/devices/wmi-bmof.rst for details.
diff --git a/Documentation/ABI/stable/vdso b/Documentation/ABI/stable/vdso
index 55406ec8a35a..85dbb6a160df 100644
--- a/Documentation/ABI/stable/vdso
+++ b/Documentation/ABI/stable/vdso
@@ -1,11 +1,19 @@
+What: vDSO
+Date: July 2011
+KernelVersion: 3.0
+Contact: Andy Lutomirski <luto@kernel.org>
+Description:
+
On some architectures, when the kernel loads any userspace program it
maps an ELF DSO into that program's address space. This DSO is called
the vDSO and it often contains useful and highly-optimized alternatives
to real syscalls.
-These functions are called just like ordinary C function according to
-your platform's ABI. Call them from a sensible context. (For example,
-if you set CS on x86 to something strange, the vDSO functions are
+These functions are called according to your platform's ABI. On many
+platforms they are called just like ordinary C function. On other platforms
+(ex: powerpc) they are called with the same convention as system calls which
+is different from ordinary C functions. Call them from a sensible context.
+(For example, if you set CS on x86 to something strange, the vDSO functions are
within their rights to crash.) In addition, if you pass a bad
pointer to a vDSO function, you might get SIGSEGV instead of -EFAULT.
@@ -23,6 +31,7 @@ Unless otherwise noted, the set of symbols with any given version and the
ABI of those symbols is considered stable. It may vary across architectures,
though.
-(As of this writing, this ABI documentation as been confirmed for x86_64.
+Note:
+ As of this writing, this ABI documentation as been confirmed for x86_64.
The maintainers of the other vDSO-using architectures should confirm
- that it is correct for their architecture.)
+ that it is correct for their architecture.
diff --git a/Documentation/ABI/testing/configfs-acpi b/Documentation/ABI/testing/configfs-acpi
index 4ab4e99aa863..c09b640c3cb1 100644
--- a/Documentation/ABI/testing/configfs-acpi
+++ b/Documentation/ABI/testing/configfs-acpi
@@ -14,7 +14,8 @@ Description:
This group contains the configuration for user defined ACPI
tables. The attributes of a user define table are:
- aml - a binary attribute that the user can use to
+ aml
+ - a binary attribute that the user can use to
fill in the ACPI aml definitions. Once the aml
data is written to this file and the file is
closed the table will be loaded and ACPI devices
@@ -26,11 +27,26 @@ Description:
The rest of the attributes are read-only and are valid only
after the table has been loaded by filling the aml entry:
- signature - ASCII table signature
- length - length of table in bytes, including the header
- revision - ACPI Specification minor version number
- oem_id - ASCII OEM identification
- oem_table_id - ASCII OEM table identification
- oem_revision - OEM revision number
- asl_compiler_id - ASCII ASL compiler vendor ID
- asl_compiler_revision - ASL compiler version
+ signature
+ - ASCII table signature
+
+ length
+ - length of table in bytes, including the header
+
+ revision
+ - ACPI Specification minor version number
+
+ oem_id
+ - ASCII OEM identification
+
+ oem_table_id
+ - ASCII OEM table identification
+
+ oem_revision
+ - OEM revision number
+
+ asl_compiler_id
+ - ASCII ASL compiler vendor ID
+
+ asl_compiler_revision
+ - ASL compiler version
diff --git a/Documentation/ABI/testing/configfs-iio b/Documentation/ABI/testing/configfs-iio
index aebda53ec0f7..1637fcb50f56 100644
--- a/Documentation/ABI/testing/configfs-iio
+++ b/Documentation/ABI/testing/configfs-iio
@@ -31,4 +31,4 @@ Date: April 2016
KernelVersion: 4.7
Description:
Dummy IIO devices directory. Creating a directory here will result
- in creating a dummy IIO device in the IIO subystem.
+ in creating a dummy IIO device in the IIO subsystem.
diff --git a/Documentation/ABI/testing/configfs-most b/Documentation/ABI/testing/configfs-most
new file mode 100644
index 000000000000..0a4b8649aa5a
--- /dev/null
+++ b/Documentation/ABI/testing/configfs-most
@@ -0,0 +1,241 @@
+What: /sys/kernel/config/most_<component>
+Date: March 8, 2019
+KernelVersion: 5.2
+Description: Interface is used to configure and connect device channels
+ to component drivers.
+
+ Attributes are visible only when configfs is mounted. To mount
+ configfs in /sys/kernel/config directory use:
+ # mount -t configfs none /sys/kernel/config/
+
+
+What: /sys/kernel/config/most_cdev/<link>
+Date: March 8, 2019
+KernelVersion: 5.2
+Description:
+ The attributes:
+
+ buffer_size
+ configure the buffer size for this channel
+
+ subbuffer_size
+ configure the sub-buffer size for this channel
+ (needed for synchronous and isochronous data)
+
+
+ num_buffers
+ configure number of buffers used for this
+ channel
+
+ datatype
+ configure type of data that will travel over
+ this channel
+
+ direction
+ configure whether this link will be an input
+ or output
+
+ dbr_size
+ configure DBR data buffer size (this is used
+ for MediaLB communication only)
+
+ packets_per_xact
+ configure the number of packets that will be
+ collected from the network before being
+ transmitted via USB (this is used for USB
+ communication only)
+
+ device
+ name of the device the link is to be attached to
+
+ channel
+ name of the channel the link is to be attached to
+
+ comp_params
+ pass parameters needed by some components
+
+ create_link
+ write '1' to this attribute to trigger the
+ creation of the link. In case of speculative
+ configuration, the creation is post-poned until
+ a physical device is being attached to the bus.
+
+ destroy_link
+ write '1' to this attribute to destroy an
+ active link
+
+What: /sys/kernel/config/most_video/<link>
+Date: March 8, 2019
+KernelVersion: 5.2
+Description:
+ The attributes:
+
+ buffer_size
+ configure the buffer size for this channel
+
+ subbuffer_size
+ configure the sub-buffer size for this channel
+ (needed for synchronous and isochronous data)
+
+
+ num_buffers
+ configure number of buffers used for this
+ channel
+
+ datatype
+ configure type of data that will travel over
+ this channel
+
+ direction
+ configure whether this link will be an input
+ or output
+
+ dbr_size
+ configure DBR data buffer size (this is used
+ for MediaLB communication only)
+
+ packets_per_xact
+ configure the number of packets that will be
+ collected from the network before being
+ transmitted via USB (this is used for USB
+ communication only)
+
+ device
+ name of the device the link is to be attached to
+
+ channel
+ name of the channel the link is to be attached to
+
+ comp_params
+ pass parameters needed by some components
+
+ create_link
+ write '1' to this attribute to trigger the
+ creation of the link. In case of speculative
+ configuration, the creation is post-poned until
+ a physical device is being attached to the bus.
+
+ destroy_link
+ write '1' to this attribute to destroy an
+ active link
+
+What: /sys/kernel/config/most_net/<link>
+Date: March 8, 2019
+KernelVersion: 5.2
+Description:
+ The attributes:
+
+ buffer_size
+ configure the buffer size for this channel
+
+ subbuffer_size
+ configure the sub-buffer size for this channel
+ (needed for synchronous and isochronous data)
+
+
+ num_buffers
+ configure number of buffers used for this
+ channel
+
+ datatype
+ configure type of data that will travel over
+ this channel
+
+ direction
+ configure whether this link will be an input
+ or output
+
+ dbr_size
+ configure DBR data buffer size (this is used
+ for MediaLB communication only)
+
+ packets_per_xact
+ configure the number of packets that will be
+ collected from the network before being
+ transmitted via USB (this is used for USB
+ communication only)
+
+ device
+ name of the device the link is to be attached to
+
+ channel
+ name of the channel the link is to be attached to
+
+ comp_params
+ pass parameters needed by some components
+
+ create_link
+ write '1' to this attribute to trigger the
+ creation of the link. In case of speculative
+ configuration, the creation is post-poned until
+ a physical device is being attached to the bus.
+
+ destroy_link
+ write '1' to this attribute to destroy an
+ active link
+
+What: /sys/kernel/config/most_sound/<card>
+Date: March 8, 2019
+KernelVersion: 5.2
+Description:
+ The attributes:
+
+ create_card
+ write '1' to this attribute to trigger the
+ registration of the sound card with the ALSA
+ subsystem.
+
+What: /sys/kernel/config/most_sound/<card>/<link>
+Date: March 8, 2019
+KernelVersion: 5.2
+Description:
+ The attributes:
+
+ buffer_size
+ configure the buffer size for this channel
+
+ subbuffer_size
+ configure the sub-buffer size for this channel
+ (needed for synchronous and isochronous data)
+
+
+ num_buffers
+ configure number of buffers used for this
+ channel
+
+ datatype
+ configure type of data that will travel over
+ this channel
+
+ direction
+ configure whether this link will be an input
+ or output
+
+ dbr_size
+ configure DBR data buffer size (this is used
+ for MediaLB communication only)
+
+ packets_per_xact
+ configure the number of packets that will be
+ collected from the network before being
+ transmitted via USB (this is used for USB
+ communication only)
+
+ device
+ name of the device the link is to be attached to
+
+ channel
+ name of the channel the link is to be attached to
+
+ comp_params
+ pass parameters needed by some components
+
+ create_link
+ write '1' to this attribute to trigger the
+ creation of the link. In case of speculative
+ configuration, the creation is post-poned until
+ a physical device is being attached to the bus.
+
+ destroy_link
+ write '1' to this attribute to destroy an
+ active link
diff --git a/Documentation/ABI/testing/configfs-spear-pcie-gadget b/Documentation/ABI/testing/configfs-spear-pcie-gadget
index 840c324ef34d..cf877bd341df 100644
--- a/Documentation/ABI/testing/configfs-spear-pcie-gadget
+++ b/Documentation/ABI/testing/configfs-spear-pcie-gadget
@@ -10,22 +10,24 @@ Description:
This interfaces can be used to show spear's PCIe device capability.
Nodes are only visible when configfs is mounted. To mount configfs
- in /config directory use:
- # mount -t configfs none /config/
+ in /config directory use::
- For nth PCIe Device Controller
- /config/pcie-gadget.n/
- link ... used to enable ltssm and read its status.
- int_type ...used to configure and read type of supported
- interrupt
- no_of_msi ... used to configure number of MSI vector needed and
+ # mount -t configfs none /config/
+
+ For nth PCIe Device Controller /config/pcie-gadget.n/:
+
+ =============== ======================================================
+ link used to enable ltssm and read its status.
+ int_type used to configure and read type of supported interrupt
+ no_of_msi used to configure number of MSI vector needed and
to read no of MSI granted.
- inta ... write 1 to assert INTA and 0 to de-assert.
- send_msi ... write MSI vector to be sent.
- vendor_id ... used to write and read vendor id (hex)
- device_id ... used to write and read device id (hex)
- bar0_size ... used to write and read bar0_size
- bar0_address ... used to write and read bar0 mapped area in hex.
- bar0_rw_offset ... used to write and read offset of bar0 where
- bar0_data will be written or read.
- bar0_data ... used to write and read data at bar0_rw_offset.
+ inta write 1 to assert INTA and 0 to de-assert.
+ send_msi write MSI vector to be sent.
+ vendor_id used to write and read vendor id (hex)
+ device_id used to write and read device id (hex)
+ bar0_size used to write and read bar0_size
+ bar0_address used to write and read bar0 mapped area in hex.
+ bar0_rw_offset used to write and read offset of bar0 where bar0_data
+ will be written or read.
+ bar0_data used to write and read data at bar0_rw_offset.
+ =============== ======================================================
diff --git a/Documentation/ABI/testing/configfs-tsm-report b/Documentation/ABI/testing/configfs-tsm-report
new file mode 100644
index 000000000000..534408bc1408
--- /dev/null
+++ b/Documentation/ABI/testing/configfs-tsm-report
@@ -0,0 +1,145 @@
+What: /sys/kernel/config/tsm/report/$name/inblob
+Date: September, 2023
+KernelVersion: v6.7
+Contact: linux-coco@lists.linux.dev
+Description:
+ (WO) Up to 64 bytes of user specified binary data. For replay
+ protection this should include a nonce, but the kernel does not
+ place any restrictions on the content.
+
+What: /sys/kernel/config/tsm/report/$name/outblob
+Date: September, 2023
+KernelVersion: v6.7
+Contact: linux-coco@lists.linux.dev
+Description:
+ (RO) Binary attestation report generated from @inblob and other
+ options The format of the report is implementation specific
+ where the implementation is conveyed via the @provider
+ attribute.
+
+What: /sys/kernel/config/tsm/report/$name/auxblob
+Date: October, 2023
+KernelVersion: v6.7
+Contact: linux-coco@lists.linux.dev
+Description:
+ (RO) Optional supplemental data that a TSM may emit, visibility
+ of this attribute depends on TSM, and may be empty if no
+ auxiliary data is available.
+
+ When @provider is "sev_guest" this file contains the
+ "cert_table" from SEV-ES Guest-Hypervisor Communication Block
+ Standardization v2.03 Section 4.1.8.1 MSG_REPORT_REQ.
+ https://www.amd.com/content/dam/amd/en/documents/epyc-technical-docs/specifications/56421.pdf
+
+What: /sys/kernel/config/tsm/report/$name/manifestblob
+Date: January, 2024
+KernelVersion: v6.10
+Contact: linux-coco@lists.linux.dev
+Description:
+ (RO) Optional supplemental data that a TSM may emit, visibility
+ of this attribute depends on TSM, and may be empty if no
+ manifest data is available.
+
+ See 'service_provider' for information on the format of the
+ manifest blob.
+
+What: /sys/kernel/config/tsm/report/$name/provider
+Date: September, 2023
+KernelVersion: v6.7
+Contact: linux-coco@lists.linux.dev
+Description:
+ (RO) A name for the format-specification of @outblob like
+ "sev_guest" [1] or "tdx_guest" [2] in the near term, or a
+ common standard format in the future.
+
+ [1]: SEV Secure Nested Paging Firmware ABI Specification
+ Revision 1.55 Table 22
+ https://www.amd.com/content/dam/amd/en/documents/epyc-technical-docs/specifications/56860.pdf
+
+ [2]: Intel® Trust Domain Extensions Data Center Attestation
+ Primitives : Quote Generation Library and Quote Verification
+ Library Revision 0.8 Appendix 4,5
+ https://download.01.org/intel-sgx/latest/dcap-latest/linux/docs/Intel_TDX_DCAP_Quoting_Library_API.pdf
+
+What: /sys/kernel/config/tsm/report/$name/generation
+Date: September, 2023
+KernelVersion: v6.7
+Contact: linux-coco@lists.linux.dev
+Description:
+ (RO) The value in this attribute increments each time @inblob or
+ any option is written. Userspace can detect conflicts by
+ checking generation before writing to any attribute and making
+ sure the number of writes matches expectations after reading
+ @outblob, or it can prevent conflicts by creating a report
+ instance per requesting context.
+
+What: /sys/kernel/config/tsm/report/$name/privlevel
+Date: September, 2023
+KernelVersion: v6.7
+Contact: linux-coco@lists.linux.dev
+Description:
+ (WO) Attribute is visible if a TSM implementation provider
+ supports the concept of attestation reports for TVMs running at
+ different privilege levels, like SEV-SNP "VMPL", specify the
+ privilege level via this attribute. The minimum acceptable
+ value is conveyed via @privlevel_floor and the maximum
+ acceptable value is TSM_PRIVLEVEL_MAX (3).
+
+What: /sys/kernel/config/tsm/report/$name/privlevel_floor
+Date: September, 2023
+KernelVersion: v6.7
+Contact: linux-coco@lists.linux.dev
+Description:
+ (RO) Indicates the minimum permissible value that can be written
+ to @privlevel.
+
+What: /sys/kernel/config/tsm/report/$name/service_provider
+Date: January, 2024
+KernelVersion: v6.10
+Contact: linux-coco@lists.linux.dev
+Description:
+ (WO) Attribute is visible if a TSM implementation provider
+ supports the concept of attestation reports from a service
+ provider for TVMs, like SEV-SNP running under an SVSM.
+ Specifying the service provider via this attribute will create
+ an attestation report as specified by the service provider.
+ The only currently supported service provider is "svsm".
+
+ For the "svsm" service provider, see the Secure VM Service Module
+ for SEV-SNP Guests v1.00 Section 7. For the doc, search for
+ "site:amd.com "Secure VM Service Module for SEV-SNP
+ Guests", docID: 58019"
+
+What: /sys/kernel/config/tsm/report/$name/service_guid
+Date: January, 2024
+KernelVersion: v6.10
+Contact: linux-coco@lists.linux.dev
+Description:
+ (WO) Attribute is visible if a TSM implementation provider
+ supports the concept of attestation reports from a service
+ provider for TVMs, like SEV-SNP running under an SVSM.
+ Specifying an empty/null GUID (00000000-0000-0000-0000-000000)
+ requests all active services within the service provider be
+ part of the attestation report. Specifying a GUID request
+ an attestation report of just the specified service using the
+ manifest form specified by the service_manifest_version
+ attribute.
+
+ See 'service_provider' for information on the format of the
+ service guid.
+
+What: /sys/kernel/config/tsm/report/$name/service_manifest_version
+Date: January, 2024
+KernelVersion: v6.10
+Contact: linux-coco@lists.linux.dev
+Description:
+ (WO) Attribute is visible if a TSM implementation provider
+ supports the concept of attestation reports from a service
+ provider for TVMs, like SEV-SNP running under an SVSM.
+ Indicates the service manifest version requested for the
+ attestation report (default 0). If this field is not set by
+ the user, the default manifest version of the service (the
+ service's initial/first manifest version) is returned.
+
+ See 'service_provider' for information on the format of the
+ service manifest version.
diff --git a/Documentation/ABI/testing/configfs-usb-gadget b/Documentation/ABI/testing/configfs-usb-gadget
index 95a36589a66b..a8bb896def54 100644
--- a/Documentation/ABI/testing/configfs-usb-gadget
+++ b/Documentation/ABI/testing/configfs-usb-gadget
@@ -12,18 +12,24 @@ Description:
The attributes of a gadget:
- UDC - bind a gadget to UDC/unbind a gadget;
- write UDC's name found in /sys/class/udc/*
- to bind a gadget, empty string "" to unbind.
-
- bDeviceClass - USB device class code
- bDeviceSubClass - USB device subclass code
- bDeviceProtocol - USB device protocol code
- bMaxPacketSize0 - maximum endpoint 0 packet size
- bcdDevice - bcd device release number
- bcdUSB - bcd USB specification version number
- idProduct - product ID
- idVendor - vendor ID
+ ================ ============================================
+ UDC bind a gadget to UDC/unbind a gadget;
+ write UDC's name found in /sys/class/udc/*
+ to bind a gadget, empty string "" to unbind.
+
+ max_speed maximum speed the driver supports. Valid
+ names are super-speed-plus, super-speed,
+ high-speed, full-speed, and low-speed.
+
+ bDeviceClass USB device class code
+ bDeviceSubClass USB device subclass code
+ bDeviceProtocol USB device protocol code
+ bMaxPacketSize0 maximum endpoint 0 packet size
+ bcdDevice bcd device release number
+ bcdUSB bcd USB specification version number
+ idProduct product ID
+ idVendor vendor ID
+ ================ ============================================
What: /config/usb-gadget/gadget/configs
Date: Jun 2013
@@ -37,8 +43,10 @@ KernelVersion: 3.11
Description:
The attributes of a configuration:
- bmAttributes - configuration characteristics
- MaxPower - maximum power consumption from the bus
+ ================ ======================================
+ bmAttributes configuration characteristics
+ MaxPower maximum power consumption from the bus
+ ================ ======================================
What: /config/usb-gadget/gadget/configs/config/strings
Date: Jun 2013
@@ -53,7 +61,9 @@ KernelVersion: 3.11
Description:
The attributes:
- configuration - configuration description
+ ================ =========================
+ configuration configuration description
+ ================ =========================
What: /config/usb-gadget/gadget/functions
@@ -72,8 +82,10 @@ Description:
The attributes:
- compatible_id - 8-byte string for "Compatible ID"
- sub_compatible_id - 8-byte string for "Sub Compatible ID"
+ ================= =====================================
+ compatible_id 8-byte string for "Compatible ID"
+ sub_compatible_id 8-byte string for "Sub Compatible ID"
+ ================= =====================================
What: /config/usb-gadget/gadget/functions/<func>.<inst>/interface.<n>/<property>
Date: May 2014
@@ -85,16 +97,19 @@ Description:
The attributes:
- type - value 1..7 for interpreting the data
- 1: unicode string
- 2: unicode string with environment variable
- 3: binary
- 4: little-endian 32-bit
- 5: big-endian 32-bit
- 6: unicode string with a symbolic link
- 7: multiple unicode strings
- data - blob of data to be interpreted depending on
+ ===== ===============================================
+ type value 1..7 for interpreting the data
+
+ - 1: unicode string
+ - 2: unicode string with environment variable
+ - 3: binary
+ - 4: little-endian 32-bit
+ - 5: big-endian 32-bit
+ - 6: unicode string with a symbolic link
+ - 7: multiple unicode strings
+ data blob of data to be interpreted depending on
type
+ ===== ===============================================
What: /config/usb-gadget/gadget/strings
Date: Jun 2013
@@ -109,9 +124,11 @@ KernelVersion: 3.11
Description:
The attributes:
- serialnumber - gadget's serial number (string)
- product - gadget's product description
- manufacturer - gadget's manufacturer description
+ ============ =================================
+ serialnumber gadget's serial number (string)
+ product gadget's product description
+ manufacturer gadget's manufacturer description
+ ============ =================================
What: /config/usb-gadget/gadget/os_desc
Date: May 2014
@@ -119,8 +136,23 @@ KernelVersion: 3.16
Description:
This group contains "OS String" extension handling attributes.
- use - flag turning "OS Desctiptors" support on/off
- b_vendor_code - one-byte value used for custom per-device and
+ ============= ===============================================
+ use flag turning "OS Descriptors" support on/off
+ b_vendor_code one-byte value used for custom per-device and
per-interface requests
- qw_sign - an identifier to be reported as "OS String"
+ qw_sign an identifier to be reported as "OS String"
proper
+ ============= ===============================================
+
+What: /config/usb-gadget/gadget/webusb
+Date: Dec 2022
+KernelVersion: 6.3
+Description:
+ This group contains "WebUSB" extension handling attributes.
+
+ ============= ===============================================
+ use flag turning "WebUSB" support on/off
+ bcdVersion bcd WebUSB specification version number
+ bVendorCode one-byte value used for custom per-device
+ landingPage UTF-8 encoded URL of the device's landing page
+ ============= ===============================================
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-acm b/Documentation/ABI/testing/configfs-usb-gadget-acm
index d21092d75a05..25e68be9eb66 100644
--- a/Documentation/ABI/testing/configfs-usb-gadget-acm
+++ b/Documentation/ABI/testing/configfs-usb-gadget-acm
@@ -6,3 +6,10 @@ Description:
This item contains just one readonly attribute: port_num.
It contains the port number of the /dev/ttyGS<n> device
associated with acm function's instance "name".
+
+What: /config/usb-gadget/gadget/functions/acm.name/protocol
+Date: Aug 2024
+KernelVersion: 6.13
+Description:
+ Reported bInterfaceProtocol for the ACM device. For legacy
+ reasons, this defaults to 1 (USB_CDC_ACM_PROTO_AT_V25TER).
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-ecm b/Documentation/ABI/testing/configfs-usb-gadget-ecm
index 0addf7704b4c..732101ca9d0b 100644
--- a/Documentation/ABI/testing/configfs-usb-gadget-ecm
+++ b/Documentation/ABI/testing/configfs-usb-gadget-ecm
@@ -4,13 +4,17 @@ KernelVersion: 3.11
Description:
The attributes:
- ifname - network device interface name associated with
+ ifname
+ - network device interface name associated with
this function instance
- qmult - queue length multiplier for high and
+ qmult
+ - queue length multiplier for high and
super speed
- host_addr - MAC address of host's end of this
+ host_addr
+ - MAC address of host's end of this
Ethernet over USB link
- dev_addr - MAC address of device's end of this
+ dev_addr
+ - MAC address of device's end of this
Ethernet over USB link
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-eem b/Documentation/ABI/testing/configfs-usb-gadget-eem
index a4c57158fcde..178c3d5fb647 100644
--- a/Documentation/ABI/testing/configfs-usb-gadget-eem
+++ b/Documentation/ABI/testing/configfs-usb-gadget-eem
@@ -4,11 +4,13 @@ KernelVersion: 3.11
Description:
The attributes:
- ifname - network device interface name associated with
+ ========== =============================================
+ ifname network device interface name associated with
this function instance
- qmult - queue length multiplier for high and
+ qmult queue length multiplier for high and
super speed
- host_addr - MAC address of host's end of this
+ host_addr MAC address of host's end of this
Ethernet over USB link
- dev_addr - MAC address of device's end of this
+ dev_addr MAC address of device's end of this
Ethernet over USB link
+ ========== =============================================
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-ffs b/Documentation/ABI/testing/configfs-usb-gadget-ffs
index e39b27653c65..bf8936ff6d38 100644
--- a/Documentation/ABI/testing/configfs-usb-gadget-ffs
+++ b/Documentation/ABI/testing/configfs-usb-gadget-ffs
@@ -4,6 +4,14 @@ KernelVersion: 3.13
Description: The purpose of this directory is to create and remove it.
A corresponding USB function instance is created/removed.
- There are no attributes here.
- All parameters are set through FunctionFS.
+ All attributes are read only:
+
+ ============= ============================================
+ ready 1 if the function is ready to be used, E.G.
+ if userspace has written descriptors and
+ strings to ep0, so the gadget can be
+ enabled - 0 otherwise.
+ ============= ============================================
+
+ All other parameters are set through FunctionFS.
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-hid b/Documentation/ABI/testing/configfs-usb-gadget-hid
index f12e00e6baa3..748705c4cb58 100644
--- a/Documentation/ABI/testing/configfs-usb-gadget-hid
+++ b/Documentation/ABI/testing/configfs-usb-gadget-hid
@@ -4,8 +4,10 @@ KernelVersion: 3.19
Description:
The attributes:
- protocol - HID protocol to use
- report_desc - blob corresponding to HID report descriptors
+ ============= ============================================
+ protocol HID protocol to use
+ report_desc blob corresponding to HID report descriptors
except the data passed through /dev/hidg<N>
- report_length - HID report length
- subclass - HID device subclass to use
+ report_length HID report length
+ subclass HID device subclass to use
+ ============= ============================================
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-loopback b/Documentation/ABI/testing/configfs-usb-gadget-loopback
index 06beefbcf061..e6c6ba5ac7ff 100644
--- a/Documentation/ABI/testing/configfs-usb-gadget-loopback
+++ b/Documentation/ABI/testing/configfs-usb-gadget-loopback
@@ -4,5 +4,7 @@ KernelVersion: 3.13
Description:
The attributes:
- qlen - depth of loopback queue
- buflen - buffer length
+ ======= =======================
+ qlen depth of loopback queue
+ buflen buffer length
+ ======= =======================
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-mass-storage b/Documentation/ABI/testing/configfs-usb-gadget-mass-storage
index 9931fb0d63ba..fc0328069267 100644
--- a/Documentation/ABI/testing/configfs-usb-gadget-mass-storage
+++ b/Documentation/ABI/testing/configfs-usb-gadget-mass-storage
@@ -4,12 +4,14 @@ KernelVersion: 3.13
Description:
The attributes:
- stall - Set to permit function to halt bulk endpoints.
+ =========== ==============================================
+ stall Set to permit function to halt bulk endpoints.
Disabled on some USB devices known not to work
correctly. You should set it to true.
- num_buffers - Number of pipeline buffers. Valid numbers
+ num_buffers Number of pipeline buffers. Valid numbers
are 2..4. Available only if
CONFIG_USB_GADGET_DEBUG_FILES is set.
+ =========== ==============================================
What: /config/usb-gadget/gadget/functions/mass_storage.name/lun.name
Date: Oct 2013
@@ -17,15 +19,23 @@ KernelVersion: 3.13
Description:
The attributes:
- file - The path to the backing file for the LUN.
+ ============ ==============================================
+ file The path to the backing file for the LUN.
Required if LUN is not marked as removable.
- ro - Flag specifying access to the LUN shall be
+ ro Flag specifying access to the LUN shall be
read-only. This is implied if CD-ROM emulation
is enabled as well as when it was impossible
to open "filename" in R/W mode.
- removable - Flag specifying that LUN shall be indicated as
+ removable Flag specifying that LUN shall be indicated as
being removable.
- cdrom - Flag specifying that LUN shall be reported as
+ cdrom Flag specifying that LUN shall be reported as
being a CD-ROM.
- nofua - Flag specifying that FUA flag
+ nofua Flag specifying that FUA flag
in SCSI WRITE(10,12)
+ forced_eject This write-only file is useful only when
+ the function is active. It causes the backing
+ file to be forcibly detached from the LUN,
+ regardless of whether the host has allowed it.
+ Any non-zero number of bytes written will
+ result in ejection.
+ ============ ==============================================
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-midi b/Documentation/ABI/testing/configfs-usb-gadget-midi
index 6b341df7249c..07389cddd51a 100644
--- a/Documentation/ABI/testing/configfs-usb-gadget-midi
+++ b/Documentation/ABI/testing/configfs-usb-gadget-midi
@@ -4,9 +4,11 @@ KernelVersion: 3.19
Description:
The attributes:
- index - index value for the USB MIDI adapter
- id - ID string for the USB MIDI adapter
- buflen - MIDI buffer length
- qlen - USB read request queue length
- in_ports - number of MIDI input ports
- out_ports - number of MIDI output ports
+ ========== ====================================
+ index index value for the USB MIDI adapter
+ id ID string for the USB MIDI adapter
+ buflen MIDI buffer length
+ qlen USB read request queue length
+ in_ports number of MIDI input ports
+ out_ports number of MIDI output ports
+ ========== ====================================
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-midi2 b/Documentation/ABI/testing/configfs-usb-gadget-midi2
new file mode 100644
index 000000000000..d76a52e2ca7f
--- /dev/null
+++ b/Documentation/ABI/testing/configfs-usb-gadget-midi2
@@ -0,0 +1,54 @@
+What: /config/usb-gadget/gadget/functions/midi2.name
+Date: Jul 2023
+KernelVersion: 6.6
+Description:
+ The attributes:
+
+ ============ ===============================================
+ process_ump Flag to process UMP Stream messages (0 or 1)
+ static_block Flag for static blocks (0 or 1)
+ iface_name MIDI interface name string
+ ============ ===============================================
+
+What: /config/usb-gadget/gadget/functions/midi2.name/ep.number
+Date: Jul 2023
+KernelVersion: 6.6
+Description:
+ This group contains a UMP Endpoint configuration.
+ A new Endpoint starts from 0, and can be up to 3.
+
+ The attributes:
+
+ ============= ===============================================
+ protocol_caps MIDI protocol capabilities (1, 2 or 3 for both)
+ protocol Default MIDI protocol (1 or 2)
+ ep_name UMP Endpoint name string
+ product_id Product ID string
+ manufacturer Manufacture ID (24 bit)
+ family Device family ID (16 bit)
+ model Device model ID (16 bit)
+ sw_revision Software Revision (32 bit)
+ ============= ===============================================
+
+What: /config/usb-gadget/gadget/functions/midi2.name/ep.number/block.number
+Date: Jul 2023
+KernelVersion: 6.6
+Description:
+ This group contains a UMP Function Block configuration.
+ A new block starts from 0, and can be up to 31.
+
+ The attributes:
+
+ ================= ==============================================
+ name Function Block name string
+ direction 1: input, 2: output, 3: bidirectional
+ first_group The first UMP Group number (0-15)
+ num_groups The number of groups in this FB (1-16)
+ midi1_first_group The first UMP Group number for MIDI 1.0 (0-15)
+ midi1_num_groups The number of groups for MIDI 1.0 (0-16)
+ ui_hint 0: unknown, 1: receiver, 2: sender, 3: both
+ midi_ci_version Supported MIDI-CI version number (8 bit)
+ is_midi1 Legacy MIDI 1.0 device (0, 1 or 2)
+ sysex8_streams Max number of SysEx8 streams (8 bit)
+ active Active FB flag (0 or 1)
+ ================= ==============================================
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-printer b/Documentation/ABI/testing/configfs-usb-gadget-printer
index 6b0714e3c605..7aa731bac2da 100644
--- a/Documentation/ABI/testing/configfs-usb-gadget-printer
+++ b/Documentation/ABI/testing/configfs-usb-gadget-printer
@@ -4,6 +4,8 @@ KernelVersion: 4.1
Description:
The attributes:
- pnp_string - Data to be passed to the host in pnp string
- q_len - Number of requests per endpoint
+ ========== ===========================================
+ pnp_string Data to be passed to the host in pnp string
+ q_len Number of requests per endpoint
+ ========== ===========================================
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-rndis b/Documentation/ABI/testing/configfs-usb-gadget-rndis
index 137399095d74..9416eda7fe93 100644
--- a/Documentation/ABI/testing/configfs-usb-gadget-rndis
+++ b/Documentation/ABI/testing/configfs-usb-gadget-rndis
@@ -4,14 +4,16 @@ KernelVersion: 3.11
Description:
The attributes:
- ifname - network device interface name associated with
+ ========= =============================================
+ ifname network device interface name associated with
this function instance
- qmult - queue length multiplier for high and
+ qmult queue length multiplier for high and
super speed
- host_addr - MAC address of host's end of this
+ host_addr MAC address of host's end of this
Ethernet over USB link
- dev_addr - MAC address of device's end of this
+ dev_addr MAC address of device's end of this
Ethernet over USB link
- class - USB interface class, default is 02 (hex)
- subclass - USB interface subclass, default is 06 (hex)
- protocol - USB interface protocol, default is 00 (hex)
+ class USB interface class, default is 02 (hex)
+ subclass USB interface subclass, default is 06 (hex)
+ protocol USB interface protocol, default is 00 (hex)
+ ========= =============================================
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-sourcesink b/Documentation/ABI/testing/configfs-usb-gadget-sourcesink
index f56335af2d88..1f3d31b607b7 100644
--- a/Documentation/ABI/testing/configfs-usb-gadget-sourcesink
+++ b/Documentation/ABI/testing/configfs-usb-gadget-sourcesink
@@ -4,11 +4,13 @@ KernelVersion: 3.13
Description:
The attributes:
- pattern - 0 (all zeros), 1 (mod63), 2 (none)
- isoc_interval - 1..16
- isoc_maxpacket - 0 - 1023 (fs), 0 - 1024 (hs/ss)
- isoc_mult - 0..2 (hs/ss only)
- isoc_maxburst - 0..15 (ss only)
- buflen - buffer length
- bulk_qlen - depth of queue for bulk
- iso_qlen - depth of queue for iso
+ ============== ==================================
+ pattern 0 (all zeros), 1 (mod63), 2 (none)
+ isoc_interval 1..16
+ isoc_maxpacket 0 - 1023 (fs), 0 - 1024 (hs/ss)
+ isoc_mult 0..2 (hs/ss only)
+ isoc_maxburst 0..15 (ss only)
+ buflen buffer length
+ bulk_qlen depth of queue for bulk
+ iso_qlen depth of queue for iso
+ ============== ==================================
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-subset b/Documentation/ABI/testing/configfs-usb-gadget-subset
index 9373e2c51ea4..0061b864351f 100644
--- a/Documentation/ABI/testing/configfs-usb-gadget-subset
+++ b/Documentation/ABI/testing/configfs-usb-gadget-subset
@@ -4,11 +4,13 @@ KernelVersion: 3.11
Description:
The attributes:
- ifname - network device interface name associated with
+ ========== =============================================
+ ifname network device interface name associated with
this function instance
- qmult - queue length multiplier for high and
+ qmult queue length multiplier for high and
super speed
- host_addr - MAC address of host's end of this
+ host_addr MAC address of host's end of this
Ethernet over USB link
- dev_addr - MAC address of device's end of this
+ dev_addr MAC address of device's end of this
Ethernet over USB link
+ ========== =============================================
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-uac1 b/Documentation/ABI/testing/configfs-usb-gadget-uac1
index abfe447c848f..64188a85592b 100644
--- a/Documentation/ABI/testing/configfs-usb-gadget-uac1
+++ b/Documentation/ABI/testing/configfs-usb-gadget-uac1
@@ -4,11 +4,38 @@ KernelVersion: 4.14
Description:
The attributes:
- c_chmask - capture channel mask
- c_srate - capture sampling rate
- c_ssize - capture sample size (bytes)
- p_chmask - playback channel mask
- p_srate - playback sampling rate
- p_ssize - playback sample size (bytes)
- req_number - the number of pre-allocated request
- for both capture and playback
+ ===================== =======================================
+ c_chmask capture channel mask
+ c_srate list of capture sampling rates (comma-separated)
+ c_ssize capture sample size (bytes)
+ c_mute_present capture mute control enable
+ c_volume_present capture volume control enable
+ c_volume_min capture volume control min value
+ (in 1/256 dB)
+ c_volume_max capture volume control max value
+ (in 1/256 dB)
+ c_volume_res capture volume control resolution
+ (in 1/256 dB)
+ p_chmask playback channel mask
+ p_srate list of playback sampling rates (comma-separated)
+ p_ssize playback sample size (bytes)
+ p_mute_present playback mute control enable
+ p_volume_present playback volume control enable
+ p_volume_min playback volume control min value
+ (in 1/256 dB)
+ p_volume_max playback volume control max value
+ (in 1/256 dB)
+ p_volume_res playback volume control resolution
+ (in 1/256 dB)
+ req_number the number of pre-allocated requests
+ for both capture and playback
+ function_name name of the interface
+ p_it_name playback input terminal name
+ p_it_ch_name playback channels name
+ p_ot_name playback output terminal name
+ p_fu_vol_name playback mute/volume functional unit name
+ c_it_name capture input terminal name
+ c_it_ch_name capture channels name
+ c_ot_name capture output terminal name
+ c_fu_vol_name capture mute/volume functional unit name
+ ===================== =======================================
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-uac2 b/Documentation/ABI/testing/configfs-usb-gadget-uac2
index 2bfdd4efa9bd..133e995c3e92 100644
--- a/Documentation/ABI/testing/configfs-usb-gadget-uac2
+++ b/Documentation/ABI/testing/configfs-usb-gadget-uac2
@@ -4,9 +4,48 @@ KernelVersion: 3.18
Description:
The attributes:
- c_chmask - capture channel mask
- c_srate - capture sampling rate
- c_ssize - capture sample size (bytes)
- p_chmask - playback channel mask
- p_srate - playback sampling rate
- p_ssize - playback sample size (bytes)
+ ===================== =======================================
+ c_chmask capture channel mask
+ c_srate list of capture sampling rates (comma-separated)
+ c_ssize capture sample size (bytes)
+ c_hs_bint capture bInterval for HS/SS (1-4: fixed, 0: auto)
+ c_sync capture synchronization type
+ (async/adaptive)
+ c_mute_present capture mute control enable
+ c_volume_present capture volume control enable
+ c_volume_min capture volume control min value
+ (in 1/256 dB)
+ c_volume_max capture volume control max value
+ (in 1/256 dB)
+ c_volume_res capture volume control resolution
+ (in 1/256 dB)
+ fb_max maximum extra bandwidth in async mode
+ p_chmask playback channel mask
+ p_srate list of playback sampling rates (comma-separated)
+ p_ssize playback sample size (bytes)
+ p_hs_bint playback bInterval for HS/SS (1-4: fixed, 0: auto)
+ p_mute_present playback mute control enable
+ p_volume_present playback volume control enable
+ p_volume_min playback volume control min value
+ (in 1/256 dB)
+ p_volume_max playback volume control max value
+ (in 1/256 dB)
+ p_volume_res playback volume control resolution
+ (in 1/256 dB)
+ req_number the number of pre-allocated requests
+ for both capture and playback
+ function_name name of the interface
+ if_ctrl_name topology control name
+ clksrc_in_name input clock name
+ clksrc_out_name output clock name
+ p_it_name playback input terminal name
+ p_it_ch_name playback input first channel name
+ p_ot_name playback output terminal name
+ p_fu_vol_name playback mute/volume function unit name
+ c_it_name capture input terminal name
+ c_it_ch_name capture input first channel name
+ c_ot_name capture output terminal name
+ c_fu_vol_name capture mute/volume functional unit name
+ c_terminal_type code of the capture terminal type
+ p_terminal_type code of the playback terminal type
+ ===================== =======================================
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-uvc b/Documentation/ABI/testing/configfs-usb-gadget-uvc
index 809765bd9573..b6720768d63d 100644
--- a/Documentation/ABI/testing/configfs-usb-gadget-uvc
+++ b/Documentation/ABI/testing/configfs-usb-gadget-uvc
@@ -3,18 +3,26 @@ Date: Dec 2014
KernelVersion: 4.0
Description: UVC function directory
- streaming_maxburst - 0..15 (ss only)
- streaming_maxpacket - 1..1023 (fs), 1..3072 (hs/ss)
- streaming_interval - 1..16
+ =================== =============================
+ streaming_maxburst 0..15 (ss only)
+ streaming_maxpacket 1..1023 (fs), 1..3072 (hs/ss)
+ streaming_interval 1..16
+ function_name string [32]
+ =================== =============================
What: /config/usb-gadget/gadget/functions/uvc.name/control
Date: Dec 2014
KernelVersion: 4.0
Description: Control descriptors
- All attributes read only:
- bInterfaceNumber - USB interface number for this
- streaming interface
+ All attributes read only except enable_interrupt_ep:
+
+ =================== =============================
+ bInterfaceNumber USB interface number for this
+ streaming interface
+ enable_interrupt_ep flag to enable the interrupt
+ endpoint for the VC interface
+ =================== =============================
What: /config/usb-gadget/gadget/functions/uvc.name/control/class
Date: Dec 2014
@@ -46,14 +54,17 @@ Date: Dec 2014
KernelVersion: 4.0
Description: Default output terminal descriptors
- All attributes read only:
- iTerminal - index of string descriptor
- bSourceID - id of the terminal to which this terminal
+ All attributes read only except bSourceID:
+
+ ============== =============================================
+ iTerminal index of string descriptor
+ bSourceID id of the terminal to which this terminal
is connected
- bAssocTerminal - id of the input terminal to which this output
+ bAssocTerminal id of the input terminal to which this output
terminal is associated
- wTerminalType - terminal type
- bTerminalID - a non-zero id of this terminal
+ wTerminalType terminal type
+ bTerminalID a non-zero id of this terminal
+ ============== =============================================
What: /config/usb-gadget/gadget/functions/uvc.name/control/terminal/camera
Date: Dec 2014
@@ -65,17 +76,20 @@ Date: Dec 2014
KernelVersion: 4.0
Description: Default camera terminal descriptors
- All attributes read only:
- bmControls - bitmap specifying which controls are
- supported for the video stream
- wOcularFocalLength - the value of Locular
- wObjectiveFocalLengthMax- the value of Lmin
- wObjectiveFocalLengthMin- the value of Lmax
- iTerminal - index of string descriptor
- bAssocTerminal - id of the output terminal to which
- this terminal is connected
- wTerminalType - terminal type
- bTerminalID - a non-zero id of this terminal
+ All attributes read only except bmControls, which is read/write:
+
+ ======================== ====================================
+ bmControls bitmap specifying which controls are
+ supported for the video stream
+ wOcularFocalLength the value of Locular
+ wObjectiveFocalLengthMax the value of Lmin
+ wObjectiveFocalLengthMin the value of Lmax
+ iTerminal index of string descriptor
+ bAssocTerminal id of the output terminal to which
+ this terminal is connected
+ wTerminalType terminal type
+ bTerminalID a non-zero id of this terminal
+ ======================== ====================================
What: /config/usb-gadget/gadget/functions/uvc.name/control/processing
Date: Dec 2014
@@ -87,14 +101,45 @@ Date: Dec 2014
KernelVersion: 4.0
Description: Default processing unit descriptors
- All attributes read only:
- iProcessing - index of string descriptor
- bmControls - bitmap specifying which controls are
+ All attributes read only except bmControls, which is read/write:
+
+ =============== ========================================
+ iProcessing index of string descriptor
+ bmControls bitmap specifying which controls are
supported for the video stream
- wMaxMultiplier - maximum digital magnification x100
- bSourceID - id of the terminal to which this unit is
+ wMaxMultiplier maximum digital magnification x100
+ bSourceID id of the terminal to which this unit is
connected
- bUnitID - a non-zero id of this unit
+ bUnitID a non-zero id of this unit
+ =============== ========================================
+
+What: /config/usb-gadget/gadget/functions/uvc.name/control/extensions
+Date: Nov 2022
+KernelVersion: 6.1
+Description: Extension unit descriptors
+
+What: /config/usb-gadget/gadget/functions/uvc.name/control/extensions/name
+Date: Nov 2022
+KernelVersion: 6.1
+Description: Extension Unit (XU) Descriptor
+
+ bLength, bUnitID and iExtension are read-only. All others are
+ read-write.
+
+ ================= ========================================
+ bLength size of the descriptor in bytes
+ bUnitID non-zero ID of this unit
+ guidExtensionCode Vendor-specific code identifying the XU
+ bNumControls number of controls in this XU
+ bNrInPins number of input pins for this unit
+ baSourceID list of the IDs of the units or terminals
+ to which this XU is connected
+ bControlSize size of the bmControls field in bytes
+ bmControls list of bitmaps detailing which vendor
+ specific controls are supported
+ iExtension index of a string descriptor that describes
+ this extension unit
+ ================= ========================================
What: /config/usb-gadget/gadget/functions/uvc.name/control/header
Date: Dec 2014
@@ -114,8 +159,11 @@ KernelVersion: 4.0
Description: Streaming descriptors
All attributes read only:
- bInterfaceNumber - USB interface number for this
- streaming interface
+
+ ================ =============================
+ bInterfaceNumber USB interface number for this
+ streaming interface
+ ================ =============================
What: /config/usb-gadget/gadget/functions/uvc.name/streaming/class
Date: Dec 2014
@@ -147,14 +195,34 @@ Date: Dec 2014
KernelVersion: 4.0
Description: Default color matching descriptors
- All attributes read only:
- bMatrixCoefficients - matrix used to compute luma and
- chroma values from the color primaries
- bTransferCharacteristics- optoelectronic transfer
- characteristic of the source picutre,
- also called the gamma function
- bColorPrimaries - color primaries and the reference
- white
+ All attributes read/write:
+
+ ======================== ======================================
+ bMatrixCoefficients matrix used to compute luma and
+ chroma values from the color primaries
+ bTransferCharacteristics optoelectronic transfer
+ characteristic of the source picture,
+ also called the gamma function
+ bColorPrimaries color primaries and the reference
+ white
+ ======================== ======================================
+
+What: /config/usb-gadget/gadget/functions/uvc.name/streaming/color_matching/name
+Date: Dec 2022
+KernelVersion: 6.3
+Description: Additional color matching descriptors
+
+ All attributes read/write:
+
+ ======================== ======================================
+ bMatrixCoefficients matrix used to compute luma and
+ chroma values from the color primaries
+ bTransferCharacteristics optoelectronic transfer
+ characteristic of the source picture,
+ also called the gamma function
+ bColorPrimaries color primaries and the reference
+ white
+ ======================== ======================================
What: /config/usb-gadget/gadget/functions/uvc.name/streaming/mjpeg
Date: Dec 2014
@@ -168,47 +236,52 @@ Description: Specific MJPEG format descriptors
All attributes read only,
except bmaControls and bDefaultFrameIndex:
- bFormatIndex - unique id for this format descriptor;
+
+ =================== =====================================
+ bFormatIndex unique id for this format descriptor;
only defined after parent header is
linked into the streaming class;
read-only
- bmaControls - this format's data for bmaControls in
+ bmaControls this format's data for bmaControls in
the streaming header
- bmInterfaceFlags - specifies interlace information,
+ bmInterlaceFlags specifies interlace information,
read-only
- bAspectRatioY - the X dimension of the picture aspect
+ bAspectRatioY the X dimension of the picture aspect
ratio, read-only
- bAspectRatioX - the Y dimension of the picture aspect
+ bAspectRatioX the Y dimension of the picture aspect
ratio, read-only
- bmFlags - characteristics of this format,
+ bmFlags characteristics of this format,
read-only
- bDefaultFrameIndex - optimum frame index for this stream
+ bDefaultFrameIndex optimum frame index for this stream
+ =================== =====================================
What: /config/usb-gadget/gadget/functions/uvc.name/streaming/mjpeg/name/name
Date: Dec 2014
KernelVersion: 4.0
Description: Specific MJPEG frame descriptors
- bFrameIndex - unique id for this framedescriptor;
- only defined after parent format is
- linked into the streaming header;
- read-only
- dwFrameInterval - indicates how frame interval can be
- programmed; a number of values
- separated by newline can be specified
- dwDefaultFrameInterval - the frame interval the device would
- like to use as default
- dwMaxVideoFrameBufferSize- the maximum number of bytes the
- compressor will produce for a video
- frame or still image
- dwMaxBitRate - the maximum bit rate at the shortest
- frame interval in bps
- dwMinBitRate - the minimum bit rate at the longest
- frame interval in bps
- wHeight - height of decoded bitmap frame in px
- wWidth - width of decoded bitmam frame in px
- bmCapabilities - still image support, fixed frame-rate
- support
+ ========================= =====================================
+ bFrameIndex unique id for this framedescriptor;
+ only defined after parent format is
+ linked into the streaming header;
+ read-only
+ dwFrameInterval indicates how frame interval can be
+ programmed; a number of values
+ separated by newline can be specified
+ dwDefaultFrameInterval the frame interval the device would
+ like to use as default
+ dwMaxVideoFrameBufferSize the maximum number of bytes the
+ compressor will produce for a video
+ frame or still image
+ dwMaxBitRate the maximum bit rate at the shortest
+ frame interval in bps
+ dwMinBitRate the minimum bit rate at the longest
+ frame interval in bps
+ wHeight height of decoded bitmap frame in px
+ wWidth width of decoded bitmam frame in px
+ bmCapabilities still image support, fixed frame-rate
+ support
+ ========================= =====================================
What: /config/usb-gadget/gadget/functions/uvc.name/streaming/uncompressed
Date: Dec 2014
@@ -220,50 +293,118 @@ Date: Dec 2014
KernelVersion: 4.0
Description: Specific uncompressed format descriptors
- bFormatIndex - unique id for this format descriptor;
+ ================== =======================================
+ bFormatIndex unique id for this format descriptor;
only defined after parent header is
linked into the streaming class;
read-only
- bmaControls - this format's data for bmaControls in
+ bmaControls this format's data for bmaControls in
the streaming header
- bmInterfaceFlags - specifies interlace information,
+ bmInterlaceFlags specifies interlace information,
read-only
- bAspectRatioY - the X dimension of the picture aspect
+ bAspectRatioY the X dimension of the picture aspect
ratio, read-only
- bAspectRatioX - the Y dimension of the picture aspect
+ bAspectRatioX the Y dimension of the picture aspect
ratio, read-only
- bDefaultFrameIndex - optimum frame index for this stream
- bBitsPerPixel - number of bits per pixel used to
+ bDefaultFrameIndex optimum frame index for this stream
+ bBitsPerPixel number of bits per pixel used to
specify color in the decoded video
frame
- guidFormat - globally unique id used to identify
+ guidFormat globally unique id used to identify
stream-encoding format
+ ================== =======================================
What: /config/usb-gadget/gadget/functions/uvc.name/streaming/uncompressed/name/name
Date: Dec 2014
KernelVersion: 4.0
Description: Specific uncompressed frame descriptors
- bFrameIndex - unique id for this framedescriptor;
- only defined after parent format is
- linked into the streaming header;
- read-only
- dwFrameInterval - indicates how frame interval can be
- programmed; a number of values
- separated by newline can be specified
- dwDefaultFrameInterval - the frame interval the device would
- like to use as default
- dwMaxVideoFrameBufferSize- the maximum number of bytes the
- compressor will produce for a video
- frame or still image
- dwMaxBitRate - the maximum bit rate at the shortest
- frame interval in bps
- dwMinBitRate - the minimum bit rate at the longest
- frame interval in bps
- wHeight - height of decoded bitmap frame in px
- wWidth - width of decoded bitmam frame in px
- bmCapabilities - still image support, fixed frame-rate
- support
+ ========================= =====================================
+ bFrameIndex unique id for this framedescriptor;
+ only defined after parent format is
+ linked into the streaming header;
+ read-only
+ dwFrameInterval indicates how frame interval can be
+ programmed; a number of values
+ separated by newline can be specified
+ dwDefaultFrameInterval the frame interval the device would
+ like to use as default
+ dwMaxVideoFrameBufferSize the maximum number of bytes the
+ compressor will produce for a video
+ frame or still image
+ dwMaxBitRate the maximum bit rate at the shortest
+ frame interval in bps
+ dwMinBitRate the minimum bit rate at the longest
+ frame interval in bps
+ wHeight height of decoded bitmap frame in px
+ wWidth width of decoded bitmam frame in px
+ bmCapabilities still image support, fixed frame-rate
+ support
+ ========================= =====================================
+
+What: /config/usb-gadget/gadget/functions/uvc.name/streaming/framebased
+Date: Sept 2024
+KernelVersion: 5.15
+Description: Framebased format descriptors
+
+What: /config/usb-gadget/gadget/functions/uvc.name/streaming/framebased/name
+Date: Sept 2024
+KernelVersion: 5.15
+Description: Specific framebased format descriptors
+
+ ================== =======================================
+ bFormatIndex unique id for this format descriptor;
+ only defined after parent header is
+ linked into the streaming class;
+ read-only
+ bmaControls this format's data for bmaControls in
+ the streaming header
+ bmInterlaceFlags specifies interlace information,
+ read-only
+ bAspectRatioY the X dimension of the picture aspect
+ ratio, read-only
+ bAspectRatioX the Y dimension of the picture aspect
+ ratio, read-only
+ bDefaultFrameIndex optimum frame index for this stream
+ bBitsPerPixel number of bits per pixel used to
+ specify color in the decoded video
+ frame
+ guidFormat globally unique id used to identify
+ stream-encoding format
+ ================== =======================================
+
+What: /config/usb-gadget/gadget/functions/uvc.name/streaming/framebased/name/name
+Date: Sept 2024
+KernelVersion: 5.15
+Description: Specific framebased frame descriptors
+
+ ========================= =====================================
+ bFrameIndex unique id for this framedescriptor;
+ only defined after parent format is
+ linked into the streaming header;
+ read-only
+ dwFrameInterval indicates how frame interval can be
+ programmed; a number of values
+ separated by newline can be specified
+ dwDefaultFrameInterval the frame interval the device would
+ like to use as default
+ dwBytesPerLine Specifies the number of bytes per line
+ of video for packed fixed frame size
+ formats, allowing the receiver to
+ perform stride alignment of the video.
+ If the bVariableSize value (above) is
+ TRUE (1), or if the format does not
+ permit such alignment, this value shall
+ be set to zero (0).
+ dwMaxBitRate the maximum bit rate at the shortest
+ frame interval in bps
+ dwMinBitRate the minimum bit rate at the longest
+ frame interval in bps
+ wHeight height of decoded bitmap frame in px
+ wWidth width of decoded bitmam frame in px
+ bmCapabilities still image support, fixed frame-rate
+ support
+ ========================= =====================================
What: /config/usb-gadget/gadget/functions/uvc.name/streaming/header
Date: Dec 2014
@@ -276,17 +417,20 @@ KernelVersion: 4.0
Description: Specific streaming header descriptors
All attributes read only:
- bTriggerUsage - how the host software will respond to
+
+ ==================== =====================================
+ bTriggerUsage how the host software will respond to
a hardware trigger interrupt event
- bTriggerSupport - flag specifying if hardware
+ bTriggerSupport flag specifying if hardware
triggering is supported
- bStillCaptureMethod - method of still image caputre
+ bStillCaptureMethod method of still image capture
supported
- bTerminalLink - id of the output terminal to which
+ bTerminalLink id of the output terminal to which
the video endpoint of this interface
is connected
- bmInfo - capabilities of this video streaming
+ bmInfo capabilities of this video streaming
interface
+ ==================== =====================================
What: /sys/class/udc/udc.name/device/gadget/video4linux/video.name/function_name
Date: May 2018
diff --git a/Documentation/ABI/testing/debugfs-alienware-wmi b/Documentation/ABI/testing/debugfs-alienware-wmi
new file mode 100644
index 000000000000..c7f525d6baac
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-alienware-wmi
@@ -0,0 +1,64 @@
+What: /sys/kernel/debug/alienware-wmi-<wmi_device_name>/system_description
+Date: March 2025
+KernelVersion: 6.15
+Contact: Kurt Borja <kuurtb@gmail.com>
+Description:
+ This file exposes the raw ``system_description`` number reported
+ by the WMAX device.
+
+ Only present on devices with the AWCC interface.
+
+ See Documentation/admin-guide/laptops/alienware-wmi.rst for
+ details.
+
+ RO
+
+What: /sys/kernel/debug/alienware-wmi-<wmi_device_name>/hwmon_data
+Date: March 2025
+KernelVersion: 6.15
+Contact: Kurt Borja <kuurtb@gmail.com>
+Description:
+ This file exposes HWMON private data.
+
+ Includes fan sensor count, temperature sensor count, internal
+ fan IDs and internal temp IDs.
+
+ See Documentation/admin-guide/laptops/alienware-wmi.rst for
+ details.
+
+ RO
+
+What: /sys/kernel/debug/alienware-wmi-<wmi_device_name>/pprof_data
+Date: March 2025
+KernelVersion: 6.15
+Contact: Kurt Borja <kuurtb@gmail.com>
+Description:
+ This file exposes Platform Profile private data.
+
+ Includes internal mapping to platform profiles and thermal
+ profile IDs.
+
+ See Documentation/admin-guide/laptops/alienware-wmi.rst for
+ details.
+
+ RO
+
+What: /sys/kernel/debug/alienware-wmi-<wmi_device_name>/gpio_ctl/total_gpios
+Date: May 2025
+KernelVersion: 6.16
+Contact: Kurt Borja <kuurtb@gmail.com>
+Description:
+ Total number of GPIO pins reported by the device.
+
+ RO
+
+What: /sys/kernel/debug/alienware-wmi-<wmi_device_name>/gpio_ctl/pinX
+Date: May 2025
+KernelVersion: 6.16
+Contact: Kurt Borja <kuurtb@gmail.com>
+Description:
+ This file controls GPIO pin X status.
+
+ See Documentation/wmi/devices/alienware-wmi.rst for details.
+
+ RW
diff --git a/Documentation/ABI/testing/debugfs-amd-iommu b/Documentation/ABI/testing/debugfs-amd-iommu
new file mode 100644
index 000000000000..5621a66aa693
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-amd-iommu
@@ -0,0 +1,131 @@
+What: /sys/kernel/debug/iommu/amd/iommu<x>/mmio
+Date: January 2025
+Contact: Dheeraj Kumar Srivastava <dheerajkumar.srivastava@amd.com>
+Description:
+ This file provides read/write access for user input. Users specify the
+ MMIO register offset for iommu<x>, and the file outputs the corresponding
+ MMIO register value of iommu<x>
+
+ Example::
+
+ $ echo "0x18" > /sys/kernel/debug/iommu/amd/iommu00/mmio
+ $ cat /sys/kernel/debug/iommu/amd/iommu00/mmio
+
+ Output::
+
+ Offset:0x18 Value:0x000c22000003f48d
+
+What: /sys/kernel/debug/iommu/amd/iommu<x>/capability
+Date: January 2025
+Contact: Dheeraj Kumar Srivastava <dheerajkumar.srivastava@amd.com>
+Description:
+ This file provides read/write access for user input. Users specify the
+ capability register offset for iommu<x>, and the file outputs the
+ corresponding capability register value of iommu<x>.
+
+ Example::
+
+ $ echo "0x10" > /sys/kernel/debug/iommu/amd/iommu00/capability
+ $ cat /sys/kernel/debug/iommu/amd/iommu00/capability
+
+ Output::
+
+ Offset:0x10 Value:0x00203040
+
+What: /sys/kernel/debug/iommu/amd/iommu<x>/cmdbuf
+Date: January 2025
+Contact: Dheeraj Kumar Srivastava <dheerajkumar.srivastava@amd.com>
+Description:
+ This file is a read-only output file containing iommu<x> command
+ buffer entries.
+
+ Examples::
+
+ $ cat /sys/kernel/debug/iommu/amd/iommu<x>/cmdbuf
+
+ Output::
+
+ CMD Buffer Head Offset:339 Tail Offset:339
+ 0: 00835001 10000001 00003c00 00000000
+ 1: 00000000 30000005 fffff003 7fffffff
+ 2: 00835001 10000001 00003c01 00000000
+ 3: 00000000 30000005 fffff003 7fffffff
+ 4: 00835001 10000001 00003c02 00000000
+ 5: 00000000 30000005 fffff003 7fffffff
+ 6: 00835001 10000001 00003c03 00000000
+ 7: 00000000 30000005 fffff003 7fffffff
+ 8: 00835001 10000001 00003c04 00000000
+ 9: 00000000 30000005 fffff003 7fffffff
+ 10: 00835001 10000001 00003c05 00000000
+ 11: 00000000 30000005 fffff003 7fffffff
+ [...]
+
+What: /sys/kernel/debug/iommu/amd/devid
+Date: January 2025
+Contact: Dheeraj Kumar Srivastava <dheerajkumar.srivastava@amd.com>
+Description:
+ This file provides read/write access for user input. Users specify the
+ device ID, which can be used to dump IOMMU data structures such as the
+ interrupt remapping table and device table.
+
+ Example:
+
+ 1.
+ ::
+
+ $ echo 0000:01:00.0 > /sys/kernel/debug/iommu/amd/devid
+ $ cat /sys/kernel/debug/iommu/amd/devid
+
+ Output::
+
+ 0000:01:00.0
+
+ 2.
+ ::
+
+ $ echo 01:00.0 > /sys/kernel/debug/iommu/amd/devid
+ $ cat /sys/kernel/debug/iommu/amd/devid
+
+ Output::
+
+ 0000:01:00.0
+
+What: /sys/kernel/debug/iommu/amd/devtbl
+Date: January 2025
+Contact: Dheeraj Kumar Srivastava <dheerajkumar.srivastava@amd.com>
+Description:
+ This file is a read-only output file containing the device table entry
+ for the device ID provided in /sys/kernel/debug/iommu/amd/devid.
+
+ Example::
+
+ $ cat /sys/kernel/debug/iommu/amd/devtbl
+
+ Output::
+
+ DeviceId QWORD[3] QWORD[2] QWORD[1] QWORD[0] iommu
+ 0000:01:00.0 0000000000000000 20000001373b8013 0000000000000038 6000000114d7b603 iommu3
+
+What: /sys/kernel/debug/iommu/amd/irqtbl
+Date: January 2025
+Contact: Dheeraj Kumar Srivastava <dheerajkumar.srivastava@amd.com>
+Description:
+ This file is a read-only output file containing valid IRT table entries
+ for the device ID provided in /sys/kernel/debug/iommu/amd/devid.
+
+ Example::
+
+ $ cat /sys/kernel/debug/iommu/amd/irqtbl
+
+ Output::
+
+ DeviceId 0000:01:00.0
+ IRT[0000] 0000000000000020 0000000000000241
+ IRT[0001] 0000000000000020 0000000000000841
+ IRT[0002] 0000000000000020 0000000000002041
+ IRT[0003] 0000000000000020 0000000000008041
+ IRT[0004] 0000000000000020 0000000000020041
+ IRT[0005] 0000000000000020 0000000000080041
+ IRT[0006] 0000000000000020 0000000000200041
+ IRT[0007] 0000000000000020 0000000000800041
+ [...]
diff --git a/Documentation/ABI/testing/debugfs-cec-error-inj b/Documentation/ABI/testing/debugfs-cec-error-inj
index 122b65c5fe62..c512f71bba8e 100644
--- a/Documentation/ABI/testing/debugfs-cec-error-inj
+++ b/Documentation/ABI/testing/debugfs-cec-error-inj
@@ -1,6 +1,6 @@
What: /sys/kernel/debug/cec/*/error-inj
Date: March 2018
-Contact: Hans Verkuil <hans.verkuil@cisco.com>
+Contact: Hans Verkuil <hverkuil@kernel.org>
Description:
The CEC Framework allows for CEC error injection commands through
@@ -23,7 +23,7 @@ error injections without having to know the details of the driver-specific
commands.
Note that the output of 'error-inj' shall be valid as input to 'error-inj'.
-So this must work:
+So this must work::
$ cat error-inj >einj.txt
$ cat einj.txt >error-inj
@@ -37,4 +37,4 @@ when changes are made.
The following CEC error injection implementations exist:
-- Documentation/media/uapi/cec/cec-pin-error-inj.rst
+- Documentation/userspace-api/media/cec/cec-pin-error-inj.rst
diff --git a/Documentation/ABI/testing/debugfs-cros-ec b/Documentation/ABI/testing/debugfs-cros-ec
new file mode 100644
index 000000000000..9a040c6f5e03
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-cros-ec
@@ -0,0 +1,78 @@
+What: /sys/kernel/debug/<cros-ec-device>/console_log
+Date: September 2017
+KernelVersion: 4.13
+Description:
+ If the EC supports the CONSOLE_READ command type, this file
+ can be used to grab the EC logs. The kernel polls for the log
+ and keeps its own buffer but userspace should grab this and
+ write it out to some logs.
+
+What: /sys/kernel/debug/<cros-ec-device>/panicinfo
+Date: September 2017
+KernelVersion: 4.13
+Description:
+ This file dumps the EC panic information from the previous
+ reboot. This file will only exist if the PANIC_INFO command
+ type is supported by the EC.
+
+What: /sys/kernel/debug/<cros-ec-device>/pdinfo
+Date: June 2018
+KernelVersion: 4.17
+Description:
+ This file provides the port role, muxes and power debug
+ information for all the USB PD/type-C ports available. If
+ the are no ports available, this file will be just an empty
+ file.
+
+What: /sys/kernel/debug/<cros-ec-device>/uptime
+Date: June 2019
+KernelVersion: 5.3
+Description:
+ A u32 providing the time since EC booted in ms. This is
+ is used for synchronizing the AP host time with the EC
+ log. An error is returned if the command is not supported
+ by the EC or there is a communication problem.
+
+What: /sys/kernel/debug/<cros-ec-device>/last_resume_result
+Date: June 2019
+KernelVersion: 5.3
+Description:
+ Some ECs have a feature where they will track transitions to
+ the (Intel) processor's SLP_S0 line, in order to detect cases
+ where a system failed to go into S0ix. When the system resumes,
+ an EC with this feature will return a summary of SLP_S0
+ transitions that occurred. The last_resume_result file returns
+ the most recent response from the AP's resume message to the EC.
+
+ The bottom 31 bits contain a count of the number of SLP_S0
+ transitions that occurred since the suspend message was
+ received. Bit 31 is set if the EC attempted to wake the
+ system due to a timeout when watching for SLP_S0 transitions.
+ Callers can use this to detect a wake from the EC due to
+ S0ix timeouts. The result will be zero if no suspend
+ transitions have been attempted, or the EC does not support
+ this feature.
+
+ Output will be in the format: "0x%08x\n".
+
+What: /sys/kernel/debug/<cros-ec-device>/suspend_timeout_ms
+Date: August 2022
+KernelVersion: 6.1
+Description:
+ Some ECs have a feature where they will track transitions of
+ a hardware-controlled sleep line, such as Intel's SLP_S0 line,
+ in order to detect cases where a system failed to go into deep
+ sleep states. The suspend_timeout_ms file controls the amount of
+ time in milliseconds the EC will wait before declaring a sleep
+ timeout event and attempting to wake the system.
+
+ Supply 0 to use the default value coded into EC firmware. Supply
+ 65535 (EC_HOST_SLEEP_TIMEOUT_INFINITE) to disable the EC sleep
+ failure detection mechanism. Values in between 0 and 65535
+ indicate the number of milliseconds the EC should wait after a
+ sleep transition before declaring a timeout. This includes both
+ the duration after a sleep command was received but before the
+ hardware line changed, as well as the duration between when the
+ hardware line changed and the kernel sent an EC resume command.
+
+ Output will be in the format: "%u\n".
diff --git a/Documentation/ABI/testing/debugfs-cxl b/Documentation/ABI/testing/debugfs-cxl
new file mode 100644
index 000000000000..2989d4da96c1
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-cxl
@@ -0,0 +1,157 @@
+What: /sys/kernel/debug/cxl/memX/inject_poison
+Date: April, 2023
+KernelVersion: v6.4
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (WO) When a Device Physical Address (DPA) is written to this
+ attribute, the memdev driver sends an inject poison command to
+ the device for the specified address. The DPA must be 64-byte
+ aligned and the length of the injected poison is 64-bytes. If
+ successful, the device returns poison when the address is
+ accessed through the CXL.mem bus. Injecting poison adds the
+ address to the device's Poison List and the error source is set
+ to Injected. In addition, the device adds a poison creation
+ event to its internal Informational Event log, updates the
+ Event Status register, and if configured, interrupts the host.
+ It is not an error to inject poison into an address that
+ already has poison present and no error is returned. If the
+ device returns 'Inject Poison Limit Reached' an -EBUSY error
+ is returned to the user. The inject_poison attribute is only
+ visible for devices supporting the capability.
+
+ TEST-ONLY INTERFACE: This interface is intended for testing
+ and validation purposes only. It is not a data repair mechanism
+ and should never be used on production systems or live data.
+
+ DATA LOSS RISK: For CXL persistent memory (PMEM) devices,
+ poison injection can result in permanent data loss. Injected
+ poison may render data permanently inaccessible even after
+ clearing, as the clear operation writes zeros and does not
+ recover original data.
+
+ SYSTEM STABILITY RISK: For volatile memory, poison injection
+ can cause kernel crashes, system instability, or unpredictable
+ behavior if the poisoned addresses are accessed by running code
+ or critical kernel structures.
+
+What: /sys/kernel/debug/cxl/memX/clear_poison
+Date: April, 2023
+KernelVersion: v6.4
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (WO) When a Device Physical Address (DPA) is written to this
+ attribute, the memdev driver sends a clear poison command to
+ the device for the specified address. Clearing poison removes
+ the address from the device's Poison List and writes 0 (zero)
+ for 64 bytes starting at address. It is not an error to clear
+ poison from an address that does not have poison set. If the
+ device cannot clear poison from the address, -ENXIO is returned.
+ The clear_poison attribute is only visible for devices
+ supporting the capability.
+
+ TEST-ONLY INTERFACE: This interface is intended for testing
+ and validation purposes only. It is not a data repair mechanism
+ and should never be used on production systems or live data.
+
+ CLEAR IS NOT DATA RECOVERY: This operation writes zeros to the
+ specified address range and removes the address from the poison
+ list. It does NOT recover or restore original data that may have
+ been present before poison injection. Any original data at the
+ cleared address is permanently lost and replaced with zeros.
+
+ CLEAR IS NOT A REPAIR MECHANISM: This interface is for testing
+ purposes only and should not be used as a data repair tool.
+ Clearing poison is fundamentally different from data recovery
+ or error correction.
+
+What: /sys/kernel/debug/cxl/regionX/inject_poison
+Date: August, 2025
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (WO) When a Host Physical Address (HPA) is written to this
+ attribute, the region driver translates it to a Device
+ Physical Address (DPA) and identifies the corresponding
+ memdev. It then sends an inject poison command to that memdev
+ at the translated DPA. Refer to the memdev ABI entry at:
+ /sys/kernel/debug/cxl/memX/inject_poison for the detailed
+ behavior. This attribute is only visible if all memdevs
+ participating in the region support both inject and clear
+ poison commands.
+
+ TEST-ONLY INTERFACE: This interface is intended for testing
+ and validation purposes only. It is not a data repair mechanism
+ and should never be used on production systems or live data.
+
+ DATA LOSS RISK: For CXL persistent memory (PMEM) devices,
+ poison injection can result in permanent data loss. Injected
+ poison may render data permanently inaccessible even after
+ clearing, as the clear operation writes zeros and does not
+ recover original data.
+
+ SYSTEM STABILITY RISK: For volatile memory, poison injection
+ can cause kernel crashes, system instability, or unpredictable
+ behavior if the poisoned addresses are accessed by running code
+ or critical kernel structures.
+
+What: /sys/kernel/debug/cxl/regionX/clear_poison
+Date: August, 2025
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (WO) When a Host Physical Address (HPA) is written to this
+ attribute, the region driver translates it to a Device
+ Physical Address (DPA) and identifies the corresponding
+ memdev. It then sends a clear poison command to that memdev
+ at the translated DPA. Refer to the memdev ABI entry at:
+ /sys/kernel/debug/cxl/memX/clear_poison for the detailed
+ behavior. This attribute is only visible if all memdevs
+ participating in the region support both inject and clear
+ poison commands.
+
+ TEST-ONLY INTERFACE: This interface is intended for testing
+ and validation purposes only. It is not a data repair mechanism
+ and should never be used on production systems or live data.
+
+ CLEAR IS NOT DATA RECOVERY: This operation writes zeros to the
+ specified address range and removes the address from the poison
+ list. It does NOT recover or restore original data that may have
+ been present before poison injection. Any original data at the
+ cleared address is permanently lost and replaced with zeros.
+
+ CLEAR IS NOT A REPAIR MECHANISM: This interface is for testing
+ purposes only and should not be used as a data repair tool.
+ Clearing poison is fundamentally different from data recovery
+ or error correction.
+
+What: /sys/kernel/debug/cxl/einj_types
+Date: January, 2024
+KernelVersion: v6.9
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) Prints the CXL protocol error types made available by
+ the platform in the format:
+
+ 0x<error number> <error type>
+
+ The possible error types are (as of ACPI v6.5):
+
+ 0x1000 CXL.cache Protocol Correctable
+ 0x2000 CXL.cache Protocol Uncorrectable non-fatal
+ 0x4000 CXL.cache Protocol Uncorrectable fatal
+ 0x8000 CXL.mem Protocol Correctable
+ 0x10000 CXL.mem Protocol Uncorrectable non-fatal
+ 0x20000 CXL.mem Protocol Uncorrectable fatal
+
+ The <error number> can be written to einj_inject to inject
+ <error type> into a chosen dport.
+
+What: /sys/kernel/debug/cxl/$dport_dev/einj_inject
+Date: January, 2024
+KernelVersion: v6.9
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (WO) Writing an integer to this file injects the corresponding
+ CXL protocol error into $dport_dev ($dport_dev will be a device
+ name from /sys/bus/pci/devices). The integer to type mapping for
+ injection can be found by reading from einj_types. If the dport
+ was enumerated in RCH mode, a CXL 1.1 error is injected, otherwise
+ a CXL 2.0 error is injected.
diff --git a/Documentation/ABI/testing/debugfs-dell-wmi-ddv b/Documentation/ABI/testing/debugfs-dell-wmi-ddv
new file mode 100644
index 000000000000..81cfc788be15
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-dell-wmi-ddv
@@ -0,0 +1,34 @@
+What: /sys/kernel/debug/dell-wmi-ddv-<wmi_device_name>/fan_sensor_information
+Date: September 2022
+KernelVersion: 6.1
+Contact: Armin Wolf <W_Armin@gmx.de>
+Description:
+ This file contains the contents of the fan sensor information
+ buffer, which contains fan sensor entries and a terminating
+ character (0xFF).
+
+ Each fan sensor entry contains:
+
+ - fan type (single byte)
+ - fan speed in RPM (two bytes, little endian)
+
+ See Documentation/wmi/devices/dell-wmi-ddv.rst for details.
+
+What: /sys/kernel/debug/dell-wmi-ddv-<wmi_device_name>/thermal_sensor_information
+Date: September 2022
+KernelVersion: 6.1
+Contact: Armin Wolf <W_Armin@gmx.de>
+Description:
+ This file contains the contents of the thermal sensor information
+ buffer, which contains thermal sensor entries and a terminating
+ character (0xFF).
+
+ Each thermal sensor entry contains:
+
+ - thermal type (single byte)
+ - current temperature (single byte)
+ - min. temperature (single byte)
+ - max. temperature (single byte)
+ - unknown field (single byte)
+
+ See Documentation/wmi/devices/dell-wmi-ddv.rst for details.
diff --git a/Documentation/ABI/testing/debugfs-driver-dcc b/Documentation/ABI/testing/debugfs-driver-dcc
new file mode 100644
index 000000000000..27ed5919d21b
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-driver-dcc
@@ -0,0 +1,127 @@
+What: /sys/kernel/debug/dcc/.../ready
+Date: December 2022
+Contact: Souradeep Chowdhury <quic_schowdhu@quicinc.com>
+Description:
+ This file is used to check the status of the dcc
+ hardware if it's ready to receive user configurations.
+ A 'Y' here indicates dcc is ready.
+
+What: /sys/kernel/debug/dcc/.../trigger
+Date: December 2022
+Contact: Souradeep Chowdhury <quic_schowdhu@quicinc.com>
+Description:
+ This is the debugfs interface for manual software
+ triggers. The trigger can be invoked by writing '1'
+ to the file.
+
+What: /sys/kernel/debug/dcc/.../config_reset
+Date: December 2022
+Contact: Souradeep Chowdhury <quic_schowdhu@quicinc.com>
+Description:
+ This file is used to reset the configuration of
+ a dcc driver to the default configuration. When '1'
+ is written to the file, all the previous addresses
+ stored in the driver gets removed and users need to
+ reconfigure addresses again.
+
+What: /sys/kernel/debug/dcc/.../[list-number]/config
+Date: December 2022
+Contact: Souradeep Chowdhury <quic_schowdhu@quicinc.com>
+Description:
+ This stores the addresses of the registers which
+ can be read in case of a hardware crash or manual
+ software triggers. The input addresses type
+ can be one of following dcc instructions: read,
+ write, read-write, and loop type. The lists need to
+ be configured sequentially and not in a overlapping
+ manner; e.g. users can jump to list x only after
+ list y is configured and enabled. The input format for
+ each type is as follows:
+
+ i) Read instruction
+
+ ::
+
+ echo R <addr> <n> <bus> >/sys/kernel/debug/dcc/../[list-number]/config
+
+ where:
+
+ <addr>
+ The address to be read.
+
+ <n>
+ The addresses word count, starting from address <1>.
+ Each word is 32 bits (4 bytes). If omitted, defaulted
+ to 1.
+
+ <bus type>
+ The bus type, which can be either 'apb' or 'ahb'.
+ The default is 'ahb' if leaved out.
+
+ ii) Write instruction
+
+ ::
+
+ echo W <addr> <n> <bus type> > /sys/kernel/debug/dcc/../[list-number]/config
+
+ where:
+
+ <addr>
+ The address to be written.
+
+ <n>
+ The value to be written at <addr>.
+
+ <bus type>
+ The bus type, which can be either 'apb' or 'ahb'.
+
+ iii) Read-write instruction
+
+ ::
+
+ echo RW <addr> <n> <mask> > /sys/kernel/debug/dcc/../[list-number]/config
+
+ where:
+
+ <addr>
+ The address to be read and written.
+
+ <n>
+ The value to be written at <addr>.
+
+ <mask>
+ The value mask.
+
+ iv) Loop instruction
+
+ ::
+
+ echo L <loop count> <address count> <address>... > /sys/kernel/debug/dcc/../[list-number]/config
+
+ where:
+
+ <loop count>
+ Number of iterations
+
+ <address count>
+ total number of addresses to be written
+
+ <address>
+ Space-separated list of addresses.
+
+What: /sys/kernel/debug/dcc/.../[list-number]/enable
+Date: December 2022
+Contact: Souradeep Chowdhury <quic_schowdhu@quicinc.com>
+Description:
+ This debugfs interface is used for enabling the
+ the dcc hardware. A file named "enable" is in the
+ directory list number where users can enable/disable
+ the specific list by writing boolean (1 or 0) to the
+ file.
+
+ On enabling the dcc, all the addresses specified
+ by the user for the corresponding list is written
+ into dcc sram which is read by the dcc hardware
+ on manual or crash induced triggers. Lists must
+ be configured and enabled sequentially, e.g. list
+ 2 can only be enabled when list 1 have so.
diff --git a/Documentation/ABI/testing/debugfs-driver-genwqe b/Documentation/ABI/testing/debugfs-driver-genwqe
index 1c2f25674e8c..b45b016545d8 100644
--- a/Documentation/ABI/testing/debugfs-driver-genwqe
+++ b/Documentation/ABI/testing/debugfs-driver-genwqe
@@ -31,7 +31,7 @@ What: /sys/kernel/debug/genwqe/genwqe<n>_card/prev_regs
Date: Oct 2013
Contact: haver@linux.vnet.ibm.com
Description: Dump of the error registers before the last reset of
- the card occured.
+ the card occurred.
Only available for PF.
What: /sys/kernel/debug/genwqe/genwqe<n>_card/prev_dbg_uid0
diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs b/Documentation/ABI/testing/debugfs-driver-habanalabs
new file mode 100644
index 000000000000..3318a14f35b9
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-driver-habanalabs
@@ -0,0 +1,323 @@
+What: /sys/kernel/debug/accel/<parent_device>/addr
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: Sets the device address to be used for read or write through
+ PCI bar, or the device VA of a host mapped memory to be read or
+ written directly from the host. The latter option is allowed
+ only when the IOMMU is disabled.
+ The acceptable value is a string that starts with "0x"
+
+What: /sys/kernel/debug/accel/<parent_device>/clk_gate
+Date: May 2020
+KernelVersion: 5.8
+Contact: ogabbay@kernel.org
+Description: This setting is now deprecated as clock gating is handled solely by the f/w
+
+What: /sys/kernel/debug/accel/<parent_device>/command_buffers
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: Displays a list with information about the currently allocated
+ command buffers
+
+What: /sys/kernel/debug/accel/<parent_device>/command_submission
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: Displays a list with information about the currently active
+ command submissions
+
+What: /sys/kernel/debug/accel/<parent_device>/command_submission_jobs
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: Displays a list with detailed information about each JOB (CB) of
+ each active command submission
+
+What: /sys/kernel/debug/accel/<parent_device>/data32
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: Allows the root user to read or write directly through the
+ device's PCI bar. Writing to this file generates a write
+ transaction while reading from the file generates a read
+ transaction. This custom interface is needed (instead of using
+ the generic Linux user-space PCI mapping) because the DDR bar
+ is very small compared to the DDR memory and only the driver can
+ move the bar before and after the transaction.
+
+ If the IOMMU is disabled, it also allows the root user to read
+ or write from the host a device VA of a host mapped memory
+
+What: /sys/kernel/debug/accel/<parent_device>/data64
+Date: Jan 2020
+KernelVersion: 5.6
+Contact: ogabbay@kernel.org
+Description: Allows the root user to read or write 64 bit data directly
+ through the device's PCI bar. Writing to this file generates a
+ write transaction while reading from the file generates a read
+ transaction. This custom interface is needed (instead of using
+ the generic Linux user-space PCI mapping) because the DDR bar
+ is very small compared to the DDR memory and only the driver can
+ move the bar before and after the transaction.
+
+ If the IOMMU is disabled, it also allows the root user to read
+ or write from the host a device VA of a host mapped memory
+
+What: /sys/kernel/debug/accel/<parent_device>/data_dma
+Date: Apr 2021
+KernelVersion: 5.13
+Contact: ogabbay@kernel.org
+Description: Allows the root user to read from the device's internal
+ memory (DRAM/SRAM) through a DMA engine.
+ This property is a binary blob that contains the result of the
+ DMA transfer.
+ This custom interface is needed (instead of using the generic
+ Linux user-space PCI mapping) because the amount of internal
+ memory is huge (>32GB) and reading it via the PCI bar will take
+ a very long time.
+ This interface doesn't support concurrency in the same device.
+ In GAUDI and GOYA, this action can cause undefined behavior
+ in case it is done while the device is executing user
+ workloads.
+ Only supported on GAUDI at this stage.
+
+What: /sys/kernel/debug/accel/<parent_device>/device
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: Enables the root user to set the device to specific state.
+ Valid values are "disable", "enable", "suspend", "resume".
+ User can read this property to see the valid values
+
+What: /sys/kernel/debug/accel/<parent_device>/device_release_watchdog_timeout
+Date: Oct 2022
+KernelVersion: 6.2
+Contact: ttayar@habana.ai
+Description: The watchdog timeout value in seconds for a device release upon
+ certain error cases, after which the device is reset.
+
+What: /sys/kernel/debug/accel/<parent_device>/dma_size
+Date: Apr 2021
+KernelVersion: 5.13
+Contact: ogabbay@kernel.org
+Description: Specify the size of the DMA transaction when using DMA to read
+ from the device's internal memory. The value can not be larger
+ than 128MB. Writing to this value initiates the DMA transfer.
+ When the write is finished, the user can read the "data_dma"
+ blob
+
+What: /sys/kernel/debug/accel/<parent_device>/dump_razwi_events
+Date: Aug 2022
+KernelVersion: 5.20
+Contact: fkassabri@habana.ai
+Description: Dumps all razwi events to dmesg if exist.
+ After reading the status register of an existing event
+ the routine will clear the status register.
+ Usage: cat dump_razwi_events
+
+What: /sys/kernel/debug/accel/<parent_device>/dump_security_violations
+Date: Jan 2021
+KernelVersion: 5.12
+Contact: ogabbay@kernel.org
+Description: Dumps all security violations to dmesg. This will also ack
+ all security violations meanings those violations will not be
+ dumped next time user calls this API
+
+What: /sys/kernel/debug/accel/<parent_device>/engines
+Date: Jul 2019
+KernelVersion: 5.3
+Contact: ogabbay@kernel.org
+Description: Displays the status registers values of the device engines and
+ their derived idle status
+
+What: /sys/kernel/debug/accel/<parent_device>/i2c_addr
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: Sets I2C device address for I2C transaction that is generated
+ by the device's CPU, Not available when device is loaded with secured
+ firmware
+
+What: /sys/kernel/debug/accel/<parent_device>/i2c_bus
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: Sets I2C bus address for I2C transaction that is generated by
+ the device's CPU, Not available when device is loaded with secured
+ firmware
+
+What: /sys/kernel/debug/accel/<parent_device>/i2c_data
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: Triggers an I2C transaction that is generated by the device's
+ CPU. Writing to this file generates a write transaction while
+ reading from the file generates a read transaction, Not available
+ when device is loaded with secured firmware
+
+What: /sys/kernel/debug/accel/<parent_device>/i2c_len
+Date: Dec 2021
+KernelVersion: 5.17
+Contact: obitton@habana.ai
+Description: Sets I2C length in bytes for I2C transaction that is generated by
+ the device's CPU, Not available when device is loaded with secured
+ firmware
+
+What: /sys/kernel/debug/accel/<parent_device>/i2c_reg
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: Sets I2C register id for I2C transaction that is generated by
+ the device's CPU, Not available when device is loaded with secured
+ firmware
+
+What: /sys/kernel/debug/accel/<parent_device>/led0
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: Sets the state of the first S/W led on the device, Not available
+ when device is loaded with secured firmware
+
+What: /sys/kernel/debug/accel/<parent_device>/led1
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: Sets the state of the second S/W led on the device, Not available
+ when device is loaded with secured firmware
+
+What: /sys/kernel/debug/accel/<parent_device>/led2
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: Sets the state of the third S/W led on the device, Not available
+ when device is loaded with secured firmware
+
+What: /sys/kernel/debug/accel/<parent_device>/memory_scrub
+Date: May 2022
+KernelVersion: 5.19
+Contact: dhirschfeld@habana.ai
+Description: Allows the root user to scrub the dram memory. The scrubbing
+ value can be set using the debugfs file memory_scrub_val.
+
+What: /sys/kernel/debug/accel/<parent_device>/memory_scrub_val
+Date: May 2022
+KernelVersion: 5.19
+Contact: dhirschfeld@habana.ai
+Description: The value to which the dram will be set to when the user
+ scrubs the dram using 'memory_scrub' debugfs file and
+ the scrubbing value when using module param 'memory_scrub'
+
+What: /sys/kernel/debug/accel/<parent_device>/mmu
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: Displays the hop values and physical address for a given ASID
+ and virtual address. The user should write the ASID and VA into
+ the file and then read the file to get the result.
+ e.g. to display info about VA 0x1000 for ASID 1 you need to do:
+ echo "1 0x1000" > /sys/kernel/debug/accel/<parent_device>/mmu
+
+What: /sys/kernel/debug/accel/<parent_device>/mmu_error
+Date: Mar 2021
+KernelVersion: 5.12
+Contact: fkassabri@habana.ai
+Description: Check and display page fault or access violation mmu errors for
+ all MMUs specified in mmu_cap_mask.
+ e.g. to display error info for MMU hw cap bit 9, you need to do:
+ echo "0x200" > /sys/kernel/debug/accel/<parent_device>/mmu_error
+ cat /sys/kernel/debug/accel/<parent_device>/mmu_error
+
+What: /sys/kernel/debug/accel/<parent_device>/monitor_dump
+Date: Mar 2022
+KernelVersion: 5.19
+Contact: osharabi@habana.ai
+Description: Allows the root user to dump monitors status from the device's
+ protected config space.
+ This property is a binary blob that contains the result of the
+ monitors registers dump.
+ This custom interface is needed (instead of using the generic
+ Linux user-space PCI mapping) because this space is protected
+ and cannot be accessed using PCI read.
+ This interface doesn't support concurrency in the same device.
+ Only supported on GAUDI.
+
+What: /sys/kernel/debug/accel/<parent_device>/monitor_dump_trig
+Date: Mar 2022
+KernelVersion: 5.19
+Contact: osharabi@habana.ai
+Description: Triggers dump of monitor data. The value to trigger the operation
+ must be 1. Triggering the monitor dump operation initiates dump of
+ current registers values of all monitors.
+ When the write is finished, the user can read the "monitor_dump"
+ blob
+
+What: /sys/kernel/debug/accel/<parent_device>/server_type
+Date: Feb 2024
+KernelVersion: 6.11
+Contact: trisin@habana.ai
+Description: Exposes the device's server type, maps to enum hl_server_type.
+
+What: /sys/kernel/debug/accel/<parent_device>/set_power_state
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: Sets the PCI power state. Valid values are "1" for D0 and "2"
+ for D3Hot
+
+What: /sys/kernel/debug/accel/<parent_device>/skip_reset_on_timeout
+Date: Jun 2021
+KernelVersion: 5.13
+Contact: ynudelman@habana.ai
+Description: Sets the skip reset on timeout option for the device. Value of
+ "0" means device will be reset in case some CS has timed out,
+ otherwise it will not be reset.
+
+What: /sys/kernel/debug/accel/<parent_device>/state_dump
+Date: Oct 2021
+KernelVersion: 5.15
+Contact: ynudelman@habana.ai
+Description: Gets the state dump occurring on a CS timeout or failure.
+ State dump is used for debug and is created each time in case of
+ a problem in a CS execution, before reset.
+ Reading from the node returns the newest state dump available.
+ Writing an integer X discards X state dumps, so that the
+ next read would return X+1-st newest state dump.
+
+What: /sys/kernel/debug/accel/<parent_device>/stop_on_err
+Date: Mar 2020
+KernelVersion: 5.6
+Contact: ogabbay@kernel.org
+Description: Sets the stop-on_error option for the device engines. Value of
+ "0" is for disable, otherwise enable.
+ Relevant only for GOYA and GAUDI.
+
+What: /sys/kernel/debug/accel/<parent_device>/timeout_locked
+Date: Sep 2021
+KernelVersion: 5.16
+Contact: obitton@habana.ai
+Description: Sets the command submission timeout value in seconds.
+
+What: /sys/kernel/debug/accel/<parent_device>/userptr
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: Displays a list with information about the current user
+ pointers (user virtual addresses) that are pinned and mapped
+ to DMA addresses
+
+What: /sys/kernel/debug/accel/<parent_device>/userptr_lookup
+Date: Oct 2021
+KernelVersion: 5.15
+Contact: ogabbay@kernel.org
+Description: Allows to search for specific user pointers (user virtual
+ addresses) that are pinned and mapped to DMA addresses, and see
+ their resolution to the specific dma address.
+
+What: /sys/kernel/debug/accel/<parent_device>/vm
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: Displays a list with information about all the active virtual
+ address mappings per ASID and all user mappings of HW blocks
diff --git a/Documentation/ABI/testing/debugfs-driver-qat b/Documentation/ABI/testing/debugfs-driver-qat
new file mode 100644
index 000000000000..3f1efbbad6ca
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-driver-qat
@@ -0,0 +1,109 @@
+What: /sys/kernel/debug/qat_<device>_<BDF>/fw_counters
+Date: November 2023
+KernelVersion: 6.6
+Contact: qat-linux@intel.com
+Description: (RO) Read returns the number of requests sent to the FW and the number of responses
+ received from the FW for each Acceleration Engine
+ Reported firmware counters::
+
+ <N>: Number of requests sent from Acceleration Engine N to FW and responses
+ Acceleration Engine N received from FW
+
+What: /sys/kernel/debug/qat_<device>_<BDF>/heartbeat/config
+Date: November 2023
+KernelVersion: 6.6
+Contact: qat-linux@intel.com
+Description: (RW) Read returns value of the Heartbeat update period.
+ Write to the file changes this period value.
+
+ This period should reflect planned polling interval of device
+ health status. High frequency Heartbeat monitoring wastes CPU cycles
+ but minimizes the customer’s system downtime. Also, if there are
+ large service requests that take some time to complete, high frequency
+ Heartbeat monitoring could result in false reports of unresponsiveness
+ and in those cases, period needs to be increased.
+
+ This parameter is effective only for c3xxx, c62x, dh895xcc devices.
+ 4xxx has this value internally fixed to 200ms.
+
+ Default value is set to 500. Minimal allowed value is 200.
+ All values are expressed in milliseconds.
+
+What: /sys/kernel/debug/qat_<device>_<BDF>/heartbeat/queries_failed
+Date: November 2023
+KernelVersion: 6.6
+Contact: qat-linux@intel.com
+Description: (RO) Read returns the number of times the device became unresponsive.
+
+ Attribute returns value of the counter which is incremented when
+ status query results negative.
+
+What: /sys/kernel/debug/qat_<device>_<BDF>/heartbeat/queries_sent
+Date: November 2023
+KernelVersion: 6.6
+Contact: qat-linux@intel.com
+Description: (RO) Read returns the number of times the control process checked
+ if the device is responsive.
+
+ Attribute returns value of the counter which is incremented on
+ every status query.
+
+What: /sys/kernel/debug/qat_<device>_<BDF>/heartbeat/status
+Date: November 2023
+KernelVersion: 6.6
+Contact: qat-linux@intel.com
+Description: (RO) Read returns the device health status.
+
+ Returns 0 when device is healthy or -1 when is unresponsive
+ or the query failed to send.
+
+ The driver does not monitor for Heartbeat. It is left for a user
+ to poll the status periodically.
+
+What: /sys/kernel/debug/qat_<device>_<BDF>/pm_status
+Date: January 2024
+KernelVersion: 6.7
+Contact: qat-linux@intel.com
+Description: (RO) Read returns power management information specific to the
+ QAT device.
+
+ This attribute is only available for qat_4xxx and qat_6xxx devices.
+
+What: /sys/kernel/debug/qat_<device>_<BDF>/cnv_errors
+Date: January 2024
+KernelVersion: 6.7
+Contact: qat-linux@intel.com
+Description: (RO) Read returns, for each Acceleration Engine (AE), the number
+ of errors and the type of the last error detected by the device
+ when performing verified compression.
+ Reported counters::
+
+ <N>: Number of Compress and Verify (CnV) errors and type
+ of the last CnV error detected by Acceleration
+ Engine N.
+
+What: /sys/kernel/debug/qat_<device>_<BDF>/heartbeat/inject_error
+Date: March 2024
+KernelVersion: 6.8
+Contact: qat-linux@intel.com
+Description: (WO) Write to inject an error that simulates an heartbeat
+ failure. This is to be used for testing purposes.
+
+ After writing this file, the driver stops arbitration on a
+ random engine and disables the fetching of heartbeat counters.
+ If a workload is running on the device, a job submitted to the
+ accelerator might not get a response and a read of the
+ `heartbeat/status` attribute might report -1, i.e. device
+ unresponsive.
+ The error is unrecoverable thus the device must be restarted to
+ restore its functionality.
+
+ This attribute is available only when the kernel is built with
+ CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION=y.
+
+ A write of 1 enables error injection.
+
+ The following example shows how to enable error injection::
+
+ # cd /sys/kernel/debug/qat_<device>_<BDF>
+ # echo 1 > heartbeat/inject_error
diff --git a/Documentation/ABI/testing/debugfs-driver-qat_telemetry b/Documentation/ABI/testing/debugfs-driver-qat_telemetry
new file mode 100644
index 000000000000..06097ee0f154
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-driver-qat_telemetry
@@ -0,0 +1,259 @@
+What: /sys/kernel/debug/qat_<device>_<BDF>/telemetry/control
+Date: March 2024
+KernelVersion: 6.8
+Contact: qat-linux@intel.com
+Description: (RW) Enables/disables the reporting of telemetry metrics.
+
+ Allowed values to write:
+ ========================
+ * 0: disable telemetry
+ * 1: enable telemetry
+ * 2, 3, 4: enable telemetry and calculate minimum, maximum
+ and average for each counter over 2, 3 or 4 samples
+
+ Returned values:
+ ================
+ * 1-4: telemetry is enabled and running
+ * 0: telemetry is disabled
+
+ Example.
+
+ Writing '3' to this file starts the collection of
+ telemetry metrics. Samples are collected every second and
+ stored in a circular buffer of size 3. These values are then
+ used to calculate the minimum, maximum and average for each
+ counter. After enabling, counters can be retrieved through
+ the ``device_data`` file::
+
+ echo 3 > /sys/kernel/debug/qat_4xxx_0000:6b:00.0/telemetry/control
+
+ Writing '0' to this file stops the collection of telemetry
+ metrics::
+
+ echo 0 > /sys/kernel/debug/qat_4xxx_0000:6b:00.0/telemetry/control
+
+ This attribute is only available for qat_4xxx and qat_6xxx devices.
+
+What: /sys/kernel/debug/qat_<device>_<BDF>/telemetry/device_data
+Date: March 2024
+KernelVersion: 6.8
+Contact: qat-linux@intel.com
+Description: (RO) Reports device telemetry counters.
+ Reads report metrics about performance and utilization of
+ a QAT device:
+
+ ======================= ========================================
+ Field Description
+ ======================= ========================================
+ sample_cnt number of acquisitions of telemetry data
+ from the device. Reads are performed
+ every 1000 ms.
+ pci_trans_cnt number of PCIe partial transactions
+ max_rd_lat maximum logged read latency [ns] (could
+ be any read operation)
+ rd_lat_acc_avg average read latency [ns]
+ max_gp_lat max get to put latency [ns] (only takes
+ samples for AE0)
+ gp_lat_acc_avg average get to put latency [ns]
+ bw_in PCIe, write bandwidth [Mbps]
+ bw_out PCIe, read bandwidth [Mbps]
+ re_acc_avg average ring empty time [ns]
+ at_page_req_lat_avg Address Translator(AT), average page
+ request latency [ns]
+ at_trans_lat_avg AT, average page translation latency [ns]
+ at_max_tlb_used AT, maximum uTLB used
+ util_cpr<N> utilization of Compression slice N [%]
+ exec_cpr<N> execution count of Compression slice N
+ util_xlt<N> utilization of Translator slice N [%]
+ exec_xlt<N> execution count of Translator slice N
+ util_dcpr<N> utilization of Decompression slice N [%]
+ exec_dcpr<N> execution count of Decompression slice N
+ util_cnv<N> utilization of Compression and verify slice N [%]
+ exec_cnv<N> execution count of Compression and verify slice N
+ util_dcprz<N> utilization of Decompression slice N [%]
+ exec_dcprz<N> execution count of Decompression slice N
+ util_pke<N> utilization of PKE N [%]
+ exec_pke<N> execution count of PKE N
+ util_ucs<N> utilization of UCS slice N [%]
+ exec_ucs<N> execution count of UCS slice N
+ util_wat<N> utilization of Wireless Authentication
+ slice N [%]
+ exec_wat<N> execution count of Wireless Authentication
+ slice N
+ util_wcp<N> utilization of Wireless Cipher slice N [%]
+ exec_wcp<N> execution count of Wireless Cipher slice N
+ util_cph<N> utilization of Cipher slice N [%]
+ exec_cph<N> execution count of Cipher slice N
+ util_ath<N> utilization of Authentication slice N [%]
+ exec_ath<N> execution count of Authentication slice N
+ cmdq_wait_cnv<N> wait time for cmdq N to get Compression and verify
+ slice ownership
+ cmdq_exec_cnv<N> Compression and verify slice execution time while
+ owned by cmdq N
+ cmdq_drain_cnv<N> time taken for cmdq N to release Compression and
+ verify slice ownership
+ cmdq_wait_dcprz<N> wait time for cmdq N to get Decompression
+ slice N ownership
+ cmdq_exec_dcprz<N> Decompression slice execution time while
+ owned by cmdq N
+ cmdq_drain_dcprz<N> time taken for cmdq N to release Decompression
+ slice ownership
+ cmdq_wait_pke<N> wait time for cmdq N to get PKE slice ownership
+ cmdq_exec_pke<N> PKE slice execution time while owned by cmdq N
+ cmdq_drain_pke<N> time taken for cmdq N to release PKE slice
+ ownership
+ cmdq_wait_ucs<N> wait time for cmdq N to get UCS slice ownership
+ cmdq_exec_ucs<N> UCS slice execution time while owned by cmdq N
+ cmdq_drain_ucs<N> time taken for cmdq N to release UCS slice
+ ownership
+ cmdq_wait_ath<N> wait time for cmdq N to get Authentication slice
+ ownership
+ cmdq_exec_ath<N> Authentication slice execution time while owned
+ by cmdq N
+ cmdq_drain_ath<N> time taken for cmdq N to release Authentication
+ slice ownership
+ ======================= ========================================
+
+ The telemetry report file can be read with the following command::
+
+ cat /sys/kernel/debug/qat_4xxx_0000:6b:00.0/telemetry/device_data
+
+ If ``control`` is set to 1, only the current values of the
+ counters are displayed::
+
+ <counter_name> <current>
+
+ If ``control`` is 2, 3 or 4, counters are displayed in the
+ following format::
+
+ <counter_name> <current> <min> <max> <avg>
+
+ If a device lacks of a specific accelerator, the corresponding
+ attribute is not reported.
+
+ This attribute is only available for qat_4xxx and qat_6xxx devices.
+
+What: /sys/kernel/debug/qat_<device>_<BDF>/telemetry/rp_<A/B/C/D>_data
+Date: March 2024
+KernelVersion: 6.8
+Contact: qat-linux@intel.com
+Description: (RW) Selects up to 4 Ring Pairs (RP) to monitor, one per file,
+ and report telemetry counters related to each.
+
+ Allowed values to write:
+ ========================
+ * 0 to ``<num_rps - 1>``:
+ Ring pair to be monitored. The value of ``num_rps`` can be
+ retrieved through ``/sys/bus/pci/devices/<BDF>/qat/num_rps``.
+ See Documentation/ABI/testing/sysfs-driver-qat.
+
+ Reads report metrics about performance and utilization of
+ the selected RP:
+
+ ======================= ========================================
+ Field Description
+ ======================= ========================================
+ sample_cnt number of acquisitions of telemetry data
+ from the device. Reads are performed
+ every 1000 ms
+ rp_num RP number associated with slot <A/B/C/D>
+ service_type service associated to the RP
+ pci_trans_cnt number of PCIe partial transactions
+ gp_lat_acc_avg average get to put latency [ns]
+ bw_in PCIe, write bandwidth [Mbps]
+ bw_out PCIe, read bandwidth [Mbps]
+ at_glob_devtlb_hit Message descriptor DevTLB hit rate
+ at_glob_devtlb_miss Message descriptor DevTLB miss rate
+ tl_at_payld_devtlb_hit Payload DevTLB hit rate
+ tl_at_payld_devtlb_miss Payload DevTLB miss rate
+ ======================= ========================================
+
+ Example.
+
+ Writing the value '32' to the file ``rp_C_data`` starts the
+ collection of telemetry metrics for ring pair 32::
+
+ echo 32 > /sys/kernel/debug/qat_4xxx_0000:6b:00.0/telemetry/rp_C_data
+
+ Once a ring pair is selected, statistics can be read accessing
+ the file::
+
+ cat /sys/kernel/debug/qat_4xxx_0000:6b:00.0/telemetry/rp_C_data
+
+ If ``control`` is set to 1, only the current values of the
+ counters are displayed::
+
+ <counter_name> <current>
+
+ If ``control`` is 2, 3 or 4, counters are displayed in the
+ following format::
+
+ <counter_name> <current> <min> <max> <avg>
+
+
+ On QAT GEN4 devices there are 64 RPs on a PF, so the allowed
+ values are 0..63. This number is absolute to the device.
+ If Virtual Functions (VF) are used, the ring pair number can
+ be derived from the Bus, Device, Function of the VF:
+
+ ============ ====== ====== ====== ======
+ PCI BDF/VF RP0 RP1 RP2 RP3
+ ============ ====== ====== ====== ======
+ 0000:6b:0.1 RP 0 RP 1 RP 2 RP 3
+ 0000:6b:0.2 RP 4 RP 5 RP 6 RP 7
+ 0000:6b:0.3 RP 8 RP 9 RP 10 RP 11
+ 0000:6b:0.4 RP 12 RP 13 RP 14 RP 15
+ 0000:6b:0.5 RP 16 RP 17 RP 18 RP 19
+ 0000:6b:0.6 RP 20 RP 21 RP 22 RP 23
+ 0000:6b:0.7 RP 24 RP 25 RP 26 RP 27
+ 0000:6b:1.0 RP 28 RP 29 RP 30 RP 31
+ 0000:6b:1.1 RP 32 RP 33 RP 34 RP 35
+ 0000:6b:1.2 RP 36 RP 37 RP 38 RP 39
+ 0000:6b:1.3 RP 40 RP 41 RP 42 RP 43
+ 0000:6b:1.4 RP 44 RP 45 RP 46 RP 47
+ 0000:6b:1.5 RP 48 RP 49 RP 50 RP 51
+ 0000:6b:1.6 RP 52 RP 53 RP 54 RP 55
+ 0000:6b:1.7 RP 56 RP 57 RP 58 RP 59
+ 0000:6b:2.0 RP 60 RP 61 RP 62 RP 63
+ ============ ====== ====== ====== ======
+
+ The mapping is only valid for the BDFs of VFs on the host.
+
+
+ The service provided on a ring-pair varies depending on the
+ configuration. The configuration for a given device can be
+ queried and set using ``cfg_services``.
+ See Documentation/ABI/testing/sysfs-driver-qat for details.
+
+ The following table reports how ring pairs are mapped to VFs
+ on the PF 0000:6b:0.0 configured for `sym;asym` or `asym;sym`:
+
+ =========== ============ =========== ============ ===========
+ PCI BDF/VF RP0/service RP1/service RP2/service RP3/service
+ =========== ============ =========== ============ ===========
+ 0000:6b:0.1 RP 0 asym RP 1 sym RP 2 asym RP 3 sym
+ 0000:6b:0.2 RP 4 asym RP 5 sym RP 6 asym RP 7 sym
+ 0000:6b:0.3 RP 8 asym RP 9 sym RP10 asym RP11 sym
+ ... ... ... ... ...
+ =========== ============ =========== ============ ===========
+
+ All VFs follow the same pattern.
+
+
+ The following table reports how ring pairs are mapped to VFs on
+ the PF 0000:6b:0.0 configured for `dc`:
+
+ =========== ============ =========== ============ ===========
+ PCI BDF/VF RP0/service RP1/service RP2/service RP3/service
+ =========== ============ =========== ============ ===========
+ 0000:6b:0.1 RP 0 dc RP 1 dc RP 2 dc RP 3 dc
+ 0000:6b:0.2 RP 4 dc RP 5 dc RP 6 dc RP 7 dc
+ 0000:6b:0.3 RP 8 dc RP 9 dc RP10 dc RP11 dc
+ ... ... ... ... ...
+ =========== ============ =========== ============ ===========
+
+ The mapping of a RP to a service can be retrieved using
+ ``rp2srv`` from sysfs.
+ See Documentation/ABI/testing/sysfs-driver-qat for details.
+
+ This attribute is only available for qat_4xxx and qat_6xxx devices.
diff --git a/Documentation/ABI/testing/debugfs-dwc-pcie b/Documentation/ABI/testing/debugfs-dwc-pcie
new file mode 100644
index 000000000000..92b76f52a408
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-dwc-pcie
@@ -0,0 +1,157 @@
+What: /sys/kernel/debug/dwc_pcie_<dev>/rasdes_debug/lane_detect
+Date: February 2025
+Contact: Shradha Todi <shradha.t@samsung.com>
+Description: (RW) Write the lane number to be checked for detection. Read
+ will return whether PHY indicates receiver detection on the
+ selected lane. The default selected lane is Lane0.
+
+What: /sys/kernel/debug/dwc_pcie_<dev>/rasdes_debug/rx_valid
+Date: February 2025
+Contact: Shradha Todi <shradha.t@samsung.com>
+Description: (RW) Write the lane number to be checked as valid or invalid.
+ Read will return the status of PIPE RXVALID signal of the
+ selected lane. The default selected lane is Lane0.
+
+What: /sys/kernel/debug/dwc_pcie_<dev>/rasdes_err_inj/<error>
+Date: February 2025
+Contact: Shradha Todi <shradha.t@samsung.com>
+Description: The "rasdes_err_inj" is a directory which can be used to inject
+ errors into the system. The possible errors that can be injected
+ are:
+
+ 1) tx_lcrc - TLP LCRC error injection TX Path
+ 2) b16_crc_dllp - 16b CRC error injection of ACK/NAK DLLP
+ 3) b16_crc_upd_fc - 16b CRC error injection of Update-FC DLLP
+ 4) tx_ecrc - TLP ECRC error injection TX Path
+ 5) fcrc_tlp - TLP's FCRC error injection TX Path
+ 6) parity_tsos - Parity error of TSOS
+ 7) parity_skpos - Parity error on SKPOS
+ 8) rx_lcrc - LCRC error injection RX Path
+ 9) rx_ecrc - ECRC error injection RX Path
+ 10) tlp_err_seq - TLPs SEQ# error
+ 11) ack_nak_dllp_seq - DLLPS ACK/NAK SEQ# error
+ 12) ack_nak_dllp - ACK/NAK DLLPs transmission block
+ 13) upd_fc_dllp - UpdateFC DLLPs transmission block
+ 14) nak_dllp - Always transmission for NAK DLLP
+ 15) inv_sync_hdr_sym - Invert SYNC header
+ 16) com_pad_ts1 - COM/PAD TS1 order set
+ 17) com_pad_ts2 - COM/PAD TS2 order set
+ 18) com_fts - COM/FTS FTS order set
+ 19) com_idl - COM/IDL E-idle order set
+ 20) end_edb - END/EDB symbol
+ 21) stp_sdp - STP/SDP symbol
+ 22) com_skp - COM/SKP SKP order set
+ 23) posted_tlp_hdr - Posted TLP Header credit value control
+ 24) non_post_tlp_hdr - Non-Posted TLP Header credit value control
+ 25) cmpl_tlp_hdr - Completion TLP Header credit value control
+ 26) posted_tlp_data - Posted TLP Data credit value control
+ 27) non_post_tlp_data - Non-Posted TLP Data credit value control
+ 28) cmpl_tlp_data - Completion TLP Data credit value control
+ 29) duplicate_tlp - Generates duplicate TLPs
+ 30) nullified_tlp - Generates Nullified TLPs
+
+ (WO) Write to the attribute will prepare controller to inject
+ the respective error in the next transmission of data.
+
+ Parameter required to write will change in the following ways:
+
+ - Errors 9 and 10 are sequence errors. The write command:
+
+ echo <count> <diff> > /sys/kernel/debug/dwc_pcie_<dev>/rasdes_err_inj/<error>
+
+ <count>
+ Number of errors to be injected
+ <diff>
+ The difference to add or subtract from natural
+ sequence number to generate sequence error.
+ Allowed range from -4095 to 4095
+
+ - Errors 23 to 28 are credit value error insertions. The write
+ command:
+
+ echo <count> <diff> <vc> > /sys/kernel/debug/dwc_pcie_<dev>/rasdes_err_inj/<error>
+
+ <count>
+ Number of errors to be injected
+ <diff>
+ The difference to add or subtract from UpdateFC
+ credit value. Allowed range from -4095 to 4095
+ <vc>
+ Target VC number
+
+ - All other errors. The write command:
+
+ echo <count> > /sys/kernel/debug/dwc_pcie_<dev>/rasdes_err_inj/<error>
+
+ <count>
+ Number of errors to be injected
+
+What: /sys/kernel/debug/dwc_pcie_<dev>/rasdes_event_counters/<event>/counter_enable
+Date: February 2025
+Contact: Shradha Todi <shradha.t@samsung.com>
+Description: The "rasdes_event_counters" is the directory which can be used
+ to collect statistical data about the number of times a certain
+ event has occurred in the controller. The list of possible
+ events are:
+
+ 1) EBUF Overflow
+ 2) EBUF Underrun
+ 3) Decode Error
+ 4) Running Disparity Error
+ 5) SKP OS Parity Error
+ 6) SYNC Header Error
+ 7) Rx Valid De-assertion
+ 8) CTL SKP OS Parity Error
+ 9) 1st Retimer Parity Error
+ 10) 2nd Retimer Parity Error
+ 11) Margin CRC and Parity Error
+ 12) Detect EI Infer
+ 13) Receiver Error
+ 14) RX Recovery Req
+ 15) N_FTS Timeout
+ 16) Framing Error
+ 17) Deskew Error
+ 18) Framing Error In L0
+ 19) Deskew Uncompleted Error
+ 20) Bad TLP
+ 21) LCRC Error
+ 22) Bad DLLP
+ 23) Replay Number Rollover
+ 24) Replay Timeout
+ 25) Rx Nak DLLP
+ 26) Tx Nak DLLP
+ 27) Retry TLP
+ 28) FC Timeout
+ 29) Poisoned TLP
+ 30) ECRC Error
+ 31) Unsupported Request
+ 32) Completer Abort
+ 33) Completion Timeout
+ 34) EBUF SKP Add
+ 35) EBUF SKP Del
+
+ (RW) Write 1 to enable the event counter and write 0 to disable
+ the event counter. Read will return whether the counter is
+ currently enabled or disabled. Counter is disabled by default.
+
+What: /sys/kernel/debug/dwc_pcie_<dev>/rasdes_event_counters/<event>/counter_value
+Date: February 2025
+Contact: Shradha Todi <shradha.t@samsung.com>
+Description: (RO) Read will return the current value of the event counter.
+ To reset the counter, counter should be disabled first and then
+ enabled back using the "counter_enable" attribute.
+
+What: /sys/kernel/debug/dwc_pcie_<dev>/rasdes_event_counters/<event>/lane_select
+Date: February 2025
+Contact: Shradha Todi <shradha.t@samsung.com>
+Description: (RW) Some lanes in the event list are lane specific events.
+ These include events from 1 to 11, as well as, 34 and 35. Write
+ the lane number for which you wish the counter to be enabled,
+ disabled, or value dumped. Read will return the current
+ selected lane number. Lane0 is selected by default.
+
+What: /sys/kernel/debug/dwc_pcie_<dev>/ltssm_status
+Date: February 2025
+Contact: Hans Zhang <18255117159@163.com>
+Description: (RO) Read will return the current PCIe LTSSM state in both
+ string and raw value.
diff --git a/Documentation/ABI/testing/debugfs-ec b/Documentation/ABI/testing/debugfs-ec
index 6546115a94da..ab6099daa8f5 100644
--- a/Documentation/ABI/testing/debugfs-ec
+++ b/Documentation/ABI/testing/debugfs-ec
@@ -6,7 +6,7 @@ Description:
General information like which GPE is assigned to the EC and whether
the global lock should get used.
Knowing the EC GPE one can watch the amount of HW events related to
-the EC here (XY -> GPE number from /sys/kernel/debug/ec/*/gpe):
+the EC here (XY -> GPE number from `/sys/kernel/debug/ec/*/gpe`):
/sys/firmware/acpi/interrupts/gpeXY
The io file is binary and a userspace tool located here:
@@ -14,7 +14,8 @@ ftp://ftp.suse.com/pub/people/trenn/sources/ec/
should get used to read out the 256 Embedded Controller registers
or writing to them.
-CAUTION: Do not write to the Embedded Controller if you don't know
-what you are doing! Rebooting afterwards also is a good idea.
-This can influence the way your machine is cooled and fans may
-not get switched on again after you did a wrong write.
+CAUTION:
+ Do not write to the Embedded Controller if you don't know
+ what you are doing! Rebooting afterwards also is a good idea.
+ This can influence the way your machine is cooled and fans may
+ not get switched on again after you did a wrong write.
diff --git a/Documentation/ABI/testing/debugfs-hisi-hpre b/Documentation/ABI/testing/debugfs-hisi-hpre
new file mode 100644
index 000000000000..29fb7d5ffc69
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-hisi-hpre
@@ -0,0 +1,193 @@
+What: /sys/kernel/debug/hisi_hpre/<bdf>/cluster[0-3]/regs
+Date: Sep 2019
+Contact: linux-crypto@vger.kernel.org
+Description: Dump debug registers from the HPRE cluster.
+ Only available for PF.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/cluster[0-3]/cluster_ctrl
+Date: Sep 2019
+Contact: linux-crypto@vger.kernel.org
+Description: Write the HPRE core selection in the cluster into this file,
+ and then we can read the debug information of the core.
+ Only available for PF.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/rdclr_en
+Date: Sep 2019
+Contact: linux-crypto@vger.kernel.org
+Description: HPRE cores debug registers read clear control. 1 means enable
+ register read clear, otherwise 0. Writing to this file has no
+ functional effect, only enable or disable counters clear after
+ reading of these registers.
+ Only available for PF.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/current_qm
+Date: Sep 2019
+Contact: linux-crypto@vger.kernel.org
+Description: One HPRE controller has one PF and multiple VFs, each function
+ has a QM. Select the QM which below qm refers to.
+ Only available for PF.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/alg_qos
+Date: Jun 2021
+Contact: linux-crypto@vger.kernel.org
+Description: The <bdf> is related the function for PF and VF.
+ HPRE driver supports to configure each function's QoS, the driver
+ supports to write <bdf> value to alg_qos in the host. Such as
+ "echo <bdf> value > alg_qos". The qos value is 1~1000, means
+ 1/1000~1000/1000 of total QoS. The driver reading alg_qos to
+ get related QoS in the host and VM, Such as "cat alg_qos".
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/regs
+Date: Sep 2019
+Contact: linux-crypto@vger.kernel.org
+Description: Dump debug registers from the HPRE.
+ Only available for PF.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/regs
+Date: Sep 2019
+Contact: linux-crypto@vger.kernel.org
+Description: Dump debug registers from the QM.
+ Available for PF and VF in host. VF in guest currently only
+ has one debug register.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/current_q
+Date: Sep 2019
+Contact: linux-crypto@vger.kernel.org
+Description: One QM may contain multiple queues. Select specific queue to
+ show its debug registers in above regs.
+ Only available for PF.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/clear_enable
+Date: Sep 2019
+Contact: linux-crypto@vger.kernel.org
+Description: QM debug registers(regs) read clear control. 1 means enable
+ register read clear, otherwise 0.
+ Writing to this file has no functional effect, only enable or
+ disable counters clear after reading of these registers.
+ Only available for PF.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/err_irq
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of invalid interrupts for
+ QM task completion.
+ Available for both PF and VF, and take no other effect on HPRE.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/aeq_irq
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of QM async event queue interrupts.
+ Available for both PF and VF, and take no other effect on HPRE.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/abnormal_irq
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of interrupts for QM abnormal event.
+ Available for both PF and VF, and take no other effect on HPRE.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/create_qp_err
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of queue allocation errors.
+ Available for both PF and VF, and take no other effect on HPRE.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/mb_err
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of failed QM mailbox commands.
+ Available for both PF and VF, and take no other effect on HPRE.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/status
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the status of the QM.
+ Two states: work, stop.
+ Available for both PF and VF, and take no other effect on HPRE.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/diff_regs
+Date: Mar 2022
+Contact: linux-crypto@vger.kernel.org
+Description: QM debug registers(regs) read hardware register value. This
+ node is used to show the change of the qm register values. This
+ node can be help users to check the change of register values.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/qm_state
+Date: Jan 2024
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the state of the device.
+ 0: busy, 1: idle.
+ Only available for PF, and take no other effect on HPRE.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/dev_timeout
+Date: Feb 2024
+Contact: linux-crypto@vger.kernel.org
+Description: Set the wait time when stop queue fails. Available for both PF
+ and VF, and take no other effect on HPRE.
+ 0: not wait(default), others value: wait dev_timeout * 20 microsecond.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/dev_state
+Date: Feb 2024
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the stop queue status of the QM. The default value is 0,
+ if dev_timeout is set, when stop queue fails, the dev_state
+ will return non-zero value. Available for both PF and VF,
+ and take no other effect on HPRE.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/diff_regs
+Date: Mar 2022
+Contact: linux-crypto@vger.kernel.org
+Description: HPRE debug registers(regs) read hardware register value. This
+ node is used to show the change of the register values. This
+ node can be help users to check the change of register values.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/send_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of sent requests.
+ Available for both PF and VF, and take no other effect on HPRE.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/recv_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of received requests.
+ Available for both PF and VF, and take no other effect on HPRE.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/send_busy_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of requests sent
+ with returning busy.
+ Available for both PF and VF, and take no other effect on HPRE.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/send_fail_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of completed but error requests.
+ Available for both PF and VF, and take no other effect on HPRE.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/invalid_req_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of invalid requests being received.
+ Available for both PF and VF, and take no other effect on HPRE.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/overtime_thrhld
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Set the threshold time for counting the request which is
+ processed longer than the threshold.
+ 0: disable(default), 1: 1 microsecond.
+ Available for both PF and VF, and take no other effect on HPRE.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/over_thrhld_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of time out requests.
+ Available for both PF and VF, and take no other effect on HPRE.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/cap_regs
+Date: Oct 2024
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the values of the qm and hpre capability bit registers and
+ support the query of device specifications to facilitate fault locating.
+ Available for both PF and VF, and take no other effect on HPRE.
diff --git a/Documentation/ABI/testing/debugfs-hisi-migration b/Documentation/ABI/testing/debugfs-hisi-migration
new file mode 100644
index 000000000000..2c01b2d387dd
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-hisi-migration
@@ -0,0 +1,25 @@
+What: /sys/kernel/debug/vfio/<device>/migration/hisi_acc/dev_data
+Date: Jan 2025
+KernelVersion: 6.13
+Contact: Longfang Liu <liulongfang@huawei.com>
+Description: Read the configuration data and some status data
+ required for device live migration. These data include device
+ status data, queue configuration data, some task configuration
+ data and device attribute data. The output format of the data
+ is defined by the live migration driver.
+
+What: /sys/kernel/debug/vfio/<device>/migration/hisi_acc/migf_data
+Date: Jan 2025
+KernelVersion: 6.13
+Contact: Longfang Liu <liulongfang@huawei.com>
+Description: Read the data from the last completed live migration.
+ This data includes the same device status data as in "dev_data".
+ The migf_data is the dev_data that is migrated.
+
+What: /sys/kernel/debug/vfio/<device>/migration/hisi_acc/cmd_state
+Date: Jan 2025
+KernelVersion: 6.13
+Contact: Longfang Liu <liulongfang@huawei.com>
+Description: Used to obtain the device command sending and receiving
+ channel status. Returns failure or success logs based on the
+ results.
diff --git a/Documentation/ABI/testing/debugfs-hisi-sec b/Documentation/ABI/testing/debugfs-hisi-sec
new file mode 100644
index 000000000000..82bf4a0dc7f7
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-hisi-sec
@@ -0,0 +1,166 @@
+What: /sys/kernel/debug/hisi_sec2/<bdf>/clear_enable
+Date: Oct 2019
+Contact: linux-crypto@vger.kernel.org
+Description: Enabling/disabling of clear action after reading
+ the SEC debug registers.
+ 0: disable, 1: enable.
+ Only available for PF, and take no other effect on SEC.
+
+What: /sys/kernel/debug/hisi_sec2/<bdf>/current_qm
+Date: Oct 2019
+Contact: linux-crypto@vger.kernel.org
+Description: One SEC controller has one PF and multiple VFs, each function
+ has a QM. This file can be used to select the QM which below
+ qm refers to.
+ Only available for PF.
+
+What: /sys/kernel/debug/hisi_sec2/<bdf>/alg_qos
+Date: Jun 2021
+Contact: linux-crypto@vger.kernel.org
+Description: The <bdf> is related the function for PF and VF.
+ SEC driver supports to configure each function's QoS, the driver
+ supports to write <bdf> value to alg_qos in the host. Such as
+ "echo <bdf> value > alg_qos". The qos value is 1~1000, means
+ 1/1000~1000/1000 of total QoS. The driver reading alg_qos to
+ get related QoS in the host and VM, Such as "cat alg_qos".
+
+What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/qm_regs
+Date: Oct 2019
+Contact: linux-crypto@vger.kernel.org
+Description: Dump of QM related debug registers.
+ Available for PF and VF in host. VF in guest currently only
+ has one debug register.
+
+What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/current_q
+Date: Oct 2019
+Contact: linux-crypto@vger.kernel.org
+Description: One QM of SEC may contain multiple queues. Select specific
+ queue to show its debug registers in above 'regs'.
+ Only available for PF.
+
+What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/clear_enable
+Date: Oct 2019
+Contact: linux-crypto@vger.kernel.org
+Description: Enabling/disabling of clear action after reading
+ the SEC's QM debug registers.
+ 0: disable, 1: enable.
+ Only available for PF, and take no other effect on SEC.
+
+What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/err_irq
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of invalid interrupts for
+ QM task completion.
+ Available for both PF and VF, and take no other effect on SEC.
+
+What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/aeq_irq
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of QM async event queue interrupts.
+ Available for both PF and VF, and take no other effect on SEC.
+
+What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/abnormal_irq
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of interrupts for QM abnormal event.
+ Available for both PF and VF, and take no other effect on SEC.
+
+What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/create_qp_err
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of queue allocation errors.
+ Available for both PF and VF, and take no other effect on SEC.
+
+What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/mb_err
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of failed QM mailbox commands.
+ Available for both PF and VF, and take no other effect on SEC.
+
+What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/status
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the status of the QM.
+ Two states: work, stop.
+ Available for both PF and VF, and take no other effect on SEC.
+
+What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/diff_regs
+Date: Mar 2022
+Contact: linux-crypto@vger.kernel.org
+Description: QM debug registers(regs) read hardware register value. This
+ node is used to show the change of the qm register values. This
+ node can be help users to check the change of register values.
+
+What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/qm_state
+Date: Jan 2024
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the state of the device.
+ 0: busy, 1: idle.
+ Only available for PF, and take no other effect on SEC.
+
+What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/dev_timeout
+Date: Feb 2024
+Contact: linux-crypto@vger.kernel.org
+Description: Set the wait time when stop queue fails. Available for both PF
+ and VF, and take no other effect on SEC.
+ 0: not wait(default), others value: wait dev_timeout * 20 microsecond.
+
+What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/dev_state
+Date: Feb 2024
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the stop queue status of the QM. The default value is 0,
+ if dev_timeout is set, when stop queue fails, the dev_state
+ will return non-zero value. Available for both PF and VF,
+ and take no other effect on SEC.
+
+What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/diff_regs
+Date: Mar 2022
+Contact: linux-crypto@vger.kernel.org
+Description: SEC debug registers(regs) read hardware register value. This
+ node is used to show the change of the register values. This
+ node can be help users to check the change of register values.
+
+What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/send_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of sent requests.
+ Available for both PF and VF, and take no other effect on SEC.
+
+What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/recv_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of received requests.
+ Available for both PF and VF, and take no other effect on SEC.
+
+What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/send_busy_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of requests sent with returning busy.
+ Available for both PF and VF, and take no other effect on SEC.
+
+What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/err_bd_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of BD type error requests
+ to be received.
+ Available for both PF and VF, and take no other effect on SEC.
+
+What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/invalid_req_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of invalid requests being received.
+ Available for both PF and VF, and take no other effect on SEC.
+
+What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/done_flag_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of completed but marked error requests
+ to be received.
+ Available for both PF and VF, and take no other effect on SEC.
+
+What: /sys/kernel/debug/hisi_sec2/<bdf>/cap_regs
+Date: Oct 2024
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the values of the qm and sec capability bit registers and
+ support the query of device specifications to facilitate fault locating.
+ Available for both PF and VF, and take no other effect on SEC.
diff --git a/Documentation/ABI/testing/debugfs-hisi-zip b/Documentation/ABI/testing/debugfs-hisi-zip
new file mode 100644
index 000000000000..0abd65d27e9b
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-hisi-zip
@@ -0,0 +1,167 @@
+What: /sys/kernel/debug/hisi_zip/<bdf>/comp_core[01]/regs
+Date: Nov 2018
+Contact: linux-crypto@vger.kernel.org
+Description: Dump of compression cores related debug registers.
+ Only available for PF.
+
+What: /sys/kernel/debug/hisi_zip/<bdf>/decomp_core[0-5]/regs
+Date: Nov 2018
+Contact: linux-crypto@vger.kernel.org
+Description: Dump of decompression cores related debug registers.
+ Only available for PF.
+
+What: /sys/kernel/debug/hisi_zip/<bdf>/clear_enable
+Date: Nov 2018
+Contact: linux-crypto@vger.kernel.org
+Description: Compression/decompression core debug registers read clear
+ control. 1 means enable register read clear, otherwise 0.
+ Writing to this file has no functional effect, only enable or
+ disable counters clear after reading of these registers.
+ Only available for PF.
+
+What: /sys/kernel/debug/hisi_zip/<bdf>/current_qm
+Date: Nov 2018
+Contact: linux-crypto@vger.kernel.org
+Description: One ZIP controller has one PF and multiple VFs, each function
+ has a QM. Select the QM which below qm refers to.
+ Only available for PF.
+
+What: /sys/kernel/debug/hisi_zip/<bdf>/alg_qos
+Date: Jun 2021
+Contact: linux-crypto@vger.kernel.org
+Description: The <bdf> is related the function for PF and VF.
+ ZIP driver supports to configure each function's QoS, the driver
+ supports to write <bdf> value to alg_qos in the host. Such as
+ "echo <bdf> value > alg_qos". The qos value is 1~1000, means
+ 1/1000~1000/1000 of total QoS. The driver reading alg_qos to
+ get related QoS in the host and VM, Such as "cat alg_qos".
+
+What: /sys/kernel/debug/hisi_zip/<bdf>/qm/regs
+Date: Nov 2018
+Contact: linux-crypto@vger.kernel.org
+Description: Dump of QM related debug registers.
+ Available for PF and VF in host. VF in guest currently only
+ has one debug register.
+
+What: /sys/kernel/debug/hisi_zip/<bdf>/qm/current_q
+Date: Nov 2018
+Contact: linux-crypto@vger.kernel.org
+Description: One QM may contain multiple queues. Select specific queue to
+ show its debug registers in above regs.
+ Only available for PF.
+
+What: /sys/kernel/debug/hisi_zip/<bdf>/qm/clear_enable
+Date: Nov 2018
+Contact: linux-crypto@vger.kernel.org
+Description: QM debug registers(regs) read clear control. 1 means enable
+ register read clear, otherwise 0.
+ Writing to this file has no functional effect, only enable or
+ disable counters clear after reading of these registers.
+ Only available for PF.
+
+What: /sys/kernel/debug/hisi_zip/<bdf>/qm/err_irq
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of invalid interrupts for
+ QM task completion.
+ Available for both PF and VF, and take no other effect on ZIP.
+
+What: /sys/kernel/debug/hisi_zip/<bdf>/qm/aeq_irq
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of QM async event queue interrupts.
+ Available for both PF and VF, and take no other effect on ZIP.
+
+What: /sys/kernel/debug/hisi_zip/<bdf>/qm/abnormal_irq
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of interrupts for QM abnormal event.
+ Available for both PF and VF, and take no other effect on ZIP.
+
+What: /sys/kernel/debug/hisi_zip/<bdf>/qm/create_qp_err
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of queue allocation errors.
+ Available for both PF and VF, and take no other effect on ZIP.
+
+What: /sys/kernel/debug/hisi_zip/<bdf>/qm/mb_err
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of failed QM mailbox commands.
+ Available for both PF and VF, and take no other effect on ZIP.
+
+What: /sys/kernel/debug/hisi_zip/<bdf>/qm/status
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the status of the QM.
+ Two states: work, stop.
+ Available for both PF and VF, and take no other effect on ZIP.
+
+What: /sys/kernel/debug/hisi_zip/<bdf>/qm/diff_regs
+Date: Mar 2022
+Contact: linux-crypto@vger.kernel.org
+Description: QM debug registers(regs) read hardware register value. This
+ node is used to show the change of the qm registers value. This
+ node can be help users to check the change of register values.
+
+What: /sys/kernel/debug/hisi_zip/<bdf>/qm/qm_state
+Date: Jan 2024
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the state of the device.
+ 0: busy, 1: idle.
+ Only available for PF, and take no other effect on ZIP.
+
+What: /sys/kernel/debug/hisi_zip/<bdf>/qm/dev_timeout
+Date: Feb 2024
+Contact: linux-crypto@vger.kernel.org
+Description: Set the wait time when stop queue fails. Available for both PF
+ and VF, and take no other effect on ZIP.
+ 0: not wait(default), others value: wait dev_timeout * 20 microsecond.
+
+What: /sys/kernel/debug/hisi_zip/<bdf>/qm/dev_state
+Date: Feb 2024
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the stop queue status of the QM. The default value is 0,
+ if dev_timeout is set, when stop queue fails, the dev_state
+ will return non-zero value. Available for both PF and VF,
+ and take no other effect on ZIP.
+
+What: /sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/diff_regs
+Date: Mar 2022
+Contact: linux-crypto@vger.kernel.org
+Description: ZIP debug registers(regs) read hardware register value. This
+ node is used to show the change of the registers value. this
+ node can be help users to check the change of register values.
+
+What: /sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/send_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of sent requests.
+ Available for both PF and VF, and take no other effect on ZIP.
+
+What: /sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/recv_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of received requests.
+ Available for both PF and VF, and take no other effect on ZIP.
+
+What: /sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/send_busy_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of requests received
+ with returning busy.
+ Available for both PF and VF, and take no other effect on ZIP.
+
+What: /sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/err_bd_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of BD type error requests
+ to be received.
+ Available for both PF and VF, and take no other effect on ZIP.
+
+What: /sys/kernel/debug/hisi_zip/<bdf>/cap_regs
+Date: Oct 2024
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the values of the qm and zip capability bit registers and
+ support the query of device specifications to facilitate fault locating.
+ Available for both PF and VF, and take no other effect on ZIP.
diff --git a/Documentation/ABI/testing/debugfs-hyperv b/Documentation/ABI/testing/debugfs-hyperv
new file mode 100644
index 000000000000..9185e1b06bba
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-hyperv
@@ -0,0 +1,23 @@
+What: /sys/kernel/debug/hyperv/<UUID>/fuzz_test_state
+Date: October 2019
+KernelVersion: 5.5
+Contact: Branden Bonaby <brandonbonaby94@gmail.com>
+Description: Fuzz testing status of a vmbus device, whether its in an ON
+ state or a OFF state
+Users: Debugging tools
+
+What: /sys/kernel/debug/hyperv/<UUID>/delay/fuzz_test_buffer_interrupt_delay
+Date: October 2019
+KernelVersion: 5.5
+Contact: Branden Bonaby <brandonbonaby94@gmail.com>
+Description: Fuzz testing buffer interrupt delay value between 0 - 1000
+ microseconds (inclusive).
+Users: Debugging tools
+
+What: /sys/kernel/debug/hyperv/<UUID>/delay/fuzz_test_message_delay
+Date: October 2019
+KernelVersion: 5.5
+Contact: Branden Bonaby <brandonbonaby94@gmail.com>
+Description: Fuzz testing message delay value between 0 - 1000 microseconds
+ (inclusive).
+Users: Debugging tools
diff --git a/Documentation/ABI/testing/debugfs-iio-ad9467 b/Documentation/ABI/testing/debugfs-iio-ad9467
new file mode 100644
index 000000000000..0352fca1f7f2
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-iio-ad9467
@@ -0,0 +1,39 @@
+What: /sys/kernel/debug/iio/iio:deviceX/calibration_table_dump
+KernelVersion: 6.11
+Contact: linux-iio@vger.kernel.org
+Description:
+ This dumps the calibration table that was filled during the
+ digital interface tuning process.
+
+What: /sys/kernel/debug/iio/iio:deviceX/in_voltage_test_mode_available
+KernelVersion: 6.11
+Contact: linux-iio@vger.kernel.org
+Description:
+ List all the available test tones:
+ - off
+ - midscale_short
+ - pos_fullscale
+ - neg_fullscale
+ - checkerboard
+ - prbs23
+ - prbs9
+ - one_zero_toggle
+ - user
+ - bit_toggle
+ - sync
+ - one_bit_high
+ - mixed_bit_frequency
+ - ramp
+
+ Note that depending on the actual device being used, some of the
+ above might not be available (and they won't be listed when
+ reading the file).
+
+What: /sys/kernel/debug/iio/iio:deviceX/in_voltageY_test_mode
+KernelVersion: 6.11
+Contact: linux-iio@vger.kernel.org
+Description:
+ Writing to this file will initiate one of available test tone on
+ channel Y. Reading it, shows which test is running. In cases
+ where an IIO backend is available and supports the test tone,
+ additional information about the data correctness is given.
diff --git a/Documentation/ABI/testing/debugfs-iio-backend b/Documentation/ABI/testing/debugfs-iio-backend
new file mode 100644
index 000000000000..01ab94469432
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-iio-backend
@@ -0,0 +1,20 @@
+What: /sys/kernel/debug/iio/iio:deviceX/backendY/name
+KernelVersion: 6.11
+Contact: linux-iio@vger.kernel.org
+Description:
+ Name of Backend Y connected to device X.
+
+What: /sys/kernel/debug/iio/iio:deviceX/backendY/direct_reg_access
+KernelVersion: 6.11
+Contact: linux-iio@vger.kernel.org
+Description:
+ Directly access the registers of backend Y. Typical usage is:
+
+ Reading address 0x50
+ echo 0x50 > direct_reg_access
+ cat direct_reg_access
+
+ Writing address 0x50
+ echo 0x50 0x3 > direct_reg_access
+ //readback address 0x50
+ cat direct_reg_access
diff --git a/Documentation/ABI/testing/debugfs-intel-iommu b/Documentation/ABI/testing/debugfs-intel-iommu
new file mode 100644
index 000000000000..2ab8464504a9
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-intel-iommu
@@ -0,0 +1,276 @@
+What: /sys/kernel/debug/iommu/intel/iommu_regset
+Date: December 2023
+Contact: Jingqi Liu <Jingqi.liu@intel.com>
+Description:
+ This file dumps all the register contents for each IOMMU device.
+
+ Example in Kabylake:
+
+ ::
+
+ $ sudo cat /sys/kernel/debug/iommu/intel/iommu_regset
+
+ IOMMU: dmar0 Register Base Address: 26be37000
+
+ Name Offset Contents
+ VER 0x00 0x0000000000000010
+ GCMD 0x18 0x0000000000000000
+ GSTS 0x1c 0x00000000c7000000
+ FSTS 0x34 0x0000000000000000
+ FECTL 0x38 0x0000000000000000
+
+ [...]
+
+ IOMMU: dmar1 Register Base Address: fed90000
+
+ Name Offset Contents
+ VER 0x00 0x0000000000000010
+ GCMD 0x18 0x0000000000000000
+ GSTS 0x1c 0x00000000c7000000
+ FSTS 0x34 0x0000000000000000
+ FECTL 0x38 0x0000000000000000
+
+ [...]
+
+ IOMMU: dmar2 Register Base Address: fed91000
+
+ Name Offset Contents
+ VER 0x00 0x0000000000000010
+ GCMD 0x18 0x0000000000000000
+ GSTS 0x1c 0x00000000c7000000
+ FSTS 0x34 0x0000000000000000
+ FECTL 0x38 0x0000000000000000
+
+ [...]
+
+What: /sys/kernel/debug/iommu/intel/ir_translation_struct
+Date: December 2023
+Contact: Jingqi Liu <Jingqi.liu@intel.com>
+Description:
+ This file dumps the table entries for Interrupt
+ remapping and Interrupt posting.
+
+ Example in Kabylake:
+
+ ::
+
+ $ sudo cat /sys/kernel/debug/iommu/intel/ir_translation_struct
+
+ Remapped Interrupt supported on IOMMU: dmar0
+ IR table address:100900000
+
+ Entry SrcID DstID Vct IRTE_high IRTE_low
+ 0 00:0a.0 00000080 24 0000000000040050 000000800024000d
+ 1 00:0a.0 00000001 ef 0000000000040050 0000000100ef000d
+
+ Remapped Interrupt supported on IOMMU: dmar1
+ IR table address:100300000
+ Entry SrcID DstID Vct IRTE_high IRTE_low
+ 0 00:02.0 00000002 26 0000000000040010 000000020026000d
+
+ [...]
+
+ ****
+
+ Posted Interrupt supported on IOMMU: dmar0
+ IR table address:100900000
+ Entry SrcID PDA_high PDA_low Vct IRTE_high IRTE_low
+
+What: /sys/kernel/debug/iommu/intel/dmar_translation_struct
+Date: December 2023
+Contact: Jingqi Liu <Jingqi.liu@intel.com>
+Description:
+ This file dumps Intel IOMMU DMA remapping tables, such
+ as root table, context table, PASID directory and PASID
+ table entries in debugfs. For legacy mode, it doesn't
+ support PASID, and hence PASID field is defaulted to
+ '-1' and other PASID related fields are invalid.
+
+ Example in Kabylake:
+
+ ::
+
+ $ sudo cat /sys/kernel/debug/iommu/intel/dmar_translation_struct
+
+ IOMMU dmar1: Root Table Address: 0x103027000
+ B.D.F Root_entry
+ 00:02.0 0x0000000000000000:0x000000010303e001
+
+ Context_entry
+ 0x0000000000000102:0x000000010303f005
+
+ PASID PASID_table_entry
+ -1 0x0000000000000000:0x0000000000000000:0x0000000000000000
+
+ IOMMU dmar0: Root Table Address: 0x103028000
+ B.D.F Root_entry
+ 00:0a.0 0x0000000000000000:0x00000001038a7001
+
+ Context_entry
+ 0x0000000000000000:0x0000000103220e7d
+
+ PASID PASID_table_entry
+ 0 0x0000000000000000:0x0000000000800002:0x00000001038a5089
+
+ [...]
+
+What: /sys/kernel/debug/iommu/intel/invalidation_queue
+Date: December 2023
+Contact: Jingqi Liu <Jingqi.liu@intel.com>
+Description:
+ This file exports invalidation queue internals of each
+ IOMMU device.
+
+ Example in Kabylake:
+
+ ::
+
+ $ sudo cat /sys/kernel/debug/iommu/intel/invalidation_queue
+
+ Invalidation queue on IOMMU: dmar0
+ Base: 0x10022e000 Head: 20 Tail: 20
+ Index qw0 qw1 qw2
+ 0 0000000000000014 0000000000000000 0000000000000000
+ 1 0000000200000025 0000000100059c04 0000000000000000
+ 2 0000000000000014 0000000000000000 0000000000000000
+
+ qw3 status
+ 0000000000000000 0000000000000000
+ 0000000000000000 0000000000000000
+ 0000000000000000 0000000000000000
+
+ [...]
+
+ Invalidation queue on IOMMU: dmar1
+ Base: 0x10026e000 Head: 32 Tail: 32
+ Index qw0 qw1 status
+ 0 0000000000000004 0000000000000000 0000000000000000
+ 1 0000000200000025 0000000100059804 0000000000000000
+ 2 0000000000000011 0000000000000000 0000000000000000
+
+ [...]
+
+What: /sys/kernel/debug/iommu/intel/dmar_perf_latency
+Date: December 2023
+Contact: Jingqi Liu <Jingqi.liu@intel.com>
+Description:
+ This file is used to control and show counts of
+ execution time ranges for various types per DMAR.
+
+ Firstly, write a value to
+ /sys/kernel/debug/iommu/intel/dmar_perf_latency
+ to enable sampling.
+
+ The possible values are as follows:
+
+ * 0 - disable sampling all latency data
+
+ * 1 - enable sampling IOTLB invalidation latency data
+
+ * 2 - enable sampling devTLB invalidation latency data
+
+ * 3 - enable sampling intr entry cache invalidation latency data
+
+ Next, read /sys/kernel/debug/iommu/intel/dmar_perf_latency gives
+ a snapshot of sampling result of all enabled monitors.
+
+ Examples in Kabylake:
+
+ ::
+
+ 1) Disable sampling all latency data:
+
+ $ sudo echo 0 > /sys/kernel/debug/iommu/intel/dmar_perf_latency
+
+ 2) Enable sampling IOTLB invalidation latency data
+
+ $ sudo echo 1 > /sys/kernel/debug/iommu/intel/dmar_perf_latency
+
+ $ sudo cat /sys/kernel/debug/iommu/intel/dmar_perf_latency
+
+ IOMMU: dmar0 Register Base Address: 26be37000
+ <0.1us 0.1us-1us 1us-10us 10us-100us 100us-1ms
+ inv_iotlb 0 0 0 0 0
+
+ 1ms-10ms >=10ms min(us) max(us) average(us)
+ inv_iotlb 0 0 0 0 0
+
+ [...]
+
+ IOMMU: dmar2 Register Base Address: fed91000
+ <0.1us 0.1us-1us 1us-10us 10us-100us 100us-1ms
+ inv_iotlb 0 0 18 0 0
+
+ 1ms-10ms >=10ms min(us) max(us) average(us)
+ inv_iotlb 0 0 2 2 2
+
+ 3) Enable sampling devTLB invalidation latency data
+
+ $ sudo echo 2 > /sys/kernel/debug/iommu/intel/dmar_perf_latency
+
+ $ sudo cat /sys/kernel/debug/iommu/intel/dmar_perf_latency
+
+ IOMMU: dmar0 Register Base Address: 26be37000
+ <0.1us 0.1us-1us 1us-10us 10us-100us 100us-1ms
+ inv_devtlb 0 0 0 0 0
+
+ >=10ms min(us) max(us) average(us)
+ inv_devtlb 0 0 0 0
+
+ [...]
+
+What: /sys/kernel/debug/iommu/intel/<bdf>/domain_translation_struct
+Date: December 2023
+Contact: Jingqi Liu <Jingqi.liu@intel.com>
+Description:
+ This file dumps a specified page table of Intel IOMMU
+ in legacy mode or scalable mode.
+
+ For a device that only supports legacy mode, dump its
+ page table by the debugfs file in the debugfs device
+ directory. e.g.
+ /sys/kernel/debug/iommu/intel/0000:00:02.0/domain_translation_struct.
+
+ For a device that supports scalable mode, dump the
+ page table of specified pasid by the debugfs file in
+ the debugfs pasid directory. e.g.
+ /sys/kernel/debug/iommu/intel/0000:00:02.0/1/domain_translation_struct.
+
+ Examples in Kabylake:
+
+ ::
+
+ 1) Dump the page table of device "0000:00:02.0" that only supports legacy mode.
+
+ $ sudo cat /sys/kernel/debug/iommu/intel/0000:00:02.0/domain_translation_struct
+
+ Device 0000:00:02.0 @0x1017f8000
+ IOVA_PFN PML5E PML4E
+ 0x000000008d800 | 0x0000000000000000 0x00000001017f9003
+ 0x000000008d801 | 0x0000000000000000 0x00000001017f9003
+ 0x000000008d802 | 0x0000000000000000 0x00000001017f9003
+
+ PDPE PDE PTE
+ 0x00000001017fa003 0x00000001017fb003 0x000000008d800003
+ 0x00000001017fa003 0x00000001017fb003 0x000000008d801003
+ 0x00000001017fa003 0x00000001017fb003 0x000000008d802003
+
+ [...]
+
+ 2) Dump the page table of device "0000:00:0a.0" with PASID "1" that
+ supports scalable mode.
+
+ $ sudo cat /sys/kernel/debug/iommu/intel/0000:00:0a.0/1/domain_translation_struct
+
+ Device 0000:00:0a.0 with pasid 1 @0x10c112000
+ IOVA_PFN PML5E PML4E
+ 0x0000000000000 | 0x0000000000000000 0x000000010df93003
+ 0x0000000000001 | 0x0000000000000000 0x000000010df93003
+ 0x0000000000002 | 0x0000000000000000 0x000000010df93003
+
+ PDPE PDE PTE
+ 0x0000000106ae6003 0x0000000104b38003 0x0000000147c00803
+ 0x0000000106ae6003 0x0000000104b38003 0x0000000147c01803
+ 0x0000000106ae6003 0x0000000104b38003 0x0000000147c02803
+
+ [...]
diff --git a/Documentation/ABI/testing/debugfs-moxtet b/Documentation/ABI/testing/debugfs-moxtet
new file mode 100644
index 000000000000..637d8587d03d
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-moxtet
@@ -0,0 +1,35 @@
+What: /sys/kernel/debug/moxtet/input
+Date: March 2019
+KernelVersion: 5.3
+Contact: Marek Behún <kabel@kernel.org>
+Description: (Read) Read input from the shift registers, in hexadecimal.
+ Returns N+1 bytes, where N is the number of Moxtet connected
+ modules. The first byte is from the CPU board itself.
+
+ Example::
+
+ 101214
+
+ == =======================================
+ 10 CPU board with SD card
+ 12 2 = PCIe module, 1 = IRQ not active
+ 14 4 = Peridot module, 1 = IRQ not active
+ == =======================================
+
+What: /sys/kernel/debug/moxtet/output
+Date: March 2019
+KernelVersion: 5.3
+Contact: Marek Behún <kabel@kernel.org>
+Description: (RW) Read last written value to the shift registers, in
+ hexadecimal, or write values to the shift registers, also
+ in hexadecimal.
+
+ Example::
+
+ 0102
+
+ == ================================================
+ 01 01 was last written, or is to be written, to the
+ first module's shift register
+ 02 the same for second module
+ == ================================================
diff --git a/Documentation/ABI/testing/debugfs-msi-wmi-platform b/Documentation/ABI/testing/debugfs-msi-wmi-platform
new file mode 100644
index 000000000000..71f9992168d8
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-msi-wmi-platform
@@ -0,0 +1,14 @@
+What: /sys/kernel/debug/msi-wmi-platform-<wmi_device_name>/*
+Date: April 2024
+KernelVersion: 6.10
+Contact: Armin Wolf <W_Armin@gmx.de>
+Description:
+ This file allows to execute the associated WMI method with the same name.
+
+ To start the execution, write a buffer containing the method arguments
+ at file offset 0. Partial writes or writes at a different offset are not
+ supported.
+
+ The buffer returned by the WMI method can then be read from the file.
+
+ See Documentation/wmi/devices/msi-wmi-platform.rst for details.
diff --git a/Documentation/ABI/testing/debugfs-pcie-ptm b/Documentation/ABI/testing/debugfs-pcie-ptm
new file mode 100644
index 000000000000..602d41363571
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-pcie-ptm
@@ -0,0 +1,70 @@
+What: /sys/kernel/debug/pcie_ptm_*/local_clock
+Date: May 2025
+Contact: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Description:
+ (RO) PTM local clock in nanoseconds. Applicable for both Root
+ Complex and Endpoint controllers.
+
+What: /sys/kernel/debug/pcie_ptm_*/master_clock
+Date: May 2025
+Contact: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Description:
+ (RO) PTM master clock in nanoseconds. Applicable only for
+ Endpoint controllers.
+
+What: /sys/kernel/debug/pcie_ptm_*/t1
+Date: May 2025
+Contact: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Description:
+ (RO) PTM T1 timestamp in nanoseconds. Applicable only for
+ Endpoint controllers.
+
+What: /sys/kernel/debug/pcie_ptm_*/t2
+Date: May 2025
+Contact: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Description:
+ (RO) PTM T2 timestamp in nanoseconds. Applicable only for
+ Root Complex controllers.
+
+What: /sys/kernel/debug/pcie_ptm_*/t3
+Date: May 2025
+Contact: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Description:
+ (RO) PTM T3 timestamp in nanoseconds. Applicable only for
+ Root Complex controllers.
+
+What: /sys/kernel/debug/pcie_ptm_*/t4
+Date: May 2025
+Contact: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Description:
+ (RO) PTM T4 timestamp in nanoseconds. Applicable only for
+ Endpoint controllers.
+
+What: /sys/kernel/debug/pcie_ptm_*/context_update
+Date: May 2025
+Contact: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Description:
+ (RW) Control the PTM context update mode. Applicable only for
+ Endpoint controllers.
+
+ Following values are supported:
+
+ * auto = PTM context auto update trigger for every 10ms
+
+ * manual = PTM context manual update. Writing 'manual' to this
+ file triggers PTM context update (default)
+
+What: /sys/kernel/debug/pcie_ptm_*/context_valid
+Date: May 2025
+Contact: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Description:
+ (RW) Control the PTM context validity (local clock timing).
+ Applicable only for Root Complex controllers. PTM context is
+ invalidated by hardware if the Root Complex enters low power
+ mode or changes link frequency.
+
+ Following values are supported:
+
+ * 0 = PTM context invalid (default)
+
+ * 1 = PTM context valid
diff --git a/Documentation/ABI/testing/debugfs-pfo-nx-crypto b/Documentation/ABI/testing/debugfs-pfo-nx-crypto
index 685d5a448423..f75a655c1531 100644
--- a/Documentation/ABI/testing/debugfs-pfo-nx-crypto
+++ b/Documentation/ABI/testing/debugfs-pfo-nx-crypto
@@ -4,42 +4,42 @@ KernelVersion: 3.4
Contact: Kent Yoder <key@linux.vnet.ibm.com>
Description:
- These debugfs interfaces are built by the nx-crypto driver, built in
+These debugfs interfaces are built by the nx-crypto driver, built in
arch/powerpc/crypto/nx.
Error Detection
===============
errors:
-- A u32 providing a total count of errors since the driver was loaded. The
-only errors counted here are those returned from the hcall, H_COP_OP.
+ A u32 providing a total count of errors since the driver was loaded. The
+ only errors counted here are those returned from the hcall, H_COP_OP.
last_error:
-- The most recent non-zero return code from the H_COP_OP hcall. -EBUSY is not
-recorded here (the hcall will retry until -EBUSY goes away).
+ The most recent non-zero return code from the H_COP_OP hcall. -EBUSY is not
+ recorded here (the hcall will retry until -EBUSY goes away).
last_error_pid:
-- The process ID of the process who received the most recent error from the
-hcall.
+ The process ID of the process who received the most recent error from the
+ hcall.
Device Use
==========
aes_bytes:
-- The total number of bytes encrypted using AES in any of the driver's
-supported modes.
+ The total number of bytes encrypted using AES in any of the driver's
+ supported modes.
aes_ops:
-- The total number of AES operations submitted to the hardware.
+ The total number of AES operations submitted to the hardware.
sha256_bytes:
-- The total number of bytes hashed by the hardware using SHA-256.
+ The total number of bytes hashed by the hardware using SHA-256.
sha256_ops:
-- The total number of SHA-256 operations submitted to the hardware.
+ The total number of SHA-256 operations submitted to the hardware.
sha512_bytes:
-- The total number of bytes hashed by the hardware using SHA-512.
+ The total number of bytes hashed by the hardware using SHA-512.
sha512_ops:
-- The total number of SHA-512 operations submitted to the hardware.
+ The total number of SHA-512 operations submitted to the hardware.
diff --git a/Documentation/ABI/testing/debugfs-pktcdvd b/Documentation/ABI/testing/debugfs-pktcdvd
deleted file mode 100644
index cf11736acb76..000000000000
--- a/Documentation/ABI/testing/debugfs-pktcdvd
+++ /dev/null
@@ -1,19 +0,0 @@
-What: /sys/kernel/debug/pktcdvd/pktcdvd[0-7]
-Date: Oct. 2006
-KernelVersion: 2.6.20
-Contact: Thomas Maier <balagi@justmail.de>
-Description:
-
-debugfs interface
------------------
-
-The pktcdvd module (packet writing driver) creates
-these files in debugfs:
-
-/sys/kernel/debug/pktcdvd/pktcdvd[0-7]/
- info (0444) Lots of driver statistics and infos.
-
-Example:
--------
-
-cat /sys/kernel/debug/pktcdvd/pktcdvd0/info
diff --git a/Documentation/ABI/testing/debugfs-scmi b/Documentation/ABI/testing/debugfs-scmi
new file mode 100644
index 000000000000..ee7179ab2edf
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-scmi
@@ -0,0 +1,70 @@
+What: /sys/kernel/debug/scmi/<n>/instance_name
+Date: March 2023
+KernelVersion: 6.3
+Contact: cristian.marussi@arm.com
+Description: The name of the underlying SCMI instance <n> described by
+ all the debugfs accessors rooted at /sys/kernel/debug/scmi/<n>,
+ expressed as the full name of the top DT SCMI node under which
+ this SCMI instance is rooted.
+Users: Debugging, any userspace test suite
+
+What: /sys/kernel/debug/scmi/<n>/atomic_threshold_us
+Date: March 2023
+KernelVersion: 6.3
+Contact: cristian.marussi@arm.com
+Description: An optional time value, expressed in microseconds, representing,
+ on this SCMI instance <n>, the threshold above which any SCMI
+ command, advertised to have an higher-than-threshold execution
+ latency, should not be considered for atomic mode of operation,
+ even if requested.
+Users: Debugging, any userspace test suite
+
+What: /sys/kernel/debug/scmi/<n>/transport/type
+Date: March 2023
+KernelVersion: 6.3
+Contact: cristian.marussi@arm.com
+Description: A string representing the type of transport configured for this
+ SCMI instance <n>.
+Users: Debugging, any userspace test suite
+
+What: /sys/kernel/debug/scmi/<n>/transport/is_atomic
+Date: March 2023
+KernelVersion: 6.3
+Contact: cristian.marussi@arm.com
+Description: A boolean stating if the transport configured on the underlying
+ SCMI instance <n> is capable of atomic mode of operation.
+Users: Debugging, any userspace test suite
+
+What: /sys/kernel/debug/scmi/<n>/transport/max_rx_timeout_ms
+Date: March 2023
+KernelVersion: 6.3
+Contact: cristian.marussi@arm.com
+Description: Timeout in milliseconds allowed for SCMI synchronous replies
+ for the currently configured SCMI transport for instance <n>.
+Users: Debugging, any userspace test suite
+
+What: /sys/kernel/debug/scmi/<n>/transport/max_msg_size
+Date: March 2023
+KernelVersion: 6.3
+Contact: cristian.marussi@arm.com
+Description: Max message size of allowed SCMI messages for the currently
+ configured SCMI transport for instance <n>.
+Users: Debugging, any userspace test suite
+
+What: /sys/kernel/debug/scmi/<n>/transport/tx_max_msg
+Date: March 2023
+KernelVersion: 6.3
+Contact: cristian.marussi@arm.com
+Description: Max number of concurrently allowed in-flight SCMI messages for
+ the currently configured SCMI transport for instance <n> on the
+ TX channels.
+Users: Debugging, any userspace test suite
+
+What: /sys/kernel/debug/scmi/<n>/transport/rx_max_msg
+Date: March 2023
+KernelVersion: 6.3
+Contact: cristian.marussi@arm.com
+Description: Max number of concurrently allowed in-flight SCMI messages for
+ the currently configured SCMI transport for instance <n> on the
+ RX channels.
+Users: Debugging, any userspace test suite
diff --git a/Documentation/ABI/testing/debugfs-scmi-raw b/Documentation/ABI/testing/debugfs-scmi-raw
new file mode 100644
index 000000000000..5847b96b3896
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-scmi-raw
@@ -0,0 +1,208 @@
+What: /sys/kernel/debug/scmi/<n>/raw/message
+Date: March 2023
+KernelVersion: 6.3
+Contact: cristian.marussi@arm.com
+Description: SCMI Raw synchronous message injection/snooping facility; write
+ a complete SCMI synchronous command message (header included)
+ in little-endian binary format to have it sent to the configured
+ backend SCMI server for instance <n>.
+ Any subsequently received response can be read from this same
+ entry if it arrived within the configured timeout.
+ Each write to the entry causes one command request to be built
+ and sent while the replies are read back one message at time
+ (receiving an EOF at each message boundary).
+Users: Debugging, any userspace test suite
+
+What: /sys/kernel/debug/scmi/<n>/raw/message_async
+Date: March 2023
+KernelVersion: 6.3
+Contact: cristian.marussi@arm.com
+Description: SCMI Raw asynchronous message injection/snooping facility; write
+ a complete SCMI asynchronous command message (header included)
+ in little-endian binary format to have it sent to the configured
+ backend SCMI server for instance <n>.
+ Any subsequently received response can be read from this same
+ entry if it arrived within the configured timeout.
+ Any additional delayed response received afterwards can be read
+ from this same entry too if it arrived within the configured
+ timeout.
+ Each write to the entry causes one command request to be built
+ and sent while the replies are read back one message at time
+ (receiving an EOF at each message boundary).
+Users: Debugging, any userspace test suite
+
+What: /sys/kernel/debug/scmi/<n>/raw/message_poll
+Date: June 2025
+KernelVersion: 6.16
+Contact: cristian.marussi@arm.com
+Description: SCMI Raw message injection/snooping facility using polling mode;
+ write a complete SCMI command message (header included) in
+ little-endian binary format to have it sent to the configured
+ backend SCMI server for instance <n>, using polling mode on
+ the reception path. (if transport is polling capable)
+ Any subsequently received response can be read from this same
+ entry if it arrived within the configured timeout.
+ Each write to the entry causes one command request to be built
+ and sent while the replies are read back one message at time
+ (receiving an EOF at each message boundary).
+Users: Debugging, any userspace test suite
+
+What: /sys/kernel/debug/scmi/<n>/raw/message_poll_async
+Date: June 2025
+KernelVersion: 6.16
+Contact: cristian.marussi@arm.com
+Description: SCMI Raw asynchronous message injection/snooping facility using
+ polling-mode; write a complete SCMI asynchronous command message
+ (header included) in little-endian binary format to have it sent
+ to the configured backend SCMI server for instance <n>, using
+ polling-mode on the reception path of the immediate part of the
+ asynchronous command. (if transport is polling capable)
+ Any subsequently received response can be read from this same
+ entry if it arrived within the configured timeout.
+ Any additional delayed response received afterwards can be read
+ from this same entry too if it arrived within the configured
+ timeout.
+ Each write to the entry causes one command request to be built
+ and sent while the replies are read back one message at time
+ (receiving an EOF at each message boundary).
+Users: Debugging, any userspace test suite
+
+What: /sys/kernel/debug/scmi/<n>/raw/errors
+Date: March 2023
+KernelVersion: 6.3
+Contact: cristian.marussi@arm.com
+Description: SCMI Raw message errors facility; any kind of timed-out or
+ generally unexpectedly received SCMI message, for instance <n>,
+ can be read from this entry.
+ Each read gives back one message at time (receiving an EOF at
+ each message boundary).
+Users: Debugging, any userspace test suite
+
+What: /sys/kernel/debug/scmi/<n>/raw/notification
+Date: March 2023
+KernelVersion: 6.3
+Contact: cristian.marussi@arm.com
+Description: SCMI Raw notification snooping facility; any notification
+ emitted by the backend SCMI server, for instance <n>, can be
+ read from this entry.
+ Each read gives back one message at time (receiving an EOF at
+ each message boundary).
+Users: Debugging, any userspace test suite
+
+What: /sys/kernel/debug/scmi/<n>/raw/reset
+Date: March 2023
+KernelVersion: 6.3
+Contact: cristian.marussi@arm.com
+Description: SCMI Raw stack reset facility; writing a value to this entry
+ causes the internal queues of any kind of received message,
+ still pending to be read out for instance <n>, to be immediately
+ flushed.
+ Can be used to reset and clean the SCMI Raw stack between to
+ different test-run.
+Users: Debugging, any userspace test suite
+
+What: /sys/kernel/debug/scmi/<n>/raw/channels/<m>/message
+Date: March 2023
+KernelVersion: 6.3
+Contact: cristian.marussi@arm.com
+Description: SCMI Raw synchronous message injection/snooping facility; write
+ a complete SCMI synchronous command message (header included)
+ in little-endian binary format to have it sent to the configured
+ backend SCMI server for instance <n> through the <m> transport
+ channel.
+ Any subsequently received response can be read from this same
+ entry if it arrived on channel <m> within the configured
+ timeout.
+ Each write to the entry causes one command request to be built
+ and sent while the replies are read back one message at time
+ (receiving an EOF at each message boundary).
+ Channel identifier <m> matches the SCMI protocol number which
+ has been associated with this transport channel in the DT
+ description, with base protocol number 0x10 being the default
+ channel for this instance.
+ Note that these per-channel entries rooted at <..>/channels
+ exist only if the transport is configured to have more than
+ one default channel.
+Users: Debugging, any userspace test suite
+
+What: /sys/kernel/debug/scmi/<n>/raw/channels/<m>/message_async
+Date: March 2023
+KernelVersion: 6.3
+Contact: cristian.marussi@arm.com
+Description: SCMI Raw asynchronous message injection/snooping facility; write
+ a complete SCMI asynchronous command message (header included)
+ in little-endian binary format to have it sent to the configured
+ backend SCMI server for instance <n> through the <m> transport
+ channel.
+ Any subsequently received response can be read from this same
+ entry if it arrived on channel <m> within the configured
+ timeout.
+ Any additional delayed response received afterwards can be read
+ from this same entry too if it arrived within the configured
+ timeout.
+ Each write to the entry causes one command request to be built
+ and sent while the replies are read back one message at time
+ (receiving an EOF at each message boundary).
+ Channel identifier <m> matches the SCMI protocol number which
+ has been associated with this transport channel in the DT
+ description, with base protocol number 0x10 being the default
+ channel for this instance.
+ Note that these per-channel entries rooted at <..>/channels
+ exist only if the transport is configured to have more than
+ one default channel.
+Users: Debugging, any userspace test suite
+
+
+What: /sys/kernel/debug/scmi/<n>/raw/channels/<m>/message_poll
+Date: June 2025
+KernelVersion: 6.16
+Contact: cristian.marussi@arm.com
+Description: SCMI Raw message injection/snooping facility using polling mode;
+ write a complete SCMI command message (header included) in
+ little-endian binary format to have it sent to the configured
+ backend SCMI server for instance <n> through the <m> transport
+ channel, using polling mode on the reception path.
+ (if transport is polling capable)
+ Any subsequently received response can be read from this same
+ entry if it arrived on channel <m> within the configured
+ timeout.
+ Each write to the entry causes one command request to be built
+ and sent while the replies are read back one message at time
+ (receiving an EOF at each message boundary).
+ Channel identifier <m> matches the SCMI protocol number which
+ has been associated with this transport channel in the DT
+ description, with base protocol number 0x10 being the default
+ channel for this instance.
+ Note that these per-channel entries rooted at <..>/channels
+ exist only if the transport is configured to have more than
+ one default channel.
+Users: Debugging, any userspace test suite
+
+What: /sys/kernel/debug/scmi/<n>/raw/channels/<m>/message_poll_async
+Date: June 2025
+KernelVersion: 6.16
+Contact: cristian.marussi@arm.com
+Description: SCMI Raw asynchronous message injection/snooping facility using
+ polling-mode; write a complete SCMI asynchronous command message
+ (header included) in little-endian binary format to have it sent
+ to the configured backend SCMI server for instance <n> through
+ the <m> transport channel, using polling mode on the reception
+ path of the immediate part of the asynchronous command.
+ (if transport is polling capable)
+ Any subsequently received response can be read from this same
+ entry if it arrived on channel <m> within the configured
+ timeout.
+ Any additional delayed response received afterwards can be read
+ from this same entry too if it arrived within the configured
+ timeout.
+ Each write to the entry causes one command request to be built
+ and sent while the replies are read back one message at time
+ (receiving an EOF at each message boundary).
+ Channel identifier <m> matches the SCMI protocol number which
+ has been associated with this transport channel in the DT
+ description, with base protocol number 0x10 being the default
+ channel for this instance.
+ Note that these per-channel entries rooted at <..>/channels
+ exist only if the transport is configured to have more than
+ one default channel.
+Users: Debugging, any userspace test suite
diff --git a/Documentation/ABI/testing/debugfs-tpmi b/Documentation/ABI/testing/debugfs-tpmi
new file mode 100644
index 000000000000..c493a1403d2f
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-tpmi
@@ -0,0 +1,40 @@
+What: /sys/kernel/debug/tpmi-<n>/pfs_dump
+Date: November 2023
+KernelVersion: 6.6
+Contact: srinivas.pandruvada@linux.intel.com
+Description:
+The PFS (PM Feature Structure) table, shows details of each power
+management feature. This includes:
+tpmi_id, number of entries, entry size, offset, vsec offset, lock status
+and disabled status.
+Users: Debugging, any user space test suite
+
+What: /sys/kernel/debug/tpmi-<n>/tpmi-id-<n>/mem_dump
+Date: November 2023
+KernelVersion: 6.6
+Contact: srinivas.pandruvada@linux.intel.com
+Description:
+Shows the memory dump of the MMIO region for a TPMI ID.
+Users: Debugging, any user space test suite
+
+What: /sys/kernel/debug/tpmi-<n>/tpmi-id-<n>/mem_write
+Date: November 2023
+KernelVersion: 6.6
+Contact: srinivas.pandruvada@linux.intel.com
+Description:
+Allows to write at any offset. It doesn't check for Read/Write access
+as hardware will not allow to write at read-only memory. This write is
+at offset multiples of 4. The format is instance,offset,contents.
+Example:
+echo 0,0x20,0xff > mem_write
+echo 1,64,64 > mem_write
+Users: Debugging, any user space test suite
+
+What: /sys/kernel/debug/tpmi-<n>/plr/domain<n>/status
+Date: Aug 2024
+KernelVersion: 6.11
+Contact: Tero Kristo <tero.kristo@linux.intel.com>
+Description:
+Shows the currently active Performance Limit Reasons for die level and the
+individual CPUs under the die. The contents of this file are sticky, and
+clearing all the statuses can be done by writing "0\n" to this file.
diff --git a/Documentation/ABI/testing/debugfs-vfio b/Documentation/ABI/testing/debugfs-vfio
new file mode 100644
index 000000000000..70ec2d454686
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-vfio
@@ -0,0 +1,31 @@
+What: /sys/kernel/debug/vfio
+Date: December 2023
+KernelVersion: 6.8
+Contact: Longfang Liu <liulongfang@huawei.com>
+Description: This debugfs file directory is used for debugging
+ of vfio devices, it's a common directory for all vfio devices.
+ Vfio core will create a device subdirectory under this
+ directory.
+
+What: /sys/kernel/debug/vfio/<device>/migration
+Date: December 2023
+KernelVersion: 6.8
+Contact: Longfang Liu <liulongfang@huawei.com>
+Description: This debugfs file directory is used for debugging
+ of vfio devices that support live migration.
+ The debugfs of each vfio device that supports live migration
+ could be created under this directory.
+
+What: /sys/kernel/debug/vfio/<device>/migration/state
+Date: December 2023
+KernelVersion: 6.8
+Contact: Longfang Liu <liulongfang@huawei.com>
+Description: Read the live migration status of the vfio device.
+ The contents of the state file reflects the migration state
+ relative to those defined in the vfio_device_mig_state enum
+
+What: /sys/kernel/debug/vfio/<device>/migration/features
+Date: Oct 2025
+KernelVersion: 6.18
+Contact: Cédric Le Goater <clg@redhat.com>
+Description: Read the migration features of the vfio device.
diff --git a/Documentation/ABI/testing/debugfs-wilco-ec b/Documentation/ABI/testing/debugfs-wilco-ec
new file mode 100644
index 000000000000..682e3c09ef4d
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-wilco-ec
@@ -0,0 +1,45 @@
+What: /sys/kernel/debug/wilco_ec/h1_gpio
+Date: April 2019
+KernelVersion: 5.2
+Description:
+ As part of Chrome OS's FAFT (Fully Automated Firmware Testing)
+ tests, we need to ensure that the H1 chip is properly setting
+ some GPIO lines. The h1_gpio attribute exposes the state
+ of the lines:
+ - ENTRY_TO_FACT_MODE in BIT(0)
+ - SPI_CHROME_SEL in BIT(1)
+
+ Output will formatted with "0x%02x\n".
+
+What: /sys/kernel/debug/wilco_ec/raw
+Date: January 2019
+KernelVersion: 5.1
+Description:
+ Write and read raw mailbox commands to the EC.
+
+ You can write a hexadecimal sentence to raw, and that series of
+ bytes will be sent to the EC. Then, you can read the bytes of
+ response by reading from raw.
+
+ For writing, bytes 0-1 indicate the message type, one of enum
+ wilco_ec_msg_type. Byte 2+ consist of the data passed in the
+ request, starting at MBOX[0]. At least three bytes are required
+ for writing, two for the type and at least a single byte of
+ data.
+
+ Example::
+
+ // Request EC info type 3 (EC firmware build date)
+ // Corresponds with sending type 0x00f0 with
+ // MBOX = [38, 00, 03, 00]
+ $ echo 00 f0 38 00 03 00 > /sys/kernel/debug/wilco_ec/raw
+ // View the result. The decoded ASCII result "12/21/18" is
+ // included after the raw hex.
+ // Corresponds with MBOX = [00, 00, 31, 32, 2f, 32, 31, 38, ...]
+ $ cat /sys/kernel/debug/wilco_ec/raw
+ 00 00 31 32 2f 32 31 2f 31 38 00 38 00 01 00 2f 00 ..12/21/18.8...
+
+ Note that the first 16 bytes of the received MBOX[] will be
+ printed, even if some of the data is junk, and skipping bytes
+ 17 to 32. It is up to you to know how many of the first bytes of
+ data are the actual response.
diff --git a/Documentation/ABI/testing/dell-smbios-wmi b/Documentation/ABI/testing/dell-smbios-wmi
index fc919ce16008..f58229084469 100644
--- a/Documentation/ABI/testing/dell-smbios-wmi
+++ b/Documentation/ABI/testing/dell-smbios-wmi
@@ -1,7 +1,7 @@
What: /dev/wmi/dell-smbios
Date: November 2017
KernelVersion: 4.15
-Contact: "Mario Limonciello" <mario.limonciello@dell.com>
+Contact: Dell.Client.Kernel@dell.com
Description:
Perform SMBIOS calls on supported Dell machines.
through the Dell ACPI-WMI interface.
@@ -10,29 +10,29 @@ Description:
<uapi/linux/wmi.h>
1) To perform an SMBIOS call from userspace, you'll need to
- first determine the minimum size of the calling interface
- buffer for your machine.
- Platforms that contain larger buffers can return larger
- objects from the system firmware.
- Commonly this size is either 4k or 32k.
+ first determine the minimum size of the calling interface
+ buffer for your machine.
+ Platforms that contain larger buffers can return larger
+ objects from the system firmware.
+ Commonly this size is either 4k or 32k.
- To determine the size of the buffer read() a u64 dword from
- the WMI character device /dev/wmi/dell-smbios.
+ To determine the size of the buffer read() a u64 dword from
+ the WMI character device /dev/wmi/dell-smbios.
2) After you've determined the minimum size of the calling
- interface buffer, you can allocate a structure that represents
- the structure documented above.
+ interface buffer, you can allocate a structure that represents
+ the structure documented above.
3) In the 'length' object store the size of the buffer you
- determined above and allocated.
+ determined above and allocated.
4) In this buffer object, prepare as necessary for the SMBIOS
- call you're interested in. Typically SMBIOS buffers have
- "class", "select", and "input" defined to values that coincide
- with the data you are interested in.
- Documenting class/select/input values is outside of the scope
- of this documentation. Check with the libsmbios project for
- further documentation on these values.
+ call you're interested in. Typically SMBIOS buffers have
+ "class", "select", and "input" defined to values that coincide
+ with the data you are interested in.
+ Documenting class/select/input values is outside of the scope
+ of this documentation. Check with the libsmbios project for
+ further documentation on these values.
6) Run the call by using ioctl() as described in the header.
diff --git a/Documentation/ABI/testing/dev-kmsg b/Documentation/ABI/testing/dev-kmsg
index fff817efa508..a377b6c093c9 100644
--- a/Documentation/ABI/testing/dev-kmsg
+++ b/Documentation/ABI/testing/dev-kmsg
@@ -6,13 +6,14 @@ Description: The /dev/kmsg character device node provides userspace access
to the kernel's printk buffer.
Injecting messages:
+
Every write() to the opened device node places a log entry in
the kernel's printk buffer.
The logged line can be prefixed with a <N> syslog prefix, which
carries the syslog priority and facility. The single decimal
prefix number is composed of the 3 lowest bits being the syslog
- priority and the higher bits the syslog facility number.
+ priority and the next 8 bits the syslog facility number.
If no prefix is given, the priority number is the default kernel
log priority and the facility number is set to LOG_USER (1). It
@@ -21,6 +22,7 @@ Description: The /dev/kmsg character device node provides userspace access
the messages can always be reliably determined.
Accessing the buffer:
+
Every read() from the opened device node receives one record
of the kernel's printk buffer.
@@ -48,6 +50,7 @@ Description: The /dev/kmsg character device node provides userspace access
if needed, without limiting the interface to a single reader.
The device supports seek with the following parameters:
+
SEEK_SET, 0
seek to the first entry in the buffer
SEEK_END, 0
@@ -56,6 +59,17 @@ Description: The /dev/kmsg character device node provides userspace access
seek after the last record available at the time
the last SYSLOG_ACTION_CLEAR was issued.
+ Other seek operations or offsets are not supported because of
+ the special behavior this device has. The device allows to read
+ or write only whole variable length messages (records) that are
+ stored in a ring buffer.
+
+ Because of the non-standard behavior also the error values are
+ non-standard. -ESPIPE is returned for non-zero offset. -EINVAL
+ is returned for other operations, e.g. SEEK_CUR. This behavior
+ and values are historical and could not be modified without the
+ risk of breaking userspace.
+
The output format consists of a prefix carrying the syslog
prefix including priority and facility, the 64 bit message
sequence number and the monotonic timestamp in microseconds,
@@ -76,27 +90,30 @@ Description: The /dev/kmsg character device node provides userspace access
readable context of the message, for reliable processing in
userspace.
- Example:
- 7,160,424069,-;pci_root PNP0A03:00: host bridge window [io 0x0000-0x0cf7] (ignored)
- SUBSYSTEM=acpi
- DEVICE=+acpi:PNP0A03:00
- 6,339,5140900,-;NET: Registered protocol family 10
- 30,340,5690716,-;udevd[80]: starting version 181
+ Example::
+
+ 7,160,424069,-;pci_root PNP0A03:00: host bridge window [io 0x0000-0x0cf7] (ignored)
+ SUBSYSTEM=acpi
+ DEVICE=+acpi:PNP0A03:00
+ 6,339,5140900,-;NET: Registered protocol family 10
+ 30,340,5690716,-;udevd[80]: starting version 181
The DEVICE= key uniquely identifies devices the following way:
- b12:8 - block dev_t
- c127:3 - char dev_t
- n8 - netdev ifindex
- +sound:card0 - subsystem:devname
+
+ ============ =================
+ b12:8 block dev_t
+ c127:3 char dev_t
+ n8 netdev ifindex
+ +sound:card0 subsystem:devname
+ ============ =================
The flags field carries '-' by default. A 'c' indicates a
- fragment of a line. All following fragments are flagged with
- '+'. Note, that these hints about continuation lines are not
- necessarily correct, and the stream could be interleaved with
- unrelated messages, but merging the lines in the output
- usually produces better human readable results. A similar
- logic is used internally when messages are printed to the
- console, /proc/kmsg or the syslog() syscall.
+ fragment of a line. Note, that these hints about continuation
+ lines are not necessarily correct, and the stream could be
+ interleaved with unrelated messages, but merging the lines in
+ the output usually produces better human readable results. A
+ similar logic is used internally when messages are printed to
+ the console, /proc/kmsg or the syslog() syscall.
By default, kernel tries to avoid fragments by concatenating
when it can and fragments are rare; however, when extended
diff --git a/Documentation/ABI/testing/evm b/Documentation/ABI/testing/evm
index 201d10319fa1..44750a933db4 100644
--- a/Documentation/ABI/testing/evm
+++ b/Documentation/ABI/testing/evm
@@ -1,4 +1,5 @@
-What: security/evm
+What: /sys/kernel/security/evm
+What: /sys/kernel/security/*/evm
Date: March 2011
Contact: Mimi Zohar <zohar@us.ibm.com>
Description:
@@ -17,33 +18,68 @@ Description:
echoing a value to <securityfs>/evm made up of the
following bits:
+ === ==================================================
Bit Effect
+ === ==================================================
0 Enable HMAC validation and creation
1 Enable digital signature validation
2 Permit modification of EVM-protected metadata at
runtime. Not supported if HMAC validation and
- creation is enabled.
+ creation is enabled (deprecated).
31 Disable further runtime modification of EVM policy
+ === ==================================================
- For example:
+ For example::
- echo 1 ><securityfs>/evm
+ echo 1 ><securityfs>/evm
will enable HMAC validation and creation
- echo 0x80000003 ><securityfs>/evm
+ ::
+
+ echo 0x80000003 ><securityfs>/evm
will enable HMAC and digital signature validation and
HMAC creation and disable all further modification of policy.
- echo 0x80000006 ><securityfs>/evm
+ ::
+
+ echo 0x80000006 ><securityfs>/evm
will enable digital signature validation, permit
modification of EVM-protected metadata and
- disable all further modification of policy
+ disable all further modification of policy. This option is now
+ deprecated in favor of::
+
+ echo 0x80000002 ><securityfs>/evm
+
+ as the outstanding issues that prevent the usage of EVM portable
+ signatures have been solved.
+
+ Echoing a value is additive, the new value is added to the
+ existing initialization flags.
+
+ For example, after::
+
+ echo 2 ><securityfs>/evm
- Note that once a key has been loaded, it will no longer be
- possible to enable metadata modification.
+ another echo can be performed::
+
+ echo 1 ><securityfs>/evm
+
+ and the resulting value will be 3.
+
+ Note that once an HMAC key has been loaded, it will no longer
+ be possible to enable metadata modification. Signaling that an
+ HMAC key has been loaded will clear the corresponding flag.
+ For example, if the current value is 6 (2 and 4 set)::
+
+ echo 1 ><securityfs>/evm
+
+ will set the new value to 3 (4 cleared).
+
+ Loading an HMAC key is the only way to disable metadata
+ modification.
Until key loading has been signaled EVM can not create
or validate the 'security.evm' xattr, but returns
@@ -58,14 +94,14 @@ Description:
core/ima-setup) have support for loading keys at boot
time.
-What: security/integrity/evm/evm_xattrs
+What: /sys/kernel/security/*/evm/evm_xattrs
Date: April 2018
Contact: Matthew Garrett <mjg59@google.com>
Description:
Shows the set of extended attributes used to calculate or
validate the EVM signature, and allows additional attributes
to be added at runtime. Any signatures generated after
- additional attributes are added (and on files posessing those
+ additional attributes are added (and on files possessing those
additional attributes) will only be valid if the same
additional attributes are configured on system boot. Writing
a single period (.) will lock the xattr list from any further
diff --git a/Documentation/ABI/testing/gpio-cdev b/Documentation/ABI/testing/gpio-cdev
index 7b265fbb47e3..c9689b2a6fed 100644
--- a/Documentation/ABI/testing/gpio-cdev
+++ b/Documentation/ABI/testing/gpio-cdev
@@ -6,21 +6,23 @@ Description:
The character device files /dev/gpiochip* are the interface
between GPIO chips and userspace.
- The ioctl(2)-based ABI is defined and documented in
- [include/uapi]<linux/gpio.h>.
+ The ioctl(2)-based ABI is defined in
+ [include/uapi]<linux/gpio.h> and documented in
+ Documentation/userspace-api/gpio/chardev.rst.
The following file operations are supported:
open(2)
- Currently the only useful flags are O_RDWR.
+ Currently the only useful flags are O_RDWR.
ioctl(2)
- Initiate various actions.
- See the inline documentation in [include/uapi]<linux/gpio.h>
- for descriptions of all ioctls.
+ Initiate various actions.
+
+ See Documentation/userspace-api/gpio/chardev.rst
+ for a description of all ioctls.
close(2)
- Stops and free up the I/O contexts that was associated
- with the file descriptor.
+ Stops and free up the I/O contexts that was associated
+ with the file descriptor.
Users: TBD
diff --git a/Documentation/ABI/testing/ima_policy b/Documentation/ABI/testing/ima_policy
index 74c6702de74e..c2385183826c 100644
--- a/Documentation/ABI/testing/ima_policy
+++ b/Documentation/ABI/testing/ima_policy
@@ -1,4 +1,4 @@
-What: security/ima/policy
+What: /sys/kernel/security/*/ima/policy
Date: May 2008
Contact: Mimi Zohar <zohar@us.ibm.com>
Description:
@@ -15,32 +15,69 @@ Description:
IMA appraisal, if configured, uses these file measurements
for local measurement appraisal.
- rule format: action [condition ...]
+ ::
- action: measure | dont_measure | appraise | dont_appraise |
- audit | hash | dont_hash
- condition:= base | lsm [option]
- base: [[func=] [mask=] [fsmagic=] [fsuuid=] [uid=]
- [euid=] [fowner=] [fsname=]]
+ rule format: action [condition ...]
+
+ action: measure | dont_measure | appraise | dont_appraise |
+ audit | hash | dont_hash
+ condition:= base | lsm [option]
+ base: [[func=] [mask=] [fsmagic=] [fsuuid=] [fsname=]
+ [uid=] [euid=] [gid=] [egid=]
+ [fowner=] [fgroup=]]
lsm: [[subj_user=] [subj_role=] [subj_type=]
[obj_user=] [obj_role=] [obj_type=]]
- option: [[appraise_type=]] [permit_directio]
-
- base: func:= [BPRM_CHECK][MMAP_CHECK][CREDS_CHECK][FILE_CHECK][MODULE_CHECK]
+ option: [digest_type=] [template=] [permit_directio]
+ [appraise_type=] [appraise_flag=]
+ [appraise_algos=] [keyrings=]
+ base:
+ func:= [BPRM_CHECK][MMAP_CHECK][CREDS_CHECK][FILE_CHECK][MODULE_CHECK]
[FIRMWARE_CHECK]
[KEXEC_KERNEL_CHECK] [KEXEC_INITRAMFS_CHECK]
+ [KEXEC_CMDLINE] [KEY_CHECK] [CRITICAL_DATA]
+ [SETXATTR_CHECK][MMAP_CHECK_REQPROT]
mask:= [[^]MAY_READ] [[^]MAY_WRITE] [[^]MAY_APPEND]
[[^]MAY_EXEC]
fsmagic:= hex value
fsuuid:= file system UUID (e.g 8bcbe394-4f13-4144-be8e-5aa9ea2ce2f6)
uid:= decimal value
euid:= decimal value
+ gid:= decimal value
+ egid:= decimal value
fowner:= decimal value
- lsm: are LSM specific
- option: appraise_type:= [imasig]
+ fgroup:= decimal value
+ lsm: are LSM specific
+ option:
+ appraise_type:= [imasig] | [imasig|modsig] | [sigv3]
+ where 'imasig' is the original or the signature
+ format v2.
+ where 'modsig' is an appended signature,
+ where 'sigv3' is the signature format v3. (Currently
+ limited to fsverity digest based signatures
+ stored in security.ima xattr. Requires
+ specifying "digest_type=verity" first.)
+
+ appraise_flag:= [check_blacklist] (deprecated)
+ Setting the check_blacklist flag is no longer necessary.
+ All appraisal functions set it by default.
+ digest_type:= verity
+ Require fs-verity's file digest instead of the
+ regular IMA file hash.
+ keyrings:= list of keyrings
+ (eg, .builtin_trusted_keys|.ima). Only valid
+ when action is "measure" and func is KEY_CHECK.
+ template:= name of a defined IMA template type
+ (eg, ima-ng). Only valid when action is "measure".
pcr:= decimal value
+ label:= [selinux]|[kernel_info]|[data_label]
+ data_label:= a unique string used for grouping and limiting critical data.
+ For example, "selinux" to measure critical data for SELinux.
+ appraise_algos:= comma-separated list of hash algorithms
+ For example, "sha256,sha512" to only accept to appraise
+ files where the security.ima xattr was hashed with one
+ of these two algorithms.
- default policy:
+ default policy:
# PROC_SUPER_MAGIC
dont_measure fsmagic=0x9fa0
dont_appraise fsmagic=0x9fa0
@@ -88,7 +125,8 @@ Description:
Examples of LSM specific definitions:
- SELinux:
+ SELinux::
+
dont_measure obj_type=var_log_t
dont_appraise obj_type=var_log_t
dont_measure obj_type=auditd_log_t
@@ -96,10 +134,57 @@ Description:
measure subj_user=system_u func=FILE_CHECK mask=MAY_READ
measure subj_role=system_r func=FILE_CHECK mask=MAY_READ
- Smack:
+ Smack::
+
measure subj_user=_ func=FILE_CHECK mask=MAY_READ
- Example of measure rules using alternate PCRs:
+ Example of measure rules using alternate PCRs::
measure func=KEXEC_KERNEL_CHECK pcr=4
measure func=KEXEC_INITRAMFS_CHECK pcr=5
+
+ Example of appraise rule allowing modsig appended signatures:
+
+ appraise func=KEXEC_KERNEL_CHECK appraise_type=imasig|modsig
+
+ Example of measure rule using KEY_CHECK to measure all keys:
+
+ measure func=KEY_CHECK
+
+ Example of measure rule using KEY_CHECK to only measure
+ keys added to .builtin_trusted_keys or .ima keyring:
+
+ measure func=KEY_CHECK keyrings=.builtin_trusted_keys|.ima
+
+ Example of the special SETXATTR_CHECK appraise rule, that
+ restricts the hash algorithms allowed when writing to the
+ security.ima xattr of a file:
+
+ appraise func=SETXATTR_CHECK appraise_algos=sha256,sha384,sha512
+
+ Example of a 'measure' rule requiring fs-verity's digests
+ with indication of type of digest in the measurement list.
+
+ measure func=FILE_CHECK digest_type=verity \
+ template=ima-ngv2
+
+ Example of 'measure' and 'appraise' rules requiring fs-verity
+ signatures (format version 3) stored in security.ima xattr.
+
+ The 'measure' rule specifies the 'ima-sigv3' template option,
+ which includes the indication of type of digest and the file
+ signature in the measurement list.
+
+ measure func=BPRM_CHECK digest_type=verity \
+ template=ima-sigv3
+
+
+ The 'appraise' rule specifies the type and signature format
+ version (sigv3) required.
+
+ appraise func=BPRM_CHECK digest_type=verity \
+ appraise_type=sigv3
+
+ All of these policy rules could, for example, be constrained
+ either based on a filesystem's UUID (fsuuid) or based on LSM
+ labels.
diff --git a/Documentation/ABI/testing/procfs-attr-current b/Documentation/ABI/testing/procfs-attr-current
new file mode 100644
index 000000000000..198b9fe1c8e8
--- /dev/null
+++ b/Documentation/ABI/testing/procfs-attr-current
@@ -0,0 +1,20 @@
+What: /proc/*/attr/current
+Contact: linux-security-module@vger.kernel.org,
+ selinux@vger.kernel.org,
+ apparmor@lists.ubuntu.com
+Description: The current security information used by a Linux
+ security module (LSM) that is active on the system.
+ The details of permissions required to read from
+ this interface and hence obtain the security state
+ of the task identified is LSM dependent.
+ A process cannot write to this interface unless it
+ refers to itself.
+ The other details of permissions required to write to
+ this interface and hence change the security state of
+ the task identified are LSM dependent.
+ The format of the data used by this interface is LSM
+ dependent.
+ SELinux, Smack and AppArmor provide this interface.
+Users: SELinux user-space
+ Smack user-space
+ AppArmor user-space
diff --git a/Documentation/ABI/testing/procfs-attr-exec b/Documentation/ABI/testing/procfs-attr-exec
new file mode 100644
index 000000000000..34593866a7ab
--- /dev/null
+++ b/Documentation/ABI/testing/procfs-attr-exec
@@ -0,0 +1,20 @@
+What: /proc/*/attr/exec
+Contact: linux-security-module@vger.kernel.org,
+ selinux@vger.kernel.org,
+ apparmor@lists.ubuntu.com
+Description: The security information to be used on the process
+ by a Linux security module (LSM) active on the system
+ after a subsequent exec() call.
+ The details of permissions required to read from
+ this interface and hence obtain the security state
+ of the task identified is LSM dependent.
+ A process cannot write to this interface unless it
+ refers to itself.
+ The other details of permissions required to write to
+ this interface and hence change the security state of
+ the task identified are LSM dependent.
+ The format of the data used by this interface is LSM
+ dependent.
+ SELinux and AppArmor provide this interface.
+Users: SELinux user-space
+ AppArmor user-space
diff --git a/Documentation/ABI/testing/procfs-attr-prev b/Documentation/ABI/testing/procfs-attr-prev
new file mode 100644
index 000000000000..f990b3595839
--- /dev/null
+++ b/Documentation/ABI/testing/procfs-attr-prev
@@ -0,0 +1,19 @@
+What: /proc/*/attr/prev
+Contact: linux-security-module@vger.kernel.org,
+ selinux@vger.kernel.org,
+ apparmor@lists.ubuntu.com
+Description: The security information used on the process by
+ a Linux security module (LSM) active on the system
+ prior to the most recent exec() call.
+ The details of permissions required to read from
+ this interface is LSM dependent.
+ A process cannot write to this interface unless it
+ refers to itself.
+ The other details of permissions required to write to
+ this interface are LSM dependent.
+ The format of the data used by this interface is LSM
+ dependent.
+ SELinux and AppArmor provide this interface.
+Users: SELinux user-space
+ AppArmor user-space
+
diff --git a/Documentation/ABI/testing/procfs-diskstats b/Documentation/ABI/testing/procfs-diskstats
index abac31d216de..6a719cf2075c 100644
--- a/Documentation/ABI/testing/procfs-diskstats
+++ b/Documentation/ABI/testing/procfs-diskstats
@@ -6,27 +6,38 @@ Description:
of block devices. Each line contains the following 14
fields:
- 1 - major number
- 2 - minor mumber
- 3 - device name
- 4 - reads completed successfully
- 5 - reads merged
- 6 - sectors read
- 7 - time spent reading (ms)
- 8 - writes completed
- 9 - writes merged
- 10 - sectors written
- 11 - time spent writing (ms)
- 12 - I/Os currently in progress
- 13 - time spent doing I/Os (ms)
- 14 - weighted time spent doing I/Os (ms)
+ == ===================================
+ 1 major number
+ 2 minor number
+ 3 device name
+ 4 reads completed successfully
+ 5 reads merged
+ 6 sectors read
+ 7 time spent reading (ms)
+ 8 writes completed
+ 9 writes merged
+ 10 sectors written
+ 11 time spent writing (ms)
+ 12 I/Os currently in progress
+ 13 time spent doing I/Os (ms)
+ 14 weighted time spent doing I/Os (ms)
+ == ===================================
Kernel 4.18+ appends four more fields for discard
tracking putting the total at 18:
- 15 - discards completed successfully
- 16 - discards merged
- 17 - sectors discarded
- 18 - time spent discarding
+ == ===================================
+ 15 discards completed successfully
+ 16 discards merged
+ 17 sectors discarded
+ 18 time spent discarding
+ == ===================================
- For more details refer to Documentation/iostats.txt
+ Kernel 5.5+ appends two more fields for flush requests:
+
+ == =====================================
+ 19 flush requests completed successfully
+ 20 time spent flushing
+ == =====================================
+
+ For more details refer to Documentation/admin-guide/iostats.rst
diff --git a/Documentation/ABI/testing/procfs-smaps_rollup b/Documentation/ABI/testing/procfs-smaps_rollup
index 0a54ed0d63c9..b446a7154a1b 100644
--- a/Documentation/ABI/testing/procfs-smaps_rollup
+++ b/Documentation/ABI/testing/procfs-smaps_rollup
@@ -3,29 +3,40 @@ Date: August 2017
Contact: Daniel Colascione <dancol@google.com>
Description:
This file provides pre-summed memory information for a
- process. The format is identical to /proc/pid/smaps,
+ process. The format is almost identical to /proc/pid/smaps,
except instead of an entry for each VMA in a process,
smaps_rollup has a single entry (tagged "[rollup]")
for which each field is the sum of the corresponding
fields from all the maps in /proc/pid/smaps.
- For more details, see the procfs man page.
+ Additionally, the fields Pss_Anon, Pss_File and Pss_Shmem
+ are not present in /proc/pid/smaps. These fields represent
+ the sum of the Pss field of each type (anon, file, shmem).
+ For more details, see Documentation/filesystems/proc.rst
+ and the procfs man page.
- Typical output looks like this:
+ Typical output looks like this::
- 00100000-ff709000 ---p 00000000 00:00 0 [rollup]
- Rss: 884 kB
- Pss: 385 kB
- Shared_Clean: 696 kB
- Shared_Dirty: 0 kB
- Private_Clean: 120 kB
- Private_Dirty: 68 kB
- Referenced: 884 kB
- Anonymous: 68 kB
- LazyFree: 0 kB
- AnonHugePages: 0 kB
- ShmemPmdMapped: 0 kB
- Shared_Hugetlb: 0 kB
- Private_Hugetlb: 0 kB
- Swap: 0 kB
- SwapPss: 0 kB
- Locked: 385 kB
+ 00100000-ff709000 ---p 00000000 00:00 0 [rollup]
+ Size: 1192 kB
+ KernelPageSize: 4 kB
+ MMUPageSize: 4 kB
+ Rss: 884 kB
+ Pss: 385 kB
+ Pss_Dirty: 68 kB
+ Pss_Anon: 301 kB
+ Pss_File: 80 kB
+ Pss_Shmem: 4 kB
+ Shared_Clean: 696 kB
+ Shared_Dirty: 0 kB
+ Private_Clean: 120 kB
+ Private_Dirty: 68 kB
+ Referenced: 884 kB
+ Anonymous: 68 kB
+ LazyFree: 0 kB
+ AnonHugePages: 0 kB
+ ShmemPmdMapped: 0 kB
+ Shared_Hugetlb: 0 kB
+ Private_Hugetlb: 0 kB
+ Swap: 0 kB
+ SwapPss: 0 kB
+ Locked: 385 kB
diff --git a/Documentation/ABI/testing/pstore b/Documentation/ABI/testing/pstore
index 5fca9f5e10a3..d3cff4a7ee10 100644
--- a/Documentation/ABI/testing/pstore
+++ b/Documentation/ABI/testing/pstore
@@ -1,6 +1,7 @@
-Where: /sys/fs/pstore/... (or /dev/pstore/...)
+What: /sys/fs/pstore/...
+What: /dev/pstore/...
Date: March 2011
-Kernel Version: 2.6.39
+KernelVersion: 2.6.39
Contact: tony.luck@intel.com
Description: Generic interface to platform dependent persistent storage.
@@ -9,25 +10,25 @@ Description: Generic interface to platform dependent persistent storage.
provide a generic interface to show records captured in
the dying moments. In the case of a panic the last part
of the console log is captured, but other interesting
- data can also be saved.
+ data can also be saved::
- # mount -t pstore -o kmsg_bytes=8000 - /sys/fs/pstore
+ # mount -t pstore -o kmsg_bytes=8000 - /sys/fs/pstore
- $ ls -l /sys/fs/pstore/
- total 0
- -r--r--r-- 1 root root 7896 Nov 30 15:38 dmesg-erst-1
+ $ ls -l /sys/fs/pstore/
+ total 0
+ -r--r--r-- 1 root root 7896 Nov 30 15:38 dmesg-erst-1
Different users of this interface will result in different
filename prefixes. Currently two are defined:
- "dmesg" - saved console log
- "mce" - architecture dependent data from fatal h/w error
+ - "dmesg" - saved console log
+ - "mce" - architecture dependent data from fatal h/w error
Once the information in a file has been read, removing
the file will signal to the underlying persistent storage
- device that it can reclaim the space for later re-use.
+ device that it can reclaim the space for later re-use::
- $ rm /sys/fs/pstore/dmesg-erst-1
+ $ rm /sys/fs/pstore/dmesg-erst-1
The expectation is that all files in /sys/fs/pstore/
will be saved elsewhere and erased from persistent store
@@ -44,4 +45,3 @@ Description: Generic interface to platform dependent persistent storage.
backends are available, the preferred backend may be
set by passing the pstore.backend= argument to the kernel at
boot time.
-
diff --git a/Documentation/ABI/testing/rtc-cdev b/Documentation/ABI/testing/rtc-cdev
index 97447283f13b..25910c3c3d7e 100644
--- a/Documentation/ABI/testing/rtc-cdev
+++ b/Documentation/ABI/testing/rtc-cdev
@@ -33,6 +33,14 @@ Description:
Requires a separate RTC_PIE_ON call to enable the periodic
interrupts.
+ * RTC_VL_READ: Read the voltage inputs status of the RTC when
+ supported. The value is a bit field of RTC_VL_*, giving the
+ status of the main and backup voltages.
+
+ * RTC_VL_CLEAR: Clear the voltage status of the RTC. Some RTCs
+ need user interaction when the backup power provider is
+ replaced or charged to be able to clear the status.
+
The ioctl() calls supported by the older /dev/rtc interface are
also supported by the newer RTC class framework. However,
because the chips and systems are not standardized, some PC/AT
diff --git a/Documentation/ABI/testing/securityfs-secrets-coco b/Documentation/ABI/testing/securityfs-secrets-coco
new file mode 100644
index 000000000000..f2b6909155f9
--- /dev/null
+++ b/Documentation/ABI/testing/securityfs-secrets-coco
@@ -0,0 +1,51 @@
+What: security/secrets/coco
+Date: February 2022
+Contact: Dov Murik <dovmurik@linux.ibm.com>
+Description:
+ Exposes confidential computing (coco) EFI secrets to
+ userspace via securityfs.
+
+ EFI can declare memory area used by confidential computing
+ platforms (such as AMD SEV and SEV-ES) for secret injection by
+ the Guest Owner during VM's launch. The secrets are encrypted
+ by the Guest Owner and decrypted inside the trusted enclave,
+ and therefore are not readable by the untrusted host.
+
+ The efi_secret module exposes the secrets to userspace. Each
+ secret appears as a file under <securityfs>/secrets/coco,
+ where the filename is the GUID of the entry in the secrets
+ table. This module is loaded automatically by the EFI driver
+ if the EFI secret area is populated.
+
+ Two operations are supported for the files: read and unlink.
+ Reading the file returns the content of secret entry.
+ Unlinking the file overwrites the secret data with zeroes and
+ removes the entry from the filesystem. A secret cannot be read
+ after it has been unlinked.
+
+ For example, listing the available secrets::
+
+ # modprobe efi_secret
+ # ls -l /sys/kernel/security/secrets/coco
+ -r--r----- 1 root root 0 Jun 28 11:54 736870e5-84f0-4973-92ec-06879ce3da0b
+ -r--r----- 1 root root 0 Jun 28 11:54 83c83f7f-1356-4975-8b7e-d3a0b54312c6
+ -r--r----- 1 root root 0 Jun 28 11:54 9553f55d-3da2-43ee-ab5d-ff17f78864d2
+ -r--r----- 1 root root 0 Jun 28 11:54 e6f5a162-d67f-4750-a67c-5d065f2a9910
+
+ Reading the secret data by reading a file::
+
+ # cat /sys/kernel/security/secrets/coco/e6f5a162-d67f-4750-a67c-5d065f2a9910
+ the-content-of-the-secret-data
+
+ Wiping a secret by unlinking a file::
+
+ # rm /sys/kernel/security/secrets/coco/e6f5a162-d67f-4750-a67c-5d065f2a9910
+ # ls -l /sys/kernel/security/secrets/coco
+ -r--r----- 1 root root 0 Jun 28 11:54 736870e5-84f0-4973-92ec-06879ce3da0b
+ -r--r----- 1 root root 0 Jun 28 11:54 83c83f7f-1356-4975-8b7e-d3a0b54312c6
+ -r--r----- 1 root root 0 Jun 28 11:54 9553f55d-3da2-43ee-ab5d-ff17f78864d2
+
+ Note: The binary format of the secrets table injected by the
+ Guest Owner is described in
+ drivers/virt/coco/efi_secret/efi_secret.c under "Structure of
+ the EFI secret area".
diff --git a/Documentation/ABI/testing/sysfs-amd-pmc b/Documentation/ABI/testing/sysfs-amd-pmc
new file mode 100644
index 000000000000..c421b72844f1
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-amd-pmc
@@ -0,0 +1,13 @@
+What: /sys/bus/platform/drivers/amd_pmc/*/smu_fw_version
+Date: October 2022
+Contact: Mario Limonciello <mario.limonciello@amd.com>
+Description: Reading this file reports the version of the firmware loaded to
+ System Management Unit (SMU) contained in AMD CPUs and
+ APUs.
+
+What: /sys/bus/platform/drivers/amd_pmc/*/smu_program
+Date: October 2022
+Contact: Mario Limonciello <mario.limonciello@amd.com>
+Description: Reading this file reports the program corresponding to the SMU
+ firmware version. The program field is used to disambiguate two
+ APU/CPU models that can share the same firmware binary.
diff --git a/Documentation/ABI/testing/sysfs-amd-pmf b/Documentation/ABI/testing/sysfs-amd-pmf
new file mode 100644
index 000000000000..7fc0e1c2b76b
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-amd-pmf
@@ -0,0 +1,13 @@
+What: /sys/devices/platform/*/cnqf_enable
+Date: September 2022
+Contact: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
+Description: Reading this file tells if the AMD Platform Management(PMF)
+ Cool n Quiet Framework(CnQF) feature is enabled or not.
+
+ This feature is not enabled by default and gets only turned on
+ if OEM BIOS passes a "flag" to PMF ACPI function (index 11 or 12)
+ or in case the user writes "on".
+
+ To turn off CnQF user can write "off" to the sysfs node.
+ Note: Systems that support auto mode will not have this sysfs file
+ available.
diff --git a/Documentation/ABI/testing/sysfs-ata b/Documentation/ABI/testing/sysfs-ata
index 9ab0ef1dd1c7..3daecac48964 100644
--- a/Documentation/ABI/testing/sysfs-ata
+++ b/Documentation/ABI/testing/sysfs-ata
@@ -1,4 +1,4 @@
-What: /sys/class/ata_...
+What: /sys/class/ata_*
Description:
Provide a place in sysfs for storing the ATA topology of the
system. This allows retrieving various information about ATA
@@ -107,13 +107,14 @@ Description:
described in ATA8 7.16 and 7.17. Only valid if
the device is not a PM.
- pio_mode: (RO) Transfer modes supported by the device when
- in PIO mode. Mostly used by PATA device.
+ pio_mode: (RO) PIO transfer mode used by the device.
+ Mostly used by PATA devices.
- xfer_mode: (RO) Current transfer mode
+ xfer_mode: (RO) Current transfer mode. Mostly used by
+ PATA devices.
- dma_mode: (RO) Transfer modes supported by the device when
- in DMA mode. Mostly used by PATA device.
+ dma_mode: (RO) DMA transfer mode used by the device.
+ Mostly used by PATA devices.
class: (RO) Device class. Can be "ata" for disk,
"atapi" for packet device, "pmp" for PM, or
diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
deleted file mode 100644
index dea212db9df3..000000000000
--- a/Documentation/ABI/testing/sysfs-block
+++ /dev/null
@@ -1,273 +0,0 @@
-What: /sys/block/<disk>/stat
-Date: February 2008
-Contact: Jerome Marchand <jmarchan@redhat.com>
-Description:
- The /sys/block/<disk>/stat files displays the I/O
- statistics of disk <disk>. They contain 11 fields:
- 1 - reads completed successfully
- 2 - reads merged
- 3 - sectors read
- 4 - time spent reading (ms)
- 5 - writes completed
- 6 - writes merged
- 7 - sectors written
- 8 - time spent writing (ms)
- 9 - I/Os currently in progress
- 10 - time spent doing I/Os (ms)
- 11 - weighted time spent doing I/Os (ms)
- For more details refer Documentation/iostats.txt
-
-
-What: /sys/block/<disk>/<part>/stat
-Date: February 2008
-Contact: Jerome Marchand <jmarchan@redhat.com>
-Description:
- The /sys/block/<disk>/<part>/stat files display the
- I/O statistics of partition <part>. The format is the
- same as the above-written /sys/block/<disk>/stat
- format.
-
-
-What: /sys/block/<disk>/integrity/format
-Date: June 2008
-Contact: Martin K. Petersen <martin.petersen@oracle.com>
-Description:
- Metadata format for integrity capable block device.
- E.g. T10-DIF-TYPE1-CRC.
-
-
-What: /sys/block/<disk>/integrity/read_verify
-Date: June 2008
-Contact: Martin K. Petersen <martin.petersen@oracle.com>
-Description:
- Indicates whether the block layer should verify the
- integrity of read requests serviced by devices that
- support sending integrity metadata.
-
-
-What: /sys/block/<disk>/integrity/tag_size
-Date: June 2008
-Contact: Martin K. Petersen <martin.petersen@oracle.com>
-Description:
- Number of bytes of integrity tag space available per
- 512 bytes of data.
-
-
-What: /sys/block/<disk>/integrity/device_is_integrity_capable
-Date: July 2014
-Contact: Martin K. Petersen <martin.petersen@oracle.com>
-Description:
- Indicates whether a storage device is capable of storing
- integrity metadata. Set if the device is T10 PI-capable.
-
-What: /sys/block/<disk>/integrity/protection_interval_bytes
-Date: July 2015
-Contact: Martin K. Petersen <martin.petersen@oracle.com>
-Description:
- Describes the number of data bytes which are protected
- by one integrity tuple. Typically the device's logical
- block size.
-
-What: /sys/block/<disk>/integrity/write_generate
-Date: June 2008
-Contact: Martin K. Petersen <martin.petersen@oracle.com>
-Description:
- Indicates whether the block layer should automatically
- generate checksums for write requests bound for
- devices that support receiving integrity metadata.
-
-What: /sys/block/<disk>/alignment_offset
-Date: April 2009
-Contact: Martin K. Petersen <martin.petersen@oracle.com>
-Description:
- Storage devices may report a physical block size that is
- bigger than the logical block size (for instance a drive
- with 4KB physical sectors exposing 512-byte logical
- blocks to the operating system). This parameter
- indicates how many bytes the beginning of the device is
- offset from the disk's natural alignment.
-
-What: /sys/block/<disk>/<partition>/alignment_offset
-Date: April 2009
-Contact: Martin K. Petersen <martin.petersen@oracle.com>
-Description:
- Storage devices may report a physical block size that is
- bigger than the logical block size (for instance a drive
- with 4KB physical sectors exposing 512-byte logical
- blocks to the operating system). This parameter
- indicates how many bytes the beginning of the partition
- is offset from the disk's natural alignment.
-
-What: /sys/block/<disk>/queue/logical_block_size
-Date: May 2009
-Contact: Martin K. Petersen <martin.petersen@oracle.com>
-Description:
- This is the smallest unit the storage device can
- address. It is typically 512 bytes.
-
-What: /sys/block/<disk>/queue/physical_block_size
-Date: May 2009
-Contact: Martin K. Petersen <martin.petersen@oracle.com>
-Description:
- This is the smallest unit a physical storage device can
- write atomically. It is usually the same as the logical
- block size but may be bigger. One example is SATA
- drives with 4KB sectors that expose a 512-byte logical
- block size to the operating system. For stacked block
- devices the physical_block_size variable contains the
- maximum physical_block_size of the component devices.
-
-What: /sys/block/<disk>/queue/minimum_io_size
-Date: April 2009
-Contact: Martin K. Petersen <martin.petersen@oracle.com>
-Description:
- Storage devices may report a granularity or preferred
- minimum I/O size which is the smallest request the
- device can perform without incurring a performance
- penalty. For disk drives this is often the physical
- block size. For RAID arrays it is often the stripe
- chunk size. A properly aligned multiple of
- minimum_io_size is the preferred request size for
- workloads where a high number of I/O operations is
- desired.
-
-What: /sys/block/<disk>/queue/optimal_io_size
-Date: April 2009
-Contact: Martin K. Petersen <martin.petersen@oracle.com>
-Description:
- Storage devices may report an optimal I/O size, which is
- the device's preferred unit for sustained I/O. This is
- rarely reported for disk drives. For RAID arrays it is
- usually the stripe width or the internal track size. A
- properly aligned multiple of optimal_io_size is the
- preferred request size for workloads where sustained
- throughput is desired. If no optimal I/O size is
- reported this file contains 0.
-
-What: /sys/block/<disk>/queue/nomerges
-Date: January 2010
-Contact:
-Description:
- Standard I/O elevator operations include attempts to
- merge contiguous I/Os. For known random I/O loads these
- attempts will always fail and result in extra cycles
- being spent in the kernel. This allows one to turn off
- this behavior on one of two ways: When set to 1, complex
- merge checks are disabled, but the simple one-shot merges
- with the previous I/O request are enabled. When set to 2,
- all merge tries are disabled. The default value is 0 -
- which enables all types of merge tries.
-
-What: /sys/block/<disk>/discard_alignment
-Date: May 2011
-Contact: Martin K. Petersen <martin.petersen@oracle.com>
-Description:
- Devices that support discard functionality may
- internally allocate space in units that are bigger than
- the exported logical block size. The discard_alignment
- parameter indicates how many bytes the beginning of the
- device is offset from the internal allocation unit's
- natural alignment.
-
-What: /sys/block/<disk>/<partition>/discard_alignment
-Date: May 2011
-Contact: Martin K. Petersen <martin.petersen@oracle.com>
-Description:
- Devices that support discard functionality may
- internally allocate space in units that are bigger than
- the exported logical block size. The discard_alignment
- parameter indicates how many bytes the beginning of the
- partition is offset from the internal allocation unit's
- natural alignment.
-
-What: /sys/block/<disk>/queue/discard_granularity
-Date: May 2011
-Contact: Martin K. Petersen <martin.petersen@oracle.com>
-Description:
- Devices that support discard functionality may
- internally allocate space using units that are bigger
- than the logical block size. The discard_granularity
- parameter indicates the size of the internal allocation
- unit in bytes if reported by the device. Otherwise the
- discard_granularity will be set to match the device's
- physical block size. A discard_granularity of 0 means
- that the device does not support discard functionality.
-
-What: /sys/block/<disk>/queue/discard_max_bytes
-Date: May 2011
-Contact: Martin K. Petersen <martin.petersen@oracle.com>
-Description:
- Devices that support discard functionality may have
- internal limits on the number of bytes that can be
- trimmed or unmapped in a single operation. Some storage
- protocols also have inherent limits on the number of
- blocks that can be described in a single command. The
- discard_max_bytes parameter is set by the device driver
- to the maximum number of bytes that can be discarded in
- a single operation. Discard requests issued to the
- device must not exceed this limit. A discard_max_bytes
- value of 0 means that the device does not support
- discard functionality.
-
-What: /sys/block/<disk>/queue/discard_zeroes_data
-Date: May 2011
-Contact: Martin K. Petersen <martin.petersen@oracle.com>
-Description:
- Will always return 0. Don't rely on any specific behavior
- for discards, and don't read this file.
-
-What: /sys/block/<disk>/queue/write_same_max_bytes
-Date: January 2012
-Contact: Martin K. Petersen <martin.petersen@oracle.com>
-Description:
- Some devices support a write same operation in which a
- single data block can be written to a range of several
- contiguous blocks on storage. This can be used to wipe
- areas on disk or to initialize drives in a RAID
- configuration. write_same_max_bytes indicates how many
- bytes can be written in a single write same command. If
- write_same_max_bytes is 0, write same is not supported
- by the device.
-
-What: /sys/block/<disk>/queue/write_zeroes_max_bytes
-Date: November 2016
-Contact: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
-Description:
- Devices that support write zeroes operation in which a
- single request can be issued to zero out the range of
- contiguous blocks on storage without having any payload
- in the request. This can be used to optimize writing zeroes
- to the devices. write_zeroes_max_bytes indicates how many
- bytes can be written in a single write zeroes command. If
- write_zeroes_max_bytes is 0, write zeroes is not supported
- by the device.
-
-What: /sys/block/<disk>/queue/zoned
-Date: September 2016
-Contact: Damien Le Moal <damien.lemoal@hgst.com>
-Description:
- zoned indicates if the device is a zoned block device
- and the zone model of the device if it is indeed zoned.
- The possible values indicated by zoned are "none" for
- regular block devices and "host-aware" or "host-managed"
- for zoned block devices. The characteristics of
- host-aware and host-managed zoned block devices are
- described in the ZBC (Zoned Block Commands) and ZAC
- (Zoned Device ATA Command Set) standards. These standards
- also define the "drive-managed" zone model. However,
- since drive-managed zoned block devices do not support
- zone commands, they will be treated as regular block
- devices and zoned will report "none".
-
-What: /sys/block/<disk>/queue/chunk_sectors
-Date: September 2016
-Contact: Hannes Reinecke <hare@suse.com>
-Description:
- chunk_sectors has different meaning depending on the type
- of the disk. For a RAID device (dm-raid), chunk_sectors
- indicates the size in 512B sectors of the RAID volume
- stripe segment. For a zoned block device, either
- host-aware or host-managed, chunk_sectors indicates the
- size of 512B sectors of the zones of the device, with
- the eventual exception of the last zone of the device
- which may be smaller.
diff --git a/Documentation/ABI/testing/sysfs-block-device b/Documentation/ABI/testing/sysfs-block-device
index 82ef6eab042d..2d543cfa4079 100644
--- a/Documentation/ABI/testing/sysfs-block-device
+++ b/Documentation/ABI/testing/sysfs-block-device
@@ -8,11 +8,13 @@ Description:
It has the following valid values:
+ == ========================================================
0 OFF - the LED is not activated on activity
1 BLINK_ON - the LED blinks on every 10ms when activity is
detected.
2 BLINK_OFF - the LED is on when idle, and blinks off
every 10ms when activity is detected.
+ == ========================================================
Note that the user must turn sw_activity OFF it they wish to
control the activity LED via the em_message file.
@@ -45,7 +47,7 @@ Description:
- Values below -2 are rejected with -EINVAL
For more information, see
- Documentation/laptops/disk-shock-protection.txt
+ Documentation/admin-guide/laptops/disk-shock-protection.rst
What: /sys/block/*/device/ncq_prio_enable
@@ -53,6 +55,65 @@ Date: Oct, 2016
KernelVersion: v4.10
Contact: linux-ide@vger.kernel.org
Description:
- (RW) Write to the file to turn on or off the SATA ncq (native
- command queueing) support. By default this feature is turned
- off.
+ (RW) Write to the file to turn on or off the SATA NCQ (native
+ command queueing) priority support. By default this feature is
+ turned off. If the device does not support the SATA NCQ
+ priority feature, writing "1" to this file results in an error
+ (see ncq_prio_supported).
+
+
+What: /sys/block/*/device/sas_ncq_prio_enable
+Date: Oct, 2016
+KernelVersion: v4.10
+Contact: linux-ide@vger.kernel.org
+Description:
+ (RW) This is the equivalent of the ncq_prio_enable attribute
+ file for SATA devices connected to a SAS host-bus-adapter
+ (HBA) implementing support for the SATA NCQ priority feature.
+ This file does not exist if the HBA driver does not implement
+ support for the SATA NCQ priority feature, regardless of the
+ device support for this feature (see sas_ncq_prio_supported).
+
+
+What: /sys/block/*/device/ncq_prio_supported
+Date: Aug, 2021
+KernelVersion: v5.15
+Contact: linux-ide@vger.kernel.org
+Description:
+ (RO) Indicates if the device supports the SATA NCQ (native
+ command queueing) priority feature.
+
+
+What: /sys/block/*/device/sas_ncq_prio_supported
+Date: Aug, 2021
+KernelVersion: v5.15
+Contact: linux-ide@vger.kernel.org
+Description:
+ (RO) This is the equivalent of the ncq_prio_supported attribute
+ file for SATA devices connected to a SAS host-bus-adapter
+ (HBA) implementing support for the SATA NCQ priority feature.
+ This file does not exist if the HBA driver does not implement
+ support for the SATA NCQ priority feature, regardless of the
+ device support for this feature.
+
+
+What: /sys/block/*/device/cdl_supported
+Date: May, 2023
+KernelVersion: v6.5
+Contact: linux-scsi@vger.kernel.org
+Description:
+ (RO) Indicates if the device supports the command duration
+ limits feature found in some ATA and SCSI devices.
+
+
+What: /sys/block/*/device/cdl_enable
+Date: May, 2023
+KernelVersion: v6.5
+Contact: linux-scsi@vger.kernel.org
+Description:
+ (RW) For a device supporting the command duration limits
+ feature, write to the file to turn on or off the feature.
+ By default this feature is turned off.
+ Writing "1" to this file enables the use of command duration
+ limits for read and write commands in the kernel and turns on
+ the feature on the device. Writing "0" disables the feature.
diff --git a/Documentation/ABI/testing/sysfs-block-rnbd b/Documentation/ABI/testing/sysfs-block-rnbd
new file mode 100644
index 000000000000..80b420b5d6b8
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-block-rnbd
@@ -0,0 +1,64 @@
+What: /sys/block/rnbd<N>/rnbd/unmap_device
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: To unmap a volume, "normal" or "force" has to be written to:
+ /sys/block/rnbd<N>/rnbd/unmap_device
+
+ When "normal" is used, the operation will fail with EBUSY if any process
+ is using the device. When "force" is used, the device is also unmapped
+ when device is in use. All I/Os that are in progress will fail.
+
+ Example::
+
+ # echo "normal" > /sys/block/rnbd0/rnbd/unmap_device
+
+What: /sys/block/rnbd<N>/rnbd/state
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: The file contains the current state of the block device. The state file
+ returns "open" when the device is successfully mapped from the server
+ and accepting I/O requests. When the connection to the server gets
+ disconnected in case of an error (e.g. link failure), the state file
+ returns "closed" and all I/O requests submitted to it will fail with -EIO.
+
+What: /sys/block/rnbd<N>/rnbd/session
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: RNBD uses RTRS session to transport the data between client and
+ server. The entry "session" contains the name of the session, that
+ was used to establish the RTRS session. It's the same name that
+ was passed as server parameter to the map_device entry.
+
+What: /sys/block/rnbd<N>/rnbd/mapping_path
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: Contains the path that was passed as "device_path" to the map_device
+ operation.
+
+What: /sys/block/rnbd<N>/rnbd/access_mode
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: Contains the device access mode: ro, rw or migration.
+
+What: /sys/block/rnbd<N>/rnbd/resize
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: Write the number of sectors to change the size of the disk.
+
+What: /sys/block/rnbd<N>/rnbd/remap_device
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: Remap the disconnected device if the session is not destroyed yet.
+
+What: /sys/block/rnbd<N>/rnbd/nr_poll_queues
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: Contains the number of poll-mode queues
diff --git a/Documentation/ABI/testing/sysfs-block-zram b/Documentation/ABI/testing/sysfs-block-zram
index c1513c756af1..36c57de0a10a 100644
--- a/Documentation/ABI/testing/sysfs-block-zram
+++ b/Documentation/ABI/testing/sysfs-block-zram
@@ -22,14 +22,6 @@ Description:
device. The reset operation frees all the memory associated
with this device.
-What: /sys/block/zram<id>/max_comp_streams
-Date: February 2014
-Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
-Description:
- The max_comp_streams file is read-write and specifies the
- number of backend's zcomp_strm compression streams (number of
- concurrent compress operations).
-
What: /sys/block/zram<id>/comp_algorithm
Date: February 2014
Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
@@ -98,3 +90,63 @@ Description:
The backing_dev file is read-write and set up backing
device for zram to write incompressible pages.
For using, user should enable CONFIG_ZRAM_WRITEBACK.
+
+What: /sys/block/zram<id>/idle
+Date: November 2018
+Contact: Minchan Kim <minchan@kernel.org>
+Description:
+ idle file is write-only and mark zram slot as idle.
+ If system has mounted debugfs, user can see which slots
+ are idle via /sys/kernel/debug/zram/zram<id>/block_state
+
+What: /sys/block/zram<id>/writeback
+Date: November 2018
+Contact: Minchan Kim <minchan@kernel.org>
+Description:
+ The writeback file is write-only and trigger idle and/or
+ huge page writeback to backing device.
+
+What: /sys/block/zram<id>/bd_stat
+Date: November 2018
+Contact: Minchan Kim <minchan@kernel.org>
+Description:
+ The bd_stat file is read-only and represents backing device's
+ statistics (bd_count, bd_reads, bd_writes) in a format
+ similar to block layer statistics file format.
+
+What: /sys/block/zram<id>/writeback_limit_enable
+Date: November 2018
+Contact: Minchan Kim <minchan@kernel.org>
+Description:
+ The writeback_limit_enable file is read-write and specifies
+ eanbe of writeback_limit feature. "1" means eable the feature.
+ No limit "0" is the initial state.
+
+What: /sys/block/zram<id>/writeback_limit
+Date: November 2018
+Contact: Minchan Kim <minchan@kernel.org>
+Description:
+ The writeback_limit file is read-write and specifies the maximum
+ amount of writeback ZRAM can do. The limit could be changed
+ in run time.
+
+What: /sys/block/zram<id>/recomp_algorithm
+Date: November 2022
+Contact: Sergey Senozhatsky <senozhatsky@chromium.org>
+Description:
+ The recomp_algorithm file is read-write and allows to set
+ or show secondary compression algorithms.
+
+What: /sys/block/zram<id>/recompress
+Date: November 2022
+Contact: Sergey Senozhatsky <senozhatsky@chromium.org>
+Description:
+ The recompress file is write-only and triggers re-compression
+ with secondary compression algorithms.
+
+What: /sys/block/zram<id>/algorithm_params
+Date: August 2024
+Contact: Sergey Senozhatsky <senozhatsky@chromium.org>
+Description:
+ The algorithm_params file is write-only and is used to setup
+ compression algorithm parameters.
diff --git a/Documentation/ABI/testing/sysfs-bus-acpi b/Documentation/ABI/testing/sysfs-bus-acpi
index e7898cfe5fb1..6f2b907a8013 100644
--- a/Documentation/ABI/testing/sysfs-bus-acpi
+++ b/Documentation/ABI/testing/sysfs-bus-acpi
@@ -1,17 +1,18 @@
What: /sys/bus/acpi/devices/.../path
Date: December 2006
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
This attribute indicates the full path of ACPI namespace
object associated with the device object. For example,
\_SB_.PCI0.
+
This file is not present for device objects representing
fixed ACPI hardware features (like power and sleep
buttons).
What: /sys/bus/acpi/devices/.../modalias
Date: July 2007
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
This attribute indicates the PNP IDs of the device object.
That is acpi:HHHHHHHH:[CCCCCCC:]. Where each HHHHHHHH or
@@ -19,7 +20,7 @@ Description:
What: /sys/bus/acpi/devices/.../hid
Date: April 2005
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
This attribute indicates the hardware ID (_HID) of the
device object. For example, PNP0103.
@@ -28,14 +29,14 @@ Description:
What: /sys/bus/acpi/devices/.../description
Date: October 2012
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
This attribute contains the output of the device object's
_STR control method, if present.
What: /sys/bus/acpi/devices/.../adr
Date: October 2012
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
This attribute contains the output of the device object's
_ADR control method, which is present for ACPI device
@@ -44,14 +45,14 @@ Description:
What: /sys/bus/acpi/devices/.../uid
Date: October 2012
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
This attribute contains the output of the device object's
_UID control method, if present.
What: /sys/bus/acpi/devices/.../eject
Date: December 2006
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
Writing 1 to this attribute will trigger hot removal of
this device object. This file exists for every device
@@ -59,7 +60,7 @@ Description:
What: /sys/bus/acpi/devices/.../status
Date: Jan, 2014
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
(RO) Returns the ACPI device status: enabled, disabled or
functioning or present, if the method _STA is present.
@@ -67,14 +68,16 @@ Description:
The return value is a decimal integer representing the device's
status bitmap:
- Bit [0] – Set if the device is present.
- Bit [1] – Set if the device is enabled and decoding its
- resources.
- Bit [2] – Set if the device should be shown in the UI.
- Bit [3] – Set if the device is functioning properly (cleared if
- device failed its diagnostics).
- Bit [4] – Set if the battery is present.
- Bits [31:5] – Reserved (must be cleared)
+ =========== ==================================================
+ Bit [0] Set if the device is present.
+ Bit [1] Set if the device is enabled and decoding its
+ resources.
+ Bit [2] Set if the device should be shown in the UI.
+ Bit [3] Set if the device is functioning properly (cleared
+ if device failed its diagnostics).
+ Bit [4] Set if the battery is present.
+ Bits [31:5] Reserved (must be cleared)
+ =========== ==================================================
If bit [0] is clear, then bit 1 must also be clear (a device
that is not present cannot be enabled).
@@ -87,7 +90,7 @@ Description:
What: /sys/bus/acpi/devices/.../hrv
Date: Apr, 2016
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
(RO) Allows users to read the hardware version of non-PCI
hardware, if the _HRV control method is present. It is mostly
diff --git a/Documentation/ABI/testing/sysfs-bus-auxiliary b/Documentation/ABI/testing/sysfs-bus-auxiliary
new file mode 100644
index 000000000000..cc856079690f
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-auxiliary
@@ -0,0 +1,9 @@
+What: /sys/bus/auxiliary/devices/.../irqs/
+Date: April, 2024
+Contact: Shay Drory <shayd@nvidia.com>
+Description:
+ The /sys/devices/.../irqs directory contains a variable set of
+ files, with each file is named as irq number similar to PCI PF
+ or VF's irq number located in msi_irqs directory.
+ These irq files are added and removed dynamically when an IRQ
+ is requested and freed respectively for the PCI SF.
diff --git a/Documentation/ABI/testing/sysfs-bus-bcma b/Documentation/ABI/testing/sysfs-bus-bcma
index 721b4aea3020..e93d3ddca844 100644
--- a/Documentation/ABI/testing/sysfs-bus-bcma
+++ b/Documentation/ABI/testing/sysfs-bus-bcma
@@ -3,7 +3,7 @@ Date: May 2011
KernelVersion: 3.0
Contact: Rafał Miłecki <zajec5@gmail.com>
Description:
- Each BCMA core has it's manufacturer id. See
+ Each BCMA core has its manufacturer id. See
include/linux/bcma/bcma.h for possible values.
What: /sys/bus/bcma/devices/.../id
diff --git a/Documentation/ABI/testing/sysfs-bus-cdx b/Documentation/ABI/testing/sysfs-bus-cdx
new file mode 100644
index 000000000000..e84277531414
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-cdx
@@ -0,0 +1,121 @@
+What: /sys/bus/cdx/rescan
+Date: March 2023
+Contact: nipun.gupta@amd.com
+Description:
+ Writing y/1/on to this file will cause rescan of the bus
+ and devices on the CDX bus. Any new devices are scanned and
+ added to the list of Linux devices and any devices removed are
+ also deleted from Linux.
+
+ For example::
+
+ # echo 1 > /sys/bus/cdx/rescan
+
+What: /sys/bus/cdx/devices/.../vendor
+Date: March 2023
+Contact: nipun.gupta@amd.com
+Description:
+ Vendor ID for this CDX device, in hexadecimal. Vendor ID is
+ 16 bit identifier which is specific to the device manufacturer.
+ Combination of Vendor ID and Device ID identifies a device.
+
+What: /sys/bus/cdx/devices/.../device
+Date: March 2023
+Contact: nipun.gupta@amd.com
+Description:
+ Device ID for this CDX device, in hexadecimal. Device ID is
+ 16 bit identifier to identify a device type within the range
+ of a device manufacturer.
+ Combination of Vendor ID and Device ID identifies a device.
+
+What: /sys/bus/cdx/devices/.../subsystem_vendor
+Date: July 2023
+Contact: puneet.gupta@amd.com
+Description:
+ Subsystem Vendor ID for this CDX device, in hexadecimal.
+ Subsystem Vendor ID is 16 bit identifier specific to the
+ card manufacturer.
+
+What: /sys/bus/cdx/devices/.../subsystem_device
+Date: July 2023
+Contact: puneet.gupta@amd.com
+Description:
+ Subsystem Device ID for this CDX device, in hexadecimal
+ Subsystem Device ID is 16 bit identifier specific to the
+ card manufacturer.
+
+What: /sys/bus/cdx/devices/.../class
+Date: July 2023
+Contact: puneet.gupta@amd.com
+Description:
+ This file contains the class of the CDX device, in hexadecimal.
+ Class is 24 bit identifier specifies the functionality of the device.
+
+What: /sys/bus/cdx/devices/.../revision
+Date: July 2023
+Contact: puneet.gupta@amd.com
+Description:
+ This file contains the revision field of the CDX device, in hexadecimal.
+ Revision is 8 bit revision identifier of the device.
+
+What: /sys/bus/cdx/devices/.../enable
+Date: October 2023
+Contact: abhijit.gangurde@amd.com
+Description:
+ CDX bus should be disabled before updating the devices in FPGA.
+ Writing n/0/off will attempt to disable the CDX bus and.
+ writing y/1/on will attempt to enable the CDX bus. Reading this file
+ gives the current state of the bus, 1 for enabled and 0 for disabled.
+
+ For example::
+
+ # echo 1 > /sys/bus/cdx/.../enable
+
+What: /sys/bus/cdx/devices/.../reset
+Date: March 2023
+Contact: nipun.gupta@amd.com
+Description:
+ Writing y/1/on to this file resets the CDX device or all devices
+ on the bus. On resetting the device, the corresponding driver is
+ notified twice, once before the device is being reset, and again
+ after the reset has been complete.
+
+ For example::
+
+ # echo 1 > /sys/bus/cdx/.../reset
+
+What: /sys/bus/cdx/devices/.../remove
+Date: March 2023
+Contact: tarak.reddy@amd.com
+Description:
+ Writing y/1/on to this file removes the corresponding
+ device from the CDX bus. If the device is to be reconfigured
+ reconfigured in the Hardware, the device can be removed, so
+ that the device driver does not access the device while it is
+ being reconfigured.
+
+ For example::
+
+ # echo 1 > /sys/bus/cdx/devices/.../remove
+
+What: /sys/bus/cdx/devices/.../resource<N>
+Date: July 2023
+Contact: puneet.gupta@amd.com
+Description:
+ The resource binary file contains the content of the memory
+ regions. These files can be m'maped from userspace.
+
+What: /sys/bus/cdx/devices/.../modalias
+Date: July 2023
+Contact: nipun.gupta@amd.com
+Description:
+ This attribute indicates the CDX ID of the device.
+ That is in the format:
+ cdx:vXXXXdXXXXsvXXXXsdXXXXcXXXXXX,
+ where:
+
+ - vXXXX contains the vendor ID;
+ - dXXXX contains the device ID;
+ - svXXXX contains the subsystem vendor ID;
+ - sdXXXX contains the subsystem device ID;
+ - cXXXXXX contains the device class.
diff --git a/Documentation/ABI/testing/sysfs-bus-coreboot b/Documentation/ABI/testing/sysfs-bus-coreboot
new file mode 100644
index 000000000000..8e8d6af24a4c
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-coreboot
@@ -0,0 +1,45 @@
+What: /sys/bus/coreboot
+Date: August 2022
+Contact: Jack Rosenthal <jrosenth@chromium.org>
+Description:
+ The coreboot bus provides a variety of virtual devices used to
+ access data structures created by the Coreboot BIOS.
+
+What: /sys/bus/coreboot/devices/cbmem-<id>
+Date: August 2022
+Contact: Jack Rosenthal <jrosenth@chromium.org>
+Description:
+ CBMEM is a downwards-growing memory region created by Coreboot,
+ and contains tagged data structures to be shared with payloads
+ in the boot process and the OS. Each CBMEM entry is given a
+ directory in /sys/bus/coreboot/devices based on its id.
+ A list of ids known to Coreboot can be found in the coreboot
+ source tree at
+ ``src/commonlib/bsd/include/commonlib/bsd/cbmem_id.h``.
+
+What: /sys/bus/coreboot/devices/cbmem-<id>/address
+Date: August 2022
+Contact: Jack Rosenthal <jrosenth@chromium.org>
+Description:
+ This is the physical memory address that the CBMEM entry's data
+ begins at, in hexadecimal (e.g., ``0x76ffe000``).
+
+What: /sys/bus/coreboot/devices/cbmem-<id>/size
+Date: August 2022
+Contact: Jack Rosenthal <jrosenth@chromium.org>
+Description:
+ This is the size of the CBMEM entry's data, in hexadecimal
+ (e.g., ``0x1234``).
+
+What: /sys/bus/coreboot/devices/cbmem-<id>/mem
+Date: August 2022
+Contact: Jack Rosenthal <jrosenth@chromium.org>
+Description:
+ A file exposing read/write access to the entry's data. Note
+ that this file does not support mmap(), as coreboot
+ does not guarantee that the data will be page-aligned.
+
+ The mode of this file is 0600. While there shouldn't be
+ anything security-sensitive contained in CBMEM, read access
+ requires root privileges given this is exposing a small subset
+ of physical memory.
diff --git a/Documentation/ABI/testing/sysfs-bus-coresight-devices-cti b/Documentation/ABI/testing/sysfs-bus-coresight-devices-cti
new file mode 100644
index 000000000000..a2aef7f5a6d7
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-coresight-devices-cti
@@ -0,0 +1,247 @@
+What: /sys/bus/coresight/devices/<cti-name>/enable
+Date: March 2020
+KernelVersion: 5.7
+Contact: Mike Leach or Mathieu Poirier
+Description: (RW) Enable/Disable the CTI hardware.
+
+What: /sys/bus/coresight/devices/<cti-name>/powered
+Date: March 2020
+KernelVersion: 5.7
+Contact: Mike Leach or Mathieu Poirier
+Description: (Read) Indicate if the CTI hardware is powered.
+
+What: /sys/bus/coresight/devices/<cti-name>/ctmid
+Date: March 2020
+KernelVersion: 5.7
+Contact: Mike Leach or Mathieu Poirier
+Description: (Read) Display the associated CTM ID
+
+What: /sys/bus/coresight/devices/<cti-name>/nr_trigger_cons
+Date: March 2020
+KernelVersion: 5.7
+Contact: Mike Leach or Mathieu Poirier
+Description: (Read) Number of devices connected to triggers on this CTI
+
+What: /sys/bus/coresight/devices/<cti-name>/triggers<N>/name
+Date: March 2020
+KernelVersion: 5.7
+Contact: Mike Leach or Mathieu Poirier
+Description: (Read) Name of connected device <N>
+
+What: /sys/bus/coresight/devices/<cti-name>/triggers<N>/in_signals
+Date: March 2020
+KernelVersion: 5.7
+Contact: Mike Leach or Mathieu Poirier
+Description: (Read) Input trigger signals from connected device <N>
+
+What: /sys/bus/coresight/devices/<cti-name>/triggers<N>/in_types
+Date: March 2020
+KernelVersion: 5.7
+Contact: Mike Leach or Mathieu Poirier
+Description: (Read) Functional types for the input trigger signals
+ from connected device <N>
+
+What: /sys/bus/coresight/devices/<cti-name>/triggers<N>/out_signals
+Date: March 2020
+KernelVersion: 5.7
+Contact: Mike Leach or Mathieu Poirier
+Description: (Read) Output trigger signals to connected device <N>
+
+What: /sys/bus/coresight/devices/<cti-name>/triggers<N>/out_types
+Date: March 2020
+KernelVersion: 5.7
+Contact: Mike Leach or Mathieu Poirier
+Description: (Read) Functional types for the output trigger signals
+ to connected device <N>
+
+What: /sys/bus/coresight/devices/<cti-name>/regs/inout_sel
+Date: March 2020
+KernelVersion: 5.7
+Contact: Mike Leach or Mathieu Poirier
+Description: (RW) Select the index for inen and outen registers.
+
+What: /sys/bus/coresight/devices/<cti-name>/regs/inen
+Date: March 2020
+KernelVersion: 5.7
+Contact: Mike Leach or Mathieu Poirier
+Description: (RW) Read or write the CTIINEN register selected by inout_sel.
+
+What: /sys/bus/coresight/devices/<cti-name>/regs/outen
+Date: March 2020
+KernelVersion: 5.7
+Contact: Mike Leach or Mathieu Poirier
+Description: (RW) Read or write the CTIOUTEN register selected by inout_sel.
+
+What: /sys/bus/coresight/devices/<cti-name>/regs/gate
+Date: March 2020
+KernelVersion: 5.7
+Contact: Mike Leach or Mathieu Poirier
+Description: (RW) Read or write CTIGATE register.
+
+What: /sys/bus/coresight/devices/<cti-name>/regs/asicctl
+Date: March 2020
+KernelVersion: 5.7
+Contact: Mike Leach or Mathieu Poirier
+Description: (RW) Read or write ASICCTL register.
+
+What: /sys/bus/coresight/devices/<cti-name>/regs/intack
+Date: March 2020
+KernelVersion: 5.7
+Contact: Mike Leach or Mathieu Poirier
+Description: (Write) Write the INTACK register.
+
+What: /sys/bus/coresight/devices/<cti-name>/regs/appset
+Date: March 2020
+KernelVersion: 5.7
+Contact: Mike Leach or Mathieu Poirier
+Description: (RW) Set CTIAPPSET register to activate channel. Read back to
+ determine current value of register.
+
+What: /sys/bus/coresight/devices/<cti-name>/regs/appclear
+Date: March 2020
+KernelVersion: 5.7
+Contact: Mike Leach or Mathieu Poirier
+Description: (Write) Write APPCLEAR register to deactivate channel.
+
+What: /sys/bus/coresight/devices/<cti-name>/regs/apppulse
+Date: March 2020
+KernelVersion: 5.7
+Contact: Mike Leach or Mathieu Poirier
+Description: (Write) Write APPPULSE to pulse a channel active for one clock
+ cycle.
+
+What: /sys/bus/coresight/devices/<cti-name>/regs/chinstatus
+Date: March 2020
+KernelVersion: 5.7
+Contact: Mike Leach or Mathieu Poirier
+Description: (Read) Read current status of channel inputs.
+
+What: /sys/bus/coresight/devices/<cti-name>/regs/choutstatus
+Date: March 2020
+KernelVersion: 5.7
+Contact: Mike Leach or Mathieu Poirier
+Description: (Read) read current status of channel outputs.
+
+What: /sys/bus/coresight/devices/<cti-name>/regs/triginstatus
+Date: March 2020
+KernelVersion: 5.7
+Contact: Mike Leach or Mathieu Poirier
+Description: (Read) read current status of input trigger signals
+
+What: /sys/bus/coresight/devices/<cti-name>/regs/trigoutstatus
+Date: March 2020
+KernelVersion: 5.7
+Contact: Mike Leach or Mathieu Poirier
+Description: (Read) read current status of output trigger signals.
+
+What: /sys/bus/coresight/devices/<cti-name>/channels/trigin_attach
+Date: March 2020
+KernelVersion: 5.7
+Contact: Mike Leach or Mathieu Poirier
+Description: (Write) Attach a CTI input trigger to a CTM channel.
+
+What: /sys/bus/coresight/devices/<cti-name>/channels/trigin_detach
+Date: March 2020
+KernelVersion: 5.7
+Contact: Mike Leach or Mathieu Poirier
+Description: (Write) Detach a CTI input trigger from a CTM channel.
+
+What: /sys/bus/coresight/devices/<cti-name>/channels/trigout_attach
+Date: March 2020
+KernelVersion: 5.7
+Contact: Mike Leach or Mathieu Poirier
+Description: (Write) Attach a CTI output trigger to a CTM channel.
+
+What: /sys/bus/coresight/devices/<cti-name>/channels/trigout_detach
+Date: March 2020
+KernelVersion: 5.7
+Contact: Mike Leach or Mathieu Poirier
+Description: (Write) Detach a CTI output trigger from a CTM channel.
+
+What: /sys/bus/coresight/devices/<cti-name>/channels/chan_gate_enable
+Date: March 2020
+KernelVersion: 5.7
+Contact: Mike Leach or Mathieu Poirier
+Description: (RW) Enable CTIGATE for single channel (Write) or list enabled
+ channels through the gate (R).
+
+What: /sys/bus/coresight/devices/<cti-name>/channels/chan_gate_disable
+Date: March 2020
+KernelVersion: 5.7
+Contact: Mike Leach or Mathieu Poirier
+Description: (Write) Disable CTIGATE for single channel.
+
+What: /sys/bus/coresight/devices/<cti-name>/channels/chan_set
+Date: March 2020
+KernelVersion: 5.7
+Contact: Mike Leach or Mathieu Poirier
+Description: (Write) Activate a single channel.
+
+What: /sys/bus/coresight/devices/<cti-name>/channels/chan_clear
+Date: March 2020
+KernelVersion: 5.7
+Contact: Mike Leach or Mathieu Poirier
+Description: (Write) Deactivate a single channel.
+
+What: /sys/bus/coresight/devices/<cti-name>/channels/chan_pulse
+Date: March 2020
+KernelVersion: 5.7
+Contact: Mike Leach or Mathieu Poirier
+Description: (Write) Pulse a single channel - activate for a single clock cycle.
+
+What: /sys/bus/coresight/devices/<cti-name>/channels/trigout_filtered
+Date: March 2020
+KernelVersion: 5.7
+Contact: Mike Leach or Mathieu Poirier
+Description: (Read) List of output triggers filtered across all connections.
+
+What: /sys/bus/coresight/devices/<cti-name>/channels/trig_filter_enable
+Date: March 2020
+KernelVersion: 5.7
+Contact: Mike Leach or Mathieu Poirier
+Description: (RW) Enable or disable trigger output signal filtering.
+
+What: /sys/bus/coresight/devices/<cti-name>/channels/chan_inuse
+Date: March 2020
+KernelVersion: 5.7
+Contact: Mike Leach or Mathieu Poirier
+Description: (Read) show channels with at least one attached trigger signal.
+
+What: /sys/bus/coresight/devices/<cti-name>/channels/chan_free
+Date: March 2020
+KernelVersion: 5.7
+Contact: Mike Leach or Mathieu Poirier
+Description: (Read) show channels with no attached trigger signals.
+
+What: /sys/bus/coresight/devices/<cti-name>/channels/chan_xtrigs_sel
+Date: March 2020
+KernelVersion: 5.7
+Contact: Mike Leach or Mathieu Poirier
+Description: (RW) Write channel number to select a channel to view, read to
+ see selected channel number.
+
+What: /sys/bus/coresight/devices/<cti-name>/channels/chan_xtrigs_in
+Date: March 2020
+KernelVersion: 5.7
+Contact: Mike Leach or Mathieu Poirier
+Description: (Read) Read to see input triggers connected to selected view
+ channel.
+
+What: /sys/bus/coresight/devices/<cti-name>/channels/chan_xtrigs_out
+Date: March 2020
+KernelVersion: 5.7
+Contact: Mike Leach or Mathieu Poirier
+Description: (Read) Read to see output triggers connected to selected view
+ channel.
+
+What: /sys/bus/coresight/devices/<cti-name>/channels/chan_xtrigs_reset
+Date: March 2020
+KernelVersion: 5.7
+Contact: Mike Leach or Mathieu Poirier
+Description: (Write) Clear all channel / trigger programming.
+
+What: /sys/bus/coresight/devices/<cti-name>/label
+Date: Aug 2025
+KernelVersion 6.18
+Contact: Mao Jinlong <quic_jinlmao@quicinc.com>
+Description: (Read) Show hardware context information of device.
diff --git a/Documentation/ABI/testing/sysfs-bus-coresight-devices-dummy-source b/Documentation/ABI/testing/sysfs-bus-coresight-devices-dummy-source
new file mode 100644
index 000000000000..321e3ee1fc9d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-coresight-devices-dummy-source
@@ -0,0 +1,21 @@
+What: /sys/bus/coresight/devices/dummy_source<N>/enable_source
+Date: Dec 2024
+KernelVersion: 6.14
+Contact: Mao Jinlong <quic_jinlmao@quicinc.com>
+Description: (RW) Enable/disable tracing of dummy source. A sink should be activated
+ before enabling the source. The path of coresight components linking
+ the source to the sink is configured and managed automatically by the
+ coresight framework.
+
+What: /sys/bus/coresight/devices/dummy_source<N>/traceid
+Date: Dec 2024
+KernelVersion: 6.14
+Contact: Mao Jinlong <quic_jinlmao@quicinc.com>
+Description: (R) Show the trace ID that will appear in the trace stream
+ coming from this trace entity.
+
+What: /sys/bus/coresight/devices/dummy_source<N>/label
+Date: Aug 2025
+KernelVersion 6.18
+Contact: Mao Jinlong <quic_jinlmao@quicinc.com>
+Description: (Read) Show hardware context information of device.
diff --git a/Documentation/ABI/testing/sysfs-bus-coresight-devices-etb10 b/Documentation/ABI/testing/sysfs-bus-coresight-devices-etb10
index b5f526081711..f30526949687 100644
--- a/Documentation/ABI/testing/sysfs-bus-coresight-devices-etb10
+++ b/Documentation/ABI/testing/sysfs-bus-coresight-devices-etb10
@@ -4,7 +4,10 @@ KernelVersion: 3.19
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RW) Add/remove a sink from a trace path. There can be multiple
source for a single sink.
- ex: echo 1 > /sys/bus/coresight/devices/20010000.etb/enable_sink
+
+ ex::
+
+ echo 1 > /sys/bus/coresight/devices/20010000.etb/enable_sink
What: /sys/bus/coresight/devices/<memory_map>.etb/trigger_cntr
Date: November 2014
@@ -16,25 +19,31 @@ Description: (RW) Disables write access to the Trace RAM by stopping the
into the Trace RAM following the trigger event is equal to the
value stored in this register+1 (from ARM ETB-TRM).
+What: /sys/bus/coresight/devices/<memory_map>.etb/label
+Date: Aug 2025
+KernelVersion 6.18
+Contact: Mao Jinlong <quic_jinlmao@quicinc.com>
+Description: (Read) Show hardware context information of device.
+
What: /sys/bus/coresight/devices/<memory_map>.etb/mgmt/rdp
Date: March 2016
KernelVersion: 4.7
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Defines the depth, in words, of the trace RAM in powers of
+Description: (Read) Defines the depth, in words, of the trace RAM in powers of
2. The value is read directly from HW register RDP, 0x004.
What: /sys/bus/coresight/devices/<memory_map>.etb/mgmt/sts
Date: March 2016
KernelVersion: 4.7
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Shows the value held by the ETB status register. The value
+Description: (Read) Shows the value held by the ETB status register. The value
is read directly from HW register STS, 0x00C.
What: /sys/bus/coresight/devices/<memory_map>.etb/mgmt/rrp
Date: March 2016
KernelVersion: 4.7
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Shows the value held by the ETB RAM Read Pointer register
+Description: (Read) Shows the value held by the ETB RAM Read Pointer register
that is used to read entries from the Trace RAM over the APB
interface. The value is read directly from HW register RRP,
0x014.
@@ -43,7 +52,7 @@ What: /sys/bus/coresight/devices/<memory_map>.etb/mgmt/rwp
Date: March 2016
KernelVersion: 4.7
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Shows the value held by the ETB RAM Write Pointer register
+Description: (Read) Shows the value held by the ETB RAM Write Pointer register
that is used to sets the write pointer to write entries from
the CoreSight bus into the Trace RAM. The value is read directly
from HW register RWP, 0x018.
@@ -52,21 +61,21 @@ What: /sys/bus/coresight/devices/<memory_map>.etb/mgmt/trg
Date: March 2016
KernelVersion: 4.7
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Similar to "trigger_cntr" above except that this value is
+Description: (Read) Similar to "trigger_cntr" above except that this value is
read directly from HW register TRG, 0x01C.
What: /sys/bus/coresight/devices/<memory_map>.etb/mgmt/ctl
Date: March 2016
KernelVersion: 4.7
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Shows the value held by the ETB Control register. The value
+Description: (Read) Shows the value held by the ETB Control register. The value
is read directly from HW register CTL, 0x020.
What: /sys/bus/coresight/devices/<memory_map>.etb/mgmt/ffsr
Date: March 2016
KernelVersion: 4.7
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Shows the value held by the ETB Formatter and Flush Status
+Description: (Read) Shows the value held by the ETB Formatter and Flush Status
register. The value is read directly from HW register FFSR,
0x300.
@@ -74,6 +83,6 @@ What: /sys/bus/coresight/devices/<memory_map>.etb/mgmt/ffcr
Date: March 2016
KernelVersion: 4.7
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Shows the value held by the ETB Formatter and Flush Control
+Description: (Read) Shows the value held by the ETB Formatter and Flush Control
register. The value is read directly from HW register FFCR,
0x304.
diff --git a/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm3x b/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm3x
index 924265a1295d..245c322c91f1 100644
--- a/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm3x
+++ b/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm3x
@@ -22,7 +22,7 @@ Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RW) Used in conjunction with @addr_idx. Specifies
characteristics about the address comparator being configure,
for example the access type, the kind of instruction to trace,
- processor contect ID to trigger on, etc. Individual fields in
+ processor context ID to trigger on, etc. Individual fields in
the access type register may vary on the version of the trace
entity.
@@ -31,7 +31,7 @@ Date: November 2014
KernelVersion: 3.19
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RW) Used in conjunction with @addr_idx. Specifies the range of
- addresses to trigger on. Inclusion or exclusion is specificed
+ addresses to trigger on. Inclusion or exclusion is specified
in the corresponding access type register.
What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/addr_single
@@ -146,28 +146,28 @@ What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/nr_addr_cmp
Date: November 2014
KernelVersion: 3.19
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Provides the number of address comparators pairs accessible
+Description: (Read) Provides the number of address comparators pairs accessible
on a trace unit, as specified by bit 3:0 of register ETMCCR.
What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/nr_cntr
Date: November 2014
KernelVersion: 3.19
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Provides the number of counters accessible on a trace unit,
+Description: (Read) Provides the number of counters accessible on a trace unit,
as specified by bit 15:13 of register ETMCCR.
What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/nr_ctxid_cmp
Date: November 2014
KernelVersion: 3.19
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Provides the number of context ID comparator available on a
+Description: (Read) Provides the number of context ID comparator available on a
trace unit, as specified by bit 25:24 of register ETMCCR.
What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/reset
Date: November 2014
KernelVersion: 3.19
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (W) Cancels all configuration on a trace unit and set it back
+Description: (Write) Cancels all configuration on a trace unit and set it back
to its boot configuration.
What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/seq_12_event
@@ -216,7 +216,7 @@ What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/curr_seq_state
Date: November 2014
KernelVersion: 3.19
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Holds the current state of the sequencer.
+Description: (Read) Holds the current state of the sequencer.
What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/sync_freq
Date: November 2014
@@ -236,7 +236,7 @@ What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/traceid
Date: November 2014
KernelVersion: 3.19
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (RW) Holds the trace ID that will appear in the trace stream
+Description: (RO) Holds the trace ID that will appear in the trace stream
coming from this trace entity.
What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/trigger_event
@@ -251,6 +251,12 @@ KernelVersion: 4.4
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RO) Holds the cpu number this tracer is affined to.
+What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/label
+Date: Aug 2025
+KernelVersion 6.18
+Contact: Mao Jinlong <quic_jinlmao@quicinc.com>
+Description: (Read) Show hardware context information of device.
+
What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/mgmt/etmccr
Date: September 2015
KernelVersion: 4.4
@@ -304,19 +310,19 @@ What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/mgmt/etmtsscr
Date: September 2015
KernelVersion: 4.4
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (RO) Print the content of the ETM Trace Start/Stop Conrol
+Description: (RO) Print the content of the ETM Trace Start/Stop Control
register (0x018). The value is read directly from the HW.
What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/mgmt/etmtecr1
Date: September 2015
KernelVersion: 4.4
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (RO) Print the content of the ETM Enable Conrol #1
+Description: (RO) Print the content of the ETM Enable Control #1
register (0x024). The value is read directly from the HW.
What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/mgmt/etmtecr2
Date: September 2015
KernelVersion: 4.4
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (RO) Print the content of the ETM Enable Conrol #2
+Description: (RO) Print the content of the ETM Enable Control #2
register (0x01c). The value is read directly from the HW.
diff --git a/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm4x b/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm4x
index 36258bc1b473..6f19a6a5f2e1 100644
--- a/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm4x
+++ b/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm4x
@@ -1,4 +1,4 @@
-What: /sys/bus/coresight/devices/<memory_map>.etm/enable_source
+What: /sys/bus/coresight/devices/etm<N>/enable_source
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
@@ -8,82 +8,82 @@ Description: (RW) Enable/disable tracing on this specific trace entiry.
of coresight components linking the source to the sink is
configured and managed automatically by the coresight framework.
-What: /sys/bus/coresight/devices/<memory_map>.etm/cpu
+What: /sys/bus/coresight/devices/etm<N>/cpu
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) The CPU this tracing entity is associated with.
+Description: (Read) The CPU this tracing entity is associated with.
-What: /sys/bus/coresight/devices/<memory_map>.etm/nr_pe_cmp
+What: /sys/bus/coresight/devices/etm<N>/nr_pe_cmp
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Indicates the number of PE comparator inputs that are
+Description: (Read) Indicates the number of PE comparator inputs that are
available for tracing.
-What: /sys/bus/coresight/devices/<memory_map>.etm/nr_addr_cmp
+What: /sys/bus/coresight/devices/etm<N>/nr_addr_cmp
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Indicates the number of address comparator pairs that are
+Description: (Read) Indicates the number of address comparator pairs that are
available for tracing.
-What: /sys/bus/coresight/devices/<memory_map>.etm/nr_cntr
+What: /sys/bus/coresight/devices/etm<N>/nr_cntr
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Indicates the number of counters that are available for
+Description: (Read) Indicates the number of counters that are available for
tracing.
-What: /sys/bus/coresight/devices/<memory_map>.etm/nr_ext_inp
+What: /sys/bus/coresight/devices/etm<N>/nr_ext_inp
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Indicates how many external inputs are implemented.
+Description: (Read) Indicates how many external inputs are implemented.
-What: /sys/bus/coresight/devices/<memory_map>.etm/numcidc
+What: /sys/bus/coresight/devices/etm<N>/numcidc
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Indicates the number of Context ID comparators that are
+Description: (Read) Indicates the number of Context ID comparators that are
available for tracing.
-What: /sys/bus/coresight/devices/<memory_map>.etm/numvmidc
+What: /sys/bus/coresight/devices/etm<N>/numvmidc
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Indicates the number of VMID comparators that are available
+Description: (Read) Indicates the number of VMID comparators that are available
for tracing.
-What: /sys/bus/coresight/devices/<memory_map>.etm/nrseqstate
+What: /sys/bus/coresight/devices/etm<N>/nrseqstate
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Indicates the number of sequencer states that are
+Description: (Read) Indicates the number of sequencer states that are
implemented.
-What: /sys/bus/coresight/devices/<memory_map>.etm/nr_resource
+What: /sys/bus/coresight/devices/etm<N>/nr_resource
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Indicates the number of resource selection pairs that are
+Description: (Read) Indicates the number of resource selection pairs that are
available for tracing.
-What: /sys/bus/coresight/devices/<memory_map>.etm/nr_ss_cmp
+What: /sys/bus/coresight/devices/etm<N>/nr_ss_cmp
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Indicates the number of single-shot comparator controls that
+Description: (Read) Indicates the number of single-shot comparator controls that
are available for tracing.
-What: /sys/bus/coresight/devices/<memory_map>.etm/reset
+What: /sys/bus/coresight/devices/etm<N>/reset
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (W) Cancels all configuration on a trace unit and set it back
+Description: (Write) Cancels all configuration on a trace unit and set it back
to its boot configuration.
-What: /sys/bus/coresight/devices/<memory_map>.etm/mode
+What: /sys/bus/coresight/devices/etm<N>/mode
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
@@ -91,373 +91,442 @@ Description: (RW) Controls various modes supported by this ETM, for example
P0 instruction tracing, branch broadcast, cycle counting and
context ID tracing.
-What: /sys/bus/coresight/devices/<memory_map>.etm/pe
+What: /sys/bus/coresight/devices/etm<N>/pe
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RW) Controls which PE to trace.
-What: /sys/bus/coresight/devices/<memory_map>.etm/event
+What: /sys/bus/coresight/devices/etm<N>/event
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RW) Controls the tracing of arbitrary events from bank 0 to 3.
-What: /sys/bus/coresight/devices/<memory_map>.etm/event_instren
+What: /sys/bus/coresight/devices/etm<N>/event_instren
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RW) Controls the behavior of the events in bank 0 to 3.
-What: /sys/bus/coresight/devices/<memory_map>.etm/event_ts
+What: /sys/bus/coresight/devices/etm<N>/event_ts
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RW) Controls the insertion of global timestamps in the trace
streams.
-What: /sys/bus/coresight/devices/<memory_map>.etm/syncfreq
+What: /sys/bus/coresight/devices/etm<N>/syncfreq
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RW) Controls how often trace synchronization requests occur.
-What: /sys/bus/coresight/devices/<memory_map>.etm/cyc_threshold
+What: /sys/bus/coresight/devices/etm<N>/cyc_threshold
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RW) Sets the threshold value for cycle counting.
-What: /sys/bus/coresight/devices/<memory_map>.etm/bb_ctrl
+What: /sys/bus/coresight/devices/etm<N>/bb_ctrl
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RW) Controls which regions in the memory map are enabled to
use branch broadcasting.
-What: /sys/bus/coresight/devices/<memory_map>.etm/event_vinst
+What: /sys/bus/coresight/devices/etm<N>/event_vinst
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RW) Controls instruction trace filtering.
-What: /sys/bus/coresight/devices/<memory_map>.etm/s_exlevel_vinst
+What: /sys/bus/coresight/devices/etm<N>/s_exlevel_vinst
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RW) In Secure state, each bit controls whether instruction
tracing is enabled for the corresponding exception level.
-What: /sys/bus/coresight/devices/<memory_map>.etm/ns_exlevel_vinst
+What: /sys/bus/coresight/devices/etm<N>/ns_exlevel_vinst
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RW) In non-secure state, each bit controls whether instruction
tracing is enabled for the corresponding exception level.
-What: /sys/bus/coresight/devices/<memory_map>.etm/addr_idx
+What: /sys/bus/coresight/devices/etm<N>/addr_idx
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RW) Select which address comparator or pair (of comparators) to
work with.
-What: /sys/bus/coresight/devices/<memory_map>.etm/addr_instdatatype
+What: /sys/bus/coresight/devices/etm<N>/addr_instdatatype
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RW) Controls what type of comparison the trace unit performs.
-What: /sys/bus/coresight/devices/<memory_map>.etm/addr_single
+What: /sys/bus/coresight/devices/etm<N>/addr_single
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RW) Used to setup single address comparator values.
-What: /sys/bus/coresight/devices/<memory_map>.etm/addr_range
+What: /sys/bus/coresight/devices/etm<N>/addr_range
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RW) Used to setup address range comparator values.
-What: /sys/bus/coresight/devices/<memory_map>.etm/seq_idx
+What: /sys/bus/coresight/devices/etm<N>/seq_idx
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RW) Select which sequensor.
-What: /sys/bus/coresight/devices/<memory_map>.etm/seq_state
+What: /sys/bus/coresight/devices/etm<N>/seq_state
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RW) Use this to set, or read, the sequencer state.
-What: /sys/bus/coresight/devices/<memory_map>.etm/seq_event
+What: /sys/bus/coresight/devices/etm<N>/seq_event
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RW) Moves the sequencer state to a specific state.
-What: /sys/bus/coresight/devices/<memory_map>.etm/seq_reset_event
+What: /sys/bus/coresight/devices/etm<N>/seq_reset_event
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RW) Moves the sequencer to state 0 when a programmed event
occurs.
-What: /sys/bus/coresight/devices/<memory_map>.etm/cntr_idx
+What: /sys/bus/coresight/devices/etm<N>/cntr_idx
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RW) Select which counter unit to work with.
-What: /sys/bus/coresight/devices/<memory_map>.etm/cntrldvr
+What: /sys/bus/coresight/devices/etm<N>/cntrldvr
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RW) This sets or returns the reload count value of the
specific counter.
-What: /sys/bus/coresight/devices/<memory_map>.etm/cntr_val
+What: /sys/bus/coresight/devices/etm<N>/cntr_val
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RW) This sets or returns the current count value of the
specific counter.
-What: /sys/bus/coresight/devices/<memory_map>.etm/cntr_ctrl
+What: /sys/bus/coresight/devices/etm<N>/cntr_ctrl
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RW) Controls the operation of the selected counter.
-What: /sys/bus/coresight/devices/<memory_map>.etm/res_idx
+What: /sys/bus/coresight/devices/etm<N>/res_idx
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RW) Select which resource selection unit to work with.
-What: /sys/bus/coresight/devices/<memory_map>.etm/res_ctrl
+What: /sys/bus/coresight/devices/etm<N>/res_ctrl
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RW) Controls the selection of the resources in the trace unit.
-What: /sys/bus/coresight/devices/<memory_map>.etm/ctxid_idx
+What: /sys/bus/coresight/devices/etm<N>/ctxid_idx
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RW) Select which context ID comparator to work with.
-What: /sys/bus/coresight/devices/<memory_map>.etm/ctxid_pid
+What: /sys/bus/coresight/devices/etm<N>/ctxid_pid
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RW) Get/Set the context ID comparator value to trigger on.
-What: /sys/bus/coresight/devices/<memory_map>.etm/ctxid_masks
+What: /sys/bus/coresight/devices/etm<N>/ctxid_masks
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RW) Mask for all 8 context ID comparator value
registers (if implemented).
-What: /sys/bus/coresight/devices/<memory_map>.etm/vmid_idx
+What: /sys/bus/coresight/devices/etm<N>/vmid_idx
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RW) Select which virtual machine ID comparator to work with.
-What: /sys/bus/coresight/devices/<memory_map>.etm/vmid_val
+What: /sys/bus/coresight/devices/etm<N>/vmid_val
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RW) Get/Set the virtual machine ID comparator value to
trigger on.
-What: /sys/bus/coresight/devices/<memory_map>.etm/vmid_masks
+What: /sys/bus/coresight/devices/etm<N>/vmid_masks
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RW) Mask for all 8 virtual machine ID comparator value
registers (if implemented).
-What: /sys/bus/coresight/devices/<memory_map>.etm/mgmt/trcoslsr
+What: /sys/bus/coresight/devices/etm<N>/addr_exlevel_s_ns
+Date: December 2019
+KernelVersion: 5.5
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (RW) Set the Exception Level matching bits for secure and
+ non-secure exception levels.
+
+What: /sys/bus/coresight/devices/etm<N>/vinst_pe_cmp_start_stop
+Date: December 2019
+KernelVersion: 5.5
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (RW) Access the start stop control register for PE input
+ comparators.
+
+What: /sys/bus/coresight/devices/etm<N>/addr_cmp_view
+Date: December 2019
+KernelVersion: 5.5
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (Read) Print the current settings for the selected address
+ comparator.
+
+What: /sys/bus/coresight/devices/etm<N>/sshot_idx
+Date: December 2019
+KernelVersion: 5.5
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (RW) Select the single shot control register to access.
+
+What: /sys/bus/coresight/devices/etm<N>/sshot_ctrl
+Date: December 2019
+KernelVersion: 5.5
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (RW) Access the selected single shot control register.
+
+What: /sys/bus/coresight/devices/etm<N>/sshot_status
+Date: December 2019
+KernelVersion: 5.5
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (Read) Print the current value of the selected single shot
+ status register.
+
+What: /sys/bus/coresight/devices/etm<N>/sshot_pe_ctrl
+Date: December 2019
+KernelVersion: 5.5
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (RW) Access the selected single show PE comparator control
+ register.
+
+What: /sys/bus/coresight/devices/etm<N>/label
+Date: Aug 2025
+KernelVersion 6.18
+Contact: Mao Jinlong <quic_jinlmao@quicinc.com>
+Description: (Read) Show hardware context information of device.
+
+What: /sys/bus/coresight/devices/etm<N>/mgmt/trcoslsr
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Print the content of the OS Lock Status Register (0x304).
+Description: (Read) Print the content of the OS Lock Status Register (0x304).
The value it taken directly from the HW.
-What: /sys/bus/coresight/devices/<memory_map>.etm/mgmt/trcpdcr
+What: /sys/bus/coresight/devices/etm<N>/mgmt/trcpdcr
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Print the content of the Power Down Control Register
+Description: (Read) Print the content of the Power Down Control Register
(0x310). The value is taken directly from the HW.
-What: /sys/bus/coresight/devices/<memory_map>.etm/mgmt/trcpdsr
+What: /sys/bus/coresight/devices/etm<N>/mgmt/trcpdsr
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Print the content of the Power Down Status Register
+Description: (Read) Print the content of the Power Down Status Register
(0x314). The value is taken directly from the HW.
-What: /sys/bus/coresight/devices/<memory_map>.etm/mgmt/trclsr
+What: /sys/bus/coresight/devices/etm<N>/mgmt/trclsr
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Print the content of the SW Lock Status Register
+Description: (Read) Print the content of the SW Lock Status Register
(0xFB4). The value is taken directly from the HW.
-What: /sys/bus/coresight/devices/<memory_map>.etm/mgmt/trcauthstatus
+What: /sys/bus/coresight/devices/etm<N>/mgmt/trcauthstatus
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Print the content of the Authentication Status Register
+Description: (Read) Print the content of the Authentication Status Register
(0xFB8). The value is taken directly from the HW.
-What: /sys/bus/coresight/devices/<memory_map>.etm/mgmt/trcdevid
+What: /sys/bus/coresight/devices/etm<N>/mgmt/trcdevid
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Print the content of the Device ID Register
+Description: (Read) Print the content of the Device ID Register
(0xFC8). The value is taken directly from the HW.
-What: /sys/bus/coresight/devices/<memory_map>.etm/mgmt/trcdevtype
+What: /sys/bus/coresight/devices/etm<N>/mgmt/trcdevarch
+Date: January 2021
+KernelVersion: 5.12
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (Read) Print the content of the Device Architecture Register
+ (offset 0xFBC). The value is taken directly read
+ from the HW.
+
+What: /sys/bus/coresight/devices/etm<N>/mgmt/trcdevtype
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Print the content of the Device Type Register
+Description: (Read) Print the content of the Device Type Register
(0xFCC). The value is taken directly from the HW.
-What: /sys/bus/coresight/devices/<memory_map>.etm/mgmt/trcpidr0
+What: /sys/bus/coresight/devices/etm<N>/mgmt/trcpidr0
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Print the content of the Peripheral ID0 Register
+Description: (Read) Print the content of the Peripheral ID0 Register
(0xFE0). The value is taken directly from the HW.
-What: /sys/bus/coresight/devices/<memory_map>.etm/mgmt/trcpidr1
+What: /sys/bus/coresight/devices/etm<N>/mgmt/trcpidr1
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Print the content of the Peripheral ID1 Register
+Description: (Read) Print the content of the Peripheral ID1 Register
(0xFE4). The value is taken directly from the HW.
-What: /sys/bus/coresight/devices/<memory_map>.etm/mgmt/trcpidr2
+What: /sys/bus/coresight/devices/etm<N>/mgmt/trcpidr2
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Print the content of the Peripheral ID2 Register
+Description: (Read) Print the content of the Peripheral ID2 Register
(0xFE8). The value is taken directly from the HW.
-What: /sys/bus/coresight/devices/<memory_map>.etm/mgmt/trcpidr3
+What: /sys/bus/coresight/devices/etm<N>/mgmt/trcpidr3
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Print the content of the Peripheral ID3 Register
+Description: (Read) Print the content of the Peripheral ID3 Register
(0xFEC). The value is taken directly from the HW.
-What: /sys/bus/coresight/devices/<memory_map>.etm/mgmt/trcconfig
+What: /sys/bus/coresight/devices/etm<N>/mgmt/trcconfig
Date: February 2016
KernelVersion: 4.07
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Print the content of the trace configuration register
+Description: (Read) Print the content of the trace configuration register
(0x010) as currently set by SW.
-What: /sys/bus/coresight/devices/<memory_map>.etm/mgmt/trctraceid
+What: /sys/bus/coresight/devices/etm<N>/mgmt/trctraceid
Date: February 2016
KernelVersion: 4.07
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Print the content of the trace ID register (0x040).
+Description: (Read) Print the content of the trace ID register (0x040).
-What: /sys/bus/coresight/devices/<memory_map>.etm/trcidr/trcidr0
+What: /sys/bus/coresight/devices/etm<N>/trcidr/trcidr0
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Returns the tracing capabilities of the trace unit (0x1E0).
+Description: (Read) Returns the tracing capabilities of the trace unit (0x1E0).
The value is taken directly from the HW.
-What: /sys/bus/coresight/devices/<memory_map>.etm/trcidr/trcidr1
+What: /sys/bus/coresight/devices/etm<N>/trcidr/trcidr1
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Returns the tracing capabilities of the trace unit (0x1E4).
+Description: (Read) Returns the tracing capabilities of the trace unit (0x1E4).
The value is taken directly from the HW.
-What: /sys/bus/coresight/devices/<memory_map>.etm/trcidr/trcidr2
+What: /sys/bus/coresight/devices/etm<N>/trcidr/trcidr2
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Returns the maximum size of the data value, data address,
- VMID, context ID and instuction address in the trace unit
+Description: (Read) Returns the maximum size of the data value, data address,
+ VMID, context ID and instruction address in the trace unit
(0x1E8). The value is taken directly from the HW.
-What: /sys/bus/coresight/devices/<memory_map>.etm/trcidr/trcidr3
+What: /sys/bus/coresight/devices/etm<N>/trcidr/trcidr3
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Returns the value associated with various resources
+Description: (Read) Returns the value associated with various resources
available to the trace unit. See the Trace Macrocell
architecture specification for more details (0x1E8).
The value is taken directly from the HW.
-What: /sys/bus/coresight/devices/<memory_map>.etm/trcidr/trcidr4
+What: /sys/bus/coresight/devices/etm<N>/trcidr/trcidr4
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Returns how many resources the trace unit supports (0x1F0).
+Description: (Read) Returns how many resources the trace unit supports (0x1F0).
The value is taken directly from the HW.
-What: /sys/bus/coresight/devices/<memory_map>.etm/trcidr/trcidr5
+What: /sys/bus/coresight/devices/etm<N>/trcidr/trcidr5
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Returns how many resources the trace unit supports (0x1F4).
+Description: (Read) Returns how many resources the trace unit supports (0x1F4).
The value is taken directly from the HW.
-What: /sys/bus/coresight/devices/<memory_map>.etm/trcidr/trcidr8
+What: /sys/bus/coresight/devices/etm<N>/trcidr/trcidr8
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Returns the maximum speculation depth of the instruction
+Description: (Read) Returns the maximum speculation depth of the instruction
trace stream. (0x180). The value is taken directly from the HW.
-What: /sys/bus/coresight/devices/<memory_map>.etm/trcidr/trcidr9
+What: /sys/bus/coresight/devices/etm<N>/trcidr/trcidr9
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Returns the number of P0 right-hand keys that the trace unit
+Description: (Read) Returns the number of P0 right-hand keys that the trace unit
can use (0x184). The value is taken directly from the HW.
-What: /sys/bus/coresight/devices/<memory_map>.etm/trcidr/trcidr10
+What: /sys/bus/coresight/devices/etm<N>/trcidr/trcidr10
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Returns the number of P1 right-hand keys that the trace unit
+Description: (Read) Returns the number of P1 right-hand keys that the trace unit
can use (0x188). The value is taken directly from the HW.
-What: /sys/bus/coresight/devices/<memory_map>.etm/trcidr/trcidr11
+What: /sys/bus/coresight/devices/etm<N>/trcidr/trcidr11
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Returns the number of special P1 right-hand keys that the
+Description: (Read) Returns the number of special P1 right-hand keys that the
trace unit can use (0x18C). The value is taken directly from
the HW.
-What: /sys/bus/coresight/devices/<memory_map>.etm/trcidr/trcidr12
+What: /sys/bus/coresight/devices/etm<N>/trcidr/trcidr12
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Returns the number of conditional P1 right-hand keys that
+Description: (Read) Returns the number of conditional P1 right-hand keys that
the trace unit can use (0x190). The value is taken directly
from the HW.
-What: /sys/bus/coresight/devices/<memory_map>.etm/trcidr/trcidr13
+What: /sys/bus/coresight/devices/etm<N>/trcidr/trcidr13
Date: April 2015
KernelVersion: 4.01
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Returns the number of special conditional P1 right-hand keys
+Description: (Read) Returns the number of special conditional P1 right-hand keys
that the trace unit can use (0x194). The value is taken
directly from the HW.
+
+What: /sys/bus/coresight/devices/etm<N>/ts_source
+Date: October 2022
+KernelVersion: 6.1
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org> or Suzuki K Poulose <suzuki.poulose@arm.com>
+Description: (Read) When FEAT_TRF is implemented, value of TRFCR_ELx.TS used for
+ trace session. Otherwise -1 indicates an unknown time source. Check
+ trcidr0.tssize to see if a global timestamp is available.
diff --git a/Documentation/ABI/testing/sysfs-bus-coresight-devices-funnel b/Documentation/ABI/testing/sysfs-bus-coresight-devices-funnel
index d75acda5e1b3..86938e9bbcde 100644
--- a/Documentation/ABI/testing/sysfs-bus-coresight-devices-funnel
+++ b/Documentation/ABI/testing/sysfs-bus-coresight-devices-funnel
@@ -10,3 +10,9 @@ Date: November 2014
KernelVersion: 3.19
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RW) Defines input port priority order.
+
+What: /sys/bus/coresight/devices/<memory_map>.funnel/label
+Date: Aug 2025
+KernelVersion 6.18
+Contact: Mao Jinlong <quic_jinlmao@quicinc.com>
+Description: (Read) Show hardware context information of device.
diff --git a/Documentation/ABI/testing/sysfs-bus-coresight-devices-stm b/Documentation/ABI/testing/sysfs-bus-coresight-devices-stm
index 1dffabe7f48d..848e2ffc1480 100644
--- a/Documentation/ABI/testing/sysfs-bus-coresight-devices-stm
+++ b/Documentation/ABI/testing/sysfs-bus-coresight-devices-stm
@@ -42,7 +42,7 @@ What: /sys/bus/coresight/devices/<memory_map>.stm/status
Date: April 2016
KernelVersion: 4.7
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) List various control and status registers. The specific
+Description: (Read) List various control and status registers. The specific
layout and content is driver specific.
What: /sys/bus/coresight/devices/<memory_map>.stm/traceid
@@ -51,3 +51,9 @@ KernelVersion: 4.7
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RW) Holds the trace ID that will appear in the trace stream
coming from this trace entity.
+
+What: /sys/bus/coresight/devices/<memory_map>.stm/label
+Date: Aug 2025
+KernelVersion 6.18
+Contact: Mao Jinlong <quic_jinlmao@quicinc.com>
+Description: (Read) Show hardware context information of device.
diff --git a/Documentation/ABI/testing/sysfs-bus-coresight-devices-tmc b/Documentation/ABI/testing/sysfs-bus-coresight-devices-tmc
index ab49b9ac3bcb..55e298b9c4a4 100644
--- a/Documentation/ABI/testing/sysfs-bus-coresight-devices-tmc
+++ b/Documentation/ABI/testing/sysfs-bus-coresight-devices-tmc
@@ -11,21 +11,21 @@ What: /sys/bus/coresight/devices/<memory_map>.tmc/mgmt/rsz
Date: March 2016
KernelVersion: 4.7
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Defines the size, in 32-bit words, of the local RAM buffer.
+Description: (Read) Defines the size, in 32-bit words, of the local RAM buffer.
The value is read directly from HW register RSZ, 0x004.
What: /sys/bus/coresight/devices/<memory_map>.tmc/mgmt/sts
Date: March 2016
KernelVersion: 4.7
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Shows the value held by the TMC status register. The value
+Description: (Read) Shows the value held by the TMC status register. The value
is read directly from HW register STS, 0x00C.
What: /sys/bus/coresight/devices/<memory_map>.tmc/mgmt/rrp
Date: March 2016
KernelVersion: 4.7
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Shows the value held by the TMC RAM Read Pointer register
+Description: (Read) Shows the value held by the TMC RAM Read Pointer register
that is used to read entries from the Trace RAM over the APB
interface. The value is read directly from HW register RRP,
0x014.
@@ -34,7 +34,7 @@ What: /sys/bus/coresight/devices/<memory_map>.tmc/mgmt/rwp
Date: March 2016
KernelVersion: 4.7
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Shows the value held by the TMC RAM Write Pointer register
+Description: (Read) Shows the value held by the TMC RAM Write Pointer register
that is used to sets the write pointer to write entries from
the CoreSight bus into the Trace RAM. The value is read directly
from HW register RWP, 0x018.
@@ -43,21 +43,21 @@ What: /sys/bus/coresight/devices/<memory_map>.tmc/mgmt/trg
Date: March 2016
KernelVersion: 4.7
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Similar to "trigger_cntr" above except that this value is
+Description: (Read) Similar to "trigger_cntr" above except that this value is
read directly from HW register TRG, 0x01C.
What: /sys/bus/coresight/devices/<memory_map>.tmc/mgmt/ctl
Date: March 2016
KernelVersion: 4.7
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Shows the value held by the TMC Control register. The value
+Description: (Read) Shows the value held by the TMC Control register. The value
is read directly from HW register CTL, 0x020.
What: /sys/bus/coresight/devices/<memory_map>.tmc/mgmt/ffsr
Date: March 2016
KernelVersion: 4.7
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Shows the value held by the TMC Formatter and Flush Status
+Description: (Read) Shows the value held by the TMC Formatter and Flush Status
register. The value is read directly from HW register FFSR,
0x300.
@@ -65,7 +65,7 @@ What: /sys/bus/coresight/devices/<memory_map>.tmc/mgmt/ffcr
Date: March 2016
KernelVersion: 4.7
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Shows the value held by the TMC Formatter and Flush Control
+Description: (Read) Shows the value held by the TMC Formatter and Flush Control
register. The value is read directly from HW register FFCR,
0x304.
@@ -73,7 +73,7 @@ What: /sys/bus/coresight/devices/<memory_map>.tmc/mgmt/mode
Date: March 2016
KernelVersion: 4.7
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Shows the value held by the TMC Mode register, which
+Description: (Read) Shows the value held by the TMC Mode register, which
indicate the mode the device has been configured to enact. The
The value is read directly from the MODE register, 0x028.
@@ -81,7 +81,7 @@ What: /sys/bus/coresight/devices/<memory_map>.tmc/mgmt/devid
Date: March 2016
KernelVersion: 4.7
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
-Description: (R) Indicates the capabilities of the Coresight TMC.
+Description: (Read) Indicates the capabilities of the Coresight TMC.
The value is read directly from the DEVID register, 0xFC8,
What: /sys/bus/coresight/devices/<memory_map>.tmc/buffer_size
@@ -91,3 +91,25 @@ Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RW) Size of the trace buffer for TMC-ETR when used in SYSFS
mode. Writable only for TMC-ETR configurations. The value
should be aligned to the kernel pagesize.
+
+What: /sys/bus/coresight/devices/<memory_map>.tmc/buf_modes_available
+Date: August 2023
+KernelVersion: 6.7
+Contact: Anshuman Khandual <anshuman.khandual@arm.com>
+Description: (Read) Shows all supported Coresight TMC-ETR buffer modes available
+ for the users to configure explicitly. This file is available only
+ for TMC ETR devices.
+
+What: /sys/bus/coresight/devices/<memory_map>.tmc/buf_mode_preferred
+Date: August 2023
+KernelVersion: 6.7
+Contact: Anshuman Khandual <anshuman.khandual@arm.com>
+Description: (RW) Current Coresight TMC-ETR buffer mode selected. But user could
+ only provide a mode which is supported for a given ETR device. This
+ file is available only for TMC ETR devices.
+
+What: /sys/bus/coresight/devices/<memory_map>.tmc/label
+Date: Aug 2025
+KernelVersion 6.18
+Contact: Mao Jinlong <quic_jinlmao@quicinc.com>
+Description: (Read) Show hardware context information of device.
diff --git a/Documentation/ABI/testing/sysfs-bus-coresight-devices-tpdm b/Documentation/ABI/testing/sysfs-bus-coresight-devices-tpdm
new file mode 100644
index 000000000000..98f1c6545027
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-coresight-devices-tpdm
@@ -0,0 +1,280 @@
+What: /sys/bus/coresight/devices/<tpdm-name>/integration_test
+Date: January 2023
+KernelVersion: 6.2
+Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
+Description:
+ (Write) Run integration test for tpdm. Integration test
+ will generate test data for tpdm. It can help to make
+ sure that the trace path is enabled and the link configurations
+ are fine.
+
+ Accepts only one of the 2 values - 1 or 2.
+ 1 : Generate 64 bits data
+ 2 : Generate 32 bits data
+
+What: /sys/bus/coresight/devices/<tpdm-name>/reset_dataset
+Date: March 2023
+KernelVersion: 6.7
+Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
+Description:
+ (Write) Reset the dataset of the tpdm.
+
+ Accepts only one value - 1.
+ 1 : Reset the dataset of the tpdm
+
+What: /sys/bus/coresight/devices/<tpdm-name>/dsb_trig_type
+Date: March 2023
+KernelVersion: 6.7
+Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
+Description:
+ (RW) Set/Get the trigger type of the DSB for tpdm.
+
+ Accepts only one of the 2 values - 0 or 1.
+ 0 : Set the DSB trigger type to false
+ 1 : Set the DSB trigger type to true
+
+What: /sys/bus/coresight/devices/<tpdm-name>/dsb_trig_ts
+Date: March 2023
+KernelVersion: 6.7
+Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
+Description:
+ (RW) Set/Get the trigger timestamp of the DSB for tpdm.
+
+ Accepts only one of the 2 values - 0 or 1.
+ 0 : Set the DSB trigger type to false
+ 1 : Set the DSB trigger type to true
+
+What: /sys/bus/coresight/devices/<tpdm-name>/dsb_mode
+Date: March 2023
+KernelVersion: 6.7
+Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
+Description:
+ (RW) Set/Get the programming mode of the DSB for tpdm.
+
+ Accepts the value needs to be greater than 0. What data
+ bits do is listed below.
+ Bit[0:1] : Test mode control bit for choosing the inputs.
+ Bit[3] : Set to 0 for low performance mode. Set to 1 for high
+ performance mode.
+ Bit[4:8] : Select byte lane for high performance mode.
+
+What: /sys/bus/coresight/devices/<tpdm-name>/dsb_edge/ctrl_idx
+Date: March 2023
+KernelVersion: 6.7
+Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
+Description:
+ (RW) Set/Get the index number of the edge detection for the DSB
+ subunit TPDM. Since there are at most 256 edge detections, this
+ value ranges from 0 to 255.
+
+What: /sys/bus/coresight/devices/<tpdm-name>/dsb_edge/ctrl_val
+Date: March 2023
+KernelVersion: 6.7
+Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
+Description:
+ Write a data to control the edge detection corresponding to
+ the index number. Before writing data to this sysfs file,
+ "ctrl_idx" should be written first to configure the index
+ number of the edge detection which needs to be controlled.
+
+ Accepts only one of the following values.
+ 0 - Rising edge detection
+ 1 - Falling edge detection
+ 2 - Rising and falling edge detection (toggle detection)
+
+
+What: /sys/bus/coresight/devices/<tpdm-name>/dsb_edge/ctrl_mask
+Date: March 2023
+KernelVersion: 6.7
+Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
+Description:
+ Write a data to mask the edge detection corresponding to the index
+ number. Before writing data to this sysfs file, "ctrl_idx" should
+ be written first to configure the index number of the edge detection
+ which needs to be masked.
+
+ Accepts only one of the 2 values - 0 or 1.
+
+What: /sys/bus/coresight/devices/<tpdm-name>/dsb_edge/edcr[0:15]
+Date: March 2023
+KernelVersion: 6.7
+Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
+Description:
+ Read a set of the edge control value of the DSB in TPDM.
+
+What: /sys/bus/coresight/devices/<tpdm-name>/dsb_edge/edcmr[0:7]
+Date: March 2023
+KernelVersion: 6.7
+Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
+Description:
+ Read a set of the edge control mask of the DSB in TPDM.
+
+What: /sys/bus/coresight/devices/<tpdm-name>/dsb_trig_patt/xpr[0:7]
+Date: March 2023
+KernelVersion: 6.7
+Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
+Description:
+ (RW) Set/Get the value of the trigger pattern for the DSB
+ subunit TPDM.
+
+What: /sys/bus/coresight/devices/<tpdm-name>/dsb_trig_patt/xpmr[0:7]
+Date: March 2023
+KernelVersion: 6.7
+Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
+Description:
+ (RW) Set/Get the mask of the trigger pattern for the DSB
+ subunit TPDM.
+
+What: /sys/bus/coresight/devices/<tpdm-name>/dsb_patt/tpr[0:7]
+Date: March 2023
+KernelVersion: 6.7
+Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
+Description:
+ (RW) Set/Get the value of the pattern for the DSB subunit TPDM.
+
+What: /sys/bus/coresight/devices/<tpdm-name>/dsb_patt/tpmr[0:7]
+Date: March 2023
+KernelVersion: 6.7
+Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
+Description:
+ (RW) Set/Get the mask of the pattern for the DSB subunit TPDM.
+
+What: /sys/bus/coresight/devices/<tpdm-name>/dsb_patt/enable_ts
+Date: March 2023
+KernelVersion: 6.7
+Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
+Description:
+ (Write) Set the pattern timestamp of DSB tpdm. Read
+ the pattern timestamp of DSB tpdm.
+
+ Accepts only one of the 2 values - 0 or 1.
+ 0 : Disable DSB pattern timestamp.
+ 1 : Enable DSB pattern timestamp.
+
+What: /sys/bus/coresight/devices/<tpdm-name>/dsb_patt/set_type
+Date: March 2023
+KernelVersion: 6.7
+Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
+Description:
+ (Write) Set the pattern type of DSB tpdm. Read
+ the pattern type of DSB tpdm.
+
+ Accepts only one of the 2 values - 0 or 1.
+ 0 : Set the DSB pattern type to value.
+ 1 : Set the DSB pattern type to toggle.
+
+What: /sys/bus/coresight/devices/<tpdm-name>/dsb_msr/msr[0:31]
+Date: March 2023
+KernelVersion: 6.7
+Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
+Description:
+ (RW) Set/Get the MSR(mux select register) for the DSB subunit
+ TPDM.
+
+What: /sys/bus/coresight/devices/<tpdm-name>/cmb_mode
+Date: January 2024
+KernelVersion: 6.9
+Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
+Description: (Write) Set the data collection mode of CMB tpdm. Continuous
+ change creates CMB data set elements on every CMBCLK edge.
+ Trace-on-change creates CMB data set elements only when a new
+ data set element differs in value from the previous element
+ in a CMB data set.
+
+ Accepts only one of the 2 values - 0 or 1.
+ 0 : Continuous CMB collection mode.
+ 1 : Trace-on-change CMB collection mode.
+
+What: /sys/bus/coresight/devices/<tpdm-name>/cmb_trig_patt/xpr[0:1]
+Date: January 2024
+KernelVersion: 6.9
+Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
+Description:
+ (RW) Set/Get the value of the trigger pattern for the CMB
+ subunit TPDM.
+
+What: /sys/bus/coresight/devices/<tpdm-name>/cmb_trig_patt/xpmr[0:1]
+Date: January 2024
+KernelVersion: 6.9
+Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
+Description:
+ (RW) Set/Get the mask of the trigger pattern for the CMB
+ subunit TPDM.
+
+What: /sys/bus/coresight/devices/<tpdm-name>/dsb_patt/tpr[0:1]
+Date: January 2024
+KernelVersion: 6.9
+Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
+Description:
+ (RW) Set/Get the value of the pattern for the CMB subunit TPDM.
+
+What: /sys/bus/coresight/devices/<tpdm-name>/dsb_patt/tpmr[0:1]
+Date: January 2024
+KernelVersion: 6.9
+Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
+Description:
+ (RW) Set/Get the mask of the pattern for the CMB subunit TPDM.
+
+What: /sys/bus/coresight/devices/<tpdm-name>/cmb_patt/enable_ts
+Date: January 2024
+KernelVersion: 6.9
+Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
+Description:
+ (Write) Set the pattern timestamp of CMB tpdm. Read
+ the pattern timestamp of CMB tpdm.
+
+ Accepts only one of the 2 values - 0 or 1.
+ 0 : Disable CMB pattern timestamp.
+ 1 : Enable CMB pattern timestamp.
+
+What: /sys/bus/coresight/devices/<tpdm-name>/cmb_trig_ts
+Date: January 2024
+KernelVersion: 6.9
+Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
+Description:
+ (RW) Set/Get the trigger timestamp of the CMB for tpdm.
+
+ Accepts only one of the 2 values - 0 or 1.
+ 0 : Set the CMB trigger type to false
+ 1 : Set the CMB trigger type to true
+
+What: /sys/bus/coresight/devices/<tpdm-name>/cmb_ts_all
+Date: January 2024
+KernelVersion: 6.9
+Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
+Description:
+ (RW) Read or write the status of timestamp upon all interface.
+ Only value 0 and 1 can be written to this node. Set this node to 1 to request
+ timestamp to all trace packet.
+ Accepts only one of the 2 values - 0 or 1.
+ 0 : Disable the timestamp of all trace packets.
+ 1 : Enable the timestamp of all trace packets.
+
+What: /sys/bus/coresight/devices/<tpdm-name>/cmb_msr/msr[0:31]
+Date: January 2024
+KernelVersion: 6.9
+Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
+Description:
+ (RW) Set/Get the MSR(mux select register) for the CMB subunit
+ TPDM.
+
+What: /sys/bus/coresight/devices/<tpdm-name>/mcmb_trig_lane
+Date: Feb 2025
+KernelVersion 6.15
+Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
+Description:
+ (RW) Set/Get which lane participates in the output pattern
+ match cross trigger mechanism for the MCMB subunit TPDM.
+
+What: /sys/bus/coresight/devices/<tpdm-name>/mcmb_lanes_select
+Date: Feb 2025
+KernelVersion 6.15
+Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
+Description:
+ (RW) Set/Get the enablement of the individual lane.
+
+What: /sys/bus/coresight/devices/<tpdm-name>/label
+Date: Aug 2025
+KernelVersion 6.18
+Contact: Mao Jinlong <quic_jinlmao@quicinc.com>
+Description: (Read) Show hardware context information of device.
diff --git a/Documentation/ABI/testing/sysfs-bus-coresight-devices-trbe b/Documentation/ABI/testing/sysfs-bus-coresight-devices-trbe
new file mode 100644
index 000000000000..8a4b749ed26e
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-coresight-devices-trbe
@@ -0,0 +1,20 @@
+What: /sys/bus/coresight/devices/trbe<cpu>/align
+Date: March 2021
+KernelVersion: 5.13
+Contact: Anshuman Khandual <anshuman.khandual@arm.com>
+Description: (Read) Shows the TRBE write pointer alignment. This value
+ is fetched from the TRBIDR register.
+
+What: /sys/bus/coresight/devices/trbe<cpu>/flag
+Date: March 2021
+KernelVersion: 5.13
+Contact: Anshuman Khandual <anshuman.khandual@arm.com>
+Description: (Read) Shows if TRBE updates in the memory are with access
+ and dirty flag updates as well. This value is fetched from
+ the TRBIDR register.
+
+What: /sys/bus/coresight/devices/trbe<cpu>/label
+Date: Aug 2025
+KernelVersion 6.18
+Contact: Mao Jinlong <quic_jinlmao@quicinc.com>
+Description: (Read) Show hardware context information of device.
diff --git a/Documentation/ABI/testing/sysfs-bus-coresight-devices-ultra_smb b/Documentation/ABI/testing/sysfs-bus-coresight-devices-ultra_smb
new file mode 100644
index 000000000000..f560918ae738
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-coresight-devices-ultra_smb
@@ -0,0 +1,31 @@
+What: /sys/bus/coresight/devices/ultra_smb<N>/enable_sink
+Date: January 2023
+KernelVersion: 6.3
+Contact: Junhao He <hejunhao3@huawei.com>
+Description: (RW) Add/remove a SMB device from a trace path. There can be
+ multiple sources for a single SMB device.
+
+What: /sys/bus/coresight/devices/ultra_smb<N>/mgmt/buf_size
+Date: January 2023
+KernelVersion: 6.3
+Contact: Junhao He <hejunhao3@huawei.com>
+Description: (RO) Shows the buffer size of each UltraSoc SMB device.
+
+What: /sys/bus/coresight/devices/ultra_smb<N>/mgmt/buf_status
+Date: January 2023
+KernelVersion: 6.3
+Contact: Junhao He <hejunhao3@huawei.com>
+Description: (RO) Shows the value of UltraSoc SMB status register.
+ BIT(0) is zero means buffer is empty.
+
+What: /sys/bus/coresight/devices/ultra_smb<N>/mgmt/read_pos
+Date: January 2023
+KernelVersion: 6.3
+Contact: Junhao He <hejunhao3@huawei.com>
+Description: (RO) Shows the value of UltraSoc SMB Read Pointer register.
+
+What: /sys/bus/coresight/devices/ultra_smb<N>/mgmt/write_pos
+Date: January 2023
+KernelVersion: 6.3
+Contact: Junhao He <hejunhao3@huawei.com>
+Description: (RO) Shows the value of UltraSoc SMB Write Pointer register.
diff --git a/Documentation/ABI/testing/sysfs-bus-counter b/Documentation/ABI/testing/sysfs-bus-counter
new file mode 100644
index 000000000000..3e7eddd8aff3
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-counter
@@ -0,0 +1,477 @@
+What: /sys/bus/counter/devices/counterX/cascade_counts_enable
+KernelVersion: 6.4
+Contact: linux-iio@vger.kernel.org
+Description:
+ Indicates the cascading of Counts on Counter X.
+
+ Valid attribute values are boolean.
+
+What: /sys/bus/counter/devices/counterX/external_input_phase_clock_select
+KernelVersion: 6.4
+Contact: linux-iio@vger.kernel.org
+Description:
+ Selects the external clock pin for phase counting mode of
+ Counter X.
+
+ MTCLKA-MTCLKB:
+ MTCLKA and MTCLKB pins are selected for the external
+ phase clock.
+
+ MTCLKC-MTCLKD:
+ MTCLKC and MTCLKD pins are selected for the external
+ phase clock.
+
+What: /sys/bus/counter/devices/counterX/external_input_phase_clock_select_available
+KernelVersion: 6.4
+Contact: linux-iio@vger.kernel.org
+Description:
+ Discrete set of available values for the respective device
+ configuration are listed in this file.
+
+What: /sys/bus/counter/devices/counterX/countY/count
+KernelVersion: 5.2
+Contact: linux-iio@vger.kernel.org
+Description:
+ Count data of Count Y represented as a string.
+
+What: /sys/bus/counter/devices/counterX/countY/compare
+KernelVersion: 6.15
+Contact: linux-iio@vger.kernel.org
+Description:
+ If the counter device supports compare registers -- registers
+ used to compare counter channels against a particular count --
+ the compare count for channel Y is provided by this attribute.
+
+What: /sys/bus/counter/devices/counterX/countY/capture
+KernelVersion: 6.1
+Contact: linux-iio@vger.kernel.org
+Description:
+ Historical capture of the Count Y count data.
+
+What: /sys/bus/counter/devices/counterX/countY/ceiling
+KernelVersion: 5.2
+Contact: linux-iio@vger.kernel.org
+Description:
+ Count value ceiling for Count Y. This is the upper limit for the
+ respective counter.
+
+What: /sys/bus/counter/devices/counterX/countY/floor
+KernelVersion: 5.2
+Contact: linux-iio@vger.kernel.org
+Description:
+ Count value floor for Count Y. This is the lower limit for the
+ respective counter.
+
+What: /sys/bus/counter/devices/counterX/countY/count_mode
+KernelVersion: 5.2
+Contact: linux-iio@vger.kernel.org
+Description:
+ Count mode for channel Y. The ceiling and floor values for
+ Count Y are used by the count mode where required. The following
+ count modes are available:
+
+ normal:
+ Counting is continuous in either direction.
+
+ range limit:
+ An upper or lower limit is set, mimicking limit switches
+ in the mechanical counterpart. The upper limit is set to
+ the Count Y ceiling value, while the lower limit is set
+ to the Count Y floor value. The counter freezes at
+ count = ceiling when counting up, and at count = floor
+ when counting down. At either of these limits, the
+ counting is resumed only when the count direction is
+ reversed.
+
+ non-recycle:
+ The counter is disabled whenever a counter overflow or
+ underflow takes place. The counter is re-enabled when a
+ new count value is loaded to the counter via a preset
+ operation or direct write.
+
+ modulo-n:
+ A count value boundary is set between the Count Y floor
+ value and the Count Y ceiling value. The counter is
+ reset to the Count Y floor value at count = ceiling when
+ counting up, while the counter is set to the Count Y
+ ceiling value at count = floor when counting down; the
+ counter does not freeze at the boundary points, but
+ counts continuously throughout.
+
+ interrupt on terminal count:
+ The output signal is initially low, and will remain low
+ until the counter reaches zero. The output signal then
+ goes high and remains high until a new preset value is
+ set.
+
+ hardware retriggerable one-shot:
+ The output signal is initially high. The output signal
+ will go low by a trigger input signal, and will remain
+ low until the counter reaches zero. The output will then
+ go high and remain high until the next trigger. A
+ trigger results in loading the counter to the preset
+ value and setting the output signal low, thus starting
+ the one-shot pulse.
+
+ rate generator:
+ The output signal is initially high. When the counter
+ has decremented to 1, the output signal goes low for one
+ clock pulse. The output signal then goes high again, the
+ counter is reloaded to the preset value, and the process
+ repeats in a periodic manner as such.
+
+ square wave mode:
+ The output signal is initially high.
+
+ If the initial count is even, the counter is decremented
+ by two on succeeding clock pulses. When the count
+ expires, the output signal changes value and the
+ counter is reloaded to the preset value. The process
+ repeats in periodic manner as such.
+
+ If the initial count is odd, the initial count minus one
+ (an even number) is loaded and then is decremented by
+ two on succeeding clock pulses. One clock pulse after
+ the count expires, the output signal goes low and the
+ counter is reloaded to the preset value minus one.
+ Succeeding clock pulses decrement the count by two. When
+ the count expires, the output goes high again and the
+ counter is reloaded to the preset value minus one. The
+ process repeats in a periodic manner as such.
+
+ software triggered strobe:
+ The output signal is initially high. When the count
+ expires, the output will go low for one clock pulse and
+ then go high again. The counting sequence is "triggered"
+ by setting the preset value.
+
+ hardware triggered strobe:
+ The output signal is initially high. Counting is started
+ by a trigger input signal. When the count expires, the
+ output signal will go low for one clock pulse and then
+ go high again. A trigger results in loading the counter
+ to the preset value.
+
+What: /sys/bus/counter/devices/counterX/countY/count_mode_available
+What: /sys/bus/counter/devices/counterX/countY/error_noise_available
+What: /sys/bus/counter/devices/counterX/countY/function_available
+What: /sys/bus/counter/devices/counterX/countY/prescaler_available
+What: /sys/bus/counter/devices/counterX/countY/signalZ_action_available
+KernelVersion: 5.2
+Contact: linux-iio@vger.kernel.org
+Description:
+ Discrete set of available values for the respective Count Y
+ configuration are listed in this file. Values are delimited by
+ newline characters.
+
+What: /sys/bus/counter/devices/counterX/countY/direction
+KernelVersion: 5.2
+Contact: linux-iio@vger.kernel.org
+Description:
+ Read-only attribute that indicates the count direction of Count
+ Y. Two count directions are available: forward and backward.
+
+ Some counter devices are able to determine the direction of
+ their counting. For example, quadrature encoding counters can
+ determine the direction of movement by evaluating the leading
+ phase of the respective A and B quadrature encoding signals.
+ This attribute exposes such count directions.
+
+What: /sys/bus/counter/devices/counterX/countY/enable
+KernelVersion: 5.2
+Contact: linux-iio@vger.kernel.org
+Description:
+ Whether channel Y counter is enabled. Valid attribute values are
+ boolean.
+
+ This attribute is intended to serve as a pause/unpause mechanism
+ for Count Y. Suppose a counter device is used to count the total
+ movement of a conveyor belt: this attribute allows an operator
+ to temporarily pause the counter, service the conveyor belt,
+ and then finally unpause the counter to continue where it had
+ left off.
+
+What: /sys/bus/counter/devices/counterX/countY/error_noise
+KernelVersion: 5.2
+Contact: linux-iio@vger.kernel.org
+Description:
+ Read-only attribute that indicates whether excessive noise is
+ present at the channel Y counter inputs.
+
+What: /sys/bus/counter/devices/counterX/countY/function
+KernelVersion: 5.2
+Contact: linux-iio@vger.kernel.org
+Description:
+ Count function mode of Count Y; count function evaluation is
+ triggered by conditions specified by the Count Y signalZ_action
+ attributes. The following count functions are available:
+
+ increase:
+ Accumulated count is incremented.
+
+ decrease:
+ Accumulated count is decremented.
+
+ pulse-direction:
+ Rising edges on signal A updates the respective count.
+ The input level of signal B determines direction.
+
+ quadrature x1 a:
+ If direction is forward, rising edges on quadrature pair
+ signal A updates the respective count; if the direction
+ is backward, falling edges on quadrature pair signal A
+ updates the respective count. Quadrature encoding
+ determines the direction.
+
+ quadrature x1 b:
+ If direction is forward, rising edges on quadrature pair
+ signal B updates the respective count; if the direction
+ is backward, falling edges on quadrature pair signal B
+ updates the respective count. Quadrature encoding
+ determines the direction.
+
+ quadrature x2 a:
+ Any state transition on quadrature pair signal A updates
+ the respective count. Quadrature encoding determines the
+ direction.
+
+ quadrature x2 b:
+ Any state transition on quadrature pair signal B updates
+ the respective count. Quadrature encoding determines the
+ direction.
+
+ quadrature x4:
+ Any state transition on either quadrature pair signals
+ updates the respective count. Quadrature encoding
+ determines the direction.
+
+What: /sys/bus/counter/devices/counterX/countY/name
+KernelVersion: 5.2
+Contact: linux-iio@vger.kernel.org
+Description:
+ Read-only attribute that indicates the device-specific name of
+ Count Y. If possible, this should match the name of the
+ respective channel as it appears in the device datasheet.
+
+What: /sys/bus/counter/devices/counterX/countY/prescaler
+KernelVersion: 5.2
+Contact: linux-iio@vger.kernel.org
+Description:
+ Configure the prescaler value associated with Count Y.
+ On the FlexTimer, the counter clock source passes through a
+ prescaler (i.e. a counter). This acts like a clock
+ divider.
+
+What: /sys/bus/counter/devices/counterX/countY/preset
+KernelVersion: 5.2
+Contact: linux-iio@vger.kernel.org
+Description:
+ If the counter device supports preset registers -- registers
+ used to load counter channels to a set count upon device-defined
+ preset operation trigger events -- the preset count for channel
+ Y is provided by this attribute.
+
+What: /sys/bus/counter/devices/counterX/countY/preset_enable
+KernelVersion: 5.2
+Contact: linux-iio@vger.kernel.org
+Description:
+ Whether channel Y counter preset operation is enabled. Valid
+ attribute values are boolean.
+
+What: /sys/bus/counter/devices/counterX/countY/signalZ_action
+KernelVersion: 5.2
+Contact: linux-iio@vger.kernel.org
+Description:
+ Action mode of Count Y for Signal Z. This attribute indicates
+ the condition of Signal Z that triggers the count function
+ evaluation for Count Y. The following action modes are
+ available:
+
+ none:
+ Signal does not trigger the count function. In
+ Pulse-Direction count function mode, this Signal is
+ evaluated as Direction.
+
+ rising edge:
+ Low state transitions to high state.
+
+ falling edge:
+ High state transitions to low state.
+
+ both edges:
+ Any state transition.
+
+What: /sys/bus/counter/devices/counterX/countY/num_overflows
+KernelVersion: 6.1
+Contact: linux-iio@vger.kernel.org
+Description:
+ This attribute indicates the number of overflows of count Y.
+
+What: /sys/bus/counter/devices/counterX/cascade_counts_enable_component_id
+What: /sys/bus/counter/devices/counterX/external_input_phase_clock_select_component_id
+What: /sys/bus/counter/devices/counterX/countY/capture_component_id
+What: /sys/bus/counter/devices/counterX/countY/ceiling_component_id
+What: /sys/bus/counter/devices/counterX/countY/compare_component_id
+What: /sys/bus/counter/devices/counterX/countY/count_mode_component_id
+What: /sys/bus/counter/devices/counterX/countY/direction_component_id
+What: /sys/bus/counter/devices/counterX/countY/enable_component_id
+What: /sys/bus/counter/devices/counterX/countY/error_noise_component_id
+What: /sys/bus/counter/devices/counterX/countY/floor_component_id
+What: /sys/bus/counter/devices/counterX/countY/num_overflows_component_id
+What: /sys/bus/counter/devices/counterX/countY/prescaler_component_id
+What: /sys/bus/counter/devices/counterX/countY/preset_component_id
+What: /sys/bus/counter/devices/counterX/countY/preset_enable_component_id
+What: /sys/bus/counter/devices/counterX/countY/signalZ_action_component_id
+What: /sys/bus/counter/devices/counterX/signalY/cable_fault_component_id
+What: /sys/bus/counter/devices/counterX/signalY/cable_fault_enable_component_id
+What: /sys/bus/counter/devices/counterX/signalY/filter_clock_prescaler_component_id
+What: /sys/bus/counter/devices/counterX/signalY/frequency_component_id
+What: /sys/bus/counter/devices/counterX/signalY/index_polarity_component_id
+What: /sys/bus/counter/devices/counterX/signalY/polarity_component_id
+What: /sys/bus/counter/devices/counterX/signalY/synchronous_mode_component_id
+KernelVersion: 5.16
+Contact: linux-iio@vger.kernel.org
+Description:
+ Read-only attribute that indicates the component ID of the
+ respective extension or Synapse.
+
+What: /sys/bus/counter/devices/counterX/countY/spike_filter_ns
+KernelVersion: 5.14
+Contact: linux-iio@vger.kernel.org
+Description:
+ If the counter device supports programmable spike filter this
+ attribute indicates the value in nanoseconds where noise pulses
+ shorter or equal to configured value are ignored. Value 0 means
+ filter is disabled.
+
+What: /sys/bus/counter/devices/counterX/events_queue_size
+KernelVersion: 5.16
+Contact: linux-iio@vger.kernel.org
+Description:
+ Size of the Counter events queue in number of struct
+ counter_event data structures. The number of elements will be
+ rounded-up to a power of 2.
+
+What: /sys/bus/counter/devices/counterX/name
+KernelVersion: 5.2
+Contact: linux-iio@vger.kernel.org
+Description:
+ Read-only attribute that indicates the device-specific name of
+ the Counter. This should match the name of the device as it
+ appears in its respective datasheet.
+
+What: /sys/bus/counter/devices/counterX/num_counts
+KernelVersion: 5.2
+Contact: linux-iio@vger.kernel.org
+Description:
+ Read-only attribute that indicates the total number of Counts
+ belonging to the Counter.
+
+What: /sys/bus/counter/devices/counterX/num_signals
+KernelVersion: 5.2
+Contact: linux-iio@vger.kernel.org
+Description:
+ Read-only attribute that indicates the total number of Signals
+ belonging to the Counter.
+
+What: /sys/bus/counter/devices/counterX/signalY/cable_fault
+KernelVersion: 5.7
+Contact: linux-iio@vger.kernel.org
+Description:
+ Read-only attribute that indicates whether a differential
+ encoder cable fault (not connected or loose wires) is detected
+ for the respective channel of Signal Y. Valid attribute values
+ are boolean. Detection must first be enabled via the
+ corresponding cable_fault_enable attribute.
+
+What: /sys/bus/counter/devices/counterX/signalY/cable_fault_enable
+KernelVersion: 5.7
+Contact: linux-iio@vger.kernel.org
+Description:
+ Whether detection of differential encoder cable faults for the
+ respective channel of Signal Y is enabled. Valid attribute
+ values are boolean.
+
+What: /sys/bus/counter/devices/counterX/signalY/filter_clock_prescaler
+KernelVersion: 5.7
+Contact: linux-iio@vger.kernel.org
+Description:
+ Filter clock factor for input Signal Y. This prescaler value
+ affects the inputs of both quadrature pair signals.
+
+What: /sys/bus/counter/devices/counterX/signalY/index_polarity
+KernelVersion: 5.2
+Contact: linux-iio@vger.kernel.org
+Description:
+ Active level of index input Signal Y; irrelevant in
+ non-synchronous load mode.
+
+What: /sys/bus/counter/devices/counterX/signalY/index_polarity_available
+What: /sys/bus/counter/devices/counterX/signalY/synchronous_mode_available
+KernelVersion: 5.2
+Contact: linux-iio@vger.kernel.org
+Description:
+ Discrete set of available values for the respective Signal Y
+ configuration are listed in this file.
+
+What: /sys/bus/counter/devices/counterX/signalY/polarity
+KernelVersion: 6.1
+Contact: linux-iio@vger.kernel.org
+Description:
+ Active level of Signal Y. The following polarity values are
+ available:
+
+ positive:
+ Signal high state considered active level (rising edge).
+
+ negative:
+ Signal low state considered active level (falling edge).
+
+What: /sys/bus/counter/devices/counterX/signalY/name
+KernelVersion: 5.2
+Contact: linux-iio@vger.kernel.org
+Description:
+ Read-only attribute that indicates the device-specific name of
+ Signal Y. If possible, this should match the name of the
+ respective signal as it appears in the device datasheet.
+
+What: /sys/bus/counter/devices/counterX/signalY/signal
+KernelVersion: 5.2
+Contact: linux-iio@vger.kernel.org
+Description:
+ Signal level state of Signal Y. The following signal level
+ states are available:
+
+ low:
+ Low level state.
+
+ high:
+ High level state.
+
+What: /sys/bus/counter/devices/counterX/signalY/synchronous_mode
+KernelVersion: 5.2
+Contact: linux-iio@vger.kernel.org
+Description:
+ Configure the counter associated with Signal Y for
+ non-synchronous or synchronous load mode. Synchronous load mode
+ cannot be selected in non-quadrature (Pulse-Direction) clock
+ mode.
+
+ non-synchronous:
+ A logic low level is the active level at this index
+ input. The index function (as enabled via preset_enable)
+ is performed directly on the active level of the index
+ input.
+
+ synchronous:
+ Intended for interfacing with encoder Index output in
+ quadrature clock mode. The active level is configured
+ via index_polarity. The index function (as enabled via
+ preset_enable) is performed synchronously with the
+ quadrature clock on the active level of the index input.
+
+What: /sys/bus/counter/devices/counterX/signalY/frequency
+KernelVersion: 6.1
+Contact: linux-iio@vger.kernel.org
+Description:
+ Read-only attribute that indicates the signal Y frequency, in Hz.
diff --git a/Documentation/ABI/testing/sysfs-bus-css b/Documentation/ABI/testing/sysfs-bus-css
index 2979c40c10e9..d4d5cfb63b90 100644
--- a/Documentation/ABI/testing/sysfs-bus-css
+++ b/Documentation/ABI/testing/sysfs-bus-css
@@ -1,35 +1,56 @@
What: /sys/bus/css/devices/.../type
Date: March 2008
-Contact: Cornelia Huck <cornelia.huck@de.ibm.com>
- linux-s390@vger.kernel.org
+Contact: linux-s390@vger.kernel.org
Description: Contains the subchannel type, as reported by the hardware.
This attribute is present for all subchannel types.
What: /sys/bus/css/devices/.../modalias
Date: March 2008
-Contact: Cornelia Huck <cornelia.huck@de.ibm.com>
- linux-s390@vger.kernel.org
+Contact: linux-s390@vger.kernel.org
Description: Contains the module alias as reported with uevents.
It is of the format css:t<type> and present for all
subchannel types.
What: /sys/bus/css/drivers/io_subchannel/.../chpids
Date: December 2002
-Contact: Cornelia Huck <cornelia.huck@de.ibm.com>
- linux-s390@vger.kernel.org
+Contact: linux-s390@vger.kernel.org
Description: Contains the ids of the channel paths used by this
subchannel, as reported by the channel subsystem
during subchannel recognition.
+
Note: This is an I/O-subchannel specific attribute.
Users: s390-tools, HAL
What: /sys/bus/css/drivers/io_subchannel/.../pimpampom
Date: December 2002
-Contact: Cornelia Huck <cornelia.huck@de.ibm.com>
- linux-s390@vger.kernel.org
+Contact: linux-s390@vger.kernel.org
Description: Contains the PIM/PAM/POM values, as reported by the
channel subsystem when last queried by the common I/O
layer (this implies that this attribute is not necessarily
in sync with the values current in the channel subsystem).
+
Note: This is an I/O-subchannel specific attribute.
Users: s390-tools, HAL
+
+What: /sys/bus/css/devices/.../driver_override
+Date: June 2019
+Contact: linux-s390@vger.kernel.org
+Description: This file allows the driver for a device to be specified. When
+ specified, only a driver with a name matching the value written
+ to driver_override will have an opportunity to bind to the
+ device. The override is specified by writing a string to the
+ driver_override file (echo vfio-ccw > driver_override) and
+ may be cleared with an empty string (echo > driver_override).
+ This returns the device to standard matching rules binding.
+ Writing to driver_override does not automatically unbind the
+ device from its current driver or make any attempt to
+ automatically load the specified driver. If no driver with a
+ matching name is currently loaded in the kernel, the device
+ will not bind to any driver. This also allows devices to
+ opt-out of driver binding using a driver_override name such as
+ "none". Only a single driver may be specified in the override,
+ there is no support for parsing delimiters.
+
+ Note that unlike the mechanism of the same name for pci, this
+ file does not allow to override basic matching rules. I.e.,
+ the driver must still match the subchannel type of the device.
diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl
new file mode 100644
index 000000000000..6b4e8c7a963d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-cxl
@@ -0,0 +1,617 @@
+What: /sys/bus/cxl/flush
+Date: January, 2022
+KernelVersion: v5.18
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (WO) If userspace manually unbinds a port the kernel schedules
+ all descendant memdevs for unbind. Writing '1' to this attribute
+ flushes that work.
+
+
+What: /sys/bus/cxl/devices/memX/firmware_version
+Date: December, 2020
+KernelVersion: v5.12
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) "FW Revision" string as reported by the Identify
+ Memory Device Output Payload in the CXL-2.0
+ specification.
+
+
+What: /sys/bus/cxl/devices/memX/payload_max
+Date: December, 2020
+KernelVersion: v5.12
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) Maximum size (in bytes) of the mailbox command payload
+ registers. Linux caps this at 1MB if the device reports a
+ larger size.
+
+
+What: /sys/bus/cxl/devices/memX/label_storage_size
+Date: May, 2021
+KernelVersion: v5.13
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) Size (in bytes) of the Label Storage Area (LSA).
+
+
+What: /sys/bus/cxl/devices/memX/ram/size
+Date: December, 2020
+KernelVersion: v5.12
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) "Volatile Only Capacity" as bytes. Represents the
+ identically named field in the Identify Memory Device Output
+ Payload in the CXL-2.0 specification.
+
+
+What: /sys/bus/cxl/devices/memX/ram/qos_class
+Date: May, 2023
+KernelVersion: v6.8
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) For CXL host platforms that support "QoS Telemetry"
+ this attribute conveys a comma delimited list of platform
+ specific cookies that identifies a QoS performance class
+ for the volatile partition of the CXL mem device. These
+ class-ids can be compared against a similar "qos_class"
+ published for a root decoder. While it is not required
+ that the endpoints map their local memory-class to a
+ matching platform class, mismatches are not recommended
+ and there are platform specific performance related
+ side-effects that may result. First class-id is displayed.
+
+
+What: /sys/bus/cxl/devices/memX/pmem/size
+Date: December, 2020
+KernelVersion: v5.12
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) "Persistent Only Capacity" as bytes. Represents the
+ identically named field in the Identify Memory Device Output
+ Payload in the CXL-2.0 specification.
+
+
+What: /sys/bus/cxl/devices/memX/pmem/qos_class
+Date: May, 2023
+KernelVersion: v6.8
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) For CXL host platforms that support "QoS Telemetry"
+ this attribute conveys a comma delimited list of platform
+ specific cookies that identifies a QoS performance class
+ for the persistent partition of the CXL mem device. These
+ class-ids can be compared against a similar "qos_class"
+ published for a root decoder. While it is not required
+ that the endpoints map their local memory-class to a
+ matching platform class, mismatches are not recommended
+ and there are platform specific performance related
+ side-effects that may result. First class-id is displayed.
+
+
+What: /sys/bus/cxl/devices/memX/serial
+Date: January, 2022
+KernelVersion: v5.18
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) 64-bit serial number per the PCIe Device Serial Number
+ capability. Mandatory for CXL devices, see CXL 2.0 8.1.12.2
+ Memory Device PCIe Capabilities and Extended Capabilities.
+
+
+What: /sys/bus/cxl/devices/memX/numa_node
+Date: January, 2022
+KernelVersion: v5.18
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) If NUMA is enabled and the platform has affinitized the
+ host PCI device for this memory device, emit the CPU node
+ affinity for this device.
+
+
+What: /sys/bus/cxl/devices/memX/security/state
+Date: June, 2023
+KernelVersion: v6.5
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) Reading this file will display the CXL security state for
+ that device. Such states can be: 'disabled', 'sanitize', when
+ a sanitization is currently underway; or those available only
+ for persistent memory: 'locked', 'unlocked' or 'frozen'. This
+ sysfs entry is select/poll capable from userspace to notify
+ upon completion of a sanitize operation.
+
+
+What: /sys/bus/cxl/devices/memX/security/sanitize
+Date: June, 2023
+KernelVersion: v6.5
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (WO) Write a boolean 'true' string value to this attribute to
+ sanitize the device to securely re-purpose or decommission it.
+ This is done by ensuring that all user data and meta-data,
+ whether it resides in persistent capacity, volatile capacity,
+ or the LSA, is made permanently unavailable by whatever means
+ is appropriate for the media type. This functionality requires
+ the device to be disabled, that is, not actively decoding any
+ HPA ranges. This permits avoiding explicit global CPU cache
+ management, relying instead for it to be done when a region
+ transitions between software programmed and hardware committed
+ states. If this file is not present, then there is no hardware
+ support for the operation.
+
+
+What /sys/bus/cxl/devices/memX/security/erase
+Date: June, 2023
+KernelVersion: v6.5
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (WO) Write a boolean 'true' string value to this attribute to
+ secure erase user data by changing the media encryption keys for
+ all user data areas of the device. This functionality requires
+ the device to be disabled, that is, not actively decoding any
+ HPA ranges. This permits avoiding explicit global CPU cache
+ management, relying instead for it to be done when a region
+ transitions between software programmed and hardware committed
+ states. If this file is not present, then there is no hardware
+ support for the operation.
+
+
+What: /sys/bus/cxl/devices/memX/firmware/
+Date: April, 2023
+KernelVersion: v6.5
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RW) Firmware uploader mechanism. The different files under
+ this directory can be used to upload and activate new
+ firmware for CXL devices. The interfaces under this are
+ documented in sysfs-class-firmware.
+
+
+What: /sys/bus/cxl/devices/*/devtype
+Date: June, 2021
+KernelVersion: v5.14
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) CXL device objects export the devtype attribute which
+ mirrors the same value communicated in the DEVTYPE environment
+ variable for uevents for devices on the "cxl" bus.
+
+
+What: /sys/bus/cxl/devices/*/modalias
+Date: December, 2021
+KernelVersion: v5.18
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) CXL device objects export the modalias attribute which
+ mirrors the same value communicated in the MODALIAS environment
+ variable for uevents for devices on the "cxl" bus.
+
+
+What: /sys/bus/cxl/devices/portX/uport
+Date: June, 2021
+KernelVersion: v5.14
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) CXL port objects are enumerated from either a platform
+ firmware device (ACPI0017 and ACPI0016) or PCIe switch upstream
+ port with CXL component registers. The 'uport' symlink connects
+ the CXL portX object to the device that published the CXL port
+ capability.
+
+
+What: /sys/bus/cxl/devices/{port,endpoint}X/parent_dport
+Date: January, 2023
+KernelVersion: v6.3
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) CXL port objects are instantiated for each upstream port in
+ a CXL/PCIe switch, and for each endpoint to map the
+ corresponding memory device into the CXL port hierarchy. When a
+ descendant CXL port (switch or endpoint) is enumerated it is
+ useful to know which 'dport' object in the parent CXL port
+ routes to this descendant. The 'parent_dport' symlink points to
+ the device representing the downstream port of a CXL switch that
+ routes to {port,endpoint}X.
+
+
+What: /sys/bus/cxl/devices/portX/dportY
+Date: June, 2021
+KernelVersion: v5.14
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) CXL port objects are enumerated from either a platform
+ firmware device (ACPI0017 and ACPI0016) or PCIe switch upstream
+ port with CXL component registers. The 'dportY' symlink
+ identifies one or more downstream ports that the upstream port
+ may target in its decode of CXL memory resources. The 'Y'
+ integer reflects the hardware port unique-id used in the
+ hardware decoder target list.
+
+
+What: /sys/bus/cxl/devices/portX/decoders_committed
+Date: October, 2023
+KernelVersion: v6.7
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) A memory device is considered active when any of its
+ decoders are in the "committed" state (See CXL 3.0 8.2.4.19.7
+ CXL HDM Decoder n Control Register). Hotplug and destructive
+ operations like "sanitize" are blocked while device is actively
+ decoding a Host Physical Address range. Note that this number
+ may be elevated without any regionX objects active or even
+ enumerated, as this may be due to decoders established by
+ platform firmware or a previous kernel (kexec).
+
+
+What: /sys/bus/cxl/devices/decoderX.Y
+Date: June, 2021
+KernelVersion: v5.14
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) CXL decoder objects are enumerated from either a platform
+ firmware description, or a CXL HDM decoder register set in a
+ PCIe device (see CXL 2.0 section 8.2.5.12 CXL HDM Decoder
+ Capability Structure). The 'X' in decoderX.Y represents the
+ cxl_port container of this decoder, and 'Y' represents the
+ instance id of a given decoder resource.
+
+
+What: /sys/bus/cxl/devices/decoderX.Y/{start,size}
+Date: June, 2021
+KernelVersion: v5.14
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) The 'start' and 'size' attributes together convey the
+ physical address base and number of bytes mapped in the
+ decoder's decode window. For decoders of devtype
+ "cxl_decoder_root" the address range is fixed. For decoders of
+ devtype "cxl_decoder_switch" the address is bounded by the
+ decode range of the cxl_port ancestor of the decoder's cxl_port,
+ and dynamically updates based on the active memory regions in
+ that address space.
+
+
+What: /sys/bus/cxl/devices/decoderX.Y/locked
+Date: June, 2021
+KernelVersion: v5.14
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) CXL HDM decoders have the capability to lock the
+ configuration until the next device reset. For decoders of
+ devtype "cxl_decoder_root" there is no standard facility to
+ unlock them. For decoders of devtype "cxl_decoder_switch" a
+ secondary bus reset, of the PCIe bridge that provides the bus
+ for this decoders uport, unlocks / resets the decoder.
+
+
+What: /sys/bus/cxl/devices/decoderX.Y/target_list
+Date: June, 2021
+KernelVersion: v5.14
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) Display a comma separated list of the current decoder
+ target configuration. The list is ordered by the current
+ configured interleave order of the decoder's dport instances.
+ Each entry in the list is a dport id.
+
+
+What: /sys/bus/cxl/devices/decoderX.Y/cap_{pmem,ram,type2,type3}
+Date: June, 2021
+KernelVersion: v5.14
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) When a CXL decoder is of devtype "cxl_decoder_root", it
+ represents a fixed memory window identified by platform
+ firmware. A fixed window may only support a subset of memory
+ types. The 'cap_*' attributes indicate whether persistent
+ memory, volatile memory, accelerator memory, and / or expander
+ memory may be mapped behind this decoder's memory window.
+
+
+What: /sys/bus/cxl/devices/decoderX.Y/target_type
+Date: June, 2021
+KernelVersion: v5.14
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) When a CXL decoder is of devtype "cxl_decoder_switch", it
+ can optionally decode either accelerator memory (type-2) or
+ expander memory (type-3). The 'target_type' attribute indicates
+ the current setting which may dynamically change based on what
+ memory regions are activated in this decode hierarchy.
+
+
+What: /sys/bus/cxl/devices/endpointX/CDAT
+Date: July, 2022
+KernelVersion: v6.0
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) If this sysfs entry is not present no DOE mailbox was
+ found to support CDAT data. If it is present and the length of
+ the data is 0 reading the CDAT data failed. Otherwise the CDAT
+ data is reported.
+
+
+What: /sys/bus/cxl/devices/decoderX.Y/mode
+Date: May, 2022
+KernelVersion: v6.0
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RW) When a CXL decoder is of devtype "cxl_decoder_endpoint" it
+ translates from a host physical address range, to a device
+ local address range. Device-local address ranges are further
+ split into a 'ram' (volatile memory) range and 'pmem'
+ (persistent memory) range. The 'mode' attribute emits one of
+ 'ram', 'pmem', or 'none'. The 'none' indicates the decoder is
+ not actively decoding, or no DPA allocation policy has been
+ set.
+
+ 'mode' can be written, when the decoder is in the 'disabled'
+ state, with either 'ram' or 'pmem' to set the boundaries for the
+ next allocation.
+
+
+What: /sys/bus/cxl/devices/decoderX.Y/dpa_resource
+Date: May, 2022
+KernelVersion: v6.0
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) When a CXL decoder is of devtype "cxl_decoder_endpoint",
+ and its 'dpa_size' attribute is non-zero, this attribute
+ indicates the device physical address (DPA) base address of the
+ allocation.
+
+
+What: /sys/bus/cxl/devices/decoderX.Y/dpa_size
+Date: May, 2022
+KernelVersion: v6.0
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RW) When a CXL decoder is of devtype "cxl_decoder_endpoint" it
+ translates from a host physical address range, to a device local
+ address range. The range, base address plus length in bytes, of
+ DPA allocated to this decoder is conveyed in these 2 attributes.
+ Allocations can be mutated as long as the decoder is in the
+ disabled state. A write to 'dpa_size' releases the previous DPA
+ allocation and then attempts to allocate from the free capacity
+ in the device partition referred to by 'decoderX.Y/mode'.
+ Allocate and free requests can only be performed on the highest
+ instance number disabled decoder with non-zero size. I.e.
+ allocations are enforced to occur in increasing 'decoderX.Y/id'
+ order and frees are enforced to occur in decreasing
+ 'decoderX.Y/id' order.
+
+
+What: /sys/bus/cxl/devices/decoderX.Y/interleave_ways
+Date: May, 2022
+KernelVersion: v6.0
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) The number of targets across which this decoder's host
+ physical address (HPA) memory range is interleaved. The device
+ maps every Nth block of HPA (of size ==
+ 'interleave_granularity') to consecutive DPA addresses. The
+ decoder's position in the interleave is determined by the
+ device's (endpoint or switch) switch ancestry. For root
+ decoders their interleave is specified by platform firmware and
+ they only specify a downstream target order for host bridges.
+
+
+What: /sys/bus/cxl/devices/decoderX.Y/interleave_granularity
+Date: May, 2022
+KernelVersion: v6.0
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) The number of consecutive bytes of host physical address
+ space this decoder claims at address N before the decode rotates
+ to the next target in the interleave at address N +
+ interleave_granularity (assuming N is aligned to
+ interleave_granularity).
+
+
+What: /sys/bus/cxl/devices/decoderX.Y/create_{pmem,ram}_region
+Date: May, 2022, January, 2023
+KernelVersion: v6.0 (pmem), v6.3 (ram)
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RW) Write a string in the form 'regionZ' to start the process
+ of defining a new persistent, or volatile memory region
+ (interleave-set) within the decode range bounded by root decoder
+ 'decoderX.Y'. The value written must match the current value
+ returned from reading this attribute. An atomic compare exchange
+ operation is done on write to assign the requested id to a
+ region and allocate the region-id for the next creation attempt.
+ EBUSY is returned if the region name written does not match the
+ current cached value.
+
+
+What: /sys/bus/cxl/devices/decoderX.Y/delete_region
+Date: May, 2022
+KernelVersion: v6.0
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (WO) Write a string in the form 'regionZ' to delete that region,
+ provided it is currently idle / not bound to a driver.
+
+
+What: /sys/bus/cxl/devices/decoderX.Y/qos_class
+Date: May, 2023
+KernelVersion: v6.5
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) For CXL host platforms that support "QoS Telemetry" this
+ root-decoder-only attribute conveys a platform specific cookie
+ that identifies a QoS performance class for the CXL Window.
+ This class-id can be compared against a similar "qos_class"
+ published for each memory-type that an endpoint supports. While
+ it is not required that endpoints map their local memory-class
+ to a matching platform class, mismatches are not recommended and
+ there are platform specific side-effects that may result.
+
+
+What: /sys/bus/cxl/devices/regionZ/uuid
+Date: May, 2022
+KernelVersion: v6.0
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RW) Write a unique identifier for the region. This field must
+ be set for persistent regions and it must not conflict with the
+ UUID of another region. For volatile ram regions this
+ attribute is a read-only empty string.
+
+
+What: /sys/bus/cxl/devices/regionZ/interleave_granularity
+Date: May, 2022
+KernelVersion: v6.0
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RW) Set the number of consecutive bytes each device in the
+ interleave set will claim. The possible interleave granularity
+ values are determined by the CXL spec and the participating
+ devices.
+
+
+What: /sys/bus/cxl/devices/regionZ/interleave_ways
+Date: May, 2022
+KernelVersion: v6.0
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RW) Configures the number of devices participating in the
+ region is set by writing this value. Each device will provide
+ 1/interleave_ways of storage for the region.
+
+
+What: /sys/bus/cxl/devices/regionZ/size
+Date: May, 2022
+KernelVersion: v6.0
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RW) System physical address space to be consumed by the region.
+ When written trigger the driver to allocate space out of the
+ parent root decoder's address space. When read the size of the
+ address space is reported and should match the span of the
+ region's resource attribute. Size shall be set after the
+ interleave configuration parameters. Once set it cannot be
+ changed, only freed by writing 0. The kernel makes no guarantees
+ that data is maintained over an address space freeing event, and
+ there is no guarantee that a free followed by an allocate
+ results in the same address being allocated.
+
+
+What: /sys/bus/cxl/devices/regionZ/mode
+Date: January, 2023
+KernelVersion: v6.3
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) The mode of a region is established at region creation time
+ and dictates the mode of the endpoint decoder that comprise the
+ region. For more details on the possible modes see
+ /sys/bus/cxl/devices/decoderX.Y/mode
+
+
+What: /sys/bus/cxl/devices/regionZ/resource
+Date: May, 2022
+KernelVersion: v6.0
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) A region is a contiguous partition of a CXL root decoder
+ address space. Region capacity is allocated by writing to the
+ size attribute, the resulting physical address space determined
+ by the driver is reflected here. It is therefore not useful to
+ read this before writing a value to the size attribute.
+
+
+What: /sys/bus/cxl/devices/regionZ/target[0..N]
+Date: May, 2022
+KernelVersion: v6.0
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RW) Write an endpoint decoder object name to 'targetX' where X
+ is the intended position of the endpoint device in the region
+ interleave and N is the 'interleave_ways' setting for the
+ region. ENXIO is returned if the write results in an impossible
+ to map decode scenario, like the endpoint is unreachable at that
+ position relative to the root decoder interleave. EBUSY is
+ returned if the position in the region is already occupied, or
+ if the region is not in a state to accept interleave
+ configuration changes. EINVAL is returned if the object name is
+ not an endpoint decoder. Once all positions have been
+ successfully written a final validation for decode conflicts is
+ performed before activating the region.
+
+
+What: /sys/bus/cxl/devices/regionZ/commit
+Date: May, 2022
+KernelVersion: v6.0
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RW) Write a boolean 'true' string value to this attribute to
+ trigger the region to transition from the software programmed
+ state to the actively decoding in hardware state. The commit
+ operation in addition to validating that the region is in proper
+ configured state, validates that the decoders are being
+ committed in spec mandated order (last committed decoder id +
+ 1), and checks that the hardware accepts the commit request.
+ Reading this value indicates whether the region is committed or
+ not.
+
+
+What: /sys/bus/cxl/devices/memX/trigger_poison_list
+Date: April, 2023
+KernelVersion: v6.4
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (WO) When a boolean 'true' is written to this attribute the
+ memdev driver retrieves the poison list from the device. The
+ list consists of addresses that are poisoned, or would result
+ in poison if accessed, and the source of the poison. This
+ attribute is only visible for devices supporting the
+ capability. The retrieved errors are logged as kernel
+ events when cxl_poison event tracing is enabled.
+
+
+What: /sys/bus/cxl/devices/regionZ/accessY/read_bandwidth
+ /sys/bus/cxl/devices/regionZ/accessY/write_bandwidth
+Date: Jan, 2024
+KernelVersion: v6.9
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) The aggregated read or write bandwidth of the region. The
+ number is the accumulated read or write bandwidth of all CXL memory
+ devices that contributes to the region in MB/s. It is
+ identical data that should appear in
+ /sys/devices/system/node/nodeX/accessY/initiators/read_bandwidth or
+ /sys/devices/system/node/nodeX/accessY/initiators/write_bandwidth.
+ See Documentation/ABI/stable/sysfs-devices-node. access0 provides
+ the number to the closest initiator and access1 provides the
+ number to the closest CPU.
+
+
+What: /sys/bus/cxl/devices/regionZ/accessY/read_latency
+ /sys/bus/cxl/devices/regionZ/accessY/write_latency
+Date: Jan, 2024
+KernelVersion: v6.9
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) The read or write latency of the region. The number is
+ the worst read or write latency of all CXL memory devices that
+ contributes to the region in nanoseconds. It is identical data
+ that should appear in
+ /sys/devices/system/node/nodeX/accessY/initiators/read_latency or
+ /sys/devices/system/node/nodeX/accessY/initiators/write_latency.
+ See Documentation/ABI/stable/sysfs-devices-node. access0 provides
+ the number to the closest initiator and access1 provides the
+ number to the closest CPU.
+
+
+What: /sys/bus/cxl/devices/nvdimm-bridge0/ndbusX/nmemY/cxl/dirty_shutdown
+Date: Feb, 2025
+KernelVersion: v6.15
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) The device dirty shutdown count value, which is the number
+ of times the device could have incurred in potential data loss.
+ The count is persistent across power loss and wraps back to 0
+ upon overflow. If this file is not present, the device does not
+ have the necessary support for dirty tracking.
diff --git a/Documentation/ABI/testing/sysfs-bus-dax b/Documentation/ABI/testing/sysfs-bus-dax
new file mode 100644
index 000000000000..b34266bfae49
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-dax
@@ -0,0 +1,153 @@
+What: /sys/bus/dax/devices/daxX.Y/align
+Date: October, 2020
+KernelVersion: v5.10
+Contact: nvdimm@lists.linux.dev
+Description:
+ (RW) Provides a way to specify an alignment for a dax device.
+ Values allowed are constrained by the physical address ranges
+ that back the dax device, and also by arch requirements.
+
+What: /sys/bus/dax/devices/daxX.Y/mapping
+Date: October, 2020
+KernelVersion: v5.10
+Contact: nvdimm@lists.linux.dev
+Description:
+ (WO) Provides a way to allocate a mapping range under a dax
+ device. Specified in the format <start>-<end>.
+
+What: /sys/bus/dax/devices/daxX.Y/mapping[0..N]/start
+What: /sys/bus/dax/devices/daxX.Y/mapping[0..N]/end
+What: /sys/bus/dax/devices/daxX.Y/mapping[0..N]/page_offset
+Date: October, 2020
+KernelVersion: v5.10
+Contact: nvdimm@lists.linux.dev
+Description:
+ (RO) A dax device may have multiple constituent discontiguous
+ address ranges. These are represented by the different
+ 'mappingX' subdirectories. The 'start' attribute indicates the
+ start physical address for the given range. The 'end' attribute
+ indicates the end physical address for the given range. The
+ 'page_offset' attribute indicates the offset of the current
+ range in the dax device.
+
+What: /sys/bus/dax/devices/daxX.Y/resource
+Date: June, 2019
+KernelVersion: v5.3
+Contact: nvdimm@lists.linux.dev
+Description:
+ (RO) The resource attribute indicates the starting physical
+ address of a dax device. In case of a device with multiple
+ constituent ranges, it indicates the starting address of the
+ first range.
+
+What: /sys/bus/dax/devices/daxX.Y/size
+Date: October, 2020
+KernelVersion: v5.10
+Contact: nvdimm@lists.linux.dev
+Description:
+ (RW) The size attribute indicates the total size of a dax
+ device. For creating subdivided dax devices, or for resizing
+ an existing device, the new size can be written to this as
+ part of the reconfiguration process.
+
+What: /sys/bus/dax/devices/daxX.Y/numa_node
+Date: November, 2019
+KernelVersion: v5.5
+Contact: nvdimm@lists.linux.dev
+Description:
+ (RO) If NUMA is enabled and the platform has affinitized the
+ backing device for this dax device, emit the CPU node
+ affinity for this device.
+
+What: /sys/bus/dax/devices/daxX.Y/target_node
+Date: February, 2019
+KernelVersion: v5.1
+Contact: nvdimm@lists.linux.dev
+Description:
+ (RO) The target-node attribute is the Linux numa-node that a
+ device-dax instance may create when it is online. Prior to
+ being online the device's 'numa_node' property reflects the
+ closest online cpu node which is the typical expectation of a
+ device 'numa_node'. Once it is online it becomes its own
+ distinct numa node.
+
+What: $(readlink -f /sys/bus/dax/devices/daxX.Y)/../dax_region/available_size
+Date: October, 2020
+KernelVersion: v5.10
+Contact: nvdimm@lists.linux.dev
+Description:
+ (RO) The available_size attribute tracks available dax region
+ capacity. This only applies to volatile hmem devices, not pmem
+ devices, since pmem devices are defined by nvdimm namespace
+ boundaries.
+
+What: $(readlink -f /sys/bus/dax/devices/daxX.Y)/../dax_region/size
+Date: July, 2017
+KernelVersion: v5.1
+Contact: nvdimm@lists.linux.dev
+Description:
+ (RO) The size attribute indicates the size of a given dax region
+ in bytes.
+
+What: $(readlink -f /sys/bus/dax/devices/daxX.Y)/../dax_region/align
+Date: October, 2020
+KernelVersion: v5.10
+Contact: nvdimm@lists.linux.dev
+Description:
+ (RO) The align attribute indicates alignment of the dax region.
+ Changes on align may not always be valid, when say certain
+ mappings were created with 2M and then we switch to 1G. This
+ validates all ranges against the new value being attempted, post
+ resizing.
+
+What: $(readlink -f /sys/bus/dax/devices/daxX.Y)/../dax_region/seed
+Date: October, 2020
+KernelVersion: v5.10
+Contact: nvdimm@lists.linux.dev
+Description:
+ (RO) The seed device is a concept for dynamic dax regions to be
+ able to split the region amongst multiple sub-instances. The
+ seed device, similar to libnvdimm seed devices, is a device
+ that starts with zero capacity allocated and unbound to a
+ driver.
+
+What: $(readlink -f /sys/bus/dax/devices/daxX.Y)/../dax_region/create
+Date: October, 2020
+KernelVersion: v5.10
+Contact: nvdimm@lists.linux.dev
+Description:
+ (RW) The create interface to the dax region provides a way to
+ create a new unconfigured dax device under the given region, which
+ can then be configured (with a size etc.) and then probed.
+
+What: $(readlink -f /sys/bus/dax/devices/daxX.Y)/../dax_region/delete
+Date: October, 2020
+KernelVersion: v5.10
+Contact: nvdimm@lists.linux.dev
+Description:
+ (WO) The delete interface for a dax region provides for deletion
+ of any 0-sized and idle dax devices.
+
+What: $(readlink -f /sys/bus/dax/devices/daxX.Y)/../dax_region/id
+Date: July, 2017
+KernelVersion: v5.1
+Contact: nvdimm@lists.linux.dev
+Description:
+ (RO) The id attribute indicates the region id of a dax region.
+
+What: /sys/bus/dax/devices/daxX.Y/memmap_on_memory
+Date: January, 2024
+KernelVersion: v6.8
+Contact: nvdimm@lists.linux.dev
+Description:
+ (RW) Control the memmap_on_memory setting if the dax device
+ were to be hotplugged as system memory. This determines whether
+ the 'altmap' for the hotplugged memory will be placed on the
+ device being hotplugged (memmap_on_memory=1) or if it will be
+ placed on regular memory (memmap_on_memory=0). This attribute
+ must be set before the device is handed over to the 'kmem'
+ driver (i.e. hotplugged into system-ram). Additionally, this
+ depends on CONFIG_MHP_MEMMAP_ON_MEMORY, and a globally enabled
+ memmap_on_memory parameter for memory_hotplug. This is
+ typically set on the kernel command line -
+ memory_hotplug.memmap_on_memory set to 'true' or 'force'."
diff --git a/Documentation/ABI/testing/sysfs-bus-dfl b/Documentation/ABI/testing/sysfs-bus-dfl
new file mode 100644
index 000000000000..b0265ab17200
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-dfl
@@ -0,0 +1,17 @@
+What: /sys/bus/dfl/devices/dfl_dev.X/type
+Date: Aug 2020
+KernelVersion: 5.10
+Contact: Xu Yilun <yilun.xu@intel.com>
+Description: Read-only. It returns type of DFL FIU of the device. Now DFL
+ supports 2 FIU types, 0 for FME, 1 for PORT.
+
+ Format: 0x%x
+
+What: /sys/bus/dfl/devices/dfl_dev.X/feature_id
+Date: Aug 2020
+KernelVersion: 5.10
+Contact: Xu Yilun <yilun.xu@intel.com>
+Description: Read-only. It returns feature identifier local to its DFL FIU
+ type.
+
+ Format: 0x%x
diff --git a/Documentation/ABI/testing/sysfs-bus-dfl-devices-emif b/Documentation/ABI/testing/sysfs-bus-dfl-devices-emif
new file mode 100644
index 000000000000..817d14126d4d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-dfl-devices-emif
@@ -0,0 +1,25 @@
+What: /sys/bus/dfl/devices/dfl_dev.X/infX_cal_fail
+Date: Oct 2020
+KernelVersion: 5.12
+Contact: Xu Yilun <yilun.xu@intel.com>
+Description: Read-only. It indicates if the calibration failed on this
+ memory interface. "1" for calibration failure, "0" for OK.
+ Format: %u
+
+What: /sys/bus/dfl/devices/dfl_dev.X/infX_init_done
+Date: Oct 2020
+KernelVersion: 5.12
+Contact: Xu Yilun <yilun.xu@intel.com>
+Description: Read-only. It indicates if the initialization completed on
+ this memory interface. "1" for initialization complete, "0"
+ for not yet.
+ Format: %u
+
+What: /sys/bus/dfl/devices/dfl_dev.X/infX_clear
+Date: Oct 2020
+KernelVersion: 5.12
+Contact: Xu Yilun <yilun.xu@intel.com>
+Description: Write-only. Writing "1" to this file will zero out all memory
+ data in this memory interface. Writing of other values is
+ invalid.
+ Format: %u
diff --git a/Documentation/ABI/testing/sysfs-bus-dfl-devices-n3000-nios b/Documentation/ABI/testing/sysfs-bus-dfl-devices-n3000-nios
new file mode 100644
index 000000000000..5335d742bcaf
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-dfl-devices-n3000-nios
@@ -0,0 +1,47 @@
+What: /sys/bus/dfl/devices/dfl_dev.X/fec_mode
+Date: Oct 2020
+KernelVersion: 5.12
+Contact: Xu Yilun <yilun.xu@intel.com>
+Description: Read-only. Returns the FEC mode of the 25G links of the
+ ethernet retimers configured by Nios firmware. "rs" for Reed
+ Solomon FEC, "kr" for Fire Code FEC, "no" for NO FEC.
+ "not supported" if the FEC mode setting is not supported, this
+ happens when the Nios firmware version major < 3, or no link is
+ configured to 25G.
+ Format: string
+
+What: /sys/bus/dfl/devices/dfl_dev.X/retimer_A_mode
+Date: Oct 2020
+KernelVersion: 5.12
+Contact: Xu Yilun <yilun.xu@intel.com>
+Description: Read-only. Returns the enumeration value of the working mode of
+ the retimer A configured by the Nios firmware. The value is
+ read out from shared registers filled by the Nios firmware. Now
+ the values could be:
+
+ - "0": Reset
+ - "1": 4x10G
+ - "2": 4x25G
+ - "3": 2x25G
+ - "4": 2x25G+2x10G
+ - "5": 1x25G
+
+ If the Nios firmware is updated in future to support more
+ retimer modes, more enumeration value is expected.
+ Format: 0x%x
+
+What: /sys/bus/dfl/devices/dfl_dev.X/retimer_B_mode
+Date: Oct 2020
+KernelVersion: 5.12
+Contact: Xu Yilun <yilun.xu@intel.com>
+Description: Read-only. Returns the enumeration value of the working mode of
+ the retimer B configured by the Nios firmware. The value format
+ is the same as retimer_A_mode.
+
+What: /sys/bus/dfl/devices/dfl_dev.X/nios_fw_version
+Date: Oct 2020
+KernelVersion: 5.12
+Contact: Xu Yilun <yilun.xu@intel.com>
+Description: Read-only. Returns the version of the Nios firmware in the
+ FPGA. Its format is "major.minor.patch".
+ Format: %x.%x.%x
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices b/Documentation/ABI/testing/sysfs-bus-event_source-devices
new file mode 100644
index 000000000000..79b268319df1
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices
@@ -0,0 +1,24 @@
+What: /sys/bus/event_source/devices/<pmu>
+Date: 2014/02/24
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description: Performance Monitoring Unit (<pmu>)
+
+ Each <pmu> directory, for a PMU device, is a name
+ optionally followed by an underscore and then either a
+ decimal or hexadecimal number. For example, cpu is a
+ PMU name without a suffix as is intel_bts,
+ uncore_imc_0 is a PMU name with a 0 numeric suffix,
+ ddr_pmu_87e1b0000000 is a PMU name with a hex
+ suffix. The hex suffix must be more than two
+ characters long to avoid ambiguity with PMUs like the
+ S390 cpum_cf.
+
+ Tools can treat PMUs with the same name that differ by
+ suffix as instances of the same PMU for the sake of,
+ for example, opening an event. For example, the PMUs
+ uncore_imc_free_running_0 and
+ uncore_imc_free_running_1 have an event data_read;
+ opening the data_read event on a PMU specified as
+ uncore_imc_free_running should be treated as opening
+ the data_read event on PMU uncore_imc_free_running_0
+ and PMU uncore_imc_free_running_1.
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-caps b/Documentation/ABI/testing/sysfs-bus-event_source-devices-caps
new file mode 100644
index 000000000000..a5f506f7d481
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-caps
@@ -0,0 +1,24 @@
+What: /sys/bus/event_source/devices/<dev>/caps
+Date: May 2022
+KernelVersion: 5.19
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description:
+ Attribute group to describe the capabilities exposed
+ for a particular pmu. Each attribute of this group can
+ expose information specific to a PMU, say pmu_name, so that
+ userspace can understand some of the feature which the
+ platform specific PMU supports.
+
+ One of the example available capability in supported platform
+ like Intel is pmu_name, which exposes underlying CPU name known
+ to the PMU driver.
+
+ Example output in powerpc:
+ grep . /sys/bus/event_source/devices/cpu/caps/*
+ /sys/bus/event_source/devices/cpu/caps/pmu_name:POWER9
+
+ The "branch_counter_nr" in the supported platform exposes the
+ maximum number of counters which can be shown in the u64 counters
+ of PERF_SAMPLE_BRANCH_COUNTERS, while the "branch_counter_width"
+ exposes the width of each counter. Both of them can be used by
+ the perf tool to parse the logged counters in each branch.
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-dfl_fme b/Documentation/ABI/testing/sysfs-bus-event_source-devices-dfl_fme
new file mode 100644
index 000000000000..63a32ddcb95e
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-dfl_fme
@@ -0,0 +1,104 @@
+What: /sys/bus/event_source/devices/dfl_fmeX/format
+Date: April 2020
+KernelVersion: 5.8
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. Attribute group to describe the magic bits
+ that go into perf_event_attr.config for a particular pmu.
+ (See ABI/testing/sysfs-bus-event_source-devices-format).
+
+ Each attribute under this group defines a bit range of the
+ perf_event_attr.config. All supported attributes are listed
+ below::
+
+ event = "config:0-11" - event ID
+ evtype = "config:12-15" - event type
+ portid = "config:16-23" - event source
+
+ For example::
+
+ fab_mmio_read = "event=0x06,evtype=0x02,portid=0xff"
+
+ It shows this fab_mmio_read is a fabric type (0x02) event with
+ 0x06 local event id for overall monitoring (portid=0xff).
+
+What: /sys/bus/event_source/devices/dfl_fmeX/cpumask
+Date: April 2020
+KernelVersion: 5.8
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. This file always returns cpu which the PMU is bound
+ for access to all fme pmu performance monitoring events.
+
+What: /sys/bus/event_source/devices/dfl_fmeX/events
+Date: April 2020
+KernelVersion: 5.8
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. Attribute group to describe performance monitoring
+ events specific to fme. Each attribute in this group describes
+ a single performance monitoring event supported by this fme pmu.
+ The name of the file is the name of the event.
+ (See ABI/testing/sysfs-bus-event_source-devices-events).
+
+ All supported performance monitoring events are listed below.
+
+ Basic events (evtype=0x00)::
+
+ clock = "event=0x00,evtype=0x00,portid=0xff"
+
+ Cache events (evtype=0x01)::
+
+ cache_read_hit = "event=0x00,evtype=0x01,portid=0xff"
+ cache_read_miss = "event=0x01,evtype=0x01,portid=0xff"
+ cache_write_hit = "event=0x02,evtype=0x01,portid=0xff"
+ cache_write_miss = "event=0x03,evtype=0x01,portid=0xff"
+ cache_hold_request = "event=0x05,evtype=0x01,portid=0xff"
+ cache_data_write_port_contention =
+ "event=0x06,evtype=0x01,portid=0xff"
+ cache_tag_write_port_contention =
+ "event=0x07,evtype=0x01,portid=0xff"
+ cache_tx_req_stall = "event=0x08,evtype=0x01,portid=0xff"
+ cache_rx_req_stall = "event=0x09,evtype=0x01,portid=0xff"
+ cache_eviction = "event=0x0a,evtype=0x01,portid=0xff"
+
+ Fabric events (evtype=0x02)::
+
+ fab_pcie0_read = "event=0x00,evtype=0x02,portid=0xff"
+ fab_pcie0_write = "event=0x01,evtype=0x02,portid=0xff"
+ fab_pcie1_read = "event=0x02,evtype=0x02,portid=0xff"
+ fab_pcie1_write = "event=0x03,evtype=0x02,portid=0xff"
+ fab_upi_read = "event=0x04,evtype=0x02,portid=0xff"
+ fab_upi_write = "event=0x05,evtype=0x02,portid=0xff"
+ fab_mmio_read = "event=0x06,evtype=0x02,portid=0xff"
+ fab_mmio_write = "event=0x07,evtype=0x02,portid=0xff"
+ fab_port_pcie0_read = "event=0x00,evtype=0x02,portid=?"
+ fab_port_pcie0_write = "event=0x01,evtype=0x02,portid=?"
+ fab_port_pcie1_read = "event=0x02,evtype=0x02,portid=?"
+ fab_port_pcie1_write = "event=0x03,evtype=0x02,portid=?"
+ fab_port_upi_read = "event=0x04,evtype=0x02,portid=?"
+ fab_port_upi_write = "event=0x05,evtype=0x02,portid=?"
+ fab_port_mmio_read = "event=0x06,evtype=0x02,portid=?"
+ fab_port_mmio_write = "event=0x07,evtype=0x02,portid=?"
+
+ VTD events (evtype=0x03)::
+
+ vtd_port_read_transaction = "event=0x00,evtype=0x03,portid=?"
+ vtd_port_write_transaction = "event=0x01,evtype=0x03,portid=?"
+ vtd_port_devtlb_read_hit = "event=0x02,evtype=0x03,portid=?"
+ vtd_port_devtlb_write_hit = "event=0x03,evtype=0x03,portid=?"
+ vtd_port_devtlb_4k_fill = "event=0x04,evtype=0x03,portid=?"
+ vtd_port_devtlb_2m_fill = "event=0x05,evtype=0x03,portid=?"
+ vtd_port_devtlb_1g_fill = "event=0x06,evtype=0x03,portid=?"
+
+ VTD SIP events (evtype=0x04)::
+
+ vtd_sip_iotlb_4k_hit = "event=0x00,evtype=0x04,portid=0xff"
+ vtd_sip_iotlb_2m_hit = "event=0x01,evtype=0x04,portid=0xff"
+ vtd_sip_iotlb_1g_hit = "event=0x02,evtype=0x04,portid=0xff"
+ vtd_sip_slpwc_l3_hit = "event=0x03,evtype=0x04,portid=0xff"
+ vtd_sip_slpwc_l4_hit = "event=0x04,evtype=0x04,portid=0xff"
+ vtd_sip_rcc_hit = "event=0x05,evtype=0x04,portid=0xff"
+ vtd_sip_iotlb_4k_miss = "event=0x06,evtype=0x04,portid=0xff"
+ vtd_sip_iotlb_2m_miss = "event=0x07,evtype=0x04,portid=0xff"
+ vtd_sip_iotlb_1g_miss = "event=0x08,evtype=0x04,portid=0xff"
+ vtd_sip_slpwc_l3_miss = "event=0x09,evtype=0x04,portid=0xff"
+ vtd_sip_slpwc_l4_miss = "event=0x0a,evtype=0x04,portid=0xff"
+ vtd_sip_rcc_miss = "event=0x0b,evtype=0x04,portid=0xff"
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-dsa b/Documentation/ABI/testing/sysfs-bus-event_source-devices-dsa
new file mode 100644
index 000000000000..3c7d132281b0
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-dsa
@@ -0,0 +1,30 @@
+What: /sys/bus/event_source/devices/dsa*/format
+Date: April 2021
+KernelVersion: 5.13
+Contact: Tom Zanussi <tom.zanussi@linux.intel.com>
+Description: Read-only. Attribute group to describe the magic bits
+ that go into perf_event_attr.config or
+ perf_event_attr.config1 for the IDXD DSA pmu. (See also
+ ABI/testing/sysfs-bus-event_source-devices-format).
+
+ Each attribute in this group defines a bit range in
+ perf_event_attr.config or perf_event_attr.config1.
+ All supported attributes are listed below (See the
+ IDXD DSA Spec for possible attribute values)::
+
+ event_category = "config:0-3" - event category
+ event = "config:4-31" - event ID
+
+ filter_wq = "config1:0-31" - workqueue filter
+ filter_tc = "config1:32-39" - traffic class filter
+ filter_pgsz = "config1:40-43" - page size filter
+ filter_sz = "config1:44-51" - transfer size filter
+ filter_eng = "config1:52-59" - engine filter
+
+What: /sys/bus/event_source/devices/dsa*/cpumask
+Date: April 2021
+KernelVersion: 5.13
+Contact: Tom Zanussi <tom.zanussi@linux.intel.com>
+Description: Read-only. This file always returns the cpu to which the
+ IDXD DSA pmu is bound for access to all dsa pmu
+ performance monitoring events.
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-events b/Documentation/ABI/testing/sysfs-bus-event_source-devices-events
index 505f080d20a1..0fe1b9487202 100644
--- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-events
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-events
@@ -37,6 +37,14 @@ Description: Per-pmu performance monitoring events specific to the running syste
performance monitoring event supported by the <pmu>. The name
of the file is the name of the event.
+ As performance monitoring event names are case insensitive
+ in the perf tool, the perf tool only looks for all lower
+ case or all upper case event names in sysfs to avoid
+ scanning the directory. It is therefore required the
+ name of the event here is either completely lower or upper
+ case, with no mixed-case characters. Numbers, '.', '_', and
+ '-' are also allowed.
+
File contents:
<term>[=<value>][,<term>[=<value>]]...
@@ -47,7 +55,7 @@ Description: Per-pmu performance monitoring events specific to the running syste
If a <term> is specified alone (without an assigned value), it
is implied that 0x1 is assigned to that <term>.
- Examples (each of these lines would be in a seperate file):
+ Examples (each of these lines would be in a separate file):
event=0x2abc
event=0x423,inv,cmask=0x3
@@ -83,7 +91,7 @@ Description: Perf event scaling factors
A string representing a floating point value expressed in
scientific notation to be multiplied by the event count
- recieved from the kernel to match the unit specified in the
+ received from the kernel to match the unit specified in the
<event>.unit file.
Example:
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-format b/Documentation/ABI/testing/sysfs-bus-event_source-devices-format
index 77f47ff5ee02..df7ccc1b2fba 100644
--- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-format
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-format
@@ -1,6 +1,6 @@
-Where: /sys/bus/event_source/devices/<dev>/format
+What: /sys/bus/event_source/devices/<dev>/format
Date: January 2012
-Kernel Version: 3.3
+KernelVersion: 3.3
Contact: Jiri Olsa <jolsa@redhat.com>
Description:
Attribute group to describe the magic bits that go into
@@ -10,7 +10,8 @@ Description:
name/value pairs.
Userspace must be prepared for the possibility that attributes
- define overlapping bit ranges. For example:
+ define overlapping bit ranges. For example::
+
attr1 = 'config:0-23'
attr2 = 'config:0-7'
attr3 = 'config:12-35'
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hisi_ptt b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hisi_ptt
new file mode 100644
index 000000000000..1119766564d7
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hisi_ptt
@@ -0,0 +1,113 @@
+What: /sys/bus/event_source/devices/hisi_ptt<sicl_id>_<core_id>/tune
+Date: October 2022
+KernelVersion: 6.1
+Contact: Yicong Yang <yangyicong@hisilicon.com>
+Description: This directory contains files for tuning the PCIe link
+ parameters(events). Each file is named after the event
+ of the PCIe link.
+
+ See Documentation/trace/hisi-ptt.rst for more information.
+
+What: /sys/bus/event_source/devices/hisi_ptt<sicl_id>_<core_id>/tune/qos_tx_cpl
+Date: October 2022
+KernelVersion: 6.1
+Contact: Yicong Yang <yangyicong@hisilicon.com>
+Description: (RW) Controls the weight of Tx completion TLPs, which influence
+ the proportion of outbound completion TLPs on the PCIe link.
+ The available tune data is [0, 1, 2]. Writing a negative value
+ will return an error, and out of range values will be converted
+ to 2. The value indicates a probable level of the event.
+
+What: /sys/bus/event_source/devices/hisi_ptt<sicl_id>_<core_id>/tune/qos_tx_np
+Date: October 2022
+KernelVersion: 6.1
+Contact: Yicong Yang <yangyicong@hisilicon.com>
+Description: (RW) Controls the weight of Tx non-posted TLPs, which influence
+ the proportion of outbound non-posted TLPs on the PCIe link.
+ The available tune data is [0, 1, 2]. Writing a negative value
+ will return an error, and out of range values will be converted
+ to 2. The value indicates a probable level of the event.
+
+What: /sys/bus/event_source/devices/hisi_ptt<sicl_id>_<core_id>/tune/qos_tx_p
+Date: October 2022
+KernelVersion: 6.1
+Contact: Yicong Yang <yangyicong@hisilicon.com>
+Description: (RW) Controls the weight of Tx posted TLPs, which influence the
+ proportion of outbound posted TLPs on the PCIe link.
+ The available tune data is [0, 1, 2]. Writing a negative value
+ will return an error, and out of range values will be converted
+ to 2. The value indicates a probable level of the event.
+
+What: /sys/bus/event_source/devices/hisi_ptt<sicl_id>_<core_id>/tune/rx_alloc_buf_level
+Date: October 2022
+KernelVersion: 6.1
+Contact: Yicong Yang <yangyicong@hisilicon.com>
+Description: (RW) Control the allocated buffer watermark for inbound packets.
+ The packets will be stored in the buffer first and then transmitted
+ either when the watermark reached or when timed out.
+ The available tune data is [0, 1, 2]. Writing a negative value
+ will return an error, and out of range values will be converted
+ to 2. The value indicates a probable level of the event.
+
+What: /sys/bus/event_source/devices/hisi_ptt<sicl_id>_<core_id>/tune/tx_alloc_buf_level
+Date: October 2022
+KernelVersion: 6.1
+Contact: Yicong Yang <yangyicong@hisilicon.com>
+Description: (RW) Control the allocated buffer watermark of outbound packets.
+ The packets will be stored in the buffer first and then transmitted
+ either when the watermark reached or when timed out.
+ The available tune data is [0, 1, 2]. Writing a negative value
+ will return an error, and out of range values will be converted
+ to 2. The value indicates a probable level of the event.
+
+What: /sys/devices/hisi_ptt<sicl_id>_<core_id>/root_port_filters
+Date: May 2023
+KernelVersion: 6.5
+Contact: Yicong Yang <yangyicong@hisilicon.com>
+Description: This directory contains the files providing the PCIe Root Port filters
+ information used for PTT trace. Each file is named after the supported
+ Root Port device name <domain>:<bus>:<device>.<function>.
+
+ See the description of the "filter" in Documentation/trace/hisi-ptt.rst
+ for more information.
+
+What: /sys/devices/hisi_ptt<sicl_id>_<core_id>/root_port_filters/multiselect
+Date: May 2023
+KernelVersion: 6.5
+Contact: Yicong Yang <yangyicong@hisilicon.com>
+Description: (Read) Indicates if this kind of filter can be selected at the same
+ time as others filters, or must be used on it's own. 1 indicates
+ the former case and 0 indicates the latter.
+
+What: /sys/devices/hisi_ptt<sicl_id>_<core_id>/root_port_filters/<bdf>
+Date: May 2023
+KernelVersion: 6.5
+Contact: Yicong Yang <yangyicong@hisilicon.com>
+Description: (Read) Indicates the filter value of this Root Port filter, which
+ can be used to control the TLP headers to trace by the PTT trace.
+
+What: /sys/devices/hisi_ptt<sicl_id>_<core_id>/requester_filters
+Date: May 2023
+KernelVersion: 6.5
+Contact: Yicong Yang <yangyicong@hisilicon.com>
+Description: This directory contains the files providing the PCIe Requester filters
+ information used for PTT trace. Each file is named after the supported
+ Endpoint device name <domain>:<bus>:<device>.<function>.
+
+ See the description of the "filter" in Documentation/trace/hisi-ptt.rst
+ for more information.
+
+What: /sys/devices/hisi_ptt<sicl_id>_<core_id>/requester_filters/multiselect
+Date: May 2023
+KernelVersion: 6.5
+Contact: Yicong Yang <yangyicong@hisilicon.com>
+Description: (Read) Indicates if this kind of filter can be selected at the same
+ time as others filters, or must be used on it's own. 1 indicates
+ the former case and 0 indicates the latter.
+
+What: /sys/devices/hisi_ptt<sicl_id>_<core_id>/requester_filters/<bdf>
+Date: May 2023
+KernelVersion: 6.5
+Contact: Yicong Yang <yangyicong@hisilicon.com>
+Description: (Read) Indicates the filter value of this Requester filter, which
+ can be used to control the TLP headers to trace by the PTT trace.
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7 b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7
index ec27c6c9e737..de390a010af8 100644
--- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7
@@ -1,3 +1,28 @@
+What: /sys/bus/event_source/devices/hv_24x7/format
+Date: September 2020
+Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description: Read-only. Attribute group to describe the magic bits
+ that go into perf_event_attr.config for a particular pmu.
+ (See ABI/testing/sysfs-bus-event_source-devices-format).
+
+ Each attribute under this group defines a bit range of the
+ perf_event_attr.config. All supported attributes are listed
+ below::
+
+ chip = "config:16-31"
+ core = "config:16-31"
+ domain = "config:0-3"
+ lpar = "config:0-15"
+ offset = "config:32-63"
+ vcpu = "config:16-31"
+
+ For example::
+
+ PM_PB_CYC = "domain=1,offset=0x80,chip=?,lpar=0x0"
+
+ In this event, '?' after chip specifies that
+ this value will be provided by user while running this event.
+
What: /sys/bus/event_source/devices/hv_24x7/interface/catalog
Date: February 2014
Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
@@ -22,6 +47,34 @@ Description:
Exposes the "version" field of the 24x7 catalog. This is also
extractable from the provided binary "catalog" sysfs entry.
+What: /sys/devices/hv_24x7/interface/sockets
+Date: May 2020
+Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description: read only
+ This sysfs interface exposes the number of sockets present in the
+ system.
+
+What: /sys/devices/hv_24x7/interface/chipspersocket
+Date: May 2020
+Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description: read only
+ This sysfs interface exposes the number of chips per socket
+ present in the system.
+
+What: /sys/devices/hv_24x7/interface/coresperchip
+Date: May 2020
+Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description: read only
+ This sysfs interface exposes the number of cores per chip
+ present in the system.
+
+What: /sys/devices/hv_24x7/cpumask
+Date: July 2020
+Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description: read only
+ This sysfs file exposes the cpumask which is designated to make
+ HCALLs to retrieve hv-24x7 pmu event counter data.
+
What: /sys/bus/event_source/devices/hv_24x7/event_descs/<event-name>
Date: February 2014
Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci
index 3ca4e554d2f9..40f7cd240591 100644
--- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci
@@ -1,3 +1,34 @@
+What: /sys/bus/event_source/devices/hv_gpci/format
+Date: September 2020
+Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description: Read-only. Attribute group to describe the magic bits
+ that go into perf_event_attr.config for a particular pmu.
+ (See ABI/testing/sysfs-bus-event_source-devices-format).
+
+ Each attribute under this group defines a bit range of the
+ perf_event_attr.config. All supported attributes are listed
+ below::
+
+ counter_info_version = "config:16-23"
+ length = "config:24-31"
+ partition_id = "config:32-63"
+ request = "config:0-31"
+ sibling_part_id = "config:32-63"
+ hw_chip_id = "config:32-63"
+ offset = "config:32-63"
+ phys_processor_idx = "config:32-63"
+ secondary_index = "config:0-15"
+ starting_index = "config:32-63"
+
+ For example::
+
+ processor_core_utilization_instructions_completed = "request=0x94,
+ phys_processor_idx=?,counter_info_version=0x8,
+ length=8,offset=0x18"
+
+ In this event, '?' after phys_processor_idx specifies this value
+ this value will be provided by user while running this event.
+
What: /sys/bus/event_source/devices/hv_gpci/interface/collect_privileged
Date: February 2014
Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
@@ -5,6 +36,7 @@ Description:
'0' if the hypervisor is configured to forbid access to event
counters being accumulated by other guests and to physical
domain event counters.
+
'1' if that access is allowed.
What: /sys/bus/event_source/devices/hv_gpci/interface/ga
@@ -41,3 +73,170 @@ Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
Description:
A number indicating the latest version of the gpci interface
that the kernel is aware of.
+
+What: /sys/devices/hv_gpci/cpumask
+Date: October 2020
+Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description: read only
+ This sysfs file exposes the cpumask which is designated to make
+ HCALLs to retrieve hv-gpci pmu event counter data.
+
+What: /sys/devices/hv_gpci/interface/processor_bus_topology
+Date: July 2023
+Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description: admin read only
+ This sysfs file exposes the system topology information by making HCALL
+ H_GET_PERF_COUNTER_INFO. The HCALL is made with counter request value
+ PROCESSOR_BUS_TOPOLOGY(0xD0).
+
+ * This sysfs file will be created only for power10 and above platforms.
+
+ * User needs root privileges to read data from this sysfs file.
+
+ * This sysfs file will be created, only when the HCALL returns "H_SUCCESS",
+ "H_AUTHORITY" or "H_PARAMETER" as the return type.
+
+ HCALL with return error type "H_AUTHORITY" can be resolved during
+ runtime by setting "Enable Performance Information Collection" option.
+
+ * The end user reading this sysfs file must decode the content as per
+ underlying platform/firmware.
+
+ Possible error codes while reading this sysfs file:
+
+ * "-EPERM" : Partition is not permitted to retrieve performance information,
+ required to set "Enable Performance Information Collection" option.
+
+ * "-EIO" : Can't retrieve system information because of invalid buffer length/invalid address
+ or because of some hardware error. Refer to getPerfCountInfo documentation for
+ more information.
+
+ * "-EFBIG" : System information exceeds PAGE_SIZE.
+
+What: /sys/devices/hv_gpci/interface/processor_config
+Date: July 2023
+Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description: admin read only
+ This sysfs file exposes the system topology information by making HCALL
+ H_GET_PERF_COUNTER_INFO. The HCALL is made with counter request value
+ PROCESSOR_CONFIG(0x90).
+
+ * This sysfs file will be created only for power10 and above platforms.
+
+ * User needs root privileges to read data from this sysfs file.
+
+ * This sysfs file will be created, only when the HCALL returns "H_SUCCESS",
+ "H_AUTHORITY" or "H_PARAMETER" as the return type.
+
+ HCALL with return error type "H_AUTHORITY" can be resolved during
+ runtime by setting "Enable Performance Information Collection" option.
+
+ * The end user reading this sysfs file must decode the content as per
+ underlying platform/firmware.
+
+ Possible error codes while reading this sysfs file:
+
+ * "-EPERM" : Partition is not permitted to retrieve performance information,
+ required to set "Enable Performance Information Collection" option.
+
+ * "-EIO" : Can't retrieve system information because of invalid buffer length/invalid address
+ or because of some hardware error. Refer to getPerfCountInfo documentation for
+ more information.
+
+ * "-EFBIG" : System information exceeds PAGE_SIZE.
+
+What: /sys/devices/hv_gpci/interface/affinity_domain_via_virtual_processor
+Date: July 2023
+Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description: admin read only
+ This sysfs file exposes the system topology information by making HCALL
+ H_GET_PERF_COUNTER_INFO. The HCALL is made with counter request value
+ AFFINITY_DOMAIN_INFORMATION_BY_VIRTUAL_PROCESSOR(0xA0).
+
+ * This sysfs file will be created only for power10 and above platforms.
+
+ * User needs root privileges to read data from this sysfs file.
+
+ * This sysfs file will be created, only when the HCALL returns "H_SUCCESS",
+ "H_AUTHORITY" or "H_PARAMETER" as the return type.
+
+ HCALL with return error type "H_AUTHORITY" can be resolved during
+ runtime by setting "Enable Performance Information Collection" option.
+
+ * The end user reading this sysfs file must decode the content as per
+ underlying platform/firmware.
+
+ Possible error codes while reading this sysfs file:
+
+ * "-EPERM" : Partition is not permitted to retrieve performance information,
+ required to set "Enable Performance Information Collection" option.
+
+ * "-EIO" : Can't retrieve system information because of invalid buffer length/invalid address
+ or because of some hardware error. Refer to getPerfCountInfo documentation for
+ more information.
+
+ * "-EFBIG" : System information exceeds PAGE_SIZE.
+
+What: /sys/devices/hv_gpci/interface/affinity_domain_via_domain
+Date: July 2023
+Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description: admin read only
+ This sysfs file exposes the system topology information by making HCALL
+ H_GET_PERF_COUNTER_INFO. The HCALL is made with counter request value
+ AFFINITY_DOMAIN_INFORMATION_BY_DOMAIN(0xB0).
+
+ * This sysfs file will be created only for power10 and above platforms.
+
+ * User needs root privileges to read data from this sysfs file.
+
+ * This sysfs file will be created, only when the HCALL returns "H_SUCCESS",
+ "H_AUTHORITY" or "H_PARAMETER" as the return type.
+
+ HCALL with return error type "H_AUTHORITY" can be resolved during
+ runtime by setting "Enable Performance Information Collection" option.
+
+ * The end user reading this sysfs file must decode the content as per
+ underlying platform/firmware.
+
+ Possible error codes while reading this sysfs file:
+
+ * "-EPERM" : Partition is not permitted to retrieve performance information,
+ required to set "Enable Performance Information Collection" option.
+
+ * "-EIO" : Can't retrieve system information because of invalid buffer length/invalid address
+ or because of some hardware error. Refer to getPerfCountInfo documentation for
+ more information.
+
+ * "-EFBIG" : System information exceeds PAGE_SIZE.
+
+What: /sys/devices/hv_gpci/interface/affinity_domain_via_partition
+Date: July 2023
+Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description: admin read only
+ This sysfs file exposes the system topology information by making HCALL
+ H_GET_PERF_COUNTER_INFO. The HCALL is made with counter request value
+ AFFINITY_DOMAIN_INFORMATION_BY_PARTITION(0xB1).
+
+ * This sysfs file will be created only for power10 and above platforms.
+
+ * User needs root privileges to read data from this sysfs file.
+
+ * This sysfs file will be created, only when the HCALL returns "H_SUCCESS",
+ "H_AUTHORITY" or "H_PARAMETER" as the return type.
+
+ HCALL with return error type "H_AUTHORITY" can be resolved during
+ runtime by setting "Enable Performance Information Collection" option.
+
+ * The end user reading this sysfs file must decode the content as per
+ underlying platform/firmware.
+
+ Possible error codes while reading this sysfs file:
+
+ * "-EPERM" : Partition is not permitted to retrieve performance information,
+ required to set "Enable Performance Information Collection" option.
+
+ * "-EIO" : Can't retrieve system information because of invalid buffer length/invalid address
+ or because of some hardware error. Refer to getPerfCountInfo documentation for
+ more information.
+
+ * "-EFBIG" : System information exceeds PAGE_SIZE.
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-iommu b/Documentation/ABI/testing/sysfs-bus-event_source-devices-iommu
new file mode 100644
index 000000000000..d7af4919302e
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-iommu
@@ -0,0 +1,37 @@
+What: /sys/bus/event_source/devices/dmar*/format
+Date: Jan 2023
+KernelVersion: 6.3
+Contact: Kan Liang <kan.liang@linux.intel.com>
+Description: Read-only. Attribute group to describe the magic bits
+ that go into perf_event_attr.config,
+ perf_event_attr.config1 or perf_event_attr.config2 for
+ the IOMMU pmu. (See also
+ ABI/testing/sysfs-bus-event_source-devices-format).
+
+ Each attribute in this group defines a bit range in
+ perf_event_attr.config, perf_event_attr.config1,
+ or perf_event_attr.config2. All supported attributes
+ are listed below (See the VT-d Spec 4.0 for possible
+ attribute values)::
+
+ event = "config:0-27" - event ID
+ event_group = "config:28-31" - event group ID
+
+ filter_requester_en = "config1:0" - Enable Requester ID filter
+ filter_domain_en = "config1:1" - Enable Domain ID filter
+ filter_pasid_en = "config1:2" - Enable PASID filter
+ filter_ats_en = "config1:3" - Enable Address Type filter
+ filter_page_table_en= "config1:4" - Enable Page Table Level filter
+ filter_requester_id = "config1:16-31" - Requester ID filter
+ filter_domain = "config1:32-47" - Domain ID filter
+ filter_pasid = "config2:0-21" - PASID filter
+ filter_ats = "config2:24-28" - Address Type filter
+ filter_page_table = "config2:32-36" - Page Table Level filter
+
+What: /sys/bus/event_source/devices/dmar*/cpumask
+Date: Jan 2023
+KernelVersion: 6.3
+Contact: Kan Liang <kan.liang@linux.intel.com>
+Description: Read-only. This file always returns the CPU to which the
+ IOMMU pmu is bound for access to all IOMMU pmu performance
+ monitoring events.
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-uncore b/Documentation/ABI/testing/sysfs-bus-event_source-devices-uncore
new file mode 100644
index 000000000000..b56e8f019fd4
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-uncore
@@ -0,0 +1,13 @@
+What: /sys/bus/event_source/devices/uncore_*/alias
+Date: June 2021
+KernelVersion: 5.15
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description: Read-only. An attribute to describe the alias name of
+ the uncore PMU if an alias exists on some platforms.
+ The 'perf(1)' tool should treat both names the same.
+ They both can be used to access the uncore PMU.
+
+ Example:
+
+ $ cat /sys/devices/uncore_cha_2/alias
+ uncore_type_0_2
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-vpa-dtl b/Documentation/ABI/testing/sysfs-bus-event_source-devices-vpa-dtl
new file mode 100644
index 000000000000..7b7c789a5cf5
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-vpa-dtl
@@ -0,0 +1,25 @@
+What: /sys/bus/event_source/devices/vpa_dtl/format
+Date: February 2025
+Contact: Linux on PowerPC Developer List <linuxppc-dev at lists.ozlabs.org>
+Description: Read-only. Attribute group to describe the magic bits
+ that go into perf_event_attr.config for a particular pmu.
+ (See ABI/testing/sysfs-bus-event_source-devices-format).
+
+ Each attribute under this group defines a bit range of the
+ perf_event_attr.config. Supported attribute are listed
+ below::
+
+ event = "config:0-7" - event ID
+
+ For example::
+
+ dtl_cede = "event=0x1"
+
+What: /sys/bus/event_source/devices/vpa_dtl/events
+Date: February 2025
+Contact: Linux on PowerPC Developer List <linuxppc-dev at lists.ozlabs.org>
+Description: (RO) Attribute group to describe performance monitoring events
+ for the Virtual Processor Dispatch Trace Log. Each attribute in
+ this group describes a single performance monitoring event
+ supported by vpa_dtl pmu. The name of the file is the name of
+ the event (See ABI/testing/sysfs-bus-event_source-devices-events).
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-vpa-pmu b/Documentation/ABI/testing/sysfs-bus-event_source-devices-vpa-pmu
new file mode 100644
index 000000000000..a116aee9709a
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-vpa-pmu
@@ -0,0 +1,25 @@
+What: /sys/bus/event_source/devices/vpa_pmu/format
+Date: November 2024
+Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description: Read-only. Attribute group to describe the magic bits
+ that go into perf_event_attr.config for a particular pmu.
+ (See ABI/testing/sysfs-bus-event_source-devices-format).
+
+ Each attribute under this group defines a bit range of the
+ perf_event_attr.config. Supported attribute are listed
+ below::
+
+ event = "config:0-31" - event ID
+
+ For example::
+
+ l1_to_l2_lat = "event=0x1"
+
+What: /sys/bus/event_source/devices/vpa_pmu/events
+Date: November 2024
+Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description: Read-only. Attribute group to describe performance monitoring
+ events for the Virtual Processor Area events. Each attribute
+ in this group describes a single performance monitoring event
+ supported by vpa_pmu. The name of the file is the name of
+ the event (See ABI/testing/sysfs-bus-event_source-devices-events).
diff --git a/Documentation/ABI/testing/sysfs-bus-fcoe b/Documentation/ABI/testing/sysfs-bus-fcoe
index 657df13b100d..5a4f2091ac37 100644
--- a/Documentation/ABI/testing/sysfs-bus-fcoe
+++ b/Documentation/ABI/testing/sysfs-bus-fcoe
@@ -3,16 +3,19 @@ Date: August 2012
KernelVersion: TBD
Contact: Robert Love <robert.w.love@intel.com>, devel@open-fcoe.org
Description: The FCoE bus. Attributes in this directory are control interfaces.
+
Attributes:
- ctlr_create: 'FCoE Controller' instance creation interface. Writing an
+ ctlr_create:
+ 'FCoE Controller' instance creation interface. Writing an
<ifname> to this file will allocate and populate sysfs with a
fcoe_ctlr_device (ctlr_X). The user can then configure any
per-port settings and finally write to the fcoe_ctlr_device's
'start' attribute to begin the kernel's discovery and login
process.
- ctlr_destroy: 'FCoE Controller' instance removal interface. Writing a
+ ctlr_destroy:
+ 'FCoE Controller' instance removal interface. Writing a
fcoe_ctlr_device's sysfs name to this file will log the
fcoe_ctlr_device out of the fabric or otherwise connected
FCoE devices. It will also free all kernel memory allocated
@@ -28,15 +31,17 @@ Description: 'FCoE Controller' instances on the fcoe bus.
1) Write interface name to ctlr_create 2) Configure the FCoE
Controller (ctlr_X) 3) Enable the FCoE Controller to begin
discovery and login. The FCoE Controller is destroyed by
- writing it's name, i.e. ctlr_X to the ctlr_delete file.
+ writing its name, i.e. ctlr_X to the ctlr_delete file.
Attributes:
- fcf_dev_loss_tmo: Device loss timeout period (see below). Changing
+ fcf_dev_loss_tmo:
+ Device loss timeout period (see below). Changing
this value will change the dev_loss_tmo for all
FCFs discovered by this controller.
- mode: Display or change the FCoE Controller's mode. Possible
+ mode:
+ Display or change the FCoE Controller's mode. Possible
modes are 'Fabric' and 'VN2VN'. If a FCoE Controller
is started in 'Fabric' mode then FIP FCF discovery is
initiated and ultimately a fabric login is attempted.
@@ -44,23 +49,30 @@ Attributes:
FIP VN2VN discovery and login is performed. A FCoE
Controller only supports one mode at a time.
- enabled: Whether an FCoE controller is enabled or disabled.
+ enabled:
+ Whether an FCoE controller is enabled or disabled.
0 if disabled, 1 if enabled. Writing either 0 or 1
to this file will enable or disable the FCoE controller.
- lesb/link_fail: Link Error Status Block (LESB) link failure count.
+ lesb/link_fail:
+ Link Error Status Block (LESB) link failure count.
- lesb/vlink_fail: Link Error Status Block (LESB) virtual link
+ lesb/vlink_fail:
+ Link Error Status Block (LESB) virtual link
failure count.
- lesb/miss_fka: Link Error Status Block (LESB) missed FCoE
+ lesb/miss_fka:
+ Link Error Status Block (LESB) missed FCoE
Initialization Protocol (FIP) Keep-Alives (FKA).
- lesb/symb_err: Link Error Status Block (LESB) symbolic error count.
+ lesb/symb_err:
+ Link Error Status Block (LESB) symbolic error count.
- lesb/err_block: Link Error Status Block (LESB) block error count.
+ lesb/err_block:
+ Link Error Status Block (LESB) block error count.
- lesb/fcs_error: Link Error Status Block (LESB) Fibre Channel
+ lesb/fcs_error:
+ Link Error Status Block (LESB) Fibre Channel
Services error count.
Notes: ctlr_X (global increment starting at 0)
@@ -75,31 +87,41 @@ Description: 'FCoE FCF' instances on the fcoe bus. A FCF is a Fibre Channel
Fibre Channel frames into a FC fabric. It can also take
outbound FC frames and pack them in Ethernet packets to
be sent to their destination on the Ethernet segment.
+
Attributes:
- fabric_name: Identifies the fabric that the FCF services.
+ fabric_name:
+ Identifies the fabric that the FCF services.
- switch_name: Identifies the FCF.
+ switch_name:
+ Identifies the FCF.
- priority: The switch's priority amongst other FCFs on the same
+ priority:
+ The switch's priority amongst other FCFs on the same
fabric.
- selected: 1 indicates that the switch has been selected for use;
+ selected:
+ 1 indicates that the switch has been selected for use;
0 indicates that the switch will not be used.
- fc_map: The Fibre Channel MAP
+ fc_map:
+ The Fibre Channel MAP
- vfid: The Virtual Fabric ID
+ vfid:
+ The Virtual Fabric ID
- mac: The FCF's MAC address
+ mac:
+ The FCF's MAC address
- fka_period: The FIP Keep-Alive period
+ fka_period:
+ The FIP Keep-Alive period
fabric_state: The internal kernel state
- "Unknown" - Initialization value
- "Disconnected" - No link to the FCF/fabric
- "Connected" - Host is connected to the FCF
- "Deleted" - FCF is being removed from the system
+
+ - "Unknown" - Initialization value
+ - "Disconnected" - No link to the FCF/fabric
+ - "Connected" - Host is connected to the FCF
+ - "Deleted" - FCF is being removed from the system
dev_loss_tmo: The device loss timeout period for this FCF.
diff --git a/Documentation/ABI/testing/sysfs-bus-fsi b/Documentation/ABI/testing/sysfs-bus-fsi
index 57c806350d6c..76e0caa0c2b3 100644
--- a/Documentation/ABI/testing/sysfs-bus-fsi
+++ b/Documentation/ABI/testing/sysfs-bus-fsi
@@ -1,25 +1,25 @@
-What: /sys/bus/platform/devices/fsi-master/rescan
+What: /sys/bus/platform/devices/../fsi-master/fsi0/rescan
Date: May 2017
KernelVersion: 4.12
-Contact: cbostic@linux.vnet.ibm.com
+Contact: linux-fsi@lists.ozlabs.org
Description:
Initiates a FSI master scan for all connected slave devices
on its links.
-What: /sys/bus/platform/devices/fsi-master/break
+What: /sys/bus/platform/devices/../fsi-master/fsi0/break
Date: May 2017
KernelVersion: 4.12
-Contact: cbostic@linux.vnet.ibm.com
+Contact: linux-fsi@lists.ozlabs.org
Description:
Sends an FSI BREAK command on a master's communication
- link to any connnected slaves. A BREAK resets connected
+ link to any connected slaves. A BREAK resets connected
device's logic and preps it to receive further commands
from the master.
-What: /sys/bus/platform/devices/fsi-master/slave@00:00/term
+What: /sys/bus/platform/devices/../fsi-master/fsi0/slave@00:00/term
Date: May 2017
KernelVersion: 4.12
-Contact: cbostic@linux.vnet.ibm.com
+Contact: linux-fsi@lists.ozlabs.org
Description:
Sends an FSI terminate command from the master to its
connected slave. A terminate resets the slave's state machines
@@ -29,10 +29,18 @@ Description:
ongoing operation in case of an expired 'Master Time Out'
timer.
-What: /sys/bus/platform/devices/fsi-master/slave@00:00/raw
+What: /sys/bus/platform/devices/../fsi-master/fsi0/slave@00:00/raw
Date: May 2017
KernelVersion: 4.12
-Contact: cbostic@linux.vnet.ibm.com
+Contact: linux-fsi@lists.ozlabs.org
Description:
Provides a means of reading/writing a 32 bit value from/to a
specified FSI bus address.
+
+What: /sys/bus/platform/devices/../cfam_reset
+Date: Sept 2020
+KernelVersion: 5.10
+Contact: linux-fsi@lists.ozlabs.org
+Description:
+ Provides a means of resetting the cfam that is attached to the
+ FSI device.
diff --git a/Documentation/ABI/testing/sysfs-bus-fsi-devices-sbefifo b/Documentation/ABI/testing/sysfs-bus-fsi-devices-sbefifo
new file mode 100644
index 000000000000..c7393b4dd2d8
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-fsi-devices-sbefifo
@@ -0,0 +1,10 @@
+What: /sys/bus/fsi/devices/XX.XX.00:06/sbefifoX/timeout
+KernelVersion: 5.15
+Contact: eajames@linux.ibm.com
+Description:
+ Indicates whether or not this SBE device has experienced a
+ timeout; i.e. the SBE did not respond within the time allotted
+ by the driver. A value of 1 indicates that a timeout has
+ occurred and no transfers have completed since the timeout. A
+ value of 0 indicates that no timeout has occurred, or if one
+ has, more recent transfers have completed successfully.
diff --git a/Documentation/ABI/testing/sysfs-bus-fsl-mc b/Documentation/ABI/testing/sysfs-bus-fsl-mc
index 80256b8b4f26..bf3c6af6ad89 100644
--- a/Documentation/ABI/testing/sysfs-bus-fsl-mc
+++ b/Documentation/ABI/testing/sysfs-bus-fsl-mc
@@ -6,8 +6,10 @@ Description:
the driver to attempt to bind to the device found at
this location. The format for the location is Object.Id
and is the same as found in /sys/bus/fsl-mc/devices/.
- For example:
- # echo dpni.2 > /sys/bus/fsl-mc/drivers/fsl_dpaa2_eth/bind
+
+ For example::
+
+ # echo dpni.2 > /sys/bus/fsl-mc/drivers/fsl_dpaa2_eth/bind
What: /sys/bus/fsl-mc/drivers/.../unbind
Date: December 2016
@@ -17,5 +19,7 @@ Description:
driver to attempt to unbind from the device found at
this location. The format for the location is Object.Id
and is the same as found in /sys/bus/fsl-mc/devices/.
- For example:
- # echo dpni.2 > /sys/bus/fsl-mc/drivers/fsl_dpaa2_eth/unbind
+
+ For example::
+
+ # echo dpni.2 > /sys/bus/fsl-mc/drivers/fsl_dpaa2_eth/unbind
diff --git a/Documentation/ABI/testing/sysfs-bus-i2c-devices-fsa9480 b/Documentation/ABI/testing/sysfs-bus-i2c-devices-fsa9480
index 9de269bb0ae5..288bc2fa9547 100644
--- a/Documentation/ABI/testing/sysfs-bus-i2c-devices-fsa9480
+++ b/Documentation/ABI/testing/sysfs-bus-i2c-devices-fsa9480
@@ -3,19 +3,25 @@ Date: February 2011
Contact: Minkyu Kang <mk7.kang@samsung.com>
Description:
show what device is attached
- NONE - no device
- USB - USB device is attached
- UART - UART is attached
- CHARGER - Charger is attaced
- JIG - JIG is attached
+
+ ======= ======================
+ NONE no device
+ USB USB device is attached
+ UART UART is attached
+ CHARGER Charger is attached
+ JIG JIG is attached
+ ======= ======================
What: /sys/bus/i2c/devices/.../switch
Date: February 2011
Contact: Minkyu Kang <mk7.kang@samsung.com>
Description:
show or set the state of manual switch
- VAUDIO - switch to VAUDIO path
- UART - switch to UART path
- AUDIO - switch to AUDIO path
- DHOST - switch to DHOST path
- AUTO - switch automatically by device
+
+ ======= ==============================
+ VAUDIO switch to VAUDIO path
+ UART switch to UART path
+ AUDIO switch to AUDIO path
+ DHOST switch to DHOST path
+ AUTO switch automatically by device
+ ======= ==============================
diff --git a/Documentation/ABI/testing/sysfs-bus-i2c-devices-hm6352 b/Documentation/ABI/testing/sysfs-bus-i2c-devices-hm6352
index feb2e4a87075..4a251b7f11e4 100644
--- a/Documentation/ABI/testing/sysfs-bus-i2c-devices-hm6352
+++ b/Documentation/ABI/testing/sysfs-bus-i2c-devices-hm6352
@@ -1,20 +1,20 @@
-Where: /sys/bus/i2c/devices/.../heading0_input
+What: /sys/bus/i2c/devices/.../heading0_input
Date: April 2010
-Kernel Version: 2.6.36?
+KernelVersion: 2.6.36?
Contact: alan.cox@intel.com
Description: Reports the current heading from the compass as a floating
point value in degrees.
-Where: /sys/bus/i2c/devices/.../power_state
+What: /sys/bus/i2c/devices/.../power_state
Date: April 2010
-Kernel Version: 2.6.36?
+KernelVersion: 2.6.36?
Contact: alan.cox@intel.com
Description: Sets the power state of the device. 0 sets the device into
sleep mode, 1 wakes it up.
-Where: /sys/bus/i2c/devices/.../calibration
+What: /sys/bus/i2c/devices/.../calibration
Date: April 2010
-Kernel Version: 2.6.36?
+KernelVersion: 2.6.36?
Contact: alan.cox@intel.com
Description: Sets the calibration on or off (1 = on, 0 = off). See the
chip data sheet.
diff --git a/Documentation/ABI/testing/sysfs-bus-i2c-devices-m24lr b/Documentation/ABI/testing/sysfs-bus-i2c-devices-m24lr
new file mode 100644
index 000000000000..7c51ce8d38ba
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-i2c-devices-m24lr
@@ -0,0 +1,100 @@
+What: /sys/bus/i2c/devices/<busnum>-<primary-addr>/unlock
+Date: 2025-07-04
+KernelVersion: 6.17
+Contact: Abd-Alrhman Masalkhi <abd.masalkhi@gmail.com>
+Description:
+ Write-only attribute used to present a password and unlock
+ access to protected areas of the M24LR chip, including
+ configuration registers such as the Sector Security Status
+ (SSS) bytes. A valid password must be written to enable write
+ access to these regions via the I2C interface.
+
+ Format:
+ - Hexadecimal string representing a 32-bit (4-byte) password
+ - Accepts 1 to 8 hex digits (e.g., "c", "1F", "a1b2c3d4")
+ - No "0x" prefix, whitespace, or trailing newline
+ - Case-insensitive
+
+ Behavior:
+ - If the password matches the internal stored value,
+ access to protected memory/configuration is granted
+ - If the password does not match the internally stored value,
+ it will fail silently
+
+What: /sys/bus/i2c/devices/<busnum>-<primary-addr>/new_pass
+Date: 2025-07-04
+KernelVersion: 6.17
+Contact: Abd-Alrhman Masalkhi <abd.masalkhi@gmail.com>
+Description:
+ Write-only attribute used to update the password required to
+ unlock the M24LR chip.
+
+ Format:
+ - Hexadecimal string representing a new 32-bit password
+ - Accepts 1 to 8 hex digits (e.g., "1A", "ffff", "c0ffee00")
+ - No "0x" prefix, whitespace, or trailing newline
+ - Case-insensitive
+
+ Behavior:
+ - Overwrites the current password stored in the I2C password
+ register
+ - Requires the device to be unlocked before changing the
+ password
+ - If the device is locked, the write silently fails
+
+What: /sys/bus/i2c/devices/<busnum>-<primary-addr>/uid
+Date: 2025-07-04
+KernelVersion: 6.17
+Contact: Abd-Alrhman Masalkhi <abd.masalkhi@gmail.com>
+Description:
+ Read-only attribute that exposes the 8-byte unique identifier
+ programmed into the M24LR chip at the factory.
+
+ Format:
+ - Lowercase hexadecimal string representing a 64-bit value
+ - 1 to 16 hex digits (e.g., "e00204f12345678")
+ - No "0x" prefix
+ - Includes a trailing newline
+
+What: /sys/bus/i2c/devices/<busnum>-<primary-addr>/total_sectors
+Date: 2025-07-04
+KernelVersion: 6.17
+Contact: Abd-Alrhman Masalkhi <abd.masalkhi@gmail.com>
+Description:
+ Read-only attribute that exposes the total number of EEPROM
+ sectors available in the M24LR chip.
+
+ Format:
+ - 1 to 2 hex digits (e.g. "F")
+ - No "0x" prefix
+ - Includes a trailing newline
+
+ Notes:
+ - Value is encoded by the chip and corresponds to the EEPROM
+ size (e.g., 3 = 4 kbit for M24LR04E-R)
+
+What: /sys/bus/i2c/devices/<busnum>-<primary-addr>/sss
+Date: 2025-07-04
+KernelVersion: 6.17
+Contact: Abd-Alrhman Masalkhi <abd.masalkhi@gmail.com>
+Description:
+ Read/write binary attribute representing the Sector Security
+ Status (SSS) bytes for all EEPROM sectors in STMicroelectronics
+ M24LR chips.
+
+ Each EEPROM sector has one SSS byte, which controls I2C and
+ RF access through protection bits and optional password
+ authentication.
+
+ Format:
+ - The file contains one byte per EEPROM sector
+ - Byte at offset N corresponds to sector N
+ - Binary access only; use tools like dd, Python, or C that
+ support byte-level I/O and offset control.
+
+ Notes:
+ - The number of valid bytes in this file is equal to the
+ value exposed by 'total_sectors' file
+ - Write access requires prior password authentication in
+ I2C mode
+ - Refer to the M24LR datasheet for full SSS bit layout
diff --git a/Documentation/ABI/testing/sysfs-bus-i2c-devices-pca954x b/Documentation/ABI/testing/sysfs-bus-i2c-devices-pca954x
new file mode 100644
index 000000000000..b6c69eb80ca4
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-i2c-devices-pca954x
@@ -0,0 +1,23 @@
+What: /sys/bus/i2c/.../idle_state
+Date: January 2019
+KernelVersion: 5.2
+Contact: Robert Shearman <robert.shearman@att.com>
+Description:
+ Value that exists only for mux devices that can be
+ written to control the behaviour of the multiplexer on
+ idle. Possible values:
+
+ =========== ===============================================
+ -2 disconnect on idle, i.e. deselect the last used
+ channel, which is useful when there is a device
+ with an address that conflicts with another
+ device on another mux on the same parent bus.
+ -1 leave the mux as-is, which is the most optimal
+ setting in terms of I2C operations and is the
+ default mode.
+ 0..<nchans> set the mux to a predetermined channel,
+ which is useful if there is one channel that is
+ used almost always, and you want to reduce the
+ latency for normal operations after rare
+ transactions on other channels
+ =========== ===============================================
diff --git a/Documentation/ABI/testing/sysfs-bus-i2c-devices-turris-omnia-mcu b/Documentation/ABI/testing/sysfs-bus-i2c-devices-turris-omnia-mcu
new file mode 100644
index 000000000000..35a8f6dae5bf
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-i2c-devices-turris-omnia-mcu
@@ -0,0 +1,113 @@
+What: /sys/bus/i2c/devices/<mcu_device>/board_revision
+Date: September 2024
+KernelVersion: 6.11
+Contact: Marek Behún <kabel@kernel.org>
+Description: (RO) Contains board revision number.
+
+ Only available if board information is burned in the MCU (older
+ revisions have board information burned in the ATSHA204-A chip).
+
+ Format: %u.
+
+What: /sys/bus/i2c/devices/<mcu_device>/first_mac_address
+Date: September 2024
+KernelVersion: 6.11
+Contact: Marek Behún <kabel@kernel.org>
+Description: (RO) Contains device first MAC address. Each Turris Omnia is
+ allocated 3 MAC addresses. The two additional addresses are
+ computed from the first one by incrementing it.
+
+ Only available if board information is burned in the MCU (older
+ revisions have board information burned in the ATSHA204-A chip).
+
+ Format: %pM.
+
+What: /sys/bus/i2c/devices/<mcu_device>/front_button_mode
+Date: September 2024
+KernelVersion: 6.11
+Contact: Marek Behún <kabel@kernel.org>
+Description: (RW) The front button on the Turris Omnia router can be
+ configured either to change the intensity of all the LEDs on the
+ front panel, or to send the press event to the CPU as an
+ interrupt.
+
+ This file switches between these two modes:
+ - ``mcu`` makes the button press event be handled by the MCU to
+ change the LEDs panel intensity.
+ - ``cpu`` makes the button press event be handled by the CPU.
+
+ Format: %s.
+
+What: /sys/bus/i2c/devices/<mcu_device>/front_button_poweron
+Date: September 2024
+KernelVersion: 6.11
+Contact: Marek Behún <kabel@kernel.org>
+Description: (RW) Newer versions of the microcontroller firmware of the
+ Turris Omnia router support powering off the router into true
+ low power mode. The router can be powered on by pressing the
+ front button.
+
+ This file configures whether front button power on is enabled.
+
+ This file is present only if the power off feature is supported
+ by the firmware.
+
+ Format: %i.
+
+What: /sys/bus/i2c/devices/<mcu_device>/fw_features
+Date: September 2024
+KernelVersion: 6.11
+Contact: Marek Behún <kabel@kernel.org>
+Description: (RO) Newer versions of the microcontroller firmware report the
+ features they support. These can be read from this file. If the
+ MCU firmware is too old, this file reads 0x0.
+
+ Format: 0x%x.
+
+What: /sys/bus/i2c/devices/<mcu_device>/fw_version_hash_application
+Date: September 2024
+KernelVersion: 6.11
+Contact: Marek Behún <kabel@kernel.org>
+Description: (RO) Contains the version hash (commit hash) of the application
+ part of the microcontroller firmware.
+
+ Format: %s.
+
+What: /sys/bus/i2c/devices/<mcu_device>/fw_version_hash_bootloader
+Date: September 2024
+KernelVersion: 6.11
+Contact: Marek Behún <kabel@kernel.org>
+Description: (RO) Contains the version hash (commit hash) of the bootloader
+ part of the microcontroller firmware.
+
+ Format: %s.
+
+What: /sys/bus/i2c/devices/<mcu_device>/mcu_type
+Date: September 2024
+KernelVersion: 6.11
+Contact: Marek Behún <kabel@kernel.org>
+Description: (RO) Contains the microcontroller type (STM32, GD32, MKL).
+
+ Format: %s.
+
+What: /sys/bus/i2c/devices/<mcu_device>/reset_selector
+Date: September 2024
+KernelVersion: 6.11
+Contact: Marek Behún <kabel@kernel.org>
+Description: (RO) Contains the selected factory reset level, determined by
+ how long the rear reset button was held by the user during board
+ reset.
+
+ Format: %i.
+
+What: /sys/bus/i2c/devices/<mcu_device>/serial_number
+Date: September 2024
+KernelVersion: 6.11
+Contact: Marek Behún <kabel@kernel.org>
+Description: (RO) Contains the 64-bit board serial number in hexadecimal
+ format.
+
+ Only available if board information is burned in the MCU (older
+ revisions have board information burned in the ATSHA204-A chip).
+
+ Format: %016X.
diff --git a/Documentation/ABI/testing/sysfs-bus-i3c b/Documentation/ABI/testing/sysfs-bus-i3c
new file mode 100644
index 000000000000..c812ab180ff4
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-i3c
@@ -0,0 +1,163 @@
+What: /sys/bus/i3c/devices/i3c-<bus-id>
+KernelVersion: 5.0
+Contact: linux-i3c@vger.kernel.org
+Description:
+ An I3C bus. This directory will contain one sub-directory per
+ I3C device present on the bus.
+
+What: /sys/bus/i3c/devices/i3c-<bus-id>/current_master
+KernelVersion: 5.0
+Contact: linux-i3c@vger.kernel.org
+Description:
+ Expose the master that owns the bus (<bus-id>-<master-pid>) at
+ the time this file is read. Note that bus ownership can change
+ overtime, so there's no guarantee that when the read() call
+ returns, the value returned is still valid.
+
+What: /sys/bus/i3c/devices/i3c-<bus-id>/mode
+KernelVersion: 5.0
+Contact: linux-i3c@vger.kernel.org
+Description:
+ I3C bus mode. Can be "pure", "mixed-fast" or "mixed-slow". See
+ the I3C specification for a detailed description of what each
+ of these modes implies.
+
+What: /sys/bus/i3c/devices/i3c-<bus-id>/i3c_scl_frequency
+KernelVersion: 5.0
+Contact: linux-i3c@vger.kernel.org
+Description:
+ The frequency (expressed in Hz) of the SCL signal when
+ operating in I3C SDR mode.
+
+What: /sys/bus/i3c/devices/i3c-<bus-id>/i2c_scl_frequency
+KernelVersion: 5.0
+Contact: linux-i3c@vger.kernel.org
+Description:
+ The frequency (expressed in Hz) of the SCL signal when
+ operating in I2C mode.
+
+What: /sys/bus/i3c/devices/i3c-<bus-id>/dynamic_address
+KernelVersion: 5.0
+Contact: linux-i3c@vger.kernel.org
+Description:
+ Dynamic address assigned to the master controller. This
+ address may change if the bus is re-initialized.
+
+What: /sys/bus/i3c/devices/i3c-<bus-id>/bcr
+KernelVersion: 5.0
+Contact: linux-i3c@vger.kernel.org
+Description:
+ BCR stands for Bus Characteristics Register and express the
+ device capabilities in term of speed, maximum read/write
+ length, etc. See the I3C specification for more details.
+ This entry describes the BCR of the master controller driving
+ the bus.
+
+What: /sys/bus/i3c/devices/i3c-<bus-id>/dcr
+KernelVersion: 5.0
+Contact: linux-i3c@vger.kernel.org
+Description:
+ DCR stands for Device Characteristics Register and express the
+ device capabilities in term of exposed features. See the I3C
+ specification for more details.
+ This entry describes the DCR of the master controller driving
+ the bus.
+
+What: /sys/bus/i3c/devices/i3c-<bus-id>/pid
+KernelVersion: 5.0
+Contact: linux-i3c@vger.kernel.org
+Description:
+ PID stands for Provisioned ID and is used to uniquely identify
+ a device on a bus. This PID contains information about the
+ vendor, the part and an instance ID so that several devices of
+ the same type can be connected on the same bus.
+ See the I3C specification for more details.
+ This entry describes the PID of the master controller driving
+ the bus.
+
+What: /sys/bus/i3c/devices/i3c-<bus-id>/hdrcap
+KernelVersion: 5.0
+Contact: linux-i3c@vger.kernel.org
+Description:
+ Expose the HDR (High Data Rate) capabilities of a device.
+ Returns a list of supported HDR mode, each element is separated
+ by space. Modes can be "hdr-ddr", "hdr-tsp" and "hdr-tsl".
+ See the I3C specification for more details about these HDR
+ modes.
+
+ This entry describes the HDRCAP of the master controller
+ driving the bus.
+
+What: /sys/bus/i3c/devices/i3c-<bus-id>/hotjoin
+KernelVersion: 6.8
+Contact: linux-i3c@vger.kernel.org
+Description:
+ I3C’s Hot-Join mechanism allows an I3C Device to inform the
+ Active Controller that a newly-joined Target is present on the
+ I3C Bus and is ready to receive a Dynamic Address, in order to
+ become fully functional on the Bus. Hot-Join is used when the
+ Target is mounted on the same I3C bus and remains depowered
+ until needed or until the Target is physically inserted into the
+ I3C bus
+
+ This entry allows to enable or disable Hot-join of the Current
+ Controller driving the bus.
+
+What: /sys/bus/i3c/devices/i3c-<bus-id>/<bus-id>-<device-pid>
+KernelVersion: 5.0
+Contact: linux-i3c@vger.kernel.org
+Description:
+ An I3C device present on I3C bus identified by <bus-id>. Note
+ that all devices are represented including the master driving
+ the bus.
+
+What: /sys/bus/i3c/devices/i3c-<bus-id>/<bus-id>-<device-pid>/dynamic_address
+KernelVersion: 5.0
+Contact: linux-i3c@vger.kernel.org
+Description:
+ Dynamic address assigned to device <bus-id>-<device-pid>. This
+ address may change if the bus is re-initialized.
+
+What: /sys/bus/i3c/devices/i3c-<bus-id>/<bus-id>-<device-pid>/bcr
+KernelVersion: 5.0
+Contact: linux-i3c@vger.kernel.org
+Description:
+ BCR stands for Bus Characteristics Register and express the
+ device capabilities in term of speed, maximum read/write
+ length, etc. See the I3C specification for more details.
+
+What: /sys/bus/i3c/devices/i3c-<bus-id>/<bus-id>-<device-pid>/dcr
+KernelVersion: 5.0
+Contact: linux-i3c@vger.kernel.org
+Description:
+ DCR stands for Device Characteristics Register and express the
+ device capabilities in term of exposed features. See the I3C
+ specification for more details.
+
+What: /sys/bus/i3c/devices/i3c-<bus-id>/<bus-id>-<device-pid>/pid
+KernelVersion: 5.0
+Contact: linux-i3c@vger.kernel.org
+Description:
+ PID stands for Provisioned ID and is used to uniquely identify
+ a device on a bus. This PID contains information about the
+ vendor, the part and an instance ID so that several devices of
+ the same type can be connected on the same bus.
+ See the I3C specification for more details.
+
+What: /sys/bus/i3c/devices/i3c-<bus-id>/<bus-id>-<device-pid>/hdrcap
+KernelVersion: 5.0
+Contact: linux-i3c@vger.kernel.org
+Description:
+ Expose the HDR (High Data Rate) capabilities of a device.
+ Returns a list of supported HDR mode, each element is separated
+ by space. Modes can be "hdr-ddr", "hdr-tsp" and "hdr-tsl".
+
+ See the I3C specification for more details about these HDR
+ modes.
+
+What: /sys/bus/i3c/devices/<bus-id>-<device-pid>
+KernelVersion: 5.0
+Contact: linux-i3c@vger.kernel.org
+Description:
+ These directories are just symbolic links to
+ /sys/bus/i3c/devices/i3c-<bus-id>/<bus-id>-<device-pid>.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio
index 8127a08e366d..89b4740dcfa1 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio
+++ b/Documentation/ABI/testing/sysfs-bus-iio
@@ -15,6 +15,7 @@ Description:
based on hardware generated events (e.g. data ready) or
provided by a separate driver for other hardware (e.g.
periodic timer, GPIO or high resolution timer).
+
Contains trigger type specific elements. These do not
generalize well and hence are not documented in this file.
X is the IIO index of the trigger.
@@ -32,6 +33,57 @@ Description:
Description of the physical chip / device for device X.
Typically a part number.
+What: /sys/bus/iio/devices/iio:deviceX/label
+KernelVersion: 5.8
+Contact: linux-iio@vger.kernel.org
+Description:
+ Optional symbolic label for a device.
+ This is useful for userspace to be able to better identify an
+ individual device.
+
+ The contents of the label are free-form, but there are some
+ standardized uses:
+
+ For proximity sensors which give the proximity (of a person) to
+ a certain wlan or wwan antenna the following standardized labels
+ are used:
+
+ * "proximity-wifi"
+ * "proximity-lte"
+ * "proximity-wifi-lte"
+ * "proximity-wifi-left"
+ * "proximity-wifi-right"
+
+ These are used to indicate to userspace that these proximity
+ sensors may be used to tune transmit power to ensure that
+ Specific Absorption Rate (SAR) limits are honored.
+ The "-left" and "-right" labels are for devices with multiple
+ antennas.
+
+ In some laptops/tablets the standardized proximity sensor labels
+ instead indicate proximity to a specific part of the device:
+
+ * "proximity-palmrest" indicates proximity to the keyboard's palmrest
+ * "proximity-palmrest-left" indicates proximity to the left part of the palmrest
+ * "proximity-palmrest-right" indicates proximity to the right part of the palmrest
+ * "proximity-lap" indicates the device is being used on someone's lap
+
+ Note "proximity-lap" is special in that its value may be
+ calculated by firmware from other sensor readings, rather then
+ being a raw sensor reading.
+
+ For accelerometers used in 2-in-1s with 360° (yoga-style) hinges,
+ which have an accelerometer in both their base and their display,
+ the following standardized labels are used:
+
+ * "accel-base"
+ * "accel-display"
+
+ For devices where an accelerometer is housed in the swivel camera subassembly
+ (for AR application), the following standardized label is used:
+
+ * "accel-camera"
+
What: /sys/bus/iio/devices/iio:deviceX/current_timestamp_clock
KernelVersion: 4.5
Contact: linux-iio@vger.kernel.org
@@ -40,7 +92,9 @@ Description:
buffered samples and events for device X.
What: /sys/bus/iio/devices/iio:deviceX/sampling_frequency
+What: /sys/bus/iio/devices/iio:deviceX/in_intensity_sampling_frequency
What: /sys/bus/iio/devices/iio:deviceX/buffer/sampling_frequency
+What: /sys/bus/iio/devices/iio:deviceX/events/sampling_frequency
What: /sys/bus/iio/devices/triggerX/sampling_frequency
KernelVersion: 2.6.35
Contact: linux-iio@vger.kernel.org
@@ -49,20 +103,28 @@ Description:
resulting sampling frequency. In many devices this
parameter has an effect on input filters etc. rather than
simply controlling when the input is sampled. As this
- effects data ready triggers, hardware buffers and the sysfs
+ affects data ready triggers, hardware buffers and the sysfs
direct access interfaces, it may be found in any of the
- relevant directories. If it effects all of the above
+ relevant directories. If it affects all of the above
then it is to be found in the base device directory.
+ The stm32-timer-trigger has the additional characteristic that
+ a sampling_frequency of 0 is defined to stop sampling.
+
What: /sys/bus/iio/devices/iio:deviceX/sampling_frequency_available
+What: /sys/bus/iio/devices/iio:deviceX/in_intensity_sampling_frequency_available
What: /sys/bus/iio/devices/iio:deviceX/in_proximity_sampling_frequency_available
What: /sys/.../iio:deviceX/buffer/sampling_frequency_available
What: /sys/bus/iio/devices/triggerX/sampling_frequency_available
KernelVersion: 2.6.35
Contact: linux-iio@vger.kernel.org
Description:
- When the internal sampling clock can only take a small
- discrete set of values, this file lists those available.
+ When the internal sampling clock can only take a specific set of
+ frequencies, we can specify the available values with:
+
+ - a small discrete set of values like "0 2 4 6 8"
+ - a range with minimum, step and maximum frequencies like
+ "[min step max]"
What: /sys/bus/iio/devices/iio:deviceX/oversampling_ratio
KernelVersion: 2.6.38
@@ -79,8 +141,6 @@ Description:
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_raw
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_supply_raw
-What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_i_raw
-What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_q_raw
KernelVersion: 2.6.35
Contact: linux-iio@vger.kernel.org
Description:
@@ -107,19 +167,18 @@ Description:
is required is a consistent labeling. Units after application
of scale and offset are millivolts.
-What: /sys/bus/iio/devices/iio:deviceX/in_currentY_raw
-What: /sys/bus/iio/devices/iio:deviceX/in_currentY_supply_raw
-KernelVersion: 3.17
+What: /sys/bus/iio/devices/iio:deviceX/in_altvoltageY_rms_raw
+KernelVersion: 6.18
Contact: linux-iio@vger.kernel.org
Description:
- Raw (unscaled no bias removal etc.) current measurement from
- channel Y. In special cases where the channel does not
- correspond to externally available input one of the named
- versions may be used. The number must always be specified and
- unique to allow association with event codes. Units after
- application of scale and offset are milliamps.
+ Raw (unscaled) Root Mean Square (RMS) voltage measurement from
+ channel Y. Units after application of scale and offset are
+ millivolts.
What: /sys/bus/iio/devices/iio:deviceX/in_powerY_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_powerY_active_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_powerY_reactive_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_powerY_apparent_raw
KernelVersion: 4.5
Contact: linux-iio@vger.kernel.org
Description:
@@ -128,6 +187,13 @@ Description:
unique to allow association with event codes. Units after
application of scale and offset are milliwatts.
+What: /sys/bus/iio/devices/iio:deviceX/in_powerY_powerfactor
+KernelVersion: 6.18
+Contact: linux-iio@vger.kernel.org
+Description:
+ Power factor measurement from channel Y. Power factor is the
+ ratio of active power to apparent power. The value is unitless.
+
What: /sys/bus/iio/devices/iio:deviceX/in_capacitanceY_raw
KernelVersion: 3.2
Contact: linux-iio@vger.kernel.org
@@ -135,7 +201,7 @@ Description:
Raw capacitance measurement from channel Y. Units after
application of scale and offset are nanofarads.
-What: /sys/.../iio:deviceX/in_capacitanceY-in_capacitanceZ_raw
+What: /sys/.../iio:deviceX/in_capacitanceY-capacitanceZ_raw
KernelVersion: 3.2
Contact: linux-iio@vger.kernel.org
Description:
@@ -146,8 +212,27 @@ Description:
is required is a consistent labeling. Units after application
of scale and offset are nanofarads.
+What: /sys/.../iio:deviceX/in_capacitanceY-capacitanceZ_zeropoint
+KernelVersion: 6.1
+Contact: linux-iio@vger.kernel.org
+Description:
+ For differential channels, this an offset that is applied
+ equally to both inputs. As the reading is of the difference
+ between the two inputs, this should not be applied to the _raw
+ reading by userspace (unlike _offset) and unlike calibbias
+ it does not affect the differential value measured because
+ the effect of _zeropoint cancels out across the two inputs
+ that make up the differential pair. It's purpose is to bring
+ the individual signals, before the differential is measured,
+ within the measurement range of the device. The naming is
+ chosen because if the separate inputs that make the
+ differential pair are drawn on a graph in their
+ _raw units, this is the value that the zero point on the
+ measurement axis represents. It is expressed with the
+ same scaling as _raw.
+
What: /sys/bus/iio/devices/iio:deviceX/in_temp_raw
-What: /sys/bus/iio/devices/iio:deviceX/in_tempX_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_tempY_raw
What: /sys/bus/iio/devices/iio:deviceX/in_temp_x_raw
What: /sys/bus/iio/devices/iio:deviceX/in_temp_y_raw
What: /sys/bus/iio/devices/iio:deviceX/in_temp_ambient_raw
@@ -163,7 +248,8 @@ Description:
less measurements. Units after application of scale and offset
are milli degrees Celsius.
-What: /sys/bus/iio/devices/iio:deviceX/in_tempX_input
+What: /sys/bus/iio/devices/iio:deviceX/in_tempY_input
+What: /sys/bus/iio/devices/iio:deviceX/in_temp_input
KernelVersion: 2.6.38
Contact: linux-iio@vger.kernel.org
Description:
@@ -180,6 +266,15 @@ Description:
Has all of the equivalent parameters as per voltageY. Units
after application of scale and offset are m/s^2.
+What: /sys/bus/iio/devices/iio:deviceX/in_accel_linear_x_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_accel_linear_y_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_accel_linear_z_raw
+KernelVersion: 6.1
+Contact: linux-iio@vger.kernel.org
+Description:
+ As per in_accel_X_raw attributes, but minus the
+ acceleration due to gravity.
+
What: /sys/bus/iio/devices/iio:deviceX/in_gravity_x_raw
What: /sys/bus/iio/devices/iio:deviceX/in_gravity_y_raw
What: /sys/bus/iio/devices/iio:deviceX/in_gravity_z_raw
@@ -190,7 +285,37 @@ Description:
but should match other such assignments on device).
Units after application of scale and offset are m/s^2.
+What: /sys/bus/iio/devices/iio:deviceX/in_deltaangl_x_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_deltaangl_y_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_deltaangl_z_raw
+KernelVersion: 6.5
+Contact: linux-iio@vger.kernel.org
+Description:
+ Angular displacement between two consecutive samples on x, y or
+ z (may be arbitrarily assigned but should match other such
+ assignments on device).
+ In order to compute the total angular displacement during a
+ desired period of time, the application should sum-up the delta
+ angle samples acquired during that time.
+ Units after application of scale and offset are radians.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_deltavelocity_x_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_deltavelocity_y_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_deltavelocity_z_raw
+KernelVersion: 6.5
+Contact: linux-iio@vger.kernel.org
+Description:
+ The linear velocity change between two consecutive samples on x,
+ y or z (may be arbitrarily assigned but should match other such
+ assignments on device).
+ In order to compute the total linear velocity change during a
+ desired period of time, the application should sum-up the delta
+ velocity samples acquired during that time.
+ Units after application of scale and offset are meters per
+ second.
+
What: /sys/bus/iio/devices/iio:deviceX/in_angl_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_anglY_raw
KernelVersion: 4.17
Contact: linux-iio@vger.kernel.org
Description:
@@ -243,10 +368,21 @@ Description:
What: /sys/bus/iio/devices/iio:deviceX/in_accel_x_peak_raw
What: /sys/bus/iio/devices/iio:deviceX/in_accel_y_peak_raw
What: /sys/bus/iio/devices/iio:deviceX/in_accel_z_peak_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_humidityrelative_peak_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_temp_peak_raw
KernelVersion: 2.6.36
Contact: linux-iio@vger.kernel.org
Description:
- Highest value since some reset condition. These
+ Highest value since some reset condition. These
+ attributes allow access to this and are otherwise
+ the direct equivalent of the <type>Y[_name]_raw attributes.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_humidityrelative_trough_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_temp_trough_raw
+KernelVersion: 6.7
+Contact: linux-iio@vger.kernel.org
+Description:
+ Lowest value since some reset condition. These
attributes allow access to this and are otherwise
the direct equivalent of the <type>Y[_name]_raw attributes.
@@ -285,11 +421,11 @@ Contact: linux-iio@vger.kernel.org
Description:
Scaled humidity measurement in milli percent.
-What: /sys/bus/iio/devices/iio:deviceX/in_X_mean_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_Y_mean_raw
KernelVersion: 3.5
Contact: linux-iio@vger.kernel.org
Description:
- Averaged raw measurement from channel X. The number of values
+ Averaged raw measurement from channel Y. The number of values
used for averaging is device specific. The converting rules for
normal raw values also applies to the averaged raw values.
@@ -297,18 +433,14 @@ What: /sys/bus/iio/devices/iio:deviceX/in_accel_offset
What: /sys/bus/iio/devices/iio:deviceX/in_accel_x_offset
What: /sys/bus/iio/devices/iio:deviceX/in_accel_y_offset
What: /sys/bus/iio/devices/iio:deviceX/in_accel_z_offset
+What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage_q_offset
+What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage_i_offset
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_offset
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_offset
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_i_offset
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_q_offset
-What: /sys/bus/iio/devices/iio:deviceX/in_voltage_q_offset
-What: /sys/bus/iio/devices/iio:deviceX/in_voltage_i_offset
What: /sys/bus/iio/devices/iio:deviceX/in_currentY_offset
What: /sys/bus/iio/devices/iio:deviceX/in_current_offset
-What: /sys/bus/iio/devices/iio:deviceX/in_currentY_i_offset
-What: /sys/bus/iio/devices/iio:deviceX/in_currentY_q_offset
-What: /sys/bus/iio/devices/iio:deviceX/in_current_q_offset
-What: /sys/bus/iio/devices/iio:deviceX/in_current_i_offset
What: /sys/bus/iio/devices/iio:deviceX/in_tempY_offset
What: /sys/bus/iio/devices/iio:deviceX/in_temp_offset
What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_offset
@@ -317,6 +449,7 @@ What: /sys/bus/iio/devices/iio:deviceX/in_humidityrelative_offset
What: /sys/bus/iio/devices/iio:deviceX/in_magn_offset
What: /sys/bus/iio/devices/iio:deviceX/in_rot_offset
What: /sys/bus/iio/devices/iio:deviceX/in_angl_offset
+What: /sys/bus/iio/devices/iio:deviceX/in_capacitanceY_offset
KernelVersion: 2.6.35
Contact: linux-iio@vger.kernel.org
Description:
@@ -335,21 +468,15 @@ Description:
to the _raw output.
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_scale
-What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_i_scale
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_q_scale
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_supply_scale
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_scale
-What: /sys/bus/iio/devices/iio:deviceX/in_voltage_i_scale
-What: /sys/bus/iio/devices/iio:deviceX/in_voltage_q_scale
What: /sys/bus/iio/devices/iio:deviceX/in_voltage-voltage_scale
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_scale
What: /sys/bus/iio/devices/iio:deviceX/out_altvoltageY_scale
What: /sys/bus/iio/devices/iio:deviceX/in_currentY_scale
What: /sys/bus/iio/devices/iio:deviceX/in_currentY_supply_scale
What: /sys/bus/iio/devices/iio:deviceX/in_current_scale
-What: /sys/bus/iio/devices/iio:deviceX/in_currentY_i_scale
-What: /sys/bus/iio/devices/iio:deviceX/in_currentY_q_scale
-What: /sys/bus/iio/devices/iio:deviceX/in_current_i_scale
What: /sys/bus/iio/devices/iio:deviceX/in_current_q_scale
What: /sys/bus/iio/devices/iio:deviceX/in_accel_scale
What: /sys/bus/iio/devices/iio:deviceX/in_accel_peak_scale
@@ -370,7 +497,16 @@ What: /sys/bus/iio/devices/iio:deviceX/in_humidityrelative_scale
What: /sys/bus/iio/devices/iio:deviceX/in_velocity_sqrt(x^2+y^2+z^2)_scale
What: /sys/bus/iio/devices/iio:deviceX/in_illuminance_scale
What: /sys/bus/iio/devices/iio:deviceX/in_countY_scale
+What: /sys/bus/iio/devices/iio:deviceX/in_deltaangl_scale
+What: /sys/bus/iio/devices/iio:deviceX/in_deltavelocity_scale
What: /sys/bus/iio/devices/iio:deviceX/in_angl_scale
+What: /sys/bus/iio/devices/iio:deviceX/in_intensity_x_scale
+What: /sys/bus/iio/devices/iio:deviceX/in_intensity_y_scale
+What: /sys/bus/iio/devices/iio:deviceX/in_intensity_z_scale
+What: /sys/bus/iio/devices/iio:deviceX/in_intensity_red_scale
+What: /sys/bus/iio/devices/iio:deviceX/in_intensity_green_scale
+What: /sys/bus/iio/devices/iio:deviceX/in_intensity_blue_scale
+What: /sys/bus/iio/devices/iio:deviceX/in_concentration_co2_scale
KernelVersion: 2.6.35
Contact: linux-iio@vger.kernel.org
Description:
@@ -385,37 +521,98 @@ Description:
What: /sys/bus/iio/devices/iio:deviceX/in_accel_x_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_accel_y_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_accel_z_calibbias
+What: /sys/bus/iio/devices/iio:deviceX/in_altvoltageY_i_calibbias
+What: /sys/bus/iio/devices/iio:deviceX/in_altvoltageY_q_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_x_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_y_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_z_calibbias
+What: /sys/bus/iio/devices/iio:deviceX/in_capacitance_calibbias
+What: /sys/bus/iio/devices/iio:deviceX/in_illuminance_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_illuminance0_calibbias
-What: /sys/bus/iio/devices/iio:deviceX/in_proximity0_calibbias
-What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_calibbias
+What: /sys/bus/iio/devices/iio:deviceX/in_intensityY_calibbias
+What: /sys/bus/iio/devices/iio:deviceX/in_magn_x_calibbias
+What: /sys/bus/iio/devices/iio:deviceX/in_magn_y_calibbias
+What: /sys/bus/iio/devices/iio:deviceX/in_magn_z_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_pressure_calibbias
+What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_calibbias
+What: /sys/bus/iio/devices/iio:deviceX/in_proximity_calibbias
+What: /sys/bus/iio/devices/iio:deviceX/in_proximity0_calibbias
+What: /sys/bus/iio/devices/iio:deviceX/in_resistance_calibbias
+What: /sys/bus/iio/devices/iio:deviceX/in_temp_calibbias
+What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_calibbias
+What: /sys/bus/iio/devices/iio:deviceX/out_currentY_calibbias
+What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_calibbias
KernelVersion: 2.6.35
Contact: linux-iio@vger.kernel.org
Description:
Hardware applied calibration offset (assumed to fix production
inaccuracies).
+ icm42600: For this device values are real physical offsets
+ expressed in SI units (m/s^2 for accelerometers and rad/s
+ for gyroscope)/
+
+What: /sys/bus/iio/devices/iio:deviceX/in_accel_calibbias_available
+What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_calibbias_available
+What: /sys/bus/iio/devices/iio:deviceX/in_temp_calibbias_available
+What: /sys/bus/iio/devices/iio:deviceX/in_proximity_calibbias_available
+What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_calibbias_available
+What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_calibbias_available
+KernelVersion: 5.8
+Contact: linux-iio@vger.kernel.org
+Description:
+ Available values of calibbias. Maybe expressed as either of:
-What /sys/bus/iio/devices/iio:deviceX/in_voltageY_calibscale
-What /sys/bus/iio/devices/iio:deviceX/in_voltageY_supply_calibscale
-What /sys/bus/iio/devices/iio:deviceX/in_voltageY_i_calibscale
-What /sys/bus/iio/devices/iio:deviceX/in_voltageY_q_calibscale
-What /sys/bus/iio/devices/iio:deviceX/in_voltage_i_calibscale
-What /sys/bus/iio/devices/iio:deviceX/in_voltage_q_calibscale
-What /sys/bus/iio/devices/iio:deviceX/in_voltage_calibscale
-What /sys/bus/iio/devices/iio:deviceX/in_accel_x_calibscale
-What /sys/bus/iio/devices/iio:deviceX/in_accel_y_calibscale
-What /sys/bus/iio/devices/iio:deviceX/in_accel_z_calibscale
-What /sys/bus/iio/devices/iio:deviceX/in_anglvel_x_calibscale
-What /sys/bus/iio/devices/iio:deviceX/in_anglvel_y_calibscale
-What /sys/bus/iio/devices/iio:deviceX/in_anglvel_z_calibscale
-what /sys/bus/iio/devices/iio:deviceX/in_illuminance0_calibscale
-what /sys/bus/iio/devices/iio:deviceX/in_proximity0_calibscale
-What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_calibscale
-What: /sys/bus/iio/devices/iio:deviceX/in_pressure_calibscale
+ - a small discrete set of values like "0 2 4 6 8"
+ - a range specified as "[min step max]"
+
+What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_convdelay
+KernelVersion: 6.17
+Contact: linux-iio@vger.kernel.org
+Description:
+ Delay of start of conversion from common reference point shared
+ by all channels. Can be writable when used to compensate for
+ delay variation introduced by external filters feeding a
+ simultaneous sampling ADC.
+
+ E.g., for the ad7606 ADC series, this value is intended as a
+ configurable time delay in seconds, to correct delay introduced
+ by an optional external filtering circuit.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_convdelay_available
+KernelVersion: 6.16
+Contact: linux-iio@vger.kernel.org
+Description:
+ Available values of convdelay. Maybe expressed as:
+
+ - a range specified as "[min step max]"
+
+ If shared across all channels, <type>_convdelay_available
+ is used.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_accel_x_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/in_accel_y_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/in_accel_z_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_x_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_y_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_z_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/in_capacitance_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_illuminance_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/in_illuminance0_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/in_intensity_both_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/in_intensity_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/in_intensity_ir_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/in_magn_x_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/in_magn_y_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/in_magn_z_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/in_pressure_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/in_proximity0_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/in_voltage_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_supply_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/out_currentY_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_calibscale
KernelVersion: 2.6.35
Contact: linux-iio@vger.kernel.org
Description:
@@ -423,6 +620,20 @@ Description:
production inaccuracies). If shared across all channels,
<type>_calibscale is used.
+What: /sys/bus/iio/devices/iio:deviceX/in_illuminanceY_calibscale_available
+What: /sys/bus/iio/devices/iio:deviceX/in_intensityY_calibscale_available
+What: /sys/bus/iio/devices/iio:deviceX/in_proximityY_calibscale_available
+What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_calibscale_available
+KernelVersion: 4.8
+Contact: linux-iio@vger.kernel.org
+Description:
+ Available values of calibscale. Maybe expressed as either of:
+
+ - a small discrete set of values like "1 8 16"
+ - a range specified as "[min step max]"
+
+ If shared across all channels, <type>_calibscale_available is used.
+
What: /sys/bus/iio/devices/iio:deviceX/in_activity_calibgender
What: /sys/bus/iio/devices/iio:deviceX/in_energy_calibgender
What: /sys/bus/iio/devices/iio:deviceX/in_distance_calibgender
@@ -467,10 +678,10 @@ What: /sys/.../iio:deviceX/in_magn_scale_available
What: /sys/.../iio:deviceX/in_illuminance_scale_available
What: /sys/.../iio:deviceX/in_intensity_scale_available
What: /sys/.../iio:deviceX/in_proximity_scale_available
-What: /sys/.../iio:deviceX/in_voltageX_scale_available
+What: /sys/.../iio:deviceX/in_voltageY_scale_available
What: /sys/.../iio:deviceX/in_voltage-voltage_scale_available
-What: /sys/.../iio:deviceX/out_voltageX_scale_available
-What: /sys/.../iio:deviceX/out_altvoltageX_scale_available
+What: /sys/.../iio:deviceX/out_voltageY_scale_available
+What: /sys/.../iio:deviceX/out_altvoltageY_scale_available
What: /sys/.../iio:deviceX/in_capacitance_scale_available
What: /sys/.../iio:deviceX/in_pressure_scale_available
What: /sys/.../iio:deviceX/in_pressureY_scale_available
@@ -478,19 +689,30 @@ KernelVersion: 2.6.35
Contact: linux-iio@vger.kernel.org
Description:
If a discrete set of scale values is available, they
- are listed in this attribute.
+ are listed in this attribute. Unlike illumination,
+ multiplying intensity by intensity_scale does not
+ yield value with any standardized unit.
-What /sys/bus/iio/devices/iio:deviceX/out_voltageY_hardwaregain
+What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_hardwaregain
+What: /sys/bus/iio/devices/iio:deviceX/in_intensity_hardwaregain
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_red_hardwaregain
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_green_hardwaregain
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_blue_hardwaregain
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_clear_hardwaregain
+What: /sys/bus/iio/devices/iio:deviceX/in_illuminance_hardwaregain
KernelVersion: 2.6.35
Contact: linux-iio@vger.kernel.org
Description:
Hardware applied gain factor. If shared across all channels,
<type>_hardwaregain is used.
+What: /sys/bus/iio/devices/iio:deviceX/in_intensity_hardwaregain_available
+KernelVersion: 5.10
+Contact: linux-iio@vger.kernel.org
+Description:
+ Lists all available hardware applied gain factors. Shared across all
+ channels.
+
What: /sys/.../in_accel_filter_low_pass_3db_frequency
What: /sys/.../in_magn_filter_low_pass_3db_frequency
What: /sys/.../in_anglvel_filter_low_pass_3db_frequency
@@ -545,7 +767,10 @@ Description:
1kohm_to_gnd: connected to ground via an 1kOhm resistor,
2.5kohm_to_gnd: connected to ground via a 2.5kOhm resistor,
6kohm_to_gnd: connected to ground via a 6kOhm resistor,
+ 7.7kohm_to_gnd: connected to ground via a 7.7kOhm resistor,
20kohm_to_gnd: connected to ground via a 20kOhm resistor,
+ 32kohm_to_gnd: connected to ground via a 32kOhm resistor,
+ 42kohm_to_gnd: connected to ground via a 42kOhm resistor,
90kohm_to_gnd: connected to ground via a 90kOhm resistor,
100kohm_to_gnd: connected to ground via an 100kOhm resistor,
125kohm_to_gnd: connected to ground via an 125kOhm resistor,
@@ -586,8 +811,31 @@ Description:
Output frequency for channel Y in Hz. The number must always be
specified and unique if the output corresponds to a single
channel.
-
+ Some drivers have additional constraints:
+ ADF4371 has an integrated VCO with fundamendal output
+ frequency ranging from 4000000000 Hz 8000000000 Hz.
+
+ out_altvoltage0_frequency:
+ A divide by 1, 2, 4, 8, 16, 32 or circuit generates
+ frequencies from 62500000 Hz to 8000000000 Hz.
+ out_altvoltage1_frequency:
+ This channel duplicates the channel 0 frequency
+ out_altvoltage2_frequency:
+ A frequency doubler generates frequencies from
+ 8000000000 Hz to 16000000000 Hz.
+ out_altvoltage3_frequency:
+ A frequency quadrupler generates frequencies from
+ 16000000000 Hz to 32000000000 Hz.
+
+ Note: writes to one of the channels will affect the frequency of
+ all the other channels, since it involves changing the VCO
+ fundamental output frequency.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_altvoltageY_i_phase
+What: /sys/bus/iio/devices/iio:deviceX/in_altvoltageY_q_phase
What: /sys/bus/iio/devices/iio:deviceX/out_altvoltageY_phase
+What: /sys/bus/iio/devices/iio:deviceX/out_altvoltageY_i_phase
+What: /sys/bus/iio/devices/iio:deviceX/out_altvoltageY_q_phase
KernelVersion: 3.4.0
Contact: linux-iio@vger.kernel.org
Description:
@@ -597,6 +845,17 @@ Description:
specified and unique if the output corresponds to a single
channel.
+What: /sys/bus/iio/devices/iio:deviceX/out_currentY_raw
+Date: May 2012
+KernelVersion: 3.5
+Contact: Johan Hovold <jhovold@gmail.com>
+Description:
+ Set/get output current for channel Y. Units after application
+ of scale and offset are milliamps.
+ For some devices current channels are used to specify
+ current supplied to elements used in taking a measurement
+ of a different type. E.g. LED currents.
+
What: /sys/bus/iio/devices/iio:deviceX/events
KernelVersion: 2.6.35
Contact: linux-iio@vger.kernel.org
@@ -637,6 +896,8 @@ What: /sys/.../iio:deviceX/events/in_voltageY_thresh_falling_en
What: /sys/.../iio:deviceX/events/in_voltageY_thresh_either_en
What: /sys/.../iio:deviceX/events/in_tempY_thresh_rising_en
What: /sys/.../iio:deviceX/events/in_tempY_thresh_falling_en
+What: /sys/.../iio:deviceX/events/in_capacitanceY_thresh_rising_en
+What: /sys/.../iio:deviceX/events/in_capacitanceY_thresh_falling_en
KernelVersion: 2.6.37
Contact: linux-iio@vger.kernel.org
Description:
@@ -649,6 +910,7 @@ Description:
<type>[Y][_name]_<raw|input>_thresh_falling_value may take
different values, but the device can only enable both thresholds
or neither.
+
Note the driver will assume the last p events requested are
to be enabled where p is how many it supports (which may vary
depending on the exact set requested. So if you want to be
@@ -703,6 +965,7 @@ Description:
<type>[Y][_name]_<raw|input>_roc_falling_value may take
different values, but the device can only enable both rate of
change thresholds or neither.
+
Note the driver will assume the last p events requested are
to be enabled where p is however many it supports (which may
vary depending on the exact set requested. So if you want to be
@@ -712,6 +975,32 @@ Description:
a given event type is enabled a future point (and not those for
whatever event was previously enabled).
+What: /sys/.../events/in_capacitanceY_adaptive_thresh_rising_en
+What: /sys/.../events/in_capacitanceY_adaptive_thresh_falling_en
+KernelVersion: 5.13
+Contact: linux-iio@vger.kernel.org
+Description:
+ Adaptive thresholds are similar to normal fixed thresholds
+ but the value is expressed as an offset from a value which
+ provides a low frequency approximation of the channel itself.
+ Thus these detect if a rapid change occurs in the specified
+ direction which crosses tracking value + offset.
+ Tracking value calculation is devices specific.
+
+What: /sys/.../in_capacitanceY_adaptive_thresh_rising_timeout
+What: /sys/.../in_capacitanceY_adaptive_thresh_falling_timeout
+KernelVersion: 5.11
+Contact: linux-iio@vger.kernel.org
+Description:
+ When adaptive thresholds are used, the tracking signal
+ may adjust too slowly to step changes in the raw signal.
+ Thus these specify the time in seconds for which the
+ difference between the slow tracking signal and the raw
+ signal is allowed to remain out-of-range before a reset
+ event occurs in which the tracking signal is made equal
+ to the raw signal, allowing slow tracking to resume and the
+ adaptive threshold event detection to function as expected.
+
What: /sys/.../events/in_accel_thresh_rising_value
What: /sys/.../events/in_accel_thresh_falling_value
What: /sys/.../events/in_accel_x_raw_thresh_rising_value
@@ -747,18 +1036,26 @@ What: /sys/.../events/in_voltageY_raw_thresh_falling_value
What: /sys/.../events/in_tempY_raw_thresh_rising_value
What: /sys/.../events/in_tempY_raw_thresh_falling_value
What: /sys/.../events/in_illuminance0_thresh_falling_value
-what: /sys/.../events/in_illuminance0_thresh_rising_value
-what: /sys/.../events/in_proximity0_thresh_falling_value
-what: /sys/.../events/in_proximity0_thresh_rising_value
+What: /sys/.../events/in_illuminance0_thresh_rising_value
+What: /sys/.../events/in_proximity0_thresh_falling_value
+What: /sys/.../events/in_proximity0_thresh_rising_value
+What: /sys/.../events/in_illuminance_thresh_rising_value
+What: /sys/.../events/in_illuminance_thresh_falling_value
+What: /sys/.../events/in_capacitanceY_thresh_rising_value
+What: /sys/.../events/in_capacitanceY_thresh_falling_value
+What: /sys/.../events/in_capacitanceY_thresh_adaptive_rising_value
+What: /sys/.../events/in_capacitanceY_thresh_falling_rising_value
KernelVersion: 2.6.37
Contact: linux-iio@vger.kernel.org
Description:
Specifies the value of threshold that the device is comparing
against for the events enabled by
<type>Y[_name]_thresh[_rising|falling]_en.
+
If separate attributes exist for the two directions, but
direction is not specified for this attribute, then a single
threshold value applies to both directions.
+
The raw or input element of the name indicates whether the
value is in raw device units or in processed units (as _raw
and _input do on sysfs direct channel read attributes).
@@ -827,11 +1124,11 @@ What: /sys/.../events/in_tempY_thresh_rising_hysteresis
What: /sys/.../events/in_tempY_thresh_falling_hysteresis
What: /sys/.../events/in_tempY_thresh_either_hysteresis
What: /sys/.../events/in_illuminance0_thresh_falling_hysteresis
-what: /sys/.../events/in_illuminance0_thresh_rising_hysteresis
-what: /sys/.../events/in_illuminance0_thresh_either_hysteresis
-what: /sys/.../events/in_proximity0_thresh_falling_hysteresis
-what: /sys/.../events/in_proximity0_thresh_rising_hysteresis
-what: /sys/.../events/in_proximity0_thresh_either_hysteresis
+What: /sys/.../events/in_illuminance0_thresh_rising_hysteresis
+What: /sys/.../events/in_illuminance0_thresh_either_hysteresis
+What: /sys/.../events/in_proximity0_thresh_falling_hysteresis
+What: /sys/.../events/in_proximity0_thresh_rising_hysteresis
+What: /sys/.../events/in_proximity0_thresh_either_hysteresis
KernelVersion: 3.13
Contact: linux-iio@vger.kernel.org
Description:
@@ -841,6 +1138,7 @@ Description:
If separate attributes exist for the two directions, but
direction is not specified for this attribute, then a single
hysteresis value applies to both directions.
+
For falling events the hysteresis is added to the _value attribute for
this event to get the upper threshold for when the event goes back to
normal, for rising events the hysteresis is subtracted from the _value
@@ -887,6 +1185,7 @@ Description:
Specifies the value of rate of change threshold that the
device is comparing against for the events enabled by
<type>[Y][_name]_roc[_rising|falling]_en.
+
If separate attributes exist for the two directions,
but direction is not specified for this attribute,
then a single threshold value applies to both directions.
@@ -969,6 +1268,7 @@ What: /sys/.../events/in_activity_jogging_thresh_rising_period
What: /sys/.../events/in_activity_jogging_thresh_falling_period
What: /sys/.../events/in_activity_running_thresh_rising_period
What: /sys/.../events/in_activity_running_thresh_falling_period
+What: /sys/.../events/in_illuminance_thresh_either_period
KernelVersion: 2.6.37
Contact: linux-iio@vger.kernel.org
Description:
@@ -1007,7 +1307,7 @@ What: /sys/.../events/in_activity_running_thresh_falling_en
KernelVersion: 3.19
Contact: linux-iio@vger.kernel.org
Description:
- Enables or disables activitity events. Depending on direction
+ Enables or disables activity events. Depending on direction
an event is generated when sensor ENTERS or LEAVES a given state.
What: /sys/.../events/in_activity_still_thresh_rising_value
@@ -1062,6 +1362,32 @@ Description:
number or direction is not specified, applies to all channels of
this type.
+What: /sys/.../iio:deviceX/events/in_accel_mag_referenced_en
+What: /sys/.../iio:deviceX/events/in_accel_mag_referenced_rising_en
+What: /sys/.../iio:deviceX/events/in_accel_mag_referenced_falling_en
+What: /sys/.../iio:deviceX/events/in_accel_y_mag_referenced_en
+What: /sys/.../iio:deviceX/events/in_accel_y_mag_referenced_rising_en
+What: /sys/.../iio:deviceX/events/in_accel_y_mag_referenced_falling_en
+KernelVersion: 5.18
+Contact: linux-iio@vger.kernel.org
+Description:
+ Similar to in_accel_mag[_y][_rising|_falling]_en, but the event
+ value is relative to a reference magnitude. The reference magnitude
+ includes the graviational acceleration.
+
+What: /sys/.../iio:deviceX/events/in_accel_mag_referenced_value
+What: /sys/.../iio:deviceX/events/in_accel_mag_referenced_rising_value
+What: /sys/.../iio:deviceX/events/in_accel_mag_referenced_falling_value
+What: /sys/.../iio:deviceX/events/in_accel_y_mag_referenced_value
+What: /sys/.../iio:deviceX/events/in_accel_y_mag_referenced_rising_value
+What: /sys/.../iio:deviceX/events/in_accel_y_mag_referenced_falling_value
+KernelVersion: 5.18
+Contact: linux-iio@vger.kernel.org
+Description:
+ The value to which the reference magnitude of the channel is
+ compared. If the axis is not specified, it applies to all channels
+ of this type.
+
What: /sys/.../events/in_steps_change_en
KernelVersion: 4.0
Contact: linux-iio@vger.kernel.org
@@ -1088,75 +1414,82 @@ Description:
The name of the trigger source being used, as per string given
in /sys/class/iio/triggerY/name.
-What: /sys/bus/iio/devices/iio:deviceX/buffer/length
-KernelVersion: 2.6.35
+What: /sys/bus/iio/devices/iio:deviceX/bufferY/length
+KernelVersion: 5.11
Contact: linux-iio@vger.kernel.org
Description:
Number of scans contained by the buffer.
-What: /sys/bus/iio/devices/iio:deviceX/buffer/enable
-KernelVersion: 2.6.35
+What: /sys/bus/iio/devices/iio:deviceX/bufferY/enable
+KernelVersion: 5.11
Contact: linux-iio@vger.kernel.org
Description:
Actually start the buffer capture up. Will start trigger
if first device and appropriate.
-What: /sys/bus/iio/devices/iio:deviceX/scan_elements
-KernelVersion: 2.6.37
+ Note that it might be impossible to configure other attributes,
+ (e.g.: events, scale, sampling rate) if they impact the currently
+ active buffer capture session.
+
+What: /sys/bus/iio/devices/iio:deviceX/bufferY
+KernelVersion: 5.11
Contact: linux-iio@vger.kernel.org
Description:
Directory containing interfaces for elements that will be
captured for a single triggered sample set in the buffer.
-What: /sys/.../iio:deviceX/scan_elements/in_accel_x_en
-What: /sys/.../iio:deviceX/scan_elements/in_accel_y_en
-What: /sys/.../iio:deviceX/scan_elements/in_accel_z_en
-What: /sys/.../iio:deviceX/scan_elements/in_anglvel_x_en
-What: /sys/.../iio:deviceX/scan_elements/in_anglvel_y_en
-What: /sys/.../iio:deviceX/scan_elements/in_anglvel_z_en
-What: /sys/.../iio:deviceX/scan_elements/in_magn_x_en
-What: /sys/.../iio:deviceX/scan_elements/in_magn_y_en
-What: /sys/.../iio:deviceX/scan_elements/in_magn_z_en
-What: /sys/.../iio:deviceX/scan_elements/in_rot_from_north_magnetic_en
-What: /sys/.../iio:deviceX/scan_elements/in_rot_from_north_true_en
-What: /sys/.../iio:deviceX/scan_elements/in_rot_from_north_magnetic_tilt_comp_en
-What: /sys/.../iio:deviceX/scan_elements/in_rot_from_north_true_tilt_comp_en
-What: /sys/.../iio:deviceX/scan_elements/in_timestamp_en
-What: /sys/.../iio:deviceX/scan_elements/in_voltageY_supply_en
-What: /sys/.../iio:deviceX/scan_elements/in_voltageY_en
-What: /sys/.../iio:deviceX/scan_elements/in_voltageY-voltageZ_en
-What: /sys/.../iio:deviceX/scan_elements/in_voltageY_i_en
-What: /sys/.../iio:deviceX/scan_elements/in_voltageY_q_en
-What: /sys/.../iio:deviceX/scan_elements/in_voltage_i_en
-What: /sys/.../iio:deviceX/scan_elements/in_voltage_q_en
-What: /sys/.../iio:deviceX/scan_elements/in_incli_x_en
-What: /sys/.../iio:deviceX/scan_elements/in_incli_y_en
-What: /sys/.../iio:deviceX/scan_elements/in_pressureY_en
-What: /sys/.../iio:deviceX/scan_elements/in_pressure_en
-What: /sys/.../iio:deviceX/scan_elements/in_rot_quaternion_en
-What: /sys/.../iio:deviceX/scan_elements/in_proximity_en
-KernelVersion: 2.6.37
+ Since kernel 5.11 the scan_elements attributes are merged into
+ the bufferY directory, to be configurable per buffer.
+
+What: /sys/.../iio:deviceX/bufferY/in_accel_x_en
+What: /sys/.../iio:deviceX/bufferY/in_accel_y_en
+What: /sys/.../iio:deviceX/bufferY/in_accel_z_en
+What: /sys/.../iio:deviceX/bufferY/in_deltaangl_x_en
+What: /sys/.../iio:deviceX/bufferY/in_deltaangl_y_en
+What: /sys/.../iio:deviceX/bufferY/in_deltaangl_z_en
+What: /sys/.../iio:deviceX/bufferY/in_deltavelocity_x_en
+What: /sys/.../iio:deviceX/bufferY/in_deltavelocity_y_en
+What: /sys/.../iio:deviceX/bufferY/in_deltavelocity_z_en
+What: /sys/.../iio:deviceX/bufferY/in_anglvel_x_en
+What: /sys/.../iio:deviceX/bufferY/in_anglvel_y_en
+What: /sys/.../iio:deviceX/bufferY/in_anglvel_z_en
+What: /sys/.../iio:deviceX/bufferY/in_magn_x_en
+What: /sys/.../iio:deviceX/bufferY/in_magn_y_en
+What: /sys/.../iio:deviceX/bufferY/in_magn_z_en
+What: /sys/.../iio:deviceX/bufferY/in_rot_from_north_magnetic_en
+What: /sys/.../iio:deviceX/bufferY/in_rot_from_north_true_en
+What: /sys/.../iio:deviceX/bufferY/in_rot_from_north_magnetic_tilt_comp_en
+What: /sys/.../iio:deviceX/bufferY/in_rot_from_north_true_tilt_comp_en
+What: /sys/.../iio:deviceX/bufferY/in_timestamp_en
+What: /sys/.../iio:deviceX/bufferY/in_voltageY_supply_en
+What: /sys/.../iio:deviceX/bufferY/in_voltageY_en
+What: /sys/.../iio:deviceX/bufferY/in_voltageY-voltageZ_en
+What: /sys/.../iio:deviceX/bufferY/in_incli_x_en
+What: /sys/.../iio:deviceX/bufferY/in_incli_y_en
+What: /sys/.../iio:deviceX/bufferY/in_pressureY_en
+What: /sys/.../iio:deviceX/bufferY/in_pressure_en
+What: /sys/.../iio:deviceX/bufferY/in_rot_quaternion_en
+What: /sys/.../iio:deviceX/bufferY/in_proximity_en
+KernelVersion: 5.11
Contact: linux-iio@vger.kernel.org
Description:
Scan element control for triggered data capture.
-What: /sys/.../iio:deviceX/scan_elements/in_accel_type
-What: /sys/.../iio:deviceX/scan_elements/in_anglvel_type
-What: /sys/.../iio:deviceX/scan_elements/in_magn_type
-What: /sys/.../iio:deviceX/scan_elements/in_incli_type
-What: /sys/.../iio:deviceX/scan_elements/in_voltageY_type
-What: /sys/.../iio:deviceX/scan_elements/in_voltage_type
-What: /sys/.../iio:deviceX/scan_elements/in_voltageY_supply_type
-What: /sys/.../iio:deviceX/scan_elements/in_voltageY_i_type
-What: /sys/.../iio:deviceX/scan_elements/in_voltageY_q_type
-What: /sys/.../iio:deviceX/scan_elements/in_voltage_i_type
-What: /sys/.../iio:deviceX/scan_elements/in_voltage_q_type
-What: /sys/.../iio:deviceX/scan_elements/in_timestamp_type
-What: /sys/.../iio:deviceX/scan_elements/in_pressureY_type
-What: /sys/.../iio:deviceX/scan_elements/in_pressure_type
-What: /sys/.../iio:deviceX/scan_elements/in_rot_quaternion_type
-What: /sys/.../iio:deviceX/scan_elements/in_proximity_type
-KernelVersion: 2.6.37
+What: /sys/.../iio:deviceX/bufferY/in_accel_type
+What: /sys/.../iio:deviceX/bufferY/in_deltaangl_type
+What: /sys/.../iio:deviceX/bufferY/in_deltavelocity_type
+What: /sys/.../iio:deviceX/bufferY/in_anglvel_type
+What: /sys/.../iio:deviceX/bufferY/in_magn_type
+What: /sys/.../iio:deviceX/bufferY/in_incli_type
+What: /sys/.../iio:deviceX/bufferY/in_voltageY_type
+What: /sys/.../iio:deviceX/bufferY/in_voltage_type
+What: /sys/.../iio:deviceX/bufferY/in_voltageY_supply_type
+What: /sys/.../iio:deviceX/bufferY/in_timestamp_type
+What: /sys/.../iio:deviceX/bufferY/in_pressureY_type
+What: /sys/.../iio:deviceX/bufferY/in_pressure_type
+What: /sys/.../iio:deviceX/bufferY/in_rot_quaternion_type
+What: /sys/.../iio:deviceX/bufferY/in_proximity_type
+KernelVersion: 5.11
Contact: linux-iio@vger.kernel.org
Description:
Description of the scan element data storage within the buffer
@@ -1186,33 +1519,35 @@ Description:
If the type parameter can take one of a small set of values,
this attribute lists them.
-What: /sys/.../iio:deviceX/scan_elements/in_voltageY_index
-What: /sys/.../iio:deviceX/scan_elements/in_voltageY_supply_index
-What: /sys/.../iio:deviceX/scan_elements/in_voltageY_i_index
-What: /sys/.../iio:deviceX/scan_elements/in_voltageY_q_index
-What: /sys/.../iio:deviceX/scan_elements/in_voltage_i_index
-What: /sys/.../iio:deviceX/scan_elements/in_voltage_q_index
-What: /sys/.../iio:deviceX/scan_elements/in_accel_x_index
-What: /sys/.../iio:deviceX/scan_elements/in_accel_y_index
-What: /sys/.../iio:deviceX/scan_elements/in_accel_z_index
-What: /sys/.../iio:deviceX/scan_elements/in_anglvel_x_index
-What: /sys/.../iio:deviceX/scan_elements/in_anglvel_y_index
-What: /sys/.../iio:deviceX/scan_elements/in_anglvel_z_index
-What: /sys/.../iio:deviceX/scan_elements/in_magn_x_index
-What: /sys/.../iio:deviceX/scan_elements/in_magn_y_index
-What: /sys/.../iio:deviceX/scan_elements/in_magn_z_index
-What: /sys/.../iio:deviceX/scan_elements/in_rot_from_north_magnetic_index
-What: /sys/.../iio:deviceX/scan_elements/in_rot_from_north_true_index
-What: /sys/.../iio:deviceX/scan_elements/in_rot_from_north_magnetic_tilt_comp_index
-What: /sys/.../iio:deviceX/scan_elements/in_rot_from_north_true_tilt_comp_index
-What: /sys/.../iio:deviceX/scan_elements/in_incli_x_index
-What: /sys/.../iio:deviceX/scan_elements/in_incli_y_index
-What: /sys/.../iio:deviceX/scan_elements/in_timestamp_index
-What: /sys/.../iio:deviceX/scan_elements/in_pressureY_index
-What: /sys/.../iio:deviceX/scan_elements/in_pressure_index
-What: /sys/.../iio:deviceX/scan_elements/in_rot_quaternion_index
-What: /sys/.../iio:deviceX/scan_elements/in_proximity_index
-KernelVersion: 2.6.37
+What: /sys/.../iio:deviceX/bufferY/in_voltageY_index
+What: /sys/.../iio:deviceX/bufferY/in_voltageY_supply_index
+What: /sys/.../iio:deviceX/bufferY/in_accel_x_index
+What: /sys/.../iio:deviceX/bufferY/in_accel_y_index
+What: /sys/.../iio:deviceX/bufferY/in_accel_z_index
+What: /sys/.../iio:deviceX/bufferY/in_deltaangl_x_index
+What: /sys/.../iio:deviceX/bufferY/in_deltaangl_y_index
+What: /sys/.../iio:deviceX/bufferY/in_deltaangl_z_index
+What: /sys/.../iio:deviceX/bufferY/in_deltavelocity_x_index
+What: /sys/.../iio:deviceX/bufferY/in_deltavelocity_y_index
+What: /sys/.../iio:deviceX/bufferY/in_deltavelocity_z_index
+What: /sys/.../iio:deviceX/bufferY/in_anglvel_x_index
+What: /sys/.../iio:deviceX/bufferY/in_anglvel_y_index
+What: /sys/.../iio:deviceX/bufferY/in_anglvel_z_index
+What: /sys/.../iio:deviceX/bufferY/in_magn_x_index
+What: /sys/.../iio:deviceX/bufferY/in_magn_y_index
+What: /sys/.../iio:deviceX/bufferY/in_magn_z_index
+What: /sys/.../iio:deviceX/bufferY/in_rot_from_north_magnetic_index
+What: /sys/.../iio:deviceX/bufferY/in_rot_from_north_true_index
+What: /sys/.../iio:deviceX/bufferY/in_rot_from_north_magnetic_tilt_comp_index
+What: /sys/.../iio:deviceX/bufferY/in_rot_from_north_true_tilt_comp_index
+What: /sys/.../iio:deviceX/bufferY/in_incli_x_index
+What: /sys/.../iio:deviceX/bufferY/in_incli_y_index
+What: /sys/.../iio:deviceX/bufferY/in_timestamp_index
+What: /sys/.../iio:deviceX/bufferY/in_pressureY_index
+What: /sys/.../iio:deviceX/bufferY/in_pressure_index
+What: /sys/.../iio:deviceX/bufferY/in_rot_quaternion_index
+What: /sys/.../iio:deviceX/bufferY/in_proximity_index
+KernelVersion: 5.11
Contact: linux-iio@vger.kernel.org
Description:
A single positive integer specifying the position of this
@@ -1240,7 +1575,7 @@ Description:
This attribute is used to read the amount of quadrature error
present in the device at a given time.
-What: /sys/.../iio:deviceX/in_accelX_power_mode
+What: /sys/.../iio:deviceX/in_accelY_power_mode
KernelVersion: 3.11
Contact: linux-iio@vger.kernel.org
Description:
@@ -1252,6 +1587,9 @@ Description:
What: /sys/.../iio:deviceX/in_energy_input
What: /sys/.../iio:deviceX/in_energy_raw
+What: /sys/.../iio:deviceX/in_energyY_active_raw
+What: /sys/.../iio:deviceX/in_energyY_reactive_raw
+What: /sys/.../iio:deviceX/in_energyY_apparent_raw
KernelVersion: 4.0
Contact: linux-iio@vger.kernel.org
Description:
@@ -1285,6 +1623,7 @@ Description:
Proximity measurement indicating that some
object is near the sensor, usually by observing
reflectivity of infrared or ultrasound emitted.
+
Often these sensors are unit less and as such conversion
to SI units is not possible. Higher proximity measurements
indicate closer objects, and vice versa. Units after
@@ -1307,7 +1646,13 @@ What: /sys/.../iio:deviceX/in_intensityY_raw
What: /sys/.../iio:deviceX/in_intensityY_ir_raw
What: /sys/.../iio:deviceX/in_intensityY_both_raw
What: /sys/.../iio:deviceX/in_intensityY_uv_raw
+What: /sys/.../iio:deviceX/in_intensityY_uva_raw
+What: /sys/.../iio:deviceX/in_intensityY_uvb_raw
What: /sys/.../iio:deviceX/in_intensityY_duv_raw
+What: /sys/.../iio:deviceX/in_intensity_red_raw
+What: /sys/.../iio:deviceX/in_intensity_green_raw
+What: /sys/.../iio:deviceX/in_intensity_blue_raw
+What: /sys/.../iio:deviceX/in_intensity_clear_raw
KernelVersion: 3.4
Contact: linux-iio@vger.kernel.org
Description:
@@ -1315,8 +1660,9 @@ Description:
that measurements contain visible and infrared light
components or just infrared light, respectively. Modifier
uv indicates that measurements contain ultraviolet light
- components. Modifier duv indicates that measurements
- contain deep ultraviolet light components.
+ components. Modifiers uva, uvb and duv indicate that
+ measurements contain A, B or deep (C) ultraviolet light
+ components respectively.
What: /sys/.../iio:deviceX/in_uvindex_input
KernelVersion: 4.6
@@ -1327,6 +1673,7 @@ Description:
standardised CIE Erythemal Action Spectrum. UV index values range
from 0 (low) to >=11 (extreme).
+What: /sys/.../iio:deviceX/in_intensity_integration_time
What: /sys/.../iio:deviceX/in_intensity_red_integration_time
What: /sys/.../iio:deviceX/in_intensity_green_integration_time
What: /sys/.../iio:deviceX/in_intensity_blue_integration_time
@@ -1336,7 +1683,8 @@ KernelVersion: 3.12
Contact: linux-iio@vger.kernel.org
Description:
This attribute is used to get/set the integration time in
- seconds.
+ seconds. If shared across all channels of a given type,
+ <type>_integration_time is used.
What: /sys/.../iio:deviceX/in_velocity_sqrt(x^2+y^2+z^2)_integration_time
KernelVersion: 4.0
@@ -1363,22 +1711,31 @@ Description:
Raw value of rotation from true/magnetic north measured with
or without compensation from tilt sensors.
-What: /sys/bus/iio/devices/iio:deviceX/in_currentX_raw
-What: /sys/bus/iio/devices/iio:deviceX/in_currentX_i_raw
-What: /sys/bus/iio/devices/iio:deviceX/in_currentX_q_raw
-KernelVersion: 3.18
+What: /sys/bus/iio/devices/iio:deviceX/in_currentY_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_currentY_supply_raw
+KernelVersion: 3.17
Contact: linux-iio@vger.kernel.org
Description:
- Raw current measurement from channel X. Units are in milliamps
+ Raw current measurement from channel Y. Units are in milliamps
after application of scale and offset. If no offset or scale is
present, output should be considered as processed with the
- unit in milliamps.
+ unit in milliamps. In special cases where the channel does not
+ correspond to externally available input one of the named
+ versions may be used.
Channels with 'i' and 'q' modifiers always exist in pairs and both
channels refer to the same signal. The 'i' channel contains the in-phase
component of the signal while the 'q' channel contains the quadrature
component.
+What: /sys/bus/iio/devices/iio:deviceX/in_altcurrentY_rms_raw
+KernelVersion: 6.18
+Contact: linux-iio@vger.kernel.org
+Description:
+ Raw (unscaled no bias removal etc.) Root Mean Square (RMS) current
+ measurement from channel Y. Units after application of scale and
+ offset are milliamps.
+
What: /sys/.../iio:deviceX/in_energy_en
What: /sys/.../iio:deviceX/in_distance_en
What: /sys/.../iio:deviceX/in_velocity_sqrt(x^2+y^2+z^2)_en
@@ -1422,23 +1779,26 @@ Description:
Specifies number of seconds in which we compute the steps
that occur in order to decide if the consumer is making steps.
-What: /sys/bus/iio/devices/iio:deviceX/buffer/watermark
-KernelVersion: 4.2
+What: /sys/bus/iio/devices/iio:deviceX/bufferY/watermark
+KernelVersion: 5.11
Contact: linux-iio@vger.kernel.org
Description:
A single positive integer specifying the maximum number of scan
elements to wait for.
+
Poll will block until the watermark is reached.
+
Blocking read will wait until the minimum between the requested
read amount or the low water mark is available.
+
Non-blocking read will retrieve the available samples from the
buffer even if there are less samples then watermark level. This
allows the application to block on poll with a timeout and read
the available samples after the timeout expires and thus have a
maximum delay guarantee.
-What: /sys/bus/iio/devices/iio:deviceX/buffer/data_available
-KernelVersion: 4.16
+What: /sys/bus/iio/devices/iio:deviceX/bufferY/data_available
+KernelVersion: 5.11
Contact: linux-iio@vger.kernel.org
Description:
A read-only value indicating the bytes of data available in the
@@ -1459,11 +1819,13 @@ Description:
device settings allows it (e.g. if a trigger is set that samples
data differently that the hardware fifo does then hardware fifo
will not enabled).
+
If the hardware fifo is enabled and the level of the hardware
fifo reaches the hardware fifo watermark level the device will
flush its hardware fifo to the device buffer. Doing a non
blocking read on the device when no samples are present in the
device buffer will also force a flush.
+
When the hardware fifo is enabled there is no need to use a
trigger to use buffer mode since the watermark settings
guarantees that the hardware fifo is flushed to the device
@@ -1501,6 +1863,7 @@ Description:
A single positive integer specifying the minimum watermark level
for the hardware fifo of this device. If the device does not
have a hardware fifo this entry is not present.
+
If the user sets buffer/watermark to a value less than this one,
then the hardware watermark will remain unset.
@@ -1511,6 +1874,7 @@ Description:
A single positive integer specifying the maximum watermark level
for the hardware fifo of this device. If the device does not
have a hardware fifo this entry is not present.
+
If the user sets buffer/watermark to a value greater than this
one, then the hardware watermark will be capped at this value.
@@ -1522,15 +1886,16 @@ Description:
levels for the hardware fifo. This entry is optional and if it
is not present it means that all the values between
hwfifo_watermark_min and hwfifo_watermark_max are supported.
+
If the user sets buffer/watermark to a value greater than
hwfifo_watermak_min but not equal to any of the values in this
list, the driver will chose an appropriate value for the
hardware fifo watermark level.
What: /sys/bus/iio/devices/iio:deviceX/in_temp_calibemissivity
-What: /sys/bus/iio/devices/iio:deviceX/in_tempX_calibemissivity
+What: /sys/bus/iio/devices/iio:deviceX/in_tempY_calibemissivity
What: /sys/bus/iio/devices/iio:deviceX/in_temp_object_calibemissivity
-What: /sys/bus/iio/devices/iio:deviceX/in_tempX_object_calibemissivity
+What: /sys/bus/iio/devices/iio:deviceX/in_tempY_object_calibemissivity
KernelVersion: 4.1
Contact: linux-iio@vger.kernel.org
Description:
@@ -1551,32 +1916,40 @@ Description:
is considered as one sample for <type>[_name]_sampling_frequency.
What: /sys/bus/iio/devices/iio:deviceX/in_concentration_raw
-What: /sys/bus/iio/devices/iio:deviceX/in_concentrationX_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_concentrationY_raw
What: /sys/bus/iio/devices/iio:deviceX/in_concentration_co2_raw
-What: /sys/bus/iio/devices/iio:deviceX/in_concentrationX_co2_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_concentrationY_co2_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_concentration_ethanol_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_concentrationY_ethanol_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_concentration_h2_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_concentrationY_h2_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_concentration_o2_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_concentrationY_o2_raw
What: /sys/bus/iio/devices/iio:deviceX/in_concentration_voc_raw
-What: /sys/bus/iio/devices/iio:deviceX/in_concentrationX_voc_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_concentrationY_voc_raw
KernelVersion: 4.3
Contact: linux-iio@vger.kernel.org
Description:
- Raw (unscaled no offset etc.) percentage reading of a substance.
+ Raw (unscaled no offset etc.) reading of a substance. Units
+ after application of scale and offset are percents.
What: /sys/bus/iio/devices/iio:deviceX/in_resistance_raw
-What: /sys/bus/iio/devices/iio:deviceX/in_resistanceX_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_resistanceY_raw
What: /sys/bus/iio/devices/iio:deviceX/out_resistance_raw
-What: /sys/bus/iio/devices/iio:deviceX/out_resistanceX_raw
+What: /sys/bus/iio/devices/iio:deviceX/out_resistanceY_raw
KernelVersion: 4.3
Contact: linux-iio@vger.kernel.org
Description:
- Raw (unscaled no offset etc.) resistance reading that can be processed
- into an ohm value.
+ Raw (unscaled no offset etc.) resistance reading.
+ Units after application of scale and offset are ohms.
What: /sys/bus/iio/devices/iio:deviceX/heater_enable
KernelVersion: 4.1.0
Contact: linux-iio@vger.kernel.org
Description:
'1' (enable) or '0' (disable) specifying the enable
- of heater function. Same reading values apply
+ of heater function. Same reading values apply.
+
This ABI is especially applicable for humidity sensors
to heatup the device and get rid of any condensation
in some humidity environment
@@ -1599,17 +1972,21 @@ Description:
Mounting matrix for IIO sensors. This is a rotation matrix which
informs userspace about sensor chip's placement relative to the
main hardware it is mounted on.
+
Main hardware placement is defined according to the local
reference frame related to the physical quantity the sensor
measures.
+
Given that the rotation matrix is defined in a board specific
way (platform data and / or device-tree), the main hardware
reference frame definition is left to the implementor's choice
(see below for a magnetometer example).
+
Applications should apply this rotation matrix to samples so
that when main hardware reference frame is aligned onto local
reference frame, then sensor chip reference frame is also
perfectly aligned with it.
+
Matrix is a 3x3 unitary matrix and typically looks like
[0, 1, 0; 1, 0, 0; 0, 0, -1]. Identity matrix
[1, 0, 0; 0, 1, 0; 0, 0, 1] means sensor chip and main hardware
@@ -1618,8 +1995,10 @@ Description:
For example, a mounting matrix for a magnetometer sensor informs
userspace about sensor chip's ORIENTATION relative to the main
hardware.
+
More specifically, main hardware orientation is defined with
respect to the LOCAL EARTH GEOMAGNETIC REFERENCE FRAME where :
+
* Y is in the ground plane and positive towards magnetic North ;
* X is in the ground plane, perpendicular to the North axis and
positive towards the East ;
@@ -1628,13 +2007,16 @@ Description:
An implementor might consider that for a hand-held device, a
'natural' orientation would be 'front facing camera at the top'.
The main hardware reference frame could then be described as :
+
* Y is in the plane of the screen and is positive towards the
top of the screen ;
* X is in the plane of the screen, perpendicular to Y axis, and
positive towards the right hand side of the screen ;
* Z is perpendicular to the screen plane and positive out of the
screen.
+
Another example for a quadrotor UAV might be :
+
* Y is in the plane of the propellers and positive towards the
front-view camera;
* X is in the plane of the propellers, perpendicular to Y axis,
@@ -1645,13 +2027,16 @@ What: /sys/bus/iio/devices/iio:deviceX/in_electricalconductivity_raw
KernelVersion: 4.8
Contact: linux-iio@vger.kernel.org
Description:
- Raw (unscaled no offset etc.) electric conductivity reading that
- can be processed to siemens per meter.
+ Raw (unscaled no offset etc.) electric conductivity reading.
+ Units after application of scale and offset are siemens per
+ meter.
What: /sys/bus/iio/devices/iio:deviceX/in_countY_raw
KernelVersion: 4.10
Contact: linux-iio@vger.kernel.org
Description:
+ This interface is deprecated; please use the Counter subsystem.
+
Raw counter device counts from channel Y. For quadrature
counters, multiplication by an available [Y]_scale results in
the counts of a single quadrature signal phase from channel Y.
@@ -1660,6 +2045,8 @@ What: /sys/bus/iio/devices/iio:deviceX/in_indexY_raw
KernelVersion: 4.10
Contact: linux-iio@vger.kernel.org
Description:
+ This interface is deprecated; please use the Counter subsystem.
+
Raw counter device index value from channel Y. This attribute
provides an absolute positional reference (e.g. a pulse once per
revolution) which may be used to home positional systems as
@@ -1669,7 +2056,10 @@ What: /sys/bus/iio/devices/iio:deviceX/in_count_count_direction_available
KernelVersion: 4.12
Contact: linux-iio@vger.kernel.org
Description:
+ This interface is deprecated; please use the Counter subsystem.
+
A list of possible counting directions which are:
+
- "up" : counter device is increasing.
- "down": counter device is decreasing.
@@ -1677,11 +2067,358 @@ What: /sys/bus/iio/devices/iio:deviceX/in_countY_count_direction
KernelVersion: 4.12
Contact: linux-iio@vger.kernel.org
Description:
+ This interface is deprecated; please use the Counter subsystem.
+
Raw counter device counters direction for channel Y.
+What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_label
+What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_label
+KernelVersion: 5.8
+Contact: linux-iio@vger.kernel.org
+Description:
+ Optional symbolic label to a device channel.
+ If a label is defined for this channel add that to the channel
+ specific attributes. This is useful for userspace to be able to
+ better identify an individual channel.
+
What: /sys/bus/iio/devices/iio:deviceX/in_phaseY_raw
KernelVersion: 4.18
Contact: linux-iio@vger.kernel.org
Description:
- Raw (unscaled) phase difference reading from channel Y
- that can be processed to radians. \ No newline at end of file
+ Raw (unscaled) phase difference reading from channel Y.
+ Units after application of scale and offset are radians.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_massconcentration_pm1_input
+What: /sys/bus/iio/devices/iio:deviceX/in_massconcentrationY_pm1_input
+What: /sys/bus/iio/devices/iio:deviceX/in_massconcentration_pm2p5_input
+What: /sys/bus/iio/devices/iio:deviceX/in_massconcentrationY_pm2p5_input
+What: /sys/bus/iio/devices/iio:deviceX/in_massconcentration_pm4_input
+What: /sys/bus/iio/devices/iio:deviceX/in_massconcentrationY_pm4_input
+What: /sys/bus/iio/devices/iio:deviceX/in_massconcentration_pm10_input
+What: /sys/bus/iio/devices/iio:deviceX/in_massconcentrationY_pm10_input
+KernelVersion: 4.22
+Contact: linux-iio@vger.kernel.org
+Description:
+ Mass concentration reading of particulate matter in ug / m3.
+ pmX consists of particles with aerodynamic diameter less or
+ equal to X micrometers.
+
+What: /sys/bus/iio/devices/iio:deviceX/events/in_illuminance_period_available
+Date: November 2019
+KernelVersion: 5.4
+Contact: linux-iio@vger.kernel.org
+Description:
+ List of valid periods (in seconds) for which the light intensity
+ must be above the threshold level before interrupt is asserted.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_filter_notch_center_frequency
+KernelVersion: 5.5
+Contact: linux-iio@vger.kernel.org
+Description:
+ Center frequency in Hz for a notch filter. Used i.e. for line
+ noise suppression.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_temp_thermocouple_type
+KernelVersion: 5.5
+Contact: linux-iio@vger.kernel.org
+Description:
+ One of the following thermocouple types: B, E, J, K, N, R, S, T.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_temp_object_calibambient
+What: /sys/bus/iio/devices/iio:deviceX/in_tempY_object_calibambient
+KernelVersion: 5.10
+Contact: linux-iio@vger.kernel.org
+Description:
+ Calibrated ambient temperature for object temperature
+ calculation in milli degrees Celsius.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_intensity_x_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_intensity_y_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_intensity_z_raw
+KernelVersion: 5.10
+Contact: linux-iio@vger.kernel.org
+Description:
+ Unscaled light intensity according to CIE 1931/DIN 5033 color space.
+ Units after application of scale are nano nanowatts per square meter.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_anglY_label
+KernelVersion: 5.12
+Contact: linux-iio@vger.kernel.org
+Description:
+ Optional symbolic label for channel Y.
+ For Intel hid hinge sensor, the label values are:
+ hinge, keyboard, screen. It means the three channels
+ each correspond respectively to hinge angle, keyboard angle,
+ and screen angle.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_illuminance_hysteresis_relative
+What: /sys/bus/iio/devices/iio:deviceX/in_intensity_hysteresis_relative
+KernelVersion: 5.12
+Contact: linux-iio@vger.kernel.org
+Description:
+ Specify the percent for light sensor relative to the channel
+ absolute value that a data field should change before an event
+ is generated. Units are a percentage of the prior reading.
+
+What: /sys/bus/iio/devices/iio:deviceX/calibration_auto_enable
+Date: June 2020
+KernelVersion: 5.8
+Contact: linux-iio@vger.kernel.org
+Description:
+ Some sensors have the ability to apply auto calibration at
+ runtime. For example, it may be necessary to compensate for
+ contaminant build-up in a measurement chamber or optical
+ element deterioration that would otherwise lead to sensor drift.
+
+ Writing 1 or 0 to this attribute will respectively activate or
+ deactivate this auto calibration function.
+
+ Upon reading, the current status is returned.
+
+What: /sys/bus/iio/devices/iio:deviceX/calibration_forced_value
+Date: June 2020
+KernelVersion: 5.8
+Contact: linux-iio@vger.kernel.org
+Description:
+ Some sensors have the ability to apply a manual calibration using
+ a known measurement value, perhaps obtained from an external
+ reference device.
+
+ Writing a value to this function will force such a calibration
+ change. For the scd30 the value should be from the range
+ [400 1 2000].
+
+ Note for the scd30 that a valid value may only be obtained once
+ it is has been written. Until then any read back of this value
+ should be ignored. As for the scd4x an error will be returned
+ immediately if the manual calibration has failed.
+
+What: /sys/bus/iio/devices/iio:deviceX/calibration_forced_value_available
+KernelVersion: 5.15
+Contact: linux-iio@vger.kernel.org
+Description:
+ Available range for the forced calibration value, expressed as:
+
+ - a range specified as "[min step max]"
+
+What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_sampling_frequency
+What: /sys/bus/iio/devices/iio:deviceX/in_powerY_sampling_frequency
+What: /sys/bus/iio/devices/iio:deviceX/in_currentY_sampling_frequency
+KernelVersion: 5.20
+Contact: linux-iio@vger.kernel.org
+Description:
+ Some devices have separate controls of sampling frequency for
+ individual channels. If multiple channels are enabled in a scan,
+ then the sampling_frequency of the scan may be computed from the
+ per channel sampling frequencies.
+
+What: /sys/.../events/in_accel_gesture_singletap_en
+What: /sys/.../events/in_accel_gesture_doubletap_en
+KernelVersion: 6.1
+Contact: linux-iio@vger.kernel.org
+Description:
+ Device generates an event on a single or double tap.
+
+What: /sys/.../events/in_accel_gesture_singletap_value
+What: /sys/.../events/in_accel_gesture_doubletap_value
+KernelVersion: 6.1
+Contact: linux-iio@vger.kernel.org
+Description:
+ Specifies the threshold value that the device is comparing
+ against to generate the tap gesture event. The lower
+ threshold value increases the sensitivity of tap detection.
+ Units and the exact meaning of value are device-specific.
+
+What: /sys/.../events/in_accel_gesture_tap_value_available
+KernelVersion: 6.1
+Contact: linux-iio@vger.kernel.org
+Description:
+ Lists all available threshold values which can be used to
+ modify the sensitivity of the tap detection.
+
+What: /sys/.../events/in_accel_gesture_singletap_reset_timeout
+What: /sys/.../events/in_accel_gesture_doubletap_reset_timeout
+KernelVersion: 6.1
+Contact: linux-iio@vger.kernel.org
+Description:
+ Specifies the timeout value in seconds for the tap detector
+ to not to look for another tap event after the event as
+ occurred. Basically the minimum quiet time between the two
+ single-tap's or two double-tap's.
+
+What: /sys/.../events/in_accel_gesture_tap_reset_timeout_available
+KernelVersion: 6.1
+Contact: linux-iio@vger.kernel.org
+Description:
+ Lists all available tap reset timeout values. Units in seconds.
+
+What: /sys/.../events/in_accel_gesture_doubletap_tap2_min_delay
+KernelVersion: 6.1
+Contact: linux-iio@vger.kernel.org
+Description:
+ Specifies the minimum quiet time in seconds between the two
+ taps of a double tap.
+
+What: /sys/.../events/in_accel_gesture_doubletap_tap2_min_delay_available
+KernelVersion: 6.1
+Contact: linux-iio@vger.kernel.org
+Description:
+ Lists all available delay values between two taps in the double
+ tap. Units in seconds.
+
+What: /sys/.../events/in_accel_gesture_tap_maxtomin_time
+KernelVersion: 6.1
+Contact: linux-iio@vger.kernel.org
+Description:
+ Specifies the maximum time difference allowed between upper
+ and lower peak of tap to consider it as the valid tap event.
+ Units in seconds.
+
+What: /sys/.../events/in_accel_gesture_tap_maxtomin_time_available
+KernelVersion: 6.1
+Contact: linux-iio@vger.kernel.org
+Description:
+ Lists all available time values between upper peak to lower
+ peak. Units in seconds.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_rot_yaw_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_rot_pitch_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_rot_roll_raw
+KernelVersion: 6.1
+Contact: linux-iio@vger.kernel.org
+Description:
+ Raw (unscaled) euler angles readings. Units after
+ application of scale are deg.
+
+What: /sys/bus/iio/devices/iio:deviceX/serialnumber
+KernelVersion: 6.1
+Contact: linux-iio@vger.kernel.org
+Description:
+ An example format is 16-bytes, 2-digits-per-byte, HEX-string
+ representing the sensor unique ID number.
+
+What: /sys/bus/iio/devices/iio:deviceX/filter_type_available
+What: /sys/bus/iio/devices/iio:deviceX/in_voltage-voltage_filter_type_available
+KernelVersion: 6.1
+Contact: linux-iio@vger.kernel.org
+Description:
+ Reading returns a list with the possible filter modes. Options
+ for the attribute:
+
+ * "none" - Filter is disabled/bypassed.
+ * "sinc1" - The digital sinc1 filter. Fast 1st
+ conversion time. Poor noise performance.
+ * "sinc3" - The digital sinc3 filter. Moderate 1st
+ conversion time. Good noise performance.
+ * "sinc3+pf1" - Sinc3 + device specific Post Filter 1.
+ * "sinc3+pf2" - Sinc3 + device specific Post Filter 2.
+ * "sinc3+pf3" - Sinc3 + device specific Post Filter 3.
+ * "sinc3+pf4" - Sinc3 + device specific Post Filter 4.
+ * "sinc3+rej60" - Sinc3 + 60Hz rejection.
+ * "sinc3+sinc1" - Sinc3 + averaging by 8. Low 1st conversion
+ time.
+ * "sinc4" - Sinc 4. Excellent noise performance. Long
+ 1st conversion time.
+ * "sinc4+lp" - Sinc4 + Low Pass Filter.
+ * "sinc4+sinc1" - Sinc4 + averaging by 8. Low 1st conversion
+ time.
+ * "sinc4+rej60" - Sinc4 + 60Hz rejection.
+ * "sinc5" - The digital sinc5 filter. Excellent noise
+ performance
+ * "sinc5+avg" - Sinc5 + averaging by 4.
+ * "sinc5+pf1" - Sinc5 + device specific Post Filter 1.
+ * "sinc5+sinc1" - Sinc5 + Sinc1.
+ * "sinc5+sinc1+pf1" - Sinc5 + Sinc1 + device specific Post Filter 1.
+ * "sinc5+sinc1+pf2" - Sinc5 + Sinc1 + device specific Post Filter 2.
+ * "sinc5+sinc1+pf3" - Sinc5 + Sinc1 + device specific Post Filter 3.
+ * "sinc5+sinc1+pf4" - Sinc5 + Sinc1 + device specific Post Filter 4.
+ * "wideband" - filter with wideband low ripple passband
+ and sharp transition band.
+
+What: /sys/bus/iio/devices/iio:deviceX/filter_type
+What: /sys/bus/iio/devices/iio:deviceX/in_voltageY-voltageZ_filter_type
+KernelVersion: 6.1
+Contact: linux-iio@vger.kernel.org
+Description:
+ Specifies which filter type apply to the channel. The possible
+ values are given by the filter_type_available attribute.
+
+What: /sys/.../events/in_proximity_thresh_either_runningperiod
+KernelVersion: 6.6
+Contact: linux-iio@vger.kernel.org
+Description:
+ A running period of time (in seconds) for which
+ in_proximity_thresh_either_runningcount amount of conditions
+ must occur before an event is generated. If direction is not
+ specified then this period applies to both directions.
+
+What: /sys/.../events/in_proximity_thresh_either_runningcount
+KernelVersion: 6.6
+Contact: linux-iio@vger.kernel.org
+Description:
+ Number of conditions that must occur, during a running
+ period, before an event is generated.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_colortemp_raw
+KernelVersion: 6.7
+Contact: linux-iio@vger.kernel.org
+Description:
+ Represents light color temperature, which measures light color
+ temperature in Kelvin.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_chromaticity_x_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_chromaticity_y_raw
+KernelVersion: 6.7
+Contact: linux-iio@vger.kernel.org
+Description:
+ The x and y light color coordinate on the CIE 1931 chromaticity
+ diagram.
+
+What: /sys/bus/iio/devices/iio:deviceX/events/in_altvoltageY_mag_either_label
+What: /sys/bus/iio/devices/iio:deviceX/events/in_altvoltageY_mag_rising_label
+What: /sys/bus/iio/devices/iio:deviceX/events/in_altvoltageY_thresh_falling_label
+What: /sys/bus/iio/devices/iio:deviceX/events/in_altvoltageY_thresh_rising_label
+What: /sys/bus/iio/devices/iio:deviceX/events/in_anglvelY_mag_rising_label
+What: /sys/bus/iio/devices/iio:deviceX/events/in_anglY_thresh_rising_label
+What: /sys/bus/iio/devices/iio:deviceX/events/in_phaseY_mag_rising_label
+KernelVersion: 6.7
+Contact: linux-iio@vger.kernel.org
+Description:
+ Optional symbolic label to a device channel event.
+ If a label is defined for this event add that to the event
+ specific attributes. This is useful for userspace to be able to
+ better identify an individual event.
+
+What: /sys/.../events/in_accel_gesture_tap_wait_timeout
+KernelVersion: 6.7
+Contact: linux-iio@vger.kernel.org
+Description:
+ Enable tap gesture confirmation with timeout.
+
+What: /sys/.../events/in_accel_gesture_tap_wait_dur
+KernelVersion: 6.7
+Contact: linux-iio@vger.kernel.org
+Description:
+ Timeout value in seconds for tap gesture confirmation.
+
+What: /sys/.../events/in_accel_gesture_tap_wait_dur_available
+KernelVersion: 6.7
+Contact: linux-iio@vger.kernel.org
+Description:
+ List of available timeout value for tap gesture confirmation.
+
+What: /sys/.../iio:deviceX/in_shunt_resistor
+What: /sys/.../iio:deviceX/in_current_shunt_resistor
+What: /sys/.../iio:deviceX/in_power_shunt_resistor
+KernelVersion: 6.10
+Contact: linux-iio@vger.kernel.org
+Description:
+ The value of current sense resistor in Ohms.
+
+What: /sys/.../iio:deviceX/in_attention_input
+KernelVersion: 6.13
+Contact: linux-iio@vger.kernel.org
+Description:
+ Value representing the user's attention to the system expressed
+ in units as percentage. This usually means if the user is
+ looking at the screen or not.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-accel-adxl372 b/Documentation/ABI/testing/sysfs-bus-iio-accel-adxl372
new file mode 100644
index 000000000000..47e34f865ca1
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-accel-adxl372
@@ -0,0 +1,7 @@
+What: /sys/bus/iio/devices/triggerX/name = "adxl372-devX-peak"
+KernelVersion:
+Contact: linux-iio@vger.kernel.org
+Description:
+ The adxl372 accelerometer kernel module provides an additional trigger,
+ which sets the device in a mode in which it will record only the peak acceleration
+ sensed over the set period of time in the events sysfs.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-ad9739a b/Documentation/ABI/testing/sysfs-bus-iio-ad9739a
new file mode 100644
index 000000000000..ed59299e6f8d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-ad9739a
@@ -0,0 +1,19 @@
+What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_operating_mode
+KernelVersion: 6.9
+Contact: linux-iio@vger.kernel.org
+Description:
+ DAC operating mode. One of the following modes can be selected:
+
+ * normal: This is DAC normal mode.
+ * mixed-mode: In this mode the output is effectively chopped at
+ the DAC sample rate. This has the effect of
+ reducing the power of the fundamental signal while
+ increasing the power of the images centered around
+ the DAC sample rate, thus improving the output
+ power of these images.
+
+What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_operating_mode_available
+KernelVersion: 6.9
+Contact: linux-iio@vger.kernel.org
+Description:
+ Available operating modes.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-adc-ad-sigma-delta b/Documentation/ABI/testing/sysfs-bus-iio-adc-ad-sigma-delta
new file mode 100644
index 000000000000..a5a8a579f4f3
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-adc-ad-sigma-delta
@@ -0,0 +1,23 @@
+What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_sys_calibration
+KernelVersion: 5.5
+Contact: linux-iio@vger.kernel.org
+Description:
+ This attribute, if available, initiates the system calibration procedure. This is done on a
+ single channel at a time. Write '1' to start the calibration.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_sys_calibration_mode_available
+KernelVersion: 5.5
+Contact: linux-iio@vger.kernel.org
+Description:
+ This attribute, if available, returns a list with the possible calibration modes.
+ There are two available options:
+ "zero_scale" - calibrate to zero scale
+ "full_scale" - calibrate to full scale
+
+What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_sys_calibration_mode
+KernelVersion: 5.5
+Contact: linux-iio@vger.kernel.org
+Description:
+ This attribute, if available, sets up the calibration mode used in the system calibration
+ procedure. Reading returns the current calibration mode.
+ Writing sets the system calibration mode.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-adc-ad4130 b/Documentation/ABI/testing/sysfs-bus-iio-adc-ad4130
new file mode 100644
index 000000000000..d3fad27421d6
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-adc-ad4130
@@ -0,0 +1,20 @@
+What: /sys/bus/iio/devices/iio:deviceX/in_voltage-voltage_filter_mode_available
+KernelVersion: 6.2
+Contact: linux-iio@vger.kernel.org
+Description:
+ Reading returns a list with the possible filter modes.
+
+ This ABI is only kept for backwards compatibility and the values
+ returned are identical to filter_type_available attribute
+ documented in Documentation/ABI/testing/sysfs-bus-iio. Please,
+ use filter_type_available like ABI to provide filter options for
+ new drivers.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_voltageY-voltageZ_filter_mode
+KernelVersion: 6.2
+Contact: linux-iio@vger.kernel.org
+Description:
+ This ABI is only kept for backwards compatibility and the values
+ returned are identical to in_voltageY-voltageZ_filter_type
+ attribute documented in Documentation/ABI/testing/sysfs-bus-iio.
+ Please, use in_voltageY-voltageZ_filter_type for new drivers.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-adc-ad7192 b/Documentation/ABI/testing/sysfs-bus-iio-adc-ad7192
new file mode 100644
index 000000000000..28be1cabf112
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-adc-ad7192
@@ -0,0 +1,27 @@
+What: /sys/bus/iio/devices/iio:deviceX/ac_excitation_en
+KernelVersion:
+Contact: linux-iio@vger.kernel.org
+Description:
+ This attribute, if available, is used to enable the AC
+ excitation mode found on some converters. In ac excitation mode,
+ the polarity of the excitation voltage is reversed on
+ alternate cycles, to eliminate DC errors.
+
+What: /sys/bus/iio/devices/iio:deviceX/bridge_switch_en
+KernelVersion:
+Contact: linux-iio@vger.kernel.org
+Description:
+ This attribute, if available, is used to close or open the
+ bridge power down switch found on some converters.
+ In bridge applications, such as strain gauges and load cells,
+ the bridge itself consumes the majority of the current in the
+ system. To minimize the current consumption of the system,
+ the bridge can be disconnected (when it is not being used
+ using the bridge_switch_en attribute.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_voltage2-voltage2_shorted_raw
+KernelVersion:
+Contact: linux-iio@vger.kernel.org
+Description:
+ Measure voltage from AIN2 pin connected to AIN(+)
+ and AIN(-) shorted.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-adc-ad7280a b/Documentation/ABI/testing/sysfs-bus-iio-adc-ad7280a
new file mode 100644
index 000000000000..83b7efe6aa07
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-adc-ad7280a
@@ -0,0 +1,13 @@
+What: /sys/bus/iio/devices/iio:deviceX/in_voltageY-voltageZ_balance_switch_en
+KernelVersion: 5.14
+Contact: linux-iio@vger.kernel.org
+Description:
+ Used to enable an output for balancing cells for time
+ controlled via in_voltage_Y-voltageZ_balance_switch_timer.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_voltageY-voltageZ_balance_switch_timer
+KernelVersion: 5.14
+Contact: linux-iio@vger.kernel.org
+Description:
+ Time in seconds for which balance switch will be turned on.
+ Multiple of 71.5 seconds.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-adc-envelope-detector b/Documentation/ABI/testing/sysfs-bus-iio-adc-envelope-detector
index 2071f9bcfaa5..1c2a07f7a75e 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio-adc-envelope-detector
+++ b/Documentation/ABI/testing/sysfs-bus-iio-adc-envelope-detector
@@ -5,7 +5,8 @@ Contact: Peter Rosin <peda@axentia.se>
Description:
The DAC is used to find the peak level of an alternating
voltage input signal by a binary search using the output
- of a comparator wired to an interrupt pin. Like so:
+ of a comparator wired to an interrupt pin. Like so::
+
_
| \
input +------>-------|+ \
@@ -19,10 +20,12 @@ Description:
| irq|------<-------'
| |
'-------'
+
The boolean invert attribute (0/1) should be set when the
input signal is centered around the maximum value of the
dac instead of zero. The envelope detector will search
from below in this case and will also invert the result.
+
The edge/level of the interrupt is also switched to its
opposite value.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-adc-hi8435 b/Documentation/ABI/testing/sysfs-bus-iio-adc-hi8435
index f30b4c424fb6..4b01150af397 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio-adc-hi8435
+++ b/Documentation/ABI/testing/sysfs-bus-iio-adc-hi8435
@@ -19,9 +19,11 @@ Description:
is separately set for "GND-Open" and "Supply-Open" modes.
Channels 0..31 have common low threshold values, but could have different
sensing_modes.
+
The low voltage threshold range is between 2..21V.
Hysteresis between low and high thresholds can not be lower then 2 and
can not be odd.
+
If falling threshold results hysteresis to odd value then rising
threshold is automatically subtracted by one.
@@ -34,10 +36,13 @@ Description:
this value then the threshold rising event is pushed.
Depending on in_voltageY_sensing_mode the high voltage threshold
is separately set for "GND-Open" and "Supply-Open" modes.
+
Channels 0..31 have common high threshold values, but could have different
sensing_modes.
+
The high voltage threshold range is between 3..22V.
Hysteresis between low and high thresholds can not be lower then 2 and
can not be odd.
+
If rising threshold results hysteresis to odd value then falling
threshold is automatically appended by one.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-adc-max11410 b/Documentation/ABI/testing/sysfs-bus-iio-adc-max11410
new file mode 100644
index 000000000000..2a53c6b37360
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-adc-max11410
@@ -0,0 +1,13 @@
+What: /sys/bus/iio/devices/iio:deviceX/in_voltage_filterY_notch_en
+Date: September 2022
+KernelVersion: 6.0
+Contact: linux-iio@vger.kernel.org
+Description:
+ Enable or disable a notch filter.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_voltage_filterY_notch_center
+Date: September 2022
+KernelVersion: 6.0
+Contact: linux-iio@vger.kernel.org
+Description:
+ Center frequency of the notch filter in Hz.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-adc-max9611 b/Documentation/ABI/testing/sysfs-bus-iio-adc-max9611
deleted file mode 100644
index 6d2d2b094941..000000000000
--- a/Documentation/ABI/testing/sysfs-bus-iio-adc-max9611
+++ /dev/null
@@ -1,17 +0,0 @@
-What: /sys/bus/iio/devices/iio:deviceX/in_power_shunt_resistor
-Date: March 2017
-KernelVersion: 4.12
-Contact: linux-iio@vger.kernel.org
-Description: The value of the shunt resistor used to compute power drain on
- common input voltage pin (RS+). In Ohms.
-
-What: /sys/bus/iio/devices/iio:deviceX/in_current_shunt_resistor
-Date: March 2017
-KernelVersion: 4.12
-Contact: linux-iio@vger.kernel.org
-Description: The value of the shunt resistor used to compute current flowing
- between RS+ and RS- voltage sense inputs. In Ohms.
-
-These attributes describe a single physical component, exposed as two distinct
-attributes as it is used to calculate two different values: power load and
-current flowing between RS+ and RS- inputs.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-adc-mcp3564 b/Documentation/ABI/testing/sysfs-bus-iio-adc-mcp3564
new file mode 100644
index 000000000000..b168aa44b233
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-adc-mcp3564
@@ -0,0 +1,53 @@
+What: /sys/bus/iio/devices/iio:deviceX/boost_current_gain
+KernelVersion: 6.4
+Contact: linux-iio@vger.kernel.org
+Description:
+ This attribute is used to set the gain of the biasing current
+ circuit of the Delta-Sigma modulator. The different BOOST
+ settings are applied to the entire modulator circuit, including
+ the voltage reference buffers.
+
+What: /sys/bus/iio/devices/iio:deviceX/boost_current_gain_available
+KernelVersion: 6.4
+Contact: linux-iio@vger.kernel.org
+Description:
+ Reading returns a list with the possible gain values for
+ the current biasing circuit of the Delta-Sigma modulator.
+
+What: /sys/bus/iio/devices/iio:deviceX/auto_zeroing_mux_enable
+KernelVersion: 6.4
+Contact: linux-iio@vger.kernel.org
+Description:
+ This attribute is used to enable the analog input multiplexer
+ auto-zeroing algorithm (the input multiplexer and the ADC
+ include an offset cancellation algorithm that cancels the offset
+ contribution of the ADC). When the offset cancellation algorithm
+ is enabled, ADC takes two conversions, one with the differential
+ input as VIN+/VIN-, one with VIN+/VIN- inverted. In this case the
+ conversion time is multiplied by two compared to the default
+ case where the algorithm is disabled. This technique allows the
+ cancellation of the ADC offset error and the achievement of
+ ultra-low offset without any digital calibration. The resulting
+ offset is the residue of the difference between the two
+ conversions, which is on the order of magnitude of the noise
+ floor. This offset is effectively canceled at every conversion,
+ so the residual offset error temperature drift is extremely low.
+ Write '1' to enable it, write '0' to disable it.
+
+What: /sys/bus/iio/devices/iio:deviceX/auto_zeroing_ref_enable
+KernelVersion: 6.4
+Contact: linux-iio@vger.kernel.org
+Description:
+ This attribute is used to enable the chopping algorithm for the
+ internal voltage reference buffer. This setting has no effect
+ when external voltage reference is selected.
+ Internal voltage reference buffer injects a certain quantity of
+ 1/f noise into the system that can be modulated with the
+ incoming input signals and can limit the SNR performance at
+ higher Oversampling Ratio values (over 256). To overcome this
+ limitation, the buffer includes an auto-zeroing algorithm that
+ greatly reduces (cancels out) the 1/f noise and cancels the
+ offset value of the reference buffer. As a result, the SNR of
+ the system is not affected by this 1/f noise component of the
+ reference buffer, even at maximum oversampling ratio values.
+ Write '1' to enable it, write '0' to disable it.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-adc-mt6360 b/Documentation/ABI/testing/sysfs-bus-iio-adc-mt6360
new file mode 100644
index 000000000000..e5a7b1c7cca3
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-adc-mt6360
@@ -0,0 +1,78 @@
+What: /sys/bus/iio/devices/iio:deviceX/in_voltage0_raw
+KernelVersion: 5.8.0
+Contact: gene_chen@richtek.com
+Description:
+ Indicated MT6360 USBID ADC which connected to connector ID pin.
+ Calculating with scale and offset returns voltage in uV
+
+What: /sys/bus/iio/devices/iio:deviceX/in_voltage1_raw
+KernelVersion: 5.8.0
+Contact: gene_chen@richtek.com
+Description:
+ Indicated MT6360 VBUS ADC with lower accuracy(+-75mA)
+ higher measure range(1~22mV)
+ Calculating with scale and offset returns voltage in uV
+
+What: /sys/bus/iio/devices/iio:deviceX/in_voltage2_raw
+KernelVersion: 5.8.0
+Contact: gene_chen@richtek.com
+Description:
+ Indicated MT6360 VBUS ADC with higher accuracy(+-30mA)
+ lower measure range(1~9.76V)
+ Calculating with scale and offset returns voltage in uV
+
+What: /sys/bus/iio/devices/iio:deviceX/in_voltage3_raw
+KernelVersion: 5.8.0
+Contact: gene_chen@richtek.com
+Description:
+ Indicated MT6360 VSYS ADC
+ Calculating with scale and offset returns voltage in uV
+
+What: /sys/bus/iio/devices/iio:deviceX/in_voltage4_raw
+KernelVersion: 5.8.0
+Contact: gene_chen@richtek.com
+Description:
+ Indicated MT6360 VBAT ADC
+ Calculating with scale and offset returns voltage in uV
+
+What: /sys/bus/iio/devices/iio:deviceX/in_current5_raw
+KernelVersion: 5.8.0
+Contact: gene_chen@richtek.com
+Description:
+ Indicated MT6360 IBUS ADC
+ Calculating with scale and offset returns voltage in uA
+
+What: /sys/bus/iio/devices/iio:deviceX/in_current6_raw
+KernelVersion: 5.8.0
+Contact: gene_chen@richtek.com
+Description:
+ Indicated MT6360 IBAT ADC
+ Calculating with scale and offset returns voltage in uA
+
+What: /sys/bus/iio/devices/iio:deviceX/in_current7_raw
+KernelVersion: 5.8.0
+Contact: gene_chen@richtek.com
+Description:
+ Indicated MT6360 CHG_VDDP ADC
+ Calculating with scale and offset returns voltage in uV
+
+What: /sys/bus/iio/devices/iio:deviceX/in_temp8_raw
+KernelVersion: 5.8.0
+Contact: gene_chen@richtek.com
+Description:
+ Indicated MT6360 IC junction temperature
+ Calculating with scale and offset returns temperature in degree
+
+What: /sys/bus/iio/devices/iio:deviceX/in_voltage9_raw
+KernelVersion: 5.8.0
+Contact: gene_chen@richtek.com
+Description:
+ Indicated MT6360 VREF_TS ADC
+ Calculating with scale and offset returns voltage in uV
+
+What: /sys/bus/iio/devices/iio:deviceX/in_voltage10_raw
+KernelVersion: 5.8.0
+Contact: gene_chen@richtek.com
+Description:
+ Indicated MT6360 TS ADC
+ Calculating with scale and offset returns voltage in uV
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-adc-pac1934 b/Documentation/ABI/testing/sysfs-bus-iio-adc-pac1934
new file mode 100644
index 000000000000..625b7f867847
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-adc-pac1934
@@ -0,0 +1,9 @@
+What: /sys/bus/iio/devices/iio:deviceX/in_shunt_resistorY
+KernelVersion: 6.7
+Contact: linux-iio@vger.kernel.org
+Description:
+ The value of the shunt resistor may be known only at runtime
+ and set by a client application. This attribute allows to
+ set its value in micro-ohms. X is the IIO index of the device.
+ Y is the channel number. The value is used to calculate
+ current, power and accumulated energy.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-adc-stm32 b/Documentation/ABI/testing/sysfs-bus-iio-adc-stm32
index efe4c85e3c8b..1975c7a1af34 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio-adc-stm32
+++ b/Documentation/ABI/testing/sysfs-bus-iio-adc-stm32
@@ -5,10 +5,13 @@ Description:
The STM32 ADC can be configured to use external trigger sources
(e.g. timers, pwm or exti gpio). Then, it can be tuned to start
conversions on external trigger by either:
+
- "rising-edge"
- "falling-edge"
- "both-edges".
+
Reading returns current trigger polarity.
+
Writing value before enabling conversions sets trigger polarity.
What: /sys/bus/iio/devices/triggerX/trigger_polarity_available
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-bno055 b/Documentation/ABI/testing/sysfs-bus-iio-bno055
new file mode 100644
index 000000000000..f32b1644e986
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-bno055
@@ -0,0 +1,81 @@
+What: /sys/bus/iio/devices/iio:deviceX/in_accel_raw_range
+KernelVersion: 6.1
+Contact: linux-iio@vger.kernel.org
+Description:
+ Raw (unscaled) range for acceleration readings. Unit after
+ application of scale is m/s^2. Note that this doesn't affects
+ the scale (which should be used when changing the maximum and
+ minimum readable value affects also the reading scaling factor).
+
+What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_raw_range
+KernelVersion: 6.1
+Contact: linux-iio@vger.kernel.org
+Description:
+ Range for angular velocity readings in radians per second. Note
+ that this does not affects the scale (which should be used when
+ changing the maximum and minimum readable value affects also the
+ reading scaling factor).
+
+What: /sys/bus/iio/devices/iio:deviceX/in_accel_raw_range_available
+KernelVersion: 6.1
+Contact: linux-iio@vger.kernel.org
+Description:
+ List of allowed values for in_accel_raw_range attribute
+
+What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_raw_range_available
+KernelVersion: 6.1
+Contact: linux-iio@vger.kernel.org
+Description:
+ List of allowed values for in_anglvel_raw_range attribute
+
+What: /sys/bus/iio/devices/iio:deviceX/in_magn_calibration_fast_enable
+KernelVersion: 6.1
+Contact: linux-iio@vger.kernel.org
+Description:
+ Can be 1 or 0. Enables/disables the "Fast Magnetometer
+ Calibration" HW function.
+
+What: /sys/bus/iio/devices/iio:deviceX/fusion_enable
+KernelVersion: 6.1
+Contact: linux-iio@vger.kernel.org
+Description:
+ Can be 1 or 0. Enables/disables the "sensor fusion" (a.k.a.
+ NDOF) HW function.
+
+What: /sys/bus/iio/devices/iio:deviceX/calibration_data
+KernelVersion: 6.1
+Contact: linux-iio@vger.kernel.org
+Description:
+ Reports the binary calibration data blob for the IMU sensors.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_accel_calibration_auto_status
+KernelVersion: 6.1
+Contact: linux-iio@vger.kernel.org
+Description:
+ Reports the autocalibration status for the accelerometer sensor.
+ Can be 0 (calibration non even enabled) or 1 to 5 where the greater
+ the number, the better the calibration status.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_gyro_calibration_auto_status
+KernelVersion: 6.1
+Contact: linux-iio@vger.kernel.org
+Description:
+ Reports the autocalibration status for the gyroscope sensor.
+ Can be 0 (calibration non even enabled) or 1 to 5 where the greater
+ the number, the better the calibration status.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_magn_calibration_auto_status
+KernelVersion: 6.1
+Contact: linux-iio@vger.kernel.org
+Description:
+ Reports the autocalibration status for the magnetometer sensor.
+ Can be 0 (calibration non even enabled) or 1 to 5 where the greater
+ the number, the better the calibration status.
+
+What: /sys/bus/iio/devices/iio:deviceX/sys_calibration_auto_status
+KernelVersion: 6.1
+Contact: linux-iio@vger.kernel.org
+Description:
+ Reports the status for the IMU overall autocalibration.
+ Can be 0 (calibration non even enabled) or 1 to 5 where the greater
+ the number, the better the calibration status.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-cdc-ad7746 b/Documentation/ABI/testing/sysfs-bus-iio-cdc-ad7746
new file mode 100644
index 000000000000..02ca8941dce1
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-cdc-ad7746
@@ -0,0 +1,11 @@
+What: /sys/.../iio:deviceX/in_capacitableY_calibbias_calibration
+What: /sys/.../iio:deviceX/in_capacitableY_calibscale_calibration
+KernelVersion: 6.1
+Contact: linux-iio@vger.kernel.org
+Description:
+ Write 1 to trigger a calibration of the calibbias or
+ calibscale. For calibscale, a full scale capacitance should
+ be connected to the capacitance input and a
+ calibscale_calibration then started. For calibbias see
+ the device datasheet section on "capacitive system offset
+ calibration".
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-chemical-sgp40 b/Documentation/ABI/testing/sysfs-bus-iio-chemical-sgp40
new file mode 100644
index 000000000000..a95547e874f1
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-chemical-sgp40
@@ -0,0 +1,17 @@
+What: /sys/bus/iio/devices/iio:deviceX/out_temp_raw
+Date: August 2021
+KernelVersion: 5.15
+Contact: Andreas Klinger <ak@it-klinger.de>
+Description:
+ Set the temperature. This value is sent to the sensor for
+ temperature compensation.
+ Default value: 25000 (25 °C)
+
+What: /sys/bus/iio/devices/iio:deviceX/out_humidityrelative_raw
+Date: August 2021
+KernelVersion: 5.15
+Contact: Andreas Klinger <ak@it-klinger.de>
+Description:
+ Set the relative humidity. This value is sent to the sensor for
+ humidity compensation.
+ Default value: 50000 (50 % relative humidity)
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-chemical-sunrise-co2 b/Documentation/ABI/testing/sysfs-bus-iio-chemical-sunrise-co2
new file mode 100644
index 000000000000..ee7aeb11709b
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-chemical-sunrise-co2
@@ -0,0 +1,38 @@
+What: /sys/bus/iio/devices/iio:deviceX/in_concentration_co2_calibration_factory
+Date: August 2021
+KernelVersion: 5.16
+Contact: Jacopo Mondi <jacopo@jmondi.org>
+Description:
+ Writing '1' triggers a 'Factory' calibration cycle.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_concentration_co2_calibration_background
+Date: August 2021
+KernelVersion: 5.16
+Contact: Jacopo Mondi <jacopo@jmondi.org>
+Description:
+ Writing '1' triggers a 'Background' calibration cycle.
+
+What: /sys/bus/iio/devices/iio:deviceX/error_status_available
+Date: August 2021
+KernelVersion: 5.16
+Contact: Jacopo Mondi <jacopo@jmondi.org>
+Description:
+ Reading returns the list of possible chip error status.
+ Available options are:
+ - 'error_fatal': Analog front-end initialization error
+ - 'error_i2c': Read/write to non-existing register
+ - 'error_algorithm': Corrupted parameters
+ - 'error_calibration': Calibration has failed
+ - 'error_self_diagnostic': Internal interface failure
+ - 'error_out_of_range': Measured concentration out of scale
+ - 'error_memory': Error during memory operations
+ - 'error_no_measurement': Cleared at first measurement
+ - 'error_low_voltage': Sensor regulated voltage too low
+ - 'error_measurement_timeout': Unable to complete measurement
+
+What: /sys/bus/iio/devices/iio:deviceX/error_status
+Date: August 2021
+KernelVersion: 5.16
+Contact: Jacopo Mondi <jacopo@jmondi.org>
+Description:
+ Reading returns the current chip error status.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-counter-104-quad-8 b/Documentation/ABI/testing/sysfs-bus-iio-counter-104-quad-8
deleted file mode 100644
index 7fac2c268d9a..000000000000
--- a/Documentation/ABI/testing/sysfs-bus-iio-counter-104-quad-8
+++ /dev/null
@@ -1,117 +0,0 @@
-What: /sys/bus/iio/devices/iio:deviceX/in_count_count_mode_available
-What: /sys/bus/iio/devices/iio:deviceX/in_count_noise_error_available
-What: /sys/bus/iio/devices/iio:deviceX/in_count_quadrature_mode_available
-What: /sys/bus/iio/devices/iio:deviceX/in_index_index_polarity_available
-What: /sys/bus/iio/devices/iio:deviceX/in_index_synchronous_mode_available
-KernelVersion: 4.10
-Contact: linux-iio@vger.kernel.org
-Description:
- Discrete set of available values for the respective counter
- configuration are listed in this file.
-
-What: /sys/bus/iio/devices/iio:deviceX/in_countY_count_mode
-KernelVersion: 4.10
-Contact: linux-iio@vger.kernel.org
-Description:
- Count mode for channel Y. Four count modes are available:
- normal, range limit, non-recycle, and modulo-n. The preset value
- for channel Y is used by the count mode where required.
-
- Normal:
- Counting is continuous in either direction.
-
- Range Limit:
- An upper or lower limit is set, mimicking limit switches
- in the mechanical counterpart. The upper limit is set to
- the preset value, while the lower limit is set to 0. The
- counter freezes at count = preset when counting up, and
- at count = 0 when counting down. At either of these
- limits, the counting is resumed only when the count
- direction is reversed.
-
- Non-recycle:
- Counter is disabled whenever a 24-bit count overflow or
- underflow takes place. The counter is re-enabled when a
- new count value is loaded to the counter via a preset
- operation or write to raw.
-
- Modulo-N:
- A count boundary is set between 0 and the preset value.
- The counter is reset to 0 at count = preset when
- counting up, while the counter is set to the preset
- value at count = 0 when counting down; the counter does
- not freeze at the bundary points, but counts
- continuously throughout.
-
-What: /sys/bus/iio/devices/iio:deviceX/in_countY_noise_error
-KernelVersion: 4.10
-Contact: linux-iio@vger.kernel.org
-Description:
- Read-only attribute that indicates whether excessive noise is
- present at the channel Y count inputs in quadrature clock mode;
- irrelevant in non-quadrature clock mode.
-
-What: /sys/bus/iio/devices/iio:deviceX/in_countY_preset
-KernelVersion: 4.10
-Contact: linux-iio@vger.kernel.org
-Description:
- If the counter device supports preset registers, the preset
- count for channel Y is provided by this attribute.
-
-What: /sys/bus/iio/devices/iio:deviceX/in_countY_quadrature_mode
-KernelVersion: 4.10
-Contact: linux-iio@vger.kernel.org
-Description:
- Configure channel Y counter for non-quadrature or quadrature
- clock mode. Selecting non-quadrature clock mode will disable
- synchronous load mode. In quadrature clock mode, the channel Y
- scale attribute selects the encoder phase division (scale of 1
- selects full-cycle, scale of 0.5 selects half-cycle, scale of
- 0.25 selects quarter-cycle) processed by the channel Y counter.
-
- Non-quadrature:
- The filter and decoder circuit are bypassed. Encoder A
- input serves as the count input and B as the UP/DOWN
- direction control input, with B = 1 selecting UP Count
- mode and B = 0 selecting Down Count mode.
-
- Quadrature:
- Encoder A and B inputs are digitally filtered and
- decoded for UP/DN clock.
-
-What: /sys/bus/iio/devices/iio:deviceX/in_countY_set_to_preset_on_index
-KernelVersion: 4.10
-Contact: linux-iio@vger.kernel.org
-Description:
- Whether to set channel Y counter with channel Y preset value
- when channel Y index input is active, or continuously count.
- Valid attribute values are boolean.
-
-What: /sys/bus/iio/devices/iio:deviceX/in_indexY_index_polarity
-KernelVersion: 4.10
-Contact: linux-iio@vger.kernel.org
-Description:
- Active level of channel Y index input; irrelevant in
- non-synchronous load mode.
-
-What: /sys/bus/iio/devices/iio:deviceX/in_indexY_synchronous_mode
-KernelVersion: 4.10
-Contact: linux-iio@vger.kernel.org
-Description:
- Configure channel Y counter for non-synchronous or synchronous
- load mode. Synchronous load mode cannot be selected in
- non-quadrature clock mode.
-
- Non-synchronous:
- A logic low level is the active level at this index
- input. The index function (as enabled via
- set_to_preset_on_index) is performed directly on the
- active level of the index input.
-
- Synchronous:
- Intended for interfacing with encoder Index output in
- quadrature clock mode. The active level is configured
- via index_polarity. The index function (as enabled via
- set_to_preset_on_index) is performed synchronously with
- the quadrature clock on the active level of the index
- input.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-cros-ec b/Documentation/ABI/testing/sysfs-bus-iio-cros-ec
index 0e95c2ca105c..9e3926243797 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio-cros-ec
+++ b/Documentation/ABI/testing/sysfs-bus-iio-cros-ec
@@ -4,25 +4,15 @@ KernelVersion: 4.7
Contact: linux-iio@vger.kernel.org
Description:
Writing '1' will perform a FOC (Fast Online Calibration). The
- corresponding calibration offsets can be read from *_calibbias
+ corresponding calibration offsets can be read from `*_calibbias`
entries.
-What: /sys/bus/iio/devices/iio:deviceX/location
-Date: July 2015
-KernelVersion: 4.7
-Contact: linux-iio@vger.kernel.org
-Description:
- This attribute returns a string with the physical location where
- the motion sensor is placed. For example, in a laptop a motion
- sensor can be located on the base or on the lid. Current valid
- values are 'base' and 'lid'.
-
What: /sys/bus/iio/devices/iio:deviceX/id
-Date: Septembre 2017
+Date: September 2017
KernelVersion: 4.14
Contact: linux-iio@vger.kernel.org
Description:
- This attribute is exposed by the CrOS EC legacy accelerometer
- driver and represents the sensor ID as exposed by the EC. This
- ID is used by the Android sensor service hardware abstraction
- layer (sensor HAL) through the Android container on ChromeOS.
+ This attribute is exposed by the CrOS EC sensors driver and
+ represents the sensor ID as exposed by the EC. This ID is used
+ by the Android sensor service hardware abstraction layer (sensor
+ HAL) through the Android container on ChromeOS.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-dac b/Documentation/ABI/testing/sysfs-bus-iio-dac
new file mode 100644
index 000000000000..810eaac5533c
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-dac
@@ -0,0 +1,61 @@
+What: /sys/bus/iio/devices/iio:deviceX/out_currentY_toggle_en
+KernelVersion: 5.18
+Contact: linux-iio@vger.kernel.org
+Description:
+ Toggle enable. Write 1 to enable toggle or 0 to disable it. This
+ is useful when one wants to change the DAC output codes. For
+ autonomous toggling, the way it should be done is:
+
+ - disable toggle operation;
+ - change out_currentY_rawN, where N is the integer value of the symbol;
+ - enable toggle operation.
+
+What: /sys/bus/iio/devices/iio:deviceX/out_currentY_rawN
+KernelVersion: 5.18
+Contact: linux-iio@vger.kernel.org
+Description:
+ This attribute has the same meaning as out_currentY_raw. It is
+ specific to toggle enabled channels and refers to the DAC output
+ code in INPUT_N (_rawN), where N is the integer value of the symbol.
+ The same scale and offset as in out_currentY_raw applies.
+
+What: /sys/bus/iio/devices/iio:deviceX/out_currentY_symbol
+KernelVersion: 5.18
+Contact: linux-iio@vger.kernel.org
+Description:
+ Performs a SW switch to a predefined output symbol. This attribute
+ is specific to toggle enabled channels and allows switching between
+ multiple predefined symbols. Each symbol corresponds to a different
+ output, denoted as out_currentY_rawN, where N is the integer value
+ of the symbol. Writing an integer value N will select out_currentY_rawN.
+
+What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_toggle_en
+KernelVersion: 5.18
+Contact: linux-iio@vger.kernel.org
+Description:
+ Toggle enable. Write 1 to enable toggle or 0 to disable it. This
+ is useful when one wants to change the DAC output codes. For
+ autonomous toggling, the way it should be done is:
+
+ - disable toggle operation;
+ - change out_voltageY_rawN, where N is the integer value of the symbol;
+ - enable toggle operation.
+
+What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_rawN
+KernelVersion: 5.18
+Contact: linux-iio@vger.kernel.org
+Description:
+ This attribute has the same meaning as out_currentY_raw. It is
+ specific to toggle enabled channels and refers to the DAC output
+ code in INPUT_N (_rawN), where N is the integer value of the symbol.
+ The same scale and offset as in out_currentY_raw applies.
+
+What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_symbol
+KernelVersion: 5.18
+Contact: linux-iio@vger.kernel.org
+Description:
+ Performs a SW switch to a predefined output symbol. This attribute
+ is specific to toggle enabled channels and allows switching between
+ multiple predefined symbols. Each symbol corresponds to a different
+ output, denoted as out_voltageY_rawN, where N is the integer value
+ of the symbol. Writing an integer value N will select out_voltageY_rawN.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-dac-ad5766 b/Documentation/ABI/testing/sysfs-bus-iio-dac-ad5766
new file mode 100644
index 000000000000..7fbcba15bf1e
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-dac-ad5766
@@ -0,0 +1,31 @@
+What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_dither_enable
+KernelVersion: 5.12
+Contact: linux-iio@vger.kernel.org
+Description:
+ Dither enable. Write 1 to enable dither or 0 to disable it.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_dither_invert
+KernelVersion: 5.12
+Contact: linux-iio@vger.kernel.org
+Description:
+ Inverts the dither applied to the selected DAC channel. Dither is not
+ inverted by default. Write "1" to invert dither.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_dither_scale_available
+KernelVersion: 5.12
+Contact: linux-iio@vger.kernel.org
+Description:
+ Returns possible scalings available for the current channel.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_dither_scale
+KernelVersion: 5.12
+Contact: linux-iio@vger.kernel.org
+Description:
+ Scales the dither before it is applied to the selected channel.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_dither_source
+KernelVersion: 5.12
+Contact: linux-iio@vger.kernel.org
+Description:
+ Selects dither source applied to the selected channel. Write "0" to
+ select N0 source, write "1" to select N1 source.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-dac-ltc2688 b/Documentation/ABI/testing/sysfs-bus-iio-dac-ltc2688
new file mode 100644
index 000000000000..ae95a5477382
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-dac-ltc2688
@@ -0,0 +1,55 @@
+What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_dither_en
+KernelVersion: 5.18
+Contact: linux-iio@vger.kernel.org
+Description:
+ Dither enable. Write 1 to enable dither or 0 to disable it. This is useful
+ for changing the dither parameters. They way it should be done is:
+
+ - disable dither operation;
+ - change dither parameters (eg: frequency, phase...);
+ - enabled dither operation
+
+What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_dither_raw
+KernelVersion: 5.18
+Contact: linux-iio@vger.kernel.org
+Description:
+ This raw, unscaled value refers to the dither signal amplitude.
+ The same scale as in out_voltageY_raw applies. However, the
+ offset might be different as it's always 0 for this attribute.
+
+What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_dither_raw_available
+KernelVersion: 5.18
+Contact: linux-iio@vger.kernel.org
+Description:
+ Available range for dither raw amplitude values.
+
+What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_dither_offset
+KernelVersion: 5.18
+Contact: linux-iio@vger.kernel.org
+Description:
+ Offset applied to out_voltageY_dither_raw. Read only attribute
+ always set to 0.
+
+What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_dither_frequency
+KernelVersion: 5.18
+Contact: linux-iio@vger.kernel.org
+Description:
+ Sets the dither signal frequency. Units are in Hz.
+
+What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_dither_frequency_available
+KernelVersion: 5.18
+Contact: linux-iio@vger.kernel.org
+Description:
+ Returns the available values for the dither frequency.
+
+What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_dither_phase
+KernelVersion: 5.18
+Contact: linux-iio@vger.kernel.org
+Description:
+ Sets the dither signal phase. Units are in Radians.
+
+What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_dither_phase_available
+KernelVersion: 5.18
+Contact: linux-iio@vger.kernel.org
+Description:
+ Returns the available values for the dither phase.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-dfsdm-adc-stm32 b/Documentation/ABI/testing/sysfs-bus-iio-dfsdm-adc-stm32
index da9822309f07..91439d6d60b5 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio-dfsdm-adc-stm32
+++ b/Documentation/ABI/testing/sysfs-bus-iio-dfsdm-adc-stm32
@@ -3,14 +3,20 @@ KernelVersion: 4.14
Contact: arnaud.pouliquen@st.com
Description:
For audio purpose only.
+
Used by audio driver to set/get the spi input frequency.
+
This is mandatory if DFSDM is slave on SPI bus, to
provide information on the SPI clock frequency during runtime
Notice that the SPI frequency should be a multiple of sample
frequency to ensure the precision.
- if DFSDM input is SPI master
+
+ if DFSDM input is SPI master:
+
Reading SPI clkout frequency,
error on writing
+
If DFSDM input is SPI Slave:
+
Reading returns value previously set.
- Writing value before starting conversions. \ No newline at end of file
+ Writing value before starting conversions.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-distance-srf08 b/Documentation/ABI/testing/sysfs-bus-iio-distance-srf08
index 0a1ca1487fa9..9dae94aa880b 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio-distance-srf08
+++ b/Documentation/ABI/testing/sysfs-bus-iio-distance-srf08
@@ -1,12 +1,4 @@
-What /sys/bus/iio/devices/iio:deviceX/sensor_sensitivity
-Date: January 2017
-KernelVersion: 4.11
-Contact: linux-iio@vger.kernel.org
-Description:
- Show or set the gain boost of the amp, from 0-31 range.
- default 31
-
-What /sys/bus/iio/devices/iio:deviceX/sensor_max_range
+What: /sys/bus/iio/devices/iio:deviceX/sensor_max_range
Date: January 2017
KernelVersion: 4.11
Contact: linux-iio@vger.kernel.org
@@ -15,8 +7,11 @@ Description:
first object echoed in meters. Default value is 6.020.
This setting limits the time the driver is waiting for a
echo.
+
Showing the range of available values is represented as the
minimum value, the step and the maximum value, all enclosed
in square brackets.
- Example:
- [0.043 0.043 11.008]
+
+ Example::
+
+ [0.043 0.043 11.008]
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-dma-buffer b/Documentation/ABI/testing/sysfs-bus-iio-dma-buffer
new file mode 100644
index 000000000000..d526e6571001
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-dma-buffer
@@ -0,0 +1,19 @@
+What: /sys/bus/iio/devices/iio:deviceX/buffer/length_align_bytes
+KernelVersion: 5.4
+Contact: linux-iio@vger.kernel.org
+Description:
+ DMA buffers tend to have a alignment requirement for the
+ buffers. If this alignment requirement is not met samples might
+ be dropped from the buffer.
+
+ This property reports the alignment requirements in bytes.
+ This means that the buffer size in bytes needs to be a integer
+ multiple of the number reported by this file.
+
+ The alignment requirements in number of sample sets will depend
+ on the enabled channels and the bytes per channel. This means
+ that the alignment requirement in samples sets might change
+ depending on which and how many channels are enabled. Whereas
+ the alignment requirement reported in bytes by this property
+ will remain static and does not depend on which channels are
+ enabled.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-filter-admv8818 b/Documentation/ABI/testing/sysfs-bus-iio-filter-admv8818
new file mode 100644
index 000000000000..c431f0a13cf5
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-filter-admv8818
@@ -0,0 +1,18 @@
+What: /sys/bus/iio/devices/iio:deviceX/filter_mode_available
+KernelVersion:
+Contact: linux-iio@vger.kernel.org
+Description:
+ Reading this returns the valid values that can be written to the
+ filter_mode attribute:
+
+ - auto -> Adjust bandpass filter to track changes in input clock rate.
+ - manual -> disable/unregister the clock rate notifier / input clock tracking.
+ - bypass -> bypass low pass filter, high pass filter and disable/unregister
+ the clock rate notifier
+
+What: /sys/bus/iio/devices/iio:deviceX/filter_mode
+KernelVersion:
+Contact: linux-iio@vger.kernel.org
+Description:
+ This attribute configures the filter mode.
+ Reading returns the actual mode.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-frequency-ad9523 b/Documentation/ABI/testing/sysfs-bus-iio-frequency-ad9523
index a91aeabe7b24..d065cda7dd96 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio-frequency-ad9523
+++ b/Documentation/ABI/testing/sysfs-bus-iio-frequency-ad9523
@@ -8,7 +8,9 @@ KernelVersion: 3.4.0
Contact: linux-iio@vger.kernel.org
Description:
Reading returns either '1' or '0'.
+
'1' means that the clock in question is present.
+
'0' means that the clock is missing.
What: /sys/bus/iio/devices/iio:deviceX/pllY_locked
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-frequency-adf4371 b/Documentation/ABI/testing/sysfs-bus-iio-frequency-adf4371
new file mode 100644
index 000000000000..7fe6935d1448
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-frequency-adf4371
@@ -0,0 +1,11 @@
+What: /sys/bus/iio/devices/iio:deviceX/out_altvoltageY_name
+KernelVersion:
+Contact: linux-iio@vger.kernel.org
+Description:
+ Reading returns the datasheet name for channel Y::
+
+ out_altvoltage0_name: RF8x
+ out_altvoltage1_name: RFAUX8x
+ out_altvoltage2_name: RF16x
+ out_altvoltage3_name: RF32x
+
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-frequency-admv1013 b/Documentation/ABI/testing/sysfs-bus-iio-frequency-admv1013
new file mode 100644
index 000000000000..9cf8cd0dd2df
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-frequency-admv1013
@@ -0,0 +1,38 @@
+What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage0-altvoltage1_i_calibphase
+KernelVersion:
+Contact: linux-iio@vger.kernel.org
+Description:
+ Read/write unscaled value for the Local Oscillatior path quadrature I phase shift.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage0-altvoltage1_q_calibphase
+KernelVersion:
+Contact: linux-iio@vger.kernel.org
+Description:
+ Read/write unscaled value for the Local Oscillatior path quadrature Q phase shift.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage0_i_calibbias
+KernelVersion:
+Contact: linux-iio@vger.kernel.org
+Description:
+ Read/write value for the Local Oscillatior Feedthrough Offset Calibration I Positive
+ side.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage0_q_calibbias
+KernelVersion:
+Contact: linux-iio@vger.kernel.org
+Description:
+ Read/write value for the Local Oscillatior Feedthrough Offset Calibration Q Positive side.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage1_i_calibbias
+KernelVersion:
+Contact: linux-iio@vger.kernel.org
+Description:
+ Read/write raw value for the Local Oscillatior Feedthrough Offset Calibration I Negative
+ side.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage1_q_calibbias
+KernelVersion:
+Contact: linux-iio@vger.kernel.org
+Description:
+ Read/write raw value for the Local Oscillatior Feedthrough Offset Calibration Q Negative
+ side.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-frequency-admv1014 b/Documentation/ABI/testing/sysfs-bus-iio-frequency-admv1014
new file mode 100644
index 000000000000..395010a0ef8b
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-frequency-admv1014
@@ -0,0 +1,23 @@
+What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage0_i_calibscale_coarse
+KernelVersion: 5.18
+Contact: linux-iio@vger.kernel.org
+Description:
+ Read/write value for the digital attenuator gain (IF_I) with coarse steps.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage0_q_calibscale_coarse
+KernelVersion: 5.18
+Contact: linux-iio@vger.kernel.org
+Description:
+ Read/write value for the digital attenuator gain (IF_Q) with coarse steps.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage0_i_calibscale_fine
+KernelVersion: 5.18
+Contact: linux-iio@vger.kernel.org
+Description:
+ Read/write value for the digital attenuator gain (IF_I) with fine steps.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage0_q_calibscale_fine
+KernelVersion: 5.18
+Contact: linux-iio@vger.kernel.org
+Description:
+ Read/write value for the digital attenuator gain (IF_Q) with fine steps.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-health-afe440x b/Documentation/ABI/testing/sysfs-bus-iio-health-afe440x
index 6adba9058b22..a8e04b41d9ff 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio-health-afe440x
+++ b/Documentation/ABI/testing/sysfs-bus-iio-health-afe440x
@@ -6,14 +6,20 @@ Description:
Get measured values from the ADC for these stages. Y is the
specific stage number corresponding to datasheet stage names
as follows:
- 1 -> LED2
- 2 -> ALED2/LED3
- 3 -> LED1
- 4 -> ALED1/LED4
+
+ == ==========
+ 1 LED2
+ 2 ALED2/LED3
+ 3 LED1
+ 4 ALED1/LED4
+ == ==========
+
Note that channels 5 and 6 represent LED2-ALED2 and LED1-ALED1
respectively which simply helper channels containing the
calculated difference in the value of stage 1 - 2 and 3 - 4.
The values are expressed in 24-bit twos complement.
+ The LED current for the stage is controlled via
+ out_currentY_raw.
What: /sys/bus/iio/devices/iio:deviceX/in_intensityY_offset
Date: May 2016
@@ -31,11 +37,3 @@ Contact: Andrew F. Davis <afd@ti.com>
Description:
Get and set the resistance and the capacitance settings for the
Transimpedance Amplifier during the associated stage.
-
-What: /sys/bus/iio/devices/iio:deviceX/out_currentY_raw
-Date: May 2016
-KernelVersion:
-Contact: Andrew F. Davis <afd@ti.com>
-Description:
- Get and set the LED current for the specified LED active during
- this stage. Y is the specific stage number.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-humidity b/Documentation/ABI/testing/sysfs-bus-iio-humidity
new file mode 100644
index 000000000000..cb0d7e75d297
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-humidity
@@ -0,0 +1,10 @@
+What: /sys/bus/iio/devices/iio:deviceX/out_current_heater_raw
+What: /sys/bus/iio/devices/iio:deviceX/out_current_heater_raw_available
+KernelVersion: 5.3.8
+Contact: linux-iio@vger.kernel.org
+Description:
+ Controls the heater device within the humidity sensor to get
+ rid of excess condensation.
+
+ In some devices, this is just a switch in which case 0 = OFF,
+ and 1 = ON.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-humidity-hdc100x b/Documentation/ABI/testing/sysfs-bus-iio-humidity-hdc100x
deleted file mode 100644
index b72bb62552cf..000000000000
--- a/Documentation/ABI/testing/sysfs-bus-iio-humidity-hdc100x
+++ /dev/null
@@ -1,9 +0,0 @@
-What: /sys/bus/iio/devices/iio:deviceX/out_current_heater_raw
-What: /sys/bus/iio/devices/iio:deviceX/out_current_heater_raw_available
-KernelVersion: 4.3
-Contact: linux-iio@vger.kernel.org
-Description:
- Controls the heater device within the humidity sensor to get
- rid of excess condensation.
-
- Valid control values are 0 = OFF, and 1 = ON.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-impedance-analyzer-ad5933 b/Documentation/ABI/testing/sysfs-bus-iio-impedance-analyzer-ad5933
new file mode 100644
index 000000000000..0e86747c67f8
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-impedance-analyzer-ad5933
@@ -0,0 +1,35 @@
+What: /sys/bus/iio/devices/iio:deviceX/out_altvoltageY_frequency_start
+Date: March 2019
+KernelVersion: 3.1.0
+Contact: linux-iio@vger.kernel.org
+Description:
+ Frequency sweep start frequency in Hz.
+
+What: /sys/bus/iio/devices/iio:deviceX/out_altvoltageY_frequency_increment
+Date: March 2019
+KernelVersion: 3.1.0
+Contact: linux-iio@vger.kernel.org
+Description:
+ Frequency increment in Hz (step size) between consecutive
+ frequency points along the sweep.
+
+What: /sys/bus/iio/devices/iio:deviceX/out_altvoltageY_frequency_points
+Date: March 2019
+KernelVersion: 3.1.0
+Contact: linux-iio@vger.kernel.org
+Description:
+ Number of frequency points (steps) in the frequency sweep.
+ This value, in conjunction with the
+ out_altvoltageY_frequency_start and the
+ out_altvoltageY_frequency_increment, determines the frequency
+ sweep range for the sweep operation.
+
+What: /sys/bus/iio/devices/iio:deviceX/out_altvoltageY_settling_cycles
+Date: March 2019
+KernelVersion: 3.1.0
+Contact: linux-iio@vger.kernel.org
+Description:
+ Number of output excitation cycles (settling time cycles)
+ that are allowed to pass through the unknown impedance,
+ after each frequency increment, and before the ADC is triggered
+ to perform a conversion sequence of the response signal.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-ina2xx-adc b/Documentation/ABI/testing/sysfs-bus-iio-ina2xx-adc
index 8916f7ec6507..8dbca113112d 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio-ina2xx-adc
+++ b/Documentation/ABI/testing/sysfs-bus-iio-ina2xx-adc
@@ -13,12 +13,3 @@ Description:
available for reading data. However, samples can be occasionally skipped
or repeated, depending on the beat between the capture and conversion
rates.
-
-What: /sys/bus/iio/devices/iio:deviceX/in_shunt_resistor
-Date: December 2015
-KernelVersion: 4.4
-Contact: linux-iio@vger.kernel.org
-Description:
- The value of the shunt resistor may be known only at runtime fom an
- eeprom content read by a client application. This attribute allows to
- set its value in ohms.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-inv_icm42600 b/Documentation/ABI/testing/sysfs-bus-iio-inv_icm42600
new file mode 100644
index 000000000000..7eeacfb7650d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-inv_icm42600
@@ -0,0 +1,18 @@
+What: /sys/bus/iio/devices/iio:deviceX/in_accel_power_mode
+KernelVersion: 6.11
+Contact: linux-iio@vger.kernel.org
+Description:
+ Accelerometer power mode. Setting this attribute will set the
+ requested power mode to use if the ODR support it. If ODR
+ support only 1 mode, power mode will be enforced.
+ Reading this attribute will return the current accelerometer
+ power mode if the sensor is on, or the requested value if the
+ sensor is off. The value between real and requested value can
+ be different for ODR supporting only 1 mode.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_accel_power_mode_available
+KernelVersion: 6.11
+Contact: linux-iio@vger.kernel.org
+Description:
+ List of available accelerometer power modes that can be set in
+ in_accel_power_mode attribute.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-light-isl29018 b/Documentation/ABI/testing/sysfs-bus-iio-light-isl29018
index f0ce0a0476ea..220206a20d98 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio-light-isl29018
+++ b/Documentation/ABI/testing/sysfs-bus-iio-light-isl29018
@@ -15,5 +15,7 @@ Description:
Scheme 0 has wider dynamic range, Scheme 1 proximity detection
is less affected by the ambient IR noise variation.
- 0 Sensing IR from LED and ambient
- 1 Sensing IR from LED with ambient IR rejection
+ == =============================================
+ 0 Sensing IR from LED and ambient
+ 1 Sensing IR from LED with ambient IR rejection
+ == =============================================
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-light-lm3533-als b/Documentation/ABI/testing/sysfs-bus-iio-light-lm3533-als
index 22c5ea670971..c476d48d0f82 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio-light-lm3533-als
+++ b/Documentation/ABI/testing/sysfs-bus-iio-light-lm3533-als
@@ -41,14 +41,6 @@ Description:
Get the current light zone (0..4) as defined by the
in_illuminance0_threshY_{falling,rising} thresholds.
-What: /sys/bus/iio/devices/iio:deviceX/out_currentY_raw
-Date: May 2012
-KernelVersion: 3.5
-Contact: Johan Hovold <jhovold@gmail.com>
-Description:
- Get output current for channel Y (0..255), that is,
- out_currentY_currentZ_raw, where Z is the current zone.
-
What: /sys/bus/iio/devices/iio:deviceX/out_currentY_currentZ_raw
Date: May 2012
KernelVersion: 3.5
@@ -59,3 +51,6 @@ Description:
These values correspond to the ALS-mapper target registers for
ALS-mapper Y + 1.
+
+ Note that out_currentY_raw provides the current for the
+ current zone.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-light-tsl2772 b/Documentation/ABI/testing/sysfs-bus-iio-light-tsl2772
new file mode 100644
index 000000000000..b2798b258bf7
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-light-tsl2772
@@ -0,0 +1,13 @@
+What: /sys/bus/iio/devices/device[n]/in_illuminance0_calibrate
+KernelVersion: 3.3-rc1
+Contact: linux-iio@vger.kernel.org
+Description:
+ Causes an internal calibration of the als gain trim
+ value which is later used in calculating illuminance in lux.
+
+What: /sys/bus/iio/devices/device[n]/in_proximity0_calibrate
+KernelVersion: 3.3-rc1
+Contact: linux-iio@vger.kernel.org
+Description:
+ Causes a recalculation and adjustment to the
+ proximity_thresh_rising_value.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-lptimer-stm32 b/Documentation/ABI/testing/sysfs-bus-iio-lptimer-stm32
deleted file mode 100644
index ad2cc63e4bf8..000000000000
--- a/Documentation/ABI/testing/sysfs-bus-iio-lptimer-stm32
+++ /dev/null
@@ -1,57 +0,0 @@
-What: /sys/bus/iio/devices/iio:deviceX/in_count0_preset
-KernelVersion: 4.13
-Contact: fabrice.gasnier@st.com
-Description:
- Reading returns the current preset value. Writing sets the
- preset value. Encoder counts continuously from 0 to preset
- value, depending on direction (up/down).
-
-What: /sys/bus/iio/devices/iio:deviceX/in_count_quadrature_mode_available
-KernelVersion: 4.13
-Contact: fabrice.gasnier@st.com
-Description:
- Reading returns the list possible quadrature modes.
-
-What: /sys/bus/iio/devices/iio:deviceX/in_count0_quadrature_mode
-KernelVersion: 4.13
-Contact: fabrice.gasnier@st.com
-Description:
- Configure the device counter quadrature modes:
- - non-quadrature:
- Encoder IN1 input servers as the count input (up
- direction).
- - quadrature:
- Encoder IN1 and IN2 inputs are mixed to get direction
- and count.
-
-What: /sys/bus/iio/devices/iio:deviceX/in_count_polarity_available
-KernelVersion: 4.13
-Contact: fabrice.gasnier@st.com
-Description:
- Reading returns the list possible active edges.
-
-What: /sys/bus/iio/devices/iio:deviceX/in_count0_polarity
-KernelVersion: 4.13
-Contact: fabrice.gasnier@st.com
-Description:
- Configure the device encoder/counter active edge:
- - rising-edge
- - falling-edge
- - both-edges
-
- In non-quadrature mode, device counts up on active edge.
- In quadrature mode, encoder counting scenarios are as follows:
- ----------------------------------------------------------------
- | Active | Level on | IN1 signal | IN2 signal |
- | edge | opposite |------------------------------------------
- | | signal | Rising | Falling | Rising | Falling |
- ----------------------------------------------------------------
- | Rising | High -> | Down | - | Up | - |
- | edge | Low -> | Up | - | Down | - |
- ----------------------------------------------------------------
- | Falling | High -> | - | Up | - | Down |
- | edge | Low -> | - | Down | - | Up |
- ----------------------------------------------------------------
- | Both | High -> | Down | Up | Up | Down |
- | edges | Low -> | Up | Down | Down | Up |
- ----------------------------------------------------------------
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-magnetometer-hmc5843 b/Documentation/ABI/testing/sysfs-bus-iio-magnetometer-hmc5843
index 6275e9f56e6c..13f099ef6a95 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio-magnetometer-hmc5843
+++ b/Documentation/ABI/testing/sysfs-bus-iio-magnetometer-hmc5843
@@ -5,11 +5,16 @@ Contact: linux-iio@vger.kernel.org
Description:
Current configuration and available configurations
for the bias current.
- normal - Normal measurement configurations (default)
- positivebias - Positive bias configuration
- negativebias - Negative bias configuration
- disabled - Only available on HMC5983. Disables magnetic
+
+ ============ ============================================
+ normal Normal measurement configurations (default)
+ positivebias Positive bias configuration
+ negativebias Negative bias configuration
+ disabled Only available on HMC5983. Disables magnetic
sensor and enables temperature sensor.
- Note: The effect of this configuration may vary
- according to the device. For exact documentation
- check the device's datasheet.
+ ============ ============================================
+
+ Note:
+ The effect of this configuration may vary
+ according to the device. For exact documentation
+ check the device's datasheet.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-proximity b/Documentation/ABI/testing/sysfs-bus-iio-proximity
new file mode 100644
index 000000000000..9b9d1cc9b703
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-proximity
@@ -0,0 +1,24 @@
+What: /sys/bus/iio/devices/iio:deviceX/in_proximity_nearlevel
+Date: March 2020
+KernelVersion: 5.7
+Contact: linux-iio@vger.kernel.org
+Description:
+ Near level for proximity sensors. This is a single integer
+ value that tells user space when an object should be
+ considered close to the device. If the value read from the
+ sensor is above or equal to the value in this file an object
+ should typically be considered near.
+
+What: /sys/bus/iio/devices/iio:deviceX/sensor_sensitivity
+Date: March 2014
+KernelVersion: 3.15
+Contact: linux-iio@vger.kernel.org
+Description:
+ Proximity sensors sometimes have a controllable amplifier
+ on the signal from which time of flight measurements are
+ taken.
+ The appropriate values to take is dependent on both the
+ sensor and its operating environment:
+ * as3935 (0-31 range)
+ 18 = indoors (default)
+ 14 = outdoors
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-proximity-as3935 b/Documentation/ABI/testing/sysfs-bus-iio-proximity-as3935
index 9a17ab5036a4..1e5c40775a6c 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio-proximity-as3935
+++ b/Documentation/ABI/testing/sysfs-bus-iio-proximity-as3935
@@ -1,4 +1,4 @@
-What /sys/bus/iio/devices/iio:deviceX/in_proximity_input
+What: /sys/bus/iio/devices/iio:deviceX/in_proximity_input
Date: March 2014
KernelVersion: 3.15
Contact: Matt Ranostay <matt.ranostay@konsulko.com>
@@ -6,15 +6,6 @@ Description:
Get the current distance in meters of storm (1km steps)
1000-40000 = distance in meters
-What /sys/bus/iio/devices/iio:deviceX/sensor_sensitivity
-Date: March 2014
-KernelVersion: 3.15
-Contact: Matt Ranostay <matt.ranostay@konsulko.com>
-Description:
- Show or set the gain boost of the amp, from 0-31 range.
- 18 = indoors (default)
- 14 = outdoors
-
What /sys/bus/iio/devices/iio:deviceX/noise_level_tripped
Date: May 2017
KernelVersion: 4.13
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-resolver-ad2s1210 b/Documentation/ABI/testing/sysfs-bus-iio-resolver-ad2s1210
new file mode 100644
index 000000000000..f92c79342b93
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-resolver-ad2s1210
@@ -0,0 +1,27 @@
+What: /sys/bus/iio/devices/iio:deviceX/events/in_altvoltage0_mag_rising_reset_max
+KernelVersion: 6.7
+Contact: linux-iio@vger.kernel.org
+Description:
+ Reading returns the current Degradation of Signal Reset Maximum
+ Threshold value in millivolts. Writing sets the value.
+
+What: /sys/bus/iio/devices/iio:deviceX/events/in_altvoltage0_mag_rising_reset_max_available
+KernelVersion: 6.7
+Contact: linux-iio@vger.kernel.org
+Description:
+ Reading returns the allowable voltage range for
+ in_altvoltage0_mag_rising_reset_max.
+
+What: /sys/bus/iio/devices/iio:deviceX/events/in_altvoltage0_mag_rising_reset_min
+KernelVersion: 6.7
+Contact: linux-iio@vger.kernel.org
+Description:
+ Reading returns the current Degradation of Signal Reset Minimum
+ Threshold value in millivolts. Writing sets the value.
+
+What: /sys/bus/iio/devices/iio:deviceX/events/in_altvoltage0_mag_rising_reset_min_available
+KernelVersion: 6.7
+Contact: linux-iio@vger.kernel.org
+Description:
+ Reading returns the allowable voltage range for
+ in_altvoltage0_mag_rising_reset_min.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-sps30 b/Documentation/ABI/testing/sysfs-bus-iio-sps30
new file mode 100644
index 000000000000..06e1c272537b
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-sps30
@@ -0,0 +1,28 @@
+What: /sys/bus/iio/devices/iio:deviceX/start_cleaning
+Date: December 2018
+KernelVersion: 5.0
+Contact: linux-iio@vger.kernel.org
+Description:
+ Writing 1 starts sensor self cleaning. Internal fan accelerates
+ to its maximum speed and keeps spinning for about 10 seconds in
+ order to blow out accumulated dust.
+
+What: /sys/bus/iio/devices/iio:deviceX/cleaning_period
+Date: January 2019
+KernelVersion: 5.1
+Contact: linux-iio@vger.kernel.org
+Description:
+ Sensor is capable of triggering self cleaning periodically.
+ Period can be changed by writing a new value here. Upon reading
+ the current one is returned. Units are seconds.
+
+ Writing 0 disables periodical self cleaning entirely.
+
+What: /sys/bus/iio/devices/iio:deviceX/cleaning_period_available
+Date: January 2019
+KernelVersion: 5.1
+Contact: linux-iio@vger.kernel.org
+Description:
+ The range of available values in seconds represented as the
+ minimum value, the step and the maximum value, all enclosed in
+ square brackets.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-sx9310 b/Documentation/ABI/testing/sysfs-bus-iio-sx9310
new file mode 100644
index 000000000000..3ac7759013e5
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-sx9310
@@ -0,0 +1,10 @@
+What: /sys/bus/iio/devices/iio:deviceX/in_proximity3_comb_raw
+Date: February 2019
+KernelVersion: 5.6
+Contact: Daniel Campello <campello@chromium.org>
+Description:
+ Proximity measurement indicating that some object is
+ near the combined sensor. The combined sensor presents
+ proximity measurements constructed by hardware by
+ combining measurements taken from a given set of
+ physical sensors.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-sx9324 b/Documentation/ABI/testing/sysfs-bus-iio-sx9324
new file mode 100644
index 000000000000..a8342770e7cb
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-sx9324
@@ -0,0 +1,29 @@
+What: /sys/bus/iio/devices/iio:deviceX/in_proximity<id>_setup
+Date: November 2021
+KernelVersion: 5.17
+Contact: Gwendal Grignou <gwendal@chromium.org>
+Description:
+ SX9324 has 3 inputs, CS0, CS1 and CS2. Hardware layout
+ defines if the input is
+
+ + not connected (HZ),
+ + grounded (GD),
+ + connected to an antenna where it can act as a base
+ (DS - data shield), or measured input (MI).
+
+ The sensor rotates measurement across 4 phases
+ (PH0, PH1, PH2, PH3), where the inputs are configured
+ and then measured.
+
+ By default, during the first phase, [PH0], CS0 is measured,
+ while CS1 and CS2 are used as shields.
+ `cat in_proximity0_setup` returns "MI,DS,DS".
+ [PH1], CS1 is measured, CS0 and CS2 are shield:
+ `cat in_proximity1_setup` returns "DS,MI,DS".
+ [PH2], CS2 is measured, CS0 and CS1 are shield:
+ `cat in_proximity1_setup` returns "DS,DS,MI".
+ [PH3], CS1 and CS2 are measured (combo mode):
+ `cat in_proximity1_setup` returns "DS,MI,MI".
+
+ Note, these are the chip default. Hardware layout will most
+ likely dictate different output. The entry is read-only.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-thermocouple b/Documentation/ABI/testing/sysfs-bus-iio-thermocouple
new file mode 100644
index 000000000000..01259df297ca
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-thermocouple
@@ -0,0 +1,18 @@
+What: /sys/bus/iio/devices/iio:deviceX/fault_ovuv
+KernelVersion: 5.1
+Contact: linux-iio@vger.kernel.org
+Description:
+ Overvoltage or Undervoltage Input Fault. The internal circuitry
+ is protected from excessive voltages applied to the thermocouple
+ cables. The device can also detect if such a condition occurs.
+
+ Reading returns '1' if input voltage is negative or greater
+ than VDD, otherwise '0'.
+
+What: /sys/bus/iio/devices/iio:deviceX/fault_oc
+KernelVersion: 5.1
+Contact: linux-iio@vger.kernel.org
+Description:
+ Open-circuit fault. The detection of open-circuit faults,
+ such as those caused by broken thermocouple wires.
+ Reading returns '1' if fault, '0' otherwise.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-timer-stm32 b/Documentation/ABI/testing/sysfs-bus-iio-timer-stm32
index 161c147d3c40..05074c4a65e2 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio-timer-stm32
+++ b/Documentation/ABI/testing/sysfs-bus-iio-timer-stm32
@@ -3,67 +3,85 @@ KernelVersion: 4.11
Contact: benjamin.gaignard@st.com
Description:
Reading returns the list possible master modes which are:
- - "reset" : The UG bit from the TIMx_EGR register is
+
+
+ - "reset"
+ The UG bit from the TIMx_EGR register is
used as trigger output (TRGO).
- - "enable" : The Counter Enable signal CNT_EN is used
+ - "enable"
+ The Counter Enable signal CNT_EN is used
as trigger output.
- - "update" : The update event is selected as trigger output.
+ - "update"
+ The update event is selected as trigger output.
For instance a master timer can then be used
as a prescaler for a slave timer.
- - "compare_pulse" : The trigger output send a positive pulse
- when the CC1IF flag is to be set.
- - "OC1REF" : OC1REF signal is used as trigger output.
- - "OC2REF" : OC2REF signal is used as trigger output.
- - "OC3REF" : OC3REF signal is used as trigger output.
- - "OC4REF" : OC4REF signal is used as trigger output.
+ - "compare_pulse"
+ The trigger output send a positive pulse
+ when the CC1IF flag is to be set.
+ - "OC1REF"
+ OC1REF signal is used as trigger output.
+ - "OC2REF"
+ OC2REF signal is used as trigger output.
+ - "OC3REF"
+ OC3REF signal is used as trigger output.
+ - "OC4REF"
+ OC4REF signal is used as trigger output.
+
Additional modes (on TRGO2 only):
- - "OC5REF" : OC5REF signal is used as trigger output.
- - "OC6REF" : OC6REF signal is used as trigger output.
+
+ - "OC5REF"
+ OC5REF signal is used as trigger output.
+ - "OC6REF"
+ OC6REF signal is used as trigger output.
- "compare_pulse_OC4REF":
- OC4REF rising or falling edges generate pulses.
+ OC4REF rising or falling edges generate pulses.
- "compare_pulse_OC6REF":
- OC6REF rising or falling edges generate pulses.
+ OC6REF rising or falling edges generate pulses.
- "compare_pulse_OC4REF_r_or_OC6REF_r":
- OC4REF or OC6REF rising edges generate pulses.
+ OC4REF or OC6REF rising edges generate pulses.
- "compare_pulse_OC4REF_r_or_OC6REF_f":
- OC4REF rising or OC6REF falling edges generate pulses.
+ OC4REF rising or OC6REF falling edges generate
+ pulses.
- "compare_pulse_OC5REF_r_or_OC6REF_r":
- OC5REF or OC6REF rising edges generate pulses.
+ OC5REF or OC6REF rising edges generate pulses.
- "compare_pulse_OC5REF_r_or_OC6REF_f":
- OC5REF rising or OC6REF falling edges generate pulses.
-
- +-----------+ +-------------+ +---------+
- | Prescaler +-> | Counter | +-> | Master | TRGO(2)
- +-----------+ +--+--------+-+ |-> | Control +-->
- | | || +---------+
- +--v--------+-+ OCxREF || +---------+
- | Chx compare +----------> | Output | ChX
- +-----------+-+ | | Control +-->
- . | | +---------+
- . | | .
- +-----------v-+ OC6REF | .
- | Ch6 compare +---------+>
- +-------------+
-
- Example with: "compare_pulse_OC4REF_r_or_OC6REF_r":
-
- X
- X X
- X . . X
- X . . X
- X . . X
- count X . . . . X
- . . . .
- . . . .
- +---------------+
- OC4REF | . . |
- +-+ . . +-+
- . +---+ .
- OC6REF . | | .
- +-------+ +-------+
- +-+ +-+
- TRGO2 | | | |
- +-+ +---+ +---------+
+ OC5REF rising or OC6REF falling edges generate
+ pulses.
+
+ ::
+
+ +-----------+ +-------------+ +---------+
+ | Prescaler +-> | Counter | +-> | Master | TRGO(2)
+ +-----------+ +--+--------+-+ |-> | Control +-->
+ | | || +---------+
+ +--v--------+-+ OCxREF || +---------+
+ | Chx compare +----------> | Output | ChX
+ +-----------+-+ | | Control +-->
+ . | | +---------+
+ . | | .
+ +-----------v-+ OC6REF | .
+ | Ch6 compare +---------+>
+ +-------------+
+
+ Example with: "compare_pulse_OC4REF_r_or_OC6REF_r"::
+
+ X
+ X X
+ X . . X
+ X . . X
+ X . . X
+ count X . . . . X
+ . . . .
+ . . . .
+ +---------------+
+ OC4REF | . . |
+ +-+ . . +-+
+ . +---+ .
+ OC6REF . | | .
+ +-------+ +-------+
+ +-+ +-+
+ TRGO2 | | | |
+ +-+ +---+ +---------+
What: /sys/bus/iio/devices/triggerX/master_mode
KernelVersion: 4.11
@@ -72,14 +90,6 @@ Description:
Reading returns the current master modes.
Writing set the master mode
-What: /sys/bus/iio/devices/triggerX/sampling_frequency
-KernelVersion: 4.11
-Contact: benjamin.gaignard@st.com
-Description:
- Reading returns the current sampling frequency.
- Writing an value different of 0 set and start sampling.
- Writing 0 stop sampling.
-
What: /sys/bus/iio/devices/iio:deviceX/in_count0_preset
KernelVersion: 4.12
Contact: benjamin.gaignard@st.com
@@ -91,29 +101,6 @@ Description:
When counting down the counter start from preset value
and fire event when reach 0.
-What: /sys/bus/iio/devices/iio:deviceX/in_count_quadrature_mode_available
-KernelVersion: 4.12
-Contact: benjamin.gaignard@st.com
-Description:
- Reading returns the list possible quadrature modes.
-
-What: /sys/bus/iio/devices/iio:deviceX/in_count0_quadrature_mode
-KernelVersion: 4.12
-Contact: benjamin.gaignard@st.com
-Description:
- Configure the device counter quadrature modes:
- channel_A:
- Encoder A input servers as the count input and B as
- the UP/DOWN direction control input.
-
- channel_B:
- Encoder B input serves as the count input and A as
- the UP/DOWN direction control input.
-
- quadrature:
- Encoder A and B inputs are mixed to get direction
- and count with a scale of 0.25.
-
What: /sys/bus/iio/devices/iio:deviceX/in_count_enable_mode_available
KernelVersion: 4.12
Contact: benjamin.gaignard@st.com
@@ -127,6 +114,7 @@ Description:
Configure the device counter enable modes, in all case
counting direction is set by in_count0_count_direction
attribute and the counter is clocked by the internal clock.
+
always:
Counter is always ON.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-vf610 b/Documentation/ABI/testing/sysfs-bus-iio-vf610
index 308a6756d3bf..491ead804488 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio-vf610
+++ b/Documentation/ABI/testing/sysfs-bus-iio-vf610
@@ -1,4 +1,4 @@
-What: /sys/bus/iio/devices/iio:deviceX/conversion_mode
+What: /sys/bus/iio/devices/iio:deviceX/in_conversion_mode
KernelVersion: 4.2
Contact: linux-iio@vger.kernel.org
Description:
diff --git a/Documentation/ABI/testing/sysfs-bus-intel_th-devices-gth b/Documentation/ABI/testing/sysfs-bus-intel_th-devices-gth
index 22d0843849a8..b7b2278fe042 100644
--- a/Documentation/ABI/testing/sysfs-bus-intel_th-devices-gth
+++ b/Documentation/ABI/testing/sysfs-bus-intel_th-devices-gth
@@ -10,10 +10,13 @@ Date: June 2015
KernelVersion: 4.3
Contact: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Description: (RO) Output port type:
- 0: not present,
- 1: MSU (Memory Storage Unit)
- 2: CTP (Common Trace Port)
- 4: PTI (MIPI PTI).
+
+ == =========================
+ 0 not present,
+ 1 MSU (Memory Storage Unit)
+ 2 CTP (Common Trace Port)
+ 4 PTI (MIPI PTI).
+ == =========================
What: /sys/bus/intel_th/devices/<intel_th_id>-gth/outputs/[0-7]_drop
Date: June 2015
diff --git a/Documentation/ABI/testing/sysfs-bus-intel_th-devices-msc b/Documentation/ABI/testing/sysfs-bus-intel_th-devices-msc
index b940c5d91cf7..a74252e580a5 100644
--- a/Documentation/ABI/testing/sysfs-bus-intel_th-devices-msc
+++ b/Documentation/ABI/testing/sysfs-bus-intel_th-devices-msc
@@ -9,10 +9,13 @@ Date: June 2015
KernelVersion: 4.3
Contact: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Description: (RW) Configure MSC operating mode:
+
- "single", for contiguous buffer mode (high-order alloc);
- "multi", for multiblock mode;
- "ExI", for DCI handler mode;
- - "debug", for debug mode.
+ - "debug", for debug mode;
+ - any of the currently loaded buffer sinks.
+
If operating mode changes, existing buffer is deallocated,
provided there are no active users and tracing is not enabled,
otherwise the write will fail.
@@ -22,12 +25,30 @@ Date: June 2015
KernelVersion: 4.3
Contact: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Description: (RW) Configure MSC buffer size for "single" or "multi" modes.
+
In single mode, this is a single number of pages, has to be
power of 2. In multiblock mode, this is a comma-separated list
of numbers of pages for each window to be allocated. Number of
windows is not limited.
+
Writing to this file deallocates existing buffer (provided
there are no active users and tracing is not enabled) and then
allocates a new one.
+What: /sys/bus/intel_th/devices/<intel_th_id>-msc<msc-id>/win_switch
+Date: May 2019
+KernelVersion: 5.2
+Contact: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Description: (RW) Trigger window switch for the MSC's buffer, in
+ multi-window mode. In "multi" mode, accepts writes of "1", thereby
+ triggering a window switch for the buffer. Returns an error in any
+ other operating mode or attempts to write something other than "1".
+
+What: /sys/bus/intel_th/devices/<intel_th_id>-msc<msc-id>/stop_on_full
+Date: March 2020
+KernelVersion: 5.7
+Contact: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Description: (RW) Configure whether trace stops when the last available window
+ becomes full (1/y/Y) or wraps around and continues until the next
+ window becomes available again (0/n/N).
diff --git a/Documentation/ABI/testing/sysfs-bus-intel_th-output-devices b/Documentation/ABI/testing/sysfs-bus-intel_th-output-devices
index 4d48a9451866..d1f667104944 100644
--- a/Documentation/ABI/testing/sysfs-bus-intel_th-output-devices
+++ b/Documentation/ABI/testing/sysfs-bus-intel_th-output-devices
@@ -3,11 +3,13 @@ Date: June 2015
KernelVersion: 4.3
Contact: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Description: (RW) Writes of 1 or 0 enable or disable trace output to this
- output device. Reads return current status.
+ output device. Reads return current status. Requires that the
+ correstponding output port driver be loaded.
What: /sys/bus/intel_th/devices/<intel_th_id>-msc<msc-id>/port
Date: June 2015
KernelVersion: 4.3
Contact: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Description: (RO) Port number, corresponding to this output device on the
- switch (GTH).
+ switch (GTH) or "unassigned" if the corresponding output
+ port driver is not loaded.
diff --git a/Documentation/ABI/testing/sysfs-bus-mdio b/Documentation/ABI/testing/sysfs-bus-mdio
index 491baaf4285f..38be04dfc05e 100644
--- a/Documentation/ABI/testing/sysfs-bus-mdio
+++ b/Documentation/ABI/testing/sysfs-bus-mdio
@@ -1,29 +1,72 @@
-What: /sys/bus/mdio_bus/devices/.../phy_id
-Date: November 2012
-KernelVersion: 3.8
-Contact: netdev@vger.kernel.org
-Description:
- This attribute contains the 32-bit PHY Identifier as reported
- by the device during bus enumeration, encoded in hexadecimal.
- This ID is used to match the device with the appropriate
- driver.
-
-What: /sys/bus/mdio_bus/devices/.../phy_interface
-Date: February 2014
-KernelVersion: 3.15
-Contact: netdev@vger.kernel.org
-Description:
- This attribute contains the PHY interface as configured by the
- Ethernet driver during bus enumeration, encoded in string.
- This interface mode is used to configure the Ethernet MAC with the
- appropriate mode for its data lines to the PHY hardware.
-
-What: /sys/bus/mdio_bus/devices/.../phy_has_fixups
-Date: February 2014
-KernelVersion: 3.15
-Contact: netdev@vger.kernel.org
-Description:
- This attribute contains the boolean value whether a given PHY
- device has had any "fixup" workaround running on it, encoded as
- a boolean. This information is provided to help troubleshooting
- PHY configurations.
+What: /sys/bus/mdio_bus/devices/.../statistics/
+What: /sys/class/mdio_bus/.../statistics/
+Date: January 2020
+KernelVersion: 5.6
+Contact: netdev@vger.kernel.org
+Description:
+ This folder contains statistics about global and per
+ MDIO bus address statistics.
+
+What: /sys/bus/mdio_bus/devices/.../statistics/transfers
+What: /sys/class/mdio_bus/.../transfers
+Date: January 2020
+KernelVersion: 5.6
+Contact: netdev@vger.kernel.org
+Description:
+ Total number of transfers for this MDIO bus.
+
+What: /sys/bus/mdio_bus/devices/.../statistics/errors
+What: /sys/class/mdio_bus/.../statistics/errors
+Date: January 2020
+KernelVersion: 5.6
+Contact: netdev@vger.kernel.org
+Description:
+ Total number of transfer errors for this MDIO bus.
+
+What: /sys/bus/mdio_bus/devices/.../statistics/writes
+What: /sys/class/mdio_bus/.../statistics/writes
+Date: January 2020
+KernelVersion: 5.6
+Contact: netdev@vger.kernel.org
+Description:
+ Total number of write transactions for this MDIO bus.
+
+What: /sys/bus/mdio_bus/devices/.../statistics/reads
+What: /sys/class/mdio_bus/.../statistics/reads
+Date: January 2020
+KernelVersion: 5.6
+Contact: netdev@vger.kernel.org
+Description:
+ Total number of read transactions for this MDIO bus.
+
+What: /sys/bus/mdio_bus/devices/.../statistics/transfers_<addr>
+What: /sys/class/mdio_bus/.../statistics/transfers_<addr>
+Date: January 2020
+KernelVersion: 5.6
+Contact: netdev@vger.kernel.org
+Description:
+ Total number of transfers for this MDIO bus address.
+
+What: /sys/bus/mdio_bus/devices/.../statistics/errors_<addr>
+What: /sys/class/mdio_bus/.../statistics/errors_<addr>
+Date: January 2020
+KernelVersion: 5.6
+Contact: netdev@vger.kernel.org
+Description:
+ Total number of transfer errors for this MDIO bus address.
+
+What: /sys/bus/mdio_bus/devices/.../statistics/writes_<addr>
+What: /sys/class/mdio_bus/.../statistics/writes_<addr>
+Date: January 2020
+KernelVersion: 5.6
+Contact: netdev@vger.kernel.org
+Description:
+ Total number of write transactions for this MDIO bus address.
+
+What: /sys/bus/mdio_bus/devices/.../statistics/reads_<addr>
+What: /sys/class/mdio_bus/.../statistics/reads_<addr>
+Date: January 2020
+KernelVersion: 5.6
+Contact: netdev@vger.kernel.org
+Description:
+ Total number of read transactions for this MDIO bus address.
diff --git a/Documentation/ABI/testing/sysfs-bus-mei b/Documentation/ABI/testing/sysfs-bus-mei
index 6bd45346ac7e..6e9a105fe5cb 100644
--- a/Documentation/ABI/testing/sysfs-bus-mei
+++ b/Documentation/ABI/testing/sysfs-bus-mei
@@ -4,7 +4,7 @@ KernelVersion: 3.10
Contact: Samuel Ortiz <sameo@linux.intel.com>
linux-mei@linux.intel.com
Description: Stores the same MODALIAS value emitted by uevent
- Format: mei:<mei device name>:<device uuid>:
+ Format: mei:<mei device name>:<device uuid>:<protocol version>
What: /sys/bus/mei/devices/.../name
Date: May 2015
@@ -26,3 +26,31 @@ KernelVersion: 4.3
Contact: Tomas Winkler <tomas.winkler@intel.com>
Description: Stores mei client protocol version
Format: %d
+
+What: /sys/bus/mei/devices/.../max_conn
+Date: Nov 2019
+KernelVersion: 5.5
+Contact: Tomas Winkler <tomas.winkler@intel.com>
+Description: Stores mei client maximum number of connections
+ Format: %d
+
+What: /sys/bus/mei/devices/.../fixed
+Date: Nov 2019
+KernelVersion: 5.5
+Contact: Tomas Winkler <tomas.winkler@intel.com>
+Description: Stores mei client fixed address, if any
+ Format: %d
+
+What: /sys/bus/mei/devices/.../vtag
+Date: Nov 2020
+KernelVersion: 5.9
+Contact: Tomas Winkler <tomas.winkler@intel.com>
+Description: Stores mei client vtag support status
+ Format: %d
+
+What: /sys/bus/mei/devices/.../max_len
+Date: Nov 2019
+KernelVersion: 5.5
+Contact: Tomas Winkler <tomas.winkler@intel.com>
+Description: Stores mei client maximum message length
+ Format: %d
diff --git a/Documentation/ABI/testing/sysfs-bus-most b/Documentation/ABI/testing/sysfs-bus-most
new file mode 100644
index 000000000000..38cc03e408e7
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-most
@@ -0,0 +1,299 @@
+What: /sys/bus/most/devices/<dev>/description
+Date: March 2017
+KernelVersion: 4.15
+Contact: Christian Gromm <christian.gromm@microchip.com>
+Description:
+ Provides information about the physical location of the
+ device. Hardware attached via USB, for instance,
+ might return <1-1.1:1.0>
+Users:
+
+What: /sys/bus/most/devices/<dev>/interface
+Date: March 2017
+KernelVersion: 4.15
+Contact: Christian Gromm <christian.gromm@microchip.com>
+Description:
+ Indicates the type of peripheral interface the device uses.
+Users:
+
+What: /sys/bus/most/devices/<dev>/dci
+Date: June 2016
+KernelVersion: 4.15
+Contact: Christian Gromm <christian.gromm@microchip.com>
+Description:
+ If the network interface controller is attached via USB, a dci
+ directory is created that allows applications to read and
+ write the controller's DCI registers.
+Users:
+
+What: /sys/bus/most/devices/<dev>/dci/arb_address
+Date: June 2016
+KernelVersion: 4.15
+Contact: Christian Gromm <christian.gromm@microchip.com>
+Description:
+ This is used to set an arbitrary DCI register address an
+ application wants to read from or write to.
+Users:
+
+What: /sys/bus/most/devices/<dev>/dci/arb_value
+Date: June 2016
+KernelVersion: 4.15
+Contact: Christian Gromm <christian.gromm@microchip.com>
+Description:
+ This is used to read and write the DCI register whose address
+ is stored in arb_address.
+Users:
+
+What: /sys/bus/most/devices/<dev>/dci/mep_eui48_hi
+Date: June 2016
+KernelVersion: 4.15
+Contact: Christian Gromm <christian.gromm@microchip.com>
+Description:
+ This is used to check and configure the MAC address.
+Users:
+
+What: /sys/bus/most/devices/<dev>/dci/mep_eui48_lo
+Date: June 2016
+KernelVersion: 4.15
+Contact: Christian Gromm <christian.gromm@microchip.com>
+Description:
+ This is used to check and configure the MAC address.
+Users:
+
+What: /sys/bus/most/devices/<dev>/dci/mep_eui48_mi
+Date: June 2016
+KernelVersion: 4.15
+Contact: Christian Gromm <christian.gromm@microchip.com>
+Description:
+ This is used to check and configure the MAC address.
+Users:
+
+What: /sys/bus/most/devices/<dev>/dci/mep_filter
+Date: June 2016
+KernelVersion: 4.15
+Contact: Christian Gromm <christian.gromm@microchip.com>
+Description:
+ This is used to check and configure the MEP filter address.
+Users:
+
+What: /sys/bus/most/devices/<dev>/dci/mep_hash0
+Date: June 2016
+KernelVersion: 4.15
+Contact: Christian Gromm <christian.gromm@microchip.com>
+Description:
+ This is used to check and configure the MEP hash table.
+Users:
+
+What: /sys/bus/most/devices/<dev>/dci/mep_hash1
+Date: June 2016
+KernelVersion: 4.15
+Contact: Christian Gromm <christian.gromm@microchip.com>
+Description:
+ This is used to check and configure the MEP hash table.
+Users:
+
+What: /sys/bus/most/devices/<dev>/dci/mep_hash2
+Date: June 2016
+KernelVersion: 4.15
+Contact: Christian Gromm <christian.gromm@microchip.com>
+Description:
+ This is used to check and configure the MEP hash table.
+Users:
+
+What: /sys/bus/most/devices/<dev>/dci/mep_hash3
+Date: June 2016
+KernelVersion: 4.15
+Contact: Christian Gromm <christian.gromm@microchip.com>
+Description:
+ This is used to check and configure the MEP hash table.
+Users:
+
+What: /sys/bus/most/devices/<dev>/dci/ni_state
+Date: June 2016
+KernelVersion: 4.15
+Contact: Christian Gromm <christian.gromm@microchip.com>
+Description:
+ Indicates the current network interface state.
+Users:
+
+What: /sys/bus/most/devices/<dev>/dci/node_address
+Date: June 2016
+KernelVersion: 4.15
+Contact: Christian Gromm <christian.gromm@microchip.com>
+Description:
+ Indicates the current node address.
+Users:
+
+What: /sys/bus/most/devices/<dev>/dci/node_position
+Date: June 2016
+KernelVersion: 4.15
+Contact: Christian Gromm <christian.gromm@microchip.com>
+Description:
+ Indicates the current node position.
+Users:
+
+What: /sys/bus/most/devices/<dev>/dci/packet_bandwidth
+Date: June 2016
+KernelVersion: 4.15
+Contact: Christian Gromm <christian.gromm@microchip.com>
+Description:
+ Indicates the configured packet bandwidth.
+Users:
+
+What: /sys/bus/most/devices/<dev>/dci/sync_ep
+Date: June 2016
+KernelVersion: 4.15
+Contact: Christian Gromm <christian.gromm@microchip.com>
+Description:
+ Triggers the controller's synchronization process for a certain
+ endpoint.
+Users:
+
+What: /sys/bus/most/devices/<dev>/<channel>/
+Date: March 2017
+KernelVersion: 4.15
+Contact: Christian Gromm <christian.gromm@microchip.com>
+Description:
+ For every channel of the device a directory is created, whose
+ name is dictated by the HDM. This enables an application to
+ collect information about the channel's capabilities and
+ configure it.
+Users:
+
+What: /sys/bus/most/devices/<dev>/<channel>/available_datatypes
+Date: March 2017
+KernelVersion: 4.15
+Contact: Christian Gromm <christian.gromm@microchip.com>
+Description:
+ Indicates the data types the channel can transport.
+Users:
+
+What: /sys/bus/most/devices/<dev>/<channel>/available_directions
+Date: March 2017
+KernelVersion: 4.15
+Contact: Christian Gromm <christian.gromm@microchip.com>
+Description:
+ Indicates the directions the channel is capable of.
+Users:
+
+What: /sys/bus/most/devices/<dev>/<channel>/number_of_packet_buffers
+Date: March 2017
+KernelVersion: 4.15
+Contact: Christian Gromm <christian.gromm@microchip.com>
+Description:
+ Indicates the number of packet buffers the channel can
+ handle.
+Users:
+
+What: /sys/bus/most/devices/<dev>/<channel>/number_of_stream_buffers
+Date: March 2017
+KernelVersion: 4.15
+Contact: Christian Gromm <christian.gromm@microchip.com>
+Description:
+ Indicates the number of streaming buffers the channel can
+ handle.
+Users:
+
+What: /sys/bus/most/devices/<dev>/<channel>/size_of_packet_buffer
+Date: March 2017
+KernelVersion: 4.15
+Contact: Christian Gromm <christian.gromm@microchip.com>
+Description:
+ Indicates the size of a packet buffer the channel can
+ handle.
+Users:
+
+What: /sys/bus/most/devices/<dev>/<channel>/size_of_stream_buffer
+Date: March 2017
+KernelVersion: 4.15
+Contact: Christian Gromm <christian.gromm@microchip.com>
+Description:
+ Indicates the size of a streaming buffer the channel can
+ handle.
+Users:
+
+What: /sys/bus/most/devices/<dev>/<channel>/set_number_of_buffers
+Date: March 2017
+KernelVersion: 4.15
+Contact: Christian Gromm <christian.gromm@microchip.com>
+Description:
+ This is to read back the configured number of buffers of
+ the channel.
+Users:
+
+What: /sys/bus/most/devices/<dev>/<channel>/set_buffer_size
+Date: March 2017
+KernelVersion: 4.15
+Contact: Christian Gromm <christian.gromm@microchip.com>
+Description:
+ This is to read back the configured buffer size of the channel.
+Users:
+
+What: /sys/bus/most/devices/<dev>/<channel>/set_direction
+Date: March 2017
+KernelVersion: 4.15
+Contact: Christian Gromm <christian.gromm@microchip.com>
+Description:
+ This is to read back the configured direction of the channel.
+ The following strings will be accepted::
+
+ 'tx',
+ 'rx'
+Users:
+
+What: /sys/bus/most/devices/<dev>/<channel>/set_datatype
+Date: March 2017
+KernelVersion: 4.15
+Contact: Christian Gromm <christian.gromm@microchip.com>
+Description:
+ This is to read back the configured data type of the channel.
+ The following strings will be accepted::
+
+ 'control',
+ 'async',
+ 'sync',
+ 'isoc_avp'
+Users:
+
+What: /sys/bus/most/devices/<dev>/<channel>/set_subbuffer_size
+Date: March 2017
+KernelVersion: 4.15
+Contact: Christian Gromm <christian.gromm@microchip.com>
+Description:
+ This is to read back the configured subbuffer size of
+ the channel.
+Users:
+
+What: /sys/bus/most/devices/<dev>/<channel>/set_packets_per_xact
+Date: March 2017
+KernelVersion: 4.15
+Contact: Christian Gromm <christian.gromm@microchip.com>
+Description:
+ This is to read back the configured number of packets per
+ transaction of the channel. This is only applicable when
+ connected via USB.
+Users:
+
+What: /sys/bus/most/devices/<dev>/<channel>/channel_starving
+Date: March 2017
+KernelVersion: 4.15
+Contact: Christian Gromm <christian.gromm@microchip.com>
+Description:
+ Indicates whether channel ran out of buffers.
+Users:
+
+What: /sys/bus/most/drivers/most_core/components
+Date: March 2017
+KernelVersion: 4.15
+Contact: Christian Gromm <christian.gromm@microchip.com>
+Description:
+ This is used to retrieve a list of registered components.
+Users:
+
+What: /sys/bus/most/drivers/most_core/links
+Date: March 2017
+KernelVersion: 4.15
+Contact: Christian Gromm <christian.gromm@microchip.com>
+Description:
+ This is used to retrieve a list of established links.
+Users:
diff --git a/Documentation/ABI/testing/sysfs-bus-moxtet-devices b/Documentation/ABI/testing/sysfs-bus-moxtet-devices
new file mode 100644
index 000000000000..32dccc00d57d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-moxtet-devices
@@ -0,0 +1,17 @@
+What: /sys/bus/moxtet/devices/moxtet-<name>.<addr>/module_description
+Date: March 2019
+KernelVersion: 5.3
+Contact: Marek Behún <kabel@kernel.org>
+Description: (Read) Moxtet module description. Format: string
+
+What: /sys/bus/moxtet/devices/moxtet-<name>.<addr>/module_id
+Date: March 2019
+KernelVersion: 5.3
+Contact: Marek Behún <kabel@kernel.org>
+Description: (Read) Moxtet module ID. Format: %x
+
+What: /sys/bus/moxtet/devices/moxtet-<name>.<addr>/module_name
+Date: March 2019
+KernelVersion: 5.3
+Contact: Marek Behún <kabel@kernel.org>
+Description: (Read) Moxtet module name. Format: string
diff --git a/Documentation/ABI/testing/sysfs-bus-nfit b/Documentation/ABI/testing/sysfs-bus-nfit
index a1cb44dcb908..ed483a11c58c 100644
--- a/Documentation/ABI/testing/sysfs-bus-nfit
+++ b/Documentation/ABI/testing/sysfs-bus-nfit
@@ -1,11 +1,11 @@
-For all of the nmem device attributes under nfit/*, see the 'NVDIMM Firmware
+For all of the nmem device attributes under ``nfit/*``, see the 'NVDIMM Firmware
Interface Table (NFIT)' section in the ACPI specification
(http://www.uefi.org/specifications) for more details.
What: /sys/bus/nd/devices/nmemX/nfit/serial
Date: Jun, 2015
KernelVersion: v4.2
-Contact: linux-nvdimm@lists.01.org
+Contact: nvdimm@lists.linux.dev
Description:
(RO) Serial number of the NVDIMM (non-volatile dual in-line
memory module), assigned by the module vendor.
@@ -14,7 +14,7 @@ Description:
What: /sys/bus/nd/devices/nmemX/nfit/handle
Date: Apr, 2015
KernelVersion: v4.2
-Contact: linux-nvdimm@lists.01.org
+Contact: nvdimm@lists.linux.dev
Description:
(RO) The address (given by the _ADR object) of the device on its
parent bus of the NVDIMM device containing the NVDIMM region.
@@ -23,7 +23,7 @@ Description:
What: /sys/bus/nd/devices/nmemX/nfit/device
Date: Apr, 2015
KernelVersion: v4.1
-Contact: linux-nvdimm@lists.01.org
+Contact: nvdimm@lists.linux.dev
Description:
(RO) Device id for the NVDIMM, assigned by the module vendor.
@@ -31,7 +31,7 @@ Description:
What: /sys/bus/nd/devices/nmemX/nfit/rev_id
Date: Jun, 2015
KernelVersion: v4.2
-Contact: linux-nvdimm@lists.01.org
+Contact: nvdimm@lists.linux.dev
Description:
(RO) Revision of the NVDIMM, assigned by the module vendor.
@@ -39,7 +39,7 @@ Description:
What: /sys/bus/nd/devices/nmemX/nfit/phys_id
Date: Apr, 2015
KernelVersion: v4.2
-Contact: linux-nvdimm@lists.01.org
+Contact: nvdimm@lists.linux.dev
Description:
(RO) Handle (i.e., instance number) for the SMBIOS (system
management BIOS) Memory Device structure describing the NVDIMM
@@ -49,7 +49,7 @@ Description:
What: /sys/bus/nd/devices/nmemX/nfit/flags
Date: Jun, 2015
KernelVersion: v4.2
-Contact: linux-nvdimm@lists.01.org
+Contact: nvdimm@lists.linux.dev
Description:
(RO) The flags in the NFIT memory device sub-structure indicate
the state of the data on the nvdimm relative to its energy
@@ -68,7 +68,7 @@ What: /sys/bus/nd/devices/nmemX/nfit/format1
What: /sys/bus/nd/devices/nmemX/nfit/formats
Date: Apr, 2016
KernelVersion: v4.7
-Contact: linux-nvdimm@lists.01.org
+Contact: nvdimm@lists.linux.dev
Description:
(RO) The interface codes indicate support for persistent memory
mapped directly into system physical address space and / or a
@@ -84,7 +84,7 @@ Description:
What: /sys/bus/nd/devices/nmemX/nfit/vendor
Date: Apr, 2016
KernelVersion: v4.7
-Contact: linux-nvdimm@lists.01.org
+Contact: nvdimm@lists.linux.dev
Description:
(RO) Vendor id of the NVDIMM.
@@ -92,7 +92,7 @@ Description:
What: /sys/bus/nd/devices/nmemX/nfit/dsm_mask
Date: May, 2016
KernelVersion: v4.7
-Contact: linux-nvdimm@lists.01.org
+Contact: nvdimm@lists.linux.dev
Description:
(RO) The bitmask indicates the supported device specific control
functions relative to the NVDIMM command family supported by the
@@ -102,7 +102,7 @@ Description:
What: /sys/bus/nd/devices/nmemX/nfit/family
Date: Apr, 2016
KernelVersion: v4.7
-Contact: linux-nvdimm@lists.01.org
+Contact: nvdimm@lists.linux.dev
Description:
(RO) Displays the NVDIMM family command sets. Values
0, 1, 2 and 3 correspond to NVDIMM_FAMILY_INTEL,
@@ -118,16 +118,16 @@ Description:
What: /sys/bus/nd/devices/nmemX/nfit/id
Date: Apr, 2016
KernelVersion: v4.7
-Contact: linux-nvdimm@lists.01.org
+Contact: nvdimm@lists.linux.dev
Description:
(RO) ACPI specification 6.2 section 5.2.25.9, defines an
- identifier for an NVDIMM, which refelects the id attribute.
+ identifier for an NVDIMM, which reflects the id attribute.
What: /sys/bus/nd/devices/nmemX/nfit/subsystem_vendor
Date: Apr, 2016
KernelVersion: v4.7
-Contact: linux-nvdimm@lists.01.org
+Contact: nvdimm@lists.linux.dev
Description:
(RO) Sub-system vendor id of the NVDIMM non-volatile memory
subsystem controller.
@@ -136,7 +136,7 @@ Description:
What: /sys/bus/nd/devices/nmemX/nfit/subsystem_rev_id
Date: Apr, 2016
KernelVersion: v4.7
-Contact: linux-nvdimm@lists.01.org
+Contact: nvdimm@lists.linux.dev
Description:
(RO) Sub-system revision id of the NVDIMM non-volatile memory subsystem
controller, assigned by the non-volatile memory subsystem
@@ -146,7 +146,7 @@ Description:
What: /sys/bus/nd/devices/nmemX/nfit/subsystem_device
Date: Apr, 2016
KernelVersion: v4.7
-Contact: linux-nvdimm@lists.01.org
+Contact: nvdimm@lists.linux.dev
Description:
(RO) Sub-system device id for the NVDIMM non-volatile memory
subsystem controller, assigned by the non-volatile memory
@@ -156,7 +156,7 @@ Description:
What: /sys/bus/nd/devices/ndbusX/nfit/revision
Date: Jun, 2015
KernelVersion: v4.2
-Contact: linux-nvdimm@lists.01.org
+Contact: nvdimm@lists.linux.dev
Description:
(RO) ACPI NFIT table revision number.
@@ -164,7 +164,7 @@ Description:
What: /sys/bus/nd/devices/ndbusX/nfit/scrub
Date: Sep, 2016
KernelVersion: v4.9
-Contact: linux-nvdimm@lists.01.org
+Contact: nvdimm@lists.linux.dev
Description:
(RW) This shows the number of full Address Range Scrubs (ARS)
that have been completed since driver load time. Userspace can
@@ -177,7 +177,7 @@ Description:
What: /sys/bus/nd/devices/ndbusX/nfit/hw_error_scrub
Date: Sep, 2016
KernelVersion: v4.9
-Contact: linux-nvdimm@lists.01.org
+Contact: nvdimm@lists.linux.dev
Description:
(RW) Provides a way to toggle the behavior between just adding
the address (cache line) where the MCE happened to the poison
@@ -196,17 +196,36 @@ Description:
What: /sys/bus/nd/devices/ndbusX/nfit/dsm_mask
Date: Jun, 2017
KernelVersion: v4.13
-Contact: linux-nvdimm@lists.01.org
+Contact: nvdimm@lists.linux.dev
Description:
(RO) The bitmask indicates the supported bus specific control
functions. See the section named 'NVDIMM Root Device _DSMs' in
the ACPI specification.
+What: /sys/bus/nd/devices/ndbusX/nfit/firmware_activate_noidle
+Date: Apr, 2020
+KernelVersion: v5.8
+Contact: nvdimm@lists.linux.dev
+Description:
+ (RW) The Intel platform implementation of firmware activate
+ support exposes an option let the platform force idle devices in
+ the system over the activation event, or trust that the OS will
+ do it. The safe default is to let the platform force idle
+ devices since the kernel is already in a suspend state, and on
+ the chance that a driver does not properly quiesce bus-mastering
+ after a suspend callback the platform will handle it. However,
+ the activation might abort if, for example, platform firmware
+ determines that the activation time exceeds the max PCI-E
+ completion timeout. Since the platform does not know whether the
+ OS is running the activation from a suspend context it aborts,
+ but if the system owner trusts driver suspend callback to be
+ sufficient then 'firmware_activation_noidle' can be
+ enabled to bypass the activation abort.
What: /sys/bus/nd/devices/regionX/nfit/range_index
Date: Jun, 2015
KernelVersion: v4.2
-Contact: linux-nvdimm@lists.01.org
+Contact: nvdimm@lists.linux.dev
Description:
(RO) A unique number provided by the BIOS to identify an address
range. Used by NVDIMM Region Mapping Structure to uniquely refer
diff --git a/Documentation/ABI/testing/sysfs-bus-nvdimm b/Documentation/ABI/testing/sysfs-bus-nvdimm
new file mode 100644
index 000000000000..64eb8f4c6a41
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-nvdimm
@@ -0,0 +1,59 @@
+What: nvdimm
+Date: July 2020
+KernelVersion: 5.8
+Contact: Dan Williams <dan.j.williams@intel.com>
+Description:
+
+The libnvdimm sub-system implements a common sysfs interface for
+platform nvdimm resources. See Documentation/driver-api/nvdimm/.
+
+What: /sys/bus/event_source/devices/nmemX/format
+Date: February 2022
+KernelVersion: 5.18
+Contact: Kajol Jain <kjain@linux.ibm.com>
+Description: (RO) Attribute group to describe the magic bits
+ that go into perf_event_attr.config for a particular pmu.
+ (See ABI/testing/sysfs-bus-event_source-devices-format).
+
+ Each attribute under this group defines a bit range of the
+ perf_event_attr.config. Supported attribute is listed
+ below::
+
+ event = "config:0-4" - event ID
+
+ For example::
+
+ ctl_res_cnt = "event=0x1"
+
+What: /sys/bus/event_source/devices/nmemX/events
+Date: February 2022
+KernelVersion: 5.18
+Contact: Kajol Jain <kjain@linux.ibm.com>
+Description: (RO) Attribute group to describe performance monitoring events
+ for the nvdimm memory device. Each attribute in this group
+ describes a single performance monitoring event supported by
+ this nvdimm pmu. The name of the file is the name of the event.
+ (See ABI/testing/sysfs-bus-event_source-devices-events). A
+ listing of the events supported by a given nvdimm provider type
+ can be found in Documentation/driver-api/nvdimm/$provider.
+
+What: /sys/bus/event_source/devices/nmemX/cpumask
+Date: February 2022
+KernelVersion: 5.18
+Contact: Kajol Jain <kjain@linux.ibm.com>
+Description: (RO) This sysfs file exposes the cpumask which is designated to
+ to retrieve nvdimm pmu event counter data.
+
+What: /sys/bus/nd/devices/nmemX/cxl/id
+Date: November 2022
+KernelVersion: 6.2
+Contact: Dave Jiang <dave.jiang@intel.com>
+Description: (RO) Show the id (serial) of the device. This is CXL specific.
+
+What: /sys/bus/nd/devices/nmemX/cxl/provider
+Date: November 2022
+KernelVersion: 6.2
+Contact: Dave Jiang <dave.jiang@intel.com>
+Description: (RO) Shows the CXL bridge device that ties to a CXL memory device
+ to this NVDIMM device. I.e. the parent of the device returned is
+ a /sys/bus/cxl/devices/memX instance.
diff --git a/Documentation/ABI/testing/sysfs-bus-optee-devices b/Documentation/ABI/testing/sysfs-bus-optee-devices
new file mode 100644
index 000000000000..af31e5a22d89
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-optee-devices
@@ -0,0 +1,17 @@
+What: /sys/bus/tee/devices/optee-ta-<uuid>/
+Date: May 2020
+KernelVersion 5.8
+Contact: op-tee@lists.trustedfirmware.org
+Description:
+ OP-TEE bus provides reference to registered drivers under this directory. The <uuid>
+ matches Trusted Application (TA) driver and corresponding TA in secure OS. Drivers
+ are free to create needed API under optee-ta-<uuid> directory.
+
+What: /sys/bus/tee/devices/optee-ta-<uuid>/need_supplicant
+Date: November 2023
+KernelVersion: 6.7
+Contact: op-tee@lists.trustedfirmware.org
+Description:
+ Allows to distinguish whether an OP-TEE based TA/device requires user-space
+ tee-supplicant to function properly or not. This attribute will be present for
+ devices which depend on tee-supplicant to be running.
diff --git a/Documentation/ABI/testing/sysfs-bus-papr-pmem b/Documentation/ABI/testing/sysfs-bus-papr-pmem
new file mode 100644
index 000000000000..34ee8c59ab25
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-papr-pmem
@@ -0,0 +1,75 @@
+What: /sys/bus/nd/devices/nmemX/papr/flags
+Date: Apr, 2020
+KernelVersion: v5.8
+Contact: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>, nvdimm@lists.linux.dev,
+Description:
+ (RO) Report flags indicating various states of a
+ papr-pmem NVDIMM device. Each flag maps to a one or
+ more bits set in the dimm-health-bitmap retrieved in
+ response to H_SCM_HEALTH hcall. The details of the bit
+ flags returned in response to this hcall is available
+ at 'Documentation/arch/powerpc/papr_hcalls.rst' . Below are
+ the flags reported in this sysfs file:
+
+ * "not_armed"
+ Indicates that NVDIMM contents will not
+ survive a power cycle.
+ * "flush_fail"
+ Indicates that NVDIMM contents
+ couldn't be flushed during last
+ shut-down event.
+ * "restore_fail"
+ Indicates that NVDIMM contents
+ couldn't be restored during NVDIMM
+ initialization.
+ * "encrypted"
+ NVDIMM contents are encrypted.
+ * "smart_notify"
+ There is health event for the NVDIMM.
+ * "scrubbed"
+ Indicating that contents of the
+ NVDIMM have been scrubbed.
+ * "locked"
+ Indicating that NVDIMM contents can't
+ be modified until next power cycle.
+
+What: /sys/bus/nd/devices/nmemX/papr/perf_stats
+Date: May, 2020
+KernelVersion: v5.9
+Contact: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>, nvdimm@lists.linux.dev,
+Description:
+ (RO) Report various performance stats related to papr-scm NVDIMM
+ device. This attribute is only available for NVDIMM devices
+ that support reporting NVDIMM performance stats. Each stat is
+ reported on a new line with each line composed of a
+ stat-identifier followed by it value. Below are currently known
+ dimm performance stats which are reported:
+
+ * "CtlResCt" : Controller Reset Count
+ * "CtlResTm" : Controller Reset Elapsed Time
+ * "PonSecs " : Power-on Seconds
+ * "MemLife " : Life Remaining
+ * "CritRscU" : Critical Resource Utilization
+ * "HostLCnt" : Host Load Count
+ * "HostSCnt" : Host Store Count
+ * "HostSDur" : Host Store Duration
+ * "HostLDur" : Host Load Duration
+ * "MedRCnt " : Media Read Count
+ * "MedWCnt " : Media Write Count
+ * "MedRDur " : Media Read Duration
+ * "MedWDur " : Media Write Duration
+ * "CchRHCnt" : Cache Read Hit Count
+ * "CchWHCnt" : Cache Write Hit Count
+ * "FastWCnt" : Fast Write Count
+
+What: /sys/bus/nd/devices/nmemX/papr/health_bitmap_inject
+Date: Jan, 2022
+KernelVersion: v5.17
+Contact: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>, nvdimm@lists.linux.dev,
+Description:
+ (RO) Reports the health bitmap inject bitmap that is applied to
+ bitmap received from PowerVM via the H_SCM_HEALTH. This is used
+ to forcibly set specific bits returned from Hcall. These is then
+ used to simulate various health or shutdown states for an nvdimm
+ and are set by user-space tools like ndctl by issuing a PAPR DSM.
+
diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
index 8bfee557e50e..92debe879ffb 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci
+++ b/Documentation/ABI/testing/sysfs-bus-pci
@@ -1,4 +1,5 @@
What: /sys/bus/pci/drivers/.../bind
+What: /sys/devices/pciX/.../bind
Date: December 2003
Contact: linux-pci@vger.kernel.org
Description:
@@ -7,11 +8,14 @@ Description:
this location. This is useful for overriding default
bindings. The format for the location is: DDDD:BB:DD.F.
That is Domain:Bus:Device.Function and is the same as
- found in /sys/bus/pci/devices/. For example:
- # echo 0000:00:19.0 > /sys/bus/pci/drivers/foo/bind
+ found in /sys/bus/pci/devices/. For example::
+
+ # echo 0000:00:19.0 > /sys/bus/pci/drivers/foo/bind
+
(Note: kernels before 2.6.28 may require echo -n).
What: /sys/bus/pci/drivers/.../unbind
+What: /sys/devices/pciX/.../unbind
Date: December 2003
Contact: linux-pci@vger.kernel.org
Description:
@@ -20,11 +24,14 @@ Description:
this location. This may be useful when overriding default
bindings. The format for the location is: DDDD:BB:DD.F.
That is Domain:Bus:Device.Function and is the same as
- found in /sys/bus/pci/devices/. For example:
- # echo 0000:00:19.0 > /sys/bus/pci/drivers/foo/unbind
+ found in /sys/bus/pci/devices/. For example::
+
+ # echo 0000:00:19.0 > /sys/bus/pci/drivers/foo/unbind
+
(Note: kernels before 2.6.28 may require echo -n).
What: /sys/bus/pci/drivers/.../new_id
+What: /sys/devices/pciX/.../new_id
Date: December 2003
Contact: linux-pci@vger.kernel.org
Description:
@@ -38,10 +45,12 @@ Description:
Class, Class Mask, and Private Driver Data. The Vendor ID
and Device ID fields are required, the rest are optional.
Upon successfully adding an ID, the driver will probe
- for the device and attempt to bind to it. For example:
- # echo "8086 10f5" > /sys/bus/pci/drivers/foo/new_id
+ for the device and attempt to bind to it. For example::
+
+ # echo "8086 10f5" > /sys/bus/pci/drivers/foo/new_id
What: /sys/bus/pci/drivers/.../remove_id
+What: /sys/devices/pciX/.../remove_id
Date: February 2009
Contact: Chris Wright <chrisw@sous-sol.org>
Description:
@@ -54,8 +63,9 @@ Description:
required, the rest are optional. After successfully
removing an ID, the driver will no longer support the
device. This is useful to ensure auto probing won't
- match the driver to the device. For example:
- # echo "8086 10f5" > /sys/bus/pci/drivers/foo/remove_id
+ match the driver to the device. For example::
+
+ # echo "8086 10f5" > /sys/bus/pci/drivers/foo/remove_id
What: /sys/bus/pci/rescan
Date: January 2009
@@ -90,6 +100,17 @@ Description:
This attribute indicates the mode that the irq vector named by
the file is in (msi vs. msix)
+What: /sys/bus/pci/devices/.../irq
+Date: August 2021
+Contact: Linux PCI developers <linux-pci@vger.kernel.org>
+Description:
+ If a driver has enabled MSI (not MSI-X), "irq" contains the
+ IRQ of the first MSI vector. Otherwise "irq" contains the
+ IRQ of the legacy INTx interrupt.
+
+ "irq" being set to 0 indicates that the device isn't
+ capable of generating legacy INTx interrupts.
+
What: /sys/bus/pci/devices/.../remove
Date: January 2009
Contact: Linux PCI developers <linux-pci@vger.kernel.org>
@@ -115,6 +136,23 @@ Description:
child buses, and re-discover devices removed earlier
from this part of the device tree.
+What: /sys/bus/pci/devices/.../reset_method
+Date: August 2021
+Contact: Amey Narkhede <ameynarkhede03@gmail.com>
+Description:
+ Some devices allow an individual function to be reset
+ without affecting other functions in the same slot.
+
+ For devices that have this support, a file named
+ reset_method is present in sysfs. Reading this file
+ gives names of the supported and enabled reset methods and
+ their ordering. Writing a space-separated list of names of
+ reset methods sets the reset methods and ordering to be
+ used when resetting the device. Writing an empty string
+ disables the ability to reset the device. Writing
+ "default" enables all supported reset methods in the
+ default ordering.
+
What: /sys/bus/pci/devices/.../reset
Date: July 2009
Contact: Michael S. Tsirkin <mst@redhat.com>
@@ -125,6 +163,17 @@ Description:
will be present in sysfs. Writing 1 to this file
will perform reset.
+What: /sys/bus/pci/devices/.../reset_subordinate
+Date: October 2024
+Contact: linux-pci@vger.kernel.org
+Description:
+ This is visible only for bridge devices. If you want to reset
+ all devices attached through the subordinate bus of a specific
+ bridge device, writing 1 to this will try to do it. This will
+ affect all devices attached to the system through this bridge
+ similiar to writing 1 to their individual "reset" file, so use
+ with caution.
+
What: /sys/bus/pci/devices/.../vpd
Date: February 2008
Contact: Ben Hutchings <bwh@kernel.org>
@@ -133,11 +182,11 @@ Description:
binary file containing the Vital Product Data for the
device. It should follow the VPD format defined in
PCI Specification 2.1 or 2.2, but users should consider
- that some devices may have malformatted data. If the
- underlying VPD has a writable section then the
+ that some devices may have incorrectly formatted data.
+ If the underlying VPD has a writable section then the
corresponding section of this file will be writable.
-What: /sys/bus/pci/devices/.../virtfnN
+What: /sys/bus/pci/devices/.../virtfn<N>
Date: March 2009
Contact: Yu Zhao <yu.zhao@intel.com>
Description:
@@ -164,6 +213,24 @@ Description:
The symbolic link points to the PCI device sysfs entry of the
Physical Function this device associates with.
+What: /sys/bus/pci/devices/.../modalias
+Date: May 2005
+Contact: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Description:
+ This attribute indicates the PCI ID of the device object.
+
+ That is in the format:
+ pci:vXXXXXXXXdXXXXXXXXsvXXXXXXXXsdXXXXXXXXbcXXscXXiXX,
+ where:
+
+ - vXXXXXXXX contains the vendor ID;
+ - dXXXXXXXX contains the device ID;
+ - svXXXXXXXX contains the sub-vendor ID;
+ - sdXXXXXXXX contains the subsystem device ID;
+ - bcXX contains the device class;
+ - scXX contains the device subclass;
+ - iXX contains the device class programming interface.
+
What: /sys/bus/pci/slots/.../module
Date: June 2009
Contact: linux-pci@vger.kernel.org
@@ -189,10 +256,13 @@ What: /sys/bus/pci/devices/.../index
Date: July 2010
Contact: Narendra K <narendra_k@dell.com>, linux-bugs@dell.com
Description:
- Reading this attribute will provide the firmware
- given instance (SMBIOS type 41 device type instance) of the
- PCI device. The attribute will be created only if the firmware
- has given an instance number to the PCI device.
+ Reading this attribute will provide the firmware given instance
+ number of the PCI device. Depending on the platform this can
+ be for example the SMBIOS type 41 device type instance or the
+ user-defined ID (UID) on s390. The attribute will be created
+ only if the firmware has given an instance number to the PCI
+ device and that number is guaranteed to uniquely identify the
+ device in the system.
Users:
Userspace applications interested in knowing the
firmware assigned device type instance of the PCI
@@ -347,3 +417,207 @@ Description:
If the device has any Peer-to-Peer memory registered, this
file contains a '1' if the memory has been published for
use outside the driver that owns the device.
+
+What: /sys/bus/pci/devices/.../p2pmem/allocate
+Date: August 2022
+Contact: Logan Gunthorpe <logang@deltatee.com>
+Description:
+ This file allows mapping p2pmem into userspace. For each
+ mmap() call on this file, the kernel will allocate a chunk
+ of Peer-to-Peer memory for use in Peer-to-Peer transactions.
+ This memory can be used in O_DIRECT calls to NVMe backed
+ files for Peer-to-Peer copies.
+
+What: /sys/bus/pci/devices/.../link/clkpm
+ /sys/bus/pci/devices/.../link/l0s_aspm
+ /sys/bus/pci/devices/.../link/l1_aspm
+ /sys/bus/pci/devices/.../link/l1_1_aspm
+ /sys/bus/pci/devices/.../link/l1_2_aspm
+ /sys/bus/pci/devices/.../link/l1_1_pcipm
+ /sys/bus/pci/devices/.../link/l1_2_pcipm
+Date: October 2019
+Contact: Heiner Kallweit <hkallweit1@gmail.com>
+Description: If ASPM is supported for an endpoint, these files can be
+ used to disable or enable the individual power management
+ states. Write y/1/on to enable, n/0/off to disable.
+
+What: /sys/bus/pci/devices/.../power_state
+Date: November 2020
+Contact: Linux PCI developers <linux-pci@vger.kernel.org>
+Description:
+ This file contains the current PCI power state of the device.
+ The value comes from the PCI kernel device state and can be one
+ of: "unknown", "error", "D0", D1", "D2", "D3hot", "D3cold".
+ The file is read only.
+
+What: /sys/bus/pci/devices/.../sriov_vf_total_msix
+Date: January 2021
+Contact: Leon Romanovsky <leonro@nvidia.com>
+Description:
+ This file is associated with a SR-IOV physical function (PF).
+ It contains the total number of MSI-X vectors available for
+ assignment to all virtual functions (VFs) associated with PF.
+ The value will be zero if the device doesn't support this
+ functionality. For supported devices, the value will be
+ constant and won't be changed after MSI-X vectors assignment.
+
+What: /sys/bus/pci/devices/.../sriov_vf_msix_count
+Date: January 2021
+Contact: Leon Romanovsky <leonro@nvidia.com>
+Description:
+ This file is associated with a SR-IOV virtual function (VF).
+ It allows configuration of the number of MSI-X vectors for
+ the VF. This allows devices that have a global pool of MSI-X
+ vectors to optimally divide them between VFs based on VF usage.
+
+ The values accepted are:
+ * > 0 - this number will be reported as the Table Size in the
+ VF's MSI-X capability
+ * < 0 - not valid
+ * = 0 - will reset to the device default value
+
+ The file is writable if the PF is bound to a driver that
+ implements ->sriov_set_msix_vec_count().
+
+What: /sys/bus/pci/devices/.../resourceN_resize
+Date: September 2022
+Contact: Alex Williamson <alex.williamson@redhat.com>
+Description:
+ These files provide an interface to PCIe Resizable BAR support.
+ A file is created for each BAR resource (N) supported by the
+ PCIe Resizable BAR extended capability of the device. Reading
+ each file exposes the bitmap of available resource sizes:
+
+ # cat resource1_resize
+ 00000000000001c0
+
+ The bitmap represents supported resource sizes for the BAR,
+ where bit0 = 1MB, bit1 = 2MB, bit2 = 4MB, etc. In the above
+ example the device supports 64MB, 128MB, and 256MB BAR sizes.
+
+ When writing the file, the user provides the bit position of
+ the desired resource size, for example:
+
+ # echo 7 > resource1_resize
+
+ This indicates to set the size value corresponding to bit 7,
+ 128MB. The resulting size is 2 ^ (bit# + 20). This definition
+ matches the PCIe specification of this capability.
+
+ In order to make use of resource resizing, all PCI drivers must
+ be unbound from the device and peer devices under the same
+ parent bridge may need to be soft removed. In the case of
+ VGA devices, writing a resize value will remove low level
+ console drivers from the device. Raw users of pci-sysfs
+ resourceN attributes must be terminated prior to resizing.
+ Success of the resizing operation is not guaranteed.
+
+What: /sys/bus/pci/devices/.../leds/*:enclosure:*/brightness
+What: /sys/class/leds/*:enclosure:*/brightness
+Date: August 2024
+KernelVersion: 6.12
+Description:
+ LED indications on PCIe storage enclosures which are controlled
+ through the NPEM interface (Native PCIe Enclosure Management,
+ PCIe r6.1 sec 6.28) are accessible as led class devices, both
+ below /sys/class/leds and below NPEM-capable PCI devices.
+
+ Although these led class devices could be manipulated manually,
+ in practice they are typically manipulated automatically by an
+ application such as ledmon(8).
+
+ The name of a led class device is as follows:
+ <bdf>:enclosure:<indication>
+ where:
+
+ - <bdf> is the domain, bus, device and function number
+ (e.g. 10000:02:05.0)
+ - <indication> is a short description of the LED indication
+
+ Valid indications per PCIe r6.1 table 6-27 are:
+
+ - ok (drive is functioning normally)
+ - locate (drive is being identified by an admin)
+ - fail (drive is not functioning properly)
+ - rebuild (drive is part of an array that is rebuilding)
+ - pfa (drive is predicted to fail soon)
+ - hotspare (drive is marked to be used as a replacement)
+ - ica (drive is part of an array that is degraded)
+ - ifa (drive is part of an array that is failed)
+ - idt (drive is not the right type for the connector)
+ - disabled (drive is disabled, removal is safe)
+ - specific0 to specific7 (enclosure-specific indications)
+
+ Broadly, the indications fall into one of these categories:
+
+ - to signify drive state (ok, locate, fail, idt, disabled)
+ - to signify drive role or state in a software RAID array
+ (rebuild, pfa, hotspare, ica, ifa)
+ - to signify any other role or state (specific0 to specific7)
+
+ Mandatory indications per PCIe r6.1 sec 7.9.19.2 comprise:
+ ok, locate, fail, rebuild. All others are optional.
+ A led class device is only visible if the corresponding
+ indication is supported by the device.
+
+ To manipulate the indications, write 0 (LED_OFF) or 1 (LED_ON)
+ to the "brightness" file. Note that manipulating an indication
+ may implicitly manipulate other indications at the vendor's
+ discretion. E.g. when the user lights up the "ok" indication,
+ the vendor may choose to automatically turn off the "fail"
+ indication. The current state of an indication can be
+ retrieved by reading its "brightness" file.
+
+ The PCIe Base Specification allows vendors leeway to choose
+ different colors or blinking patterns for the indications,
+ but they typically follow the IBPI standard. E.g. the "locate"
+ indication is usually presented as one or two LEDs blinking at
+ 4 Hz frequency:
+ https://en.wikipedia.org/wiki/International_Blinking_Pattern_Interpretation
+
+ PCI Firmware Specification r3.3 sec 4.7 defines a DSM interface
+ to facilitate shared access by operating system and platform
+ firmware to a device's NPEM registers. The kernel will use
+ this DSM interface where available, instead of accessing NPEM
+ registers directly. The DSM interface does not support the
+ enclosure-specific indications "specific0" to "specific7",
+ hence the corresponding led class devices are unavailable if
+ the DSM interface is used.
+
+What: /sys/bus/pci/devices/.../doe_features
+Date: March 2025
+Contact: Linux PCI developers <linux-pci@vger.kernel.org>
+Description:
+ This directory contains a list of the supported Data Object
+ Exchange (DOE) features. The features are the file name.
+ The contents of each file is the raw Vendor ID and data
+ object feature values.
+
+ The value comes from the device and specifies the vendor and
+ data object type supported. The lower (RHS of the colon) is
+ the data object type in hex. The upper (LHS of the colon)
+ is the vendor ID.
+
+ As all DOE devices must support the DOE discovery feature,
+ if DOE is supported you will at least see the doe_discovery
+ file, with this contents:
+
+ # cat doe_features/doe_discovery
+ 0001:00
+
+ If the device supports other features you will see other
+ files as well. For example if CMA/SPDM and secure CMA/SPDM
+ are supported the doe_features directory will look like
+ this:
+
+ # ls doe_features
+ 0001:01 0001:02 doe_discovery
+
+What: /sys/bus/pci/devices/.../serial_number
+Date: December 2025
+Contact: Matthew Wood <thepacketgeek@gmail.com>
+Description:
+ This is visible only for PCI devices that support the serial
+ number extended capability. The file is read only and due to
+ the possible sensitivity of accessible serial numbers, admin
+ only.
diff --git a/Documentation/ABI/testing/sysfs-bus-pci-devices-aer b/Documentation/ABI/testing/sysfs-bus-pci-devices-aer
new file mode 100644
index 000000000000..5ed284523956
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-pci-devices-aer
@@ -0,0 +1,163 @@
+PCIe Device AER statistics
+--------------------------
+
+These attributes show up under all the devices that are AER capable. These
+statistical counters indicate the errors "as seen/reported by the device".
+Note that this may mean that if an endpoint is causing problems, the AER
+counters may increment at its link partner (e.g. root port) because the
+errors may be "seen" / reported by the link partner and not the
+problematic endpoint itself (which may report all counters as 0 as it never
+saw any problems).
+
+What: /sys/bus/pci/devices/<dev>/aer_dev_correctable
+Date: July 2018
+KernelVersion: 4.19.0
+Contact: linux-pci@vger.kernel.org, rajatja@google.com
+Description: List of correctable errors seen and reported by this
+ PCI device using ERR_COR. Note that since multiple errors may
+ be reported using a single ERR_COR message, thus
+ TOTAL_ERR_COR at the end of the file may not match the actual
+ total of all the errors in the file. Sample output::
+
+ localhost /sys/devices/pci0000:00/0000:00:1c.0 # cat aer_dev_correctable
+ Receiver Error 2
+ Bad TLP 0
+ Bad DLLP 0
+ RELAY_NUM Rollover 0
+ Replay Timer Timeout 0
+ Advisory Non-Fatal 0
+ Corrected Internal Error 0
+ Header Log Overflow 0
+ TOTAL_ERR_COR 2
+
+What: /sys/bus/pci/devices/<dev>/aer_dev_fatal
+Date: July 2018
+KernelVersion: 4.19.0
+Contact: linux-pci@vger.kernel.org, rajatja@google.com
+Description: List of uncorrectable fatal errors seen and reported by this
+ PCI device using ERR_FATAL. Note that since multiple errors may
+ be reported using a single ERR_FATAL message, thus
+ TOTAL_ERR_FATAL at the end of the file may not match the actual
+ total of all the errors in the file. Sample output::
+
+ localhost /sys/devices/pci0000:00/0000:00:1c.0 # cat aer_dev_fatal
+ Undefined 0
+ Data Link Protocol 0
+ Surprise Down Error 0
+ Poisoned TLP 0
+ Flow Control Protocol 0
+ Completion Timeout 0
+ Completer Abort 0
+ Unexpected Completion 0
+ Receiver Overflow 0
+ Malformed TLP 0
+ ECRC 0
+ Unsupported Request 0
+ ACS Violation 0
+ Uncorrectable Internal Error 0
+ MC Blocked TLP 0
+ AtomicOp Egress Blocked 0
+ TLP Prefix Blocked Error 0
+ TOTAL_ERR_FATAL 0
+
+What: /sys/bus/pci/devices/<dev>/aer_dev_nonfatal
+Date: July 2018
+KernelVersion: 4.19.0
+Contact: linux-pci@vger.kernel.org, rajatja@google.com
+Description: List of uncorrectable nonfatal errors seen and reported by this
+ PCI device using ERR_NONFATAL. Note that since multiple errors
+ may be reported using a single ERR_FATAL message, thus
+ TOTAL_ERR_NONFATAL at the end of the file may not match the
+ actual total of all the errors in the file. Sample output::
+
+ localhost /sys/devices/pci0000:00/0000:00:1c.0 # cat aer_dev_nonfatal
+ Undefined 0
+ Data Link Protocol 0
+ Surprise Down Error 0
+ Poisoned TLP 0
+ Flow Control Protocol 0
+ Completion Timeout 0
+ Completer Abort 0
+ Unexpected Completion 0
+ Receiver Overflow 0
+ Malformed TLP 0
+ ECRC 0
+ Unsupported Request 0
+ ACS Violation 0
+ Uncorrectable Internal Error 0
+ MC Blocked TLP 0
+ AtomicOp Egress Blocked 0
+ TLP Prefix Blocked Error 0
+ TOTAL_ERR_NONFATAL 0
+
+PCIe Rootport AER statistics
+----------------------------
+
+These attributes show up under only the rootports (or root complex event
+collectors) that are AER capable. These indicate the number of error messages as
+"reported to" the rootport. Please note that the rootports also transmit
+(internally) the ERR_* messages for errors seen by the internal rootport PCI
+device, so these counters include them and are thus cumulative of all the error
+messages on the PCI hierarchy originating at that root port.
+
+What: /sys/bus/pci/devices/<dev>/aer_rootport_total_err_cor
+Date: July 2018
+KernelVersion: 4.19.0
+Contact: linux-pci@vger.kernel.org, rajatja@google.com
+Description: Total number of ERR_COR messages reported to rootport.
+
+What: /sys/bus/pci/devices/<dev>/aer_rootport_total_err_fatal
+Date: July 2018
+KernelVersion: 4.19.0
+Contact: linux-pci@vger.kernel.org, rajatja@google.com
+Description: Total number of ERR_FATAL messages reported to rootport.
+
+What: /sys/bus/pci/devices/<dev>/aer_rootport_total_err_nonfatal
+Date: July 2018
+KernelVersion: 4.19.0
+Contact: linux-pci@vger.kernel.org, rajatja@google.com
+Description: Total number of ERR_NONFATAL messages reported to rootport.
+
+PCIe AER ratelimits
+-------------------
+
+These attributes show up under all the devices that are AER capable.
+They represent configurable ratelimits of logs per error type.
+
+See Documentation/PCI/pcieaer-howto.rst for more info on ratelimits.
+
+What: /sys/bus/pci/devices/<dev>/aer/correctable_ratelimit_interval_ms
+Date: May 2025
+KernelVersion: 6.16.0
+Contact: linux-pci@vger.kernel.org
+Description: Writing 0 disables AER correctable error log ratelimiting.
+ Writing a positive value sets the ratelimit interval in ms.
+ Default is DEFAULT_RATELIMIT_INTERVAL (5000 ms).
+
+What: /sys/bus/pci/devices/<dev>/aer/correctable_ratelimit_burst
+Date: May 2025
+KernelVersion: 6.16.0
+Contact: linux-pci@vger.kernel.org
+Description: Ratelimit burst for correctable error logs. Writing a value
+ changes the number of errors (burst) allowed per interval
+ before ratelimiting. Reading gets the current ratelimit
+ burst. Default is DEFAULT_RATELIMIT_BURST (10).
+
+What: /sys/bus/pci/devices/<dev>/aer/nonfatal_ratelimit_interval_ms
+Date: May 2025
+KernelVersion: 6.16.0
+Contact: linux-pci@vger.kernel.org
+Description: Writing 0 disables AER non-fatal uncorrectable error log
+ ratelimiting. Writing a positive value sets the ratelimit
+ interval in ms. Default is DEFAULT_RATELIMIT_INTERVAL
+ (5000 ms).
+
+What: /sys/bus/pci/devices/<dev>/aer/nonfatal_ratelimit_burst
+Date: May 2025
+KernelVersion: 6.16.0
+Contact: linux-pci@vger.kernel.org
+Description: Ratelimit burst for non-fatal uncorrectable error logs.
+ Writing a value changes the number of errors (burst)
+ allowed per interval before ratelimiting. Reading gets the
+ current ratelimit burst. Default is DEFAULT_RATELIMIT_BURST
+ (10).
diff --git a/Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats b/Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats
deleted file mode 100644
index 4b0318c99507..000000000000
--- a/Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats
+++ /dev/null
@@ -1,122 +0,0 @@
-==========================
-PCIe Device AER statistics
-==========================
-These attributes show up under all the devices that are AER capable. These
-statistical counters indicate the errors "as seen/reported by the device".
-Note that this may mean that if an endpoint is causing problems, the AER
-counters may increment at its link partner (e.g. root port) because the
-errors may be "seen" / reported by the link partner and not the
-problematic endpoint itself (which may report all counters as 0 as it never
-saw any problems).
-
-Where: /sys/bus/pci/devices/<dev>/aer_dev_correctable
-Date: July 2018
-Kernel Version: 4.19.0
-Contact: linux-pci@vger.kernel.org, rajatja@google.com
-Description: List of correctable errors seen and reported by this
- PCI device using ERR_COR. Note that since multiple errors may
- be reported using a single ERR_COR message, thus
- TOTAL_ERR_COR at the end of the file may not match the actual
- total of all the errors in the file. Sample output:
--------------------------------------------------------------------------
-localhost /sys/devices/pci0000:00/0000:00:1c.0 # cat aer_dev_correctable
-Receiver Error 2
-Bad TLP 0
-Bad DLLP 0
-RELAY_NUM Rollover 0
-Replay Timer Timeout 0
-Advisory Non-Fatal 0
-Corrected Internal Error 0
-Header Log Overflow 0
-TOTAL_ERR_COR 2
--------------------------------------------------------------------------
-
-Where: /sys/bus/pci/devices/<dev>/aer_dev_fatal
-Date: July 2018
-Kernel Version: 4.19.0
-Contact: linux-pci@vger.kernel.org, rajatja@google.com
-Description: List of uncorrectable fatal errors seen and reported by this
- PCI device using ERR_FATAL. Note that since multiple errors may
- be reported using a single ERR_FATAL message, thus
- TOTAL_ERR_FATAL at the end of the file may not match the actual
- total of all the errors in the file. Sample output:
--------------------------------------------------------------------------
-localhost /sys/devices/pci0000:00/0000:00:1c.0 # cat aer_dev_fatal
-Undefined 0
-Data Link Protocol 0
-Surprise Down Error 0
-Poisoned TLP 0
-Flow Control Protocol 0
-Completion Timeout 0
-Completer Abort 0
-Unexpected Completion 0
-Receiver Overflow 0
-Malformed TLP 0
-ECRC 0
-Unsupported Request 0
-ACS Violation 0
-Uncorrectable Internal Error 0
-MC Blocked TLP 0
-AtomicOp Egress Blocked 0
-TLP Prefix Blocked Error 0
-TOTAL_ERR_FATAL 0
--------------------------------------------------------------------------
-
-Where: /sys/bus/pci/devices/<dev>/aer_dev_nonfatal
-Date: July 2018
-Kernel Version: 4.19.0
-Contact: linux-pci@vger.kernel.org, rajatja@google.com
-Description: List of uncorrectable nonfatal errors seen and reported by this
- PCI device using ERR_NONFATAL. Note that since multiple errors
- may be reported using a single ERR_FATAL message, thus
- TOTAL_ERR_NONFATAL at the end of the file may not match the
- actual total of all the errors in the file. Sample output:
--------------------------------------------------------------------------
-localhost /sys/devices/pci0000:00/0000:00:1c.0 # cat aer_dev_nonfatal
-Undefined 0
-Data Link Protocol 0
-Surprise Down Error 0
-Poisoned TLP 0
-Flow Control Protocol 0
-Completion Timeout 0
-Completer Abort 0
-Unexpected Completion 0
-Receiver Overflow 0
-Malformed TLP 0
-ECRC 0
-Unsupported Request 0
-ACS Violation 0
-Uncorrectable Internal Error 0
-MC Blocked TLP 0
-AtomicOp Egress Blocked 0
-TLP Prefix Blocked Error 0
-TOTAL_ERR_NONFATAL 0
--------------------------------------------------------------------------
-
-============================
-PCIe Rootport AER statistics
-============================
-These attributes show up under only the rootports (or root complex event
-collectors) that are AER capable. These indicate the number of error messages as
-"reported to" the rootport. Please note that the rootports also transmit
-(internally) the ERR_* messages for errors seen by the internal rootport PCI
-device, so these counters include them and are thus cumulative of all the error
-messages on the PCI hierarchy originating at that root port.
-
-Where: /sys/bus/pci/devices/<dev>/aer_stats/aer_rootport_total_err_cor
-Date: July 2018
-Kernel Version: 4.19.0
-Contact: linux-pci@vger.kernel.org, rajatja@google.com
-Description: Total number of ERR_COR messages reported to rootport.
-
-Where: /sys/bus/pci/devices/<dev>/aer_stats/aer_rootport_total_err_fatal
-Date: July 2018
-Kernel Version: 4.19.0
-Contact: linux-pci@vger.kernel.org, rajatja@google.com
-Description: Total number of ERR_FATAL messages reported to rootport.
-
-Where: /sys/bus/pci/devices/<dev>/aer_stats/aer_rootport_total_err_nonfatal
-Date: July 2018
-Kernel Version: 4.19.0
-Contact: linux-pci@vger.kernel.org, rajatja@google.com
-Description: Total number of ERR_NONFATAL messages reported to rootport.
diff --git a/Documentation/ABI/testing/sysfs-bus-pci-devices-avs b/Documentation/ABI/testing/sysfs-bus-pci-devices-avs
new file mode 100644
index 000000000000..ebff3fa12055
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-pci-devices-avs
@@ -0,0 +1,8 @@
+What: /sys/devices/pci0000:00/<dev>/avs/fw_version
+Date: February 2024
+Contact: Cezary Rojewski <cezary.rojewski@intel.com>
+Description:
+ Version of AudioDSP firmware ASoC avs driver is communicating
+ with.
+
+ Format: %d.%d.%d.%d, type:major:minor:build.
diff --git a/Documentation/ABI/testing/sysfs-bus-pci-devices-catpt b/Documentation/ABI/testing/sysfs-bus-pci-devices-catpt
new file mode 100644
index 000000000000..f85db86d63e8
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-pci-devices-catpt
@@ -0,0 +1,17 @@
+What: /sys/devices/pci0000:00/<dev>/fw_version
+Date: September 2020
+Contact: Cezary Rojewski <cezary.rojewski@intel.com>
+Description:
+ Version of AudioDSP firmware ASoC catpt driver is
+ communicating with.
+
+ Format: %d.%d.%d.%d, type:major:minor:build.
+
+What: /sys/devices/pci0000:00/<dev>/fw_info
+Date: September 2020
+Contact: Cezary Rojewski <cezary.rojewski@intel.com>
+Description:
+ Detailed AudioDSP firmware build information including
+ build hash and log-providers hash. This information is
+ obtained during initial handshake with firmware.
+ Format: %s.
diff --git a/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss
index 53d99edd1d75..92a94e1068c2 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss
+++ b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss
@@ -1,68 +1,68 @@
-Where: /sys/bus/pci/devices/<dev>/ccissX/cXdY/model
+What: /sys/bus/pci/devices/<dev>/ccissX/cXdY/model
Date: March 2009
-Kernel Version: 2.6.30
+KernelVersion: 2.6.30
Contact: iss_storagedev@hp.com
Description: Displays the SCSI INQUIRY page 0 model for logical drive
Y of controller X.
-Where: /sys/bus/pci/devices/<dev>/ccissX/cXdY/rev
+What: /sys/bus/pci/devices/<dev>/ccissX/cXdY/rev
Date: March 2009
-Kernel Version: 2.6.30
+KernelVersion: 2.6.30
Contact: iss_storagedev@hp.com
Description: Displays the SCSI INQUIRY page 0 revision for logical
drive Y of controller X.
-Where: /sys/bus/pci/devices/<dev>/ccissX/cXdY/unique_id
+What: /sys/bus/pci/devices/<dev>/ccissX/cXdY/unique_id
Date: March 2009
-Kernel Version: 2.6.30
+KernelVersion: 2.6.30
Contact: iss_storagedev@hp.com
Description: Displays the SCSI INQUIRY page 83 serial number for logical
drive Y of controller X.
-Where: /sys/bus/pci/devices/<dev>/ccissX/cXdY/vendor
+What: /sys/bus/pci/devices/<dev>/ccissX/cXdY/vendor
Date: March 2009
-Kernel Version: 2.6.30
+KernelVersion: 2.6.30
Contact: iss_storagedev@hp.com
Description: Displays the SCSI INQUIRY page 0 vendor for logical drive
Y of controller X.
-Where: /sys/bus/pci/devices/<dev>/ccissX/cXdY/block:cciss!cXdY
+What: /sys/bus/pci/devices/<dev>/ccissX/cXdY/block:cciss!cXdY
Date: March 2009
-Kernel Version: 2.6.30
+KernelVersion: 2.6.30
Contact: iss_storagedev@hp.com
Description: A symbolic link to /sys/block/cciss!cXdY
-Where: /sys/bus/pci/devices/<dev>/ccissX/rescan
+What: /sys/bus/pci/devices/<dev>/ccissX/rescan
Date: August 2009
-Kernel Version: 2.6.31
+KernelVersion: 2.6.31
Contact: iss_storagedev@hp.com
Description: Kicks of a rescan of the controller to discover logical
drive topology changes.
-Where: /sys/bus/pci/devices/<dev>/ccissX/cXdY/lunid
+What: /sys/bus/pci/devices/<dev>/ccissX/cXdY/lunid
Date: August 2009
-Kernel Version: 2.6.31
+KernelVersion: 2.6.31
Contact: iss_storagedev@hp.com
Description: Displays the 8-byte LUN ID used to address logical
drive Y of controller X.
-Where: /sys/bus/pci/devices/<dev>/ccissX/cXdY/raid_level
+What: /sys/bus/pci/devices/<dev>/ccissX/cXdY/raid_level
Date: August 2009
-Kernel Version: 2.6.31
+KernelVersion: 2.6.31
Contact: iss_storagedev@hp.com
Description: Displays the RAID level of logical drive Y of
controller X.
-Where: /sys/bus/pci/devices/<dev>/ccissX/cXdY/usage_count
+What: /sys/bus/pci/devices/<dev>/ccissX/cXdY/usage_count
Date: August 2009
-Kernel Version: 2.6.31
+KernelVersion: 2.6.31
Contact: iss_storagedev@hp.com
Description: Displays the usage count (number of opens) of logical drive Y
of controller X.
-Where: /sys/bus/pci/devices/<dev>/ccissX/resettable
+What: /sys/bus/pci/devices/<dev>/ccissX/resettable
Date: February 2011
-Kernel Version: 2.6.38
+KernelVersion: 2.6.38
Contact: iss_storagedev@hp.com
Description: Value of 1 indicates the controller can honor the reset_devices
kernel parameter. Value of 0 indicates reset_devices cannot be
@@ -71,9 +71,9 @@ Description: Value of 1 indicates the controller can honor the reset_devices
a dump device, as kdump requires resetting the device in order
to work reliably.
-Where: /sys/bus/pci/devices/<dev>/ccissX/transport_mode
+What: /sys/bus/pci/devices/<dev>/ccissX/transport_mode
Date: July 2011
-Kernel Version: 3.0
+KernelVersion: 3.0
Contact: iss_storagedev@hp.com
Description: Value of "simple" indicates that the controller has been placed
in "simple mode". Value of "performant" indicates that the
diff --git a/Documentation/ABI/testing/sysfs-bus-pci-devices-pvpanic b/Documentation/ABI/testing/sysfs-bus-pci-devices-pvpanic
new file mode 100644
index 000000000000..4ec03cd36357
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-pci-devices-pvpanic
@@ -0,0 +1,26 @@
+What: /sys/devices/pci0000:00/*/QEMU0001:00/capability for MMIO
+ /sys/bus/pci/drivers/pvpanic-pci/0000:00:0*.0/capability for PCI
+Date: Jan 2021
+Contact: zhenwei pi <pizhenwei@bytedance.com>
+Description:
+ Read-only attribute. Capabilities of pvpanic device which
+ are supported by QEMU.
+
+ Format: %x.
+
+ Detailed bit definition refers to section <Bit Definition>
+ from pvpanic device specification:
+ https://git.qemu.org/?p=qemu.git;a=blob_plain;f=docs/specs/pvpanic.txt
+
+What: /sys/devices/pci0000:00/*/QEMU0001:00/events
+ /sys/bus/pci/drivers/pvpanic-pci/0000:00:0*.0/events for PCI
+Date: Jan 2021
+Contact: zhenwei pi <pizhenwei@bytedance.com>
+Description:
+ RW attribute. Set/get which features in-use. This attribute
+ is used to enable/disable feature(s) of pvpanic device.
+ Notice that this value should be a subset of capability.
+
+ Format: %x.
+
+ Also refer to pvpanic device specification.
diff --git a/Documentation/ABI/testing/sysfs-bus-pci-drivers-ehci_hcd b/Documentation/ABI/testing/sysfs-bus-pci-drivers-ehci_hcd
index 60c60fa624b2..c90d97a80855 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci-drivers-ehci_hcd
+++ b/Documentation/ABI/testing/sysfs-bus-pci-drivers-ehci_hcd
@@ -21,11 +21,11 @@ Description:
number returns the port to normal operation.
For example: To force the high-speed device attached to
- port 4 on bus 2 to run at full speed:
+ port 4 on bus 2 to run at full speed::
echo 4 >/sys/bus/usb/devices/usb2/../companion
- To return the port to high-speed operation:
+ To return the port to high-speed operation::
echo -4 >/sys/bus/usb/devices/usb2/../companion
diff --git a/Documentation/ABI/testing/sysfs-bus-pci-drivers-xhci_hcd b/Documentation/ABI/testing/sysfs-bus-pci-drivers-xhci_hcd
index 0088aba4caa8..fc82aa4e54b0 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci-drivers-xhci_hcd
+++ b/Documentation/ABI/testing/sysfs-bus-pci-drivers-xhci_hcd
@@ -23,3 +23,65 @@ Description:
Reading this attribute gives the state of the DbC. It
can be one of the following states: disabled, enabled,
initialized, connected, configured and stalled.
+
+What: /sys/bus/pci/drivers/xhci_hcd/.../dbc_idVendor
+Date: March 2023
+Contact: Mathias Nyman <mathias.nyman@linux.intel.com>
+Description:
+ This dbc_idVendor attribute lets us change the idVendor field
+ presented in the USB device descriptor by this xhci debug
+ device.
+ Value can only be changed while debug capability (DbC) is in
+ disabled state to prevent USB device descriptor change while
+ connected to a USB host.
+ The default value is 0x1d6b (Linux Foundation).
+ It can be any 16-bit integer.
+
+What: /sys/bus/pci/drivers/xhci_hcd/.../dbc_idProduct
+Date: March 2023
+Contact: Mathias Nyman <mathias.nyman@linux.intel.com>
+Description:
+ This dbc_idProduct attribute lets us change the idProduct field
+ presented in the USB device descriptor by this xhci debug
+ device.
+ Value can only be changed while debug capability (DbC) is in
+ disabled state to prevent USB device descriptor change while
+ connected to a USB host.
+ The default value is 0x0010. It can be any 16-bit integer.
+
+What: /sys/bus/pci/drivers/xhci_hcd/.../dbc_bcdDevice
+Date: March 2023
+Contact: Mathias Nyman <mathias.nyman@linux.intel.com>
+Description:
+ This dbc_bcdDevice attribute lets us change the bcdDevice field
+ presented in the USB device descriptor by this xhci debug
+ device.
+ Value can only be changed while debug capability (DbC) is in
+ disabled state to prevent USB device descriptor change while
+ connected to a USB host.
+ The default value is 0x0010. (device rev 0.10)
+ It can be any 16-bit integer.
+
+What: /sys/bus/pci/drivers/xhci_hcd/.../dbc_bInterfaceProtocol
+Date: March 2023
+Contact: Mathias Nyman <mathias.nyman@linux.intel.com>
+Description:
+ This attribute lets us change the bInterfaceProtocol field
+ presented in the USB Interface descriptor by the xhci debug
+ device.
+ Value can only be changed while debug capability (DbC) is in
+ disabled state to prevent USB descriptor change while
+ connected to a USB host.
+ The default value is 1 (GNU Remote Debug command).
+ Other permissible value is 0 which is for vendor defined debug
+ target.
+
+What: /sys/bus/pci/drivers/xhci_hcd/.../dbc_poll_interval_ms
+Date: February 2024
+Contact: Mathias Nyman <mathias.nyman@linux.intel.com>
+Description:
+ This attribute adjust the polling interval used to check for
+ DbC events. Unit is milliseconds. Accepted values range from 0
+ up to 5000. The default value is 64 ms.
+ This polling interval is used while DbC is enabled but has no
+ active data transfers.
diff --git a/Documentation/ABI/testing/sysfs-bus-peci b/Documentation/ABI/testing/sysfs-bus-peci
new file mode 100644
index 000000000000..87454ec5d981
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-peci
@@ -0,0 +1,16 @@
+What: /sys/bus/peci/rescan
+Date: July 2021
+KernelVersion: 5.18
+Contact: Iwona Winiarska <iwona.winiarska@intel.com>
+Description:
+ Writing a non-zero value to this attribute will
+ initiate scan for PECI devices on all PECI controllers
+ in the system.
+
+What: /sys/bus/peci/devices/<controller_id>-<device_addr>/remove
+Date: July 2021
+KernelVersion: 5.18
+Contact: Iwona Winiarska <iwona.winiarska@intel.com>
+Description:
+ Writing a non-zero value to this attribute will
+ remove the PECI device and any of its children.
diff --git a/Documentation/ABI/testing/sysfs-bus-platform b/Documentation/ABI/testing/sysfs-bus-platform
index 5172a6124b27..c4dfe7355c2d 100644
--- a/Documentation/ABI/testing/sysfs-bus-platform
+++ b/Documentation/ABI/testing/sysfs-bus-platform
@@ -18,3 +18,39 @@ Description:
devices to opt-out of driver binding using a driver_override
name such as "none". Only a single driver may be specified in
the override, there is no support for parsing delimiters.
+
+What: /sys/bus/platform/devices/.../numa_node
+Date: June 2020
+Contact: Barry Song <song.bao.hua@hisilicon.com>
+Description:
+ This file contains the NUMA node to which the platform device
+ is attached. It won't be visible if the node is unknown. The
+ value comes from an ACPI _PXM method or a similar firmware
+ source. Initial users for this file would be devices like
+ arm smmu which are populated by arm64 acpi_iort.
+
+What: /sys/bus/platform/devices/.../msi_irqs/
+Date: August 2021
+Contact: Barry Song <song.bao.hua@hisilicon.com>
+Description:
+ The /sys/devices/.../msi_irqs directory contains a variable set
+ of files, with each file being named after a corresponding msi
+ irq vector allocated to that device.
+
+What: /sys/bus/platform/devices/.../msi_irqs/<N>
+Date: August 2021
+Contact: Barry Song <song.bao.hua@hisilicon.com>
+Description:
+ This attribute will show "msi" if <N> is a valid msi irq
+
+What: /sys/bus/platform/devices/.../modalias
+Description:
+ Same as MODALIAS in the uevent at device creation.
+
+ A platform device that it is exposed via devicetree uses:
+
+ - of:N`of node name`T`type`
+
+ Other platform devices use, instead:
+
+ - platform:`driver name`
diff --git a/Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro b/Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro
new file mode 100644
index 000000000000..fead760dcf77
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro
@@ -0,0 +1,325 @@
+What: /sys/bus/platform/devices/smpro-errmon.*/error_[core|mem|pcie|other]_[ce|ue]
+KernelVersion: 6.1
+Contact: Quan Nguyen <quan@os.amperecomputing.com>
+Description:
+ (RO) Contains the 48-byte Ampere (Vendor-Specific) Error Record printed
+ in hex format according to the table below:
+
+ +--------+---------------+-------------+------------------------------------------------------------+
+ | Offset | Field | Size (byte) | Description |
+ +--------+---------------+-------------+------------------------------------------------------------+
+ | 00 | Error Type | 1 | See :ref:`the table below <smpro-error-types>` for details |
+ +--------+---------------+-------------+------------------------------------------------------------+
+ | 01 | Subtype | 1 | See :ref:`the table below <smpro-error-types>` for details |
+ +--------+---------------+-------------+------------------------------------------------------------+
+ | 02 | Instance | 2 | See :ref:`the table below <smpro-error-types>` for details |
+ +--------+---------------+-------------+------------------------------------------------------------+
+ | 04 | Error status | 4 | See ARM RAS specification for details |
+ +--------+---------------+-------------+------------------------------------------------------------+
+ | 08 | Error Address | 8 | See ARM RAS specification for details |
+ +--------+---------------+-------------+------------------------------------------------------------+
+ | 16 | Error Misc 0 | 8 | See ARM RAS specification for details |
+ +--------+---------------+-------------+------------------------------------------------------------+
+ | 24 | Error Misc 1 | 8 | See ARM RAS specification for details |
+ +--------+---------------+-------------+------------------------------------------------------------+
+ | 32 | Error Misc 2 | 8 | See ARM RAS specification for details |
+ +--------+---------------+-------------+------------------------------------------------------------+
+ | 40 | Error Misc 3 | 8 | See ARM RAS specification for details |
+ +--------+---------------+-------------+------------------------------------------------------------+
+
+ The table below defines the value of error types, their subtype, subcomponent and instance:
+
+ .. _smpro-error-types:
+
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | Error Group | Error Type | Sub type | Sub component | Instance |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | CPM (core) | 0 | 0 | Snoop-Logic | CPM # |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | CPM (core) | 0 | 2 | Armv8 Core 1 | CPM # |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | MCU (mem) | 1 | 1 | ERR1 | MCU # \| SLOT << 11 |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | MCU (mem) | 1 | 2 | ERR2 | MCU # \| SLOT << 11 |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | MCU (mem) | 1 | 3 | ERR3 | MCU # |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | MCU (mem) | 1 | 4 | ERR4 | MCU # |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | MCU (mem) | 1 | 5 | ERR5 | MCU # |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | MCU (mem) | 1 | 6 | ERR6 | MCU # |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | MCU (mem) | 1 | 7 | Link Error | MCU # |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | Mesh (other) | 2 | 0 | Cross Point | X \| (Y << 5) \| NS <<11 |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | Mesh (other) | 2 | 1 | Home Node(IO) | X \| (Y << 5) \| NS <<11 |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | Mesh (other) | 2 | 2 | Home Node(Mem) | X \| (Y << 5) \| NS <<11 \| device<<12 |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | Mesh (other) | 2 | 4 | CCIX Node | X \| (Y << 5) \| NS <<11 |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | 2P Link (other) | 3 | 0 | N/A | Altra 2P Link # |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | GIC (other) | 5 | 0 | ERR0 | 0 |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | GIC (other) | 5 | 1 | ERR1 | 0 |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | GIC (other) | 5 | 2 | ERR2 | 0 |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | GIC (other) | 5 | 3 | ERR3 | 0 |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | GIC (other) | 5 | 4 | ERR4 | 0 |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | GIC (other) | 5 | 5 | ERR5 | 0 |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | GIC (other) | 5 | 6 | ERR6 | 0 |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | GIC (other) | 5 | 7 | ERR7 | 0 |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | GIC (other) | 5 | 8 | ERR8 | 0 |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | GIC (other) | 5 | 9 | ERR9 | 0 |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | GIC (other) | 5 | 10 | ERR10 | 0 |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | GIC (other) | 5 | 11 | ERR11 | 0 |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | GIC (other) | 5 | 12 | ERR12 | 0 |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | GIC (other) | 5 | 13-21 | ERR13 | RC # + 1 |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | SMMU (other) | 6 | TCU | 100 | RC # |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | SMMU (other) | 6 | TBU0 | 0 | RC # |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | SMMU (other) | 6 | TBU1 | 1 | RC # |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | SMMU (other) | 6 | TBU2 | 2 | RC # |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | SMMU (other) | 6 | TBU3 | 3 | RC # |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | SMMU (other) | 6 | TBU4 | 4 | RC # |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | SMMU (other) | 6 | TBU5 | 5 | RC # |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | SMMU (other) | 6 | TBU6 | 6 | RC # |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | SMMU (other) | 6 | TBU7 | 7 | RC # |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | SMMU (other) | 6 | TBU8 | 8 | RC # |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | SMMU (other) | 6 | TBU9 | 9 | RC # |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | PCIe AER (pcie) | 7 | Root | 0 | RC # |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | PCIe AER (pcie) | 7 | Device | 1 | RC # |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | PCIe RC (pcie) | 8 | RCA HB | 0 | RC # |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | PCIe RC (pcie) | 8 | RCB HB | 1 | RC # |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | PCIe RC (pcie) | 8 | RASDP | 8 | RC # |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | OCM (other) | 9 | ERR0 | 0 | 0 |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | OCM (other) | 9 | ERR1 | 1 | 0 |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | OCM (other) | 9 | ERR2 | 2 | 0 |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | SMpro (other) | 10 | ERR0 | 0 | 0 |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | SMpro (other) | 10 | ERR1 | 1 | 0 |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | SMpro (other) | 10 | MPA_ERR | 2 | 0 |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | PMpro (other) | 11 | ERR0 | 0 | 0 |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | PMpro (other) | 11 | ERR1 | 1 | 0 |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+ | PMpro (other) | 11 | MPA_ERR | 2 | 0 |
+ +-----------------+------------+----------+----------------+----------------------------------------+
+
+ Example::
+
+ # cat error_other_ue
+ 880807001e004010401040101500000001004010401040100c0000000000000000000000000000000000000000000000
+
+ The detail of each sysfs entries is as below:
+
+ +-------------+---------------------------------------------------------+----------------------------------+
+ | Error | Sysfs entry | Description (when triggered) |
+ +-------------+---------------------------------------------------------+----------------------------------+
+ | Core's CE | /sys/bus/platform/devices/smpro-errmon.*/error_core_ce | Core has CE error |
+ +-------------+---------------------------------------------------------+----------------------------------+
+ | Core's UE | /sys/bus/platform/devices/smpro-errmon.*/error_core_ue | Core has UE error |
+ +-------------+---------------------------------------------------------+----------------------------------+
+ | Memory's CE | /sys/bus/platform/devices/smpro-errmon.*/error_mem_ce | Memory has CE error |
+ +-------------+---------------------------------------------------------+----------------------------------+
+ | Memory's UE | /sys/bus/platform/devices/smpro-errmon.*/error_mem_ue | Memory has UE error |
+ +-------------+---------------------------------------------------------+----------------------------------+
+ | PCIe's CE | /sys/bus/platform/devices/smpro-errmon.*/error_pcie_ce | any PCIe controller has CE error |
+ +-------------+---------------------------------------------------------+----------------------------------+
+ | PCIe's UE | /sys/bus/platform/devices/smpro-errmon.*/error_pcie_ue | any PCIe controller has UE error |
+ +-------------+---------------------------------------------------------+----------------------------------+
+ | Other's CE | /sys/bus/platform/devices/smpro-errmon.*/error_other_ce | any other CE error |
+ +-------------+---------------------------------------------------------+----------------------------------+
+ | Other's UE | /sys/bus/platform/devices/smpro-errmon.*/error_other_ue | any other UE error |
+ +-------------+---------------------------------------------------------+----------------------------------+
+
+ UE: Uncorrect-able Error
+ CE: Correct-able Error
+
+ For details, see section `3.3 Ampere (Vendor-Specific) Error Record Formats,
+ Altra Family RAS Supplement`.
+
+
+What: /sys/bus/platform/devices/smpro-errmon.*/overflow_[core|mem|pcie|other]_[ce|ue]
+KernelVersion: 6.1
+Contact: Quan Nguyen <quan@os.amperecomputing.com>
+Description:
+ (RO) Return the overflow status of each type HW error reported:
+
+ - 0 : No overflow
+ - 1 : There is an overflow and the oldest HW errors are dropped
+
+ The detail of each sysfs entries is as below:
+
+ +-------------+-----------------------------------------------------------+---------------------------------------+
+ | Overflow | Sysfs entry | Description |
+ +-------------+-----------------------------------------------------------+---------------------------------------+
+ | Core's CE | /sys/bus/platform/devices/smpro-errmon.*/overflow_core_ce | Core CE error overflow |
+ +-------------+-----------------------------------------------------------+---------------------------------------+
+ | Core's UE | /sys/bus/platform/devices/smpro-errmon.*/overflow_core_ue | Core UE error overflow |
+ +-------------+-----------------------------------------------------------+---------------------------------------+
+ | Memory's CE | /sys/bus/platform/devices/smpro-errmon.*/overflow_mem_ce | Memory CE error overflow |
+ +-------------+-----------------------------------------------------------+---------------------------------------+
+ | Memory's UE | /sys/bus/platform/devices/smpro-errmon.*/overflow_mem_ue | Memory UE error overflow |
+ +-------------+-----------------------------------------------------------+---------------------------------------+
+ | PCIe's CE | /sys/bus/platform/devices/smpro-errmon.*/overflow_pcie_ce | any PCIe controller CE error overflow |
+ +-------------+-----------------------------------------------------------+---------------------------------------+
+ | PCIe's UE | /sys/bus/platform/devices/smpro-errmon.*/overflow_pcie_ue | any PCIe controller UE error overflow |
+ +-------------+-----------------------------------------------------------+---------------------------------------+
+ | Other's CE | /sys/bus/platform/devices/smpro-errmon.*/overflow_other_ce| any other CE error overflow |
+ +-------------+-----------------------------------------------------------+---------------------------------------+
+ | Other's UE | /sys/bus/platform/devices/smpro-errmon.*/overflow_other_ue| other UE error overflow |
+ +-------------+-----------------------------------------------------------+---------------------------------------+
+
+ where:
+
+ - UE: Uncorrect-able Error
+ - CE: Correct-able Error
+
+What: /sys/bus/platform/devices/smpro-errmon.*/[error|warn]_[smpro|pmpro]
+KernelVersion: 6.1
+Contact: Quan Nguyen <quan@os.amperecomputing.com>
+Description:
+ (RO) Contains the internal firmware error/warning printed as hex format.
+
+ The detail of each sysfs entries is as below:
+
+ +---------------+------------------------------------------------------+--------------------------+
+ | Error | Sysfs entry | Description |
+ +---------------+------------------------------------------------------+--------------------------+
+ | SMpro error | /sys/bus/platform/devices/smpro-errmon.*/error_smpro | system has SMpro error |
+ +---------------+------------------------------------------------------+--------------------------+
+ | SMpro warning | /sys/bus/platform/devices/smpro-errmon.*/warn_smpro | system has SMpro warning |
+ +---------------+------------------------------------------------------+--------------------------+
+ | PMpro error | /sys/bus/platform/devices/smpro-errmon.*/error_pmpro | system has PMpro error |
+ +---------------+------------------------------------------------------+--------------------------+
+ | PMpro warning | /sys/bus/platform/devices/smpro-errmon.*/warn_pmpro | system has PMpro warning |
+ +---------------+------------------------------------------------------+--------------------------+
+
+ For details, see section `5.10 RAS Internal Error Register Definitions,
+ Altra Family Soc BMC Interface Specification`.
+
+What: /sys/bus/platform/devices/smpro-errmon.*/event_[vrd_warn_fault|vrd_hot|dimm_hot|dimm_2x_refresh]
+KernelVersion: 6.1 (event_[vrd_warn_fault|vrd_hot|dimm_hot]), 6.4 (event_dimm_2x_refresh)
+Contact: Quan Nguyen <quan@os.amperecomputing.com>
+Description:
+ (RO) Contains the detail information in case of VRD/DIMM warning/hot events
+ in hex format as below::
+
+ AAAA
+
+ where:
+
+ - ``AAAA``: The event detail information data
+
+ The detail of each sysfs entries is as below:
+
+ +---------------+---------------------------------------------------------------+---------------------+
+ | Event | Sysfs entry | Description |
+ +---------------+---------------------------------------------------------------+---------------------+
+ | VRD HOT | /sys/bus/platform/devices/smpro-errmon.*/event_vrd_hot | VRD Hot |
+ +---------------+---------------------------------------------------------------+---------------------+
+ | VR Warn/Fault | /sys/bus/platform/devices/smpro-errmon.*/event_vrd_warn_fault | VR Warning or Fault |
+ +---------------+---------------------------------------------------------------+---------------------+
+ | DIMM HOT | /sys/bus/platform/devices/smpro-errmon.*/event_dimm_hot | DIMM Hot |
+ +---------------+---------------------------------------------------------------+---------------------+
+ | DIMM 2X | /sys/bus/platform/devices/smpro-errmon.*/event_dimm_2x_refresh| DIMM 2x refresh rate|
+ | REFRESH RATE | | event in high temp |
+ +---------------+---------------------------------------------------------------+---------------------+
+
+ For more details, see section `5.7 GPI Status Registers and 5.9 Memory Error Register Definitions,
+ Altra Family Soc BMC Interface Specification`.
+
+What: /sys/bus/platform/devices/smpro-errmon.*/event_dimm[0-15]_syndrome
+KernelVersion: 6.4
+Contact: Quan Nguyen <quan@os.amperecomputing.com>
+Description:
+ (RO) The sysfs returns the 2-byte DIMM failure syndrome data for slot
+ 0-15 if it failed to initialize.
+
+ For more details, see section `5.11 Boot Stage Register Definitions,
+ Altra Family Soc BMC Interface Specification`.
+
+What: /sys/bus/platform/devices/smpro-misc.*/boot_progress
+KernelVersion: 6.1
+Contact: Quan Nguyen <quan@os.amperecomputing.com>
+Description:
+ (RO) Contains the boot stages information in hex as format below::
+
+ AABBCCCCCCCC
+
+ where:
+
+ - ``AA`` : The boot stages
+
+ - 00: SMpro firmware booting
+ - 01: PMpro firmware booting
+ - 02: ATF BL1 firmware booting
+ - 03: DDR initialization
+ - 04: DDR training report status
+ - 05: ATF BL2 firmware booting
+ - 06: ATF BL31 firmware booting
+ - 07: ATF BL32 firmware booting
+ - 08: UEFI firmware booting
+ - 09: OS booting
+
+ - ``BB`` : Boot status
+
+ - 00: Not started
+ - 01: Started
+ - 02: Completed without error
+ - 03: Failed.
+
+ - ``CCCCCCCC``: Boot status information defined for each boot stages
+
+ For details, see section `5.11 Boot Stage Register Definitions`
+ and section `6. Processor Boot Progress Codes, Altra Family Soc BMC
+ Interface Specification`.
+
+
+What: /sys/bus/platform/devices/smpro-misc*/soc_power_limit
+KernelVersion: 6.1
+Contact: Quan Nguyen <quan@os.amperecomputing.com>
+Description:
+ (RW) Contains the desired SoC power limit in Watt.
+ Writes to this sysfs set the desired SoC power limit (W).
+ Reads from this register return the current SoC power limit (W).
+ The value ranges:
+
+ - Minimum: 120 W
+ - Maximum: Socket TDP power
diff --git a/Documentation/ABI/testing/sysfs-bus-platform-devices-occ-hwmon b/Documentation/ABI/testing/sysfs-bus-platform-devices-occ-hwmon
new file mode 100644
index 000000000000..b24d7ab0278f
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-platform-devices-occ-hwmon
@@ -0,0 +1,13 @@
+What: /sys/bus/platform/devices/occ-hwmon.X/ffdc
+KernelVersion: 5.15
+Contact: eajames@linux.ibm.com
+Description:
+ Contains the First Failure Data Capture from the SBEFIFO
+ hardware, if there is any from a previous transfer. Otherwise,
+ the file is empty. The data is cleared when it's been
+ completely read by a user. As the name suggests, only the data
+ from the first error is saved, until it's cleared upon read. The OCC hwmon driver, running on
+ a Baseboard Management Controller (BMC), communicates with
+ POWER9 and up processors over the Self-Boot Engine (SBE) FIFO.
+ In many error conditions, the SBEFIFO will return error data
+ indicating the type of error and system state, etc.
diff --git a/Documentation/ABI/testing/sysfs-bus-platform-drivers-amd_x3d_vcache b/Documentation/ABI/testing/sysfs-bus-platform-drivers-amd_x3d_vcache
new file mode 100644
index 000000000000..ac3431736f5c
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-platform-drivers-amd_x3d_vcache
@@ -0,0 +1,12 @@
+What: /sys/bus/platform/drivers/amd_x3d_vcache/AMDI0101:00/amd_x3d_mode
+Date: November 2024
+KernelVersion: 6.13
+Contact: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
+Description: (RW) AMD 3D V-Cache optimizer allows users to switch CPU core
+ rankings dynamically.
+
+ This file switches between these two modes:
+ - "frequency" cores within the faster CCD are prioritized before
+ those in the slower CCD.
+ - "cache" cores within the larger L3 CCD are prioritized before
+ those in the smaller L3 CCD.
diff --git a/Documentation/ABI/testing/sysfs-bus-platform-onboard-usb-dev b/Documentation/ABI/testing/sysfs-bus-platform-onboard-usb-dev
new file mode 100644
index 000000000000..b06a48c3c85a
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-platform-onboard-usb-dev
@@ -0,0 +1,9 @@
+What: /sys/bus/platform/devices/<dev>/always_powered_in_suspend
+Date: June 2022
+KernelVersion: 5.20
+Contact: Matthias Kaehlcke <matthias@kaehlcke.net>
+ linux-usb@vger.kernel.org
+Description:
+ (RW) Controls whether the USB hub remains always powered
+ during system suspend or not. This attribute is not
+ available for non-hub devices.
diff --git a/Documentation/ABI/testing/sysfs-bus-rapidio b/Documentation/ABI/testing/sysfs-bus-rapidio
index 13208b27dd87..f8b6728dac10 100644
--- a/Documentation/ABI/testing/sysfs-bus-rapidio
+++ b/Documentation/ABI/testing/sysfs-bus-rapidio
@@ -1,32 +1,35 @@
-What: /sys/bus/rapidio/devices/nn:d:iiii
+What: /sys/bus/rapidio/devices/<nn>:<d>:<iiii>
Description:
For each RapidIO device, the RapidIO subsystem creates files in
an individual subdirectory with the following name format of
device_name "nn:d:iiii", where:
- nn - two-digit hexadecimal ID of RapidIO network where the
+ ==== ========================================================
+ nn two-digit hexadecimal ID of RapidIO network where the
device resides
- d - device type: 'e' - for endpoint or 's' - for switch
- iiii - four-digit device destID for endpoints, or switchID for
+ d device type: 'e' - for endpoint or 's' - for switch
+ iiii four-digit device destID for endpoints, or switchID for
switches
+ ==== ========================================================
For example, below is a list of device directories that
represents a typical RapidIO network with one switch, one host,
and two agent endpoints, as it is seen by the enumerating host
- (with destID = 1):
+ (with destID = 1)::
- /sys/bus/rapidio/devices/00:e:0000
- /sys/bus/rapidio/devices/00:e:0002
- /sys/bus/rapidio/devices/00:s:0001
+ /sys/bus/rapidio/devices/00:e:0000
+ /sys/bus/rapidio/devices/00:e:0002
+ /sys/bus/rapidio/devices/00:s:0001
- NOTE: An enumerating or discovering endpoint does not create a
- sysfs entry for itself, this is why an endpoint with destID=1 is
- not shown in the list.
+ NOTE:
+ An enumerating or discovering endpoint does not create a
+ sysfs entry for itself, this is why an endpoint with destID=1
+ is not shown in the list.
Attributes Common for All RapidIO Devices
-----------------------------------------
-What: /sys/bus/rapidio/devices/nn:d:iiii/did
+What: /sys/bus/rapidio/devices/<nn>:<d>:<iiii>/did
Date: Nov, 2005
KernelVersion: v2.6.15
Contact: Matt Porter <mporter@kernel.crashing.org>,
@@ -34,7 +37,7 @@ Contact: Matt Porter <mporter@kernel.crashing.org>,
Description:
(RO) returns the device identifier
-What: /sys/bus/rapidio/devices/nn:d:iiii/vid
+What: /sys/bus/rapidio/devices/<nn>:<d>:<iiii>/vid
Date: Nov, 2005
KernelVersion: v2.6.15
Contact: Matt Porter <mporter@kernel.crashing.org>,
@@ -42,7 +45,7 @@ Contact: Matt Porter <mporter@kernel.crashing.org>,
Description:
(RO) returns the device vendor identifier
-What: /sys/bus/rapidio/devices/nn:d:iiii/device_rev
+What: /sys/bus/rapidio/devices/<nn>:<d>:<iiii>/device_rev
Date: Nov, 2005
KernelVersion: v2.6.15
Contact: Matt Porter <mporter@kernel.crashing.org>,
@@ -50,7 +53,7 @@ Contact: Matt Porter <mporter@kernel.crashing.org>,
Description:
(RO) returns the device revision level
-What: /sys/bus/rapidio/devices/nn:d:iiii/asm_did
+What: /sys/bus/rapidio/devices/<nn>:<d>:<iiii>/asm_did
Date: Nov, 2005
KernelVersion: v2.6.15
Contact: Matt Porter <mporter@kernel.crashing.org>,
@@ -58,7 +61,7 @@ Contact: Matt Porter <mporter@kernel.crashing.org>,
Description:
(RO) returns identifier for the assembly containing the device
-What: /sys/bus/rapidio/devices/nn:d:iiii/asm_rev
+What: /sys/bus/rapidio/devices/<nn>:<d>:<iiii>/asm_rev
Date: Nov, 2005
KernelVersion: v2.6.15
Contact: Matt Porter <mporter@kernel.crashing.org>,
@@ -67,7 +70,7 @@ Description:
(RO) returns revision level of the assembly containing the
device
-What: /sys/bus/rapidio/devices/nn:d:iiii/asm_vid
+What: /sys/bus/rapidio/devices/<nn>:<d>:<iiii>/asm_vid
Date: Nov, 2005
KernelVersion: v2.6.15
Contact: Matt Porter <mporter@kernel.crashing.org>,
@@ -76,7 +79,7 @@ Description:
(RO) returns vendor identifier of the assembly containing the
device
-What: /sys/bus/rapidio/devices/nn:d:iiii/destid
+What: /sys/bus/rapidio/devices/<nn>:<d>:<iiii>/destid
Date: Mar, 2011
KernelVersion: v2.6.3
Contact: Matt Porter <mporter@kernel.crashing.org>,
@@ -85,7 +88,7 @@ Description:
(RO) returns device destination ID assigned by the enumeration
routine
-What: /sys/bus/rapidio/devices/nn:d:iiii/lprev
+What: /sys/bus/rapidio/devices/<nn>:<d>:<iiii>/lprev
Date: Mar, 2011
KernelVersion: v2.6.39
Contact: Matt Porter <mporter@kernel.crashing.org>,
@@ -94,7 +97,7 @@ Description:
(RO) returns name of previous device (switch) on the path to the
device that that owns this attribute
-What: /sys/bus/rapidio/devices/nn:d:iiii/modalias
+What: /sys/bus/rapidio/devices/<nn>:<d>:<iiii>/modalias
Date: Jul, 2013
KernelVersion: v3.11
Contact: Matt Porter <mporter@kernel.crashing.org>,
@@ -102,7 +105,7 @@ Contact: Matt Porter <mporter@kernel.crashing.org>,
Description:
(RO) returns the device modalias
-What: /sys/bus/rapidio/devices/nn:d:iiii/config
+What: /sys/bus/rapidio/devices/<nn>:<d>:<iiii>/config
Date: Nov, 2005
KernelVersion: v2.6.15
Contact: Matt Porter <mporter@kernel.crashing.org>,
@@ -125,7 +128,7 @@ device-specific sysfs attributes by specifying a callback function that may be
set by the switch initialization routine during enumeration or discovery
process.
-What: /sys/bus/rapidio/devices/nn:s:iiii/routes
+What: /sys/bus/rapidio/devices/<nn>:<s>:<iiii>/routes
Date: Nov, 2005
KernelVersion: v2.6.15
Contact: Matt Porter <mporter@kernel.crashing.org>,
@@ -135,7 +138,7 @@ Description:
This attribute reports only valid routing table entries, one
line for each entry.
-What: /sys/bus/rapidio/devices/nn:s:iiii/destid
+What: /sys/bus/rapidio/devices/<nn>:<s>:<iiii>/destid
Date: Mar, 2011
KernelVersion: v2.6.3
Contact: Matt Porter <mporter@kernel.crashing.org>,
@@ -144,7 +147,7 @@ Description:
(RO) device destination ID of the associated device that defines
a route to the switch
-What: /sys/bus/rapidio/devices/nn:s:iiii/hopcount
+What: /sys/bus/rapidio/devices/<nn>:<s>:<iiii>/hopcount
Date: Mar, 2011
KernelVersion: v2.6.39
Contact: Matt Porter <mporter@kernel.crashing.org>,
@@ -152,7 +155,7 @@ Contact: Matt Porter <mporter@kernel.crashing.org>,
Description:
(RO) number of hops on the path to the switch
-What: /sys/bus/rapidio/devices/nn:s:iiii/lnext
+What: /sys/bus/rapidio/devices/<nn>:<s>:<iiii>/lnext
Date: Mar, 2011
KernelVersion: v2.6.39
Contact: Matt Porter <mporter@kernel.crashing.org>,
@@ -169,7 +172,7 @@ Device-specific Switch Attributes
IDT_GEN2-
-What: /sys/bus/rapidio/devices/nn:s:iiii/errlog
+What: /sys/bus/rapidio/devices/<nn>:<s>:<iiii>/errlog
Date: Oct, 2010
KernelVersion: v2.6.37
Contact: Matt Porter <mporter@kernel.crashing.org>,
diff --git a/Documentation/ABI/testing/sysfs-bus-rbd b/Documentation/ABI/testing/sysfs-bus-rbd
index cc30bee8b5f4..417a2fe21be1 100644
--- a/Documentation/ABI/testing/sysfs-bus-rbd
+++ b/Documentation/ABI/testing/sysfs-bus-rbd
@@ -7,6 +7,8 @@ Description:
Usage: <mon ip addr> <options> <pool name> <rbd image name> [<snap name>]
+ Example::
+
$ echo "192.168.0.1 name=admin rbd foo" > /sys/bus/rbd/add
The snapshot name can be "-" or omitted to map the image
@@ -23,6 +25,8 @@ Description:
Usage: <dev-id> [force]
+ Example::
+
$ echo 2 > /sys/bus/rbd/remove
Optional "force" argument which when passed will wait for
@@ -80,26 +84,29 @@ Date: Oct, 2010
KernelVersion: v2.6.37
Contact: Sage Weil <sage@newdream.net>
Description:
- size: (RO) The size (in bytes) of the mapped block
+
+ ============== ================================================
+ size (RO) The size (in bytes) of the mapped block
device.
- major: (RO) The block device major number.
+ major (RO) The block device major number.
- client_id: (RO) The ceph unique client id that was assigned
+ client_id (RO) The ceph unique client id that was assigned
for this specific session.
- pool: (RO) The name of the storage pool where this rbd
+ pool (RO) The name of the storage pool where this rbd
image resides. An rbd image name is unique
within its pool.
- name: (RO) The name of the rbd image.
+ name (RO) The name of the rbd image.
- refresh: (WO) Writing to this file will reread the image
+ refresh (WO) Writing to this file will reread the image
header data and set all relevant data structures
accordingly.
- current_snap: (RO) The current snapshot for which the device
+ current_snap (RO) The current snapshot for which the device
is mapped.
+ ============== ================================================
What: /sys/bus/rbd/devices/<dev-id>/pool_id
@@ -117,11 +124,13 @@ Date: Oct, 2012
KernelVersion: v3.7
Contact: Sage Weil <sage@newdream.net>
Description:
- image_id: (RO) The unique id for the rbd image. (For rbd
+ ========= ===============================================
+ image_id (RO) The unique id for the rbd image. (For rbd
image format 1 this is empty.)
- features: (RO) A hexadecimal encoding of the feature bits
+ features (RO) A hexadecimal encoding of the feature bits
for this image.
+ ========= ===============================================
What: /sys/bus/rbd/devices/<dev-id>/parent
@@ -149,14 +158,16 @@ Date: Aug, 2016
KernelVersion: v4.9
Contact: Sage Weil <sage@newdream.net>
Description:
- snap_id: (RO) The current snapshot's id.
+ ============ ================================================
+ snap_id (RO) The current snapshot's id.
- config_info: (RO) The string written into
+ config_info (RO) The string written into
/sys/bus/rbd/add{,_single_major}.
- cluster_fsid: (RO) The ceph cluster UUID.
+ cluster_fsid (RO) The ceph cluster UUID.
- client_addr: (RO) The ceph unique client
+ client_addr (RO) The ceph unique client
entity_addr_t (address + nonce). The format is
<address>:<port>/<nonce>: '1.2.3.4:1234/5678' or
'[1:2:3:4:5:6:7:8]:1234/5678'.
+ ============ ================================================
diff --git a/Documentation/ABI/testing/sysfs-bus-siox b/Documentation/ABI/testing/sysfs-bus-siox
index fed7c3765a4e..50e80238f30d 100644
--- a/Documentation/ABI/testing/sysfs-bus-siox
+++ b/Documentation/ABI/testing/sysfs-bus-siox
@@ -1,6 +1,6 @@
What: /sys/bus/siox/devices/siox-X/active
KernelVersion: 4.16
-Contact: Gavin Schenk <g.schenk@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+Contact: Thorsten Scherer <t.scherer@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Description:
On reading represents the current state of the bus. If it
contains a "0" the bus is stopped and connected devices are
@@ -8,11 +8,12 @@ Description:
When the file contains a "1" the bus is operated and periodically
does a push-pull cycle to write and read data from the
connected devices.
+
When writing a "0" or "1" the bus moves to the described state.
What: /sys/bus/siox/devices/siox-X/device_add
KernelVersion: 4.16
-Contact: Gavin Schenk <g.schenk@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+Contact: Thorsten Scherer <t.scherer@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Description:
Write-only file. Write
@@ -21,19 +22,21 @@ Description:
to add a new device dynamically. <type> is the name that is used to match
to a driver (similar to the platform bus). <inbytes> and <outbytes> define
the length of the input and output shift register in bytes respectively.
+
<statustype> defines the 4 bit device type that is check to identify connection
problems.
+
The new device is added to the end of the existing chain.
What: /sys/bus/siox/devices/siox-X/device_remove
KernelVersion: 4.16
-Contact: Gavin Schenk <g.schenk@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+Contact: Thorsten Scherer <t.scherer@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Description:
Write-only file. A single write removes the last device in the siox chain.
What: /sys/bus/siox/devices/siox-X/poll_interval_ns
KernelVersion: 4.16
-Contact: Gavin Schenk <g.schenk@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+Contact: Thorsten Scherer <t.scherer@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Description:
Defines the interval between two poll cycles in nano seconds.
Note this is rounded to jiffies on writing. On reading the current value
@@ -41,33 +44,33 @@ Description:
What: /sys/bus/siox/devices/siox-X-Y/connected
KernelVersion: 4.16
-Contact: Gavin Schenk <g.schenk@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+Contact: Thorsten Scherer <t.scherer@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Description:
Read-only value. "0" means the Yth device on siox bus X isn't "connected" i.e.
communication with it is not ensured. "1" signals a working connection.
What: /sys/bus/siox/devices/siox-X-Y/inbytes
KernelVersion: 4.16
-Contact: Gavin Schenk <g.schenk@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+Contact: Thorsten Scherer <t.scherer@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Description:
Read-only value reporting the inbytes value provided to siox-X/device_add
What: /sys/bus/siox/devices/siox-X-Y/status_errors
KernelVersion: 4.16
-Contact: Gavin Schenk <g.schenk@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+Contact: Thorsten Scherer <t.scherer@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Description:
Counts the number of time intervals when the read status byte doesn't yield the
expected value.
What: /sys/bus/siox/devices/siox-X-Y/type
KernelVersion: 4.16
-Contact: Gavin Schenk <g.schenk@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+Contact: Thorsten Scherer <t.scherer@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Description:
Read-only value reporting the type value provided to siox-X/device_add.
What: /sys/bus/siox/devices/siox-X-Y/watchdog
KernelVersion: 4.16
-Contact: Gavin Schenk <g.schenk@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+Contact: Thorsten Scherer <t.scherer@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Description:
Read-only value reporting if the watchdog of the siox device is
active. "0" means the watchdog is not active and the device is expected to
@@ -75,13 +78,13 @@ Description:
What: /sys/bus/siox/devices/siox-X-Y/watchdog_errors
KernelVersion: 4.16
-Contact: Gavin Schenk <g.schenk@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+Contact: Thorsten Scherer <t.scherer@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Description:
Read-only value reporting the number to time intervals when the
watchdog was active.
What: /sys/bus/siox/devices/siox-X-Y/outbytes
KernelVersion: 4.16
-Contact: Gavin Schenk <g.schenk@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+Contact: Thorsten Scherer <t.scherer@eckelmann.de>, Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Description:
Read-only value reporting the outbytes value provided to siox-X/device_add.
diff --git a/Documentation/ABI/testing/sysfs-bus-soundwire-master b/Documentation/ABI/testing/sysfs-bus-soundwire-master
new file mode 100644
index 000000000000..d2342911ffbb
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-soundwire-master
@@ -0,0 +1,23 @@
+What: /sys/bus/soundwire/devices/sdw-master-<N>/revision
+ /sys/bus/soundwire/devices/sdw-master-<N>/clk_stop_modes
+ /sys/bus/soundwire/devices/sdw-master-<N>/clk_freq
+ /sys/bus/soundwire/devices/sdw-master-<N>/clk_gears
+ /sys/bus/soundwire/devices/sdw-master-<N>/default_col
+ /sys/bus/soundwire/devices/sdw-master-<N>/default_frame_rate
+ /sys/bus/soundwire/devices/sdw-master-<N>/default_row
+ /sys/bus/soundwire/devices/sdw-master-<N>/dynamic_shape
+ /sys/bus/soundwire/devices/sdw-master-<N>/err_threshold
+ /sys/bus/soundwire/devices/sdw-master-<N>/max_clk_freq
+
+Date: April 2020
+
+Contact: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
+ Bard Liao <yung-chuan.liao@linux.intel.com>
+ Vinod Koul <vkoul@kernel.org>
+
+Description: SoundWire Master-N DisCo properties.
+ These properties are defined by MIPI DisCo Specification
+ for SoundWire. They define various properties of the Master
+ and are used by the bus to configure the Master. clk_stop_modes
+ is a bitmask for simplifications and combines the
+ clock-stop-mode0 and clock-stop-mode1 properties.
diff --git a/Documentation/ABI/testing/sysfs-bus-soundwire-slave b/Documentation/ABI/testing/sysfs-bus-soundwire-slave
new file mode 100644
index 000000000000..fbf55834dfee
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-soundwire-slave
@@ -0,0 +1,109 @@
+What: /sys/bus/soundwire/devices/sdw:.../status
+ /sys/bus/soundwire/devices/sdw:.../device_number
+
+Date: September 2020
+
+Contact: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
+ Bard Liao <yung-chuan.liao@linux.intel.com>
+ Vinod Koul <vkoul@kernel.org>
+
+Description: SoundWire Slave status
+
+ These properties report the Slave status, e.g. if it
+ is UNATTACHED or not, and in the latter case show the
+ device_number. This status information is useful to
+ detect devices exposed by platform firmware but not
+ physically present on the bus, and conversely devices
+ not exposed in platform firmware but enumerated.
+
+What: /sys/bus/soundwire/devices/sdw:.../dev-properties/mipi_revision
+ /sys/bus/soundwire/devices/sdw:.../dev-properties/wake_capable
+ /sys/bus/soundwire/devices/sdw:.../dev-properties/test_mode_capable
+ /sys/bus/soundwire/devices/sdw:.../dev-properties/clk_stop_mode1
+ /sys/bus/soundwire/devices/sdw:.../dev-properties/simple_clk_stop_capable
+ /sys/bus/soundwire/devices/sdw:.../dev-properties/clk_stop_timeout
+ /sys/bus/soundwire/devices/sdw:.../dev-properties/ch_prep_timeout
+ /sys/bus/soundwire/devices/sdw:.../dev-properties/reset_behave
+ /sys/bus/soundwire/devices/sdw:.../dev-properties/high_PHY_capable
+ /sys/bus/soundwire/devices/sdw:.../dev-properties/paging_support
+ /sys/bus/soundwire/devices/sdw:.../dev-properties/bank_delay_support
+ /sys/bus/soundwire/devices/sdw:.../dev-properties/p15_behave
+ /sys/bus/soundwire/devices/sdw:.../dev-properties/master_count
+ /sys/bus/soundwire/devices/sdw:.../dev-properties/source_ports
+ /sys/bus/soundwire/devices/sdw:.../dev-properties/sink_ports
+
+Date: May 2020
+
+Contact: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
+ Bard Liao <yung-chuan.liao@linux.intel.com>
+ Vinod Koul <vkoul@kernel.org>
+
+Description: SoundWire Slave DisCo properties.
+ These properties are defined by MIPI DisCo Specification
+ for SoundWire. They define various properties of the
+ SoundWire Slave and are used by the bus to configure
+ the Slave
+
+
+What: /sys/bus/soundwire/devices/sdw:.../dp0/max_word
+ /sys/bus/soundwire/devices/sdw:.../dp0/min_word
+ /sys/bus/soundwire/devices/sdw:.../dp0/words
+ /sys/bus/soundwire/devices/sdw:.../dp0/BRA_flow_controlled
+ /sys/bus/soundwire/devices/sdw:.../dp0/simple_ch_prep_sm
+ /sys/bus/soundwire/devices/sdw:.../dp0/imp_def_interrupts
+
+Date: May 2020
+
+Contact: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
+ Bard Liao <yung-chuan.liao@linux.intel.com>
+ Vinod Koul <vkoul@kernel.org>
+
+Description: SoundWire Slave Data Port-0 DisCo properties.
+ These properties are defined by MIPI DisCo Specification
+ for the SoundWire. They define various properties of the
+ Data port 0 are used by the bus to configure the Data Port 0.
+
+
+What: /sys/bus/soundwire/devices/sdw:.../dp<N>_src/max_word
+ /sys/bus/soundwire/devices/sdw:.../dp<N>_src/min_word
+ /sys/bus/soundwire/devices/sdw:.../dp<N>_src/words
+ /sys/bus/soundwire/devices/sdw:.../dp<N>_src/type
+ /sys/bus/soundwire/devices/sdw:.../dp<N>_src/max_grouping
+ /sys/bus/soundwire/devices/sdw:.../dp<N>_src/simple_ch_prep_sm
+ /sys/bus/soundwire/devices/sdw:.../dp<N>_src/ch_prep_timeout
+ /sys/bus/soundwire/devices/sdw:.../dp<N>_src/imp_def_interrupts
+ /sys/bus/soundwire/devices/sdw:.../dp<N>_src/min_ch
+ /sys/bus/soundwire/devices/sdw:.../dp<N>_src/max_ch
+ /sys/bus/soundwire/devices/sdw:.../dp<N>_src/channels
+ /sys/bus/soundwire/devices/sdw:.../dp<N>_src/ch_combinations
+ /sys/bus/soundwire/devices/sdw:.../dp<N>_src/max_async_buffer
+ /sys/bus/soundwire/devices/sdw:.../dp<N>_src/block_pack_mode
+ /sys/bus/soundwire/devices/sdw:.../dp<N>_src/port_encoding
+
+ /sys/bus/soundwire/devices/sdw:.../dp<N>_sink/max_word
+ /sys/bus/soundwire/devices/sdw:.../dp<N>_sink/min_word
+ /sys/bus/soundwire/devices/sdw:.../dp<N>_sink/words
+ /sys/bus/soundwire/devices/sdw:.../dp<N>_sink/type
+ /sys/bus/soundwire/devices/sdw:.../dp<N>_sink/max_grouping
+ /sys/bus/soundwire/devices/sdw:.../dp<N>_sink/simple_ch_prep_sm
+ /sys/bus/soundwire/devices/sdw:.../dp<N>_sink/ch_prep_timeout
+ /sys/bus/soundwire/devices/sdw:.../dp<N>_sink/imp_def_interrupts
+ /sys/bus/soundwire/devices/sdw:.../dp<N>_sink/min_ch
+ /sys/bus/soundwire/devices/sdw:.../dp<N>_sink/max_ch
+ /sys/bus/soundwire/devices/sdw:.../dp<N>_sink/channels
+ /sys/bus/soundwire/devices/sdw:.../dp<N>_sink/ch_combinations
+ /sys/bus/soundwire/devices/sdw:.../dp<N>_sink/max_async_buffer
+ /sys/bus/soundwire/devices/sdw:.../dp<N>_sink/block_pack_mode
+ /sys/bus/soundwire/devices/sdw:.../dp<N>_sink/port_encoding
+
+Date: May 2020
+
+Contact: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
+ Bard Liao <yung-chuan.liao@linux.intel.com>
+ Vinod Koul <vkoul@kernel.org>
+
+Description: SoundWire Slave Data Source/Sink Port-N DisCo properties.
+ These properties are defined by MIPI DisCo Specification
+ for SoundWire. They define various properties of the
+ Source/Sink Data port N and are used by the bus to configure
+ the Data Port N.
diff --git a/Documentation/ABI/testing/sysfs-bus-spi-devices-spi-nor b/Documentation/ABI/testing/sysfs-bus-spi-devices-spi-nor
new file mode 100644
index 000000000000..9ed5582ddea2
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-spi-devices-spi-nor
@@ -0,0 +1,40 @@
+What: /sys/bus/spi/devices/.../spi-nor/jedec_id
+Date: April 2021
+KernelVersion: 5.14
+Contact: linux-mtd@lists.infradead.org
+Description: (RO) The JEDEC ID of the SPI NOR flash as reported by the
+ flash device.
+
+ The attribute is not present if the flash doesn't support
+ the "Read JEDEC ID" command (9Fh). This is the case for
+ non-JEDEC compliant flashes.
+
+What: /sys/bus/spi/devices/.../spi-nor/manufacturer
+Date: April 2021
+KernelVersion: 5.14
+Contact: linux-mtd@lists.infradead.org
+Description: (RO) Manufacturer of the SPI NOR flash.
+
+ The attribute is not present if the flash device isn't
+ known to the kernel and is only probed by its SFDP
+ tables.
+
+What: /sys/bus/spi/devices/.../spi-nor/partname
+Date: April 2021
+KernelVersion: 5.14
+Contact: linux-mtd@lists.infradead.org
+Description: (RO) Part name of the SPI NOR flash.
+
+ The attribute is optional. User space should not rely on
+ it to be present or even correct. Instead, user space
+ should read the jedec_id attribute.
+
+What: /sys/bus/spi/devices/.../spi-nor/sfdp
+Date: April 2021
+KernelVersion: 5.14
+Contact: linux-mtd@lists.infradead.org
+Description: (RO) This attribute is only present if the SPI NOR flash
+ device supports the "Read SFDP" command (5Ah).
+
+ If present, it contains the complete SFDP (serial flash
+ discoverable parameters) binary data of the flash.
diff --git a/Documentation/ABI/testing/sysfs-bus-surface_aggregator-tabletsw b/Documentation/ABI/testing/sysfs-bus-surface_aggregator-tabletsw
new file mode 100644
index 000000000000..74cd9d754e60
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-surface_aggregator-tabletsw
@@ -0,0 +1,57 @@
+What: /sys/bus/surface_aggregator/devices/01:0e:01:00:01/state
+Date: July 2022
+KernelVersion: 5.20
+Contact: Maximilian Luz <luzmaximilian@gmail.com>
+Description:
+ This attribute returns a string with the current type-cover
+ or device posture, as indicated by the embedded controller.
+ Currently returned posture states are:
+
+ - "disconnected": The type-cover has been disconnected.
+
+ - "closed": The type-cover has been folded closed and lies on
+ top of the display.
+
+ - "laptop": The type-cover is open and in laptop-mode, i.e.,
+ ready for normal use.
+
+ - "folded-canvas": The type-cover has been folded back
+ part-ways, but does not lie flush with the back side of the
+ device. In general, this means that the kick-stand is used
+ and extended atop of the cover.
+
+ - "folded-back": The type cover has been fully folded back and
+ lies flush with the back side of the device.
+
+ - "<unknown>": The current state is unknown to the driver, for
+ example due to newer as-of-yet unsupported hardware.
+
+ New states may be introduced with new hardware. Users therefore
+ must not rely on this list of states being exhaustive and
+ gracefully handle unknown states.
+
+What: /sys/bus/surface_aggregator/devices/01:26:01:00:01/state
+Date: July 2022
+KernelVersion: 5.20
+Contact: Maximilian Luz <luzmaximilian@gmail.com>
+Description:
+ This attribute returns a string with the current device posture, as indicated by the embedded controller. Currently
+ returned posture states are:
+
+ - "closed": The lid of the device is closed.
+
+ - "laptop": The lid of the device is opened and the device
+ operates as a normal laptop.
+
+ - "slate": The screen covers the keyboard or has been flipped
+ back and the device operates mainly based on touch input.
+
+ - "tablet": The device operates as tablet and exclusively
+ relies on touch input (or external peripherals).
+
+ - "<unknown>": The current state is unknown to the driver, for
+ example due to newer as-of-yet unsupported hardware.
+
+ New states may be introduced with new hardware. Users therefore
+ must not rely on this list of states being exhaustive and
+ gracefully handle unknown states.
diff --git a/Documentation/ABI/testing/sysfs-bus-thunderbolt b/Documentation/ABI/testing/sysfs-bus-thunderbolt
index 151584a1f950..221b6c75ed93 100644
--- a/Documentation/ABI/testing/sysfs-bus-thunderbolt
+++ b/Documentation/ABI/testing/sysfs-bus-thunderbolt
@@ -1,7 +1,7 @@
-What: /sys/bus/thunderbolt/devices/.../domainX/boot_acl
+What: /sys/bus/thunderbolt/devices/.../domainX/boot_acl
Date: Jun 2018
KernelVersion: 4.17
-Contact: thunderbolt-software@lists.01.org
+Contact: Mika Westerberg <mika.westerberg@linux.intel.com>
Description: Holds a comma separated list of device unique_ids that
are allowed to be connected automatically during system
startup (e.g boot devices). The list always contains
@@ -21,60 +21,99 @@ Description: Holds a comma separated list of device unique_ids that
If a device is authorized automatically during boot its
boot attribute is set to 1.
-What: /sys/bus/thunderbolt/devices/.../domainX/security
+What: /sys/bus/thunderbolt/devices/.../domainX/deauthorization
+Date: May 2021
+KernelVersion: 5.12
+Contact: Mika Westerberg <mika.westerberg@linux.intel.com>
+Description: This attribute tells whether the system supports
+ de-authorization of devices. Value of 1 means user can
+ de-authorize PCIe tunnel by writing 0 to authorized
+ attribute under each device.
+
+What: /sys/bus/thunderbolt/devices/.../domainX/iommu_dma_protection
+Date: Mar 2019
+KernelVersion: 4.21
+Contact: Mika Westerberg <mika.westerberg@linux.intel.com>
+Description: This attribute tells whether the system uses IOMMU
+ for DMA protection. Value of 1 means IOMMU is used 0 means
+ it is not (DMA protection is solely based on Thunderbolt
+ security levels).
+
+What: /sys/bus/thunderbolt/devices/.../domainX/security
Date: Sep 2017
KernelVersion: 4.13
-Contact: thunderbolt-software@lists.01.org
+Contact: Mika Westerberg <mika.westerberg@linux.intel.com>
Description: This attribute holds current Thunderbolt security level
set by the system BIOS. Possible values are:
- none: All devices are automatically authorized
- user: Devices are only authorized based on writing
- appropriate value to the authorized attribute
- secure: Require devices that support secure connect at
- minimum. User needs to authorize each device.
- dponly: Automatically tunnel Display port (and USB). No
- PCIe tunnels are created.
- usbonly: Automatically tunnel USB controller of the
+ ======= ==================================================
+ none All devices are automatically authorized
+ user Devices are only authorized based on writing
+ appropriate value to the authorized attribute
+ secure Require devices that support secure connect at
+ minimum. User needs to authorize each device.
+ dponly Automatically tunnel Display port (and USB). No
+ PCIe tunnels are created.
+ usbonly Automatically tunnel USB controller of the
connected Thunderbolt dock (and Display Port). All
PCIe links downstream of the dock are removed.
+ nopcie USB4 system where PCIe tunneling is disabled from
+ the BIOS.
+ ======= ==================================================
-What: /sys/bus/thunderbolt/devices/.../authorized
+What: /sys/bus/thunderbolt/devices/.../authorized
Date: Sep 2017
KernelVersion: 4.13
-Contact: thunderbolt-software@lists.01.org
+Contact: Mika Westerberg <mika.westerberg@linux.intel.com>
Description: This attribute is used to authorize Thunderbolt devices
after they have been connected. If the device is not
- authorized, no devices such as PCIe and Display port are
- available to the system.
+ authorized, no PCIe devices are available to the system.
Contents of this attribute will be 0 when the device is not
yet authorized.
Possible values are supported:
- 1: The device will be authorized and connected
+
+ == ===================================================
+ 0 The device will be de-authorized (only supported if
+ deauthorization attribute under domain contains 1)
+ 1 The device will be authorized and connected
+ == ===================================================
When key attribute contains 32 byte hex string the possible
values are:
- 1: The 32 byte hex string is added to the device NVM and
- the device is authorized.
- 2: Send a challenge based on the 32 byte hex string. If the
- challenge response from device is valid, the device is
- authorized. In case of failure errno will be ENOKEY if
- the device did not contain a key at all, and
- EKEYREJECTED if the challenge response did not match.
-
-What: /sys/bus/thunderbolt/devices/.../boot
+
+ == ========================================================
+ 0 The device will be de-authorized (only supported if
+ deauthorization attribute under domain contains 1)
+ 1 The 32 byte hex string is added to the device NVM and
+ the device is authorized.
+ 2 Send a challenge based on the 32 byte hex string. If the
+ challenge response from device is valid, the device is
+ authorized. In case of failure errno will be ENOKEY if
+ the device did not contain a key at all, and
+ EKEYREJECTED if the challenge response did not match.
+ == ========================================================
+
+What: /sys/bus/thunderbolt/devices/.../boot
Date: Jun 2018
KernelVersion: 4.17
-Contact: thunderbolt-software@lists.01.org
+Contact: Mika Westerberg <mika.westerberg@linux.intel.com>
Description: This attribute contains 1 if Thunderbolt device was already
authorized on boot and 0 otherwise.
-What: /sys/bus/thunderbolt/devices/.../key
+What: /sys/bus/thunderbolt/devices/.../generation
+Date: Jan 2020
+KernelVersion: 5.5
+Contact: Christian Kellner <christian@kellner.me>
+Description: This attribute contains the generation of the Thunderbolt
+ controller associated with the device. It will contain 4
+ for USB4.
+
+What: /sys/bus/thunderbolt/devices/.../key
Date: Sep 2017
KernelVersion: 4.13
-Contact: thunderbolt-software@lists.01.org
+Contact: Mika Westerberg <mika.westerberg@linux.intel.com>
Description: When a devices supports Thunderbolt secure connect it will
have this attribute. Writing 32 byte hex string changes
authorization to use the secure connection method instead.
@@ -84,35 +123,70 @@ Description: When a devices supports Thunderbolt secure connect it will
What: /sys/bus/thunderbolt/devices/.../device
Date: Sep 2017
KernelVersion: 4.13
-Contact: thunderbolt-software@lists.01.org
+Contact: Mika Westerberg <mika.westerberg@linux.intel.com>
Description: This attribute contains id of this device extracted from
the device DROM.
What: /sys/bus/thunderbolt/devices/.../device_name
Date: Sep 2017
KernelVersion: 4.13
-Contact: thunderbolt-software@lists.01.org
+Contact: Mika Westerberg <mika.westerberg@linux.intel.com>
Description: This attribute contains name of this device extracted from
the device DROM.
+What: /sys/bus/thunderbolt/devices/.../maxhopid
+Date: Jul 2021
+KernelVersion: 5.13
+Contact: Mika Westerberg <mika.westerberg@linux.intel.com>
+Description: Only set for XDomains. The maximum HopID the other host
+ supports as its input HopID.
+
+What: /sys/bus/thunderbolt/devices/.../rx_speed
+Date: Jan 2020
+KernelVersion: 5.5
+Contact: Mika Westerberg <mika.westerberg@linux.intel.com>
+Description: This attribute reports the device RX speed per lane.
+ All RX lanes run at the same speed.
+
+What: /sys/bus/thunderbolt/devices/.../rx_lanes
+Date: Jan 2020
+KernelVersion: 5.5
+Contact: Mika Westerberg <mika.westerberg@linux.intel.com>
+Description: This attribute reports number of RX lanes the device is
+ using simultaneously through its upstream port.
+
+What: /sys/bus/thunderbolt/devices/.../tx_speed
+Date: Jan 2020
+KernelVersion: 5.5
+Contact: Mika Westerberg <mika.westerberg@linux.intel.com>
+Description: This attribute reports the TX speed per lane.
+ All TX lanes run at the same speed.
+
+What: /sys/bus/thunderbolt/devices/.../tx_lanes
+Date: Jan 2020
+KernelVersion: 5.5
+Contact: Mika Westerberg <mika.westerberg@linux.intel.com>
+Description: This attribute reports number of TX lanes the device is
+ using simultaneously through its upstream port.
+
What: /sys/bus/thunderbolt/devices/.../vendor
Date: Sep 2017
KernelVersion: 4.13
-Contact: thunderbolt-software@lists.01.org
+Contact: Mika Westerberg <mika.westerberg@linux.intel.com>
Description: This attribute contains vendor id of this device extracted
from the device DROM.
What: /sys/bus/thunderbolt/devices/.../vendor_name
Date: Sep 2017
KernelVersion: 4.13
-Contact: thunderbolt-software@lists.01.org
+Contact: Mika Westerberg <mika.westerberg@linux.intel.com>
Description: This attribute contains vendor name of this device extracted
from the device DROM.
What: /sys/bus/thunderbolt/devices/.../unique_id
Date: Sep 2017
KernelVersion: 4.13
-Contact: thunderbolt-software@lists.01.org
+Contact: Mika Westerberg <mika.westerberg@linux.intel.com>
Description: This attribute contains unique_id string of this device.
This is either read from hardware registers (UUID on
newer hardware) or based on UID from the device DROM.
@@ -121,7 +195,7 @@ Description: This attribute contains unique_id string of this device.
What: /sys/bus/thunderbolt/devices/.../nvm_version
Date: Sep 2017
KernelVersion: 4.13
-Contact: thunderbolt-software@lists.01.org
+Contact: Mika Westerberg <mika.westerberg@linux.intel.com>
Description: If the device has upgradeable firmware the version
number is available here. Format: %x.%x, major.minor.
If the device is in safe mode reading the file returns
@@ -130,64 +204,167 @@ Description: If the device has upgradeable firmware the version
What: /sys/bus/thunderbolt/devices/.../nvm_authenticate
Date: Sep 2017
KernelVersion: 4.13
-Contact: thunderbolt-software@lists.01.org
+Contact: Mika Westerberg <mika.westerberg@linux.intel.com>
Description: When new NVM image is written to the non-active NVM
area (through non_activeX NVMem device), the
- authentication procedure is started by writing 1 to
- this file. If everything goes well, the device is
+ authentication procedure is started by writing to
+ this file.
+ If everything goes well, the device is
restarted with the new NVM firmware. If the image
verification fails an error code is returned instead.
+ This file will accept writing values "1", "2" or "3".
+
+ - Writing "1" will flush the image to the storage
+ area and authenticate the image in one action.
+ - Writing "2" will run some basic validation on the image
+ and flush it to the storage area.
+ - Writing "3" will authenticate the image that is
+ currently written in the storage area. This is only
+ supported with USB4 devices and retimers.
+
When read holds status of the last authentication
operation if an error occurred during the process. This
is directly the status value from the DMA configuration
based mailbox before the device is power cycled. Writing
0 here clears the status.
+What: /sys/bus/thunderbolt/devices/.../nvm_authenticate_on_disconnect
+Date: Oct 2020
+KernelVersion: v5.9
+Contact: Mario Limonciello <mario.limonciello@outlook.com>
+Description: For supported devices, automatically authenticate the new Thunderbolt
+ image when the device is disconnected from the host system.
+
+ This file will accept writing values "1" or "2"
+
+ - Writing "1" will flush the image to the storage
+ area and prepare the device for authentication on disconnect.
+ - Writing "2" will run some basic validation on the image
+ and flush it to the storage area.
+
What: /sys/bus/thunderbolt/devices/<xdomain>.<service>/key
Date: Jan 2018
KernelVersion: 4.15
-Contact: thunderbolt-software@lists.01.org
+Contact: Mika Westerberg <mika.westerberg@linux.intel.com>
Description: This contains name of the property directory the XDomain
service exposes. This entry describes the protocol in
question. Following directories are already reserved by
the Apple XDomain specification:
- network: IP/ethernet over Thunderbolt
- targetdm: Target disk mode protocol over Thunderbolt
- extdisp: External display mode protocol over Thunderbolt
+ ======== ===============================================
+ network IP/ethernet over Thunderbolt
+ targetdm Target disk mode protocol over Thunderbolt
+ extdisp External display mode protocol over Thunderbolt
+ ======== ===============================================
What: /sys/bus/thunderbolt/devices/<xdomain>.<service>/modalias
Date: Jan 2018
KernelVersion: 4.15
-Contact: thunderbolt-software@lists.01.org
+Contact: Mika Westerberg <mika.westerberg@linux.intel.com>
Description: Stores the same MODALIAS value emitted by uevent for
the XDomain service. Format: tbtsvc:kSpNvNrN
What: /sys/bus/thunderbolt/devices/<xdomain>.<service>/prtcid
Date: Jan 2018
KernelVersion: 4.15
-Contact: thunderbolt-software@lists.01.org
+Contact: Mika Westerberg <mika.westerberg@linux.intel.com>
Description: This contains XDomain protocol identifier the XDomain
service supports.
What: /sys/bus/thunderbolt/devices/<xdomain>.<service>/prtcvers
Date: Jan 2018
KernelVersion: 4.15
-Contact: thunderbolt-software@lists.01.org
+Contact: Mika Westerberg <mika.westerberg@linux.intel.com>
Description: This contains XDomain protocol version the XDomain
service supports.
What: /sys/bus/thunderbolt/devices/<xdomain>.<service>/prtcrevs
Date: Jan 2018
KernelVersion: 4.15
-Contact: thunderbolt-software@lists.01.org
+Contact: Mika Westerberg <mika.westerberg@linux.intel.com>
Description: This contains XDomain software version the XDomain
service supports.
What: /sys/bus/thunderbolt/devices/<xdomain>.<service>/prtcstns
Date: Jan 2018
KernelVersion: 4.15
-Contact: thunderbolt-software@lists.01.org
+Contact: Mika Westerberg <mika.westerberg@linux.intel.com>
Description: This contains XDomain service specific settings as
bitmask. Format: %x
+
+What: /sys/bus/thunderbolt/devices/usb4_portX/connector
+Date: April 2022
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+ Symlink to the USB Type-C connector. This link is only
+ created when USB Type-C Connector Class is enabled,
+ and only if the system firmware is capable of
+ describing the connection between a port and its
+ connector.
+
+What: /sys/bus/thunderbolt/devices/usb4_portX/link
+Date: Sep 2021
+KernelVersion: v5.14
+Contact: Mika Westerberg <mika.westerberg@linux.intel.com>
+Description: Returns the current link mode. Possible values are
+ "usb4", "tbt" and "none".
+
+What: /sys/bus/thunderbolt/devices/usb4_portX/offline
+Date: Sep 2021
+KernelVersion: v5.14
+Contact: Rajmohan Mani <rajmohan.mani@intel.com>
+Description: Writing 1 to this attribute puts the USB4 port into
+ offline mode. Only allowed when there is nothing
+ connected to the port (link attribute returns "none").
+ Once the port is in offline mode it does not receive any
+ hotplug events. This is used to update NVM firmware of
+ on-board retimers. Writing 0 puts the port back to
+ online mode.
+
+ This attribute is only visible if the platform supports
+ powering on retimers when there is no cable connected.
+
+What: /sys/bus/thunderbolt/devices/usb4_portX/rescan
+Date: Sep 2021
+KernelVersion: v5.14
+Contact: Rajmohan Mani <rajmohan.mani@intel.com>
+Description: When the USB4 port is in offline mode writing 1 to this
+ attribute forces rescan of the sideband for on-board
+ retimers. Each retimer appear under the USB4 port as if
+ the USB4 link was up. These retimers act in the same way
+ as if the cable was connected so upgrading their NVM
+ firmware can be done the usual way.
+
+What: /sys/bus/thunderbolt/devices/<device>:<port>.<index>/device
+Date: Oct 2020
+KernelVersion: v5.9
+Contact: Mika Westerberg <mika.westerberg@linux.intel.com>
+Description: Retimer device identifier read from the hardware.
+
+What: /sys/bus/thunderbolt/devices/<device>:<port>.<index>/nvm_authenticate
+Date: Oct 2020
+KernelVersion: v5.9
+Contact: Mika Westerberg <mika.westerberg@linux.intel.com>
+Description: When new NVM image is written to the non-active NVM
+ area (through non_activeX NVMem device), the
+ authentication procedure is started by writing 1 to
+ this file. If everything goes well, the device is
+ restarted with the new NVM firmware. If the image
+ verification fails an error code is returned instead.
+
+ When read holds status of the last authentication
+ operation if an error occurred during the process.
+ Format: %x.
+
+What: /sys/bus/thunderbolt/devices/<device>:<port>.<index>/nvm_version
+Date: Oct 2020
+KernelVersion: v5.9
+Contact: Mika Westerberg <mika.westerberg@linux.intel.com>
+Description: Holds retimer NVM version number. Format: %x.%x, major.minor.
+
+What: /sys/bus/thunderbolt/devices/<device>:<port>.<index>/vendor
+Date: Oct 2020
+KernelVersion: v5.9
+Contact: Mika Westerberg <mika.westerberg@linux.intel.com>
+Description: Retimer vendor identifier read from the hardware.
diff --git a/Documentation/ABI/testing/sysfs-bus-umc b/Documentation/ABI/testing/sysfs-bus-umc
deleted file mode 100644
index 948fec412446..000000000000
--- a/Documentation/ABI/testing/sysfs-bus-umc
+++ /dev/null
@@ -1,28 +0,0 @@
-What: /sys/bus/umc/
-Date: July 2008
-KernelVersion: 2.6.27
-Contact: David Vrabel <david.vrabel@csr.com>
-Description:
- The Wireless Host Controller Interface (WHCI)
- specification describes a PCI-based device with
- multiple capabilities; the UWB Multi-interface
- Controller (UMC).
-
- The umc bus presents each of the individual
- capabilties as a device.
-
-What: /sys/bus/umc/devices/.../capability_id
-Date: July 2008
-KernelVersion: 2.6.27
-Contact: David Vrabel <david.vrabel@csr.com>
-Description:
- The ID of this capability, with 0 being the radio
- controller capability.
-
-What: /sys/bus/umc/devices/.../version
-Date: July 2008
-KernelVersion: 2.6.27
-Contact: David Vrabel <david.vrabel@csr.com>
-Description:
- The specification version this capability's hardware
- interface complies with.
diff --git a/Documentation/ABI/testing/sysfs-bus-usb b/Documentation/ABI/testing/sysfs-bus-usb
index 559baa5c418c..af9b653422f1 100644
--- a/Documentation/ABI/testing/sysfs-bus-usb
+++ b/Documentation/ABI/testing/sysfs-bus-usb
@@ -1,4 +1,4 @@
-What: /sys/bus/usb/devices/INTERFACE/authorized
+What: /sys/bus/usb/devices/<INTERFACE>/authorized
Date: August 2015
Description:
This allows to authorize (1) or deauthorize (0)
@@ -9,6 +9,7 @@ Description:
by writing INTERFACE to /sys/bus/usb/drivers_probe
This allows to avoid side-effects with drivers
that need multiple interfaces.
+
A deauthorized interface cannot be probed or claimed.
What: /sys/bus/usb/devices/usbX/interface_authorized_default
@@ -27,40 +28,6 @@ Description:
drivers, non-authorized one are not. By default, wired
USB devices are authorized.
- Certified Wireless USB devices are not authorized
- initially and should be (by writing 1) after the
- device has been authenticated.
-
-What: /sys/bus/usb/device/.../wusb_cdid
-Date: July 2008
-KernelVersion: 2.6.27
-Contact: David Vrabel <david.vrabel@csr.com>
-Description:
- For Certified Wireless USB devices only.
-
- A devices's CDID, as 16 space-separated hex octets.
-
-What: /sys/bus/usb/device/.../wusb_ck
-Date: July 2008
-KernelVersion: 2.6.27
-Contact: David Vrabel <david.vrabel@csr.com>
-Description:
- For Certified Wireless USB devices only.
-
- Write the device's connection key (CK) to start the
- authentication of the device. The CK is 16
- space-separated hex octets.
-
-What: /sys/bus/usb/device/.../wusb_disconnect
-Date: July 2008
-KernelVersion: 2.6.27
-Contact: David Vrabel <david.vrabel@csr.com>
-Description:
- For Certified Wireless USB devices only.
-
- Write a 1 to force the device to disconnect
- (equivalent to unplugging a wired USB device).
-
What: /sys/bus/usb/drivers/.../new_id
Date: October 2011
Contact: linux-usb@vger.kernel.org
@@ -72,24 +39,27 @@ Description:
table at compile time. The format for the device ID is:
idVendor idProduct bInterfaceClass RefIdVendor RefIdProduct
The vendor ID and device ID fields are required, the
- rest is optional. The Ref* tuple can be used to tell the
+ rest is optional. The `Ref*` tuple can be used to tell the
driver to use the same driver_data for the new device as
it is used for the reference device.
Upon successfully adding an ID, the driver will probe
- for the device and attempt to bind to it. For example:
- # echo "8086 10f5" > /sys/bus/usb/drivers/foo/new_id
+ for the device and attempt to bind to it. For example::
+
+ # echo "8086 10f5" > /sys/bus/usb/drivers/foo/new_id
Here add a new device (0458:7045) using driver_data from
- an already supported device (0458:704c):
- # echo "0458 7045 0 0458 704c" > /sys/bus/usb/drivers/foo/new_id
+ an already supported device (0458:704c)::
+
+ # echo "0458 7045 0 0458 704c" > /sys/bus/usb/drivers/foo/new_id
Reading from this file will list all dynamically added
device IDs in the same format, with one entry per
- line. For example:
- # cat /sys/bus/usb/drivers/foo/new_id
- 8086 10f5
- dead beef 06
- f00d cafe
+ line. For example::
+
+ # cat /sys/bus/usb/drivers/foo/new_id
+ 8086 10f5
+ dead beef 06
+ f00d cafe
The list will be truncated at PAGE_SIZE bytes due to
sysfs restrictions.
@@ -150,17 +120,6 @@ Description:
files hold a string value (enable or disable) indicating whether
or not USB3 hardware LPM U1 or U2 is enabled for the device.
-What: /sys/bus/usb/devices/.../removable
-Date: February 2012
-Contact: Matthew Garrett <mjg@redhat.com>
-Description:
- Some information about whether a given USB device is
- physically fixed to the platform can be inferred from a
- combination of hub descriptor bits and platform-specific data
- such as ACPI. This file will read either "removable" or
- "fixed" if the information is available, and "unknown"
- otherwise.
-
What: /sys/bus/usb/devices/.../ltm_capable
Date: July 2012
Contact: Sarah Sharp <sarah.a.sharp@linux.intel.com>
@@ -173,23 +132,40 @@ Description:
The file will be present for all speeds of USB devices, and will
always read "no" for USB 1.1 and USB 2.0 devices.
-What: /sys/bus/usb/devices/.../(hub interface)/portX
+What: /sys/bus/usb/devices/<INTERFACE>/wireless_status
+Date: February 2023
+Contact: Bastien Nocera <hadess@hadess.net>
+Description:
+ Some USB devices use a USB receiver dongle to communicate
+ wirelessly with their device using proprietary protocols. This
+ attribute allows user-space to know whether the device is
+ connected to its receiver dongle, and, for example, consider
+ the device to be absent when choosing whether to show the
+ device's battery, show a headset in a list of outputs, or show
+ an on-screen keyboard if the only wireless keyboard is
+ turned off.
+ This attribute is not to be used to replace protocol specific
+ statuses available in WWAN, WLAN/Wi-Fi, Bluetooth, etc.
+ If the device does not use a receiver dongle with a wireless
+ device, then this attribute will not exist.
+
+What: /sys/bus/usb/devices/.../<hub_interface>/port<X>
Date: August 2012
Contact: Lan Tianyu <tianyu.lan@intel.com>
Description:
- The /sys/bus/usb/devices/.../(hub interface)/portX
+ The /sys/bus/usb/devices/.../<hub_interface>/port<X>
is usb port device's sysfs directory.
-What: /sys/bus/usb/devices/.../(hub interface)/portX/connect_type
+What: /sys/bus/usb/devices/.../<hub_interface>/port<X>/connect_type
Date: January 2013
Contact: Lan Tianyu <tianyu.lan@intel.com>
Description:
Some platforms provide usb port connect types through ACPI.
This attribute is to expose these information to user space.
- The file will read "hotplug", "wired" and "not used" if the
+ The file will read "hotplug", "hardwired" and "not used" if the
information is available, and "unknown" otherwise.
-What: /sys/bus/usb/devices/.../(hub interface)/portX/location
+What: /sys/bus/usb/devices/.../<hub_interface>/port<X>/location
Date: October 2018
Contact: Bjørn Mork <bjorn@mork.no>
Description:
@@ -199,7 +175,7 @@ Description:
raw location value as a hex integer.
-What: /sys/bus/usb/devices/.../(hub interface)/portX/quirks
+What: /sys/bus/usb/devices/.../<hub_interface>/port<X>/quirks
Date: May 2018
Contact: Nicolas Boichat <drinkcat@chromium.org>
Description:
@@ -209,9 +185,11 @@ Description:
advance, and behaves well according to the specification.
This attribute is a bit-field that controls the behavior of
a specific port:
+
- Bit 0 of this field selects the "old" enumeration scheme,
as it is considerably faster (it only causes one USB reset
instead of 2).
+
The old enumeration scheme can also be selected globally
using /sys/module/usbcore/parameters/old_scheme_first, but
it is often not desirable as the new scheme was introduced to
@@ -221,7 +199,7 @@ Description:
used to help make enumeration work better on some high speed
devices.
-What: /sys/bus/usb/devices/.../(hub interface)/portX/over_current_count
+What: /sys/bus/usb/devices/.../<hub_interface>/port<X>/over_current_count
Date: February 2018
Contact: Richard Leitner <richard.leitner@skidata.com>
Description:
@@ -233,12 +211,12 @@ Description:
poll() for monitoring changes to this value in user space.
Any time this value changes the corresponding hub device will send a
- udev event with the following attributes:
+ udev event with the following attributes::
- OVER_CURRENT_PORT=/sys/bus/usb/devices/.../(hub interface)/portX
- OVER_CURRENT_COUNT=[current value of this sysfs attribute]
+ OVER_CURRENT_PORT=/sys/bus/usb/devices/.../<hub_interface>/port<X>
+ OVER_CURRENT_COUNT=[current value of this sysfs attribute]
-What: /sys/bus/usb/devices/.../(hub interface)/portX/usb3_lpm_permit
+What: /sys/bus/usb/devices/.../<hub_interface>/port<X>/usb3_lpm_permit
Date: November 2015
Contact: Lu Baolu <baolu.lu@linux.intel.com>
Description:
@@ -249,6 +227,47 @@ Description:
is permitted, "u2" if only u2 is permitted, "u1_u2" if both u1 and
u2 are permitted.
+What: /sys/bus/usb/devices/.../<hub_interface>/port<X>/connector
+Date: December 2021
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+ Link to the USB Type-C connector when available. This link is
+ only created when USB Type-C Connector Class is enabled, and
+ only if the system firmware is capable of describing the
+ connection between a port and its connector.
+
+What: /sys/bus/usb/devices/.../<hub_interface>/port<X>/disable
+Date: June 2022
+Contact: Michael Grzeschik <m.grzeschik@pengutronix.de>
+Description:
+ This file controls the state of a USB port, including
+ Vbus power output (but only on hubs that support
+ power switching -- most hubs don't support it). If
+ a port is disabled, the port is unusable: Devices
+ attached to the port will not be detected, initialized,
+ or enumerated.
+
+What: /sys/bus/usb/devices/.../<hub_interface>/port<X>/early_stop
+Date: Sep 2022
+Contact: Ray Chi <raychi@google.com>
+Description:
+ Some USB hosts have some watchdog mechanisms so that the device
+ may enter ramdump if it takes a long time during port initialization.
+ This attribute allows each port just has two attempts so that the
+ port initialization will be failed quickly. In addition, if a port
+ which is marked with early_stop has failed to initialize, it will ignore
+ all future connections until this attribute is clear.
+
+What: /sys/bus/usb/devices/.../<hub_interface>/port<X>/state
+Date: June 2023
+Contact: Roy Luo <royluo@google.com>
+Description:
+ Indicates current state of the USB device attached to the port.
+ Valid states are: 'not-attached', 'attached', 'powered',
+ 'reconnecting', 'unauthenticated', 'default', 'addressed',
+ 'configured', and 'suspended'. This file supports poll() to
+ monitor the state change from user space.
+
What: /sys/bus/usb/devices/.../power/usb2_lpm_l1_timeout
Date: May 2013
Contact: Mathias Nyman <mathias.nyman@linux.intel.com>
@@ -293,3 +312,296 @@ Description:
USB 3.2 adds Dual-lane support, 2 rx and 2 tx -lanes over Type-C.
Inter-Chip SSIC devices support asymmetric lanes up to 4 lanes per
direction. Devices before USB 3.2 are single lane (tx_lanes = 1)
+
+What: /sys/bus/usb/devices/.../typec
+Date: November 2023
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+ Symlink to the USB Type-C partner device. USB Type-C partner
+ represents the component that communicates over the
+ Configuration Channel (CC signal on USB Type-C connectors and
+ cables) with the local port.
+
+What: /sys/bus/usb/devices/usbX/bAlternateSetting
+Description:
+ The current interface alternate setting number, in decimal.
+
+ See USB specs for its meaning.
+
+What: /sys/bus/usb/devices/usbX/bcdDevice
+Description:
+ The device's release number, in hexadecimal.
+
+ See USB specs for its meaning.
+
+What: /sys/bus/usb/devices/usbX/bConfigurationValue
+Description:
+ While a USB device typically have just one configuration
+ setting, some devices support multiple configurations.
+
+ This value shows the current configuration, in decimal.
+
+ Changing its value will change the device's configuration
+ to another setting.
+
+ The number of configurations supported by a device is at:
+
+ /sys/bus/usb/devices/usbX/bNumConfigurations
+
+ See USB specs for its meaning.
+
+What: /sys/bus/usb/devices/usbX/bDeviceClass
+Description:
+ Class code of the device, in hexadecimal.
+
+ See USB specs for its meaning.
+
+What: /sys/bus/usb/devices/usbX/bDeviceProtocol
+Description:
+ Protocol code of the device, in hexadecimal.
+
+ See USB specs for its meaning.
+
+What: /sys/bus/usb/devices/usbX/bDeviceSubClass
+Description:
+ Subclass code of the device, in hexadecimal.
+
+ See USB specs for its meaning.
+
+What: /sys/bus/usb/devices/usbX/bInterfaceClass
+Description:
+ Class code of the interface, in hexadecimal.
+
+ See USB specs for its meaning.
+
+What: /sys/bus/usb/devices/usbX/bInterfaceNumber
+Description:
+ Interface number, in hexadecimal.
+
+ See USB specs for its meaning.
+
+What: /sys/bus/usb/devices/usbX/bInterfaceProtocol
+Description:
+ Protocol code of the interface, in hexadecimal.
+
+ See USB specs for its meaning.
+
+What: /sys/bus/usb/devices/usbX/bInterfaceSubClass
+Description:
+ Subclass code of the interface, in hexadecimal.
+
+ See USB specs for its meaning.
+
+What: /sys/bus/usb/devices/usbX/bmAttributes
+Description:
+ Attributes of the current configuration, in hexadecimal.
+
+ See USB specs for its meaning.
+
+What: /sys/bus/usb/devices/usbX/bMaxPacketSize0
+Description:
+ Maximum endpoint 0 packet size, in decimal.
+
+ See USB specs for its meaning.
+
+What: /sys/bus/usb/devices/usbX/bMaxPower
+Description:
+ Maximum power consumption of the active configuration of
+ the device, in miliamperes.
+
+What: /sys/bus/usb/devices/usbX/bNumConfigurations
+Description:
+ Number of the possible configurations of the device, in
+ decimal. The current configuration is controlled via:
+
+ /sys/bus/usb/devices/usbX/bConfigurationValue
+
+ See USB specs for its meaning.
+
+What: /sys/bus/usb/devices/usbX/bNumEndpoints
+Description:
+ Number of endpoints used on this interface, in hexadecimal.
+
+ See USB specs for its meaning.
+
+What: /sys/bus/usb/devices/usbX/bNumInterfaces
+Description:
+ Number of interfaces on this device, in decimal.
+
+What: /sys/bus/usb/devices/usbX/busnum
+Description:
+ Number of the bus.
+
+What: /sys/bus/usb/devices/usbX/configuration
+Description:
+ Contents of the string descriptor associated with the
+ current configuration. It may include the firmware version
+ of a device and/or its serial number.
+
+What: /sys/bus/usb/devices/usbX/descriptors
+Description:
+ Contains the interface descriptors, in binary.
+
+What: /sys/bus/usb/devices/usbX/bos_descriptors
+Date: March 2024
+Contact: Elbert Mai <code@elbertmai.com>
+Description:
+ Binary file containing the cached binary device object store (BOS)
+ of the device. This consists of the BOS descriptor followed by the
+ set of device capability descriptors. All descriptors read from
+ this file are in bus-endian format. Note that the kernel will not
+ request the BOS from a device if its bcdUSB is less than 0x0201.
+
+What: /sys/bus/usb/devices/usbX/idProduct
+Description:
+ Product ID, in hexadecimal.
+
+What: /sys/bus/usb/devices/usbX/idVendor
+Description:
+ Vendor ID, in hexadecimal.
+
+What: /sys/bus/usb/devices/usbX/devspec
+Description:
+ Displays the Device Tree Open Firmware node of the interface.
+
+What: /sys/bus/usb/devices/usbX/avoid_reset_quirk
+Description:
+ Most devices have this set to zero.
+
+ If the value is 1, enable a USB quirk that prevents this
+ device to use reset.
+
+ (read/write)
+
+What: /sys/bus/usb/devices/usbX/devnum
+Description:
+ USB interface device number, in decimal.
+
+What: /sys/bus/usb/devices/usbX/devpath
+Description:
+ String containing the USB interface device path.
+
+What: /sys/bus/usb/devices/usbX/manufacturer
+Description:
+ Vendor specific string containing the name of the
+ manufacturer of the device.
+
+What: /sys/bus/usb/devices/usbX/maxchild
+Description:
+ Number of ports of an USB hub
+
+What: /sys/bus/usb/devices/usbX/persist
+Description:
+ Keeps the device even if it gets disconnected.
+
+What: /sys/bus/usb/devices/usbX/product
+Description:
+ Vendor specific string containing the name of the
+ device's product.
+
+What: /sys/bus/usb/devices/usbX/speed
+Description:
+ Shows the device's max speed, according to the USB version,
+ in Mbps.
+ Can be:
+
+ ======= ====================
+ Unknown speed unknown
+ 1.5 Low speed
+ 15 Full speed
+ 480 High Speed
+ 5000 Super Speed
+ 10000 Super Speed+
+ 20000 Super Speed+ Gen 2x2
+ ======= ====================
+
+What: /sys/bus/usb/devices/usbX/supports_autosuspend
+Description:
+ Returns 1 if the device doesn't support autosuspend.
+ Otherwise, returns 0.
+
+What: /sys/bus/usb/devices/usbX/urbnum
+Description:
+ Number of URBs submitted for the whole device.
+
+What: /sys/bus/usb/devices/usbX/version
+Description:
+ String containing the USB device version, as encoded
+ at the BCD descriptor.
+
+What: /sys/bus/usb/devices/usbX/power/autosuspend
+Description:
+ Time in milliseconds for the device to autosuspend. If the
+ value is negative, then autosuspend is prevented.
+
+ (read/write)
+
+What: /sys/bus/usb/devices/usbX/power/active_duration
+Description:
+ The total time the device has not been suspended.
+
+What: /sys/bus/usb/devices/usbX/power/connected_duration
+Description:
+ The total time (in msec) that the device has been connected.
+
+What: /sys/bus/usb/devices/usbX/power/level
+Description:
+
+What: /sys/bus/usb/devices/usbX/ep_<N>/bEndpointAddress
+Description:
+ The address of the endpoint described by this descriptor,
+ in hexadecimal. The endpoint direction on this bitmapped field
+ is also shown at:
+
+ /sys/bus/usb/devices/usbX/ep_<N>/direction
+
+ See USB specs for its meaning.
+
+What: /sys/bus/usb/devices/usbX/ep_<N>/bInterval
+Description:
+ The interval of the endpoint as described on its descriptor,
+ in hexadecimal. The actual interval depends on the version
+ of the USB. Also shown in time units at
+ /sys/bus/usb/devices/usbX/ep_<N>/interval.
+
+What: /sys/bus/usb/devices/usbX/ep_<N>/bLength
+Description:
+ Number of bytes of the endpoint descriptor, in hexadecimal.
+
+What: /sys/bus/usb/devices/usbX/ep_<N>/bmAttributes
+Description:
+ Attributes which apply to the endpoint as described on its
+ descriptor, in hexadecimal. The endpoint type on this
+ bitmapped field is also shown at:
+
+ /sys/bus/usb/devices/usbX/ep_<N>/type
+
+ See USB specs for its meaning.
+
+What: /sys/bus/usb/devices/usbX/ep_<N>/direction
+Description:
+ Direction of the endpoint. Can be:
+
+ - both (on control endpoints)
+ - in
+ - out
+
+What: /sys/bus/usb/devices/usbX/ep_<N>/interval
+Description:
+ Interval for polling endpoint for data transfers, in
+ milisseconds or microseconds.
+
+What: /sys/bus/usb/devices/usbX/ep_<N>/type
+Description:
+ Descriptor type. Can be:
+
+ - Control
+ - Isoc
+ - Bulk
+ - Interrupt
+ - unknown
+
+What: /sys/bus/usb/devices/usbX/ep_<N>/wMaxPacketSize
+Description:
+ Maximum packet size this endpoint is capable of
+ sending or receiving, in hexadecimal.
diff --git a/Documentation/ABI/testing/sysfs-bus-usb-devices-usbsevseg b/Documentation/ABI/testing/sysfs-bus-usb-devices-usbsevseg
index 70d00dfa443d..2f86e4223bfc 100644
--- a/Documentation/ABI/testing/sysfs-bus-usb-devices-usbsevseg
+++ b/Documentation/ABI/testing/sysfs-bus-usb-devices-usbsevseg
@@ -1,43 +1,46 @@
-Where: /sys/bus/usb/.../powered
+What: /sys/bus/usb/.../powered
Date: August 2008
-Kernel Version: 2.6.26
+KernelVersion: 2.6.26
Contact: Harrison Metzger <harrisonmetz@gmail.com>
Description: Controls whether the device's display will powered.
A value of 0 is off and a non-zero value is on.
-Where: /sys/bus/usb/.../mode_msb
-Where: /sys/bus/usb/.../mode_lsb
+What: /sys/bus/usb/.../mode_msb
+What: /sys/bus/usb/.../mode_lsb
Date: August 2008
-Kernel Version: 2.6.26
+KernelVersion: 2.6.26
Contact: Harrison Metzger <harrisonmetz@gmail.com>
Description: Controls the devices display mode.
For a 6 character display the values are
+
MSB 0x06; LSB 0x3F, and
+
for an 8 character display the values are
+
MSB 0x08; LSB 0xFF.
-Where: /sys/bus/usb/.../textmode
+What: /sys/bus/usb/.../textmode
Date: August 2008
-Kernel Version: 2.6.26
+KernelVersion: 2.6.26
Contact: Harrison Metzger <harrisonmetz@gmail.com>
Description: Controls the way the device interprets its text buffer.
raw: each character controls its segment manually
hex: each character is between 0-15
ascii: each character is between '0'-'9' and 'A'-'F'.
-Where: /sys/bus/usb/.../text
+What: /sys/bus/usb/.../text
Date: August 2008
-Kernel Version: 2.6.26
+KernelVersion: 2.6.26
Contact: Harrison Metzger <harrisonmetz@gmail.com>
Description: The text (or data) for the device to display
-Where: /sys/bus/usb/.../decimals
+What: /sys/bus/usb/.../decimals
Date: August 2008
-Kernel Version: 2.6.26
+KernelVersion: 2.6.26
Contact: Harrison Metzger <harrisonmetz@gmail.com>
Description: Controls the decimal places on the device.
To set the nth decimal place, give this field
- the value of 10 ** n. Assume this field has
+ the value of ``10 ** n``. Assume this field has
the value k and has 1 or more decimal places set,
to set the mth place (where m is not already set),
- change this fields value to k + 10 ** m.
+ change this fields value to ``k + 10 ** m``.
diff --git a/Documentation/ABI/testing/sysfs-bus-vdpa b/Documentation/ABI/testing/sysfs-bus-vdpa
new file mode 100644
index 000000000000..2c833b5163f2
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-vdpa
@@ -0,0 +1,57 @@
+What: /sys/bus/vdpa/drivers_autoprobe
+Date: March 2020
+Contact: virtualization@lists.linux.dev
+Description:
+ This file determines whether new devices are immediately bound
+ to a driver after the creation. It initially contains 1, which
+ means the kernel automatically binds devices to a compatible
+ driver immediately after they are created.
+
+ Writing "0" to this file disable this feature, any other string
+ enable it.
+
+What: /sys/bus/vdpa/driver_probe
+Date: March 2020
+Contact: virtualization@lists.linux.dev
+Description:
+ Writing a device name to this file will cause the kernel binds
+ devices to a compatible driver.
+
+ This can be useful when /sys/bus/vdpa/drivers_autoprobe is
+ disabled.
+
+What: /sys/bus/vdpa/drivers/.../bind
+Date: March 2020
+Contact: virtualization@lists.linux.dev
+Description:
+ Writing a device name to this file will cause the driver to
+ attempt to bind to the device. This is useful for overriding
+ default bindings.
+
+What: /sys/bus/vdpa/drivers/.../unbind
+Date: March 2020
+Contact: virtualization@lists.linux.dev
+Description:
+ Writing a device name to this file will cause the driver to
+ attempt to unbind from the device. This may be useful when
+ overriding default bindings.
+
+What: /sys/bus/vdpa/devices/.../driver_override
+Date: November 2021
+Contact: virtualization@lists.linux.dev
+Description:
+ This file allows the driver for a device to be specified.
+ When specified, only a driver with a name matching the value
+ written to driver_override will have an opportunity to bind to
+ the device. The override is specified by writing a string to the
+ driver_override file (echo vhost-vdpa > driver_override) and may
+ be cleared with an empty string (echo > driver_override).
+ This returns the device to standard matching rules binding.
+ Writing to driver_override does not automatically unbind the
+ device from its current driver or make any attempt to
+ automatically load the specified driver. If no driver with a
+ matching name is currently loaded in the kernel, the device will
+ not bind to any driver. This also allows devices to opt-out of
+ driver binding using a driver_override name such as "none".
+ Only a single driver may be specified in the override, there is
+ no support for parsing delimiters.
diff --git a/Documentation/ABI/testing/sysfs-bus-vfio-mdev b/Documentation/ABI/testing/sysfs-bus-vfio-mdev
index 452dbe39270e..59fc804265db 100644
--- a/Documentation/ABI/testing/sysfs-bus-vfio-mdev
+++ b/Documentation/ABI/testing/sysfs-bus-vfio-mdev
@@ -28,8 +28,9 @@ Description:
Writing UUID to this file will create mediated device of
type <type-id> for parent device <device>. This is a
write-only file.
- For example:
- # echo "83b8f4f2-509f-382f-3c1e-e6bfe0fa1001" > \
+ For example::
+
+ # echo "83b8f4f2-509f-382f-3c1e-e6bfe0fa1001" > \
/sys/devices/foo/mdev_supported_types/foo-1/create
What: /sys/.../mdev_supported_types/<type-id>/devices/
@@ -107,5 +108,6 @@ Description:
Writing '1' to this file destroys the mediated device. The
vendor driver can fail the remove() callback if that device
is active and the vendor driver doesn't support hot unplug.
- Example:
- # echo 1 > /sys/bus/mdev/devices/<UUID>/remove
+ Example::
+
+ # echo 1 > /sys/bus/mdev/devices/<UUID>/remove
diff --git a/Documentation/ABI/testing/sysfs-bus-wmi b/Documentation/ABI/testing/sysfs-bus-wmi
new file mode 100644
index 000000000000..d71a219c610e
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-wmi
@@ -0,0 +1,81 @@
+What: /sys/bus/wmi/devices/.../driver_override
+Date: February 2024
+Contact: Armin Wolf <W_Armin@gmx.de>
+Description:
+ This file allows the driver for a device to be specified which
+ will override standard ID table matching.
+ When specified, only a driver with a name matching the value
+ written to driver_override will have an opportunity to bind
+ to the device.
+ The override is specified by writing a string to the
+ driver_override file (echo wmi-event-dummy > driver_override).
+ The override may be cleared with an empty string (echo > \
+ driver_override) which returns the device to standard matching
+ rules binding.
+ Writing to driver_override does not automatically unbind the
+ device from its current driver or make any attempt to automatically
+ load the specified driver. If no driver with a matching name is
+ currently loaded in the kernel, the device will not bind to any
+ driver.
+ This also allows devices to opt-out of driver binding using a
+ driver_override name such as "none". Only a single driver may be
+ specified in the override, there is no support for parsing delimiters.
+
+What: /sys/bus/wmi/devices/.../modalias
+Date: November 20:15
+Contact: Andy Lutomirski <luto@kernel.org>
+Description:
+ This file contains the MODALIAS value emitted by uevent for a
+ given WMI device.
+
+ Format: wmi:XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX.
+
+What: /sys/bus/wmi/devices/.../guid
+Date: November 2015
+Contact: Andy Lutomirski <luto@kernel.org>
+Description:
+ This file contains the GUID used to match WMI devices to
+ compatible WMI drivers. This GUID is not necessarily unique
+ inside a given machine, it is solely used to identify the
+ interface exposed by a given WMI device.
+
+What: /sys/bus/wmi/devices/.../object_id
+Date: November 2015
+Contact: Andy Lutomirski <luto@kernel.org>
+Description:
+ This file contains the WMI object ID used internally to construct
+ the ACPI method names used by non-event WMI devices. It contains
+ two ASCII letters.
+
+What: /sys/bus/wmi/devices/.../notify_id
+Date: November 2015
+Contact: Andy Lutomirski <luto@kernel.org>
+Description:
+ This file contains the WMI notify ID used internally to map ACPI
+ events to WMI event devices. It contains two ASCII letters.
+
+What: /sys/bus/wmi/devices/.../instance_count
+Date: November 2015
+Contact: Andy Lutomirski <luto@kernel.org>
+Description:
+ This file contains the number of WMI object instances being
+ present on a given WMI device. It contains a non-negative
+ number.
+
+What: /sys/bus/wmi/devices/.../expensive
+Date: November 2015
+Contact: Andy Lutomirski <luto@kernel.org>
+Description:
+ This file contains a boolean flag signaling if interacting with
+ the given WMI device will consume significant CPU resources.
+ The WMI driver core will take care of enabling/disabling such
+ WMI devices.
+
+What: /sys/bus/wmi/devices/.../setable
+Date: May 2017
+Contact: Darren Hart (VMware) <dvhart@infradead.org>
+Description:
+ This file contains a boolean flags signaling the data block
+ associated with the given WMI device is writable. If the
+ given WMI device is not associated with a data block, then
+ this file will not exist.
diff --git a/Documentation/ABI/testing/sysfs-c2port b/Documentation/ABI/testing/sysfs-c2port
index 716cffc457e9..f7b8cf6e4398 100644
--- a/Documentation/ABI/testing/sysfs-c2port
+++ b/Documentation/ABI/testing/sysfs-c2port
@@ -66,13 +66,6 @@ Description:
the "erase" command on the on-board flash of the connected
micro.
-What: /sys/class/c2port/c2portX/flash_erase
-Date: October 2008
-Contact: Rodolfo Giometti <giometti@linux.it>
-Description:
- The /sys/class/c2port/c2portX/flash_erase file show the
- on-board flash size of the connected micro.
-
What: /sys/class/c2port/c2portX/reset
Date: October 2008
Contact: Rodolfo Giometti <giometti@linux.it>
diff --git a/Documentation/ABI/testing/sysfs-class b/Documentation/ABI/testing/sysfs-class
index 676530fcf747..906735faa1b8 100644
--- a/Documentation/ABI/testing/sysfs-class
+++ b/Documentation/ABI/testing/sysfs-class
@@ -1,5 +1,5 @@
What: /sys/class/
-Date: Febuary 2006
+Date: February 2006
Contact: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Description:
The /sys/class directory will consist of a group of
diff --git a/Documentation/ABI/testing/sysfs-class-backlight b/Documentation/ABI/testing/sysfs-class-backlight
new file mode 100644
index 000000000000..c453646b06e2
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-backlight
@@ -0,0 +1,186 @@
+What: /sys/class/backlight/<backlight>/scale
+Date: July 2019
+KernelVersion: 5.4
+Contact: Daniel Thompson <daniel.thompson@linaro.org>
+Description:
+ Description of the scale of the brightness curve.
+
+ The human eye senses brightness approximately logarithmically,
+ hence linear changes in brightness are perceived as being
+ non-linear. To achieve a linear perception of brightness changes
+ controls like sliders need to apply a logarithmic mapping for
+ backlights with a linear brightness curve.
+
+ Possible values of the attribute are:
+
+ unknown
+ The scale of the brightness curve is unknown.
+
+ linear
+ The brightness changes linearly with each step. Brightness
+ controls should apply a logarithmic mapping for a linear
+ perception.
+
+ non-linear
+ The brightness changes non-linearly with each step. Brightness
+ controls should use a linear mapping for a linear perception.
+
+What: /sys/class/backlight/<backlight>/ambient_light_level
+Date: Apr, 2010
+KernelVersion: v2.6.35
+Contact: Michael Hennerich <michael.hennerich@analog.com>
+Description:
+ (RO) Get conversion value of the light sensor.
+
+ The value is automatically updated every 80 ms when the
+ light sensor is enabled.
+
+ The value range is device-driver specific:
+
+ For ADP8870:
+
+ It returns integer between 0 (dark) and 8000 (max ambient
+ brightness).
+
+ For ADP8860:
+
+ It returns a 13-bits integer.
+
+What: /sys/class/backlight/<backlight>/ambient_light_zone
+Date: Apr, 2010
+KernelVersion: v2.6.35
+Contact: Michael Hennerich <michael.hennerich@analog.com>,
+ device-drivers-devel@blackfin.uclinux.org
+
+Description:
+ (RW) Read or write the specific brightness level at which the
+ backlight operates.
+
+ The value meaning is device-driver specific:
+
+ For ADP8860:
+
+ == ==========================
+ 0 Off: Backlight set to 0 mA
+ 1 Level 1: daylight
+ 2 Level 2: bright
+ 3 Level 3: dark
+ == ==========================
+
+ For ADP8870:
+
+ == ==========================
+ 0 Off: Backlight set to 0 mA
+ 1 Level 1: daylight
+ 2 Level 2: bright
+ 3 Level 3: office
+ 4 Level 4: indoor
+ 5 Level 5: dark
+ == ==========================
+
+ Writing 0 returns to normal/automatic ambient light level
+ operation.
+
+ It can be enabled by writing the value stored in
+ /sys/class/backlight/<backlight>/max_brightness to
+ /sys/class/backlight/<backlight>/brightness.
+
+What: /sys/class/backlight/<backlight>/<ambient light zone>_max
+Date: Sep, 2009
+KernelVersion: v2.6.32
+Contact: device-drivers-devel@blackfin.uclinux.org
+Description:
+ Control the maximum brightness for <ambient light zone>
+ on this <backlight>. Values are between 0 and 127. This file
+ will also show the brightness level stored for this
+ <ambient light zone>.
+
+ The <ambient light zone> is device-driver specific:
+
+ For ADP5520 and ADP5501, <ambient light zone> can be:
+
+ =========== ================================================
+ Ambient sysfs entry
+ light zone
+ =========== ================================================
+ daylight /sys/class/backlight/<backlight>/daylight_max
+ office /sys/class/backlight/<backlight>/office_max
+ dark /sys/class/backlight/<backlight>/dark_max
+ =========== ================================================
+
+ For ADP8860, <ambient light zone> can be:
+
+ =========== ================================================
+ Ambient sysfs entry
+ light zone
+ =========== ================================================
+ l1_daylight /sys/class/backlight/<backlight>/l1_daylight_max
+ l2_office /sys/class/backlight/<backlight>/l2_office_max
+ l3_dark /sys/class/backlight/<backlight>/l3_dark_max
+ =========== ================================================
+
+ For ADP8870, <ambient light zone> can be:
+
+ =========== ================================================
+ Ambient sysfs entry
+ light zone
+ =========== ================================================
+ l1_daylight /sys/class/backlight/<backlight>/l1_daylight_max
+ l2_bright /sys/class/backlight/<backlight>/l2_bright_max
+ l3_office /sys/class/backlight/<backlight>/l3_office_max
+ l4_indoor /sys/class/backlight/<backlight>/l4_indoor_max
+ l5_dark /sys/class/backlight/<backlight>/l5_dark_max
+ =========== ================================================
+
+ See also: /sys/class/backlight/<backlight>/ambient_light_zone.
+
+What: /sys/class/backlight/<backlight>/<ambient light zone>_dim
+Date: Sep, 2009
+KernelVersion: v2.6.32
+Contact: device-drivers-devel@blackfin.uclinux.org
+Description:
+ Control the dim brightness for <ambient light zone>
+ on this <backlight>. Values are between 0 and 127, typically
+ set to 0. Full off when the backlight is disabled.
+ This file will also show the dim brightness level stored for
+ this <ambient light zone>.
+
+ The <ambient light zone> is device-driver specific:
+
+ For ADP5520 and ADP5501, <ambient light zone> can be:
+
+ =========== ================================================
+ Ambient sysfs entry
+ light zone
+ =========== ================================================
+ daylight /sys/class/backlight/<backlight>/daylight_dim
+ office /sys/class/backlight/<backlight>/office_dim
+ dark /sys/class/backlight/<backlight>/dark_dim
+ =========== ================================================
+
+ For ADP8860, <ambient light zone> can be:
+
+ =========== ================================================
+ Ambient sysfs entry
+ light zone
+ =========== ================================================
+ l1_daylight /sys/class/backlight/<backlight>/l1_daylight_dim
+ l2_office /sys/class/backlight/<backlight>/l2_office_dim
+ l3_dark /sys/class/backlight/<backlight>/l3_dark_dim
+ =========== ================================================
+
+ For ADP8870, <ambient light zone> can be:
+
+ =========== ================================================
+ Ambient sysfs entry
+ light zone
+ =========== ================================================
+ l1_daylight /sys/class/backlight/<backlight>/l1_daylight_dim
+ l2_bright /sys/class/backlight/<backlight>/l2_bright_dim
+ l3_office /sys/class/backlight/<backlight>/l3_office_dim
+ l4_indoor /sys/class/backlight/<backlight>/l4_indoor_dim
+ l5_dark /sys/class/backlight/<backlight>/l5_dark_dim
+ =========== ================================================
+
+ See also: /sys/class/backlight/<backlight>/ambient_light_zone.
+
diff --git a/Documentation/ABI/testing/sysfs-class-backlight-adp5520 b/Documentation/ABI/testing/sysfs-class-backlight-adp5520
deleted file mode 100644
index 34b6ebafa210..000000000000
--- a/Documentation/ABI/testing/sysfs-class-backlight-adp5520
+++ /dev/null
@@ -1,31 +0,0 @@
-sysfs interface for analog devices adp5520(01) backlight driver
----------------------------------------------------------------
-
-The backlight brightness control operates at three different levels for the
-adp5520 and adp5501 devices: daylight (level 1), office (level 2) and dark
-(level 3). By default the brightness operates at the daylight brightness level.
-
-What: /sys/class/backlight/<backlight>/daylight_max
-What: /sys/class/backlight/<backlight>/office_max
-What: /sys/class/backlight/<backlight>/dark_max
-Date: Sep, 2009
-KernelVersion: v2.6.32
-Contact: Michael Hennerich <michael.hennerich@analog.com>
-Description:
- (RW) Maximum current setting for the backlight when brightness
- is at one of the three levels (daylight, office or dark). This
- is an input code between 0 and 127, which is transformed to a
- value between 0 mA and 30 mA using linear or non-linear
- algorithms.
-
-What: /sys/class/backlight/<backlight>/daylight_dim
-What: /sys/class/backlight/<backlight>/office_dim
-What: /sys/class/backlight/<backlight>/dark_dim
-Date: Sep, 2009
-KernelVersion: v2.6.32
-Contact: Michael Hennerich <michael.hennerich@analog.com>
-Description:
- (RW) Dim current setting for the backlight when brightness is at
- one of the three levels (daylight, office or dark). This is an
- input code between 0 and 127, which is transformed to a value
- between 0 mA and 30 mA using linear or non-linear algorithms.
diff --git a/Documentation/ABI/testing/sysfs-class-backlight-adp8860 b/Documentation/ABI/testing/sysfs-class-backlight-adp8860
deleted file mode 100644
index 54d61c788b1b..000000000000
--- a/Documentation/ABI/testing/sysfs-class-backlight-adp8860
+++ /dev/null
@@ -1,54 +0,0 @@
-sysfs interface for analog devices adp8860 backlight driver
------------------------------------------------------------
-
-The backlight brightness control operates at three different levels for the
-adp8860, adp8861 and adp8863 devices: daylight (level 1), office (level 2) and
-dark (level 3). By default the brightness operates at the daylight brightness
-level.
-
-What: /sys/class/backlight/<backlight>/ambient_light_level
-Date: Apr, 2010
-KernelVersion: v2.6.35
-Contact: Michael Hennerich <michael.hennerich@analog.com>
-Description:
- (RO) 13-bit conversion value for the first light sensor—high
- byte (Bit 12 to Bit 8). The value is updated every 80 ms (when
- the light sensor is enabled).
-
-
-What: /sys/class/backlight/<backlight>/ambient_light_zone
-Date: Apr, 2010
-KernelVersion: v2.6.35
-Contact: Michael Hennerich <michael.hennerich@analog.com>
-Description:
- (RW) Read or write the specific level at which the backlight
- operates. Value "0" enables automatic ambient light sensing, and
- values "1", "2" or "3" set the control to daylight, office or
- dark respectively.
-
-
-What: /sys/class/backlight/<backlight>/l1_daylight_max
-What: /sys/class/backlight/<backlight>/l2_office_max
-What: /sys/class/backlight/<backlight>/l3_dark_max
-Date: Apr, 2010
-KernelVersion: v2.6.35
-Contact: Michael Hennerich <michael.hennerich@analog.com>
-Description:
- (RW) Maximum current setting for the backlight when brightness
- is at one of the three levels (daylight, office or dark). This
- is an input code between 0 and 127, which is transformed to a
- value between 0 mA and 30 mA using linear or non-linear
- algorithms.
-
-
-What: /sys/class/backlight/<backlight>/l1_daylight_dim
-What: /sys/class/backlight/<backlight>/l2_office_dim
-What: /sys/class/backlight/<backlight>/l3_dark_dim
-Date: Apr, 2010
-KernelVersion: v2.6.35
-Contact: Michael Hennerich <michael.hennerich@analog.com>
-Description:
- (RW) Dim current setting for the backlight when brightness is at
- one of the three levels (daylight, office or dark). This is an
- input code between 0 and 127, which is transformed to a value
- between 0 mA and 30 mA using linear or non-linear algorithms.
diff --git a/Documentation/ABI/testing/sysfs-class-backlight-driver-adp8870 b/Documentation/ABI/testing/sysfs-class-backlight-driver-adp8870
deleted file mode 100644
index 33e648808117..000000000000
--- a/Documentation/ABI/testing/sysfs-class-backlight-driver-adp8870
+++ /dev/null
@@ -1,56 +0,0 @@
-What: /sys/class/backlight/<backlight>/<ambient light zone>_max
-What: /sys/class/backlight/<backlight>/l1_daylight_max
-What: /sys/class/backlight/<backlight>/l2_bright_max
-What: /sys/class/backlight/<backlight>/l3_office_max
-What: /sys/class/backlight/<backlight>/l4_indoor_max
-What: /sys/class/backlight/<backlight>/l5_dark_max
-Date: May 2011
-KernelVersion: 3.0
-Contact: device-drivers-devel@blackfin.uclinux.org
-Description:
- Control the maximum brightness for <ambient light zone>
- on this <backlight>. Values are between 0 and 127. This file
- will also show the brightness level stored for this
- <ambient light zone>.
-
-What: /sys/class/backlight/<backlight>/<ambient light zone>_dim
-What: /sys/class/backlight/<backlight>/l2_bright_dim
-What: /sys/class/backlight/<backlight>/l3_office_dim
-What: /sys/class/backlight/<backlight>/l4_indoor_dim
-What: /sys/class/backlight/<backlight>/l5_dark_dim
-Date: May 2011
-KernelVersion: 3.0
-Contact: device-drivers-devel@blackfin.uclinux.org
-Description:
- Control the dim brightness for <ambient light zone>
- on this <backlight>. Values are between 0 and 127, typically
- set to 0. Full off when the backlight is disabled.
- This file will also show the dim brightness level stored for
- this <ambient light zone>.
-
-What: /sys/class/backlight/<backlight>/ambient_light_level
-Date: May 2011
-KernelVersion: 3.0
-Contact: device-drivers-devel@blackfin.uclinux.org
-Description:
- Get conversion value of the light sensor.
- This value is updated every 80 ms (when the light sensor
- is enabled). Returns integer between 0 (dark) and
- 8000 (max ambient brightness)
-
-What: /sys/class/backlight/<backlight>/ambient_light_zone
-Date: May 2011
-KernelVersion: 3.0
-Contact: device-drivers-devel@blackfin.uclinux.org
-Description:
- Get/Set current ambient light zone. Reading returns
- integer between 1..5 (1 = daylight, 2 = bright, ..., 5 = dark).
- Writing a value between 1..5 forces the backlight controller
- to enter the corresponding ambient light zone.
- Writing 0 returns to normal/automatic ambient light level
- operation. The ambient light sensing feature on these devices
- is an extension to the API documented in
- Documentation/ABI/stable/sysfs-class-backlight.
- It can be enabled by writing the value stored in
- /sys/class/backlight/<backlight>/max_brightness to
- /sys/class/backlight/<backlight>/brightness.
diff --git a/Documentation/ABI/testing/sysfs-class-backlight-driver-lm3533 b/Documentation/ABI/testing/sysfs-class-backlight-driver-lm3533
index 77cf7ac949af..8251e78abc49 100644
--- a/Documentation/ABI/testing/sysfs-class-backlight-driver-lm3533
+++ b/Documentation/ABI/testing/sysfs-class-backlight-driver-lm3533
@@ -4,10 +4,12 @@ KernelVersion: 3.5
Contact: Johan Hovold <jhovold@gmail.com>
Description:
Get the ALS output channel used as input in
- ALS-current-control mode (0, 1), where
+ ALS-current-control mode (0, 1), where:
- 0 - out_current0 (backlight 0)
- 1 - out_current1 (backlight 1)
+ == ==========================
+ 0 out_current0 (backlight 0)
+ 1 out_current1 (backlight 1)
+ == ==========================
What: /sys/class/backlight/<backlight>/als_en
Date: May 2012
@@ -28,21 +30,25 @@ Date: April 2012
KernelVersion: 3.5
Contact: Johan Hovold <jhovold@gmail.com>
Description:
- Set the brightness-mapping mode (0, 1), where
+ Set the brightness-mapping mode (0, 1), where:
- 0 - exponential mode
- 1 - linear mode
+ == ================
+ 0 exponential mode
+ 1 linear mode
+ == ================
What: /sys/class/backlight/<backlight>/pwm
Date: April 2012
KernelVersion: 3.5
Contact: Johan Hovold <jhovold@gmail.com>
Description:
- Set the PWM-input control mask (5 bits), where
+ Set the PWM-input control mask (5 bits), where:
- bit 5 - PWM-input enabled in Zone 4
- bit 4 - PWM-input enabled in Zone 3
- bit 3 - PWM-input enabled in Zone 2
- bit 2 - PWM-input enabled in Zone 1
- bit 1 - PWM-input enabled in Zone 0
- bit 0 - PWM-input enabled
+ ===== ===========================
+ bit 5 PWM-input enabled in Zone 4
+ bit 4 PWM-input enabled in Zone 3
+ bit 3 PWM-input enabled in Zone 2
+ bit 2 PWM-input enabled in Zone 1
+ bit 1 PWM-input enabled in Zone 0
+ bit 0 PWM-input enabled
+ ===== ===========================
diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi
index d773d5697cf5..0d2abd88a18c 100644
--- a/Documentation/ABI/testing/sysfs-class-bdi
+++ b/Documentation/ABI/testing/sysfs-class-bdi
@@ -23,15 +23,17 @@ default
The default backing dev, used for non-block device backed
filesystems which do not provide their own BDI.
-Files under /sys/class/bdi/<bdi>/
----------------------------------
-
-read_ahead_kb (read-write)
-
+What: /sys/class/bdi/<bdi>/read_ahead_kb
+Date: January 2008
+Contact: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Description:
Size of the read-ahead window in kilobytes
-min_ratio (read-write)
-
+ (read-write)
+What: /sys/class/bdi/<bdi>/min_ratio
+Date: January 2008
+Contact: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Description:
Under normal circumstances each device is given a part of the
total write-back cache that relates to its current average
writeout speed in relation to the other devices.
@@ -40,8 +42,27 @@ min_ratio (read-write)
percentage of the write-back cache to a particular device.
For example, this is useful for providing a minimum QoS.
-max_ratio (read-write)
+ (read-write)
+What: /sys/class/bdi/<bdi>/min_ratio_fine
+Date: November 2022
+Contact: Stefan Roesch <shr@devkernel.io>
+Description:
+ Under normal circumstances each device is given a part of the
+ total write-back cache that relates to its current average
+ writeout speed in relation to the other devices.
+
+ The 'min_ratio_fine' parameter allows assigning a minimum reserve
+ of the write-back cache to a particular device. The value is
+ expressed as part of 1 million. For example, this is useful for
+ providing a minimum QoS.
+
+ (read-write)
+
+What: /sys/class/bdi/<bdi>/max_ratio
+Date: January 2008
+Contact: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Description:
Allows limiting a particular device to use not more than the
given percentage of the write-back cache. This is useful in
situations where we want to avoid one device taking all or
@@ -49,7 +70,65 @@ max_ratio (read-write)
mount that is prone to get stuck, or a FUSE mount which cannot
be trusted to play fair.
-stable_pages_required (read-only)
+ (read-write)
+
+What: /sys/class/bdi/<bdi>/max_ratio_fine
+Date: November 2022
+Contact: Stefan Roesch <shr@devkernel.io>
+Description:
+ Allows limiting a particular device to use not more than the
+ given value of the write-back cache. The value is given as part
+ of 1 million. This is useful in situations where we want to avoid
+ one device taking all or most of the write-back cache. For example
+ in case of an NFS mount that is prone to get stuck, or a FUSE mount
+ which cannot be trusted to play fair.
+
+ (read-write)
+What: /sys/class/bdi/<bdi>/min_bytes
+Date: October 2022
+Contact: Stefan Roesch <shr@devkernel.io>
+Description:
+ Under normal circumstances each device is given a part of the
+ total write-back cache that relates to its current average
+ writeout speed in relation to the other devices.
+
+ The 'min_bytes' parameter allows assigning a minimum
+ percentage of the write-back cache to a particular device
+ expressed in bytes.
+ For example, this is useful for providing a minimum QoS.
+
+ (read-write)
+
+What: /sys/class/bdi/<bdi>/max_bytes
+Date: October 2022
+Contact: Stefan Roesch <shr@devkernel.io>
+Description:
+ Allows limiting a particular device to use not more than the
+ given 'max_bytes' of the write-back cache. This is useful in
+ situations where we want to avoid one device taking all or
+ most of the write-back cache. For example in case of an NFS
+ mount that is prone to get stuck, a FUSE mount which cannot be
+ trusted to play fair, or a nbd device.
+
+ (read-write)
+
+What: /sys/class/bdi/<bdi>/strict_limit
+Date: October 2022
+Contact: Stefan Roesch <shr@devkernel.io>
+Description:
+ Forces per-BDI checks for the share of given device in the write-back
+ cache even before the global background dirty limit is reached. This
+ is useful in situations where the global limit is much higher than
+ affordable for given relatively slow (or untrusted) device. Turning
+ strictlimit on has no visible effect if max_ratio is equal to 100%.
+
+ (read-write)
+What: /sys/class/bdi/<bdi>/stable_pages_required
+Date: January 2008
+Contact: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Description:
If set, the backing device requires that all pages comprising a write
request must not be changed until writeout is complete.
+
+ (read-only)
diff --git a/Documentation/ABI/testing/sysfs-class-chromeos b/Documentation/ABI/testing/sysfs-class-chromeos
new file mode 100644
index 000000000000..7fa5be6cc774
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-chromeos
@@ -0,0 +1,53 @@
+What: /sys/class/chromeos/<ec-device-name>/flashinfo
+Date: August 2015
+KernelVersion: 4.2
+Description:
+ Show the EC flash information.
+
+What: /sys/class/chromeos/<ec-device-name>/kb_wake_angle
+Date: March 2018
+KernelVersion: 4.17
+Description:
+ Control the keyboard wake lid angle. Values are between
+ 0 and 360. This file will also show the keyboard wake lid
+ angle by querying the hardware.
+
+What: /sys/class/chromeos/<ec-device-name>/reboot
+Date: August 2015
+KernelVersion: 4.2
+Description:
+ Tell the EC to reboot in various ways. Options are:
+
+ - "cancel": Cancel a pending reboot.
+ - "ro": Jump to RO without rebooting.
+ - "rw": Jump to RW without rebooting.
+ - "cold": Cold reboot.
+ - "disable-jump": Disable jump until next reboot.
+ - "hibernate": Hibernate the EC.
+ - "at-shutdown": Reboot after an AP shutdown.
+
+What: /sys/class/chromeos/<ec-device-name>/version
+Date: August 2015
+KernelVersion: 4.2
+Description:
+ Show the information about the EC software and hardware.
+
+What: /sys/class/chromeos/cros_ec/usbpdmuxinfo
+Date: February 2025
+Description:
+ Show PD mux status for each typec port with following flags:
+ - "USB": USB connected
+ - "DP": DP connected
+ - "POLARITY": CC line Polarity inverted
+ - "HPD_IRQ": Hot Plug Detect interrupt is asserted
+ - "HPD_LVL": Hot Plug Detect level is asserted
+ - "SAFE": DP is in safe mode
+ - "TBT": TBT enabled
+ - "USB4": USB4 enabled
+
+What: /sys/class/chromeos/cros_ec/ap_mode_entry
+Date: February 2025
+Description:
+ Show if the AP mode entry EC feature is supported.
+ It indicates whether the EC waits for direction from the AP
+ to enter Type-C altmodes or USB4 mode.
diff --git a/Documentation/ABI/testing/sysfs-class-chromeos-driver-cros-ec-lightbar b/Documentation/ABI/testing/sysfs-class-chromeos-driver-cros-ec-lightbar
new file mode 100644
index 000000000000..57a037791403
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-chromeos-driver-cros-ec-lightbar
@@ -0,0 +1,74 @@
+What: /sys/class/chromeos/<ec-device-name>/lightbar/brightness
+Date: August 2015
+KernelVersion: 4.2
+Description:
+ Writing to this file adjusts the overall brightness of
+ the lightbar, separate from any color intensity. The
+ valid range is 0 (off) to 255 (maximum brightness).
+
+What: /sys/class/chromeos/<ec-device-name>/lightbar/interval_msec
+Date: August 2015
+KernelVersion: 4.2
+Description:
+ The lightbar is controlled by an embedded controller (EC),
+ which also manages the keyboard, battery charging, fans,
+ and other system hardware. To prevent unprivileged users
+ from interfering with the other EC functions, the rate at
+ which the lightbar control files can be read or written is
+ limited.
+
+ Reading this file will return the number of milliseconds
+ that must elapse between accessing any of the lightbar
+ functions through this interface. Going faster will simply
+ block until the necessary interval has lapsed. The interval
+ applies uniformly to all accesses of any kind by any user.
+
+What: /sys/class/chromeos/<ec-device-name>/lightbar/led_rgb
+Date: August 2015
+KernelVersion: 4.2
+Description:
+ This allows you to control each LED segment. If the
+ lightbar is already running one of the automatic
+ sequences, you probably won’t see anything change because
+ your color setting will be almost immediately replaced.
+ To get useful results, you should stop the lightbar
+ sequence first.
+
+ The values written to this file are sets of four integers,
+ indicating LED, RED, GREEN, BLUE. The LED number is 0 to 3
+ to select a single segment, or 4 to set all four segments
+ to the same value at once. The RED, GREEN, and BLUE
+ numbers should be in the range 0 (off) to 255 (maximum).
+ You can update more than one segment at a time by writing
+ more than one set of four integers.
+
+What: /sys/class/chromeos/<ec-device-name>/lightbar/program
+Date: August 2015
+KernelVersion: 4.2
+Description:
+ This allows you to upload and run custom lightbar sequences.
+
+What: /sys/class/chromeos/<ec-device-name>/lightbar/sequence
+Date: August 2015
+KernelVersion: 4.2
+Description:
+ The Pixel lightbar has a number of built-in sequences
+ that it displays under various conditions, such as at
+ power on, shut down, or while running. Reading from this
+ file displays the current sequence that the lightbar is
+ displaying. Writing to this file allows you to change the
+ sequence.
+
+What: /sys/class/chromeos/<ec-device-name>/lightbar/userspace_control
+Date: August 2015
+KernelVersion: 4.2
+Description:
+ This allows you to take the control of the lightbar. This
+ prevents the kernel from going through its normal
+ sequences.
+
+What: /sys/class/chromeos/<ec-device-name>/lightbar/version
+Date: August 2015
+KernelVersion: 4.2
+Description:
+ Show the information about the lightbar version.
diff --git a/Documentation/ABI/testing/sysfs-class-chromeos-driver-cros-ec-vbc b/Documentation/ABI/testing/sysfs-class-chromeos-driver-cros-ec-vbc
new file mode 100644
index 000000000000..38c5aaaaa89a
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-chromeos-driver-cros-ec-vbc
@@ -0,0 +1,6 @@
+What: /sys/class/chromeos/<ec-device-name>/vbc/vboot_context
+Date: October 2015
+KernelVersion: 4.4
+Description:
+ Read/write the verified boot context data included on a
+ small nvram space on some EC implementations.
diff --git a/Documentation/ABI/testing/sysfs-class-devfreq b/Documentation/ABI/testing/sysfs-class-devfreq
index ee39acacf6f8..df8ba88b9f6a 100644
--- a/Documentation/ABI/testing/sysfs-class-devfreq
+++ b/Documentation/ABI/testing/sysfs-class-devfreq
@@ -7,6 +7,13 @@ Description:
The name of devfreq object denoted as ... is same as the
name of device using devfreq.
+What: /sys/class/devfreq/.../name
+Date: November 2019
+Contact: Chanwoo Choi <cw00.choi@samsung.com>
+Description:
+ The /sys/class/devfreq/.../name shows the name of device
+ of the corresponding devfreq object.
+
What: /sys/class/devfreq/.../governor
Date: September 2011
Contact: MyungJoo Ham <myungjoo.ham@samsung.com>
@@ -30,38 +37,23 @@ Description:
The /sys/class/devfreq/.../target_freq shows the next governor
predicted target frequency of the corresponding devfreq object.
-What: /sys/class/devfreq/.../polling_interval
-Date: September 2011
-Contact: MyungJoo Ham <myungjoo.ham@samsung.com>
-Description:
- The /sys/class/devfreq/.../polling_interval shows and sets
- the requested polling interval of the corresponding devfreq
- object. The values are represented in ms. If the value is
- less than 1 jiffy, it is considered to be 0, which means
- no polling. This value is meaningless if the governor is
- not polling; thus. If the governor is not using
- devfreq-provided central polling
- (/sys/class/devfreq/.../central_polling is 0), this value
- may be useless.
-
What: /sys/class/devfreq/.../trans_stat
Date: October 2012
Contact: MyungJoo Ham <myungjoo.ham@samsung.com>
-Descrtiption:
- This ABI shows the statistics of devfreq behavior on a
- specific device. It shows the time spent in each state and
- the number of transitions between states.
+Description:
+ This ABI shows or clears the statistics of devfreq behavior
+ on a specific device. It shows the time spent in each state
+ and the number of transitions between states.
In order to activate this ABI, the devfreq target device
driver should provide the list of available frequencies
- with its profile.
+ with its profile. If need to reset the statistics of devfreq
+ behavior on a specific device, enter 0(zero) to 'trans_stat'
+ as following::
-What: /sys/class/devfreq/.../userspace/set_freq
-Date: September 2011
-Contact: MyungJoo Ham <myungjoo.ham@samsung.com>
-Description:
- The /sys/class/devfreq/.../userspace/set_freq shows and
- sets the requested frequency for the devfreq object if
- userspace governor is in effect.
+ echo 0 > /sys/class/devfreq/.../trans_stat
+
+ If the transition table is bigger than PAGE_SIZE, reading
+ this will return an -EFBIG error.
What: /sys/class/devfreq/.../available_frequencies
Date: October 2012
@@ -98,3 +90,54 @@ Description:
frequency requested by governors and min_freq.
The max_freq overrides min_freq because max_freq may be
used to throttle devices to avoid overheating.
+
+What: /sys/class/devfreq/.../polling_interval
+Date: September 2011
+Contact: MyungJoo Ham <myungjoo.ham@samsung.com>
+Description:
+ The /sys/class/devfreq/.../polling_interval shows and sets
+ the requested polling interval of the corresponding devfreq
+ object. The values are represented in ms. If the value is
+ less than 1 jiffy, it is considered to be 0, which means
+ no polling. This value is meaningless if the governor is
+ not polling.
+
+ A list of governors that support the node:
+ - simple_ondmenad
+ - tegra_actmon
+
+What: /sys/class/devfreq/.../userspace/set_freq
+Date: September 2011
+Contact: MyungJoo Ham <myungjoo.ham@samsung.com>
+Description:
+ The /sys/class/devfreq/.../userspace/set_freq shows and
+ sets the requested frequency for the devfreq object if
+ userspace governor is in effect.
+
+ A list of governors that support the node:
+ - userspace
+
+What: /sys/class/devfreq/.../timer
+Date: July 2020
+Contact: Chanwoo Choi <cw00.choi@samsung.com>
+Description:
+ This ABI shows and stores the kind of work timer by users.
+ This work timer is used by devfreq workqueue in order to
+ monitor the device status such as utilization. The user
+ can change the work timer on runtime according to their demand
+ as following::
+
+ echo deferrable > /sys/class/devfreq/.../timer
+ echo delayed > /sys/class/devfreq/.../timer
+
+ A list of governors that support the node:
+ - simple_ondemand
+
+What: /sys/class/devfreq/.../related_cpus
+Date: June 2025
+Contact: Linux power management list <linux-pm@vger.kernel.org>
+Description: The list of CPUs whose performance is closely related to the
+ frequency of this devfreq domain.
+
+ This file is only present if a specific devfreq device is
+ closely associated with a subset of CPUs.
diff --git a/Documentation/ABI/testing/sysfs-class-devfreq-event b/Documentation/ABI/testing/sysfs-class-devfreq-event
index ceaf0f686d4a..dbe48495e55a 100644
--- a/Documentation/ABI/testing/sysfs-class-devfreq-event
+++ b/Documentation/ABI/testing/sysfs-class-devfreq-event
@@ -1,25 +1,25 @@
-What: /sys/class/devfreq-event/event(x)/
+What: /sys/class/devfreq-event/event<x>/
Date: January 2017
Contact: Chanwoo Choi <cw00.choi@samsung.com>
Description:
Provide a place in sysfs for the devfreq-event objects.
This allows accessing various devfreq-event specific variables.
- The name of devfreq-event object denoted as 'event(x)' which
+ The name of devfreq-event object denoted as 'event<x>' which
includes the unique number of 'x' for each devfreq-event object.
-What: /sys/class/devfreq-event/event(x)/name
+What: /sys/class/devfreq-event/event<x>/name
Date: January 2017
Contact: Chanwoo Choi <cw00.choi@samsung.com>
Description:
- The /sys/class/devfreq-event/event(x)/name attribute contains
+ The /sys/class/devfreq-event/event<x>/name attribute contains
the name of the devfreq-event object. This attribute is
read-only.
-What: /sys/class/devfreq-event/event(x)/enable_count
+What: /sys/class/devfreq-event/event<x>/enable_count
Date: January 2017
Contact: Chanwoo Choi <cw00.choi@samsung.com>
Description:
- The /sys/class/devfreq-event/event(x)/enable_count attribute
+ The /sys/class/devfreq-event/event<x>/enable_count attribute
contains the reference count to enable the devfreq-event
object. If the device is enabled, the value of attribute is
greater than zero.
diff --git a/Documentation/ABI/testing/sysfs-class-devlink b/Documentation/ABI/testing/sysfs-class-devlink
new file mode 100644
index 000000000000..8a21ce515f61
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-devlink
@@ -0,0 +1,130 @@
+What: /sys/class/devlink/.../
+Date: May 2020
+Contact: Saravana Kannan <saravanak@google.com>
+Description:
+ Provide a place in sysfs for the device link objects in the
+ kernel at any given time. The name of a device link directory,
+ denoted as ... above, is of the form <supplier>--<consumer>
+ where <supplier> is the supplier bus:device name and <consumer>
+ is the consumer bus:device name.
+
+What: /sys/class/devlink/.../auto_remove_on
+Date: May 2020
+Contact: Saravana Kannan <saravanak@google.com>
+Description:
+ This file indicates if the device link will ever be
+ automatically removed by the driver core when the consumer and
+ supplier devices themselves are still present.
+
+ This will be one of the following strings:
+
+ - 'consumer unbind'
+ - 'supplier unbind'
+ - 'never'
+
+ 'consumer unbind' means the device link will be removed when
+ the consumer's driver is unbound from the consumer device.
+
+ 'supplier unbind' means the device link will be removed when
+ the supplier's driver is unbound from the supplier device.
+
+ 'never' means the device link will not be automatically removed
+ when as long as the supplier and consumer devices themselves
+ are still present.
+
+What: /sys/class/devlink/.../consumer
+Date: May 2020
+Contact: Saravana Kannan <saravanak@google.com>
+Description:
+ This file is a symlink to the consumer device's sysfs directory.
+
+What: /sys/class/devlink/.../runtime_pm
+Date: May 2020
+Contact: Saravana Kannan <saravanak@google.com>
+Description:
+ This file indicates if the device link has any impact on the
+ runtime power management behavior of the consumer and supplier
+ devices. For example: Making sure the supplier doesn't enter
+ runtime suspend while the consumer is active.
+
+ This will be one of the following strings:
+
+ === ========================================
+ '0' Does not affect runtime power management
+ '1' Affects runtime power management
+ === ========================================
+
+What: /sys/class/devlink/.../status
+Date: May 2020
+Contact: Saravana Kannan <saravanak@google.com>
+Description:
+ This file indicates the status of the device link. The status
+ of a device link is affected by whether the supplier and
+ consumer devices have been bound to their corresponding
+ drivers. The status of a device link also affects the binding
+ and unbinding of the supplier and consumer devices with their
+ drivers and also affects whether the software state of the
+ supplier device is synced with the hardware state of the
+ supplier device after boot up.
+ See also: sysfs-devices-state_synced.
+
+ This will be one of the following strings:
+
+ - 'not tracked'
+ - 'dormant'
+ - 'available'
+ - 'consumer probing'
+ - 'active'
+ - 'supplier unbinding'
+ - 'unknown'
+
+ 'not tracked' means this device link does not track the status
+ and has no impact on the binding, unbinding and syncing the
+ hardware and software device state.
+
+ 'dormant' means the supplier and the consumer devices have not
+ bound to their driver.
+
+ 'available' means the supplier has bound to its driver and is
+ available to supply resources to the consumer device.
+
+ 'consumer probing' means the consumer device is currently
+ trying to bind to its driver.
+
+ 'active' means the supplier and consumer devices have both
+ bound successfully to their drivers.
+
+ 'supplier unbinding' means the supplier devices is currently in
+ the process of unbinding from its driver.
+
+ 'unknown' means the state of the device link is not any of the
+ above. If this is ever the value, there's a bug in the kernel.
+
+What: /sys/class/devlink/.../supplier
+Date: May 2020
+Contact: Saravana Kannan <saravanak@google.com>
+Description:
+ This file is a symlink to the supplier device's sysfs directory.
+
+What: /sys/class/devlink/.../sync_state_only
+Date: May 2020
+Contact: Saravana Kannan <saravanak@google.com>
+Description:
+ This file indicates if the device link is limited to only
+ affecting the syncing of the hardware and software state of the
+ supplier device.
+
+ This will be one of the following strings:
+
+ === ================================
+ '0'
+ '1' Affects runtime power management
+ === ================================
+
+ '0' means the device link can affect other device behaviors
+ like binding/unbinding, suspend/resume, runtime power
+ management, etc.
+
+ '1' means the device link will only affect the syncing of
+ hardware and software state of the supplier device after boot
+ up and doesn't not affect other behaviors of the devices.
diff --git a/Documentation/ABI/testing/sysfs-class-drm b/Documentation/ABI/testing/sysfs-class-drm
new file mode 100644
index 000000000000..d23fed5e29a7
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-drm
@@ -0,0 +1,8 @@
+What: /sys/class/drm/.../boot_display
+Date: January 2026
+Contact: Linux DRI developers <dri-devel@vger.kernel.org>
+Description:
+ This file indicates that displays connected to the device were
+ used to display the boot sequence. If a display connected to
+ the device was used to display the boot sequence the file will
+ be present and contain "1".
diff --git a/Documentation/ABI/testing/sysfs-class-extcon b/Documentation/ABI/testing/sysfs-class-extcon
index 57a726232912..f8e705375b24 100644
--- a/Documentation/ABI/testing/sysfs-class-extcon
+++ b/Documentation/ABI/testing/sysfs-class-extcon
@@ -39,19 +39,22 @@ Description:
callback.
If the default callback for showing function is used, the
- format is like this:
- # cat state
- USB_OTG=1
- HDMI=0
- TA=1
- EAR_JACK=0
- #
+ format is like this::
+
+ # cat state
+ USB_OTG=1
+ HDMI=0
+ TA=1
+ EAR_JACK=0
+ #
+
In this example, the extcon device has USB_OTG and TA
cables attached and HDMI and EAR_JACK cables detached.
In order to update the state of an extcon device, enter a hex
- state number starting with 0x:
- # echo 0xHEX > state
+ state number starting with 0x::
+
+ # echo 0xHEX > state
This updates the whole state of the extcon device.
Inputs of all the methods are required to meet the
@@ -62,19 +65,19 @@ Description:
interface associated with each cable cannot update
multiple cable states of an extcon device simultaneously.
-What: /sys/class/extcon/.../cable.x/name
+What: /sys/class/extcon/.../cable.X/name
Date: February 2012
Contact: MyungJoo Ham <myungjoo.ham@samsung.com>
Description:
- The /sys/class/extcon/.../cable.x/name shows the name of cable
- "x" (integer between 0 and 31) of an extcon device.
+ The /sys/class/extcon/.../cable.X/name shows the name of cable
+ "X" (integer between 0 and 31) of an extcon device.
-What: /sys/class/extcon/.../cable.x/state
+What: /sys/class/extcon/.../cable.X/state
Date: February 2012
Contact: MyungJoo Ham <myungjoo.ham@samsung.com>
Description:
- The /sys/class/extcon/.../cable.x/state shows and stores the
- state of cable "x" (integer between 0 and 31) of an extcon
+ The /sys/class/extcon/.../cable.X/state shows and stores the
+ state of cable "X" (integer between 0 and 31) of an extcon
device. The state value is either 0 (detached) or 1
(attached).
@@ -84,12 +87,13 @@ Contact: MyungJoo Ham <myungjoo.ham@samsung.com>
Description:
Shows the relations of mutually exclusiveness. For example,
if the mutually_exclusive array of extcon device is
- {0x3, 0x5, 0xC, 0x0}, then the output is:
- # ls mutually_exclusive/
- 0x3
- 0x5
- 0xc
- #
+ {0x3, 0x5, 0xC, 0x0}, then the output is::
+
+ # ls mutually_exclusive/
+ 0x3
+ 0x5
+ 0xc
+ #
Note that mutually_exclusive is a sub-directory of the extcon
device and the file names under the mutually_exclusive
diff --git a/Documentation/ABI/testing/sysfs-class-fc b/Documentation/ABI/testing/sysfs-class-fc
new file mode 100644
index 000000000000..3057a6d3b8cf
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-fc
@@ -0,0 +1,27 @@
+What: /sys/class/fc/fc_udev_device/appid_store
+Date: Aug 2021
+Contact: Muneendra Kumar <muneendra.kumar@broadconm.com>
+Description:
+ This interface allows an admin to set an FC application
+ identifier in the blkcg associated with a cgroup id. The
+ identifier is typically a UUID that is associated with
+ an application or logical entity such as a virtual
+ machine or container group. The application or logical
+ entity utilizes a block device via the cgroup id.
+ FC adapter drivers may query the identifier and tag FC
+ traffic based on the identifier. FC host and FC fabric
+ entities can utilize the application id and FC traffic
+ tag to identify traffic sources.
+
+ The interface expects a string "<cgroupid>:<appid>" where:
+ <cgroupid> is inode of the cgroup in hexadecimal
+ <appid> is user provided string upto 128 characters
+ in length.
+
+ If an appid_store is done for a cgroup id that already
+ has an appid set, the new value will override the
+ previous value.
+
+ If an admin wants to remove an FC application identifier
+ from a cgroup, an appid_store should be done with the
+ following string: "<cgroupid>:"
diff --git a/Documentation/ABI/testing/sysfs-class-fc_host b/Documentation/ABI/testing/sysfs-class-fc_host
new file mode 100644
index 000000000000..0a696cbd8232
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-fc_host
@@ -0,0 +1,23 @@
+What: /sys/class/fc_host/hostX/statistics/fpin_cn_yyy
+Date: July 2020
+Contact: Shyam Sundar <ssundar@marvell.com>
+Description:
+ These files contain the number of congestion notification
+ events recorded by the F_Port, reported using fabric
+ performance impact notification (FPIN) event.
+
+What: /sys/class/fc_host/hostX/statistics/fpin_li_yyy
+Date: July 2020
+Contact: Shyam Sundar <ssundar@marvell.com>
+Description:
+ These files contain the number of link integrity error
+ events recorded by the F_Port/Nx_Port, reported using fabric
+ performance impact notification (FPIN) event.
+
+What: /sys/class/fc_host/hostX/statistics/fpin_dn_yyy
+Date: July 2020
+Contact: Shyam Sundar <ssundar@marvell.com>
+Description:
+ These files contain the number of delivery related errors
+ recorded by the F_Port/Nx_Port, reported using fabric
+ performance impact notification (FPIN) event.
diff --git a/Documentation/ABI/testing/sysfs-class-fc_remote_ports b/Documentation/ABI/testing/sysfs-class-fc_remote_ports
new file mode 100644
index 000000000000..55a951529e03
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-fc_remote_ports
@@ -0,0 +1,23 @@
+What: /sys/class/fc_remote_ports/rport-X:Y-Z/statistics/fpin_cn_yyy
+Date: July 2020
+Contact: Shyam Sundar <ssundar@marvell.com>
+Description:
+ These files contain the number of congestion notification
+ events recorded by the F_Port/Nx_Port, reported using fabric
+ performance impact notification (FPIN) event.
+
+What: /sys/class/fc_remote_ports/rport-X:Y-Z/statistics/fpin_li_yyy
+Date: July 2020
+Contact: Shyam Sundar <ssundar@marvell.com>
+Description:
+ These files contain the number of link integrity error
+ events recorded by the F_Port/Nx_Port, reported using fabric
+ performance impact notification (FPIN) event.
+
+What: /sys/class/fc_remote_ports/rport-X:Y-Z/statistics/fpin_dn_yyy
+Date: July 2020
+Contact: Shyam Sundar <ssundar@marvell.com>
+Description:
+ These files contain the number of delivery related errors
+ recorded by the F_Port/Nx_Port, reported using fabric
+ performance impact notification (FPIN) event.
diff --git a/Documentation/ABI/testing/sysfs-class-firmware b/Documentation/ABI/testing/sysfs-class-firmware
new file mode 100644
index 000000000000..fba87a55f3ca
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-firmware
@@ -0,0 +1,77 @@
+What: /sys/class/firmware/.../data
+Date: July 2022
+KernelVersion: 5.19
+Contact: Russ Weight <russ.weight@linux.dev>
+Description: The data sysfs file is used for firmware-fallback and for
+ firmware uploads. Cat a firmware image to this sysfs file
+ after you echo 1 to the loading sysfs file. When the firmware
+ image write is complete, echo 0 to the loading sysfs file. This
+ sequence will signal the completion of the firmware write and
+ signal the lower-level driver that the firmware data is
+ available.
+
+What: /sys/class/firmware/.../cancel
+Date: July 2022
+KernelVersion: 5.19
+Contact: Russ Weight <russ.weight@linux.dev>
+Description: Write-only. For firmware uploads, write a "1" to this file to
+ request that the transfer of firmware data to the lower-level
+ device be canceled. This request will be rejected (EBUSY) if
+ the update cannot be canceled (e.g. a FLASH write is in
+ progress) or (ENODEV) if there is no firmware update in progress.
+
+What: /sys/class/firmware/.../error
+Date: July 2022
+KernelVersion: 5.19
+Contact: Russ Weight <russ.weight@linux.dev>
+Description: Read-only. Returns a string describing a failed firmware
+ upload. This string will be in the form of <STATUS>:<ERROR>,
+ where <STATUS> will be one of the status strings described
+ for the status sysfs file and <ERROR> will be one of the
+ following: "hw-error", "timeout", "user-abort", "device-busy",
+ "invalid-file-size", "read-write-error", "flash-wearout". The
+ error sysfs file is only meaningful when the current firmware
+ upload status is "idle". If this file is read while a firmware
+ transfer is in progress, then the read will fail with EBUSY.
+
+What: /sys/class/firmware/.../loading
+Date: July 2022
+KernelVersion: 5.19
+Contact: Russ Weight <russ.weight@linux.dev>
+Description: The loading sysfs file is used for both firmware-fallback and
+ for firmware uploads. Echo 1 onto the loading file to indicate
+ you are writing a firmware file to the data sysfs node. Echo
+ -1 onto this file to abort the data write or echo 0 onto this
+ file to indicate that the write is complete. For firmware
+ uploads, the zero value also triggers the transfer of the
+ firmware data to the lower-level device driver.
+
+What: /sys/class/firmware/.../remaining_size
+Date: July 2022
+KernelVersion: 5.19
+Contact: Russ Weight <russ.weight@linux.dev>
+Description: Read-only. For firmware upload, this file contains the size
+ of the firmware data that remains to be transferred to the
+ lower-level device driver. The size value is initialized to
+ the full size of the firmware image that was previously
+ written to the data sysfs file. This value is periodically
+ updated during the "transferring" phase of the firmware
+ upload.
+ Format: "%u".
+
+What: /sys/class/firmware/.../status
+Date: July 2022
+KernelVersion: 5.19
+Contact: Russ Weight <russ.weight@linux.dev>
+Description: Read-only. Returns a string describing the current status of
+ a firmware upload. The string will be one of the following:
+ idle, "receiving", "preparing", "transferring", "programming".
+
+What: /sys/class/firmware/.../timeout
+Date: July 2022
+KernelVersion: 5.19
+Contact: Russ Weight <russ.weight@linux.dev>
+Description: This file supports the timeout mechanism for firmware
+ fallback. This file has no affect on firmware uploads. For
+ more information on timeouts please see the documentation
+ for firmware fallback.
diff --git a/Documentation/ABI/testing/sysfs-class-firmware-attributes b/Documentation/ABI/testing/sysfs-class-firmware-attributes
new file mode 100644
index 000000000000..2713efa509b4
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-firmware-attributes
@@ -0,0 +1,494 @@
+What: /sys/class/firmware-attributes/*/attributes/*/
+Date: February 2021
+KernelVersion: 5.11
+Contact: Divya Bharathi <Divya.Bharathi@Dell.com>,
+ Prasanth KSR <prasanth.ksr@dell.com>
+ Dell.Client.Kernel@dell.com
+Description:
+ A sysfs interface for systems management software to enable
+ configuration capability on supported systems. This directory
+ exposes interfaces for interacting with configuration options.
+
+ Unless otherwise specified in an attribute description all attributes are optional
+ and will accept UTF-8 input.
+
+ type:
+ A file that can be read to obtain the type of attribute.
+ This attribute is mandatory.
+
+ The following are known types:
+
+ - enumeration: a set of pre-defined valid values
+ - integer: a range of numerical values
+ - string
+
+ HP specific types
+ -----------------
+ - ordered-list - a set of ordered list valid values
+
+
+ All attribute types support the following values:
+
+ current_value:
+ A file that can be read to obtain the current
+ value of the <attr>.
+
+ This file can also be written to in order to update the value of a
+ <attr>
+
+ This attribute is mandatory.
+
+ default_value:
+ A file that can be read to obtain the default
+ value of the <attr>
+
+ display_name:
+ A file that can be read to obtain a user friendly
+ description of the at <attr>
+
+ display_name_language_code:
+ A file that can be read to obtain
+ the IETF language tag corresponding to the
+ "display_name" of the <attr>
+
+ "enumeration"-type specific properties:
+
+ possible_values:
+ A file that can be read to obtain the possible
+ values of the <attr>. Values are separated using
+ semi-colon (``;``).
+
+ "integer"-type specific properties:
+
+ min_value:
+ A file that can be read to obtain the lower
+ bound value of the <attr>
+
+ max_value:
+ A file that can be read to obtain the upper
+ bound value of the <attr>
+
+ scalar_increment:
+ A file that can be read to obtain the scalar value used for
+ increments of current_value this attribute accepts.
+
+ "string"-type specific properties:
+
+ max_length:
+ A file that can be read to obtain the maximum
+ length value of the <attr>
+
+ min_length:
+ A file that can be read to obtain the minimum
+ length value of the <attr>
+
+ Dell specific class extensions
+ ------------------------------
+
+ On Dell systems the following additional attributes are available:
+
+ dell_modifier:
+ A file that can be read to obtain attribute-level
+ dependency rule. It says an attribute X will become read-only or
+ suppressed, if/if-not attribute Y is configured.
+
+ modifier rules can be in following format::
+
+ [ReadOnlyIf:<attribute>=<value>]
+ [ReadOnlyIfNot:<attribute>=<value>]
+ [SuppressIf:<attribute>=<value>]
+ [SuppressIfNot:<attribute>=<value>]
+
+ For example::
+
+ AutoOnFri/dell_modifier has value,
+ [SuppressIfNot:AutoOn=SelectDays]
+
+ This means AutoOnFri will be suppressed in BIOS setup if AutoOn
+ attribute is not "SelectDays" and its value will not be effective
+ through sysfs until this rule is met.
+
+ Enumeration attributes also support the following:
+
+ dell_value_modifier:
+ A file that can be read to obtain value-level dependency.
+ This file is similar to dell_modifier but here, an
+ attribute's current value will be forcefully changed based
+ dependent attributes value.
+
+ dell_value_modifier rules can be in following format::
+
+ <value>[ForceIf:<attribute>=<value>]
+ <value>[ForceIfNot:<attribute>=<value>]
+
+ For example::
+
+ LegacyOrom/dell_value_modifier has value:
+ Disabled[ForceIf:SecureBoot=Enabled]
+
+ This means LegacyOrom's current value will be forced to
+ "Disabled" in BIOS setup if SecureBoot is Enabled and its
+ value will not be effective through sysfs until this rule is
+ met.
+
+ HP specific class extensions
+ ------------------------------
+
+ On HP systems the following additional attributes are available:
+
+ "ordered-list"-type specific properties:
+
+ elements:
+ A file that can be read to obtain the possible
+ list of values of the <attr>. Values are separated using
+ semi-colon (``;``) and listed according to their priority.
+ An element listed first has the highest priority. Writing
+ the list in a different order to current_value alters
+ the priority order for the particular attribute.
+
+What: /sys/class/firmware-attributes/*/authentication/
+Date: February 2021
+KernelVersion: 5.11
+Contact: Divya Bharathi <Divya.Bharathi@Dell.com>,
+ Prasanth KSR <prasanth.ksr@dell.com>
+ Dell.Client.Kernel@dell.com
+Description:
+ Devices support various authentication mechanisms which can be exposed
+ as a separate configuration object.
+
+ For example a "BIOS Admin" password and "System" Password can be set,
+ reset or cleared using these attributes.
+
+ - An "Admin" password is used for preventing modification to the BIOS
+ settings.
+ - A "System" password is required to boot a machine.
+
+ Change in any of these two authentication methods will also generate an
+ uevent KOBJ_CHANGE.
+
+ is_enabled:
+ A file that can be read to obtain a 0/1 flag to see if
+ <attr> authentication is enabled.
+ This attribute is mandatory.
+
+ role:
+ The type of authentication used.
+ This attribute is mandatory.
+
+ Known types:
+ bios-admin:
+ Representing BIOS administrator password
+ power-on:
+ Representing a password required to use
+ the system
+ system-mgmt:
+ Representing System Management password.
+ See Lenovo extensions section for details
+ HDD:
+ Representing HDD password
+ See Lenovo extensions section for details
+ NVMe:
+ Representing NVMe password
+ See Lenovo extensions section for details
+
+ mechanism:
+ The means of authentication. This attribute is mandatory.
+ Supported types are "password" or "certificate".
+
+ max_password_length:
+ A file that can be read to obtain the
+ maximum length of the Password
+
+ min_password_length:
+ A file that can be read to obtain the
+ minimum length of the Password
+
+ current_password:
+ A write only value used for privileged access such as
+ setting attributes when a system or admin password is set
+ or resetting to a new password
+
+ This attribute is mandatory when mechanism == "password".
+
+ new_password:
+ A write only value that when used in tandem with
+ current_password will reset a system or admin password.
+
+ Note, password management is session specific. If Admin password is set,
+ same password must be written into current_password file (required for
+ password-validation) and must be cleared once the session is over.
+ For example::
+
+ echo "password" > current_password
+ echo "disabled" > TouchScreen/current_value
+ echo "" > current_password
+
+ Drivers may emit a CHANGE uevent when a password is set or unset
+ userspace may check it again.
+
+ On Dell, Lenovo and HP systems, if Admin password is set, then all BIOS attributes
+ require password validation.
+ On Lenovo systems if you change the Admin password the new password is not active until
+ the next boot.
+
+ Lenovo specific class extensions
+ --------------------------------
+
+ On Lenovo systems the following additional settings are available:
+
+ role: system-mgmt This gives the same authority as the bios-admin password to control
+ security related features. The authorities allocated can be set via
+ the BIOS menu SMP Access Control Policy
+
+ role: HDD & NVMe This password is used to unlock access to the drive at boot. Note see
+ 'level' and 'index' extensions below.
+
+ lenovo_encoding:
+ The encoding method that is used. This can be either "ascii"
+ or "scancode". Default is set to "ascii"
+
+ lenovo_kbdlang:
+ The keyboard language method that is used. This is generally a
+ two char code (e.g. "us", "fr", "gr") and may vary per platform.
+ Default is set to "us"
+
+ level:
+ Available for HDD and NVMe authentication to set 'user' or 'master'
+ privilege level.
+ If only the user password is configured then this should be used to
+ unlock the drive at boot. If both master and user passwords are set
+ then either can be used. If a master password is set a user password
+ is required.
+ This attribute defaults to 'user' level
+
+ index:
+ Used with HDD and NVME authentication to set the drive index
+ that is being referenced (e.g hdd1, hdd2 etc)
+ This attribute defaults to device 1.
+
+ certificate, signature, save_signature:
+ These attributes are used for certificate based authentication. This is
+ used in conjunction with a signing server as an alternative to password
+ based authentication.
+ The user writes to the attribute(s) with a BASE64 encoded string obtained
+ from the signing server.
+ The attributes can be displayed to check the stored value.
+
+ Some usage examples:
+
+ Installing a certificate to enable feature::
+
+ echo "supervisor password" > authentication/Admin/current_password
+ echo "signed certificate" > authentication/Admin/certificate
+
+ Updating the installed certificate::
+
+ echo "signature" > authentication/Admin/signature
+ echo "signed certificate" > authentication/Admin/certificate
+
+ Removing the installed certificate::
+
+ echo "signature" > authentication/Admin/signature
+ echo "" > authentication/Admin/certificate
+
+ Changing a BIOS setting::
+
+ echo "signature" > authentication/Admin/signature
+ echo "save signature" > authentication/Admin/save_signature
+ echo Enable > attribute/PasswordBeep/current_value
+
+ You cannot enable certificate authentication if a supervisor password
+ has not been set.
+ Clearing the certificate results in no bios-admin authentication method
+ being configured allowing anyone to make changes.
+ After any of these operations the system must reboot for the changes to
+ take effect.
+ Admin and System certificates are supported from 2025 systems onward.
+
+ certificate_thumbprint:
+ Read only attribute used to display the MD5, SHA1 and SHA256 thumbprints
+ for the certificate installed in the BIOS.
+
+ certificate_to_password:
+ Write only attribute used to switch from certificate based authentication
+ back to password based.
+ Usage::
+
+ echo "signature" > authentication/Admin/signature
+ echo "password" > authentication/Admin/certificate_to_password
+
+ HP specific class extensions
+ --------------------------------
+
+ On HP systems the following additional settings are available:
+
+ role: enhanced-bios-auth:
+ This role is specific to Secure Platform Management (SPM) attribute.
+ It requires configuring an endorsement (kek) and signing certificate (sk).
+
+
+What: /sys/class/firmware-attributes/*/attributes/pending_reboot
+Date: February 2021
+KernelVersion: 5.11
+Contact: Divya Bharathi <Divya.Bharathi@Dell.com>,
+ Prasanth KSR <prasanth.ksr@dell.com>
+ Dell.Client.Kernel@dell.com
+Description:
+ A read-only attribute reads 1 if a reboot is necessary to apply
+ pending BIOS attribute changes. Also, an uevent_KOBJ_CHANGE is
+ generated when it changes to 1.
+
+ == =========================================
+ 0 All BIOS attributes setting are current
+ 1 A reboot is necessary to get pending BIOS
+ attribute changes applied
+ == =========================================
+
+ Note, userspace applications need to follow below steps for efficient
+ BIOS management,
+
+ 1. Check if admin password is set. If yes, follow session method for
+ password management as briefed under authentication section above.
+ 2. Before setting any attribute, check if it has any modifiers
+ or value_modifiers. If yes, incorporate them and then modify
+ attribute.
+
+ Drivers may emit a CHANGE uevent when this value changes and userspace
+ may check it again.
+
+What: /sys/class/firmware-attributes/*/attributes/reset_bios
+Date: February 2021
+KernelVersion: 5.11
+Contact: Divya Bharathi <Divya.Bharathi@Dell.com>,
+ Prasanth KSR <prasanth.ksr@dell.com>
+ Dell.Client.Kernel@dell.com
+Description:
+ This attribute can be used to reset the BIOS Configuration.
+ Specifically, it tells which type of reset BIOS configuration is being
+ requested on the host.
+
+ Reading from it returns a list of supported options encoded as:
+
+ - 'builtinsafe' (Built in safe configuration profile)
+ - 'lastknowngood' (Last known good saved configuration profile)
+ - 'factory' (Default factory settings configuration profile)
+ - 'custom' (Custom saved configuration profile)
+
+ The currently selected option is printed in square brackets as
+ shown below::
+
+ # echo "factory" > /sys/class/firmware-attributes/*/device/attributes/reset_bios
+ # cat /sys/class/firmware-attributes/*/device/attributes/reset_bios
+ builtinsafe lastknowngood [factory] custom
+
+ Note that any changes to this attribute requires a reboot
+ for changes to take effect.
+
+What: /sys/class/firmware-attributes/*/attributes/save_settings
+Date: August 2023
+KernelVersion: 6.6
+Contact: Mark Pearson <mpearson-lenovo@squebb.ca>
+Description:
+ On Lenovo platforms there is a limitation in the number of times an attribute can be
+ saved. This is an architectural limitation and it limits the number of attributes
+ that can be modified to 48.
+ A solution for this is instead of the attribute being saved after every modification,
+ to allow a user to bulk set the attributes, and then trigger a final save. This allows
+ unlimited attributes.
+
+ Read the attribute to check what save mode is enabled (single or bulk).
+ E.g:
+ # cat /sys/class/firmware-attributes/thinklmi/attributes/save_settings
+ single
+
+ Write the attribute with 'bulk' to enable bulk save mode.
+ Write the attribute with 'single' to enable saving, after every attribute set.
+ The default setting is single mode.
+ E.g:
+ # echo bulk > /sys/class/firmware-attributes/thinklmi/attributes/save_settings
+
+ When in bulk mode write 'save' to trigger a save of all currently modified attributes.
+ Note, once a save has been triggered, in bulk mode, attributes can no longer be set and
+ will return a permissions error. This is to prevent users hitting the 48+ save limitation
+ (which requires entering the BIOS to clear the error condition)
+ E.g:
+ # echo save > /sys/class/firmware-attributes/thinklmi/attributes/save_settings
+
+What: /sys/class/firmware-attributes/*/attributes/debug_cmd
+Date: July 2021
+KernelVersion: 5.14
+Contact: Mark Pearson <markpearson@lenovo.com>
+Description:
+ This write only attribute can be used to send debug commands to the BIOS.
+ This should only be used when recommended by the BIOS vendor. Vendors may
+ use it to enable extra debug attributes or BIOS features for testing purposes.
+
+ Note that any changes to this attribute requires a reboot for changes to take effect.
+
+
+ HP specific class extensions - Secure Platform Manager (SPM)
+ --------------------------------
+
+What: /sys/class/firmware-attributes/*/authentication/SPM/kek
+Date: March 2023
+KernelVersion: 5.18
+Contact: "Jorge Lopez" <jorge.lopez2@hp.com>
+Description:
+ 'kek' Key-Encryption-Key is a write-only file that can be used to configure the
+ RSA public key that will be used by the BIOS to verify
+ signatures when setting the signing key. When written,
+ the bytes should correspond to the KEK certificate
+ (x509 .DER format containing an OU). The size of the
+ certificate must be less than or equal to 4095 bytes.
+
+What: /sys/class/firmware-attributes/*/authentication/SPM/sk
+Date: March 2023
+KernelVersion: 5.18
+Contact: "Jorge Lopez" <jorge.lopez2@hp.com>
+Description:
+ 'sk' Signature Key is a write-only file that can be used to configure the RSA
+ public key that will be used by the BIOS to verify signatures
+ when configuring BIOS settings and security features. When
+ written, the bytes should correspond to the modulus of the
+ public key. The exponent is assumed to be 0x10001.
+
+What: /sys/class/firmware-attributes/*/authentication/SPM/status
+Date: March 2023
+KernelVersion: 5.18
+Contact: "Jorge Lopez" <jorge.lopez2@hp.com>
+Description:
+ 'status' is a read-only file that returns ASCII text in JSON format reporting
+ the status information.
+
+ "State": "not provisioned | provisioned | provisioning in progress",
+ "Version": "Major.Minor",
+ "Nonce": <16-bit unsigned number display in base 10>,
+ "FeaturesInUse": <16-bit unsigned number display in base 10>,
+ "EndorsementKeyMod": "<256 bytes in base64>",
+ "SigningKeyMod": "<256 bytes in base64>"
+
+What: /sys/class/firmware-attributes/*/attributes/Sure_Start/audit_log_entries
+Date: March 2023
+KernelVersion: 5.18
+Contact: "Jorge Lopez" <jorge.lopez2@hp.com>
+Description:
+ 'audit_log_entries' is a read-only file that returns the events in the log.
+
+ Audit log entry format
+
+ Byte 0-15: Requested Audit Log entry (Each Audit log is 16 bytes)
+ Byte 16-127: Unused
+
+What: /sys/class/firmware-attributes/*/attributes/Sure_Start/audit_log_entry_count
+Date: March 2023
+KernelVersion: 5.18
+Contact: "Jorge Lopez" <jorge.lopez2@hp.com>
+Description:
+ 'audit_log_entry_count' is a read-only file that returns the number of existing
+ audit log events available to be read. Values are separated using comma. (``,``)
+
+ [No of entries],[log entry size],[Max number of entries supported]
+
+ log entry size identifies audit log size for the current BIOS version.
+ The current size is 16 bytes but it can be up to 128 bytes long in future BIOS
+ versions.
diff --git a/Documentation/ABI/testing/sysfs-class-fpga-manager b/Documentation/ABI/testing/sysfs-class-fpga-manager
index 5284fa33d4c5..d78689c357a5 100644
--- a/Documentation/ABI/testing/sysfs-class-fpga-manager
+++ b/Documentation/ABI/testing/sysfs-class-fpga-manager
@@ -28,8 +28,7 @@ Description: Read fpga manager state as a string.
* firmware request = firmware class request in progress
* firmware request error = firmware request failed
* write init = preparing FPGA for programming
- * write init error = Error while preparing FPGA for
- programming
+ * write init error = Error while preparing FPGA for programming
* write = FPGA ready to receive image data
* write error = Error while programming
* write complete = Doing post programming steps
@@ -47,7 +46,7 @@ Description: Read fpga manager status as a string.
programming errors to userspace. This is a list of strings for
the supported status.
- * reconfig operation error - invalid operations detected by
+ * reconfig operation error - invalid operations detected by
reconfiguration hardware.
e.g. start reconfiguration
with errors not cleared
diff --git a/Documentation/ABI/testing/sysfs-class-gnss b/Documentation/ABI/testing/sysfs-class-gnss
index 2467b6900eae..9650f3a7fc03 100644
--- a/Documentation/ABI/testing/sysfs-class-gnss
+++ b/Documentation/ABI/testing/sysfs-class-gnss
@@ -1,4 +1,4 @@
-What: /sys/class/gnss/gnssN/type
+What: /sys/class/gnss/gnss<N>/type
Date: May 2018
KernelVersion: 4.18
Contact: Johan Hovold <johan@kernel.org>
@@ -6,9 +6,11 @@ Description:
The GNSS receiver type. The currently identified types reflect
the protocol(s) supported by the receiver:
+ ====== ===========
"NMEA" NMEA 0183
"SiRF" SiRF Binary
"UBX" UBX
+ ====== ===========
Note that also non-"NMEA" type receivers typically support a
subset of NMEA 0183 with vendor extensions (e.g. to allow
diff --git a/Documentation/ABI/testing/sysfs-class-hwmon b/Documentation/ABI/testing/sysfs-class-hwmon
new file mode 100644
index 000000000000..cfd0d0bab483
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-hwmon
@@ -0,0 +1,1075 @@
+What: /sys/class/hwmon/hwmonX/name
+Description:
+ The chip name.
+ This should be a short, lowercase string, not containing
+ whitespace, dashes, or the wildcard character '*'.
+ This attribute represents the chip name. It is the only
+ mandatory attribute.
+ I2C devices get this attribute created automatically.
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/label
+Description:
+ A descriptive label that allows to uniquely identify a
+ device within the system.
+ The contents of the label are free-form.
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/update_interval
+Description:
+ The interval at which the chip will update readings.
+ Unit: millisecond
+
+ RW
+
+ Some devices have a variable update rate or interval.
+ This attribute can be used to change it to the desired value.
+
+What: /sys/class/hwmon/hwmonX/inY_min
+Description:
+ Voltage min value.
+
+ Unit: millivolt
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/inY_lcrit
+Description:
+ Voltage critical min value.
+
+ Unit: millivolt
+
+ RW
+
+ If voltage drops to or below this limit, the system may
+ take drastic action such as power down or reset. At the very
+ least, it should report a fault.
+
+What: /sys/class/hwmon/hwmonX/inY_max
+Description:
+ Voltage max value.
+
+ Unit: millivolt
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/inY_crit
+Description:
+ Voltage critical max value.
+
+ Unit: millivolt
+
+ RW
+
+ If voltage reaches or exceeds this limit, the system may
+ take drastic action such as power down or reset. At the very
+ least, it should report a fault.
+
+What: /sys/class/hwmon/hwmonX/inY_input
+Description:
+ Voltage input value.
+
+ Unit: millivolt
+
+ RO
+
+ Voltage measured on the chip pin.
+
+ Actual voltage depends on the scaling resistors on the
+ motherboard, as recommended in the chip datasheet.
+
+ This varies by chip and by motherboard.
+ Because of this variation, values are generally NOT scaled
+ by the chip driver, and must be done by the application.
+ However, some drivers (notably lm87 and via686a)
+ do scale, because of internal resistors built into a chip.
+ These drivers will output the actual voltage. Rule of
+ thumb: drivers should report the voltage values at the
+ "pins" of the chip.
+
+What: /sys/class/hwmon/hwmonX/inY_average
+Description:
+ Average voltage
+
+ Unit: millivolt
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/inY_lowest
+Description:
+ Historical minimum voltage
+
+ Unit: millivolt
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/inY_highest
+Description:
+ Historical maximum voltage
+
+ Unit: millivolt
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/inY_reset_history
+Description:
+ Reset inX_lowest and inX_highest
+
+ WO
+
+What: /sys/class/hwmon/hwmonX/in_reset_history
+Description:
+ Reset inX_lowest and inX_highest for all sensors
+
+ WO
+
+What: /sys/class/hwmon/hwmonX/inY_label
+Description:
+ Suggested voltage channel label.
+
+ Text string
+
+ Should only be created if the driver has hints about what
+ this voltage channel is being used for, and user-space
+ doesn't. In all other cases, the label is provided by
+ user-space.
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/inY_enable
+Description:
+ Enable or disable the sensors.
+
+ When disabled the sensor read will return -ENODATA.
+
+ - 1: Enable
+ - 0: Disable
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/inY_fault
+Description:
+ Reports a voltage hard failure (eg: shorted component)
+
+ - 1: Failed
+ - 0: Ok
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/cpuY_vid
+Description:
+ CPU core reference voltage.
+
+ Unit: millivolt
+
+ RO
+
+ Not always correct.
+
+What: /sys/class/hwmon/hwmonX/vrm
+Description:
+ Voltage Regulator Module version number.
+
+ RW (but changing it should no more be necessary)
+
+ Originally the VRM standard version multiplied by 10, but now
+ an arbitrary number, as not all standards have a version
+ number.
+
+ Affects the way the driver calculates the CPU core reference
+ voltage from the vid pins.
+
+What: /sys/class/hwmon/hwmonX/inY_rated_min
+Description:
+ Minimum rated voltage.
+
+ Unit: millivolt
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/inY_rated_max
+Description:
+ Maximum rated voltage.
+
+ Unit: millivolt
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/fanY_min
+Description:
+ Fan minimum value
+
+ Unit: revolution/min (RPM)
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/fanY_max
+Description:
+ Fan maximum value
+
+ Unit: revolution/min (RPM)
+
+ Only rarely supported by the hardware.
+ RW
+
+What: /sys/class/hwmon/hwmonX/fanY_input
+Description:
+ Fan input value.
+
+ Unit: revolution/min (RPM)
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/fanY_div
+Description:
+ Fan divisor.
+
+ Integer value in powers of two (1, 2, 4, 8, 16, 32, 64, 128).
+
+ RW
+
+ Some chips only support values 1, 2, 4 and 8.
+ Note that this is actually an internal clock divisor, which
+ affects the measurable speed range, not the read value.
+
+What: /sys/class/hwmon/hwmonX/fanY_pulses
+Description:
+ Number of tachometer pulses per fan revolution.
+
+ Integer value, typically between 1 and 4.
+
+ RW
+
+ This value is a characteristic of the fan connected to the
+ device's input, so it has to be set in accordance with the fan
+ model.
+
+ Should only be created if the chip has a register to configure
+ the number of pulses. In the absence of such a register (and
+ thus attribute) the value assumed by all devices is 2 pulses
+ per fan revolution.
+
+What: /sys/class/hwmon/hwmonX/fanY_target
+Description:
+ Desired fan speed
+
+ Unit: revolution/min (RPM)
+
+ RW
+
+ Only makes sense if the chip supports closed-loop fan speed
+ control based on the measured fan speed.
+
+What: /sys/class/hwmon/hwmonX/fanY_label
+Description:
+ Suggested fan channel label.
+
+ Text string
+
+ Should only be created if the driver has hints about what
+ this fan channel is being used for, and user-space doesn't.
+ In all other cases, the label is provided by user-space.
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/fanY_enable
+Description:
+ Enable or disable the sensors.
+
+ When disabled the sensor read will return -ENODATA.
+
+ - 1: Enable
+ - 0: Disable
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/fanY_fault
+Description:
+ Reports if a fan has reported failure.
+
+ - 1: Failed
+ - 0: Ok
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/pwmY
+Description:
+ Pulse width modulation fan control.
+
+ Integer value in the range 0 to 255
+
+ RW
+
+ 255 is max or 100%.
+
+What: /sys/class/hwmon/hwmonX/pwmY_enable
+Description:
+ Fan speed control method:
+
+ - 0: no fan speed control (i.e. fan at full speed)
+ - 1: manual fan speed control enabled (using `pwmY`)
+ - 2+: automatic fan speed control enabled
+
+ Check individual chip documentation files for automatic mode
+ details.
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/pwmY_mode
+Description:
+ - 0: DC mode (direct current)
+ - 1: PWM mode (pulse-width modulation)
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/pwmY_freq
+Description:
+ Base PWM frequency in Hz.
+
+ Only possibly available when pwmN_mode is PWM, but not always
+ present even then.
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/pwmY_auto_channels_temp
+Description:
+ Select which temperature channels affect this PWM output in
+ auto mode.
+
+ Bitfield, 1 is temp1, 2 is temp2, 4 is temp3 etc...
+ Which values are possible depend on the chip used.
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/pwmY_auto_pointZ_pwm
+What: /sys/class/hwmon/hwmonX/pwmY_auto_pointZ_temp
+What: /sys/class/hwmon/hwmonX/pwmY_auto_pointZ_temp_hyst
+Description:
+ Define the PWM vs temperature curve.
+
+ Number of trip points is chip-dependent. Use this for chips
+ which associate trip points to PWM output channels.
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/tempY_auto_pointZ_pwm
+What: /sys/class/hwmon/hwmonX/tempY_auto_pointZ_temp
+What: /sys/class/hwmon/hwmonX/tempY_auto_pointZ_temp_hyst
+Description:
+ Define the PWM vs temperature curve.
+
+ Number of trip points is chip-dependent. Use this for chips
+ which associate trip points to temperature channels.
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/tempY_type
+Description:
+ Sensor type selection.
+
+ Integers 1 to 6
+
+ RW
+
+ - 1: CPU embedded diode
+ - 2: 3904 transistor
+ - 3: thermal diode
+ - 4: thermistor
+ - 5: AMD AMDSI
+ - 6: Intel PECI
+
+ Not all types are supported by all chips
+
+What: /sys/class/hwmon/hwmonX/tempY_max
+Description:
+ Temperature max value.
+
+ Unit: millidegree Celsius (or millivolt, see below)
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/tempY_max_alarm
+Description:
+ Maximum temperature alarm flag.
+
+ - 0: OK
+ - 1: temperature has reached tempY_max
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/tempY_min
+Description:
+ Temperature min value.
+
+ Unit: millidegree Celsius
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/tempY_min_alarm
+Description:
+ Minimum temperature alarm flag.
+
+ - 0: OK
+ - 1: temperature has reached tempY_min
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/tempY_max_hyst
+Description:
+ Temperature hysteresis value for max limit.
+
+ Unit: millidegree Celsius
+
+ Must be reported as an absolute temperature, NOT a delta
+ from the max value.
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/tempY_min_hyst
+Description:
+ Temperature hysteresis value for min limit.
+ Unit: millidegree Celsius
+
+ Must be reported as an absolute temperature, NOT a delta
+ from the min value.
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/tempY_input
+Description:
+ Temperature input value.
+
+ Unit: millidegree Celsius
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/tempY_crit
+Description:
+ Temperature critical max value, typically greater than
+ corresponding temp_max values.
+
+ Unit: millidegree Celsius
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/tempY_crit_alarm
+Description:
+ Critical high temperature alarm flag.
+
+ - 0: OK
+ - 1: temperature has reached tempY_crit
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/tempY_crit_hyst
+Description:
+ Temperature hysteresis value for critical limit.
+
+ Unit: millidegree Celsius
+
+ Must be reported as an absolute temperature, NOT a delta
+ from the critical value.
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/tempY_emergency
+Description:
+ Temperature emergency max value, for chips supporting more than
+ two upper temperature limits. Must be equal or greater than
+ corresponding temp_crit values.
+
+ Unit: millidegree Celsius
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/tempY_emergency_alarm
+Description:
+ Emergency high temperature alarm flag.
+
+ - 0: OK
+ - 1: temperature has reached tempY_emergency
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/tempY_emergency_hyst
+Description:
+ Temperature hysteresis value for emergency limit.
+
+ Unit: millidegree Celsius
+
+ Must be reported as an absolute temperature, NOT a delta
+ from the emergency value.
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/tempY_lcrit
+Description:
+ Temperature critical min value, typically lower than
+ corresponding temp_min values.
+
+ Unit: millidegree Celsius
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/tempY_lcrit_hyst
+Description:
+ Temperature hysteresis value for critical min limit.
+
+ Unit: millidegree Celsius
+
+ Must be reported as an absolute temperature, NOT a delta
+ from the critical min value.
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/tempY_offset
+Description:
+ Temperature offset which is added to the temperature reading
+ by the chip.
+
+ Unit: millidegree Celsius
+
+ Read/Write value.
+
+What: /sys/class/hwmon/hwmonX/tempY_label
+Description:
+ Suggested temperature channel label.
+
+ Text string
+
+ Should only be created if the driver has hints about what
+ this temperature channel is being used for, and user-space
+ doesn't. In all other cases, the label is provided by
+ user-space.
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/tempY_lowest
+Description:
+ Historical minimum temperature
+
+ Unit: millidegree Celsius
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/tempY_highest
+Description:
+ Historical maximum temperature
+
+ Unit: millidegree Celsius
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/tempY_reset_history
+Description:
+ Reset temp_lowest and temp_highest
+
+ WO
+
+What: /sys/class/hwmon/hwmonX/temp_reset_history
+Description:
+ Reset temp_lowest and temp_highest for all sensors
+
+ WO
+
+What: /sys/class/hwmon/hwmonX/tempY_enable
+Description:
+ Enable or disable the sensors.
+
+ When disabled the sensor read will return -ENODATA.
+
+ - 1: Enable
+ - 0: Disable
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/tempY_rated_min
+Description:
+ Minimum rated temperature.
+
+ Unit: millidegree Celsius
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/tempY_rated_max
+Description:
+ Maximum rated temperature.
+
+ Unit: millidegree Celsius
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/currY_max
+Description:
+ Current max value
+
+ Unit: milliampere
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/currY_min
+Description:
+ Current min value.
+
+ Unit: milliampere
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/currY_lcrit
+Description:
+ Current critical low value
+
+ Unit: milliampere
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/currY_crit
+Description:
+ Current critical high value.
+
+ Unit: milliampere
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/currY_input
+Description:
+ Current input value
+
+ Unit: milliampere
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/currY_average
+Description:
+ Average current use
+
+ Unit: milliampere
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/currY_lowest
+Description:
+ Historical minimum current
+
+ Unit: milliampere
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/currY_highest
+Description:
+ Historical maximum current
+ Unit: milliampere
+ RO
+
+What: /sys/class/hwmon/hwmonX/currY_reset_history
+Description:
+ Reset currX_lowest and currX_highest
+
+ WO
+
+What: /sys/class/hwmon/hwmonX/curr_reset_history
+Description:
+ Reset currX_lowest and currX_highest for all sensors
+
+ WO
+
+What: /sys/class/hwmon/hwmonX/currY_enable
+Description:
+ Enable or disable the sensors.
+
+ When disabled the sensor read will return -ENODATA.
+
+ - 1: Enable
+ - 0: Disable
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/currY_rated_min
+Description:
+ Minimum rated current.
+
+ Unit: milliampere
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/currY_rated_max
+Description:
+ Maximum rated current.
+
+ Unit: milliampere
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/powerY_average
+Description:
+ Average power use
+
+ Unit: microWatt
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/powerY_average_interval
+Description:
+ Power use averaging interval. A poll
+ notification is sent to this file if the
+ hardware changes the averaging interval.
+
+ Unit: milliseconds
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/powerY_average_interval_max
+Description:
+ Maximum power use averaging interval
+
+ Unit: milliseconds
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/powerY_average_interval_min
+Description:
+ Minimum power use averaging interval
+
+ Unit: milliseconds
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/powerY_average_highest
+Description:
+ Historical average maximum power use
+
+ Unit: microWatt
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/powerY_average_lowest
+Description:
+ Historical average minimum power use
+
+ Unit: microWatt
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/powerY_average_max
+Description:
+ A poll notification is sent to
+ `powerY_average` when power use
+ rises above this value.
+
+ Unit: microWatt
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/powerY_average_min
+Description:
+ A poll notification is sent to
+ `powerY_average` when power use
+ sinks below this value.
+
+ Unit: microWatt
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/powerY_input
+Description:
+ Instantaneous power use
+
+ Unit: microWatt
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/powerY_input_highest
+Description:
+ Historical maximum power use
+
+ Unit: microWatt
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/powerY_input_lowest
+Description:
+ Historical minimum power use
+
+ Unit: microWatt
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/powerY_reset_history
+Description:
+ Reset input_highest, input_lowest,
+ average_highest and average_lowest.
+
+ WO
+
+What: /sys/class/hwmon/hwmonX/powerY_accuracy
+Description:
+ Accuracy of the power meter.
+
+ Unit: Percent
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/powerY_cap
+Description:
+ If power use rises above this limit, the
+ system should take action to reduce power use.
+ A poll notification is sent to this file if the
+ cap is changed by the hardware. The `*_cap`
+ files only appear if the cap is known to be
+ enforced by hardware.
+
+ Unit: microWatt
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/powerY_cap_hyst
+Description:
+ Margin of hysteresis built around capping and
+ notification.
+
+ Unit: microWatt
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/powerY_cap_max
+Description:
+ Maximum cap that can be set.
+
+ Unit: microWatt
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/powerY_cap_min
+Description:
+ Minimum cap that can be set.
+
+ Unit: microWatt
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/powerY_max
+Description:
+ Maximum power.
+
+ Unit: microWatt
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/powerY_crit
+Description:
+ Critical maximum power.
+
+ If power rises to or above this limit, the
+ system is expected take drastic action to reduce
+ power consumption, such as a system shutdown or
+ a forced powerdown of some devices.
+
+ Unit: microWatt
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/powerY_enable
+Description:
+ Enable or disable the sensors.
+
+ When disabled the sensor read will return
+ -ENODATA.
+
+ - 1: Enable
+ - 0: Disable
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/powerY_rated_min
+Description:
+ Minimum rated power.
+
+ Unit: microWatt
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/powerY_rated_max
+Description:
+ Maximum rated power.
+
+ Unit: microWatt
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/energyY_input
+Description:
+ Cumulative energy use
+
+ Unit: microJoule
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/energyY_enable
+Description:
+ Enable or disable the sensors.
+
+ When disabled the sensor read will return
+ -ENODATA.
+
+ - 1: Enable
+ - 0: Disable
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/humidityY_alarm
+Description:
+ Humidity limit detection
+
+ - 0: OK
+ - 1: Humidity limit has been reached
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/humidityY_enable
+Description:
+ Enable or disable the sensors
+
+ When disabled the sensor read will return
+ -ENODATA.
+
+ - 1: Enable
+ - 0: Disable
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/humidityY_fault
+Description:
+ Reports a humidity sensor failure.
+
+ - 1: Failed
+ - 0: Ok
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/humidityY_input
+Description:
+ Humidity
+
+ Unit: milli-percent (per cent mille, pcm)
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/humidityY_label
+Description:
+ Suggested humidity channel label.
+
+ Text string
+
+ Should only be created if the driver has hints about what
+ this humidity channel is being used for, and user-space
+ doesn't. In all other cases, the label is provided by
+ user-space.
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/humidityY_max
+Description:
+ Humidity max value.
+
+ Unit: milli-percent (per cent mille, pcm)
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/humidityY_max_alarm
+Description:
+ Maximum humidity detection
+
+ - 0: OK
+ - 1: Maximum humidity detected
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/humidityY_max_hyst
+Description:
+ Humidity hysteresis value for max limit.
+
+ Unit: milli-percent (per cent mille, pcm)
+
+ Must be reported as an absolute humidity, NOT a delta
+ from the max value.
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/humidityY_min
+Description:
+ Humidity min value.
+
+ Unit: milli-percent (per cent mille, pcm)
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/humidityY_min_alarm
+Description:
+ Minimum humidity detection
+
+ - 0: OK
+ - 1: Minimum humidity detected
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/humidityY_min_hyst
+Description:
+ Humidity hysteresis value for min limit.
+
+ Unit: milli-percent (per cent mille, pcm)
+
+ Must be reported as an absolute humidity, NOT a delta
+ from the min value.
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/humidityY_rated_min
+Description:
+ Minimum rated humidity.
+
+ Unit: milli-percent (per cent mille, pcm)
+
+ RO
+
+What: /sys/class/hwmon/hwmonX/humidityY_rated_max
+Description:
+ Maximum rated humidity.
+
+ Unit: milli-percent (per cent mille, pcm)
+
+ RO
+
+
+What: /sys/class/hwmon/hwmonX/intrusionY_alarm
+Description:
+ Chassis intrusion detection
+
+ - 0: OK
+ - 1: intrusion detected
+
+ RW
+
+ Contrary to regular alarm flags which clear themselves
+ automatically when read, this one sticks until cleared by
+ the user. This is done by writing 0 to the file. Writing
+ other values is unsupported.
+
+What: /sys/class/hwmon/hwmonX/intrusionY_beep
+Description:
+ Chassis intrusion beep
+
+ - 0: disable
+ - 1: enable
+
+ RW
+
+What: /sys/class/hwmon/hwmonX/device/pec
+Description:
+ PEC support on I2C devices
+
+ - 0, off, n: disable
+ - 1, on, y: enable
+
+ RW
diff --git a/Documentation/ABI/testing/sysfs-class-intel_pmt b/Documentation/ABI/testing/sysfs-class-intel_pmt
new file mode 100644
index 000000000000..ed4c886a21b1
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-intel_pmt
@@ -0,0 +1,119 @@
+What: /sys/class/intel_pmt/
+Date: October 2020
+KernelVersion: 5.10
+Contact: David Box <david.e.box@linux.intel.com>
+Description:
+ The intel_pmt/ class directory contains information for
+ devices that expose hardware telemetry using Intel Platform
+ Monitoring Technology (PMT)
+
+What: /sys/class/intel_pmt/telem<x>
+Date: October 2020
+KernelVersion: 5.10
+Contact: David Box <david.e.box@linux.intel.com>
+Description:
+ The telem<x> directory contains files describing an instance of
+ a PMT telemetry device that exposes hardware telemetry. Each
+ telem<x> directory has an associated telem file. This file
+ may be opened and mapped or read to access the telemetry space
+ of the device. The register layout of the telemetry space is
+ determined from an XML file that matches the PCI device id and
+ GUID for the device.
+
+What: /sys/class/intel_pmt/telem<x>/telem
+Date: October 2020
+KernelVersion: 5.10
+Contact: David Box <david.e.box@linux.intel.com>
+Description:
+ (RO) The telemetry data for this telemetry device. This file
+ may be mapped or read to obtain the data.
+
+What: /sys/class/intel_pmt/telem<x>/guid
+Date: October 2020
+KernelVersion: 5.10
+Contact: David Box <david.e.box@linux.intel.com>
+Description:
+ (RO) The GUID for this telemetry device. The GUID identifies
+ the version of the XML file for the parent device that is to
+ be used to get the register layout.
+
+What: /sys/class/intel_pmt/telem<x>/size
+Date: October 2020
+KernelVersion: 5.10
+Contact: David Box <david.e.box@linux.intel.com>
+Description:
+ (RO) The size of telemetry region in bytes that corresponds to
+ the mapping size for the telem file.
+
+What: /sys/class/intel_pmt/telem<x>/offset
+Date: October 2020
+KernelVersion: 5.10
+Contact: David Box <david.e.box@linux.intel.com>
+Description:
+ (RO) The offset of telemetry region in bytes that corresponds to
+ the mapping for the telem file.
+
+What: /sys/class/intel_pmt/crashlog<x>
+Date: October 2020
+KernelVersion: 5.10
+Contact: Alexander Duyck <alexander.h.duyck@linux.intel.com>
+Description:
+ The crashlog<x> directory contains files for configuring an
+ instance of a PMT crashlog device that can perform crash data
+ recording. Each crashlog<x> device has an associated crashlog
+ file. This file can be opened and mapped or read to access the
+ resulting crashlog buffer. The register layout for the buffer
+ can be determined from an XML file of specified GUID for the
+ parent device.
+
+What: /sys/class/intel_pmt/crashlog<x>/crashlog
+Date: October 2020
+KernelVersion: 5.10
+Contact: David Box <david.e.box@linux.intel.com>
+Description:
+ (RO) The crashlog buffer for this crashlog device. This file
+ may be mapped or read to obtain the data.
+
+What: /sys/class/intel_pmt/crashlog<x>/guid
+Date: October 2020
+KernelVersion: 5.10
+Contact: Alexander Duyck <alexander.h.duyck@linux.intel.com>
+Description:
+ (RO) The GUID for this crashlog device. The GUID identifies the
+ version of the XML file for the parent device that should be
+ used to determine the register layout.
+
+What: /sys/class/intel_pmt/crashlog<x>/size
+Date: October 2020
+KernelVersion: 5.10
+Contact: Alexander Duyck <alexander.h.duyck@linux.intel.com>
+Description:
+ (RO) The length of the result buffer in bytes that corresponds
+ to the size for the crashlog buffer.
+
+What: /sys/class/intel_pmt/crashlog<x>/offset
+Date: October 2020
+KernelVersion: 5.10
+Contact: Alexander Duyck <alexander.h.duyck@linux.intel.com>
+Description:
+ (RO) The offset of the buffer in bytes that corresponds
+ to the mapping for the crashlog device.
+
+What: /sys/class/intel_pmt/crashlog<x>/enable
+Date: October 2020
+KernelVersion: 5.10
+Contact: Alexander Duyck <alexander.h.duyck@linux.intel.com>
+Description:
+ (RW) Boolean value controlling if the crashlog functionality
+ is enabled for the crashlog device.
+
+What: /sys/class/intel_pmt/crashlog<x>/trigger
+Date: October 2020
+KernelVersion: 5.10
+Contact: Alexander Duyck <alexander.h.duyck@linux.intel.com>
+Description:
+ (RW) Boolean value controlling the triggering of the crashlog
+ device node. When read it provides data on if the crashlog has
+ been triggered. When written to it can be used to either clear
+ the current trigger by writing false, or to trigger a new
+ event if the trigger is not currently set.
diff --git a/Documentation/ABI/testing/sysfs-class-intel_pmt-features b/Documentation/ABI/testing/sysfs-class-intel_pmt-features
new file mode 100644
index 000000000000..cddb30e5bdf6
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-intel_pmt-features
@@ -0,0 +1,134 @@
+What: /sys/class/intel_pmt/features-<PCI BDF>/
+Date: 2025-04-24
+KernelVersion: 6.16
+Contact: david.e.box@linux.intel.com
+Description:
+ The `features-<PCI BDF>/` directory represents the "features"
+ capability exposed by Intel PMT (Platform Monitoring Technology)
+ for the given PCI device.
+
+ Each directory corresponds to a PMT feature and contains
+ attributes describing the available telemetry, monitoring, or
+ control functionalities.
+
+Directory Structure:
+
+ /sys/class/intel_pmt/features-<PCI BDF>/
+ ├── accelerator_telemetry/ # Per-accelerator telemetry data
+ ├── crash_log/ # Contains system crash telemetry logs
+ ├── per_core_environment_telemetry/ # Environmental telemetry per core
+ ├── per_core_performance_telemetry/ # Performance telemetry per core
+ ├── per_rmid_energy_telemetry/ # Energy telemetry for RMIDs
+ ├── per_rmid_perf_telemetry/ # Performance telemetry for RMIDs
+ ├── tpmi_control/ # TPMI-related controls and telemetry
+ ├── tracing/ # PMT tracing features
+ └── uncore_telemetry/ # Uncore telemetry data
+
+Common Files (Present in all feature directories):
+
+ caps
+ - Read-only
+ - Lists available capabilities for this feature.
+
+ guids
+ - Read-only
+ - Lists GUIDs associated with this feature.
+
+Additional Attributes (Conditional Presence):
+
+ max_command_size
+ - Read-only
+ - Present if the feature supports out-of-band MCTP access.
+ - Maximum supported MCTP command size for out-of-band PMT access (bytes).
+
+ max_stream_size
+ - Read-only
+ - Present if the feature supports out-of-band MCTP access.
+ - Maximum supported MCTP stream size (bytes).
+
+ min_watcher_period_ms
+ - Read-only
+ - Present if the feature supports the watcher API.
+ The watcher API provides a writable control interface that allows user
+ configuration of monitoring behavior, such as setting the sampling or
+ reporting interval.
+ - Minimum supported time period for the watcher interface (milliseconds).
+
+ num_rmids
+ - Read-only
+ - Present if the feature supports RMID (Resource Monitoring ID) telemetry.
+ RMIDs are identifiers used by hardware to track and report resource usage,
+ such as memory bandwidth or energy consumption, on a per-logical-entity
+ basis (e.g., per core, thread, or process group).
+ - Maximum number of RMIDs tracked simultaneously.
+
+Example:
+For a device with PCI BDF `0000:00:03.1`, the directory tree could look like:
+
+ /sys/class/intel_pmt/features-0000:00:03.1/
+ ├── accelerator_telemetry/
+ │ ├── caps
+ │ ├── guids
+ │ ├── max_command_size
+ │ ├── max_stream_size
+ │ ├── min_watcher_period_ms
+ ├── crash_log/
+ │ ├── caps
+ │ ├── guids
+ │ ├── max_command_size
+ │ ├── max_stream_size
+ ├── per_core_environment_telemetry/
+ │ ├── caps
+ │ ├── guids
+ │ ├── max_command_size
+ │ ├── max_stream_size
+ │ ├── min_watcher_period_ms
+ ├── per_rmid_energy_telemetry/
+ │ ├── caps
+ │ ├── guids
+ │ ├── max_command_size
+ │ ├── max_stream_size
+ │ ├── min_watcher_period_ms
+ │ ├── num_rmids
+ ├── tpmi_control/
+ │ ├── caps
+ │ ├── guids
+ ├── tracing/
+ │ ├── caps
+ │ ├── guids
+ ├── uncore_telemetry/
+ │ ├── caps
+ │ ├── guids
+ │ ├── max_command_size
+ │ ├── max_stream_size
+ │ ├── min_watcher_period_ms
+
+Notes:
+ - Some attributes are only present if the corresponding feature supports
+ the capability (e.g., `max_command_size` for MCTP-capable features).
+ - Features supporting RMIDs include `num_rmids`.
+ - Features supporting the watcher API include `min_watcher_period_ms`.
+ - The `caps` file provides additional information about the functionality
+ of the feature.
+
+Example 'caps' content for the 'tracing' feature:
+
+ /sys/class/intel_pmt/features-0000:00:03.1/
+ ├── tracing/
+ │ ├── caps
+
+ telemetry Available: No
+ watcher Available: Yes
+ crashlog Available: No
+ streaming Available: No
+ threashold Available: No
+ window Available: No
+ config Available: Yes
+ tracing Available: No
+ inband Available: Yes
+ oob Available: Yes
+ secure_chan Available: No
+ pmt_sp Available: Yes
+ pmt_sp_policy Available: Yes
+ mailbox Available: Yes
+ bios_lock Available: Yes
diff --git a/Documentation/ABI/testing/sysfs-class-led b/Documentation/ABI/testing/sysfs-class-led
index 5f67f7ab277b..0313b82644f2 100644
--- a/Documentation/ABI/testing/sysfs-class-led
+++ b/Documentation/ABI/testing/sysfs-class-led
@@ -3,9 +3,26 @@ Date: March 2006
KernelVersion: 2.6.17
Contact: Richard Purdie <rpurdie@rpsys.net>
Description:
- Set the brightness of the LED. Most LEDs don't
- have hardware brightness support, so will just be turned on for
- non-zero brightness settings. The value is between 0 and
+ Set the brightness of the LED.
+
+ Most LEDs don't have hardware brightness support, so will
+ just be turned on for non-zero brightness settings.
+
+ .. Note::
+
+ For multicolor LEDs, writing to this file will update all
+ LEDs within the group to a calculated percentage of what
+ each color LED intensity is set to.
+
+ The percentage is calculated for each grouped LED via
+ the equation below::
+
+ led_brightness = brightness * multi_intensity/max_brightness
+
+ For additional details please refer to
+ Documentation/leds/leds-class-multicolor.rst.
+
+ The value is between 0 and
/sys/class/leds/<led>/max_brightness.
Writing 0 to this file clears active trigger.
@@ -13,6 +30,8 @@ Description:
Writing non-zero to this file while trigger is active changes the
top brightness trigger is going to use.
+
+
What: /sys/class/leds/<led>/max_brightness
Date: March 2006
KernelVersion: 2.6.17
@@ -47,10 +66,17 @@ Contact: Richard Purdie <rpurdie@rpsys.net>
Description:
Set the trigger for this LED. A trigger is a kernel based source
of LED events.
+
You can change triggers in a similar manner to the way an IO
scheduler is chosen. Trigger specific parameters can appear in
/sys/class/leds/<led> once a given trigger is selected. For
- their documentation see sysfs-class-led-trigger-*.
+ their documentation see `sysfs-class-led-trigger-*`.
+
+ Writing "none" removes the trigger for this LED.
+
+ Writing "default" sets the trigger to the LED's default trigger
+ (which would often be configured in the device tree for the
+ hardware).
What: /sys/class/leds/<led>/inverted
Date: January 2011
diff --git a/Documentation/ABI/testing/sysfs-class-led-driver-aw200xx b/Documentation/ABI/testing/sysfs-class-led-driver-aw200xx
new file mode 100644
index 000000000000..6d4449cf9d71
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-led-driver-aw200xx
@@ -0,0 +1,5 @@
+What: /sys/class/leds/<led>/dim
+Date: May 2023
+Description: 64-level DIM current. If you write a negative value or
+ "auto", the dim will be calculated according to the
+ brightness.
diff --git a/Documentation/ABI/testing/sysfs-class-led-driver-lm3533 b/Documentation/ABI/testing/sysfs-class-led-driver-lm3533
index 620ebb3b9baa..e38a835d0a85 100644
--- a/Documentation/ABI/testing/sysfs-class-led-driver-lm3533
+++ b/Documentation/ABI/testing/sysfs-class-led-driver-lm3533
@@ -4,10 +4,12 @@ KernelVersion: 3.5
Contact: Johan Hovold <jhovold@gmail.com>
Description:
Set the ALS output channel to use as input in
- ALS-current-control mode (1, 2), where
+ ALS-current-control mode (1, 2), where:
- 1 - out_current1
- 2 - out_current2
+ == ============
+ 1 out_current1
+ 2 out_current2
+ == ============
What: /sys/class/leds/<led>/als_en
Date: May 2012
@@ -22,16 +24,18 @@ Date: April 2012
KernelVersion: 3.5
Contact: Johan Hovold <jhovold@gmail.com>
Description:
- Set the pattern generator fall and rise times (0..7), where
+ Set the pattern generator fall and rise times (0..7), where:
- 0 - 2048 us
- 1 - 262 ms
- 2 - 524 ms
- 3 - 1.049 s
- 4 - 2.097 s
- 5 - 4.194 s
- 6 - 8.389 s
- 7 - 16.78 s
+ == =======
+ 0 2048 us
+ 1 262 ms
+ 2 524 ms
+ 3 1.049 s
+ 4 2.097 s
+ 5 4.194 s
+ 6 8.389 s
+ 7 16.78 s
+ == =======
What: /sys/class/leds/<led>/id
Date: April 2012
@@ -45,21 +49,25 @@ Date: April 2012
KernelVersion: 3.5
Contact: Johan Hovold <jhovold@gmail.com>
Description:
- Set the brightness-mapping mode (0, 1), where
+ Set the brightness-mapping mode (0, 1), where:
- 0 - exponential mode
- 1 - linear mode
+ == ================
+ 0 exponential mode
+ 1 linear mode
+ == ================
What: /sys/class/leds/<led>/pwm
Date: April 2012
KernelVersion: 3.5
Contact: Johan Hovold <jhovold@gmail.com>
Description:
- Set the PWM-input control mask (5 bits), where
+ Set the PWM-input control mask (5 bits), where:
- bit 5 - PWM-input enabled in Zone 4
- bit 4 - PWM-input enabled in Zone 3
- bit 3 - PWM-input enabled in Zone 2
- bit 2 - PWM-input enabled in Zone 1
- bit 1 - PWM-input enabled in Zone 0
- bit 0 - PWM-input enabled
+ ===== ===========================
+ bit 5 PWM-input enabled in Zone 4
+ bit 4 PWM-input enabled in Zone 3
+ bit 3 PWM-input enabled in Zone 2
+ bit 2 PWM-input enabled in Zone 1
+ bit 1 PWM-input enabled in Zone 0
+ bit 0 PWM-input enabled
+ ===== ===========================
diff --git a/Documentation/ABI/testing/sysfs-class-led-driver-sc27xx b/Documentation/ABI/testing/sysfs-class-led-driver-sc27xx
deleted file mode 100644
index 45b1e605d355..000000000000
--- a/Documentation/ABI/testing/sysfs-class-led-driver-sc27xx
+++ /dev/null
@@ -1,22 +0,0 @@
-What: /sys/class/leds/<led>/hw_pattern
-Date: September 2018
-KernelVersion: 4.20
-Description:
- Specify a hardware pattern for the SC27XX LED. For the SC27XX
- LED controller, it only supports 4 stages to make a single
- hardware pattern, which is used to configure the rise time,
- high time, fall time and low time for the breathing mode.
-
- For the breathing mode, the SC27XX LED only expects one brightness
- for the high stage. To be compatible with the hardware pattern
- format, we should set brightness as 0 for rise stage, fall
- stage and low stage.
-
- Min stage duration: 125 ms
- Max stage duration: 31875 ms
-
- Since the stage duration step is 125 ms, the duration should be
- a multiplier of 125, like 125ms, 250ms, 375ms, 500ms ... 31875ms.
-
- Thus the format of the hardware pattern values should be:
- "0 rise_duration brightness high_duration 0 fall_duration 0 low_duration".
diff --git a/Documentation/ABI/testing/sysfs-class-led-driver-turris-omnia b/Documentation/ABI/testing/sysfs-class-led-driver-turris-omnia
new file mode 100644
index 000000000000..369b4ae8be5f
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-led-driver-turris-omnia
@@ -0,0 +1,28 @@
+What: /sys/class/leds/<led>/device/brightness
+Date: July 2020
+KernelVersion: 5.9
+Contact: Marek Behún <kabel@kernel.org>
+Description: (RW) On the front panel of the Turris Omnia router there is also
+ a button which can be used to control the intensity of all the
+ LEDs at once, so that if they are too bright, user can dim them.
+
+ The microcontroller cycles between 8 levels of this global
+ brightness (from 100% to 0%), but this setting can have any
+ integer value between 0 and 100. It is therefore convenient to be
+ able to change this setting from software.
+
+ Format: %i
+
+What: /sys/class/leds/<led>/device/gamma_correction
+Date: August 2023
+KernelVersion: 6.6
+Contact: Marek Behún <kabel@kernel.org>
+Description: (RW) Newer versions of the microcontroller firmware of the
+ Turris Omnia router support gamma correction for the RGB LEDs.
+ This feature can be enabled/disabled by writing to this file.
+
+ If the feature is not supported because the MCU firmware is too
+ old, the file always reads as 0, and writing to the file results
+ in the EOPNOTSUPP error.
+
+ Format: %i
diff --git a/Documentation/ABI/testing/sysfs-class-led-flash b/Documentation/ABI/testing/sysfs-class-led-flash
index 220a0270b47b..11e5677c3672 100644
--- a/Documentation/ABI/testing/sysfs-class-led-flash
+++ b/Documentation/ABI/testing/sysfs-class-led-flash
@@ -55,26 +55,35 @@ Description: read only
Flash faults are re-read after strobing the flash. Possible
flash faults:
- * led-over-voltage - flash controller voltage to the flash LED
+ * led-over-voltage
+ flash controller voltage to the flash LED
has exceeded the limit specific to the flash controller
- * flash-timeout-exceeded - the flash strobe was still on when
+ * flash-timeout-exceeded
+ the flash strobe was still on when
the timeout set by the user has expired; not all flash
controllers may set this in all such conditions
- * controller-over-temperature - the flash controller has
+ * controller-over-temperature
+ the flash controller has
overheated
- * controller-short-circuit - the short circuit protection
+ * controller-short-circuit
+ the short circuit protection
of the flash controller has been triggered
- * led-power-supply-over-current - current in the LED power
+ * led-power-supply-over-current
+ current in the LED power
supply has exceeded the limit specific to the flash
controller
- * indicator-led-fault - the flash controller has detected
+ * indicator-led-fault
+ the flash controller has detected
a short or open circuit condition on the indicator LED
- * led-under-voltage - flash controller voltage to the flash
+ * led-under-voltage
+ flash controller voltage to the flash
LED has been below the minimum limit specific to
the flash
- * controller-under-voltage - the input voltage of the flash
+ * controller-under-voltage
+ the input voltage of the flash
controller is below the limit under which strobing the
flash at full current will not be possible;
the condition persists until this flag is no longer set
- * led-over-temperature - the temperature of the LED has exceeded
+ * led-over-temperature
+ the temperature of the LED has exceeded
its allowed upper limit
diff --git a/Documentation/ABI/testing/sysfs-class-led-multicolor b/Documentation/ABI/testing/sysfs-class-led-multicolor
new file mode 100644
index 000000000000..16fc827b10cb
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-led-multicolor
@@ -0,0 +1,24 @@
+
+What: /sys/class/leds/<led>/multi_index
+Date: March 2020
+KernelVersion: 5.9
+Contact: Dan Murphy <dmurphy@ti.com>
+Description: read
+ The multi_index array, when read, will output the LED colors
+ as an array of strings as they are indexed in the
+ multi_intensity file.
+
+ For additional details please refer to
+ Documentation/leds/leds-class-multicolor.rst.
+
+What: /sys/class/leds/<led>/multi_intensity
+Date: March 2020
+KernelVersion: 5.9
+Contact: Dan Murphy <dmurphy@ti.com>
+Description: read/write
+ This file contains array of integers. Order of components is
+ described by the multi_index array. The maximum intensity should
+ not exceed /sys/class/leds/<led>/max_brightness.
+
+ For additional details please refer to
+ Documentation/leds/leds-class-multicolor.rst.
diff --git a/Documentation/ABI/testing/sysfs-class-led-trigger-netdev b/Documentation/ABI/testing/sysfs-class-led-trigger-netdev
index 451af6d6768c..ed46b37ab8a2 100644
--- a/Documentation/ABI/testing/sysfs-class-led-trigger-netdev
+++ b/Documentation/ABI/testing/sysfs-class-led-trigger-netdev
@@ -13,33 +13,180 @@ Description:
Specifies the duration of the LED blink in milliseconds.
Defaults to 50 ms.
+ When offloaded is true, the interval value MUST be set to the
+ default value and cannot be changed.
+ Trying to set any value in this specific mode will return
+ an EINVAL error.
+
What: /sys/class/leds/<led>/link
Date: Dec 2017
KernelVersion: 4.16
Contact: linux-leds@vger.kernel.org
Description:
Signal the link state of the named network device.
+
If set to 0 (default), the LED's normal state is off.
+
If set to 1, the LED's normal state reflects the link state
of the named network device.
Setting this value also immediately changes the LED state.
+
What: /sys/class/leds/<led>/tx
Date: Dec 2017
KernelVersion: 4.16
Contact: linux-leds@vger.kernel.org
Description:
Signal transmission of data on the named network device.
+
If set to 0 (default), the LED will not blink on transmission.
+
If set to 1, the LED will blink for the milliseconds specified
in interval to signal transmission.
+ When offloaded is true, the blink interval is controlled by
+ hardware and won't reflect the value set in interval.
+
What: /sys/class/leds/<led>/rx
Date: Dec 2017
KernelVersion: 4.16
Contact: linux-leds@vger.kernel.org
Description:
Signal reception of data on the named network device.
+
If set to 0 (default), the LED will not blink on reception.
+
If set to 1, the LED will blink for the milliseconds specified
in interval to signal reception.
+
+ When offloaded is true, the blink interval is controlled by
+ hardware and won't reflect the value set in interval.
+
+What: /sys/class/leds/<led>/offloaded
+Date: Jun 2023
+KernelVersion: 6.5
+Contact: linux-leds@vger.kernel.org
+Description:
+ Communicate whether the LED trigger modes are offloaded to
+ hardware or whether software fallback is used.
+
+ If 0, the LED is using software fallback to blink.
+
+ If 1, the LED blinking in requested mode is offloaded to
+ hardware.
+
+What: /sys/class/leds/<led>/link_10
+Date: Jun 2023
+KernelVersion: 6.5
+Contact: linux-leds@vger.kernel.org
+Description:
+ Signal the link speed state of 10Mbps of the named network device.
+
+ If set to 0 (default), the LED's normal state is off.
+
+ If set to 1, the LED's normal state reflects the link state
+ speed of 10MBps of the named network device.
+ Setting this value also immediately changes the LED state.
+
+ Present only if the named network device supports 10Mbps link speed.
+
+What: /sys/class/leds/<led>/link_100
+Date: Jun 2023
+KernelVersion: 6.5
+Contact: linux-leds@vger.kernel.org
+Description:
+ Signal the link speed state of 100Mbps of the named network device.
+
+ If set to 0 (default), the LED's normal state is off.
+
+ If set to 1, the LED's normal state reflects the link state
+ speed of 100Mbps of the named network device.
+ Setting this value also immediately changes the LED state.
+
+ Present only if the named network device supports 100Mbps link speed.
+
+What: /sys/class/leds/<led>/link_1000
+Date: Jun 2023
+KernelVersion: 6.5
+Contact: linux-leds@vger.kernel.org
+Description:
+ Signal the link speed state of 1000Mbps of the named network device.
+
+ If set to 0 (default), the LED's normal state is off.
+
+ If set to 1, the LED's normal state reflects the link state
+ speed of 1000Mbps of the named network device.
+ Setting this value also immediately changes the LED state.
+
+ Present only if the named network device supports 1000Mbps link speed.
+
+What: /sys/class/leds/<led>/link_2500
+Date: Nov 2023
+KernelVersion: 6.8
+Contact: linux-leds@vger.kernel.org
+Description:
+ Signal the link speed state of 2500Mbps of the named network device.
+
+ If set to 0 (default), the LED's normal state is off.
+
+ If set to 1, the LED's normal state reflects the link state
+ speed of 2500Mbps of the named network device.
+ Setting this value also immediately changes the LED state.
+
+ Present only if the named network device supports 2500Mbps link speed.
+
+What: /sys/class/leds/<led>/link_5000
+Date: Nov 2023
+KernelVersion: 6.8
+Contact: linux-leds@vger.kernel.org
+Description:
+ Signal the link speed state of 5000Mbps of the named network device.
+
+ If set to 0 (default), the LED's normal state is off.
+
+ If set to 1, the LED's normal state reflects the link state
+ speed of 5000Mbps of the named network device.
+ Setting this value also immediately changes the LED state.
+
+ Present only if the named network device supports 5000Mbps link speed.
+
+What: /sys/class/leds/<led>/link_10000
+Date: Nov 2023
+KernelVersion: 6.8
+Contact: linux-leds@vger.kernel.org
+Description:
+ Signal the link speed state of 10000Mbps of the named network device.
+
+ If set to 0 (default), the LED's normal state is off.
+
+ If set to 1, the LED's normal state reflects the link state
+ speed of 10000Mbps of the named network device.
+ Setting this value also immediately changes the LED state.
+
+ Present only if the named network device supports 10000Mbps link speed.
+
+What: /sys/class/leds/<led>/half_duplex
+Date: Jun 2023
+KernelVersion: 6.5
+Contact: linux-leds@vger.kernel.org
+Description:
+ Signal the link half duplex state of the named network device.
+
+ If set to 0 (default), the LED's normal state is off.
+
+ If set to 1, the LED's normal state reflects the link half
+ duplex state of the named network device.
+ Setting this value also immediately changes the LED state.
+
+What: /sys/class/leds/<led>/full_duplex
+Date: Jun 2023
+KernelVersion: 6.5
+Contact: linux-leds@vger.kernel.org
+Description:
+ Signal the link full duplex state of the named network device.
+
+ If set to 0 (default), the LED's normal state is off.
+
+ If set to 1, the LED's normal state reflects the link full
+ duplex state of the named network device.
+ Setting this value also immediately changes the LED state.
diff --git a/Documentation/ABI/testing/sysfs-class-led-trigger-pattern b/Documentation/ABI/testing/sysfs-class-led-trigger-pattern
index 1e5d172e0646..22f28f2e9ac4 100644
--- a/Documentation/ABI/testing/sysfs-class-led-trigger-pattern
+++ b/Documentation/ABI/testing/sysfs-class-led-trigger-pattern
@@ -7,55 +7,20 @@ Description:
timer. It can do gradual dimming and step change of brightness.
The pattern is given by a series of tuples, of brightness and
- duration (ms). The LED is expected to traverse the series and
- each brightness value for the specified duration. Duration of
- 0 means brightness should immediately change to new value, and
- writing malformed pattern deactivates any active one.
+ duration (ms).
- 1. For gradual dimming, the dimming interval now is set as 50
- milliseconds. So the tuple with duration less than dimming
- interval (50ms) is treated as a step change of brightness,
- i.e. the subsequent brightness will be applied without adding
- intervening dimming intervals.
+ The exact format is described in:
+ Documentation/devicetree/bindings/leds/leds-trigger-pattern.txt
- The gradual dimming format of the software pattern values should be:
- "brightness_1 duration_1 brightness_2 duration_2 brightness_3
- duration_3 ...". For example:
-
- echo 0 1000 255 2000 > pattern
-
- It will make the LED go gradually from zero-intensity to max (255)
- intensity in 1000 milliseconds, then back to zero intensity in 2000
- milliseconds:
-
- LED brightness
- ^
- 255-| / \ / \ /
- | / \ / \ /
- | / \ / \ /
- | / \ / \ /
- 0-| / \/ \/
- +---0----1----2----3----4----5----6------------> time (s)
-
- 2. To make the LED go instantly from one brightness value to another,
- we should use zero-time lengths (the brightness must be same as
- the previous tuple's). So the format should be:
- "brightness_1 duration_1 brightness_1 0 brightness_2 duration_2
- brightness_2 0 ...". For example:
-
- echo 0 1000 0 0 255 2000 255 0 > pattern
-
- It will make the LED stay off for one second, then stay at max brightness
- for two seconds:
+What: /sys/class/leds/<led>/hr_pattern
+Date: April 2024
+Description:
+ Specify a software pattern for the LED, that supports altering
+ the brightness for the specified duration with one software
+ timer. It can do gradual dimming and step change of brightness.
- LED brightness
- ^
- 255-| +---------+ +---------+
- | | | | |
- | | | | |
- | | | | |
- 0-| -----+ +----+ +----
- +---0----1----2----3----4----5----6------------> time (s)
+ Unlike the /sys/class/leds/<led>/pattern, this attribute runs
+ a pattern on high-resolution timer (hrtimer).
What: /sys/class/leds/<led>/hw_pattern
Date: September 2018
@@ -68,8 +33,8 @@ Description:
Since different LED hardware can have different semantics of
hardware patterns, each driver is expected to provide its own
- description for the hardware patterns in their ABI documentation
- file.
+ description for the hardware patterns in their documentation
+ file at Documentation/leds/.
What: /sys/class/leds/<led>/repeat
Date: September 2018
@@ -80,3 +45,6 @@ Description:
This file will always return the originally written repeat
number.
+
+ It should be noticed that some leds, like EL15203000 may
+ only support indefinitely patterns, so they always store -1.
diff --git a/Documentation/ABI/testing/sysfs-class-led-trigger-tty b/Documentation/ABI/testing/sysfs-class-led-trigger-tty
new file mode 100644
index 000000000000..308fbc3627cd
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-led-trigger-tty
@@ -0,0 +1,62 @@
+What: /sys/class/leds/<tty_led>/ttyname
+Date: Dec 2020
+KernelVersion: 5.10
+Contact: linux-leds@vger.kernel.org
+Description:
+ Specifies the tty device name of the triggering tty
+
+What: /sys/class/leds/<tty_led>/rx
+Date: February 2024
+KernelVersion: 6.8
+Description:
+ Signal reception (rx) of data on the named tty device.
+ If set to 0, the LED will not blink on reception.
+ If set to 1 (default), the LED will blink on reception.
+
+What: /sys/class/leds/<tty_led>/tx
+Date: February 2024
+KernelVersion: 6.8
+Description:
+ Signal transmission (tx) of data on the named tty device.
+ If set to 0, the LED will not blink on transmission.
+ If set to 1 (default), the LED will blink on transmission.
+
+What: /sys/class/leds/<tty_led>/cts
+Date: February 2024
+KernelVersion: 6.8
+Description:
+ CTS = Clear To Send
+ DCE is ready to accept data from the DTE.
+ If the line state is detected, the LED is switched on.
+ If set to 0 (default), the LED will not evaluate CTS.
+ If set to 1, the LED will evaluate CTS.
+
+What: /sys/class/leds/<tty_led>/dsr
+Date: February 2024
+KernelVersion: 6.8
+Description:
+ DSR = Data Set Ready
+ DCE is ready to receive and send data.
+ If the line state is detected, the LED is switched on.
+ If set to 0 (default), the LED will not evaluate DSR.
+ If set to 1, the LED will evaluate DSR.
+
+What: /sys/class/leds/<tty_led>/dcd
+Date: February 2024
+KernelVersion: 6.8
+Description:
+ DCD = Data Carrier Detect
+ DTE is receiving a carrier from the DCE.
+ If the line state is detected, the LED is switched on.
+ If set to 0 (default), the LED will not evaluate CAR (DCD).
+ If set to 1, the LED will evaluate CAR (DCD).
+
+What: /sys/class/leds/<tty_led>/rng
+Date: February 2024
+KernelVersion: 6.8
+Description:
+ RNG = Ring Indicator
+ DCE has detected an incoming ring signal on the telephone
+ line. If the line state is detected, the LED is switched on.
+ If set to 0 (default), the LED will not evaluate RNG.
+ If set to 1, the LED will evaluate RNG.
diff --git a/Documentation/ABI/testing/sysfs-class-led-trigger-usbport b/Documentation/ABI/testing/sysfs-class-led-trigger-usbport
index f440e690daef..eb81152b8348 100644
--- a/Documentation/ABI/testing/sysfs-class-led-trigger-usbport
+++ b/Documentation/ABI/testing/sysfs-class-led-trigger-usbport
@@ -8,5 +8,6 @@ Description:
selected for the USB port trigger. Selecting ports makes trigger
observing them for any connected devices and lighting on LED if
there are any.
+
Echoing "1" value selects USB port. Echoing "0" unselects it.
Current state can be also read.
diff --git a/Documentation/ABI/testing/sysfs-class-leds-gt683r b/Documentation/ABI/testing/sysfs-class-leds-gt683r
index e4fae6026e79..b57ffb26e722 100644
--- a/Documentation/ABI/testing/sysfs-class-leds-gt683r
+++ b/Documentation/ABI/testing/sysfs-class-leds-gt683r
@@ -5,12 +5,14 @@ Contact: Janne Kanniainen <janne.kanniainen@gmail.com>
Description:
Set the mode of LEDs. You should notice that changing the mode
of one LED will update the mode of its two sibling devices as
- well.
+ well. Possible values are:
- 0 - normal
- 1 - audio
- 2 - breathing
+ == =========
+ 0 normal
+ 1 audio
+ 2 breathing
+ == =========
Normal: LEDs are fully on when enabled
Audio: LEDs brightness depends on sound level
- Breathing: LEDs brightness varies at human breathing rate \ No newline at end of file
+ Breathing: LEDs brightness varies at human breathing rate
diff --git a/Documentation/ABI/testing/sysfs-class-mei b/Documentation/ABI/testing/sysfs-class-mei
index 17d7444a2397..1db36ddf8e58 100644
--- a/Documentation/ABI/testing/sysfs-class-mei
+++ b/Documentation/ABI/testing/sysfs-class-mei
@@ -6,7 +6,7 @@ Description:
The mei/ class sub-directory belongs to mei device class
-What: /sys/class/mei/meiN/
+What: /sys/class/mei/mei<N>/
Date: May 2014
KernelVersion: 3.17
Contact: Tomas Winkler <tomas.winkler@intel.com>
@@ -14,7 +14,7 @@ Description:
The /sys/class/mei/meiN directory is created for
each probed mei device
-What: /sys/class/mei/meiN/fw_status
+What: /sys/class/mei/mei<N>/fw_status
Date: Nov 2014
KernelVersion: 3.19
Contact: Tomas Winkler <tomas.winkler@intel.com>
@@ -29,7 +29,7 @@ Description: Display fw status registers content
Also number of registers varies between 1 and 6
depending on generation.
-What: /sys/class/mei/meiN/hbm_ver
+What: /sys/class/mei/mei<N>/hbm_ver
Date: Aug 2016
KernelVersion: 4.9
Contact: Tomas Winkler <tomas.winkler@intel.com>
@@ -38,7 +38,7 @@ Description: Display the negotiated HBM protocol version.
The HBM protocol version negotiated
between the driver and the device.
-What: /sys/class/mei/meiN/hbm_ver_drv
+What: /sys/class/mei/mei<N>/hbm_ver_drv
Date: Aug 2016
KernelVersion: 4.9
Contact: Tomas Winkler <tomas.winkler@intel.com>
@@ -46,7 +46,7 @@ Description: Display the driver HBM protocol version.
The HBM protocol version supported by the driver.
-What: /sys/class/mei/meiN/tx_queue_limit
+What: /sys/class/mei/mei<N>/tx_queue_limit
Date: Jan 2018
KernelVersion: 4.16
Contact: Tomas Winkler <tomas.winkler@intel.com>
@@ -55,7 +55,7 @@ Description: Configure tx queue limit
Set maximal number of pending writes
per opened session.
-What: /sys/class/mei/meiN/fw_ver
+What: /sys/class/mei/mei<N>/fw_ver
Date: May 2018
KernelVersion: 4.18
Contact: Tomas Winkler <tomas.winkler@intel.com>
@@ -65,3 +65,41 @@ Description: Display the ME firmware version.
<platform>:<major>.<minor>.<milestone>.<build_no>.
There can be up to three such blocks for different
FW components.
+
+What: /sys/class/mei/mei<N>/dev_state
+Date: Mar 2019
+KernelVersion: 5.1
+Contact: Tomas Winkler <tomas.winkler@intel.com>
+Description: Display the ME device state.
+
+ The device state can have following values:
+ INITIALIZING
+ INIT_CLIENTS
+ ENABLED
+ RESETTING
+ DISABLED
+ POWER_DOWN
+ POWER_UP
+
+What: /sys/class/mei/mei<N>/trc
+Date: Nov 2019
+KernelVersion: 5.5
+Contact: Tomas Winkler <tomas.winkler@intel.com>
+Description: Display trc status register content
+
+ The ME FW writes Glitch Detection HW (TRC)
+ status information into trc status register
+ for BIOS and OS to monitor fw health.
+
+What: /sys/class/mei/mei<N>/kind
+Date: Jul 2020
+KernelVersion: 5.8
+Contact: Tomas Winkler <tomas.winkler@intel.com>
+Description: Display kind of the device
+
+ Generic devices are marked as "mei"
+ while special purpose have their own
+ names.
+ Available options:
+ - mei: generic mei device.
+ - itouch: itouch (ipts) mei device.
diff --git a/Documentation/ABI/testing/sysfs-class-mic b/Documentation/ABI/testing/sysfs-class-mic
new file mode 100644
index 000000000000..5e5f36d10055
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-mic
@@ -0,0 +1,178 @@
+What: /sys/class/mic/
+Date: October 2013
+KernelVersion: 3.13
+Contact: Sudeep Dutt <sudeep.dutt@intel.com>
+Description:
+ The mic class directory belongs to Intel MIC devices and
+ provides information per MIC device. An Intel MIC device is a
+ PCIe form factor add-in Coprocessor card based on the Intel Many
+ Integrated Core (MIC) architecture that runs a Linux OS.
+
+What: /sys/class/mic/mic<X>
+Date: October 2013
+KernelVersion: 3.13
+Contact: Sudeep Dutt <sudeep.dutt@intel.com>
+Description:
+ The directories /sys/class/mic/mic0, /sys/class/mic/mic1 etc.,
+ represent MIC devices (0,1,..etc). Each directory has
+ information specific to that MIC device.
+
+What: /sys/class/mic/mic<X>/family
+Date: October 2013
+KernelVersion: 3.13
+Contact: Sudeep Dutt <sudeep.dutt@intel.com>
+Description:
+ Provides information about the Coprocessor family for an Intel
+ MIC device. For example - "x100"
+
+What: /sys/class/mic/mic<X>/stepping
+Date: October 2013
+KernelVersion: 3.13
+Contact: Sudeep Dutt <sudeep.dutt@intel.com>
+Description:
+ Provides information about the silicon stepping for an Intel
+ MIC device. For example - "A0" or "B0"
+
+What: /sys/class/mic/mic<X>/state
+Date: October 2013
+KernelVersion: 3.13
+Contact: Sudeep Dutt <sudeep.dutt@intel.com>
+Description:
+ When read, this entry provides the current state of an Intel
+ MIC device in the context of the card OS. Possible values that
+ will be read are:
+
+
+ =============== ===============================================
+ "ready" The MIC device is ready to boot the card OS.
+ On reading this entry after an OSPM resume,
+ a "boot" has to be written to this entry if
+ the card was previously shutdown during OSPM
+ suspend.
+ "booting" The MIC device has initiated booting a card OS.
+ "online" The MIC device has completed boot and is online
+ "shutting_down" The card OS is shutting down.
+ "resetting" A reset has been initiated for the MIC device
+ "reset_failed" The MIC device has failed to reset.
+ =============== ===============================================
+
+ When written, this sysfs entry triggers different state change
+ operations depending upon the current state of the card OS.
+ Acceptable values are:
+
+
+ ========== ===================================================
+ "boot" Boot the card OS image specified by the combination
+ of firmware, ramdisk, cmdline and bootmode
+ sysfs entries.
+ "reset" Initiates device reset.
+ "shutdown" Initiates card OS shutdown.
+ ========== ===================================================
+
+What: /sys/class/mic/mic<X>/shutdown_status
+Date: October 2013
+KernelVersion: 3.13
+Contact: Sudeep Dutt <sudeep.dutt@intel.com>
+Description:
+ An Intel MIC device runs a Linux OS during its operation. This
+ OS can shutdown because of various reasons. When read, this
+ entry provides the status on why the card OS was shutdown.
+ Possible values are:
+
+ ========== ===================================================
+ "nop" shutdown status is not applicable, when the card OS
+ is "online"
+ "crashed" Shutdown because of a HW or SW crash.
+ "halted" Shutdown because of a halt command.
+ "poweroff" Shutdown because of a poweroff command.
+ "restart" Shutdown because of a restart command.
+ ========== ===================================================
+
+What: /sys/class/mic/mic<X>/cmdline
+Date: October 2013
+KernelVersion: 3.13
+Contact: Sudeep Dutt <sudeep.dutt@intel.com>
+Description:
+ An Intel MIC device runs a Linux OS during its operation. Before
+ booting this card OS, it is possible to pass kernel command line
+ options to configure various features in it, similar to
+ self-bootable machines. When read, this entry provides
+ information about the current kernel command line options set to
+ boot the card OS. This entry can be written to change the
+ existing kernel command line options. Typically, the user would
+ want to read the current command line options, append new ones
+ or modify existing ones and then write the whole kernel command
+ line back to this entry.
+
+What: /sys/class/mic/mic<X>/firmware
+Date: October 2013
+KernelVersion: 3.13
+Contact: Sudeep Dutt <sudeep.dutt@intel.com>
+Description:
+ When read, this sysfs entry provides the path name under
+ /lib/firmware/ where the firmware image to be booted on the
+ card can be found. The entry can be written to change the
+ firmware image location under /lib/firmware/.
+
+What: /sys/class/mic/mic<X>/ramdisk
+Date: October 2013
+KernelVersion: 3.13
+Contact: Sudeep Dutt <sudeep.dutt@intel.com>
+Description:
+ When read, this sysfs entry provides the path name under
+ /lib/firmware/ where the ramdisk image to be used during card
+ OS boot can be found. The entry can be written to change
+ the ramdisk image location under /lib/firmware/.
+
+What: /sys/class/mic/mic<X>/bootmode
+Date: October 2013
+KernelVersion: 3.13
+Contact: Sudeep Dutt <sudeep.dutt@intel.com>
+Description:
+ When read, this sysfs entry provides the current bootmode for
+ the card. This sysfs entry can be written with the following
+ valid strings:
+ a) linux - Boot a Linux image.
+ b) flash - Boot an image for flash updates.
+
+What: /sys/class/mic/mic<X>/log_buf_addr
+Date: October 2013
+KernelVersion: 3.13
+Contact: Sudeep Dutt <sudeep.dutt@intel.com>
+Description:
+ An Intel MIC device runs a Linux OS during its operation. For
+ debugging purpose and early kernel boot messages, the user can
+ access the card OS log buffer via debugfs. When read, this entry
+ provides the kernel virtual address of the buffer where the card
+ OS log buffer can be read. This entry is written by the host
+ configuration daemon to set the log buffer address. The correct
+ log buffer address to be written can be found in the System.map
+ file of the card OS.
+
+What: /sys/class/mic/mic<X>/log_buf_len
+Date: October 2013
+KernelVersion: 3.13
+Contact: Sudeep Dutt <sudeep.dutt@intel.com>
+Description:
+ An Intel MIC device runs a Linux OS during its operation. For
+ debugging purpose and early kernel boot messages, the user can
+ access the card OS log buffer via debugfs. When read, this entry
+ provides the kernel virtual address where the card OS log buffer
+ length can be read. This entry is written by host configuration
+ daemon to set the log buffer length address. The correct log
+ buffer length address to be written can be found in the
+ System.map file of the card OS.
+
+What: /sys/class/mic/mic<X>/heartbeat_enable
+Date: March 2015
+KernelVersion: 4.4
+Contact: Ashutosh Dixit <ashutosh.dixit@intel.com>
+Description:
+ The MIC drivers detect and inform user space about card crashes
+ via a heartbeat mechanism (see the description of
+ shutdown_status above). User space can turn off this
+ notification by setting heartbeat_enable to 0 and enable it by
+ setting this entry to 1. If this notification is disabled it is
+ the responsibility of user space to detect card crashes via
+ alternative means such as a network ping. This setting is
+ enabled by default.
diff --git a/Documentation/ABI/testing/sysfs-class-mic.txt b/Documentation/ABI/testing/sysfs-class-mic.txt
deleted file mode 100644
index 6ef682603179..000000000000
--- a/Documentation/ABI/testing/sysfs-class-mic.txt
+++ /dev/null
@@ -1,166 +0,0 @@
-What: /sys/class/mic/
-Date: October 2013
-KernelVersion: 3.13
-Contact: Sudeep Dutt <sudeep.dutt@intel.com>
-Description:
- The mic class directory belongs to Intel MIC devices and
- provides information per MIC device. An Intel MIC device is a
- PCIe form factor add-in Coprocessor card based on the Intel Many
- Integrated Core (MIC) architecture that runs a Linux OS.
-
-What: /sys/class/mic/mic(x)
-Date: October 2013
-KernelVersion: 3.13
-Contact: Sudeep Dutt <sudeep.dutt@intel.com>
-Description:
- The directories /sys/class/mic/mic0, /sys/class/mic/mic1 etc.,
- represent MIC devices (0,1,..etc). Each directory has
- information specific to that MIC device.
-
-What: /sys/class/mic/mic(x)/family
-Date: October 2013
-KernelVersion: 3.13
-Contact: Sudeep Dutt <sudeep.dutt@intel.com>
-Description:
- Provides information about the Coprocessor family for an Intel
- MIC device. For example - "x100"
-
-What: /sys/class/mic/mic(x)/stepping
-Date: October 2013
-KernelVersion: 3.13
-Contact: Sudeep Dutt <sudeep.dutt@intel.com>
-Description:
- Provides information about the silicon stepping for an Intel
- MIC device. For example - "A0" or "B0"
-
-What: /sys/class/mic/mic(x)/state
-Date: October 2013
-KernelVersion: 3.13
-Contact: Sudeep Dutt <sudeep.dutt@intel.com>
-Description:
- When read, this entry provides the current state of an Intel
- MIC device in the context of the card OS. Possible values that
- will be read are:
- "ready" - The MIC device is ready to boot the card OS. On
- reading this entry after an OSPM resume, a "boot" has to be
- written to this entry if the card was previously shutdown
- during OSPM suspend.
- "booting" - The MIC device has initiated booting a card OS.
- "online" - The MIC device has completed boot and is online
- "shutting_down" - The card OS is shutting down.
- "resetting" - A reset has been initiated for the MIC device
- "reset_failed" - The MIC device has failed to reset.
-
- When written, this sysfs entry triggers different state change
- operations depending upon the current state of the card OS.
- Acceptable values are:
- "boot" - Boot the card OS image specified by the combination
- of firmware, ramdisk, cmdline and bootmode
- sysfs entries.
- "reset" - Initiates device reset.
- "shutdown" - Initiates card OS shutdown.
-
-What: /sys/class/mic/mic(x)/shutdown_status
-Date: October 2013
-KernelVersion: 3.13
-Contact: Sudeep Dutt <sudeep.dutt@intel.com>
-Description:
- An Intel MIC device runs a Linux OS during its operation. This
- OS can shutdown because of various reasons. When read, this
- entry provides the status on why the card OS was shutdown.
- Possible values are:
- "nop" - shutdown status is not applicable, when the card OS is
- "online"
- "crashed" - Shutdown because of a HW or SW crash.
- "halted" - Shutdown because of a halt command.
- "poweroff" - Shutdown because of a poweroff command.
- "restart" - Shutdown because of a restart command.
-
-What: /sys/class/mic/mic(x)/cmdline
-Date: October 2013
-KernelVersion: 3.13
-Contact: Sudeep Dutt <sudeep.dutt@intel.com>
-Description:
- An Intel MIC device runs a Linux OS during its operation. Before
- booting this card OS, it is possible to pass kernel command line
- options to configure various features in it, similar to
- self-bootable machines. When read, this entry provides
- information about the current kernel command line options set to
- boot the card OS. This entry can be written to change the
- existing kernel command line options. Typically, the user would
- want to read the current command line options, append new ones
- or modify existing ones and then write the whole kernel command
- line back to this entry.
-
-What: /sys/class/mic/mic(x)/firmware
-Date: October 2013
-KernelVersion: 3.13
-Contact: Sudeep Dutt <sudeep.dutt@intel.com>
-Description:
- When read, this sysfs entry provides the path name under
- /lib/firmware/ where the firmware image to be booted on the
- card can be found. The entry can be written to change the
- firmware image location under /lib/firmware/.
-
-What: /sys/class/mic/mic(x)/ramdisk
-Date: October 2013
-KernelVersion: 3.13
-Contact: Sudeep Dutt <sudeep.dutt@intel.com>
-Description:
- When read, this sysfs entry provides the path name under
- /lib/firmware/ where the ramdisk image to be used during card
- OS boot can be found. The entry can be written to change
- the ramdisk image location under /lib/firmware/.
-
-What: /sys/class/mic/mic(x)/bootmode
-Date: October 2013
-KernelVersion: 3.13
-Contact: Sudeep Dutt <sudeep.dutt@intel.com>
-Description:
- When read, this sysfs entry provides the current bootmode for
- the card. This sysfs entry can be written with the following
- valid strings:
- a) linux - Boot a Linux image.
- b) flash - Boot an image for flash updates.
-
-What: /sys/class/mic/mic(x)/log_buf_addr
-Date: October 2013
-KernelVersion: 3.13
-Contact: Sudeep Dutt <sudeep.dutt@intel.com>
-Description:
- An Intel MIC device runs a Linux OS during its operation. For
- debugging purpose and early kernel boot messages, the user can
- access the card OS log buffer via debugfs. When read, this entry
- provides the kernel virtual address of the buffer where the card
- OS log buffer can be read. This entry is written by the host
- configuration daemon to set the log buffer address. The correct
- log buffer address to be written can be found in the System.map
- file of the card OS.
-
-What: /sys/class/mic/mic(x)/log_buf_len
-Date: October 2013
-KernelVersion: 3.13
-Contact: Sudeep Dutt <sudeep.dutt@intel.com>
-Description:
- An Intel MIC device runs a Linux OS during its operation. For
- debugging purpose and early kernel boot messages, the user can
- access the card OS log buffer via debugfs. When read, this entry
- provides the kernel virtual address where the card OS log buffer
- length can be read. This entry is written by host configuration
- daemon to set the log buffer length address. The correct log
- buffer length address to be written can be found in the
- System.map file of the card OS.
-
-What: /sys/class/mic/mic(x)/heartbeat_enable
-Date: March 2015
-KernelVersion: 4.4
-Contact: Ashutosh Dixit <ashutosh.dixit@intel.com>
-Description:
- The MIC drivers detect and inform user space about card crashes
- via a heartbeat mechanism (see the description of
- shutdown_status above). User space can turn off this
- notification by setting heartbeat_enable to 0 and enable it by
- setting this entry to 1. If this notification is disabled it is
- the responsibility of user space to detect card crashes via
- alternative means such as a network ping. This setting is
- enabled by default.
diff --git a/Documentation/ABI/testing/sysfs-class-mtd b/Documentation/ABI/testing/sysfs-class-mtd
index 3bc7c0a95c92..f77fa4f6d465 100644
--- a/Documentation/ABI/testing/sysfs-class-mtd
+++ b/Documentation/ABI/testing/sysfs-class-mtd
@@ -39,7 +39,7 @@ KernelVersion: 2.6.29
Contact: linux-mtd@lists.infradead.org
Description:
Major and minor numbers of the character device corresponding
- to the read-only variant of thie MTD device (in
+ to the read-only variant of the MTD device (in
<major>:<minor> format). In this case <minor> will be odd.
What: /sys/class/mtd/mtdX/erasesize
diff --git a/Documentation/ABI/testing/sysfs-class-mux b/Documentation/ABI/testing/sysfs-class-mux
index 8715f9c7bd4f..c58b7b6e1aa6 100644
--- a/Documentation/ABI/testing/sysfs-class-mux
+++ b/Documentation/ABI/testing/sysfs-class-mux
@@ -7,7 +7,7 @@ Description:
Framework and provides a sysfs interface for using MUX
controllers.
-What: /sys/class/mux/muxchipN/
+What: /sys/class/mux/muxchip<N>/
Date: April 2017
KernelVersion: 4.13
Contact: Peter Rosin <peda@axentia.se>
diff --git a/Documentation/ABI/testing/sysfs-class-net b/Documentation/ABI/testing/sysfs-class-net
index 664a8f6a634f..ebf21beba846 100644
--- a/Documentation/ABI/testing/sysfs-class-net
+++ b/Documentation/ABI/testing/sysfs-class-net
@@ -4,10 +4,13 @@ KernelVersion: 3.17
Contact: netdev@vger.kernel.org
Description:
Indicates the name assignment type. Possible values are:
- 1: enumerated by the kernel, possibly in an unpredictable way
- 2: predictably named by the kernel
- 3: named by userspace
- 4: renamed
+
+ == ==========================================================
+ 1 enumerated by the kernel, possibly in an unpredictable way
+ 2 predictably named by the kernel
+ 3 named by userspace
+ 4 renamed
+ == ==========================================================
What: /sys/class/net/<iface>/addr_assign_type
Date: July 2010
@@ -15,10 +18,13 @@ KernelVersion: 3.2
Contact: netdev@vger.kernel.org
Description:
Indicates the address assignment type. Possible values are:
- 0: permanent address
- 1: randomly generated
- 2: stolen from another device
- 3: set using dev_set_mac_address
+
+ == =============================
+ 0 permanent address
+ 1 randomly generated
+ 2 stolen from another device
+ 3 set using dev_set_mac_address
+ == =============================
What: /sys/class/net/<iface>/addr_len
Date: April 2005
@@ -51,9 +57,12 @@ Description:
Default value 0 does not forward any link local frames.
Restricted bits:
- 0: 01-80-C2-00-00-00 Bridge Group Address used for STP
- 1: 01-80-C2-00-00-01 (MAC Control) 802.3 used for MAC PAUSE
- 2: 01-80-C2-00-00-02 (Link Aggregation) 802.3ad
+
+ == ========================================================
+ 0 01-80-C2-00-00-00 Bridge Group Address used for STP
+ 1 01-80-C2-00-00-01 (MAC Control) 802.3 used for MAC PAUSE
+ 2 01-80-C2-00-00-02 (Link Aggregation) 802.3ad
+ == ========================================================
Any values not setting these bits can be used. Take special
care when forwarding control frames e.g. 802.1X-PAE or LLDP.
@@ -73,9 +82,12 @@ KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
Description:
Indicates the current physical link state of the interface.
- Posssible values are:
- 0: physical link is down
- 1: physical link is up
+ Possible values are:
+
+ == =====================
+ 0 physical link is down
+ 1 physical link is up
+ == =====================
Note: some special devices, e.g: bonding and team drivers will
allow this attribute to be written to force a link state for
@@ -124,15 +136,34 @@ Description:
authentication is performed (e.g: 802.1x). 'link_mode' attribute
will also reflect the dormant state.
-What: /sys/clas/net/<iface>/duplex
+What: /sys/class/net/<iface>/testing
+Date: April 2002
+KernelVersion: 5.8
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates whether the interface is under test. Possible
+ values are:
+
+ == =============================
+ 0 interface is not being tested
+ 1 interface is being tested
+ == =============================
+
+ When an interface is under test, it cannot be expected
+ to pass packets as normal.
+
+What: /sys/class/net/<iface>/duplex
Date: October 2009
KernelVersion: 2.6.33
Contact: netdev@vger.kernel.org
Description:
Indicates the interface latest or current duplex value. Possible
values are:
- half: half duplex
- full: full duplex
+
+ ==== ===========
+ half half duplex
+ full full duplex
+ ==== ===========
Note: This attribute is only valid for interfaces that implement
the ethtool get_link_ksettings method (mostly Ethernet).
@@ -183,8 +214,11 @@ Description:
Indicates the interface link mode, as a decimal number. This
attribute should be used in conjunction with 'dormant' attribute
to determine the interface usability. Possible values:
- 0: default link mode
- 1: dormant link mode
+
+ == =================
+ 0 default link mode
+ 1 dormant link mode
+ == =================
What: /sys/class/net/<iface>/mtu
Date: April 2005
@@ -213,7 +247,9 @@ KernelVersion: 2.6.17
Contact: netdev@vger.kernel.org
Description:
Indicates the interface RFC2863 operational state as a string.
+
Possible values are:
+
"unknown", "notpresent", "down", "lowerlayerdown", "testing",
"dormant", "up".
@@ -301,3 +337,18 @@ Contact: netdev@vger.kernel.org
Description:
32-bit unsigned integer counting the number of times the link has
been down
+
+What: /sys/class/net/<iface>/threaded
+Date: Jan 2021
+KernelVersion: 5.12
+Contact: netdev@vger.kernel.org
+Description:
+ Boolean value to control the threaded mode per device. User could
+ set this value to enable/disable threaded mode for all napi
+ belonging to this device, without the need to do device up/down.
+
+ Possible values:
+ == ==================================
+ 0 threaded mode disabled for this dev
+ 1 threaded mode enabled for this dev
+ == ==================================
diff --git a/Documentation/ABI/testing/sysfs-class-net-batman-adv b/Documentation/ABI/testing/sysfs-class-net-batman-adv
deleted file mode 100644
index 898106849e27..000000000000
--- a/Documentation/ABI/testing/sysfs-class-net-batman-adv
+++ /dev/null
@@ -1,30 +0,0 @@
-
-What: /sys/class/net/<iface>/batman-adv/elp_interval
-Date: Feb 2014
-Contact: Linus Lüssing <linus.luessing@web.de>
-Description:
- Defines the interval in milliseconds in which batman
- emits probing packets for neighbor sensing (ELP).
-
-What: /sys/class/net/<iface>/batman-adv/iface_status
-Date: May 2010
-Contact: Marek Lindner <mareklindner@neomailbox.ch>
-Description:
- Indicates the status of <iface> as it is seen by batman.
-
-What: /sys/class/net/<iface>/batman-adv/mesh_iface
-Date: May 2010
-Contact: Marek Lindner <mareklindner@neomailbox.ch>
-Description:
- The /sys/class/net/<iface>/batman-adv/mesh_iface file
- displays the batman mesh interface this <iface>
- currently is associated with.
-
-What: /sys/class/net/<iface>/batman-adv/throughput_override
-Date: Feb 2014
-Contact: Antonio Quartulli <a@unstable.cc>
-description:
- Defines the throughput value to be used by B.A.T.M.A.N. V
- when estimating the link throughput using this interface.
- If the value is set to 0 then batman-adv will try to
- estimate the throughput by itself.
diff --git a/Documentation/ABI/testing/sysfs-class-net-cdc_ncm b/Documentation/ABI/testing/sysfs-class-net-cdc_ncm
index f7be0e88b139..06416d0e163d 100644
--- a/Documentation/ABI/testing/sysfs-class-net-cdc_ncm
+++ b/Documentation/ABI/testing/sysfs-class-net-cdc_ncm
@@ -91,9 +91,9 @@ Date: May 2014
KernelVersion: 3.16
Contact: Bjørn Mork <bjorn@mork.no>
Description:
- Bit 0: 16-bit NTB supported (set to 1)
- Bit 1: 32-bit NTB supported
- Bits 2 – 15: reserved (reset to zero; must be ignored by host)
+ - Bit 0: 16-bit NTB supported (set to 1)
+ - Bit 1: 32-bit NTB supported
+ - Bits 2 – 15: reserved (reset to zero; must be ignored by host)
What: /sys/class/net/<iface>/cdc_ncm/dwNtbInMaxSize
Date: May 2014
diff --git a/Documentation/ABI/testing/sysfs-class-net-dsa b/Documentation/ABI/testing/sysfs-class-net-dsa
index f240221e071e..e2da26b44dd0 100644
--- a/Documentation/ABI/testing/sysfs-class-net-dsa
+++ b/Documentation/ABI/testing/sysfs-class-net-dsa
@@ -1,7 +1,14 @@
-What: /sys/class/net/<iface>/tagging
+What: /sys/class/net/<iface>/dsa/tagging
Date: August 2018
KernelVersion: 4.20
Contact: netdev@vger.kernel.org
Description:
- String indicating the type of tagging protocol used by the
- DSA slave network device.
+ On read, this file returns a string indicating the type of
+ tagging protocol used by the DSA network devices that are
+ attached to this master interface.
+ On write, this file changes the tagging protocol of the
+ attached DSA switches, if this operation is supported by the
+ driver. Changing the tagging protocol must be done with the DSA
+ interfaces and the master interface all administratively down.
+ See the "name" field of each registered struct dsa_device_ops
+ for a list of valid values.
diff --git a/Documentation/ABI/testing/sysfs-class-net-mesh b/Documentation/ABI/testing/sysfs-class-net-mesh
deleted file mode 100644
index c2b956d44a95..000000000000
--- a/Documentation/ABI/testing/sysfs-class-net-mesh
+++ /dev/null
@@ -1,108 +0,0 @@
-
-What: /sys/class/net/<mesh_iface>/mesh/aggregated_ogms
-Date: May 2010
-Contact: Marek Lindner <mareklindner@neomailbox.ch>
-Description:
- Indicates whether the batman protocol messages of the
- mesh <mesh_iface> shall be aggregated or not.
-
-What: /sys/class/net/<mesh_iface>/mesh/<vlan_subdir>/ap_isolation
-Date: May 2011
-Contact: Antonio Quartulli <a@unstable.cc>
-Description:
- Indicates whether the data traffic going from a
- wireless client to another wireless client will be
- silently dropped. <vlan_subdir> is empty when referring
- to the untagged lan.
-
-What: /sys/class/net/<mesh_iface>/mesh/bonding
-Date: June 2010
-Contact: Simon Wunderlich <sw@simonwunderlich.de>
-Description:
- Indicates whether the data traffic going through the
- mesh will be sent using multiple interfaces at the
- same time (if available).
-
-What: /sys/class/net/<mesh_iface>/mesh/bridge_loop_avoidance
-Date: November 2011
-Contact: Simon Wunderlich <sw@simonwunderlich.de>
-Description:
- Indicates whether the bridge loop avoidance feature
- is enabled. This feature detects and avoids loops
- between the mesh and devices bridged with the soft
- interface <mesh_iface>.
-
-What: /sys/class/net/<mesh_iface>/mesh/fragmentation
-Date: October 2010
-Contact: Andreas Langer <an.langer@gmx.de>
-Description:
- Indicates whether the data traffic going through the
- mesh will be fragmented or silently discarded if the
- packet size exceeds the outgoing interface MTU.
-
-What: /sys/class/net/<mesh_iface>/mesh/gw_bandwidth
-Date: October 2010
-Contact: Marek Lindner <mareklindner@neomailbox.ch>
-Description:
- Defines the bandwidth which is propagated by this
- node if gw_mode was set to 'server'.
-
-What: /sys/class/net/<mesh_iface>/mesh/gw_mode
-Date: October 2010
-Contact: Marek Lindner <mareklindner@neomailbox.ch>
-Description:
- Defines the state of the gateway features. Can be
- either 'off', 'client' or 'server'.
-
-What: /sys/class/net/<mesh_iface>/mesh/gw_sel_class
-Date: October 2010
-Contact: Marek Lindner <mareklindner@neomailbox.ch>
-Description:
- Defines the selection criteria this node will use
- to choose a gateway if gw_mode was set to 'client'.
-
-What: /sys/class/net/<mesh_iface>/mesh/hop_penalty
-Date: Oct 2010
-Contact: Linus Lüssing <linus.luessing@web.de>
-Description:
- Defines the penalty which will be applied to an
- originator message's tq-field on every hop.
-
-What: /sys/class/net/<mesh_iface>/mesh/isolation_mark
-Date: Nov 2013
-Contact: Antonio Quartulli <a@unstable.cc>
-Description:
- Defines the isolation mark (and its bitmask) which
- is used to classify clients as "isolated" by the
- Extended Isolation feature.
-
-What: /sys/class/net/<mesh_iface>/mesh/multicast_mode
-Date: Feb 2014
-Contact: Linus Lüssing <linus.luessing@web.de>
-Description:
- Indicates whether multicast optimizations are enabled
- or disabled. If set to zero then all nodes in the
- mesh are going to use classic flooding for any
- multicast packet with no optimizations.
-
-What: /sys/class/net/<mesh_iface>/mesh/network_coding
-Date: Nov 2012
-Contact: Martin Hundeboll <martin@hundeboll.net>
-Description:
- Controls whether Network Coding (using some magic
- to send fewer wifi packets but still the same
- content) is enabled or not.
-
-What: /sys/class/net/<mesh_iface>/mesh/orig_interval
-Date: May 2010
-Contact: Marek Lindner <mareklindner@neomailbox.ch>
-Description:
- Defines the interval in milliseconds in which batman
- sends its protocol messages.
-
-What: /sys/class/net/<mesh_iface>/mesh/routing_algo
-Date: Dec 2011
-Contact: Marek Lindner <mareklindner@neomailbox.ch>
-Description:
- Defines the routing procotol this mesh instance
- uses to find the optimal paths through the mesh.
diff --git a/Documentation/ABI/testing/sysfs-class-net-peak_usb b/Documentation/ABI/testing/sysfs-class-net-peak_usb
new file mode 100644
index 000000000000..9e3d0bf4d4b2
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-net-peak_usb
@@ -0,0 +1,19 @@
+
+What: /sys/class/net/<iface>/peak_usb/can_channel_id
+Date: November 2022
+KernelVersion: 6.2
+Contact: Stephane Grosjean <s.grosjean@peak-system.com>
+Description:
+ PEAK PCAN-USB devices support user-configurable CAN channel
+ identifiers. Contrary to a USB serial number, these identifiers
+ are writable and can be set per CAN interface. This means that
+ if a USB device exports multiple CAN interfaces, each of them
+ can be assigned a unique channel ID.
+ This attribute provides read-only access to the currently
+ configured value of the channel identifier. Depending on the
+ device type, the identifier has a length of 8 or 32 bit. The
+ value read from this attribute is always an 8 digit 32 bit
+ hexadecimal value in big endian format. If the device only
+ supports an 8 bit identifier, the upper 24 bit of the value are
+ set to zero.
+
diff --git a/Documentation/ABI/testing/sysfs-class-net-phydev b/Documentation/ABI/testing/sysfs-class-net-phydev
index 6ebabfb27912..31615c59bff9 100644
--- a/Documentation/ABI/testing/sysfs-class-net-phydev
+++ b/Documentation/ABI/testing/sysfs-class-net-phydev
@@ -11,26 +11,65 @@ Date: February 2014
KernelVersion: 3.15
Contact: netdev@vger.kernel.org
Description:
- Boolean value indicating whether the PHY device has
- any fixups registered against it (phy_register_fixup)
+ This attribute contains the boolean value whether a given PHY
+ device has had any "fixup" workaround running on it, encoded as
+ a boolean. This information is provided to help troubleshooting
+ PHY configurations.
What: /sys/class/mdio_bus/<bus>/<device>/phy_id
Date: November 2012
KernelVersion: 3.8
Contact: netdev@vger.kernel.org
Description:
- 32-bit hexadecimal value corresponding to the PHY device's OUI,
- model and revision number.
+ This attribute contains the 32-bit PHY Identifier as reported
+ by the device during bus enumeration, encoded in hexadecimal.
+ This ID is used to match the device with the appropriate
+ driver.
+
+What: /sys/class/mdio_bus/<bus>/<device>/c45_phy_ids/mmd<n>_device_id
+Date: June 2025
+KernelVersion: 6.17
+Contact: netdev@vger.kernel.org
+Description:
+ This attribute contains the 32-bit PHY Identifier as reported
+ by the device during bus enumeration, encoded in hexadecimal.
+ These C45 IDs are used to match the device with the appropriate
+ driver. These files are invisible to the C22 device.
What: /sys/class/mdio_bus/<bus>/<device>/phy_interface
Date: February 2014
KernelVersion: 3.15
Contact: netdev@vger.kernel.org
Description:
- String value indicating the PHY interface, possible
- values are:.
+ This attribute contains the PHY interface as configured by the
+ Ethernet driver during bus enumeration, encoded in string.
+ This interface mode is used to configure the Ethernet MAC with the
+ appropriate mode for its data lines to the PHY hardware.
+
+ Possible values are:
+
<empty> (not available), mii, gmii, sgmii, tbi, rev-mii,
rmii, rgmii, rgmii-id, rgmii-rxid, rgmii-txid, rtbi, smii
xgmii, moca, qsgmii, trgmii, 1000base-x, 2500base-x, rxaui,
xaui, 10gbase-kr, unknown
+What: /sys/class/mdio_bus/<bus>/<device>/phy_standalone
+Date: May 2019
+KernelVersion: 5.3
+Contact: netdev@vger.kernel.org
+Description:
+ Boolean value indicating whether the PHY device is used in
+ standalone mode, without a net_device associated, by PHYLINK.
+ Attribute created only when this is the case.
+
+What: /sys/class/mdio_bus/<bus>/<device>/phy_dev_flags
+Date: March 2021
+KernelVersion: 5.13
+Contact: netdev@vger.kernel.org
+Description:
+ 32-bit hexadecimal number representing a bit mask of the
+ configuration bits passed from the consumer of the PHY
+ (Ethernet MAC, switch, etc.) to the PHY driver. The flags are
+ only used internally by the kernel and their placement are
+ not meant to be stable across kernel versions. This is intended
+ for facilitating the debugging of PHY drivers.
diff --git a/Documentation/ABI/testing/sysfs-class-net-qmi b/Documentation/ABI/testing/sysfs-class-net-qmi
index 7122d6264c49..b028f5bc86db 100644
--- a/Documentation/ABI/testing/sysfs-class-net-qmi
+++ b/Documentation/ABI/testing/sysfs-class-net-qmi
@@ -29,7 +29,7 @@ Contact: Bjørn Mork <bjorn@mork.no>
Description:
Unsigned integer.
- Write a number ranging from 1 to 127 to add a qmap mux
+ Write a number ranging from 1 to 254 to add a qmap mux
based network device, supported by recent Qualcomm based
modems.
@@ -46,5 +46,31 @@ Contact: Bjørn Mork <bjorn@mork.no>
Description:
Unsigned integer.
- Write a number ranging from 1 to 127 to delete a previously
+ Write a number ranging from 1 to 254 to delete a previously
created qmap mux based network device.
+
+What: /sys/class/net/<qmimux iface>/qmap/mux_id
+Date: January 2021
+KernelVersion: 5.12
+Contact: Daniele Palmas <dnlplm@gmail.com>
+Description:
+ Unsigned integer
+
+ Indicates the mux id associated to the qmimux network interface
+ during its creation.
+
+What: /sys/class/net/<iface>/qmi/pass_through
+Date: January 2021
+KernelVersion: 5.12
+Contact: Subash Abhinov Kasiviswanathan <quic_subashab@quicinc.com>
+Description:
+ Boolean. Default: 'N'
+
+ Set this to 'Y' to enable 'pass-through' mode, allowing packets
+ in MAP format to be passed on to the stack.
+
+ Normally the rmnet driver (CONFIG_RMNET) is then used to process
+ and demultiplex these packets.
+
+ 'Pass-through' mode can be enabled when the device is in
+ 'raw-ip' mode only.
diff --git a/Documentation/ABI/testing/sysfs-class-net-queues b/Documentation/ABI/testing/sysfs-class-net-queues
index 978b76358661..84aa25e0d14d 100644
--- a/Documentation/ABI/testing/sysfs-class-net-queues
+++ b/Documentation/ABI/testing/sysfs-class-net-queues
@@ -1,4 +1,4 @@
-What: /sys/class/<iface>/queues/rx-<queue>/rps_cpus
+What: /sys/class/net/<iface>/queues/rx-<queue>/rps_cpus
Date: March 2010
KernelVersion: 2.6.35
Contact: netdev@vger.kernel.org
@@ -8,7 +8,7 @@ Description:
network device queue. Possible values depend on the number
of available CPU(s) in the system.
-What: /sys/class/<iface>/queues/rx-<queue>/rps_flow_cnt
+What: /sys/class/net/<iface>/queues/rx-<queue>/rps_flow_cnt
Date: April 2010
KernelVersion: 2.6.35
Contact: netdev@vger.kernel.org
@@ -16,7 +16,7 @@ Description:
Number of Receive Packet Steering flows being currently
processed by this particular network device receive queue.
-What: /sys/class/<iface>/queues/tx-<queue>/tx_timeout
+What: /sys/class/net/<iface>/queues/tx-<queue>/tx_timeout
Date: November 2011
KernelVersion: 3.3
Contact: netdev@vger.kernel.org
@@ -24,7 +24,7 @@ Description:
Indicates the number of transmit timeout events seen by this
network interface transmit queue.
-What: /sys/class/<iface>/queues/tx-<queue>/tx_maxrate
+What: /sys/class/net/<iface>/queues/tx-<queue>/tx_maxrate
Date: March 2015
KernelVersion: 4.1
Contact: netdev@vger.kernel.org
@@ -32,17 +32,17 @@ Description:
A Mbps max-rate set for the queue, a value of zero means disabled,
default is disabled.
-What: /sys/class/<iface>/queues/tx-<queue>/xps_cpus
+What: /sys/class/net/<iface>/queues/tx-<queue>/xps_cpus
Date: November 2010
KernelVersion: 2.6.38
Contact: netdev@vger.kernel.org
Description:
Mask of the CPU(s) currently enabled to participate into the
Transmit Packet Steering packet processing flow for this
- network device transmit queue. Possible vaules depend on the
+ network device transmit queue. Possible values depend on the
number of available CPU(s) in the system.
-What: /sys/class/<iface>/queues/tx-<queue>/xps_rxqs
+What: /sys/class/net/<iface>/queues/tx-<queue>/xps_rxqs
Date: June 2018
KernelVersion: 4.18.0
Contact: netdev@vger.kernel.org
@@ -53,7 +53,7 @@ Description:
number of available receive queue(s) in the network device.
Default is disabled.
-What: /sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/hold_time
+What: /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/hold_time
Date: November 2011
KernelVersion: 3.3
Contact: netdev@vger.kernel.org
@@ -62,7 +62,7 @@ Description:
of this particular network device transmit queue.
Default value is 1000.
-What: /sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/inflight
+What: /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/inflight
Date: November 2011
KernelVersion: 3.3
Contact: netdev@vger.kernel.org
@@ -70,7 +70,7 @@ Description:
Indicates the number of bytes (objects) in flight on this
network device transmit queue.
-What: /sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/limit
+What: /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/limit
Date: November 2011
KernelVersion: 3.3
Contact: netdev@vger.kernel.org
@@ -79,7 +79,7 @@ Description:
on this network device transmit queue. This value is clamped
to be within the bounds defined by limit_max and limit_min.
-What: /sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/limit_max
+What: /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/limit_max
Date: November 2011
KernelVersion: 3.3
Contact: netdev@vger.kernel.org
@@ -88,7 +88,7 @@ Description:
queued on this network device transmit queue. See
include/linux/dynamic_queue_limits.h for the default value.
-What: /sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/limit_min
+What: /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/limit_min
Date: November 2011
KernelVersion: 3.3
Contact: netdev@vger.kernel.org
@@ -96,3 +96,26 @@ Description:
Indicates the absolute minimum limit of bytes allowed to be
queued on this network device transmit queue. Default value is
0.
+
+What: /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/stall_thrs
+Date: Jan 2024
+KernelVersion: 6.9
+Contact: netdev@vger.kernel.org
+Description:
+ Tx completion stall detection threshold in ms. Kernel will
+ guarantee to detect all stalls longer than this threshold but
+ may also detect stalls longer than half of the threshold.
+
+What: /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/stall_cnt
+Date: Jan 2024
+KernelVersion: 6.9
+Contact: netdev@vger.kernel.org
+Description:
+ Number of detected Tx completion stalls.
+
+What: /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/stall_max
+Date: Jan 2024
+KernelVersion: 6.9
+Contact: netdev@vger.kernel.org
+Description:
+ Longest detected Tx completion stall. Write 0 to clear.
diff --git a/Documentation/ABI/testing/sysfs-class-net-statistics b/Documentation/ABI/testing/sysfs-class-net-statistics
index 397118de7b5e..53e508c6936a 100644
--- a/Documentation/ABI/testing/sysfs-class-net-statistics
+++ b/Documentation/ABI/testing/sysfs-class-net-statistics
@@ -1,4 +1,4 @@
-What: /sys/class/<iface>/statistics/collisions
+What: /sys/class/net/<iface>/statistics/collisions
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -6,7 +6,7 @@ Description:
Indicates the number of collisions seen by this network device.
This value might not be relevant with all MAC layers.
-What: /sys/class/<iface>/statistics/multicast
+What: /sys/class/net/<iface>/statistics/multicast
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -14,7 +14,7 @@ Description:
Indicates the number of multicast packets received by this
network device.
-What: /sys/class/<iface>/statistics/rx_bytes
+What: /sys/class/net/<iface>/statistics/rx_bytes
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -23,7 +23,7 @@ Description:
See the network driver for the exact meaning of when this
value is incremented.
-What: /sys/class/<iface>/statistics/rx_compressed
+What: /sys/class/net/<iface>/statistics/rx_compressed
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -32,7 +32,7 @@ Description:
network device. This value might only be relevant for interfaces
that support packet compression (e.g: PPP).
-What: /sys/class/<iface>/statistics/rx_crc_errors
+What: /sys/class/net/<iface>/statistics/rx_crc_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -41,7 +41,7 @@ Description:
by this network device. Note that the specific meaning might
depend on the MAC layer used by the interface.
-What: /sys/class/<iface>/statistics/rx_dropped
+What: /sys/class/net/<iface>/statistics/rx_dropped
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -51,7 +51,15 @@ Description:
packet processing. See the network driver for the exact
meaning of this value.
-What: /sys/class/<iface>/statistics/rx_fifo_errors
+What: /sys/class/net/<iface>/statistics/rx_errors
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the number of receive errors on this network device.
+ See the network driver for the exact meaning of this value.
+
+What: /sys/class/net/<iface>/statistics/rx_fifo_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -60,7 +68,7 @@ Description:
network device. See the network driver for the exact
meaning of this value.
-What: /sys/class/<iface>/statistics/rx_frame_errors
+What: /sys/class/net/<iface>/statistics/rx_frame_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -70,7 +78,7 @@ Description:
on the MAC layer protocol used. See the network driver for
the exact meaning of this value.
-What: /sys/class/<iface>/statistics/rx_length_errors
+What: /sys/class/net/<iface>/statistics/rx_length_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -79,7 +87,7 @@ Description:
error, oversized or undersized. See the network driver for the
exact meaning of this value.
-What: /sys/class/<iface>/statistics/rx_missed_errors
+What: /sys/class/net/<iface>/statistics/rx_missed_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -88,7 +96,15 @@ Description:
due to lack of capacity in the receive side. See the network
driver for the exact meaning of this value.
-What: /sys/class/<iface>/statistics/rx_over_errors
+What: /sys/class/net/<iface>/statistics/rx_nohandler
+Date: February 2016
+KernelVersion: 4.6
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the number of received packets that were dropped on
+ an inactive device by the network core.
+
+What: /sys/class/net/<iface>/statistics/rx_over_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -98,7 +114,7 @@ Description:
(e.g: larger than MTU). See the network driver for the exact
meaning of this value.
-What: /sys/class/<iface>/statistics/rx_packets
+What: /sys/class/net/<iface>/statistics/rx_packets
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -106,7 +122,7 @@ Description:
Indicates the total number of good packets received by this
network device.
-What: /sys/class/<iface>/statistics/tx_aborted_errors
+What: /sys/class/net/<iface>/statistics/tx_aborted_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -116,7 +132,7 @@ Description:
a medium collision). See the network driver for the exact
meaning of this value.
-What: /sys/class/<iface>/statistics/tx_bytes
+What: /sys/class/net/<iface>/statistics/tx_bytes
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -127,7 +143,7 @@ Description:
transmitted packets or all packets that have been queued for
transmission.
-What: /sys/class/<iface>/statistics/tx_carrier_errors
+What: /sys/class/net/<iface>/statistics/tx_carrier_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -136,7 +152,7 @@ Description:
because of carrier errors (e.g: physical link down). See the
network driver for the exact meaning of this value.
-What: /sys/class/<iface>/statistics/tx_compressed
+What: /sys/class/net/<iface>/statistics/tx_compressed
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -145,7 +161,7 @@ Description:
this might only be relevant for devices that support
compression (e.g: PPP).
-What: /sys/class/<iface>/statistics/tx_dropped
+What: /sys/class/net/<iface>/statistics/tx_dropped
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -154,7 +170,7 @@ Description:
See the driver for the exact reasons as to why the packets were
dropped.
-What: /sys/class/<iface>/statistics/tx_errors
+What: /sys/class/net/<iface>/statistics/tx_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -163,7 +179,7 @@ Description:
a network device. See the driver for the exact reasons as to
why the packets were dropped.
-What: /sys/class/<iface>/statistics/tx_fifo_errors
+What: /sys/class/net/<iface>/statistics/tx_fifo_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -172,7 +188,7 @@ Description:
FIFO error. See the driver for the exact reasons as to why the
packets were dropped.
-What: /sys/class/<iface>/statistics/tx_heartbeat_errors
+What: /sys/class/net/<iface>/statistics/tx_heartbeat_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -181,7 +197,7 @@ Description:
reported as heartbeat errors. See the driver for the exact
reasons as to why the packets were dropped.
-What: /sys/class/<iface>/statistics/tx_packets
+What: /sys/class/net/<iface>/statistics/tx_packets
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -190,7 +206,7 @@ Description:
device. See the driver for whether this reports the number of all
attempted or successful transmissions.
-What: /sys/class/<iface>/statistics/tx_window_errors
+What: /sys/class/net/<iface>/statistics/tx_window_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
diff --git a/Documentation/ABI/testing/sysfs-class-ocxl b/Documentation/ABI/testing/sysfs-class-ocxl
index b5b1fa197592..847a7edc3113 100644
--- a/Documentation/ABI/testing/sysfs-class-ocxl
+++ b/Documentation/ABI/testing/sysfs-class-ocxl
@@ -11,8 +11,11 @@ Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
Number of contexts for the AFU, in the format <n>/<max>
where:
- n: number of currently active contexts, for debug
- max: maximum number of contexts supported by the AFU
+
+ ==== ===============================================
+ n number of currently active contexts, for debug
+ max maximum number of contexts supported by the AFU
+ ==== ===============================================
What: /sys/class/ocxl/<afu name>/pp_mmio_size
Date: January 2018
@@ -33,3 +36,16 @@ Date: January 2018
Contact: linuxppc-dev@lists.ozlabs.org
Description: read/write
Give access the global mmio area for the AFU
+
+What: /sys/class/ocxl/<afu name>/reload_on_reset
+Date: February 2020
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: read/write
+ Control whether the FPGA is reloaded on a link reset. Enabled
+ through a vendor-specific logic block on the FPGA.
+
+ =========== ===========================================
+ 0 Do not reload FPGA image from flash
+ 1 Reload FPGA image from flash
+ unavailable The device does not support this capability
+ =========== ===========================================
diff --git a/Documentation/ABI/testing/sysfs-class-pktcdvd b/Documentation/ABI/testing/sysfs-class-pktcdvd
deleted file mode 100644
index dde4f26d0780..000000000000
--- a/Documentation/ABI/testing/sysfs-class-pktcdvd
+++ /dev/null
@@ -1,93 +0,0 @@
-sysfs interface
----------------
-The pktcdvd module (packet writing driver) creates the following files in the
-sysfs: (<devid> is in the format major:minor)
-
-What: /sys/class/pktcdvd/add
-What: /sys/class/pktcdvd/remove
-What: /sys/class/pktcdvd/device_map
-Date: Oct. 2006
-KernelVersion: 2.6.20
-Contact: Thomas Maier <balagi@justmail.de>
-Description:
-
- add: (WO) Write a block device id (major:minor) to
- create a new pktcdvd device and map it to the
- block device.
-
- remove: (WO) Write the pktcdvd device id (major:minor)
- to remove the pktcdvd device.
-
- device_map: (RO) Shows the device mapping in format:
- pktcdvd[0-7] <pktdevid> <blkdevid>
-
-
-What: /sys/class/pktcdvd/pktcdvd[0-7]/dev
-What: /sys/class/pktcdvd/pktcdvd[0-7]/uevent
-Date: Oct. 2006
-KernelVersion: 2.6.20
-Contact: Thomas Maier <balagi@justmail.de>
-Description:
- dev: (RO) Device id
-
- uevent: (WO) To send a uevent
-
-
-What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/packets_started
-What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/packets_finished
-What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/kb_written
-What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/kb_read
-What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/kb_read_gather
-What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/reset
-Date: Oct. 2006
-KernelVersion: 2.6.20
-Contact: Thomas Maier <balagi@justmail.de>
-Description:
- packets_started: (RO) Number of started packets.
-
- packets_finished: (RO) Number of finished packets.
-
- kb_written: (RO) kBytes written.
-
- kb_read: (RO) kBytes read.
-
- kb_read_gather: (RO) kBytes read to fill write packets.
-
- reset: (WO) Write any value to it to reset
- pktcdvd device statistic values, like
- bytes read/written.
-
-
-What: /sys/class/pktcdvd/pktcdvd[0-7]/write_queue/size
-What: /sys/class/pktcdvd/pktcdvd[0-7]/write_queue/congestion_off
-What: /sys/class/pktcdvd/pktcdvd[0-7]/write_queue/congestion_on
-Date: Oct. 2006
-KernelVersion: 2.6.20
-Contact: Thomas Maier <balagi@justmail.de>
-Description:
- size: (RO) Contains the size of the bio write queue.
-
- congestion_off: (RW) If bio write queue size is below this mark,
- accept new bio requests from the block layer.
-
- congestion_on: (RW) If bio write queue size is higher as this
- mark, do no longer accept bio write requests
- from the block layer and wait till the pktcdvd
- device has processed enough bio's so that bio
- write queue size is below congestion off mark.
- A value of <= 0 disables congestion control.
-
-
-Example:
---------
-To use the pktcdvd sysfs interface directly, you can do:
-
-# create a new pktcdvd device mapped to /dev/hdc
-echo "22:0" >/sys/class/pktcdvd/add
-cat /sys/class/pktcdvd/device_map
-# assuming device pktcdvd0 was created, look at stat's
-cat /sys/class/pktcdvd/pktcdvd0/stat/kb_written
-# print the device id of the mapped block device
-fgrep pktcdvd0 /sys/class/pktcdvd/device_map
-# remove device, using pktcdvd0 device id 253:0
-echo "253:0" >/sys/class/pktcdvd/remove
diff --git a/Documentation/ABI/testing/sysfs-class-platform-profile b/Documentation/ABI/testing/sysfs-class-platform-profile
new file mode 100644
index 000000000000..dc72adfb830a
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-platform-profile
@@ -0,0 +1,48 @@
+What: /sys/class/platform-profile/platform-profile-X/name
+Date: March 2025
+KernelVersion: 6.14
+Description: Name of the class device given by the driver.
+
+ RO
+
+What: /sys/class/platform-profile/platform-profile-X/choices
+Date: March 2025
+KernelVersion: 6.14
+Description: This file contains a space-separated list of profiles supported
+ for this device.
+
+ Drivers must use the following standard profile-names:
+
+ ==================== ========================================
+ low-power Low power consumption
+ cool Cooler operation
+ quiet Quieter operation
+ balanced Balance between low power consumption
+ and performance
+ balanced-performance Balance between performance and low
+ power consumption with a slight bias
+ towards performance
+ performance High performance operation
+ custom Driver defined custom profile
+ ==================== ========================================
+
+ RO
+
+What: /sys/class/platform-profile/platform-profile-X/profile
+Date: March 2025
+KernelVersion: 6.14
+Description: Reading this file gives the current selected profile for this
+ device. Writing this file with one of the strings from
+ platform_profile_choices changes the profile to the new value.
+
+ This file can be monitored for changes by polling for POLLPRI,
+ POLLPRI will be signaled on any changes, independent of those
+ changes coming from a userspace write; or coming from another
+ source such as e.g. a hotkey triggered profile change handled
+ either directly by the embedded-controller or fully handled
+ inside the kernel.
+
+ This file may also emit the string 'custom' to indicate
+ that the driver is using a driver defined custom profile.
+
+ RW
diff --git a/Documentation/ABI/testing/sysfs-class-power b/Documentation/ABI/testing/sysfs-class-power
index 5e23e22dce1b..4b21d5d23251 100644
--- a/Documentation/ABI/testing/sysfs-class-power
+++ b/Documentation/ABI/testing/sysfs-class-power
@@ -1,4 +1,4 @@
-===== General Properties =====
+**General Properties**
What: /sys/class/power_supply/<supply_name>/manufacturer
Date: May 2007
@@ -34,16 +34,240 @@ Description:
Describes the main type of the supply.
Access: Read
- Valid values: "Battery", "UPS", "Mains", "USB"
+ Valid values: "Battery", "UPS", "Mains", "USB", "Wireless"
-===== Battery Properties =====
+**Battery and USB properties**
+
+What: /sys/class/power_supply/<supply_name>/current_avg
+Date: May 2007
+Contact: linux-pm@vger.kernel.org
+Description:
+ Battery:
+
+ Reports an average IBAT current reading for the battery, over
+ a fixed period. Normally devices will provide a fixed interval
+ in which they average readings to smooth out the reported
+ value.
+
+ USB:
+
+ Reports an average IBUS current reading over a fixed period.
+ Normally devices will provide a fixed interval in which they
+ average readings to smooth out the reported value.
+
+ Access: Read
+
+ Valid values: Represented in microamps. Negative values are
+ used for discharging batteries, positive values for charging
+ batteries and for USB IBUS current.
+
+What: /sys/class/power_supply/<supply_name>/current_max
+Date: October 2010
+Contact: linux-pm@vger.kernel.org
+Description:
+ Battery:
+
+ Reports the maximum IBAT current allowed into the battery.
+
+ USB:
+
+ Reports the maximum IBUS current the supply can support.
+
+ Access: Read
+ Valid values: Represented in microamps
+
+What: /sys/class/power_supply/<supply_name>/current_now
+Date: May 2007
+Contact: linux-pm@vger.kernel.org
+Description:
+
+ Battery:
+
+ Reports an instant, single IBAT current reading for the
+ battery. This value is not averaged/smoothed.
+
+ Access: Read
+
+ USB:
+
+ Reports the IBUS current supplied now. This value is generally
+ read-only reporting, unless the 'online' state of the supply
+ is set to be programmable, in which case this value can be set
+ within the reported min/max range.
+
+ Access: Read, Write
+
+ Valid values: Represented in microamps. Negative values are
+ used for discharging batteries, positive values for charging
+ batteries and for USB IBUS current.
+
+What: /sys/class/power_supply/<supply_name>/temp
+Date: May 2007
+Contact: linux-pm@vger.kernel.org
+Description:
+ Battery:
+
+ Reports the current TBAT battery temperature reading.
+
+ USB:
+
+ Reports the current supply temperature reading. This would
+ normally be the internal temperature of the device itself
+ (e.g TJUNC temperature of an IC)
+
+ Access: Read
+
+ Valid values: Represented in 1/10 Degrees Celsius
+
+What: /sys/class/power_supply/<supply_name>/temp_alert_max
+Date: July 2012
+Contact: linux-pm@vger.kernel.org
+Description:
+ Battery:
+
+ Maximum TBAT temperature trip-wire value where the supply will
+ notify user-space of the event.
+
+ USB:
+
+ Maximum supply temperature trip-wire value where the supply
+ will notify user-space of the event.
+
+ This is normally used for the charging scenario where
+ user-space needs to know if the temperature has crossed an
+ upper threshold so it can take appropriate action (e.g. warning
+ user that the temperature is critically high, and charging has
+ stopped).
+
+ Access: Read
+
+ Valid values: Represented in 1/10 Degrees Celsius
+
+What: /sys/class/power_supply/<supply_name>/temp_alert_min
+Date: July 2012
+Contact: linux-pm@vger.kernel.org
+Description:
+
+ Battery:
+
+ Minimum TBAT temperature trip-wire value where the supply will
+ notify user-space of the event.
+
+ USB:
+
+ Minimum supply temperature trip-wire value where the supply
+ will notify user-space of the event.
+
+ This is normally used for the charging scenario where user-space
+ needs to know if the temperature has crossed a lower threshold
+ so it can take appropriate action (e.g. warning user that
+ temperature level is high, and charging current has been
+ reduced accordingly to remedy the situation).
+
+ Access: Read
+
+ Valid values: Represented in 1/10 Degrees Celsius
+
+What: /sys/class/power_supply/<supply_name>/temp_max
+Date: July 2014
+Contact: linux-pm@vger.kernel.org
+Description:
+ Battery:
+
+ Reports the maximum allowed TBAT battery temperature for
+ charging.
+
+ USB:
+
+ Reports the maximum allowed supply temperature for operation.
+
+ Access: Read
+
+ Valid values: Represented in 1/10 Degrees Celsius
+
+What: /sys/class/power_supply/<supply_name>/temp_min
+Date: July 2014
+Contact: linux-pm@vger.kernel.org
+Description:
+ Battery:
+
+ Reports the minimum allowed TBAT battery temperature for
+ charging.
+
+ USB:
+
+ Reports the minimum allowed supply temperature for operation.
+
+ Access: Read
+
+ Valid values: Represented in 1/10 Degrees Celsius
+
+What: /sys/class/power_supply/<supply_name>/voltage_max,
+Date: January 2008
+Contact: linux-pm@vger.kernel.org
+Description:
+ Battery:
+
+ Reports the maximum safe VBAT voltage permitted for the
+ battery, during charging.
+
+ USB:
+
+ Reports the maximum VBUS voltage the supply can support.
+
+ Access: Read
+
+ Valid values: Represented in microvolts
+
+What: /sys/class/power_supply/<supply_name>/voltage_min,
+Date: January 2008
+Contact: linux-pm@vger.kernel.org
+Description:
+ Battery:
+
+ Reports the minimum safe VBAT voltage permitted for the
+ battery, during discharging.
+
+ USB:
+
+ Reports the minimum VBUS voltage the supply can support.
+
+ Access: Read
+
+ Valid values: Represented in microvolts
+
+What: /sys/class/power_supply/<supply_name>/voltage_now,
+Date: May 2007
+Contact: linux-pm@vger.kernel.org
+Description:
+ Battery:
+
+ Reports an instant, single VBAT voltage reading for the
+ battery. This value is not averaged/smoothed.
+
+ Access: Read
+
+ USB:
+
+ Reports the VBUS voltage supplied now. This value is generally
+ read-only reporting, unless the 'online' state of the supply
+ is set to be programmable, in which case this value can be set
+ within the reported min/max range.
+
+ Access: Read, Write
+
+ Valid values: Represented in microvolts
+
+**Battery Properties**
What: /sys/class/power_supply/<supply_name>/capacity
Date: May 2007
Contact: linux-pm@vger.kernel.org
Description:
Fine grain representation of battery capacity.
+
Access: Read
+
Valid values: 0 - 100 (percent)
What: /sys/class/power_supply/<supply_name>/capacity_alert_max
@@ -58,6 +282,7 @@ Description:
low).
Access: Read, Write
+
Valid values: 0 - 100 (percent)
What: /sys/class/power_supply/<supply_name>/capacity_alert_min
@@ -72,6 +297,23 @@ Description:
critically low).
Access: Read, Write
+
+ Valid values: 0 - 100 (percent)
+
+What: /sys/class/power_supply/<supply_name>/capacity_error_margin
+Date: April 2019
+Contact: linux-pm@vger.kernel.org
+Description:
+ Battery capacity measurement becomes unreliable without
+ recalibration. This values provides the maximum error
+ margin expected to exist by the fuel gauge in percent.
+ Values close to 0% will be returned after (re-)calibration
+ has happened. Over time the error margin will increase.
+ 100% means, that the capacity related values are basically
+ completely useless.
+
+ Access: Read
+
Valid values: 0 - 100 (percent)
What: /sys/class/power_supply/<supply_name>/capacity_level
@@ -81,48 +323,113 @@ Description:
Coarse representation of battery capacity.
Access: Read
- Valid values: "Unknown", "Critical", "Low", "Normal", "High",
+
+ Valid values:
+ "Unknown", "Critical", "Low", "Normal", "High",
"Full"
-What: /sys/class/power_supply/<supply_name>/current_avg
-Date: May 2007
+What: /sys/class/power_supply/<supply_name>/charge_control_limit
+Date: Oct 2012
Contact: linux-pm@vger.kernel.org
Description:
- Reports an average IBAT current reading for the battery, over a
- fixed period. Normally devices will provide a fixed interval in
- which they average readings to smooth out the reported value.
+ Maximum allowable charging current. Used for charge rate
+ throttling for thermal cooling or improving battery health.
+
+ Access: Read, Write
- Access: Read
Valid values: Represented in microamps
-What: /sys/class/power_supply/<supply_name>/current_max
-Date: October 2010
+What: /sys/class/power_supply/<supply_name>/charge_control_limit_max
+Date: Oct 2012
Contact: linux-pm@vger.kernel.org
Description:
- Reports the maximum IBAT current allowed into the battery.
+ Maximum legal value for the charge_control_limit property.
Access: Read
+
Valid values: Represented in microamps
-What: /sys/class/power_supply/<supply_name>/current_now
-Date: May 2007
+What: /sys/class/power_supply/<supply_name>/charge_control_start_threshold
+Date: April 2019
Contact: linux-pm@vger.kernel.org
Description:
- Reports an instant, single IBAT current reading for the battery.
- This value is not averaged/smoothed.
+ Represents a battery percentage level, below which charging will
+ begin.
- Access: Read
- Valid values: Represented in microamps
+ Access: Read, Write
+ Valid values: 0 - 100 (percent)
+
+What: /sys/class/power_supply/<supply_name>/charge_control_end_threshold
+Date: April 2019
+Contact: linux-pm@vger.kernel.org
+Description:
+ Represents a battery percentage level, above which charging will
+ stop. Not all hardware is capable of setting this to an arbitrary
+ percentage. Drivers will round written values to the nearest
+ supported value. Reading back the value will show the actual
+ threshold set by the driver.
+
+ Access: Read, Write
+
+ Valid values: 0 - 100 (percent)
What: /sys/class/power_supply/<supply_name>/charge_type
Date: July 2009
Contact: linux-pm@vger.kernel.org
Description:
- Represents the type of charging currently being applied to the
- battery.
+ Select the charging algorithm to use for a battery.
+
+ Standard:
+ Fully charge the battery at a moderate rate.
+ Fast:
+ Quickly charge the battery using fast-charge
+ technology. This is typically harder on the battery
+ than standard charging and may lower its lifespan.
+ Trickle:
+ Users who primarily operate the system while
+ plugged into an external power source can extend
+ battery life with this mode. Vendor tooling may
+ call this "Primarily AC Use".
+ Adaptive:
+ Automatically optimize battery charge rate based
+ on typical usage pattern.
+ Custom:
+ Use the charge_control_* properties to determine
+ when to start and stop charging. Advanced users
+ can use this to drastically extend battery life.
+ Long Life:
+ The charger reduces its charging rate in order to
+ prolong the battery health.
+ Bypass:
+ The charger bypasses the charging path around the
+ integrated converter allowing for a "smart" wall
+ adaptor to perform the power conversion externally.
- Access: Read
- Valid values: "Unknown", "N/A", "Trickle", "Fast"
+ Access: Read, Write
+
+ Reading this returns the current active value, e.g. 'Standard'.
+ Check charge_types to get the values supported by the battery.
+
+ Valid values:
+ "Unknown", "N/A", "Trickle", "Fast", "Standard",
+ "Adaptive", "Custom", "Long Life", "Bypass"
+
+What: /sys/class/power_supply/<supply_name>/charge_types
+Date: December 2024
+Contact: linux-pm@vger.kernel.org
+Description:
+ Identical to charge_type but reading returns a list of supported
+ charge-types with the currently active type surrounded by square
+ brackets, e.g.: "Fast [Standard] Long_Life".
+
+ power_supply class devices may support both charge_type and
+ charge_types for backward compatibility. In this case both will
+ always have the same active value and the active value can be
+ changed by writing either property.
+
+ Note charge-types which contain a space such as "Long Life" will
+ have the space replaced by a '_' resulting in e.g. "Long_Life".
+ When writing charge-types both variants are accepted.
What: /sys/class/power_supply/<supply_name>/charge_term_current
Date: July 2014
@@ -132,6 +439,7 @@ Description:
when the battery is considered full and charging should end.
Access: Read
+
Valid values: Represented in microamps
What: /sys/class/power_supply/<supply_name>/health
@@ -142,9 +450,13 @@ Description:
functionality.
Access: Read
- Valid values: "Unknown", "Good", "Overheat", "Dead",
- "Over voltage", "Unspecified failure", "Cold",
- "Watchdog timer expire", "Safety timer expire"
+
+ Valid values:
+ "Unknown", "Good", "Overheat", "Dead",
+ "Over voltage", "Under voltage", "Unspecified failure", "Cold",
+ "Watchdog timer expire", "Safety timer expire",
+ "Over current", "Calibration required", "Warm",
+ "Cool", "Hot", "No battery", "Blown fuse", "Cell imbalance"
What: /sys/class/power_supply/<supply_name>/precharge_current
Date: June 2017
@@ -154,18 +466,24 @@ Description:
for a battery charge cycle.
Access: Read
+
Valid values: Represented in microamps
What: /sys/class/power_supply/<supply_name>/present
Date: May 2007
Contact: linux-pm@vger.kernel.org
Description:
- Reports whether a battery is present or not in the system.
+ Reports whether a battery is present or not in the system. If the
+ property does not exist, the battery is considered to be present.
Access: Read
+
Valid values:
+
+ == =======
0: Absent
1: Present
+ == =======
What: /sys/class/power_supply/<supply_name>/status
Date: May 2007
@@ -176,76 +494,39 @@ Description:
used to enable/disable charging to the battery.
Access: Read, Write
- Valid values: "Unknown", "Charging", "Discharging",
- "Not charging", "Full"
-
-What: /sys/class/power_supply/<supply_name>/technology
-Date: May 2007
-Contact: linux-pm@vger.kernel.org
-Description:
- Describes the battery technology supported by the supply.
-
- Access: Read
- Valid values: "Unknown", "NiMH", "Li-ion", "Li-poly", "LiFe",
- "NiCd", "LiMn"
-
-What: /sys/class/power_supply/<supply_name>/temp
-Date: May 2007
-Contact: linux-pm@vger.kernel.org
-Description:
- Reports the current TBAT battery temperature reading.
- Access: Read
- Valid values: Represented in 1/10 Degrees Celsius
+ Valid values:
+ "Unknown", "Charging", "Discharging",
+ "Not charging", "Full"
-What: /sys/class/power_supply/<supply_name>/temp_alert_max
-Date: July 2012
+What: /sys/class/power_supply/<supply_name>/charge_behaviour
+Date: November 2021
Contact: linux-pm@vger.kernel.org
Description:
- Maximum TBAT temperature trip-wire value where the supply will
- notify user-space of the event. This is normally used for the
- battery charging scenario where user-space needs to know the
- battery temperature has crossed an upper threshold so it can
- take appropriate action (e.g. warning user that battery level is
- critically high, and charging has stopped).
+ Represents the charging behaviour.
- Access: Read
- Valid values: Represented in 1/10 Degrees Celsius
-
-What: /sys/class/power_supply/<supply_name>/temp_alert_min
-Date: July 2012
-Contact: linux-pm@vger.kernel.org
-Description:
- Minimum TBAT temperature trip-wire value where the supply will
- notify user-space of the event. This is normally used for the
- battery charging scenario where user-space needs to know the
- battery temperature has crossed a lower threshold so it can take
- appropriate action (e.g. warning user that battery level is
- high, and charging current has been reduced accordingly to
- remedy the situation).
+ Access: Read, Write
- Access: Read
- Valid values: Represented in 1/10 Degrees Celsius
+ Valid values:
+ ===================== ========================================
+ auto: Charge normally, respect thresholds
+ inhibit-charge: Do not charge while AC is attached
+ inhibit-charge-awake: inhibit-charge only when device is awake
+ force-discharge: Force discharge while AC is attached
+ ===================== ========================================
-What: /sys/class/power_supply/<supply_name>/temp_max
-Date: July 2014
+What: /sys/class/power_supply/<supply_name>/technology
+Date: May 2007
Contact: linux-pm@vger.kernel.org
Description:
- Reports the maximum allowed TBAT battery temperature for
- charging.
+ Describes the battery technology supported by the supply.
Access: Read
- Valid values: Represented in 1/10 Degrees Celsius
-What: /sys/class/power_supply/<supply_name>/temp_min
-Date: July 2014
-Contact: linux-pm@vger.kernel.org
-Description:
- Reports the minimum allowed TBAT battery temperature for
- charging.
+ Valid values:
+ "Unknown", "NiMH", "Li-ion", "Li-poly", "LiFe",
+ "NiCd", "LiMn"
- Access: Read
- Valid values: Represented in 1/10 Degrees Celsius
What: /sys/class/power_supply/<supply_name>/voltage_avg,
Date: May 2007
@@ -256,72 +537,60 @@ Description:
which they average readings to smooth out the reported value.
Access: Read
- Valid values: Represented in microvolts
-What: /sys/class/power_supply/<supply_name>/voltage_max,
-Date: January 2008
-Contact: linux-pm@vger.kernel.org
-Description:
- Reports the maximum safe VBAT voltage permitted for the battery,
- during charging.
-
- Access: Read
- Valid values: Represented in microvolts
-
-What: /sys/class/power_supply/<supply_name>/voltage_min,
-Date: January 2008
-Contact: linux-pm@vger.kernel.org
-Description:
- Reports the minimum safe VBAT voltage permitted for the battery,
- during discharging.
-
- Access: Read
Valid values: Represented in microvolts
-What: /sys/class/power_supply/<supply_name>/voltage_now,
-Date: May 2007
+What: /sys/class/power_supply/<supply_name>/cycle_count
+Date: January 2010
Contact: linux-pm@vger.kernel.org
Description:
- Reports an instant, single VBAT voltage reading for the battery.
- This value is not averaged/smoothed.
+ Reports the number of full charge + discharge cycles the
+ battery has undergone.
Access: Read
- Valid values: Represented in microvolts
-===== USB Properties =====
+ Valid values:
+ Integer > 0: representing full cycles
+ Integer = 0: cycle_count info is not available
-What: /sys/class/power_supply/<supply_name>/current_avg
-Date: May 2007
-Contact: linux-pm@vger.kernel.org
+What: /sys/class/power_supply/<supply_name>/internal_resistance
+Date: August 2025
+Contact: linux-arm-msm@vger.kernel.org
Description:
- Reports an average IBUS current reading over a fixed period.
- Normally devices will provide a fixed interval in which they
- average readings to smooth out the reported value.
+ Represent the battery's internal resistance, often referred
+ to as Equivalent Series Resistance (ESR). It is a dynamic
+ parameter that reflects the opposition to current flow within
+ the cell. It is not a fixed value but varies significantly
+ based on several operational conditions, including battery
+ state of charge (SoC), temperature, and whether the battery
+ is in a charging or discharging state.
Access: Read
- Valid values: Represented in microamps
+ Valid values: Represented in microohms
-What: /sys/class/power_supply/<supply_name>/current_max
-Date: October 2010
-Contact: linux-pm@vger.kernel.org
+What: /sys/class/power_supply/<supply_name>/state_of_health
+Date: August 2025
+Contact: linux-arm-msm@vger.kernel.org
Description:
- Reports the maximum IBUS current the supply can support.
+ The state_of_health parameter quantifies the overall condition
+ of a battery as a percentage, reflecting its ability to deliver
+ rated performance relative to its original specifications. It is
+ dynamically computed using a combination of learned capacity
+ and impedance-based degradation indicators, both of which evolve
+ over the battery's lifecycle.
+ Note that the exact algorithms are kept secret by most battery
+ vendors and the value from different battery vendors cannot be
+ compared with each other as there is no vendor-agnostic definition
+ of "performance". Also this usually cannot be used for any
+ calculations (i.e. this is not the factor between charge_full and
+ charge_full_design).
Access: Read
- Valid values: Represented in microamps
-What: /sys/class/power_supply/<supply_name>/current_now
-Date: May 2007
-Contact: linux-pm@vger.kernel.org
-Description:
- Reports the IBUS current supplied now. This value is generally
- read-only reporting, unless the 'online' state of the supply
- is set to be programmable, in which case this value can be set
- within the reported min/max range.
+ Valid values: 0 - 100 (percent)
- Access: Read, Write
- Valid values: Represented in microamps
+**USB Properties**
What: /sys/class/power_supply/<supply_name>/input_current_limit
Date: July 2014
@@ -331,83 +600,63 @@ Description:
supply. Normally this is configured based on the type of
connection made (e.g. A configured SDP should output a maximum
of 500mA so the input current limit is set to the same value).
+ Use preferably input_power_limit, and for problems that can be
+ solved using power limit use input_current_limit.
Access: Read, Write
+
Valid values: Represented in microamps
-What: /sys/class/power_supply/<supply_name>/online,
-Date: May 2007
+What: /sys/class/power_supply/<supply_name>/input_voltage_limit
+Date: May 2019
Contact: linux-pm@vger.kernel.org
Description:
- Indicates if VBUS is present for the supply. When the supply is
- online, and the supply allows it, then it's possible to switch
- between online states (e.g. Fixed -> Programmable for a PD_PPS
- USB supply so voltage and current can be controlled).
+ This entry configures the incoming VBUS voltage limit currently
+ set in the supply. Normally this is configured based on
+ system-level knowledge or user input (e.g. This is part of the
+ Pixel C's thermal management strategy to effectively limit the
+ input power to 5V when the screen is on to meet Google's skin
+ temperature targets). Note that this feature should not be
+ used for safety critical things.
+ Use preferably input_power_limit, and for problems that can be
+ solved using power limit use input_voltage_limit.
Access: Read, Write
- Valid values:
- 0: Offline
- 1: Online Fixed - Fixed Voltage Supply
- 2: Online Programmable - Programmable Voltage Supply
-
-What: /sys/class/power_supply/<supply_name>/temp
-Date: May 2007
-Contact: linux-pm@vger.kernel.org
-Description:
- Reports the current supply temperature reading. This would
- normally be the internal temperature of the device itself (e.g
- TJUNC temperature of an IC)
- Access: Read
- Valid values: Represented in 1/10 Degrees Celsius
+ Valid values: Represented in microvolts
-What: /sys/class/power_supply/<supply_name>/temp_alert_max
-Date: July 2012
+What: /sys/class/power_supply/<supply_name>/input_power_limit
+Date: May 2019
Contact: linux-pm@vger.kernel.org
Description:
- Maximum supply temperature trip-wire value where the supply will
- notify user-space of the event. This is normally used for the
- charging scenario where user-space needs to know the supply
- temperature has crossed an upper threshold so it can take
- appropriate action (e.g. warning user that the supply
- temperature is critically high, and charging has stopped to
- remedy the situation).
+ This entry configures the incoming power limit currently set
+ in the supply. Normally this is configured based on
+ system-level knowledge or user input. Use preferably this
+ feature to limit the incoming power and use current/voltage
+ limit only for problems that can be solved using power limit.
- Access: Read
- Valid values: Represented in 1/10 Degrees Celsius
-
-What: /sys/class/power_supply/<supply_name>/temp_alert_min
-Date: July 2012
-Contact: linux-pm@vger.kernel.org
-Description:
- Minimum supply temperature trip-wire value where the supply will
- notify user-space of the event. This is normally used for the
- charging scenario where user-space needs to know the supply
- temperature has crossed a lower threshold so it can take
- appropriate action (e.g. warning user that the supply
- temperature is high, and charging current has been reduced
- accordingly to remedy the situation).
+ Access: Read, Write
- Access: Read
- Valid values: Represented in 1/10 Degrees Celsius
+ Valid values: Represented in microwatts
-What: /sys/class/power_supply/<supply_name>/temp_max
-Date: July 2014
+What: /sys/class/power_supply/<supply_name>/online,
+Date: May 2007
Contact: linux-pm@vger.kernel.org
Description:
- Reports the maximum allowed supply temperature for operation.
+ Indicates if VBUS is present for the supply. When the supply is
+ online, and the supply allows it, then it's possible to switch
+ between online states (e.g. Fixed -> Programmable for a PD_PPS
+ USB supply so voltage and current can be controlled).
- Access: Read
- Valid values: Represented in 1/10 Degrees Celsius
+ Access: Read, Write
-What: /sys/class/power_supply/<supply_name>/temp_min
-Date: July 2014
-Contact: linux-pm@vger.kernel.org
-Description:
- Reports the mainimum allowed supply temperature for operation.
+ Valid values:
- Access: Read
- Valid values: Represented in 1/10 Degrees Celsius
+ == ==================================================
+ 0: Offline
+ 1: Online Fixed - Fixed Voltage Supply
+ 2: Online Programmable - Programmable Voltage Supply
+ == ==================================================
What: /sys/class/power_supply/<supply_name>/usb_type
Date: March 2018
@@ -417,41 +666,18 @@ Description:
the supply, for example it can show if USB-PD capable source
is attached.
- Access: Read-Only
- Valid values: "Unknown", "SDP", "DCP", "CDP", "ACA", "C", "PD",
- "PD_DRP", "PD_PPS", "BrickID"
-
-What: /sys/class/power_supply/<supply_name>/voltage_max
-Date: January 2008
-Contact: linux-pm@vger.kernel.org
-Description:
- Reports the maximum VBUS voltage the supply can support.
-
- Access: Read
- Valid values: Represented in microvolts
-
-What: /sys/class/power_supply/<supply_name>/voltage_min
-Date: January 2008
-Contact: linux-pm@vger.kernel.org
-Description:
- Reports the minimum VBUS voltage the supply can support.
+ Access: For power-supplies which consume USB power such
+ as battery charger chips, this indicates the type of
+ the connected USB power source and is Read-Only.
- Access: Read
- Valid values: Represented in microvolts
-
-What: /sys/class/power_supply/<supply_name>/voltage_now
-Date: May 2007
-Contact: linux-pm@vger.kernel.org
-Description:
- Reports the VBUS voltage supplied now. This value is generally
- read-only reporting, unless the 'online' state of the supply
- is set to be programmable, in which case this value can be set
- within the reported min/max range.
+ For power-supplies which act as a USB power-source such as
+ e.g. the UCS1002 USB Port Power Controller this is writable.
- Access: Read, Write
- Valid values: Represented in microvolts
+ Valid values:
+ "Unknown", "SDP", "DCP", "CDP", "ACA", "C", "PD",
+ "PD_DRP", "PD_PPS", "BrickID"
-===== Device Specific Properties =====
+**Device Specific Properties**
What: /sys/class/power/ds2760-battery.*/charge_now
Date: May 2010
@@ -485,6 +711,7 @@ Description:
will drop to 0 A) and will trigger interrupt.
Valid values:
+
- 5, 6 or 7 (hours),
- 0: disabled.
@@ -499,6 +726,7 @@ Description:
will drop to 0 A) and will trigger interrupt.
Valid values:
+
- 4 - 16 (hours), step by 2 (rounded down)
- 0: disabled.
@@ -513,6 +741,7 @@ Description:
interrupt and start top-off charging mode.
Valid values:
+
- 100000 - 200000 (microamps), step by 25000 (rounded down)
- 200000 - 350000 (microamps), step by 50000 (rounded down)
- 0: disabled.
@@ -528,6 +757,7 @@ Description:
will drop to 0 A) and will trigger interrupt.
Valid values:
+
- 0 - 70 (minutes), step by 10 (rounded down)
What: /sys/class/power_supply/bq24257-charger/ovp_voltage
@@ -541,6 +771,7 @@ Description:
device datasheet for details.
Valid values:
+
- 6000000, 6500000, 7000000, 8000000, 9000000, 9500000, 10000000,
10500000 (all uV)
@@ -556,6 +787,7 @@ Description:
lower than the set value. See device datasheet for details.
Valid values:
+
- 4200000, 4280000, 4360000, 4440000, 4520000, 4600000, 4680000,
4760000 (all uV)
@@ -570,6 +802,7 @@ Description:
the charger operates normally. See device datasheet for details.
Valid values:
+
- 1: enabled
- 0: disabled
@@ -585,5 +818,88 @@ Description:
from the system. See device datasheet for details.
Valid values:
+
- 1: enabled
- 0: disabled
+
+What: /sys/class/power_supply/<supply_name>/manufacture_year
+Date: January 2020
+Contact: linux-pm@vger.kernel.org
+Description:
+ Reports the year (following Gregorian calendar) when the device has been
+ manufactured.
+
+ Access: Read
+
+ Valid values: Reported as integer
+
+What: /sys/class/power_supply/<supply_name>/manufacture_month
+Date: January 2020
+Contact: linux-pm@vger.kernel.org
+Description:
+ Reports the month when the device has been manufactured.
+
+ Access: Read
+
+ Valid values: 1-12
+
+What: /sys/class/power_supply/<supply_name>/manufacture_day
+Date: January 2020
+Contact: linux-pm@vger.kernel.org
+Description:
+ Reports the day of month when the device has been manufactured.
+
+ Access: Read
+ Valid values: 1-31
+
+What: /sys/class/power_supply/<supply_name>/extensions/<extension_name>
+Date: March 2025
+Contact: linux-pm@vger.kernel.org
+Description:
+ Reports the extensions registered to the power supply.
+ Each entry is a link to the device which registered the extension.
+
+ Access: Read
+
+What: /sys/class/power_supply/max8971-charger/fast_charge_timer
+Date: May 2025
+KernelVersion: 6.15.0
+Contact: Svyatoslav Ryhel <clamor95@gmail.com>
+Description:
+ This entry shows and sets the maximum time the max8971
+ charger operates in fast-charge mode. When the timer expires
+ the device will terminate fast-charge mode (charging current
+ will drop to 0 A) and will trigger interrupt.
+
+ Valid values:
+
+ - 4 - 10 (hours), step by 1
+ - 0: disabled.
+
+What: /sys/class/power_supply/max8971-charger/top_off_threshold_current
+Date: May 2025
+KernelVersion: 6.15.0
+Contact: Svyatoslav Ryhel <clamor95@gmail.com>
+Description:
+ This entry shows and sets the charging current threshold for
+ entering top-off charging mode. When charging current in fast
+ charge mode drops below this value, the charger will trigger
+ interrupt and start top-off charging mode.
+
+ Valid values:
+
+ - 50000 - 200000 (microamps), step by 50000 (rounded down)
+
+What: /sys/class/power_supply/max8971-charger/top_off_timer
+Date: May 2025
+KernelVersion: 6.15.0
+Contact: Svyatoslav Ryhel <clamor95@gmail.com>
+Description:
+ This entry shows and sets the maximum time the max8971
+ charger operates in top-off charge mode. When the timer expires
+ the device will terminate top-off charge mode (charging current
+ will drop to 0 A) and will trigger interrupt.
+
+ Valid values:
+
+ - 0 - 70 (minutes), step by 10 (rounded down)
diff --git a/Documentation/ABI/testing/sysfs-class-power-gaokun b/Documentation/ABI/testing/sysfs-class-power-gaokun
new file mode 100644
index 000000000000..0633aed7b355
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-power-gaokun
@@ -0,0 +1,27 @@
+What: /sys/class/power_supply/gaokun-ec-battery/smart_charge_delay
+Date: March 2025
+KernelVersion: 6.15
+Contact: Pengyu Luo <mitltlatltl@gmail.com>
+Description:
+ This entry allows configuration of smart charging delay.
+
+ Smart charging behavior: when the power adapter is connected
+ for delay hours, battery charging will follow the rules of
+ charge_control_start_threshold and charge_control_end_threshold.
+ For more information about charge control, please refer to
+ sysfs-class-power.
+
+ Access: Read, Write
+
+ Valid values: In hours (non-negative)
+
+What: /sys/class/power_supply/gaokun-ec-battery/battery_adaptive_charge
+Date: March 2025
+KernelVersion: 6.15
+Contact: Pengyu Luo <mitltlatltl@gmail.com>
+Description:
+ This entry allows enabling battery adaptive charging.
+
+ Access: Read, Write
+
+ Valid values: 0 (disabled) or 1 (enabled)
diff --git a/Documentation/ABI/testing/sysfs-class-power-ltc4162l b/Documentation/ABI/testing/sysfs-class-power-ltc4162l
new file mode 100644
index 000000000000..ba30db93052b
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-power-ltc4162l
@@ -0,0 +1,82 @@
+What: /sys/class/power_supply/ltc4162-l/charge_status
+Date: Januari 2021
+KernelVersion: 5.11
+Description:
+ Detailed charge status information as reported by the chip.
+
+ Access: Read
+
+ Valid values:
+ ilim_reg_active
+ thermal_reg_active
+ vin_uvcl_active
+ iin_limit_active
+ constant_current
+ constant_voltage
+ charger_off
+
+What: /sys/class/power_supply/ltc4162-l/ibat
+Date: Januari 2021
+KernelVersion: 5.11
+Description:
+ Battery input current as measured by the charger. Negative value
+ means that the battery is discharging.
+
+ Access: Read
+
+ Valid values: Signed value in microamps
+
+What: /sys/class/power_supply/ltc4162-l/vbat
+Date: Januari 2021
+KernelVersion: 5.11
+Description:
+ Battery voltage as measured by the charger.
+
+ Access: Read
+
+ Valid values: In microvolts
+
+What: /sys/class/power_supply/ltc4162-l/vbat_avg
+Date: Januari 2021
+KernelVersion: 5.11
+Description:
+ Battery voltage, averaged over time, as measured by the charger.
+
+ Access: Read
+
+ Valid values: In microvolts
+
+What: /sys/class/power_supply/ltc4162-l/force_telemetry
+Date: Januari 2021
+KernelVersion: 5.11
+Description:
+ To save battery current, the measurement system is disabled if
+ the battery is the only source of power. This affects all
+ voltage, current and temperature measurements.
+ Write a "1" to this to keep performing telemetry once every few
+ seconds, even when running on battery (as reported by the online
+ property, which is "1" when external power is available and "0"
+ when the system runs on battery).
+
+ Access: Read, Write
+
+ Valid values: 0 (disabled) or 1 (enabled)
+
+What: /sys/class/power_supply/ltc4162-l/arm_ship_mode
+Date: Januari 2021
+KernelVersion: 5.11
+Description:
+ The charger will normally drain the battery while inactive,
+ typically drawing about 54 microamps. Write a "1" to this
+ property to arm a special "ship" mode that extends shelf life
+ by reducing the leakage to about 2.8 microamps. The chip will
+ remain in this mode (and no longer respond to I2C commands)
+ until some external power-supply is attached raising the input
+ voltage above 1V. It will then automatically revert to "0".
+ Writing a "0" to the property cancels the "ship" mode request.
+ The ship mode, when armed, activates once the input voltage
+ drops below 1V.
+
+ Access: Read, Write
+
+ Valid values: 0 (disable) or 1 (enable)
diff --git a/Documentation/ABI/testing/sysfs-class-power-max1720x b/Documentation/ABI/testing/sysfs-class-power-max1720x
new file mode 100644
index 000000000000..7d895bfda9ce
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-power-max1720x
@@ -0,0 +1,32 @@
+What: /sys/class/power_supply/max1720x/temp_ain1
+Date: January 2025
+KernelVersion: 6.14
+Contact: Dimitri Fedrau <dimitri.fedrau@liebherr.com>
+Description:
+ Reports the current temperature reading from AIN1 thermistor.
+
+ Access: Read
+
+ Valid values: Represented in 1/10 Degrees Celsius
+
+What: /sys/class/power_supply/max1720x/temp_ain2
+Date: January 2025
+KernelVersion: 6.14
+Contact: Dimitri Fedrau <dimitri.fedrau@liebherr.com>
+Description:
+ Reports the current temperature reading from AIN2 thermistor.
+
+ Access: Read
+
+ Valid values: Represented in 1/10 Degrees Celsius
+
+What: /sys/class/power_supply/max1720x/temp_int
+Date: January 2025
+KernelVersion: 6.14
+Contact: Dimitri Fedrau <dimitri.fedrau@liebherr.com>
+Description:
+ Reports the current temperature reading from internal die.
+
+ Access: Read
+
+ Valid values: Represented in 1/10 Degrees Celsius
diff --git a/Documentation/ABI/testing/sysfs-class-power-mp2629 b/Documentation/ABI/testing/sysfs-class-power-mp2629
new file mode 100644
index 000000000000..914d67caac0d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-power-mp2629
@@ -0,0 +1,9 @@
+What: /sys/class/power_supply/mp2629_battery/batt_impedance_compen
+Date: April 2020
+KernelVersion: 5.7
+Description:
+ Represents a battery impedance compensation to accelerate charging.
+
+ Access: Read, Write
+
+ Valid values: Represented in milli-ohms. Valid range is [0, 140].
diff --git a/Documentation/ABI/testing/sysfs-class-power-rt9467 b/Documentation/ABI/testing/sysfs-class-power-rt9467
new file mode 100644
index 000000000000..619b7c45d145
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-power-rt9467
@@ -0,0 +1,19 @@
+What: /sys/class/power_supply/rt9467-*/sysoff_enable
+Date: Feb 2023
+KernelVersion: 6.3
+Contact: ChiaEn Wu <chiaen_wu@richtek.com>
+Description:
+ This entry allows enabling the sysoff mode of rt9467 charger
+ devices.
+ If enabled and the input is removed, the internal battery FET
+ is turned off to reduce the leakage from the BAT pin. See
+ device datasheet for details. It's commonly used when the
+ product enter shipping stage. After entering shipping mode,
+ only 'VBUS' or 'Power key" pressed can make it leave this mode.
+ 'Disable' also can help to leave it, but it's more like to
+ abort the action before the device really enter shipping mode.
+
+ Access: Read, Write
+ Valid values:
+ - 1: enabled
+ - 0: disabled
diff --git a/Documentation/ABI/testing/sysfs-class-power-rt9471 b/Documentation/ABI/testing/sysfs-class-power-rt9471
new file mode 100644
index 000000000000..0a390ee5ac21
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-power-rt9471
@@ -0,0 +1,32 @@
+What: /sys/class/power_supply/rt9471-*/sysoff_enable
+Date: Feb 2023
+KernelVersion: 6.3
+Contact: ChiYuan Huang <cy_huang@richtek.com>
+Description:
+ This entry allows enabling the sysoff mode of rt9471 charger devices.
+ If enabled and the input is removed, the internal battery FET is turned
+ off to reduce the leakage from the BAT pin. See device datasheet for details.
+ It's commonly used when the product enter shipping stage. After entering
+ shipping mode, only 'VBUS' or 'Power key" pressed can make it leave this
+ mode. 'Disable' also can help to leave it, but it's more like to abort
+ the action before the device really enter shipping mode.
+
+ Access: Read, Write
+ Valid values:
+ - 1: enabled
+ - 0: disabled
+
+What: /sys/class/power_supply/rt9471-*/port_detect_enable
+Date: Feb 2023
+KernelVersion: 6.3
+Contact: ChiYuan Huang <cy_huang@richtek.com>
+Description:
+ This entry allows enabling the USB BC12 port detect function of rt9471 charger
+ devices. If enabled and VBUS is inserted, device will start to do the BC12
+ port detect and report the usb port type when port detect is done. See
+ datasheet for details. Normally controlled when TypeC/USBPD port integrated.
+
+ Access: Read, Write
+ Valid values:
+ - 1: enabled
+ - 0: disabled
diff --git a/Documentation/ABI/testing/sysfs-class-power-surface b/Documentation/ABI/testing/sysfs-class-power-surface
new file mode 100644
index 000000000000..79cde4dcf2f5
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-power-surface
@@ -0,0 +1,15 @@
+What: /sys/class/power_supply/<supply_name>/alarm
+Date: April 2021
+KernelVersion: 5.13
+Contact: Maximilian Luz <luzmaximilian@gmail.com>
+Description:
+ Battery trip point. When the remaining battery capacity crosses this
+ value in either direction, the system will be notified and if
+ necessary woken.
+
+ Set to zero to clear/disable.
+
+ Access: Read, Write
+
+ Valid values: In micro-Wh or micro-Ah, depending on the power unit
+ of the battery
diff --git a/Documentation/ABI/testing/sysfs-class-power-twl4030 b/Documentation/ABI/testing/sysfs-class-power-twl4030
index b4fd32d210c5..b52f7023f8ba 100644
--- a/Documentation/ABI/testing/sysfs-class-power-twl4030
+++ b/Documentation/ABI/testing/sysfs-class-power-twl4030
@@ -4,18 +4,20 @@ Description:
Writing to this can disable charging.
Possible values are:
- "auto" - draw power as appropriate for detected
- power source and battery status.
- "off" - do not draw any power.
- "continuous"
- - activate mode described as "linear" in
- TWL data sheets. This uses whatever
- current is available and doesn't switch off
- when voltage drops.
- This is useful for unstable power sources
- such as bicycle dynamo, but care should
- be taken that battery is not over-charged.
+ ============= ===========================================
+ "auto" draw power as appropriate for detected
+ power source and battery status.
+ "off" do not draw any power.
+ "continuous" activate mode described as "linear" in
+ TWL data sheets. This uses whatever
+ current is available and doesn't switch off
+ when voltage drops.
+
+ This is useful for unstable power sources
+ such as bicycle dynamo, but care should
+ be taken that battery is not over-charged.
+ ============= ===========================================
What: /sys/class/power_supply/twl4030_ac/mode
Description:
@@ -23,6 +25,9 @@ Description:
Writing to this can disable charging.
Possible values are:
- "auto" - draw power as appropriate for detected
- power source and battery status.
- "off" - do not draw any power.
+
+ ====== ===========================================
+ "auto" draw power as appropriate for detected
+ power source and battery status.
+ "off" do not draw any power.
+ ====== ===========================================
diff --git a/Documentation/ABI/testing/sysfs-class-power-wilco b/Documentation/ABI/testing/sysfs-class-power-wilco
new file mode 100644
index 000000000000..083c4641b4c4
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-power-wilco
@@ -0,0 +1,40 @@
+What: /sys/class/power_supply/wilco-charger/charge_type
+Date: April 2019
+KernelVersion: 5.2
+Description:
+ What charging algorithm to use:
+
+ Standard:
+ Fully charges battery at a standard rate.
+ Adaptive:
+ Battery settings adaptively optimized based on
+ typical battery usage pattern.
+ Fast:
+ Battery charges over a shorter period.
+ Trickle:
+ Extends battery lifespan, intended for users who
+ primarily use their Chromebook while connected to AC.
+ Custom:
+ A low and high threshold percentage is specified.
+ Charging begins when level drops below
+ charge_control_start_threshold, and ceases when
+ level is above charge_control_end_threshold.
+ Long Life:
+ Customized charge rate for last longer battery life.
+ On Wilco device this mode is pre-configured in the factory
+ through EC's private PID. Switching to a different mode will
+ be denied by Wilco EC when Long Life mode is enabled.
+
+What: /sys/class/power_supply/wilco-charger/charge_control_start_threshold
+Date: April 2019
+KernelVersion: 5.2
+Description:
+ Used when charge_type="Custom", as described above. Measured in
+ percentages. The valid range is [50, 95].
+
+What: /sys/class/power_supply/wilco-charger/charge_control_end_threshold
+Date: April 2019
+KernelVersion: 5.2
+Description:
+ Used when charge_type="Custom", as described above. Measured in
+ percentages. The valid range is [55, 100].
diff --git a/Documentation/ABI/testing/sysfs-class-powercap b/Documentation/ABI/testing/sysfs-class-powercap
index db3b3ff70d84..ca491ec4e693 100644
--- a/Documentation/ABI/testing/sysfs-class-powercap
+++ b/Documentation/ABI/testing/sysfs-class-powercap
@@ -5,7 +5,7 @@ Contact: linux-pm@vger.kernel.org
Description:
The powercap/ class sub directory belongs to the power cap
subsystem. Refer to
- Documentation/power/powercap/powercap.txt for details.
+ Documentation/power/powercap/powercap.rst for details.
What: /sys/class/powercap/<control type>
Date: September 2013
@@ -147,6 +147,6 @@ What: /sys/class/powercap/.../<power zone>/enabled
Date: September 2013
KernelVersion: 3.13
Contact: linux-pm@vger.kernel.org
-Description
+Description:
This allows to enable/disable power capping at power zone level.
This applies to current power zone and its children.
diff --git a/Documentation/ABI/testing/sysfs-class-pwm b/Documentation/ABI/testing/sysfs-class-pwm
index c20e61354561..0638c94d01ef 100644
--- a/Documentation/ABI/testing/sysfs-class-pwm
+++ b/Documentation/ABI/testing/sysfs-class-pwm
@@ -7,7 +7,7 @@ Description:
Framework and provides a sysfs interface for using PWM
channels.
-What: /sys/class/pwm/pwmchipN/
+What: /sys/class/pwm/pwmchip<N>/
Date: May 2013
KernelVersion: 3.11
Contact: H Hartley Sweeten <hsweeten@visionengravers.com>
@@ -16,14 +16,14 @@ Description:
probed PWM controller/chip where N is the base of the
PWM chip.
-What: /sys/class/pwm/pwmchipN/npwm
+What: /sys/class/pwm/pwmchip<N>/npwm
Date: May 2013
KernelVersion: 3.11
Contact: H Hartley Sweeten <hsweeten@visionengravers.com>
Description:
The number of PWM channels supported by the PWM chip.
-What: /sys/class/pwm/pwmchipN/export
+What: /sys/class/pwm/pwmchip<N>/export
Date: May 2013
KernelVersion: 3.11
Contact: H Hartley Sweeten <hsweeten@visionengravers.com>
@@ -31,14 +31,14 @@ Description:
Exports a PWM channel from the PWM chip for sysfs control.
Value is between 0 and /sys/class/pwm/pwmchipN/npwm - 1.
-What: /sys/class/pwm/pwmchipN/unexport
+What: /sys/class/pwm/pwmchip<N>/unexport
Date: May 2013
KernelVersion: 3.11
Contact: H Hartley Sweeten <hsweeten@visionengravers.com>
Description:
Unexports a PWM channel.
-What: /sys/class/pwm/pwmchipN/pwmX
+What: /sys/class/pwm/pwmchip<N>/pwmX
Date: May 2013
KernelVersion: 3.11
Contact: H Hartley Sweeten <hsweeten@visionengravers.com>
@@ -47,21 +47,21 @@ Description:
each exported PWM channel where X is the exported PWM
channel number.
-What: /sys/class/pwm/pwmchipN/pwmX/period
+What: /sys/class/pwm/pwmchip<N>/pwmX/period
Date: May 2013
KernelVersion: 3.11
Contact: H Hartley Sweeten <hsweeten@visionengravers.com>
Description:
Sets the PWM signal period in nanoseconds.
-What: /sys/class/pwm/pwmchipN/pwmX/duty_cycle
+What: /sys/class/pwm/pwmchip<N>/pwmX/duty_cycle
Date: May 2013
KernelVersion: 3.11
Contact: H Hartley Sweeten <hsweeten@visionengravers.com>
Description:
Sets the PWM signal duty cycle in nanoseconds.
-What: /sys/class/pwm/pwmchipN/pwmX/polarity
+What: /sys/class/pwm/pwmchip<N>/pwmX/polarity
Date: May 2013
KernelVersion: 3.11
Contact: H Hartley Sweeten <hsweeten@visionengravers.com>
@@ -69,7 +69,7 @@ Description:
Sets the output polarity of the PWM signal to "normal" or
"inversed".
-What: /sys/class/pwm/pwmchipN/pwmX/enable
+What: /sys/class/pwm/pwmchip<N>/pwmX/enable
Date: May 2013
KernelVersion: 3.11
Contact: H Hartley Sweeten <hsweeten@visionengravers.com>
@@ -78,10 +78,10 @@ Description:
0 is disabled
1 is enabled
-What: /sys/class/pwm/pwmchipN/pwmX/capture
+What: /sys/class/pwm/pwmchip<N>/pwmX/capture
Date: June 2016
KernelVersion: 4.8
-Contact: Lee Jones <lee.jones@linaro.org>
+Contact: Lee Jones <lee@kernel.org>
Description:
Capture information about a PWM signal. The output format is a
pair unsigned integers (period and duty cycle), separated by a
diff --git a/Documentation/ABI/testing/sysfs-class-rapidio b/Documentation/ABI/testing/sysfs-class-rapidio
index 8716beeb16c1..81e09145525a 100644
--- a/Documentation/ABI/testing/sysfs-class-rapidio
+++ b/Documentation/ABI/testing/sysfs-class-rapidio
@@ -6,50 +6,54 @@ Description:
The /sys/class/rapidio_port subdirectory contains individual
subdirectories named as "rapidioN" where N = mport ID registered
with RapidIO subsystem.
+
NOTE: An mport ID is not a RapidIO destination ID assigned to a
given local mport device.
-What: /sys/class/rapidio_port/rapidioN/sys_size
+What: /sys/class/rapidio_port/rapidio<N>/sys_size
Date: Apr, 2014
KernelVersion: v3.15
Contact: Matt Porter <mporter@kernel.crashing.org>,
Alexandre Bounine <alexandre.bounine@idt.com>
Description:
(RO) reports RapidIO common transport system size:
+
0 = small (8-bit destination ID, max. 256 devices),
+
1 = large (16-bit destination ID, max. 65536 devices).
-What: /sys/class/rapidio_port/rapidioN/port_destid
+What: /sys/class/rapidio_port/rapidio<N>/port_destid
Date: Apr, 2014
KernelVersion: v3.15
Contact: Matt Porter <mporter@kernel.crashing.org>,
Alexandre Bounine <alexandre.bounine@idt.com>
Description:
- (RO) reports RapidIO destination ID assigned to the given
- RapidIO mport device. If value 0xFFFFFFFF is returned this means
- that no valid destination ID have been assigned to the mport
- (yet). Normally, before enumeration/discovery have been executed
- only fabric enumerating mports have a valid destination ID
- assigned to them using "hdid=..." rapidio module parameter.
+
+(RO) reports RapidIO destination ID assigned to the given
+RapidIO mport device. If value 0xFFFFFFFF is returned this means
+that no valid destination ID have been assigned to the mport
+(yet). Normally, before enumeration/discovery have been executed
+only fabric enumerating mports have a valid destination ID
+assigned to them using "hdid=..." rapidio module parameter.
After enumeration or discovery was performed for a given mport device,
the corresponding subdirectory will also contain subdirectories for each
child RapidIO device connected to the mport.
The example below shows mport device subdirectory with several child RapidIO
-devices attached to it.
-
-[rio@rapidio ~]$ ls /sys/class/rapidio_port/rapidio0/ -l
-total 0
-drwxr-xr-x 3 root root 0 Feb 11 15:10 00:e:0001
-drwxr-xr-x 3 root root 0 Feb 11 15:10 00:e:0004
-drwxr-xr-x 3 root root 0 Feb 11 15:10 00:e:0007
-drwxr-xr-x 3 root root 0 Feb 11 15:10 00:s:0002
-drwxr-xr-x 3 root root 0 Feb 11 15:10 00:s:0003
-drwxr-xr-x 3 root root 0 Feb 11 15:10 00:s:0005
-lrwxrwxrwx 1 root root 0 Feb 11 15:11 device -> ../../../0000:01:00.0
--r--r--r-- 1 root root 4096 Feb 11 15:11 port_destid
-drwxr-xr-x 2 root root 0 Feb 11 15:11 power
-lrwxrwxrwx 1 root root 0 Feb 11 15:04 subsystem -> ../../../../../../class/rapidio_port
--r--r--r-- 1 root root 4096 Feb 11 15:11 sys_size
--rw-r--r-- 1 root root 4096 Feb 11 15:04 uevent
+devices attached to it::
+
+ [rio@rapidio ~]$ ls /sys/class/rapidio_port/rapidio0/ -l
+ total 0
+ drwxr-xr-x 3 root root 0 Feb 11 15:10 00:e:0001
+ drwxr-xr-x 3 root root 0 Feb 11 15:10 00:e:0004
+ drwxr-xr-x 3 root root 0 Feb 11 15:10 00:e:0007
+ drwxr-xr-x 3 root root 0 Feb 11 15:10 00:s:0002
+ drwxr-xr-x 3 root root 0 Feb 11 15:10 00:s:0003
+ drwxr-xr-x 3 root root 0 Feb 11 15:10 00:s:0005
+ lrwxrwxrwx 1 root root 0 Feb 11 15:11 device -> ../../../0000:01:00.0
+ -r--r--r-- 1 root root 4096 Feb 11 15:11 port_destid
+ drwxr-xr-x 2 root root 0 Feb 11 15:11 power
+ lrwxrwxrwx 1 root root 0 Feb 11 15:04 subsystem -> ../../../../../../class/rapidio_port
+ -r--r--r-- 1 root root 4096 Feb 11 15:11 sys_size
+ -rw-r--r-- 1 root root 4096 Feb 11 15:04 uevent
diff --git a/Documentation/ABI/testing/sysfs-class-rc b/Documentation/ABI/testing/sysfs-class-rc
index 6c0d6c8cb911..84e46d70d82b 100644
--- a/Documentation/ABI/testing/sysfs-class-rc
+++ b/Documentation/ABI/testing/sysfs-class-rc
@@ -7,7 +7,7 @@ Description:
core and provides a sysfs interface for configuring infrared
remote controller receivers.
-What: /sys/class/rc/rcN/
+What: /sys/class/rc/rc<N>/
Date: Apr 2010
KernelVersion: 2.6.35
Contact: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
@@ -15,38 +15,47 @@ Description:
A /sys/class/rc/rcN directory is created for each remote
control receiver device where N is the number of the receiver.
-What: /sys/class/rc/rcN/protocols
+What: /sys/class/rc/rc<N>/protocols
Date: Jun 2010
KernelVersion: 2.6.36
Contact: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Description:
Reading this file returns a list of available protocols,
- something like:
+ something like::
+
"rc5 [rc6] nec jvc [sony]"
+
Enabled protocols are shown in [] brackets.
+
Writing "+proto" will add a protocol to the list of enabled
protocols.
+
Writing "-proto" will remove a protocol from the list of enabled
protocols.
+
Writing "proto" will enable only "proto".
+
Writing "none" will disable all protocols.
+
Write fails with EINVAL if an invalid protocol combination or
unknown protocol name is used.
-What: /sys/class/rc/rcN/filter
+What: /sys/class/rc/rc<N>/filter
Date: Jan 2014
KernelVersion: 3.15
Contact: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Description:
Sets the scancode filter expected value.
+
Use in combination with /sys/class/rc/rcN/filter_mask to set the
expected value of the bits set in the filter mask.
If the hardware supports it then scancodes which do not match
the filter will be ignored. Otherwise the write will fail with
an error.
+
This value may be reset to 0 if the current protocol is altered.
-What: /sys/class/rc/rcN/filter_mask
+What: /sys/class/rc/rc<N>/filter_mask
Date: Jan 2014
KernelVersion: 3.15
Contact: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
@@ -56,56 +65,73 @@ Description:
of the scancode which should be compared against the expected
value. A value of 0 disables the filter to allow all valid
scancodes to be processed.
+
If the hardware supports it then scancodes which do not match
the filter will be ignored. Otherwise the write will fail with
an error.
+
This value may be reset to 0 if the current protocol is altered.
-What: /sys/class/rc/rcN/wakeup_protocols
+What: /sys/class/rc/rc<N>/wakeup_protocols
Date: Feb 2017
KernelVersion: 4.11
Contact: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Description:
Reading this file returns a list of available protocols to use
- for the wakeup filter, something like:
+ for the wakeup filter, something like::
+
"rc-5 nec nec-x rc-6-0 rc-6-6a-24 [rc-6-6a-32] rc-6-mce"
+
Note that protocol variants are listed, so "nec", "sony",
"rc-5", "rc-6" have their different bit length encodings
listed if available.
+
The enabled wakeup protocol is shown in [] brackets.
+
Only one protocol can be selected at a time.
+
Writing "proto" will use "proto" for wakeup events.
+
Writing "none" will disable wakeup.
+
Write fails with EINVAL if an invalid protocol combination or
unknown protocol name is used, or if wakeup is not supported by
the hardware.
-What: /sys/class/rc/rcN/wakeup_filter
+What: /sys/class/rc/rc<N>/wakeup_filter
Date: Jan 2014
KernelVersion: 3.15
Contact: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Description:
Sets the scancode wakeup filter expected value.
+
Use in combination with /sys/class/rc/rcN/wakeup_filter_mask to
set the expected value of the bits set in the wakeup filter mask
to trigger a system wake event.
+
If the hardware supports it and wakeup_filter_mask is not 0 then
scancodes which match the filter will wake the system from e.g.
suspend to RAM or power off.
+
Otherwise the write will fail with an error.
+
This value may be reset to 0 if the wakeup protocol is altered.
-What: /sys/class/rc/rcN/wakeup_filter_mask
+What: /sys/class/rc/rc<N>/wakeup_filter_mask
Date: Jan 2014
KernelVersion: 3.15
Contact: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Description:
Sets the scancode wakeup filter mask of bits to compare.
+
Use in combination with /sys/class/rc/rcN/wakeup_filter to set
the bits of the scancode which should be compared against the
expected value to trigger a system wake event.
+
If the hardware supports it and wakeup_filter_mask is not 0 then
scancodes which match the filter will wake the system from e.g.
suspend to RAM or power off.
+
Otherwise the write will fail with an error.
+
This value may be reset to 0 if the wakeup protocol is altered.
diff --git a/Documentation/ABI/testing/sysfs-class-rc-nuvoton b/Documentation/ABI/testing/sysfs-class-rc-nuvoton
index d3abe45f8690..f7bad8ecd08f 100644
--- a/Documentation/ABI/testing/sysfs-class-rc-nuvoton
+++ b/Documentation/ABI/testing/sysfs-class-rc-nuvoton
@@ -1,4 +1,4 @@
-What: /sys/class/rc/rcN/wakeup_data
+What: /sys/class/rc/rc<N>/wakeup_data
Date: Mar 2016
KernelVersion: 4.6
Contact: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
diff --git a/Documentation/ABI/testing/sysfs-class-regulator b/Documentation/ABI/testing/sysfs-class-regulator
index bc578bc60628..475b9a372657 100644
--- a/Documentation/ABI/testing/sysfs-class-regulator
+++ b/Documentation/ABI/testing/sysfs-class-regulator
@@ -35,13 +35,13 @@ Description:
This will be one of the following strings:
- off
- on
- error
- fast
- normal
- idle
- standby
+ - off
+ - on
+ - error
+ - fast
+ - normal
+ - idle
+ - standby
"off" means the regulator is not supplying power to the
system.
@@ -74,9 +74,9 @@ Description:
This will be one of the following strings:
- 'voltage'
- 'current'
- 'unknown'
+ - 'voltage'
+ - 'current'
+ - 'unknown'
'voltage' means the regulator output voltage can be controlled
by software.
@@ -129,11 +129,11 @@ Description:
The opmode value can be one of the following strings:
- 'fast'
- 'normal'
- 'idle'
- 'standby'
- 'unknown'
+ - 'fast'
+ - 'normal'
+ - 'idle'
+ - 'standby'
+ - 'unknown'
The modes are described in include/linux/regulator/consumer.h
@@ -360,9 +360,9 @@ Description:
This will be one of the following strings:
- 'enabled'
- 'disabled'
- 'unknown'
+ - 'enabled'
+ - 'disabled'
+ - 'unknown'
'enabled' means the regulator is in bypass mode.
@@ -370,3 +370,84 @@ Description:
'unknown' means software cannot determine the state, or
the reported state is invalid.
+
+What: /sys/class/regulator/.../under_voltage
+Date: April 2022
+KernelVersion: 5.18
+Contact: Zev Weiss <zev@bewilderbeest.net>
+Description:
+ Some regulator directories will contain a field called
+ under_voltage. This indicates if the device reports an
+ under-voltage fault (1) or not (0).
+
+What: /sys/class/regulator/.../over_current
+Date: April 2022
+KernelVersion: 5.18
+Contact: Zev Weiss <zev@bewilderbeest.net>
+Description:
+ Some regulator directories will contain a field called
+ over_current. This indicates if the device reports an
+ over-current fault (1) or not (0).
+
+What: /sys/class/regulator/.../regulation_out
+Date: April 2022
+KernelVersion: 5.18
+Contact: Zev Weiss <zev@bewilderbeest.net>
+Description:
+ Some regulator directories will contain a field called
+ regulation_out. This indicates if the device reports an
+ out-of-regulation fault (1) or not (0).
+
+What: /sys/class/regulator/.../fail
+Date: April 2022
+KernelVersion: 5.18
+Contact: Zev Weiss <zev@bewilderbeest.net>
+Description:
+ Some regulator directories will contain a field called
+ fail. This indicates if the device reports an output failure
+ (1) or not (0).
+
+What: /sys/class/regulator/.../over_temp
+Date: April 2022
+KernelVersion: 5.18
+Contact: Zev Weiss <zev@bewilderbeest.net>
+Description:
+ Some regulator directories will contain a field called
+ over_temp. This indicates if the device reports an
+ over-temperature fault (1) or not (0).
+
+What: /sys/class/regulator/.../under_voltage_warn
+Date: April 2022
+KernelVersion: 5.18
+Contact: Zev Weiss <zev@bewilderbeest.net>
+Description:
+ Some regulator directories will contain a field called
+ under_voltage_warn. This indicates if the device reports an
+ under-voltage warning (1) or not (0).
+
+What: /sys/class/regulator/.../over_current_warn
+Date: April 2022
+KernelVersion: 5.18
+Contact: Zev Weiss <zev@bewilderbeest.net>
+Description:
+ Some regulator directories will contain a field called
+ over_current_warn. This indicates if the device reports an
+ over-current warning (1) or not (0).
+
+What: /sys/class/regulator/.../over_voltage_warn
+Date: April 2022
+KernelVersion: 5.18
+Contact: Zev Weiss <zev@bewilderbeest.net>
+Description:
+ Some regulator directories will contain a field called
+ over_voltage_warn. This indicates if the device reports an
+ over-voltage warning (1) or not (0).
+
+What: /sys/class/regulator/.../over_temp_warn
+Date: April 2022
+KernelVersion: 5.18
+Contact: Zev Weiss <zev@bewilderbeest.net>
+Description:
+ Some regulator directories will contain a field called
+ over_temp_warn. This indicates if the device reports an
+ over-temperature warning (1) or not (0).
diff --git a/Documentation/ABI/testing/sysfs-class-remoteproc b/Documentation/ABI/testing/sysfs-class-remoteproc
index c3afe9fab646..b2b8e2db2503 100644
--- a/Documentation/ABI/testing/sysfs-class-remoteproc
+++ b/Documentation/ABI/testing/sysfs-class-remoteproc
@@ -16,11 +16,11 @@ Description: Remote processor state
Reports the state of the remote processor, which will be one of:
- "offline"
- "suspended"
- "running"
- "crashed"
- "invalid"
+ - "offline"
+ - "suspended"
+ - "running"
+ - "crashed"
+ - "invalid"
"offline" means the remote processor is powered off.
@@ -38,8 +38,8 @@ Description: Remote processor state
Writing this file controls the state of the remote processor.
The following states can be written:
- "start"
- "stop"
+ - "start"
+ - "stop"
Writing "start" will attempt to start the processor running the
firmware indicated by, or written to,
@@ -48,3 +48,57 @@ Description: Remote processor state
Writing "stop" will attempt to halt the remote processor and
return it to the "offline" state.
+
+What: /sys/class/remoteproc/.../name
+Date: August 2019
+KernelVersion: 5.4
+Contact: Suman Anna <s-anna@ti.com>
+Description: Remote processor name
+
+ Reports the name of the remote processor. This can be used by
+ userspace in exactly identifying a remote processor and ease
+ up the usage in modifying the 'firmware' or 'state' files.
+
+What: /sys/class/remoteproc/.../coredump
+Date: July 2020
+Contact: Bjorn Andersson <bjorn.andersson@linaro.org>, Ohad Ben-Cohen <ohad@wizery.com>
+Description: Remote processor coredump configuration
+
+ Reports the coredump configuration of the remote processor,
+ which will be one of:
+
+ "disabled"
+ "enabled"
+ "inline"
+
+ "disabled" means no dump will be collected.
+
+ "enabled" means when the remote processor's coredump is
+ collected it will be copied to a separate buffer and that
+ buffer is exposed to userspace.
+
+ "inline" means when the remote processor's coredump is
+ collected userspace will directly read from the remote
+ processor's device memory. Extra buffer will not be used to
+ copy the dump. Also recovery process will not proceed until
+ all data is read by userspace.
+
+What: /sys/class/remoteproc/.../recovery
+Date: July 2020
+Contact: Bjorn Andersson <bjorn.andersson@linaro.org>, Ohad Ben-Cohen <ohad@wizery.com>
+Description: Remote processor recovery mechanism
+
+ Reports the recovery mechanism of the remote processor,
+ which will be one of:
+
+ "enabled"
+ "disabled"
+
+ "enabled" means, the remote processor will be automatically
+ recovered whenever it crashes. Moreover, if the remote
+ processor crashes while recovery is disabled, it will
+ be automatically recovered too as soon as recovery is enabled.
+
+ "disabled" means, a remote processor will remain in a crashed
+ state if it crashes. This is useful for debugging purposes;
+ without it, debugging a crash is substantially harder.
diff --git a/Documentation/ABI/testing/sysfs-class-rnbd-client b/Documentation/ABI/testing/sysfs-class-rnbd-client
new file mode 100644
index 000000000000..0b5997ab3365
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-rnbd-client
@@ -0,0 +1,133 @@
+What: /sys/class/rnbd-client
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: Provide information about RNBD-client.
+ All sysfs files that are not read-only provide the usage information on read:
+
+ Example::
+
+ # cat /sys/class/rnbd-client/ctl/map_device
+
+ > Usage: echo "sessname=<name of the rtrs session> path=<[srcaddr,]dstaddr>
+ > [path=<[srcaddr,]dstaddr>] device_path=<full path on remote side>
+ > [access_mode=<ro|rw|migration>] > map_device
+ >
+ > addr ::= [ ip:<ipv4> | ip:<ipv6> | gid:<gid> ]
+
+What: /sys/class/rnbd-client/ctl/map_device
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: Expected format is the following::
+
+ sessname=<name of the rtrs session>
+ path=<[srcaddr,]dstaddr> [path=<[srcaddr,]dstaddr> ...]
+ device_path=<full path on remote side>
+ [access_mode=<ro|rw|migration>]
+
+ Where:
+
+ sessname:
+ accepts a string not bigger than 256 chars, which identifies
+ a given session on the client and on the server.
+ I.e. "clt_hostname-srv_hostname" could be a natural choice.
+
+ path:
+ describes a connection between the client and the server by
+ specifying destination and, when required, the source address.
+ The addresses are to be provided in the following format::
+
+ ip:<IPv6>
+ ip:<IPv4>
+ gid:<GID>
+
+ for example::
+
+ path=ip:10.0.0.66
+
+ The single addr is treated as the destination.
+ The connection will be established to this server from any client IP address.
+
+ ::
+
+ path=ip:10.0.0.66,ip:10.0.1.66
+
+ First addr is the source address and the second is the destination.
+
+ If multiple "path=" options are specified multiple connection
+ will be established and data will be sent according to
+ the selected multipath policy (see RTRS mp_policy sysfs entry description).
+
+ device_path:
+ Path to the block device on the server side. Path is specified
+ relative to the directory on server side configured in the
+ 'dev_search_path' module parameter of the rnbd_server.
+ The rnbd_server prepends the <device_path> received from client
+ with <dev_search_path> and tries to open the
+ <dev_search_path>/<device_path> block device. On success,
+ a /dev/rnbd<N> device file, a /sys/block/rnbd<N>/
+ directory and an entry in /sys/class/rnbd-client/ctl/devices
+ will be created.
+
+ If 'dev_search_path' contains '%SESSNAME%', then each session can
+ have different devices namespace, e.g. server was configured with
+ the following parameter "dev_search_path=/run/rnbd-devs/%SESSNAME%",
+ client has this string "sessname=blya device_path=sda", then server
+ will try to open: /run/rnbd-devs/blya/sda.
+
+ access_mode:
+ the access_mode parameter specifies if the device is to be
+ mapped as "ro" read-only or "rw" read-write. The server allows
+ a device to be exported in rw mode only once. The "migration"
+ access mode has to be specified if a second mapping in read-write
+ mode is desired.
+
+ By default "rw" is used.
+
+ nr_poll_queues
+ specifies the number of poll-mode queues. If the IO has HIPRI flag,
+ the block-layer will send the IO via the poll-mode queue.
+ For fast network and device the polling is faster than interrupt-base
+ IO handling because it saves time for context switching, switching to
+ another process, handling the interrupt and switching back to the
+ issuing process.
+
+ Set -1 if you want to set it as the number of CPUs
+ By default rnbd client creates only irq-mode queues.
+
+ NOTICE: MUST make a unique session for a device using the poll-mode queues.
+
+ Exit Codes:
+
+ If the device is already mapped it will fail with EEXIST. If the input
+ has an invalid format it will return EINVAL. If the device path cannot
+ be found on the server, it will fail with ENOENT.
+
+ Finding device file after mapping
+ ---------------------------------
+
+ After mapping, the device file can be found by:
+ o The symlink /sys/class/rnbd-client/ctl/devices/<device_id>@<session_name>
+ points to /sys/block/<dev-name>. The last part of the symlink destination
+ is the same as the device name. By extracting the last part of the
+ path the path to the device /dev/<dev-name> can be build.
+
+ * /dev/block/$(cat /sys/class/rnbd-client/ctl/devices/<device_id>@<session_name>/dev)
+
+ How to find the <device_id> of the device is described on the next
+ section.
+
+What: /sys/class/rnbd-client/ctl/devices/
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: For each device mapped on the client a new symbolic link is created as
+ /sys/class/rnbd-client/ctl/devices/<device_id>@<session_name>, which points
+ to the block device created by rnbd (/sys/block/rnbd<N>/).
+ The <device_id> of each device is created as follows:
+
+ - If the 'device_path' provided during mapping contains slashes ("/"),
+ they are replaced by exclamation mark ("!") and used as as the
+ <device_id>. Otherwise, the <device_id> will be the same as the
+ "device_path" provided.
diff --git a/Documentation/ABI/testing/sysfs-class-rnbd-server b/Documentation/ABI/testing/sysfs-class-rnbd-server
new file mode 100644
index 000000000000..6c5996cd7cfb
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-rnbd-server
@@ -0,0 +1,58 @@
+What: /sys/class/rnbd-server
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: provide information about RNBD-server.
+
+What: /sys/class/rnbd-server/ctl/
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: When a client maps a device, a directory entry with the name of the
+ block device is created under /sys/class/rnbd-server/ctl/devices/.
+
+What: /sys/class/rnbd-server/ctl/devices/<device_name>/block_dev
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: Is a symlink to the sysfs entry of the exported device.
+
+ Example:
+ block_dev -> ../../../../class/block/ram0
+
+What: /sys/class/rnbd-server/ctl/devices/<device_name>/sessions/
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: For each client a particular device is exported to, following directory will be
+ created:
+
+ /sys/class/rnbd-server/ctl/devices/<device_name>/sessions/<session-name>/
+
+ When the device is unmapped by that client, the directory will be removed.
+
+What: /sys/class/rnbd-server/ctl/devices/<device_name>/sessions/<session-name>/read_only
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: Contains '1' if device is mapped read-only, otherwise '0'.
+
+What: /sys/class/rnbd-server/ctl/devices/<device_name>/sessions/<session-name>/mapping_path
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: Contains the relative device path provided by the user during mapping.
+
+What: /sys/class/rnbd-server/ctl/devices/<device_name>/sessions/<session-name>/access_mode
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: Contains the device access mode: ro, rw or migration.
+
+What: /sys/class/rnbd-server/ctl/devices/<device_name>/sessions/<session-name>/force_close
+Date: Nov 2020
+KernelVersion: 5.10
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: Write "1" to the file to close the device on server side. Please
+ note that the client side device will not be closed, read or
+ write to the device will get -ENOTCONN.
diff --git a/Documentation/ABI/testing/sysfs-class-rtc-rtc0-device-rtc_calibration b/Documentation/ABI/testing/sysfs-class-rtc-rtc0-device-rtc_calibration
index ec950c93e5c6..ee8ed6494a01 100644
--- a/Documentation/ABI/testing/sysfs-class-rtc-rtc0-device-rtc_calibration
+++ b/Documentation/ABI/testing/sysfs-class-rtc-rtc0-device-rtc_calibration
@@ -7,6 +7,7 @@ Description: Attribute for calibrating ST-Ericsson AB8500 Real Time Clock
calibrate the AB8500.s 32KHz Real Time Clock.
Every 60 seconds the AB8500 will correct the RTC's value
by adding to it the value of this attribute.
+
The range of the attribute is -127 to +127 in units of
30.5 micro-seconds (half-parts-per-million of the 32KHz clock)
Users: The /vendor/st-ericsson/base_utilities/core/rtc_calibration
diff --git a/Documentation/ABI/testing/sysfs-class-rtrs-client b/Documentation/ABI/testing/sysfs-class-rtrs-client
new file mode 100644
index 000000000000..fecc59d1b96f
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-rtrs-client
@@ -0,0 +1,146 @@
+What: /sys/class/rtrs-client
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: When a user of RTRS API creates a new session, a directory entry with
+ the name of that session is created under /sys/class/rtrs-client/<session-name>/
+
+What: /sys/class/rtrs-client/<session-name>/add_path
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: RW, adds a new path (connection) to an existing session. Expected format is the
+ following::
+
+ <[source addr,]destination addr>
+ *addr ::= [ ip:<ipv4|ipv6> | gid:<gid> ]
+
+What: /sys/class/rtrs-client/<session-name>/max_reconnect_attempts
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: Maximum number reconnect attempts the client should make before giving up
+ after connection breaks unexpectedly.
+
+What: /sys/class/rtrs-client/<session-name>/mp_policy
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: Multipath policy specifies which path should be selected on each IO:
+
+ round-robin (0):
+ select path in per CPU round-robin manner.
+
+ min-inflight (1):
+ select path with minimum inflights.
+
+ min-latency (2):
+ select path with minimum latency.
+
+What: /sys/class/rtrs-client/<session-name>/paths/
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: Each path belonging to a given session is listed here by its source and
+ destination address. When a new path is added to a session by writing to
+ the "add_path" entry, a directory <src@dst> is created.
+
+What: /sys/class/rtrs-client/<session-name>/paths/<src@dst>/state
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: RO, Contains "connected" if the session is connected to the peer and fully
+ functional. Otherwise the file contains "disconnected"
+
+What: /sys/class/rtrs-client/<session-name>/paths/<src@dst>/reconnect
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: Write "1" to the file in order to reconnect the path.
+ Operation is blocking and returns 0 if reconnect was successful.
+
+What: /sys/class/rtrs-client/<session-name>/paths/<src@dst>/disconnect
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: Write "1" to the file in order to disconnect the path.
+ Operation blocks until RTRS path is disconnected.
+
+What: /sys/class/rtrs-client/<session-name>/paths/<src@dst>/remove_path
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: Write "1" to the file in order to disconnected and remove the path
+ from the session. Operation blocks until the path is disconnected
+ and removed from the session.
+
+What: /sys/class/rtrs-client/<session-name>/paths/<src@dst>/hca_name
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: RO, Contains the name of HCA the connection established on.
+
+What: /sys/class/rtrs-client/<session-name>/paths/<src@dst>/hca_port
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: RO, Contains the port number of active port traffic is going through.
+
+What: /sys/class/rtrs-client/<session-name>/paths/<src@dst>/src_addr
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: RO, Contains the source address of the path
+
+What: /sys/class/rtrs-client/<session-name>/paths/<src@dst>/dst_addr
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: RO, Contains the destination address of the path
+
+What: /sys/class/rtrs-client/<session-name>/paths/<src@dst>/cur_latency
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: RO, Contains the latency time calculated by the heart-beat messages.
+ Whenever the client sends heart-beat message, it checks the time gap
+ between sending the heart-beat message and receiving the ACK.
+ This value can be changed regularly.
+
+What: /sys/class/rtrs-client/<session-name>/paths/<src@dst>/stats/reset_all
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: RW, Read will return usage help, write 0 will clear all the statistics.
+
+What: /sys/class/rtrs-client/<session-name>/paths/<src@dst>/stats/cpu_migration
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: RTRS expects that each HCA IRQ is pinned to a separate CPU. If it's
+ not the case, the processing of an I/O response could be processed on a
+ different CPU than where it was originally submitted. This file shows
+ how many interrupts where generated on a non expected CPU.
+
+ "from:"
+ is the CPU on which the IRQ was expected, but not generated.
+ "to:"
+ is the CPU on which the IRQ was generated, but not expected.
+
+What: /sys/class/rtrs-client/<session-name>/paths/<src@dst>/stats/reconnects
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: Contains 2 unsigned int values, the first one records number of successful
+ reconnects in the path lifetime, the second one records number of failed
+ reconnects in the path lifetime.
+
+What: /sys/class/rtrs-client/<session-name>/paths/<src@dst>/stats/rdma
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: Contains statistics regarding rdma operations and inflight operations.
+ The output consists of 6 values::
+
+ <read-count> <read-total-size> <write-count> \
+ <write-total-size> <inflights> <failovered>
diff --git a/Documentation/ABI/testing/sysfs-class-rtrs-server b/Documentation/ABI/testing/sysfs-class-rtrs-server
new file mode 100644
index 000000000000..b08601d80409
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-rtrs-server
@@ -0,0 +1,53 @@
+What: /sys/class/rtrs-server
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: When a user of RTRS API creates a new session on a client side, a
+ directory entry with the name of that session is created in here.
+
+What: /sys/class/rtrs-server/<session-name>/paths/
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: When new path is created by writing to "add_path" entry on client side,
+ a directory entry named as <source address>@<destination address> is created
+ on server.
+
+What: /sys/class/rtrs-server/<session-name>/paths/<src@dst>/disconnect
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: When "1" is written to the file, the RTRS session is being disconnected.
+ Operations is non-blocking and returns control immediately to the caller.
+
+What: /sys/class/rtrs-server/<session-name>/paths/<src@dst>/hca_name
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: RO, Contains the name of HCA the connection established on.
+
+What: /sys/class/rtrs-server/<session-name>/paths/<src@dst>/hca_port
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: RO, Contains the port number of active port traffic is going through.
+
+What: /sys/class/rtrs-server/<session-name>/paths/<src@dst>/src_addr
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: RO, Contains the source address of the path
+
+What: /sys/class/rtrs-server/<session-name>/paths/<src@dst>/dst_addr
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: RO, Contains the destination address of the path
+
+What: /sys/class/rtrs-server/<session-name>/paths/<src@dst>/stats/rdma
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
+Description: Contains statistics regarding rdma operations and inflight operations.
+ The output consists of 5 values:
+ <read-count> <read-total-size> <write-count> <write-total-size> <inflights>
diff --git a/Documentation/ABI/testing/sysfs-class-scsi_host b/Documentation/ABI/testing/sysfs-class-scsi_host
index bafc59fd7b69..7c98d8f43c45 100644
--- a/Documentation/ABI/testing/sysfs-class-scsi_host
+++ b/Documentation/ABI/testing/sysfs-class-scsi_host
@@ -56,8 +56,9 @@ Description:
management) on top, which makes it match the Windows IRST (Intel
Rapid Storage Technology) driver settings. This setting is also
close to min_power, except that:
+
a) It does not use host-initiated slumber mode, but it does
- allow device-initiated slumber
+ allow device-initiated slumber
b) It does not enable low power device sleep mode (DevSlp).
What: /sys/class/scsi_host/hostX/em_message
@@ -70,8 +71,8 @@ Description:
protocol, writes and reads correspond to the LED message format
as defined in the AHCI spec.
- The user must turn sw_activity (under /sys/block/*/device/) OFF
- it they wish to control the activity LED via the em_message
+ The user must turn sw_activity (under `/sys/block/*/device/`)
+ OFF it they wish to control the activity LED via the em_message
file.
em_message_type: (RO) Displays the current enclosure management
diff --git a/Documentation/ABI/testing/sysfs-class-spi-eeprom b/Documentation/ABI/testing/sysfs-class-spi-eeprom
new file mode 100644
index 000000000000..1ff757982079
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-spi-eeprom
@@ -0,0 +1,19 @@
+What: /sys/class/spi_master/spi<bus>/spi<bus>.<dev>/fram
+Date: June 2021
+KernelVersion: 5.14
+Contact: Jiri Prchal <jiri.prchal@aksignal.cz>
+Description:
+ Contains the FRAM binary data. Same as EEPROM, just another file
+ name to indicate that it employs ferroelectric process.
+ It performs write operations at bus speed - no write delays.
+
+What: /sys/class/spi_master/spi<bus>/spi<bus>.<dev>/sernum
+Date: May 2021
+KernelVersion: 5.14
+Contact: Jiri Prchal <jiri.prchal@aksignal.cz>
+Description:
+ Contains the serial number of the Cypress FRAM (FM25VN) if it is
+ present. It will be displayed as a 8 byte hex string, as read
+ from the device.
+
+ This is a read-only attribute.
diff --git a/Documentation/ABI/testing/sysfs-class-switchtec b/Documentation/ABI/testing/sysfs-class-switchtec
index 48cb4c15e430..76c7a661a595 100644
--- a/Documentation/ABI/testing/sysfs-class-switchtec
+++ b/Documentation/ABI/testing/sysfs-class-switchtec
@@ -1,6 +1,6 @@
switchtec - Microsemi Switchtec PCI Switch Management Endpoint
-For details on this subsystem look at Documentation/switchtec.txt.
+For details on this subsystem look at Documentation/driver-api/switchtec.rst.
What: /sys/class/switchtec
Date: 05-Jan-2017
diff --git a/Documentation/ABI/testing/sysfs-class-tee b/Documentation/ABI/testing/sysfs-class-tee
new file mode 100644
index 000000000000..c9144d16003e
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-tee
@@ -0,0 +1,15 @@
+What: /sys/class/tee/tee{,priv}X/rpmb_routing_model
+Date: May 2024
+KernelVersion: 6.10
+Contact: op-tee@lists.trustedfirmware.org
+Description:
+ RPMB frames can be routed to the RPMB device via the
+ user-space daemon tee-supplicant or the RPMB subsystem
+ in the kernel. The value "user" means that the driver
+ will route the RPMB frames via user space. Conversely,
+ "kernel" means that the frames are routed via the RPMB
+ subsystem without assistance from tee-supplicant. It
+ should be assumed that RPMB frames are routed via user
+ space if the variable is absent. The primary purpose
+ of this variable is to let systemd know whether
+ tee-supplicant is needed in the early boot with initramfs.
diff --git a/Documentation/ABI/testing/sysfs-class-thermal b/Documentation/ABI/testing/sysfs-class-thermal
new file mode 100644
index 000000000000..968d89e15e8e
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-thermal
@@ -0,0 +1,259 @@
+What: /sys/class/thermal/thermal_zoneX/type
+Description:
+ Strings which represent the thermal zone type.
+ This is given by thermal zone driver as part of registration.
+ E.g: "acpitz" indicates it's an ACPI thermal device.
+ In order to keep it consistent with hwmon sys attribute; this
+ should be a short, lowercase string, not containing spaces nor
+ dashes.
+
+ RO, Required
+
+What: /sys/class/thermal/thermal_zoneX/temp
+Description:
+ Current temperature as reported by thermal zone (sensor).
+
+ Unit: millidegree Celsius
+
+ RO, Required
+
+What: /sys/class/thermal/thermal_zoneX/mode
+Description:
+ One of the predefined values in [enabled, disabled].
+ This file gives information about the algorithm that is
+ currently managing the thermal zone. It can be either default
+ kernel based algorithm or user space application.
+
+ enabled
+ enable Kernel Thermal management.
+ disabled
+ Preventing kernel thermal zone driver actions upon
+ trip points so that user application can take full
+ charge of the thermal management.
+
+ RW, Optional
+
+What: /sys/class/thermal/thermal_zoneX/policy
+Description:
+ One of the various thermal governors used for a particular zone.
+
+ RW, Required
+
+What: /sys/class/thermal/thermal_zoneX/available_policies
+Description:
+ Available thermal governors which can be used for a
+ particular zone.
+
+ RO, Required
+
+What: /sys/class/thermal/thermal_zoneX/trip_point_Y_temp
+Description:
+ The temperature above which trip point will be fired.
+
+ Unit: millidegree Celsius
+
+ RO, Optional
+
+What: /sys/class/thermal/thermal_zoneX/trip_point_Y_type
+Description:
+ Strings which indicate the type of the trip point.
+
+ E.g. it can be one of critical, hot, passive, `active[0-*]`
+ for ACPI thermal zone.
+
+ RO, Optional
+
+What: /sys/class/thermal/thermal_zoneX/trip_point_Y_hyst
+Description:
+ The hysteresis value for a trip point, represented as an
+ integer.
+
+ Unit: Celsius
+
+ RW, Optional
+
+What: /sys/class/thermal/thermal_zoneX/cdevY
+Description:
+ Sysfs link to the thermal cooling device node where the sys I/F
+ for cooling device throttling control represents.
+
+ RO, Optional
+
+What: /sys/class/thermal/thermal_zoneX/cdevY_trip_point
+Description:
+ The trip point in this thermal zone which `cdev[0-*]` is
+ associated with; -1 means the cooling device is not
+ associated with any trip point.
+
+ RO, Optional
+
+What: /sys/class/thermal/thermal_zoneX/cdevY_weight
+Description:
+ The influence of `cdev[0-*]` in this thermal zone. This value
+ is relative to the rest of cooling devices in the thermal
+ zone. For example, if a cooling device has a weight double
+ than that of other, it's twice as effective in cooling the
+ thermal zone.
+
+ RW, Optional
+
+What: /sys/class/thermal/thermal_zoneX/emul_temp
+Description:
+ Interface to set the emulated temperature method in thermal zone
+ (sensor). After setting this temperature, the thermal zone may
+ pass this temperature to platform emulation function if
+ registered or cache it locally. This is useful in debugging
+ different temperature threshold and its associated cooling
+ action. This is write only node and writing 0 on this node
+ should disable emulation.
+
+ Unit: millidegree Celsius
+
+ WO, Optional
+
+ WARNING:
+ Be careful while enabling this option on production systems,
+ because userland can easily disable the thermal policy by simply
+ flooding this sysfs node with low temperature values.
+
+
+What: /sys/class/thermal/thermal_zoneX/k_d
+Description:
+ The derivative term of the power allocator governor's PID
+ controller. For more information see
+ Documentation/driver-api/thermal/power_allocator.rst
+
+ RW, Optional
+
+What: /sys/class/thermal/thermal_zoneX/k_i
+Description:
+ The integral term of the power allocator governor's PID
+ controller. This term allows the PID controller to compensate
+ for long term drift. For more information see
+ Documentation/driver-api/thermal/power_allocator.rst
+
+ RW, Optional
+
+What: /sys/class/thermal/thermal_zoneX/k_po
+Description:
+ The proportional term of the power allocator governor's PID
+ controller during temperature overshoot. Temperature overshoot
+ is when the current temperature is above the "desired
+ temperature" trip point. For more information see
+ Documentation/driver-api/thermal/power_allocator.rst
+
+ RW, Optional
+
+What: /sys/class/thermal/thermal_zoneX/k_pu
+Description:
+ The proportional term of the power allocator governor's PID
+ controller during temperature undershoot. Temperature undershoot
+ is when the current temperature is below the "desired
+ temperature" trip point. For more information see
+ Documentation/driver-api/thermal/power_allocator.rst
+
+ RW, Optional
+
+What: /sys/class/thermal/thermal_zoneX/integral_cutoff
+Description:
+ Temperature offset from the desired temperature trip point
+ above which the integral term of the power allocator
+ governor's PID controller starts accumulating errors. For
+ example, if integral_cutoff is 0, then the integral term only
+ accumulates error when temperature is above the desired
+ temperature trip point. For more information see
+ Documentation/driver-api/thermal/power_allocator.rst
+
+ Unit: millidegree Celsius
+
+ RW, Optional
+
+What: /sys/class/thermal/thermal_zoneX/slope
+Description:
+ The slope constant used in a linear extrapolation model
+ to determine a hotspot temperature based off the sensor's
+ raw readings. It is up to the device driver to determine
+ the usage of these values.
+
+ RW, Optional
+
+What: /sys/class/thermal/thermal_zoneX/offset
+Description:
+ The offset constant used in a linear extrapolation model
+ to determine a hotspot temperature based off the sensor's
+ raw readings. It is up to the device driver to determine
+ the usage of these values.
+
+ RW, Optional
+
+What: /sys/class/thermal/thermal_zoneX/sustainable_power
+Description:
+ An estimate of the sustained power that can be dissipated by
+ the thermal zone. Used by the power allocator governor. For
+ more information see
+ Documentation/driver-api/thermal/power_allocator.rst
+
+ Unit: milliwatts
+
+ RW, Optional
+
+What: /sys/class/thermal/cooling_deviceX/type
+Description:
+ String which represents the type of device, e.g:
+
+ - for generic ACPI: should be "Fan", "Processor" or "LCD"
+ - for memory controller device on intel_menlow platform:
+ should be "Memory controller".
+
+ RO, Required
+
+What: /sys/class/thermal/cooling_deviceX/max_state
+Description:
+ The maximum permissible cooling state of this cooling device.
+
+ RO, Required
+
+What: /sys/class/thermal/cooling_deviceX/cur_state
+Description:
+ The current cooling state of this cooling device.
+ The value can any integer numbers between 0 and max_state:
+
+ - cur_state == 0 means no cooling
+ - cur_state == max_state means the maximum cooling.
+
+ RW, Required
+
+What: /sys/class/thermal/cooling_deviceX/stats/reset
+Description:
+ Writing any value resets the cooling device's statistics.
+
+ WO, Required
+
+What: /sys/class/thermal/cooling_deviceX/stats/time_in_state_ms:
+Description:
+ The amount of time spent by the cooling device in various
+ cooling states. The output will have "<state> <time>" pair
+ in each line, which will mean this cooling device spent <time>
+ msec of time at <state>.
+
+ Output will have one line for each of the supported states.
+
+ RO, Required
+
+What: /sys/class/thermal/cooling_deviceX/stats/total_trans
+Description:
+ A single positive value showing the total number of times
+ the state of a cooling device is changed.
+
+ RO, Required
+
+What: /sys/class/thermal/cooling_deviceX/stats/trans_table
+Description:
+ This gives fine grained information about all the cooling state
+ transitions. The cat output here is a two dimensional matrix,
+ where an entry <i,j> (row i, column j) represents the number
+ of transitions from State_i to State_j. If the transition
+ table is bigger than PAGE_SIZE, reading this will return
+ an -EFBIG error.
+
+ RO, Required
diff --git a/Documentation/ABI/testing/sysfs-class-typec b/Documentation/ABI/testing/sysfs-class-typec
index d7647b258c3c..38e101c17a00 100644
--- a/Documentation/ABI/testing/sysfs-class-typec
+++ b/Documentation/ABI/testing/sysfs-class-typec
@@ -20,13 +20,13 @@ Date: April 2017
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Description:
The supported power roles. This attribute can be used to request
- power role swap on the port when the port supports USB Power
- Delivery. Swapping is supported as synchronous operation, so
- write(2) to the attribute will not return until the operation
- has finished. The attribute is notified about role changes so
- that poll(2) on the attribute wakes up. Change on the role will
- also generate uevent KOBJ_CHANGE. The current role is show in
- brackets, for example "[source] sink" when in source mode.
+ power role swap on the port. Swapping is supported as
+ synchronous operation, so write(2) to the attribute will not
+ return until the operation has finished. The attribute is
+ notified about role changes so that poll(2) on the attribute
+ wakes up. Change on the role will also generate uevent
+ KOBJ_CHANGE. The current role is show in brackets, for example
+ "[source] sink" when in source mode.
Valid values: source, sink
@@ -40,10 +40,13 @@ Description:
attribute will not return until the operation has finished.
Valid values:
- - source (The port will behave as source only DFP port)
- - sink (The port will behave as sink only UFP port)
- - dual (The port will behave as dual-role-data and
+
+ ====== ==============================================
+ source (The port will behave as source only DFP port)
+ sink (The port will behave as sink only UFP port)
+ dual (The port will behave as dual-role-data and
dual-role-power port)
+ ====== ==============================================
What: /sys/class/typec/<port>/vconn_source
Date: April 2017
@@ -59,6 +62,7 @@ Description:
generates uevent KOBJ_CHANGE.
Valid values:
+
- "no" when the port is not the VCONN Source
- "yes" when the port is the VCONN Source
@@ -72,6 +76,7 @@ Description:
power operation mode should show "usb_power_delivery".
Valid values:
+
- default
- 1.5A
- 3.0A
@@ -100,7 +105,25 @@ Date: April 2017
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Description:
Revision number of the supported USB Power Delivery
- specification, or 0 when USB Power Delivery is not supported.
+ specification, or 0.0 when USB Power Delivery is not supported.
+
+ Example values:
+ - "2.0": USB Power Delivery Release 2.0
+ - "3.0": USB Power Delivery Release 3.0
+ - "3.1": USB Power Delivery Release 3.1
+
+What: /sys/class/typec/<port>-{partner|cable}/usb_power_delivery_revision
+Date: January 2021
+Contact: Benson Leung <bleung@chromium.org>
+Description:
+ Revision number of the supported USB Power Delivery
+ specification of the port partner or cable, or 0.0 when USB
+ Power Delivery is not supported.
+
+ Example values:
+ - "2.0": USB Power Delivery Release 2.0
+ - "3.0": USB Power Delivery Release 3.0
+ - "3.1": USB Power Delivery Release 3.1
What: /sys/class/typec/<port>/usb_typec_revision
Date: April 2017
@@ -108,6 +131,36 @@ Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Description:
Revision number of the supported USB Type-C specification.
+What: /sys/class/typec/<port>/orientation
+Date: February 2020
+Contact: Badhri Jagan Sridharan <badhri@google.com>
+Description:
+ Indicates the active orientation of the Type-C connector.
+ Valid values:
+ - "normal": CC1 orientation
+ - "reverse": CC2 orientation
+ - "unknown": Orientation cannot be determined.
+
+What: /sys/class/typec/<port>/select_usb_power_delivery
+Date: May 2022
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+ Lists the USB Power Delivery Capabilities that the port can
+ advertise to the partner. The currently used capabilities are in
+ brackets. Selection happens by writing to the file.
+
+What: /sys/class/typec/<port>/usb_capability
+Date: November 2024
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description: Lists the supported USB Modes. The default USB mode that is used
+ next time with the Enter_USB Message is in brackets. The default
+ mode can be changed by writing to the file when supported by the
+ driver.
+
+ Valid values:
+ - usb2 (USB 2.0)
+ - usb3 (USB 3.2)
+ - usb4 (USB4)
USB Type-C partner devices (eg. /sys/class/typec/port0-partner/)
@@ -125,7 +178,50 @@ Description:
Shows if the partner supports USB Power Delivery communication:
Valid values: yes, no
-What: /sys/class/typec/<port>-partner>/identity/
+What: /sys/class/typec/<port>-partner/number_of_alternate_modes
+Date: November 2020
+Contact: Prashant Malani <pmalani@chromium.org>
+Description:
+ Shows the number of alternate modes which are advertised by the partner
+ during Power Delivery discovery. This file remains hidden until a value
+ greater than or equal to 0 is set by Type C port driver.
+
+What: /sys/class/typec/<port>-partner/type
+Date: December 2020
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description: USB Power Delivery Specification defines a set of product types
+ for the partner devices. This file will show the product type of
+ the partner if it is known. Dual-role capable partners will have
+ both UFP and DFP product types defined, but only one that
+ matches the current role will be active at the time. If the
+ product type of the partner is not visible to the device driver,
+ this file will not exist.
+
+ When the partner product type is detected, or changed with role
+ swap, uvevent is also raised that contains PRODUCT_TYPE=<product
+ type> (for example PRODUCT_TYPE=hub).
+
+ Valid values:
+
+ UFP / device role
+ ====================== ==========================
+ undefined -
+ hub PDUSB Hub
+ peripheral PDUSB Peripheral
+ psd Power Bank
+ ama Alternate Mode Adapter
+ ====================== ==========================
+
+ DFP / host role
+ ====================== ==========================
+ undefined -
+ hub PDUSB Hub
+ host PDUSB Host
+ power_brick Power Brick
+ amc Alternate Mode Controller
+ ====================== ==========================
+
+What: /sys/class/typec/<port>-partner/identity/
Date: April 2017
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Description:
@@ -137,30 +233,19 @@ Description:
directory exists, it will have an attribute file for every VDO
in Discover Identity command result.
-What: /sys/class/typec/<port>-partner/identity/id_header
-Date: April 2017
+What: /sys/class/typec/<port>-partner/usb_mode
+Date: November 2024
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
-Description:
- ID Header VDO part of Discover Identity command result. The
- value will show 0 until Discover Identity command result becomes
- available. The value can be polled.
-
-What: /sys/class/typec/<port>-partner/identity/cert_stat
-Date: April 2017
-Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
-Description:
- Cert Stat VDO part of Discover Identity command result. The
- value will show 0 until Discover Identity command result becomes
- available. The value can be polled.
-
-What: /sys/class/typec/<port>-partner/identity/product
-Date: April 2017
-Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
-Description:
- Product VDO part of Discover Identity command result. The value
- will show 0 until Discover Identity command result becomes
- available. The value can be polled.
+Description: The USB Modes that the partner device supports. The active mode
+ is displayed in brackets. The active USB mode can be changed by
+ writing to this file when the port driver is able to send Data
+ Reset Message to the partner. That requires USB Power Delivery
+ contract between the partner and the port.
+ Valid values:
+ - usb2 (USB 2.0)
+ - usb3 (USB 3.2)
+ - usb4 (USB4)
USB Type-C cable devices (eg. /sys/class/typec/port0-cable/)
@@ -173,31 +258,64 @@ described in USB Type-C and USB Power Delivery specifications.
What: /sys/class/typec/<port>-cable/type
Date: April 2017
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
-Description:
- Shows if the cable is active.
- Valid values: active, passive
+Description: USB Power Delivery Specification defines a set of product types
+ for the cables. This file will show the product type of the
+ cable if it is known. If the product type of the cable is not
+ visible to the device driver, this file will not exist.
+
+ When the cable product type is detected, uvevent is also raised
+ with PRODUCT_TYPE showing the product type of the cable.
+
+ Valid values:
+
+ ====================== ==========================
+ undefined -
+ active Active Cable
+ passive Passive Cable
+ ====================== ==========================
What: /sys/class/typec/<port>-cable/plug_type
Date: April 2017
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Description:
Shows type of the plug on the cable:
+
- type-a - Standard A
- type-b - Standard B
- type-c
- captive
-What: /sys/class/typec/<port>-cable/identity/
+What: /sys/class/typec/<port>-<plug>/number_of_alternate_modes
+Date: November 2020
+Contact: Prashant Malani <pmalani@chromium.org>
+Description:
+ Shows the number of alternate modes which are advertised by the plug
+ associated with a particular cable during Power Delivery discovery.
+ This file remains hidden until a value greater than or equal to 0
+ is set by Type C port driver.
+
+
+USB Type-C partner/cable Power Delivery Identity objects
+
+NOTE: The following attributes will be applicable to both
+partner (e.g /sys/class/typec/port0-partner/) and
+cable (e.g /sys/class/typec/port0-cable/) devices. Consequently, the example file
+paths below are prefixed with "/sys/class/typec/<port>-{partner|cable}/" to
+reflect this.
+
+What: /sys/class/typec/<port>-{partner|cable}/identity/
Date: April 2017
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Description:
This directory appears only if the port device driver is capable
of showing the result of Discover Identity USB power delivery
command. That will not always be possible even when USB power
- delivery is supported. If the directory exists, it will have an
- attribute for every VDO returned by Discover Identity command.
+ delivery is supported, for example when USB power delivery
+ communication for the port is mostly handled in firmware. If the
+ directory exists, it will have an attribute file for every VDO
+ in Discover Identity command result.
-What: /sys/class/typec/<port>-cable/identity/id_header
+What: /sys/class/typec/<port>-{partner|cable}/identity/id_header
Date: April 2017
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Description:
@@ -205,7 +323,7 @@ Description:
value will show 0 until Discover Identity command result becomes
available. The value can be polled.
-What: /sys/class/typec/<port>-cable/identity/cert_stat
+What: /sys/class/typec/<port>-{partner|cable}/identity/cert_stat
Date: April 2017
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Description:
@@ -213,7 +331,7 @@ Description:
value will show 0 until Discover Identity command result becomes
available. The value can be polled.
-What: /sys/class/typec/<port>-cable/identity/product
+What: /sys/class/typec/<port>-{partner|cable}/identity/product
Date: April 2017
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Description:
@@ -221,6 +339,30 @@ Description:
will show 0 until Discover Identity command result becomes
available. The value can be polled.
+What: /sys/class/typec/<port>-{partner|cable}/identity/product_type_vdo1
+Date: October 2020
+Contact: Prashant Malani <pmalani@chromium.org>
+Description:
+ 1st Product Type VDO of Discover Identity command result.
+ The value will show 0 until Discover Identity command result becomes
+ available and a valid Product Type VDO is returned.
+
+What: /sys/class/typec/<port>-{partner|cable}/identity/product_type_vdo2
+Date: October 2020
+Contact: Prashant Malani <pmalani@chromium.org>
+Description:
+ 2nd Product Type VDO of Discover Identity command result.
+ The value will show 0 until Discover Identity command result becomes
+ available and a valid Product Type VDO is returned.
+
+What: /sys/class/typec/<port>-{partner|cable}/identity/product_type_vdo3
+Date: October 2020
+Contact: Prashant Malani <pmalani@chromium.org>
+Description:
+ 3rd Product Type VDO of Discover Identity command result.
+ The value will show 0 until Discover Identity command result becomes
+ available and a valid Product Type VDO is returned.
+
USB Type-C port alternate mode devices.
diff --git a/Documentation/ABI/testing/sysfs-class-usb_power_delivery b/Documentation/ABI/testing/sysfs-class-usb_power_delivery
new file mode 100644
index 000000000000..61d233c320ea
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-usb_power_delivery
@@ -0,0 +1,256 @@
+What: /sys/class/usb_power_delivery
+Date: May 2022
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+ Directory for USB Power Delivery devices.
+
+What: /sys/class/usb_power_delivery/.../revision
+Date: May 2022
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+ File showing the USB Power Delivery Specification Revision used
+ in communication.
+
+What: /sys/class/usb_power_delivery/.../version
+Date: May 2022
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+ This is an optional attribute file showing the version of the
+ specific revision of the USB Power Delivery Specification. In
+ most cases the specification version is not known and the file
+ is not available.
+
+What: /sys/class/usb_power_delivery/.../source-capabilities
+Date: May 2022
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+ The source capabilities message "Source_Capabilities" contains a
+ set of Power Data Objects (PDO), each representing a type of
+ power supply. The order of the PDO objects is defined in the USB
+ Power Delivery Specification. Each PDO - power supply - will
+ have its own device, and the PDO device name will start with the
+ object position number as the first character followed by the
+ power supply type name (":" as delimiter).
+
+ /sys/class/usb_power_delivery/.../source_capabilities/<position>:<type>
+
+What: /sys/class/usb_power_delivery/.../sink-capabilities
+Date: May 2022
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+ The sink capability message "Sink_Capabilities" contains a set
+ of Power Data Objects (PDO) just like with source capabilities,
+ but instead of describing the power capabilities, these objects
+ describe the power requirements.
+
+ The order of the objects in the sink capability message is the
+ same as with the source capabilities message.
+
+Fixed Supplies
+
+What: /sys/class/usb_power_delivery/.../<capability>/<position>:fixed_supply
+Date: May 2022
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+ Devices containing the attributes (the bit fields) defined for
+ Fixed Supplies.
+
+ The device "1:fixed_supply" is special. USB Power Delivery
+ Specification dictates that the first PDO (at object position
+ 1), and the only mandatory PDO, is always the vSafe5V Fixed
+ Supply Object. vSafe5V Object has additional fields defined for
+ it that the other Fixed Supply Objects do not have and that are
+ related to the USB capabilities rather than power capabilities.
+
+What: /sys/class/usb_power_delivery/.../<capability>/1:fixed_supply/dual_role_power
+Date: May 2022
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+ This file contains boolean value that tells does the device
+ support both source and sink power roles.
+
+What: /sys/class/usb_power_delivery/.../source-capabilities/1:fixed_supply/usb_suspend_supported
+Date: May 2022
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+ This file shows the value of the USB Suspend Supported bit in
+ vSafe5V Fixed Supply Object. If the bit is set then the device
+ will follow the USB 2.0 and USB 3.2 rules for suspend and
+ resume.
+
+What: /sys/class/usb_power_delivery/.../sink-capabilities/1:fixed_supply/higher_capability
+Date: February 2023
+Contact: Saranya Gopal <saranya.gopal@linux.intel.com>
+Description:
+ This file shows the value of the Higher capability bit in
+ vsafe5V Fixed Supply Object. If the bit is set, then the sink
+ needs more than vsafe5V(eg. 12 V) to provide full functionality.
+ Valid values: 0, 1
+
+What: /sys/class/usb_power_delivery/.../<capability>/1:fixed_supply/unconstrained_power
+Date: May 2022
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+ This file shows the value of the Unconstrained Power bit in
+ vSafe5V Fixed Supply Object. The bit is set when an external
+ source of power, powerful enough to power the entire system on
+ its own, is available for the device.
+
+What: /sys/class/usb_power_delivery/.../<capability>/1:fixed_supply/usb_communication_capable
+Date: May 2022
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+ This file shows the value of the USB Communication Capable bit in
+ vSafe5V Fixed Supply Object.
+
+What: /sys/class/usb_power_delivery/.../<capability>/1:fixed_supply/dual_role_data
+Date: May 2022
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+ This file shows the value of the Dual-Role Data bit in vSafe5V
+ Fixed Supply Object. Dual role data means ability act as both
+ USB host and USB device.
+
+What: /sys/class/usb_power_delivery/.../<capability>/1:fixed_supply/unchunked_extended_messages_supported
+Date: May 2022
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+ This file shows the value of the Unchunked Extended Messages
+ Supported bit in vSafe5V Fixed Supply Object.
+
+What: /sys/class/usb_power_delivery/.../<capability>/<position>:fixed_supply/voltage
+Date: May 2022
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+ The voltage the supply supports in millivolts.
+
+What: /sys/class/usb_power_delivery/.../source-capabilities/<position>:fixed_supply/peak_current
+Date: October 2023
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+ This file shows the value of the Fixed Power Source Peak Current
+ Capability field.
+
+What: /sys/class/usb_power_delivery/.../source-capabilities/<position>:fixed_supply/maximum_current
+Date: May 2022
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+ Maximum current of the fixed source supply in milliamperes.
+
+What: /sys/class/usb_power_delivery/.../sink-capabilities/<position>:fixed_supply/operational_current
+Date: May 2022
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+ Operational current of the sink in milliamperes.
+
+What: /sys/class/usb_power_delivery/.../sink-capabilities/<position>:fixed_supply/fast_role_swap_current
+Date: May 2022
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+ This file contains the value of the "Fast Role Swap USB Type-C
+ Current" field that tells the current level the sink requires
+ after a Fast Role Swap.
+ 0 - Fast Swap not supported"
+ 1 - Default USB Power"
+ 2 - 1.5A@5V"
+ 3 - 3.0A@5V"
+
+Variable Supplies
+
+What: /sys/class/usb_power_delivery/.../<capability>/<position>:variable_supply
+Date: May 2022
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+ Variable Power Supply PDO.
+
+What: /sys/class/usb_power_delivery/.../<capability>/<position>:variable_supply/maximum_voltage
+Date: May 2022
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+ Maximum Voltage in millivolts.
+
+What: /sys/class/usb_power_delivery/.../<capability>/<position>:variable_supply/minimum_voltage
+Date: May 2022
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+ Minimum Voltage in millivolts.
+
+What: /sys/class/usb_power_delivery/.../source-capabilities/<position>:variable_supply/maximum_current
+Date: May 2022
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+ The maximum current in milliamperes that the source can supply
+ at the given Voltage range.
+
+What: /sys/class/usb_power_delivery/.../sink-capabilities/<position>:variable_supply/operational_current
+Date: May 2022
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+ The operational current in milliamperes that the sink requires
+ at the given Voltage range.
+
+Battery Supplies
+
+What: /sys/class/usb_power_delivery/.../<capability>/<position>:battery
+Date: May 2022
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+ Battery PDO.
+
+What: /sys/class/usb_power_delivery/.../<capability>/<position>:battery/maximum_voltage
+Date: May 2022
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+ Maximum Voltage in millivolts.
+
+What: /sys/class/usb_power_delivery/.../<capability>/<position>:battery/minimum_voltage
+Date: May 2022
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+ Minimum Voltage in millivolts.
+
+What: /sys/class/usb_power_delivery/.../source-capabilities/<position>:battery/maximum_power
+Date: May 2022
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+ Maximum allowable Power in milliwatts.
+
+What: /sys/class/usb_power_delivery/.../sink-capabilities/<position>:battery/operational_power
+Date: May 2022
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+ The operational power that the sink requires at the given
+ voltage range.
+
+Standard Power Range (SPR) Programmable Power Supplies
+
+What: /sys/class/usb_power_delivery/.../<capability>/<position>:programmable_supply
+Date: May 2022
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+ Programmable Power Supply (PPS) Augmented PDO (APDO).
+
+What: /sys/class/usb_power_delivery/.../<capability>/<position>:programmable_supply/maximum_voltage
+Date: May 2022
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+ Maximum Voltage in millivolts.
+
+What: /sys/class/usb_power_delivery/.../<capability>/<position>:programmable_supply/minimum_voltage
+Date: May 2022
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+ Minimum Voltage in millivolts.
+
+What: /sys/class/usb_power_delivery/.../<capability>/<position>:programmable_supply/maximum_current
+Date: May 2022
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+ Maximum Current in milliamperes.
+
+What: /sys/class/usb_power_delivery/.../source-capabilities/<position>:programmable_supply/pps_power_limited
+Date: May 2022
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+ The PPS Power Limited bit indicates whether or not the source
+ supply will exceed the rated output power if requested.
diff --git a/Documentation/ABI/testing/sysfs-class-usb_role b/Documentation/ABI/testing/sysfs-class-usb_role
index 3b810a425a52..9fab3f06679e 100644
--- a/Documentation/ABI/testing/sysfs-class-usb_role
+++ b/Documentation/ABI/testing/sysfs-class-usb_role
@@ -19,3 +19,9 @@ Description:
- none
- host
- device
+
+What: /sys/class/usb_role/<switch>/connector
+Date: Feb 2024
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+ Optional symlink to the USB Type-C connector.
diff --git a/Documentation/ABI/testing/sysfs-class-uwb_rc b/Documentation/ABI/testing/sysfs-class-uwb_rc
deleted file mode 100644
index 85f4875d16ac..000000000000
--- a/Documentation/ABI/testing/sysfs-class-uwb_rc
+++ /dev/null
@@ -1,159 +0,0 @@
-What: /sys/class/uwb_rc
-Date: July 2008
-KernelVersion: 2.6.27
-Contact: linux-usb@vger.kernel.org
-Description:
- Interfaces for WiMedia Ultra Wideband Common Radio
- Platform (UWB) radio controllers.
-
- Familiarity with the ECMA-368 'High Rate Ultra
- Wideband MAC and PHY Specification' is assumed.
-
-What: /sys/class/uwb_rc/beacon_timeout_ms
-Date: July 2008
-KernelVersion: 2.6.27
-Description:
- If no beacons are received from a device for at least
- this time, the device will be considered to have gone
- and it will be removed. The default is 3 superframes
- (~197 ms) as required by the specification.
-
-What: /sys/class/uwb_rc/uwbN/
-Date: July 2008
-KernelVersion: 2.6.27
-Contact: linux-usb@vger.kernel.org
-Description:
- An individual UWB radio controller.
-
-What: /sys/class/uwb_rc/uwbN/beacon
-Date: July 2008
-KernelVersion: 2.6.27
-Contact: linux-usb@vger.kernel.org
-Description:
- Write:
-
- <channel>
-
- to force a specific channel to be used when beaconing,
- or, if <channel> is -1, to prohibit beaconing. If
- <channel> is 0, then the default channel selection
- algorithm will be used. Valid channels depends on the
- radio controller's supported band groups.
-
- Reading returns the currently active channel, or -1 if
- the radio controller is not beaconing.
-
-What: /sys/class/uwb_rc/uwbN/ASIE
-Date: August 2014
-KernelVersion: 3.18
-Contact: linux-usb@vger.kernel.org
-Description:
-
- The application-specific information element (ASIE)
- included in this device's beacon, in space separated
- hex octets.
-
- Reading returns the current ASIE. Writing replaces
- the current ASIE with the one written.
-
-What: /sys/class/uwb_rc/uwbN/scan
-Date: July 2008
-KernelVersion: 2.6.27
-Contact: linux-usb@vger.kernel.org
-Description:
- Write:
-
- <channel> <type> [<bpst offset>]
-
- to start (or stop) scanning on a channel. <type> is one of:
- 0 - scan
- 1 - scan outside BP
- 2 - scan while inactive
- 3 - scanning disabled
- 4 - scan (with start time of <bpst offset>)
-
-What: /sys/class/uwb_rc/uwbN/mac_address
-Date: July 2008
-KernelVersion: 2.6.27
-Contact: linux-usb@vger.kernel.org
-Description:
- The EUI-48, in colon-separated hex octets, for this
- radio controller. A write will change the radio
- controller's EUI-48 but only do so while the device is
- not beaconing or scanning.
-
-What: /sys/class/uwb_rc/uwbN/wusbhc
-Date: July 2008
-KernelVersion: 2.6.27
-Contact: linux-usb@vger.kernel.org
-Description:
- A symlink to the device (if any) of the WUSB Host
- Controller PAL using this radio controller.
-
-What: /sys/class/uwb_rc/uwbN/<EUI-48>/
-Date: July 2008
-KernelVersion: 2.6.27
-Contact: linux-usb@vger.kernel.org
-Description:
- A neighbour UWB device that has either been detected
- as part of a scan or is a member of the radio
- controllers beacon group.
-
-What: /sys/class/uwb_rc/uwbN/<EUI-48>/BPST
-Date: July 2008
-KernelVersion: 2.6.27
-Contact: linux-usb@vger.kernel.org
-Description:
- The time (using the radio controllers internal 1 ms
- interval superframe timer) of the last beacon from
- this device was received.
-
-What: /sys/class/uwb_rc/uwbN/<EUI-48>/DevAddr
-Date: July 2008
-KernelVersion: 2.6.27
-Contact: linux-usb@vger.kernel.org
-Description:
- The current DevAddr of this device in colon separated
- hex octets.
-
-What: /sys/class/uwb_rc/uwbN/<EUI-48>/EUI_48
-Date: July 2008
-KernelVersion: 2.6.27
-Contact: linux-usb@vger.kernel.org
-Description:
-
- The EUI-48 of this device in colon separated hex
- octets.
-
-What: /sys/class/uwb_rc/uwbN/<EUI-48>/BPST
-Date: July 2008
-KernelVersion: 2.6.27
-Contact: linux-usb@vger.kernel.org
-Description:
-
-What: /sys/class/uwb_rc/uwbN/<EUI-48>/IEs
-Date: July 2008
-KernelVersion: 2.6.27
-Contact: linux-usb@vger.kernel.org
-Description:
- The latest IEs included in this device's beacon, in
- space separated hex octets with one IE per line.
-
-What: /sys/class/uwb_rc/uwbN/<EUI-48>/LQE
-Date: July 2008
-KernelVersion: 2.6.27
-Contact: linux-usb@vger.kernel.org
-Description:
- Link Quality Estimate - the Signal to Noise Ratio
- (SNR) of all packets received from this device in dB.
- This gives an estimate on a suitable PHY rate. Refer
- to [ECMA-368] section 13.3 for more details.
-
-What: /sys/class/uwb_rc/uwbN/<EUI-48>/RSSI
-Date: July 2008
-KernelVersion: 2.6.27
-Contact: linux-usb@vger.kernel.org
-Description:
- Received Signal Strength Indication - the strength of
- the received signal in dB. LQE is a more useful
- measure of the radio link quality.
diff --git a/Documentation/ABI/testing/sysfs-class-uwb_rc-wusbhc b/Documentation/ABI/testing/sysfs-class-uwb_rc-wusbhc
deleted file mode 100644
index 5977e2875325..000000000000
--- a/Documentation/ABI/testing/sysfs-class-uwb_rc-wusbhc
+++ /dev/null
@@ -1,57 +0,0 @@
-What: /sys/class/uwb_rc/uwbN/wusbhc/wusb_chid
-Date: July 2008
-KernelVersion: 2.6.27
-Contact: David Vrabel <david.vrabel@csr.com>
-Description:
- Write the CHID (16 space-separated hex octets) for this host controller.
- This starts the host controller, allowing it to accept connection from
- WUSB devices.
-
- Set an all zero CHID to stop the host controller.
-
-What: /sys/class/uwb_rc/uwbN/wusbhc/wusb_trust_timeout
-Date: July 2008
-KernelVersion: 2.6.27
-Contact: David Vrabel <david.vrabel@csr.com>
-Description:
- Devices that haven't sent a WUSB packet to the host
- within 'wusb_trust_timeout' ms are considered to have
- disconnected and are removed. The default value of
- 4000 ms is the value required by the WUSB
- specification.
-
- Since this relates to security (specifically, the
- lifetime of PTKs and GTKs) it should not be changed
- from the default.
-
-What: /sys/class/uwb_rc/uwbN/wusbhc/wusb_phy_rate
-Date: August 2009
-KernelVersion: 2.6.32
-Contact: David Vrabel <david.vrabel@csr.com>
-Description:
- The maximum PHY rate to use for all connected devices.
- This is only of limited use for testing and
- development as the hardware's automatic rate
- adaptation is better then this simple control.
-
- Refer to [ECMA-368] section 10.3.1.1 for the value to
- use.
-
-What: /sys/class/uwb_rc/uwbN/wusbhc/wusb_dnts
-Date: June 2013
-KernelVersion: 3.11
-Contact: Thomas Pugliese <thomas.pugliese@gmail.com>
-Description:
- The device notification time slot (DNTS) count and inverval in
- milliseconds that the WUSB host should use. This controls how
- often the devices will have the opportunity to send
- notifications to the host.
-
-What: /sys/class/uwb_rc/uwbN/wusbhc/wusb_retry_count
-Date: June 2013
-KernelVersion: 3.11
-Contact: Thomas Pugliese <thomas.pugliese@gmail.com>
-Description:
- The number of retries that the WUSB host should attempt
- before reporting an error for a bus transaction. The range of
- valid values is [0..15], where 0 indicates infinite retries.
diff --git a/Documentation/ABI/testing/sysfs-class-vduse b/Documentation/ABI/testing/sysfs-class-vduse
new file mode 100644
index 000000000000..2f2bc5c8fc48
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-vduse
@@ -0,0 +1,33 @@
+What: /sys/class/vduse/
+Date: Oct 2021
+KernelVersion: 5.15
+Contact: Yongji Xie <xieyongji@bytedance.com>
+Description:
+ The vduse/ class sub-directory belongs to the VDUSE
+ framework and provides a sysfs interface for configuring
+ VDUSE devices.
+
+What: /sys/class/vduse/control/
+Date: Oct 2021
+KernelVersion: 5.15
+Contact: Yongji Xie <xieyongji@bytedance.com>
+Description:
+ This directory entry is created for the control device
+ of VDUSE framework.
+
+What: /sys/class/vduse/<device-name>/
+Date: Oct 2021
+KernelVersion: 5.15
+Contact: Yongji Xie <xieyongji@bytedance.com>
+Description:
+ This directory entry is created when a VDUSE device is
+ created via the control device.
+
+What: /sys/class/vduse/<device-name>/msg_timeout
+Date: Oct 2021
+KernelVersion: 5.15
+Contact: Yongji Xie <xieyongji@bytedance.com>
+Description:
+ (RW) The timeout (in seconds) for waiting for the control
+ message's response from userspace. Default value is 30s.
+ Writing a '0' to the file means to disable the timeout.
diff --git a/Documentation/ABI/testing/sysfs-class-wakeup b/Documentation/ABI/testing/sysfs-class-wakeup
new file mode 100644
index 000000000000..754aab8b6dcd
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-wakeup
@@ -0,0 +1,76 @@
+What: /sys/class/wakeup/
+Date: June 2019
+Contact: Tri Vo <trong@android.com>
+Description:
+ The /sys/class/wakeup/ directory contains pointers to all
+ wakeup sources in the kernel at that moment in time.
+
+What: /sys/class/wakeup/.../name
+Date: June 2019
+Contact: Tri Vo <trong@android.com>
+Description:
+ This file contains the name of the wakeup source.
+
+What: /sys/class/wakeup/.../active_count
+Date: June 2019
+Contact: Tri Vo <trong@android.com>
+Description:
+ This file contains the number of times the wakeup source was
+ activated.
+
+What: /sys/class/wakeup/.../event_count
+Date: June 2019
+Contact: Tri Vo <trong@android.com>
+Description:
+ This file contains the number of signaled wakeup events
+ associated with the wakeup source.
+
+What: /sys/class/wakeup/.../wakeup_count
+Date: June 2019
+Contact: Tri Vo <trong@android.com>
+Description:
+ This file contains the number of times the wakeup source might
+ abort suspend.
+
+What: /sys/class/wakeup/.../expire_count
+Date: June 2019
+Contact: Tri Vo <trong@android.com>
+Description:
+ This file contains the number of times the wakeup source's
+ timeout has expired.
+
+What: /sys/class/wakeup/.../active_time_ms
+Date: June 2019
+Contact: Tri Vo <trong@android.com>
+Description:
+ This file contains the amount of time the wakeup source has
+ been continuously active, in milliseconds. If the wakeup
+ source is not active, this file contains '0'.
+
+What: /sys/class/wakeup/.../total_time_ms
+Date: June 2019
+Contact: Tri Vo <trong@android.com>
+Description:
+ This file contains the total amount of time this wakeup source
+ has been active, in milliseconds.
+
+What: /sys/class/wakeup/.../max_time_ms
+Date: June 2019
+Contact: Tri Vo <trong@android.com>
+Description:
+ This file contains the maximum amount of time this wakeup
+ source has been continuously active, in milliseconds.
+
+What: /sys/class/wakeup/.../last_change_ms
+Date: June 2019
+Contact: Tri Vo <trong@android.com>
+Description:
+ This file contains the monotonic clock time when the wakeup
+ source was touched last time, in milliseconds.
+
+What: /sys/class/wakeup/.../prevent_suspend_time_ms
+Date: June 2019
+Contact: Tri Vo <trong@android.com>
+Description:
+ The file contains the total amount of time this wakeup source
+ has been preventing autosleep, in milliseconds.
diff --git a/Documentation/ABI/testing/sysfs-class-watchdog b/Documentation/ABI/testing/sysfs-class-watchdog
index 736046b33040..70eabccf0557 100644
--- a/Documentation/ABI/testing/sysfs-class-watchdog
+++ b/Documentation/ABI/testing/sysfs-class-watchdog
@@ -6,6 +6,19 @@ Description:
device at boot. It is equivalent to WDIOC_GETBOOTSTATUS of
ioctl interface.
+What: /sys/class/watchdog/watchdogn/options
+Date: April 2023
+Contact: Thomas Weißschuh
+Description:
+ It is a read only file. It contains options of watchdog device.
+
+What: /sys/class/watchdog/watchdogn/fw_version
+Date: April 2023
+Contact: Thomas Weißschuh
+Description:
+ It is a read only file. It contains firmware version of
+ watchdog device.
+
What: /sys/class/watchdog/watchdogn/identity
Date: August 2015
Contact: Wim Van Sebroeck <wim@iguana.be>
@@ -17,8 +30,13 @@ What: /sys/class/watchdog/watchdogn/nowayout
Date: August 2015
Contact: Wim Van Sebroeck <wim@iguana.be>
Description:
- It is a read only file. While reading, it gives '1' if that
- device supports nowayout feature else, it gives '0'.
+ It is a read/write file. While reading, it gives '1'
+ if the device has the nowayout feature set, otherwise
+ it gives '0'. Writing a '1' to the file enables the
+ nowayout feature. Once set, the nowayout feature
+ cannot be disabled, so writing a '0' either has no
+ effect (if the feature was already disabled) or
+ results in a permission error.
What: /sys/class/watchdog/watchdogn/state
Date: August 2015
@@ -49,3 +67,63 @@ Contact: Wim Van Sebroeck <wim@iguana.be>
Description:
It is a read only file. It is read to know about current
value of timeout programmed.
+
+What: /sys/class/watchdog/watchdogn/pretimeout
+Date: December 2016
+Contact: Wim Van Sebroeck <wim@iguana.be>
+Description:
+ It is a read only file. It specifies the time in seconds before
+ timeout when the pretimeout interrupt is delivered. Pretimeout
+ is an optional feature.
+
+What: /sys/class/watchdog/watchdogn/pretimeout_available_governors
+Date: February 2017
+Contact: Wim Van Sebroeck <wim@iguana.be>
+Description:
+ It is a read only file. It shows the pretimeout governors
+ available for this watchdog.
+
+What: /sys/class/watchdog/watchdogn/pretimeout_governor
+Date: February 2017
+Contact: Wim Van Sebroeck <wim@iguana.be>
+Description:
+ It is a read/write file. When read, the currently assigned
+ pretimeout governor is returned. When written, it sets
+ the pretimeout governor.
+
+What: /sys/class/watchdog/watchdog1/access_cs0
+Date: August 2019
+Contact: Ivan Mikhaylov <i.mikhaylov@yadro.com>,
+ Alexander Amelkin <a.amelkin@yadro.com>
+Description:
+ It is a read/write file. This attribute exists only if the
+ system has booted from the alternate flash chip due to
+ expiration of a watchdog timer of AST2400/AST2500 when
+ alternate boot function was enabled with 'aspeed,alt-boot'
+ devicetree option for that watchdog or with an appropriate
+ h/w strapping (for WDT2 only).
+
+ At alternate flash the 'access_cs0' sysfs node provides:
+
+ ast2400:
+ a way to get access to the primary SPI flash
+ chip at CS0 after booting from the alternate
+ chip at CS1.
+ ast2500:
+ a way to restore the normal address mapping
+ from (CS0->CS1, CS1->CS0) to (CS0->CS0,
+ CS1->CS1).
+
+ Clearing the boot code selection and timeout counter also
+ resets to the initial state the chip select line mapping. When
+ the SoC is in normal mapping state (i.e. booted from CS0),
+ clearing those bits does nothing for both versions of the SoC.
+ For alternate boot mode (booted from CS1 due to wdt2
+ expiration) the behavior differs as described above.
+
+ This option can be used with wdt2 (watchdog1) only.
+
+ When read, the current status of the boot code selection is
+ shown. When written with any non-zero value, it clears
+ the boot code selection and the timeout counter, which results
+ in chipselect reset for AST2400/AST2500.
diff --git a/Documentation/ABI/testing/sysfs-dev b/Documentation/ABI/testing/sysfs-dev
index a9f2b8b0530f..d1739063e762 100644
--- a/Documentation/ABI/testing/sysfs-dev
+++ b/Documentation/ABI/testing/sysfs-dev
@@ -9,9 +9,10 @@ Description: The /sys/dev tree provides a method to look up the sysfs
the form "<major>:<minor>". These links point to the
corresponding sysfs path for the given device.
- Example:
- $ readlink /sys/dev/block/8:32
- ../../block/sdc
+ Example::
+
+ $ readlink /sys/dev/block/8:32
+ ../../block/sdc
Entries in /sys/dev/char and /sys/dev/block will be
dynamically created and destroyed as devices enter and
diff --git a/Documentation/ABI/testing/sysfs-devices-consumer b/Documentation/ABI/testing/sysfs-devices-consumer
new file mode 100644
index 000000000000..0809fda092e6
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-consumer
@@ -0,0 +1,9 @@
+What: /sys/devices/.../consumer:<consumer>
+Date: May 2020
+Contact: Saravana Kannan <saravanak@google.com>
+Description:
+ The /sys/devices/.../consumer:<consumer> are symlinks to device
+ links where this device is the supplier. <consumer> denotes the
+ name of the consumer in that device link and is of the form
+ bus:device name. There can be zero or more of these symlinks
+ for a given device.
diff --git a/Documentation/ABI/testing/sysfs-devices-mapping b/Documentation/ABI/testing/sysfs-devices-mapping
new file mode 100644
index 000000000000..2eee1446ad4c
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-mapping
@@ -0,0 +1,62 @@
+What: /sys/devices/uncore_iio_x/dieX
+Date: February 2020
+Contact: Alexander Antonov <alexander.antonov@linux.intel.com>
+Description:
+ Each IIO stack (PCIe root port) has its own IIO PMON block, so
+ each dieX file (where X is die number) holds "Segment:Root Bus"
+ for PCIe root port, which can be monitored by that IIO PMON
+ block.
+ For example, on 4-die Xeon platform with up to 6 IIO stacks per
+ die and, therefore, 6 IIO PMON blocks per die, the mapping of
+ IIO PMON block 0 exposes as the following::
+
+ $ ls /sys/devices/uncore_iio_0/die*
+ -r--r--r-- /sys/devices/uncore_iio_0/die0
+ -r--r--r-- /sys/devices/uncore_iio_0/die1
+ -r--r--r-- /sys/devices/uncore_iio_0/die2
+ -r--r--r-- /sys/devices/uncore_iio_0/die3
+
+ $ tail /sys/devices/uncore_iio_0/die*
+ ==> /sys/devices/uncore_iio_0/die0 <==
+ 0000:00
+ ==> /sys/devices/uncore_iio_0/die1 <==
+ 0000:40
+ ==> /sys/devices/uncore_iio_0/die2 <==
+ 0000:80
+ ==> /sys/devices/uncore_iio_0/die3 <==
+ 0000:c0
+
+ Which means::
+
+ IIO PMU 0 on die 0 belongs to PCI RP on bus 0x00, domain 0x0000
+ IIO PMU 0 on die 1 belongs to PCI RP on bus 0x40, domain 0x0000
+ IIO PMU 0 on die 2 belongs to PCI RP on bus 0x80, domain 0x0000
+ IIO PMU 0 on die 3 belongs to PCI RP on bus 0xc0, domain 0x0000
+
+What: /sys/devices/uncore_upi_x/dieX
+Date: March 2022
+Contact: Alexander Antonov <alexander.antonov@linux.intel.com>
+Description:
+ Each /sys/devices/uncore_upi_X/dieY file holds "upi_Z,die_W"
+ value that means UPI link number X on die Y is connected to UPI
+ link Z on die W and this link between sockets can be monitored
+ by UPI PMON block.
+ For example, 4-die Sapphire Rapids platform has the following
+ UPI 0 topology::
+
+ # tail /sys/devices/uncore_upi_0/die*
+ ==> /sys/devices/uncore_upi_0/die0 <==
+ upi_1,die_1
+ ==> /sys/devices/uncore_upi_0/die1 <==
+ upi_0,die_3
+ ==> /sys/devices/uncore_upi_0/die2 <==
+ upi_1,die_3
+ ==> /sys/devices/uncore_upi_0/die3 <==
+ upi_0,die_1
+
+ Which means::
+
+ UPI link 0 on die 0 is connected to UPI link 1 on die 1
+ UPI link 0 on die 1 is connected to UPI link 0 on die 3
+ UPI link 0 on die 2 is connected to UPI link 1 on die 3
+ UPI link 0 on die 3 is connected to UPI link 0 on die 1 \ No newline at end of file
diff --git a/Documentation/ABI/testing/sysfs-devices-memory b/Documentation/ABI/testing/sysfs-devices-memory
index deef3b5723cf..cec65827e602 100644
--- a/Documentation/ABI/testing/sysfs-devices-memory
+++ b/Documentation/ABI/testing/sysfs-devices-memory
@@ -13,21 +13,22 @@ What: /sys/devices/system/memory/memoryX/removable
Date: June 2008
Contact: Badari Pulavarty <pbadari@us.ibm.com>
Description:
- The file /sys/devices/system/memory/memoryX/removable
- indicates whether this memory block is removable or not.
- This is useful for a user-level agent to determine
- identify removable sections of the memory before attempting
- potentially expensive hot-remove memory operation
+ The file /sys/devices/system/memory/memoryX/removable is a
+ legacy interface used to indicated whether a memory block is
+ likely to be offlineable or not. Newer kernel versions return
+ "1" if and only if the kernel supports memory offlining.
Users: hotplug memory remove tools
- http://www.ibm.com/developerworks/wikis/display/LinuxP/powerpc-utils
+ http://www.ibm.com/developerworks/wikis/display/LinuxP/powerpc-utils
+ lsmem/chmem part of util-linux
What: /sys/devices/system/memory/memoryX/phys_device
Date: September 2008
Contact: Badari Pulavarty <pbadari@us.ibm.com>
Description:
The file /sys/devices/system/memory/memoryX/phys_device
- is read-only and is designed to show the name of physical
- memory device. Implementation is currently incomplete.
+ is read-only; it is a legacy interface only ever used on s390x
+ to expose the covered storage increment.
+Users: Legacy s390-tools lsmem/chmem
What: /sys/devices/system/memory/memoryX/phys_index
Date: September 2008
@@ -43,20 +44,25 @@ Date: September 2008
Contact: Badari Pulavarty <pbadari@us.ibm.com>
Description:
The file /sys/devices/system/memory/memoryX/state
- is read-write. When read, its contents show the
- online/offline state of the memory section. When written,
- root can toggle the the online/offline state of a removable
- memory section (see removable file description above)
- using the following commands.
- # echo online > /sys/devices/system/memory/memoryX/state
- # echo offline > /sys/devices/system/memory/memoryX/state
-
- For example, if /sys/devices/system/memory/memory22/removable
- contains a value of 1 and
- /sys/devices/system/memory/memory22/state contains the
- string "online" the following command can be executed by
- by root to offline that section.
- # echo offline > /sys/devices/system/memory/memory22/state
+ is read-write. When read, it returns the online/offline
+ state of the memory block. When written, root can toggle
+ the online/offline state of a memory block using the following
+ commands::
+
+ # echo online > /sys/devices/system/memory/memoryX/state
+ # echo offline > /sys/devices/system/memory/memoryX/state
+
+ On newer kernel versions, advanced states can be specified
+ when onlining to select a target zone: "online_movable"
+ selects the movable zone. "online_kernel" selects the
+ applicable kernel zone (DMA, DMA32, or Normal). However,
+ after successfully setting one of the advanced states,
+ reading the file will return "online"; the zone information
+ can be obtained via "valid_zones" instead.
+
+ While onlining is unlikely to fail, there are no guarantees
+ that offlining will succeed. Offlining is more likely to
+ succeed if "valid_zones" indicates "Movable".
Users: hotplug memory remove tools
http://www.ibm.com/developerworks/wikis/display/LinuxP/powerpc-utils
@@ -66,8 +72,19 @@ Date: July 2014
Contact: Zhang Zhen <zhenzhang.zhang@huawei.com>
Description:
The file /sys/devices/system/memory/memoryX/valid_zones is
- read-only and is designed to show which zone this memory
- block can be onlined to.
+ read-only.
+
+ For online memory blocks, it returns in which zone memory
+ provided by a memory block is managed. If multiple zones
+ apply (not applicable for hotplugged memory), "None" is returned
+ and the memory block cannot be offlined.
+
+ For offline memory blocks, it returns by which zone memory
+ provided by a memory block can be managed when onlining.
+ The first returned zone ("default") will be used when setting
+ the state of an offline memory block to "online". Only one of
+ the kernel zones (DMA, DMA32, Normal) is applicable for a single
+ memory block.
What: /sys/devices/system/memoryX/nodeY
Date: October 2009
@@ -78,6 +95,7 @@ Description:
For example, the following symbolic link is created for
memory section 9 on node0:
+
/sys/devices/system/memory/memory9/node0 -> ../../node/node0
@@ -90,4 +108,13 @@ Description:
points to the corresponding /sys/devices/system/memory/memoryY
memory section directory. For example, the following symbolic
link is created for memory section 9 on node0.
+
/sys/devices/system/node/node0/memory9 -> ../../memory/memory9
+
+What: /sys/devices/system/memory/crash_hotplug
+Date: Aug 2023
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description:
+ (RO) indicates whether or not the kernel updates relevant kexec
+ segments on memory hot un/plug and/or on/offline events, avoiding the
+ need to reload kdump kernel.
diff --git a/Documentation/ABI/testing/sysfs-devices-online b/Documentation/ABI/testing/sysfs-devices-online
index f990026c0740..c3359fec2980 100644
--- a/Documentation/ABI/testing/sysfs-devices-online
+++ b/Documentation/ABI/testing/sysfs-devices-online
@@ -17,4 +17,4 @@ Description:
After a successful execution of the bus type's .offline()
callback the device cannot be used for any purpose until either
it is removed (i.e. device_del() is called for it), or its bus
- type's .online() is exeucted successfully.
+ type's .online() is executed successfully.
diff --git a/Documentation/ABI/testing/sysfs-devices-physical_location b/Documentation/ABI/testing/sysfs-devices-physical_location
new file mode 100644
index 000000000000..202324b87083
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-physical_location
@@ -0,0 +1,42 @@
+What: /sys/devices/.../physical_location
+Date: March 2022
+Contact: Won Chung <wonchung@google.com>
+Description:
+ This directory contains information on physical location of
+ the device connection point with respect to the system's
+ housing.
+
+What: /sys/devices/.../physical_location/panel
+Date: March 2022
+Contact: Won Chung <wonchung@google.com>
+Description:
+ Describes which panel surface of the system’s housing the
+ device connection point resides on.
+
+What: /sys/devices/.../physical_location/vertical_position
+Date: March 2022
+Contact: Won Chung <wonchung@google.com>
+Description:
+ Describes vertical position of the device connection point on
+ the panel surface.
+
+What: /sys/devices/.../physical_location/horizontal_position
+Date: March 2022
+Contact: Won Chung <wonchung@google.com>
+Description:
+ Describes horizontal position of the device connection point on
+ the panel surface.
+
+What: /sys/devices/.../physical_location/dock
+Date: March 2022
+Contact: Won Chung <wonchung@google.com>
+Description:
+ "Yes" if the device connection point resides in a docking
+ station or a port replicator. "No" otherwise.
+
+What: /sys/devices/.../physical_location/lid
+Date: March 2022
+Contact: Won Chung <wonchung@google.com>
+Description:
+ "Yes" if the device connection point resides on the lid of
+ laptop system. "No" otherwise.
diff --git a/Documentation/ABI/testing/sysfs-devices-platform-ACPI-TAD b/Documentation/ABI/testing/sysfs-devices-platform-ACPI-TAD
index 7e43cdce9a52..bc44bc903bc8 100644
--- a/Documentation/ABI/testing/sysfs-devices-platform-ACPI-TAD
+++ b/Documentation/ABI/testing/sysfs-devices-platform-ACPI-TAD
@@ -7,6 +7,7 @@ Description:
(RO) Hexadecimal bitmask of the TAD attributes are reported by
the platform firmware (see ACPI 6.2, section 9.18.2):
+ ======= ======================================================
BIT(0): AC wakeup implemented if set
BIT(1): DC wakeup implemented if set
BIT(2): Get/set real time features implemented if set
@@ -16,6 +17,7 @@ Description:
BIT(6): The AC timer wakes up from S5 if set
BIT(7): The DC timer wakes up from S4 if set
BIT(8): The DC timer wakes up from S5 if set
+ ======= ======================================================
The other bits are reserved.
@@ -62,15 +64,17 @@ Description:
timer status with the following meaning of bits (see ACPI 6.2,
Section 9.18.5):
+ ======= ======================================================
Bit(0): The timer has expired if set.
Bit(1): The timer has woken up the system from a sleep state
(S3 or S4/S5 if supported) if set.
+ ======= ======================================================
The other bits are reserved.
Reads also cause the AC alarm timer status to be reset.
- Another way to reset the the status of the AC alarm timer is to
+ Another way to reset the status of the AC alarm timer is to
write (the number) 0 to this file.
If the status return value indicates that the timer has expired,
diff --git a/Documentation/ABI/testing/sysfs-devices-platform-_UDC_-gadget b/Documentation/ABI/testing/sysfs-devices-platform-_UDC_-gadget
index d548eaac230a..40f29a01fd14 100644
--- a/Documentation/ABI/testing/sysfs-devices-platform-_UDC_-gadget
+++ b/Documentation/ABI/testing/sysfs-devices-platform-_UDC_-gadget
@@ -3,8 +3,9 @@ Date: April 2010
Contact: Fabien Chouteau <fabien.chouteau@barco.com>
Description:
Show the suspend state of an USB composite gadget.
- 1 -> suspended
- 0 -> resumed
+
+ - 1 -> suspended
+ - 0 -> resumed
(_UDC_ is the name of the USB Device Controller driver)
@@ -17,5 +18,6 @@ Description:
Storage mode.
Possible values are:
- 1 -> ignore the FUA flag
- 0 -> obey the FUA flag
+
+ - 1 -> ignore the FUA flag
+ - 0 -> obey the FUA flag
diff --git a/Documentation/ABI/testing/sysfs-devices-platform-docg3 b/Documentation/ABI/testing/sysfs-devices-platform-docg3
index 8aa36716882f..378c42694bfb 100644
--- a/Documentation/ABI/testing/sysfs-devices-platform-docg3
+++ b/Documentation/ABI/testing/sysfs-devices-platform-docg3
@@ -9,8 +9,10 @@ Description:
The protection has information embedded whether it blocks reads,
writes or both.
The result is:
- 0 -> the DPS is not keylocked
- 1 -> the DPS is keylocked
+
+ - 0 -> the DPS is not keylocked
+ - 1 -> the DPS is keylocked
+
Users: None identified so far.
What: /sys/devices/platform/docg3/f[0-3]_dps[01]_protection_key
@@ -27,8 +29,12 @@ Description:
Entering the correct value toggle the lock, and can be observed
through f[0-3]_dps[01]_is_keylocked.
Possible values are:
+
- 8 bytes
+
Typical values are:
+
- "00000000"
- "12345678"
+
Users: None identified so far.
diff --git a/Documentation/ABI/testing/sysfs-devices-platform-dock b/Documentation/ABI/testing/sysfs-devices-platform-dock
index 1d8c18f905c7..411c174de830 100644
--- a/Documentation/ABI/testing/sysfs-devices-platform-dock
+++ b/Documentation/ABI/testing/sysfs-devices-platform-dock
@@ -1,4 +1,4 @@
-What: /sys/devices/platform/dock.N/docked
+What: /sys/devices/platform/dock.<N>/docked
Date: Dec, 2006
KernelVersion: 2.6.19
Contact: linux-acpi@vger.kernel.org
@@ -6,7 +6,7 @@ Description:
(RO) Value 1 or 0 indicates whether the software believes the
laptop is docked in a docking station.
-What: /sys/devices/platform/dock.N/undock
+What: /sys/devices/platform/dock.<N>/undock
Date: Dec, 2006
KernelVersion: 2.6.19
Contact: linux-acpi@vger.kernel.org
@@ -14,14 +14,14 @@ Description:
(WO) Writing to this file causes the software to initiate an
undock request to the firmware.
-What: /sys/devices/platform/dock.N/uid
+What: /sys/devices/platform/dock.<N>/uid
Date: Feb, 2007
KernelVersion: v2.6.21
Contact: linux-acpi@vger.kernel.org
Description:
(RO) Displays the docking station the laptop is docked to.
-What: /sys/devices/platform/dock.N/flags
+What: /sys/devices/platform/dock.<N>/flags
Date: May, 2007
KernelVersion: v2.6.21
Contact: linux-acpi@vger.kernel.org
@@ -30,7 +30,7 @@ Description:
request has been made by the user (from the immediate_undock
option).
-What: /sys/devices/platform/dock.N/type
+What: /sys/devices/platform/dock.<N>/type
Date: Aug, 2008
KernelVersion: v2.6.27
Contact: linux-acpi@vger.kernel.org
diff --git a/Documentation/ABI/testing/sysfs-devices-platform-ipmi b/Documentation/ABI/testing/sysfs-devices-platform-ipmi
index 2a781e7513b7..07df0ddc0b69 100644
--- a/Documentation/ABI/testing/sysfs-devices-platform-ipmi
+++ b/Documentation/ABI/testing/sysfs-devices-platform-ipmi
@@ -123,38 +123,40 @@ KernelVersion: v4.15
Contact: openipmi-developer@lists.sourceforge.net
Description:
- idles: (RO) Number of times the interface was
+ ====================== ========================================
+ idles (RO) Number of times the interface was
idle while being polled.
- watchdog_pretimeouts: (RO) Number of watchdog pretimeouts.
+ watchdog_pretimeouts (RO) Number of watchdog pretimeouts.
- complete_transactions: (RO) Number of completed messages.
+ complete_transactions (RO) Number of completed messages.
- events: (RO) Number of IPMI events received from
+ events (RO) Number of IPMI events received from
the hardware.
- interrupts: (RO) Number of interrupts the driver
+ interrupts (RO) Number of interrupts the driver
handled.
- hosed_count: (RO) Number of times the hardware didn't
+ hosed_count (RO) Number of times the hardware didn't
follow the state machine.
- long_timeouts: (RO) Number of times the driver
+ long_timeouts (RO) Number of times the driver
requested a timer while nothing was in
progress.
- flag_fetches: (RO) Number of times the driver
+ flag_fetches (RO) Number of times the driver
requested flags from the hardware.
- attentions: (RO) Number of time the driver got an
+ attentions (RO) Number of time the driver got an
ATTN from the hardware.
- incoming_messages: (RO) Number of asynchronous messages
+ incoming_messages (RO) Number of asynchronous messages
received.
- short_timeouts: (RO) Number of times the driver
+ short_timeouts (RO) Number of times the driver
requested a timer while an operation was
in progress.
+ ====================== ========================================
What: /sys/devices/platform/ipmi_si.*/interrupts_enabled
@@ -201,38 +203,40 @@ Date: Sep, 2017
KernelVersion: v4.15
Contact: openipmi-developer@lists.sourceforge.net
Description:
- hosed: (RO) Number of times the hardware didn't
+ ====================== ========================================
+ hosed (RO) Number of times the hardware didn't
follow the state machine.
- alerts: (RO) Number of alerts received.
+ alerts (RO) Number of alerts received.
- sent_messages: (RO) Number of total messages sent.
+ sent_messages (RO) Number of total messages sent.
- sent_message_parts: (RO) Number of message parts sent.
+ sent_message_parts (RO) Number of message parts sent.
Messages may be broken into parts if
they are long.
- receieved_messages: (RO) Number of message responses
+ received_messages (RO) Number of message responses
received.
- received_message_parts: (RO) Number of message fragments
+ received_message_parts (RO) Number of message fragments
received.
- events: (RO) Number of received events.
+ events (RO) Number of received events.
- watchdog_pretimeouts: (RO) Number of watchdog pretimeouts.
+ watchdog_pretimeouts (RO) Number of watchdog pretimeouts.
- flag_fetches: (RO) Number of times a flag fetch was
+ flag_fetches (RO) Number of times a flag fetch was
requested.
- send_retries: (RO) Number of time a message was
+ send_retries (RO) Number of time a message was
retried.
- receive_retries: (RO) Number of times the receive of a
+ receive_retries (RO) Number of times the receive of a
message was retried.
- send_errors: (RO) Number of times the send of a
+ send_errors (RO) Number of times the send of a
message failed.
- receive_errors: (RO) Number of errors in receiving
+ receive_errors (RO) Number of errors in receiving
messages.
+ ====================== ========================================
diff --git a/Documentation/ABI/testing/sysfs-devices-platform-kunpeng_hccs b/Documentation/ABI/testing/sysfs-devices-platform-kunpeng_hccs
new file mode 100644
index 000000000000..d1b3a95a5518
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-platform-kunpeng_hccs
@@ -0,0 +1,126 @@
+What: /sys/devices/platform/HISI04Bx:00/chipX/all_linked
+What: /sys/devices/platform/HISI04Bx:00/chipX/linked_full_lane
+What: /sys/devices/platform/HISI04Bx:00/chipX/crc_err_cnt
+Date: November 2023
+KernelVersion: 6.6
+Contact: Huisong Li <lihuisong@huawei.com>
+Description:
+ The /sys/devices/platform/HISI04Bx:00/chipX/ directory
+ contains read-only attributes exposing some summarization
+ information of all HCCS ports under a specified chip.
+ The X in 'chipX' indicates the Xth chip on platform.
+
+ There are following attributes in this directory:
+
+ ================= ==== =========================================
+ all_linked: (RO) if all enabled ports on this chip are
+ linked (bool).
+ linked_full_lane: (RO) if all linked ports on this chip are full
+ lane (bool).
+ crc_err_cnt: (RO) total CRC err count for all ports on this
+ chip.
+ ================= ==== =========================================
+
+What: /sys/devices/platform/HISI04Bx:00/chipX/dieY/all_linked
+What: /sys/devices/platform/HISI04Bx:00/chipX/dieY/linked_full_lane
+What: /sys/devices/platform/HISI04Bx:00/chipX/dieY/crc_err_cnt
+Date: November 2023
+KernelVersion: 6.6
+Contact: Huisong Li <lihuisong@huawei.com>
+Description:
+ The /sys/devices/platform/HISI04Bx:00/chipX/dieY/ directory
+ contains read-only attributes exposing some summarization
+ information of all HCCS ports under a specified die.
+ The Y in 'dieY' indicates the hardware id of the die on chip who
+ has chip id X.
+
+ There are following attributes in this directory:
+
+ ================= ==== =========================================
+ all_linked: (RO) if all enabled ports on this die are
+ linked (bool).
+ linked_full_lane: (RO) if all linked ports on this die are full
+ lane (bool).
+ crc_err_cnt: (RO) total CRC err count for all ports on this
+ die.
+ ================= ==== =========================================
+
+What: /sys/devices/platform/HISI04Bx:00/chipX/dieY/hccsN/type
+What: /sys/devices/platform/HISI04Bx:00/chipX/dieY/hccsN/lane_mode
+What: /sys/devices/platform/HISI04Bx:00/chipX/dieY/hccsN/enable
+What: /sys/devices/platform/HISI04Bx:00/chipX/dieY/hccsN/cur_lane_num
+What: /sys/devices/platform/HISI04Bx:00/chipX/dieY/hccsN/link_fsm
+What: /sys/devices/platform/HISI04Bx:00/chipX/dieY/hccsN/lane_mask
+What: /sys/devices/platform/HISI04Bx:00/chipX/dieY/hccsN/crc_err_cnt
+Date: November 2023
+KernelVersion: 6.6
+Contact: Huisong Li <lihuisong@huawei.com>
+Description:
+ The /sys/devices/platform/HISI04Bx/chipX/dieX/hccsN/ directory
+ contains read-only attributes exposing information about
+ a HCCS port. The N value in 'hccsN' indicates this port id.
+ The X in 'chipX' indicates the ID of the chip to which the
+ HCCS port belongs. For example, X ranges from to 'n - 1' if the
+ chip number on platform is n.
+ The Y in 'dieY' indicates the hardware id of the die to which
+ the hccs port belongs.
+ Note: type, lane_mode and enable are fixed attributes on running
+ platform.
+
+ The HCCS port have the following attributes:
+
+ ============= ==== =============================================
+ type: (RO) port type (string), e.g. HCCS-v1 -> H32
+ lane_mode: (RO) the lane mode of this port (string), e.g. x8
+ enable: (RO) indicate if this port is enabled (bool).
+ cur_lane_num: (RO) current lane number of this port.
+ link_fsm: (RO) link finite state machine of this port.
+ lane_mask: (RO) current lane mask of this port, every bit
+ indicates a lane.
+ crc_err_cnt: (RO) CRC err count on this port.
+ ============= ==== =============================================
+
+What: /sys/devices/platform/HISI04Bx:00/used_types
+Date: August 2024
+KernelVersion: 6.12
+Contact: Huisong Li <lihuisong@huawei.com>
+Description:
+ This interface is used to show all HCCS types used on the
+ platform, like, HCCS-v1, HCCS-v2 and so on.
+
+What: /sys/devices/platform/HISI04Bx:00/available_inc_dec_lane_types
+What: /sys/devices/platform/HISI04Bx:00/dec_lane_of_type
+What: /sys/devices/platform/HISI04Bx:00/inc_lane_of_type
+Date: August 2024
+KernelVersion: 6.12
+Contact: Huisong Li <lihuisong@huawei.com>
+Description:
+ These interfaces under /sys/devices/platform/HISI04Bx/ are
+ used to support the low power consumption feature of some
+ HCCS types by changing the number of lanes used. The interfaces
+ changing the number of lanes used are 'dec_lane_of_type' and
+ 'inc_lane_of_type' which require root privileges. These
+ interfaces aren't exposed if no HCCS type on platform support
+ this feature. Please note that decreasing lane number is only
+ allowed if all the specified HCCS ports are not busy.
+
+ The low power consumption interfaces are as follows:
+
+ ============================= ==== ================================
+ available_inc_dec_lane_types: (RO) available HCCS types (string) to
+ increase and decrease the number
+ of lane used, e.g. HCCS-v2.
+ dec_lane_of_type: (WO) input HCCS type supported
+ decreasing lane to decrease the
+ used lane number of all specified
+ HCCS type ports on platform to
+ the minimum.
+ You can query the 'cur_lane_num'
+ to get the minimum lane number
+ after executing successfully.
+ inc_lane_of_type: (WO) input HCCS type supported
+ increasing lane to increase the
+ used lane number of all specified
+ HCCS type ports on platform to
+ the full lane state.
+ ============================= ==== ================================
diff --git a/Documentation/ABI/testing/sysfs-devices-platform-sh_mobile_lcdc_fb b/Documentation/ABI/testing/sysfs-devices-platform-sh_mobile_lcdc_fb
index 2107082426da..e45ac2e865d5 100644
--- a/Documentation/ABI/testing/sysfs-devices-platform-sh_mobile_lcdc_fb
+++ b/Documentation/ABI/testing/sysfs-devices-platform-sh_mobile_lcdc_fb
@@ -17,10 +17,10 @@ Description:
to overlay planes.
Selects the composition mode for the overlay. Possible values
- are
+ are:
- 0 - Alpha Blending
- 1 - ROP3
+ - 0 - Alpha Blending
+ - 1 - ROP3
What: /sys/devices/platform/sh_mobile_lcdc_fb.[0-3]/graphics/fb[0-9]/ovl_position
Date: May 2012
@@ -30,7 +30,7 @@ Description:
to overlay planes.
Stores the x,y overlay position on the display in pixels. The
- position format is `[0-9]+,[0-9]+'.
+ position format is `[0-9]+,[0-9]+`.
What: /sys/devices/platform/sh_mobile_lcdc_fb.[0-3]/graphics/fb[0-9]/ovl_rop3
Date: May 2012
diff --git a/Documentation/ABI/testing/sysfs-devices-platform-soc-ipa b/Documentation/ABI/testing/sysfs-devices-platform-soc-ipa
new file mode 100644
index 000000000000..364b1ba41242
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-platform-soc-ipa
@@ -0,0 +1,114 @@
+What: /sys/devices/platform/soc@X/XXXXXXX.ipa/
+Date: June 2021
+KernelVersion: v5.14
+Contact: Alex Elder <elder@kernel.org>
+Description:
+ The /sys/devices/platform/soc@X/XXXXXXX.ipa/ directory
+ contains read-only attributes exposing information about
+ an IPA device. The X values could vary, but are typically
+ "soc@0/1e40000.ipa".
+
+What: .../XXXXXXX.ipa/version
+Date: June 2021
+KernelVersion: v5.14
+Contact: Alex Elder <elder@kernel.org>
+Description:
+ The .../XXXXXXX.ipa/version file contains the IPA hardware
+ version, as a period-separated set of two or three integers
+ (e.g., "3.5.1" or "4.2").
+
+What: .../XXXXXXX.ipa/feature/
+Date: June 2021
+KernelVersion: v5.14
+Contact: Alex Elder <elder@kernel.org>
+Description:
+ The .../XXXXXXX.ipa/feature/ directory contains a set of
+ attributes describing features implemented by the IPA
+ hardware.
+
+What: .../XXXXXXX.ipa/feature/rx_offload
+Date: June 2021
+KernelVersion: v5.14
+Contact: Alex Elder <elder@kernel.org>
+Description:
+ The .../XXXXXXX.ipa/feature/rx_offload file contains a
+ string indicating the type of receive checksum offload
+ that is supported by the hardware. The possible values
+ are "MAPv4" or "MAPv5".
+
+What: .../XXXXXXX.ipa/feature/tx_offload
+Date: June 2021
+KernelVersion: v5.14
+Contact: Alex Elder <elder@kernel.org>
+Description:
+ The .../XXXXXXX.ipa/feature/tx_offload file contains a
+ string indicating the type of transmit checksum offload
+ that is supported by the hardware. The possible values
+ are "MAPv4" or "MAPv5".
+
+What: .../XXXXXXX.ipa/endpoint_id/
+Date: July 2022
+KernelVersion: v5.19
+Contact: Alex Elder <elder@kernel.org>
+Description:
+ The .../XXXXXXX.ipa/endpoint_id/ directory contains
+ attributes that define IDs associated with IPA
+ endpoints. The "rx" or "tx" in an endpoint name is
+ from the perspective of the AP. An endpoint ID is a
+ small unsigned integer.
+
+What: .../XXXXXXX.ipa/endpoint_id/modem_rx
+Date: July 2022
+KernelVersion: v5.19
+Contact: Alex Elder <elder@kernel.org>
+Description:
+ The .../XXXXXXX.ipa/endpoint_id/modem_rx file contains
+ the ID of the AP endpoint on which packets originating
+ from the embedded modem are received.
+
+What: .../XXXXXXX.ipa/endpoint_id/modem_tx
+Date: July 2022
+KernelVersion: v5.19
+Contact: Alex Elder <elder@kernel.org>
+Description:
+ The .../XXXXXXX.ipa/endpoint_id/modem_tx file contains
+ the ID of the AP endpoint on which packets destined
+ for the embedded modem are sent.
+
+What: .../XXXXXXX.ipa/endpoint_id/monitor_rx
+Date: July 2022
+KernelVersion: v5.19
+Contact: Alex Elder <elder@kernel.org>
+Description:
+ The .../XXXXXXX.ipa/endpoint_id/monitor_rx file contains
+ the ID of the AP endpoint on which IPA "monitor" data is
+ received. The monitor endpoint supplies replicas of
+ packets that enter the IPA hardware for processing.
+ Each replicated packet is preceded by a fixed-size "ODL"
+ header (see .../XXXXXXX.ipa/feature/monitor, above).
+ Large packets are truncated, to reduce the bandwidth
+ required to provide the monitor function.
+
+What: .../XXXXXXX.ipa/modem/
+Date: June 2021
+KernelVersion: v5.14
+Contact: Alex Elder <elder@kernel.org>
+Description:
+ The .../XXXXXXX.ipa/modem/ directory contains attributes
+ describing properties of the modem embedded in the SoC.
+
+What: .../XXXXXXX.ipa/modem/rx_endpoint_id
+Date: June 2021
+KernelVersion: v5.14
+Contact: Alex Elder <elder@kernel.org>
+Description:
+ The .../XXXXXXX.ipa/modem/rx_endpoint_id file duplicates
+ the value found in .../XXXXXXX.ipa/endpoint_id/modem_rx.
+
+What: .../XXXXXXX.ipa/modem/tx_endpoint_id
+Date: June 2021
+KernelVersion: v5.14
+Contact: Alex Elder <elder@kernel.org>
+Description:
+ The .../XXXXXXX.ipa/modem/tx_endpoint_id file duplicates
+ the value found in .../XXXXXXX.ipa/endpoint_id/modem_tx.
diff --git a/Documentation/ABI/testing/sysfs-devices-platform-stratix10-rsu b/Documentation/ABI/testing/sysfs-devices-platform-stratix10-rsu
new file mode 100644
index 000000000000..ee253b033280
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-platform-stratix10-rsu
@@ -0,0 +1,174 @@
+ Intel Stratix10 Remote System Update (RSU) device attributes
+
+What: /sys/devices/platform/stratix10-rsu.0/current_image
+Date: August 2019
+KernelVersion: 5.4
+Contact: Richard Gong <richard.gong@linux.intel.com>
+Description:
+ (RO) the address in flash of currently running image.
+
+What: /sys/devices/platform/stratix10-rsu.0/fail_image
+Date: August 2019
+KernelVersion: 5.4
+Contact: Richard Gong <richard.gong@linux.intel.com>
+Description:
+ (RO) the address in flash of failed image.
+
+What: /sys/devices/platform/stratix10-rsu.0/state
+Date: August 2019
+KernelVersion: 5.4
+Contact: Richard Gong <richard.gong@linux.intel.com>
+Description:
+ (RO) the state of RSU system.
+ The state field has two parts: major error code in
+ upper 16 bits and minor error code in lower 16 bits.
+
+ b[15:0]
+ Currently used only when major error is 0xF006
+ (CPU watchdog timeout), in which case the minor
+ error code is the value reported by CPU to
+ firmware through the RSU notify command before
+ the watchdog timeout occurs.
+
+ b[31:16]
+ 0xF001 bitstream error
+ 0xF002 hardware access failure
+ 0xF003 bitstream corruption
+ 0xF004 internal error
+ 0xF005 device error
+ 0xF006 CPU watchdog timeout
+ 0xF007 internal unknown error
+
+What: /sys/devices/platform/stratix10-rsu.0/version
+Date: August 2019
+KernelVersion: 5.4
+Contact: Richard Gong <richard.gong@linux.intel.com>
+Description:
+ (RO) the version number of RSU firmware. 19.3 or late
+ version includes information about the firmware which
+ reported the error.
+
+ pre 19.3:
+ b[31:0]
+ 0x0 version number
+
+ 19.3 or late:
+ b[15:0]
+ 0x1 version number
+ b[31:16]
+ 0x0 no error
+ 0x0DCF Decision CMF error
+ 0x0ACF Application CMF error
+
+What: /sys/devices/platform/stratix10-rsu.0/error_location
+Date: August 2019
+KernelVersion: 5.4
+Contact: Richard Gong <richard.gong@linux.intel.com>
+Description:
+ (RO) the error offset inside the image that failed.
+
+What: /sys/devices/platform/stratix10-rsu.0/error_details
+Date: August 2019
+KernelVersion: 5.4
+Contact: Richard Gong <richard.gong@linux.intel.com>
+Description:
+ (RO) error code.
+
+What: /sys/devices/platform/stratix10-rsu.0/retry_counter
+Date: August 2019
+KernelVersion: 5.4
+Contact: Richard Gong <richard.gong@linux.intel.com>
+Description:
+ (RO) the current image's retry counter, which is used by
+ user to know how many times the images is still allowed
+ to reload itself before giving up and starting RSU
+ fail-over flow.
+
+What: /sys/devices/platform/stratix10-rsu.0/reboot_image
+Date: August 2019
+KernelVersion: 5.4
+Contact: Richard Gong <richard.gong@linux.intel.com>
+Description:
+ (WO) the address in flash of image to be loaded on next
+ reboot command.
+
+What: /sys/devices/platform/stratix10-rsu.0/notify
+Date: August 2019
+KernelVersion: 5.4
+Contact: Richard Gong <richard.gong@linux.intel.com>
+Description:
+ (WO) client to notify firmware with different actions.
+
+ b[15:0]
+ inform firmware the current software execution
+ stage.
+
+ == ===========================================
+ 0 the first stage bootloader didn't run or
+ didn't reach the point of launching second
+ stage bootloader.
+ 1 failed in second bootloader or didn't get
+ to the point of launching the operating
+ system.
+ 2 both first and second stage bootloader ran
+ and the operating system launch was
+ attempted.
+ == ===========================================
+
+ b[16]
+ == ===========================================
+ 1 firmware to reset current image retry
+ counter.
+ 0 no action.
+ == ===========================================
+
+ b[17]
+ == ===========================================
+ 1 firmware to clear RSU log
+ 0 no action.
+ == ===========================================
+
+ b[18]
+ this is negative logic
+
+ == ===========================================
+ 1 no action
+ 0 firmware record the notify code defined
+ in b[15:0].
+ == ===========================================
+
+What: /sys/devices/platform/stratix10-rsu.0/dcmf0
+Date: June 2020
+KernelVersion: 5.8
+Contact: Richard Gong <richard.gong@linux.intel.com>
+Description:
+ (RO) Decision firmware copy 0 version information.
+
+What: /sys/devices/platform/stratix10-rsu.0/dcmf1
+Date: June 2020
+KernelVersion: 5.8
+Contact: Richard Gong <richard.gong@linux.intel.com>
+Description:
+ (RO) Decision firmware copy 1 version information.
+
+What: /sys/devices/platform/stratix10-rsu.0/dcmf2
+Date: June 2020
+KernelVersion: 5.8
+Contact: Richard Gong <richard.gong@linux.intel.com>
+Description:
+ (RO) Decision firmware copy 2 version information.
+
+What: /sys/devices/platform/stratix10-rsu.0/dcmf3
+Date: June 2020
+KernelVersion: 5.8
+Contact: Richard Gong <richard.gong@linux.intel.com>
+Description:
+ (RO) Decision firmware copy 3 version information.
+
+What: /sys/devices/platform/stratix10-rsu.0/max_retry
+Date: June 2020
+KernelVersion: 5.8
+Contact: Richard Gong <richard.gong@linux.intel.com>
+Description:
+ (RO) max retry parameter is stored in the firmware
+ decision IO section, as a byte located at offset 0x18c.
diff --git a/Documentation/ABI/testing/sysfs-devices-power b/Documentation/ABI/testing/sysfs-devices-power
index 80a00f7b6667..9bf7c8a267c5 100644
--- a/Documentation/ABI/testing/sysfs-devices-power
+++ b/Documentation/ABI/testing/sysfs-devices-power
@@ -1,6 +1,6 @@
What: /sys/devices/.../power/
Date: January 2009
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/devices/.../power directory contains attributes
allowing the user space to check and modify some power
@@ -8,7 +8,7 @@ Description:
What: /sys/devices/.../power/wakeup
Date: January 2009
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/devices/.../power/wakeup attribute allows the user
space to check if the device is enabled to wake up the system
@@ -34,7 +34,7 @@ Description:
What: /sys/devices/.../power/control
Date: January 2009
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/devices/.../power/control attribute allows the user
space to control the run-time power management of the device.
@@ -53,10 +53,10 @@ Description:
What: /sys/devices/.../power/async
Date: January 2009
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/devices/.../async attribute allows the user space to
- enable or diasble the device's suspend and resume callbacks to
+ enable or disable the device's suspend and resume callbacks to
be executed asynchronously (ie. in separate threads, in parallel
with the main suspend/resume thread) during system-wide power
transitions (eg. suspend to RAM, hibernation).
@@ -79,7 +79,7 @@ Description:
What: /sys/devices/.../power/wakeup_count
Date: September 2010
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/devices/.../wakeup_count attribute contains the number
of signaled wakeup events associated with the device. This
@@ -90,7 +90,7 @@ Description:
What: /sys/devices/.../power/wakeup_active_count
Date: September 2010
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/devices/.../wakeup_active_count attribute contains the
number of times the processing of wakeup events associated with
@@ -102,7 +102,7 @@ Description:
What: /sys/devices/.../power/wakeup_abort_count
Date: February 2012
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/devices/.../wakeup_abort_count attribute contains the
number of times the processing of a wakeup event associated with
@@ -114,7 +114,7 @@ Description:
What: /sys/devices/.../power/wakeup_expire_count
Date: February 2012
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/devices/.../wakeup_expire_count attribute contains the
number of times a wakeup event associated with the device has
@@ -126,7 +126,7 @@ Description:
What: /sys/devices/.../power/wakeup_active
Date: September 2010
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/devices/.../wakeup_active attribute contains either 1,
or 0, depending on whether or not a wakeup event associated with
@@ -138,7 +138,7 @@ Description:
What: /sys/devices/.../power/wakeup_total_time_ms
Date: September 2010
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/devices/.../wakeup_total_time_ms attribute contains
the total time of processing wakeup events associated with the
@@ -149,7 +149,7 @@ Description:
What: /sys/devices/.../power/wakeup_max_time_ms
Date: September 2010
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/devices/.../wakeup_max_time_ms attribute contains
the maximum time of processing a single wakeup event associated
@@ -161,7 +161,7 @@ Description:
What: /sys/devices/.../power/wakeup_last_time_ms
Date: September 2010
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/devices/.../wakeup_last_time_ms attribute contains
the value of the monotonic clock corresponding to the time of
@@ -173,7 +173,7 @@ Description:
What: /sys/devices/.../power/wakeup_prevent_sleep_time_ms
Date: February 2012
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/devices/.../wakeup_prevent_sleep_time_ms attribute
contains the total time the device has been preventing
@@ -203,7 +203,7 @@ Description:
What: /sys/devices/.../power/pm_qos_resume_latency_us
Date: March 2012
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/devices/.../power/pm_qos_resume_latency_us attribute
contains the PM QoS resume latency limit for the given device,
@@ -223,7 +223,7 @@ Description:
What: /sys/devices/.../power/pm_qos_latency_tolerance_us
Date: January 2014
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/devices/.../power/pm_qos_latency_tolerance_us attribute
contains the PM QoS active state latency tolerance limit for the
@@ -248,7 +248,7 @@ Description:
What: /sys/devices/.../power/pm_qos_no_power_off
Date: September 2012
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/devices/.../power/pm_qos_no_power_off attribute
is used for manipulating the PM QoS "no power off" flag. If
@@ -260,3 +260,48 @@ Description:
This attribute has no effect on system-wide suspend/resume and
hibernation.
+
+What: /sys/devices/.../power/runtime_status
+Date: April 2010
+Contact: Rafael J. Wysocki <rafael@kernel.org>
+Description:
+ The /sys/devices/.../power/runtime_status attribute contains
+ the current runtime PM status of the device, which may be
+ "suspended", "suspending", "resuming", "active", "error" (fatal
+ error), or "unsupported" (runtime PM is disabled).
+
+What: /sys/devices/.../power/runtime_active_time
+Date: Jul 2010
+Contact: Arjan van de Ven <arjan@linux.intel.com>
+Description:
+ Reports the total time that the device has been active, in
+ milliseconds. Used for runtime PM statistics.
+
+What: /sys/devices/.../power/runtime_suspended_time
+Date: Jul 2010
+Contact: Arjan van de Ven <arjan@linux.intel.com>
+Description:
+ Reports total time that the device has been suspended, in
+ milliseconds. Used for runtime PM statistics.
+
+What: /sys/devices/.../power/runtime_usage
+Date: Apr 2010
+Contact: Dominik Brodowski <linux@dominikbrodowski.net>
+Description:
+ Reports the runtime PM usage count of a device.
+
+What: /sys/devices/.../power/runtime_enabled
+Date: Apr 2010
+Contact: Dominik Brodowski <linux@dominikbrodowski.net>
+Description:
+ Is runtime PM enabled for this device?
+ States are "enabled", "disabled", "forbidden" or a
+ combination of the latter two.
+
+What: /sys/devices/.../power/runtime_active_kids
+Date: Apr 2010
+Contact: Dominik Brodowski <linux@dominikbrodowski.net>
+Description:
+ Reports the runtime PM children usage count of a device, or
+ 0 if the children will be ignored.
+
diff --git a/Documentation/ABI/testing/sysfs-devices-removable b/Documentation/ABI/testing/sysfs-devices-removable
new file mode 100644
index 000000000000..754ecb4587ca
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-removable
@@ -0,0 +1,20 @@
+What: /sys/devices/.../removable
+Date: May 2021
+Contact: Rajat Jain <rajatxjain@gmail.com>
+Description:
+ Information about whether a given device can be removed from the
+ platform by the user. This is determined by its subsystem in a
+ bus / platform-specific way. This attribute is only present for
+ devices that can support determining such information:
+
+ =========== ===================================================
+ "removable" device can be removed from the platform by the user
+ "fixed" device is fixed to the platform / cannot be removed
+ by the user.
+ "unknown" The information is unavailable / cannot be deduced.
+ =========== ===================================================
+
+ Currently this is only supported by USB (which infers the
+ information from a combination of hub descriptor bits and
+ platform-specific data such as ACPI) and PCI (which gets this
+ from ACPI / device tree).
diff --git a/Documentation/ABI/testing/sysfs-devices-soc b/Documentation/ABI/testing/sysfs-devices-soc
index 6d9cc253f2b2..5269808ec35f 100644
--- a/Documentation/ABI/testing/sysfs-devices-soc
+++ b/Documentation/ABI/testing/sysfs-devices-soc
@@ -1,6 +1,6 @@
What: /sys/devices/socX
Date: January 2012
-contact: Lee Jones <lee.jones@linaro.org>
+contact: Lee Jones <lee@kernel.org>
Description:
The /sys/devices/ directory contains a sub-directory for each
System-on-Chip (SoC) device on a running platform. Information
@@ -14,42 +14,79 @@ Description:
What: /sys/devices/socX/machine
Date: January 2012
-contact: Lee Jones <lee.jones@linaro.org>
+contact: Lee Jones <lee@kernel.org>
Description:
Read-only attribute common to all SoCs. Contains the SoC machine
name (e.g. Ux500).
What: /sys/devices/socX/family
Date: January 2012
-contact: Lee Jones <lee.jones@linaro.org>
+contact: Lee Jones <lee@kernel.org>
Description:
Read-only attribute common to all SoCs. Contains SoC family name
(e.g. DB8500).
+ On many of ARM based silicon with SMCCC v1.2+ compliant firmware
+ this will contain the JEDEC JEP106 manufacturer’s identification
+ code. The format is "jep106:XXYY" where XX is identity code and
+ YY is continuation code.
+
+ This manufacturer’s identification code is defined by one
+ or more eight (8) bit fields, each consisting of seven (7)
+ data bits plus one (1) odd parity bit. It is a single field,
+ limiting the possible number of vendors to 126. To expand
+ the maximum number of identification codes, a continuation
+ scheme has been defined.
+
+ The specified mechanism is that an identity code of 0x7F
+ represents the "continuation code" and implies the presence
+ of an additional identity code field, and this mechanism
+ may be extended to multiple continuation codes followed
+ by the manufacturer's identity code.
+
+ For example, ARM has identity code 0x7F 0x7F 0x7F 0x7F 0x3B,
+ which is code 0x3B on the fifth 'page'. This is shortened
+ as JEP106 identity code of 0x3B and a continuation code of
+ 0x4 to represent the four continuation codes preceding the
+ identity code.
+
+What: /sys/devices/socX/serial_number
+Date: January 2019
+contact: Bjorn Andersson <bjorn.andersson@linaro.org>
+Description:
+ Read-only attribute supported by most SoCs. Contains the SoC's
+ serial number, if available.
+
What: /sys/devices/socX/soc_id
Date: January 2012
-contact: Lee Jones <lee.jones@linaro.org>
+contact: Lee Jones <lee@kernel.org>
Description:
Read-only attribute supported by most SoCs. In the case of
ST-Ericsson's chips this contains the SoC serial number.
+ On many of ARM based silicon with SMCCC v1.2+ compliant firmware
+ this will contain the SOC ID appended to the family attribute
+ to ensure there is no conflict in this namespace across various
+ vendors. The format is "jep106:XXYY:ZZZZ" where XX is identity
+ code, YY is continuation code and ZZZZ is the SOC ID.
+
What: /sys/devices/socX/revision
Date: January 2012
-contact: Lee Jones <lee.jones@linaro.org>
+contact: Lee Jones <lee@kernel.org>
Description:
Read-only attribute supported by most SoCs. Contains the SoC's
manufacturing revision number.
What: /sys/devices/socX/process
Date: January 2012
-contact: Lee Jones <lee.jones@linaro.org>
+contact: Lee Jones <lee@kernel.org>
Description:
Read-only attribute supported ST-Ericsson's silicon. Contains the
the process by which the silicon chip was manufactured.
What: /sys/bus/soc
Date: January 2012
-contact: Lee Jones <lee.jones@linaro.org>
+contact: Lee Jones <lee@kernel.org>
Description:
The /sys/bus/soc/ directory contains the usual sub-folders
expected under most buses. /sys/bus/soc/devices is of particular
diff --git a/Documentation/ABI/testing/sysfs-devices-software_node b/Documentation/ABI/testing/sysfs-devices-software_node
new file mode 100644
index 000000000000..85df37de359f
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-software_node
@@ -0,0 +1,10 @@
+What: /sys/devices/.../software_node/
+Date: January 2019
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+ This directory contains the details about the device that are
+ assigned in kernel (i.e. software), as opposed to the
+ firmware_node directory which contains the details that are
+ assigned for the device in firmware. The main attributes in the
+ directory will show the properties the device has, and the
+ relationship it has to some of the other devices.
diff --git a/Documentation/ABI/testing/sysfs-devices-state_synced b/Documentation/ABI/testing/sysfs-devices-state_synced
new file mode 100644
index 000000000000..c64636ddac41
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-state_synced
@@ -0,0 +1,29 @@
+What: /sys/devices/.../state_synced
+Date: May 2020
+Contact: Saravana Kannan <saravanak@google.com>
+Description:
+ The /sys/devices/.../state_synced attribute is only present for
+ devices whose bus types or driver provides the .sync_state()
+ callback. The number read from it (0 or 1) reflects the value
+ of the device's 'state_synced' field. A value of 0 means the
+ .sync_state() callback hasn't been called yet. A value of 1
+ means the .sync_state() callback has been called.
+
+ Generally, if a device has sync_state() support and has some of
+ the resources it provides enabled at the time the kernel starts
+ (Eg: enabled by hardware reset or bootloader or anything that
+ run before the kernel starts), then it'll keep those resources
+ enabled and in a state that's compatible with the state they
+ were in at the start of the kernel. The device will stop doing
+ this only when the sync_state() callback has been called --
+ which happens only when all its consumer devices are registered
+ and have probed successfully. Resources that were left disabled
+ at the time the kernel starts are not affected or limited in
+ any way by sync_state() callbacks.
+
+ Writing "1" to this file will force a call to the device's
+ sync_state() function if it hasn't been called already. The
+ sync_state() call happens independent of the state of the
+ consumer devices.
+
+
diff --git a/Documentation/ABI/testing/sysfs-devices-supplier b/Documentation/ABI/testing/sysfs-devices-supplier
new file mode 100644
index 000000000000..207f5972e98d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-supplier
@@ -0,0 +1,9 @@
+What: /sys/devices/.../supplier:<supplier>
+Date: May 2020
+Contact: Saravana Kannan <saravanak@google.com>
+Description:
+ The /sys/devices/.../supplier:<supplier> are symlinks to device
+ links where this device is the consumer. <supplier> denotes the
+ name of the supplier in that device link and is of the form
+ bus:device name. There can be zero or more of these symlinks
+ for a given device.
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index 73318225a368..8aed6d94c4cd 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -7,7 +7,7 @@ Description:
Individual CPU attributes are contained in subdirectories
named by the kernel's logical CPU number, e.g.:
- /sys/devices/system/cpu/cpu#/
+ /sys/devices/system/cpu/cpuX/
What: /sys/devices/system/cpu/kernel_max
/sys/devices/system/cpu/offline
@@ -34,7 +34,7 @@ Description: CPU topology files that describe kernel limits related to
present: cpus that have been identified as being present in
the system.
- See Documentation/cputopology.txt for more information.
+ See Documentation/admin-guide/cputopology.rst for more information.
What: /sys/devices/system/cpu/probe
@@ -50,10 +50,10 @@ Description: Dynamic addition and removal of CPU's. This is not hotplug
architecture specific.
release: writes to this file dynamically remove a CPU from
- the system. Information writtento the file to remove CPU's
+ the system. Information written to the file to remove CPU's
is architecture specific.
-What: /sys/devices/system/cpu/cpu#/node
+What: /sys/devices/system/cpu/cpuX/node
Date: October 2009
Contact: Linux memory management mailing list <linux-mm@kvack.org>
Description: Discover NUMA node a CPU belongs to
@@ -67,49 +67,51 @@ Description: Discover NUMA node a CPU belongs to
/sys/devices/system/cpu/cpu42/node2 -> ../../node/node2
-What: /sys/devices/system/cpu/cpu#/topology/core_id
- /sys/devices/system/cpu/cpu#/topology/core_siblings
- /sys/devices/system/cpu/cpu#/topology/core_siblings_list
- /sys/devices/system/cpu/cpu#/topology/physical_package_id
- /sys/devices/system/cpu/cpu#/topology/thread_siblings
- /sys/devices/system/cpu/cpu#/topology/thread_siblings_list
+What: /sys/devices/system/cpu/cpuX/topology/core_siblings
+ /sys/devices/system/cpu/cpuX/topology/core_siblings_list
+ /sys/devices/system/cpu/cpuX/topology/physical_package_id
+ /sys/devices/system/cpu/cpuX/topology/thread_siblings
+ /sys/devices/system/cpu/cpuX/topology/thread_siblings_list
+ /sys/devices/system/cpu/cpuX/topology/ppin
Date: December 2008
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description: CPU topology files that describe a logical CPU's relationship
to other cores and threads in the same physical package.
- One cpu# directory is created per logical CPU in the system,
+ One cpuX directory is created per logical CPU in the system,
e.g. /sys/devices/system/cpu/cpu42/.
Briefly, the files above are:
- core_id: the CPU core ID of cpu#. Typically it is the
- hardware platform's identifier (rather than the kernel's).
- The actual value is architecture and platform dependent.
-
- core_siblings: internal kernel map of cpu#'s hardware threads
+ core_siblings: internal kernel map of cpuX's hardware threads
within the same physical_package_id.
core_siblings_list: human-readable list of the logical CPU
- numbers within the same physical_package_id as cpu#.
+ numbers within the same physical_package_id as cpuX.
- physical_package_id: physical package id of cpu#. Typically
+ physical_package_id: physical package id of cpuX. Typically
corresponds to a physical socket number, but the actual value
is architecture and platform dependent.
- thread_siblings: internel kernel map of cpu#'s hardware
- threads within the same core as cpu#
+ thread_siblings: internal kernel map of cpuX's hardware
+ threads within the same core as cpuX
- thread_siblings_list: human-readable list of cpu#'s hardware
- threads within the same core as cpu#
+ thread_siblings_list: human-readable list of cpuX's hardware
+ threads within the same core as cpuX
- See Documentation/cputopology.txt for more information.
+ ppin: human-readable Protected Processor Identification
+ Number of the socket the cpu# belongs to. There should be
+ one per physical_package_id. File is readable only to
+ admin.
+ See Documentation/admin-guide/cputopology.rst for more information.
-What: /sys/devices/system/cpu/cpuidle/current_driver
- /sys/devices/system/cpu/cpuidle/current_governer_ro
- /sys/devices/system/cpu/cpuidle/available_governors
+
+What: /sys/devices/system/cpu/cpuidle/available_governors
+ /sys/devices/system/cpu/cpuidle/current_driver
/sys/devices/system/cpu/cpuidle/current_governor
+ /sys/devices/system/cpu/cpuidle/current_governer_ro
+ /sys/devices/system/cpu/cpuidle/intel_c1_demotion
Date: September 2007
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description: Discover cpuidle policy and mechanism
@@ -119,32 +121,33 @@ Description: Discover cpuidle policy and mechanism
consumption during idle.
Idle policy (governor) is differentiated from idle mechanism
- (driver)
-
- current_driver: (RO) displays current idle mechanism
-
- current_governor_ro: (RO) displays current idle policy
-
- With the cpuidle_sysfs_switch boot option enabled (meant for
- developer testing), the following three attributes are visible
- instead:
-
- current_driver: same as described above
+ (driver).
available_governors: (RO) displays a space separated list of
- available governors
+ available governors.
+
+ current_driver: (RO) displays current idle mechanism.
current_governor: (RW) displays current idle policy. Users can
switch the governor at runtime by writing to this file.
- See files in Documentation/cpuidle/ for more information.
+ current_governor_ro: (RO) displays current idle policy.
+
+ intel_c1_demotion: (RW) enables/disables the C1 demotion
+ feature on Intel CPUs.
+ See Documentation/admin-guide/pm/cpuidle.rst,
+ Documentation/admin-guide/pm/intel_idle.rst, and
+ Documentation/driver-api/pm/cpuidle.rst for more information.
-What: /sys/devices/system/cpu/cpuX/cpuidle/stateN/name
+
+What: /sys/devices/system/cpu/cpuX/cpuidle/state<N>/name
/sys/devices/system/cpu/cpuX/cpuidle/stateN/latency
/sys/devices/system/cpu/cpuX/cpuidle/stateN/power
/sys/devices/system/cpu/cpuX/cpuidle/stateN/time
/sys/devices/system/cpu/cpuX/cpuidle/stateN/usage
+ /sys/devices/system/cpu/cpuX/cpuidle/stateN/above
+ /sys/devices/system/cpu/cpuX/cpuidle/stateN/below
Date: September 2007
KernelVersion: v2.6.24
Contact: Linux power management list <linux-pm@vger.kernel.org>
@@ -154,20 +157,30 @@ Description:
The processor idle states which are available for use have the
following attributes:
- name: (RO) Name of the idle state (string).
+ ======== ==== =================================================
+ name: (RO) Name of the idle state (string).
latency: (RO) The latency to exit out of this idle state (in
- microseconds).
+ microseconds).
+
+ power: (RO) The power consumed while in this idle state (in
+ milliwatts).
- power: (RO) The power consumed while in this idle state (in
- milliwatts).
+ time: (RO) The total time spent in this idle state
+ (in microseconds).
- time: (RO) The total time spent in this idle state (in microseconds).
+ usage: (RO) Number of times this state was entered (a count).
- usage: (RO) Number of times this state was entered (a count).
+ above: (RO) Number of times this state was entered, but the
+ observed CPU idle duration was too short for it
+ (a count).
+ below: (RO) Number of times this state was entered, but the
+ observed CPU idle duration was too long for it
+ (a count).
+ ======== ==== =================================================
-What: /sys/devices/system/cpu/cpuX/cpuidle/stateN/desc
+What: /sys/devices/system/cpu/cpuX/cpuidle/state<N>/desc
Date: February 2008
KernelVersion: v2.6.25
Contact: Linux power management list <linux-pm@vger.kernel.org>
@@ -175,7 +188,7 @@ Description:
(RO) A small description about the idle state (string).
-What: /sys/devices/system/cpu/cpuX/cpuidle/stateN/disable
+What: /sys/devices/system/cpu/cpuX/cpuidle/state<N>/disable
Date: March 2012
KernelVersion: v3.10
Contact: Linux power management list <linux-pm@vger.kernel.org>
@@ -188,8 +201,14 @@ Description:
does not reflect it. Likewise, if one enables a deep state but a
lighter state still is disabled, then this has no effect.
+What: /sys/devices/system/cpu/cpuX/cpuidle/state<N>/default_status
+Date: December 2019
+KernelVersion: v5.6
+Contact: Linux power management list <linux-pm@vger.kernel.org>
+Description:
+ (RO) The default status of this state, "enabled" or "disabled".
-What: /sys/devices/system/cpu/cpuX/cpuidle/stateN/residency
+What: /sys/devices/system/cpu/cpuX/cpuidle/state<N>/residency
Date: March 2014
KernelVersion: v3.15
Contact: Linux power management list <linux-pm@vger.kernel.org>
@@ -198,7 +217,7 @@ Description:
time (in microseconds) this cpu should spend in this idle state
to make the transition worth the effort.
-What: /sys/devices/system/cpu/cpuX/cpuidle/stateN/s2idle/
+What: /sys/devices/system/cpu/cpuX/cpuidle/state<N>/s2idle/
Date: March 2018
KernelVersion: v4.17
Contact: Linux power management list <linux-pm@vger.kernel.org>
@@ -208,7 +227,7 @@ Description:
This attribute group is only present for states that can be
used in suspend-to-idle with suspended timekeeping.
-What: /sys/devices/system/cpu/cpuX/cpuidle/stateN/s2idle/time
+What: /sys/devices/system/cpu/cpuX/cpuidle/state<N>/s2idle/time
Date: March 2018
KernelVersion: v4.17
Contact: Linux power management list <linux-pm@vger.kernel.org>
@@ -216,7 +235,7 @@ Description:
Total time spent by the CPU in suspend-to-idle (with scheduler
tick suspended) after requesting this state.
-What: /sys/devices/system/cpu/cpuX/cpuidle/stateN/s2idle/usage
+What: /sys/devices/system/cpu/cpuX/cpuidle/state<N>/s2idle/usage
Date: March 2018
KernelVersion: v4.17
Contact: Linux power management list <linux-pm@vger.kernel.org>
@@ -224,7 +243,7 @@ Description:
Total number of times this state has been requested by the CPU
while entering suspend-to-idle.
-What: /sys/devices/system/cpu/cpu#/cpufreq/*
+What: /sys/devices/system/cpu/cpuX/cpufreq/*
Date: pre-git history
Contact: linux-pm@vger.kernel.org
Description: Discover and change clock speed of CPUs
@@ -239,7 +258,7 @@ Description: Discover and change clock speed of CPUs
See files in Documentation/cpu-freq/ for more information.
-What: /sys/devices/system/cpu/cpu#/cpufreq/freqdomain_cpus
+What: /sys/devices/system/cpu/cpuX/cpufreq/freqdomain_cpus
Date: June 2013
Contact: linux-pm@vger.kernel.org
Description: Discover CPUs in the same CPU frequency coordination domain
@@ -251,7 +270,62 @@ Description: Discover CPUs in the same CPU frequency coordination domain
attribute is useful for user space DVFS controllers to get better
power/performance results for platforms using acpi-cpufreq.
- This file is only present if the acpi-cpufreq driver is in use.
+ This file is only present if the acpi-cpufreq or the cppc-cpufreq
+ drivers are in use.
+
+What: /sys/devices/system/cpu/cpuX/cpufreq/auto_select
+Date: May 2025
+Contact: linux-pm@vger.kernel.org
+Description: Autonomous selection enable
+
+ Read/write interface to control autonomous selection enable
+ Read returns autonomous selection status:
+ 0: autonomous selection is disabled
+ 1: autonomous selection is enabled
+
+ Write 'y' or '1' or 'on' to enable autonomous selection.
+ Write 'n' or '0' or 'off' to disable autonomous selection.
+
+ This file is only present if the cppc-cpufreq driver is in use.
+
+What: /sys/devices/system/cpu/cpuX/cpufreq/auto_act_window
+Date: May 2025
+Contact: linux-pm@vger.kernel.org
+Description: Autonomous activity window
+
+ This file indicates a moving utilization sensitivity window to
+ the platform's autonomous selection policy.
+
+ Read/write an integer represents autonomous activity window (in
+ microseconds) from/to this file. The max value to write is
+ 1270000000 but the max significand is 127. This means that if 128
+ is written to this file, 127 will be stored. If the value is
+ greater than 130, only the first two digits will be saved as
+ significand.
+
+ Writing a zero value to this file enable the platform to
+ determine an appropriate Activity Window depending on the workload.
+
+ Writing to this file only has meaning when Autonomous Selection is
+ enabled.
+
+ This file is only present if the cppc-cpufreq driver is in use.
+
+What: /sys/devices/system/cpu/cpuX/cpufreq/energy_performance_preference_val
+Date: May 2025
+Contact: linux-pm@vger.kernel.org
+Description: Energy performance preference
+
+ Read/write an 8-bit integer from/to this file. This file
+ represents a range of values from 0 (performance preference) to
+ 0xFF (energy efficiency preference) that influences the rate of
+ performance increase/decrease and the result of the hardware's
+ energy efficiency and performance optimization policies.
+
+ Writing to this file only has meaning when Autonomous Selection is
+ enabled.
+
+ This file is only present if the cppc-cpufreq driver is in use.
What: /sys/devices/system/cpu/cpu*/cache/index3/cache_disable_{0,1}
@@ -266,12 +340,12 @@ Description: Disable L3 cache indices
on a processor with this functionality will return the currently
disabled index for that node. There is one L3 structure per
node, or per internal node on MCM machines. Writing a valid
- index to one of these files will cause the specificed cache
+ index to one of these files will cause the specified cache
index to be disabled.
All AMD processors with L3 caches provide this functionality.
For details, see BKDGs at
- http://developer.amd.com/documentation/guides/Pages/default.aspx
+ https://www.amd.com/en/support/tech-docs?keyword=bios+kernel
What: /sys/devices/system/cpu/cpufreq/boost
@@ -281,21 +355,22 @@ Description: Processor frequency boosting control
This switch controls the boost setting for the whole system.
Boosting allows the CPU and the firmware to run at a frequency
- beyound it's nominal limit.
+ beyond its nominal limit.
+
More details can be found in
Documentation/admin-guide/pm/cpufreq.rst
-What: /sys/devices/system/cpu/cpu#/crash_notes
- /sys/devices/system/cpu/cpu#/crash_notes_size
+What: /sys/devices/system/cpu/cpuX/crash_notes
+ /sys/devices/system/cpu/cpuX/crash_notes_size
Date: April 2013
Contact: kexec@lists.infradead.org
Description: address and size of the percpu note.
crash_notes: the physical address of the memory that holds the
- note of cpu#.
+ note of cpuX.
- crash_notes_size: size of the note of cpu#.
+ crash_notes_size: size of the note of cpuX.
What: /sys/devices/system/cpu/intel_pstate/max_perf_pct
@@ -329,43 +404,54 @@ Contact: Sudeep Holla <sudeep.holla@arm.com>
Description: Parameters for the CPU cache attributes
allocation_policy:
- - WriteAllocate: allocate a memory location to a cache line
- on a cache miss because of a write
- - ReadAllocate: allocate a memory location to a cache line
+ - WriteAllocate:
+ allocate a memory location to a cache line
+ on a cache miss because of a write
+ - ReadAllocate:
+ allocate a memory location to a cache line
on a cache miss because of a read
- - ReadWriteAllocate: both writeallocate and readallocate
-
- attributes: LEGACY used only on IA64 and is same as write_policy
+ - ReadWriteAllocate:
+ both writeallocate and readallocate
- coherency_line_size: the minimum amount of data in bytes that gets
+ coherency_line_size:
+ the minimum amount of data in bytes that gets
transferred from memory to cache
- level: the cache hierarchy in the multi-level cache configuration
+ level:
+ the cache hierarchy in the multi-level cache configuration
- number_of_sets: total number of sets in the cache, a set is a
+ number_of_sets:
+ total number of sets in the cache, a set is a
collection of cache lines with the same cache index
- physical_line_partition: number of physical cache line per cache tag
+ physical_line_partition:
+ number of physical cache line per cache tag
- shared_cpu_list: the list of logical cpus sharing the cache
+ shared_cpu_list:
+ the list of logical cpus sharing the cache
- shared_cpu_map: logical cpu mask containing the list of cpus sharing
+ shared_cpu_map:
+ logical cpu mask containing the list of cpus sharing
the cache
- size: the total cache size in kB
+ size:
+ the total cache size in kB
type:
- Instruction: cache that only holds instructions
- Data: cache that only caches data
- Unified: cache that holds both data and instructions
- ways_of_associativity: degree of freedom in placing a particular block
- of memory in the cache
+ ways_of_associativity:
+ degree of freedom in placing a particular block
+ of memory in the cache
write_policy:
- - WriteThrough: data is written to both the cache line
+ - WriteThrough:
+ data is written to both the cache line
and to the block in the lower-level memory
- - WriteBack: data is written only to the cache line and
+ - WriteBack:
+ data is written only to the cache line and
the modified cache line is written to main
memory only when it is replaced
@@ -396,7 +482,7 @@ What: /sys/devices/system/cpu/cpuX/cpufreq/throttle_stats
/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/occ_reset
Date: March 2016
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
- Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
+ Linux for PowerPC mailing list <linuxppc-dev@lists.ozlabs.org>
Description: POWERNV CPUFreq driver's frequency throttle stats directory and
attributes
@@ -406,30 +492,30 @@ Description: POWERNV CPUFreq driver's frequency throttle stats directory and
throttle attributes exported in the 'throttle_stats' directory:
- turbo_stat : This file gives the total number of times the max
- frequency is throttled to lower frequency in turbo (at and above
- nominal frequency) range of frequencies.
+ frequency is throttled to lower frequency in turbo (at and above
+ nominal frequency) range of frequencies.
- sub_turbo_stat : This file gives the total number of times the
- max frequency is throttled to lower frequency in sub-turbo(below
- nominal frequency) range of frequencies.
+ max frequency is throttled to lower frequency in sub-turbo(below
+ nominal frequency) range of frequencies.
- unthrottle : This file gives the total number of times the max
- frequency is unthrottled after being throttled.
+ frequency is unthrottled after being throttled.
- powercap : This file gives the total number of times the max
- frequency is throttled due to 'Power Capping'.
+ frequency is throttled due to 'Power Capping'.
- overtemp : This file gives the total number of times the max
- frequency is throttled due to 'CPU Over Temperature'.
+ frequency is throttled due to 'CPU Over Temperature'.
- supply_fault : This file gives the total number of times the
- max frequency is throttled due to 'Power Supply Failure'.
+ max frequency is throttled due to 'Power Supply Failure'.
- overcurrent : This file gives the total number of times the
- max frequency is throttled due to 'Overcurrent'.
+ max frequency is throttled due to 'Overcurrent'.
- occ_reset : This file gives the total number of times the max
- frequency is throttled due to 'OCC Reset'.
+ frequency is throttled due to 'OCC Reset'.
The sysfs attributes representing different throttle reasons like
powercap, overtemp, supply_fault, overcurrent and occ_reset map to
@@ -446,7 +532,7 @@ What: /sys/devices/system/cpu/cpufreq/policyX/throttle_stats
/sys/devices/system/cpu/cpufreq/policyX/throttle_stats/occ_reset
Date: March 2016
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
- Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
+ Linux for PowerPC mailing list <linuxppc-dev@lists.ozlabs.org>
Description: POWERNV CPUFreq driver's frequency throttle stats directory and
attributes
@@ -458,25 +544,49 @@ What: /sys/devices/system/cpu/cpuX/regs/
/sys/devices/system/cpu/cpuX/regs/identification/
/sys/devices/system/cpu/cpuX/regs/identification/midr_el1
/sys/devices/system/cpu/cpuX/regs/identification/revidr_el1
+ /sys/devices/system/cpu/cpuX/regs/identification/aidr_el1
+ /sys/devices/system/cpu/cpuX/regs/identification/smidr_el1
Date: June 2016
Contact: Linux ARM Kernel Mailing list <linux-arm-kernel@lists.infradead.org>
Description: AArch64 CPU registers
+
'identification' directory exposes the CPU ID registers for
- identifying model and revision of the CPU.
+ identifying model and revision of the CPU and SMCU.
+
+What: /sys/devices/system/cpu/aarch32_el0
+Date: May 2021
+Contact: Linux ARM Kernel Mailing list <linux-arm-kernel@lists.infradead.org>
+Description: Identifies the subset of CPUs in the system that can execute
+ AArch32 (32-bit ARM) applications. If present, the same format as
+ /sys/devices/system/cpu/{offline,online,possible,present} is used.
+ If absent, then all or none of the CPUs can execute AArch32
+ applications and execve() will behave accordingly.
-What: /sys/devices/system/cpu/cpu#/cpu_capacity
+What: /sys/devices/system/cpu/cpuX/cpu_capacity
Date: December 2016
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description: information about CPUs heterogeneity.
- cpu_capacity: capacity of cpu#.
+ cpu_capacity: capacity of cpuX.
What: /sys/devices/system/cpu/vulnerabilities
+ /sys/devices/system/cpu/vulnerabilities/gather_data_sampling
+ /sys/devices/system/cpu/vulnerabilities/indirect_target_selection
+ /sys/devices/system/cpu/vulnerabilities/itlb_multihit
+ /sys/devices/system/cpu/vulnerabilities/l1tf
+ /sys/devices/system/cpu/vulnerabilities/mds
/sys/devices/system/cpu/vulnerabilities/meltdown
+ /sys/devices/system/cpu/vulnerabilities/mmio_stale_data
+ /sys/devices/system/cpu/vulnerabilities/old_microcode
+ /sys/devices/system/cpu/vulnerabilities/reg_file_data_sampling
+ /sys/devices/system/cpu/vulnerabilities/retbleed
+ /sys/devices/system/cpu/vulnerabilities/spec_store_bypass
/sys/devices/system/cpu/vulnerabilities/spectre_v1
/sys/devices/system/cpu/vulnerabilities/spectre_v2
- /sys/devices/system/cpu/vulnerabilities/spec_store_bypass
- /sys/devices/system/cpu/vulnerabilities/l1tf
+ /sys/devices/system/cpu/vulnerabilities/srbds
+ /sys/devices/system/cpu/vulnerabilities/tsa
+ /sys/devices/system/cpu/vulnerabilities/tsx_async_abort
+ /sys/devices/system/cpu/vulnerabilities/vmscape
Date: January 2018
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description: Information about CPU vulnerabilities
@@ -485,29 +595,185 @@ Description: Information about CPU vulnerabilities
vulnerabilities. The output of those files reflects the
state of the CPUs in the system. Possible output values:
+ ================ ==============================================
"Not affected" CPU is not affected by the vulnerability
"Vulnerable" CPU is affected and no mitigation in effect
"Mitigation: $M" CPU is affected and mitigation $M is in effect
+ ================ ==============================================
- Details about the l1tf file can be found in
- Documentation/admin-guide/l1tf.rst
+ See also: Documentation/admin-guide/hw-vuln/index.rst
What: /sys/devices/system/cpu/smt
/sys/devices/system/cpu/smt/active
/sys/devices/system/cpu/smt/control
Date: June 2018
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
-Description: Control Symetric Multi Threading (SMT)
+Description: Control Symmetric Multi Threading (SMT)
active: Tells whether SMT is active (enabled and siblings online)
control: Read/write interface to control SMT. Possible
values:
- "on" SMT is enabled
- "off" SMT is disabled
- "forceoff" SMT is force disabled. Cannot be changed.
- "notsupported" SMT is not supported by the CPU
+ ================ =========================================
+ "on" SMT is enabled
+ "off" SMT is disabled
+ "<N>" SMT is enabled with N threads per core.
+ "forceoff" SMT is force disabled. Cannot be changed.
+ "notsupported" SMT is not supported by the CPU
+ "notimplemented" SMT runtime toggling is not
+ implemented for the architecture
+ ================ =========================================
If control status is "forceoff" or "notsupported" writes
- are rejected.
+ are rejected. Note that enabling SMT on PowerPC skips
+ offline cores.
+
+What: /sys/devices/system/cpu/cpuX/power/energy_perf_bias
+Date: March 2019
+Contact: linux-pm@vger.kernel.org
+Description: Intel Energy and Performance Bias Hint (EPB)
+
+ EPB for the given CPU in a sliding scale 0 - 15, where a value
+ of 0 corresponds to a hint preference for highest performance
+ and a value of 15 corresponds to the maximum energy savings.
+
+ In order to change the EPB value for the CPU, write either
+ a number in the 0 - 15 sliding scale above, or one of the
+ strings: "performance", "balance-performance", "normal",
+ "balance-power", "power" (that represent values reflected by
+ their meaning), to this attribute.
+
+ This attribute is present for all online CPUs supporting the
+ Intel EPB feature.
+
+What: /sys/devices/system/cpu/umwait_control
+ /sys/devices/system/cpu/umwait_control/enable_c02
+ /sys/devices/system/cpu/umwait_control/max_time
+Date: May 2019
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description: Umwait control
+
+ enable_c02: Read/write interface to control umwait C0.2 state
+ Read returns C0.2 state status:
+ 0: C0.2 is disabled
+ 1: C0.2 is enabled
+
+ Write 'y' or '1' or 'on' to enable C0.2 state.
+ Write 'n' or '0' or 'off' to disable C0.2 state.
+
+ The interface is case insensitive.
+
+ max_time: Read/write interface to control umwait maximum time
+ in TSC-quanta that the CPU can reside in either C0.1
+ or C0.2 state. The time is an unsigned 32-bit number.
+ Note that a value of zero means there is no limit.
+ Low order two bits must be zero.
+
+What: /sys/devices/system/cpu/sev
+ /sys/devices/system/cpu/sev/vmpl
+Date: May 2024
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description: Secure Encrypted Virtualization (SEV) information
+
+ This directory is only present when running as an SEV-SNP guest.
+
+ vmpl: Reports the Virtual Machine Privilege Level (VMPL) at which
+ the SEV-SNP guest is running.
+
+
+What: /sys/devices/system/cpu/svm
+Date: August 2019
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+ Linux for PowerPC mailing list <linuxppc-dev@lists.ozlabs.org>
+Description: Secure Virtual Machine
+
+ If 1, it means the system is using the Protected Execution
+ Facility in POWER9 and newer processors. i.e., it is a Secure
+ Virtual Machine.
+
+What: /sys/devices/system/cpu/cpuX/purr
+Date: Apr 2005
+Contact: Linux for PowerPC mailing list <linuxppc-dev@lists.ozlabs.org>
+Description: PURR ticks for this CPU since the system boot.
+
+ The Processor Utilization Resources Register (PURR) is
+ a 64-bit counter which provides an estimate of the
+ resources used by the CPU thread. The contents of this
+ register increases monotonically. This sysfs interface
+ exposes the number of PURR ticks for cpuX.
+
+What: /sys/devices/system/cpu/cpuX/spurr
+Date: Dec 2006
+Contact: Linux for PowerPC mailing list <linuxppc-dev@lists.ozlabs.org>
+Description: SPURR ticks for this CPU since the system boot.
+
+ The Scaled Processor Utilization Resources Register
+ (SPURR) is a 64-bit counter that provides a frequency
+ invariant estimate of the resources used by the CPU
+ thread. The contents of this register increases
+ monotonically. This sysfs interface exposes the number
+ of SPURR ticks for cpuX.
+
+What: /sys/devices/system/cpu/cpuX/idle_purr
+Date: Apr 2020
+Contact: Linux for PowerPC mailing list <linuxppc-dev@lists.ozlabs.org>
+Description: PURR ticks for cpuX when it was idle.
+
+ This sysfs interface exposes the number of PURR ticks
+ for cpuX when it was idle.
+
+What: /sys/devices/system/cpu/cpuX/idle_spurr
+Date: Apr 2020
+Contact: Linux for PowerPC mailing list <linuxppc-dev@lists.ozlabs.org>
+Description: SPURR ticks for cpuX when it was idle.
+
+ This sysfs interface exposes the number of SPURR ticks
+ for cpuX when it was idle.
+
+What: /sys/devices/system/cpu/cpuX/mte_tcf_preferred
+Date: July 2021
+Contact: Linux ARM Kernel Mailing list <linux-arm-kernel@lists.infradead.org>
+Description: Preferred MTE tag checking mode
+
+ When a user program specifies more than one MTE tag checking
+ mode, this sysfs node is used to specify which mode should
+ be preferred when scheduling a task on that CPU. Possible
+ values:
+
+ ================ ==============================================
+ "sync" Prefer synchronous mode
+ "asymm" Prefer asymmetric mode
+ "async" Prefer asynchronous mode
+ ================ ==============================================
+
+ See also: Documentation/arch/arm64/memory-tagging-extension.rst
+
+What: /sys/devices/system/cpu/nohz_full
+Date: Apr 2015
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description:
+ (RO) the list of CPUs that are in nohz_full mode.
+ These CPUs are set by boot parameter "nohz_full=".
+
+What: /sys/devices/system/cpu/isolated
+Date: Apr 2015
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description:
+ (RO) the list of CPUs that are isolated and don't
+ participate in load balancing. These CPUs are set by
+ boot parameter "isolcpus=".
+
+What: /sys/devices/system/cpu/crash_hotplug
+Date: Aug 2023
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description:
+ (RO) indicates whether or not the kernel updates relevant kexec
+ segments on memory hot un/plug and/or on/offline events, avoiding the
+ need to reload kdump kernel.
+
+What: /sys/devices/system/cpu/enabled
+Date: Nov 2022
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description:
+ (RO) the list of CPUs that can be brought online.
diff --git a/Documentation/ABI/testing/sysfs-devices-system-ibm-rtl b/Documentation/ABI/testing/sysfs-devices-system-ibm-rtl
index 470def06ab0a..1a8ee26e92ae 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-ibm-rtl
+++ b/Documentation/ABI/testing/sysfs-devices-system-ibm-rtl
@@ -5,8 +5,10 @@ Contact: Vernon Mauery <vernux@us.ibm.com>
Description: The state file allows a means by which to change in and
out of Premium Real-Time Mode (PRTM), as well as the
ability to query the current state.
- 0 => PRTM off
- 1 => PRTM enabled
+
+ - 0 => PRTM off
+ - 1 => PRTM enabled
+
Users: The ibm-prtm userspace daemon uses this interface.
diff --git a/Documentation/ABI/testing/sysfs-devices-vfio-dev b/Documentation/ABI/testing/sysfs-devices-vfio-dev
new file mode 100644
index 000000000000..e21424fd9666
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-vfio-dev
@@ -0,0 +1,8 @@
+What: /sys/.../<device>/vfio-dev/vfioX/
+Date: September 2022
+Contact: Yi Liu <yi.l.liu@intel.com>
+Description:
+ This directory is created when the device is bound to a
+ vfio driver. The layout under this directory matches what
+ exists for a standard 'struct device'. 'X' is a unique
+ index marking this device in vfio.
diff --git a/Documentation/ABI/testing/sysfs-devices-virtual-misc-tdx_guest b/Documentation/ABI/testing/sysfs-devices-virtual-misc-tdx_guest
new file mode 100644
index 000000000000..8fca56c8c9df
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-virtual-misc-tdx_guest
@@ -0,0 +1,63 @@
+What: /sys/devices/virtual/misc/tdx_guest/measurements/MRNAME[:HASH]
+Date: April, 2025
+KernelVersion: v6.16
+Contact: linux-coco@lists.linux.dev
+Description:
+ Value of a TDX measurement register (MR). MRNAME and HASH above
+ are placeholders. The optional suffix :HASH is used for MRs
+ that have associated hash algorithms. See below for a complete
+ list of TDX MRs exposed via sysfs. Refer to Intel TDX Module
+ ABI Specification for the definition of TDREPORT and the full
+ list of TDX measurements.
+
+ Intel TDX Module ABI Specification can be found at:
+ https://www.intel.com/content/www/us/en/developer/tools/trust-domain-extensions/documentation.html#architecture
+
+ See also:
+ https://docs.kernel.org/driver-api/coco/measurement-registers.html
+
+What: /sys/devices/virtual/misc/tdx_guest/measurements/mrconfigid
+Date: April, 2025
+KernelVersion: v6.16
+Contact: linux-coco@lists.linux.dev
+Description:
+ (RO) MRCONFIGID - 48-byte immutable storage typically used for
+ software-defined ID for non-owner-defined configuration of the
+ guest TD – e.g., run-time or OS configuration.
+
+What: /sys/devices/virtual/misc/tdx_guest/measurements/mrowner
+Date: April, 2025
+KernelVersion: v6.16
+Contact: linux-coco@lists.linux.dev
+Description:
+ (RO) MROWNER - 48-byte immutable storage typically used for
+ software-defined ID for the guest TD’s owner.
+
+What: /sys/devices/virtual/misc/tdx_guest/measurements/mrownerconfig
+Date: April, 2025
+KernelVersion: v6.16
+Contact: linux-coco@lists.linux.dev
+Description:
+ (RO) MROWNERCONFIG - 48-byte immutable storage typically used
+ for software-defined ID for owner-defined configuration of the
+ guest TD – e.g., specific to the workload rather than the
+ run-time or OS.
+
+What: /sys/devices/virtual/misc/tdx_guest/measurements/mrtd:sha384
+Date: April, 2025
+KernelVersion: v6.16
+Contact: linux-coco@lists.linux.dev
+Description:
+ (RO) MRTD - Measurement of the initial contents of the TD.
+
+What: /sys/devices/virtual/misc/tdx_guest/measurements/rtmr[0123]:sha384
+Date: April, 2025
+KernelVersion: v6.16
+Contact: linux-coco@lists.linux.dev
+Description:
+ (RW) RTMR[0123] - 4 Run-Time extendable Measurement Registers.
+ Read from any of these returns the current value of the
+ corresponding RTMR. Write extends the written buffer to the
+ RTMR. All writes must start at offset 0 and be 48 bytes in
+ size. Partial writes will result in EINVAL returned by the
+ write() syscall.
diff --git a/Documentation/ABI/testing/sysfs-devices-waiting_for_supplier b/Documentation/ABI/testing/sysfs-devices-waiting_for_supplier
new file mode 100644
index 000000000000..59d073d20db6
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-waiting_for_supplier
@@ -0,0 +1,17 @@
+What: /sys/devices/.../waiting_for_supplier
+Date: May 2020
+Contact: Saravana Kannan <saravanak@google.com>
+Description:
+ The /sys/devices/.../waiting_for_supplier attribute is only
+ present when fw_devlink kernel command line option is enabled
+ and is set to something stricter than "permissive". It is
+ removed once a device probes successfully (because the
+ information is no longer relevant). The number read from it (0
+ or 1) reflects whether the device is waiting for one or more
+ suppliers to be added and then linked to using device links
+ before the device can probe.
+
+ A value of 0 means the device is not waiting for any suppliers
+ to be added before it can probe. A value of 1 means the device
+ is waiting for one or more suppliers to be added before it can
+ probe.
diff --git a/Documentation/ABI/testing/sysfs-devices-xenbus b/Documentation/ABI/testing/sysfs-devices-xenbus
new file mode 100644
index 000000000000..fd796cb4f315
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-xenbus
@@ -0,0 +1,41 @@
+What: /sys/devices/*/xenbus/event_channels
+Date: February 2021
+Contact: Xen Developers mailing list <xen-devel@lists.xenproject.org>
+Description:
+ Number of Xen event channels associated with a kernel based
+ paravirtualized device frontend or backend.
+
+What: /sys/devices/*/xenbus/events
+Date: February 2021
+Contact: Xen Developers mailing list <xen-devel@lists.xenproject.org>
+Description:
+ Total number of Xen events received for a Xen pv device
+ frontend or backend.
+
+What: /sys/devices/*/xenbus/jiffies_eoi_delayed
+Date: February 2021
+Contact: Xen Developers mailing list <xen-devel@lists.xenproject.org>
+Description:
+ Summed up time in jiffies the EOI of an interrupt for a Xen
+ pv device has been delayed in order to avoid stalls due to
+ event storms. This value rising is a first sign for a rogue
+ other end of the pv device.
+
+What: /sys/devices/*/xenbus/spurious_events
+Date: February 2021
+Contact: Xen Developers mailing list <xen-devel@lists.xenproject.org>
+Description:
+ Number of events received for a Xen pv device which did not
+ require any action. Too many spurious events in a row will
+ trigger delayed EOI processing.
+
+What: /sys/devices/*/xenbus/spurious_threshold
+Date: February 2021
+Contact: Xen Developers mailing list <xen-devel@lists.xenproject.org>
+Description:
+ Controls the tolerated number of subsequent spurious events
+ before delayed EOI processing is triggered for a Xen pv
+ device. Default is 1. This can be modified in case the other
+ end of the pv device is issuing spurious events on a regular
+ basis and is known not to be malicious on purpose. Raising
+ the value for such cases can improve pv device performance.
diff --git a/Documentation/ABI/testing/sysfs-driver-altera-cvp b/Documentation/ABI/testing/sysfs-driver-altera-cvp
index 8cde64a71edb..fbd8078fd7ad 100644
--- a/Documentation/ABI/testing/sysfs-driver-altera-cvp
+++ b/Documentation/ABI/testing/sysfs-driver-altera-cvp
@@ -1,6 +1,6 @@
What: /sys/bus/pci/drivers/altera-cvp/chkcfg
Date: May 2017
-Kernel Version: 4.13
+KernelVersion: 4.13
Contact: Anatolij Gustschin <agust@denx.de>
Description:
Contains either 1 or 0 and controls if configuration
diff --git a/Documentation/ABI/testing/sysfs-driver-amd-sfh b/Documentation/ABI/testing/sysfs-driver-amd-sfh
new file mode 100644
index 000000000000..c053126a83bb
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-amd-sfh
@@ -0,0 +1,13 @@
+What: /sys/bus/pci/drivers/pcie_mp2_amd/*/hpd
+Date: April 2025
+Contact: mario.limonciello@amd.com
+Description:
+ Human presence detection (HPD) enable/disable.
+ When HPD is enabled, the device will be able to detect the
+ presence of a human and will send an interrupt that can be
+ used to wake the system from a low power state.
+ When HPD is disabled, the device will not be able to detect
+ the presence of a human.
+
+ Access: Read/Write
+ Valid values: enabled/disabled
diff --git a/Documentation/ABI/testing/sysfs-driver-aspeed-uart-routing b/Documentation/ABI/testing/sysfs-driver-aspeed-uart-routing
new file mode 100644
index 000000000000..910df0e5815a
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-aspeed-uart-routing
@@ -0,0 +1,27 @@
+What: /sys/bus/platform/drivers/aspeed-uart-routing/\*/uart\*
+Date: September 2021
+Contact: Oskar Senft <osk@google.com>
+ Chia-Wei Wang <chiawei_wang@aspeedtech.com>
+Description: Selects the RX source of the UARTx device.
+
+ When read, each file shows the list of available options with currently
+ selected option marked by brackets "[]". The list of available options
+ depends on the selected file.
+
+ e.g.
+ cat /sys/bus/platform/drivers/aspeed-uart-routing/\*.uart_routing/uart1
+ [io1] io2 io3 io4 uart2 uart3 uart4 io6
+
+ In this case, UART1 gets its input from IO1 (physical serial port 1).
+
+Users: OpenBMC. Proposed changes should be mailed to
+ openbmc@lists.ozlabs.org
+
+What: /sys/bus/platform/drivers/aspeed-uart-routing/\*/io\*
+Date: September 2021
+Contact: Oskar Senft <osk@google.com>
+ Chia-Wei Wang <chiawei_wang@aspeedtech.com>
+Description: Selects the RX source of IOx serial port. The current selection
+ will be marked by brackets "[]".
+Users: OpenBMC. Proposed changes should be mailed to
+ openbmc@lists.ozlabs.org
diff --git a/Documentation/ABI/testing/sysfs-driver-bd9571mwv-regulator b/Documentation/ABI/testing/sysfs-driver-bd9571mwv-regulator
index 4d63a7904b94..90596d8bb51c 100644
--- a/Documentation/ABI/testing/sysfs-driver-bd9571mwv-regulator
+++ b/Documentation/ABI/testing/sysfs-driver-bd9571mwv-regulator
@@ -6,11 +6,13 @@ Description: Read/write the current state of DDR Backup Mode, which controls
if DDR power rails will be kept powered during system suspend.
("on"/"1" = enabled, "off"/"0" = disabled).
Two types of power switches (or control signals) can be used:
+
A. With a momentary power switch (or pulse signal), DDR
Backup Mode is enabled by default when available, as the
PMIC will be configured only during system suspend.
B. With a toggle power switch (or level signal), the
following steps must be followed exactly:
+
1. Configure PMIC for backup mode, to change the role of
the accessory power switch from a power switch to a
wake-up switch,
@@ -20,8 +22,10 @@ Description: Read/write the current state of DDR Backup Mode, which controls
3. Suspend system,
4. Switch accessory power switch on, to resume the
system.
+
DDR Backup Mode must be explicitly enabled by the user,
to invoke step 1.
- See also Documentation/devicetree/bindings/mfd/bd9571mwv.txt.
+
+ See also Documentation/devicetree/bindings/mfd/rohm,bd9571mwv.yaml.
Users: User space applications for embedded boards equipped with a
BD9571MWV PMIC.
diff --git a/Documentation/ABI/testing/sysfs-driver-ccp b/Documentation/ABI/testing/sysfs-driver-ccp
new file mode 100644
index 000000000000..ee6b787eee7a
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-ccp
@@ -0,0 +1,105 @@
+What: /sys/bus/pci/devices/<BDF>/fused_part
+Date: June 2022
+KernelVersion: 5.19
+Contact: mario.limonciello@amd.com
+Description:
+ The /sys/bus/pci/devices/<BDF>/fused_part file reports
+ whether the CPU or APU has been fused to prevent tampering.
+ 0: Not fused
+ 1: Fused
+
+What: /sys/bus/pci/devices/<BDF>/debug_lock_on
+Date: June 2022
+KernelVersion: 5.19
+Contact: mario.limonciello@amd.com
+Description:
+ The /sys/bus/pci/devices/<BDF>/debug_lock_on reports
+ whether the AMD CPU or APU has been unlocked for debugging.
+ Possible values:
+ 0: Not locked
+ 1: Locked
+
+What: /sys/bus/pci/devices/<BDF>/tsme_status
+Date: June 2022
+KernelVersion: 5.19
+Contact: mario.limonciello@amd.com
+Description:
+ The /sys/bus/pci/devices/<BDF>/tsme_status file reports
+ the status of transparent secure memory encryption on AMD systems.
+ Possible values:
+ 0: Not active
+ 1: Active
+
+What: /sys/bus/pci/devices/<BDF>/anti_rollback_status
+Date: June 2022
+KernelVersion: 5.19
+Contact: mario.limonciello@amd.com
+Description:
+ The /sys/bus/pci/devices/<BDF>/anti_rollback_status file reports
+ whether the PSP is enforcing rollback protection.
+ Possible values:
+ 0: Not enforcing
+ 1: Enforcing
+
+What: /sys/bus/pci/devices/<BDF>/rpmc_production_enabled
+Date: June 2022
+KernelVersion: 5.19
+Contact: mario.limonciello@amd.com
+Description:
+ The /sys/bus/pci/devices/<BDF>/rpmc_production_enabled file reports
+ whether Replay Protected Monotonic Counter support has been enabled.
+ Possible values:
+ 0: Not enabled
+ 1: Enabled
+
+What: /sys/bus/pci/devices/<BDF>/rpmc_spirom_available
+Date: June 2022
+KernelVersion: 5.19
+Contact: mario.limonciello@amd.com
+Description:
+ The /sys/bus/pci/devices/<BDF>/rpmc_spirom_available file reports
+ whether an Replay Protected Monotonic Counter supported SPI is installed
+ on the system.
+ Possible values:
+ 0: Not present
+ 1: Present
+
+What: /sys/bus/pci/devices/<BDF>/hsp_tpm_available
+Date: June 2022
+KernelVersion: 5.19
+Contact: mario.limonciello@amd.com
+Description:
+ The /sys/bus/pci/devices/<BDF>/hsp_tpm_available file reports
+ whether the HSP TPM has been activated.
+ Possible values:
+ 0: Not activated or present
+ 1: Activated
+
+What: /sys/bus/pci/devices/<BDF>/rom_armor_enforced
+Date: June 2022
+KernelVersion: 5.19
+Contact: mario.limonciello@amd.com
+Description:
+ The /sys/bus/pci/devices/<BDF>/rom_armor_enforced file reports
+ whether RomArmor SPI protection is enforced.
+ Possible values:
+ 0: Not enforced
+ 1: Enforced
+
+What: /sys/bus/pci/devices/<BDF>/bootloader_version
+Date: June 2023
+KernelVersion: 6.4
+Contact: mario.limonciello@amd.com
+Description:
+ The /sys/bus/pci/devices/<BDF>/bootloader_version
+ file reports the firmware version of the AMD AGESA
+ bootloader.
+
+What: /sys/bus/pci/devices/<BDF>/tee_version
+Date: June 2023
+KernelVersion: 6.4
+Contact: mario.limonciello@amd.com
+Description:
+ The /sys/bus/pci/devices/<BDF>/tee_version
+ file reports the firmware version of the AMD Trusted
+ Execution Environment (TEE).
diff --git a/Documentation/ABI/testing/sysfs-driver-chromeos-acpi b/Documentation/ABI/testing/sysfs-driver-chromeos-acpi
new file mode 100644
index 000000000000..d46b1c85840d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-chromeos-acpi
@@ -0,0 +1,152 @@
+What: /sys/bus/platform/devices/GGL0001:*/BINF.2
+ /sys/bus/platform/devices/GOOG0016:*/BINF.2
+Date: May 2022
+KernelVersion: 5.19
+Description:
+ Returns active EC firmware of current boot (boolean).
+
+ == ===============================
+ 0 Read only (recovery) firmware.
+ 1 Rewritable firmware.
+ == ===============================
+
+What: /sys/bus/platform/devices/GGL0001:*/BINF.3
+ /sys/bus/platform/devices/GOOG0016:*/BINF.3
+Date: May 2022
+KernelVersion: 5.19
+Description:
+ Returns main firmware type for current boot (integer).
+
+ == =====================================
+ 0 Recovery.
+ 1 Normal.
+ 2 Developer.
+ 3 Netboot (factory installation only).
+ == =====================================
+
+What: /sys/bus/platform/devices/GGL0001:*/CHSW
+ /sys/bus/platform/devices/GOOG0016:*/CHSW
+Date: May 2022
+KernelVersion: 5.19
+Description:
+ Returns switch position for Chrome OS specific hardware
+ switches when the firmware is booted (integer).
+
+ ==== ===========================================
+ 0 No changes.
+ 2 Recovery button was pressed.
+ 4 Recovery button was pressed (EC firmware).
+ 32 Developer switch was enabled.
+ 512 Firmware write protection was disabled.
+ ==== ===========================================
+
+What: /sys/bus/platform/devices/GGL0001:*/FMAP
+ /sys/bus/platform/devices/GOOG0016:*/FMAP
+Date: May 2022
+KernelVersion: 5.19
+Description:
+ Returns physical memory address of the start of the main
+ processor firmware flashmap.
+
+What: /sys/bus/platform/devices/GGL0001:*/FRID
+ /sys/bus/platform/devices/GOOG0016:*/FRID
+Date: May 2022
+KernelVersion: 5.19
+Description:
+ Returns firmware version for the read-only portion of the
+ main processor firmware.
+
+What: /sys/bus/platform/devices/GGL0001:*/FWID
+ /sys/bus/platform/devices/GOOG0016:*/FWID
+Date: May 2022
+KernelVersion: 5.19
+Description:
+ Returns firmware version for the rewritable portion of the
+ main processor firmware.
+
+What: /sys/bus/platform/devices/GGL0001:*/GPIO.X/GPIO.0
+ /sys/bus/platform/devices/GOOG0016:*/GPIO.X/GPIO.0
+Date: May 2022
+KernelVersion: 5.19
+Description:
+ Returns type of the GPIO signal for the Chrome OS specific
+ GPIO assignments (integer).
+
+ =========== ==================================
+ 1 Recovery button.
+ 2 Developer mode switch.
+ 3 Firmware write protection switch.
+ 256 to 511 Debug header GPIO 0 to GPIO 255.
+ =========== ==================================
+
+What: /sys/bus/platform/devices/GGL0001:*/GPIO.X/GPIO.1
+ /sys/bus/platform/devices/GOOG0016:*/GPIO.X/GPIO.1
+Date: May 2022
+KernelVersion: 5.19
+Description:
+ Returns signal attributes of the GPIO signal (integer bitfield).
+
+ == =======================
+ 0 Signal is active low.
+ 1 Signal is active high.
+ == =======================
+
+What: /sys/bus/platform/devices/GGL0001:*/GPIO.X/GPIO.2
+ /sys/bus/platform/devices/GOOG0016:*/GPIO.X/GPIO.2
+Date: May 2022
+KernelVersion: 5.19
+Description:
+ Returns the GPIO number on the specified GPIO
+ controller.
+
+What: /sys/bus/platform/devices/GGL0001:*/GPIO.X/GPIO.3
+ /sys/bus/platform/devices/GOOG0016:*/GPIO.X/GPIO.3
+Date: May 2022
+KernelVersion: 5.19
+Description:
+ Returns name of the GPIO controller.
+
+What: /sys/bus/platform/devices/GGL0001:*/HWID
+ /sys/bus/platform/devices/GOOG0016:*/HWID
+Date: May 2022
+KernelVersion: 5.19
+Description:
+ Returns hardware ID for the Chromebook.
+
+What: /sys/bus/platform/devices/GGL0001:*/MECK
+ /sys/bus/platform/devices/GOOG0016:*/MECK
+Date: May 2022
+KernelVersion: 5.19
+Description:
+ Returns the SHA-1 or SHA-256 hash that is read out of the
+ Management Engine extended registers during boot. The hash
+ is exported via ACPI so the OS can verify that the Management
+ Engine firmware has not changed. If Management Engine is not
+ present, or if the firmware was unable to read the extended registers, this buffer size can be zero.
+
+What: /sys/bus/platform/devices/GGL0001:*/VBNV.0
+ /sys/bus/platform/devices/GOOG0016:*/VBNV.0
+Date: May 2022
+KernelVersion: 5.19
+Description:
+ Returns offset in CMOS bank 0 of the verified boot non-volatile
+ storage block, counting from the first writable CMOS byte
+ (that is, 'offset = 0' is the byte following the 14 bytes of
+ clock data).
+
+What: /sys/bus/platform/devices/GGL0001:*/VBNV.1
+ /sys/bus/platform/devices/GOOG0016:*/VBNV.1
+Date: May 2022
+KernelVersion: 5.19
+Description:
+ Return the size in bytes of the verified boot non-volatile
+ storage block.
+
+What: /sys/bus/platform/devices/GGL0001:*/VDAT
+ /sys/bus/platform/devices/GOOG0016:*/VDAT
+Date: May 2022
+KernelVersion: 5.19
+Description:
+ Returns the verified boot data block shared between the
+ firmware verification step and the kernel verification step
+ (hex dump).
diff --git a/Documentation/ABI/testing/sysfs-driver-eud b/Documentation/ABI/testing/sysfs-driver-eud
new file mode 100644
index 000000000000..2bab0db2d2f0
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-eud
@@ -0,0 +1,9 @@
+What: /sys/bus/platform/drivers/qcom_eud/.../enable
+Date: February 2022
+Contact: Souradeep Chowdhury <quic_schowdhu@quicinc.com>
+Description:
+ The Enable/Disable sysfs interface for Embedded
+ USB Debugger(EUD). This enables and disables the
+ EUD based on a 1 or a 0 value. By enabling EUD,
+ the user is able to activate the mini-usb hub of
+ EUD for debug and trace capabilities.
diff --git a/Documentation/ABI/testing/sysfs-driver-framer-pef2256 b/Documentation/ABI/testing/sysfs-driver-framer-pef2256
new file mode 100644
index 000000000000..29f97783bf07
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-framer-pef2256
@@ -0,0 +1,8 @@
+What: /sys/bus/platform/devices/xxx/version
+Date: Sep 2025
+Contact: netdev@vger.kernel.org
+Description: Reports the version of the PEF2256 framer
+
+ Access: Read
+
+ Valid values: Represented as string
diff --git a/Documentation/ABI/testing/sysfs-driver-ge-achc b/Documentation/ABI/testing/sysfs-driver-ge-achc
new file mode 100644
index 000000000000..c3e77def4b20
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-ge-achc
@@ -0,0 +1,15 @@
+What: /sys/bus/spi/<dev>/update_firmware
+Date: Jul 2021
+Contact: sebastian.reichel@collabora.com
+Description: Write 1 to this file to update the ACHC microcontroller
+ firmware via the EzPort interface. For this the kernel
+ will load "achc.bin" via the firmware API (so usually
+ from /lib/firmware). The write will block until the FW
+ has either been flashed successfully or an error occurred.
+
+What: /sys/bus/spi/<dev>/reset
+Date: Jul 2021
+Contact: sebastian.reichel@collabora.com
+Description: This file represents the microcontroller's reset line.
+ 1 means the reset line is asserted, 0 means it's not
+ asserted. The file is read and writable.
diff --git a/Documentation/ABI/testing/sysfs-driver-genwqe b/Documentation/ABI/testing/sysfs-driver-genwqe
index 64ac6d567c4b..69d855dc4c47 100644
--- a/Documentation/ABI/testing/sysfs-driver-genwqe
+++ b/Documentation/ABI/testing/sysfs-driver-genwqe
@@ -29,8 +29,12 @@ What: /sys/class/genwqe/genwqe<n>_card/reload_bitstream
Date: May 2014
Contact: klebers@linux.vnet.ibm.com
Description: Interface to trigger a PCIe card reset to reload the bitstream.
+
+ ::
+
sudo sh -c 'echo 1 > \
/sys/class/genwqe/genwqe0_card/reload_bitstream'
+
If successfully, the card will come back with the bitstream set
on 'next_bitstream'.
@@ -64,8 +68,11 @@ Description: Base clock frequency of the card.
What: /sys/class/genwqe/genwqe<n>_card/device/sriov_numvfs
Date: Oct 2013
Contact: haver@linux.vnet.ibm.com
-Description: Enable VFs (1..15):
+Description: Enable VFs (1..15)::
+
sudo sh -c 'echo 15 > \
/sys/bus/pci/devices/0000\:1b\:00.0/sriov_numvfs'
- Disable VFs:
+
+ Disable VFs::
+
Write a 0 into the same sysfs entry.
diff --git a/Documentation/ABI/testing/sysfs-driver-habanalabs b/Documentation/ABI/testing/sysfs-driver-habanalabs
new file mode 100644
index 000000000000..4244f5af4b54
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-habanalabs
@@ -0,0 +1,265 @@
+What: /sys/class/accel/accel<n>/device/armcp_kernel_ver
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: Version of the Linux kernel running on the device's CPU.
+ Will be DEPRECATED in Linux kernel version 5.10, and be
+ replaced with cpucp_kernel_ver
+
+What: /sys/class/accel/accel<n>/device/armcp_ver
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: Version of the application running on the device's CPU
+ Will be DEPRECATED in Linux kernel version 5.10, and be
+ replaced with cpucp_ver
+
+What: /sys/class/accel/accel<n>/device/clk_max_freq_mhz
+Date: Jun 2019
+KernelVersion: 5.7
+Contact: ogabbay@kernel.org
+Description: Allows the user to set the maximum clock frequency, in MHz.
+ The device clock might be set to lower value than the maximum.
+ The user should read the clk_cur_freq_mhz to see the actual
+ frequency value of the device clock. This property is valid
+ only for the Gaudi ASIC family
+
+What: /sys/class/accel/accel<n>/device/clk_cur_freq_mhz
+Date: Jun 2019
+KernelVersion: 5.7
+Contact: ogabbay@kernel.org
+Description: Displays the current frequency, in MHz, of the device clock.
+ This property is valid only for the Gaudi ASIC family
+
+What: /sys/class/accel/accel<n>/device/cpld_ver
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: Version of the Device's CPLD F/W
+
+What: /sys/class/accel/accel<n>/device/cpucp_kernel_ver
+Date: Oct 2020
+KernelVersion: 5.10
+Contact: ogabbay@kernel.org
+Description: Version of the Linux kernel running on the device's CPU
+
+What: /sys/class/accel/accel<n>/device/cpucp_ver
+Date: Oct 2020
+KernelVersion: 5.10
+Contact: ogabbay@kernel.org
+Description: Version of the application running on the device's CPU
+
+What: /sys/class/accel/accel<n>/device/device_type
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: Displays the code name of the device according to its type.
+ The supported values are: "GOYA"
+
+What: /sys/class/accel/accel<n>/device/eeprom
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: A binary file attribute that contains the contents of the
+ on-board EEPROM
+
+What: /sys/class/accel/accel<n>/device/fuse_ver
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: Displays the device's version from the eFuse
+
+What: /sys/class/accel/accel<n>/device/fw_os_ver
+Date: Dec 2021
+KernelVersion: 5.18
+Contact: ogabbay@kernel.org
+Description: Version of the firmware OS running on the device's CPU
+
+What: /sys/class/accel/accel<n>/device/hard_reset
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: Interface to trigger a hard-reset operation for the device.
+ Hard-reset will reset ALL internal components of the device
+ except for the PCI interface and the internal PLLs
+
+What: /sys/class/accel/accel<n>/device/hard_reset_cnt
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: Displays how many times the device have undergone a hard-reset
+ operation since the driver was loaded
+
+What: /sys/class/accel/accel<n>/device/high_pll
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: Allows the user to set the maximum clock frequency for MME, TPC
+ and IC when the power management profile is set to "automatic".
+ This property is valid only for the Goya ASIC family
+
+What: /sys/class/accel/accel<n>/device/ic_clk
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: Allows the user to set the maximum clock frequency, in Hz, of
+ the Interconnect fabric. Writes to this parameter affect the
+ device only when the power management profile is set to "manual"
+ mode. The device IC clock might be set to lower value than the
+ maximum. The user should read the ic_clk_curr to see the actual
+ frequency value of the IC. This property is valid only for the
+ Goya ASIC family
+
+What: /sys/class/accel/accel<n>/device/ic_clk_curr
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: Displays the current clock frequency, in Hz, of the Interconnect
+ fabric. This property is valid only for the Goya ASIC family
+
+What: /sys/class/accel/accel<n>/device/infineon_ver
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: Version of the Device's power supply F/W code. Relevant only to GOYA and GAUDI
+
+What: /sys/class/accel/accel<n>/device/max_power
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: Allows the user to set the maximum power consumption of the
+ device in milliwatts.
+
+What: /sys/class/accel/accel<n>/device/mme_clk
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: Allows the user to set the maximum clock frequency, in Hz, of
+ the MME compute engine. Writes to this parameter affect the
+ device only when the power management profile is set to "manual"
+ mode. The device MME clock might be set to lower value than the
+ maximum. The user should read the mme_clk_curr to see the actual
+ frequency value of the MME. This property is valid only for the
+ Goya ASIC family
+
+What: /sys/class/accel/accel<n>/device/mme_clk_curr
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: Displays the current clock frequency, in Hz, of the MME compute
+ engine. This property is valid only for the Goya ASIC family
+
+What: /sys/class/accel/accel<n>/device/module_id
+Date: Nov 2023
+KernelVersion: not yet upstreamed
+Contact: ogabbay@kernel.org
+Description: Displays the device's module id
+
+What: /sys/class/accel/accel<n>/device/parent_device
+Date: Nov 2023
+KernelVersion: 6.8
+Contact: ttayar@habana.ai
+Description: Displays the name of the parent device of the accel device
+
+What: /sys/class/accel/accel<n>/device/pci_addr
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: Displays the PCI address of the device. This is needed so the
+ user would be able to open a device based on its PCI address
+
+What: /sys/class/accel/accel<n>/device/pm_mng_profile
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: Power management profile. Values are "auto", "manual". In "auto"
+ mode, the driver will set the maximum clock frequency to a high
+ value when a user-space process opens the device's file (unless
+ it was already opened by another process). The driver will set
+ the max clock frequency to a low value when there are no user
+ processes that are opened on the device's file. In "manual"
+ mode, the user sets the maximum clock frequency by writing to
+ ic_clk, mme_clk and tpc_clk. This property is valid only for
+ the Goya ASIC family
+
+What: /sys/class/accel/accel<n>/device/preboot_btl_ver
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: Version of the device's preboot F/W code
+
+What: /sys/class/accel/accel<n>/device/security_enabled
+Date: Oct 2022
+KernelVersion: 6.1
+Contact: obitton@habana.ai
+Description: Displays the device's security status
+
+What: /sys/class/accel/accel<n>/device/soft_reset
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: Interface to trigger a soft-reset operation for the device.
+ Soft-reset will reset only the compute and DMA engines of the
+ device
+
+What: /sys/class/accel/accel<n>/device/soft_reset_cnt
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: Displays how many times the device have undergone a soft-reset
+ operation since the driver was loaded
+
+What: /sys/class/accel/accel<n>/device/status
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: Status of the card:
+
+ * "operational" - Device is available for work.
+ * "in reset" - Device is going through reset, will be
+ available shortly.
+ * "disabled" - Device is not usable.
+ * "needs reset" - Device is not usable until a hard reset
+ is initiated.
+ * "in device creation" - Device is not available yet, as it
+ is still initializing.
+ * "in reset after device release" - Device is going through
+ a compute-reset which is executed after a device release
+ (relevant for Gaudi2 only).
+
+What: /sys/class/accel/accel<n>/device/thermal_ver
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: Version of the Device's thermal daemon
+
+What: /sys/class/accel/accel<n>/device/tpc_clk
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: Allows the user to set the maximum clock frequency, in Hz, of
+ the TPC compute engines. Writes to this parameter affect the
+ device only when the power management profile is set to "manual"
+ mode. The device TPC clock might be set to lower value than the
+ maximum. The user should read the tpc_clk_curr to see the actual
+ frequency value of the TPC. This property is valid only for
+ Goya ASIC family
+
+What: /sys/class/accel/accel<n>/device/tpc_clk_curr
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: Displays the current clock frequency, in Hz, of the TPC compute
+ engines. This property is valid only for the Goya ASIC family
+
+What: /sys/class/accel/accel<n>/device/uboot_ver
+Date: Jan 2019
+KernelVersion: 5.1
+Contact: ogabbay@kernel.org
+Description: Version of the u-boot running on the device's CPU
+
+What: /sys/class/accel/accel<n>/device/vrm_ver
+Date: Jan 2022
+KernelVersion: 5.17
+Contact: ogabbay@kernel.org
+Description: Version of the Device's Voltage Regulator Monitor F/W code. N/A to GOYA and GAUDI
diff --git a/Documentation/ABI/testing/sysfs-driver-hid b/Documentation/ABI/testing/sysfs-driver-hid
index 48942cacb0bf..a59533410871 100644
--- a/Documentation/ABI/testing/sysfs-driver-hid
+++ b/Documentation/ABI/testing/sysfs-driver-hid
@@ -1,6 +1,6 @@
-What: For USB devices : /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/report_descriptor
- For BT devices : /sys/class/bluetooth/hci<addr>/<hid-bus>:<vendor-id>:<product-id>.<num>/report_descriptor
- Symlink : /sys/class/hidraw/hidraw<num>/device/report_descriptor
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/report_descriptor
+What: /sys/class/bluetooth/hci<addr>/<hid-bus>:<vendor-id>:<product-id>.<num>/report_descriptor
+What: /sys/class/hidraw/hidraw<num>/device/report_descriptor
Date: Jan 2011
KernelVersion: 2.0.39
Contact: Alan Ott <alan@signal11.us>
@@ -9,9 +9,9 @@ Description: When read, this file returns the device's raw binary HID
This file cannot be written.
Users: HIDAPI library (http://www.signal11.us/oss/hidapi)
-What: For USB devices : /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/country
- For BT devices : /sys/class/bluetooth/hci<addr>/<hid-bus>:<vendor-id>:<product-id>.<num>/country
- Symlink : /sys/class/hidraw/hidraw<num>/device/country
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/country
+What: /sys/class/bluetooth/hci<addr>/<hid-bus>:<vendor-id>:<product-id>.<num>/country
+What: /sys/class/hidraw/hidraw<num>/device/country
Date: February 2015
KernelVersion: 3.19
Contact: Olivier Gay <ogay@logitech.com>
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd b/Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd
new file mode 100644
index 000000000000..8c9718d83e9d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd
@@ -0,0 +1,13 @@
+What: /sys/bus/hid/drivers/hid-appletb-kbd/<dev>/mode
+Date: March, 2025
+KernelVersion: 6.15
+Contact: linux-input@vger.kernel.org
+Description:
+ The set of keys displayed on the Touch Bar.
+ Valid values are:
+ == =================
+ 0 Escape key only
+ 1 Function keys
+ 2 Media/brightness keys
+ 3 None
+ == =================
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-corsair-void b/Documentation/ABI/testing/sysfs-driver-hid-corsair-void
new file mode 100644
index 000000000000..83fa625c0025
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-hid-corsair-void
@@ -0,0 +1,38 @@
+What: /sys/bus/hid/drivers/hid-corsair-void/<dev>/fw_version_headset
+Date: January 2024
+KernelVersion: 6.13
+Contact: Stuart Hayhurst <stuart.a.hayhurst@gmail.com>
+Description: (R) The firmware version of the headset
+ * Returns -ENODATA if no version was reported
+
+What: /sys/bus/hid/drivers/hid-corsair-void/<dev>/fw_version_receiver
+Date: January 2024
+KernelVersion: 6.13
+Contact: Stuart Hayhurst <stuart.a.hayhurst@gmail.com>
+Description: (R) The firmware version of the receiver
+
+What: /sys/bus/hid/drivers/hid-corsair-void/<dev>/microphone_up
+Date: July 2023
+KernelVersion: 6.13
+Contact: Stuart Hayhurst <stuart.a.hayhurst@gmail.com>
+Description: (R) Get the physical position of the microphone
+ * 1 -> Microphone up
+ * 0 -> Microphone down
+
+What: /sys/bus/hid/drivers/hid-corsair-void/<dev>/send_alert
+Date: July 2023
+KernelVersion: 6.13
+Contact: Stuart Hayhurst <stuart.a.hayhurst@gmail.com>
+Description: (W) Play a built-in notification from the headset (0 / 1)
+
+What: /sys/bus/hid/drivers/hid-corsair-void/<dev>/set_sidetone
+Date: December 2023
+KernelVersion: 6.13
+Contact: Stuart Hayhurst <stuart.a.hayhurst@gmail.com>
+Description: (W) Set the sidetone volume (0 - sidetone_max)
+
+What: /sys/bus/hid/drivers/hid-corsair-void/<dev>/sidetone_max
+Date: July 2024
+KernelVersion: 6.13
+Contact: Stuart Hayhurst <stuart.a.hayhurst@gmail.com>
+Description: (R) Report the maximum sidetone volume
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-lenovo b/Documentation/ABI/testing/sysfs-driver-hid-lenovo
index 53a0725962e1..aee85ca1f6be 100644
--- a/Documentation/ABI/testing/sysfs-driver-hid-lenovo
+++ b/Documentation/ABI/testing/sysfs-driver-hid-lenovo
@@ -3,14 +3,18 @@ Date: July 2011
Contact: linux-input@vger.kernel.org
Description: This controls if mouse clicks should be generated if the trackpoint is quickly pressed. How fast this press has to be
is being controlled by press_speed.
+
Values are 0 or 1.
+
Applies to Thinkpad USB Keyboard with TrackPoint.
What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/dragging
Date: July 2011
Contact: linux-input@vger.kernel.org
Description: If this setting is enabled, it is possible to do dragging by pressing the trackpoint. This requires press_to_select to be enabled.
+
Values are 0 or 1.
+
Applies to Thinkpad USB Keyboard with TrackPoint.
What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/release_to_select
@@ -25,7 +29,9 @@ Date: July 2011
Contact: linux-input@vger.kernel.org
Description: This setting controls if the mouse click events generated by pressing the trackpoint (if press_to_select is enabled) generate
a left or right mouse button click.
+
Values are 0 or 1.
+
Applies to Thinkpad USB Keyboard with TrackPoint.
What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/sensitivity
@@ -39,12 +45,16 @@ What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-
Date: July 2011
Contact: linux-input@vger.kernel.org
Description: This setting controls how fast the trackpoint needs to be pressed to generate a mouse click if press_to_select is enabled.
+
Values are decimal integers from 1 (slowest) to 255 (fastest).
+
Applies to Thinkpad USB Keyboard with TrackPoint.
What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/fn_lock
Date: July 2014
Contact: linux-input@vger.kernel.org
Description: This setting controls whether Fn Lock is enabled on the keyboard (i.e. if F1 is Mute or F1)
+
Values are 0 or 1
+
Applies to ThinkPad Compact (USB|Bluetooth) Keyboard with TrackPoint.
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-logitech-lg4ff b/Documentation/ABI/testing/sysfs-driver-hid-logitech-lg4ff
index 305dffd229a8..de07be314efc 100644
--- a/Documentation/ABI/testing/sysfs-driver-hid-logitech-lg4ff
+++ b/Documentation/ABI/testing/sysfs-driver-hid-logitech-lg4ff
@@ -12,7 +12,9 @@ KernelVersion: 4.1
Contact: Michal Malý <madcatxster@devoid-pointer.net>
Description: Displays a set of alternate modes supported by a wheel. Each
mode is listed as follows:
+
Tag: Mode Name
+
Currently active mode is marked with an asterisk. List also
contains an abstract item "native" which always denotes the
native mode of the wheel. Echoing the mode tag switches the
@@ -24,24 +26,30 @@ Description: Displays a set of alternate modes supported by a wheel. Each
This entry is not created for devices that have only one mode.
Currently supported mode switches:
- Driving Force Pro:
+
+ Driving Force Pro::
+
DF-EX --> DFP
- G25:
+ G25::
+
DF-EX --> DFP --> G25
- G27:
+ G27::
+
DF-EX <*> DFP <-> G25 <-> G27
DF-EX <*--------> G25 <-> G27
DF-EX <*----------------> G27
- G29:
+ G29::
+
DF-EX <*> DFP <-> G25 <-> G27 <-> G29
DF-EX <*--------> G25 <-> G27 <-> G29
DF-EX <*----------------> G27 <-> G29
DF-EX <*------------------------> G29
- DFGT:
+ DFGT::
+
DF-EX <*> DFP <-> DFGT
DF-EX <*--------> DFGT
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-ntrig b/Documentation/ABI/testing/sysfs-driver-hid-ntrig
index e574a5625efe..0e323a5cec6c 100644
--- a/Documentation/ABI/testing/sysfs-driver-hid-ntrig
+++ b/Documentation/ABI/testing/sysfs-driver-hid-ntrig
@@ -29,12 +29,13 @@ Contact: linux-input@vger.kernel.org
Description:
Threholds to override activation slack.
- activation_width: (RW) Width threshold to immediately
+ ================= =====================================
+ activation_width (RW) Width threshold to immediately
start processing touch events.
- activation_height: (RW) Height threshold to immediately
+ activation_height (RW) Height threshold to immediately
start processing touch events.
-
+ ================= =====================================
What: /sys/bus/hid/drivers/ntrig/<dev>/min_width
What: /sys/bus/hid/drivers/ntrig/<dev>/min_height
@@ -44,11 +45,13 @@ Contact: linux-input@vger.kernel.org
Description:
Minimum size contact accepted.
- min_width: (RW) Minimum touch contact width to decide
+ ========== ===========================================
+ min_width (RW) Minimum touch contact width to decide
activation and activity.
- min_height: (RW) Minimum touch contact height to decide
+ min_height (RW) Minimum touch contact height to decide
activation and activity.
+ ========== ===========================================
What: /sys/bus/hid/drivers/ntrig/<dev>/sensor_physical_width
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-roccat-kone b/Documentation/ABI/testing/sysfs-driver-hid-roccat-kone
index 3ca3971109bf..11cd9bf0ad18 100644
--- a/Documentation/ABI/testing/sysfs-driver-hid-roccat-kone
+++ b/Documentation/ABI/testing/sysfs-driver-hid-roccat-kone
@@ -3,17 +3,21 @@ Date: March 2010
Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
Description: It is possible to switch the dpi setting of the mouse with the
press of a button.
+
When read, this file returns the raw number of the actual dpi
setting reported by the mouse. This number has to be further
- processed to receive the real dpi value.
+ processed to receive the real dpi value:
+ ===== =====
VALUE DPI
+ ===== =====
1 800
2 1200
3 1600
4 2000
5 2400
6 3200
+ ===== =====
This file is readonly.
Users: http://roccat.sourceforge.net
@@ -22,6 +26,7 @@ What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-
Date: March 2010
Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
Description: When read, this file returns the number of the actual profile.
+
This file is readonly.
Users: http://roccat.sourceforge.net
@@ -33,6 +38,7 @@ Description: When read, this file returns the raw integer version number of the
further usage in other programs. To receive the real version
number the decimal point has to be shifted 2 positions to the
left. E.g. a returned value of 138 means 1.38
+
This file is readonly.
Users: http://roccat.sourceforge.net
@@ -43,10 +49,13 @@ Description: The mouse can store 5 profiles which can be switched by the
press of a button. A profile holds information like button
mappings, sensitivity, the colors of the 5 leds and light
effects.
+
When read, these files return the respective profile. The
returned data is 975 bytes in size.
+
When written, this file lets one write the respective profile
data back to the mouse. The data has to be 975 bytes long.
+
The mouse will reject invalid data, whereas the profile number
stored in the profile doesn't need to fit the number of the
store.
@@ -58,6 +67,7 @@ Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
Description: When read, this file returns the settings stored in the mouse.
The size of the data is 36 bytes and holds information like the
startup_profile, tcu state and calibration_data.
+
When written, this file lets write settings back to the mouse.
The data has to be 36 bytes long. The mouse will reject invalid
data.
@@ -67,8 +77,10 @@ What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-
Date: March 2010
Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
Description: The integer value of this attribute ranges from 1 to 5.
+
When read, this attribute returns the number of the profile
that's active when the mouse is powered on.
+
When written, this file sets the number of the startup profile
and the mouse activates this profile immediately.
Users: http://roccat.sourceforge.net
@@ -80,9 +92,12 @@ Description: The mouse has a "Tracking Control Unit" which lets the user
calibrate the laser power to fit the mousepad surface.
When read, this file returns the current state of the TCU,
where 0 means off and 1 means on.
+
Writing 0 in this file will switch the TCU off.
+
Writing 1 in this file will start the calibration which takes
around 6 seconds to complete and activates the TCU.
+
Users: http://roccat.sourceforge.net
What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/kone/roccatkone<minor>/weight
@@ -93,14 +108,18 @@ Description: The mouse can be equipped with one of four supplied weights
and its value can be read out. When read, this file returns the
raw value returned by the mouse which eases further processing
in other software.
+
The values map to the weights as follows:
+ ===== ======
VALUE WEIGHT
+ ===== ======
0 none
1 5g
2 10g
3 15g
4 20g
+ ===== ======
This file is readonly.
Users: http://roccat.sourceforge.net
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-wiimote b/Documentation/ABI/testing/sysfs-driver-hid-wiimote
index 39dfa5cb1cc5..3bf43d9dcdfe 100644
--- a/Documentation/ABI/testing/sysfs-driver-hid-wiimote
+++ b/Documentation/ABI/testing/sysfs-driver-hid-wiimote
@@ -20,6 +20,7 @@ Description: This file contains the currently connected and initialized
the official Nintendo Nunchuck extension and classic is the
Nintendo Classic Controller extension. The motionp extension can
be combined with the other two.
+
Starting with kernel-version 3.11 Motion Plus hotplugging is
supported and if detected, it's no longer reported as static
extension. You will get uevent notifications for the motion-plus
@@ -39,9 +40,13 @@ Description: While a device is initialized by the wiimote driver, we perform
Other strings for each device-type are available and may be
added if new device-specific detections are added.
Currently supported are:
- gen10: First Wii Remote generation
- gen20: Second Wii Remote Plus generation (builtin MP)
+
+ ============= =======================================
+ gen10: First Wii Remote generation
+ gen20: Second Wii Remote Plus generation
+ (builtin MP)
balanceboard: Wii Balance Board
+ ============= =======================================
What: /sys/bus/hid/drivers/wiimote/<dev>/bboard_calib
Date: May 2013
@@ -54,6 +59,7 @@ Description: This attribute is only provided if the device was detected as a
First, 0kg values for all 4 sensors are written, followed by the
17kg values for all 4 sensors and last the 34kg values for all 4
sensors.
+
Calibration data is already applied by the kernel to all input
values but may be used by user-space to perform other
transformations.
@@ -68,9 +74,11 @@ Description: This attribute is only provided if the device was detected as a
is prefixed with a +/-. Each value is a signed 16bit number.
Data is encoded as decimal numbers and specifies the offsets of
the analog sticks of the pro-controller.
+
Calibration data is already applied by the kernel to all input
values but may be used by user-space to perform other
transformations.
+
Calibration data is detected by the kernel during device setup.
You can write "scan\n" into this file to re-trigger calibration.
You can also write data directly in the form "x1:y1 x2:y2" to
diff --git a/Documentation/ABI/testing/sysfs-driver-input-cros-ec-keyb b/Documentation/ABI/testing/sysfs-driver-input-cros-ec-keyb
new file mode 100644
index 000000000000..c7afc2328045
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-input-cros-ec-keyb
@@ -0,0 +1,6 @@
+What: /sys/class/input/input(x)/device/function_row_physmap
+Date: January 2021
+Contact: Philip Chen <philipchen@chromium.org>
+Description: A space separated list of scancodes for the top row keys,
+ ordered by the physical positions of the keys, from left
+ to right.
diff --git a/Documentation/ABI/testing/sysfs-driver-input-exc3000 b/Documentation/ABI/testing/sysfs-driver-input-exc3000
new file mode 100644
index 000000000000..704434b277b0
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-input-exc3000
@@ -0,0 +1,26 @@
+What: /sys/bus/i2c/devices/xxx/fw_version
+Date: Aug 2020
+Contact: linux-input@vger.kernel.org
+Description: Reports the firmware version provided by the touchscreen, for example "00_T6" on a EXC80H60
+
+ Access: Read
+
+ Valid values: Represented as string
+
+What: /sys/bus/i2c/devices/xxx/model
+Date: Aug 2020
+Contact: linux-input@vger.kernel.org
+Description: Reports the model identification provided by the touchscreen, for example "Orion_1320" on a EXC80H60
+
+ Access: Read
+
+ Valid values: Represented as string
+
+What: /sys/bus/i2c/devices/xxx/type
+Date: Jan 2021
+Contact: linux-input@vger.kernel.org
+Description: Reports the type identification provided by the touchscreen, for example "PCAP82H80 Series"
+
+ Access: Read
+
+ Valid values: Represented as string
diff --git a/Documentation/ABI/testing/sysfs-driver-intc_sar b/Documentation/ABI/testing/sysfs-driver-intc_sar
new file mode 100644
index 000000000000..ec334b0e5ed9
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-intc_sar
@@ -0,0 +1,54 @@
+What: /sys/bus/platform/devices/INTC1092:00/intc_reg
+Date: August 2021
+KernelVersion: 5.15
+Contact: Shravan S <s.shravan@intel.com>,
+ An Sudhakar <sudhakar.an@intel.com>
+Description:
+ Specific Absorption Rate (SAR) regulatory mode is typically
+ derived based on information like mcc (Mobile Country Code) and
+ mnc (Mobile Network Code) that is available for the currently
+ attached LTE network. A userspace application is required to set
+ the current SAR regulatory mode on the Dynamic SAR driver using
+ this sysfs node. Such an application can also read back using
+ this sysfs node, the currently configured regulatory mode value
+ from the Dynamic SAR driver.
+
+ Acceptable regulatory modes are:
+ == ====
+ 0 FCC
+ 1 CE
+ 2 ISED
+ == ====
+
+ - The regulatory mode value has one of the above values.
+ - The default regulatory mode used in the driver is 0.
+
+What: /sys/bus/platform/devices/INTC1092:00/intc_data
+Date: August 2021
+KernelVersion: 5.15
+Contact: Shravan S <s.shravan@intel.com>,
+ An Sudhakar <sudhakar.an@intel.com>
+Description:
+ This sysfs entry is used to retrieve Dynamic SAR information
+ emitted/maintained by a BIOS that supports Dynamic SAR.
+
+ The retrieved information is in the order given below:
+ - device_mode
+ - bandtable_index
+ - antennatable_index
+ - sartable_index
+
+ The above information is sent as integer values separated
+ by a single space. This information can then be pushed to a
+ WWAN modem that uses this to control the transmit signal
+ level using the Band/Antenna/SAR table index information.
+ These parameters are derived/decided by aggregating
+ device-mode like laptop/tablet/clamshell etc. and the
+ proximity-sensor data available to the embedded controller on
+ given host. The regulatory mode configured on Dynamic SAR
+ driver also influences these values.
+
+ The userspace applications can poll for changes to this file
+ using POLLPRI event on file-descriptor (fd) obtained by opening
+ this sysfs entry. Application can then read this information from
+ the sysfs node and consume the given information.
diff --git a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
new file mode 100644
index 000000000000..a885e5316d02
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
@@ -0,0 +1,93 @@
+What: /sys/bus/pci/drivers/i915/.../hwmon/hwmon<i>/in0_input
+Date: February 2023
+KernelVersion: 6.2
+Contact: intel-gfx@lists.freedesktop.org
+Description: RO. Current Voltage in millivolt.
+
+ Only supported for particular Intel i915 graphics platforms.
+
+What: /sys/bus/pci/drivers/i915/.../hwmon/hwmon<i>/power1_max
+Date: February 2023
+KernelVersion: 6.2
+Contact: intel-gfx@lists.freedesktop.org
+Description: RW. Card reactive sustained (PL1/Tau) power limit in microwatts.
+
+ The power controller will throttle the operating frequency
+ if the power averaged over a window (typically seconds)
+ exceeds this limit. A read value of 0 means that the PL1
+ power limit is disabled, writing 0 disables the
+ limit. Writing values > 0 will enable the power limit.
+
+ Only supported for particular Intel i915 graphics platforms.
+
+What: /sys/bus/pci/drivers/i915/.../hwmon/hwmon<i>/power1_rated_max
+Date: February 2023
+KernelVersion: 6.2
+Contact: intel-gfx@lists.freedesktop.org
+Description: RO. Card default power limit (default TDP setting).
+
+ Only supported for particular Intel i915 graphics platforms.
+
+What: /sys/bus/pci/drivers/i915/.../hwmon/hwmon<i>/power1_max_interval
+Date: February 2023
+KernelVersion: 6.2
+Contact: intel-gfx@lists.freedesktop.org
+Description: RW. Sustained power limit interval (Tau in PL1/Tau) in
+ milliseconds over which sustained power is averaged.
+
+ Only supported for particular Intel i915 graphics platforms.
+
+What: /sys/bus/pci/drivers/i915/.../hwmon/hwmon<i>/power1_crit
+Date: February 2023
+KernelVersion: 6.2
+Contact: intel-gfx@lists.freedesktop.org
+Description: RW. Card reactive critical (I1) power limit in microwatts.
+
+ Card reactive critical (I1) power limit in microwatts is exposed
+ for client products. The power controller will throttle the
+ operating frequency if the power averaged over a window exceeds
+ this limit.
+
+ Only supported for particular Intel i915 graphics platforms.
+
+What: /sys/bus/pci/drivers/i915/.../hwmon/hwmon<i>/curr1_crit
+Date: February 2023
+KernelVersion: 6.2
+Contact: intel-gfx@lists.freedesktop.org
+Description: RW. Card reactive critical (I1) power limit in milliamperes.
+
+ Card reactive critical (I1) power limit in milliamperes is
+ exposed for server products. The power controller will throttle
+ the operating frequency if the power averaged over a window
+ exceeds this limit.
+
+ Only supported for particular Intel i915 graphics platforms.
+
+What: /sys/bus/pci/drivers/i915/.../hwmon/hwmon<i>/energy1_input
+Date: February 2023
+KernelVersion: 6.2
+Contact: intel-gfx@lists.freedesktop.org
+Description: RO. Energy input of device or gt in microjoules.
+
+ For i915 device level hwmon devices (name "i915") this
+ reflects energy input for the entire device. For gt level
+ hwmon devices (name "i915_gtN") this reflects energy input
+ for the gt.
+
+ Only supported for particular Intel i915 graphics platforms.
+
+What: /sys/bus/pci/drivers/i915/.../hwmon/hwmon<i>/fan1_input
+Date: November 2024
+KernelVersion: 6.12
+Contact: intel-gfx@lists.freedesktop.org
+Description: RO. Fan speed of device in RPM.
+
+ Only supported for particular Intel i915 graphics platforms.
+
+What: /sys/bus/pci/drivers/i915/.../hwmon/hwmon<i>/temp1_input
+Date: November 2024
+KernelVersion: 6.12
+Contact: intel-gfx@lists.freedesktop.org
+Description: RO. GPU package temperature in millidegree Celsius.
+
+ Only supported for particular Intel i915 graphics platforms.
diff --git a/Documentation/ABI/testing/sysfs-driver-intel-m10-bmc b/Documentation/ABI/testing/sysfs-driver-intel-m10-bmc
new file mode 100644
index 000000000000..faeae8fedb14
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-intel-m10-bmc
@@ -0,0 +1,36 @@
+What: /sys/bus/.../drivers/intel-m10-bmc/.../bmc_version
+Date: June 2020
+KernelVersion: 5.10
+Contact: Xu Yilun <yilun.xu@intel.com>
+Description: Read only. Returns the hardware build version of Intel
+ MAX10 BMC chip.
+ Format: "0x%x".
+
+What: /sys/bus/.../drivers/intel-m10-bmc/.../bmcfw_version
+Date: June 2020
+KernelVersion: 5.10
+Contact: Xu Yilun <yilun.xu@intel.com>
+Description: Read only. Returns the firmware version of Intel MAX10
+ BMC chip.
+ Format: "0x%x".
+
+What: /sys/bus/.../drivers/intel-m10-bmc/.../mac_address
+Date: January 2021
+KernelVersion: 5.12
+Contact: Matthew Gerlach <matthew.gerlach@altera.com>
+Description: Read only. Returns the first MAC address in a block
+ of sequential MAC addresses assigned to the board
+ that is managed by the Intel MAX10 BMC. It is stored in
+ FLASH storage and is mirrored in the MAX10 BMC register
+ space.
+ Format: "%02x:%02x:%02x:%02x:%02x:%02x".
+
+What: /sys/bus/.../drivers/intel-m10-bmc/.../mac_count
+Date: January 2021
+KernelVersion: 5.12
+Contact: Matthew Gerlach <matthew.gerlach@altera.com>
+Description: Read only. Returns the number of sequential MAC
+ addresses assigned to the board managed by the Intel
+ MAX10 BMC. This value is stored in FLASH and is mirrored
+ in the MAX10 BMC register space.
+ Format: "%u".
diff --git a/Documentation/ABI/testing/sysfs-driver-intel-m10-bmc-sec-update b/Documentation/ABI/testing/sysfs-driver-intel-m10-bmc-sec-update
new file mode 100644
index 000000000000..3a6ca780c75c
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-intel-m10-bmc-sec-update
@@ -0,0 +1,61 @@
+What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/sr_root_entry_hash
+Date: Sep 2022
+KernelVersion: 5.20
+Contact: Matthew Gerlach <matthew.gerlach@altera.com>
+Description: Read only. Returns the root entry hash for the static
+ region if one is programmed, else it returns the
+ string: "hash not programmed". This file is only
+ visible if the underlying device supports it.
+ Format: string.
+
+What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/pr_root_entry_hash
+Date: Sep 2022
+KernelVersion: 5.20
+Contact: Matthew Gerlach <matthew.gerlach@altera.com>
+Description: Read only. Returns the root entry hash for the partial
+ reconfiguration region if one is programmed, else it
+ returns the string: "hash not programmed". This file
+ is only visible if the underlying device supports it.
+ Format: string.
+
+What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/bmc_root_entry_hash
+Date: Sep 2022
+KernelVersion: 5.20
+Contact: Matthew Gerlach <matthew.gerlach@altera.com>
+Description: Read only. Returns the root entry hash for the BMC image
+ if one is programmed, else it returns the string:
+ "hash not programmed". This file is only visible if the
+ underlying device supports it.
+ Format: string.
+
+What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/sr_canceled_csks
+Date: Sep 2022
+KernelVersion: 5.20
+Contact: Matthew Gerlach <matthew.gerlach@altera.com>
+Description: Read only. Returns a list of indices for canceled code
+ signing keys for the static region. The standard bitmap
+ list format is used (e.g. "1,2-6,9").
+
+What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/pr_canceled_csks
+Date: Sep 2022
+KernelVersion: 5.20
+Contact: Matthew Gerlach <matthew.gerlach@altera.com>
+Description: Read only. Returns a list of indices for canceled code
+ signing keys for the partial reconfiguration region. The
+ standard bitmap list format is used (e.g. "1,2-6,9").
+
+What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/bmc_canceled_csks
+Date: Sep 2022
+KernelVersion: 5.20
+Contact: Matthew Gerlach <matthew.gerlach@altera.com>
+Description: Read only. Returns a list of indices for canceled code
+ signing keys for the BMC. The standard bitmap list format
+ is used (e.g. "1,2-6,9").
+
+What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/flash_count
+Date: Sep 2022
+KernelVersion: 5.20
+Contact: Matthew Gerlach <matthew.gerlach@altera.com>
+Description: Read only. Returns number of times the secure update
+ staging area has been flashed.
+ Format: "%u".
diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
new file mode 100644
index 000000000000..d9e2b17c6872
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
@@ -0,0 +1,198 @@
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power1_max
+Date: September 2023
+KernelVersion: 6.5
+Contact: intel-xe@lists.freedesktop.org
+Description: RW. Card reactive sustained (PL1) power limit in microwatts.
+
+ The power controller will throttle the operating frequency
+ if the power averaged over a window (typically seconds)
+ exceeds this limit. A read value of 0 means that the PL1
+ power limit is disabled, writing 0 disables the
+ limit. Writing values > 0 and <= TDP will enable the power limit.
+
+ Only supported for particular Intel Xe graphics platforms.
+
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power1_rated_max
+Date: September 2023
+KernelVersion: 6.5
+Contact: intel-xe@lists.freedesktop.org
+Description: RO. Card default power limit (default TDP setting).
+
+ Only supported for particular Intel Xe graphics platforms.
+
+
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/energy1_input
+Date: September 2023
+KernelVersion: 6.5
+Contact: intel-xe@lists.freedesktop.org
+Description: RO. Card energy input of device in microjoules.
+
+ Only supported for particular Intel Xe graphics platforms.
+
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power1_max_interval
+Date: October 2023
+KernelVersion: 6.6
+Contact: intel-xe@lists.freedesktop.org
+Description: RW. Card sustained power limit interval (Tau in PL1/Tau) in
+ milliseconds over which sustained power is averaged.
+
+ Only supported for particular Intel Xe graphics platforms.
+
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power2_max
+Date: February 2024
+KernelVersion: 6.8
+Contact: intel-xe@lists.freedesktop.org
+Description: RW. Package reactive sustained (PL1) power limit in microwatts.
+
+ The power controller will throttle the operating frequency
+ if the power averaged over a window (typically seconds)
+ exceeds this limit. A read value of 0 means that the PL1
+ power limit is disabled, writing 0 disables the
+ limit. Writing values > 0 and <= TDP will enable the power limit.
+
+ Only supported for particular Intel Xe graphics platforms.
+
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power2_rated_max
+Date: February 2024
+KernelVersion: 6.8
+Contact: intel-xe@lists.freedesktop.org
+Description: RO. Package default power limit (default TDP setting).
+
+ Only supported for particular Intel Xe graphics platforms.
+
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power1_crit
+Date: May 2025
+KernelVersion: 6.15
+Contact: intel-xe@lists.freedesktop.org
+Description: RW. Card reactive critical (I1) power limit in microwatts.
+
+ Card reactive critical (I1) power limit in microwatts is exposed
+ for client products. The power controller will throttle the
+ operating frequency if the power averaged over a window exceeds
+ this limit.
+
+ Only supported for particular Intel Xe graphics platforms.
+
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/curr1_crit
+Date: May 2025
+KernelVersion: 6.15
+Contact: intel-xe@lists.freedesktop.org
+Description: RW. Card reactive critical (I1) power limit in milliamperes.
+
+ Card reactive critical (I1) power limit in milliamperes is
+ exposed for server products. The power controller will throttle
+ the operating frequency if the power averaged over a window
+ exceeds this limit.
+
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/energy2_input
+Date: February 2024
+KernelVersion: 6.8
+Contact: intel-xe@lists.freedesktop.org
+Description: RO. Package energy input of device in microjoules.
+
+ Only supported for particular Intel Xe graphics platforms.
+
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power2_max_interval
+Date: February 2024
+KernelVersion: 6.8
+Contact: intel-xe@lists.freedesktop.org
+Description: RW. Package sustained power limit interval (Tau in PL1/Tau) in
+ milliseconds over which sustained power is averaged.
+
+ Only supported for particular Intel Xe graphics platforms.
+
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/in1_input
+Date: February 2024
+KernelVersion: 6.8
+Contact: intel-xe@lists.freedesktop.org
+Description: RO. Package current voltage in millivolt.
+
+ Only supported for particular Intel Xe graphics platforms.
+
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/temp2_input
+Date: March 2025
+KernelVersion: 6.15
+Contact: intel-xe@lists.freedesktop.org
+Description: RO. Package temperature in millidegree Celsius.
+
+ Only supported for particular Intel Xe graphics platforms.
+
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/temp3_input
+Date: March 2025
+KernelVersion: 6.15
+Contact: intel-xe@lists.freedesktop.org
+Description: RO. VRAM temperature in millidegree Celsius.
+
+ Only supported for particular Intel Xe graphics platforms.
+
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/fan1_input
+Date: March 2025
+KernelVersion: 6.16
+Contact: intel-xe@lists.freedesktop.org
+Description: RO. Fan 1 speed in RPM.
+
+ Only supported for particular Intel Xe graphics platforms.
+
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/fan2_input
+Date: March 2025
+KernelVersion: 6.16
+Contact: intel-xe@lists.freedesktop.org
+Description: RO. Fan 2 speed in RPM.
+
+ Only supported for particular Intel Xe graphics platforms.
+
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/fan3_input
+Date: March 2025
+KernelVersion: 6.16
+Contact: intel-xe@lists.freedesktop.org
+Description: RO. Fan 3 speed in RPM.
+
+ Only supported for particular Intel Xe graphics platforms.
+
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power1_cap
+Date: May 2025
+KernelVersion: 6.15
+Contact: intel-xe@lists.freedesktop.org
+Description: RW. Card burst (PL2) power limit in microwatts.
+
+ The power controller will throttle the operating frequency
+ if the power averaged over a window (typically milli seconds)
+ exceeds this limit. A read value of 0 means that the PL2
+ power limit is disabled, writing 0 disables the limit.
+ PL2 is greater than PL1 and its time window is lesser
+ compared to PL1.
+
+ Only supported for particular Intel Xe graphics platforms.
+
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power2_cap
+Date: May 2025
+KernelVersion: 6.15
+Contact: intel-xe@lists.freedesktop.org
+Description: RW. Package burst (PL2) power limit in microwatts.
+
+ The power controller will throttle the operating frequency
+ if the power averaged over a window (typically milli seconds)
+ exceeds this limit. A read value of 0 means that the PL2
+ power limit is disabled, writing 0 disables the limit.
+ PL2 is greater than PL1 and its time window is lesser
+ compared to PL1.
+
+ Only supported for particular Intel Xe graphics platforms.
+
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power1_cap_interval
+Date: May 2025
+KernelVersion: 6.15
+Contact: intel-xe@lists.freedesktop.org
+Description: RW. Card burst power limit interval (Tau in PL2/Tau) in
+ milliseconds over which sustained power is averaged.
+
+ Only supported for particular Intel Xe graphics platforms.
+
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power2_cap_interval
+Date: May 2025
+KernelVersion: 6.15
+Contact: intel-xe@lists.freedesktop.org
+Description: RW. Package burst power limit interval (Tau in PL2/Tau) in
+ milliseconds over which sustained power is averaged.
+
+ Only supported for particular Intel Xe graphics platforms.
diff --git a/Documentation/ABI/testing/sysfs-driver-intel_sdsi b/Documentation/ABI/testing/sysfs-driver-intel_sdsi
new file mode 100644
index 000000000000..f8afed127107
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-intel_sdsi
@@ -0,0 +1,90 @@
+What: /sys/bus/auxiliary/devices/intel_vsec.sdsi.X
+Date: Feb 2022
+KernelVersion: 5.18
+Contact: "David E. Box" <david.e.box@linux.intel.com>
+Description:
+ This directory contains interface files for accessing Intel
+ On Demand (formerly Software Defined Silicon or SDSi) features
+ on a CPU. X represents the socket instance (though not the
+ socket ID). The socket ID is determined by reading the
+ registers file and decoding it per the specification.
+
+ Some files communicate with On Demand hardware through a
+ mailbox. Should the operation fail, one of the following error
+ codes may be returned:
+
+ ========== =====
+ Error Code Cause
+ ========== =====
+ EIO General mailbox failure. Log may indicate cause.
+ EBUSY Mailbox is owned by another agent.
+ EPERM On Demand capability is not enabled in hardware.
+ EPROTO Failure in mailbox protocol detected by driver.
+ See log for details.
+ EOVERFLOW For provision commands, the size of the data
+ exceeds what may be written.
+ ESPIPE Seeking is not allowed.
+ ETIMEDOUT Failure to complete mailbox transaction in time.
+ ========== =====
+
+What: /sys/bus/auxiliary/devices/intel_vsec.sdsi.X/guid
+Date: Feb 2022
+KernelVersion: 5.18
+Contact: "David E. Box" <david.e.box@linux.intel.com>
+Description:
+ (RO) The GUID for the registers file. The GUID identifies
+ the layout of the registers file in this directory.
+ Information about the register layouts for a particular GUID
+ is available at http://github.com/intel/intel-sdsi
+
+What: /sys/bus/auxiliary/devices/intel_vsec.sdsi.X/registers
+Date: Feb 2022
+KernelVersion: 5.18
+Contact: "David E. Box" <david.e.box@linux.intel.com>
+Description:
+ (RO) Contains information needed by applications to provision
+ a CPU and monitor status information. The layout of this file
+ is determined by the GUID in this directory. Information about
+ the layout for a particular GUID is available at
+ http://github.com/intel/intel-sdsi
+
+What: /sys/bus/auxiliary/devices/intel_vsec.sdsi.X/provision_akc
+Date: Feb 2022
+KernelVersion: 5.18
+Contact: "David E. Box" <david.e.box@linux.intel.com>
+Description:
+ (WO) Used to write an Authentication Key Certificate (AKC) to
+ the On Demand NVRAM for the CPU. The AKC is used to authenticate
+ a Capability Activation Payload. Mailbox command.
+
+What: /sys/bus/auxiliary/devices/intel_vsec.sdsi.X/provision_cap
+Date: Feb 2022
+KernelVersion: 5.18
+Contact: "David E. Box" <david.e.box@linux.intel.com>
+Description:
+ (WO) Used to write a Capability Activation Payload (CAP) to the
+ On Demand NVRAM for the CPU. CAPs are used to activate a given
+ CPU feature. A CAP is validated by On Demand hardware using a
+ previously provisioned AKC file. Upon successful authentication,
+ the CPU configuration is updated. A cold reboot is required to
+ fully activate the feature. Mailbox command.
+
+What: /sys/bus/auxiliary/devices/intel_vsec.sdsi.X/meter_certificate
+Date: Nov 2022
+KernelVersion: 6.2
+Contact: "David E. Box" <david.e.box@linux.intel.com>
+Description:
+ (RO) Used to read back the current meter certificate for the CPU
+ from Intel On Demand hardware. The meter certificate contains
+ utilization metrics of On Demand enabled features. Mailbox
+ command.
+
+What: /sys/bus/auxiliary/devices/intel_vsec.sdsi.X/state_certificate
+Date: Feb 2022
+KernelVersion: 5.18
+Contact: "David E. Box" <david.e.box@linux.intel.com>
+Description:
+ (RO) Used to read back the current state certificate for the CPU
+ from On Demand hardware. The state certificate contains
+ information about the current licenses on the CPU. Mailbox
+ command.
diff --git a/Documentation/ABI/testing/sysfs-driver-jz4780-efuse b/Documentation/ABI/testing/sysfs-driver-jz4780-efuse
new file mode 100644
index 000000000000..4cf595d681e6
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-jz4780-efuse
@@ -0,0 +1,20 @@
+What: /sys/devices/*/<our-device>/nvmem
+Date: December 2017
+Contact: PrasannaKumar Muralidharan <prasannatsmkumar@gmail.com>
+Description: read-only access to the efuse on the Ingenic JZ4780 SoC
+ The SoC has a one time programmable 8K efuse that is
+ split into segments. The driver supports read only.
+ The segments are:
+
+ ===== ======== =================
+ 0x000 64 bit Random Number
+ 0x008 128 bit Ingenic Chip ID
+ 0x018 128 bit Customer ID
+ 0x028 3520 bit Reserved
+ 0x1E0 8 bit Protect Segment
+ 0x1E1 2296 bit HDMI Key
+ 0x300 2048 bit Security boot key
+ ===== ======== =================
+
+Users: any user space application which wants to read the Chip
+ and Customer ID
diff --git a/Documentation/ABI/testing/sysfs-driver-panfrost-profiling b/Documentation/ABI/testing/sysfs-driver-panfrost-profiling
new file mode 100644
index 000000000000..7597c420e54b
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-panfrost-profiling
@@ -0,0 +1,10 @@
+What: /sys/bus/platform/drivers/panfrost/.../profiling
+Date: February 2024
+KernelVersion: 6.8.0
+Contact: Adrian Larumbe <adrian.larumbe@collabora.com>
+Description:
+ Get/set drm fdinfo's engine and cycles profiling status.
+ Valid values are:
+ 0: Don't enable fdinfo job profiling sources.
+ 1: Enable fdinfo job profiling sources, this enables both the GPU's
+ timestamp and cycle counter registers.
diff --git a/Documentation/ABI/testing/sysfs-driver-panthor-profiling b/Documentation/ABI/testing/sysfs-driver-panthor-profiling
new file mode 100644
index 000000000000..af05fccedc15
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-panthor-profiling
@@ -0,0 +1,10 @@
+What: /sys/bus/platform/drivers/panthor/.../profiling
+Date: September 2024
+KernelVersion: 6.11.0
+Contact: Adrian Larumbe <adrian.larumbe@collabora.com>
+Description:
+ Bitmask to enable drm fdinfo's job profiling measurements.
+ Valid values are:
+ 0: Don't enable fdinfo job profiling sources.
+ 1: Enable GPU cycle measurements for running jobs.
+ 2: Enable GPU timestamp sampling for running jobs.
diff --git a/Documentation/ABI/testing/sysfs-driver-pciback b/Documentation/ABI/testing/sysfs-driver-pciback
index 6a733bfa37e6..49f5fd0c8bbd 100644
--- a/Documentation/ABI/testing/sysfs-driver-pciback
+++ b/Documentation/ABI/testing/sysfs-driver-pciback
@@ -7,7 +7,22 @@ Description:
the format of DDDD:BB:DD.F-REG:SIZE:MASK will allow the guest
to write and read from the PCI device. That is Domain:Bus:
Device.Function-Register:Size:Mask (Domain is optional).
- For example:
- #echo 00:19.0-E0:2:FF > /sys/bus/pci/drivers/pciback/quirks
+ For example::
+
+ #echo 00:19.0-E0:2:FF > /sys/bus/pci/drivers/pciback/quirks
+
will allow the guest to read and write to the configuration
register 0x0E.
+
+What: /sys/bus/pci/drivers/pciback/allow_interrupt_control
+Date: Jan 2020
+KernelVersion: 5.6
+Contact: xen-devel@lists.xenproject.org
+Description:
+ List of devices which can have interrupt control flag (INTx,
+ MSI, MSI-X) set by a connected guest. It is meant to be set
+ only when the guest is a stubdomain hosting device model (qemu)
+ and the actual device is assigned to a HVM. It is not safe
+ (similar to permissive attribute) to set for a devices assigned
+ to a PV guest. The device is automatically removed from this
+ list when the connected pcifront terminates.
diff --git a/Documentation/ABI/testing/sysfs-driver-ppi b/Documentation/ABI/testing/sysfs-driver-ppi
index 9921ef285899..1a56fc507689 100644
--- a/Documentation/ABI/testing/sysfs-driver-ppi
+++ b/Documentation/ABI/testing/sysfs-driver-ppi
@@ -1,6 +1,6 @@
What: /sys/class/tpm/tpmX/ppi/
Date: August 2012
-Kernel Version: 3.6
+KernelVersion: 3.6
Contact: xiaoyan.zhang@intel.com
Description:
This folder includes the attributes related with PPI (Physical
diff --git a/Documentation/ABI/testing/sysfs-driver-qaic b/Documentation/ABI/testing/sysfs-driver-qaic
new file mode 100644
index 000000000000..f794fd734163
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-qaic
@@ -0,0 +1,18 @@
+What: /sys/bus/pci/drivers/qaic/XXXX:XX:XX.X/ce_count
+Date: May 2025
+KernelVersion: 6.17
+Contact: dri-devel@lists.freedesktop.org
+Description: Number of correctable errors received from device since driver is loaded.
+
+What: /sys/bus/pci/drivers/qaic/XXXX:XX:XX.X/ue_count
+Date: May 2025
+KernelVersion: 6.17
+Contact: dri-devel@lists.freedesktop.org
+Description: Number of uncorrectable errors received from device since driver is loaded.
+
+What: /sys/bus/pci/drivers/qaic/XXXX:XX:XX.X/ue_nonfatal_count
+Date: May 2025
+KernelVersion: 6.17
+Contact: dri-devel@lists.freedesktop.org
+Description: Number of uncorrectable non-fatal errors received from device since driver
+ is loaded.
diff --git a/Documentation/ABI/testing/sysfs-driver-qat b/Documentation/ABI/testing/sysfs-driver-qat
new file mode 100644
index 000000000000..b0561b9fc4eb
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-qat
@@ -0,0 +1,167 @@
+What: /sys/bus/pci/devices/<BDF>/qat/state
+Date: June 2022
+KernelVersion: 6.0
+Contact: qat-linux@intel.com
+Description: (RW) Reports the current state of the QAT device. Write to
+ the file to start or stop the device.
+
+ The values are:
+
+ * up: the device is up and running
+ * down: the device is down
+
+
+ It is possible to transition the device from up to down only
+ if the device is up and vice versa.
+
+ This attribute is available for qat_4xxx and qat_6xxx devices.
+
+What: /sys/bus/pci/devices/<BDF>/qat/cfg_services
+Date: June 2022
+KernelVersion: 6.0
+Contact: qat-linux@intel.com
+Description: (RW) Reports the current configuration of the QAT device.
+ Write to the file to change the configured services.
+
+ One or more services can be enabled per device.
+ Certain configurations are restricted to specific device types;
+ where applicable this is explicitly indicated, for example
+ (qat_6xxx) denotes applicability exclusively to that device series.
+
+ The available services include:
+
+ * sym: Configures the device for symmetric cryptographic operations.
+ * asym: Configures the device for asymmetric cryptographic operations.
+ * dc: Configures the device for compression and decompression
+ operations.
+ * dcc: Similar to dc, but with the additional dc chaining feature
+ enabled, cipher then compress (qat_6xxx), hash then compression.
+ If this is not required choose dc.
+ * decomp: Configures the device for decompression operations (qat_6xxx).
+
+ Service combinations are permitted for all services except dcc.
+ On QAT GEN4 devices (qat_4xxx driver) a maximum of two services can be
+ combined and on QAT GEN6 devices (qat_6xxx driver ) a maximum of three
+ services can be combined.
+ The order of services is not significant. For instance, sym;asym is
+ functionally equivalent to asym;sym.
+
+ It is possible to set the configuration only if the device
+ is in the `down` state (see /sys/bus/pci/devices/<BDF>/qat/state)
+
+ The following example shows how to change the configuration of
+ a device configured for running crypto services in order to
+ run data compression::
+
+ # cat /sys/bus/pci/devices/<BDF>/qat/state
+ up
+ # cat /sys/bus/pci/devices/<BDF>/qat/cfg_services
+ sym;asym
+ # echo down > /sys/bus/pci/devices/<BDF>/qat/state
+ # echo dc > /sys/bus/pci/devices/<BDF>/qat/cfg_services
+ # echo up > /sys/bus/pci/devices/<BDF>/qat/state
+ # cat /sys/bus/pci/devices/<BDF>/qat/cfg_services
+ dc
+
+ This attribute is available for qat_4xxx and qat_6xxx devices.
+
+What: /sys/bus/pci/devices/<BDF>/qat/pm_idle_enabled
+Date: June 2023
+KernelVersion: 6.5
+Contact: qat-linux@intel.com
+Description: (RW) This configuration option provides a way to force the device into remaining in
+ the MAX power state.
+ If idle support is enabled the device will transition to the `MIN` power state when
+ idle, otherwise will stay in the MAX power state.
+ Write to the file to enable or disable idle support.
+
+ The values are:
+
+ * 0: idle support is disabled
+ * 1: idle support is enabled
+
+ Default value is 1.
+
+ It is possible to set the pm_idle_enabled value only if the device
+ is in the `down` state (see /sys/bus/pci/devices/<BDF>/qat/state)
+
+ The following example shows how to change the pm_idle_enabled of
+ a device::
+
+ # cat /sys/bus/pci/devices/<BDF>/qat/state
+ up
+ # cat /sys/bus/pci/devices/<BDF>/qat/pm_idle_enabled
+ 1
+ # echo down > /sys/bus/pci/devices/<BDF>/qat/state
+ # echo 0 > /sys/bus/pci/devices/<BDF>/qat/pm_idle_enabled
+ # echo up > /sys/bus/pci/devices/<BDF>/qat/state
+ # cat /sys/bus/pci/devices/<BDF>/qat/pm_idle_enabled
+ 0
+
+ This attribute is available for qat_4xxx and qat_6xxx devices.
+
+What: /sys/bus/pci/devices/<BDF>/qat/rp2srv
+Date: January 2024
+KernelVersion: 6.7
+Contact: qat-linux@intel.com
+Description:
+ (RW) This attribute provides a way for a user to query a
+ specific ring pair for the type of service that it is currently
+ configured for.
+
+ When written to, the value is cached and used to perform the
+ read operation. Allowed values are in the range 0 to N-1, where
+ N is the max number of ring pairs supported by a device. This
+ can be queried using the attribute qat/num_rps.
+
+ A read returns the service associated to the ring pair queried.
+
+ The values are:
+
+ * dc: the ring pair is configured for running compression services
+ * sym: the ring pair is configured for running symmetric crypto
+ services
+ * asym: the ring pair is configured for running asymmetric crypto
+ services
+
+ Example usage::
+
+ # echo 1 > /sys/bus/pci/devices/<BDF>/qat/rp2srv
+ # cat /sys/bus/pci/devices/<BDF>/qat/rp2srv
+ sym
+
+ This attribute is available for qat_4xxx and qat_6xxx devices.
+
+What: /sys/bus/pci/devices/<BDF>/qat/num_rps
+Date: January 2024
+KernelVersion: 6.7
+Contact: qat-linux@intel.com
+Description:
+ (RO) Returns the number of ring pairs that a single device has.
+
+ Example usage::
+
+ # cat /sys/bus/pci/devices/<BDF>/qat/num_rps
+ 64
+
+ This attribute is available for qat_4xxx and qat_6xxx devices.
+
+What: /sys/bus/pci/devices/<BDF>/qat/auto_reset
+Date: May 2024
+KernelVersion: 6.9
+Contact: qat-linux@intel.com
+Description: (RW) Reports the current state of the autoreset feature
+ for a QAT device
+
+ Write to the attribute to enable or disable device auto reset.
+
+ Device auto reset is disabled by default.
+
+ The values are:
+
+ * 1/Yy/on: auto reset enabled. If the device encounters an
+ unrecoverable error, it will be reset automatically.
+ * 0/Nn/off: auto reset disabled. If the device encounters an
+ unrecoverable error, it will not be reset.
+
+ This attribute is available for qat_4xxx and qat_6xxx devices.
diff --git a/Documentation/ABI/testing/sysfs-driver-qat_ras b/Documentation/ABI/testing/sysfs-driver-qat_ras
new file mode 100644
index 000000000000..82ceb04445ec
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-qat_ras
@@ -0,0 +1,41 @@
+What: /sys/bus/pci/devices/<BDF>/qat_ras/errors_correctable
+Date: January 2024
+KernelVersion: 6.7
+Contact: qat-linux@intel.com
+Description: (RO) Reports the number of correctable errors detected by the device.
+
+ This attribute is only available for qat_4xxx and qat_6xxx devices.
+
+What: /sys/bus/pci/devices/<BDF>/qat_ras/errors_nonfatal
+Date: January 2024
+KernelVersion: 6.7
+Contact: qat-linux@intel.com
+Description: (RO) Reports the number of non fatal errors detected by the device.
+
+ This attribute is only available for qat_4xxx and qat_6xxx devices.
+
+What: /sys/bus/pci/devices/<BDF>/qat_ras/errors_fatal
+Date: January 2024
+KernelVersion: 6.7
+Contact: qat-linux@intel.com
+Description: (RO) Reports the number of fatal errors detected by the device.
+
+ This attribute is only available for qat_4xxx and qat_6xxx devices.
+
+What: /sys/bus/pci/devices/<BDF>/qat_ras/reset_error_counters
+Date: January 2024
+KernelVersion: 6.7
+Contact: qat-linux@intel.com
+Description: (WO) Write to resets all error counters of a device.
+
+ The following example reports how to reset the counters::
+
+ # echo 1 > /sys/bus/pci/devices/<BDF>/qat_ras/reset_error_counters
+ # cat /sys/bus/pci/devices/<BDF>/qat_ras/errors_correctable
+ 0
+ # cat /sys/bus/pci/devices/<BDF>/qat_ras/errors_nonfatal
+ 0
+ # cat /sys/bus/pci/devices/<BDF>/qat_ras/errors_fatal
+ 0
+
+ This attribute is only available for qat_4xxx and qat_6xxx devices.
diff --git a/Documentation/ABI/testing/sysfs-driver-qat_rl b/Documentation/ABI/testing/sysfs-driver-qat_rl
new file mode 100644
index 000000000000..d534f89b4971
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-qat_rl
@@ -0,0 +1,226 @@
+What: /sys/bus/pci/devices/<BDF>/qat_rl/sla_op
+Date: January 2024
+KernelVersion: 6.7
+Contact: qat-linux@intel.com
+Description:
+ (WO) This attribute is used to perform an operation on an SLA.
+ The supported operations are: add, update, rm, rm_all, and get.
+
+ Input values must be filled through the associated attribute in
+ this group before a write to this file.
+ If the operation completes successfully, the associated
+ attributes will be updated.
+ The associated attributes are: cir, pir, srv, rp, and id.
+
+ Supported operations:
+
+ * add: Creates a new SLA with the provided inputs from user.
+ * Inputs: cir, pir, srv, and rp
+ * Output: id
+
+ * get: Returns the configuration of the specified SLA in id attribute
+ * Inputs: id
+ * Outputs: cir, pir, srv, and rp
+
+ * update: Updates the SLA with new values set in the following attributes
+ * Inputs: id, cir, and pir
+
+ * rm: Removes the specified SLA in the id attribute.
+ * Inputs: id
+
+ * rm_all: Removes all the configured SLAs.
+ * Inputs: None
+
+ This attribute is only available for qat_4xxx and qat_6xxx devices.
+
+What: /sys/bus/pci/devices/<BDF>/qat_rl/rp
+Date: January 2024
+KernelVersion: 6.7
+Contact: qat-linux@intel.com
+Description:
+ (RW) When read, reports the current assigned ring pairs for the
+ queried SLA.
+ When wrote to, configures the ring pairs associated to a new SLA.
+
+ The value is a 64-bit bit mask and is written/displayed in hex.
+ Each bit of this mask represents a single ring pair i.e.,
+ bit 1 == ring pair id 0; bit 3 == ring pair id 2.
+
+ Selected ring pairs must to be assigned to a single service,
+ i.e. the one provided with the srv attribute. The service
+ assigned to a certain ring pair can be checked by querying
+ the attribute qat/rp2srv.
+
+ The maximum number of ring pairs is 4 per SLA.
+
+ Applicability in sla_op:
+
+ * WRITE: add operation
+ * READ: get operation
+
+ Example usage::
+
+ ## Read
+ # echo 4 > /sys/bus/pci/devices/<BDF>/qat_rl/id
+ # cat /sys/bus/pci/devices/<BDF>/qat_rl/rp
+ 0x5
+
+ ## Write
+ # echo 0x5 > /sys/bus/pci/devices/<BDF>/qat_rl/rp
+
+ This attribute is only available for qat_4xxx and qat_6xxx devices.
+
+What: /sys/bus/pci/devices/<BDF>/qat_rl/id
+Date: January 2024
+KernelVersion: 6.7
+Contact: qat-linux@intel.com
+Description:
+ (RW) If written to, the value is used to retrieve a particular
+ SLA and operate on it.
+ This is valid only for the following operations: update, rm,
+ and get.
+ A read of this attribute is only guaranteed to have correct data
+ after creation of an SLA.
+
+ Applicability in sla_op:
+
+ * WRITE: rm and update operations
+ * READ: add and get operations
+
+ Example usage::
+
+ ## Read
+ ## Set attributes e.g. cir, pir, srv, etc
+ # echo "add" > /sys/bus/pci/devices/<BDF>/qat_rl/sla_op
+ # cat /sys/bus/pci/devices/<BDF>/qat_rl/id
+ 4
+
+ ## Write
+ # echo 7 > /sys/bus/pci/devices/<BDF>/qat_rl/id
+ # echo "get" > /sys/bus/pci/devices/<BDF>/qat_rl/sla_op
+ # cat /sys/bus/pci/devices/<BDF>/qat_rl/rp
+ 0x5 ## ring pair ID 0 and ring pair ID 2
+
+ This attribute is only available for qat_4xxx and qat_6xxx devices.
+
+What: /sys/bus/pci/devices/<BDF>/qat_rl/cir
+Date: January 2024
+KernelVersion: 6.7
+Contact: qat-linux@intel.com
+Description:
+ (RW) Committed information rate (CIR). Rate guaranteed to be
+ achieved by a particular SLA. The value is expressed in
+ permille scale, i.e. 1000 refers to the maximum device
+ throughput for a selected service.
+
+ After sending a "get" to sla_op, this will be populated with the
+ CIR for that queried SLA.
+ Write to this file before sending an "add/update" sla_op, to set
+ the SLA to the specified value.
+
+ Applicability in sla_op:
+
+ * WRITE: add and update operations
+ * READ: get operation
+
+ Example usage::
+
+ ## Write
+ # echo 500 > /sys/bus/pci/devices/<BDF>/qat_rl/cir
+ # echo "add" /sys/bus/pci/devices/<BDF>/qat_rl/sla_op
+
+ ## Read
+ # echo 4 > /sys/bus/pci/devices/<BDF>/qat_rl/id
+ # echo "get" > /sys/bus/pci/devices/<BDF>/qat_rl/sla_op
+ # cat /sys/bus/pci/devices/<BDF>/qat_rl/cir
+ 500
+
+ This attribute is only available for qat_4xxx and qat_6xxx devices.
+
+What: /sys/bus/pci/devices/<BDF>/qat_rl/pir
+Date: January 2024
+KernelVersion: 6.7
+Contact: qat-linux@intel.com
+Description:
+ (RW) Peak information rate (PIR). The maximum rate that can be
+ achieved by that particular SLA. An SLA can reach a value
+ between CIR and PIR when the device is not fully utilized by
+ requests from other users (assigned to different SLAs).
+
+ After sending a "get" to sla_op, this will be populated with the
+ PIR for that queried SLA.
+ Write to this file before sending an "add/update" sla_op, to set
+ the SLA to the specified value.
+
+ Applicability in sla_op:
+
+ * WRITE: add and update operations
+ * READ: get operation
+
+ Example usage::
+
+ ## Write
+ # echo 750 > /sys/bus/pci/devices/<BDF>/qat_rl/pir
+ # echo "add" > /sys/bus/pci/devices/<BDF>/qat_rl/sla_op
+
+ ## Read
+ # echo 4 > /sys/bus/pci/devices/<BDF>/qat_rl/id
+ # echo "get" > /sys/bus/pci/devices/<BDF>/qat_rl/sla_op
+ # cat /sys/bus/pci/devices/<BDF>/qat_rl/pir
+ 750
+
+ This attribute is only available for qat_4xxx and qat_6xxx devices.
+
+What: /sys/bus/pci/devices/<BDF>/qat_rl/srv
+Date: January 2024
+KernelVersion: 6.7
+Contact: qat-linux@intel.com
+Description:
+ (RW) Service (SRV). Represents the service (sym, asym, dc)
+ associated to an SLA.
+ Can be written to or queried to set/show the SRV type for an SLA.
+ The SRV attribute is used to specify the SRV type before adding
+ an SLA. After an SLA is configured, reports the service
+ associated to that SLA.
+
+ Applicability in sla_op:
+
+ * WRITE: add and update operations
+ * READ: get operation
+
+ Example usage::
+
+ ## Write
+ # echo "dc" > /sys/bus/pci/devices/<BDF>/qat_rl/srv
+ # echo "add" > /sys/bus/pci/devices/<BDF>/qat_rl/sla_op
+ # cat /sys/bus/pci/devices/<BDF>/qat_rl/id
+ 4
+
+ ## Read
+ # echo 4 > /sys/bus/pci/devices/<BDF>/qat_rl/id
+ # echo "get" > /sys/bus/pci/devices/<BDF>/qat_rl/sla_op
+ # cat /sys/bus/pci/devices/<BDF>/qat_rl/srv
+ dc
+
+ This attribute is only available for qat_4xxx and qat_6xxx devices.
+
+What: /sys/bus/pci/devices/<BDF>/qat_rl/cap_rem
+Date: January 2024
+KernelVersion: 6.7
+Contact: qat-linux@intel.com
+Description:
+ (RW) This file will return the remaining capability for a
+ particular service/sla. This is the remaining value that a new
+ SLA can be set to or a current SLA can be increased with.
+
+ Example usage::
+
+ # echo "asym" > /sys/bus/pci/devices/<BDF>/qat_rl/cap_rem
+ # cat /sys/bus/pci/devices/<BDF>/qat_rl/cap_rem
+ 250
+ # echo 250 > /sys/bus/pci/devices/<BDF>/qat_rl/cir
+ # echo "add" > /sys/bus/pci/devices/<BDF>/qat_rl/sla_op
+ # cat /sys/bus/pci/devices/<BDF>/qat_rl/cap_rem
+ 0
+
+ This attribute is only available for qat_4xxx and qat_6xxx devices.
diff --git a/Documentation/ABI/testing/sysfs-driver-samsung-laptop b/Documentation/ABI/testing/sysfs-driver-samsung-laptop
index 34d3a3359cf4..408cb0ddf4aa 100644
--- a/Documentation/ABI/testing/sysfs-driver-samsung-laptop
+++ b/Documentation/ABI/testing/sysfs-driver-samsung-laptop
@@ -9,25 +9,17 @@ Description: Some Samsung laptops have different "performance levels"
their fans quiet at all costs. Reading from this file
will show the current performance level. Writing to the
file can change this value.
+
Valid options:
- "silent"
- "normal"
- "overclock"
+ - "silent"
+ - "normal"
+ - "overclock"
+
Note that not all laptops support all of these options.
Specifically, not all support the "overclock" option,
and it's still unknown if this value even changes
anything, other than making the user feel a bit better.
-What: /sys/devices/platform/samsung/battery_life_extender
-Date: December 1, 2011
-KernelVersion: 3.3
-Contact: Corentin Chary <corentin.chary@gmail.com>
-Description: Max battery charge level can be modified, battery cycle
- life can be extended by reducing the max battery charge
- level.
- 0 means normal battery mode (100% charge)
- 1 means battery life extender mode (80% charge)
-
What: /sys/devices/platform/samsung/usb_charge
Date: December 1, 2011
KernelVersion: 3.3
diff --git a/Documentation/ABI/testing/sysfs-driver-spi-intel b/Documentation/ABI/testing/sysfs-driver-spi-intel
new file mode 100644
index 000000000000..d7c9139ddbf3
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-spi-intel
@@ -0,0 +1,20 @@
+What: /sys/devices/.../intel_spi_protected
+Date: Feb 2025
+KernelVersion: 6.13
+Contact: Alexander Usyskin <alexander.usyskin@intel.com>
+Description: This attribute allows the userspace to check if the
+ Intel SPI flash controller is write protected from the host.
+
+What: /sys/devices/.../intel_spi_locked
+Date: Feb 2025
+KernelVersion: 6.13
+Contact: Alexander Usyskin <alexander.usyskin@intel.com>
+Description: This attribute allows the user space to check if the
+ Intel SPI flash controller locks supported opcodes.
+
+What: /sys/devices/.../intel_spi_bios_locked
+Date: Feb 2025
+KernelVersion: 6.13
+Contact: Alexander Usyskin <alexander.usyskin@intel.com>
+Description: This attribute allows the user space to check if the
+ Intel SPI flash controller BIOS region is locked for writes.
diff --git a/Documentation/ABI/testing/sysfs-driver-st b/Documentation/ABI/testing/sysfs-driver-st
index ba5d77008a85..88cab66fd77f 100644
--- a/Documentation/ABI/testing/sysfs-driver-st
+++ b/Documentation/ABI/testing/sysfs-driver-st
@@ -1,6 +1,6 @@
What: /sys/bus/scsi/drivers/st/debug_flag
Date: October 2015
-Kernel Version: ?.?
+KernelVersion: ?.?
Contact: shane.seymour@hpe.com
Description:
This file allows you to turn debug output from the st driver
diff --git a/Documentation/ABI/testing/sysfs-driver-tegra-fuse b/Documentation/ABI/testing/sysfs-driver-tegra-fuse
index 69f5af632657..b8936fad2ccf 100644
--- a/Documentation/ABI/testing/sysfs-driver-tegra-fuse
+++ b/Documentation/ABI/testing/sysfs-driver-tegra-fuse
@@ -3,7 +3,7 @@ Date: February 2014
Contact: Peter De Schrijver <pdeschrijver@nvidia.com>
Description: read-only access to the efuses on Tegra20, Tegra30, Tegra114
and Tegra124 SoC's from NVIDIA. The efuses contain write once
- data programmed at the factory. The data is layed out in 32bit
+ data programmed at the factory. The data is laid out in 32bit
words in LSB first format. Each bit represents a single value
as decoded from the fuse registers. Bits order/assignment
exactly matches the HW registers, including any unused bits.
diff --git a/Documentation/ABI/testing/sysfs-driver-toshiba_acpi b/Documentation/ABI/testing/sysfs-driver-toshiba_acpi
index f34221b52b14..e5a438d84e1f 100644
--- a/Documentation/ABI/testing/sysfs-driver-toshiba_acpi
+++ b/Documentation/ABI/testing/sysfs-driver-toshiba_acpi
@@ -4,10 +4,12 @@ KernelVersion: 3.15
Contact: Azael Avalos <coproscefalo@gmail.com>
Description: This file controls the keyboard backlight operation mode, valid
values are:
+
* 0x1 -> FN-Z
* 0x2 -> AUTO (also called TIMER)
* 0x8 -> ON
* 0x10 -> OFF
+
Note that from kernel 3.16 onwards this file accepts all listed
parameters, kernel 3.15 only accepts the first two (FN-Z and
AUTO).
@@ -41,8 +43,10 @@ KernelVersion: 3.15
Contact: Azael Avalos <coproscefalo@gmail.com>
Description: This files controls the status of the touchpad and pointing
stick (if available), valid values are:
+
* 0 -> OFF
* 1 -> ON
+
Users: KToshiba
What: /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS{1900,620{0,7,8}}:00/available_kbd_modes
@@ -51,10 +55,12 @@ KernelVersion: 3.16
Contact: Azael Avalos <coproscefalo@gmail.com>
Description: This file shows the supported keyboard backlight modes
the system supports, which can be:
+
* 0x1 -> FN-Z
* 0x2 -> AUTO (also called TIMER)
* 0x8 -> ON
* 0x10 -> OFF
+
Note that not all keyboard types support the listed modes.
See the entry named "available_kbd_modes"
Users: KToshiba
@@ -65,6 +71,7 @@ KernelVersion: 3.16
Contact: Azael Avalos <coproscefalo@gmail.com>
Description: This file shows the current keyboard backlight type,
which can be:
+
* 1 -> Type 1, supporting modes FN-Z and AUTO
* 2 -> Type 2, supporting modes TIMER, ON and OFF
Users: KToshiba
@@ -75,10 +82,12 @@ KernelVersion: 4.0
Contact: Azael Avalos <coproscefalo@gmail.com>
Description: This file controls the USB Sleep & Charge charging mode, which
can be:
+
* 0 -> Disabled (0x00)
* 1 -> Alternate (0x09)
* 2 -> Auto (0x21)
* 3 -> Typical (0x11)
+
Note that from kernel 4.1 onwards this file accepts all listed
values, kernel 4.0 only supports the first three.
Note that this feature only works when connected to power, if
@@ -93,8 +102,10 @@ Contact: Azael Avalos <coproscefalo@gmail.com>
Description: This file controls the USB Sleep Functions under battery, and
set the level at which point they will be disabled, accepted
values can be:
+
* 0 -> Disabled
* 1-100 -> Battery level to disable sleep functions
+
Currently it prints two values, the first one indicates if the
feature is enabled or disabled, while the second one shows the
current battery level set.
@@ -107,8 +118,10 @@ Date: January 23, 2015
KernelVersion: 4.0
Contact: Azael Avalos <coproscefalo@gmail.com>
Description: This file controls the USB Rapid Charge state, which can be:
+
* 0 -> Disabled
* 1 -> Enabled
+
Note that toggling this value requires a reboot for changes to
take effect.
Users: KToshiba
@@ -118,8 +131,10 @@ Date: January 23, 2015
KernelVersion: 4.0
Contact: Azael Avalos <coproscefalo@gmail.com>
Description: This file controls the Sleep & Music state, which values can be:
+
* 0 -> Disabled
* 1 -> Enabled
+
Note that this feature only works when connected to power, if
you want to use it under battery, see the entry named
"sleep_functions_on_battery"
@@ -138,6 +153,7 @@ KernelVersion: 4.0
Contact: Azael Avalos <coproscefalo@gmail.com>
Description: This file controls the state of the internal fan, valid
values are:
+
* 0 -> OFF
* 1 -> ON
@@ -147,8 +163,10 @@ KernelVersion: 4.0
Contact: Azael Avalos <coproscefalo@gmail.com>
Description: This file controls the Special Functions (hotkeys) operation
mode, valid values are:
+
* 0 -> Normal Operation
* 1 -> Special Functions
+
In the "Normal Operation" mode, the F{1-12} keys are as usual
and the hotkeys are accessed via FN-F{1-12}.
In the "Special Functions" mode, the F{1-12} keys trigger the
@@ -163,8 +181,10 @@ KernelVersion: 4.0
Contact: Azael Avalos <coproscefalo@gmail.com>
Description: This file controls whether the laptop should turn ON whenever
the LID is opened, valid values are:
+
* 0 -> Disabled
* 1 -> Enabled
+
Note that toggling this value requires a reboot for changes to
take effect.
Users: KToshiba
@@ -174,8 +194,10 @@ Date: February 12, 2015
KernelVersion: 4.0
Contact: Azael Avalos <coproscefalo@gmail.com>
Description: This file controls the USB 3 functionality, valid values are:
+
* 0 -> Disabled (Acts as a regular USB 2)
* 1 -> Enabled (Full USB 3 functionality)
+
Note that toggling this value requires a reboot for changes to
take effect.
Users: KToshiba
@@ -188,10 +210,14 @@ Description: This file controls the Cooling Method feature.
Reading this file prints two values, the first is the actual cooling method
and the second is the maximum cooling method supported.
When the maximum cooling method is ONE, valid values are:
+
* 0 -> Maximum Performance
* 1 -> Battery Optimized
+
When the maximum cooling method is TWO, valid values are:
+
* 0 -> Maximum Performance
* 1 -> Performance
* 2 -> Battery Optimized
+
Users: KToshiba
diff --git a/Documentation/ABI/testing/sysfs-driver-toshiba_haps b/Documentation/ABI/testing/sysfs-driver-toshiba_haps
index a662370b4dbf..c938690ce10d 100644
--- a/Documentation/ABI/testing/sysfs-driver-toshiba_haps
+++ b/Documentation/ABI/testing/sysfs-driver-toshiba_haps
@@ -4,10 +4,12 @@ KernelVersion: 3.17
Contact: Azael Avalos <coproscefalo@gmail.com>
Description: This file controls the built-in accelerometer protection level,
valid values are:
+
* 0 -> Disabled
* 1 -> Low
* 2 -> Medium
* 3 -> High
+
The default potection value is set to 2 (Medium).
Users: KToshiba
diff --git a/Documentation/ABI/testing/sysfs-driver-typec-displayport b/Documentation/ABI/testing/sysfs-driver-typec-displayport
index 231471ad0d4b..314acd54e13e 100644
--- a/Documentation/ABI/testing/sysfs-driver-typec-displayport
+++ b/Documentation/ABI/testing/sysfs-driver-typec-displayport
@@ -47,3 +47,28 @@ Description:
USB SuperSpeed protocol. From user perspective pin assignments C
and E are equal, where all channels on the connector are used
for carrying DisplayPort protocol (allowing higher resolutions).
+
+What: /sys/bus/typec/devices/.../displayport/hpd
+Date: Dec 2022
+Contact: Badhri Jagan Sridharan <badhri@google.com>
+Description:
+ VESA DisplayPort Alt Mode on USB Type-C Standard defines how
+ HotPlugDetect(HPD) shall be supported on the USB-C connector when
+ operating in DisplayPort Alt Mode. This is a read only node which
+ reflects the current state of HPD.
+
+ Valid values:
+ - 1: when HPD’s logical state is high (HPD_High) as defined
+ by VESA DisplayPort Alt Mode on USB Type-C Standard.
+ - 0 when HPD’s logical state is low (HPD_Low) as defined by
+ VESA DisplayPort Alt Mode on USB Type-C Standard.
+
+What: /sys/bus/typec/devices/.../displayport/irq_hpd
+Date: June 2025
+Contact: RD Babiera <rdbabiera@google.com>
+Description:
+ IRQ_HPD events are sent over the USB PD protocol in Status Update and
+ Attention messages. IRQ_HPD can only be asserted when HPD is high,
+ and is asserted when an IRQ_HPD has been issued since the last Status
+ Update. This is a read only node that returns the number of IRQ events
+ raised in the driver's lifetime.
diff --git a/Documentation/ABI/testing/sysfs-driver-uacce b/Documentation/ABI/testing/sysfs-driver-uacce
new file mode 100644
index 000000000000..d3f0b8f3c589
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-uacce
@@ -0,0 +1,57 @@
+What: /sys/class/uacce/<dev_name>/api
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: linux-accelerators@lists.ozlabs.org
+Description: Api of the device
+ Can be any string and up to userspace to parse.
+ Application use the api to match the correct driver
+
+What: /sys/class/uacce/<dev_name>/flags
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: linux-accelerators@lists.ozlabs.org
+Description: Attributes of the device, see UACCE_DEV_xxx flag defined in uacce.h
+
+What: /sys/class/uacce/<dev_name>/available_instances
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: linux-accelerators@lists.ozlabs.org
+Description: Available instances left of the device
+ Return -ENODEV if uacce_ops get_available_instances is not provided
+
+What: /sys/class/uacce/<dev_name>/isolate_strategy
+Date: Nov 2022
+KernelVersion: 6.1
+Contact: linux-accelerators@lists.ozlabs.org
+Description: (RW) A sysfs node that configure the error threshold for the hardware
+ isolation strategy. This size is a configured integer value, which is the
+ number of threshold for hardware errors occurred in one hour. The default is 0.
+ 0 means never isolate the device. The maximum value is 65535. You can write
+ a number of threshold based on your hardware.
+
+What: /sys/class/uacce/<dev_name>/isolate
+Date: Nov 2022
+KernelVersion: 6.1
+Contact: linux-accelerators@lists.ozlabs.org
+Description: (R) A sysfs node that read the device isolated state. The value 1
+ means the device is unavailable. The 0 means the device is
+ available.
+
+What: /sys/class/uacce/<dev_name>/algorithms
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: linux-accelerators@lists.ozlabs.org
+Description: Algorithms supported by this accelerator, separated by new line.
+ Can be any string and up to userspace to parse.
+
+What: /sys/class/uacce/<dev_name>/region_mmio_size
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: linux-accelerators@lists.ozlabs.org
+Description: Size (bytes) of mmio region queue file
+
+What: /sys/class/uacce/<dev_name>/region_dus_size
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: linux-accelerators@lists.ozlabs.org
+Description: Size (bytes) of dus region queue file
diff --git a/Documentation/ABI/testing/sysfs-driver-ucsi-ccg b/Documentation/ABI/testing/sysfs-driver-ucsi-ccg
new file mode 100644
index 000000000000..45cf62ad89e9
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-ucsi-ccg
@@ -0,0 +1,6 @@
+What: /sys/bus/i2c/drivers/ucsi_ccg/.../do_flash
+Date: May 2019
+Contact: Ajay Gupta <ajayg@nvidia.com>
+Description:
+ Tell the driver for Cypress CCGx Type-C controller to attempt
+ firmware upgrade by writing [Yy1] to the file.
diff --git a/Documentation/ABI/testing/sysfs-driver-ufs b/Documentation/ABI/testing/sysfs-driver-ufs
index 016724ec26d5..a90612ab5780 100644
--- a/Documentation/ABI/testing/sysfs-driver-ufs
+++ b/Documentation/ABI/testing/sysfs-driver-ufs
@@ -13,65 +13,80 @@ Description:
Interface specification for more details.
What: /sys/bus/platform/drivers/ufshcd/*/device_descriptor/device_type
+What: /sys/bus/platform/devices/*.ufs/device_descriptor/device_type
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the device type. This is one of the UFS
device descriptor parameters. The full information about
the descriptor could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/device_descriptor/device_class
+What: /sys/bus/platform/devices/*.ufs/device_descriptor/device_class
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the device class. This is one of the UFS
device descriptor parameters. The full information about
the descriptor could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/device_descriptor/device_sub_class
+What: /sys/bus/platform/devices/*.ufs/device_descriptor/device_sub_class
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the UFS storage subclass. This is one of
the UFS device descriptor parameters. The full information
about the descriptor could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/device_descriptor/protocol
+What: /sys/bus/platform/devices/*.ufs/device_descriptor/protocol
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the protocol supported by an UFS device.
This is one of the UFS device descriptor parameters.
The full information about the descriptor could be found
at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/device_descriptor/number_of_luns
+What: /sys/bus/platform/devices/*.ufs/device_descriptor/number_of_luns
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows number of logical units. This is one of
the UFS device descriptor parameters. The full information
about the descriptor could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/device_descriptor/number_of_wluns
+What: /sys/bus/platform/devices/*.ufs/device_descriptor/number_of_wluns
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows number of well known logical units.
This is one of the UFS device descriptor parameters.
The full information about the descriptor could be found
at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/device_descriptor/boot_enable
+What: /sys/bus/platform/devices/*.ufs/device_descriptor/boot_enable
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows value that indicates whether the device is
enabled for boot. This is one of the UFS device descriptor
parameters. The full information about the descriptor could
be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/device_descriptor/descriptor_access_enable
+What: /sys/bus/platform/devices/*.ufs/device_descriptor/descriptor_access_enable
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows value that indicates whether the device
@@ -79,145 +94,179 @@ Description: This file shows value that indicates whether the device
of the boot sequence. This is one of the UFS device descriptor
parameters. The full information about the descriptor could
be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/device_descriptor/initial_power_mode
+What: /sys/bus/platform/devices/*.ufs/device_descriptor/initial_power_mode
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows value that defines the power mode after
device initialization or hardware reset. This is one of
the UFS device descriptor parameters. The full information
about the descriptor could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/device_descriptor/high_priority_lun
+What: /sys/bus/platform/devices/*.ufs/device_descriptor/high_priority_lun
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the high priority lun. This is one of
the UFS device descriptor parameters. The full information
about the descriptor could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/device_descriptor/secure_removal_type
+What: /sys/bus/platform/devices/*.ufs/device_descriptor/secure_removal_type
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the secure removal type. This is one of
the UFS device descriptor parameters. The full information
about the descriptor could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/device_descriptor/support_security_lun
+What: /sys/bus/platform/devices/*.ufs/device_descriptor/support_security_lun
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows whether the security lun is supported.
This is one of the UFS device descriptor parameters.
The full information about the descriptor could be found
at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/device_descriptor/bkops_termination_latency
+What: /sys/bus/platform/devices/*.ufs/device_descriptor/bkops_termination_latency
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the background operations termination
latency. This is one of the UFS device descriptor parameters.
The full information about the descriptor could be found
at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/device_descriptor/initial_active_icc_level
+What: /sys/bus/platform/devices/*.ufs/device_descriptor/initial_active_icc_level
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the initial active ICC level. This is one
of the UFS device descriptor parameters. The full information
about the descriptor could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/device_descriptor/specification_version
+What: /sys/bus/platform/devices/*.ufs/device_descriptor/specification_version
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the specification version. This is one
of the UFS device descriptor parameters. The full information
about the descriptor could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/device_descriptor/manufacturing_date
+What: /sys/bus/platform/devices/*.ufs/device_descriptor/manufacturing_date
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the manufacturing date in BCD format.
This is one of the UFS device descriptor parameters.
The full information about the descriptor could be found
at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/device_descriptor/manufacturer_id
+What: /sys/bus/platform/devices/*.ufs/device_descriptor/manufacturer_id
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
-Description: This file shows the manufacturee ID. This is one of the
+Description: This file shows the manufacturer ID. This is one of the
UFS device descriptor parameters. The full information about
the descriptor could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/device_descriptor/rtt_capability
+What: /sys/bus/platform/devices/*.ufs/device_descriptor/rtt_capability
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the maximum number of outstanding RTTs
supported by the device. This is one of the UFS device
descriptor parameters. The full information about
the descriptor could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/device_descriptor/rtc_update
+What: /sys/bus/platform/devices/*.ufs/device_descriptor/rtc_update
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the frequency and method of the realtime
clock update. This is one of the UFS device descriptor
parameters. The full information about the descriptor
could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/device_descriptor/ufs_features
+What: /sys/bus/platform/devices/*.ufs/device_descriptor/ufs_features
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows which features are supported by the device.
This is one of the UFS device descriptor parameters.
The full information about the descriptor could be
found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/device_descriptor/ffu_timeout
+What: /sys/bus/platform/devices/*.ufs/device_descriptor/ffu_timeout
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the FFU timeout. This is one of the
UFS device descriptor parameters. The full information
about the descriptor could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/device_descriptor/queue_depth
+What: /sys/bus/platform/devices/*.ufs/device_descriptor/queue_depth
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the device queue depth. This is one of the
UFS device descriptor parameters. The full information
about the descriptor could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/device_descriptor/device_version
+What: /sys/bus/platform/devices/*.ufs/device_descriptor/device_version
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the device version. This is one of the
UFS device descriptor parameters. The full information
about the descriptor could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/device_descriptor/number_of_secure_wpa
+What: /sys/bus/platform/devices/*.ufs/device_descriptor/number_of_secure_wpa
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows number of secure write protect areas
supported by the device. This is one of the UFS device
descriptor parameters. The full information about
the descriptor could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/device_descriptor/psa_max_data_size
+What: /sys/bus/platform/devices/*.ufs/device_descriptor/psa_max_data_size
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the maximum amount of data that may be
@@ -225,38 +274,46 @@ Description: This file shows the maximum amount of data that may be
This is one of the UFS device descriptor parameters.
The full information about the descriptor could be found
at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/device_descriptor/psa_state_timeout
+What: /sys/bus/platform/devices/*.ufs/device_descriptor/psa_state_timeout
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the command maximum timeout for a change
in PSA state. This is one of the UFS device descriptor
parameters. The full information about the descriptor could
be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/interconnect_descriptor/unipro_version
+What: /sys/bus/platform/devices/*.ufs/interconnect_descriptor/unipro_version
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the MIPI UniPro version number in BCD format.
This is one of the UFS interconnect descriptor parameters.
The full information about the descriptor could be found at
UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/interconnect_descriptor/mphy_version
+What: /sys/bus/platform/devices/*.ufs/interconnect_descriptor/mphy_version
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the MIPI M-PHY version number in BCD format.
This is one of the UFS interconnect descriptor parameters.
The full information about the descriptor could be found at
UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/geometry_descriptor/raw_device_capacity
+What: /sys/bus/platform/devices/*.ufs/geometry_descriptor/raw_device_capacity
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the total memory quantity available to
@@ -264,123 +321,151 @@ Description: This file shows the total memory quantity available to
of the UFS geometry descriptor parameters. The full
information about the descriptor could be found at
UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/geometry_descriptor/max_number_of_luns
+What: /sys/bus/platform/devices/*.ufs/geometry_descriptor/max_number_of_luns
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the maximum number of logical units
supported by the UFS device. This is one of the UFS
geometry descriptor parameters. The full information about
the descriptor could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/geometry_descriptor/segment_size
+What: /sys/bus/platform/devices/*.ufs/geometry_descriptor/segment_size
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the segment size. This is one of the UFS
geometry descriptor parameters. The full information about
the descriptor could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/geometry_descriptor/allocation_unit_size
+What: /sys/bus/platform/devices/*.ufs/geometry_descriptor/allocation_unit_size
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the allocation unit size. This is one of
the UFS geometry descriptor parameters. The full information
about the descriptor could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/geometry_descriptor/min_addressable_block_size
+What: /sys/bus/platform/devices/*.ufs/geometry_descriptor/min_addressable_block_size
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the minimum addressable block size. This
is one of the UFS geometry descriptor parameters. The full
information about the descriptor could be found at UFS
specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/geometry_descriptor/optimal_read_block_size
+What: /sys/bus/platform/devices/*.ufs/geometry_descriptor/optimal_read_block_size
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the optimal read block size. This is one
of the UFS geometry descriptor parameters. The full
information about the descriptor could be found at UFS
specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/geometry_descriptor/optimal_write_block_size
+What: /sys/bus/platform/devices/*.ufs/geometry_descriptor/optimal_write_block_size
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the optimal write block size. This is one
of the UFS geometry descriptor parameters. The full
information about the descriptor could be found at UFS
specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/geometry_descriptor/max_in_buffer_size
+What: /sys/bus/platform/devices/*.ufs/geometry_descriptor/max_in_buffer_size
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the maximum data-in buffer size. This
is one of the UFS geometry descriptor parameters. The full
information about the descriptor could be found at UFS
specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/geometry_descriptor/max_out_buffer_size
+What: /sys/bus/platform/devices/*.ufs/geometry_descriptor/max_out_buffer_size
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the maximum data-out buffer size. This
is one of the UFS geometry descriptor parameters. The full
information about the descriptor could be found at UFS
specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/geometry_descriptor/rpmb_rw_size
+What: /sys/bus/platform/devices/*.ufs/geometry_descriptor/rpmb_rw_size
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the maximum number of RPMB frames allowed
in Security Protocol In/Out. This is one of the UFS geometry
descriptor parameters. The full information about the
descriptor could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/geometry_descriptor/dyn_capacity_resource_policy
+What: /sys/bus/platform/devices/*.ufs/geometry_descriptor/dyn_capacity_resource_policy
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the dynamic capacity resource policy. This
is one of the UFS geometry descriptor parameters. The full
information about the descriptor could be found at
UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/geometry_descriptor/data_ordering
+What: /sys/bus/platform/devices/*.ufs/geometry_descriptor/data_ordering
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows support for out-of-order data transfer.
This is one of the UFS geometry descriptor parameters.
The full information about the descriptor could be found at
UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/geometry_descriptor/max_number_of_contexts
+What: /sys/bus/platform/devices/*.ufs/geometry_descriptor/max_number_of_contexts
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows maximum available number of contexts which
are supported by the device. This is one of the UFS geometry
descriptor parameters. The full information about the
descriptor could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/geometry_descriptor/sys_data_tag_unit_size
+What: /sys/bus/platform/devices/*.ufs/geometry_descriptor/sys_data_tag_unit_size
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows system data tag unit size. This is one of
the UFS geometry descriptor parameters. The full information
about the descriptor could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/geometry_descriptor/sys_data_tag_resource_size
+What: /sys/bus/platform/devices/*.ufs/geometry_descriptor/sys_data_tag_resource_size
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows maximum storage area size allocated by
@@ -388,27 +473,33 @@ Description: This file shows maximum storage area size allocated by
This is one of the UFS geometry descriptor parameters.
The full information about the descriptor could be found at
UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/geometry_descriptor/secure_removal_types
+What: /sys/bus/platform/devices/*.ufs/geometry_descriptor/secure_removal_types
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows supported secure removal types. This is
one of the UFS geometry descriptor parameters. The full
information about the descriptor could be found at
UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/geometry_descriptor/memory_types
+What: /sys/bus/platform/devices/*.ufs/geometry_descriptor/memory_types
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows supported memory types. This is one of
the UFS geometry descriptor parameters. The full
information about the descriptor could be found at
UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/geometry_descriptor/*_memory_max_alloc_units
+What: /sys/bus/platform/devices/*.ufs/geometry_descriptor/*_memory_max_alloc_units
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the maximum number of allocation units for
@@ -416,9 +507,11 @@ Description: This file shows the maximum number of allocation units for
enhanced type 1-4). This is one of the UFS geometry
descriptor parameters. The full information about the
descriptor could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/geometry_descriptor/*_memory_capacity_adjustment_factor
+What: /sys/bus/platform/devices/*.ufs/geometry_descriptor/*_memory_capacity_adjustment_factor
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the memory capacity adjustment factor for
@@ -426,83 +519,102 @@ Description: This file shows the memory capacity adjustment factor for
enhanced type 1-4). This is one of the UFS geometry
descriptor parameters. The full information about the
descriptor could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/health_descriptor/eol_info
+What: /sys/bus/platform/devices/*.ufs/health_descriptor/eol_info
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows preend of life information. This is one
of the UFS health descriptor parameters. The full
information about the descriptor could be found at
UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/health_descriptor/life_time_estimation_a
+What: /sys/bus/platform/devices/*.ufs/health_descriptor/life_time_estimation_a
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows indication of the device life time
(method a). This is one of the UFS health descriptor
parameters. The full information about the descriptor
could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/health_descriptor/life_time_estimation_b
+What: /sys/bus/platform/devices/*.ufs/health_descriptor/life_time_estimation_b
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows indication of the device life time
(method b). This is one of the UFS health descriptor
parameters. The full information about the descriptor
could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/power_descriptor/active_icc_levels_vcc*
+What: /sys/bus/platform/devices/*.ufs/power_descriptor/active_icc_levels_vcc*
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows maximum VCC, VCCQ and VCCQ2 value for
active ICC levels from 0 to 15. This is one of the UFS
power descriptor parameters. The full information about
the descriptor could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/string_descriptors/manufacturer_name
+What: /sys/bus/platform/devices/*.ufs/string_descriptors/manufacturer_name
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
-Description: This file contains a device manufactureer name string.
+Description: This file contains a device manufacturer name string.
The full information about the descriptor could be found at
UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/string_descriptors/product_name
+What: /sys/bus/platform/devices/*.ufs/string_descriptors/product_name
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file contains a product name string. The full information
about the descriptor could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/string_descriptors/oem_id
+What: /sys/bus/platform/devices/*.ufs/string_descriptors/oem_id
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file contains a OEM ID string. The full information
about the descriptor could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/string_descriptors/serial_number
+What: /sys/bus/platform/devices/*.ufs/string_descriptors/serial_number
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file contains a device serial number string. The full
information about the descriptor could be found at
UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/string_descriptors/product_revision
+What: /sys/bus/platform/devices/*.ufs/string_descriptors/product_revision
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file contains a product revision string. The full
information about the descriptor could be found at
UFS specifications 2.1.
+
The file is read only.
@@ -512,6 +624,7 @@ Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows boot LUN information. This is one of
the UFS unit descriptor parameters. The full information
about the descriptor could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/class/scsi_device/*/device/unit_descriptor/lun_write_protect
@@ -520,6 +633,7 @@ Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows LUN write protection status. This is one of
the UFS unit descriptor parameters. The full information
about the descriptor could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/class/scsi_device/*/device/unit_descriptor/lun_queue_depth
@@ -528,6 +642,7 @@ Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows LUN queue depth. This is one of the UFS
unit descriptor parameters. The full information about
the descriptor could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/class/scsi_device/*/device/unit_descriptor/psa_sensitive
@@ -536,6 +651,7 @@ Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows PSA sensitivity. This is one of the UFS
unit descriptor parameters. The full information about
the descriptor could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/class/scsi_device/*/device/unit_descriptor/lun_memory_type
@@ -544,6 +660,7 @@ Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows LUN memory type. This is one of the UFS
unit descriptor parameters. The full information about
the descriptor could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/class/scsi_device/*/device/unit_descriptor/data_reliability
@@ -553,6 +670,7 @@ Description: This file defines the device behavior when a power failure
occurs during a write operation. This is one of the UFS
unit descriptor parameters. The full information about
the descriptor could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/class/scsi_device/*/device/unit_descriptor/logical_block_size
@@ -562,6 +680,7 @@ Description: This file shows the size of addressable logical blocks
(calculated as an exponent with base 2). This is one of
the UFS unit descriptor parameters. The full information about
the descriptor could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/class/scsi_device/*/device/unit_descriptor/logical_block_count
@@ -571,6 +690,7 @@ Description: This file shows total number of addressable logical blocks.
This is one of the UFS unit descriptor parameters. The full
information about the descriptor could be found at
UFS specifications 2.1.
+
The file is read only.
What: /sys/class/scsi_device/*/device/unit_descriptor/erase_block_size
@@ -579,6 +699,7 @@ Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the erase block size. This is one of
the UFS unit descriptor parameters. The full information
about the descriptor could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/class/scsi_device/*/device/unit_descriptor/provisioning_type
@@ -587,14 +708,16 @@ Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the thin provisioning type. This is one of
the UFS unit descriptor parameters. The full information
about the descriptor could be found at UFS specifications 2.1.
+
The file is read only.
-What: /sys/class/scsi_device/*/device/unit_descriptor/physical_memory_resourse_count
+What: /sys/class/scsi_device/*/device/unit_descriptor/physical_memory_resource_count
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the total physical memory resources. This is
one of the UFS unit descriptor parameters. The full information
about the descriptor could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/class/scsi_device/*/device/unit_descriptor/context_capabilities
@@ -603,6 +726,7 @@ Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the context capabilities. This is one of
the UFS unit descriptor parameters. The full information
about the descriptor could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/class/scsi_device/*/device/unit_descriptor/large_unit_granularity
@@ -611,147 +735,183 @@ Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the granularity of the LUN. This is one of
the UFS unit descriptor parameters. The full information
about the descriptor could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/flags/device_init
+What: /sys/bus/platform/devices/*.ufs/flags/device_init
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the device init status. The full information
about the flag could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/flags/permanent_wpe
+What: /sys/bus/platform/devices/*.ufs/flags/permanent_wpe
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows whether permanent write protection is enabled.
The full information about the flag could be found at
UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/flags/power_on_wpe
+What: /sys/bus/platform/devices/*.ufs/flags/power_on_wpe
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows whether write protection is enabled on all
logical units configured as power on write protected. The
full information about the flag could be found at
UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/flags/bkops_enable
+What: /sys/bus/platform/devices/*.ufs/flags/bkops_enable
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows whether the device background operations are
enabled. The full information about the flag could be
found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/flags/life_span_mode_enable
+What: /sys/bus/platform/devices/*.ufs/flags/life_span_mode_enable
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows whether the device life span mode is enabled.
The full information about the flag could be found at
UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/flags/phy_resource_removal
+What: /sys/bus/platform/devices/*.ufs/flags/phy_resource_removal
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows whether physical resource removal is enable.
The full information about the flag could be found at
UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/flags/busy_rtc
+What: /sys/bus/platform/devices/*.ufs/flags/busy_rtc
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows whether the device is executing internal
operation related to real time clock. The full information
about the flag could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/flags/disable_fw_update
+What: /sys/bus/platform/devices/*.ufs/flags/disable_fw_update
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows whether the device FW update is permanently
disabled. The full information about the flag could be found
at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/attributes/boot_lun_enabled
+What: /sys/bus/platform/devices/*.ufs/attributes/boot_lun_enabled
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file provides the boot lun enabled UFS device attribute.
The full information about the attribute could be found at
UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/attributes/current_power_mode
+What: /sys/bus/platform/devices/*.ufs/attributes/current_power_mode
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file provides the current power mode UFS device attribute.
The full information about the attribute could be found at
UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/attributes/active_icc_level
+What: /sys/bus/platform/devices/*.ufs/attributes/active_icc_level
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file provides the active icc level UFS device attribute.
The full information about the attribute could be found at
UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/attributes/ooo_data_enabled
+What: /sys/bus/platform/devices/*.ufs/attributes/ooo_data_enabled
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file provides the out of order data transfer enabled UFS
device attribute. The full information about the attribute
could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/attributes/bkops_status
+What: /sys/bus/platform/devices/*.ufs/attributes/bkops_status
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file provides the background operations status UFS device
attribute. The full information about the attribute could
be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/attributes/purge_status
+What: /sys/bus/platform/devices/*.ufs/attributes/purge_status
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file provides the purge operation status UFS device
attribute. The full information about the attribute could
be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/attributes/max_data_in_size
+What: /sys/bus/platform/devices/*.ufs/attributes/max_data_in_size
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the maximum data size in a DATA IN
UPIU. The full information about the attribute could
be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/attributes/max_data_out_size
+What: /sys/bus/platform/devices/*.ufs/attributes/max_data_out_size
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the maximum number of bytes that can be
requested with a READY TO TRANSFER UPIU. The full information
about the attribute could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/attributes/reference_clock_frequency
+What: /sys/bus/platform/devices/*.ufs/attributes/reference_clock_frequency
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file provides the reference clock frequency UFS device
attribute. The full information about the attribute could
be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/attributes/configuration_descriptor_lock
+What: /sys/bus/platform/devices/*.ufs/attributes/configuration_descriptor_lock
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows whether the configuration descriptor is locked.
@@ -759,127 +919,852 @@ Description: This file shows whether the configuration descriptor is locked.
UFS specifications 2.1. The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/attributes/max_number_of_rtt
-Date: February 2018
-Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
+What: /sys/bus/platform/devices/*.ufs/attributes/max_number_of_rtt
+Date: May 2024
+Contact: Avri Altman <avri.altman@wdc.com>
Description: This file provides the maximum current number of
- outstanding RTTs in device that is allowed. The full
- information about the attribute could be found at
- UFS specifications 2.1.
- The file is read only.
+ outstanding RTTs in device that is allowed. bMaxNumOfRTT is a
+ read-write persistent attribute and is equal to two after device
+ manufacturing. It shall not be set to a value greater than
+ bDeviceRTTCap value, and it may be set only when the hw queues are
+ empty.
+
+ The file is read write.
What: /sys/bus/platform/drivers/ufshcd/*/attributes/exception_event_control
+What: /sys/bus/platform/devices/*.ufs/attributes/exception_event_control
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file provides the exception event control UFS device
attribute. The full information about the attribute could
be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/attributes/exception_event_status
+What: /sys/bus/platform/devices/*.ufs/attributes/exception_event_status
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file provides the exception event status UFS device
attribute. The full information about the attribute could
be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/attributes/ffu_status
+What: /sys/bus/platform/devices/*.ufs/attributes/ffu_status
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file provides the ffu status UFS device attribute.
The full information about the attribute could be found at
UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/attributes/psa_state
+What: /sys/bus/platform/devices/*.ufs/attributes/psa_state
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file show the PSA feature status. The full information
about the attribute could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/attributes/psa_data_size
+What: /sys/bus/platform/devices/*.ufs/attributes/psa_data_size
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the amount of data that the host plans to
load to all logical units in pre-soldering state.
The full information about the attribute could be found at
UFS specifications 2.1.
+
The file is read only.
What: /sys/class/scsi_device/*/device/dyn_cap_needed
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
-Description: This file shows the The amount of physical memory needed
+Description: This file shows the amount of physical memory needed
to be removed from the physical memory resources pool of
the particular logical unit. The full information about
the attribute could be found at UFS specifications 2.1.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/rpm_lvl
+What: /sys/bus/platform/devices/*.ufs/rpm_lvl
Date: September 2014
-Contact: Subhash Jadavani <subhashj@codeaurora.org>
+Contact: Can Guo <quic_cang@quicinc.com>
Description: This entry could be used to set or show the UFS device
runtime power management level. The current driver
- implementation supports 6 levels with next target states:
- 0 - an UFS device will stay active, an UIC link will
- stay active
- 1 - an UFS device will stay active, an UIC link will
- hibernate
- 2 - an UFS device will moved to sleep, an UIC link will
- stay active
- 3 - an UFS device will moved to sleep, an UIC link will
- hibernate
- 4 - an UFS device will be powered off, an UIC link will
- hibernate
- 5 - an UFS device will be powered off, an UIC link will
- be powered off
+ implementation supports 7 levels with next target states:
+
+ == ====================================================
+ 0 UFS device will stay active, UIC link will
+ stay active
+ 1 UFS device will stay active, UIC link will
+ hibernate
+ 2 UFS device will be moved to sleep, UIC link will
+ stay active
+ 3 UFS device will be moved to sleep, UIC link will
+ hibernate
+ 4 UFS device will be powered off, UIC link will
+ hibernate
+ 5 UFS device will be powered off, UIC link will
+ be powered off
+ 6 UFS device will be moved to deep sleep, UIC link
+ will be powered off. Note, deep sleep might not be
+ supported in which case this value will not be
+ accepted
+ == ====================================================
What: /sys/bus/platform/drivers/ufshcd/*/rpm_target_dev_state
+What: /sys/bus/platform/devices/*.ufs/rpm_target_dev_state
Date: February 2018
-Contact: Subhash Jadavani <subhashj@codeaurora.org>
+Contact: Can Guo <quic_cang@quicinc.com>
Description: This entry shows the target power mode of an UFS device
for the chosen runtime power management level.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/rpm_target_link_state
+What: /sys/bus/platform/devices/*.ufs/rpm_target_link_state
Date: February 2018
-Contact: Subhash Jadavani <subhashj@codeaurora.org>
+Contact: Can Guo <quic_cang@quicinc.com>
Description: This entry shows the target state of an UFS UIC link
for the chosen runtime power management level.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/spm_lvl
+What: /sys/bus/platform/devices/*.ufs/spm_lvl
Date: September 2014
-Contact: Subhash Jadavani <subhashj@codeaurora.org>
+Contact: Can Guo <quic_cang@quicinc.com>
Description: This entry could be used to set or show the UFS device
system power management level. The current driver
- implementation supports 6 levels with next target states:
- 0 - an UFS device will stay active, an UIC link will
- stay active
- 1 - an UFS device will stay active, an UIC link will
- hibernate
- 2 - an UFS device will moved to sleep, an UIC link will
- stay active
- 3 - an UFS device will moved to sleep, an UIC link will
- hibernate
- 4 - an UFS device will be powered off, an UIC link will
- hibernate
- 5 - an UFS device will be powered off, an UIC link will
- be powered off
+ implementation supports 7 levels with next target states:
+
+ == ====================================================
+ 0 UFS device will stay active, UIC link will
+ stay active
+ 1 UFS device will stay active, UIC link will
+ hibernate
+ 2 UFS device will be moved to sleep, UIC link will
+ stay active
+ 3 UFS device will be moved to sleep, UIC link will
+ hibernate
+ 4 UFS device will be powered off, UIC link will
+ hibernate
+ 5 UFS device will be powered off, UIC link will
+ be powered off
+ 6 UFS device will be moved to deep sleep, UIC link
+ will be powered off. Note, deep sleep might not be
+ supported in which case this value will not be
+ accepted
+ == ====================================================
What: /sys/bus/platform/drivers/ufshcd/*/spm_target_dev_state
+What: /sys/bus/platform/devices/*.ufs/spm_target_dev_state
Date: February 2018
-Contact: Subhash Jadavani <subhashj@codeaurora.org>
+Contact: Can Guo <quic_cang@quicinc.com>
Description: This entry shows the target power mode of an UFS device
for the chosen system power management level.
+
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/spm_target_link_state
+What: /sys/bus/platform/devices/*.ufs/spm_target_link_state
Date: February 2018
-Contact: Subhash Jadavani <subhashj@codeaurora.org>
+Contact: Can Guo <quic_cang@quicinc.com>
Description: This entry shows the target state of an UFS UIC link
for the chosen system power management level.
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/monitor/monitor_enable
+What: /sys/bus/platform/devices/*.ufs/monitor/monitor_enable
+Date: January 2021
+Contact: Can Guo <quic_cang@quicinc.com>
+Description: This file shows the status of performance monitor enablement
+ and it can be used to start/stop the monitor. When the monitor
+ is stopped, the performance data collected is also cleared.
+
+What: /sys/bus/platform/drivers/ufshcd/*/monitor/monitor_chunk_size
+What: /sys/bus/platform/devices/*.ufs/monitor/monitor_chunk_size
+Date: January 2021
+Contact: Can Guo <quic_cang@quicinc.com>
+Description: This file tells the monitor to focus on requests transferring
+ data of specific chunk size (in Bytes). 0 means any chunk size.
+ It can only be changed when monitor is disabled.
+
+What: /sys/bus/platform/drivers/ufshcd/*/monitor/read_total_sectors
+What: /sys/bus/platform/devices/*.ufs/monitor/read_total_sectors
+Date: January 2021
+Contact: Can Guo <quic_cang@quicinc.com>
+Description: This file shows how many sectors (in 512 Bytes) have been
+ sent from device to host after monitor gets started.
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/monitor/read_total_busy
+What: /sys/bus/platform/devices/*.ufs/monitor/read_total_busy
+Date: January 2021
+Contact: Can Guo <quic_cang@quicinc.com>
+Description: This file shows how long (in micro seconds) has been spent
+ sending data from device to host after monitor gets started.
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/monitor/read_nr_requests
+What: /sys/bus/platform/devices/*.ufs/monitor/read_nr_requests
+Date: January 2021
+Contact: Can Guo <quic_cang@quicinc.com>
+Description: This file shows how many read requests have been sent after
+ monitor gets started.
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/monitor/read_req_latency_max
+What: /sys/bus/platform/devices/*.ufs/monitor/read_req_latency_max
+Date: January 2021
+Contact: Can Guo <quic_cang@quicinc.com>
+Description: This file shows the maximum latency (in micro seconds) of
+ read requests after monitor gets started.
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/monitor/read_req_latency_min
+What: /sys/bus/platform/devices/*.ufs/monitor/read_req_latency_min
+Date: January 2021
+Contact: Can Guo <quic_cang@quicinc.com>
+Description: This file shows the minimum latency (in micro seconds) of
+ read requests after monitor gets started.
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/monitor/read_req_latency_avg
+What: /sys/bus/platform/devices/*.ufs/monitor/read_req_latency_avg
+Date: January 2021
+Contact: Can Guo <quic_cang@quicinc.com>
+Description: This file shows the average latency (in micro seconds) of
+ read requests after monitor gets started.
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/monitor/read_req_latency_sum
+What: /sys/bus/platform/devices/*.ufs/monitor/read_req_latency_sum
+Date: January 2021
+Contact: Can Guo <quic_cang@quicinc.com>
+Description: This file shows the total latency (in micro seconds) of
+ read requests sent after monitor gets started.
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/monitor/write_total_sectors
+What: /sys/bus/platform/devices/*.ufs/monitor/write_total_sectors
+Date: January 2021
+Contact: Can Guo <quic_cang@quicinc.com>
+Description: This file shows how many sectors (in 512 Bytes) have been sent
+ from host to device after monitor gets started.
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/monitor/write_total_busy
+What: /sys/bus/platform/devices/*.ufs/monitor/write_total_busy
+Date: January 2021
+Contact: Can Guo <quic_cang@quicinc.com>
+Description: This file shows how long (in micro seconds) has been spent
+ sending data from host to device after monitor gets started.
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/monitor/write_nr_requests
+What: /sys/bus/platform/devices/*.ufs/monitor/write_nr_requests
+Date: January 2021
+Contact: Can Guo <quic_cang@quicinc.com>
+Description: This file shows how many write requests have been sent after
+ monitor gets started.
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/monitor/write_req_latency_max
+What: /sys/bus/platform/devices/*.ufs/monitor/write_req_latency_max
+Date: January 2021
+Contact: Can Guo <quic_cang@quicinc.com>
+Description: This file shows the maximum latency (in micro seconds) of write
+ requests after monitor gets started.
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/monitor/write_req_latency_min
+What: /sys/bus/platform/devices/*.ufs/monitor/write_req_latency_min
+Date: January 2021
+Contact: Can Guo <quic_cang@quicinc.com>
+Description: This file shows the minimum latency (in micro seconds) of write
+ requests after monitor gets started.
+
The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/monitor/write_req_latency_avg
+What: /sys/bus/platform/devices/*.ufs/monitor/write_req_latency_avg
+Date: January 2021
+Contact: Can Guo <quic_cang@quicinc.com>
+Description: This file shows the average latency (in micro seconds) of write
+ requests after monitor gets started.
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/monitor/write_req_latency_sum
+What: /sys/bus/platform/devices/*.ufs/monitor/write_req_latency_sum
+Date: January 2021
+Contact: Can Guo <quic_cang@quicinc.com>
+Description: This file shows the total latency (in micro seconds) of write
+ requests after monitor gets started.
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/power_info/lane
+What: /sys/bus/platform/devices/*.ufs/power_info/lane
+Date: September 2023
+Contact: Can Guo <quic_cang@quicinc.com>
+Description: This file shows how many lanes are enabled on the UFS link,
+ i.e., an output 2 means UFS link is operating with 2 lanes.
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/power_info/mode
+What: /sys/bus/platform/devices/*.ufs/power_info/mode
+Date: September 2023
+Contact: Can Guo <quic_cang@quicinc.com>
+Description: This file shows the PA power mode of UFS.
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/power_info/rate
+What: /sys/bus/platform/devices/*.ufs/power_info/rate
+Date: September 2023
+Contact: Can Guo <quic_cang@quicinc.com>
+Description: This file shows the speed rate of UFS link.
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/power_info/gear
+What: /sys/bus/platform/devices/*.ufs/power_info/gear
+Date: September 2023
+Contact: Can Guo <quic_cang@quicinc.com>
+Description: This file shows the gear of UFS link.
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/power_info/dev_pm
+What: /sys/bus/platform/devices/*.ufs/power_info/dev_pm
+Date: September 2023
+Contact: Can Guo <quic_cang@quicinc.com>
+Description: This file shows the UFS device power mode.
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/power_info/link_state
+What: /sys/bus/platform/devices/*.ufs/power_info/link_state
+Date: September 2023
+Contact: Can Guo <quic_cang@quicinc.com>
+Description: This file shows the state of UFS link.
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/device_descriptor/wb_presv_us_en
+What: /sys/bus/platform/devices/*.ufs/device_descriptor/wb_presv_us_en
+Date: June 2020
+Contact: Asutosh Das <quic_asutoshd@quicinc.com>
+Description: This entry shows if preserve user-space was configured
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/device_descriptor/wb_shared_alloc_units
+What: /sys/bus/platform/devices/*.ufs/device_descriptor/wb_shared_alloc_units
+Date: June 2020
+Contact: Asutosh Das <quic_asutoshd@quicinc.com>
+Description: This entry shows the shared allocated units of WB buffer
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/device_descriptor/wb_type
+What: /sys/bus/platform/devices/*.ufs/device_descriptor/wb_type
+Date: June 2020
+Contact: Asutosh Das <quic_asutoshd@quicinc.com>
+Description: This entry shows the configured WB type.
+ 0x1 for shared buffer mode. 0x0 for dedicated buffer mode.
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/geometry_descriptor/wb_buff_cap_adj
+What: /sys/bus/platform/devices/*.ufs/geometry_descriptor/wb_buff_cap_adj
+Date: June 2020
+Contact: Asutosh Das <quic_asutoshd@quicinc.com>
+Description: This entry shows the total user-space decrease in shared
+ buffer mode.
+ The value of this parameter is 3 for TLC NAND when SLC mode
+ is used as WriteBooster Buffer. 2 for MLC NAND.
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/geometry_descriptor/wb_max_alloc_units
+What: /sys/bus/platform/devices/*.ufs/geometry_descriptor/wb_max_alloc_units
+Date: June 2020
+Contact: Asutosh Das <quic_asutoshd@quicinc.com>
+Description: This entry shows the Maximum total WriteBooster Buffer size
+ which is supported by the entire device.
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/geometry_descriptor/wb_max_wb_luns
+What: /sys/bus/platform/devices/*.ufs/geometry_descriptor/wb_max_wb_luns
+Date: June 2020
+Contact: Asutosh Das <quic_asutoshd@quicinc.com>
+Description: This entry shows the maximum number of luns that can support
+ WriteBooster.
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/geometry_descriptor/wb_sup_red_type
+What: /sys/bus/platform/devices/*.ufs/geometry_descriptor/wb_sup_red_type
+Date: June 2020
+Contact: Asutosh Das <quic_asutoshd@quicinc.com>
+Description: The supportability of user space reduction mode
+ and preserve user space mode.
+ 00h: WriteBooster Buffer can be configured only in
+ user space reduction type.
+ 01h: WriteBooster Buffer can be configured only in
+ preserve user space type.
+ 02h: Device can be configured in either user space
+ reduction type or preserve user space type.
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/geometry_descriptor/wb_sup_wb_type
+What: /sys/bus/platform/devices/*.ufs/geometry_descriptor/wb_sup_wb_type
+Date: June 2020
+Contact: Asutosh Das <quic_asutoshd@quicinc.com>
+Description: The supportability of WriteBooster Buffer type.
+
+ === ==========================================================
+ 00h LU based WriteBooster Buffer configuration
+ 01h Single shared WriteBooster Buffer configuration
+ 02h Supporting both LU based WriteBooster.
+ Buffer and Single shared WriteBooster Buffer configuration
+ === ==========================================================
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/flags/wb_enable
+What: /sys/bus/platform/devices/*.ufs/flags/wb_enable
+Date: June 2020
+Contact: Asutosh Das <quic_asutoshd@quicinc.com>
+Description: This entry shows the status of WriteBooster.
+
+ == ============================
+ 0 WriteBooster is not enabled.
+ 1 WriteBooster is enabled
+ == ============================
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/flags/wb_flush_en
+What: /sys/bus/platform/devices/*.ufs/flags/wb_flush_en
+Date: June 2020
+Contact: Asutosh Das <quic_asutoshd@quicinc.com>
+Description: This entry shows if flush is enabled.
+
+ == =================================
+ 0 Flush operation is not performed.
+ 1 Flush operation is performed.
+ == =================================
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/flags/wb_flush_during_h8
+What: /sys/bus/platform/devices/*.ufs/flags/wb_flush_during_h8
+Date: June 2020
+Contact: Asutosh Das <quic_asutoshd@quicinc.com>
+Description: Flush WriteBooster Buffer during hibernate state.
+
+ == =================================================
+ 0 Device is not allowed to flush the
+ WriteBooster Buffer during link hibernate state.
+ 1 Device is allowed to flush the
+ WriteBooster Buffer during link hibernate state.
+ == =================================================
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/attributes/wb_avail_buf
+What: /sys/bus/platform/devices/*.ufs/attributes/wb_avail_buf
+Date: June 2020
+Contact: Asutosh Das <quic_asutoshd@quicinc.com>
+Description: This entry shows the amount of unused WriteBooster buffer
+ available.
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/attributes/wb_cur_buf
+What: /sys/bus/platform/devices/*.ufs/attributes/wb_cur_buf
+Date: June 2020
+Contact: Asutosh Das <quic_asutoshd@quicinc.com>
+Description: This entry shows the amount of unused current buffer.
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/attributes/wb_flush_status
+What: /sys/bus/platform/devices/*.ufs/attributes/wb_flush_status
+Date: June 2020
+Contact: Asutosh Das <quic_asutoshd@quicinc.com>
+Description: This entry shows the flush operation status.
+
+
+ === ======================================
+ 00h idle
+ 01h Flush operation in progress
+ 02h Flush operation stopped prematurely.
+ 03h Flush operation completed successfully
+ 04h Flush operation general failure
+ === ======================================
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/attributes/wb_life_time_est
+What: /sys/bus/platform/devices/*.ufs/attributes/wb_life_time_est
+Date: June 2020
+Contact: Asutosh Das <quic_asutoshd@quicinc.com>
+Description: This entry shows an indication of the WriteBooster Buffer
+ lifetime based on the amount of performed program/erase cycles
+
+ === =============================================
+ 01h 0% - 10% WriteBooster Buffer life time used
+ ...
+ 0Ah 90% - 100% WriteBooster Buffer life time used
+ === =============================================
+
+ The file is read only.
+
+What: /sys/class/scsi_device/*/device/unit_descriptor/wb_buf_alloc_units
+Date: June 2020
+Contact: Asutosh Das <quic_asutoshd@quicinc.com>
+Description: This entry shows the configured size of WriteBooster buffer.
+ 0400h corresponds to 4GB.
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/wb_on
+What: /sys/bus/platform/devices/*.ufs/wb_on
+Date: January 2021
+Contact: Bean Huo <beanhuo@micron.com>
+Description: This node is used to set or display whether UFS WriteBooster is
+ enabled. Echo 0 to this file to disable UFS WriteBooster or 1 to
+ enable it. The WriteBooster is enabled after power-on/reset,
+ however, it will be disabled/enable while CLK scaling down/up
+ (if the platform supports UFSHCD_CAP_CLK_SCALING). For a
+ platform that doesn't support UFSHCD_CAP_CLK_SCALING, we can
+ disable/enable WriteBooster through this sysfs node.
+
+What: /sys/bus/platform/drivers/ufshcd/*/enable_wb_buf_flush
+What: /sys/bus/platform/devices/*.ufs/enable_wb_buf_flush
+Date: July 2022
+Contact: Jinyoung Choi <j-young.choi@samsung.com>
+Description: This entry shows the status of WriteBooster buffer flushing
+ and it can be used to enable or disable the flushing.
+ If flushing is enabled, the device executes the flush
+ operation when the command queue is empty.
+
+What: /sys/bus/platform/drivers/ufshcd/*/wb_flush_threshold
+What: /sys/bus/platform/devices/*.ufs/wb_flush_threshold
+Date: June 2023
+Contact: Lu Hongfei <luhongfei@vivo.com>
+Description:
+ wb_flush_threshold represents the threshold for flushing WriteBooster buffer,
+ whose value expressed in unit of 10% granularity, such as '1' representing 10%,
+ '2' representing 20%, and so on.
+ If avail_wb_buff < wb_flush_threshold, it indicates that WriteBooster buffer needs to
+ be flushed, otherwise it is not necessary.
+
+Contact: Daniil Lunev <dlunev@chromium.org>
+What: /sys/bus/platform/drivers/ufshcd/*/capabilities/
+What: /sys/bus/platform/devices/*.ufs/capabilities/
+Date: August 2022
+Description: The group represents the effective capabilities of the
+ host-device pair. i.e. the capabilities which are enabled in the
+ driver for the specific host controller, supported by the host
+ controller and are supported and/or have compatible
+ configuration on the device side.
+
+Contact: Daniil Lunev <dlunev@chromium.org>
+What: /sys/bus/platform/drivers/ufshcd/*/capabilities/clock_scaling
+What: /sys/bus/platform/devices/*.ufs/capabilities/clock_scaling
+Date: August 2022
+Contact: Daniil Lunev <dlunev@chromium.org>
+Description: Indicates status of clock scaling.
+
+ == ============================
+ 0 Clock scaling is not supported.
+ 1 Clock scaling is supported.
+ == ============================
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/capabilities/write_booster
+What: /sys/bus/platform/devices/*.ufs/capabilities/write_booster
+Date: August 2022
+Contact: Daniil Lunev <dlunev@chromium.org>
+Description: Indicates status of Write Booster.
+
+ == ============================
+ 0 Write Booster can not be enabled.
+ 1 Write Booster can be enabled.
+ == ============================
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/rtc_update_ms
+What: /sys/bus/platform/devices/*.ufs/rtc_update_ms
+Date: November 2023
+Contact: Bean Huo <beanhuo@micron.com>
+Description:
+ rtc_update_ms indicates how often the host should synchronize or update the
+ UFS RTC. If set to 0, this will disable UFS RTC periodic update.
+
+What: /sys/devices/platform/.../ufshci_capabilities/version
+Date: August 2024
+Contact: Avri Altman <avri.altman@wdc.com>
+Description:
+ Host Capabilities register group: UFS version register.
+ Symbol - VER. This file shows the UFSHCD version.
+ Example: Version 3.12 would be represented as 0000_0312h.
+ The file is read only.
+
+What: /sys/devices/platform/.../ufshci_capabilities/product_id
+Date: August 2024
+Contact: Avri Altman <avri.altman@wdc.com>
+Description:
+ Host Capabilities register group: product ID register.
+ Symbol - HCPID. This file shows the UFSHCD product id.
+ The content of this register is vendor specific.
+ The file is read only.
+
+What: /sys/devices/platform/.../ufshci_capabilities/man_id
+Date: August 2024
+Contact: Avri Altman <avri.altman@wdc.com>
+Description:
+ Host Capabilities register group: manufacturer ID register.
+ Symbol - HCMID. This file shows the UFSHCD manufacturer id.
+ The Manufacturer ID is defined by JEDEC in JEDEC-JEP106.
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/critical_health
+What: /sys/bus/platform/devices/*.ufs/critical_health
+Date: February 2025
+Contact: Avri Altman <avri.altman@wdc.com>
+Description: Report the number of times a critical health event has been
+ reported by a UFS device. Further insight into the specific
+ issue can be gained by reading one of: bPreEOLInfo,
+ bDeviceLifeTimeEstA, bDeviceLifeTimeEstB,
+ bWriteBoosterBufferLifeTimeEst, and bRPMBLifeTimeEst.
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/clkscale_enable
+What: /sys/bus/platform/devices/*.ufs/clkscale_enable
+Date: January 2025
+Contact: Ziqi Chen <quic_ziqichen@quicinc.com>
+Description:
+ This attribute shows whether the UFS clock scaling is enabled or not.
+ And it can be used to enable/disable the clock scaling by writing
+ 1 or 0 to this attribute.
+
+ The attribute is read/write.
+
+What: /sys/bus/platform/drivers/ufshcd/*/clkgate_enable
+What: /sys/bus/platform/devices/*.ufs/clkgate_enable
+Date: January 2025
+Contact: Ziqi Chen <quic_ziqichen@quicinc.com>
+Description:
+ This attribute shows whether the UFS clock gating is enabled or not.
+ And it can be used to enable/disable the clock gating by writing
+ 1 or 0 to this attribute.
+
+ The attribute is read/write.
+
+What: /sys/bus/platform/drivers/ufshcd/*/clkgate_delay_ms
+What: /sys/bus/platform/devices/*.ufs/clkgate_delay_ms
+Date: January 2025
+Contact: Ziqi Chen <quic_ziqichen@quicinc.com>
+Description:
+ This attribute shows and sets the number of milliseconds of idle time
+ before the UFS driver starts to perform clock gating. This can
+ prevent the UFS from frequently performing clock gating/ungating.
+
+ The attribute is read/write.
+
+What: /sys/bus/platform/drivers/ufshcd/*/device_lvl_exception_count
+What: /sys/bus/platform/devices/*.ufs/device_lvl_exception_count
+Date: March 2025
+Contact: Bao D. Nguyen <quic_nguyenb@quicinc.com>
+Description:
+ This attribute is applicable to ufs devices compliant to the
+ JEDEC specifications version 4.1 or later. The
+ device_lvl_exception_count is a counter indicating the number of
+ times the device level exceptions have occurred since the last
+ time this variable is reset. Writing a 0 value to this
+ attribute will reset the device_lvl_exception_count. If the
+ device_lvl_exception_count reads a positive value, the user
+ application should read the device_lvl_exception_id attribute to
+ know more information about the exception.
+
+ The attribute is read/write.
+
+What: /sys/bus/platform/drivers/ufshcd/*/device_lvl_exception_id
+What: /sys/bus/platform/devices/*.ufs/device_lvl_exception_id
+Date: March 2025
+Contact: Bao D. Nguyen <quic_nguyenb@quicinc.com>
+Description:
+ Reading the device_lvl_exception_id returns the
+ qDeviceLevelExceptionID attribute of the ufs device JEDEC
+ specification version 4.1. The definition of the
+ qDeviceLevelExceptionID is the ufs device vendor specific
+ implementation. Refer to the device manufacturer datasheet for
+ more information on the meaning of the qDeviceLevelExceptionID
+ attribute value.
+
+ The attribute is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/wb_resize_enable
+What: /sys/bus/platform/devices/*.ufs/wb_resize_enable
+Date: April 2025
+Contact: Huan Tang <tanghuan@vivo.com>
+Description:
+ The host can enable the WriteBooster buffer resize by setting this
+ attribute.
+
+ ======== ======================================
+ idle There is no resize operation
+ decrease Decrease WriteBooster buffer size
+ increase Increase WriteBooster buffer size
+ ======== ======================================
+
+ The file is write only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/attributes/wb_resize_hint
+What: /sys/bus/platform/devices/*.ufs/attributes/wb_resize_hint
+Date: April 2025
+Contact: Huan Tang <tanghuan@vivo.com>
+Description:
+ wb_resize_hint indicates hint information about which type of resize
+ for WriteBooster buffer is recommended by the device.
+
+ ========= ======================================
+ keep Recommend keep the buffer size
+ decrease Recommend to decrease the buffer size
+ increase Recommend to increase the buffer size
+ ========= ======================================
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/attributes/wb_resize_status
+What: /sys/bus/platform/devices/*.ufs/attributes/wb_resize_status
+Date: April 2025
+Contact: Huan Tang <tanghuan@vivo.com>
+Description:
+ The host can check the resize operation status of the WriteBooster
+ buffer by reading this attribute.
+
+ ================ ========================================
+ idle Resize operation is not issued
+ in_progress Resize operation in progress
+ complete_success Resize operation completed successfully
+ general_failure Resize operation general failure
+ ================ ========================================
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/hid/analysis_trigger
+What: /sys/bus/platform/devices/*.ufs/hid/analysis_trigger
+Date: May 2025
+Contact: Huan Tang <tanghuan@vivo.com>
+Description:
+ The host can enable or disable HID analysis operation.
+
+ ======= =========================================
+ disable disable HID analysis operation
+ enable enable HID analysis operation
+ ======= =========================================
+
+ The file is write only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/hid/defrag_trigger
+What: /sys/bus/platform/devices/*.ufs/hid/defrag_trigger
+Date: May 2025
+Contact: Huan Tang <tanghuan@vivo.com>
+Description:
+ The host can enable or disable HID defragmentation operation.
+
+ ======= =========================================
+ disable disable HID defragmentation operation
+ enable enable HID defragmentation operation
+ ======= =========================================
+
+ The attribute is write only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/hid/fragmented_size
+What: /sys/bus/platform/devices/*.ufs/hid/fragmented_size
+Date: May 2025
+Contact: Huan Tang <tanghuan@vivo.com>
+Description:
+ The total fragmented size in the device is reported through
+ this attribute.
+
+ The attribute is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/hid/defrag_size
+What: /sys/bus/platform/devices/*.ufs/hid/defrag_size
+Date: May 2025
+Contact: Huan Tang <tanghuan@vivo.com>
+Description:
+ The host sets the size to be defragmented by an HID
+ defragmentation operation.
+
+ The attribute is read/write.
+
+What: /sys/bus/platform/drivers/ufshcd/*/hid/progress_ratio
+What: /sys/bus/platform/devices/*.ufs/hid/progress_ratio
+Date: May 2025
+Contact: Huan Tang <tanghuan@vivo.com>
+Description:
+ Defragmentation progress is reported by this attribute,
+ indicates the ratio of the completed defragmentation size
+ over the requested defragmentation size.
+
+ ==== ============================================
+ 1 1%
+ ...
+ 100 100%
+ ==== ============================================
+
+ The attribute is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/hid/state
+What: /sys/bus/platform/devices/*.ufs/hid/state
+Date: May 2025
+Contact: Huan Tang <tanghuan@vivo.com>
+Description:
+ The HID state is reported by this attribute.
+
+ ==================== ===========================
+ idle Idle (analysis required)
+ analysis_in_progress Analysis in progress
+ defrag_required Defrag required
+ defrag_in_progress Defrag in progress
+ defrag_completed Defrag completed
+ defrag_not_required Defrag is not required
+ ==================== ===========================
+
+ The attribute is read only.
diff --git a/Documentation/ABI/testing/sysfs-driver-w1_ds28e17 b/Documentation/ABI/testing/sysfs-driver-w1_ds28e17
index d301e7017afe..e92aba4eb594 100644
--- a/Documentation/ABI/testing/sysfs-driver-w1_ds28e17
+++ b/Documentation/ABI/testing/sysfs-driver-w1_ds28e17
@@ -5,7 +5,9 @@ Contact: Jan Kandziora <jjj@gmx.de>
Description: When written, this file sets the I2C speed on the connected
DS28E17 chip. When read, it reads the current setting from
the DS28E17 chip.
+
Valid values: 100, 400, 900 [kBaud].
+
Default 100, can be set by w1_ds28e17.speed= module parameter.
Users: w1_ds28e17 driver
@@ -17,5 +19,6 @@ Description: When written, this file sets the multiplier used to calculate
the busy timeout for I2C operations on the connected DS28E17
chip. When read, returns the current setting.
Valid values: 1 to 9.
+
Default 1, can be set by w1_ds28e17.stretch= module parameter.
Users: w1_ds28e17 driver
diff --git a/Documentation/ABI/testing/sysfs-driver-w1_therm b/Documentation/ABI/testing/sysfs-driver-w1_therm
new file mode 100644
index 000000000000..74642c73d29c
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-w1_therm
@@ -0,0 +1,190 @@
+What: /sys/bus/w1/devices/.../alarms
+Date: May 2020
+Contact: Akira Shimahara <akira215corp@gmail.com>
+Description:
+ (RW) read or write TH and TL (Temperature High an Low) alarms.
+ Values shall be space separated and in the device range
+ (typical -55 degC to 125 degC), if not values will be trimmed
+ to device min/max capabilities. Values are integer as they are
+ stored in a 8bit register in the device. Lowest value is
+ automatically put to TL. Once set, alarms could be search at
+ master level, refer to Documentation/w1/w1-generic.rst for
+ detailed information
+Users: any user space application which wants to communicate with
+ w1_term device
+
+
+What: /sys/bus/w1/devices/.../eeprom_cmd
+Date: May 2020
+Contact: Akira Shimahara <akira215corp@gmail.com>
+Description:
+ (WO) writing that file will either trigger a save of the
+ device data to its embedded EEPROM, either restore data
+ embedded in device EEPROM. Be aware that devices support
+ limited EEPROM writing cycles (typical 50k)
+
+ * 'save': save device RAM to EEPROM
+ * 'restore': restore EEPROM data in device RAM
+
+Users: any user space application which wants to communicate with
+ w1_term device
+
+
+What: /sys/bus/w1/devices/.../ext_power
+Date: May 2020
+Contact: Akira Shimahara <akira215corp@gmail.com>
+Description:
+ (RO) return the power status by asking the device
+
+ * '0': device parasite powered
+ * '1': device externally powered
+ * '-xx': xx is kernel error when reading power status
+
+Users: any user space application which wants to communicate with
+ w1_term device
+
+
+What: /sys/bus/w1/devices/.../resolution
+Date: May 2020
+Contact: Akira Shimahara <akira215corp@gmail.com>
+Description:
+ (RW) get or set the device resolution (on supported devices,
+ if not, this entry is not present). Note that the resolution
+ will be changed only in device RAM, so it will be cleared when
+ power is lost. Trigger a 'save' to EEPROM command to keep
+ values after power-on. Read or write are :
+
+ * '9..14': device resolution in bit
+ or resolution to set in bit
+ * '-xx': xx is kernel error when reading the resolution
+ * Anything else: do nothing
+
+ Some DS18B20 clones are fixed in 12-bit resolution, so the
+ actual resolution is read back from the chip and verified. Error
+ is reported if the results differ.
+Users: any user space application which wants to communicate with
+ w1_term device
+
+
+What: /sys/bus/w1/devices/.../temperature
+Date: May 2020
+Contact: Akira Shimahara <akira215corp@gmail.com>
+Description:
+ (RO) return the temperature in 1/1000 degC.
+
+ * If a bulk read has been triggered, it will directly
+ return the temperature computed when the bulk read
+ occurred, if available. If not yet available, nothing
+ is returned (a debug kernel message is sent), you
+ should retry later on.
+ * If no bulk read has been triggered, it will trigger
+ a conversion and send the result. Note that the
+ conversion duration depend on the resolution (if
+ device support this feature). It takes 94ms in 9bits
+ resolution, 750ms for 12bits.
+
+Users: any user space application which wants to communicate with
+ w1_term device
+
+
+What: /sys/bus/w1/devices/.../w1_slave
+Date: May 2020
+Contact: Akira Shimahara <akira215corp@gmail.com>
+Description:
+ (RW) return the temperature in 1/1000 degC.
+ *read*: return 2 lines with the hexa output data sent on the
+ bus, return the CRC check and temperature in 1/1000 degC
+ *write*:
+
+ * '0' : save the 2 or 3 bytes to the device EEPROM
+ (i.e. TH, TL and config register)
+ * '9..14' : set the device resolution in RAM
+ (if supported)
+ * Anything else: do nothing
+
+ refer to Documentation/w1/slaves/w1_therm.rst for detailed
+ information.
+Users: any user space application which wants to communicate with
+ w1_term device
+
+
+What: /sys/bus/w1/devices/w1_bus_masterXX/therm_bulk_read
+Date: May 2020
+Contact: Akira Shimahara <akira215corp@gmail.com>
+Description:
+ (RW) trigger a bulk read conversion. read the status
+
+ *read*:
+ * '-1':
+ conversion in progress on at least 1 sensor
+ * '1' :
+ conversion complete but at least one sensor
+ value has not been read yet
+ * '0' :
+ no bulk operation. Reading temperature will
+ trigger a conversion on each device
+
+ *write*:
+ 'trigger': trigger a bulk read on all supporting
+ devices on the bus
+
+ Note that if a bulk read is sent but one sensor is not read
+ immediately, the next access to temperature on this device
+ will return the temperature measured at the time of issue
+ of the bulk read command (not the current temperature).
+Users: any user space application which wants to communicate with
+ w1_term device
+
+
+What: /sys/bus/w1/devices/.../conv_time
+Date: July 2020
+Contact: Ivan Zaentsev <ivan.zaentsev@wirenboard.ru>
+Description:
+ (RW) Get, set, or measure a temperature conversion time. The
+ setting remains active until a resolution change. Then it is
+ reset to default (datasheet) conversion time for a new
+ resolution.
+
+ *read*:
+ Actual conversion time in milliseconds.
+
+ *write*:
+ * '0':
+ Set the default conversion time from the datasheet.
+ * '1':
+ Measure and set the conversion time. Make a single
+ temperature conversion, measure an actual value.
+ Increase it by 20% for temperature range. A new
+ conversion time can be obtained by reading this
+ same attribute.
+ * other positive value:
+ Set the conversion time in milliseconds.
+
+Users: An application using the w1_term device
+
+
+What: /sys/bus/w1/devices/.../features
+Date: July 2020
+Contact: Ivan Zaentsev <ivan.zaentsev@wirenboard.ru>
+Description:
+ (RW) Control optional driver settings.
+ Bit masks to read/write (bitwise OR):
+
+ == ============================================================
+ 1 Enable check for conversion success. If byte 6 of
+ scratchpad memory is 0xC after conversion, and
+ temperature reads 85.00 (powerup value) or 127.94
+ (insufficient power) - return a conversion error.
+
+ 2 Enable poll for conversion completion. Generate read cycles
+ after the conversion start and wait for 1's. In parasite
+ power mode this feature is not available.
+ == ============================================================
+
+ *read*:
+ Currently selected features.
+
+ *write*:
+ Select features.
+
+Users: An application using the w1_term device
diff --git a/Documentation/ABI/testing/sysfs-driver-wacom b/Documentation/ABI/testing/sysfs-driver-wacom
index 2aa5503ee200..16acaa5712ec 100644
--- a/Documentation/ABI/testing/sysfs-driver-wacom
+++ b/Documentation/ABI/testing/sysfs-driver-wacom
@@ -1,6 +1,6 @@
What: /sys/bus/hid/devices/<bus>:<vid>:<pid>.<n>/speed
Date: April 2010
-Kernel Version: 2.6.35
+KernelVersion: 2.6.35
Contact: linux-bluetooth@vger.kernel.org
Description:
The /sys/bus/hid/devices/<bus>:<vid>:<pid>.<n>/speed file
@@ -79,7 +79,9 @@ Description:
When the Wacom Intuos 4 is connected over Bluetooth, the
image has to contain 256 bytes (64x32 px 1 bit colour).
The format is also scrambled, like in the USB mode, and it can
- be summarized by converting 76543210 into GECA6420.
+ be summarized by converting::
+
+ 76543210 into GECA6420.
HGFEDCBA HFDB7531
What: /sys/bus/hid/devices/<bus>:<vid>:<pid>.<n>/wacom_remote/unpair_remote
diff --git a/Documentation/ABI/testing/sysfs-driver-xdata b/Documentation/ABI/testing/sysfs-driver-xdata
new file mode 100644
index 000000000000..f574e8e6dca2
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-xdata
@@ -0,0 +1,49 @@
+What: /sys/class/misc/drivers/dw-xdata-pcie.<device>/write
+Date: April 2021
+KernelVersion: 5.13
+Contact: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
+Description: Allows the user to enable the PCIe traffic generator which
+ will create write TLPs frames - from the Root Complex to the
+ Endpoint direction or to disable the PCIe traffic generator
+ in all directions.
+
+ Write y/1/on to enable, n/0/off to disable
+
+ Usage e.g.
+ echo 1 > /sys/class/misc/dw-xdata-pcie.<device>/write
+ or
+ echo 0 > /sys/class/misc/dw-xdata-pcie.<device>/write
+
+ The user can read the current PCIe link throughput generated
+ through this generator in MB/s.
+
+ Usage e.g.
+ cat /sys/class/misc/dw-xdata-pcie.<device>/write
+ 204
+
+ The file is read and write.
+
+What: /sys/class/misc/dw-xdata-pcie.<device>/read
+Date: April 2021
+KernelVersion: 5.13
+Contact: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
+Description: Allows the user to enable the PCIe traffic generator which
+ will create read TLPs frames - from the Endpoint to the Root
+ Complex direction or to disable the PCIe traffic generator
+ in all directions.
+
+ Write y/1/on to enable, n/0/off to disable
+
+ Usage e.g.
+ echo 1 > /sys/class/misc/dw-xdata-pcie.<device>/read
+ or
+ echo 0 > /sys/class/misc/dw-xdata-pcie.<device>/read
+
+ The user can read the current PCIe link throughput generated
+ through this generator in MB/s.
+
+ Usage e.g.
+ cat /sys/class/misc/dw-xdata-pcie.<device>/read
+ 199
+
+ The file is read and write.
diff --git a/Documentation/ABI/testing/sysfs-driver-xen-blkback b/Documentation/ABI/testing/sysfs-driver-xen-blkback
index 4e7babb3ba1f..fac0f429a869 100644
--- a/Documentation/ABI/testing/sysfs-driver-xen-blkback
+++ b/Documentation/ABI/testing/sysfs-driver-xen-blkback
@@ -25,3 +25,22 @@ Description:
allocated without being in use. The time is in
seconds, 0 means indefinitely long.
The default is 60 seconds.
+
+What: /sys/module/xen_blkback/parameters/buffer_squeeze_duration_ms
+Date: December 2019
+KernelVersion: 5.6
+Contact: Maximilian Heyne <mheyne@amazon.de>
+Description:
+ When memory pressure is reported to blkback this option
+ controls the duration in milliseconds that blkback will not
+ cache any page not backed by a grant mapping.
+ The default is 10ms.
+
+What: /sys/module/xen_blkback/parameters/feature_persistent
+Date: September 2020
+KernelVersion: 5.10
+Contact: Maximilian Heyne <mheyne@amazon.de>
+Description:
+ Whether to enable the persistent grants feature or not. Note
+ that this option only takes effect on newly connected backends.
+ The default is Y (enable).
diff --git a/Documentation/ABI/testing/sysfs-driver-xen-blkfront b/Documentation/ABI/testing/sysfs-driver-xen-blkfront
index c0a6cb7eb314..4d36c5a10546 100644
--- a/Documentation/ABI/testing/sysfs-driver-xen-blkfront
+++ b/Documentation/ABI/testing/sysfs-driver-xen-blkfront
@@ -1,4 +1,4 @@
-What: /sys/module/xen_blkfront/parameters/max
+What: /sys/module/xen_blkfront/parameters/max_indirect_segments
Date: June 2013
KernelVersion: 3.11
Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
@@ -8,3 +8,12 @@ Description:
is 32 - higher value means more potential throughput but more
memory usage. The backend picks the minimum of the frontend
and its default backend value.
+
+What: /sys/module/xen_blkfront/parameters/feature_persistent
+Date: September 2020
+KernelVersion: 5.10
+Contact: Maximilian Heyne <mheyne@amazon.de>
+Description:
+ Whether to enable the persistent grants feature or not. Note
+ that this option only takes effect on newly connected frontends.
+ The default is Y (enable).
diff --git a/Documentation/ABI/testing/sysfs-driver-xilinx-tmr-manager b/Documentation/ABI/testing/sysfs-driver-xilinx-tmr-manager
new file mode 100644
index 000000000000..57b9b68a73ee
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-xilinx-tmr-manager
@@ -0,0 +1,16 @@
+What: /sys/devices/platform/amba_pl/<dev>/errcnt
+Date: Nov 2022
+Contact: appana.durga.kedareswara.rao@amd.com
+Description: This control file provides the fault detection count.
+ This file cannot be written.
+ Example:
+ # cat /sys/devices/platform/amba_pl/44a10000.tmr_manager/errcnt
+ 1
+
+What: /sys/devices/platform/amba_pl/<dev>/dis_block_break
+Date: Nov 2022
+Contact: appana.durga.kedareswara.rao@amd.com
+Description: Write any value to it, This control file enables the break signal.
+ This file is write only.
+ Example:
+ # echo <any value> > /sys/devices/platform/amba_pl/44a10000.tmr_manager/dis_block_break
diff --git a/Documentation/ABI/testing/sysfs-driver-zynqmp-fpga b/Documentation/ABI/testing/sysfs-driver-zynqmp-fpga
new file mode 100644
index 000000000000..8f93d27b6d91
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-zynqmp-fpga
@@ -0,0 +1,73 @@
+What: /sys/bus/platform/drivers/zynqmp_fpga_manager/firmware:zynqmp-firmware:pcap/status
+Date: February 2023
+KernelVersion: 6.4
+Contact: Nava kishore Manne <nava.kishore.manne@amd.com>
+Description: (RO) Read fpga status.
+ Read returns a hexadecimal value that tells the current status
+ of the FPGA device. Each bit position in the status value is
+ described Below(see ug570 chapter 9).
+ https://docs.xilinx.com/v/u/en-US/ug570-ultrascale-configuration
+
+ ====================== ==============================================
+ BIT(0) 0: No CRC error
+ 1: CRC error
+
+ BIT(1) 0: Decryptor security not set
+ 1: Decryptor security set
+
+ BIT(2) 0: MMCMs/PLLs are not locked
+ 1: MMCMs/PLLs are locked
+
+ BIT(3) 0: DCI not matched
+ 1: DCI matched
+
+ BIT(4) 0: Start-up sequence has not finished
+ 1: Start-up sequence has finished
+
+ BIT(5) 0: All I/Os are placed in High-Z state
+ 1: All I/Os behave as configured
+
+ BIT(6) 0: Flip-flops and block RAM are write disabled
+ 1: Flip-flops and block RAM are write enabled
+
+ BIT(7) 0: GHIGH_B_STATUS asserted
+ 1: GHIGH_B_STATUS deasserted
+
+ BIT(8) to BIT(10) Status of the mode pins
+
+ BIT(11) 0: Initialization has not finished
+ 1: Initialization finished
+
+ BIT(12) Value on INIT_B_PIN pin
+
+ BIT(13) 0: Signal not released
+ 1: Signal released
+
+ BIT(14) Value on DONE_PIN pin.
+
+ BIT(15) 0: No IDCODE_ERROR
+ 1: IDCODE_ERROR
+
+ BIT(16) 0: No SECURITY_ERROR
+ 1: SECURITY_ERROR
+
+ BIT(17) System Monitor over-temperature if set
+
+ BIT(18) to BIT(20) Start-up state machine (0 to 7)
+ Phase 0 = 000
+ Phase 1 = 001
+ Phase 2 = 011
+ Phase 3 = 010
+ Phase 4 = 110
+ Phase 5 = 111
+ Phase 6 = 101
+ Phase 7 = 100
+
+ BIT(25) to BIT(26) Indicates the detected bus width
+ 00 = x1
+ 01 = x8
+ 10 = x16
+ 11 = x32
+ ====================== ==============================================
+
+ The other bits are reserved.
diff --git a/Documentation/ABI/testing/sysfs-edac-ecs b/Documentation/ABI/testing/sysfs-edac-ecs
new file mode 100644
index 000000000000..87c885c4eb1a
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-edac-ecs
@@ -0,0 +1,74 @@
+What: /sys/bus/edac/devices/<dev-name>/ecs_fruX
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ The sysfs EDAC bus devices /<dev-name>/ecs_fruX subdirectory
+ pertains to the memory media ECS (Error Check Scrub) control
+ feature, where <dev-name> directory corresponds to a device
+ registered with the EDAC device driver for the ECS feature.
+ /ecs_fruX belongs to the media FRUs (Field Replaceable Unit)
+ under the memory device.
+
+ The sysfs ECS attr nodes are only present if the parent
+ driver has implemented the corresponding attr callback
+ function and provided the necessary operations to the EDAC
+ device driver during registration.
+
+What: /sys/bus/edac/devices/<dev-name>/ecs_fruX/log_entry_type
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RW) The log entry type of how the DDR5 ECS log is reported.
+
+ - 0 - per DRAM.
+
+ - 1 - per memory media FRU.
+
+ - All other values are reserved.
+
+What: /sys/bus/edac/devices/<dev-name>/ecs_fruX/mode
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RW) The mode of how the DDR5 ECS counts the errors.
+ Error count is tracked based on two different modes
+ selected by DDR5 ECS Control Feature - Codeword mode and
+ Row Count mode. If the ECS is under Codeword mode, then
+ the error count increments each time a codeword with check
+ bit errors is detected. If the ECS is under Row Count mode,
+ then the error counter increments each time a row with
+ check bit errors is detected.
+
+ - 0 - ECS counts rows in the memory media that have ECC errors.
+
+ - 1 - ECS counts codewords with errors, specifically, it counts
+ the number of ECC-detected errors in the memory media.
+
+ - All other values are reserved.
+
+What: /sys/bus/edac/devices/<dev-name>/ecs_fruX/reset
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ (WO) ECS reset ECC counter.
+
+ - 1 - reset ECC counter to the default value.
+
+ - All other values are reserved.
+
+What: /sys/bus/edac/devices/<dev-name>/ecs_fruX/threshold
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RW) DDR5 ECS threshold count per gigabits of memory cells.
+ The ECS error count is subject to the ECS Threshold count
+ per Gbit, which masks error counts less than the Threshold.
+
+ Supported values are 256, 1024 and 4096.
+
+ All other values are reserved.
diff --git a/Documentation/ABI/testing/sysfs-edac-memory-repair b/Documentation/ABI/testing/sysfs-edac-memory-repair
new file mode 100644
index 000000000000..0434a3b23ff3
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-edac-memory-repair
@@ -0,0 +1,206 @@
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ The sysfs EDAC bus devices /<dev-name>/mem_repairX subdirectory
+ pertains to the memory media repair features control, such as
+ PPR (Post Package Repair), memory sparing etc, where <dev-name>
+ directory corresponds to a device registered with the EDAC
+ device driver for the memory repair features.
+
+ Post Package Repair is a maintenance operation requests the memory
+ device to perform a repair operation on its media. It is a memory
+ self-healing feature that fixes a failing memory location by
+ replacing it with a spare row in a DRAM device. For example, a
+ CXL memory device with DRAM components that support PPR features may
+ implement PPR maintenance operations. DRAM components may support
+ two types of PPR functions: hard PPR, for a permanent row repair, and
+ soft PPR, for a temporary row repair. Soft PPR may be much faster
+ than hard PPR, but the repair is lost with a power cycle.
+
+ The sysfs attributes nodes for a repair feature are only
+ present if the parent driver has implemented the corresponding
+ attr callback function and provided the necessary operations
+ to the EDAC device driver during registration.
+
+ In some states of system configuration (e.g. before address
+ decoders have been configured), memory devices (e.g. CXL)
+ may not have an active mapping in the main host address
+ physical address map. As such, the memory to repair must be
+ identified by a device specific physical addressing scheme
+ using a device physical address(DPA). The DPA and other control
+ attributes to use will be presented in related error records.
+
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/repair_type
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RO) Memory repair type. For eg. post package repair,
+ memory sparing etc. Valid values are:
+
+ - ppr - Post package repair.
+
+ - cacheline-sparing
+
+ - row-sparing
+
+ - bank-sparing
+
+ - rank-sparing
+
+ - All other values are reserved.
+
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/persist_mode
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RW) Get/Set the current persist repair mode set for a
+ repair function. Persist repair modes supported in the
+ device, based on a memory repair function, either is temporary,
+ which is lost with a power cycle or permanent. Valid values are:
+
+ - 0 - Soft memory repair (temporary repair).
+
+ - 1 - Hard memory repair (permanent repair).
+
+ - All other values are reserved.
+
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/repair_safe_when_in_use
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RO) True if memory media is accessible and data is retained
+ during the memory repair operation.
+ The data may not be retained and memory requests may not be
+ correctly processed during a repair operation. In such case
+ repair operation can not be executed at runtime. The memory
+ must be taken offline.
+
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/hpa
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RW) Host Physical Address (HPA) of the memory to repair.
+ The HPA to use will be provided in related error records.
+
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/dpa
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RW) Device Physical Address (DPA) of the memory to repair.
+ The specific DPA to use will be provided in related error
+ records.
+
+ In some states of system configuration (e.g. before address
+ decoders have been configured), memory devices (e.g. CXL)
+ may not have an active mapping in the main host address
+ physical address map. As such, the memory to repair must be
+ identified by a device specific physical addressing scheme
+ using a DPA. The device physical address(DPA) to use will be
+ presented in related error records.
+
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/nibble_mask
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RW) Read/Write Nibble mask of the memory to repair.
+ Nibble mask identifies one or more nibbles in error on the
+ memory bus that produced the error event. Nibble Mask bit 0
+ shall be set if nibble 0 on the memory bus produced the
+ event, etc. For example, CXL PPR and sparing, a nibble mask
+ bit set to 1 indicates the request to perform repair
+ operation in the specific device. All nibble mask bits set
+ to 1 indicates the request to perform the operation in all
+ devices. Eg. for CXL memory repair, the specific value of
+ nibble mask to use will be provided in related error records.
+ For more details, See nibble mask field in CXL spec ver 3.1,
+ section 8.2.9.7.1.2 Table 8-103 soft PPR and section
+ 8.2.9.7.1.3 Table 8-104 hard PPR, section 8.2.9.7.1.4
+ Table 8-105 memory sparing.
+
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/min_hpa
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/max_hpa
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/min_dpa
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/max_dpa
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RW) The supported range of memory address that is to be
+ repaired. The memory device may give the supported range of
+ attributes to use and it will depend on the memory device
+ and the portion of memory to repair.
+ The userspace may receive the specific value of attributes
+ to use for a repair operation from the memory device via
+ related error records and trace events, for eg. CXL DRAM
+ and CXL general media error records in CXL memory devices.
+
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/bank_group
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/bank
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/rank
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/row
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/column
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/channel
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/sub_channel
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RW) The control attributes for the memory to be repaired.
+ The specific value of attributes to use depends on the
+ portion of memory to repair and will be reported to the host
+ in related error records and be available to userspace
+ in trace events, such as CXL DRAM and CXL general media
+ error records of CXL memory devices.
+
+ When readng back these attributes, it returns the current
+ value of memory requested to be repaired.
+
+ bank_group - The bank group of the memory to repair.
+
+ bank - The bank number of the memory to repair.
+
+ rank - The rank of the memory to repair. Rank is defined as a
+ set of memory devices on a channel that together execute a
+ transaction.
+
+ row - The row number of the memory to repair.
+
+ column - The column number of the memory to repair.
+
+ channel - The channel of the memory to repair. Channel is
+ defined as an interface that can be independently accessed
+ for a transaction.
+
+ sub_channel - The subchannel of the memory to repair.
+
+ The requirement to set these attributes varies based on the
+ repair function. The attributes in sysfs are not present
+ unless required for a repair function.
+
+ For example, CXL spec ver 3.1, Section 8.2.9.7.1.2 Table 8-103
+ soft PPR and Section 8.2.9.7.1.3 Table 8-104 hard PPR operations,
+ these attributes are not required to set. CXL spec ver 3.1,
+ Section 8.2.9.7.1.4 Table 8-105 memory sparing, these attributes
+ are required to set based on memory sparing granularity.
+
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/repair
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ (WO) Issue the memory repair operation for the specified
+ memory repair attributes. The operation may fail if resources
+ are insufficient based on the requirements of the memory
+ device and repair function.
+
+ - 1 - Issue the repair operation.
+
+ - All other values are reserved.
diff --git a/Documentation/ABI/testing/sysfs-edac-scrub b/Documentation/ABI/testing/sysfs-edac-scrub
new file mode 100644
index 000000000000..ab6014743da5
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-edac-scrub
@@ -0,0 +1,85 @@
+What: /sys/bus/edac/devices/<dev-name>/scrubX
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ The sysfs EDAC bus devices /<dev-name>/scrubX subdirectory
+ belongs to an instance of memory scrub control feature,
+ where <dev-name> directory corresponds to a device/memory
+ region registered with the EDAC device driver for the
+ scrub control feature.
+
+ The sysfs scrub attr nodes are only present if the parent
+ driver has implemented the corresponding attr callback
+ function and provided the necessary operations to the EDAC
+ device driver during registration.
+
+What: /sys/bus/edac/devices/<dev-name>/scrubX/addr
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RW) The base address of the memory region to be scrubbed
+ for on-demand scrubbing. Setting address starts scrubbing.
+ The size must be set before that.
+
+ The readback addr value is non-zero if the requested
+ on-demand scrubbing is in progress, zero otherwise.
+
+What: /sys/bus/edac/devices/<dev-name>/scrubX/size
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RW) The size of the memory region to be scrubbed
+ (on-demand scrubbing).
+
+What: /sys/bus/edac/devices/<dev-name>/scrubX/enable_background
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RW) Start/Stop background (patrol) scrubbing if supported.
+
+What: /sys/bus/edac/devices/<dev-name>/scrubX/min_cycle_duration
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RO) Supported minimum scrub cycle duration in seconds
+ by the memory scrubber.
+
+ Device-based scrub: returns the minimum scrub cycle
+ supported by the memory device.
+
+ Region-based scrub: returns the max of minimum scrub cycles
+ supported by individual memory devices that back the region.
+
+What: /sys/bus/edac/devices/<dev-name>/scrubX/max_cycle_duration
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RO) Supported maximum scrub cycle duration in seconds
+ by the memory scrubber.
+
+ Device-based scrub: returns the maximum scrub cycle supported
+ by the memory device.
+
+ Region-based scrub: returns the min of maximum scrub cycles
+ supported by individual memory devices that back the region.
+
+ If the memory device does not provide maximum scrub cycle
+ information, return the maximum supported value of the scrub
+ cycle field.
+
+What: /sys/bus/edac/devices/<dev-name>/scrubX/current_cycle_duration
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RW) The current scrub cycle duration in seconds and must be
+ within the supported range by the memory scrubber.
+
+ Scrub has an overhead when running and that may want to be
+ reduced by taking longer to do it.
diff --git a/Documentation/ABI/testing/sysfs-firmware-acpi b/Documentation/ABI/testing/sysfs-firmware-acpi
index 613f42a9d5cd..72e7c9161ce7 100644
--- a/Documentation/ABI/testing/sysfs-firmware-acpi
+++ b/Documentation/ABI/testing/sysfs-firmware-acpi
@@ -1,3 +1,46 @@
+What: /sys/firmware/acpi/fpdt/
+Date: Jan 2021
+Contact: Zhang Rui <rui.zhang@intel.com>
+Description:
+ ACPI Firmware Performance Data Table (FPDT) provides
+ information for firmware performance data for system boot,
+ S3 suspend and S3 resume. This sysfs entry contains the
+ performance data retrieved from the FPDT.
+
+ boot:
+ firmware_start_ns: Timer value logged at the beginning
+ of firmware image execution. In nanoseconds.
+ bootloader_load_ns: Timer value logged just prior to
+ loading the OS boot loader into memory.
+ In nanoseconds.
+ bootloader_launch_ns: Timer value logged just prior to
+ launching the currently loaded OS boot loader
+ image. In nanoseconds.
+ exitbootservice_start_ns: Timer value logged at the
+ point when the OS loader calls the
+ ExitBootServices function for UEFI compatible
+ firmware. In nanoseconds.
+ exitbootservice_end_ns: Timer value logged at the point
+ just prior to the OS loader gaining control
+ back from the ExitBootServices function for
+ UEFI compatible firmware. In nanoseconds.
+ suspend:
+ suspend_start_ns: Timer value recorded at the previous
+ OS write to SLP_TYP upon entry to S3. In
+ nanoseconds.
+ suspend_end_ns: Timer value recorded at the previous
+ firmware write to SLP_TYP used to trigger
+ hardware entry to S3. In nanoseconds.
+ resume:
+ resume_count: A count of the number of S3 resume cycles
+ since the last full boot sequence.
+ resume_avg_ns: Average timer value of all resume cycles
+ logged since the last full boot sequence,
+ including the most recent resume. In nanoseconds.
+ resume_prev_ns: Timer recorded at the end of the previous
+ platform runtime firmware S3 resume, just prior to
+ handoff to the OS waking vector. In nanoseconds.
+
What: /sys/firmware/acpi/bgrt/
Date: January 2012
Contact: Matthew Garrett <mjg@redhat.com>
@@ -12,11 +55,14 @@ Description:
image: The image bitmap. Currently a 32-bit BMP.
status: 1 if the image is valid, 0 if firmware invalidated it.
type: 0 indicates image is in BMP format.
+
+ ======== ===================================================
version: The version of the BGRT. Currently 1.
xoffset: The number of pixels between the left of the screen
and the left edge of the image.
yoffset: The number of pixels between the top of the screen
and the top edge of the image.
+ ======== ===================================================
What: /sys/firmware/acpi/hotplug/
Date: February 2013
@@ -33,12 +79,14 @@ Description:
The following setting is available to user space for each
hotplug profile:
+ ======== =======================================================
enabled: If set, the ACPI core will handle notifications of
- hotplug events associated with the given class of
- devices and will allow those devices to be ejected with
- the help of the _EJ0 control method. Unsetting it
- effectively disables hotplug for the correspoinding
- class of devices.
+ hotplug events associated with the given class of
+ devices and will allow those devices to be ejected with
+ the help of the _EJ0 control method. Unsetting it
+ effectively disables hotplug for the corresponding
+ class of devices.
+ ======== =======================================================
The value of the above attribute is an integer number: 1 (set)
or 0 (unset). Attempts to write any other values to it will
@@ -60,97 +108,101 @@ Description:
number of a "General Purpose Events" (GPE).
A GPE vectors to a specified handler in AML, which
- can do a anything the BIOS writer wants from
+ can do anything the BIOS writer wants from
OS context. GPE 0x12, for example, would vector
to a level or edge handler called _L12 or _E12.
The handler may do its business and return.
- Or the handler may send send a Notify event
+ Or the handler may send a Notify event
to a Linux device driver registered on an ACPI device,
such as a battery, or a processor.
- To figure out where all the SCI's are coming from,
+ To figure out where all the SCIs are coming from,
/sys/firmware/acpi/interrupts contains a file listing
every possible source, and the count of how many
- times it has triggered.
-
- $ cd /sys/firmware/acpi/interrupts
- $ grep . *
- error: 0
- ff_gbl_lock: 0 enable
- ff_pmtimer: 0 invalid
- ff_pwr_btn: 0 enable
- ff_rt_clk: 2 disable
- ff_slp_btn: 0 invalid
- gpe00: 0 invalid
- gpe01: 0 enable
- gpe02: 108 enable
- gpe03: 0 invalid
- gpe04: 0 invalid
- gpe05: 0 invalid
- gpe06: 0 enable
- gpe07: 0 enable
- gpe08: 0 invalid
- gpe09: 0 invalid
- gpe0A: 0 invalid
- gpe0B: 0 invalid
- gpe0C: 0 invalid
- gpe0D: 0 invalid
- gpe0E: 0 invalid
- gpe0F: 0 invalid
- gpe10: 0 invalid
- gpe11: 0 invalid
- gpe12: 0 invalid
- gpe13: 0 invalid
- gpe14: 0 invalid
- gpe15: 0 invalid
- gpe16: 0 invalid
- gpe17: 1084 enable
- gpe18: 0 enable
- gpe19: 0 invalid
- gpe1A: 0 invalid
- gpe1B: 0 invalid
- gpe1C: 0 invalid
- gpe1D: 0 invalid
- gpe1E: 0 invalid
- gpe1F: 0 invalid
- gpe_all: 1192
- sci: 1194
- sci_not: 0
-
- sci - The number of times the ACPI SCI
- has been called and claimed an interrupt.
-
- sci_not - The number of times the ACPI SCI
- has been called and NOT claimed an interrupt.
-
- gpe_all - count of SCI caused by GPEs.
-
- gpeXX - count for individual GPE source
-
- ff_gbl_lock - Global Lock
-
- ff_pmtimer - PM Timer
-
- ff_pwr_btn - Power Button
-
- ff_rt_clk - Real Time Clock
-
- ff_slp_btn - Sleep Button
-
- error - an interrupt that can't be accounted for above.
-
- invalid: it's either a GPE or a Fixed Event that
- doesn't have an event handler.
-
- disable: the GPE/Fixed Event is valid but disabled.
-
- enable: the GPE/Fixed Event is valid and enabled.
-
- Root has permission to clear any of these counters. Eg.
- # echo 0 > gpe11
-
- All counters can be cleared by clearing the total "sci":
- # echo 0 > sci
+ times it has triggered::
+
+ $ cd /sys/firmware/acpi/interrupts
+ $ grep . *
+ error: 0
+ ff_gbl_lock: 0 enable
+ ff_pmtimer: 0 invalid
+ ff_pwr_btn: 0 enable
+ ff_rt_clk: 2 disable
+ ff_slp_btn: 0 invalid
+ gpe00: 0 invalid
+ gpe01: 0 enable
+ gpe02: 108 enable
+ gpe03: 0 invalid
+ gpe04: 0 invalid
+ gpe05: 0 invalid
+ gpe06: 0 enable
+ gpe07: 0 enable
+ gpe08: 0 invalid
+ gpe09: 0 invalid
+ gpe0A: 0 invalid
+ gpe0B: 0 invalid
+ gpe0C: 0 invalid
+ gpe0D: 0 invalid
+ gpe0E: 0 invalid
+ gpe0F: 0 invalid
+ gpe10: 0 invalid
+ gpe11: 0 invalid
+ gpe12: 0 invalid
+ gpe13: 0 invalid
+ gpe14: 0 invalid
+ gpe15: 0 invalid
+ gpe16: 0 invalid
+ gpe17: 1084 enable
+ gpe18: 0 enable
+ gpe19: 0 invalid
+ gpe1A: 0 invalid
+ gpe1B: 0 invalid
+ gpe1C: 0 invalid
+ gpe1D: 0 invalid
+ gpe1E: 0 invalid
+ gpe1F: 0 invalid
+ gpe_all: 1192
+ sci: 1194
+ sci_not: 0
+
+ =========== ==================================================
+ sci The number of times the ACPI SCI
+ has been called and claimed an interrupt.
+
+ sci_not The number of times the ACPI SCI
+ has been called and NOT claimed an interrupt.
+
+ gpe_all count of SCI caused by GPEs.
+
+ gpeXX count for individual GPE source
+
+ ff_gbl_lock Global Lock
+
+ ff_pmtimer PM Timer
+
+ ff_pwr_btn Power Button
+
+ ff_rt_clk Real Time Clock
+
+ ff_slp_btn Sleep Button
+
+ error an interrupt that can't be accounted for above.
+
+ invalid it's either a GPE or a Fixed Event that
+ doesn't have an event handler.
+
+ disable the GPE/Fixed Event is valid but disabled.
+
+ enable the GPE/Fixed Event is valid and enabled.
+ =========== ==================================================
+
+ Root has permission to clear any of these counters. Eg.::
+
+ # echo 0 > gpe11
+
+ All counters can be cleared by clearing the total "sci"::
+
+ # echo 0 > sci
None of these counters has an effect on the function
of the system, they are simply statistics.
@@ -165,32 +217,55 @@ Description:
Let's take power button fixed event for example, please kill acpid
and other user space applications so that the machine won't shutdown
- when pressing the power button.
- # cat ff_pwr_btn
- 0 enabled
- # press the power button for 3 times;
- # cat ff_pwr_btn
- 3 enabled
- # echo disable > ff_pwr_btn
- # cat ff_pwr_btn
- 3 disabled
- # press the power button for 3 times;
- # cat ff_pwr_btn
- 3 disabled
- # echo enable > ff_pwr_btn
- # cat ff_pwr_btn
- 4 enabled
- /*
- * this is because the status bit is set even if the enable bit is cleared,
- * and it triggers an ACPI fixed event when the enable bit is set again
- */
- # press the power button for 3 times;
- # cat ff_pwr_btn
- 7 enabled
- # echo disable > ff_pwr_btn
- # press the power button for 3 times;
- # echo clear > ff_pwr_btn /* clear the status bit */
- # echo disable > ff_pwr_btn
- # cat ff_pwr_btn
- 7 enabled
+ when pressing the power button::
+
+ # cat ff_pwr_btn
+ 0 enabled
+ # press the power button for 3 times;
+ # cat ff_pwr_btn
+ 3 enabled
+ # echo disable > ff_pwr_btn
+ # cat ff_pwr_btn
+ 3 disabled
+ # press the power button for 3 times;
+ # cat ff_pwr_btn
+ 3 disabled
+ # echo enable > ff_pwr_btn
+ # cat ff_pwr_btn
+ 4 enabled
+ /*
+ * this is because the status bit is set even if the enable
+ * bit is cleared, and it triggers an ACPI fixed event when
+ * the enable bit is set again
+ */
+ # press the power button for 3 times;
+ # cat ff_pwr_btn
+ 7 enabled
+ # echo disable > ff_pwr_btn
+ # press the power button for 3 times;
+ # echo clear > ff_pwr_btn /* clear the status bit */
+ # echo disable > ff_pwr_btn
+ # cat ff_pwr_btn
+ 7 enabled
+
+What: /sys/firmware/acpi/memory_ranges/rangeX
+Date: February 2025
+Contact: Tony Luck <tony.luck@intel.com>
+Description:
+ On systems with the ACPI MRRM table reports the parameters for
+ each range.
+
+ base: Starting system physical address.
+
+ length: Length of this range in bytes.
+
+ node: NUMA node that this range belongs to. Negative numbers
+ indicate that the node number could not be determined (e.g
+ for an address range that is reserved for future hot add of
+ memory).
+
+ local_region_id: ID associated with access by agents
+ local to this range of addresses.
+ remote_region_id: ID associated with access by agents
+ non-local to this range of addresses.
diff --git a/Documentation/ABI/testing/sysfs-firmware-dmi-entries b/Documentation/ABI/testing/sysfs-firmware-dmi-entries
index 210ad44b95a5..b6c23807b804 100644
--- a/Documentation/ABI/testing/sysfs-firmware-dmi-entries
+++ b/Documentation/ABI/testing/sysfs-firmware-dmi-entries
@@ -2,7 +2,7 @@ What: /sys/firmware/dmi/entries/
Date: February 2011
Contact: Mike Waychison <mikew@google.com>
Description:
- Many machines' firmware (x86 and ia64) export DMI /
+ Many machines' firmware (x86 and arm64) export DMI /
SMBIOS tables to the operating system. Getting at this
information is often valuable to userland, especially in
cases where there are OEM extensions used.
@@ -33,7 +33,7 @@ Description:
doesn't matter), they will be represented in sysfs as
entries "T-0" through "T-(N-1)":
- Example entry directories:
+ Example entry directories::
/sys/firmware/dmi/entries/17-0
/sys/firmware/dmi/entries/17-1
@@ -50,61 +50,65 @@ Description:
Each DMI entry in sysfs has the common header values
exported as attributes:
- handle : The 16bit 'handle' that is assigned to this
+ ======== =================================================
+ handle The 16bit 'handle' that is assigned to this
entry by the firmware. This handle may be
referred to by other entries.
- length : The length of the entry, as presented in the
+ length The length of the entry, as presented in the
entry itself. Note that this is _not the
total count of bytes associated with the
- entry_. This value represents the length of
+ entry. This value represents the length of
the "formatted" portion of the entry. This
"formatted" region is sometimes followed by
the "unformatted" region composed of nul
terminated strings, with termination signalled
by a two nul characters in series.
- raw : The raw bytes of the entry. This includes the
+ raw The raw bytes of the entry. This includes the
"formatted" portion of the entry, the
"unformatted" strings portion of the entry,
and the two terminating nul characters.
- type : The type of the entry. This value is the same
+ type The type of the entry. This value is the same
as found in the directory name. It indicates
how the rest of the entry should be interpreted.
- instance: The instance ordinal of the entry for the
+ instance The instance ordinal of the entry for the
given type. This value is the same as found
in the parent directory name.
- position: The ordinal position (zero-based) of the entry
+ position The ordinal position (zero-based) of the entry
within the entirety of the DMI entry table.
+ ======== =================================================
- === Entry Specialization ===
+ **Entry Specialization**
Some entry types may have other information available in
sysfs. Not all types are specialized.
- --- Type 15 - System Event Log ---
+ **Type 15 - System Event Log**
This entry allows the firmware to export a log of
events the system has taken. This information is
typically backed by nvram, but the implementation
details are abstracted by this table. This entry's data
- is exported in the directory:
+ is exported in the directory::
- /sys/firmware/dmi/entries/15-0/system_event_log
+ /sys/firmware/dmi/entries/15-0/system_event_log
and has the following attributes (documented in the
SMBIOS / DMI specification under "System Event Log (Type 15)":
- area_length
- header_start_offset
- data_start_offset
- access_method
- status
- change_token
- access_method_address
- header_format
- per_log_type_descriptor_length
- type_descriptors_supported_count
+ - area_length
+ - header_start_offset
+ - data_start_offset
+ - access_method
+ - status
+ - change_token
+ - access_method_address
+ - header_format
+ - per_log_type_descriptor_length
+ - type_descriptors_supported_count
As well, the kernel exports the binary attribute:
- raw_event_log : The raw binary bits of the event log
+ ============= ====================================
+ raw_event_log The raw binary bits of the event log
as described by the DMI entry.
+ ============= ====================================
diff --git a/Documentation/ABI/testing/sysfs-firmware-efi b/Documentation/ABI/testing/sysfs-firmware-efi
index e794eac32a90..927e362d4974 100644
--- a/Documentation/ABI/testing/sysfs-firmware-efi
+++ b/Documentation/ABI/testing/sysfs-firmware-efi
@@ -28,3 +28,18 @@ Description: Displays the physical addresses of all EFI Configuration
versions are always printed first, i.e. ACPI20 comes
before ACPI.
Users: dmidecode
+
+What: /sys/firmware/efi/tables/rci2
+Date: July 2019
+Contact: Narendra K <Narendra.K@dell.com>, linux-bugs@dell.com
+Description: Displays the content of the Runtime Configuration Interface
+ Table version 2 on Dell EMC PowerEdge systems in binary format
+Users: It is used by Dell EMC OpenManage Server Administrator tool to
+ populate BIOS setup page.
+
+What: /sys/firmware/efi/ovmf_debug_log
+Date: July 2025
+Contact: Gerd Hoffmann <kraxel@redhat.com>, linux-efi@vger.kernel.org
+Description: Displays the content of the OVMF debug log buffer. The file is
+ only present in case the firmware supports logging to a memory
+ buffer.
diff --git a/Documentation/ABI/testing/sysfs-firmware-efi-esrt b/Documentation/ABI/testing/sysfs-firmware-efi-esrt
index 6e431d1a4e79..4c2d440487dd 100644
--- a/Documentation/ABI/testing/sysfs-firmware-efi-esrt
+++ b/Documentation/ABI/testing/sysfs-firmware-efi-esrt
@@ -24,58 +24,64 @@ Date: February 2015
Contact: Peter Jones <pjones@redhat.com>
Description: The version of the ESRT structure provided by the firmware.
-What: /sys/firmware/efi/esrt/entries/entry$N/
+What: /sys/firmware/efi/esrt/entries/entry<N>/
Date: February 2015
Contact: Peter Jones <pjones@redhat.com>
Description: Each ESRT entry is identified by a GUID, and each gets a
subdirectory under entries/ .
example: /sys/firmware/efi/esrt/entries/entry0/
-What: /sys/firmware/efi/esrt/entries/entry$N/fw_type
+What: /sys/firmware/efi/esrt/entries/entry<N>/fw_type
Date: February 2015
Contact: Peter Jones <pjones@redhat.com>
Description: What kind of firmware entry this is:
- 0 - Unknown
- 1 - System Firmware
- 2 - Device Firmware
- 3 - UEFI Driver
-What: /sys/firmware/efi/esrt/entries/entry$N/fw_class
+ == ===============
+ 0 Unknown
+ 1 System Firmware
+ 2 Device Firmware
+ 3 UEFI Driver
+ == ===============
+
+What: /sys/firmware/efi/esrt/entries/entry<N>/fw_class
Date: February 2015
Contact: Peter Jones <pjones@redhat.com>
Description: This is the entry's guid, and will match the directory name.
-What: /sys/firmware/efi/esrt/entries/entry$N/fw_version
+What: /sys/firmware/efi/esrt/entries/entry<N>/fw_version
Date: February 2015
Contact: Peter Jones <pjones@redhat.com>
Description: The version of the firmware currently installed. This is a
32-bit unsigned integer.
-What: /sys/firmware/efi/esrt/entries/entry$N/lowest_supported_fw_version
+What: /sys/firmware/efi/esrt/entries/entry<N>/lowest_supported_fw_version
Date: February 2015
Contact: Peter Jones <pjones@redhat.com>
Description: The lowest version of the firmware that can be installed.
-What: /sys/firmware/efi/esrt/entries/entry$N/capsule_flags
+What: /sys/firmware/efi/esrt/entries/entry<N>/capsule_flags
Date: February 2015
Contact: Peter Jones <pjones@redhat.com>
Description: Flags that must be passed to UpdateCapsule()
-What: /sys/firmware/efi/esrt/entries/entry$N/last_attempt_version
+What: /sys/firmware/efi/esrt/entries/entry<N>/last_attempt_version
Date: February 2015
Contact: Peter Jones <pjones@redhat.com>
Description: The last firmware version for which an update was attempted.
-What: /sys/firmware/efi/esrt/entries/entry$N/last_attempt_status
+What: /sys/firmware/efi/esrt/entries/entry<N>/last_attempt_status
Date: February 2015
Contact: Peter Jones <pjones@redhat.com>
Description: The result of the last firmware update attempt for the
firmware resource entry.
- 0 - Success
- 1 - Insufficient resources
- 2 - Incorrect version
- 3 - Invalid format
- 4 - Authentication error
- 5 - AC power event
- 6 - Battery power event
+
+ == ======================
+ 0 Success
+ 1 Insufficient resources
+ 2 Incorrect version
+ 3 Invalid format
+ 4 Authentication error
+ 5 AC power event
+ 6 Battery power event
+ == ======================
diff --git a/Documentation/ABI/testing/sysfs-firmware-efi-runtime-map b/Documentation/ABI/testing/sysfs-firmware-efi-runtime-map
index c61b9b348e99..9c4d581be396 100644
--- a/Documentation/ABI/testing/sysfs-firmware-efi-runtime-map
+++ b/Documentation/ABI/testing/sysfs-firmware-efi-runtime-map
@@ -14,7 +14,7 @@ Description: Switching efi runtime services to virtual mode requires
/sys/firmware/efi/runtime-map/ is the directory the kernel
exports that information in.
- subdirectories are named with the number of the memory range:
+ subdirectories are named with the number of the memory range::
/sys/firmware/efi/runtime-map/0
/sys/firmware/efi/runtime-map/1
@@ -24,11 +24,13 @@ Description: Switching efi runtime services to virtual mode requires
Each subdirectory contains five files:
- attribute : The attributes of the memory range.
- num_pages : The size of the memory range in pages.
- phys_addr : The physical address of the memory range.
- type : The type of the memory range.
- virt_addr : The virtual address of the memory range.
+ ========= =========================================
+ attribute The attributes of the memory range.
+ num_pages The size of the memory range in pages.
+ phys_addr The physical address of the memory range.
+ type The type of the memory range.
+ virt_addr The virtual address of the memory range.
+ ========= =========================================
Above values are all hexadecimal numbers with the '0x' prefix.
Users: Kexec
diff --git a/Documentation/ABI/testing/sysfs-firmware-gsmi b/Documentation/ABI/testing/sysfs-firmware-gsmi
index 0faa0aaf4b6a..7a558354c1ee 100644
--- a/Documentation/ABI/testing/sysfs-firmware-gsmi
+++ b/Documentation/ABI/testing/sysfs-firmware-gsmi
@@ -20,7 +20,7 @@ Description:
This directory has the same layout (and
underlying implementation as /sys/firmware/efi/vars.
- See Documentation/ABI/*/sysfs-firmware-efi-vars
+ See `Documentation/ABI/*/sysfs-firmware-efi-vars`
for more information on how to interact with
this structure.
diff --git a/Documentation/ABI/testing/sysfs-firmware-initrd b/Documentation/ABI/testing/sysfs-firmware-initrd
new file mode 100644
index 000000000000..20bf7cf77a19
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-firmware-initrd
@@ -0,0 +1,8 @@
+What: /sys/firmware/initrd
+Date: December 2023
+Contact: Alexander Graf <graf@amazon.com>
+Description:
+ When the kernel was booted with an initrd and the
+ "retain_initrd" option is set on the kernel command
+ line, /sys/firmware/initrd contains the contents of the
+ initrd that the kernel was booted with.
diff --git a/Documentation/ABI/testing/sysfs-firmware-lefi-boardinfo b/Documentation/ABI/testing/sysfs-firmware-lefi-boardinfo
new file mode 100644
index 000000000000..5e3f6148c52e
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-firmware-lefi-boardinfo
@@ -0,0 +1,35 @@
+What: /sys/firmware/lefi/boardinfo
+Date: October 2020
+Contact: Tiezhu Yang <yangtiezhu@loongson.cn>
+Description:
+ Get mainboard and BIOS info easily on the Loongson platform,
+ this is useful to point out the current used mainboard type
+ and BIOS version when there exists problems related with
+ hardware or firmware.
+
+ The related structures are already defined in the interface
+ specification about firmware and kernel which are common
+ requirement and specific for Loongson64, so only add a new
+ boardinfo.c file in arch/mips/loongson64.
+
+ For example:
+
+ [loongson@linux ~]$ cat /sys/firmware/lefi/boardinfo
+ Board Info
+ Manufacturer : LEMOTE
+ Board Name : LEMOTE-LS3A4000-7A1000-1w-V01-pc
+ Family : LOONGSON3
+
+ BIOS Info
+ Vendor : Kunlun
+ Version : Kunlun-A1901-V4.1.3-20200414093938
+ ROM Size : 4 KB
+ Release Date : 2020-04-14
+
+ By the way, using dmidecode command can get the similar info if there
+ exists SMBIOS in firmware, but the fact is that there is no SMBIOS on
+ some machines, we can see nothing when execute dmidecode, like this:
+
+ [root@linux loongson]# dmidecode
+ # dmidecode 2.12
+ # No SMBIOS nor DMI entry point found, sorry.
diff --git a/Documentation/ABI/testing/sysfs-firmware-memmap b/Documentation/ABI/testing/sysfs-firmware-memmap
index eca0d65087dc..9205122fa4b1 100644
--- a/Documentation/ABI/testing/sysfs-firmware-memmap
+++ b/Documentation/ABI/testing/sysfs-firmware-memmap
@@ -20,7 +20,7 @@ Description:
the raw memory map to userspace.
The structure is as follows: Under /sys/firmware/memmap there
- are subdirectories with the number of the entry as their name:
+ are subdirectories with the number of the entry as their name::
/sys/firmware/memmap/0
/sys/firmware/memmap/1
@@ -34,14 +34,16 @@ Description:
Each directory contains three files:
- start : The start address (as hexadecimal number with the
+ ======== =====================================================
+ start The start address (as hexadecimal number with the
'0x' prefix).
- end : The end address, inclusive (regardless whether the
+ end The end address, inclusive (regardless whether the
firmware provides inclusive or exclusive ranges).
- type : Type of the entry as string. See below for a list of
+ type Type of the entry as string. See below for a list of
valid types.
+ ======== =====================================================
- So, for example:
+ So, for example::
/sys/firmware/memmap/0/start
/sys/firmware/memmap/0/end
@@ -54,12 +56,15 @@ Description:
- System RAM
- ACPI Tables
- ACPI Non-volatile Storage
+ - Unusable memory
+ - Persistent Memory (legacy)
+ - Persistent Memory
+ - Soft Reserved
- reserved
Following shell snippet can be used to display that memory
- map in a human-readable format:
+ map in a human-readable format::
- -------------------- 8< ----------------------------------------
#!/bin/bash
cd /sys/firmware/memmap
for dir in * ; do
@@ -68,4 +73,3 @@ Description:
type=$(cat $dir/type)
printf "%016x-%016x (%s)\n" $start $[ $end +1] "$type"
done
- -------------------- >8 ----------------------------------------
diff --git a/Documentation/ABI/testing/sysfs-firmware-opal-powercap b/Documentation/ABI/testing/sysfs-firmware-opal-powercap
index c9b66ec4f165..d2d12ee89288 100644
--- a/Documentation/ABI/testing/sysfs-firmware-opal-powercap
+++ b/Documentation/ABI/testing/sysfs-firmware-opal-powercap
@@ -1,6 +1,6 @@
What: /sys/firmware/opal/powercap
Date: August 2017
-Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
+Contact: Linux for PowerPC mailing list <linuxppc-dev@lists.ozlabs.org>
Description: Powercap directory for Powernv (P8, P9) servers
Each folder in this directory contains a
@@ -11,7 +11,7 @@ What: /sys/firmware/opal/powercap/system-powercap
/sys/firmware/opal/powercap/system-powercap/powercap-max
/sys/firmware/opal/powercap/system-powercap/powercap-current
Date: August 2017
-Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
+Contact: Linux for PowerPC mailing list <linuxppc-dev@lists.ozlabs.org>
Description: System powercap directory and attributes applicable for
Powernv (P8, P9) servers
diff --git a/Documentation/ABI/testing/sysfs-firmware-opal-psr b/Documentation/ABI/testing/sysfs-firmware-opal-psr
index cc2ece70e365..1e55b56a0f89 100644
--- a/Documentation/ABI/testing/sysfs-firmware-opal-psr
+++ b/Documentation/ABI/testing/sysfs-firmware-opal-psr
@@ -1,6 +1,6 @@
What: /sys/firmware/opal/psr
Date: August 2017
-Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
+Contact: Linux for PowerPC mailing list <linuxppc-dev@lists.ozlabs.org>
Description: Power-Shift-Ratio directory for Powernv P9 servers
Power-Shift-Ratio allows to provide hints the firmware
@@ -10,7 +10,7 @@ Description: Power-Shift-Ratio directory for Powernv P9 servers
What: /sys/firmware/opal/psr/cpu_to_gpu_X
Date: August 2017
-Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
+Contact: Linux for PowerPC mailing list <linuxppc-dev@lists.ozlabs.org>
Description: PSR sysfs attributes for Powernv P9 servers
Power-Shift-Ratio between CPU and GPU for a given chip
diff --git a/Documentation/ABI/testing/sysfs-firmware-opal-sensor-groups b/Documentation/ABI/testing/sysfs-firmware-opal-sensor-groups
new file mode 100644
index 000000000000..fcb1fb4795b6
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-firmware-opal-sensor-groups
@@ -0,0 +1,21 @@
+What: /sys/firmware/opal/sensor_groups
+Date: August 2017
+Contact: Linux for PowerPC mailing list <linuxppc-dev@lists.ozlabs.org>
+Description: Sensor groups directory for POWER9 powernv servers
+
+ Each folder in this directory contains a sensor group
+ which are classified based on type of the sensor
+ like power, temperature, frequency, current, etc. They
+ can also indicate the group of sensors belonging to
+ different owners like CSM, Profiler, Job-Scheduler
+
+What: /sys/firmware/opal/sensor_groups/<sensor_group_name>/clear
+Date: August 2017
+Contact: Linux for PowerPC mailing list <linuxppc-dev@lists.ozlabs.org>
+Description: Sysfs file to clear the min-max of all the sensors
+ belonging to the group.
+
+ Writing 1 to this file will clear the minimum and
+ maximum values of all the sensors in the group.
+ In POWER9, the min-max of a sensor is the historical minimum
+ and maximum value of the sensor cached by OCC.
diff --git a/Documentation/ABI/testing/sysfs-firmware-papr-energy-scale-info b/Documentation/ABI/testing/sysfs-firmware-papr-energy-scale-info
new file mode 100644
index 000000000000..f5cefb81ac9d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-firmware-papr-energy-scale-info
@@ -0,0 +1,29 @@
+What: /sys/firmware/papr/energy_scale_info
+Date: February 2022
+Contact: Linux for PowerPC mailing list <linuxppc-dev@lists.ozlabs.org>
+Description: Directory hosting a set of platform attributes like
+ energy/frequency on Linux running as a PAPR guest.
+
+ Each file in a directory contains a platform
+ attribute hierarchy pertaining to performance/
+ energy-savings mode and processor frequency.
+
+What: /sys/firmware/papr/energy_scale_info/<id>
+Date: February 2022
+Contact: Linux for PowerPC mailing list <linuxppc-dev@lists.ozlabs.org>
+Description: Energy, frequency attributes directory for POWERVM servers
+
+What: /sys/firmware/papr/energy_scale_info/<id>/desc
+Date: February 2022
+Contact: Linux for PowerPC mailing list <linuxppc-dev@lists.ozlabs.org>
+Description: String description of the energy attribute of <id>
+
+What: /sys/firmware/papr/energy_scale_info/<id>/value
+Date: February 2022
+Contact: Linux for PowerPC mailing list <linuxppc-dev@lists.ozlabs.org>
+Description: Numeric value of the energy attribute of <id>
+
+What: /sys/firmware/papr/energy_scale_info/<id>/value_desc
+Date: February 2022
+Contact: Linux for PowerPC mailing list <linuxppc-dev@lists.ozlabs.org>
+Description: String value of the energy attribute of <id>
diff --git a/Documentation/ABI/testing/sysfs-firmware-qemu_fw_cfg b/Documentation/ABI/testing/sysfs-firmware-qemu_fw_cfg
index 011dda4f8e8a..54d1bfd0db12 100644
--- a/Documentation/ABI/testing/sysfs-firmware-qemu_fw_cfg
+++ b/Documentation/ABI/testing/sysfs-firmware-qemu_fw_cfg
@@ -12,10 +12,11 @@ Description:
configuration data to the guest userspace.
The authoritative guest-side hardware interface documentation
- to the fw_cfg device can be found in "docs/specs/fw_cfg.txt"
- in the QEMU source tree.
+ to the fw_cfg device can be found in "docs/specs/fw_cfg.rst"
+ in the QEMU source tree, or online at:
+ https://qemu-project.gitlab.io/qemu/specs/fw_cfg.html
- === SysFS fw_cfg Interface ===
+ **SysFS fw_cfg Interface**
The fw_cfg sysfs interface described in this document is only
intended to display discoverable blobs (i.e., those registered
@@ -31,7 +32,7 @@ Description:
/sys/firmware/qemu_fw_cfg/rev
- --- Discoverable fw_cfg blobs by selector key ---
+ **Discoverable fw_cfg blobs by selector key**
All discoverable blobs listed in the fw_cfg file directory are
displayed as entries named after their unique selector key
@@ -45,24 +46,26 @@ Description:
Each such fw_cfg sysfs entry has the following values exported
as attributes:
- name : The 56-byte nul-terminated ASCII string used as the
+ ==== ====================================================
+ name The 56-byte nul-terminated ASCII string used as the
blob's 'file name' in the fw_cfg directory.
- size : The length of the blob, as given in the fw_cfg
+ size The length of the blob, as given in the fw_cfg
directory.
- key : The value of the blob's selector key as given in the
+ key The value of the blob's selector key as given in the
fw_cfg directory. This value is the same as used in
the parent directory name.
- raw : The raw bytes of the blob, obtained by selecting the
+ raw The raw bytes of the blob, obtained by selecting the
entry via the control register, and reading a number
of bytes equal to the blob size from the data
register.
+ ==== ====================================================
- --- Listing fw_cfg blobs by file name ---
+ **Listing fw_cfg blobs by file name**
While the fw_cfg device does not impose any specific naming
convention on the blobs registered in the file directory,
QEMU developers have traditionally used path name semantics
- to give each blob a descriptive name. For example:
+ to give each blob a descriptive name. For example::
"bootorder"
"genroms/kvmvapic.bin"
@@ -81,7 +84,7 @@ Description:
of directories matching the path name components of fw_cfg
blob names, ending in symlinks to the by_key entry for each
"basename", as illustrated below (assume current directory is
- /sys/firmware):
+ /sys/firmware)::
qemu_fw_cfg/by_name/bootorder -> ../by_key/38
qemu_fw_cfg/by_name/etc/e820 -> ../../by_key/35
diff --git a/Documentation/ABI/testing/sysfs-firmware-sfi b/Documentation/ABI/testing/sysfs-firmware-sfi
deleted file mode 100644
index 4be7d44aeacf..000000000000
--- a/Documentation/ABI/testing/sysfs-firmware-sfi
+++ /dev/null
@@ -1,15 +0,0 @@
-What: /sys/firmware/sfi/tables/
-Date: May 2010
-Contact: Len Brown <lenb@kernel.org>
-Description:
- SFI defines a number of small static memory tables
- so the kernel can get platform information from firmware.
-
- The tables are defined in the latest SFI specification:
- http://simplefirmware.org/documentation
-
- While the tables are used by the kernel, user-space
- can observe them this way:
-
- # cd /sys/firmware/sfi/tables
- # cat $TABLENAME > $TABLENAME.bin
diff --git a/Documentation/ABI/testing/sysfs-firmware-sgi_uv b/Documentation/ABI/testing/sysfs-firmware-sgi_uv
index 4573fd4b7876..7fe9244b87bb 100644
--- a/Documentation/ABI/testing/sysfs-firmware-sgi_uv
+++ b/Documentation/ABI/testing/sysfs-firmware-sgi_uv
@@ -1,27 +1,159 @@
What: /sys/firmware/sgi_uv/
-Date: August 2008
-Contact: Russ Anderson <rja@sgi.com>
+Date: September 2020
+Contact: Justin Ernst <justin.ernst@hpe.com>
Description:
The /sys/firmware/sgi_uv directory contains information
- about the SGI UV platform.
+ about the UV platform.
- Under that directory are a number of files:
+ Under that directory are a number of read-only attributes::
+ archtype
+ hub_type
+ hubless
partition_id
coherence_id
+ uv_type
+
+ The archtype entry contains the UV architecture type that
+ is used to select arch-dependent addresses and features.
+ It can be set via the OEM_ID in the ACPI MADT table or by
+ UVsystab entry both passed from UV BIOS.
+
+ The hub_type entry is used to select the type of hub which is
+ similar to uv_type but encoded in a binary format. Include
+ the file uv_hub.h to get the definitions.
+
+ The hubless entry basically is present and set only if there
+ is no hub. In this case the hub_type entry is not present.
The partition_id entry contains the partition id.
- SGI UV systems can be partitioned into multiple physical
+ UV systems can be partitioned into multiple physical
machines, which each partition running a unique copy
- of the operating system. Each partition will have a unique
- partition id. To display the partition id, use the command:
-
- cat /sys/firmware/sgi_uv/partition_id
+ of the operating system. Each partition will have a unique
+ partition id.
The coherence_id entry contains the coherence id.
- A partitioned SGI UV system can have one or more coherence
- domain. The coherence id indicates which coherence domain
- this partition is in. To display the coherence id, use the
- command:
+ A partitioned UV system can have one or more coherence
+ domains. The coherence id indicates which coherence domain
+ this partition is in.
+
+ The uv_type entry contains the hub revision number.
+ This value can be used to identify the UV system version::
+
+ "0.*" = Hubless UV ('*' is subtype)
+ "3.0" = UV2
+ "5.0" = UV3
+ "7.0" = UV4
+ "7.1" = UV4a
+ "9.0" = UV5
+
+ The /sys/firmware/sgi_uv directory also contains two directories::
+
+ hubs/
+ pcibuses/
+
+ The hubs directory contains a number of hub objects, each representing
+ a UV Hub visible to the BIOS. Each hub object's name is appended by a
+ unique ordinal value (ex. /sys/firmware/sgi_uv/hubs/hub_5)
+
+ Each hub object directory contains a number of read-only attributes::
+
+ cnode
+ location
+ name
+ nasid
+ shared
+ this_partition
+
+ The cnode entry contains the cnode number of the corresponding hub.
+ If a cnode value is not applicable, the value returned will be -1.
+
+ The location entry contains the location string of the corresponding hub.
+ This value is used to physically identify a hub within a system.
+
+ The name entry contains the name of the corresponding hub. This name can
+ be two variants::
+
+ "UVHub x.x" = A 'node' ASIC, connecting a CPU to the interconnect
+ fabric. The 'x.x' value represents the ASIC revision.
+ (ex. 'UVHub 5.0')
+
+ "NLxRouter" = A 'router ASIC, only connecting other ASICs to
+ the interconnect fabric. The 'x' value representing
+ the fabric technology version. (ex. 'NL8Router')
+
+ The nasid entry contains the nasid number of the corresponding hub.
+ If a nasid value is not applicable, the value returned will be -1.
+
+ The shared entry contains a boolean value describing whether the
+ corresponding hub is shared between system partitions.
+
+ The this_partition entry contains a boolean value describing whether
+ the corresponding hub is local to the current partition.
+
+ Each hub object directory also contains a number of port objects,
+ each representing a fabric port on the corresponding hub.
+ A port object's name is appended by a unique ordinal value
+ (ex. /sys/firmware/sgi_uv/hubs/hub_5/port_3)
+
+ Each port object directory contains a number of read-only attributes::
+
+ conn_hub
+ conn_port
+
+ The conn_hub entry contains a value representing the unique
+ ordinal value of the hub on the other end of the fabric
+ cable plugged into the port. If the port is disconnected,
+ the value returned will be -1.
+
+ The conn_port entry contains a value representing the unique
+ ordinal value of the port on the other end of the fabric cable
+ plugged into the port. If the port is disconnected, the value
+ returned will be -1.
+
+ Ex:
+ A value of '3' is read from:
+ /sys/firmware/sgi_uv/hubs/hub_5/port_3/conn_hub
+
+ and a value of '6' is read from:
+ /sys/firmware/sgi_uv/hubs/hub_5/port_3/conn_port
+
+ representing that this port is connected to:
+ /sys/firmware/sgi_uv/hubs/hub_3/port_6
+
+ The pcibuses directory contains a number of PCI bus objects.
+ Each PCI bus object's name is appended by its PCI bus address.
+ (ex. pcibus_0003:80)
+
+ Each pcibus object has a number of possible read-only attributes::
+
+ type
+ location
+ slot
+ ppb_addr
+ iio_stack
+
+ The type entry contains a value describing the type of IO at
+ the corresponding PCI bus address. Known possible values
+ across all UV versions are::
+
+ BASE IO
+ PCIe IO
+ PCIe SLOT
+ NODE IO
+ Riser
+ PPB
+
+ The location entry contains the location string of the UV Hub
+ of the CPU physically connected to the corresponding PCI bus.
+
+ The slot entry contains the physical slot number of the
+ corresponding PCI bus. This value is used to physically locate
+ PCI cards within a system.
+
+ The ppb_addr entry contains the PCI address string of the
+ bridged PCI bus. This entry is only present when the PCI bus
+ object type is 'PPB'.
- cat /sys/firmware/sgi_uv/coherence_id
+ The iio_stack entry contains a value describing the IIO stack
+ number that the corresponding PCI bus object is connected to.
diff --git a/Documentation/ABI/testing/sysfs-firmware-turris-mox-rwtm b/Documentation/ABI/testing/sysfs-firmware-turris-mox-rwtm
new file mode 100644
index 000000000000..26741cb84504
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-firmware-turris-mox-rwtm
@@ -0,0 +1,28 @@
+What: /sys/firmware/turris-mox-rwtm/board_version
+Date: August 2019
+KernelVersion: 5.4
+Contact: Marek Behún <kabel@kernel.org>
+Description: (Read) Board version burned into eFuses of this Turris Mox board.
+ Format: %i
+
+What: /sys/firmware/turris-mox-rwtm/mac_address*
+Date: August 2019
+KernelVersion: 5.4
+Contact: Marek Behún <kabel@kernel.org>
+Description: (Read) MAC addresses burned into eFuses of this Turris Mox board.
+ Format: %pM
+
+What: /sys/firmware/turris-mox-rwtm/ram_size
+Date: August 2019
+KernelVersion: 5.4
+Contact: Marek Behún <kabel@kernel.org>
+Description: (Read) RAM size in MiB of this Turris Mox board as was detected
+ during manufacturing and burned into eFuses. Can be 512 or 1024.
+ Format: %i
+
+What: /sys/firmware/turris-mox-rwtm/serial_number
+Date: August 2019
+KernelVersion: 5.4
+Contact: Marek Behún <kabel@kernel.org>
+Description: (Read) Serial number burned into eFuses of this Turris Mox device.
+ Format: %016X
diff --git a/Documentation/ABI/testing/sysfs-fs-erofs b/Documentation/ABI/testing/sysfs-fs-erofs
new file mode 100644
index 000000000000..76d9808ed581
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-fs-erofs
@@ -0,0 +1,45 @@
+What: /sys/fs/erofs/features/
+Date: November 2021
+Contact: "Huang Jianan" <huangjianan@oppo.com>
+Description: Shows all enabled kernel features.
+ Supported features:
+ zero_padding, compr_cfgs, big_pcluster, chunked_file,
+ device_table, compr_head2, sb_chksum, ztailpacking,
+ dedupe, fragments, 48bit, metabox.
+
+What: /sys/fs/erofs/<disk>/sync_decompress
+Date: November 2021
+Contact: "Huang Jianan" <huangjianan@oppo.com>
+Description: Control strategy of sync decompression:
+
+ - 0 (default, auto): enable for readpage, and enable for
+ readahead on atomic contexts only.
+ - 1 (force on): enable for readpage and readahead.
+ - 2 (force off): disable for all situations.
+
+What: /sys/fs/erofs/<disk>/drop_caches
+Date: November 2024
+Contact: "Guo Chunhai" <guochunhai@vivo.com>
+Description: Writing to this will drop compression-related caches,
+ currently used to drop in-memory pclusters and cached
+ compressed folios:
+
+ - 1 : invalidate cached compressed folios
+ - 2 : drop in-memory pclusters
+ - 3 : drop in-memory pclusters and cached compressed folios
+
+What: /sys/fs/erofs/accel
+Date: May 2025
+Contact: "Bo Liu" <liubo03@inspur.com>
+Description: Used to set or show hardware accelerators in effect
+ and multiple accelerators are separated by '\n'.
+ Supported accelerator(s): qat_deflate.
+ Disable all accelerators with an empty string (echo > accel).
+
+What: /sys/fs/erofs/<disk>/dir_ra_bytes
+Date: July 2025
+Contact: "Chao Yu" <chao@kernel.org>
+Description: Used to set or show readahead bytes during readdir(), by
+ default the value is 16384.
+
+ - 0: disable readahead.
diff --git a/Documentation/ABI/testing/sysfs-fs-ext4 b/Documentation/ABI/testing/sysfs-fs-ext4
index c631253cf85c..2edd0a6672d3 100644
--- a/Documentation/ABI/testing/sysfs-fs-ext4
+++ b/Documentation/ABI/testing/sysfs-fs-ext4
@@ -33,7 +33,7 @@ What: /sys/fs/ext4/<disk>/mb_order2_req
Date: March 2008
Contact: "Theodore Ts'o" <tytso@mit.edu>
Description:
- Tuning parameter which controls the minimum size for
+ Tuning parameter which controls the minimum size for
requests (as a power of 2) where the buddy cache is
used
@@ -45,8 +45,8 @@ Description:
parameter will have their blocks allocated out of a
block group specific preallocation pool, so that small
files are packed closely together. Each large file
- will have its blocks allocated out of its own unique
- preallocation pool.
+ will have its blocks allocated out of its own unique
+ preallocation pool.
What: /sys/fs/ext4/<disk>/inode_readahead_blks
Date: March 2008
@@ -109,3 +109,10 @@ Description:
write operation (since a 4k random write might turn
into a much larger write due to the zeroout
operation).
+
+What: /sys/fs/ext4/<disk>/journal_task
+Date: February 2019
+Contact: "Theodore Ts'o" <tytso@mit.edu>
+Description:
+ This file is read-only and shows the pid of journal thread in
+ current pid-namespace or 0 if task is unreachable.
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 3ac41774ad3c..b590809869ca 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -1,229 +1,935 @@
What: /sys/fs/f2fs/<disk>/gc_max_sleep_time
Date: July 2013
Contact: "Namjae Jeon" <namjae.jeon@samsung.com>
-Description:
- Controls the maximun sleep time for gc_thread. Time
- is in milliseconds.
+Description: Controls the maximum sleep time for gc_thread. Time
+ is in milliseconds.
What: /sys/fs/f2fs/<disk>/gc_min_sleep_time
Date: July 2013
Contact: "Namjae Jeon" <namjae.jeon@samsung.com>
-Description:
- Controls the minimum sleep time for gc_thread. Time
- is in milliseconds.
+Description: Controls the minimum sleep time for gc_thread. Time
+ is in milliseconds.
What: /sys/fs/f2fs/<disk>/gc_no_gc_sleep_time
Date: July 2013
Contact: "Namjae Jeon" <namjae.jeon@samsung.com>
-Description:
- Controls the default sleep time for gc_thread. Time
- is in milliseconds.
+Description: Controls the default sleep time for gc_thread. Time
+ is in milliseconds.
What: /sys/fs/f2fs/<disk>/gc_idle
Date: July 2013
Contact: "Namjae Jeon" <namjae.jeon@samsung.com>
-Description:
- Controls the victim selection policy for garbage collection.
+Description: Controls the victim selection policy for garbage collection.
+ Setting gc_idle = 0(default) will disable this option. Setting:
+
+ =========== ===============================================
+ gc_idle = 1 will select the Cost Benefit approach & setting
+ gc_idle = 2 will select the greedy approach & setting
+ gc_idle = 3 will select the age-threshold based approach.
+ =========== ===============================================
What: /sys/fs/f2fs/<disk>/reclaim_segments
Date: October 2013
Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
-Description:
- Controls the issue rate of segment discard commands.
+Description: This parameter controls the number of prefree segments to be
+ reclaimed. If the number of prefree segments is larger than
+ the number of segments in the proportion to the percentage
+ over total volume size, f2fs tries to conduct checkpoint to
+ reclaim the prefree segments to free segments.
+ By default, 5% over total # of segments.
+
+What: /sys/fs/f2fs/<disk>/main_blkaddr
+Date: November 2019
+Contact: "Ramon Pantin" <pantin@google.com>
+Description: Shows first block address of MAIN area.
What: /sys/fs/f2fs/<disk>/ipu_policy
Date: November 2013
Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
-Description:
- Controls the in-place-update policy.
+Description: Controls the in-place-update policy.
+ updates in f2fs. User can set:
+
+ ===== =============== ===================================================
+ value policy description
+ 0x00 DISABLE disable IPU(=default option in LFS mode)
+ 0x01 FORCE all the time
+ 0x02 SSR if SSR mode is activated
+ 0x04 UTIL if FS utilization is over threshold
+ 0x08 SSR_UTIL if SSR mode is activated and FS utilization is over
+ threshold
+ 0x10 FSYNC activated in fsync path only for high performance
+ flash storages. IPU will be triggered only if the
+ # of dirty pages over min_fsync_blocks.
+ (=default option)
+ 0x20 ASYNC do IPU given by asynchronous write requests
+ 0x40 NOCACHE disable IPU bio cache
+ 0x80 HONOR_OPU_WRITE use OPU write prior to IPU write if inode has
+ FI_OPU_WRITE flag
+ ===== =============== ===================================================
+
+ Refer segment.h for details.
What: /sys/fs/f2fs/<disk>/min_ipu_util
Date: November 2013
Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
-Description:
- Controls the FS utilization condition for the in-place-update
- policies.
+Description: Controls the FS utilization condition for the in-place-update
+ policies. It is used by F2FS_IPU_UTIL and F2FS_IPU_SSR_UTIL policies.
What: /sys/fs/f2fs/<disk>/min_fsync_blocks
Date: September 2014
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
-Description:
- Controls the dirty page count condition for the in-place-update
- policies.
+Description: Controls the dirty page count condition for the in-place-update
+ policies.
What: /sys/fs/f2fs/<disk>/min_seq_blocks
Date: August 2018
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
-Description:
- Controls the dirty page count condition for batched sequential
- writes in ->writepages.
-
+Description: Controls the dirty page count condition for batched sequential
+ writes in writepages.
What: /sys/fs/f2fs/<disk>/min_hot_blocks
Date: March 2017
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
-Description:
- Controls the dirty page count condition for redefining hot data.
+Description: Controls the dirty page count condition for redefining hot data.
What: /sys/fs/f2fs/<disk>/min_ssr_sections
Date: October 2017
Contact: "Chao Yu" <yuchao0@huawei.com>
-Description:
- Controls the fee section threshold to trigger SSR allocation.
+Description: Controls the free section threshold to trigger SSR allocation.
+ If this is large, SSR mode will be enabled early.
What: /sys/fs/f2fs/<disk>/max_small_discards
Date: November 2013
Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
-Description:
- Controls the issue rate of small discard commands.
-
-What: /sys/fs/f2fs/<disk>/discard_granularity
-Date: July 2017
-Contact: "Chao Yu" <yuchao0@huawei.com>
-Description:
- Controls discard granularity of inner discard thread, inner thread
+Description: Controls the issue rate of discard commands that consist of small
+ blocks less than 2MB. The candidates to be discarded are cached during
+ checkpoint, and issued by issue_discard thread after checkpoint.
+ It is enabled by default.
+
+What: /sys/fs/f2fs/<disk>/max_ordered_discard
+Date: October 2022
+Contact: "Yangtao Li" <frank.li@vivo.com>
+Description: Controls the maximum ordered discard, the unit size is one block(4KB).
+ Set it to 16 by default.
+
+What: /sys/fs/f2fs/<disk>/max_discard_request
+Date: December 2021
+Contact: "Konstantin Vyshetsky" <vkon@google.com>
+Description: Controls the number of discards a thread will issue at a time.
+ Higher number will allow the discard thread to finish its work
+ faster, at the cost of higher latency for incoming I/O.
+
+What: /sys/fs/f2fs/<disk>/min_discard_issue_time
+Date: December 2021
+Contact: "Konstantin Vyshetsky" <vkon@google.com>
+Description: Controls the interval the discard thread will wait between
+ issuing discard requests when there are discards to be issued and
+ no I/O aware interruptions occur.
+
+What: /sys/fs/f2fs/<disk>/mid_discard_issue_time
+Date: December 2021
+Contact: "Konstantin Vyshetsky" <vkon@google.com>
+Description: Controls the interval the discard thread will wait between
+ issuing discard requests when there are discards to be issued and
+ an I/O aware interruption occurs.
+
+What: /sys/fs/f2fs/<disk>/max_discard_issue_time
+Date: December 2021
+Contact: "Konstantin Vyshetsky" <vkon@google.com>
+Description: Controls the interval the discard thread will wait when there are
+ no discard operations to be issued.
+
+What: /sys/fs/f2fs/<disk>/discard_granularity
+Date: July 2017
+Contact: "Chao Yu" <yuchao0@huawei.com>
+Description: Controls discard granularity of inner discard thread. Inner thread
will not issue discards with size that is smaller than granularity.
- The unit size is one block, now only support configuring in range
- of [1, 512].
+ The unit size is one block(4KB), now only support configuring
+ in range of [1, 512]. Default value is 16.
+ For small devices, default value is 1.
+
+What: /sys/fs/f2fs/<disk>/umount_discard_timeout
+Date: January 2019
+Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description: Set timeout to issue discard commands during umount.
+ Default: 5 secs
+
+What: /sys/fs/f2fs/<disk>/pending_discard
+Date: November 2021
+Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description: Shows the number of pending discard commands in the queue.
What: /sys/fs/f2fs/<disk>/max_victim_search
Date: January 2014
Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
-Description:
- Controls the number of trials to find a victim segment.
+Description: Controls the number of trials to find a victim segment
+ when conducting SSR and cleaning operations. The default value
+ is 4096 which covers 8GB block address range.
+
+What: /sys/fs/f2fs/<disk>/migration_granularity
+Date: October 2018
+Contact: "Chao Yu" <yuchao0@huawei.com>
+Description: Controls migration granularity of garbage collection on large
+ section, it can let GC move partial segment{s} of one section
+ in one GC cycle, so that dispersing heavy overhead GC to
+ multiple lightweight one.
What: /sys/fs/f2fs/<disk>/dir_level
Date: March 2014
Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
-Description:
- Controls the directory level for large directory.
+Description: Controls the directory level for large directory. If a
+ directory has a number of files, it can reduce the file lookup
+ latency by increasing this dir_level value. Otherwise, it
+ needs to decrease this value to reduce the space overhead.
+ The default value is 0.
What: /sys/fs/f2fs/<disk>/ram_thresh
Date: March 2014
Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
-Description:
- Controls the memory footprint used by f2fs.
-
-What: /sys/fs/f2fs/<disk>/batched_trim_sections
-Date: February 2015
-Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
-Description:
- Controls the trimming rate in batch mode.
- <deprecated>
+Description: Controls the memory footprint used by free nids and cached
+ nat entries. By default, 1 is set, which indicates
+ 10 MB / 1 GB RAM.
What: /sys/fs/f2fs/<disk>/cp_interval
Date: October 2015
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
-Description:
- Controls the checkpoint timing.
+Description: Controls the checkpoint timing, set to 60 seconds by default.
What: /sys/fs/f2fs/<disk>/idle_interval
Date: January 2016
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
-Description:
- Controls the idle timing for all paths other than
- discard and gc path.
+Description: Controls the idle timing of system, if there is no FS operation
+ during given interval.
+ Set to 5 seconds by default.
What: /sys/fs/f2fs/<disk>/discard_idle_interval
Date: September 2018
Contact: "Chao Yu" <yuchao0@huawei.com>
-Contact: "Sahitya Tummala" <stummala@codeaurora.org>
-Description:
- Controls the idle timing for discard path.
+Contact: "Sahitya Tummala" <quic_stummala@quicinc.com>
+Description: Controls the idle timing of discard thread given
+ this time interval.
+ Default is 5 secs.
What: /sys/fs/f2fs/<disk>/gc_idle_interval
Date: September 2018
Contact: "Chao Yu" <yuchao0@huawei.com>
-Contact: "Sahitya Tummala" <stummala@codeaurora.org>
-Description:
- Controls the idle timing for gc path.
+Contact: "Sahitya Tummala" <quic_stummala@quicinc.com>
+Description: Controls the idle timing for gc path. Set to 5 seconds by default.
What: /sys/fs/f2fs/<disk>/iostat_enable
Date: August 2017
Contact: "Chao Yu" <yuchao0@huawei.com>
-Description:
- Controls to enable/disable IO stat.
+Description: Controls to enable/disable IO stat.
What: /sys/fs/f2fs/<disk>/ra_nid_pages
Date: October 2015
Contact: "Chao Yu" <chao2.yu@samsung.com>
-Description:
- Controls the count of nid pages to be readaheaded.
+Description: Controls the count of nid pages to be readaheaded.
+ When building free nids, F2FS reads NAT blocks ahead for
+ speed up. Default is 0.
What: /sys/fs/f2fs/<disk>/dirty_nats_ratio
Date: January 2016
Contact: "Chao Yu" <chao2.yu@samsung.com>
-Description:
- Controls dirty nat entries ratio threshold, if current
- ratio exceeds configured threshold, checkpoint will
- be triggered for flushing dirty nat entries.
+Description: Controls dirty nat entries ratio threshold, if current
+ ratio exceeds configured threshold, checkpoint will
+ be triggered for flushing dirty nat entries.
What: /sys/fs/f2fs/<disk>/lifetime_write_kbytes
Date: January 2016
Contact: "Shuoran Liu" <liushuoran@huawei.com>
-Description:
- Shows total written kbytes issued to disk.
+Description: Shows total written kbytes issued to disk.
What: /sys/fs/f2fs/<disk>/features
Date: July 2017
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
-Description:
- Shows all enabled features in current device.
+Description: <deprecated: should use /sys/fs/f2fs/<disk>/feature_list/>
+ Shows all enabled features in current device.
+ Supported features:
+ encryption, blkzoned, extra_attr, projquota, inode_checksum,
+ flexible_inline_xattr, quota_ino, inode_crtime, lost_found,
+ verity, sb_checksum, casefold, readonly, compression, pin_file.
+
+What: /sys/fs/f2fs/<disk>/feature_list/
+Date: June 2021
+Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description: Expand /sys/fs/f2fs/<disk>/features to meet sysfs rule.
+ Supported on-disk features:
+ encryption, block_zoned (aka blkzoned), extra_attr,
+ project_quota (aka projquota), inode_checksum,
+ flexible_inline_xattr, quota_ino, inode_crtime, lost_found,
+ verity, sb_checksum, casefold, readonly, compression.
+ Note that, pin_file is moved into /sys/fs/f2fs/features/.
+
+What: /sys/fs/f2fs/features/
+Date: July 2017
+Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description: Shows all enabled kernel features.
+ Supported features:
+ encryption, block_zoned, extra_attr, project_quota,
+ inode_checksum, flexible_inline_xattr, quota_ino,
+ inode_crtime, lost_found, verity, sb_checksum,
+ casefold, readonly, compression, test_dummy_encryption_v2,
+ atomic_write, pin_file, encrypted_casefold, linear_lookup.
What: /sys/fs/f2fs/<disk>/inject_rate
Date: May 2016
Contact: "Sheng Yong" <shengyong1@huawei.com>
-Description:
- Controls the injection rate.
+Description: Controls the injection rate of arbitrary faults.
What: /sys/fs/f2fs/<disk>/inject_type
Date: May 2016
Contact: "Sheng Yong" <shengyong1@huawei.com>
-Description:
- Controls the injection type.
+Description: Controls the injection type of arbitrary faults.
+
+What: /sys/fs/f2fs/<disk>/dirty_segments
+Date: October 2017
+Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description: Shows the number of dirty segments.
What: /sys/fs/f2fs/<disk>/reserved_blocks
Date: June 2017
Contact: "Chao Yu" <yuchao0@huawei.com>
-Description:
- Controls target reserved blocks in system, the threshold
- is soft, it could exceed current available user space.
+Description: Controls target reserved blocks in system, the threshold
+ is soft, it could exceed current available user space.
What: /sys/fs/f2fs/<disk>/current_reserved_blocks
Date: October 2017
Contact: "Yunlong Song" <yunlong.song@huawei.com>
Contact: "Chao Yu" <yuchao0@huawei.com>
-Description:
- Shows current reserved blocks in system, it may be temporarily
- smaller than target_reserved_blocks, but will gradually
- increase to target_reserved_blocks when more free blocks are
- freed by user later.
+Description: Shows current reserved blocks in system, it may be temporarily
+ smaller than target_reserved_blocks, but will gradually
+ increase to target_reserved_blocks when more free blocks are
+ freed by user later.
What: /sys/fs/f2fs/<disk>/gc_urgent
Date: August 2017
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
-Description:
- Do background GC agressively
+Description: Do background GC aggressively when set. Set to 0 by default.
+ gc urgent high(1): does GC forcibly in a period of given
+ gc_urgent_sleep_time and ignores I/O idling check. uses greedy
+ GC approach and turns SSR mode on.
+ gc urgent low(2): lowers the bar of checking I/O idling in
+ order to process outstanding discard commands and GC a
+ little bit aggressively. always uses cost benefit GC approach,
+ and will override age-threshold GC approach if ATGC is enabled
+ at the same time.
+ gc urgent mid(3): does GC forcibly in a period of given
+ gc_urgent_sleep_time and executes a mid level of I/O idling check.
+ always uses cost benefit GC approach, and will override
+ age-threshold GC approach if ATGC is enabled at the same time.
What: /sys/fs/f2fs/<disk>/gc_urgent_sleep_time
Date: August 2017
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
-Description:
- Controls sleep time of GC urgent mode
+Description: Controls sleep time of GC urgent mode. Set to 500ms by default.
What: /sys/fs/f2fs/<disk>/readdir_ra
Date: November 2017
Contact: "Sheng Yong" <shengyong1@huawei.com>
-Description:
- Controls readahead inode block in readdir.
+Description: Controls readahead inode block in readdir. Enabled by default.
+
+What: /sys/fs/f2fs/<disk>/gc_pin_file_thresh
+Date: January 2018
+Contact: Jaegeuk Kim <jaegeuk@kernel.org>
+Description: This indicates how many GC can be failed for the pinned
+ file. If it exceeds this, F2FS doesn't guarantee its pinning
+ state. 2048 trials is set by default, and 65535 as maximum.
What: /sys/fs/f2fs/<disk>/extension_list
-Date: Feburary 2018
+Date: February 2018
Contact: "Chao Yu" <yuchao0@huawei.com>
-Description:
- Used to control configure extension list:
- - Query: cat /sys/fs/f2fs/<disk>/extension_list
- - Add: echo '[h/c]extension' > /sys/fs/f2fs/<disk>/extension_list
- - Del: echo '[h/c]!extension' > /sys/fs/f2fs/<disk>/extension_list
- - [h] means add/del hot file extension
- - [c] means add/del cold file extension
+Description: Used to control configure extension list:
+ - Query: cat /sys/fs/f2fs/<disk>/extension_list
+ - Add: echo '[h/c]extension' > /sys/fs/f2fs/<disk>/extension_list
+ - Del: echo '[h/c]!extension' > /sys/fs/f2fs/<disk>/extension_list
+ - [h] means add/del hot file extension
+ - [c] means add/del cold file extension
+
+What: /sys/fs/f2fs/<disk>/unusable
+Date: April 2019
+Contact: "Daniel Rosenberg" <drosen@google.com>
+Description: If checkpoint=disable, it displays the number of blocks that
+ are unusable.
+ If checkpoint=enable it displays the number of blocks that
+ would be unusable if checkpoint=disable were to be set.
+
+What: /sys/fs/f2fs/<disk>/encoding
+Date: July 2019
+Contact: "Daniel Rosenberg" <drosen@google.com>
+Description: Displays name and version of the encoding set for the filesystem.
+ If no encoding is set, displays (none)
+
+What: /sys/fs/f2fs/<disk>/free_segments
+Date: September 2019
+Contact: "Hridya Valsaraju" <hridya@google.com>
+Description: Number of free segments in disk.
+
+What: /sys/fs/f2fs/<disk>/cp_foreground_calls
+Date: September 2019
+Contact: "Hridya Valsaraju" <hridya@google.com>
+Description: Number of checkpoint operations performed on demand. Available when
+ CONFIG_F2FS_STAT_FS=y.
+
+What: /sys/fs/f2fs/<disk>/cp_background_calls
+Date: September 2019
+Contact: "Hridya Valsaraju" <hridya@google.com>
+Description: Number of checkpoint operations performed in the background to
+ free segments. Available when CONFIG_F2FS_STAT_FS=y.
+
+What: /sys/fs/f2fs/<disk>/gc_foreground_calls
+Date: September 2019
+Contact: "Hridya Valsaraju" <hridya@google.com>
+Description: Number of garbage collection operations performed on demand.
+ Available when CONFIG_F2FS_STAT_FS=y.
+
+What: /sys/fs/f2fs/<disk>/gc_background_calls
+Date: September 2019
+Contact: "Hridya Valsaraju" <hridya@google.com>
+Description: Number of garbage collection operations triggered in background.
+ Available when CONFIG_F2FS_STAT_FS=y.
+
+What: /sys/fs/f2fs/<disk>/moved_blocks_foreground
+Date: September 2019
+Contact: "Hridya Valsaraju" <hridya@google.com>
+Description: Number of blocks moved by garbage collection in foreground.
+ Available when CONFIG_F2FS_STAT_FS=y.
+
+What: /sys/fs/f2fs/<disk>/moved_blocks_background
+Date: September 2019
+Contact: "Hridya Valsaraju" <hridya@google.com>
+Description: Number of blocks moved by garbage collection in background.
+ Available when CONFIG_F2FS_STAT_FS=y.
+
+What: /sys/fs/f2fs/<disk>/avg_vblocks
+Date: September 2019
+Contact: "Hridya Valsaraju" <hridya@google.com>
+Description: Average number of valid blocks.
+ Available when CONFIG_F2FS_STAT_FS=y.
+
+What: /sys/fs/f2fs/<disk>/mounted_time_sec
+Date: February 2020
+Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description: Show the mounted time in secs of this partition.
+
+What: /sys/fs/f2fs/<disk>/data_io_flag
+Date: April 2020
+Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description: Give a way to attach REQ_META|FUA to data writes
+ given temperature-based bits. Now the bits indicate:
+
+ +-------------------+-------------------+
+ | REQ_META | REQ_FUA |
+ +------+------+-----+------+------+-----+
+ | 5 | 4 | 3 | 2 | 1 | 0 |
+ +------+------+-----+------+------+-----+
+ | Cold | Warm | Hot | Cold | Warm | Hot |
+ +------+------+-----+------+------+-----+
+
+What: /sys/fs/f2fs/<disk>/node_io_flag
+Date: June 2020
+Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description: Give a way to attach REQ_META|FUA to node writes
+ given temperature-based bits. Now the bits indicate:
+
+ +-------------------+-------------------+
+ | REQ_META | REQ_FUA |
+ +------+------+-----+------+------+-----+
+ | 5 | 4 | 3 | 2 | 1 | 0 |
+ +------+------+-----+------+------+-----+
+ | Cold | Warm | Hot | Cold | Warm | Hot |
+ +------+------+-----+------+------+-----+
+
+What: /sys/fs/f2fs/<disk>/iostat_period_ms
+Date: April 2020
+Contact: "Daeho Jeong" <daehojeong@google.com>
+Description: Give a way to change iostat_period time. 3secs by default.
+ The new iostat trace gives stats gap given the period.
+What: /sys/fs/f2fs/<disk>/max_io_bytes
+Date: December 2020
+Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description: This gives a control to limit the bio size in f2fs.
+ Default is zero, which will follow underlying block layer limit,
+ whereas, if it has a certain bytes value, f2fs won't submit a
+ bio larger than that size.
+
+What: /sys/fs/f2fs/<disk>/stat/sb_status
+Date: December 2020
+Contact: "Chao Yu" <yuchao0@huawei.com>
+Description: Show status of f2fs superblock in real time.
+
+ ====== ===================== =================================
+ value sb status macro description
+ 0x1 SBI_IS_DIRTY dirty flag for checkpoint
+ 0x2 SBI_IS_CLOSE specify unmounting
+ 0x4 SBI_NEED_FSCK need fsck.f2fs to fix
+ 0x8 SBI_POR_DOING recovery is doing or not
+ 0x10 SBI_NEED_SB_WRITE need to recover superblock
+ 0x20 SBI_NEED_CP need to checkpoint
+ 0x40 SBI_IS_SHUTDOWN shutdown by ioctl
+ 0x80 SBI_IS_RECOVERED recovered orphan/data
+ 0x100 SBI_CP_DISABLED CP was disabled last mount
+ 0x200 SBI_CP_DISABLED_QUICK CP was disabled quickly
+ 0x400 SBI_QUOTA_NEED_FLUSH need to flush quota info in CP
+ 0x800 SBI_QUOTA_SKIP_FLUSH skip flushing quota in current CP
+ 0x1000 SBI_QUOTA_NEED_REPAIR quota file may be corrupted
+ 0x2000 SBI_IS_RESIZEFS resizefs is in process
+ 0x4000 SBI_IS_FREEZING freefs is in process
+ ====== ===================== =================================
+
+What: /sys/fs/f2fs/<disk>/stat/cp_status
+Date: September 2022
+Contact: "Chao Yu" <chao.yu@oppo.com>
+Description: Show status of f2fs checkpoint in real time.
+
+ =============================== ==============================
+ cp flag value
+ CP_UMOUNT_FLAG 0x00000001
+ CP_ORPHAN_PRESENT_FLAG 0x00000002
+ CP_COMPACT_SUM_FLAG 0x00000004
+ CP_ERROR_FLAG 0x00000008
+ CP_FSCK_FLAG 0x00000010
+ CP_FASTBOOT_FLAG 0x00000020
+ CP_CRC_RECOVERY_FLAG 0x00000040
+ CP_NAT_BITS_FLAG 0x00000080
+ CP_TRIMMED_FLAG 0x00000100
+ CP_NOCRC_RECOVERY_FLAG 0x00000200
+ CP_LARGE_NAT_BITMAP_FLAG 0x00000400
+ CP_QUOTA_NEED_FSCK_FLAG 0x00000800
+ CP_DISABLED_FLAG 0x00001000
+ CP_DISABLED_QUICK_FLAG 0x00002000
+ CP_RESIZEFS_FLAG 0x00004000
+ =============================== ==============================
+
+What: /sys/fs/f2fs/<disk>/stat/issued_discard
+Date: December 2023
+Contact: "Zhiguo Niu" <zhiguo.niu@unisoc.com>
+Description: Shows the number of issued discard.
+
+What: /sys/fs/f2fs/<disk>/stat/queued_discard
+Date: December 2023
+Contact: "Zhiguo Niu" <zhiguo.niu@unisoc.com>
+Description: Shows the number of queued discard.
+
+What: /sys/fs/f2fs/<disk>/stat/undiscard_blks
+Date: December 2023
+Contact: "Zhiguo Niu" <zhiguo.niu@unisoc.com>
+Description: Shows the total number of undiscard blocks.
+
+What: /sys/fs/f2fs/<disk>/ckpt_thread_ioprio
+Date: January 2021
+Contact: "Daeho Jeong" <daehojeong@google.com>
+Description: Give a way to change checkpoint merge daemon's io priority.
+ Its default value is "be,3", which means "BE" I/O class and
+ I/O priority "3". We can select the class between "rt" and "be",
+ and set the I/O priority within valid range of it. "," delimiter
+ is necessary in between I/O class and priority number.
+
+What: /sys/fs/f2fs/<disk>/ovp_segments
+Date: March 2021
+Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description: Shows the number of overprovision segments.
+
+What: /sys/fs/f2fs/<disk>/compr_written_block
+Date: March 2021
+Contact: "Daeho Jeong" <daehojeong@google.com>
+Description: Show the block count written after compression since mount. Note
+ that when the compressed blocks are deleted, this count doesn't
+ decrease. If you write "0" here, you can initialize
+ compr_written_block and compr_saved_block to "0".
+
+What: /sys/fs/f2fs/<disk>/compr_saved_block
+Date: March 2021
+Contact: "Daeho Jeong" <daehojeong@google.com>
+Description: Show the saved block count with compression since mount. Note
+ that when the compressed blocks are deleted, this count doesn't
+ decrease. If you write "0" here, you can initialize
+ compr_written_block and compr_saved_block to "0".
+
+What: /sys/fs/f2fs/<disk>/compr_new_inode
+Date: March 2021
+Contact: "Daeho Jeong" <daehojeong@google.com>
+Description: Show the count of inode newly enabled for compression since mount.
+ Note that when the compression is disabled for the files, this count
+ doesn't decrease. If you write "0" here, you can initialize
+ compr_new_inode to "0".
+
+What: /sys/fs/f2fs/<disk>/atgc_candidate_ratio
+Date: May 2021
+Contact: "Chao Yu" <yuchao0@huawei.com>
+Description: When ATGC is on, it controls candidate ratio in order to limit total
+ number of potential victim in all candidates, the value should be in
+ range of [0, 100], by default it was initialized as 20(%).
+
+What: /sys/fs/f2fs/<disk>/atgc_candidate_count
+Date: May 2021
+Contact: "Chao Yu" <yuchao0@huawei.com>
+Description: When ATGC is on, it controls candidate count in order to limit total
+ number of potential victim in all candidates, by default it was
+ initialized as 10 (sections).
+
+What: /sys/fs/f2fs/<disk>/atgc_age_weight
+Date: May 2021
+Contact: "Chao Yu" <yuchao0@huawei.com>
+Description: When ATGC is on, it controls age weight to balance weight proportion
+ in between aging and valid blocks, the value should be in range of
+ [0, 100], by default it was initialized as 60(%).
+
+What: /sys/fs/f2fs/<disk>/atgc_age_threshold
+Date: May 2021
+Contact: "Chao Yu" <yuchao0@huawei.com>
+Description: When ATGC is on, it controls age threshold to bypass GCing young
+ candidates whose age is not beyond the threshold, by default it was
+ initialized as 604800 seconds (equals to 7 days).
+
+What: /sys/fs/f2fs/<disk>/atgc_enabled
+Date: Feb 2024
+Contact: "Jinbao Liu" <liujinbao1@xiaomi.com>
+Description: It represents whether ATGC is on or off. The value is 1 which
+ indicates that ATGC is on, and 0 indicates that it is off.
+
+What: /sys/fs/f2fs/<disk>/gc_reclaimed_segments
+Date: July 2021
+Contact: "Daeho Jeong" <daehojeong@google.com>
+Description: Show how many segments have been reclaimed by GC during a specific
+ GC mode (0: GC normal, 1: GC idle CB, 2: GC idle greedy,
+ 3: GC idle AT, 4: GC urgent high, 5: GC urgent low 6: GC urgent mid)
+ You can re-initialize this value to "0".
+
+What: /sys/fs/f2fs/<disk>/gc_segment_mode
+Date: July 2021
+Contact: "Daeho Jeong" <daehojeong@google.com>
+Description: You can control for which gc mode the "gc_reclaimed_segments" node shows.
+ Refer to the description of the modes in "gc_reclaimed_segments".
+
+What: /sys/fs/f2fs/<disk>/seq_file_ra_mul
+Date: July 2021
+Contact: "Daeho Jeong" <daehojeong@google.com>
+Description: You can control the multiplier value of bdi device readahead window size
+ between 2 (default) and 256 for POSIX_FADV_SEQUENTIAL advise option.
+
+What: /sys/fs/f2fs/<disk>/max_fragment_chunk
+Date: August 2021
+Contact: "Daeho Jeong" <daehojeong@google.com>
+Description: With "mode=fragment:block" mount options, we can scatter block allocation.
+ f2fs will allocate 1..<max_fragment_chunk> blocks in a chunk and make a hole
+ in the length of 1..<max_fragment_hole> by turns. This value can be set
+ between 1..512 and the default value is 4.
+
+What: /sys/fs/f2fs/<disk>/max_fragment_hole
+Date: August 2021
+Contact: "Daeho Jeong" <daehojeong@google.com>
+Description: With "mode=fragment:block" mount options, we can scatter block allocation.
+ f2fs will allocate 1..<max_fragment_chunk> blocks in a chunk and make a hole
+ in the length of 1..<max_fragment_hole> by turns. This value can be set
+ between 1..512 and the default value is 4.
+
+What: /sys/fs/f2fs/<disk>/gc_remaining_trials
+Date: October 2022
+Contact: "Yangtao Li" <frank.li@vivo.com>
+Description: You can set the trial count limit for GC urgent and idle mode with this value.
+ If GC thread gets to the limit, the mode will turn back to GC normal mode.
+ By default, the value is zero, which means there is no limit like before.
+
+What: /sys/fs/f2fs/<disk>/max_roll_forward_node_blocks
+Date: January 2022
+Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description: Controls max # of node block writes to be used for roll forward
+ recovery. This can limit the roll forward recovery time.
+
+What: /sys/fs/f2fs/<disk>/unusable_blocks_per_sec
+Date: June 2022
+Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description: Shows the number of unusable blocks in a section which was defined by
+ the zone capacity reported by underlying zoned device.
+
+What: /sys/fs/f2fs/<disk>/current_atomic_write
+Date: July 2022
+Contact: "Daeho Jeong" <daehojeong@google.com>
+Description: Show the total current atomic write block count, which is not committed yet.
+ This is a read-only entry.
+
+What: /sys/fs/f2fs/<disk>/peak_atomic_write
+Date: July 2022
+Contact: "Daeho Jeong" <daehojeong@google.com>
+Description: Show the peak value of total current atomic write block count after boot.
+ If you write "0" here, you can initialize to "0".
+
+What: /sys/fs/f2fs/<disk>/committed_atomic_block
+Date: July 2022
+Contact: "Daeho Jeong" <daehojeong@google.com>
+Description: Show the accumulated total committed atomic write block count after boot.
+ If you write "0" here, you can initialize to "0".
+
+What: /sys/fs/f2fs/<disk>/revoked_atomic_block
+Date: July 2022
+Contact: "Daeho Jeong" <daehojeong@google.com>
+Description: Show the accumulated total revoked atomic write block count after boot.
+ If you write "0" here, you can initialize to "0".
+
+What: /sys/fs/f2fs/<disk>/gc_mode
+Date: October 2022
+Contact: "Yangtao Li" <frank.li@vivo.com>
+Description: Show the current gc_mode as a string.
+ This is a read-only entry.
+
+What: /sys/fs/f2fs/<disk>/discard_urgent_util
+Date: November 2022
+Contact: "Yangtao Li" <frank.li@vivo.com>
+Description: When space utilization exceeds this, do background DISCARD aggressively.
+ Does DISCARD forcibly in a period of given min_discard_issue_time when the number
+ of discards is not 0 and set discard granularity to 1.
+ Default: 80
+
+What: /sys/fs/f2fs/<disk>/hot_data_age_threshold
+Date: November 2022
+Contact: "Ping Xiong" <xiongping1@xiaomi.com>
+Description: When DATA SEPARATION is on, it controls the age threshold to indicate
+ the data blocks as hot. By default it was initialized as 262144 blocks
+ (equals to 1GB).
+
+What: /sys/fs/f2fs/<disk>/warm_data_age_threshold
+Date: November 2022
+Contact: "Ping Xiong" <xiongping1@xiaomi.com>
+Description: When DATA SEPARATION is on, it controls the age threshold to indicate
+ the data blocks as warm. By default it was initialized as 2621440 blocks
+ (equals to 10GB).
+
+What: /sys/fs/f2fs/<disk>/fault_rate
+Date: May 2016
+Contact: "Sheng Yong" <shengyong@oppo.com>
+Contact: "Chao Yu" <chao@kernel.org>
+Description: Enable fault injection in all supported types with
+ specified injection rate.
+
+What: /sys/fs/f2fs/<disk>/fault_type
+Date: May 2016
+Contact: "Sheng Yong" <shengyong@oppo.com>
+Contact: "Chao Yu" <chao@kernel.org>
+Description: Support configuring fault injection type, should be
+ enabled with fault_injection option, fault type value
+ is shown below, it supports single or combined type.
+
+ =========================== ==========
+ Type_Name Type_Value
+ =========================== ==========
+ FAULT_KMALLOC 0x00000001
+ FAULT_KVMALLOC 0x00000002
+ FAULT_PAGE_ALLOC 0x00000004
+ FAULT_PAGE_GET 0x00000008
+ FAULT_ALLOC_BIO 0x00000010 (obsolete)
+ FAULT_ALLOC_NID 0x00000020
+ FAULT_ORPHAN 0x00000040
+ FAULT_BLOCK 0x00000080
+ FAULT_DIR_DEPTH 0x00000100
+ FAULT_EVICT_INODE 0x00000200
+ FAULT_TRUNCATE 0x00000400
+ FAULT_READ_IO 0x00000800
+ FAULT_CHECKPOINT 0x00001000
+ FAULT_DISCARD 0x00002000
+ FAULT_WRITE_IO 0x00004000
+ FAULT_SLAB_ALLOC 0x00008000
+ FAULT_DQUOT_INIT 0x00010000
+ FAULT_LOCK_OP 0x00020000
+ FAULT_BLKADDR_VALIDITY 0x00040000
+ FAULT_BLKADDR_CONSISTENCE 0x00080000
+ FAULT_NO_SEGMENT 0x00100000
+ FAULT_INCONSISTENT_FOOTER 0x00200000
+ FAULT_TIMEOUT 0x00400000 (1000ms)
+ FAULT_VMALLOC 0x00800000
+ =========================== ==========
+
+What: /sys/fs/f2fs/<disk>/discard_io_aware_gran
+Date: January 2023
+Contact: "Yangtao Li" <frank.li@vivo.com>
+Description: Controls background discard granularity of inner discard thread
+ when is not in idle. Inner thread will not issue discards with size that
+ is smaller than granularity. The unit size is one block(4KB), now only
+ support configuring in range of [0, 512].
+ Default: 512
+
+What: /sys/fs/f2fs/<disk>/last_age_weight
+Date: January 2023
+Contact: "Ping Xiong" <xiongping1@xiaomi.com>
+Description: When DATA SEPARATION is on, it controls the weight of last data block age.
+
+What: /sys/fs/f2fs/<disk>/compress_watermark
+Date: February 2023
+Contact: "Yangtao Li" <frank.li@vivo.com>
+Description: When compress cache is on, it controls free memory watermark
+ in order to limit caching compress page. If free memory is lower
+ than watermark, then deny caching compress page. The value should be in
+ range of (0, 100], by default it was initialized as 20(%).
+
+What: /sys/fs/f2fs/<disk>/compress_percent
+Date: February 2023
+Contact: "Yangtao Li" <frank.li@vivo.com>
+Description: When compress cache is on, it controls cached page
+ percent(compress pages / free_ram) in order to limit caching compress page.
+ If cached page percent exceed threshold, then deny caching compress page.
+ The value should be in range of (0, 100], by default it was initialized
+ as 20(%).
+
+What: /sys/fs/f2fs/<disk>/discard_io_aware
+Date: November 2023
+Contact: "Chao Yu" <chao@kernel.org>
+Description: It controls to enable/disable IO aware feature for background discard.
+ By default, the value is 1 which indicates IO aware is on.
+
+What: /sys/fs/f2fs/<disk>/blkzone_alloc_policy
+Date: July 2024
+Contact: "Yuanhong Liao" <liaoyuanhong@vivo.com>
+Description: The zone UFS we are currently using consists of two parts:
+ conventional zones and sequential zones. It can be used to control which part
+ to prioritize for writes, with a default value of 0.
+
+ ======================== =========================================
+ value description
+ blkzone_alloc_policy = 0 Prioritize writing to sequential zones
+ blkzone_alloc_policy = 1 Only allow writing to sequential zones
+ blkzone_alloc_policy = 2 Prioritize writing to conventional zones
+ ======================== =========================================
+
+What: /sys/fs/f2fs/<disk>/migration_window_granularity
+Date: September 2024
+Contact: "Daeho Jeong" <daehojeong@google.com>
+Description: Controls migration window granularity of garbage collection on large
+ section. it can control the scanning window granularity for GC migration
+ in a unit of segment, while migration_granularity controls the number
+ of segments which can be migrated at the same turn.
+
+What: /sys/fs/f2fs/<disk>/reserved_segments
+Date: September 2024
+Contact: "Daeho Jeong" <daehojeong@google.com>
+Description: In order to fine tune GC behavior, we can control the number of
+ reserved segments.
+
+What: /sys/fs/f2fs/<disk>/gc_no_zoned_gc_percent
+Date: September 2024
+Contact: "Daeho Jeong" <daehojeong@google.com>
+Description: If the percentage of free sections over total sections is above this
+ number, F2FS do not garbage collection for zoned devices through the
+ background GC thread. the default number is "60".
+
+What: /sys/fs/f2fs/<disk>/gc_boost_zoned_gc_percent
+Date: September 2024
+Contact: "Daeho Jeong" <daehojeong@google.com>
+Description: If the percentage of free sections over total sections is under this
+ number, F2FS boosts garbage collection for zoned devices through the
+ background GC thread. the default number is "25".
+
+What: /sys/fs/f2fs/<disk>/gc_valid_thresh_ratio
+Date: September 2024
+Contact: "Daeho Jeong" <daehojeong@google.com>
+Description: It controls the valid block ratio threshold not to trigger excessive GC
+ for zoned devices. The initial value of it is 95(%). F2FS will stop the
+ background GC thread from initiating GC for sections having valid blocks
+ exceeding the ratio.
+
+What: /sys/fs/f2fs/<disk>/max_read_extent_count
+Date: November 2024
+Contact: "Chao Yu" <chao@kernel.org>
+Description: It controls max read extent count for per-inode, the value of threshold
+ is 10240 by default.
+
+What: /sys/fs/f2fs/tuning/reclaim_caches_kb
+Date: February 2025
+Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description: It reclaims the given KBs of file-backed pages registered by
+ ioctl(F2FS_IOC_DONATE_RANGE).
+ For example, writing N tries to drop N KBs spaces in LRU.
+
+What: /sys/fs/f2fs/<disk>/carve_out
+Date: March 2025
+Contact: "Daeho Jeong" <daehojeong@google.com>
+Description: For several zoned storage devices, vendors will provide extra space which
+ was used for device level GC than specs and F2FS can use this space for
+ filesystem level GC. To do that, we can reserve the space using
+ reserved_blocks. However, it is not enough, since this extra space should
+ not be shown to users. So, with this new sysfs node, we can hide the space
+ by subtracting reserved_blocks from total bytes.
+
+What: /sys/fs/f2fs/<disk>/encoding_flags
+Date: April 2025
+Contact: "Chao Yu" <chao@kernel.org>
+Description: This is a read-only entry to show the value of sb.s_encoding_flags, the
+ value is hexadecimal.
+
+ ============================ ==========
+ Flag_Name Flag_Value
+ ============================ ==========
+ SB_ENC_STRICT_MODE_FL 0x00000001
+ SB_ENC_NO_COMPAT_FALLBACK_FL 0x00000002
+ ============================ ==========
+
+What: /sys/fs/f2fs/<disk>/reserved_pin_section
+Date: June 2025
+Contact: "Chao Yu" <chao@kernel.org>
+Description: This threshold is used to control triggering garbage collection while
+ fallocating on pinned file, so, it can guarantee there is enough free
+ reserved section before preallocating on pinned file.
+ By default, the value is ovp_sections, especially, for zoned ufs, the
+ value is 1.
+
+What: /sys/fs/f2fs/<disk>/gc_boost_gc_multiple
+Date: June 2025
+Contact: "Daeho Jeong" <daehojeong@google.com>
+Description: Set a multiplier for the background GC migration window when F2FS GC is
+ boosted. The range should be from 1 to the segment count in a section.
+ Default: 5
+
+What: /sys/fs/f2fs/<disk>/gc_boost_gc_greedy
+Date: June 2025
+Contact: "Daeho Jeong" <daehojeong@google.com>
+Description: Control GC algorithm for boost GC. 0: cost benefit, 1: greedy
+ Default: 1
+
+What: /sys/fs/f2fs/<disk>/effective_lookup_mode
+Date: August 2025
+Contact: "Daniel Lee" <chullee@google.com>
+Description:
+ This is a read-only entry to show the effective directory lookup mode
+ F2FS is currently using for casefolded directories.
+ This considers both the "lookup_mode" mount option and the on-disk
+ encoding flag, SB_ENC_NO_COMPAT_FALLBACK_FL.
+
+ Possible values are:
+ - "perf": Hash-only lookup.
+ - "compat": Hash-based lookup with a linear search fallback enabled
+ - "auto:perf": lookup_mode is auto and fallback is disabled on-disk
+ - "auto:compat": lookup_mode is auto and fallback is enabled on-disk
+
+What: /sys/fs/f2fs/<disk>/bggc_io_aware
+Date: August 2025
+Contact: "Liao Yuanhong" <liaoyuanhong@vivo.com>
+Description: Used to adjust the BG_GC priority when pending IO, with a default value
+ of 0. Specifically, for ZUFS, the default value is 1.
+
+ ================== ======================================================
+ value description
+ bggc_io_aware = 0 skip background GC if there is any kind of pending IO
+ bggc_io_aware = 1 skip background GC if there is pending read IO
+ bggc_io_aware = 2 don't aware IO for background GC
+ ================== ======================================================
+
+What: /sys/fs/f2fs/<disk>/allocate_section_hint
+Date: August 2025
+Contact: "Liao Yuanhong" <liaoyuanhong@vivo.com>
+Description: Indicates the hint section between the first device and others in multi-devices
+ setup. It defaults to the end of the first device in sections. For a single storage
+ device, it defaults to the total number of sections. It can be manually set to match
+ scenarios where multi-devices are mapped to the same dm device.
+
+What: /sys/fs/f2fs/<disk>/allocate_section_policy
+Date: August 2025
+Contact: "Liao Yuanhong" <liaoyuanhong@vivo.com>
+Description: Controls write priority in multi-devices setups. A value of 0 means normal writing.
+ A value of 1 prioritizes writing to devices before the allocate_section_hint. A value of 2
+ prioritizes writing to devices after the allocate_section_hint. The default is 0.
+
+ =========================== ==========================================================
+ value description
+ allocate_section_policy = 0 Normal writing
+ allocate_section_policy = 1 Prioritize writing to section before allocate_section_hint
+ allocate_section_policy = 2 Prioritize writing to section after allocate_section_hint
+ =========================== ==========================================================
diff --git a/Documentation/ABI/testing/sysfs-fs-ubifs b/Documentation/ABI/testing/sysfs-fs-ubifs
new file mode 100644
index 000000000000..af5afda30220
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-fs-ubifs
@@ -0,0 +1,35 @@
+What: /sys/fs/ubifsX_Y/error_magic
+Date: October 2021
+KernelVersion: 5.16
+Contact: linux-mtd@lists.infradead.org
+Description:
+ Exposes magic errors: every node starts with a magic number.
+
+ This counter keeps track of the number of accesses of nodes
+ with a corrupted magic number.
+
+ The counter is reset to 0 with a remount.
+
+What: /sys/fs/ubifsX_Y/error_node
+Date: October 2021
+KernelVersion: 5.16
+Contact: linux-mtd@lists.infradead.org
+Description:
+ Exposes node errors. Every node embeds its type.
+
+ This counter keeps track of the number of accesses of nodes
+ with a corrupted node type.
+
+ The counter is reset to 0 with a remount.
+
+What: /sys/fs/ubifsX_Y/error_crc
+Date: October 2021
+KernelVersion: 5.16
+Contact: linux-mtd@lists.infradead.org
+Description:
+ Exposes crc errors: every node embeds a crc checksum.
+
+ This counter keeps track of the number of accesses of nodes
+ with a bad crc checksum.
+
+ The counter is reset to 0 with a remount.
diff --git a/Documentation/ABI/testing/sysfs-fs-virtiofs b/Documentation/ABI/testing/sysfs-fs-virtiofs
new file mode 100644
index 000000000000..4839dbce997e
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-fs-virtiofs
@@ -0,0 +1,11 @@
+What: /sys/fs/virtiofs/<n>/tag
+Date: Feb 2024
+Contact: virtio-fs@lists.linux.dev
+Description:
+ [RO] The mount "tag" that can be used to mount this filesystem.
+
+What: /sys/fs/virtiofs/<n>/device
+Date: Feb 2024
+Contact: virtio-fs@lists.linux.dev
+Description:
+ Symlink to the virtio device that exports this filesystem.
diff --git a/Documentation/ABI/testing/sysfs-fs-xfs b/Documentation/ABI/testing/sysfs-fs-xfs
index ea0cc8c42093..7da4de948b46 100644
--- a/Documentation/ABI/testing/sysfs-fs-xfs
+++ b/Documentation/ABI/testing/sysfs-fs-xfs
@@ -1,7 +1,7 @@
What: /sys/fs/xfs/<disk>/log/log_head_lsn
Date: July 2014
KernelVersion: 3.17
-Contact: xfs@oss.sgi.com
+Contact: linux-xfs@vger.kernel.org
Description:
The log sequence number (LSN) of the current head of the
log. The LSN is exported in "cycle:basic block" format.
@@ -10,30 +10,28 @@ Users: xfstests
What: /sys/fs/xfs/<disk>/log/log_tail_lsn
Date: July 2014
KernelVersion: 3.17
-Contact: xfs@oss.sgi.com
+Contact: linux-xfs@vger.kernel.org
Description:
The log sequence number (LSN) of the current tail of the
log. The LSN is exported in "cycle:basic block" format.
-What: /sys/fs/xfs/<disk>/log/reserve_grant_head
-Date: July 2014
-KernelVersion: 3.17
-Contact: xfs@oss.sgi.com
+What: /sys/fs/xfs/<disk>/log/reserve_grant_head_bytes
+Date: June 2024
+KernelVersion: 6.11
+Contact: linux-xfs@vger.kernel.org
Description:
The current state of the log reserve grant head. It
represents the total log reservation of all currently
- outstanding transactions. The grant head is exported in
- "cycle:bytes" format.
+ outstanding transactions in bytes.
Users: xfstests
-What: /sys/fs/xfs/<disk>/log/write_grant_head
-Date: July 2014
-KernelVersion: 3.17
-Contact: xfs@oss.sgi.com
+What: /sys/fs/xfs/<disk>/log/write_grant_head_bytes
+Date: June 2024
+KernelVersion: 6.11
+Contact: linux-xfs@vger.kernel.org
Description:
The current state of the log write grant head. It
represents the total log reservation of all currently
- oustanding transactions, including regrants due to
- rolling transactions. The grant head is exported in
- "cycle:bytes" format.
+ outstanding transactions, including regrants due to
+ rolling transactions in bytes.
Users: xfstests
diff --git a/Documentation/ABI/testing/sysfs-hypervisor-xen b/Documentation/ABI/testing/sysfs-hypervisor-xen
index 53b7b2ea7515..4dbe0c49b393 100644
--- a/Documentation/ABI/testing/sysfs-hypervisor-xen
+++ b/Documentation/ABI/testing/sysfs-hypervisor-xen
@@ -15,14 +15,17 @@ KernelVersion: 4.3
Contact: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Description: If running under Xen:
Describes mode that Xen's performance-monitoring unit (PMU)
- uses. Accepted values are
- "off" -- PMU is disabled
- "self" -- The guest can profile itself
- "hv" -- The guest can profile itself and, if it is
+ uses. Accepted values are:
+
+ ====== ============================================
+ "off" PMU is disabled
+ "self" The guest can profile itself
+ "hv" The guest can profile itself and, if it is
privileged (e.g. dom0), the hypervisor
- "all" -- The guest can profile itself, the hypervisor
+ "all" The guest can profile itself, the hypervisor
and all other guests. Only available to
privileged guests.
+ ====== ============================================
What: /sys/hypervisor/pmu/pmu_features
Date: August 2015
diff --git a/Documentation/ABI/testing/sysfs-kernel-address_bits b/Documentation/ABI/testing/sysfs-kernel-address_bits
new file mode 100644
index 000000000000..3b72e48086aa
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-address_bits
@@ -0,0 +1,10 @@
+What: /sys/kernel/address_bits
+Date: May 2023
+KernelVersion: 6.3
+Contact: Thomas Weißschuh <linux@weissschuh.net>
+Description:
+ The address size of the running kernel in bits.
+
+ Access: Read
+
+Users: util-linux
diff --git a/Documentation/ABI/testing/sysfs-kernel-boot_params b/Documentation/ABI/testing/sysfs-kernel-boot_params
index eca38ce2852d..7f9bda453c4d 100644
--- a/Documentation/ABI/testing/sysfs-kernel-boot_params
+++ b/Documentation/ABI/testing/sysfs-kernel-boot_params
@@ -23,16 +23,17 @@ Description: The /sys/kernel/boot_params directory contains two
representation of setup_data type. "data" file is the binary
representation of setup_data payload.
- The whole boot_params directory structure is like below:
- /sys/kernel/boot_params
- |__ data
- |__ setup_data
- | |__ 0
- | | |__ data
- | | |__ type
- | |__ 1
- | |__ data
- | |__ type
- |__ version
+ The whole boot_params directory structure is like below::
+
+ /sys/kernel/boot_params
+ |__ data
+ |__ setup_data
+ | |__ 0
+ | | |__ data
+ | | |__ type
+ | |__ 1
+ | |__ data
+ | |__ type
+ |__ version
Users: Kexec
diff --git a/Documentation/ABI/testing/sysfs-kernel-btf b/Documentation/ABI/testing/sysfs-kernel-btf
new file mode 100644
index 000000000000..fe96efdc9b6c
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-btf
@@ -0,0 +1,25 @@
+What: /sys/kernel/btf
+Date: Aug 2019
+KernelVersion: 5.5
+Contact: bpf@vger.kernel.org
+Description:
+ Contains BTF type information and related data for kernel and
+ kernel modules.
+
+What: /sys/kernel/btf/vmlinux
+Date: Aug 2019
+KernelVersion: 5.5
+Contact: bpf@vger.kernel.org
+Description:
+ Read-only binary attribute exposing kernel's own BTF type
+ information with description of all internal kernel types. See
+ Documentation/bpf/btf.rst for detailed description of format
+ itself.
+
+What: /sys/kernel/btf/<module-name>
+Date: Nov 2020
+KernelVersion: 5.11
+Contact: bpf@vger.kernel.org
+Description:
+ Read-only binary attribute exposing kernel module's BTF type
+ information as an add-on to the kernel's BTF (/sys/kernel/btf/vmlinux).
diff --git a/Documentation/ABI/testing/sysfs-kernel-cpu_byteorder b/Documentation/ABI/testing/sysfs-kernel-cpu_byteorder
new file mode 100644
index 000000000000..f0e6ac1b5356
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-cpu_byteorder
@@ -0,0 +1,12 @@
+What: /sys/kernel/cpu_byteorder
+Date: February 2023
+KernelVersion: 6.2
+Contact: Thomas Weißschuh <linux@weissschuh.net>
+Description:
+ The endianness of the running kernel.
+
+ Access: Read
+
+ Valid values:
+ "little", "big"
+Users: util-linux
diff --git a/Documentation/ABI/testing/sysfs-kernel-dmabuf-buffers b/Documentation/ABI/testing/sysfs-kernel-dmabuf-buffers
new file mode 100644
index 000000000000..5d3bc997dc64
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-dmabuf-buffers
@@ -0,0 +1,24 @@
+What: /sys/kernel/dmabuf/buffers
+Date: May 2021
+KernelVersion: v5.13
+Contact: Hridya Valsaraju <hridya@google.com>
+Description: The /sys/kernel/dmabuf/buffers directory contains a
+ snapshot of the internal state of every DMA-BUF.
+ /sys/kernel/dmabuf/buffers/<inode_number> will contain the
+ statistics for the DMA-BUF with the unique inode number
+ <inode_number>
+Users: kernel memory tuning/debugging tools
+
+What: /sys/kernel/dmabuf/buffers/<inode_number>/exporter_name
+Date: May 2021
+KernelVersion: v5.13
+Contact: Hridya Valsaraju <hridya@google.com>
+Description: This file is read-only and contains the name of the exporter of
+ the DMA-BUF.
+
+What: /sys/kernel/dmabuf/buffers/<inode_number>/size
+Date: May 2021
+KernelVersion: v5.13
+Contact: Hridya Valsaraju <hridya@google.com>
+Description: This file is read-only and specifies the size of the DMA-BUF in
+ bytes.
diff --git a/Documentation/ABI/testing/sysfs-kernel-fadump b/Documentation/ABI/testing/sysfs-kernel-fadump
new file mode 100644
index 000000000000..b64b7622e6fc
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-fadump
@@ -0,0 +1,59 @@
+What: /sys/kernel/fadump/*
+Date: Dec 2019
+Contact: linuxppc-dev@lists.ozlabs.org
+Description:
+ The /sys/kernel/fadump/* is a collection of FADump sysfs
+ file provide information about the configuration status
+ of Firmware Assisted Dump (FADump).
+
+What: /sys/kernel/fadump/enabled
+Date: Dec 2019
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: read only
+ Primarily used to identify whether the FADump is enabled in
+ the kernel or not.
+User: Kdump service
+
+What: /sys/kernel/fadump/registered
+Date: Dec 2019
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: read/write
+ Helps to control the dump collect feature from userspace.
+ Setting 1 to this file enables the system to collect the
+ dump and 0 to disable it.
+User: Kdump service
+
+What: /sys/kernel/fadump/release_mem
+Date: Dec 2019
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: write only
+ This is a special sysfs file and only available when
+ the system is booted to capture the vmcore using FADump.
+ It is used to release the memory reserved by FADump to
+ save the crash dump.
+
+What: /sys/kernel/fadump/mem_reserved
+Date: Dec 2019
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: read only
+ Provide information about the amount of memory reserved by
+ FADump to save the crash dump in bytes.
+
+What: /sys/kernel/fadump/hotplug_ready
+Date: Apr 2024
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: read only
+ Kdump udev rule re-registers fadump on memory add/remove events,
+ primarily to update the elfcorehdr. This sysfs indicates the
+ kdump udev rule that fadump re-registration is not required on
+ memory add/remove events because elfcorehdr is now prepared in
+ the second/fadump kernel.
+User: kexec-tools
+
+What: /sys/kernel/fadump/bootargs_append
+Date: May 2024
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: read/write
+ This is a special sysfs file available to setup additional
+ parameters to be passed to capture kernel. For HASH MMU it
+ is exported only if RMA size higher than 768MB.
diff --git a/Documentation/ABI/testing/sysfs-kernel-fscaps b/Documentation/ABI/testing/sysfs-kernel-fscaps
index 50a3033b5e15..bcff34665192 100644
--- a/Documentation/ABI/testing/sysfs-kernel-fscaps
+++ b/Documentation/ABI/testing/sysfs-kernel-fscaps
@@ -2,7 +2,7 @@ What: /sys/kernel/fscaps
Date: February 2011
KernelVersion: 2.6.38
Contact: Ludwig Nussel <ludwig.nussel@suse.de>
-Description
+Description:
Shows whether file system capabilities are honored
when executing a binary
diff --git a/Documentation/ABI/testing/sysfs-kernel-hardlockup_count b/Documentation/ABI/testing/sysfs-kernel-hardlockup_count
new file mode 100644
index 000000000000..dfdd4078b077
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-hardlockup_count
@@ -0,0 +1,7 @@
+What: /sys/kernel/hardlockup_count
+Date: May 2025
+KernelVersion: 6.16
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description:
+ Shows how many times the system has detected a hard lockup since last boot.
+ Available only if CONFIG_HARDLOCKUP_DETECTOR is enabled.
diff --git a/Documentation/ABI/testing/sysfs-kernel-iommu_groups b/Documentation/ABI/testing/sysfs-kernel-iommu_groups
index 35c64e00b35c..a42d4383d999 100644
--- a/Documentation/ABI/testing/sysfs-kernel-iommu_groups
+++ b/Documentation/ABI/testing/sysfs-kernel-iommu_groups
@@ -24,3 +24,41 @@ Description: /sys/kernel/iommu_groups/reserved_regions list IOVA
region is described on a single line: the 1st field is
the base IOVA, the second is the end IOVA and the third
field describes the type of the region.
+
+ Since kernel 5.3, in case an RMRR is used only by graphics or
+ USB devices it is now exposed as "direct-relaxable" instead
+ of "direct". In device assignment use case, for instance,
+ those RMRR are considered to be relaxable and safe.
+
+What: /sys/kernel/iommu_groups/<grp_id>/type
+Date: November 2020
+KernelVersion: v5.11
+Contact: Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>
+Description: /sys/kernel/iommu_groups/<grp_id>/type shows the type of default
+ domain in use by iommu for this group. See include/linux/iommu.h
+ for possible read values. A privileged user could request kernel to
+ change the group type by writing to this file. Valid write values:
+
+ ======== ======================================================
+ DMA All the DMA transactions from the device in this group
+ are translated by the iommu.
+ DMA-FQ As above, but using batched invalidation to lazily
+ remove translations after use. This may offer reduced
+ overhead at the cost of reduced memory protection.
+ identity All the DMA transactions from the device in this group
+ are not translated by the iommu. Maximum performance
+ but zero protection.
+ auto Change to the type the device was booted with.
+ ======== ======================================================
+
+ The default domain type of a group may be modified only when
+
+ - The device in the group is not bound to any device driver.
+ So, the users must unbind the appropriate driver before
+ changing the default domain type.
+
+ Unbinding a device driver will take away the driver's control
+ over the device and if done on devices that host root file
+ system could lead to catastrophic effects (the users might
+ need to reboot the machine to get it to normal state). So, it's
+ expected that the users understand what they're doing.
diff --git a/Documentation/ABI/testing/sysfs-kernel-livepatch b/Documentation/ABI/testing/sysfs-kernel-livepatch
index dac7e1e62a8b..3c3f36b32b57 100644
--- a/Documentation/ABI/testing/sysfs-kernel-livepatch
+++ b/Documentation/ABI/testing/sysfs-kernel-livepatch
@@ -33,18 +33,6 @@ Description:
An attribute which indicates whether the patch is currently in
transition.
-What: /sys/kernel/livepatch/<patch>/signal
-Date: Nov 2017
-KernelVersion: 4.15.0
-Contact: live-patching@vger.kernel.org
-Description:
- A writable attribute that allows administrator to affect the
- course of an existing transition. Writing 1 sends a fake
- signal to all remaining blocking tasks. The fake signal
- means that no proper signal is delivered (there is no data in
- signal pending structures). Tasks are interrupted or woken up,
- and forced to change their patched state.
-
What: /sys/kernel/livepatch/<patch>/force
Date: Nov 2017
KernelVersion: 4.15.0
@@ -57,7 +45,24 @@ Description:
use this feature without a clearance from a patch
distributor. Removal (rmmod) of patch modules is permanently
disabled when the feature is used. See
- Documentation/livepatch/livepatch.txt for more information.
+ Documentation/livepatch/livepatch.rst for more information.
+
+What: /sys/kernel/livepatch/<patch>/replace
+Date: Jun 2024
+KernelVersion: 6.11.0
+Contact: live-patching@vger.kernel.org
+Description:
+ An attribute which indicates whether the patch supports
+ atomic-replace.
+
+What: /sys/kernel/livepatch/<patch>/stack_order
+Date: Jan 2025
+KernelVersion: 6.14.0
+Description:
+ This attribute specifies the sequence in which live patch modules
+ are applied to the system. If multiple live patches modify the same
+ function, the implementation with the biggest 'stack_order' number
+ is used, unless a transition is currently in progress.
What: /sys/kernel/livepatch/<patch>/<object>
Date: Nov 2014
@@ -67,6 +72,14 @@ Description:
The object directory contains subdirectories for each function
that is patched within the object.
+What: /sys/kernel/livepatch/<patch>/<object>/patched
+Date: August 2022
+KernelVersion: 6.1.0
+Contact: live-patching@vger.kernel.org
+Description:
+ An attribute which indicates whether the object is currently
+ patched.
+
What: /sys/kernel/livepatch/<patch>/<object>/<function,sympos>
Date: Nov 2014
KernelVersion: 3.19.0
diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-cma b/Documentation/ABI/testing/sysfs-kernel-mm-cma
new file mode 100644
index 000000000000..aaf2a5d8b13b
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-cma
@@ -0,0 +1,44 @@
+What: /sys/kernel/mm/cma/
+Date: Feb 2021
+Contact: Minchan Kim <minchan@kernel.org>
+Description:
+ /sys/kernel/mm/cma/ contains a subdirectory for each CMA
+ heap name (also sometimes called CMA areas).
+
+ Each CMA heap subdirectory (that is, each
+ /sys/kernel/mm/cma/<cma-heap-name> directory) contains the
+ following items:
+
+ alloc_pages_success
+ alloc_pages_fail
+
+What: /sys/kernel/mm/cma/<cma-heap-name>/alloc_pages_success
+Date: Feb 2021
+Contact: Minchan Kim <minchan@kernel.org>
+Description:
+ the number of pages CMA API succeeded to allocate
+
+What: /sys/kernel/mm/cma/<cma-heap-name>/alloc_pages_fail
+Date: Feb 2021
+Contact: Minchan Kim <minchan@kernel.org>
+Description:
+ the number of pages CMA API failed to allocate
+
+What: /sys/kernel/mm/cma/<cma-heap-name>/release_pages_success
+Date: Feb 2024
+Contact: Anshuman Khandual <anshuman.khandual@arm.com>
+Description:
+ the number of pages CMA API succeeded to release
+
+What: /sys/kernel/mm/cma/<cma-heap-name>/total_pages
+Date: Jun 2024
+Contact: Frank van der Linden <fvdl@google.com>
+Description:
+ The size of the CMA area in pages.
+
+What: /sys/kernel/mm/cma/<cma-heap-name>/available_pages
+Date: Jun 2024
+Contact: Frank van der Linden <fvdl@google.com>
+Description:
+ The number of pages in the CMA area that are still
+ available for CMA allocation.
diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-damon b/Documentation/ABI/testing/sysfs-kernel-mm-damon
new file mode 100644
index 000000000000..b6b71db36ca7
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-damon
@@ -0,0 +1,546 @@
+what: /sys/kernel/mm/damon/
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Interface for Data Access MONitoring (DAMON). Contains files
+ for controlling DAMON. For more details on DAMON itself,
+ please refer to Documentation/admin-guide/mm/damon/index.rst.
+
+What: /sys/kernel/mm/damon/admin/
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Interface for privileged users of DAMON. Contains files for
+ controlling DAMON that aimed to be used by privileged users.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/nr_kdamonds
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing a number 'N' to this file creates the number of
+ directories for controlling each DAMON worker thread (kdamond)
+ named '0' to 'N-1' under the kdamonds/ directory.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/state
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing 'on' or 'off' to this file makes the kdamond starts or
+ stops, respectively. Reading the file returns the keywords
+ based on the current status. Writing 'commit' to this file
+ makes the kdamond reads the user inputs in the sysfs files
+ except 'state' again. Writing 'commit_schemes_quota_goals' to
+ this file makes the kdamond reads the quota goal files again.
+ Writing 'update_schemes_stats' to the file updates contents of
+ schemes stats files of the kdamond. Writing
+ 'update_schemes_tried_regions' to the file updates contents of
+ 'tried_regions' directory of every scheme directory of this
+ kdamond. Writing 'update_schemes_tried_bytes' to the file
+ updates only '.../tried_regions/total_bytes' files of this
+ kdamond. Writing 'clear_schemes_tried_regions' to the file
+ removes contents of the 'tried_regions' directory. Writing
+ 'update_schemes_effective_quotas' to the file updates
+ '.../quotas/effective_bytes' files of this kdamond.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/pid
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Reading this file returns the pid of the kdamond if it is
+ running.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/refresh_ms
+Date: Jul 2025
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing a value to this file sets the time interval for
+ automatic DAMON status file contents update. Writing '0'
+ disables the update. Reading this file returns the value.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/nr_contexts
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing a number 'N' to this file creates the number of
+ directories for controlling each DAMON context named '0' to
+ 'N-1' under the contexts/ directory.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/avail_operations
+Date: Apr 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Reading this file returns the available monitoring operations
+ sets on the currently running kernel.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/operations
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing a keyword for a monitoring operations set ('vaddr' for
+ virtual address spaces monitoring, 'fvaddr' for fixed virtual
+ address ranges monitoring, and 'paddr' for the physical address
+ space monitoring) to this file makes the context to use the
+ operations set. Reading the file returns the keyword for the
+ operations set the context is set to use.
+
+ Note that only the operations sets that listed in
+ 'avail_operations' file are valid inputs.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/addr_unit
+Date: Aug 2025
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing an integer to this file sets the 'address unit'
+ parameter of the given operations set of the context. Reading
+ the file returns the last-written 'address unit' value.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/intervals/sample_us
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing a value to this file sets the sampling interval of the
+ DAMON context in microseconds as the value. Reading this file
+ returns the value.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/intervals/aggr_us
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing a value to this file sets the aggregation interval of
+ the DAMON context in microseconds as the value. Reading this
+ file returns the value.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/intervals/update_us
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing a value to this file sets the update interval of the
+ DAMON context in microseconds as the value. Reading this file
+ returns the value.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/intervals/intrvals_goal/access_bp
+Date: Feb 2025
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing a value to this file sets the monitoring intervals
+ auto-tuning target DAMON-observed access events ratio within
+ the given time interval (aggrs in same directory), in bp
+ (1/10,000). Reading this file returns the value.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/intervals/intrvals_goal/aggrs
+Date: Feb 2025
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing a value to this file sets the time interval to achieve
+ the monitoring intervals auto-tuning target DAMON-observed
+ access events ratio (access_bp in same directory) within.
+ Reading this file returns the value.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/intervals/intrvals_goal/min_sample_us
+Date: Feb 2025
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing a value to this file sets the minimum value of
+ auto-tuned sampling interval in microseconds. Reading this
+ file returns the value.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/intervals/intrvals_goal/max_sample_us
+Date: Feb 2025
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing a value to this file sets the maximum value of
+ auto-tuned sampling interval in microseconds. Reading this
+ file returns the value.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/nr_regions/min
+
+WDate: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing a value to this file sets the minimum number of
+ monitoring regions of the DAMON context as the value. Reading
+ this file returns the value.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/nr_regions/max
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing a value to this file sets the maximum number of
+ monitoring regions of the DAMON context as the value. Reading
+ this file returns the value.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/targets/nr_targets
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing a number 'N' to this file creates the number of
+ directories for controlling each DAMON target of the context
+ named '0' to 'N-1' under the contexts/ directory.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/targets/<T>/pid_target
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the pid of
+ the target process if the context is for virtual address spaces
+ monitoring, respectively.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/targets/<T>/regions/nr_regions
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing a number 'N' to this file creates the number of
+ directories for setting each DAMON target memory region of the
+ context named '0' to 'N-1' under the regions/ directory. In
+ case of the virtual address space monitoring, DAMON
+ automatically sets the target memory region based on the target
+ processes' mappings.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/targets/<T>/regions/<R>/start
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the start
+ address of the monitoring region.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/targets/<T>/regions/<R>/end
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the end
+ address of the monitoring region.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/nr_schemes
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing a number 'N' to this file creates the number of
+ directories for controlling each DAMON-based operation scheme
+ of the context named '0' to 'N-1' under the schemes/ directory.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/action
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the action
+ of the scheme.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/target_nid
+Date: Jun 2024
+Contact: SeongJae Park <sj@kernel.org>
+Description: Action's target NUMA node id. Supported by only relevant
+ actions.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/apply_interval_us
+Date: Sep 2023
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing a value to this file sets the action apply interval of
+ the scheme in microseconds. Reading this file returns the
+ value.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/access_pattern/sz/min
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the minimum
+ size of the scheme's target regions in bytes.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/access_pattern/sz/max
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the maximum
+ size of the scheme's target regions in bytes.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/access_pattern/nr_accesses/min
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the manimum
+ 'nr_accesses' of the scheme's target regions.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/access_pattern/nr_accesses/max
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the maximum
+ 'nr_accesses' of the scheme's target regions.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/access_pattern/age/min
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the minimum
+ 'age' of the scheme's target regions.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/access_pattern/age/max
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the maximum
+ 'age' of the scheme's target regions.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/ms
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the time
+ quota of the scheme in milliseconds.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/bytes
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the size
+ quota of the scheme in bytes.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/effective_bytes
+Date: Feb 2024
+Contact: SeongJae Park <sj@kernel.org>
+Description: Reading from this file gets the effective size quota of the
+ scheme in bytes, which adjusted for the time quota and goals.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/reset_interval_ms
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the quotas
+ charge reset interval of the scheme in milliseconds.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/goals/nr_goals
+Date: Nov 2023
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing a number 'N' to this file creates the number of
+ directories for setting automatic tuning of the scheme's
+ aggressiveness named '0' to 'N-1' under the goals/ directory.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/goals/<G>/target_metric
+Date: Feb 2024
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the quota
+ auto-tuning goal metric.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/goals/<G>/target_value
+Date: Nov 2023
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the target
+ value of the goal metric.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/goals/<G>/current_value
+Date: Nov 2023
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the current
+ value of the goal metric.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/goals/<G>/nid
+Date: Apr 2025
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the nid
+ parameter of the goal.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/weights/sz_permil
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the
+ under-quota limit regions prioritization weight for 'size' in
+ permil.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/weights/nr_accesses_permil
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the
+ under-quota limit regions prioritization weight for
+ 'nr_accesses' in permil.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/weights/age_permil
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the
+ under-quota limit regions prioritization weight for 'age' in
+ permil.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/watermarks/metric
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the metric
+ of the watermarks for the scheme. The writable/readable
+ keywords for this file are 'none' for disabling the watermarks
+ feature, or 'free_mem_rate' for the system's global free memory
+ rate in permil.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/watermarks/interval_us
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the metric
+ check interval of the watermarks for the scheme in
+ microseconds.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/watermarks/high
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the high
+ watermark of the scheme in permil.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/watermarks/mid
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the mid
+ watermark of the scheme in permil.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/watermarks/low
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the low
+ watermark of the scheme in permil.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/filters/nr_filters
+Date: Dec 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing a number 'N' to this file creates the number of
+ directories for setting filters of the scheme named '0' to
+ 'N-1' under the filters/ directory.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/filters/<F>/type
+Date: Dec 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the type of
+ the memory of the interest. 'anon' for anonymous pages,
+ 'memcg' for specific memory cgroup, 'young' for young pages,
+ 'addr' for address range (an open-ended interval), or 'target'
+ for DAMON monitoring target can be written and read.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/filters/<F>/memcg_path
+Date: Dec 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: If 'memcg' is written to the 'type' file, writing to and
+ reading from this file sets and gets the path to the memory
+ cgroup of the interest.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/filters/<F>/addr_start
+Date: Jul 2023
+Contact: SeongJae Park <sj@kernel.org>
+Description: If 'addr' is written to the 'type' file, writing to or reading
+ from this file sets or gets the start address of the address
+ range for the filter.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/filters/<F>/addr_end
+Date: Jul 2023
+Contact: SeongJae Park <sj@kernel.org>
+Description: If 'addr' is written to the 'type' file, writing to or reading
+ from this file sets or gets the end address of the address
+ range for the filter.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/filters/<F>/min
+Date: Feb 2025
+Contact: SeongJae Park <sj@kernel.org>
+Description: If 'hugepage_size' is written to the 'type' file, writing to
+ or reading from this file sets or gets the minimum size of the
+ hugepage for the filter.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/filters/<F>/max
+Date: Feb 2025
+Contact: SeongJae Park <sj@kernel.org>
+Description: If 'hugepage_size' is written to the 'type' file, writing to
+ or reading from this file sets or gets the maximum size of the
+ hugepage for the filter.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/filters/<F>/target_idx
+Date: Dec 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: If 'target' is written to the 'type' file, writing to or
+ reading from this file sets or gets the index of the DAMON
+ monitoring target of the interest.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/filters/<F>/matching
+Date: Dec 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing 'Y' or 'N' to this file sets whether the filter is for
+ the memory of the 'type', or all except the 'type'.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/filters/<F>/allow
+Date: Jan 2025
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing 'Y' or 'N' to this file sets whether to allow or reject
+ applying the scheme's action to the memory that satisfies the
+ 'type' and the 'matching' of the directory.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/core_filters
+Date: Feb 2025
+Contact: SeongJae Park <sj@kernel.org>
+Description: Directory for DAMON core layer-handled DAMOS filters. Files
+ under this directory works same to those of
+ /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/filters
+ directory.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/ops_filters
+Date: Feb 2025
+Contact: SeongJae Park <sj@kernel.org>
+Description: Directory for DAMON operations set layer-handled DAMOS filters.
+ Files under this directory works same to those of
+ /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/filters
+ directory.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/dests/nr_dests
+Date: Jul 2025
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing a number 'N' to this file creates the number of
+ directories for setting action destinations of the scheme named
+ '0' to 'N-1' under the dests/ directory.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/dests/<D>/id
+Date: Jul 2025
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the id of
+ the DAMOS action destination. For DAMOS_MIGRATE_{HOT,COLD}
+ actions, the destination node's node id can be written and
+ read.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/dests/<D>/weight
+Date: Jul 2025
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the weight
+ of the DAMOS action destination to select as the destination of
+ each action among the destinations.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/stats/nr_tried
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Reading this file returns the number of regions that the action
+ of the scheme has tried to be applied.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/stats/sz_tried
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Reading this file returns the total size of regions that the
+ action of the scheme has tried to be applied in bytes.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/stats/nr_applied
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Reading this file returns the number of regions that the action
+ of the scheme has successfully applied.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/stats/sz_applied
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Reading this file returns the total size of regions that the
+ action of the scheme has successfully applied in bytes.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/stats/sz_ops_filter_passed
+Date: Dec 2024
+Contact: SeongJae Park <sj@kernel.org>
+Description: Reading this file returns the total size of memory that passed
+ DAMON operations layer-handled filters of the scheme in bytes.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/stats/qt_exceeds
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Reading this file returns the number of the exceed events of
+ the scheme's quotas.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/tried_regions/total_bytes
+Date: Jul 2023
+Contact: SeongJae Park <sj@kernel.org>
+Description: Reading this file returns the total amount of memory that
+ corresponding DAMON-based Operation Scheme's action has tried
+ to be applied.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/tried_regions/<R>/start
+Date: Oct 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Reading this file returns the start address of a memory region
+ that corresponding DAMON-based Operation Scheme's action has
+ tried to be applied.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/tried_regions/<R>/end
+Date: Oct 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Reading this file returns the end address of a memory region
+ that corresponding DAMON-based Operation Scheme's action has
+ tried to be applied.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/tried_regions/<R>/nr_accesses
+Date: Oct 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Reading this file returns the 'nr_accesses' of a memory region
+ that corresponding DAMON-based Operation Scheme's action has
+ tried to be applied.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/tried_regions/<R>/age
+Date: Oct 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Reading this file returns the 'age' of a memory region that
+ corresponding DAMON-based Operation Scheme's action has tried
+ to be applied.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/tried_regions/<R>/sz_filter_passed
+Date: Dec 2024
+Contact: SeongJae Park <sj@kernel.org>
+Description: Reading this file returns the size of the memory in the region
+ that passed DAMON operations layer-handled filters of the
+ scheme in bytes.
diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-hugepages b/Documentation/ABI/testing/sysfs-kernel-mm-hugepages
index fdaa2162fae1..294387e2c7fb 100644
--- a/Documentation/ABI/testing/sysfs-kernel-mm-hugepages
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-hugepages
@@ -7,9 +7,11 @@ Description:
of the hugepages supported by the kernel/CPU combination.
Under these directories are a number of files:
- nr_hugepages
- nr_overcommit_hugepages
- free_hugepages
- surplus_hugepages
- resv_hugepages
+
+ - nr_hugepages
+ - nr_overcommit_hugepages
+ - free_hugepages
+ - surplus_hugepages
+ - resv_hugepages
+
See Documentation/admin-guide/mm/hugetlbpage.rst for details.
diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-ksm b/Documentation/ABI/testing/sysfs-kernel-mm-ksm
index dfc13244cda3..6041a025b65a 100644
--- a/Documentation/ABI/testing/sysfs-kernel-mm-ksm
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-ksm
@@ -34,13 +34,14 @@ Description: Kernel Samepage Merging daemon sysfs interface
in a tree.
run: write 0 to disable ksm, read 0 while ksm is disabled.
- write 1 to run ksm, read 1 while ksm is running.
- write 2 to disable ksm and unmerge all its pages.
+
+ - write 1 to run ksm, read 1 while ksm is running.
+ - write 2 to disable ksm and unmerge all its pages.
sleep_millisecs: how many milliseconds ksm should sleep between
scans.
- See Documentation/vm/ksm.rst for more information.
+ See Documentation/mm/ksm.rst for more information.
What: /sys/kernel/mm/ksm/merge_across_nodes
Date: January 2013
@@ -50,3 +51,11 @@ Description: Control merging pages across different NUMA nodes.
When it is set to 0 only pages from the same node are merged,
otherwise pages from all nodes can be merged together (default).
+
+What: /sys/kernel/mm/ksm/general_profit
+Date: April 2023
+KernelVersion: 6.4
+Contact: Linux memory management mailing list <linux-mm@kvack.org>
+Description: Measure how effective KSM is.
+ general_profit: how effective is KSM. The formula for the
+ calculation is in Documentation/admin-guide/mm/ksm.rst.
diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-memory-tiers b/Documentation/ABI/testing/sysfs-kernel-mm-memory-tiers
new file mode 100644
index 000000000000..721a05b90109
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-memory-tiers
@@ -0,0 +1,25 @@
+What: /sys/devices/virtual/memory_tiering/
+Date: August 2022
+Contact: Linux memory management mailing list <linux-mm@kvack.org>
+Description: A collection of all the memory tiers allocated.
+
+ Individual memory tier details are contained in subdirectories
+ named by the abstract distance of the memory tier.
+
+ /sys/devices/virtual/memory_tiering/memory_tierN/
+
+
+What: /sys/devices/virtual/memory_tiering/memory_tierN/
+ /sys/devices/virtual/memory_tiering/memory_tierN/nodelist
+Date: August 2022
+Contact: Linux memory management mailing list <linux-mm@kvack.org>
+Description: Directory with details of a specific memory tier
+
+ This is the directory containing information about a particular
+ memory tier, memtierN, where N is derived based on abstract distance.
+
+ A smaller value of N implies a higher (faster) memory tier in the
+ hierarchy.
+
+ nodelist: NUMA nodes that are part of this memory tier.
+
diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-mempolicy b/Documentation/ABI/testing/sysfs-kernel-mm-mempolicy
new file mode 100644
index 000000000000..8ac327fd7fb6
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-mempolicy
@@ -0,0 +1,4 @@
+What: /sys/kernel/mm/mempolicy/
+Date: January 2024
+Contact: Linux memory management mailing list <linux-mm@kvack.org>
+Description: Interface for Mempolicy
diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-mempolicy-weighted-interleave b/Documentation/ABI/testing/sysfs-kernel-mm-mempolicy-weighted-interleave
new file mode 100644
index 000000000000..649c0e9b895c
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-mempolicy-weighted-interleave
@@ -0,0 +1,54 @@
+What: /sys/kernel/mm/mempolicy/weighted_interleave/
+Date: January 2024
+Contact: Linux memory management mailing list <linux-mm@kvack.org>
+Description: Configuration Interface for the Weighted Interleave policy
+
+What: /sys/kernel/mm/mempolicy/weighted_interleave/nodeN
+Date: January 2024
+Contact: Linux memory management mailing list <linux-mm@kvack.org>
+Description: Weight configuration interface for nodeN
+
+ The interleave weight for a memory node (N). These weights are
+ utilized by tasks which have set their mempolicy to
+ MPOL_WEIGHTED_INTERLEAVE.
+
+ These weights only affect new allocations, and changes at runtime
+ will not cause migrations on already allocated pages.
+
+ The minimum weight for a node is always 1.
+
+ Minimum weight: 1
+ Maximum weight: 255
+
+ Writing invalid values (i.e. any values not in [1,255],
+ empty string, ...) will return -EINVAL.
+
+ Changing the weight to a valid value will automatically
+ switch the system to manual mode as well.
+
+What: /sys/kernel/mm/mempolicy/weighted_interleave/auto
+Date: May 2025
+Contact: Linux memory management mailing list <linux-mm@kvack.org>
+Description: Auto-weighting configuration interface
+
+ Configuration mode for weighted interleave. 'true' indicates
+ that the system is in auto mode, and a 'false' indicates that
+ the system is in manual mode.
+
+ In auto mode, all node weights are re-calculated and overwritten
+ (visible via the nodeN interfaces) whenever new bandwidth data
+ is made available during either boot or hotplug events.
+
+ In manual mode, node weights can only be updated by the user.
+ Note that nodes that are onlined with previously set weights
+ will reuse those weights. If they were not previously set or
+ are onlined with missing bandwidth data, the weights will use
+ a default weight of 1.
+
+ Writing any true value string (e.g. Y or 1) will enable auto
+ mode, while writing any false value string (e.g. N or 0) will
+ enable manual mode. All other strings are ignored and will
+ return -EINVAL.
+
+ Writing a new weight to a node directly via the nodeN interface
+ will also automatically switch the system to manual mode.
diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-numa b/Documentation/ABI/testing/sysfs-kernel-mm-numa
new file mode 100644
index 000000000000..90e375ff54cb
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-numa
@@ -0,0 +1,28 @@
+What: /sys/kernel/mm/numa/
+Date: June 2021
+Contact: Linux memory management mailing list <linux-mm@kvack.org>
+Description: Interface for NUMA
+
+What: /sys/kernel/mm/numa/demotion_enabled
+Date: June 2021
+Contact: Linux memory management mailing list <linux-mm@kvack.org>
+Description: Enable/disable demoting pages during reclaim
+
+ Page migration during reclaim is intended for systems
+ with tiered memory configurations. These systems have
+ multiple types of memory with varied performance
+ characteristics instead of plain NUMA systems where
+ the same kind of memory is found at varied distances.
+ Allowing page migration during reclaim enables these
+ systems to migrate pages from fast tiers to slow tiers
+ when the fast tier is under pressure. This migration
+ is performed before swap if an eligible numa node is
+ present in cpuset.mems for the cgroup (or if cpuset v1
+ is being used). If cpusets.mems changes at runtime, it
+ may move data to a NUMA node that does not fall into the
+ cpuset of the new cpusets.mems, which might be construed
+ to violate the guarantees of cpusets. Shared memory,
+ such as libraries, owned by another cgroup may still be
+ demoted and result in memory use on a node not present
+ in cpusets.mem. This should not be enabled on systems
+ which need strict cpuset location guarantees.
diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-transparent-hugepage b/Documentation/ABI/testing/sysfs-kernel-mm-transparent-hugepage
new file mode 100644
index 000000000000..7bfbb9cc2c11
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-transparent-hugepage
@@ -0,0 +1,18 @@
+What: /sys/kernel/mm/transparent_hugepage/
+Date: April 2024
+Contact: Linux memory management mailing list <linux-mm@kvack.org>
+Description:
+ /sys/kernel/mm/transparent_hugepage/ contains a number of files and
+ subdirectories,
+
+ - defrag
+ - enabled
+ - hpage_pmd_size
+ - khugepaged
+ - shmem_enabled
+ - use_zero_page
+ - subdirectories of the form hugepages-<size>kB, where <size>
+ is the page size of the hugepages supported by the kernel/CPU
+ combination.
+
+ See Documentation/admin-guide/mm/transhuge.rst for details.
diff --git a/Documentation/ABI/testing/sysfs-kernel-oops_count b/Documentation/ABI/testing/sysfs-kernel-oops_count
new file mode 100644
index 000000000000..156cca9dbc96
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-oops_count
@@ -0,0 +1,6 @@
+What: /sys/kernel/oops_count
+Date: November 2022
+KernelVersion: 6.2.0
+Contact: Linux Kernel Hardening List <linux-hardening@vger.kernel.org>
+Description:
+ Shows how many times the system has Oopsed since last boot.
diff --git a/Documentation/ABI/testing/sysfs-kernel-rcu_stall_count b/Documentation/ABI/testing/sysfs-kernel-rcu_stall_count
new file mode 100644
index 000000000000..a4a97a7f4a4d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-rcu_stall_count
@@ -0,0 +1,6 @@
+What: /sys/kernel/rcu_stall_count
+Date: May 2025
+KernelVersion: 6.16
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description:
+ Shows how many times the system has detected an RCU stall since last boot.
diff --git a/Documentation/ABI/testing/sysfs-kernel-reboot b/Documentation/ABI/testing/sysfs-kernel-reboot
new file mode 100644
index 000000000000..52571fd5ddba
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-reboot
@@ -0,0 +1,40 @@
+What: /sys/kernel/reboot
+Date: November 2020
+KernelVersion: 5.11
+Contact: Matteo Croce <teknoraver@meta.com>
+Description: Interface to set the kernel reboot behavior, similarly to
+ what can be done via the reboot= cmdline option.
+ (see Documentation/admin-guide/kernel-parameters.txt)
+
+What: /sys/kernel/reboot/mode
+Date: November 2020
+KernelVersion: 5.11
+Contact: Matteo Croce <teknoraver@meta.com>
+Description: Reboot mode. Valid values are: cold warm hard soft gpio
+
+What: /sys/kernel/reboot/type
+Date: November 2020
+KernelVersion: 5.11
+Contact: Matteo Croce <teknoraver@meta.com>
+Description: Reboot type. Valid values are: bios acpi kbd triple efi pci
+
+What: /sys/kernel/reboot/cpu
+Date: November 2020
+KernelVersion: 5.11
+Contact: Matteo Croce <teknoraver@meta.com>
+Description: CPU number to use to reboot.
+
+What: /sys/kernel/reboot/force
+Date: November 2020
+KernelVersion: 5.11
+Contact: Matteo Croce <teknoraver@meta.com>
+Description: Don't wait for any other CPUs on reboot and
+ avoid anything that could hang.
+
+What: /sys/kernel/reboot/hw_protection
+Date: April 2025
+KernelVersion: 6.15
+Contact: Ahmad Fatoum <a.fatoum@pengutronix.de>
+Description: Hardware protection action taken on critical events like
+ overtemperature or imminent voltage loss.
+ Valid values are: reboot shutdown
diff --git a/Documentation/ABI/testing/sysfs-kernel-slab b/Documentation/ABI/testing/sysfs-kernel-slab
index 29601d93a1c2..b26e4299f822 100644
--- a/Documentation/ABI/testing/sysfs-kernel-slab
+++ b/Documentation/ABI/testing/sysfs-kernel-slab
@@ -2,7 +2,7 @@ What: /sys/kernel/slab
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The /sys/kernel/slab directory contains a snapshot of the
internal state of the SLUB allocator for each cache. Certain
@@ -10,51 +10,52 @@ Description:
any cache it aliases, if any).
Users: kernel memory tuning tools
-What: /sys/kernel/slab/cache/aliases
+What: /sys/kernel/slab/<cache>/aliases
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The aliases file is read-only and specifies how many caches
have merged into this cache.
-What: /sys/kernel/slab/cache/align
+What: /sys/kernel/slab/<cache>/align
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The align file is read-only and specifies the cache's object
alignment in bytes.
-What: /sys/kernel/slab/cache/alloc_calls
+What: /sys/kernel/slab/<cache>/alloc_calls
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The alloc_calls file is read-only and lists the kernel code
locations from which allocations for this cache were performed.
The alloc_calls file only contains information if debugging is
- enabled for that cache (see Documentation/vm/slub.rst).
+ enabled for that cache (see
+ Documentation/admin-guide/mm/slab.rst).
-What: /sys/kernel/slab/cache/alloc_fastpath
+What: /sys/kernel/slab/<cache>/alloc_fastpath
Date: February 2008
KernelVersion: 2.6.25
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The alloc_fastpath file shows how many objects have been
allocated using the fast path. It can be written to clear the
current count.
Available when CONFIG_SLUB_STATS is enabled.
-What: /sys/kernel/slab/cache/alloc_from_partial
+What: /sys/kernel/slab/<cache>/alloc_from_partial
Date: February 2008
KernelVersion: 2.6.25
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The alloc_from_partial file shows how many times a cpu slab has
been full and it has been refilled by using a slab from the list
@@ -62,33 +63,33 @@ Description:
count.
Available when CONFIG_SLUB_STATS is enabled.
-What: /sys/kernel/slab/cache/alloc_refill
+What: /sys/kernel/slab/<cache>/alloc_refill
Date: February 2008
KernelVersion: 2.6.25
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The alloc_refill file shows how many times the per-cpu freelist
was empty but there were objects available as the result of
remote cpu frees. It can be written to clear the current count.
Available when CONFIG_SLUB_STATS is enabled.
-What: /sys/kernel/slab/cache/alloc_slab
+What: /sys/kernel/slab/<cache>/alloc_slab
Date: February 2008
KernelVersion: 2.6.25
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The alloc_slab file is shows how many times a new slab had to
be allocated from the page allocator. It can be written to
clear the current count.
Available when CONFIG_SLUB_STATS is enabled.
-What: /sys/kernel/slab/cache/alloc_slowpath
+What: /sys/kernel/slab/<cache>/alloc_slowpath
Date: February 2008
KernelVersion: 2.6.25
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The alloc_slowpath file shows how many objects have been
allocated using the slow path because of a refill or
@@ -96,30 +97,30 @@ Description:
clear the current count.
Available when CONFIG_SLUB_STATS is enabled.
-What: /sys/kernel/slab/cache/cache_dma
+What: /sys/kernel/slab/<cache>/cache_dma
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The cache_dma file is read-only and specifies whether objects
are from ZONE_DMA.
Available when CONFIG_ZONE_DMA is enabled.
-What: /sys/kernel/slab/cache/cpu_slabs
+What: /sys/kernel/slab/<cache>/cpu_slabs
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The cpu_slabs file is read-only and displays how many cpu slabs
are active and their NUMA locality.
-What: /sys/kernel/slab/cache/cpuslab_flush
+What: /sys/kernel/slab/<cache>/cpuslab_flush
Date: April 2009
KernelVersion: 2.6.31
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The file cpuslab_flush shows how many times a cache's cpu slabs
have been flushed as the result of destroying or shrinking a
@@ -128,126 +129,126 @@ Description:
current count.
Available when CONFIG_SLUB_STATS is enabled.
-What: /sys/kernel/slab/cache/ctor
+What: /sys/kernel/slab/<cache>/ctor
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The ctor file is read-only and specifies the cache's object
constructor function, which is invoked for each object when a
new slab is allocated.
-What: /sys/kernel/slab/cache/deactivate_empty
+What: /sys/kernel/slab/<cache>/deactivate_empty
Date: February 2008
KernelVersion: 2.6.25
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The deactivate_empty file shows how many times an empty cpu slab
was deactivated. It can be written to clear the current count.
Available when CONFIG_SLUB_STATS is enabled.
-What: /sys/kernel/slab/cache/deactivate_full
+What: /sys/kernel/slab/<cache>/deactivate_full
Date: February 2008
KernelVersion: 2.6.25
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The deactivate_full file shows how many times a full cpu slab
was deactivated. It can be written to clear the current count.
Available when CONFIG_SLUB_STATS is enabled.
-What: /sys/kernel/slab/cache/deactivate_remote_frees
+What: /sys/kernel/slab/<cache>/deactivate_remote_frees
Date: February 2008
KernelVersion: 2.6.25
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The deactivate_remote_frees file shows how many times a cpu slab
has been deactivated and contained free objects that were freed
remotely. It can be written to clear the current count.
Available when CONFIG_SLUB_STATS is enabled.
-What: /sys/kernel/slab/cache/deactivate_to_head
+What: /sys/kernel/slab/<cache>/deactivate_to_head
Date: February 2008
KernelVersion: 2.6.25
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The deactivate_to_head file shows how many times a partial cpu
slab was deactivated and added to the head of its node's partial
list. It can be written to clear the current count.
Available when CONFIG_SLUB_STATS is enabled.
-What: /sys/kernel/slab/cache/deactivate_to_tail
+What: /sys/kernel/slab/<cache>/deactivate_to_tail
Date: February 2008
KernelVersion: 2.6.25
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The deactivate_to_tail file shows how many times a partial cpu
slab was deactivated and added to the tail of its node's partial
list. It can be written to clear the current count.
Available when CONFIG_SLUB_STATS is enabled.
-What: /sys/kernel/slab/cache/destroy_by_rcu
+What: /sys/kernel/slab/<cache>/destroy_by_rcu
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The destroy_by_rcu file is read-only and specifies whether
slabs (not objects) are freed by rcu.
-What: /sys/kernel/slab/cache/free_add_partial
+What: /sys/kernel/slab/<cache>/free_add_partial
Date: February 2008
KernelVersion: 2.6.25
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The free_add_partial file shows how many times an object has
been freed in a full slab so that it had to added to its node's
partial list. It can be written to clear the current count.
Available when CONFIG_SLUB_STATS is enabled.
-What: /sys/kernel/slab/cache/free_calls
+What: /sys/kernel/slab/<cache>/free_calls
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The free_calls file is read-only and lists the locations of
object frees if slab debugging is enabled (see
- Documentation/vm/slub.rst).
+ Documentation/admin-guide/mm/slab.rst).
-What: /sys/kernel/slab/cache/free_fastpath
+What: /sys/kernel/slab/<cache>/free_fastpath
Date: February 2008
KernelVersion: 2.6.25
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The free_fastpath file shows how many objects have been freed
using the fast path because it was an object from the cpu slab.
It can be written to clear the current count.
Available when CONFIG_SLUB_STATS is enabled.
-What: /sys/kernel/slab/cache/free_frozen
+What: /sys/kernel/slab/<cache>/free_frozen
Date: February 2008
KernelVersion: 2.6.25
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The free_frozen file shows how many objects have been freed to
a frozen slab (i.e. a remote cpu slab). It can be written to
clear the current count.
Available when CONFIG_SLUB_STATS is enabled.
-What: /sys/kernel/slab/cache/free_remove_partial
+What: /sys/kernel/slab/<cache>/free_remove_partial
Date: February 2008
KernelVersion: 2.6.25
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The free_remove_partial file shows how many times an object has
been freed to a now-empty slab so that it had to be removed from
@@ -255,38 +256,38 @@ Description:
count.
Available when CONFIG_SLUB_STATS is enabled.
-What: /sys/kernel/slab/cache/free_slab
+What: /sys/kernel/slab/<cache>/free_slab
Date: February 2008
KernelVersion: 2.6.25
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The free_slab file shows how many times an empty slab has been
freed back to the page allocator. It can be written to clear
the current count.
Available when CONFIG_SLUB_STATS is enabled.
-What: /sys/kernel/slab/cache/free_slowpath
+What: /sys/kernel/slab/<cache>/free_slowpath
Date: February 2008
KernelVersion: 2.6.25
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The free_slowpath file shows how many objects have been freed
using the slow path (i.e. to a full or partial slab). It can
be written to clear the current count.
Available when CONFIG_SLUB_STATS is enabled.
-What: /sys/kernel/slab/cache/hwcache_align
+What: /sys/kernel/slab/<cache>/hwcache_align
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The hwcache_align file is read-only and specifies whether
objects are aligned on cachelines.
-What: /sys/kernel/slab/cache/min_partial
+What: /sys/kernel/slab/<cache>/min_partial
Date: February 2009
KernelVersion: 2.6.30
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
@@ -297,194 +298,223 @@ Description:
allocating new slabs. Such slabs may be reclaimed by utilizing
the shrink file.
-What: /sys/kernel/slab/cache/object_size
+What: /sys/kernel/slab/<cache>/object_size
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The object_size file is read-only and specifies the cache's
object size.
-What: /sys/kernel/slab/cache/objects
+What: /sys/kernel/slab/<cache>/objects
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The objects file is read-only and displays how many objects are
active and from which nodes they are from.
-What: /sys/kernel/slab/cache/objects_partial
+What: /sys/kernel/slab/<cache>/objects_partial
Date: April 2008
KernelVersion: 2.6.26
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The objects_partial file is read-only and displays how many
objects are on partial slabs and from which nodes they are
from.
-What: /sys/kernel/slab/cache/objs_per_slab
+What: /sys/kernel/slab/<cache>/objs_per_slab
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The file objs_per_slab is read-only and specifies how many
objects may be allocated from a single slab of the order
- specified in /sys/kernel/slab/cache/order.
+ specified in /sys/kernel/slab/<cache>/order.
-What: /sys/kernel/slab/cache/order
+What: /sys/kernel/slab/<cache>/order
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The order file specifies the page order at which new slabs are
allocated. It is writable and can be changed to increase the
number of objects per slab. If a slab cannot be allocated
because of fragmentation, SLUB will retry with the minimum order
possible depending on its characteristics.
+
When debug_guardpage_minorder=N (N > 0) parameter is specified
(see Documentation/admin-guide/kernel-parameters.rst), the minimum possible
order is used and this sysfs entry can not be used to change
the order at run time.
-What: /sys/kernel/slab/cache/order_fallback
+What: /sys/kernel/slab/<cache>/order_fallback
Date: April 2008
KernelVersion: 2.6.26
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The order_fallback file shows how many times an allocation of a
new slab has not been possible at the cache's order and instead
fallen back to its minimum possible order. It can be written to
clear the current count.
+
Available when CONFIG_SLUB_STATS is enabled.
-What: /sys/kernel/slab/cache/partial
+What: /sys/kernel/slab/<cache>/partial
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The partial file is read-only and displays how long many
partial slabs there are and how long each node's list is.
-What: /sys/kernel/slab/cache/poison
+What: /sys/kernel/slab/<cache>/poison
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The poison file specifies whether objects should be poisoned
when a new slab is allocated.
-What: /sys/kernel/slab/cache/reclaim_account
+What: /sys/kernel/slab/<cache>/reclaim_account
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The reclaim_account file specifies whether the cache's objects
are reclaimable (and grouped by their mobility).
-What: /sys/kernel/slab/cache/red_zone
+What: /sys/kernel/slab/<cache>/red_zone
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The red_zone file specifies whether the cache's objects are red
zoned.
-What: /sys/kernel/slab/cache/remote_node_defrag_ratio
+What: /sys/kernel/slab/<cache>/remote_node_defrag_ratio
Date: January 2008
KernelVersion: 2.6.25
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The file remote_node_defrag_ratio specifies the percentage of
times SLUB will attempt to refill the cpu slab with a partial
slab from a remote node as opposed to allocating a new slab on
the local node. This reduces the amount of wasted memory over
the entire system but can be expensive.
+
Available when CONFIG_NUMA is enabled.
-What: /sys/kernel/slab/cache/sanity_checks
+What: /sys/kernel/slab/<cache>/sanity_checks
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The sanity_checks file specifies whether expensive checks
should be performed on free and, at minimum, enables double free
checks. Caches that enable sanity_checks cannot be merged with
caches that do not.
-What: /sys/kernel/slab/cache/shrink
+What: /sys/kernel/slab/<cache>/shrink
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
- The shrink file is written when memory should be reclaimed from
- a cache. Empty partial slabs are freed and the partial list is
- sorted so the slabs with the fewest available objects are used
- first.
+ The shrink file is used to reclaim unused slab cache
+ memory from a cache. Empty per-cpu or partial slabs
+ are freed and the partial list is sorted so the slabs
+ with the fewest available objects are used first.
+ It only accepts a value of "1" on write for shrinking
+ the cache. Other input values are considered invalid.
+ Shrinking slab caches might be expensive and can
+ adversely impact other running applications. So it
+ should be used with care.
-What: /sys/kernel/slab/cache/slab_size
+What: /sys/kernel/slab/<cache>/slab_size
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The slab_size file is read-only and specifies the object size
with metadata (debugging information and alignment) in bytes.
-What: /sys/kernel/slab/cache/slabs
+What: /sys/kernel/slab/<cache>/slabs
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The slabs file is read-only and displays how long many slabs
there are (both cpu and partial) and from which nodes they are
from.
-What: /sys/kernel/slab/cache/store_user
+What: /sys/kernel/slab/<cache>/store_user
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The store_user file specifies whether the location of
allocation or free should be tracked for a cache.
-What: /sys/kernel/slab/cache/total_objects
+What: /sys/kernel/slab/<cache>/total_objects
Date: April 2008
KernelVersion: 2.6.26
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The total_objects file is read-only and displays how many total
objects a cache has and from which nodes they are from.
-What: /sys/kernel/slab/cache/trace
+What: /sys/kernel/slab/<cache>/trace
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The trace file specifies whether object allocations and frees
should be traced.
-What: /sys/kernel/slab/cache/validate
+What: /sys/kernel/slab/<cache>/validate
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
Writing to the validate file causes SLUB to traverse all of its
cache's objects and check the validity of metadata.
+
+What: /sys/kernel/slab/<cache>/usersize
+Date: Jun 2017
+Contact: David Windsor <dave@nullcore.net>
+Description:
+ The usersize file is read-only and contains the usercopy
+ region size.
+
+What: /sys/kernel/slab/<cache>/slabs_cpu_partial
+Date: Aug 2011
+Contact: Christoph Lameter <cl@gentwo.org>
+Description:
+ This read-only file shows the number of partialli allocated
+ frozen slabs.
+
+What: /sys/kernel/slab/<cache>/cpu_partial
+Date: Aug 2011
+Contact: Christoph Lameter <cl@gentwo.org>
+Description:
+ This read-only file shows the number of per cpu partial
+ pages to keep around.
diff --git a/Documentation/ABI/testing/sysfs-kernel-softlockup_count b/Documentation/ABI/testing/sysfs-kernel-softlockup_count
new file mode 100644
index 000000000000..337ff5531b5f
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-softlockup_count
@@ -0,0 +1,7 @@
+What: /sys/kernel/softlockup_count
+Date: May 2025
+KernelVersion: 6.16
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description:
+ Shows how many times the system has detected a soft lockup since last boot.
+ Available only if CONFIG_SOFTLOCKUP_DETECTOR is enabled.
diff --git a/Documentation/ABI/testing/sysfs-kernel-vmcoreinfo b/Documentation/ABI/testing/sysfs-kernel-vmcoreinfo
index 7bd81168e063..1f1087a5f075 100644
--- a/Documentation/ABI/testing/sysfs-kernel-vmcoreinfo
+++ b/Documentation/ABI/testing/sysfs-kernel-vmcoreinfo
@@ -4,7 +4,7 @@ KernelVersion: 2.6.24
Contact: Ken'ichi Ohmichi <oomichi@mxs.nes.nec.co.jp>
Kexec Mailing List <kexec@lists.infradead.org>
Vivek Goyal <vgoyal@redhat.com>
-Description
+Description:
Shows physical address and size of vmcoreinfo ELF note.
First value contains physical address of note in hex and
second value contains the size of note in hex. This ELF
diff --git a/Documentation/ABI/testing/sysfs-kernel-warn_count b/Documentation/ABI/testing/sysfs-kernel-warn_count
new file mode 100644
index 000000000000..90a029813717
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-warn_count
@@ -0,0 +1,6 @@
+What: /sys/kernel/warn_count
+Date: November 2022
+KernelVersion: 6.2.0
+Contact: Linux Kernel Hardening List <linux-hardening@vger.kernel.org>
+Description:
+ Shows how many times the system has Warned since last boot.
diff --git a/Documentation/ABI/testing/sysfs-mce b/Documentation/ABI/testing/sysfs-mce
new file mode 100644
index 000000000000..83172f50e27c
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-mce
@@ -0,0 +1,97 @@
+What: /sys/devices/system/machinecheck/machinecheckX/
+Contact: Andi Kleen <ak@linux.intel.com>
+Date: Feb, 2007
+Description:
+ (X = CPU number)
+
+ Machine checks report internal hardware error conditions
+ detected by the CPU. Uncorrected errors typically cause a
+ machine check (often with panic), corrected ones cause a
+ machine check log entry.
+
+ For more details about the x86 machine check architecture
+ see the Intel and AMD architecture manuals from their
+ developer websites.
+
+ For more details about the architecture
+ see http://one.firstfloor.org/~andi/mce.pdf
+
+ Each CPU has its own directory.
+
+What: /sys/devices/system/machinecheck/machinecheckX/bank<Y>
+Contact: Andi Kleen <ak@linux.intel.com>
+Date: Feb, 2007
+Description:
+ (Y bank number)
+
+ 64bit Hex bitmask enabling/disabling specific subevents for
+ bank Y.
+
+ When a bit in the bitmask is zero then the respective
+ subevent will not be reported.
+
+ By default all events are enabled.
+
+ Note that BIOS maintain another mask to disable specific events
+ per bank. This is not visible here
+
+What: /sys/devices/system/machinecheck/machinecheckX/check_interval
+Contact: Andi Kleen <ak@linux.intel.com>
+Date: Feb, 2007
+Description:
+ The entries appear for each CPU, but they are truly shared
+ between all CPUs.
+
+ How often to poll for corrected machine check errors, in
+ seconds (Note output is hexadecimal). Default 5 minutes.
+ When the poller finds MCEs it triggers an exponential speedup
+ (poll more often) on the polling interval. When the poller
+ stops finding MCEs, it triggers an exponential backoff
+ (poll less often) on the polling interval. The check_interval
+ variable is both the initial and maximum polling interval.
+ 0 means no polling for corrected machine check errors
+ (but some corrected errors might be still reported
+ in other ways)
+
+What: /sys/devices/system/machinecheck/machinecheckX/trigger
+Contact: Andi Kleen <ak@linux.intel.com>
+Date: Feb, 2007
+Description:
+ The entries appear for each CPU, but they are truly shared
+ between all CPUs.
+
+ Program to run when a machine check event is detected.
+ This is an alternative to running mcelog regularly from cron
+ and allows to detect events faster.
+
+What: /sys/devices/system/machinecheck/machinecheckX/monarch_timeout
+Contact: Andi Kleen <ak@linux.intel.com>
+Date: Feb, 2007
+Description:
+ How long to wait for the other CPUs to machine check too on a
+ exception. 0 to disable waiting for other CPUs.
+
+ Unit: us
+
+What: /sys/devices/system/machinecheck/machinecheckX/ignore_ce
+Contact: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
+Date: Jun 2009
+Description:
+ Disables polling and CMCI for corrected errors.
+ All corrected events are not cleared and kept in bank MSRs.
+
+What: /sys/devices/system/machinecheck/machinecheckX/dont_log_ce
+Contact: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
+Date: Jun 2009
+Description:
+ Disables logging for corrected errors.
+ All reported corrected errors will be cleared silently.
+
+ This option will be useful if you never care about corrected
+ errors.
+
+What: /sys/devices/system/machinecheck/machinecheckX/cmci_disabled
+Contact: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
+Date: Jun 2009
+Description:
+ Disables the CMCI feature.
diff --git a/Documentation/ABI/testing/sysfs-memory-page-offline b/Documentation/ABI/testing/sysfs-memory-page-offline
index e14703f12fdf..00f4e35f916f 100644
--- a/Documentation/ABI/testing/sysfs-memory-page-offline
+++ b/Documentation/ABI/testing/sysfs-memory-page-offline
@@ -10,7 +10,7 @@ Description:
dropping it if possible. The kernel will then be placed
on the bad page list and never be reused.
- The offlining is done in kernel specific granuality.
+ The offlining is done in kernel specific granularity.
Normally it's the base page size of the kernel, but
this might change.
@@ -35,7 +35,7 @@ Description:
to access this page assuming it's poisoned by the
hardware.
- The offlining is done in kernel specific granuality.
+ The offlining is done in kernel specific granularity.
Normally it's the base page size of the kernel, but
this might change.
diff --git a/Documentation/ABI/testing/sysfs-module b/Documentation/ABI/testing/sysfs-module
index 0aac02e7fb0e..62addab47d0c 100644
--- a/Documentation/ABI/testing/sysfs-module
+++ b/Documentation/ABI/testing/sysfs-module
@@ -17,14 +17,15 @@ KernelVersion: 3.1
Contact: Kirill Smelkov <kirr@mns.spb.ru>
Description: Maximum time allowed for periodic transfers per microframe (μs)
- [ USB 2.0 sets maximum allowed time for periodic transfers per
+ Note:
+ USB 2.0 sets maximum allowed time for periodic transfers per
microframe to be 80%, that is 100 microseconds out of 125
microseconds (full microframe).
However there are cases, when 80% max isochronous bandwidth is
too limiting. For example two video streams could require 110
microseconds of isochronous bandwidth per microframe to work
- together. ]
+ together.
Through this setting it is possible to raise the limit so that
the host controller would allow allocating more than 100
@@ -36,17 +37,37 @@ Description: Maximum time allowed for periodic transfers per microframe (μs)
What: /sys/module/*/{coresize,initsize}
Date: Jan 2012
-KernelVersion:»·3.3
+KernelVersion: 3.3
Contact: Kay Sievers <kay.sievers@vrfy.org>
Description: Module size in bytes.
+What: /sys/module/*/initstate
+Date: Nov 2006
+KernelVersion: 2.6.19
+Contact: Kay Sievers <kay.sievers@vrfy.org>
+Description: Show the initialization state(live, coming, going) of
+ the module.
+
What: /sys/module/*/taint
Date: Jan 2012
-KernelVersion:»·3.3
+KernelVersion: 3.3
Contact: Kay Sievers <kay.sievers@vrfy.org>
Description: Module taint flags:
- P - proprietary module
- O - out-of-tree module
- F - force-loaded module
- C - staging driver module
- E - unsigned module
+ == =====================
+ P proprietary module
+ O out-of-tree module
+ F force-loaded module
+ C staging driver module
+ E unsigned module
+ == =====================
+
+What: /sys/module/grant_table/parameters/free_per_iteration
+Date: July 2023
+KernelVersion: 6.5 but backported to all supported stable branches
+Contact: Xen developer discussion <xen-devel@lists.xenproject.org>
+Description: Read and write number of grant entries to attempt to free per iteration.
+
+ Note: Future versions of Xen and Linux may provide a better
+ interface for controlling the rate of deferred grant reclaim
+ or may not need it at all.
+Users: Qubes OS (https://www.qubes-os.org)
diff --git a/Documentation/ABI/testing/sysfs-nvmem-cells b/Documentation/ABI/testing/sysfs-nvmem-cells
new file mode 100644
index 000000000000..c7c9444f92a8
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-nvmem-cells
@@ -0,0 +1,21 @@
+What: /sys/bus/nvmem/devices/.../cells/<cell-name>
+Date: May 2023
+KernelVersion: 6.5
+Contact: Miquel Raynal <miquel.raynal@bootlin.com>
+Description:
+ The "cells" folder contains one file per cell exposed by the
+ NVMEM device. The name of the file is: "<name>@<byte>,<bit>",
+ with <name> being the cell name and <where> its location in
+ the NVMEM device, in hexadecimal bytes and bits (without the
+ '0x' prefix, to mimic device tree node names). The length of
+ the file is the size of the cell (when known). The content of
+ the file is the binary content of the cell (may sometimes be
+ ASCII, likely without trailing character).
+ Note: This file is only present if CONFIG_NVMEM_SYSFS
+ is enabled.
+
+ Example::
+
+ hexdump -C /sys/bus/nvmem/devices/1-00563/cells/product-name@d,0
+ 00000000 54 4e 34 38 4d 2d 50 2d 44 4e |TN48M-P-DN|
+ 0000000a
diff --git a/Documentation/ABI/testing/sysfs-ocfs2 b/Documentation/ABI/testing/sysfs-ocfs2
index b7cc516a8a8a..494d7c1ac710 100644
--- a/Documentation/ABI/testing/sysfs-ocfs2
+++ b/Documentation/ABI/testing/sysfs-ocfs2
@@ -1,13 +1,13 @@
What: /sys/fs/ocfs2/
Date: April 2008
-Contact: ocfs2-devel@oss.oracle.com
+Contact: ocfs2-devel@lists.linux.dev
Description:
The /sys/fs/ocfs2 directory contains knobs used by the
ocfs2-tools to interact with the filesystem.
What: /sys/fs/ocfs2/max_locking_protocol
Date: April 2008
-Contact: ocfs2-devel@oss.oracle.com
+Contact: ocfs2-devel@lists.linux.dev
Description:
The /sys/fs/ocfs2/max_locking_protocol file displays version
of ocfs2 locking supported by the filesystem. This version
@@ -28,7 +28,7 @@ Description:
What: /sys/fs/ocfs2/loaded_cluster_plugins
Date: April 2008
-Contact: ocfs2-devel@oss.oracle.com
+Contact: ocfs2-devel@lists.linux.dev
Description:
The /sys/fs/ocfs2/loaded_cluster_plugins file describes
the available plugins to support ocfs2 cluster operation.
@@ -48,7 +48,7 @@ Description:
What: /sys/fs/ocfs2/active_cluster_plugin
Date: April 2008
-Contact: ocfs2-devel@oss.oracle.com
+Contact: ocfs2-devel@lists.linux.dev
Description:
The /sys/fs/ocfs2/active_cluster_plugin displays which
cluster plugin is currently in use by the filesystem.
@@ -65,7 +65,7 @@ Description:
What: /sys/fs/ocfs2/cluster_stack
Date: April 2008
-Contact: ocfs2-devel@oss.oracle.com
+Contact: ocfs2-devel@lists.linux.dev
Description:
The /sys/fs/ocfs2/cluster_stack file contains the name
of current ocfs2 cluster stack. This value is set by
@@ -86,4 +86,4 @@ Description:
stack return an error.
Users:
- ocfs2-tools <ocfs2-tools-devel@oss.oracle.com>
+ ocfs2-tools <ocfs2-tools-devel@lists.linux.dev>
diff --git a/Documentation/ABI/testing/sysfs-platform-alienware-wmi b/Documentation/ABI/testing/sysfs-platform-alienware-wmi
new file mode 100644
index 000000000000..4877b3745f4e
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-alienware-wmi
@@ -0,0 +1,14 @@
+What: /sys/class/hwmon/hwmonX/fanY_boost
+Date: March 2025
+KernelVersion: 6.15
+Contact: Kurt Borja <kuurtb@gmail.com>
+Description:
+ This file exposes fan boost control for Dell gaming laptops with
+ the AWCC WMI interface.
+
+ See Documentation/admin-guide/laptops/alienware-wmi.rst for
+ details.
+
+ Integer value in the range 0 to 255
+
+ RW
diff --git a/Documentation/ABI/testing/sysfs-platform-asus-laptop b/Documentation/ABI/testing/sysfs-platform-asus-laptop
index cd9d667c3da2..c78d358dbdbe 100644
--- a/Documentation/ABI/testing/sysfs-platform-asus-laptop
+++ b/Documentation/ABI/testing/sysfs-platform-asus-laptop
@@ -4,13 +4,16 @@ KernelVersion: 2.6.20
Contact: "Corentin Chary" <corentincj@iksaif.net>
Description:
This file allows display switching. The value
- is composed by 4 bits and defined as follow:
- 4321
- |||`- LCD
- ||`-- CRT
- |`--- TV
- `---- DVI
- Ex: - 0 (0000b) means no display
+ is composed by 4 bits and defined as follow::
+
+ 4321
+ |||`- LCD
+ ||`-- CRT
+ |`--- TV
+ `---- DVI
+
+ Ex:
+ - 0 (0000b) means no display
- 3 (0011b) CRT+LCD.
What: /sys/devices/platform/asus_laptop/gps
@@ -28,10 +31,12 @@ Contact: "Corentin Chary" <corentincj@iksaif.net>
Description:
Some models like the W1N have a LED display that can be
used to display several items of information.
- To control the LED display, use the following :
+ To control the LED display, use the following::
+
echo 0x0T000DDD > /sys/devices/platform/asus_laptop/
+
where T control the 3 letters display, and DDD the 3 digits display.
- The DDD table can be found in Documentation/laptops/asus-laptop.txt
+ The DDD table can be found in Documentation/admin-guide/laptops/asus-laptop.rst
What: /sys/devices/platform/asus_laptop/bluetooth
Date: January 2007
diff --git a/Documentation/ABI/testing/sysfs-platform-asus-wmi b/Documentation/ABI/testing/sysfs-platform-asus-wmi
index 019e1e29370e..28144371a0f1 100644
--- a/Documentation/ABI/testing/sysfs-platform-asus-wmi
+++ b/Documentation/ABI/testing/sysfs-platform-asus-wmi
@@ -5,6 +5,7 @@ Contact: "Corentin Chary" <corentincj@iksaif.net>
Description:
Change CPU clock configuration (write-only).
There are three available clock configuration:
+
* 0 -> Super Performance Mode
* 1 -> High Performance Mode
* 2 -> Power Saving Mode
@@ -36,3 +37,178 @@ KernelVersion: 3.5
Contact: "AceLan Kao" <acelan.kao@canonical.com>
Description:
Resume on lid open. 1 means on, 0 means off.
+
+What: /sys/devices/platform/<platform>/fan_boost_mode
+Date: Sep 2019
+KernelVersion: 5.3
+Contact: "Yurii Pavlovskyi" <yurii.pavlovskyi@gmail.com>
+Description:
+ Fan boost mode:
+ * 0 - normal,
+ * 1 - overboost,
+ * 2 - silent
+
+What: /sys/devices/platform/<platform>/throttle_thermal_policy
+Date: Dec 2019
+KernelVersion: 5.6
+Contact: "Leonid Maksymchuk" <leonmaxx@gmail.com>
+Description:
+ Throttle thermal policy mode:
+ * 0 - default,
+ * 1 - overboost,
+ * 2 - silent
+
+What: /sys/devices/platform/<platform>/gpu_mux_mode
+Date: Aug 2022
+KernelVersion: 6.1
+Contact: "Luke Jones" <luke@ljones.dev>
+Description:
+ Switch the GPU hardware MUX mode. Laptops with this feature can
+ can be toggled to boot with only the dGPU (discrete mode) or in
+ standard Optimus/Hybrid mode. On switch a reboot is required:
+
+ * 0 - Discrete GPU,
+ * 1 - Optimus/Hybrid,
+
+What: /sys/devices/platform/<platform>/dgpu_disable
+Date: Aug 2022
+KernelVersion: 5.17
+Contact: "Luke Jones" <luke@ljones.dev>
+Description:
+ Disable discrete GPU:
+ * 0 - Enable dGPU,
+ * 1 - Disable dGPU
+
+What: /sys/devices/platform/<platform>/egpu_enable
+Date: Aug 2022
+KernelVersion: 5.17
+Contact: "Luke Jones" <luke@ljones.dev>
+Description:
+ Enable the external GPU paired with ROG X-Flow laptops.
+ Toggling this setting will also trigger ACPI to disable the dGPU:
+
+ * 0 - Disable,
+ * 1 - Enable
+
+What: /sys/devices/platform/<platform>/panel_od
+Date: Aug 2022
+KernelVersion: 5.17
+Contact: "Luke Jones" <luke@ljones.dev>
+Description:
+ Enable an LCD response-time boost to reduce or remove ghosting:
+ * 0 - Disable,
+ * 1 - Enable
+
+What: /sys/devices/platform/<platform>/charge_mode
+Date: Jun 2023
+KernelVersion: 6.5
+Contact: "Luke Jones" <luke@ljones.dev>
+Description:
+ Get the current charging mode being used:
+ * 1 - Barrel connected charger,
+ * 2 - USB-C charging
+ * 3 - Both connected, barrel used for charging
+
+What: /sys/devices/platform/<platform>/egpu_connected
+Date: Jun 2023
+KernelVersion: 6.5
+Contact: "Luke Jones" <luke@ljones.dev>
+Description:
+ Show if the egpu (XG Mobile) is correctly connected:
+ * 0 - False,
+ * 1 - True
+
+What: /sys/devices/platform/<platform>/mini_led_mode
+Date: Jun 2023
+KernelVersion: 6.5
+Contact: "Luke Jones" <luke@ljones.dev>
+Description:
+ Change the mini-LED mode:
+ * 0 - Single-zone,
+ * 1 - Multi-zone
+ * 2 - Multi-zone strong (available on newer generation mini-led)
+
+What: /sys/devices/platform/<platform>/available_mini_led_mode
+Date: Apr 2024
+KernelVersion: 6.10
+Contact: "Luke Jones" <luke@ljones.dev>
+Description:
+ List the available mini-led modes.
+
+What: /sys/devices/platform/<platform>/ppt_pl1_spl
+Date: Jun 2023
+KernelVersion: 6.5
+Contact: "Luke Jones" <luke@ljones.dev>
+Description:
+ Set the Package Power Target total of CPU: PL1 on Intel, SPL on AMD.
+ Shown on Intel+Nvidia or AMD+Nvidia based systems:
+
+ * min=5, max=250
+
+What: /sys/devices/platform/<platform>/ppt_pl2_sppt
+Date: Jun 2023
+KernelVersion: 6.5
+Contact: "Luke Jones" <luke@ljones.dev>
+Description:
+ Set the Slow Package Power Tracking Limit of CPU: PL2 on Intel, SPPT,
+ on AMD. Shown on Intel+Nvidia or AMD+Nvidia based systems:
+
+ * min=5, max=250
+
+What: /sys/devices/platform/<platform>/ppt_fppt
+Date: Jun 2023
+KernelVersion: 6.5
+Contact: "Luke Jones" <luke@ljones.dev>
+Description:
+ Set the Fast Package Power Tracking Limit of CPU. AMD+Nvidia only:
+ * min=5, max=250
+
+What: /sys/devices/platform/<platform>/ppt_apu_sppt
+Date: Jun 2023
+KernelVersion: 6.5
+Contact: "Luke Jones" <luke@ljones.dev>
+Description:
+ Set the APU SPPT limit. Shown on full AMD systems only:
+ * min=5, max=130
+
+What: /sys/devices/platform/<platform>/ppt_platform_sppt
+Date: Jun 2023
+KernelVersion: 6.5
+Contact: "Luke Jones" <luke@ljones.dev>
+Description:
+ Set the platform SPPT limit. Shown on full AMD systems only:
+ * min=5, max=130
+
+What: /sys/devices/platform/<platform>/nv_dynamic_boost
+Date: Jun 2023
+KernelVersion: 6.5
+Contact: "Luke Jones" <luke@ljones.dev>
+Description:
+ Set the dynamic boost limit of the Nvidia dGPU:
+ * min=5, max=25
+
+What: /sys/devices/platform/<platform>/nv_temp_target
+Date: Jun 2023
+KernelVersion: 6.5
+Contact: "Luke Jones" <luke@ljones.dev>
+Description:
+ Set the target temperature limit of the Nvidia dGPU:
+ * min=75, max=87
+
+What: /sys/devices/platform/<platform>/boot_sound
+Date: Apr 2024
+KernelVersion: 6.10
+Contact: "Luke Jones" <luke@ljones.dev>
+Description:
+ Set if the BIOS POST sound is played on boot.
+ * 0 - False,
+ * 1 - True
+
+What: /sys/devices/platform/<platform>/mcu_powersave
+Date: Apr 2024
+KernelVersion: 6.10
+Contact: "Luke Jones" <luke@ljones.dev>
+Description:
+ Set if the MCU can go in to low-power mode on system sleep
+ * 0 - False,
+ * 1 - True
diff --git a/Documentation/ABI/testing/sysfs-platform-at91 b/Documentation/ABI/testing/sysfs-platform-at91
index 4cc6a865ae66..b146be74b8e0 100644
--- a/Documentation/ABI/testing/sysfs-platform-at91
+++ b/Documentation/ABI/testing/sysfs-platform-at91
@@ -18,8 +18,10 @@ Description:
In order to use an extended can_id add the
CAN_EFF_FLAG (0x80000000U) to the can_id. Example:
- - standard id 0x7ff:
- echo 0x7ff > /sys/class/net/can0/mb0_id
+ - standard id 0x7ff::
- - extended id 0x1fffffff:
- echo 0x9fffffff > /sys/class/net/can0/mb0_id
+ echo 0x7ff > /sys/class/net/can0/mb0_id
+
+ - extended id 0x1fffffff::
+
+ echo 0x9fffffff > /sys/class/net/can0/mb0_id
diff --git a/Documentation/ABI/testing/sysfs-platform-brcmstb-memc b/Documentation/ABI/testing/sysfs-platform-brcmstb-memc
new file mode 100644
index 000000000000..2f2b750ac2fd
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-brcmstb-memc
@@ -0,0 +1,15 @@
+What: /sys/bus/platform/devices/*/srpd
+Date: July 2022
+KernelVersion: 5.21
+Contact: Florian Fainelli <f.fainelli@gmail.com>
+Description:
+ Self Refresh Power Down (SRPD) inactivity timeout counted in
+ internal DDR controller clock cycles. Possible values range
+ from 0 (disable inactivity timeout) to 65535 (0xffff).
+
+What: /sys/bus/platform/devices/*/frequency
+Date: July 2022
+KernelVersion: 5.21
+Contact: Florian Fainelli <f.fainelli@gmail.com>
+Description:
+ DDR PHY frequency in Hz.
diff --git a/Documentation/ABI/testing/sysfs-platform-chipidea-usb-otg b/Documentation/ABI/testing/sysfs-platform-chipidea-usb-otg
index 151c59578db4..f58cfb06b160 100644
--- a/Documentation/ABI/testing/sysfs-platform-chipidea-usb-otg
+++ b/Documentation/ABI/testing/sysfs-platform-chipidea-usb-otg
@@ -1,6 +1,6 @@
What: /sys/bus/platform/devices/ci_hdrc.0/inputs/a_bus_req
Date: Feb 2014
-Contact: Li Jun <b47624@freescale.com>
+Contact: Li Jun <jun.li@nxp.com>
Description:
Can be set and read.
Set a_bus_req(A-device bus request) input to be 1 if
@@ -17,7 +17,7 @@ Description:
What: /sys/bus/platform/devices/ci_hdrc.0/inputs/a_bus_drop
Date: Feb 2014
-Contact: Li Jun <b47624@freescale.com>
+Contact: Li Jun <jun.li@nxp.com>
Description:
Can be set and read
The a_bus_drop(A-device bus drop) input is 1 when the
@@ -32,7 +32,7 @@ Description:
What: /sys/bus/platform/devices/ci_hdrc.0/inputs/b_bus_req
Date: Feb 2014
-Contact: Li Jun <b47624@freescale.com>
+Contact: Li Jun <jun.li@nxp.com>
Description:
Can be set and read.
The b_bus_req(B-device bus request) input is 1 during the time
@@ -47,7 +47,7 @@ Description:
What: /sys/bus/platform/devices/ci_hdrc.0/inputs/a_clr_err
Date: Feb 2014
-Contact: Li Jun <b47624@freescale.com>
+Contact: Li Jun <jun.li@nxp.com>
Description:
Only can be set.
The a_clr_err(A-device Vbus error clear) input is used to clear
diff --git a/Documentation/ABI/testing/sysfs-platform-chipidea-usb2 b/Documentation/ABI/testing/sysfs-platform-chipidea-usb2
index b0f4684a83fe..b9f7d924f28a 100644
--- a/Documentation/ABI/testing/sysfs-platform-chipidea-usb2
+++ b/Documentation/ABI/testing/sysfs-platform-chipidea-usb2
@@ -2,8 +2,8 @@ What: /sys/bus/platform/devices/ci_hdrc.0/role
Date: Mar 2017
Contact: Peter Chen <peter.chen@nxp.com>
Description:
- It returns string "gadget" or "host" when read it, it indicates
- current controller role.
+ When read, it returns string "gadget" or "host", indicating
+ the current controller role.
- It will do role switch when write "gadget" or "host" to it.
+ It will do role switch when "gadget" or "host" is written to it.
Only controller at dual-role configuration supports writing.
diff --git a/Documentation/ABI/testing/sysfs-platform-dell-laptop b/Documentation/ABI/testing/sysfs-platform-dell-laptop
index 8c6a0b8e1131..701529653283 100644
--- a/Documentation/ABI/testing/sysfs-platform-dell-laptop
+++ b/Documentation/ABI/testing/sysfs-platform-dell-laptop
@@ -2,7 +2,7 @@ What: /sys/class/leds/dell::kbd_backlight/als_enabled
Date: December 2014
KernelVersion: 3.19
Contact: Gabriele Mazzotta <gabriele.mzt@gmail.com>,
- Pali Rohár <pali.rohar@gmail.com>
+ Pali Rohár <pali@kernel.org>
Description:
This file allows to control the automatic keyboard
illumination mode on some systems that have an ambient
@@ -13,16 +13,16 @@ What: /sys/class/leds/dell::kbd_backlight/als_setting
Date: December 2014
KernelVersion: 3.19
Contact: Gabriele Mazzotta <gabriele.mzt@gmail.com>,
- Pali Rohár <pali.rohar@gmail.com>
+ Pali Rohár <pali@kernel.org>
Description:
- This file allows to specifiy the on/off threshold value,
+ This file allows to specify the on/off threshold value,
as reported by the ambient light sensor.
What: /sys/class/leds/dell::kbd_backlight/start_triggers
Date: December 2014
KernelVersion: 3.19
Contact: Gabriele Mazzotta <gabriele.mzt@gmail.com>,
- Pali Rohár <pali.rohar@gmail.com>
+ Pali Rohár <pali@kernel.org>
Description:
This file allows to control the input triggers that
turn on the keyboard backlight illumination that is
@@ -34,9 +34,12 @@ Description:
this file. To disable a trigger, write its name preceded
by '-' instead.
- For example, to enable the keyboard as trigger run:
+ For example, to enable the keyboard as trigger run::
+
echo +keyboard > /sys/class/leds/dell::kbd_backlight/start_triggers
- To disable it:
+
+ To disable it::
+
echo -keyboard > /sys/class/leds/dell::kbd_backlight/start_triggers
Note that not all the available triggers can be configured.
@@ -45,7 +48,7 @@ What: /sys/class/leds/dell::kbd_backlight/stop_timeout
Date: December 2014
KernelVersion: 3.19
Contact: Gabriele Mazzotta <gabriele.mzt@gmail.com>,
- Pali Rohár <pali.rohar@gmail.com>
+ Pali Rohár <pali@kernel.org>
Description:
This file allows to specify the interval after which the
keyboard illumination is disabled because of inactivity.
@@ -57,7 +60,8 @@ Description:
with any the above units. If no unit is specified, the value
is assumed to be expressed in seconds.
- For example, to set the timeout to 10 minutes run:
+ For example, to set the timeout to 10 minutes run::
+
echo 10m > /sys/class/leds/dell::kbd_backlight/stop_timeout
Note that when this file is read, the returned value might be
diff --git a/Documentation/ABI/testing/sysfs-platform-dell-privacy-wmi b/Documentation/ABI/testing/sysfs-platform-dell-privacy-wmi
new file mode 100644
index 000000000000..b4da7b2ea0ca
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-dell-privacy-wmi
@@ -0,0 +1,71 @@
+What: /sys/bus/wmi/devices/6932965F-1671-4CEB-B988-D3AB0A901919[-X]/dell_privacy_supported_type
+Date: Apr 2021
+KernelVersion: 5.13
+Contact: "<perry.yuan@dell.com>"
+Description:
+ Display which dell hardware level privacy devices are supported
+ “Dell Privacy” is a set of HW, FW, and SW features to enhance
+ Dell’s commitment to platform privacy for MIC, Camera, and
+ ePrivacy screens.
+ The supported hardware privacy devices are:
+
+ Attributes:
+ Microphone Mute:
+ Identifies the local microphone can be muted by hardware, no applications
+ is available to capture system mic sound
+
+ Camera Shutter:
+ Identifies camera shutter controlled by hardware, which is a micromechanical
+ shutter assembly that is built onto the camera module to block capturing images
+ from outside the laptop
+
+ Values:
+
+ supported:
+ The privacy device is supported by this system
+
+ unsupported:
+ The privacy device is not supported on this system
+
+ For example to check which privacy devices are supported::
+
+ # cat /sys/bus/wmi/drivers/dell-privacy/6932965F-1671-4CEB-B988-D3AB0A901919*/dell_privacy_supported_type
+ [Microphone Mute] [supported]
+ [Camera Shutter] [supported]
+ [ePrivacy Screen] [unsupported]
+
+What: /sys/bus/wmi/devices/6932965F-1671-4CEB-B988-D3AB0A901919[-X]/dell_privacy_current_state
+Date: Apr 2021
+KernelVersion: 5.13
+Contact: "<perry.yuan@dell.com>"
+Description:
+ Allow user space to check current dell privacy device state.
+ Describes the Device State class exposed by BIOS which can be
+ consumed by various applications interested in knowing the Privacy
+ feature capabilities
+
+ Attributes:
+ Microphone:
+ Identifies the local microphone can be muted by hardware, no applications
+ is available to capture system mic sound
+
+ Camera Shutter:
+ Identifies camera shutter controlled by hardware, which is a micromechanical
+ shutter assembly that is built onto the camera module to block capturing images
+ from outside the laptop
+
+ Values:
+ muted:
+ Identifies the privacy device is turned off
+ and cannot send stream to OS applications
+
+ unmuted:
+ Identifies the privacy device is turned on,
+ audio or camera driver can get stream from mic
+ and camera module to OS applications
+
+ For example to check all supported current privacy device states::
+
+ # cat /sys/bus/wmi/drivers/dell-privacy/6932965F-1671-4CEB-B988-D3AB0A901919*/dell_privacy_current_state
+ [Microphone] [unmuted]
+ [Camera Shutter] [unmuted]
diff --git a/Documentation/ABI/testing/sysfs-platform-dell-smbios b/Documentation/ABI/testing/sysfs-platform-dell-smbios
index 205d3b6361e0..5583da581025 100644
--- a/Documentation/ABI/testing/sysfs-platform-dell-smbios
+++ b/Documentation/ABI/testing/sysfs-platform-dell-smbios
@@ -1,7 +1,7 @@
What: /sys/devices/platform/<platform>/tokens/*
Date: November 2017
KernelVersion: 4.15
-Contact: "Mario Limonciello" <mario.limonciello@dell.com>
+Contact: Dell.Client.Kernel@dell.com
Description:
A read-only description of Dell platform tokens
available on the machine.
@@ -13,8 +13,8 @@ Description:
For example the token ID "5" would be available
as the following attributes:
- 0005_location
- 0005_value
+ - 0005_location
+ - 0005_value
Tokens will vary from machine to machine, and
only tokens available on that machine will be
diff --git a/Documentation/ABI/testing/sysfs-platform-dell-wmi-ddv b/Documentation/ABI/testing/sysfs-platform-dell-wmi-ddv
new file mode 100644
index 000000000000..a9d39d9e8865
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-dell-wmi-ddv
@@ -0,0 +1,9 @@
+What: /sys/class/power_supply/<battery_name>/eppid
+Date: September 2022
+KernelVersion: 6.1
+Contact: Armin Wolf <W_Armin@gmx.de>
+Description:
+ Reports the Dell ePPID (electronic Piece Part Identification)
+ of the ACPI battery.
+
+ See Documentation/wmi/devices/dell-wmi-ddv.rst for details.
diff --git a/Documentation/ABI/testing/sysfs-platform-dfl-fme b/Documentation/ABI/testing/sysfs-platform-dfl-fme
index 8fa4febfa4b2..2d5b78d2cf51 100644
--- a/Documentation/ABI/testing/sysfs-platform-dfl-fme
+++ b/Documentation/ABI/testing/sysfs-platform-dfl-fme
@@ -21,3 +21,226 @@ Contact: Wu Hao <hao.wu@intel.com>
Description: Read-only. It returns Bitstream (static FPGA region) meta
data, which includes the synthesis date, seed and other
information of this static FPGA region.
+
+What: /sys/bus/platform/devices/dfl-fme.0/cache_size
+Date: August 2019
+KernelVersion: 5.4
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. It returns cache size of this FPGA device.
+
+What: /sys/bus/platform/devices/dfl-fme.0/fabric_version
+Date: August 2019
+KernelVersion: 5.4
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. It returns fabric version of this FPGA device.
+ Userspace applications need this information to select
+ best data channels per different fabric design.
+
+What: /sys/bus/platform/devices/dfl-fme.0/socket_id
+Date: August 2019
+KernelVersion: 5.4
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. It returns socket_id to indicate which socket
+ this FPGA belongs to, only valid for integrated solution.
+ User only needs this information, in case standard numa node
+ can't provide correct information.
+
+What: /sys/bus/platform/devices/dfl-fme.0/errors/pcie0_errors
+Date: August 2019
+KernelVersion: 5.4
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-Write. Read this file for errors detected on pcie0 link.
+ Write this file to clear errors logged in pcie0_errors. Write
+ fails with -EINVAL if input parsing fails or input error code
+ doesn't match.
+
+What: /sys/bus/platform/devices/dfl-fme.0/errors/pcie1_errors
+Date: August 2019
+KernelVersion: 5.4
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-Write. Read this file for errors detected on pcie1 link.
+ Write this file to clear errors logged in pcie1_errors. Write
+ fails with -EINVAL if input parsing fails or input error code
+ doesn't match.
+
+What: /sys/bus/platform/devices/dfl-fme.0/errors/nonfatal_errors
+Date: August 2019
+KernelVersion: 5.4
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. It returns non-fatal errors detected.
+
+What: /sys/bus/platform/devices/dfl-fme.0/errors/catfatal_errors
+Date: August 2019
+KernelVersion: 5.4
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. It returns catastrophic and fatal errors detected.
+
+What: /sys/bus/platform/devices/dfl-fme.0/errors/inject_errors
+Date: August 2019
+KernelVersion: 5.4
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-Write. Read this file to check errors injected. Write this
+ file to inject errors for testing purpose. Write fails with
+ -EINVAL if input parsing fails or input inject error code isn't
+ supported.
+
+What: /sys/bus/platform/devices/dfl-fme.0/errors/fme_errors
+Date: August 2019
+KernelVersion: 5.4
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-Write. Read this file to get errors detected on FME.
+ Write this file to clear errors logged in fme_errors. Write
+ fails with -EINVAL if input parsing fails or input error code
+ doesn't match.
+
+What: /sys/bus/platform/devices/dfl-fme.0/errors/first_error
+Date: August 2019
+KernelVersion: 5.4
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. Read this file to get the first error detected by
+ hardware.
+
+What: /sys/bus/platform/devices/dfl-fme.0/errors/next_error
+Date: August 2019
+KernelVersion: 5.4
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. Read this file to get the second error detected by
+ hardware.
+
+What: /sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/name
+Date: October 2019
+KernelVersion: 5.5
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-Only. Read this file to get the name of hwmon device, it
+ supports values:
+
+ ================= =========================
+ 'dfl_fme_thermal' thermal hwmon device name
+ 'dfl_fme_power' power hwmon device name
+ ================= =========================
+
+What: /sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/temp1_input
+Date: October 2019
+KernelVersion: 5.5
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-Only. It returns FPGA device temperature in millidegrees
+ Celsius.
+
+What: /sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/temp1_max
+Date: October 2019
+KernelVersion: 5.5
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-Only. It returns hardware threshold1 temperature in
+ millidegrees Celsius. If temperature rises at or above this
+ threshold, hardware starts 50% or 90% throttling (see
+ 'temp1_max_policy').
+
+What: /sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/temp1_crit
+Date: October 2019
+KernelVersion: 5.5
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-Only. It returns hardware threshold2 temperature in
+ millidegrees Celsius. If temperature rises at or above this
+ threshold, hardware starts 100% throttling.
+
+What: /sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/temp1_emergency
+Date: October 2019
+KernelVersion: 5.5
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-Only. It returns hardware trip threshold temperature in
+ millidegrees Celsius. If temperature rises at or above this
+ threshold, a fatal event will be triggered to board management
+ controller (BMC) to shutdown FPGA.
+
+What: /sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/temp1_max_alarm
+Date: October 2019
+KernelVersion: 5.5
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. It returns 1 if temperature is currently at or above
+ hardware threshold1 (see 'temp1_max'), otherwise 0.
+
+What: /sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/temp1_crit_alarm
+Date: October 2019
+KernelVersion: 5.5
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. It returns 1 if temperature is currently at or above
+ hardware threshold2 (see 'temp1_crit'), otherwise 0.
+
+What: /sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/temp1_max_policy
+Date: October 2019
+KernelVersion: 5.5
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-Only. Read this file to get the policy of hardware threshold1
+ (see 'temp1_max'). It only supports two values (policies):
+
+ == ==========================
+ 0 AP2 state (90% throttling)
+ 1 AP1 state (50% throttling)
+ == ==========================
+
+What: /sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/power1_input
+Date: October 2019
+KernelVersion: 5.5
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-Only. It returns current FPGA power consumption in uW.
+
+What: /sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/power1_max
+Date: October 2019
+KernelVersion: 5.5
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-Write. Read this file to get current hardware power
+ threshold1 in uW. If power consumption rises at or above
+ this threshold, hardware starts 50% throttling.
+ Write this file to set current hardware power threshold1 in uW.
+ As hardware only accepts values in Watts, so input value will
+ be round down per Watts (< 1 watts part will be discarded) and
+ clamped within the range from 0 to 127 Watts. Write fails with
+ -EINVAL if input parsing fails.
+
+What: /sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/power1_crit
+Date: October 2019
+KernelVersion: 5.5
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-Write. Read this file to get current hardware power
+ threshold2 in uW. If power consumption rises at or above
+ this threshold, hardware starts 90% throttling.
+ Write this file to set current hardware power threshold2 in uW.
+ As hardware only accepts values in Watts, so input value will
+ be round down per Watts (< 1 watts part will be discarded) and
+ clamped within the range from 0 to 127 Watts. Write fails with
+ -EINVAL if input parsing fails.
+
+What: /sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/power1_max_alarm
+Date: October 2019
+KernelVersion: 5.5
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. It returns 1 if power consumption is currently at or
+ above hardware threshold1 (see 'power1_max'), otherwise 0.
+
+What: /sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/power1_crit_alarm
+Date: October 2019
+KernelVersion: 5.5
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. It returns 1 if power consumption is currently at or
+ above hardware threshold2 (see 'power1_crit'), otherwise 0.
+
+What: /sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/power1_xeon_limit
+Date: October 2019
+KernelVersion: 5.5
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-Only. It returns power limit for XEON in uW.
+
+What: /sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/power1_fpga_limit
+Date: October 2019
+KernelVersion: 5.5
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-Only. It returns power limit for FPGA in uW.
+
+What: /sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/power1_ltr
+Date: October 2019
+KernelVersion: 5.5
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. Read this file to get current Latency Tolerance
+ Reporting (ltr) value. It returns 1 if all Accelerated
+ Function Units (AFUs) can tolerate latency >= 40us for memory
+ access or 0 if any AFU is latency sensitive (< 40us).
diff --git a/Documentation/ABI/testing/sysfs-platform-dfl-port b/Documentation/ABI/testing/sysfs-platform-dfl-port
index 6a92dda517b0..65658267fcc0 100644
--- a/Documentation/ABI/testing/sysfs-platform-dfl-port
+++ b/Documentation/ABI/testing/sysfs-platform-dfl-port
@@ -14,3 +14,88 @@ Description: Read-only. User can program different PR bitstreams to FPGA
Accelerator Function Unit (AFU) for different functions. It
returns uuid which could be used to identify which PR bitstream
is programmed in this AFU.
+
+What: /sys/bus/platform/devices/dfl-port.0/power_state
+Date: August 2019
+KernelVersion: 5.4
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. It reports the APx (AFU Power) state, different APx
+ means different throttling level. When reading this file, it
+ returns "0" - Normal / "1" - AP1 / "2" - AP2 / "6" - AP6.
+
+What: /sys/bus/platform/devices/dfl-port.0/ap1_event
+Date: August 2019
+KernelVersion: 5.4
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-write. Read this file for AP1 (AFU Power State 1) event.
+ It's used to indicate transient AP1 state. Write 1 to this
+ file to clear AP1 event.
+
+What: /sys/bus/platform/devices/dfl-port.0/ap2_event
+Date: August 2019
+KernelVersion: 5.4
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-write. Read this file for AP2 (AFU Power State 2) event.
+ It's used to indicate transient AP2 state. Write 1 to this
+ file to clear AP2 event.
+
+What: /sys/bus/platform/devices/dfl-port.0/ltr
+Date: August 2019
+KernelVersion: 5.4
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-write. Read or set AFU latency tolerance reporting value.
+ Set ltr to 1 if the AFU can tolerate latency >= 40us or set it
+ to 0 if it is latency sensitive.
+
+What: /sys/bus/platform/devices/dfl-port.0/userclk_freqcmd
+Date: August 2019
+KernelVersion: 5.4
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Write-only. User writes command to this interface to set
+ userclock to AFU.
+
+What: /sys/bus/platform/devices/dfl-port.0/userclk_freqsts
+Date: August 2019
+KernelVersion: 5.4
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. Read this file to get the status of issued command
+ to userclck_freqcmd.
+
+What: /sys/bus/platform/devices/dfl-port.0/userclk_freqcntrcmd
+Date: August 2019
+KernelVersion: 5.4
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Write-only. User writes command to this interface to set
+ userclock counter.
+
+What: /sys/bus/platform/devices/dfl-port.0/userclk_freqcntrsts
+Date: August 2019
+KernelVersion: 5.4
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. Read this file to get the status of issued command
+ to userclck_freqcntrcmd.
+
+What: /sys/bus/platform/devices/dfl-port.0/errors/errors
+Date: August 2019
+KernelVersion: 5.4
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-Write. Read this file to get errors detected on port and
+ Accelerated Function Unit (AFU). Write error code to this file
+ to clear errors. Write fails with -EINVAL if input parsing
+ fails or input error code doesn't match. Write fails with
+ -EBUSY or -ETIMEDOUT if error can't be cleared as hardware
+ in low power state (-EBUSY) or not respoding (-ETIMEDOUT).
+
+What: /sys/bus/platform/devices/dfl-port.0/errors/first_error
+Date: August 2019
+KernelVersion: 5.4
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. Read this file to get the first error detected by
+ hardware.
+
+What: /sys/bus/platform/devices/dfl-port.0/errors/first_malformed_req
+Date: August 2019
+KernelVersion: 5.4
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. Read this file to get the first malformed request
+ captured by hardware.
diff --git a/Documentation/ABI/testing/sysfs-platform-dptf b/Documentation/ABI/testing/sysfs-platform-dptf
index 325dc0667dbb..620fd20434a5 100644
--- a/Documentation/ABI/testing/sysfs-platform-dptf
+++ b/Documentation/ABI/testing/sysfs-platform-dptf
@@ -27,10 +27,15 @@ KernelVersion: v4.10
Contact: linux-acpi@vger.kernel.org
Description:
(RO) Display the platform power source
- 0x00 = DC
- 0x01 = AC
- 0x02 = USB
- 0x03 = Wireless Charger
+
+ ========= ============================
+ bits[3:0] Current power source
+ - 0x00 = DC
+ - 0x01 = AC
+ - 0x02 = USB
+ - 0x03 = Wireless Charger
+ bits[7:4] Power source sequence number
+ ========= ============================
What: /sys/bus/platform/devices/INT3407:00/dptf_power/battery_steady_power
Date: Jul, 2016
@@ -38,3 +43,115 @@ KernelVersion: v4.10
Contact: linux-acpi@vger.kernel.org
Description:
(RO) The maximum sustained power for battery in milliwatts.
+
+What: /sys/bus/platform/devices/INT3407:00/dptf_power/rest_of_platform_power_mw
+Date: June, 2020
+KernelVersion: v5.8
+Contact: linux-acpi@vger.kernel.org
+Description:
+ (RO) Shows the rest (outside of SoC) of worst-case platform power.
+
+What: /sys/bus/platform/devices/INT3407:00/dptf_power/prochot_confirm
+Date: June, 2020
+KernelVersion: v5.8
+Contact: linux-acpi@vger.kernel.org
+Description:
+ (WO) Confirm embedded controller about a prochot notification.
+
+What: /sys/bus/platform/devices/INT3532:00/dptf_battery/max_platform_power_mw
+Date: June, 2020
+KernelVersion: v5.8
+Contact: linux-acpi@vger.kernel.org
+Description:
+ (RO) The maximum platform power that can be supported by the battery in milli watts.
+
+What: /sys/bus/platform/devices/INT3532:00/dptf_battery/max_steady_state_power_mw
+Date: June, 2020
+KernelVersion: v5.8
+Contact: linux-acpi@vger.kernel.org
+Description:
+ (RO) The maximum sustained power for battery in milli watts.
+
+What: /sys/bus/platform/devices/INT3532:00/dptf_battery/high_freq_impedance_mohm
+Date: June, 2020
+KernelVersion: v5.8
+Contact: linux-acpi@vger.kernel.org
+Description:
+ (RO) The high frequency impedance value that can be obtained from battery
+ fuel gauge in milli Ohms.
+
+What: /sys/bus/platform/devices/INT3532:00/dptf_battery/no_load_voltage_mv
+Date: June, 2020
+KernelVersion: v5.8
+Contact: linux-acpi@vger.kernel.org
+Description:
+ (RO) The no-load voltage that can be obtained from battery fuel gauge in
+ milli volts.
+
+What: /sys/bus/platform/devices/INT3532:00/dptf_battery/current_discharge_capbility_ma
+Date: June, 2020
+KernelVersion: v5.8
+Contact: linux-acpi@vger.kernel.org
+Description:
+ (RO) The battery discharge current capability obtained from battery fuel gauge in
+ milli Amps.
+
+What: /sys/bus/platform/devices/INTC1045:00/pch_fivr_switch_frequency/freq_mhz_low_clock
+Date: November, 2020
+KernelVersion: v5.10
+Contact: linux-acpi@vger.kernel.org
+Description:
+ (RW) The PCH FIVR (Fully Integrated Voltage Regulator) switching frequency in MHz,
+ when FIVR clock is 19.2MHz or 24MHz.
+
+What: /sys/bus/platform/devices/INTC1045:00/pch_fivr_switch_frequency/freq_mhz_high_clock
+Date: November, 2020
+KernelVersion: v5.10
+Contact: linux-acpi@vger.kernel.org
+Description:
+ (RW) The PCH FIVR (Fully Integrated Voltage Regulator) switching frequency in MHz,
+ when FIVR clock is 38.4MHz.
+
+What: /sys/bus/platform/devices/INTC1045:00/pch_fivr_switch_frequency/fivr_switching_freq_mhz
+Date: September, 2021
+KernelVersion: v5.15
+Contact: linux-acpi@vger.kernel.org
+Description:
+ (RO) Get the FIVR switching control frequency in MHz.
+
+What: /sys/bus/platform/devices/INTC1045:00/pch_fivr_switch_frequency/fivr_switching_fault_status
+Date: September, 2021
+KernelVersion: v5.15
+Contact: linux-acpi@vger.kernel.org
+Description:
+ (RO) Read the FIVR switching frequency control fault status.
+
+What: /sys/bus/platform/devices/INTC1045:00/pch_fivr_switch_frequency/ssc_clock_info
+Date: September, 2021
+KernelVersion: v5.15
+Contact: linux-acpi@vger.kernel.org
+Description:
+ (RO) Presents SSC (spread spectrum clock) information for EMI
+ (Electro magnetic interference) control. This is a bit mask.
+
+ ======= ==========================================
+ Bits Description
+ ======= ==========================================
+ [7:0] Sets clock spectrum spread percentage:
+ 0x00=0.2% , 0x3F=10%
+ 1 LSB = 0.1% increase in spread (for
+ settings 0x01 thru 0x1C)
+ 1 LSB = 0.2% increase in spread (for
+ settings 0x1E thru 0x3F)
+ [8] When set to 1, enables spread
+ spectrum clock
+ [9] 0: Triangle mode. FFC frequency
+ walks around the Fcenter in a linear
+ fashion
+ 1: Random walk mode. FFC frequency
+ changes randomly within the SSC
+ (Spread spectrum clock) range
+ [10] 0: No white noise. 1: Add white noise
+ to spread waveform
+ [11] When 1, future writes are ignored.
+ ======= ==========================================
diff --git a/Documentation/ABI/testing/sysfs-platform-eeepc-laptop b/Documentation/ABI/testing/sysfs-platform-eeepc-laptop
index 5b026c69587a..70dbe0733cf6 100644
--- a/Documentation/ABI/testing/sysfs-platform-eeepc-laptop
+++ b/Documentation/ABI/testing/sysfs-platform-eeepc-laptop
@@ -4,9 +4,11 @@ KernelVersion: 2.6.26
Contact: "Corentin Chary" <corentincj@iksaif.net>
Description:
This file allows display switching.
+
- 1 = LCD
- 2 = CRT
- 3 = LCD+CRT
+
If you run X11, you should use xrandr instead.
What: /sys/devices/platform/eeepc/camera
@@ -30,16 +32,20 @@ Contact: "Corentin Chary" <corentincj@iksaif.net>
Description:
Change CPU clock configuration.
On the Eee PC 1000H there are three available clock configuration:
+
* 0 -> Super Performance Mode
* 1 -> High Performance Mode
* 2 -> Power Saving Mode
+
On Eee PC 701 there is only 2 available clock configurations.
Available configuration are listed in available_cpufv file.
Reading this file will show the raw hexadecimal value which
- is defined as follow:
- | 8 bit | 8 bit |
- | `---- Current mode
- `------------ Availables modes
+ is defined as follow::
+
+ | 8 bit | 8 bit |
+ | `---- Current mode
+ `------------ Availables modes
+
For example, 0x301 means: mode 1 selected, 3 available modes.
What: /sys/devices/platform/eeepc/available_cpufv
diff --git a/Documentation/ABI/testing/sysfs-platform-hidma b/Documentation/ABI/testing/sysfs-platform-hidma
index fca40a54df59..a80aeda85ef6 100644
--- a/Documentation/ABI/testing/sysfs-platform-hidma
+++ b/Documentation/ABI/testing/sysfs-platform-hidma
@@ -2,7 +2,7 @@ What: /sys/devices/platform/hidma-*/chid
/sys/devices/platform/QCOM8061:*/chid
Date: Dec 2015
KernelVersion: 4.4
-Contact: "Sinan Kaya <okaya@codeaurora.org>"
+Contact: "Sinan Kaya <okaya@kernel.org>"
Description:
Contains the ID of the channel within the HIDMA instance.
It is used to associate a given HIDMA channel with the
diff --git a/Documentation/ABI/testing/sysfs-platform-hidma-mgmt b/Documentation/ABI/testing/sysfs-platform-hidma-mgmt
index 3b6c5c9eabdc..0373745b4e18 100644
--- a/Documentation/ABI/testing/sysfs-platform-hidma-mgmt
+++ b/Documentation/ABI/testing/sysfs-platform-hidma-mgmt
@@ -2,7 +2,7 @@ What: /sys/devices/platform/hidma-mgmt*/chanops/chan*/priority
/sys/devices/platform/QCOM8060:*/chanops/chan*/priority
Date: Nov 2015
KernelVersion: 4.4
-Contact: "Sinan Kaya <okaya@codeaurora.org>"
+Contact: "Sinan Kaya <okaya@kernel.org>"
Description:
Contains either 0 or 1 and indicates if the DMA channel is a
low priority (0) or high priority (1) channel.
@@ -11,7 +11,7 @@ What: /sys/devices/platform/hidma-mgmt*/chanops/chan*/weight
/sys/devices/platform/QCOM8060:*/chanops/chan*/weight
Date: Nov 2015
KernelVersion: 4.4
-Contact: "Sinan Kaya <okaya@codeaurora.org>"
+Contact: "Sinan Kaya <okaya@kernel.org>"
Description:
Contains 0..15 and indicates the weight of the channel among
equal priority channels during round robin scheduling.
@@ -20,7 +20,7 @@ What: /sys/devices/platform/hidma-mgmt*/chreset_timeout_cycles
/sys/devices/platform/QCOM8060:*/chreset_timeout_cycles
Date: Nov 2015
KernelVersion: 4.4
-Contact: "Sinan Kaya <okaya@codeaurora.org>"
+Contact: "Sinan Kaya <okaya@kernel.org>"
Description:
Contains the platform specific cycle value to wait after a
reset command is issued. If the value is chosen too short,
@@ -32,7 +32,7 @@ What: /sys/devices/platform/hidma-mgmt*/dma_channels
/sys/devices/platform/QCOM8060:*/dma_channels
Date: Nov 2015
KernelVersion: 4.4
-Contact: "Sinan Kaya <okaya@codeaurora.org>"
+Contact: "Sinan Kaya <okaya@kernel.org>"
Description:
Contains the number of dma channels supported by one instance
of HIDMA hardware. The value may change from chip to chip.
@@ -41,7 +41,7 @@ What: /sys/devices/platform/hidma-mgmt*/hw_version_major
/sys/devices/platform/QCOM8060:*/hw_version_major
Date: Nov 2015
KernelVersion: 4.4
-Contact: "Sinan Kaya <okaya@codeaurora.org>"
+Contact: "Sinan Kaya <okaya@kernel.org>"
Description:
Version number major for the hardware.
@@ -49,7 +49,7 @@ What: /sys/devices/platform/hidma-mgmt*/hw_version_minor
/sys/devices/platform/QCOM8060:*/hw_version_minor
Date: Nov 2015
KernelVersion: 4.4
-Contact: "Sinan Kaya <okaya@codeaurora.org>"
+Contact: "Sinan Kaya <okaya@kernel.org>"
Description:
Version number minor for the hardware.
@@ -57,7 +57,7 @@ What: /sys/devices/platform/hidma-mgmt*/max_rd_xactions
/sys/devices/platform/QCOM8060:*/max_rd_xactions
Date: Nov 2015
KernelVersion: 4.4
-Contact: "Sinan Kaya <okaya@codeaurora.org>"
+Contact: "Sinan Kaya <okaya@kernel.org>"
Description:
Contains a value between 0 and 31. Maximum number of
read transactions that can be issued back to back.
@@ -69,7 +69,7 @@ What: /sys/devices/platform/hidma-mgmt*/max_read_request
/sys/devices/platform/QCOM8060:*/max_read_request
Date: Nov 2015
KernelVersion: 4.4
-Contact: "Sinan Kaya <okaya@codeaurora.org>"
+Contact: "Sinan Kaya <okaya@kernel.org>"
Description:
Size of each read request. The value needs to be a power
of two and can be between 128 and 1024.
@@ -78,7 +78,7 @@ What: /sys/devices/platform/hidma-mgmt*/max_wr_xactions
/sys/devices/platform/QCOM8060:*/max_wr_xactions
Date: Nov 2015
KernelVersion: 4.4
-Contact: "Sinan Kaya <okaya@codeaurora.org>"
+Contact: "Sinan Kaya <okaya@kernel.org>"
Description:
Contains a value between 0 and 31. Maximum number of
write transactions that can be issued back to back.
@@ -91,7 +91,7 @@ What: /sys/devices/platform/hidma-mgmt*/max_write_request
/sys/devices/platform/QCOM8060:*/max_write_request
Date: Nov 2015
KernelVersion: 4.4
-Contact: "Sinan Kaya <okaya@codeaurora.org>"
+Contact: "Sinan Kaya <okaya@kernel.org>"
Description:
Size of each write request. The value needs to be a power
of two and can be between 128 and 1024.
diff --git a/Documentation/ABI/testing/sysfs-platform-i2c-demux-pinctrl b/Documentation/ABI/testing/sysfs-platform-i2c-demux-pinctrl
index 3c3514815cd5..b6a138b50d99 100644
--- a/Documentation/ABI/testing/sysfs-platform-i2c-demux-pinctrl
+++ b/Documentation/ABI/testing/sysfs-platform-i2c-demux-pinctrl
@@ -1,18 +1,18 @@
What: /sys/devices/platform/<i2c-demux-name>/available_masters
Date: January 2016
KernelVersion: 4.6
-Contact: Wolfram Sang <wsa@the-dreams.de>
+Contact: Wolfram Sang <wsa+renesas@sang-engineering.com>
Description:
Reading the file will give you a list of masters which can be
selected for a demultiplexed bus. The format is
- "<index>:<name>". Example from a Renesas Lager board:
+ "<index>:<name>". Example from a Renesas Lager board::
- 0:/i2c@e6500000 1:/i2c@e6508000
+ 0:/i2c@e6500000 1:/i2c@e6508000
What: /sys/devices/platform/<i2c-demux-name>/current_master
Date: January 2016
KernelVersion: 4.6
-Contact: Wolfram Sang <wsa@the-dreams.de>
+Contact: Wolfram Sang <wsa+renesas@sang-engineering.com>
Description:
This file selects/shows the active I2C master for a demultiplexed
bus. It uses the <index> value from the file 'available_masters'.
diff --git a/Documentation/ABI/testing/sysfs-platform-ideapad-laptop b/Documentation/ABI/testing/sysfs-platform-ideapad-laptop
index 1b31be3f996a..5ec0dee9e707 100644
--- a/Documentation/ABI/testing/sysfs-platform-ideapad-laptop
+++ b/Documentation/ABI/testing/sysfs-platform-ideapad-laptop
@@ -1,23 +1,24 @@
-What: /sys/devices/platform/ideapad/camera_power
+What: /sys/bus/platform/devices/VPC2004:*/camera_power
Date: Dec 2010
KernelVersion: 2.6.37
Contact: "Ike Panhc <ike.pan@canonical.com>"
Description:
Control the power of camera module. 1 means on, 0 means off.
-What: /sys/devices/platform/ideapad/fan_mode
+What: /sys/bus/platform/devices/VPC2004:*/fan_mode
Date: June 2012
KernelVersion: 3.6
Contact: "Maxim Mikityanskiy <maxtram95@gmail.com>"
Description:
Change fan mode
There are four available modes:
+
* 0 -> Super Silent Mode
* 1 -> Standard Mode
* 2 -> Dust Cleaning
* 4 -> Efficient Thermal Dissipation Mode
-What: /sys/devices/platform/ideapad/touchpad
+What: /sys/bus/platform/devices/VPC2004:*/touchpad
Date: May 2017
KernelVersion: 4.13
Contact: "Ritesh Raj Sarraf <rrs@debian.org>"
@@ -26,15 +27,26 @@ Description:
* 1 -> Switched On
* 0 -> Switched Off
-What: /sys/bus/pci/devices/<bdf>/<device>/VPC2004:00/fn_lock
+What: /sys/bus/platform/devices/VPC2004:*/fn_lock
Date: May 2018
KernelVersion: 4.18
Contact: "Oleg Keri <ezhi99@gmail.com>"
Description:
Control fn-lock mode.
+
* 1 -> Switched On
* 0 -> Switched Off
- For example:
- # echo "0" > \
- /sys/bus/pci/devices/0000:00:1f.0/PNP0C09:00/VPC2004:00/fn_lock
+ For example::
+
+ # echo "0" > \
+ /sys/bus/pci/devices/0000:00:1f.0/PNP0C09:00/VPC2004:00/fn_lock
+
+What: /sys/bus/platform/devices/VPC2004:*/usb_charging
+Date: Feb 2021
+KernelVersion: 5.12
+Contact: platform-driver-x86@vger.kernel.org
+Description:
+ Controls whether the "always on USB charging" feature is
+ enabled or not. This feature enables charging USB devices
+ even if the computer is not turned on.
diff --git a/Documentation/ABI/testing/sysfs-platform-intel-ifs b/Documentation/ABI/testing/sysfs-platform-intel-ifs
new file mode 100644
index 000000000000..41b4d5b1e21c
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-intel-ifs
@@ -0,0 +1,52 @@
+Device instance to test mapping
+intel_ifs_0 -> Scan Test
+intel_ifs_1 -> Array BIST test
+
+What: /sys/devices/virtual/misc/intel_ifs_<N>/run_test
+Date: Nov 16 2022
+KernelVersion: 6.2
+Contact: "Jithu Joseph" <jithu.joseph@intel.com>
+Description: Write <cpu#> to trigger IFS test for one online core.
+ Note that the test is per core. The cpu# can be
+ for any thread on the core. Running on one thread
+ completes the test for the core containing that thread.
+ Example: to test the core containing cpu5: echo 5 >
+ /sys/devices/virtual/misc/intel_ifs_<N>/run_test
+Devices: all
+
+What: /sys/devices/virtual/misc/intel_ifs_<N>/status
+Date: Nov 16 2022
+KernelVersion: 6.2
+Contact: "Jithu Joseph" <jithu.joseph@intel.com>
+Description: The status of the last test. It can be one of "pass", "fail"
+ or "untested".
+Devices: all
+
+What: /sys/devices/virtual/misc/intel_ifs_<N>/details
+Date: Nov 16 2022
+KernelVersion: 6.2
+Contact: "Jithu Joseph" <jithu.joseph@intel.com>
+Description: Additional information regarding the last test. The details file reports
+ the hex value of the STATUS MSR for this test. Note that the error_code field
+ may contain driver defined software code not defined in the Intel SDM.
+Devices: all
+
+What: /sys/devices/virtual/misc/intel_ifs_<N>/image_version
+Date: Nov 16 2022
+KernelVersion: 6.2
+Contact: "Jithu Joseph" <jithu.joseph@intel.com>
+Description: Version (hexadecimal) of loaded IFS test image. If no test image
+ is loaded reports "none". Only present for device instances where a test image
+ is applicable.
+Devices: intel_ifs_0
+
+What: /sys/devices/virtual/misc/intel_ifs_<N>/current_batch
+Date: Nov 16 2022
+KernelVersion: 6.2
+Contact: "Jithu Joseph" <jithu.joseph@intel.com>
+Description: Write a number less than or equal to 0xff to load an IFS test image.
+ The number written treated as the 2 digit suffix in the following file name:
+ /lib/firmware/intel/ifs_<N>/ff-mm-ss-02x.scan
+ Reading the file will provide the suffix of the currently loaded IFS test image.
+ This file is present only for device instances where a test image is applicable.
+Devices: intel_ifs_0
diff --git a/Documentation/ABI/testing/sysfs-platform-intel-pmc b/Documentation/ABI/testing/sysfs-platform-intel-pmc
new file mode 100644
index 000000000000..f31d59b21f9b
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-intel-pmc
@@ -0,0 +1,22 @@
+What: /sys/devices/platform/<platform>/etr3
+Date: Apr 2021
+KernelVersion: 5.13
+Contact: "Tomas Winkler" <tomas.winkler@intel.com>
+Description:
+ The file exposes "Extended Test Mode Register 3" global
+ reset bits. The bits are used during an Intel platform
+ manufacturing process to indicate that consequent reset
+ of the platform is a "global reset". This type of reset
+ is required in order for manufacturing configurations
+ to take effect.
+
+ Display global reset setting bits for PMC.
+
+ * bit 31 - global reset is locked
+ * bit 20 - global reset is set
+
+ Writing bit 20 value to the etr3 will induce
+ a platform "global reset" upon consequent platform reset,
+ in case the register is not locked.
+ The "global reset bit" should be locked on a production
+ system and the file is in read-only mode.
diff --git a/Documentation/ABI/testing/sysfs-platform-intel-wmi-sbl-fw-update b/Documentation/ABI/testing/sysfs-platform-intel-wmi-sbl-fw-update
new file mode 100644
index 000000000000..7ffd1579b8f7
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-intel-wmi-sbl-fw-update
@@ -0,0 +1,13 @@
+What: /sys/bus/wmi/devices/44FADEB1-B204-40F2-8581-394BBDC1B651[-X]/firmware_update_request
+Date: April 2020
+KernelVersion: 5.7
+Contact: "Jithu Joseph" <jithu.joseph@intel.com>
+Description:
+ Allow user space entities to trigger update of Slim
+ Bootloader (SBL). This attribute normally has a value
+ of 0 and userspace can signal SBL to update firmware,
+ on next reboot, by writing a value of 1.
+ There are two available states:
+
+ * 0 -> Skip firmware update while rebooting
+ * 1 -> Attempt firmware update on next reboot
diff --git a/Documentation/ABI/testing/sysfs-platform-intel-wmi-thunderbolt b/Documentation/ABI/testing/sysfs-platform-intel-wmi-thunderbolt
index 8af65059d519..10ef1282c9d2 100644
--- a/Documentation/ABI/testing/sysfs-platform-intel-wmi-thunderbolt
+++ b/Documentation/ABI/testing/sysfs-platform-intel-wmi-thunderbolt
@@ -1,11 +1,12 @@
-What: /sys/devices/platform/<platform>/force_power
+What: /sys/bus/wmi/devices/86CCFD48-205E-4A77-9C48-2021CBEDE341[-X]/force_power
Date: September 2017
KernelVersion: 4.15
-Contact: "Mario Limonciello" <mario.limonciello@dell.com>
+Contact: "Mario Limonciello" <mario.limonciello@outlook.com>
Description:
Modify the platform force power state, influencing
Thunderbolt controllers to turn on or off when no
devices are connected (write-only)
There are two available states:
+
* 0 -> Force power disabled
* 1 -> Force power enabled
diff --git a/Documentation/ABI/testing/sysfs-platform-kim b/Documentation/ABI/testing/sysfs-platform-kim
index c1653271872a..0a38caa62a32 100644
--- a/Documentation/ABI/testing/sysfs-platform-kim
+++ b/Documentation/ABI/testing/sysfs-platform-kim
@@ -5,10 +5,11 @@ Contact: "Pavan Savoy" <pavan_savoy@ti.com>
Description:
Name of the UART device at which the WL128x chip
is connected. example: "/dev/ttyS0".
+
The device name flows down to architecture specific board
- initialization file from the SFI/ATAGS bootloader
+ initialization file from the ATAGS bootloader
firmware. The name exposed is read from the user-space
- dameon and opens the device when install is requested.
+ daemon and opens the device when install is requested.
What: /sys/devices/platform/kim/baud_rate
Date: January 2010
diff --git a/Documentation/ABI/testing/sysfs-platform-lg-laptop b/Documentation/ABI/testing/sysfs-platform-lg-laptop
index cf47749b19df..0570cd524d0e 100644
--- a/Documentation/ABI/testing/sysfs-platform-lg-laptop
+++ b/Documentation/ABI/testing/sysfs-platform-lg-laptop
@@ -17,6 +17,7 @@ Date: October 2018
KernelVersion: 4.20
Contact: "Matan Ziv-Av <matan@svgalib.org>
Description:
+ Deprecated use /sys/class/power_supply/CMB0/charge_control_end_threshold
Maximal battery charge level. Accepted values are 80 or 100.
What: /sys/devices/platform/lg-laptop/fan_mode
diff --git a/Documentation/ABI/testing/sysfs-platform-mellanox-bootctl b/Documentation/ABI/testing/sysfs-platform-mellanox-bootctl
new file mode 100644
index 000000000000..09f783fa0a53
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-mellanox-bootctl
@@ -0,0 +1,162 @@
+What: /sys/bus/platform/devices/MLNXBF04:00/lifecycle_state
+Date: Oct 2019
+KernelVersion: 5.5
+Contact: "Liming Sun <lsun@mellanox.com>"
+Description:
+ The Life-cycle state of the SoC, which could be one of the
+ following values.
+
+ ============== =============================================
+ Production Production state and can be updated to secure
+ GA Secured Secure chip and not able to change state
+ GA Non-Secured Non-Secure chip and not able to change state
+ RMA Return Merchandise Authorization
+ ============== =============================================
+
+What: /sys/bus/platform/devices/MLNXBF04:00/post_reset_wdog
+Date: Oct 2019
+KernelVersion: 5.5
+Contact: "Liming Sun <lsun@mellanox.com>"
+Description:
+ The watchdog setting in seconds for the next booting. It's used
+ to reboot the chip and recover it to the old state if the new
+ boot partition fails.
+
+What: /sys/bus/platform/devices/MLNXBF04:00/reset_action
+Date: Oct 2019
+KernelVersion: 5.5
+Contact: "Liming Sun <lsun@mellanox.com>"
+Description:
+ The source of the boot stream for the next reset. It could be
+ one of the following values:
+
+ =========== ===============================================
+ external boot from external source (USB or PCIe)
+ emmc boot from the onchip eMMC
+ emmc_legacy boot from the onchip eMMC in legacy (slow) mode
+ =========== ===============================================
+
+What: /sys/bus/platform/devices/MLNXBF04:00/second_reset_action
+Date: Oct 2019
+KernelVersion: 5.5
+Contact: "Liming Sun <lsun@mellanox.com>"
+Description:
+ Update the source of the boot stream after next reset. It could
+ be one of the following values and will be applied after next
+ reset.
+
+ =========== ===============================================
+ external boot from external source (USB or PCIe)
+ emmc boot from the onchip eMMC
+ emmc_legacy boot from the onchip eMMC in legacy (slow) mode
+ swap_emmc swap the primary / secondary boot partition
+ none cancel the action
+ =========== ===============================================
+
+What: /sys/bus/platform/devices/MLNXBF04:00/secure_boot_fuse_state
+Date: Oct 2019
+KernelVersion: 5.5
+Contact: "Liming Sun <lsun@mellanox.com>"
+Description:
+ The state of eFuse versions with the following values.
+
+ ======= ===============================================
+ InUse burnt, valid and currently in use
+ Used burnt and valid
+ Free not burnt and free to use
+ Skipped not burnt but not free (skipped)
+ Wasted burnt and invalid
+ Invalid not burnt but marked as valid (error state).
+ ======= ===============================================
+
+What: /sys/bus/platform/devices/MLNXBF04:00/bootfifo
+Date: Apr 2023
+KernelVersion: 6.4
+Contact: "Liming Sun <limings@nvidia.com>"
+Description:
+ The file used to access the BlueField boot fifo.
+
+What: /sys/bus/platform/devices/MLNXBF04:00/rsh_log
+Date: May 2023
+KernelVersion: 6.4
+Contact: "Liming Sun <limings@nvidia.com>"
+Description:
+ The file used to write BlueField boot log with the format
+ "[INFO|WARN|ERR|ASSERT ]<msg>". Log level 'INFO' is used by
+ default if not specified.
+
+What: /sys/bus/platform/devices/MLNXBF04:00/oob_mac
+Date: August 2023
+KernelVersion: 6.5
+Contact: "David Thompson <davthompson@nvidia.com>"
+Description:
+ The "oob_mac" sysfs attribute holds the MAC address for
+ the out-of-band 1Gbps Ethernet port. This MAC address is
+ provided on a board-level label.
+
+What: /sys/bus/platform/devices/MLNXBF04:00/opn
+Date: August 2023
+KernelVersion: 6.5
+Contact: "David Thompson <davthompson@nvidia.com>"
+Description:
+ The "opn" sysfs attribute holds the board's part number.
+ This value is provided on a board-level label.
+
+What: /sys/bus/platform/devices/MLNXBF04:00/sku
+Date: August 2023
+KernelVersion: 6.5
+Contact: "David Thompson <davthompson@nvidia.com>"
+Description:
+ The "sku" sysfs attribute holds the board's SKU number.
+ This value is provided on a board-level label.
+
+What: /sys/bus/platform/devices/MLNXBF04:00/modl
+Date: August 2023
+KernelVersion: 6.5
+Contact: "David Thompson <davthompson@nvidia.com>"
+Description:
+ The "modl" sysfs attribute holds the board's model number.
+ This value is provided on a board-level label.
+
+What: /sys/bus/platform/devices/MLNXBF04:00/sn
+Date: August 2023
+KernelVersion: 6.5
+Contact: "David Thompson <davthompson@nvidia.com>"
+Description:
+ The "sn" sysfs attribute holds the board's serial number.
+ This value is provided on a board-level label.
+
+What: /sys/bus/platform/devices/MLNXBF04:00/uuid
+Date: August 2023
+KernelVersion: 6.5
+Contact: "David Thompson <davthompson@nvidia.com>"
+Description:
+ The "uuid" sysfs attribute holds the board's UUID.
+ This value is provided by the manufacturing team.
+
+What: /sys/bus/platform/devices/MLNXBF04:00/rev
+Date: August 2023
+KernelVersion: 6.5
+Contact: "David Thompson <davthompson@nvidia.com>"
+Description:
+ The "rev" sysfs attribute holds the board's revision.
+ This value is provided on a board-level label.
+
+What: /sys/bus/platform/devices/MLNXBF04:00/mfg_lock
+Date: August 2023
+KernelVersion: 6.5
+Contact: "David Thompson <davthompson@nvidia.com>"
+Description:
+ The "mfg_lock" sysfs attribute is write-only.
+ A successful write to this attribute will latch the
+ board-level attributes into EEPROM, making them read-only.
+
+What: /sys/bus/platform/devices/MLNXBF04:00/rtc_battery
+Date: June 2025
+KernelVersion: 6.15
+Contact: "Xiangrong Li <xiangrongl@nvidia.com>"
+Description:
+ The "rtc_battery" sysfs attribute is read-only.
+ A successful read from this attribute returns the status of
+ the board's RTC battery. The RTC battery status register is
+ also cleared upon successful read operation.
diff --git a/Documentation/ABI/testing/sysfs-platform-mellanox-pmc b/Documentation/ABI/testing/sysfs-platform-mellanox-pmc
new file mode 100644
index 000000000000..29b3f9c58e00
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-mellanox-pmc
@@ -0,0 +1,64 @@
+HID Driver Description
+MLNXBFD0 mlxbf-pmc Performance counters (BlueField-1)
+MLNXBFD1 mlxbf-pmc Performance counters (BlueField-2)
+MLNXBFD2 mlxbf-pmc Performance counters (BlueField-3)
+
+What: /sys/bus/platform/devices/<HID>/hwmon/hwmonX/<block>/event_list
+Date: Dec 2020
+KernelVersion: 5.10
+Contact: "Shravan Kumar Ramani <shravankr@nvidia.com>"
+Description:
+ List of events supported by the counters in the specific block.
+ It is used to extract the event number or ID associated with
+ each event.
+
+What: /sys/bus/platform/devices/<HID>/hwmon/hwmonX/<block>/event<N>
+Date: Dec 2020
+KernelVersion: 5.10
+Contact: "Shravan Kumar Ramani <shravankr@nvidia.com>"
+Description:
+ Event monitored by corresponding counter. This is used to
+ program or read back the event that should be or is currently
+ being monitored by counter<N>.
+
+What: /sys/bus/platform/devices/<HID>/hwmon/hwmonX/<block>/counter<N>
+Date: Dec 2020
+KernelVersion: 5.10
+Contact: "Shravan Kumar Ramani <shravankr@nvidia.com>"
+Description:
+ Counter value of the event being monitored. This is used to
+ read the counter value of the event which was programmed using
+ event<N>. This is also used to clear or reset the counter value
+ by writing 0 to the counter sysfs.
+
+What: /sys/bus/platform/devices/<HID>/hwmon/hwmonX/<block>/enable
+Date: Dec 2020
+KernelVersion: 5.10
+Contact: "Shravan Kumar Ramani <shravankr@nvidia.com>"
+Description:
+ Start or stop counters. This is used to start the counters
+ for monitoring the programmed events and also to stop the
+ counters after the desired duration. Writing value 1 will
+ start all the counters in the block, and writing 0 will
+ stop all the counters together.
+
+What: /sys/bus/platform/devices/<HID>/hwmon/hwmonX/<block>/<reg>
+Date: Dec 2020
+KernelVersion: 5.10
+Contact: "Shravan Kumar Ramani <shravankr@nvidia.com>"
+Description:
+ Value of register. This is used to read or reset the registers
+ where various performance statistics are counted for each block.
+ Writing 0 to the sysfs will clear the counter, writing any other
+ value is not allowed.
+
+What: /sys/bus/platform/devices/<HID>/hwmon/hwmonX/<block>/count_clock
+Date: Mar 2025
+KernelVersion: 6.14
+Contact: "Shravan Kumar Ramani <shravankr@nvidia.com>"
+Description:
+ Use a counter for counting cycles. This is used to repurpose/dedicate
+ any of the counters in the block to counting cycles. Each counter is
+ represented by a bit (bit 0 for counter0, bit1 for counter1 and so on)
+ and setting the corresponding bit will reserve that specific counter
+ for counting cycles and override the event<N> setting.
diff --git a/Documentation/ABI/testing/sysfs-platform-oxp b/Documentation/ABI/testing/sysfs-platform-oxp
new file mode 100644
index 000000000000..b3f39fc21dfa
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-oxp
@@ -0,0 +1,25 @@
+What: /sys/devices/platform/<platform>/tt_toggle
+Date: Jun 2023
+KernelVersion: 6.5
+Contact: "Antheas Kapenekakis" <lkml@antheas.dev>
+Description:
+ Takeover TDP controls from the device. OneXPlayer devices have a
+ turbo button that can be used to switch between two TDP modes
+ (usually 15W and 25W). By setting this attribute to 1, this
+ functionality is disabled, handing TDP control over to (Windows)
+ userspace software and the Turbo button turns into a keyboard
+ shortcut over the AT keyboard of the device. In addition,
+ using this setting is a prerequisite for PWM control for most
+ newer models (otherwise it NOOPs).
+
+What: /sys/devices/platform/<platform>/tt_led
+Date: April 2025
+KernelVersion: 6.16
+Contact: "Antheas Kapenekakis" <lkml@antheas.dev>
+Description:
+ Some OneXPlayer devices (e.g., X1 series) feature a little LED
+ nested in the Turbo button. This LED is illuminated when the
+ device is in the higher TDP mode (e.g., 25W). Once tt_toggle
+ is engaged, this LED is left dangling to its last state. This
+ attribute allows userspace to control the LED state manually
+ (either with 1 or 0). Only a subset of devices contain this LED.
diff --git a/Documentation/ABI/testing/sysfs-platform-phy-rcar-gen3-usb2 b/Documentation/ABI/testing/sysfs-platform-phy-rcar-gen3-usb2
index 6212697bbf6f..bc510ccc37a7 100644
--- a/Documentation/ABI/testing/sysfs-platform-phy-rcar-gen3-usb2
+++ b/Documentation/ABI/testing/sysfs-platform-phy-rcar-gen3-usb2
@@ -7,9 +7,11 @@ Description:
The file can show/change the phy mode for role swap of usb.
Write the following strings to change the mode:
- "host" - switching mode from peripheral to host.
- "peripheral" - switching mode from host to peripheral.
+
+ - "host" - switching mode from peripheral to host.
+ - "peripheral" - switching mode from host to peripheral.
Read the file, then it shows the following strings:
- "host" - The mode is host now.
- "peripheral" - The mode is peripheral now.
+
+ - "host" - The mode is host now.
+ - "peripheral" - The mode is peripheral now.
diff --git a/Documentation/ABI/testing/sysfs-platform-power-on-reason b/Documentation/ABI/testing/sysfs-platform-power-on-reason
new file mode 100644
index 000000000000..c3b29dbc64bf
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-power-on-reason
@@ -0,0 +1,12 @@
+What: /sys/devices/platform/.../power_on_reason
+Date: June 2023
+KernelVersion: 6.5
+Contact: Kamel Bouhara <kamel.bouhara@bootlin.com>
+Description: Shows system power on reason. The following strings/reasons can
+ be read (the list can be extended):
+ "regular power-up", "RTC wakeup", "watchdog timeout",
+ "software reset", "reset button action", "CPU clock failure",
+ "crystal oscillator failure", "brown-out reset",
+ "unknown reason".
+
+ The file is read only.
diff --git a/Documentation/ABI/testing/sysfs-platform-renesas_usb3 b/Documentation/ABI/testing/sysfs-platform-renesas_usb3
index 5621c15d5dc0..b08379e7fe37 100644
--- a/Documentation/ABI/testing/sysfs-platform-renesas_usb3
+++ b/Documentation/ABI/testing/sysfs-platform-renesas_usb3
@@ -7,9 +7,11 @@ Description:
The file can show/change the drd mode of usb.
Write the following string to change the mode:
- "host" - switching mode from peripheral to host.
- "peripheral" - switching mode from host to peripheral.
+
+ - "host" - switching mode from peripheral to host.
+ - "peripheral" - switching mode from host to peripheral.
Read the file, then it shows the following strings:
- "host" - The mode is host now.
- "peripheral" - The mode is peripheral now.
+
+ - "host" - The mode is host now.
+ - "peripheral" - The mode is peripheral now.
diff --git a/Documentation/ABI/testing/sysfs-platform-silicom b/Documentation/ABI/testing/sysfs-platform-silicom
new file mode 100644
index 000000000000..4d1cc5bdbcc5
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-silicom
@@ -0,0 +1,30 @@
+What: /sys/devices/platform/silicom-platform/uc_version
+Date: November 2023
+KernelVersion: 6.7
+Contact: Henry Shi <henrys@silicom-usa.com>
+Description:
+ This file allows to read microcontroller firmware
+ version of current platform.
+
+What: /sys/devices/platform/silicom-platform/power_cycle
+Date: November 2023
+KernelVersion: 6.7
+Contact: Henry Shi <henrys@silicom-usa.com>
+Description:
+ This file allow user to power cycle the platform.
+ Default value is 0; when set to 1, it powers down
+ the platform, waits 5 seconds, then powers on the
+ device. It returns to default value after power cycle.
+
+ 0 - default value.
+
+What: /sys/devices/platform/silicom-platform/efuse_status
+Date: November 2023
+KernelVersion: 6.7
+Contact: Henry Shi <henrys@silicom-usa.com>
+Description:
+ This file is read only. It returns the current
+ OTP status:
+
+ 0 - not programmed.
+ 1 - programmed.
diff --git a/Documentation/ABI/testing/sysfs-platform-sst-atom b/Documentation/ABI/testing/sysfs-platform-sst-atom
index 0d07c0395660..4bb2e6135c2e 100644
--- a/Documentation/ABI/testing/sysfs-platform-sst-atom
+++ b/Documentation/ABI/testing/sysfs-platform-sst-atom
@@ -1,17 +1,26 @@
-What: /sys/devices/platform/8086%x:00/firmware_version
+What: /sys/devices/platform/8086<x>:00/firmware_version
Date: November 2016
KernelVersion: 4.10
Contact: "Sebastien Guiriec" <sebastien.guiriec@intel.com>
Description:
LPE Firmware version for SST driver on all atom
- plaforms (BYT/CHT/Merrifield/BSW).
- If the FW has never been loaded it will display:
+ platforms (BYT/CHT/Merrifield/BSW).
+ If the FW has never been loaded it will display::
+
"FW not yet loaded"
- If FW has been loaded it will display:
+
+ If FW has been loaded it will display::
+
"v01.aa.bb.cc"
+
aa: Major version is reflecting SoC version:
+
+ === =============
0d: BYT FW
0b: BSW FW
07: Merrifield FW
+ === =============
+
bb: Minor version
+
cc: Build version
diff --git a/Documentation/ABI/testing/sysfs-platform-usbip-vudc b/Documentation/ABI/testing/sysfs-platform-usbip-vudc
index 81fcfb454913..53622d3ba27c 100644
--- a/Documentation/ABI/testing/sysfs-platform-usbip-vudc
+++ b/Documentation/ABI/testing/sysfs-platform-usbip-vudc
@@ -16,10 +16,13 @@ Contact: Krzysztof Opasiak <k.opasiak@samsung.com>
Description:
Current status of the device.
Allowed values:
- 1 - Device is available and can be exported
- 2 - Device is currently exported
- 3 - Fatal error occurred during communication
- with peer
+
+ == ==========================================
+ 1 Device is available and can be exported
+ 2 Device is currently exported
+ 3 Fatal error occurred during communication
+ with peer
+ == ==========================================
What: /sys/devices/platform/usbip-vudc.%d/usbip_sockfd
Date: April 2016
diff --git a/Documentation/ABI/testing/sysfs-platform-wilco-ec b/Documentation/ABI/testing/sysfs-platform-wilco-ec
new file mode 100644
index 000000000000..4439d0644091
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-wilco-ec
@@ -0,0 +1,58 @@
+What: /sys/bus/platform/devices/GOOG000C\:00/boot_on_ac
+Date: April 2019
+KernelVersion: 5.3
+Description:
+ Boot on AC is a policy which makes the device boot from S5
+ when AC power is connected. This is useful for users who
+ want to run their device headless or with a dock.
+
+ Input should be parseable by kstrtou8() to 0 or 1.
+
+What: /sys/bus/platform/devices/GOOG000C\:00/build_date
+Date: May 2019
+KernelVersion: 5.3
+Description:
+ Display Wilco Embedded Controller firmware build date.
+ Output will a MM/DD/YY string.
+
+What: /sys/bus/platform/devices/GOOG000C\:00/build_revision
+Date: May 2019
+KernelVersion: 5.3
+Description:
+ Display Wilco Embedded Controller build revision.
+ Output will a version string be similar to the example below:
+ d2592cae0
+
+What: /sys/bus/platform/devices/GOOG000C\:00/model_number
+Date: May 2019
+KernelVersion: 5.3
+Description:
+ Display Wilco Embedded Controller model number.
+ Output will a version string be similar to the example below:
+ 08B6
+
+What: /sys/bus/platform/devices/GOOG000C\:00/usb_charge
+Date: October 2019
+KernelVersion: 5.5
+Description:
+ Control the USB PowerShare Policy. USB PowerShare is a policy
+ which affects charging via the special USB PowerShare port
+ (marked with a small lightning bolt or battery icon) when in
+ low power states:
+
+ - In S0, the port will always provide power.
+ - In S0ix, if usb_charge is enabled, then power will be
+ supplied to the port when on AC or if battery is > 50%.
+ Else no power is supplied.
+ - In S5, if usb_charge is enabled, then power will be supplied
+ to the port when on AC. Else no power is supplied.
+
+ Input should be either "0" or "1".
+
+What: /sys/bus/platform/devices/GOOG000C\:00/version
+Date: May 2019
+KernelVersion: 5.3
+Description:
+ Display Wilco Embedded Controller firmware version.
+ The format of the string is x.y.z. Where x is major, y is minor
+ and z is the build number. For example: 95.00.06
diff --git a/Documentation/ABI/testing/sysfs-platform_profile b/Documentation/ABI/testing/sysfs-platform_profile
new file mode 100644
index 000000000000..125324ab53a9
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform_profile
@@ -0,0 +1,40 @@
+What: /sys/firmware/acpi/platform_profile_choices
+Date: October 2020
+Contact: Hans de Goede <hdegoede@redhat.com>
+Description: This file contains a space-separated list of profiles supported for this device.
+
+ Drivers must use the following standard profile-names:
+
+ ==================== ========================================
+ low-power Low power consumption
+ cool Cooler operation
+ quiet Quieter operation
+ balanced Balance between low power consumption
+ and performance
+ balanced-performance Balance between performance and low
+ power consumption with a slight bias
+ towards performance
+ performance High performance operation
+ ==================== ========================================
+
+ Userspace may expect drivers to offer more than one of these
+ standard profile names.
+
+What: /sys/firmware/acpi/platform_profile
+Date: October 2020
+Contact: Hans de Goede <hdegoede@redhat.com>
+Description: Reading this file gives the current selected profile for this
+ device. Writing this file with one of the strings from
+ platform_profile_choices changes the profile to the new value.
+
+ This file can be monitored for changes by polling for POLLPRI,
+ POLLPRI will be signalled on any changes, independent of those
+ changes coming from a userspace write; or coming from another
+ source such as e.g. a hotkey triggered profile change handled
+ either directly by the embedded-controller or fully handled
+ inside the kernel.
+
+ This file may also emit the string 'custom' to indicate
+ that multiple platform profiles drivers are in use but
+ have different values. This string can not be written to
+ this interface and is solely for informational purposes.
diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power
index 18b7dc929234..4d8e1ad020f0 100644
--- a/Documentation/ABI/testing/sysfs-power
+++ b/Documentation/ABI/testing/sysfs-power
@@ -1,6 +1,6 @@
What: /sys/power/
Date: August 2006
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/power directory will contain files that will
provide a unified interface to the power management
@@ -8,7 +8,7 @@ Description:
What: /sys/power/state
Date: November 2016
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/power/state file controls system sleep states.
Reading from this file returns the available sleep state
@@ -23,7 +23,7 @@ Description:
What: /sys/power/mem_sleep
Date: November 2016
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/power/mem_sleep file controls the operating mode of
system suspend. Reading from it returns the available modes
@@ -41,20 +41,24 @@ Description:
What: /sys/power/disk
Date: September 2006
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/power/disk file controls the operating mode of the
suspend-to-disk mechanism. Reading from this file returns
the name of the method by which the system will be put to
sleep on the next suspend. There are four methods supported:
+
'firmware' - means that the memory image will be saved to disk
by some firmware, in which case we also assume that the
firmware will handle the system suspend.
+
'platform' - the memory image will be saved by the kernel and
the system will be put to sleep by the platform driver (e.g.
ACPI or other PM registers).
+
'shutdown' - the memory image will be saved by the kernel and
the system will be powered off.
+
'reboot' - the memory image will be saved by the kernel and
the system will be rebooted.
@@ -74,19 +78,19 @@ Description:
The suspend-to-disk method may be chosen by writing to this
file one of the accepted strings:
- 'firmware'
- 'platform'
- 'shutdown'
- 'reboot'
- 'testproc'
- 'test'
+ - 'firmware'
+ - 'platform'
+ - 'shutdown'
+ - 'reboot'
+ - 'testproc'
+ - 'test'
It will only change to 'firmware' or 'platform' if the system
supports that.
What: /sys/power/image_size
Date: August 2006
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/power/image_size file controls the size of the image
created by the suspend-to-disk mechanism. It can be written a
@@ -103,7 +107,7 @@ Description:
What: /sys/power/pm_trace
Date: August 2006
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/power/pm_trace file controls the code which saves the
last PM event point in the RTC across reboots, so that you can
@@ -114,9 +118,9 @@ Description:
string representing a nonzero integer into it.
To use this debugging feature you should attempt to suspend
- the machine, then reboot it and run
+ the machine, then reboot it and run::
- dmesg -s 1000000 | grep 'hash matches'
+ dmesg -s 1000000 | grep 'hash matches'
If you do not get any matches (or they appear to be false
positives), it is possible that the last PM event point
@@ -127,7 +131,7 @@ Description:
CAUTION: Using it will cause your machine's real-time (CMOS)
clock to be set to a random invalid time after a resume.
-What; /sys/power/pm_trace_dev_match
+What: /sys/power/pm_trace_dev_match
Date: October 2010
Contact: James Hogan <jhogan@kernel.org>
Description:
@@ -148,11 +152,11 @@ Description:
case further investigation is required to determine which
device is causing the problem. Note that genuine RTC clock
values (such as when pm_trace has not been used), can still
- match a device and output it's name here.
+ match a device and output its name here.
What: /sys/power/pm_async
Date: January 2009
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/power/pm_async file controls the switch allowing the
user space to enable or disable asynchronous suspend and resume
@@ -165,7 +169,7 @@ Description:
What: /sys/power/wakeup_count
Date: July 2010
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/power/wakeup_count file allows user space to put the
system into a sleep state while taking into account the
@@ -180,7 +184,7 @@ Description:
What: /sys/power/reserved_size
Date: May 2011
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/power/reserved_size file allows user space to control
the amount of memory reserved for allocations made by device
@@ -194,7 +198,7 @@ Description:
What: /sys/power/autosleep
Date: April 2012
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/power/autosleep file can be written one of the strings
returned by reads from /sys/power/state. If that happens, a
@@ -211,7 +215,7 @@ Description:
What: /sys/power/wake_lock
Date: February 2012
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/power/wake_lock file allows user space to create
wakeup source objects and activate them on demand (if one of
@@ -238,12 +242,13 @@ Description:
What: /sys/power/wake_unlock
Date: February 2012
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/power/wake_unlock file allows user space to deactivate
wakeup sources created with the help of /sys/power/wake_lock.
When a string is written to /sys/power/wake_unlock, it will be
assumed to represent the name of a wakeup source to deactivate.
+
If a wakeup source object of that name exists and is active at
the moment, it will be deactivated.
@@ -278,7 +283,7 @@ Description:
What: /sys/power/pm_debug_messages
Date: July 2017
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/power/pm_debug_messages file controls the printing
of debug messages from the system suspend/hiberbation
@@ -290,7 +295,7 @@ Description:
What: /sys/power/resume_offset
Date: April 2018
-Contact: Mario Limonciello <mario.limonciello@dell.com>
+Contact: Mario Limonciello <mario.limonciello@outlook.com>
Description:
This file is used for telling the kernel an offset into a disk
to use when hibernating the system such as with a swap file.
@@ -300,4 +305,152 @@ Description:
attempt.
Using this sysfs file will override any values that were
- set using the kernel command line for disk offset. \ No newline at end of file
+ set using the kernel command line for disk offset.
+
+What: /sys/power/suspend_stats
+Date: July 2019
+Contact: Kalesh Singh <kaleshsingh96@gmail.com>
+Description:
+ The /sys/power/suspend_stats directory contains suspend related
+ statistics.
+
+What: /sys/power/suspend_stats/success
+Date: July 2019
+Contact: Kalesh Singh <kaleshsingh96@gmail.com>
+Description:
+ The /sys/power/suspend_stats/success file contains the number
+ of times entering system sleep state succeeded.
+
+What: /sys/power/suspend_stats/fail
+Date: July 2019
+Contact: Kalesh Singh <kaleshsingh96@gmail.com>
+Description:
+ The /sys/power/suspend_stats/fail file contains the number
+ of times entering system sleep state failed.
+
+What: /sys/power/suspend_stats/failed_freeze
+Date: July 2019
+Contact: Kalesh Singh <kaleshsingh96@gmail.com>
+Description:
+ The /sys/power/suspend_stats/failed_freeze file contains the
+ number of times freezing processes failed.
+
+What: /sys/power/suspend_stats/failed_prepare
+Date: July 2019
+Contact: Kalesh Singh <kaleshsingh96@gmail.com>
+Description:
+ The /sys/power/suspend_stats/failed_prepare file contains the
+ number of times preparing all non-sysdev devices for
+ a system PM transition failed.
+
+What: /sys/power/suspend_stats/failed_resume
+Date: July 2019
+Contact: Kalesh Singh <kaleshsingh96@gmail.com>
+Description:
+ The /sys/power/suspend_stats/failed_resume file contains the
+ number of times executing "resume" callbacks of
+ non-sysdev devices failed.
+
+What: /sys/power/suspend_stats/failed_resume_early
+Date: July 2019
+Contact: Kalesh Singh <kaleshsingh96@gmail.com>
+Description:
+ The /sys/power/suspend_stats/failed_resume_early file contains
+ the number of times executing "early resume" callbacks
+ of devices failed.
+
+What: /sys/power/suspend_stats/failed_resume_noirq
+Date: July 2019
+Contact: Kalesh Singh <kaleshsingh96@gmail.com>
+Description:
+ The /sys/power/suspend_stats/failed_resume_noirq file contains
+ the number of times executing "noirq resume" callbacks
+ of devices failed.
+
+What: /sys/power/suspend_stats/failed_suspend
+Date: July 2019
+Contact: Kalesh Singh <kaleshsingh96@gmail.com>
+Description:
+ The /sys/power/suspend_stats/failed_suspend file contains
+ the number of times executing "suspend" callbacks
+ of all non-sysdev devices failed.
+
+What: /sys/power/suspend_stats/failed_suspend_late
+Date: July 2019
+Contact: Kalesh Singh <kaleshsingh96@gmail.com>
+Description:
+ The /sys/power/suspend_stats/failed_suspend_late file contains
+ the number of times executing "late suspend" callbacks
+ of all devices failed.
+
+What: /sys/power/suspend_stats/failed_suspend_noirq
+Date: July 2019
+Contact: Kalesh Singh <kaleshsingh96@gmail.com>
+Description:
+ The /sys/power/suspend_stats/failed_suspend_noirq file contains
+ the number of times executing "noirq suspend" callbacks
+ of all devices failed.
+
+What: /sys/power/suspend_stats/last_failed_dev
+Date: July 2019
+Contact: Kalesh Singh <kaleshsingh96@gmail.com>
+Description:
+ The /sys/power/suspend_stats/last_failed_dev file contains
+ the last device for which a suspend/resume callback failed.
+
+What: /sys/power/suspend_stats/last_failed_errno
+Date: July 2019
+Contact: Kalesh Singh <kaleshsingh96@gmail.com>
+Description:
+ The /sys/power/suspend_stats/last_failed_errno file contains
+ the errno of the last failed attempt at entering
+ system sleep state.
+
+What: /sys/power/suspend_stats/last_failed_step
+Date: July 2019
+Contact: Kalesh Singh <kaleshsingh96@gmail.com>
+Description:
+ The /sys/power/suspend_stats/last_failed_step file contains
+ the last failed step in the suspend/resume path.
+
+What: /sys/power/suspend_stats/last_hw_sleep
+Date: June 2023
+Contact: Mario Limonciello <mario.limonciello@amd.com>
+Description:
+ The /sys/power/suspend_stats/last_hw_sleep file
+ contains the duration of time spent in a hardware sleep
+ state in the most recent system suspend-resume cycle.
+ This number is measured in microseconds.
+
+What: /sys/power/suspend_stats/total_hw_sleep
+Date: June 2023
+Contact: Mario Limonciello <mario.limonciello@amd.com>
+Description:
+ The /sys/power/suspend_stats/total_hw_sleep file
+ contains the aggregate of time spent in a hardware sleep
+ state since the kernel was booted. This number
+ is measured in microseconds.
+
+What: /sys/power/suspend_stats/max_hw_sleep
+Date: June 2023
+Contact: Mario Limonciello <mario.limonciello@amd.com>
+Description:
+ The /sys/power/suspend_stats/max_hw_sleep file
+ contains the maximum amount of time that the hardware can
+ report for time spent in a hardware sleep state. When sleep
+ cycles are longer than this time, the values for
+ 'total_hw_sleep' and 'last_hw_sleep' may not be accurate.
+ This number is measured in microseconds.
+
+What: /sys/power/sync_on_suspend
+Date: October 2019
+Contact: Jonas Meurer <jonas@freesources.org>
+Description:
+ This file controls whether or not the kernel will sync()
+ filesystems during system suspend (after freezing user space
+ and before suspending devices).
+
+ Writing a "1" to this file enables the sync() and writing a "0"
+ disables it. Reads from the file return the current value.
+ The default is "1" if the build-time "SUSPEND_SKIP_SYNC" config
+ flag is unset, or "0" otherwise.
diff --git a/Documentation/ABI/testing/sysfs-pps-gen b/Documentation/ABI/testing/sysfs-pps-gen
new file mode 100644
index 000000000000..2519207b88fd
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-pps-gen
@@ -0,0 +1,43 @@
+What: /sys/class/pps-gen/
+Date: February 2025
+KernelVersion: 6.13
+Contact: Rodolfo Giometti <giometti@enneenne.com>
+Description:
+ The /sys/class/pps-gen/ directory contains files and
+ directories that provide a unified interface to the PPS
+ generators.
+
+What: /sys/class/pps-gen/pps-genX/
+Date: February 2025
+KernelVersion: 6.13
+Contact: Rodolfo Giometti <giometti@enneenne.com>
+Description:
+ The /sys/class/pps-gen/pps-genX/ directory is related to X-th
+ PPS generator in the system. Each directory contain files to
+ manage and control its PPS generator.
+
+What: /sys/class/pps-gen/pps-genX/enable
+Date: February 2025
+KernelVersion: 6.13
+Contact: Rodolfo Giometti <giometti@enneenne.com>
+Description:
+ This write-only file enables or disables generation of the
+ PPS signal.
+
+What: /sys/class/pps-gen/pps-genX/system
+Date: February 2025
+KernelVersion: 6.13
+Contact: Rodolfo Giometti <giometti@enneenne.com>
+Description:
+ This read-only file returns "1" if the generator takes the
+ timing from the system clock, while it returns "0" if not
+ (i.e. from a peripheral device clock).
+
+What: /sys/class/pps-gen/pps-genX/time
+Date: February 2025
+KernelVersion: 6.13
+Contact: Rodolfo Giometti <giometti@enneenne.com>
+Description:
+ This read-only file contains the current time stored into the
+ generator clock as two integers representing the current time
+ seconds and nanoseconds.
diff --git a/Documentation/ABI/testing/sysfs-pps-gen-tio b/Documentation/ABI/testing/sysfs-pps-gen-tio
new file mode 100644
index 000000000000..3c34ff17a335
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-pps-gen-tio
@@ -0,0 +1,6 @@
+What: /sys/class/pps-gen/pps-genx/enable
+Date: April 2025
+KernelVersion: 6.15
+Contact: Subramanian Mohan<subramanian.mohan@intel.com>
+Description:
+ Enable or disable PPS TIO generator output.
diff --git a/Documentation/ABI/testing/sysfs-profiling b/Documentation/ABI/testing/sysfs-profiling
index 8a8e466eb2c0..e39dd3a0ceef 100644
--- a/Documentation/ABI/testing/sysfs-profiling
+++ b/Documentation/ABI/testing/sysfs-profiling
@@ -5,7 +5,7 @@ Description:
/sys/kernel/profiling is the runtime equivalent
of the boot-time profile= option.
- You can get the same effect running:
+ You can get the same effect running::
echo 2 > /sys/kernel/profiling
diff --git a/Documentation/ABI/testing/sysfs-ptp b/Documentation/ABI/testing/sysfs-ptp
index a17f817a9309..9c317ac7c47a 100644
--- a/Documentation/ABI/testing/sysfs-ptp
+++ b/Documentation/ABI/testing/sysfs-ptp
@@ -6,7 +6,7 @@ Description:
providing a standardized interface to the ancillary
features of PTP hardware clocks.
-What: /sys/class/ptp/ptpN/
+What: /sys/class/ptp/ptp<N>/
Date: September 2010
Contact: Richard Cochran <richardcochran@gmail.com>
Description:
@@ -14,7 +14,7 @@ Description:
hardware clock registered into the PTP class driver
subsystem.
-What: /sys/class/ptp/ptpN/clock_name
+What: /sys/class/ptp/ptp<N>/clock_name
Date: September 2010
Contact: Richard Cochran <richardcochran@gmail.com>
Description:
@@ -25,7 +25,7 @@ Description:
MAC based ones. The string does not necessarily have
to be any kind of unique id.
-What: /sys/class/ptp/ptpN/max_adjustment
+What: /sys/class/ptp/ptp<N>/max_adjustment
Date: September 2010
Contact: Richard Cochran <richardcochran@gmail.com>
Description:
@@ -33,35 +33,55 @@ Description:
frequency adjustment value (a positive integer) in
parts per billion.
-What: /sys/class/ptp/ptpN/n_alarms
+What: /sys/class/ptp/ptp<N>/max_vclocks
+Date: May 2021
+Contact: Yangbo Lu <yangbo.lu@nxp.com>
+Description:
+ This file contains the maximum number of ptp vclocks.
+ Write integer to re-configure it.
+
+What: /sys/class/ptp/ptp<N>/n_alarms
Date: September 2010
Contact: Richard Cochran <richardcochran@gmail.com>
Description:
This file contains the number of periodic or one shot
alarms offer by the PTP hardware clock.
-What: /sys/class/ptp/ptpN/n_external_timestamps
+What: /sys/class/ptp/ptp<N>/n_external_timestamps
Date: September 2010
Contact: Richard Cochran <richardcochran@gmail.com>
Description:
This file contains the number of external timestamp
channels offered by the PTP hardware clock.
-What: /sys/class/ptp/ptpN/n_periodic_outputs
+What: /sys/class/ptp/ptp<N>/n_periodic_outputs
Date: September 2010
Contact: Richard Cochran <richardcochran@gmail.com>
Description:
This file contains the number of programmable periodic
output channels offered by the PTP hardware clock.
-What: /sys/class/ptp/ptpN/n_pins
+What: /sys/class/ptp/ptp<N>/n_pins
Date: March 2014
Contact: Richard Cochran <richardcochran@gmail.com>
Description:
This file contains the number of programmable pins
offered by the PTP hardware clock.
-What: /sys/class/ptp/ptpN/pins
+What: /sys/class/ptp/ptp<N>/n_vclocks
+Date: May 2021
+Contact: Yangbo Lu <yangbo.lu@nxp.com>
+Description:
+ This file contains the number of virtual PTP clocks in
+ use. By default, the value is 0 meaning that only the
+ physical clock is in use. Setting the value creates
+ the corresponding number of virtual clocks and causes
+ the physical clock to become free running. Setting the
+ value back to 0 deletes the virtual clocks and
+ switches the physical clock back to normal, adjustable
+ operation.
+
+What: /sys/class/ptp/ptp<N>/pins
Date: March 2014
Contact: Richard Cochran <richardcochran@gmail.com>
Description:
@@ -69,12 +89,12 @@ Description:
pin offered by the PTP hardware clock. The file name
is the hardware dependent pin name. Reading from this
file produces two numbers, the assigned function (see
- the PTP_PF_ enumeration values in linux/ptp_clock.h)
+ the `PTP_PF_` enumeration values in linux/ptp_clock.h)
and the channel number. The function and channel
assignment may be changed by two writing numbers into
the file.
-What: /sys/class/ptp/ptpN/pps_available
+What: /sys/class/ptp/ptp<N>/pps_available
Date: September 2010
Contact: Richard Cochran <richardcochran@gmail.com>
Description:
@@ -83,7 +103,7 @@ Description:
"1" means that the PPS is supported, while "0" means
not supported.
-What: /sys/class/ptp/ptpN/extts_enable
+What: /sys/class/ptp/ptp<N>/extts_enable
Date: September 2010
Contact: Richard Cochran <richardcochran@gmail.com>
Description:
@@ -93,7 +113,7 @@ Description:
To disable external timestamps, write the channel
index followed by a "0" into the file.
-What: /sys/class/ptp/ptpN/fifo
+What: /sys/class/ptp/ptp<N>/fifo
Date: September 2010
Contact: Richard Cochran <richardcochran@gmail.com>
Description:
@@ -101,7 +121,7 @@ Description:
the form of three integers: channel index, seconds,
and nanoseconds.
-What: /sys/class/ptp/ptpN/period
+What: /sys/class/ptp/ptp<N>/period
Date: September 2010
Contact: Richard Cochran <richardcochran@gmail.com>
Description:
@@ -112,7 +132,7 @@ Description:
period nanoseconds. To disable a periodic output, set
all the seconds and nanoseconds values to zero.
-What: /sys/class/ptp/ptpN/pps_enable
+What: /sys/class/ptp/ptp<N>/pps_enable
Date: September 2010
Contact: Richard Cochran <richardcochran@gmail.com>
Description:
diff --git a/Documentation/ABI/testing/sysfs-secvar b/Documentation/ABI/testing/sysfs-secvar
new file mode 100644
index 000000000000..1016967a730f
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-secvar
@@ -0,0 +1,130 @@
+What: /sys/firmware/secvar
+Date: August 2019
+Contact: Nayna Jain <nayna@linux.ibm.com>
+Description: This directory is created if the POWER firmware supports OS
+ secureboot, thereby secure variables. It exposes interface
+ for reading/writing the secure variables
+
+What: /sys/firmware/secvar/vars
+Date: August 2019
+Contact: Nayna Jain <nayna@linux.ibm.com>
+Description: This directory lists all the secure variables that are supported
+ by the firmware.
+
+What: /sys/firmware/secvar/format
+Date: August 2019
+Contact: Nayna Jain <nayna@linux.ibm.com>
+Description: A string indicating which backend is in use by the firmware.
+ This determines the format of the variable and the accepted
+ format of variable updates.
+
+ On powernv/OPAL, this value is provided by the OPAL firmware
+ and is expected to be "ibm,edk2-compat-v1".
+
+ On pseries/PLPKS, this is generated by the kernel based on the
+ version number in the SB_VERSION variable in the keystore. The
+ version numbering in the SB_VERSION variable starts from 1. The
+ format string takes the form "ibm,plpks-sb-v<version>" in the
+ case of dynamic key management mode. If the SB_VERSION variable
+ does not exist (or there is an error while reading it), it takes
+ the form "ibm,plpks-sb-v0", indicating that the key management
+ mode is static.
+
+What: /sys/firmware/secvar/vars/<variable name>
+Date: August 2019
+Contact: Nayna Jain <nayna@linux.ibm.com>
+Description: Each secure variable is represented as a directory named as
+ <variable_name>. The variable name is unique and is in ASCII
+ representation. The data and size can be determined by reading
+ their respective attribute files.
+
+ Only secvars relevant to the key management mode are exposed.
+ Only in the dynamic key management mode should the user have
+ access (read and write) to the secure boot secvars db, dbx,
+ grubdb, grubdbx, and sbat. These secvars are not consumed in the
+ static key management mode. PK, trustedcadb and moduledb are the
+ secvars common to both static and dynamic key management modes.
+
+What: /sys/firmware/secvar/vars/<variable_name>/size
+Date: August 2019
+Contact: Nayna Jain <nayna@linux.ibm.com>
+Description: An integer representation of the size of the content of the
+ variable. In other words, it represents the size of the data.
+
+What: /sys/firmware/secvar/vars/<variable_name>/data
+Date: August 2019
+Contact: Nayna Jain <nayna@linux.ibm.com>
+Description: A read-only file containing the value of the variable. The size
+ of the file represents the maximum size of the variable data.
+
+What: /sys/firmware/secvar/vars/<variable_name>/update
+Date: August 2019
+Contact: Nayna Jain <nayna@linux.ibm.com>
+Description: A write-only file that is used to submit the new value for the
+ variable. The size of the file represents the maximum size of
+ the variable data that can be written.
+
+What: /sys/firmware/secvar/config
+Date: February 2023
+Contact: Nayna Jain <nayna@linux.ibm.com>
+Description: This optional directory contains read-only config attributes as
+ defined by the secure variable implementation. All data is in
+ ASCII format. The directory is only created if the backing
+ implementation provides variables to populate it, which at
+ present is only PLPKS on the pseries platform.
+
+What: /sys/firmware/secvar/config/version
+Date: February 2023
+Contact: Nayna Jain <nayna@linux.ibm.com>
+Description: Config version as reported by the hypervisor in ASCII decimal
+ format.
+
+ Currently only provided by PLPKS on the pseries platform.
+
+What: /sys/firmware/secvar/config/max_object_size
+Date: February 2023
+Contact: Nayna Jain <nayna@linux.ibm.com>
+Description: Maximum allowed size of objects in the keystore in bytes,
+ represented in ASCII decimal format.
+
+ This is not necessarily the same as the max size that can be
+ written to an update file as writes can contain more than
+ object data, you should use the size of the update file for
+ that purpose.
+
+ Currently only provided by PLPKS on the pseries platform.
+
+What: /sys/firmware/secvar/config/total_size
+Date: February 2023
+Contact: Nayna Jain <nayna@linux.ibm.com>
+Description: Total size of the PLPKS in bytes, represented in ASCII decimal
+ format.
+
+ Currently only provided by PLPKS on the pseries platform.
+
+What: /sys/firmware/secvar/config/used_space
+Date: February 2023
+Contact: Nayna Jain <nayna@linux.ibm.com>
+Description: Current space consumed by the key store, in bytes, represented
+ in ASCII decimal format.
+
+ Currently only provided by PLPKS on the pseries platform.
+
+What: /sys/firmware/secvar/config/supported_policies
+Date: February 2023
+Contact: Nayna Jain <nayna@linux.ibm.com>
+Description: Bitmask of supported policy flags by the hypervisor,
+ represented as an 8 byte hexadecimal ASCII string. Consult the
+ hypervisor documentation for what these flags are.
+
+ Currently only provided by PLPKS on the pseries platform.
+
+What: /sys/firmware/secvar/config/signed_update_algorithms
+Date: February 2023
+Contact: Nayna Jain <nayna@linux.ibm.com>
+Description: Bitmask of flags indicating which algorithms the hypervisor
+ supports for signed update of objects, represented as a 16 byte
+ hexadecimal ASCII string. Consult the hypervisor documentation
+ for what these flags mean.
+
+ Currently only provided by PLPKS on the pseries platform.
diff --git a/Documentation/ABI/testing/sysfs-timecard b/Documentation/ABI/testing/sysfs-timecard
new file mode 100644
index 000000000000..3ae41b7634ac
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-timecard
@@ -0,0 +1,293 @@
+What: /sys/class/timecard/
+Date: September 2021
+Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
+Description: This directory contains files and directories
+ providing a standardized interface to the ancillary
+ features of the OpenCompute timecard.
+
+What: /sys/class/timecard/ocpN/
+Date: September 2021
+Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
+Description: This directory contains the attributes of the Nth timecard
+ registered.
+
+What: /sys/class/timecard/ocpN/available_clock_sources
+Date: September 2021
+Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
+Description: (RO) The list of available time sources that the PHC
+ uses for clock adjustments.
+
+ ==== =================================================
+ NONE no adjustments
+ PPS adjustments come from the PPS1 selector (default)
+ TOD adjustments from the GNSS/TOD module
+ IRIG adjustments from external IRIG-B signal
+ DCF adjustments from external DCF signal
+ ==== =================================================
+
+What: /sys/class/timecard/ocpN/available_sma_inputs
+Date: September 2021
+Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
+Description: (RO) Set of available destinations (sinks) for a SMA
+ input signal.
+
+ ===== ================================================
+ 10Mhz signal is used as the 10Mhz reference clock
+ PPS1 signal is sent to the PPS1 selector
+ PPS2 signal is sent to the PPS2 selector
+ TS1 signal is sent to timestamper 1
+ TS2 signal is sent to timestamper 2
+ TS3 signal is sent to timestamper 3
+ TS4 signal is sent to timestamper 4
+ IRIG signal is sent to the IRIG-B module
+ DCF signal is sent to the DCF module
+ FREQ1 signal is sent to frequency counter 1
+ FREQ2 signal is sent to frequency counter 2
+ FREQ3 signal is sent to frequency counter 3
+ FREQ4 signal is sent to frequency counter 4
+ None signal input is disabled
+ ===== ================================================
+
+What: /sys/class/timecard/ocpN/available_sma_outputs
+Date: May 2021
+Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
+Description: (RO) Set of available sources for a SMA output signal.
+
+ ===== ================================================
+ 10Mhz output is from the 10Mhz reference clock
+ PHC output PPS is from the PHC clock
+ MAC output PPS is from the Miniature Atomic Clock
+ GNSS1 output PPS is from the first GNSS module
+ GNSS2 output PPS is from the second GNSS module
+ IRIG output is from the PHC, in IRIG-B format
+ DCF output is from the PHC, in DCF format
+ GEN1 output is from frequency generator 1
+ GEN2 output is from frequency generator 2
+ GEN3 output is from frequency generator 3
+ GEN4 output is from frequency generator 4
+ GND output is GND
+ VCC output is VCC
+ ===== ================================================
+
+What: /sys/class/timecard/ocpN/clock_source
+Date: September 2021
+Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
+Description: (RW) Contains the current synchronization source used by
+ the PHC. May be changed by writing one of the listed
+ values from the available_clock_sources attribute set.
+
+What: /sys/class/timecard/ocpN/clock_status_drift
+Date: March 2022
+Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
+Description: (RO) Contains the current drift value used by the firmware
+ for internal disciplining of the atomic clock.
+
+What: /sys/class/timecard/ocpN/clock_status_offset
+Date: March 2022
+Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
+Description: (RO) Contains the current offset value used by the firmware
+ for internal disciplining of the atomic clock.
+
+What: /sys/class/timecard/ocpN/freqX
+Date: March 2022
+Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
+Description: (RO) Optional directory containing the sysfs nodes for
+ frequency counter <X>.
+
+What: /sys/class/timecard/ocpN/freqX/frequency
+Date: March 2022
+Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
+Description: (RO) Contains the measured frequency over the specified
+ measurement period.
+
+What: /sys/class/timecard/ocpN/freqX/seconds
+Date: March 2022
+Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
+Description: (RW) Specifies the number of seconds from 0-255 that the
+ frequency should be measured over. Write 0 to disable.
+
+What: /sys/class/timecard/ocpN/genX
+Date: March 2022
+Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
+Description: (RO) Optional directory containing the sysfs nodes for
+ frequency generator <X>.
+
+What: /sys/class/timecard/ocpN/genX/duty
+Date: March 2022
+Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
+Description: (RO) Specifies the signal duty cycle as a percentage from 1-99.
+
+What: /sys/class/timecard/ocpN/genX/period
+Date: March 2022
+Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
+Description: (RO) Specifies the signal period in nanoseconds.
+
+What: /sys/class/timecard/ocpN/genX/phase
+Date: March 2022
+Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
+Description: (RO) Specifies the signal phase offset in nanoseconds.
+
+What: /sys/class/timecard/ocpN/genX/polarity
+Date: March 2022
+Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
+Description: (RO) Specifies the signal polarity, either 1 or 0.
+
+What: /sys/class/timecard/ocpN/genX/running
+Date: March 2022
+Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
+Description: (RO) Either 0 or 1, showing if the signal generator is running.
+
+What: /sys/class/timecard/ocpN/genX/start
+Date: March 2022
+Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
+Description: (RO) Shows the time in <sec>.<nsec> that the signal generator
+ started running.
+
+What: /sys/class/timecard/ocpN/genX/signal
+Date: March 2022
+Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
+Description: (RW) Used to start the signal generator, and summarize
+ the current status.
+
+ The signal generator may be started by writing the signal
+ period, followed by the optional signal values. If the
+ optional values are not provided, they default to the current
+ settings, which may be obtained from the other sysfs nodes.
+
+ period [duty [phase [polarity]]]
+
+ echo 500000000 > signal # 1/2 second period
+ echo 1000000 40 100 > signal
+ echo 0 > signal # turn off generator
+
+ Period and phase are specified in nanoseconds. Duty cycle is
+ a percentage from 1-99. Polarity is 1 or 0.
+
+ Reading this node will return:
+
+ period duty phase polarity start_time
+
+What: /sys/class/timecard/ocpN/gnss_sync
+Date: September 2021
+Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
+Description: (RO) Indicates whether a valid GNSS signal is received,
+ or when the signal was lost.
+
+What: /sys/class/timecard/ocpN/i2c
+Date: September 2021
+Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
+Description: This optional attribute links to the associated i2c device.
+
+What: /sys/class/timecard/ocpN/irig_b_mode
+Date: September 2021
+Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
+Description: (RW) An integer from 0-7 indicating the timecode format
+ of the IRIG-B output signal: B00<n>
+
+What: /sys/class/timecard/ocpN/pps
+Date: September 2021
+Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
+Description: This optional attribute links to the associated PPS device.
+
+What: /sys/class/timecard/ocpN/ptp
+Date: September 2021
+Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
+Description: This attribute links to the associated PTP device.
+
+What: /sys/class/timecard/ocpN/serialnum
+Date: September 2021
+Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
+Description: (RO) Provides the serial number of the timecard.
+
+What: /sys/class/timecard/ocpN/sma1
+What: /sys/class/timecard/ocpN/sma2
+What: /sys/class/timecard/ocpN/sma3
+What: /sys/class/timecard/ocpN/sma4
+Date: September 2021
+Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
+Description: (RW) These attributes specify the direction of the signal
+ on the associated SMA connectors, and also the signal sink
+ or source.
+
+ The display format of the attribute is a space separated
+ list of signals, prefixed by the input/output direction.
+
+ The signal direction may be changed (if supported) by
+ prefixing the signal list with either "in:" or "out:".
+ If neither prefix is present, then the direction is unchanged.
+
+ The output signal may be changed by writing one of the listed
+ values from the available_sma_outputs attribute set.
+
+ The input destinations may be changed by writing multiple
+ values from the available_sma_inputs attribute set,
+ separated by spaces. If there are duplicated input
+ destinations between connectors, the lowest numbered SMA
+ connector is given priority.
+
+ Note that not all input combinations may make sense.
+
+ The 10Mhz reference clock input is currently only valid
+ on SMA1 and may not be combined with other destination sinks.
+
+What: /sys/class/timecard/ocpN/tod_correction
+Date: March 2022
+Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
+Description: (RW) The incoming GNSS signal is in UTC time, and the NMEA
+ format messages do not provide a TAI offset. This sets the
+ correction value for the incoming time.
+
+ If UBX_LS is enabled, this should be 0, and the offset is
+ taken from the UBX-NAV-TIMELS message.
+
+What: /sys/class/timecard/ocpN/ts_window_adjust
+Date: September 2021
+Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
+Description: (RW) When retrieving the PHC with the PTP SYS_OFFSET_EXTENDED
+ ioctl, a system timestamp is made before and after the PHC
+ time is retrieved. The midpoint between the two system
+ timestamps is usually taken to be the SYS time associated
+ with the PHC time. This estimate may be wrong, as it depends
+ on PCI latencies, and when the PHC time was latched
+
+ The attribute value reduces the end timestamp by the given
+ number of nanoseconds, so the computed midpoint matches the
+ retrieved PHC time.
+
+ The initial value is set based on measured PCI latency and
+ the estimated point where the FPGA latches the PHC time. This
+ value may be changed by writing an unsigned integer.
+
+What: /sys/class/timecard/ocpN/tty
+Date: August 2024
+Contact: Vadim Fedorenko <vadim.fedorenko@linux.dev>
+Description: (RO) Directory containing the sysfs nodes for TTY attributes
+
+What: /sys/class/timecard/ocpN/tty/ttyGNSS
+What: /sys/class/timecard/ocpN/tty/ttyGNSS2
+Date: August 2024
+Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
+Description: (RO) These optional attributes contain names of the TTY serial
+ ports associated with the GNSS devices.
+
+What: /sys/class/timecard/ocpN/tty/ttyMAC
+Date: August 2024
+Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
+Description: (RO) This optional attribute contains name of the TTY serial
+ port associated with the Miniature Atomic Clock.
+
+What: /sys/class/timecard/ocpN/tty/ttyNMEA
+Date: August 2024
+Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
+Description: (RO) This optional attribute contains name of the TTY serial
+ port which outputs the PHC time in NMEA ZDA format.
+
+What: /sys/class/timecard/ocpN/utc_tai_offset
+Date: September 2021
+Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
+Description: (RW) The DCF and IRIG output signals are in UTC, while the
+ TimeCard operates on TAI. This attribute allows setting the
+ offset in seconds, which is added to the TAI timebase for
+ these formats.
+
+ The offset may be changed by writing an unsigned integer.
diff --git a/Documentation/ABI/testing/sysfs-tty b/Documentation/ABI/testing/sysfs-tty
index 9eb3c2b6b040..895c47f05f6f 100644
--- a/Documentation/ABI/testing/sysfs-tty
+++ b/Documentation/ABI/testing/sysfs-tty
@@ -9,7 +9,7 @@ Description:
The file supports poll() to detect virtual
console switches.
-What: /sys/class/tty/tty0/active
+What: /sys/class/tty/tty<x>/active
Date: Nov 2010
Contact: Kay Sievers <kay.sievers@vrfy.org>
Description:
@@ -18,7 +18,7 @@ Description:
The file supports poll() to detect virtual
console switches.
-What: /sys/class/tty/ttyS0/uartclk
+What: /sys/class/tty/ttyS<x>/uartclk
Date: Sep 2012
Contact: Tomas Hlavacek <tmshlvck@gmail.com>
Description:
@@ -29,7 +29,7 @@ Description:
These sysfs values expose the TIOCGSERIAL interface via
sysfs rather than via ioctls.
-What: /sys/class/tty/ttyS0/type
+What: /sys/class/tty/ttyS<x>/type
Date: October 2012
Contact: Alan Cox <alan@linux.intel.com>
Description:
@@ -38,7 +38,7 @@ Description:
These sysfs values expose the TIOCGSERIAL interface via
sysfs rather than via ioctls.
-What: /sys/class/tty/ttyS0/line
+What: /sys/class/tty/ttyS<x>/line
Date: October 2012
Contact: Alan Cox <alan@linux.intel.com>
Description:
@@ -47,7 +47,7 @@ Description:
These sysfs values expose the TIOCGSERIAL interface via
sysfs rather than via ioctls.
-What: /sys/class/tty/ttyS0/port
+What: /sys/class/tty/ttyS<x>/port
Date: October 2012
Contact: Alan Cox <alan@linux.intel.com>
Description:
@@ -56,7 +56,7 @@ Description:
These sysfs values expose the TIOCGSERIAL interface via
sysfs rather than via ioctls.
-What: /sys/class/tty/ttyS0/irq
+What: /sys/class/tty/ttyS<x>/irq
Date: October 2012
Contact: Alan Cox <alan@linux.intel.com>
Description:
@@ -65,7 +65,7 @@ Description:
These sysfs values expose the TIOCGSERIAL interface via
sysfs rather than via ioctls.
-What: /sys/class/tty/ttyS0/flags
+What: /sys/class/tty/ttyS<x>/flags
Date: October 2012
Contact: Alan Cox <alan@linux.intel.com>
Description:
@@ -74,7 +74,7 @@ Description:
These sysfs values expose the TIOCGSERIAL interface via
sysfs rather than via ioctls.
-What: /sys/class/tty/ttyS0/xmit_fifo_size
+What: /sys/class/tty/ttyS<x>/xmit_fifo_size
Date: October 2012
Contact: Alan Cox <alan@linux.intel.com>
Description:
@@ -83,25 +83,28 @@ Description:
These sysfs values expose the TIOCGSERIAL interface via
sysfs rather than via ioctls.
-What: /sys/class/tty/ttyS0/close_delay
+What: /sys/class/tty/ttyS<x>/close_delay
Date: October 2012
Contact: Alan Cox <alan@linux.intel.com>
Description:
- Show the closing delay time for this port in ms.
+ Show the closing delay time for this port in centiseconds.
- These sysfs values expose the TIOCGSERIAL interface via
- sysfs rather than via ioctls.
+ These sysfs values expose the TIOCGSERIAL interface via
+ sysfs rather than via ioctls.
-What: /sys/class/tty/ttyS0/closing_wait
+What: /sys/class/tty/ttyS<x>/closing_wait
Date: October 2012
Contact: Alan Cox <alan@linux.intel.com>
Description:
- Show the close wait time for this port in ms.
+ Show the close wait time for this port in centiseconds.
- These sysfs values expose the TIOCGSERIAL interface via
- sysfs rather than via ioctls.
+ Waiting forever is represented as 0. If waiting on close is
+ disabled then the value is 65535.
-What: /sys/class/tty/ttyS0/custom_divisor
+ These sysfs values expose the TIOCGSERIAL interface via
+ sysfs rather than via ioctls.
+
+What: /sys/class/tty/ttyS<x>/custom_divisor
Date: October 2012
Contact: Alan Cox <alan@linux.intel.com>
Description:
@@ -110,7 +113,7 @@ Description:
These sysfs values expose the TIOCGSERIAL interface via
sysfs rather than via ioctls.
-What: /sys/class/tty/ttyS0/io_type
+What: /sys/class/tty/ttyS<x>/io_type
Date: October 2012
Contact: Alan Cox <alan@linux.intel.com>
Description:
@@ -120,7 +123,7 @@ Description:
These sysfs values expose the TIOCGSERIAL interface via
sysfs rather than via ioctls.
-What: /sys/class/tty/ttyS0/iomem_base
+What: /sys/class/tty/ttyS<x>/iomem_base
Date: October 2012
Contact: Alan Cox <alan@linux.intel.com>
Description:
@@ -129,7 +132,7 @@ Description:
These sysfs values expose the TIOCGSERIAL interface via
sysfs rather than via ioctls.
-What: /sys/class/tty/ttyS0/iomem_reg_shift
+What: /sys/class/tty/ttyS<x>/iomem_reg_shift
Date: October 2012
Contact: Alan Cox <alan@linux.intel.com>
Description:
@@ -139,7 +142,7 @@ Description:
These sysfs values expose the TIOCGSERIAL interface via
sysfs rather than via ioctls.
-What: /sys/class/tty/ttyS0/rx_trig_bytes
+What: /sys/class/tty/ttyS<x>/rx_trig_bytes
Date: May 2014
Contact: Yoshihiro YUNOMAE <yoshihiro.yunomae.ez@hitachi.com>
Description:
@@ -154,3 +157,10 @@ Description:
device specification. For example, when user sets 7bytes on
16550A, which has 1/4/8/14 bytes trigger, the RX trigger is
automatically changed to 4 bytes.
+
+What: /sys/class/tty/ttyS<x>/console
+Date: February 2020
+Contact: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Description:
+ Allows user to detach or attach back the given device as
+ kernel console. It shows and accepts a boolean variable.
diff --git a/Documentation/ABI/testing/sysfs-uevent b/Documentation/ABI/testing/sysfs-uevent
index aa39f8d7bcdf..0b6227706b35 100644
--- a/Documentation/ABI/testing/sysfs-uevent
+++ b/Documentation/ABI/testing/sysfs-uevent
@@ -6,42 +6,46 @@ Description:
Enable passing additional variables for synthetic uevents that
are generated by writing /sys/.../uevent file.
- Recognized extended format is ACTION [UUID [KEY=VALUE ...].
+ Recognized extended format is::
- The ACTION is compulsory - it is the name of the uevent action
- ("add", "change", "remove"). There is no change compared to
- previous functionality here. The rest of the extended format
- is optional.
+ ACTION [UUID [KEY=VALUE ...]
+
+ The ACTION is compulsory - it is the name of the uevent
+ action (``add``, ``change``, ``remove``). There is no change
+ compared to previous functionality here. The rest of the
+ extended format is optional.
You need to pass UUID first before any KEY=VALUE pairs.
- The UUID must be in "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
+ The UUID must be in ``xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx``
format where 'x' is a hex digit. The UUID is considered to be
a transaction identifier so it's possible to use the same UUID
value for one or more synthetic uevents in which case we
logically group these uevents together for any userspace
listeners. The UUID value appears in uevent as
- "SYNTH_UUID=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" environment
+ ``SYNTH_UUID=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`` environment
variable.
If UUID is not passed in, the generated synthetic uevent gains
- "SYNTH_UUID=0" environment variable automatically.
+ ``SYNTH_UUID=0`` environment variable automatically.
The KEY=VALUE pairs can contain alphanumeric characters only.
+
It's possible to define zero or more pairs - each pair is then
delimited by a space character ' '. Each pair appears in
- synthetic uevent as "SYNTH_ARG_KEY=VALUE". That means the KEY
- name gains "SYNTH_ARG_" prefix to avoid possible collisions
+ synthetic uevent as ``SYNTH_ARG_KEY=VALUE``. That means the KEY
+ name gains ``SYNTH_ARG_`` prefix to avoid possible collisions
with existing variables.
- Example of valid sequence written to the uevent file:
+ Example of valid sequence written to the uevent file::
add fe4d7c9d-b8c6-4a70-9ef1-3d8a58d18eed A=1 B=abc
- This generates synthetic uevent including these variables:
+ This generates synthetic uevent including these variables::
ACTION=add
SYNTH_ARG_A=1
SYNTH_ARG_B=abc
SYNTH_UUID=fe4d7c9d-b8c6-4a70-9ef1-3d8a58d18eed
+
Users:
udev, userspace tools generating synthetic uevents
diff --git a/Documentation/ABI/testing/sysfs-wusb_cbaf b/Documentation/ABI/testing/sysfs-wusb_cbaf
deleted file mode 100644
index a99c5f86a37a..000000000000
--- a/Documentation/ABI/testing/sysfs-wusb_cbaf
+++ /dev/null
@@ -1,100 +0,0 @@
-What: /sys/bus/usb/drivers/wusb_cbaf/.../wusb_*
-Date: August 2008
-KernelVersion: 2.6.27
-Contact: David Vrabel <david.vrabel@csr.com>
-Description:
- Various files for managing Cable Based Association of
- (wireless) USB devices.
-
- The sequence of operations should be:
-
- 1. Device is plugged in.
-
- 2. The connection manager (CM) sees a device with CBA capability.
- (the wusb_chid etc. files in /sys/devices/blah/OURDEVICE).
-
- 3. The CM writes the host name, supported band groups,
- and the CHID (host ID) into the wusb_host_name,
- wusb_host_band_groups and wusb_chid files. These
- get sent to the device and the CDID (if any) for
- this host is requested.
-
- 4. The CM can verify that the device's supported band
- groups (wusb_device_band_groups) are compatible
- with the host.
-
- 5. The CM reads the wusb_cdid file.
-
- 6. The CM looks it up its database.
-
- - If it has a matching CHID,CDID entry, the device
- has been authorized before and nothing further
- needs to be done.
-
- - If the CDID is zero (or the CM doesn't find a
- matching CDID in its database), the device is
- assumed to be not known. The CM may associate
- the host with device by: writing a randomly
- generated CDID to wusb_cdid and then a random CK
- to wusb_ck (this uploads the new CC to the
- device).
-
- CMD may choose to prompt the user before
- associating with a new device.
-
- 7. Device is unplugged.
-
- References:
- [WUSB-AM] Association Models Supplement to the
- Certified Wireless Universal Serial Bus
- Specification, version 1.0.
-
-What: /sys/bus/usb/drivers/wusb_cbaf/.../wusb_chid
-Date: August 2008
-KernelVersion: 2.6.27
-Contact: David Vrabel <david.vrabel@csr.com>
-Description:
- The CHID of the host formatted as 16 space-separated
- hex octets.
-
- Writes fetches device's supported band groups and the
- the CDID for any existing association with this host.
-
-What: /sys/bus/usb/drivers/wusb_cbaf/.../wusb_host_name
-Date: August 2008
-KernelVersion: 2.6.27
-Contact: David Vrabel <david.vrabel@csr.com>
-Description:
- A friendly name for the host as a UTF-8 encoded string.
-
-What: /sys/bus/usb/drivers/wusb_cbaf/.../wusb_host_band_groups
-Date: August 2008
-KernelVersion: 2.6.27
-Contact: David Vrabel <david.vrabel@csr.com>
-Description:
- The band groups supported by the host, in the format
- defined in [WUSB-AM].
-
-What: /sys/bus/usb/drivers/wusb_cbaf/.../wusb_device_band_groups
-Date: August 2008
-KernelVersion: 2.6.27
-Contact: David Vrabel <david.vrabel@csr.com>
-Description:
- The band groups supported by the device, in the format
- defined in [WUSB-AM].
-
-What: /sys/bus/usb/drivers/wusb_cbaf/.../wusb_cdid
-Date: August 2008
-KernelVersion: 2.6.27
-Contact: David Vrabel <david.vrabel@csr.com>
-Description:
- The device's CDID formatted as 16 space-separated hex
- octets.
-
-What: /sys/bus/usb/drivers/wusb_cbaf/.../wusb_ck
-Date: August 2008
-KernelVersion: 2.6.27
-Contact: David Vrabel <david.vrabel@csr.com>
-Description:
- Write 16 space-separated random, hex octets to
- associate with the device.
diff --git a/Documentation/ABI/testing/usb-charger-uevent b/Documentation/ABI/testing/usb-charger-uevent
new file mode 100644
index 000000000000..1db89b0cf80f
--- /dev/null
+++ b/Documentation/ABI/testing/usb-charger-uevent
@@ -0,0 +1,54 @@
+What: Raise a uevent when a USB charger is inserted or removed
+Date: 2020-01-14
+KernelVersion: 5.6
+Contact: linux-usb@vger.kernel.org
+Description: There are two USB charger states:
+
+ - USB_CHARGER_ABSENT
+ - USB_CHARGER_PRESENT
+
+ There are five USB charger types:
+
+ ======================== ==========================
+ USB_CHARGER_UNKNOWN_TYPE Charger type is unknown
+ USB_CHARGER_SDP_TYPE Standard Downstream Port
+ USB_CHARGER_CDP_TYPE Charging Downstream Port
+ USB_CHARGER_DCP_TYPE Dedicated Charging Port
+ USB_CHARGER_ACA_TYPE Accessory Charging Adapter
+ ======================== ==========================
+
+ https://www.usb.org/document-library/battery-charging-v12-spec-and-adopters-agreement
+
+ Here are two examples taken using ``udevadm monitor -p`` when
+ USB charger is online::
+
+ UDEV change /devices/soc0/usbphynop1 (platform)
+ ACTION=change
+ DEVPATH=/devices/soc0/usbphynop1
+ DRIVER=usb_phy_generic
+ MODALIAS=of:Nusbphynop1T(null)Cusb-nop-xceiv
+ OF_COMPATIBLE_0=usb-nop-xceiv
+ OF_COMPATIBLE_N=1
+ OF_FULLNAME=/usbphynop1
+ OF_NAME=usbphynop1
+ SEQNUM=2493
+ SUBSYSTEM=platform
+ USB_CHARGER_STATE=USB_CHARGER_PRESENT
+ USB_CHARGER_TYPE=USB_CHARGER_SDP_TYPE
+ USEC_INITIALIZED=227422826
+
+ USB charger is offline::
+
+ KERNEL change /devices/soc0/usbphynop1 (platform)
+ ACTION=change
+ DEVPATH=/devices/soc0/usbphynop1
+ DRIVER=usb_phy_generic
+ MODALIAS=of:Nusbphynop1T(null)Cusb-nop-xceiv
+ OF_COMPATIBLE_0=usb-nop-xceiv
+ OF_COMPATIBLE_N=1
+ OF_FULLNAME=/usbphynop1
+ OF_NAME=usbphynop1
+ SEQNUM=2494
+ SUBSYSTEM=platform
+ USB_CHARGER_STATE=USB_CHARGER_ABSENT
+ USB_CHARGER_TYPE=USB_CHARGER_UNKNOWN_TYPE
diff --git a/Documentation/ABI/testing/usb-uevent b/Documentation/ABI/testing/usb-uevent
new file mode 100644
index 000000000000..2b8eca4bf2b1
--- /dev/null
+++ b/Documentation/ABI/testing/usb-uevent
@@ -0,0 +1,27 @@
+What: Raise a uevent when a USB Host Controller has died
+Date: 2019-04-17
+KernelVersion: 5.2
+Contact: linux-usb@vger.kernel.org
+Description: When the USB Host Controller has entered a state where it is no
+ longer functional a uevent will be raised. The uevent will
+ contain ACTION=offline and ERROR=DEAD.
+
+ Here is an example taken using udevadm monitor -p::
+
+ KERNEL[130.428945] offline /devices/pci0000:00/0000:00:10.0/usb2 (usb)
+ ACTION=offline
+ BUSNUM=002
+ DEVNAME=/dev/bus/usb/002/001
+ DEVNUM=001
+ DEVPATH=/devices/pci0000:00/0000:00:10.0/usb2
+ DEVTYPE=usb_device
+ DRIVER=usb
+ ERROR=DEAD
+ MAJOR=189
+ MINOR=128
+ PRODUCT=1d6b/2/414
+ SEQNUM=2168
+ SUBSYSTEM=usb
+ TYPE=9/0/1
+
+Users: chromium-os-dev@chromium.org
diff --git a/Documentation/DMA-API-HOWTO.txt b/Documentation/DMA-API-HOWTO.txt
deleted file mode 100644
index f0cc3f772265..000000000000
--- a/Documentation/DMA-API-HOWTO.txt
+++ /dev/null
@@ -1,969 +0,0 @@
-=========================
-Dynamic DMA mapping Guide
-=========================
-
-:Author: David S. Miller <davem@redhat.com>
-:Author: Richard Henderson <rth@cygnus.com>
-:Author: Jakub Jelinek <jakub@redhat.com>
-
-This is a guide to device driver writers on how to use the DMA API
-with example pseudo-code. For a concise description of the API, see
-DMA-API.txt.
-
-CPU and DMA addresses
-=====================
-
-There are several kinds of addresses involved in the DMA API, and it's
-important to understand the differences.
-
-The kernel normally uses virtual addresses. Any address returned by
-kmalloc(), vmalloc(), and similar interfaces is a virtual address and can
-be stored in a ``void *``.
-
-The virtual memory system (TLB, page tables, etc.) translates virtual
-addresses to CPU physical addresses, which are stored as "phys_addr_t" or
-"resource_size_t". The kernel manages device resources like registers as
-physical addresses. These are the addresses in /proc/iomem. The physical
-address is not directly useful to a driver; it must use ioremap() to map
-the space and produce a virtual address.
-
-I/O devices use a third kind of address: a "bus address". If a device has
-registers at an MMIO address, or if it performs DMA to read or write system
-memory, the addresses used by the device are bus addresses. In some
-systems, bus addresses are identical to CPU physical addresses, but in
-general they are not. IOMMUs and host bridges can produce arbitrary
-mappings between physical and bus addresses.
-
-From a device's point of view, DMA uses the bus address space, but it may
-be restricted to a subset of that space. For example, even if a system
-supports 64-bit addresses for main memory and PCI BARs, it may use an IOMMU
-so devices only need to use 32-bit DMA addresses.
-
-Here's a picture and some examples::
-
- CPU CPU Bus
- Virtual Physical Address
- Address Address Space
- Space Space
-
- +-------+ +------+ +------+
- | | |MMIO | Offset | |
- | | Virtual |Space | applied | |
- C +-------+ --------> B +------+ ----------> +------+ A
- | | mapping | | by host | |
- +-----+ | | | | bridge | | +--------+
- | | | | +------+ | | | |
- | CPU | | | | RAM | | | | Device |
- | | | | | | | | | |
- +-----+ +-------+ +------+ +------+ +--------+
- | | Virtual |Buffer| Mapping | |
- X +-------+ --------> Y +------+ <---------- +------+ Z
- | | mapping | RAM | by IOMMU
- | | | |
- | | | |
- +-------+ +------+
-
-During the enumeration process, the kernel learns about I/O devices and
-their MMIO space and the host bridges that connect them to the system. For
-example, if a PCI device has a BAR, the kernel reads the bus address (A)
-from the BAR and converts it to a CPU physical address (B). The address B
-is stored in a struct resource and usually exposed via /proc/iomem. When a
-driver claims a device, it typically uses ioremap() to map physical address
-B at a virtual address (C). It can then use, e.g., ioread32(C), to access
-the device registers at bus address A.
-
-If the device supports DMA, the driver sets up a buffer using kmalloc() or
-a similar interface, which returns a virtual address (X). The virtual
-memory system maps X to a physical address (Y) in system RAM. The driver
-can use virtual address X to access the buffer, but the device itself
-cannot because DMA doesn't go through the CPU virtual memory system.
-
-In some simple systems, the device can do DMA directly to physical address
-Y. But in many others, there is IOMMU hardware that translates DMA
-addresses to physical addresses, e.g., it translates Z to Y. This is part
-of the reason for the DMA API: the driver can give a virtual address X to
-an interface like dma_map_single(), which sets up any required IOMMU
-mapping and returns the DMA address Z. The driver then tells the device to
-do DMA to Z, and the IOMMU maps it to the buffer at address Y in system
-RAM.
-
-So that Linux can use the dynamic DMA mapping, it needs some help from the
-drivers, namely it has to take into account that DMA addresses should be
-mapped only for the time they are actually used and unmapped after the DMA
-transfer.
-
-The following API will work of course even on platforms where no such
-hardware exists.
-
-Note that the DMA API works with any bus independent of the underlying
-microprocessor architecture. You should use the DMA API rather than the
-bus-specific DMA API, i.e., use the dma_map_*() interfaces rather than the
-pci_map_*() interfaces.
-
-First of all, you should make sure::
-
- #include <linux/dma-mapping.h>
-
-is in your driver, which provides the definition of dma_addr_t. This type
-can hold any valid DMA address for the platform and should be used
-everywhere you hold a DMA address returned from the DMA mapping functions.
-
-What memory is DMA'able?
-========================
-
-The first piece of information you must know is what kernel memory can
-be used with the DMA mapping facilities. There has been an unwritten
-set of rules regarding this, and this text is an attempt to finally
-write them down.
-
-If you acquired your memory via the page allocator
-(i.e. __get_free_page*()) or the generic memory allocators
-(i.e. kmalloc() or kmem_cache_alloc()) then you may DMA to/from
-that memory using the addresses returned from those routines.
-
-This means specifically that you may _not_ use the memory/addresses
-returned from vmalloc() for DMA. It is possible to DMA to the
-_underlying_ memory mapped into a vmalloc() area, but this requires
-walking page tables to get the physical addresses, and then
-translating each of those pages back to a kernel address using
-something like __va(). [ EDIT: Update this when we integrate
-Gerd Knorr's generic code which does this. ]
-
-This rule also means that you may use neither kernel image addresses
-(items in data/text/bss segments), nor module image addresses, nor
-stack addresses for DMA. These could all be mapped somewhere entirely
-different than the rest of physical memory. Even if those classes of
-memory could physically work with DMA, you'd need to ensure the I/O
-buffers were cacheline-aligned. Without that, you'd see cacheline
-sharing problems (data corruption) on CPUs with DMA-incoherent caches.
-(The CPU could write to one word, DMA would write to a different one
-in the same cache line, and one of them could be overwritten.)
-
-Also, this means that you cannot take the return of a kmap()
-call and DMA to/from that. This is similar to vmalloc().
-
-What about block I/O and networking buffers? The block I/O and
-networking subsystems make sure that the buffers they use are valid
-for you to DMA from/to.
-
-DMA addressing limitations
-==========================
-
-Does your device have any DMA addressing limitations? For example, is
-your device only capable of driving the low order 24-bits of address?
-If so, you need to inform the kernel of this fact.
-
-By default, the kernel assumes that your device can address the full
-32-bits. For a 64-bit capable device, this needs to be increased.
-And for a device with limitations, as discussed in the previous
-paragraph, it needs to be decreased.
-
-Special note about PCI: PCI-X specification requires PCI-X devices to
-support 64-bit addressing (DAC) for all transactions. And at least
-one platform (SGI SN2) requires 64-bit consistent allocations to
-operate correctly when the IO bus is in PCI-X mode.
-
-For correct operation, you must interrogate the kernel in your device
-probe routine to see if the DMA controller on the machine can properly
-support the DMA addressing limitation your device has. It is good
-style to do this even if your device holds the default setting,
-because this shows that you did think about these issues wrt. your
-device.
-
-The query is performed via a call to dma_set_mask_and_coherent()::
-
- int dma_set_mask_and_coherent(struct device *dev, u64 mask);
-
-which will query the mask for both streaming and coherent APIs together.
-If you have some special requirements, then the following two separate
-queries can be used instead:
-
- The query for streaming mappings is performed via a call to
- dma_set_mask()::
-
- int dma_set_mask(struct device *dev, u64 mask);
-
- The query for consistent allocations is performed via a call
- to dma_set_coherent_mask()::
-
- int dma_set_coherent_mask(struct device *dev, u64 mask);
-
-Here, dev is a pointer to the device struct of your device, and mask
-is a bit mask describing which bits of an address your device
-supports. It returns zero if your card can perform DMA properly on
-the machine given the address mask you provided. In general, the
-device struct of your device is embedded in the bus-specific device
-struct of your device. For example, &pdev->dev is a pointer to the
-device struct of a PCI device (pdev is a pointer to the PCI device
-struct of your device).
-
-If it returns non-zero, your device cannot perform DMA properly on
-this platform, and attempting to do so will result in undefined
-behavior. You must either use a different mask, or not use DMA.
-
-This means that in the failure case, you have three options:
-
-1) Use another DMA mask, if possible (see below).
-2) Use some non-DMA mode for data transfer, if possible.
-3) Ignore this device and do not initialize it.
-
-It is recommended that your driver print a kernel KERN_WARNING message
-when you end up performing either #2 or #3. In this manner, if a user
-of your driver reports that performance is bad or that the device is not
-even detected, you can ask them for the kernel messages to find out
-exactly why.
-
-The standard 32-bit addressing device would do something like this::
-
- if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32))) {
- dev_warn(dev, "mydev: No suitable DMA available\n");
- goto ignore_this_device;
- }
-
-Another common scenario is a 64-bit capable device. The approach here
-is to try for 64-bit addressing, but back down to a 32-bit mask that
-should not fail. The kernel may fail the 64-bit mask not because the
-platform is not capable of 64-bit addressing. Rather, it may fail in
-this case simply because 32-bit addressing is done more efficiently
-than 64-bit addressing. For example, Sparc64 PCI SAC addressing is
-more efficient than DAC addressing.
-
-Here is how you would handle a 64-bit capable device which can drive
-all 64-bits when accessing streaming DMA::
-
- int using_dac;
-
- if (!dma_set_mask(dev, DMA_BIT_MASK(64))) {
- using_dac = 1;
- } else if (!dma_set_mask(dev, DMA_BIT_MASK(32))) {
- using_dac = 0;
- } else {
- dev_warn(dev, "mydev: No suitable DMA available\n");
- goto ignore_this_device;
- }
-
-If a card is capable of using 64-bit consistent allocations as well,
-the case would look like this::
-
- int using_dac, consistent_using_dac;
-
- if (!dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64))) {
- using_dac = 1;
- consistent_using_dac = 1;
- } else if (!dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32))) {
- using_dac = 0;
- consistent_using_dac = 0;
- } else {
- dev_warn(dev, "mydev: No suitable DMA available\n");
- goto ignore_this_device;
- }
-
-The coherent mask will always be able to set the same or a smaller mask as
-the streaming mask. However for the rare case that a device driver only
-uses consistent allocations, one would have to check the return value from
-dma_set_coherent_mask().
-
-Finally, if your device can only drive the low 24-bits of
-address you might do something like::
-
- if (dma_set_mask(dev, DMA_BIT_MASK(24))) {
- dev_warn(dev, "mydev: 24-bit DMA addressing not available\n");
- goto ignore_this_device;
- }
-
-When dma_set_mask() or dma_set_mask_and_coherent() is successful, and
-returns zero, the kernel saves away this mask you have provided. The
-kernel will use this information later when you make DMA mappings.
-
-There is a case which we are aware of at this time, which is worth
-mentioning in this documentation. If your device supports multiple
-functions (for example a sound card provides playback and record
-functions) and the various different functions have _different_
-DMA addressing limitations, you may wish to probe each mask and
-only provide the functionality which the machine can handle. It
-is important that the last call to dma_set_mask() be for the
-most specific mask.
-
-Here is pseudo-code showing how this might be done::
-
- #define PLAYBACK_ADDRESS_BITS DMA_BIT_MASK(32)
- #define RECORD_ADDRESS_BITS DMA_BIT_MASK(24)
-
- struct my_sound_card *card;
- struct device *dev;
-
- ...
- if (!dma_set_mask(dev, PLAYBACK_ADDRESS_BITS)) {
- card->playback_enabled = 1;
- } else {
- card->playback_enabled = 0;
- dev_warn(dev, "%s: Playback disabled due to DMA limitations\n",
- card->name);
- }
- if (!dma_set_mask(dev, RECORD_ADDRESS_BITS)) {
- card->record_enabled = 1;
- } else {
- card->record_enabled = 0;
- dev_warn(dev, "%s: Record disabled due to DMA limitations\n",
- card->name);
- }
-
-A sound card was used as an example here because this genre of PCI
-devices seems to be littered with ISA chips given a PCI front end,
-and thus retaining the 16MB DMA addressing limitations of ISA.
-
-Types of DMA mappings
-=====================
-
-There are two types of DMA mappings:
-
-- Consistent DMA mappings which are usually mapped at driver
- initialization, unmapped at the end and for which the hardware should
- guarantee that the device and the CPU can access the data
- in parallel and will see updates made by each other without any
- explicit software flushing.
-
- Think of "consistent" as "synchronous" or "coherent".
-
- The current default is to return consistent memory in the low 32
- bits of the DMA space. However, for future compatibility you should
- set the consistent mask even if this default is fine for your
- driver.
-
- Good examples of what to use consistent mappings for are:
-
- - Network card DMA ring descriptors.
- - SCSI adapter mailbox command data structures.
- - Device firmware microcode executed out of
- main memory.
-
- The invariant these examples all require is that any CPU store
- to memory is immediately visible to the device, and vice
- versa. Consistent mappings guarantee this.
-
- .. important::
-
- Consistent DMA memory does not preclude the usage of
- proper memory barriers. The CPU may reorder stores to
- consistent memory just as it may normal memory. Example:
- if it is important for the device to see the first word
- of a descriptor updated before the second, you must do
- something like::
-
- desc->word0 = address;
- wmb();
- desc->word1 = DESC_VALID;
-
- in order to get correct behavior on all platforms.
-
- Also, on some platforms your driver may need to flush CPU write
- buffers in much the same way as it needs to flush write buffers
- found in PCI bridges (such as by reading a register's value
- after writing it).
-
-- Streaming DMA mappings which are usually mapped for one DMA
- transfer, unmapped right after it (unless you use dma_sync_* below)
- and for which hardware can optimize for sequential accesses.
-
- Think of "streaming" as "asynchronous" or "outside the coherency
- domain".
-
- Good examples of what to use streaming mappings for are:
-
- - Networking buffers transmitted/received by a device.
- - Filesystem buffers written/read by a SCSI device.
-
- The interfaces for using this type of mapping were designed in
- such a way that an implementation can make whatever performance
- optimizations the hardware allows. To this end, when using
- such mappings you must be explicit about what you want to happen.
-
-Neither type of DMA mapping has alignment restrictions that come from
-the underlying bus, although some devices may have such restrictions.
-Also, systems with caches that aren't DMA-coherent will work better
-when the underlying buffers don't share cache lines with other data.
-
-
-Using Consistent DMA mappings
-=============================
-
-To allocate and map large (PAGE_SIZE or so) consistent DMA regions,
-you should do::
-
- dma_addr_t dma_handle;
-
- cpu_addr = dma_alloc_coherent(dev, size, &dma_handle, gfp);
-
-where device is a ``struct device *``. This may be called in interrupt
-context with the GFP_ATOMIC flag.
-
-Size is the length of the region you want to allocate, in bytes.
-
-This routine will allocate RAM for that region, so it acts similarly to
-__get_free_pages() (but takes size instead of a page order). If your
-driver needs regions sized smaller than a page, you may prefer using
-the dma_pool interface, described below.
-
-The consistent DMA mapping interfaces, for non-NULL dev, will by
-default return a DMA address which is 32-bit addressable. Even if the
-device indicates (via DMA mask) that it may address the upper 32-bits,
-consistent allocation will only return > 32-bit addresses for DMA if
-the consistent DMA mask has been explicitly changed via
-dma_set_coherent_mask(). This is true of the dma_pool interface as
-well.
-
-dma_alloc_coherent() returns two values: the virtual address which you
-can use to access it from the CPU and dma_handle which you pass to the
-card.
-
-The CPU virtual address and the DMA address are both
-guaranteed to be aligned to the smallest PAGE_SIZE order which
-is greater than or equal to the requested size. This invariant
-exists (for example) to guarantee that if you allocate a chunk
-which is smaller than or equal to 64 kilobytes, the extent of the
-buffer you receive will not cross a 64K boundary.
-
-To unmap and free such a DMA region, you call::
-
- dma_free_coherent(dev, size, cpu_addr, dma_handle);
-
-where dev, size are the same as in the above call and cpu_addr and
-dma_handle are the values dma_alloc_coherent() returned to you.
-This function may not be called in interrupt context.
-
-If your driver needs lots of smaller memory regions, you can write
-custom code to subdivide pages returned by dma_alloc_coherent(),
-or you can use the dma_pool API to do that. A dma_pool is like
-a kmem_cache, but it uses dma_alloc_coherent(), not __get_free_pages().
-Also, it understands common hardware constraints for alignment,
-like queue heads needing to be aligned on N byte boundaries.
-
-Create a dma_pool like this::
-
- struct dma_pool *pool;
-
- pool = dma_pool_create(name, dev, size, align, boundary);
-
-The "name" is for diagnostics (like a kmem_cache name); dev and size
-are as above. The device's hardware alignment requirement for this
-type of data is "align" (which is expressed in bytes, and must be a
-power of two). If your device has no boundary crossing restrictions,
-pass 0 for boundary; passing 4096 says memory allocated from this pool
-must not cross 4KByte boundaries (but at that time it may be better to
-use dma_alloc_coherent() directly instead).
-
-Allocate memory from a DMA pool like this::
-
- cpu_addr = dma_pool_alloc(pool, flags, &dma_handle);
-
-flags are GFP_KERNEL if blocking is permitted (not in_interrupt nor
-holding SMP locks), GFP_ATOMIC otherwise. Like dma_alloc_coherent(),
-this returns two values, cpu_addr and dma_handle.
-
-Free memory that was allocated from a dma_pool like this::
-
- dma_pool_free(pool, cpu_addr, dma_handle);
-
-where pool is what you passed to dma_pool_alloc(), and cpu_addr and
-dma_handle are the values dma_pool_alloc() returned. This function
-may be called in interrupt context.
-
-Destroy a dma_pool by calling::
-
- dma_pool_destroy(pool);
-
-Make sure you've called dma_pool_free() for all memory allocated
-from a pool before you destroy the pool. This function may not
-be called in interrupt context.
-
-DMA Direction
-=============
-
-The interfaces described in subsequent portions of this document
-take a DMA direction argument, which is an integer and takes on
-one of the following values::
-
- DMA_BIDIRECTIONAL
- DMA_TO_DEVICE
- DMA_FROM_DEVICE
- DMA_NONE
-
-You should provide the exact DMA direction if you know it.
-
-DMA_TO_DEVICE means "from main memory to the device"
-DMA_FROM_DEVICE means "from the device to main memory"
-It is the direction in which the data moves during the DMA
-transfer.
-
-You are _strongly_ encouraged to specify this as precisely
-as you possibly can.
-
-If you absolutely cannot know the direction of the DMA transfer,
-specify DMA_BIDIRECTIONAL. It means that the DMA can go in
-either direction. The platform guarantees that you may legally
-specify this, and that it will work, but this may be at the
-cost of performance for example.
-
-The value DMA_NONE is to be used for debugging. One can
-hold this in a data structure before you come to know the
-precise direction, and this will help catch cases where your
-direction tracking logic has failed to set things up properly.
-
-Another advantage of specifying this value precisely (outside of
-potential platform-specific optimizations of such) is for debugging.
-Some platforms actually have a write permission boolean which DMA
-mappings can be marked with, much like page protections in the user
-program address space. Such platforms can and do report errors in the
-kernel logs when the DMA controller hardware detects violation of the
-permission setting.
-
-Only streaming mappings specify a direction, consistent mappings
-implicitly have a direction attribute setting of
-DMA_BIDIRECTIONAL.
-
-The SCSI subsystem tells you the direction to use in the
-'sc_data_direction' member of the SCSI command your driver is
-working on.
-
-For Networking drivers, it's a rather simple affair. For transmit
-packets, map/unmap them with the DMA_TO_DEVICE direction
-specifier. For receive packets, just the opposite, map/unmap them
-with the DMA_FROM_DEVICE direction specifier.
-
-Using Streaming DMA mappings
-============================
-
-The streaming DMA mapping routines can be called from interrupt
-context. There are two versions of each map/unmap, one which will
-map/unmap a single memory region, and one which will map/unmap a
-scatterlist.
-
-To map a single region, you do::
-
- struct device *dev = &my_dev->dev;
- dma_addr_t dma_handle;
- void *addr = buffer->ptr;
- size_t size = buffer->len;
-
- dma_handle = dma_map_single(dev, addr, size, direction);
- if (dma_mapping_error(dev, dma_handle)) {
- /*
- * reduce current DMA mapping usage,
- * delay and try again later or
- * reset driver.
- */
- goto map_error_handling;
- }
-
-and to unmap it::
-
- dma_unmap_single(dev, dma_handle, size, direction);
-
-You should call dma_mapping_error() as dma_map_single() could fail and return
-error. Doing so will ensure that the mapping code will work correctly on all
-DMA implementations without any dependency on the specifics of the underlying
-implementation. Using the returned address without checking for errors could
-result in failures ranging from panics to silent data corruption. The same
-applies to dma_map_page() as well.
-
-You should call dma_unmap_single() when the DMA activity is finished, e.g.,
-from the interrupt which told you that the DMA transfer is done.
-
-Using CPU pointers like this for single mappings has a disadvantage:
-you cannot reference HIGHMEM memory in this way. Thus, there is a
-map/unmap interface pair akin to dma_{map,unmap}_single(). These
-interfaces deal with page/offset pairs instead of CPU pointers.
-Specifically::
-
- struct device *dev = &my_dev->dev;
- dma_addr_t dma_handle;
- struct page *page = buffer->page;
- unsigned long offset = buffer->offset;
- size_t size = buffer->len;
-
- dma_handle = dma_map_page(dev, page, offset, size, direction);
- if (dma_mapping_error(dev, dma_handle)) {
- /*
- * reduce current DMA mapping usage,
- * delay and try again later or
- * reset driver.
- */
- goto map_error_handling;
- }
-
- ...
-
- dma_unmap_page(dev, dma_handle, size, direction);
-
-Here, "offset" means byte offset within the given page.
-
-You should call dma_mapping_error() as dma_map_page() could fail and return
-error as outlined under the dma_map_single() discussion.
-
-You should call dma_unmap_page() when the DMA activity is finished, e.g.,
-from the interrupt which told you that the DMA transfer is done.
-
-With scatterlists, you map a region gathered from several regions by::
-
- int i, count = dma_map_sg(dev, sglist, nents, direction);
- struct scatterlist *sg;
-
- for_each_sg(sglist, sg, count, i) {
- hw_address[i] = sg_dma_address(sg);
- hw_len[i] = sg_dma_len(sg);
- }
-
-where nents is the number of entries in the sglist.
-
-The implementation is free to merge several consecutive sglist entries
-into one (e.g. if DMA mapping is done with PAGE_SIZE granularity, any
-consecutive sglist entries can be merged into one provided the first one
-ends and the second one starts on a page boundary - in fact this is a huge
-advantage for cards which either cannot do scatter-gather or have very
-limited number of scatter-gather entries) and returns the actual number
-of sg entries it mapped them to. On failure 0 is returned.
-
-Then you should loop count times (note: this can be less than nents times)
-and use sg_dma_address() and sg_dma_len() macros where you previously
-accessed sg->address and sg->length as shown above.
-
-To unmap a scatterlist, just call::
-
- dma_unmap_sg(dev, sglist, nents, direction);
-
-Again, make sure DMA activity has already finished.
-
-.. note::
-
- The 'nents' argument to the dma_unmap_sg call must be
- the _same_ one you passed into the dma_map_sg call,
- it should _NOT_ be the 'count' value _returned_ from the
- dma_map_sg call.
-
-Every dma_map_{single,sg}() call should have its dma_unmap_{single,sg}()
-counterpart, because the DMA address space is a shared resource and
-you could render the machine unusable by consuming all DMA addresses.
-
-If you need to use the same streaming DMA region multiple times and touch
-the data in between the DMA transfers, the buffer needs to be synced
-properly in order for the CPU and device to see the most up-to-date and
-correct copy of the DMA buffer.
-
-So, firstly, just map it with dma_map_{single,sg}(), and after each DMA
-transfer call either::
-
- dma_sync_single_for_cpu(dev, dma_handle, size, direction);
-
-or::
-
- dma_sync_sg_for_cpu(dev, sglist, nents, direction);
-
-as appropriate.
-
-Then, if you wish to let the device get at the DMA area again,
-finish accessing the data with the CPU, and then before actually
-giving the buffer to the hardware call either::
-
- dma_sync_single_for_device(dev, dma_handle, size, direction);
-
-or::
-
- dma_sync_sg_for_device(dev, sglist, nents, direction);
-
-as appropriate.
-
-.. note::
-
- The 'nents' argument to dma_sync_sg_for_cpu() and
- dma_sync_sg_for_device() must be the same passed to
- dma_map_sg(). It is _NOT_ the count returned by
- dma_map_sg().
-
-After the last DMA transfer call one of the DMA unmap routines
-dma_unmap_{single,sg}(). If you don't touch the data from the first
-dma_map_*() call till dma_unmap_*(), then you don't have to call the
-dma_sync_*() routines at all.
-
-Here is pseudo code which shows a situation in which you would need
-to use the dma_sync_*() interfaces::
-
- my_card_setup_receive_buffer(struct my_card *cp, char *buffer, int len)
- {
- dma_addr_t mapping;
-
- mapping = dma_map_single(cp->dev, buffer, len, DMA_FROM_DEVICE);
- if (dma_mapping_error(cp->dev, mapping)) {
- /*
- * reduce current DMA mapping usage,
- * delay and try again later or
- * reset driver.
- */
- goto map_error_handling;
- }
-
- cp->rx_buf = buffer;
- cp->rx_len = len;
- cp->rx_dma = mapping;
-
- give_rx_buf_to_card(cp);
- }
-
- ...
-
- my_card_interrupt_handler(int irq, void *devid, struct pt_regs *regs)
- {
- struct my_card *cp = devid;
-
- ...
- if (read_card_status(cp) == RX_BUF_TRANSFERRED) {
- struct my_card_header *hp;
-
- /* Examine the header to see if we wish
- * to accept the data. But synchronize
- * the DMA transfer with the CPU first
- * so that we see updated contents.
- */
- dma_sync_single_for_cpu(&cp->dev, cp->rx_dma,
- cp->rx_len,
- DMA_FROM_DEVICE);
-
- /* Now it is safe to examine the buffer. */
- hp = (struct my_card_header *) cp->rx_buf;
- if (header_is_ok(hp)) {
- dma_unmap_single(&cp->dev, cp->rx_dma, cp->rx_len,
- DMA_FROM_DEVICE);
- pass_to_upper_layers(cp->rx_buf);
- make_and_setup_new_rx_buf(cp);
- } else {
- /* CPU should not write to
- * DMA_FROM_DEVICE-mapped area,
- * so dma_sync_single_for_device() is
- * not needed here. It would be required
- * for DMA_BIDIRECTIONAL mapping if
- * the memory was modified.
- */
- give_rx_buf_to_card(cp);
- }
- }
- }
-
-Drivers converted fully to this interface should not use virt_to_bus() any
-longer, nor should they use bus_to_virt(). Some drivers have to be changed a
-little bit, because there is no longer an equivalent to bus_to_virt() in the
-dynamic DMA mapping scheme - you have to always store the DMA addresses
-returned by the dma_alloc_coherent(), dma_pool_alloc(), and dma_map_single()
-calls (dma_map_sg() stores them in the scatterlist itself if the platform
-supports dynamic DMA mapping in hardware) in your driver structures and/or
-in the card registers.
-
-All drivers should be using these interfaces with no exceptions. It
-is planned to completely remove virt_to_bus() and bus_to_virt() as
-they are entirely deprecated. Some ports already do not provide these
-as it is impossible to correctly support them.
-
-Handling Errors
-===============
-
-DMA address space is limited on some architectures and an allocation
-failure can be determined by:
-
-- checking if dma_alloc_coherent() returns NULL or dma_map_sg returns 0
-
-- checking the dma_addr_t returned from dma_map_single() and dma_map_page()
- by using dma_mapping_error()::
-
- dma_addr_t dma_handle;
-
- dma_handle = dma_map_single(dev, addr, size, direction);
- if (dma_mapping_error(dev, dma_handle)) {
- /*
- * reduce current DMA mapping usage,
- * delay and try again later or
- * reset driver.
- */
- goto map_error_handling;
- }
-
-- unmap pages that are already mapped, when mapping error occurs in the middle
- of a multiple page mapping attempt. These example are applicable to
- dma_map_page() as well.
-
-Example 1::
-
- dma_addr_t dma_handle1;
- dma_addr_t dma_handle2;
-
- dma_handle1 = dma_map_single(dev, addr, size, direction);
- if (dma_mapping_error(dev, dma_handle1)) {
- /*
- * reduce current DMA mapping usage,
- * delay and try again later or
- * reset driver.
- */
- goto map_error_handling1;
- }
- dma_handle2 = dma_map_single(dev, addr, size, direction);
- if (dma_mapping_error(dev, dma_handle2)) {
- /*
- * reduce current DMA mapping usage,
- * delay and try again later or
- * reset driver.
- */
- goto map_error_handling2;
- }
-
- ...
-
- map_error_handling2:
- dma_unmap_single(dma_handle1);
- map_error_handling1:
-
-Example 2::
-
- /*
- * if buffers are allocated in a loop, unmap all mapped buffers when
- * mapping error is detected in the middle
- */
-
- dma_addr_t dma_addr;
- dma_addr_t array[DMA_BUFFERS];
- int save_index = 0;
-
- for (i = 0; i < DMA_BUFFERS; i++) {
-
- ...
-
- dma_addr = dma_map_single(dev, addr, size, direction);
- if (dma_mapping_error(dev, dma_addr)) {
- /*
- * reduce current DMA mapping usage,
- * delay and try again later or
- * reset driver.
- */
- goto map_error_handling;
- }
- array[i].dma_addr = dma_addr;
- save_index++;
- }
-
- ...
-
- map_error_handling:
-
- for (i = 0; i < save_index; i++) {
-
- ...
-
- dma_unmap_single(array[i].dma_addr);
- }
-
-Networking drivers must call dev_kfree_skb() to free the socket buffer
-and return NETDEV_TX_OK if the DMA mapping fails on the transmit hook
-(ndo_start_xmit). This means that the socket buffer is just dropped in
-the failure case.
-
-SCSI drivers must return SCSI_MLQUEUE_HOST_BUSY if the DMA mapping
-fails in the queuecommand hook. This means that the SCSI subsystem
-passes the command to the driver again later.
-
-Optimizing Unmap State Space Consumption
-========================================
-
-On many platforms, dma_unmap_{single,page}() is simply a nop.
-Therefore, keeping track of the mapping address and length is a waste
-of space. Instead of filling your drivers up with ifdefs and the like
-to "work around" this (which would defeat the whole purpose of a
-portable API) the following facilities are provided.
-
-Actually, instead of describing the macros one by one, we'll
-transform some example code.
-
-1) Use DEFINE_DMA_UNMAP_{ADDR,LEN} in state saving structures.
- Example, before::
-
- struct ring_state {
- struct sk_buff *skb;
- dma_addr_t mapping;
- __u32 len;
- };
-
- after::
-
- struct ring_state {
- struct sk_buff *skb;
- DEFINE_DMA_UNMAP_ADDR(mapping);
- DEFINE_DMA_UNMAP_LEN(len);
- };
-
-2) Use dma_unmap_{addr,len}_set() to set these values.
- Example, before::
-
- ringp->mapping = FOO;
- ringp->len = BAR;
-
- after::
-
- dma_unmap_addr_set(ringp, mapping, FOO);
- dma_unmap_len_set(ringp, len, BAR);
-
-3) Use dma_unmap_{addr,len}() to access these values.
- Example, before::
-
- dma_unmap_single(dev, ringp->mapping, ringp->len,
- DMA_FROM_DEVICE);
-
- after::
-
- dma_unmap_single(dev,
- dma_unmap_addr(ringp, mapping),
- dma_unmap_len(ringp, len),
- DMA_FROM_DEVICE);
-
-It really should be self-explanatory. We treat the ADDR and LEN
-separately, because it is possible for an implementation to only
-need the address in order to perform the unmap operation.
-
-Platform Issues
-===============
-
-If you are just writing drivers for Linux and do not maintain
-an architecture port for the kernel, you can safely skip down
-to "Closing".
-
-1) Struct scatterlist requirements.
-
- You need to enable CONFIG_NEED_SG_DMA_LENGTH if the architecture
- supports IOMMUs (including software IOMMU).
-
-2) ARCH_DMA_MINALIGN
-
- Architectures must ensure that kmalloc'ed buffer is
- DMA-safe. Drivers and subsystems depend on it. If an architecture
- isn't fully DMA-coherent (i.e. hardware doesn't ensure that data in
- the CPU cache is identical to data in main memory),
- ARCH_DMA_MINALIGN must be set so that the memory allocator
- makes sure that kmalloc'ed buffer doesn't share a cache line with
- the others. See arch/arm/include/asm/cache.h as an example.
-
- Note that ARCH_DMA_MINALIGN is about DMA memory alignment
- constraints. You don't need to worry about the architecture data
- alignment constraints (e.g. the alignment constraints about 64-bit
- objects).
-
-Closing
-=======
-
-This document, and the API itself, would not be in its current
-form without the feedback and suggestions from numerous individuals.
-We would like to specifically mention, in no particular order, the
-following people::
-
- Russell King <rmk@arm.linux.org.uk>
- Leo Dagum <dagum@barrel.engr.sgi.com>
- Ralf Baechle <ralf@oss.sgi.com>
- Grant Grundler <grundler@cup.hp.com>
- Jay Estabrook <Jay.Estabrook@compaq.com>
- Thomas Sailer <sailer@ife.ee.ethz.ch>
- Andrea Arcangeli <andrea@suse.de>
- Jens Axboe <jens.axboe@oracle.com>
- David Mosberger-Tang <davidm@hpl.hp.com>
diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
deleted file mode 100644
index 7a7d8a415ce8..000000000000
--- a/Documentation/DMA-API.txt
+++ /dev/null
@@ -1,768 +0,0 @@
-============================================
-Dynamic DMA mapping using the generic device
-============================================
-
-:Author: James E.J. Bottomley <James.Bottomley@HansenPartnership.com>
-
-This document describes the DMA API. For a more gentle introduction
-of the API (and actual examples), see Documentation/DMA-API-HOWTO.txt.
-
-This API is split into two pieces. Part I describes the basic API.
-Part II describes extensions for supporting non-consistent memory
-machines. Unless you know that your driver absolutely has to support
-non-consistent platforms (this is usually only legacy platforms) you
-should only use the API described in part I.
-
-Part I - dma_API
-----------------
-
-To get the dma_API, you must #include <linux/dma-mapping.h>. This
-provides dma_addr_t and the interfaces described below.
-
-A dma_addr_t can hold any valid DMA address for the platform. It can be
-given to a device to use as a DMA source or target. A CPU cannot reference
-a dma_addr_t directly because there may be translation between its physical
-address space and the DMA address space.
-
-Part Ia - Using large DMA-coherent buffers
-------------------------------------------
-
-::
-
- void *
- dma_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag)
-
-Consistent memory is memory for which a write by either the device or
-the processor can immediately be read by the processor or device
-without having to worry about caching effects. (You may however need
-to make sure to flush the processor's write buffers before telling
-devices to read that memory.)
-
-This routine allocates a region of <size> bytes of consistent memory.
-
-It returns a pointer to the allocated region (in the processor's virtual
-address space) or NULL if the allocation failed.
-
-It also returns a <dma_handle> which may be cast to an unsigned integer the
-same width as the bus and given to the device as the DMA address base of
-the region.
-
-Note: consistent memory can be expensive on some platforms, and the
-minimum allocation length may be as big as a page, so you should
-consolidate your requests for consistent memory as much as possible.
-The simplest way to do that is to use the dma_pool calls (see below).
-
-The flag parameter (dma_alloc_coherent() only) allows the caller to
-specify the ``GFP_`` flags (see kmalloc()) for the allocation (the
-implementation may choose to ignore flags that affect the location of
-the returned memory, like GFP_DMA).
-
-::
-
- void *
- dma_zalloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag)
-
-Wraps dma_alloc_coherent() and also zeroes the returned memory if the
-allocation attempt succeeded.
-
-::
-
- void
- dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
- dma_addr_t dma_handle)
-
-Free a region of consistent memory you previously allocated. dev,
-size and dma_handle must all be the same as those passed into
-dma_alloc_coherent(). cpu_addr must be the virtual address returned by
-the dma_alloc_coherent().
-
-Note that unlike their sibling allocation calls, these routines
-may only be called with IRQs enabled.
-
-
-Part Ib - Using small DMA-coherent buffers
-------------------------------------------
-
-To get this part of the dma_API, you must #include <linux/dmapool.h>
-
-Many drivers need lots of small DMA-coherent memory regions for DMA
-descriptors or I/O buffers. Rather than allocating in units of a page
-or more using dma_alloc_coherent(), you can use DMA pools. These work
-much like a struct kmem_cache, except that they use the DMA-coherent allocator,
-not __get_free_pages(). Also, they understand common hardware constraints
-for alignment, like queue heads needing to be aligned on N-byte boundaries.
-
-
-::
-
- struct dma_pool *
- dma_pool_create(const char *name, struct device *dev,
- size_t size, size_t align, size_t alloc);
-
-dma_pool_create() initializes a pool of DMA-coherent buffers
-for use with a given device. It must be called in a context which
-can sleep.
-
-The "name" is for diagnostics (like a struct kmem_cache name); dev and size
-are like what you'd pass to dma_alloc_coherent(). The device's hardware
-alignment requirement for this type of data is "align" (which is expressed
-in bytes, and must be a power of two). If your device has no boundary
-crossing restrictions, pass 0 for alloc; passing 4096 says memory allocated
-from this pool must not cross 4KByte boundaries.
-
-::
-
- void *
- dma_pool_zalloc(struct dma_pool *pool, gfp_t mem_flags,
- dma_addr_t *handle)
-
-Wraps dma_pool_alloc() and also zeroes the returned memory if the
-allocation attempt succeeded.
-
-
-::
-
- void *
- dma_pool_alloc(struct dma_pool *pool, gfp_t gfp_flags,
- dma_addr_t *dma_handle);
-
-This allocates memory from the pool; the returned memory will meet the
-size and alignment requirements specified at creation time. Pass
-GFP_ATOMIC to prevent blocking, or if it's permitted (not
-in_interrupt, not holding SMP locks), pass GFP_KERNEL to allow
-blocking. Like dma_alloc_coherent(), this returns two values: an
-address usable by the CPU, and the DMA address usable by the pool's
-device.
-
-::
-
- void
- dma_pool_free(struct dma_pool *pool, void *vaddr,
- dma_addr_t addr);
-
-This puts memory back into the pool. The pool is what was passed to
-dma_pool_alloc(); the CPU (vaddr) and DMA addresses are what
-were returned when that routine allocated the memory being freed.
-
-::
-
- void
- dma_pool_destroy(struct dma_pool *pool);
-
-dma_pool_destroy() frees the resources of the pool. It must be
-called in a context which can sleep. Make sure you've freed all allocated
-memory back to the pool before you destroy it.
-
-
-Part Ic - DMA addressing limitations
-------------------------------------
-
-::
-
- int
- dma_set_mask_and_coherent(struct device *dev, u64 mask)
-
-Checks to see if the mask is possible and updates the device
-streaming and coherent DMA mask parameters if it is.
-
-Returns: 0 if successful and a negative error if not.
-
-::
-
- int
- dma_set_mask(struct device *dev, u64 mask)
-
-Checks to see if the mask is possible and updates the device
-parameters if it is.
-
-Returns: 0 if successful and a negative error if not.
-
-::
-
- int
- dma_set_coherent_mask(struct device *dev, u64 mask)
-
-Checks to see if the mask is possible and updates the device
-parameters if it is.
-
-Returns: 0 if successful and a negative error if not.
-
-::
-
- u64
- dma_get_required_mask(struct device *dev)
-
-This API returns the mask that the platform requires to
-operate efficiently. Usually this means the returned mask
-is the minimum required to cover all of memory. Examining the
-required mask gives drivers with variable descriptor sizes the
-opportunity to use smaller descriptors as necessary.
-
-Requesting the required mask does not alter the current mask. If you
-wish to take advantage of it, you should issue a dma_set_mask()
-call to set the mask to the value returned.
-
-
-Part Id - Streaming DMA mappings
---------------------------------
-
-::
-
- dma_addr_t
- dma_map_single(struct device *dev, void *cpu_addr, size_t size,
- enum dma_data_direction direction)
-
-Maps a piece of processor virtual memory so it can be accessed by the
-device and returns the DMA address of the memory.
-
-The direction for both APIs may be converted freely by casting.
-However the dma_API uses a strongly typed enumerator for its
-direction:
-
-======================= =============================================
-DMA_NONE no direction (used for debugging)
-DMA_TO_DEVICE data is going from the memory to the device
-DMA_FROM_DEVICE data is coming from the device to the memory
-DMA_BIDIRECTIONAL direction isn't known
-======================= =============================================
-
-.. note::
-
- Not all memory regions in a machine can be mapped by this API.
- Further, contiguous kernel virtual space may not be contiguous as
- physical memory. Since this API does not provide any scatter/gather
- capability, it will fail if the user tries to map a non-physically
- contiguous piece of memory. For this reason, memory to be mapped by
- this API should be obtained from sources which guarantee it to be
- physically contiguous (like kmalloc).
-
- Further, the DMA address of the memory must be within the
- dma_mask of the device (the dma_mask is a bit mask of the
- addressable region for the device, i.e., if the DMA address of
- the memory ANDed with the dma_mask is still equal to the DMA
- address, then the device can perform DMA to the memory). To
- ensure that the memory allocated by kmalloc is within the dma_mask,
- the driver may specify various platform-dependent flags to restrict
- the DMA address range of the allocation (e.g., on x86, GFP_DMA
- guarantees to be within the first 16MB of available DMA addresses,
- as required by ISA devices).
-
- Note also that the above constraints on physical contiguity and
- dma_mask may not apply if the platform has an IOMMU (a device which
- maps an I/O DMA address to a physical memory address). However, to be
- portable, device driver writers may *not* assume that such an IOMMU
- exists.
-
-.. warning::
-
- Memory coherency operates at a granularity called the cache
- line width. In order for memory mapped by this API to operate
- correctly, the mapped region must begin exactly on a cache line
- boundary and end exactly on one (to prevent two separately mapped
- regions from sharing a single cache line). Since the cache line size
- may not be known at compile time, the API will not enforce this
- requirement. Therefore, it is recommended that driver writers who
- don't take special care to determine the cache line size at run time
- only map virtual regions that begin and end on page boundaries (which
- are guaranteed also to be cache line boundaries).
-
- DMA_TO_DEVICE synchronisation must be done after the last modification
- of the memory region by the software and before it is handed off to
- the device. Once this primitive is used, memory covered by this
- primitive should be treated as read-only by the device. If the device
- may write to it at any point, it should be DMA_BIDIRECTIONAL (see
- below).
-
- DMA_FROM_DEVICE synchronisation must be done before the driver
- accesses data that may be changed by the device. This memory should
- be treated as read-only by the driver. If the driver needs to write
- to it at any point, it should be DMA_BIDIRECTIONAL (see below).
-
- DMA_BIDIRECTIONAL requires special handling: it means that the driver
- isn't sure if the memory was modified before being handed off to the
- device and also isn't sure if the device will also modify it. Thus,
- you must always sync bidirectional memory twice: once before the
- memory is handed off to the device (to make sure all memory changes
- are flushed from the processor) and once before the data may be
- accessed after being used by the device (to make sure any processor
- cache lines are updated with data that the device may have changed).
-
-::
-
- void
- dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
- enum dma_data_direction direction)
-
-Unmaps the region previously mapped. All the parameters passed in
-must be identical to those passed in (and returned) by the mapping
-API.
-
-::
-
- dma_addr_t
- dma_map_page(struct device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction direction)
-
- void
- dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
- enum dma_data_direction direction)
-
-API for mapping and unmapping for pages. All the notes and warnings
-for the other mapping APIs apply here. Also, although the <offset>
-and <size> parameters are provided to do partial page mapping, it is
-recommended that you never use these unless you really know what the
-cache width is.
-
-::
-
- dma_addr_t
- dma_map_resource(struct device *dev, phys_addr_t phys_addr, size_t size,
- enum dma_data_direction dir, unsigned long attrs)
-
- void
- dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size,
- enum dma_data_direction dir, unsigned long attrs)
-
-API for mapping and unmapping for MMIO resources. All the notes and
-warnings for the other mapping APIs apply here. The API should only be
-used to map device MMIO resources, mapping of RAM is not permitted.
-
-::
-
- int
- dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
-
-In some circumstances dma_map_single(), dma_map_page() and dma_map_resource()
-will fail to create a mapping. A driver can check for these errors by testing
-the returned DMA address with dma_mapping_error(). A non-zero return value
-means the mapping could not be created and the driver should take appropriate
-action (e.g. reduce current DMA mapping usage or delay and try again later).
-
-::
-
- int
- dma_map_sg(struct device *dev, struct scatterlist *sg,
- int nents, enum dma_data_direction direction)
-
-Returns: the number of DMA address segments mapped (this may be shorter
-than <nents> passed in if some elements of the scatter/gather list are
-physically or virtually adjacent and an IOMMU maps them with a single
-entry).
-
-Please note that the sg cannot be mapped again if it has been mapped once.
-The mapping process is allowed to destroy information in the sg.
-
-As with the other mapping interfaces, dma_map_sg() can fail. When it
-does, 0 is returned and a driver must take appropriate action. It is
-critical that the driver do something, in the case of a block driver
-aborting the request or even oopsing is better than doing nothing and
-corrupting the filesystem.
-
-With scatterlists, you use the resulting mapping like this::
-
- int i, count = dma_map_sg(dev, sglist, nents, direction);
- struct scatterlist *sg;
-
- for_each_sg(sglist, sg, count, i) {
- hw_address[i] = sg_dma_address(sg);
- hw_len[i] = sg_dma_len(sg);
- }
-
-where nents is the number of entries in the sglist.
-
-The implementation is free to merge several consecutive sglist entries
-into one (e.g. with an IOMMU, or if several pages just happen to be
-physically contiguous) and returns the actual number of sg entries it
-mapped them to. On failure 0, is returned.
-
-Then you should loop count times (note: this can be less than nents times)
-and use sg_dma_address() and sg_dma_len() macros where you previously
-accessed sg->address and sg->length as shown above.
-
-::
-
- void
- dma_unmap_sg(struct device *dev, struct scatterlist *sg,
- int nents, enum dma_data_direction direction)
-
-Unmap the previously mapped scatter/gather list. All the parameters
-must be the same as those and passed in to the scatter/gather mapping
-API.
-
-Note: <nents> must be the number you passed in, *not* the number of
-DMA address entries returned.
-
-::
-
- void
- dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
- size_t size,
- enum dma_data_direction direction)
-
- void
- dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
- size_t size,
- enum dma_data_direction direction)
-
- void
- dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
- int nents,
- enum dma_data_direction direction)
-
- void
- dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
- int nents,
- enum dma_data_direction direction)
-
-Synchronise a single contiguous or scatter/gather mapping for the CPU
-and device. With the sync_sg API, all the parameters must be the same
-as those passed into the single mapping API. With the sync_single API,
-you can use dma_handle and size parameters that aren't identical to
-those passed into the single mapping API to do a partial sync.
-
-
-.. note::
-
- You must do this:
-
- - Before reading values that have been written by DMA from the device
- (use the DMA_FROM_DEVICE direction)
- - After writing values that will be written to the device using DMA
- (use the DMA_TO_DEVICE) direction
- - before *and* after handing memory to the device if the memory is
- DMA_BIDIRECTIONAL
-
-See also dma_map_single().
-
-::
-
- dma_addr_t
- dma_map_single_attrs(struct device *dev, void *cpu_addr, size_t size,
- enum dma_data_direction dir,
- unsigned long attrs)
-
- void
- dma_unmap_single_attrs(struct device *dev, dma_addr_t dma_addr,
- size_t size, enum dma_data_direction dir,
- unsigned long attrs)
-
- int
- dma_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
- int nents, enum dma_data_direction dir,
- unsigned long attrs)
-
- void
- dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sgl,
- int nents, enum dma_data_direction dir,
- unsigned long attrs)
-
-The four functions above are just like the counterpart functions
-without the _attrs suffixes, except that they pass an optional
-dma_attrs.
-
-The interpretation of DMA attributes is architecture-specific, and
-each attribute should be documented in Documentation/DMA-attributes.txt.
-
-If dma_attrs are 0, the semantics of each of these functions
-is identical to those of the corresponding function
-without the _attrs suffix. As a result dma_map_single_attrs()
-can generally replace dma_map_single(), etc.
-
-As an example of the use of the ``*_attrs`` functions, here's how
-you could pass an attribute DMA_ATTR_FOO when mapping memory
-for DMA::
-
- #include <linux/dma-mapping.h>
- /* DMA_ATTR_FOO should be defined in linux/dma-mapping.h and
- * documented in Documentation/DMA-attributes.txt */
- ...
-
- unsigned long attr;
- attr |= DMA_ATTR_FOO;
- ....
- n = dma_map_sg_attrs(dev, sg, nents, DMA_TO_DEVICE, attr);
- ....
-
-Architectures that care about DMA_ATTR_FOO would check for its
-presence in their implementations of the mapping and unmapping
-routines, e.g.:::
-
- void whizco_dma_map_sg_attrs(struct device *dev, dma_addr_t dma_addr,
- size_t size, enum dma_data_direction dir,
- unsigned long attrs)
- {
- ....
- if (attrs & DMA_ATTR_FOO)
- /* twizzle the frobnozzle */
- ....
- }
-
-
-Part II - Advanced dma usage
-----------------------------
-
-Warning: These pieces of the DMA API should not be used in the
-majority of cases, since they cater for unlikely corner cases that
-don't belong in usual drivers.
-
-If you don't understand how cache line coherency works between a
-processor and an I/O device, you should not be using this part of the
-API at all.
-
-::
-
- void *
- dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
- gfp_t flag, unsigned long attrs)
-
-Identical to dma_alloc_coherent() except that when the
-DMA_ATTR_NON_CONSISTENT flags is passed in the attrs argument, the
-platform will choose to return either consistent or non-consistent memory
-as it sees fit. By using this API, you are guaranteeing to the platform
-that you have all the correct and necessary sync points for this memory
-in the driver should it choose to return non-consistent memory.
-
-Note: where the platform can return consistent memory, it will
-guarantee that the sync points become nops.
-
-Warning: Handling non-consistent memory is a real pain. You should
-only use this API if you positively know your driver will be
-required to work on one of the rare (usually non-PCI) architectures
-that simply cannot make consistent memory.
-
-::
-
- void
- dma_free_attrs(struct device *dev, size_t size, void *cpu_addr,
- dma_addr_t dma_handle, unsigned long attrs)
-
-Free memory allocated by the dma_alloc_attrs(). All parameters common
-parameters must identical to those otherwise passed to dma_fre_coherent,
-and the attrs argument must be identical to the attrs passed to
-dma_alloc_attrs().
-
-::
-
- int
- dma_get_cache_alignment(void)
-
-Returns the processor cache alignment. This is the absolute minimum
-alignment *and* width that you must observe when either mapping
-memory or doing partial flushes.
-
-.. note::
-
- This API may return a number *larger* than the actual cache
- line, but it will guarantee that one or more cache lines fit exactly
- into the width returned by this call. It will also always be a power
- of two for easy alignment.
-
-::
-
- void
- dma_cache_sync(struct device *dev, void *vaddr, size_t size,
- enum dma_data_direction direction)
-
-Do a partial sync of memory that was allocated by dma_alloc_attrs() with
-the DMA_ATTR_NON_CONSISTENT flag starting at virtual address vaddr and
-continuing on for size. Again, you *must* observe the cache line
-boundaries when doing this.
-
-::
-
- int
- dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
- dma_addr_t device_addr, size_t size, int
- flags)
-
-Declare region of memory to be handed out by dma_alloc_coherent() when
-it's asked for coherent memory for this device.
-
-phys_addr is the CPU physical address to which the memory is currently
-assigned (this will be ioremapped so the CPU can access the region).
-
-device_addr is the DMA address the device needs to be programmed
-with to actually address this memory (this will be handed out as the
-dma_addr_t in dma_alloc_coherent()).
-
-size is the size of the area (must be multiples of PAGE_SIZE).
-
-flags can be ORed together and are:
-
-- DMA_MEMORY_EXCLUSIVE - only allocate memory from the declared regions.
- Do not allow dma_alloc_coherent() to fall back to system memory when
- it's out of memory in the declared region.
-
-As a simplification for the platforms, only *one* such region of
-memory may be declared per device.
-
-For reasons of efficiency, most platforms choose to track the declared
-region only at the granularity of a page. For smaller allocations,
-you should use the dma_pool() API.
-
-::
-
- void
- dma_release_declared_memory(struct device *dev)
-
-Remove the memory region previously declared from the system. This
-API performs *no* in-use checking for this region and will return
-unconditionally having removed all the required structures. It is the
-driver's job to ensure that no parts of this memory region are
-currently in use.
-
-::
-
- void *
- dma_mark_declared_memory_occupied(struct device *dev,
- dma_addr_t device_addr, size_t size)
-
-This is used to occupy specific regions of the declared space
-(dma_alloc_coherent() will hand out the first free region it finds).
-
-device_addr is the *device* address of the region requested.
-
-size is the size (and should be a page-sized multiple).
-
-The return value will be either a pointer to the processor virtual
-address of the memory, or an error (via PTR_ERR()) if any part of the
-region is occupied.
-
-Part III - Debug drivers use of the DMA-API
--------------------------------------------
-
-The DMA-API as described above has some constraints. DMA addresses must be
-released with the corresponding function with the same size for example. With
-the advent of hardware IOMMUs it becomes more and more important that drivers
-do not violate those constraints. In the worst case such a violation can
-result in data corruption up to destroyed filesystems.
-
-To debug drivers and find bugs in the usage of the DMA-API checking code can
-be compiled into the kernel which will tell the developer about those
-violations. If your architecture supports it you can select the "Enable
-debugging of DMA-API usage" option in your kernel configuration. Enabling this
-option has a performance impact. Do not enable it in production kernels.
-
-If you boot the resulting kernel will contain code which does some bookkeeping
-about what DMA memory was allocated for which device. If this code detects an
-error it prints a warning message with some details into your kernel log. An
-example warning message may look like this::
-
- WARNING: at /data2/repos/linux-2.6-iommu/lib/dma-debug.c:448
- check_unmap+0x203/0x490()
- Hardware name:
- forcedeth 0000:00:08.0: DMA-API: device driver frees DMA memory with wrong
- function [device address=0x00000000640444be] [size=66 bytes] [mapped as
- single] [unmapped as page]
- Modules linked in: nfsd exportfs bridge stp llc r8169
- Pid: 0, comm: swapper Tainted: G W 2.6.28-dmatest-09289-g8bb99c0 #1
- Call Trace:
- <IRQ> [<ffffffff80240b22>] warn_slowpath+0xf2/0x130
- [<ffffffff80647b70>] _spin_unlock+0x10/0x30
- [<ffffffff80537e75>] usb_hcd_link_urb_to_ep+0x75/0xc0
- [<ffffffff80647c22>] _spin_unlock_irqrestore+0x12/0x40
- [<ffffffff8055347f>] ohci_urb_enqueue+0x19f/0x7c0
- [<ffffffff80252f96>] queue_work+0x56/0x60
- [<ffffffff80237e10>] enqueue_task_fair+0x20/0x50
- [<ffffffff80539279>] usb_hcd_submit_urb+0x379/0xbc0
- [<ffffffff803b78c3>] cpumask_next_and+0x23/0x40
- [<ffffffff80235177>] find_busiest_group+0x207/0x8a0
- [<ffffffff8064784f>] _spin_lock_irqsave+0x1f/0x50
- [<ffffffff803c7ea3>] check_unmap+0x203/0x490
- [<ffffffff803c8259>] debug_dma_unmap_page+0x49/0x50
- [<ffffffff80485f26>] nv_tx_done_optimized+0xc6/0x2c0
- [<ffffffff80486c13>] nv_nic_irq_optimized+0x73/0x2b0
- [<ffffffff8026df84>] handle_IRQ_event+0x34/0x70
- [<ffffffff8026ffe9>] handle_edge_irq+0xc9/0x150
- [<ffffffff8020e3ab>] do_IRQ+0xcb/0x1c0
- [<ffffffff8020c093>] ret_from_intr+0x0/0xa
- <EOI> <4>---[ end trace f6435a98e2a38c0e ]---
-
-The driver developer can find the driver and the device including a stacktrace
-of the DMA-API call which caused this warning.
-
-Per default only the first error will result in a warning message. All other
-errors will only silently counted. This limitation exist to prevent the code
-from flooding your kernel log. To support debugging a device driver this can
-be disabled via debugfs. See the debugfs interface documentation below for
-details.
-
-The debugfs directory for the DMA-API debugging code is called dma-api/. In
-this directory the following files can currently be found:
-
-=============================== ===============================================
-dma-api/all_errors This file contains a numeric value. If this
- value is not equal to zero the debugging code
- will print a warning for every error it finds
- into the kernel log. Be careful with this
- option, as it can easily flood your logs.
-
-dma-api/disabled This read-only file contains the character 'Y'
- if the debugging code is disabled. This can
- happen when it runs out of memory or if it was
- disabled at boot time
-
-dma-api/error_count This file is read-only and shows the total
- numbers of errors found.
-
-dma-api/num_errors The number in this file shows how many
- warnings will be printed to the kernel log
- before it stops. This number is initialized to
- one at system boot and be set by writing into
- this file
-
-dma-api/min_free_entries This read-only file can be read to get the
- minimum number of free dma_debug_entries the
- allocator has ever seen. If this value goes
- down to zero the code will attempt to increase
- nr_total_entries to compensate.
-
-dma-api/num_free_entries The current number of free dma_debug_entries
- in the allocator.
-
-dma-api/nr_total_entries The total number of dma_debug_entries in the
- allocator, both free and used.
-
-dma-api/driver-filter You can write a name of a driver into this file
- to limit the debug output to requests from that
- particular driver. Write an empty string to
- that file to disable the filter and see
- all errors again.
-=============================== ===============================================
-
-If you have this code compiled into your kernel it will be enabled by default.
-If you want to boot without the bookkeeping anyway you can provide
-'dma_debug=off' as a boot parameter. This will disable DMA-API debugging.
-Notice that you can not enable it again at runtime. You have to reboot to do
-so.
-
-If you want to see debug messages only for a special device driver you can
-specify the dma_debug_driver=<drivername> parameter. This will enable the
-driver filter at boot time. The debug code will only print errors for that
-driver afterwards. This filter can be disabled or changed later using debugfs.
-
-When the code disables itself at runtime this is most likely because it ran
-out of dma_debug_entries and was unable to allocate more on-demand. 65536
-entries are preallocated at boot - if this is too low for you boot with
-'dma_debug_entries=<your_desired_number>' to overwrite the default. The
-code will print to the kernel log each time it has dynamically allocated
-as many entries as were initially preallocated. This is to indicate that a
-larger preallocation size may be appropriate, or if it happens continually
-that a driver may be leaking mappings.
-
-::
-
- void
- debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr);
-
-dma-debug interface debug_dma_mapping_error() to debug drivers that fail
-to check DMA mapping errors on addresses returned by dma_map_single() and
-dma_map_page() interfaces. This interface clears a flag set by
-debug_dma_map_page() to indicate that dma_mapping_error() has been called by
-the driver. When driver does unmap, debug_dma_unmap() checks the flag and if
-this flag is still set, prints warning message that includes call trace that
-leads up to the unmap. This interface can be called from dma_mapping_error()
-routines to enable DMA mapping error check debugging.
diff --git a/Documentation/DMA-ISA-LPC.txt b/Documentation/DMA-ISA-LPC.txt
deleted file mode 100644
index 8c2b8be6e45b..000000000000
--- a/Documentation/DMA-ISA-LPC.txt
+++ /dev/null
@@ -1,152 +0,0 @@
-============================
-DMA with ISA and LPC devices
-============================
-
-:Author: Pierre Ossman <drzeus@drzeus.cx>
-
-This document describes how to do DMA transfers using the old ISA DMA
-controller. Even though ISA is more or less dead today the LPC bus
-uses the same DMA system so it will be around for quite some time.
-
-Headers and dependencies
-------------------------
-
-To do ISA style DMA you need to include two headers::
-
- #include <linux/dma-mapping.h>
- #include <asm/dma.h>
-
-The first is the generic DMA API used to convert virtual addresses to
-bus addresses (see Documentation/DMA-API.txt for details).
-
-The second contains the routines specific to ISA DMA transfers. Since
-this is not present on all platforms make sure you construct your
-Kconfig to be dependent on ISA_DMA_API (not ISA) so that nobody tries
-to build your driver on unsupported platforms.
-
-Buffer allocation
------------------
-
-The ISA DMA controller has some very strict requirements on which
-memory it can access so extra care must be taken when allocating
-buffers.
-
-(You usually need a special buffer for DMA transfers instead of
-transferring directly to and from your normal data structures.)
-
-The DMA-able address space is the lowest 16 MB of _physical_ memory.
-Also the transfer block may not cross page boundaries (which are 64
-or 128 KiB depending on which channel you use).
-
-In order to allocate a piece of memory that satisfies all these
-requirements you pass the flag GFP_DMA to kmalloc.
-
-Unfortunately the memory available for ISA DMA is scarce so unless you
-allocate the memory during boot-up it's a good idea to also pass
-__GFP_RETRY_MAYFAIL and __GFP_NOWARN to make the allocator try a bit harder.
-
-(This scarcity also means that you should allocate the buffer as
-early as possible and not release it until the driver is unloaded.)
-
-Address translation
--------------------
-
-To translate the virtual address to a bus address, use the normal DMA
-API. Do _not_ use isa_virt_to_phys() even though it does the same
-thing. The reason for this is that the function isa_virt_to_phys()
-will require a Kconfig dependency to ISA, not just ISA_DMA_API which
-is really all you need. Remember that even though the DMA controller
-has its origins in ISA it is used elsewhere.
-
-Note: x86_64 had a broken DMA API when it came to ISA but has since
-been fixed. If your arch has problems then fix the DMA API instead of
-reverting to the ISA functions.
-
-Channels
---------
-
-A normal ISA DMA controller has 8 channels. The lower four are for
-8-bit transfers and the upper four are for 16-bit transfers.
-
-(Actually the DMA controller is really two separate controllers where
-channel 4 is used to give DMA access for the second controller (0-3).
-This means that of the four 16-bits channels only three are usable.)
-
-You allocate these in a similar fashion as all basic resources:
-
-extern int request_dma(unsigned int dmanr, const char * device_id);
-extern void free_dma(unsigned int dmanr);
-
-The ability to use 16-bit or 8-bit transfers is _not_ up to you as a
-driver author but depends on what the hardware supports. Check your
-specs or test different channels.
-
-Transfer data
--------------
-
-Now for the good stuff, the actual DMA transfer. :)
-
-Before you use any ISA DMA routines you need to claim the DMA lock
-using claim_dma_lock(). The reason is that some DMA operations are
-not atomic so only one driver may fiddle with the registers at a
-time.
-
-The first time you use the DMA controller you should call
-clear_dma_ff(). This clears an internal register in the DMA
-controller that is used for the non-atomic operations. As long as you
-(and everyone else) uses the locking functions then you only need to
-reset this once.
-
-Next, you tell the controller in which direction you intend to do the
-transfer using set_dma_mode(). Currently you have the options
-DMA_MODE_READ and DMA_MODE_WRITE.
-
-Set the address from where the transfer should start (this needs to
-be 16-bit aligned for 16-bit transfers) and how many bytes to
-transfer. Note that it's _bytes_. The DMA routines will do all the
-required translation to values that the DMA controller understands.
-
-The final step is enabling the DMA channel and releasing the DMA
-lock.
-
-Once the DMA transfer is finished (or timed out) you should disable
-the channel again. You should also check get_dma_residue() to make
-sure that all data has been transferred.
-
-Example::
-
- int flags, residue;
-
- flags = claim_dma_lock();
-
- clear_dma_ff();
-
- set_dma_mode(channel, DMA_MODE_WRITE);
- set_dma_addr(channel, phys_addr);
- set_dma_count(channel, num_bytes);
-
- dma_enable(channel);
-
- release_dma_lock(flags);
-
- while (!device_done());
-
- flags = claim_dma_lock();
-
- dma_disable(channel);
-
- residue = dma_get_residue(channel);
- if (residue != 0)
- printk(KERN_ERR "driver: Incomplete DMA transfer!"
- " %d bytes left!\n", residue);
-
- release_dma_lock(flags);
-
-Suspend/resume
---------------
-
-It is the driver's responsibility to make sure that the machine isn't
-suspended while a DMA transfer is in progress. Also, all DMA settings
-are lost when the system suspends so if your driver relies on the DMA
-controller being in a certain state then you have to restore these
-registers upon resume.
diff --git a/Documentation/DMA-attributes.txt b/Documentation/DMA-attributes.txt
deleted file mode 100644
index 8f8d97f65d73..000000000000
--- a/Documentation/DMA-attributes.txt
+++ /dev/null
@@ -1,158 +0,0 @@
-==============
-DMA attributes
-==============
-
-This document describes the semantics of the DMA attributes that are
-defined in linux/dma-mapping.h.
-
-DMA_ATTR_WRITE_BARRIER
-----------------------
-
-DMA_ATTR_WRITE_BARRIER is a (write) barrier attribute for DMA. DMA
-to a memory region with the DMA_ATTR_WRITE_BARRIER attribute forces
-all pending DMA writes to complete, and thus provides a mechanism to
-strictly order DMA from a device across all intervening busses and
-bridges. This barrier is not specific to a particular type of
-interconnect, it applies to the system as a whole, and so its
-implementation must account for the idiosyncrasies of the system all
-the way from the DMA device to memory.
-
-As an example of a situation where DMA_ATTR_WRITE_BARRIER would be
-useful, suppose that a device does a DMA write to indicate that data is
-ready and available in memory. The DMA of the "completion indication"
-could race with data DMA. Mapping the memory used for completion
-indications with DMA_ATTR_WRITE_BARRIER would prevent the race.
-
-DMA_ATTR_WEAK_ORDERING
-----------------------
-
-DMA_ATTR_WEAK_ORDERING specifies that reads and writes to the mapping
-may be weakly ordered, that is that reads and writes may pass each other.
-
-Since it is optional for platforms to implement DMA_ATTR_WEAK_ORDERING,
-those that do not will simply ignore the attribute and exhibit default
-behavior.
-
-DMA_ATTR_WRITE_COMBINE
-----------------------
-
-DMA_ATTR_WRITE_COMBINE specifies that writes to the mapping may be
-buffered to improve performance.
-
-Since it is optional for platforms to implement DMA_ATTR_WRITE_COMBINE,
-those that do not will simply ignore the attribute and exhibit default
-behavior.
-
-DMA_ATTR_NON_CONSISTENT
------------------------
-
-DMA_ATTR_NON_CONSISTENT lets the platform to choose to return either
-consistent or non-consistent memory as it sees fit. By using this API,
-you are guaranteeing to the platform that you have all the correct and
-necessary sync points for this memory in the driver.
-
-DMA_ATTR_NO_KERNEL_MAPPING
---------------------------
-
-DMA_ATTR_NO_KERNEL_MAPPING lets the platform to avoid creating a kernel
-virtual mapping for the allocated buffer. On some architectures creating
-such mapping is non-trivial task and consumes very limited resources
-(like kernel virtual address space or dma consistent address space).
-Buffers allocated with this attribute can be only passed to user space
-by calling dma_mmap_attrs(). By using this API, you are guaranteeing
-that you won't dereference the pointer returned by dma_alloc_attr(). You
-can treat it as a cookie that must be passed to dma_mmap_attrs() and
-dma_free_attrs(). Make sure that both of these also get this attribute
-set on each call.
-
-Since it is optional for platforms to implement
-DMA_ATTR_NO_KERNEL_MAPPING, those that do not will simply ignore the
-attribute and exhibit default behavior.
-
-DMA_ATTR_SKIP_CPU_SYNC
-----------------------
-
-By default dma_map_{single,page,sg} functions family transfer a given
-buffer from CPU domain to device domain. Some advanced use cases might
-require sharing a buffer between more than one device. This requires
-having a mapping created separately for each device and is usually
-performed by calling dma_map_{single,page,sg} function more than once
-for the given buffer with device pointer to each device taking part in
-the buffer sharing. The first call transfers a buffer from 'CPU' domain
-to 'device' domain, what synchronizes CPU caches for the given region
-(usually it means that the cache has been flushed or invalidated
-depending on the dma direction). However, next calls to
-dma_map_{single,page,sg}() for other devices will perform exactly the
-same synchronization operation on the CPU cache. CPU cache synchronization
-might be a time consuming operation, especially if the buffers are
-large, so it is highly recommended to avoid it if possible.
-DMA_ATTR_SKIP_CPU_SYNC allows platform code to skip synchronization of
-the CPU cache for the given buffer assuming that it has been already
-transferred to 'device' domain. This attribute can be also used for
-dma_unmap_{single,page,sg} functions family to force buffer to stay in
-device domain after releasing a mapping for it. Use this attribute with
-care!
-
-DMA_ATTR_FORCE_CONTIGUOUS
--------------------------
-
-By default DMA-mapping subsystem is allowed to assemble the buffer
-allocated by dma_alloc_attrs() function from individual pages if it can
-be mapped as contiguous chunk into device dma address space. By
-specifying this attribute the allocated buffer is forced to be contiguous
-also in physical memory.
-
-DMA_ATTR_ALLOC_SINGLE_PAGES
----------------------------
-
-This is a hint to the DMA-mapping subsystem that it's probably not worth
-the time to try to allocate memory to in a way that gives better TLB
-efficiency (AKA it's not worth trying to build the mapping out of larger
-pages). You might want to specify this if:
-
-- You know that the accesses to this memory won't thrash the TLB.
- You might know that the accesses are likely to be sequential or
- that they aren't sequential but it's unlikely you'll ping-pong
- between many addresses that are likely to be in different physical
- pages.
-- You know that the penalty of TLB misses while accessing the
- memory will be small enough to be inconsequential. If you are
- doing a heavy operation like decryption or decompression this
- might be the case.
-- You know that the DMA mapping is fairly transitory. If you expect
- the mapping to have a short lifetime then it may be worth it to
- optimize allocation (avoid coming up with large pages) instead of
- getting the slight performance win of larger pages.
-
-Setting this hint doesn't guarantee that you won't get huge pages, but it
-means that we won't try quite as hard to get them.
-
-.. note:: At the moment DMA_ATTR_ALLOC_SINGLE_PAGES is only implemented on ARM,
- though ARM64 patches will likely be posted soon.
-
-DMA_ATTR_NO_WARN
-----------------
-
-This tells the DMA-mapping subsystem to suppress allocation failure reports
-(similarly to __GFP_NOWARN).
-
-On some architectures allocation failures are reported with error messages
-to the system logs. Although this can help to identify and debug problems,
-drivers which handle failures (eg, retry later) have no problems with them,
-and can actually flood the system logs with error messages that aren't any
-problem at all, depending on the implementation of the retry mechanism.
-
-So, this provides a way for drivers to avoid those error messages on calls
-where allocation failures are not a problem, and shouldn't bother the logs.
-
-.. note:: At the moment DMA_ATTR_NO_WARN is only implemented on PowerPC.
-
-DMA_ATTR_PRIVILEGED
--------------------
-
-Some advanced peripherals such as remote processors and GPUs perform
-accesses to DMA buffers in both privileged "supervisor" and unprivileged
-"user" modes. This attribute is used to indicate to the DMA-mapping
-subsystem that the buffer is fully accessible at the elevated privilege
-level (and ideally inaccessible or at least read-only at the
-lesser-privileged levels).
diff --git a/Documentation/EDID/1024x768.S b/Documentation/EDID/1024x768.S
deleted file mode 100644
index 6f3e4b75e49e..000000000000
--- a/Documentation/EDID/1024x768.S
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- 1024x768.S: EDID data set for standard 1024x768 60 Hz monitor
-
- Copyright (C) 2011 Carsten Emde <C.Emde@osadl.org>
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License
- as published by the Free Software Foundation; either version 2
- of the License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-*/
-
-/* EDID */
-#define VERSION 1
-#define REVISION 3
-
-/* Display */
-#define CLOCK 65000 /* kHz */
-#define XPIX 1024
-#define YPIX 768
-#define XY_RATIO XY_RATIO_4_3
-#define XBLANK 320
-#define YBLANK 38
-#define XOFFSET 8
-#define XPULSE 144
-#define YOFFSET (63+3)
-#define YPULSE (63+6)
-#define DPI 72
-#define VFREQ 60 /* Hz */
-#define TIMING_NAME "Linux XGA"
-#define ESTABLISHED_TIMING2_BITS 0x08 /* Bit 3 -> 1024x768 @60 Hz */
-#define HSYNC_POL 0
-#define VSYNC_POL 0
-#define CRC 0x55
-
-#include "edid.S"
diff --git a/Documentation/EDID/1280x1024.S b/Documentation/EDID/1280x1024.S
deleted file mode 100644
index bd9bef2a65af..000000000000
--- a/Documentation/EDID/1280x1024.S
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- 1280x1024.S: EDID data set for standard 1280x1024 60 Hz monitor
-
- Copyright (C) 2011 Carsten Emde <C.Emde@osadl.org>
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License
- as published by the Free Software Foundation; either version 2
- of the License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-*/
-
-/* EDID */
-#define VERSION 1
-#define REVISION 3
-
-/* Display */
-#define CLOCK 108000 /* kHz */
-#define XPIX 1280
-#define YPIX 1024
-#define XY_RATIO XY_RATIO_5_4
-#define XBLANK 408
-#define YBLANK 42
-#define XOFFSET 48
-#define XPULSE 112
-#define YOFFSET (63+1)
-#define YPULSE (63+3)
-#define DPI 72
-#define VFREQ 60 /* Hz */
-#define TIMING_NAME "Linux SXGA"
-/* No ESTABLISHED_TIMINGx_BITS */
-#define HSYNC_POL 1
-#define VSYNC_POL 1
-#define CRC 0xa0
-
-#include "edid.S"
diff --git a/Documentation/EDID/1600x1200.S b/Documentation/EDID/1600x1200.S
deleted file mode 100644
index a45101c6160c..000000000000
--- a/Documentation/EDID/1600x1200.S
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- 1600x1200.S: EDID data set for standard 1600x1200 60 Hz monitor
-
- Copyright (C) 2013 Carsten Emde <C.Emde@osadl.org>
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License
- as published by the Free Software Foundation; either version 2
- of the License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-*/
-
-/* EDID */
-#define VERSION 1
-#define REVISION 3
-
-/* Display */
-#define CLOCK 162000 /* kHz */
-#define XPIX 1600
-#define YPIX 1200
-#define XY_RATIO XY_RATIO_4_3
-#define XBLANK 560
-#define YBLANK 50
-#define XOFFSET 64
-#define XPULSE 192
-#define YOFFSET (63+1)
-#define YPULSE (63+3)
-#define DPI 72
-#define VFREQ 60 /* Hz */
-#define TIMING_NAME "Linux UXGA"
-/* No ESTABLISHED_TIMINGx_BITS */
-#define HSYNC_POL 1
-#define VSYNC_POL 1
-#define CRC 0x9d
-
-#include "edid.S"
diff --git a/Documentation/EDID/1680x1050.S b/Documentation/EDID/1680x1050.S
deleted file mode 100644
index b0d7c69282b4..000000000000
--- a/Documentation/EDID/1680x1050.S
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- 1680x1050.S: EDID data set for standard 1680x1050 60 Hz monitor
-
- Copyright (C) 2012 Carsten Emde <C.Emde@osadl.org>
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License
- as published by the Free Software Foundation; either version 2
- of the License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-*/
-
-/* EDID */
-#define VERSION 1
-#define REVISION 3
-
-/* Display */
-#define CLOCK 146250 /* kHz */
-#define XPIX 1680
-#define YPIX 1050
-#define XY_RATIO XY_RATIO_16_10
-#define XBLANK 560
-#define YBLANK 39
-#define XOFFSET 104
-#define XPULSE 176
-#define YOFFSET (63+3)
-#define YPULSE (63+6)
-#define DPI 96
-#define VFREQ 60 /* Hz */
-#define TIMING_NAME "Linux WSXGA"
-/* No ESTABLISHED_TIMINGx_BITS */
-#define HSYNC_POL 1
-#define VSYNC_POL 1
-#define CRC 0x26
-
-#include "edid.S"
diff --git a/Documentation/EDID/1920x1080.S b/Documentation/EDID/1920x1080.S
deleted file mode 100644
index 3084355e81e7..000000000000
--- a/Documentation/EDID/1920x1080.S
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- 1920x1080.S: EDID data set for standard 1920x1080 60 Hz monitor
-
- Copyright (C) 2012 Carsten Emde <C.Emde@osadl.org>
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License
- as published by the Free Software Foundation; either version 2
- of the License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-*/
-
-/* EDID */
-#define VERSION 1
-#define REVISION 3
-
-/* Display */
-#define CLOCK 148500 /* kHz */
-#define XPIX 1920
-#define YPIX 1080
-#define XY_RATIO XY_RATIO_16_9
-#define XBLANK 280
-#define YBLANK 45
-#define XOFFSET 88
-#define XPULSE 44
-#define YOFFSET (63+4)
-#define YPULSE (63+5)
-#define DPI 96
-#define VFREQ 60 /* Hz */
-#define TIMING_NAME "Linux FHD"
-/* No ESTABLISHED_TIMINGx_BITS */
-#define HSYNC_POL 1
-#define VSYNC_POL 1
-#define CRC 0x05
-
-#include "edid.S"
diff --git a/Documentation/EDID/800x600.S b/Documentation/EDID/800x600.S
deleted file mode 100644
index 6644e26d5801..000000000000
--- a/Documentation/EDID/800x600.S
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- 800x600.S: EDID data set for standard 800x600 60 Hz monitor
-
- Copyright (C) 2011 Carsten Emde <C.Emde@osadl.org>
- Copyright (C) 2014 Linaro Limited
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License
- as published by the Free Software Foundation; either version 2
- of the License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-*/
-
-/* EDID */
-#define VERSION 1
-#define REVISION 3
-
-/* Display */
-#define CLOCK 40000 /* kHz */
-#define XPIX 800
-#define YPIX 600
-#define XY_RATIO XY_RATIO_4_3
-#define XBLANK 256
-#define YBLANK 28
-#define XOFFSET 40
-#define XPULSE 128
-#define YOFFSET (63+1)
-#define YPULSE (63+4)
-#define DPI 72
-#define VFREQ 60 /* Hz */
-#define TIMING_NAME "Linux SVGA"
-#define ESTABLISHED_TIMING1_BITS 0x01 /* Bit 0: 800x600 @ 60Hz */
-#define HSYNC_POL 1
-#define VSYNC_POL 1
-#define CRC 0xc2
-
-#include "edid.S"
diff --git a/Documentation/EDID/HOWTO.txt b/Documentation/EDID/HOWTO.txt
deleted file mode 100644
index 835db332289b..000000000000
--- a/Documentation/EDID/HOWTO.txt
+++ /dev/null
@@ -1,58 +0,0 @@
-In the good old days when graphics parameters were configured explicitly
-in a file called xorg.conf, even broken hardware could be managed.
-
-Today, with the advent of Kernel Mode Setting, a graphics board is
-either correctly working because all components follow the standards -
-or the computer is unusable, because the screen remains dark after
-booting or it displays the wrong area. Cases when this happens are:
-- The graphics board does not recognize the monitor.
-- The graphics board is unable to detect any EDID data.
-- The graphics board incorrectly forwards EDID data to the driver.
-- The monitor sends no or bogus EDID data.
-- A KVM sends its own EDID data instead of querying the connected monitor.
-Adding the kernel parameter "nomodeset" helps in most cases, but causes
-restrictions later on.
-
-As a remedy for such situations, the kernel configuration item
-CONFIG_DRM_LOAD_EDID_FIRMWARE was introduced. It allows to provide an
-individually prepared or corrected EDID data set in the /lib/firmware
-directory from where it is loaded via the firmware interface. The code
-(see drivers/gpu/drm/drm_edid_load.c) contains built-in data sets for
-commonly used screen resolutions (800x600, 1024x768, 1280x1024, 1600x1200,
-1680x1050, 1920x1080) as binary blobs, but the kernel source tree does
-not contain code to create these data. In order to elucidate the origin
-of the built-in binary EDID blobs and to facilitate the creation of
-individual data for a specific misbehaving monitor, commented sources
-and a Makefile environment are given here.
-
-To create binary EDID and C source code files from the existing data
-material, simply type "make".
-
-If you want to create your own EDID file, copy the file 1024x768.S,
-replace the settings with your own data and add a new target to the
-Makefile. Please note that the EDID data structure expects the timing
-values in a different way as compared to the standard X11 format.
-
-X11:
-HTimings: hdisp hsyncstart hsyncend htotal
-VTimings: vdisp vsyncstart vsyncend vtotal
-
-EDID:
-#define XPIX hdisp
-#define XBLANK htotal-hdisp
-#define XOFFSET hsyncstart-hdisp
-#define XPULSE hsyncend-hsyncstart
-
-#define YPIX vdisp
-#define YBLANK vtotal-vdisp
-#define YOFFSET (63+(vsyncstart-vdisp))
-#define YPULSE (63+(vsyncend-vsyncstart))
-
-The CRC value in the last line
- #define CRC 0x55
-also is a bit tricky. After a first version of the binary data set is
-created, it must be checked with the "edid-decode" utility which will
-most probably complain about a wrong CRC. Fortunately, the utility also
-displays the correct CRC which must then be inserted into the source
-file. After the make procedure is repeated, the EDID data set is ready
-to be used.
diff --git a/Documentation/EDID/Makefile b/Documentation/EDID/Makefile
deleted file mode 100644
index 17763ca3f12b..000000000000
--- a/Documentation/EDID/Makefile
+++ /dev/null
@@ -1,26 +0,0 @@
-
-SOURCES := $(wildcard [0-9]*x[0-9]*.S)
-
-BIN := $(patsubst %.S, %.bin, $(SOURCES))
-
-IHEX := $(patsubst %.S, %.bin.ihex, $(SOURCES))
-
-CODE := $(patsubst %.S, %.c, $(SOURCES))
-
-all: $(BIN) $(IHEX) $(CODE)
-
-clean:
- @rm -f *.o *.bin.ihex *.bin *.c
-
-%.o: %.S
- @cc -c $^
-
-%.bin: %.o
- @objcopy -Obinary $^ $@
-
-%.bin.ihex: %.o
- @objcopy -Oihex $^ $@
- @dos2unix $@ 2>/dev/null
-
-%.c: %.bin
- @echo "{" >$@; hexdump -f hex $^ >>$@; echo "};" >>$@
diff --git a/Documentation/EDID/edid.S b/Documentation/EDID/edid.S
deleted file mode 100644
index ef082dcc6084..000000000000
--- a/Documentation/EDID/edid.S
+++ /dev/null
@@ -1,272 +0,0 @@
-/*
- edid.S: EDID data template
-
- Copyright (C) 2012 Carsten Emde <C.Emde@osadl.org>
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License
- as published by the Free Software Foundation; either version 2
- of the License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-*/
-
-
-/* Manufacturer */
-#define MFG_LNX1 'L'
-#define MFG_LNX2 'N'
-#define MFG_LNX3 'X'
-#define SERIAL 0
-#define YEAR 2012
-#define WEEK 5
-
-/* EDID 1.3 standard definitions */
-#define XY_RATIO_16_10 0b00
-#define XY_RATIO_4_3 0b01
-#define XY_RATIO_5_4 0b10
-#define XY_RATIO_16_9 0b11
-
-/* Provide defaults for the timing bits */
-#ifndef ESTABLISHED_TIMING1_BITS
-#define ESTABLISHED_TIMING1_BITS 0x00
-#endif
-#ifndef ESTABLISHED_TIMING2_BITS
-#define ESTABLISHED_TIMING2_BITS 0x00
-#endif
-#ifndef ESTABLISHED_TIMING3_BITS
-#define ESTABLISHED_TIMING3_BITS 0x00
-#endif
-
-#define mfgname2id(v1,v2,v3) \
- ((((v1-'@')&0x1f)<<10)+(((v2-'@')&0x1f)<<5)+((v3-'@')&0x1f))
-#define swap16(v1) ((v1>>8)+((v1&0xff)<<8))
-#define msbs2(v1,v2) ((((v1>>8)&0x0f)<<4)+((v2>>8)&0x0f))
-#define msbs4(v1,v2,v3,v4) \
- (((v1&0x03)>>2)+((v2&0x03)>>4)+((v3&0x03)>>6)+((v4&0x03)>>8))
-#define pixdpi2mm(pix,dpi) ((pix*25)/dpi)
-#define xsize pixdpi2mm(XPIX,DPI)
-#define ysize pixdpi2mm(YPIX,DPI)
-
- .data
-
-/* Fixed header pattern */
-header: .byte 0x00,0xff,0xff,0xff,0xff,0xff,0xff,0x00
-
-mfg_id: .hword swap16(mfgname2id(MFG_LNX1, MFG_LNX2, MFG_LNX3))
-
-prod_code: .hword 0
-
-/* Serial number. 32 bits, little endian. */
-serial_number: .long SERIAL
-
-/* Week of manufacture */
-week: .byte WEEK
-
-/* Year of manufacture, less 1990. (1990-2245)
- If week=255, it is the model year instead */
-year: .byte YEAR-1990
-
-version: .byte VERSION /* EDID version, usually 1 (for 1.3) */
-revision: .byte REVISION /* EDID revision, usually 3 (for 1.3) */
-
-/* If Bit 7=1 Digital input. If set, the following bit definitions apply:
- Bits 6-1 Reserved, must be 0
- Bit 0 Signal is compatible with VESA DFP 1.x TMDS CRGB,
- 1 pixel per clock, up to 8 bits per color, MSB aligned,
- If Bit 7=0 Analog input. If clear, the following bit definitions apply:
- Bits 6-5 Video white and sync levels, relative to blank
- 00=+0.7/-0.3 V; 01=+0.714/-0.286 V;
- 10=+1.0/-0.4 V; 11=+0.7/0 V
- Bit 4 Blank-to-black setup (pedestal) expected
- Bit 3 Separate sync supported
- Bit 2 Composite sync (on HSync) supported
- Bit 1 Sync on green supported
- Bit 0 VSync pulse must be serrated when somposite or
- sync-on-green is used. */
-video_parms: .byte 0x6d
-
-/* Maximum horizontal image size, in centimetres
- (max 292 cm/115 in at 16:9 aspect ratio) */
-max_hor_size: .byte xsize/10
-
-/* Maximum vertical image size, in centimetres.
- If either byte is 0, undefined (e.g. projector) */
-max_vert_size: .byte ysize/10
-
-/* Display gamma, minus 1, times 100 (range 1.00-3.5 */
-gamma: .byte 120
-
-/* Bit 7 DPMS standby supported
- Bit 6 DPMS suspend supported
- Bit 5 DPMS active-off supported
- Bits 4-3 Display type: 00=monochrome; 01=RGB colour;
- 10=non-RGB multicolour; 11=undefined
- Bit 2 Standard sRGB colour space. Bytes 25-34 must contain
- sRGB standard values.
- Bit 1 Preferred timing mode specified in descriptor block 1.
- Bit 0 GTF supported with default parameter values. */
-dsp_features: .byte 0xea
-
-/* Chromaticity coordinates. */
-/* Red and green least-significant bits
- Bits 7-6 Red x value least-significant 2 bits
- Bits 5-4 Red y value least-significant 2 bits
- Bits 3-2 Green x value lst-significant 2 bits
- Bits 1-0 Green y value least-significant 2 bits */
-red_green_lsb: .byte 0x5e
-
-/* Blue and white least-significant 2 bits */
-blue_white_lsb: .byte 0xc0
-
-/* Red x value most significant 8 bits.
- 0-255 encodes 0-0.996 (255/256); 0-0.999 (1023/1024) with lsbits */
-red_x_msb: .byte 0xa4
-
-/* Red y value most significant 8 bits */
-red_y_msb: .byte 0x59
-
-/* Green x and y value most significant 8 bits */
-green_x_y_msb: .byte 0x4a,0x98
-
-/* Blue x and y value most significant 8 bits */
-blue_x_y_msb: .byte 0x25,0x20
-
-/* Default white point x and y value most significant 8 bits */
-white_x_y_msb: .byte 0x50,0x54
-
-/* Established timings */
-/* Bit 7 720x400 @ 70 Hz
- Bit 6 720x400 @ 88 Hz
- Bit 5 640x480 @ 60 Hz
- Bit 4 640x480 @ 67 Hz
- Bit 3 640x480 @ 72 Hz
- Bit 2 640x480 @ 75 Hz
- Bit 1 800x600 @ 56 Hz
- Bit 0 800x600 @ 60 Hz */
-estbl_timing1: .byte ESTABLISHED_TIMING1_BITS
-
-/* Bit 7 800x600 @ 72 Hz
- Bit 6 800x600 @ 75 Hz
- Bit 5 832x624 @ 75 Hz
- Bit 4 1024x768 @ 87 Hz, interlaced (1024x768)
- Bit 3 1024x768 @ 60 Hz
- Bit 2 1024x768 @ 72 Hz
- Bit 1 1024x768 @ 75 Hz
- Bit 0 1280x1024 @ 75 Hz */
-estbl_timing2: .byte ESTABLISHED_TIMING2_BITS
-
-/* Bit 7 1152x870 @ 75 Hz (Apple Macintosh II)
- Bits 6-0 Other manufacturer-specific display mod */
-estbl_timing3: .byte ESTABLISHED_TIMING3_BITS
-
-/* Standard timing */
-/* X resolution, less 31, divided by 8 (256-2288 pixels) */
-std_xres: .byte (XPIX/8)-31
-/* Y resolution, X:Y pixel ratio
- Bits 7-6 X:Y pixel ratio: 00=16:10; 01=4:3; 10=5:4; 11=16:9.
- Bits 5-0 Vertical frequency, less 60 (60-123 Hz) */
-std_vres: .byte (XY_RATIO<<6)+VFREQ-60
- .fill 7,2,0x0101 /* Unused */
-
-descriptor1:
-/* Pixel clock in 10 kHz units. (0.-655.35 MHz, little-endian) */
-clock: .hword CLOCK/10
-
-/* Horizontal active pixels 8 lsbits (0-4095) */
-x_act_lsb: .byte XPIX&0xff
-/* Horizontal blanking pixels 8 lsbits (0-4095)
- End of active to start of next active. */
-x_blk_lsb: .byte XBLANK&0xff
-/* Bits 7-4 Horizontal active pixels 4 msbits
- Bits 3-0 Horizontal blanking pixels 4 msbits */
-x_msbs: .byte msbs2(XPIX,XBLANK)
-
-/* Vertical active lines 8 lsbits (0-4095) */
-y_act_lsb: .byte YPIX&0xff
-/* Vertical blanking lines 8 lsbits (0-4095) */
-y_blk_lsb: .byte YBLANK&0xff
-/* Bits 7-4 Vertical active lines 4 msbits
- Bits 3-0 Vertical blanking lines 4 msbits */
-y_msbs: .byte msbs2(YPIX,YBLANK)
-
-/* Horizontal sync offset pixels 8 lsbits (0-1023) From blanking start */
-x_snc_off_lsb: .byte XOFFSET&0xff
-/* Horizontal sync pulse width pixels 8 lsbits (0-1023) */
-x_snc_pls_lsb: .byte XPULSE&0xff
-/* Bits 7-4 Vertical sync offset lines 4 lsbits -63)
- Bits 3-0 Vertical sync pulse width lines 4 lsbits -63) */
-y_snc_lsb: .byte ((YOFFSET-63)<<4)+(YPULSE-63)
-/* Bits 7-6 Horizontal sync offset pixels 2 msbits
- Bits 5-4 Horizontal sync pulse width pixels 2 msbits
- Bits 3-2 Vertical sync offset lines 2 msbits
- Bits 1-0 Vertical sync pulse width lines 2 msbits */
-xy_snc_msbs: .byte msbs4(XOFFSET,XPULSE,YOFFSET,YPULSE)
-
-/* Horizontal display size, mm, 8 lsbits (0-4095 mm, 161 in) */
-x_dsp_size: .byte xsize&0xff
-
-/* Vertical display size, mm, 8 lsbits (0-4095 mm, 161 in) */
-y_dsp_size: .byte ysize&0xff
-
-/* Bits 7-4 Horizontal display size, mm, 4 msbits
- Bits 3-0 Vertical display size, mm, 4 msbits */
-dsp_size_mbsb: .byte msbs2(xsize,ysize)
-
-/* Horizontal border pixels (each side; total is twice this) */
-x_border: .byte 0
-/* Vertical border lines (each side; total is twice this) */
-y_border: .byte 0
-
-/* Bit 7 Interlaced
- Bits 6-5 Stereo mode: 00=No stereo; other values depend on bit 0:
- Bit 0=0: 01=Field sequential, sync=1 during right; 10=similar,
- sync=1 during left; 11=4-way interleaved stereo
- Bit 0=1 2-way interleaved stereo: 01=Right image on even lines;
- 10=Left image on even lines; 11=side-by-side
- Bits 4-3 Sync type: 00=Analog composite; 01=Bipolar analog composite;
- 10=Digital composite (on HSync); 11=Digital separate
- Bit 2 If digital separate: Vertical sync polarity (1=positive)
- Other types: VSync serrated (HSync during VSync)
- Bit 1 If analog sync: Sync on all 3 RGB lines (else green only)
- Digital: HSync polarity (1=positive)
- Bit 0 2-way line-interleaved stereo, if bits 4-3 are not 00. */
-features: .byte 0x18+(VSYNC_POL<<2)+(HSYNC_POL<<1)
-
-descriptor2: .byte 0,0 /* Not a detailed timing descriptor */
- .byte 0 /* Must be zero */
- .byte 0xff /* Descriptor is monitor serial number (text) */
- .byte 0 /* Must be zero */
-start1: .ascii "Linux #0"
-end1: .byte 0x0a /* End marker */
- .fill 12-(end1-start1), 1, 0x20 /* Padded spaces */
-descriptor3: .byte 0,0 /* Not a detailed timing descriptor */
- .byte 0 /* Must be zero */
- .byte 0xfd /* Descriptor is monitor range limits */
- .byte 0 /* Must be zero */
-start2: .byte VFREQ-1 /* Minimum vertical field rate (1-255 Hz) */
- .byte VFREQ+1 /* Maximum vertical field rate (1-255 Hz) */
- .byte (CLOCK/(XPIX+XBLANK))-1 /* Minimum horizontal line rate
- (1-255 kHz) */
- .byte (CLOCK/(XPIX+XBLANK))+1 /* Maximum horizontal line rate
- (1-255 kHz) */
- .byte (CLOCK/10000)+1 /* Maximum pixel clock rate, rounded up
- to 10 MHz multiple (10-2550 MHz) */
- .byte 0 /* No extended timing information type */
-end2: .byte 0x0a /* End marker */
- .fill 12-(end2-start2), 1, 0x20 /* Padded spaces */
-descriptor4: .byte 0,0 /* Not a detailed timing descriptor */
- .byte 0 /* Must be zero */
- .byte 0xfc /* Descriptor is text */
- .byte 0 /* Must be zero */
-start3: .ascii TIMING_NAME
-end3: .byte 0x0a /* End marker */
- .fill 12-(end3-start3), 1, 0x20 /* Padded spaces */
-extensions: .byte 0 /* Number of extensions to follow */
-checksum: .byte CRC /* Sum of all bytes must be 0 */
diff --git a/Documentation/EDID/hex b/Documentation/EDID/hex
deleted file mode 100644
index 8873ebb618af..000000000000
--- a/Documentation/EDID/hex
+++ /dev/null
@@ -1 +0,0 @@
-"\t" 8/1 "0x%02x, " "\n"
diff --git a/Documentation/IPMI.txt b/Documentation/IPMI.txt
deleted file mode 100644
index 5ef1047e2e66..000000000000
--- a/Documentation/IPMI.txt
+++ /dev/null
@@ -1,746 +0,0 @@
-=====================
-The Linux IPMI Driver
-=====================
-
-:Author: Corey Minyard <minyard@mvista.com> / <minyard@acm.org>
-
-The Intelligent Platform Management Interface, or IPMI, is a
-standard for controlling intelligent devices that monitor a system.
-It provides for dynamic discovery of sensors in the system and the
-ability to monitor the sensors and be informed when the sensor's
-values change or go outside certain boundaries. It also has a
-standardized database for field-replaceable units (FRUs) and a watchdog
-timer.
-
-To use this, you need an interface to an IPMI controller in your
-system (called a Baseboard Management Controller, or BMC) and
-management software that can use the IPMI system.
-
-This document describes how to use the IPMI driver for Linux. If you
-are not familiar with IPMI itself, see the web site at
-http://www.intel.com/design/servers/ipmi/index.htm. IPMI is a big
-subject and I can't cover it all here!
-
-Configuration
--------------
-
-The Linux IPMI driver is modular, which means you have to pick several
-things to have it work right depending on your hardware. Most of
-these are available in the 'Character Devices' menu then the IPMI
-menu.
-
-No matter what, you must pick 'IPMI top-level message handler' to use
-IPMI. What you do beyond that depends on your needs and hardware.
-
-The message handler does not provide any user-level interfaces.
-Kernel code (like the watchdog) can still use it. If you need access
-from userland, you need to select 'Device interface for IPMI' if you
-want access through a device driver.
-
-The driver interface depends on your hardware. If your system
-properly provides the SMBIOS info for IPMI, the driver will detect it
-and just work. If you have a board with a standard interface (These
-will generally be either "KCS", "SMIC", or "BT", consult your hardware
-manual), choose the 'IPMI SI handler' option. A driver also exists
-for direct I2C access to the IPMI management controller. Some boards
-support this, but it is unknown if it will work on every board. For
-this, choose 'IPMI SMBus handler', but be ready to try to do some
-figuring to see if it will work on your system if the SMBIOS/APCI
-information is wrong or not present. It is fairly safe to have both
-these enabled and let the drivers auto-detect what is present.
-
-You should generally enable ACPI on your system, as systems with IPMI
-can have ACPI tables describing them.
-
-If you have a standard interface and the board manufacturer has done
-their job correctly, the IPMI controller should be automatically
-detected (via ACPI or SMBIOS tables) and should just work. Sadly,
-many boards do not have this information. The driver attempts
-standard defaults, but they may not work. If you fall into this
-situation, you need to read the section below named 'The SI Driver' or
-"The SMBus Driver" on how to hand-configure your system.
-
-IPMI defines a standard watchdog timer. You can enable this with the
-'IPMI Watchdog Timer' config option. If you compile the driver into
-the kernel, then via a kernel command-line option you can have the
-watchdog timer start as soon as it initializes. It also have a lot
-of other options, see the 'Watchdog' section below for more details.
-Note that you can also have the watchdog continue to run if it is
-closed (by default it is disabled on close). Go into the 'Watchdog
-Cards' menu, enable 'Watchdog Timer Support', and enable the option
-'Disable watchdog shutdown on close'.
-
-IPMI systems can often be powered off using IPMI commands. Select
-'IPMI Poweroff' to do this. The driver will auto-detect if the system
-can be powered off by IPMI. It is safe to enable this even if your
-system doesn't support this option. This works on ATCA systems, the
-Radisys CPI1 card, and any IPMI system that supports standard chassis
-management commands.
-
-If you want the driver to put an event into the event log on a panic,
-enable the 'Generate a panic event to all BMCs on a panic' option. If
-you want the whole panic string put into the event log using OEM
-events, enable the 'Generate OEM events containing the panic string'
-option. You can also enable these dynamically by setting the module
-parameter named "panic_op" in the ipmi_msghandler module to "event"
-or "string". Setting that parameter to "none" disables this function.
-
-Basic Design
-------------
-
-The Linux IPMI driver is designed to be very modular and flexible, you
-only need to take the pieces you need and you can use it in many
-different ways. Because of that, it's broken into many chunks of
-code. These chunks (by module name) are:
-
-ipmi_msghandler - This is the central piece of software for the IPMI
-system. It handles all messages, message timing, and responses. The
-IPMI users tie into this, and the IPMI physical interfaces (called
-System Management Interfaces, or SMIs) also tie in here. This
-provides the kernelland interface for IPMI, but does not provide an
-interface for use by application processes.
-
-ipmi_devintf - This provides a userland IOCTL interface for the IPMI
-driver, each open file for this device ties in to the message handler
-as an IPMI user.
-
-ipmi_si - A driver for various system interfaces. This supports KCS,
-SMIC, and BT interfaces. Unless you have an SMBus interface or your
-own custom interface, you probably need to use this.
-
-ipmi_ssif - A driver for accessing BMCs on the SMBus. It uses the
-I2C kernel driver's SMBus interfaces to send and receive IPMI messages
-over the SMBus.
-
-ipmi_powernv - A driver for access BMCs on POWERNV systems.
-
-ipmi_watchdog - IPMI requires systems to have a very capable watchdog
-timer. This driver implements the standard Linux watchdog timer
-interface on top of the IPMI message handler.
-
-ipmi_poweroff - Some systems support the ability to be turned off via
-IPMI commands.
-
-bt-bmc - This is not part of the main driver, but instead a driver for
-accessing a BMC-side interface of a BT interface. It is used on BMCs
-running Linux to provide an interface to the host.
-
-These are all individually selectable via configuration options.
-
-Much documentation for the interface is in the include files. The
-IPMI include files are:
-
-linux/ipmi.h - Contains the user interface and IOCTL interface for IPMI.
-
-linux/ipmi_smi.h - Contains the interface for system management interfaces
-(things that interface to IPMI controllers) to use.
-
-linux/ipmi_msgdefs.h - General definitions for base IPMI messaging.
-
-
-Addressing
-----------
-
-The IPMI addressing works much like IP addresses, you have an overlay
-to handle the different address types. The overlay is::
-
- struct ipmi_addr
- {
- int addr_type;
- short channel;
- char data[IPMI_MAX_ADDR_SIZE];
- };
-
-The addr_type determines what the address really is. The driver
-currently understands two different types of addresses.
-
-"System Interface" addresses are defined as::
-
- struct ipmi_system_interface_addr
- {
- int addr_type;
- short channel;
- };
-
-and the type is IPMI_SYSTEM_INTERFACE_ADDR_TYPE. This is used for talking
-straight to the BMC on the current card. The channel must be
-IPMI_BMC_CHANNEL.
-
-Messages that are destined to go out on the IPMB bus use the
-IPMI_IPMB_ADDR_TYPE address type. The format is::
-
- struct ipmi_ipmb_addr
- {
- int addr_type;
- short channel;
- unsigned char slave_addr;
- unsigned char lun;
- };
-
-The "channel" here is generally zero, but some devices support more
-than one channel, it corresponds to the channel as defined in the IPMI
-spec.
-
-
-Messages
---------
-
-Messages are defined as::
-
- struct ipmi_msg
- {
- unsigned char netfn;
- unsigned char lun;
- unsigned char cmd;
- unsigned char *data;
- int data_len;
- };
-
-The driver takes care of adding/stripping the header information. The
-data portion is just the data to be send (do NOT put addressing info
-here) or the response. Note that the completion code of a response is
-the first item in "data", it is not stripped out because that is how
-all the messages are defined in the spec (and thus makes counting the
-offsets a little easier :-).
-
-When using the IOCTL interface from userland, you must provide a block
-of data for "data", fill it, and set data_len to the length of the
-block of data, even when receiving messages. Otherwise the driver
-will have no place to put the message.
-
-Messages coming up from the message handler in kernelland will come in
-as::
-
- struct ipmi_recv_msg
- {
- struct list_head link;
-
- /* The type of message as defined in the "Receive Types"
- defines above. */
- int recv_type;
-
- ipmi_user_t *user;
- struct ipmi_addr addr;
- long msgid;
- struct ipmi_msg msg;
-
- /* Call this when done with the message. It will presumably free
- the message and do any other necessary cleanup. */
- void (*done)(struct ipmi_recv_msg *msg);
-
- /* Place-holder for the data, don't make any assumptions about
- the size or existence of this, since it may change. */
- unsigned char msg_data[IPMI_MAX_MSG_LENGTH];
- };
-
-You should look at the receive type and handle the message
-appropriately.
-
-
-The Upper Layer Interface (Message Handler)
--------------------------------------------
-
-The upper layer of the interface provides the users with a consistent
-view of the IPMI interfaces. It allows multiple SMI interfaces to be
-addressed (because some boards actually have multiple BMCs on them)
-and the user should not have to care what type of SMI is below them.
-
-
-Watching For Interfaces
-^^^^^^^^^^^^^^^^^^^^^^^
-
-When your code comes up, the IPMI driver may or may not have detected
-if IPMI devices exist. So you might have to defer your setup until
-the device is detected, or you might be able to do it immediately.
-To handle this, and to allow for discovery, you register an SMI
-watcher with ipmi_smi_watcher_register() to iterate over interfaces
-and tell you when they come and go.
-
-
-Creating the User
-^^^^^^^^^^^^^^^^^
-
-To use the message handler, you must first create a user using
-ipmi_create_user. The interface number specifies which SMI you want
-to connect to, and you must supply callback functions to be called
-when data comes in. The callback function can run at interrupt level,
-so be careful using the callbacks. This also allows to you pass in a
-piece of data, the handler_data, that will be passed back to you on
-all calls.
-
-Once you are done, call ipmi_destroy_user() to get rid of the user.
-
-From userland, opening the device automatically creates a user, and
-closing the device automatically destroys the user.
-
-
-Messaging
-^^^^^^^^^
-
-To send a message from kernel-land, the ipmi_request_settime() call does
-pretty much all message handling. Most of the parameter are
-self-explanatory. However, it takes a "msgid" parameter. This is NOT
-the sequence number of messages. It is simply a long value that is
-passed back when the response for the message is returned. You may
-use it for anything you like.
-
-Responses come back in the function pointed to by the ipmi_recv_hndl
-field of the "handler" that you passed in to ipmi_create_user().
-Remember again, these may be running at interrupt level. Remember to
-look at the receive type, too.
-
-From userland, you fill out an ipmi_req_t structure and use the
-IPMICTL_SEND_COMMAND ioctl. For incoming stuff, you can use select()
-or poll() to wait for messages to come in. However, you cannot use
-read() to get them, you must call the IPMICTL_RECEIVE_MSG with the
-ipmi_recv_t structure to actually get the message. Remember that you
-must supply a pointer to a block of data in the msg.data field, and
-you must fill in the msg.data_len field with the size of the data.
-This gives the receiver a place to actually put the message.
-
-If the message cannot fit into the data you provide, you will get an
-EMSGSIZE error and the driver will leave the data in the receive
-queue. If you want to get it and have it truncate the message, us
-the IPMICTL_RECEIVE_MSG_TRUNC ioctl.
-
-When you send a command (which is defined by the lowest-order bit of
-the netfn per the IPMI spec) on the IPMB bus, the driver will
-automatically assign the sequence number to the command and save the
-command. If the response is not receive in the IPMI-specified 5
-seconds, it will generate a response automatically saying the command
-timed out. If an unsolicited response comes in (if it was after 5
-seconds, for instance), that response will be ignored.
-
-In kernelland, after you receive a message and are done with it, you
-MUST call ipmi_free_recv_msg() on it, or you will leak messages. Note
-that you should NEVER mess with the "done" field of a message, that is
-required to properly clean up the message.
-
-Note that when sending, there is an ipmi_request_supply_msgs() call
-that lets you supply the smi and receive message. This is useful for
-pieces of code that need to work even if the system is out of buffers
-(the watchdog timer uses this, for instance). You supply your own
-buffer and own free routines. This is not recommended for normal use,
-though, since it is tricky to manage your own buffers.
-
-
-Events and Incoming Commands
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-The driver takes care of polling for IPMI events and receiving
-commands (commands are messages that are not responses, they are
-commands that other things on the IPMB bus have sent you). To receive
-these, you must register for them, they will not automatically be sent
-to you.
-
-To receive events, you must call ipmi_set_gets_events() and set the
-"val" to non-zero. Any events that have been received by the driver
-since startup will immediately be delivered to the first user that
-registers for events. After that, if multiple users are registered
-for events, they will all receive all events that come in.
-
-For receiving commands, you have to individually register commands you
-want to receive. Call ipmi_register_for_cmd() and supply the netfn
-and command name for each command you want to receive. You also
-specify a bitmask of the channels you want to receive the command from
-(or use IPMI_CHAN_ALL for all channels if you don't care). Only one
-user may be registered for each netfn/cmd/channel, but different users
-may register for different commands, or the same command if the
-channel bitmasks do not overlap.
-
-From userland, equivalent IOCTLs are provided to do these functions.
-
-
-The Lower Layer (SMI) Interface
--------------------------------
-
-As mentioned before, multiple SMI interfaces may be registered to the
-message handler, each of these is assigned an interface number when
-they register with the message handler. They are generally assigned
-in the order they register, although if an SMI unregisters and then
-another one registers, all bets are off.
-
-The ipmi_smi.h defines the interface for management interfaces, see
-that for more details.
-
-
-The SI Driver
--------------
-
-The SI driver allows KCS, BT, and SMIC interfaces to be configured
-in the system. It discovers interfaces through a host of different
-methods, depending on the system.
-
-You can specify up to four interfaces on the module load line and
-control some module parameters::
-
- modprobe ipmi_si.o type=<type1>,<type2>....
- ports=<port1>,<port2>... addrs=<addr1>,<addr2>...
- irqs=<irq1>,<irq2>...
- regspacings=<sp1>,<sp2>,... regsizes=<size1>,<size2>,...
- regshifts=<shift1>,<shift2>,...
- slave_addrs=<addr1>,<addr2>,...
- force_kipmid=<enable1>,<enable2>,...
- kipmid_max_busy_us=<ustime1>,<ustime2>,...
- unload_when_empty=[0|1]
- trydmi=[0|1] tryacpi=[0|1]
- tryplatform=[0|1] trypci=[0|1]
-
-Each of these except try... items is a list, the first item for the
-first interface, second item for the second interface, etc.
-
-The si_type may be either "kcs", "smic", or "bt". If you leave it blank, it
-defaults to "kcs".
-
-If you specify addrs as non-zero for an interface, the driver will
-use the memory address given as the address of the device. This
-overrides si_ports.
-
-If you specify ports as non-zero for an interface, the driver will
-use the I/O port given as the device address.
-
-If you specify irqs as non-zero for an interface, the driver will
-attempt to use the given interrupt for the device.
-
-The other try... items disable discovery by their corresponding
-names. These are all enabled by default, set them to zero to disable
-them. The tryplatform disables openfirmware.
-
-The next three parameters have to do with register layout. The
-registers used by the interfaces may not appear at successive
-locations and they may not be in 8-bit registers. These parameters
-allow the layout of the data in the registers to be more precisely
-specified.
-
-The regspacings parameter give the number of bytes between successive
-register start addresses. For instance, if the regspacing is set to 4
-and the start address is 0xca2, then the address for the second
-register would be 0xca6. This defaults to 1.
-
-The regsizes parameter gives the size of a register, in bytes. The
-data used by IPMI is 8-bits wide, but it may be inside a larger
-register. This parameter allows the read and write type to specified.
-It may be 1, 2, 4, or 8. The default is 1.
-
-Since the register size may be larger than 32 bits, the IPMI data may not
-be in the lower 8 bits. The regshifts parameter give the amount to shift
-the data to get to the actual IPMI data.
-
-The slave_addrs specifies the IPMI address of the local BMC. This is
-usually 0x20 and the driver defaults to that, but in case it's not, it
-can be specified when the driver starts up.
-
-The force_ipmid parameter forcefully enables (if set to 1) or disables
-(if set to 0) the kernel IPMI daemon. Normally this is auto-detected
-by the driver, but systems with broken interrupts might need an enable,
-or users that don't want the daemon (don't need the performance, don't
-want the CPU hit) can disable it.
-
-If unload_when_empty is set to 1, the driver will be unloaded if it
-doesn't find any interfaces or all the interfaces fail to work. The
-default is one. Setting to 0 is useful with the hotmod, but is
-obviously only useful for modules.
-
-When compiled into the kernel, the parameters can be specified on the
-kernel command line as::
-
- ipmi_si.type=<type1>,<type2>...
- ipmi_si.ports=<port1>,<port2>... ipmi_si.addrs=<addr1>,<addr2>...
- ipmi_si.irqs=<irq1>,<irq2>...
- ipmi_si.regspacings=<sp1>,<sp2>,...
- ipmi_si.regsizes=<size1>,<size2>,...
- ipmi_si.regshifts=<shift1>,<shift2>,...
- ipmi_si.slave_addrs=<addr1>,<addr2>,...
- ipmi_si.force_kipmid=<enable1>,<enable2>,...
- ipmi_si.kipmid_max_busy_us=<ustime1>,<ustime2>,...
-
-It works the same as the module parameters of the same names.
-
-If your IPMI interface does not support interrupts and is a KCS or
-SMIC interface, the IPMI driver will start a kernel thread for the
-interface to help speed things up. This is a low-priority kernel
-thread that constantly polls the IPMI driver while an IPMI operation
-is in progress. The force_kipmid module parameter will all the user to
-force this thread on or off. If you force it off and don't have
-interrupts, the driver will run VERY slowly. Don't blame me,
-these interfaces suck.
-
-Unfortunately, this thread can use a lot of CPU depending on the
-interface's performance. This can waste a lot of CPU and cause
-various issues with detecting idle CPU and using extra power. To
-avoid this, the kipmid_max_busy_us sets the maximum amount of time, in
-microseconds, that kipmid will spin before sleeping for a tick. This
-value sets a balance between performance and CPU waste and needs to be
-tuned to your needs. Maybe, someday, auto-tuning will be added, but
-that's not a simple thing and even the auto-tuning would need to be
-tuned to the user's desired performance.
-
-The driver supports a hot add and remove of interfaces. This way,
-interfaces can be added or removed after the kernel is up and running.
-This is done using /sys/modules/ipmi_si/parameters/hotmod, which is a
-write-only parameter. You write a string to this interface. The string
-has the format::
-
- <op1>[:op2[:op3...]]
-
-The "op"s are::
-
- add|remove,kcs|bt|smic,mem|i/o,<address>[,<opt1>[,<opt2>[,...]]]
-
-You can specify more than one interface on the line. The "opt"s are::
-
- rsp=<regspacing>
- rsi=<regsize>
- rsh=<regshift>
- irq=<irq>
- ipmb=<ipmb slave addr>
-
-and these have the same meanings as discussed above. Note that you
-can also use this on the kernel command line for a more compact format
-for specifying an interface. Note that when removing an interface,
-only the first three parameters (si type, address type, and address)
-are used for the comparison. Any options are ignored for removing.
-
-The SMBus Driver (SSIF)
------------------------
-
-The SMBus driver allows up to 4 SMBus devices to be configured in the
-system. By default, the driver will only register with something it
-finds in DMI or ACPI tables. You can change this
-at module load time (for a module) with::
-
- modprobe ipmi_ssif.o
- addr=<i2caddr1>[,<i2caddr2>[,...]]
- adapter=<adapter1>[,<adapter2>[...]]
- dbg=<flags1>,<flags2>...
- slave_addrs=<addr1>,<addr2>,...
- tryacpi=[0|1] trydmi=[0|1]
- [dbg_probe=1]
-
-The addresses are normal I2C addresses. The adapter is the string
-name of the adapter, as shown in /sys/class/i2c-adapter/i2c-<n>/name.
-It is *NOT* i2c-<n> itself. Also, the comparison is done ignoring
-spaces, so if the name is "This is an I2C chip" you can say
-adapter_name=ThisisanI2cchip. This is because it's hard to pass in
-spaces in kernel parameters.
-
-The debug flags are bit flags for each BMC found, they are:
-IPMI messages: 1, driver state: 2, timing: 4, I2C probe: 8
-
-The tryxxx parameters can be used to disable detecting interfaces
-from various sources.
-
-Setting dbg_probe to 1 will enable debugging of the probing and
-detection process for BMCs on the SMBusses.
-
-The slave_addrs specifies the IPMI address of the local BMC. This is
-usually 0x20 and the driver defaults to that, but in case it's not, it
-can be specified when the driver starts up.
-
-Discovering the IPMI compliant BMC on the SMBus can cause devices on
-the I2C bus to fail. The SMBus driver writes a "Get Device ID" IPMI
-message as a block write to the I2C bus and waits for a response.
-This action can be detrimental to some I2C devices. It is highly
-recommended that the known I2C address be given to the SMBus driver in
-the smb_addr parameter unless you have DMI or ACPI data to tell the
-driver what to use.
-
-When compiled into the kernel, the addresses can be specified on the
-kernel command line as::
-
- ipmb_ssif.addr=<i2caddr1>[,<i2caddr2>[...]]
- ipmi_ssif.adapter=<adapter1>[,<adapter2>[...]]
- ipmi_ssif.dbg=<flags1>[,<flags2>[...]]
- ipmi_ssif.dbg_probe=1
- ipmi_ssif.slave_addrs=<addr1>[,<addr2>[...]]
- ipmi_ssif.tryacpi=[0|1] ipmi_ssif.trydmi=[0|1]
-
-These are the same options as on the module command line.
-
-The I2C driver does not support non-blocking access or polling, so
-this driver cannod to IPMI panic events, extend the watchdog at panic
-time, or other panic-related IPMI functions without special kernel
-patches and driver modifications. You can get those at the openipmi
-web page.
-
-The driver supports a hot add and remove of interfaces through the I2C
-sysfs interface.
-
-Other Pieces
-------------
-
-Get the detailed info related with the IPMI device
---------------------------------------------------
-
-Some users need more detailed information about a device, like where
-the address came from or the raw base device for the IPMI interface.
-You can use the IPMI smi_watcher to catch the IPMI interfaces as they
-come or go, and to grab the information, you can use the function
-ipmi_get_smi_info(), which returns the following structure::
-
- struct ipmi_smi_info {
- enum ipmi_addr_src addr_src;
- struct device *dev;
- union {
- struct {
- void *acpi_handle;
- } acpi_info;
- } addr_info;
- };
-
-Currently special info for only for SI_ACPI address sources is
-returned. Others may be added as necessary.
-
-Note that the dev pointer is included in the above structure, and
-assuming ipmi_smi_get_info returns success, you must call put_device
-on the dev pointer.
-
-
-Watchdog
---------
-
-A watchdog timer is provided that implements the Linux-standard
-watchdog timer interface. It has three module parameters that can be
-used to control it::
-
- modprobe ipmi_watchdog timeout=<t> pretimeout=<t> action=<action type>
- preaction=<preaction type> preop=<preop type> start_now=x
- nowayout=x ifnum_to_use=n panic_wdt_timeout=<t>
-
-ifnum_to_use specifies which interface the watchdog timer should use.
-The default is -1, which means to pick the first one registered.
-
-The timeout is the number of seconds to the action, and the pretimeout
-is the amount of seconds before the reset that the pre-timeout panic will
-occur (if pretimeout is zero, then pretimeout will not be enabled). Note
-that the pretimeout is the time before the final timeout. So if the
-timeout is 50 seconds and the pretimeout is 10 seconds, then the pretimeout
-will occur in 40 second (10 seconds before the timeout). The panic_wdt_timeout
-is the value of timeout which is set on kernel panic, in order to let actions
-such as kdump to occur during panic.
-
-The action may be "reset", "power_cycle", or "power_off", and
-specifies what to do when the timer times out, and defaults to
-"reset".
-
-The preaction may be "pre_smi" for an indication through the SMI
-interface, "pre_int" for an indication through the SMI with an
-interrupts, and "pre_nmi" for a NMI on a preaction. This is how
-the driver is informed of the pretimeout.
-
-The preop may be set to "preop_none" for no operation on a pretimeout,
-"preop_panic" to set the preoperation to panic, or "preop_give_data"
-to provide data to read from the watchdog device when the pretimeout
-occurs. A "pre_nmi" setting CANNOT be used with "preop_give_data"
-because you can't do data operations from an NMI.
-
-When preop is set to "preop_give_data", one byte comes ready to read
-on the device when the pretimeout occurs. Select and fasync work on
-the device, as well.
-
-If start_now is set to 1, the watchdog timer will start running as
-soon as the driver is loaded.
-
-If nowayout is set to 1, the watchdog timer will not stop when the
-watchdog device is closed. The default value of nowayout is true
-if the CONFIG_WATCHDOG_NOWAYOUT option is enabled, or false if not.
-
-When compiled into the kernel, the kernel command line is available
-for configuring the watchdog::
-
- ipmi_watchdog.timeout=<t> ipmi_watchdog.pretimeout=<t>
- ipmi_watchdog.action=<action type>
- ipmi_watchdog.preaction=<preaction type>
- ipmi_watchdog.preop=<preop type>
- ipmi_watchdog.start_now=x
- ipmi_watchdog.nowayout=x
- ipmi_watchdog.panic_wdt_timeout=<t>
-
-The options are the same as the module parameter options.
-
-The watchdog will panic and start a 120 second reset timeout if it
-gets a pre-action. During a panic or a reboot, the watchdog will
-start a 120 timer if it is running to make sure the reboot occurs.
-
-Note that if you use the NMI preaction for the watchdog, you MUST NOT
-use the nmi watchdog. There is no reasonable way to tell if an NMI
-comes from the IPMI controller, so it must assume that if it gets an
-otherwise unhandled NMI, it must be from IPMI and it will panic
-immediately.
-
-Once you open the watchdog timer, you must write a 'V' character to the
-device to close it, or the timer will not stop. This is a new semantic
-for the driver, but makes it consistent with the rest of the watchdog
-drivers in Linux.
-
-
-Panic Timeouts
---------------
-
-The OpenIPMI driver supports the ability to put semi-custom and custom
-events in the system event log if a panic occurs. if you enable the
-'Generate a panic event to all BMCs on a panic' option, you will get
-one event on a panic in a standard IPMI event format. If you enable
-the 'Generate OEM events containing the panic string' option, you will
-also get a bunch of OEM events holding the panic string.
-
-
-The field settings of the events are:
-
-* Generator ID: 0x21 (kernel)
-* EvM Rev: 0x03 (this event is formatting in IPMI 1.0 format)
-* Sensor Type: 0x20 (OS critical stop sensor)
-* Sensor #: The first byte of the panic string (0 if no panic string)
-* Event Dir | Event Type: 0x6f (Assertion, sensor-specific event info)
-* Event Data 1: 0xa1 (Runtime stop in OEM bytes 2 and 3)
-* Event data 2: second byte of panic string
-* Event data 3: third byte of panic string
-
-See the IPMI spec for the details of the event layout. This event is
-always sent to the local management controller. It will handle routing
-the message to the right place
-
-Other OEM events have the following format:
-
-* Record ID (bytes 0-1): Set by the SEL.
-* Record type (byte 2): 0xf0 (OEM non-timestamped)
-* byte 3: The slave address of the card saving the panic
-* byte 4: A sequence number (starting at zero)
- The rest of the bytes (11 bytes) are the panic string. If the panic string
- is longer than 11 bytes, multiple messages will be sent with increasing
- sequence numbers.
-
-Because you cannot send OEM events using the standard interface, this
-function will attempt to find an SEL and add the events there. It
-will first query the capabilities of the local management controller.
-If it has an SEL, then they will be stored in the SEL of the local
-management controller. If not, and the local management controller is
-an event generator, the event receiver from the local management
-controller will be queried and the events sent to the SEL on that
-device. Otherwise, the events go nowhere since there is nowhere to
-send them.
-
-
-Poweroff
---------
-
-If the poweroff capability is selected, the IPMI driver will install
-a shutdown function into the standard poweroff function pointer. This
-is in the ipmi_poweroff module. When the system requests a powerdown,
-it will send the proper IPMI commands to do this. This is supported on
-several platforms.
-
-There is a module parameter named "poweroff_powercycle" that may
-either be zero (do a power down) or non-zero (do a power cycle, power
-the system off, then power it on in a few seconds). Setting
-ipmi_poweroff.poweroff_control=x will do the same thing on the kernel
-command line. The parameter is also available via the proc filesystem
-in /proc/sys/dev/ipmi/poweroff_powercycle. Note that if the system
-does not support power cycling, it will always do the power off.
-
-The "ifnum_to_use" parameter specifies which interface the poweroff
-code should use. The default is -1, which means to pick the first one
-registered.
-
-Note that if you have ACPI enabled, the system will prefer using ACPI to
-power off.
diff --git a/Documentation/IRQ-affinity.txt b/Documentation/IRQ-affinity.txt
deleted file mode 100644
index 29da5000836a..000000000000
--- a/Documentation/IRQ-affinity.txt
+++ /dev/null
@@ -1,70 +0,0 @@
-================
-SMP IRQ affinity
-================
-
-ChangeLog:
- - Started by Ingo Molnar <mingo@redhat.com>
- - Update by Max Krasnyansky <maxk@qualcomm.com>
-
-
-/proc/irq/IRQ#/smp_affinity and /proc/irq/IRQ#/smp_affinity_list specify
-which target CPUs are permitted for a given IRQ source. It's a bitmask
-(smp_affinity) or cpu list (smp_affinity_list) of allowed CPUs. It's not
-allowed to turn off all CPUs, and if an IRQ controller does not support
-IRQ affinity then the value will not change from the default of all cpus.
-
-/proc/irq/default_smp_affinity specifies default affinity mask that applies
-to all non-active IRQs. Once IRQ is allocated/activated its affinity bitmask
-will be set to the default mask. It can then be changed as described above.
-Default mask is 0xffffffff.
-
-Here is an example of restricting IRQ44 (eth1) to CPU0-3 then restricting
-it to CPU4-7 (this is an 8-CPU SMP box)::
-
- [root@moon 44]# cd /proc/irq/44
- [root@moon 44]# cat smp_affinity
- ffffffff
-
- [root@moon 44]# echo 0f > smp_affinity
- [root@moon 44]# cat smp_affinity
- 0000000f
- [root@moon 44]# ping -f h
- PING hell (195.4.7.3): 56 data bytes
- ...
- --- hell ping statistics ---
- 6029 packets transmitted, 6027 packets received, 0% packet loss
- round-trip min/avg/max = 0.1/0.1/0.4 ms
- [root@moon 44]# cat /proc/interrupts | grep 'CPU\|44:'
- CPU0 CPU1 CPU2 CPU3 CPU4 CPU5 CPU6 CPU7
- 44: 1068 1785 1785 1783 0 0 0 0 IO-APIC-level eth1
-
-As can be seen from the line above IRQ44 was delivered only to the first four
-processors (0-3).
-Now lets restrict that IRQ to CPU(4-7).
-
-::
-
- [root@moon 44]# echo f0 > smp_affinity
- [root@moon 44]# cat smp_affinity
- 000000f0
- [root@moon 44]# ping -f h
- PING hell (195.4.7.3): 56 data bytes
- ..
- --- hell ping statistics ---
- 2779 packets transmitted, 2777 packets received, 0% packet loss
- round-trip min/avg/max = 0.1/0.5/585.4 ms
- [root@moon 44]# cat /proc/interrupts | 'CPU\|44:'
- CPU0 CPU1 CPU2 CPU3 CPU4 CPU5 CPU6 CPU7
- 44: 1068 1785 1785 1783 1784 1069 1070 1069 IO-APIC-level eth1
-
-This time around IRQ44 was delivered only to the last four processors.
-i.e counters for the CPU0-3 did not change.
-
-Here is an example of limiting that same irq (44) to cpus 1024 to 1031::
-
- [root@moon 44]# echo 1024-1031 > smp_affinity_list
- [root@moon 44]# cat smp_affinity_list
- 1024-1031
-
-Note that to do this with a bitmask would require 32 bitmasks of zero
-to follow the pertinent one.
diff --git a/Documentation/IRQ-domain.txt b/Documentation/IRQ-domain.txt
deleted file mode 100644
index 507775cce753..000000000000
--- a/Documentation/IRQ-domain.txt
+++ /dev/null
@@ -1,269 +0,0 @@
-===============================================
-The irq_domain interrupt number mapping library
-===============================================
-
-The current design of the Linux kernel uses a single large number
-space where each separate IRQ source is assigned a different number.
-This is simple when there is only one interrupt controller, but in
-systems with multiple interrupt controllers the kernel must ensure
-that each one gets assigned non-overlapping allocations of Linux
-IRQ numbers.
-
-The number of interrupt controllers registered as unique irqchips
-show a rising tendency: for example subdrivers of different kinds
-such as GPIO controllers avoid reimplementing identical callback
-mechanisms as the IRQ core system by modelling their interrupt
-handlers as irqchips, i.e. in effect cascading interrupt controllers.
-
-Here the interrupt number loose all kind of correspondence to
-hardware interrupt numbers: whereas in the past, IRQ numbers could
-be chosen so they matched the hardware IRQ line into the root
-interrupt controller (i.e. the component actually fireing the
-interrupt line to the CPU) nowadays this number is just a number.
-
-For this reason we need a mechanism to separate controller-local
-interrupt numbers, called hardware irq's, from Linux IRQ numbers.
-
-The irq_alloc_desc*() and irq_free_desc*() APIs provide allocation of
-irq numbers, but they don't provide any support for reverse mapping of
-the controller-local IRQ (hwirq) number into the Linux IRQ number
-space.
-
-The irq_domain library adds mapping between hwirq and IRQ numbers on
-top of the irq_alloc_desc*() API. An irq_domain to manage mapping is
-preferred over interrupt controller drivers open coding their own
-reverse mapping scheme.
-
-irq_domain also implements translation from an abstract irq_fwspec
-structure to hwirq numbers (Device Tree and ACPI GSI so far), and can
-be easily extended to support other IRQ topology data sources.
-
-irq_domain usage
-================
-
-An interrupt controller driver creates and registers an irq_domain by
-calling one of the irq_domain_add_*() functions (each mapping method
-has a different allocator function, more on that later). The function
-will return a pointer to the irq_domain on success. The caller must
-provide the allocator function with an irq_domain_ops structure.
-
-In most cases, the irq_domain will begin empty without any mappings
-between hwirq and IRQ numbers. Mappings are added to the irq_domain
-by calling irq_create_mapping() which accepts the irq_domain and a
-hwirq number as arguments. If a mapping for the hwirq doesn't already
-exist then it will allocate a new Linux irq_desc, associate it with
-the hwirq, and call the .map() callback so the driver can perform any
-required hardware setup.
-
-When an interrupt is received, irq_find_mapping() function should
-be used to find the Linux IRQ number from the hwirq number.
-
-The irq_create_mapping() function must be called *atleast once*
-before any call to irq_find_mapping(), lest the descriptor will not
-be allocated.
-
-If the driver has the Linux IRQ number or the irq_data pointer, and
-needs to know the associated hwirq number (such as in the irq_chip
-callbacks) then it can be directly obtained from irq_data->hwirq.
-
-Types of irq_domain mappings
-============================
-
-There are several mechanisms available for reverse mapping from hwirq
-to Linux irq, and each mechanism uses a different allocation function.
-Which reverse map type should be used depends on the use case. Each
-of the reverse map types are described below:
-
-Linear
-------
-
-::
-
- irq_domain_add_linear()
- irq_domain_create_linear()
-
-The linear reverse map maintains a fixed size table indexed by the
-hwirq number. When a hwirq is mapped, an irq_desc is allocated for
-the hwirq, and the IRQ number is stored in the table.
-
-The Linear map is a good choice when the maximum number of hwirqs is
-fixed and a relatively small number (~ < 256). The advantages of this
-map are fixed time lookup for IRQ numbers, and irq_descs are only
-allocated for in-use IRQs. The disadvantage is that the table must be
-as large as the largest possible hwirq number.
-
-irq_domain_add_linear() and irq_domain_create_linear() are functionally
-equivalent, except for the first argument is different - the former
-accepts an Open Firmware specific 'struct device_node', while the latter
-accepts a more general abstraction 'struct fwnode_handle'.
-
-The majority of drivers should use the linear map.
-
-Tree
-----
-
-::
-
- irq_domain_add_tree()
- irq_domain_create_tree()
-
-The irq_domain maintains a radix tree map from hwirq numbers to Linux
-IRQs. When an hwirq is mapped, an irq_desc is allocated and the
-hwirq is used as the lookup key for the radix tree.
-
-The tree map is a good choice if the hwirq number can be very large
-since it doesn't need to allocate a table as large as the largest
-hwirq number. The disadvantage is that hwirq to IRQ number lookup is
-dependent on how many entries are in the table.
-
-irq_domain_add_tree() and irq_domain_create_tree() are functionally
-equivalent, except for the first argument is different - the former
-accepts an Open Firmware specific 'struct device_node', while the latter
-accepts a more general abstraction 'struct fwnode_handle'.
-
-Very few drivers should need this mapping.
-
-No Map
-------
-
-::
-
- irq_domain_add_nomap()
-
-The No Map mapping is to be used when the hwirq number is
-programmable in the hardware. In this case it is best to program the
-Linux IRQ number into the hardware itself so that no mapping is
-required. Calling irq_create_direct_mapping() will allocate a Linux
-IRQ number and call the .map() callback so that driver can program the
-Linux IRQ number into the hardware.
-
-Most drivers cannot use this mapping.
-
-Legacy
-------
-
-::
-
- irq_domain_add_simple()
- irq_domain_add_legacy()
- irq_domain_add_legacy_isa()
-
-The Legacy mapping is a special case for drivers that already have a
-range of irq_descs allocated for the hwirqs. It is used when the
-driver cannot be immediately converted to use the linear mapping. For
-example, many embedded system board support files use a set of #defines
-for IRQ numbers that are passed to struct device registrations. In that
-case the Linux IRQ numbers cannot be dynamically assigned and the legacy
-mapping should be used.
-
-The legacy map assumes a contiguous range of IRQ numbers has already
-been allocated for the controller and that the IRQ number can be
-calculated by adding a fixed offset to the hwirq number, and
-visa-versa. The disadvantage is that it requires the interrupt
-controller to manage IRQ allocations and it requires an irq_desc to be
-allocated for every hwirq, even if it is unused.
-
-The legacy map should only be used if fixed IRQ mappings must be
-supported. For example, ISA controllers would use the legacy map for
-mapping Linux IRQs 0-15 so that existing ISA drivers get the correct IRQ
-numbers.
-
-Most users of legacy mappings should use irq_domain_add_simple() which
-will use a legacy domain only if an IRQ range is supplied by the
-system and will otherwise use a linear domain mapping. The semantics
-of this call are such that if an IRQ range is specified then
-descriptors will be allocated on-the-fly for it, and if no range is
-specified it will fall through to irq_domain_add_linear() which means
-*no* irq descriptors will be allocated.
-
-A typical use case for simple domains is where an irqchip provider
-is supporting both dynamic and static IRQ assignments.
-
-In order to avoid ending up in a situation where a linear domain is
-used and no descriptor gets allocated it is very important to make sure
-that the driver using the simple domain call irq_create_mapping()
-before any irq_find_mapping() since the latter will actually work
-for the static IRQ assignment case.
-
-Hierarchy IRQ domain
---------------------
-
-On some architectures, there may be multiple interrupt controllers
-involved in delivering an interrupt from the device to the target CPU.
-Let's look at a typical interrupt delivering path on x86 platforms::
-
- Device --> IOAPIC -> Interrupt remapping Controller -> Local APIC -> CPU
-
-There are three interrupt controllers involved:
-
-1) IOAPIC controller
-2) Interrupt remapping controller
-3) Local APIC controller
-
-To support such a hardware topology and make software architecture match
-hardware architecture, an irq_domain data structure is built for each
-interrupt controller and those irq_domains are organized into hierarchy.
-When building irq_domain hierarchy, the irq_domain near to the device is
-child and the irq_domain near to CPU is parent. So a hierarchy structure
-as below will be built for the example above::
-
- CPU Vector irq_domain (root irq_domain to manage CPU vectors)
- ^
- |
- Interrupt Remapping irq_domain (manage irq_remapping entries)
- ^
- |
- IOAPIC irq_domain (manage IOAPIC delivery entries/pins)
-
-There are four major interfaces to use hierarchy irq_domain:
-
-1) irq_domain_alloc_irqs(): allocate IRQ descriptors and interrupt
- controller related resources to deliver these interrupts.
-2) irq_domain_free_irqs(): free IRQ descriptors and interrupt controller
- related resources associated with these interrupts.
-3) irq_domain_activate_irq(): activate interrupt controller hardware to
- deliver the interrupt.
-4) irq_domain_deactivate_irq(): deactivate interrupt controller hardware
- to stop delivering the interrupt.
-
-Following changes are needed to support hierarchy irq_domain:
-
-1) a new field 'parent' is added to struct irq_domain; it's used to
- maintain irq_domain hierarchy information.
-2) a new field 'parent_data' is added to struct irq_data; it's used to
- build hierarchy irq_data to match hierarchy irq_domains. The irq_data
- is used to store irq_domain pointer and hardware irq number.
-3) new callbacks are added to struct irq_domain_ops to support hierarchy
- irq_domain operations.
-
-With support of hierarchy irq_domain and hierarchy irq_data ready, an
-irq_domain structure is built for each interrupt controller, and an
-irq_data structure is allocated for each irq_domain associated with an
-IRQ. Now we could go one step further to support stacked(hierarchy)
-irq_chip. That is, an irq_chip is associated with each irq_data along
-the hierarchy. A child irq_chip may implement a required action by
-itself or by cooperating with its parent irq_chip.
-
-With stacked irq_chip, interrupt controller driver only needs to deal
-with the hardware managed by itself and may ask for services from its
-parent irq_chip when needed. So we could achieve a much cleaner
-software architecture.
-
-For an interrupt controller driver to support hierarchy irq_domain, it
-needs to:
-
-1) Implement irq_domain_ops.alloc and irq_domain_ops.free
-2) Optionally implement irq_domain_ops.activate and
- irq_domain_ops.deactivate.
-3) Optionally implement an irq_chip to manage the interrupt controller
- hardware.
-4) No need to implement irq_domain_ops.map and irq_domain_ops.unmap,
- they are unused with hierarchy irq_domain.
-
-Hierarchy irq_domain is in no way x86 specific, and is heavily used to
-support other architectures, such as ARM, ARM64 etc.
-
-=== Debugging ===
-
-Most of the internals of the IRQ subsystem are exposed in debugfs by
-turning CONFIG_GENERIC_IRQ_DEBUGFS on.
diff --git a/Documentation/IRQ.txt b/Documentation/IRQ.txt
deleted file mode 100644
index 4273806a606b..000000000000
--- a/Documentation/IRQ.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-===============
-What is an IRQ?
-===============
-
-An IRQ is an interrupt request from a device.
-Currently they can come in over a pin, or over a packet.
-Several devices may be connected to the same pin thus
-sharing an IRQ.
-
-An IRQ number is a kernel identifier used to talk about a hardware
-interrupt source. Typically this is an index into the global irq_desc
-array, but except for what linux/interrupt.h implements the details
-are architecture specific.
-
-An IRQ number is an enumeration of the possible interrupt sources on a
-machine. Typically what is enumerated is the number of input pins on
-all of the interrupt controller in the system. In the case of ISA
-what is enumerated are the 16 input pins on the two i8259 interrupt
-controllers.
-
-Architectures can assign additional meaning to the IRQ numbers, and
-are encouraged to in the case where there is any manual configuration
-of the hardware involved. The ISA IRQs are a classic example of
-assigning this kind of additional meaning.
diff --git a/Documentation/Intel-IOMMU.txt b/Documentation/Intel-IOMMU.txt
deleted file mode 100644
index 9dae6b47e398..000000000000
--- a/Documentation/Intel-IOMMU.txt
+++ /dev/null
@@ -1,114 +0,0 @@
-===================
-Linux IOMMU Support
-===================
-
-The architecture spec can be obtained from the below location.
-
-http://www.intel.com/content/dam/www/public/us/en/documents/product-specifications/vt-directed-io-spec.pdf
-
-This guide gives a quick cheat sheet for some basic understanding.
-
-Some Keywords
-
-- DMAR - DMA remapping
-- DRHD - DMA Remapping Hardware Unit Definition
-- RMRR - Reserved memory Region Reporting Structure
-- ZLR - Zero length reads from PCI devices
-- IOVA - IO Virtual address.
-
-Basic stuff
------------
-
-ACPI enumerates and lists the different DMA engines in the platform, and
-device scope relationships between PCI devices and which DMA engine controls
-them.
-
-What is RMRR?
--------------
-
-There are some devices the BIOS controls, for e.g USB devices to perform
-PS2 emulation. The regions of memory used for these devices are marked
-reserved in the e820 map. When we turn on DMA translation, DMA to those
-regions will fail. Hence BIOS uses RMRR to specify these regions along with
-devices that need to access these regions. OS is expected to setup
-unity mappings for these regions for these devices to access these regions.
-
-How is IOVA generated?
-----------------------
-
-Well behaved drivers call pci_map_*() calls before sending command to device
-that needs to perform DMA. Once DMA is completed and mapping is no longer
-required, device performs a pci_unmap_*() calls to unmap the region.
-
-The Intel IOMMU driver allocates a virtual address per domain. Each PCIE
-device has its own domain (hence protection). Devices under p2p bridges
-share the virtual address with all devices under the p2p bridge due to
-transaction id aliasing for p2p bridges.
-
-IOVA generation is pretty generic. We used the same technique as vmalloc()
-but these are not global address spaces, but separate for each domain.
-Different DMA engines may support different number of domains.
-
-We also allocate guard pages with each mapping, so we can attempt to catch
-any overflow that might happen.
-
-
-Graphics Problems?
-------------------
-If you encounter issues with graphics devices, you can try adding
-option intel_iommu=igfx_off to turn off the integrated graphics engine.
-If this fixes anything, please ensure you file a bug reporting the problem.
-
-Some exceptions to IOVA
------------------------
-Interrupt ranges are not address translated, (0xfee00000 - 0xfeefffff).
-The same is true for peer to peer transactions. Hence we reserve the
-address from PCI MMIO ranges so they are not allocated for IOVA addresses.
-
-
-Fault reporting
----------------
-When errors are reported, the DMA engine signals via an interrupt. The fault
-reason and device that caused it with fault reason is printed on console.
-
-See below for sample.
-
-
-Boot Message Sample
--------------------
-
-Something like this gets printed indicating presence of DMAR tables
-in ACPI.
-
-ACPI: DMAR (v001 A M I OEMDMAR 0x00000001 MSFT 0x00000097) @ 0x000000007f5b5ef0
-
-When DMAR is being processed and initialized by ACPI, prints DMAR locations
-and any RMRR's processed::
-
- ACPI DMAR:Host address width 36
- ACPI DMAR:DRHD (flags: 0x00000000)base: 0x00000000fed90000
- ACPI DMAR:DRHD (flags: 0x00000000)base: 0x00000000fed91000
- ACPI DMAR:DRHD (flags: 0x00000001)base: 0x00000000fed93000
- ACPI DMAR:RMRR base: 0x00000000000ed000 end: 0x00000000000effff
- ACPI DMAR:RMRR base: 0x000000007f600000 end: 0x000000007fffffff
-
-When DMAR is enabled for use, you will notice..
-
-PCI-DMA: Using DMAR IOMMU
-
-Fault reporting
----------------
-
-::
-
- DMAR:[DMA Write] Request device [00:02.0] fault addr 6df084000
- DMAR:[fault reason 05] PTE Write access is not set
- DMAR:[DMA Write] Request device [00:02.0] fault addr 6df084000
- DMAR:[fault reason 05] PTE Write access is not set
-
-TBD
-----
-
-- For compatibility testing, could use unity map domain for all devices, just
- provide a 1-1 for all useful memory under a single domain for all devices.
-- API for paravirt ops for abstracting functionality for VMM folks.
diff --git a/Documentation/Kconfig b/Documentation/Kconfig
new file mode 100644
index 000000000000..3a0e7ac0c4e3
--- /dev/null
+++ b/Documentation/Kconfig
@@ -0,0 +1,28 @@
+if COMPILE_TEST
+
+menu "Documentation"
+
+config WARN_MISSING_DOCUMENTS
+ bool "Warn if there's a missing documentation file"
+ help
+ It is not uncommon that a document gets renamed.
+ This option makes the Kernel to check for missing dependencies,
+ warning when something is missing. Works only if the Kernel
+ is built from a git tree.
+
+ If unsure, select 'N'.
+
+config WARN_ABI_ERRORS
+ bool "Warn if there are errors at ABI files"
+ help
+ The files under Documentation/ABI should follow what's
+ described at Documentation/ABI/README. Yet, as they're manually
+ written, it would be possible that some of those files would
+ have errors that would break them for being parsed by
+ scripts/get_abi.pl. Add a check to verify them.
+
+ If unsure, select 'N'.
+
+endmenu
+
+endif
diff --git a/Documentation/Makefile b/Documentation/Makefile
index 2ca77ad0f238..3609cb86137b 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -2,18 +2,41 @@
# Makefile for Sphinx documentation
#
-subdir-y :=
+# for cleaning
+subdir- := devicetree/bindings
+
+ifneq ($(MAKECMDGOALS),cleandocs)
+# Check for broken documentation file references
+ifeq ($(CONFIG_WARN_MISSING_DOCUMENTS),y)
+$(shell $(srctree)/scripts/documentation-file-ref-check --warn)
+endif
+
+# Check for broken ABI files
+ifeq ($(CONFIG_WARN_ABI_ERRORS),y)
+$(shell $(srctree)/scripts/get_abi.py --dir $(srctree)/Documentation/ABI validate)
+endif
+endif
# You can set these variables from the command line.
SPHINXBUILD = sphinx-build
SPHINXOPTS =
SPHINXDIRS = .
-_SPHINXDIRS = $(patsubst $(srctree)/Documentation/%/conf.py,%,$(wildcard $(srctree)/Documentation/*/conf.py))
+DOCS_THEME =
+DOCS_CSS =
+_SPHINXDIRS = $(sort $(patsubst $(srctree)/Documentation/%/index.rst,%,$(wildcard $(srctree)/Documentation/*/index.rst)))
SPHINX_CONF = conf.py
PAPER =
BUILDDIR = $(obj)/output
PDFLATEX = xelatex
-LATEXOPTS = -interaction=batchmode
+LATEXOPTS = -interaction=batchmode -no-shell-escape
+
+# For denylisting "variable font" files
+# Can be overridden by setting as an env variable
+FONTS_CONF_DENY_VF ?= $(HOME)/deny-vf
+
+ifeq ($(findstring 1, $(KBUILD_VERBOSE)),)
+SPHINXOPTS += "-q"
+endif
# User-friendly check for sphinx-build
HAVE_SPHINX := $(shell if which $(SPHINXBUILD) >/dev/null 2>&1; then echo 1; else echo 0; fi)
@@ -23,20 +46,30 @@ ifeq ($(HAVE_SPHINX),0)
.DEFAULT:
$(warning The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed and in PATH, or set the SPHINXBUILD make variable to point to the full path of the '$(SPHINXBUILD)' executable.)
@echo
- @./scripts/sphinx-pre-install
+ @$(srctree)/scripts/sphinx-pre-install
@echo " SKIP Sphinx $@ target."
else # HAVE_SPHINX
-# User-friendly check for pdflatex
+# User-friendly check for pdflatex and latexmk
HAVE_PDFLATEX := $(shell if which $(PDFLATEX) >/dev/null 2>&1; then echo 1; else echo 0; fi)
+HAVE_LATEXMK := $(shell if which latexmk >/dev/null 2>&1; then echo 1; else echo 0; fi)
+
+ifeq ($(HAVE_LATEXMK),1)
+ PDFLATEX := latexmk -$(PDFLATEX)
+endif #HAVE_LATEXMK
# Internal variables.
-PAPEROPT_a4 = -D latex_paper_size=a4
-PAPEROPT_letter = -D latex_paper_size=letter
-KERNELDOC = $(srctree)/scripts/kernel-doc
-KERNELDOC_CONF = -D kerneldoc_srctree=$(srctree) -D kerneldoc_bin=$(KERNELDOC)
-ALLSPHINXOPTS = $(KERNELDOC_CONF) $(PAPEROPT_$(PAPER)) $(SPHINXOPTS)
+PAPEROPT_a4 = -D latex_elements.papersize=a4paper
+PAPEROPT_letter = -D latex_elements.papersize=letterpaper
+ALLSPHINXOPTS = -D kerneldoc_srctree=$(srctree) -D kerneldoc_bin=$(KERNELDOC)
+ALLSPHINXOPTS += $(PAPEROPT_$(PAPER)) $(SPHINXOPTS)
+ifneq ($(wildcard $(srctree)/.config),)
+ifeq ($(CONFIG_RUST),y)
+ # Let Sphinx know we will include rustdoc
+ ALLSPHINXOPTS += -t rustdoc
+endif
+endif
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
@@ -44,33 +77,64 @@ I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
loop_cmd = $(echo-cmd) $(cmd_$(1)) || exit;
# $2 sphinx builder e.g. "html"
-# $3 name of the build subfolder / e.g. "media", used as:
+# $3 name of the build subfolder / e.g. "userspace-api/media", used as:
# * dest folder relative to $(BUILDDIR) and
# * cache folder relative to $(BUILDDIR)/.doctrees
-# $4 dest subfolder e.g. "man" for man pages at media/man
-# $5 reST source folder relative to $(srctree)/$(src),
-# e.g. "media" for the linux-tv book-set at ./Documentation/media
+# $4 dest subfolder e.g. "man" for man pages at userspace-api/media/man
+# $5 reST source folder relative to $(src),
+# e.g. "userspace-api/media" for the linux-tv book-set at ./Documentation/userspace-api/media
+
+PYTHONPYCACHEPREFIX ?= $(abspath $(BUILDDIR)/__pycache__)
quiet_cmd_sphinx = SPHINX $@ --> file://$(abspath $(BUILDDIR)/$3/$4)
- cmd_sphinx = $(MAKE) BUILDDIR=$(abspath $(BUILDDIR)) $(build)=Documentation/media $2 && \
- PYTHONDONTWRITEBYTECODE=1 \
- BUILDDIR=$(abspath $(BUILDDIR)) SPHINX_CONF=$(abspath $(srctree)/$(src)/$5/$(SPHINX_CONF)) \
+ cmd_sphinx = \
+ PYTHONPYCACHEPREFIX="$(PYTHONPYCACHEPREFIX)" \
+ BUILDDIR=$(abspath $(BUILDDIR)) SPHINX_CONF=$(abspath $(src)/$5/$(SPHINX_CONF)) \
+ $(PYTHON3) $(srctree)/scripts/jobserver-exec \
+ $(CONFIG_SHELL) $(srctree)/Documentation/sphinx/parallel-wrapper.sh \
$(SPHINXBUILD) \
-b $2 \
- -c $(abspath $(srctree)/$(src)) \
+ -c $(abspath $(src)) \
-d $(abspath $(BUILDDIR)/.doctrees/$3) \
-D version=$(KERNELVERSION) -D release=$(KERNELRELEASE) \
$(ALLSPHINXOPTS) \
- $(abspath $(srctree)/$(src)/$5) \
- $(abspath $(BUILDDIR)/$3/$4)
+ $(abspath $(src)/$5) \
+ $(abspath $(BUILDDIR)/$3/$4) && \
+ if [ "x$(DOCS_CSS)" != "x" ]; then \
+ cp $(if $(patsubst /%,,$(DOCS_CSS)),$(abspath $(srctree)/$(DOCS_CSS)),$(DOCS_CSS)) $(BUILDDIR)/$3/_static/; \
+ fi
htmldocs:
+ @$(srctree)/scripts/sphinx-pre-install --version-check
@+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,html,$(var),,$(var)))
+htmldocs-redirects: $(srctree)/Documentation/.renames.txt
+ @tools/docs/gen-redirects.py --output $(BUILDDIR) < $<
+
+# If Rust support is available and .config exists, add rustdoc generated contents.
+# If there are any, the errors from this make rustdoc will be displayed but
+# won't stop the execution of htmldocs
+
+ifneq ($(wildcard $(srctree)/.config),)
+ifeq ($(CONFIG_RUST),y)
+ $(Q)$(MAKE) rustdoc || true
+endif
+endif
+
+texinfodocs:
+ @$(srctree)/scripts/sphinx-pre-install --version-check
+ @+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,texinfo,$(var),texinfo,$(var)))
+
+# Note: the 'info' Make target is generated by sphinx itself when
+# running the texinfodocs target define above.
+infodocs: texinfodocs
+ $(MAKE) -C $(BUILDDIR)/texinfo info
+
linkcheckdocs:
@$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,linkcheck,$(var),,$(var)))
latexdocs:
+ @$(srctree)/scripts/sphinx-pre-install --version-check
@+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,latex,$(var),latex,$(var)))
ifeq ($(HAVE_PDFLATEX),0)
@@ -81,15 +145,23 @@ pdfdocs:
else # HAVE_PDFLATEX
+pdfdocs: DENY_VF = XDG_CONFIG_HOME=$(FONTS_CONF_DENY_VF)
pdfdocs: latexdocs
- $(foreach var,$(SPHINXDIRS), $(MAKE) PDFLATEX=$(PDFLATEX) LATEXOPTS="$(LATEXOPTS)" -C $(BUILDDIR)/$(var)/latex || exit;)
+ @$(srctree)/scripts/sphinx-pre-install --version-check
+ $(foreach var,$(SPHINXDIRS), \
+ $(MAKE) PDFLATEX="$(PDFLATEX)" LATEXOPTS="$(LATEXOPTS)" $(DENY_VF) -C $(BUILDDIR)/$(var)/latex || sh $(srctree)/scripts/check-variable-fonts.sh || exit; \
+ mkdir -p $(BUILDDIR)/$(var)/pdf; \
+ mv $(subst .tex,.pdf,$(wildcard $(BUILDDIR)/$(var)/latex/*.tex)) $(BUILDDIR)/$(var)/pdf/; \
+ )
endif # HAVE_PDFLATEX
epubdocs:
+ @$(srctree)/scripts/sphinx-pre-install --version-check
@+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,epub,$(var),epub,$(var)))
xmldocs:
+ @$(srctree)/scripts/sphinx-pre-install --version-check
@+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,xml,$(var),xml,$(var)))
endif # HAVE_SPHINX
@@ -102,17 +174,21 @@ refcheckdocs:
cleandocs:
$(Q)rm -rf $(BUILDDIR)
- $(Q)$(MAKE) BUILDDIR=$(abspath $(BUILDDIR)) $(build)=Documentation/media clean
dochelp:
@echo ' Linux kernel internal documentation in different formats from ReST:'
@echo ' htmldocs - HTML'
+ @echo ' htmldocs-redirects - generate HTML redirects for moved pages'
+ @echo ' texinfodocs - Texinfo'
+ @echo ' infodocs - Info'
@echo ' latexdocs - LaTeX'
@echo ' pdfdocs - PDF'
@echo ' epubdocs - EPUB'
@echo ' xmldocs - XML'
- @echo ' linkcheckdocs - check for broken external links (will connect to external hosts)'
- @echo ' refcheckdocs - check for references to non-existing files under Documentation'
+ @echo ' linkcheckdocs - check for broken external links'
+ @echo ' (will connect to external hosts)'
+ @echo ' refcheckdocs - check for references to non-existing files under'
+ @echo ' Documentation'
@echo ' cleandocs - clean all generated files'
@echo
@echo ' make SPHINXDIRS="s1 s2" [target] Generate only docs of folder s1, s2'
@@ -121,4 +197,8 @@ dochelp:
@echo ' make SPHINX_CONF={conf-file} [target] use *additional* sphinx-build'
@echo ' configuration. This is e.g. useful to build with nit-picking config.'
@echo
+ @echo ' make DOCS_THEME={sphinx-theme} selects a different Sphinx theme.'
+ @echo
+ @echo ' make DOCS_CSS={a .css file} adds a DOCS_CSS override file for html/epub output.'
+ @echo
@echo ' Default location for the generated documents is Documentation/output'
diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt
deleted file mode 100644
index 618e13d5e276..000000000000
--- a/Documentation/PCI/MSI-HOWTO.txt
+++ /dev/null
@@ -1,270 +0,0 @@
- The MSI Driver Guide HOWTO
- Tom L Nguyen tom.l.nguyen@intel.com
- 10/03/2003
- Revised Feb 12, 2004 by Martine Silbermann
- email: Martine.Silbermann@hp.com
- Revised Jun 25, 2004 by Tom L Nguyen
- Revised Jul 9, 2008 by Matthew Wilcox <willy@linux.intel.com>
- Copyright 2003, 2008 Intel Corporation
-
-1. About this guide
-
-This guide describes the basics of Message Signaled Interrupts (MSIs),
-the advantages of using MSI over traditional interrupt mechanisms, how
-to change your driver to use MSI or MSI-X and some basic diagnostics to
-try if a device doesn't support MSIs.
-
-
-2. What are MSIs?
-
-A Message Signaled Interrupt is a write from the device to a special
-address which causes an interrupt to be received by the CPU.
-
-The MSI capability was first specified in PCI 2.2 and was later enhanced
-in PCI 3.0 to allow each interrupt to be masked individually. The MSI-X
-capability was also introduced with PCI 3.0. It supports more interrupts
-per device than MSI and allows interrupts to be independently configured.
-
-Devices may support both MSI and MSI-X, but only one can be enabled at
-a time.
-
-
-3. Why use MSIs?
-
-There are three reasons why using MSIs can give an advantage over
-traditional pin-based interrupts.
-
-Pin-based PCI interrupts are often shared amongst several devices.
-To support this, the kernel must call each interrupt handler associated
-with an interrupt, which leads to reduced performance for the system as
-a whole. MSIs are never shared, so this problem cannot arise.
-
-When a device writes data to memory, then raises a pin-based interrupt,
-it is possible that the interrupt may arrive before all the data has
-arrived in memory (this becomes more likely with devices behind PCI-PCI
-bridges). In order to ensure that all the data has arrived in memory,
-the interrupt handler must read a register on the device which raised
-the interrupt. PCI transaction ordering rules require that all the data
-arrive in memory before the value may be returned from the register.
-Using MSIs avoids this problem as the interrupt-generating write cannot
-pass the data writes, so by the time the interrupt is raised, the driver
-knows that all the data has arrived in memory.
-
-PCI devices can only support a single pin-based interrupt per function.
-Often drivers have to query the device to find out what event has
-occurred, slowing down interrupt handling for the common case. With
-MSIs, a device can support more interrupts, allowing each interrupt
-to be specialised to a different purpose. One possible design gives
-infrequent conditions (such as errors) their own interrupt which allows
-the driver to handle the normal interrupt handling path more efficiently.
-Other possible designs include giving one interrupt to each packet queue
-in a network card or each port in a storage controller.
-
-
-4. How to use MSIs
-
-PCI devices are initialised to use pin-based interrupts. The device
-driver has to set up the device to use MSI or MSI-X. Not all machines
-support MSIs correctly, and for those machines, the APIs described below
-will simply fail and the device will continue to use pin-based interrupts.
-
-4.1 Include kernel support for MSIs
-
-To support MSI or MSI-X, the kernel must be built with the CONFIG_PCI_MSI
-option enabled. This option is only available on some architectures,
-and it may depend on some other options also being set. For example,
-on x86, you must also enable X86_UP_APIC or SMP in order to see the
-CONFIG_PCI_MSI option.
-
-4.2 Using MSI
-
-Most of the hard work is done for the driver in the PCI layer. The driver
-simply has to request that the PCI layer set up the MSI capability for this
-device.
-
-To automatically use MSI or MSI-X interrupt vectors, use the following
-function:
-
- int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
- unsigned int max_vecs, unsigned int flags);
-
-which allocates up to max_vecs interrupt vectors for a PCI device. It
-returns the number of vectors allocated or a negative error. If the device
-has a requirements for a minimum number of vectors the driver can pass a
-min_vecs argument set to this limit, and the PCI core will return -ENOSPC
-if it can't meet the minimum number of vectors.
-
-The flags argument is used to specify which type of interrupt can be used
-by the device and the driver (PCI_IRQ_LEGACY, PCI_IRQ_MSI, PCI_IRQ_MSIX).
-A convenient short-hand (PCI_IRQ_ALL_TYPES) is also available to ask for
-any possible kind of interrupt. If the PCI_IRQ_AFFINITY flag is set,
-pci_alloc_irq_vectors() will spread the interrupts around the available CPUs.
-
-To get the Linux IRQ numbers passed to request_irq() and free_irq() and the
-vectors, use the following function:
-
- int pci_irq_vector(struct pci_dev *dev, unsigned int nr);
-
-Any allocated resources should be freed before removing the device using
-the following function:
-
- void pci_free_irq_vectors(struct pci_dev *dev);
-
-If a device supports both MSI-X and MSI capabilities, this API will use the
-MSI-X facilities in preference to the MSI facilities. MSI-X supports any
-number of interrupts between 1 and 2048. In contrast, MSI is restricted to
-a maximum of 32 interrupts (and must be a power of two). In addition, the
-MSI interrupt vectors must be allocated consecutively, so the system might
-not be able to allocate as many vectors for MSI as it could for MSI-X. On
-some platforms, MSI interrupts must all be targeted at the same set of CPUs
-whereas MSI-X interrupts can all be targeted at different CPUs.
-
-If a device supports neither MSI-X or MSI it will fall back to a single
-legacy IRQ vector.
-
-The typical usage of MSI or MSI-X interrupts is to allocate as many vectors
-as possible, likely up to the limit supported by the device. If nvec is
-larger than the number supported by the device it will automatically be
-capped to the supported limit, so there is no need to query the number of
-vectors supported beforehand:
-
- nvec = pci_alloc_irq_vectors(pdev, 1, nvec, PCI_IRQ_ALL_TYPES)
- if (nvec < 0)
- goto out_err;
-
-If a driver is unable or unwilling to deal with a variable number of MSI
-interrupts it can request a particular number of interrupts by passing that
-number to pci_alloc_irq_vectors() function as both 'min_vecs' and
-'max_vecs' parameters:
-
- ret = pci_alloc_irq_vectors(pdev, nvec, nvec, PCI_IRQ_ALL_TYPES);
- if (ret < 0)
- goto out_err;
-
-The most notorious example of the request type described above is enabling
-the single MSI mode for a device. It could be done by passing two 1s as
-'min_vecs' and 'max_vecs':
-
- ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
- if (ret < 0)
- goto out_err;
-
-Some devices might not support using legacy line interrupts, in which case
-the driver can specify that only MSI or MSI-X is acceptable:
-
- nvec = pci_alloc_irq_vectors(pdev, 1, nvec, PCI_IRQ_MSI | PCI_IRQ_MSIX);
- if (nvec < 0)
- goto out_err;
-
-4.3 Legacy APIs
-
-The following old APIs to enable and disable MSI or MSI-X interrupts should
-not be used in new code:
-
- pci_enable_msi() /* deprecated */
- pci_disable_msi() /* deprecated */
- pci_enable_msix_range() /* deprecated */
- pci_enable_msix_exact() /* deprecated */
- pci_disable_msix() /* deprecated */
-
-Additionally there are APIs to provide the number of supported MSI or MSI-X
-vectors: pci_msi_vec_count() and pci_msix_vec_count(). In general these
-should be avoided in favor of letting pci_alloc_irq_vectors() cap the
-number of vectors. If you have a legitimate special use case for the count
-of vectors we might have to revisit that decision and add a
-pci_nr_irq_vectors() helper that handles MSI and MSI-X transparently.
-
-4.4 Considerations when using MSIs
-
-4.4.1 Spinlocks
-
-Most device drivers have a per-device spinlock which is taken in the
-interrupt handler. With pin-based interrupts or a single MSI, it is not
-necessary to disable interrupts (Linux guarantees the same interrupt will
-not be re-entered). If a device uses multiple interrupts, the driver
-must disable interrupts while the lock is held. If the device sends
-a different interrupt, the driver will deadlock trying to recursively
-acquire the spinlock. Such deadlocks can be avoided by using
-spin_lock_irqsave() or spin_lock_irq() which disable local interrupts
-and acquire the lock (see Documentation/kernel-hacking/locking.rst).
-
-4.5 How to tell whether MSI/MSI-X is enabled on a device
-
-Using 'lspci -v' (as root) may show some devices with "MSI", "Message
-Signalled Interrupts" or "MSI-X" capabilities. Each of these capabilities
-has an 'Enable' flag which is followed with either "+" (enabled)
-or "-" (disabled).
-
-
-5. MSI quirks
-
-Several PCI chipsets or devices are known not to support MSIs.
-The PCI stack provides three ways to disable MSIs:
-
-1. globally
-2. on all devices behind a specific bridge
-3. on a single device
-
-5.1. Disabling MSIs globally
-
-Some host chipsets simply don't support MSIs properly. If we're
-lucky, the manufacturer knows this and has indicated it in the ACPI
-FADT table. In this case, Linux automatically disables MSIs.
-Some boards don't include this information in the table and so we have
-to detect them ourselves. The complete list of these is found near the
-quirk_disable_all_msi() function in drivers/pci/quirks.c.
-
-If you have a board which has problems with MSIs, you can pass pci=nomsi
-on the kernel command line to disable MSIs on all devices. It would be
-in your best interests to report the problem to linux-pci@vger.kernel.org
-including a full 'lspci -v' so we can add the quirks to the kernel.
-
-5.2. Disabling MSIs below a bridge
-
-Some PCI bridges are not able to route MSIs between busses properly.
-In this case, MSIs must be disabled on all devices behind the bridge.
-
-Some bridges allow you to enable MSIs by changing some bits in their
-PCI configuration space (especially the Hypertransport chipsets such
-as the nVidia nForce and Serverworks HT2000). As with host chipsets,
-Linux mostly knows about them and automatically enables MSIs if it can.
-If you have a bridge unknown to Linux, you can enable
-MSIs in configuration space using whatever method you know works, then
-enable MSIs on that bridge by doing:
-
- echo 1 > /sys/bus/pci/devices/$bridge/msi_bus
-
-where $bridge is the PCI address of the bridge you've enabled (eg
-0000:00:0e.0).
-
-To disable MSIs, echo 0 instead of 1. Changing this value should be
-done with caution as it could break interrupt handling for all devices
-below this bridge.
-
-Again, please notify linux-pci@vger.kernel.org of any bridges that need
-special handling.
-
-5.3. Disabling MSIs on a single device
-
-Some devices are known to have faulty MSI implementations. Usually this
-is handled in the individual device driver, but occasionally it's necessary
-to handle this with a quirk. Some drivers have an option to disable use
-of MSI. While this is a convenient workaround for the driver author,
-it is not good practice, and should not be emulated.
-
-5.4. Finding why MSIs are disabled on a device
-
-From the above three sections, you can see that there are many reasons
-why MSIs may not be enabled for a given device. Your first step should
-be to examine your dmesg carefully to determine whether MSIs are enabled
-for your machine. You should also check your .config to be sure you
-have enabled CONFIG_PCI_MSI.
-
-Then, 'lspci -t' gives the list of bridges above a device. Reading
-/sys/bus/pci/devices/*/msi_bus will tell you whether MSIs are enabled (1)
-or disabled (0). If 0 is found in any of the msi_bus files belonging
-to bridges between the PCI root and the device, MSIs are disabled.
-
-It is also worth checking the device driver to see whether it supports MSIs.
-For example, it may contain calls to pci_irq_alloc_vectors() with the
-PCI_IRQ_MSI or PCI_IRQ_MSIX flags.
diff --git a/Documentation/PCI/PCIEBUS-HOWTO.txt b/Documentation/PCI/PCIEBUS-HOWTO.txt
deleted file mode 100644
index 15f0bb3b5045..000000000000
--- a/Documentation/PCI/PCIEBUS-HOWTO.txt
+++ /dev/null
@@ -1,198 +0,0 @@
- The PCI Express Port Bus Driver Guide HOWTO
- Tom L Nguyen tom.l.nguyen@intel.com
- 11/03/2004
-
-1. About this guide
-
-This guide describes the basics of the PCI Express Port Bus driver
-and provides information on how to enable the service drivers to
-register/unregister with the PCI Express Port Bus Driver.
-
-2. Copyright 2004 Intel Corporation
-
-3. What is the PCI Express Port Bus Driver
-
-A PCI Express Port is a logical PCI-PCI Bridge structure. There
-are two types of PCI Express Port: the Root Port and the Switch
-Port. The Root Port originates a PCI Express link from a PCI Express
-Root Complex and the Switch Port connects PCI Express links to
-internal logical PCI buses. The Switch Port, which has its secondary
-bus representing the switch's internal routing logic, is called the
-switch's Upstream Port. The switch's Downstream Port is bridging from
-switch's internal routing bus to a bus representing the downstream
-PCI Express link from the PCI Express Switch.
-
-A PCI Express Port can provide up to four distinct functions,
-referred to in this document as services, depending on its port type.
-PCI Express Port's services include native hotplug support (HP),
-power management event support (PME), advanced error reporting
-support (AER), and virtual channel support (VC). These services may
-be handled by a single complex driver or be individually distributed
-and handled by corresponding service drivers.
-
-4. Why use the PCI Express Port Bus Driver?
-
-In existing Linux kernels, the Linux Device Driver Model allows a
-physical device to be handled by only a single driver. The PCI
-Express Port is a PCI-PCI Bridge device with multiple distinct
-services. To maintain a clean and simple solution each service
-may have its own software service driver. In this case several
-service drivers will compete for a single PCI-PCI Bridge device.
-For example, if the PCI Express Root Port native hotplug service
-driver is loaded first, it claims a PCI-PCI Bridge Root Port. The
-kernel therefore does not load other service drivers for that Root
-Port. In other words, it is impossible to have multiple service
-drivers load and run on a PCI-PCI Bridge device simultaneously
-using the current driver model.
-
-To enable multiple service drivers running simultaneously requires
-having a PCI Express Port Bus driver, which manages all populated
-PCI Express Ports and distributes all provided service requests
-to the corresponding service drivers as required. Some key
-advantages of using the PCI Express Port Bus driver are listed below:
-
- - Allow multiple service drivers to run simultaneously on
- a PCI-PCI Bridge Port device.
-
- - Allow service drivers implemented in an independent
- staged approach.
-
- - Allow one service driver to run on multiple PCI-PCI Bridge
- Port devices.
-
- - Manage and distribute resources of a PCI-PCI Bridge Port
- device to requested service drivers.
-
-5. Configuring the PCI Express Port Bus Driver vs. Service Drivers
-
-5.1 Including the PCI Express Port Bus Driver Support into the Kernel
-
-Including the PCI Express Port Bus driver depends on whether the PCI
-Express support is included in the kernel config. The kernel will
-automatically include the PCI Express Port Bus driver as a kernel
-driver when the PCI Express support is enabled in the kernel.
-
-5.2 Enabling Service Driver Support
-
-PCI device drivers are implemented based on Linux Device Driver Model.
-All service drivers are PCI device drivers. As discussed above, it is
-impossible to load any service driver once the kernel has loaded the
-PCI Express Port Bus Driver. To meet the PCI Express Port Bus Driver
-Model requires some minimal changes on existing service drivers that
-imposes no impact on the functionality of existing service drivers.
-
-A service driver is required to use the two APIs shown below to
-register its service with the PCI Express Port Bus driver (see
-section 5.2.1 & 5.2.2). It is important that a service driver
-initializes the pcie_port_service_driver data structure, included in
-header file /include/linux/pcieport_if.h, before calling these APIs.
-Failure to do so will result an identity mismatch, which prevents
-the PCI Express Port Bus driver from loading a service driver.
-
-5.2.1 pcie_port_service_register
-
-int pcie_port_service_register(struct pcie_port_service_driver *new)
-
-This API replaces the Linux Driver Model's pci_register_driver API. A
-service driver should always calls pcie_port_service_register at
-module init. Note that after service driver being loaded, calls
-such as pci_enable_device(dev) and pci_set_master(dev) are no longer
-necessary since these calls are executed by the PCI Port Bus driver.
-
-5.2.2 pcie_port_service_unregister
-
-void pcie_port_service_unregister(struct pcie_port_service_driver *new)
-
-pcie_port_service_unregister replaces the Linux Driver Model's
-pci_unregister_driver. It's always called by service driver when a
-module exits.
-
-5.2.3 Sample Code
-
-Below is sample service driver code to initialize the port service
-driver data structure.
-
-static struct pcie_port_service_id service_id[] = { {
- .vendor = PCI_ANY_ID,
- .device = PCI_ANY_ID,
- .port_type = PCIE_RC_PORT,
- .service_type = PCIE_PORT_SERVICE_AER,
- }, { /* end: all zeroes */ }
-};
-
-static struct pcie_port_service_driver root_aerdrv = {
- .name = (char *)device_name,
- .id_table = &service_id[0],
-
- .probe = aerdrv_load,
- .remove = aerdrv_unload,
-
- .suspend = aerdrv_suspend,
- .resume = aerdrv_resume,
-};
-
-Below is a sample code for registering/unregistering a service
-driver.
-
-static int __init aerdrv_service_init(void)
-{
- int retval = 0;
-
- retval = pcie_port_service_register(&root_aerdrv);
- if (!retval) {
- /*
- * FIX ME
- */
- }
- return retval;
-}
-
-static void __exit aerdrv_service_exit(void)
-{
- pcie_port_service_unregister(&root_aerdrv);
-}
-
-module_init(aerdrv_service_init);
-module_exit(aerdrv_service_exit);
-
-6. Possible Resource Conflicts
-
-Since all service drivers of a PCI-PCI Bridge Port device are
-allowed to run simultaneously, below lists a few of possible resource
-conflicts with proposed solutions.
-
-6.1 MSI and MSI-X Vector Resource
-
-Once MSI or MSI-X interrupts are enabled on a device, it stays in this
-mode until they are disabled again. Since service drivers of the same
-PCI-PCI Bridge port share the same physical device, if an individual
-service driver enables or disables MSI/MSI-X mode it may result
-unpredictable behavior.
-
-To avoid this situation all service drivers are not permitted to
-switch interrupt mode on its device. The PCI Express Port Bus driver
-is responsible for determining the interrupt mode and this should be
-transparent to service drivers. Service drivers need to know only
-the vector IRQ assigned to the field irq of struct pcie_device, which
-is passed in when the PCI Express Port Bus driver probes each service
-driver. Service drivers should use (struct pcie_device*)dev->irq to
-call request_irq/free_irq. In addition, the interrupt mode is stored
-in the field interrupt_mode of struct pcie_device.
-
-6.3 PCI Memory/IO Mapped Regions
-
-Service drivers for PCI Express Power Management (PME), Advanced
-Error Reporting (AER), Hot-Plug (HP) and Virtual Channel (VC) access
-PCI configuration space on the PCI Express port. In all cases the
-registers accessed are independent of each other. This patch assumes
-that all service drivers will be well behaved and not overwrite
-other service driver's configuration settings.
-
-6.4 PCI Config Registers
-
-Each service driver runs its PCI config operations on its own
-capability structure except the PCI Express capability structure, in
-which Root Control register and Device Control register are shared
-between PME and AER. This patch assumes that all service drivers
-will be well behaved and not overwrite other service driver's
-configuration settings.
diff --git a/Documentation/PCI/acpi-info.rst b/Documentation/PCI/acpi-info.rst
new file mode 100644
index 000000000000..34c64a5a66ec
--- /dev/null
+++ b/Documentation/PCI/acpi-info.rst
@@ -0,0 +1,192 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+========================================
+ACPI considerations for PCI host bridges
+========================================
+
+The general rule is that the ACPI namespace should describe everything the
+OS might use unless there's another way for the OS to find it [1, 2].
+
+For example, there's no standard hardware mechanism for enumerating PCI
+host bridges, so the ACPI namespace must describe each host bridge, the
+method for accessing PCI config space below it, the address space windows
+the host bridge forwards to PCI (using _CRS), and the routing of legacy
+INTx interrupts (using _PRT).
+
+PCI devices, which are below the host bridge, generally do not need to be
+described via ACPI. The OS can discover them via the standard PCI
+enumeration mechanism, using config accesses to discover and identify
+devices and read and size their BARs. However, ACPI may describe PCI
+devices if it provides power management or hotplug functionality for them
+or if the device has INTx interrupts connected by platform interrupt
+controllers and a _PRT is needed to describe those connections.
+
+ACPI resource description is done via _CRS objects of devices in the ACPI
+namespace [2]. The _CRS is like a generalized PCI BAR: the OS can read
+_CRS and figure out what resource is being consumed even if it doesn't have
+a driver for the device [3]. That's important because it means an old OS
+can work correctly even on a system with new devices unknown to the OS.
+The new devices might not do anything, but the OS can at least make sure no
+resources conflict with them.
+
+Static tables like MCFG, HPET, ECDT, etc., are *not* mechanisms for
+reserving address space. The static tables are for things the OS needs to
+know early in boot, before it can parse the ACPI namespace. If a new table
+is defined, an old OS needs to operate correctly even though it ignores the
+table. _CRS allows that because it is generic and understood by the old
+OS; a static table does not.
+
+If the OS is expected to manage a non-discoverable device described via
+ACPI, that device will have a specific _HID/_CID that tells the OS what
+driver to bind to it, and the _CRS tells the OS and the driver where the
+device's registers are.
+
+PCI host bridges are PNP0A03 or PNP0A08 devices. Their _CRS should
+describe all the address space they consume. This includes all the windows
+they forward down to the PCI bus, as well as registers of the host bridge
+itself that are not forwarded to PCI. The host bridge registers include
+things like secondary/subordinate bus registers that determine the bus
+range below the bridge, window registers that describe the apertures, etc.
+These are all device-specific, non-architected things, so the only way a
+PNP0A03/PNP0A08 driver can manage them is via _PRS/_CRS/_SRS, which contain
+the device-specific details. The host bridge registers also include ECAM
+space, since it is consumed by the host bridge.
+
+ACPI defines a Consumer/Producer bit to distinguish the bridge registers
+("Consumer") from the bridge apertures ("Producer") [4, 5], but early
+BIOSes didn't use that bit correctly. The result is that the current ACPI
+spec defines Consumer/Producer only for the Extended Address Space
+descriptors; the bit should be ignored in the older QWord/DWord/Word
+Address Space descriptors. Consequently, OSes have to assume all
+QWord/DWord/Word descriptors are windows.
+
+Prior to the addition of Extended Address Space descriptors, the failure of
+Consumer/Producer meant there was no way to describe bridge registers in
+the PNP0A03/PNP0A08 device itself. The workaround was to describe the
+bridge registers (including ECAM space) in PNP0C02 catch-all devices [6].
+With the exception of ECAM, the bridge register space is device-specific
+anyway, so the generic PNP0A03/PNP0A08 driver (pci_root.c) has no need to
+know about it.
+
+New architectures should be able to use "Consumer" Extended Address Space
+descriptors in the PNP0A03 device for bridge registers, including ECAM,
+although a strict interpretation of [6] might prohibit this. Old x86 and
+ia64 kernels assume all address space descriptors, including "Consumer"
+Extended Address Space ones, are windows, so it would not be safe to
+describe bridge registers this way on those architectures.
+
+PNP0C02 "motherboard" devices are basically a catch-all. There's no
+programming model for them other than "don't use these resources for
+anything else." So a PNP0C02 _CRS should claim any address space that is
+(1) not claimed by _CRS under any other device object in the ACPI namespace
+and (2) should not be assigned by the OS to something else.
+
+The PCIe spec requires the Enhanced Configuration Access Method (ECAM)
+unless there's a standard firmware interface for config access, e.g., the
+ia64 SAL interface [7]. A host bridge consumes ECAM memory address space
+and converts memory accesses into PCI configuration accesses. The spec
+defines the ECAM address space layout and functionality; only the base of
+the address space is device-specific. An ACPI OS learns the base address
+from either the static MCFG table or a _CBA method in the PNP0A03 device.
+
+The MCFG table must describe the ECAM space of non-hot pluggable host
+bridges [8]. Since MCFG is a static table and can't be updated by hotplug,
+a _CBA method in the PNP0A03 device describes the ECAM space of a
+hot-pluggable host bridge [9]. Note that for both MCFG and _CBA, the base
+address always corresponds to bus 0, even if the bus range below the bridge
+(which is reported via _CRS) doesn't start at 0.
+
+
+[1] ACPI 6.2, sec 6.1:
+ For any device that is on a non-enumerable type of bus (for example, an
+ ISA bus), OSPM enumerates the devices' identifier(s) and the ACPI
+ system firmware must supply an _HID object ... for each device to
+ enable OSPM to do that.
+
+[2] ACPI 6.2, sec 3.7:
+ The OS enumerates motherboard devices simply by reading through the
+ ACPI Namespace looking for devices with hardware IDs.
+
+ Each device enumerated by ACPI includes ACPI-defined objects in the
+ ACPI Namespace that report the hardware resources the device could
+ occupy [_PRS], an object that reports the resources that are currently
+ used by the device [_CRS], and objects for configuring those resources
+ [_SRS]. The information is used by the Plug and Play OS (OSPM) to
+ configure the devices.
+
+[3] ACPI 6.2, sec 6.2:
+ OSPM uses device configuration objects to configure hardware resources
+ for devices enumerated via ACPI. Device configuration objects provide
+ information about current and possible resource requirements, the
+ relationship between shared resources, and methods for configuring
+ hardware resources.
+
+ When OSPM enumerates a device, it calls _PRS to determine the resource
+ requirements of the device. It may also call _CRS to find the current
+ resource settings for the device. Using this information, the Plug and
+ Play system determines what resources the device should consume and
+ sets those resources by calling the device’s _SRS control method.
+
+ In ACPI, devices can consume resources (for example, legacy keyboards),
+ provide resources (for example, a proprietary PCI bridge), or do both.
+ Unless otherwise specified, resources for a device are assumed to be
+ taken from the nearest matching resource above the device in the device
+ hierarchy.
+
+[4] ACPI 6.2, sec 6.4.3.5.1, 2, 3, 4:
+ QWord/DWord/Word Address Space Descriptor (.1, .2, .3)
+ General Flags: Bit [0] Ignored
+
+ Extended Address Space Descriptor (.4)
+ General Flags: Bit [0] Consumer/Producer:
+
+ * 1 – This device consumes this resource
+ * 0 – This device produces and consumes this resource
+
+[5] ACPI 6.2, sec 19.6.43:
+ ResourceUsage specifies whether the Memory range is consumed by
+ this device (ResourceConsumer) or passed on to child devices
+ (ResourceProducer). If nothing is specified, then
+ ResourceConsumer is assumed.
+
+[6] PCI Firmware 3.2, sec 4.1.2:
+ If the operating system does not natively comprehend reserving the
+ MMCFG region, the MMCFG region must be reserved by firmware. The
+ address range reported in the MCFG table or by _CBA method (see Section
+ 4.1.3) must be reserved by declaring a motherboard resource. For most
+ systems, the motherboard resource would appear at the root of the ACPI
+ namespace (under \_SB) in a node with a _HID of EISAID (PNP0C02), and
+ the resources in this case should not be claimed in the root PCI bus’s
+ _CRS. The resources can optionally be returned in Int15 E820 or
+ EFIGetMemoryMap as reserved memory but must always be reported through
+ ACPI as a motherboard resource.
+
+[7] PCI Express 4.0, sec 7.2.2:
+ For systems that are PC-compatible, or that do not implement a
+ processor-architecture-specific firmware interface standard that allows
+ access to the Configuration Space, the ECAM is required as defined in
+ this section.
+
+[8] PCI Firmware 3.2, sec 4.1.2:
+ The MCFG table is an ACPI table that is used to communicate the base
+ addresses corresponding to the non-hot removable PCI Segment Groups
+ range within a PCI Segment Group available to the operating system at
+ boot. This is required for the PC-compatible systems.
+
+ The MCFG table is only used to communicate the base addresses
+ corresponding to the PCI Segment Groups available to the system at
+ boot.
+
+[9] PCI Firmware 3.2, sec 4.1.3:
+ The _CBA (Memory mapped Configuration Base Address) control method is
+ an optional ACPI object that returns the 64-bit memory mapped
+ configuration base address for the hot plug capable host bridge. The
+ base address returned by _CBA is processor-relative address. The _CBA
+ control method evaluates to an Integer.
+
+ This control method appears under a host bridge object. When the _CBA
+ method appears under an active host bridge object, the operating system
+ evaluates this structure to identify the memory mapped configuration
+ base address corresponding to the PCI Segment Group for the bus number
+ range specified in _CRS method. An ACPI name space object that contains
+ the _CBA method must also contain a corresponding _SEG method.
diff --git a/Documentation/PCI/acpi-info.txt b/Documentation/PCI/acpi-info.txt
deleted file mode 100644
index 3ffa3b03970e..000000000000
--- a/Documentation/PCI/acpi-info.txt
+++ /dev/null
@@ -1,187 +0,0 @@
- ACPI considerations for PCI host bridges
-
-The general rule is that the ACPI namespace should describe everything the
-OS might use unless there's another way for the OS to find it [1, 2].
-
-For example, there's no standard hardware mechanism for enumerating PCI
-host bridges, so the ACPI namespace must describe each host bridge, the
-method for accessing PCI config space below it, the address space windows
-the host bridge forwards to PCI (using _CRS), and the routing of legacy
-INTx interrupts (using _PRT).
-
-PCI devices, which are below the host bridge, generally do not need to be
-described via ACPI. The OS can discover them via the standard PCI
-enumeration mechanism, using config accesses to discover and identify
-devices and read and size their BARs. However, ACPI may describe PCI
-devices if it provides power management or hotplug functionality for them
-or if the device has INTx interrupts connected by platform interrupt
-controllers and a _PRT is needed to describe those connections.
-
-ACPI resource description is done via _CRS objects of devices in the ACPI
-namespace [2].   The _CRS is like a generalized PCI BAR: the OS can read
-_CRS and figure out what resource is being consumed even if it doesn't have
-a driver for the device [3].  That's important because it means an old OS
-can work correctly even on a system with new devices unknown to the OS.
-The new devices might not do anything, but the OS can at least make sure no
-resources conflict with them.
-
-Static tables like MCFG, HPET, ECDT, etc., are *not* mechanisms for
-reserving address space. The static tables are for things the OS needs to
-know early in boot, before it can parse the ACPI namespace. If a new table
-is defined, an old OS needs to operate correctly even though it ignores the
-table. _CRS allows that because it is generic and understood by the old
-OS; a static table does not.
-
-If the OS is expected to manage a non-discoverable device described via
-ACPI, that device will have a specific _HID/_CID that tells the OS what
-driver to bind to it, and the _CRS tells the OS and the driver where the
-device's registers are.
-
-PCI host bridges are PNP0A03 or PNP0A08 devices.  Their _CRS should
-describe all the address space they consume.  This includes all the windows
-they forward down to the PCI bus, as well as registers of the host bridge
-itself that are not forwarded to PCI.  The host bridge registers include
-things like secondary/subordinate bus registers that determine the bus
-range below the bridge, window registers that describe the apertures, etc.
-These are all device-specific, non-architected things, so the only way a
-PNP0A03/PNP0A08 driver can manage them is via _PRS/_CRS/_SRS, which contain
-the device-specific details.  The host bridge registers also include ECAM
-space, since it is consumed by the host bridge.
-
-ACPI defines a Consumer/Producer bit to distinguish the bridge registers
-("Consumer") from the bridge apertures ("Producer") [4, 5], but early
-BIOSes didn't use that bit correctly. The result is that the current ACPI
-spec defines Consumer/Producer only for the Extended Address Space
-descriptors; the bit should be ignored in the older QWord/DWord/Word
-Address Space descriptors. Consequently, OSes have to assume all
-QWord/DWord/Word descriptors are windows.
-
-Prior to the addition of Extended Address Space descriptors, the failure of
-Consumer/Producer meant there was no way to describe bridge registers in
-the PNP0A03/PNP0A08 device itself. The workaround was to describe the
-bridge registers (including ECAM space) in PNP0C02 catch-all devices [6].
-With the exception of ECAM, the bridge register space is device-specific
-anyway, so the generic PNP0A03/PNP0A08 driver (pci_root.c) has no need to
-know about it.  
-
-New architectures should be able to use "Consumer" Extended Address Space
-descriptors in the PNP0A03 device for bridge registers, including ECAM,
-although a strict interpretation of [6] might prohibit this. Old x86 and
-ia64 kernels assume all address space descriptors, including "Consumer"
-Extended Address Space ones, are windows, so it would not be safe to
-describe bridge registers this way on those architectures.
-
-PNP0C02 "motherboard" devices are basically a catch-all.  There's no
-programming model for them other than "don't use these resources for
-anything else."  So a PNP0C02 _CRS should claim any address space that is
-(1) not claimed by _CRS under any other device object in the ACPI namespace
-and (2) should not be assigned by the OS to something else.
-
-The PCIe spec requires the Enhanced Configuration Access Method (ECAM)
-unless there's a standard firmware interface for config access, e.g., the
-ia64 SAL interface [7]. A host bridge consumes ECAM memory address space
-and converts memory accesses into PCI configuration accesses. The spec
-defines the ECAM address space layout and functionality; only the base of
-the address space is device-specific. An ACPI OS learns the base address
-from either the static MCFG table or a _CBA method in the PNP0A03 device.
-
-The MCFG table must describe the ECAM space of non-hot pluggable host
-bridges [8]. Since MCFG is a static table and can't be updated by hotplug,
-a _CBA method in the PNP0A03 device describes the ECAM space of a
-hot-pluggable host bridge [9]. Note that for both MCFG and _CBA, the base
-address always corresponds to bus 0, even if the bus range below the bridge
-(which is reported via _CRS) doesn't start at 0.
-
-
-[1] ACPI 6.2, sec 6.1:
- For any device that is on a non-enumerable type of bus (for example, an
- ISA bus), OSPM enumerates the devices' identifier(s) and the ACPI
- system firmware must supply an _HID object ... for each device to
- enable OSPM to do that.
-
-[2] ACPI 6.2, sec 3.7:
- The OS enumerates motherboard devices simply by reading through the
- ACPI Namespace looking for devices with hardware IDs.
-
- Each device enumerated by ACPI includes ACPI-defined objects in the
- ACPI Namespace that report the hardware resources the device could
- occupy [_PRS], an object that reports the resources that are currently
- used by the device [_CRS], and objects for configuring those resources
- [_SRS]. The information is used by the Plug and Play OS (OSPM) to
- configure the devices.
-
-[3] ACPI 6.2, sec 6.2:
- OSPM uses device configuration objects to configure hardware resources
- for devices enumerated via ACPI. Device configuration objects provide
- information about current and possible resource requirements, the
- relationship between shared resources, and methods for configuring
- hardware resources.
-
- When OSPM enumerates a device, it calls _PRS to determine the resource
- requirements of the device. It may also call _CRS to find the current
- resource settings for the device. Using this information, the Plug and
- Play system determines what resources the device should consume and
- sets those resources by calling the device’s _SRS control method.
-
- In ACPI, devices can consume resources (for example, legacy keyboards),
- provide resources (for example, a proprietary PCI bridge), or do both.
- Unless otherwise specified, resources for a device are assumed to be
- taken from the nearest matching resource above the device in the device
- hierarchy.
-
-[4] ACPI 6.2, sec 6.4.3.5.1, 2, 3, 4:
- QWord/DWord/Word Address Space Descriptor (.1, .2, .3)
- General Flags: Bit [0] Ignored
-
- Extended Address Space Descriptor (.4)
- General Flags: Bit [0] Consumer/Producer:
- 1–This device consumes this resource
- 0–This device produces and consumes this resource
-
-[5] ACPI 6.2, sec 19.6.43:
- ResourceUsage specifies whether the Memory range is consumed by
- this device (ResourceConsumer) or passed on to child devices
- (ResourceProducer). If nothing is specified, then
- ResourceConsumer is assumed.
-
-[6] PCI Firmware 3.2, sec 4.1.2:
- If the operating system does not natively comprehend reserving the
- MMCFG region, the MMCFG region must be reserved by firmware. The
- address range reported in the MCFG table or by _CBA method (see Section
- 4.1.3) must be reserved by declaring a motherboard resource. For most
- systems, the motherboard resource would appear at the root of the ACPI
- namespace (under \_SB) in a node with a _HID of EISAID (PNP0C02), and
- the resources in this case should not be claimed in the root PCI bus’s
- _CRS. The resources can optionally be returned in Int15 E820 or
- EFIGetMemoryMap as reserved memory but must always be reported through
- ACPI as a motherboard resource.
-
-[7] PCI Express 4.0, sec 7.2.2:
- For systems that are PC-compatible, or that do not implement a
- processor-architecture-specific firmware interface standard that allows
- access to the Configuration Space, the ECAM is required as defined in
- this section.
-
-[8] PCI Firmware 3.2, sec 4.1.2:
- The MCFG table is an ACPI table that is used to communicate the base
- addresses corresponding to the non-hot removable PCI Segment Groups
- range within a PCI Segment Group available to the operating system at
- boot. This is required for the PC-compatible systems.
-
- The MCFG table is only used to communicate the base addresses
- corresponding to the PCI Segment Groups available to the system at
- boot.
-
-[9] PCI Firmware 3.2, sec 4.1.3:
- The _CBA (Memory mapped Configuration Base Address) control method is
- an optional ACPI object that returns the 64-bit memory mapped
- configuration base address for the hot plug capable host bridge. The
- base address returned by _CBA is processor-relative address. The _CBA
- control method evaluates to an Integer.
-
- This control method appears under a host bridge object. When the _CBA
- method appears under an active host bridge object, the operating system
- evaluates this structure to identify the memory mapped configuration
- base address corresponding to the PCI Segment Group for the bus number
- range specified in _CRS method. An ACPI name space object that contains
- the _CBA method must also contain a corresponding _SEG method.
diff --git a/Documentation/PCI/boot-interrupts.rst b/Documentation/PCI/boot-interrupts.rst
new file mode 100644
index 000000000000..931077bb0953
--- /dev/null
+++ b/Documentation/PCI/boot-interrupts.rst
@@ -0,0 +1,159 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============
+Boot Interrupts
+===============
+
+:Author: - Sean V Kelley <sean.v.kelley@linux.intel.com>
+
+Overview
+========
+
+On PCI Express, interrupts are represented with either MSI or inbound
+interrupt messages (Assert_INTx/Deassert_INTx). The integrated IO-APIC in a
+given Core IO converts the legacy interrupt messages from PCI Express to
+MSI interrupts. If the IO-APIC is disabled (via the mask bits in the
+IO-APIC table entries), the messages are routed to the legacy PCH. This
+in-band interrupt mechanism was traditionally necessary for systems that
+did not support the IO-APIC and for boot. Intel in the past has used the
+term "boot interrupts" to describe this mechanism. Further, the PCI Express
+protocol describes this in-band legacy wire-interrupt INTx mechanism for
+I/O devices to signal PCI-style level interrupts. The subsequent paragraphs
+describe problems with the Core IO handling of INTx message routing to the
+PCH and mitigation within BIOS and the OS.
+
+
+Issue
+=====
+
+When in-band legacy INTx messages are forwarded to the PCH, they in turn
+trigger a new interrupt for which the OS likely lacks a handler. When an
+interrupt goes unhandled over time, they are tracked by the Linux kernel as
+Spurious Interrupts. The IRQ will be disabled by the Linux kernel after it
+reaches a specific count with the error "nobody cared". This disabled IRQ
+now prevents valid usage by an existing interrupt which may happen to share
+the IRQ line::
+
+ irq 19: nobody cared (try booting with the "irqpoll" option)
+ CPU: 0 PID: 2988 Comm: irq/34-nipalk Tainted: 4.14.87-rt49-02410-g4a640ec-dirty #1
+ Hardware name: National Instruments NI PXIe-8880/NI PXIe-8880, BIOS 2.1.5f1 01/09/2020
+ Call Trace:
+
+ <IRQ>
+ ? dump_stack+0x46/0x5e
+ ? __report_bad_irq+0x2e/0xb0
+ ? note_interrupt+0x242/0x290
+ ? nNIKAL100_memoryRead16+0x8/0x10 [nikal]
+ ? handle_irq_event_percpu+0x55/0x70
+ ? handle_irq_event+0x4f/0x80
+ ? handle_fasteoi_irq+0x81/0x180
+ ? handle_irq+0x1c/0x30
+ ? do_IRQ+0x41/0xd0
+ ? common_interrupt+0x84/0x84
+ </IRQ>
+
+ handlers:
+ irq_default_primary_handler threaded usb_hcd_irq
+ Disabling IRQ #19
+
+
+Conditions
+==========
+
+The use of threaded interrupts is the most likely condition to trigger
+this problem today. Threaded interrupts may not be re-enabled after the IRQ
+handler wakes. These "one shot" conditions mean that the threaded interrupt
+needs to keep the interrupt line masked until the threaded handler has run.
+Especially when dealing with high data rate interrupts, the thread needs to
+run to completion; otherwise some handlers will end up in stack overflows
+since the interrupt of the issuing device is still active.
+
+Affected Chipsets
+=================
+
+The legacy interrupt forwarding mechanism exists today in a number of
+devices including but not limited to chipsets from AMD/ATI, Broadcom, and
+Intel. Changes made through the mitigations below have been applied to
+drivers/pci/quirks.c
+
+Starting with ICX there are no longer any IO-APICs in the Core IO's
+devices. IO-APIC is only in the PCH. Devices connected to the Core IO's
+PCIe Root Ports will use native MSI/MSI-X mechanisms.
+
+Mitigations
+===========
+
+The mitigations take the form of PCI quirks. The preference has been to
+first identify and make use of a means to disable the routing to the PCH.
+In such a case a quirk to disable boot interrupt generation can be
+added. [1]_
+
+Intel® 6300ESB I/O Controller Hub
+ Alternate Base Address Register:
+ BIE: Boot Interrupt Enable
+
+ == ===========================
+ 0 Boot interrupt is enabled.
+ 1 Boot interrupt is disabled.
+ == ===========================
+
+Intel® Sandy Bridge through Sky Lake based Xeon servers:
+ Coherent Interface Protocol Interrupt Control
+ dis_intx_route2pch/dis_intx_route2ich/dis_intx_route2dmi2:
+ When this bit is set. Local INTx messages received from the
+ Intel® Quick Data DMA/PCI Express ports are not routed to legacy
+ PCH - they are either converted into MSI via the integrated IO-APIC
+ (if the IO-APIC mask bit is clear in the appropriate entries)
+ or cause no further action (when mask bit is set)
+
+In the absence of a way to directly disable the routing, another approach
+has been to make use of PCI Interrupt pin to INTx routing tables for
+purposes of redirecting the interrupt handler to the rerouted interrupt
+line by default. Therefore, on chipsets where this INTx routing cannot be
+disabled, the Linux kernel will reroute the valid interrupt to its legacy
+interrupt. This redirection of the handler will prevent the occurrence of
+the spurious interrupt detection which would ordinarily disable the IRQ
+line due to excessive unhandled counts. [2]_
+
+The config option X86_REROUTE_FOR_BROKEN_BOOT_IRQS exists to enable (or
+disable) the redirection of the interrupt handler to the PCH interrupt
+line. The option can be overridden by either pci=ioapicreroute or
+pci=noioapicreroute. [3]_
+
+
+More Documentation
+==================
+
+There is an overview of the legacy interrupt handling in several datasheets
+(6300ESB and 6700PXH below). While largely the same, it provides insight
+into the evolution of its handling with chipsets.
+
+Example of disabling of the boot interrupt
+------------------------------------------
+
+ - Intel® 6300ESB I/O Controller Hub (Document # 300641-004US)
+ 5.7.3 Boot Interrupt
+ https://www.intel.com/content/dam/doc/datasheet/6300esb-io-controller-hub-datasheet.pdf
+
+ - Intel® Xeon® Processor E5-1600/2400/2600/4600 v3 Product Families
+ Datasheet - Volume 2: Registers (Document # 330784-003)
+ 6.6.41 cipintrc Coherent Interface Protocol Interrupt Control
+ https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/xeon-e5-v3-datasheet-vol-2.pdf
+
+Example of handler rerouting
+----------------------------
+
+ - Intel® 6700PXH 64-bit PCI Hub (Document # 302628)
+ 2.15.2 PCI Express Legacy INTx Support and Boot Interrupt
+ https://www.intel.com/content/dam/doc/datasheet/6700pxh-64-bit-pci-hub-datasheet.pdf
+
+
+If you have any legacy PCI interrupt questions that aren't answered, email me.
+
+Cheers,
+ Sean V Kelley
+ sean.v.kelley@linux.intel.com
+
+.. [1] https://lore.kernel.org/r/12131949181903-git-send-email-sassmann@suse.de/
+.. [2] https://lore.kernel.org/r/12131949182094-git-send-email-sassmann@suse.de/
+.. [3] https://lore.kernel.org/r/487C8EA7.6020205@suse.de/
diff --git a/Documentation/PCI/controller/index.rst b/Documentation/PCI/controller/index.rst
new file mode 100644
index 000000000000..c2ce9ccdcfa0
--- /dev/null
+++ b/Documentation/PCI/controller/index.rst
@@ -0,0 +1,10 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================================
+PCI Native Host Bridge and Endpoint Drivers
+===========================================
+
+.. toctree::
+ :maxdepth: 2
+
+ rcar-pcie-firmware
diff --git a/Documentation/PCI/controller/rcar-pcie-firmware.rst b/Documentation/PCI/controller/rcar-pcie-firmware.rst
new file mode 100644
index 000000000000..67d3bf66e315
--- /dev/null
+++ b/Documentation/PCI/controller/rcar-pcie-firmware.rst
@@ -0,0 +1,32 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=================================================
+Firmware of PCIe controller for Renesas R-Car V4H
+=================================================
+
+Renesas R-Car V4H (r8a779g0) has a PCIe controller, requiring a specific
+firmware download during startup.
+
+However, Renesas currently cannot distribute the firmware free of charge.
+
+The firmware file "104_PCIe_fw_addr_data_ver1.05.txt" (note that the file name
+might be different between different datasheet revisions) can be found in the
+datasheet encoded as text, and as such, the file's content must be converted
+back to binary form. This can be achieved using the following example script:
+
+.. code-block:: sh
+
+ $ awk '/^\s*0x[0-9A-Fa-f]{4}\s+0x[0-9A-Fa-f]{4}/ { print substr($2,5,2) substr($2,3,2) }' \
+ 104_PCIe_fw_addr_data_ver1.05.txt | \
+ xxd -p -r > rcar_gen4_pcie.bin
+
+Once the text content has been converted into a binary firmware file, verify
+its checksum as follows:
+
+.. code-block:: sh
+
+ $ sha1sum rcar_gen4_pcie.bin
+ 1d0bd4b189b4eb009f5d564b1f93a79112994945 rcar_gen4_pcie.bin
+
+The resulting binary file called "rcar_gen4_pcie.bin" should be placed in the
+"/lib/firmware" directory before the driver runs.
diff --git a/Documentation/PCI/endpoint/function/binding/pci-ntb.rst b/Documentation/PCI/endpoint/function/binding/pci-ntb.rst
new file mode 100644
index 000000000000..40253d3d5163
--- /dev/null
+++ b/Documentation/PCI/endpoint/function/binding/pci-ntb.rst
@@ -0,0 +1,38 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========================
+PCI NTB Endpoint Function
+==========================
+
+1) Create a subdirectory to pci_epf_ntb directory in configfs.
+
+Standard EPF Configurable Fields:
+
+================ ===========================================================
+vendorid should be 0x104c
+deviceid should be 0xb00d for TI's J721E SoC
+revid don't care
+progif_code don't care
+subclass_code should be 0x00
+baseclass_code should be 0x5
+cache_line_size don't care
+subsys_vendor_id don't care
+subsys_id don't care
+interrupt_pin don't care
+msi_interrupts don't care
+msix_interrupts don't care
+================ ===========================================================
+
+2) Create a subdirectory to directory created in 1
+
+NTB EPF specific configurable fields:
+
+================ ===========================================================
+db_count Number of doorbells; default = 4
+mw1 size of memory window1
+mw2 size of memory window2
+mw3 size of memory window3
+mw4 size of memory window4
+num_mws Number of memory windows; max = 4
+spad_count Number of scratchpad registers; default = 64
+================ ===========================================================
diff --git a/Documentation/PCI/endpoint/function/binding/pci-test.rst b/Documentation/PCI/endpoint/function/binding/pci-test.rst
new file mode 100644
index 000000000000..57ee866fb165
--- /dev/null
+++ b/Documentation/PCI/endpoint/function/binding/pci-test.rst
@@ -0,0 +1,26 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========================
+PCI Test Endpoint Function
+==========================
+
+name: Should be "pci_epf_test" to bind to the pci_epf_test driver.
+
+Configurable Fields:
+
+================ ===========================================================
+vendorid should be 0x104c
+deviceid should be 0xb500 for DRA74x and 0xb501 for DRA72x
+revid don't care
+progif_code don't care
+subclass_code don't care
+baseclass_code should be 0xff
+cache_line_size don't care
+subsys_vendor_id don't care
+subsys_id don't care
+interrupt_pin Should be 1 - INTA, 2 - INTB, 3 - INTC, 4 -INTD
+msi_interrupts Should be 1 to 32 depending on the number of MSI interrupts
+ to test
+msix_interrupts Should be 1 to 2048 depending on the number of MSI-X
+ interrupts to test
+================ ===========================================================
diff --git a/Documentation/PCI/endpoint/function/binding/pci-test.txt b/Documentation/PCI/endpoint/function/binding/pci-test.txt
deleted file mode 100644
index cd76ba47394b..000000000000
--- a/Documentation/PCI/endpoint/function/binding/pci-test.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-PCI TEST ENDPOINT FUNCTION
-
-name: Should be "pci_epf_test" to bind to the pci_epf_test driver.
-
-Configurable Fields:
-vendorid : should be 0x104c
-deviceid : should be 0xb500 for DRA74x and 0xb501 for DRA72x
-revid : don't care
-progif_code : don't care
-subclass_code : don't care
-baseclass_code : should be 0xff
-cache_line_size : don't care
-subsys_vendor_id : don't care
-subsys_id : don't care
-interrupt_pin : Should be 1 - INTA, 2 - INTB, 3 - INTC, 4 -INTD
-msi_interrupts : Should be 1 to 32 depending on the number of MSI interrupts
- to test
-msix_interrupts : Should be 1 to 2048 depending on the number of MSI-X
- interrupts to test
diff --git a/Documentation/PCI/endpoint/index.rst b/Documentation/PCI/endpoint/index.rst
new file mode 100644
index 000000000000..dd1f62e731c9
--- /dev/null
+++ b/Documentation/PCI/endpoint/index.rst
@@ -0,0 +1,21 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================
+PCI Endpoint Framework
+======================
+
+.. toctree::
+ :maxdepth: 2
+
+ pci-endpoint
+ pci-endpoint-cfs
+ pci-test-function
+ pci-test-howto
+ pci-ntb-function
+ pci-ntb-howto
+ pci-vntb-function
+ pci-vntb-howto
+ pci-nvme-function
+
+ function/binding/pci-test
+ function/binding/pci-ntb
diff --git a/Documentation/PCI/endpoint/pci-endpoint-cfs.rst b/Documentation/PCI/endpoint/pci-endpoint-cfs.rst
new file mode 100644
index 000000000000..e69c2872ce3b
--- /dev/null
+++ b/Documentation/PCI/endpoint/pci-endpoint-cfs.rst
@@ -0,0 +1,138 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======================================
+Configuring PCI Endpoint Using CONFIGFS
+=======================================
+
+:Author: Kishon Vijay Abraham I <kishon@ti.com>
+
+The PCI Endpoint Core exposes configfs entry (pci_ep) to configure the
+PCI endpoint function and to bind the endpoint function
+with the endpoint controller. (For introducing other mechanisms to
+configure the PCI Endpoint Function refer to [1]).
+
+Mounting configfs
+=================
+
+The PCI Endpoint Core layer creates pci_ep directory in the mounted configfs
+directory. configfs can be mounted using the following command::
+
+ mount -t configfs none /sys/kernel/config
+
+Directory Structure
+===================
+
+The pci_ep configfs has two directories at its root: controllers and
+functions. Every EPC device present in the system will have an entry in
+the *controllers* directory and every EPF driver present in the system
+will have an entry in the *functions* directory.
+::
+
+ /sys/kernel/config/pci_ep/
+ .. controllers/
+ .. functions/
+
+Creating EPF Device
+===================
+
+Every registered EPF driver will be listed in controllers directory. The
+entries corresponding to EPF driver will be created by the EPF core.
+::
+
+ /sys/kernel/config/pci_ep/functions/
+ .. <EPF Driver1>/
+ ... <EPF Device 11>/
+ ... <EPF Device 21>/
+ ... <EPF Device 31>/
+ .. <EPF Driver2>/
+ ... <EPF Device 12>/
+ ... <EPF Device 22>/
+
+In order to create a <EPF device> of the type probed by <EPF Driver>, the
+user has to create a directory inside <EPF DriverN>.
+
+Every <EPF device> directory consists of the following entries that can be
+used to configure the standard configuration header of the endpoint function.
+(These entries are created by the framework when any new <EPF Device> is
+created)
+::
+
+ .. <EPF Driver1>/
+ ... <EPF Device 11>/
+ ... vendorid
+ ... deviceid
+ ... revid
+ ... progif_code
+ ... subclass_code
+ ... baseclass_code
+ ... cache_line_size
+ ... subsys_vendor_id
+ ... subsys_id
+ ... interrupt_pin
+ ... <Symlink EPF Device 31>/
+ ... primary/
+ ... <Symlink EPC Device1>/
+ ... secondary/
+ ... <Symlink EPC Device2>/
+
+If an EPF device has to be associated with 2 EPCs (like in the case of
+Non-transparent bridge), symlink of endpoint controller connected to primary
+interface should be added in 'primary' directory and symlink of endpoint
+controller connected to secondary interface should be added in 'secondary'
+directory.
+
+The <EPF Device> directory can have a list of symbolic links
+(<Symlink EPF Device 31>) to other <EPF Device>. These symbolic links should
+be created by the user to represent the virtual functions that are bound to
+the physical function. In the above directory structure <EPF Device 11> is a
+physical function and <EPF Device 31> is a virtual function. An EPF device once
+it's linked to another EPF device, cannot be linked to an EPC device.
+
+EPC Device
+==========
+
+Every registered EPC device will be listed in controllers directory. The
+entries corresponding to EPC device will be created by the EPC core.
+::
+
+ /sys/kernel/config/pci_ep/controllers/
+ .. <EPC Device1>/
+ ... <Symlink EPF Device11>/
+ ... <Symlink EPF Device12>/
+ ... start
+ .. <EPC Device2>/
+ ... <Symlink EPF Device21>/
+ ... <Symlink EPF Device22>/
+ ... start
+
+The <EPC Device> directory will have a list of symbolic links to
+<EPF Device>. These symbolic links should be created by the user to
+represent the functions present in the endpoint device. Only <EPF Device>
+that represents a physical function can be linked to an EPC device.
+
+The <EPC Device> directory will also have a *start* field. Once
+"1" is written to this field, the endpoint device will be ready to
+establish the link with the host. This is usually done after
+all the EPF devices are created and linked with the EPC device.
+::
+
+ | controllers/
+ | <Directory: EPC name>/
+ | <Symbolic Link: Function>
+ | start
+ | functions/
+ | <Directory: EPF driver>/
+ | <Directory: EPF device>/
+ | vendorid
+ | deviceid
+ | revid
+ | progif_code
+ | subclass_code
+ | baseclass_code
+ | cache_line_size
+ | subsys_vendor_id
+ | subsys_id
+ | interrupt_pin
+ | function
+
+[1] Documentation/PCI/endpoint/pci-endpoint.rst
diff --git a/Documentation/PCI/endpoint/pci-endpoint-cfs.txt b/Documentation/PCI/endpoint/pci-endpoint-cfs.txt
deleted file mode 100644
index d740f29960a4..000000000000
--- a/Documentation/PCI/endpoint/pci-endpoint-cfs.txt
+++ /dev/null
@@ -1,105 +0,0 @@
- CONFIGURING PCI ENDPOINT USING CONFIGFS
- Kishon Vijay Abraham I <kishon@ti.com>
-
-The PCI Endpoint Core exposes configfs entry (pci_ep) to configure the
-PCI endpoint function and to bind the endpoint function
-with the endpoint controller. (For introducing other mechanisms to
-configure the PCI Endpoint Function refer to [1]).
-
-*) Mounting configfs
-
-The PCI Endpoint Core layer creates pci_ep directory in the mounted configfs
-directory. configfs can be mounted using the following command.
-
- mount -t configfs none /sys/kernel/config
-
-*) Directory Structure
-
-The pci_ep configfs has two directories at its root: controllers and
-functions. Every EPC device present in the system will have an entry in
-the *controllers* directory and and every EPF driver present in the system
-will have an entry in the *functions* directory.
-
-/sys/kernel/config/pci_ep/
- .. controllers/
- .. functions/
-
-*) Creating EPF Device
-
-Every registered EPF driver will be listed in controllers directory. The
-entries corresponding to EPF driver will be created by the EPF core.
-
-/sys/kernel/config/pci_ep/functions/
- .. <EPF Driver1>/
- ... <EPF Device 11>/
- ... <EPF Device 21>/
- .. <EPF Driver2>/
- ... <EPF Device 12>/
- ... <EPF Device 22>/
-
-In order to create a <EPF device> of the type probed by <EPF Driver>, the
-user has to create a directory inside <EPF DriverN>.
-
-Every <EPF device> directory consists of the following entries that can be
-used to configure the standard configuration header of the endpoint function.
-(These entries are created by the framework when any new <EPF Device> is
-created)
-
- .. <EPF Driver1>/
- ... <EPF Device 11>/
- ... vendorid
- ... deviceid
- ... revid
- ... progif_code
- ... subclass_code
- ... baseclass_code
- ... cache_line_size
- ... subsys_vendor_id
- ... subsys_id
- ... interrupt_pin
-
-*) EPC Device
-
-Every registered EPC device will be listed in controllers directory. The
-entries corresponding to EPC device will be created by the EPC core.
-
-/sys/kernel/config/pci_ep/controllers/
- .. <EPC Device1>/
- ... <Symlink EPF Device11>/
- ... <Symlink EPF Device12>/
- ... start
- .. <EPC Device2>/
- ... <Symlink EPF Device21>/
- ... <Symlink EPF Device22>/
- ... start
-
-The <EPC Device> directory will have a list of symbolic links to
-<EPF Device>. These symbolic links should be created by the user to
-represent the functions present in the endpoint device.
-
-The <EPC Device> directory will also have a *start* field. Once
-"1" is written to this field, the endpoint device will be ready to
-establish the link with the host. This is usually done after
-all the EPF devices are created and linked with the EPC device.
-
-
- | controllers/
- | <Directory: EPC name>/
- | <Symbolic Link: Function>
- | start
- | functions/
- | <Directory: EPF driver>/
- | <Directory: EPF device>/
- | vendorid
- | deviceid
- | revid
- | progif_code
- | subclass_code
- | baseclass_code
- | cache_line_size
- | subsys_vendor_id
- | subsys_id
- | interrupt_pin
- | function
-
-[1] -> Documentation/PCI/endpoint/pci-endpoint.txt
diff --git a/Documentation/PCI/endpoint/pci-endpoint.rst b/Documentation/PCI/endpoint/pci-endpoint.rst
new file mode 100644
index 000000000000..0741c8cbd74e
--- /dev/null
+++ b/Documentation/PCI/endpoint/pci-endpoint.rst
@@ -0,0 +1,259 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+:Author: Kishon Vijay Abraham I <kishon@ti.com>
+
+This document is a guide to use the PCI Endpoint Framework in order to create
+endpoint controller driver, endpoint function driver, and using configfs
+interface to bind the function driver to the controller driver.
+
+Introduction
+============
+
+Linux has a comprehensive PCI subsystem to support PCI controllers that
+operates in Root Complex mode. The subsystem has capability to scan PCI bus,
+assign memory resources and IRQ resources, load PCI driver (based on
+vendor ID, device ID), support other services like hot-plug, power management,
+advanced error reporting and virtual channels.
+
+However the PCI controller IP integrated in some SoCs is capable of operating
+either in Root Complex mode or Endpoint mode. PCI Endpoint Framework will
+add endpoint mode support in Linux. This will help to run Linux in an
+EP system which can have a wide variety of use cases from testing or
+validation, co-processor accelerator, etc.
+
+PCI Endpoint Core
+=================
+
+The PCI Endpoint Core layer comprises 3 components: the Endpoint Controller
+library, the Endpoint Function library, and the configfs layer to bind the
+endpoint function with the endpoint controller.
+
+PCI Endpoint Controller(EPC) Library
+------------------------------------
+
+The EPC library provides APIs to be used by the controller that can operate
+in endpoint mode. It also provides APIs to be used by function driver/library
+in order to implement a particular endpoint function.
+
+APIs for the PCI controller Driver
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This section lists the APIs that the PCI Endpoint core provides to be used
+by the PCI controller driver.
+
+* devm_pci_epc_create()/pci_epc_create()
+
+ The PCI controller driver should implement the following ops:
+
+ * write_header: ops to populate configuration space header
+ * set_bar: ops to configure the BAR
+ * clear_bar: ops to reset the BAR
+ * alloc_addr_space: ops to allocate in PCI controller address space
+ * free_addr_space: ops to free the allocated address space
+ * raise_irq: ops to raise a legacy, MSI or MSI-X interrupt
+ * start: ops to start the PCI link
+ * stop: ops to stop the PCI link
+
+ The PCI controller driver can then create a new EPC device by invoking
+ devm_pci_epc_create()/pci_epc_create().
+
+* pci_epc_destroy()
+
+ The PCI controller driver can destroy the EPC device created by
+ pci_epc_create() using pci_epc_destroy().
+
+* pci_epc_linkup()
+
+ In order to notify all the function devices that the EPC device to which
+ they are linked has established a link with the host, the PCI controller
+ driver should invoke pci_epc_linkup().
+
+* pci_epc_mem_init()
+
+ Initialize the pci_epc_mem structure used for allocating EPC addr space.
+
+* pci_epc_mem_exit()
+
+ Cleanup the pci_epc_mem structure allocated during pci_epc_mem_init().
+
+
+EPC APIs for the PCI Endpoint Function Driver
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This section lists the APIs that the PCI Endpoint core provides to be used
+by the PCI endpoint function driver.
+
+* pci_epc_write_header()
+
+ The PCI endpoint function driver should use pci_epc_write_header() to
+ write the standard configuration header to the endpoint controller.
+
+* pci_epc_set_bar()
+
+ The PCI endpoint function driver should use pci_epc_set_bar() to configure
+ the Base Address Register in order for the host to assign PCI addr space.
+ Register space of the function driver is usually configured
+ using this API.
+
+* pci_epc_clear_bar()
+
+ The PCI endpoint function driver should use pci_epc_clear_bar() to reset
+ the BAR.
+
+* pci_epc_raise_irq()
+
+ The PCI endpoint function driver should use pci_epc_raise_irq() to raise
+ Legacy Interrupt, MSI or MSI-X Interrupt.
+
+* pci_epc_mem_alloc_addr()
+
+ The PCI endpoint function driver should use pci_epc_mem_alloc_addr(), to
+ allocate memory address from EPC addr space which is required to access
+ RC's buffer
+
+* pci_epc_mem_free_addr()
+
+ The PCI endpoint function driver should use pci_epc_mem_free_addr() to
+ free the memory space allocated using pci_epc_mem_alloc_addr().
+
+* pci_epc_map_addr()
+
+ A PCI endpoint function driver should use pci_epc_map_addr() to map to a RC
+ PCI address the CPU address of local memory obtained with
+ pci_epc_mem_alloc_addr().
+
+* pci_epc_unmap_addr()
+
+ A PCI endpoint function driver should use pci_epc_unmap_addr() to unmap the
+ CPU address of local memory mapped to a RC address with pci_epc_map_addr().
+
+* pci_epc_mem_map()
+
+ A PCI endpoint controller may impose constraints on the RC PCI addresses that
+ can be mapped. The function pci_epc_mem_map() allows endpoint function
+ drivers to allocate and map controller memory while handling such
+ constraints. This function will determine the size of the memory that must be
+ allocated with pci_epc_mem_alloc_addr() for successfully mapping a RC PCI
+ address range. This function will also indicate the size of the PCI address
+ range that was actually mapped, which can be less than the requested size, as
+ well as the offset into the allocated memory to use for accessing the mapped
+ RC PCI address range.
+
+* pci_epc_mem_unmap()
+
+ A PCI endpoint function driver can use pci_epc_mem_unmap() to unmap and free
+ controller memory that was allocated and mapped using pci_epc_mem_map().
+
+
+Other EPC APIs
+~~~~~~~~~~~~~~
+
+There are other APIs provided by the EPC library. These are used for binding
+the EPF device with EPC device. pci-ep-cfs.c can be used as reference for
+using these APIs.
+
+* pci_epc_get()
+
+ Get a reference to the PCI endpoint controller based on the device name of
+ the controller.
+
+* pci_epc_put()
+
+ Release the reference to the PCI endpoint controller obtained using
+ pci_epc_get()
+
+* pci_epc_add_epf()
+
+ Add a PCI endpoint function to a PCI endpoint controller. A PCIe device
+ can have up to 8 functions according to the specification.
+
+* pci_epc_remove_epf()
+
+ Remove the PCI endpoint function from PCI endpoint controller.
+
+* pci_epc_start()
+
+ The PCI endpoint function driver should invoke pci_epc_start() once it
+ has configured the endpoint function and wants to start the PCI link.
+
+* pci_epc_stop()
+
+ The PCI endpoint function driver should invoke pci_epc_stop() to stop
+ the PCI LINK.
+
+
+PCI Endpoint Function(EPF) Library
+----------------------------------
+
+The EPF library provides APIs to be used by the function driver and the EPC
+library to provide endpoint mode functionality.
+
+EPF APIs for the PCI Endpoint Function Driver
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This section lists the APIs that the PCI Endpoint core provides to be used
+by the PCI endpoint function driver.
+
+* pci_epf_register_driver()
+
+ The PCI Endpoint Function driver should implement the following ops:
+ * bind: ops to perform when an EPC device has been bound to EPF device
+ * unbind: ops to perform when a binding has been lost between an EPC
+ device and EPF device
+ * add_cfs: optional ops to create function specific configfs
+ attributes
+
+ The PCI Function driver can then register the PCI EPF driver by using
+ pci_epf_register_driver().
+
+* pci_epf_unregister_driver()
+
+ The PCI Function driver can unregister the PCI EPF driver by using
+ pci_epf_unregister_driver().
+
+* pci_epf_alloc_space()
+
+ The PCI Function driver can allocate space for a particular BAR using
+ pci_epf_alloc_space().
+
+* pci_epf_free_space()
+
+ The PCI Function driver can free the allocated space
+ (using pci_epf_alloc_space) by invoking pci_epf_free_space().
+
+APIs for the PCI Endpoint Controller Library
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This section lists the APIs that the PCI Endpoint core provides to be used
+by the PCI endpoint controller library.
+
+* pci_epf_linkup()
+
+ The PCI endpoint controller library invokes pci_epf_linkup() when the
+ EPC device has established the connection to the host.
+
+Other EPF APIs
+~~~~~~~~~~~~~~
+
+There are other APIs provided by the EPF library. These are used to notify
+the function driver when the EPF device is bound to the EPC device.
+pci-ep-cfs.c can be used as reference for using these APIs.
+
+* pci_epf_create()
+
+ Create a new PCI EPF device by passing the name of the PCI EPF device.
+ This name will be used to bind the EPF device to a EPF driver.
+
+* pci_epf_destroy()
+
+ Destroy the created PCI EPF device.
+
+* pci_epf_bind()
+
+ pci_epf_bind() should be invoked when the EPF device has been bound to
+ an EPC device.
+
+* pci_epf_unbind()
+
+ pci_epf_unbind() should be invoked when the binding between EPC device
+ and EPF device is lost.
diff --git a/Documentation/PCI/endpoint/pci-endpoint.txt b/Documentation/PCI/endpoint/pci-endpoint.txt
deleted file mode 100644
index e86a96b66a6a..000000000000
--- a/Documentation/PCI/endpoint/pci-endpoint.txt
+++ /dev/null
@@ -1,215 +0,0 @@
- PCI ENDPOINT FRAMEWORK
- Kishon Vijay Abraham I <kishon@ti.com>
-
-This document is a guide to use the PCI Endpoint Framework in order to create
-endpoint controller driver, endpoint function driver, and using configfs
-interface to bind the function driver to the controller driver.
-
-1. Introduction
-
-Linux has a comprehensive PCI subsystem to support PCI controllers that
-operates in Root Complex mode. The subsystem has capability to scan PCI bus,
-assign memory resources and IRQ resources, load PCI driver (based on
-vendor ID, device ID), support other services like hot-plug, power management,
-advanced error reporting and virtual channels.
-
-However the PCI controller IP integrated in some SoCs is capable of operating
-either in Root Complex mode or Endpoint mode. PCI Endpoint Framework will
-add endpoint mode support in Linux. This will help to run Linux in an
-EP system which can have a wide variety of use cases from testing or
-validation, co-processor accelerator, etc.
-
-2. PCI Endpoint Core
-
-The PCI Endpoint Core layer comprises 3 components: the Endpoint Controller
-library, the Endpoint Function library, and the configfs layer to bind the
-endpoint function with the endpoint controller.
-
-2.1 PCI Endpoint Controller(EPC) Library
-
-The EPC library provides APIs to be used by the controller that can operate
-in endpoint mode. It also provides APIs to be used by function driver/library
-in order to implement a particular endpoint function.
-
-2.1.1 APIs for the PCI controller Driver
-
-This section lists the APIs that the PCI Endpoint core provides to be used
-by the PCI controller driver.
-
-*) devm_pci_epc_create()/pci_epc_create()
-
- The PCI controller driver should implement the following ops:
- * write_header: ops to populate configuration space header
- * set_bar: ops to configure the BAR
- * clear_bar: ops to reset the BAR
- * alloc_addr_space: ops to allocate in PCI controller address space
- * free_addr_space: ops to free the allocated address space
- * raise_irq: ops to raise a legacy, MSI or MSI-X interrupt
- * start: ops to start the PCI link
- * stop: ops to stop the PCI link
-
- The PCI controller driver can then create a new EPC device by invoking
- devm_pci_epc_create()/pci_epc_create().
-
-*) devm_pci_epc_destroy()/pci_epc_destroy()
-
- The PCI controller driver can destroy the EPC device created by either
- devm_pci_epc_create() or pci_epc_create() using devm_pci_epc_destroy() or
- pci_epc_destroy().
-
-*) pci_epc_linkup()
-
- In order to notify all the function devices that the EPC device to which
- they are linked has established a link with the host, the PCI controller
- driver should invoke pci_epc_linkup().
-
-*) pci_epc_mem_init()
-
- Initialize the pci_epc_mem structure used for allocating EPC addr space.
-
-*) pci_epc_mem_exit()
-
- Cleanup the pci_epc_mem structure allocated during pci_epc_mem_init().
-
-2.1.2 APIs for the PCI Endpoint Function Driver
-
-This section lists the APIs that the PCI Endpoint core provides to be used
-by the PCI endpoint function driver.
-
-*) pci_epc_write_header()
-
- The PCI endpoint function driver should use pci_epc_write_header() to
- write the standard configuration header to the endpoint controller.
-
-*) pci_epc_set_bar()
-
- The PCI endpoint function driver should use pci_epc_set_bar() to configure
- the Base Address Register in order for the host to assign PCI addr space.
- Register space of the function driver is usually configured
- using this API.
-
-*) pci_epc_clear_bar()
-
- The PCI endpoint function driver should use pci_epc_clear_bar() to reset
- the BAR.
-
-*) pci_epc_raise_irq()
-
- The PCI endpoint function driver should use pci_epc_raise_irq() to raise
- Legacy Interrupt, MSI or MSI-X Interrupt.
-
-*) pci_epc_mem_alloc_addr()
-
- The PCI endpoint function driver should use pci_epc_mem_alloc_addr(), to
- allocate memory address from EPC addr space which is required to access
- RC's buffer
-
-*) pci_epc_mem_free_addr()
-
- The PCI endpoint function driver should use pci_epc_mem_free_addr() to
- free the memory space allocated using pci_epc_mem_alloc_addr().
-
-2.1.3 Other APIs
-
-There are other APIs provided by the EPC library. These are used for binding
-the EPF device with EPC device. pci-ep-cfs.c can be used as reference for
-using these APIs.
-
-*) pci_epc_get()
-
- Get a reference to the PCI endpoint controller based on the device name of
- the controller.
-
-*) pci_epc_put()
-
- Release the reference to the PCI endpoint controller obtained using
- pci_epc_get()
-
-*) pci_epc_add_epf()
-
- Add a PCI endpoint function to a PCI endpoint controller. A PCIe device
- can have up to 8 functions according to the specification.
-
-*) pci_epc_remove_epf()
-
- Remove the PCI endpoint function from PCI endpoint controller.
-
-*) pci_epc_start()
-
- The PCI endpoint function driver should invoke pci_epc_start() once it
- has configured the endpoint function and wants to start the PCI link.
-
-*) pci_epc_stop()
-
- The PCI endpoint function driver should invoke pci_epc_stop() to stop
- the PCI LINK.
-
-2.2 PCI Endpoint Function(EPF) Library
-
-The EPF library provides APIs to be used by the function driver and the EPC
-library to provide endpoint mode functionality.
-
-2.2.1 APIs for the PCI Endpoint Function Driver
-
-This section lists the APIs that the PCI Endpoint core provides to be used
-by the PCI endpoint function driver.
-
-*) pci_epf_register_driver()
-
- The PCI Endpoint Function driver should implement the following ops:
- * bind: ops to perform when a EPC device has been bound to EPF device
- * unbind: ops to perform when a binding has been lost between a EPC
- device and EPF device
- * linkup: ops to perform when the EPC device has established a
- connection with a host system
-
- The PCI Function driver can then register the PCI EPF driver by using
- pci_epf_register_driver().
-
-*) pci_epf_unregister_driver()
-
- The PCI Function driver can unregister the PCI EPF driver by using
- pci_epf_unregister_driver().
-
-*) pci_epf_alloc_space()
-
- The PCI Function driver can allocate space for a particular BAR using
- pci_epf_alloc_space().
-
-*) pci_epf_free_space()
-
- The PCI Function driver can free the allocated space
- (using pci_epf_alloc_space) by invoking pci_epf_free_space().
-
-2.2.2 APIs for the PCI Endpoint Controller Library
-This section lists the APIs that the PCI Endpoint core provides to be used
-by the PCI endpoint controller library.
-
-*) pci_epf_linkup()
-
- The PCI endpoint controller library invokes pci_epf_linkup() when the
- EPC device has established the connection to the host.
-
-2.2.2 Other APIs
-There are other APIs provided by the EPF library. These are used to notify
-the function driver when the EPF device is bound to the EPC device.
-pci-ep-cfs.c can be used as reference for using these APIs.
-
-*) pci_epf_create()
-
- Create a new PCI EPF device by passing the name of the PCI EPF device.
- This name will be used to bind the the EPF device to a EPF driver.
-
-*) pci_epf_destroy()
-
- Destroy the created PCI EPF device.
-
-*) pci_epf_bind()
-
- pci_epf_bind() should be invoked when the EPF device has been bound to
- a EPC device.
-
-*) pci_epf_unbind()
-
- pci_epf_unbind() should be invoked when the binding between EPC device
- and EPF device is lost.
diff --git a/Documentation/PCI/endpoint/pci-ntb-function.rst b/Documentation/PCI/endpoint/pci-ntb-function.rst
new file mode 100644
index 000000000000..3b9d836a4924
--- /dev/null
+++ b/Documentation/PCI/endpoint/pci-ntb-function.rst
@@ -0,0 +1,348 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=================
+PCI NTB Function
+=================
+
+:Author: Kishon Vijay Abraham I <kishon@ti.com>
+
+PCI Non-Transparent Bridges (NTB) allow two host systems to communicate
+with each other by exposing each host as a device to the other host.
+NTBs typically support the ability to generate interrupts on the remote
+machine, expose memory ranges as BARs, and perform DMA. They also support
+scratchpads, which are areas of memory within the NTB that are accessible
+from both machines.
+
+PCI NTB Function allows two different systems (or hosts) to communicate
+with each other by configuring the endpoint instances in such a way that
+transactions from one system are routed to the other system.
+
+In the below diagram, PCI NTB function configures the SoC with multiple
+PCI Endpoint (EP) instances in such a way that transactions from one EP
+controller are routed to the other EP controller. Once PCI NTB function
+configures the SoC with multiple EP instances, HOST1 and HOST2 can
+communicate with each other using SoC as a bridge.
+
+.. code-block:: text
+
+ +-------------+ +-------------+
+ | | | |
+ | HOST1 | | HOST2 |
+ | | | |
+ +------^------+ +------^------+
+ | |
+ | |
+ +---------|-------------------------------------------------|---------+
+ | +------v------+ +------v------+ |
+ | | | | | |
+ | | EP | | EP | |
+ | | CONTROLLER1 | | CONTROLLER2 | |
+ | | <-----------------------------------> | |
+ | | | | | |
+ | | | | | |
+ | | | SoC With Multiple EP Instances | | |
+ | | | (Configured using NTB Function) | | |
+ | +-------------+ +-------------+ |
+ +---------------------------------------------------------------------+
+
+Constructs used for Implementing NTB
+====================================
+
+ 1) Config Region
+ 2) Self Scratchpad Registers
+ 3) Peer Scratchpad Registers
+ 4) Doorbell (DB) Registers
+ 5) Memory Window (MW)
+
+
+Config Region:
+--------------
+
+Config Region is a construct that is specific to NTB implemented using NTB
+Endpoint Function Driver. The host and endpoint side NTB function driver will
+exchange information with each other using this region. Config Region has
+Control/Status Registers for configuring the Endpoint Controller. Host can
+write into this region for configuring the outbound Address Translation Unit
+(ATU) and to indicate the link status. Endpoint can indicate the status of
+commands issued by host in this region. Endpoint can also indicate the
+scratchpad offset and number of memory windows to the host using this region.
+
+The format of Config Region is given below. All the fields here are 32 bits.
+
+.. code-block:: text
+
+ +------------------------+
+ | COMMAND |
+ +------------------------+
+ | ARGUMENT |
+ +------------------------+
+ | STATUS |
+ +------------------------+
+ | TOPOLOGY |
+ +------------------------+
+ | ADDRESS (LOWER 32) |
+ +------------------------+
+ | ADDRESS (UPPER 32) |
+ +------------------------+
+ | SIZE |
+ +------------------------+
+ | NO OF MEMORY WINDOW |
+ +------------------------+
+ | MEMORY WINDOW1 OFFSET |
+ +------------------------+
+ | SPAD OFFSET |
+ +------------------------+
+ | SPAD COUNT |
+ +------------------------+
+ | DB ENTRY SIZE |
+ +------------------------+
+ | DB DATA |
+ +------------------------+
+ | : |
+ +------------------------+
+ | : |
+ +------------------------+
+ | DB DATA |
+ +------------------------+
+
+
+ COMMAND:
+
+ NTB function supports three commands:
+
+ CMD_CONFIGURE_DOORBELL (0x1): Command to configure doorbell. Before
+ invoking this command, the host should allocate and initialize
+ MSI/MSI-X vectors (i.e., initialize the MSI/MSI-X Capability in the
+ Endpoint). The endpoint on receiving this command will configure
+ the outbound ATU such that transactions to Doorbell BAR will be routed
+ to the MSI/MSI-X address programmed by the host. The ARGUMENT
+ register should be populated with number of DBs to configure (in the
+ lower 16 bits) and if MSI or MSI-X should be configured (BIT 16).
+
+ CMD_CONFIGURE_MW (0x2): Command to configure memory window (MW). The
+ host invokes this command after allocating a buffer that can be
+ accessed by remote host. The allocated address should be programmed
+ in the ADDRESS register (64 bit), the size should be programmed in
+ the SIZE register and the memory window index should be programmed
+ in the ARGUMENT register. The endpoint on receiving this command
+ will configure the outbound ATU such that transactions to MW BAR
+ are routed to the address provided by the host.
+
+ CMD_LINK_UP (0x3): Command to indicate an NTB application is
+ bound to the EP device on the host side. Once the endpoint
+ receives this command from both the hosts, the endpoint will
+ raise a LINK_UP event to both the hosts to indicate the host
+ NTB applications can start communicating with each other.
+
+ ARGUMENT:
+
+ The value of this register is based on the commands issued in
+ command register. See COMMAND section for more information.
+
+ TOPOLOGY:
+
+ Set to NTB_TOPO_B2B_USD for Primary interface
+ Set to NTB_TOPO_B2B_DSD for Secondary interface
+
+ ADDRESS/SIZE:
+
+ Address and Size to be used while configuring the memory window.
+ See "CMD_CONFIGURE_MW" for more info.
+
+ MEMORY WINDOW1 OFFSET:
+
+ Memory Window 1 and Doorbell registers are packed together in the
+ same BAR. The initial portion of the region will have doorbell
+ registers and the latter portion of the region is for memory window 1.
+ This register will specify the offset of the memory window 1.
+
+ NO OF MEMORY WINDOW:
+
+ Specifies the number of memory windows supported by the NTB device.
+
+ SPAD OFFSET:
+
+ Self scratchpad region and config region are packed together in the
+ same BAR. The initial portion of the region will have config region
+ and the latter portion of the region is for self scratchpad. This
+ register will specify the offset of the self scratchpad registers.
+
+ SPAD COUNT:
+
+ Specifies the number of scratchpad registers supported by the NTB
+ device.
+
+ DB ENTRY SIZE:
+
+ Used to determine the offset within the DB BAR that should be written
+ in order to raise doorbell. EPF NTB can use either MSI or MSI-X to
+ ring doorbell (MSI-X support will be added later). MSI uses same
+ address for all the interrupts and MSI-X can provide different
+ addresses for different interrupts. The MSI/MSI-X address is provided
+ by the host and the address it gives is based on the MSI/MSI-X
+ implementation supported by the host. For instance, ARM platform
+ using GIC ITS will have the same MSI-X address for all the interrupts.
+ In order to support all the combinations and use the same mechanism
+ for both MSI and MSI-X, EPF NTB allocates a separate region in the
+ Outbound Address Space for each of the interrupts. This region will
+ be mapped to the MSI/MSI-X address provided by the host. If a host
+ provides the same address for all the interrupts, all the regions
+ will be translated to the same address. If a host provides different
+ addresses, the regions will be translated to different addresses. This
+ will ensure there is no difference while raising the doorbell.
+
+ DB DATA:
+
+ EPF NTB supports 32 interrupts, so there are 32 DB DATA registers.
+ This holds the MSI/MSI-X data that has to be written to MSI address
+ for raising doorbell interrupt. This will be populated by EPF NTB
+ while invoking CMD_CONFIGURE_DOORBELL.
+
+Scratchpad Registers:
+---------------------
+
+ Each host has its own register space allocated in the memory of NTB endpoint
+ controller. They are both readable and writable from both sides of the bridge.
+ They are used by applications built over NTB and can be used to pass control
+ and status information between both sides of a device.
+
+ Scratchpad registers has 2 parts
+ 1) Self Scratchpad: Host's own register space
+ 2) Peer Scratchpad: Remote host's register space.
+
+Doorbell Registers:
+-------------------
+
+ Doorbell Registers are used by the hosts to interrupt each other.
+
+Memory Window:
+--------------
+
+ Actual transfer of data between the two hosts will happen using the
+ memory window.
+
+Modeling Constructs:
+====================
+
+There are 5 or more distinct regions (config, self scratchpad, peer
+scratchpad, doorbell, one or more memory windows) to be modeled to achieve
+NTB functionality. At least one memory window is required while more than
+one is permitted. All these regions should be mapped to BARs for hosts to
+access these regions.
+
+If one 32-bit BAR is allocated for each of these regions, the scheme would
+look like this:
+
+====== ===============
+BAR NO CONSTRUCTS USED
+====== ===============
+BAR0 Config Region
+BAR1 Self Scratchpad
+BAR2 Peer Scratchpad
+BAR3 Doorbell
+BAR4 Memory Window 1
+BAR5 Memory Window 2
+====== ===============
+
+However if we allocate a separate BAR for each of the regions, there would not
+be enough BARs for all the regions in a platform that supports only 64-bit
+BARs.
+
+In order to be supported by most of the platforms, the regions should be
+packed and mapped to BARs in a way that provides NTB functionality and
+also makes sure the host doesn't access any region that it is not supposed
+to.
+
+The following scheme is used in EPF NTB Function:
+
+====== ===============================
+BAR NO CONSTRUCTS USED
+====== ===============================
+BAR0 Config Region + Self Scratchpad
+BAR1 Peer Scratchpad
+BAR2 Doorbell + Memory Window 1
+BAR3 Memory Window 2
+BAR4 Memory Window 3
+BAR5 Memory Window 4
+====== ===============================
+
+With this scheme, for the basic NTB functionality 3 BARs should be sufficient.
+
+Modeling Config/Scratchpad Region:
+----------------------------------
+
+.. code-block:: text
+
+ +-----------------+------->+------------------+ +-----------------+
+ | BAR0 | | CONFIG REGION | | BAR0 |
+ +-----------------+----+ +------------------+<-------+-----------------+
+ | BAR1 | | |SCRATCHPAD REGION | | BAR1 |
+ +-----------------+ +-->+------------------+<-------+-----------------+
+ | BAR2 | Local Memory | BAR2 |
+ +-----------------+ +-----------------+
+ | BAR3 | | BAR3 |
+ +-----------------+ +-----------------+
+ | BAR4 | | BAR4 |
+ +-----------------+ +-----------------+
+ | BAR5 | | BAR5 |
+ +-----------------+ +-----------------+
+ EP CONTROLLER 1 EP CONTROLLER 2
+
+Above diagram shows Config region + Scratchpad region for HOST1 (connected to
+EP controller 1) allocated in local memory. The HOST1 can access the config
+region and scratchpad region (self scratchpad) using BAR0 of EP controller 1.
+The peer host (HOST2 connected to EP controller 2) can also access this
+scratchpad region (peer scratchpad) using BAR1 of EP controller 2. This
+diagram shows the case where Config region and Scratchpad regions are allocated
+for HOST1, however the same is applicable for HOST2.
+
+Modeling Doorbell/Memory Window 1:
+----------------------------------
+
+.. code-block:: text
+
+ +-----------------+ +----->+----------------+-----------+-----------------+
+ | BAR0 | | | Doorbell 1 +-----------> MSI-X ADDRESS 1 |
+ +-----------------+ | +----------------+ +-----------------+
+ | BAR1 | | | Doorbell 2 +---------+ | |
+ +-----------------+----+ +----------------+ | | |
+ | BAR2 | | Doorbell 3 +-------+ | +-----------------+
+ +-----------------+----+ +----------------+ | +-> MSI-X ADDRESS 2 |
+ | BAR3 | | | Doorbell 4 +-----+ | +-----------------+
+ +-----------------+ | |----------------+ | | | |
+ | BAR4 | | | | | | +-----------------+
+ +-----------------+ | | MW1 +---+ | +-->+ MSI-X ADDRESS 3||
+ | BAR5 | | | | | | +-----------------+
+ +-----------------+ +----->-----------------+ | | | |
+ EP CONTROLLER 1 | | | | +-----------------+
+ | | | +---->+ MSI-X ADDRESS 4 |
+ +----------------+ | +-----------------+
+ EP CONTROLLER 2 | | |
+ (OB SPACE) | | |
+ +-------> MW1 |
+ | |
+ | |
+ +-----------------+
+ | |
+ | |
+ | |
+ | |
+ | |
+ +-----------------+
+ PCI Address Space
+ (Managed by HOST2)
+
+Above diagram shows how the doorbell and memory window 1 is mapped so that
+HOST1 can raise doorbell interrupt on HOST2 and also how HOST1 can access
+buffers exposed by HOST2 using memory window1 (MW1). Here doorbell and
+memory window 1 regions are allocated in EP controller 2 outbound (OB) address
+space. Allocating and configuring BARs for doorbell and memory window1
+is done during the initialization phase of NTB endpoint function driver.
+Mapping from EP controller 2 OB space to PCI address space is done when HOST2
+sends CMD_CONFIGURE_MW/CMD_CONFIGURE_DOORBELL.
+
+Modeling Optional Memory Windows:
+---------------------------------
+
+This is modeled the same was as MW1 but each of the additional memory windows
+is mapped to separate BARs.
diff --git a/Documentation/PCI/endpoint/pci-ntb-howto.rst b/Documentation/PCI/endpoint/pci-ntb-howto.rst
new file mode 100644
index 000000000000..4261e7157ef1
--- /dev/null
+++ b/Documentation/PCI/endpoint/pci-ntb-howto.rst
@@ -0,0 +1,158 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================================================================
+PCI Non-Transparent Bridge (NTB) Endpoint Function (EPF) User Guide
+===================================================================
+
+:Author: Kishon Vijay Abraham I <kishon@ti.com>
+
+This document is a guide to help users use pci-epf-ntb function driver
+and ntb_hw_epf host driver for NTB functionality. The list of steps to
+be followed in the host side and EP side is given below. For the hardware
+configuration and internals of NTB using configurable endpoints see
+Documentation/PCI/endpoint/pci-ntb-function.rst
+
+Endpoint Device
+===============
+
+Endpoint Controller Devices
+---------------------------
+
+For implementing NTB functionality at least two endpoint controller devices
+are required.
+
+To find the list of endpoint controller devices in the system::
+
+ # ls /sys/class/pci_epc/
+ 2900000.pcie-ep 2910000.pcie-ep
+
+If PCI_ENDPOINT_CONFIGFS is enabled::
+
+ # ls /sys/kernel/config/pci_ep/controllers
+ 2900000.pcie-ep 2910000.pcie-ep
+
+
+Endpoint Function Drivers
+-------------------------
+
+To find the list of endpoint function drivers in the system::
+
+ # ls /sys/bus/pci-epf/drivers
+ pci_epf_ntb pci_epf_ntb
+
+If PCI_ENDPOINT_CONFIGFS is enabled::
+
+ # ls /sys/kernel/config/pci_ep/functions
+ pci_epf_ntb pci_epf_ntb
+
+
+Creating pci-epf-ntb Device
+----------------------------
+
+PCI endpoint function device can be created using the configfs. To create
+pci-epf-ntb device, the following commands can be used::
+
+ # mount -t configfs none /sys/kernel/config
+ # cd /sys/kernel/config/pci_ep/
+ # mkdir functions/pci_epf_ntb/func1
+
+The "mkdir func1" above creates the pci-epf-ntb function device that will
+be probed by pci_epf_ntb driver.
+
+The PCI endpoint framework populates the directory with the following
+configurable fields::
+
+ # ls functions/pci_epf_ntb/func1
+ baseclass_code deviceid msi_interrupts pci-epf-ntb.0
+ progif_code secondary subsys_id vendorid
+ cache_line_size interrupt_pin msix_interrupts primary
+ revid subclass_code subsys_vendor_id
+
+The PCI endpoint function driver populates these entries with default values
+when the device is bound to the driver. The pci-epf-ntb driver populates
+vendorid with 0xffff and interrupt_pin with 0x0001::
+
+ # cat functions/pci_epf_ntb/func1/vendorid
+ 0xffff
+ # cat functions/pci_epf_ntb/func1/interrupt_pin
+ 0x0001
+
+
+Configuring pci-epf-ntb Device
+-------------------------------
+
+The user can configure the pci-epf-ntb device using its configfs entry. In order
+to change the vendorid and the deviceid, the following
+commands can be used::
+
+ # echo 0x104c > functions/pci_epf_ntb/func1/vendorid
+ # echo 0xb00d > functions/pci_epf_ntb/func1/deviceid
+
+The PCI endpoint framework also automatically creates a sub-directory in the
+function attribute directory. This sub-directory has the same name as the name
+of the function device and is populated with the following NTB specific
+attributes that can be configured by the user::
+
+ # ls functions/pci_epf_ntb/func1/pci_epf_ntb.0/
+ db_count mw1 mw2 mw3 mw4 num_mws
+ spad_count
+
+A sample configuration for NTB function is given below::
+
+ # echo 4 > functions/pci_epf_ntb/func1/pci_epf_ntb.0/db_count
+ # echo 128 > functions/pci_epf_ntb/func1/pci_epf_ntb.0/spad_count
+ # echo 2 > functions/pci_epf_ntb/func1/pci_epf_ntb.0/num_mws
+ # echo 0x100000 > functions/pci_epf_ntb/func1/pci_epf_ntb.0/mw1
+ # echo 0x100000 > functions/pci_epf_ntb/func1/pci_epf_ntb.0/mw2
+
+Binding pci-epf-ntb Device to EP Controller
+--------------------------------------------
+
+NTB function device should be attached to two PCI endpoint controllers
+connected to the two hosts. Use the 'primary' and 'secondary' entries
+inside NTB function device to attach one PCI endpoint controller to
+primary interface and the other PCI endpoint controller to the secondary
+interface::
+
+ # ln -s controllers/2900000.pcie-ep/ functions/pci-epf-ntb/func1/primary
+ # ln -s controllers/2910000.pcie-ep/ functions/pci-epf-ntb/func1/secondary
+
+Once the above step is completed, both the PCI endpoint controllers are ready to
+establish a link with the host.
+
+
+Start the Link
+--------------
+
+In order for the endpoint device to establish a link with the host, the _start_
+field should be populated with '1'. For NTB, both the PCI endpoint controllers
+should establish link with the host::
+
+ # echo 1 > controllers/2900000.pcie-ep/start
+ # echo 1 > controllers/2910000.pcie-ep/start
+
+
+RootComplex Device
+==================
+
+lspci Output
+------------
+
+Note that the devices listed here correspond to the values populated in
+"Creating pci-epf-ntb Device" section above::
+
+ # lspci
+ 0000:00:00.0 PCI bridge: Texas Instruments Device b00d
+ 0000:01:00.0 RAM memory: Texas Instruments Device b00d
+
+
+Using ntb_hw_epf Device
+-----------------------
+
+The host side software follows the standard NTB software architecture in Linux.
+All the existing client side NTB utilities like NTB Transport Client and NTB
+Netdev, NTB Ping Pong Test Client and NTB Tool Test Client can be used with NTB
+function device.
+
+For more information on NTB see
+:doc:`Non-Transparent Bridge <../../driver-api/ntb>`
diff --git a/Documentation/PCI/endpoint/pci-nvme-function.rst b/Documentation/PCI/endpoint/pci-nvme-function.rst
new file mode 100644
index 000000000000..a68015317f7f
--- /dev/null
+++ b/Documentation/PCI/endpoint/pci-nvme-function.rst
@@ -0,0 +1,13 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=================
+PCI NVMe Function
+=================
+
+:Author: Damien Le Moal <dlemoal@kernel.org>
+
+The PCI NVMe endpoint function implements a PCI NVMe controller using the NVMe
+subsystem target core code. The driver for this function resides with the NVMe
+subsystem as drivers/nvme/target/pci-epf.c.
+
+See Documentation/nvme/nvme-pci-endpoint-target.rst for more details.
diff --git a/Documentation/PCI/endpoint/pci-test-function.rst b/Documentation/PCI/endpoint/pci-test-function.rst
new file mode 100644
index 000000000000..3c8521d7aa31
--- /dev/null
+++ b/Documentation/PCI/endpoint/pci-test-function.rst
@@ -0,0 +1,103 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=================
+PCI Test Function
+=================
+
+:Author: Kishon Vijay Abraham I <kishon@ti.com>
+
+Traditionally PCI RC has always been validated by using standard
+PCI cards like ethernet PCI cards or USB PCI cards or SATA PCI cards.
+However with the addition of EP-core in linux kernel, it is possible
+to configure a PCI controller that can operate in EP mode to work as
+a test device.
+
+The PCI endpoint test device is a virtual device (defined in software)
+used to test the endpoint functionality and serve as a sample driver
+for other PCI endpoint devices (to use the EP framework).
+
+The PCI endpoint test device has the following registers:
+
+ 1) PCI_ENDPOINT_TEST_MAGIC
+ 2) PCI_ENDPOINT_TEST_COMMAND
+ 3) PCI_ENDPOINT_TEST_STATUS
+ 4) PCI_ENDPOINT_TEST_SRC_ADDR
+ 5) PCI_ENDPOINT_TEST_DST_ADDR
+ 6) PCI_ENDPOINT_TEST_SIZE
+ 7) PCI_ENDPOINT_TEST_CHECKSUM
+ 8) PCI_ENDPOINT_TEST_IRQ_TYPE
+ 9) PCI_ENDPOINT_TEST_IRQ_NUMBER
+
+* PCI_ENDPOINT_TEST_MAGIC
+
+This register will be used to test BAR0. A known pattern will be written
+and read back from MAGIC register to verify BAR0.
+
+* PCI_ENDPOINT_TEST_COMMAND
+
+This register will be used by the host driver to indicate the function
+that the endpoint device must perform.
+
+======== ================================================================
+Bitfield Description
+======== ================================================================
+Bit 0 raise legacy IRQ
+Bit 1 raise MSI IRQ
+Bit 2 raise MSI-X IRQ
+Bit 3 read command (read data from RC buffer)
+Bit 4 write command (write data to RC buffer)
+Bit 5 copy command (copy data from one RC buffer to another RC buffer)
+======== ================================================================
+
+* PCI_ENDPOINT_TEST_STATUS
+
+This register reflects the status of the PCI endpoint device.
+
+======== ==============================
+Bitfield Description
+======== ==============================
+Bit 0 read success
+Bit 1 read fail
+Bit 2 write success
+Bit 3 write fail
+Bit 4 copy success
+Bit 5 copy fail
+Bit 6 IRQ raised
+Bit 7 source address is invalid
+Bit 8 destination address is invalid
+======== ==============================
+
+* PCI_ENDPOINT_TEST_SRC_ADDR
+
+This register contains the source address (RC buffer address) for the
+COPY/READ command.
+
+* PCI_ENDPOINT_TEST_DST_ADDR
+
+This register contains the destination address (RC buffer address) for
+the COPY/WRITE command.
+
+* PCI_ENDPOINT_TEST_IRQ_TYPE
+
+This register contains the interrupt type (Legacy/MSI) triggered
+for the READ/WRITE/COPY and raise IRQ (Legacy/MSI) commands.
+
+Possible types:
+
+====== ==
+Legacy 0
+MSI 1
+MSI-X 2
+====== ==
+
+* PCI_ENDPOINT_TEST_IRQ_NUMBER
+
+This register contains the triggered ID interrupt.
+
+Admissible values:
+
+====== ===========
+Legacy 0
+MSI [1 .. 32]
+MSI-X [1 .. 2048]
+====== ===========
diff --git a/Documentation/PCI/endpoint/pci-test-function.txt b/Documentation/PCI/endpoint/pci-test-function.txt
deleted file mode 100644
index 5916f1f592bb..000000000000
--- a/Documentation/PCI/endpoint/pci-test-function.txt
+++ /dev/null
@@ -1,87 +0,0 @@
- PCI TEST
- Kishon Vijay Abraham I <kishon@ti.com>
-
-Traditionally PCI RC has always been validated by using standard
-PCI cards like ethernet PCI cards or USB PCI cards or SATA PCI cards.
-However with the addition of EP-core in linux kernel, it is possible
-to configure a PCI controller that can operate in EP mode to work as
-a test device.
-
-The PCI endpoint test device is a virtual device (defined in software)
-used to test the endpoint functionality and serve as a sample driver
-for other PCI endpoint devices (to use the EP framework).
-
-The PCI endpoint test device has the following registers:
-
- 1) PCI_ENDPOINT_TEST_MAGIC
- 2) PCI_ENDPOINT_TEST_COMMAND
- 3) PCI_ENDPOINT_TEST_STATUS
- 4) PCI_ENDPOINT_TEST_SRC_ADDR
- 5) PCI_ENDPOINT_TEST_DST_ADDR
- 6) PCI_ENDPOINT_TEST_SIZE
- 7) PCI_ENDPOINT_TEST_CHECKSUM
- 8) PCI_ENDPOINT_TEST_IRQ_TYPE
- 9) PCI_ENDPOINT_TEST_IRQ_NUMBER
-
-*) PCI_ENDPOINT_TEST_MAGIC
-
-This register will be used to test BAR0. A known pattern will be written
-and read back from MAGIC register to verify BAR0.
-
-*) PCI_ENDPOINT_TEST_COMMAND:
-
-This register will be used by the host driver to indicate the function
-that the endpoint device must perform.
-
-Bitfield Description:
- Bit 0 : raise legacy IRQ
- Bit 1 : raise MSI IRQ
- Bit 2 : raise MSI-X IRQ
- Bit 3 : read command (read data from RC buffer)
- Bit 4 : write command (write data to RC buffer)
- Bit 5 : copy command (copy data from one RC buffer to another
- RC buffer)
-
-*) PCI_ENDPOINT_TEST_STATUS
-
-This register reflects the status of the PCI endpoint device.
-
-Bitfield Description:
- Bit 0 : read success
- Bit 1 : read fail
- Bit 2 : write success
- Bit 3 : write fail
- Bit 4 : copy success
- Bit 5 : copy fail
- Bit 6 : IRQ raised
- Bit 7 : source address is invalid
- Bit 8 : destination address is invalid
-
-*) PCI_ENDPOINT_TEST_SRC_ADDR
-
-This register contains the source address (RC buffer address) for the
-COPY/READ command.
-
-*) PCI_ENDPOINT_TEST_DST_ADDR
-
-This register contains the destination address (RC buffer address) for
-the COPY/WRITE command.
-
-*) PCI_ENDPOINT_TEST_IRQ_TYPE
-
-This register contains the interrupt type (Legacy/MSI) triggered
-for the READ/WRITE/COPY and raise IRQ (Legacy/MSI) commands.
-
-Possible types:
- - Legacy : 0
- - MSI : 1
- - MSI-X : 2
-
-*) PCI_ENDPOINT_TEST_IRQ_NUMBER
-
-This register contains the triggered ID interrupt.
-
-Admissible values:
- - Legacy : 0
- - MSI : [1 .. 32]
- - MSI-X : [1 .. 2048]
diff --git a/Documentation/PCI/endpoint/pci-test-howto.rst b/Documentation/PCI/endpoint/pci-test-howto.rst
new file mode 100644
index 000000000000..dd66858cde46
--- /dev/null
+++ b/Documentation/PCI/endpoint/pci-test-howto.rst
@@ -0,0 +1,220 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================
+PCI Test User Guide
+===================
+
+:Author: Kishon Vijay Abraham I <kishon@ti.com>
+
+This document is a guide to help users use pci-epf-test function driver
+and pci_endpoint_test host driver for testing PCI. The list of steps to
+be followed in the host side and EP side is given below.
+
+Endpoint Device
+===============
+
+Endpoint Controller Devices
+---------------------------
+
+To find the list of endpoint controller devices in the system::
+
+ # ls /sys/class/pci_epc/
+ 51000000.pcie_ep
+
+If PCI_ENDPOINT_CONFIGFS is enabled::
+
+ # ls /sys/kernel/config/pci_ep/controllers
+ 51000000.pcie_ep
+
+
+Endpoint Function Drivers
+-------------------------
+
+To find the list of endpoint function drivers in the system::
+
+ # ls /sys/bus/pci-epf/drivers
+ pci_epf_test
+
+If PCI_ENDPOINT_CONFIGFS is enabled::
+
+ # ls /sys/kernel/config/pci_ep/functions
+ pci_epf_test
+
+
+Creating pci-epf-test Device
+----------------------------
+
+PCI endpoint function device can be created using the configfs. To create
+pci-epf-test device, the following commands can be used::
+
+ # mount -t configfs none /sys/kernel/config
+ # cd /sys/kernel/config/pci_ep/
+ # mkdir functions/pci_epf_test/func1
+
+The "mkdir func1" above creates the pci-epf-test function device that will
+be probed by pci_epf_test driver.
+
+The PCI endpoint framework populates the directory with the following
+configurable fields::
+
+ # ls functions/pci_epf_test/func1
+ baseclass_code interrupt_pin progif_code subsys_id
+ cache_line_size msi_interrupts revid subsys_vendorid
+ deviceid msix_interrupts subclass_code vendorid
+
+The PCI endpoint function driver populates these entries with default values
+when the device is bound to the driver. The pci-epf-test driver populates
+vendorid with 0xffff and interrupt_pin with 0x0001::
+
+ # cat functions/pci_epf_test/func1/vendorid
+ 0xffff
+ # cat functions/pci_epf_test/func1/interrupt_pin
+ 0x0001
+
+
+Configuring pci-epf-test Device
+-------------------------------
+
+The user can configure the pci-epf-test device using configfs entry. In order
+to change the vendorid and the number of MSI interrupts used by the function
+device, the following commands can be used::
+
+ # echo 0x104c > functions/pci_epf_test/func1/vendorid
+ # echo 0xb500 > functions/pci_epf_test/func1/deviceid
+ # echo 32 > functions/pci_epf_test/func1/msi_interrupts
+ # echo 2048 > functions/pci_epf_test/func1/msix_interrupts
+
+
+Binding pci-epf-test Device to EP Controller
+--------------------------------------------
+
+In order for the endpoint function device to be useful, it has to be bound to
+a PCI endpoint controller driver. Use the configfs to bind the function
+device to one of the controller driver present in the system::
+
+ # ln -s functions/pci_epf_test/func1 controllers/51000000.pcie_ep/
+
+Once the above step is completed, the PCI endpoint is ready to establish a link
+with the host.
+
+
+Start the Link
+--------------
+
+In order for the endpoint device to establish a link with the host, the _start_
+field should be populated with '1'::
+
+ # echo 1 > controllers/51000000.pcie_ep/start
+
+
+RootComplex Device
+==================
+
+lspci Output
+------------
+
+Note that the devices listed here correspond to the value populated in 1.4
+above::
+
+ 00:00.0 PCI bridge: Texas Instruments Device 8888 (rev 01)
+ 01:00.0 Unassigned class [ff00]: Texas Instruments Device b500
+
+
+Using Endpoint Test function Device
+-----------------------------------
+
+Kselftest added in tools/testing/selftests/pci_endpoint can be used to run all
+the default PCI endpoint tests. To build the Kselftest for PCI endpoint
+subsystem, the following commands should be used::
+
+ # cd <kernel-dir>
+ # make -C tools/testing/selftests/pci_endpoint
+
+or if you desire to compile and install in your system::
+
+ # cd <kernel-dir>
+ # make -C tools/testing/selftests/pci_endpoint INSTALL_PATH=/usr/bin install
+
+The test will be located in <rootfs>/usr/bin/
+
+Kselftest Output
+~~~~~~~~~~~~~~~~
+::
+
+ # pci_endpoint_test
+ TAP version 13
+ 1..16
+ # Starting 16 tests from 9 test cases.
+ # RUN pci_ep_bar.BAR0.BAR_TEST ...
+ # OK pci_ep_bar.BAR0.BAR_TEST
+ ok 1 pci_ep_bar.BAR0.BAR_TEST
+ # RUN pci_ep_bar.BAR1.BAR_TEST ...
+ # OK pci_ep_bar.BAR1.BAR_TEST
+ ok 2 pci_ep_bar.BAR1.BAR_TEST
+ # RUN pci_ep_bar.BAR2.BAR_TEST ...
+ # OK pci_ep_bar.BAR2.BAR_TEST
+ ok 3 pci_ep_bar.BAR2.BAR_TEST
+ # RUN pci_ep_bar.BAR3.BAR_TEST ...
+ # OK pci_ep_bar.BAR3.BAR_TEST
+ ok 4 pci_ep_bar.BAR3.BAR_TEST
+ # RUN pci_ep_bar.BAR4.BAR_TEST ...
+ # OK pci_ep_bar.BAR4.BAR_TEST
+ ok 5 pci_ep_bar.BAR4.BAR_TEST
+ # RUN pci_ep_bar.BAR5.BAR_TEST ...
+ # OK pci_ep_bar.BAR5.BAR_TEST
+ ok 6 pci_ep_bar.BAR5.BAR_TEST
+ # RUN pci_ep_basic.CONSECUTIVE_BAR_TEST ...
+ # OK pci_ep_basic.CONSECUTIVE_BAR_TEST
+ ok 7 pci_ep_basic.CONSECUTIVE_BAR_TEST
+ # RUN pci_ep_basic.LEGACY_IRQ_TEST ...
+ # OK pci_ep_basic.LEGACY_IRQ_TEST
+ ok 8 pci_ep_basic.LEGACY_IRQ_TEST
+ # RUN pci_ep_basic.MSI_TEST ...
+ # OK pci_ep_basic.MSI_TEST
+ ok 9 pci_ep_basic.MSI_TEST
+ # RUN pci_ep_basic.MSIX_TEST ...
+ # OK pci_ep_basic.MSIX_TEST
+ ok 10 pci_ep_basic.MSIX_TEST
+ # RUN pci_ep_data_transfer.memcpy.READ_TEST ...
+ # OK pci_ep_data_transfer.memcpy.READ_TEST
+ ok 11 pci_ep_data_transfer.memcpy.READ_TEST
+ # RUN pci_ep_data_transfer.memcpy.WRITE_TEST ...
+ # OK pci_ep_data_transfer.memcpy.WRITE_TEST
+ ok 12 pci_ep_data_transfer.memcpy.WRITE_TEST
+ # RUN pci_ep_data_transfer.memcpy.COPY_TEST ...
+ # OK pci_ep_data_transfer.memcpy.COPY_TEST
+ ok 13 pci_ep_data_transfer.memcpy.COPY_TEST
+ # RUN pci_ep_data_transfer.dma.READ_TEST ...
+ # OK pci_ep_data_transfer.dma.READ_TEST
+ ok 14 pci_ep_data_transfer.dma.READ_TEST
+ # RUN pci_ep_data_transfer.dma.WRITE_TEST ...
+ # OK pci_ep_data_transfer.dma.WRITE_TEST
+ ok 15 pci_ep_data_transfer.dma.WRITE_TEST
+ # RUN pci_ep_data_transfer.dma.COPY_TEST ...
+ # OK pci_ep_data_transfer.dma.COPY_TEST
+ ok 16 pci_ep_data_transfer.dma.COPY_TEST
+ # PASSED: 16 / 16 tests passed.
+ # Totals: pass:16 fail:0 xfail:0 xpass:0 skip:0 error:0
+
+
+Testcase 16 (pci_ep_data_transfer.dma.COPY_TEST) will fail for most of the DMA
+capable endpoint controllers due to the absence of the MEMCPY over DMA. For such
+controllers, it is advisable to skip this testcase using this
+command::
+
+ # pci_endpoint_test -f pci_ep_bar -f pci_ep_basic -v memcpy -T COPY_TEST -v dma
+
+Kselftest EP Doorbell
+~~~~~~~~~~~~~~~~~~~~~
+
+If the Endpoint MSI controller is used for the doorbell usecase, run below
+command for testing it:
+
+ # pci_endpoint_test -f pcie_ep_doorbell
+
+ # Starting 1 tests from 1 test cases.
+ # RUN pcie_ep_doorbell.DOORBELL_TEST ...
+ # OK pcie_ep_doorbell.DOORBELL_TEST
+ ok 1 pcie_ep_doorbell.DOORBELL_TEST
+ # PASSED: 1 / 1 tests passed.
+ # Totals: pass:1 fail:0 xfail:0 xpass:0 skip:0 error:0
diff --git a/Documentation/PCI/endpoint/pci-test-howto.txt b/Documentation/PCI/endpoint/pci-test-howto.txt
deleted file mode 100644
index 040479f437a5..000000000000
--- a/Documentation/PCI/endpoint/pci-test-howto.txt
+++ /dev/null
@@ -1,206 +0,0 @@
- PCI TEST USERGUIDE
- Kishon Vijay Abraham I <kishon@ti.com>
-
-This document is a guide to help users use pci-epf-test function driver
-and pci_endpoint_test host driver for testing PCI. The list of steps to
-be followed in the host side and EP side is given below.
-
-1. Endpoint Device
-
-1.1 Endpoint Controller Devices
-
-To find the list of endpoint controller devices in the system:
-
- # ls /sys/class/pci_epc/
- 51000000.pcie_ep
-
-If PCI_ENDPOINT_CONFIGFS is enabled
- # ls /sys/kernel/config/pci_ep/controllers
- 51000000.pcie_ep
-
-1.2 Endpoint Function Drivers
-
-To find the list of endpoint function drivers in the system:
-
- # ls /sys/bus/pci-epf/drivers
- pci_epf_test
-
-If PCI_ENDPOINT_CONFIGFS is enabled
- # ls /sys/kernel/config/pci_ep/functions
- pci_epf_test
-
-1.3 Creating pci-epf-test Device
-
-PCI endpoint function device can be created using the configfs. To create
-pci-epf-test device, the following commands can be used
-
- # mount -t configfs none /sys/kernel/config
- # cd /sys/kernel/config/pci_ep/
- # mkdir functions/pci_epf_test/func1
-
-The "mkdir func1" above creates the pci-epf-test function device that will
-be probed by pci_epf_test driver.
-
-The PCI endpoint framework populates the directory with the following
-configurable fields.
-
- # ls functions/pci_epf_test/func1
- baseclass_code interrupt_pin progif_code subsys_id
- cache_line_size msi_interrupts revid subsys_vendorid
- deviceid msix_interrupts subclass_code vendorid
-
-The PCI endpoint function driver populates these entries with default values
-when the device is bound to the driver. The pci-epf-test driver populates
-vendorid with 0xffff and interrupt_pin with 0x0001
-
- # cat functions/pci_epf_test/func1/vendorid
- 0xffff
- # cat functions/pci_epf_test/func1/interrupt_pin
- 0x0001
-
-1.4 Configuring pci-epf-test Device
-
-The user can configure the pci-epf-test device using configfs entry. In order
-to change the vendorid and the number of MSI interrupts used by the function
-device, the following commands can be used.
-
- # echo 0x104c > functions/pci_epf_test/func1/vendorid
- # echo 0xb500 > functions/pci_epf_test/func1/deviceid
- # echo 16 > functions/pci_epf_test/func1/msi_interrupts
- # echo 8 > functions/pci_epf_test/func1/msix_interrupts
-
-1.5 Binding pci-epf-test Device to EP Controller
-
-In order for the endpoint function device to be useful, it has to be bound to
-a PCI endpoint controller driver. Use the configfs to bind the function
-device to one of the controller driver present in the system.
-
- # ln -s functions/pci_epf_test/func1 controllers/51000000.pcie_ep/
-
-Once the above step is completed, the PCI endpoint is ready to establish a link
-with the host.
-
-1.6 Start the Link
-
-In order for the endpoint device to establish a link with the host, the _start_
-field should be populated with '1'.
-
- # echo 1 > controllers/51000000.pcie_ep/start
-
-2. RootComplex Device
-
-2.1 lspci Output
-
-Note that the devices listed here correspond to the value populated in 1.4 above
-
- 00:00.0 PCI bridge: Texas Instruments Device 8888 (rev 01)
- 01:00.0 Unassigned class [ff00]: Texas Instruments Device b500
-
-2.2 Using Endpoint Test function Device
-
-pcitest.sh added in tools/pci/ can be used to run all the default PCI endpoint
-tests. To compile this tool the following commands should be used:
-
- # cd <kernel-dir>
- # make -C tools/pci
-
-or if you desire to compile and install in your system:
-
- # cd <kernel-dir>
- # make -C tools/pci install
-
-The tool and script will be located in <rootfs>/usr/bin/
-
-2.2.1 pcitest.sh Output
- # pcitest.sh
- BAR tests
-
- BAR0: OKAY
- BAR1: OKAY
- BAR2: OKAY
- BAR3: OKAY
- BAR4: NOT OKAY
- BAR5: NOT OKAY
-
- Interrupt tests
-
- SET IRQ TYPE TO LEGACY: OKAY
- LEGACY IRQ: NOT OKAY
- SET IRQ TYPE TO MSI: OKAY
- MSI1: OKAY
- MSI2: OKAY
- MSI3: OKAY
- MSI4: OKAY
- MSI5: OKAY
- MSI6: OKAY
- MSI7: OKAY
- MSI8: OKAY
- MSI9: OKAY
- MSI10: OKAY
- MSI11: OKAY
- MSI12: OKAY
- MSI13: OKAY
- MSI14: OKAY
- MSI15: OKAY
- MSI16: OKAY
- MSI17: NOT OKAY
- MSI18: NOT OKAY
- MSI19: NOT OKAY
- MSI20: NOT OKAY
- MSI21: NOT OKAY
- MSI22: NOT OKAY
- MSI23: NOT OKAY
- MSI24: NOT OKAY
- MSI25: NOT OKAY
- MSI26: NOT OKAY
- MSI27: NOT OKAY
- MSI28: NOT OKAY
- MSI29: NOT OKAY
- MSI30: NOT OKAY
- MSI31: NOT OKAY
- MSI32: NOT OKAY
- SET IRQ TYPE TO MSI-X: OKAY
- MSI-X1: OKAY
- MSI-X2: OKAY
- MSI-X3: OKAY
- MSI-X4: OKAY
- MSI-X5: OKAY
- MSI-X6: OKAY
- MSI-X7: OKAY
- MSI-X8: OKAY
- MSI-X9: NOT OKAY
- MSI-X10: NOT OKAY
- MSI-X11: NOT OKAY
- MSI-X12: NOT OKAY
- MSI-X13: NOT OKAY
- MSI-X14: NOT OKAY
- MSI-X15: NOT OKAY
- MSI-X16: NOT OKAY
- [...]
- MSI-X2047: NOT OKAY
- MSI-X2048: NOT OKAY
-
- Read Tests
-
- SET IRQ TYPE TO MSI: OKAY
- READ ( 1 bytes): OKAY
- READ ( 1024 bytes): OKAY
- READ ( 1025 bytes): OKAY
- READ (1024000 bytes): OKAY
- READ (1024001 bytes): OKAY
-
- Write Tests
-
- WRITE ( 1 bytes): OKAY
- WRITE ( 1024 bytes): OKAY
- WRITE ( 1025 bytes): OKAY
- WRITE (1024000 bytes): OKAY
- WRITE (1024001 bytes): OKAY
-
- Copy Tests
-
- COPY ( 1 bytes): OKAY
- COPY ( 1024 bytes): OKAY
- COPY ( 1025 bytes): OKAY
- COPY (1024000 bytes): OKAY
- COPY (1024001 bytes): OKAY
diff --git a/Documentation/PCI/endpoint/pci-vntb-function.rst b/Documentation/PCI/endpoint/pci-vntb-function.rst
new file mode 100644
index 000000000000..0c51f53ab972
--- /dev/null
+++ b/Documentation/PCI/endpoint/pci-vntb-function.rst
@@ -0,0 +1,129 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=================
+PCI vNTB Function
+=================
+
+:Author: Frank Li <Frank.Li@nxp.com>
+
+The difference between PCI NTB function and PCI vNTB function is
+
+PCI NTB function need at two endpoint instances and connect HOST1
+and HOST2.
+
+PCI vNTB function only use one host and one endpoint(EP), use NTB
+connect EP and PCI host
+
+.. code-block:: text
+
+
+ +------------+ +---------------------------------------+
+ | | | |
+ +------------+ | +--------------+
+ | NTB | | | NTB |
+ | NetDev | | | NetDev |
+ +------------+ | +--------------+
+ | NTB | | | NTB |
+ | Transfer | | | Transfer |
+ +------------+ | +--------------+
+ | | | | |
+ | PCI NTB | | | |
+ | EPF | | | |
+ | Driver | | | PCI Virtual |
+ | | +---------------+ | NTB Driver |
+ | | | PCI EP NTB |<------>| |
+ | | | FN Driver | | |
+ +------------+ +---------------+ +--------------+
+ | | | | | |
+ | PCI BUS | <-----> | PCI EP BUS | | Virtual PCI |
+ | | PCI | | | BUS |
+ +------------+ +---------------+--------+--------------+
+ PCI RC PCI EP
+
+Constructs used for Implementing vNTB
+=====================================
+
+ 1) Config Region
+ 2) Self Scratchpad Registers
+ 3) Peer Scratchpad Registers
+ 4) Doorbell (DB) Registers
+ 5) Memory Window (MW)
+
+
+Config Region:
+--------------
+
+It is same as PCI NTB Function driver
+
+Scratchpad Registers:
+---------------------
+
+It is appended after Config region.
+
+.. code-block:: text
+
+
+ +--------------------------------------------------+ Base
+ | |
+ | |
+ | |
+ | Common Config Register |
+ | |
+ | |
+ | |
+ +-----------------------+--------------------------+ Base + span_offset
+ | | |
+ | Peer Span Space | Span Space |
+ | | |
+ | | |
+ +-----------------------+--------------------------+ Base + span_offset
+ | | | + span_count * 4
+ | | |
+ | Span Space | Peer Span Space |
+ | | |
+ +-----------------------+--------------------------+
+ Virtual PCI Pcie Endpoint
+ NTB Driver NTB Driver
+
+
+Doorbell Registers:
+-------------------
+
+ Doorbell Registers are used by the hosts to interrupt each other.
+
+Memory Window:
+--------------
+
+ Actual transfer of data between the two hosts will happen using the
+ memory window.
+
+Modeling Constructs:
+====================
+
+32-bit BARs.
+
+====== ===============
+BAR NO CONSTRUCTS USED
+====== ===============
+BAR0 Config Region
+BAR1 Doorbell
+BAR2 Memory Window 1
+BAR3 Memory Window 2
+BAR4 Memory Window 3
+BAR5 Memory Window 4
+====== ===============
+
+64-bit BARs.
+
+====== ===============================
+BAR NO CONSTRUCTS USED
+====== ===============================
+BAR0 Config Region + Scratchpad
+BAR1
+BAR2 Doorbell
+BAR3
+BAR4 Memory Window 1
+BAR5
+====== ===============================
+
+
diff --git a/Documentation/PCI/endpoint/pci-vntb-howto.rst b/Documentation/PCI/endpoint/pci-vntb-howto.rst
new file mode 100644
index 000000000000..9a7a2f0a6849
--- /dev/null
+++ b/Documentation/PCI/endpoint/pci-vntb-howto.rst
@@ -0,0 +1,169 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================================================================
+PCI Non-Transparent Bridge (NTB) Endpoint Function (EPF) User Guide
+===================================================================
+
+:Author: Frank Li <Frank.Li@nxp.com>
+
+This document is a guide to help users use pci-epf-vntb function driver
+and ntb_hw_epf host driver for NTB functionality. The list of steps to
+be followed in the host side and EP side is given below. For the hardware
+configuration and internals of NTB using configurable endpoints see
+Documentation/PCI/endpoint/pci-vntb-function.rst
+
+Endpoint Device
+===============
+
+Endpoint Controller Devices
+---------------------------
+
+To find the list of endpoint controller devices in the system::
+
+ # ls /sys/class/pci_epc/
+ 5f010000.pcie_ep
+
+If PCI_ENDPOINT_CONFIGFS is enabled::
+
+ # ls /sys/kernel/config/pci_ep/controllers
+ 5f010000.pcie_ep
+
+Endpoint Function Drivers
+-------------------------
+
+To find the list of endpoint function drivers in the system::
+
+ # ls /sys/bus/pci-epf/drivers
+ pci_epf_ntb pci_epf_test pci_epf_vntb
+
+If PCI_ENDPOINT_CONFIGFS is enabled::
+
+ # ls /sys/kernel/config/pci_ep/functions
+ pci_epf_ntb pci_epf_test pci_epf_vntb
+
+
+Creating pci-epf-vntb Device
+----------------------------
+
+PCI endpoint function device can be created using the configfs. To create
+pci-epf-vntb device, the following commands can be used::
+
+ # mount -t configfs none /sys/kernel/config
+ # cd /sys/kernel/config/pci_ep/
+ # mkdir functions/pci_epf_vntb/func1
+
+The "mkdir func1" above creates the pci-epf-ntb function device that will
+be probed by pci_epf_vntb driver.
+
+The PCI endpoint framework populates the directory with the following
+configurable fields::
+
+ # ls functions/pci_epf_ntb/func1
+ baseclass_code deviceid msi_interrupts pci-epf-ntb.0
+ progif_code secondary subsys_id vendorid
+ cache_line_size interrupt_pin msix_interrupts primary
+ revid subclass_code subsys_vendor_id
+
+The PCI endpoint function driver populates these entries with default values
+when the device is bound to the driver. The pci-epf-vntb driver populates
+vendorid with 0xffff and interrupt_pin with 0x0001::
+
+ # cat functions/pci_epf_vntb/func1/vendorid
+ 0xffff
+ # cat functions/pci_epf_vntb/func1/interrupt_pin
+ 0x0001
+
+
+Configuring pci-epf-vntb Device
+-------------------------------
+
+The user can configure the pci-epf-vntb device using its configfs entry. In order
+to change the vendorid and the deviceid, the following
+commands can be used::
+
+ # echo 0x1957 > functions/pci_epf_vntb/func1/vendorid
+ # echo 0x0809 > functions/pci_epf_vntb/func1/deviceid
+
+The PCI endpoint framework also automatically creates a sub-directory in the
+function attribute directory. This sub-directory has the same name as the name
+of the function device and is populated with the following NTB specific
+attributes that can be configured by the user::
+
+ # ls functions/pci_epf_vntb/func1/pci_epf_vntb.0/
+ ctrl_bar db_count mw1_bar mw2_bar mw3_bar mw4_bar spad_count
+ db_bar mw1 mw2 mw3 mw4 num_mws vbus_number
+ vntb_vid vntb_pid
+
+A sample configuration for NTB function is given below::
+
+ # echo 4 > functions/pci_epf_vntb/func1/pci_epf_vntb.0/db_count
+ # echo 128 > functions/pci_epf_vntb/func1/pci_epf_vntb.0/spad_count
+ # echo 1 > functions/pci_epf_vntb/func1/pci_epf_vntb.0/num_mws
+ # echo 0x100000 > functions/pci_epf_vntb/func1/pci_epf_vntb.0/mw1
+
+By default, each construct is assigned a BAR, as needed and in order.
+Should a specific BAR setup be required by the platform, BAR may be assigned
+to each construct using the related ``XYZ_bar`` entry.
+
+A sample configuration for virtual NTB driver for virtual PCI bus::
+
+ # echo 0x1957 > functions/pci_epf_vntb/func1/pci_epf_vntb.0/vntb_vid
+ # echo 0x080A > functions/pci_epf_vntb/func1/pci_epf_vntb.0/vntb_pid
+ # echo 0x10 > functions/pci_epf_vntb/func1/pci_epf_vntb.0/vbus_number
+
+Binding pci-epf-ntb Device to EP Controller
+--------------------------------------------
+
+NTB function device should be attached to PCI endpoint controllers
+connected to the host.
+
+ # ln -s controllers/5f010000.pcie_ep functions/pci-epf-ntb/func1/primary
+
+Once the above step is completed, the PCI endpoint controllers are ready to
+establish a link with the host.
+
+
+Start the Link
+--------------
+
+In order for the endpoint device to establish a link with the host, the _start_
+field should be populated with '1'. For NTB, both the PCI endpoint controllers
+should establish link with the host (imx8 don't need this steps)::
+
+ # echo 1 > controllers/5f010000.pcie_ep/start
+
+RootComplex Device
+==================
+
+lspci Output at Host side
+-------------------------
+
+Note that the devices listed here correspond to the values populated in
+"Creating pci-epf-ntb Device" section above::
+
+ # lspci
+ 00:00.0 PCI bridge: Freescale Semiconductor Inc Device 0000 (rev 01)
+ 01:00.0 RAM memory: Freescale Semiconductor Inc Device 0809
+
+Endpoint Device / Virtual PCI bus
+=================================
+
+lspci Output at EP Side / Virtual PCI bus
+-----------------------------------------
+
+Note that the devices listed here correspond to the values populated in
+"Creating pci-epf-ntb Device" section above::
+
+ # lspci
+ 10:00.0 Unassigned class [ffff]: Dawicontrol Computersysteme GmbH Device 1234 (rev ff)
+
+Using ntb_hw_epf Device
+-----------------------
+
+The host side software follows the standard NTB software architecture in Linux.
+All the existing client side NTB utilities like NTB Transport Client and NTB
+Netdev, NTB Ping Pong Test Client and NTB Tool Test Client can be used with NTB
+function device.
+
+For more information on NTB see
+:doc:`Non-Transparent Bridge <../../driver-api/ntb>`
diff --git a/Documentation/PCI/index.rst b/Documentation/PCI/index.rst
new file mode 100644
index 000000000000..5d720d2a415e
--- /dev/null
+++ b/Documentation/PCI/index.rst
@@ -0,0 +1,22 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=================
+PCI Bus Subsystem
+=================
+
+.. toctree::
+ :maxdepth: 2
+ :numbered:
+
+ pci
+ pciebus-howto
+ pci-iov-howto
+ msi-howto
+ sysfs-pci
+ acpi-info
+ pci-error-recovery
+ pcieaer-howto
+ endpoint/index
+ controller/index
+ boot-interrupts
+ tph
diff --git a/Documentation/PCI/msi-howto.rst b/Documentation/PCI/msi-howto.rst
new file mode 100644
index 000000000000..0692c9aec66f
--- /dev/null
+++ b/Documentation/PCI/msi-howto.rst
@@ -0,0 +1,297 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+==========================
+The MSI Driver Guide HOWTO
+==========================
+
+:Authors: Tom L Nguyen; Martine Silbermann; Matthew Wilcox
+
+:Copyright: 2003, 2008 Intel Corporation
+
+About this guide
+================
+
+This guide describes the basics of Message Signaled Interrupts (MSIs),
+the advantages of using MSI over traditional interrupt mechanisms, how
+to change your driver to use MSI or MSI-X and some basic diagnostics to
+try if a device doesn't support MSIs.
+
+
+What are MSIs?
+==============
+
+A Message Signaled Interrupt is a write from the device to a special
+address which causes an interrupt to be received by the CPU.
+
+The MSI capability was first specified in PCI 2.2 and was later enhanced
+in PCI 3.0 to allow each interrupt to be masked individually. The MSI-X
+capability was also introduced with PCI 3.0. It supports more interrupts
+per device than MSI and allows interrupts to be independently configured.
+
+Devices may support both MSI and MSI-X, but only one can be enabled at
+a time.
+
+
+Why use MSIs?
+=============
+
+There are three reasons why using MSIs can give an advantage over
+traditional pin-based interrupts.
+
+Pin-based PCI interrupts are often shared amongst several devices.
+To support this, the kernel must call each interrupt handler associated
+with an interrupt, which leads to reduced performance for the system as
+a whole. MSIs are never shared, so this problem cannot arise.
+
+When a device writes data to memory, then raises a pin-based interrupt,
+it is possible that the interrupt may arrive before all the data has
+arrived in memory (this becomes more likely with devices behind PCI-PCI
+bridges). In order to ensure that all the data has arrived in memory,
+the interrupt handler must read a register on the device which raised
+the interrupt. PCI transaction ordering rules require that all the data
+arrive in memory before the value may be returned from the register.
+Using MSIs avoids this problem as the interrupt-generating write cannot
+pass the data writes, so by the time the interrupt is raised, the driver
+knows that all the data has arrived in memory.
+
+PCI devices can only support a single pin-based interrupt per function.
+Often drivers have to query the device to find out what event has
+occurred, slowing down interrupt handling for the common case. With
+MSIs, a device can support more interrupts, allowing each interrupt
+to be specialised to a different purpose. One possible design gives
+infrequent conditions (such as errors) their own interrupt which allows
+the driver to handle the normal interrupt handling path more efficiently.
+Other possible designs include giving one interrupt to each packet queue
+in a network card or each port in a storage controller.
+
+
+How to use MSIs
+===============
+
+PCI devices are initialised to use pin-based interrupts. The device
+driver has to set up the device to use MSI or MSI-X. Not all machines
+support MSIs correctly, and for those machines, the APIs described below
+will simply fail and the device will continue to use pin-based interrupts.
+
+Include kernel support for MSIs
+-------------------------------
+
+To support MSI or MSI-X, the kernel must be built with the CONFIG_PCI_MSI
+option enabled. This option is only available on some architectures,
+and it may depend on some other options also being set. For example,
+on x86, you must also enable X86_UP_APIC or SMP in order to see the
+CONFIG_PCI_MSI option.
+
+Using MSI
+---------
+
+Most of the hard work is done for the driver in the PCI layer. The driver
+simply has to request that the PCI layer set up the MSI capability for this
+device.
+
+To automatically use MSI or MSI-X interrupt vectors, use the following
+function::
+
+ int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
+ unsigned int max_vecs, unsigned int flags);
+
+which allocates up to max_vecs interrupt vectors for a PCI device. It
+returns the number of vectors allocated or a negative error. If the device
+has a requirements for a minimum number of vectors the driver can pass a
+min_vecs argument set to this limit, and the PCI core will return -ENOSPC
+if it can't meet the minimum number of vectors.
+
+The flags argument is used to specify which type of interrupt can be used
+by the device and the driver (PCI_IRQ_INTX, PCI_IRQ_MSI, PCI_IRQ_MSIX).
+A convenient short-hand (PCI_IRQ_ALL_TYPES) is also available to ask for
+any possible kind of interrupt. If the PCI_IRQ_AFFINITY flag is set,
+pci_alloc_irq_vectors() will spread the interrupts around the available CPUs.
+
+To get the Linux IRQ numbers passed to request_irq() and free_irq() and the
+vectors, use the following function::
+
+ int pci_irq_vector(struct pci_dev *dev, unsigned int nr);
+
+Any allocated resources should be freed before removing the device using
+the following function::
+
+ void pci_free_irq_vectors(struct pci_dev *dev);
+
+If a device supports both MSI-X and MSI capabilities, this API will use the
+MSI-X facilities in preference to the MSI facilities. MSI-X supports any
+number of interrupts between 1 and 2048. In contrast, MSI is restricted to
+a maximum of 32 interrupts (and must be a power of two). In addition, the
+MSI interrupt vectors must be allocated consecutively, so the system might
+not be able to allocate as many vectors for MSI as it could for MSI-X. On
+some platforms, MSI interrupts must all be targeted at the same set of CPUs
+whereas MSI-X interrupts can all be targeted at different CPUs.
+
+If a device supports neither MSI-X or MSI it will fall back to a single
+legacy IRQ vector.
+
+The typical usage of MSI or MSI-X interrupts is to allocate as many vectors
+as possible, likely up to the limit supported by the device. If nvec is
+larger than the number supported by the device it will automatically be
+capped to the supported limit, so there is no need to query the number of
+vectors supported beforehand::
+
+ nvec = pci_alloc_irq_vectors(pdev, 1, nvec, PCI_IRQ_ALL_TYPES)
+ if (nvec < 0)
+ goto out_err;
+
+If a driver is unable or unwilling to deal with a variable number of MSI
+interrupts it can request a particular number of interrupts by passing that
+number to pci_alloc_irq_vectors() function as both 'min_vecs' and
+'max_vecs' parameters::
+
+ ret = pci_alloc_irq_vectors(pdev, nvec, nvec, PCI_IRQ_ALL_TYPES);
+ if (ret < 0)
+ goto out_err;
+
+The most notorious example of the request type described above is enabling
+the single MSI mode for a device. It could be done by passing two 1s as
+'min_vecs' and 'max_vecs'::
+
+ ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
+ if (ret < 0)
+ goto out_err;
+
+Some devices might not support using legacy line interrupts, in which case
+the driver can specify that only MSI or MSI-X is acceptable::
+
+ nvec = pci_alloc_irq_vectors(pdev, 1, nvec, PCI_IRQ_MSI | PCI_IRQ_MSIX);
+ if (nvec < 0)
+ goto out_err;
+
+Legacy APIs
+-----------
+
+The following old APIs to enable and disable MSI or MSI-X interrupts should
+not be used in new code::
+
+ pci_enable_msi() /* deprecated */
+ pci_disable_msi() /* deprecated */
+ pci_enable_msix_range() /* deprecated */
+ pci_enable_msix_exact() /* deprecated */
+ pci_disable_msix() /* deprecated */
+
+Additionally there are APIs to provide the number of supported MSI or MSI-X
+vectors: pci_msi_vec_count() and pci_msix_vec_count(). In general these
+should be avoided in favor of letting pci_alloc_irq_vectors() cap the
+number of vectors. If you have a legitimate special use case for the count
+of vectors we might have to revisit that decision and add a
+pci_nr_irq_vectors() helper that handles MSI and MSI-X transparently.
+
+Considerations when using MSIs
+------------------------------
+
+Spinlocks
+~~~~~~~~~
+
+Most device drivers have a per-device spinlock which is taken in the
+interrupt handler. With pin-based interrupts or a single MSI, it is not
+necessary to disable interrupts (Linux guarantees the same interrupt will
+not be re-entered). If a device uses multiple interrupts, the driver
+must disable interrupts while the lock is held. If the device sends
+a different interrupt, the driver will deadlock trying to recursively
+acquire the spinlock. Such deadlocks can be avoided by using
+spin_lock_irqsave() or spin_lock_irq() which disable local interrupts
+and acquire the lock (see Documentation/kernel-hacking/locking.rst).
+
+How to tell whether MSI/MSI-X is enabled on a device
+----------------------------------------------------
+
+Using 'lspci -v' (as root) may show some devices with "MSI", "Message
+Signalled Interrupts" or "MSI-X" capabilities. Each of these capabilities
+has an 'Enable' flag which is followed with either "+" (enabled)
+or "-" (disabled).
+
+
+MSI quirks
+==========
+
+Several PCI chipsets or devices are known not to support MSIs.
+The PCI stack provides three ways to disable MSIs:
+
+1. globally
+2. on all devices behind a specific bridge
+3. on a single device
+
+Disabling MSIs globally
+-----------------------
+
+Some host chipsets simply don't support MSIs properly. If we're
+lucky, the manufacturer knows this and has indicated it in the ACPI
+FADT table. In this case, Linux automatically disables MSIs.
+Some boards don't include this information in the table and so we have
+to detect them ourselves. The complete list of these is found near the
+quirk_disable_all_msi() function in drivers/pci/quirks.c.
+
+If you have a board which has problems with MSIs, you can pass pci=nomsi
+on the kernel command line to disable MSIs on all devices. It would be
+in your best interests to report the problem to linux-pci@vger.kernel.org
+including a full 'lspci -v' so we can add the quirks to the kernel.
+
+Disabling MSIs below a bridge
+-----------------------------
+
+Some PCI bridges are not able to route MSIs between buses properly.
+In this case, MSIs must be disabled on all devices behind the bridge.
+
+Some bridges allow you to enable MSIs by changing some bits in their
+PCI configuration space (especially the Hypertransport chipsets such
+as the nVidia nForce and Serverworks HT2000). As with host chipsets,
+Linux mostly knows about them and automatically enables MSIs if it can.
+If you have a bridge unknown to Linux, you can enable
+MSIs in configuration space using whatever method you know works, then
+enable MSIs on that bridge by doing::
+
+ echo 1 > /sys/bus/pci/devices/$bridge/msi_bus
+
+where $bridge is the PCI address of the bridge you've enabled (eg
+0000:00:0e.0).
+
+To disable MSIs, echo 0 instead of 1. Changing this value should be
+done with caution as it could break interrupt handling for all devices
+below this bridge.
+
+Again, please notify linux-pci@vger.kernel.org of any bridges that need
+special handling.
+
+Disabling MSIs on a single device
+---------------------------------
+
+Some devices are known to have faulty MSI implementations. Usually this
+is handled in the individual device driver, but occasionally it's necessary
+to handle this with a quirk. Some drivers have an option to disable use
+of MSI. While this is a convenient workaround for the driver author,
+it is not good practice, and should not be emulated.
+
+Finding why MSIs are disabled on a device
+-----------------------------------------
+
+From the above three sections, you can see that there are many reasons
+why MSIs may not be enabled for a given device. Your first step should
+be to examine your dmesg carefully to determine whether MSIs are enabled
+for your machine. You should also check your .config to be sure you
+have enabled CONFIG_PCI_MSI.
+
+Then, 'lspci -t' gives the list of bridges above a device. Reading
+`/sys/bus/pci/devices/*/msi_bus` will tell you whether MSIs are enabled (1)
+or disabled (0). If 0 is found in any of the msi_bus files belonging
+to bridges between the PCI root and the device, MSIs are disabled.
+
+It is also worth checking the device driver to see whether it supports MSIs.
+For example, it may contain calls to pci_alloc_irq_vectors() with the
+PCI_IRQ_MSI or PCI_IRQ_MSIX flags.
+
+
+List of device drivers MSI(-X) APIs
+===================================
+
+The PCI/MSI subsystem has a dedicated C file for its exported device driver
+APIs — `drivers/pci/msi/api.c`. The following functions are exported:
+
+.. kernel-doc:: drivers/pci/msi/api.c
+ :export:
diff --git a/Documentation/PCI/pci-error-recovery.rst b/Documentation/PCI/pci-error-recovery.rst
new file mode 100644
index 000000000000..5df481ac6193
--- /dev/null
+++ b/Documentation/PCI/pci-error-recovery.rst
@@ -0,0 +1,457 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==================
+PCI Error Recovery
+==================
+
+
+:Authors: - Linas Vepstas <linasvepstas@gmail.com>
+ - Richard Lary <rlary@us.ibm.com>
+ - Mike Mason <mmlnx@us.ibm.com>
+
+
+Many PCI bus controllers are able to detect a variety of hardware
+PCI errors on the bus, such as parity errors on the data and address
+buses, as well as SERR and PERR errors. Some of the more advanced
+chipsets are able to deal with these errors; these include PCIe chipsets,
+and the PCI-host bridges found on IBM Power4, Power5 and Power6-based
+pSeries boxes. A typical action taken is to disconnect the affected device,
+halting all I/O to it. The goal of a disconnection is to avoid system
+corruption; for example, to halt system memory corruption due to DMAs
+to "wild" addresses. Typically, a reconnection mechanism is also
+offered, so that the affected PCI device(s) are reset and put back
+into working condition. The reset phase requires coordination
+between the affected device drivers and the PCI controller chip.
+This document describes a generic API for notifying device drivers
+of a bus disconnection, and then performing error recovery.
+This API is currently implemented in the 2.6.16 and later kernels.
+
+Reporting and recovery is performed in several steps. First, when
+a PCI hardware error has resulted in a bus disconnect, that event
+is reported as soon as possible to all affected device drivers,
+including multiple instances of a device driver on multi-function
+cards. This allows device drivers to avoid deadlocking in spinloops,
+waiting for some i/o-space register to change, when it never will.
+It also gives the drivers a chance to defer incoming I/O as
+needed.
+
+Next, recovery is performed in several stages. Most of the complexity
+is forced by the need to handle multi-function devices, that is,
+devices that have multiple device drivers associated with them.
+In the first stage, each driver is allowed to indicate what type
+of reset it desires, the choices being a simple re-enabling of I/O
+or requesting a slot reset.
+
+If any driver requests a slot reset, that is what will be done.
+
+After a reset and/or a re-enabling of I/O, all drivers are
+again notified, so that they may then perform any device setup/config
+that may be required. After these have all completed, a final
+"resume normal operations" event is sent out.
+
+The biggest reason for choosing a kernel-based implementation rather
+than a user-space implementation was the need to deal with bus
+disconnects of PCI devices attached to storage media, and, in particular,
+disconnects from devices holding the root file system. If the root
+file system is disconnected, a user-space mechanism would have to go
+through a large number of contortions to complete recovery. Almost all
+of the current Linux file systems are not tolerant of disconnection
+from/reconnection to their underlying block device. By contrast,
+bus errors are easy to manage in the device driver. Indeed, most
+device drivers already handle very similar recovery procedures;
+for example, the SCSI-generic layer already provides significant
+mechanisms for dealing with SCSI bus errors and SCSI bus resets.
+
+
+Detailed Design
+===============
+
+Design and implementation details below, based on a chain of
+public email discussions with Ben Herrenschmidt, circa 5 April 2005.
+
+The error recovery API support is exposed to the driver in the form of
+a structure of function pointers pointed to by a new field in struct
+pci_driver. A driver that fails to provide the structure is "non-aware",
+and the actual recovery steps taken are platform dependent. The
+arch/powerpc implementation will simulate a PCI hotplug remove/add.
+
+This structure has the form::
+
+ struct pci_error_handlers
+ {
+ int (*error_detected)(struct pci_dev *dev, pci_channel_state_t);
+ int (*mmio_enabled)(struct pci_dev *dev);
+ int (*slot_reset)(struct pci_dev *dev);
+ void (*resume)(struct pci_dev *dev);
+ void (*cor_error_detected)(struct pci_dev *dev);
+ };
+
+The possible channel states are::
+
+ typedef enum {
+ pci_channel_io_normal, /* I/O channel is in normal state */
+ pci_channel_io_frozen, /* I/O to channel is blocked */
+ pci_channel_io_perm_failure, /* PCI card is dead */
+ } pci_channel_state_t;
+
+Possible return values are::
+
+ enum pci_ers_result {
+ PCI_ERS_RESULT_NONE, /* no result/none/not supported in device driver */
+ PCI_ERS_RESULT_CAN_RECOVER, /* Device driver can recover without slot reset */
+ PCI_ERS_RESULT_NEED_RESET, /* Device driver wants slot to be reset. */
+ PCI_ERS_RESULT_DISCONNECT, /* Device has completely failed, is unrecoverable */
+ PCI_ERS_RESULT_RECOVERED, /* Device driver is fully recovered and operational */
+ };
+
+A driver does not have to implement all of these callbacks; however,
+if it implements any, it must implement error_detected(). If a callback
+is not implemented, the corresponding feature is considered unsupported.
+For example, if mmio_enabled() and resume() aren't there, then it
+is assumed that the driver does not need these callbacks
+for recovery. Typically a driver will want to know about
+a slot_reset().
+
+The actual steps taken by a platform to recover from a PCI error
+event will be platform-dependent, but will follow the general
+sequence described below.
+
+STEP 0: Error Event
+-------------------
+A PCI bus error is detected by the PCI hardware. On powerpc, the slot
+is isolated, in that all I/O is blocked: all reads return 0xffffffff,
+all writes are ignored.
+
+Similarly, on platforms supporting Downstream Port Containment
+(PCIe r7.0 sec 6.2.11), the link to the sub-hierarchy with the
+faulting device is disabled. Any device in the sub-hierarchy
+becomes inaccessible.
+
+STEP 1: Notification
+--------------------
+Platform calls the error_detected() callback on every instance of
+every driver affected by the error.
+
+At this point, the device might not be accessible anymore, depending on
+the platform (the slot will be isolated on powerpc). The driver may
+already have "noticed" the error because of a failing I/O, but this
+is the proper "synchronization point", that is, it gives the driver
+a chance to cleanup, waiting for pending stuff (timers, whatever, etc...)
+to complete; it can take semaphores, schedule, etc... everything but
+touch the device. Within this function and after it returns, the driver
+shouldn't do any new IOs. Called in task context. This is sort of a
+"quiesce" point. See note about interrupts at the end of this doc.
+
+All drivers participating in this system must implement this call.
+The driver must return one of the following result codes:
+
+ - PCI_ERS_RESULT_RECOVERED
+ Driver returns this if it thinks the device is usable despite
+ the error and does not need further intervention.
+ - PCI_ERS_RESULT_CAN_RECOVER
+ Driver returns this if it thinks it might be able to recover
+ the HW by just banging IOs or if it wants to be given
+ a chance to extract some diagnostic information (see
+ mmio_enable, below).
+ - PCI_ERS_RESULT_NEED_RESET
+ Driver returns this if it can't recover without a
+ slot reset.
+ - PCI_ERS_RESULT_DISCONNECT
+ Driver returns this if it doesn't want to recover at all.
+
+The next step taken will depend on the result codes returned by the
+drivers.
+
+If all drivers on the segment/slot return PCI_ERS_RESULT_CAN_RECOVER,
+then the platform should re-enable IOs on the slot (or do nothing in
+particular, if the platform doesn't isolate slots), and recovery
+proceeds to STEP 2 (MMIO Enable).
+
+If any driver requested a slot reset (by returning PCI_ERS_RESULT_NEED_RESET),
+then recovery proceeds to STEP 4 (Slot Reset).
+
+If the platform is unable to recover the slot, the next step
+is STEP 6 (Permanent Failure).
+
+.. note::
+
+ The current powerpc implementation assumes that a device driver will
+ *not* schedule or semaphore in this routine; the current powerpc
+ implementation uses one kernel thread to notify all devices;
+ thus, if one device sleeps/schedules, all devices are affected.
+ Doing better requires complex multi-threaded logic in the error
+ recovery implementation (e.g. waiting for all notification threads
+ to "join" before proceeding with recovery.) This seems excessively
+ complex and not worth implementing.
+
+ The current powerpc implementation doesn't much care if the device
+ attempts I/O at this point, or not. I/Os will fail, returning
+ a value of 0xff on read, and writes will be dropped. If more than
+ EEH_MAX_FAILS I/Os are attempted to a frozen adapter, EEH
+ assumes that the device driver has gone into an infinite loop
+ and prints an error to syslog. A reboot is then required to
+ get the device working again.
+
+STEP 2: MMIO Enabled
+--------------------
+The platform re-enables MMIO to the device (but typically not the
+DMA), and then calls the mmio_enabled() callback on all affected
+device drivers.
+
+This is the "early recovery" call. IOs are allowed again, but DMA is
+not, with some restrictions. This is NOT a callback for the driver to
+start operations again, only to peek/poke at the device, extract diagnostic
+information, if any, and eventually do things like trigger a device local
+reset or some such, but not restart operations. This callback is made if
+all drivers on a segment agree that they can try to recover and if no automatic
+link reset was performed by the HW. If the platform can't just re-enable IOs
+without a slot reset or a link reset, it will not call this callback, and
+instead will have gone directly to STEP 3 (Link Reset) or STEP 4 (Slot Reset).
+
+.. note::
+
+ On platforms supporting Advanced Error Reporting (PCIe r7.0 sec 6.2),
+ the faulting device may already be accessible in STEP 1 (Notification).
+ Drivers should nevertheless defer accesses to STEP 2 (MMIO Enabled)
+ to be compatible with EEH on powerpc and with s390 (where devices are
+ inaccessible until STEP 2).
+
+ On platforms supporting Downstream Port Containment, the link to the
+ sub-hierarchy with the faulting device is re-enabled in STEP 3 (Link
+ Reset). Hence devices in the sub-hierarchy are inaccessible until
+ STEP 4 (Slot Reset).
+
+ For errors such as Surprise Down (PCIe r7.0 sec 6.2.7), the device
+ may not even be accessible in STEP 4 (Slot Reset). Drivers can detect
+ accessibility by checking whether reads from the device return all 1's
+ (PCI_POSSIBLE_ERROR()).
+
+.. note::
+
+ The following is proposed; no platform implements this yet:
+ Proposal: All I/Os should be done _synchronously_ from within
+ this callback, errors triggered by them will be returned via
+ the normal pci_check_whatever() API, no new error_detected()
+ callback will be issued due to an error happening here. However,
+ such an error might cause IOs to be re-blocked for the whole
+ segment, and thus invalidate the recovery that other devices
+ on the same segment might have done, forcing the whole segment
+ into one of the next states, that is, link reset or slot reset.
+
+The driver should return one of the following result codes:
+ - PCI_ERS_RESULT_RECOVERED
+ Driver returns this if it thinks the device is fully
+ functional and thinks it is ready to start
+ normal driver operations again. There is no
+ guarantee that the driver will actually be
+ allowed to proceed, as another driver on the
+ same segment might have failed and thus triggered a
+ slot reset on platforms that support it.
+
+ - PCI_ERS_RESULT_NEED_RESET
+ Driver returns this if it thinks the device is not
+ recoverable in its current state and it needs a slot
+ reset to proceed.
+
+ - PCI_ERS_RESULT_DISCONNECT
+ Same as above. Total failure, no recovery even after
+ reset driver dead. (To be defined more precisely)
+
+The next step taken depends on the results returned by the drivers.
+If all drivers returned PCI_ERS_RESULT_RECOVERED, then the platform
+proceeds to either STEP 3 (Link Reset) or to STEP 5 (Resume Operations).
+
+If any driver returned PCI_ERS_RESULT_NEED_RESET, then the platform
+proceeds to STEP 4 (Slot Reset)
+
+STEP 3: Link Reset
+------------------
+The platform resets the link. This is a PCIe specific step
+and is done whenever a fatal error has been detected that can be
+"solved" by resetting the link.
+
+STEP 4: Slot Reset
+------------------
+
+In response to a return value of PCI_ERS_RESULT_NEED_RESET, the
+platform will perform a slot reset on the requesting PCI device(s).
+The actual steps taken by a platform to perform a slot reset
+will be platform-dependent. Upon completion of slot reset, the
+platform will call the device slot_reset() callback.
+
+Powerpc platforms implement two levels of slot reset:
+soft reset(default) and fundamental(optional) reset.
+
+Powerpc soft reset consists of asserting the adapter #RST line and then
+restoring the PCI BARs and PCI configuration header to a state
+that is equivalent to what it would be after a fresh system
+power-on followed by power-on BIOS/system firmware initialization.
+Soft reset is also known as hot-reset.
+
+Powerpc fundamental reset is supported by PCIe cards only
+and results in device's state machines, hardware logic, port states and
+configuration registers to initialize to their default conditions.
+
+For most PCI devices, a soft reset will be sufficient for recovery.
+Optional fundamental reset is provided to support a limited number
+of PCIe devices for which a soft reset is not sufficient
+for recovery.
+
+If the platform supports PCI hotplug, then the reset might be
+performed by toggling the slot electrical power off/on.
+
+It is important for the platform to restore the PCI config space
+to the "fresh poweron" state, rather than the "last state". After
+a slot reset, the device driver will almost always use its standard
+device initialization routines, and an unusual config space setup
+may result in hung devices, kernel panics, or silent data corruption.
+
+This call gives drivers the chance to re-initialize the hardware
+(re-download firmware, etc.). At this point, the driver may assume
+that the card is in a fresh state and is fully functional. The slot
+is unfrozen and the driver has full access to PCI config space,
+memory mapped I/O space and DMA. Interrupts (Legacy, MSI, or MSI-X)
+will also be available.
+
+Drivers should not restart normal I/O processing operations
+at this point. If all device drivers report success on this
+callback, the platform will call resume() to complete the sequence,
+and let the driver restart normal I/O processing.
+
+A driver can still return a critical failure for this function if
+it can't get the device operational after reset. If the platform
+previously tried a soft reset, it might now try a hard reset (power
+cycle) and then call slot_reset() again. If the device still can't
+be recovered, there is nothing more that can be done; the platform
+will typically report a "permanent failure" in such a case. The
+device will be considered "dead" in this case.
+
+Drivers for multi-function cards will need to coordinate among
+themselves as to which driver instance will perform any "one-shot"
+or global device initialization. For example, the Symbios sym53cxx2
+driver performs device init only from PCI function 0::
+
+ + if (PCI_FUNC(pdev->devfn) == 0)
+ + sym_reset_scsi_bus(np, 0);
+
+Result codes:
+ - PCI_ERS_RESULT_DISCONNECT
+ Same as above.
+
+Drivers for PCIe cards that require a fundamental reset must
+set the needs_freset bit in the pci_dev structure in their probe function.
+For example, the QLogic qla2xxx driver sets the needs_freset bit for certain
+PCI card types::
+
+ + /* Set EEH reset type to fundamental if required by hba */
+ + if (IS_QLA24XX(ha) || IS_QLA25XX(ha) || IS_QLA81XX(ha))
+ + pdev->needs_freset = 1;
+ +
+
+Platform proceeds either to STEP 5 (Resume Operations) or STEP 6 (Permanent
+Failure).
+
+.. note::
+
+ The current powerpc implementation does not try a power-cycle
+ reset if the driver returned PCI_ERS_RESULT_DISCONNECT.
+ However, it probably should.
+
+
+STEP 5: Resume Operations
+-------------------------
+The platform will call the resume() callback on all affected device
+drivers if all drivers on the segment have returned
+PCI_ERS_RESULT_RECOVERED from one of the 3 previous callbacks.
+The goal of this callback is to tell the driver to restart activity,
+that everything is back and running. This callback does not return
+a result code.
+
+At this point, if a new error happens, the platform will restart
+a new error recovery sequence.
+
+STEP 6: Permanent Failure
+-------------------------
+A "permanent failure" has occurred, and the platform cannot recover
+the device. The platform will call error_detected() with a
+pci_channel_state_t value of pci_channel_io_perm_failure.
+
+The device driver should, at this point, assume the worst. It should
+cancel all pending I/O, refuse all new I/O, returning -EIO to
+higher layers. The device driver should then clean up all of its
+memory and remove itself from kernel operations, much as it would
+during system shutdown.
+
+The platform will typically notify the system operator of the
+permanent failure in some way. If the device is hotplug-capable,
+the operator will probably want to remove and replace the device.
+Note, however, not all failures are truly "permanent". Some are
+caused by over-heating, some by a poorly seated card. Many
+PCI error events are caused by software bugs, e.g. DMAs to
+wild addresses or bogus split transactions due to programming
+errors. See the discussion in Documentation/arch/powerpc/eeh-pci-error-recovery.rst
+for additional detail on real-life experience of the causes of
+software errors.
+
+
+Conclusion; General Remarks
+---------------------------
+The way the callbacks are called is platform policy. A platform with
+no slot reset capability may want to just "ignore" drivers that can't
+recover (disconnect them) and try to let other cards on the same segment
+recover. Keep in mind that in most real life cases, though, there will
+be only one driver per segment.
+
+Now, a note about interrupts. If you get an interrupt and your
+device is dead or has been isolated, there is a problem :)
+The current policy is to turn this into a platform policy.
+That is, the recovery API only requires that:
+
+ - There is no guarantee that interrupt delivery can proceed from any
+ device on the segment starting from the error detection and until the
+ slot_reset callback is called, at which point interrupts are expected
+ to be fully operational.
+
+ - There is no guarantee that interrupt delivery is stopped, that is,
+ a driver that gets an interrupt after detecting an error, or that detects
+ an error within the interrupt handler such that it prevents proper
+ ack'ing of the interrupt (and thus removal of the source) should just
+ return IRQ_NOTHANDLED. It's up to the platform to deal with that
+ condition, typically by masking the IRQ source during the duration of
+ the error handling. It is expected that the platform "knows" which
+ interrupts are routed to error-management capable slots and can deal
+ with temporarily disabling that IRQ number during error processing (this
+ isn't terribly complex). That means some IRQ latency for other devices
+ sharing the interrupt, but there is simply no other way. High end
+ platforms aren't supposed to share interrupts between many devices
+ anyway :)
+
+.. note::
+
+ Implementation details for the powerpc platform are discussed in
+ the file Documentation/arch/powerpc/eeh-pci-error-recovery.rst
+
+ As of this writing, there is a growing list of device drivers with
+ patches implementing error recovery. Not all of these patches are in
+ mainline yet. These may be used as "examples":
+
+ - drivers/scsi/ipr
+ - drivers/scsi/sym53c8xx_2
+ - drivers/scsi/qla2xxx
+ - drivers/scsi/lpfc
+ - drivers/next/bnx2.c
+ - drivers/next/e100.c
+ - drivers/net/e1000
+ - drivers/net/e1000e
+ - drivers/net/ixgbe
+ - drivers/net/cxgb3
+ - drivers/net/s2io.c
+
+ The cor_error_detected() callback is invoked in handle_error_source() when
+ the error severity is "correctable". The callback is optional and allows
+ additional logging to be done if desired. See example:
+
+ - drivers/cxl/pci.c
+
+The End
+-------
diff --git a/Documentation/PCI/pci-error-recovery.txt b/Documentation/PCI/pci-error-recovery.txt
deleted file mode 100644
index 0b6bb3ef449e..000000000000
--- a/Documentation/PCI/pci-error-recovery.txt
+++ /dev/null
@@ -1,413 +0,0 @@
-
- PCI Error Recovery
- ------------------
- February 2, 2006
-
- Current document maintainer:
- Linas Vepstas <linasvepstas@gmail.com>
- updated by Richard Lary <rlary@us.ibm.com>
- and Mike Mason <mmlnx@us.ibm.com> on 27-Jul-2009
-
-
-Many PCI bus controllers are able to detect a variety of hardware
-PCI errors on the bus, such as parity errors on the data and address
-buses, as well as SERR and PERR errors. Some of the more advanced
-chipsets are able to deal with these errors; these include PCI-E chipsets,
-and the PCI-host bridges found on IBM Power4, Power5 and Power6-based
-pSeries boxes. A typical action taken is to disconnect the affected device,
-halting all I/O to it. The goal of a disconnection is to avoid system
-corruption; for example, to halt system memory corruption due to DMA's
-to "wild" addresses. Typically, a reconnection mechanism is also
-offered, so that the affected PCI device(s) are reset and put back
-into working condition. The reset phase requires coordination
-between the affected device drivers and the PCI controller chip.
-This document describes a generic API for notifying device drivers
-of a bus disconnection, and then performing error recovery.
-This API is currently implemented in the 2.6.16 and later kernels.
-
-Reporting and recovery is performed in several steps. First, when
-a PCI hardware error has resulted in a bus disconnect, that event
-is reported as soon as possible to all affected device drivers,
-including multiple instances of a device driver on multi-function
-cards. This allows device drivers to avoid deadlocking in spinloops,
-waiting for some i/o-space register to change, when it never will.
-It also gives the drivers a chance to defer incoming I/O as
-needed.
-
-Next, recovery is performed in several stages. Most of the complexity
-is forced by the need to handle multi-function devices, that is,
-devices that have multiple device drivers associated with them.
-In the first stage, each driver is allowed to indicate what type
-of reset it desires, the choices being a simple re-enabling of I/O
-or requesting a slot reset.
-
-If any driver requests a slot reset, that is what will be done.
-
-After a reset and/or a re-enabling of I/O, all drivers are
-again notified, so that they may then perform any device setup/config
-that may be required. After these have all completed, a final
-"resume normal operations" event is sent out.
-
-The biggest reason for choosing a kernel-based implementation rather
-than a user-space implementation was the need to deal with bus
-disconnects of PCI devices attached to storage media, and, in particular,
-disconnects from devices holding the root file system. If the root
-file system is disconnected, a user-space mechanism would have to go
-through a large number of contortions to complete recovery. Almost all
-of the current Linux file systems are not tolerant of disconnection
-from/reconnection to their underlying block device. By contrast,
-bus errors are easy to manage in the device driver. Indeed, most
-device drivers already handle very similar recovery procedures;
-for example, the SCSI-generic layer already provides significant
-mechanisms for dealing with SCSI bus errors and SCSI bus resets.
-
-
-Detailed Design
----------------
-Design and implementation details below, based on a chain of
-public email discussions with Ben Herrenschmidt, circa 5 April 2005.
-
-The error recovery API support is exposed to the driver in the form of
-a structure of function pointers pointed to by a new field in struct
-pci_driver. A driver that fails to provide the structure is "non-aware",
-and the actual recovery steps taken are platform dependent. The
-arch/powerpc implementation will simulate a PCI hotplug remove/add.
-
-This structure has the form:
-struct pci_error_handlers
-{
- int (*error_detected)(struct pci_dev *dev, enum pci_channel_state);
- int (*mmio_enabled)(struct pci_dev *dev);
- int (*slot_reset)(struct pci_dev *dev);
- void (*resume)(struct pci_dev *dev);
-};
-
-The possible channel states are:
-enum pci_channel_state {
- pci_channel_io_normal, /* I/O channel is in normal state */
- pci_channel_io_frozen, /* I/O to channel is blocked */
- pci_channel_io_perm_failure, /* PCI card is dead */
-};
-
-Possible return values are:
-enum pci_ers_result {
- PCI_ERS_RESULT_NONE, /* no result/none/not supported in device driver */
- PCI_ERS_RESULT_CAN_RECOVER, /* Device driver can recover without slot reset */
- PCI_ERS_RESULT_NEED_RESET, /* Device driver wants slot to be reset. */
- PCI_ERS_RESULT_DISCONNECT, /* Device has completely failed, is unrecoverable */
- PCI_ERS_RESULT_RECOVERED, /* Device driver is fully recovered and operational */
-};
-
-A driver does not have to implement all of these callbacks; however,
-if it implements any, it must implement error_detected(). If a callback
-is not implemented, the corresponding feature is considered unsupported.
-For example, if mmio_enabled() and resume() aren't there, then it
-is assumed that the driver is not doing any direct recovery and requires
-a slot reset. Typically a driver will want to know about
-a slot_reset().
-
-The actual steps taken by a platform to recover from a PCI error
-event will be platform-dependent, but will follow the general
-sequence described below.
-
-STEP 0: Error Event
--------------------
-A PCI bus error is detected by the PCI hardware. On powerpc, the slot
-is isolated, in that all I/O is blocked: all reads return 0xffffffff,
-all writes are ignored.
-
-
-STEP 1: Notification
---------------------
-Platform calls the error_detected() callback on every instance of
-every driver affected by the error.
-
-At this point, the device might not be accessible anymore, depending on
-the platform (the slot will be isolated on powerpc). The driver may
-already have "noticed" the error because of a failing I/O, but this
-is the proper "synchronization point", that is, it gives the driver
-a chance to cleanup, waiting for pending stuff (timers, whatever, etc...)
-to complete; it can take semaphores, schedule, etc... everything but
-touch the device. Within this function and after it returns, the driver
-shouldn't do any new IOs. Called in task context. This is sort of a
-"quiesce" point. See note about interrupts at the end of this doc.
-
-All drivers participating in this system must implement this call.
-The driver must return one of the following result codes:
- - PCI_ERS_RESULT_CAN_RECOVER:
- Driver returns this if it thinks it might be able to recover
- the HW by just banging IOs or if it wants to be given
- a chance to extract some diagnostic information (see
- mmio_enable, below).
- - PCI_ERS_RESULT_NEED_RESET:
- Driver returns this if it can't recover without a
- slot reset.
- - PCI_ERS_RESULT_DISCONNECT:
- Driver returns this if it doesn't want to recover at all.
-
-The next step taken will depend on the result codes returned by the
-drivers.
-
-If all drivers on the segment/slot return PCI_ERS_RESULT_CAN_RECOVER,
-then the platform should re-enable IOs on the slot (or do nothing in
-particular, if the platform doesn't isolate slots), and recovery
-proceeds to STEP 2 (MMIO Enable).
-
-If any driver requested a slot reset (by returning PCI_ERS_RESULT_NEED_RESET),
-then recovery proceeds to STEP 4 (Slot Reset).
-
-If the platform is unable to recover the slot, the next step
-is STEP 6 (Permanent Failure).
-
->>> The current powerpc implementation assumes that a device driver will
->>> *not* schedule or semaphore in this routine; the current powerpc
->>> implementation uses one kernel thread to notify all devices;
->>> thus, if one device sleeps/schedules, all devices are affected.
->>> Doing better requires complex multi-threaded logic in the error
->>> recovery implementation (e.g. waiting for all notification threads
->>> to "join" before proceeding with recovery.) This seems excessively
->>> complex and not worth implementing.
-
->>> The current powerpc implementation doesn't much care if the device
->>> attempts I/O at this point, or not. I/O's will fail, returning
->>> a value of 0xff on read, and writes will be dropped. If more than
->>> EEH_MAX_FAILS I/O's are attempted to a frozen adapter, EEH
->>> assumes that the device driver has gone into an infinite loop
->>> and prints an error to syslog. A reboot is then required to
->>> get the device working again.
-
-STEP 2: MMIO Enabled
--------------------
-The platform re-enables MMIO to the device (but typically not the
-DMA), and then calls the mmio_enabled() callback on all affected
-device drivers.
-
-This is the "early recovery" call. IOs are allowed again, but DMA is
-not, with some restrictions. This is NOT a callback for the driver to
-start operations again, only to peek/poke at the device, extract diagnostic
-information, if any, and eventually do things like trigger a device local
-reset or some such, but not restart operations. This callback is made if
-all drivers on a segment agree that they can try to recover and if no automatic
-link reset was performed by the HW. If the platform can't just re-enable IOs
-without a slot reset or a link reset, it will not call this callback, and
-instead will have gone directly to STEP 3 (Link Reset) or STEP 4 (Slot Reset)
-
->>> The following is proposed; no platform implements this yet:
->>> Proposal: All I/O's should be done _synchronously_ from within
->>> this callback, errors triggered by them will be returned via
->>> the normal pci_check_whatever() API, no new error_detected()
->>> callback will be issued due to an error happening here. However,
->>> such an error might cause IOs to be re-blocked for the whole
->>> segment, and thus invalidate the recovery that other devices
->>> on the same segment might have done, forcing the whole segment
->>> into one of the next states, that is, link reset or slot reset.
-
-The driver should return one of the following result codes:
- - PCI_ERS_RESULT_RECOVERED
- Driver returns this if it thinks the device is fully
- functional and thinks it is ready to start
- normal driver operations again. There is no
- guarantee that the driver will actually be
- allowed to proceed, as another driver on the
- same segment might have failed and thus triggered a
- slot reset on platforms that support it.
-
- - PCI_ERS_RESULT_NEED_RESET
- Driver returns this if it thinks the device is not
- recoverable in its current state and it needs a slot
- reset to proceed.
-
- - PCI_ERS_RESULT_DISCONNECT
- Same as above. Total failure, no recovery even after
- reset driver dead. (To be defined more precisely)
-
-The next step taken depends on the results returned by the drivers.
-If all drivers returned PCI_ERS_RESULT_RECOVERED, then the platform
-proceeds to either STEP3 (Link Reset) or to STEP 5 (Resume Operations).
-
-If any driver returned PCI_ERS_RESULT_NEED_RESET, then the platform
-proceeds to STEP 4 (Slot Reset)
-
-STEP 3: Link Reset
-------------------
-The platform resets the link. This is a PCI-Express specific step
-and is done whenever a fatal error has been detected that can be
-"solved" by resetting the link.
-
-STEP 4: Slot Reset
-------------------
-
-In response to a return value of PCI_ERS_RESULT_NEED_RESET, the
-the platform will perform a slot reset on the requesting PCI device(s).
-The actual steps taken by a platform to perform a slot reset
-will be platform-dependent. Upon completion of slot reset, the
-platform will call the device slot_reset() callback.
-
-Powerpc platforms implement two levels of slot reset:
-soft reset(default) and fundamental(optional) reset.
-
-Powerpc soft reset consists of asserting the adapter #RST line and then
-restoring the PCI BAR's and PCI configuration header to a state
-that is equivalent to what it would be after a fresh system
-power-on followed by power-on BIOS/system firmware initialization.
-Soft reset is also known as hot-reset.
-
-Powerpc fundamental reset is supported by PCI Express cards only
-and results in device's state machines, hardware logic, port states and
-configuration registers to initialize to their default conditions.
-
-For most PCI devices, a soft reset will be sufficient for recovery.
-Optional fundamental reset is provided to support a limited number
-of PCI Express devices for which a soft reset is not sufficient
-for recovery.
-
-If the platform supports PCI hotplug, then the reset might be
-performed by toggling the slot electrical power off/on.
-
-It is important for the platform to restore the PCI config space
-to the "fresh poweron" state, rather than the "last state". After
-a slot reset, the device driver will almost always use its standard
-device initialization routines, and an unusual config space setup
-may result in hung devices, kernel panics, or silent data corruption.
-
-This call gives drivers the chance to re-initialize the hardware
-(re-download firmware, etc.). At this point, the driver may assume
-that the card is in a fresh state and is fully functional. The slot
-is unfrozen and the driver has full access to PCI config space,
-memory mapped I/O space and DMA. Interrupts (Legacy, MSI, or MSI-X)
-will also be available.
-
-Drivers should not restart normal I/O processing operations
-at this point. If all device drivers report success on this
-callback, the platform will call resume() to complete the sequence,
-and let the driver restart normal I/O processing.
-
-A driver can still return a critical failure for this function if
-it can't get the device operational after reset. If the platform
-previously tried a soft reset, it might now try a hard reset (power
-cycle) and then call slot_reset() again. It the device still can't
-be recovered, there is nothing more that can be done; the platform
-will typically report a "permanent failure" in such a case. The
-device will be considered "dead" in this case.
-
-Drivers for multi-function cards will need to coordinate among
-themselves as to which driver instance will perform any "one-shot"
-or global device initialization. For example, the Symbios sym53cxx2
-driver performs device init only from PCI function 0:
-
-+ if (PCI_FUNC(pdev->devfn) == 0)
-+ sym_reset_scsi_bus(np, 0);
-
- Result codes:
- - PCI_ERS_RESULT_DISCONNECT
- Same as above.
-
-Drivers for PCI Express cards that require a fundamental reset must
-set the needs_freset bit in the pci_dev structure in their probe function.
-For example, the QLogic qla2xxx driver sets the needs_freset bit for certain
-PCI card types:
-
-+ /* Set EEH reset type to fundamental if required by hba */
-+ if (IS_QLA24XX(ha) || IS_QLA25XX(ha) || IS_QLA81XX(ha))
-+ pdev->needs_freset = 1;
-+
-
-Platform proceeds either to STEP 5 (Resume Operations) or STEP 6 (Permanent
-Failure).
-
->>> The current powerpc implementation does not try a power-cycle
->>> reset if the driver returned PCI_ERS_RESULT_DISCONNECT.
->>> However, it probably should.
-
-
-STEP 5: Resume Operations
--------------------------
-The platform will call the resume() callback on all affected device
-drivers if all drivers on the segment have returned
-PCI_ERS_RESULT_RECOVERED from one of the 3 previous callbacks.
-The goal of this callback is to tell the driver to restart activity,
-that everything is back and running. This callback does not return
-a result code.
-
-At this point, if a new error happens, the platform will restart
-a new error recovery sequence.
-
-STEP 6: Permanent Failure
--------------------------
-A "permanent failure" has occurred, and the platform cannot recover
-the device. The platform will call error_detected() with a
-pci_channel_state value of pci_channel_io_perm_failure.
-
-The device driver should, at this point, assume the worst. It should
-cancel all pending I/O, refuse all new I/O, returning -EIO to
-higher layers. The device driver should then clean up all of its
-memory and remove itself from kernel operations, much as it would
-during system shutdown.
-
-The platform will typically notify the system operator of the
-permanent failure in some way. If the device is hotplug-capable,
-the operator will probably want to remove and replace the device.
-Note, however, not all failures are truly "permanent". Some are
-caused by over-heating, some by a poorly seated card. Many
-PCI error events are caused by software bugs, e.g. DMA's to
-wild addresses or bogus split transactions due to programming
-errors. See the discussion in powerpc/eeh-pci-error-recovery.txt
-for additional detail on real-life experience of the causes of
-software errors.
-
-
-Conclusion; General Remarks
----------------------------
-The way the callbacks are called is platform policy. A platform with
-no slot reset capability may want to just "ignore" drivers that can't
-recover (disconnect them) and try to let other cards on the same segment
-recover. Keep in mind that in most real life cases, though, there will
-be only one driver per segment.
-
-Now, a note about interrupts. If you get an interrupt and your
-device is dead or has been isolated, there is a problem :)
-The current policy is to turn this into a platform policy.
-That is, the recovery API only requires that:
-
- - There is no guarantee that interrupt delivery can proceed from any
-device on the segment starting from the error detection and until the
-slot_reset callback is called, at which point interrupts are expected
-to be fully operational.
-
- - There is no guarantee that interrupt delivery is stopped, that is,
-a driver that gets an interrupt after detecting an error, or that detects
-an error within the interrupt handler such that it prevents proper
-ack'ing of the interrupt (and thus removal of the source) should just
-return IRQ_NOTHANDLED. It's up to the platform to deal with that
-condition, typically by masking the IRQ source during the duration of
-the error handling. It is expected that the platform "knows" which
-interrupts are routed to error-management capable slots and can deal
-with temporarily disabling that IRQ number during error processing (this
-isn't terribly complex). That means some IRQ latency for other devices
-sharing the interrupt, but there is simply no other way. High end
-platforms aren't supposed to share interrupts between many devices
-anyway :)
-
->>> Implementation details for the powerpc platform are discussed in
->>> the file Documentation/powerpc/eeh-pci-error-recovery.txt
-
->>> As of this writing, there is a growing list of device drivers with
->>> patches implementing error recovery. Not all of these patches are in
->>> mainline yet. These may be used as "examples":
->>>
->>> drivers/scsi/ipr
->>> drivers/scsi/sym53c8xx_2
->>> drivers/scsi/qla2xxx
->>> drivers/scsi/lpfc
->>> drivers/next/bnx2.c
->>> drivers/next/e100.c
->>> drivers/net/e1000
->>> drivers/net/e1000e
->>> drivers/net/ixgb
->>> drivers/net/ixgbe
->>> drivers/net/cxgb3
->>> drivers/net/s2io.c
->>> drivers/net/qlge
-
-The End
--------
diff --git a/Documentation/PCI/pci-iov-howto.rst b/Documentation/PCI/pci-iov-howto.rst
new file mode 100644
index 000000000000..27d35933cea2
--- /dev/null
+++ b/Documentation/PCI/pci-iov-howto.rst
@@ -0,0 +1,171 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+====================================
+PCI Express I/O Virtualization Howto
+====================================
+
+:Copyright: |copy| 2009 Intel Corporation
+:Authors: - Yu Zhao <yu.zhao@intel.com>
+ - Donald Dutile <ddutile@redhat.com>
+
+Overview
+========
+
+What is SR-IOV
+--------------
+
+Single Root I/O Virtualization (SR-IOV) is a PCI Express Extended
+capability which makes one physical device appear as multiple virtual
+devices. The physical device is referred to as Physical Function (PF)
+while the virtual devices are referred to as Virtual Functions (VF).
+Allocation of the VF can be dynamically controlled by the PF via
+registers encapsulated in the capability. By default, this feature is
+not enabled and the PF behaves as traditional PCIe device. Once it's
+turned on, each VF's PCI configuration space can be accessed by its own
+Bus, Device and Function Number (Routing ID). And each VF also has PCI
+Memory Space, which is used to map its register set. VF device driver
+operates on the register set so it can be functional and appear as a
+real existing PCI device.
+
+User Guide
+==========
+
+How can I enable SR-IOV capability
+----------------------------------
+
+Multiple methods are available for SR-IOV enablement.
+In the first method, the device driver (PF driver) will control the
+enabling and disabling of the capability via API provided by SR-IOV core.
+If the hardware has SR-IOV capability, loading its PF driver would
+enable it and all VFs associated with the PF. Some PF drivers require
+a module parameter to be set to determine the number of VFs to enable.
+In the second method, a write to the sysfs file sriov_numvfs will
+enable and disable the VFs associated with a PCIe PF. This method
+enables per-PF, VF enable/disable values versus the first method,
+which applies to all PFs of the same device. Additionally, the
+PCI SRIOV core support ensures that enable/disable operations are
+valid to reduce duplication in multiple drivers for the same
+checks, e.g., check numvfs == 0 if enabling VFs, ensure
+numvfs <= totalvfs.
+The second method is the recommended method for new/future VF devices.
+
+How can I use the Virtual Functions
+-----------------------------------
+
+The VF is treated as hot-plugged PCI devices in the kernel, so they
+should be able to work in the same way as real PCI devices. The VF
+requires device driver that is same as a normal PCI device's.
+
+Developer Guide
+===============
+
+SR-IOV API
+----------
+
+To enable SR-IOV capability:
+
+(a) For the first method, in the driver::
+
+ int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn);
+
+'nr_virtfn' is number of VFs to be enabled.
+
+(b) For the second method, from sysfs::
+
+ echo 'nr_virtfn' > \
+ /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_numvfs
+
+To disable SR-IOV capability:
+
+(a) For the first method, in the driver::
+
+ void pci_disable_sriov(struct pci_dev *dev);
+
+(b) For the second method, from sysfs::
+
+ echo 0 > \
+ /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_numvfs
+
+To enable auto probing VFs by a compatible driver on the host, run
+command below before enabling SR-IOV capabilities. This is the
+default behavior.
+::
+
+ echo 1 > \
+ /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_drivers_autoprobe
+
+To disable auto probing VFs by a compatible driver on the host, run
+command below before enabling SR-IOV capabilities. Updating this
+entry will not affect VFs which are already probed.
+::
+
+ echo 0 > \
+ /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_drivers_autoprobe
+
+Usage example
+-------------
+
+Following piece of code illustrates the usage of the SR-IOV API.
+::
+
+ static int dev_probe(struct pci_dev *dev, const struct pci_device_id *id)
+ {
+ pci_enable_sriov(dev, NR_VIRTFN);
+
+ ...
+
+ return 0;
+ }
+
+ static void dev_remove(struct pci_dev *dev)
+ {
+ pci_disable_sriov(dev);
+
+ ...
+ }
+
+ static int dev_suspend(struct device *dev)
+ {
+ ...
+
+ return 0;
+ }
+
+ static int dev_resume(struct device *dev)
+ {
+ ...
+
+ return 0;
+ }
+
+ static void dev_shutdown(struct pci_dev *dev)
+ {
+ ...
+ }
+
+ static int dev_sriov_configure(struct pci_dev *dev, int numvfs)
+ {
+ if (numvfs > 0) {
+ ...
+ pci_enable_sriov(dev, numvfs);
+ ...
+ return numvfs;
+ }
+ if (numvfs == 0) {
+ ....
+ pci_disable_sriov(dev);
+ ...
+ return 0;
+ }
+ }
+
+ static struct pci_driver dev_driver = {
+ .name = "SR-IOV Physical Function driver",
+ .id_table = dev_id_table,
+ .probe = dev_probe,
+ .remove = dev_remove,
+ .driver.pm = &dev_pm_ops,
+ .shutdown = dev_shutdown,
+ .sriov_configure = dev_sriov_configure,
+ };
diff --git a/Documentation/PCI/pci-iov-howto.txt b/Documentation/PCI/pci-iov-howto.txt
deleted file mode 100644
index d2a84151e99c..000000000000
--- a/Documentation/PCI/pci-iov-howto.txt
+++ /dev/null
@@ -1,147 +0,0 @@
- PCI Express I/O Virtualization Howto
- Copyright (C) 2009 Intel Corporation
- Yu Zhao <yu.zhao@intel.com>
-
- Update: November 2012
- -- sysfs-based SRIOV enable-/disable-ment
- Donald Dutile <ddutile@redhat.com>
-
-1. Overview
-
-1.1 What is SR-IOV
-
-Single Root I/O Virtualization (SR-IOV) is a PCI Express Extended
-capability which makes one physical device appear as multiple virtual
-devices. The physical device is referred to as Physical Function (PF)
-while the virtual devices are referred to as Virtual Functions (VF).
-Allocation of the VF can be dynamically controlled by the PF via
-registers encapsulated in the capability. By default, this feature is
-not enabled and the PF behaves as traditional PCIe device. Once it's
-turned on, each VF's PCI configuration space can be accessed by its own
-Bus, Device and Function Number (Routing ID). And each VF also has PCI
-Memory Space, which is used to map its register set. VF device driver
-operates on the register set so it can be functional and appear as a
-real existing PCI device.
-
-2. User Guide
-
-2.1 How can I enable SR-IOV capability
-
-Multiple methods are available for SR-IOV enablement.
-In the first method, the device driver (PF driver) will control the
-enabling and disabling of the capability via API provided by SR-IOV core.
-If the hardware has SR-IOV capability, loading its PF driver would
-enable it and all VFs associated with the PF. Some PF drivers require
-a module parameter to be set to determine the number of VFs to enable.
-In the second method, a write to the sysfs file sriov_numvfs will
-enable and disable the VFs associated with a PCIe PF. This method
-enables per-PF, VF enable/disable values versus the first method,
-which applies to all PFs of the same device. Additionally, the
-PCI SRIOV core support ensures that enable/disable operations are
-valid to reduce duplication in multiple drivers for the same
-checks, e.g., check numvfs == 0 if enabling VFs, ensure
-numvfs <= totalvfs.
-The second method is the recommended method for new/future VF devices.
-
-2.2 How can I use the Virtual Functions
-
-The VF is treated as hot-plugged PCI devices in the kernel, so they
-should be able to work in the same way as real PCI devices. The VF
-requires device driver that is same as a normal PCI device's.
-
-3. Developer Guide
-
-3.1 SR-IOV API
-
-To enable SR-IOV capability:
-(a) For the first method, in the driver:
- int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn);
- 'nr_virtfn' is number of VFs to be enabled.
-(b) For the second method, from sysfs:
- echo 'nr_virtfn' > \
- /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_numvfs
-
-To disable SR-IOV capability:
-(a) For the first method, in the driver:
- void pci_disable_sriov(struct pci_dev *dev);
-(b) For the second method, from sysfs:
- echo 0 > \
- /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_numvfs
-
-To enable auto probing VFs by a compatible driver on the host, run
-command below before enabling SR-IOV capabilities. This is the
-default behavior.
- echo 1 > \
- /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_drivers_autoprobe
-
-To disable auto probing VFs by a compatible driver on the host, run
-command below before enabling SR-IOV capabilities. Updating this
-entry will not affect VFs which are already probed.
- echo 0 > \
- /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_drivers_autoprobe
-
-3.2 Usage example
-
-Following piece of code illustrates the usage of the SR-IOV API.
-
-static int dev_probe(struct pci_dev *dev, const struct pci_device_id *id)
-{
- pci_enable_sriov(dev, NR_VIRTFN);
-
- ...
-
- return 0;
-}
-
-static void dev_remove(struct pci_dev *dev)
-{
- pci_disable_sriov(dev);
-
- ...
-}
-
-static int dev_suspend(struct pci_dev *dev, pm_message_t state)
-{
- ...
-
- return 0;
-}
-
-static int dev_resume(struct pci_dev *dev)
-{
- ...
-
- return 0;
-}
-
-static void dev_shutdown(struct pci_dev *dev)
-{
- ...
-}
-
-static int dev_sriov_configure(struct pci_dev *dev, int numvfs)
-{
- if (numvfs > 0) {
- ...
- pci_enable_sriov(dev, numvfs);
- ...
- return numvfs;
- }
- if (numvfs == 0) {
- ....
- pci_disable_sriov(dev);
- ...
- return 0;
- }
-}
-
-static struct pci_driver dev_driver = {
- .name = "SR-IOV Physical Function driver",
- .id_table = dev_id_table,
- .probe = dev_probe,
- .remove = dev_remove,
- .suspend = dev_suspend,
- .resume = dev_resume,
- .shutdown = dev_shutdown,
- .sriov_configure = dev_sriov_configure,
-};
diff --git a/Documentation/PCI/pci.rst b/Documentation/PCI/pci.rst
new file mode 100644
index 000000000000..f4d2662871ab
--- /dev/null
+++ b/Documentation/PCI/pci.rst
@@ -0,0 +1,578 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============================
+How To Write Linux PCI Drivers
+==============================
+
+:Authors: - Martin Mares <mj@ucw.cz>
+ - Grant Grundler <grundler@parisc-linux.org>
+
+The world of PCI is vast and full of (mostly unpleasant) surprises.
+Since each CPU architecture implements different chip-sets and PCI devices
+have different requirements (erm, "features"), the result is the PCI support
+in the Linux kernel is not as trivial as one would wish. This short paper
+tries to introduce all potential driver authors to Linux APIs for
+PCI device drivers.
+
+A more complete resource is the third edition of "Linux Device Drivers"
+by Jonathan Corbet, Alessandro Rubini, and Greg Kroah-Hartman.
+LDD3 is available for free (under Creative Commons License) from:
+https://lwn.net/Kernel/LDD3/.
+
+However, keep in mind that all documents are subject to "bit rot".
+Refer to the source code if things are not working as described here.
+
+Please send questions/comments/patches about Linux PCI API to the
+"Linux PCI" <linux-pci@atrey.karlin.mff.cuni.cz> mailing list.
+
+
+Structure of PCI drivers
+========================
+PCI drivers "discover" PCI devices in a system via pci_register_driver().
+Actually, it's the other way around. When the PCI generic code discovers
+a new device, the driver with a matching "description" will be notified.
+Details on this below.
+
+pci_register_driver() leaves most of the probing for devices to
+the PCI layer and supports online insertion/removal of devices [thus
+supporting hot-pluggable PCI, CardBus, and Express-Card in a single driver].
+pci_register_driver() call requires passing in a table of function
+pointers and thus dictates the high level structure of a driver.
+
+Once the driver knows about a PCI device and takes ownership, the
+driver generally needs to perform the following initialization:
+
+ - Enable the device
+ - Request MMIO/IOP resources
+ - Set the DMA mask size (for both coherent and streaming DMA)
+ - Allocate and initialize shared control data (pci_allocate_coherent())
+ - Access device configuration space (if needed)
+ - Register IRQ handler (request_irq())
+ - Initialize non-PCI (i.e. LAN/SCSI/etc parts of the chip)
+ - Enable DMA/processing engines
+
+When done using the device, and perhaps the module needs to be unloaded,
+the driver needs to take the following steps:
+
+ - Disable the device from generating IRQs
+ - Release the IRQ (free_irq())
+ - Stop all DMA activity
+ - Release DMA buffers (both streaming and coherent)
+ - Unregister from other subsystems (e.g. scsi or netdev)
+ - Release MMIO/IOP resources
+ - Disable the device
+
+Most of these topics are covered in the following sections.
+For the rest look at LDD3 or <linux/pci.h> .
+
+If the PCI subsystem is not configured (CONFIG_PCI is not set), most of
+the PCI functions described below are defined as inline functions either
+completely empty or just returning an appropriate error codes to avoid
+lots of ifdefs in the drivers.
+
+
+pci_register_driver() call
+==========================
+
+PCI device drivers call ``pci_register_driver()`` during their
+initialization with a pointer to a structure describing the driver
+(``struct pci_driver``):
+
+.. kernel-doc:: include/linux/pci.h
+ :functions: pci_driver
+
+The ID table is an array of ``struct pci_device_id`` entries ending with an
+all-zero entry. Definitions with static const are generally preferred.
+
+.. kernel-doc:: include/linux/mod_devicetable.h
+ :functions: pci_device_id
+
+Most drivers only need ``PCI_DEVICE()`` or ``PCI_DEVICE_CLASS()`` to set up
+a pci_device_id table.
+
+New PCI IDs may be added to a device driver pci_ids table at runtime
+as shown below::
+
+ echo "vendor device subvendor subdevice class class_mask driver_data" > \
+ /sys/bus/pci/drivers/{driver}/new_id
+
+All fields are passed in as hexadecimal values (no leading 0x).
+The vendor and device fields are mandatory, the others are optional. Users
+need pass only as many optional fields as necessary:
+
+ - subvendor and subdevice fields default to PCI_ANY_ID (FFFFFFFF)
+ - class and classmask fields default to 0
+ - driver_data defaults to 0UL.
+ - override_only field defaults to 0.
+
+Note that driver_data must match the value used by any of the pci_device_id
+entries defined in the driver. This makes the driver_data field mandatory
+if all the pci_device_id entries have a non-zero driver_data value.
+
+Once added, the driver probe routine will be invoked for any unclaimed
+PCI devices listed in its (newly updated) pci_ids list.
+
+When the driver exits, it just calls pci_unregister_driver() and the PCI layer
+automatically calls the remove hook for all devices handled by the driver.
+
+
+"Attributes" for driver functions/data
+--------------------------------------
+
+Please mark the initialization and cleanup functions where appropriate
+(the corresponding macros are defined in <linux/init.h>):
+
+ ====== =================================================
+ __init Initialization code. Thrown away after the driver
+ initializes.
+ __exit Exit code. Ignored for non-modular drivers.
+ ====== =================================================
+
+Tips on when/where to use the above attributes:
+ - The module_init()/module_exit() functions (and all
+ initialization functions called _only_ from these)
+ should be marked __init/__exit.
+
+ - Do not mark the struct pci_driver.
+
+ - Do NOT mark a function if you are not sure which mark to use.
+ Better to not mark the function than mark the function wrong.
+
+
+How to find PCI devices manually
+================================
+
+PCI drivers should have a really good reason for not using the
+pci_register_driver() interface to search for PCI devices.
+The main reason PCI devices are controlled by multiple drivers
+is because one PCI device implements several different HW services.
+E.g. combined serial/parallel port/floppy controller.
+
+A manual search may be performed using the following constructs:
+
+Searching by vendor and device ID::
+
+ struct pci_dev *dev = NULL;
+ while (dev = pci_get_device(VENDOR_ID, DEVICE_ID, dev))
+ configure_device(dev);
+
+Searching by class ID (iterate in a similar way)::
+
+ pci_get_class(CLASS_ID, dev)
+
+Searching by both vendor/device and subsystem vendor/device ID::
+
+ pci_get_subsys(VENDOR_ID,DEVICE_ID, SUBSYS_VENDOR_ID, SUBSYS_DEVICE_ID, dev).
+
+You can use the constant PCI_ANY_ID as a wildcard replacement for
+VENDOR_ID or DEVICE_ID. This allows searching for any device from a
+specific vendor, for example.
+
+These functions are hotplug-safe. They increment the reference count on
+the pci_dev that they return. You must eventually (possibly at module unload)
+decrement the reference count on these devices by calling pci_dev_put().
+
+
+Device Initialization Steps
+===========================
+
+As noted in the introduction, most PCI drivers need the following steps
+for device initialization:
+
+ - Enable the device
+ - Request MMIO/IOP resources
+ - Set the DMA mask size (for both coherent and streaming DMA)
+ - Allocate and initialize shared control data (pci_allocate_coherent())
+ - Access device configuration space (if needed)
+ - Register IRQ handler (request_irq())
+ - Initialize non-PCI (i.e. LAN/SCSI/etc parts of the chip)
+ - Enable DMA/processing engines.
+
+The driver can access PCI config space registers at any time.
+(Well, almost. When running BIST, config space can go away...but
+that will just result in a PCI Bus Master Abort and config reads
+will return garbage).
+
+
+Enable the PCI device
+---------------------
+Before touching any device registers, the driver needs to enable
+the PCI device by calling pci_enable_device(). This will:
+
+ - wake up the device if it was in suspended state,
+ - allocate I/O and memory regions of the device (if BIOS did not),
+ - allocate an IRQ (if BIOS did not).
+
+.. note::
+ pci_enable_device() can fail! Check the return value.
+
+.. warning::
+ OS BUG: we don't check resource allocations before enabling those
+ resources. The sequence would make more sense if we called
+ pci_request_resources() before calling pci_enable_device().
+ Currently, the device drivers can't detect the bug when two
+ devices have been allocated the same range. This is not a common
+ problem and unlikely to get fixed soon.
+
+ This has been discussed before but not changed as of 2.6.19:
+ https://lore.kernel.org/r/20060302180025.GC28895@flint.arm.linux.org.uk/
+
+
+pci_set_master() will enable DMA by setting the bus master bit
+in the PCI_COMMAND register. It also fixes the latency timer value if
+it's set to something bogus by the BIOS. pci_clear_master() will
+disable DMA by clearing the bus master bit.
+
+If the PCI device can use the PCI Memory-Write-Invalidate transaction,
+call pci_set_mwi(). This enables the PCI_COMMAND bit for Mem-Wr-Inval
+and also ensures that the cache line size register is set correctly.
+Check the return value of pci_set_mwi() as not all architectures
+or chip-sets may support Memory-Write-Invalidate. Alternatively,
+if Mem-Wr-Inval would be nice to have but is not required, call
+pci_try_set_mwi() to have the system do its best effort at enabling
+Mem-Wr-Inval.
+
+
+Request MMIO/IOP resources
+--------------------------
+Memory (MMIO), and I/O port addresses should NOT be read directly
+from the PCI device config space. Use the values in the pci_dev structure
+as the PCI "bus address" might have been remapped to a "host physical"
+address by the arch/chip-set specific kernel support.
+
+See Documentation/driver-api/io-mapping.rst for how to access device registers
+or device memory.
+
+The device driver needs to call pci_request_region() to verify
+no other device is already using the same address resource.
+Conversely, drivers should call pci_release_region() AFTER
+calling pci_disable_device().
+The idea is to prevent two devices colliding on the same address range.
+
+.. tip::
+ See OS BUG comment above. Currently (2.6.19), The driver can only
+ determine MMIO and IO Port resource availability _after_ calling
+ pci_enable_device().
+
+Generic flavors of pci_request_region() are request_mem_region()
+(for MMIO ranges) and request_region() (for IO Port ranges).
+Use these for address resources that are not described by "normal" PCI
+BARs.
+
+Also see pci_request_selected_regions() below.
+
+
+Set the DMA mask size
+---------------------
+.. note::
+ If anything below doesn't make sense, please refer to
+ Documentation/core-api/dma-api.rst. This section is just a reminder that
+ drivers need to indicate DMA capabilities of the device and is not
+ an authoritative source for DMA interfaces.
+
+While all drivers should explicitly indicate the DMA capability
+(e.g. 32 or 64 bit) of the PCI bus master, devices with more than
+32-bit bus master capability for streaming data need the driver
+to "register" this capability by calling dma_set_mask() with
+appropriate parameters. In general this allows more efficient DMA
+on systems where System RAM exists above 4G _physical_ address.
+
+Drivers for all PCI-X and PCIe compliant devices must call
+dma_set_mask() as they are 64-bit DMA devices.
+
+Similarly, drivers must also "register" this capability if the device
+can directly address "coherent memory" in System RAM above 4G physical
+address by calling dma_set_coherent_mask().
+Again, this includes drivers for all PCI-X and PCIe compliant devices.
+Many 64-bit "PCI" devices (before PCI-X) and some PCI-X devices are
+64-bit DMA capable for payload ("streaming") data but not control
+("coherent") data.
+
+
+Setup shared control data
+-------------------------
+Once the DMA masks are set, the driver can allocate "coherent" (a.k.a. shared)
+memory. See Documentation/core-api/dma-api.rst for a full description of
+the DMA APIs. This section is just a reminder that it needs to be done
+before enabling DMA on the device.
+
+
+Initialize device registers
+---------------------------
+Some drivers will need specific "capability" fields programmed
+or other "vendor specific" register initialized or reset.
+E.g. clearing pending interrupts.
+
+
+Register IRQ handler
+--------------------
+While calling request_irq() is the last step described here,
+this is often just another intermediate step to initialize a device.
+This step can often be deferred until the device is opened for use.
+
+All interrupt handlers for IRQ lines should be registered with IRQF_SHARED
+and use the devid to map IRQs to devices (remember that all PCI IRQ lines
+can be shared).
+
+request_irq() will associate an interrupt handler and device handle
+with an interrupt number. Historically interrupt numbers represent
+IRQ lines which run from the PCI device to the Interrupt controller.
+With MSI and MSI-X (more below) the interrupt number is a CPU "vector".
+
+request_irq() also enables the interrupt. Make sure the device is
+quiesced and does not have any interrupts pending before registering
+the interrupt handler.
+
+MSI and MSI-X are PCI capabilities. Both are "Message Signaled Interrupts"
+which deliver interrupts to the CPU via a DMA write to a Local APIC.
+The fundamental difference between MSI and MSI-X is how multiple
+"vectors" get allocated. MSI requires contiguous blocks of vectors
+while MSI-X can allocate several individual ones.
+
+MSI capability can be enabled by calling pci_alloc_irq_vectors() with the
+PCI_IRQ_MSI and/or PCI_IRQ_MSIX flags before calling request_irq(). This
+causes the PCI support to program CPU vector data into the PCI device
+capability registers. Many architectures, chip-sets, or BIOSes do NOT
+support MSI or MSI-X and a call to pci_alloc_irq_vectors with just
+the PCI_IRQ_MSI and PCI_IRQ_MSIX flags will fail, so try to always
+specify PCI_IRQ_INTX as well.
+
+Drivers that have different interrupt handlers for MSI/MSI-X and
+legacy INTx should chose the right one based on the msi_enabled
+and msix_enabled flags in the pci_dev structure after calling
+pci_alloc_irq_vectors.
+
+There are (at least) two really good reasons for using MSI:
+
+1) MSI is an exclusive interrupt vector by definition.
+ This means the interrupt handler doesn't have to verify
+ its device caused the interrupt.
+
+2) MSI avoids DMA/IRQ race conditions. DMA to host memory is guaranteed
+ to be visible to the host CPU(s) when the MSI is delivered. This
+ is important for both data coherency and avoiding stale control data.
+ This guarantee allows the driver to omit MMIO reads to flush
+ the DMA stream.
+
+See drivers/infiniband/hw/mthca/ or drivers/net/tg3.c for examples
+of MSI/MSI-X usage.
+
+
+PCI device shutdown
+===================
+
+When a PCI device driver is being unloaded, most of the following
+steps need to be performed:
+
+ - Disable the device from generating IRQs
+ - Release the IRQ (free_irq())
+ - Stop all DMA activity
+ - Release DMA buffers (both streaming and coherent)
+ - Unregister from other subsystems (e.g. scsi or netdev)
+ - Disable device from responding to MMIO/IO Port addresses
+ - Release MMIO/IO Port resource(s)
+
+
+Stop IRQs on the device
+-----------------------
+How to do this is chip/device specific. If it's not done, it opens
+the possibility of a "screaming interrupt" if (and only if)
+the IRQ is shared with another device.
+
+When the shared IRQ handler is "unhooked", the remaining devices
+using the same IRQ line will still need the IRQ enabled. Thus if the
+"unhooked" device asserts IRQ line, the system will respond assuming
+it was one of the remaining devices asserted the IRQ line. Since none
+of the other devices will handle the IRQ, the system will "hang" until
+it decides the IRQ isn't going to get handled and masks the IRQ (100,000
+iterations later). Once the shared IRQ is masked, the remaining devices
+will stop functioning properly. Not a nice situation.
+
+This is another reason to use MSI or MSI-X if it's available.
+MSI and MSI-X are defined to be exclusive interrupts and thus
+are not susceptible to the "screaming interrupt" problem.
+
+
+Release the IRQ
+---------------
+Once the device is quiesced (no more IRQs), one can call free_irq().
+This function will return control once any pending IRQs are handled,
+"unhook" the drivers IRQ handler from that IRQ, and finally release
+the IRQ if no one else is using it.
+
+
+Stop all DMA activity
+---------------------
+It's extremely important to stop all DMA operations BEFORE attempting
+to deallocate DMA control data. Failure to do so can result in memory
+corruption, hangs, and on some chip-sets a hard crash.
+
+Stopping DMA after stopping the IRQs can avoid races where the
+IRQ handler might restart DMA engines.
+
+While this step sounds obvious and trivial, several "mature" drivers
+didn't get this step right in the past.
+
+
+Release DMA buffers
+-------------------
+Once DMA is stopped, clean up streaming DMA first.
+I.e. unmap data buffers and return buffers to "upstream"
+owners if there is one.
+
+Then clean up "coherent" buffers which contain the control data.
+
+See Documentation/core-api/dma-api.rst for details on unmapping interfaces.
+
+
+Unregister from other subsystems
+--------------------------------
+Most low level PCI device drivers support some other subsystem
+like USB, ALSA, SCSI, NetDev, Infiniband, etc. Make sure your
+driver isn't losing resources from that other subsystem.
+If this happens, typically the symptom is an Oops (panic) when
+the subsystem attempts to call into a driver that has been unloaded.
+
+
+Disable Device from responding to MMIO/IO Port addresses
+--------------------------------------------------------
+io_unmap() MMIO or IO Port resources and then call pci_disable_device().
+This is the symmetric opposite of pci_enable_device().
+Do not access device registers after calling pci_disable_device().
+
+
+Release MMIO/IO Port Resource(s)
+--------------------------------
+Call pci_release_region() to mark the MMIO or IO Port range as available.
+Failure to do so usually results in the inability to reload the driver.
+
+
+How to access PCI config space
+==============================
+
+You can use `pci_(read|write)_config_(byte|word|dword)` to access the config
+space of a device represented by `struct pci_dev *`. All these functions return
+0 when successful or an error code (`PCIBIOS_...`) which can be translated to a
+text string by pcibios_strerror. Most drivers expect that accesses to valid PCI
+devices don't fail.
+
+If you don't have a struct pci_dev available, you can call
+`pci_bus_(read|write)_config_(byte|word|dword)` to access a given device
+and function on that bus.
+
+If you access fields in the standard portion of the config header, please
+use symbolic names of locations and bits declared in <linux/pci.h>.
+
+If you need to access Extended PCI Capability registers, just call
+pci_find_capability() for the particular capability and it will find the
+corresponding register block for you.
+
+
+Other interesting functions
+===========================
+
+============================= ================================================
+pci_get_domain_bus_and_slot() Find pci_dev corresponding to given domain,
+ bus and slot and number. If the device is
+ found, its reference count is increased.
+pci_set_power_state() Set PCI Power Management state (0=D0 ... 3=D3)
+pci_find_capability() Find specified capability in device's capability
+ list.
+pci_resource_start() Returns bus start address for a given PCI region
+pci_resource_end() Returns bus end address for a given PCI region
+pci_resource_len() Returns the byte length of a PCI region
+pci_set_drvdata() Set private driver data pointer for a pci_dev
+pci_get_drvdata() Return private driver data pointer for a pci_dev
+pci_set_mwi() Enable Memory-Write-Invalidate transactions.
+pci_clear_mwi() Disable Memory-Write-Invalidate transactions.
+============================= ================================================
+
+
+Miscellaneous hints
+===================
+
+When displaying PCI device names to the user (for example when a driver wants
+to tell the user what card has it found), please use pci_name(pci_dev).
+
+Always refer to the PCI devices by a pointer to the pci_dev structure.
+All PCI layer functions use this identification and it's the only
+reasonable one. Don't use bus/slot/function numbers except for very
+special purposes -- on systems with multiple primary buses their semantics
+can be pretty complex.
+
+Don't try to turn on Fast Back to Back writes in your driver. All devices
+on the bus need to be capable of doing it, so this is something which needs
+to be handled by platform and generic code, not individual drivers.
+
+
+Vendor and device identifications
+=================================
+
+Do not add new device or vendor IDs to include/linux/pci_ids.h unless they
+are shared across multiple drivers. You can add private definitions in
+your driver if they're helpful, or just use plain hex constants.
+
+The device IDs are arbitrary hex numbers (vendor controlled) and normally used
+only in a single location, the pci_device_id table.
+
+Please DO submit new vendor/device IDs to https://pci-ids.ucw.cz/.
+There's a mirror of the pci.ids file at https://github.com/pciutils/pciids.
+
+
+Obsolete functions
+==================
+
+There are several functions which you might come across when trying to
+port an old driver to the new PCI interface. They are no longer present
+in the kernel as they aren't compatible with hotplug or PCI domains or
+having sane locking.
+
+================= ===========================================
+pci_find_device() Superseded by pci_get_device()
+pci_find_subsys() Superseded by pci_get_subsys()
+pci_find_slot() Superseded by pci_get_domain_bus_and_slot()
+pci_get_slot() Superseded by pci_get_domain_bus_and_slot()
+================= ===========================================
+
+The alternative is the traditional PCI device driver that walks PCI
+device lists. This is still possible but discouraged.
+
+
+MMIO Space and "Write Posting"
+==============================
+
+Converting a driver from using I/O Port space to using MMIO space
+often requires some additional changes. Specifically, "write posting"
+needs to be handled. Many drivers (e.g. tg3, acenic, sym53c8xx_2)
+already do this. I/O Port space guarantees write transactions reach the PCI
+device before the CPU can continue. Writes to MMIO space allow the CPU
+to continue before the transaction reaches the PCI device. HW weenies
+call this "Write Posting" because the write completion is "posted" to
+the CPU before the transaction has reached its destination.
+
+Thus, timing sensitive code should add readl() where the CPU is
+expected to wait before doing other work. The classic "bit banging"
+sequence works fine for I/O Port space::
+
+ for (i = 8; --i; val >>= 1) {
+ outb(val & 1, ioport_reg); /* write bit */
+ udelay(10);
+ }
+
+The same sequence for MMIO space should be::
+
+ for (i = 8; --i; val >>= 1) {
+ writeb(val & 1, mmio_reg); /* write bit */
+ readb(safe_mmio_reg); /* flush posted write */
+ udelay(10);
+ }
+
+It is important that "safe_mmio_reg" not have any side effects that
+interferes with the correct operation of the device.
+
+Another case to watch out for is when resetting a PCI device. Use PCI
+Configuration space reads to flush the writel(). This will gracefully
+handle the PCI master abort on all platforms if the PCI device is
+expected to not respond to a readl(). Most x86 platforms will allow
+MMIO reads to master abort (a.k.a. "Soft Fail") and return garbage
+(e.g. ~0). But many RISC platforms will crash (a.k.a."Hard Fail").
diff --git a/Documentation/PCI/pci.txt b/Documentation/PCI/pci.txt
deleted file mode 100644
index badb26ac33dc..000000000000
--- a/Documentation/PCI/pci.txt
+++ /dev/null
@@ -1,636 +0,0 @@
-
- How To Write Linux PCI Drivers
-
- by Martin Mares <mj@ucw.cz> on 07-Feb-2000
- updated by Grant Grundler <grundler@parisc-linux.org> on 23-Dec-2006
-
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The world of PCI is vast and full of (mostly unpleasant) surprises.
-Since each CPU architecture implements different chip-sets and PCI devices
-have different requirements (erm, "features"), the result is the PCI support
-in the Linux kernel is not as trivial as one would wish. This short paper
-tries to introduce all potential driver authors to Linux APIs for
-PCI device drivers.
-
-A more complete resource is the third edition of "Linux Device Drivers"
-by Jonathan Corbet, Alessandro Rubini, and Greg Kroah-Hartman.
-LDD3 is available for free (under Creative Commons License) from:
-
- http://lwn.net/Kernel/LDD3/
-
-However, keep in mind that all documents are subject to "bit rot".
-Refer to the source code if things are not working as described here.
-
-Please send questions/comments/patches about Linux PCI API to the
-"Linux PCI" <linux-pci@atrey.karlin.mff.cuni.cz> mailing list.
-
-
-
-0. Structure of PCI drivers
-~~~~~~~~~~~~~~~~~~~~~~~~~~~
-PCI drivers "discover" PCI devices in a system via pci_register_driver().
-Actually, it's the other way around. When the PCI generic code discovers
-a new device, the driver with a matching "description" will be notified.
-Details on this below.
-
-pci_register_driver() leaves most of the probing for devices to
-the PCI layer and supports online insertion/removal of devices [thus
-supporting hot-pluggable PCI, CardBus, and Express-Card in a single driver].
-pci_register_driver() call requires passing in a table of function
-pointers and thus dictates the high level structure of a driver.
-
-Once the driver knows about a PCI device and takes ownership, the
-driver generally needs to perform the following initialization:
-
- Enable the device
- Request MMIO/IOP resources
- Set the DMA mask size (for both coherent and streaming DMA)
- Allocate and initialize shared control data (pci_allocate_coherent())
- Access device configuration space (if needed)
- Register IRQ handler (request_irq())
- Initialize non-PCI (i.e. LAN/SCSI/etc parts of the chip)
- Enable DMA/processing engines
-
-When done using the device, and perhaps the module needs to be unloaded,
-the driver needs to take the follow steps:
- Disable the device from generating IRQs
- Release the IRQ (free_irq())
- Stop all DMA activity
- Release DMA buffers (both streaming and coherent)
- Unregister from other subsystems (e.g. scsi or netdev)
- Release MMIO/IOP resources
- Disable the device
-
-Most of these topics are covered in the following sections.
-For the rest look at LDD3 or <linux/pci.h> .
-
-If the PCI subsystem is not configured (CONFIG_PCI is not set), most of
-the PCI functions described below are defined as inline functions either
-completely empty or just returning an appropriate error codes to avoid
-lots of ifdefs in the drivers.
-
-
-
-1. pci_register_driver() call
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-PCI device drivers call pci_register_driver() during their
-initialization with a pointer to a structure describing the driver
-(struct pci_driver):
-
- field name Description
- ---------- ------------------------------------------------------
- id_table Pointer to table of device ID's the driver is
- interested in. Most drivers should export this
- table using MODULE_DEVICE_TABLE(pci,...).
-
- probe This probing function gets called (during execution
- of pci_register_driver() for already existing
- devices or later if a new device gets inserted) for
- all PCI devices which match the ID table and are not
- "owned" by the other drivers yet. This function gets
- passed a "struct pci_dev *" for each device whose
- entry in the ID table matches the device. The probe
- function returns zero when the driver chooses to
- take "ownership" of the device or an error code
- (negative number) otherwise.
- The probe function always gets called from process
- context, so it can sleep.
-
- remove The remove() function gets called whenever a device
- being handled by this driver is removed (either during
- deregistration of the driver or when it's manually
- pulled out of a hot-pluggable slot).
- The remove function always gets called from process
- context, so it can sleep.
-
- suspend Put device into low power state.
- suspend_late Put device into low power state.
-
- resume_early Wake device from low power state.
- resume Wake device from low power state.
-
- (Please see Documentation/power/pci.txt for descriptions
- of PCI Power Management and the related functions.)
-
- shutdown Hook into reboot_notifier_list (kernel/sys.c).
- Intended to stop any idling DMA operations.
- Useful for enabling wake-on-lan (NIC) or changing
- the power state of a device before reboot.
- e.g. drivers/net/e100.c.
-
- err_handler See Documentation/PCI/pci-error-recovery.txt
-
-
-The ID table is an array of struct pci_device_id entries ending with an
-all-zero entry. Definitions with static const are generally preferred.
-
-Each entry consists of:
-
- vendor,device Vendor and device ID to match (or PCI_ANY_ID)
-
- subvendor, Subsystem vendor and device ID to match (or PCI_ANY_ID)
- subdevice,
-
- class Device class, subclass, and "interface" to match.
- See Appendix D of the PCI Local Bus Spec or
- include/linux/pci_ids.h for a full list of classes.
- Most drivers do not need to specify class/class_mask
- as vendor/device is normally sufficient.
-
- class_mask limit which sub-fields of the class field are compared.
- See drivers/scsi/sym53c8xx_2/ for example of usage.
-
- driver_data Data private to the driver.
- Most drivers don't need to use driver_data field.
- Best practice is to use driver_data as an index
- into a static list of equivalent device types,
- instead of using it as a pointer.
-
-
-Most drivers only need PCI_DEVICE() or PCI_DEVICE_CLASS() to set up
-a pci_device_id table.
-
-New PCI IDs may be added to a device driver pci_ids table at runtime
-as shown below:
-
-echo "vendor device subvendor subdevice class class_mask driver_data" > \
-/sys/bus/pci/drivers/{driver}/new_id
-
-All fields are passed in as hexadecimal values (no leading 0x).
-The vendor and device fields are mandatory, the others are optional. Users
-need pass only as many optional fields as necessary:
- o subvendor and subdevice fields default to PCI_ANY_ID (FFFFFFFF)
- o class and classmask fields default to 0
- o driver_data defaults to 0UL.
-
-Note that driver_data must match the value used by any of the pci_device_id
-entries defined in the driver. This makes the driver_data field mandatory
-if all the pci_device_id entries have a non-zero driver_data value.
-
-Once added, the driver probe routine will be invoked for any unclaimed
-PCI devices listed in its (newly updated) pci_ids list.
-
-When the driver exits, it just calls pci_unregister_driver() and the PCI layer
-automatically calls the remove hook for all devices handled by the driver.
-
-
-1.1 "Attributes" for driver functions/data
-
-Please mark the initialization and cleanup functions where appropriate
-(the corresponding macros are defined in <linux/init.h>):
-
- __init Initialization code. Thrown away after the driver
- initializes.
- __exit Exit code. Ignored for non-modular drivers.
-
-Tips on when/where to use the above attributes:
- o The module_init()/module_exit() functions (and all
- initialization functions called _only_ from these)
- should be marked __init/__exit.
-
- o Do not mark the struct pci_driver.
-
- o Do NOT mark a function if you are not sure which mark to use.
- Better to not mark the function than mark the function wrong.
-
-
-
-2. How to find PCI devices manually
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-PCI drivers should have a really good reason for not using the
-pci_register_driver() interface to search for PCI devices.
-The main reason PCI devices are controlled by multiple drivers
-is because one PCI device implements several different HW services.
-E.g. combined serial/parallel port/floppy controller.
-
-A manual search may be performed using the following constructs:
-
-Searching by vendor and device ID:
-
- struct pci_dev *dev = NULL;
- while (dev = pci_get_device(VENDOR_ID, DEVICE_ID, dev))
- configure_device(dev);
-
-Searching by class ID (iterate in a similar way):
-
- pci_get_class(CLASS_ID, dev)
-
-Searching by both vendor/device and subsystem vendor/device ID:
-
- pci_get_subsys(VENDOR_ID,DEVICE_ID, SUBSYS_VENDOR_ID, SUBSYS_DEVICE_ID, dev).
-
-You can use the constant PCI_ANY_ID as a wildcard replacement for
-VENDOR_ID or DEVICE_ID. This allows searching for any device from a
-specific vendor, for example.
-
-These functions are hotplug-safe. They increment the reference count on
-the pci_dev that they return. You must eventually (possibly at module unload)
-decrement the reference count on these devices by calling pci_dev_put().
-
-
-
-3. Device Initialization Steps
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-As noted in the introduction, most PCI drivers need the following steps
-for device initialization:
-
- Enable the device
- Request MMIO/IOP resources
- Set the DMA mask size (for both coherent and streaming DMA)
- Allocate and initialize shared control data (pci_allocate_coherent())
- Access device configuration space (if needed)
- Register IRQ handler (request_irq())
- Initialize non-PCI (i.e. LAN/SCSI/etc parts of the chip)
- Enable DMA/processing engines.
-
-The driver can access PCI config space registers at any time.
-(Well, almost. When running BIST, config space can go away...but
-that will just result in a PCI Bus Master Abort and config reads
-will return garbage).
-
-
-3.1 Enable the PCI device
-~~~~~~~~~~~~~~~~~~~~~~~~~
-Before touching any device registers, the driver needs to enable
-the PCI device by calling pci_enable_device(). This will:
- o wake up the device if it was in suspended state,
- o allocate I/O and memory regions of the device (if BIOS did not),
- o allocate an IRQ (if BIOS did not).
-
-NOTE: pci_enable_device() can fail! Check the return value.
-
-[ OS BUG: we don't check resource allocations before enabling those
- resources. The sequence would make more sense if we called
- pci_request_resources() before calling pci_enable_device().
- Currently, the device drivers can't detect the bug when when two
- devices have been allocated the same range. This is not a common
- problem and unlikely to get fixed soon.
-
- This has been discussed before but not changed as of 2.6.19:
- http://lkml.org/lkml/2006/3/2/194
-]
-
-pci_set_master() will enable DMA by setting the bus master bit
-in the PCI_COMMAND register. It also fixes the latency timer value if
-it's set to something bogus by the BIOS. pci_clear_master() will
-disable DMA by clearing the bus master bit.
-
-If the PCI device can use the PCI Memory-Write-Invalidate transaction,
-call pci_set_mwi(). This enables the PCI_COMMAND bit for Mem-Wr-Inval
-and also ensures that the cache line size register is set correctly.
-Check the return value of pci_set_mwi() as not all architectures
-or chip-sets may support Memory-Write-Invalidate. Alternatively,
-if Mem-Wr-Inval would be nice to have but is not required, call
-pci_try_set_mwi() to have the system do its best effort at enabling
-Mem-Wr-Inval.
-
-
-3.2 Request MMIO/IOP resources
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Memory (MMIO), and I/O port addresses should NOT be read directly
-from the PCI device config space. Use the values in the pci_dev structure
-as the PCI "bus address" might have been remapped to a "host physical"
-address by the arch/chip-set specific kernel support.
-
-See Documentation/io-mapping.txt for how to access device registers
-or device memory.
-
-The device driver needs to call pci_request_region() to verify
-no other device is already using the same address resource.
-Conversely, drivers should call pci_release_region() AFTER
-calling pci_disable_device().
-The idea is to prevent two devices colliding on the same address range.
-
-[ See OS BUG comment above. Currently (2.6.19), The driver can only
- determine MMIO and IO Port resource availability _after_ calling
- pci_enable_device(). ]
-
-Generic flavors of pci_request_region() are request_mem_region()
-(for MMIO ranges) and request_region() (for IO Port ranges).
-Use these for address resources that are not described by "normal" PCI
-BARs.
-
-Also see pci_request_selected_regions() below.
-
-
-3.3 Set the DMA mask size
-~~~~~~~~~~~~~~~~~~~~~~~~~
-[ If anything below doesn't make sense, please refer to
- Documentation/DMA-API.txt. This section is just a reminder that
- drivers need to indicate DMA capabilities of the device and is not
- an authoritative source for DMA interfaces. ]
-
-While all drivers should explicitly indicate the DMA capability
-(e.g. 32 or 64 bit) of the PCI bus master, devices with more than
-32-bit bus master capability for streaming data need the driver
-to "register" this capability by calling pci_set_dma_mask() with
-appropriate parameters. In general this allows more efficient DMA
-on systems where System RAM exists above 4G _physical_ address.
-
-Drivers for all PCI-X and PCIe compliant devices must call
-pci_set_dma_mask() as they are 64-bit DMA devices.
-
-Similarly, drivers must also "register" this capability if the device
-can directly address "consistent memory" in System RAM above 4G physical
-address by calling pci_set_consistent_dma_mask().
-Again, this includes drivers for all PCI-X and PCIe compliant devices.
-Many 64-bit "PCI" devices (before PCI-X) and some PCI-X devices are
-64-bit DMA capable for payload ("streaming") data but not control
-("consistent") data.
-
-
-3.4 Setup shared control data
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Once the DMA masks are set, the driver can allocate "consistent" (a.k.a. shared)
-memory. See Documentation/DMA-API.txt for a full description of
-the DMA APIs. This section is just a reminder that it needs to be done
-before enabling DMA on the device.
-
-
-3.5 Initialize device registers
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Some drivers will need specific "capability" fields programmed
-or other "vendor specific" register initialized or reset.
-E.g. clearing pending interrupts.
-
-
-3.6 Register IRQ handler
-~~~~~~~~~~~~~~~~~~~~~~~~
-While calling request_irq() is the last step described here,
-this is often just another intermediate step to initialize a device.
-This step can often be deferred until the device is opened for use.
-
-All interrupt handlers for IRQ lines should be registered with IRQF_SHARED
-and use the devid to map IRQs to devices (remember that all PCI IRQ lines
-can be shared).
-
-request_irq() will associate an interrupt handler and device handle
-with an interrupt number. Historically interrupt numbers represent
-IRQ lines which run from the PCI device to the Interrupt controller.
-With MSI and MSI-X (more below) the interrupt number is a CPU "vector".
-
-request_irq() also enables the interrupt. Make sure the device is
-quiesced and does not have any interrupts pending before registering
-the interrupt handler.
-
-MSI and MSI-X are PCI capabilities. Both are "Message Signaled Interrupts"
-which deliver interrupts to the CPU via a DMA write to a Local APIC.
-The fundamental difference between MSI and MSI-X is how multiple
-"vectors" get allocated. MSI requires contiguous blocks of vectors
-while MSI-X can allocate several individual ones.
-
-MSI capability can be enabled by calling pci_alloc_irq_vectors() with the
-PCI_IRQ_MSI and/or PCI_IRQ_MSIX flags before calling request_irq(). This
-causes the PCI support to program CPU vector data into the PCI device
-capability registers. Many architectures, chip-sets, or BIOSes do NOT
-support MSI or MSI-X and a call to pci_alloc_irq_vectors with just
-the PCI_IRQ_MSI and PCI_IRQ_MSIX flags will fail, so try to always
-specify PCI_IRQ_LEGACY as well.
-
-Drivers that have different interrupt handlers for MSI/MSI-X and
-legacy INTx should chose the right one based on the msi_enabled
-and msix_enabled flags in the pci_dev structure after calling
-pci_alloc_irq_vectors.
-
-There are (at least) two really good reasons for using MSI:
-1) MSI is an exclusive interrupt vector by definition.
- This means the interrupt handler doesn't have to verify
- its device caused the interrupt.
-
-2) MSI avoids DMA/IRQ race conditions. DMA to host memory is guaranteed
- to be visible to the host CPU(s) when the MSI is delivered. This
- is important for both data coherency and avoiding stale control data.
- This guarantee allows the driver to omit MMIO reads to flush
- the DMA stream.
-
-See drivers/infiniband/hw/mthca/ or drivers/net/tg3.c for examples
-of MSI/MSI-X usage.
-
-
-
-4. PCI device shutdown
-~~~~~~~~~~~~~~~~~~~~~~~
-
-When a PCI device driver is being unloaded, most of the following
-steps need to be performed:
-
- Disable the device from generating IRQs
- Release the IRQ (free_irq())
- Stop all DMA activity
- Release DMA buffers (both streaming and consistent)
- Unregister from other subsystems (e.g. scsi or netdev)
- Disable device from responding to MMIO/IO Port addresses
- Release MMIO/IO Port resource(s)
-
-
-4.1 Stop IRQs on the device
-~~~~~~~~~~~~~~~~~~~~~~~~~~~
-How to do this is chip/device specific. If it's not done, it opens
-the possibility of a "screaming interrupt" if (and only if)
-the IRQ is shared with another device.
-
-When the shared IRQ handler is "unhooked", the remaining devices
-using the same IRQ line will still need the IRQ enabled. Thus if the
-"unhooked" device asserts IRQ line, the system will respond assuming
-it was one of the remaining devices asserted the IRQ line. Since none
-of the other devices will handle the IRQ, the system will "hang" until
-it decides the IRQ isn't going to get handled and masks the IRQ (100,000
-iterations later). Once the shared IRQ is masked, the remaining devices
-will stop functioning properly. Not a nice situation.
-
-This is another reason to use MSI or MSI-X if it's available.
-MSI and MSI-X are defined to be exclusive interrupts and thus
-are not susceptible to the "screaming interrupt" problem.
-
-
-4.2 Release the IRQ
-~~~~~~~~~~~~~~~~~~~
-Once the device is quiesced (no more IRQs), one can call free_irq().
-This function will return control once any pending IRQs are handled,
-"unhook" the drivers IRQ handler from that IRQ, and finally release
-the IRQ if no one else is using it.
-
-
-4.3 Stop all DMA activity
-~~~~~~~~~~~~~~~~~~~~~~~~~
-It's extremely important to stop all DMA operations BEFORE attempting
-to deallocate DMA control data. Failure to do so can result in memory
-corruption, hangs, and on some chip-sets a hard crash.
-
-Stopping DMA after stopping the IRQs can avoid races where the
-IRQ handler might restart DMA engines.
-
-While this step sounds obvious and trivial, several "mature" drivers
-didn't get this step right in the past.
-
-
-4.4 Release DMA buffers
-~~~~~~~~~~~~~~~~~~~~~~~
-Once DMA is stopped, clean up streaming DMA first.
-I.e. unmap data buffers and return buffers to "upstream"
-owners if there is one.
-
-Then clean up "consistent" buffers which contain the control data.
-
-See Documentation/DMA-API.txt for details on unmapping interfaces.
-
-
-4.5 Unregister from other subsystems
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Most low level PCI device drivers support some other subsystem
-like USB, ALSA, SCSI, NetDev, Infiniband, etc. Make sure your
-driver isn't losing resources from that other subsystem.
-If this happens, typically the symptom is an Oops (panic) when
-the subsystem attempts to call into a driver that has been unloaded.
-
-
-4.6 Disable Device from responding to MMIO/IO Port addresses
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-io_unmap() MMIO or IO Port resources and then call pci_disable_device().
-This is the symmetric opposite of pci_enable_device().
-Do not access device registers after calling pci_disable_device().
-
-
-4.7 Release MMIO/IO Port Resource(s)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Call pci_release_region() to mark the MMIO or IO Port range as available.
-Failure to do so usually results in the inability to reload the driver.
-
-
-
-5. How to access PCI config space
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-You can use pci_(read|write)_config_(byte|word|dword) to access the config
-space of a device represented by struct pci_dev *. All these functions return 0
-when successful or an error code (PCIBIOS_...) which can be translated to a text
-string by pcibios_strerror. Most drivers expect that accesses to valid PCI
-devices don't fail.
-
-If you don't have a struct pci_dev available, you can call
-pci_bus_(read|write)_config_(byte|word|dword) to access a given device
-and function on that bus.
-
-If you access fields in the standard portion of the config header, please
-use symbolic names of locations and bits declared in <linux/pci.h>.
-
-If you need to access Extended PCI Capability registers, just call
-pci_find_capability() for the particular capability and it will find the
-corresponding register block for you.
-
-
-
-6. Other interesting functions
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-pci_get_domain_bus_and_slot() Find pci_dev corresponding to given domain,
- bus and slot and number. If the device is
- found, its reference count is increased.
-pci_set_power_state() Set PCI Power Management state (0=D0 ... 3=D3)
-pci_find_capability() Find specified capability in device's capability
- list.
-pci_resource_start() Returns bus start address for a given PCI region
-pci_resource_end() Returns bus end address for a given PCI region
-pci_resource_len() Returns the byte length of a PCI region
-pci_set_drvdata() Set private driver data pointer for a pci_dev
-pci_get_drvdata() Return private driver data pointer for a pci_dev
-pci_set_mwi() Enable Memory-Write-Invalidate transactions.
-pci_clear_mwi() Disable Memory-Write-Invalidate transactions.
-
-
-
-7. Miscellaneous hints
-~~~~~~~~~~~~~~~~~~~~~~
-
-When displaying PCI device names to the user (for example when a driver wants
-to tell the user what card has it found), please use pci_name(pci_dev).
-
-Always refer to the PCI devices by a pointer to the pci_dev structure.
-All PCI layer functions use this identification and it's the only
-reasonable one. Don't use bus/slot/function numbers except for very
-special purposes -- on systems with multiple primary buses their semantics
-can be pretty complex.
-
-Don't try to turn on Fast Back to Back writes in your driver. All devices
-on the bus need to be capable of doing it, so this is something which needs
-to be handled by platform and generic code, not individual drivers.
-
-
-
-8. Vendor and device identifications
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Do not add new device or vendor IDs to include/linux/pci_ids.h unless they
-are shared across multiple drivers. You can add private definitions in
-your driver if they're helpful, or just use plain hex constants.
-
-The device IDs are arbitrary hex numbers (vendor controlled) and normally used
-only in a single location, the pci_device_id table.
-
-Please DO submit new vendor/device IDs to http://pci-ids.ucw.cz/.
-There are mirrors of the pci.ids file at http://pciids.sourceforge.net/
-and https://github.com/pciutils/pciids.
-
-
-
-9. Obsolete functions
-~~~~~~~~~~~~~~~~~~~~~
-
-There are several functions which you might come across when trying to
-port an old driver to the new PCI interface. They are no longer present
-in the kernel as they aren't compatible with hotplug or PCI domains or
-having sane locking.
-
-pci_find_device() Superseded by pci_get_device()
-pci_find_subsys() Superseded by pci_get_subsys()
-pci_find_slot() Superseded by pci_get_domain_bus_and_slot()
-pci_get_slot() Superseded by pci_get_domain_bus_and_slot()
-
-
-The alternative is the traditional PCI device driver that walks PCI
-device lists. This is still possible but discouraged.
-
-
-
-10. MMIO Space and "Write Posting"
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Converting a driver from using I/O Port space to using MMIO space
-often requires some additional changes. Specifically, "write posting"
-needs to be handled. Many drivers (e.g. tg3, acenic, sym53c8xx_2)
-already do this. I/O Port space guarantees write transactions reach the PCI
-device before the CPU can continue. Writes to MMIO space allow the CPU
-to continue before the transaction reaches the PCI device. HW weenies
-call this "Write Posting" because the write completion is "posted" to
-the CPU before the transaction has reached its destination.
-
-Thus, timing sensitive code should add readl() where the CPU is
-expected to wait before doing other work. The classic "bit banging"
-sequence works fine for I/O Port space:
-
- for (i = 8; --i; val >>= 1) {
- outb(val & 1, ioport_reg); /* write bit */
- udelay(10);
- }
-
-The same sequence for MMIO space should be:
-
- for (i = 8; --i; val >>= 1) {
- writeb(val & 1, mmio_reg); /* write bit */
- readb(safe_mmio_reg); /* flush posted write */
- udelay(10);
- }
-
-It is important that "safe_mmio_reg" not have any side effects that
-interferes with the correct operation of the device.
-
-Another case to watch out for is when resetting a PCI device. Use PCI
-Configuration space reads to flush the writel(). This will gracefully
-handle the PCI master abort on all platforms if the PCI device is
-expected to not respond to a readl(). Most x86 platforms will allow
-MMIO reads to master abort (a.k.a. "Soft Fail") and return garbage
-(e.g. ~0). But many RISC platforms will crash (a.k.a."Hard Fail").
-
diff --git a/Documentation/PCI/pcieaer-howto.rst b/Documentation/PCI/pcieaer-howto.rst
new file mode 100644
index 000000000000..3210c4792978
--- /dev/null
+++ b/Documentation/PCI/pcieaer-howto.rst
@@ -0,0 +1,257 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+===========================================================
+The PCI Express Advanced Error Reporting Driver Guide HOWTO
+===========================================================
+
+:Authors: - T. Long Nguyen <tom.l.nguyen@intel.com>
+ - Yanmin Zhang <yanmin.zhang@intel.com>
+
+:Copyright: |copy| 2006 Intel Corporation
+
+Overview
+===========
+
+About this guide
+----------------
+
+This guide describes the basics of the PCI Express (PCIe) Advanced Error
+Reporting (AER) driver and provides information on how to use it, as
+well as how to enable the drivers of Endpoint devices to conform with
+the PCIe AER driver.
+
+
+What is the PCIe AER Driver?
+----------------------------
+
+PCIe error signaling can occur on the PCIe link itself
+or on behalf of transactions initiated on the link. PCIe
+defines two error reporting paradigms: the baseline capability and
+the Advanced Error Reporting capability. The baseline capability is
+required of all PCIe components providing a minimum defined
+set of error reporting requirements. Advanced Error Reporting
+capability is implemented with a PCIe Advanced Error Reporting
+extended capability structure providing more robust error reporting.
+
+The PCIe AER driver provides the infrastructure to support PCIe Advanced
+Error Reporting capability. The PCIe AER driver provides three basic
+functions:
+
+ - Gathers the comprehensive error information if errors occurred.
+ - Reports error to the users.
+ - Performs error recovery actions.
+
+The AER driver only attaches to Root Ports and RCECs that support the PCIe
+AER capability.
+
+
+User Guide
+==========
+
+Include the PCIe AER Root Driver into the Linux Kernel
+------------------------------------------------------
+
+The PCIe AER driver is a Root Port service driver attached
+via the PCIe Port Bus driver. If a user wants to use it, the driver
+must be compiled. It is enabled with CONFIG_PCIEAER, which
+depends on CONFIG_PCIEPORTBUS.
+
+Load PCIe AER Root Driver
+-------------------------
+
+Some systems have AER support in firmware. Enabling Linux AER support at
+the same time the firmware handles AER would result in unpredictable
+behavior. Therefore, Linux does not handle AER events unless the firmware
+grants AER control to the OS via the ACPI _OSC method. See the PCI Firmware
+Specification for details regarding _OSC usage.
+
+AER error output
+----------------
+
+When a PCIe AER error is captured, an error message will be output to
+console. If it's a correctable error, it is output as a warning message.
+Otherwise, it is printed as an error. So users could choose different
+log level to filter out correctable error messages.
+
+Below shows an example::
+
+ 0000:50:00.0: PCIe Bus Error: severity=Uncorrectable (Fatal), type=Transaction Layer, (Requester ID)
+ 0000:50:00.0: device [8086:0329] error status/mask=00100000/00000000
+ 0000:50:00.0: [20] UnsupReq (First)
+ 0000:50:00.0: TLP Header: 0x04000001 0x00200a03 0x05010000 0x00050100
+
+In the example, 'Requester ID' means the ID of the device that sent
+the error message to the Root Port. Please refer to PCIe specs for other
+fields.
+
+AER Ratelimits
+--------------
+
+Since error messages can be generated for each transaction, we may see
+large volumes of errors reported. To prevent spammy devices from flooding
+the console/stalling execution, messages are throttled by device and error
+type (correctable vs. non-fatal uncorrectable). Fatal errors, including
+DPC errors, are not ratelimited.
+
+AER uses the default ratelimit of DEFAULT_RATELIMIT_BURST (10 events) over
+DEFAULT_RATELIMIT_INTERVAL (5 seconds).
+
+Ratelimits are exposed in the form of sysfs attributes and configurable.
+See Documentation/ABI/testing/sysfs-bus-pci-devices-aer.
+
+AER Statistics / Counters
+-------------------------
+
+When PCIe AER errors are captured, the counters / statistics are also exposed
+in the form of sysfs attributes which are documented at
+Documentation/ABI/testing/sysfs-bus-pci-devices-aer.
+
+Developer Guide
+===============
+
+To enable error recovery, a software driver must provide callbacks.
+
+To support AER better, developers need to understand how AER works.
+
+PCIe errors are classified into two types: correctable errors
+and uncorrectable errors. This classification is based on the impact
+of those errors, which may result in degraded performance or function
+failure.
+
+Correctable errors pose no impacts on the functionality of the
+interface. The PCIe protocol can recover without any software
+intervention or any loss of data. These errors are detected and
+corrected by hardware.
+
+Unlike correctable errors, uncorrectable
+errors impact functionality of the interface. Uncorrectable errors
+can cause a particular transaction or a particular PCIe link
+to be unreliable. Depending on those error conditions, uncorrectable
+errors are further classified into non-fatal errors and fatal errors.
+Non-fatal errors cause the particular transaction to be unreliable,
+but the PCIe link itself is fully functional. Fatal errors, on
+the other hand, cause the link to be unreliable.
+
+When PCIe error reporting is enabled, a device will automatically send an
+error message to the Root Port above it when it captures
+an error. The Root Port, upon receiving an error reporting message,
+internally processes and logs the error message in its AER
+Capability structure. Error information being logged includes storing
+the error reporting agent's Requester ID into the Error Source
+Identification Registers and setting the error bits of the Root Error
+Status Register accordingly. If AER error reporting is enabled in the Root
+Error Command Register, the Root Port generates an interrupt when an
+error is detected.
+
+Note that the errors as described above are related to the PCIe
+hierarchy and links. These errors do not include any device specific
+errors because device specific errors will still get sent directly to
+the device driver.
+
+Provide callbacks
+-----------------
+
+PCI error-recovery callbacks
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The PCIe AER Root driver uses error callbacks to coordinate
+with downstream device drivers associated with a hierarchy in question
+when performing error recovery actions.
+
+Data struct pci_driver has a pointer, err_handler, to point to
+pci_error_handlers who consists of a couple of callback function
+pointers. The AER driver follows the rules defined in
+pci-error-recovery.rst except PCIe-specific parts (see
+below). Please refer to pci-error-recovery.rst for detailed
+definitions of the callbacks.
+
+The sections below specify when to call the error callback functions.
+
+Correctable errors
+~~~~~~~~~~~~~~~~~~
+
+Correctable errors pose no impacts on the functionality of
+the interface. The PCIe protocol can recover without any
+software intervention or any loss of data. These errors do not
+require any recovery actions. The AER driver clears the device's
+correctable error status register accordingly and logs these errors.
+
+Uncorrectable (non-fatal and fatal) errors
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The AER driver performs a Secondary Bus Reset to recover from
+uncorrectable errors. The reset is applied at the port above
+the originating device: If the originating device is an Endpoint,
+only the Endpoint is reset. If on the other hand the originating
+device has subordinate devices, those are all affected by the
+reset as well.
+
+If the originating device is a Root Complex Integrated Endpoint,
+there's no port above where a Secondary Bus Reset could be applied.
+In this case, the AER driver instead applies a Function Level Reset.
+
+If an error message indicates a non-fatal error, performing a reset
+at upstream is not required. The AER driver calls error_detected(dev,
+pci_channel_io_normal) to all drivers associated within a hierarchy in
+question. For example::
+
+ Endpoint <==> Downstream Port B <==> Upstream Port A <==> Root Port
+
+If Upstream Port A captures an AER error, the hierarchy consists of
+Downstream Port B and Endpoint.
+
+A driver may return PCI_ERS_RESULT_CAN_RECOVER,
+PCI_ERS_RESULT_DISCONNECT, or PCI_ERS_RESULT_NEED_RESET, depending on
+whether it can recover without a reset, considers the device unrecoverable
+or needs a reset for recovery. If all affected drivers agree that they can
+recover without a reset, it is skipped. Should one driver request a reset,
+it overrides all other drivers.
+
+If an error message indicates a fatal error, kernel will broadcast
+error_detected(dev, pci_channel_io_frozen) to all drivers within
+a hierarchy in question. Then, performing a reset at upstream is
+necessary. If error_detected returns PCI_ERS_RESULT_CAN_RECOVER
+to indicate that recovery without a reset is possible, the error
+handling goes to mmio_enabled, but afterwards a reset is still
+performed.
+
+In other words, for non-fatal errors, drivers may opt in to a reset.
+But for fatal errors, they cannot opt out of a reset, based on the
+assumption that the link is unreliable.
+
+Frequently Asked Questions
+--------------------------
+
+Q:
+ What happens if a PCIe device driver does not provide an
+ error recovery handler (pci_driver->err_handler is equal to NULL)?
+
+A:
+ The devices attached with the driver won't be recovered.
+ The kernel will print out informational messages to identify
+ unrecoverable devices.
+
+
+Software error injection
+========================
+
+Debugging PCIe AER error recovery code is quite difficult because it
+is hard to trigger real hardware errors. Software based error
+injection can be used to fake various kinds of PCIe errors.
+
+First you should enable PCIe AER software error injection in kernel
+configuration, that is, following item should be in your .config.
+
+CONFIG_PCIEAER_INJECT=y or CONFIG_PCIEAER_INJECT=m
+
+After reboot with new kernel or insert the module, a device file named
+/dev/aer_inject should be created.
+
+Then, you need a user space tool named aer-inject, which can be gotten
+from:
+
+ https://github.com/intel/aer-inject.git
+
+More information about aer-inject can be found in the document in
+its source code.
diff --git a/Documentation/PCI/pcieaer-howto.txt b/Documentation/PCI/pcieaer-howto.txt
deleted file mode 100644
index 48ce7903e3c6..000000000000
--- a/Documentation/PCI/pcieaer-howto.txt
+++ /dev/null
@@ -1,267 +0,0 @@
- The PCI Express Advanced Error Reporting Driver Guide HOWTO
- T. Long Nguyen <tom.l.nguyen@intel.com>
- Yanmin Zhang <yanmin.zhang@intel.com>
- 07/29/2006
-
-
-1. Overview
-
-1.1 About this guide
-
-This guide describes the basics of the PCI Express Advanced Error
-Reporting (AER) driver and provides information on how to use it, as
-well as how to enable the drivers of endpoint devices to conform with
-PCI Express AER driver.
-
-1.2 Copyright (C) Intel Corporation 2006.
-
-1.3 What is the PCI Express AER Driver?
-
-PCI Express error signaling can occur on the PCI Express link itself
-or on behalf of transactions initiated on the link. PCI Express
-defines two error reporting paradigms: the baseline capability and
-the Advanced Error Reporting capability. The baseline capability is
-required of all PCI Express components providing a minimum defined
-set of error reporting requirements. Advanced Error Reporting
-capability is implemented with a PCI Express advanced error reporting
-extended capability structure providing more robust error reporting.
-
-The PCI Express AER driver provides the infrastructure to support PCI
-Express Advanced Error Reporting capability. The PCI Express AER
-driver provides three basic functions:
-
-- Gathers the comprehensive error information if errors occurred.
-- Reports error to the users.
-- Performs error recovery actions.
-
-AER driver only attaches root ports which support PCI-Express AER
-capability.
-
-
-2. User Guide
-
-2.1 Include the PCI Express AER Root Driver into the Linux Kernel
-
-The PCI Express AER Root driver is a Root Port service driver attached
-to the PCI Express Port Bus driver. If a user wants to use it, the driver
-has to be compiled. Option CONFIG_PCIEAER supports this capability. It
-depends on CONFIG_PCIEPORTBUS, so pls. set CONFIG_PCIEPORTBUS=y and
-CONFIG_PCIEAER = y.
-
-2.2 Load PCI Express AER Root Driver
-
-Some systems have AER support in firmware. Enabling Linux AER support at
-the same time the firmware handles AER may result in unpredictable
-behavior. Therefore, Linux does not handle AER events unless the firmware
-grants AER control to the OS via the ACPI _OSC method. See the PCI FW 3.0
-Specification for details regarding _OSC usage.
-
-2.3 AER error output
-
-When a PCIe AER error is captured, an error message will be output to
-console. If it's a correctable error, it is output as a warning.
-Otherwise, it is printed as an error. So users could choose different
-log level to filter out correctable error messages.
-
-Below shows an example:
-0000:50:00.0: PCIe Bus Error: severity=Uncorrected (Fatal), type=Transaction Layer, id=0500(Requester ID)
-0000:50:00.0: device [8086:0329] error status/mask=00100000/00000000
-0000:50:00.0: [20] Unsupported Request (First)
-0000:50:00.0: TLP Header: 04000001 00200a03 05010000 00050100
-
-In the example, 'Requester ID' means the ID of the device who sends
-the error message to root port. Pls. refer to pci express specs for
-other fields.
-
-2.4 AER Statistics / Counters
-
-When PCIe AER errors are captured, the counters / statistics are also exposed
-in the form of sysfs attributes which are documented at
-Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats
-
-3. Developer Guide
-
-To enable AER aware support requires a software driver to configure
-the AER capability structure within its device and to provide callbacks.
-
-To support AER better, developers need understand how AER does work
-firstly.
-
-PCI Express errors are classified into two types: correctable errors
-and uncorrectable errors. This classification is based on the impacts
-of those errors, which may result in degraded performance or function
-failure.
-
-Correctable errors pose no impacts on the functionality of the
-interface. The PCI Express protocol can recover without any software
-intervention or any loss of data. These errors are detected and
-corrected by hardware. Unlike correctable errors, uncorrectable
-errors impact functionality of the interface. Uncorrectable errors
-can cause a particular transaction or a particular PCI Express link
-to be unreliable. Depending on those error conditions, uncorrectable
-errors are further classified into non-fatal errors and fatal errors.
-Non-fatal errors cause the particular transaction to be unreliable,
-but the PCI Express link itself is fully functional. Fatal errors, on
-the other hand, cause the link to be unreliable.
-
-When AER is enabled, a PCI Express device will automatically send an
-error message to the PCIe root port above it when the device captures
-an error. The Root Port, upon receiving an error reporting message,
-internally processes and logs the error message in its PCI Express
-capability structure. Error information being logged includes storing
-the error reporting agent's requestor ID into the Error Source
-Identification Registers and setting the error bits of the Root Error
-Status Register accordingly. If AER error reporting is enabled in Root
-Error Command Register, the Root Port generates an interrupt if an
-error is detected.
-
-Note that the errors as described above are related to the PCI Express
-hierarchy and links. These errors do not include any device specific
-errors because device specific errors will still get sent directly to
-the device driver.
-
-3.1 Configure the AER capability structure
-
-AER aware drivers of PCI Express component need change the device
-control registers to enable AER. They also could change AER registers,
-including mask and severity registers. Helper function
-pci_enable_pcie_error_reporting could be used to enable AER. See
-section 3.3.
-
-3.2. Provide callbacks
-
-3.2.1 callback reset_link to reset pci express link
-
-This callback is used to reset the pci express physical link when a
-fatal error happens. The root port aer service driver provides a
-default reset_link function, but different upstream ports might
-have different specifications to reset pci express link, so all
-upstream ports should provide their own reset_link functions.
-
-In struct pcie_port_service_driver, a new pointer, reset_link, is
-added.
-
-pci_ers_result_t (*reset_link) (struct pci_dev *dev);
-
-Section 3.2.2.2 provides more detailed info on when to call
-reset_link.
-
-3.2.2 PCI error-recovery callbacks
-
-The PCI Express AER Root driver uses error callbacks to coordinate
-with downstream device drivers associated with a hierarchy in question
-when performing error recovery actions.
-
-Data struct pci_driver has a pointer, err_handler, to point to
-pci_error_handlers who consists of a couple of callback function
-pointers. AER driver follows the rules defined in
-pci-error-recovery.txt except pci express specific parts (e.g.
-reset_link). Pls. refer to pci-error-recovery.txt for detailed
-definitions of the callbacks.
-
-Below sections specify when to call the error callback functions.
-
-3.2.2.1 Correctable errors
-
-Correctable errors pose no impacts on the functionality of
-the interface. The PCI Express protocol can recover without any
-software intervention or any loss of data. These errors do not
-require any recovery actions. The AER driver clears the device's
-correctable error status register accordingly and logs these errors.
-
-3.2.2.2 Non-correctable (non-fatal and fatal) errors
-
-If an error message indicates a non-fatal error, performing link reset
-at upstream is not required. The AER driver calls error_detected(dev,
-pci_channel_io_normal) to all drivers associated within a hierarchy in
-question. for example,
-EndPoint<==>DownstreamPort B<==>UpstreamPort A<==>RootPort.
-If Upstream port A captures an AER error, the hierarchy consists of
-Downstream port B and EndPoint.
-
-A driver may return PCI_ERS_RESULT_CAN_RECOVER,
-PCI_ERS_RESULT_DISCONNECT, or PCI_ERS_RESULT_NEED_RESET, depending on
-whether it can recover or the AER driver calls mmio_enabled as next.
-
-If an error message indicates a fatal error, kernel will broadcast
-error_detected(dev, pci_channel_io_frozen) to all drivers within
-a hierarchy in question. Then, performing link reset at upstream is
-necessary. As different kinds of devices might use different approaches
-to reset link, AER port service driver is required to provide the
-function to reset link. Firstly, kernel looks for if the upstream
-component has an aer driver. If it has, kernel uses the reset_link
-callback of the aer driver. If the upstream component has no aer driver
-and the port is downstream port, we will perform a hot reset as the
-default by setting the Secondary Bus Reset bit of the Bridge Control
-register associated with the downstream port. As for upstream ports,
-they should provide their own aer service drivers with reset_link
-function. If error_detected returns PCI_ERS_RESULT_CAN_RECOVER and
-reset_link returns PCI_ERS_RESULT_RECOVERED, the error handling goes
-to mmio_enabled.
-
-3.3 helper functions
-
-3.3.1 int pci_enable_pcie_error_reporting(struct pci_dev *dev);
-pci_enable_pcie_error_reporting enables the device to send error
-messages to root port when an error is detected. Note that devices
-don't enable the error reporting by default, so device drivers need
-call this function to enable it.
-
-3.3.2 int pci_disable_pcie_error_reporting(struct pci_dev *dev);
-pci_disable_pcie_error_reporting disables the device to send error
-messages to root port when an error is detected.
-
-3.3.3 int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev);
-pci_cleanup_aer_uncorrect_error_status cleanups the uncorrectable
-error status register.
-
-3.4 Frequent Asked Questions
-
-Q: What happens if a PCI Express device driver does not provide an
-error recovery handler (pci_driver->err_handler is equal to NULL)?
-
-A: The devices attached with the driver won't be recovered. If the
-error is fatal, kernel will print out warning messages. Please refer
-to section 3 for more information.
-
-Q: What happens if an upstream port service driver does not provide
-callback reset_link?
-
-A: Fatal error recovery will fail if the errors are reported by the
-upstream ports who are attached by the service driver.
-
-Q: How does this infrastructure deal with driver that is not PCI
-Express aware?
-
-A: This infrastructure calls the error callback functions of the
-driver when an error happens. But if the driver is not aware of
-PCI Express, the device might not report its own errors to root
-port.
-
-Q: What modifications will that driver need to make it compatible
-with the PCI Express AER Root driver?
-
-A: It could call the helper functions to enable AER in devices and
-cleanup uncorrectable status register. Pls. refer to section 3.3.
-
-
-4. Software error injection
-
-Debugging PCIe AER error recovery code is quite difficult because it
-is hard to trigger real hardware errors. Software based error
-injection can be used to fake various kinds of PCIe errors.
-
-First you should enable PCIe AER software error injection in kernel
-configuration, that is, following item should be in your .config.
-
-CONFIG_PCIEAER_INJECT=y or CONFIG_PCIEAER_INJECT=m
-
-After reboot with new kernel or insert the module, a device file named
-/dev/aer_inject should be created.
-
-Then, you need a user space tool named aer-inject, which can be gotten
-from:
- https://git.kernel.org/cgit/linux/kernel/git/gong.chen/aer-inject.git/
-
-More information about aer-inject can be found in the document comes
-with its source code.
diff --git a/Documentation/PCI/pciebus-howto.rst b/Documentation/PCI/pciebus-howto.rst
new file mode 100644
index 000000000000..375d9ce171f6
--- /dev/null
+++ b/Documentation/PCI/pciebus-howto.rst
@@ -0,0 +1,228 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+===========================================
+The PCI Express Port Bus Driver Guide HOWTO
+===========================================
+
+:Author: Tom L Nguyen tom.l.nguyen@intel.com 11/03/2004
+:Copyright: |copy| 2004 Intel Corporation
+
+About this guide
+================
+
+This guide describes the basics of the PCI Express Port Bus driver
+and provides information on how to enable the service drivers to
+register/unregister with the PCI Express Port Bus Driver.
+
+
+What is the PCI Express Port Bus Driver
+=======================================
+
+A PCI Express Port is a logical PCI-PCI Bridge structure. There
+are two types of PCI Express Port: the Root Port and the Switch
+Port. The Root Port originates a PCI Express link from a PCI Express
+Root Complex and the Switch Port connects PCI Express links to
+internal logical PCI buses. The Switch Port, which has its secondary
+bus representing the switch's internal routing logic, is called the
+switch's Upstream Port. The switch's Downstream Port is bridging from
+switch's internal routing bus to a bus representing the downstream
+PCI Express link from the PCI Express Switch.
+
+A PCI Express Port can provide up to four distinct functions,
+referred to in this document as services, depending on its port type.
+PCI Express Port's services include native hotplug support (HP),
+power management event support (PME), advanced error reporting
+support (AER), and virtual channel support (VC). These services may
+be handled by a single complex driver or be individually distributed
+and handled by corresponding service drivers.
+
+Why use the PCI Express Port Bus Driver?
+========================================
+
+In existing Linux kernels, the Linux Device Driver Model allows a
+physical device to be handled by only a single driver. The PCI
+Express Port is a PCI-PCI Bridge device with multiple distinct
+services. To maintain a clean and simple solution each service
+may have its own software service driver. In this case several
+service drivers will compete for a single PCI-PCI Bridge device.
+For example, if the PCI Express Root Port native hotplug service
+driver is loaded first, it claims a PCI-PCI Bridge Root Port. The
+kernel therefore does not load other service drivers for that Root
+Port. In other words, it is impossible to have multiple service
+drivers load and run on a PCI-PCI Bridge device simultaneously
+using the current driver model.
+
+To enable multiple service drivers running simultaneously requires
+having a PCI Express Port Bus driver, which manages all populated
+PCI Express Ports and distributes all provided service requests
+to the corresponding service drivers as required. Some key
+advantages of using the PCI Express Port Bus driver are listed below:
+
+ - Allow multiple service drivers to run simultaneously on
+ a PCI-PCI Bridge Port device.
+
+ - Allow service drivers implemented in an independent
+ staged approach.
+
+ - Allow one service driver to run on multiple PCI-PCI Bridge
+ Port devices.
+
+ - Manage and distribute resources of a PCI-PCI Bridge Port
+ device to requested service drivers.
+
+Configuring the PCI Express Port Bus Driver vs. Service Drivers
+===============================================================
+
+Including the PCI Express Port Bus Driver Support into the Kernel
+-----------------------------------------------------------------
+
+Including the PCI Express Port Bus driver depends on whether the PCI
+Express support is included in the kernel config. The kernel will
+automatically include the PCI Express Port Bus driver as a kernel
+driver when the PCI Express support is enabled in the kernel.
+
+Enabling Service Driver Support
+-------------------------------
+
+PCI device drivers are implemented based on Linux Device Driver Model.
+All service drivers are PCI device drivers. As discussed above, it is
+impossible to load any service driver once the kernel has loaded the
+PCI Express Port Bus Driver. To meet the PCI Express Port Bus Driver
+Model requires some minimal changes on existing service drivers that
+imposes no impact on the functionality of existing service drivers.
+
+A service driver is required to use the two APIs shown below to
+register its service with the PCI Express Port Bus driver (see
+section 5.2.1 & 5.2.2). It is important that a service driver
+initializes the pcie_port_service_driver data structure, included in
+header file /include/linux/pcieport_if.h, before calling these APIs.
+Failure to do so will result an identity mismatch, which prevents
+the PCI Express Port Bus driver from loading a service driver.
+
+pcie_port_service_register
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+::
+
+ int pcie_port_service_register(struct pcie_port_service_driver *new)
+
+This API replaces the Linux Driver Model's pci_register_driver API. A
+service driver should always calls pcie_port_service_register at
+module init. Note that after service driver being loaded, calls
+such as pci_enable_device(dev) and pci_set_master(dev) are no longer
+necessary since these calls are executed by the PCI Port Bus driver.
+
+pcie_port_service_unregister
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+::
+
+ void pcie_port_service_unregister(struct pcie_port_service_driver *new)
+
+pcie_port_service_unregister replaces the Linux Driver Model's
+pci_unregister_driver. It's always called by service driver when a
+module exits.
+
+Sample Code
+~~~~~~~~~~~
+
+Below is sample service driver code to initialize the port service
+driver data structure.
+::
+
+ static struct pcie_port_service_id service_id[] = { {
+ .vendor = PCI_ANY_ID,
+ .device = PCI_ANY_ID,
+ .port_type = PCIE_RC_PORT,
+ .service_type = PCIE_PORT_SERVICE_AER,
+ }, { /* end: all zeroes */ }
+ };
+
+ static struct pcie_port_service_driver root_aerdrv = {
+ .name = (char *)device_name,
+ .id_table = service_id,
+
+ .probe = aerdrv_load,
+ .remove = aerdrv_unload,
+
+ .suspend = aerdrv_suspend,
+ .resume = aerdrv_resume,
+ };
+
+Below is a sample code for registering/unregistering a service
+driver.
+::
+
+ static int __init aerdrv_service_init(void)
+ {
+ int retval = 0;
+
+ retval = pcie_port_service_register(&root_aerdrv);
+ if (!retval) {
+ /*
+ * FIX ME
+ */
+ }
+ return retval;
+ }
+
+ static void __exit aerdrv_service_exit(void)
+ {
+ pcie_port_service_unregister(&root_aerdrv);
+ }
+
+ module_init(aerdrv_service_init);
+ module_exit(aerdrv_service_exit);
+
+Possible Resource Conflicts
+===========================
+
+Since all service drivers of a PCI-PCI Bridge Port device are
+allowed to run simultaneously, below lists a few of possible resource
+conflicts with proposed solutions.
+
+MSI and MSI-X Vector Resource
+-----------------------------
+
+Once MSI or MSI-X interrupts are enabled on a device, it stays in this
+mode until they are disabled again. Since service drivers of the same
+PCI-PCI Bridge port share the same physical device, if an individual
+service driver enables or disables MSI/MSI-X mode it may result
+unpredictable behavior.
+
+To avoid this situation all service drivers are not permitted to
+switch interrupt mode on its device. The PCI Express Port Bus driver
+is responsible for determining the interrupt mode and this should be
+transparent to service drivers. Service drivers need to know only
+the vector IRQ assigned to the field irq of struct pcie_device, which
+is passed in when the PCI Express Port Bus driver probes each service
+driver. Service drivers should use (struct pcie_device*)dev->irq to
+call request_irq/free_irq. In addition, the interrupt mode is stored
+in the field interrupt_mode of struct pcie_device.
+
+PCI Memory/IO Mapped Regions
+----------------------------
+
+Service drivers for PCI Express Power Management (PME), Advanced
+Error Reporting (AER), Hot-Plug (HP) and Virtual Channel (VC) access
+PCI configuration space on the PCI Express port. In all cases the
+registers accessed are independent of each other. This patch assumes
+that all service drivers will be well behaved and not overwrite
+other service driver's configuration settings.
+
+PCI Config Registers
+--------------------
+
+Each service driver runs its PCI config operations on its own
+capability structure except the PCI Express capability structure,
+that is shared between many drivers including the service drivers.
+RMW Capability accessors (pcie_capability_clear_and_set_word(),
+pcie_capability_set_word(), and pcie_capability_clear_word()) protect
+a selected set of PCI Express Capability Registers:
+
+* Link Control Register
+* Root Control Register
+* Link Control 2 Register
+
+Any change to those registers should be performed using RMW accessors to
+avoid problems due to concurrent updates. For the up-to-date list of
+protected registers, see pcie_capability_clear_and_set_word().
diff --git a/Documentation/PCI/sysfs-pci.rst b/Documentation/PCI/sysfs-pci.rst
new file mode 100644
index 000000000000..f495185aa88a
--- /dev/null
+++ b/Documentation/PCI/sysfs-pci.rst
@@ -0,0 +1,138 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============================================
+Accessing PCI device resources through sysfs
+============================================
+
+sysfs, usually mounted at /sys, provides access to PCI resources on platforms
+that support it. For example, a given bus might look like this::
+
+ /sys/devices/pci0000:17
+ |-- 0000:17:00.0
+ | |-- class
+ | |-- config
+ | |-- device
+ | |-- enable
+ | |-- irq
+ | |-- local_cpus
+ | |-- remove
+ | |-- resource
+ | |-- resource0
+ | |-- resource1
+ | |-- resource2
+ | |-- revision
+ | |-- rom
+ | |-- subsystem_device
+ | |-- subsystem_vendor
+ | `-- vendor
+ `-- ...
+
+The topmost element describes the PCI domain and bus number. In this case,
+the domain number is 0000 and the bus number is 17 (both values are in hex).
+This bus contains a single function device in slot 0. The domain and bus
+numbers are reproduced for convenience. Under the device directory are several
+files, each with their own function.
+
+ =================== =====================================================
+ file function
+ =================== =====================================================
+ class PCI class (ascii, ro)
+ config PCI config space (binary, rw)
+ device PCI device (ascii, ro)
+ enable Whether the device is enabled (ascii, rw)
+ irq IRQ number (ascii, ro)
+ local_cpus nearby CPU mask (cpumask, ro)
+ remove remove device from kernel's list (ascii, wo)
+ resource PCI resource host addresses (ascii, ro)
+ resource0..N PCI resource N, if present (binary, mmap, rw\ [1]_)
+ resource0_wc..N_wc PCI WC map resource N, if prefetchable (binary, mmap)
+ revision PCI revision (ascii, ro)
+ rom PCI ROM resource, if present (binary, ro)
+ subsystem_device PCI subsystem device (ascii, ro)
+ subsystem_vendor PCI subsystem vendor (ascii, ro)
+ vendor PCI vendor (ascii, ro)
+ =================== =====================================================
+
+::
+
+ ro - read only file
+ rw - file is readable and writable
+ wo - write only file
+ mmap - file is mmapable
+ ascii - file contains ascii text
+ binary - file contains binary data
+ cpumask - file contains a cpumask type
+
+.. [1] rw for IORESOURCE_IO (I/O port) regions only
+
+The read only files are informational, writes to them will be ignored, with
+the exception of the 'rom' file. Writable files can be used to perform
+actions on the device (e.g. changing config space, detaching a device).
+mmapable files are available via an mmap of the file at offset 0 and can be
+used to do actual device programming from userspace. Note that some platforms
+don't support mmapping of certain resources, so be sure to check the return
+value from any attempted mmap. The most notable of these are I/O port
+resources, which also provide read/write access.
+
+The 'enable' file provides a counter that indicates how many times the device
+has been enabled. If the 'enable' file currently returns '4', and a '1' is
+echoed into it, it will then return '5'. Echoing a '0' into it will decrease
+the count. Even when it returns to 0, though, some of the initialisation
+may not be reversed.
+
+The 'rom' file is special in that it provides read-only access to the device's
+ROM file, if available. It's disabled by default, however, so applications
+should write the string "1" to the file to enable it before attempting a read
+call, and disable it following the access by writing "0" to the file. Note
+that the device must be enabled for a rom read to return data successfully.
+In the event a driver is not bound to the device, it can be enabled using the
+'enable' file, documented above.
+
+The 'remove' file is used to remove the PCI device, by writing a non-zero
+integer to the file. This does not involve any kind of hot-plug functionality,
+e.g. powering off the device. The device is removed from the kernel's list of
+PCI devices, the sysfs directory for it is removed, and the device will be
+removed from any drivers attached to it. Removal of PCI root buses is
+disallowed.
+
+Accessing legacy resources through sysfs
+----------------------------------------
+
+Legacy I/O port and ISA memory resources are also provided in sysfs if the
+underlying platform supports them. They're located in the PCI class hierarchy,
+e.g.::
+
+ /sys/class/pci_bus/0000:17/
+ |-- bridge -> ../../../devices/pci0000:17
+ |-- cpuaffinity
+ |-- legacy_io
+ `-- legacy_mem
+
+The legacy_io file is a read/write file that can be used by applications to
+do legacy port I/O. The application should open the file, seek to the desired
+port (e.g. 0x3e8) and do a read or a write of 1, 2 or 4 bytes. The legacy_mem
+file should be mmapped with an offset corresponding to the memory offset
+desired, e.g. 0xa0000 for the VGA frame buffer. The application can then
+simply dereference the returned pointer (after checking for errors of course)
+to access legacy memory space.
+
+Supporting PCI access on new platforms
+--------------------------------------
+
+In order to support PCI resource mapping as described above, Linux platform
+code should ideally define ARCH_GENERIC_PCI_MMAP_RESOURCE and use the generic
+implementation of that functionality. To support the historical interface of
+mmap() through files in /proc/bus/pci, platforms may also set HAVE_PCI_MMAP.
+
+Alternatively, platforms which set HAVE_PCI_MMAP may provide their own
+implementation of pci_mmap_resource_range() instead of defining
+ARCH_GENERIC_PCI_MMAP_RESOURCE.
+
+Platforms which support write-combining maps of PCI resources must define
+arch_can_pci_mmap_wc() which shall evaluate to non-zero at runtime when
+write-combining is permitted. Platforms which support maps of I/O resources
+define arch_can_pci_mmap_io() similarly.
+
+Legacy resources are protected by the HAVE_PCI_LEGACY define. Platforms
+wishing to support legacy functionality should define it and provide
+pci_legacy_read, pci_legacy_write and pci_mmap_legacy_page_range functions.
diff --git a/Documentation/PCI/tph.rst b/Documentation/PCI/tph.rst
new file mode 100644
index 000000000000..e8993be64fd6
--- /dev/null
+++ b/Documentation/PCI/tph.rst
@@ -0,0 +1,132 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+
+===========
+TPH Support
+===========
+
+:Copyright: 2024 Advanced Micro Devices, Inc.
+:Authors: - Eric van Tassell <eric.vantassell@amd.com>
+ - Wei Huang <wei.huang2@amd.com>
+
+
+Overview
+========
+
+TPH (TLP Processing Hints) is a PCIe feature that allows endpoint devices
+to provide optimization hints for requests that target memory space.
+These hints, in a format called Steering Tags (STs), are embedded in the
+requester's TLP headers, enabling the system hardware, such as the Root
+Complex, to better manage platform resources for these requests.
+
+For example, on platforms with TPH-based direct data cache injection
+support, an endpoint device can include appropriate STs in its DMA
+traffic to specify which cache the data should be written to. This allows
+the CPU core to have a higher probability of getting data from cache,
+potentially improving performance and reducing latency in data
+processing.
+
+
+How to Use TPH
+==============
+
+TPH is presented as an optional extended capability in PCIe. The Linux
+kernel handles TPH discovery during boot, but it is up to the device
+driver to request TPH enablement if it is to be utilized. Once enabled,
+the driver uses the provided API to obtain the Steering Tag for the
+target memory and to program the ST into the device's ST table.
+
+Enable TPH support in Linux
+---------------------------
+
+To support TPH, the kernel must be built with the CONFIG_PCIE_TPH option
+enabled.
+
+Manage TPH
+----------
+
+To enable TPH for a device, use the following function::
+
+ int pcie_enable_tph(struct pci_dev *pdev, int mode);
+
+This function enables TPH support for device with a specific ST mode.
+Current supported modes include:
+
+ * PCI_TPH_ST_NS_MODE - NO ST Mode
+ * PCI_TPH_ST_IV_MODE - Interrupt Vector Mode
+ * PCI_TPH_ST_DS_MODE - Device Specific Mode
+
+`pcie_enable_tph()` checks whether the requested mode is actually
+supported by the device before enabling. The device driver can figure out
+which TPH mode is supported and can be properly enabled based on the
+return value of `pcie_enable_tph()`.
+
+To disable TPH, use the following function::
+
+ void pcie_disable_tph(struct pci_dev *pdev);
+
+Manage ST
+---------
+
+Steering Tags are platform specific. PCIe spec does not specify where STs
+are from. Instead PCI Firmware Specification defines an ACPI _DSM method
+(see the `Revised _DSM for Cache Locality TPH Features ECN
+<https://members.pcisig.com/wg/PCI-SIG/document/15470>`_) for retrieving
+STs for a target memory of various properties. This method is what is
+supported in this implementation.
+
+To retrieve a Steering Tag for a target memory associated with a specific
+CPU, use the following function::
+
+ int pcie_tph_get_cpu_st(struct pci_dev *pdev, enum tph_mem_type type,
+ unsigned int cpu_uid, u16 *tag);
+
+The `type` argument is used to specify the memory type, either volatile
+or persistent, of the target memory. The `cpu_uid` argument specifies the
+CPU where the memory is associated to.
+
+After the ST value is retrieved, the device driver can use the following
+function to write the ST into the device::
+
+ int pcie_tph_set_st_entry(struct pci_dev *pdev, unsigned int index,
+ u16 tag);
+
+The `index` argument is the ST table entry index the ST tag will be
+written into. `pcie_tph_set_st_entry()` will figure out the proper
+location of ST table, either in the MSI-X table or in the TPH Extended
+Capability space, and write the Steering Tag into the ST entry pointed by
+the `index` argument.
+
+It is completely up to the driver to decide how to use these TPH
+functions. For example a network device driver can use the TPH APIs above
+to update the Steering Tag when interrupt affinity of a RX/TX queue has
+been changed. Here is a sample code for IRQ affinity notifier:
+
+.. code-block:: c
+
+ static void irq_affinity_notified(struct irq_affinity_notify *notify,
+ const cpumask_t *mask)
+ {
+ struct drv_irq *irq;
+ unsigned int cpu_id;
+ u16 tag;
+
+ irq = container_of(notify, struct drv_irq, affinity_notify);
+ cpumask_copy(irq->cpu_mask, mask);
+
+ /* Pick a right CPU as the target - here is just an example */
+ cpu_id = cpumask_first(irq->cpu_mask);
+
+ if (pcie_tph_get_cpu_st(irq->pdev, TPH_MEM_TYPE_VM, cpu_id,
+ &tag))
+ return;
+
+ if (pcie_tph_set_st_entry(irq->pdev, irq->msix_nr, tag))
+ return;
+ }
+
+Disable TPH system-wide
+-----------------------
+
+There is a kernel command line option available to control TPH feature:
+ * "notph": TPH will be disabled for all endpoint devices.
diff --git a/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCUBH.svg b/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCUBH.svg
deleted file mode 100644
index 9bbb1944f962..000000000000
--- a/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCUBH.svg
+++ /dev/null
@@ -1,499 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
-
-<!-- CreationDate: Wed Dec 9 17:26:09 2015 -->
-
-<!-- Magnification: 2.000 -->
-
-<svg
- xmlns:dc="http://purl.org/dc/elements/1.1/"
- xmlns:cc="http://creativecommons.org/ns#"
- xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
- xmlns:svg="http://www.w3.org/2000/svg"
- xmlns="http://www.w3.org/2000/svg"
- xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
- xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
- width="5.7in"
- height="6.6in"
- viewBox="-44 -44 6838 7888"
- id="svg2"
- version="1.1"
- inkscape:version="0.48.4 r9939"
- sodipodi:docname="BigTreeClassicRCUBH.fig">
- <metadata
- id="metadata110">
- <rdf:RDF>
- <cc:Work
- rdf:about="">
- <dc:format>image/svg+xml</dc:format>
- <dc:type
- rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
- <dc:title></dc:title>
- </cc:Work>
- </rdf:RDF>
- </metadata>
- <defs
- id="defs108">
- <marker
- inkscape:stockid="Arrow1Mend"
- orient="auto"
- refY="0.0"
- refX="0.0"
- id="Arrow1Mend"
- style="overflow:visible;">
- <path
- id="path3868"
- d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
- style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
- transform="scale(0.4) rotate(180) translate(10,0)" />
- </marker>
- <marker
- inkscape:stockid="Arrow2Mend"
- orient="auto"
- refY="0.0"
- refX="0.0"
- id="Arrow2Mend"
- style="overflow:visible;">
- <path
- id="path3886"
- style="fill-rule:evenodd;stroke-width:0.62500000;stroke-linejoin:round;"
- d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
- transform="scale(0.6) rotate(180) translate(0,0)" />
- </marker>
- </defs>
- <sodipodi:namedview
- pagecolor="#ffffff"
- bordercolor="#666666"
- borderopacity="1"
- objecttolerance="10"
- gridtolerance="10"
- guidetolerance="10"
- inkscape:pageopacity="0"
- inkscape:pageshadow="2"
- inkscape:window-width="878"
- inkscape:window-height="1148"
- id="namedview106"
- showgrid="false"
- inkscape:zoom="1.3547758"
- inkscape:cx="256.5"
- inkscape:cy="297"
- inkscape:window-x="45"
- inkscape:window-y="24"
- inkscape:window-maximized="0"
- inkscape:current-layer="g4" />
- <g
- style="stroke-width:.025in; fill:none"
- id="g4">
- <!-- Line: box -->
- <rect
- x="450"
- y="0"
- width="6300"
- height="7350"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
- id="rect6" />
- <!-- Line: box -->
- <rect
- x="4950"
- y="4950"
- width="1500"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
- id="rect8" />
- <!-- Line: box -->
- <rect
- x="750"
- y="600"
- width="5700"
- height="3750"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
- id="rect10" />
- <!-- Line: box -->
- <rect
- x="0"
- y="450"
- width="6300"
- height="7350"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
- id="rect12" />
- <!-- Line: box -->
- <rect
- x="300"
- y="1050"
- width="5700"
- height="3750"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
- id="rect14" />
- <!-- Circle -->
- <circle
- cx="2850"
- cy="3900"
- r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
- id="circle16" />
- <!-- Circle -->
- <circle
- cx="3150"
- cy="3900"
- r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
- id="circle18" />
- <!-- Circle -->
- <circle
- cx="3450"
- cy="3900"
- r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
- id="circle20" />
- <!-- Circle -->
- <circle
- cx="1350"
- cy="5100"
- r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
- id="circle22" />
- <!-- Circle -->
- <circle
- cx="1650"
- cy="5100"
- r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
- id="circle24" />
- <!-- Circle -->
- <circle
- cx="1950"
- cy="5100"
- r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
- id="circle26" />
- <!-- Circle -->
- <circle
- cx="4350"
- cy="5100"
- r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
- id="circle28" />
- <!-- Circle -->
- <circle
- cx="4650"
- cy="5100"
- r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
- id="circle30" />
- <!-- Circle -->
- <circle
- cx="4950"
- cy="5100"
- r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
- id="circle32" />
- <!-- Line -->
- <polyline
- points="1350,3450 2350,2590 "
- style="stroke:#00d1d1;stroke-width:30.0045575;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline34" />
- <!-- Arrowhead on XXXpoint 1350 3450 - 2444 2510-->
- <!-- Line -->
- <polyline
- points="4950,3450 3948,2590 "
- style="stroke:#00d1d1;stroke-width:30.0045575;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline38" />
- <!-- Arrowhead on XXXpoint 4950 3450 - 3854 2510-->
- <!-- Line: box -->
- <rect
- x="750"
- y="3450"
- width="1800"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
- id="rect42" />
- <!-- Line -->
- <polyline
- points="2250,5400 2250,4414 "
- style="stroke:#00d1d1;stroke-width:30.0045575;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline44" />
- <!-- Arrowhead on XXXpoint 2250 5400 - 2250 4290-->
- <!-- Line: box -->
- <rect
- x="1500"
- y="5400"
- width="1500"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
- id="rect48" />
- <!-- Line: box -->
- <rect
- x="300"
- y="6600"
- width="1500"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
- id="rect50" />
- <!-- Line: box -->
- <rect
- x="3750"
- y="3450"
- width="1800"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
- id="rect52" />
- <!-- Line: box -->
- <rect
- x="4500"
- y="5400"
- width="1500"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
- id="rect54" />
- <!-- Line: box -->
- <rect
- x="3300"
- y="6600"
- width="1500"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
- id="rect56" />
- <!-- Line: box -->
- <rect
- x="2250"
- y="1650"
- width="1800"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
- id="rect58" />
- <!-- Text -->
- <text
- xml:space="preserve"
- x="6450"
- y="300"
- fill="#000000"
- font-family="Helvetica"
- font-style="normal"
- font-weight="normal"
- font-size="192"
- text-anchor="end"
- id="text60">rcu_bh</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="3150"
- y="1950"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text62">struct</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="3150"
- y="2250"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text64">rcu_node</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="1650"
- y="3750"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text66">struct</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="1650"
- y="4050"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text68">rcu_node</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="4650"
- y="4050"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text70">rcu_node</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="4650"
- y="3750"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text72">struct</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="2250"
- y="5700"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text74">struct</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="2250"
- y="6000"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text76">rcu_data</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="1050"
- y="6900"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text78">struct</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="1050"
- y="7200"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text80">rcu_data</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="5250"
- y="5700"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text82">struct</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="5250"
- y="6000"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text84">rcu_data</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="4050"
- y="6900"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text86">struct</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="4050"
- y="7200"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text88">rcu_data</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="450"
- y="1350"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="start"
- id="text90">struct rcu_state</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="6000"
- y="750"
- fill="#000000"
- font-family="Helvetica"
- font-style="normal"
- font-weight="normal"
- font-size="192"
- text-anchor="end"
- id="text92">rcu_sched</text>
- <!-- Line -->
- <polyline
- points="5250,5400 5250,4414 "
- style="stroke:#00d1d1;stroke-width:30.0045575;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline94" />
- <!-- Arrowhead on XXXpoint 5250 5400 - 5250 4290-->
- <!-- Line -->
- <polyline
- points="4050,6600 4050,4414 "
- style="stroke:#00d1d1;stroke-width:30.0045575;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline98" />
- <!-- Arrowhead on XXXpoint 4050 6600 - 4050 4290-->
- <!-- Line -->
- <polyline
- points="1050,6600 1050,4414 "
- style="stroke:#00d1d1;stroke-width:30.0045575;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline102" />
- <!-- Arrowhead on XXXpoint 1050 6600 - 1050 4290-->
- </g>
-</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCUBHdyntick.svg b/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCUBHdyntick.svg
deleted file mode 100644
index 21ba7823479d..000000000000
--- a/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCUBHdyntick.svg
+++ /dev/null
@@ -1,695 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
-
-<!-- CreationDate: Wed Dec 9 17:20:02 2015 -->
-
-<!-- Magnification: 2.000 -->
-
-<svg
- xmlns:dc="http://purl.org/dc/elements/1.1/"
- xmlns:cc="http://creativecommons.org/ns#"
- xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
- xmlns:svg="http://www.w3.org/2000/svg"
- xmlns="http://www.w3.org/2000/svg"
- xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
- xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
- width="5.7in"
- height="8.6in"
- viewBox="-44 -44 6838 10288"
- id="svg2"
- version="1.1"
- inkscape:version="0.48.4 r9939"
- sodipodi:docname="BigTreeClassicRCUBHdyntick.fig">
- <metadata
- id="metadata166">
- <rdf:RDF>
- <cc:Work
- rdf:about="">
- <dc:format>image/svg+xml</dc:format>
- <dc:type
- rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
- <dc:title></dc:title>
- </cc:Work>
- </rdf:RDF>
- </metadata>
- <defs
- id="defs164">
- <marker
- inkscape:stockid="Arrow1Mend"
- orient="auto"
- refY="0.0"
- refX="0.0"
- id="Arrow1Mend"
- style="overflow:visible;">
- <path
- id="path3924"
- d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
- style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
- transform="scale(0.4) rotate(180) translate(10,0)" />
- </marker>
- <marker
- inkscape:stockid="Arrow2Lend"
- orient="auto"
- refY="0.0"
- refX="0.0"
- id="Arrow2Lend"
- style="overflow:visible;">
- <path
- id="path3936"
- style="fill-rule:evenodd;stroke-width:0.62500000;stroke-linejoin:round;"
- d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
- transform="scale(1.1) rotate(180) translate(1,0)" />
- </marker>
- </defs>
- <sodipodi:namedview
- pagecolor="#ffffff"
- bordercolor="#666666"
- borderopacity="1"
- objecttolerance="10"
- gridtolerance="10"
- guidetolerance="10"
- inkscape:pageopacity="0"
- inkscape:pageshadow="2"
- inkscape:window-width="845"
- inkscape:window-height="988"
- id="namedview162"
- showgrid="false"
- inkscape:zoom="1.0452196"
- inkscape:cx="256.5"
- inkscape:cy="387.00003"
- inkscape:window-x="356"
- inkscape:window-y="61"
- inkscape:window-maximized="0"
- inkscape:current-layer="g4" />
- <g
- style="stroke-width:.025in; fill:none"
- id="g4">
- <!-- Line: box -->
- <rect
- x="450"
- y="0"
- width="6300"
- height="7350"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
- id="rect6" />
- <!-- Line: box -->
- <rect
- x="4950"
- y="4950"
- width="1500"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
- id="rect8" />
- <!-- Line: box -->
- <rect
- x="750"
- y="600"
- width="5700"
- height="3750"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
- id="rect10" />
- <!-- Line -->
- <polyline
- points="5250,8100 5688,5912 "
- style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
- id="polyline12" />
- <!-- Arrowhead on XXXpoint 5250 8100 - 5710 5790-->
- <polyline
- points="5714 6068 5704 5822 5598 6044 "
- style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
- id="polyline14" />
- <!-- Line -->
- <polyline
- points="4050,9300 4486,7262 "
- style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
- id="polyline16" />
- <!-- Arrowhead on XXXpoint 4050 9300 - 4512 7140-->
- <polyline
- points="4514 7418 4506 7172 4396 7394 "
- style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
- id="polyline18" />
- <!-- Line -->
- <polyline
- points="1040,9300 1476,7262 "
- style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
- id="polyline20" />
- <!-- Arrowhead on XXXpoint 1040 9300 - 1502 7140-->
- <polyline
- points="1504 7418 1496 7172 1386 7394 "
- style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
- id="polyline22" />
- <!-- Line -->
- <polyline
- points="2240,8100 2676,6062 "
- style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
- id="polyline24" />
- <!-- Arrowhead on XXXpoint 2240 8100 - 2702 5940-->
- <polyline
- points="2704 6218 2696 5972 2586 6194 "
- style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
- id="polyline26" />
- <!-- Line: box -->
- <rect
- x="0"
- y="450"
- width="6300"
- height="7350"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
- id="rect28" />
- <!-- Line: box -->
- <rect
- x="300"
- y="1050"
- width="5700"
- height="3750"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
- id="rect30" />
- <!-- Line -->
- <polyline
- points="1350,3450 2350,2590 "
- style="stroke:#00d1d1;stroke-width:30.0045575;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline32" />
- <!-- Arrowhead on XXXpoint 1350 3450 - 2444 2510-->
- <!-- Line -->
- <polyline
- points="4950,3450 3948,2590 "
- style="stroke:#00d1d1;stroke-width:30.0045575;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline36" />
- <!-- Arrowhead on XXXpoint 4950 3450 - 3854 2510-->
- <!-- Line -->
- <polyline
- points="4050,6600 4050,4414 "
- style="stroke:#00d1d1;stroke-width:30.00455750000000066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline40" />
- <!-- Arrowhead on XXXpoint 4050 6600 - 4050 4290-->
- <!-- Line -->
- <polyline
- points="1050,6600 1050,4414 "
- style="stroke:#00d1d1;stroke-width:30.00455750000000066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline44" />
- <!-- Arrowhead on XXXpoint 1050 6600 - 1050 4290-->
- <!-- Line -->
- <polyline
- points="2250,5400 2250,4414 "
- style="stroke:#00d1d1;stroke-width:30.00455750000000066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline48" />
- <!-- Arrowhead on XXXpoint 2250 5400 - 2250 4290-->
- <!-- Line -->
- <polyline
- points="2250,8100 2250,6364 "
- style="stroke:#00ff00;stroke-width:30;stroke-linejoin:miter;stroke-linecap:butt;marker-end:url(#Arrow1Mend)"
- id="polyline52" />
- <!-- Arrowhead on XXXpoint 2250 8100 - 2250 6240-->
- <!-- Line -->
- <polyline
- points="1050,9300 1050,7564 "
- style="stroke:#00ff00;stroke-width:30;stroke-linejoin:miter;stroke-linecap:butt;marker-end:url(#Arrow1Mend)"
- id="polyline56" />
- <!-- Arrowhead on XXXpoint 1050 9300 - 1050 7440-->
- <!-- Line -->
- <polyline
- points="4050,9300 4050,7564 "
- style="stroke:#00ff00;stroke-width:30;stroke-linejoin:miter;stroke-linecap:butt;marker-end:url(#Arrow1Mend)"
- id="polyline60" />
- <!-- Arrowhead on XXXpoint 4050 9300 - 4050 7440-->
- <!-- Line -->
- <polyline
- points="5250,8100 5250,6364 "
- style="stroke:#00ff00;stroke-width:30;stroke-linejoin:miter;stroke-linecap:butt;marker-end:url(#Arrow1Mend)"
- id="polyline64" />
- <!-- Arrowhead on XXXpoint 5250 8100 - 5250 6240-->
- <!-- Circle -->
- <circle
- cx="2850"
- cy="3900"
- r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
- id="circle68" />
- <!-- Circle -->
- <circle
- cx="3150"
- cy="3900"
- r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
- id="circle70" />
- <!-- Circle -->
- <circle
- cx="3450"
- cy="3900"
- r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
- id="circle72" />
- <!-- Circle -->
- <circle
- cx="1350"
- cy="5100"
- r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
- id="circle74" />
- <!-- Circle -->
- <circle
- cx="1650"
- cy="5100"
- r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
- id="circle76" />
- <!-- Circle -->
- <circle
- cx="1950"
- cy="5100"
- r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
- id="circle78" />
- <!-- Circle -->
- <circle
- cx="4350"
- cy="5100"
- r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
- id="circle80" />
- <!-- Circle -->
- <circle
- cx="4650"
- cy="5100"
- r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
- id="circle82" />
- <!-- Circle -->
- <circle
- cx="4950"
- cy="5100"
- r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
- id="circle84" />
- <!-- Line: box -->
- <rect
- x="750"
- y="3450"
- width="1800"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
- id="rect86" />
- <!-- Line: box -->
- <rect
- x="300"
- y="6600"
- width="1500"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
- id="rect88" />
- <!-- Line: box -->
- <rect
- x="3750"
- y="3450"
- width="1800"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
- id="rect90" />
- <!-- Line: box -->
- <rect
- x="4500"
- y="5400"
- width="1500"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
- id="rect92" />
- <!-- Line: box -->
- <rect
- x="3300"
- y="6600"
- width="1500"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
- id="rect94" />
- <!-- Line: box -->
- <rect
- x="2250"
- y="1650"
- width="1800"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
- id="rect96" />
- <!-- Line: box -->
- <rect
- x="0"
- y="9300"
- width="2100"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
- id="rect98" />
- <!-- Line: box -->
- <rect
- x="1350"
- y="8100"
- width="2100"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
- id="rect100" />
- <!-- Line: box -->
- <rect
- x="3000"
- y="9300"
- width="2100"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
- id="rect102" />
- <!-- Line: box -->
- <rect
- x="4350"
- y="8100"
- width="2100"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
- id="rect104" />
- <!-- Line: box -->
- <rect
- x="1500"
- y="5400"
- width="1500"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
- id="rect106" />
- <!-- Text -->
- <text
- xml:space="preserve"
- x="6450"
- y="300"
- fill="#000000"
- font-family="Helvetica"
- font-style="normal"
- font-weight="normal"
- font-size="192"
- text-anchor="end"
- id="text108">rcu_bh</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="3150"
- y="1950"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text110">struct</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="3150"
- y="2250"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text112">rcu_node</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="1650"
- y="3750"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text114">struct</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="1650"
- y="4050"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text116">rcu_node</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="4650"
- y="4050"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text118">rcu_node</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="4650"
- y="3750"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text120">struct</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="2250"
- y="5700"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text122">struct</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="2250"
- y="6000"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text124">rcu_data</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="1050"
- y="6900"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text126">struct</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="1050"
- y="7200"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text128">rcu_data</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="5250"
- y="5700"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text130">struct</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="5250"
- y="6000"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text132">rcu_data</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="4050"
- y="6900"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text134">struct</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="4050"
- y="7200"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text136">rcu_data</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="450"
- y="1350"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="start"
- id="text138">struct rcu_state</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="1050"
- y="9600"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text140">struct</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="1050"
- y="9900"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text142">rcu_dynticks</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="4050"
- y="9600"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text144">struct</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="4050"
- y="9900"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text146">rcu_dynticks</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="2400"
- y="8400"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text148">struct</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="2400"
- y="8700"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text150">rcu_dynticks</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="5400"
- y="8400"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text152">struct</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="5400"
- y="8700"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text154">rcu_dynticks</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="6000"
- y="750"
- fill="#000000"
- font-family="Helvetica"
- font-style="normal"
- font-weight="normal"
- font-size="192"
- text-anchor="end"
- id="text156">rcu_sched</text>
- <!-- Line -->
- <polyline
- points="5250,5400 5250,4414 "
- style="stroke:#00d1d1;stroke-width:30.00455750000000066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline158" />
- <!-- Arrowhead on XXXpoint 5250 5400 - 5250 4290-->
- </g>
-</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntick.svg b/Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntick.svg
deleted file mode 100644
index 15adcac036c7..000000000000
--- a/Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntick.svg
+++ /dev/null
@@ -1,741 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
-
-<!-- CreationDate: Wed Dec 9 17:32:59 2015 -->
-
-<!-- Magnification: 2.000 -->
-
-<svg
- xmlns:dc="http://purl.org/dc/elements/1.1/"
- xmlns:cc="http://creativecommons.org/ns#"
- xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
- xmlns:svg="http://www.w3.org/2000/svg"
- xmlns="http://www.w3.org/2000/svg"
- xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
- xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
- width="6.1in"
- height="8.9in"
- viewBox="-44 -44 7288 10738"
- id="svg2"
- version="1.1"
- inkscape:version="0.48.4 r9939"
- sodipodi:docname="BigTreePreemptRCUBHdyntick.fig">
- <metadata
- id="metadata182">
- <rdf:RDF>
- <cc:Work
- rdf:about="">
- <dc:format>image/svg+xml</dc:format>
- <dc:type
- rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
- <dc:title></dc:title>
- </cc:Work>
- </rdf:RDF>
- </metadata>
- <defs
- id="defs180">
- <marker
- inkscape:stockid="Arrow1Mend"
- orient="auto"
- refY="0.0"
- refX="0.0"
- id="Arrow1Mend"
- style="overflow:visible;">
- <path
- id="path3940"
- d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
- style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
- transform="scale(0.4) rotate(180) translate(10,0)" />
- </marker>
- </defs>
- <sodipodi:namedview
- pagecolor="#ffffff"
- bordercolor="#666666"
- borderopacity="1"
- objecttolerance="10"
- gridtolerance="10"
- guidetolerance="10"
- inkscape:pageopacity="0"
- inkscape:pageshadow="2"
- inkscape:window-width="874"
- inkscape:window-height="1148"
- id="namedview178"
- showgrid="false"
- inkscape:zoom="1.2097379"
- inkscape:cx="274.5"
- inkscape:cy="400.49997"
- inkscape:window-x="946"
- inkscape:window-y="24"
- inkscape:window-maximized="0"
- inkscape:current-layer="g4" />
- <g
- style="stroke-width:.025in; fill:none"
- id="g4">
- <!-- Line: box -->
- <rect
- x="900"
- y="0"
- width="6300"
- height="7350"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
- id="rect6" />
- <!-- Line: box -->
- <rect
- x="1200"
- y="600"
- width="5700"
- height="3750"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
- id="rect8" />
- <!-- Line: box -->
- <rect
- x="5400"
- y="4950"
- width="1500"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
- id="rect10" />
- <!-- Line: box -->
- <rect
- x="450"
- y="450"
- width="6300"
- height="7350"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
- id="rect12" />
- <!-- Line: box -->
- <rect
- x="750"
- y="1050"
- width="5700"
- height="3750"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
- id="rect14" />
- <!-- Line: box -->
- <rect
- x="4950"
- y="5400"
- width="1500"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
- id="rect16" />
- <!-- Line -->
- <polyline
- points="5250,8550 5688,6362 "
- style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
- id="polyline18" />
- <!-- Arrowhead on XXXpoint 5250 8550 - 5710 6240-->
- <polyline
- points="5714 6518 5704 6272 5598 6494 "
- style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
- id="polyline20" />
- <!-- Line -->
- <polyline
- points="4050,9750 4486,7712 "
- style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
- id="polyline22" />
- <!-- Arrowhead on XXXpoint 4050 9750 - 4512 7590-->
- <polyline
- points="4514 7868 4506 7622 4396 7844 "
- style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
- id="polyline24" />
- <!-- Line -->
- <polyline
- points="1040,9750 1476,7712 "
- style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
- id="polyline26" />
- <!-- Arrowhead on XXXpoint 1040 9750 - 1502 7590-->
- <polyline
- points="1504 7868 1496 7622 1386 7844 "
- style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
- id="polyline28" />
- <!-- Line -->
- <polyline
- points="2240,8550 2676,6512 "
- style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
- id="polyline30" />
- <!-- Arrowhead on XXXpoint 2240 8550 - 2702 6390-->
- <polyline
- points="2704 6668 2696 6422 2586 6644 "
- style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
- id="polyline32" />
- <!-- Line -->
- <polyline
- points="4050,9750 5682,6360 "
- style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
- id="polyline34" />
- <!-- Arrowhead on XXXpoint 4050 9750 - 5736 6246-->
- <polyline
- points="5672 6518 5722 6276 5562 6466 "
- style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
- id="polyline36" />
- <!-- Line -->
- <polyline
- points="1010,9750 2642,6360 "
- style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
- id="polyline38" />
- <!-- Arrowhead on XXXpoint 1010 9750 - 2696 6246-->
- <polyline
- points="2632 6518 2682 6276 2522 6466 "
- style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
- id="polyline40" />
- <!-- Line: box -->
- <rect
- x="0"
- y="900"
- width="6300"
- height="7350"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
- id="rect42" />
- <!-- Line: box -->
- <rect
- x="300"
- y="1500"
- width="5700"
- height="3750"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
- id="rect44" />
- <!-- Line -->
- <polyline
- points="1350,3900 2350,3040 "
- style="stroke:#00d1d1;stroke-width:30.00205472;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline46" />
- <!-- Arrowhead on XXXpoint 1350 3900 - 2444 2960-->
- <!-- Line -->
- <polyline
- points="4950,3900 3948,3040 "
- style="stroke:#00d1d1;stroke-width:30.00205472;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline50" />
- <!-- Arrowhead on XXXpoint 4950 3900 - 3854 2960-->
- <!-- Line -->
- <polyline
- points="4050,7050 4050,4864 "
- style="stroke:#00d1d1;stroke-width:30.00205472;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline54" />
- <!-- Arrowhead on XXXpoint 4050 7050 - 4050 4740-->
- <!-- Line -->
- <polyline
- points="1050,7050 1050,4864 "
- style="stroke:#00d1d1;stroke-width:30.00205472;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline58" />
- <!-- Arrowhead on XXXpoint 1050 7050 - 1050 4740-->
- <!-- Line -->
- <polyline
- points="2250,5850 2250,4864 "
- style="stroke:#00d1d1;stroke-width:30.00205472;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline62" />
- <!-- Arrowhead on XXXpoint 2250 5850 - 2250 4740-->
- <!-- Line -->
- <polyline
- points="2250,8550 2250,6814 "
- style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
- id="polyline66" />
- <!-- Arrowhead on XXXpoint 2250 8550 - 2250 6690-->
- <!-- Line -->
- <polyline
- points="1050,9750 1050,8014 "
- style="stroke:#00ff00;stroke-width:30.00205472;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline70" />
- <!-- Arrowhead on XXXpoint 1050 9750 - 1050 7890-->
- <!-- Line -->
- <polyline
- points="4050,9750 4050,8014 "
- style="stroke:#00ff00;stroke-width:30.00205472;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline74" />
- <!-- Arrowhead on XXXpoint 4050 9750 - 4050 7890-->
- <!-- Line -->
- <polyline
- points="5250,8550 5250,6814 "
- style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
- id="polyline78" />
- <!-- Arrowhead on XXXpoint 5250 8550 - 5250 6690-->
- <!-- Circle -->
- <circle
- cx="2850"
- cy="4350"
- r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
- id="circle82" />
- <!-- Circle -->
- <circle
- cx="3150"
- cy="4350"
- r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
- id="circle84" />
- <!-- Circle -->
- <circle
- cx="3450"
- cy="4350"
- r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
- id="circle86" />
- <!-- Circle -->
- <circle
- cx="1350"
- cy="5550"
- r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
- id="circle88" />
- <!-- Circle -->
- <circle
- cx="1650"
- cy="5550"
- r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
- id="circle90" />
- <!-- Circle -->
- <circle
- cx="1950"
- cy="5550"
- r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
- id="circle92" />
- <!-- Circle -->
- <circle
- cx="4350"
- cy="5550"
- r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
- id="circle94" />
- <!-- Circle -->
- <circle
- cx="4650"
- cy="5550"
- r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
- id="circle96" />
- <!-- Circle -->
- <circle
- cx="4950"
- cy="5550"
- r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
- id="circle98" />
- <!-- Line: box -->
- <rect
- x="750"
- y="3900"
- width="1800"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
- id="rect100" />
- <!-- Line: box -->
- <rect
- x="300"
- y="7050"
- width="1500"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
- id="rect102" />
- <!-- Line: box -->
- <rect
- x="3750"
- y="3900"
- width="1800"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
- id="rect104" />
- <!-- Line: box -->
- <rect
- x="4500"
- y="5850"
- width="1500"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
- id="rect106" />
- <!-- Line: box -->
- <rect
- x="3300"
- y="7050"
- width="1500"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
- id="rect108" />
- <!-- Line: box -->
- <rect
- x="2250"
- y="2100"
- width="1800"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
- id="rect110" />
- <!-- Line: box -->
- <rect
- x="0"
- y="9750"
- width="2100"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
- id="rect112" />
- <!-- Line: box -->
- <rect
- x="1350"
- y="8550"
- width="2100"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
- id="rect114" />
- <!-- Line: box -->
- <rect
- x="3000"
- y="9750"
- width="2100"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
- id="rect116" />
- <!-- Line: box -->
- <rect
- x="4350"
- y="8550"
- width="2100"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
- id="rect118" />
- <!-- Line: box -->
- <rect
- x="1500"
- y="5850"
- width="1500"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
- id="rect120" />
- <!-- Text -->
- <text
- xml:space="preserve"
- x="6450"
- y="750"
- fill="#000000"
- font-family="Helvetica"
- font-style="normal"
- font-weight="normal"
- font-size="192"
- text-anchor="end"
- id="text122">rcu_bh</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="3150"
- y="2400"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text124">struct</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="3150"
- y="2700"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text126">rcu_node</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="1650"
- y="4200"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text128">struct</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="1650"
- y="4500"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text130">rcu_node</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="4650"
- y="4500"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text132">rcu_node</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="4650"
- y="4200"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text134">struct</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="2250"
- y="6150"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text136">struct</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="2250"
- y="6450"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text138">rcu_data</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="1050"
- y="7350"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text140">struct</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="1050"
- y="7650"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text142">rcu_data</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="5250"
- y="6150"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text144">struct</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="5250"
- y="6450"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text146">rcu_data</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="4050"
- y="7350"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text148">struct</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="4050"
- y="7650"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text150">rcu_data</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="450"
- y="1800"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="start"
- id="text152">struct rcu_state</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="1050"
- y="10050"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text154">struct</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="1050"
- y="10350"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text156">rcu_dynticks</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="4050"
- y="10050"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text158">struct</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="4050"
- y="10350"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text160">rcu_dynticks</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="2400"
- y="8850"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text162">struct</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="2400"
- y="9150"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text164">rcu_dynticks</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="5400"
- y="8850"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text166">struct</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="5400"
- y="9150"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text168">rcu_dynticks</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="6900"
- y="300"
- fill="#000000"
- font-family="Helvetica"
- font-style="normal"
- font-weight="normal"
- font-size="192"
- text-anchor="end"
- id="text170">rcu_preempt</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="6000"
- y="1200"
- fill="#000000"
- font-family="Helvetica"
- font-style="normal"
- font-weight="normal"
- font-size="192"
- text-anchor="end"
- id="text172">rcu_sched</text>
- <!-- Line -->
- <polyline
- points="5250,5850 5250,4864 "
- style="stroke:#00d1d1;stroke-width:30.00205472;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline174" />
- <!-- Arrowhead on XXXpoint 5250 5850 - 5250 4740-->
- </g>
-</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntickCB.svg b/Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntickCB.svg
index bbc3801470d0..3a1a4f85dc3a 100644
--- a/Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntickCB.svg
+++ b/Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntickCB.svg
@@ -13,12 +13,12 @@
xmlns="http://www.w3.org/2000/svg"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
- width="7.4in"
- height="9.9in"
- viewBox="-44 -44 8938 11938"
+ width="7.4000001in"
+ height="7.9000001in"
+ viewBox="-44 -44 8938 9526.283"
id="svg2"
version="1.1"
- inkscape:version="0.48.4 r9939"
+ inkscape:version="0.92.2pre0 (973e216, 2017-07-25)"
sodipodi:docname="BigTreePreemptRCUBHdyntickCB.svg">
<metadata
id="metadata212">
@@ -37,15 +37,46 @@
<marker
inkscape:stockid="Arrow1Mend"
orient="auto"
- refY="0.0"
- refX="0.0"
+ refY="0"
+ refX="0"
+ id="marker1177"
+ style="overflow:visible"
+ inkscape:isstock="true">
+ <path
+ id="path897"
+ d="M 0,0 5,-5 -12.5,0 5,5 Z"
+ style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1"
+ transform="matrix(-0.4,0,0,-0.4,-4,0)"
+ inkscape:connector-curvature="0" />
+ </marker>
+ <marker
+ inkscape:stockid="Arrow1Lend"
+ orient="auto"
+ refY="0"
+ refX="0"
+ id="Arrow1Lend"
+ style="overflow:visible"
+ inkscape:isstock="true">
+ <path
+ id="path891"
+ d="M 0,0 5,-5 -12.5,0 5,5 Z"
+ style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1"
+ transform="matrix(-0.8,0,0,-0.8,-10,0)"
+ inkscape:connector-curvature="0" />
+ </marker>
+ <marker
+ inkscape:stockid="Arrow1Mend"
+ orient="auto"
+ refY="0"
+ refX="0"
id="Arrow1Mend"
- style="overflow:visible;">
+ style="overflow:visible">
<path
id="path3970"
- d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
- style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
- transform="scale(0.4) rotate(180) translate(10,0)" />
+ d="M 0,0 5,-5 -12.5,0 5,5 Z"
+ style="fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt"
+ transform="matrix(-0.4,0,0,-0.4,-4,0)"
+ inkscape:connector-curvature="0" />
</marker>
</defs>
<sodipodi:namedview
@@ -57,802 +88,575 @@
guidetolerance="10"
inkscape:pageopacity="0"
inkscape:pageshadow="2"
- inkscape:window-width="881"
- inkscape:window-height="1128"
+ inkscape:window-width="1920"
+ inkscape:window-height="1019"
id="namedview208"
showgrid="false"
inkscape:zoom="1.0195195"
- inkscape:cx="333"
- inkscape:cy="445.49997"
- inkscape:window-x="936"
- inkscape:window-y="24"
- inkscape:window-maximized="0"
+ inkscape:cx="166.25478"
+ inkscape:cy="362.18693"
+ inkscape:window-x="0"
+ inkscape:window-y="0"
+ inkscape:window-maximized="1"
inkscape:current-layer="g4" />
<g
- style="stroke-width:.025in; fill:none"
- id="g4">
+ style="fill:none;stroke-width:0.025in"
+ id="g4"
+ transform="translate(0,-2415.6743)">
<!-- Line: box -->
- <rect
- x="900"
- y="0"
- width="6300"
- height="7350"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
- id="rect6" />
<!-- Line: box -->
- <rect
- x="1200"
- y="600"
- width="5700"
- height="3750"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
- id="rect8" />
<!-- Line: box -->
- <rect
- x="5400"
- y="4950"
- width="1500"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
- id="rect10" />
<!-- Line: box -->
- <rect
- x="450"
- y="450"
- width="6300"
- height="7350"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
- id="rect12" />
<!-- Line: box -->
- <rect
- x="750"
- y="1050"
- width="5700"
- height="3750"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
- id="rect14" />
<!-- Line: box -->
- <rect
- x="4950"
- y="5400"
- width="1500"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
- id="rect16" />
<!-- Line -->
- <polyline
- points="5250,8550 5688,6362 "
- style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
- id="polyline18" />
<!-- Arrowhead on XXXpoint 5250 8550 - 5710 6240-->
<polyline
points="5714 6518 5704 6272 5598 6494 "
- style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
- id="polyline20" />
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8"
+ id="polyline20"
+ transform="matrix(1,0,0,0.95854605,12.340758,1579.9033)" />
<!-- Line -->
- <polyline
- points="4050,9750 4486,7712 "
- style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
- id="polyline22" />
<!-- Arrowhead on XXXpoint 4050 9750 - 4512 7590-->
<polyline
points="4514 7868 4506 7622 4396 7844 "
- style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
- id="polyline24" />
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8"
+ id="polyline24"
+ transform="matrix(1,0,0,0.95854605,12.340758,1579.9033)" />
<!-- Line -->
- <polyline
- points="1040,9750 1476,7712 "
- style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
- id="polyline26" />
<!-- Arrowhead on XXXpoint 1040 9750 - 1502 7590-->
<polyline
points="1504 7868 1496 7622 1386 7844 "
- style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
- id="polyline28" />
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8"
+ id="polyline28"
+ transform="matrix(1,0,0,0.95854605,12.340758,1579.9033)" />
<!-- Line -->
- <polyline
- points="2240,8550 2676,6512 "
- style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
- id="polyline30" />
<!-- Arrowhead on XXXpoint 2240 8550 - 2702 6390-->
<polyline
points="2704 6668 2696 6422 2586 6644 "
- style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
- id="polyline32" />
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8"
+ id="polyline32"
+ transform="matrix(1,0,0,0.95854605,12.340758,1579.9033)" />
<!-- Line -->
- <polyline
- points="4050,9600 5692,6062 "
- style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
- id="polyline34" />
<!-- Arrowhead on XXXpoint 4050 9600 - 5744 5948-->
<polyline
points="5682 6220 5730 5978 5574 6170 "
- style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
- id="polyline36" />
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8"
+ id="polyline36"
+ transform="matrix(1,0,0,0.95854605,12.340758,1579.9033)" />
<!-- Line -->
- <polyline
- points="1086,9600 2728,6062 "
- style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
- id="polyline38" />
<!-- Arrowhead on XXXpoint 1086 9600 - 2780 5948-->
<polyline
points="2718 6220 2766 5978 2610 6170 "
- style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
- id="polyline40" />
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8"
+ id="polyline40"
+ transform="matrix(1,0,0,0.95854605,12.340758,1579.9033)" />
<!-- Line: box -->
<rect
- x="0"
- y="900"
+ x="12.340758"
+ y="2442.5947"
width="6300"
- height="7350"
+ height="7045.3135"
rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+ style="fill:#ffffff;stroke:#000000;stroke-width:29.37160873;stroke-linecap:butt;stroke-linejoin:miter"
id="rect42" />
<!-- Line: box -->
<rect
- x="300"
- y="1500"
+ x="312.34076"
+ y="3017.7224"
width="5700"
- height="3750"
+ height="3594.5476"
rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+ style="fill:#ffff00;stroke:#000000;stroke-width:29.37160873;stroke-linecap:butt;stroke-linejoin:miter"
id="rect44" />
<!-- Line -->
<polyline
points="1350,3900 2350,3040 "
- style="stroke:#00d1d1;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline46" />
+ style="stroke:#00d1d1;stroke-width:29.99464035;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline46"
+ transform="matrix(1,0,0,0.95854605,12.340758,1579.9033)" />
<!-- Arrowhead on XXXpoint 1350 3900 - 2444 2960-->
<!-- Line -->
<polyline
points="4950,3900 3948,3040 "
- style="stroke:#00d1d1;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline50" />
+ style="stroke:#00d1d1;stroke-width:29.99464035;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline50"
+ transform="matrix(1,0,0,0.95854605,12.340758,1579.9033)" />
<!-- Arrowhead on XXXpoint 4950 3900 - 3854 2960-->
<!-- Line -->
<polyline
points="4050,7050 4050,4864 "
- style="stroke:#00d1d1;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline54" />
+ style="stroke:#00d1d1;stroke-width:29.99464035;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline54"
+ transform="matrix(1,0,0,0.95854605,12.340758,1579.9033)" />
<!-- Arrowhead on XXXpoint 4050 7050 - 4050 4740-->
<!-- Line -->
<polyline
points="1050,7050 1050,4864 "
- style="stroke:#00d1d1;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline58" />
+ style="stroke:#00d1d1;stroke-width:29.99464035;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline58"
+ transform="matrix(1,0,0,0.95854605,12.340758,1579.9033)" />
<!-- Arrowhead on XXXpoint 1050 7050 - 1050 4740-->
<!-- Line -->
<polyline
points="2250,5850 2250,4864 "
- style="stroke:#00d1d1;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline62" />
+ style="stroke:#00d1d1;stroke-width:29.99464035;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline62"
+ transform="matrix(1,0,0,0.95854605,12.340758,1579.9033)" />
<!-- Arrowhead on XXXpoint 2250 5850 - 2250 4740-->
<!-- Line -->
- <polyline
- points="2250,8550 2250,6814 "
- style="stroke:#00ff00;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline66" />
<!-- Arrowhead on XXXpoint 2250 8550 - 2250 6690-->
<!-- Line -->
- <polyline
- points="1050,9750 1050,8014 "
- style="stroke:#00ff00;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline70" />
<!-- Arrowhead on XXXpoint 1050 9750 - 1050 7890-->
<!-- Line -->
- <polyline
- points="4050,9750 4050,8014 "
- style="stroke:#00ff00;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline74" />
<!-- Arrowhead on XXXpoint 4050 9750 - 4050 7890-->
<!-- Line -->
- <polyline
- points="5250,8550 5250,6814 "
- style="stroke:#00ff00;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline78" />
<!-- Arrowhead on XXXpoint 5250 8550 - 5250 6690-->
<!-- Line -->
- <polyline
- points="6000,6300 8048,7910 "
- style="stroke:#87cfff;stroke-width:30;stroke-linejoin:miter;stroke-linecap:butt;marker-end:url(#Arrow1Mend)"
- id="polyline82" />
<!-- Arrowhead on XXXpoint 6000 6300 - 8146 7986-->
<!-- Circle -->
- <circle
- cx="2850"
- cy="4350"
- r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
- id="circle86" />
+ <ellipse
+ cx="2862.3408"
+ cy="5749.5786"
+ style="fill:#000000;stroke:#000000;stroke-width:13.70675087"
+ id="circle86"
+ rx="76"
+ ry="72.849495" />
<!-- Circle -->
- <circle
- cx="3150"
- cy="4350"
- r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
- id="circle88" />
+ <ellipse
+ cx="3162.3408"
+ cy="5749.5786"
+ style="fill:#000000;stroke:#000000;stroke-width:13.70675087"
+ id="circle88"
+ rx="76"
+ ry="72.849495" />
<!-- Circle -->
- <circle
- cx="3450"
- cy="4350"
- r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
- id="circle90" />
+ <ellipse
+ cx="3462.3408"
+ cy="5749.5786"
+ style="fill:#000000;stroke:#000000;stroke-width:13.70675087"
+ id="circle90"
+ rx="76"
+ ry="72.849495" />
<!-- Circle -->
- <circle
- cx="1350"
- cy="5550"
- r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
- id="circle92" />
+ <ellipse
+ cx="1362.3407"
+ cy="6899.834"
+ style="fill:#000000;stroke:#000000;stroke-width:13.70675087"
+ id="circle92"
+ rx="76"
+ ry="72.849495" />
<!-- Circle -->
- <circle
- cx="1650"
- cy="5550"
- r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
- id="circle94" />
+ <ellipse
+ cx="1662.3407"
+ cy="6899.834"
+ style="fill:#000000;stroke:#000000;stroke-width:13.70675087"
+ id="circle94"
+ rx="76"
+ ry="72.849495" />
<!-- Circle -->
- <circle
- cx="1950"
- cy="5550"
- r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
- id="circle96" />
+ <ellipse
+ cx="1962.3407"
+ cy="6899.834"
+ style="fill:#000000;stroke:#000000;stroke-width:13.70675087"
+ id="circle96"
+ rx="76"
+ ry="72.849495" />
<!-- Circle -->
- <circle
- cx="4350"
- cy="5550"
- r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
- id="circle98" />
+ <ellipse
+ cx="4362.3408"
+ cy="6899.834"
+ style="fill:#000000;stroke:#000000;stroke-width:13.70675087"
+ id="circle98"
+ rx="76"
+ ry="72.849495" />
<!-- Circle -->
- <circle
- cx="4650"
- cy="5550"
- r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
- id="circle100" />
+ <ellipse
+ cx="4662.3408"
+ cy="6899.834"
+ style="fill:#000000;stroke:#000000;stroke-width:13.70675087"
+ id="circle100"
+ rx="76"
+ ry="72.849495" />
<!-- Circle -->
- <circle
- cx="4950"
- cy="5550"
- r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
- id="circle102" />
+ <ellipse
+ cx="4962.3408"
+ cy="6899.834"
+ style="fill:#000000;stroke:#000000;stroke-width:13.70675087"
+ id="circle102"
+ rx="76"
+ ry="72.849495" />
<!-- Line: box -->
<rect
- x="7350"
- y="7950"
+ x="6745.3027"
+ y="8146.0654"
width="1500"
- height="900"
+ height="862.69141"
rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ style="stroke:#000000;stroke-width:29.37160873;stroke-linecap:butt;stroke-linejoin:miter"
id="rect104" />
<!-- Line: box -->
<rect
- x="7350"
- y="9450"
+ x="6745.3027"
+ y="9583.8857"
width="1500"
- height="900"
+ height="862.69141"
rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ style="stroke:#000000;stroke-width:29.37160873;stroke-linecap:butt;stroke-linejoin:miter"
id="rect106" />
<!-- Line -->
<polyline
points="8100,8850 8100,9384 "
- style="stroke:#000000;stroke-width:30;stroke-linejoin:miter;stroke-linecap:butt;marker-end:url(#Arrow1Mend)"
- id="polyline108" />
+ style="stroke:#000000;stroke-width:30;stroke-linecap:butt;stroke-linejoin:miter;marker-end:url(#Arrow1Mend)"
+ id="polyline108"
+ transform="matrix(1,0,0,0.95854605,-604.69715,525.62477)" />
<!-- Arrowhead on XXXpoint 8100 8850 - 8100 9510-->
<!-- Line: box -->
<rect
- x="7350"
- y="10950"
+ x="6745.3027"
+ y="11021.704"
width="1500"
- height="900"
+ height="862.69141"
rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ style="stroke:#000000;stroke-width:29.37160873;stroke-linecap:butt;stroke-linejoin:miter"
id="rect112" />
<!-- Line -->
<polyline
points="8100,10350 8100,10884 "
- style="stroke:#000000;stroke-width:30;stroke-linejoin:miter;stroke-linecap:butt;marker-end:url(#Arrow1Mend)"
- id="polyline114" />
+ style="stroke:#000000;stroke-width:30;stroke-linecap:butt;stroke-linejoin:miter;marker-end:url(#Arrow1Mend)"
+ id="polyline114"
+ transform="matrix(1,0,0,0.95854605,-604.69715,525.62477)" />
<!-- Arrowhead on XXXpoint 8100 10350 - 8100 11010-->
<!-- Line: box -->
<rect
- x="750"
- y="3900"
+ x="762.34076"
+ y="5318.2324"
width="1800"
- height="900"
+ height="862.69141"
rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ style="fill:#ffbfbf;stroke:#000000;stroke-width:29.37160873;stroke-linecap:butt;stroke-linejoin:miter"
id="rect118" />
<!-- Line: box -->
<rect
- x="300"
- y="7050"
+ x="312.34076"
+ y="8337.6533"
width="1500"
- height="900"
+ height="862.69141"
rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ style="fill:#87cfff;stroke:#000000;stroke-width:29.37160873;stroke-linecap:butt;stroke-linejoin:miter"
id="rect120" />
<!-- Line: box -->
<rect
- x="3750"
- y="3900"
+ x="3762.3408"
+ y="5318.2324"
width="1800"
- height="900"
+ height="862.69141"
rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ style="fill:#ffbfbf;stroke:#000000;stroke-width:29.37160873;stroke-linecap:butt;stroke-linejoin:miter"
id="rect122" />
<!-- Line: box -->
<rect
- x="4500"
- y="5850"
+ x="4512.3408"
+ y="7187.3975"
width="1500"
- height="900"
+ height="862.69141"
rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ style="fill:#87cfff;stroke:#000000;stroke-width:29.37160873;stroke-linecap:butt;stroke-linejoin:miter"
id="rect124" />
<!-- Line: box -->
<rect
- x="3300"
- y="7050"
+ x="3312.3408"
+ y="8337.6533"
width="1500"
- height="900"
+ height="862.69141"
rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ style="fill:#87cfff;stroke:#000000;stroke-width:29.37160873;stroke-linecap:butt;stroke-linejoin:miter"
id="rect126" />
<!-- Line: box -->
<rect
- x="2250"
- y="2100"
+ x="2262.3408"
+ y="3592.8503"
width="1800"
- height="900"
+ height="862.69141"
rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ style="fill:#ffbfbf;stroke:#000000;stroke-width:29.37160873;stroke-linecap:butt;stroke-linejoin:miter"
id="rect128" />
<!-- Line: box -->
- <rect
- x="0"
- y="9750"
- width="2100"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
- id="rect130" />
<!-- Line: box -->
- <rect
- x="1350"
- y="8550"
- width="2100"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
- id="rect132" />
<!-- Line: box -->
- <rect
- x="3000"
- y="9750"
- width="2100"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
- id="rect134" />
<!-- Line: box -->
- <rect
- x="4350"
- y="8550"
- width="2100"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
- id="rect136" />
<!-- Line: box -->
<rect
- x="1500"
- y="5850"
+ x="1512.3407"
+ y="7187.3975"
width="1500"
- height="900"
+ height="862.69141"
rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ style="fill:#87cfff;stroke:#000000;stroke-width:29.37160873;stroke-linecap:butt;stroke-linejoin:miter"
id="rect138" />
<!-- Text -->
<text
xml:space="preserve"
- x="8100"
- y="8250"
- fill="#000000"
- font-family="Courier"
+ x="7338.3037"
+ y="8614.0625"
font-style="normal"
font-weight="bold"
font-size="192"
- text-anchor="middle"
- id="text140">struct</text>
+ id="text140"
+ style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:middle;fill:#000000;stroke-width:0.02447634in"
+ transform="scale(1.0213945,0.97905363)">struct</text>
<!-- Text -->
<text
xml:space="preserve"
- x="8100"
- y="8550"
- fill="#000000"
- font-family="Courier"
+ x="7338.3037"
+ y="8907.7783"
font-style="normal"
font-weight="bold"
font-size="192"
- text-anchor="middle"
- id="text142">rcu_head</text>
+ id="text142"
+ style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:middle;fill:#000000;stroke-width:0.02447634in"
+ transform="scale(1.0213945,0.97905363)">rcu_head</text>
<!-- Text -->
<text
xml:space="preserve"
- x="8100"
- y="9750"
- fill="#000000"
- font-family="Courier"
+ x="7338.3037"
+ y="10082.644"
font-style="normal"
font-weight="bold"
font-size="192"
- text-anchor="middle"
- id="text144">struct</text>
+ id="text144"
+ style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:middle;fill:#000000;stroke-width:0.02447634in"
+ transform="scale(1.0213945,0.97905363)">struct</text>
<!-- Text -->
<text
xml:space="preserve"
- x="8100"
- y="10050"
- fill="#000000"
- font-family="Courier"
+ x="7338.3037"
+ y="10376.36"
font-style="normal"
font-weight="bold"
font-size="192"
- text-anchor="middle"
- id="text146">rcu_head</text>
+ id="text146"
+ style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:middle;fill:#000000;stroke-width:0.02447634in"
+ transform="scale(1.0213945,0.97905363)">rcu_head</text>
<!-- Text -->
<text
xml:space="preserve"
- x="8100"
- y="11250"
- fill="#000000"
- font-family="Courier"
+ x="7338.3037"
+ y="11551.224"
font-style="normal"
font-weight="bold"
font-size="192"
- text-anchor="middle"
- id="text148">struct</text>
+ id="text148"
+ style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:middle;fill:#000000;stroke-width:0.02447634in"
+ transform="scale(1.0213945,0.97905363)">struct</text>
<!-- Text -->
<text
xml:space="preserve"
- x="8100"
- y="11550"
- fill="#000000"
- font-family="Courier"
+ x="7338.3037"
+ y="11844.94"
font-style="normal"
font-weight="bold"
font-size="192"
- text-anchor="middle"
- id="text150">rcu_head</text>
+ id="text150"
+ style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:middle;fill:#000000;stroke-width:0.02447634in"
+ transform="scale(1.0213945,0.97905363)">rcu_head</text>
<!-- Text -->
<text
xml:space="preserve"
- x="6000"
- y="1200"
- fill="#000000"
- font-family="Helvetica"
+ x="5886.4043"
+ y="2788.5688"
font-style="normal"
font-weight="normal"
font-size="192"
- text-anchor="end"
- id="text152">rcu_sched</text>
+ id="text152"
+ style="font-style:normal;font-weight:normal;font-size:187.978302px;font-family:Helvetica;text-anchor:end;fill:#000000;stroke-width:0.02447634in"
+ transform="scale(1.0213945,0.97905363)">rcu_state</text>
<!-- Text -->
- <text
- xml:space="preserve"
- x="6450"
- y="750"
- fill="#000000"
- font-family="Helvetica"
- font-style="normal"
- font-weight="normal"
- font-size="192"
- text-anchor="end"
- id="text154">rcu_bh</text>
<!-- Text -->
<text
xml:space="preserve"
- x="3150"
- y="2400"
- fill="#000000"
- font-family="Courier"
+ x="3096.1016"
+ y="3963.4336"
font-style="normal"
font-weight="bold"
font-size="192"
- text-anchor="middle"
- id="text156">struct</text>
+ id="text156"
+ style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:middle;fill:#000000;stroke-width:0.02447634in"
+ transform="scale(1.0213945,0.97905363)">struct</text>
<!-- Text -->
<text
xml:space="preserve"
- x="3150"
- y="2700"
- fill="#000000"
- font-family="Courier"
+ x="3096.1016"
+ y="4257.1494"
font-style="normal"
font-weight="bold"
font-size="192"
- text-anchor="middle"
- id="text158">rcu_node</text>
+ id="text158"
+ style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:middle;fill:#000000;stroke-width:0.02447634in"
+ transform="scale(1.0213945,0.97905363)">rcu_node</text>
<!-- Text -->
<text
xml:space="preserve"
- x="1650"
- y="4200"
- fill="#000000"
- font-family="Courier"
+ x="1627.5209"
+ y="5725.7305"
font-style="normal"
font-weight="bold"
font-size="192"
- text-anchor="middle"
- id="text160">struct</text>
+ id="text160"
+ style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:middle;fill:#000000;stroke-width:0.02447634in"
+ transform="scale(1.0213945,0.97905363)">struct</text>
<!-- Text -->
<text
xml:space="preserve"
- x="1650"
- y="4500"
- fill="#000000"
- font-family="Courier"
+ x="1627.5209"
+ y="6019.4463"
font-style="normal"
font-weight="bold"
font-size="192"
- text-anchor="middle"
- id="text162">rcu_node</text>
+ id="text162"
+ style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:middle;fill:#000000;stroke-width:0.02447634in"
+ transform="scale(1.0213945,0.97905363)">rcu_node</text>
<!-- Text -->
<text
xml:space="preserve"
- x="4650"
- y="4500"
- fill="#000000"
- font-family="Courier"
+ x="4564.6821"
+ y="6019.4463"
font-style="normal"
font-weight="bold"
font-size="192"
- text-anchor="middle"
- id="text164">rcu_node</text>
+ id="text164"
+ style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:middle;fill:#000000;stroke-width:0.02447634in"
+ transform="scale(1.0213945,0.97905363)">rcu_node</text>
<!-- Text -->
<text
xml:space="preserve"
- x="4650"
- y="4200"
- fill="#000000"
- font-family="Courier"
+ x="4564.6821"
+ y="5725.7305"
font-style="normal"
font-weight="bold"
font-size="192"
- text-anchor="middle"
- id="text166">struct</text>
+ id="text166"
+ style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:middle;fill:#000000;stroke-width:0.02447634in"
+ transform="scale(1.0213945,0.97905363)">struct</text>
<!-- Text -->
<text
xml:space="preserve"
- x="2250"
- y="6150"
- fill="#000000"
- font-family="Courier"
+ x="2214.9531"
+ y="7634.8848"
font-style="normal"
font-weight="bold"
font-size="192"
- text-anchor="middle"
- id="text168">struct</text>
+ id="text168"
+ style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:middle;fill:#000000;stroke-width:0.02447634in"
+ transform="scale(1.0213945,0.97905363)">struct</text>
<!-- Text -->
<text
xml:space="preserve"
- x="2250"
- y="6450"
- fill="#000000"
- font-family="Courier"
+ x="2214.9531"
+ y="7928.6011"
font-style="normal"
font-weight="bold"
font-size="192"
- text-anchor="middle"
- id="text170">rcu_data</text>
+ id="text170"
+ style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:middle;fill:#000000;stroke-width:0.02447634in"
+ transform="scale(1.0213945,0.97905363)">rcu_data</text>
<!-- Text -->
<text
xml:space="preserve"
- x="1050"
- y="7350"
- fill="#000000"
- font-family="Courier"
+ x="1040.0886"
+ y="8809.749"
font-style="normal"
font-weight="bold"
font-size="192"
- text-anchor="middle"
- id="text172">struct</text>
+ id="text172"
+ style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:middle;fill:#000000;stroke-width:0.02447634in"
+ transform="scale(1.0213945,0.97905363)">struct</text>
<!-- Text -->
<text
xml:space="preserve"
- x="1050"
- y="7650"
- fill="#000000"
- font-family="Courier"
+ x="1040.0886"
+ y="9103.4648"
font-style="normal"
font-weight="bold"
font-size="192"
- text-anchor="middle"
- id="text174">rcu_data</text>
+ id="text174"
+ style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:middle;fill:#000000;stroke-width:0.02447634in"
+ transform="scale(1.0213945,0.97905363)">rcu_data</text>
<!-- Text -->
<text
xml:space="preserve"
- x="5250"
- y="6150"
- fill="#000000"
- font-family="Courier"
+ x="5152.1138"
+ y="7634.8848"
font-style="normal"
font-weight="bold"
font-size="192"
- text-anchor="middle"
- id="text176">struct</text>
+ id="text176"
+ style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:middle;fill:#000000;stroke-width:0.02447634in"
+ transform="scale(1.0213945,0.97905363)">struct</text>
<!-- Text -->
<text
xml:space="preserve"
- x="5250"
- y="6450"
- fill="#000000"
- font-family="Courier"
+ x="5152.1138"
+ y="7928.6011"
font-style="normal"
font-weight="bold"
font-size="192"
- text-anchor="middle"
- id="text178">rcu_data</text>
+ id="text178"
+ style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:middle;fill:#000000;stroke-width:0.02447634in"
+ transform="scale(1.0213945,0.97905363)">rcu_data</text>
<!-- Text -->
<text
xml:space="preserve"
- x="4050"
- y="7350"
- fill="#000000"
- font-family="Courier"
+ x="3977.2495"
+ y="8809.749"
font-style="normal"
font-weight="bold"
font-size="192"
- text-anchor="middle"
- id="text180">struct</text>
+ id="text180"
+ style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:middle;fill:#000000;stroke-width:0.02447634in"
+ transform="scale(1.0213945,0.97905363)">struct</text>
<!-- Text -->
<text
xml:space="preserve"
- x="4050"
- y="7650"
- fill="#000000"
- font-family="Courier"
+ x="3977.2495"
+ y="9103.4648"
font-style="normal"
font-weight="bold"
font-size="192"
- text-anchor="middle"
- id="text182">rcu_data</text>
+ id="text182"
+ style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:middle;fill:#000000;stroke-width:0.02447634in"
+ transform="scale(1.0213945,0.97905363)">rcu_data</text>
<!-- Text -->
<text
xml:space="preserve"
- x="450"
- y="1800"
- fill="#000000"
- font-family="Courier"
+ x="452.6564"
+ y="3376.0012"
font-style="normal"
font-weight="bold"
font-size="192"
- text-anchor="start"
- id="text184">struct rcu_state</text>
+ id="text184"
+ style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:start;fill:#000000;stroke-width:0.02447634in"
+ transform="scale(1.0213945,0.97905363)">struct rcu_state</text>
<!-- Text -->
- <text
- xml:space="preserve"
- x="1050"
- y="10050"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text186">struct</text>
<!-- Text -->
- <text
- xml:space="preserve"
- x="1050"
- y="10350"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text188">rcu_dynticks</text>
<!-- Text -->
- <text
- xml:space="preserve"
- x="4050"
- y="10050"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text190">struct</text>
<!-- Text -->
- <text
- xml:space="preserve"
- x="4050"
- y="10350"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text192">rcu_dynticks</text>
<!-- Text -->
- <text
- xml:space="preserve"
- x="2400"
- y="8850"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text194">struct</text>
<!-- Text -->
- <text
- xml:space="preserve"
- x="2400"
- y="9150"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text196">rcu_dynticks</text>
<!-- Text -->
- <text
- xml:space="preserve"
- x="5400"
- y="8850"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text198">struct</text>
<!-- Text -->
- <text
- xml:space="preserve"
- x="5400"
- y="9150"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text200">rcu_dynticks</text>
<!-- Text -->
- <text
- xml:space="preserve"
- x="6900"
- y="300"
- fill="#000000"
- font-family="Helvetica"
- font-style="normal"
- font-weight="normal"
- font-size="192"
- text-anchor="end"
- id="text202">rcu_preempt</text>
<!-- Line -->
<polyline
points="5250,5850 5250,4864 "
- style="stroke:#00d1d1;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline204" />
+ style="stroke:#00d1d1;stroke-width:29.99464035;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline204"
+ transform="matrix(1,0,0,0.95854605,12.340758,1579.9033)" />
<!-- Arrowhead on XXXpoint 5250 5850 - 5250 4740-->
+ <path
+ style="fill:none;stroke:#000000;stroke-width:34.24744034;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#marker1177)"
+ d="m 6000.1472,7564.2558 c 1498.5508,0 1498.5508,0 1498.5508,0 v 520.0252"
+ id="path886"
+ inkscape:connector-curvature="0" />
</g>
</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/Data-Structures.html b/Documentation/RCU/Design/Data-Structures/Data-Structures.html
deleted file mode 100644
index 1d2051c0c3fc..000000000000
--- a/Documentation/RCU/Design/Data-Structures/Data-Structures.html
+++ /dev/null
@@ -1,1455 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
- "http://www.w3.org/TR/html4/loose.dtd">
- <html>
- <head><title>A Tour Through TREE_RCU's Data Structures [LWN.net]</title>
- <meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
-
- <p>December 18, 2016</p>
- <p>This article was contributed by Paul E.&nbsp;McKenney</p>
-
-<h3>Introduction</h3>
-
-This document describes RCU's major data structures and their relationship
-to each other.
-
-<ol>
-<li> <a href="#Data-Structure Relationships">
- Data-Structure Relationships</a>
-<li> <a href="#The rcu_state Structure">
- The <tt>rcu_state</tt> Structure</a>
-<li> <a href="#The rcu_node Structure">
- The <tt>rcu_node</tt> Structure</a>
-<li> <a href="#The rcu_segcblist Structure">
- The <tt>rcu_segcblist</tt> Structure</a>
-<li> <a href="#The rcu_data Structure">
- The <tt>rcu_data</tt> Structure</a>
-<li> <a href="#The rcu_dynticks Structure">
- The <tt>rcu_dynticks</tt> Structure</a>
-<li> <a href="#The rcu_head Structure">
- The <tt>rcu_head</tt> Structure</a>
-<li> <a href="#RCU-Specific Fields in the task_struct Structure">
- RCU-Specific Fields in the <tt>task_struct</tt> Structure</a>
-<li> <a href="#Accessor Functions">
- Accessor Functions</a>
-</ol>
-
-<h3><a name="Data-Structure Relationships">Data-Structure Relationships</a></h3>
-
-<p>RCU is for all intents and purposes a large state machine, and its
-data structures maintain the state in such a way as to allow RCU readers
-to execute extremely quickly, while also processing the RCU grace periods
-requested by updaters in an efficient and extremely scalable fashion.
-The efficiency and scalability of RCU updaters is provided primarily
-by a combining tree, as shown below:
-
-</p><p><img src="BigTreeClassicRCU.svg" alt="BigTreeClassicRCU.svg" width="30%">
-
-</p><p>This diagram shows an enclosing <tt>rcu_state</tt> structure
-containing a tree of <tt>rcu_node</tt> structures.
-Each leaf node of the <tt>rcu_node</tt> tree has up to 16
-<tt>rcu_data</tt> structures associated with it, so that there
-are <tt>NR_CPUS</tt> number of <tt>rcu_data</tt> structures,
-one for each possible CPU.
-This structure is adjusted at boot time, if needed, to handle the
-common case where <tt>nr_cpu_ids</tt> is much less than
-<tt>NR_CPUs</tt>.
-For example, a number of Linux distributions set <tt>NR_CPUs=4096</tt>,
-which results in a three-level <tt>rcu_node</tt> tree.
-If the actual hardware has only 16 CPUs, RCU will adjust itself
-at boot time, resulting in an <tt>rcu_node</tt> tree with only a single node.
-
-</p><p>The purpose of this combining tree is to allow per-CPU events
-such as quiescent states, dyntick-idle transitions,
-and CPU hotplug operations to be processed efficiently
-and scalably.
-Quiescent states are recorded by the per-CPU <tt>rcu_data</tt> structures,
-and other events are recorded by the leaf-level <tt>rcu_node</tt>
-structures.
-All of these events are combined at each level of the tree until finally
-grace periods are completed at the tree's root <tt>rcu_node</tt>
-structure.
-A grace period can be completed at the root once every CPU
-(or, in the case of <tt>CONFIG_PREEMPT_RCU</tt>, task)
-has passed through a quiescent state.
-Once a grace period has completed, record of that fact is propagated
-back down the tree.
-
-</p><p>As can be seen from the diagram, on a 64-bit system
-a two-level tree with 64 leaves can accommodate 1,024 CPUs, with a fanout
-of 64 at the root and a fanout of 16 at the leaves.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
- Why isn't the fanout at the leaves also 64?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
- Because there are more types of events that affect the leaf-level
- <tt>rcu_node</tt> structures than further up the tree.
- Therefore, if the leaf <tt>rcu_node</tt> structures have fanout of
- 64, the contention on these structures' <tt>-&gt;structures</tt>
- becomes excessive.
- Experimentation on a wide variety of systems has shown that a fanout
- of 16 works well for the leaves of the <tt>rcu_node</tt> tree.
- </font>
-
- <p><font color="ffffff">Of course, further experience with
- systems having hundreds or thousands of CPUs may demonstrate
- that the fanout for the non-leaf <tt>rcu_node</tt> structures
- must also be reduced.
- Such reduction can be easily carried out when and if it proves
- necessary.
- In the meantime, if you are using such a system and running into
- contention problems on the non-leaf <tt>rcu_node</tt> structures,
- you may use the <tt>CONFIG_RCU_FANOUT</tt> kernel configuration
- parameter to reduce the non-leaf fanout as needed.
- </font>
-
- <p><font color="ffffff">Kernels built for systems with
- strong NUMA characteristics might also need to adjust
- <tt>CONFIG_RCU_FANOUT</tt> so that the domains of the
- <tt>rcu_node</tt> structures align with hardware boundaries.
- However, there has thus far been no need for this.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>If your system has more than 1,024 CPUs (or more than 512 CPUs on
-a 32-bit system), then RCU will automatically add more levels to the
-tree.
-For example, if you are crazy enough to build a 64-bit system with 65,536
-CPUs, RCU would configure the <tt>rcu_node</tt> tree as follows:
-
-</p><p><img src="HugeTreeClassicRCU.svg" alt="HugeTreeClassicRCU.svg" width="50%">
-
-</p><p>RCU currently permits up to a four-level tree, which on a 64-bit system
-accommodates up to 4,194,304 CPUs, though only a mere 524,288 CPUs for
-32-bit systems.
-On the other hand, you can set <tt>CONFIG_RCU_FANOUT</tt> to be
-as small as 2 if you wish, which would permit only 16 CPUs, which
-is useful for testing.
-
-</p><p>This multi-level combining tree allows us to get most of the
-performance and scalability
-benefits of partitioning, even though RCU grace-period detection is
-inherently a global operation.
-The trick here is that only the last CPU to report a quiescent state
-into a given <tt>rcu_node</tt> structure need advance to the <tt>rcu_node</tt>
-structure at the next level up the tree.
-This means that at the leaf-level <tt>rcu_node</tt> structure, only
-one access out of sixteen will progress up the tree.
-For the internal <tt>rcu_node</tt> structures, the situation is even
-more extreme: Only one access out of sixty-four will progress up
-the tree.
-Because the vast majority of the CPUs do not progress up the tree,
-the lock contention remains roughly constant up the tree.
-No matter how many CPUs there are in the system, at most 64 quiescent-state
-reports per grace period will progress all the way to the root
-<tt>rcu_node</tt> structure, thus ensuring that the lock contention
-on that root <tt>rcu_node</tt> structure remains acceptably low.
-
-</p><p>In effect, the combining tree acts like a big shock absorber,
-keeping lock contention under control at all tree levels regardless
-of the level of loading on the system.
-
-</p><p>The Linux kernel actually supports multiple flavors of RCU
-running concurrently, so RCU builds separate data structures for each
-flavor.
-For example, for <tt>CONFIG_TREE_RCU=y</tt> kernels, RCU provides
-rcu_sched and rcu_bh, as shown below:
-
-</p><p><img src="BigTreeClassicRCUBH.svg" alt="BigTreeClassicRCUBH.svg" width="33%">
-
-</p><p>Energy efficiency is increasingly important, and for that
-reason the Linux kernel provides <tt>CONFIG_NO_HZ_IDLE</tt>, which
-turns off the scheduling-clock interrupts on idle CPUs, which in
-turn allows those CPUs to attain deeper sleep states and to consume
-less energy.
-CPUs whose scheduling-clock interrupts have been turned off are
-said to be in <i>dyntick-idle mode</i>.
-RCU must handle dyntick-idle CPUs specially
-because RCU would otherwise wake up each CPU on every grace period,
-which would defeat the whole purpose of <tt>CONFIG_NO_HZ_IDLE</tt>.
-RCU uses the <tt>rcu_dynticks</tt> structure to track
-which CPUs are in dyntick idle mode, as shown below:
-
-</p><p><img src="BigTreeClassicRCUBHdyntick.svg" alt="BigTreeClassicRCUBHdyntick.svg" width="33%">
-
-</p><p>However, if a CPU is in dyntick-idle mode, it is in that mode
-for all flavors of RCU.
-Therefore, a single <tt>rcu_dynticks</tt> structure is allocated per
-CPU, and all of a given CPU's <tt>rcu_data</tt> structures share
-that <tt>rcu_dynticks</tt>, as shown in the figure.
-
-</p><p>Kernels built with <tt>CONFIG_PREEMPT_RCU</tt> support
-rcu_preempt in addition to rcu_sched and rcu_bh, as shown below:
-
-</p><p><img src="BigTreePreemptRCUBHdyntick.svg" alt="BigTreePreemptRCUBHdyntick.svg" width="35%">
-
-</p><p>RCU updaters wait for normal grace periods by registering
-RCU callbacks, either directly via <tt>call_rcu()</tt> and
-friends (namely <tt>call_rcu_bh()</tt> and <tt>call_rcu_sched()</tt>),
-there being a separate interface per flavor of RCU)
-or indirectly via <tt>synchronize_rcu()</tt> and friends.
-RCU callbacks are represented by <tt>rcu_head</tt> structures,
-which are queued on <tt>rcu_data</tt> structures while they are
-waiting for a grace period to elapse, as shown in the following figure:
-
-</p><p><img src="BigTreePreemptRCUBHdyntickCB.svg" alt="BigTreePreemptRCUBHdyntickCB.svg" width="40%">
-
-</p><p>This figure shows how <tt>TREE_RCU</tt>'s and
-<tt>PREEMPT_RCU</tt>'s major data structures are related.
-Lesser data structures will be introduced with the algorithms that
-make use of them.
-
-</p><p>Note that each of the data structures in the above figure has
-its own synchronization:
-
-<p><ol>
-<li> Each <tt>rcu_state</tt> structures has a lock and a mutex,
- and some fields are protected by the corresponding root
- <tt>rcu_node</tt> structure's lock.
-<li> Each <tt>rcu_node</tt> structure has a spinlock.
-<li> The fields in <tt>rcu_data</tt> are private to the corresponding
- CPU, although a few can be read and written by other CPUs.
-<li> Similarly, the fields in <tt>rcu_dynticks</tt> are private
- to the corresponding CPU, although a few can be read by
- other CPUs.
-</ol>
-
-<p>It is important to note that different data structures can have
-very different ideas about the state of RCU at any given time.
-For but one example, awareness of the start or end of a given RCU
-grace period propagates slowly through the data structures.
-This slow propagation is absolutely necessary for RCU to have good
-read-side performance.
-If this balkanized implementation seems foreign to you, one useful
-trick is to consider each instance of these data structures to be
-a different person, each having the usual slightly different
-view of reality.
-
-</p><p>The general role of each of these data structures is as
-follows:
-
-</p><ol>
-<li> <tt>rcu_state</tt>:
- This structure forms the interconnection between the
- <tt>rcu_node</tt> and <tt>rcu_data</tt> structures,
- tracks grace periods, serves as short-term repository
- for callbacks orphaned by CPU-hotplug events,
- maintains <tt>rcu_barrier()</tt> state,
- tracks expedited grace-period state,
- and maintains state used to force quiescent states when
- grace periods extend too long,
-<li> <tt>rcu_node</tt>: This structure forms the combining
- tree that propagates quiescent-state
- information from the leaves to the root, and also propagates
- grace-period information from the root to the leaves.
- It provides local copies of the grace-period state in order
- to allow this information to be accessed in a synchronized
- manner without suffering the scalability limitations that
- would otherwise be imposed by global locking.
- In <tt>CONFIG_PREEMPT_RCU</tt> kernels, it manages the lists
- of tasks that have blocked while in their current
- RCU read-side critical section.
- In <tt>CONFIG_PREEMPT_RCU</tt> with
- <tt>CONFIG_RCU_BOOST</tt>, it manages the
- per-<tt>rcu_node</tt> priority-boosting
- kernel threads (kthreads) and state.
- Finally, it records CPU-hotplug state in order to determine
- which CPUs should be ignored during a given grace period.
-<li> <tt>rcu_data</tt>: This per-CPU structure is the
- focus of quiescent-state detection and RCU callback queuing.
- It also tracks its relationship to the corresponding leaf
- <tt>rcu_node</tt> structure to allow more-efficient
- propagation of quiescent states up the <tt>rcu_node</tt>
- combining tree.
- Like the <tt>rcu_node</tt> structure, it provides a local
- copy of the grace-period information to allow for-free
- synchronized
- access to this information from the corresponding CPU.
- Finally, this structure records past dyntick-idle state
- for the corresponding CPU and also tracks statistics.
-<li> <tt>rcu_dynticks</tt>:
- This per-CPU structure tracks the current dyntick-idle
- state for the corresponding CPU.
- Unlike the other three structures, the <tt>rcu_dynticks</tt>
- structure is not replicated per RCU flavor.
-<li> <tt>rcu_head</tt>:
- This structure represents RCU callbacks, and is the
- only structure allocated and managed by RCU users.
- The <tt>rcu_head</tt> structure is normally embedded
- within the RCU-protected data structure.
-</ol>
-
-<p>If all you wanted from this article was a general notion of how
-RCU's data structures are related, you are done.
-Otherwise, each of the following sections give more details on
-the <tt>rcu_state</tt>, <tt>rcu_node</tt>, <tt>rcu_data</tt>,
-and <tt>rcu_dynticks</tt> data structures.
-
-<h3><a name="The rcu_state Structure">
-The <tt>rcu_state</tt> Structure</a></h3>
-
-<p>The <tt>rcu_state</tt> structure is the base structure that
-represents a flavor of RCU.
-This structure forms the interconnection between the
-<tt>rcu_node</tt> and <tt>rcu_data</tt> structures,
-tracks grace periods, contains the lock used to
-synchronize with CPU-hotplug events,
-and maintains state used to force quiescent states when
-grace periods extend too long,
-
-</p><p>A few of the <tt>rcu_state</tt> structure's fields are discussed,
-singly and in groups, in the following sections.
-The more specialized fields are covered in the discussion of their
-use.
-
-<h5>Relationship to rcu_node and rcu_data Structures</h5>
-
-This portion of the <tt>rcu_state</tt> structure is declared
-as follows:
-
-<pre>
- 1 struct rcu_node node[NUM_RCU_NODES];
- 2 struct rcu_node *level[NUM_RCU_LVLS + 1];
- 3 struct rcu_data __percpu *rda;
-</pre>
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
- Wait a minute!
- You said that the <tt>rcu_node</tt> structures formed a tree,
- but they are declared as a flat array!
- What gives?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
- The tree is laid out in the array.
- The first node In the array is the head, the next set of nodes in the
- array are children of the head node, and so on until the last set of
- nodes in the array are the leaves.
- </font>
-
- <p><font color="ffffff">See the following diagrams to see how
- this works.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>The <tt>rcu_node</tt> tree is embedded into the
-<tt>-&gt;node[]</tt> array as shown in the following figure:
-
-</p><p><img src="TreeMapping.svg" alt="TreeMapping.svg" width="40%">
-
-</p><p>One interesting consequence of this mapping is that a
-breadth-first traversal of the tree is implemented as a simple
-linear scan of the array, which is in fact what the
-<tt>rcu_for_each_node_breadth_first()</tt> macro does.
-This macro is used at the beginning and ends of grace periods.
-
-</p><p>Each entry of the <tt>-&gt;level</tt> array references
-the first <tt>rcu_node</tt> structure on the corresponding level
-of the tree, for example, as shown below:
-
-</p><p><img src="TreeMappingLevel.svg" alt="TreeMappingLevel.svg" width="40%">
-
-</p><p>The zero<sup>th</sup> element of the array references the root
-<tt>rcu_node</tt> structure, the first element references the
-first child of the root <tt>rcu_node</tt>, and finally the second
-element references the first leaf <tt>rcu_node</tt> structure.
-
-</p><p>For whatever it is worth, if you draw the tree to be tree-shaped
-rather than array-shaped, it is easy to draw a planar representation:
-
-</p><p><img src="TreeLevel.svg" alt="TreeLevel.svg" width="60%">
-
-</p><p>Finally, the <tt>-&gt;rda</tt> field references a per-CPU
-pointer to the corresponding CPU's <tt>rcu_data</tt> structure.
-
-</p><p>All of these fields are constant once initialization is complete,
-and therefore need no protection.
-
-<h5>Grace-Period Tracking</h5>
-
-<p>This portion of the <tt>rcu_state</tt> structure is declared
-as follows:
-
-<pre>
- 1 unsigned long gp_seq;
-</pre>
-
-<p>RCU grace periods are numbered, and
-the <tt>-&gt;gp_seq</tt> field contains the current grace-period
-sequence number.
-The bottom two bits are the state of the current grace period,
-which can be zero for not yet started or one for in progress.
-In other words, if the bottom two bits of <tt>-&gt;gp_seq</tt> are
-zero, the corresponding flavor of RCU is idle.
-Any other value in the bottom two bits indicates that something is broken.
-This field is protected by the root <tt>rcu_node</tt> structure's
-<tt>-&gt;lock</tt> field.
-
-</p><p>There are <tt>-&gt;gp_seq</tt> fields
-in the <tt>rcu_node</tt> and <tt>rcu_data</tt> structures
-as well.
-The fields in the <tt>rcu_state</tt> structure represent the
-most current value, and those of the other structures are compared
-in order to detect the beginnings and ends of grace periods in a distributed
-fashion.
-The values flow from <tt>rcu_state</tt> to <tt>rcu_node</tt>
-(down the tree from the root to the leaves) to <tt>rcu_data</tt>.
-
-<h5>Miscellaneous</h5>
-
-<p>This portion of the <tt>rcu_state</tt> structure is declared
-as follows:
-
-<pre>
- 1 unsigned long gp_max;
- 2 char abbr;
- 3 char *name;
-</pre>
-
-<p>The <tt>-&gt;gp_max</tt> field tracks the duration of the longest
-grace period in jiffies.
-It is protected by the root <tt>rcu_node</tt>'s <tt>-&gt;lock</tt>.
-
-<p>The <tt>-&gt;name</tt> field points to the name of the RCU flavor
-(for example, &ldquo;rcu_sched&rdquo;), and is constant.
-The <tt>-&gt;abbr</tt> field contains a one-character abbreviation,
-for example, &ldquo;s&rdquo; for RCU-sched.
-
-<h3><a name="The rcu_node Structure">
-The <tt>rcu_node</tt> Structure</a></h3>
-
-<p>The <tt>rcu_node</tt> structures form the combining
-tree that propagates quiescent-state
-information from the leaves to the root and also that propagates
-grace-period information from the root down to the leaves.
-They provides local copies of the grace-period state in order
-to allow this information to be accessed in a synchronized
-manner without suffering the scalability limitations that
-would otherwise be imposed by global locking.
-In <tt>CONFIG_PREEMPT_RCU</tt> kernels, they manage the lists
-of tasks that have blocked while in their current
-RCU read-side critical section.
-In <tt>CONFIG_PREEMPT_RCU</tt> with
-<tt>CONFIG_RCU_BOOST</tt>, they manage the
-per-<tt>rcu_node</tt> priority-boosting
-kernel threads (kthreads) and state.
-Finally, they record CPU-hotplug state in order to determine
-which CPUs should be ignored during a given grace period.
-
-</p><p>The <tt>rcu_node</tt> structure's fields are discussed,
-singly and in groups, in the following sections.
-
-<h5>Connection to Combining Tree</h5>
-
-<p>This portion of the <tt>rcu_node</tt> structure is declared
-as follows:
-
-<pre>
- 1 struct rcu_node *parent;
- 2 u8 level;
- 3 u8 grpnum;
- 4 unsigned long grpmask;
- 5 int grplo;
- 6 int grphi;
-</pre>
-
-<p>The <tt>-&gt;parent</tt> pointer references the <tt>rcu_node</tt>
-one level up in the tree, and is <tt>NULL</tt> for the root
-<tt>rcu_node</tt>.
-The RCU implementation makes heavy use of this field to push quiescent
-states up the tree.
-The <tt>-&gt;level</tt> field gives the level in the tree, with
-the root being at level zero, its children at level one, and so on.
-The <tt>-&gt;grpnum</tt> field gives this node's position within
-the children of its parent, so this number can range between 0 and 31
-on 32-bit systems and between 0 and 63 on 64-bit systems.
-The <tt>-&gt;level</tt> and <tt>-&gt;grpnum</tt> fields are
-used only during initialization and for tracing.
-The <tt>-&gt;grpmask</tt> field is the bitmask counterpart of
-<tt>-&gt;grpnum</tt>, and therefore always has exactly one bit set.
-This mask is used to clear the bit corresponding to this <tt>rcu_node</tt>
-structure in its parent's bitmasks, which are described later.
-Finally, the <tt>-&gt;grplo</tt> and <tt>-&gt;grphi</tt> fields
-contain the lowest and highest numbered CPU served by this
-<tt>rcu_node</tt> structure, respectively.
-
-</p><p>All of these fields are constant, and thus do not require any
-synchronization.
-
-<h5>Synchronization</h5>
-
-<p>This field of the <tt>rcu_node</tt> structure is declared
-as follows:
-
-<pre>
- 1 raw_spinlock_t lock;
-</pre>
-
-<p>This field is used to protect the remaining fields in this structure,
-unless otherwise stated.
-That said, all of the fields in this structure can be accessed without
-locking for tracing purposes.
-Yes, this can result in confusing traces, but better some tracing confusion
-than to be heisenbugged out of existence.
-
-<h5>Grace-Period Tracking</h5>
-
-<p>This portion of the <tt>rcu_node</tt> structure is declared
-as follows:
-
-<pre>
- 1 unsigned long gp_seq;
- 2 unsigned long gp_seq_needed;
-</pre>
-
-<p>The <tt>rcu_node</tt> structures' <tt>-&gt;gp_seq</tt> fields are
-the counterparts of the field of the same name in the <tt>rcu_state</tt>
-structure.
-They each may lag up to one step behind their <tt>rcu_state</tt>
-counterpart.
-If the bottom two bits of a given <tt>rcu_node</tt> structure's
-<tt>-&gt;gp_seq</tt> field is zero, then this <tt>rcu_node</tt>
-structure believes that RCU is idle.
-</p><p>The <tt>&gt;gp_seq</tt> field of each <tt>rcu_node</tt>
-structure is updated at the beginning and the end
-of each grace period.
-
-<p>The <tt>-&gt;gp_seq_needed</tt> fields record the
-furthest-in-the-future grace period request seen by the corresponding
-<tt>rcu_node</tt> structure. The request is considered fulfilled when
-the value of the <tt>-&gt;gp_seq</tt> field equals or exceeds that of
-the <tt>-&gt;gp_seq_needed</tt> field.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
- Suppose that this <tt>rcu_node</tt> structure doesn't see
- a request for a very long time.
- Won't wrapping of the <tt>-&gt;gp_seq</tt> field cause
- problems?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
- No, because if the <tt>-&gt;gp_seq_needed</tt> field lags behind the
- <tt>-&gt;gp_seq</tt> field, the <tt>-&gt;gp_seq_needed</tt> field
- will be updated at the end of the grace period.
- Modulo-arithmetic comparisons therefore will always get the
- correct answer, even with wrapping.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<h5>Quiescent-State Tracking</h5>
-
-<p>These fields manage the propagation of quiescent states up the
-combining tree.
-
-</p><p>This portion of the <tt>rcu_node</tt> structure has fields
-as follows:
-
-<pre>
- 1 unsigned long qsmask;
- 2 unsigned long expmask;
- 3 unsigned long qsmaskinit;
- 4 unsigned long expmaskinit;
-</pre>
-
-<p>The <tt>-&gt;qsmask</tt> field tracks which of this
-<tt>rcu_node</tt> structure's children still need to report
-quiescent states for the current normal grace period.
-Such children will have a value of 1 in their corresponding bit.
-Note that the leaf <tt>rcu_node</tt> structures should be
-thought of as having <tt>rcu_data</tt> structures as their
-children.
-Similarly, the <tt>-&gt;expmask</tt> field tracks which
-of this <tt>rcu_node</tt> structure's children still need to report
-quiescent states for the current expedited grace period.
-An expedited grace period has
-the same conceptual properties as a normal grace period, but the
-expedited implementation accepts extreme CPU overhead to obtain
-much lower grace-period latency, for example, consuming a few
-tens of microseconds worth of CPU time to reduce grace-period
-duration from milliseconds to tens of microseconds.
-The <tt>-&gt;qsmaskinit</tt> field tracks which of this
-<tt>rcu_node</tt> structure's children cover for at least
-one online CPU.
-This mask is used to initialize <tt>-&gt;qsmask</tt>,
-and <tt>-&gt;expmaskinit</tt> is used to initialize
-<tt>-&gt;expmask</tt> and the beginning of the
-normal and expedited grace periods, respectively.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
- Why are these bitmasks protected by locking?
- Come on, haven't you heard of atomic instructions???
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
- Lockless grace-period computation! Such a tantalizing possibility!
- </font>
-
- <p><font color="ffffff">But consider the following sequence of events:
- </font>
-
- <ol>
- <li> <font color="ffffff">CPU&nbsp;0 has been in dyntick-idle
- mode for quite some time.
- When it wakes up, it notices that the current RCU
- grace period needs it to report in, so it sets a
- flag where the scheduling clock interrupt will find it.
- </font><p>
- <li> <font color="ffffff">Meanwhile, CPU&nbsp;1 is running
- <tt>force_quiescent_state()</tt>,
- and notices that CPU&nbsp;0 has been in dyntick idle mode,
- which qualifies as an extended quiescent state.
- </font><p>
- <li> <font color="ffffff">CPU&nbsp;0's scheduling clock
- interrupt fires in the
- middle of an RCU read-side critical section, and notices
- that the RCU core needs something, so commences RCU softirq
- processing.
- </font>
- <p>
- <li> <font color="ffffff">CPU&nbsp;0's softirq handler
- executes and is just about ready
- to report its quiescent state up the <tt>rcu_node</tt>
- tree.
- </font><p>
- <li> <font color="ffffff">But CPU&nbsp;1 beats it to the punch,
- completing the current
- grace period and starting a new one.
- </font><p>
- <li> <font color="ffffff">CPU&nbsp;0 now reports its quiescent
- state for the wrong
- grace period.
- That grace period might now end before the RCU read-side
- critical section.
- If that happens, disaster will ensue.
- </font>
- </ol>
-
- <p><font color="ffffff">So the locking is absolutely required in
- order to coordinate clearing of the bits with updating of the
- grace-period sequence number in <tt>-&gt;gp_seq</tt>.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<h5>Blocked-Task Management</h5>
-
-<p><tt>PREEMPT_RCU</tt> allows tasks to be preempted in the
-midst of their RCU read-side critical sections, and these tasks
-must be tracked explicitly.
-The details of exactly why and how they are tracked will be covered
-in a separate article on RCU read-side processing.
-For now, it is enough to know that the <tt>rcu_node</tt>
-structure tracks them.
-
-<pre>
- 1 struct list_head blkd_tasks;
- 2 struct list_head *gp_tasks;
- 3 struct list_head *exp_tasks;
- 4 bool wait_blkd_tasks;
-</pre>
-
-<p>The <tt>-&gt;blkd_tasks</tt> field is a list header for
-the list of blocked and preempted tasks.
-As tasks undergo context switches within RCU read-side critical
-sections, their <tt>task_struct</tt> structures are enqueued
-(via the <tt>task_struct</tt>'s <tt>-&gt;rcu_node_entry</tt>
-field) onto the head of the <tt>-&gt;blkd_tasks</tt> list for the
-leaf <tt>rcu_node</tt> structure corresponding to the CPU
-on which the outgoing context switch executed.
-As these tasks later exit their RCU read-side critical sections,
-they remove themselves from the list.
-This list is therefore in reverse time order, so that if one of the tasks
-is blocking the current grace period, all subsequent tasks must
-also be blocking that same grace period.
-Therefore, a single pointer into this list suffices to track
-all tasks blocking a given grace period.
-That pointer is stored in <tt>-&gt;gp_tasks</tt> for normal
-grace periods and in <tt>-&gt;exp_tasks</tt> for expedited
-grace periods.
-These last two fields are <tt>NULL</tt> if either there is
-no grace period in flight or if there are no blocked tasks
-preventing that grace period from completing.
-If either of these two pointers is referencing a task that
-removes itself from the <tt>-&gt;blkd_tasks</tt> list,
-then that task must advance the pointer to the next task on
-the list, or set the pointer to <tt>NULL</tt> if there
-are no subsequent tasks on the list.
-
-</p><p>For example, suppose that tasks&nbsp;T1, T2, and&nbsp;T3 are
-all hard-affinitied to the largest-numbered CPU in the system.
-Then if task&nbsp;T1 blocked in an RCU read-side
-critical section, then an expedited grace period started,
-then task&nbsp;T2 blocked in an RCU read-side critical section,
-then a normal grace period started, and finally task&nbsp;3 blocked
-in an RCU read-side critical section, then the state of the
-last leaf <tt>rcu_node</tt> structure's blocked-task list
-would be as shown below:
-
-</p><p><img src="blkd_task.svg" alt="blkd_task.svg" width="60%">
-
-</p><p>Task&nbsp;T1 is blocking both grace periods, task&nbsp;T2 is
-blocking only the normal grace period, and task&nbsp;T3 is blocking
-neither grace period.
-Note that these tasks will not remove themselves from this list
-immediately upon resuming execution.
-They will instead remain on the list until they execute the outermost
-<tt>rcu_read_unlock()</tt> that ends their RCU read-side critical
-section.
-
-<p>
-The <tt>-&gt;wait_blkd_tasks</tt> field indicates whether or not
-the current grace period is waiting on a blocked task.
-
-<h5>Sizing the <tt>rcu_node</tt> Array</h5>
-
-<p>The <tt>rcu_node</tt> array is sized via a series of
-C-preprocessor expressions as follows:
-
-<pre>
- 1 #ifdef CONFIG_RCU_FANOUT
- 2 #define RCU_FANOUT CONFIG_RCU_FANOUT
- 3 #else
- 4 # ifdef CONFIG_64BIT
- 5 # define RCU_FANOUT 64
- 6 # else
- 7 # define RCU_FANOUT 32
- 8 # endif
- 9 #endif
-10
-11 #ifdef CONFIG_RCU_FANOUT_LEAF
-12 #define RCU_FANOUT_LEAF CONFIG_RCU_FANOUT_LEAF
-13 #else
-14 # ifdef CONFIG_64BIT
-15 # define RCU_FANOUT_LEAF 64
-16 # else
-17 # define RCU_FANOUT_LEAF 32
-18 # endif
-19 #endif
-20
-21 #define RCU_FANOUT_1 (RCU_FANOUT_LEAF)
-22 #define RCU_FANOUT_2 (RCU_FANOUT_1 * RCU_FANOUT)
-23 #define RCU_FANOUT_3 (RCU_FANOUT_2 * RCU_FANOUT)
-24 #define RCU_FANOUT_4 (RCU_FANOUT_3 * RCU_FANOUT)
-25
-26 #if NR_CPUS &lt;= RCU_FANOUT_1
-27 # define RCU_NUM_LVLS 1
-28 # define NUM_RCU_LVL_0 1
-29 # define NUM_RCU_NODES NUM_RCU_LVL_0
-30 # define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0 }
-31 # define RCU_NODE_NAME_INIT { "rcu_node_0" }
-32 # define RCU_FQS_NAME_INIT { "rcu_node_fqs_0" }
-33 # define RCU_EXP_NAME_INIT { "rcu_node_exp_0" }
-34 #elif NR_CPUS &lt;= RCU_FANOUT_2
-35 # define RCU_NUM_LVLS 2
-36 # define NUM_RCU_LVL_0 1
-37 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
-38 # define NUM_RCU_NODES (NUM_RCU_LVL_0 + NUM_RCU_LVL_1)
-39 # define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0, NUM_RCU_LVL_1 }
-40 # define RCU_NODE_NAME_INIT { "rcu_node_0", "rcu_node_1" }
-41 # define RCU_FQS_NAME_INIT { "rcu_node_fqs_0", "rcu_node_fqs_1" }
-42 # define RCU_EXP_NAME_INIT { "rcu_node_exp_0", "rcu_node_exp_1" }
-43 #elif NR_CPUS &lt;= RCU_FANOUT_3
-44 # define RCU_NUM_LVLS 3
-45 # define NUM_RCU_LVL_0 1
-46 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
-47 # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
-48 # define NUM_RCU_NODES (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2)
-49 # define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2 }
-50 # define RCU_NODE_NAME_INIT { "rcu_node_0", "rcu_node_1", "rcu_node_2" }
-51 # define RCU_FQS_NAME_INIT { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2" }
-52 # define RCU_EXP_NAME_INIT { "rcu_node_exp_0", "rcu_node_exp_1", "rcu_node_exp_2" }
-53 #elif NR_CPUS &lt;= RCU_FANOUT_4
-54 # define RCU_NUM_LVLS 4
-55 # define NUM_RCU_LVL_0 1
-56 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3)
-57 # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
-58 # define NUM_RCU_LVL_3 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
-59 # define NUM_RCU_NODES (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3)
-60 # define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2, NUM_RCU_LVL_3 }
-61 # define RCU_NODE_NAME_INIT { "rcu_node_0", "rcu_node_1", "rcu_node_2", "rcu_node_3" }
-62 # define RCU_FQS_NAME_INIT { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2", "rcu_node_fqs_3" }
-63 # define RCU_EXP_NAME_INIT { "rcu_node_exp_0", "rcu_node_exp_1", "rcu_node_exp_2", "rcu_node_exp_3" }
-64 #else
-65 # error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
-66 #endif
-</pre>
-
-<p>The maximum number of levels in the <tt>rcu_node</tt> structure
-is currently limited to four, as specified by lines&nbsp;21-24
-and the structure of the subsequent &ldquo;if&rdquo; statement.
-For 32-bit systems, this allows 16*32*32*32=524,288 CPUs, which
-should be sufficient for the next few years at least.
-For 64-bit systems, 16*64*64*64=4,194,304 CPUs is allowed, which
-should see us through the next decade or so.
-This four-level tree also allows kernels built with
-<tt>CONFIG_RCU_FANOUT=8</tt> to support up to 4096 CPUs,
-which might be useful in very large systems having eight CPUs per
-socket (but please note that no one has yet shown any measurable
-performance degradation due to misaligned socket and <tt>rcu_node</tt>
-boundaries).
-In addition, building kernels with a full four levels of <tt>rcu_node</tt>
-tree permits better testing of RCU's combining-tree code.
-
-</p><p>The <tt>RCU_FANOUT</tt> symbol controls how many children
-are permitted at each non-leaf level of the <tt>rcu_node</tt> tree.
-If the <tt>CONFIG_RCU_FANOUT</tt> Kconfig option is not specified,
-it is set based on the word size of the system, which is also
-the Kconfig default.
-
-</p><p>The <tt>RCU_FANOUT_LEAF</tt> symbol controls how many CPUs are
-handled by each leaf <tt>rcu_node</tt> structure.
-Experience has shown that allowing a given leaf <tt>rcu_node</tt>
-structure to handle 64 CPUs, as permitted by the number of bits in
-the <tt>-&gt;qsmask</tt> field on a 64-bit system, results in
-excessive contention for the leaf <tt>rcu_node</tt> structures'
-<tt>-&gt;lock</tt> fields.
-The number of CPUs per leaf <tt>rcu_node</tt> structure is therefore
-limited to 16 given the default value of <tt>CONFIG_RCU_FANOUT_LEAF</tt>.
-If <tt>CONFIG_RCU_FANOUT_LEAF</tt> is unspecified, the value
-selected is based on the word size of the system, just as for
-<tt>CONFIG_RCU_FANOUT</tt>.
-Lines&nbsp;11-19 perform this computation.
-
-</p><p>Lines&nbsp;21-24 compute the maximum number of CPUs supported by
-a single-level (which contains a single <tt>rcu_node</tt> structure),
-two-level, three-level, and four-level <tt>rcu_node</tt> tree,
-respectively, given the fanout specified by <tt>RCU_FANOUT</tt>
-and <tt>RCU_FANOUT_LEAF</tt>.
-These numbers of CPUs are retained in the
-<tt>RCU_FANOUT_1</tt>,
-<tt>RCU_FANOUT_2</tt>,
-<tt>RCU_FANOUT_3</tt>, and
-<tt>RCU_FANOUT_4</tt>
-C-preprocessor variables, respectively.
-
-</p><p>These variables are used to control the C-preprocessor <tt>#if</tt>
-statement spanning lines&nbsp;26-66 that computes the number of
-<tt>rcu_node</tt> structures required for each level of the tree,
-as well as the number of levels required.
-The number of levels is placed in the <tt>NUM_RCU_LVLS</tt>
-C-preprocessor variable by lines&nbsp;27, 35, 44, and&nbsp;54.
-The number of <tt>rcu_node</tt> structures for the topmost level
-of the tree is always exactly one, and this value is unconditionally
-placed into <tt>NUM_RCU_LVL_0</tt> by lines&nbsp;28, 36, 45, and&nbsp;55.
-The rest of the levels (if any) of the <tt>rcu_node</tt> tree
-are computed by dividing the maximum number of CPUs by the
-fanout supported by the number of levels from the current level down,
-rounding up. This computation is performed by lines&nbsp;37,
-46-47, and&nbsp;56-58.
-Lines&nbsp;31-33, 40-42, 50-52, and&nbsp;62-63 create initializers
-for lockdep lock-class names.
-Finally, lines&nbsp;64-66 produce an error if the maximum number of
-CPUs is too large for the specified fanout.
-
-<h3><a name="The rcu_segcblist Structure">
-The <tt>rcu_segcblist</tt> Structure</a></h3>
-
-The <tt>rcu_segcblist</tt> structure maintains a segmented list of
-callbacks as follows:
-
-<pre>
- 1 #define RCU_DONE_TAIL 0
- 2 #define RCU_WAIT_TAIL 1
- 3 #define RCU_NEXT_READY_TAIL 2
- 4 #define RCU_NEXT_TAIL 3
- 5 #define RCU_CBLIST_NSEGS 4
- 6
- 7 struct rcu_segcblist {
- 8 struct rcu_head *head;
- 9 struct rcu_head **tails[RCU_CBLIST_NSEGS];
-10 unsigned long gp_seq[RCU_CBLIST_NSEGS];
-11 long len;
-12 long len_lazy;
-13 };
-</pre>
-
-<p>
-The segments are as follows:
-
-<ol>
-<li> <tt>RCU_DONE_TAIL</tt>: Callbacks whose grace periods have elapsed.
- These callbacks are ready to be invoked.
-<li> <tt>RCU_WAIT_TAIL</tt>: Callbacks that are waiting for the
- current grace period.
- Note that different CPUs can have different ideas about which
- grace period is current, hence the <tt>-&gt;gp_seq</tt> field.
-<li> <tt>RCU_NEXT_READY_TAIL</tt>: Callbacks waiting for the next
- grace period to start.
-<li> <tt>RCU_NEXT_TAIL</tt>: Callbacks that have not yet been
- associated with a grace period.
-</ol>
-
-<p>
-The <tt>-&gt;head</tt> pointer references the first callback or
-is <tt>NULL</tt> if the list contains no callbacks (which is
-<i>not</i> the same as being empty).
-Each element of the <tt>-&gt;tails[]</tt> array references the
-<tt>-&gt;next</tt> pointer of the last callback in the corresponding
-segment of the list, or the list's <tt>-&gt;head</tt> pointer if
-that segment and all previous segments are empty.
-If the corresponding segment is empty but some previous segment is
-not empty, then the array element is identical to its predecessor.
-Older callbacks are closer to the head of the list, and new callbacks
-are added at the tail.
-This relationship between the <tt>-&gt;head</tt> pointer, the
-<tt>-&gt;tails[]</tt> array, and the callbacks is shown in this
-diagram:
-
-</p><p><img src="nxtlist.svg" alt="nxtlist.svg" width="40%">
-
-</p><p>In this figure, the <tt>-&gt;head</tt> pointer references the
-first
-RCU callback in the list.
-The <tt>-&gt;tails[RCU_DONE_TAIL]</tt> array element references
-the <tt>-&gt;head</tt> pointer itself, indicating that none
-of the callbacks is ready to invoke.
-The <tt>-&gt;tails[RCU_WAIT_TAIL]</tt> array element references callback
-CB&nbsp;2's <tt>-&gt;next</tt> pointer, which indicates that
-CB&nbsp;1 and CB&nbsp;2 are both waiting on the current grace period,
-give or take possible disagreements about exactly which grace period
-is the current one.
-The <tt>-&gt;tails[RCU_NEXT_READY_TAIL]</tt> array element
-references the same RCU callback that <tt>-&gt;tails[RCU_WAIT_TAIL]</tt>
-does, which indicates that there are no callbacks waiting on the next
-RCU grace period.
-The <tt>-&gt;tails[RCU_NEXT_TAIL]</tt> array element references
-CB&nbsp;4's <tt>-&gt;next</tt> pointer, indicating that all the
-remaining RCU callbacks have not yet been assigned to an RCU grace
-period.
-Note that the <tt>-&gt;tails[RCU_NEXT_TAIL]</tt> array element
-always references the last RCU callback's <tt>-&gt;next</tt> pointer
-unless the callback list is empty, in which case it references
-the <tt>-&gt;head</tt> pointer.
-
-<p>
-There is one additional important special case for the
-<tt>-&gt;tails[RCU_NEXT_TAIL]</tt> array element: It can be <tt>NULL</tt>
-when this list is <i>disabled</i>.
-Lists are disabled when the corresponding CPU is offline or when
-the corresponding CPU's callbacks are offloaded to a kthread,
-both of which are described elsewhere.
-
-</p><p>CPUs advance their callbacks from the
-<tt>RCU_NEXT_TAIL</tt> to the <tt>RCU_NEXT_READY_TAIL</tt> to the
-<tt>RCU_WAIT_TAIL</tt> to the <tt>RCU_DONE_TAIL</tt> list segments
-as grace periods advance.
-
-</p><p>The <tt>-&gt;gp_seq[]</tt> array records grace-period
-numbers corresponding to the list segments.
-This is what allows different CPUs to have different ideas as to
-which is the current grace period while still avoiding premature
-invocation of their callbacks.
-In particular, this allows CPUs that go idle for extended periods
-to determine which of their callbacks are ready to be invoked after
-reawakening.
-
-</p><p>The <tt>-&gt;len</tt> counter contains the number of
-callbacks in <tt>-&gt;head</tt>, and the
-<tt>-&gt;len_lazy</tt> contains the number of those callbacks that
-are known to only free memory, and whose invocation can therefore
-be safely deferred.
-
-<p><b>Important note</b>: It is the <tt>-&gt;len</tt> field that
-determines whether or not there are callbacks associated with
-this <tt>rcu_segcblist</tt> structure, <i>not</i> the <tt>-&gt;head</tt>
-pointer.
-The reason for this is that all the ready-to-invoke callbacks
-(that is, those in the <tt>RCU_DONE_TAIL</tt> segment) are extracted
-all at once at callback-invocation time.
-If callback invocation must be postponed, for example, because a
-high-priority process just woke up on this CPU, then the remaining
-callbacks are placed back on the <tt>RCU_DONE_TAIL</tt> segment.
-Either way, the <tt>-&gt;len</tt> and <tt>-&gt;len_lazy</tt> counts
-are adjusted after the corresponding callbacks have been invoked, and so
-again it is the <tt>-&gt;len</tt> count that accurately reflects whether
-or not there are callbacks associated with this <tt>rcu_segcblist</tt>
-structure.
-Of course, off-CPU sampling of the <tt>-&gt;len</tt> count requires
-the use of appropriate synchronization, for example, memory barriers.
-This synchronization can be a bit subtle, particularly in the case
-of <tt>rcu_barrier()</tt>.
-
-<h3><a name="The rcu_data Structure">
-The <tt>rcu_data</tt> Structure</a></h3>
-
-<p>The <tt>rcu_data</tt> maintains the per-CPU state for the
-corresponding flavor of RCU.
-The fields in this structure may be accessed only from the corresponding
-CPU (and from tracing) unless otherwise stated.
-This structure is the
-focus of quiescent-state detection and RCU callback queuing.
-It also tracks its relationship to the corresponding leaf
-<tt>rcu_node</tt> structure to allow more-efficient
-propagation of quiescent states up the <tt>rcu_node</tt>
-combining tree.
-Like the <tt>rcu_node</tt> structure, it provides a local
-copy of the grace-period information to allow for-free
-synchronized
-access to this information from the corresponding CPU.
-Finally, this structure records past dyntick-idle state
-for the corresponding CPU and also tracks statistics.
-
-</p><p>The <tt>rcu_data</tt> structure's fields are discussed,
-singly and in groups, in the following sections.
-
-<h5>Connection to Other Data Structures</h5>
-
-<p>This portion of the <tt>rcu_data</tt> structure is declared
-as follows:
-
-<pre>
- 1 int cpu;
- 2 struct rcu_state *rsp;
- 3 struct rcu_node *mynode;
- 4 struct rcu_dynticks *dynticks;
- 5 unsigned long grpmask;
- 6 bool beenonline;
-</pre>
-
-<p>The <tt>-&gt;cpu</tt> field contains the number of the
-corresponding CPU, the <tt>-&gt;rsp</tt> pointer references
-the corresponding <tt>rcu_state</tt> structure (and is most frequently
-used to locate the name of the corresponding flavor of RCU for tracing),
-and the <tt>-&gt;mynode</tt> field references the corresponding
-<tt>rcu_node</tt> structure.
-The <tt>-&gt;mynode</tt> is used to propagate quiescent states
-up the combining tree.
-<p>The <tt>-&gt;dynticks</tt> pointer references the
-<tt>rcu_dynticks</tt> structure corresponding to this
-CPU.
-Recall that a single per-CPU instance of the <tt>rcu_dynticks</tt>
-structure is shared among all flavors of RCU.
-These first four fields are constant and therefore require not
-synchronization.
-
-</p><p>The <tt>-&gt;grpmask</tt> field indicates the bit in
-the <tt>-&gt;mynode-&gt;qsmask</tt> corresponding to this
-<tt>rcu_data</tt> structure, and is also used when propagating
-quiescent states.
-The <tt>-&gt;beenonline</tt> flag is set whenever the corresponding
-CPU comes online, which means that the debugfs tracing need not dump
-out any <tt>rcu_data</tt> structure for which this flag is not set.
-
-<h5>Quiescent-State and Grace-Period Tracking</h5>
-
-<p>This portion of the <tt>rcu_data</tt> structure is declared
-as follows:
-
-<pre>
- 1 unsigned long gp_seq;
- 2 unsigned long gp_seq_needed;
- 3 bool cpu_no_qs;
- 4 bool core_needs_qs;
- 5 bool gpwrap;
- 6 unsigned long rcu_qs_ctr_snap;
-</pre>
-
-<p>The <tt>-&gt;gp_seq</tt> and <tt>-&gt;gp_seq_needed</tt>
-fields are the counterparts of the fields of the same name
-in the <tt>rcu_state</tt> and <tt>rcu_node</tt> structures.
-They may each lag up to one behind their <tt>rcu_node</tt>
-counterparts, but in <tt>CONFIG_NO_HZ_IDLE</tt> and
-<tt>CONFIG_NO_HZ_FULL</tt> kernels can lag
-arbitrarily far behind for CPUs in dyntick-idle mode (but these counters
-will catch up upon exit from dyntick-idle mode).
-If the lower two bits of a given <tt>rcu_data</tt> structure's
-<tt>-&gt;gp_seq</tt> are zero, then this <tt>rcu_data</tt>
-structure believes that RCU is idle.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
- All this replication of the grace period numbers can only cause
- massive confusion.
- Why not just keep a global sequence number and be done with it???
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
- Because if there was only a single global sequence
- numbers, there would need to be a single global lock to allow
- safely accessing and updating it.
- And if we are not going to have a single global lock, we need
- to carefully manage the numbers on a per-node basis.
- Recall from the answer to a previous Quick Quiz that the consequences
- of applying a previously sampled quiescent state to the wrong
- grace period are quite severe.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>The <tt>-&gt;cpu_no_qs</tt> flag indicates that the
-CPU has not yet passed through a quiescent state,
-while the <tt>-&gt;core_needs_qs</tt> flag indicates that the
-RCU core needs a quiescent state from the corresponding CPU.
-The <tt>-&gt;gpwrap</tt> field indicates that the corresponding
-CPU has remained idle for so long that the
-<tt>gp_seq</tt> counter is in danger of overflow, which
-will cause the CPU to disregard the values of its counters on
-its next exit from idle.
-Finally, the <tt>rcu_qs_ctr_snap</tt> field is used to detect
-cases where a given operation has resulted in a quiescent state
-for all flavors of RCU, for example, <tt>cond_resched()</tt>
-when RCU has indicated a need for quiescent states.
-
-<h5>RCU Callback Handling</h5>
-
-<p>In the absence of CPU-hotplug events, RCU callbacks are invoked by
-the same CPU that registered them.
-This is strictly a cache-locality optimization: callbacks can and
-do get invoked on CPUs other than the one that registered them.
-After all, if the CPU that registered a given callback has gone
-offline before the callback can be invoked, there really is no other
-choice.
-
-</p><p>This portion of the <tt>rcu_data</tt> structure is declared
-as follows:
-
-<pre>
- 1 struct rcu_segcblist cblist;
- 2 long qlen_last_fqs_check;
- 3 unsigned long n_cbs_invoked;
- 4 unsigned long n_nocbs_invoked;
- 5 unsigned long n_cbs_orphaned;
- 6 unsigned long n_cbs_adopted;
- 7 unsigned long n_force_qs_snap;
- 8 long blimit;
-</pre>
-
-<p>The <tt>-&gt;cblist</tt> structure is the segmented callback list
-described earlier.
-The CPU advances the callbacks in its <tt>rcu_data</tt> structure
-whenever it notices that another RCU grace period has completed.
-The CPU detects the completion of an RCU grace period by noticing
-that the value of its <tt>rcu_data</tt> structure's
-<tt>-&gt;gp_seq</tt> field differs from that of its leaf
-<tt>rcu_node</tt> structure.
-Recall that each <tt>rcu_node</tt> structure's
-<tt>-&gt;gp_seq</tt> field is updated at the beginnings and ends of each
-grace period.
-
-<p>
-The <tt>-&gt;qlen_last_fqs_check</tt> and
-<tt>-&gt;n_force_qs_snap</tt> coordinate the forcing of quiescent
-states from <tt>call_rcu()</tt> and friends when callback
-lists grow excessively long.
-
-</p><p>The <tt>-&gt;n_cbs_invoked</tt>,
-<tt>-&gt;n_cbs_orphaned</tt>, and <tt>-&gt;n_cbs_adopted</tt>
-fields count the number of callbacks invoked,
-sent to other CPUs when this CPU goes offline,
-and received from other CPUs when those other CPUs go offline.
-The <tt>-&gt;n_nocbs_invoked</tt> is used when the CPU's callbacks
-are offloaded to a kthread.
-
-<p>
-Finally, the <tt>-&gt;blimit</tt> counter is the maximum number of
-RCU callbacks that may be invoked at a given time.
-
-<h5>Dyntick-Idle Handling</h5>
-
-<p>This portion of the <tt>rcu_data</tt> structure is declared
-as follows:
-
-<pre>
- 1 int dynticks_snap;
- 2 unsigned long dynticks_fqs;
-</pre>
-
-The <tt>-&gt;dynticks_snap</tt> field is used to take a snapshot
-of the corresponding CPU's dyntick-idle state when forcing
-quiescent states, and is therefore accessed from other CPUs.
-Finally, the <tt>-&gt;dynticks_fqs</tt> field is used to
-count the number of times this CPU is determined to be in
-dyntick-idle state, and is used for tracing and debugging purposes.
-
-<h3><a name="The rcu_dynticks Structure">
-The <tt>rcu_dynticks</tt> Structure</a></h3>
-
-<p>The <tt>rcu_dynticks</tt> maintains the per-CPU dyntick-idle state
-for the corresponding CPU.
-Unlike the other structures, <tt>rcu_dynticks</tt> is not
-replicated over the different flavors of RCU.
-The fields in this structure may be accessed only from the corresponding
-CPU (and from tracing) unless otherwise stated.
-Its fields are as follows:
-
-<pre>
- 1 long dynticks_nesting;
- 2 long dynticks_nmi_nesting;
- 3 atomic_t dynticks;
- 4 bool rcu_need_heavy_qs;
- 5 unsigned long rcu_qs_ctr;
- 6 bool rcu_urgent_qs;
-</pre>
-
-<p>The <tt>-&gt;dynticks_nesting</tt> field counts the
-nesting depth of process execution, so that in normal circumstances
-this counter has value zero or one.
-NMIs, irqs, and tracers are counted by the <tt>-&gt;dynticks_nmi_nesting</tt>
-field.
-Because NMIs cannot be masked, changes to this variable have to be
-undertaken carefully using an algorithm provided by Andy Lutomirski.
-The initial transition from idle adds one, and nested transitions
-add two, so that a nesting level of five is represented by a
-<tt>-&gt;dynticks_nmi_nesting</tt> value of nine.
-This counter can therefore be thought of as counting the number
-of reasons why this CPU cannot be permitted to enter dyntick-idle
-mode, aside from process-level transitions.
-
-<p>However, it turns out that when running in non-idle kernel context,
-the Linux kernel is fully capable of entering interrupt handlers that
-never exit and perhaps also vice versa.
-Therefore, whenever the <tt>-&gt;dynticks_nesting</tt> field is
-incremented up from zero, the <tt>-&gt;dynticks_nmi_nesting</tt> field
-is set to a large positive number, and whenever the
-<tt>-&gt;dynticks_nesting</tt> field is decremented down to zero,
-the the <tt>-&gt;dynticks_nmi_nesting</tt> field is set to zero.
-Assuming that the number of misnested interrupts is not sufficient
-to overflow the counter, this approach corrects the
-<tt>-&gt;dynticks_nmi_nesting</tt> field every time the corresponding
-CPU enters the idle loop from process context.
-
-</p><p>The <tt>-&gt;dynticks</tt> field counts the corresponding
-CPU's transitions to and from either dyntick-idle or user mode, so
-that this counter has an even value when the CPU is in dyntick-idle
-mode or user mode and an odd value otherwise. The transitions to/from
-user mode need to be counted for user mode adaptive-ticks support
-(see timers/NO_HZ.txt).
-
-</p><p>The <tt>-&gt;rcu_need_heavy_qs</tt> field is used
-to record the fact that the RCU core code would really like to
-see a quiescent state from the corresponding CPU, so much so that
-it is willing to call for heavy-weight dyntick-counter operations.
-This flag is checked by RCU's context-switch and <tt>cond_resched()</tt>
-code, which provide a momentary idle sojourn in response.
-
-</p><p>The <tt>-&gt;rcu_qs_ctr</tt> field is used to record
-quiescent states from <tt>cond_resched()</tt>.
-Because <tt>cond_resched()</tt> can execute quite frequently, this
-must be quite lightweight, as in a non-atomic increment of this
-per-CPU field.
-
-</p><p>Finally, the <tt>-&gt;rcu_urgent_qs</tt> field is used to record
-the fact that the RCU core code would really like to see a quiescent
-state from the corresponding CPU, with the various other fields indicating
-just how badly RCU wants this quiescent state.
-This flag is checked by RCU's context-switch and <tt>cond_resched()</tt>
-code, which, if nothing else, non-atomically increment <tt>-&gt;rcu_qs_ctr</tt>
-in response.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
- Why not simply combine the <tt>-&gt;dynticks_nesting</tt>
- and <tt>-&gt;dynticks_nmi_nesting</tt> counters into a
- single counter that just counts the number of reasons that
- the corresponding CPU is non-idle?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
- Because this would fail in the presence of interrupts whose
- handlers never return and of handlers that manage to return
- from a made-up interrupt.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>Additional fields are present for some special-purpose
-builds, and are discussed separately.
-
-<h3><a name="The rcu_head Structure">
-The <tt>rcu_head</tt> Structure</a></h3>
-
-<p>Each <tt>rcu_head</tt> structure represents an RCU callback.
-These structures are normally embedded within RCU-protected data
-structures whose algorithms use asynchronous grace periods.
-In contrast, when using algorithms that block waiting for RCU grace periods,
-RCU users need not provide <tt>rcu_head</tt> structures.
-
-</p><p>The <tt>rcu_head</tt> structure has fields as follows:
-
-<pre>
- 1 struct rcu_head *next;
- 2 void (*func)(struct rcu_head *head);
-</pre>
-
-<p>The <tt>-&gt;next</tt> field is used
-to link the <tt>rcu_head</tt> structures together in the
-lists within the <tt>rcu_data</tt> structures.
-The <tt>-&gt;func</tt> field is a pointer to the function
-to be called when the callback is ready to be invoked, and
-this function is passed a pointer to the <tt>rcu_head</tt>
-structure.
-However, <tt>kfree_rcu()</tt> uses the <tt>-&gt;func</tt>
-field to record the offset of the <tt>rcu_head</tt>
-structure within the enclosing RCU-protected data structure.
-
-</p><p>Both of these fields are used internally by RCU.
-From the viewpoint of RCU users, this structure is an
-opaque &ldquo;cookie&rdquo;.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
- Given that the callback function <tt>-&gt;func</tt>
- is passed a pointer to the <tt>rcu_head</tt> structure,
- how is that function supposed to find the beginning of the
- enclosing RCU-protected data structure?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
- In actual practice, there is a separate callback function per
- type of RCU-protected data structure.
- The callback function can therefore use the <tt>container_of()</tt>
- macro in the Linux kernel (or other pointer-manipulation facilities
- in other software environments) to find the beginning of the
- enclosing structure.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<h3><a name="RCU-Specific Fields in the task_struct Structure">
-RCU-Specific Fields in the <tt>task_struct</tt> Structure</a></h3>
-
-<p>The <tt>CONFIG_PREEMPT_RCU</tt> implementation uses some
-additional fields in the <tt>task_struct</tt> structure:
-
-<pre>
- 1 #ifdef CONFIG_PREEMPT_RCU
- 2 int rcu_read_lock_nesting;
- 3 union rcu_special rcu_read_unlock_special;
- 4 struct list_head rcu_node_entry;
- 5 struct rcu_node *rcu_blocked_node;
- 6 #endif /* #ifdef CONFIG_PREEMPT_RCU */
- 7 #ifdef CONFIG_TASKS_RCU
- 8 unsigned long rcu_tasks_nvcsw;
- 9 bool rcu_tasks_holdout;
-10 struct list_head rcu_tasks_holdout_list;
-11 int rcu_tasks_idle_cpu;
-12 #endif /* #ifdef CONFIG_TASKS_RCU */
-</pre>
-
-<p>The <tt>-&gt;rcu_read_lock_nesting</tt> field records the
-nesting level for RCU read-side critical sections, and
-the <tt>-&gt;rcu_read_unlock_special</tt> field is a bitmask
-that records special conditions that require <tt>rcu_read_unlock()</tt>
-to do additional work.
-The <tt>-&gt;rcu_node_entry</tt> field is used to form lists of
-tasks that have blocked within preemptible-RCU read-side critical
-sections and the <tt>-&gt;rcu_blocked_node</tt> field references
-the <tt>rcu_node</tt> structure whose list this task is a member of,
-or <tt>NULL</tt> if it is not blocked within a preemptible-RCU
-read-side critical section.
-
-<p>The <tt>-&gt;rcu_tasks_nvcsw</tt> field tracks the number of
-voluntary context switches that this task had undergone at the
-beginning of the current tasks-RCU grace period,
-<tt>-&gt;rcu_tasks_holdout</tt> is set if the current tasks-RCU
-grace period is waiting on this task, <tt>-&gt;rcu_tasks_holdout_list</tt>
-is a list element enqueuing this task on the holdout list,
-and <tt>-&gt;rcu_tasks_idle_cpu</tt> tracks which CPU this
-idle task is running, but only if the task is currently running,
-that is, if the CPU is currently idle.
-
-<h3><a name="Accessor Functions">
-Accessor Functions</a></h3>
-
-<p>The following listing shows the
-<tt>rcu_get_root()</tt>, <tt>rcu_for_each_node_breadth_first</tt> and
-<tt>rcu_for_each_leaf_node()</tt> function and macros:
-
-<pre>
- 1 static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
- 2 {
- 3 return &amp;rsp-&gt;node[0];
- 4 }
- 5
- 6 #define rcu_for_each_node_breadth_first(rsp, rnp) \
- 7 for ((rnp) = &amp;(rsp)-&gt;node[0]; \
- 8 (rnp) &lt; &amp;(rsp)-&gt;node[NUM_RCU_NODES]; (rnp)++)
- 9
- 10 #define rcu_for_each_leaf_node(rsp, rnp) \
- 11 for ((rnp) = (rsp)-&gt;level[NUM_RCU_LVLS - 1]; \
- 12 (rnp) &lt; &amp;(rsp)-&gt;node[NUM_RCU_NODES]; (rnp)++)
-</pre>
-
-<p>The <tt>rcu_get_root()</tt> simply returns a pointer to the
-first element of the specified <tt>rcu_state</tt> structure's
-<tt>-&gt;node[]</tt> array, which is the root <tt>rcu_node</tt>
-structure.
-
-</p><p>As noted earlier, the <tt>rcu_for_each_node_breadth_first()</tt>
-macro takes advantage of the layout of the <tt>rcu_node</tt>
-structures in the <tt>rcu_state</tt> structure's
-<tt>-&gt;node[]</tt> array, performing a breadth-first traversal by
-simply traversing the array in order.
-Similarly, the <tt>rcu_for_each_leaf_node()</tt> macro traverses only
-the last part of the array, thus traversing only the leaf
-<tt>rcu_node</tt> structures.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
- What does
- <tt>rcu_for_each_leaf_node()</tt> do if the <tt>rcu_node</tt> tree
- contains only a single node?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
- In the single-node case,
- <tt>rcu_for_each_leaf_node()</tt> traverses the single node.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<h3><a name="Summary">
-Summary</a></h3>
-
-So each flavor of RCU is represented by an <tt>rcu_state</tt> structure,
-which contains a combining tree of <tt>rcu_node</tt> and
-<tt>rcu_data</tt> structures.
-Finally, in <tt>CONFIG_NO_HZ_IDLE</tt> kernels, each CPU's dyntick-idle
-state is tracked by an <tt>rcu_dynticks</tt> structure.
-
-If you made it this far, you are well prepared to read the code
-walkthroughs in the other articles in this series.
-
-<h3><a name="Acknowledgments">
-Acknowledgments</a></h3>
-
-I owe thanks to Cyrill Gorcunov, Mathieu Desnoyers, Dhaval Giani, Paul
-Turner, Abhishek Srivastava, Matt Kowalczyk, and Serge Hallyn
-for helping me get this document into a more human-readable state.
-
-<h3><a name="Legal Statement">
-Legal Statement</a></h3>
-
-<p>This work represents the view of the author and does not necessarily
-represent the view of IBM.
-
-</p><p>Linux is a registered trademark of Linus Torvalds.
-
-</p><p>Other company, product, and service names may be trademarks or
-service marks of others.
-
-</body></html>
diff --git a/Documentation/RCU/Design/Data-Structures/Data-Structures.rst b/Documentation/RCU/Design/Data-Structures/Data-Structures.rst
new file mode 100644
index 000000000000..1b0aad184dd7
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/Data-Structures.rst
@@ -0,0 +1,1196 @@
+===================================================
+A Tour Through TREE_RCU's Data Structures [LWN.net]
+===================================================
+
+December 18, 2016
+
+This article was contributed by Paul E. McKenney
+
+Introduction
+============
+
+This document describes RCU's major data structures and their relationship
+to each other.
+
+Data-Structure Relationships
+============================
+
+RCU is for all intents and purposes a large state machine, and its
+data structures maintain the state in such a way as to allow RCU readers
+to execute extremely quickly, while also processing the RCU grace periods
+requested by updaters in an efficient and extremely scalable fashion.
+The efficiency and scalability of RCU updaters is provided primarily
+by a combining tree, as shown below:
+
+.. kernel-figure:: BigTreeClassicRCU.svg
+
+This diagram shows an enclosing ``rcu_state`` structure containing a tree
+of ``rcu_node`` structures. Each leaf node of the ``rcu_node`` tree has up
+to 16 ``rcu_data`` structures associated with it, so that there are
+``NR_CPUS`` number of ``rcu_data`` structures, one for each possible CPU.
+This structure is adjusted at boot time, if needed, to handle the common
+case where ``nr_cpu_ids`` is much less than ``NR_CPUs``.
+For example, a number of Linux distributions set ``NR_CPUs=4096``,
+which results in a three-level ``rcu_node`` tree.
+If the actual hardware has only 16 CPUs, RCU will adjust itself
+at boot time, resulting in an ``rcu_node`` tree with only a single node.
+
+The purpose of this combining tree is to allow per-CPU events
+such as quiescent states, dyntick-idle transitions,
+and CPU hotplug operations to be processed efficiently
+and scalably.
+Quiescent states are recorded by the per-CPU ``rcu_data`` structures,
+and other events are recorded by the leaf-level ``rcu_node``
+structures.
+All of these events are combined at each level of the tree until finally
+grace periods are completed at the tree's root ``rcu_node``
+structure.
+A grace period can be completed at the root once every CPU
+(or, in the case of ``CONFIG_PREEMPT_RCU``, task)
+has passed through a quiescent state.
+Once a grace period has completed, record of that fact is propagated
+back down the tree.
+
+As can be seen from the diagram, on a 64-bit system
+a two-level tree with 64 leaves can accommodate 1,024 CPUs, with a fanout
+of 64 at the root and a fanout of 16 at the leaves.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| Why isn't the fanout at the leaves also 64? |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| Because there are more types of events that affect the leaf-level |
+| ``rcu_node`` structures than further up the tree. Therefore, if the |
+| leaf ``rcu_node`` structures have fanout of 64, the contention on |
+| these structures' ``->structures`` becomes excessive. Experimentation |
+| on a wide variety of systems has shown that a fanout of 16 works well |
+| for the leaves of the ``rcu_node`` tree. |
+| |
+| Of course, further experience with systems having hundreds or |
+| thousands of CPUs may demonstrate that the fanout for the non-leaf |
+| ``rcu_node`` structures must also be reduced. Such reduction can be |
+| easily carried out when and if it proves necessary. In the meantime, |
+| if you are using such a system and running into contention problems |
+| on the non-leaf ``rcu_node`` structures, you may use the |
+| ``CONFIG_RCU_FANOUT`` kernel configuration parameter to reduce the |
+| non-leaf fanout as needed. |
+| |
+| Kernels built for systems with strong NUMA characteristics might |
+| also need to adjust ``CONFIG_RCU_FANOUT`` so that the domains of |
+| the ``rcu_node`` structures align with hardware boundaries. |
+| However, there has thus far been no need for this. |
++-----------------------------------------------------------------------+
+
+If your system has more than 1,024 CPUs (or more than 512 CPUs on a
+32-bit system), then RCU will automatically add more levels to the tree.
+For example, if you are crazy enough to build a 64-bit system with
+65,536 CPUs, RCU would configure the ``rcu_node`` tree as follows:
+
+.. kernel-figure:: HugeTreeClassicRCU.svg
+
+RCU currently permits up to a four-level tree, which on a 64-bit system
+accommodates up to 4,194,304 CPUs, though only a mere 524,288 CPUs for
+32-bit systems. On the other hand, you can set both
+``CONFIG_RCU_FANOUT`` and ``CONFIG_RCU_FANOUT_LEAF`` to be as small as
+2, which would result in a 16-CPU test using a 4-level tree. This can be
+useful for testing large-system capabilities on small test machines.
+
+This multi-level combining tree allows us to get most of the performance
+and scalability benefits of partitioning, even though RCU grace-period
+detection is inherently a global operation. The trick here is that only
+the last CPU to report a quiescent state into a given ``rcu_node``
+structure need advance to the ``rcu_node`` structure at the next level
+up the tree. This means that at the leaf-level ``rcu_node`` structure,
+only one access out of sixteen will progress up the tree. For the
+internal ``rcu_node`` structures, the situation is even more extreme:
+Only one access out of sixty-four will progress up the tree. Because the
+vast majority of the CPUs do not progress up the tree, the lock
+contention remains roughly constant up the tree. No matter how many CPUs
+there are in the system, at most 64 quiescent-state reports per grace
+period will progress all the way to the root ``rcu_node`` structure,
+thus ensuring that the lock contention on that root ``rcu_node``
+structure remains acceptably low.
+
+In effect, the combining tree acts like a big shock absorber, keeping
+lock contention under control at all tree levels regardless of the level
+of loading on the system.
+
+RCU updaters wait for normal grace periods by registering RCU callbacks,
+either directly via ``call_rcu()`` or indirectly via
+``synchronize_rcu()`` and friends. RCU callbacks are represented by
+``rcu_head`` structures, which are queued on ``rcu_data`` structures
+while they are waiting for a grace period to elapse, as shown in the
+following figure:
+
+.. kernel-figure:: BigTreePreemptRCUBHdyntickCB.svg
+
+This figure shows how ``TREE_RCU``'s and ``PREEMPT_RCU``'s major data
+structures are related. Lesser data structures will be introduced with
+the algorithms that make use of them.
+
+Note that each of the data structures in the above figure has its own
+synchronization:
+
+#. Each ``rcu_state`` structures has a lock and a mutex, and some fields
+ are protected by the corresponding root ``rcu_node`` structure's lock.
+#. Each ``rcu_node`` structure has a spinlock.
+#. The fields in ``rcu_data`` are private to the corresponding CPU,
+ although a few can be read and written by other CPUs.
+
+It is important to note that different data structures can have very
+different ideas about the state of RCU at any given time. For but one
+example, awareness of the start or end of a given RCU grace period
+propagates slowly through the data structures. This slow propagation is
+absolutely necessary for RCU to have good read-side performance. If this
+balkanized implementation seems foreign to you, one useful trick is to
+consider each instance of these data structures to be a different
+person, each having the usual slightly different view of reality.
+
+The general role of each of these data structures is as follows:
+
+#. ``rcu_state``: This structure forms the interconnection between the
+ ``rcu_node`` and ``rcu_data`` structures, tracks grace periods,
+ serves as short-term repository for callbacks orphaned by CPU-hotplug
+ events, maintains ``rcu_barrier()`` state, tracks expedited
+ grace-period state, and maintains state used to force quiescent
+ states when grace periods extend too long,
+#. ``rcu_node``: This structure forms the combining tree that propagates
+ quiescent-state information from the leaves to the root, and also
+ propagates grace-period information from the root to the leaves. It
+ provides local copies of the grace-period state in order to allow
+ this information to be accessed in a synchronized manner without
+ suffering the scalability limitations that would otherwise be imposed
+ by global locking. In ``CONFIG_PREEMPT_RCU`` kernels, it manages the
+ lists of tasks that have blocked while in their current RCU read-side
+ critical section. In ``CONFIG_PREEMPT_RCU`` with
+ ``CONFIG_RCU_BOOST``, it manages the per-\ ``rcu_node``
+ priority-boosting kernel threads (kthreads) and state. Finally, it
+ records CPU-hotplug state in order to determine which CPUs should be
+ ignored during a given grace period.
+#. ``rcu_data``: This per-CPU structure is the focus of quiescent-state
+ detection and RCU callback queuing. It also tracks its relationship
+ to the corresponding leaf ``rcu_node`` structure to allow
+ more-efficient propagation of quiescent states up the ``rcu_node``
+ combining tree. Like the ``rcu_node`` structure, it provides a local
+ copy of the grace-period information to allow for-free synchronized
+ access to this information from the corresponding CPU. Finally, this
+ structure records past dyntick-idle state for the corresponding CPU
+ and also tracks statistics.
+#. ``rcu_head``: This structure represents RCU callbacks, and is the
+ only structure allocated and managed by RCU users. The ``rcu_head``
+ structure is normally embedded within the RCU-protected data
+ structure.
+
+If all you wanted from this article was a general notion of how RCU's
+data structures are related, you are done. Otherwise, each of the
+following sections give more details on the ``rcu_state``, ``rcu_node``
+and ``rcu_data`` data structures.
+
+The ``rcu_state`` Structure
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The ``rcu_state`` structure is the base structure that represents the
+state of RCU in the system. This structure forms the interconnection
+between the ``rcu_node`` and ``rcu_data`` structures, tracks grace
+periods, contains the lock used to synchronize with CPU-hotplug events,
+and maintains state used to force quiescent states when grace periods
+extend too long,
+
+A few of the ``rcu_state`` structure's fields are discussed, singly and
+in groups, in the following sections. The more specialized fields are
+covered in the discussion of their use.
+
+Relationship to rcu_node and rcu_data Structures
+''''''''''''''''''''''''''''''''''''''''''''''''
+
+This portion of the ``rcu_state`` structure is declared as follows:
+
+::
+
+ 1 struct rcu_node node[NUM_RCU_NODES];
+ 2 struct rcu_node *level[NUM_RCU_LVLS + 1];
+ 3 struct rcu_data __percpu *rda;
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| Wait a minute! You said that the ``rcu_node`` structures formed a |
+| tree, but they are declared as a flat array! What gives? |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| The tree is laid out in the array. The first node In the array is the |
+| head, the next set of nodes in the array are children of the head |
+| node, and so on until the last set of nodes in the array are the |
+| leaves. |
+| See the following diagrams to see how this works. |
++-----------------------------------------------------------------------+
+
+The ``rcu_node`` tree is embedded into the ``->node[]`` array as shown
+in the following figure:
+
+.. kernel-figure:: TreeMapping.svg
+
+One interesting consequence of this mapping is that a breadth-first
+traversal of the tree is implemented as a simple linear scan of the
+array, which is in fact what the ``rcu_for_each_node_breadth_first()``
+macro does. This macro is used at the beginning and ends of grace
+periods.
+
+Each entry of the ``->level`` array references the first ``rcu_node``
+structure on the corresponding level of the tree, for example, as shown
+below:
+
+.. kernel-figure:: TreeMappingLevel.svg
+
+The zero\ :sup:`th` element of the array references the root
+``rcu_node`` structure, the first element references the first child of
+the root ``rcu_node``, and finally the second element references the
+first leaf ``rcu_node`` structure.
+
+For whatever it is worth, if you draw the tree to be tree-shaped rather
+than array-shaped, it is easy to draw a planar representation:
+
+.. kernel-figure:: TreeLevel.svg
+
+Finally, the ``->rda`` field references a per-CPU pointer to the
+corresponding CPU's ``rcu_data`` structure.
+
+All of these fields are constant once initialization is complete, and
+therefore need no protection.
+
+Grace-Period Tracking
+'''''''''''''''''''''
+
+This portion of the ``rcu_state`` structure is declared as follows:
+
+::
+
+ 1 unsigned long gp_seq;
+
+RCU grace periods are numbered, and the ``->gp_seq`` field contains the
+current grace-period sequence number. The bottom two bits are the state
+of the current grace period, which can be zero for not yet started or
+one for in progress. In other words, if the bottom two bits of
+``->gp_seq`` are zero, then RCU is idle. Any other value in the bottom
+two bits indicates that something is broken. This field is protected by
+the root ``rcu_node`` structure's ``->lock`` field.
+
+There are ``->gp_seq`` fields in the ``rcu_node`` and ``rcu_data``
+structures as well. The fields in the ``rcu_state`` structure represent
+the most current value, and those of the other structures are compared
+in order to detect the beginnings and ends of grace periods in a
+distributed fashion. The values flow from ``rcu_state`` to ``rcu_node``
+(down the tree from the root to the leaves) to ``rcu_data``.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| Given that the root rcu_node structure has a gp_seq field, |
+| why does RCU maintain a separate gp_seq in the rcu_state structure? |
+| Why not just use the root rcu_node's gp_seq as the official record |
+| and update it directly when starting a new grace period? |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| On single-node RCU trees (where the root node is also a leaf), |
+| updating the root node's gp_seq immediately would create unnecessary |
+| lock contention. Here's why: |
+| |
+| If we did rcu_seq_start() directly on the root node's gp_seq: |
+| |
+| 1. All CPUs would immediately see their node's gp_seq from their rdp's|
+| gp_seq, in rcu_pending(). They would all then invoke the RCU-core. |
+| 2. Which calls note_gp_changes() and try to acquire the node lock. |
+| 3. But rnp->qsmask isn't initialized yet (happens later in |
+| rcu_gp_init()) |
+| 4. So each CPU would acquire the lock, find it can't determine if it |
+| needs to report quiescent state (no qsmask), update rdp->gp_seq, |
+| and release the lock. |
+| 5. Result: Lots of lock acquisitions with no grace period progress |
+| |
+| By having a separate rcu_state.gp_seq, we can increment the official |
+| grace period counter without immediately affecting what CPUs see in |
+| their nodes. The hierarchical propagation in rcu_gp_init() then |
+| updates the root node's gp_seq and qsmask together under the same lock|
+| acquisition, avoiding this useless contention. |
++-----------------------------------------------------------------------+
+
+Miscellaneous
+'''''''''''''
+
+This portion of the ``rcu_state`` structure is declared as follows:
+
+::
+
+ 1 unsigned long gp_max;
+ 2 char abbr;
+ 3 char *name;
+
+The ``->gp_max`` field tracks the duration of the longest grace period
+in jiffies. It is protected by the root ``rcu_node``'s ``->lock``.
+
+The ``->name`` and ``->abbr`` fields distinguish between preemptible RCU
+(“rcu_preempt” and “p”) and non-preemptible RCU (“rcu_sched” and “s”).
+These fields are used for diagnostic and tracing purposes.
+
+The ``rcu_node`` Structure
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The ``rcu_node`` structures form the combining tree that propagates
+quiescent-state information from the leaves to the root and also that
+propagates grace-period information from the root down to the leaves.
+They provides local copies of the grace-period state in order to allow
+this information to be accessed in a synchronized manner without
+suffering the scalability limitations that would otherwise be imposed by
+global locking. In ``CONFIG_PREEMPT_RCU`` kernels, they manage the lists
+of tasks that have blocked while in their current RCU read-side critical
+section. In ``CONFIG_PREEMPT_RCU`` with ``CONFIG_RCU_BOOST``, they
+manage the per-\ ``rcu_node`` priority-boosting kernel threads
+(kthreads) and state. Finally, they record CPU-hotplug state in order to
+determine which CPUs should be ignored during a given grace period.
+
+The ``rcu_node`` structure's fields are discussed, singly and in groups,
+in the following sections.
+
+Connection to Combining Tree
+''''''''''''''''''''''''''''
+
+This portion of the ``rcu_node`` structure is declared as follows:
+
+::
+
+ 1 struct rcu_node *parent;
+ 2 u8 level;
+ 3 u8 grpnum;
+ 4 unsigned long grpmask;
+ 5 int grplo;
+ 6 int grphi;
+
+The ``->parent`` pointer references the ``rcu_node`` one level up in the
+tree, and is ``NULL`` for the root ``rcu_node``. The RCU implementation
+makes heavy use of this field to push quiescent states up the tree. The
+``->level`` field gives the level in the tree, with the root being at
+level zero, its children at level one, and so on. The ``->grpnum`` field
+gives this node's position within the children of its parent, so this
+number can range between 0 and 31 on 32-bit systems and between 0 and 63
+on 64-bit systems. The ``->level`` and ``->grpnum`` fields are used only
+during initialization and for tracing. The ``->grpmask`` field is the
+bitmask counterpart of ``->grpnum``, and therefore always has exactly
+one bit set. This mask is used to clear the bit corresponding to this
+``rcu_node`` structure in its parent's bitmasks, which are described
+later. Finally, the ``->grplo`` and ``->grphi`` fields contain the
+lowest and highest numbered CPU served by this ``rcu_node`` structure,
+respectively.
+
+All of these fields are constant, and thus do not require any
+synchronization.
+
+Synchronization
+'''''''''''''''
+
+This field of the ``rcu_node`` structure is declared as follows:
+
+::
+
+ 1 raw_spinlock_t lock;
+
+This field is used to protect the remaining fields in this structure,
+unless otherwise stated. That said, all of the fields in this structure
+can be accessed without locking for tracing purposes. Yes, this can
+result in confusing traces, but better some tracing confusion than to be
+heisenbugged out of existence.
+
+.. _grace-period-tracking-1:
+
+Grace-Period Tracking
+'''''''''''''''''''''
+
+This portion of the ``rcu_node`` structure is declared as follows:
+
+::
+
+ 1 unsigned long gp_seq;
+ 2 unsigned long gp_seq_needed;
+
+The ``rcu_node`` structures' ``->gp_seq`` fields are the counterparts of
+the field of the same name in the ``rcu_state`` structure. They each may
+lag up to one step behind their ``rcu_state`` counterpart. If the bottom
+two bits of a given ``rcu_node`` structure's ``->gp_seq`` field is zero,
+then this ``rcu_node`` structure believes that RCU is idle.
+
+The ``>gp_seq`` field of each ``rcu_node`` structure is updated at the
+beginning and the end of each grace period.
+
+The ``->gp_seq_needed`` fields record the furthest-in-the-future grace
+period request seen by the corresponding ``rcu_node`` structure. The
+request is considered fulfilled when the value of the ``->gp_seq`` field
+equals or exceeds that of the ``->gp_seq_needed`` field.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| Suppose that this ``rcu_node`` structure doesn't see a request for a |
+| very long time. Won't wrapping of the ``->gp_seq`` field cause |
+| problems? |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| No, because if the ``->gp_seq_needed`` field lags behind the |
+| ``->gp_seq`` field, the ``->gp_seq_needed`` field will be updated at |
+| the end of the grace period. Modulo-arithmetic comparisons therefore |
+| will always get the correct answer, even with wrapping. |
++-----------------------------------------------------------------------+
+
+Quiescent-State Tracking
+''''''''''''''''''''''''
+
+These fields manage the propagation of quiescent states up the combining
+tree.
+
+This portion of the ``rcu_node`` structure has fields as follows:
+
+::
+
+ 1 unsigned long qsmask;
+ 2 unsigned long expmask;
+ 3 unsigned long qsmaskinit;
+ 4 unsigned long expmaskinit;
+
+The ``->qsmask`` field tracks which of this ``rcu_node`` structure's
+children still need to report quiescent states for the current normal
+grace period. Such children will have a value of 1 in their
+corresponding bit. Note that the leaf ``rcu_node`` structures should be
+thought of as having ``rcu_data`` structures as their children.
+Similarly, the ``->expmask`` field tracks which of this ``rcu_node``
+structure's children still need to report quiescent states for the
+current expedited grace period. An expedited grace period has the same
+conceptual properties as a normal grace period, but the expedited
+implementation accepts extreme CPU overhead to obtain much lower
+grace-period latency, for example, consuming a few tens of microseconds
+worth of CPU time to reduce grace-period duration from milliseconds to
+tens of microseconds. The ``->qsmaskinit`` field tracks which of this
+``rcu_node`` structure's children cover for at least one online CPU.
+This mask is used to initialize ``->qsmask``, and ``->expmaskinit`` is
+used to initialize ``->expmask`` and the beginning of the normal and
+expedited grace periods, respectively.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| Why are these bitmasks protected by locking? Come on, haven't you |
+| heard of atomic instructions??? |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| Lockless grace-period computation! Such a tantalizing possibility! |
+| But consider the following sequence of events: |
+| |
+| #. CPU 0 has been in dyntick-idle mode for quite some time. When it |
+| wakes up, it notices that the current RCU grace period needs it to |
+| report in, so it sets a flag where the scheduling clock interrupt |
+| will find it. |
+| #. Meanwhile, CPU 1 is running ``force_quiescent_state()``, and |
+| notices that CPU 0 has been in dyntick idle mode, which qualifies |
+| as an extended quiescent state. |
+| #. CPU 0's scheduling clock interrupt fires in the middle of an RCU |
+| read-side critical section, and notices that the RCU core needs |
+| something, so commences RCU softirq processing. |
+| #. CPU 0's softirq handler executes and is just about ready to report |
+| its quiescent state up the ``rcu_node`` tree. |
+| #. But CPU 1 beats it to the punch, completing the current grace |
+| period and starting a new one. |
+| #. CPU 0 now reports its quiescent state for the wrong grace period. |
+| That grace period might now end before the RCU read-side critical |
+| section. If that happens, disaster will ensue. |
+| |
+| So the locking is absolutely required in order to coordinate clearing |
+| of the bits with updating of the grace-period sequence number in |
+| ``->gp_seq``. |
++-----------------------------------------------------------------------+
+
+Blocked-Task Management
+'''''''''''''''''''''''
+
+``PREEMPT_RCU`` allows tasks to be preempted in the midst of their RCU
+read-side critical sections, and these tasks must be tracked explicitly.
+The details of exactly why and how they are tracked will be covered in a
+separate article on RCU read-side processing. For now, it is enough to
+know that the ``rcu_node`` structure tracks them.
+
+::
+
+ 1 struct list_head blkd_tasks;
+ 2 struct list_head *gp_tasks;
+ 3 struct list_head *exp_tasks;
+ 4 bool wait_blkd_tasks;
+
+The ``->blkd_tasks`` field is a list header for the list of blocked and
+preempted tasks. As tasks undergo context switches within RCU read-side
+critical sections, their ``task_struct`` structures are enqueued (via
+the ``task_struct``'s ``->rcu_node_entry`` field) onto the head of the
+``->blkd_tasks`` list for the leaf ``rcu_node`` structure corresponding
+to the CPU on which the outgoing context switch executed. As these tasks
+later exit their RCU read-side critical sections, they remove themselves
+from the list. This list is therefore in reverse time order, so that if
+one of the tasks is blocking the current grace period, all subsequent
+tasks must also be blocking that same grace period. Therefore, a single
+pointer into this list suffices to track all tasks blocking a given
+grace period. That pointer is stored in ``->gp_tasks`` for normal grace
+periods and in ``->exp_tasks`` for expedited grace periods. These last
+two fields are ``NULL`` if either there is no grace period in flight or
+if there are no blocked tasks preventing that grace period from
+completing. If either of these two pointers is referencing a task that
+removes itself from the ``->blkd_tasks`` list, then that task must
+advance the pointer to the next task on the list, or set the pointer to
+``NULL`` if there are no subsequent tasks on the list.
+
+For example, suppose that tasks T1, T2, and T3 are all hard-affinitied
+to the largest-numbered CPU in the system. Then if task T1 blocked in an
+RCU read-side critical section, then an expedited grace period started,
+then task T2 blocked in an RCU read-side critical section, then a normal
+grace period started, and finally task 3 blocked in an RCU read-side
+critical section, then the state of the last leaf ``rcu_node``
+structure's blocked-task list would be as shown below:
+
+.. kernel-figure:: blkd_task.svg
+
+Task T1 is blocking both grace periods, task T2 is blocking only the
+normal grace period, and task T3 is blocking neither grace period. Note
+that these tasks will not remove themselves from this list immediately
+upon resuming execution. They will instead remain on the list until they
+execute the outermost ``rcu_read_unlock()`` that ends their RCU
+read-side critical section.
+
+The ``->wait_blkd_tasks`` field indicates whether or not the current
+grace period is waiting on a blocked task.
+
+Sizing the ``rcu_node`` Array
+'''''''''''''''''''''''''''''
+
+The ``rcu_node`` array is sized via a series of C-preprocessor
+expressions as follows:
+
+::
+
+ 1 #ifdef CONFIG_RCU_FANOUT
+ 2 #define RCU_FANOUT CONFIG_RCU_FANOUT
+ 3 #else
+ 4 # ifdef CONFIG_64BIT
+ 5 # define RCU_FANOUT 64
+ 6 # else
+ 7 # define RCU_FANOUT 32
+ 8 # endif
+ 9 #endif
+ 10
+ 11 #ifdef CONFIG_RCU_FANOUT_LEAF
+ 12 #define RCU_FANOUT_LEAF CONFIG_RCU_FANOUT_LEAF
+ 13 #else
+ 14 # ifdef CONFIG_64BIT
+ 15 # define RCU_FANOUT_LEAF 64
+ 16 # else
+ 17 # define RCU_FANOUT_LEAF 32
+ 18 # endif
+ 19 #endif
+ 20
+ 21 #define RCU_FANOUT_1 (RCU_FANOUT_LEAF)
+ 22 #define RCU_FANOUT_2 (RCU_FANOUT_1 * RCU_FANOUT)
+ 23 #define RCU_FANOUT_3 (RCU_FANOUT_2 * RCU_FANOUT)
+ 24 #define RCU_FANOUT_4 (RCU_FANOUT_3 * RCU_FANOUT)
+ 25
+ 26 #if NR_CPUS <= RCU_FANOUT_1
+ 27 # define RCU_NUM_LVLS 1
+ 28 # define NUM_RCU_LVL_0 1
+ 29 # define NUM_RCU_NODES NUM_RCU_LVL_0
+ 30 # define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0 }
+ 31 # define RCU_NODE_NAME_INIT { "rcu_node_0" }
+ 32 # define RCU_FQS_NAME_INIT { "rcu_node_fqs_0" }
+ 33 # define RCU_EXP_NAME_INIT { "rcu_node_exp_0" }
+ 34 #elif NR_CPUS <= RCU_FANOUT_2
+ 35 # define RCU_NUM_LVLS 2
+ 36 # define NUM_RCU_LVL_0 1
+ 37 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
+ 38 # define NUM_RCU_NODES (NUM_RCU_LVL_0 + NUM_RCU_LVL_1)
+ 39 # define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0, NUM_RCU_LVL_1 }
+ 40 # define RCU_NODE_NAME_INIT { "rcu_node_0", "rcu_node_1" }
+ 41 # define RCU_FQS_NAME_INIT { "rcu_node_fqs_0", "rcu_node_fqs_1" }
+ 42 # define RCU_EXP_NAME_INIT { "rcu_node_exp_0", "rcu_node_exp_1" }
+ 43 #elif NR_CPUS <= RCU_FANOUT_3
+ 44 # define RCU_NUM_LVLS 3
+ 45 # define NUM_RCU_LVL_0 1
+ 46 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
+ 47 # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
+ 48 # define NUM_RCU_NODES (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2)
+ 49 # define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2 }
+ 50 # define RCU_NODE_NAME_INIT { "rcu_node_0", "rcu_node_1", "rcu_node_2" }
+ 51 # define RCU_FQS_NAME_INIT { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2" }
+ 52 # define RCU_EXP_NAME_INIT { "rcu_node_exp_0", "rcu_node_exp_1", "rcu_node_exp_2" }
+ 53 #elif NR_CPUS <= RCU_FANOUT_4
+ 54 # define RCU_NUM_LVLS 4
+ 55 # define NUM_RCU_LVL_0 1
+ 56 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3)
+ 57 # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
+ 58 # define NUM_RCU_LVL_3 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
+ 59 # define NUM_RCU_NODES (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3)
+ 60 # define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2, NUM_RCU_LVL_3 }
+ 61 # define RCU_NODE_NAME_INIT { "rcu_node_0", "rcu_node_1", "rcu_node_2", "rcu_node_3" }
+ 62 # define RCU_FQS_NAME_INIT { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2", "rcu_node_fqs_3" }
+ 63 # define RCU_EXP_NAME_INIT { "rcu_node_exp_0", "rcu_node_exp_1", "rcu_node_exp_2", "rcu_node_exp_3" }
+ 64 #else
+ 65 # error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
+ 66 #endif
+
+The maximum number of levels in the ``rcu_node`` structure is currently
+limited to four, as specified by lines 21-24 and the structure of the
+subsequent “if” statement. For 32-bit systems, this allows
+16*32*32*32=524,288 CPUs, which should be sufficient for the next few
+years at least. For 64-bit systems, 16*64*64*64=4,194,304 CPUs is
+allowed, which should see us through the next decade or so. This
+four-level tree also allows kernels built with ``CONFIG_RCU_FANOUT=8``
+to support up to 4096 CPUs, which might be useful in very large systems
+having eight CPUs per socket (but please note that no one has yet shown
+any measurable performance degradation due to misaligned socket and
+``rcu_node`` boundaries). In addition, building kernels with a full four
+levels of ``rcu_node`` tree permits better testing of RCU's
+combining-tree code.
+
+The ``RCU_FANOUT`` symbol controls how many children are permitted at
+each non-leaf level of the ``rcu_node`` tree. If the
+``CONFIG_RCU_FANOUT`` Kconfig option is not specified, it is set based
+on the word size of the system, which is also the Kconfig default.
+
+The ``RCU_FANOUT_LEAF`` symbol controls how many CPUs are handled by
+each leaf ``rcu_node`` structure. Experience has shown that allowing a
+given leaf ``rcu_node`` structure to handle 64 CPUs, as permitted by the
+number of bits in the ``->qsmask`` field on a 64-bit system, results in
+excessive contention for the leaf ``rcu_node`` structures' ``->lock``
+fields. The number of CPUs per leaf ``rcu_node`` structure is therefore
+limited to 16 given the default value of ``CONFIG_RCU_FANOUT_LEAF``. If
+``CONFIG_RCU_FANOUT_LEAF`` is unspecified, the value selected is based
+on the word size of the system, just as for ``CONFIG_RCU_FANOUT``.
+Lines 11-19 perform this computation.
+
+Lines 21-24 compute the maximum number of CPUs supported by a
+single-level (which contains a single ``rcu_node`` structure),
+two-level, three-level, and four-level ``rcu_node`` tree, respectively,
+given the fanout specified by ``RCU_FANOUT`` and ``RCU_FANOUT_LEAF``.
+These numbers of CPUs are retained in the ``RCU_FANOUT_1``,
+``RCU_FANOUT_2``, ``RCU_FANOUT_3``, and ``RCU_FANOUT_4`` C-preprocessor
+variables, respectively.
+
+These variables are used to control the C-preprocessor ``#if`` statement
+spanning lines 26-66 that computes the number of ``rcu_node`` structures
+required for each level of the tree, as well as the number of levels
+required. The number of levels is placed in the ``NUM_RCU_LVLS``
+C-preprocessor variable by lines 27, 35, 44, and 54. The number of
+``rcu_node`` structures for the topmost level of the tree is always
+exactly one, and this value is unconditionally placed into
+``NUM_RCU_LVL_0`` by lines 28, 36, 45, and 55. The rest of the levels
+(if any) of the ``rcu_node`` tree are computed by dividing the maximum
+number of CPUs by the fanout supported by the number of levels from the
+current level down, rounding up. This computation is performed by
+lines 37, 46-47, and 56-58. Lines 31-33, 40-42, 50-52, and 62-63 create
+initializers for lockdep lock-class names. Finally, lines 64-66 produce
+an error if the maximum number of CPUs is too large for the specified
+fanout.
+
+The ``rcu_segcblist`` Structure
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The ``rcu_segcblist`` structure maintains a segmented list of callbacks
+as follows:
+
+::
+
+ 1 #define RCU_DONE_TAIL 0
+ 2 #define RCU_WAIT_TAIL 1
+ 3 #define RCU_NEXT_READY_TAIL 2
+ 4 #define RCU_NEXT_TAIL 3
+ 5 #define RCU_CBLIST_NSEGS 4
+ 6
+ 7 struct rcu_segcblist {
+ 8 struct rcu_head *head;
+ 9 struct rcu_head **tails[RCU_CBLIST_NSEGS];
+ 10 unsigned long gp_seq[RCU_CBLIST_NSEGS];
+ 11 long len;
+ 12 long len_lazy;
+ 13 };
+
+The segments are as follows:
+
+#. ``RCU_DONE_TAIL``: Callbacks whose grace periods have elapsed. These
+ callbacks are ready to be invoked.
+#. ``RCU_WAIT_TAIL``: Callbacks that are waiting for the current grace
+ period. Note that different CPUs can have different ideas about which
+ grace period is current, hence the ``->gp_seq`` field.
+#. ``RCU_NEXT_READY_TAIL``: Callbacks waiting for the next grace period
+ to start.
+#. ``RCU_NEXT_TAIL``: Callbacks that have not yet been associated with a
+ grace period.
+
+The ``->head`` pointer references the first callback or is ``NULL`` if
+the list contains no callbacks (which is *not* the same as being empty).
+Each element of the ``->tails[]`` array references the ``->next``
+pointer of the last callback in the corresponding segment of the list,
+or the list's ``->head`` pointer if that segment and all previous
+segments are empty. If the corresponding segment is empty but some
+previous segment is not empty, then the array element is identical to
+its predecessor. Older callbacks are closer to the head of the list, and
+new callbacks are added at the tail. This relationship between the
+``->head`` pointer, the ``->tails[]`` array, and the callbacks is shown
+in this diagram:
+
+.. kernel-figure:: nxtlist.svg
+
+In this figure, the ``->head`` pointer references the first RCU callback
+in the list. The ``->tails[RCU_DONE_TAIL]`` array element references the
+``->head`` pointer itself, indicating that none of the callbacks is
+ready to invoke. The ``->tails[RCU_WAIT_TAIL]`` array element references
+callback CB 2's ``->next`` pointer, which indicates that CB 1 and CB 2
+are both waiting on the current grace period, give or take possible
+disagreements about exactly which grace period is the current one. The
+``->tails[RCU_NEXT_READY_TAIL]`` array element references the same RCU
+callback that ``->tails[RCU_WAIT_TAIL]`` does, which indicates that
+there are no callbacks waiting on the next RCU grace period. The
+``->tails[RCU_NEXT_TAIL]`` array element references CB 4's ``->next``
+pointer, indicating that all the remaining RCU callbacks have not yet
+been assigned to an RCU grace period. Note that the
+``->tails[RCU_NEXT_TAIL]`` array element always references the last RCU
+callback's ``->next`` pointer unless the callback list is empty, in
+which case it references the ``->head`` pointer.
+
+There is one additional important special case for the
+``->tails[RCU_NEXT_TAIL]`` array element: It can be ``NULL`` when this
+list is *disabled*. Lists are disabled when the corresponding CPU is
+offline or when the corresponding CPU's callbacks are offloaded to a
+kthread, both of which are described elsewhere.
+
+CPUs advance their callbacks from the ``RCU_NEXT_TAIL`` to the
+``RCU_NEXT_READY_TAIL`` to the ``RCU_WAIT_TAIL`` to the
+``RCU_DONE_TAIL`` list segments as grace periods advance.
+
+The ``->gp_seq[]`` array records grace-period numbers corresponding to
+the list segments. This is what allows different CPUs to have different
+ideas as to which is the current grace period while still avoiding
+premature invocation of their callbacks. In particular, this allows CPUs
+that go idle for extended periods to determine which of their callbacks
+are ready to be invoked after reawakening.
+
+The ``->len`` counter contains the number of callbacks in ``->head``,
+and the ``->len_lazy`` contains the number of those callbacks that are
+known to only free memory, and whose invocation can therefore be safely
+deferred.
+
+.. important::
+
+ It is the ``->len`` field that determines whether or
+ not there are callbacks associated with this ``rcu_segcblist``
+ structure, *not* the ``->head`` pointer. The reason for this is that all
+ the ready-to-invoke callbacks (that is, those in the ``RCU_DONE_TAIL``
+ segment) are extracted all at once at callback-invocation time
+ (``rcu_do_batch``), due to which ``->head`` may be set to NULL if there
+ are no not-done callbacks remaining in the ``rcu_segcblist``. If
+ callback invocation must be postponed, for example, because a
+ high-priority process just woke up on this CPU, then the remaining
+ callbacks are placed back on the ``RCU_DONE_TAIL`` segment and
+ ``->head`` once again points to the start of the segment. In short, the
+ head field can briefly be ``NULL`` even though the CPU has callbacks
+ present the entire time. Therefore, it is not appropriate to test the
+ ``->head`` pointer for ``NULL``.
+
+In contrast, the ``->len`` and ``->len_lazy`` counts are adjusted only
+after the corresponding callbacks have been invoked. This means that the
+``->len`` count is zero only if the ``rcu_segcblist`` structure really
+is devoid of callbacks. Of course, off-CPU sampling of the ``->len``
+count requires careful use of appropriate synchronization, for example,
+memory barriers. This synchronization can be a bit subtle, particularly
+in the case of ``rcu_barrier()``.
+
+The ``rcu_data`` Structure
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The ``rcu_data`` maintains the per-CPU state for the RCU subsystem. The
+fields in this structure may be accessed only from the corresponding CPU
+(and from tracing) unless otherwise stated. This structure is the focus
+of quiescent-state detection and RCU callback queuing. It also tracks
+its relationship to the corresponding leaf ``rcu_node`` structure to
+allow more-efficient propagation of quiescent states up the ``rcu_node``
+combining tree. Like the ``rcu_node`` structure, it provides a local
+copy of the grace-period information to allow for-free synchronized
+access to this information from the corresponding CPU. Finally, this
+structure records past dyntick-idle state for the corresponding CPU and
+also tracks statistics.
+
+The ``rcu_data`` structure's fields are discussed, singly and in groups,
+in the following sections.
+
+Connection to Other Data Structures
+'''''''''''''''''''''''''''''''''''
+
+This portion of the ``rcu_data`` structure is declared as follows:
+
+::
+
+ 1 int cpu;
+ 2 struct rcu_node *mynode;
+ 3 unsigned long grpmask;
+ 4 bool beenonline;
+
+The ``->cpu`` field contains the number of the corresponding CPU and the
+``->mynode`` field references the corresponding ``rcu_node`` structure.
+The ``->mynode`` is used to propagate quiescent states up the combining
+tree. These two fields are constant and therefore do not require
+synchronization.
+
+The ``->grpmask`` field indicates the bit in the ``->mynode->qsmask``
+corresponding to this ``rcu_data`` structure, and is also used when
+propagating quiescent states. The ``->beenonline`` flag is set whenever
+the corresponding CPU comes online, which means that the debugfs tracing
+need not dump out any ``rcu_data`` structure for which this flag is not
+set.
+
+Quiescent-State and Grace-Period Tracking
+'''''''''''''''''''''''''''''''''''''''''
+
+This portion of the ``rcu_data`` structure is declared as follows:
+
+::
+
+ 1 unsigned long gp_seq;
+ 2 unsigned long gp_seq_needed;
+ 3 bool cpu_no_qs;
+ 4 bool core_needs_qs;
+ 5 bool gpwrap;
+
+The ``->gp_seq`` field is the counterpart of the field of the same name
+in the ``rcu_state`` and ``rcu_node`` structures. The
+``->gp_seq_needed`` field is the counterpart of the field of the same
+name in the rcu_node structure. They may each lag up to one behind their
+``rcu_node`` counterparts, but in ``CONFIG_NO_HZ_IDLE`` and
+``CONFIG_NO_HZ_FULL`` kernels can lag arbitrarily far behind for CPUs in
+dyntick-idle mode (but these counters will catch up upon exit from
+dyntick-idle mode). If the lower two bits of a given ``rcu_data``
+structure's ``->gp_seq`` are zero, then this ``rcu_data`` structure
+believes that RCU is idle.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| All this replication of the grace period numbers can only cause |
+| massive confusion. Why not just keep a global sequence number and be |
+| done with it??? |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| Because if there was only a single global sequence numbers, there |
+| would need to be a single global lock to allow safely accessing and |
+| updating it. And if we are not going to have a single global lock, we |
+| need to carefully manage the numbers on a per-node basis. Recall from |
+| the answer to a previous Quick Quiz that the consequences of applying |
+| a previously sampled quiescent state to the wrong grace period are |
+| quite severe. |
++-----------------------------------------------------------------------+
+
+The ``->cpu_no_qs`` flag indicates that the CPU has not yet passed
+through a quiescent state, while the ``->core_needs_qs`` flag indicates
+that the RCU core needs a quiescent state from the corresponding CPU.
+The ``->gpwrap`` field indicates that the corresponding CPU has remained
+idle for so long that the ``gp_seq`` counter is in danger of overflow,
+which will cause the CPU to disregard the values of its counters on its
+next exit from idle.
+
+RCU Callback Handling
+'''''''''''''''''''''
+
+In the absence of CPU-hotplug events, RCU callbacks are invoked by the
+same CPU that registered them. This is strictly a cache-locality
+optimization: callbacks can and do get invoked on CPUs other than the
+one that registered them. After all, if the CPU that registered a given
+callback has gone offline before the callback can be invoked, there
+really is no other choice.
+
+This portion of the ``rcu_data`` structure is declared as follows:
+
+::
+
+ 1 struct rcu_segcblist cblist;
+ 2 long qlen_last_fqs_check;
+ 3 unsigned long n_cbs_invoked;
+ 4 unsigned long n_nocbs_invoked;
+ 5 unsigned long n_cbs_orphaned;
+ 6 unsigned long n_cbs_adopted;
+ 7 unsigned long n_force_qs_snap;
+ 8 long blimit;
+
+The ``->cblist`` structure is the segmented callback list described
+earlier. The CPU advances the callbacks in its ``rcu_data`` structure
+whenever it notices that another RCU grace period has completed. The CPU
+detects the completion of an RCU grace period by noticing that the value
+of its ``rcu_data`` structure's ``->gp_seq`` field differs from that of
+its leaf ``rcu_node`` structure. Recall that each ``rcu_node``
+structure's ``->gp_seq`` field is updated at the beginnings and ends of
+each grace period.
+
+The ``->qlen_last_fqs_check`` and ``->n_force_qs_snap`` coordinate the
+forcing of quiescent states from ``call_rcu()`` and friends when
+callback lists grow excessively long.
+
+The ``->n_cbs_invoked``, ``->n_cbs_orphaned``, and ``->n_cbs_adopted``
+fields count the number of callbacks invoked, sent to other CPUs when
+this CPU goes offline, and received from other CPUs when those other
+CPUs go offline. The ``->n_nocbs_invoked`` is used when the CPU's
+callbacks are offloaded to a kthread.
+
+Finally, the ``->blimit`` counter is the maximum number of RCU callbacks
+that may be invoked at a given time.
+
+Dyntick-Idle Handling
+'''''''''''''''''''''
+
+This portion of the ``rcu_data`` structure is declared as follows:
+
+::
+
+ 1 int watching_snap;
+ 2 unsigned long dynticks_fqs;
+
+The ``->watching_snap`` field is used to take a snapshot of the
+corresponding CPU's dyntick-idle state when forcing quiescent states,
+and is therefore accessed from other CPUs. Finally, the
+``->dynticks_fqs`` field is used to count the number of times this CPU
+is determined to be in dyntick-idle state, and is used for tracing and
+debugging purposes.
+
+This portion of the rcu_data structure is declared as follows:
+
+::
+
+ 1 long nesting;
+ 2 long nmi_nesting;
+ 3 atomic_t dynticks;
+ 4 bool rcu_need_heavy_qs;
+ 5 bool rcu_urgent_qs;
+
+These fields in the rcu_data structure maintain the per-CPU dyntick-idle
+state for the corresponding CPU. The fields may be accessed only from
+the corresponding CPU (and from tracing) unless otherwise stated.
+
+The ``->nesting`` field counts the nesting depth of process
+execution, so that in normal circumstances this counter has value zero
+or one. NMIs, irqs, and tracers are counted by the
+``->nmi_nesting`` field. Because NMIs cannot be masked, changes
+to this variable have to be undertaken carefully using an algorithm
+provided by Andy Lutomirski. The initial transition from idle adds one,
+and nested transitions add two, so that a nesting level of five is
+represented by a ``->nmi_nesting`` value of nine. This counter
+can therefore be thought of as counting the number of reasons why this
+CPU cannot be permitted to enter dyntick-idle mode, aside from
+process-level transitions.
+
+However, it turns out that when running in non-idle kernel context, the
+Linux kernel is fully capable of entering interrupt handlers that never
+exit and perhaps also vice versa. Therefore, whenever the
+``->nesting`` field is incremented up from zero, the
+``->nmi_nesting`` field is set to a large positive number, and
+whenever the ``->nesting`` field is decremented down to zero,
+the ``->nmi_nesting`` field is set to zero. Assuming that
+the number of misnested interrupts is not sufficient to overflow the
+counter, this approach corrects the ``->nmi_nesting`` field
+every time the corresponding CPU enters the idle loop from process
+context.
+
+The ``->dynticks`` field counts the corresponding CPU's transitions to
+and from either dyntick-idle or user mode, so that this counter has an
+even value when the CPU is in dyntick-idle mode or user mode and an odd
+value otherwise. The transitions to/from user mode need to be counted
+for user mode adaptive-ticks support (see Documentation/timers/no_hz.rst).
+
+The ``->rcu_need_heavy_qs`` field is used to record the fact that the
+RCU core code would really like to see a quiescent state from the
+corresponding CPU, so much so that it is willing to call for
+heavy-weight dyntick-counter operations. This flag is checked by RCU's
+context-switch and ``cond_resched()`` code, which provide a momentary
+idle sojourn in response.
+
+Finally, the ``->rcu_urgent_qs`` field is used to record the fact that
+the RCU core code would really like to see a quiescent state from the
+corresponding CPU, with the various other fields indicating just how
+badly RCU wants this quiescent state. This flag is checked by RCU's
+context-switch path (``rcu_note_context_switch``) and the cond_resched
+code.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| Why not simply combine the ``->nesting`` and |
+| ``->nmi_nesting`` counters into a single counter that just |
+| counts the number of reasons that the corresponding CPU is non-idle? |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| Because this would fail in the presence of interrupts whose handlers |
+| never return and of handlers that manage to return from a made-up |
+| interrupt. |
++-----------------------------------------------------------------------+
+
+Additional fields are present for some special-purpose builds, and are
+discussed separately.
+
+The ``rcu_head`` Structure
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Each ``rcu_head`` structure represents an RCU callback. These structures
+are normally embedded within RCU-protected data structures whose
+algorithms use asynchronous grace periods. In contrast, when using
+algorithms that block waiting for RCU grace periods, RCU users need not
+provide ``rcu_head`` structures.
+
+The ``rcu_head`` structure has fields as follows:
+
+::
+
+ 1 struct rcu_head *next;
+ 2 void (*func)(struct rcu_head *head);
+
+The ``->next`` field is used to link the ``rcu_head`` structures
+together in the lists within the ``rcu_data`` structures. The ``->func``
+field is a pointer to the function to be called when the callback is
+ready to be invoked, and this function is passed a pointer to the
+``rcu_head`` structure. However, ``kfree_rcu()`` uses the ``->func``
+field to record the offset of the ``rcu_head`` structure within the
+enclosing RCU-protected data structure.
+
+Both of these fields are used internally by RCU. From the viewpoint of
+RCU users, this structure is an opaque “cookie”.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| Given that the callback function ``->func`` is passed a pointer to |
+| the ``rcu_head`` structure, how is that function supposed to find the |
+| beginning of the enclosing RCU-protected data structure? |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| In actual practice, there is a separate callback function per type of |
+| RCU-protected data structure. The callback function can therefore use |
+| the ``container_of()`` macro in the Linux kernel (or other |
+| pointer-manipulation facilities in other software environments) to |
+| find the beginning of the enclosing structure. |
++-----------------------------------------------------------------------+
+
+RCU-Specific Fields in the ``task_struct`` Structure
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The ``CONFIG_PREEMPT_RCU`` implementation uses some additional fields in
+the ``task_struct`` structure:
+
+::
+
+ 1 #ifdef CONFIG_PREEMPT_RCU
+ 2 int rcu_read_lock_nesting;
+ 3 union rcu_special rcu_read_unlock_special;
+ 4 struct list_head rcu_node_entry;
+ 5 struct rcu_node *rcu_blocked_node;
+ 6 #endif /* #ifdef CONFIG_PREEMPT_RCU */
+ 7 #ifdef CONFIG_TASKS_RCU
+ 8 unsigned long rcu_tasks_nvcsw;
+ 9 bool rcu_tasks_holdout;
+ 10 struct list_head rcu_tasks_holdout_list;
+ 11 int rcu_tasks_idle_cpu;
+ 12 #endif /* #ifdef CONFIG_TASKS_RCU */
+
+The ``->rcu_read_lock_nesting`` field records the nesting level for RCU
+read-side critical sections, and the ``->rcu_read_unlock_special`` field
+is a bitmask that records special conditions that require
+``rcu_read_unlock()`` to do additional work. The ``->rcu_node_entry``
+field is used to form lists of tasks that have blocked within
+preemptible-RCU read-side critical sections and the
+``->rcu_blocked_node`` field references the ``rcu_node`` structure whose
+list this task is a member of, or ``NULL`` if it is not blocked within a
+preemptible-RCU read-side critical section.
+
+The ``->rcu_tasks_nvcsw`` field tracks the number of voluntary context
+switches that this task had undergone at the beginning of the current
+tasks-RCU grace period, ``->rcu_tasks_holdout`` is set if the current
+tasks-RCU grace period is waiting on this task,
+``->rcu_tasks_holdout_list`` is a list element enqueuing this task on
+the holdout list, and ``->rcu_tasks_idle_cpu`` tracks which CPU this
+idle task is running, but only if the task is currently running, that
+is, if the CPU is currently idle.
+
+Accessor Functions
+~~~~~~~~~~~~~~~~~~
+
+The following listing shows the ``rcu_get_root()``,
+``rcu_for_each_node_breadth_first`` and ``rcu_for_each_leaf_node()``
+function and macros:
+
+::
+
+ 1 static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
+ 2 {
+ 3 return &rsp->node[0];
+ 4 }
+ 5
+ 6 #define rcu_for_each_node_breadth_first(rsp, rnp) \
+ 7 for ((rnp) = &(rsp)->node[0]; \
+ 8 (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++)
+ 9
+ 10 #define rcu_for_each_leaf_node(rsp, rnp) \
+ 11 for ((rnp) = (rsp)->level[NUM_RCU_LVLS - 1]; \
+ 12 (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++)
+
+The ``rcu_get_root()`` simply returns a pointer to the first element of
+the specified ``rcu_state`` structure's ``->node[]`` array, which is the
+root ``rcu_node`` structure.
+
+As noted earlier, the ``rcu_for_each_node_breadth_first()`` macro takes
+advantage of the layout of the ``rcu_node`` structures in the
+``rcu_state`` structure's ``->node[]`` array, performing a breadth-first
+traversal by simply traversing the array in order. Similarly, the
+``rcu_for_each_leaf_node()`` macro traverses only the last part of the
+array, thus traversing only the leaf ``rcu_node`` structures.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| What does ``rcu_for_each_leaf_node()`` do if the ``rcu_node`` tree |
+| contains only a single node? |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| In the single-node case, ``rcu_for_each_leaf_node()`` traverses the |
+| single node. |
++-----------------------------------------------------------------------+
+
+Summary
+~~~~~~~
+
+So the state of RCU is represented by an ``rcu_state`` structure, which
+contains a combining tree of ``rcu_node`` and ``rcu_data`` structures.
+Finally, in ``CONFIG_NO_HZ_IDLE`` kernels, each CPU's dyntick-idle state
+is tracked by dynticks-related fields in the ``rcu_data`` structure. If
+you made it this far, you are well prepared to read the code
+walkthroughs in the other articles in this series.
+
+Acknowledgments
+~~~~~~~~~~~~~~~
+
+I owe thanks to Cyrill Gorcunov, Mathieu Desnoyers, Dhaval Giani, Paul
+Turner, Abhishek Srivastava, Matt Kowalczyk, and Serge Hallyn for
+helping me get this document into a more human-readable state.
+
+Legal Statement
+~~~~~~~~~~~~~~~
+
+This work represents the view of the author and does not necessarily
+represent the view of IBM.
+
+Linux is a registered trademark of Linus Torvalds.
+
+Other company, product, and service names may be trademarks or service
+marks of others.
diff --git a/Documentation/RCU/Design/Data-Structures/blkd_task.svg b/Documentation/RCU/Design/Data-Structures/blkd_task.svg
index 00e810bb8419..bed13e9ecab8 100644
--- a/Documentation/RCU/Design/Data-Structures/blkd_task.svg
+++ b/Documentation/RCU/Design/Data-Structures/blkd_task.svg
@@ -14,12 +14,12 @@
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
width="10.1in"
- height="8.6in"
- viewBox="-44 -44 12088 10288"
+ height="6.5999999in"
+ viewBox="-44 -44 12088 7895.4414"
id="svg2"
version="1.1"
- inkscape:version="0.48.4 r9939"
- sodipodi:docname="blkd_task.fig">
+ inkscape:version="0.92.2pre0 (973e216, 2017-07-25)"
+ sodipodi:docname="blkd_task.svg">
<metadata
id="metadata212">
<rdf:RDF>
@@ -37,15 +37,16 @@
<marker
inkscape:stockid="Arrow1Mend"
orient="auto"
- refY="0.0"
- refX="0.0"
+ refY="0"
+ refX="0"
id="Arrow1Mend"
- style="overflow:visible;">
+ style="overflow:visible">
<path
id="path3970"
- d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
- style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
- transform="scale(0.4) rotate(180) translate(10,0)" />
+ d="M 0,0 5,-5 -12.5,0 5,5 Z"
+ style="fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt"
+ transform="matrix(-0.4,0,0,-0.4,-4,0)"
+ inkscape:connector-curvature="0" />
</marker>
</defs>
<sodipodi:namedview
@@ -57,787 +58,574 @@
guidetolerance="10"
inkscape:pageopacity="0"
inkscape:pageshadow="2"
- inkscape:window-width="1087"
- inkscape:window-height="1144"
+ inkscape:window-width="1920"
+ inkscape:window-height="1019"
id="namedview208"
showgrid="false"
inkscape:zoom="1.0495049"
- inkscape:cx="454.50003"
- inkscape:cy="387.00003"
- inkscape:window-x="833"
- inkscape:window-y="28"
- inkscape:window-maximized="0"
- inkscape:current-layer="g4" />
+ inkscape:cx="456.40569"
+ inkscape:cy="348.88682"
+ inkscape:window-x="0"
+ inkscape:window-y="0"
+ inkscape:window-maximized="1"
+ inkscape:current-layer="g4"
+ showguides="false" />
<g
- style="stroke-width:.025in; fill:none"
- id="g4">
+ style="fill:none;stroke-width:0.025in"
+ id="g4"
+ transform="translate(0,-2393.6637)">
<!-- Line: box -->
- <rect
- x="450"
- y="0"
- width="6300"
- height="7350"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
- id="rect6" />
<!-- Line: box -->
- <rect
- x="4950"
- y="4950"
- width="1500"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
- id="rect8" />
<!-- Line: box -->
- <rect
- x="750"
- y="600"
- width="5700"
- height="3750"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
- id="rect10" />
<!-- Line -->
- <polyline
- points="5250,8100 5688,5912 "
- style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
- id="polyline12" />
<!-- Arrowhead on XXXpoint 5250 8100 - 5710 5790-->
<polyline
points="5714 6068 5704 5822 5598 6044 "
- style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
- id="polyline14" />
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8"
+ id="polyline14"
+ transform="translate(23.757862,2185.7233)" />
<!-- Line -->
- <polyline
- points="4050,9300 4486,7262 "
- style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
- id="polyline16" />
<!-- Arrowhead on XXXpoint 4050 9300 - 4512 7140-->
<polyline
points="4514 7418 4506 7172 4396 7394 "
- style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
- id="polyline18" />
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8"
+ id="polyline18"
+ transform="translate(23.757862,2185.7233)" />
<!-- Line -->
- <polyline
- points="1040,9300 1476,7262 "
- style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
- id="polyline20" />
<!-- Arrowhead on XXXpoint 1040 9300 - 1502 7140-->
<polyline
points="1504 7418 1496 7172 1386 7394 "
- style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
- id="polyline22" />
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8"
+ id="polyline22"
+ transform="translate(23.757862,2185.7233)" />
<!-- Line -->
- <polyline
- points="2240,8100 2676,6062 "
- style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
- id="polyline24" />
<!-- Arrowhead on XXXpoint 2240 8100 - 2702 5940-->
<polyline
points="2704 6218 2696 5972 2586 6194 "
- style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
- id="polyline26" />
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8"
+ id="polyline26"
+ transform="translate(23.757862,2185.7233)" />
<!-- Line: box -->
<rect
- x="0"
- y="450"
+ x="23.757858"
+ y="2635.7231"
width="6300"
height="7350"
rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+ style="fill:#ffffff;stroke:#000000;stroke-width:30;stroke-linecap:butt;stroke-linejoin:miter"
id="rect28" />
<!-- Line: box -->
<rect
- x="300"
- y="1050"
+ x="323.75787"
+ y="3235.7231"
width="5700"
height="3750"
rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+ style="fill:#ffff00;stroke:#000000;stroke-width:30;stroke-linecap:butt;stroke-linejoin:miter"
id="rect30" />
<!-- Line -->
<polyline
points="1350,3450 2350,2590 "
- style="stroke:#00d1d1;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline32" />
+ style="stroke:#00d1d1;stroke-width:30.00057793;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline32"
+ transform="translate(23.757862,2185.7233)" />
<!-- Arrowhead on XXXpoint 1350 3450 - 2444 2510-->
<!-- Line -->
<polyline
points="4950,3450 3948,2590 "
- style="stroke:#00d1d1;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline36" />
+ style="stroke:#00d1d1;stroke-width:30.00057793;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline36"
+ transform="translate(23.757862,2185.7233)" />
<!-- Arrowhead on XXXpoint 4950 3450 - 3854 2510-->
<!-- Line -->
<polyline
points="4050,6600 4050,4414 "
- style="stroke:#00d1d1;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline40" />
+ style="stroke:#00d1d1;stroke-width:30.00057793;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline40"
+ transform="translate(23.757862,2185.7233)" />
<!-- Arrowhead on XXXpoint 4050 6600 - 4050 4290-->
<!-- Line -->
<polyline
points="1050,6600 1050,4414 "
- style="stroke:#00d1d1;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline44" />
+ style="stroke:#00d1d1;stroke-width:30.00057793;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline44"
+ transform="translate(23.757862,2185.7233)" />
<!-- Arrowhead on XXXpoint 1050 6600 - 1050 4290-->
<!-- Line -->
<polyline
points="2250,5400 2250,4414 "
- style="stroke:#00d1d1;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline48" />
+ style="stroke:#00d1d1;stroke-width:30.00057793;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline48"
+ transform="translate(23.757862,2185.7233)" />
<!-- Arrowhead on XXXpoint 2250 5400 - 2250 4290-->
<!-- Line -->
- <polyline
- points="2250,8100 2250,6364 "
- style="stroke:#00ff00;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline52" />
<!-- Arrowhead on XXXpoint 2250 8100 - 2250 6240-->
<!-- Line -->
- <polyline
- points="1050,9300 1050,7564 "
- style="stroke:#00ff00;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline56" />
<!-- Arrowhead on XXXpoint 1050 9300 - 1050 7440-->
<!-- Line -->
- <polyline
- points="4050,9300 4050,7564 "
- style="stroke:#00ff00;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline60" />
<!-- Arrowhead on XXXpoint 4050 9300 - 4050 7440-->
<!-- Line -->
- <polyline
- points="5250,8100 5250,6364 "
- style="stroke:#00ff00;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline64" />
<!-- Arrowhead on XXXpoint 5250 8100 - 5250 6240-->
<!-- Circle -->
<circle
- cx="2850"
- cy="3900"
+ cx="2873.7581"
+ cy="6085.7236"
r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
+ style="fill:#000000;stroke:#000000;stroke-width:14"
id="circle68" />
<!-- Circle -->
<circle
- cx="3150"
- cy="3900"
+ cx="3173.7581"
+ cy="6085.7236"
r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
+ style="fill:#000000;stroke:#000000;stroke-width:14"
id="circle70" />
<!-- Circle -->
<circle
- cx="3450"
- cy="3900"
+ cx="3473.7581"
+ cy="6085.7236"
r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
+ style="fill:#000000;stroke:#000000;stroke-width:14"
id="circle72" />
<!-- Circle -->
<circle
- cx="1350"
- cy="5100"
+ cx="1373.7578"
+ cy="7285.7236"
r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
+ style="fill:#000000;stroke:#000000;stroke-width:14"
id="circle74" />
<!-- Circle -->
<circle
- cx="1650"
- cy="5100"
+ cx="1673.7578"
+ cy="7285.7236"
r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
+ style="fill:#000000;stroke:#000000;stroke-width:14"
id="circle76" />
<!-- Circle -->
<circle
- cx="1950"
- cy="5100"
+ cx="1973.7578"
+ cy="7285.7236"
r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
+ style="fill:#000000;stroke:#000000;stroke-width:14"
id="circle78" />
<!-- Circle -->
<circle
- cx="4350"
- cy="5100"
+ cx="4373.7578"
+ cy="7285.7236"
r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
+ style="fill:#000000;stroke:#000000;stroke-width:14"
id="circle80" />
<!-- Circle -->
<circle
- cx="4650"
- cy="5100"
+ cx="4673.7578"
+ cy="7285.7236"
r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
+ style="fill:#000000;stroke:#000000;stroke-width:14"
id="circle82" />
<!-- Circle -->
<circle
- cx="4950"
- cy="5100"
+ cx="4973.7578"
+ cy="7285.7236"
r="76"
- style="fill:#000000;stroke:#000000;stroke-width:14;"
+ style="fill:#000000;stroke:#000000;stroke-width:14"
id="circle84" />
<!-- Line: box -->
<rect
- x="750"
- y="3450"
+ x="773.75781"
+ y="5635.7236"
width="1800"
height="900"
rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ style="fill:#ffbfbf;stroke:#000000;stroke-width:30;stroke-linecap:butt;stroke-linejoin:miter"
id="rect86" />
<!-- Line: box -->
<rect
- x="300"
- y="6600"
+ x="323.75787"
+ y="8785.7227"
width="1500"
height="900"
rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ style="fill:#87cfff;stroke:#000000;stroke-width:30;stroke-linecap:butt;stroke-linejoin:miter"
id="rect88" />
<!-- Line: box -->
<rect
- x="4500"
- y="5400"
+ x="4523.7578"
+ y="7585.7236"
width="1500"
height="900"
rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ style="fill:#87cfff;stroke:#000000;stroke-width:30;stroke-linecap:butt;stroke-linejoin:miter"
id="rect90" />
<!-- Line: box -->
<rect
- x="3300"
- y="6600"
+ x="3323.7581"
+ y="8785.7227"
width="1500"
height="900"
rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ style="fill:#87cfff;stroke:#000000;stroke-width:30;stroke-linecap:butt;stroke-linejoin:miter"
id="rect92" />
<!-- Line: box -->
<rect
- x="2250"
- y="1650"
+ x="2273.7581"
+ y="3835.7231"
width="1800"
height="900"
rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ style="fill:#ffbfbf;stroke:#000000;stroke-width:30;stroke-linecap:butt;stroke-linejoin:miter"
id="rect94" />
<!-- Line: box -->
- <rect
- x="0"
- y="9300"
- width="2100"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
- id="rect96" />
<!-- Line: box -->
- <rect
- x="1350"
- y="8100"
- width="2100"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
- id="rect98" />
<!-- Line: box -->
- <rect
- x="3000"
- y="9300"
- width="2100"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
- id="rect100" />
<!-- Line: box -->
- <rect
- x="4350"
- y="8100"
- width="2100"
- height="900"
- rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
- id="rect102" />
<!-- Line: box -->
<rect
- x="1500"
- y="5400"
+ x="1523.7578"
+ y="7585.7236"
width="1500"
height="900"
rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ style="fill:#87cfff;stroke:#000000;stroke-width:30;stroke-linecap:butt;stroke-linejoin:miter"
id="rect104" />
<!-- Line -->
<polygon
- points="5550,3450 7350,2850 7350,5100 5550,4350 5550,3450 "
- style="stroke:#000000;stroke-width:14; stroke-linejoin:miter; stroke-linecap:butt; stroke-dasharray:120 120;fill:#ffbfbf; "
- id="polygon106" />
+ points="7350,2850 7350,5100 5550,4350 5550,3450 "
+ style="fill:#ffbfbf;stroke:#000000;stroke-width:14;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:120, 120"
+ id="polygon106"
+ transform="translate(23.757862,2185.7233)" />
<!-- Line -->
<polyline
points="9300,3150 10734,3150 "
- style="stroke:#000000;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline108" />
+ style="stroke:#000000;stroke-width:30.00057793;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline108"
+ transform="translate(23.757862,2185.7233)" />
<!-- Arrowhead on XXXpoint 9300 3150 - 10860 3150-->
<!-- Line: box -->
<rect
- x="10800"
- y="2850"
+ x="10823.758"
+ y="5035.7236"
width="1200"
height="750"
rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ style="stroke:#000000;stroke-width:30;stroke-linecap:butt;stroke-linejoin:miter"
id="rect112" />
<!-- Line -->
<polyline
points="11400,3600 11400,4284 "
- style="stroke:#000000;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline114" />
+ style="stroke:#000000;stroke-width:30.00057793;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline114"
+ transform="translate(23.757862,2185.7233)" />
<!-- Arrowhead on XXXpoint 11400 3600 - 11400 4410-->
<!-- Line: box -->
<rect
- x="10800"
- y="4350"
+ x="10823.758"
+ y="6535.7236"
width="1200"
height="750"
rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ style="stroke:#000000;stroke-width:30;stroke-linecap:butt;stroke-linejoin:miter"
id="rect118" />
<!-- Line -->
<polyline
points="11400,5100 11400,5784 "
- style="stroke:#000000;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline120" />
+ style="stroke:#000000;stroke-width:30.00057793;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline120"
+ transform="translate(23.757862,2185.7233)" />
<!-- Arrowhead on XXXpoint 11400 5100 - 11400 5910-->
<!-- Line: box -->
<rect
- x="10800"
- y="5850"
+ x="10823.758"
+ y="8035.7236"
width="1200"
height="750"
rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ style="stroke:#000000;stroke-width:30;stroke-linecap:butt;stroke-linejoin:miter"
id="rect124" />
<!-- Line -->
<polyline
points="9300,3900 9900,3900 9900,4650 10734,4650 "
- style="stroke:#000000;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline126" />
+ style="stroke:#000000;stroke-width:30.00057793;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline126"
+ transform="translate(23.757862,2185.7233)" />
<!-- Arrowhead on XXXpoint 9900 4650 - 10860 4650-->
<!-- Line -->
<polyline
points="9300,4650 9600,4650 9600,6150 10734,6150 "
- style="stroke:#000000;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline130" />
+ style="stroke:#000000;stroke-width:30.00057793;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline130"
+ transform="translate(23.757862,2185.7233)" />
<!-- Arrowhead on XXXpoint 9600 6150 - 10860 6150-->
<!-- Text -->
- <text
- xml:space="preserve"
- x="6450"
- y="300"
- fill="#000000"
- font-family="Helvetica"
- font-style="normal"
- font-weight="normal"
- font-size="192"
- text-anchor="end"
- id="text134">rcu_bh</text>
<!-- Text -->
<text
xml:space="preserve"
- x="3150"
- y="1950"
- fill="#000000"
- font-family="Courier"
+ x="3173.7581"
+ y="4135.7231"
font-style="normal"
font-weight="bold"
font-size="192"
- text-anchor="middle"
- id="text136">struct</text>
+ id="text136"
+ style="font-style:normal;font-weight:bold;font-size:192px;font-family:Courier;text-anchor:middle;fill:#000000">struct</text>
<!-- Text -->
<text
xml:space="preserve"
- x="3150"
- y="2250"
- fill="#000000"
- font-family="Courier"
+ x="3173.7581"
+ y="4435.7236"
font-style="normal"
font-weight="bold"
font-size="192"
- text-anchor="middle"
- id="text138">rcu_node</text>
+ id="text138"
+ style="font-style:normal;font-weight:bold;font-size:192px;font-family:Courier;text-anchor:middle;fill:#000000">rcu_node</text>
<!-- Text -->
<text
xml:space="preserve"
- x="1650"
- y="3750"
- fill="#000000"
- font-family="Courier"
+ x="1673.7578"
+ y="5935.7236"
font-style="normal"
font-weight="bold"
font-size="192"
- text-anchor="middle"
- id="text140">struct</text>
+ id="text140"
+ style="font-style:normal;font-weight:bold;font-size:192px;font-family:Courier;text-anchor:middle;fill:#000000">struct</text>
<!-- Text -->
<text
xml:space="preserve"
- x="1650"
- y="4050"
- fill="#000000"
- font-family="Courier"
+ x="1673.7578"
+ y="6235.7236"
font-style="normal"
font-weight="bold"
font-size="192"
- text-anchor="middle"
- id="text142">rcu_node</text>
+ id="text142"
+ style="font-style:normal;font-weight:bold;font-size:192px;font-family:Courier;text-anchor:middle;fill:#000000">rcu_node</text>
<!-- Text -->
<text
xml:space="preserve"
- x="2250"
- y="5700"
- fill="#000000"
- font-family="Courier"
+ x="2273.7581"
+ y="7885.7236"
font-style="normal"
font-weight="bold"
font-size="192"
- text-anchor="middle"
- id="text144">struct</text>
+ id="text144"
+ style="font-style:normal;font-weight:bold;font-size:192px;font-family:Courier;text-anchor:middle;fill:#000000">struct</text>
<!-- Text -->
<text
xml:space="preserve"
- x="2250"
- y="6000"
- fill="#000000"
- font-family="Courier"
+ x="2273.7581"
+ y="8185.7236"
font-style="normal"
font-weight="bold"
font-size="192"
- text-anchor="middle"
- id="text146">rcu_data</text>
+ id="text146"
+ style="font-style:normal;font-weight:bold;font-size:192px;font-family:Courier;text-anchor:middle;fill:#000000">rcu_data</text>
<!-- Text -->
<text
xml:space="preserve"
- x="1050"
- y="6900"
- fill="#000000"
- font-family="Courier"
+ x="1073.7578"
+ y="9085.7227"
font-style="normal"
font-weight="bold"
font-size="192"
- text-anchor="middle"
- id="text148">struct</text>
+ id="text148"
+ style="font-style:normal;font-weight:bold;font-size:192px;font-family:Courier;text-anchor:middle;fill:#000000">struct</text>
<!-- Text -->
<text
xml:space="preserve"
- x="1050"
- y="7200"
- fill="#000000"
- font-family="Courier"
+ x="1073.7578"
+ y="9385.7227"
font-style="normal"
font-weight="bold"
font-size="192"
- text-anchor="middle"
- id="text150">rcu_data</text>
+ id="text150"
+ style="font-style:normal;font-weight:bold;font-size:192px;font-family:Courier;text-anchor:middle;fill:#000000">rcu_data</text>
<!-- Text -->
<text
xml:space="preserve"
- x="5250"
- y="5700"
- fill="#000000"
- font-family="Courier"
+ x="5273.7578"
+ y="7885.7236"
font-style="normal"
font-weight="bold"
font-size="192"
- text-anchor="middle"
- id="text152">struct</text>
+ id="text152"
+ style="font-style:normal;font-weight:bold;font-size:192px;font-family:Courier;text-anchor:middle;fill:#000000">struct</text>
<!-- Text -->
<text
xml:space="preserve"
- x="5250"
- y="6000"
- fill="#000000"
- font-family="Courier"
+ x="5273.7578"
+ y="8185.7236"
font-style="normal"
font-weight="bold"
font-size="192"
- text-anchor="middle"
- id="text154">rcu_data</text>
+ id="text154"
+ style="font-style:normal;font-weight:bold;font-size:192px;font-family:Courier;text-anchor:middle;fill:#000000">rcu_data</text>
<!-- Text -->
<text
xml:space="preserve"
- x="4050"
- y="6900"
- fill="#000000"
- font-family="Courier"
+ x="4073.7578"
+ y="9085.7227"
font-style="normal"
font-weight="bold"
font-size="192"
- text-anchor="middle"
- id="text156">struct</text>
+ id="text156"
+ style="font-style:normal;font-weight:bold;font-size:192px;font-family:Courier;text-anchor:middle;fill:#000000">struct</text>
<!-- Text -->
<text
xml:space="preserve"
- x="4050"
- y="7200"
- fill="#000000"
- font-family="Courier"
+ x="4073.7578"
+ y="9385.7227"
font-style="normal"
font-weight="bold"
font-size="192"
- text-anchor="middle"
- id="text158">rcu_data</text>
+ id="text158"
+ style="font-style:normal;font-weight:bold;font-size:192px;font-family:Courier;text-anchor:middle;fill:#000000">rcu_data</text>
<!-- Text -->
<text
xml:space="preserve"
- x="450"
- y="1350"
- fill="#000000"
- font-family="Courier"
+ x="473.75784"
+ y="3535.7231"
font-style="normal"
font-weight="bold"
font-size="192"
- text-anchor="start"
- id="text160">struct rcu_state</text>
+ id="text160"
+ style="font-style:normal;font-weight:bold;font-size:192px;font-family:Courier;text-anchor:start;fill:#000000">struct rcu_state</text>
<!-- Text -->
- <text
- xml:space="preserve"
- x="1050"
- y="9600"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text162">struct</text>
<!-- Text -->
- <text
- xml:space="preserve"
- x="1050"
- y="9900"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text164">rcu_dynticks</text>
<!-- Text -->
- <text
- xml:space="preserve"
- x="4050"
- y="9600"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text166">struct</text>
<!-- Text -->
- <text
- xml:space="preserve"
- x="4050"
- y="9900"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text168">rcu_dynticks</text>
<!-- Text -->
- <text
- xml:space="preserve"
- x="2400"
- y="8400"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text170">struct</text>
<!-- Text -->
- <text
- xml:space="preserve"
- x="2400"
- y="8700"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text172">rcu_dynticks</text>
<!-- Text -->
- <text
- xml:space="preserve"
- x="5400"
- y="8400"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text174">struct</text>
<!-- Text -->
- <text
- xml:space="preserve"
- x="5400"
- y="8700"
- fill="#000000"
- font-family="Courier"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- text-anchor="middle"
- id="text176">rcu_dynticks</text>
<!-- Text -->
<text
xml:space="preserve"
- x="6000"
- y="750"
- fill="#000000"
- font-family="Helvetica"
+ x="6023.7578"
+ y="2935.7231"
font-style="normal"
font-weight="normal"
font-size="192"
- text-anchor="end"
- id="text178">rcu_sched</text>
+ id="text178"
+ style="font-style:normal;font-weight:normal;font-size:192px;font-family:Helvetica;text-anchor:end;fill:#000000">rcu_state</text>
<!-- Text -->
<text
xml:space="preserve"
- x="11400"
- y="3300"
- fill="#000000"
- font-family="Helvetica"
+ x="11423.758"
+ y="5485.7236"
font-style="normal"
font-weight="normal"
font-size="216"
- text-anchor="middle"
- id="text180">T3</text>
+ id="text180"
+ style="font-style:normal;font-weight:normal;font-size:216px;font-family:Helvetica;text-anchor:middle;fill:#000000">T3</text>
<!-- Text -->
<text
xml:space="preserve"
- x="11400"
- y="4800"
- fill="#000000"
- font-family="Helvetica"
+ x="11423.758"
+ y="6985.7236"
font-style="normal"
font-weight="normal"
font-size="216"
- text-anchor="middle"
- id="text182">T2</text>
+ id="text182"
+ style="font-style:normal;font-weight:normal;font-size:216px;font-family:Helvetica;text-anchor:middle;fill:#000000">T2</text>
<!-- Text -->
<text
xml:space="preserve"
- x="11400"
- y="6300"
- fill="#000000"
- font-family="Helvetica"
+ x="11423.758"
+ y="8485.7227"
font-style="normal"
font-weight="normal"
font-size="216"
- text-anchor="middle"
- id="text184">T1</text>
+ id="text184"
+ style="font-style:normal;font-weight:normal;font-size:216px;font-family:Helvetica;text-anchor:middle;fill:#000000">T1</text>
<!-- Line -->
<polyline
points="5250,5400 5250,4414 "
- style="stroke:#00d1d1;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
- id="polyline186" />
+ style="stroke:#00d1d1;stroke-width:30.00057793;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline186"
+ transform="translate(23.757862,2185.7233)" />
<!-- Arrowhead on XXXpoint 5250 5400 - 5250 4290-->
<!-- Line: box -->
<rect
- x="3750"
- y="3450"
+ x="3773.7581"
+ y="5635.7236"
width="1800"
height="900"
rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ style="fill:#ffbfbf;stroke:#000000;stroke-width:30;stroke-linecap:butt;stroke-linejoin:miter"
id="rect190" />
<!-- Line: box -->
<rect
- x="7350"
- y="2850"
+ x="7373.7578"
+ y="5035.7236"
width="1950"
height="750"
rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ style="fill:#ffbfbf;stroke:#000000;stroke-width:30;stroke-linecap:butt;stroke-linejoin:miter"
id="rect192" />
<!-- Line: box -->
<rect
- x="7350"
- y="3600"
+ x="7373.7578"
+ y="5785.7236"
width="1950"
height="750"
rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ style="fill:#ffbfbf;stroke:#000000;stroke-width:30;stroke-linecap:butt;stroke-linejoin:miter"
id="rect194" />
<!-- Line: box -->
<rect
- x="7350"
- y="4350"
+ x="7373.7578"
+ y="6535.7236"
width="1950"
height="750"
rx="0"
- style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ style="fill:#ffbfbf;stroke:#000000;stroke-width:30;stroke-linecap:butt;stroke-linejoin:miter"
id="rect196" />
<!-- Text -->
<text
xml:space="preserve"
- x="4650"
- y="4050"
- fill="#000000"
- font-family="Courier"
+ x="4673.7578"
+ y="6235.7236"
font-style="normal"
font-weight="bold"
font-size="192"
- text-anchor="middle"
- id="text198">rcu_node</text>
+ id="text198"
+ style="font-style:normal;font-weight:bold;font-size:192px;font-family:Courier;text-anchor:middle;fill:#000000">rcu_node</text>
<!-- Text -->
<text
xml:space="preserve"
- x="4650"
- y="3750"
- fill="#000000"
- font-family="Courier"
+ x="4673.7578"
+ y="5935.7236"
font-style="normal"
font-weight="bold"
font-size="192"
- text-anchor="middle"
- id="text200">struct</text>
+ id="text200"
+ style="font-style:normal;font-weight:bold;font-size:192px;font-family:Courier;text-anchor:middle;fill:#000000">struct</text>
<!-- Text -->
<text
xml:space="preserve"
- x="7500"
- y="3300"
- fill="#000000"
- font-family="Courier"
+ x="7523.7578"
+ y="5485.7236"
font-style="normal"
font-weight="bold"
font-size="192"
- text-anchor="start"
- id="text202">blkd_tasks</text>
+ id="text202"
+ style="font-style:normal;font-weight:bold;font-size:192px;font-family:Courier;text-anchor:start;fill:#000000">blkd_tasks</text>
<!-- Text -->
<text
xml:space="preserve"
- x="7500"
- y="4050"
- fill="#000000"
- font-family="Courier"
+ x="7523.7578"
+ y="6235.7236"
font-style="normal"
font-weight="bold"
font-size="192"
- text-anchor="start"
- id="text204">gp_tasks</text>
+ id="text204"
+ style="font-style:normal;font-weight:bold;font-size:192px;font-family:Courier;text-anchor:start;fill:#000000">gp_tasks</text>
<!-- Text -->
<text
xml:space="preserve"
- x="7500"
- y="4800"
- fill="#000000"
- font-family="Courier"
+ x="7523.7578"
+ y="6985.7236"
font-style="normal"
font-weight="bold"
font-size="192"
- text-anchor="start"
- id="text206">exp_tasks</text>
+ id="text206"
+ style="font-style:normal;font-weight:bold;font-size:192px;font-family:Courier;text-anchor:start;fill:#000000">exp_tasks</text>
</g>
</svg>
diff --git a/Documentation/RCU/Design/Expedited-Grace-Periods/ExpSchedFlow.svg b/Documentation/RCU/Design/Expedited-Grace-Periods/ExpSchedFlow.svg
index e4233ac93c2b..6189ffcc6aff 100644
--- a/Documentation/RCU/Design/Expedited-Grace-Periods/ExpSchedFlow.svg
+++ b/Documentation/RCU/Design/Expedited-Grace-Periods/ExpSchedFlow.svg
@@ -328,13 +328,13 @@
inkscape:window-height="1148"
id="namedview90"
showgrid="true"
- inkscape:zoom="0.80021373"
- inkscape:cx="462.49289"
- inkscape:cy="473.6718"
+ inkscape:zoom="0.69092787"
+ inkscape:cx="476.34085"
+ inkscape:cy="712.80957"
inkscape:window-x="770"
inkscape:window-y="24"
inkscape:window-maximized="0"
- inkscape:current-layer="g4114-9-3-9"
+ inkscape:current-layer="g4"
inkscape:snap-grids="false"
fit-margin-top="5"
fit-margin-right="5"
@@ -813,14 +813,18 @@
<text
sodipodi:linespacing="125%"
id="text4110-5-7-6-2-4-0"
- y="841.88086"
+ y="670.74316"
x="1460.1007"
style="font-size:267.24359131px;font-style:normal;font-weight:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
xml:space="preserve"><tspan
- y="841.88086"
+ y="670.74316"
+ x="1460.1007"
+ sodipodi:role="line"
+ id="tspan4925-1-2-4-5">Request</tspan><tspan
+ y="1004.7976"
x="1460.1007"
sodipodi:role="line"
- id="tspan4925-1-2-4-5">reched_cpu()</tspan></text>
+ id="tspan3100">context switch</tspan></text>
</g>
</g>
</svg>
diff --git a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html
deleted file mode 100644
index e62c7c34a369..000000000000
--- a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html
+++ /dev/null
@@ -1,666 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
- "http://www.w3.org/TR/html4/loose.dtd">
- <html>
- <head><title>A Tour Through TREE_RCU's Expedited Grace Periods</title>
- <meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
-
-<h2>Introduction</h2>
-
-This document describes RCU's expedited grace periods.
-Unlike RCU's normal grace periods, which accept long latencies to attain
-high efficiency and minimal disturbance, expedited grace periods accept
-lower efficiency and significant disturbance to attain shorter latencies.
-
-<p>
-There are two flavors of RCU (RCU-preempt and RCU-sched), with an earlier
-third RCU-bh flavor having been implemented in terms of the other two.
-Each of the two implementations is covered in its own section.
-
-<ol>
-<li> <a href="#Expedited Grace Period Design">
- Expedited Grace Period Design</a>
-<li> <a href="#RCU-preempt Expedited Grace Periods">
- RCU-preempt Expedited Grace Periods</a>
-<li> <a href="#RCU-sched Expedited Grace Periods">
- RCU-sched Expedited Grace Periods</a>
-<li> <a href="#Expedited Grace Period and CPU Hotplug">
- Expedited Grace Period and CPU Hotplug</a>
-<li> <a href="#Expedited Grace Period Refinements">
- Expedited Grace Period Refinements</a>
-</ol>
-
-<h2><a name="Expedited Grace Period Design">
-Expedited Grace Period Design</a></h2>
-
-<p>
-The expedited RCU grace periods cannot be accused of being subtle,
-given that they for all intents and purposes hammer every CPU that
-has not yet provided a quiescent state for the current expedited
-grace period.
-The one saving grace is that the hammer has grown a bit smaller
-over time: The old call to <tt>try_stop_cpus()</tt> has been
-replaced with a set of calls to <tt>smp_call_function_single()</tt>,
-each of which results in an IPI to the target CPU.
-The corresponding handler function checks the CPU's state, motivating
-a faster quiescent state where possible, and triggering a report
-of that quiescent state.
-As always for RCU, once everything has spent some time in a quiescent
-state, the expedited grace period has completed.
-
-<p>
-The details of the <tt>smp_call_function_single()</tt> handler's
-operation depend on the RCU flavor, as described in the following
-sections.
-
-<h2><a name="RCU-preempt Expedited Grace Periods">
-RCU-preempt Expedited Grace Periods</a></h2>
-
-<p>
-The overall flow of the handling of a given CPU by an RCU-preempt
-expedited grace period is shown in the following diagram:
-
-<p><img src="ExpRCUFlow.svg" alt="ExpRCUFlow.svg" width="55%">
-
-<p>
-The solid arrows denote direct action, for example, a function call.
-The dotted arrows denote indirect action, for example, an IPI
-or a state that is reached after some time.
-
-<p>
-If a given CPU is offline or idle, <tt>synchronize_rcu_expedited()</tt>
-will ignore it because idle and offline CPUs are already residing
-in quiescent states.
-Otherwise, the expedited grace period will use
-<tt>smp_call_function_single()</tt> to send the CPU an IPI, which
-is handled by <tt>sync_rcu_exp_handler()</tt>.
-
-<p>
-However, because this is preemptible RCU, <tt>sync_rcu_exp_handler()</tt>
-can check to see if the CPU is currently running in an RCU read-side
-critical section.
-If not, the handler can immediately report a quiescent state.
-Otherwise, it sets flags so that the outermost <tt>rcu_read_unlock()</tt>
-invocation will provide the needed quiescent-state report.
-This flag-setting avoids the previous forced preemption of all
-CPUs that might have RCU read-side critical sections.
-In addition, this flag-setting is done so as to avoid increasing
-the overhead of the common-case fastpath through the scheduler.
-
-<p>
-Again because this is preemptible RCU, an RCU read-side critical section
-can be preempted.
-When that happens, RCU will enqueue the task, which will the continue to
-block the current expedited grace period until it resumes and finds its
-outermost <tt>rcu_read_unlock()</tt>.
-The CPU will report a quiescent state just after enqueuing the task because
-the CPU is no longer blocking the grace period.
-It is instead the preempted task doing the blocking.
-The list of blocked tasks is managed by <tt>rcu_preempt_ctxt_queue()</tt>,
-which is called from <tt>rcu_preempt_note_context_switch()</tt>, which
-in turn is called from <tt>rcu_note_context_switch()</tt>, which in
-turn is called from the scheduler.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
- Why not just have the expedited grace period check the
- state of all the CPUs?
- After all, that would avoid all those real-time-unfriendly IPIs.
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
- Because we want the RCU read-side critical sections to run fast,
- which means no memory barriers.
- Therefore, it is not possible to safely check the state from some
- other CPU.
- And even if it was possible to safely check the state, it would
- still be necessary to IPI the CPU to safely interact with the
- upcoming <tt>rcu_read_unlock()</tt> invocation, which means that
- the remote state testing would not help the worst-case
- latency that real-time applications care about.
-
- <p><font color="ffffff">One way to prevent your real-time
- application from getting hit with these IPIs is to
- build your kernel with <tt>CONFIG_NO_HZ_FULL=y</tt>.
- RCU would then perceive the CPU running your application
- as being idle, and it would be able to safely detect that
- state without needing to IPI the CPU.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>
-Please note that this is just the overall flow:
-Additional complications can arise due to races with CPUs going idle
-or offline, among other things.
-
-<h2><a name="RCU-sched Expedited Grace Periods">
-RCU-sched Expedited Grace Periods</a></h2>
-
-<p>
-The overall flow of the handling of a given CPU by an RCU-sched
-expedited grace period is shown in the following diagram:
-
-<p><img src="ExpSchedFlow.svg" alt="ExpSchedFlow.svg" width="55%">
-
-<p>
-As with RCU-preempt's <tt>synchronize_rcu_expedited()</tt>,
-<tt>synchronize_sched_expedited()</tt> ignores offline and
-idle CPUs, again because they are in remotely detectable
-quiescent states.
-However, the <tt>synchronize_rcu_expedited()</tt> handler
-is <tt>sync_sched_exp_handler()</tt>, and because the
-<tt>rcu_read_lock_sched()</tt> and <tt>rcu_read_unlock_sched()</tt>
-leave no trace of their invocation, in general it is not possible to tell
-whether or not the current CPU is in an RCU read-side critical section.
-The best that <tt>sync_sched_exp_handler()</tt> can do is to check
-for idle, on the off-chance that the CPU went idle while the IPI
-was in flight.
-If the CPU is idle, then <tt>sync_sched_exp_handler()</tt> reports
-the quiescent state.
-
-<p>
-Otherwise, the handler invokes <tt>resched_cpu()</tt>, which forces
-a future context switch.
-At the time of the context switch, the CPU reports the quiescent state.
-Should the CPU go offline first, it will report the quiescent state
-at that time.
-
-<h2><a name="Expedited Grace Period and CPU Hotplug">
-Expedited Grace Period and CPU Hotplug</a></h2>
-
-<p>
-The expedited nature of expedited grace periods require a much tighter
-interaction with CPU hotplug operations than is required for normal
-grace periods.
-In addition, attempting to IPI offline CPUs will result in splats, but
-failing to IPI online CPUs can result in too-short grace periods.
-Neither option is acceptable in production kernels.
-
-<p>
-The interaction between expedited grace periods and CPU hotplug operations
-is carried out at several levels:
-
-<ol>
-<li> The number of CPUs that have ever been online is tracked
- by the <tt>rcu_state</tt> structure's <tt>-&gt;ncpus</tt>
- field.
- The <tt>rcu_state</tt> structure's <tt>-&gt;ncpus_snap</tt>
- field tracks the number of CPUs that have ever been online
- at the beginning of an RCU expedited grace period.
- Note that this number never decreases, at least in the absence
- of a time machine.
-<li> The identities of the CPUs that have ever been online is
- tracked by the <tt>rcu_node</tt> structure's
- <tt>-&gt;expmaskinitnext</tt> field.
- The <tt>rcu_node</tt> structure's <tt>-&gt;expmaskinit</tt>
- field tracks the identities of the CPUs that were online
- at least once at the beginning of the most recent RCU
- expedited grace period.
- The <tt>rcu_state</tt> structure's <tt>-&gt;ncpus</tt> and
- <tt>-&gt;ncpus_snap</tt> fields are used to detect when
- new CPUs have come online for the first time, that is,
- when the <tt>rcu_node</tt> structure's <tt>-&gt;expmaskinitnext</tt>
- field has changed since the beginning of the last RCU
- expedited grace period, which triggers an update of each
- <tt>rcu_node</tt> structure's <tt>-&gt;expmaskinit</tt>
- field from its <tt>-&gt;expmaskinitnext</tt> field.
-<li> Each <tt>rcu_node</tt> structure's <tt>-&gt;expmaskinit</tt>
- field is used to initialize that structure's
- <tt>-&gt;expmask</tt> at the beginning of each RCU
- expedited grace period.
- This means that only those CPUs that have been online at least
- once will be considered for a given grace period.
-<li> Any CPU that goes offline will clear its bit in its leaf
- <tt>rcu_node</tt> structure's <tt>-&gt;qsmaskinitnext</tt>
- field, so any CPU with that bit clear can safely be ignored.
- However, it is possible for a CPU coming online or going offline
- to have this bit set for some time while <tt>cpu_online</tt>
- returns <tt>false</tt>.
-<li> For each non-idle CPU that RCU believes is currently online, the grace
- period invokes <tt>smp_call_function_single()</tt>.
- If this succeeds, the CPU was fully online.
- Failure indicates that the CPU is in the process of coming online
- or going offline, in which case it is necessary to wait for a
- short time period and try again.
- The purpose of this wait (or series of waits, as the case may be)
- is to permit a concurrent CPU-hotplug operation to complete.
-<li> In the case of RCU-sched, one of the last acts of an outgoing CPU
- is to invoke <tt>rcu_report_dead()</tt>, which
- reports a quiescent state for that CPU.
- However, this is likely paranoia-induced redundancy. <!-- @@@ -->
-</ol>
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
- Why all the dancing around with multiple counters and masks
- tracking CPUs that were once online?
- Why not just have a single set of masks tracking the currently
- online CPUs and be done with it?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
- Maintaining single set of masks tracking the online CPUs <i>sounds</i>
- easier, at least until you try working out all the race conditions
- between grace-period initialization and CPU-hotplug operations.
- For example, suppose initialization is progressing down the
- tree while a CPU-offline operation is progressing up the tree.
- This situation can result in bits set at the top of the tree
- that have no counterparts at the bottom of the tree.
- Those bits will never be cleared, which will result in
- grace-period hangs.
- In short, that way lies madness, to say nothing of a great many
- bugs, hangs, and deadlocks.
-
- <p><font color="ffffff">
- In contrast, the current multi-mask multi-counter scheme ensures
- that grace-period initialization will always see consistent masks
- up and down the tree, which brings significant simplifications
- over the single-mask method.
-
- <p><font color="ffffff">
- This is an instance of
- <a href="http://www.cs.columbia.edu/~library/TR-repository/reports/reports-1992/cucs-039-92.ps.gz"><font color="ffffff">
- deferring work in order to avoid synchronization</a>.
- Lazily recording CPU-hotplug events at the beginning of the next
- grace period greatly simplifies maintenance of the CPU-tracking
- bitmasks in the <tt>rcu_node</tt> tree.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<h2><a name="Expedited Grace Period Refinements">
-Expedited Grace Period Refinements</a></h2>
-
-<ol>
-<li> <a href="#Idle-CPU Checks">Idle-CPU checks</a>.
-<li> <a href="#Batching via Sequence Counter">
- Batching via sequence counter</a>.
-<li> <a href="#Funnel Locking and Wait/Wakeup">
- Funnel locking and wait/wakeup</a>.
-<li> <a href="#Use of Workqueues">Use of Workqueues</a>.
-<li> <a href="#Stall Warnings">Stall warnings</a>.
-<li> <a href="#Mid-Boot Operation">Mid-boot operation</a>.
-</ol>
-
-<h3><a name="Idle-CPU Checks">Idle-CPU Checks</a></h3>
-
-<p>
-Each expedited grace period checks for idle CPUs when initially forming
-the mask of CPUs to be IPIed and again just before IPIing a CPU
-(both checks are carried out by <tt>sync_rcu_exp_select_cpus()</tt>).
-If the CPU is idle at any time between those two times, the CPU will
-not be IPIed.
-Instead, the task pushing the grace period forward will include the
-idle CPUs in the mask passed to <tt>rcu_report_exp_cpu_mult()</tt>.
-
-<p>
-For RCU-sched, there is an additional check for idle in the IPI
-handler, <tt>sync_sched_exp_handler()</tt>.
-If the IPI has interrupted the idle loop, then
-<tt>sync_sched_exp_handler()</tt> invokes <tt>rcu_report_exp_rdp()</tt>
-to report the corresponding quiescent state.
-
-<p>
-For RCU-preempt, there is no specific check for idle in the
-IPI handler (<tt>sync_rcu_exp_handler()</tt>), but because
-RCU read-side critical sections are not permitted within the
-idle loop, if <tt>sync_rcu_exp_handler()</tt> sees that the CPU is within
-RCU read-side critical section, the CPU cannot possibly be idle.
-Otherwise, <tt>sync_rcu_exp_handler()</tt> invokes
-<tt>rcu_report_exp_rdp()</tt> to report the corresponding quiescent
-state, regardless of whether or not that quiescent state was due to
-the CPU being idle.
-
-<p>
-In summary, RCU expedited grace periods check for idle when building
-the bitmask of CPUs that must be IPIed, just before sending each IPI,
-and (either explicitly or implicitly) within the IPI handler.
-
-<h3><a name="Batching via Sequence Counter">
-Batching via Sequence Counter</a></h3>
-
-<p>
-If each grace-period request was carried out separately, expedited
-grace periods would have abysmal scalability and
-problematic high-load characteristics.
-Because each grace-period operation can serve an unlimited number of
-updates, it is important to <i>batch</i> requests, so that a single
-expedited grace-period operation will cover all requests in the
-corresponding batch.
-
-<p>
-This batching is controlled by a sequence counter named
-<tt>-&gt;expedited_sequence</tt> in the <tt>rcu_state</tt> structure.
-This counter has an odd value when there is an expedited grace period
-in progress and an even value otherwise, so that dividing the counter
-value by two gives the number of completed grace periods.
-During any given update request, the counter must transition from
-even to odd and then back to even, thus indicating that a grace
-period has elapsed.
-Therefore, if the initial value of the counter is <tt>s</tt>,
-the updater must wait until the counter reaches at least the
-value <tt>(s+3)&amp;~0x1</tt>.
-This counter is managed by the following access functions:
-
-<ol>
-<li> <tt>rcu_exp_gp_seq_start()</tt>, which marks the start of
- an expedited grace period.
-<li> <tt>rcu_exp_gp_seq_end()</tt>, which marks the end of an
- expedited grace period.
-<li> <tt>rcu_exp_gp_seq_snap()</tt>, which obtains a snapshot of
- the counter.
-<li> <tt>rcu_exp_gp_seq_done()</tt>, which returns <tt>true</tt>
- if a full expedited grace period has elapsed since the
- corresponding call to <tt>rcu_exp_gp_seq_snap()</tt>.
-</ol>
-
-<p>
-Again, only one request in a given batch need actually carry out
-a grace-period operation, which means there must be an efficient
-way to identify which of many concurrent reqeusts will initiate
-the grace period, and that there be an efficient way for the
-remaining requests to wait for that grace period to complete.
-However, that is the topic of the next section.
-
-<h3><a name="Funnel Locking and Wait/Wakeup">
-Funnel Locking and Wait/Wakeup</a></h3>
-
-<p>
-The natural way to sort out which of a batch of updaters will initiate
-the expedited grace period is to use the <tt>rcu_node</tt> combining
-tree, as implemented by the <tt>exp_funnel_lock()</tt> function.
-The first updater corresponding to a given grace period arriving
-at a given <tt>rcu_node</tt> structure records its desired grace-period
-sequence number in the <tt>-&gt;exp_seq_rq</tt> field and moves up
-to the next level in the tree.
-Otherwise, if the <tt>-&gt;exp_seq_rq</tt> field already contains
-the sequence number for the desired grace period or some later one,
-the updater blocks on one of four wait queues in the
-<tt>-&gt;exp_wq[]</tt> array, using the second-from-bottom
-and third-from bottom bits as an index.
-An <tt>-&gt;exp_lock</tt> field in the <tt>rcu_node</tt> structure
-synchronizes access to these fields.
-
-<p>
-An empty <tt>rcu_node</tt> tree is shown in the following diagram,
-with the white cells representing the <tt>-&gt;exp_seq_rq</tt> field
-and the red cells representing the elements of the
-<tt>-&gt;exp_wq[]</tt> array.
-
-<p><img src="Funnel0.svg" alt="Funnel0.svg" width="75%">
-
-<p>
-The next diagram shows the situation after the arrival of Task&nbsp;A
-and Task&nbsp;B at the leftmost and rightmost leaf <tt>rcu_node</tt>
-structures, respectively.
-The current value of the <tt>rcu_state</tt> structure's
-<tt>-&gt;expedited_sequence</tt> field is zero, so adding three and
-clearing the bottom bit results in the value two, which both tasks
-record in the <tt>-&gt;exp_seq_rq</tt> field of their respective
-<tt>rcu_node</tt> structures:
-
-<p><img src="Funnel1.svg" alt="Funnel1.svg" width="75%">
-
-<p>
-Each of Tasks&nbsp;A and&nbsp;B will move up to the root
-<tt>rcu_node</tt> structure.
-Suppose that Task&nbsp;A wins, recording its desired grace-period sequence
-number and resulting in the state shown below:
-
-<p><img src="Funnel2.svg" alt="Funnel2.svg" width="75%">
-
-<p>
-Task&nbsp;A now advances to initiate a new grace period, while Task&nbsp;B
-moves up to the root <tt>rcu_node</tt> structure, and, seeing that
-its desired sequence number is already recorded, blocks on
-<tt>-&gt;exp_wq[1]</tt>.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
- Why <tt>-&gt;exp_wq[1]</tt>?
- Given that the value of these tasks' desired sequence number is
- two, so shouldn't they instead block on <tt>-&gt;exp_wq[2]</tt>?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
- No.
-
- <p><font color="ffffff">
- Recall that the bottom bit of the desired sequence number indicates
- whether or not a grace period is currently in progress.
- It is therefore necessary to shift the sequence number right one
- bit position to obtain the number of the grace period.
- This results in <tt>-&gt;exp_wq[1]</tt>.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>
-If Tasks&nbsp;C and&nbsp;D also arrive at this point, they will compute the
-same desired grace-period sequence number, and see that both leaf
-<tt>rcu_node</tt> structures already have that value recorded.
-They will therefore block on their respective <tt>rcu_node</tt>
-structures' <tt>-&gt;exp_wq[1]</tt> fields, as shown below:
-
-<p><img src="Funnel3.svg" alt="Funnel3.svg" width="75%">
-
-<p>
-Task&nbsp;A now acquires the <tt>rcu_state</tt> structure's
-<tt>-&gt;exp_mutex</tt> and initiates the grace period, which
-increments <tt>-&gt;expedited_sequence</tt>.
-Therefore, if Tasks&nbsp;E and&nbsp;F arrive, they will compute
-a desired sequence number of 4 and will record this value as
-shown below:
-
-<p><img src="Funnel4.svg" alt="Funnel4.svg" width="75%">
-
-<p>
-Tasks&nbsp;E and&nbsp;F will propagate up the <tt>rcu_node</tt>
-combining tree, with Task&nbsp;F blocking on the root <tt>rcu_node</tt>
-structure and Task&nbsp;E wait for Task&nbsp;A to finish so that
-it can start the next grace period.
-The resulting state is as shown below:
-
-<p><img src="Funnel5.svg" alt="Funnel5.svg" width="75%">
-
-<p>
-Once the grace period completes, Task&nbsp;A
-starts waking up the tasks waiting for this grace period to complete,
-increments the <tt>-&gt;expedited_sequence</tt>,
-acquires the <tt>-&gt;exp_wake_mutex</tt> and then releases the
-<tt>-&gt;exp_mutex</tt>.
-This results in the following state:
-
-<p><img src="Funnel6.svg" alt="Funnel6.svg" width="75%">
-
-<p>
-Task&nbsp;E can then acquire <tt>-&gt;exp_mutex</tt> and increment
-<tt>-&gt;expedited_sequence</tt> to the value three.
-If new tasks&nbsp;G and&nbsp;H arrive and moves up the combining tree at the
-same time, the state will be as follows:
-
-<p><img src="Funnel7.svg" alt="Funnel7.svg" width="75%">
-
-<p>
-Note that three of the root <tt>rcu_node</tt> structure's
-waitqueues are now occupied.
-However, at some point, Task&nbsp;A will wake up the
-tasks blocked on the <tt>-&gt;exp_wq</tt> waitqueues, resulting
-in the following state:
-
-<p><img src="Funnel8.svg" alt="Funnel8.svg" width="75%">
-
-<p>
-Execution will continue with Tasks&nbsp;E and&nbsp;H completing
-their grace periods and carrying out their wakeups.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
- What happens if Task&nbsp;A takes so long to do its wakeups
- that Task&nbsp;E's grace period completes?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
- Then Task&nbsp;E will block on the <tt>-&gt;exp_wake_mutex</tt>,
- which will also prevent it from releasing <tt>-&gt;exp_mutex</tt>,
- which in turn will prevent the next grace period from starting.
- This last is important in preventing overflow of the
- <tt>-&gt;exp_wq[]</tt> array.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<h3><a name="Use of Workqueues">Use of Workqueues</a></h3>
-
-<p>
-In earlier implementations, the task requesting the expedited
-grace period also drove it to completion.
-This straightforward approach had the disadvantage of needing to
-account for POSIX signals sent to user tasks,
-so more recent implemementations use the Linux kernel's
-<a href="https://www.kernel.org/doc/Documentation/core-api/workqueue.rst">workqueues</a>.
-
-<p>
-The requesting task still does counter snapshotting and funnel-lock
-processing, but the task reaching the top of the funnel lock
-does a <tt>schedule_work()</tt> (from <tt>_synchronize_rcu_expedited()</tt>
-so that a workqueue kthread does the actual grace-period processing.
-Because workqueue kthreads do not accept POSIX signals, grace-period-wait
-processing need not allow for POSIX signals.
-
-In addition, this approach allows wakeups for the previous expedited
-grace period to be overlapped with processing for the next expedited
-grace period.
-Because there are only four sets of waitqueues, it is necessary to
-ensure that the previous grace period's wakeups complete before the
-next grace period's wakeups start.
-This is handled by having the <tt>-&gt;exp_mutex</tt>
-guard expedited grace-period processing and the
-<tt>-&gt;exp_wake_mutex</tt> guard wakeups.
-The key point is that the <tt>-&gt;exp_mutex</tt> is not released
-until the first wakeup is complete, which means that the
-<tt>-&gt;exp_wake_mutex</tt> has already been acquired at that point.
-This approach ensures that the previous grace period's wakeups can
-be carried out while the current grace period is in process, but
-that these wakeups will complete before the next grace period starts.
-This means that only three waitqueues are required, guaranteeing that
-the four that are provided are sufficient.
-
-<h3><a name="Stall Warnings">Stall Warnings</a></h3>
-
-<p>
-Expediting grace periods does nothing to speed things up when RCU
-readers take too long, and therefore expedited grace periods check
-for stalls just as normal grace periods do.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
- But why not just let the normal grace-period machinery
- detect the stalls, given that a given reader must block
- both normal and expedited grace periods?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
- Because it is quite possible that at a given time there
- is no normal grace period in progress, in which case the
- normal grace period cannot emit a stall warning.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-The <tt>synchronize_sched_expedited_wait()</tt> function loops waiting
-for the expedited grace period to end, but with a timeout set to the
-current RCU CPU stall-warning time.
-If this time is exceeded, any CPUs or <tt>rcu_node</tt> structures
-blocking the current grace period are printed.
-Each stall warning results in another pass through the loop, but the
-second and subsequent passes use longer stall times.
-
-<h3><a name="Mid-Boot Operation">Mid-boot operation</a></h3>
-
-<p>
-The use of workqueues has the advantage that the expedited
-grace-period code need not worry about POSIX signals.
-Unfortunately, it has the
-corresponding disadvantage that workqueues cannot be used until
-they are initialized, which does not happen until some time after
-the scheduler spawns the first task.
-Given that there are parts of the kernel that really do want to
-execute grace periods during this mid-boot &ldquo;dead zone&rdquo;,
-expedited grace periods must do something else during thie time.
-
-<p>
-What they do is to fall back to the old practice of requiring that the
-requesting task drive the expedited grace period, as was the case
-before the use of workqueues.
-However, the requesting task is only required to drive the grace period
-during the mid-boot dead zone.
-Before mid-boot, a synchronous grace period is a no-op.
-Some time after mid-boot, workqueues are used.
-
-<p>
-Non-expedited non-SRCU synchronous grace periods must also operate
-normally during mid-boot.
-This is handled by causing non-expedited grace periods to take the
-expedited code path during mid-boot.
-
-<p>
-The current code assumes that there are no POSIX signals during
-the mid-boot dead zone.
-However, if an overwhelming need for POSIX signals somehow arises,
-appropriate adjustments can be made to the expedited stall-warning code.
-One such adjustment would reinstate the pre-workqueue stall-warning
-checks, but only during the mid-boot dead zone.
-
-<p>
-With this refinement, synchronous grace periods can now be used from
-task context pretty much any time during the life of the kernel.
-
-<h3><a name="Summary">
-Summary</a></h3>
-
-<p>
-Expedited grace periods use a sequence-number approach to promote
-batching, so that a single grace-period operation can serve numerous
-requests.
-A funnel lock is used to efficiently identify the one task out of
-a concurrent group that will request the grace period.
-All members of the group will block on waitqueues provided in
-the <tt>rcu_node</tt> structure.
-The actual grace-period processing is carried out by a workqueue.
-
-<p>
-CPU-hotplug operations are noted lazily in order to prevent the need
-for tight synchronization between expedited grace periods and
-CPU-hotplug operations.
-The dyntick-idle counters are used to avoid sending IPIs to idle CPUs,
-at least in the common case.
-RCU-preempt and RCU-sched use different IPI handlers and different
-code to respond to the state changes carried out by those handlers,
-but otherwise use common code.
-
-<p>
-Quiescent states are tracked using the <tt>rcu_node</tt> tree,
-and once all necessary quiescent states have been reported,
-all tasks waiting on this expedited grace period are awakened.
-A pair of mutexes are used to allow one grace period's wakeups
-to proceed concurrently with the next grace period's processing.
-
-<p>
-This combination of mechanisms allows expedited grace periods to
-run reasonably efficiently.
-However, for non-time-critical tasks, normal grace periods should be
-used instead because their longer duration permits much higher
-degrees of batching, and thus much lower per-request overheads.
-
-</body></html>
diff --git a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst
new file mode 100644
index 000000000000..414f8a2012d6
--- /dev/null
+++ b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst
@@ -0,0 +1,521 @@
+=================================================
+A Tour Through TREE_RCU's Expedited Grace Periods
+=================================================
+
+Introduction
+============
+
+This document describes RCU's expedited grace periods.
+Unlike RCU's normal grace periods, which accept long latencies to attain
+high efficiency and minimal disturbance, expedited grace periods accept
+lower efficiency and significant disturbance to attain shorter latencies.
+
+There are two flavors of RCU (RCU-preempt and RCU-sched), with an earlier
+third RCU-bh flavor having been implemented in terms of the other two.
+Each of the two implementations is covered in its own section.
+
+Expedited Grace Period Design
+=============================
+
+The expedited RCU grace periods cannot be accused of being subtle,
+given that they for all intents and purposes hammer every CPU that
+has not yet provided a quiescent state for the current expedited
+grace period.
+The one saving grace is that the hammer has grown a bit smaller
+over time: The old call to ``try_stop_cpus()`` has been
+replaced with a set of calls to ``smp_call_function_single()``,
+each of which results in an IPI to the target CPU.
+The corresponding handler function checks the CPU's state, motivating
+a faster quiescent state where possible, and triggering a report
+of that quiescent state.
+As always for RCU, once everything has spent some time in a quiescent
+state, the expedited grace period has completed.
+
+The details of the ``smp_call_function_single()`` handler's
+operation depend on the RCU flavor, as described in the following
+sections.
+
+RCU-preempt Expedited Grace Periods
+===================================
+
+``CONFIG_PREEMPTION=y`` kernels implement RCU-preempt.
+The overall flow of the handling of a given CPU by an RCU-preempt
+expedited grace period is shown in the following diagram:
+
+.. kernel-figure:: ExpRCUFlow.svg
+
+The solid arrows denote direct action, for example, a function call.
+The dotted arrows denote indirect action, for example, an IPI
+or a state that is reached after some time.
+
+If a given CPU is offline or idle, ``synchronize_rcu_expedited()``
+will ignore it because idle and offline CPUs are already residing
+in quiescent states.
+Otherwise, the expedited grace period will use
+``smp_call_function_single()`` to send the CPU an IPI, which
+is handled by ``rcu_exp_handler()``.
+
+However, because this is preemptible RCU, ``rcu_exp_handler()``
+can check to see if the CPU is currently running in an RCU read-side
+critical section.
+If not, the handler can immediately report a quiescent state.
+Otherwise, it sets flags so that the outermost ``rcu_read_unlock()``
+invocation will provide the needed quiescent-state report.
+This flag-setting avoids the previous forced preemption of all
+CPUs that might have RCU read-side critical sections.
+In addition, this flag-setting is done so as to avoid increasing
+the overhead of the common-case fastpath through the scheduler.
+
+Again because this is preemptible RCU, an RCU read-side critical section
+can be preempted.
+When that happens, RCU will enqueue the task, which will the continue to
+block the current expedited grace period until it resumes and finds its
+outermost ``rcu_read_unlock()``.
+The CPU will report a quiescent state just after enqueuing the task because
+the CPU is no longer blocking the grace period.
+It is instead the preempted task doing the blocking.
+The list of blocked tasks is managed by ``rcu_preempt_ctxt_queue()``,
+which is called from ``rcu_preempt_note_context_switch()``, which
+in turn is called from ``rcu_note_context_switch()``, which in
+turn is called from the scheduler.
+
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| Why not just have the expedited grace period check the state of all |
+| the CPUs? After all, that would avoid all those real-time-unfriendly |
+| IPIs. |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| Because we want the RCU read-side critical sections to run fast, |
+| which means no memory barriers. Therefore, it is not possible to |
+| safely check the state from some other CPU. And even if it was |
+| possible to safely check the state, it would still be necessary to |
+| IPI the CPU to safely interact with the upcoming |
+| ``rcu_read_unlock()`` invocation, which means that the remote state |
+| testing would not help the worst-case latency that real-time |
+| applications care about. |
+| |
+| One way to prevent your real-time application from getting hit with |
+| these IPIs is to build your kernel with ``CONFIG_NO_HZ_FULL=y``. RCU |
+| would then perceive the CPU running your application as being idle, |
+| and it would be able to safely detect that state without needing to |
+| IPI the CPU. |
++-----------------------------------------------------------------------+
+
+Please note that this is just the overall flow: Additional complications
+can arise due to races with CPUs going idle or offline, among other
+things.
+
+RCU-sched Expedited Grace Periods
+---------------------------------
+
+``CONFIG_PREEMPTION=n`` kernels implement RCU-sched. The overall flow of
+the handling of a given CPU by an RCU-sched expedited grace period is
+shown in the following diagram:
+
+.. kernel-figure:: ExpSchedFlow.svg
+
+As with RCU-preempt, RCU-sched's ``synchronize_rcu_expedited()`` ignores
+offline and idle CPUs, again because they are in remotely detectable
+quiescent states. However, because the ``rcu_read_lock_sched()`` and
+``rcu_read_unlock_sched()`` leave no trace of their invocation, in
+general it is not possible to tell whether or not the current CPU is in
+an RCU read-side critical section. The best that RCU-sched's
+``rcu_exp_handler()`` can do is to check for idle, on the off-chance
+that the CPU went idle while the IPI was in flight. If the CPU is idle,
+then ``rcu_exp_handler()`` reports the quiescent state.
+
+Otherwise, the handler forces a future context switch by setting the
+NEED_RESCHED flag of the current task's thread flag and the CPU preempt
+counter. At the time of the context switch, the CPU reports the
+quiescent state. Should the CPU go offline first, it will report the
+quiescent state at that time.
+
+Expedited Grace Period and CPU Hotplug
+--------------------------------------
+
+The expedited nature of expedited grace periods require a much tighter
+interaction with CPU hotplug operations than is required for normal
+grace periods. In addition, attempting to IPI offline CPUs will result
+in splats, but failing to IPI online CPUs can result in too-short grace
+periods. Neither option is acceptable in production kernels.
+
+The interaction between expedited grace periods and CPU hotplug
+operations is carried out at several levels:
+
+#. The number of CPUs that have ever been online is tracked by the
+ ``rcu_state`` structure's ``->ncpus`` field. The ``rcu_state``
+ structure's ``->ncpus_snap`` field tracks the number of CPUs that
+ have ever been online at the beginning of an RCU expedited grace
+ period. Note that this number never decreases, at least in the
+ absence of a time machine.
+#. The identities of the CPUs that have ever been online is tracked by
+ the ``rcu_node`` structure's ``->expmaskinitnext`` field. The
+ ``rcu_node`` structure's ``->expmaskinit`` field tracks the
+ identities of the CPUs that were online at least once at the
+ beginning of the most recent RCU expedited grace period. The
+ ``rcu_state`` structure's ``->ncpus`` and ``->ncpus_snap`` fields are
+ used to detect when new CPUs have come online for the first time,
+ that is, when the ``rcu_node`` structure's ``->expmaskinitnext``
+ field has changed since the beginning of the last RCU expedited grace
+ period, which triggers an update of each ``rcu_node`` structure's
+ ``->expmaskinit`` field from its ``->expmaskinitnext`` field.
+#. Each ``rcu_node`` structure's ``->expmaskinit`` field is used to
+ initialize that structure's ``->expmask`` at the beginning of each
+ RCU expedited grace period. This means that only those CPUs that have
+ been online at least once will be considered for a given grace
+ period.
+#. Any CPU that goes offline will clear its bit in its leaf ``rcu_node``
+ structure's ``->qsmaskinitnext`` field, so any CPU with that bit
+ clear can safely be ignored. However, it is possible for a CPU coming
+ online or going offline to have this bit set for some time while
+ ``cpu_online`` returns ``false``.
+#. For each non-idle CPU that RCU believes is currently online, the
+ grace period invokes ``smp_call_function_single()``. If this
+ succeeds, the CPU was fully online. Failure indicates that the CPU is
+ in the process of coming online or going offline, in which case it is
+ necessary to wait for a short time period and try again. The purpose
+ of this wait (or series of waits, as the case may be) is to permit a
+ concurrent CPU-hotplug operation to complete.
+#. In the case of RCU-sched, one of the last acts of an outgoing CPU is
+ to invoke ``rcutree_report_cpu_dead()``, which reports a quiescent state for
+ that CPU. However, this is likely paranoia-induced redundancy.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| Why all the dancing around with multiple counters and masks tracking |
+| CPUs that were once online? Why not just have a single set of masks |
+| tracking the currently online CPUs and be done with it? |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| Maintaining single set of masks tracking the online CPUs *sounds* |
+| easier, at least until you try working out all the race conditions |
+| between grace-period initialization and CPU-hotplug operations. For |
+| example, suppose initialization is progressing down the tree while a |
+| CPU-offline operation is progressing up the tree. This situation can |
+| result in bits set at the top of the tree that have no counterparts |
+| at the bottom of the tree. Those bits will never be cleared, which |
+| will result in grace-period hangs. In short, that way lies madness, |
+| to say nothing of a great many bugs, hangs, and deadlocks. |
+| In contrast, the current multi-mask multi-counter scheme ensures that |
+| grace-period initialization will always see consistent masks up and |
+| down the tree, which brings significant simplifications over the |
+| single-mask method. |
+| |
+| This is an instance of `deferring work in order to avoid |
+| synchronization <http://www.cs.columbia.edu/~library/TR-repository/re |
+| ports/reports-1992/cucs-039-92.ps.gz>`__. |
+| Lazily recording CPU-hotplug events at the beginning of the next |
+| grace period greatly simplifies maintenance of the CPU-tracking |
+| bitmasks in the ``rcu_node`` tree. |
++-----------------------------------------------------------------------+
+
+Expedited Grace Period Refinements
+----------------------------------
+
+Idle-CPU Checks
+~~~~~~~~~~~~~~~
+
+Each expedited grace period checks for idle CPUs when initially forming
+the mask of CPUs to be IPIed and again just before IPIing a CPU (both
+checks are carried out by ``sync_rcu_exp_select_cpus()``). If the CPU is
+idle at any time between those two times, the CPU will not be IPIed.
+Instead, the task pushing the grace period forward will include the idle
+CPUs in the mask passed to ``rcu_report_exp_cpu_mult()``.
+
+For RCU-sched, there is an additional check: If the IPI has interrupted
+the idle loop, then ``rcu_exp_handler()`` invokes
+``rcu_report_exp_rdp()`` to report the corresponding quiescent state.
+
+For RCU-preempt, there is no specific check for idle in the IPI handler
+(``rcu_exp_handler()``), but because RCU read-side critical sections are
+not permitted within the idle loop, if ``rcu_exp_handler()`` sees that
+the CPU is within RCU read-side critical section, the CPU cannot
+possibly be idle. Otherwise, ``rcu_exp_handler()`` invokes
+``rcu_report_exp_rdp()`` to report the corresponding quiescent state,
+regardless of whether or not that quiescent state was due to the CPU
+being idle.
+
+In summary, RCU expedited grace periods check for idle when building the
+bitmask of CPUs that must be IPIed, just before sending each IPI, and
+(either explicitly or implicitly) within the IPI handler.
+
+Batching via Sequence Counter
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+If each grace-period request was carried out separately, expedited grace
+periods would have abysmal scalability and problematic high-load
+characteristics. Because each grace-period operation can serve an
+unlimited number of updates, it is important to *batch* requests, so
+that a single expedited grace-period operation will cover all requests
+in the corresponding batch.
+
+This batching is controlled by a sequence counter named
+``->expedited_sequence`` in the ``rcu_state`` structure. This counter
+has an odd value when there is an expedited grace period in progress and
+an even value otherwise, so that dividing the counter value by two gives
+the number of completed grace periods. During any given update request,
+the counter must transition from even to odd and then back to even, thus
+indicating that a grace period has elapsed. Therefore, if the initial
+value of the counter is ``s``, the updater must wait until the counter
+reaches at least the value ``(s+3)&~0x1``. This counter is managed by
+the following access functions:
+
+#. ``rcu_exp_gp_seq_start()``, which marks the start of an expedited
+ grace period.
+#. ``rcu_exp_gp_seq_end()``, which marks the end of an expedited grace
+ period.
+#. ``rcu_exp_gp_seq_snap()``, which obtains a snapshot of the counter.
+#. ``rcu_exp_gp_seq_done()``, which returns ``true`` if a full expedited
+ grace period has elapsed since the corresponding call to
+ ``rcu_exp_gp_seq_snap()``.
+
+Again, only one request in a given batch need actually carry out a
+grace-period operation, which means there must be an efficient way to
+identify which of many concurrent requests will initiate the grace
+period, and that there be an efficient way for the remaining requests to
+wait for that grace period to complete. However, that is the topic of
+the next section.
+
+Funnel Locking and Wait/Wakeup
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The natural way to sort out which of a batch of updaters will initiate
+the expedited grace period is to use the ``rcu_node`` combining tree, as
+implemented by the ``exp_funnel_lock()`` function. The first updater
+corresponding to a given grace period arriving at a given ``rcu_node``
+structure records its desired grace-period sequence number in the
+``->exp_seq_rq`` field and moves up to the next level in the tree.
+Otherwise, if the ``->exp_seq_rq`` field already contains the sequence
+number for the desired grace period or some later one, the updater
+blocks on one of four wait queues in the ``->exp_wq[]`` array, using the
+second-from-bottom and third-from bottom bits as an index. An
+``->exp_lock`` field in the ``rcu_node`` structure synchronizes access
+to these fields.
+
+An empty ``rcu_node`` tree is shown in the following diagram, with the
+white cells representing the ``->exp_seq_rq`` field and the red cells
+representing the elements of the ``->exp_wq[]`` array.
+
+.. kernel-figure:: Funnel0.svg
+
+The next diagram shows the situation after the arrival of Task A and
+Task B at the leftmost and rightmost leaf ``rcu_node`` structures,
+respectively. The current value of the ``rcu_state`` structure's
+``->expedited_sequence`` field is zero, so adding three and clearing the
+bottom bit results in the value two, which both tasks record in the
+``->exp_seq_rq`` field of their respective ``rcu_node`` structures:
+
+.. kernel-figure:: Funnel1.svg
+
+Each of Tasks A and B will move up to the root ``rcu_node`` structure.
+Suppose that Task A wins, recording its desired grace-period sequence
+number and resulting in the state shown below:
+
+.. kernel-figure:: Funnel2.svg
+
+Task A now advances to initiate a new grace period, while Task B moves
+up to the root ``rcu_node`` structure, and, seeing that its desired
+sequence number is already recorded, blocks on ``->exp_wq[1]``.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| Why ``->exp_wq[1]``? Given that the value of these tasks' desired |
+| sequence number is two, so shouldn't they instead block on |
+| ``->exp_wq[2]``? |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| No. |
+| Recall that the bottom bit of the desired sequence number indicates |
+| whether or not a grace period is currently in progress. It is |
+| therefore necessary to shift the sequence number right one bit |
+| position to obtain the number of the grace period. This results in |
+| ``->exp_wq[1]``. |
++-----------------------------------------------------------------------+
+
+If Tasks C and D also arrive at this point, they will compute the same
+desired grace-period sequence number, and see that both leaf
+``rcu_node`` structures already have that value recorded. They will
+therefore block on their respective ``rcu_node`` structures'
+``->exp_wq[1]`` fields, as shown below:
+
+.. kernel-figure:: Funnel3.svg
+
+Task A now acquires the ``rcu_state`` structure's ``->exp_mutex`` and
+initiates the grace period, which increments ``->expedited_sequence``.
+Therefore, if Tasks E and F arrive, they will compute a desired sequence
+number of 4 and will record this value as shown below:
+
+.. kernel-figure:: Funnel4.svg
+
+Tasks E and F will propagate up the ``rcu_node`` combining tree, with
+Task F blocking on the root ``rcu_node`` structure and Task E wait for
+Task A to finish so that it can start the next grace period. The
+resulting state is as shown below:
+
+.. kernel-figure:: Funnel5.svg
+
+Once the grace period completes, Task A starts waking up the tasks
+waiting for this grace period to complete, increments the
+``->expedited_sequence``, acquires the ``->exp_wake_mutex`` and then
+releases the ``->exp_mutex``. This results in the following state:
+
+.. kernel-figure:: Funnel6.svg
+
+Task E can then acquire ``->exp_mutex`` and increment
+``->expedited_sequence`` to the value three. If new tasks G and H arrive
+and moves up the combining tree at the same time, the state will be as
+follows:
+
+.. kernel-figure:: Funnel7.svg
+
+Note that three of the root ``rcu_node`` structure's waitqueues are now
+occupied. However, at some point, Task A will wake up the tasks blocked
+on the ``->exp_wq`` waitqueues, resulting in the following state:
+
+.. kernel-figure:: Funnel8.svg
+
+Execution will continue with Tasks E and H completing their grace
+periods and carrying out their wakeups.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| What happens if Task A takes so long to do its wakeups that Task E's |
+| grace period completes? |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| Then Task E will block on the ``->exp_wake_mutex``, which will also |
+| prevent it from releasing ``->exp_mutex``, which in turn will prevent |
+| the next grace period from starting. This last is important in |
+| preventing overflow of the ``->exp_wq[]`` array. |
++-----------------------------------------------------------------------+
+
+Use of Workqueues
+~~~~~~~~~~~~~~~~~
+
+In earlier implementations, the task requesting the expedited grace
+period also drove it to completion. This straightforward approach had
+the disadvantage of needing to account for POSIX signals sent to user
+tasks, so more recent implementations use the Linux kernel's
+workqueues (see Documentation/core-api/workqueue.rst).
+
+The requesting task still does counter snapshotting and funnel-lock
+processing, but the task reaching the top of the funnel lock does a
+``schedule_work()`` (from ``_synchronize_rcu_expedited()`` so that a
+workqueue kthread does the actual grace-period processing. Because
+workqueue kthreads do not accept POSIX signals, grace-period-wait
+processing need not allow for POSIX signals. In addition, this approach
+allows wakeups for the previous expedited grace period to be overlapped
+with processing for the next expedited grace period. Because there are
+only four sets of waitqueues, it is necessary to ensure that the
+previous grace period's wakeups complete before the next grace period's
+wakeups start. This is handled by having the ``->exp_mutex`` guard
+expedited grace-period processing and the ``->exp_wake_mutex`` guard
+wakeups. The key point is that the ``->exp_mutex`` is not released until
+the first wakeup is complete, which means that the ``->exp_wake_mutex``
+has already been acquired at that point. This approach ensures that the
+previous grace period's wakeups can be carried out while the current
+grace period is in process, but that these wakeups will complete before
+the next grace period starts. This means that only three waitqueues are
+required, guaranteeing that the four that are provided are sufficient.
+
+Stall Warnings
+~~~~~~~~~~~~~~
+
+Expediting grace periods does nothing to speed things up when RCU
+readers take too long, and therefore expedited grace periods check for
+stalls just as normal grace periods do.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| But why not just let the normal grace-period machinery detect the |
+| stalls, given that a given reader must block both normal and |
+| expedited grace periods? |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| Because it is quite possible that at a given time there is no normal |
+| grace period in progress, in which case the normal grace period |
+| cannot emit a stall warning. |
++-----------------------------------------------------------------------+
+
+The ``synchronize_sched_expedited_wait()`` function loops waiting for
+the expedited grace period to end, but with a timeout set to the current
+RCU CPU stall-warning time. If this time is exceeded, any CPUs or
+``rcu_node`` structures blocking the current grace period are printed.
+Each stall warning results in another pass through the loop, but the
+second and subsequent passes use longer stall times.
+
+Mid-boot operation
+~~~~~~~~~~~~~~~~~~
+
+The use of workqueues has the advantage that the expedited grace-period
+code need not worry about POSIX signals. Unfortunately, it has the
+corresponding disadvantage that workqueues cannot be used until they are
+initialized, which does not happen until some time after the scheduler
+spawns the first task. Given that there are parts of the kernel that
+really do want to execute grace periods during this mid-boot “dead
+zone”, expedited grace periods must do something else during this time.
+
+What they do is to fall back to the old practice of requiring that the
+requesting task drive the expedited grace period, as was the case before
+the use of workqueues. However, the requesting task is only required to
+drive the grace period during the mid-boot dead zone. Before mid-boot, a
+synchronous grace period is a no-op. Some time after mid-boot,
+workqueues are used.
+
+Non-expedited non-SRCU synchronous grace periods must also operate
+normally during mid-boot. This is handled by causing non-expedited grace
+periods to take the expedited code path during mid-boot.
+
+The current code assumes that there are no POSIX signals during the
+mid-boot dead zone. However, if an overwhelming need for POSIX signals
+somehow arises, appropriate adjustments can be made to the expedited
+stall-warning code. One such adjustment would reinstate the
+pre-workqueue stall-warning checks, but only during the mid-boot dead
+zone.
+
+With this refinement, synchronous grace periods can now be used from
+task context pretty much any time during the life of the kernel. That
+is, aside from some points in the suspend, hibernate, or shutdown code
+path.
+
+Summary
+~~~~~~~
+
+Expedited grace periods use a sequence-number approach to promote
+batching, so that a single grace-period operation can serve numerous
+requests. A funnel lock is used to efficiently identify the one task out
+of a concurrent group that will request the grace period. All members of
+the group will block on waitqueues provided in the ``rcu_node``
+structure. The actual grace-period processing is carried out by a
+workqueue.
+
+CPU-hotplug operations are noted lazily in order to prevent the need for
+tight synchronization between expedited grace periods and CPU-hotplug
+operations. The dyntick-idle counters are used to avoid sending IPIs to
+idle CPUs, at least in the common case. RCU-preempt and RCU-sched use
+different IPI handlers and different code to respond to the state
+changes carried out by those handlers, but otherwise use common code.
+
+Quiescent states are tracked using the ``rcu_node`` tree, and once all
+necessary quiescent states have been reported, all tasks waiting on this
+expedited grace period are awakened. A pair of mutexes are used to allow
+one grace period's wakeups to proceed concurrently with the next grace
+period's processing.
+
+This combination of mechanisms allows expedited grace periods to run
+reasonably efficiently. However, for non-time-critical tasks, normal
+grace periods should be used instead because their longer duration
+permits much higher degrees of batching, and thus much lower per-request
+overheads.
diff --git a/Documentation/RCU/Design/Expedited-Grace-Periods/Funnel0.svg b/Documentation/RCU/Design/Expedited-Grace-Periods/Funnel0.svg
index 98af66557908..16b1ff0ad38c 100644
--- a/Documentation/RCU/Design/Expedited-Grace-Periods/Funnel0.svg
+++ b/Documentation/RCU/Design/Expedited-Grace-Periods/Funnel0.svg
@@ -116,7 +116,7 @@
<flowRoot
xml:space="preserve"
id="flowRoot2985"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"><flowRegion
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"><flowRegion
id="flowRegion2987"><rect
id="rect2989"
width="82.85714"
@@ -125,7 +125,7 @@
y="492.36218" /></flowRegion><flowPara
id="flowPara2991" /></flowRoot> <text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="362.371"
y="262.51819"
id="text4441"
diff --git a/Documentation/RCU/Design/Expedited-Grace-Periods/Funnel1.svg b/Documentation/RCU/Design/Expedited-Grace-Periods/Funnel1.svg
index e0184a37aec7..684a4b969725 100644
--- a/Documentation/RCU/Design/Expedited-Grace-Periods/Funnel1.svg
+++ b/Documentation/RCU/Design/Expedited-Grace-Periods/Funnel1.svg
@@ -116,7 +116,7 @@
<flowRoot
xml:space="preserve"
id="flowRoot2985"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"><flowRegion
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"><flowRegion
id="flowRegion2987"><rect
id="rect2989"
width="82.85714"
@@ -125,7 +125,7 @@
y="492.36218" /></flowRegion><flowPara
id="flowPara2991" /></flowRoot> <text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="362.371"
y="262.51819"
id="text4441"
diff --git a/Documentation/RCU/Design/Expedited-Grace-Periods/Funnel2.svg b/Documentation/RCU/Design/Expedited-Grace-Periods/Funnel2.svg
index 1bc3fed54d58..8fb2454d9544 100644
--- a/Documentation/RCU/Design/Expedited-Grace-Periods/Funnel2.svg
+++ b/Documentation/RCU/Design/Expedited-Grace-Periods/Funnel2.svg
@@ -116,7 +116,7 @@
<flowRoot
xml:space="preserve"
id="flowRoot2985"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"><flowRegion
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"><flowRegion
id="flowRegion2987"><rect
id="rect2989"
width="82.85714"
@@ -125,7 +125,7 @@
y="492.36218" /></flowRegion><flowPara
id="flowPara2991" /></flowRoot> <text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="362.371"
y="262.51819"
id="text4441"
diff --git a/Documentation/RCU/Design/Expedited-Grace-Periods/Funnel3.svg b/Documentation/RCU/Design/Expedited-Grace-Periods/Funnel3.svg
index 6d8a1bffb3e4..5d4f22d5662c 100644
--- a/Documentation/RCU/Design/Expedited-Grace-Periods/Funnel3.svg
+++ b/Documentation/RCU/Design/Expedited-Grace-Periods/Funnel3.svg
@@ -116,7 +116,7 @@
<flowRoot
xml:space="preserve"
id="flowRoot2985"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"><flowRegion
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"><flowRegion
id="flowRegion2987"><rect
id="rect2989"
width="82.85714"
@@ -125,7 +125,7 @@
y="492.36218" /></flowRegion><flowPara
id="flowPara2991" /></flowRoot> <text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="362.371"
y="262.51819"
id="text4441"
diff --git a/Documentation/RCU/Design/Expedited-Grace-Periods/Funnel4.svg b/Documentation/RCU/Design/Expedited-Grace-Periods/Funnel4.svg
index 44018fd6342b..b89b02869914 100644
--- a/Documentation/RCU/Design/Expedited-Grace-Periods/Funnel4.svg
+++ b/Documentation/RCU/Design/Expedited-Grace-Periods/Funnel4.svg
@@ -116,7 +116,7 @@
<flowRoot
xml:space="preserve"
id="flowRoot2985"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"><flowRegion
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"><flowRegion
id="flowRegion2987"><rect
id="rect2989"
width="82.85714"
@@ -125,7 +125,7 @@
y="492.36218" /></flowRegion><flowPara
id="flowPara2991" /></flowRoot> <text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="362.371"
y="262.51819"
id="text4441"
diff --git a/Documentation/RCU/Design/Expedited-Grace-Periods/Funnel5.svg b/Documentation/RCU/Design/Expedited-Grace-Periods/Funnel5.svg
index e5eef50454fb..90f1c77bea2f 100644
--- a/Documentation/RCU/Design/Expedited-Grace-Periods/Funnel5.svg
+++ b/Documentation/RCU/Design/Expedited-Grace-Periods/Funnel5.svg
@@ -116,7 +116,7 @@
<flowRoot
xml:space="preserve"
id="flowRoot2985"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"><flowRegion
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"><flowRegion
id="flowRegion2987"><rect
id="rect2989"
width="82.85714"
@@ -125,7 +125,7 @@
y="492.36218" /></flowRegion><flowPara
id="flowPara2991" /></flowRoot> <text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="362.371"
y="262.51819"
id="text4441"
diff --git a/Documentation/RCU/Design/Expedited-Grace-Periods/Funnel6.svg b/Documentation/RCU/Design/Expedited-Grace-Periods/Funnel6.svg
index fbd2c1892886..3e5651da031a 100644
--- a/Documentation/RCU/Design/Expedited-Grace-Periods/Funnel6.svg
+++ b/Documentation/RCU/Design/Expedited-Grace-Periods/Funnel6.svg
@@ -116,7 +116,7 @@
<flowRoot
xml:space="preserve"
id="flowRoot2985"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"><flowRegion
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"><flowRegion
id="flowRegion2987"><rect
id="rect2989"
width="82.85714"
@@ -125,7 +125,7 @@
y="492.36218" /></flowRegion><flowPara
id="flowPara2991" /></flowRoot> <text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="362.371"
y="262.51819"
id="text4441"
diff --git a/Documentation/RCU/Design/Expedited-Grace-Periods/Funnel7.svg b/Documentation/RCU/Design/Expedited-Grace-Periods/Funnel7.svg
index 502e159ed278..9483f08d345e 100644
--- a/Documentation/RCU/Design/Expedited-Grace-Periods/Funnel7.svg
+++ b/Documentation/RCU/Design/Expedited-Grace-Periods/Funnel7.svg
@@ -116,7 +116,7 @@
<flowRoot
xml:space="preserve"
id="flowRoot2985"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"><flowRegion
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"><flowRegion
id="flowRegion2987"><rect
id="rect2989"
width="82.85714"
@@ -125,7 +125,7 @@
y="492.36218" /></flowRegion><flowPara
id="flowPara2991" /></flowRoot> <text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="362.371"
y="262.51819"
id="text4441"
diff --git a/Documentation/RCU/Design/Expedited-Grace-Periods/Funnel8.svg b/Documentation/RCU/Design/Expedited-Grace-Periods/Funnel8.svg
index 677401551c7d..1101ec30e604 100644
--- a/Documentation/RCU/Design/Expedited-Grace-Periods/Funnel8.svg
+++ b/Documentation/RCU/Design/Expedited-Grace-Periods/Funnel8.svg
@@ -116,7 +116,7 @@
<flowRoot
xml:space="preserve"
id="flowRoot2985"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"><flowRegion
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"><flowRegion
id="flowRegion2987"><rect
id="rect2989"
width="82.85714"
@@ -125,7 +125,7 @@
y="492.36218" /></flowRegion><flowPara
id="flowPara2991" /></flowRoot> <text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="362.371"
y="262.51819"
id="text4441"
diff --git a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Diagram.html b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Diagram.html
deleted file mode 100644
index e5b42a798ff3..000000000000
--- a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Diagram.html
+++ /dev/null
@@ -1,9 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
- "http://www.w3.org/TR/html4/loose.dtd">
- <html>
- <head><title>A Diagram of TREE_RCU's Grace-Period Memory Ordering</title>
- <meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
-
-<p><img src="TreeRCU-gp.svg" alt="TreeRCU-gp.svg">
-
-</body></html>
diff --git a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.html b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.html
deleted file mode 100644
index a346ce0116eb..000000000000
--- a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.html
+++ /dev/null
@@ -1,705 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
- "http://www.w3.org/TR/html4/loose.dtd">
- <html>
- <head><title>A Tour Through TREE_RCU's Grace-Period Memory Ordering</title>
- <meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
-
- <p>August 8, 2017</p>
- <p>This article was contributed by Paul E.&nbsp;McKenney</p>
-
-<h3>Introduction</h3>
-
-<p>This document gives a rough visual overview of how Tree RCU's
-grace-period memory ordering guarantee is provided.
-
-<ol>
-<li> <a href="#What Is Tree RCU's Grace Period Memory Ordering Guarantee?">
- What Is Tree RCU's Grace Period Memory Ordering Guarantee?</a>
-<li> <a href="#Tree RCU Grace Period Memory Ordering Building Blocks">
- Tree RCU Grace Period Memory Ordering Building Blocks</a>
-<li> <a href="#Tree RCU Grace Period Memory Ordering Components">
- Tree RCU Grace Period Memory Ordering Components</a>
-<li> <a href="#Putting It All Together">Putting It All Together</a>
-</ol>
-
-<h3><a name="What Is Tree RCU's Grace Period Memory Ordering Guarantee?">
-What Is Tree RCU's Grace Period Memory Ordering Guarantee?</a></h3>
-
-<p>RCU grace periods provide extremely strong memory-ordering guarantees
-for non-idle non-offline code.
-Any code that happens after the end of a given RCU grace period is guaranteed
-to see the effects of all accesses prior to the beginning of that grace
-period that are within RCU read-side critical sections.
-Similarly, any code that happens before the beginning of a given RCU grace
-period is guaranteed to see the effects of all accesses following the end
-of that grace period that are within RCU read-side critical sections.
-
-<p>This guarantee is particularly pervasive for <tt>synchronize_sched()</tt>,
-for which RCU-sched read-side critical sections include any region
-of code for which preemption is disabled.
-Given that each individual machine instruction can be thought of as
-an extremely small region of preemption-disabled code, one can think of
-<tt>synchronize_sched()</tt> as <tt>smp_mb()</tt> on steroids.
-
-<p>RCU updaters use this guarantee by splitting their updates into
-two phases, one of which is executed before the grace period and
-the other of which is executed after the grace period.
-In the most common use case, phase one removes an element from
-a linked RCU-protected data structure, and phase two frees that element.
-For this to work, any readers that have witnessed state prior to the
-phase-one update (in the common case, removal) must not witness state
-following the phase-two update (in the common case, freeing).
-
-<p>The RCU implementation provides this guarantee using a network
-of lock-based critical sections, memory barriers, and per-CPU
-processing, as is described in the following sections.
-
-<h3><a name="Tree RCU Grace Period Memory Ordering Building Blocks">
-Tree RCU Grace Period Memory Ordering Building Blocks</a></h3>
-
-<p>The workhorse for RCU's grace-period memory ordering is the
-critical section for the <tt>rcu_node</tt> structure's
-<tt>-&gt;lock</tt>.
-These critical sections use helper functions for lock acquisition, including
-<tt>raw_spin_lock_rcu_node()</tt>,
-<tt>raw_spin_lock_irq_rcu_node()</tt>, and
-<tt>raw_spin_lock_irqsave_rcu_node()</tt>.
-Their lock-release counterparts are
-<tt>raw_spin_unlock_rcu_node()</tt>,
-<tt>raw_spin_unlock_irq_rcu_node()</tt>, and
-<tt>raw_spin_unlock_irqrestore_rcu_node()</tt>,
-respectively.
-For completeness, a
-<tt>raw_spin_trylock_rcu_node()</tt>
-is also provided.
-The key point is that the lock-acquisition functions, including
-<tt>raw_spin_trylock_rcu_node()</tt>, all invoke
-<tt>smp_mb__after_unlock_lock()</tt> immediately after successful
-acquisition of the lock.
-
-<p>Therefore, for any given <tt>rcu_node</tt> struction, any access
-happening before one of the above lock-release functions will be seen
-by all CPUs as happening before any access happening after a later
-one of the above lock-acquisition functions.
-Furthermore, any access happening before one of the
-above lock-release function on any given CPU will be seen by all
-CPUs as happening before any access happening after a later one
-of the above lock-acquisition functions executing on that same CPU,
-even if the lock-release and lock-acquisition functions are operating
-on different <tt>rcu_node</tt> structures.
-Tree RCU uses these two ordering guarantees to form an ordering
-network among all CPUs that were in any way involved in the grace
-period, including any CPUs that came online or went offline during
-the grace period in question.
-
-<p>The following litmus test exhibits the ordering effects of these
-lock-acquisition and lock-release functions:
-
-<pre>
- 1 int x, y, z;
- 2
- 3 void task0(void)
- 4 {
- 5 raw_spin_lock_rcu_node(rnp);
- 6 WRITE_ONCE(x, 1);
- 7 r1 = READ_ONCE(y);
- 8 raw_spin_unlock_rcu_node(rnp);
- 9 }
-10
-11 void task1(void)
-12 {
-13 raw_spin_lock_rcu_node(rnp);
-14 WRITE_ONCE(y, 1);
-15 r2 = READ_ONCE(z);
-16 raw_spin_unlock_rcu_node(rnp);
-17 }
-18
-19 void task2(void)
-20 {
-21 WRITE_ONCE(z, 1);
-22 smp_mb();
-23 r3 = READ_ONCE(x);
-24 }
-25
-26 WARN_ON(r1 == 0 &amp;&amp; r2 == 0 &amp;&amp; r3 == 0);
-</pre>
-
-<p>The <tt>WARN_ON()</tt> is evaluated at &ldquo;the end of time&rdquo;,
-after all changes have propagated throughout the system.
-Without the <tt>smp_mb__after_unlock_lock()</tt> provided by the
-acquisition functions, this <tt>WARN_ON()</tt> could trigger, for example
-on PowerPC.
-The <tt>smp_mb__after_unlock_lock()</tt> invocations prevent this
-<tt>WARN_ON()</tt> from triggering.
-
-<p>This approach must be extended to include idle CPUs, which need
-RCU's grace-period memory ordering guarantee to extend to any
-RCU read-side critical sections preceding and following the current
-idle sojourn.
-This case is handled by calls to the strongly ordered
-<tt>atomic_add_return()</tt> read-modify-write atomic operation that
-is invoked within <tt>rcu_dynticks_eqs_enter()</tt> at idle-entry
-time and within <tt>rcu_dynticks_eqs_exit()</tt> at idle-exit time.
-The grace-period kthread invokes <tt>rcu_dynticks_snap()</tt> and
-<tt>rcu_dynticks_in_eqs_since()</tt> (both of which invoke
-an <tt>atomic_add_return()</tt> of zero) to detect idle CPUs.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
- But what about CPUs that remain offline for the entire
- grace period?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
- Such CPUs will be offline at the beginning of the grace period,
- so the grace period won't expect quiescent states from them.
- Races between grace-period start and CPU-hotplug operations
- are mediated by the CPU's leaf <tt>rcu_node</tt> structure's
- <tt>-&gt;lock</tt> as described above.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>The approach must be extended to handle one final case, that
-of waking a task blocked in <tt>synchronize_rcu()</tt>.
-This task might be affinitied to a CPU that is not yet aware that
-the grace period has ended, and thus might not yet be subject to
-the grace period's memory ordering.
-Therefore, there is an <tt>smp_mb()</tt> after the return from
-<tt>wait_for_completion()</tt> in the <tt>synchronize_rcu()</tt>
-code path.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
- What? Where???
- I don't see any <tt>smp_mb()</tt> after the return from
- <tt>wait_for_completion()</tt>!!!
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
- That would be because I spotted the need for that
- <tt>smp_mb()</tt> during the creation of this documentation,
- and it is therefore unlikely to hit mainline before v4.14.
- Kudos to Lance Roy, Will Deacon, Peter Zijlstra, and
- Jonathan Cameron for asking questions that sensitized me
- to the rather elaborate sequence of events that demonstrate
- the need for this memory barrier.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>Tree RCU's grace--period memory-ordering guarantees rely most
-heavily on the <tt>rcu_node</tt> structure's <tt>-&gt;lock</tt>
-field, so much so that it is necessary to abbreviate this pattern
-in the diagrams in the next section.
-For example, consider the <tt>rcu_prepare_for_idle()</tt> function
-shown below, which is one of several functions that enforce ordering
-of newly arrived RCU callbacks against future grace periods:
-
-<pre>
- 1 static void rcu_prepare_for_idle(void)
- 2 {
- 3 bool needwake;
- 4 struct rcu_data *rdp;
- 5 struct rcu_dynticks *rdtp = this_cpu_ptr(&amp;rcu_dynticks);
- 6 struct rcu_node *rnp;
- 7 struct rcu_state *rsp;
- 8 int tne;
- 9
-10 if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) ||
-11 rcu_is_nocb_cpu(smp_processor_id()))
-12 return;
-13 tne = READ_ONCE(tick_nohz_active);
-14 if (tne != rdtp-&gt;tick_nohz_enabled_snap) {
-15 if (rcu_cpu_has_callbacks(NULL))
-16 invoke_rcu_core();
-17 rdtp-&gt;tick_nohz_enabled_snap = tne;
-18 return;
-19 }
-20 if (!tne)
-21 return;
-22 if (rdtp-&gt;all_lazy &amp;&amp;
-23 rdtp-&gt;nonlazy_posted != rdtp-&gt;nonlazy_posted_snap) {
-24 rdtp-&gt;all_lazy = false;
-25 rdtp-&gt;nonlazy_posted_snap = rdtp-&gt;nonlazy_posted;
-26 invoke_rcu_core();
-27 return;
-28 }
-29 if (rdtp-&gt;last_accelerate == jiffies)
-30 return;
-31 rdtp-&gt;last_accelerate = jiffies;
-32 for_each_rcu_flavor(rsp) {
-33 rdp = this_cpu_ptr(rsp-&gt;rda);
-34 if (rcu_segcblist_pend_cbs(&amp;rdp-&gt;cblist))
-35 continue;
-36 rnp = rdp-&gt;mynode;
-37 raw_spin_lock_rcu_node(rnp);
-38 needwake = rcu_accelerate_cbs(rsp, rnp, rdp);
-39 raw_spin_unlock_rcu_node(rnp);
-40 if (needwake)
-41 rcu_gp_kthread_wake(rsp);
-42 }
-43 }
-</pre>
-
-<p>But the only part of <tt>rcu_prepare_for_idle()</tt> that really
-matters for this discussion are lines&nbsp;37&ndash;39.
-We will therefore abbreviate this function as follows:
-
-</p><p><img src="rcu_node-lock.svg" alt="rcu_node-lock.svg">
-
-<p>The box represents the <tt>rcu_node</tt> structure's <tt>-&gt;lock</tt>
-critical section, with the double line on top representing the additional
-<tt>smp_mb__after_unlock_lock()</tt>.
-
-<h3><a name="Tree RCU Grace Period Memory Ordering Components">
-Tree RCU Grace Period Memory Ordering Components</a></h3>
-
-<p>Tree RCU's grace-period memory-ordering guarantee is provided by
-a number of RCU components:
-
-<ol>
-<li> <a href="#Callback Registry">Callback Registry</a>
-<li> <a href="#Grace-Period Initialization">Grace-Period Initialization</a>
-<li> <a href="#Self-Reported Quiescent States">
- Self-Reported Quiescent States</a>
-<li> <a href="#Dynamic Tick Interface">Dynamic Tick Interface</a>
-<li> <a href="#CPU-Hotplug Interface">CPU-Hotplug Interface</a>
-<li> <a href="Forcing Quiescent States">Forcing Quiescent States</a>
-<li> <a href="Grace-Period Cleanup">Grace-Period Cleanup</a>
-<li> <a href="Callback Invocation">Callback Invocation</a>
-</ol>
-
-<p>Each of the following section looks at the corresponding component
-in detail.
-
-<h4><a name="Callback Registry">Callback Registry</a></h4>
-
-<p>If RCU's grace-period guarantee is to mean anything at all, any
-access that happens before a given invocation of <tt>call_rcu()</tt>
-must also happen before the corresponding grace period.
-The implementation of this portion of RCU's grace period guarantee
-is shown in the following figure:
-
-</p><p><img src="TreeRCU-callback-registry.svg" alt="TreeRCU-callback-registry.svg">
-
-<p>Because <tt>call_rcu()</tt> normally acts only on CPU-local state,
-it provides no ordering guarantees, either for itself or for
-phase one of the update (which again will usually be removal of
-an element from an RCU-protected data structure).
-It simply enqueues the <tt>rcu_head</tt> structure on a per-CPU list,
-which cannot become associated with a grace period until a later
-call to <tt>rcu_accelerate_cbs()</tt>, as shown in the diagram above.
-
-<p>One set of code paths shown on the left invokes
-<tt>rcu_accelerate_cbs()</tt> via
-<tt>note_gp_changes()</tt>, either directly from <tt>call_rcu()</tt> (if
-the current CPU is inundated with queued <tt>rcu_head</tt> structures)
-or more likely from an <tt>RCU_SOFTIRQ</tt> handler.
-Another code path in the middle is taken only in kernels built with
-<tt>CONFIG_RCU_FAST_NO_HZ=y</tt>, which invokes
-<tt>rcu_accelerate_cbs()</tt> via <tt>rcu_prepare_for_idle()</tt>.
-The final code path on the right is taken only in kernels built with
-<tt>CONFIG_HOTPLUG_CPU=y</tt>, which invokes
-<tt>rcu_accelerate_cbs()</tt> via
-<tt>rcu_advance_cbs()</tt>, <tt>rcu_migrate_callbacks</tt>,
-<tt>rcutree_migrate_callbacks()</tt>, and <tt>takedown_cpu()</tt>,
-which in turn is invoked on a surviving CPU after the outgoing
-CPU has been completely offlined.
-
-<p>There are a few other code paths within grace-period processing
-that opportunistically invoke <tt>rcu_accelerate_cbs()</tt>.
-However, either way, all of the CPU's recently queued <tt>rcu_head</tt>
-structures are associated with a future grace-period number under
-the protection of the CPU's lead <tt>rcu_node</tt> structure's
-<tt>-&gt;lock</tt>.
-In all cases, there is full ordering against any prior critical section
-for that same <tt>rcu_node</tt> structure's <tt>-&gt;lock</tt>, and
-also full ordering against any of the current task's or CPU's prior critical
-sections for any <tt>rcu_node</tt> structure's <tt>-&gt;lock</tt>.
-
-<p>The next section will show how this ordering ensures that any
-accesses prior to the <tt>call_rcu()</tt> (particularly including phase
-one of the update)
-happen before the start of the corresponding grace period.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
- But what about <tt>synchronize_rcu()</tt>?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
- The <tt>synchronize_rcu()</tt> passes <tt>call_rcu()</tt>
- to <tt>wait_rcu_gp()</tt>, which invokes it.
- So either way, it eventually comes down to <tt>call_rcu()</tt>.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<h4><a name="Grace-Period Initialization">Grace-Period Initialization</a></h4>
-
-<p>Grace-period initialization is carried out by
-the grace-period kernel thread, which makes several passes over the
-<tt>rcu_node</tt> tree within the <tt>rcu_gp_init()</tt> function.
-This means that showing the full flow of ordering through the
-grace-period computation will require duplicating this tree.
-If you find this confusing, please note that the state of the
-<tt>rcu_node</tt> changes over time, just like Heraclitus's river.
-However, to keep the <tt>rcu_node</tt> river tractable, the
-grace-period kernel thread's traversals are presented in multiple
-parts, starting in this section with the various phases of
-grace-period initialization.
-
-<p>The first ordering-related grace-period initialization action is to
-advance the <tt>rcu_state</tt> structure's <tt>-&gt;gp_seq</tt>
-grace-period-number counter, as shown below:
-
-</p><p><img src="TreeRCU-gp-init-1.svg" alt="TreeRCU-gp-init-1.svg" width="75%">
-
-<p>The actual increment is carried out using <tt>smp_store_release()</tt>,
-which helps reject false-positive RCU CPU stall detection.
-Note that only the root <tt>rcu_node</tt> structure is touched.
-
-<p>The first pass through the <tt>rcu_node</tt> tree updates bitmasks
-based on CPUs having come online or gone offline since the start of
-the previous grace period.
-In the common case where the number of online CPUs for this <tt>rcu_node</tt>
-structure has not transitioned to or from zero,
-this pass will scan only the leaf <tt>rcu_node</tt> structures.
-However, if the number of online CPUs for a given leaf <tt>rcu_node</tt>
-structure has transitioned from zero,
-<tt>rcu_init_new_rnp()</tt> will be invoked for the first incoming CPU.
-Similarly, if the number of online CPUs for a given leaf <tt>rcu_node</tt>
-structure has transitioned to zero,
-<tt>rcu_cleanup_dead_rnp()</tt> will be invoked for the last outgoing CPU.
-The diagram below shows the path of ordering if the leftmost
-<tt>rcu_node</tt> structure onlines its first CPU and if the next
-<tt>rcu_node</tt> structure has no online CPUs
-(or, alternatively if the leftmost <tt>rcu_node</tt> structure offlines
-its last CPU and if the next <tt>rcu_node</tt> structure has no online CPUs).
-
-</p><p><img src="TreeRCU-gp-init-2.svg" alt="TreeRCU-gp-init-1.svg" width="75%">
-
-<p>The final <tt>rcu_gp_init()</tt> pass through the <tt>rcu_node</tt>
-tree traverses breadth-first, setting each <tt>rcu_node</tt> structure's
-<tt>-&gt;gp_seq</tt> field to the newly advanced value from the
-<tt>rcu_state</tt> structure, as shown in the following diagram.
-
-</p><p><img src="TreeRCU-gp-init-3.svg" alt="TreeRCU-gp-init-1.svg" width="75%">
-
-<p>This change will also cause each CPU's next call to
-<tt>__note_gp_changes()</tt>
-to notice that a new grace period has started, as described in the next
-section.
-But because the grace-period kthread started the grace period at the
-root (with the advancing of the <tt>rcu_state</tt> structure's
-<tt>-&gt;gp_seq</tt> field) before setting each leaf <tt>rcu_node</tt>
-structure's <tt>-&gt;gp_seq</tt> field, each CPU's observation of
-the start of the grace period will happen after the actual start
-of the grace period.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
- But what about the CPU that started the grace period?
- Why wouldn't it see the start of the grace period right when
- it started that grace period?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
- In some deep philosophical and overly anthromorphized
- sense, yes, the CPU starting the grace period is immediately
- aware of having done so.
- However, if we instead assume that RCU is not self-aware,
- then even the CPU starting the grace period does not really
- become aware of the start of this grace period until its
- first call to <tt>__note_gp_changes()</tt>.
- On the other hand, this CPU potentially gets early notification
- because it invokes <tt>__note_gp_changes()</tt> during its
- last <tt>rcu_gp_init()</tt> pass through its leaf
- <tt>rcu_node</tt> structure.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<h4><a name="Self-Reported Quiescent States">
-Self-Reported Quiescent States</a></h4>
-
-<p>When all entities that might block the grace period have reported
-quiescent states (or as described in a later section, had quiescent
-states reported on their behalf), the grace period can end.
-Online non-idle CPUs report their own quiescent states, as shown
-in the following diagram:
-
-</p><p><img src="TreeRCU-qs.svg" alt="TreeRCU-qs.svg" width="75%">
-
-<p>This is for the last CPU to report a quiescent state, which signals
-the end of the grace period.
-Earlier quiescent states would push up the <tt>rcu_node</tt> tree
-only until they encountered an <tt>rcu_node</tt> structure that
-is waiting for additional quiescent states.
-However, ordering is nevertheless preserved because some later quiescent
-state will acquire that <tt>rcu_node</tt> structure's <tt>-&gt;lock</tt>.
-
-<p>Any number of events can lead up to a CPU invoking
-<tt>note_gp_changes</tt> (or alternatively, directly invoking
-<tt>__note_gp_changes()</tt>), at which point that CPU will notice
-the start of a new grace period while holding its leaf
-<tt>rcu_node</tt> lock.
-Therefore, all execution shown in this diagram happens after the
-start of the grace period.
-In addition, this CPU will consider any RCU read-side critical
-section that started before the invocation of <tt>__note_gp_changes()</tt>
-to have started before the grace period, and thus a critical
-section that the grace period must wait on.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
- But a RCU read-side critical section might have started
- after the beginning of the grace period
- (the advancing of <tt>-&gt;gp_seq</tt> from earlier), so why should
- the grace period wait on such a critical section?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
- It is indeed not necessary for the grace period to wait on such
- a critical section.
- However, it is permissible to wait on it.
- And it is furthermore important to wait on it, as this
- lazy approach is far more scalable than a &ldquo;big bang&rdquo;
- all-at-once grace-period start could possibly be.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>If the CPU does a context switch, a quiescent state will be
-noted by <tt>rcu_node_context_switch()</tt> on the left.
-On the other hand, if the CPU takes a scheduler-clock interrupt
-while executing in usermode, a quiescent state will be noted by
-<tt>rcu_check_callbacks()</tt> on the right.
-Either way, the passage through a quiescent state will be noted
-in a per-CPU variable.
-
-<p>The next time an <tt>RCU_SOFTIRQ</tt> handler executes on
-this CPU (for example, after the next scheduler-clock
-interrupt), <tt>__rcu_process_callbacks()</tt> will invoke
-<tt>rcu_check_quiescent_state()</tt>, which will notice the
-recorded quiescent state, and invoke
-<tt>rcu_report_qs_rdp()</tt>.
-If <tt>rcu_report_qs_rdp()</tt> verifies that the quiescent state
-really does apply to the current grace period, it invokes
-<tt>rcu_report_rnp()</tt> which traverses up the <tt>rcu_node</tt>
-tree as shown at the bottom of the diagram, clearing bits from
-each <tt>rcu_node</tt> structure's <tt>-&gt;qsmask</tt> field,
-and propagating up the tree when the result is zero.
-
-<p>Note that traversal passes upwards out of a given <tt>rcu_node</tt>
-structure only if the current CPU is reporting the last quiescent
-state for the subtree headed by that <tt>rcu_node</tt> structure.
-A key point is that if a CPU's traversal stops at a given <tt>rcu_node</tt>
-structure, then there will be a later traversal by another CPU
-(or perhaps the same one) that proceeds upwards
-from that point, and the <tt>rcu_node</tt> <tt>-&gt;lock</tt>
-guarantees that the first CPU's quiescent state happens before the
-remainder of the second CPU's traversal.
-Applying this line of thought repeatedly shows that all CPUs'
-quiescent states happen before the last CPU traverses through
-the root <tt>rcu_node</tt> structure, the &ldquo;last CPU&rdquo;
-being the one that clears the last bit in the root <tt>rcu_node</tt>
-structure's <tt>-&gt;qsmask</tt> field.
-
-<h4><a name="Dynamic Tick Interface">Dynamic Tick Interface</a></h4>
-
-<p>Due to energy-efficiency considerations, RCU is forbidden from
-disturbing idle CPUs.
-CPUs are therefore required to notify RCU when entering or leaving idle
-state, which they do via fully ordered value-returning atomic operations
-on a per-CPU variable.
-The ordering effects are as shown below:
-
-</p><p><img src="TreeRCU-dyntick.svg" alt="TreeRCU-dyntick.svg" width="50%">
-
-<p>The RCU grace-period kernel thread samples the per-CPU idleness
-variable while holding the corresponding CPU's leaf <tt>rcu_node</tt>
-structure's <tt>-&gt;lock</tt>.
-This means that any RCU read-side critical sections that precede the
-idle period (the oval near the top of the diagram above) will happen
-before the end of the current grace period.
-Similarly, the beginning of the current grace period will happen before
-any RCU read-side critical sections that follow the
-idle period (the oval near the bottom of the diagram above).
-
-<p>Plumbing this into the full grace-period execution is described
-<a href="#Forcing Quiescent States">below</a>.
-
-<h4><a name="CPU-Hotplug Interface">CPU-Hotplug Interface</a></h4>
-
-<p>RCU is also forbidden from disturbing offline CPUs, which might well
-be powered off and removed from the system completely.
-CPUs are therefore required to notify RCU of their comings and goings
-as part of the corresponding CPU hotplug operations.
-The ordering effects are shown below:
-
-</p><p><img src="TreeRCU-hotplug.svg" alt="TreeRCU-hotplug.svg" width="50%">
-
-<p>Because CPU hotplug operations are much less frequent than idle transitions,
-they are heavier weight, and thus acquire the CPU's leaf <tt>rcu_node</tt>
-structure's <tt>-&gt;lock</tt> and update this structure's
-<tt>-&gt;qsmaskinitnext</tt>.
-The RCU grace-period kernel thread samples this mask to detect CPUs
-having gone offline since the beginning of this grace period.
-
-<p>Plumbing this into the full grace-period execution is described
-<a href="#Forcing Quiescent States">below</a>.
-
-<h4><a name="Forcing Quiescent States">Forcing Quiescent States</a></h4>
-
-<p>As noted above, idle and offline CPUs cannot report their own
-quiescent states, and therefore the grace-period kernel thread
-must do the reporting on their behalf.
-This process is called &ldquo;forcing quiescent states&rdquo;, it is
-repeated every few jiffies, and its ordering effects are shown below:
-
-</p><p><img src="TreeRCU-gp-fqs.svg" alt="TreeRCU-gp-fqs.svg" width="100%">
-
-<p>Each pass of quiescent state forcing is guaranteed to traverse the
-leaf <tt>rcu_node</tt> structures, and if there are no new quiescent
-states due to recently idled and/or offlined CPUs, then only the
-leaves are traversed.
-However, if there is a newly offlined CPU as illustrated on the left
-or a newly idled CPU as illustrated on the right, the corresponding
-quiescent state will be driven up towards the root.
-As with self-reported quiescent states, the upwards driving stops
-once it reaches an <tt>rcu_node</tt> structure that has quiescent
-states outstanding from other CPUs.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
- The leftmost drive to root stopped before it reached
- the root <tt>rcu_node</tt> structure, which means that
- there are still CPUs subordinate to that structure on
- which the current grace period is waiting.
- Given that, how is it possible that the rightmost drive
- to root ended the grace period?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
- Good analysis!
- It is in fact impossible in the absence of bugs in RCU.
- But this diagram is complex enough as it is, so simplicity
- overrode accuracy.
- You can think of it as poetic license, or you can think of
- it as misdirection that is resolved in the
- <a href="#Putting It All Together">stitched-together diagram</a>.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<h4><a name="Grace-Period Cleanup">Grace-Period Cleanup</a></h4>
-
-<p>Grace-period cleanup first scans the <tt>rcu_node</tt> tree
-breadth-first advancing all the <tt>-&gt;gp_seq</tt> fields, then it
-advances the <tt>rcu_state</tt> structure's <tt>-&gt;gp_seq</tt> field.
-The ordering effects are shown below:
-
-</p><p><img src="TreeRCU-gp-cleanup.svg" alt="TreeRCU-gp-cleanup.svg" width="75%">
-
-<p>As indicated by the oval at the bottom of the diagram, once
-grace-period cleanup is complete, the next grace period can begin.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
- But when precisely does the grace period end?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
- There is no useful single point at which the grace period
- can be said to end.
- The earliest reasonable candidate is as soon as the last
- CPU has reported its quiescent state, but it may be some
- milliseconds before RCU becomes aware of this.
- The latest reasonable candidate is once the <tt>rcu_state</tt>
- structure's <tt>-&gt;gp_seq</tt> field has been updated,
- but it is quite possible that some CPUs have already completed
- phase two of their updates by that time.
- In short, if you are going to work with RCU, you need to
- learn to embrace uncertainty.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-
-<h4><a name="Callback Invocation">Callback Invocation</a></h4>
-
-<p>Once a given CPU's leaf <tt>rcu_node</tt> structure's
-<tt>-&gt;gp_seq</tt> field has been updated, that CPU can begin
-invoking its RCU callbacks that were waiting for this grace period
-to end.
-These callbacks are identified by <tt>rcu_advance_cbs()</tt>,
-which is usually invoked by <tt>__note_gp_changes()</tt>.
-As shown in the diagram below, this invocation can be triggered by
-the scheduling-clock interrupt (<tt>rcu_check_callbacks()</tt> on
-the left) or by idle entry (<tt>rcu_cleanup_after_idle()</tt> on
-the right, but only for kernels build with
-<tt>CONFIG_RCU_FAST_NO_HZ=y</tt>).
-Either way, <tt>RCU_SOFTIRQ</tt> is raised, which results in
-<tt>rcu_do_batch()</tt> invoking the callbacks, which in turn
-allows those callbacks to carry out (either directly or indirectly
-via wakeup) the needed phase-two processing for each update.
-
-</p><p><img src="TreeRCU-callback-invocation.svg" alt="TreeRCU-callback-invocation.svg" width="60%">
-
-<p>Please note that callback invocation can also be prompted by any
-number of corner-case code paths, for example, when a CPU notes that
-it has excessive numbers of callbacks queued.
-In all cases, the CPU acquires its leaf <tt>rcu_node</tt> structure's
-<tt>-&gt;lock</tt> before invoking callbacks, which preserves the
-required ordering against the newly completed grace period.
-
-<p>However, if the callback function communicates to other CPUs,
-for example, doing a wakeup, then it is that function's responsibility
-to maintain ordering.
-For example, if the callback function wakes up a task that runs on
-some other CPU, proper ordering must in place in both the callback
-function and the task being awakened.
-To see why this is important, consider the top half of the
-<a href="#Grace-Period Cleanup">grace-period cleanup</a> diagram.
-The callback might be running on a CPU corresponding to the leftmost
-leaf <tt>rcu_node</tt> structure, and awaken a task that is to run on
-a CPU corresponding to the rightmost leaf <tt>rcu_node</tt> structure,
-and the grace-period kernel thread might not yet have reached the
-rightmost leaf.
-In this case, the grace period's memory ordering might not yet have
-reached that CPU, so again the callback function and the awakened
-task must supply proper ordering.
-
-<h3><a name="Putting It All Together">Putting It All Together</a></h3>
-
-<p>A stitched-together diagram is
-<a href="Tree-RCU-Diagram.html">here</a>.
-
-<h3><a name="Legal Statement">
-Legal Statement</a></h3>
-
-<p>This work represents the view of the author and does not necessarily
-represent the view of IBM.
-
-</p><p>Linux is a registered trademark of Linus Torvalds.
-
-</p><p>Other company, product, and service names may be trademarks or
-service marks of others.
-
-</body></html>
diff --git a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst
new file mode 100644
index 000000000000..1a5ff1a9f02e
--- /dev/null
+++ b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst
@@ -0,0 +1,648 @@
+======================================================
+A Tour Through TREE_RCU's Grace-Period Memory Ordering
+======================================================
+
+August 8, 2017
+
+This article was contributed by Paul E. McKenney
+
+Introduction
+============
+
+This document gives a rough visual overview of how Tree RCU's
+grace-period memory ordering guarantee is provided.
+
+What Is Tree RCU's Grace Period Memory Ordering Guarantee?
+==========================================================
+
+RCU grace periods provide extremely strong memory-ordering guarantees
+for non-idle non-offline code.
+Any code that happens after the end of a given RCU grace period is guaranteed
+to see the effects of all accesses prior to the beginning of that grace
+period that are within RCU read-side critical sections.
+Similarly, any code that happens before the beginning of a given RCU grace
+period is guaranteed to not see the effects of all accesses following the end
+of that grace period that are within RCU read-side critical sections.
+
+Note well that RCU-sched read-side critical sections include any region
+of code for which preemption is disabled.
+Given that each individual machine instruction can be thought of as
+an extremely small region of preemption-disabled code, one can think of
+``synchronize_rcu()`` as ``smp_mb()`` on steroids.
+
+RCU updaters use this guarantee by splitting their updates into
+two phases, one of which is executed before the grace period and
+the other of which is executed after the grace period.
+In the most common use case, phase one removes an element from
+a linked RCU-protected data structure, and phase two frees that element.
+For this to work, any readers that have witnessed state prior to the
+phase-one update (in the common case, removal) must not witness state
+following the phase-two update (in the common case, freeing).
+
+The RCU implementation provides this guarantee using a network
+of lock-based critical sections, memory barriers, and per-CPU
+processing, as is described in the following sections.
+
+Tree RCU Grace Period Memory Ordering Building Blocks
+=====================================================
+
+The workhorse for RCU's grace-period memory ordering is the
+critical section for the ``rcu_node`` structure's
+``->lock``. These critical sections use helper functions for lock
+acquisition, including ``raw_spin_lock_rcu_node()``,
+``raw_spin_lock_irq_rcu_node()``, and ``raw_spin_lock_irqsave_rcu_node()``.
+Their lock-release counterparts are ``raw_spin_unlock_rcu_node()``,
+``raw_spin_unlock_irq_rcu_node()``, and
+``raw_spin_unlock_irqrestore_rcu_node()``, respectively.
+For completeness, a ``raw_spin_trylock_rcu_node()`` is also provided.
+The key point is that the lock-acquisition functions, including
+``raw_spin_trylock_rcu_node()``, all invoke ``smp_mb__after_unlock_lock()``
+immediately after successful acquisition of the lock.
+
+Therefore, for any given ``rcu_node`` structure, any access
+happening before one of the above lock-release functions will be seen
+by all CPUs as happening before any access happening after a later
+one of the above lock-acquisition functions.
+Furthermore, any access happening before one of the
+above lock-release function on any given CPU will be seen by all
+CPUs as happening before any access happening after a later one
+of the above lock-acquisition functions executing on that same CPU,
+even if the lock-release and lock-acquisition functions are operating
+on different ``rcu_node`` structures.
+Tree RCU uses these two ordering guarantees to form an ordering
+network among all CPUs that were in any way involved in the grace
+period, including any CPUs that came online or went offline during
+the grace period in question.
+
+The following litmus test exhibits the ordering effects of these
+lock-acquisition and lock-release functions::
+
+ 1 int x, y, z;
+ 2
+ 3 void task0(void)
+ 4 {
+ 5 raw_spin_lock_rcu_node(rnp);
+ 6 WRITE_ONCE(x, 1);
+ 7 r1 = READ_ONCE(y);
+ 8 raw_spin_unlock_rcu_node(rnp);
+ 9 }
+ 10
+ 11 void task1(void)
+ 12 {
+ 13 raw_spin_lock_rcu_node(rnp);
+ 14 WRITE_ONCE(y, 1);
+ 15 r2 = READ_ONCE(z);
+ 16 raw_spin_unlock_rcu_node(rnp);
+ 17 }
+ 18
+ 19 void task2(void)
+ 20 {
+ 21 WRITE_ONCE(z, 1);
+ 22 smp_mb();
+ 23 r3 = READ_ONCE(x);
+ 24 }
+ 25
+ 26 WARN_ON(r1 == 0 && r2 == 0 && r3 == 0);
+
+The ``WARN_ON()`` is evaluated at "the end of time",
+after all changes have propagated throughout the system.
+Without the ``smp_mb__after_unlock_lock()`` provided by the
+acquisition functions, this ``WARN_ON()`` could trigger, for example
+on PowerPC.
+The ``smp_mb__after_unlock_lock()`` invocations prevent this
+``WARN_ON()`` from triggering.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| But the chain of rcu_node-structure lock acquisitions guarantees |
+| that new readers will see all of the updater's pre-grace-period |
+| accesses and also guarantees that the updater's post-grace-period |
+| accesses will see all of the old reader's accesses. So why do we |
+| need all of those calls to smp_mb__after_unlock_lock()? |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| Because we must provide ordering for RCU's polling grace-period |
+| primitives, for example, get_state_synchronize_rcu() and |
+| poll_state_synchronize_rcu(). Consider this code:: |
+| |
+| CPU 0 CPU 1 |
+| ---- ---- |
+| WRITE_ONCE(X, 1) WRITE_ONCE(Y, 1) |
+| g = get_state_synchronize_rcu() smp_mb() |
+| while (!poll_state_synchronize_rcu(g)) r1 = READ_ONCE(X) |
+| continue; |
+| r0 = READ_ONCE(Y) |
+| |
+| RCU guarantees that the outcome r0 == 0 && r1 == 0 will not |
+| happen, even if CPU 1 is in an RCU extended quiescent state |
+| (idle or offline) and thus won't interact directly with the RCU |
+| core processing at all. |
++-----------------------------------------------------------------------+
+
+This approach must be extended to include idle CPUs, which need
+RCU's grace-period memory ordering guarantee to extend to any
+RCU read-side critical sections preceding and following the current
+idle sojourn.
+This case is handled by calls to the strongly ordered
+``atomic_add_return()`` read-modify-write atomic operation that
+is invoked within ``ct_kernel_exit_state()`` at idle-entry
+time and within ``ct_kernel_enter_state()`` at idle-exit time.
+The grace-period kthread invokes first ``ct_rcu_watching_cpu_acquire()``
+(preceded by a full memory barrier) and ``rcu_watching_snap_stopped_since()``
+(both of which rely on acquire semantics) to detect idle CPUs.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| But what about CPUs that remain offline for the entire grace period? |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| Such CPUs will be offline at the beginning of the grace period, so |
+| the grace period won't expect quiescent states from them. Races |
+| between grace-period start and CPU-hotplug operations are mediated |
+| by the CPU's leaf ``rcu_node`` structure's ``->lock`` as described |
+| above. |
++-----------------------------------------------------------------------+
+
+The approach must be extended to handle one final case, that of waking a
+task blocked in ``synchronize_rcu()``. This task might be affined to
+a CPU that is not yet aware that the grace period has ended, and thus
+might not yet be subject to the grace period's memory ordering.
+Therefore, there is an ``smp_mb()`` after the return from
+``wait_for_completion()`` in the ``synchronize_rcu()`` code path.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| What? Where??? I don't see any ``smp_mb()`` after the return from |
+| ``wait_for_completion()``!!! |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| That would be because I spotted the need for that ``smp_mb()`` during |
+| the creation of this documentation, and it is therefore unlikely to |
+| hit mainline before v4.14. Kudos to Lance Roy, Will Deacon, Peter |
+| Zijlstra, and Jonathan Cameron for asking questions that sensitized |
+| me to the rather elaborate sequence of events that demonstrate the |
+| need for this memory barrier. |
++-----------------------------------------------------------------------+
+
+Tree RCU's grace--period memory-ordering guarantees rely most heavily on
+the ``rcu_node`` structure's ``->lock`` field, so much so that it is
+necessary to abbreviate this pattern in the diagrams in the next
+section. For example, consider the ``rcu_prepare_for_idle()`` function
+shown below, which is one of several functions that enforce ordering of
+newly arrived RCU callbacks against future grace periods:
+
+::
+
+ 1 static void rcu_prepare_for_idle(void)
+ 2 {
+ 3 bool needwake;
+ 4 struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
+ 5 struct rcu_node *rnp;
+ 6 int tne;
+ 7
+ 8 lockdep_assert_irqs_disabled();
+ 9 if (rcu_rdp_is_offloaded(rdp))
+ 10 return;
+ 11
+ 12 /* Handle nohz enablement switches conservatively. */
+ 13 tne = READ_ONCE(tick_nohz_active);
+ 14 if (tne != rdp->tick_nohz_enabled_snap) {
+ 15 if (!rcu_segcblist_empty(&rdp->cblist))
+ 16 invoke_rcu_core(); /* force nohz to see update. */
+ 17 rdp->tick_nohz_enabled_snap = tne;
+ 18 return;
+ 19 }
+ 20 if (!tne)
+ 21 return;
+ 22
+ 23 /*
+ 24 * If we have not yet accelerated this jiffy, accelerate all
+ 25 * callbacks on this CPU.
+ 26 */
+ 27 if (rdp->last_accelerate == jiffies)
+ 28 return;
+ 29 rdp->last_accelerate = jiffies;
+ 30 if (rcu_segcblist_pend_cbs(&rdp->cblist)) {
+ 31 rnp = rdp->mynode;
+ 32 raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
+ 33 needwake = rcu_accelerate_cbs(rnp, rdp);
+ 34 raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
+ 35 if (needwake)
+ 36 rcu_gp_kthread_wake();
+ 37 }
+ 38 }
+
+But the only part of ``rcu_prepare_for_idle()`` that really matters for
+this discussion are lines 32–34. We will therefore abbreviate this
+function as follows:
+
+.. kernel-figure:: rcu_node-lock.svg
+
+The box represents the ``rcu_node`` structure's ``->lock`` critical
+section, with the double line on top representing the additional
+``smp_mb__after_unlock_lock()``.
+
+Tree RCU Grace Period Memory Ordering Components
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Tree RCU's grace-period memory-ordering guarantee is provided by a
+number of RCU components:
+
+#. `Callback Registry`_
+#. `Grace-Period Initialization`_
+#. `Self-Reported Quiescent States`_
+#. `Dynamic Tick Interface`_
+#. `CPU-Hotplug Interface`_
+#. `Forcing Quiescent States`_
+#. `Grace-Period Cleanup`_
+#. `Callback Invocation`_
+
+Each of the following section looks at the corresponding component in
+detail.
+
+Callback Registry
+^^^^^^^^^^^^^^^^^
+
+If RCU's grace-period guarantee is to mean anything at all, any access
+that happens before a given invocation of ``call_rcu()`` must also
+happen before the corresponding grace period. The implementation of this
+portion of RCU's grace period guarantee is shown in the following
+figure:
+
+.. kernel-figure:: TreeRCU-callback-registry.svg
+
+Because ``call_rcu()`` normally acts only on CPU-local state, it
+provides no ordering guarantees, either for itself or for phase one of
+the update (which again will usually be removal of an element from an
+RCU-protected data structure). It simply enqueues the ``rcu_head``
+structure on a per-CPU list, which cannot become associated with a grace
+period until a later call to ``rcu_accelerate_cbs()``, as shown in the
+diagram above.
+
+One set of code paths shown on the left invokes ``rcu_accelerate_cbs()``
+via ``note_gp_changes()``, either directly from ``call_rcu()`` (if the
+current CPU is inundated with queued ``rcu_head`` structures) or more
+likely from an ``RCU_SOFTIRQ`` handler. Another code path in the middle
+is taken only in kernels built with ``CONFIG_RCU_FAST_NO_HZ=y``, which
+invokes ``rcu_accelerate_cbs()`` via ``rcu_prepare_for_idle()``. The
+final code path on the right is taken only in kernels built with
+``CONFIG_HOTPLUG_CPU=y``, which invokes ``rcu_accelerate_cbs()`` via
+``rcu_advance_cbs()``, ``rcu_migrate_callbacks``,
+``rcutree_migrate_callbacks()``, and ``takedown_cpu()``, which in turn
+is invoked on a surviving CPU after the outgoing CPU has been completely
+offlined.
+
+There are a few other code paths within grace-period processing that
+opportunistically invoke ``rcu_accelerate_cbs()``. However, either way,
+all of the CPU's recently queued ``rcu_head`` structures are associated
+with a future grace-period number under the protection of the CPU's lead
+``rcu_node`` structure's ``->lock``. In all cases, there is full
+ordering against any prior critical section for that same ``rcu_node``
+structure's ``->lock``, and also full ordering against any of the
+current task's or CPU's prior critical sections for any ``rcu_node``
+structure's ``->lock``.
+
+The next section will show how this ordering ensures that any accesses
+prior to the ``call_rcu()`` (particularly including phase one of the
+update) happen before the start of the corresponding grace period.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| But what about ``synchronize_rcu()``? |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| The ``synchronize_rcu()`` passes ``call_rcu()`` to ``wait_rcu_gp()``, |
+| which invokes it. So either way, it eventually comes down to |
+| ``call_rcu()``. |
++-----------------------------------------------------------------------+
+
+Grace-Period Initialization
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Grace-period initialization is carried out by the grace-period kernel
+thread, which makes several passes over the ``rcu_node`` tree within the
+``rcu_gp_init()`` function. This means that showing the full flow of
+ordering through the grace-period computation will require duplicating
+this tree. If you find this confusing, please note that the state of the
+``rcu_node`` changes over time, just like Heraclitus's river. However,
+to keep the ``rcu_node`` river tractable, the grace-period kernel
+thread's traversals are presented in multiple parts, starting in this
+section with the various phases of grace-period initialization.
+
+The first ordering-related grace-period initialization action is to
+advance the ``rcu_state`` structure's ``->gp_seq`` grace-period-number
+counter, as shown below:
+
+.. kernel-figure:: TreeRCU-gp-init-1.svg
+
+The actual increment is carried out using ``smp_store_release()``, which
+helps reject false-positive RCU CPU stall detection. Note that only the
+root ``rcu_node`` structure is touched.
+
+The first pass through the ``rcu_node`` tree updates bitmasks based on
+CPUs having come online or gone offline since the start of the previous
+grace period. In the common case where the number of online CPUs for
+this ``rcu_node`` structure has not transitioned to or from zero, this
+pass will scan only the leaf ``rcu_node`` structures. However, if the
+number of online CPUs for a given leaf ``rcu_node`` structure has
+transitioned from zero, ``rcu_init_new_rnp()`` will be invoked for the
+first incoming CPU. Similarly, if the number of online CPUs for a given
+leaf ``rcu_node`` structure has transitioned to zero,
+``rcu_cleanup_dead_rnp()`` will be invoked for the last outgoing CPU.
+The diagram below shows the path of ordering if the leftmost
+``rcu_node`` structure onlines its first CPU and if the next
+``rcu_node`` structure has no online CPUs (or, alternatively if the
+leftmost ``rcu_node`` structure offlines its last CPU and if the next
+``rcu_node`` structure has no online CPUs).
+
+.. kernel-figure:: TreeRCU-gp-init-2.svg
+
+The final ``rcu_gp_init()`` pass through the ``rcu_node`` tree traverses
+breadth-first, setting each ``rcu_node`` structure's ``->gp_seq`` field
+to the newly advanced value from the ``rcu_state`` structure, as shown
+in the following diagram.
+
+.. kernel-figure:: TreeRCU-gp-init-3.svg
+
+This change will also cause each CPU's next call to
+``__note_gp_changes()`` to notice that a new grace period has started,
+as described in the next section. But because the grace-period kthread
+started the grace period at the root (with the advancing of the
+``rcu_state`` structure's ``->gp_seq`` field) before setting each leaf
+``rcu_node`` structure's ``->gp_seq`` field, each CPU's observation of
+the start of the grace period will happen after the actual start of the
+grace period.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| But what about the CPU that started the grace period? Why wouldn't it |
+| see the start of the grace period right when it started that grace |
+| period? |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| In some deep philosophical and overly anthromorphized sense, yes, the |
+| CPU starting the grace period is immediately aware of having done so. |
+| However, if we instead assume that RCU is not self-aware, then even |
+| the CPU starting the grace period does not really become aware of the |
+| start of this grace period until its first call to |
+| ``__note_gp_changes()``. On the other hand, this CPU potentially gets |
+| early notification because it invokes ``__note_gp_changes()`` during |
+| its last ``rcu_gp_init()`` pass through its leaf ``rcu_node`` |
+| structure. |
++-----------------------------------------------------------------------+
+
+Self-Reported Quiescent States
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+When all entities that might block the grace period have reported
+quiescent states (or as described in a later section, had quiescent
+states reported on their behalf), the grace period can end. Online
+non-idle CPUs report their own quiescent states, as shown in the
+following diagram:
+
+.. kernel-figure:: TreeRCU-qs.svg
+
+This is for the last CPU to report a quiescent state, which signals the
+end of the grace period. Earlier quiescent states would push up the
+``rcu_node`` tree only until they encountered an ``rcu_node`` structure
+that is waiting for additional quiescent states. However, ordering is
+nevertheless preserved because some later quiescent state will acquire
+that ``rcu_node`` structure's ``->lock``.
+
+Any number of events can lead up to a CPU invoking ``note_gp_changes``
+(or alternatively, directly invoking ``__note_gp_changes()``), at which
+point that CPU will notice the start of a new grace period while holding
+its leaf ``rcu_node`` lock. Therefore, all execution shown in this
+diagram happens after the start of the grace period. In addition, this
+CPU will consider any RCU read-side critical section that started before
+the invocation of ``__note_gp_changes()`` to have started before the
+grace period, and thus a critical section that the grace period must
+wait on.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| But a RCU read-side critical section might have started after the |
+| beginning of the grace period (the advancing of ``->gp_seq`` from |
+| earlier), so why should the grace period wait on such a critical |
+| section? |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| It is indeed not necessary for the grace period to wait on such a |
+| critical section. However, it is permissible to wait on it. And it is |
+| furthermore important to wait on it, as this lazy approach is far |
+| more scalable than a “big bang” all-at-once grace-period start could |
+| possibly be. |
++-----------------------------------------------------------------------+
+
+If the CPU does a context switch, a quiescent state will be noted by
+``rcu_note_context_switch()`` on the left. On the other hand, if the CPU
+takes a scheduler-clock interrupt while executing in usermode, a
+quiescent state will be noted by ``rcu_sched_clock_irq()`` on the right.
+Either way, the passage through a quiescent state will be noted in a
+per-CPU variable.
+
+The next time an ``RCU_SOFTIRQ`` handler executes on this CPU (for
+example, after the next scheduler-clock interrupt), ``rcu_core()`` will
+invoke ``rcu_check_quiescent_state()``, which will notice the recorded
+quiescent state, and invoke ``rcu_report_qs_rdp()``. If
+``rcu_report_qs_rdp()`` verifies that the quiescent state really does
+apply to the current grace period, it invokes ``rcu_report_rnp()`` which
+traverses up the ``rcu_node`` tree as shown at the bottom of the
+diagram, clearing bits from each ``rcu_node`` structure's ``->qsmask``
+field, and propagating up the tree when the result is zero.
+
+Note that traversal passes upwards out of a given ``rcu_node`` structure
+only if the current CPU is reporting the last quiescent state for the
+subtree headed by that ``rcu_node`` structure. A key point is that if a
+CPU's traversal stops at a given ``rcu_node`` structure, then there will
+be a later traversal by another CPU (or perhaps the same one) that
+proceeds upwards from that point, and the ``rcu_node`` ``->lock``
+guarantees that the first CPU's quiescent state happens before the
+remainder of the second CPU's traversal. Applying this line of thought
+repeatedly shows that all CPUs' quiescent states happen before the last
+CPU traverses through the root ``rcu_node`` structure, the “last CPU”
+being the one that clears the last bit in the root ``rcu_node``
+structure's ``->qsmask`` field.
+
+Dynamic Tick Interface
+^^^^^^^^^^^^^^^^^^^^^^
+
+Due to energy-efficiency considerations, RCU is forbidden from
+disturbing idle CPUs. CPUs are therefore required to notify RCU when
+entering or leaving idle state, which they do via fully ordered
+value-returning atomic operations on a per-CPU variable. The ordering
+effects are as shown below:
+
+.. kernel-figure:: TreeRCU-dyntick.svg
+
+The RCU grace-period kernel thread samples the per-CPU idleness variable
+while holding the corresponding CPU's leaf ``rcu_node`` structure's
+``->lock``. This means that any RCU read-side critical sections that
+precede the idle period (the oval near the top of the diagram above)
+will happen before the end of the current grace period. Similarly, the
+beginning of the current grace period will happen before any RCU
+read-side critical sections that follow the idle period (the oval near
+the bottom of the diagram above).
+
+Plumbing this into the full grace-period execution is described
+`below <Forcing Quiescent States_>`__.
+
+CPU-Hotplug Interface
+^^^^^^^^^^^^^^^^^^^^^
+
+RCU is also forbidden from disturbing offline CPUs, which might well be
+powered off and removed from the system completely. CPUs are therefore
+required to notify RCU of their comings and goings as part of the
+corresponding CPU hotplug operations. The ordering effects are shown
+below:
+
+.. kernel-figure:: TreeRCU-hotplug.svg
+
+Because CPU hotplug operations are much less frequent than idle
+transitions, they are heavier weight, and thus acquire the CPU's leaf
+``rcu_node`` structure's ``->lock`` and update this structure's
+``->qsmaskinitnext``. The RCU grace-period kernel thread samples this
+mask to detect CPUs having gone offline since the beginning of this
+grace period.
+
+Plumbing this into the full grace-period execution is described
+`below <Forcing Quiescent States_>`__.
+
+Forcing Quiescent States
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+As noted above, idle and offline CPUs cannot report their own quiescent
+states, and therefore the grace-period kernel thread must do the
+reporting on their behalf. This process is called “forcing quiescent
+states”, it is repeated every few jiffies, and its ordering effects are
+shown below:
+
+.. kernel-figure:: TreeRCU-gp-fqs.svg
+
+Each pass of quiescent state forcing is guaranteed to traverse the leaf
+``rcu_node`` structures, and if there are no new quiescent states due to
+recently idled and/or offlined CPUs, then only the leaves are traversed.
+However, if there is a newly offlined CPU as illustrated on the left or
+a newly idled CPU as illustrated on the right, the corresponding
+quiescent state will be driven up towards the root. As with
+self-reported quiescent states, the upwards driving stops once it
+reaches an ``rcu_node`` structure that has quiescent states outstanding
+from other CPUs.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| The leftmost drive to root stopped before it reached the root |
+| ``rcu_node`` structure, which means that there are still CPUs |
+| subordinate to that structure on which the current grace period is |
+| waiting. Given that, how is it possible that the rightmost drive to |
+| root ended the grace period? |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| Good analysis! It is in fact impossible in the absence of bugs in |
+| RCU. But this diagram is complex enough as it is, so simplicity |
+| overrode accuracy. You can think of it as poetic license, or you can |
+| think of it as misdirection that is resolved in the |
+| `stitched-together diagram <Putting It All Together_>`__. |
++-----------------------------------------------------------------------+
+
+Grace-Period Cleanup
+^^^^^^^^^^^^^^^^^^^^
+
+Grace-period cleanup first scans the ``rcu_node`` tree breadth-first
+advancing all the ``->gp_seq`` fields, then it advances the
+``rcu_state`` structure's ``->gp_seq`` field. The ordering effects are
+shown below:
+
+.. kernel-figure:: TreeRCU-gp-cleanup.svg
+
+As indicated by the oval at the bottom of the diagram, once grace-period
+cleanup is complete, the next grace period can begin.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| But when precisely does the grace period end? |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| There is no useful single point at which the grace period can be said |
+| to end. The earliest reasonable candidate is as soon as the last CPU |
+| has reported its quiescent state, but it may be some milliseconds |
+| before RCU becomes aware of this. The latest reasonable candidate is |
+| once the ``rcu_state`` structure's ``->gp_seq`` field has been |
+| updated, but it is quite possible that some CPUs have already |
+| completed phase two of their updates by that time. In short, if you |
+| are going to work with RCU, you need to learn to embrace uncertainty. |
++-----------------------------------------------------------------------+
+
+Callback Invocation
+^^^^^^^^^^^^^^^^^^^
+
+Once a given CPU's leaf ``rcu_node`` structure's ``->gp_seq`` field has
+been updated, that CPU can begin invoking its RCU callbacks that were
+waiting for this grace period to end. These callbacks are identified by
+``rcu_advance_cbs()``, which is usually invoked by
+``__note_gp_changes()``. As shown in the diagram below, this invocation
+can be triggered by the scheduling-clock interrupt
+(``rcu_sched_clock_irq()`` on the left) or by idle entry
+(``rcu_cleanup_after_idle()`` on the right, but only for kernels build
+with ``CONFIG_RCU_FAST_NO_HZ=y``). Either way, ``RCU_SOFTIRQ`` is
+raised, which results in ``rcu_do_batch()`` invoking the callbacks,
+which in turn allows those callbacks to carry out (either directly or
+indirectly via wakeup) the needed phase-two processing for each update.
+
+.. kernel-figure:: TreeRCU-callback-invocation.svg
+
+Please note that callback invocation can also be prompted by any number
+of corner-case code paths, for example, when a CPU notes that it has
+excessive numbers of callbacks queued. In all cases, the CPU acquires
+its leaf ``rcu_node`` structure's ``->lock`` before invoking callbacks,
+which preserves the required ordering against the newly completed grace
+period.
+
+However, if the callback function communicates to other CPUs, for
+example, doing a wakeup, then it is that function's responsibility to
+maintain ordering. For example, if the callback function wakes up a task
+that runs on some other CPU, proper ordering must in place in both the
+callback function and the task being awakened. To see why this is
+important, consider the top half of the `grace-period
+cleanup`_ diagram. The callback might be
+running on a CPU corresponding to the leftmost leaf ``rcu_node``
+structure, and awaken a task that is to run on a CPU corresponding to
+the rightmost leaf ``rcu_node`` structure, and the grace-period kernel
+thread might not yet have reached the rightmost leaf. In this case, the
+grace period's memory ordering might not yet have reached that CPU, so
+again the callback function and the awakened task must supply proper
+ordering.
+
+Putting It All Together
+~~~~~~~~~~~~~~~~~~~~~~~
+
+A stitched-together diagram is here:
+
+.. kernel-figure:: TreeRCU-gp.svg
+
+Legal Statement
+~~~~~~~~~~~~~~~
+
+This work represents the view of the author and does not necessarily
+represent the view of IBM.
+
+Linux is a registered trademark of Linus Torvalds.
+
+Other company, product, and service names may be trademarks or service
+marks of others.
diff --git a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-callback-invocation.svg b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-callback-invocation.svg
index 832408313d93..3fcf0c17cef2 100644
--- a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-callback-invocation.svg
+++ b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-callback-invocation.svg
@@ -349,7 +349,7 @@
font-weight="bold"
font-size="192"
id="text202-7-5"
- style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_check_callbacks()</text>
+ style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_sched_clock_irq()</text>
<rect
x="7069.6187"
y="5087.4678"
diff --git a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-callback-registry.svg b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-callback-registry.svg
index 7ac6f9269806..63eff867175a 100644
--- a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-callback-registry.svg
+++ b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-callback-registry.svg
@@ -566,15 +566,6 @@
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcutree_migrate_callbacks()</text>
<text
xml:space="preserve"
- x="8335.4873"
- y="5357.1006"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- id="text202-7-9-6-0"
- style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_migrate_callbacks()</text>
- <text
- xml:space="preserve"
x="8768.4678"
y="6224.9038"
font-style="normal"
diff --git a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-dyntick.svg b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-dyntick.svg
index 423df00c4df9..3fbc19c48a58 100644
--- a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-dyntick.svg
+++ b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-dyntick.svg
@@ -528,7 +528,7 @@
font-style="normal"
y="-8652.5312"
x="2466.7822"
- xml:space="preserve">dyntick_save_progress_counter()</text>
+ xml:space="preserve">rcu_watching_snap_save()</text>
<text
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"
id="text202-7-2-7-2-0"
@@ -537,7 +537,7 @@
font-style="normal"
y="-8368.1475"
x="2463.3262"
- xml:space="preserve">rcu_implicit_dynticks_qs()</text>
+ xml:space="preserve">rcu_watching_snap_recheck()</text>
</g>
<g
id="g4504"
@@ -607,7 +607,7 @@
font-weight="bold"
font-size="192"
id="text202-7-5-3-27-6"
- style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_dynticks_eqs_enter()</text>
+ style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">ct_kernel_exit_state()</text>
<text
xml:space="preserve"
x="3745.7725"
@@ -638,7 +638,7 @@
font-weight="bold"
font-size="192"
id="text202-7-5-3-27-6-1"
- style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_dynticks_eqs_exit()</text>
+ style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">ct_kernel_enter_state()</text>
<text
xml:space="preserve"
x="3745.7725"
diff --git a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp-fqs.svg b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp-fqs.svg
index 7ddc094d7f28..25c7acc8a4c2 100644
--- a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp-fqs.svg
+++ b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp-fqs.svg
@@ -844,7 +844,7 @@
font-style="normal"
y="1547.8876"
x="4417.6396"
- xml:space="preserve">dyntick_save_progress_counter()</text>
+ xml:space="preserve">rcu_watching_snap_save()</text>
<g
style="fill:none;stroke-width:0.025in"
transform="translate(6501.9719,-10685.904)"
@@ -899,7 +899,7 @@
font-style="normal"
y="1858.8729"
x="4414.1836"
- xml:space="preserve">rcu_implicit_dynticks_qs()</text>
+ xml:space="preserve">rcu_watching_snap_recheck()</text>
<text
xml:space="preserve"
x="14659.87"
@@ -977,7 +977,7 @@
font-weight="bold"
font-size="192"
id="text202-7-5-3-27-6"
- style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_dynticks_eqs_enter()</text>
+ style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">ct_kernel_exit_state()</text>
<text
xml:space="preserve"
x="3745.7725"
@@ -1008,7 +1008,7 @@
font-weight="bold"
font-size="192"
id="text202-7-5-3-27-6-1"
- style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_dynticks_eqs_exit()</text>
+ style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">ct_kernel_enter_state()</text>
<text
xml:space="preserve"
x="3745.7725"
@@ -1135,7 +1135,7 @@
font-weight="bold"
font-size="192"
id="text202-7-5-3-27-6-5"
- style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_report_dead()</text>
+ style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcutree_report_cpu_dead()</text>
<text
xml:space="preserve"
x="3745.7725"
@@ -1256,7 +1256,7 @@
font-style="normal"
y="3679.27"
x="-3804.9949"
- xml:space="preserve">rcu_cpu_starting()</text>
+ xml:space="preserve">rcutree_report_cpu_starting()</text>
<g
style="fill:none;stroke-width:0.025in"
id="g3107-7-5-0"
diff --git a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp.svg b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp.svg
index acd73c7ad0f4..d05bc7b27edb 100644
--- a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp.svg
+++ b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp.svg
@@ -1448,15 +1448,6 @@
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcutree_migrate_callbacks()</text>
<text
xml:space="preserve"
- x="8335.4873"
- y="5357.1006"
- font-style="normal"
- font-weight="bold"
- font-size="192"
- id="text202-7-9-6-0"
- style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_migrate_callbacks()</text>
- <text
- xml:space="preserve"
x="8768.4678"
y="6224.9038"
font-style="normal"
@@ -2983,7 +2974,7 @@
font-style="normal"
y="38114.047"
x="-334.33856"
- xml:space="preserve">dyntick_save_progress_counter()</text>
+ xml:space="preserve">rcu_watching_snap_save()</text>
<g
style="fill:none;stroke-width:0.025in"
transform="translate(1749.9916,25880.249)"
@@ -3038,7 +3029,7 @@
font-style="normal"
y="38425.035"
x="-337.79462"
- xml:space="preserve">rcu_implicit_dynticks_qs()</text>
+ xml:space="preserve">rcu_watching_snap_recheck()</text>
<text
xml:space="preserve"
x="9907.8887"
@@ -3116,7 +3107,7 @@
font-weight="bold"
font-size="192"
id="text202-7-5-3-27-6"
- style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_dynticks_eqs_enter()</text>
+ style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">ct_kernel_exit_state()</text>
<text
xml:space="preserve"
x="3745.7725"
@@ -3147,7 +3138,7 @@
font-weight="bold"
font-size="192"
id="text202-7-5-3-27-6-1"
- style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_dynticks_eqs_exit()</text>
+ style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">ct_kernel_enter_state()</text>
<text
xml:space="preserve"
x="3745.7725"
@@ -3274,7 +3265,7 @@
font-weight="bold"
font-size="192"
id="text202-7-5-3-27-6-5"
- style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_report_dead()</text>
+ style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcutree_report_cpu_dead()</text>
<text
xml:space="preserve"
x="3745.7725"
@@ -3395,7 +3386,7 @@
font-style="normal"
y="3679.27"
x="-3804.9949"
- xml:space="preserve">rcu_cpu_starting()</text>
+ xml:space="preserve">rcutree_report_cpu_starting()</text>
<g
style="fill:none;stroke-width:0.025in"
id="g3107-7-5-0"
@@ -3880,7 +3871,7 @@
font-style="normal"
y="-4418.6582"
x="3745.7725"
- xml:space="preserve">rcu_node_context_switch()</text>
+ xml:space="preserve">rcu_note_context_switch()</text>
</g>
<g
transform="translate(1881.1886,54048.57)"
@@ -3902,7 +3893,7 @@
font-style="normal"
y="-4418.6582"
x="3745.7725"
- xml:space="preserve">rcu_check_callbacks()</text>
+ xml:space="preserve">rcu_sched_clock_irq()</text>
</g>
<g
transform="translate(-850.30204,55463.106)"
@@ -3924,7 +3915,7 @@
font-style="normal"
y="-4418.6582"
x="3745.7725"
- xml:space="preserve">rcu_process_callbacks()</text>
+ xml:space="preserve">rcu_core()</text>
<text
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"
id="text202-7-5-3-27-0"
@@ -3933,7 +3924,7 @@
font-style="normal"
y="-4165.7954"
x="3745.7725"
- xml:space="preserve">rcu_check_quiescent_state())</text>
+ xml:space="preserve">rcu_check_quiescent_state()</text>
<text
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"
id="text202-7-5-3-27-0-9"
@@ -4968,7 +4959,7 @@
font-weight="bold"
font-size="192"
id="text202-7-5-19"
- style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_check_callbacks()</text>
+ style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_sched_clock_irq()</text>
<rect
x="5314.2671"
y="82817.688"
diff --git a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-hotplug.svg b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-hotplug.svg
index 2c9310ba29ba..a92356ce4011 100644
--- a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-hotplug.svg
+++ b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-hotplug.svg
@@ -516,7 +516,7 @@
font-style="normal"
y="-8652.5312"
x="2466.7822"
- xml:space="preserve">dyntick_save_progress_counter()</text>
+ xml:space="preserve">rcu_watching_snap_save()</text>
<text
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"
id="text202-7-2-7-2-0"
@@ -525,7 +525,7 @@
font-style="normal"
y="-8368.1475"
x="2463.3262"
- xml:space="preserve">rcu_implicit_dynticks_qs()</text>
+ xml:space="preserve">rcu_watching_snap_recheck()</text>
<text
sodipodi:linespacing="125%"
style="font-size:192px;font-style:normal;font-weight:bold;line-height:125%;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"
@@ -607,7 +607,7 @@
font-weight="bold"
font-size="192"
id="text202-7-5-3-27-6"
- style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_report_dead()</text>
+ style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcutree_report_cpu_dead()</text>
<text
xml:space="preserve"
x="3745.7725"
@@ -728,7 +728,7 @@
font-style="normal"
y="3679.27"
x="-3804.9949"
- xml:space="preserve">rcu_cpu_starting()</text>
+ xml:space="preserve">rcutree_report_cpu_starting()</text>
<g
style="fill:none;stroke-width:0.025in"
id="g3107-7-5-0"
diff --git a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-qs.svg b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-qs.svg
index 149bec2a4493..7d6c5f7e505c 100644
--- a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-qs.svg
+++ b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-qs.svg
@@ -753,7 +753,7 @@
font-style="normal"
y="-4418.6582"
x="3745.7725"
- xml:space="preserve">rcu_node_context_switch()</text>
+ xml:space="preserve">rcu_note_context_switch()</text>
</g>
<g
transform="translate(3131.2648,-585.6713)"
@@ -775,7 +775,7 @@
font-style="normal"
y="-4418.6582"
x="3745.7725"
- xml:space="preserve">rcu_check_callbacks()</text>
+ xml:space="preserve">rcu_sched_clock_irq()</text>
</g>
<g
transform="translate(399.7744,828.86448)"
@@ -797,7 +797,7 @@
font-style="normal"
y="-4418.6582"
x="3745.7725"
- xml:space="preserve">rcu_process_callbacks()</text>
+ xml:space="preserve">rcu_core()</text>
<text
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"
id="text202-7-5-3-27-0"
@@ -806,7 +806,7 @@
font-style="normal"
y="-4165.7954"
x="3745.7725"
- xml:space="preserve">rcu_check_quiescent_state())</text>
+ xml:space="preserve">rcu_check_quiescent_state()</text>
<text
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"
id="text202-7-5-3-27-0-9"
diff --git a/Documentation/RCU/Design/Requirements/GPpartitionReaders1.svg b/Documentation/RCU/Design/Requirements/GPpartitionReaders1.svg
index 4b4014fda770..87851a8fac1e 100644
--- a/Documentation/RCU/Design/Requirements/GPpartitionReaders1.svg
+++ b/Documentation/RCU/Design/Requirements/GPpartitionReaders1.svg
@@ -88,7 +88,7 @@
<flowRoot
xml:space="preserve"
id="flowRoot2985"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"><flowRegion
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"><flowRegion
id="flowRegion2987"><rect
id="rect2989"
width="82.85714"
@@ -103,7 +103,7 @@
id="text2993"
y="-261.66608"
x="412.12299"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
xml:space="preserve"
transform="matrix(0,1,-1,0,0,0)"><tspan
y="-261.66608"
@@ -135,7 +135,7 @@
</g>
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="112.04738"
y="268.18076"
id="text4429"
@@ -146,7 +146,7 @@
y="268.18076">WRITE_ONCE(a, 1);</tspan></text>
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="112.04738"
y="439.13766"
id="text4441"
@@ -157,7 +157,7 @@
y="439.13766">WRITE_ONCE(b, 1);</tspan></text>
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="255.60869"
y="309.29346"
id="text4445"
@@ -168,7 +168,7 @@
y="309.29346">r1 = READ_ONCE(a);</tspan></text>
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="255.14423"
y="520.61786"
id="text4449"
@@ -179,7 +179,7 @@
y="520.61786">WRITE_ONCE(c, 1);</tspan></text>
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="396.10254"
y="384.71124"
id="text4453"
@@ -190,7 +190,7 @@
y="384.71124">r2 = READ_ONCE(b);</tspan></text>
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="396.10254"
y="582.13617"
id="text4457"
@@ -201,7 +201,7 @@
y="582.13617">r3 = READ_ONCE(c);</tspan></text>
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="112.08231"
y="213.91006"
id="text4461"
@@ -212,7 +212,7 @@
y="213.91006">thread0()</tspan></text>
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="252.34512"
y="213.91006"
id="text4461-6"
@@ -223,7 +223,7 @@
y="213.91006">thread1()</tspan></text>
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="396.42557"
y="213.91006"
id="text4461-2"
@@ -251,7 +251,7 @@
inkscape:connector-curvature="0" />
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="111.75929"
y="251.53981"
id="text4429-8"
@@ -262,7 +262,7 @@
y="251.53981">rcu_read_lock();</tspan></text>
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="396.10254"
y="367.91556"
id="text4429-8-9"
@@ -273,7 +273,7 @@
y="367.91556">rcu_read_lock();</tspan></text>
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="396.10254"
y="597.40289"
id="text4429-8-9-3"
@@ -284,7 +284,7 @@
y="597.40289">rcu_read_unlock();</tspan></text>
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="111.75929"
y="453.15311"
id="text4429-8-9-3-1"
@@ -300,7 +300,7 @@
inkscape:connector-curvature="0" />
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="394.94427"
y="345.66351"
id="text4648"
@@ -324,7 +324,7 @@
sodipodi:open="true" />
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="112.11968"
y="475.77856"
id="text4648-4"
@@ -361,7 +361,7 @@
sodipodi:open="true" />
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="254.85066"
y="348.96619"
id="text4648-4-3"
diff --git a/Documentation/RCU/Design/Requirements/ReadersPartitionGP1.svg b/Documentation/RCU/Design/Requirements/ReadersPartitionGP1.svg
index 48cd1623d4d4..e2a8af592bab 100644
--- a/Documentation/RCU/Design/Requirements/ReadersPartitionGP1.svg
+++ b/Documentation/RCU/Design/Requirements/ReadersPartitionGP1.svg
@@ -116,7 +116,7 @@
<flowRoot
xml:space="preserve"
id="flowRoot2985"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"><flowRegion
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"><flowRegion
id="flowRegion2987"><rect
id="rect2989"
width="82.85714"
@@ -131,7 +131,7 @@
id="text2993"
y="-261.66608"
x="436.12299"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
xml:space="preserve"
transform="matrix(0,1,-1,0,0,0)"><tspan
y="-261.66608"
@@ -163,7 +163,7 @@
</g>
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="112.04738"
y="268.18076"
id="text4429"
@@ -174,7 +174,7 @@
y="268.18076">WRITE_ONCE(a, 1);</tspan></text>
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="112.04738"
y="487.13766"
id="text4441"
@@ -185,7 +185,7 @@
y="487.13766">WRITE_ONCE(b, 1);</tspan></text>
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="255.60869"
y="297.29346"
id="text4445"
@@ -196,7 +196,7 @@
y="297.29346">r1 = READ_ONCE(a);</tspan></text>
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="255.14423"
y="554.61786"
id="text4449"
@@ -207,7 +207,7 @@
y="554.61786">WRITE_ONCE(c, 1);</tspan></text>
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="396.10254"
y="370.71124"
id="text4453"
@@ -218,7 +218,7 @@
y="370.71124">WRITE_ONCE(d, 1);</tspan></text>
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="396.10254"
y="572.13617"
id="text4457"
@@ -229,7 +229,7 @@
y="572.13617">r2 = READ_ONCE(c);</tspan></text>
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="112.08231"
y="213.91006"
id="text4461"
@@ -240,7 +240,7 @@
y="213.91006">thread0()</tspan></text>
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="252.34512"
y="213.91006"
id="text4461-6"
@@ -251,7 +251,7 @@
y="213.91006">thread1()</tspan></text>
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="396.42557"
y="213.91006"
id="text4461-2"
@@ -281,7 +281,7 @@
sodipodi:nodetypes="cc" />
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="111.75929"
y="251.53981"
id="text4429-8"
@@ -292,7 +292,7 @@
y="251.53981">rcu_read_lock();</tspan></text>
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="396.10254"
y="353.91556"
id="text4429-8-9"
@@ -303,7 +303,7 @@
y="353.91556">rcu_read_lock();</tspan></text>
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="396.10254"
y="587.40289"
id="text4429-8-9-3"
@@ -314,7 +314,7 @@
y="587.40289">rcu_read_unlock();</tspan></text>
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="111.75929"
y="501.15311"
id="text4429-8-9-3-1"
@@ -331,7 +331,7 @@
sodipodi:nodetypes="cc" />
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="394.94427"
y="331.66351"
id="text4648"
@@ -355,7 +355,7 @@
sodipodi:open="true" />
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="112.11968"
y="523.77856"
id="text4648-4"
@@ -392,7 +392,7 @@
sodipodi:open="true" />
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="254.85066"
y="336.96619"
id="text4648-4-3"
@@ -421,7 +421,7 @@
id="text2993-7"
y="-261.66608"
x="440.12299"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
xml:space="preserve"
transform="matrix(0,1,-1,0,0,0)"><tspan
y="-261.66608"
@@ -453,7 +453,7 @@
</g>
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="541.70508"
y="387.6217"
id="text4445-0"
@@ -464,7 +464,7 @@
y="387.6217">r3 = READ_ONCE(d);</tspan></text>
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="541.2406"
y="646.94611"
id="text4449-6"
@@ -488,7 +488,7 @@
sodipodi:open="true" />
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="540.94702"
y="427.29443"
id="text4648-4-3-1"
@@ -499,7 +499,7 @@
y="427.29443">QS</tspan></text>
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="686.27747"
y="461.83929"
id="text4453-7"
@@ -510,7 +510,7 @@
y="461.83929">r4 = READ_ONCE(b);</tspan></text>
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="686.27747"
y="669.26422"
id="text4457-9"
@@ -521,7 +521,7 @@
y="669.26422">r5 = READ_ONCE(e);</tspan></text>
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="686.27747"
y="445.04358"
id="text4429-8-9-33"
@@ -532,7 +532,7 @@
y="445.04358">rcu_read_lock();</tspan></text>
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="686.27747"
y="684.53094"
id="text4429-8-9-3-8"
@@ -543,7 +543,7 @@
y="684.53094">rcu_read_unlock();</tspan></text>
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="685.11914"
y="422.79153"
id="text4648-9"
@@ -567,7 +567,7 @@
sodipodi:open="true" />
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="397.85934"
y="609.59003"
id="text4648-5"
@@ -591,7 +591,7 @@
sodipodi:open="true" />
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="256.75986"
y="586.99133"
id="text4648-5-2"
@@ -615,7 +615,7 @@
sodipodi:open="true" />
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="546.22791"
y="213.91006"
id="text4461-2-5"
@@ -626,7 +626,7 @@
y="213.91006">thread3()</tspan></text>
<text
xml:space="preserve"
- style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:monospace;-inkscape-font-specification:monospace"
x="684.00067"
y="213.91006"
id="text4461-2-1"
diff --git a/Documentation/RCU/Design/Requirements/Requirements.html b/Documentation/RCU/Design/Requirements/Requirements.html
deleted file mode 100644
index 43c4e2f05f40..000000000000
--- a/Documentation/RCU/Design/Requirements/Requirements.html
+++ /dev/null
@@ -1,3254 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
- "http://www.w3.org/TR/html4/loose.dtd">
- <html>
- <head><title>A Tour Through RCU's Requirements [LWN.net]</title>
- <meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">
-
-<h1>A Tour Through RCU's Requirements</h1>
-
-<p>Copyright IBM Corporation, 2015</p>
-<p>Author: Paul E.&nbsp;McKenney</p>
-<p><i>The initial version of this document appeared in the
-<a href="https://lwn.net/">LWN</a> articles
-<a href="https://lwn.net/Articles/652156/">here</a>,
-<a href="https://lwn.net/Articles/652677/">here</a>, and
-<a href="https://lwn.net/Articles/653326/">here</a>.</i></p>
-
-<h2>Introduction</h2>
-
-<p>
-Read-copy update (RCU) is a synchronization mechanism that is often
-used as a replacement for reader-writer locking.
-RCU is unusual in that updaters do not block readers,
-which means that RCU's read-side primitives can be exceedingly fast
-and scalable.
-In addition, updaters can make useful forward progress concurrently
-with readers.
-However, all this concurrency between RCU readers and updaters does raise
-the question of exactly what RCU readers are doing, which in turn
-raises the question of exactly what RCU's requirements are.
-
-<p>
-This document therefore summarizes RCU's requirements, and can be thought
-of as an informal, high-level specification for RCU.
-It is important to understand that RCU's specification is primarily
-empirical in nature;
-in fact, I learned about many of these requirements the hard way.
-This situation might cause some consternation, however, not only
-has this learning process been a lot of fun, but it has also been
-a great privilege to work with so many people willing to apply
-technologies in interesting new ways.
-
-<p>
-All that aside, here are the categories of currently known RCU requirements:
-</p>
-
-<ol>
-<li> <a href="#Fundamental Requirements">
- Fundamental Requirements</a>
-<li> <a href="#Fundamental Non-Requirements">Fundamental Non-Requirements</a>
-<li> <a href="#Parallelism Facts of Life">
- Parallelism Facts of Life</a>
-<li> <a href="#Quality-of-Implementation Requirements">
- Quality-of-Implementation Requirements</a>
-<li> <a href="#Linux Kernel Complications">
- Linux Kernel Complications</a>
-<li> <a href="#Software-Engineering Requirements">
- Software-Engineering Requirements</a>
-<li> <a href="#Other RCU Flavors">
- Other RCU Flavors</a>
-<li> <a href="#Possible Future Changes">
- Possible Future Changes</a>
-</ol>
-
-<p>
-This is followed by a <a href="#Summary">summary</a>,
-however, the answers to each quick quiz immediately follows the quiz.
-Select the big white space with your mouse to see the answer.
-
-<h2><a name="Fundamental Requirements">Fundamental Requirements</a></h2>
-
-<p>
-RCU's fundamental requirements are the closest thing RCU has to hard
-mathematical requirements.
-These are:
-
-<ol>
-<li> <a href="#Grace-Period Guarantee">
- Grace-Period Guarantee</a>
-<li> <a href="#Publish-Subscribe Guarantee">
- Publish-Subscribe Guarantee</a>
-<li> <a href="#Memory-Barrier Guarantees">
- Memory-Barrier Guarantees</a>
-<li> <a href="#RCU Primitives Guaranteed to Execute Unconditionally">
- RCU Primitives Guaranteed to Execute Unconditionally</a>
-<li> <a href="#Guaranteed Read-to-Write Upgrade">
- Guaranteed Read-to-Write Upgrade</a>
-</ol>
-
-<h3><a name="Grace-Period Guarantee">Grace-Period Guarantee</a></h3>
-
-<p>
-RCU's grace-period guarantee is unusual in being premeditated:
-Jack Slingwine and I had this guarantee firmly in mind when we started
-work on RCU (then called &ldquo;rclock&rdquo;) in the early 1990s.
-That said, the past two decades of experience with RCU have produced
-a much more detailed understanding of this guarantee.
-
-<p>
-RCU's grace-period guarantee allows updaters to wait for the completion
-of all pre-existing RCU read-side critical sections.
-An RCU read-side critical section
-begins with the marker <tt>rcu_read_lock()</tt> and ends with
-the marker <tt>rcu_read_unlock()</tt>.
-These markers may be nested, and RCU treats a nested set as one
-big RCU read-side critical section.
-Production-quality implementations of <tt>rcu_read_lock()</tt> and
-<tt>rcu_read_unlock()</tt> are extremely lightweight, and in
-fact have exactly zero overhead in Linux kernels built for production
-use with <tt>CONFIG_PREEMPT=n</tt>.
-
-<p>
-This guarantee allows ordering to be enforced with extremely low
-overhead to readers, for example:
-
-<blockquote>
-<pre>
- 1 int x, y;
- 2
- 3 void thread0(void)
- 4 {
- 5 rcu_read_lock();
- 6 r1 = READ_ONCE(x);
- 7 r2 = READ_ONCE(y);
- 8 rcu_read_unlock();
- 9 }
-10
-11 void thread1(void)
-12 {
-13 WRITE_ONCE(x, 1);
-14 synchronize_rcu();
-15 WRITE_ONCE(y, 1);
-16 }
-</pre>
-</blockquote>
-
-<p>
-Because the <tt>synchronize_rcu()</tt> on line&nbsp;14 waits for
-all pre-existing readers, any instance of <tt>thread0()</tt> that
-loads a value of zero from <tt>x</tt> must complete before
-<tt>thread1()</tt> stores to <tt>y</tt>, so that instance must
-also load a value of zero from <tt>y</tt>.
-Similarly, any instance of <tt>thread0()</tt> that loads a value of
-one from <tt>y</tt> must have started after the
-<tt>synchronize_rcu()</tt> started, and must therefore also load
-a value of one from <tt>x</tt>.
-Therefore, the outcome:
-<blockquote>
-<pre>
-(r1 == 0 &amp;&amp; r2 == 1)
-</pre>
-</blockquote>
-cannot happen.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
- Wait a minute!
- You said that updaters can make useful forward progress concurrently
- with readers, but pre-existing readers will block
- <tt>synchronize_rcu()</tt>!!!
- Just who are you trying to fool???
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
- First, if updaters do not wish to be blocked by readers, they can use
- <tt>call_rcu()</tt> or <tt>kfree_rcu()</tt>, which will
- be discussed later.
- Second, even when using <tt>synchronize_rcu()</tt>, the other
- update-side code does run concurrently with readers, whether
- pre-existing or not.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>
-This scenario resembles one of the first uses of RCU in
-<a href="https://en.wikipedia.org/wiki/DYNIX">DYNIX/ptx</a>,
-which managed a distributed lock manager's transition into
-a state suitable for handling recovery from node failure,
-more or less as follows:
-
-<blockquote>
-<pre>
- 1 #define STATE_NORMAL 0
- 2 #define STATE_WANT_RECOVERY 1
- 3 #define STATE_RECOVERING 2
- 4 #define STATE_WANT_NORMAL 3
- 5
- 6 int state = STATE_NORMAL;
- 7
- 8 void do_something_dlm(void)
- 9 {
-10 int state_snap;
-11
-12 rcu_read_lock();
-13 state_snap = READ_ONCE(state);
-14 if (state_snap == STATE_NORMAL)
-15 do_something();
-16 else
-17 do_something_carefully();
-18 rcu_read_unlock();
-19 }
-20
-21 void start_recovery(void)
-22 {
-23 WRITE_ONCE(state, STATE_WANT_RECOVERY);
-24 synchronize_rcu();
-25 WRITE_ONCE(state, STATE_RECOVERING);
-26 recovery();
-27 WRITE_ONCE(state, STATE_WANT_NORMAL);
-28 synchronize_rcu();
-29 WRITE_ONCE(state, STATE_NORMAL);
-30 }
-</pre>
-</blockquote>
-
-<p>
-The RCU read-side critical section in <tt>do_something_dlm()</tt>
-works with the <tt>synchronize_rcu()</tt> in <tt>start_recovery()</tt>
-to guarantee that <tt>do_something()</tt> never runs concurrently
-with <tt>recovery()</tt>, but with little or no synchronization
-overhead in <tt>do_something_dlm()</tt>.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
- Why is the <tt>synchronize_rcu()</tt> on line&nbsp;28 needed?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
- Without that extra grace period, memory reordering could result in
- <tt>do_something_dlm()</tt> executing <tt>do_something()</tt>
- concurrently with the last bits of <tt>recovery()</tt>.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>
-In order to avoid fatal problems such as deadlocks,
-an RCU read-side critical section must not contain calls to
-<tt>synchronize_rcu()</tt>.
-Similarly, an RCU read-side critical section must not
-contain anything that waits, directly or indirectly, on completion of
-an invocation of <tt>synchronize_rcu()</tt>.
-
-<p>
-Although RCU's grace-period guarantee is useful in and of itself, with
-<a href="https://lwn.net/Articles/573497/">quite a few use cases</a>,
-it would be good to be able to use RCU to coordinate read-side
-access to linked data structures.
-For this, the grace-period guarantee is not sufficient, as can
-be seen in function <tt>add_gp_buggy()</tt> below.
-We will look at the reader's code later, but in the meantime, just think of
-the reader as locklessly picking up the <tt>gp</tt> pointer,
-and, if the value loaded is non-<tt>NULL</tt>, locklessly accessing the
-<tt>-&gt;a</tt> and <tt>-&gt;b</tt> fields.
-
-<blockquote>
-<pre>
- 1 bool add_gp_buggy(int a, int b)
- 2 {
- 3 p = kmalloc(sizeof(*p), GFP_KERNEL);
- 4 if (!p)
- 5 return -ENOMEM;
- 6 spin_lock(&amp;gp_lock);
- 7 if (rcu_access_pointer(gp)) {
- 8 spin_unlock(&amp;gp_lock);
- 9 return false;
-10 }
-11 p-&gt;a = a;
-12 p-&gt;b = a;
-13 gp = p; /* ORDERING BUG */
-14 spin_unlock(&amp;gp_lock);
-15 return true;
-16 }
-</pre>
-</blockquote>
-
-<p>
-The problem is that both the compiler and weakly ordered CPUs are within
-their rights to reorder this code as follows:
-
-<blockquote>
-<pre>
- 1 bool add_gp_buggy_optimized(int a, int b)
- 2 {
- 3 p = kmalloc(sizeof(*p), GFP_KERNEL);
- 4 if (!p)
- 5 return -ENOMEM;
- 6 spin_lock(&amp;gp_lock);
- 7 if (rcu_access_pointer(gp)) {
- 8 spin_unlock(&amp;gp_lock);
- 9 return false;
-10 }
-<b>11 gp = p; /* ORDERING BUG */
-12 p-&gt;a = a;
-13 p-&gt;b = a;</b>
-14 spin_unlock(&amp;gp_lock);
-15 return true;
-16 }
-</pre>
-</blockquote>
-
-<p>
-If an RCU reader fetches <tt>gp</tt> just after
-<tt>add_gp_buggy_optimized</tt> executes line&nbsp;11,
-it will see garbage in the <tt>-&gt;a</tt> and <tt>-&gt;b</tt>
-fields.
-And this is but one of many ways in which compiler and hardware optimizations
-could cause trouble.
-Therefore, we clearly need some way to prevent the compiler and the CPU from
-reordering in this manner, which brings us to the publish-subscribe
-guarantee discussed in the next section.
-
-<h3><a name="Publish-Subscribe Guarantee">Publish/Subscribe Guarantee</a></h3>
-
-<p>
-RCU's publish-subscribe guarantee allows data to be inserted
-into a linked data structure without disrupting RCU readers.
-The updater uses <tt>rcu_assign_pointer()</tt> to insert the
-new data, and readers use <tt>rcu_dereference()</tt> to
-access data, whether new or old.
-The following shows an example of insertion:
-
-<blockquote>
-<pre>
- 1 bool add_gp(int a, int b)
- 2 {
- 3 p = kmalloc(sizeof(*p), GFP_KERNEL);
- 4 if (!p)
- 5 return -ENOMEM;
- 6 spin_lock(&amp;gp_lock);
- 7 if (rcu_access_pointer(gp)) {
- 8 spin_unlock(&amp;gp_lock);
- 9 return false;
-10 }
-11 p-&gt;a = a;
-12 p-&gt;b = a;
-13 rcu_assign_pointer(gp, p);
-14 spin_unlock(&amp;gp_lock);
-15 return true;
-16 }
-</pre>
-</blockquote>
-
-<p>
-The <tt>rcu_assign_pointer()</tt> on line&nbsp;13 is conceptually
-equivalent to a simple assignment statement, but also guarantees
-that its assignment will
-happen after the two assignments in lines&nbsp;11 and&nbsp;12,
-similar to the C11 <tt>memory_order_release</tt> store operation.
-It also prevents any number of &ldquo;interesting&rdquo; compiler
-optimizations, for example, the use of <tt>gp</tt> as a scratch
-location immediately preceding the assignment.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
- But <tt>rcu_assign_pointer()</tt> does nothing to prevent the
- two assignments to <tt>p-&gt;a</tt> and <tt>p-&gt;b</tt>
- from being reordered.
- Can't that also cause problems?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
- No, it cannot.
- The readers cannot see either of these two fields until
- the assignment to <tt>gp</tt>, by which time both fields are
- fully initialized.
- So reordering the assignments
- to <tt>p-&gt;a</tt> and <tt>p-&gt;b</tt> cannot possibly
- cause any problems.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>
-It is tempting to assume that the reader need not do anything special
-to control its accesses to the RCU-protected data,
-as shown in <tt>do_something_gp_buggy()</tt> below:
-
-<blockquote>
-<pre>
- 1 bool do_something_gp_buggy(void)
- 2 {
- 3 rcu_read_lock();
- 4 p = gp; /* OPTIMIZATIONS GALORE!!! */
- 5 if (p) {
- 6 do_something(p-&gt;a, p-&gt;b);
- 7 rcu_read_unlock();
- 8 return true;
- 9 }
-10 rcu_read_unlock();
-11 return false;
-12 }
-</pre>
-</blockquote>
-
-<p>
-However, this temptation must be resisted because there are a
-surprisingly large number of ways that the compiler
-(to say nothing of
-<a href="https://h71000.www7.hp.com/wizard/wiz_2637.html">DEC Alpha CPUs</a>)
-can trip this code up.
-For but one example, if the compiler were short of registers, it
-might choose to refetch from <tt>gp</tt> rather than keeping
-a separate copy in <tt>p</tt> as follows:
-
-<blockquote>
-<pre>
- 1 bool do_something_gp_buggy_optimized(void)
- 2 {
- 3 rcu_read_lock();
- 4 if (gp) { /* OPTIMIZATIONS GALORE!!! */
-<b> 5 do_something(gp-&gt;a, gp-&gt;b);</b>
- 6 rcu_read_unlock();
- 7 return true;
- 8 }
- 9 rcu_read_unlock();
-10 return false;
-11 }
-</pre>
-</blockquote>
-
-<p>
-If this function ran concurrently with a series of updates that
-replaced the current structure with a new one,
-the fetches of <tt>gp-&gt;a</tt>
-and <tt>gp-&gt;b</tt> might well come from two different structures,
-which could cause serious confusion.
-To prevent this (and much else besides), <tt>do_something_gp()</tt> uses
-<tt>rcu_dereference()</tt> to fetch from <tt>gp</tt>:
-
-<blockquote>
-<pre>
- 1 bool do_something_gp(void)
- 2 {
- 3 rcu_read_lock();
- 4 p = rcu_dereference(gp);
- 5 if (p) {
- 6 do_something(p-&gt;a, p-&gt;b);
- 7 rcu_read_unlock();
- 8 return true;
- 9 }
-10 rcu_read_unlock();
-11 return false;
-12 }
-</pre>
-</blockquote>
-
-<p>
-The <tt>rcu_dereference()</tt> uses volatile casts and (for DEC Alpha)
-memory barriers in the Linux kernel.
-Should a
-<a href="http://www.rdrop.com/users/paulmck/RCU/consume.2015.07.13a.pdf">high-quality implementation of C11 <tt>memory_order_consume</tt> [PDF]</a>
-ever appear, then <tt>rcu_dereference()</tt> could be implemented
-as a <tt>memory_order_consume</tt> load.
-Regardless of the exact implementation, a pointer fetched by
-<tt>rcu_dereference()</tt> may not be used outside of the
-outermost RCU read-side critical section containing that
-<tt>rcu_dereference()</tt>, unless protection of
-the corresponding data element has been passed from RCU to some
-other synchronization mechanism, most commonly locking or
-<a href="https://www.kernel.org/doc/Documentation/RCU/rcuref.txt">reference counting</a>.
-
-<p>
-In short, updaters use <tt>rcu_assign_pointer()</tt> and readers
-use <tt>rcu_dereference()</tt>, and these two RCU API elements
-work together to ensure that readers have a consistent view of
-newly added data elements.
-
-<p>
-Of course, it is also necessary to remove elements from RCU-protected
-data structures, for example, using the following process:
-
-<ol>
-<li> Remove the data element from the enclosing structure.
-<li> Wait for all pre-existing RCU read-side critical sections
- to complete (because only pre-existing readers can possibly have
- a reference to the newly removed data element).
-<li> At this point, only the updater has a reference to the
- newly removed data element, so it can safely reclaim
- the data element, for example, by passing it to <tt>kfree()</tt>.
-</ol>
-
-This process is implemented by <tt>remove_gp_synchronous()</tt>:
-
-<blockquote>
-<pre>
- 1 bool remove_gp_synchronous(void)
- 2 {
- 3 struct foo *p;
- 4
- 5 spin_lock(&amp;gp_lock);
- 6 p = rcu_access_pointer(gp);
- 7 if (!p) {
- 8 spin_unlock(&amp;gp_lock);
- 9 return false;
-10 }
-11 rcu_assign_pointer(gp, NULL);
-12 spin_unlock(&amp;gp_lock);
-13 synchronize_rcu();
-14 kfree(p);
-15 return true;
-16 }
-</pre>
-</blockquote>
-
-<p>
-This function is straightforward, with line&nbsp;13 waiting for a grace
-period before line&nbsp;14 frees the old data element.
-This waiting ensures that readers will reach line&nbsp;7 of
-<tt>do_something_gp()</tt> before the data element referenced by
-<tt>p</tt> is freed.
-The <tt>rcu_access_pointer()</tt> on line&nbsp;6 is similar to
-<tt>rcu_dereference()</tt>, except that:
-
-<ol>
-<li> The value returned by <tt>rcu_access_pointer()</tt>
- cannot be dereferenced.
- If you want to access the value pointed to as well as
- the pointer itself, use <tt>rcu_dereference()</tt>
- instead of <tt>rcu_access_pointer()</tt>.
-<li> The call to <tt>rcu_access_pointer()</tt> need not be
- protected.
- In contrast, <tt>rcu_dereference()</tt> must either be
- within an RCU read-side critical section or in a code
- segment where the pointer cannot change, for example, in
- code protected by the corresponding update-side lock.
-</ol>
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
- Without the <tt>rcu_dereference()</tt> or the
- <tt>rcu_access_pointer()</tt>, what destructive optimizations
- might the compiler make use of?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
- Let's start with what happens to <tt>do_something_gp()</tt>
- if it fails to use <tt>rcu_dereference()</tt>.
- It could reuse a value formerly fetched from this same pointer.
- It could also fetch the pointer from <tt>gp</tt> in a byte-at-a-time
- manner, resulting in <i>load tearing</i>, in turn resulting a bytewise
- mash-up of two distinct pointer values.
- It might even use value-speculation optimizations, where it makes
- a wrong guess, but by the time it gets around to checking the
- value, an update has changed the pointer to match the wrong guess.
- Too bad about any dereferences that returned pre-initialization garbage
- in the meantime!
- </font>
-
- <p><font color="ffffff">
- For <tt>remove_gp_synchronous()</tt>, as long as all modifications
- to <tt>gp</tt> are carried out while holding <tt>gp_lock</tt>,
- the above optimizations are harmless.
- However, <tt>sparse</tt> will complain if you
- define <tt>gp</tt> with <tt>__rcu</tt> and then
- access it without using
- either <tt>rcu_access_pointer()</tt> or <tt>rcu_dereference()</tt>.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>
-In short, RCU's publish-subscribe guarantee is provided by the combination
-of <tt>rcu_assign_pointer()</tt> and <tt>rcu_dereference()</tt>.
-This guarantee allows data elements to be safely added to RCU-protected
-linked data structures without disrupting RCU readers.
-This guarantee can be used in combination with the grace-period
-guarantee to also allow data elements to be removed from RCU-protected
-linked data structures, again without disrupting RCU readers.
-
-<p>
-This guarantee was only partially premeditated.
-DYNIX/ptx used an explicit memory barrier for publication, but had nothing
-resembling <tt>rcu_dereference()</tt> for subscription, nor did it
-have anything resembling the <tt>smp_read_barrier_depends()</tt>
-that was later subsumed into <tt>rcu_dereference()</tt> and later
-still into <tt>READ_ONCE()</tt>.
-The need for these operations made itself known quite suddenly at a
-late-1990s meeting with the DEC Alpha architects, back in the days when
-DEC was still a free-standing company.
-It took the Alpha architects a good hour to convince me that any sort
-of barrier would ever be needed, and it then took me a good <i>two</i> hours
-to convince them that their documentation did not make this point clear.
-More recent work with the C and C++ standards committees have provided
-much education on tricks and traps from the compiler.
-In short, compilers were much less tricky in the early 1990s, but in
-2015, don't even think about omitting <tt>rcu_dereference()</tt>!
-
-<h3><a name="Memory-Barrier Guarantees">Memory-Barrier Guarantees</a></h3>
-
-<p>
-The previous section's simple linked-data-structure scenario clearly
-demonstrates the need for RCU's stringent memory-ordering guarantees on
-systems with more than one CPU:
-
-<ol>
-<li> Each CPU that has an RCU read-side critical section that
- begins before <tt>synchronize_rcu()</tt> starts is
- guaranteed to execute a full memory barrier between the time
- that the RCU read-side critical section ends and the time that
- <tt>synchronize_rcu()</tt> returns.
- Without this guarantee, a pre-existing RCU read-side critical section
- might hold a reference to the newly removed <tt>struct foo</tt>
- after the <tt>kfree()</tt> on line&nbsp;14 of
- <tt>remove_gp_synchronous()</tt>.
-<li> Each CPU that has an RCU read-side critical section that ends
- after <tt>synchronize_rcu()</tt> returns is guaranteed
- to execute a full memory barrier between the time that
- <tt>synchronize_rcu()</tt> begins and the time that the RCU
- read-side critical section begins.
- Without this guarantee, a later RCU read-side critical section
- running after the <tt>kfree()</tt> on line&nbsp;14 of
- <tt>remove_gp_synchronous()</tt> might
- later run <tt>do_something_gp()</tt> and find the
- newly deleted <tt>struct foo</tt>.
-<li> If the task invoking <tt>synchronize_rcu()</tt> remains
- on a given CPU, then that CPU is guaranteed to execute a full
- memory barrier sometime during the execution of
- <tt>synchronize_rcu()</tt>.
- This guarantee ensures that the <tt>kfree()</tt> on
- line&nbsp;14 of <tt>remove_gp_synchronous()</tt> really does
- execute after the removal on line&nbsp;11.
-<li> If the task invoking <tt>synchronize_rcu()</tt> migrates
- among a group of CPUs during that invocation, then each of the
- CPUs in that group is guaranteed to execute a full memory barrier
- sometime during the execution of <tt>synchronize_rcu()</tt>.
- This guarantee also ensures that the <tt>kfree()</tt> on
- line&nbsp;14 of <tt>remove_gp_synchronous()</tt> really does
- execute after the removal on
- line&nbsp;11, but also in the case where the thread executing the
- <tt>synchronize_rcu()</tt> migrates in the meantime.
-</ol>
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
- Given that multiple CPUs can start RCU read-side critical sections
- at any time without any ordering whatsoever, how can RCU possibly
- tell whether or not a given RCU read-side critical section starts
- before a given instance of <tt>synchronize_rcu()</tt>?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
- If RCU cannot tell whether or not a given
- RCU read-side critical section starts before a
- given instance of <tt>synchronize_rcu()</tt>,
- then it must assume that the RCU read-side critical section
- started first.
- In other words, a given instance of <tt>synchronize_rcu()</tt>
- can avoid waiting on a given RCU read-side critical section only
- if it can prove that <tt>synchronize_rcu()</tt> started first.
- </font>
-
- <p><font color="ffffff">
- A related question is &ldquo;When <tt>rcu_read_lock()</tt>
- doesn't generate any code, why does it matter how it relates
- to a grace period?&rdquo;
- The answer is that it is not the relationship of
- <tt>rcu_read_lock()</tt> itself that is important, but rather
- the relationship of the code within the enclosed RCU read-side
- critical section to the code preceding and following the
- grace period.
- If we take this viewpoint, then a given RCU read-side critical
- section begins before a given grace period when some access
- preceding the grace period observes the effect of some access
- within the critical section, in which case none of the accesses
- within the critical section may observe the effects of any
- access following the grace period.
- </font>
-
- <p><font color="ffffff">
- As of late 2016, mathematical models of RCU take this
- viewpoint, for example, see slides&nbsp;62 and&nbsp;63
- of the
- <a href="http://www2.rdrop.com/users/paulmck/scalability/paper/LinuxMM.2016.10.04c.LCE.pdf">2016 LinuxCon EU</a>
- presentation.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
- The first and second guarantees require unbelievably strict ordering!
- Are all these memory barriers <i> really</i> required?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
- Yes, they really are required.
- To see why the first guarantee is required, consider the following
- sequence of events:
- </font>
-
- <ol>
- <li> <font color="ffffff">
- CPU 1: <tt>rcu_read_lock()</tt>
- </font>
- <li> <font color="ffffff">
- CPU 1: <tt>q = rcu_dereference(gp);
- /* Very likely to return p. */</tt>
- </font>
- <li> <font color="ffffff">
- CPU 0: <tt>list_del_rcu(p);</tt>
- </font>
- <li> <font color="ffffff">
- CPU 0: <tt>synchronize_rcu()</tt> starts.
- </font>
- <li> <font color="ffffff">
- CPU 1: <tt>do_something_with(q-&gt;a);
- /* No smp_mb(), so might happen after kfree(). */</tt>
- </font>
- <li> <font color="ffffff">
- CPU 1: <tt>rcu_read_unlock()</tt>
- </font>
- <li> <font color="ffffff">
- CPU 0: <tt>synchronize_rcu()</tt> returns.
- </font>
- <li> <font color="ffffff">
- CPU 0: <tt>kfree(p);</tt>
- </font>
- </ol>
-
- <p><font color="ffffff">
- Therefore, there absolutely must be a full memory barrier between the
- end of the RCU read-side critical section and the end of the
- grace period.
- </font>
-
- <p><font color="ffffff">
- The sequence of events demonstrating the necessity of the second rule
- is roughly similar:
- </font>
-
- <ol>
- <li> <font color="ffffff">CPU 0: <tt>list_del_rcu(p);</tt>
- </font>
- <li> <font color="ffffff">CPU 0: <tt>synchronize_rcu()</tt> starts.
- </font>
- <li> <font color="ffffff">CPU 1: <tt>rcu_read_lock()</tt>
- </font>
- <li> <font color="ffffff">CPU 1: <tt>q = rcu_dereference(gp);
- /* Might return p if no memory barrier. */</tt>
- </font>
- <li> <font color="ffffff">CPU 0: <tt>synchronize_rcu()</tt> returns.
- </font>
- <li> <font color="ffffff">CPU 0: <tt>kfree(p);</tt>
- </font>
- <li> <font color="ffffff">
- CPU 1: <tt>do_something_with(q-&gt;a); /* Boom!!! */</tt>
- </font>
- <li> <font color="ffffff">CPU 1: <tt>rcu_read_unlock()</tt>
- </font>
- </ol>
-
- <p><font color="ffffff">
- And similarly, without a memory barrier between the beginning of the
- grace period and the beginning of the RCU read-side critical section,
- CPU&nbsp;1 might end up accessing the freelist.
- </font>
-
- <p><font color="ffffff">
- The &ldquo;as if&rdquo; rule of course applies, so that any
- implementation that acts as if the appropriate memory barriers
- were in place is a correct implementation.
- That said, it is much easier to fool yourself into believing
- that you have adhered to the as-if rule than it is to actually
- adhere to it!
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
- You claim that <tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>
- generate absolutely no code in some kernel builds.
- This means that the compiler might arbitrarily rearrange consecutive
- RCU read-side critical sections.
- Given such rearrangement, if a given RCU read-side critical section
- is done, how can you be sure that all prior RCU read-side critical
- sections are done?
- Won't the compiler rearrangements make that impossible to determine?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
- In cases where <tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>
- generate absolutely no code, RCU infers quiescent states only at
- special locations, for example, within the scheduler.
- Because calls to <tt>schedule()</tt> had better prevent calling-code
- accesses to shared variables from being rearranged across the call to
- <tt>schedule()</tt>, if RCU detects the end of a given RCU read-side
- critical section, it will necessarily detect the end of all prior
- RCU read-side critical sections, no matter how aggressively the
- compiler scrambles the code.
- </font>
-
- <p><font color="ffffff">
- Again, this all assumes that the compiler cannot scramble code across
- calls to the scheduler, out of interrupt handlers, into the idle loop,
- into user-mode code, and so on.
- But if your kernel build allows that sort of scrambling, you have broken
- far more than just RCU!
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>
-Note that these memory-barrier requirements do not replace the fundamental
-RCU requirement that a grace period wait for all pre-existing readers.
-On the contrary, the memory barriers called out in this section must operate in
-such a way as to <i>enforce</i> this fundamental requirement.
-Of course, different implementations enforce this requirement in different
-ways, but enforce it they must.
-
-<h3><a name="RCU Primitives Guaranteed to Execute Unconditionally">RCU Primitives Guaranteed to Execute Unconditionally</a></h3>
-
-<p>
-The common-case RCU primitives are unconditional.
-They are invoked, they do their job, and they return, with no possibility
-of error, and no need to retry.
-This is a key RCU design philosophy.
-
-<p>
-However, this philosophy is pragmatic rather than pigheaded.
-If someone comes up with a good justification for a particular conditional
-RCU primitive, it might well be implemented and added.
-After all, this guarantee was reverse-engineered, not premeditated.
-The unconditional nature of the RCU primitives was initially an
-accident of implementation, and later experience with synchronization
-primitives with conditional primitives caused me to elevate this
-accident to a guarantee.
-Therefore, the justification for adding a conditional primitive to
-RCU would need to be based on detailed and compelling use cases.
-
-<h3><a name="Guaranteed Read-to-Write Upgrade">Guaranteed Read-to-Write Upgrade</a></h3>
-
-<p>
-As far as RCU is concerned, it is always possible to carry out an
-update within an RCU read-side critical section.
-For example, that RCU read-side critical section might search for
-a given data element, and then might acquire the update-side
-spinlock in order to update that element, all while remaining
-in that RCU read-side critical section.
-Of course, it is necessary to exit the RCU read-side critical section
-before invoking <tt>synchronize_rcu()</tt>, however, this
-inconvenience can be avoided through use of the
-<tt>call_rcu()</tt> and <tt>kfree_rcu()</tt> API members
-described later in this document.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
- But how does the upgrade-to-write operation exclude other readers?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
- It doesn't, just like normal RCU updates, which also do not exclude
- RCU readers.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>
-This guarantee allows lookup code to be shared between read-side
-and update-side code, and was premeditated, appearing in the earliest
-DYNIX/ptx RCU documentation.
-
-<h2><a name="Fundamental Non-Requirements">Fundamental Non-Requirements</a></h2>
-
-<p>
-RCU provides extremely lightweight readers, and its read-side guarantees,
-though quite useful, are correspondingly lightweight.
-It is therefore all too easy to assume that RCU is guaranteeing more
-than it really is.
-Of course, the list of things that RCU does not guarantee is infinitely
-long, however, the following sections list a few non-guarantees that
-have caused confusion.
-Except where otherwise noted, these non-guarantees were premeditated.
-
-<ol>
-<li> <a href="#Readers Impose Minimal Ordering">
- Readers Impose Minimal Ordering</a>
-<li> <a href="#Readers Do Not Exclude Updaters">
- Readers Do Not Exclude Updaters</a>
-<li> <a href="#Updaters Only Wait For Old Readers">
- Updaters Only Wait For Old Readers</a>
-<li> <a href="#Grace Periods Don't Partition Read-Side Critical Sections">
- Grace Periods Don't Partition Read-Side Critical Sections</a>
-<li> <a href="#Read-Side Critical Sections Don't Partition Grace Periods">
- Read-Side Critical Sections Don't Partition Grace Periods</a>
-<li> <a href="#Disabling Preemption Does Not Block Grace Periods">
- Disabling Preemption Does Not Block Grace Periods</a>
-</ol>
-
-<h3><a name="Readers Impose Minimal Ordering">Readers Impose Minimal Ordering</a></h3>
-
-<p>
-Reader-side markers such as <tt>rcu_read_lock()</tt> and
-<tt>rcu_read_unlock()</tt> provide absolutely no ordering guarantees
-except through their interaction with the grace-period APIs such as
-<tt>synchronize_rcu()</tt>.
-To see this, consider the following pair of threads:
-
-<blockquote>
-<pre>
- 1 void thread0(void)
- 2 {
- 3 rcu_read_lock();
- 4 WRITE_ONCE(x, 1);
- 5 rcu_read_unlock();
- 6 rcu_read_lock();
- 7 WRITE_ONCE(y, 1);
- 8 rcu_read_unlock();
- 9 }
-10
-11 void thread1(void)
-12 {
-13 rcu_read_lock();
-14 r1 = READ_ONCE(y);
-15 rcu_read_unlock();
-16 rcu_read_lock();
-17 r2 = READ_ONCE(x);
-18 rcu_read_unlock();
-19 }
-</pre>
-</blockquote>
-
-<p>
-After <tt>thread0()</tt> and <tt>thread1()</tt> execute
-concurrently, it is quite possible to have
-
-<blockquote>
-<pre>
-(r1 == 1 &amp;&amp; r2 == 0)
-</pre>
-</blockquote>
-
-(that is, <tt>y</tt> appears to have been assigned before <tt>x</tt>),
-which would not be possible if <tt>rcu_read_lock()</tt> and
-<tt>rcu_read_unlock()</tt> had much in the way of ordering
-properties.
-But they do not, so the CPU is within its rights
-to do significant reordering.
-This is by design: Any significant ordering constraints would slow down
-these fast-path APIs.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
- Can't the compiler also reorder this code?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
- No, the volatile casts in <tt>READ_ONCE()</tt> and
- <tt>WRITE_ONCE()</tt> prevent the compiler from reordering in
- this particular case.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<h3><a name="Readers Do Not Exclude Updaters">Readers Do Not Exclude Updaters</a></h3>
-
-<p>
-Neither <tt>rcu_read_lock()</tt> nor <tt>rcu_read_unlock()</tt>
-exclude updates.
-All they do is to prevent grace periods from ending.
-The following example illustrates this:
-
-<blockquote>
-<pre>
- 1 void thread0(void)
- 2 {
- 3 rcu_read_lock();
- 4 r1 = READ_ONCE(y);
- 5 if (r1) {
- 6 do_something_with_nonzero_x();
- 7 r2 = READ_ONCE(x);
- 8 WARN_ON(!r2); /* BUG!!! */
- 9 }
-10 rcu_read_unlock();
-11 }
-12
-13 void thread1(void)
-14 {
-15 spin_lock(&amp;my_lock);
-16 WRITE_ONCE(x, 1);
-17 WRITE_ONCE(y, 1);
-18 spin_unlock(&amp;my_lock);
-19 }
-</pre>
-</blockquote>
-
-<p>
-If the <tt>thread0()</tt> function's <tt>rcu_read_lock()</tt>
-excluded the <tt>thread1()</tt> function's update,
-the <tt>WARN_ON()</tt> could never fire.
-But the fact is that <tt>rcu_read_lock()</tt> does not exclude
-much of anything aside from subsequent grace periods, of which
-<tt>thread1()</tt> has none, so the
-<tt>WARN_ON()</tt> can and does fire.
-
-<h3><a name="Updaters Only Wait For Old Readers">Updaters Only Wait For Old Readers</a></h3>
-
-<p>
-It might be tempting to assume that after <tt>synchronize_rcu()</tt>
-completes, there are no readers executing.
-This temptation must be avoided because
-new readers can start immediately after <tt>synchronize_rcu()</tt>
-starts, and <tt>synchronize_rcu()</tt> is under no
-obligation to wait for these new readers.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
- Suppose that synchronize_rcu() did wait until <i>all</i>
- readers had completed instead of waiting only on
- pre-existing readers.
- For how long would the updater be able to rely on there
- being no readers?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
- For no time at all.
- Even if <tt>synchronize_rcu()</tt> were to wait until
- all readers had completed, a new reader might start immediately after
- <tt>synchronize_rcu()</tt> completed.
- Therefore, the code following
- <tt>synchronize_rcu()</tt> can <i>never</i> rely on there being
- no readers.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<h3><a name="Grace Periods Don't Partition Read-Side Critical Sections">
-Grace Periods Don't Partition Read-Side Critical Sections</a></h3>
-
-<p>
-It is tempting to assume that if any part of one RCU read-side critical
-section precedes a given grace period, and if any part of another RCU
-read-side critical section follows that same grace period, then all of
-the first RCU read-side critical section must precede all of the second.
-However, this just isn't the case: A single grace period does not
-partition the set of RCU read-side critical sections.
-An example of this situation can be illustrated as follows, where
-<tt>x</tt>, <tt>y</tt>, and <tt>z</tt> are initially all zero:
-
-<blockquote>
-<pre>
- 1 void thread0(void)
- 2 {
- 3 rcu_read_lock();
- 4 WRITE_ONCE(a, 1);
- 5 WRITE_ONCE(b, 1);
- 6 rcu_read_unlock();
- 7 }
- 8
- 9 void thread1(void)
-10 {
-11 r1 = READ_ONCE(a);
-12 synchronize_rcu();
-13 WRITE_ONCE(c, 1);
-14 }
-15
-16 void thread2(void)
-17 {
-18 rcu_read_lock();
-19 r2 = READ_ONCE(b);
-20 r3 = READ_ONCE(c);
-21 rcu_read_unlock();
-22 }
-</pre>
-</blockquote>
-
-<p>
-It turns out that the outcome:
-
-<blockquote>
-<pre>
-(r1 == 1 &amp;&amp; r2 == 0 &amp;&amp; r3 == 1)
-</pre>
-</blockquote>
-
-is entirely possible.
-The following figure show how this can happen, with each circled
-<tt>QS</tt> indicating the point at which RCU recorded a
-<i>quiescent state</i> for each thread, that is, a state in which
-RCU knows that the thread cannot be in the midst of an RCU read-side
-critical section that started before the current grace period:
-
-<p><img src="GPpartitionReaders1.svg" alt="GPpartitionReaders1.svg" width="60%"></p>
-
-<p>
-If it is necessary to partition RCU read-side critical sections in this
-manner, it is necessary to use two grace periods, where the first
-grace period is known to end before the second grace period starts:
-
-<blockquote>
-<pre>
- 1 void thread0(void)
- 2 {
- 3 rcu_read_lock();
- 4 WRITE_ONCE(a, 1);
- 5 WRITE_ONCE(b, 1);
- 6 rcu_read_unlock();
- 7 }
- 8
- 9 void thread1(void)
-10 {
-11 r1 = READ_ONCE(a);
-12 synchronize_rcu();
-13 WRITE_ONCE(c, 1);
-14 }
-15
-16 void thread2(void)
-17 {
-18 r2 = READ_ONCE(c);
-19 synchronize_rcu();
-20 WRITE_ONCE(d, 1);
-21 }
-22
-23 void thread3(void)
-24 {
-25 rcu_read_lock();
-26 r3 = READ_ONCE(b);
-27 r4 = READ_ONCE(d);
-28 rcu_read_unlock();
-29 }
-</pre>
-</blockquote>
-
-<p>
-Here, if <tt>(r1 == 1)</tt>, then
-<tt>thread0()</tt>'s write to <tt>b</tt> must happen
-before the end of <tt>thread1()</tt>'s grace period.
-If in addition <tt>(r4 == 1)</tt>, then
-<tt>thread3()</tt>'s read from <tt>b</tt> must happen
-after the beginning of <tt>thread2()</tt>'s grace period.
-If it is also the case that <tt>(r2 == 1)</tt>, then the
-end of <tt>thread1()</tt>'s grace period must precede the
-beginning of <tt>thread2()</tt>'s grace period.
-This mean that the two RCU read-side critical sections cannot overlap,
-guaranteeing that <tt>(r3 == 1)</tt>.
-As a result, the outcome:
-
-<blockquote>
-<pre>
-(r1 == 1 &amp;&amp; r2 == 1 &amp;&amp; r3 == 0 &amp;&amp; r4 == 1)
-</pre>
-</blockquote>
-
-cannot happen.
-
-<p>
-This non-requirement was also non-premeditated, but became apparent
-when studying RCU's interaction with memory ordering.
-
-<h3><a name="Read-Side Critical Sections Don't Partition Grace Periods">
-Read-Side Critical Sections Don't Partition Grace Periods</a></h3>
-
-<p>
-It is also tempting to assume that if an RCU read-side critical section
-happens between a pair of grace periods, then those grace periods cannot
-overlap.
-However, this temptation leads nowhere good, as can be illustrated by
-the following, with all variables initially zero:
-
-<blockquote>
-<pre>
- 1 void thread0(void)
- 2 {
- 3 rcu_read_lock();
- 4 WRITE_ONCE(a, 1);
- 5 WRITE_ONCE(b, 1);
- 6 rcu_read_unlock();
- 7 }
- 8
- 9 void thread1(void)
-10 {
-11 r1 = READ_ONCE(a);
-12 synchronize_rcu();
-13 WRITE_ONCE(c, 1);
-14 }
-15
-16 void thread2(void)
-17 {
-18 rcu_read_lock();
-19 WRITE_ONCE(d, 1);
-20 r2 = READ_ONCE(c);
-21 rcu_read_unlock();
-22 }
-23
-24 void thread3(void)
-25 {
-26 r3 = READ_ONCE(d);
-27 synchronize_rcu();
-28 WRITE_ONCE(e, 1);
-29 }
-30
-31 void thread4(void)
-32 {
-33 rcu_read_lock();
-34 r4 = READ_ONCE(b);
-35 r5 = READ_ONCE(e);
-36 rcu_read_unlock();
-37 }
-</pre>
-</blockquote>
-
-<p>
-In this case, the outcome:
-
-<blockquote>
-<pre>
-(r1 == 1 &amp;&amp; r2 == 1 &amp;&amp; r3 == 1 &amp;&amp; r4 == 0 &amp&amp; r5 == 1)
-</pre>
-</blockquote>
-
-is entirely possible, as illustrated below:
-
-<p><img src="ReadersPartitionGP1.svg" alt="ReadersPartitionGP1.svg" width="100%"></p>
-
-<p>
-Again, an RCU read-side critical section can overlap almost all of a
-given grace period, just so long as it does not overlap the entire
-grace period.
-As a result, an RCU read-side critical section cannot partition a pair
-of RCU grace periods.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
- How long a sequence of grace periods, each separated by an RCU
- read-side critical section, would be required to partition the RCU
- read-side critical sections at the beginning and end of the chain?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
- In theory, an infinite number.
- In practice, an unknown number that is sensitive to both implementation
- details and timing considerations.
- Therefore, even in practice, RCU users must abide by the
- theoretical rather than the practical answer.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<h3><a name="Disabling Preemption Does Not Block Grace Periods">
-Disabling Preemption Does Not Block Grace Periods</a></h3>
-
-<p>
-There was a time when disabling preemption on any given CPU would block
-subsequent grace periods.
-However, this was an accident of implementation and is not a requirement.
-And in the current Linux-kernel implementation, disabling preemption
-on a given CPU in fact does not block grace periods, as Oleg Nesterov
-<a href="https://lkml.kernel.org/g/20150614193825.GA19582@redhat.com">demonstrated</a>.
-
-<p>
-If you need a preempt-disable region to block grace periods, you need to add
-<tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>, for example
-as follows:
-
-<blockquote>
-<pre>
- 1 preempt_disable();
- 2 rcu_read_lock();
- 3 do_something();
- 4 rcu_read_unlock();
- 5 preempt_enable();
- 6
- 7 /* Spinlocks implicitly disable preemption. */
- 8 spin_lock(&amp;mylock);
- 9 rcu_read_lock();
-10 do_something();
-11 rcu_read_unlock();
-12 spin_unlock(&amp;mylock);
-</pre>
-</blockquote>
-
-<p>
-In theory, you could enter the RCU read-side critical section first,
-but it is more efficient to keep the entire RCU read-side critical
-section contained in the preempt-disable region as shown above.
-Of course, RCU read-side critical sections that extend outside of
-preempt-disable regions will work correctly, but such critical sections
-can be preempted, which forces <tt>rcu_read_unlock()</tt> to do
-more work.
-And no, this is <i>not</i> an invitation to enclose all of your RCU
-read-side critical sections within preempt-disable regions, because
-doing so would degrade real-time response.
-
-<p>
-This non-requirement appeared with preemptible RCU.
-
-<h2><a name="Parallelism Facts of Life">Parallelism Facts of Life</a></h2>
-
-<p>
-These parallelism facts of life are by no means specific to RCU, but
-the RCU implementation must abide by them.
-They therefore bear repeating:
-
-<ol>
-<li> Any CPU or task may be delayed at any time,
- and any attempts to avoid these delays by disabling
- preemption, interrupts, or whatever are completely futile.
- This is most obvious in preemptible user-level
- environments and in virtualized environments (where
- a given guest OS's VCPUs can be preempted at any time by
- the underlying hypervisor), but can also happen in bare-metal
- environments due to ECC errors, NMIs, and other hardware
- events.
- Although a delay of more than about 20 seconds can result
- in splats, the RCU implementation is obligated to use
- algorithms that can tolerate extremely long delays, but where
- &ldquo;extremely long&rdquo; is not long enough to allow
- wrap-around when incrementing a 64-bit counter.
-<li> Both the compiler and the CPU can reorder memory accesses.
- Where it matters, RCU must use compiler directives and
- memory-barrier instructions to preserve ordering.
-<li> Conflicting writes to memory locations in any given cache line
- will result in expensive cache misses.
- Greater numbers of concurrent writes and more-frequent
- concurrent writes will result in more dramatic slowdowns.
- RCU is therefore obligated to use algorithms that have
- sufficient locality to avoid significant performance and
- scalability problems.
-<li> As a rough rule of thumb, only one CPU's worth of processing
- may be carried out under the protection of any given exclusive
- lock.
- RCU must therefore use scalable locking designs.
-<li> Counters are finite, especially on 32-bit systems.
- RCU's use of counters must therefore tolerate counter wrap,
- or be designed such that counter wrap would take way more
- time than a single system is likely to run.
- An uptime of ten years is quite possible, a runtime
- of a century much less so.
- As an example of the latter, RCU's dyntick-idle nesting counter
- allows 54 bits for interrupt nesting level (this counter
- is 64 bits even on a 32-bit system).
- Overflowing this counter requires 2<sup>54</sup>
- half-interrupts on a given CPU without that CPU ever going idle.
- If a half-interrupt happened every microsecond, it would take
- 570 years of runtime to overflow this counter, which is currently
- believed to be an acceptably long time.
-<li> Linux systems can have thousands of CPUs running a single
- Linux kernel in a single shared-memory environment.
- RCU must therefore pay close attention to high-end scalability.
-</ol>
-
-<p>
-This last parallelism fact of life means that RCU must pay special
-attention to the preceding facts of life.
-The idea that Linux might scale to systems with thousands of CPUs would
-have been met with some skepticism in the 1990s, but these requirements
-would have otherwise have been unsurprising, even in the early 1990s.
-
-<h2><a name="Quality-of-Implementation Requirements">Quality-of-Implementation Requirements</a></h2>
-
-<p>
-These sections list quality-of-implementation requirements.
-Although an RCU implementation that ignores these requirements could
-still be used, it would likely be subject to limitations that would
-make it inappropriate for industrial-strength production use.
-Classes of quality-of-implementation requirements are as follows:
-
-<ol>
-<li> <a href="#Specialization">Specialization</a>
-<li> <a href="#Performance and Scalability">Performance and Scalability</a>
-<li> <a href="#Composability">Composability</a>
-<li> <a href="#Corner Cases">Corner Cases</a>
-</ol>
-
-<p>
-These classes is covered in the following sections.
-
-<h3><a name="Specialization">Specialization</a></h3>
-
-<p>
-RCU is and always has been intended primarily for read-mostly situations,
-which means that RCU's read-side primitives are optimized, often at the
-expense of its update-side primitives.
-Experience thus far is captured by the following list of situations:
-
-<ol>
-<li> Read-mostly data, where stale and inconsistent data is not
- a problem: RCU works great!
-<li> Read-mostly data, where data must be consistent:
- RCU works well.
-<li> Read-write data, where data must be consistent:
- RCU <i>might</i> work OK.
- Or not.
-<li> Write-mostly data, where data must be consistent:
- RCU is very unlikely to be the right tool for the job,
- with the following exceptions, where RCU can provide:
- <ol type=a>
- <li> Existence guarantees for update-friendly mechanisms.
- <li> Wait-free read-side primitives for real-time use.
- </ol>
-</ol>
-
-<p>
-This focus on read-mostly situations means that RCU must interoperate
-with other synchronization primitives.
-For example, the <tt>add_gp()</tt> and <tt>remove_gp_synchronous()</tt>
-examples discussed earlier use RCU to protect readers and locking to
-coordinate updaters.
-However, the need extends much farther, requiring that a variety of
-synchronization primitives be legal within RCU read-side critical sections,
-including spinlocks, sequence locks, atomic operations, reference
-counters, and memory barriers.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
- What about sleeping locks?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
- These are forbidden within Linux-kernel RCU read-side critical
- sections because it is not legal to place a quiescent state
- (in this case, voluntary context switch) within an RCU read-side
- critical section.
- However, sleeping locks may be used within userspace RCU read-side
- critical sections, and also within Linux-kernel sleepable RCU
- <a href="#Sleepable RCU"><font color="ffffff">(SRCU)</font></a>
- read-side critical sections.
- In addition, the -rt patchset turns spinlocks into a
- sleeping locks so that the corresponding critical sections
- can be preempted, which also means that these sleeplockified
- spinlocks (but not other sleeping locks!) may be acquire within
- -rt-Linux-kernel RCU read-side critical sections.
- </font>
-
- <p><font color="ffffff">
- Note that it <i>is</i> legal for a normal RCU read-side
- critical section to conditionally acquire a sleeping locks
- (as in <tt>mutex_trylock()</tt>), but only as long as it does
- not loop indefinitely attempting to conditionally acquire that
- sleeping locks.
- The key point is that things like <tt>mutex_trylock()</tt>
- either return with the mutex held, or return an error indication if
- the mutex was not immediately available.
- Either way, <tt>mutex_trylock()</tt> returns immediately without
- sleeping.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>
-It often comes as a surprise that many algorithms do not require a
-consistent view of data, but many can function in that mode,
-with network routing being the poster child.
-Internet routing algorithms take significant time to propagate
-updates, so that by the time an update arrives at a given system,
-that system has been sending network traffic the wrong way for
-a considerable length of time.
-Having a few threads continue to send traffic the wrong way for a
-few more milliseconds is clearly not a problem: In the worst case,
-TCP retransmissions will eventually get the data where it needs to go.
-In general, when tracking the state of the universe outside of the
-computer, some level of inconsistency must be tolerated due to
-speed-of-light delays if nothing else.
-
-<p>
-Furthermore, uncertainty about external state is inherent in many cases.
-For example, a pair of veterinarians might use heartbeat to determine
-whether or not a given cat was alive.
-But how long should they wait after the last heartbeat to decide that
-the cat is in fact dead?
-Waiting less than 400 milliseconds makes no sense because this would
-mean that a relaxed cat would be considered to cycle between death
-and life more than 100 times per minute.
-Moreover, just as with human beings, a cat's heart might stop for
-some period of time, so the exact wait period is a judgment call.
-One of our pair of veterinarians might wait 30 seconds before pronouncing
-the cat dead, while the other might insist on waiting a full minute.
-The two veterinarians would then disagree on the state of the cat during
-the final 30 seconds of the minute following the last heartbeat.
-
-<p>
-Interestingly enough, this same situation applies to hardware.
-When push comes to shove, how do we tell whether or not some
-external server has failed?
-We send messages to it periodically, and declare it failed if we
-don't receive a response within a given period of time.
-Policy decisions can usually tolerate short
-periods of inconsistency.
-The policy was decided some time ago, and is only now being put into
-effect, so a few milliseconds of delay is normally inconsequential.
-
-<p>
-However, there are algorithms that absolutely must see consistent data.
-For example, the translation between a user-level SystemV semaphore
-ID to the corresponding in-kernel data structure is protected by RCU,
-but it is absolutely forbidden to update a semaphore that has just been
-removed.
-In the Linux kernel, this need for consistency is accommodated by acquiring
-spinlocks located in the in-kernel data structure from within
-the RCU read-side critical section, and this is indicated by the
-green box in the figure above.
-Many other techniques may be used, and are in fact used within the
-Linux kernel.
-
-<p>
-In short, RCU is not required to maintain consistency, and other
-mechanisms may be used in concert with RCU when consistency is required.
-RCU's specialization allows it to do its job extremely well, and its
-ability to interoperate with other synchronization mechanisms allows
-the right mix of synchronization tools to be used for a given job.
-
-<h3><a name="Performance and Scalability">Performance and Scalability</a></h3>
-
-<p>
-Energy efficiency is a critical component of performance today,
-and Linux-kernel RCU implementations must therefore avoid unnecessarily
-awakening idle CPUs.
-I cannot claim that this requirement was premeditated.
-In fact, I learned of it during a telephone conversation in which I
-was given &ldquo;frank and open&rdquo; feedback on the importance
-of energy efficiency in battery-powered systems and on specific
-energy-efficiency shortcomings of the Linux-kernel RCU implementation.
-In my experience, the battery-powered embedded community will consider
-any unnecessary wakeups to be extremely unfriendly acts.
-So much so that mere Linux-kernel-mailing-list posts are
-insufficient to vent their ire.
-
-<p>
-Memory consumption is not particularly important for in most
-situations, and has become decreasingly
-so as memory sizes have expanded and memory
-costs have plummeted.
-However, as I learned from Matt Mackall's
-<a href="http://elinux.org/Linux_Tiny-FAQ">bloatwatch</a>
-efforts, memory footprint is critically important on single-CPU systems with
-non-preemptible (<tt>CONFIG_PREEMPT=n</tt>) kernels, and thus
-<a href="https://lkml.kernel.org/g/20090113221724.GA15307@linux.vnet.ibm.com">tiny RCU</a>
-was born.
-Josh Triplett has since taken over the small-memory banner with his
-<a href="https://tiny.wiki.kernel.org/">Linux kernel tinification</a>
-project, which resulted in
-<a href="#Sleepable RCU">SRCU</a>
-becoming optional for those kernels not needing it.
-
-<p>
-The remaining performance requirements are, for the most part,
-unsurprising.
-For example, in keeping with RCU's read-side specialization,
-<tt>rcu_dereference()</tt> should have negligible overhead (for
-example, suppression of a few minor compiler optimizations).
-Similarly, in non-preemptible environments, <tt>rcu_read_lock()</tt> and
-<tt>rcu_read_unlock()</tt> should have exactly zero overhead.
-
-<p>
-In preemptible environments, in the case where the RCU read-side
-critical section was not preempted (as will be the case for the
-highest-priority real-time process), <tt>rcu_read_lock()</tt> and
-<tt>rcu_read_unlock()</tt> should have minimal overhead.
-In particular, they should not contain atomic read-modify-write
-operations, memory-barrier instructions, preemption disabling,
-interrupt disabling, or backwards branches.
-However, in the case where the RCU read-side critical section was preempted,
-<tt>rcu_read_unlock()</tt> may acquire spinlocks and disable interrupts.
-This is why it is better to nest an RCU read-side critical section
-within a preempt-disable region than vice versa, at least in cases
-where that critical section is short enough to avoid unduly degrading
-real-time latencies.
-
-<p>
-The <tt>synchronize_rcu()</tt> grace-period-wait primitive is
-optimized for throughput.
-It may therefore incur several milliseconds of latency in addition to
-the duration of the longest RCU read-side critical section.
-On the other hand, multiple concurrent invocations of
-<tt>synchronize_rcu()</tt> are required to use batching optimizations
-so that they can be satisfied by a single underlying grace-period-wait
-operation.
-For example, in the Linux kernel, it is not unusual for a single
-grace-period-wait operation to serve more than
-<a href="https://www.usenix.org/conference/2004-usenix-annual-technical-conference/making-rcu-safe-deep-sub-millisecond-response">1,000 separate invocations</a>
-of <tt>synchronize_rcu()</tt>, thus amortizing the per-invocation
-overhead down to nearly zero.
-However, the grace-period optimization is also required to avoid
-measurable degradation of real-time scheduling and interrupt latencies.
-
-<p>
-In some cases, the multi-millisecond <tt>synchronize_rcu()</tt>
-latencies are unacceptable.
-In these cases, <tt>synchronize_rcu_expedited()</tt> may be used
-instead, reducing the grace-period latency down to a few tens of
-microseconds on small systems, at least in cases where the RCU read-side
-critical sections are short.
-There are currently no special latency requirements for
-<tt>synchronize_rcu_expedited()</tt> on large systems, but,
-consistent with the empirical nature of the RCU specification,
-that is subject to change.
-However, there most definitely are scalability requirements:
-A storm of <tt>synchronize_rcu_expedited()</tt> invocations on 4096
-CPUs should at least make reasonable forward progress.
-In return for its shorter latencies, <tt>synchronize_rcu_expedited()</tt>
-is permitted to impose modest degradation of real-time latency
-on non-idle online CPUs.
-Here, &ldquo;modest&rdquo; means roughly the same latency
-degradation as a scheduling-clock interrupt.
-
-<p>
-There are a number of situations where even
-<tt>synchronize_rcu_expedited()</tt>'s reduced grace-period
-latency is unacceptable.
-In these situations, the asynchronous <tt>call_rcu()</tt> can be
-used in place of <tt>synchronize_rcu()</tt> as follows:
-
-<blockquote>
-<pre>
- 1 struct foo {
- 2 int a;
- 3 int b;
- 4 struct rcu_head rh;
- 5 };
- 6
- 7 static void remove_gp_cb(struct rcu_head *rhp)
- 8 {
- 9 struct foo *p = container_of(rhp, struct foo, rh);
-10
-11 kfree(p);
-12 }
-13
-14 bool remove_gp_asynchronous(void)
-15 {
-16 struct foo *p;
-17
-18 spin_lock(&amp;gp_lock);
-19 p = rcu_dereference(gp);
-20 if (!p) {
-21 spin_unlock(&amp;gp_lock);
-22 return false;
-23 }
-24 rcu_assign_pointer(gp, NULL);
-25 call_rcu(&amp;p-&gt;rh, remove_gp_cb);
-26 spin_unlock(&amp;gp_lock);
-27 return true;
-28 }
-</pre>
-</blockquote>
-
-<p>
-A definition of <tt>struct foo</tt> is finally needed, and appears
-on lines&nbsp;1-5.
-The function <tt>remove_gp_cb()</tt> is passed to <tt>call_rcu()</tt>
-on line&nbsp;25, and will be invoked after the end of a subsequent
-grace period.
-This gets the same effect as <tt>remove_gp_synchronous()</tt>,
-but without forcing the updater to wait for a grace period to elapse.
-The <tt>call_rcu()</tt> function may be used in a number of
-situations where neither <tt>synchronize_rcu()</tt> nor
-<tt>synchronize_rcu_expedited()</tt> would be legal,
-including within preempt-disable code, <tt>local_bh_disable()</tt> code,
-interrupt-disable code, and interrupt handlers.
-However, even <tt>call_rcu()</tt> is illegal within NMI handlers
-and from idle and offline CPUs.
-The callback function (<tt>remove_gp_cb()</tt> in this case) will be
-executed within softirq (software interrupt) environment within the
-Linux kernel,
-either within a real softirq handler or under the protection
-of <tt>local_bh_disable()</tt>.
-In both the Linux kernel and in userspace, it is bad practice to
-write an RCU callback function that takes too long.
-Long-running operations should be relegated to separate threads or
-(in the Linux kernel) workqueues.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
- Why does line&nbsp;19 use <tt>rcu_access_pointer()</tt>?
- After all, <tt>call_rcu()</tt> on line&nbsp;25 stores into the
- structure, which would interact badly with concurrent insertions.
- Doesn't this mean that <tt>rcu_dereference()</tt> is required?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
- Presumably the <tt>-&gt;gp_lock</tt> acquired on line&nbsp;18 excludes
- any changes, including any insertions that <tt>rcu_dereference()</tt>
- would protect against.
- Therefore, any insertions will be delayed until after
- <tt>-&gt;gp_lock</tt>
- is released on line&nbsp;25, which in turn means that
- <tt>rcu_access_pointer()</tt> suffices.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>
-However, all that <tt>remove_gp_cb()</tt> is doing is
-invoking <tt>kfree()</tt> on the data element.
-This is a common idiom, and is supported by <tt>kfree_rcu()</tt>,
-which allows &ldquo;fire and forget&rdquo; operation as shown below:
-
-<blockquote>
-<pre>
- 1 struct foo {
- 2 int a;
- 3 int b;
- 4 struct rcu_head rh;
- 5 };
- 6
- 7 bool remove_gp_faf(void)
- 8 {
- 9 struct foo *p;
-10
-11 spin_lock(&amp;gp_lock);
-12 p = rcu_dereference(gp);
-13 if (!p) {
-14 spin_unlock(&amp;gp_lock);
-15 return false;
-16 }
-17 rcu_assign_pointer(gp, NULL);
-18 kfree_rcu(p, rh);
-19 spin_unlock(&amp;gp_lock);
-20 return true;
-21 }
-</pre>
-</blockquote>
-
-<p>
-Note that <tt>remove_gp_faf()</tt> simply invokes
-<tt>kfree_rcu()</tt> and proceeds, without any need to pay any
-further attention to the subsequent grace period and <tt>kfree()</tt>.
-It is permissible to invoke <tt>kfree_rcu()</tt> from the same
-environments as for <tt>call_rcu()</tt>.
-Interestingly enough, DYNIX/ptx had the equivalents of
-<tt>call_rcu()</tt> and <tt>kfree_rcu()</tt>, but not
-<tt>synchronize_rcu()</tt>.
-This was due to the fact that RCU was not heavily used within DYNIX/ptx,
-so the very few places that needed something like
-<tt>synchronize_rcu()</tt> simply open-coded it.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
- Earlier it was claimed that <tt>call_rcu()</tt> and
- <tt>kfree_rcu()</tt> allowed updaters to avoid being blocked
- by readers.
- But how can that be correct, given that the invocation of the callback
- and the freeing of the memory (respectively) must still wait for
- a grace period to elapse?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
- We could define things this way, but keep in mind that this sort of
- definition would say that updates in garbage-collected languages
- cannot complete until the next time the garbage collector runs,
- which does not seem at all reasonable.
- The key point is that in most cases, an updater using either
- <tt>call_rcu()</tt> or <tt>kfree_rcu()</tt> can proceed to the
- next update as soon as it has invoked <tt>call_rcu()</tt> or
- <tt>kfree_rcu()</tt>, without having to wait for a subsequent
- grace period.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>
-But what if the updater must wait for the completion of code to be
-executed after the end of the grace period, but has other tasks
-that can be carried out in the meantime?
-The polling-style <tt>get_state_synchronize_rcu()</tt> and
-<tt>cond_synchronize_rcu()</tt> functions may be used for this
-purpose, as shown below:
-
-<blockquote>
-<pre>
- 1 bool remove_gp_poll(void)
- 2 {
- 3 struct foo *p;
- 4 unsigned long s;
- 5
- 6 spin_lock(&amp;gp_lock);
- 7 p = rcu_access_pointer(gp);
- 8 if (!p) {
- 9 spin_unlock(&amp;gp_lock);
-10 return false;
-11 }
-12 rcu_assign_pointer(gp, NULL);
-13 spin_unlock(&amp;gp_lock);
-14 s = get_state_synchronize_rcu();
-15 do_something_while_waiting();
-16 cond_synchronize_rcu(s);
-17 kfree(p);
-18 return true;
-19 }
-</pre>
-</blockquote>
-
-<p>
-On line&nbsp;14, <tt>get_state_synchronize_rcu()</tt> obtains a
-&ldquo;cookie&rdquo; from RCU,
-then line&nbsp;15 carries out other tasks,
-and finally, line&nbsp;16 returns immediately if a grace period has
-elapsed in the meantime, but otherwise waits as required.
-The need for <tt>get_state_synchronize_rcu</tt> and
-<tt>cond_synchronize_rcu()</tt> has appeared quite recently,
-so it is too early to tell whether they will stand the test of time.
-
-<p>
-RCU thus provides a range of tools to allow updaters to strike the
-required tradeoff between latency, flexibility and CPU overhead.
-
-<h3><a name="Composability">Composability</a></h3>
-
-<p>
-Composability has received much attention in recent years, perhaps in part
-due to the collision of multicore hardware with object-oriented techniques
-designed in single-threaded environments for single-threaded use.
-And in theory, RCU read-side critical sections may be composed, and in
-fact may be nested arbitrarily deeply.
-In practice, as with all real-world implementations of composable
-constructs, there are limitations.
-
-<p>
-Implementations of RCU for which <tt>rcu_read_lock()</tt>
-and <tt>rcu_read_unlock()</tt> generate no code, such as
-Linux-kernel RCU when <tt>CONFIG_PREEMPT=n</tt>, can be
-nested arbitrarily deeply.
-After all, there is no overhead.
-Except that if all these instances of <tt>rcu_read_lock()</tt>
-and <tt>rcu_read_unlock()</tt> are visible to the compiler,
-compilation will eventually fail due to exhausting memory,
-mass storage, or user patience, whichever comes first.
-If the nesting is not visible to the compiler, as is the case with
-mutually recursive functions each in its own translation unit,
-stack overflow will result.
-If the nesting takes the form of loops, perhaps in the guise of tail
-recursion, either the control variable
-will overflow or (in the Linux kernel) you will get an RCU CPU stall warning.
-Nevertheless, this class of RCU implementations is one
-of the most composable constructs in existence.
-
-<p>
-RCU implementations that explicitly track nesting depth
-are limited by the nesting-depth counter.
-For example, the Linux kernel's preemptible RCU limits nesting to
-<tt>INT_MAX</tt>.
-This should suffice for almost all practical purposes.
-That said, a consecutive pair of RCU read-side critical sections
-between which there is an operation that waits for a grace period
-cannot be enclosed in another RCU read-side critical section.
-This is because it is not legal to wait for a grace period within
-an RCU read-side critical section: To do so would result either
-in deadlock or
-in RCU implicitly splitting the enclosing RCU read-side critical
-section, neither of which is conducive to a long-lived and prosperous
-kernel.
-
-<p>
-It is worth noting that RCU is not alone in limiting composability.
-For example, many transactional-memory implementations prohibit
-composing a pair of transactions separated by an irrevocable
-operation (for example, a network receive operation).
-For another example, lock-based critical sections can be composed
-surprisingly freely, but only if deadlock is avoided.
-
-<p>
-In short, although RCU read-side critical sections are highly composable,
-care is required in some situations, just as is the case for any other
-composable synchronization mechanism.
-
-<h3><a name="Corner Cases">Corner Cases</a></h3>
-
-<p>
-A given RCU workload might have an endless and intense stream of
-RCU read-side critical sections, perhaps even so intense that there
-was never a point in time during which there was not at least one
-RCU read-side critical section in flight.
-RCU cannot allow this situation to block grace periods: As long as
-all the RCU read-side critical sections are finite, grace periods
-must also be finite.
-
-<p>
-That said, preemptible RCU implementations could potentially result
-in RCU read-side critical sections being preempted for long durations,
-which has the effect of creating a long-duration RCU read-side
-critical section.
-This situation can arise only in heavily loaded systems, but systems using
-real-time priorities are of course more vulnerable.
-Therefore, RCU priority boosting is provided to help deal with this
-case.
-That said, the exact requirements on RCU priority boosting will likely
-evolve as more experience accumulates.
-
-<p>
-Other workloads might have very high update rates.
-Although one can argue that such workloads should instead use
-something other than RCU, the fact remains that RCU must
-handle such workloads gracefully.
-This requirement is another factor driving batching of grace periods,
-but it is also the driving force behind the checks for large numbers
-of queued RCU callbacks in the <tt>call_rcu()</tt> code path.
-Finally, high update rates should not delay RCU read-side critical
-sections, although some small read-side delays can occur when using
-<tt>synchronize_rcu_expedited()</tt>, courtesy of this function's use
-of <tt>smp_call_function_single()</tt>.
-
-<p>
-Although all three of these corner cases were understood in the early
-1990s, a simple user-level test consisting of <tt>close(open(path))</tt>
-in a tight loop
-in the early 2000s suddenly provided a much deeper appreciation of the
-high-update-rate corner case.
-This test also motivated addition of some RCU code to react to high update
-rates, for example, if a given CPU finds itself with more than 10,000
-RCU callbacks queued, it will cause RCU to take evasive action by
-more aggressively starting grace periods and more aggressively forcing
-completion of grace-period processing.
-This evasive action causes the grace period to complete more quickly,
-but at the cost of restricting RCU's batching optimizations, thus
-increasing the CPU overhead incurred by that grace period.
-
-<h2><a name="Software-Engineering Requirements">
-Software-Engineering Requirements</a></h2>
-
-<p>
-Between Murphy's Law and &ldquo;To err is human&rdquo;, it is necessary to
-guard against mishaps and misuse:
-
-<ol>
-<li> It is all too easy to forget to use <tt>rcu_read_lock()</tt>
- everywhere that it is needed, so kernels built with
- <tt>CONFIG_PROVE_RCU=y</tt> will splat if
- <tt>rcu_dereference()</tt> is used outside of an
- RCU read-side critical section.
- Update-side code can use <tt>rcu_dereference_protected()</tt>,
- which takes a
- <a href="https://lwn.net/Articles/371986/">lockdep expression</a>
- to indicate what is providing the protection.
- If the indicated protection is not provided, a lockdep splat
- is emitted.
-
- <p>
- Code shared between readers and updaters can use
- <tt>rcu_dereference_check()</tt>, which also takes a
- lockdep expression, and emits a lockdep splat if neither
- <tt>rcu_read_lock()</tt> nor the indicated protection
- is in place.
- In addition, <tt>rcu_dereference_raw()</tt> is used in those
- (hopefully rare) cases where the required protection cannot
- be easily described.
- Finally, <tt>rcu_read_lock_held()</tt> is provided to
- allow a function to verify that it has been invoked within
- an RCU read-side critical section.
- I was made aware of this set of requirements shortly after Thomas
- Gleixner audited a number of RCU uses.
-<li> A given function might wish to check for RCU-related preconditions
- upon entry, before using any other RCU API.
- The <tt>rcu_lockdep_assert()</tt> does this job,
- asserting the expression in kernels having lockdep enabled
- and doing nothing otherwise.
-<li> It is also easy to forget to use <tt>rcu_assign_pointer()</tt>
- and <tt>rcu_dereference()</tt>, perhaps (incorrectly)
- substituting a simple assignment.
- To catch this sort of error, a given RCU-protected pointer may be
- tagged with <tt>__rcu</tt>, after which sparse
- will complain about simple-assignment accesses to that pointer.
- Arnd Bergmann made me aware of this requirement, and also
- supplied the needed
- <a href="https://lwn.net/Articles/376011/">patch series</a>.
-<li> Kernels built with <tt>CONFIG_DEBUG_OBJECTS_RCU_HEAD=y</tt>
- will splat if a data element is passed to <tt>call_rcu()</tt>
- twice in a row, without a grace period in between.
- (This error is similar to a double free.)
- The corresponding <tt>rcu_head</tt> structures that are
- dynamically allocated are automatically tracked, but
- <tt>rcu_head</tt> structures allocated on the stack
- must be initialized with <tt>init_rcu_head_on_stack()</tt>
- and cleaned up with <tt>destroy_rcu_head_on_stack()</tt>.
- Similarly, statically allocated non-stack <tt>rcu_head</tt>
- structures must be initialized with <tt>init_rcu_head()</tt>
- and cleaned up with <tt>destroy_rcu_head()</tt>.
- Mathieu Desnoyers made me aware of this requirement, and also
- supplied the needed
- <a href="https://lkml.kernel.org/g/20100319013024.GA28456@Krystal">patch</a>.
-<li> An infinite loop in an RCU read-side critical section will
- eventually trigger an RCU CPU stall warning splat, with
- the duration of &ldquo;eventually&rdquo; being controlled by the
- <tt>RCU_CPU_STALL_TIMEOUT</tt> <tt>Kconfig</tt> option, or,
- alternatively, by the
- <tt>rcupdate.rcu_cpu_stall_timeout</tt> boot/sysfs
- parameter.
- However, RCU is not obligated to produce this splat
- unless there is a grace period waiting on that particular
- RCU read-side critical section.
- <p>
- Some extreme workloads might intentionally delay
- RCU grace periods, and systems running those workloads can
- be booted with <tt>rcupdate.rcu_cpu_stall_suppress</tt>
- to suppress the splats.
- This kernel parameter may also be set via <tt>sysfs</tt>.
- Furthermore, RCU CPU stall warnings are counter-productive
- during sysrq dumps and during panics.
- RCU therefore supplies the <tt>rcu_sysrq_start()</tt> and
- <tt>rcu_sysrq_end()</tt> API members to be called before
- and after long sysrq dumps.
- RCU also supplies the <tt>rcu_panic()</tt> notifier that is
- automatically invoked at the beginning of a panic to suppress
- further RCU CPU stall warnings.
-
- <p>
- This requirement made itself known in the early 1990s, pretty
- much the first time that it was necessary to debug a CPU stall.
- That said, the initial implementation in DYNIX/ptx was quite
- generic in comparison with that of Linux.
-<li> Although it would be very good to detect pointers leaking out
- of RCU read-side critical sections, there is currently no
- good way of doing this.
- One complication is the need to distinguish between pointers
- leaking and pointers that have been handed off from RCU to
- some other synchronization mechanism, for example, reference
- counting.
-<li> In kernels built with <tt>CONFIG_RCU_TRACE=y</tt>, RCU-related
- information is provided via event tracing.
-<li> Open-coded use of <tt>rcu_assign_pointer()</tt> and
- <tt>rcu_dereference()</tt> to create typical linked
- data structures can be surprisingly error-prone.
- Therefore, RCU-protected
- <a href="https://lwn.net/Articles/609973/#RCU List APIs">linked lists</a>
- and, more recently, RCU-protected
- <a href="https://lwn.net/Articles/612100/">hash tables</a>
- are available.
- Many other special-purpose RCU-protected data structures are
- available in the Linux kernel and the userspace RCU library.
-<li> Some linked structures are created at compile time, but still
- require <tt>__rcu</tt> checking.
- The <tt>RCU_POINTER_INITIALIZER()</tt> macro serves this
- purpose.
-<li> It is not necessary to use <tt>rcu_assign_pointer()</tt>
- when creating linked structures that are to be published via
- a single external pointer.
- The <tt>RCU_INIT_POINTER()</tt> macro is provided for
- this task and also for assigning <tt>NULL</tt> pointers
- at runtime.
-</ol>
-
-<p>
-This not a hard-and-fast list: RCU's diagnostic capabilities will
-continue to be guided by the number and type of usage bugs found
-in real-world RCU usage.
-
-<h2><a name="Linux Kernel Complications">Linux Kernel Complications</a></h2>
-
-<p>
-The Linux kernel provides an interesting environment for all kinds of
-software, including RCU.
-Some of the relevant points of interest are as follows:
-
-<ol>
-<li> <a href="#Configuration">Configuration</a>.
-<li> <a href="#Firmware Interface">Firmware Interface</a>.
-<li> <a href="#Early Boot">Early Boot</a>.
-<li> <a href="#Interrupts and NMIs">
- Interrupts and non-maskable interrupts (NMIs)</a>.
-<li> <a href="#Loadable Modules">Loadable Modules</a>.
-<li> <a href="#Hotplug CPU">Hotplug CPU</a>.
-<li> <a href="#Scheduler and RCU">Scheduler and RCU</a>.
-<li> <a href="#Tracing and RCU">Tracing and RCU</a>.
-<li> <a href="#Energy Efficiency">Energy Efficiency</a>.
-<li> <a href="#Scheduling-Clock Interrupts and RCU">
- Scheduling-Clock Interrupts and RCU</a>.
-<li> <a href="#Memory Efficiency">Memory Efficiency</a>.
-<li> <a href="#Performance, Scalability, Response Time, and Reliability">
- Performance, Scalability, Response Time, and Reliability</a>.
-</ol>
-
-<p>
-This list is probably incomplete, but it does give a feel for the
-most notable Linux-kernel complications.
-Each of the following sections covers one of the above topics.
-
-<h3><a name="Configuration">Configuration</a></h3>
-
-<p>
-RCU's goal is automatic configuration, so that almost nobody
-needs to worry about RCU's <tt>Kconfig</tt> options.
-And for almost all users, RCU does in fact work well
-&ldquo;out of the box.&rdquo;
-
-<p>
-However, there are specialized use cases that are handled by
-kernel boot parameters and <tt>Kconfig</tt> options.
-Unfortunately, the <tt>Kconfig</tt> system will explicitly ask users
-about new <tt>Kconfig</tt> options, which requires almost all of them
-be hidden behind a <tt>CONFIG_RCU_EXPERT</tt> <tt>Kconfig</tt> option.
-
-<p>
-This all should be quite obvious, but the fact remains that
-Linus Torvalds recently had to
-<a href="https://lkml.kernel.org/g/CA+55aFy4wcCwaL4okTs8wXhGZ5h-ibecy_Meg9C4MNQrUnwMcg@mail.gmail.com">remind</a>
-me of this requirement.
-
-<h3><a name="Firmware Interface">Firmware Interface</a></h3>
-
-<p>
-In many cases, kernel obtains information about the system from the
-firmware, and sometimes things are lost in translation.
-Or the translation is accurate, but the original message is bogus.
-
-<p>
-For example, some systems' firmware overreports the number of CPUs,
-sometimes by a large factor.
-If RCU naively believed the firmware, as it used to do,
-it would create too many per-CPU kthreads.
-Although the resulting system will still run correctly, the extra
-kthreads needlessly consume memory and can cause confusion
-when they show up in <tt>ps</tt> listings.
-
-<p>
-RCU must therefore wait for a given CPU to actually come online before
-it can allow itself to believe that the CPU actually exists.
-The resulting &ldquo;ghost CPUs&rdquo; (which are never going to
-come online) cause a number of
-<a href="https://paulmck.livejournal.com/37494.html">interesting complications</a>.
-
-<h3><a name="Early Boot">Early Boot</a></h3>
-
-<p>
-The Linux kernel's boot sequence is an interesting process,
-and RCU is used early, even before <tt>rcu_init()</tt>
-is invoked.
-In fact, a number of RCU's primitives can be used as soon as the
-initial task's <tt>task_struct</tt> is available and the
-boot CPU's per-CPU variables are set up.
-The read-side primitives (<tt>rcu_read_lock()</tt>,
-<tt>rcu_read_unlock()</tt>, <tt>rcu_dereference()</tt>,
-and <tt>rcu_access_pointer()</tt>) will operate normally very early on,
-as will <tt>rcu_assign_pointer()</tt>.
-
-<p>
-Although <tt>call_rcu()</tt> may be invoked at any
-time during boot, callbacks are not guaranteed to be invoked until after
-all of RCU's kthreads have been spawned, which occurs at
-<tt>early_initcall()</tt> time.
-This delay in callback invocation is due to the fact that RCU does not
-invoke callbacks until it is fully initialized, and this full initialization
-cannot occur until after the scheduler has initialized itself to the
-point where RCU can spawn and run its kthreads.
-In theory, it would be possible to invoke callbacks earlier,
-however, this is not a panacea because there would be severe restrictions
-on what operations those callbacks could invoke.
-
-<p>
-Perhaps surprisingly, <tt>synchronize_rcu()</tt> and
-<tt>synchronize_rcu_expedited()</tt>,
-will operate normally
-during very early boot, the reason being that there is only one CPU
-and preemption is disabled.
-This means that the call <tt>synchronize_rcu()</tt> (or friends)
-itself is a quiescent
-state and thus a grace period, so the early-boot implementation can
-be a no-op.
-
-<p>
-However, once the scheduler has spawned its first kthread, this early
-boot trick fails for <tt>synchronize_rcu()</tt> (as well as for
-<tt>synchronize_rcu_expedited()</tt>) in <tt>CONFIG_PREEMPT=y</tt>
-kernels.
-The reason is that an RCU read-side critical section might be preempted,
-which means that a subsequent <tt>synchronize_rcu()</tt> really does have
-to wait for something, as opposed to simply returning immediately.
-Unfortunately, <tt>synchronize_rcu()</tt> can't do this until all of
-its kthreads are spawned, which doesn't happen until some time during
-<tt>early_initcalls()</tt> time.
-But this is no excuse: RCU is nevertheless required to correctly handle
-synchronous grace periods during this time period.
-Once all of its kthreads are up and running, RCU starts running
-normally.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
- How can RCU possibly handle grace periods before all of its
- kthreads have been spawned???
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
- Very carefully!
- </font>
-
- <p><font color="ffffff">
- During the &ldquo;dead zone&rdquo; between the time that the
- scheduler spawns the first task and the time that all of RCU's
- kthreads have been spawned, all synchronous grace periods are
- handled by the expedited grace-period mechanism.
- At runtime, this expedited mechanism relies on workqueues, but
- during the dead zone the requesting task itself drives the
- desired expedited grace period.
- Because dead-zone execution takes place within task context,
- everything works.
- Once the dead zone ends, expedited grace periods go back to
- using workqueues, as is required to avoid problems that would
- otherwise occur when a user task received a POSIX signal while
- driving an expedited grace period.
- </font>
-
- <p><font color="ffffff">
- And yes, this does mean that it is unhelpful to send POSIX
- signals to random tasks between the time that the scheduler
- spawns its first kthread and the time that RCU's kthreads
- have all been spawned.
- If there ever turns out to be a good reason for sending POSIX
- signals during that time, appropriate adjustments will be made.
- (If it turns out that POSIX signals are sent during this time for
- no good reason, other adjustments will be made, appropriate
- or otherwise.)
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>
-I learned of these boot-time requirements as a result of a series of
-system hangs.
-
-<h3><a name="Interrupts and NMIs">Interrupts and NMIs</a></h3>
-
-<p>
-The Linux kernel has interrupts, and RCU read-side critical sections are
-legal within interrupt handlers and within interrupt-disabled regions
-of code, as are invocations of <tt>call_rcu()</tt>.
-
-<p>
-Some Linux-kernel architectures can enter an interrupt handler from
-non-idle process context, and then just never leave it, instead stealthily
-transitioning back to process context.
-This trick is sometimes used to invoke system calls from inside the kernel.
-These &ldquo;half-interrupts&rdquo; mean that RCU has to be very careful
-about how it counts interrupt nesting levels.
-I learned of this requirement the hard way during a rewrite
-of RCU's dyntick-idle code.
-
-<p>
-The Linux kernel has non-maskable interrupts (NMIs), and
-RCU read-side critical sections are legal within NMI handlers.
-Thankfully, RCU update-side primitives, including
-<tt>call_rcu()</tt>, are prohibited within NMI handlers.
-
-<p>
-The name notwithstanding, some Linux-kernel architectures
-can have nested NMIs, which RCU must handle correctly.
-Andy Lutomirski
-<a href="https://lkml.kernel.org/r/CALCETrXLq1y7e_dKFPgou-FKHB6Pu-r8+t-6Ds+8=va7anBWDA@mail.gmail.com">surprised me</a>
-with this requirement;
-he also kindly surprised me with
-<a href="https://lkml.kernel.org/r/CALCETrXSY9JpW3uE6H8WYk81sg56qasA2aqmjMPsq5dOtzso=g@mail.gmail.com">an algorithm</a>
-that meets this requirement.
-
-<p>
-Furthermore, NMI handlers can be interrupted by what appear to RCU
-to be normal interrupts.
-One way that this can happen is for code that directly invokes
-<tt>rcu_irq_enter()</tt> and </tt>rcu_irq_exit()</tt> to be called
-from an NMI handler.
-This astonishing fact of life prompted the current code structure,
-which has <tt>rcu_irq_enter()</tt> invoking <tt>rcu_nmi_enter()</tt>
-and <tt>rcu_irq_exit()</tt> invoking <tt>rcu_nmi_exit()</tt>.
-And yes, I also learned of this requirement the hard way.
-
-<h3><a name="Loadable Modules">Loadable Modules</a></h3>
-
-<p>
-The Linux kernel has loadable modules, and these modules can
-also be unloaded.
-After a given module has been unloaded, any attempt to call
-one of its functions results in a segmentation fault.
-The module-unload functions must therefore cancel any
-delayed calls to loadable-module functions, for example,
-any outstanding <tt>mod_timer()</tt> must be dealt with
-via <tt>del_timer_sync()</tt> or similar.
-
-<p>
-Unfortunately, there is no way to cancel an RCU callback;
-once you invoke <tt>call_rcu()</tt>, the callback function is
-going to eventually be invoked, unless the system goes down first.
-Because it is normally considered socially irresponsible to crash the system
-in response to a module unload request, we need some other way
-to deal with in-flight RCU callbacks.
-
-<p>
-RCU therefore provides
-<tt><a href="https://lwn.net/Articles/217484/">rcu_barrier()</a></tt>,
-which waits until all in-flight RCU callbacks have been invoked.
-If a module uses <tt>call_rcu()</tt>, its exit function should therefore
-prevent any future invocation of <tt>call_rcu()</tt>, then invoke
-<tt>rcu_barrier()</tt>.
-In theory, the underlying module-unload code could invoke
-<tt>rcu_barrier()</tt> unconditionally, but in practice this would
-incur unacceptable latencies.
-
-<p>
-Nikita Danilov noted this requirement for an analogous filesystem-unmount
-situation, and Dipankar Sarma incorporated <tt>rcu_barrier()</tt> into RCU.
-The need for <tt>rcu_barrier()</tt> for module unloading became
-apparent later.
-
-<p>
-<b>Important note</b>: The <tt>rcu_barrier()</tt> function is not,
-repeat, <i>not</i>, obligated to wait for a grace period.
-It is instead only required to wait for RCU callbacks that have
-already been posted.
-Therefore, if there are no RCU callbacks posted anywhere in the system,
-<tt>rcu_barrier()</tt> is within its rights to return immediately.
-Even if there are callbacks posted, <tt>rcu_barrier()</tt> does not
-necessarily need to wait for a grace period.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
- Wait a minute!
- Each RCU callbacks must wait for a grace period to complete,
- and <tt>rcu_barrier()</tt> must wait for each pre-existing
- callback to be invoked.
- Doesn't <tt>rcu_barrier()</tt> therefore need to wait for
- a full grace period if there is even one callback posted anywhere
- in the system?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
- Absolutely not!!!
- </font>
-
- <p><font color="ffffff">
- Yes, each RCU callbacks must wait for a grace period to complete,
- but it might well be partly (or even completely) finished waiting
- by the time <tt>rcu_barrier()</tt> is invoked.
- In that case, <tt>rcu_barrier()</tt> need only wait for the
- remaining portion of the grace period to elapse.
- So even if there are quite a few callbacks posted,
- <tt>rcu_barrier()</tt> might well return quite quickly.
- </font>
-
- <p><font color="ffffff">
- So if you need to wait for a grace period as well as for all
- pre-existing callbacks, you will need to invoke both
- <tt>synchronize_rcu()</tt> and <tt>rcu_barrier()</tt>.
- If latency is a concern, you can always use workqueues
- to invoke them concurrently.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<h3><a name="Hotplug CPU">Hotplug CPU</a></h3>
-
-<p>
-The Linux kernel supports CPU hotplug, which means that CPUs
-can come and go.
-It is of course illegal to use any RCU API member from an offline CPU,
-with the exception of <a href="#Sleepable RCU">SRCU</a> read-side
-critical sections.
-This requirement was present from day one in DYNIX/ptx, but
-on the other hand, the Linux kernel's CPU-hotplug implementation
-is &ldquo;interesting.&rdquo;
-
-<p>
-The Linux-kernel CPU-hotplug implementation has notifiers that
-are used to allow the various kernel subsystems (including RCU)
-to respond appropriately to a given CPU-hotplug operation.
-Most RCU operations may be invoked from CPU-hotplug notifiers,
-including even synchronous grace-period operations such as
-<tt>synchronize_rcu()</tt> and <tt>synchronize_rcu_expedited()</tt>.
-
-<p>
-However, all-callback-wait operations such as
-<tt>rcu_barrier()</tt> are also not supported, due to the
-fact that there are phases of CPU-hotplug operations where
-the outgoing CPU's callbacks will not be invoked until after
-the CPU-hotplug operation ends, which could also result in deadlock.
-Furthermore, <tt>rcu_barrier()</tt> blocks CPU-hotplug operations
-during its execution, which results in another type of deadlock
-when invoked from a CPU-hotplug notifier.
-
-<h3><a name="Scheduler and RCU">Scheduler and RCU</a></h3>
-
-<p>
-RCU depends on the scheduler, and the scheduler uses RCU to
-protect some of its data structures.
-The preemptible-RCU <tt>rcu_read_unlock()</tt>
-implementation must therefore be written carefully to avoid deadlocks
-involving the scheduler's runqueue and priority-inheritance locks.
-In particular, <tt>rcu_read_unlock()</tt> must tolerate an
-interrupt where the interrupt handler invokes both
-<tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>.
-This possibility requires <tt>rcu_read_unlock()</tt> to use
-negative nesting levels to avoid destructive recursion via
-interrupt handler's use of RCU.
-
-<p>
-This scheduler-RCU requirement came as a
-<a href="https://lwn.net/Articles/453002/">complete surprise</a>.
-
-<p>
-As noted above, RCU makes use of kthreads, and it is necessary to
-avoid excessive CPU-time accumulation by these kthreads.
-This requirement was no surprise, but RCU's violation of it
-when running context-switch-heavy workloads when built with
-<tt>CONFIG_NO_HZ_FULL=y</tt>
-<a href="http://www.rdrop.com/users/paulmck/scalability/paper/BareMetal.2015.01.15b.pdf">did come as a surprise [PDF]</a>.
-RCU has made good progress towards meeting this requirement, even
-for context-switch-heavy <tt>CONFIG_NO_HZ_FULL=y</tt> workloads,
-but there is room for further improvement.
-
-<p>
-In the past, it was forbidden to disable interrupts across an
-<tt>rcu_read_unlock()</tt> unless that interrupt-disabled region
-of code also included the matching <tt>rcu_read_lock()</tt>.
-Violating this restriction could result in deadlocks involving the
-scheduler's runqueue and priority-inheritance spinlocks.
-This restriction was lifted when interrupt-disabled calls to
-<tt>rcu_read_unlock()</tt> started deferring the reporting of
-the resulting RCU-preempt quiescent state until the end of that
-interrupts-disabled region.
-This deferred reporting means that the scheduler's runqueue and
-priority-inheritance locks cannot be held while reporting an RCU-preempt
-quiescent state, which lifts the earlier restriction, at least from
-a deadlock perspective.
-Unfortunately, real-time systems using RCU priority boosting may
-need this restriction to remain in effect because deferred
-quiescent-state reporting also defers deboosting, which in turn
-degrades real-time latencies.
-
-<h3><a name="Tracing and RCU">Tracing and RCU</a></h3>
-
-<p>
-It is possible to use tracing on RCU code, but tracing itself
-uses RCU.
-For this reason, <tt>rcu_dereference_raw_notrace()</tt>
-is provided for use by tracing, which avoids the destructive
-recursion that could otherwise ensue.
-This API is also used by virtualization in some architectures,
-where RCU readers execute in environments in which tracing
-cannot be used.
-The tracing folks both located the requirement and provided the
-needed fix, so this surprise requirement was relatively painless.
-
-<h3><a name="Energy Efficiency">Energy Efficiency</a></h3>
-
-<p>
-Interrupting idle CPUs is considered socially unacceptable,
-especially by people with battery-powered embedded systems.
-RCU therefore conserves energy by detecting which CPUs are
-idle, including tracking CPUs that have been interrupted from idle.
-This is a large part of the energy-efficiency requirement,
-so I learned of this via an irate phone call.
-
-<p>
-Because RCU avoids interrupting idle CPUs, it is illegal to
-execute an RCU read-side critical section on an idle CPU.
-(Kernels built with <tt>CONFIG_PROVE_RCU=y</tt> will splat
-if you try it.)
-The <tt>RCU_NONIDLE()</tt> macro and <tt>_rcuidle</tt>
-event tracing is provided to work around this restriction.
-In addition, <tt>rcu_is_watching()</tt> may be used to
-test whether or not it is currently legal to run RCU read-side
-critical sections on this CPU.
-I learned of the need for diagnostics on the one hand
-and <tt>RCU_NONIDLE()</tt> on the other while inspecting
-idle-loop code.
-Steven Rostedt supplied <tt>_rcuidle</tt> event tracing,
-which is used quite heavily in the idle loop.
-However, there are some restrictions on the code placed within
-<tt>RCU_NONIDLE()</tt>:
-
-<ol>
-<li> Blocking is prohibited.
- In practice, this is not a serious restriction given that idle
- tasks are prohibited from blocking to begin with.
-<li> Although nesting <tt>RCU_NONIDLE()</tt> is permitted, they cannot
- nest indefinitely deeply.
- However, given that they can be nested on the order of a million
- deep, even on 32-bit systems, this should not be a serious
- restriction.
- This nesting limit would probably be reached long after the
- compiler OOMed or the stack overflowed.
-<li> Any code path that enters <tt>RCU_NONIDLE()</tt> must sequence
- out of that same <tt>RCU_NONIDLE()</tt>.
- For example, the following is grossly illegal:
-
- <blockquote>
- <pre>
- 1 RCU_NONIDLE({
- 2 do_something();
- 3 goto bad_idea; /* BUG!!! */
- 4 do_something_else();});
- 5 bad_idea:
- </pre>
- </blockquote>
-
- <p>
- It is just as illegal to transfer control into the middle of
- <tt>RCU_NONIDLE()</tt>'s argument.
- Yes, in theory, you could transfer in as long as you also
- transferred out, but in practice you could also expect to get sharply
- worded review comments.
-</ol>
-
-<p>
-It is similarly socially unacceptable to interrupt an
-<tt>nohz_full</tt> CPU running in userspace.
-RCU must therefore track <tt>nohz_full</tt> userspace
-execution.
-RCU must therefore be able to sample state at two points in
-time, and be able to determine whether or not some other CPU spent
-any time idle and/or executing in userspace.
-
-<p>
-These energy-efficiency requirements have proven quite difficult to
-understand and to meet, for example, there have been more than five
-clean-sheet rewrites of RCU's energy-efficiency code, the last of
-which was finally able to demonstrate
-<a href="http://www.rdrop.com/users/paulmck/realtime/paper/AMPenergy.2013.04.19a.pdf">real energy savings running on real hardware [PDF]</a>.
-As noted earlier,
-I learned of many of these requirements via angry phone calls:
-Flaming me on the Linux-kernel mailing list was apparently not
-sufficient to fully vent their ire at RCU's energy-efficiency bugs!
-
-<h3><a name="Scheduling-Clock Interrupts and RCU">
-Scheduling-Clock Interrupts and RCU</a></h3>
-
-<p>
-The kernel transitions between in-kernel non-idle execution, userspace
-execution, and the idle loop.
-Depending on kernel configuration, RCU handles these states differently:
-
-<table border=3>
-<tr><th><tt>HZ</tt> Kconfig</th>
- <th>In-Kernel</th>
- <th>Usermode</th>
- <th>Idle</th></tr>
-<tr><th align="left"><tt>HZ_PERIODIC</tt></th>
- <td>Can rely on scheduling-clock interrupt.</td>
- <td>Can rely on scheduling-clock interrupt and its
- detection of interrupt from usermode.</td>
- <td>Can rely on RCU's dyntick-idle detection.</td></tr>
-<tr><th align="left"><tt>NO_HZ_IDLE</tt></th>
- <td>Can rely on scheduling-clock interrupt.</td>
- <td>Can rely on scheduling-clock interrupt and its
- detection of interrupt from usermode.</td>
- <td>Can rely on RCU's dyntick-idle detection.</td></tr>
-<tr><th align="left"><tt>NO_HZ_FULL</tt></th>
- <td>Can only sometimes rely on scheduling-clock interrupt.
- In other cases, it is necessary to bound kernel execution
- times and/or use IPIs.</td>
- <td>Can rely on RCU's dyntick-idle detection.</td>
- <td>Can rely on RCU's dyntick-idle detection.</td></tr>
-</table>
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
- Why can't <tt>NO_HZ_FULL</tt> in-kernel execution rely on the
- scheduling-clock interrupt, just like <tt>HZ_PERIODIC</tt>
- and <tt>NO_HZ_IDLE</tt> do?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
- Because, as a performance optimization, <tt>NO_HZ_FULL</tt>
- does not necessarily re-enable the scheduling-clock interrupt
- on entry to each and every system call.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>
-However, RCU must be reliably informed as to whether any given
-CPU is currently in the idle loop, and, for <tt>NO_HZ_FULL</tt>,
-also whether that CPU is executing in usermode, as discussed
-<a href="#Energy Efficiency">earlier</a>.
-It also requires that the scheduling-clock interrupt be enabled when
-RCU needs it to be:
-
-<ol>
-<li> If a CPU is either idle or executing in usermode, and RCU believes
- it is non-idle, the scheduling-clock tick had better be running.
- Otherwise, you will get RCU CPU stall warnings. Or at best,
- very long (11-second) grace periods, with a pointless IPI waking
- the CPU from time to time.
-<li> If a CPU is in a portion of the kernel that executes RCU read-side
- critical sections, and RCU believes this CPU to be idle, you will get
- random memory corruption. <b>DON'T DO THIS!!!</b>
-
- <br>This is one reason to test with lockdep, which will complain
- about this sort of thing.
-<li> If a CPU is in a portion of the kernel that is absolutely
- positively no-joking guaranteed to never execute any RCU read-side
- critical sections, and RCU believes this CPU to to be idle,
- no problem. This sort of thing is used by some architectures
- for light-weight exception handlers, which can then avoid the
- overhead of <tt>rcu_irq_enter()</tt> and <tt>rcu_irq_exit()</tt>
- at exception entry and exit, respectively.
- Some go further and avoid the entireties of <tt>irq_enter()</tt>
- and <tt>irq_exit()</tt>.
-
- <br>Just make very sure you are running some of your tests with
- <tt>CONFIG_PROVE_RCU=y</tt>, just in case one of your code paths
- was in fact joking about not doing RCU read-side critical sections.
-<li> If a CPU is executing in the kernel with the scheduling-clock
- interrupt disabled and RCU believes this CPU to be non-idle,
- and if the CPU goes idle (from an RCU perspective) every few
- jiffies, no problem. It is usually OK for there to be the
- occasional gap between idle periods of up to a second or so.
-
- <br>If the gap grows too long, you get RCU CPU stall warnings.
-<li> If a CPU is either idle or executing in usermode, and RCU believes
- it to be idle, of course no problem.
-<li> If a CPU is executing in the kernel, the kernel code
- path is passing through quiescent states at a reasonable
- frequency (preferably about once per few jiffies, but the
- occasional excursion to a second or so is usually OK) and the
- scheduling-clock interrupt is enabled, of course no problem.
-
- <br>If the gap between a successive pair of quiescent states grows
- too long, you get RCU CPU stall warnings.
-</ol>
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
- But what if my driver has a hardware interrupt handler
- that can run for many seconds?
- I cannot invoke <tt>schedule()</tt> from an hardware
- interrupt handler, after all!
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
- One approach is to do <tt>rcu_irq_exit();rcu_irq_enter();</tt>
- every so often.
- But given that long-running interrupt handlers can cause
- other problems, not least for response time, shouldn't you
- work to keep your interrupt handler's runtime within reasonable
- bounds?
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>
-But as long as RCU is properly informed of kernel state transitions between
-in-kernel execution, usermode execution, and idle, and as long as the
-scheduling-clock interrupt is enabled when RCU needs it to be, you
-can rest assured that the bugs you encounter will be in some other
-part of RCU or some other part of the kernel!
-
-<h3><a name="Memory Efficiency">Memory Efficiency</a></h3>
-
-<p>
-Although small-memory non-realtime systems can simply use Tiny RCU,
-code size is only one aspect of memory efficiency.
-Another aspect is the size of the <tt>rcu_head</tt> structure
-used by <tt>call_rcu()</tt> and <tt>kfree_rcu()</tt>.
-Although this structure contains nothing more than a pair of pointers,
-it does appear in many RCU-protected data structures, including
-some that are size critical.
-The <tt>page</tt> structure is a case in point, as evidenced by
-the many occurrences of the <tt>union</tt> keyword within that structure.
-
-<p>
-This need for memory efficiency is one reason that RCU uses hand-crafted
-singly linked lists to track the <tt>rcu_head</tt> structures that
-are waiting for a grace period to elapse.
-It is also the reason why <tt>rcu_head</tt> structures do not contain
-debug information, such as fields tracking the file and line of the
-<tt>call_rcu()</tt> or <tt>kfree_rcu()</tt> that posted them.
-Although this information might appear in debug-only kernel builds at some
-point, in the meantime, the <tt>-&gt;func</tt> field will often provide
-the needed debug information.
-
-<p>
-However, in some cases, the need for memory efficiency leads to even
-more extreme measures.
-Returning to the <tt>page</tt> structure, the <tt>rcu_head</tt> field
-shares storage with a great many other structures that are used at
-various points in the corresponding page's lifetime.
-In order to correctly resolve certain
-<a href="https://lkml.kernel.org/g/1439976106-137226-1-git-send-email-kirill.shutemov@linux.intel.com">race conditions</a>,
-the Linux kernel's memory-management subsystem needs a particular bit
-to remain zero during all phases of grace-period processing,
-and that bit happens to map to the bottom bit of the
-<tt>rcu_head</tt> structure's <tt>-&gt;next</tt> field.
-RCU makes this guarantee as long as <tt>call_rcu()</tt>
-is used to post the callback, as opposed to <tt>kfree_rcu()</tt>
-or some future &ldquo;lazy&rdquo;
-variant of <tt>call_rcu()</tt> that might one day be created for
-energy-efficiency purposes.
-
-<p>
-That said, there are limits.
-RCU requires that the <tt>rcu_head</tt> structure be aligned to a
-two-byte boundary, and passing a misaligned <tt>rcu_head</tt>
-structure to one of the <tt>call_rcu()</tt> family of functions
-will result in a splat.
-It is therefore necessary to exercise caution when packing
-structures containing fields of type <tt>rcu_head</tt>.
-Why not a four-byte or even eight-byte alignment requirement?
-Because the m68k architecture provides only two-byte alignment,
-and thus acts as alignment's least common denominator.
-
-<p>
-The reason for reserving the bottom bit of pointers to
-<tt>rcu_head</tt> structures is to leave the door open to
-&ldquo;lazy&rdquo; callbacks whose invocations can safely be deferred.
-Deferring invocation could potentially have energy-efficiency
-benefits, but only if the rate of non-lazy callbacks decreases
-significantly for some important workload.
-In the meantime, reserving the bottom bit keeps this option open
-in case it one day becomes useful.
-
-<h3><a name="Performance, Scalability, Response Time, and Reliability">
-Performance, Scalability, Response Time, and Reliability</a></h3>
-
-<p>
-Expanding on the
-<a href="#Performance and Scalability">earlier discussion</a>,
-RCU is used heavily by hot code paths in performance-critical
-portions of the Linux kernel's networking, security, virtualization,
-and scheduling code paths.
-RCU must therefore use efficient implementations, especially in its
-read-side primitives.
-To that end, it would be good if preemptible RCU's implementation
-of <tt>rcu_read_lock()</tt> could be inlined, however, doing
-this requires resolving <tt>#include</tt> issues with the
-<tt>task_struct</tt> structure.
-
-<p>
-The Linux kernel supports hardware configurations with up to
-4096 CPUs, which means that RCU must be extremely scalable.
-Algorithms that involve frequent acquisitions of global locks or
-frequent atomic operations on global variables simply cannot be
-tolerated within the RCU implementation.
-RCU therefore makes heavy use of a combining tree based on the
-<tt>rcu_node</tt> structure.
-RCU is required to tolerate all CPUs continuously invoking any
-combination of RCU's runtime primitives with minimal per-operation
-overhead.
-In fact, in many cases, increasing load must <i>decrease</i> the
-per-operation overhead, witness the batching optimizations for
-<tt>synchronize_rcu()</tt>, <tt>call_rcu()</tt>,
-<tt>synchronize_rcu_expedited()</tt>, and <tt>rcu_barrier()</tt>.
-As a general rule, RCU must cheerfully accept whatever the
-rest of the Linux kernel decides to throw at it.
-
-<p>
-The Linux kernel is used for real-time workloads, especially
-in conjunction with the
-<a href="https://rt.wiki.kernel.org/index.php/Main_Page">-rt patchset</a>.
-The real-time-latency response requirements are such that the
-traditional approach of disabling preemption across RCU
-read-side critical sections is inappropriate.
-Kernels built with <tt>CONFIG_PREEMPT=y</tt> therefore
-use an RCU implementation that allows RCU read-side critical
-sections to be preempted.
-This requirement made its presence known after users made it
-clear that an earlier
-<a href="https://lwn.net/Articles/107930/">real-time patch</a>
-did not meet their needs, in conjunction with some
-<a href="https://lkml.kernel.org/g/20050318002026.GA2693@us.ibm.com">RCU issues</a>
-encountered by a very early version of the -rt patchset.
-
-<p>
-In addition, RCU must make do with a sub-100-microsecond real-time latency
-budget.
-In fact, on smaller systems with the -rt patchset, the Linux kernel
-provides sub-20-microsecond real-time latencies for the whole kernel,
-including RCU.
-RCU's scalability and latency must therefore be sufficient for
-these sorts of configurations.
-To my surprise, the sub-100-microsecond real-time latency budget
-<a href="http://www.rdrop.com/users/paulmck/realtime/paper/bigrt.2013.01.31a.LCA.pdf">
-applies to even the largest systems [PDF]</a>,
-up to and including systems with 4096 CPUs.
-This real-time requirement motivated the grace-period kthread, which
-also simplified handling of a number of race conditions.
-
-<p>
-RCU must avoid degrading real-time response for CPU-bound threads, whether
-executing in usermode (which is one use case for
-<tt>CONFIG_NO_HZ_FULL=y</tt>) or in the kernel.
-That said, CPU-bound loops in the kernel must execute
-<tt>cond_resched()</tt> at least once per few tens of milliseconds
-in order to avoid receiving an IPI from RCU.
-
-<p>
-Finally, RCU's status as a synchronization primitive means that
-any RCU failure can result in arbitrary memory corruption that can be
-extremely difficult to debug.
-This means that RCU must be extremely reliable, which in
-practice also means that RCU must have an aggressive stress-test
-suite.
-This stress-test suite is called <tt>rcutorture</tt>.
-
-<p>
-Although the need for <tt>rcutorture</tt> was no surprise,
-the current immense popularity of the Linux kernel is posing
-interesting&mdash;and perhaps unprecedented&mdash;validation
-challenges.
-To see this, keep in mind that there are well over one billion
-instances of the Linux kernel running today, given Android
-smartphones, Linux-powered televisions, and servers.
-This number can be expected to increase sharply with the advent of
-the celebrated Internet of Things.
-
-<p>
-Suppose that RCU contains a race condition that manifests on average
-once per million years of runtime.
-This bug will be occurring about three times per <i>day</i> across
-the installed base.
-RCU could simply hide behind hardware error rates, given that no one
-should really expect their smartphone to last for a million years.
-However, anyone taking too much comfort from this thought should
-consider the fact that in most jurisdictions, a successful multi-year
-test of a given mechanism, which might include a Linux kernel,
-suffices for a number of types of safety-critical certifications.
-In fact, rumor has it that the Linux kernel is already being used
-in production for safety-critical applications.
-I don't know about you, but I would feel quite bad if a bug in RCU
-killed someone.
-Which might explain my recent focus on validation and verification.
-
-<h2><a name="Other RCU Flavors">Other RCU Flavors</a></h2>
-
-<p>
-One of the more surprising things about RCU is that there are now
-no fewer than five <i>flavors</i>, or API families.
-In addition, the primary flavor that has been the sole focus up to
-this point has two different implementations, non-preemptible and
-preemptible.
-The other four flavors are listed below, with requirements for each
-described in a separate section.
-
-<ol>
-<li> <a href="#Bottom-Half Flavor">Bottom-Half Flavor (Historical)</a>
-<li> <a href="#Sched Flavor">Sched Flavor (Historical)</a>
-<li> <a href="#Sleepable RCU">Sleepable RCU</a>
-<li> <a href="#Tasks RCU">Tasks RCU</a>
-</ol>
-
-<h3><a name="Bottom-Half Flavor">Bottom-Half Flavor (Historical)</a></h3>
-
-<p>
-The RCU-bh flavor of RCU has since been expressed in terms of
-the other RCU flavors as part of a consolidation of the three
-flavors into a single flavor.
-The read-side API remains, and continues to disable softirq and to
-be accounted for by lockdep.
-Much of the material in this section is therefore strictly historical
-in nature.
-
-<p>
-The softirq-disable (AKA &ldquo;bottom-half&rdquo;,
-hence the &ldquo;_bh&rdquo; abbreviations)
-flavor of RCU, or <i>RCU-bh</i>, was developed by
-Dipankar Sarma to provide a flavor of RCU that could withstand the
-network-based denial-of-service attacks researched by Robert
-Olsson.
-These attacks placed so much networking load on the system
-that some of the CPUs never exited softirq execution,
-which in turn prevented those CPUs from ever executing a context switch,
-which, in the RCU implementation of that time, prevented grace periods
-from ever ending.
-The result was an out-of-memory condition and a system hang.
-
-<p>
-The solution was the creation of RCU-bh, which does
-<tt>local_bh_disable()</tt>
-across its read-side critical sections, and which uses the transition
-from one type of softirq processing to another as a quiescent state
-in addition to context switch, idle, user mode, and offline.
-This means that RCU-bh grace periods can complete even when some of
-the CPUs execute in softirq indefinitely, thus allowing algorithms
-based on RCU-bh to withstand network-based denial-of-service attacks.
-
-<p>
-Because
-<tt>rcu_read_lock_bh()</tt> and <tt>rcu_read_unlock_bh()</tt>
-disable and re-enable softirq handlers, any attempt to start a softirq
-handlers during the
-RCU-bh read-side critical section will be deferred.
-In this case, <tt>rcu_read_unlock_bh()</tt>
-will invoke softirq processing, which can take considerable time.
-One can of course argue that this softirq overhead should be associated
-with the code following the RCU-bh read-side critical section rather
-than <tt>rcu_read_unlock_bh()</tt>, but the fact
-is that most profiling tools cannot be expected to make this sort
-of fine distinction.
-For example, suppose that a three-millisecond-long RCU-bh read-side
-critical section executes during a time of heavy networking load.
-There will very likely be an attempt to invoke at least one softirq
-handler during that three milliseconds, but any such invocation will
-be delayed until the time of the <tt>rcu_read_unlock_bh()</tt>.
-This can of course make it appear at first glance as if
-<tt>rcu_read_unlock_bh()</tt> was executing very slowly.
-
-<p>
-The
-<a href="https://lwn.net/Articles/609973/#RCU Per-Flavor API Table">RCU-bh API</a>
-includes
-<tt>rcu_read_lock_bh()</tt>,
-<tt>rcu_read_unlock_bh()</tt>,
-<tt>rcu_dereference_bh()</tt>,
-<tt>rcu_dereference_bh_check()</tt>,
-<tt>synchronize_rcu_bh()</tt>,
-<tt>synchronize_rcu_bh_expedited()</tt>,
-<tt>call_rcu_bh()</tt>,
-<tt>rcu_barrier_bh()</tt>, and
-<tt>rcu_read_lock_bh_held()</tt>.
-However, the update-side APIs are now simple wrappers for other RCU
-flavors, namely RCU-sched in CONFIG_PREEMPT=n kernels and RCU-preempt
-otherwise.
-
-<h3><a name="Sched Flavor">Sched Flavor (Historical)</a></h3>
-
-<p>
-The RCU-sched flavor of RCU has since been expressed in terms of
-the other RCU flavors as part of a consolidation of the three
-flavors into a single flavor.
-The read-side API remains, and continues to disable preemption and to
-be accounted for by lockdep.
-Much of the material in this section is therefore strictly historical
-in nature.
-
-<p>
-Before preemptible RCU, waiting for an RCU grace period had the
-side effect of also waiting for all pre-existing interrupt
-and NMI handlers.
-However, there are legitimate preemptible-RCU implementations that
-do not have this property, given that any point in the code outside
-of an RCU read-side critical section can be a quiescent state.
-Therefore, <i>RCU-sched</i> was created, which follows &ldquo;classic&rdquo;
-RCU in that an RCU-sched grace period waits for for pre-existing
-interrupt and NMI handlers.
-In kernels built with <tt>CONFIG_PREEMPT=n</tt>, the RCU and RCU-sched
-APIs have identical implementations, while kernels built with
-<tt>CONFIG_PREEMPT=y</tt> provide a separate implementation for each.
-
-<p>
-Note well that in <tt>CONFIG_PREEMPT=y</tt> kernels,
-<tt>rcu_read_lock_sched()</tt> and <tt>rcu_read_unlock_sched()</tt>
-disable and re-enable preemption, respectively.
-This means that if there was a preemption attempt during the
-RCU-sched read-side critical section, <tt>rcu_read_unlock_sched()</tt>
-will enter the scheduler, with all the latency and overhead entailed.
-Just as with <tt>rcu_read_unlock_bh()</tt>, this can make it look
-as if <tt>rcu_read_unlock_sched()</tt> was executing very slowly.
-However, the highest-priority task won't be preempted, so that task
-will enjoy low-overhead <tt>rcu_read_unlock_sched()</tt> invocations.
-
-<p>
-The
-<a href="https://lwn.net/Articles/609973/#RCU Per-Flavor API Table">RCU-sched API</a>
-includes
-<tt>rcu_read_lock_sched()</tt>,
-<tt>rcu_read_unlock_sched()</tt>,
-<tt>rcu_read_lock_sched_notrace()</tt>,
-<tt>rcu_read_unlock_sched_notrace()</tt>,
-<tt>rcu_dereference_sched()</tt>,
-<tt>rcu_dereference_sched_check()</tt>,
-<tt>synchronize_sched()</tt>,
-<tt>synchronize_rcu_sched_expedited()</tt>,
-<tt>call_rcu_sched()</tt>,
-<tt>rcu_barrier_sched()</tt>, and
-<tt>rcu_read_lock_sched_held()</tt>.
-However, anything that disables preemption also marks an RCU-sched
-read-side critical section, including
-<tt>preempt_disable()</tt> and <tt>preempt_enable()</tt>,
-<tt>local_irq_save()</tt> and <tt>local_irq_restore()</tt>,
-and so on.
-
-<h3><a name="Sleepable RCU">Sleepable RCU</a></h3>
-
-<p>
-For well over a decade, someone saying &ldquo;I need to block within
-an RCU read-side critical section&rdquo; was a reliable indication
-that this someone did not understand RCU.
-After all, if you are always blocking in an RCU read-side critical
-section, you can probably afford to use a higher-overhead synchronization
-mechanism.
-However, that changed with the advent of the Linux kernel's notifiers,
-whose RCU read-side critical
-sections almost never sleep, but sometimes need to.
-This resulted in the introduction of
-<a href="https://lwn.net/Articles/202847/">sleepable RCU</a>,
-or <i>SRCU</i>.
-
-<p>
-SRCU allows different domains to be defined, with each such domain
-defined by an instance of an <tt>srcu_struct</tt> structure.
-A pointer to this structure must be passed in to each SRCU function,
-for example, <tt>synchronize_srcu(&amp;ss)</tt>, where
-<tt>ss</tt> is the <tt>srcu_struct</tt> structure.
-The key benefit of these domains is that a slow SRCU reader in one
-domain does not delay an SRCU grace period in some other domain.
-That said, one consequence of these domains is that read-side code
-must pass a &ldquo;cookie&rdquo; from <tt>srcu_read_lock()</tt>
-to <tt>srcu_read_unlock()</tt>, for example, as follows:
-
-<blockquote>
-<pre>
- 1 int idx;
- 2
- 3 idx = srcu_read_lock(&amp;ss);
- 4 do_something();
- 5 srcu_read_unlock(&amp;ss, idx);
-</pre>
-</blockquote>
-
-<p>
-As noted above, it is legal to block within SRCU read-side critical sections,
-however, with great power comes great responsibility.
-If you block forever in one of a given domain's SRCU read-side critical
-sections, then that domain's grace periods will also be blocked forever.
-Of course, one good way to block forever is to deadlock, which can
-happen if any operation in a given domain's SRCU read-side critical
-section can block waiting, either directly or indirectly, for that domain's
-grace period to elapse.
-For example, this results in a self-deadlock:
-
-<blockquote>
-<pre>
- 1 int idx;
- 2
- 3 idx = srcu_read_lock(&amp;ss);
- 4 do_something();
- 5 synchronize_srcu(&amp;ss);
- 6 srcu_read_unlock(&amp;ss, idx);
-</pre>
-</blockquote>
-
-<p>
-However, if line&nbsp;5 acquired a mutex that was held across
-a <tt>synchronize_srcu()</tt> for domain <tt>ss</tt>,
-deadlock would still be possible.
-Furthermore, if line&nbsp;5 acquired a mutex that was held across
-a <tt>synchronize_srcu()</tt> for some other domain <tt>ss1</tt>,
-and if an <tt>ss1</tt>-domain SRCU read-side critical section
-acquired another mutex that was held across as <tt>ss</tt>-domain
-<tt>synchronize_srcu()</tt>,
-deadlock would again be possible.
-Such a deadlock cycle could extend across an arbitrarily large number
-of different SRCU domains.
-Again, with great power comes great responsibility.
-
-<p>
-Unlike the other RCU flavors, SRCU read-side critical sections can
-run on idle and even offline CPUs.
-This ability requires that <tt>srcu_read_lock()</tt> and
-<tt>srcu_read_unlock()</tt> contain memory barriers, which means
-that SRCU readers will run a bit slower than would RCU readers.
-It also motivates the <tt>smp_mb__after_srcu_read_unlock()</tt>
-API, which, in combination with <tt>srcu_read_unlock()</tt>,
-guarantees a full memory barrier.
-
-<p>
-Also unlike other RCU flavors, SRCU's callbacks-wait function
-<tt>srcu_barrier()</tt> may be invoked from CPU-hotplug notifiers,
-though this is not necessarily a good idea.
-The reason that this is possible is that SRCU is insensitive
-to whether or not a CPU is online, which means that <tt>srcu_barrier()</tt>
-need not exclude CPU-hotplug operations.
-
-<p>
-SRCU also differs from other RCU flavors in that SRCU's expedited and
-non-expedited grace periods are implemented by the same mechanism.
-This means that in the current SRCU implementation, expediting a
-future grace period has the side effect of expediting all prior
-grace periods that have not yet completed.
-(But please note that this is a property of the current implementation,
-not necessarily of future implementations.)
-In addition, if SRCU has been idle for longer than the interval
-specified by the <tt>srcutree.exp_holdoff</tt> kernel boot parameter
-(25&nbsp;microseconds by default),
-and if a <tt>synchronize_srcu()</tt> invocation ends this idle period,
-that invocation will be automatically expedited.
-
-<p>
-As of v4.12, SRCU's callbacks are maintained per-CPU, eliminating
-a locking bottleneck present in prior kernel versions.
-Although this will allow users to put much heavier stress on
-<tt>call_srcu()</tt>, it is important to note that SRCU does not
-yet take any special steps to deal with callback flooding.
-So if you are posting (say) 10,000 SRCU callbacks per second per CPU,
-you are probably totally OK, but if you intend to post (say) 1,000,000
-SRCU callbacks per second per CPU, please run some tests first.
-SRCU just might need a few adjustment to deal with that sort of load.
-Of course, your mileage may vary based on the speed of your CPUs and
-the size of your memory.
-
-<p>
-The
-<a href="https://lwn.net/Articles/609973/#RCU Per-Flavor API Table">SRCU API</a>
-includes
-<tt>srcu_read_lock()</tt>,
-<tt>srcu_read_unlock()</tt>,
-<tt>srcu_dereference()</tt>,
-<tt>srcu_dereference_check()</tt>,
-<tt>synchronize_srcu()</tt>,
-<tt>synchronize_srcu_expedited()</tt>,
-<tt>call_srcu()</tt>,
-<tt>srcu_barrier()</tt>, and
-<tt>srcu_read_lock_held()</tt>.
-It also includes
-<tt>DEFINE_SRCU()</tt>,
-<tt>DEFINE_STATIC_SRCU()</tt>, and
-<tt>init_srcu_struct()</tt>
-APIs for defining and initializing <tt>srcu_struct</tt> structures.
-
-<h3><a name="Tasks RCU">Tasks RCU</a></h3>
-
-<p>
-Some forms of tracing use &ldquo;trampolines&rdquo; to handle the
-binary rewriting required to install different types of probes.
-It would be good to be able to free old trampolines, which sounds
-like a job for some form of RCU.
-However, because it is necessary to be able to install a trace
-anywhere in the code, it is not possible to use read-side markers
-such as <tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>.
-In addition, it does not work to have these markers in the trampoline
-itself, because there would need to be instructions following
-<tt>rcu_read_unlock()</tt>.
-Although <tt>synchronize_rcu()</tt> would guarantee that execution
-reached the <tt>rcu_read_unlock()</tt>, it would not be able to
-guarantee that execution had completely left the trampoline.
-
-<p>
-The solution, in the form of
-<a href="https://lwn.net/Articles/607117/"><i>Tasks RCU</i></a>,
-is to have implicit
-read-side critical sections that are delimited by voluntary context
-switches, that is, calls to <tt>schedule()</tt>,
-<tt>cond_resched()</tt>, and
-<tt>synchronize_rcu_tasks()</tt>.
-In addition, transitions to and from userspace execution also delimit
-tasks-RCU read-side critical sections.
-
-<p>
-The tasks-RCU API is quite compact, consisting only of
-<tt>call_rcu_tasks()</tt>,
-<tt>synchronize_rcu_tasks()</tt>, and
-<tt>rcu_barrier_tasks()</tt>.
-In <tt>CONFIG_PREEMPT=n</tt> kernels, trampolines cannot be preempted,
-so these APIs map to
-<tt>call_rcu()</tt>,
-<tt>synchronize_rcu()</tt>, and
-<tt>rcu_barrier()</tt>, respectively.
-In <tt>CONFIG_PREEMPT=y</tt> kernels, trampolines can be preempted,
-and these three APIs are therefore implemented by separate functions
-that check for voluntary context switches.
-
-<h2><a name="Possible Future Changes">Possible Future Changes</a></h2>
-
-<p>
-One of the tricks that RCU uses to attain update-side scalability is
-to increase grace-period latency with increasing numbers of CPUs.
-If this becomes a serious problem, it will be necessary to rework the
-grace-period state machine so as to avoid the need for the additional
-latency.
-
-<p>
-RCU disables CPU hotplug in a few places, perhaps most notably in the
-<tt>rcu_barrier()</tt> operations.
-If there is a strong reason to use <tt>rcu_barrier()</tt> in CPU-hotplug
-notifiers, it will be necessary to avoid disabling CPU hotplug.
-This would introduce some complexity, so there had better be a <i>very</i>
-good reason.
-
-<p>
-The tradeoff between grace-period latency on the one hand and interruptions
-of other CPUs on the other hand may need to be re-examined.
-The desire is of course for zero grace-period latency as well as zero
-interprocessor interrupts undertaken during an expedited grace period
-operation.
-While this ideal is unlikely to be achievable, it is quite possible that
-further improvements can be made.
-
-<p>
-The multiprocessor implementations of RCU use a combining tree that
-groups CPUs so as to reduce lock contention and increase cache locality.
-However, this combining tree does not spread its memory across NUMA
-nodes nor does it align the CPU groups with hardware features such
-as sockets or cores.
-Such spreading and alignment is currently believed to be unnecessary
-because the hotpath read-side primitives do not access the combining
-tree, nor does <tt>call_rcu()</tt> in the common case.
-If you believe that your architecture needs such spreading and alignment,
-then your architecture should also benefit from the
-<tt>rcutree.rcu_fanout_leaf</tt> boot parameter, which can be set
-to the number of CPUs in a socket, NUMA node, or whatever.
-If the number of CPUs is too large, use a fraction of the number of
-CPUs.
-If the number of CPUs is a large prime number, well, that certainly
-is an &ldquo;interesting&rdquo; architectural choice!
-More flexible arrangements might be considered, but only if
-<tt>rcutree.rcu_fanout_leaf</tt> has proven inadequate, and only
-if the inadequacy has been demonstrated by a carefully run and
-realistic system-level workload.
-
-<p>
-Please note that arrangements that require RCU to remap CPU numbers will
-require extremely good demonstration of need and full exploration of
-alternatives.
-
-<p>
-RCU's various kthreads are reasonably recent additions.
-It is quite likely that adjustments will be required to more gracefully
-handle extreme loads.
-It might also be necessary to be able to relate CPU utilization by
-RCU's kthreads and softirq handlers to the code that instigated this
-CPU utilization.
-For example, RCU callback overhead might be charged back to the
-originating <tt>call_rcu()</tt> instance, though probably not
-in production kernels.
-
-<h2><a name="Summary">Summary</a></h2>
-
-<p>
-This document has presented more than two decade's worth of RCU
-requirements.
-Given that the requirements keep changing, this will not be the last
-word on this subject, but at least it serves to get an important
-subset of the requirements set forth.
-
-<h2><a name="Acknowledgments">Acknowledgments</a></h2>
-
-I am grateful to Steven Rostedt, Lai Jiangshan, Ingo Molnar,
-Oleg Nesterov, Borislav Petkov, Peter Zijlstra, Boqun Feng, and
-Andy Lutomirski for their help in rendering
-this article human readable, and to Michelle Rankin for her support
-of this effort.
-Other contributions are acknowledged in the Linux kernel's git archive.
-
-</body></html>
diff --git a/Documentation/RCU/Design/Requirements/Requirements.rst b/Documentation/RCU/Design/Requirements/Requirements.rst
new file mode 100644
index 000000000000..f24b3c0b9b0d
--- /dev/null
+++ b/Documentation/RCU/Design/Requirements/Requirements.rst
@@ -0,0 +1,2865 @@
+=================================
+A Tour Through RCU's Requirements
+=================================
+
+Copyright IBM Corporation, 2015
+
+Author: Paul E. McKenney
+
+The initial version of this document appeared in the
+`LWN <https://lwn.net/>`_ on those articles:
+`part 1 <https://lwn.net/Articles/652156/>`_,
+`part 2 <https://lwn.net/Articles/652677/>`_, and
+`part 3 <https://lwn.net/Articles/653326/>`_.
+
+Introduction
+------------
+
+Read-copy update (RCU) is a synchronization mechanism that is often used
+as a replacement for reader-writer locking. RCU is unusual in that
+updaters do not block readers, which means that RCU's read-side
+primitives can be exceedingly fast and scalable. In addition, updaters
+can make useful forward progress concurrently with readers. However, all
+this concurrency between RCU readers and updaters does raise the
+question of exactly what RCU readers are doing, which in turn raises the
+question of exactly what RCU's requirements are.
+
+This document therefore summarizes RCU's requirements, and can be
+thought of as an informal, high-level specification for RCU. It is
+important to understand that RCU's specification is primarily empirical
+in nature; in fact, I learned about many of these requirements the hard
+way. This situation might cause some consternation, however, not only
+has this learning process been a lot of fun, but it has also been a
+great privilege to work with so many people willing to apply
+technologies in interesting new ways.
+
+All that aside, here are the categories of currently known RCU
+requirements:
+
+#. `Fundamental Requirements`_
+#. `Fundamental Non-Requirements`_
+#. `Parallelism Facts of Life`_
+#. `Quality-of-Implementation Requirements`_
+#. `Linux Kernel Complications`_
+#. `Software-Engineering Requirements`_
+#. `Other RCU Flavors`_
+#. `Possible Future Changes`_
+
+This is followed by a summary_, however, the answers to
+each quick quiz immediately follows the quiz. Select the big white space
+with your mouse to see the answer.
+
+Fundamental Requirements
+------------------------
+
+RCU's fundamental requirements are the closest thing RCU has to hard
+mathematical requirements. These are:
+
+#. `Grace-Period Guarantee`_
+#. `Publish/Subscribe Guarantee`_
+#. `Memory-Barrier Guarantees`_
+#. `RCU Primitives Guaranteed to Execute Unconditionally`_
+#. `Guaranteed Read-to-Write Upgrade`_
+
+Grace-Period Guarantee
+~~~~~~~~~~~~~~~~~~~~~~
+
+RCU's grace-period guarantee is unusual in being premeditated: Jack
+Slingwine and I had this guarantee firmly in mind when we started work
+on RCU (then called “rclock”) in the early 1990s. That said, the past
+two decades of experience with RCU have produced a much more detailed
+understanding of this guarantee.
+
+RCU's grace-period guarantee allows updaters to wait for the completion
+of all pre-existing RCU read-side critical sections. An RCU read-side
+critical section begins with the marker rcu_read_lock() and ends
+with the marker rcu_read_unlock(). These markers may be nested, and
+RCU treats a nested set as one big RCU read-side critical section.
+Production-quality implementations of rcu_read_lock() and
+rcu_read_unlock() are extremely lightweight, and in fact have
+exactly zero overhead in Linux kernels built for production use with
+``CONFIG_PREEMPTION=n``.
+
+This guarantee allows ordering to be enforced with extremely low
+overhead to readers, for example:
+
+ ::
+
+ 1 int x, y;
+ 2
+ 3 void thread0(void)
+ 4 {
+ 5 rcu_read_lock();
+ 6 r1 = READ_ONCE(x);
+ 7 r2 = READ_ONCE(y);
+ 8 rcu_read_unlock();
+ 9 }
+ 10
+ 11 void thread1(void)
+ 12 {
+ 13 WRITE_ONCE(x, 1);
+ 14 synchronize_rcu();
+ 15 WRITE_ONCE(y, 1);
+ 16 }
+
+Because the synchronize_rcu() on line 14 waits for all pre-existing
+readers, any instance of thread0() that loads a value of zero from
+``x`` must complete before thread1() stores to ``y``, so that
+instance must also load a value of zero from ``y``. Similarly, any
+instance of thread0() that loads a value of one from ``y`` must have
+started after the synchronize_rcu() started, and must therefore also
+load a value of one from ``x``. Therefore, the outcome:
+
+ ::
+
+ (r1 == 0 && r2 == 1)
+
+cannot happen.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| Wait a minute! You said that updaters can make useful forward |
+| progress concurrently with readers, but pre-existing readers will |
+| block synchronize_rcu()!!! |
+| Just who are you trying to fool??? |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| First, if updaters do not wish to be blocked by readers, they can use |
+| call_rcu() or kfree_rcu(), which will be discussed later. |
+| Second, even when using synchronize_rcu(), the other update-side |
+| code does run concurrently with readers, whether pre-existing or not. |
++-----------------------------------------------------------------------+
+
+This scenario resembles one of the first uses of RCU in
+`DYNIX/ptx <https://en.wikipedia.org/wiki/DYNIX>`__, which managed a
+distributed lock manager's transition into a state suitable for handling
+recovery from node failure, more or less as follows:
+
+ ::
+
+ 1 #define STATE_NORMAL 0
+ 2 #define STATE_WANT_RECOVERY 1
+ 3 #define STATE_RECOVERING 2
+ 4 #define STATE_WANT_NORMAL 3
+ 5
+ 6 int state = STATE_NORMAL;
+ 7
+ 8 void do_something_dlm(void)
+ 9 {
+ 10 int state_snap;
+ 11
+ 12 rcu_read_lock();
+ 13 state_snap = READ_ONCE(state);
+ 14 if (state_snap == STATE_NORMAL)
+ 15 do_something();
+ 16 else
+ 17 do_something_carefully();
+ 18 rcu_read_unlock();
+ 19 }
+ 20
+ 21 void start_recovery(void)
+ 22 {
+ 23 WRITE_ONCE(state, STATE_WANT_RECOVERY);
+ 24 synchronize_rcu();
+ 25 WRITE_ONCE(state, STATE_RECOVERING);
+ 26 recovery();
+ 27 WRITE_ONCE(state, STATE_WANT_NORMAL);
+ 28 synchronize_rcu();
+ 29 WRITE_ONCE(state, STATE_NORMAL);
+ 30 }
+
+The RCU read-side critical section in do_something_dlm() works with
+the synchronize_rcu() in start_recovery() to guarantee that
+do_something() never runs concurrently with recovery(), but with
+little or no synchronization overhead in do_something_dlm().
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| Why is the synchronize_rcu() on line 28 needed? |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| Without that extra grace period, memory reordering could result in |
+| do_something_dlm() executing do_something() concurrently with |
+| the last bits of recovery(). |
++-----------------------------------------------------------------------+
+
+In order to avoid fatal problems such as deadlocks, an RCU read-side
+critical section must not contain calls to synchronize_rcu().
+Similarly, an RCU read-side critical section must not contain anything
+that waits, directly or indirectly, on completion of an invocation of
+synchronize_rcu().
+
+Although RCU's grace-period guarantee is useful in and of itself, with
+`quite a few use cases <https://lwn.net/Articles/573497/>`__, it would
+be good to be able to use RCU to coordinate read-side access to linked
+data structures. For this, the grace-period guarantee is not sufficient,
+as can be seen in function add_gp_buggy() below. We will look at the
+reader's code later, but in the meantime, just think of the reader as
+locklessly picking up the ``gp`` pointer, and, if the value loaded is
+non-\ ``NULL``, locklessly accessing the ``->a`` and ``->b`` fields.
+
+ ::
+
+ 1 bool add_gp_buggy(int a, int b)
+ 2 {
+ 3 p = kmalloc(sizeof(*p), GFP_KERNEL);
+ 4 if (!p)
+ 5 return -ENOMEM;
+ 6 spin_lock(&gp_lock);
+ 7 if (rcu_access_pointer(gp)) {
+ 8 spin_unlock(&gp_lock);
+ 9 return false;
+ 10 }
+ 11 p->a = a;
+ 12 p->b = a;
+ 13 gp = p; /* ORDERING BUG */
+ 14 spin_unlock(&gp_lock);
+ 15 return true;
+ 16 }
+
+The problem is that both the compiler and weakly ordered CPUs are within
+their rights to reorder this code as follows:
+
+ ::
+
+ 1 bool add_gp_buggy_optimized(int a, int b)
+ 2 {
+ 3 p = kmalloc(sizeof(*p), GFP_KERNEL);
+ 4 if (!p)
+ 5 return -ENOMEM;
+ 6 spin_lock(&gp_lock);
+ 7 if (rcu_access_pointer(gp)) {
+ 8 spin_unlock(&gp_lock);
+ 9 return false;
+ 10 }
+ 11 gp = p; /* ORDERING BUG */
+ 12 p->a = a;
+ 13 p->b = a;
+ 14 spin_unlock(&gp_lock);
+ 15 return true;
+ 16 }
+
+If an RCU reader fetches ``gp`` just after ``add_gp_buggy_optimized``
+executes line 11, it will see garbage in the ``->a`` and ``->b`` fields.
+And this is but one of many ways in which compiler and hardware
+optimizations could cause trouble. Therefore, we clearly need some way
+to prevent the compiler and the CPU from reordering in this manner,
+which brings us to the publish-subscribe guarantee discussed in the next
+section.
+
+Publish/Subscribe Guarantee
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+RCU's publish-subscribe guarantee allows data to be inserted into a
+linked data structure without disrupting RCU readers. The updater uses
+rcu_assign_pointer() to insert the new data, and readers use
+rcu_dereference() to access data, whether new or old. The following
+shows an example of insertion:
+
+ ::
+
+ 1 bool add_gp(int a, int b)
+ 2 {
+ 3 p = kmalloc(sizeof(*p), GFP_KERNEL);
+ 4 if (!p)
+ 5 return -ENOMEM;
+ 6 spin_lock(&gp_lock);
+ 7 if (rcu_access_pointer(gp)) {
+ 8 spin_unlock(&gp_lock);
+ 9 return false;
+ 10 }
+ 11 p->a = a;
+ 12 p->b = a;
+ 13 rcu_assign_pointer(gp, p);
+ 14 spin_unlock(&gp_lock);
+ 15 return true;
+ 16 }
+
+The rcu_assign_pointer() on line 13 is conceptually equivalent to a
+simple assignment statement, but also guarantees that its assignment
+will happen after the two assignments in lines 11 and 12, similar to the
+C11 ``memory_order_release`` store operation. It also prevents any
+number of “interesting” compiler optimizations, for example, the use of
+``gp`` as a scratch location immediately preceding the assignment.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| But rcu_assign_pointer() does nothing to prevent the two |
+| assignments to ``p->a`` and ``p->b`` from being reordered. Can't that |
+| also cause problems? |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| No, it cannot. The readers cannot see either of these two fields |
+| until the assignment to ``gp``, by which time both fields are fully |
+| initialized. So reordering the assignments to ``p->a`` and ``p->b`` |
+| cannot possibly cause any problems. |
++-----------------------------------------------------------------------+
+
+It is tempting to assume that the reader need not do anything special to
+control its accesses to the RCU-protected data, as shown in
+do_something_gp_buggy() below:
+
+ ::
+
+ 1 bool do_something_gp_buggy(void)
+ 2 {
+ 3 rcu_read_lock();
+ 4 p = gp; /* OPTIMIZATIONS GALORE!!! */
+ 5 if (p) {
+ 6 do_something(p->a, p->b);
+ 7 rcu_read_unlock();
+ 8 return true;
+ 9 }
+ 10 rcu_read_unlock();
+ 11 return false;
+ 12 }
+
+However, this temptation must be resisted because there are a
+surprisingly large number of ways that the compiler (or weak ordering
+CPUs like the DEC Alpha) can trip this code up. For but one example, if
+the compiler were short of registers, it might choose to refetch from
+``gp`` rather than keeping a separate copy in ``p`` as follows:
+
+ ::
+
+ 1 bool do_something_gp_buggy_optimized(void)
+ 2 {
+ 3 rcu_read_lock();
+ 4 if (gp) { /* OPTIMIZATIONS GALORE!!! */
+ 5 do_something(gp->a, gp->b);
+ 6 rcu_read_unlock();
+ 7 return true;
+ 8 }
+ 9 rcu_read_unlock();
+ 10 return false;
+ 11 }
+
+If this function ran concurrently with a series of updates that replaced
+the current structure with a new one, the fetches of ``gp->a`` and
+``gp->b`` might well come from two different structures, which could
+cause serious confusion. To prevent this (and much else besides),
+do_something_gp() uses rcu_dereference() to fetch from ``gp``:
+
+ ::
+
+ 1 bool do_something_gp(void)
+ 2 {
+ 3 rcu_read_lock();
+ 4 p = rcu_dereference(gp);
+ 5 if (p) {
+ 6 do_something(p->a, p->b);
+ 7 rcu_read_unlock();
+ 8 return true;
+ 9 }
+ 10 rcu_read_unlock();
+ 11 return false;
+ 12 }
+
+The rcu_dereference() uses volatile casts and (for DEC Alpha) memory
+barriers in the Linux kernel. Should a |high-quality implementation of
+C11 memory_order_consume [PDF]|_
+ever appear, then rcu_dereference() could be implemented as a
+``memory_order_consume`` load. Regardless of the exact implementation, a
+pointer fetched by rcu_dereference() may not be used outside of the
+outermost RCU read-side critical section containing that
+rcu_dereference(), unless protection of the corresponding data
+element has been passed from RCU to some other synchronization
+mechanism, most commonly locking or reference counting
+(see ../../rcuref.rst).
+
+.. |high-quality implementation of C11 memory_order_consume [PDF]| replace:: high-quality implementation of C11 ``memory_order_consume`` [PDF]
+.. _high-quality implementation of C11 memory_order_consume [PDF]: http://www.rdrop.com/users/paulmck/RCU/consume.2015.07.13a.pdf
+
+In short, updaters use rcu_assign_pointer() and readers use
+rcu_dereference(), and these two RCU API elements work together to
+ensure that readers have a consistent view of newly added data elements.
+
+Of course, it is also necessary to remove elements from RCU-protected
+data structures, for example, using the following process:
+
+#. Remove the data element from the enclosing structure.
+#. Wait for all pre-existing RCU read-side critical sections to complete
+ (because only pre-existing readers can possibly have a reference to
+ the newly removed data element).
+#. At this point, only the updater has a reference to the newly removed
+ data element, so it can safely reclaim the data element, for example,
+ by passing it to kfree().
+
+This process is implemented by remove_gp_synchronous():
+
+ ::
+
+ 1 bool remove_gp_synchronous(void)
+ 2 {
+ 3 struct foo *p;
+ 4
+ 5 spin_lock(&gp_lock);
+ 6 p = rcu_access_pointer(gp);
+ 7 if (!p) {
+ 8 spin_unlock(&gp_lock);
+ 9 return false;
+ 10 }
+ 11 rcu_assign_pointer(gp, NULL);
+ 12 spin_unlock(&gp_lock);
+ 13 synchronize_rcu();
+ 14 kfree(p);
+ 15 return true;
+ 16 }
+
+This function is straightforward, with line 13 waiting for a grace
+period before line 14 frees the old data element. This waiting ensures
+that readers will reach line 7 of do_something_gp() before the data
+element referenced by ``p`` is freed. The rcu_access_pointer() on
+line 6 is similar to rcu_dereference(), except that:
+
+#. The value returned by rcu_access_pointer() cannot be
+ dereferenced. If you want to access the value pointed to as well as
+ the pointer itself, use rcu_dereference() instead of
+ rcu_access_pointer().
+#. The call to rcu_access_pointer() need not be protected. In
+ contrast, rcu_dereference() must either be within an RCU
+ read-side critical section or in a code segment where the pointer
+ cannot change, for example, in code protected by the corresponding
+ update-side lock.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| Without the rcu_dereference() or the rcu_access_pointer(), |
+| what destructive optimizations might the compiler make use of? |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| Let's start with what happens to do_something_gp() if it fails to |
+| use rcu_dereference(). It could reuse a value formerly fetched |
+| from this same pointer. It could also fetch the pointer from ``gp`` |
+| in a byte-at-a-time manner, resulting in *load tearing*, in turn |
+| resulting a bytewise mash-up of two distinct pointer values. It might |
+| even use value-speculation optimizations, where it makes a wrong |
+| guess, but by the time it gets around to checking the value, an |
+| update has changed the pointer to match the wrong guess. Too bad |
+| about any dereferences that returned pre-initialization garbage in |
+| the meantime! |
+| For remove_gp_synchronous(), as long as all modifications to |
+| ``gp`` are carried out while holding ``gp_lock``, the above |
+| optimizations are harmless. However, ``sparse`` will complain if you |
+| define ``gp`` with ``__rcu`` and then access it without using either |
+| rcu_access_pointer() or rcu_dereference(). |
++-----------------------------------------------------------------------+
+
+In short, RCU's publish-subscribe guarantee is provided by the
+combination of rcu_assign_pointer() and rcu_dereference(). This
+guarantee allows data elements to be safely added to RCU-protected
+linked data structures without disrupting RCU readers. This guarantee
+can be used in combination with the grace-period guarantee to also allow
+data elements to be removed from RCU-protected linked data structures,
+again without disrupting RCU readers.
+
+This guarantee was only partially premeditated. DYNIX/ptx used an
+explicit memory barrier for publication, but had nothing resembling
+rcu_dereference() for subscription, nor did it have anything
+resembling the dependency-ordering barrier that was later subsumed
+into rcu_dereference() and later still into READ_ONCE(). The
+need for these operations made itself known quite suddenly at a
+late-1990s meeting with the DEC Alpha architects, back in the days when
+DEC was still a free-standing company. It took the Alpha architects a
+good hour to convince me that any sort of barrier would ever be needed,
+and it then took me a good *two* hours to convince them that their
+documentation did not make this point clear. More recent work with the C
+and C++ standards committees have provided much education on tricks and
+traps from the compiler. In short, compilers were much less tricky in
+the early 1990s, but in 2015, don't even think about omitting
+rcu_dereference()!
+
+Memory-Barrier Guarantees
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The previous section's simple linked-data-structure scenario clearly
+demonstrates the need for RCU's stringent memory-ordering guarantees on
+systems with more than one CPU:
+
+#. Each CPU that has an RCU read-side critical section that begins
+ before synchronize_rcu() starts is guaranteed to execute a full
+ memory barrier between the time that the RCU read-side critical
+ section ends and the time that synchronize_rcu() returns. Without
+ this guarantee, a pre-existing RCU read-side critical section might
+ hold a reference to the newly removed ``struct foo`` after the
+ kfree() on line 14 of remove_gp_synchronous().
+#. Each CPU that has an RCU read-side critical section that ends after
+ synchronize_rcu() returns is guaranteed to execute a full memory
+ barrier between the time that synchronize_rcu() begins and the
+ time that the RCU read-side critical section begins. Without this
+ guarantee, a later RCU read-side critical section running after the
+ kfree() on line 14 of remove_gp_synchronous() might later run
+ do_something_gp() and find the newly deleted ``struct foo``.
+#. If the task invoking synchronize_rcu() remains on a given CPU,
+ then that CPU is guaranteed to execute a full memory barrier sometime
+ during the execution of synchronize_rcu(). This guarantee ensures
+ that the kfree() on line 14 of remove_gp_synchronous() really
+ does execute after the removal on line 11.
+#. If the task invoking synchronize_rcu() migrates among a group of
+ CPUs during that invocation, then each of the CPUs in that group is
+ guaranteed to execute a full memory barrier sometime during the
+ execution of synchronize_rcu(). This guarantee also ensures that
+ the kfree() on line 14 of remove_gp_synchronous() really does
+ execute after the removal on line 11, but also in the case where the
+ thread executing the synchronize_rcu() migrates in the meantime.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| Given that multiple CPUs can start RCU read-side critical sections at |
+| any time without any ordering whatsoever, how can RCU possibly tell |
+| whether or not a given RCU read-side critical section starts before a |
+| given instance of synchronize_rcu()? |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| If RCU cannot tell whether or not a given RCU read-side critical |
+| section starts before a given instance of synchronize_rcu(), then |
+| it must assume that the RCU read-side critical section started first. |
+| In other words, a given instance of synchronize_rcu() can avoid |
+| waiting on a given RCU read-side critical section only if it can |
+| prove that synchronize_rcu() started first. |
+| A related question is “When rcu_read_lock() doesn't generate any |
+| code, why does it matter how it relates to a grace period?” The |
+| answer is that it is not the relationship of rcu_read_lock() |
+| itself that is important, but rather the relationship of the code |
+| within the enclosed RCU read-side critical section to the code |
+| preceding and following the grace period. If we take this viewpoint, |
+| then a given RCU read-side critical section begins before a given |
+| grace period when some access preceding the grace period observes the |
+| effect of some access within the critical section, in which case none |
+| of the accesses within the critical section may observe the effects |
+| of any access following the grace period. |
+| |
+| As of late 2016, mathematical models of RCU take this viewpoint, for |
+| example, see slides 62 and 63 of the `2016 LinuxCon |
+| EU <http://www2.rdrop.com/users/paulmck/scalability/paper/LinuxMM.201 |
+| 6.10.04c.LCE.pdf>`__ |
+| presentation. |
++-----------------------------------------------------------------------+
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| The first and second guarantees require unbelievably strict ordering! |
+| Are all these memory barriers *really* required? |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| Yes, they really are required. To see why the first guarantee is |
+| required, consider the following sequence of events: |
+| |
+| #. CPU 1: rcu_read_lock() |
+| #. CPU 1: ``q = rcu_dereference(gp); /* Very likely to return p. */`` |
+| #. CPU 0: ``list_del_rcu(p);`` |
+| #. CPU 0: synchronize_rcu() starts. |
+| #. CPU 1: ``do_something_with(q->a);`` |
+| ``/* No smp_mb(), so might happen after kfree(). */`` |
+| #. CPU 1: rcu_read_unlock() |
+| #. CPU 0: synchronize_rcu() returns. |
+| #. CPU 0: ``kfree(p);`` |
+| |
+| Therefore, there absolutely must be a full memory barrier between the |
+| end of the RCU read-side critical section and the end of the grace |
+| period. |
+| |
+| The sequence of events demonstrating the necessity of the second rule |
+| is roughly similar: |
+| |
+| #. CPU 0: ``list_del_rcu(p);`` |
+| #. CPU 0: synchronize_rcu() starts. |
+| #. CPU 1: rcu_read_lock() |
+| #. CPU 1: ``q = rcu_dereference(gp);`` |
+| ``/* Might return p if no memory barrier. */`` |
+| #. CPU 0: synchronize_rcu() returns. |
+| #. CPU 0: ``kfree(p);`` |
+| #. CPU 1: ``do_something_with(q->a); /* Boom!!! */`` |
+| #. CPU 1: rcu_read_unlock() |
+| |
+| And similarly, without a memory barrier between the beginning of the |
+| grace period and the beginning of the RCU read-side critical section, |
+| CPU 1 might end up accessing the freelist. |
+| |
+| The “as if” rule of course applies, so that any implementation that |
+| acts as if the appropriate memory barriers were in place is a correct |
+| implementation. That said, it is much easier to fool yourself into |
+| believing that you have adhered to the as-if rule than it is to |
+| actually adhere to it! |
++-----------------------------------------------------------------------+
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| You claim that rcu_read_lock() and rcu_read_unlock() generate |
+| absolutely no code in some kernel builds. This means that the |
+| compiler might arbitrarily rearrange consecutive RCU read-side |
+| critical sections. Given such rearrangement, if a given RCU read-side |
+| critical section is done, how can you be sure that all prior RCU |
+| read-side critical sections are done? Won't the compiler |
+| rearrangements make that impossible to determine? |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| In cases where rcu_read_lock() and rcu_read_unlock() generate |
+| absolutely no code, RCU infers quiescent states only at special |
+| locations, for example, within the scheduler. Because calls to |
+| schedule() had better prevent calling-code accesses to shared |
+| variables from being rearranged across the call to schedule(), if |
+| RCU detects the end of a given RCU read-side critical section, it |
+| will necessarily detect the end of all prior RCU read-side critical |
+| sections, no matter how aggressively the compiler scrambles the code. |
+| Again, this all assumes that the compiler cannot scramble code across |
+| calls to the scheduler, out of interrupt handlers, into the idle |
+| loop, into user-mode code, and so on. But if your kernel build allows |
+| that sort of scrambling, you have broken far more than just RCU! |
++-----------------------------------------------------------------------+
+
+Note that these memory-barrier requirements do not replace the
+fundamental RCU requirement that a grace period wait for all
+pre-existing readers. On the contrary, the memory barriers called out in
+this section must operate in such a way as to *enforce* this fundamental
+requirement. Of course, different implementations enforce this
+requirement in different ways, but enforce it they must.
+
+RCU Primitives Guaranteed to Execute Unconditionally
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The common-case RCU primitives are unconditional. They are invoked, they
+do their job, and they return, with no possibility of error, and no need
+to retry. This is a key RCU design philosophy.
+
+However, this philosophy is pragmatic rather than pigheaded. If someone
+comes up with a good justification for a particular conditional RCU
+primitive, it might well be implemented and added. After all, this
+guarantee was reverse-engineered, not premeditated. The unconditional
+nature of the RCU primitives was initially an accident of
+implementation, and later experience with synchronization primitives
+with conditional primitives caused me to elevate this accident to a
+guarantee. Therefore, the justification for adding a conditional
+primitive to RCU would need to be based on detailed and compelling use
+cases.
+
+Guaranteed Read-to-Write Upgrade
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+As far as RCU is concerned, it is always possible to carry out an update
+within an RCU read-side critical section. For example, that RCU
+read-side critical section might search for a given data element, and
+then might acquire the update-side spinlock in order to update that
+element, all while remaining in that RCU read-side critical section. Of
+course, it is necessary to exit the RCU read-side critical section
+before invoking synchronize_rcu(), however, this inconvenience can
+be avoided through use of the call_rcu() and kfree_rcu() API
+members described later in this document.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| But how does the upgrade-to-write operation exclude other readers? |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| It doesn't, just like normal RCU updates, which also do not exclude |
+| RCU readers. |
++-----------------------------------------------------------------------+
+
+This guarantee allows lookup code to be shared between read-side and
+update-side code, and was premeditated, appearing in the earliest
+DYNIX/ptx RCU documentation.
+
+Fundamental Non-Requirements
+----------------------------
+
+RCU provides extremely lightweight readers, and its read-side
+guarantees, though quite useful, are correspondingly lightweight. It is
+therefore all too easy to assume that RCU is guaranteeing more than it
+really is. Of course, the list of things that RCU does not guarantee is
+infinitely long, however, the following sections list a few
+non-guarantees that have caused confusion. Except where otherwise noted,
+these non-guarantees were premeditated.
+
+#. `Readers Impose Minimal Ordering`_
+#. `Readers Do Not Exclude Updaters`_
+#. `Updaters Only Wait For Old Readers`_
+#. `Grace Periods Don't Partition Read-Side Critical Sections`_
+#. `Read-Side Critical Sections Don't Partition Grace Periods`_
+
+Readers Impose Minimal Ordering
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Reader-side markers such as rcu_read_lock() and
+rcu_read_unlock() provide absolutely no ordering guarantees except
+through their interaction with the grace-period APIs such as
+synchronize_rcu(). To see this, consider the following pair of
+threads:
+
+ ::
+
+ 1 void thread0(void)
+ 2 {
+ 3 rcu_read_lock();
+ 4 WRITE_ONCE(x, 1);
+ 5 rcu_read_unlock();
+ 6 rcu_read_lock();
+ 7 WRITE_ONCE(y, 1);
+ 8 rcu_read_unlock();
+ 9 }
+ 10
+ 11 void thread1(void)
+ 12 {
+ 13 rcu_read_lock();
+ 14 r1 = READ_ONCE(y);
+ 15 rcu_read_unlock();
+ 16 rcu_read_lock();
+ 17 r2 = READ_ONCE(x);
+ 18 rcu_read_unlock();
+ 19 }
+
+After thread0() and thread1() execute concurrently, it is quite
+possible to have
+
+ ::
+
+ (r1 == 1 && r2 == 0)
+
+(that is, ``y`` appears to have been assigned before ``x``), which would
+not be possible if rcu_read_lock() and rcu_read_unlock() had
+much in the way of ordering properties. But they do not, so the CPU is
+within its rights to do significant reordering. This is by design: Any
+significant ordering constraints would slow down these fast-path APIs.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| Can't the compiler also reorder this code? |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| No, the volatile casts in READ_ONCE() and WRITE_ONCE() |
+| prevent the compiler from reordering in this particular case. |
++-----------------------------------------------------------------------+
+
+Readers Do Not Exclude Updaters
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Neither rcu_read_lock() nor rcu_read_unlock() exclude updates.
+All they do is to prevent grace periods from ending. The following
+example illustrates this:
+
+ ::
+
+ 1 void thread0(void)
+ 2 {
+ 3 rcu_read_lock();
+ 4 r1 = READ_ONCE(y);
+ 5 if (r1) {
+ 6 do_something_with_nonzero_x();
+ 7 r2 = READ_ONCE(x);
+ 8 WARN_ON(!r2); /* BUG!!! */
+ 9 }
+ 10 rcu_read_unlock();
+ 11 }
+ 12
+ 13 void thread1(void)
+ 14 {
+ 15 spin_lock(&my_lock);
+ 16 WRITE_ONCE(x, 1);
+ 17 WRITE_ONCE(y, 1);
+ 18 spin_unlock(&my_lock);
+ 19 }
+
+If the thread0() function's rcu_read_lock() excluded the
+thread1() function's update, the WARN_ON() could never fire. But
+the fact is that rcu_read_lock() does not exclude much of anything
+aside from subsequent grace periods, of which thread1() has none, so
+the WARN_ON() can and does fire.
+
+Updaters Only Wait For Old Readers
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+It might be tempting to assume that after synchronize_rcu()
+completes, there are no readers executing. This temptation must be
+avoided because new readers can start immediately after
+synchronize_rcu() starts, and synchronize_rcu() is under no
+obligation to wait for these new readers.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| Suppose that synchronize_rcu() did wait until *all* readers had |
+| completed instead of waiting only on pre-existing readers. For how |
+| long would the updater be able to rely on there being no readers? |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| For no time at all. Even if synchronize_rcu() were to wait until |
+| all readers had completed, a new reader might start immediately after |
+| synchronize_rcu() completed. Therefore, the code following |
+| synchronize_rcu() can *never* rely on there being no readers. |
++-----------------------------------------------------------------------+
+
+Grace Periods Don't Partition Read-Side Critical Sections
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+It is tempting to assume that if any part of one RCU read-side critical
+section precedes a given grace period, and if any part of another RCU
+read-side critical section follows that same grace period, then all of
+the first RCU read-side critical section must precede all of the second.
+However, this just isn't the case: A single grace period does not
+partition the set of RCU read-side critical sections. An example of this
+situation can be illustrated as follows, where ``x``, ``y``, and ``z``
+are initially all zero:
+
+ ::
+
+ 1 void thread0(void)
+ 2 {
+ 3 rcu_read_lock();
+ 4 WRITE_ONCE(a, 1);
+ 5 WRITE_ONCE(b, 1);
+ 6 rcu_read_unlock();
+ 7 }
+ 8
+ 9 void thread1(void)
+ 10 {
+ 11 r1 = READ_ONCE(a);
+ 12 synchronize_rcu();
+ 13 WRITE_ONCE(c, 1);
+ 14 }
+ 15
+ 16 void thread2(void)
+ 17 {
+ 18 rcu_read_lock();
+ 19 r2 = READ_ONCE(b);
+ 20 r3 = READ_ONCE(c);
+ 21 rcu_read_unlock();
+ 22 }
+
+It turns out that the outcome:
+
+ ::
+
+ (r1 == 1 && r2 == 0 && r3 == 1)
+
+is entirely possible. The following figure show how this can happen,
+with each circled ``QS`` indicating the point at which RCU recorded a
+*quiescent state* for each thread, that is, a state in which RCU knows
+that the thread cannot be in the midst of an RCU read-side critical
+section that started before the current grace period:
+
+.. kernel-figure:: GPpartitionReaders1.svg
+
+If it is necessary to partition RCU read-side critical sections in this
+manner, it is necessary to use two grace periods, where the first grace
+period is known to end before the second grace period starts:
+
+ ::
+
+ 1 void thread0(void)
+ 2 {
+ 3 rcu_read_lock();
+ 4 WRITE_ONCE(a, 1);
+ 5 WRITE_ONCE(b, 1);
+ 6 rcu_read_unlock();
+ 7 }
+ 8
+ 9 void thread1(void)
+ 10 {
+ 11 r1 = READ_ONCE(a);
+ 12 synchronize_rcu();
+ 13 WRITE_ONCE(c, 1);
+ 14 }
+ 15
+ 16 void thread2(void)
+ 17 {
+ 18 r2 = READ_ONCE(c);
+ 19 synchronize_rcu();
+ 20 WRITE_ONCE(d, 1);
+ 21 }
+ 22
+ 23 void thread3(void)
+ 24 {
+ 25 rcu_read_lock();
+ 26 r3 = READ_ONCE(b);
+ 27 r4 = READ_ONCE(d);
+ 28 rcu_read_unlock();
+ 29 }
+
+Here, if ``(r1 == 1)``, then thread0()'s write to ``b`` must happen
+before the end of thread1()'s grace period. If in addition
+``(r4 == 1)``, then thread3()'s read from ``b`` must happen after
+the beginning of thread2()'s grace period. If it is also the case
+that ``(r2 == 1)``, then the end of thread1()'s grace period must
+precede the beginning of thread2()'s grace period. This mean that
+the two RCU read-side critical sections cannot overlap, guaranteeing
+that ``(r3 == 1)``. As a result, the outcome:
+
+ ::
+
+ (r1 == 1 && r2 == 1 && r3 == 0 && r4 == 1)
+
+cannot happen.
+
+This non-requirement was also non-premeditated, but became apparent when
+studying RCU's interaction with memory ordering.
+
+Read-Side Critical Sections Don't Partition Grace Periods
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+It is also tempting to assume that if an RCU read-side critical section
+happens between a pair of grace periods, then those grace periods cannot
+overlap. However, this temptation leads nowhere good, as can be
+illustrated by the following, with all variables initially zero:
+
+ ::
+
+ 1 void thread0(void)
+ 2 {
+ 3 rcu_read_lock();
+ 4 WRITE_ONCE(a, 1);
+ 5 WRITE_ONCE(b, 1);
+ 6 rcu_read_unlock();
+ 7 }
+ 8
+ 9 void thread1(void)
+ 10 {
+ 11 r1 = READ_ONCE(a);
+ 12 synchronize_rcu();
+ 13 WRITE_ONCE(c, 1);
+ 14 }
+ 15
+ 16 void thread2(void)
+ 17 {
+ 18 rcu_read_lock();
+ 19 WRITE_ONCE(d, 1);
+ 20 r2 = READ_ONCE(c);
+ 21 rcu_read_unlock();
+ 22 }
+ 23
+ 24 void thread3(void)
+ 25 {
+ 26 r3 = READ_ONCE(d);
+ 27 synchronize_rcu();
+ 28 WRITE_ONCE(e, 1);
+ 29 }
+ 30
+ 31 void thread4(void)
+ 32 {
+ 33 rcu_read_lock();
+ 34 r4 = READ_ONCE(b);
+ 35 r5 = READ_ONCE(e);
+ 36 rcu_read_unlock();
+ 37 }
+
+In this case, the outcome:
+
+ ::
+
+ (r1 == 1 && r2 == 1 && r3 == 1 && r4 == 0 && r5 == 1)
+
+is entirely possible, as illustrated below:
+
+.. kernel-figure:: ReadersPartitionGP1.svg
+
+Again, an RCU read-side critical section can overlap almost all of a
+given grace period, just so long as it does not overlap the entire grace
+period. As a result, an RCU read-side critical section cannot partition
+a pair of RCU grace periods.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| How long a sequence of grace periods, each separated by an RCU |
+| read-side critical section, would be required to partition the RCU |
+| read-side critical sections at the beginning and end of the chain? |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| In theory, an infinite number. In practice, an unknown number that is |
+| sensitive to both implementation details and timing considerations. |
+| Therefore, even in practice, RCU users must abide by the theoretical |
+| rather than the practical answer. |
++-----------------------------------------------------------------------+
+
+Parallelism Facts of Life
+-------------------------
+
+These parallelism facts of life are by no means specific to RCU, but the
+RCU implementation must abide by them. They therefore bear repeating:
+
+#. Any CPU or task may be delayed at any time, and any attempts to avoid
+ these delays by disabling preemption, interrupts, or whatever are
+ completely futile. This is most obvious in preemptible user-level
+ environments and in virtualized environments (where a given guest
+ OS's VCPUs can be preempted at any time by the underlying
+ hypervisor), but can also happen in bare-metal environments due to
+ ECC errors, NMIs, and other hardware events. Although a delay of more
+ than about 20 seconds can result in splats, the RCU implementation is
+ obligated to use algorithms that can tolerate extremely long delays,
+ but where “extremely long” is not long enough to allow wrap-around
+ when incrementing a 64-bit counter.
+#. Both the compiler and the CPU can reorder memory accesses. Where it
+ matters, RCU must use compiler directives and memory-barrier
+ instructions to preserve ordering.
+#. Conflicting writes to memory locations in any given cache line will
+ result in expensive cache misses. Greater numbers of concurrent
+ writes and more-frequent concurrent writes will result in more
+ dramatic slowdowns. RCU is therefore obligated to use algorithms that
+ have sufficient locality to avoid significant performance and
+ scalability problems.
+#. As a rough rule of thumb, only one CPU's worth of processing may be
+ carried out under the protection of any given exclusive lock. RCU
+ must therefore use scalable locking designs.
+#. Counters are finite, especially on 32-bit systems. RCU's use of
+ counters must therefore tolerate counter wrap, or be designed such
+ that counter wrap would take way more time than a single system is
+ likely to run. An uptime of ten years is quite possible, a runtime of
+ a century much less so. As an example of the latter, RCU's
+ dyntick-idle nesting counter allows 54 bits for interrupt nesting
+ level (this counter is 64 bits even on a 32-bit system). Overflowing
+ this counter requires 2\ :sup:`54` half-interrupts on a given CPU
+ without that CPU ever going idle. If a half-interrupt happened every
+ microsecond, it would take 570 years of runtime to overflow this
+ counter, which is currently believed to be an acceptably long time.
+#. Linux systems can have thousands of CPUs running a single Linux
+ kernel in a single shared-memory environment. RCU must therefore pay
+ close attention to high-end scalability.
+
+This last parallelism fact of life means that RCU must pay special
+attention to the preceding facts of life. The idea that Linux might
+scale to systems with thousands of CPUs would have been met with some
+skepticism in the 1990s, but these requirements would have otherwise
+have been unsurprising, even in the early 1990s.
+
+Quality-of-Implementation Requirements
+--------------------------------------
+
+These sections list quality-of-implementation requirements. Although an
+RCU implementation that ignores these requirements could still be used,
+it would likely be subject to limitations that would make it
+inappropriate for industrial-strength production use. Classes of
+quality-of-implementation requirements are as follows:
+
+#. `Specialization`_
+#. `Performance and Scalability`_
+#. `Forward Progress`_
+#. `Composability`_
+#. `Corner Cases`_
+
+These classes is covered in the following sections.
+
+Specialization
+~~~~~~~~~~~~~~
+
+RCU is and always has been intended primarily for read-mostly
+situations, which means that RCU's read-side primitives are optimized,
+often at the expense of its update-side primitives. Experience thus far
+is captured by the following list of situations:
+
+#. Read-mostly data, where stale and inconsistent data is not a problem:
+ RCU works great!
+#. Read-mostly data, where data must be consistent: RCU works well.
+#. Read-write data, where data must be consistent: RCU *might* work OK.
+ Or not.
+#. Write-mostly data, where data must be consistent: RCU is very
+ unlikely to be the right tool for the job, with the following
+ exceptions, where RCU can provide:
+
+ a. Existence guarantees for update-friendly mechanisms.
+ b. Wait-free read-side primitives for real-time use.
+
+This focus on read-mostly situations means that RCU must interoperate
+with other synchronization primitives. For example, the add_gp() and
+remove_gp_synchronous() examples discussed earlier use RCU to
+protect readers and locking to coordinate updaters. However, the need
+extends much farther, requiring that a variety of synchronization
+primitives be legal within RCU read-side critical sections, including
+spinlocks, sequence locks, atomic operations, reference counters, and
+memory barriers.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| What about sleeping locks? |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| These are forbidden within Linux-kernel RCU read-side critical |
+| sections because it is not legal to place a quiescent state (in this |
+| case, voluntary context switch) within an RCU read-side critical |
+| section. However, sleeping locks may be used within userspace RCU |
+| read-side critical sections, and also within Linux-kernel sleepable |
+| RCU `(SRCU) <Sleepable RCU_>`__ read-side critical sections. In |
+| addition, the -rt patchset turns spinlocks into a sleeping locks so |
+| that the corresponding critical sections can be preempted, which also |
+| means that these sleeplockified spinlocks (but not other sleeping |
+| locks!) may be acquire within -rt-Linux-kernel RCU read-side critical |
+| sections. |
+| Note that it *is* legal for a normal RCU read-side critical section |
+| to conditionally acquire a sleeping locks (as in |
+| mutex_trylock()), but only as long as it does not loop |
+| indefinitely attempting to conditionally acquire that sleeping locks. |
+| The key point is that things like mutex_trylock() either return |
+| with the mutex held, or return an error indication if the mutex was |
+| not immediately available. Either way, mutex_trylock() returns |
+| immediately without sleeping. |
++-----------------------------------------------------------------------+
+
+It often comes as a surprise that many algorithms do not require a
+consistent view of data, but many can function in that mode, with
+network routing being the poster child. Internet routing algorithms take
+significant time to propagate updates, so that by the time an update
+arrives at a given system, that system has been sending network traffic
+the wrong way for a considerable length of time. Having a few threads
+continue to send traffic the wrong way for a few more milliseconds is
+clearly not a problem: In the worst case, TCP retransmissions will
+eventually get the data where it needs to go. In general, when tracking
+the state of the universe outside of the computer, some level of
+inconsistency must be tolerated due to speed-of-light delays if nothing
+else.
+
+Furthermore, uncertainty about external state is inherent in many cases.
+For example, a pair of veterinarians might use heartbeat to determine
+whether or not a given cat was alive. But how long should they wait
+after the last heartbeat to decide that the cat is in fact dead? Waiting
+less than 400 milliseconds makes no sense because this would mean that a
+relaxed cat would be considered to cycle between death and life more
+than 100 times per minute. Moreover, just as with human beings, a cat's
+heart might stop for some period of time, so the exact wait period is a
+judgment call. One of our pair of veterinarians might wait 30 seconds
+before pronouncing the cat dead, while the other might insist on waiting
+a full minute. The two veterinarians would then disagree on the state of
+the cat during the final 30 seconds of the minute following the last
+heartbeat.
+
+Interestingly enough, this same situation applies to hardware. When push
+comes to shove, how do we tell whether or not some external server has
+failed? We send messages to it periodically, and declare it failed if we
+don't receive a response within a given period of time. Policy decisions
+can usually tolerate short periods of inconsistency. The policy was
+decided some time ago, and is only now being put into effect, so a few
+milliseconds of delay is normally inconsequential.
+
+However, there are algorithms that absolutely must see consistent data.
+For example, the translation between a user-level SystemV semaphore ID
+to the corresponding in-kernel data structure is protected by RCU, but
+it is absolutely forbidden to update a semaphore that has just been
+removed. In the Linux kernel, this need for consistency is accommodated
+by acquiring spinlocks located in the in-kernel data structure from
+within the RCU read-side critical section, and this is indicated by the
+green box in the figure above. Many other techniques may be used, and
+are in fact used within the Linux kernel.
+
+In short, RCU is not required to maintain consistency, and other
+mechanisms may be used in concert with RCU when consistency is required.
+RCU's specialization allows it to do its job extremely well, and its
+ability to interoperate with other synchronization mechanisms allows the
+right mix of synchronization tools to be used for a given job.
+
+Performance and Scalability
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Energy efficiency is a critical component of performance today, and
+Linux-kernel RCU implementations must therefore avoid unnecessarily
+awakening idle CPUs. I cannot claim that this requirement was
+premeditated. In fact, I learned of it during a telephone conversation
+in which I was given “frank and open” feedback on the importance of
+energy efficiency in battery-powered systems and on specific
+energy-efficiency shortcomings of the Linux-kernel RCU implementation.
+In my experience, the battery-powered embedded community will consider
+any unnecessary wakeups to be extremely unfriendly acts. So much so that
+mere Linux-kernel-mailing-list posts are insufficient to vent their ire.
+
+Memory consumption is not particularly important for in most situations,
+and has become decreasingly so as memory sizes have expanded and memory
+costs have plummeted. However, as I learned from Matt Mackall's
+`bloatwatch <http://elinux.org/Linux_Tiny-FAQ>`__ efforts, memory
+footprint is critically important on single-CPU systems with
+non-preemptible (``CONFIG_PREEMPTION=n``) kernels, and thus `tiny
+RCU <https://lore.kernel.org/r/20090113221724.GA15307@linux.vnet.ibm.com>`__
+was born. Josh Triplett has since taken over the small-memory banner
+with his `Linux kernel tinification <https://tiny.wiki.kernel.org/>`__
+project, which resulted in `SRCU <Sleepable RCU_>`__ becoming optional
+for those kernels not needing it.
+
+The remaining performance requirements are, for the most part,
+unsurprising. For example, in keeping with RCU's read-side
+specialization, rcu_dereference() should have negligible overhead
+(for example, suppression of a few minor compiler optimizations).
+Similarly, in non-preemptible environments, rcu_read_lock() and
+rcu_read_unlock() should have exactly zero overhead.
+
+In preemptible environments, in the case where the RCU read-side
+critical section was not preempted (as will be the case for the
+highest-priority real-time process), rcu_read_lock() and
+rcu_read_unlock() should have minimal overhead. In particular, they
+should not contain atomic read-modify-write operations, memory-barrier
+instructions, preemption disabling, interrupt disabling, or backwards
+branches. However, in the case where the RCU read-side critical section
+was preempted, rcu_read_unlock() may acquire spinlocks and disable
+interrupts. This is why it is better to nest an RCU read-side critical
+section within a preempt-disable region than vice versa, at least in
+cases where that critical section is short enough to avoid unduly
+degrading real-time latencies.
+
+The synchronize_rcu() grace-period-wait primitive is optimized for
+throughput. It may therefore incur several milliseconds of latency in
+addition to the duration of the longest RCU read-side critical section.
+On the other hand, multiple concurrent invocations of
+synchronize_rcu() are required to use batching optimizations so that
+they can be satisfied by a single underlying grace-period-wait
+operation. For example, in the Linux kernel, it is not unusual for a
+single grace-period-wait operation to serve more than `1,000 separate
+invocations <https://www.usenix.org/conference/2004-usenix-annual-technical-conference/making-rcu-safe-deep-sub-millisecond-response>`__
+of synchronize_rcu(), thus amortizing the per-invocation overhead
+down to nearly zero. However, the grace-period optimization is also
+required to avoid measurable degradation of real-time scheduling and
+interrupt latencies.
+
+In some cases, the multi-millisecond synchronize_rcu() latencies are
+unacceptable. In these cases, synchronize_rcu_expedited() may be
+used instead, reducing the grace-period latency down to a few tens of
+microseconds on small systems, at least in cases where the RCU read-side
+critical sections are short. There are currently no special latency
+requirements for synchronize_rcu_expedited() on large systems, but,
+consistent with the empirical nature of the RCU specification, that is
+subject to change. However, there most definitely are scalability
+requirements: A storm of synchronize_rcu_expedited() invocations on
+4096 CPUs should at least make reasonable forward progress. In return
+for its shorter latencies, synchronize_rcu_expedited() is permitted
+to impose modest degradation of real-time latency on non-idle online
+CPUs. Here, “modest” means roughly the same latency degradation as a
+scheduling-clock interrupt.
+
+There are a number of situations where even
+synchronize_rcu_expedited()'s reduced grace-period latency is
+unacceptable. In these situations, the asynchronous call_rcu() can
+be used in place of synchronize_rcu() as follows:
+
+ ::
+
+ 1 struct foo {
+ 2 int a;
+ 3 int b;
+ 4 struct rcu_head rh;
+ 5 };
+ 6
+ 7 static void remove_gp_cb(struct rcu_head *rhp)
+ 8 {
+ 9 struct foo *p = container_of(rhp, struct foo, rh);
+ 10
+ 11 kfree(p);
+ 12 }
+ 13
+ 14 bool remove_gp_asynchronous(void)
+ 15 {
+ 16 struct foo *p;
+ 17
+ 18 spin_lock(&gp_lock);
+ 19 p = rcu_access_pointer(gp);
+ 20 if (!p) {
+ 21 spin_unlock(&gp_lock);
+ 22 return false;
+ 23 }
+ 24 rcu_assign_pointer(gp, NULL);
+ 25 call_rcu(&p->rh, remove_gp_cb);
+ 26 spin_unlock(&gp_lock);
+ 27 return true;
+ 28 }
+
+A definition of ``struct foo`` is finally needed, and appears on
+lines 1-5. The function remove_gp_cb() is passed to call_rcu()
+on line 25, and will be invoked after the end of a subsequent grace
+period. This gets the same effect as remove_gp_synchronous(), but
+without forcing the updater to wait for a grace period to elapse. The
+call_rcu() function may be used in a number of situations where
+neither synchronize_rcu() nor synchronize_rcu_expedited() would
+be legal, including within preempt-disable code, local_bh_disable()
+code, interrupt-disable code, and interrupt handlers. However, even
+call_rcu() is illegal within NMI handlers and from idle and offline
+CPUs. The callback function (remove_gp_cb() in this case) will be
+executed within softirq (software interrupt) environment within the
+Linux kernel, either within a real softirq handler or under the
+protection of local_bh_disable(). In both the Linux kernel and in
+userspace, it is bad practice to write an RCU callback function that
+takes too long. Long-running operations should be relegated to separate
+threads or (in the Linux kernel) workqueues.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| Why does line 19 use rcu_access_pointer()? After all, |
+| call_rcu() on line 25 stores into the structure, which would |
+| interact badly with concurrent insertions. Doesn't this mean that |
+| rcu_dereference() is required? |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| Presumably the ``->gp_lock`` acquired on line 18 excludes any |
+| changes, including any insertions that rcu_dereference() would |
+| protect against. Therefore, any insertions will be delayed until |
+| after ``->gp_lock`` is released on line 25, which in turn means that |
+| rcu_access_pointer() suffices. |
++-----------------------------------------------------------------------+
+
+However, all that remove_gp_cb() is doing is invoking kfree() on
+the data element. This is a common idiom, and is supported by
+kfree_rcu(), which allows “fire and forget” operation as shown
+below:
+
+ ::
+
+ 1 struct foo {
+ 2 int a;
+ 3 int b;
+ 4 struct rcu_head rh;
+ 5 };
+ 6
+ 7 bool remove_gp_faf(void)
+ 8 {
+ 9 struct foo *p;
+ 10
+ 11 spin_lock(&gp_lock);
+ 12 p = rcu_dereference(gp);
+ 13 if (!p) {
+ 14 spin_unlock(&gp_lock);
+ 15 return false;
+ 16 }
+ 17 rcu_assign_pointer(gp, NULL);
+ 18 kfree_rcu(p, rh);
+ 19 spin_unlock(&gp_lock);
+ 20 return true;
+ 21 }
+
+Note that remove_gp_faf() simply invokes kfree_rcu() and
+proceeds, without any need to pay any further attention to the
+subsequent grace period and kfree(). It is permissible to invoke
+kfree_rcu() from the same environments as for call_rcu().
+Interestingly enough, DYNIX/ptx had the equivalents of call_rcu()
+and kfree_rcu(), but not synchronize_rcu(). This was due to the
+fact that RCU was not heavily used within DYNIX/ptx, so the very few
+places that needed something like synchronize_rcu() simply
+open-coded it.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| Earlier it was claimed that call_rcu() and kfree_rcu() |
+| allowed updaters to avoid being blocked by readers. But how can that |
+| be correct, given that the invocation of the callback and the freeing |
+| of the memory (respectively) must still wait for a grace period to |
+| elapse? |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| We could define things this way, but keep in mind that this sort of |
+| definition would say that updates in garbage-collected languages |
+| cannot complete until the next time the garbage collector runs, which |
+| does not seem at all reasonable. The key point is that in most cases, |
+| an updater using either call_rcu() or kfree_rcu() can proceed |
+| to the next update as soon as it has invoked call_rcu() or |
+| kfree_rcu(), without having to wait for a subsequent grace |
+| period. |
++-----------------------------------------------------------------------+
+
+But what if the updater must wait for the completion of code to be
+executed after the end of the grace period, but has other tasks that can
+be carried out in the meantime? The polling-style
+get_state_synchronize_rcu() and cond_synchronize_rcu() functions
+may be used for this purpose, as shown below:
+
+ ::
+
+ 1 bool remove_gp_poll(void)
+ 2 {
+ 3 struct foo *p;
+ 4 unsigned long s;
+ 5
+ 6 spin_lock(&gp_lock);
+ 7 p = rcu_access_pointer(gp);
+ 8 if (!p) {
+ 9 spin_unlock(&gp_lock);
+ 10 return false;
+ 11 }
+ 12 rcu_assign_pointer(gp, NULL);
+ 13 spin_unlock(&gp_lock);
+ 14 s = get_state_synchronize_rcu();
+ 15 do_something_while_waiting();
+ 16 cond_synchronize_rcu(s);
+ 17 kfree(p);
+ 18 return true;
+ 19 }
+
+On line 14, get_state_synchronize_rcu() obtains a “cookie” from RCU,
+then line 15 carries out other tasks, and finally, line 16 returns
+immediately if a grace period has elapsed in the meantime, but otherwise
+waits as required. The need for ``get_state_synchronize_rcu`` and
+cond_synchronize_rcu() has appeared quite recently, so it is too
+early to tell whether they will stand the test of time.
+
+RCU thus provides a range of tools to allow updaters to strike the
+required tradeoff between latency, flexibility and CPU overhead.
+
+Forward Progress
+~~~~~~~~~~~~~~~~
+
+In theory, delaying grace-period completion and callback invocation is
+harmless. In practice, not only are memory sizes finite but also
+callbacks sometimes do wakeups, and sufficiently deferred wakeups can be
+difficult to distinguish from system hangs. Therefore, RCU must provide
+a number of mechanisms to promote forward progress.
+
+These mechanisms are not foolproof, nor can they be. For one simple
+example, an infinite loop in an RCU read-side critical section must by
+definition prevent later grace periods from ever completing. For a more
+involved example, consider a 64-CPU system built with
+``CONFIG_RCU_NOCB_CPU=y`` and booted with ``rcu_nocbs=1-63``, where
+CPUs 1 through 63 spin in tight loops that invoke call_rcu(). Even
+if these tight loops also contain calls to cond_resched() (thus
+allowing grace periods to complete), CPU 0 simply will not be able to
+invoke callbacks as fast as the other 63 CPUs can register them, at
+least not until the system runs out of memory. In both of these
+examples, the Spiderman principle applies: With great power comes great
+responsibility. However, short of this level of abuse, RCU is required
+to ensure timely completion of grace periods and timely invocation of
+callbacks.
+
+RCU takes the following steps to encourage timely completion of grace
+periods:
+
+#. If a grace period fails to complete within 100 milliseconds, RCU
+ causes future invocations of cond_resched() on the holdout CPUs
+ to provide an RCU quiescent state. RCU also causes those CPUs'
+ need_resched() invocations to return ``true``, but only after the
+ corresponding CPU's next scheduling-clock.
+#. CPUs mentioned in the ``nohz_full`` kernel boot parameter can run
+ indefinitely in the kernel without scheduling-clock interrupts, which
+ defeats the above need_resched() strategem. RCU will therefore
+ invoke resched_cpu() on any ``nohz_full`` CPUs still holding out
+ after 109 milliseconds.
+#. In kernels built with ``CONFIG_RCU_BOOST=y``, if a given task that
+ has been preempted within an RCU read-side critical section is
+ holding out for more than 500 milliseconds, RCU will resort to
+ priority boosting.
+#. If a CPU is still holding out 10 seconds into the grace period, RCU
+ will invoke resched_cpu() on it regardless of its ``nohz_full``
+ state.
+
+The above values are defaults for systems running with ``HZ=1000``. They
+will vary as the value of ``HZ`` varies, and can also be changed using
+the relevant Kconfig options and kernel boot parameters. RCU currently
+does not do much sanity checking of these parameters, so please use
+caution when changing them. Note that these forward-progress measures
+are provided only for RCU, not for `SRCU <Sleepable RCU_>`__ or `Tasks
+RCU`_.
+
+RCU takes the following steps in call_rcu() to encourage timely
+invocation of callbacks when any given non-\ ``rcu_nocbs`` CPU has
+10,000 callbacks, or has 10,000 more callbacks than it had the last time
+encouragement was provided:
+
+#. Starts a grace period, if one is not already in progress.
+#. Forces immediate checking for quiescent states, rather than waiting
+ for three milliseconds to have elapsed since the beginning of the
+ grace period.
+#. Immediately tags the CPU's callbacks with their grace period
+ completion numbers, rather than waiting for the ``RCU_SOFTIRQ``
+ handler to get around to it.
+#. Lifts callback-execution batch limits, which speeds up callback
+ invocation at the expense of degrading realtime response.
+
+Again, these are default values when running at ``HZ=1000``, and can be
+overridden. Again, these forward-progress measures are provided only for
+RCU, not for `SRCU <Sleepable RCU_>`__ or `Tasks
+RCU`_. Even for RCU, callback-invocation forward
+progress for ``rcu_nocbs`` CPUs is much less well-developed, in part
+because workloads benefiting from ``rcu_nocbs`` CPUs tend to invoke
+call_rcu() relatively infrequently. If workloads emerge that need
+both ``rcu_nocbs`` CPUs and high call_rcu() invocation rates, then
+additional forward-progress work will be required.
+
+Composability
+~~~~~~~~~~~~~
+
+Composability has received much attention in recent years, perhaps in
+part due to the collision of multicore hardware with object-oriented
+techniques designed in single-threaded environments for single-threaded
+use. And in theory, RCU read-side critical sections may be composed, and
+in fact may be nested arbitrarily deeply. In practice, as with all
+real-world implementations of composable constructs, there are
+limitations.
+
+Implementations of RCU for which rcu_read_lock() and
+rcu_read_unlock() generate no code, such as Linux-kernel RCU when
+``CONFIG_PREEMPTION=n``, can be nested arbitrarily deeply. After all, there
+is no overhead. Except that if all these instances of
+rcu_read_lock() and rcu_read_unlock() are visible to the
+compiler, compilation will eventually fail due to exhausting memory,
+mass storage, or user patience, whichever comes first. If the nesting is
+not visible to the compiler, as is the case with mutually recursive
+functions each in its own translation unit, stack overflow will result.
+If the nesting takes the form of loops, perhaps in the guise of tail
+recursion, either the control variable will overflow or (in the Linux
+kernel) you will get an RCU CPU stall warning. Nevertheless, this class
+of RCU implementations is one of the most composable constructs in
+existence.
+
+RCU implementations that explicitly track nesting depth are limited by
+the nesting-depth counter. For example, the Linux kernel's preemptible
+RCU limits nesting to ``INT_MAX``. This should suffice for almost all
+practical purposes. That said, a consecutive pair of RCU read-side
+critical sections between which there is an operation that waits for a
+grace period cannot be enclosed in another RCU read-side critical
+section. This is because it is not legal to wait for a grace period
+within an RCU read-side critical section: To do so would result either
+in deadlock or in RCU implicitly splitting the enclosing RCU read-side
+critical section, neither of which is conducive to a long-lived and
+prosperous kernel.
+
+It is worth noting that RCU is not alone in limiting composability. For
+example, many transactional-memory implementations prohibit composing a
+pair of transactions separated by an irrevocable operation (for example,
+a network receive operation). For another example, lock-based critical
+sections can be composed surprisingly freely, but only if deadlock is
+avoided.
+
+In short, although RCU read-side critical sections are highly
+composable, care is required in some situations, just as is the case for
+any other composable synchronization mechanism.
+
+Corner Cases
+~~~~~~~~~~~~
+
+A given RCU workload might have an endless and intense stream of RCU
+read-side critical sections, perhaps even so intense that there was
+never a point in time during which there was not at least one RCU
+read-side critical section in flight. RCU cannot allow this situation to
+block grace periods: As long as all the RCU read-side critical sections
+are finite, grace periods must also be finite.
+
+That said, preemptible RCU implementations could potentially result in
+RCU read-side critical sections being preempted for long durations,
+which has the effect of creating a long-duration RCU read-side critical
+section. This situation can arise only in heavily loaded systems, but
+systems using real-time priorities are of course more vulnerable.
+Therefore, RCU priority boosting is provided to help deal with this
+case. That said, the exact requirements on RCU priority boosting will
+likely evolve as more experience accumulates.
+
+Other workloads might have very high update rates. Although one can
+argue that such workloads should instead use something other than RCU,
+the fact remains that RCU must handle such workloads gracefully. This
+requirement is another factor driving batching of grace periods, but it
+is also the driving force behind the checks for large numbers of queued
+RCU callbacks in the call_rcu() code path. Finally, high update
+rates should not delay RCU read-side critical sections, although some
+small read-side delays can occur when using
+synchronize_rcu_expedited(), courtesy of this function's use of
+smp_call_function_single().
+
+Although all three of these corner cases were understood in the early
+1990s, a simple user-level test consisting of ``close(open(path))`` in a
+tight loop in the early 2000s suddenly provided a much deeper
+appreciation of the high-update-rate corner case. This test also
+motivated addition of some RCU code to react to high update rates, for
+example, if a given CPU finds itself with more than 10,000 RCU callbacks
+queued, it will cause RCU to take evasive action by more aggressively
+starting grace periods and more aggressively forcing completion of
+grace-period processing. This evasive action causes the grace period to
+complete more quickly, but at the cost of restricting RCU's batching
+optimizations, thus increasing the CPU overhead incurred by that grace
+period.
+
+Software-Engineering Requirements
+---------------------------------
+
+Between Murphy's Law and “To err is human”, it is necessary to guard
+against mishaps and misuse:
+
+#. It is all too easy to forget to use rcu_read_lock() everywhere
+ that it is needed, so kernels built with ``CONFIG_PROVE_RCU=y`` will
+ splat if rcu_dereference() is used outside of an RCU read-side
+ critical section. Update-side code can use
+ rcu_dereference_protected(), which takes a `lockdep
+ expression <https://lwn.net/Articles/371986/>`__ to indicate what is
+ providing the protection. If the indicated protection is not
+ provided, a lockdep splat is emitted.
+ Code shared between readers and updaters can use
+ rcu_dereference_check(), which also takes a lockdep expression,
+ and emits a lockdep splat if neither rcu_read_lock() nor the
+ indicated protection is in place. In addition,
+ rcu_dereference_raw() is used in those (hopefully rare) cases
+ where the required protection cannot be easily described. Finally,
+ rcu_read_lock_held() is provided to allow a function to verify
+ that it has been invoked within an RCU read-side critical section. I
+ was made aware of this set of requirements shortly after Thomas
+ Gleixner audited a number of RCU uses.
+#. A given function might wish to check for RCU-related preconditions
+ upon entry, before using any other RCU API. The
+ rcu_lockdep_assert() does this job, asserting the expression in
+ kernels having lockdep enabled and doing nothing otherwise.
+#. It is also easy to forget to use rcu_assign_pointer() and
+ rcu_dereference(), perhaps (incorrectly) substituting a simple
+ assignment. To catch this sort of error, a given RCU-protected
+ pointer may be tagged with ``__rcu``, after which sparse will
+ complain about simple-assignment accesses to that pointer. Arnd
+ Bergmann made me aware of this requirement, and also supplied the
+ needed `patch series <https://lwn.net/Articles/376011/>`__.
+#. Kernels built with ``CONFIG_DEBUG_OBJECTS_RCU_HEAD=y`` will splat if
+ a data element is passed to call_rcu() twice in a row, without a
+ grace period in between. (This error is similar to a double free.)
+ The corresponding ``rcu_head`` structures that are dynamically
+ allocated are automatically tracked, but ``rcu_head`` structures
+ allocated on the stack must be initialized with
+ init_rcu_head_on_stack() and cleaned up with
+ destroy_rcu_head_on_stack(). Similarly, statically allocated
+ non-stack ``rcu_head`` structures must be initialized with
+ init_rcu_head() and cleaned up with destroy_rcu_head().
+ Mathieu Desnoyers made me aware of this requirement, and also
+ supplied the needed
+ `patch <https://lore.kernel.org/r/20100319013024.GA28456@Krystal>`__.
+#. An infinite loop in an RCU read-side critical section will eventually
+ trigger an RCU CPU stall warning splat, with the duration of
+ “eventually” being controlled by the ``RCU_CPU_STALL_TIMEOUT``
+ ``Kconfig`` option, or, alternatively, by the
+ ``rcupdate.rcu_cpu_stall_timeout`` boot/sysfs parameter. However, RCU
+ is not obligated to produce this splat unless there is a grace period
+ waiting on that particular RCU read-side critical section.
+
+ Some extreme workloads might intentionally delay RCU grace periods,
+ and systems running those workloads can be booted with
+ ``rcupdate.rcu_cpu_stall_suppress`` to suppress the splats. This
+ kernel parameter may also be set via ``sysfs``. Furthermore, RCU CPU
+ stall warnings are counter-productive during sysrq dumps and during
+ panics. RCU therefore supplies the rcu_sysrq_start() and
+ rcu_sysrq_end() API members to be called before and after long
+ sysrq dumps. RCU also supplies the rcu_panic() notifier that is
+ automatically invoked at the beginning of a panic to suppress further
+ RCU CPU stall warnings.
+
+ This requirement made itself known in the early 1990s, pretty much
+ the first time that it was necessary to debug a CPU stall. That said,
+ the initial implementation in DYNIX/ptx was quite generic in
+ comparison with that of Linux.
+
+#. Although it would be very good to detect pointers leaking out of RCU
+ read-side critical sections, there is currently no good way of doing
+ this. One complication is the need to distinguish between pointers
+ leaking and pointers that have been handed off from RCU to some other
+ synchronization mechanism, for example, reference counting.
+#. In kernels built with ``CONFIG_RCU_TRACE=y``, RCU-related information
+ is provided via event tracing.
+#. Open-coded use of rcu_assign_pointer() and rcu_dereference()
+ to create typical linked data structures can be surprisingly
+ error-prone. Therefore, RCU-protected `linked
+ lists <https://lwn.net/Articles/609973/#RCU%20List%20APIs>`__ and,
+ more recently, RCU-protected `hash
+ tables <https://lwn.net/Articles/612100/>`__ are available. Many
+ other special-purpose RCU-protected data structures are available in
+ the Linux kernel and the userspace RCU library.
+#. Some linked structures are created at compile time, but still require
+ ``__rcu`` checking. The RCU_POINTER_INITIALIZER() macro serves
+ this purpose.
+#. It is not necessary to use rcu_assign_pointer() when creating
+ linked structures that are to be published via a single external
+ pointer. The RCU_INIT_POINTER() macro is provided for this task.
+
+This not a hard-and-fast list: RCU's diagnostic capabilities will
+continue to be guided by the number and type of usage bugs found in
+real-world RCU usage.
+
+Linux Kernel Complications
+--------------------------
+
+The Linux kernel provides an interesting environment for all kinds of
+software, including RCU. Some of the relevant points of interest are as
+follows:
+
+#. `Configuration`_
+#. `Firmware Interface`_
+#. `Early Boot`_
+#. `Interrupts and NMIs`_
+#. `Loadable Modules`_
+#. `Hotplug CPU`_
+#. `Scheduler and RCU`_
+#. `Tracing and RCU`_
+#. `Accesses to User Memory and RCU`_
+#. `Energy Efficiency`_
+#. `Scheduling-Clock Interrupts and RCU`_
+#. `Memory Efficiency`_
+#. `Performance, Scalability, Response Time, and Reliability`_
+
+This list is probably incomplete, but it does give a feel for the most
+notable Linux-kernel complications. Each of the following sections
+covers one of the above topics.
+
+Configuration
+~~~~~~~~~~~~~
+
+RCU's goal is automatic configuration, so that almost nobody needs to
+worry about RCU's ``Kconfig`` options. And for almost all users, RCU
+does in fact work well “out of the box.”
+
+However, there are specialized use cases that are handled by kernel boot
+parameters and ``Kconfig`` options. Unfortunately, the ``Kconfig``
+system will explicitly ask users about new ``Kconfig`` options, which
+requires almost all of them be hidden behind a ``CONFIG_RCU_EXPERT``
+``Kconfig`` option.
+
+This all should be quite obvious, but the fact remains that Linus
+Torvalds recently had to
+`remind <https://lore.kernel.org/r/CA+55aFy4wcCwaL4okTs8wXhGZ5h-ibecy_Meg9C4MNQrUnwMcg@mail.gmail.com>`__
+me of this requirement.
+
+Firmware Interface
+~~~~~~~~~~~~~~~~~~
+
+In many cases, kernel obtains information about the system from the
+firmware, and sometimes things are lost in translation. Or the
+translation is accurate, but the original message is bogus.
+
+For example, some systems' firmware overreports the number of CPUs,
+sometimes by a large factor. If RCU naively believed the firmware, as it
+used to do, it would create too many per-CPU kthreads. Although the
+resulting system will still run correctly, the extra kthreads needlessly
+consume memory and can cause confusion when they show up in ``ps``
+listings.
+
+RCU must therefore wait for a given CPU to actually come online before
+it can allow itself to believe that the CPU actually exists. The
+resulting “ghost CPUs” (which are never going to come online) cause a
+number of `interesting
+complications <https://paulmck.livejournal.com/37494.html>`__.
+
+Early Boot
+~~~~~~~~~~
+
+The Linux kernel's boot sequence is an interesting process, and RCU is
+used early, even before rcu_init() is invoked. In fact, a number of
+RCU's primitives can be used as soon as the initial task's
+``task_struct`` is available and the boot CPU's per-CPU variables are
+set up. The read-side primitives (rcu_read_lock(),
+rcu_read_unlock(), rcu_dereference(), and
+rcu_access_pointer()) will operate normally very early on, as will
+rcu_assign_pointer().
+
+Although call_rcu() may be invoked at any time during boot,
+callbacks are not guaranteed to be invoked until after all of RCU's
+kthreads have been spawned, which occurs at early_initcall() time.
+This delay in callback invocation is due to the fact that RCU does not
+invoke callbacks until it is fully initialized, and this full
+initialization cannot occur until after the scheduler has initialized
+itself to the point where RCU can spawn and run its kthreads. In theory,
+it would be possible to invoke callbacks earlier, however, this is not a
+panacea because there would be severe restrictions on what operations
+those callbacks could invoke.
+
+Perhaps surprisingly, synchronize_rcu() and
+synchronize_rcu_expedited(), will operate normally during very early
+boot, the reason being that there is only one CPU and preemption is
+disabled. This means that the call synchronize_rcu() (or friends)
+itself is a quiescent state and thus a grace period, so the early-boot
+implementation can be a no-op.
+
+However, once the scheduler has spawned its first kthread, this early
+boot trick fails for synchronize_rcu() (as well as for
+synchronize_rcu_expedited()) in ``CONFIG_PREEMPTION=y`` kernels. The
+reason is that an RCU read-side critical section might be preempted,
+which means that a subsequent synchronize_rcu() really does have to
+wait for something, as opposed to simply returning immediately.
+Unfortunately, synchronize_rcu() can't do this until all of its
+kthreads are spawned, which doesn't happen until some time during
+early_initcalls() time. But this is no excuse: RCU is nevertheless
+required to correctly handle synchronous grace periods during this time
+period. Once all of its kthreads are up and running, RCU starts running
+normally.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| How can RCU possibly handle grace periods before all of its kthreads |
+| have been spawned??? |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| Very carefully! |
+| During the “dead zone” between the time that the scheduler spawns the |
+| first task and the time that all of RCU's kthreads have been spawned, |
+| all synchronous grace periods are handled by the expedited |
+| grace-period mechanism. At runtime, this expedited mechanism relies |
+| on workqueues, but during the dead zone the requesting task itself |
+| drives the desired expedited grace period. Because dead-zone |
+| execution takes place within task context, everything works. Once the |
+| dead zone ends, expedited grace periods go back to using workqueues, |
+| as is required to avoid problems that would otherwise occur when a |
+| user task received a POSIX signal while driving an expedited grace |
+| period. |
+| |
+| And yes, this does mean that it is unhelpful to send POSIX signals to |
+| random tasks between the time that the scheduler spawns its first |
+| kthread and the time that RCU's kthreads have all been spawned. If |
+| there ever turns out to be a good reason for sending POSIX signals |
+| during that time, appropriate adjustments will be made. (If it turns |
+| out that POSIX signals are sent during this time for no good reason, |
+| other adjustments will be made, appropriate or otherwise.) |
++-----------------------------------------------------------------------+
+
+I learned of these boot-time requirements as a result of a series of
+system hangs.
+
+Interrupts and NMIs
+~~~~~~~~~~~~~~~~~~~
+
+The Linux kernel has interrupts, and RCU read-side critical sections are
+legal within interrupt handlers and within interrupt-disabled regions of
+code, as are invocations of call_rcu().
+
+Some Linux-kernel architectures can enter an interrupt handler from
+non-idle process context, and then just never leave it, instead
+stealthily transitioning back to process context. This trick is
+sometimes used to invoke system calls from inside the kernel. These
+“half-interrupts” mean that RCU has to be very careful about how it
+counts interrupt nesting levels. I learned of this requirement the hard
+way during a rewrite of RCU's dyntick-idle code.
+
+The Linux kernel has non-maskable interrupts (NMIs), and RCU read-side
+critical sections are legal within NMI handlers. Thankfully, RCU
+update-side primitives, including call_rcu(), are prohibited within
+NMI handlers.
+
+The name notwithstanding, some Linux-kernel architectures can have
+nested NMIs, which RCU must handle correctly. Andy Lutomirski `surprised
+me <https://lore.kernel.org/r/CALCETrXLq1y7e_dKFPgou-FKHB6Pu-r8+t-6Ds+8=va7anBWDA@mail.gmail.com>`__
+with this requirement; he also kindly surprised me with `an
+algorithm <https://lore.kernel.org/r/CALCETrXSY9JpW3uE6H8WYk81sg56qasA2aqmjMPsq5dOtzso=g@mail.gmail.com>`__
+that meets this requirement.
+
+Furthermore, NMI handlers can be interrupted by what appear to RCU to be
+normal interrupts. One way that this can happen is for code that
+directly invokes ct_irq_enter() and ct_irq_exit() to be called
+from an NMI handler. This astonishing fact of life prompted the current
+code structure, which has ct_irq_enter() invoking
+ct_nmi_enter() and ct_irq_exit() invoking ct_nmi_exit().
+And yes, I also learned of this requirement the hard way.
+
+Loadable Modules
+~~~~~~~~~~~~~~~~
+
+The Linux kernel has loadable modules, and these modules can also be
+unloaded. After a given module has been unloaded, any attempt to call
+one of its functions results in a segmentation fault. The module-unload
+functions must therefore cancel any delayed calls to loadable-module
+functions, for example, any outstanding mod_timer() must be dealt
+with via timer_shutdown_sync() or similar.
+
+Unfortunately, there is no way to cancel an RCU callback; once you
+invoke call_rcu(), the callback function is eventually going to be
+invoked, unless the system goes down first. Because it is normally
+considered socially irresponsible to crash the system in response to a
+module unload request, we need some other way to deal with in-flight RCU
+callbacks.
+
+RCU therefore provides rcu_barrier(), which waits until all
+in-flight RCU callbacks have been invoked. If a module uses
+call_rcu(), its exit function should therefore prevent any future
+invocation of call_rcu(), then invoke rcu_barrier(). In theory,
+the underlying module-unload code could invoke rcu_barrier()
+unconditionally, but in practice this would incur unacceptable
+latencies.
+
+Nikita Danilov noted this requirement for an analogous
+filesystem-unmount situation, and Dipankar Sarma incorporated
+rcu_barrier() into RCU. The need for rcu_barrier() for module
+unloading became apparent later.
+
+.. important::
+
+ The rcu_barrier() function is not, repeat,
+ *not*, obligated to wait for a grace period. It is instead only required
+ to wait for RCU callbacks that have already been posted. Therefore, if
+ there are no RCU callbacks posted anywhere in the system,
+ rcu_barrier() is within its rights to return immediately. Even if
+ there are callbacks posted, rcu_barrier() does not necessarily need
+ to wait for a grace period.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| Wait a minute! Each RCU callbacks must wait for a grace period to |
+| complete, and rcu_barrier() must wait for each pre-existing |
+| callback to be invoked. Doesn't rcu_barrier() therefore need to |
+| wait for a full grace period if there is even one callback posted |
+| anywhere in the system? |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| Absolutely not!!! |
+| Yes, each RCU callbacks must wait for a grace period to complete, but |
+| it might well be partly (or even completely) finished waiting by the |
+| time rcu_barrier() is invoked. In that case, rcu_barrier() |
+| need only wait for the remaining portion of the grace period to |
+| elapse. So even if there are quite a few callbacks posted, |
+| rcu_barrier() might well return quite quickly. |
+| |
+| So if you need to wait for a grace period as well as for all |
+| pre-existing callbacks, you will need to invoke both |
+| synchronize_rcu() and rcu_barrier(). If latency is a concern, |
+| you can always use workqueues to invoke them concurrently. |
++-----------------------------------------------------------------------+
+
+Hotplug CPU
+~~~~~~~~~~~
+
+The Linux kernel supports CPU hotplug, which means that CPUs can come
+and go. It is of course illegal to use any RCU API member from an
+offline CPU, with the exception of `SRCU <Sleepable RCU_>`__ read-side
+critical sections. This requirement was present from day one in
+DYNIX/ptx, but on the other hand, the Linux kernel's CPU-hotplug
+implementation is “interesting.”
+
+The Linux-kernel CPU-hotplug implementation has notifiers that are used
+to allow the various kernel subsystems (including RCU) to respond
+appropriately to a given CPU-hotplug operation. Most RCU operations may
+be invoked from CPU-hotplug notifiers, including even synchronous
+grace-period operations such as (synchronize_rcu() and
+synchronize_rcu_expedited()). However, these synchronous operations
+do block and therefore cannot be invoked from notifiers that execute via
+stop_machine(), specifically those between the ``CPUHP_AP_OFFLINE``
+and ``CPUHP_AP_ONLINE`` states.
+
+In addition, all-callback-wait operations such as rcu_barrier() may
+not be invoked from any CPU-hotplug notifier. This restriction is due
+to the fact that there are phases of CPU-hotplug operations where the
+outgoing CPU's callbacks will not be invoked until after the CPU-hotplug
+operation ends, which could also result in deadlock. Furthermore,
+rcu_barrier() blocks CPU-hotplug operations during its execution,
+which results in another type of deadlock when invoked from a CPU-hotplug
+notifier.
+
+Finally, RCU must avoid deadlocks due to interaction between hotplug,
+timers and grace period processing. It does so by maintaining its own set
+of books that duplicate the centrally maintained ``cpu_online_mask``,
+and also by reporting quiescent states explicitly when a CPU goes
+offline. This explicit reporting of quiescent states avoids any need
+for the force-quiescent-state loop (FQS) to report quiescent states for
+offline CPUs. However, as a debugging measure, the FQS loop does splat
+if offline CPUs block an RCU grace period for too long.
+
+An offline CPU's quiescent state will be reported either:
+
+1. As the CPU goes offline using RCU's hotplug notifier (rcutree_report_cpu_dead()).
+2. When grace period initialization (rcu_gp_init()) detects a
+ race either with CPU offlining or with a task unblocking on a leaf
+ ``rcu_node`` structure whose CPUs are all offline.
+
+The CPU-online path (rcutree_report_cpu_starting()) should never need to report
+a quiescent state for an offline CPU. However, as a debugging measure,
+it does emit a warning if a quiescent state was not already reported
+for that CPU.
+
+During the checking/modification of RCU's hotplug bookkeeping, the
+corresponding CPU's leaf node lock is held. This avoids race conditions
+between RCU's hotplug notifier hooks, the grace period initialization
+code, and the FQS loop, all of which refer to or modify this bookkeeping.
+
+Note that grace period initialization (rcu_gp_init()) must carefully sequence
+CPU hotplug scanning with grace period state changes. For example, the
+following race could occur in rcu_gp_init() if rcu_seq_start() were to happen
+after the CPU hotplug scanning::
+
+ CPU0 (rcu_gp_init) CPU1 CPU2
+ --------------------- ---- ----
+ // Hotplug scan first (WRONG ORDER)
+ rcu_for_each_leaf_node(rnp) {
+ rnp->qsmaskinit = rnp->qsmaskinitnext;
+ }
+ rcutree_report_cpu_starting()
+ rnp->qsmaskinitnext |= mask;
+ rcu_read_lock()
+ r0 = *X;
+ r1 = *X;
+ X = NULL;
+ cookie = get_state_synchronize_rcu();
+ // cookie = 8 (future GP)
+ rcu_seq_start(&rcu_state.gp_seq);
+ // gp_seq = 5
+
+ // CPU1 now invisible to this GP!
+ rcu_for_each_node_breadth_first() {
+ rnp->qsmask = rnp->qsmaskinit;
+ // CPU1 not included!
+ }
+
+ // GP completes without CPU1
+ rcu_seq_end(&rcu_state.gp_seq);
+ // gp_seq = 8
+ poll_state_synchronize_rcu(cookie);
+ // Returns true!
+ kfree(r1);
+ r2 = *r0; // USE-AFTER-FREE!
+
+By incrementing ``gp_seq`` first, CPU1's RCU read-side critical section
+is guaranteed to not be missed by CPU2.
+
+Concurrent Quiescent State Reporting for Offline CPUs
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+RCU must ensure that CPUs going offline report quiescent states to avoid
+blocking grace periods. This requires careful synchronization to handle
+race conditions
+
+Race condition causing Offline CPU to hang GP
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+A race between CPU offlining and new GP initialization (gp_init()) may occur
+because rcu_report_qs_rnp() in rcutree_report_cpu_dead() must temporarily
+release the ``rcu_node`` lock to wake the RCU grace-period kthread::
+
+ CPU1 (going offline) CPU0 (GP kthread)
+ -------------------- -----------------
+ rcutree_report_cpu_dead()
+ rcu_report_qs_rnp()
+ // Must release rnp->lock to wake GP kthread
+ raw_spin_unlock_irqrestore_rcu_node()
+ // Wakes up and starts new GP
+ rcu_gp_init()
+ // First loop:
+ copies qsmaskinitnext->qsmaskinit
+ // CPU1 still in qsmaskinitnext!
+
+ // Second loop:
+ rnp->qsmask = rnp->qsmaskinit
+ mask = rnp->qsmask & ~rnp->qsmaskinitnext
+ // mask is 0! CPU1 still in both masks
+ // Reacquire lock (but too late)
+ rnp->qsmaskinitnext &= ~mask // Finally clears bit
+
+Without ``ofl_lock``, the new grace period includes the offline CPU and waits
+forever for its quiescent state causing a GP hang.
+
+A solution with ofl_lock
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``ofl_lock`` (offline lock) prevents rcu_gp_init() from running during
+the vulnerable window when rcu_report_qs_rnp() has released ``rnp->lock``::
+
+ CPU0 (rcu_gp_init) CPU1 (rcutree_report_cpu_dead)
+ ------------------ ------------------------------
+ rcu_for_each_leaf_node(rnp) {
+ arch_spin_lock(&ofl_lock) -----> arch_spin_lock(&ofl_lock) [BLOCKED]
+
+ // Safe: CPU1 can't interfere
+ rnp->qsmaskinit = rnp->qsmaskinitnext
+
+ arch_spin_unlock(&ofl_lock) ---> // Now CPU1 can proceed
+ } // But snapshot already taken
+
+Another race causing GP hangs in rcu_gpu_init(): Reporting QS for Now-offline CPUs
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+After the first loop takes an atomic snapshot of online CPUs, as shown above,
+the second loop in rcu_gp_init() detects CPUs that went offline between
+releasing ``ofl_lock`` and acquiring the per-node ``rnp->lock``.
+This detection is crucial because:
+
+1. The CPU might have gone offline after the snapshot but before the second loop
+2. The offline CPU cannot report its own QS if it's already dead
+3. Without this detection, the grace period would wait forever for CPUs that
+ are now offline.
+
+The second loop performs this detection safely::
+
+ rcu_for_each_node_breadth_first(rnp) {
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
+ rnp->qsmask = rnp->qsmaskinit; // Apply the snapshot
+
+ // Detect CPUs offline after snapshot
+ mask = rnp->qsmask & ~rnp->qsmaskinitnext;
+
+ if (mask && rcu_is_leaf_node(rnp))
+ rcu_report_qs_rnp(mask, ...) // Report QS for offline CPUs
+ }
+
+This approach ensures atomicity: quiescent state reporting for offline CPUs
+happens either in rcu_gp_init() (second loop) or in rcutree_report_cpu_dead(),
+never both and never neither. The ``rnp->lock`` held throughout the sequence
+prevents races - rcutree_report_cpu_dead() also acquires this lock when
+clearing ``qsmaskinitnext``, ensuring mutual exclusion.
+
+Scheduler and RCU
+~~~~~~~~~~~~~~~~~
+
+RCU makes use of kthreads, and it is necessary to avoid excessive CPU-time
+accumulation by these kthreads. This requirement was no surprise, but
+RCU's violation of it when running context-switch-heavy workloads when
+built with ``CONFIG_NO_HZ_FULL=y`` `did come as a surprise
+[PDF] <http://www.rdrop.com/users/paulmck/scalability/paper/BareMetal.2015.01.15b.pdf>`__.
+RCU has made good progress towards meeting this requirement, even for
+context-switch-heavy ``CONFIG_NO_HZ_FULL=y`` workloads, but there is
+room for further improvement.
+
+There is no longer any prohibition against holding any of
+scheduler's runqueue or priority-inheritance spinlocks across an
+rcu_read_unlock(), even if interrupts and preemption were enabled
+somewhere within the corresponding RCU read-side critical section.
+Therefore, it is now perfectly legal to execute rcu_read_lock()
+with preemption enabled, acquire one of the scheduler locks, and hold
+that lock across the matching rcu_read_unlock().
+
+Similarly, the RCU flavor consolidation has removed the need for negative
+nesting. The fact that interrupt-disabled regions of code act as RCU
+read-side critical sections implicitly avoids earlier issues that used
+to result in destructive recursion via interrupt handler's use of RCU.
+
+Tracing and RCU
+~~~~~~~~~~~~~~~
+
+It is possible to use tracing on RCU code, but tracing itself uses RCU.
+For this reason, rcu_dereference_raw_check() is provided for use
+by tracing, which avoids the destructive recursion that could otherwise
+ensue. This API is also used by virtualization in some architectures,
+where RCU readers execute in environments in which tracing cannot be
+used. The tracing folks both located the requirement and provided the
+needed fix, so this surprise requirement was relatively painless.
+
+Accesses to User Memory and RCU
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The kernel needs to access user-space memory, for example, to access data
+referenced by system-call parameters. The get_user() macro does this job.
+
+However, user-space memory might well be paged out, which means that
+get_user() might well page-fault and thus block while waiting for the
+resulting I/O to complete. It would be a very bad thing for the compiler to
+reorder a get_user() invocation into an RCU read-side critical section.
+
+For example, suppose that the source code looked like this:
+
+ ::
+
+ 1 rcu_read_lock();
+ 2 p = rcu_dereference(gp);
+ 3 v = p->value;
+ 4 rcu_read_unlock();
+ 5 get_user(user_v, user_p);
+ 6 do_something_with(v, user_v);
+
+The compiler must not be permitted to transform this source code into
+the following:
+
+ ::
+
+ 1 rcu_read_lock();
+ 2 p = rcu_dereference(gp);
+ 3 get_user(user_v, user_p); // BUG: POSSIBLE PAGE FAULT!!!
+ 4 v = p->value;
+ 5 rcu_read_unlock();
+ 6 do_something_with(v, user_v);
+
+If the compiler did make this transformation in a ``CONFIG_PREEMPTION=n`` kernel
+build, and if get_user() did page fault, the result would be a quiescent
+state in the middle of an RCU read-side critical section. This misplaced
+quiescent state could result in line 4 being a use-after-free access,
+which could be bad for your kernel's actuarial statistics. Similar examples
+can be constructed with the call to get_user() preceding the
+rcu_read_lock().
+
+Unfortunately, get_user() doesn't have any particular ordering properties,
+and in some architectures the underlying ``asm`` isn't even marked
+``volatile``. And even if it was marked ``volatile``, the above access to
+``p->value`` is not volatile, so the compiler would not have any reason to keep
+those two accesses in order.
+
+Therefore, the Linux-kernel definitions of rcu_read_lock() and
+rcu_read_unlock() must act as compiler barriers, at least for outermost
+instances of rcu_read_lock() and rcu_read_unlock() within a nested set
+of RCU read-side critical sections.
+
+Energy Efficiency
+~~~~~~~~~~~~~~~~~
+
+Interrupting idle CPUs is considered socially unacceptable, especially
+by people with battery-powered embedded systems. RCU therefore conserves
+energy by detecting which CPUs are idle, including tracking CPUs that
+have been interrupted from idle. This is a large part of the
+energy-efficiency requirement, so I learned of this via an irate phone
+call.
+
+Because RCU avoids interrupting idle CPUs, it is illegal to execute an
+RCU read-side critical section on an idle CPU. (Kernels built with
+``CONFIG_PROVE_RCU=y`` will splat if you try it.)
+
+It is similarly socially unacceptable to interrupt an ``nohz_full`` CPU
+running in userspace. RCU must therefore track ``nohz_full`` userspace
+execution. RCU must therefore be able to sample state at two points in
+time, and be able to determine whether or not some other CPU spent any
+time idle and/or executing in userspace.
+
+These energy-efficiency requirements have proven quite difficult to
+understand and to meet, for example, there have been more than five
+clean-sheet rewrites of RCU's energy-efficiency code, the last of which
+was finally able to demonstrate `real energy savings running on real
+hardware
+[PDF] <http://www.rdrop.com/users/paulmck/realtime/paper/AMPenergy.2013.04.19a.pdf>`__.
+As noted earlier, I learned of many of these requirements via angry
+phone calls: Flaming me on the Linux-kernel mailing list was apparently
+not sufficient to fully vent their ire at RCU's energy-efficiency bugs!
+
+Scheduling-Clock Interrupts and RCU
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The kernel transitions between in-kernel non-idle execution, userspace
+execution, and the idle loop. Depending on kernel configuration, RCU
+handles these states differently:
+
++-----------------+------------------+------------------+-----------------+
+| ``HZ`` Kconfig | In-Kernel | Usermode | Idle |
++=================+==================+==================+=================+
+| ``HZ_PERIODIC`` | Can rely on | Can rely on | Can rely on |
+| | scheduling-clock | scheduling-clock | RCU's |
+| | interrupt. | interrupt and | dyntick-idle |
+| | | its detection | detection. |
+| | | of interrupt | |
+| | | from usermode. | |
++-----------------+------------------+------------------+-----------------+
+| ``NO_HZ_IDLE`` | Can rely on | Can rely on | Can rely on |
+| | scheduling-clock | scheduling-clock | RCU's |
+| | interrupt. | interrupt and | dyntick-idle |
+| | | its detection | detection. |
+| | | of interrupt | |
+| | | from usermode. | |
++-----------------+------------------+------------------+-----------------+
+| ``NO_HZ_FULL`` | Can only | Can rely on | Can rely on |
+| | sometimes rely | RCU's | RCU's |
+| | on | dyntick-idle | dyntick-idle |
+| | scheduling-clock | detection. | detection. |
+| | interrupt. In | | |
+| | other cases, it | | |
+| | is necessary to | | |
+| | bound kernel | | |
+| | execution times | | |
+| | and/or use | | |
+| | IPIs. | | |
++-----------------+------------------+------------------+-----------------+
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| Why can't ``NO_HZ_FULL`` in-kernel execution rely on the |
+| scheduling-clock interrupt, just like ``HZ_PERIODIC`` and |
+| ``NO_HZ_IDLE`` do? |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| Because, as a performance optimization, ``NO_HZ_FULL`` does not |
+| necessarily re-enable the scheduling-clock interrupt on entry to each |
+| and every system call. |
++-----------------------------------------------------------------------+
+
+However, RCU must be reliably informed as to whether any given CPU is
+currently in the idle loop, and, for ``NO_HZ_FULL``, also whether that
+CPU is executing in usermode, as discussed
+`earlier <Energy Efficiency_>`__. It also requires that the
+scheduling-clock interrupt be enabled when RCU needs it to be:
+
+#. If a CPU is either idle or executing in usermode, and RCU believes it
+ is non-idle, the scheduling-clock tick had better be running.
+ Otherwise, you will get RCU CPU stall warnings. Or at best, very long
+ (11-second) grace periods, with a pointless IPI waking the CPU from
+ time to time.
+#. If a CPU is in a portion of the kernel that executes RCU read-side
+ critical sections, and RCU believes this CPU to be idle, you will get
+ random memory corruption. **DON'T DO THIS!!!**
+ This is one reason to test with lockdep, which will complain about
+ this sort of thing.
+#. If a CPU is in a portion of the kernel that is absolutely positively
+ no-joking guaranteed to never execute any RCU read-side critical
+ sections, and RCU believes this CPU to be idle, no problem. This
+ sort of thing is used by some architectures for light-weight
+ exception handlers, which can then avoid the overhead of
+ ct_irq_enter() and ct_irq_exit() at exception entry and
+ exit, respectively. Some go further and avoid the entireties of
+ irq_enter() and irq_exit().
+ Just make very sure you are running some of your tests with
+ ``CONFIG_PROVE_RCU=y``, just in case one of your code paths was in
+ fact joking about not doing RCU read-side critical sections.
+#. If a CPU is executing in the kernel with the scheduling-clock
+ interrupt disabled and RCU believes this CPU to be non-idle, and if
+ the CPU goes idle (from an RCU perspective) every few jiffies, no
+ problem. It is usually OK for there to be the occasional gap between
+ idle periods of up to a second or so.
+ If the gap grows too long, you get RCU CPU stall warnings.
+#. If a CPU is either idle or executing in usermode, and RCU believes it
+ to be idle, of course no problem.
+#. If a CPU is executing in the kernel, the kernel code path is passing
+ through quiescent states at a reasonable frequency (preferably about
+ once per few jiffies, but the occasional excursion to a second or so
+ is usually OK) and the scheduling-clock interrupt is enabled, of
+ course no problem.
+ If the gap between a successive pair of quiescent states grows too
+ long, you get RCU CPU stall warnings.
+
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| But what if my driver has a hardware interrupt handler that can run |
+| for many seconds? I cannot invoke schedule() from an hardware |
+| interrupt handler, after all! |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| One approach is to do ``ct_irq_exit();ct_irq_enter();`` every so |
+| often. But given that long-running interrupt handlers can cause other |
+| problems, not least for response time, shouldn't you work to keep |
+| your interrupt handler's runtime within reasonable bounds? |
++-----------------------------------------------------------------------+
+
+But as long as RCU is properly informed of kernel state transitions
+between in-kernel execution, usermode execution, and idle, and as long
+as the scheduling-clock interrupt is enabled when RCU needs it to be,
+you can rest assured that the bugs you encounter will be in some other
+part of RCU or some other part of the kernel!
+
+Memory Efficiency
+~~~~~~~~~~~~~~~~~
+
+Although small-memory non-realtime systems can simply use Tiny RCU, code
+size is only one aspect of memory efficiency. Another aspect is the size
+of the ``rcu_head`` structure used by call_rcu() and
+kfree_rcu(). Although this structure contains nothing more than a
+pair of pointers, it does appear in many RCU-protected data structures,
+including some that are size critical. The ``page`` structure is a case
+in point, as evidenced by the many occurrences of the ``union`` keyword
+within that structure.
+
+This need for memory efficiency is one reason that RCU uses hand-crafted
+singly linked lists to track the ``rcu_head`` structures that are
+waiting for a grace period to elapse. It is also the reason why
+``rcu_head`` structures do not contain debug information, such as fields
+tracking the file and line of the call_rcu() or kfree_rcu() that
+posted them. Although this information might appear in debug-only kernel
+builds at some point, in the meantime, the ``->func`` field will often
+provide the needed debug information.
+
+However, in some cases, the need for memory efficiency leads to even
+more extreme measures. Returning to the ``page`` structure, the
+``rcu_head`` field shares storage with a great many other structures
+that are used at various points in the corresponding page's lifetime. In
+order to correctly resolve certain `race
+conditions <https://lore.kernel.org/r/1439976106-137226-1-git-send-email-kirill.shutemov@linux.intel.com>`__,
+the Linux kernel's memory-management subsystem needs a particular bit to
+remain zero during all phases of grace-period processing, and that bit
+happens to map to the bottom bit of the ``rcu_head`` structure's
+``->next`` field. RCU makes this guarantee as long as call_rcu() is
+used to post the callback, as opposed to kfree_rcu() or some future
+“lazy” variant of call_rcu() that might one day be created for
+energy-efficiency purposes.
+
+That said, there are limits. RCU requires that the ``rcu_head``
+structure be aligned to a two-byte boundary, and passing a misaligned
+``rcu_head`` structure to one of the call_rcu() family of functions
+will result in a splat. It is therefore necessary to exercise caution
+when packing structures containing fields of type ``rcu_head``. Why not
+a four-byte or even eight-byte alignment requirement? Because the m68k
+architecture provides only two-byte alignment, and thus acts as
+alignment's least common denominator.
+
+The reason for reserving the bottom bit of pointers to ``rcu_head``
+structures is to leave the door open to “lazy” callbacks whose
+invocations can safely be deferred. Deferring invocation could
+potentially have energy-efficiency benefits, but only if the rate of
+non-lazy callbacks decreases significantly for some important workload.
+In the meantime, reserving the bottom bit keeps this option open in case
+it one day becomes useful.
+
+Performance, Scalability, Response Time, and Reliability
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Expanding on the `earlier
+discussion <Performance and Scalability_>`__, RCU is used heavily by
+hot code paths in performance-critical portions of the Linux kernel's
+networking, security, virtualization, and scheduling code paths. RCU
+must therefore use efficient implementations, especially in its
+read-side primitives. To that end, it would be good if preemptible RCU's
+implementation of rcu_read_lock() could be inlined, however, doing
+this requires resolving ``#include`` issues with the ``task_struct``
+structure.
+
+The Linux kernel supports hardware configurations with up to 4096 CPUs,
+which means that RCU must be extremely scalable. Algorithms that involve
+frequent acquisitions of global locks or frequent atomic operations on
+global variables simply cannot be tolerated within the RCU
+implementation. RCU therefore makes heavy use of a combining tree based
+on the ``rcu_node`` structure. RCU is required to tolerate all CPUs
+continuously invoking any combination of RCU's runtime primitives with
+minimal per-operation overhead. In fact, in many cases, increasing load
+must *decrease* the per-operation overhead, witness the batching
+optimizations for synchronize_rcu(), call_rcu(),
+synchronize_rcu_expedited(), and rcu_barrier(). As a general
+rule, RCU must cheerfully accept whatever the rest of the Linux kernel
+decides to throw at it.
+
+The Linux kernel is used for real-time workloads, especially in
+conjunction with the `-rt
+patchset <https://wiki.linuxfoundation.org/realtime/>`__. The
+real-time-latency response requirements are such that the traditional
+approach of disabling preemption across RCU read-side critical sections
+is inappropriate. Kernels built with ``CONFIG_PREEMPTION=y`` therefore use
+an RCU implementation that allows RCU read-side critical sections to be
+preempted. This requirement made its presence known after users made it
+clear that an earlier `real-time
+patch <https://lwn.net/Articles/107930/>`__ did not meet their needs, in
+conjunction with some `RCU
+issues <https://lore.kernel.org/r/20050318002026.GA2693@us.ibm.com>`__
+encountered by a very early version of the -rt patchset.
+
+In addition, RCU must make do with a sub-100-microsecond real-time
+latency budget. In fact, on smaller systems with the -rt patchset, the
+Linux kernel provides sub-20-microsecond real-time latencies for the
+whole kernel, including RCU. RCU's scalability and latency must
+therefore be sufficient for these sorts of configurations. To my
+surprise, the sub-100-microsecond real-time latency budget `applies to
+even the largest systems
+[PDF] <http://www.rdrop.com/users/paulmck/realtime/paper/bigrt.2013.01.31a.LCA.pdf>`__,
+up to and including systems with 4096 CPUs. This real-time requirement
+motivated the grace-period kthread, which also simplified handling of a
+number of race conditions.
+
+RCU must avoid degrading real-time response for CPU-bound threads,
+whether executing in usermode (which is one use case for
+``CONFIG_NO_HZ_FULL=y``) or in the kernel. That said, CPU-bound loops in
+the kernel must execute cond_resched() at least once per few tens of
+milliseconds in order to avoid receiving an IPI from RCU.
+
+Finally, RCU's status as a synchronization primitive means that any RCU
+failure can result in arbitrary memory corruption that can be extremely
+difficult to debug. This means that RCU must be extremely reliable,
+which in practice also means that RCU must have an aggressive
+stress-test suite. This stress-test suite is called ``rcutorture``.
+
+Although the need for ``rcutorture`` was no surprise, the current
+immense popularity of the Linux kernel is posing interesting—and perhaps
+unprecedented—validation challenges. To see this, keep in mind that
+there are well over one billion instances of the Linux kernel running
+today, given Android smartphones, Linux-powered televisions, and
+servers. This number can be expected to increase sharply with the advent
+of the celebrated Internet of Things.
+
+Suppose that RCU contains a race condition that manifests on average
+once per million years of runtime. This bug will be occurring about
+three times per *day* across the installed base. RCU could simply hide
+behind hardware error rates, given that no one should really expect
+their smartphone to last for a million years. However, anyone taking too
+much comfort from this thought should consider the fact that in most
+jurisdictions, a successful multi-year test of a given mechanism, which
+might include a Linux kernel, suffices for a number of types of
+safety-critical certifications. In fact, rumor has it that the Linux
+kernel is already being used in production for safety-critical
+applications. I don't know about you, but I would feel quite bad if a
+bug in RCU killed someone. Which might explain my recent focus on
+validation and verification.
+
+Other RCU Flavors
+-----------------
+
+One of the more surprising things about RCU is that there are now no
+fewer than five *flavors*, or API families. In addition, the primary
+flavor that has been the sole focus up to this point has two different
+implementations, non-preemptible and preemptible. The other four flavors
+are listed below, with requirements for each described in a separate
+section.
+
+#. `Bottom-Half Flavor (Historical)`_
+#. `Sched Flavor (Historical)`_
+#. `Sleepable RCU`_
+#. `Tasks RCU`_
+#. `Tasks Trace RCU`_
+
+Bottom-Half Flavor (Historical)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The RCU-bh flavor of RCU has since been expressed in terms of the other
+RCU flavors as part of a consolidation of the three flavors into a
+single flavor. The read-side API remains, and continues to disable
+softirq and to be accounted for by lockdep. Much of the material in this
+section is therefore strictly historical in nature.
+
+The softirq-disable (AKA “bottom-half”, hence the “_bh” abbreviations)
+flavor of RCU, or *RCU-bh*, was developed by Dipankar Sarma to provide a
+flavor of RCU that could withstand the network-based denial-of-service
+attacks researched by Robert Olsson. These attacks placed so much
+networking load on the system that some of the CPUs never exited softirq
+execution, which in turn prevented those CPUs from ever executing a
+context switch, which, in the RCU implementation of that time, prevented
+grace periods from ever ending. The result was an out-of-memory
+condition and a system hang.
+
+The solution was the creation of RCU-bh, which does
+local_bh_disable() across its read-side critical sections, and which
+uses the transition from one type of softirq processing to another as a
+quiescent state in addition to context switch, idle, user mode, and
+offline. This means that RCU-bh grace periods can complete even when
+some of the CPUs execute in softirq indefinitely, thus allowing
+algorithms based on RCU-bh to withstand network-based denial-of-service
+attacks.
+
+Because rcu_read_lock_bh() and rcu_read_unlock_bh() disable and
+re-enable softirq handlers, any attempt to start a softirq handlers
+during the RCU-bh read-side critical section will be deferred. In this
+case, rcu_read_unlock_bh() will invoke softirq processing, which can
+take considerable time. One can of course argue that this softirq
+overhead should be associated with the code following the RCU-bh
+read-side critical section rather than rcu_read_unlock_bh(), but the
+fact is that most profiling tools cannot be expected to make this sort
+of fine distinction. For example, suppose that a three-millisecond-long
+RCU-bh read-side critical section executes during a time of heavy
+networking load. There will very likely be an attempt to invoke at least
+one softirq handler during that three milliseconds, but any such
+invocation will be delayed until the time of the
+rcu_read_unlock_bh(). This can of course make it appear at first
+glance as if rcu_read_unlock_bh() was executing very slowly.
+
+The `RCU-bh
+API <https://lwn.net/Articles/609973/#RCU%20Per-Flavor%20API%20Table>`__
+includes rcu_read_lock_bh(), rcu_read_unlock_bh(), rcu_dereference_bh(),
+rcu_dereference_bh_check(), and rcu_read_lock_bh_held(). However, the
+old RCU-bh update-side APIs are now gone, replaced by synchronize_rcu(),
+synchronize_rcu_expedited(), call_rcu(), and rcu_barrier(). In addition,
+anything that disables bottom halves also marks an RCU-bh read-side
+critical section, including local_bh_disable() and local_bh_enable(),
+local_irq_save() and local_irq_restore(), and so on.
+
+Sched Flavor (Historical)
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The RCU-sched flavor of RCU has since been expressed in terms of the
+other RCU flavors as part of a consolidation of the three flavors into a
+single flavor. The read-side API remains, and continues to disable
+preemption and to be accounted for by lockdep. Much of the material in
+this section is therefore strictly historical in nature.
+
+Before preemptible RCU, waiting for an RCU grace period had the side
+effect of also waiting for all pre-existing interrupt and NMI handlers.
+However, there are legitimate preemptible-RCU implementations that do
+not have this property, given that any point in the code outside of an
+RCU read-side critical section can be a quiescent state. Therefore,
+*RCU-sched* was created, which follows “classic” RCU in that an
+RCU-sched grace period waits for pre-existing interrupt and NMI
+handlers. In kernels built with ``CONFIG_PREEMPTION=n``, the RCU and
+RCU-sched APIs have identical implementations, while kernels built with
+``CONFIG_PREEMPTION=y`` provide a separate implementation for each.
+
+Note well that in ``CONFIG_PREEMPTION=y`` kernels,
+rcu_read_lock_sched() and rcu_read_unlock_sched() disable and
+re-enable preemption, respectively. This means that if there was a
+preemption attempt during the RCU-sched read-side critical section,
+rcu_read_unlock_sched() will enter the scheduler, with all the
+latency and overhead entailed. Just as with rcu_read_unlock_bh(),
+this can make it look as if rcu_read_unlock_sched() was executing
+very slowly. However, the highest-priority task won't be preempted, so
+that task will enjoy low-overhead rcu_read_unlock_sched()
+invocations.
+
+The `RCU-sched
+API <https://lwn.net/Articles/609973/#RCU%20Per-Flavor%20API%20Table>`__
+includes rcu_read_lock_sched(), rcu_read_unlock_sched(),
+rcu_read_lock_sched_notrace(), rcu_read_unlock_sched_notrace(),
+rcu_dereference_sched(), rcu_dereference_sched_check(), and
+rcu_read_lock_sched_held(). However, the old RCU-sched update-side APIs
+are now gone, replaced by synchronize_rcu(), synchronize_rcu_expedited(),
+call_rcu(), and rcu_barrier(). In addition, anything that disables
+preemption also marks an RCU-sched read-side critical section,
+including preempt_disable() and preempt_enable(), local_irq_save()
+and local_irq_restore(), and so on.
+
+Sleepable RCU
+~~~~~~~~~~~~~
+
+For well over a decade, someone saying “I need to block within an RCU
+read-side critical section” was a reliable indication that this someone
+did not understand RCU. After all, if you are always blocking in an RCU
+read-side critical section, you can probably afford to use a
+higher-overhead synchronization mechanism. However, that changed with
+the advent of the Linux kernel's notifiers, whose RCU read-side critical
+sections almost never sleep, but sometimes need to. This resulted in the
+introduction of `sleepable RCU <https://lwn.net/Articles/202847/>`__, or
+*SRCU*.
+
+SRCU allows different domains to be defined, with each such domain
+defined by an instance of an ``srcu_struct`` structure. A pointer to
+this structure must be passed in to each SRCU function, for example,
+``synchronize_srcu(&ss)``, where ``ss`` is the ``srcu_struct``
+structure. The key benefit of these domains is that a slow SRCU reader
+in one domain does not delay an SRCU grace period in some other domain.
+That said, one consequence of these domains is that read-side code must
+pass a “cookie” from srcu_read_lock() to srcu_read_unlock(), for
+example, as follows:
+
+ ::
+
+ 1 int idx;
+ 2
+ 3 idx = srcu_read_lock(&ss);
+ 4 do_something();
+ 5 srcu_read_unlock(&ss, idx);
+
+As noted above, it is legal to block within SRCU read-side critical
+sections, however, with great power comes great responsibility. If you
+block forever in one of a given domain's SRCU read-side critical
+sections, then that domain's grace periods will also be blocked forever.
+Of course, one good way to block forever is to deadlock, which can
+happen if any operation in a given domain's SRCU read-side critical
+section can wait, either directly or indirectly, for that domain's grace
+period to elapse. For example, this results in a self-deadlock:
+
+ ::
+
+ 1 int idx;
+ 2
+ 3 idx = srcu_read_lock(&ss);
+ 4 do_something();
+ 5 synchronize_srcu(&ss);
+ 6 srcu_read_unlock(&ss, idx);
+
+However, if line 5 acquired a mutex that was held across a
+synchronize_srcu() for domain ``ss``, deadlock would still be
+possible. Furthermore, if line 5 acquired a mutex that was held across a
+synchronize_srcu() for some other domain ``ss1``, and if an
+``ss1``-domain SRCU read-side critical section acquired another mutex
+that was held across as ``ss``-domain synchronize_srcu(), deadlock
+would again be possible. Such a deadlock cycle could extend across an
+arbitrarily large number of different SRCU domains. Again, with great
+power comes great responsibility.
+
+Unlike the other RCU flavors, SRCU read-side critical sections can run
+on idle and even offline CPUs. This ability requires that
+srcu_read_lock() and srcu_read_unlock() contain memory barriers,
+which means that SRCU readers will run a bit slower than would RCU
+readers. It also motivates the smp_mb__after_srcu_read_unlock() API,
+which, in combination with srcu_read_unlock(), guarantees a full
+memory barrier.
+
+Also unlike other RCU flavors, synchronize_srcu() may **not** be
+invoked from CPU-hotplug notifiers, due to the fact that SRCU grace
+periods make use of timers and the possibility of timers being
+temporarily “stranded” on the outgoing CPU. This stranding of timers
+means that timers posted to the outgoing CPU will not fire until late in
+the CPU-hotplug process. The problem is that if a notifier is waiting on
+an SRCU grace period, that grace period is waiting on a timer, and that
+timer is stranded on the outgoing CPU, then the notifier will never be
+awakened, in other words, deadlock has occurred. This same situation of
+course also prohibits srcu_barrier() from being invoked from
+CPU-hotplug notifiers.
+
+SRCU also differs from other RCU flavors in that SRCU's expedited and
+non-expedited grace periods are implemented by the same mechanism. This
+means that in the current SRCU implementation, expediting a future grace
+period has the side effect of expediting all prior grace periods that
+have not yet completed. (But please note that this is a property of the
+current implementation, not necessarily of future implementations.) In
+addition, if SRCU has been idle for longer than the interval specified
+by the ``srcutree.exp_holdoff`` kernel boot parameter (25 microseconds
+by default), and if a synchronize_srcu() invocation ends this idle
+period, that invocation will be automatically expedited.
+
+As of v4.12, SRCU's callbacks are maintained per-CPU, eliminating a
+locking bottleneck present in prior kernel versions. Although this will
+allow users to put much heavier stress on call_srcu(), it is
+important to note that SRCU does not yet take any special steps to deal
+with callback flooding. So if you are posting (say) 10,000 SRCU
+callbacks per second per CPU, you are probably totally OK, but if you
+intend to post (say) 1,000,000 SRCU callbacks per second per CPU, please
+run some tests first. SRCU just might need a few adjustment to deal with
+that sort of load. Of course, your mileage may vary based on the speed
+of your CPUs and the size of your memory.
+
+The `SRCU
+API <https://lwn.net/Articles/609973/#RCU%20Per-Flavor%20API%20Table>`__
+includes srcu_read_lock(), srcu_read_unlock(),
+srcu_dereference(), srcu_dereference_check(),
+synchronize_srcu(), synchronize_srcu_expedited(),
+call_srcu(), srcu_barrier(), and srcu_read_lock_held(). It
+also includes DEFINE_SRCU(), DEFINE_STATIC_SRCU(), and
+init_srcu_struct() APIs for defining and initializing
+``srcu_struct`` structures.
+
+More recently, the SRCU API has added polling interfaces:
+
+#. start_poll_synchronize_srcu() returns a cookie identifying
+ the completion of a future SRCU grace period and ensures
+ that this grace period will be started.
+#. poll_state_synchronize_srcu() returns ``true`` iff the
+ specified cookie corresponds to an already-completed
+ SRCU grace period.
+#. get_state_synchronize_srcu() returns a cookie just like
+ start_poll_synchronize_srcu() does, but differs in that
+ it does nothing to ensure that any future SRCU grace period
+ will be started.
+
+These functions are used to avoid unnecessary SRCU grace periods in
+certain types of buffer-cache algorithms having multi-stage age-out
+mechanisms. The idea is that by the time the block has aged completely
+from the cache, an SRCU grace period will be very likely to have elapsed.
+
+Tasks RCU
+~~~~~~~~~
+
+Some forms of tracing use “trampolines” to handle the binary rewriting
+required to install different types of probes. It would be good to be
+able to free old trampolines, which sounds like a job for some form of
+RCU. However, because it is necessary to be able to install a trace
+anywhere in the code, it is not possible to use read-side markers such
+as rcu_read_lock() and rcu_read_unlock(). In addition, it does
+not work to have these markers in the trampoline itself, because there
+would need to be instructions following rcu_read_unlock(). Although
+synchronize_rcu() would guarantee that execution reached the
+rcu_read_unlock(), it would not be able to guarantee that execution
+had completely left the trampoline. Worse yet, in some situations
+the trampoline's protection must extend a few instructions *prior* to
+execution reaching the trampoline. For example, these few instructions
+might calculate the address of the trampoline, so that entering the
+trampoline would be pre-ordained a surprisingly long time before execution
+actually reached the trampoline itself.
+
+The solution, in the form of `Tasks
+RCU <https://lwn.net/Articles/607117/>`__, is to have implicit read-side
+critical sections that are delimited by voluntary context switches, that
+is, calls to schedule(), cond_resched(), and
+synchronize_rcu_tasks(). In addition, transitions to and from
+userspace execution also delimit tasks-RCU read-side critical sections.
+Idle tasks are ignored by Tasks RCU, and Tasks Rude RCU may be used to
+interact with them.
+
+Note well that involuntary context switches are *not* Tasks-RCU quiescent
+states. After all, in preemptible kernels, a task executing code in a
+trampoline might be preempted. In this case, the Tasks-RCU grace period
+clearly cannot end until that task resumes and its execution leaves that
+trampoline. This means, among other things, that cond_resched() does
+not provide a Tasks RCU quiescent state. (Instead, use rcu_softirq_qs()
+from softirq or rcu_tasks_classic_qs() otherwise.)
+
+The tasks-RCU API is quite compact, consisting only of
+call_rcu_tasks(), synchronize_rcu_tasks(), and
+rcu_barrier_tasks(). In ``CONFIG_PREEMPTION=n`` kernels, trampolines
+cannot be preempted, so these APIs map to call_rcu(),
+synchronize_rcu(), and rcu_barrier(), respectively. In
+``CONFIG_PREEMPTION=y`` kernels, trampolines can be preempted, and these
+three APIs are therefore implemented by separate functions that check
+for voluntary context switches.
+
+Tasks Rude RCU
+~~~~~~~~~~~~~~
+
+Some forms of tracing need to wait for all preemption-disabled regions
+of code running on any online CPU, including those executed when RCU is
+not watching. This means that synchronize_rcu() is insufficient, and
+Tasks Rude RCU must be used instead. This flavor of RCU does its work by
+forcing a workqueue to be scheduled on each online CPU, hence the "Rude"
+moniker. And this operation is considered to be quite rude by real-time
+workloads that don't want their ``nohz_full`` CPUs receiving IPIs and
+by battery-powered systems that don't want their idle CPUs to be awakened.
+
+Once kernel entry/exit and deep-idle functions have been properly tagged
+``noinstr``, Tasks RCU can start paying attention to idle tasks (except
+those that are idle from RCU's perspective) and then Tasks Rude RCU can
+be removed from the kernel.
+
+The tasks-rude-RCU API is also reader-marking-free and thus quite compact,
+consisting solely of synchronize_rcu_tasks_rude().
+
+Tasks Trace RCU
+~~~~~~~~~~~~~~~
+
+Some forms of tracing need to sleep in readers, but cannot tolerate
+SRCU's read-side overhead, which includes a full memory barrier in both
+srcu_read_lock() and srcu_read_unlock(). This need is handled by a
+Tasks Trace RCU that uses scheduler locking and IPIs to synchronize with
+readers. Real-time systems that cannot tolerate IPIs may build their
+kernels with ``CONFIG_TASKS_TRACE_RCU_READ_MB=y``, which avoids the IPIs at
+the expense of adding full memory barriers to the read-side primitives.
+
+The tasks-trace-RCU API is also reasonably compact,
+consisting of rcu_read_lock_trace(), rcu_read_unlock_trace(),
+rcu_read_lock_trace_held(), call_rcu_tasks_trace(),
+synchronize_rcu_tasks_trace(), and rcu_barrier_tasks_trace().
+
+Possible Future Changes
+-----------------------
+
+One of the tricks that RCU uses to attain update-side scalability is to
+increase grace-period latency with increasing numbers of CPUs. If this
+becomes a serious problem, it will be necessary to rework the
+grace-period state machine so as to avoid the need for the additional
+latency.
+
+RCU disables CPU hotplug in a few places, perhaps most notably in the
+rcu_barrier() operations. If there is a strong reason to use
+rcu_barrier() in CPU-hotplug notifiers, it will be necessary to
+avoid disabling CPU hotplug. This would introduce some complexity, so
+there had better be a *very* good reason.
+
+The tradeoff between grace-period latency on the one hand and
+interruptions of other CPUs on the other hand may need to be
+re-examined. The desire is of course for zero grace-period latency as
+well as zero interprocessor interrupts undertaken during an expedited
+grace period operation. While this ideal is unlikely to be achievable,
+it is quite possible that further improvements can be made.
+
+The multiprocessor implementations of RCU use a combining tree that
+groups CPUs so as to reduce lock contention and increase cache locality.
+However, this combining tree does not spread its memory across NUMA
+nodes nor does it align the CPU groups with hardware features such as
+sockets or cores. Such spreading and alignment is currently believed to
+be unnecessary because the hotpath read-side primitives do not access
+the combining tree, nor does call_rcu() in the common case. If you
+believe that your architecture needs such spreading and alignment, then
+your architecture should also benefit from the
+``rcutree.rcu_fanout_leaf`` boot parameter, which can be set to the
+number of CPUs in a socket, NUMA node, or whatever. If the number of
+CPUs is too large, use a fraction of the number of CPUs. If the number
+of CPUs is a large prime number, well, that certainly is an
+“interesting” architectural choice! More flexible arrangements might be
+considered, but only if ``rcutree.rcu_fanout_leaf`` has proven
+inadequate, and only if the inadequacy has been demonstrated by a
+carefully run and realistic system-level workload.
+
+Please note that arrangements that require RCU to remap CPU numbers will
+require extremely good demonstration of need and full exploration of
+alternatives.
+
+RCU's various kthreads are reasonably recent additions. It is quite
+likely that adjustments will be required to more gracefully handle
+extreme loads. It might also be necessary to be able to relate CPU
+utilization by RCU's kthreads and softirq handlers to the code that
+instigated this CPU utilization. For example, RCU callback overhead
+might be charged back to the originating call_rcu() instance, though
+probably not in production kernels.
+
+Additional work may be required to provide reasonable forward-progress
+guarantees under heavy load for grace periods and for callback
+invocation.
+
+Summary
+-------
+
+This document has presented more than two decade's worth of RCU
+requirements. Given that the requirements keep changing, this will not
+be the last word on this subject, but at least it serves to get an
+important subset of the requirements set forth.
+
+Acknowledgments
+---------------
+
+I am grateful to Steven Rostedt, Lai Jiangshan, Ingo Molnar, Oleg
+Nesterov, Borislav Petkov, Peter Zijlstra, Boqun Feng, and Andy
+Lutomirski for their help in rendering this article human readable, and
+to Michelle Rankin for her support of this effort. Other contributions
+are acknowledged in the Linux kernel's git archive.
diff --git a/Documentation/RCU/NMI-RCU.rst b/Documentation/RCU/NMI-RCU.rst
new file mode 100644
index 000000000000..dff60a80b386
--- /dev/null
+++ b/Documentation/RCU/NMI-RCU.rst
@@ -0,0 +1,123 @@
+.. _NMI_rcu_doc:
+
+Using RCU to Protect Dynamic NMI Handlers
+=========================================
+
+
+Although RCU is usually used to protect read-mostly data structures,
+it is possible to use RCU to provide dynamic non-maskable interrupt
+handlers, as well as dynamic irq handlers. This document describes
+how to do this, drawing loosely from Zwane Mwaikambo's NMI-timer
+work in an old version of "arch/x86/kernel/traps.c".
+
+The relevant pieces of code are listed below, each followed by a
+brief explanation::
+
+ static int dummy_nmi_callback(struct pt_regs *regs, int cpu)
+ {
+ return 0;
+ }
+
+The dummy_nmi_callback() function is a "dummy" NMI handler that does
+nothing, but returns zero, thus saying that it did nothing, allowing
+the NMI handler to take the default machine-specific action::
+
+ static nmi_callback_t nmi_callback = dummy_nmi_callback;
+
+This nmi_callback variable is a global function pointer to the current
+NMI handler::
+
+ void do_nmi(struct pt_regs * regs, long error_code)
+ {
+ int cpu;
+
+ nmi_enter();
+
+ cpu = smp_processor_id();
+ ++nmi_count(cpu);
+
+ if (!rcu_dereference_sched(nmi_callback)(regs, cpu))
+ default_do_nmi(regs);
+
+ nmi_exit();
+ }
+
+The do_nmi() function processes each NMI. It first disables preemption
+in the same way that a hardware irq would, then increments the per-CPU
+count of NMIs. It then invokes the NMI handler stored in the nmi_callback
+function pointer. If this handler returns zero, do_nmi() invokes the
+default_do_nmi() function to handle a machine-specific NMI. Finally,
+preemption is restored.
+
+In theory, rcu_dereference_sched() is not needed, since this code runs
+only on i386, which in theory does not need rcu_dereference_sched()
+anyway. However, in practice it is a good documentation aid, particularly
+for anyone attempting to do something similar on Alpha or on systems
+with aggressive optimizing compilers.
+
+Quick Quiz:
+ Why might the rcu_dereference_sched() be necessary on Alpha, given that the code referenced by the pointer is read-only?
+
+:ref:`Answer to Quick Quiz <answer_quick_quiz_NMI>`
+
+Back to the discussion of NMI and RCU::
+
+ void set_nmi_callback(nmi_callback_t callback)
+ {
+ rcu_assign_pointer(nmi_callback, callback);
+ }
+
+The set_nmi_callback() function registers an NMI handler. Note that any
+data that is to be used by the callback must be initialized up -before-
+the call to set_nmi_callback(). On architectures that do not order
+writes, the rcu_assign_pointer() ensures that the NMI handler sees the
+initialized values::
+
+ void unset_nmi_callback(void)
+ {
+ rcu_assign_pointer(nmi_callback, dummy_nmi_callback);
+ }
+
+This function unregisters an NMI handler, restoring the original
+dummy_nmi_handler(). However, there may well be an NMI handler
+currently executing on some other CPU. We therefore cannot free
+up any data structures used by the old NMI handler until execution
+of it completes on all other CPUs.
+
+One way to accomplish this is via synchronize_rcu(), perhaps as
+follows::
+
+ unset_nmi_callback();
+ synchronize_rcu();
+ kfree(my_nmi_data);
+
+This works because (as of v4.20) synchronize_rcu() blocks until all
+CPUs complete any preemption-disabled segments of code that they were
+executing.
+Since NMI handlers disable preemption, synchronize_rcu() is guaranteed
+not to return until all ongoing NMI handlers exit. It is therefore safe
+to free up the handler's data as soon as synchronize_rcu() returns.
+
+Important note: for this to work, the architecture in question must
+invoke nmi_enter() and nmi_exit() on NMI entry and exit, respectively.
+
+.. _answer_quick_quiz_NMI:
+
+Answer to Quick Quiz:
+ Why might the rcu_dereference_sched() be necessary on Alpha, given that the code referenced by the pointer is read-only?
+
+ The caller to set_nmi_callback() might well have
+ initialized some data that is to be used by the new NMI
+ handler. In this case, the rcu_dereference_sched() would
+ be needed, because otherwise a CPU that received an NMI
+ just after the new handler was set might see the pointer
+ to the new NMI handler, but the old pre-initialized
+ version of the handler's data.
+
+ This same sad story can happen on other CPUs when using
+ a compiler with aggressive pointer-value speculation
+ optimizations. (But please don't!)
+
+ More important, the rcu_dereference_sched() makes it
+ clear to someone reading the code that the pointer is
+ being protected by RCU-sched.
diff --git a/Documentation/RCU/NMI-RCU.txt b/Documentation/RCU/NMI-RCU.txt
deleted file mode 100644
index 687777f83b23..000000000000
--- a/Documentation/RCU/NMI-RCU.txt
+++ /dev/null
@@ -1,120 +0,0 @@
-Using RCU to Protect Dynamic NMI Handlers
-
-
-Although RCU is usually used to protect read-mostly data structures,
-it is possible to use RCU to provide dynamic non-maskable interrupt
-handlers, as well as dynamic irq handlers. This document describes
-how to do this, drawing loosely from Zwane Mwaikambo's NMI-timer
-work in "arch/x86/oprofile/nmi_timer_int.c" and in
-"arch/x86/kernel/traps.c".
-
-The relevant pieces of code are listed below, each followed by a
-brief explanation.
-
- static int dummy_nmi_callback(struct pt_regs *regs, int cpu)
- {
- return 0;
- }
-
-The dummy_nmi_callback() function is a "dummy" NMI handler that does
-nothing, but returns zero, thus saying that it did nothing, allowing
-the NMI handler to take the default machine-specific action.
-
- static nmi_callback_t nmi_callback = dummy_nmi_callback;
-
-This nmi_callback variable is a global function pointer to the current
-NMI handler.
-
- void do_nmi(struct pt_regs * regs, long error_code)
- {
- int cpu;
-
- nmi_enter();
-
- cpu = smp_processor_id();
- ++nmi_count(cpu);
-
- if (!rcu_dereference_sched(nmi_callback)(regs, cpu))
- default_do_nmi(regs);
-
- nmi_exit();
- }
-
-The do_nmi() function processes each NMI. It first disables preemption
-in the same way that a hardware irq would, then increments the per-CPU
-count of NMIs. It then invokes the NMI handler stored in the nmi_callback
-function pointer. If this handler returns zero, do_nmi() invokes the
-default_do_nmi() function to handle a machine-specific NMI. Finally,
-preemption is restored.
-
-In theory, rcu_dereference_sched() is not needed, since this code runs
-only on i386, which in theory does not need rcu_dereference_sched()
-anyway. However, in practice it is a good documentation aid, particularly
-for anyone attempting to do something similar on Alpha or on systems
-with aggressive optimizing compilers.
-
-Quick Quiz: Why might the rcu_dereference_sched() be necessary on Alpha,
- given that the code referenced by the pointer is read-only?
-
-
-Back to the discussion of NMI and RCU...
-
- void set_nmi_callback(nmi_callback_t callback)
- {
- rcu_assign_pointer(nmi_callback, callback);
- }
-
-The set_nmi_callback() function registers an NMI handler. Note that any
-data that is to be used by the callback must be initialized up -before-
-the call to set_nmi_callback(). On architectures that do not order
-writes, the rcu_assign_pointer() ensures that the NMI handler sees the
-initialized values.
-
- void unset_nmi_callback(void)
- {
- rcu_assign_pointer(nmi_callback, dummy_nmi_callback);
- }
-
-This function unregisters an NMI handler, restoring the original
-dummy_nmi_handler(). However, there may well be an NMI handler
-currently executing on some other CPU. We therefore cannot free
-up any data structures used by the old NMI handler until execution
-of it completes on all other CPUs.
-
-One way to accomplish this is via synchronize_sched(), perhaps as
-follows:
-
- unset_nmi_callback();
- synchronize_sched();
- kfree(my_nmi_data);
-
-This works because synchronize_sched() blocks until all CPUs complete
-any preemption-disabled segments of code that they were executing.
-Since NMI handlers disable preemption, synchronize_sched() is guaranteed
-not to return until all ongoing NMI handlers exit. It is therefore safe
-to free up the handler's data as soon as synchronize_sched() returns.
-
-Important note: for this to work, the architecture in question must
-invoke nmi_enter() and nmi_exit() on NMI entry and exit, respectively.
-
-
-Answer to Quick Quiz
-
- Why might the rcu_dereference_sched() be necessary on Alpha, given
- that the code referenced by the pointer is read-only?
-
- Answer: The caller to set_nmi_callback() might well have
- initialized some data that is to be used by the new NMI
- handler. In this case, the rcu_dereference_sched() would
- be needed, because otherwise a CPU that received an NMI
- just after the new handler was set might see the pointer
- to the new NMI handler, but the old pre-initialized
- version of the handler's data.
-
- This same sad story can happen on other CPUs when using
- a compiler with aggressive pointer-value speculation
- optimizations.
-
- More important, the rcu_dereference_sched() makes it
- clear to someone reading the code that the pointer is
- being protected by RCU-sched.
diff --git a/Documentation/RCU/RTFP.txt b/Documentation/RCU/RTFP.txt
index 9bccf16736f7..8d4e8de4c460 100644
--- a/Documentation/RCU/RTFP.txt
+++ b/Documentation/RCU/RTFP.txt
@@ -201,7 +201,7 @@ work looked at debugging uses of RCU [Seyster:2011:RFA:2075416.2075425].
In 2012, Josh Triplett received his Ph.D. with his dissertation
covering RCU-protected resizable hash tables and the relationship
between memory barriers and read-side traversal order: If the updater
-is making changes in the opposite direction from the read-side traveral
+is making changes in the opposite direction from the read-side traversal
order, the updater need only execute a memory-barrier instruction,
but if in the same direction, the updater needs to wait for a grace
period between the individual updates [JoshTriplettPhD]. Also in 2012,
@@ -641,7 +641,7 @@ Orran Krieger and Rusty Russell and Dipankar Sarma and Maneesh Soni"
,Month="July"
,Year="2001"
,note="Available:
-\url{http://www.linuxsymposium.org/2001/abstracts/readcopy.php}
+\url{https://kernel.org/doc/ols/2001/read-copy.pdf}
\url{http://www.rdrop.com/users/paulmck/RCU/rclock_OLS.2001.05.01c.pdf}
[Viewed June 23, 2004]"
,annotation={
@@ -683,7 +683,7 @@ Orran Krieger and Rusty Russell and Dipankar Sarma and Maneesh Soni"
,month="October"
,year="2001"
,note="Available:
-\url{http://lkml.org/lkml/2001/10/13/105}
+\url{https://lore.kernel.org/r/Pine.LNX.4.33.0110131015410.8707-100000@penguin.transmeta.com}
[Viewed August 21, 2004]"
,annotation={
}
@@ -826,7 +826,7 @@ Symposium on Distributed Computing}
,month="October"
,year="2002"
,note="Available:
-\url{https://lkml.org/lkml/2002/10/24/262}
+\url{https://lore.kernel.org/r/3DB86B05.447E7410@us.ibm.com}
[Viewed February 15, 2014]"
,annotation={
Mingming Cao's patch to introduce RCU to SysV IPC.
@@ -839,7 +839,7 @@ Symposium on Distributed Computing}
,month="March"
,year="2003"
,note="Available:
-\url{http://lkml.org/lkml/2003/3/9/205}
+\url{https://lore.kernel.org/r/Pine.LNX.4.44.0303091831560.2129-100000@home.transmeta.com}
[Viewed March 13, 2006]"
,annotation={
Linus suggests replacing brlock with RCU and/or seqlocks:
@@ -847,7 +847,7 @@ Symposium on Distributed Computing}
'It's entirely possible that the current user could be replaced
by RCU and/or seqlocks, and we could get rid of brlocks entirely.'
.
- Steve Hemminger responds by replacing them with RCU.
+ Stephen Hemminger responds by replacing them with RCU.
}
}
@@ -1036,15 +1036,15 @@ Add per-cpu batch counter"
,annotation={
RCU runs reasonably on a 512-CPU SGI using Manfred Spraul's patches,
which may be found at:
- https://lkml.org/lkml/2004/5/20/49 (split vars into cachelines)
- https://lkml.org/lkml/2004/5/22/114 (cpu_quiet() patch)
- https://lkml.org/lkml/2004/5/25/24 (0/5)
- https://lkml.org/lkml/2004/5/25/23 (1/5)
- https://lkml.org/lkml/2004/5/25/265 (works for Jack)
- https://lkml.org/lkml/2004/5/25/20 (2/5)
- https://lkml.org/lkml/2004/5/25/22 (3/5)
- https://lkml.org/lkml/2004/5/25/19 (4/5)
- https://lkml.org/lkml/2004/5/25/21 (5/5)
+ https://lore.kernel.org/r/40AC9823.6020709@colorfullife.com (split vars into cachelines)
+ https://lore.kernel.org/r/Pine.LNX.4.44.0405222141260.11106-100000@dbl.q-ag.de (cpu_quiet() patch)
+ https://lore.kernel.org/r/200405250535.i4P5ZJo8017583@dbl.q-ag.de (0/5)
+ https://lore.kernel.org/r/200405250535.i4P5ZKAQ017591@dbl.q-ag.de (1/5)
+ https://lore.kernel.org/r/20040525203215.GB5127@sgi.com (works for Jack)
+ https://lore.kernel.org/r/200405250535.i4P5ZLiR017599@dbl.q-ag.de (2/5)
+ https://lore.kernel.org/r/200405250535.i4P5ZMFt017607@dbl.q-ag.de (3/5)
+ https://lore.kernel.org/r/200405250535.i4P5ZN6g017615@dbl.q-ag.de (4/5)
+ https://lore.kernel.org/r/200405250535.i4P5ZO7I017623@dbl.q-ag.de (5/5)
}
}
@@ -1106,7 +1106,7 @@ Oregon Health and Sciences University"
,month="August"
,year="2004"
,note="Available:
-\url{http://lkml.org/lkml/2004/8/6/237}
+\url{https://lore.kernel.org/r/20040807192424.GF3936@in.ibm.com}
[Viewed June 8, 2010]"
,annotation={
Introduce rcu_dereference().
@@ -1119,7 +1119,7 @@ Oregon Health and Sciences University"
,month="August"
,year="2004"
,note="Available:
-\url{http://lkml.org/lkml/2004/8/30/87}
+\url{https://lore.kernel.org/r/1093873222.984.12.camel@new.localdomain}
[Viewed February 17, 2005]"
,annotation={
Uses active code in rcu_read_lock() and rcu_read_unlock() to
@@ -1186,7 +1186,7 @@ Oregon Health and Sciences University"
,month="October"
,year="2004"
,note="Available:
-\url{http://lkml.org/lkml/2004/10/23/241}
+\url{https://lore.kernel.org/r/20041023202723.GA1930@us.ibm.com}
[Viewed June 8, 2010]"
,annotation={
Introduce rcu_assign_pointer().
@@ -1203,7 +1203,7 @@ Oregon Health and Sciences University"
,annotation={
James Morris posts Kaigai Kohei's patch to LKML.
[Viewed December 10, 2004]
- Kaigai's patch is at https://lkml.org/lkml/2004/9/27/52
+ Kaigai's patch is at https://lore.kernel.org/r/200409271057.i8RAvcA1007873@mailsv.bs1.fc.nec.co.jp
}
}
@@ -1241,11 +1241,11 @@ Oregon Health and Sciences University"
,year="2005"
,day="17"
,note="Available:
-\url{http://lkml.org/lkml/2005/3/17/199}
+\url{https://lore.kernel.org/r/20050318002026.GA2693@us.ibm.com}
[Viewed September 5, 2005]"
,annotation={
First posting showing how RCU can be safely adapted for
- preemptable RCU read side critical sections.
+ preemptible RCU read side critical sections.
}
}
@@ -1256,7 +1256,7 @@ Oregon Health and Sciences University"
,year="2005"
,day="18"
,note="Available:
-\url{http://lkml.org/lkml/2005/3/18/122}
+\url{https://lore.kernel.org/r/Pine.OSF.4.05.10503181336310.2466-100000@da410.phys.au.dk}
[Viewed March 30, 2006]"
,annotation={
Esben Neilsen suggests read-side suppression of grace-period
@@ -1302,7 +1302,7 @@ Data Structures"
,month="May"
,year="2005"
,note="Available:
-\url{http://lkml.org/lkml/2005/5/9/185}
+\url{https://lore.kernel.org/r/20050510012444.GA3011@us.ibm.com}
[Viewed May 13, 2005]"
,annotation={
First publication of working lock-based deferred free patches
@@ -1385,7 +1385,7 @@ Data Structures"
,day="1"
,year="2005"
,note="Available:
-\url{http://lkml.org/lkml/2005/8/1/155}
+\url{https://lore.kernel.org/r/20050801171137.GA1754@us.ibm.com}
[Viewed March 14, 2006]"
,annotation={
First operating counter-based realtime RCU patch posted to LKML.
@@ -1399,7 +1399,7 @@ Data Structures"
,day="8"
,year="2005"
,note="Available:
-\url{http://lkml.org/lkml/2005/8/8/108}
+\url{https://lore.kernel.org/r/20050808144216.GA1307@us.ibm.com}
[Viewed March 14, 2006]"
,annotation={
First operating counter-based realtime RCU patch posted to LKML,
@@ -1415,7 +1415,7 @@ Data Structures"
,day="1"
,year="2005"
,note="Available:
-\url{http://lkml.org/lkml/2005/10/1/70}
+\url{https://lore.kernel.org/r/20051001182056.GA1613@us.ibm.com}
[Viewed March 14, 2006]"
,annotation={
First rcutorture patch.
@@ -1429,7 +1429,7 @@ Data Structures"
,day="6"
,year="2006"
,note="Available:
-\url{https://lkml.org/lkml/2006/1/7/22}
+\url{https://lore.kernel.org/r/20060106.231054.43576567.davem@davemloft.net}
[Viewed February 29, 2012]"
,annotation={
David Miller's view on hashed arrays of locks: used to really
@@ -1464,7 +1464,7 @@ Distributed Processing Symposium"
,day="20"
,year="2006"
,note="Available:
-\url{http://lkml.org/lkml/2006/6/20/238}
+\url{https://lore.kernel.org/r/20060408134707.22479.33814.sendpatchset@linux.site}
[Viewed March 25, 2008]"
,annotation={
RCU-protected radix tree.
@@ -1480,7 +1480,7 @@ Suparna Bhattacharya"
,Year="2006"
,pages="v2 123-138"
,note="Available:
-\url{http://www.linuxsymposium.org/2006/view_abstract.php?content_key=184}
+\url{https://kernel.org/doc/ols/2006/ols2006v2-pages-131-146.pdf}
\url{http://www.rdrop.com/users/paulmck/RCU/OLSrtRCU.2006.08.11a.pdf}
[Viewed January 1, 2007]"
,annotation={
@@ -1511,7 +1511,7 @@ Canis Rufus and Zoicon5 and Anome and Hal Eisen"
,Year="2006"
,pages="v2 249-254"
,note="Available:
-\url{http://www.linuxsymposium.org/2006/view_abstract.php?content_key=184}
+\url{https://kernel.org/doc/ols/2006/ols2006v2-pages-249-262.pdf}
[Viewed January 11, 2009]"
,annotation={
Uses RCU-protected radix tree for a lockless page cache.
@@ -1554,7 +1554,7 @@ Revised:
,day="28"
,year="2006"
,note="Available:
-\url{http://lkml.org/lkml/2006/9/28/160}
+\url{https://lore.kernel.org/r/20060928142616.GA20185@infradead.org}
[Viewed March 27, 2008]"
}
@@ -1593,7 +1593,7 @@ Revised:
,year="2006"
,day=26
,note="Available:
-\url{http://lkml.org/lkml/2006/10/26/73}
+\url{https://lore.kernel.org/r/20061026105731.GE11803@in.ibm.com}
[Viewed January 26, 2009]"
,annotation={
RCU-based reader-writer lock that allows readers to proceed with
@@ -1612,12 +1612,12 @@ Revised:
,year="2006"
,day=17
,note="Available:
-\url{http://lkml.org/lkml/2006/11/17/56}
+\url{https://lore.kernel.org/r/20061117092925.GT7164@kernel.dk}
[Viewed May 28, 2007]"
,annotation={
SRCU's grace periods are too slow for Jens, even after a
factor-of-three speedup.
- Sped-up version of SRCU at http://lkml.org/lkml/2006/11/17/359.
+ Sped-up version of SRCU at https://lore.kernel.org/r/20061118002845.GF2632@us.ibm.com.
}
}
@@ -1629,7 +1629,7 @@ Revised:
,year="2006"
,day=19
,note="Available:
-\url{http://lkml.org/lkml/2006/11/19/69}
+\url{https://lore.kernel.org/r/20061119190027.GA3676@oleg}
[Viewed May 28, 2007]"
,annotation={
First cut of QRCU. Expanded/corrected versions followed.
@@ -1644,7 +1644,7 @@ Revised:
,year="2006"
,day=30
,note="Available:
-\url{http://lkml.org/lkml/2006/11/29/330}
+\url{https://lore.kernel.org/r/20061130015714.GC1350@oleg}
[Viewed November 26, 2008]"
,annotation={
Expanded/corrected version of QRCU.
@@ -1709,7 +1709,7 @@ Revised:
,year="2007"
,day=3
,note="Available:
-\url{http://lkml.org/lkml/2007/1/3/112}
+\url{https://lore.kernel.org/r/20070103152738.GA16063@localdomain}
[Viewed May 28, 2007]"
,annotation={
Patch for list_splice_rcu().
@@ -1737,7 +1737,7 @@ Revised:
,year="2007"
,day=28
,note="Available:
-\url{http://lkml.org/lkml/2007/1/28/34}
+\url{https://lore.kernel.org/r/20070128120509.719287000@programming.kicks-ass.net}
[Viewed March 27, 2008]"
,annotation={
RCU-like implementation for frequent updaters and rare readers(!).
@@ -1767,7 +1767,7 @@ Revised:
,year="2007"
,day=24
,note="Available:
-\url{http://lkml.org/lkml/2007/2/25/18}
+\url{https://lore.kernel.org/r/20070225062349.GA17468@linux.vnet.ibm.com}
[Viewed March 27, 2008]"
,annotation={
Patch for QRCU supplying lock-free fast path.
@@ -1846,7 +1846,7 @@ Revised:
,annotation={
LWN article describing Promela and spin, and also using Oleg
Nesterov's QRCU as an example (with Paul McKenney's fastpath).
- Merged patch at: http://lkml.org/lkml/2007/2/25/18
+ Merged patch at: https://lore.kernel.org/r/20070225062349.GA17468@linux.vnet.ibm.com
}
}
@@ -1885,10 +1885,10 @@ Revised:
,day="10"
,year="2007"
,note="Available:
-\url{http://lkml.org/lkml/2007/9/10/213}
+\url{https://lore.kernel.org/r/20070910183004.GA3299@linux.vnet.ibm.com}
[Viewed October 25, 2007]"
,annotation={
- Final patch for preemptable RCU to -rt. (Later patches were
+ Final patch for preemptible RCU to -rt. (Later patches were
to mainline, eventually incorporated.)
}
}
@@ -1933,7 +1933,7 @@ Revised:
,day="20"
,year="2007"
,note="Available:
-\url{http://lkml.org/lkml/2007/12/20/244}
+\url{https://lore.kernel.org/r/20071220142540.GB22523@Krystal}
[Viewed March 27, 2008]"
,annotation={
Request for call_rcu_sched() and rcu_barrier_sched().
@@ -2013,7 +2013,7 @@ Revised:
,day="29"
,year="2008"
,note="Available:
-\url{http://lkml.org/lkml/2008/1/29/208}
+\url{https://lore.kernel.org/r/Pine.LNX.4.58.0801291113350.20371@gandalf.stny.rr.com}
[Viewed March 27, 2008]"
,annotation={
Patch that prevents preemptible RCU from unnecessarily waking
@@ -2028,7 +2028,7 @@ Revised:
,day="1"
,year="2008"
,note="Available:
-\url{http://lkml.org/lkml/2008/2/2/255}
+\url{https://lore.kernel.org/r/20080202214124.GA28612@linux.vnet.ibm.com}
[Viewed October 18, 2008]"
,annotation={
Explanation of compilers violating dependency ordering.
@@ -2088,7 +2088,7 @@ lot of {Linux} into your technology!!!"
,day="3"
,year="2008"
,note="Available:
-\url{http://lkml.org/lkml/2008/6/2/539}
+\url{https://lore.kernel.org/r/4844BE83.5010401@cn.fujitsu.com}
[Viewed December 10, 2008]"
,annotation={
Updated RCU classic algorithm. Introduced multi-tailed list
@@ -2122,7 +2122,7 @@ lot of {Linux} into your technology!!!"
,day="21"
,year="2008"
,note="Available:
-\url{http://lkml.org/lkml/2008/8/21/336}
+\url{https://lore.kernel.org/r/48AD8969.7060900@colorfullife.com}
[Viewed December 8, 2008]"
,annotation={
State-based RCU. One key thing that this patch does is to
@@ -2137,7 +2137,7 @@ lot of {Linux} into your technology!!!"
,day="6"
,year="2008"
,note="Available:
-\url{http://lkml.org/lkml/2008/9/6/86}
+\url{https://lore.kernel.org/r/48C2B1D2.5070801@colorfullife.com}
[Viewed December 8, 2008]"
,annotation={
Manfred notes a fix required to my attempt to separate irq
@@ -2183,7 +2183,7 @@ lot of {Linux} into your technology!!!"
,day="14"
,year="2009"
,note="Available:
-\url{http://lkml.org/lkml/2009/1/14/449}
+\url{https://lore.kernel.org/r/20090114202044.GJ6734@linux.vnet.ibm.com}
[Viewed January 15, 2009]"
,annotation={
Small-footprint implementation of RCU for uniprocessor
@@ -2218,7 +2218,7 @@ lot of {Linux} into your technology!!!"
git://lttng.org/userspace-rcu.git
http://lttng.org/cgi-bin/gitweb.cgi?p=userspace-rcu.git
http://lttng.org/urcu
- http://lkml.org/lkml/2009/2/5/572
+ https://lore.kernel.org/r/20090206030543.GB8560@Krystal
}
}
@@ -2258,7 +2258,7 @@ lot of {Linux} into your technology!!!"
,day="25"
,year="2009"
,note="Available:
-\url{http://lkml.org/lkml/2009/6/25/306}
+\url{https://lore.kernel.org/r/20090625160706.GA9467@linux.vnet.ibm.com}
[Viewed August 16, 2009]"
,annotation={
First posting of expedited RCU to be accepted into -tip.
@@ -2272,10 +2272,10 @@ lot of {Linux} into your technology!!!"
,day="23"
,year="2009"
,note="Available:
-\url{http://lkml.org/lkml/2009/7/23/294}
+\url{https://lore.kernel.org/r/20090724001429.GA17374@linux.vnet.ibm.com}
[Viewed August 15, 2009]"
,annotation={
- First posting of simple and fast preemptable RCU.
+ First posting of simple and fast preemptible RCU.
}
}
@@ -2350,7 +2350,7 @@ lot of {Linux} into your technology!!!"
,month="December"
,year="2009"
,note="Available:
-\url{http://lkml.org/lkml/2009/10/18/129}
+\url{https://lore.kernel.org/r/20091018232918.GA7385@Krystal}
[Viewed December 29, 2009]"
,annotation={
Mathieu proposed defer_rcu() with fixed-size per-thread pool
@@ -2518,7 +2518,7 @@ lot of {Linux} into your technology!!!"
,month="January"
,year="2011"
,note="Available:
-\url{https://lkml.org/lkml/2011/1/18/322}
+\url{https://lore.kernel.org/r/AANLkTimajU0x1v6y3rH2+jr-bZ=tNLs1S_agXdGGAa3S@mail.gmail.com}
[Viewed March 4, 2011]"
,annotation={
"The RCU-based name lookup is at the other end of the spectrum - the
@@ -2639,7 +2639,7 @@ lot of {Linux} into your technology!!!"
RCU-protected hash tables, barriers vs. read-side traversal order.
.
If the updater is making changes in the opposite direction from
- the read-side traveral order, the updater need only execute a
+ the read-side traversal order, the updater need only execute a
memory-barrier instruction, but if in the same direction, the
updater needs to wait for a grace period between the individual
updates.
diff --git a/Documentation/RCU/UP.rst b/Documentation/RCU/UP.rst
new file mode 100644
index 000000000000..4060d7a2f62a
--- /dev/null
+++ b/Documentation/RCU/UP.rst
@@ -0,0 +1,152 @@
+.. _up_doc:
+
+RCU on Uniprocessor Systems
+===========================
+
+A common misconception is that, on UP systems, the call_rcu() primitive
+may immediately invoke its function. The basis of this misconception
+is that since there is only one CPU, it should not be necessary to
+wait for anything else to get done, since there are no other CPUs for
+anything else to be happening on. Although this approach will *sort of*
+work a surprising amount of the time, it is a very bad idea in general.
+This document presents three examples that demonstrate exactly how bad
+an idea this is.
+
+Example 1: softirq Suicide
+--------------------------
+
+Suppose that an RCU-based algorithm scans a linked list containing
+elements A, B, and C in process context, and can delete elements from
+this same list in softirq context. Suppose that the process-context scan
+is referencing element B when it is interrupted by softirq processing,
+which deletes element B, and then invokes call_rcu() to free element B
+after a grace period.
+
+Now, if call_rcu() were to directly invoke its arguments, then upon return
+from softirq, the list scan would find itself referencing a newly freed
+element B. This situation can greatly decrease the life expectancy of
+your kernel.
+
+This same problem can occur if call_rcu() is invoked from a hardware
+interrupt handler.
+
+Example 2: Function-Call Fatality
+---------------------------------
+
+Of course, one could avert the suicide described in the preceding example
+by having call_rcu() directly invoke its arguments only if it was called
+from process context. However, this can fail in a similar manner.
+
+Suppose that an RCU-based algorithm again scans a linked list containing
+elements A, B, and C in process context, but that it invokes a function
+on each element as it is scanned. Suppose further that this function
+deletes element B from the list, then passes it to call_rcu() for deferred
+freeing. This may be a bit unconventional, but it is perfectly legal
+RCU usage, since call_rcu() must wait for a grace period to elapse.
+Therefore, in this case, allowing call_rcu() to immediately invoke
+its arguments would cause it to fail to make the fundamental guarantee
+underlying RCU, namely that call_rcu() defers invoking its arguments until
+all RCU read-side critical sections currently executing have completed.
+
+Quick Quiz #1:
+ Why is it *not* legal to invoke synchronize_rcu() in this case?
+
+:ref:`Answers to Quick Quiz <answer_quick_quiz_up>`
+
+Example 3: Death by Deadlock
+----------------------------
+
+Suppose that call_rcu() is invoked while holding a lock, and that the
+callback function must acquire this same lock. In this case, if
+call_rcu() were to directly invoke the callback, the result would
+be self-deadlock *even if* this invocation occurred from a later
+call_rcu() invocation a full grace period later.
+
+In some cases, it would possible to restructure to code so that
+the call_rcu() is delayed until after the lock is released. However,
+there are cases where this can be quite ugly:
+
+1. If a number of items need to be passed to call_rcu() within
+ the same critical section, then the code would need to create
+ a list of them, then traverse the list once the lock was
+ released.
+
+2. In some cases, the lock will be held across some kernel API,
+ so that delaying the call_rcu() until the lock is released
+ requires that the data item be passed up via a common API.
+ It is far better to guarantee that callbacks are invoked
+ with no locks held than to have to modify such APIs to allow
+ arbitrary data items to be passed back up through them.
+
+If call_rcu() directly invokes the callback, painful locking restrictions
+or API changes would be required.
+
+Quick Quiz #2:
+ What locking restriction must RCU callbacks respect?
+
+:ref:`Answers to Quick Quiz <answer_quick_quiz_up>`
+
+It is important to note that userspace RCU implementations *do*
+permit call_rcu() to directly invoke callbacks, but only if a full
+grace period has elapsed since those callbacks were queued. This is
+the case because some userspace environments are extremely constrained.
+Nevertheless, people writing userspace RCU implementations are strongly
+encouraged to avoid invoking callbacks from call_rcu(), thus obtaining
+the deadlock-avoidance benefits called out above.
+
+Summary
+-------
+
+Permitting call_rcu() to immediately invoke its arguments breaks RCU,
+even on a UP system. So do not do it! Even on a UP system, the RCU
+infrastructure *must* respect grace periods, and *must* invoke callbacks
+from a known environment in which no locks are held.
+
+Note that it *is* safe for synchronize_rcu() to return immediately on
+UP systems, including PREEMPT SMP builds running on UP systems.
+
+Quick Quiz #3:
+ Why can't synchronize_rcu() return immediately on UP systems running
+ preemptible RCU?
+
+.. _answer_quick_quiz_up:
+
+Answer to Quick Quiz #1:
+ Why is it *not* legal to invoke synchronize_rcu() in this case?
+
+ Because the calling function is scanning an RCU-protected linked
+ list, and is therefore within an RCU read-side critical section.
+ Therefore, the called function has been invoked within an RCU
+ read-side critical section, and is not permitted to block.
+
+Answer to Quick Quiz #2:
+ What locking restriction must RCU callbacks respect?
+
+ Any lock that is acquired within an RCU callback must be acquired
+ elsewhere using an _bh variant of the spinlock primitive.
+ For example, if "mylock" is acquired by an RCU callback, then
+ a process-context acquisition of this lock must use something
+ like spin_lock_bh() to acquire the lock. Please note that
+ it is also OK to use _irq variants of spinlocks, for example,
+ spin_lock_irqsave().
+
+ If the process-context code were to simply use spin_lock(),
+ then, since RCU callbacks can be invoked from softirq context,
+ the callback might be called from a softirq that interrupted
+ the process-context critical section. This would result in
+ self-deadlock.
+
+ This restriction might seem gratuitous, since very few RCU
+ callbacks acquire locks directly. However, a great many RCU
+ callbacks do acquire locks *indirectly*, for example, via
+ the kfree() primitive.
+
+Answer to Quick Quiz #3:
+ Why can't synchronize_rcu() return immediately on UP systems
+ running preemptible RCU?
+
+ Because some other task might have been preempted in the middle
+ of an RCU read-side critical section. If synchronize_rcu()
+ simply immediately returned, it would prematurely signal the
+ end of the grace period, which would come as a nasty shock to
+ that other thread when it started running again.
diff --git a/Documentation/RCU/UP.txt b/Documentation/RCU/UP.txt
deleted file mode 100644
index 90ec5341ee98..000000000000
--- a/Documentation/RCU/UP.txt
+++ /dev/null
@@ -1,135 +0,0 @@
-RCU on Uniprocessor Systems
-
-
-A common misconception is that, on UP systems, the call_rcu() primitive
-may immediately invoke its function. The basis of this misconception
-is that since there is only one CPU, it should not be necessary to
-wait for anything else to get done, since there are no other CPUs for
-anything else to be happening on. Although this approach will -sort- -of-
-work a surprising amount of the time, it is a very bad idea in general.
-This document presents three examples that demonstrate exactly how bad
-an idea this is.
-
-
-Example 1: softirq Suicide
-
-Suppose that an RCU-based algorithm scans a linked list containing
-elements A, B, and C in process context, and can delete elements from
-this same list in softirq context. Suppose that the process-context scan
-is referencing element B when it is interrupted by softirq processing,
-which deletes element B, and then invokes call_rcu() to free element B
-after a grace period.
-
-Now, if call_rcu() were to directly invoke its arguments, then upon return
-from softirq, the list scan would find itself referencing a newly freed
-element B. This situation can greatly decrease the life expectancy of
-your kernel.
-
-This same problem can occur if call_rcu() is invoked from a hardware
-interrupt handler.
-
-
-Example 2: Function-Call Fatality
-
-Of course, one could avert the suicide described in the preceding example
-by having call_rcu() directly invoke its arguments only if it was called
-from process context. However, this can fail in a similar manner.
-
-Suppose that an RCU-based algorithm again scans a linked list containing
-elements A, B, and C in process contexts, but that it invokes a function
-on each element as it is scanned. Suppose further that this function
-deletes element B from the list, then passes it to call_rcu() for deferred
-freeing. This may be a bit unconventional, but it is perfectly legal
-RCU usage, since call_rcu() must wait for a grace period to elapse.
-Therefore, in this case, allowing call_rcu() to immediately invoke
-its arguments would cause it to fail to make the fundamental guarantee
-underlying RCU, namely that call_rcu() defers invoking its arguments until
-all RCU read-side critical sections currently executing have completed.
-
-Quick Quiz #1: why is it -not- legal to invoke synchronize_rcu() in
- this case?
-
-
-Example 3: Death by Deadlock
-
-Suppose that call_rcu() is invoked while holding a lock, and that the
-callback function must acquire this same lock. In this case, if
-call_rcu() were to directly invoke the callback, the result would
-be self-deadlock.
-
-In some cases, it would possible to restructure to code so that
-the call_rcu() is delayed until after the lock is released. However,
-there are cases where this can be quite ugly:
-
-1. If a number of items need to be passed to call_rcu() within
- the same critical section, then the code would need to create
- a list of them, then traverse the list once the lock was
- released.
-
-2. In some cases, the lock will be held across some kernel API,
- so that delaying the call_rcu() until the lock is released
- requires that the data item be passed up via a common API.
- It is far better to guarantee that callbacks are invoked
- with no locks held than to have to modify such APIs to allow
- arbitrary data items to be passed back up through them.
-
-If call_rcu() directly invokes the callback, painful locking restrictions
-or API changes would be required.
-
-Quick Quiz #2: What locking restriction must RCU callbacks respect?
-
-
-Summary
-
-Permitting call_rcu() to immediately invoke its arguments breaks RCU,
-even on a UP system. So do not do it! Even on a UP system, the RCU
-infrastructure -must- respect grace periods, and -must- invoke callbacks
-from a known environment in which no locks are held.
-
-It -is- safe for synchronize_sched() and synchronize_rcu_bh() to return
-immediately on an UP system. It is also safe for synchronize_rcu()
-to return immediately on UP systems, except when running preemptable
-RCU.
-
-Quick Quiz #3: Why can't synchronize_rcu() return immediately on
- UP systems running preemptable RCU?
-
-
-Answer to Quick Quiz #1:
- Why is it -not- legal to invoke synchronize_rcu() in this case?
-
- Because the calling function is scanning an RCU-protected linked
- list, and is therefore within an RCU read-side critical section.
- Therefore, the called function has been invoked within an RCU
- read-side critical section, and is not permitted to block.
-
-Answer to Quick Quiz #2:
- What locking restriction must RCU callbacks respect?
-
- Any lock that is acquired within an RCU callback must be
- acquired elsewhere using an _irq variant of the spinlock
- primitive. For example, if "mylock" is acquired by an
- RCU callback, then a process-context acquisition of this
- lock must use something like spin_lock_irqsave() to
- acquire the lock.
-
- If the process-context code were to simply use spin_lock(),
- then, since RCU callbacks can be invoked from softirq context,
- the callback might be called from a softirq that interrupted
- the process-context critical section. This would result in
- self-deadlock.
-
- This restriction might seem gratuitous, since very few RCU
- callbacks acquire locks directly. However, a great many RCU
- callbacks do acquire locks -indirectly-, for example, via
- the kfree() primitive.
-
-Answer to Quick Quiz #3:
- Why can't synchronize_rcu() return immediately on UP systems
- running preemptable RCU?
-
- Because some other task might have been preempted in the middle
- of an RCU read-side critical section. If synchronize_rcu()
- simply immediately returned, it would prematurely signal the
- end of the grace period, which would come as a nasty shock to
- that other thread when it started running again.
diff --git a/Documentation/RCU/arrayRCU.txt b/Documentation/RCU/arrayRCU.txt
deleted file mode 100644
index f05a9afb2c39..000000000000
--- a/Documentation/RCU/arrayRCU.txt
+++ /dev/null
@@ -1,153 +0,0 @@
-Using RCU to Protect Read-Mostly Arrays
-
-
-Although RCU is more commonly used to protect linked lists, it can
-also be used to protect arrays. Three situations are as follows:
-
-1. Hash Tables
-
-2. Static Arrays
-
-3. Resizeable Arrays
-
-Each of these three situations involves an RCU-protected pointer to an
-array that is separately indexed. It might be tempting to consider use
-of RCU to instead protect the index into an array, however, this use
-case is -not- supported. The problem with RCU-protected indexes into
-arrays is that compilers can play way too many optimization games with
-integers, which means that the rules governing handling of these indexes
-are far more trouble than they are worth. If RCU-protected indexes into
-arrays prove to be particularly valuable (which they have not thus far),
-explicit cooperation from the compiler will be required to permit them
-to be safely used.
-
-That aside, each of the three RCU-protected pointer situations are
-described in the following sections.
-
-
-Situation 1: Hash Tables
-
-Hash tables are often implemented as an array, where each array entry
-has a linked-list hash chain. Each hash chain can be protected by RCU
-as described in the listRCU.txt document. This approach also applies
-to other array-of-list situations, such as radix trees.
-
-
-Situation 2: Static Arrays
-
-Static arrays, where the data (rather than a pointer to the data) is
-located in each array element, and where the array is never resized,
-have not been used with RCU. Rik van Riel recommends using seqlock in
-this situation, which would also have minimal read-side overhead as long
-as updates are rare.
-
-Quick Quiz: Why is it so important that updates be rare when
- using seqlock?
-
-
-Situation 3: Resizeable Arrays
-
-Use of RCU for resizeable arrays is demonstrated by the grow_ary()
-function formerly used by the System V IPC code. The array is used
-to map from semaphore, message-queue, and shared-memory IDs to the data
-structure that represents the corresponding IPC construct. The grow_ary()
-function does not acquire any locks; instead its caller must hold the
-ids->sem semaphore.
-
-The grow_ary() function, shown below, does some limit checks, allocates a
-new ipc_id_ary, copies the old to the new portion of the new, initializes
-the remainder of the new, updates the ids->entries pointer to point to
-the new array, and invokes ipc_rcu_putref() to free up the old array.
-Note that rcu_assign_pointer() is used to update the ids->entries pointer,
-which includes any memory barriers required on whatever architecture
-you are running on.
-
- static int grow_ary(struct ipc_ids* ids, int newsize)
- {
- struct ipc_id_ary* new;
- struct ipc_id_ary* old;
- int i;
- int size = ids->entries->size;
-
- if(newsize > IPCMNI)
- newsize = IPCMNI;
- if(newsize <= size)
- return newsize;
-
- new = ipc_rcu_alloc(sizeof(struct kern_ipc_perm *)*newsize +
- sizeof(struct ipc_id_ary));
- if(new == NULL)
- return size;
- new->size = newsize;
- memcpy(new->p, ids->entries->p,
- sizeof(struct kern_ipc_perm *)*size +
- sizeof(struct ipc_id_ary));
- for(i=size;i<newsize;i++) {
- new->p[i] = NULL;
- }
- old = ids->entries;
-
- /*
- * Use rcu_assign_pointer() to make sure the memcpyed
- * contents of the new array are visible before the new
- * array becomes visible.
- */
- rcu_assign_pointer(ids->entries, new);
-
- ipc_rcu_putref(old);
- return newsize;
- }
-
-The ipc_rcu_putref() function decrements the array's reference count
-and then, if the reference count has dropped to zero, uses call_rcu()
-to free the array after a grace period has elapsed.
-
-The array is traversed by the ipc_lock() function. This function
-indexes into the array under the protection of rcu_read_lock(),
-using rcu_dereference() to pick up the pointer to the array so
-that it may later safely be dereferenced -- memory barriers are
-required on the Alpha CPU. Since the size of the array is stored
-with the array itself, there can be no array-size mismatches, so
-a simple check suffices. The pointer to the structure corresponding
-to the desired IPC object is placed in "out", with NULL indicating
-a non-existent entry. After acquiring "out->lock", the "out->deleted"
-flag indicates whether the IPC object is in the process of being
-deleted, and, if not, the pointer is returned.
-
- struct kern_ipc_perm* ipc_lock(struct ipc_ids* ids, int id)
- {
- struct kern_ipc_perm* out;
- int lid = id % SEQ_MULTIPLIER;
- struct ipc_id_ary* entries;
-
- rcu_read_lock();
- entries = rcu_dereference(ids->entries);
- if(lid >= entries->size) {
- rcu_read_unlock();
- return NULL;
- }
- out = entries->p[lid];
- if(out == NULL) {
- rcu_read_unlock();
- return NULL;
- }
- spin_lock(&out->lock);
-
- /* ipc_rmid() may have already freed the ID while ipc_lock
- * was spinning: here verify that the structure is still valid
- */
- if (out->deleted) {
- spin_unlock(&out->lock);
- rcu_read_unlock();
- return NULL;
- }
- return out;
- }
-
-
-Answer to Quick Quiz:
-
- The reason that it is important that updates be rare when
- using seqlock is that frequent updates can livelock readers.
- One way to avoid this problem is to assign a seqlock for
- each array entry rather than to the entire array.
diff --git a/Documentation/RCU/checklist.rst b/Documentation/RCU/checklist.rst
new file mode 100644
index 000000000000..c9bfb2b218e5
--- /dev/null
+++ b/Documentation/RCU/checklist.rst
@@ -0,0 +1,554 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+================================
+Review Checklist for RCU Patches
+================================
+
+
+This document contains a checklist for producing and reviewing patches
+that make use of RCU. Violating any of the rules listed below will
+result in the same sorts of problems that leaving out a locking primitive
+would cause. This list is based on experiences reviewing such patches
+over a rather long period of time, but improvements are always welcome!
+
+0. Is RCU being applied to a read-mostly situation? If the data
+ structure is updated more than about 10% of the time, then you
+ should strongly consider some other approach, unless detailed
+ performance measurements show that RCU is nonetheless the right
+ tool for the job. Yes, RCU does reduce read-side overhead by
+ increasing write-side overhead, which is exactly why normal uses
+ of RCU will do much more reading than updating.
+
+ Another exception is where performance is not an issue, and RCU
+ provides a simpler implementation. An example of this situation
+ is the dynamic NMI code in the Linux 2.6 kernel, at least on
+ architectures where NMIs are rare.
+
+ Yet another exception is where the low real-time latency of RCU's
+ read-side primitives is critically important.
+
+ One final exception is where RCU readers are used to prevent
+ the ABA problem (https://en.wikipedia.org/wiki/ABA_problem)
+ for lockless updates. This does result in the mildly
+ counter-intuitive situation where rcu_read_lock() and
+ rcu_read_unlock() are used to protect updates, however, this
+ approach can provide the same simplifications to certain types
+ of lockless algorithms that garbage collectors do.
+
+1. Does the update code have proper mutual exclusion?
+
+ RCU does allow *readers* to run (almost) naked, but *writers* must
+ still use some sort of mutual exclusion, such as:
+
+ a. locking,
+ b. atomic operations, or
+ c. restricting updates to a single task.
+
+ If you choose #b, be prepared to describe how you have handled
+ memory barriers on weakly ordered machines (pretty much all of
+ them -- even x86 allows later loads to be reordered to precede
+ earlier stores), and be prepared to explain why this added
+ complexity is worthwhile. If you choose #c, be prepared to
+ explain how this single task does not become a major bottleneck
+ on large systems (for example, if the task is updating information
+ relating to itself that other tasks can read, there by definition
+ can be no bottleneck). Note that the definition of "large" has
+ changed significantly: Eight CPUs was "large" in the year 2000,
+ but a hundred CPUs was unremarkable in 2017.
+
+2. Do the RCU read-side critical sections make proper use of
+ rcu_read_lock() and friends? These primitives are needed
+ to prevent grace periods from ending prematurely, which
+ could result in data being unceremoniously freed out from
+ under your read-side code, which can greatly increase the
+ actuarial risk of your kernel.
+
+ As a rough rule of thumb, any dereference of an RCU-protected
+ pointer must be covered by rcu_read_lock(), rcu_read_lock_bh(),
+ rcu_read_lock_sched(), or by the appropriate update-side lock.
+ Explicit disabling of preemption (preempt_disable(), for example)
+ can serve as rcu_read_lock_sched(), but is less readable and
+ prevents lockdep from detecting locking issues. Acquiring a
+ raw spinlock also enters an RCU read-side critical section.
+
+ The guard(rcu)() and scoped_guard(rcu) primitives designate
+ the remainder of the current scope or the next statement,
+ respectively, as the RCU read-side critical section. Use of
+ these guards can be less error-prone than rcu_read_lock(),
+ rcu_read_unlock(), and friends.
+
+ Please note that you *cannot* rely on code known to be built
+ only in non-preemptible kernels. Such code can and will break,
+ especially in kernels built with CONFIG_PREEMPT_COUNT=y.
+
+ Letting RCU-protected pointers "leak" out of an RCU read-side
+ critical section is every bit as bad as letting them leak out
+ from under a lock. Unless, of course, you have arranged some
+ other means of protection, such as a lock or a reference count
+ *before* letting them out of the RCU read-side critical section.
+
+3. Does the update code tolerate concurrent accesses?
+
+ The whole point of RCU is to permit readers to run without
+ any locks or atomic operations. This means that readers will
+ be running while updates are in progress. There are a number
+ of ways to handle this concurrency, depending on the situation:
+
+ a. Use the RCU variants of the list and hlist update
+ primitives to add, remove, and replace elements on
+ an RCU-protected list. Alternatively, use the other
+ RCU-protected data structures that have been added to
+ the Linux kernel.
+
+ This is almost always the best approach.
+
+ b. Proceed as in (a) above, but also maintain per-element
+ locks (that are acquired by both readers and writers)
+ that guard per-element state. Fields that the readers
+ refrain from accessing can be guarded by some other lock
+ acquired only by updaters, if desired.
+
+ This also works quite well.
+
+ c. Make updates appear atomic to readers. For example,
+ pointer updates to properly aligned fields will
+ appear atomic, as will individual atomic primitives.
+ Sequences of operations performed under a lock will *not*
+ appear to be atomic to RCU readers, nor will sequences
+ of multiple atomic primitives. One alternative is to
+ move multiple individual fields to a separate structure,
+ thus solving the multiple-field problem by imposing an
+ additional level of indirection.
+
+ This can work, but is starting to get a bit tricky.
+
+ d. Carefully order the updates and the reads so that readers
+ see valid data at all phases of the update. This is often
+ more difficult than it sounds, especially given modern
+ CPUs' tendency to reorder memory references. One must
+ usually liberally sprinkle memory-ordering operations
+ through the code, making it difficult to understand and
+ to test. Where it works, it is better to use things
+ like smp_store_release() and smp_load_acquire(), but in
+ some cases the smp_mb() full memory barrier is required.
+
+ As noted earlier, it is usually better to group the
+ changing data into a separate structure, so that the
+ change may be made to appear atomic by updating a pointer
+ to reference a new structure containing updated values.
+
+4. Weakly ordered CPUs pose special challenges. Almost all CPUs
+ are weakly ordered -- even x86 CPUs allow later loads to be
+ reordered to precede earlier stores. RCU code must take all of
+ the following measures to prevent memory-corruption problems:
+
+ a. Readers must maintain proper ordering of their memory
+ accesses. The rcu_dereference() primitive ensures that
+ the CPU picks up the pointer before it picks up the data
+ that the pointer points to. This really is necessary
+ on Alpha CPUs.
+
+ The rcu_dereference() primitive is also an excellent
+ documentation aid, letting the person reading the
+ code know exactly which pointers are protected by RCU.
+ Please note that compilers can also reorder code, and
+ they are becoming increasingly aggressive about doing
+ just that. The rcu_dereference() primitive therefore also
+ prevents destructive compiler optimizations. However,
+ with a bit of devious creativity, it is possible to
+ mishandle the return value from rcu_dereference().
+ Please see rcu_dereference.rst for more information.
+
+ The rcu_dereference() primitive is used by the
+ various "_rcu()" list-traversal primitives, such
+ as the list_for_each_entry_rcu(). Note that it is
+ perfectly legal (if redundant) for update-side code to
+ use rcu_dereference() and the "_rcu()" list-traversal
+ primitives. This is particularly useful in code that
+ is common to readers and updaters. However, lockdep
+ will complain if you access rcu_dereference() outside
+ of an RCU read-side critical section. See lockdep.rst
+ to learn what to do about this.
+
+ Of course, neither rcu_dereference() nor the "_rcu()"
+ list-traversal primitives can substitute for a good
+ concurrency design coordinating among multiple updaters.
+
+ b. If the list macros are being used, the list_add_tail_rcu()
+ and list_add_rcu() primitives must be used in order
+ to prevent weakly ordered machines from misordering
+ structure initialization and pointer planting.
+ Similarly, if the hlist macros are being used, the
+ hlist_add_head_rcu() primitive is required.
+
+ c. If the list macros are being used, the list_del_rcu()
+ primitive must be used to keep list_del()'s pointer
+ poisoning from inflicting toxic effects on concurrent
+ readers. Similarly, if the hlist macros are being used,
+ the hlist_del_rcu() primitive is required.
+
+ The list_replace_rcu() and hlist_replace_rcu() primitives
+ may be used to replace an old structure with a new one
+ in their respective types of RCU-protected lists.
+
+ d. Rules similar to (4b) and (4c) apply to the "hlist_nulls"
+ type of RCU-protected linked lists.
+
+ e. Updates must ensure that initialization of a given
+ structure happens before pointers to that structure are
+ publicized. Use the rcu_assign_pointer() primitive
+ when publicizing a pointer to a structure that can
+ be traversed by an RCU read-side critical section.
+
+5. If any of call_rcu(), call_srcu(), call_rcu_tasks(), or
+ call_rcu_tasks_trace() is used, the callback function may be
+ invoked from softirq context, and in any case with bottom halves
+ disabled. In particular, this callback function cannot block.
+ If you need the callback to block, run that code in a workqueue
+ handler scheduled from the callback. The queue_rcu_work()
+ function does this for you in the case of call_rcu().
+
+6. Since synchronize_rcu() can block, it cannot be called
+ from any sort of irq context. The same rule applies
+ for synchronize_srcu(), synchronize_rcu_expedited(),
+ synchronize_srcu_expedited(), synchronize_rcu_tasks(),
+ synchronize_rcu_tasks_rude(), and synchronize_rcu_tasks_trace().
+
+ The expedited forms of these primitives have the same semantics
+ as the non-expedited forms, but expediting is more CPU intensive.
+ Use of the expedited primitives should be restricted to rare
+ configuration-change operations that would not normally be
+ undertaken while a real-time workload is running. Note that
+ IPI-sensitive real-time workloads can use the rcupdate.rcu_normal
+ kernel boot parameter to completely disable expedited grace
+ periods, though this might have performance implications.
+
+ In particular, if you find yourself invoking one of the expedited
+ primitives repeatedly in a loop, please do everyone a favor:
+ Restructure your code so that it batches the updates, allowing
+ a single non-expedited primitive to cover the entire batch.
+ This will very likely be faster than the loop containing the
+ expedited primitive, and will be much much easier on the rest
+ of the system, especially to real-time workloads running on the
+ rest of the system. Alternatively, instead use asynchronous
+ primitives such as call_rcu().
+
+7. As of v4.20, a given kernel implements only one RCU flavor, which
+ is RCU-sched for PREEMPTION=n and RCU-preempt for PREEMPTION=y.
+ If the updater uses call_rcu() or synchronize_rcu(), then
+ the corresponding readers may use: (1) rcu_read_lock() and
+ rcu_read_unlock(), (2) any pair of primitives that disables
+ and re-enables softirq, for example, rcu_read_lock_bh() and
+ rcu_read_unlock_bh(), or (3) any pair of primitives that disables
+ and re-enables preemption, for example, rcu_read_lock_sched() and
+ rcu_read_unlock_sched(). If the updater uses synchronize_srcu()
+ or call_srcu(), then the corresponding readers must use
+ srcu_read_lock() and srcu_read_unlock(), and with the same
+ srcu_struct. The rules for the expedited RCU grace-period-wait
+ primitives are the same as for their non-expedited counterparts.
+
+ Similarly, it is necessary to correctly use the RCU Tasks flavors:
+
+ a. If the updater uses synchronize_rcu_tasks() or
+ call_rcu_tasks(), then the readers must refrain from
+ executing voluntary context switches, that is, from
+ blocking.
+
+ b. If the updater uses call_rcu_tasks_trace()
+ or synchronize_rcu_tasks_trace(), then the
+ corresponding readers must use rcu_read_lock_trace()
+ and rcu_read_unlock_trace().
+
+ c. If an updater uses synchronize_rcu_tasks_rude(),
+ then the corresponding readers must use anything that
+ disables preemption, for example, preempt_disable()
+ and preempt_enable().
+
+ Mixing things up will result in confusion and broken kernels, and
+ has even resulted in an exploitable security issue. Therefore,
+ when using non-obvious pairs of primitives, commenting is
+ of course a must. One example of non-obvious pairing is
+ the XDP feature in networking, which calls BPF programs from
+ network-driver NAPI (softirq) context. BPF relies heavily on RCU
+ protection for its data structures, but because the BPF program
+ invocation happens entirely within a single local_bh_disable()
+ section in a NAPI poll cycle, this usage is safe. The reason
+ that this usage is safe is that readers can use anything that
+ disables BH when updaters use call_rcu() or synchronize_rcu().
+
+8. Although synchronize_rcu() is slower than is call_rcu(),
+ it usually results in simpler code. So, unless update
+ performance is critically important, the updaters cannot block,
+ or the latency of synchronize_rcu() is visible from userspace,
+ synchronize_rcu() should be used in preference to call_rcu().
+ Furthermore, kfree_rcu() and kvfree_rcu() usually result
+ in even simpler code than does synchronize_rcu() without
+ synchronize_rcu()'s multi-millisecond latency. So please take
+ advantage of kfree_rcu()'s and kvfree_rcu()'s "fire and forget"
+ memory-freeing capabilities where it applies.
+
+ An especially important property of the synchronize_rcu()
+ primitive is that it automatically self-limits: if grace periods
+ are delayed for whatever reason, then the synchronize_rcu()
+ primitive will correspondingly delay updates. In contrast,
+ code using call_rcu() should explicitly limit update rate in
+ cases where grace periods are delayed, as failing to do so can
+ result in excessive realtime latencies or even OOM conditions.
+
+ Ways of gaining this self-limiting property when using call_rcu(),
+ kfree_rcu(), or kvfree_rcu() include:
+
+ a. Keeping a count of the number of data-structure elements
+ used by the RCU-protected data structure, including
+ those waiting for a grace period to elapse. Enforce a
+ limit on this number, stalling updates as needed to allow
+ previously deferred frees to complete. Alternatively,
+ limit only the number awaiting deferred free rather than
+ the total number of elements.
+
+ One way to stall the updates is to acquire the update-side
+ mutex. (Don't try this with a spinlock -- other CPUs
+ spinning on the lock could prevent the grace period
+ from ever ending.) Another way to stall the updates
+ is for the updates to use a wrapper function around
+ the memory allocator, so that this wrapper function
+ simulates OOM when there is too much memory awaiting an
+ RCU grace period. There are of course many other
+ variations on this theme.
+
+ b. Limiting update rate. For example, if updates occur only
+ once per hour, then no explicit rate limiting is
+ required, unless your system is already badly broken.
+ Older versions of the dcache subsystem take this approach,
+ guarding updates with a global lock, limiting their rate.
+
+ c. Trusted update -- if updates can only be done manually by
+ superuser or some other trusted user, then it might not
+ be necessary to automatically limit them. The theory
+ here is that superuser already has lots of ways to crash
+ the machine.
+
+ d. Periodically invoke rcu_barrier(), permitting a limited
+ number of updates per grace period.
+
+ The same cautions apply to call_srcu(), call_rcu_tasks(), and
+ call_rcu_tasks_trace(). This is why there is an srcu_barrier(),
+ rcu_barrier_tasks(), and rcu_barrier_tasks_trace(), respectively.
+
+ Note that although these primitives do take action to avoid
+ memory exhaustion when any given CPU has too many callbacks,
+ a determined user or administrator can still exhaust memory.
+ This is especially the case if a system with a large number of
+ CPUs has been configured to offload all of its RCU callbacks onto
+ a single CPU, or if the system has relatively little free memory.
+
+9. All RCU list-traversal primitives, which include
+ rcu_dereference(), list_for_each_entry_rcu(), and
+ list_for_each_safe_rcu(), must be either within an RCU read-side
+ critical section or must be protected by appropriate update-side
+ locks. RCU read-side critical sections are delimited by
+ rcu_read_lock() and rcu_read_unlock(), or by similar primitives
+ such as rcu_read_lock_bh() and rcu_read_unlock_bh(), in which
+ case the matching rcu_dereference() primitive must be used in
+ order to keep lockdep happy, in this case, rcu_dereference_bh().
+
+ The reason that it is permissible to use RCU list-traversal
+ primitives when the update-side lock is held is that doing so
+ can be quite helpful in reducing code bloat when common code is
+ shared between readers and updaters. Additional primitives
+ are provided for this case, as discussed in lockdep.rst.
+
+ One exception to this rule is when data is only ever added to
+ the linked data structure, and is never removed during any
+ time that readers might be accessing that structure. In such
+ cases, READ_ONCE() may be used in place of rcu_dereference()
+ and the read-side markers (rcu_read_lock() and rcu_read_unlock(),
+ for example) may be omitted.
+
+10. Conversely, if you are in an RCU read-side critical section,
+ and you don't hold the appropriate update-side lock, you *must*
+ use the "_rcu()" variants of the list macros. Failing to do so
+ will break Alpha, cause aggressive compilers to generate bad code,
+ and confuse people trying to understand your code.
+
+11. Any lock acquired by an RCU callback must be acquired elsewhere
+ with softirq disabled, e.g., via spin_lock_bh(). Failing to
+ disable softirq on a given acquisition of that lock will result
+ in deadlock as soon as the RCU softirq handler happens to run
+ your RCU callback while interrupting that acquisition's critical
+ section.
+
+12. RCU callbacks can be and are executed in parallel. In many cases,
+ the callback code simply wrappers around kfree(), so that this
+ is not an issue (or, more accurately, to the extent that it is
+ an issue, the memory-allocator locking handles it). However,
+ if the callbacks do manipulate a shared data structure, they
+ must use whatever locking or other synchronization is required
+ to safely access and/or modify that data structure.
+
+ Do not assume that RCU callbacks will be executed on the same
+ CPU that executed the corresponding call_rcu(), call_srcu(),
+ call_rcu_tasks(), or call_rcu_tasks_trace(). For example, if
+ a given CPU goes offline while having an RCU callback pending,
+ then that RCU callback will execute on some surviving CPU.
+ (If this was not the case, a self-spawning RCU callback would
+ prevent the victim CPU from ever going offline.) Furthermore,
+ CPUs designated by rcu_nocbs= might well *always* have their
+ RCU callbacks executed on some other CPUs, in fact, for some
+ real-time workloads, this is the whole point of using the
+ rcu_nocbs= kernel boot parameter.
+
+ In addition, do not assume that callbacks queued in a given order
+ will be invoked in that order, even if they all are queued on the
+ same CPU. Furthermore, do not assume that same-CPU callbacks will
+ be invoked serially. For example, in recent kernels, CPUs can be
+ switched between offloaded and de-offloaded callback invocation,
+ and while a given CPU is undergoing such a switch, its callbacks
+ might be concurrently invoked by that CPU's softirq handler and
+ that CPU's rcuo kthread. At such times, that CPU's callbacks
+ might be executed both concurrently and out of order.
+
+13. Unlike most flavors of RCU, it *is* permissible to block in an
+ SRCU read-side critical section (demarked by srcu_read_lock()
+ and srcu_read_unlock()), hence the "SRCU": "sleepable RCU".
+ As with RCU, guard(srcu)() and scoped_guard(srcu) forms are
+ available, and often provide greater ease of use. Please note
+ that if you don't need to sleep in read-side critical sections,
+ you should be using RCU rather than SRCU, because RCU is almost
+ always faster and easier to use than is SRCU.
+
+ Also unlike other forms of RCU, explicit initialization and
+ cleanup is required either at build time via DEFINE_SRCU()
+ or DEFINE_STATIC_SRCU() or at runtime via init_srcu_struct()
+ and cleanup_srcu_struct(). These last two are passed a
+ "struct srcu_struct" that defines the scope of a given
+ SRCU domain. Once initialized, the srcu_struct is passed
+ to srcu_read_lock(), srcu_read_unlock() synchronize_srcu(),
+ synchronize_srcu_expedited(), and call_srcu(). A given
+ synchronize_srcu() waits only for SRCU read-side critical
+ sections governed by srcu_read_lock() and srcu_read_unlock()
+ calls that have been passed the same srcu_struct. This property
+ is what makes sleeping read-side critical sections tolerable --
+ a given subsystem delays only its own updates, not those of other
+ subsystems using SRCU. Therefore, SRCU is less prone to OOM the
+ system than RCU would be if RCU's read-side critical sections
+ were permitted to sleep.
+
+ The ability to sleep in read-side critical sections does not
+ come for free. First, corresponding srcu_read_lock() and
+ srcu_read_unlock() calls must be passed the same srcu_struct.
+ Second, grace-period-detection overhead is amortized only
+ over those updates sharing a given srcu_struct, rather than
+ being globally amortized as they are for other forms of RCU.
+ Therefore, SRCU should be used in preference to rw_semaphore
+ only in extremely read-intensive situations, or in situations
+ requiring SRCU's read-side deadlock immunity or low read-side
+ realtime latency. You should also consider percpu_rw_semaphore
+ when you need lightweight readers.
+
+ SRCU's expedited primitive (synchronize_srcu_expedited())
+ never sends IPIs to other CPUs, so it is easier on
+ real-time workloads than is synchronize_rcu_expedited().
+
+ It is also permissible to sleep in RCU Tasks Trace read-side
+ critical section, which are delimited by rcu_read_lock_trace()
+ and rcu_read_unlock_trace(). However, this is a specialized
+ flavor of RCU, and you should not use it without first checking
+ with its current users. In most cases, you should instead
+ use SRCU. As with RCU and SRCU, guard(rcu_tasks_trace)() and
+ scoped_guard(rcu_tasks_trace) are available, and often provide
+ greater ease of use.
+
+ Note that rcu_assign_pointer() relates to SRCU just as it does to
+ other forms of RCU, but instead of rcu_dereference() you should
+ use srcu_dereference() in order to avoid lockdep splats.
+
+14. The whole point of call_rcu(), synchronize_rcu(), and friends
+ is to wait until all pre-existing readers have finished before
+ carrying out some otherwise-destructive operation. It is
+ therefore critically important to *first* remove any path
+ that readers can follow that could be affected by the
+ destructive operation, and *only then* invoke call_rcu(),
+ synchronize_rcu(), or friends.
+
+ Because these primitives only wait for pre-existing readers, it
+ is the caller's responsibility to guarantee that any subsequent
+ readers will execute safely.
+
+15. The various RCU read-side primitives do *not* necessarily contain
+ memory barriers. You should therefore plan for the CPU
+ and the compiler to freely reorder code into and out of RCU
+ read-side critical sections. It is the responsibility of the
+ RCU update-side primitives to deal with this.
+
+ For SRCU readers, you can use smp_mb__after_srcu_read_unlock()
+ immediately after an srcu_read_unlock() to get a full barrier.
+
+16. Use CONFIG_PROVE_LOCKING, CONFIG_DEBUG_OBJECTS_RCU_HEAD, and the
+ __rcu sparse checks to validate your RCU code. These can help
+ find problems as follows:
+
+ CONFIG_PROVE_LOCKING:
+ check that accesses to RCU-protected data structures
+ are carried out under the proper RCU read-side critical
+ section, while holding the right combination of locks,
+ or whatever other conditions are appropriate.
+
+ CONFIG_DEBUG_OBJECTS_RCU_HEAD:
+ check that you don't pass the same object to call_rcu()
+ (or friends) before an RCU grace period has elapsed
+ since the last time that you passed that same object to
+ call_rcu() (or friends).
+
+ CONFIG_RCU_STRICT_GRACE_PERIOD:
+ combine with KASAN to check for pointers leaked out
+ of RCU read-side critical sections. This Kconfig
+ option is tough on both performance and scalability,
+ and so is limited to four-CPU systems.
+
+ __rcu sparse checks:
+ tag the pointer to the RCU-protected data structure
+ with __rcu, and sparse will warn you if you access that
+ pointer without the services of one of the variants
+ of rcu_dereference().
+
+ These debugging aids can help you find problems that are
+ otherwise extremely difficult to spot.
+
+17. If you pass a callback function defined within a module
+ to one of call_rcu(), call_srcu(), call_rcu_tasks(), or
+ call_rcu_tasks_trace(), then it is necessary to wait for all
+ pending callbacks to be invoked before unloading that module.
+ Note that it is absolutely *not* sufficient to wait for a grace
+ period! For example, synchronize_rcu() implementation is *not*
+ guaranteed to wait for callbacks registered on other CPUs via
+ call_rcu(). Or even on the current CPU if that CPU recently
+ went offline and came back online.
+
+ You instead need to use one of the barrier functions:
+
+ - call_rcu() -> rcu_barrier()
+ - call_srcu() -> srcu_barrier()
+ - call_rcu_tasks() -> rcu_barrier_tasks()
+ - call_rcu_tasks_trace() -> rcu_barrier_tasks_trace()
+
+ However, these barrier functions are absolutely *not* guaranteed
+ to wait for a grace period. For example, if there are no
+ call_rcu() callbacks queued anywhere in the system, rcu_barrier()
+ can and will return immediately.
+
+ So if you need to wait for both a grace period and for all
+ pre-existing callbacks, you will need to invoke both functions,
+ with the pair depending on the flavor of RCU:
+
+ - Either synchronize_rcu() or synchronize_rcu_expedited(),
+ together with rcu_barrier()
+ - Either synchronize_srcu() or synchronize_srcu_expedited(),
+ together with and srcu_barrier()
+ - synchronize_rcu_tasks() and rcu_barrier_tasks()
+ - synchronize_tasks_trace() and rcu_barrier_tasks_trace()
+
+ If necessary, you can use something like workqueues to execute
+ the requisite pair of functions concurrently.
+
+ See rcubarrier.rst for more information.
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt
deleted file mode 100644
index 49747717d905..000000000000
--- a/Documentation/RCU/checklist.txt
+++ /dev/null
@@ -1,486 +0,0 @@
-Review Checklist for RCU Patches
-
-
-This document contains a checklist for producing and reviewing patches
-that make use of RCU. Violating any of the rules listed below will
-result in the same sorts of problems that leaving out a locking primitive
-would cause. This list is based on experiences reviewing such patches
-over a rather long period of time, but improvements are always welcome!
-
-0. Is RCU being applied to a read-mostly situation? If the data
- structure is updated more than about 10% of the time, then you
- should strongly consider some other approach, unless detailed
- performance measurements show that RCU is nonetheless the right
- tool for the job. Yes, RCU does reduce read-side overhead by
- increasing write-side overhead, which is exactly why normal uses
- of RCU will do much more reading than updating.
-
- Another exception is where performance is not an issue, and RCU
- provides a simpler implementation. An example of this situation
- is the dynamic NMI code in the Linux 2.6 kernel, at least on
- architectures where NMIs are rare.
-
- Yet another exception is where the low real-time latency of RCU's
- read-side primitives is critically important.
-
- One final exception is where RCU readers are used to prevent
- the ABA problem (https://en.wikipedia.org/wiki/ABA_problem)
- for lockless updates. This does result in the mildly
- counter-intuitive situation where rcu_read_lock() and
- rcu_read_unlock() are used to protect updates, however, this
- approach provides the same potential simplifications that garbage
- collectors do.
-
-1. Does the update code have proper mutual exclusion?
-
- RCU does allow -readers- to run (almost) naked, but -writers- must
- still use some sort of mutual exclusion, such as:
-
- a. locking,
- b. atomic operations, or
- c. restricting updates to a single task.
-
- If you choose #b, be prepared to describe how you have handled
- memory barriers on weakly ordered machines (pretty much all of
- them -- even x86 allows later loads to be reordered to precede
- earlier stores), and be prepared to explain why this added
- complexity is worthwhile. If you choose #c, be prepared to
- explain how this single task does not become a major bottleneck on
- big multiprocessor machines (for example, if the task is updating
- information relating to itself that other tasks can read, there
- by definition can be no bottleneck). Note that the definition
- of "large" has changed significantly: Eight CPUs was "large"
- in the year 2000, but a hundred CPUs was unremarkable in 2017.
-
-2. Do the RCU read-side critical sections make proper use of
- rcu_read_lock() and friends? These primitives are needed
- to prevent grace periods from ending prematurely, which
- could result in data being unceremoniously freed out from
- under your read-side code, which can greatly increase the
- actuarial risk of your kernel.
-
- As a rough rule of thumb, any dereference of an RCU-protected
- pointer must be covered by rcu_read_lock(), rcu_read_lock_bh(),
- rcu_read_lock_sched(), or by the appropriate update-side lock.
- Disabling of preemption can serve as rcu_read_lock_sched(), but
- is less readable.
-
- Letting RCU-protected pointers "leak" out of an RCU read-side
- critical section is every bid as bad as letting them leak out
- from under a lock. Unless, of course, you have arranged some
- other means of protection, such as a lock or a reference count
- -before- letting them out of the RCU read-side critical section.
-
-3. Does the update code tolerate concurrent accesses?
-
- The whole point of RCU is to permit readers to run without
- any locks or atomic operations. This means that readers will
- be running while updates are in progress. There are a number
- of ways to handle this concurrency, depending on the situation:
-
- a. Use the RCU variants of the list and hlist update
- primitives to add, remove, and replace elements on
- an RCU-protected list. Alternatively, use the other
- RCU-protected data structures that have been added to
- the Linux kernel.
-
- This is almost always the best approach.
-
- b. Proceed as in (a) above, but also maintain per-element
- locks (that are acquired by both readers and writers)
- that guard per-element state. Of course, fields that
- the readers refrain from accessing can be guarded by
- some other lock acquired only by updaters, if desired.
-
- This works quite well, also.
-
- c. Make updates appear atomic to readers. For example,
- pointer updates to properly aligned fields will
- appear atomic, as will individual atomic primitives.
- Sequences of operations performed under a lock will -not-
- appear to be atomic to RCU readers, nor will sequences
- of multiple atomic primitives.
-
- This can work, but is starting to get a bit tricky.
-
- d. Carefully order the updates and the reads so that
- readers see valid data at all phases of the update.
- This is often more difficult than it sounds, especially
- given modern CPUs' tendency to reorder memory references.
- One must usually liberally sprinkle memory barriers
- (smp_wmb(), smp_rmb(), smp_mb()) through the code,
- making it difficult to understand and to test.
-
- It is usually better to group the changing data into
- a separate structure, so that the change may be made
- to appear atomic by updating a pointer to reference
- a new structure containing updated values.
-
-4. Weakly ordered CPUs pose special challenges. Almost all CPUs
- are weakly ordered -- even x86 CPUs allow later loads to be
- reordered to precede earlier stores. RCU code must take all of
- the following measures to prevent memory-corruption problems:
-
- a. Readers must maintain proper ordering of their memory
- accesses. The rcu_dereference() primitive ensures that
- the CPU picks up the pointer before it picks up the data
- that the pointer points to. This really is necessary
- on Alpha CPUs. If you don't believe me, see:
-
- http://www.openvms.compaq.com/wizard/wiz_2637.html
-
- The rcu_dereference() primitive is also an excellent
- documentation aid, letting the person reading the
- code know exactly which pointers are protected by RCU.
- Please note that compilers can also reorder code, and
- they are becoming increasingly aggressive about doing
- just that. The rcu_dereference() primitive therefore also
- prevents destructive compiler optimizations. However,
- with a bit of devious creativity, it is possible to
- mishandle the return value from rcu_dereference().
- Please see rcu_dereference.txt in this directory for
- more information.
-
- The rcu_dereference() primitive is used by the
- various "_rcu()" list-traversal primitives, such
- as the list_for_each_entry_rcu(). Note that it is
- perfectly legal (if redundant) for update-side code to
- use rcu_dereference() and the "_rcu()" list-traversal
- primitives. This is particularly useful in code that
- is common to readers and updaters. However, lockdep
- will complain if you access rcu_dereference() outside
- of an RCU read-side critical section. See lockdep.txt
- to learn what to do about this.
-
- Of course, neither rcu_dereference() nor the "_rcu()"
- list-traversal primitives can substitute for a good
- concurrency design coordinating among multiple updaters.
-
- b. If the list macros are being used, the list_add_tail_rcu()
- and list_add_rcu() primitives must be used in order
- to prevent weakly ordered machines from misordering
- structure initialization and pointer planting.
- Similarly, if the hlist macros are being used, the
- hlist_add_head_rcu() primitive is required.
-
- c. If the list macros are being used, the list_del_rcu()
- primitive must be used to keep list_del()'s pointer
- poisoning from inflicting toxic effects on concurrent
- readers. Similarly, if the hlist macros are being used,
- the hlist_del_rcu() primitive is required.
-
- The list_replace_rcu() and hlist_replace_rcu() primitives
- may be used to replace an old structure with a new one
- in their respective types of RCU-protected lists.
-
- d. Rules similar to (4b) and (4c) apply to the "hlist_nulls"
- type of RCU-protected linked lists.
-
- e. Updates must ensure that initialization of a given
- structure happens before pointers to that structure are
- publicized. Use the rcu_assign_pointer() primitive
- when publicizing a pointer to a structure that can
- be traversed by an RCU read-side critical section.
-
-5. If call_rcu(), or a related primitive such as call_rcu_bh(),
- call_rcu_sched(), or call_srcu() is used, the callback function
- will be called from softirq context. In particular, it cannot
- block.
-
-6. Since synchronize_rcu() can block, it cannot be called from
- any sort of irq context. The same rule applies for
- synchronize_rcu_bh(), synchronize_sched(), synchronize_srcu(),
- synchronize_rcu_expedited(), synchronize_rcu_bh_expedited(),
- synchronize_sched_expedite(), and synchronize_srcu_expedited().
-
- The expedited forms of these primitives have the same semantics
- as the non-expedited forms, but expediting is both expensive and
- (with the exception of synchronize_srcu_expedited()) unfriendly
- to real-time workloads. Use of the expedited primitives should
- be restricted to rare configuration-change operations that would
- not normally be undertaken while a real-time workload is running.
- However, real-time workloads can use rcupdate.rcu_normal kernel
- boot parameter to completely disable expedited grace periods,
- though this might have performance implications.
-
- In particular, if you find yourself invoking one of the expedited
- primitives repeatedly in a loop, please do everyone a favor:
- Restructure your code so that it batches the updates, allowing
- a single non-expedited primitive to cover the entire batch.
- This will very likely be faster than the loop containing the
- expedited primitive, and will be much much easier on the rest
- of the system, especially to real-time workloads running on
- the rest of the system.
-
-7. If the updater uses call_rcu() or synchronize_rcu(), then the
- corresponding readers must use rcu_read_lock() and
- rcu_read_unlock(). If the updater uses call_rcu_bh() or
- synchronize_rcu_bh(), then the corresponding readers must
- use rcu_read_lock_bh() and rcu_read_unlock_bh(). If the
- updater uses call_rcu_sched() or synchronize_sched(), then
- the corresponding readers must disable preemption, possibly
- by calling rcu_read_lock_sched() and rcu_read_unlock_sched().
- If the updater uses synchronize_srcu() or call_srcu(), then
- the corresponding readers must use srcu_read_lock() and
- srcu_read_unlock(), and with the same srcu_struct. The rules for
- the expedited primitives are the same as for their non-expedited
- counterparts. Mixing things up will result in confusion and
- broken kernels.
-
- One exception to this rule: rcu_read_lock() and rcu_read_unlock()
- may be substituted for rcu_read_lock_bh() and rcu_read_unlock_bh()
- in cases where local bottom halves are already known to be
- disabled, for example, in irq or softirq context. Commenting
- such cases is a must, of course! And the jury is still out on
- whether the increased speed is worth it.
-
-8. Although synchronize_rcu() is slower than is call_rcu(), it
- usually results in simpler code. So, unless update performance is
- critically important, the updaters cannot block, or the latency of
- synchronize_rcu() is visible from userspace, synchronize_rcu()
- should be used in preference to call_rcu(). Furthermore,
- kfree_rcu() usually results in even simpler code than does
- synchronize_rcu() without synchronize_rcu()'s multi-millisecond
- latency. So please take advantage of kfree_rcu()'s "fire and
- forget" memory-freeing capabilities where it applies.
-
- An especially important property of the synchronize_rcu()
- primitive is that it automatically self-limits: if grace periods
- are delayed for whatever reason, then the synchronize_rcu()
- primitive will correspondingly delay updates. In contrast,
- code using call_rcu() should explicitly limit update rate in
- cases where grace periods are delayed, as failing to do so can
- result in excessive realtime latencies or even OOM conditions.
-
- Ways of gaining this self-limiting property when using call_rcu()
- include:
-
- a. Keeping a count of the number of data-structure elements
- used by the RCU-protected data structure, including
- those waiting for a grace period to elapse. Enforce a
- limit on this number, stalling updates as needed to allow
- previously deferred frees to complete. Alternatively,
- limit only the number awaiting deferred free rather than
- the total number of elements.
-
- One way to stall the updates is to acquire the update-side
- mutex. (Don't try this with a spinlock -- other CPUs
- spinning on the lock could prevent the grace period
- from ever ending.) Another way to stall the updates
- is for the updates to use a wrapper function around
- the memory allocator, so that this wrapper function
- simulates OOM when there is too much memory awaiting an
- RCU grace period. There are of course many other
- variations on this theme.
-
- b. Limiting update rate. For example, if updates occur only
- once per hour, then no explicit rate limiting is
- required, unless your system is already badly broken.
- Older versions of the dcache subsystem take this approach,
- guarding updates with a global lock, limiting their rate.
-
- c. Trusted update -- if updates can only be done manually by
- superuser or some other trusted user, then it might not
- be necessary to automatically limit them. The theory
- here is that superuser already has lots of ways to crash
- the machine.
-
- d. Use call_rcu_bh() rather than call_rcu(), in order to take
- advantage of call_rcu_bh()'s faster grace periods. (This
- is only a partial solution, though.)
-
- e. Periodically invoke synchronize_rcu(), permitting a limited
- number of updates per grace period.
-
- The same cautions apply to call_rcu_bh(), call_rcu_sched(),
- call_srcu(), and kfree_rcu().
-
- Note that although these primitives do take action to avoid memory
- exhaustion when any given CPU has too many callbacks, a determined
- user could still exhaust memory. This is especially the case
- if a system with a large number of CPUs has been configured to
- offload all of its RCU callbacks onto a single CPU, or if the
- system has relatively little free memory.
-
-9. All RCU list-traversal primitives, which include
- rcu_dereference(), list_for_each_entry_rcu(), and
- list_for_each_safe_rcu(), must be either within an RCU read-side
- critical section or must be protected by appropriate update-side
- locks. RCU read-side critical sections are delimited by
- rcu_read_lock() and rcu_read_unlock(), or by similar primitives
- such as rcu_read_lock_bh() and rcu_read_unlock_bh(), in which
- case the matching rcu_dereference() primitive must be used in
- order to keep lockdep happy, in this case, rcu_dereference_bh().
-
- The reason that it is permissible to use RCU list-traversal
- primitives when the update-side lock is held is that doing so
- can be quite helpful in reducing code bloat when common code is
- shared between readers and updaters. Additional primitives
- are provided for this case, as discussed in lockdep.txt.
-
-10. Conversely, if you are in an RCU read-side critical section,
- and you don't hold the appropriate update-side lock, you -must-
- use the "_rcu()" variants of the list macros. Failing to do so
- will break Alpha, cause aggressive compilers to generate bad code,
- and confuse people trying to read your code.
-
-11. Note that synchronize_rcu() -only- guarantees to wait until
- all currently executing rcu_read_lock()-protected RCU read-side
- critical sections complete. It does -not- necessarily guarantee
- that all currently running interrupts, NMIs, preempt_disable()
- code, or idle loops will complete. Therefore, if your
- read-side critical sections are protected by something other
- than rcu_read_lock(), do -not- use synchronize_rcu().
-
- Similarly, disabling preemption is not an acceptable substitute
- for rcu_read_lock(). Code that attempts to use preemption
- disabling where it should be using rcu_read_lock() will break
- in CONFIG_PREEMPT=y kernel builds.
-
- If you want to wait for interrupt handlers, NMI handlers, and
- code under the influence of preempt_disable(), you instead
- need to use synchronize_irq() or synchronize_sched().
-
- This same limitation also applies to synchronize_rcu_bh()
- and synchronize_srcu(), as well as to the asynchronous and
- expedited forms of the three primitives, namely call_rcu(),
- call_rcu_bh(), call_srcu(), synchronize_rcu_expedited(),
- synchronize_rcu_bh_expedited(), and synchronize_srcu_expedited().
-
-12. Any lock acquired by an RCU callback must be acquired elsewhere
- with softirq disabled, e.g., via spin_lock_irqsave(),
- spin_lock_bh(), etc. Failing to disable irq on a given
- acquisition of that lock will result in deadlock as soon as
- the RCU softirq handler happens to run your RCU callback while
- interrupting that acquisition's critical section.
-
-13. RCU callbacks can be and are executed in parallel. In many cases,
- the callback code simply wrappers around kfree(), so that this
- is not an issue (or, more accurately, to the extent that it is
- an issue, the memory-allocator locking handles it). However,
- if the callbacks do manipulate a shared data structure, they
- must use whatever locking or other synchronization is required
- to safely access and/or modify that data structure.
-
- RCU callbacks are -usually- executed on the same CPU that executed
- the corresponding call_rcu(), call_rcu_bh(), or call_rcu_sched(),
- but are by -no- means guaranteed to be. For example, if a given
- CPU goes offline while having an RCU callback pending, then that
- RCU callback will execute on some surviving CPU. (If this was
- not the case, a self-spawning RCU callback would prevent the
- victim CPU from ever going offline.)
-
-14. Unlike other forms of RCU, it -is- permissible to block in an
- SRCU read-side critical section (demarked by srcu_read_lock()
- and srcu_read_unlock()), hence the "SRCU": "sleepable RCU".
- Please note that if you don't need to sleep in read-side critical
- sections, you should be using RCU rather than SRCU, because RCU
- is almost always faster and easier to use than is SRCU.
-
- Also unlike other forms of RCU, explicit initialization and
- cleanup is required either at build time via DEFINE_SRCU()
- or DEFINE_STATIC_SRCU() or at runtime via init_srcu_struct()
- and cleanup_srcu_struct(). These last two are passed a
- "struct srcu_struct" that defines the scope of a given
- SRCU domain. Once initialized, the srcu_struct is passed
- to srcu_read_lock(), srcu_read_unlock() synchronize_srcu(),
- synchronize_srcu_expedited(), and call_srcu(). A given
- synchronize_srcu() waits only for SRCU read-side critical
- sections governed by srcu_read_lock() and srcu_read_unlock()
- calls that have been passed the same srcu_struct. This property
- is what makes sleeping read-side critical sections tolerable --
- a given subsystem delays only its own updates, not those of other
- subsystems using SRCU. Therefore, SRCU is less prone to OOM the
- system than RCU would be if RCU's read-side critical sections
- were permitted to sleep.
-
- The ability to sleep in read-side critical sections does not
- come for free. First, corresponding srcu_read_lock() and
- srcu_read_unlock() calls must be passed the same srcu_struct.
- Second, grace-period-detection overhead is amortized only
- over those updates sharing a given srcu_struct, rather than
- being globally amortized as they are for other forms of RCU.
- Therefore, SRCU should be used in preference to rw_semaphore
- only in extremely read-intensive situations, or in situations
- requiring SRCU's read-side deadlock immunity or low read-side
- realtime latency. You should also consider percpu_rw_semaphore
- when you need lightweight readers.
-
- SRCU's expedited primitive (synchronize_srcu_expedited())
- never sends IPIs to other CPUs, so it is easier on
- real-time workloads than is synchronize_rcu_expedited(),
- synchronize_rcu_bh_expedited() or synchronize_sched_expedited().
-
- Note that rcu_dereference() and rcu_assign_pointer() relate to
- SRCU just as they do to other forms of RCU.
-
-15. The whole point of call_rcu(), synchronize_rcu(), and friends
- is to wait until all pre-existing readers have finished before
- carrying out some otherwise-destructive operation. It is
- therefore critically important to -first- remove any path
- that readers can follow that could be affected by the
- destructive operation, and -only- -then- invoke call_rcu(),
- synchronize_rcu(), or friends.
-
- Because these primitives only wait for pre-existing readers, it
- is the caller's responsibility to guarantee that any subsequent
- readers will execute safely.
-
-16. The various RCU read-side primitives do -not- necessarily contain
- memory barriers. You should therefore plan for the CPU
- and the compiler to freely reorder code into and out of RCU
- read-side critical sections. It is the responsibility of the
- RCU update-side primitives to deal with this.
-
-17. Use CONFIG_PROVE_LOCKING, CONFIG_DEBUG_OBJECTS_RCU_HEAD, and the
- __rcu sparse checks to validate your RCU code. These can help
- find problems as follows:
-
- CONFIG_PROVE_LOCKING: check that accesses to RCU-protected data
- structures are carried out under the proper RCU
- read-side critical section, while holding the right
- combination of locks, or whatever other conditions
- are appropriate.
-
- CONFIG_DEBUG_OBJECTS_RCU_HEAD: check that you don't pass the
- same object to call_rcu() (or friends) before an RCU
- grace period has elapsed since the last time that you
- passed that same object to call_rcu() (or friends).
-
- __rcu sparse checks: tag the pointer to the RCU-protected data
- structure with __rcu, and sparse will warn you if you
- access that pointer without the services of one of the
- variants of rcu_dereference().
-
- These debugging aids can help you find problems that are
- otherwise extremely difficult to spot.
-
-18. If you register a callback using call_rcu(), call_rcu_bh(),
- call_rcu_sched(), or call_srcu(), and pass in a function defined
- within a loadable module, then it in necessary to wait for
- all pending callbacks to be invoked after the last invocation
- and before unloading that module. Note that it is absolutely
- -not- sufficient to wait for a grace period! The current (say)
- synchronize_rcu() implementation waits only for all previous
- callbacks registered on the CPU that synchronize_rcu() is running
- on, but it is -not- guaranteed to wait for callbacks registered
- on other CPUs.
-
- You instead need to use one of the barrier functions:
-
- o call_rcu() -> rcu_barrier()
- o call_rcu_bh() -> rcu_barrier_bh()
- o call_rcu_sched() -> rcu_barrier_sched()
- o call_srcu() -> srcu_barrier()
-
- However, these barrier functions are absolutely -not- guaranteed
- to wait for a grace period. In fact, if there are no call_rcu()
- callbacks waiting anywhere in the system, rcu_barrier() is within
- its rights to return immediately.
-
- So if you need to wait for both an RCU grace period and for
- all pre-existing call_rcu() callbacks, you will need to execute
- both rcu_barrier() and synchronize_rcu(), if necessary, using
- something like workqueues to to execute them concurrently.
-
- See rcubarrier.txt for more information.
diff --git a/Documentation/RCU/index.rst b/Documentation/RCU/index.rst
new file mode 100644
index 000000000000..ef26c78507d3
--- /dev/null
+++ b/Documentation/RCU/index.rst
@@ -0,0 +1,37 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. _rcu_handbook:
+
+============
+RCU Handbook
+============
+
+.. toctree::
+ :maxdepth: 2
+
+ checklist
+ lockdep
+ lockdep-splat
+ rcubarrier
+ rcu_dereference
+ whatisRCU
+ rcu
+ rculist_nulls
+ rcuref
+ torture
+ stallwarn
+ listRCU
+ NMI-RCU
+ UP
+
+ Design/Memory-Ordering/Tree-RCU-Memory-Ordering
+ Design/Expedited-Grace-Periods/Expedited-Grace-Periods
+ Design/Requirements/Requirements
+ Design/Data-Structures/Data-Structures
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
diff --git a/Documentation/RCU/listRCU.rst b/Documentation/RCU/listRCU.rst
new file mode 100644
index 000000000000..d8bb98623c12
--- /dev/null
+++ b/Documentation/RCU/listRCU.rst
@@ -0,0 +1,511 @@
+.. _list_rcu_doc:
+
+Using RCU to Protect Read-Mostly Linked Lists
+=============================================
+
+One of the most common uses of RCU is protecting read-mostly linked lists
+(``struct list_head`` in list.h). One big advantage of this approach is
+that all of the required memory ordering is provided by the list macros.
+This document describes several list-based RCU use cases.
+
+When iterating a list while holding the rcu_read_lock(), writers may
+modify the list. The reader is guaranteed to see all of the elements
+which were added to the list before they acquired the rcu_read_lock()
+and are still on the list when they drop the rcu_read_unlock().
+Elements which are added to, or removed from the list may or may not
+be seen. If the writer calls list_replace_rcu(), the reader may see
+either the old element or the new element; they will not see both,
+nor will they see neither.
+
+
+Example 1: Read-mostly list: Deferred Destruction
+-------------------------------------------------
+
+A widely used usecase for RCU lists in the kernel is lockless iteration over
+all processes in the system. ``task_struct::tasks`` represents the list node that
+links all the processes. The list can be traversed in parallel to any list
+additions or removals.
+
+The traversal of the list is done using ``for_each_process()`` which is defined
+by the 2 macros::
+
+ #define next_task(p) \
+ list_entry_rcu((p)->tasks.next, struct task_struct, tasks)
+
+ #define for_each_process(p) \
+ for (p = &init_task ; (p = next_task(p)) != &init_task ; )
+
+The code traversing the list of all processes typically looks like::
+
+ rcu_read_lock();
+ for_each_process(p) {
+ /* Do something with p */
+ }
+ rcu_read_unlock();
+
+The simplified and heavily inlined code for removing a process from a
+task list is::
+
+ void release_task(struct task_struct *p)
+ {
+ write_lock(&tasklist_lock);
+ list_del_rcu(&p->tasks);
+ write_unlock(&tasklist_lock);
+ call_rcu(&p->rcu, delayed_put_task_struct);
+ }
+
+When a process exits, ``release_task()`` calls ``list_del_rcu(&p->tasks)``
+via __exit_signal() and __unhash_process() under ``tasklist_lock``
+writer lock protection. The list_del_rcu() invocation removes
+the task from the list of all tasks. The ``tasklist_lock``
+prevents concurrent list additions/removals from corrupting the
+list. Readers using ``for_each_process()`` are not protected with the
+``tasklist_lock``. To prevent readers from noticing changes in the list
+pointers, the ``task_struct`` object is freed only after one or more
+grace periods elapse, with the help of call_rcu(), which is invoked via
+put_task_struct_rcu_user(). This deferring of destruction ensures that
+any readers traversing the list will see valid ``p->tasks.next`` pointers
+and deletion/freeing can happen in parallel with traversal of the list.
+This pattern is also called an **existence lock**, since RCU refrains
+from invoking the delayed_put_task_struct() callback function until
+all existing readers finish, which guarantees that the ``task_struct``
+object in question will remain in existence until after the completion
+of all RCU readers that might possibly have a reference to that object.
+
+
+Example 2: Read-Side Action Taken Outside of Lock: No In-Place Updates
+----------------------------------------------------------------------
+
+Some reader-writer locking use cases compute a value while holding
+the read-side lock, but continue to use that value after that lock is
+released. These use cases are often good candidates for conversion
+to RCU. One prominent example involves network packet routing.
+Because the packet-routing data tracks the state of equipment outside
+of the computer, it will at times contain stale data. Therefore, once
+the route has been computed, there is no need to hold the routing table
+static during transmission of the packet. After all, you can hold the
+routing table static all you want, but that won't keep the external
+Internet from changing, and it is the state of the external Internet
+that really matters. In addition, routing entries are typically added
+or deleted, rather than being modified in place. This is a rare example
+of the finite speed of light and the non-zero size of atoms actually
+helping make synchronization be lighter weight.
+
+A straightforward example of this type of RCU use case may be found in
+the system-call auditing support. For example, a reader-writer locked
+implementation of ``audit_filter_task()`` might be as follows::
+
+ static enum audit_state audit_filter_task(struct task_struct *tsk, char **key)
+ {
+ struct audit_entry *e;
+ enum audit_state state;
+
+ read_lock(&auditsc_lock);
+ /* Note: audit_filter_mutex held by caller. */
+ list_for_each_entry(e, &audit_tsklist, list) {
+ if (audit_filter_rules(tsk, &e->rule, NULL, &state)) {
+ if (state == AUDIT_STATE_RECORD)
+ *key = kstrdup(e->rule.filterkey, GFP_ATOMIC);
+ read_unlock(&auditsc_lock);
+ return state;
+ }
+ }
+ read_unlock(&auditsc_lock);
+ return AUDIT_BUILD_CONTEXT;
+ }
+
+Here the list is searched under the lock, but the lock is dropped before
+the corresponding value is returned. By the time that this value is acted
+on, the list may well have been modified. This makes sense, since if
+you are turning auditing off, it is OK to audit a few extra system calls.
+
+This means that RCU can be easily applied to the read side, as follows::
+
+ static enum audit_state audit_filter_task(struct task_struct *tsk, char **key)
+ {
+ struct audit_entry *e;
+ enum audit_state state;
+
+ rcu_read_lock();
+ /* Note: audit_filter_mutex held by caller. */
+ list_for_each_entry_rcu(e, &audit_tsklist, list) {
+ if (audit_filter_rules(tsk, &e->rule, NULL, &state)) {
+ if (state == AUDIT_STATE_RECORD)
+ *key = kstrdup(e->rule.filterkey, GFP_ATOMIC);
+ rcu_read_unlock();
+ return state;
+ }
+ }
+ rcu_read_unlock();
+ return AUDIT_BUILD_CONTEXT;
+ }
+
+The read_lock() and read_unlock() calls have become rcu_read_lock()
+and rcu_read_unlock(), respectively, and the list_for_each_entry()
+has become list_for_each_entry_rcu(). The **_rcu()** list-traversal
+primitives add READ_ONCE() and diagnostic checks for incorrect use
+outside of an RCU read-side critical section.
+
+The changes to the update side are also straightforward. A reader-writer lock
+might be used as follows for deletion and insertion in these simplified
+versions of audit_del_rule() and audit_add_rule()::
+
+ static inline int audit_del_rule(struct audit_rule *rule,
+ struct list_head *list)
+ {
+ struct audit_entry *e;
+
+ write_lock(&auditsc_lock);
+ list_for_each_entry(e, list, list) {
+ if (!audit_compare_rule(rule, &e->rule)) {
+ list_del(&e->list);
+ write_unlock(&auditsc_lock);
+ return 0;
+ }
+ }
+ write_unlock(&auditsc_lock);
+ return -EFAULT; /* No matching rule */
+ }
+
+ static inline int audit_add_rule(struct audit_entry *entry,
+ struct list_head *list)
+ {
+ write_lock(&auditsc_lock);
+ if (entry->rule.flags & AUDIT_PREPEND) {
+ entry->rule.flags &= ~AUDIT_PREPEND;
+ list_add(&entry->list, list);
+ } else {
+ list_add_tail(&entry->list, list);
+ }
+ write_unlock(&auditsc_lock);
+ return 0;
+ }
+
+Following are the RCU equivalents for these two functions::
+
+ static inline int audit_del_rule(struct audit_rule *rule,
+ struct list_head *list)
+ {
+ struct audit_entry *e;
+
+ /* No need to use the _rcu iterator here, since this is the only
+ * deletion routine. */
+ list_for_each_entry(e, list, list) {
+ if (!audit_compare_rule(rule, &e->rule)) {
+ list_del_rcu(&e->list);
+ call_rcu(&e->rcu, audit_free_rule);
+ return 0;
+ }
+ }
+ return -EFAULT; /* No matching rule */
+ }
+
+ static inline int audit_add_rule(struct audit_entry *entry,
+ struct list_head *list)
+ {
+ if (entry->rule.flags & AUDIT_PREPEND) {
+ entry->rule.flags &= ~AUDIT_PREPEND;
+ list_add_rcu(&entry->list, list);
+ } else {
+ list_add_tail_rcu(&entry->list, list);
+ }
+ return 0;
+ }
+
+Normally, the write_lock() and write_unlock() would be replaced by a
+spin_lock() and a spin_unlock(). But in this case, all callers hold
+``audit_filter_mutex``, so no additional locking is required. The
+auditsc_lock can therefore be eliminated, since use of RCU eliminates the
+need for writers to exclude readers.
+
+The list_del(), list_add(), and list_add_tail() primitives have been
+replaced by list_del_rcu(), list_add_rcu(), and list_add_tail_rcu().
+The **_rcu()** list-manipulation primitives add memory barriers that are
+needed on weakly ordered CPUs. The list_del_rcu() primitive omits the
+pointer poisoning debug-assist code that would otherwise cause concurrent
+readers to fail spectacularly.
+
+So, when readers can tolerate stale data and when entries are either added or
+deleted, without in-place modification, it is very easy to use RCU!
+
+
+Example 3: Handling In-Place Updates
+------------------------------------
+
+The system-call auditing code does not update auditing rules in place. However,
+if it did, the reader-writer-locked code to do so might look as follows
+(assuming only ``field_count`` is updated, otherwise, the added fields would
+need to be filled in)::
+
+ static inline int audit_upd_rule(struct audit_rule *rule,
+ struct list_head *list,
+ __u32 newaction,
+ __u32 newfield_count)
+ {
+ struct audit_entry *e;
+ struct audit_entry *ne;
+
+ write_lock(&auditsc_lock);
+ /* Note: audit_filter_mutex held by caller. */
+ list_for_each_entry(e, list, list) {
+ if (!audit_compare_rule(rule, &e->rule)) {
+ e->rule.action = newaction;
+ e->rule.field_count = newfield_count;
+ write_unlock(&auditsc_lock);
+ return 0;
+ }
+ }
+ write_unlock(&auditsc_lock);
+ return -EFAULT; /* No matching rule */
+ }
+
+The RCU version creates a copy, updates the copy, then replaces the old
+entry with the newly updated entry. This sequence of actions, allowing
+concurrent reads while making a copy to perform an update, is what gives
+RCU (*read-copy update*) its name.
+
+The RCU version of audit_upd_rule() is as follows::
+
+ static inline int audit_upd_rule(struct audit_rule *rule,
+ struct list_head *list,
+ __u32 newaction,
+ __u32 newfield_count)
+ {
+ struct audit_entry *e;
+ struct audit_entry *ne;
+
+ list_for_each_entry(e, list, list) {
+ if (!audit_compare_rule(rule, &e->rule)) {
+ ne = kmalloc(sizeof(*entry), GFP_ATOMIC);
+ if (ne == NULL)
+ return -ENOMEM;
+ audit_copy_rule(&ne->rule, &e->rule);
+ ne->rule.action = newaction;
+ ne->rule.field_count = newfield_count;
+ list_replace_rcu(&e->list, &ne->list);
+ call_rcu(&e->rcu, audit_free_rule);
+ return 0;
+ }
+ }
+ return -EFAULT; /* No matching rule */
+ }
+
+Again, this assumes that the caller holds ``audit_filter_mutex``. Normally, the
+writer lock would become a spinlock in this sort of code.
+
+The update_lsm_rule() does something very similar, for those who would
+prefer to look at real Linux-kernel code.
+
+Another use of this pattern can be found in the openswitch driver's *connection
+tracking table* code in ``ct_limit_set()``. The table holds connection tracking
+entries and has a limit on the maximum entries. There is one such table
+per-zone and hence one *limit* per zone. The zones are mapped to their limits
+through a hashtable using an RCU-managed hlist for the hash chains. When a new
+limit is set, a new limit object is allocated and ``ct_limit_set()`` is called
+to replace the old limit object with the new one using list_replace_rcu().
+The old limit object is then freed after a grace period using kfree_rcu().
+
+
+Example 4: Eliminating Stale Data
+---------------------------------
+
+The auditing example above tolerates stale data, as do most algorithms
+that are tracking external state. After all, given there is a delay
+from the time the external state changes before Linux becomes aware
+of the change, and so as noted earlier, a small quantity of additional
+RCU-induced staleness is generally not a problem.
+
+However, there are many examples where stale data cannot be tolerated.
+One example in the Linux kernel is the System V IPC (see the shm_lock()
+function in ipc/shm.c). This code checks a *deleted* flag under a
+per-entry spinlock, and, if the *deleted* flag is set, pretends that the
+entry does not exist. For this to be helpful, the search function must
+return holding the per-entry spinlock, as shm_lock() does in fact do.
+
+.. _quick_quiz:
+
+Quick Quiz:
+ For the deleted-flag technique to be helpful, why is it necessary
+ to hold the per-entry lock while returning from the search function?
+
+:ref:`Answer to Quick Quiz <quick_quiz_answer>`
+
+If the system-call audit module were to ever need to reject stale data, one way
+to accomplish this would be to add a ``deleted`` flag and a ``lock`` spinlock to the
+``audit_entry`` structure, and modify audit_filter_task() as follows::
+
+ static struct audit_entry *audit_filter_task(struct task_struct *tsk, char **key)
+ {
+ struct audit_entry *e;
+ enum audit_state state;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(e, &audit_tsklist, list) {
+ if (audit_filter_rules(tsk, &e->rule, NULL, &state)) {
+ spin_lock(&e->lock);
+ if (e->deleted) {
+ spin_unlock(&e->lock);
+ rcu_read_unlock();
+ return NULL;
+ }
+ rcu_read_unlock();
+ if (state == AUDIT_STATE_RECORD)
+ *key = kstrdup(e->rule.filterkey, GFP_ATOMIC);
+ /* As long as e->lock is held, e is valid and
+ * its value is not stale */
+ return e;
+ }
+ }
+ rcu_read_unlock();
+ return NULL;
+ }
+
+The ``audit_del_rule()`` function would need to set the ``deleted`` flag under the
+spinlock as follows::
+
+ static inline int audit_del_rule(struct audit_rule *rule,
+ struct list_head *list)
+ {
+ struct audit_entry *e;
+
+ /* No need to use the _rcu iterator here, since this
+ * is the only deletion routine. */
+ list_for_each_entry(e, list, list) {
+ if (!audit_compare_rule(rule, &e->rule)) {
+ spin_lock(&e->lock);
+ list_del_rcu(&e->list);
+ e->deleted = 1;
+ spin_unlock(&e->lock);
+ call_rcu(&e->rcu, audit_free_rule);
+ return 0;
+ }
+ }
+ return -EFAULT; /* No matching rule */
+ }
+
+This too assumes that the caller holds ``audit_filter_mutex``.
+
+Note that this example assumes that entries are only added and deleted.
+Additional mechanism is required to deal correctly with the update-in-place
+performed by audit_upd_rule(). For one thing, audit_upd_rule() would
+need to hold the locks of both the old ``audit_entry`` and its replacement
+while executing the list_replace_rcu().
+
+
+Example 5: Skipping Stale Objects
+---------------------------------
+
+For some use cases, reader performance can be improved by skipping
+stale objects during read-side list traversal, where stale objects
+are those that will be removed and destroyed after one or more grace
+periods. One such example can be found in the timerfd subsystem. When a
+``CLOCK_REALTIME`` clock is reprogrammed (for example due to setting
+of the system time) then all programmed ``timerfds`` that depend on
+this clock get triggered and processes waiting on them are awakened in
+advance of their scheduled expiry. To facilitate this, all such timers
+are added to an RCU-managed ``cancel_list`` when they are setup in
+``timerfd_setup_cancel()``::
+
+ static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags)
+ {
+ spin_lock(&ctx->cancel_lock);
+ if ((ctx->clockid == CLOCK_REALTIME ||
+ ctx->clockid == CLOCK_REALTIME_ALARM) &&
+ (flags & TFD_TIMER_ABSTIME) && (flags & TFD_TIMER_CANCEL_ON_SET)) {
+ if (!ctx->might_cancel) {
+ ctx->might_cancel = true;
+ spin_lock(&cancel_lock);
+ list_add_rcu(&ctx->clist, &cancel_list);
+ spin_unlock(&cancel_lock);
+ }
+ } else {
+ __timerfd_remove_cancel(ctx);
+ }
+ spin_unlock(&ctx->cancel_lock);
+ }
+
+When a timerfd is freed (fd is closed), then the ``might_cancel``
+flag of the timerfd object is cleared, the object removed from the
+``cancel_list`` and destroyed, as shown in this simplified and inlined
+version of timerfd_release()::
+
+ int timerfd_release(struct inode *inode, struct file *file)
+ {
+ struct timerfd_ctx *ctx = file->private_data;
+
+ spin_lock(&ctx->cancel_lock);
+ if (ctx->might_cancel) {
+ ctx->might_cancel = false;
+ spin_lock(&cancel_lock);
+ list_del_rcu(&ctx->clist);
+ spin_unlock(&cancel_lock);
+ }
+ spin_unlock(&ctx->cancel_lock);
+
+ if (isalarm(ctx))
+ alarm_cancel(&ctx->t.alarm);
+ else
+ hrtimer_cancel(&ctx->t.tmr);
+ kfree_rcu(ctx, rcu);
+ return 0;
+ }
+
+If the ``CLOCK_REALTIME`` clock is set, for example by a time server, the
+hrtimer framework calls ``timerfd_clock_was_set()`` which walks the
+``cancel_list`` and wakes up processes waiting on the timerfd. While iterating
+the ``cancel_list``, the ``might_cancel`` flag is consulted to skip stale
+objects::
+
+ void timerfd_clock_was_set(void)
+ {
+ ktime_t moffs = ktime_mono_to_real(0);
+ struct timerfd_ctx *ctx;
+ unsigned long flags;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(ctx, &cancel_list, clist) {
+ if (!ctx->might_cancel)
+ continue;
+ spin_lock_irqsave(&ctx->wqh.lock, flags);
+ if (ctx->moffs != moffs) {
+ ctx->moffs = KTIME_MAX;
+ ctx->ticks++;
+ wake_up_locked_poll(&ctx->wqh, EPOLLIN);
+ }
+ spin_unlock_irqrestore(&ctx->wqh.lock, flags);
+ }
+ rcu_read_unlock();
+ }
+
+The key point is that because RCU-protected traversal of the
+``cancel_list`` happens concurrently with object addition and removal,
+sometimes the traversal can access an object that has been removed from
+the list. In this example, a flag is used to skip such objects.
+
+
+Summary
+-------
+
+Read-mostly list-based data structures that can tolerate stale data are
+the most amenable to use of RCU. The simplest case is where entries are
+either added or deleted from the data structure (or atomically modified
+in place), but non-atomic in-place modifications can be handled by making
+a copy, updating the copy, then replacing the original with the copy.
+If stale data cannot be tolerated, then a *deleted* flag may be used
+in conjunction with a per-entry spinlock in order to allow the search
+function to reject newly deleted data.
+
+.. _quick_quiz_answer:
+
+Answer to Quick Quiz:
+ For the deleted-flag technique to be helpful, why is it necessary
+ to hold the per-entry lock while returning from the search function?
+
+ If the search function drops the per-entry lock before returning,
+ then the caller will be processing stale data in any case. If it
+ is really OK to be processing stale data, then you don't need a
+ *deleted* flag. If processing stale data really is a problem,
+ then you need to hold the per-entry lock across all of the code
+ that uses the value that was returned.
+
+:ref:`Back to Quick Quiz <quick_quiz>`
diff --git a/Documentation/RCU/listRCU.txt b/Documentation/RCU/listRCU.txt
deleted file mode 100644
index adb5a3782846..000000000000
--- a/Documentation/RCU/listRCU.txt
+++ /dev/null
@@ -1,315 +0,0 @@
-Using RCU to Protect Read-Mostly Linked Lists
-
-
-One of the best applications of RCU is to protect read-mostly linked lists
-("struct list_head" in list.h). One big advantage of this approach
-is that all of the required memory barriers are included for you in
-the list macros. This document describes several applications of RCU,
-with the best fits first.
-
-
-Example 1: Read-Side Action Taken Outside of Lock, No In-Place Updates
-
-The best applications are cases where, if reader-writer locking were
-used, the read-side lock would be dropped before taking any action
-based on the results of the search. The most celebrated example is
-the routing table. Because the routing table is tracking the state of
-equipment outside of the computer, it will at times contain stale data.
-Therefore, once the route has been computed, there is no need to hold
-the routing table static during transmission of the packet. After all,
-you can hold the routing table static all you want, but that won't keep
-the external Internet from changing, and it is the state of the external
-Internet that really matters. In addition, routing entries are typically
-added or deleted, rather than being modified in place.
-
-A straightforward example of this use of RCU may be found in the
-system-call auditing support. For example, a reader-writer locked
-implementation of audit_filter_task() might be as follows:
-
- static enum audit_state audit_filter_task(struct task_struct *tsk)
- {
- struct audit_entry *e;
- enum audit_state state;
-
- read_lock(&auditsc_lock);
- /* Note: audit_netlink_sem held by caller. */
- list_for_each_entry(e, &audit_tsklist, list) {
- if (audit_filter_rules(tsk, &e->rule, NULL, &state)) {
- read_unlock(&auditsc_lock);
- return state;
- }
- }
- read_unlock(&auditsc_lock);
- return AUDIT_BUILD_CONTEXT;
- }
-
-Here the list is searched under the lock, but the lock is dropped before
-the corresponding value is returned. By the time that this value is acted
-on, the list may well have been modified. This makes sense, since if
-you are turning auditing off, it is OK to audit a few extra system calls.
-
-This means that RCU can be easily applied to the read side, as follows:
-
- static enum audit_state audit_filter_task(struct task_struct *tsk)
- {
- struct audit_entry *e;
- enum audit_state state;
-
- rcu_read_lock();
- /* Note: audit_netlink_sem held by caller. */
- list_for_each_entry_rcu(e, &audit_tsklist, list) {
- if (audit_filter_rules(tsk, &e->rule, NULL, &state)) {
- rcu_read_unlock();
- return state;
- }
- }
- rcu_read_unlock();
- return AUDIT_BUILD_CONTEXT;
- }
-
-The read_lock() and read_unlock() calls have become rcu_read_lock()
-and rcu_read_unlock(), respectively, and the list_for_each_entry() has
-become list_for_each_entry_rcu(). The _rcu() list-traversal primitives
-insert the read-side memory barriers that are required on DEC Alpha CPUs.
-
-The changes to the update side are also straightforward. A reader-writer
-lock might be used as follows for deletion and insertion:
-
- static inline int audit_del_rule(struct audit_rule *rule,
- struct list_head *list)
- {
- struct audit_entry *e;
-
- write_lock(&auditsc_lock);
- list_for_each_entry(e, list, list) {
- if (!audit_compare_rule(rule, &e->rule)) {
- list_del(&e->list);
- write_unlock(&auditsc_lock);
- return 0;
- }
- }
- write_unlock(&auditsc_lock);
- return -EFAULT; /* No matching rule */
- }
-
- static inline int audit_add_rule(struct audit_entry *entry,
- struct list_head *list)
- {
- write_lock(&auditsc_lock);
- if (entry->rule.flags & AUDIT_PREPEND) {
- entry->rule.flags &= ~AUDIT_PREPEND;
- list_add(&entry->list, list);
- } else {
- list_add_tail(&entry->list, list);
- }
- write_unlock(&auditsc_lock);
- return 0;
- }
-
-Following are the RCU equivalents for these two functions:
-
- static inline int audit_del_rule(struct audit_rule *rule,
- struct list_head *list)
- {
- struct audit_entry *e;
-
- /* Do not use the _rcu iterator here, since this is the only
- * deletion routine. */
- list_for_each_entry(e, list, list) {
- if (!audit_compare_rule(rule, &e->rule)) {
- list_del_rcu(&e->list);
- call_rcu(&e->rcu, audit_free_rule);
- return 0;
- }
- }
- return -EFAULT; /* No matching rule */
- }
-
- static inline int audit_add_rule(struct audit_entry *entry,
- struct list_head *list)
- {
- if (entry->rule.flags & AUDIT_PREPEND) {
- entry->rule.flags &= ~AUDIT_PREPEND;
- list_add_rcu(&entry->list, list);
- } else {
- list_add_tail_rcu(&entry->list, list);
- }
- return 0;
- }
-
-Normally, the write_lock() and write_unlock() would be replaced by
-a spin_lock() and a spin_unlock(), but in this case, all callers hold
-audit_netlink_sem, so no additional locking is required. The auditsc_lock
-can therefore be eliminated, since use of RCU eliminates the need for
-writers to exclude readers. Normally, the write_lock() calls would
-be converted into spin_lock() calls.
-
-The list_del(), list_add(), and list_add_tail() primitives have been
-replaced by list_del_rcu(), list_add_rcu(), and list_add_tail_rcu().
-The _rcu() list-manipulation primitives add memory barriers that are
-needed on weakly ordered CPUs (most of them!). The list_del_rcu()
-primitive omits the pointer poisoning debug-assist code that would
-otherwise cause concurrent readers to fail spectacularly.
-
-So, when readers can tolerate stale data and when entries are either added
-or deleted, without in-place modification, it is very easy to use RCU!
-
-
-Example 2: Handling In-Place Updates
-
-The system-call auditing code does not update auditing rules in place.
-However, if it did, reader-writer-locked code to do so might look as
-follows (presumably, the field_count is only permitted to decrease,
-otherwise, the added fields would need to be filled in):
-
- static inline int audit_upd_rule(struct audit_rule *rule,
- struct list_head *list,
- __u32 newaction,
- __u32 newfield_count)
- {
- struct audit_entry *e;
- struct audit_newentry *ne;
-
- write_lock(&auditsc_lock);
- /* Note: audit_netlink_sem held by caller. */
- list_for_each_entry(e, list, list) {
- if (!audit_compare_rule(rule, &e->rule)) {
- e->rule.action = newaction;
- e->rule.file_count = newfield_count;
- write_unlock(&auditsc_lock);
- return 0;
- }
- }
- write_unlock(&auditsc_lock);
- return -EFAULT; /* No matching rule */
- }
-
-The RCU version creates a copy, updates the copy, then replaces the old
-entry with the newly updated entry. This sequence of actions, allowing
-concurrent reads while doing a copy to perform an update, is what gives
-RCU ("read-copy update") its name. The RCU code is as follows:
-
- static inline int audit_upd_rule(struct audit_rule *rule,
- struct list_head *list,
- __u32 newaction,
- __u32 newfield_count)
- {
- struct audit_entry *e;
- struct audit_newentry *ne;
-
- list_for_each_entry(e, list, list) {
- if (!audit_compare_rule(rule, &e->rule)) {
- ne = kmalloc(sizeof(*entry), GFP_ATOMIC);
- if (ne == NULL)
- return -ENOMEM;
- audit_copy_rule(&ne->rule, &e->rule);
- ne->rule.action = newaction;
- ne->rule.file_count = newfield_count;
- list_replace_rcu(&e->list, &ne->list);
- call_rcu(&e->rcu, audit_free_rule);
- return 0;
- }
- }
- return -EFAULT; /* No matching rule */
- }
-
-Again, this assumes that the caller holds audit_netlink_sem. Normally,
-the reader-writer lock would become a spinlock in this sort of code.
-
-
-Example 3: Eliminating Stale Data
-
-The auditing examples above tolerate stale data, as do most algorithms
-that are tracking external state. Because there is a delay from the
-time the external state changes before Linux becomes aware of the change,
-additional RCU-induced staleness is normally not a problem.
-
-However, there are many examples where stale data cannot be tolerated.
-One example in the Linux kernel is the System V IPC (see the ipc_lock()
-function in ipc/util.c). This code checks a "deleted" flag under a
-per-entry spinlock, and, if the "deleted" flag is set, pretends that the
-entry does not exist. For this to be helpful, the search function must
-return holding the per-entry spinlock, as ipc_lock() does in fact do.
-
-Quick Quiz: Why does the search function need to return holding the
- per-entry lock for this deleted-flag technique to be helpful?
-
-If the system-call audit module were to ever need to reject stale data,
-one way to accomplish this would be to add a "deleted" flag and a "lock"
-spinlock to the audit_entry structure, and modify audit_filter_task()
-as follows:
-
- static enum audit_state audit_filter_task(struct task_struct *tsk)
- {
- struct audit_entry *e;
- enum audit_state state;
-
- rcu_read_lock();
- list_for_each_entry_rcu(e, &audit_tsklist, list) {
- if (audit_filter_rules(tsk, &e->rule, NULL, &state)) {
- spin_lock(&e->lock);
- if (e->deleted) {
- spin_unlock(&e->lock);
- rcu_read_unlock();
- return AUDIT_BUILD_CONTEXT;
- }
- rcu_read_unlock();
- return state;
- }
- }
- rcu_read_unlock();
- return AUDIT_BUILD_CONTEXT;
- }
-
-Note that this example assumes that entries are only added and deleted.
-Additional mechanism is required to deal correctly with the
-update-in-place performed by audit_upd_rule(). For one thing,
-audit_upd_rule() would need additional memory barriers to ensure
-that the list_add_rcu() was really executed before the list_del_rcu().
-
-The audit_del_rule() function would need to set the "deleted"
-flag under the spinlock as follows:
-
- static inline int audit_del_rule(struct audit_rule *rule,
- struct list_head *list)
- {
- struct audit_entry *e;
-
- /* Do not need to use the _rcu iterator here, since this
- * is the only deletion routine. */
- list_for_each_entry(e, list, list) {
- if (!audit_compare_rule(rule, &e->rule)) {
- spin_lock(&e->lock);
- list_del_rcu(&e->list);
- e->deleted = 1;
- spin_unlock(&e->lock);
- call_rcu(&e->rcu, audit_free_rule);
- return 0;
- }
- }
- return -EFAULT; /* No matching rule */
- }
-
-
-Summary
-
-Read-mostly list-based data structures that can tolerate stale data are
-the most amenable to use of RCU. The simplest case is where entries are
-either added or deleted from the data structure (or atomically modified
-in place), but non-atomic in-place modifications can be handled by making
-a copy, updating the copy, then replacing the original with the copy.
-If stale data cannot be tolerated, then a "deleted" flag may be used
-in conjunction with a per-entry spinlock in order to allow the search
-function to reject newly deleted data.
-
-
-Answer to Quick Quiz
- Why does the search function need to return holding the per-entry
- lock for this deleted-flag technique to be helpful?
-
- If the search function drops the per-entry lock before returning,
- then the caller will be processing stale data in any case. If it
- is really OK to be processing stale data, then you don't need a
- "deleted" flag. If processing stale data really is a problem,
- then you need to hold the per-entry lock across all of the code
- that uses the value that was returned.
diff --git a/Documentation/RCU/lockdep-splat.rst b/Documentation/RCU/lockdep-splat.rst
new file mode 100644
index 000000000000..bcbc4b3c88d7
--- /dev/null
+++ b/Documentation/RCU/lockdep-splat.rst
@@ -0,0 +1,115 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=================
+Lockdep-RCU Splat
+=================
+
+Lockdep-RCU was added to the Linux kernel in early 2010
+(http://lwn.net/Articles/371986/). This facility checks for some common
+misuses of the RCU API, most notably using one of the rcu_dereference()
+family to access an RCU-protected pointer without the proper protection.
+When such misuse is detected, an lockdep-RCU splat is emitted.
+
+The usual cause of a lockdep-RCU splat is someone accessing an
+RCU-protected data structure without either (1) being in the right kind of
+RCU read-side critical section or (2) holding the right update-side lock.
+This problem can therefore be serious: it might result in random memory
+overwriting or worse. There can of course be false positives, this
+being the real world and all that.
+
+So let's look at an example RCU lockdep splat from 3.0-rc5, one that
+has long since been fixed::
+
+ =============================
+ WARNING: suspicious RCU usage
+ -----------------------------
+ block/cfq-iosched.c:2776 suspicious rcu_dereference_protected() usage!
+
+other info that might help us debug this::
+
+ rcu_scheduler_active = 1, debug_locks = 0
+ 3 locks held by scsi_scan_6/1552:
+ #0: (&shost->scan_mutex){+.+.}, at: [<ffffffff8145efca>]
+ scsi_scan_host_selected+0x5a/0x150
+ #1: (&eq->sysfs_lock){+.+.}, at: [<ffffffff812a5032>]
+ elevator_exit+0x22/0x60
+ #2: (&(&q->__queue_lock)->rlock){-.-.}, at: [<ffffffff812b6233>]
+ cfq_exit_queue+0x43/0x190
+
+ stack backtrace:
+ Pid: 1552, comm: scsi_scan_6 Not tainted 3.0.0-rc5 #17
+ Call Trace:
+ [<ffffffff810abb9b>] lockdep_rcu_dereference+0xbb/0xc0
+ [<ffffffff812b6139>] __cfq_exit_single_io_context+0xe9/0x120
+ [<ffffffff812b626c>] cfq_exit_queue+0x7c/0x190
+ [<ffffffff812a5046>] elevator_exit+0x36/0x60
+ [<ffffffff812a802a>] blk_cleanup_queue+0x4a/0x60
+ [<ffffffff8145cc09>] scsi_free_queue+0x9/0x10
+ [<ffffffff81460944>] __scsi_remove_device+0x84/0xd0
+ [<ffffffff8145dca3>] scsi_probe_and_add_lun+0x353/0xb10
+ [<ffffffff817da069>] ? error_exit+0x29/0xb0
+ [<ffffffff817d98ed>] ? _raw_spin_unlock_irqrestore+0x3d/0x80
+ [<ffffffff8145e722>] __scsi_scan_target+0x112/0x680
+ [<ffffffff812c690d>] ? trace_hardirqs_off_thunk+0x3a/0x3c
+ [<ffffffff817da069>] ? error_exit+0x29/0xb0
+ [<ffffffff812bcc60>] ? kobject_del+0x40/0x40
+ [<ffffffff8145ed16>] scsi_scan_channel+0x86/0xb0
+ [<ffffffff8145f0b0>] scsi_scan_host_selected+0x140/0x150
+ [<ffffffff8145f149>] do_scsi_scan_host+0x89/0x90
+ [<ffffffff8145f170>] do_scan_async+0x20/0x160
+ [<ffffffff8145f150>] ? do_scsi_scan_host+0x90/0x90
+ [<ffffffff810975b6>] kthread+0xa6/0xb0
+ [<ffffffff817db154>] kernel_thread_helper+0x4/0x10
+ [<ffffffff81066430>] ? finish_task_switch+0x80/0x110
+ [<ffffffff817d9c04>] ? retint_restore_args+0xe/0xe
+ [<ffffffff81097510>] ? __kthread_init_worker+0x70/0x70
+ [<ffffffff817db150>] ? gs_change+0xb/0xb
+
+Line 2776 of block/cfq-iosched.c in v3.0-rc5 is as follows::
+
+ if (rcu_dereference(ioc->ioc_data) == cic) {
+
+This form says that it must be in a plain vanilla RCU read-side critical
+section, but the "other info" list above shows that this is not the
+case. Instead, we hold three locks, one of which might be RCU related.
+And maybe that lock really does protect this reference. If so, the fix
+is to inform RCU, perhaps by changing __cfq_exit_single_io_context() to
+take the struct request_queue "q" from cfq_exit_queue() as an argument,
+which would permit us to invoke rcu_dereference_protected as follows::
+
+ if (rcu_dereference_protected(ioc->ioc_data,
+ lockdep_is_held(&q->queue_lock)) == cic) {
+
+With this change, there would be no lockdep-RCU splat emitted if this
+code was invoked either from within an RCU read-side critical section
+or with the ->queue_lock held. In particular, this would have suppressed
+the above lockdep-RCU splat because ->queue_lock is held (see #2 in the
+list above).
+
+On the other hand, perhaps we really do need an RCU read-side critical
+section. In this case, the critical section must span the use of the
+return value from rcu_dereference(), or at least until there is some
+reference count incremented or some such. One way to handle this is to
+add rcu_read_lock() and rcu_read_unlock() as follows::
+
+ rcu_read_lock();
+ if (rcu_dereference(ioc->ioc_data) == cic) {
+ spin_lock(&ioc->lock);
+ rcu_assign_pointer(ioc->ioc_data, NULL);
+ spin_unlock(&ioc->lock);
+ }
+ rcu_read_unlock();
+
+With this change, the rcu_dereference() is always within an RCU
+read-side critical section, which again would have suppressed the
+above lockdep-RCU splat.
+
+But in this particular case, we don't actually dereference the pointer
+returned from rcu_dereference(). Instead, that pointer is just compared
+to the cic pointer, which means that the rcu_dereference() can be replaced
+by rcu_access_pointer() as follows::
+
+ if (rcu_access_pointer(ioc->ioc_data) == cic) {
+
+Because it is legal to invoke rcu_access_pointer() without protection,
+this change would also suppress the above lockdep-RCU splat.
diff --git a/Documentation/RCU/lockdep-splat.txt b/Documentation/RCU/lockdep-splat.txt
deleted file mode 100644
index 238e9f61352f..000000000000
--- a/Documentation/RCU/lockdep-splat.txt
+++ /dev/null
@@ -1,110 +0,0 @@
-Lockdep-RCU was added to the Linux kernel in early 2010
-(http://lwn.net/Articles/371986/). This facility checks for some common
-misuses of the RCU API, most notably using one of the rcu_dereference()
-family to access an RCU-protected pointer without the proper protection.
-When such misuse is detected, an lockdep-RCU splat is emitted.
-
-The usual cause of a lockdep-RCU slat is someone accessing an
-RCU-protected data structure without either (1) being in the right kind of
-RCU read-side critical section or (2) holding the right update-side lock.
-This problem can therefore be serious: it might result in random memory
-overwriting or worse. There can of course be false positives, this
-being the real world and all that.
-
-So let's look at an example RCU lockdep splat from 3.0-rc5, one that
-has long since been fixed:
-
-===============================
-[ INFO: suspicious RCU usage. ]
--------------------------------
-block/cfq-iosched.c:2776 suspicious rcu_dereference_protected() usage!
-
-other info that might help us debug this:
-
-
-rcu_scheduler_active = 1, debug_locks = 0
-3 locks held by scsi_scan_6/1552:
- #0: (&shost->scan_mutex){+.+.+.}, at: [<ffffffff8145efca>]
-scsi_scan_host_selected+0x5a/0x150
- #1: (&eq->sysfs_lock){+.+...}, at: [<ffffffff812a5032>]
-elevator_exit+0x22/0x60
- #2: (&(&q->__queue_lock)->rlock){-.-...}, at: [<ffffffff812b6233>]
-cfq_exit_queue+0x43/0x190
-
-stack backtrace:
-Pid: 1552, comm: scsi_scan_6 Not tainted 3.0.0-rc5 #17
-Call Trace:
- [<ffffffff810abb9b>] lockdep_rcu_dereference+0xbb/0xc0
- [<ffffffff812b6139>] __cfq_exit_single_io_context+0xe9/0x120
- [<ffffffff812b626c>] cfq_exit_queue+0x7c/0x190
- [<ffffffff812a5046>] elevator_exit+0x36/0x60
- [<ffffffff812a802a>] blk_cleanup_queue+0x4a/0x60
- [<ffffffff8145cc09>] scsi_free_queue+0x9/0x10
- [<ffffffff81460944>] __scsi_remove_device+0x84/0xd0
- [<ffffffff8145dca3>] scsi_probe_and_add_lun+0x353/0xb10
- [<ffffffff817da069>] ? error_exit+0x29/0xb0
- [<ffffffff817d98ed>] ? _raw_spin_unlock_irqrestore+0x3d/0x80
- [<ffffffff8145e722>] __scsi_scan_target+0x112/0x680
- [<ffffffff812c690d>] ? trace_hardirqs_off_thunk+0x3a/0x3c
- [<ffffffff817da069>] ? error_exit+0x29/0xb0
- [<ffffffff812bcc60>] ? kobject_del+0x40/0x40
- [<ffffffff8145ed16>] scsi_scan_channel+0x86/0xb0
- [<ffffffff8145f0b0>] scsi_scan_host_selected+0x140/0x150
- [<ffffffff8145f149>] do_scsi_scan_host+0x89/0x90
- [<ffffffff8145f170>] do_scan_async+0x20/0x160
- [<ffffffff8145f150>] ? do_scsi_scan_host+0x90/0x90
- [<ffffffff810975b6>] kthread+0xa6/0xb0
- [<ffffffff817db154>] kernel_thread_helper+0x4/0x10
- [<ffffffff81066430>] ? finish_task_switch+0x80/0x110
- [<ffffffff817d9c04>] ? retint_restore_args+0xe/0xe
- [<ffffffff81097510>] ? __kthread_init_worker+0x70/0x70
- [<ffffffff817db150>] ? gs_change+0xb/0xb
-
-Line 2776 of block/cfq-iosched.c in v3.0-rc5 is as follows:
-
- if (rcu_dereference(ioc->ioc_data) == cic) {
-
-This form says that it must be in a plain vanilla RCU read-side critical
-section, but the "other info" list above shows that this is not the
-case. Instead, we hold three locks, one of which might be RCU related.
-And maybe that lock really does protect this reference. If so, the fix
-is to inform RCU, perhaps by changing __cfq_exit_single_io_context() to
-take the struct request_queue "q" from cfq_exit_queue() as an argument,
-which would permit us to invoke rcu_dereference_protected as follows:
-
- if (rcu_dereference_protected(ioc->ioc_data,
- lockdep_is_held(&q->queue_lock)) == cic) {
-
-With this change, there would be no lockdep-RCU splat emitted if this
-code was invoked either from within an RCU read-side critical section
-or with the ->queue_lock held. In particular, this would have suppressed
-the above lockdep-RCU splat because ->queue_lock is held (see #2 in the
-list above).
-
-On the other hand, perhaps we really do need an RCU read-side critical
-section. In this case, the critical section must span the use of the
-return value from rcu_dereference(), or at least until there is some
-reference count incremented or some such. One way to handle this is to
-add rcu_read_lock() and rcu_read_unlock() as follows:
-
- rcu_read_lock();
- if (rcu_dereference(ioc->ioc_data) == cic) {
- spin_lock(&ioc->lock);
- rcu_assign_pointer(ioc->ioc_data, NULL);
- spin_unlock(&ioc->lock);
- }
- rcu_read_unlock();
-
-With this change, the rcu_dereference() is always within an RCU
-read-side critical section, which again would have suppressed the
-above lockdep-RCU splat.
-
-But in this particular case, we don't actually deference the pointer
-returned from rcu_dereference(). Instead, that pointer is just compared
-to the cic pointer, which means that the rcu_dereference() can be replaced
-by rcu_access_pointer() as follows:
-
- if (rcu_access_pointer(ioc->ioc_data) == cic) {
-
-Because it is legal to invoke rcu_access_pointer() without protection,
-this change would also suppress the above lockdep-RCU splat.
diff --git a/Documentation/RCU/lockdep.rst b/Documentation/RCU/lockdep.rst
new file mode 100644
index 000000000000..741b157bbacb
--- /dev/null
+++ b/Documentation/RCU/lockdep.rst
@@ -0,0 +1,119 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+========================
+RCU and lockdep checking
+========================
+
+All flavors of RCU have lockdep checking available, so that lockdep is
+aware of when each task enters and leaves any flavor of RCU read-side
+critical section. Each flavor of RCU is tracked separately (but note
+that this is not the case in 2.6.32 and earlier). This allows lockdep's
+tracking to include RCU state, which can sometimes help when debugging
+deadlocks and the like.
+
+In addition, RCU provides the following primitives that check lockdep's
+state::
+
+ rcu_read_lock_held() for normal RCU.
+ rcu_read_lock_bh_held() for RCU-bh.
+ rcu_read_lock_sched_held() for RCU-sched.
+ rcu_read_lock_any_held() for any of normal RCU, RCU-bh, and RCU-sched.
+ srcu_read_lock_held() for SRCU.
+ rcu_read_lock_trace_held() for RCU Tasks Trace.
+
+These functions are conservative, and will therefore return 1 if they
+aren't certain (for example, if CONFIG_DEBUG_LOCK_ALLOC is not set).
+This prevents things like WARN_ON(!rcu_read_lock_held()) from giving false
+positives when lockdep is disabled.
+
+In addition, a separate kernel config parameter CONFIG_PROVE_RCU enables
+checking of rcu_dereference() primitives:
+
+ rcu_dereference(p):
+ Check for RCU read-side critical section.
+ rcu_dereference_bh(p):
+ Check for RCU-bh read-side critical section.
+ rcu_dereference_sched(p):
+ Check for RCU-sched read-side critical section.
+ srcu_dereference(p, sp):
+ Check for SRCU read-side critical section.
+ rcu_dereference_check(p, c):
+ Use explicit check expression "c" along with
+ rcu_read_lock_held(). This is useful in code that is
+ invoked by both RCU readers and updaters.
+ rcu_dereference_bh_check(p, c):
+ Use explicit check expression "c" along with
+ rcu_read_lock_bh_held(). This is useful in code that
+ is invoked by both RCU-bh readers and updaters.
+ rcu_dereference_sched_check(p, c):
+ Use explicit check expression "c" along with
+ rcu_read_lock_sched_held(). This is useful in code that
+ is invoked by both RCU-sched readers and updaters.
+ srcu_dereference_check(p, c):
+ Use explicit check expression "c" along with
+ srcu_read_lock_held(). This is useful in code that
+ is invoked by both SRCU readers and updaters.
+ rcu_dereference_raw(p):
+ Don't check. (Use sparingly, if at all.)
+ rcu_dereference_raw_check(p):
+ Don't do lockdep at all. (Use sparingly, if at all.)
+ rcu_dereference_protected(p, c):
+ Use explicit check expression "c", and omit all barriers
+ and compiler constraints. This is useful when the data
+ structure cannot change, for example, in code that is
+ invoked only by updaters.
+ rcu_access_pointer(p):
+ Return the value of the pointer and omit all barriers,
+ but retain the compiler constraints that prevent duplicating
+ or coalescing. This is useful when testing the
+ value of the pointer itself, for example, against NULL.
+
+The rcu_dereference_check() check expression can be any boolean
+expression, but would normally include a lockdep expression. For a
+moderately ornate example, consider the following::
+
+ file = rcu_dereference_check(fdt->fd[fd],
+ lockdep_is_held(&files->file_lock) ||
+ atomic_read(&files->count) == 1);
+
+This expression picks up the pointer "fdt->fd[fd]" in an RCU-safe manner,
+and, if CONFIG_PROVE_RCU is configured, verifies that this expression
+is used in:
+
+1. An RCU read-side critical section (implicit), or
+2. with files->file_lock held, or
+3. on an unshared files_struct.
+
+In case (1), the pointer is picked up in an RCU-safe manner for vanilla
+RCU read-side critical sections, in case (2) the ->file_lock prevents
+any change from taking place, and finally, in case (3) the current task
+is the only task accessing the file_struct, again preventing any change
+from taking place. If the above statement was invoked only from updater
+code, it could instead be written as follows::
+
+ file = rcu_dereference_protected(fdt->fd[fd],
+ lockdep_is_held(&files->file_lock) ||
+ atomic_read(&files->count) == 1);
+
+This would verify cases #2 and #3 above, and furthermore lockdep would
+complain even if this was used in an RCU read-side critical section unless
+one of these two cases held. Because rcu_dereference_protected() omits
+all barriers and compiler constraints, it generates better code than do
+the other flavors of rcu_dereference(). On the other hand, it is illegal
+to use rcu_dereference_protected() if either the RCU-protected pointer
+or the RCU-protected data that it points to can change concurrently.
+
+Like rcu_dereference(), when lockdep is enabled, RCU list and hlist
+traversal primitives check for being called from within an RCU read-side
+critical section. However, a lockdep expression can be passed to them
+as an additional optional argument. With this lockdep expression, these
+traversal primitives will complain only if the lockdep expression is
+false and they are called from outside any RCU read-side critical section.
+
+For example, the workqueue for_each_pwq() macro is intended to be used
+either within an RCU read-side critical section or with wq->mutex held.
+It is thus implemented as follows::
+
+ #define for_each_pwq(pwq, wq)
+ list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node,
+ lock_is_held(&(wq->mutex).dep_map))
diff --git a/Documentation/RCU/lockdep.txt b/Documentation/RCU/lockdep.txt
deleted file mode 100644
index da51d3068850..000000000000
--- a/Documentation/RCU/lockdep.txt
+++ /dev/null
@@ -1,102 +0,0 @@
-RCU and lockdep checking
-
-All flavors of RCU have lockdep checking available, so that lockdep is
-aware of when each task enters and leaves any flavor of RCU read-side
-critical section. Each flavor of RCU is tracked separately (but note
-that this is not the case in 2.6.32 and earlier). This allows lockdep's
-tracking to include RCU state, which can sometimes help when debugging
-deadlocks and the like.
-
-In addition, RCU provides the following primitives that check lockdep's
-state:
-
- rcu_read_lock_held() for normal RCU.
- rcu_read_lock_bh_held() for RCU-bh.
- rcu_read_lock_sched_held() for RCU-sched.
- srcu_read_lock_held() for SRCU.
-
-These functions are conservative, and will therefore return 1 if they
-aren't certain (for example, if CONFIG_DEBUG_LOCK_ALLOC is not set).
-This prevents things like WARN_ON(!rcu_read_lock_held()) from giving false
-positives when lockdep is disabled.
-
-In addition, a separate kernel config parameter CONFIG_PROVE_RCU enables
-checking of rcu_dereference() primitives:
-
- rcu_dereference(p):
- Check for RCU read-side critical section.
- rcu_dereference_bh(p):
- Check for RCU-bh read-side critical section.
- rcu_dereference_sched(p):
- Check for RCU-sched read-side critical section.
- srcu_dereference(p, sp):
- Check for SRCU read-side critical section.
- rcu_dereference_check(p, c):
- Use explicit check expression "c" along with
- rcu_read_lock_held(). This is useful in code that is
- invoked by both RCU readers and updaters.
- rcu_dereference_bh_check(p, c):
- Use explicit check expression "c" along with
- rcu_read_lock_bh_held(). This is useful in code that
- is invoked by both RCU-bh readers and updaters.
- rcu_dereference_sched_check(p, c):
- Use explicit check expression "c" along with
- rcu_read_lock_sched_held(). This is useful in code that
- is invoked by both RCU-sched readers and updaters.
- srcu_dereference_check(p, c):
- Use explicit check expression "c" along with
- srcu_read_lock_held()(). This is useful in code that
- is invoked by both SRCU readers and updaters.
- rcu_dereference_raw(p):
- Don't check. (Use sparingly, if at all.)
- rcu_dereference_protected(p, c):
- Use explicit check expression "c", and omit all barriers
- and compiler constraints. This is useful when the data
- structure cannot change, for example, in code that is
- invoked only by updaters.
- rcu_access_pointer(p):
- Return the value of the pointer and omit all barriers,
- but retain the compiler constraints that prevent duplicating
- or coalescsing. This is useful when when testing the
- value of the pointer itself, for example, against NULL.
-
-The rcu_dereference_check() check expression can be any boolean
-expression, but would normally include a lockdep expression. However,
-any boolean expression can be used. For a moderately ornate example,
-consider the following:
-
- file = rcu_dereference_check(fdt->fd[fd],
- lockdep_is_held(&files->file_lock) ||
- atomic_read(&files->count) == 1);
-
-This expression picks up the pointer "fdt->fd[fd]" in an RCU-safe manner,
-and, if CONFIG_PROVE_RCU is configured, verifies that this expression
-is used in:
-
-1. An RCU read-side critical section (implicit), or
-2. with files->file_lock held, or
-3. on an unshared files_struct.
-
-In case (1), the pointer is picked up in an RCU-safe manner for vanilla
-RCU read-side critical sections, in case (2) the ->file_lock prevents
-any change from taking place, and finally, in case (3) the current task
-is the only task accessing the file_struct, again preventing any change
-from taking place. If the above statement was invoked only from updater
-code, it could instead be written as follows:
-
- file = rcu_dereference_protected(fdt->fd[fd],
- lockdep_is_held(&files->file_lock) ||
- atomic_read(&files->count) == 1);
-
-This would verify cases #2 and #3 above, and furthermore lockdep would
-complain if this was used in an RCU read-side critical section unless one
-of these two cases held. Because rcu_dereference_protected() omits all
-barriers and compiler constraints, it generates better code than do the
-other flavors of rcu_dereference(). On the other hand, it is illegal
-to use rcu_dereference_protected() if either the RCU-protected pointer
-or the RCU-protected data that it points to can change concurrently.
-
-There are currently only "universal" versions of the rcu_assign_pointer()
-and RCU list-/tree-traversal primitives, which do not (yet) check for
-being in an RCU read-side critical section. In the future, separate
-versions of these primitives might be created.
diff --git a/Documentation/RCU/rcu.rst b/Documentation/RCU/rcu.rst
new file mode 100644
index 000000000000..bf6617b330a7
--- /dev/null
+++ b/Documentation/RCU/rcu.rst
@@ -0,0 +1,93 @@
+.. _rcu_doc:
+
+RCU Concepts
+============
+
+The basic idea behind RCU (read-copy update) is to split destructive
+operations into two parts, one that prevents anyone from seeing the data
+item being destroyed, and one that actually carries out the destruction.
+A "grace period" must elapse between the two parts, and this grace period
+must be long enough that any readers accessing the item being deleted have
+since dropped their references. For example, an RCU-protected deletion
+from a linked list would first remove the item from the list, wait for
+a grace period to elapse, then free the element. See listRCU.rst for more
+information on using RCU with linked lists.
+
+Frequently Asked Questions
+--------------------------
+
+- Why would anyone want to use RCU?
+
+ The advantage of RCU's two-part approach is that RCU readers need
+ not acquire any locks, perform any atomic instructions, write to
+ shared memory, or (on CPUs other than Alpha) execute any memory
+ barriers. The fact that these operations are quite expensive
+ on modern CPUs is what gives RCU its performance advantages
+ in read-mostly situations. The fact that RCU readers need not
+ acquire locks can also greatly simplify deadlock-avoidance code.
+
+- How can the updater tell when a grace period has completed
+ if the RCU readers give no indication when they are done?
+
+ Just as with spinlocks, RCU readers are not permitted to
+ block, switch to user-mode execution, or enter the idle loop.
+ Therefore, as soon as a CPU is seen passing through any of these
+ three states, we know that that CPU has exited any previous RCU
+ read-side critical sections. So, if we remove an item from a
+ linked list, and then wait until all CPUs have switched context,
+ executed in user mode, or executed in the idle loop, we can
+ safely free up that item.
+
+ Preemptible variants of RCU (CONFIG_PREEMPT_RCU) get the
+ same effect, but require that the readers manipulate CPU-local
+ counters. These counters allow limited types of blocking within
+ RCU read-side critical sections. SRCU also uses CPU-local
+ counters, and permits general blocking within RCU read-side
+ critical sections. These variants of RCU detect grace periods
+ by sampling these counters.
+
+- If I am running on a uniprocessor kernel, which can only do one
+ thing at a time, why should I wait for a grace period?
+
+ See UP.rst for more information.
+
+- How can I see where RCU is currently used in the Linux kernel?
+
+ Search for "rcu_read_lock", "rcu_read_unlock", "call_rcu",
+ "rcu_read_lock_bh", "rcu_read_unlock_bh", "srcu_read_lock",
+ "srcu_read_unlock", "synchronize_rcu", "synchronize_net",
+ "synchronize_srcu", and the other RCU primitives. Or grab one
+ of the cscope databases from:
+
+ (http://www.rdrop.com/users/paulmck/RCU/linuxusage/rculocktab.html).
+
+- What guidelines should I follow when writing code that uses RCU?
+
+ See checklist.rst.
+
+- Why the name "RCU"?
+
+ "RCU" stands for "read-copy update".
+ listRCU.rst has more information on where this name came from, search
+ for "read-copy update" to find it.
+
+- I hear that RCU is patented? What is with that?
+
+ Yes, it is. There are several known patents related to RCU,
+ search for the string "Patent" in Documentation/RCU/RTFP.txt to find them.
+ Of these, one was allowed to lapse by the assignee, and the
+ others have been contributed to the Linux kernel under GPL.
+ Many (but not all) have long since expired.
+ There are now also LGPL implementations of user-level RCU
+ available (https://liburcu.org/).
+
+- I hear that RCU needs work in order to support realtime kernels?
+
+ Realtime-friendly RCU are enabled via the CONFIG_PREEMPTION
+ kernel configuration parameter.
+
+- Where can I find more information on RCU?
+
+ See the Documentation/RCU/RTFP.txt file.
+ Or point your browser at (https://docs.google.com/document/d/1X0lThx8OK0ZgLMqVoXiR4ZrGURHrXK6NyLRbeXe3Xac/edit)
+ or (https://docs.google.com/document/d/1GCdQC8SDbb54W1shjEXqGZ0Rq8a6kIeYutdSIajfpLA/edit?usp=sharing).
diff --git a/Documentation/RCU/rcu.txt b/Documentation/RCU/rcu.txt
deleted file mode 100644
index 721b3e426515..000000000000
--- a/Documentation/RCU/rcu.txt
+++ /dev/null
@@ -1,89 +0,0 @@
-RCU Concepts
-
-
-The basic idea behind RCU (read-copy update) is to split destructive
-operations into two parts, one that prevents anyone from seeing the data
-item being destroyed, and one that actually carries out the destruction.
-A "grace period" must elapse between the two parts, and this grace period
-must be long enough that any readers accessing the item being deleted have
-since dropped their references. For example, an RCU-protected deletion
-from a linked list would first remove the item from the list, wait for
-a grace period to elapse, then free the element. See the listRCU.txt
-file for more information on using RCU with linked lists.
-
-
-Frequently Asked Questions
-
-o Why would anyone want to use RCU?
-
- The advantage of RCU's two-part approach is that RCU readers need
- not acquire any locks, perform any atomic instructions, write to
- shared memory, or (on CPUs other than Alpha) execute any memory
- barriers. The fact that these operations are quite expensive
- on modern CPUs is what gives RCU its performance advantages
- in read-mostly situations. The fact that RCU readers need not
- acquire locks can also greatly simplify deadlock-avoidance code.
-
-o How can the updater tell when a grace period has completed
- if the RCU readers give no indication when they are done?
-
- Just as with spinlocks, RCU readers are not permitted to
- block, switch to user-mode execution, or enter the idle loop.
- Therefore, as soon as a CPU is seen passing through any of these
- three states, we know that that CPU has exited any previous RCU
- read-side critical sections. So, if we remove an item from a
- linked list, and then wait until all CPUs have switched context,
- executed in user mode, or executed in the idle loop, we can
- safely free up that item.
-
- Preemptible variants of RCU (CONFIG_PREEMPT_RCU) get the
- same effect, but require that the readers manipulate CPU-local
- counters. These counters allow limited types of blocking within
- RCU read-side critical sections. SRCU also uses CPU-local
- counters, and permits general blocking within RCU read-side
- critical sections. These variants of RCU detect grace periods
- by sampling these counters.
-
-o If I am running on a uniprocessor kernel, which can only do one
- thing at a time, why should I wait for a grace period?
-
- See the UP.txt file in this directory.
-
-o How can I see where RCU is currently used in the Linux kernel?
-
- Search for "rcu_read_lock", "rcu_read_unlock", "call_rcu",
- "rcu_read_lock_bh", "rcu_read_unlock_bh", "call_rcu_bh",
- "srcu_read_lock", "srcu_read_unlock", "synchronize_rcu",
- "synchronize_net", "synchronize_srcu", and the other RCU
- primitives. Or grab one of the cscope databases from:
-
- http://www.rdrop.com/users/paulmck/RCU/linuxusage/rculocktab.html
-
-o What guidelines should I follow when writing code that uses RCU?
-
- See the checklist.txt file in this directory.
-
-o Why the name "RCU"?
-
- "RCU" stands for "read-copy update". The file listRCU.txt has
- more information on where this name came from, search for
- "read-copy update" to find it.
-
-o I hear that RCU is patented? What is with that?
-
- Yes, it is. There are several known patents related to RCU,
- search for the string "Patent" in RTFP.txt to find them.
- Of these, one was allowed to lapse by the assignee, and the
- others have been contributed to the Linux kernel under GPL.
- There are now also LGPL implementations of user-level RCU
- available (http://liburcu.org/).
-
-o I hear that RCU needs work in order to support realtime kernels?
-
- Realtime-friendly RCU can be enabled via the CONFIG_PREEMPT_RCU
- kernel configuration parameter.
-
-o Where can I find more information on RCU?
-
- See the RTFP.txt file in this directory.
- Or point your browser at http://www.rdrop.com/users/paulmck/RCU/.
diff --git a/Documentation/RCU/rcu_dereference.rst b/Documentation/RCU/rcu_dereference.rst
new file mode 100644
index 000000000000..2524dcdadde2
--- /dev/null
+++ b/Documentation/RCU/rcu_dereference.rst
@@ -0,0 +1,502 @@
+.. _rcu_dereference_doc:
+
+PROPER CARE AND FEEDING OF RETURN VALUES FROM rcu_dereference()
+===============================================================
+
+Proper care and feeding of address and data dependencies is critically
+important to correct use of things like RCU. To this end, the pointers
+returned from the rcu_dereference() family of primitives carry address and
+data dependencies. These dependencies extend from the rcu_dereference()
+macro's load of the pointer to the later use of that pointer to compute
+either the address of a later memory access (representing an address
+dependency) or the value written by a later memory access (representing
+a data dependency).
+
+Most of the time, these dependencies are preserved, permitting you to
+freely use values from rcu_dereference(). For example, dereferencing
+(prefix "*"), field selection ("->"), assignment ("="), address-of
+("&"), casts, and addition or subtraction of constants all work quite
+naturally and safely. However, because current compilers do not take
+either address or data dependencies into account it is still possible
+to get into trouble.
+
+Follow these rules to preserve the address and data dependencies emanating
+from your calls to rcu_dereference() and friends, thus keeping your RCU
+readers working properly:
+
+- You must use one of the rcu_dereference() family of primitives
+ to load an RCU-protected pointer, otherwise CONFIG_PROVE_RCU
+ will complain. Worse yet, your code can see random memory-corruption
+ bugs due to games that compilers and DEC Alpha can play.
+ Without one of the rcu_dereference() primitives, compilers
+ can reload the value, and won't your code have fun with two
+ different values for a single pointer! Without rcu_dereference(),
+ DEC Alpha can load a pointer, dereference that pointer, and
+ return data preceding initialization that preceded the store
+ of the pointer. (As noted later, in recent kernels READ_ONCE()
+ also prevents DEC Alpha from playing these tricks.)
+
+ In addition, the volatile cast in rcu_dereference() prevents the
+ compiler from deducing the resulting pointer value. Please see
+ the section entitled "EXAMPLE WHERE THE COMPILER KNOWS TOO MUCH"
+ for an example where the compiler can in fact deduce the exact
+ value of the pointer, and thus cause misordering.
+
+- In the special case where data is added but is never removed
+ while readers are accessing the structure, READ_ONCE() may be used
+ instead of rcu_dereference(). In this case, use of READ_ONCE()
+ takes on the role of the lockless_dereference() primitive that
+ was removed in v4.15.
+
+- You are only permitted to use rcu_dereference() on pointer values.
+ The compiler simply knows too much about integral values to
+ trust it to carry dependencies through integer operations.
+ There are a very few exceptions, namely that you can temporarily
+ cast the pointer to uintptr_t in order to:
+
+ - Set bits and clear bits down in the must-be-zero low-order
+ bits of that pointer. This clearly means that the pointer
+ must have alignment constraints, for example, this does
+ *not* work in general for char* pointers.
+
+ - XOR bits to translate pointers, as is done in some
+ classic buddy-allocator algorithms.
+
+ It is important to cast the value back to pointer before
+ doing much of anything else with it.
+
+- Avoid cancellation when using the "+" and "-" infix arithmetic
+ operators. For example, for a given variable "x", avoid
+ "(x-(uintptr_t)x)" for char* pointers. The compiler is within its
+ rights to substitute zero for this sort of expression, so that
+ subsequent accesses no longer depend on the rcu_dereference(),
+ again possibly resulting in bugs due to misordering.
+
+ Of course, if "p" is a pointer from rcu_dereference(), and "a"
+ and "b" are integers that happen to be equal, the expression
+ "p+a-b" is safe because its value still necessarily depends on
+ the rcu_dereference(), thus maintaining proper ordering.
+
+- If you are using RCU to protect JITed functions, so that the
+ "()" function-invocation operator is applied to a value obtained
+ (directly or indirectly) from rcu_dereference(), you may need to
+ interact directly with the hardware to flush instruction caches.
+ This issue arises on some systems when a newly JITed function is
+ using the same memory that was used by an earlier JITed function.
+
+- Do not use the results from relational operators ("==", "!=",
+ ">", ">=", "<", or "<=") when dereferencing. For example,
+ the following (quite strange) code is buggy::
+
+ int *p;
+ int *q;
+
+ ...
+
+ p = rcu_dereference(gp)
+ q = &global_q;
+ q += p > &oom_p;
+ r1 = *q; /* BUGGY!!! */
+
+ As before, the reason this is buggy is that relational operators
+ are often compiled using branches. And as before, although
+ weak-memory machines such as ARM or PowerPC do order stores
+ after such branches, but can speculate loads, which can again
+ result in misordering bugs.
+
+- Be very careful about comparing pointers obtained from
+ rcu_dereference() against non-NULL values. As Linus Torvalds
+ explained, if the two pointers are equal, the compiler could
+ substitute the pointer you are comparing against for the pointer
+ obtained from rcu_dereference(). For example::
+
+ p = rcu_dereference(gp);
+ if (p == &default_struct)
+ do_default(p->a);
+
+ Because the compiler now knows that the value of "p" is exactly
+ the address of the variable "default_struct", it is free to
+ transform this code into the following::
+
+ p = rcu_dereference(gp);
+ if (p == &default_struct)
+ do_default(default_struct.a);
+
+ On ARM and Power hardware, the load from "default_struct.a"
+ can now be speculated, such that it might happen before the
+ rcu_dereference(). This could result in bugs due to misordering.
+
+ However, comparisons are OK in the following cases:
+
+ - The comparison was against the NULL pointer. If the
+ compiler knows that the pointer is NULL, you had better
+ not be dereferencing it anyway. If the comparison is
+ non-equal, the compiler is none the wiser. Therefore,
+ it is safe to compare pointers from rcu_dereference()
+ against NULL pointers.
+
+ - The pointer is never dereferenced after being compared.
+ Since there are no subsequent dereferences, the compiler
+ cannot use anything it learned from the comparison
+ to reorder the non-existent subsequent dereferences.
+ This sort of comparison occurs frequently when scanning
+ RCU-protected circular linked lists.
+
+ Note that if the pointer comparison is done outside
+ of an RCU read-side critical section, and the pointer
+ is never dereferenced, rcu_access_pointer() should be
+ used in place of rcu_dereference(). In most cases,
+ it is best to avoid accidental dereferences by testing
+ the rcu_access_pointer() return value directly, without
+ assigning it to a variable.
+
+ Within an RCU read-side critical section, there is little
+ reason to use rcu_access_pointer().
+
+ - The comparison is against a pointer that references memory
+ that was initialized "a long time ago." The reason
+ this is safe is that even if misordering occurs, the
+ misordering will not affect the accesses that follow
+ the comparison. So exactly how long ago is "a long
+ time ago"? Here are some possibilities:
+
+ - Compile time.
+
+ - Boot time.
+
+ - Module-init time for module code.
+
+ - Prior to kthread creation for kthread code.
+
+ - During some prior acquisition of the lock that
+ we now hold.
+
+ - Before mod_timer() time for a timer handler.
+
+ There are many other possibilities involving the Linux
+ kernel's wide array of primitives that cause code to
+ be invoked at a later time.
+
+ - The pointer being compared against also came from
+ rcu_dereference(). In this case, both pointers depend
+ on one rcu_dereference() or another, so you get proper
+ ordering either way.
+
+ That said, this situation can make certain RCU usage
+ bugs more likely to happen. Which can be a good thing,
+ at least if they happen during testing. An example
+ of such an RCU usage bug is shown in the section titled
+ "EXAMPLE OF AMPLIFIED RCU-USAGE BUG".
+
+ - All of the accesses following the comparison are stores,
+ so that a control dependency preserves the needed ordering.
+ That said, it is easy to get control dependencies wrong.
+ Please see the "CONTROL DEPENDENCIES" section of
+ Documentation/memory-barriers.txt for more details.
+
+ - The pointers are not equal *and* the compiler does
+ not have enough information to deduce the value of the
+ pointer. Note that the volatile cast in rcu_dereference()
+ will normally prevent the compiler from knowing too much.
+
+ However, please note that if the compiler knows that the
+ pointer takes on only one of two values, a not-equal
+ comparison will provide exactly the information that the
+ compiler needs to deduce the value of the pointer.
+
+- Disable any value-speculation optimizations that your compiler
+ might provide, especially if you are making use of feedback-based
+ optimizations that take data collected from prior runs. Such
+ value-speculation optimizations reorder operations by design.
+
+ There is one exception to this rule: Value-speculation
+ optimizations that leverage the branch-prediction hardware are
+ safe on strongly ordered systems (such as x86), but not on weakly
+ ordered systems (such as ARM or Power). Choose your compiler
+ command-line options wisely!
+
+
+EXAMPLE OF AMPLIFIED RCU-USAGE BUG
+----------------------------------
+
+Because updaters can run concurrently with RCU readers, RCU readers can
+see stale and/or inconsistent values. If RCU readers need fresh or
+consistent values, which they sometimes do, they need to take proper
+precautions. To see this, consider the following code fragment::
+
+ struct foo {
+ int a;
+ int b;
+ int c;
+ };
+ struct foo *gp1;
+ struct foo *gp2;
+
+ void updater(void)
+ {
+ struct foo *p;
+
+ p = kmalloc(...);
+ if (p == NULL)
+ deal_with_it();
+ p->a = 42; /* Each field in its own cache line. */
+ p->b = 43;
+ p->c = 44;
+ rcu_assign_pointer(gp1, p);
+ p->b = 143;
+ p->c = 144;
+ rcu_assign_pointer(gp2, p);
+ }
+
+ void reader(void)
+ {
+ struct foo *p;
+ struct foo *q;
+ int r1, r2;
+
+ rcu_read_lock();
+ p = rcu_dereference(gp2);
+ if (p == NULL)
+ return;
+ r1 = p->b; /* Guaranteed to get 143. */
+ q = rcu_dereference(gp1); /* Guaranteed non-NULL. */
+ if (p == q) {
+ /* The compiler decides that q->c is same as p->c. */
+ r2 = p->c; /* Could get 44 on weakly order system. */
+ } else {
+ r2 = p->c - r1; /* Unconditional access to p->c. */
+ }
+ rcu_read_unlock();
+ do_something_with(r1, r2);
+ }
+
+You might be surprised that the outcome (r1 == 143 && r2 == 44) is possible,
+but you should not be. After all, the updater might have been invoked
+a second time between the time reader() loaded into "r1" and the time
+that it loaded into "r2". The fact that this same result can occur due
+to some reordering from the compiler and CPUs is beside the point.
+
+But suppose that the reader needs a consistent view?
+
+Then one approach is to use locking, for example, as follows::
+
+ struct foo {
+ int a;
+ int b;
+ int c;
+ spinlock_t lock;
+ };
+ struct foo *gp1;
+ struct foo *gp2;
+
+ void updater(void)
+ {
+ struct foo *p;
+
+ p = kmalloc(...);
+ if (p == NULL)
+ deal_with_it();
+ spin_lock(&p->lock);
+ p->a = 42; /* Each field in its own cache line. */
+ p->b = 43;
+ p->c = 44;
+ spin_unlock(&p->lock);
+ rcu_assign_pointer(gp1, p);
+ spin_lock(&p->lock);
+ p->b = 143;
+ p->c = 144;
+ spin_unlock(&p->lock);
+ rcu_assign_pointer(gp2, p);
+ }
+
+ void reader(void)
+ {
+ struct foo *p;
+ struct foo *q;
+ int r1, r2;
+
+ rcu_read_lock();
+ p = rcu_dereference(gp2);
+ if (p == NULL)
+ return;
+ spin_lock(&p->lock);
+ r1 = p->b; /* Guaranteed to get 143. */
+ q = rcu_dereference(gp1); /* Guaranteed non-NULL. */
+ if (p == q) {
+ /* The compiler decides that q->c is same as p->c. */
+ r2 = p->c; /* Locking guarantees r2 == 144. */
+ } else {
+ spin_lock(&q->lock);
+ r2 = q->c - r1;
+ spin_unlock(&q->lock);
+ }
+ rcu_read_unlock();
+ spin_unlock(&p->lock);
+ do_something_with(r1, r2);
+ }
+
+As always, use the right tool for the job!
+
+
+EXAMPLE WHERE THE COMPILER KNOWS TOO MUCH
+-----------------------------------------
+
+If a pointer obtained from rcu_dereference() compares not-equal to some
+other pointer, the compiler normally has no clue what the value of the
+first pointer might be. This lack of knowledge prevents the compiler
+from carrying out optimizations that otherwise might destroy the ordering
+guarantees that RCU depends on. And the volatile cast in rcu_dereference()
+should prevent the compiler from guessing the value.
+
+But without rcu_dereference(), the compiler knows more than you might
+expect. Consider the following code fragment::
+
+ struct foo {
+ int a;
+ int b;
+ };
+ static struct foo variable1;
+ static struct foo variable2;
+ static struct foo *gp = &variable1;
+
+ void updater(void)
+ {
+ initialize_foo(&variable2);
+ rcu_assign_pointer(gp, &variable2);
+ /*
+ * The above is the only store to gp in this translation unit,
+ * and the address of gp is not exported in any way.
+ */
+ }
+
+ int reader(void)
+ {
+ struct foo *p;
+
+ p = gp;
+ barrier();
+ if (p == &variable1)
+ return p->a; /* Must be variable1.a. */
+ else
+ return p->b; /* Must be variable2.b. */
+ }
+
+Because the compiler can see all stores to "gp", it knows that the only
+possible values of "gp" are "variable1" on the one hand and "variable2"
+on the other. The comparison in reader() therefore tells the compiler
+the exact value of "p" even in the not-equals case. This allows the
+compiler to make the return values independent of the load from "gp",
+in turn destroying the ordering between this load and the loads of the
+return values. This can result in "p->b" returning pre-initialization
+garbage values on weakly ordered systems.
+
+In short, rcu_dereference() is *not* optional when you are going to
+dereference the resulting pointer.
+
+
+WHICH MEMBER OF THE rcu_dereference() FAMILY SHOULD YOU USE?
+------------------------------------------------------------
+
+First, please avoid using rcu_dereference_raw() and also please avoid
+using rcu_dereference_check() and rcu_dereference_protected() with a
+second argument with a constant value of 1 (or true, for that matter).
+With that caution out of the way, here is some guidance for which
+member of the rcu_dereference() to use in various situations:
+
+1. If the access needs to be within an RCU read-side critical
+ section, use rcu_dereference(). With the new consolidated
+ RCU flavors, an RCU read-side critical section is entered
+ using rcu_read_lock(), anything that disables bottom halves,
+ anything that disables interrupts, or anything that disables
+ preemption. Please note that spinlock critical sections
+ are also implied RCU read-side critical sections, even when
+ they are preemptible, as they are in kernels built with
+ CONFIG_PREEMPT_RT=y.
+
+2. If the access might be within an RCU read-side critical section
+ on the one hand, or protected by (say) my_lock on the other,
+ use rcu_dereference_check(), for example::
+
+ p1 = rcu_dereference_check(p->rcu_protected_pointer,
+ lockdep_is_held(&my_lock));
+
+
+3. If the access might be within an RCU read-side critical section
+ on the one hand, or protected by either my_lock or your_lock on
+ the other, again use rcu_dereference_check(), for example::
+
+ p1 = rcu_dereference_check(p->rcu_protected_pointer,
+ lockdep_is_held(&my_lock) ||
+ lockdep_is_held(&your_lock));
+
+4. If the access is on the update side, so that it is always protected
+ by my_lock, use rcu_dereference_protected()::
+
+ p1 = rcu_dereference_protected(p->rcu_protected_pointer,
+ lockdep_is_held(&my_lock));
+
+ This can be extended to handle multiple locks as in #3 above,
+ and both can be extended to check other conditions as well.
+
+5. If the protection is supplied by the caller, and is thus unknown
+ to this code, that is the rare case when rcu_dereference_raw()
+ is appropriate. In addition, rcu_dereference_raw() might be
+ appropriate when the lockdep expression would be excessively
+ complex, except that a better approach in that case might be to
+ take a long hard look at your synchronization design. Still,
+ there are data-locking cases where any one of a very large number
+ of locks or reference counters suffices to protect the pointer,
+ so rcu_dereference_raw() does have its place.
+
+ However, its place is probably quite a bit smaller than one
+ might expect given the number of uses in the current kernel.
+ Ditto for its synonym, rcu_dereference_check( ... , 1), and
+ its close relative, rcu_dereference_protected(... , 1).
+
+
+SPARSE CHECKING OF RCU-PROTECTED POINTERS
+-----------------------------------------
+
+The sparse static-analysis tool checks for non-RCU access to RCU-protected
+pointers, which can result in "interesting" bugs due to compiler
+optimizations involving invented loads and perhaps also load tearing.
+For example, suppose someone mistakenly does something like this::
+
+ p = q->rcu_protected_pointer;
+ do_something_with(p->a);
+ do_something_else_with(p->b);
+
+If register pressure is high, the compiler might optimize "p" out
+of existence, transforming the code to something like this::
+
+ do_something_with(q->rcu_protected_pointer->a);
+ do_something_else_with(q->rcu_protected_pointer->b);
+
+This could fatally disappoint your code if q->rcu_protected_pointer
+changed in the meantime. Nor is this a theoretical problem: Exactly
+this sort of bug cost Paul E. McKenney (and several of his innocent
+colleagues) a three-day weekend back in the early 1990s.
+
+Load tearing could of course result in dereferencing a mashup of a pair
+of pointers, which also might fatally disappoint your code.
+
+These problems could have been avoided simply by making the code instead
+read as follows::
+
+ p = rcu_dereference(q->rcu_protected_pointer);
+ do_something_with(p->a);
+ do_something_else_with(p->b);
+
+Unfortunately, these sorts of bugs can be extremely hard to spot during
+review. This is where the sparse tool comes into play, along with the
+"__rcu" marker. If you mark a pointer declaration, whether in a structure
+or as a formal parameter, with "__rcu", which tells sparse to complain if
+this pointer is accessed directly. It will also cause sparse to complain
+if a pointer not marked with "__rcu" is accessed using rcu_dereference()
+and friends. For example, ->rcu_protected_pointer might be declared as
+follows::
+
+ struct foo __rcu *rcu_protected_pointer;
+
+Use of "__rcu" is opt-in. If you choose not to use it, then you should
+ignore the sparse warnings.
diff --git a/Documentation/RCU/rcu_dereference.txt b/Documentation/RCU/rcu_dereference.txt
deleted file mode 100644
index ab96227bad42..000000000000
--- a/Documentation/RCU/rcu_dereference.txt
+++ /dev/null
@@ -1,353 +0,0 @@
-PROPER CARE AND FEEDING OF RETURN VALUES FROM rcu_dereference()
-
-Most of the time, you can use values from rcu_dereference() or one of
-the similar primitives without worries. Dereferencing (prefix "*"),
-field selection ("->"), assignment ("="), address-of ("&"), addition and
-subtraction of constants, and casts all work quite naturally and safely.
-
-It is nevertheless possible to get into trouble with other operations.
-Follow these rules to keep your RCU code working properly:
-
-o You must use one of the rcu_dereference() family of primitives
- to load an RCU-protected pointer, otherwise CONFIG_PROVE_RCU
- will complain. Worse yet, your code can see random memory-corruption
- bugs due to games that compilers and DEC Alpha can play.
- Without one of the rcu_dereference() primitives, compilers
- can reload the value, and won't your code have fun with two
- different values for a single pointer! Without rcu_dereference(),
- DEC Alpha can load a pointer, dereference that pointer, and
- return data preceding initialization that preceded the store of
- the pointer.
-
- In addition, the volatile cast in rcu_dereference() prevents the
- compiler from deducing the resulting pointer value. Please see
- the section entitled "EXAMPLE WHERE THE COMPILER KNOWS TOO MUCH"
- for an example where the compiler can in fact deduce the exact
- value of the pointer, and thus cause misordering.
-
-o You are only permitted to use rcu_dereference on pointer values.
- The compiler simply knows too much about integral values to
- trust it to carry dependencies through integer operations.
- There are a very few exceptions, namely that you can temporarily
- cast the pointer to uintptr_t in order to:
-
- o Set bits and clear bits down in the must-be-zero low-order
- bits of that pointer. This clearly means that the pointer
- must have alignment constraints, for example, this does
- -not- work in general for char* pointers.
-
- o XOR bits to translate pointers, as is done in some
- classic buddy-allocator algorithms.
-
- It is important to cast the value back to pointer before
- doing much of anything else with it.
-
-o Avoid cancellation when using the "+" and "-" infix arithmetic
- operators. For example, for a given variable "x", avoid
- "(x-(uintptr_t)x)" for char* pointers. The compiler is within its
- rights to substitute zero for this sort of expression, so that
- subsequent accesses no longer depend on the rcu_dereference(),
- again possibly resulting in bugs due to misordering.
-
- Of course, if "p" is a pointer from rcu_dereference(), and "a"
- and "b" are integers that happen to be equal, the expression
- "p+a-b" is safe because its value still necessarily depends on
- the rcu_dereference(), thus maintaining proper ordering.
-
-o If you are using RCU to protect JITed functions, so that the
- "()" function-invocation operator is applied to a value obtained
- (directly or indirectly) from rcu_dereference(), you may need to
- interact directly with the hardware to flush instruction caches.
- This issue arises on some systems when a newly JITed function is
- using the same memory that was used by an earlier JITed function.
-
-o Do not use the results from relational operators ("==", "!=",
- ">", ">=", "<", or "<=") when dereferencing. For example,
- the following (quite strange) code is buggy:
-
- int *p;
- int *q;
-
- ...
-
- p = rcu_dereference(gp)
- q = &global_q;
- q += p > &oom_p;
- r1 = *q; /* BUGGY!!! */
-
- As before, the reason this is buggy is that relational operators
- are often compiled using branches. And as before, although
- weak-memory machines such as ARM or PowerPC do order stores
- after such branches, but can speculate loads, which can again
- result in misordering bugs.
-
-o Be very careful about comparing pointers obtained from
- rcu_dereference() against non-NULL values. As Linus Torvalds
- explained, if the two pointers are equal, the compiler could
- substitute the pointer you are comparing against for the pointer
- obtained from rcu_dereference(). For example:
-
- p = rcu_dereference(gp);
- if (p == &default_struct)
- do_default(p->a);
-
- Because the compiler now knows that the value of "p" is exactly
- the address of the variable "default_struct", it is free to
- transform this code into the following:
-
- p = rcu_dereference(gp);
- if (p == &default_struct)
- do_default(default_struct.a);
-
- On ARM and Power hardware, the load from "default_struct.a"
- can now be speculated, such that it might happen before the
- rcu_dereference(). This could result in bugs due to misordering.
-
- However, comparisons are OK in the following cases:
-
- o The comparison was against the NULL pointer. If the
- compiler knows that the pointer is NULL, you had better
- not be dereferencing it anyway. If the comparison is
- non-equal, the compiler is none the wiser. Therefore,
- it is safe to compare pointers from rcu_dereference()
- against NULL pointers.
-
- o The pointer is never dereferenced after being compared.
- Since there are no subsequent dereferences, the compiler
- cannot use anything it learned from the comparison
- to reorder the non-existent subsequent dereferences.
- This sort of comparison occurs frequently when scanning
- RCU-protected circular linked lists.
-
- Note that if checks for being within an RCU read-side
- critical section are not required and the pointer is never
- dereferenced, rcu_access_pointer() should be used in place
- of rcu_dereference().
-
- o The comparison is against a pointer that references memory
- that was initialized "a long time ago." The reason
- this is safe is that even if misordering occurs, the
- misordering will not affect the accesses that follow
- the comparison. So exactly how long ago is "a long
- time ago"? Here are some possibilities:
-
- o Compile time.
-
- o Boot time.
-
- o Module-init time for module code.
-
- o Prior to kthread creation for kthread code.
-
- o During some prior acquisition of the lock that
- we now hold.
-
- o Before mod_timer() time for a timer handler.
-
- There are many other possibilities involving the Linux
- kernel's wide array of primitives that cause code to
- be invoked at a later time.
-
- o The pointer being compared against also came from
- rcu_dereference(). In this case, both pointers depend
- on one rcu_dereference() or another, so you get proper
- ordering either way.
-
- That said, this situation can make certain RCU usage
- bugs more likely to happen. Which can be a good thing,
- at least if they happen during testing. An example
- of such an RCU usage bug is shown in the section titled
- "EXAMPLE OF AMPLIFIED RCU-USAGE BUG".
-
- o All of the accesses following the comparison are stores,
- so that a control dependency preserves the needed ordering.
- That said, it is easy to get control dependencies wrong.
- Please see the "CONTROL DEPENDENCIES" section of
- Documentation/memory-barriers.txt for more details.
-
- o The pointers are not equal -and- the compiler does
- not have enough information to deduce the value of the
- pointer. Note that the volatile cast in rcu_dereference()
- will normally prevent the compiler from knowing too much.
-
- However, please note that if the compiler knows that the
- pointer takes on only one of two values, a not-equal
- comparison will provide exactly the information that the
- compiler needs to deduce the value of the pointer.
-
-o Disable any value-speculation optimizations that your compiler
- might provide, especially if you are making use of feedback-based
- optimizations that take data collected from prior runs. Such
- value-speculation optimizations reorder operations by design.
-
- There is one exception to this rule: Value-speculation
- optimizations that leverage the branch-prediction hardware are
- safe on strongly ordered systems (such as x86), but not on weakly
- ordered systems (such as ARM or Power). Choose your compiler
- command-line options wisely!
-
-
-EXAMPLE OF AMPLIFIED RCU-USAGE BUG
-
-Because updaters can run concurrently with RCU readers, RCU readers can
-see stale and/or inconsistent values. If RCU readers need fresh or
-consistent values, which they sometimes do, they need to take proper
-precautions. To see this, consider the following code fragment:
-
- struct foo {
- int a;
- int b;
- int c;
- };
- struct foo *gp1;
- struct foo *gp2;
-
- void updater(void)
- {
- struct foo *p;
-
- p = kmalloc(...);
- if (p == NULL)
- deal_with_it();
- p->a = 42; /* Each field in its own cache line. */
- p->b = 43;
- p->c = 44;
- rcu_assign_pointer(gp1, p);
- p->b = 143;
- p->c = 144;
- rcu_assign_pointer(gp2, p);
- }
-
- void reader(void)
- {
- struct foo *p;
- struct foo *q;
- int r1, r2;
-
- p = rcu_dereference(gp2);
- if (p == NULL)
- return;
- r1 = p->b; /* Guaranteed to get 143. */
- q = rcu_dereference(gp1); /* Guaranteed non-NULL. */
- if (p == q) {
- /* The compiler decides that q->c is same as p->c. */
- r2 = p->c; /* Could get 44 on weakly order system. */
- }
- do_something_with(r1, r2);
- }
-
-You might be surprised that the outcome (r1 == 143 && r2 == 44) is possible,
-but you should not be. After all, the updater might have been invoked
-a second time between the time reader() loaded into "r1" and the time
-that it loaded into "r2". The fact that this same result can occur due
-to some reordering from the compiler and CPUs is beside the point.
-
-But suppose that the reader needs a consistent view?
-
-Then one approach is to use locking, for example, as follows:
-
- struct foo {
- int a;
- int b;
- int c;
- spinlock_t lock;
- };
- struct foo *gp1;
- struct foo *gp2;
-
- void updater(void)
- {
- struct foo *p;
-
- p = kmalloc(...);
- if (p == NULL)
- deal_with_it();
- spin_lock(&p->lock);
- p->a = 42; /* Each field in its own cache line. */
- p->b = 43;
- p->c = 44;
- spin_unlock(&p->lock);
- rcu_assign_pointer(gp1, p);
- spin_lock(&p->lock);
- p->b = 143;
- p->c = 144;
- spin_unlock(&p->lock);
- rcu_assign_pointer(gp2, p);
- }
-
- void reader(void)
- {
- struct foo *p;
- struct foo *q;
- int r1, r2;
-
- p = rcu_dereference(gp2);
- if (p == NULL)
- return;
- spin_lock(&p->lock);
- r1 = p->b; /* Guaranteed to get 143. */
- q = rcu_dereference(gp1); /* Guaranteed non-NULL. */
- if (p == q) {
- /* The compiler decides that q->c is same as p->c. */
- r2 = p->c; /* Locking guarantees r2 == 144. */
- }
- spin_unlock(&p->lock);
- do_something_with(r1, r2);
- }
-
-As always, use the right tool for the job!
-
-
-EXAMPLE WHERE THE COMPILER KNOWS TOO MUCH
-
-If a pointer obtained from rcu_dereference() compares not-equal to some
-other pointer, the compiler normally has no clue what the value of the
-first pointer might be. This lack of knowledge prevents the compiler
-from carrying out optimizations that otherwise might destroy the ordering
-guarantees that RCU depends on. And the volatile cast in rcu_dereference()
-should prevent the compiler from guessing the value.
-
-But without rcu_dereference(), the compiler knows more than you might
-expect. Consider the following code fragment:
-
- struct foo {
- int a;
- int b;
- };
- static struct foo variable1;
- static struct foo variable2;
- static struct foo *gp = &variable1;
-
- void updater(void)
- {
- initialize_foo(&variable2);
- rcu_assign_pointer(gp, &variable2);
- /*
- * The above is the only store to gp in this translation unit,
- * and the address of gp is not exported in any way.
- */
- }
-
- int reader(void)
- {
- struct foo *p;
-
- p = gp;
- barrier();
- if (p == &variable1)
- return p->a; /* Must be variable1.a. */
- else
- return p->b; /* Must be variable2.b. */
- }
-
-Because the compiler can see all stores to "gp", it knows that the only
-possible values of "gp" are "variable1" on the one hand and "variable2"
-on the other. The comparison in reader() therefore tells the compiler
-the exact value of "p" even in the not-equals case. This allows the
-compiler to make the return values independent of the load from "gp",
-in turn destroying the ordering between this load and the loads of the
-return values. This can result in "p->b" returning pre-initialization
-garbage values.
-
-In short, rcu_dereference() is -not- optional when you are going to
-dereference the resulting pointer.
diff --git a/Documentation/RCU/rcubarrier.rst b/Documentation/RCU/rcubarrier.rst
new file mode 100644
index 000000000000..12a7b059654f
--- /dev/null
+++ b/Documentation/RCU/rcubarrier.rst
@@ -0,0 +1,377 @@
+.. _rcu_barrier:
+
+RCU and Unloadable Modules
+==========================
+
+[Originally published in LWN Jan. 14, 2007: http://lwn.net/Articles/217484/]
+
+RCU updaters sometimes use call_rcu() to initiate an asynchronous wait for
+a grace period to elapse. This primitive takes a pointer to an rcu_head
+struct placed within the RCU-protected data structure and another pointer
+to a function that may be invoked later to free that structure. Code to
+delete an element p from the linked list from IRQ context might then be
+as follows::
+
+ list_del_rcu(p);
+ call_rcu(&p->rcu, p_callback);
+
+Since call_rcu() never blocks, this code can safely be used from within
+IRQ context. The function p_callback() might be defined as follows::
+
+ static void p_callback(struct rcu_head *rp)
+ {
+ struct pstruct *p = container_of(rp, struct pstruct, rcu);
+
+ kfree(p);
+ }
+
+
+Unloading Modules That Use call_rcu()
+-------------------------------------
+
+But what if the p_callback() function is defined in an unloadable module?
+
+If we unload the module while some RCU callbacks are pending,
+the CPUs executing these callbacks are going to be severely
+disappointed when they are later invoked, as fancifully depicted at
+http://lwn.net/images/ns/kernel/rcu-drop.jpg.
+
+We could try placing a synchronize_rcu() in the module-exit code path,
+but this is not sufficient. Although synchronize_rcu() does wait for a
+grace period to elapse, it does not wait for the callbacks to complete.
+
+One might be tempted to try several back-to-back synchronize_rcu()
+calls, but this is still not guaranteed to work. If there is a very
+heavy RCU-callback load, then some of the callbacks might be deferred in
+order to allow other processing to proceed. For but one example, such
+deferral is required in realtime kernels in order to avoid excessive
+scheduling latencies.
+
+
+rcu_barrier()
+-------------
+
+This situation can be handled by the rcu_barrier() primitive. Rather
+than waiting for a grace period to elapse, rcu_barrier() waits for all
+outstanding RCU callbacks to complete. Please note that rcu_barrier()
+does **not** imply synchronize_rcu(), in particular, if there are no RCU
+callbacks queued anywhere, rcu_barrier() is within its rights to return
+immediately, without waiting for anything, let alone a grace period.
+
+Pseudo-code using rcu_barrier() is as follows:
+
+ 1. Prevent any new RCU callbacks from being posted.
+ 2. Execute rcu_barrier().
+ 3. Allow the module to be unloaded.
+
+There is also an srcu_barrier() function for SRCU, and you of course
+must match the flavor of srcu_barrier() with that of call_srcu().
+If your module uses multiple srcu_struct structures, then it must also
+use multiple invocations of srcu_barrier() when unloading that module.
+For example, if it uses call_rcu(), call_srcu() on srcu_struct_1, and
+call_srcu() on srcu_struct_2, then the following three lines of code
+will be required when unloading::
+
+ 1 rcu_barrier();
+ 2 srcu_barrier(&srcu_struct_1);
+ 3 srcu_barrier(&srcu_struct_2);
+
+If latency is of the essence, workqueues could be used to run these
+three functions concurrently.
+
+An ancient version of the rcutorture module makes use of rcu_barrier()
+in its exit function as follows::
+
+ 1 static void
+ 2 rcu_torture_cleanup(void)
+ 3 {
+ 4 int i;
+ 5
+ 6 fullstop = 1;
+ 7 if (shuffler_task != NULL) {
+ 8 VERBOSE_PRINTK_STRING("Stopping rcu_torture_shuffle task");
+ 9 kthread_stop(shuffler_task);
+ 10 }
+ 11 shuffler_task = NULL;
+ 12
+ 13 if (writer_task != NULL) {
+ 14 VERBOSE_PRINTK_STRING("Stopping rcu_torture_writer task");
+ 15 kthread_stop(writer_task);
+ 16 }
+ 17 writer_task = NULL;
+ 18
+ 19 if (reader_tasks != NULL) {
+ 20 for (i = 0; i < nrealreaders; i++) {
+ 21 if (reader_tasks[i] != NULL) {
+ 22 VERBOSE_PRINTK_STRING(
+ 23 "Stopping rcu_torture_reader task");
+ 24 kthread_stop(reader_tasks[i]);
+ 25 }
+ 26 reader_tasks[i] = NULL;
+ 27 }
+ 28 kfree(reader_tasks);
+ 29 reader_tasks = NULL;
+ 30 }
+ 31 rcu_torture_current = NULL;
+ 32
+ 33 if (fakewriter_tasks != NULL) {
+ 34 for (i = 0; i < nfakewriters; i++) {
+ 35 if (fakewriter_tasks[i] != NULL) {
+ 36 VERBOSE_PRINTK_STRING(
+ 37 "Stopping rcu_torture_fakewriter task");
+ 38 kthread_stop(fakewriter_tasks[i]);
+ 39 }
+ 40 fakewriter_tasks[i] = NULL;
+ 41 }
+ 42 kfree(fakewriter_tasks);
+ 43 fakewriter_tasks = NULL;
+ 44 }
+ 45
+ 46 if (stats_task != NULL) {
+ 47 VERBOSE_PRINTK_STRING("Stopping rcu_torture_stats task");
+ 48 kthread_stop(stats_task);
+ 49 }
+ 50 stats_task = NULL;
+ 51
+ 52 /* Wait for all RCU callbacks to fire. */
+ 53 rcu_barrier();
+ 54
+ 55 rcu_torture_stats_print(); /* -After- the stats thread is stopped! */
+ 56
+ 57 if (cur_ops->cleanup != NULL)
+ 58 cur_ops->cleanup();
+ 59 if (atomic_read(&n_rcu_torture_error))
+ 60 rcu_torture_print_module_parms("End of test: FAILURE");
+ 61 else
+ 62 rcu_torture_print_module_parms("End of test: SUCCESS");
+ 63 }
+
+Line 6 sets a global variable that prevents any RCU callbacks from
+re-posting themselves. This will not be necessary in most cases, since
+RCU callbacks rarely include calls to call_rcu(). However, the rcutorture
+module is an exception to this rule, and therefore needs to set this
+global variable.
+
+Lines 7-50 stop all the kernel tasks associated with the rcutorture
+module. Therefore, once execution reaches line 53, no more rcutorture
+RCU callbacks will be posted. The rcu_barrier() call on line 53 waits
+for any pre-existing callbacks to complete.
+
+Then lines 55-62 print status and do operation-specific cleanup, and
+then return, permitting the module-unload operation to be completed.
+
+.. _rcubarrier_quiz_1:
+
+Quick Quiz #1:
+ Is there any other situation where rcu_barrier() might
+ be required?
+
+:ref:`Answer to Quick Quiz #1 <answer_rcubarrier_quiz_1>`
+
+Your module might have additional complications. For example, if your
+module invokes call_rcu() from timers, you will need to first refrain
+from posting new timers, cancel (or wait for) all the already-posted
+timers, and only then invoke rcu_barrier() to wait for any remaining
+RCU callbacks to complete.
+
+Of course, if your module uses call_rcu(), you will need to invoke
+rcu_barrier() before unloading. Similarly, if your module uses
+call_srcu(), you will need to invoke srcu_barrier() before unloading,
+and on the same srcu_struct structure. If your module uses call_rcu()
+**and** call_srcu(), then (as noted above) you will need to invoke
+rcu_barrier() **and** srcu_barrier().
+
+
+Implementing rcu_barrier()
+--------------------------
+
+Dipankar Sarma's implementation of rcu_barrier() makes use of the fact
+that RCU callbacks are never reordered once queued on one of the per-CPU
+queues. His implementation queues an RCU callback on each of the per-CPU
+callback queues, and then waits until they have all started executing, at
+which point, all earlier RCU callbacks are guaranteed to have completed.
+
+The original code for rcu_barrier() was roughly as follows::
+
+ 1 void rcu_barrier(void)
+ 2 {
+ 3 BUG_ON(in_interrupt());
+ 4 /* Take cpucontrol mutex to protect against CPU hotplug */
+ 5 mutex_lock(&rcu_barrier_mutex);
+ 6 init_completion(&rcu_barrier_completion);
+ 7 atomic_set(&rcu_barrier_cpu_count, 1);
+ 8 on_each_cpu(rcu_barrier_func, NULL, 0, 1);
+ 9 if (atomic_dec_and_test(&rcu_barrier_cpu_count))
+ 10 complete(&rcu_barrier_completion);
+ 11 wait_for_completion(&rcu_barrier_completion);
+ 12 mutex_unlock(&rcu_barrier_mutex);
+ 13 }
+
+Line 3 verifies that the caller is in process context, and lines 5 and 12
+use rcu_barrier_mutex to ensure that only one rcu_barrier() is using the
+global completion and counters at a time, which are initialized on lines
+6 and 7. Line 8 causes each CPU to invoke rcu_barrier_func(), which is
+shown below. Note that the final "1" in on_each_cpu()'s argument list
+ensures that all the calls to rcu_barrier_func() will have completed
+before on_each_cpu() returns. Line 9 removes the initial count from
+rcu_barrier_cpu_count, and if this count is now zero, line 10 finalizes
+the completion, which prevents line 11 from blocking. Either way,
+line 11 then waits (if needed) for the completion.
+
+.. _rcubarrier_quiz_2:
+
+Quick Quiz #2:
+ Why doesn't line 8 initialize rcu_barrier_cpu_count to zero,
+ thereby avoiding the need for lines 9 and 10?
+
+:ref:`Answer to Quick Quiz #2 <answer_rcubarrier_quiz_2>`
+
+This code was rewritten in 2008 and several times thereafter, but this
+still gives the general idea.
+
+The rcu_barrier_func() runs on each CPU, where it invokes call_rcu()
+to post an RCU callback, as follows::
+
+ 1 static void rcu_barrier_func(void *notused)
+ 2 {
+ 3 int cpu = smp_processor_id();
+ 4 struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
+ 5 struct rcu_head *head;
+ 6
+ 7 head = &rdp->barrier;
+ 8 atomic_inc(&rcu_barrier_cpu_count);
+ 9 call_rcu(head, rcu_barrier_callback);
+ 10 }
+
+Lines 3 and 4 locate RCU's internal per-CPU rcu_data structure,
+which contains the struct rcu_head that needed for the later call to
+call_rcu(). Line 7 picks up a pointer to this struct rcu_head, and line
+8 increments the global counter. This counter will later be decremented
+by the callback. Line 9 then registers the rcu_barrier_callback() on
+the current CPU's queue.
+
+The rcu_barrier_callback() function simply atomically decrements the
+rcu_barrier_cpu_count variable and finalizes the completion when it
+reaches zero, as follows::
+
+ 1 static void rcu_barrier_callback(struct rcu_head *notused)
+ 2 {
+ 3 if (atomic_dec_and_test(&rcu_barrier_cpu_count))
+ 4 complete(&rcu_barrier_completion);
+ 5 }
+
+.. _rcubarrier_quiz_3:
+
+Quick Quiz #3:
+ What happens if CPU 0's rcu_barrier_func() executes
+ immediately (thus incrementing rcu_barrier_cpu_count to the
+ value one), but the other CPU's rcu_barrier_func() invocations
+ are delayed for a full grace period? Couldn't this result in
+ rcu_barrier() returning prematurely?
+
+:ref:`Answer to Quick Quiz #3 <answer_rcubarrier_quiz_3>`
+
+The current rcu_barrier() implementation is more complex, due to the need
+to avoid disturbing idle CPUs (especially on battery-powered systems)
+and the need to minimally disturb non-idle CPUs in real-time systems.
+In addition, a great many optimizations have been applied. However,
+the code above illustrates the concepts.
+
+
+rcu_barrier() Summary
+---------------------
+
+The rcu_barrier() primitive is used relatively infrequently, since most
+code using RCU is in the core kernel rather than in modules. However, if
+you are using RCU from an unloadable module, you need to use rcu_barrier()
+so that your module may be safely unloaded.
+
+
+Answers to Quick Quizzes
+------------------------
+
+.. _answer_rcubarrier_quiz_1:
+
+Quick Quiz #1:
+ Is there any other situation where rcu_barrier() might
+ be required?
+
+Answer:
+ Interestingly enough, rcu_barrier() was not originally
+ implemented for module unloading. Nikita Danilov was using
+ RCU in a filesystem, which resulted in a similar situation at
+ filesystem-unmount time. Dipankar Sarma coded up rcu_barrier()
+ in response, so that Nikita could invoke it during the
+ filesystem-unmount process.
+
+ Much later, yours truly hit the RCU module-unload problem when
+ implementing rcutorture, and found that rcu_barrier() solves
+ this problem as well.
+
+:ref:`Back to Quick Quiz #1 <rcubarrier_quiz_1>`
+
+.. _answer_rcubarrier_quiz_2:
+
+Quick Quiz #2:
+ Why doesn't line 8 initialize rcu_barrier_cpu_count to zero,
+ thereby avoiding the need for lines 9 and 10?
+
+Answer:
+ Suppose that the on_each_cpu() function shown on line 8 was
+ delayed, so that CPU 0's rcu_barrier_func() executed and
+ the corresponding grace period elapsed, all before CPU 1's
+ rcu_barrier_func() started executing. This would result in
+ rcu_barrier_cpu_count being decremented to zero, so that line
+ 11's wait_for_completion() would return immediately, failing to
+ wait for CPU 1's callbacks to be invoked.
+
+ Note that this was not a problem when the rcu_barrier() code
+ was first added back in 2005. This is because on_each_cpu()
+ disables preemption, which acted as an RCU read-side critical
+ section, thus preventing CPU 0's grace period from completing
+ until on_each_cpu() had dealt with all of the CPUs.
+
+ However, with the RCU flavor consolidation around v4.20, this
+ possibility was once again ruled out, because the consolidated
+ RCU once again waits on nonpreemptible regions of code.
+
+ Nevertheless, that extra count might still be a good idea.
+ Relying on these sort of accidents of implementation can result
+ in later surprise bugs when the implementation changes.
+
+:ref:`Back to Quick Quiz #2 <rcubarrier_quiz_2>`
+
+.. _answer_rcubarrier_quiz_3:
+
+Quick Quiz #3:
+ What happens if CPU 0's rcu_barrier_func() executes
+ immediately (thus incrementing rcu_barrier_cpu_count to the
+ value one), but the other CPU's rcu_barrier_func() invocations
+ are delayed for a full grace period? Couldn't this result in
+ rcu_barrier() returning prematurely?
+
+Answer:
+ This cannot happen. The reason is that on_each_cpu() has its last
+ argument, the wait flag, set to "1". This flag is passed through
+ to smp_call_function() and further to smp_call_function_on_cpu(),
+ causing this latter to spin until the cross-CPU invocation of
+ rcu_barrier_func() has completed. This by itself would prevent
+ a grace period from completing on non-CONFIG_PREEMPTION kernels,
+ since each CPU must undergo a context switch (or other quiescent
+ state) before the grace period can complete. However, this is
+ of no use in CONFIG_PREEMPTION kernels.
+
+ Therefore, on_each_cpu() disables preemption across its call
+ to smp_call_function() and also across the local call to
+ rcu_barrier_func(). Because recent RCU implementations treat
+ preemption-disabled regions of code as RCU read-side critical
+ sections, this prevents grace periods from completing. This
+ means that all CPUs have executed rcu_barrier_func() before
+ the first rcu_barrier_callback() can possibly execute, in turn
+ preventing rcu_barrier_cpu_count from prematurely reaching zero.
+
+ But if on_each_cpu() ever decides to forgo disabling preemption,
+ as might well happen due to real-time latency considerations,
+ initializing rcu_barrier_cpu_count to one will save the day.
+
+:ref:`Back to Quick Quiz #3 <rcubarrier_quiz_3>`
diff --git a/Documentation/RCU/rcubarrier.txt b/Documentation/RCU/rcubarrier.txt
deleted file mode 100644
index 5d7759071a3e..000000000000
--- a/Documentation/RCU/rcubarrier.txt
+++ /dev/null
@@ -1,326 +0,0 @@
-RCU and Unloadable Modules
-
-[Originally published in LWN Jan. 14, 2007: http://lwn.net/Articles/217484/]
-
-RCU (read-copy update) is a synchronization mechanism that can be thought
-of as a replacement for read-writer locking (among other things), but with
-very low-overhead readers that are immune to deadlock, priority inversion,
-and unbounded latency. RCU read-side critical sections are delimited
-by rcu_read_lock() and rcu_read_unlock(), which, in non-CONFIG_PREEMPT
-kernels, generate no code whatsoever.
-
-This means that RCU writers are unaware of the presence of concurrent
-readers, so that RCU updates to shared data must be undertaken quite
-carefully, leaving an old version of the data structure in place until all
-pre-existing readers have finished. These old versions are needed because
-such readers might hold a reference to them. RCU updates can therefore be
-rather expensive, and RCU is thus best suited for read-mostly situations.
-
-How can an RCU writer possibly determine when all readers are finished,
-given that readers might well leave absolutely no trace of their
-presence? There is a synchronize_rcu() primitive that blocks until all
-pre-existing readers have completed. An updater wishing to delete an
-element p from a linked list might do the following, while holding an
-appropriate lock, of course:
-
- list_del_rcu(p);
- synchronize_rcu();
- kfree(p);
-
-But the above code cannot be used in IRQ context -- the call_rcu()
-primitive must be used instead. This primitive takes a pointer to an
-rcu_head struct placed within the RCU-protected data structure and
-another pointer to a function that may be invoked later to free that
-structure. Code to delete an element p from the linked list from IRQ
-context might then be as follows:
-
- list_del_rcu(p);
- call_rcu(&p->rcu, p_callback);
-
-Since call_rcu() never blocks, this code can safely be used from within
-IRQ context. The function p_callback() might be defined as follows:
-
- static void p_callback(struct rcu_head *rp)
- {
- struct pstruct *p = container_of(rp, struct pstruct, rcu);
-
- kfree(p);
- }
-
-
-Unloading Modules That Use call_rcu()
-
-But what if p_callback is defined in an unloadable module?
-
-If we unload the module while some RCU callbacks are pending,
-the CPUs executing these callbacks are going to be severely
-disappointed when they are later invoked, as fancifully depicted at
-http://lwn.net/images/ns/kernel/rcu-drop.jpg.
-
-We could try placing a synchronize_rcu() in the module-exit code path,
-but this is not sufficient. Although synchronize_rcu() does wait for a
-grace period to elapse, it does not wait for the callbacks to complete.
-
-One might be tempted to try several back-to-back synchronize_rcu()
-calls, but this is still not guaranteed to work. If there is a very
-heavy RCU-callback load, then some of the callbacks might be deferred
-in order to allow other processing to proceed. Such deferral is required
-in realtime kernels in order to avoid excessive scheduling latencies.
-
-
-rcu_barrier()
-
-We instead need the rcu_barrier() primitive. Rather than waiting for
-a grace period to elapse, rcu_barrier() waits for all outstanding RCU
-callbacks to complete. Please note that rcu_barrier() does -not- imply
-synchronize_rcu(), in particular, if there are no RCU callbacks queued
-anywhere, rcu_barrier() is within its rights to return immediately,
-without waiting for a grace period to elapse.
-
-Pseudo-code using rcu_barrier() is as follows:
-
- 1. Prevent any new RCU callbacks from being posted.
- 2. Execute rcu_barrier().
- 3. Allow the module to be unloaded.
-
-There are also rcu_barrier_bh(), rcu_barrier_sched(), and srcu_barrier()
-functions for the other flavors of RCU, and you of course must match
-the flavor of rcu_barrier() with that of call_rcu(). If your module
-uses multiple flavors of call_rcu(), then it must also use multiple
-flavors of rcu_barrier() when unloading that module. For example, if
-it uses call_rcu_bh(), call_srcu() on srcu_struct_1, and call_srcu() on
-srcu_struct_2(), then the following three lines of code will be required
-when unloading:
-
- 1 rcu_barrier_bh();
- 2 srcu_barrier(&srcu_struct_1);
- 3 srcu_barrier(&srcu_struct_2);
-
-The rcutorture module makes use of rcu_barrier() in its exit function
-as follows:
-
- 1 static void
- 2 rcu_torture_cleanup(void)
- 3 {
- 4 int i;
- 5
- 6 fullstop = 1;
- 7 if (shuffler_task != NULL) {
- 8 VERBOSE_PRINTK_STRING("Stopping rcu_torture_shuffle task");
- 9 kthread_stop(shuffler_task);
-10 }
-11 shuffler_task = NULL;
-12
-13 if (writer_task != NULL) {
-14 VERBOSE_PRINTK_STRING("Stopping rcu_torture_writer task");
-15 kthread_stop(writer_task);
-16 }
-17 writer_task = NULL;
-18
-19 if (reader_tasks != NULL) {
-20 for (i = 0; i < nrealreaders; i++) {
-21 if (reader_tasks[i] != NULL) {
-22 VERBOSE_PRINTK_STRING(
-23 "Stopping rcu_torture_reader task");
-24 kthread_stop(reader_tasks[i]);
-25 }
-26 reader_tasks[i] = NULL;
-27 }
-28 kfree(reader_tasks);
-29 reader_tasks = NULL;
-30 }
-31 rcu_torture_current = NULL;
-32
-33 if (fakewriter_tasks != NULL) {
-34 for (i = 0; i < nfakewriters; i++) {
-35 if (fakewriter_tasks[i] != NULL) {
-36 VERBOSE_PRINTK_STRING(
-37 "Stopping rcu_torture_fakewriter task");
-38 kthread_stop(fakewriter_tasks[i]);
-39 }
-40 fakewriter_tasks[i] = NULL;
-41 }
-42 kfree(fakewriter_tasks);
-43 fakewriter_tasks = NULL;
-44 }
-45
-46 if (stats_task != NULL) {
-47 VERBOSE_PRINTK_STRING("Stopping rcu_torture_stats task");
-48 kthread_stop(stats_task);
-49 }
-50 stats_task = NULL;
-51
-52 /* Wait for all RCU callbacks to fire. */
-53 rcu_barrier();
-54
-55 rcu_torture_stats_print(); /* -After- the stats thread is stopped! */
-56
-57 if (cur_ops->cleanup != NULL)
-58 cur_ops->cleanup();
-59 if (atomic_read(&n_rcu_torture_error))
-60 rcu_torture_print_module_parms("End of test: FAILURE");
-61 else
-62 rcu_torture_print_module_parms("End of test: SUCCESS");
-63 }
-
-Line 6 sets a global variable that prevents any RCU callbacks from
-re-posting themselves. This will not be necessary in most cases, since
-RCU callbacks rarely include calls to call_rcu(). However, the rcutorture
-module is an exception to this rule, and therefore needs to set this
-global variable.
-
-Lines 7-50 stop all the kernel tasks associated with the rcutorture
-module. Therefore, once execution reaches line 53, no more rcutorture
-RCU callbacks will be posted. The rcu_barrier() call on line 53 waits
-for any pre-existing callbacks to complete.
-
-Then lines 55-62 print status and do operation-specific cleanup, and
-then return, permitting the module-unload operation to be completed.
-
-Quick Quiz #1: Is there any other situation where rcu_barrier() might
- be required?
-
-Your module might have additional complications. For example, if your
-module invokes call_rcu() from timers, you will need to first cancel all
-the timers, and only then invoke rcu_barrier() to wait for any remaining
-RCU callbacks to complete.
-
-Of course, if you module uses call_rcu_bh(), you will need to invoke
-rcu_barrier_bh() before unloading. Similarly, if your module uses
-call_rcu_sched(), you will need to invoke rcu_barrier_sched() before
-unloading. If your module uses call_rcu(), call_rcu_bh(), -and-
-call_rcu_sched(), then you will need to invoke each of rcu_barrier(),
-rcu_barrier_bh(), and rcu_barrier_sched().
-
-
-Implementing rcu_barrier()
-
-Dipankar Sarma's implementation of rcu_barrier() makes use of the fact
-that RCU callbacks are never reordered once queued on one of the per-CPU
-queues. His implementation queues an RCU callback on each of the per-CPU
-callback queues, and then waits until they have all started executing, at
-which point, all earlier RCU callbacks are guaranteed to have completed.
-
-The original code for rcu_barrier() was as follows:
-
- 1 void rcu_barrier(void)
- 2 {
- 3 BUG_ON(in_interrupt());
- 4 /* Take cpucontrol mutex to protect against CPU hotplug */
- 5 mutex_lock(&rcu_barrier_mutex);
- 6 init_completion(&rcu_barrier_completion);
- 7 atomic_set(&rcu_barrier_cpu_count, 0);
- 8 on_each_cpu(rcu_barrier_func, NULL, 0, 1);
- 9 wait_for_completion(&rcu_barrier_completion);
-10 mutex_unlock(&rcu_barrier_mutex);
-11 }
-
-Line 3 verifies that the caller is in process context, and lines 5 and 10
-use rcu_barrier_mutex to ensure that only one rcu_barrier() is using the
-global completion and counters at a time, which are initialized on lines
-6 and 7. Line 8 causes each CPU to invoke rcu_barrier_func(), which is
-shown below. Note that the final "1" in on_each_cpu()'s argument list
-ensures that all the calls to rcu_barrier_func() will have completed
-before on_each_cpu() returns. Line 9 then waits for the completion.
-
-This code was rewritten in 2008 to support rcu_barrier_bh() and
-rcu_barrier_sched() in addition to the original rcu_barrier().
-
-The rcu_barrier_func() runs on each CPU, where it invokes call_rcu()
-to post an RCU callback, as follows:
-
- 1 static void rcu_barrier_func(void *notused)
- 2 {
- 3 int cpu = smp_processor_id();
- 4 struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
- 5 struct rcu_head *head;
- 6
- 7 head = &rdp->barrier;
- 8 atomic_inc(&rcu_barrier_cpu_count);
- 9 call_rcu(head, rcu_barrier_callback);
-10 }
-
-Lines 3 and 4 locate RCU's internal per-CPU rcu_data structure,
-which contains the struct rcu_head that needed for the later call to
-call_rcu(). Line 7 picks up a pointer to this struct rcu_head, and line
-8 increments a global counter. This counter will later be decremented
-by the callback. Line 9 then registers the rcu_barrier_callback() on
-the current CPU's queue.
-
-The rcu_barrier_callback() function simply atomically decrements the
-rcu_barrier_cpu_count variable and finalizes the completion when it
-reaches zero, as follows:
-
- 1 static void rcu_barrier_callback(struct rcu_head *notused)
- 2 {
- 3 if (atomic_dec_and_test(&rcu_barrier_cpu_count))
- 4 complete(&rcu_barrier_completion);
- 5 }
-
-Quick Quiz #2: What happens if CPU 0's rcu_barrier_func() executes
- immediately (thus incrementing rcu_barrier_cpu_count to the
- value one), but the other CPU's rcu_barrier_func() invocations
- are delayed for a full grace period? Couldn't this result in
- rcu_barrier() returning prematurely?
-
-The current rcu_barrier() implementation is more complex, due to the need
-to avoid disturbing idle CPUs (especially on battery-powered systems)
-and the need to minimally disturb non-idle CPUs in real-time systems.
-However, the code above illustrates the concepts.
-
-
-rcu_barrier() Summary
-
-The rcu_barrier() primitive has seen relatively little use, since most
-code using RCU is in the core kernel rather than in modules. However, if
-you are using RCU from an unloadable module, you need to use rcu_barrier()
-so that your module may be safely unloaded.
-
-
-Answers to Quick Quizzes
-
-Quick Quiz #1: Is there any other situation where rcu_barrier() might
- be required?
-
-Answer: Interestingly enough, rcu_barrier() was not originally
- implemented for module unloading. Nikita Danilov was using
- RCU in a filesystem, which resulted in a similar situation at
- filesystem-unmount time. Dipankar Sarma coded up rcu_barrier()
- in response, so that Nikita could invoke it during the
- filesystem-unmount process.
-
- Much later, yours truly hit the RCU module-unload problem when
- implementing rcutorture, and found that rcu_barrier() solves
- this problem as well.
-
-Quick Quiz #2: What happens if CPU 0's rcu_barrier_func() executes
- immediately (thus incrementing rcu_barrier_cpu_count to the
- value one), but the other CPU's rcu_barrier_func() invocations
- are delayed for a full grace period? Couldn't this result in
- rcu_barrier() returning prematurely?
-
-Answer: This cannot happen. The reason is that on_each_cpu() has its last
- argument, the wait flag, set to "1". This flag is passed through
- to smp_call_function() and further to smp_call_function_on_cpu(),
- causing this latter to spin until the cross-CPU invocation of
- rcu_barrier_func() has completed. This by itself would prevent
- a grace period from completing on non-CONFIG_PREEMPT kernels,
- since each CPU must undergo a context switch (or other quiescent
- state) before the grace period can complete. However, this is
- of no use in CONFIG_PREEMPT kernels.
-
- Therefore, on_each_cpu() disables preemption across its call
- to smp_call_function() and also across the local call to
- rcu_barrier_func(). This prevents the local CPU from context
- switching, again preventing grace periods from completing. This
- means that all CPUs have executed rcu_barrier_func() before
- the first rcu_barrier_callback() can possibly execute, in turn
- preventing rcu_barrier_cpu_count from prematurely reaching zero.
-
- Currently, -rt implementations of RCU keep but a single global
- queue for RCU callbacks, and thus do not suffer from this
- problem. However, when the -rt RCU eventually does have per-CPU
- callback queues, things will have to change. One simple change
- is to add an rcu_read_lock() before line 8 of rcu_barrier()
- and an rcu_read_unlock() after line 8 of this same function. If
- you can think of a better change, please let me know!
diff --git a/Documentation/RCU/rculist_nulls.rst b/Documentation/RCU/rculist_nulls.rst
new file mode 100644
index 000000000000..21e40fcc08de
--- /dev/null
+++ b/Documentation/RCU/rculist_nulls.rst
@@ -0,0 +1,215 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=================================================
+Using RCU hlist_nulls to protect list and objects
+=================================================
+
+This section describes how to use hlist_nulls to
+protect read-mostly linked lists and
+objects using SLAB_TYPESAFE_BY_RCU allocations.
+
+Please read the basics in listRCU.rst.
+
+Using 'nulls'
+=============
+
+Using special makers (called 'nulls') is a convenient way
+to solve following problem.
+
+Without 'nulls', a typical RCU linked list managing objects which are
+allocated with SLAB_TYPESAFE_BY_RCU kmem_cache can use the following
+algorithms. Following examples assume 'obj' is a pointer to such
+objects, which is having below type.
+
+::
+
+ struct object {
+ struct hlist_node obj_node;
+ atomic_t refcnt;
+ unsigned int key;
+ };
+
+1) Lookup algorithm
+-------------------
+
+::
+
+ begin:
+ rcu_read_lock();
+ obj = lockless_lookup(key);
+ if (obj) {
+ if (!try_get_ref(obj)) { // might fail for free objects
+ rcu_read_unlock();
+ goto begin;
+ }
+ /*
+ * Because a writer could delete object, and a writer could
+ * reuse these object before the RCU grace period, we
+ * must check key after getting the reference on object
+ */
+ if (obj->key != key) { // not the object we expected
+ put_ref(obj);
+ rcu_read_unlock();
+ goto begin;
+ }
+ }
+ rcu_read_unlock();
+
+Beware that lockless_lookup(key) cannot use traditional hlist_for_each_entry_rcu()
+but a version with an additional memory barrier (smp_rmb())
+
+::
+
+ lockless_lookup(key)
+ {
+ struct hlist_node *node, *next;
+ for (pos = rcu_dereference((head)->first);
+ pos && ({ next = pos->next; smp_rmb(); prefetch(next); 1; }) &&
+ ({ obj = hlist_entry(pos, typeof(*obj), obj_node); 1; });
+ pos = rcu_dereference(next))
+ if (obj->key == key)
+ return obj;
+ return NULL;
+ }
+
+And note the traditional hlist_for_each_entry_rcu() misses this smp_rmb()::
+
+ struct hlist_node *node;
+ for (pos = rcu_dereference((head)->first);
+ pos && ({ prefetch(pos->next); 1; }) &&
+ ({ obj = hlist_entry(pos, typeof(*obj), obj_node); 1; });
+ pos = rcu_dereference(pos->next))
+ if (obj->key == key)
+ return obj;
+ return NULL;
+
+Quoting Corey Minyard::
+
+ "If the object is moved from one list to another list in-between the
+ time the hash is calculated and the next field is accessed, and the
+ object has moved to the end of a new list, the traversal will not
+ complete properly on the list it should have, since the object will
+ be on the end of the new list and there's not a way to tell it's on a
+ new list and restart the list traversal. I think that this can be
+ solved by pre-fetching the "next" field (with proper barriers) before
+ checking the key."
+
+2) Insertion algorithm
+----------------------
+
+We need to make sure a reader cannot read the new 'obj->obj_node.next' value
+and previous value of 'obj->key'. Otherwise, an item could be deleted
+from a chain, and inserted into another chain. If new chain was empty
+before the move, 'next' pointer is NULL, and lockless reader can not
+detect the fact that it missed following items in original chain.
+
+::
+
+ /*
+ * Please note that new inserts are done at the head of list,
+ * not in the middle or end.
+ */
+ obj = kmem_cache_alloc(...);
+ lock_chain(); // typically a spin_lock()
+ obj->key = key;
+ atomic_set_release(&obj->refcnt, 1); // key before refcnt
+ hlist_add_head_rcu(&obj->obj_node, list);
+ unlock_chain(); // typically a spin_unlock()
+
+
+3) Removal algorithm
+--------------------
+
+Nothing special here, we can use a standard RCU hlist deletion.
+But thanks to SLAB_TYPESAFE_BY_RCU, beware a deleted object can be reused
+very very fast (before the end of RCU grace period)
+
+::
+
+ if (put_last_reference_on(obj) {
+ lock_chain(); // typically a spin_lock()
+ hlist_del_init_rcu(&obj->obj_node);
+ unlock_chain(); // typically a spin_unlock()
+ kmem_cache_free(cachep, obj);
+ }
+
+
+
+--------------------------------------------------------------------------
+
+Avoiding extra smp_rmb()
+========================
+
+With hlist_nulls we can avoid extra smp_rmb() in lockless_lookup().
+
+For example, if we choose to store the slot number as the 'nulls'
+end-of-list marker for each slot of the hash table, we can detect
+a race (some writer did a delete and/or a move of an object
+to another chain) checking the final 'nulls' value if
+the lookup met the end of chain. If final 'nulls' value
+is not the slot number, then we must restart the lookup at
+the beginning. If the object was moved to the same chain,
+then the reader doesn't care: It might occasionally
+scan the list again without harm.
+
+Note that using hlist_nulls means the type of 'obj_node' field of
+'struct object' becomes 'struct hlist_nulls_node'.
+
+
+1) lookup algorithm
+-------------------
+
+::
+
+ head = &table[slot];
+ begin:
+ rcu_read_lock();
+ hlist_nulls_for_each_entry_rcu(obj, node, head, obj_node) {
+ if (obj->key == key) {
+ if (!try_get_ref(obj)) { // might fail for free objects
+ rcu_read_unlock();
+ goto begin;
+ }
+ if (obj->key != key) { // not the object we expected
+ put_ref(obj);
+ rcu_read_unlock();
+ goto begin;
+ }
+ goto out;
+ }
+ }
+
+ // If the nulls value we got at the end of this lookup is
+ // not the expected one, we must restart lookup.
+ // We probably met an item that was moved to another chain.
+ if (get_nulls_value(node) != slot) {
+ put_ref(obj);
+ rcu_read_unlock();
+ goto begin;
+ }
+ obj = NULL;
+
+ out:
+ rcu_read_unlock();
+
+2) Insert algorithm
+-------------------
+
+Same to the above one, but uses hlist_nulls_add_head_rcu() instead of
+hlist_add_head_rcu().
+
+::
+
+ /*
+ * Please note that new inserts are done at the head of list,
+ * not in the middle or end.
+ */
+ obj = kmem_cache_alloc(cachep);
+ lock_chain(); // typically a spin_lock()
+ obj->key = key;
+ atomic_set_release(&obj->refcnt, 1); // key before refcnt
+ /*
+ * insert obj in RCU way (readers might be traversing chain)
+ */
+ hlist_nulls_add_head_rcu(&obj->obj_node, list);
+ unlock_chain(); // typically a spin_unlock()
diff --git a/Documentation/RCU/rculist_nulls.txt b/Documentation/RCU/rculist_nulls.txt
deleted file mode 100644
index 8151f0195f76..000000000000
--- a/Documentation/RCU/rculist_nulls.txt
+++ /dev/null
@@ -1,172 +0,0 @@
-Using hlist_nulls to protect read-mostly linked lists and
-objects using SLAB_TYPESAFE_BY_RCU allocations.
-
-Please read the basics in Documentation/RCU/listRCU.txt
-
-Using special makers (called 'nulls') is a convenient way
-to solve following problem :
-
-A typical RCU linked list managing objects which are
-allocated with SLAB_TYPESAFE_BY_RCU kmem_cache can
-use following algos :
-
-1) Lookup algo
---------------
-rcu_read_lock()
-begin:
-obj = lockless_lookup(key);
-if (obj) {
- if (!try_get_ref(obj)) // might fail for free objects
- goto begin;
- /*
- * Because a writer could delete object, and a writer could
- * reuse these object before the RCU grace period, we
- * must check key after getting the reference on object
- */
- if (obj->key != key) { // not the object we expected
- put_ref(obj);
- goto begin;
- }
-}
-rcu_read_unlock();
-
-Beware that lockless_lookup(key) cannot use traditional hlist_for_each_entry_rcu()
-but a version with an additional memory barrier (smp_rmb())
-
-lockless_lookup(key)
-{
- struct hlist_node *node, *next;
- for (pos = rcu_dereference((head)->first);
- pos && ({ next = pos->next; smp_rmb(); prefetch(next); 1; }) &&
- ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });
- pos = rcu_dereference(next))
- if (obj->key == key)
- return obj;
- return NULL;
-
-And note the traditional hlist_for_each_entry_rcu() misses this smp_rmb() :
-
- struct hlist_node *node;
- for (pos = rcu_dereference((head)->first);
- pos && ({ prefetch(pos->next); 1; }) &&
- ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });
- pos = rcu_dereference(pos->next))
- if (obj->key == key)
- return obj;
- return NULL;
-}
-
-Quoting Corey Minyard :
-
-"If the object is moved from one list to another list in-between the
- time the hash is calculated and the next field is accessed, and the
- object has moved to the end of a new list, the traversal will not
- complete properly on the list it should have, since the object will
- be on the end of the new list and there's not a way to tell it's on a
- new list and restart the list traversal. I think that this can be
- solved by pre-fetching the "next" field (with proper barriers) before
- checking the key."
-
-2) Insert algo :
-----------------
-
-We need to make sure a reader cannot read the new 'obj->obj_next' value
-and previous value of 'obj->key'. Or else, an item could be deleted
-from a chain, and inserted into another chain. If new chain was empty
-before the move, 'next' pointer is NULL, and lockless reader can
-not detect it missed following items in original chain.
-
-/*
- * Please note that new inserts are done at the head of list,
- * not in the middle or end.
- */
-obj = kmem_cache_alloc(...);
-lock_chain(); // typically a spin_lock()
-obj->key = key;
-/*
- * we need to make sure obj->key is updated before obj->next
- * or obj->refcnt
- */
-smp_wmb();
-atomic_set(&obj->refcnt, 1);
-hlist_add_head_rcu(&obj->obj_node, list);
-unlock_chain(); // typically a spin_unlock()
-
-
-3) Remove algo
---------------
-Nothing special here, we can use a standard RCU hlist deletion.
-But thanks to SLAB_TYPESAFE_BY_RCU, beware a deleted object can be reused
-very very fast (before the end of RCU grace period)
-
-if (put_last_reference_on(obj) {
- lock_chain(); // typically a spin_lock()
- hlist_del_init_rcu(&obj->obj_node);
- unlock_chain(); // typically a spin_unlock()
- kmem_cache_free(cachep, obj);
-}
-
-
-
---------------------------------------------------------------------------
-With hlist_nulls we can avoid extra smp_rmb() in lockless_lookup()
-and extra smp_wmb() in insert function.
-
-For example, if we choose to store the slot number as the 'nulls'
-end-of-list marker for each slot of the hash table, we can detect
-a race (some writer did a delete and/or a move of an object
-to another chain) checking the final 'nulls' value if
-the lookup met the end of chain. If final 'nulls' value
-is not the slot number, then we must restart the lookup at
-the beginning. If the object was moved to the same chain,
-then the reader doesn't care : It might eventually
-scan the list again without harm.
-
-
-1) lookup algo
-
- head = &table[slot];
- rcu_read_lock();
-begin:
- hlist_nulls_for_each_entry_rcu(obj, node, head, member) {
- if (obj->key == key) {
- if (!try_get_ref(obj)) // might fail for free objects
- goto begin;
- if (obj->key != key) { // not the object we expected
- put_ref(obj);
- goto begin;
- }
- goto out;
- }
-/*
- * if the nulls value we got at the end of this lookup is
- * not the expected one, we must restart lookup.
- * We probably met an item that was moved to another chain.
- */
- if (get_nulls_value(node) != slot)
- goto begin;
- obj = NULL;
-
-out:
- rcu_read_unlock();
-
-2) Insert function :
---------------------
-
-/*
- * Please note that new inserts are done at the head of list,
- * not in the middle or end.
- */
-obj = kmem_cache_alloc(cachep);
-lock_chain(); // typically a spin_lock()
-obj->key = key;
-/*
- * changes to obj->key must be visible before refcnt one
- */
-smp_wmb();
-atomic_set(&obj->refcnt, 1);
-/*
- * insert obj in RCU way (readers might be traversing chain)
- */
-hlist_nulls_add_head_rcu(&obj->obj_node, list);
-unlock_chain(); // typically a spin_unlock()
diff --git a/Documentation/RCU/rcuref.rst b/Documentation/RCU/rcuref.rst
new file mode 100644
index 000000000000..b33aeb14fde3
--- /dev/null
+++ b/Documentation/RCU/rcuref.rst
@@ -0,0 +1,158 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================================================================
+Reference-count design for elements of lists/arrays protected by RCU
+====================================================================
+
+
+Please note that the percpu-ref feature is likely your first
+stop if you need to combine reference counts and RCU. Please see
+include/linux/percpu-refcount.h for more information. However, in
+those unusual cases where percpu-ref would consume too much memory,
+please read on.
+
+------------------------------------------------------------------------
+
+Reference counting on elements of lists which are protected by traditional
+reader/writer spinlocks or semaphores are straightforward:
+
+CODE LISTING A::
+
+ 1. 2.
+ add() search_and_reference()
+ { {
+ alloc_object read_lock(&list_lock);
+ ... search_for_element
+ atomic_set(&el->rc, 1); atomic_inc(&el->rc);
+ write_lock(&list_lock); ...
+ add_element read_unlock(&list_lock);
+ ... ...
+ write_unlock(&list_lock); }
+ }
+
+ 3. 4.
+ release_referenced() delete()
+ { {
+ ... write_lock(&list_lock);
+ if(atomic_dec_and_test(&el->rc)) ...
+ kfree(el);
+ ... remove_element
+ } write_unlock(&list_lock);
+ ...
+ if (atomic_dec_and_test(&el->rc))
+ kfree(el);
+ ...
+ }
+
+If this list/array is made lock free using RCU as in changing the
+write_lock() in add() and delete() to spin_lock() and changing read_lock()
+in search_and_reference() to rcu_read_lock(), the atomic_inc() in
+search_and_reference() could potentially hold reference to an element which
+has already been deleted from the list/array. Use atomic_inc_not_zero()
+in this scenario as follows:
+
+CODE LISTING B::
+
+ 1. 2.
+ add() search_and_reference()
+ { {
+ alloc_object rcu_read_lock();
+ ... search_for_element
+ atomic_set(&el->rc, 1); if (!atomic_inc_not_zero(&el->rc)) {
+ spin_lock(&list_lock); rcu_read_unlock();
+ return FAIL;
+ add_element }
+ ... ...
+ spin_unlock(&list_lock); rcu_read_unlock();
+ } }
+ 3. 4.
+ release_referenced() delete()
+ { {
+ ... spin_lock(&list_lock);
+ if (atomic_dec_and_test(&el->rc)) ...
+ call_rcu(&el->head, el_free); remove_element
+ ... spin_unlock(&list_lock);
+ } ...
+ if (atomic_dec_and_test(&el->rc))
+ call_rcu(&el->head, el_free);
+ ...
+ }
+
+Sometimes, a reference to the element needs to be obtained in the
+update (write) stream. In such cases, atomic_inc_not_zero() might be
+overkill, since we hold the update-side spinlock. One might instead
+use atomic_inc() in such cases.
+
+It is not always convenient to deal with "FAIL" in the
+search_and_reference() code path. In such cases, the
+atomic_dec_and_test() may be moved from delete() to el_free()
+as follows:
+
+CODE LISTING C::
+
+ 1. 2.
+ add() search_and_reference()
+ { {
+ alloc_object rcu_read_lock();
+ ... search_for_element
+ atomic_set(&el->rc, 1); atomic_inc(&el->rc);
+ spin_lock(&list_lock); ...
+
+ add_element rcu_read_unlock();
+ ... }
+ spin_unlock(&list_lock); 4.
+ } delete()
+ 3. {
+ release_referenced() spin_lock(&list_lock);
+ { ...
+ ... remove_element
+ if (atomic_dec_and_test(&el->rc)) spin_unlock(&list_lock);
+ kfree(el); ...
+ ... call_rcu(&el->head, el_free);
+ } ...
+ 5. }
+ void el_free(struct rcu_head *rhp)
+ {
+ release_referenced();
+ }
+
+The key point is that the initial reference added by add() is not removed
+until after a grace period has elapsed following removal. This means that
+search_and_reference() cannot find this element, which means that the value
+of el->rc cannot increase. Thus, once it reaches zero, there are no
+readers that can or ever will be able to reference the element. The
+element can therefore safely be freed. This in turn guarantees that if
+any reader finds the element, that reader may safely acquire a reference
+without checking the value of the reference counter.
+
+A clear advantage of the RCU-based pattern in listing C over the one
+in listing B is that any call to search_and_reference() that locates
+a given object will succeed in obtaining a reference to that object,
+even given a concurrent invocation of delete() for that same object.
+Similarly, a clear advantage of both listings B and C over listing A is
+that a call to delete() is not delayed even if there are an arbitrarily
+large number of calls to search_and_reference() searching for the same
+object that delete() was invoked on. Instead, all that is delayed is
+the eventual invocation of kfree(), which is usually not a problem on
+modern computer systems, even the small ones.
+
+In cases where delete() can sleep, synchronize_rcu() can be called from
+delete(), so that el_free() can be subsumed into delete as follows::
+
+ 4.
+ delete()
+ {
+ spin_lock(&list_lock);
+ ...
+ remove_element
+ spin_unlock(&list_lock);
+ ...
+ synchronize_rcu();
+ if (atomic_dec_and_test(&el->rc))
+ kfree(el);
+ ...
+ }
+
+As additional examples in the kernel, the pattern in listing C is used by
+reference counting of struct pid, while the pattern in listing B is used by
+struct posix_acl.
diff --git a/Documentation/RCU/rcuref.txt b/Documentation/RCU/rcuref.txt
deleted file mode 100644
index 613033ff2b9b..000000000000
--- a/Documentation/RCU/rcuref.txt
+++ /dev/null
@@ -1,132 +0,0 @@
-Reference-count design for elements of lists/arrays protected by RCU.
-
-
-Please note that the percpu-ref feature is likely your first
-stop if you need to combine reference counts and RCU. Please see
-include/linux/percpu-refcount.h for more information. However, in
-those unusual cases where percpu-ref would consume too much memory,
-please read on.
-
-------------------------------------------------------------------------
-
-Reference counting on elements of lists which are protected by traditional
-reader/writer spinlocks or semaphores are straightforward:
-
-1. 2.
-add() search_and_reference()
-{ {
- alloc_object read_lock(&list_lock);
- ... search_for_element
- atomic_set(&el->rc, 1); atomic_inc(&el->rc);
- write_lock(&list_lock); ...
- add_element read_unlock(&list_lock);
- ... ...
- write_unlock(&list_lock); }
-}
-
-3. 4.
-release_referenced() delete()
-{ {
- ... write_lock(&list_lock);
- atomic_dec(&el->rc, relfunc) ...
- ... remove_element
-} write_unlock(&list_lock);
- ...
- if (atomic_dec_and_test(&el->rc))
- kfree(el);
- ...
- }
-
-If this list/array is made lock free using RCU as in changing the
-write_lock() in add() and delete() to spin_lock() and changing read_lock()
-in search_and_reference() to rcu_read_lock(), the atomic_inc() in
-search_and_reference() could potentially hold reference to an element which
-has already been deleted from the list/array. Use atomic_inc_not_zero()
-in this scenario as follows:
-
-1. 2.
-add() search_and_reference()
-{ {
- alloc_object rcu_read_lock();
- ... search_for_element
- atomic_set(&el->rc, 1); if (!atomic_inc_not_zero(&el->rc)) {
- spin_lock(&list_lock); rcu_read_unlock();
- return FAIL;
- add_element }
- ... ...
- spin_unlock(&list_lock); rcu_read_unlock();
-} }
-3. 4.
-release_referenced() delete()
-{ {
- ... spin_lock(&list_lock);
- if (atomic_dec_and_test(&el->rc)) ...
- call_rcu(&el->head, el_free); remove_element
- ... spin_unlock(&list_lock);
-} ...
- if (atomic_dec_and_test(&el->rc))
- call_rcu(&el->head, el_free);
- ...
- }
-
-Sometimes, a reference to the element needs to be obtained in the
-update (write) stream. In such cases, atomic_inc_not_zero() might be
-overkill, since we hold the update-side spinlock. One might instead
-use atomic_inc() in such cases.
-
-It is not always convenient to deal with "FAIL" in the
-search_and_reference() code path. In such cases, the
-atomic_dec_and_test() may be moved from delete() to el_free()
-as follows:
-
-1. 2.
-add() search_and_reference()
-{ {
- alloc_object rcu_read_lock();
- ... search_for_element
- atomic_set(&el->rc, 1); atomic_inc(&el->rc);
- spin_lock(&list_lock); ...
-
- add_element rcu_read_unlock();
- ... }
- spin_unlock(&list_lock); 4.
-} delete()
-3. {
-release_referenced() spin_lock(&list_lock);
-{ ...
- ... remove_element
- if (atomic_dec_and_test(&el->rc)) spin_unlock(&list_lock);
- kfree(el); ...
- ... call_rcu(&el->head, el_free);
-} ...
-5. }
-void el_free(struct rcu_head *rhp)
-{
- release_referenced();
-}
-
-The key point is that the initial reference added by add() is not removed
-until after a grace period has elapsed following removal. This means that
-search_and_reference() cannot find this element, which means that the value
-of el->rc cannot increase. Thus, once it reaches zero, there are no
-readers that can or ever will be able to reference the element. The
-element can therefore safely be freed. This in turn guarantees that if
-any reader finds the element, that reader may safely acquire a reference
-without checking the value of the reference counter.
-
-In cases where delete() can sleep, synchronize_rcu() can be called from
-delete(), so that el_free() can be subsumed into delete as follows:
-
-4.
-delete()
-{
- spin_lock(&list_lock);
- ...
- remove_element
- spin_unlock(&list_lock);
- ...
- synchronize_rcu();
- if (atomic_dec_and_test(&el->rc))
- kfree(el);
- ...
-}
diff --git a/Documentation/RCU/stallwarn.rst b/Documentation/RCU/stallwarn.rst
new file mode 100644
index 000000000000..d7c8eff63317
--- /dev/null
+++ b/Documentation/RCU/stallwarn.rst
@@ -0,0 +1,491 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============================
+Using RCU's CPU Stall Detector
+==============================
+
+This document first discusses what sorts of issues RCU's CPU stall
+detector can locate, and then discusses kernel parameters and Kconfig
+options that can be used to fine-tune the detector's operation. Finally,
+this document explains the stall detector's "splat" format.
+
+
+What Causes RCU CPU Stall Warnings?
+===================================
+
+So your kernel printed an RCU CPU stall warning. The next question is
+"What caused it?" The following problems can result in RCU CPU stall
+warnings:
+
+- A CPU looping in an RCU read-side critical section.
+
+- A CPU looping with interrupts disabled.
+
+- A CPU looping with preemption disabled.
+
+- A CPU looping with bottom halves disabled.
+
+- For !CONFIG_PREEMPTION kernels, a CPU looping anywhere in the
+ kernel without potentially invoking schedule(). If the looping
+ in the kernel is really expected and desirable behavior, you
+ might need to add some calls to cond_resched().
+
+- Booting Linux using a console connection that is too slow to
+ keep up with the boot-time console-message rate. For example,
+ a 115Kbaud serial console can be *way* too slow to keep up
+ with boot-time message rates, and will frequently result in
+ RCU CPU stall warning messages. Especially if you have added
+ debug printk()s.
+
+- Anything that prevents RCU's grace-period kthreads from running.
+ This can result in the "All QSes seen" console-log message.
+ This message will include information on when the kthread last
+ ran and how often it should be expected to run. It can also
+ result in the ``rcu_.*kthread starved for`` console-log message,
+ which will include additional debugging information.
+
+- A CPU-bound real-time task in a CONFIG_PREEMPTION kernel, which might
+ happen to preempt a low-priority task in the middle of an RCU
+ read-side critical section. This is especially damaging if
+ that low-priority task is not permitted to run on any other CPU,
+ in which case the next RCU grace period can never complete, which
+ will eventually cause the system to run out of memory and hang.
+ While the system is in the process of running itself out of
+ memory, you might see stall-warning messages.
+
+- A CPU-bound real-time task in a CONFIG_PREEMPT_RT kernel that
+ is running at a higher priority than the RCU softirq threads.
+ This will prevent RCU callbacks from ever being invoked,
+ and in a CONFIG_PREEMPT_RCU kernel will further prevent
+ RCU grace periods from ever completing. Either way, the
+ system will eventually run out of memory and hang. In the
+ CONFIG_PREEMPT_RCU case, you might see stall-warning
+ messages.
+
+ You can use the rcutree.kthread_prio kernel boot parameter to
+ increase the scheduling priority of RCU's kthreads, which can
+ help avoid this problem. However, please note that doing this
+ can increase your system's context-switch rate and thus degrade
+ performance.
+
+- A periodic interrupt whose handler takes longer than the time
+ interval between successive pairs of interrupts. This can
+ prevent RCU's kthreads and softirq handlers from running.
+ Note that certain high-overhead debugging options, for example
+ the function_graph tracer, can result in interrupt handler taking
+ considerably longer than normal, which can in turn result in
+ RCU CPU stall warnings.
+
+- Testing a workload on a fast system, tuning the stall-warning
+ timeout down to just barely avoid RCU CPU stall warnings, and then
+ running the same workload with the same stall-warning timeout on a
+ slow system. Note that thermal throttling and on-demand governors
+ can cause a single system to be sometimes fast and sometimes slow!
+
+- A hardware or software issue shuts off the scheduler-clock
+ interrupt on a CPU that is not in dyntick-idle mode. This
+ problem really has happened, and seems to be most likely to
+ result in RCU CPU stall warnings for CONFIG_NO_HZ_COMMON=n kernels.
+
+- A hardware or software issue that prevents time-based wakeups
+ from occurring. These issues can range from misconfigured or
+ buggy timer hardware through bugs in the interrupt or exception
+ path (whether hardware, firmware, or software) through bugs
+ in Linux's timer subsystem through bugs in the scheduler, and,
+ yes, even including bugs in RCU itself. It can also result in
+ the ``rcu_.*timer wakeup didn't happen for`` console-log message,
+ which will include additional debugging information.
+
+- A timer issue causes time to appear to jump forward, so that RCU
+ believes that the RCU CPU stall-warning timeout has been exceeded
+ when in fact much less time has passed. This could be due to
+ timer hardware bugs, timer driver bugs, or even corruption of
+ the "jiffies" global variable. These sorts of timer hardware
+ and driver bugs are not uncommon when testing new hardware.
+
+- A low-level kernel issue that either fails to invoke one of the
+ variants of rcu_eqs_enter(true), rcu_eqs_exit(true), ct_idle_enter(),
+ ct_idle_exit(), ct_irq_enter(), or ct_irq_exit() on the one
+ hand, or that invokes one of them too many times on the other.
+ Historically, the most frequent issue has been an omission
+ of either irq_enter() or irq_exit(), which in turn invoke
+ ct_irq_enter() or ct_irq_exit(), respectively. Building your
+ kernel with CONFIG_RCU_EQS_DEBUG=y can help track down these types
+ of issues, which sometimes arise in architecture-specific code.
+
+- A bug in the RCU implementation.
+
+- A hardware failure. This is quite unlikely, but is not at all
+ uncommon in large datacenter. In one memorable case some decades
+ back, a CPU failed in a running system, becoming unresponsive,
+ but not causing an immediate crash. This resulted in a series
+ of RCU CPU stall warnings, eventually leading to the realization
+ that the CPU had failed.
+
+The RCU, RCU-sched, RCU-tasks, and RCU-tasks-trace implementations have
+CPU stall warning. Note that SRCU does *not* have CPU stall warnings.
+Please note that RCU only detects CPU stalls when there is a grace period
+in progress. No grace period, no CPU stall warnings.
+
+To diagnose the cause of the stall, inspect the stack traces.
+The offending function will usually be near the top of the stack.
+If you have a series of stall warnings from a single extended stall,
+comparing the stack traces can often help determine where the stall
+is occurring, which will usually be in the function nearest the top of
+that portion of the stack which remains the same from trace to trace.
+If you can reliably trigger the stall, ftrace can be quite helpful.
+
+RCU bugs can often be debugged with the help of CONFIG_RCU_TRACE
+and with RCU's event tracing. For information on RCU's event tracing,
+see include/trace/events/rcu.h.
+
+
+Fine-Tuning the RCU CPU Stall Detector
+======================================
+
+The rcuupdate.rcu_cpu_stall_suppress module parameter disables RCU's
+CPU stall detector, which detects conditions that unduly delay RCU grace
+periods. This module parameter enables CPU stall detection by default,
+but may be overridden via boot-time parameter or at runtime via sysfs.
+The stall detector's idea of what constitutes "unduly delayed" is
+controlled by a set of kernel configuration variables and cpp macros:
+
+CONFIG_RCU_CPU_STALL_TIMEOUT
+----------------------------
+
+ This kernel configuration parameter defines the period of time
+ that RCU will wait from the beginning of a grace period until it
+ issues an RCU CPU stall warning. This time period is normally
+ 21 seconds.
+
+ This configuration parameter may be changed at runtime via the
+ /sys/module/rcupdate/parameters/rcu_cpu_stall_timeout, however
+ this parameter is checked only at the beginning of a cycle.
+ So if you are 10 seconds into a 40-second stall, setting this
+ sysfs parameter to (say) five will shorten the timeout for the
+ *next* stall, or the following warning for the current stall
+ (assuming the stall lasts long enough). It will not affect the
+ timing of the next warning for the current stall.
+
+ Stall-warning messages may be enabled and disabled completely via
+ /sys/module/rcupdate/parameters/rcu_cpu_stall_suppress.
+
+CONFIG_RCU_EXP_CPU_STALL_TIMEOUT
+--------------------------------
+
+ Same as the CONFIG_RCU_CPU_STALL_TIMEOUT parameter but only for
+ the expedited grace period. This parameter defines the period
+ of time that RCU will wait from the beginning of an expedited
+ grace period until it issues an RCU CPU stall warning. This time
+ period is normally 20 milliseconds on Android devices. A zero
+ value causes the CONFIG_RCU_CPU_STALL_TIMEOUT value to be used,
+ after conversion to milliseconds.
+
+ This configuration parameter may be changed at runtime via the
+ /sys/module/rcupdate/parameters/rcu_exp_cpu_stall_timeout, however
+ this parameter is checked only at the beginning of a cycle. If you
+ are in a current stall cycle, setting it to a new value will change
+ the timeout for the -next- stall.
+
+ Stall-warning messages may be enabled and disabled completely via
+ /sys/module/rcupdate/parameters/rcu_cpu_stall_suppress.
+
+RCU_STALL_DELAY_DELTA
+---------------------
+
+ Although the lockdep facility is extremely useful, it does add
+ some overhead. Therefore, under CONFIG_PROVE_RCU, the
+ RCU_STALL_DELAY_DELTA macro allows five extra seconds before
+ giving an RCU CPU stall warning message. (This is a cpp
+ macro, not a kernel configuration parameter.)
+
+RCU_STALL_RAT_DELAY
+-------------------
+
+ The CPU stall detector tries to make the offending CPU print its
+ own warnings, as this often gives better-quality stack traces.
+ However, if the offending CPU does not detect its own stall in
+ the number of jiffies specified by RCU_STALL_RAT_DELAY, then
+ some other CPU will complain. This delay is normally set to
+ two jiffies. (This is a cpp macro, not a kernel configuration
+ parameter.)
+
+rcupdate.rcu_task_stall_timeout
+-------------------------------
+
+ This boot/sysfs parameter controls the RCU-tasks and
+ RCU-tasks-trace stall warning intervals. A value of zero or less
+ suppresses RCU-tasks stall warnings. A positive value sets the
+ stall-warning interval in seconds. An RCU-tasks stall warning
+ starts with the line:
+
+ INFO: rcu_tasks detected stalls on tasks:
+
+ And continues with the output of sched_show_task() for each
+ task stalling the current RCU-tasks grace period.
+
+ An RCU-tasks-trace stall warning starts (and continues) similarly:
+
+ INFO: rcu_tasks_trace detected stalls on tasks
+
+
+Interpreting RCU's CPU Stall-Detector "Splats"
+==============================================
+
+For non-RCU-tasks flavors of RCU, when a CPU detects that some other
+CPU is stalling, it will print a message similar to the following::
+
+ INFO: rcu_sched detected stalls on CPUs/tasks:
+ 2-...: (3 GPs behind) idle=06c/0/0 softirq=1453/1455 fqs=0
+ 16-...: (0 ticks this GP) idle=81c/0/0 softirq=764/764 fqs=0
+ (detected by 32, t=2603 jiffies, g=7075, q=625)
+
+This message indicates that CPU 32 detected that CPUs 2 and 16 were both
+causing stalls, and that the stall was affecting RCU-sched. This message
+will normally be followed by stack dumps for each CPU. Please note that
+PREEMPT_RCU builds can be stalled by tasks as well as by CPUs, and that
+the tasks will be indicated by PID, for example, "P3421". It is even
+possible for an rcu_state stall to be caused by both CPUs *and* tasks,
+in which case the offending CPUs and tasks will all be called out in the list.
+In some cases, CPUs will detect themselves stalling, which will result
+in a self-detected stall.
+
+CPU 2's "(3 GPs behind)" indicates that this CPU has not interacted with
+the RCU core for the past three grace periods. In contrast, CPU 16's "(0
+ticks this GP)" indicates that this CPU has not taken any scheduling-clock
+interrupts during the current stalled grace period.
+
+The "idle=" portion of the message prints the dyntick-idle state.
+The hex number before the first "/" is the low-order 16 bits of the
+dynticks counter, which will have an even-numbered value if the CPU
+is in dyntick-idle mode and an odd-numbered value otherwise. The hex
+number between the two "/"s is the value of the nesting, which will be
+a small non-negative number if in the idle loop (as shown above) and a
+very large positive number otherwise. The number following the final
+"/" is the NMI nesting, which will be a small non-negative number.
+
+The "softirq=" portion of the message tracks the number of RCU softirq
+handlers that the stalled CPU has executed. The number before the "/"
+is the number that had executed since boot at the time that this CPU
+last noted the beginning of a grace period, which might be the current
+(stalled) grace period, or it might be some earlier grace period (for
+example, if the CPU might have been in dyntick-idle mode for an extended
+time period). The number after the "/" is the number that have executed
+since boot until the current time. If this latter number stays constant
+across repeated stall-warning messages, it is possible that RCU's softirq
+handlers are no longer able to execute on this CPU. This can happen if
+the stalled CPU is spinning with interrupts are disabled, or, in -rt
+kernels, if a high-priority process is starving RCU's softirq handler.
+
+The "fqs=" shows the number of force-quiescent-state idle/offline
+detection passes that the grace-period kthread has made across this
+CPU since the last time that this CPU noted the beginning of a grace
+period.
+
+The "detected by" line indicates which CPU detected the stall (in this
+case, CPU 32), how many jiffies have elapsed since the start of the grace
+period (in this case 2603), the grace-period sequence number (7075), and
+an estimate of the total number of RCU callbacks queued across all CPUs
+(625 in this case).
+
+If the grace period ends just as the stall warning starts printing,
+there will be a spurious stall-warning message, which will include
+the following::
+
+ INFO: Stall ended before state dump start
+
+This is rare, but does happen from time to time in real life. It is also
+possible for a zero-jiffy stall to be flagged in this case, depending
+on how the stall warning and the grace-period initialization happen to
+interact. Please note that it is not possible to entirely eliminate this
+sort of false positive without resorting to things like stop_machine(),
+which is overkill for this sort of problem.
+
+If all CPUs and tasks have passed through quiescent states, but the
+grace period has nevertheless failed to end, the stall-warning splat
+will include something like the following::
+
+ All QSes seen, last rcu_preempt kthread activity 23807 (4297905177-4297881370), jiffies_till_next_fqs=3, root ->qsmask 0x0
+
+The "23807" indicates that it has been more than 23 thousand jiffies
+since the grace-period kthread ran. The "jiffies_till_next_fqs"
+indicates how frequently that kthread should run, giving the number
+of jiffies between force-quiescent-state scans, in this case three,
+which is way less than 23807. Finally, the root rcu_node structure's
+->qsmask field is printed, which will normally be zero.
+
+If the relevant grace-period kthread has been unable to run prior to
+the stall warning, as was the case in the "All QSes seen" line above,
+the following additional line is printed::
+
+ rcu_sched kthread starved for 23807 jiffies! g7075 f0x0 RCU_GP_WAIT_FQS(3) ->state=0x1 ->cpu=5
+ Unless rcu_sched kthread gets sufficient CPU time, OOM is now expected behavior.
+
+Starving the grace-period kthreads of CPU time can of course result
+in RCU CPU stall warnings even when all CPUs and tasks have passed
+through the required quiescent states. The "g" number shows the current
+grace-period sequence number, the "f" precedes the ->gp_flags command
+to the grace-period kthread, the "RCU_GP_WAIT_FQS" indicates that the
+kthread is waiting for a short timeout, the "state" precedes value of the
+task_struct ->state field, and the "cpu" indicates that the grace-period
+kthread last ran on CPU 5.
+
+If the relevant grace-period kthread does not wake from FQS wait in a
+reasonable time, then the following additional line is printed::
+
+ kthread timer wakeup didn't happen for 23804 jiffies! g7076 f0x0 RCU_GP_WAIT_FQS(5) ->state=0x402
+
+The "23804" indicates that kthread's timer expired more than 23 thousand
+jiffies ago. The rest of the line has meaning similar to the kthread
+starvation case.
+
+Additionally, the following line is printed::
+
+ Possible timer handling issue on cpu=4 timer-softirq=11142
+
+Here "cpu" indicates that the grace-period kthread last ran on CPU 4,
+where it queued the fqs timer. The number following the "timer-softirq"
+is the current ``TIMER_SOFTIRQ`` count on cpu 4. If this value does not
+change on successive RCU CPU stall warnings, there is further reason to
+suspect a timer problem.
+
+These messages are usually followed by stack dumps of the CPUs and tasks
+involved in the stall. These stack traces can help you locate the cause
+of the stall, keeping in mind that the CPU detecting the stall will have
+an interrupt frame that is mainly devoted to detecting the stall.
+
+
+Multiple Warnings From One Stall
+================================
+
+If a stall lasts long enough, multiple stall-warning messages will
+be printed for it. The second and subsequent messages are printed at
+longer intervals, so that the time between (say) the first and second
+message will be about three times the interval between the beginning
+of the stall and the first message. It can be helpful to compare the
+stack dumps for the different messages for the same stalled grace period.
+
+
+Stall Warnings for Expedited Grace Periods
+==========================================
+
+If an expedited grace period detects a stall, it will place a message
+like the following in dmesg::
+
+ INFO: rcu_sched detected expedited stalls on CPUs/tasks: { 7-... } 21119 jiffies s: 73 root: 0x2/.
+
+This indicates that CPU 7 has failed to respond to a reschedule IPI.
+The three periods (".") following the CPU number indicate that the CPU
+is online (otherwise the first period would instead have been "O"),
+that the CPU was online at the beginning of the expedited grace period
+(otherwise the second period would have instead been "o"), and that
+the CPU has been online at least once since boot (otherwise, the third
+period would instead have been "N"). The number before the "jiffies"
+indicates that the expedited grace period has been going on for 21,119
+jiffies. The number following the "s:" indicates that the expedited
+grace-period sequence counter is 73. The fact that this last value is
+odd indicates that an expedited grace period is in flight. The number
+following "root:" is a bitmask that indicates which children of the root
+rcu_node structure correspond to CPUs and/or tasks that are blocking the
+current expedited grace period. If the tree had more than one level,
+additional hex numbers would be printed for the states of the other
+rcu_node structures in the tree.
+
+As with normal grace periods, PREEMPT_RCU builds can be stalled by
+tasks as well as by CPUs, and that the tasks will be indicated by PID,
+for example, "P3421".
+
+It is entirely possible to see stall warnings from normal and from
+expedited grace periods at about the same time during the same run.
+
+RCU_CPU_STALL_CPUTIME
+=====================
+
+In kernels built with CONFIG_RCU_CPU_STALL_CPUTIME=y or booted with
+rcupdate.rcu_cpu_stall_cputime=1, the following additional information
+is supplied with each RCU CPU stall warning::
+
+ rcu: hardirqs softirqs csw/system
+ rcu: number: 624 45 0
+ rcu: cputime: 69 1 2425 ==> 2500(ms)
+
+These statistics are collected during the sampling period. The values
+in row "number:" are the number of hard interrupts, number of soft
+interrupts, and number of context switches on the stalled CPU. The
+first three values in row "cputime:" indicate the CPU time in
+milliseconds consumed by hard interrupts, soft interrupts, and tasks
+on the stalled CPU. The last number is the measurement interval, again
+in milliseconds. Because user-mode tasks normally do not cause RCU CPU
+stalls, these tasks are typically kernel tasks, which is why only the
+system CPU time are considered.
+
+The sampling period is shown as follows::
+
+ |<------------first timeout---------->|<-----second timeout----->|
+ |<--half timeout-->|<--half timeout-->| |
+ | |<--first period-->| |
+ | |<-----------second sampling period---------->|
+ | | | |
+ snapshot time point 1st-stall 2nd-stall
+
+The following describes four typical scenarios:
+
+1. A CPU looping with interrupts disabled.
+
+ ::
+
+ rcu: hardirqs softirqs csw/system
+ rcu: number: 0 0 0
+ rcu: cputime: 0 0 0 ==> 2500(ms)
+
+ Because interrupts have been disabled throughout the measurement
+ interval, there are no interrupts and no context switches.
+ Furthermore, because CPU time consumption was measured using interrupt
+ handlers, the system CPU consumption is misleadingly measured as zero.
+ This scenario will normally also have "(0 ticks this GP)" printed on
+ this CPU's summary line.
+
+2. A CPU looping with bottom halves disabled.
+
+ This is similar to the previous example, but with non-zero number of
+ and CPU time consumed by hard interrupts, along with non-zero CPU
+ time consumed by in-kernel execution::
+
+ rcu: hardirqs softirqs csw/system
+ rcu: number: 624 0 0
+ rcu: cputime: 49 0 2446 ==> 2500(ms)
+
+ The fact that there are zero softirqs gives a hint that these were
+ disabled, perhaps via local_bh_disable(). It is of course possible
+ that there were no softirqs, perhaps because all events that would
+ result in softirq execution are confined to other CPUs. In this case,
+ the diagnosis should continue as shown in the next example.
+
+3. A CPU looping with preemption disabled.
+
+ Here, only the number of context switches is zero::
+
+ rcu: hardirqs softirqs csw/system
+ rcu: number: 624 45 0
+ rcu: cputime: 69 1 2425 ==> 2500(ms)
+
+ This situation hints that the stalled CPU was looping with preemption
+ disabled.
+
+4. No looping, but massive hard and soft interrupts.
+
+ ::
+
+ rcu: hardirqs softirqs csw/system
+ rcu: number: xx xx 0
+ rcu: cputime: xx xx 0 ==> 2500(ms)
+
+ Here, the number and CPU time of hard interrupts are all non-zero,
+ but the number of context switches and the in-kernel CPU time consumed
+ are zero. The number and cputime of soft interrupts will usually be
+ non-zero, but could be zero, for example, if the CPU was spinning
+ within a single hard interrupt handler.
+
+ If this type of RCU CPU stall warning can be reproduced, you can
+ narrow it down by looking at /proc/interrupts or by writing code to
+ trace each interrupt, for example, by referring to show_interrupts().
diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt
deleted file mode 100644
index 491043fd976f..000000000000
--- a/Documentation/RCU/stallwarn.txt
+++ /dev/null
@@ -1,315 +0,0 @@
-Using RCU's CPU Stall Detector
-
-This document first discusses what sorts of issues RCU's CPU stall
-detector can locate, and then discusses kernel parameters and Kconfig
-options that can be used to fine-tune the detector's operation. Finally,
-this document explains the stall detector's "splat" format.
-
-
-What Causes RCU CPU Stall Warnings?
-
-So your kernel printed an RCU CPU stall warning. The next question is
-"What caused it?" The following problems can result in RCU CPU stall
-warnings:
-
-o A CPU looping in an RCU read-side critical section.
-
-o A CPU looping with interrupts disabled.
-
-o A CPU looping with preemption disabled.
-
-o A CPU looping with bottom halves disabled.
-
-o For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the kernel
- without invoking schedule(). If the looping in the kernel is
- really expected and desirable behavior, you might need to add
- some calls to cond_resched().
-
-o Booting Linux using a console connection that is too slow to
- keep up with the boot-time console-message rate. For example,
- a 115Kbaud serial console can be -way- too slow to keep up
- with boot-time message rates, and will frequently result in
- RCU CPU stall warning messages. Especially if you have added
- debug printk()s.
-
-o Anything that prevents RCU's grace-period kthreads from running.
- This can result in the "All QSes seen" console-log message.
- This message will include information on when the kthread last
- ran and how often it should be expected to run. It can also
- result in the "rcu_.*kthread starved for" console-log message,
- which will include additional debugging information.
-
-o A CPU-bound real-time task in a CONFIG_PREEMPT kernel, which might
- happen to preempt a low-priority task in the middle of an RCU
- read-side critical section. This is especially damaging if
- that low-priority task is not permitted to run on any other CPU,
- in which case the next RCU grace period can never complete, which
- will eventually cause the system to run out of memory and hang.
- While the system is in the process of running itself out of
- memory, you might see stall-warning messages.
-
-o A CPU-bound real-time task in a CONFIG_PREEMPT_RT kernel that
- is running at a higher priority than the RCU softirq threads.
- This will prevent RCU callbacks from ever being invoked,
- and in a CONFIG_PREEMPT_RCU kernel will further prevent
- RCU grace periods from ever completing. Either way, the
- system will eventually run out of memory and hang. In the
- CONFIG_PREEMPT_RCU case, you might see stall-warning
- messages.
-
-o A periodic interrupt whose handler takes longer than the time
- interval between successive pairs of interrupts. This can
- prevent RCU's kthreads and softirq handlers from running.
- Note that certain high-overhead debugging options, for example
- the function_graph tracer, can result in interrupt handler taking
- considerably longer than normal, which can in turn result in
- RCU CPU stall warnings.
-
-o Testing a workload on a fast system, tuning the stall-warning
- timeout down to just barely avoid RCU CPU stall warnings, and then
- running the same workload with the same stall-warning timeout on a
- slow system. Note that thermal throttling and on-demand governors
- can cause a single system to be sometimes fast and sometimes slow!
-
-o A hardware or software issue shuts off the scheduler-clock
- interrupt on a CPU that is not in dyntick-idle mode. This
- problem really has happened, and seems to be most likely to
- result in RCU CPU stall warnings for CONFIG_NO_HZ_COMMON=n kernels.
-
-o A bug in the RCU implementation.
-
-o A hardware failure. This is quite unlikely, but has occurred
- at least once in real life. A CPU failed in a running system,
- becoming unresponsive, but not causing an immediate crash.
- This resulted in a series of RCU CPU stall warnings, eventually
- leading the realization that the CPU had failed.
-
-The RCU, RCU-sched, and RCU-tasks implementations have CPU stall warning.
-Note that SRCU does -not- have CPU stall warnings. Please note that
-RCU only detects CPU stalls when there is a grace period in progress.
-No grace period, no CPU stall warnings.
-
-To diagnose the cause of the stall, inspect the stack traces.
-The offending function will usually be near the top of the stack.
-If you have a series of stall warnings from a single extended stall,
-comparing the stack traces can often help determine where the stall
-is occurring, which will usually be in the function nearest the top of
-that portion of the stack which remains the same from trace to trace.
-If you can reliably trigger the stall, ftrace can be quite helpful.
-
-RCU bugs can often be debugged with the help of CONFIG_RCU_TRACE
-and with RCU's event tracing. For information on RCU's event tracing,
-see include/trace/events/rcu.h.
-
-
-Fine-Tuning the RCU CPU Stall Detector
-
-The rcuupdate.rcu_cpu_stall_suppress module parameter disables RCU's
-CPU stall detector, which detects conditions that unduly delay RCU grace
-periods. This module parameter enables CPU stall detection by default,
-but may be overridden via boot-time parameter or at runtime via sysfs.
-The stall detector's idea of what constitutes "unduly delayed" is
-controlled by a set of kernel configuration variables and cpp macros:
-
-CONFIG_RCU_CPU_STALL_TIMEOUT
-
- This kernel configuration parameter defines the period of time
- that RCU will wait from the beginning of a grace period until it
- issues an RCU CPU stall warning. This time period is normally
- 21 seconds.
-
- This configuration parameter may be changed at runtime via the
- /sys/module/rcupdate/parameters/rcu_cpu_stall_timeout, however
- this parameter is checked only at the beginning of a cycle.
- So if you are 10 seconds into a 40-second stall, setting this
- sysfs parameter to (say) five will shorten the timeout for the
- -next- stall, or the following warning for the current stall
- (assuming the stall lasts long enough). It will not affect the
- timing of the next warning for the current stall.
-
- Stall-warning messages may be enabled and disabled completely via
- /sys/module/rcupdate/parameters/rcu_cpu_stall_suppress.
-
-RCU_STALL_DELAY_DELTA
-
- Although the lockdep facility is extremely useful, it does add
- some overhead. Therefore, under CONFIG_PROVE_RCU, the
- RCU_STALL_DELAY_DELTA macro allows five extra seconds before
- giving an RCU CPU stall warning message. (This is a cpp
- macro, not a kernel configuration parameter.)
-
-RCU_STALL_RAT_DELAY
-
- The CPU stall detector tries to make the offending CPU print its
- own warnings, as this often gives better-quality stack traces.
- However, if the offending CPU does not detect its own stall in
- the number of jiffies specified by RCU_STALL_RAT_DELAY, then
- some other CPU will complain. This delay is normally set to
- two jiffies. (This is a cpp macro, not a kernel configuration
- parameter.)
-
-rcupdate.rcu_task_stall_timeout
-
- This boot/sysfs parameter controls the RCU-tasks stall warning
- interval. A value of zero or less suppresses RCU-tasks stall
- warnings. A positive value sets the stall-warning interval
- in jiffies. An RCU-tasks stall warning starts with the line:
-
- INFO: rcu_tasks detected stalls on tasks:
-
- And continues with the output of sched_show_task() for each
- task stalling the current RCU-tasks grace period.
-
-
-Interpreting RCU's CPU Stall-Detector "Splats"
-
-For non-RCU-tasks flavors of RCU, when a CPU detects that it is stalling,
-it will print a message similar to the following:
-
- INFO: rcu_sched detected stalls on CPUs/tasks:
- 2-...: (3 GPs behind) idle=06c/0/0 softirq=1453/1455 fqs=0
- 16-...: (0 ticks this GP) idle=81c/0/0 softirq=764/764 fqs=0
- (detected by 32, t=2603 jiffies, g=7075, q=625)
-
-This message indicates that CPU 32 detected that CPUs 2 and 16 were both
-causing stalls, and that the stall was affecting RCU-sched. This message
-will normally be followed by stack dumps for each CPU. Please note that
-PREEMPT_RCU builds can be stalled by tasks as well as by CPUs, and that
-the tasks will be indicated by PID, for example, "P3421". It is even
-possible for a rcu_preempt_state stall to be caused by both CPUs -and-
-tasks, in which case the offending CPUs and tasks will all be called
-out in the list.
-
-CPU 2's "(3 GPs behind)" indicates that this CPU has not interacted with
-the RCU core for the past three grace periods. In contrast, CPU 16's "(0
-ticks this GP)" indicates that this CPU has not taken any scheduling-clock
-interrupts during the current stalled grace period.
-
-The "idle=" portion of the message prints the dyntick-idle state.
-The hex number before the first "/" is the low-order 12 bits of the
-dynticks counter, which will have an even-numbered value if the CPU
-is in dyntick-idle mode and an odd-numbered value otherwise. The hex
-number between the two "/"s is the value of the nesting, which will be
-a small non-negative number if in the idle loop (as shown above) and a
-very large positive number otherwise.
-
-The "softirq=" portion of the message tracks the number of RCU softirq
-handlers that the stalled CPU has executed. The number before the "/"
-is the number that had executed since boot at the time that this CPU
-last noted the beginning of a grace period, which might be the current
-(stalled) grace period, or it might be some earlier grace period (for
-example, if the CPU might have been in dyntick-idle mode for an extended
-time period. The number after the "/" is the number that have executed
-since boot until the current time. If this latter number stays constant
-across repeated stall-warning messages, it is possible that RCU's softirq
-handlers are no longer able to execute on this CPU. This can happen if
-the stalled CPU is spinning with interrupts are disabled, or, in -rt
-kernels, if a high-priority process is starving RCU's softirq handler.
-
-The "fps=" shows the number of force-quiescent-state idle/offline
-detection passes that the grace-period kthread has made across this
-CPU since the last time that this CPU noted the beginning of a grace
-period.
-
-The "detected by" line indicates which CPU detected the stall (in this
-case, CPU 32), how many jiffies have elapsed since the start of the grace
-period (in this case 2603), the grace-period sequence number (7075), and
-an estimate of the total number of RCU callbacks queued across all CPUs
-(625 in this case).
-
-In kernels with CONFIG_RCU_FAST_NO_HZ, more information is printed
-for each CPU:
-
- 0: (64628 ticks this GP) idle=dd5/3fffffffffffffff/0 softirq=82/543 last_accelerate: a345/d342 nonlazy_posted: 25 .D
-
-The "last_accelerate:" prints the low-order 16 bits (in hex) of the
-jiffies counter when this CPU last invoked rcu_try_advance_all_cbs()
-from rcu_needs_cpu() or last invoked rcu_accelerate_cbs() from
-rcu_prepare_for_idle(). The "nonlazy_posted:" prints the number
-of non-lazy callbacks posted since the last call to rcu_needs_cpu().
-Finally, an "L" indicates that there are currently no non-lazy callbacks
-("." is printed otherwise, as shown above) and "D" indicates that
-dyntick-idle processing is enabled ("." is printed otherwise, for example,
-if disabled via the "nohz=" kernel boot parameter).
-
-If the grace period ends just as the stall warning starts printing,
-there will be a spurious stall-warning message, which will include
-the following:
-
- INFO: Stall ended before state dump start
-
-This is rare, but does happen from time to time in real life. It is also
-possible for a zero-jiffy stall to be flagged in this case, depending
-on how the stall warning and the grace-period initialization happen to
-interact. Please note that it is not possible to entirely eliminate this
-sort of false positive without resorting to things like stop_machine(),
-which is overkill for this sort of problem.
-
-If all CPUs and tasks have passed through quiescent states, but the
-grace period has nevertheless failed to end, the stall-warning splat
-will include something like the following:
-
- All QSes seen, last rcu_preempt kthread activity 23807 (4297905177-4297881370), jiffies_till_next_fqs=3, root ->qsmask 0x0
-
-The "23807" indicates that it has been more than 23 thousand jiffies
-since the grace-period kthread ran. The "jiffies_till_next_fqs"
-indicates how frequently that kthread should run, giving the number
-of jiffies between force-quiescent-state scans, in this case three,
-which is way less than 23807. Finally, the root rcu_node structure's
-->qsmask field is printed, which will normally be zero.
-
-If the relevant grace-period kthread has been unable to run prior to
-the stall warning, as was the case in the "All QSes seen" line above,
-the following additional line is printed:
-
- kthread starved for 23807 jiffies! g7075 f0x0 RCU_GP_WAIT_FQS(3) ->state=0x1 ->cpu=5
-
-Starving the grace-period kthreads of CPU time can of course result
-in RCU CPU stall warnings even when all CPUs and tasks have passed
-through the required quiescent states. The "g" number shows the current
-grace-period sequence number, the "f" precedes the ->gp_flags command
-to the grace-period kthread, the "RCU_GP_WAIT_FQS" indicates that the
-kthread is waiting for a short timeout, the "state" precedes value of the
-task_struct ->state field, and the "cpu" indicates that the grace-period
-kthread last ran on CPU 5.
-
-
-Multiple Warnings From One Stall
-
-If a stall lasts long enough, multiple stall-warning messages will be
-printed for it. The second and subsequent messages are printed at
-longer intervals, so that the time between (say) the first and second
-message will be about three times the interval between the beginning
-of the stall and the first message.
-
-
-Stall Warnings for Expedited Grace Periods
-
-If an expedited grace period detects a stall, it will place a message
-like the following in dmesg:
-
- INFO: rcu_sched detected expedited stalls on CPUs/tasks: { 7-... } 21119 jiffies s: 73 root: 0x2/.
-
-This indicates that CPU 7 has failed to respond to a reschedule IPI.
-The three periods (".") following the CPU number indicate that the CPU
-is online (otherwise the first period would instead have been "O"),
-that the CPU was online at the beginning of the expedited grace period
-(otherwise the second period would have instead been "o"), and that
-the CPU has been online at least once since boot (otherwise, the third
-period would instead have been "N"). The number before the "jiffies"
-indicates that the expedited grace period has been going on for 21,119
-jiffies. The number following the "s:" indicates that the expedited
-grace-period sequence counter is 73. The fact that this last value is
-odd indicates that an expedited grace period is in flight. The number
-following "root:" is a bitmask that indicates which children of the root
-rcu_node structure correspond to CPUs and/or tasks that are blocking the
-current expedited grace period. If the tree had more than one level,
-additional hex numbers would be printed for the states of the other
-rcu_node structures in the tree.
-
-As with normal grace periods, PREEMPT_RCU builds can be stalled by
-tasks as well as by CPUs, and that the tasks will be indicated by PID,
-for example, "P3421".
-
-It is entirely possible to see stall warnings from normal and from
-expedited grace periods at about the same time during the same run.
diff --git a/Documentation/RCU/torture.rst b/Documentation/RCU/torture.rst
new file mode 100644
index 000000000000..1ad5cc793811
--- /dev/null
+++ b/Documentation/RCU/torture.rst
@@ -0,0 +1,374 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========================
+RCU Torture Test Operation
+==========================
+
+
+CONFIG_RCU_TORTURE_TEST
+=======================
+
+The CONFIG_RCU_TORTURE_TEST config option is available for all RCU
+implementations. It creates an rcutorture kernel module that can
+be loaded to run a torture test. The test periodically outputs
+status messages via printk(), which can be examined via the dmesg
+command (perhaps grepping for "torture"). The test is started
+when the module is loaded, and stops when the module is unloaded.
+
+Module parameters are prefixed by "rcutorture." in
+Documentation/admin-guide/kernel-parameters.txt.
+
+Output
+======
+
+The statistics output is as follows::
+
+ rcu-torture:--- Start of test: nreaders=16 nfakewriters=4 stat_interval=30 verbose=0 test_no_idle_hz=1 shuffle_interval=3 stutter=5 irqreader=1 fqs_duration=0 fqs_holdoff=0 fqs_stutter=3 test_boost=1/0 test_boost_interval=7 test_boost_duration=4
+ rcu-torture: rtc: (null) ver: 155441 tfle: 0 rta: 155441 rtaf: 8884 rtf: 155440 rtmbe: 0 rtbe: 0 rtbke: 0 rtbre: 0 rtbf: 0 rtb: 0 nt: 3055767
+ rcu-torture: Reader Pipe: 727860534 34213 0 0 0 0 0 0 0 0 0
+ rcu-torture: Reader Batch: 727877838 17003 0 0 0 0 0 0 0 0 0
+ rcu-torture: Free-Block Circulation: 155440 155440 155440 155440 155440 155440 155440 155440 155440 155440 0
+ rcu-torture:--- End of test: SUCCESS: nreaders=16 nfakewriters=4 stat_interval=30 verbose=0 test_no_idle_hz=1 shuffle_interval=3 stutter=5 irqreader=1 fqs_duration=0 fqs_holdoff=0 fqs_stutter=3 test_boost=1/0 test_boost_interval=7 test_boost_duration=4
+
+The command "dmesg | grep torture:" will extract this information on
+most systems. On more esoteric configurations, it may be necessary to
+use other commands to access the output of the printk()s used by
+the RCU torture test. The printk()s use KERN_ALERT, so they should
+be evident. ;-)
+
+The first and last lines show the rcutorture module parameters, and the
+last line shows either "SUCCESS" or "FAILURE", based on rcutorture's
+automatic determination as to whether RCU operated correctly.
+
+The entries are as follows:
+
+* "rtc": The hexadecimal address of the structure currently visible
+ to readers.
+
+* "ver": The number of times since boot that the RCU writer task
+ has changed the structure visible to readers.
+
+* "tfle": If non-zero, indicates that the "torture freelist"
+ containing structures to be placed into the "rtc" area is empty.
+ This condition is important, since it can fool you into thinking
+ that RCU is working when it is not. :-/
+
+* "rta": Number of structures allocated from the torture freelist.
+
+* "rtaf": Number of allocations from the torture freelist that have
+ failed due to the list being empty. It is not unusual for this
+ to be non-zero, but it is bad for it to be a large fraction of
+ the value indicated by "rta".
+
+* "rtf": Number of frees into the torture freelist.
+
+* "rtmbe": A non-zero value indicates that rcutorture believes that
+ rcu_assign_pointer() and rcu_dereference() are not working
+ correctly. This value should be zero.
+
+* "rtbe": A non-zero value indicates that one of the rcu_barrier()
+ family of functions is not working correctly.
+
+* "rtbke": rcutorture was unable to create the real-time kthreads
+ used to force RCU priority inversion. This value should be zero.
+
+* "rtbre": Although rcutorture successfully created the kthreads
+ used to force RCU priority inversion, it was unable to set them
+ to the real-time priority level of 1. This value should be zero.
+
+* "rtbf": The number of times that RCU priority boosting failed
+ to resolve RCU priority inversion.
+
+* "rtb": The number of times that rcutorture attempted to force
+ an RCU priority inversion condition. If you are testing RCU
+ priority boosting via the "test_boost" module parameter, this
+ value should be non-zero.
+
+* "nt": The number of times rcutorture ran RCU read-side code from
+ within a timer handler. This value should be non-zero only
+ if you specified the "irqreader" module parameter.
+
+* "Reader Pipe": Histogram of "ages" of structures seen by readers.
+ If any entries past the first two are non-zero, RCU is broken.
+ And rcutorture prints the error flag string "!!!" to make sure
+ you notice. The age of a newly allocated structure is zero,
+ it becomes one when removed from reader visibility, and is
+ incremented once per grace period subsequently -- and is freed
+ after passing through (RCU_TORTURE_PIPE_LEN-2) grace periods.
+
+ The output displayed above was taken from a correctly working
+ RCU. If you want to see what it looks like when broken, break
+ it yourself. ;-)
+
+* "Reader Batch": Another histogram of "ages" of structures seen
+ by readers, but in terms of counter flips (or batches) rather
+ than in terms of grace periods. The legal number of non-zero
+ entries is again two. The reason for this separate view is that
+ it is sometimes easier to get the third entry to show up in the
+ "Reader Batch" list than in the "Reader Pipe" list.
+
+* "Free-Block Circulation": Shows the number of torture structures
+ that have reached a given point in the pipeline. The first element
+ should closely correspond to the number of structures allocated,
+ the second to the number that have been removed from reader view,
+ and all but the last remaining to the corresponding number of
+ passes through a grace period. The last entry should be zero,
+ as it is only incremented if a torture structure's counter
+ somehow gets incremented farther than it should.
+
+Different implementations of RCU can provide implementation-specific
+additional information. For example, Tree SRCU provides the following
+additional line::
+
+ srcud-torture: Tree SRCU per-CPU(idx=0): 0(35,-21) 1(-4,24) 2(1,1) 3(-26,20) 4(28,-47) 5(-9,4) 6(-10,14) 7(-14,11) T(1,6)
+
+This line shows the per-CPU counter state, in this case for Tree SRCU
+using a dynamically allocated srcu_struct (hence "srcud-" rather than
+"srcu-"). The numbers in parentheses are the values of the "old" and
+"current" counters for the corresponding CPU. The "idx" value maps the
+"old" and "current" values to the underlying array, and is useful for
+debugging. The final "T" entry contains the totals of the counters.
+
+Usage on Specific Kernel Builds
+===============================
+
+It is sometimes desirable to torture RCU on a specific kernel build,
+for example, when preparing to put that kernel build into production.
+In that case, the kernel should be built with CONFIG_RCU_TORTURE_TEST=m
+so that the test can be started using modprobe and terminated using rmmod.
+
+For example, the following script may be used to torture RCU::
+
+ #!/bin/sh
+
+ modprobe rcutorture
+ sleep 3600
+ rmmod rcutorture
+ dmesg | grep torture:
+
+The output can be manually inspected for the error flag of "!!!".
+One could of course create a more elaborate script that automatically
+checked for such errors. The "rmmod" command forces a "SUCCESS",
+"FAILURE", or "RCU_HOTPLUG" indication to be printk()ed. The first
+two are self-explanatory, while the last indicates that while there
+were no RCU failures, CPU-hotplug problems were detected.
+
+
+Usage on Mainline Kernels
+=========================
+
+When using rcutorture to test changes to RCU itself, it is often
+necessary to build a number of kernels in order to test that change
+across a broad range of combinations of the relevant Kconfig options
+and of the relevant kernel boot parameters. In this situation, use
+of modprobe and rmmod can be quite time-consuming and error-prone.
+
+Therefore, the tools/testing/selftests/rcutorture/bin/kvm.sh
+script is available for mainline testing for x86, arm64, and
+powerpc. By default, it will run the series of tests specified by
+tools/testing/selftests/rcutorture/configs/rcu/CFLIST, with each test
+running for 30 minutes within a guest OS using a minimal userspace
+supplied by an automatically generated initrd. After the tests are
+complete, the resulting build products and console output are analyzed
+for errors and the results of the runs are summarized.
+
+On larger systems, rcutorture testing can be accelerated by passing the
+--cpus argument to kvm.sh. For example, on a 64-CPU system, "--cpus 43"
+would use up to 43 CPUs to run tests concurrently, which as of v5.4 would
+complete all the scenarios in two batches, reducing the time to complete
+from about eight hours to about one hour (not counting the time to build
+the sixteen kernels). The "--dryrun sched" argument will not run tests,
+but rather tell you how the tests would be scheduled into batches. This
+can be useful when working out how many CPUs to specify in the --cpus
+argument.
+
+Not all changes require that all scenarios be run. For example, a change
+to Tree SRCU might run only the SRCU-N and SRCU-P scenarios using the
+--configs argument to kvm.sh as follows: "--configs 'SRCU-N SRCU-P'".
+Large systems can run multiple copies of the full set of scenarios,
+for example, a system with 448 hardware threads can run five instances
+of the full set concurrently. To make this happen::
+
+ kvm.sh --cpus 448 --configs '5*CFLIST'
+
+Alternatively, such a system can run 56 concurrent instances of a single
+eight-CPU scenario::
+
+ kvm.sh --cpus 448 --configs '56*TREE04'
+
+Or 28 concurrent instances of each of two eight-CPU scenarios::
+
+ kvm.sh --cpus 448 --configs '28*TREE03 28*TREE04'
+
+Of course, each concurrent instance will use memory, which can be
+limited using the --memory argument, which defaults to 512M. Small
+values for memory may require disabling the callback-flooding tests
+using the --bootargs parameter discussed below.
+
+Sometimes additional debugging is useful, and in such cases the --kconfig
+parameter to kvm.sh may be used, for example, ``--kconfig 'CONFIG_RCU_EQS_DEBUG=y'``.
+In addition, there are the --gdb, --kasan, and --kcsan parameters.
+Note that --gdb limits you to one scenario per kvm.sh run and requires
+that you have another window open from which to run ``gdb`` as instructed
+by the script.
+
+Kernel boot arguments can also be supplied, for example, to control
+rcutorture's module parameters. For example, to test a change to RCU's
+CPU stall-warning code, use "--bootargs 'rcutorture.stall_cpu=30'".
+This will of course result in the scripting reporting a failure, namely
+the resulting RCU CPU stall warning. As noted above, reducing memory may
+require disabling rcutorture's callback-flooding tests::
+
+ kvm.sh --cpus 448 --configs '56*TREE04' --memory 128M \
+ --bootargs 'rcutorture.fwd_progress=0'
+
+Sometimes all that is needed is a full set of kernel builds. This is
+what the --buildonly parameter does.
+
+The --duration parameter can override the default run time of 30 minutes.
+For example, ``--duration 2d`` would run for two days, ``--duration 3h``
+would run for three hours, ``--duration 5m`` would run for five minutes,
+and ``--duration 45s`` would run for 45 seconds. This last can be useful
+for tracking down rare boot-time failures.
+
+Finally, the --trust-make parameter allows each kernel build to reuse what
+it can from the previous kernel build. Please note that without the
+--trust-make parameter, your tags files may be demolished.
+
+There are additional more arcane arguments that are documented in the
+source code of the kvm.sh script.
+
+If a run contains failures, the number of buildtime and runtime failures
+is listed at the end of the kvm.sh output, which you really should redirect
+to a file. The build products and console output of each run is kept in
+tools/testing/selftests/rcutorture/res in timestamped directories. A
+given directory can be supplied to kvm-find-errors.sh in order to have
+it cycle you through summaries of errors and full error logs. For example::
+
+ tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh \
+ tools/testing/selftests/rcutorture/res/2020.01.20-15.54.23
+
+However, it is often more convenient to access the files directly.
+Files pertaining to all scenarios in a run reside in the top-level
+directory (2020.01.20-15.54.23 in the example above), while per-scenario
+files reside in a subdirectory named after the scenario (for example,
+"TREE04"). If a given scenario ran more than once (as in "--configs
+'56*TREE04'" above), the directories corresponding to the second and
+subsequent runs of that scenario include a sequence number, for example,
+"TREE04.2", "TREE04.3", and so on.
+
+The most frequently used file in the top-level directory is testid.txt.
+If the test ran in a git repository, then this file contains the commit
+that was tested and any uncommitted changes in diff format.
+
+The most frequently used files in each per-scenario-run directory are:
+
+.config:
+ This file contains the Kconfig options.
+
+Make.out:
+ This contains build output for a specific scenario.
+
+console.log:
+ This contains the console output for a specific scenario.
+ This file may be examined once the kernel has booted, but
+ it might not exist if the build failed.
+
+vmlinux:
+ This contains the kernel, which can be useful with tools like
+ objdump and gdb.
+
+A number of additional files are available, but are less frequently used.
+Many are intended for debugging of rcutorture itself or of its scripting.
+
+As of v5.4, a successful run with the default set of scenarios produces
+the following summary at the end of the run on a 12-CPU system::
+
+ SRCU-N ------- 804233 GPs (148.932/s) [srcu: g10008272 f0x0 ]
+ SRCU-P ------- 202320 GPs (37.4667/s) [srcud: g1809476 f0x0 ]
+ SRCU-t ------- 1122086 GPs (207.794/s) [srcu: g0 f0x0 ]
+ SRCU-u ------- 1111285 GPs (205.794/s) [srcud: g1 f0x0 ]
+ TASKS01 ------- 19666 GPs (3.64185/s) [tasks: g0 f0x0 ]
+ TASKS02 ------- 20541 GPs (3.80389/s) [tasks: g0 f0x0 ]
+ TASKS03 ------- 19416 GPs (3.59556/s) [tasks: g0 f0x0 ]
+ TINY01 ------- 836134 GPs (154.84/s) [rcu: g0 f0x0 ] n_max_cbs: 34198
+ TINY02 ------- 850371 GPs (157.476/s) [rcu: g0 f0x0 ] n_max_cbs: 2631
+ TREE01 ------- 162625 GPs (30.1157/s) [rcu: g1124169 f0x0 ]
+ TREE02 ------- 333003 GPs (61.6672/s) [rcu: g2647753 f0x0 ] n_max_cbs: 35844
+ TREE03 ------- 306623 GPs (56.782/s) [rcu: g2975325 f0x0 ] n_max_cbs: 1496497
+ CPU count limited from 16 to 12
+ TREE04 ------- 246149 GPs (45.5831/s) [rcu: g1695737 f0x0 ] n_max_cbs: 434961
+ TREE05 ------- 314603 GPs (58.2598/s) [rcu: g2257741 f0x2 ] n_max_cbs: 193997
+ TREE07 ------- 167347 GPs (30.9902/s) [rcu: g1079021 f0x0 ] n_max_cbs: 478732
+ CPU count limited from 16 to 12
+ TREE09 ------- 752238 GPs (139.303/s) [rcu: g13075057 f0x0 ] n_max_cbs: 99011
+
+
+Repeated Runs
+=============
+
+Suppose that you are chasing down a rare boot-time failure. Although you
+could use kvm.sh, doing so will rebuild the kernel on each run. If you
+need (say) 1,000 runs to have confidence that you have fixed the bug,
+these pointless rebuilds can become extremely annoying.
+
+This is why kvm-again.sh exists.
+
+Suppose that a previous kvm.sh run left its output in this directory::
+
+ tools/testing/selftests/rcutorture/res/2022.11.03-11.26.28
+
+Then this run can be re-run without rebuilding as follow::
+
+ kvm-again.sh tools/testing/selftests/rcutorture/res/2022.11.03-11.26.28
+
+A few of the original run's kvm.sh parameters may be overridden, perhaps
+most notably --duration and --bootargs. For example::
+
+ kvm-again.sh tools/testing/selftests/rcutorture/res/2022.11.03-11.26.28 \
+ --duration 45s
+
+would re-run the previous test, but for only 45 seconds, thus facilitating
+tracking down the aforementioned rare boot-time failure.
+
+
+Distributed Runs
+================
+
+Although kvm.sh is quite useful, its testing is confined to a single
+system. It is not all that hard to use your favorite framework to cause
+(say) 5 instances of kvm.sh to run on your 5 systems, but this will very
+likely unnecessarily rebuild kernels. In addition, manually distributing
+the desired rcutorture scenarios across the available systems can be
+painstaking and error-prone.
+
+And this is why the kvm-remote.sh script exists.
+
+If the following command works::
+
+ ssh system0 date
+
+and if it also works for system1, system2, system3, system4, and system5,
+and all of these systems have 64 CPUs, you can type::
+
+ kvm-remote.sh "system0 system1 system2 system3 system4 system5" \
+ --cpus 64 --duration 8h --configs "5*CFLIST"
+
+This will build each default scenario's kernel on the local system, then
+spread each of five instances of each scenario over the systems listed,
+running each scenario for eight hours. At the end of the runs, the
+results will be gathered, recorded, and printed. Most of the parameters
+that kvm.sh will accept can be passed to kvm-remote.sh, but the list of
+systems must come first.
+
+The kvm.sh ``--dryrun scenarios`` argument is useful for working out
+how many scenarios may be run in one batch across a group of systems.
+
+You can also re-run a previous remote run in a manner similar to kvm.sh::
+
+ kvm-remote.sh "system0 system1 system2 system3 system4 system5" \
+ tools/testing/selftests/rcutorture/res/2022.11.03-11.26.28-remote \
+ --duration 24h
+
+In this case, most of the kvm-again.sh parameters may be supplied following
+the pathname of the old run-results directory.
diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt
deleted file mode 100644
index 55918b54808b..000000000000
--- a/Documentation/RCU/torture.txt
+++ /dev/null
@@ -1,314 +0,0 @@
-RCU Torture Test Operation
-
-
-CONFIG_RCU_TORTURE_TEST
-
-The CONFIG_RCU_TORTURE_TEST config option is available for all RCU
-implementations. It creates an rcutorture kernel module that can
-be loaded to run a torture test. The test periodically outputs
-status messages via printk(), which can be examined via the dmesg
-command (perhaps grepping for "torture"). The test is started
-when the module is loaded, and stops when the module is unloaded.
-
-
-MODULE PARAMETERS
-
-This module has the following parameters:
-
-fqs_duration Duration (in microseconds) of artificially induced bursts
- of force_quiescent_state() invocations. In RCU
- implementations having force_quiescent_state(), these
- bursts help force races between forcing a given grace
- period and that grace period ending on its own.
-
-fqs_holdoff Holdoff time (in microseconds) between consecutive calls
- to force_quiescent_state() within a burst.
-
-fqs_stutter Wait time (in seconds) between consecutive bursts
- of calls to force_quiescent_state().
-
-gp_normal Make the fake writers use normal synchronous grace-period
- primitives.
-
-gp_exp Make the fake writers use expedited synchronous grace-period
- primitives. If both gp_normal and gp_exp are set, or
- if neither gp_normal nor gp_exp are set, then randomly
- choose the primitive so that about 50% are normal and
- 50% expedited. By default, neither are set, which
- gives best overall test coverage.
-
-irqreader Says to invoke RCU readers from irq level. This is currently
- done via timers. Defaults to "1" for variants of RCU that
- permit this. (Or, more accurately, variants of RCU that do
- -not- permit this know to ignore this variable.)
-
-n_barrier_cbs If this is nonzero, RCU barrier testing will be conducted,
- in which case n_barrier_cbs specifies the number of
- RCU callbacks (and corresponding kthreads) to use for
- this testing. The value cannot be negative. If you
- specify this to be non-zero when torture_type indicates a
- synchronous RCU implementation (one for which a member of
- the synchronize_rcu() rather than the call_rcu() family is
- used -- see the documentation for torture_type below), an
- error will be reported and no testing will be carried out.
-
-nfakewriters This is the number of RCU fake writer threads to run. Fake
- writer threads repeatedly use the synchronous "wait for
- current readers" function of the interface selected by
- torture_type, with a delay between calls to allow for various
- different numbers of writers running in parallel.
- nfakewriters defaults to 4, which provides enough parallelism
- to trigger special cases caused by multiple writers, such as
- the synchronize_srcu() early return optimization.
-
-nreaders This is the number of RCU reading threads supported.
- The default is twice the number of CPUs. Why twice?
- To properly exercise RCU implementations with preemptible
- read-side critical sections.
-
-onoff_interval
- The number of seconds between each attempt to execute a
- randomly selected CPU-hotplug operation. Defaults to
- zero, which disables CPU hotplugging. In HOTPLUG_CPU=n
- kernels, rcutorture will silently refuse to do any
- CPU-hotplug operations regardless of what value is
- specified for onoff_interval.
-
-onoff_holdoff The number of seconds to wait until starting CPU-hotplug
- operations. This would normally only be used when
- rcutorture was built into the kernel and started
- automatically at boot time, in which case it is useful
- in order to avoid confusing boot-time code with CPUs
- coming and going.
-
-shuffle_interval
- The number of seconds to keep the test threads affinitied
- to a particular subset of the CPUs, defaults to 3 seconds.
- Used in conjunction with test_no_idle_hz.
-
-shutdown_secs The number of seconds to run the test before terminating
- the test and powering off the system. The default is
- zero, which disables test termination and system shutdown.
- This capability is useful for automated testing.
-
-stall_cpu The number of seconds that a CPU should be stalled while
- within both an rcu_read_lock() and a preempt_disable().
- This stall happens only once per rcutorture run.
- If you need multiple stalls, use modprobe and rmmod to
- repeatedly run rcutorture. The default for stall_cpu
- is zero, which prevents rcutorture from stalling a CPU.
-
- Note that attempts to rmmod rcutorture while the stall
- is ongoing will hang, so be careful what value you
- choose for this module parameter! In addition, too-large
- values for stall_cpu might well induce failures and
- warnings in other parts of the kernel. You have been
- warned!
-
-stall_cpu_holdoff
- The number of seconds to wait after rcutorture starts
- before stalling a CPU. Defaults to 10 seconds.
-
-stat_interval The number of seconds between output of torture
- statistics (via printk()). Regardless of the interval,
- statistics are printed when the module is unloaded.
- Setting the interval to zero causes the statistics to
- be printed -only- when the module is unloaded, and this
- is the default.
-
-stutter The length of time to run the test before pausing for this
- same period of time. Defaults to "stutter=5", so as
- to run and pause for (roughly) five-second intervals.
- Specifying "stutter=0" causes the test to run continuously
- without pausing, which is the old default behavior.
-
-test_boost Whether or not to test the ability of RCU to do priority
- boosting. Defaults to "test_boost=1", which performs
- RCU priority-inversion testing only if the selected
- RCU implementation supports priority boosting. Specifying
- "test_boost=0" never performs RCU priority-inversion
- testing. Specifying "test_boost=2" performs RCU
- priority-inversion testing even if the selected RCU
- implementation does not support RCU priority boosting,
- which can be used to test rcutorture's ability to
- carry out RCU priority-inversion testing.
-
-test_boost_interval
- The number of seconds in an RCU priority-inversion test
- cycle. Defaults to "test_boost_interval=7". It is
- usually wise for this value to be relatively prime to
- the value selected for "stutter".
-
-test_boost_duration
- The number of seconds to do RCU priority-inversion testing
- within any given "test_boost_interval". Defaults to
- "test_boost_duration=4".
-
-test_no_idle_hz Whether or not to test the ability of RCU to operate in
- a kernel that disables the scheduling-clock interrupt to
- idle CPUs. Boolean parameter, "1" to test, "0" otherwise.
- Defaults to omitting this test.
-
-torture_type The type of RCU to test, with string values as follows:
-
- "rcu": rcu_read_lock(), rcu_read_unlock() and call_rcu(),
- along with expedited, synchronous, and polling
- variants.
-
- "rcu_bh": rcu_read_lock_bh(), rcu_read_unlock_bh(), and
- call_rcu_bh(), along with expedited and synchronous
- variants.
-
- "rcu_busted": This tests an intentionally incorrect version
- of RCU in order to help test rcutorture itself.
-
- "srcu": srcu_read_lock(), srcu_read_unlock() and
- call_srcu(), along with expedited and
- synchronous variants.
-
- "sched": preempt_disable(), preempt_enable(), and
- call_rcu_sched(), along with expedited,
- synchronous, and polling variants.
-
- "tasks": voluntary context switch and call_rcu_tasks(),
- along with expedited and synchronous variants.
-
- Defaults to "rcu".
-
-verbose Enable debug printk()s. Default is disabled.
-
-
-OUTPUT
-
-The statistics output is as follows:
-
- rcu-torture:--- Start of test: nreaders=16 nfakewriters=4 stat_interval=30 verbose=0 test_no_idle_hz=1 shuffle_interval=3 stutter=5 irqreader=1 fqs_duration=0 fqs_holdoff=0 fqs_stutter=3 test_boost=1/0 test_boost_interval=7 test_boost_duration=4
- rcu-torture: rtc: (null) ver: 155441 tfle: 0 rta: 155441 rtaf: 8884 rtf: 155440 rtmbe: 0 rtbe: 0 rtbke: 0 rtbre: 0 rtbf: 0 rtb: 0 nt: 3055767
- rcu-torture: Reader Pipe: 727860534 34213 0 0 0 0 0 0 0 0 0
- rcu-torture: Reader Batch: 727877838 17003 0 0 0 0 0 0 0 0 0
- rcu-torture: Free-Block Circulation: 155440 155440 155440 155440 155440 155440 155440 155440 155440 155440 0
- rcu-torture:--- End of test: SUCCESS: nreaders=16 nfakewriters=4 stat_interval=30 verbose=0 test_no_idle_hz=1 shuffle_interval=3 stutter=5 irqreader=1 fqs_duration=0 fqs_holdoff=0 fqs_stutter=3 test_boost=1/0 test_boost_interval=7 test_boost_duration=4
-
-The command "dmesg | grep torture:" will extract this information on
-most systems. On more esoteric configurations, it may be necessary to
-use other commands to access the output of the printk()s used by
-the RCU torture test. The printk()s use KERN_ALERT, so they should
-be evident. ;-)
-
-The first and last lines show the rcutorture module parameters, and the
-last line shows either "SUCCESS" or "FAILURE", based on rcutorture's
-automatic determination as to whether RCU operated correctly.
-
-The entries are as follows:
-
-o "rtc": The hexadecimal address of the structure currently visible
- to readers.
-
-o "ver": The number of times since boot that the RCU writer task
- has changed the structure visible to readers.
-
-o "tfle": If non-zero, indicates that the "torture freelist"
- containing structures to be placed into the "rtc" area is empty.
- This condition is important, since it can fool you into thinking
- that RCU is working when it is not. :-/
-
-o "rta": Number of structures allocated from the torture freelist.
-
-o "rtaf": Number of allocations from the torture freelist that have
- failed due to the list being empty. It is not unusual for this
- to be non-zero, but it is bad for it to be a large fraction of
- the value indicated by "rta".
-
-o "rtf": Number of frees into the torture freelist.
-
-o "rtmbe": A non-zero value indicates that rcutorture believes that
- rcu_assign_pointer() and rcu_dereference() are not working
- correctly. This value should be zero.
-
-o "rtbe": A non-zero value indicates that one of the rcu_barrier()
- family of functions is not working correctly.
-
-o "rtbke": rcutorture was unable to create the real-time kthreads
- used to force RCU priority inversion. This value should be zero.
-
-o "rtbre": Although rcutorture successfully created the kthreads
- used to force RCU priority inversion, it was unable to set them
- to the real-time priority level of 1. This value should be zero.
-
-o "rtbf": The number of times that RCU priority boosting failed
- to resolve RCU priority inversion.
-
-o "rtb": The number of times that rcutorture attempted to force
- an RCU priority inversion condition. If you are testing RCU
- priority boosting via the "test_boost" module parameter, this
- value should be non-zero.
-
-o "nt": The number of times rcutorture ran RCU read-side code from
- within a timer handler. This value should be non-zero only
- if you specified the "irqreader" module parameter.
-
-o "Reader Pipe": Histogram of "ages" of structures seen by readers.
- If any entries past the first two are non-zero, RCU is broken.
- And rcutorture prints the error flag string "!!!" to make sure
- you notice. The age of a newly allocated structure is zero,
- it becomes one when removed from reader visibility, and is
- incremented once per grace period subsequently -- and is freed
- after passing through (RCU_TORTURE_PIPE_LEN-2) grace periods.
-
- The output displayed above was taken from a correctly working
- RCU. If you want to see what it looks like when broken, break
- it yourself. ;-)
-
-o "Reader Batch": Another histogram of "ages" of structures seen
- by readers, but in terms of counter flips (or batches) rather
- than in terms of grace periods. The legal number of non-zero
- entries is again two. The reason for this separate view is that
- it is sometimes easier to get the third entry to show up in the
- "Reader Batch" list than in the "Reader Pipe" list.
-
-o "Free-Block Circulation": Shows the number of torture structures
- that have reached a given point in the pipeline. The first element
- should closely correspond to the number of structures allocated,
- the second to the number that have been removed from reader view,
- and all but the last remaining to the corresponding number of
- passes through a grace period. The last entry should be zero,
- as it is only incremented if a torture structure's counter
- somehow gets incremented farther than it should.
-
-Different implementations of RCU can provide implementation-specific
-additional information. For example, Tree SRCU provides the following
-additional line:
-
- srcud-torture: Tree SRCU per-CPU(idx=0): 0(35,-21) 1(-4,24) 2(1,1) 3(-26,20) 4(28,-47) 5(-9,4) 6(-10,14) 7(-14,11) T(1,6)
-
-This line shows the per-CPU counter state, in this case for Tree SRCU
-using a dynamically allocated srcu_struct (hence "srcud-" rather than
-"srcu-"). The numbers in parentheses are the values of the "old" and
-"current" counters for the corresponding CPU. The "idx" value maps the
-"old" and "current" values to the underlying array, and is useful for
-debugging. The final "T" entry contains the totals of the counters.
-
-
-USAGE
-
-The following script may be used to torture RCU:
-
- #!/bin/sh
-
- modprobe rcutorture
- sleep 3600
- rmmod rcutorture
- dmesg | grep torture:
-
-The output can be manually inspected for the error flag of "!!!".
-One could of course create a more elaborate script that automatically
-checked for such errors. The "rmmod" command forces a "SUCCESS",
-"FAILURE", or "RCU_HOTPLUG" indication to be printk()ed. The first
-two are self-explanatory, while the last indicates that while there
-were no RCU failures, CPU-hotplug problems were detected.
-
-However, the tools/testing/selftests/rcutorture/bin/kvm.sh script
-provides better automation, including automatic failure analysis.
-It assumes a qemu/kvm-enabled platform, and runs guest OSes out of initrd.
-See tools/testing/selftests/rcutorture/doc/initrd.txt for instructions
-on setting up such an initrd.
diff --git a/Documentation/RCU/whatisRCU.rst b/Documentation/RCU/whatisRCU.rst
new file mode 100644
index 000000000000..cf0b0ac9f463
--- /dev/null
+++ b/Documentation/RCU/whatisRCU.rst
@@ -0,0 +1,1412 @@
+.. _whatisrcu_doc:
+
+What is RCU? -- "Read, Copy, Update"
+======================================
+
+Please note that the "What is RCU?" LWN series is an excellent place
+to start learning about RCU:
+
+| 1. What is RCU, Fundamentally? https://lwn.net/Articles/262464/
+| 2. What is RCU? Part 2: Usage https://lwn.net/Articles/263130/
+| 3. RCU part 3: the RCU API https://lwn.net/Articles/264090/
+| 4. The RCU API, 2010 Edition https://lwn.net/Articles/418853/
+| 2010 Big API Table https://lwn.net/Articles/419086/
+| 5. The RCU API, 2014 Edition https://lwn.net/Articles/609904/
+| 2014 Big API Table https://lwn.net/Articles/609973/
+| 6. The RCU API, 2019 Edition https://lwn.net/Articles/777036/
+| 2019 Big API Table https://lwn.net/Articles/777165/
+| 7. The RCU API, 2024 Edition https://lwn.net/Articles/988638/
+| 2024 Background Information https://lwn.net/Articles/988641/
+| 2024 Big API Table https://lwn.net/Articles/988666/
+
+For those preferring video:
+
+| 1. Unraveling RCU Mysteries: Fundamentals https://www.linuxfoundation.org/webinars/unraveling-rcu-usage-mysteries
+| 2. Unraveling RCU Mysteries: Additional Use Cases https://www.linuxfoundation.org/webinars/unraveling-rcu-usage-mysteries-additional-use-cases
+
+
+What is RCU?
+
+RCU is a synchronization mechanism that was added to the Linux kernel
+during the 2.5 development effort that is optimized for read-mostly
+situations. Although RCU is actually quite simple, making effective use
+of it requires you to think differently about your code. Another part
+of the problem is the mistaken assumption that there is "one true way" to
+describe and to use RCU. Instead, the experience has been that different
+people must take different paths to arrive at an understanding of RCU,
+depending on their experiences and use cases. This document provides
+several different paths, as follows:
+
+:ref:`1. RCU OVERVIEW <1_whatisRCU>`
+
+:ref:`2. WHAT IS RCU'S CORE API? <2_whatisRCU>`
+
+:ref:`3. WHAT ARE SOME EXAMPLE USES OF CORE RCU API? <3_whatisRCU>`
+
+:ref:`4. WHAT IF MY UPDATING THREAD CANNOT BLOCK? <4_whatisRCU>`
+
+:ref:`5. WHAT ARE SOME SIMPLE IMPLEMENTATIONS OF RCU? <5_whatisRCU>`
+
+:ref:`6. ANALOGY WITH READER-WRITER LOCKING <6_whatisRCU>`
+
+:ref:`7. ANALOGY WITH REFERENCE COUNTING <7_whatisRCU>`
+
+:ref:`8. FULL LIST OF RCU APIs <8_whatisRCU>`
+
+:ref:`9. ANSWERS TO QUICK QUIZZES <9_whatisRCU>`
+
+People who prefer starting with a conceptual overview should focus on
+Section 1, though most readers will profit by reading this section at
+some point. People who prefer to start with an API that they can then
+experiment with should focus on Section 2. People who prefer to start
+with example uses should focus on Sections 3 and 4. People who need to
+understand the RCU implementation should focus on Section 5, then dive
+into the kernel source code. People who reason best by analogy should
+focus on Section 6 and 7. Section 8 serves as an index to the docbook
+API documentation, and Section 9 is the traditional answer key.
+
+So, start with the section that makes the most sense to you and your
+preferred method of learning. If you need to know everything about
+everything, feel free to read the whole thing -- but if you are really
+that type of person, you have perused the source code and will therefore
+never need this document anyway. ;-)
+
+.. _1_whatisRCU:
+
+1. RCU OVERVIEW
+----------------
+
+The basic idea behind RCU is to split updates into "removal" and
+"reclamation" phases. The removal phase removes references to data items
+within a data structure (possibly by replacing them with references to
+new versions of these data items), and can run concurrently with readers.
+The reason that it is safe to run the removal phase concurrently with
+readers is the semantics of modern CPUs guarantee that readers will see
+either the old or the new version of the data structure rather than a
+partially updated reference. The reclamation phase does the work of reclaiming
+(e.g., freeing) the data items removed from the data structure during the
+removal phase. Because reclaiming data items can disrupt any readers
+concurrently referencing those data items, the reclamation phase must
+not start until readers no longer hold references to those data items.
+
+Splitting the update into removal and reclamation phases permits the
+updater to perform the removal phase immediately, and to defer the
+reclamation phase until all readers active during the removal phase have
+completed, either by blocking until they finish or by registering a
+callback that is invoked after they finish. Only readers that are active
+during the removal phase need be considered, because any reader starting
+after the removal phase will be unable to gain a reference to the removed
+data items, and therefore cannot be disrupted by the reclamation phase.
+
+So the typical RCU update sequence goes something like the following:
+
+a. Remove pointers to a data structure, so that subsequent
+ readers cannot gain a reference to it.
+
+b. Wait for all previous readers to complete their RCU read-side
+ critical sections.
+
+c. At this point, there cannot be any readers who hold references
+ to the data structure, so it now may safely be reclaimed
+ (e.g., kfree()d).
+
+Step (b) above is the key idea underlying RCU's deferred destruction.
+The ability to wait until all readers are done allows RCU readers to
+use much lighter-weight synchronization, in some cases, absolutely no
+synchronization at all. In contrast, in more conventional lock-based
+schemes, readers must use heavy-weight synchronization in order to
+prevent an updater from deleting the data structure out from under them.
+This is because lock-based updaters typically update data items in place,
+and must therefore exclude readers. In contrast, RCU-based updaters
+typically take advantage of the fact that writes to single aligned
+pointers are atomic on modern CPUs, allowing atomic insertion, removal,
+and replacement of data items in a linked structure without disrupting
+readers. Concurrent RCU readers can then continue accessing the old
+versions, and can dispense with the atomic operations, memory barriers,
+and communications cache misses that are so expensive on present-day
+SMP computer systems, even in absence of lock contention.
+
+In the three-step procedure shown above, the updater is performing both
+the removal and the reclamation step, but it is often helpful for an
+entirely different thread to do the reclamation, as is in fact the case
+in the Linux kernel's directory-entry cache (dcache). Even if the same
+thread performs both the update step (step (a) above) and the reclamation
+step (step (c) above), it is often helpful to think of them separately.
+For example, RCU readers and updaters need not communicate at all,
+but RCU provides implicit low-overhead communication between readers
+and reclaimers, namely, in step (b) above.
+
+So how the heck can a reclaimer tell when a reader is done, given
+that readers are not doing any sort of synchronization operations???
+Read on to learn about how RCU's API makes this easy.
+
+.. _2_whatisRCU:
+
+2. WHAT IS RCU'S CORE API?
+---------------------------
+
+The core RCU API is quite small:
+
+a. rcu_read_lock()
+b. rcu_read_unlock()
+c. synchronize_rcu() / call_rcu()
+d. rcu_assign_pointer()
+e. rcu_dereference()
+
+There are many other members of the RCU API, but the rest can be
+expressed in terms of these five, though most implementations instead
+express synchronize_rcu() in terms of the call_rcu() callback API.
+
+The five core RCU APIs are described below, the other 18 will be enumerated
+later. See the kernel docbook documentation for more info, or look directly
+at the function header comments.
+
+rcu_read_lock()
+^^^^^^^^^^^^^^^
+ void rcu_read_lock(void);
+
+ This temporal primitive is used by a reader to inform the
+ reclaimer that the reader is entering an RCU read-side critical
+ section. It is illegal to block while in an RCU read-side
+ critical section, though kernels built with CONFIG_PREEMPT_RCU
+ can preempt RCU read-side critical sections. Any RCU-protected
+ data structure accessed during an RCU read-side critical section
+ is guaranteed to remain unreclaimed for the full duration of that
+ critical section. Reference counts may be used in conjunction
+ with RCU to maintain longer-term references to data structures.
+
+ Note that anything that disables bottom halves, preemption,
+ or interrupts also enters an RCU read-side critical section.
+ Acquiring a spinlock also enters an RCU read-side critical
+ sections, even for spinlocks that do not disable preemption,
+ as is the case in kernels built with CONFIG_PREEMPT_RT=y.
+ Sleeplocks do *not* enter RCU read-side critical sections.
+
+rcu_read_unlock()
+^^^^^^^^^^^^^^^^^
+ void rcu_read_unlock(void);
+
+ This temporal primitives is used by a reader to inform the
+ reclaimer that the reader is exiting an RCU read-side critical
+ section. Anything that enables bottom halves, preemption,
+ or interrupts also exits an RCU read-side critical section.
+ Releasing a spinlock also exits an RCU read-side critical section.
+
+ Note that RCU read-side critical sections may be nested and/or
+ overlapping.
+
+synchronize_rcu()
+^^^^^^^^^^^^^^^^^
+ void synchronize_rcu(void);
+
+ This temporal primitive marks the end of updater code and the
+ beginning of reclaimer code. It does this by blocking until
+ all pre-existing RCU read-side critical sections on all CPUs
+ have completed. Note that synchronize_rcu() will **not**
+ necessarily wait for any subsequent RCU read-side critical
+ sections to complete. For example, consider the following
+ sequence of events::
+
+ CPU 0 CPU 1 CPU 2
+ ----------------- ------------------------- ---------------
+ 1. rcu_read_lock()
+ 2. enters synchronize_rcu()
+ 3. rcu_read_lock()
+ 4. rcu_read_unlock()
+ 5. exits synchronize_rcu()
+ 6. rcu_read_unlock()
+
+ To reiterate, synchronize_rcu() waits only for ongoing RCU
+ read-side critical sections to complete, not necessarily for
+ any that begin after synchronize_rcu() is invoked.
+
+ Of course, synchronize_rcu() does not necessarily return
+ **immediately** after the last pre-existing RCU read-side critical
+ section completes. For one thing, there might well be scheduling
+ delays. For another thing, many RCU implementations process
+ requests in batches in order to improve efficiencies, which can
+ further delay synchronize_rcu().
+
+ Since synchronize_rcu() is the API that must figure out when
+ readers are done, its implementation is key to RCU. For RCU
+ to be useful in all but the most read-intensive situations,
+ synchronize_rcu()'s overhead must also be quite small.
+
+ The call_rcu() API is an asynchronous callback form of
+ synchronize_rcu(), and is described in more detail in a later
+ section. Instead of blocking, it registers a function and
+ argument which are invoked after all ongoing RCU read-side
+ critical sections have completed. This callback variant is
+ particularly useful in situations where it is illegal to block
+ or where update-side performance is critically important.
+
+ However, the call_rcu() API should not be used lightly, as use
+ of the synchronize_rcu() API generally results in simpler code.
+ In addition, the synchronize_rcu() API has the nice property
+ of automatically limiting update rate should grace periods
+ be delayed. This property results in system resilience in face
+ of denial-of-service attacks. Code using call_rcu() should limit
+ update rate in order to gain this same sort of resilience. See
+ checklist.rst for some approaches to limiting the update rate.
+
+rcu_assign_pointer()
+^^^^^^^^^^^^^^^^^^^^
+ void rcu_assign_pointer(p, typeof(p) v);
+
+ Yes, rcu_assign_pointer() **is** implemented as a macro, though
+ it would be cool to be able to declare a function in this manner.
+ (And there has been some discussion of adding overloaded functions
+ to the C language, so who knows?)
+
+ The updater uses this spatial macro to assign a new value to an
+ RCU-protected pointer, in order to safely communicate the change
+ in value from the updater to the reader. This is a spatial (as
+ opposed to temporal) macro. It does not evaluate to an rvalue,
+ but it does provide any compiler directives and memory-barrier
+ instructions required for a given compile or CPU architecture.
+ Its ordering properties are that of a store-release operation,
+ that is, any prior loads and stores required to initialize the
+ structure are ordered before the store that publishes the pointer
+ to that structure.
+
+ Perhaps just as important, rcu_assign_pointer() serves to document
+ (1) which pointers are protected by RCU and (2) the point at which
+ a given structure becomes accessible to other CPUs. That said,
+ rcu_assign_pointer() is most frequently used indirectly, via
+ the _rcu list-manipulation primitives such as list_add_rcu().
+
+rcu_dereference()
+^^^^^^^^^^^^^^^^^
+ typeof(p) rcu_dereference(p);
+
+ Like rcu_assign_pointer(), rcu_dereference() must be implemented
+ as a macro.
+
+ The reader uses the spatial rcu_dereference() macro to fetch
+ an RCU-protected pointer, which returns a value that may
+ then be safely dereferenced. Note that rcu_dereference()
+ does not actually dereference the pointer, instead, it
+ protects the pointer for later dereferencing. It also
+ executes any needed memory-barrier instructions for a given
+ CPU architecture. Currently, only Alpha needs memory barriers
+ within rcu_dereference() -- on other CPUs, it compiles to a
+ volatile load. However, no mainstream C compilers respect
+ address dependencies, so rcu_dereference() uses volatile casts,
+ which, in combination with the coding guidelines listed in
+ rcu_dereference.rst, prevent current compilers from breaking
+ these dependencies.
+
+ Common coding practice uses rcu_dereference() to copy an
+ RCU-protected pointer to a local variable, then dereferences
+ this local variable, for example as follows::
+
+ p = rcu_dereference(head.next);
+ return p->data;
+
+ However, in this case, one could just as easily combine these
+ into one statement::
+
+ return rcu_dereference(head.next)->data;
+
+ If you are going to be fetching multiple fields from the
+ RCU-protected structure, using the local variable is of
+ course preferred. Repeated rcu_dereference() calls look
+ ugly, do not guarantee that the same pointer will be returned
+ if an update happened while in the critical section, and incur
+ unnecessary overhead on Alpha CPUs.
+
+ Note that the value returned by rcu_dereference() is valid
+ only within the enclosing RCU read-side critical section [1]_.
+ For example, the following is **not** legal::
+
+ rcu_read_lock();
+ p = rcu_dereference(head.next);
+ rcu_read_unlock();
+ x = p->address; /* BUG!!! */
+ rcu_read_lock();
+ y = p->data; /* BUG!!! */
+ rcu_read_unlock();
+
+ Holding a reference from one RCU read-side critical section
+ to another is just as illegal as holding a reference from
+ one lock-based critical section to another! Similarly,
+ using a reference outside of the critical section in which
+ it was acquired is just as illegal as doing so with normal
+ locking.
+
+ As with rcu_assign_pointer(), an important function of
+ rcu_dereference() is to document which pointers are protected by
+ RCU, in particular, flagging a pointer that is subject to changing
+ at any time, including immediately after the rcu_dereference().
+ And, again like rcu_assign_pointer(), rcu_dereference() is
+ typically used indirectly, via the _rcu list-manipulation
+ primitives, such as list_for_each_entry_rcu() [2]_.
+
+.. [1] The variant rcu_dereference_protected() can be used outside
+ of an RCU read-side critical section as long as the usage is
+ protected by locks acquired by the update-side code. This variant
+ avoids the lockdep warning that would happen when using (for
+ example) rcu_dereference() without rcu_read_lock() protection.
+ Using rcu_dereference_protected() also has the advantage
+ of permitting compiler optimizations that rcu_dereference()
+ must prohibit. The rcu_dereference_protected() variant takes
+ a lockdep expression to indicate which locks must be acquired
+ by the caller. If the indicated protection is not provided,
+ a lockdep splat is emitted. See Design/Requirements/Requirements.rst
+ and the API's code comments for more details and example usage.
+
+.. [2] If the list_for_each_entry_rcu() instance might be used by
+ update-side code as well as by RCU readers, then an additional
+ lockdep expression can be added to its list of arguments.
+ For example, given an additional "lock_is_held(&mylock)" argument,
+ the RCU lockdep code would complain only if this instance was
+ invoked outside of an RCU read-side critical section and without
+ the protection of mylock.
+
+The following diagram shows how each API communicates among the
+reader, updater, and reclaimer.
+::
+
+
+ rcu_assign_pointer()
+ +--------+
+ +---------------------->| reader |---------+
+ | +--------+ |
+ | | |
+ | | | Protect:
+ | | | rcu_read_lock()
+ | | | rcu_read_unlock()
+ | rcu_dereference() | |
+ +---------+ | |
+ | updater |<----------------+ |
+ +---------+ V
+ | +-----------+
+ +----------------------------------->| reclaimer |
+ +-----------+
+ Defer:
+ synchronize_rcu() & call_rcu()
+
+
+The RCU infrastructure observes the temporal sequence of rcu_read_lock(),
+rcu_read_unlock(), synchronize_rcu(), and call_rcu() invocations in
+order to determine when (1) synchronize_rcu() invocations may return
+to their callers and (2) call_rcu() callbacks may be invoked. Efficient
+implementations of the RCU infrastructure make heavy use of batching in
+order to amortize their overhead over many uses of the corresponding APIs.
+The rcu_assign_pointer() and rcu_dereference() invocations communicate
+spatial changes via stores to and loads from the RCU-protected pointer in
+question.
+
+There are at least three flavors of RCU usage in the Linux kernel. The diagram
+above shows the most common one. On the updater side, the rcu_assign_pointer(),
+synchronize_rcu() and call_rcu() primitives used are the same for all three
+flavors. However for protection (on the reader side), the primitives used vary
+depending on the flavor:
+
+a. rcu_read_lock() / rcu_read_unlock()
+ rcu_dereference()
+
+b. rcu_read_lock_bh() / rcu_read_unlock_bh()
+ local_bh_disable() / local_bh_enable()
+ rcu_dereference_bh()
+
+c. rcu_read_lock_sched() / rcu_read_unlock_sched()
+ preempt_disable() / preempt_enable()
+ local_irq_save() / local_irq_restore()
+ hardirq enter / hardirq exit
+ NMI enter / NMI exit
+ rcu_dereference_sched()
+
+These three flavors are used as follows:
+
+a. RCU applied to normal data structures.
+
+b. RCU applied to networking data structures that may be subjected
+ to remote denial-of-service attacks.
+
+c. RCU applied to scheduler and interrupt/NMI-handler tasks.
+
+Again, most uses will be of (a). The (b) and (c) cases are important
+for specialized uses, but are relatively uncommon. The SRCU, RCU-Tasks,
+RCU-Tasks-Rude, and RCU-Tasks-Trace have similar relationships among
+their assorted primitives.
+
+.. _3_whatisRCU:
+
+3. WHAT ARE SOME EXAMPLE USES OF CORE RCU API?
+-----------------------------------------------
+
+This section shows a simple use of the core RCU API to protect a
+global pointer to a dynamically allocated structure. More-typical
+uses of RCU may be found in listRCU.rst and NMI-RCU.rst.
+::
+
+ struct foo {
+ int a;
+ char b;
+ long c;
+ };
+ DEFINE_SPINLOCK(foo_mutex);
+
+ struct foo __rcu *gbl_foo;
+
+ /*
+ * Create a new struct foo that is the same as the one currently
+ * pointed to by gbl_foo, except that field "a" is replaced
+ * with "new_a". Points gbl_foo to the new structure, and
+ * frees up the old structure after a grace period.
+ *
+ * Uses rcu_assign_pointer() to ensure that concurrent readers
+ * see the initialized version of the new structure.
+ *
+ * Uses synchronize_rcu() to ensure that any readers that might
+ * have references to the old structure complete before freeing
+ * the old structure.
+ */
+ void foo_update_a(int new_a)
+ {
+ struct foo *new_fp;
+ struct foo *old_fp;
+
+ new_fp = kmalloc(sizeof(*new_fp), GFP_KERNEL);
+ spin_lock(&foo_mutex);
+ old_fp = rcu_dereference_protected(gbl_foo, lockdep_is_held(&foo_mutex));
+ *new_fp = *old_fp;
+ new_fp->a = new_a;
+ rcu_assign_pointer(gbl_foo, new_fp);
+ spin_unlock(&foo_mutex);
+ synchronize_rcu();
+ kfree(old_fp);
+ }
+
+ /*
+ * Return the value of field "a" of the current gbl_foo
+ * structure. Use rcu_read_lock() and rcu_read_unlock()
+ * to ensure that the structure does not get deleted out
+ * from under us, and use rcu_dereference() to ensure that
+ * we see the initialized version of the structure (important
+ * for DEC Alpha and for people reading the code).
+ */
+ int foo_get_a(void)
+ {
+ int retval;
+
+ rcu_read_lock();
+ retval = rcu_dereference(gbl_foo)->a;
+ rcu_read_unlock();
+ return retval;
+ }
+
+So, to sum up:
+
+- Use rcu_read_lock() and rcu_read_unlock() to guard RCU
+ read-side critical sections.
+
+- Within an RCU read-side critical section, use rcu_dereference()
+ to dereference RCU-protected pointers.
+
+- Use some solid design (such as locks or semaphores) to
+ keep concurrent updates from interfering with each other.
+
+- Use rcu_assign_pointer() to update an RCU-protected pointer.
+ This primitive protects concurrent readers from the updater,
+ **not** concurrent updates from each other! You therefore still
+ need to use locking (or something similar) to keep concurrent
+ rcu_assign_pointer() primitives from interfering with each other.
+
+- Use synchronize_rcu() **after** removing a data element from an
+ RCU-protected data structure, but **before** reclaiming/freeing
+ the data element, in order to wait for the completion of all
+ RCU read-side critical sections that might be referencing that
+ data item.
+
+See checklist.rst for additional rules to follow when using RCU.
+And again, more-typical uses of RCU may be found in listRCU.rst
+and NMI-RCU.rst.
+
+.. _4_whatisRCU:
+
+4. WHAT IF MY UPDATING THREAD CANNOT BLOCK?
+--------------------------------------------
+
+In the example above, foo_update_a() blocks until a grace period elapses.
+This is quite simple, but in some cases one cannot afford to wait so
+long -- there might be other high-priority work to be done.
+
+In such cases, one uses call_rcu() rather than synchronize_rcu().
+The call_rcu() API is as follows::
+
+ void call_rcu(struct rcu_head *head, rcu_callback_t func);
+
+This function invokes func(head) after a grace period has elapsed.
+This invocation might happen from either softirq or process context,
+so the function is not permitted to block. The foo struct needs to
+have an rcu_head structure added, perhaps as follows::
+
+ struct foo {
+ int a;
+ char b;
+ long c;
+ struct rcu_head rcu;
+ };
+
+The foo_update_a() function might then be written as follows::
+
+ /*
+ * Create a new struct foo that is the same as the one currently
+ * pointed to by gbl_foo, except that field "a" is replaced
+ * with "new_a". Points gbl_foo to the new structure, and
+ * frees up the old structure after a grace period.
+ *
+ * Uses rcu_assign_pointer() to ensure that concurrent readers
+ * see the initialized version of the new structure.
+ *
+ * Uses call_rcu() to ensure that any readers that might have
+ * references to the old structure complete before freeing the
+ * old structure.
+ */
+ void foo_update_a(int new_a)
+ {
+ struct foo *new_fp;
+ struct foo *old_fp;
+
+ new_fp = kmalloc(sizeof(*new_fp), GFP_KERNEL);
+ spin_lock(&foo_mutex);
+ old_fp = rcu_dereference_protected(gbl_foo, lockdep_is_held(&foo_mutex));
+ *new_fp = *old_fp;
+ new_fp->a = new_a;
+ rcu_assign_pointer(gbl_foo, new_fp);
+ spin_unlock(&foo_mutex);
+ call_rcu(&old_fp->rcu, foo_reclaim);
+ }
+
+The foo_reclaim() function might appear as follows::
+
+ void foo_reclaim(struct rcu_head *rp)
+ {
+ struct foo *fp = container_of(rp, struct foo, rcu);
+
+ foo_cleanup(fp->a);
+
+ kfree(fp);
+ }
+
+The container_of() primitive is a macro that, given a pointer into a
+struct, the type of the struct, and the pointed-to field within the
+struct, returns a pointer to the beginning of the struct.
+
+The use of call_rcu() permits the caller of foo_update_a() to
+immediately regain control, without needing to worry further about the
+old version of the newly updated element. It also clearly shows the
+RCU distinction between updater, namely foo_update_a(), and reclaimer,
+namely foo_reclaim().
+
+The summary of advice is the same as for the previous section, except
+that we are now using call_rcu() rather than synchronize_rcu():
+
+- Use call_rcu() **after** removing a data element from an
+ RCU-protected data structure in order to register a callback
+ function that will be invoked after the completion of all RCU
+ read-side critical sections that might be referencing that
+ data item.
+
+If the callback for call_rcu() is not doing anything more than calling
+kfree() on the structure, you can use kfree_rcu() instead of call_rcu()
+to avoid having to write your own callback::
+
+ kfree_rcu(old_fp, rcu);
+
+If the occasional sleep is permitted, the single-argument form may
+be used, omitting the rcu_head structure from struct foo.
+
+ kfree_rcu_mightsleep(old_fp);
+
+This variant almost never blocks, but might do so by invoking
+synchronize_rcu() in response to memory-allocation failure.
+
+Again, see checklist.rst for additional rules governing the use of RCU.
+
+.. _5_whatisRCU:
+
+5. WHAT ARE SOME SIMPLE IMPLEMENTATIONS OF RCU?
+------------------------------------------------
+
+One of the nice things about RCU is that it has extremely simple "toy"
+implementations that are a good first step towards understanding the
+production-quality implementations in the Linux kernel. This section
+presents two such "toy" implementations of RCU, one that is implemented
+in terms of familiar locking primitives, and another that more closely
+resembles "classic" RCU. Both are way too simple for real-world use,
+lacking both functionality and performance. However, they are useful
+in getting a feel for how RCU works. See kernel/rcu/update.c for a
+production-quality implementation, and see:
+
+ https://docs.google.com/document/d/1X0lThx8OK0ZgLMqVoXiR4ZrGURHrXK6NyLRbeXe3Xac/edit
+
+for papers describing the Linux kernel RCU implementation. The OLS'01
+and OLS'02 papers are a good introduction, and the dissertation provides
+more details on the current implementation as of early 2004.
+
+
+5A. "TOY" IMPLEMENTATION #1: LOCKING
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+This section presents a "toy" RCU implementation that is based on
+familiar locking primitives. Its overhead makes it a non-starter for
+real-life use, as does its lack of scalability. It is also unsuitable
+for realtime use, since it allows scheduling latency to "bleed" from
+one read-side critical section to another. It also assumes recursive
+reader-writer locks: If you try this with non-recursive locks, and
+you allow nested rcu_read_lock() calls, you can deadlock.
+
+However, it is probably the easiest implementation to relate to, so is
+a good starting point.
+
+It is extremely simple::
+
+ static DEFINE_RWLOCK(rcu_gp_mutex);
+
+ void rcu_read_lock(void)
+ {
+ read_lock(&rcu_gp_mutex);
+ }
+
+ void rcu_read_unlock(void)
+ {
+ read_unlock(&rcu_gp_mutex);
+ }
+
+ void synchronize_rcu(void)
+ {
+ write_lock(&rcu_gp_mutex);
+ smp_mb__after_spinlock();
+ write_unlock(&rcu_gp_mutex);
+ }
+
+[You can ignore rcu_assign_pointer() and rcu_dereference() without missing
+much. But here are simplified versions anyway. And whatever you do,
+don't forget about them when submitting patches making use of RCU!]::
+
+ #define rcu_assign_pointer(p, v) \
+ ({ \
+ smp_store_release(&(p), (v)); \
+ })
+
+ #define rcu_dereference(p) \
+ ({ \
+ typeof(p) _________p1 = READ_ONCE(p); \
+ (_________p1); \
+ })
+
+
+The rcu_read_lock() and rcu_read_unlock() primitive read-acquire
+and release a global reader-writer lock. The synchronize_rcu()
+primitive write-acquires this same lock, then releases it. This means
+that once synchronize_rcu() exits, all RCU read-side critical sections
+that were in progress before synchronize_rcu() was called are guaranteed
+to have completed -- there is no way that synchronize_rcu() would have
+been able to write-acquire the lock otherwise. The smp_mb__after_spinlock()
+promotes synchronize_rcu() to a full memory barrier in compliance with
+the "Memory-Barrier Guarantees" listed in:
+
+ Design/Requirements/Requirements.rst
+
+It is possible to nest rcu_read_lock(), since reader-writer locks may
+be recursively acquired. Note also that rcu_read_lock() is immune
+from deadlock (an important property of RCU). The reason for this is
+that the only thing that can block rcu_read_lock() is a synchronize_rcu().
+But synchronize_rcu() does not acquire any locks while holding rcu_gp_mutex,
+so there can be no deadlock cycle.
+
+.. _quiz_1:
+
+Quick Quiz #1:
+ Why is this argument naive? How could a deadlock
+ occur when using this algorithm in a real-world Linux
+ kernel? How could this deadlock be avoided?
+
+:ref:`Answers to Quick Quiz <9_whatisRCU>`
+
+5B. "TOY" EXAMPLE #2: CLASSIC RCU
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+This section presents a "toy" RCU implementation that is based on
+"classic RCU". It is also short on performance (but only for updates) and
+on features such as hotplug CPU and the ability to run in CONFIG_PREEMPTION
+kernels. The definitions of rcu_dereference() and rcu_assign_pointer()
+are the same as those shown in the preceding section, so they are omitted.
+::
+
+ void rcu_read_lock(void) { }
+
+ void rcu_read_unlock(void) { }
+
+ void synchronize_rcu(void)
+ {
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ run_on(cpu);
+ }
+
+Note that rcu_read_lock() and rcu_read_unlock() do absolutely nothing.
+This is the great strength of classic RCU in a non-preemptive kernel:
+read-side overhead is precisely zero, at least on non-Alpha CPUs.
+And there is absolutely no way that rcu_read_lock() can possibly
+participate in a deadlock cycle!
+
+The implementation of synchronize_rcu() simply schedules itself on each
+CPU in turn. The run_on() primitive can be implemented straightforwardly
+in terms of the sched_setaffinity() primitive. Of course, a somewhat less
+"toy" implementation would restore the affinity upon completion rather
+than just leaving all tasks running on the last CPU, but when I said
+"toy", I meant **toy**!
+
+So how the heck is this supposed to work???
+
+Remember that it is illegal to block while in an RCU read-side critical
+section. Therefore, if a given CPU executes a context switch, we know
+that it must have completed all preceding RCU read-side critical sections.
+Once **all** CPUs have executed a context switch, then **all** preceding
+RCU read-side critical sections will have completed.
+
+So, suppose that we remove a data item from its structure and then invoke
+synchronize_rcu(). Once synchronize_rcu() returns, we are guaranteed
+that there are no RCU read-side critical sections holding a reference
+to that data item, so we can safely reclaim it.
+
+.. _quiz_2:
+
+Quick Quiz #2:
+ Give an example where Classic RCU's read-side
+ overhead is **negative**.
+
+:ref:`Answers to Quick Quiz <9_whatisRCU>`
+
+.. _quiz_3:
+
+Quick Quiz #3:
+ If it is illegal to block in an RCU read-side
+ critical section, what the heck do you do in
+ CONFIG_PREEMPT_RT, where normal spinlocks can block???
+
+:ref:`Answers to Quick Quiz <9_whatisRCU>`
+
+.. _6_whatisRCU:
+
+6. ANALOGY WITH READER-WRITER LOCKING
+--------------------------------------
+
+Although RCU can be used in many different ways, a very common use of
+RCU is analogous to reader-writer locking. The following unified
+diff shows how closely related RCU and reader-writer locking can be.
+::
+
+ @@ -5,5 +5,5 @@ struct el {
+ int data;
+ /* Other data fields */
+ };
+ -rwlock_t listmutex;
+ +spinlock_t listmutex;
+ struct el head;
+
+ @@ -13,15 +14,15 @@
+ struct list_head *lp;
+ struct el *p;
+
+ - read_lock(&listmutex);
+ - list_for_each_entry(p, head, lp) {
+ + rcu_read_lock();
+ + list_for_each_entry_rcu(p, head, lp) {
+ if (p->key == key) {
+ *result = p->data;
+ - read_unlock(&listmutex);
+ + rcu_read_unlock();
+ return 1;
+ }
+ }
+ - read_unlock(&listmutex);
+ + rcu_read_unlock();
+ return 0;
+ }
+
+ @@ -29,15 +30,16 @@
+ {
+ struct el *p;
+
+ - write_lock(&listmutex);
+ + spin_lock(&listmutex);
+ list_for_each_entry(p, head, lp) {
+ if (p->key == key) {
+ - list_del(&p->list);
+ - write_unlock(&listmutex);
+ + list_del_rcu(&p->list);
+ + spin_unlock(&listmutex);
+ + synchronize_rcu();
+ kfree(p);
+ return 1;
+ }
+ }
+ - write_unlock(&listmutex);
+ + spin_unlock(&listmutex);
+ return 0;
+ }
+
+Or, for those who prefer a side-by-side listing::
+
+ 1 struct el { 1 struct el {
+ 2 struct list_head list; 2 struct list_head list;
+ 3 long key; 3 long key;
+ 4 spinlock_t mutex; 4 spinlock_t mutex;
+ 5 int data; 5 int data;
+ 6 /* Other data fields */ 6 /* Other data fields */
+ 7 }; 7 };
+ 8 rwlock_t listmutex; 8 spinlock_t listmutex;
+ 9 struct el head; 9 struct el head;
+
+::
+
+ 1 int search(long key, int *result) 1 int search(long key, int *result)
+ 2 { 2 {
+ 3 struct list_head *lp; 3 struct list_head *lp;
+ 4 struct el *p; 4 struct el *p;
+ 5 5
+ 6 read_lock(&listmutex); 6 rcu_read_lock();
+ 7 list_for_each_entry(p, head, lp) { 7 list_for_each_entry_rcu(p, head, lp) {
+ 8 if (p->key == key) { 8 if (p->key == key) {
+ 9 *result = p->data; 9 *result = p->data;
+ 10 read_unlock(&listmutex); 10 rcu_read_unlock();
+ 11 return 1; 11 return 1;
+ 12 } 12 }
+ 13 } 13 }
+ 14 read_unlock(&listmutex); 14 rcu_read_unlock();
+ 15 return 0; 15 return 0;
+ 16 } 16 }
+
+::
+
+ 1 int delete(long key) 1 int delete(long key)
+ 2 { 2 {
+ 3 struct el *p; 3 struct el *p;
+ 4 4
+ 5 write_lock(&listmutex); 5 spin_lock(&listmutex);
+ 6 list_for_each_entry(p, head, lp) { 6 list_for_each_entry(p, head, lp) {
+ 7 if (p->key == key) { 7 if (p->key == key) {
+ 8 list_del(&p->list); 8 list_del_rcu(&p->list);
+ 9 write_unlock(&listmutex); 9 spin_unlock(&listmutex);
+ 10 synchronize_rcu();
+ 10 kfree(p); 11 kfree(p);
+ 11 return 1; 12 return 1;
+ 12 } 13 }
+ 13 } 14 }
+ 14 write_unlock(&listmutex); 15 spin_unlock(&listmutex);
+ 15 return 0; 16 return 0;
+ 16 } 17 }
+
+Either way, the differences are quite small. Read-side locking moves
+to rcu_read_lock() and rcu_read_unlock, update-side locking moves from
+a reader-writer lock to a simple spinlock, and a synchronize_rcu()
+precedes the kfree().
+
+However, there is one potential catch: the read-side and update-side
+critical sections can now run concurrently. In many cases, this will
+not be a problem, but it is necessary to check carefully regardless.
+For example, if multiple independent list updates must be seen as
+a single atomic update, converting to RCU will require special care.
+
+Also, the presence of synchronize_rcu() means that the RCU version of
+delete() can now block. If this is a problem, there is a callback-based
+mechanism that never blocks, namely call_rcu() or kfree_rcu(), that can
+be used in place of synchronize_rcu().
+
+.. _7_whatisRCU:
+
+7. ANALOGY WITH REFERENCE COUNTING
+-----------------------------------
+
+The reader-writer analogy (illustrated by the previous section) is not
+always the best way to think about using RCU. Another helpful analogy
+considers RCU an effective reference count on everything which is
+protected by RCU.
+
+A reference count typically does not prevent the referenced object's
+values from changing, but does prevent changes to type -- particularly the
+gross change of type that happens when that object's memory is freed and
+re-allocated for some other purpose. Once a type-safe reference to the
+object is obtained, some other mechanism is needed to ensure consistent
+access to the data in the object. This could involve taking a spinlock,
+but with RCU the typical approach is to perform reads with SMP-aware
+operations such as smp_load_acquire(), to perform updates with atomic
+read-modify-write operations, and to provide the necessary ordering.
+RCU provides a number of support functions that embed the required
+operations and ordering, such as the list_for_each_entry_rcu() macro
+used in the previous section.
+
+A more focused view of the reference counting behavior is that,
+between rcu_read_lock() and rcu_read_unlock(), any reference taken with
+rcu_dereference() on a pointer marked as ``__rcu`` can be treated as
+though a reference-count on that object has been temporarily increased.
+This prevents the object from changing type. Exactly what this means
+will depend on normal expectations of objects of that type, but it
+typically includes that spinlocks can still be safely locked, normal
+reference counters can be safely manipulated, and ``__rcu`` pointers
+can be safely dereferenced.
+
+Some operations that one might expect to see on an object for
+which an RCU reference is held include:
+
+ - Copying out data that is guaranteed to be stable by the object's type.
+ - Using kref_get_unless_zero() or similar to get a longer-term
+ reference. This may fail of course.
+ - Acquiring a spinlock in the object, and checking if the object still
+ is the expected object and if so, manipulating it freely.
+
+The understanding that RCU provides a reference that only prevents a
+change of type is particularly visible with objects allocated from a
+slab cache marked ``SLAB_TYPESAFE_BY_RCU``. RCU operations may yield a
+reference to an object from such a cache that has been concurrently freed
+and the memory reallocated to a completely different object, though of
+the same type. In this case RCU doesn't even protect the identity of the
+object from changing, only its type. So the object found may not be the
+one expected, but it will be one where it is safe to take a reference
+(and then potentially acquiring a spinlock), allowing subsequent code
+to check whether the identity matches expectations. It is tempting
+to simply acquire the spinlock without first taking the reference, but
+unfortunately any spinlock in a ``SLAB_TYPESAFE_BY_RCU`` object must be
+initialized after each and every call to kmem_cache_alloc(), which renders
+reference-free spinlock acquisition completely unsafe. Therefore, when
+using ``SLAB_TYPESAFE_BY_RCU``, make proper use of a reference counter.
+If using refcount_t, the specialized refcount_{add|inc}_not_zero_acquire()
+and refcount_set_release() APIs should be used to ensure correct operation
+ordering when verifying object identity and when initializing newly
+allocated objects. Acquire fence in refcount_{add|inc}_not_zero_acquire()
+ensures that identity checks happen *after* reference count is taken.
+refcount_set_release() should be called after a newly allocated object is
+fully initialized and release fence ensures that new values are visible
+*before* refcount can be successfully taken by other users. Once
+refcount_set_release() is called, the object should be considered visible
+by other tasks.
+(Those willing to initialize their locks in a kmem_cache constructor
+may also use locking, including cache-friendly sequence locking.)
+
+With traditional reference counting -- such as that implemented by the
+kref library in Linux -- there is typically code that runs when the last
+reference to an object is dropped. With kref, this is the function
+passed to kref_put(). When RCU is being used, such finalization code
+must not be run until all ``__rcu`` pointers referencing the object have
+been updated, and then a grace period has passed. Every remaining
+globally visible pointer to the object must be considered to be a
+potential counted reference, and the finalization code is typically run
+using call_rcu() only after all those pointers have been changed.
+
+To see how to choose between these two analogies -- of RCU as a
+reader-writer lock and RCU as a reference counting system -- it is useful
+to reflect on the scale of the thing being protected. The reader-writer
+lock analogy looks at larger multi-part objects such as a linked list
+and shows how RCU can facilitate concurrency while elements are added
+to, and removed from, the list. The reference-count analogy looks at
+the individual objects and looks at how they can be accessed safely
+within whatever whole they are a part of.
+
+.. _8_whatisRCU:
+
+8. FULL LIST OF RCU APIs
+-------------------------
+
+The RCU APIs are documented in docbook-format header comments in the
+Linux-kernel source code, but it helps to have a full list of the
+APIs, since there does not appear to be a way to categorize them
+in docbook. Here is the list, by category.
+
+RCU list traversal::
+
+ list_entry_rcu
+ list_entry_lockless
+ list_first_entry_rcu
+ list_first_or_null_rcu
+ list_tail_rcu
+ list_next_rcu
+ list_next_or_null_rcu
+ list_for_each_entry_rcu
+ list_for_each_entry_continue_rcu
+ list_for_each_entry_from_rcu
+ list_for_each_entry_lockless
+ hlist_first_rcu
+ hlist_next_rcu
+ hlist_pprev_rcu
+ hlist_for_each_entry_rcu
+ hlist_for_each_entry_rcu_notrace
+ hlist_for_each_entry_rcu_bh
+ hlist_for_each_entry_from_rcu
+ hlist_for_each_entry_continue_rcu
+ hlist_for_each_entry_continue_rcu_bh
+ hlist_nulls_first_rcu
+ hlist_nulls_next_rcu
+ hlist_nulls_for_each_entry_rcu
+ hlist_nulls_for_each_entry_safe
+ hlist_bl_first_rcu
+ hlist_bl_for_each_entry_rcu
+
+RCU pointer/list update::
+
+ rcu_assign_pointer
+ rcu_replace_pointer
+ INIT_LIST_HEAD_RCU
+ list_add_rcu
+ list_add_tail_rcu
+ list_del_rcu
+ list_replace_rcu
+ list_splice_init_rcu
+ list_splice_tail_init_rcu
+ hlist_add_behind_rcu
+ hlist_add_before_rcu
+ hlist_add_head_rcu
+ hlist_add_tail_rcu
+ hlist_del_rcu
+ hlist_del_init_rcu
+ hlist_replace_rcu
+ hlist_nulls_del_init_rcu
+ hlist_nulls_del_rcu
+ hlist_nulls_add_head_rcu
+ hlist_nulls_add_tail_rcu
+ hlist_nulls_add_fake
+ hlists_swap_heads_rcu
+ hlist_bl_add_head_rcu
+ hlist_bl_del_rcu
+ hlist_bl_set_first_rcu
+
+RCU::
+
+ Critical sections Grace period Barrier
+
+ rcu_read_lock synchronize_net rcu_barrier
+ rcu_read_unlock synchronize_rcu
+ guard(rcu)() synchronize_rcu_expedited
+ scoped_guard(rcu) synchronize_rcu_mult
+ rcu_dereference call_rcu
+ rcu_dereference_check call_rcu_hurry
+ rcu_dereference_protected kfree_rcu
+ rcu_read_lock_held kvfree_rcu
+ rcu_read_lock_any_held kfree_rcu_mightsleep
+ rcu_pointer_handoff cond_synchronize_rcu
+ unrcu_pointer cond_synchronize_rcu_full
+ cond_synchronize_rcu_expedited
+ cond_synchronize_rcu_expedited_full
+ get_completed_synchronize_rcu
+ get_completed_synchronize_rcu_full
+ get_state_synchronize_rcu
+ get_state_synchronize_rcu_full
+ poll_state_synchronize_rcu
+ poll_state_synchronize_rcu_full
+ same_state_synchronize_rcu
+ same_state_synchronize_rcu_full
+ start_poll_synchronize_rcu
+ start_poll_synchronize_rcu_full
+ start_poll_synchronize_rcu_expedited
+ start_poll_synchronize_rcu_expedited_full
+
+bh::
+
+ Critical sections Grace period Barrier
+
+ rcu_read_lock_bh [Same as RCU] [Same as RCU]
+ rcu_read_unlock_bh
+ [local_bh_disable]
+ [and friends]
+ rcu_dereference_bh
+ rcu_dereference_bh_check
+ rcu_dereference_bh_protected
+ rcu_read_lock_bh_held
+
+sched::
+
+ Critical sections Grace period Barrier
+
+ rcu_read_lock_sched [Same as RCU] [Same as RCU]
+ rcu_read_unlock_sched
+ [preempt_disable]
+ [and friends]
+ rcu_read_lock_sched_notrace
+ rcu_read_unlock_sched_notrace
+ rcu_dereference_sched
+ rcu_dereference_sched_check
+ rcu_dereference_sched_protected
+ rcu_read_lock_sched_held
+
+
+RCU: Initialization/cleanup/ordering::
+
+ RCU_INIT_POINTER
+ RCU_INITIALIZER
+ RCU_POINTER_INITIALIZER
+ init_rcu_head
+ destroy_rcu_head
+ init_rcu_head_on_stack
+ destroy_rcu_head_on_stack
+ SLAB_TYPESAFE_BY_RCU
+
+
+RCU: Quiescents states and control::
+
+ cond_resched_tasks_rcu_qs
+ rcu_all_qs
+ rcu_softirq_qs_periodic
+ rcu_end_inkernel_boot
+ rcu_expedite_gp
+ rcu_gp_is_expedited
+ rcu_unexpedite_gp
+ rcu_cpu_stall_reset
+ rcu_head_after_call_rcu
+ rcu_is_watching
+
+
+RCU-sync primitive::
+
+ rcu_sync_is_idle
+ rcu_sync_init
+ rcu_sync_enter
+ rcu_sync_exit
+ rcu_sync_dtor
+
+
+RCU-Tasks::
+
+ Critical sections Grace period Barrier
+
+ N/A call_rcu_tasks rcu_barrier_tasks
+ synchronize_rcu_tasks
+
+
+RCU-Tasks-Rude::
+
+ Critical sections Grace period Barrier
+
+ N/A synchronize_rcu_tasks_rude rcu_barrier_tasks_rude
+ call_rcu_tasks_rude
+
+
+RCU-Tasks-Trace::
+
+ Critical sections Grace period Barrier
+
+ rcu_read_lock_trace call_rcu_tasks_trace rcu_barrier_tasks_trace
+ rcu_read_unlock_trace synchronize_rcu_tasks_trace
+ guard(rcu_tasks_trace)()
+ scoped_guard(rcu_tasks_trace)
+
+
+SRCU list traversal::
+ list_for_each_entry_srcu
+ hlist_for_each_entry_srcu
+
+
+SRCU::
+
+ Critical sections Grace period Barrier
+
+ srcu_read_lock call_srcu srcu_barrier
+ srcu_read_unlock synchronize_srcu
+ srcu_read_lock_fast synchronize_srcu_expedited
+ srcu_read_unlock_fast get_state_synchronize_srcu
+ srcu_read_lock_nmisafe start_poll_synchronize_srcu
+ srcu_read_unlock_nmisafe start_poll_synchronize_srcu_expedited
+ srcu_read_lock_notrace poll_state_synchronize_srcu
+ srcu_read_unlock_notrace
+ srcu_down_read
+ srcu_up_read
+ srcu_down_read_fast
+ srcu_up_read_fast
+ guard(srcu)()
+ scoped_guard(srcu)
+ srcu_read_lock_held
+ srcu_dereference
+ srcu_dereference_check
+ srcu_dereference_notrace
+ srcu_read_lock_held
+
+
+SRCU: Initialization/cleanup/ordering::
+
+ DEFINE_SRCU
+ DEFINE_STATIC_SRCU
+ init_srcu_struct
+ cleanup_srcu_struct
+ smp_mb__after_srcu_read_unlock
+
+All: lockdep-checked RCU utility APIs::
+
+ RCU_LOCKDEP_WARN
+ rcu_sleep_check
+
+All: Unchecked RCU-protected pointer access::
+
+ rcu_dereference_raw
+
+All: Unchecked RCU-protected pointer access with dereferencing prohibited::
+
+ rcu_access_pointer
+
+See the comment headers in the source code (or the docbook generated
+from them) for more information.
+
+However, given that there are no fewer than four families of RCU APIs
+in the Linux kernel, how do you choose which one to use? The following
+list can be helpful:
+
+a. Will readers need to block? If so, you need SRCU.
+
+b. Will readers need to block and are you doing tracing, for
+ example, ftrace or BPF? If so, you need RCU-tasks,
+ RCU-tasks-rude, and/or RCU-tasks-trace.
+
+c. What about the -rt patchset? If readers would need to block in
+ an non-rt kernel, you need SRCU. If readers would block when
+ acquiring spinlocks in a -rt kernel, but not in a non-rt kernel,
+ SRCU is not necessary. (The -rt patchset turns spinlocks into
+ sleeplocks, hence this distinction.)
+
+d. Do you need to treat NMI handlers, hardirq handlers,
+ and code segments with preemption disabled (whether
+ via preempt_disable(), local_irq_save(), local_bh_disable(),
+ or some other mechanism) as if they were explicit RCU readers?
+ If so, RCU-sched readers are the only choice that will work
+ for you, but since about v4.20 you use can use the vanilla RCU
+ update primitives.
+
+e. Do you need RCU grace periods to complete even in the face of
+ softirq monopolization of one or more of the CPUs? For example,
+ is your code subject to network-based denial-of-service attacks?
+ If so, you should disable softirq across your readers, for
+ example, by using rcu_read_lock_bh(). Since about v4.20 you
+ use can use the vanilla RCU update primitives.
+
+f. Is your workload too update-intensive for normal use of
+ RCU, but inappropriate for other synchronization mechanisms?
+ If so, consider SLAB_TYPESAFE_BY_RCU (which was originally
+ named SLAB_DESTROY_BY_RCU). But please be careful!
+
+g. Do you need read-side critical sections that are respected even
+ on CPUs that are deep in the idle loop, during entry to or exit
+ from user-mode execution, or on an offlined CPU? If so, SRCU
+ and RCU Tasks Trace are the only choices that will work for you,
+ with SRCU being strongly preferred in almost all cases.
+
+h. Otherwise, use RCU.
+
+Of course, this all assumes that you have determined that RCU is in fact
+the right tool for your job.
+
+.. _9_whatisRCU:
+
+9. ANSWERS TO QUICK QUIZZES
+----------------------------
+
+Quick Quiz #1:
+ Why is this argument naive? How could a deadlock
+ occur when using this algorithm in a real-world Linux
+ kernel? [Referring to the lock-based "toy" RCU
+ algorithm.]
+
+Answer:
+ Consider the following sequence of events:
+
+ 1. CPU 0 acquires some unrelated lock, call it
+ "problematic_lock", disabling irq via
+ spin_lock_irqsave().
+
+ 2. CPU 1 enters synchronize_rcu(), write-acquiring
+ rcu_gp_mutex.
+
+ 3. CPU 0 enters rcu_read_lock(), but must wait
+ because CPU 1 holds rcu_gp_mutex.
+
+ 4. CPU 1 is interrupted, and the irq handler
+ attempts to acquire problematic_lock.
+
+ The system is now deadlocked.
+
+ One way to avoid this deadlock is to use an approach like
+ that of CONFIG_PREEMPT_RT, where all normal spinlocks
+ become blocking locks, and all irq handlers execute in
+ the context of special tasks. In this case, in step 4
+ above, the irq handler would block, allowing CPU 1 to
+ release rcu_gp_mutex, avoiding the deadlock.
+
+ Even in the absence of deadlock, this RCU implementation
+ allows latency to "bleed" from readers to other
+ readers through synchronize_rcu(). To see this,
+ consider task A in an RCU read-side critical section
+ (thus read-holding rcu_gp_mutex), task B blocked
+ attempting to write-acquire rcu_gp_mutex, and
+ task C blocked in rcu_read_lock() attempting to
+ read_acquire rcu_gp_mutex. Task A's RCU read-side
+ latency is holding up task C, albeit indirectly via
+ task B.
+
+ Realtime RCU implementations therefore use a counter-based
+ approach where tasks in RCU read-side critical sections
+ cannot be blocked by tasks executing synchronize_rcu().
+
+:ref:`Back to Quick Quiz #1 <quiz_1>`
+
+Quick Quiz #2:
+ Give an example where Classic RCU's read-side
+ overhead is **negative**.
+
+Answer:
+ Imagine a single-CPU system with a non-CONFIG_PREEMPTION
+ kernel where a routing table is used by process-context
+ code, but can be updated by irq-context code (for example,
+ by an "ICMP REDIRECT" packet). The usual way of handling
+ this would be to have the process-context code disable
+ interrupts while searching the routing table. Use of
+ RCU allows such interrupt-disabling to be dispensed with.
+ Thus, without RCU, you pay the cost of disabling interrupts,
+ and with RCU you don't.
+
+ One can argue that the overhead of RCU in this
+ case is negative with respect to the single-CPU
+ interrupt-disabling approach. Others might argue that
+ the overhead of RCU is merely zero, and that replacing
+ the positive overhead of the interrupt-disabling scheme
+ with the zero-overhead RCU scheme does not constitute
+ negative overhead.
+
+ In real life, of course, things are more complex. But
+ even the theoretical possibility of negative overhead for
+ a synchronization primitive is a bit unexpected. ;-)
+
+:ref:`Back to Quick Quiz #2 <quiz_2>`
+
+Quick Quiz #3:
+ If it is illegal to block in an RCU read-side
+ critical section, what the heck do you do in
+ CONFIG_PREEMPT_RT, where normal spinlocks can block???
+
+Answer:
+ Just as CONFIG_PREEMPT_RT permits preemption of spinlock
+ critical sections, it permits preemption of RCU
+ read-side critical sections. It also permits
+ spinlocks blocking while in RCU read-side critical
+ sections.
+
+ Why the apparent inconsistency? Because it is
+ possible to use priority boosting to keep the RCU
+ grace periods short if need be (for example, if running
+ short of memory). In contrast, if blocking waiting
+ for (say) network reception, there is no way to know
+ what should be boosted. Especially given that the
+ process we need to boost might well be a human being
+ who just went out for a pizza or something. And although
+ a computer-operated cattle prod might arouse serious
+ interest, it might also provoke serious objections.
+ Besides, how does the computer know what pizza parlor
+ the human being went to???
+
+:ref:`Back to Quick Quiz #3 <quiz_3>`
+
+ACKNOWLEDGEMENTS
+
+My thanks to the people who helped make this human-readable, including
+Jon Walpole, Josh Triplett, Serge Hallyn, Suzanne Wood, and Alan Stern.
+
+
+For more information, see http://www.rdrop.com/users/paulmck/RCU.
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
deleted file mode 100644
index 86d82f7f3500..000000000000
--- a/Documentation/RCU/whatisRCU.txt
+++ /dev/null
@@ -1,1057 +0,0 @@
-What is RCU? -- "Read, Copy, Update"
-
-Please note that the "What is RCU?" LWN series is an excellent place
-to start learning about RCU:
-
-1. What is RCU, Fundamentally? http://lwn.net/Articles/262464/
-2. What is RCU? Part 2: Usage http://lwn.net/Articles/263130/
-3. RCU part 3: the RCU API http://lwn.net/Articles/264090/
-4. The RCU API, 2010 Edition http://lwn.net/Articles/418853/
- 2010 Big API Table http://lwn.net/Articles/419086/
-5. The RCU API, 2014 Edition http://lwn.net/Articles/609904/
- 2014 Big API Table http://lwn.net/Articles/609973/
-
-
-What is RCU?
-
-RCU is a synchronization mechanism that was added to the Linux kernel
-during the 2.5 development effort that is optimized for read-mostly
-situations. Although RCU is actually quite simple once you understand it,
-getting there can sometimes be a challenge. Part of the problem is that
-most of the past descriptions of RCU have been written with the mistaken
-assumption that there is "one true way" to describe RCU. Instead,
-the experience has been that different people must take different paths
-to arrive at an understanding of RCU. This document provides several
-different paths, as follows:
-
-1. RCU OVERVIEW
-2. WHAT IS RCU'S CORE API?
-3. WHAT ARE SOME EXAMPLE USES OF CORE RCU API?
-4. WHAT IF MY UPDATING THREAD CANNOT BLOCK?
-5. WHAT ARE SOME SIMPLE IMPLEMENTATIONS OF RCU?
-6. ANALOGY WITH READER-WRITER LOCKING
-7. FULL LIST OF RCU APIs
-8. ANSWERS TO QUICK QUIZZES
-
-People who prefer starting with a conceptual overview should focus on
-Section 1, though most readers will profit by reading this section at
-some point. People who prefer to start with an API that they can then
-experiment with should focus on Section 2. People who prefer to start
-with example uses should focus on Sections 3 and 4. People who need to
-understand the RCU implementation should focus on Section 5, then dive
-into the kernel source code. People who reason best by analogy should
-focus on Section 6. Section 7 serves as an index to the docbook API
-documentation, and Section 8 is the traditional answer key.
-
-So, start with the section that makes the most sense to you and your
-preferred method of learning. If you need to know everything about
-everything, feel free to read the whole thing -- but if you are really
-that type of person, you have perused the source code and will therefore
-never need this document anyway. ;-)
-
-
-1. RCU OVERVIEW
-
-The basic idea behind RCU is to split updates into "removal" and
-"reclamation" phases. The removal phase removes references to data items
-within a data structure (possibly by replacing them with references to
-new versions of these data items), and can run concurrently with readers.
-The reason that it is safe to run the removal phase concurrently with
-readers is the semantics of modern CPUs guarantee that readers will see
-either the old or the new version of the data structure rather than a
-partially updated reference. The reclamation phase does the work of reclaiming
-(e.g., freeing) the data items removed from the data structure during the
-removal phase. Because reclaiming data items can disrupt any readers
-concurrently referencing those data items, the reclamation phase must
-not start until readers no longer hold references to those data items.
-
-Splitting the update into removal and reclamation phases permits the
-updater to perform the removal phase immediately, and to defer the
-reclamation phase until all readers active during the removal phase have
-completed, either by blocking until they finish or by registering a
-callback that is invoked after they finish. Only readers that are active
-during the removal phase need be considered, because any reader starting
-after the removal phase will be unable to gain a reference to the removed
-data items, and therefore cannot be disrupted by the reclamation phase.
-
-So the typical RCU update sequence goes something like the following:
-
-a. Remove pointers to a data structure, so that subsequent
- readers cannot gain a reference to it.
-
-b. Wait for all previous readers to complete their RCU read-side
- critical sections.
-
-c. At this point, there cannot be any readers who hold references
- to the data structure, so it now may safely be reclaimed
- (e.g., kfree()d).
-
-Step (b) above is the key idea underlying RCU's deferred destruction.
-The ability to wait until all readers are done allows RCU readers to
-use much lighter-weight synchronization, in some cases, absolutely no
-synchronization at all. In contrast, in more conventional lock-based
-schemes, readers must use heavy-weight synchronization in order to
-prevent an updater from deleting the data structure out from under them.
-This is because lock-based updaters typically update data items in place,
-and must therefore exclude readers. In contrast, RCU-based updaters
-typically take advantage of the fact that writes to single aligned
-pointers are atomic on modern CPUs, allowing atomic insertion, removal,
-and replacement of data items in a linked structure without disrupting
-readers. Concurrent RCU readers can then continue accessing the old
-versions, and can dispense with the atomic operations, memory barriers,
-and communications cache misses that are so expensive on present-day
-SMP computer systems, even in absence of lock contention.
-
-In the three-step procedure shown above, the updater is performing both
-the removal and the reclamation step, but it is often helpful for an
-entirely different thread to do the reclamation, as is in fact the case
-in the Linux kernel's directory-entry cache (dcache). Even if the same
-thread performs both the update step (step (a) above) and the reclamation
-step (step (c) above), it is often helpful to think of them separately.
-For example, RCU readers and updaters need not communicate at all,
-but RCU provides implicit low-overhead communication between readers
-and reclaimers, namely, in step (b) above.
-
-So how the heck can a reclaimer tell when a reader is done, given
-that readers are not doing any sort of synchronization operations???
-Read on to learn about how RCU's API makes this easy.
-
-
-2. WHAT IS RCU'S CORE API?
-
-The core RCU API is quite small:
-
-a. rcu_read_lock()
-b. rcu_read_unlock()
-c. synchronize_rcu() / call_rcu()
-d. rcu_assign_pointer()
-e. rcu_dereference()
-
-There are many other members of the RCU API, but the rest can be
-expressed in terms of these five, though most implementations instead
-express synchronize_rcu() in terms of the call_rcu() callback API.
-
-The five core RCU APIs are described below, the other 18 will be enumerated
-later. See the kernel docbook documentation for more info, or look directly
-at the function header comments.
-
-rcu_read_lock()
-
- void rcu_read_lock(void);
-
- Used by a reader to inform the reclaimer that the reader is
- entering an RCU read-side critical section. It is illegal
- to block while in an RCU read-side critical section, though
- kernels built with CONFIG_PREEMPT_RCU can preempt RCU
- read-side critical sections. Any RCU-protected data structure
- accessed during an RCU read-side critical section is guaranteed to
- remain unreclaimed for the full duration of that critical section.
- Reference counts may be used in conjunction with RCU to maintain
- longer-term references to data structures.
-
-rcu_read_unlock()
-
- void rcu_read_unlock(void);
-
- Used by a reader to inform the reclaimer that the reader is
- exiting an RCU read-side critical section. Note that RCU
- read-side critical sections may be nested and/or overlapping.
-
-synchronize_rcu()
-
- void synchronize_rcu(void);
-
- Marks the end of updater code and the beginning of reclaimer
- code. It does this by blocking until all pre-existing RCU
- read-side critical sections on all CPUs have completed.
- Note that synchronize_rcu() will -not- necessarily wait for
- any subsequent RCU read-side critical sections to complete.
- For example, consider the following sequence of events:
-
- CPU 0 CPU 1 CPU 2
- ----------------- ------------------------- ---------------
- 1. rcu_read_lock()
- 2. enters synchronize_rcu()
- 3. rcu_read_lock()
- 4. rcu_read_unlock()
- 5. exits synchronize_rcu()
- 6. rcu_read_unlock()
-
- To reiterate, synchronize_rcu() waits only for ongoing RCU
- read-side critical sections to complete, not necessarily for
- any that begin after synchronize_rcu() is invoked.
-
- Of course, synchronize_rcu() does not necessarily return
- -immediately- after the last pre-existing RCU read-side critical
- section completes. For one thing, there might well be scheduling
- delays. For another thing, many RCU implementations process
- requests in batches in order to improve efficiencies, which can
- further delay synchronize_rcu().
-
- Since synchronize_rcu() is the API that must figure out when
- readers are done, its implementation is key to RCU. For RCU
- to be useful in all but the most read-intensive situations,
- synchronize_rcu()'s overhead must also be quite small.
-
- The call_rcu() API is a callback form of synchronize_rcu(),
- and is described in more detail in a later section. Instead of
- blocking, it registers a function and argument which are invoked
- after all ongoing RCU read-side critical sections have completed.
- This callback variant is particularly useful in situations where
- it is illegal to block or where update-side performance is
- critically important.
-
- However, the call_rcu() API should not be used lightly, as use
- of the synchronize_rcu() API generally results in simpler code.
- In addition, the synchronize_rcu() API has the nice property
- of automatically limiting update rate should grace periods
- be delayed. This property results in system resilience in face
- of denial-of-service attacks. Code using call_rcu() should limit
- update rate in order to gain this same sort of resilience. See
- checklist.txt for some approaches to limiting the update rate.
-
-rcu_assign_pointer()
-
- typeof(p) rcu_assign_pointer(p, typeof(p) v);
-
- Yes, rcu_assign_pointer() -is- implemented as a macro, though it
- would be cool to be able to declare a function in this manner.
- (Compiler experts will no doubt disagree.)
-
- The updater uses this function to assign a new value to an
- RCU-protected pointer, in order to safely communicate the change
- in value from the updater to the reader. This function returns
- the new value, and also executes any memory-barrier instructions
- required for a given CPU architecture.
-
- Perhaps just as important, it serves to document (1) which
- pointers are protected by RCU and (2) the point at which a
- given structure becomes accessible to other CPUs. That said,
- rcu_assign_pointer() is most frequently used indirectly, via
- the _rcu list-manipulation primitives such as list_add_rcu().
-
-rcu_dereference()
-
- typeof(p) rcu_dereference(p);
-
- Like rcu_assign_pointer(), rcu_dereference() must be implemented
- as a macro.
-
- The reader uses rcu_dereference() to fetch an RCU-protected
- pointer, which returns a value that may then be safely
- dereferenced. Note that rcu_dereference() does not actually
- dereference the pointer, instead, it protects the pointer for
- later dereferencing. It also executes any needed memory-barrier
- instructions for a given CPU architecture. Currently, only Alpha
- needs memory barriers within rcu_dereference() -- on other CPUs,
- it compiles to nothing, not even a compiler directive.
-
- Common coding practice uses rcu_dereference() to copy an
- RCU-protected pointer to a local variable, then dereferences
- this local variable, for example as follows:
-
- p = rcu_dereference(head.next);
- return p->data;
-
- However, in this case, one could just as easily combine these
- into one statement:
-
- return rcu_dereference(head.next)->data;
-
- If you are going to be fetching multiple fields from the
- RCU-protected structure, using the local variable is of
- course preferred. Repeated rcu_dereference() calls look
- ugly, do not guarantee that the same pointer will be returned
- if an update happened while in the critical section, and incur
- unnecessary overhead on Alpha CPUs.
-
- Note that the value returned by rcu_dereference() is valid
- only within the enclosing RCU read-side critical section.
- For example, the following is -not- legal:
-
- rcu_read_lock();
- p = rcu_dereference(head.next);
- rcu_read_unlock();
- x = p->address; /* BUG!!! */
- rcu_read_lock();
- y = p->data; /* BUG!!! */
- rcu_read_unlock();
-
- Holding a reference from one RCU read-side critical section
- to another is just as illegal as holding a reference from
- one lock-based critical section to another! Similarly,
- using a reference outside of the critical section in which
- it was acquired is just as illegal as doing so with normal
- locking.
-
- As with rcu_assign_pointer(), an important function of
- rcu_dereference() is to document which pointers are protected by
- RCU, in particular, flagging a pointer that is subject to changing
- at any time, including immediately after the rcu_dereference().
- And, again like rcu_assign_pointer(), rcu_dereference() is
- typically used indirectly, via the _rcu list-manipulation
- primitives, such as list_for_each_entry_rcu().
-
-The following diagram shows how each API communicates among the
-reader, updater, and reclaimer.
-
-
- rcu_assign_pointer()
- +--------+
- +---------------------->| reader |---------+
- | +--------+ |
- | | |
- | | | Protect:
- | | | rcu_read_lock()
- | | | rcu_read_unlock()
- | rcu_dereference() | |
- +---------+ | |
- | updater |<---------------------+ |
- +---------+ V
- | +-----------+
- +----------------------------------->| reclaimer |
- +-----------+
- Defer:
- synchronize_rcu() & call_rcu()
-
-
-The RCU infrastructure observes the time sequence of rcu_read_lock(),
-rcu_read_unlock(), synchronize_rcu(), and call_rcu() invocations in
-order to determine when (1) synchronize_rcu() invocations may return
-to their callers and (2) call_rcu() callbacks may be invoked. Efficient
-implementations of the RCU infrastructure make heavy use of batching in
-order to amortize their overhead over many uses of the corresponding APIs.
-
-There are no fewer than three RCU mechanisms in the Linux kernel; the
-diagram above shows the first one, which is by far the most commonly used.
-The rcu_dereference() and rcu_assign_pointer() primitives are used for
-all three mechanisms, but different defer and protect primitives are
-used as follows:
-
- Defer Protect
-
-a. synchronize_rcu() rcu_read_lock() / rcu_read_unlock()
- call_rcu() rcu_dereference()
-
-b. synchronize_rcu_bh() rcu_read_lock_bh() / rcu_read_unlock_bh()
- call_rcu_bh() rcu_dereference_bh()
-
-c. synchronize_sched() rcu_read_lock_sched() / rcu_read_unlock_sched()
- call_rcu_sched() preempt_disable() / preempt_enable()
- local_irq_save() / local_irq_restore()
- hardirq enter / hardirq exit
- NMI enter / NMI exit
- rcu_dereference_sched()
-
-These three mechanisms are used as follows:
-
-a. RCU applied to normal data structures.
-
-b. RCU applied to networking data structures that may be subjected
- to remote denial-of-service attacks.
-
-c. RCU applied to scheduler and interrupt/NMI-handler tasks.
-
-Again, most uses will be of (a). The (b) and (c) cases are important
-for specialized uses, but are relatively uncommon.
-
-
-3. WHAT ARE SOME EXAMPLE USES OF CORE RCU API?
-
-This section shows a simple use of the core RCU API to protect a
-global pointer to a dynamically allocated structure. More-typical
-uses of RCU may be found in listRCU.txt, arrayRCU.txt, and NMI-RCU.txt.
-
- struct foo {
- int a;
- char b;
- long c;
- };
- DEFINE_SPINLOCK(foo_mutex);
-
- struct foo __rcu *gbl_foo;
-
- /*
- * Create a new struct foo that is the same as the one currently
- * pointed to by gbl_foo, except that field "a" is replaced
- * with "new_a". Points gbl_foo to the new structure, and
- * frees up the old structure after a grace period.
- *
- * Uses rcu_assign_pointer() to ensure that concurrent readers
- * see the initialized version of the new structure.
- *
- * Uses synchronize_rcu() to ensure that any readers that might
- * have references to the old structure complete before freeing
- * the old structure.
- */
- void foo_update_a(int new_a)
- {
- struct foo *new_fp;
- struct foo *old_fp;
-
- new_fp = kmalloc(sizeof(*new_fp), GFP_KERNEL);
- spin_lock(&foo_mutex);
- old_fp = rcu_dereference_protected(gbl_foo, lockdep_is_held(&foo_mutex));
- *new_fp = *old_fp;
- new_fp->a = new_a;
- rcu_assign_pointer(gbl_foo, new_fp);
- spin_unlock(&foo_mutex);
- synchronize_rcu();
- kfree(old_fp);
- }
-
- /*
- * Return the value of field "a" of the current gbl_foo
- * structure. Use rcu_read_lock() and rcu_read_unlock()
- * to ensure that the structure does not get deleted out
- * from under us, and use rcu_dereference() to ensure that
- * we see the initialized version of the structure (important
- * for DEC Alpha and for people reading the code).
- */
- int foo_get_a(void)
- {
- int retval;
-
- rcu_read_lock();
- retval = rcu_dereference(gbl_foo)->a;
- rcu_read_unlock();
- return retval;
- }
-
-So, to sum up:
-
-o Use rcu_read_lock() and rcu_read_unlock() to guard RCU
- read-side critical sections.
-
-o Within an RCU read-side critical section, use rcu_dereference()
- to dereference RCU-protected pointers.
-
-o Use some solid scheme (such as locks or semaphores) to
- keep concurrent updates from interfering with each other.
-
-o Use rcu_assign_pointer() to update an RCU-protected pointer.
- This primitive protects concurrent readers from the updater,
- -not- concurrent updates from each other! You therefore still
- need to use locking (or something similar) to keep concurrent
- rcu_assign_pointer() primitives from interfering with each other.
-
-o Use synchronize_rcu() -after- removing a data element from an
- RCU-protected data structure, but -before- reclaiming/freeing
- the data element, in order to wait for the completion of all
- RCU read-side critical sections that might be referencing that
- data item.
-
-See checklist.txt for additional rules to follow when using RCU.
-And again, more-typical uses of RCU may be found in listRCU.txt,
-arrayRCU.txt, and NMI-RCU.txt.
-
-
-4. WHAT IF MY UPDATING THREAD CANNOT BLOCK?
-
-In the example above, foo_update_a() blocks until a grace period elapses.
-This is quite simple, but in some cases one cannot afford to wait so
-long -- there might be other high-priority work to be done.
-
-In such cases, one uses call_rcu() rather than synchronize_rcu().
-The call_rcu() API is as follows:
-
- void call_rcu(struct rcu_head * head,
- void (*func)(struct rcu_head *head));
-
-This function invokes func(head) after a grace period has elapsed.
-This invocation might happen from either softirq or process context,
-so the function is not permitted to block. The foo struct needs to
-have an rcu_head structure added, perhaps as follows:
-
- struct foo {
- int a;
- char b;
- long c;
- struct rcu_head rcu;
- };
-
-The foo_update_a() function might then be written as follows:
-
- /*
- * Create a new struct foo that is the same as the one currently
- * pointed to by gbl_foo, except that field "a" is replaced
- * with "new_a". Points gbl_foo to the new structure, and
- * frees up the old structure after a grace period.
- *
- * Uses rcu_assign_pointer() to ensure that concurrent readers
- * see the initialized version of the new structure.
- *
- * Uses call_rcu() to ensure that any readers that might have
- * references to the old structure complete before freeing the
- * old structure.
- */
- void foo_update_a(int new_a)
- {
- struct foo *new_fp;
- struct foo *old_fp;
-
- new_fp = kmalloc(sizeof(*new_fp), GFP_KERNEL);
- spin_lock(&foo_mutex);
- old_fp = rcu_dereference_protected(gbl_foo, lockdep_is_held(&foo_mutex));
- *new_fp = *old_fp;
- new_fp->a = new_a;
- rcu_assign_pointer(gbl_foo, new_fp);
- spin_unlock(&foo_mutex);
- call_rcu(&old_fp->rcu, foo_reclaim);
- }
-
-The foo_reclaim() function might appear as follows:
-
- void foo_reclaim(struct rcu_head *rp)
- {
- struct foo *fp = container_of(rp, struct foo, rcu);
-
- foo_cleanup(fp->a);
-
- kfree(fp);
- }
-
-The container_of() primitive is a macro that, given a pointer into a
-struct, the type of the struct, and the pointed-to field within the
-struct, returns a pointer to the beginning of the struct.
-
-The use of call_rcu() permits the caller of foo_update_a() to
-immediately regain control, without needing to worry further about the
-old version of the newly updated element. It also clearly shows the
-RCU distinction between updater, namely foo_update_a(), and reclaimer,
-namely foo_reclaim().
-
-The summary of advice is the same as for the previous section, except
-that we are now using call_rcu() rather than synchronize_rcu():
-
-o Use call_rcu() -after- removing a data element from an
- RCU-protected data structure in order to register a callback
- function that will be invoked after the completion of all RCU
- read-side critical sections that might be referencing that
- data item.
-
-If the callback for call_rcu() is not doing anything more than calling
-kfree() on the structure, you can use kfree_rcu() instead of call_rcu()
-to avoid having to write your own callback:
-
- kfree_rcu(old_fp, rcu);
-
-Again, see checklist.txt for additional rules governing the use of RCU.
-
-
-5. WHAT ARE SOME SIMPLE IMPLEMENTATIONS OF RCU?
-
-One of the nice things about RCU is that it has extremely simple "toy"
-implementations that are a good first step towards understanding the
-production-quality implementations in the Linux kernel. This section
-presents two such "toy" implementations of RCU, one that is implemented
-in terms of familiar locking primitives, and another that more closely
-resembles "classic" RCU. Both are way too simple for real-world use,
-lacking both functionality and performance. However, they are useful
-in getting a feel for how RCU works. See kernel/rcupdate.c for a
-production-quality implementation, and see:
-
- http://www.rdrop.com/users/paulmck/RCU
-
-for papers describing the Linux kernel RCU implementation. The OLS'01
-and OLS'02 papers are a good introduction, and the dissertation provides
-more details on the current implementation as of early 2004.
-
-
-5A. "TOY" IMPLEMENTATION #1: LOCKING
-
-This section presents a "toy" RCU implementation that is based on
-familiar locking primitives. Its overhead makes it a non-starter for
-real-life use, as does its lack of scalability. It is also unsuitable
-for realtime use, since it allows scheduling latency to "bleed" from
-one read-side critical section to another. It also assumes recursive
-reader-writer locks: If you try this with non-recursive locks, and
-you allow nested rcu_read_lock() calls, you can deadlock.
-
-However, it is probably the easiest implementation to relate to, so is
-a good starting point.
-
-It is extremely simple:
-
- static DEFINE_RWLOCK(rcu_gp_mutex);
-
- void rcu_read_lock(void)
- {
- read_lock(&rcu_gp_mutex);
- }
-
- void rcu_read_unlock(void)
- {
- read_unlock(&rcu_gp_mutex);
- }
-
- void synchronize_rcu(void)
- {
- write_lock(&rcu_gp_mutex);
- smp_mb__after_spinlock();
- write_unlock(&rcu_gp_mutex);
- }
-
-[You can ignore rcu_assign_pointer() and rcu_dereference() without missing
-much. But here are simplified versions anyway. And whatever you do,
-don't forget about them when submitting patches making use of RCU!]
-
- #define rcu_assign_pointer(p, v) \
- ({ \
- smp_store_release(&(p), (v)); \
- })
-
- #define rcu_dereference(p) \
- ({ \
- typeof(p) _________p1 = READ_ONCE(p); \
- (_________p1); \
- })
-
-
-The rcu_read_lock() and rcu_read_unlock() primitive read-acquire
-and release a global reader-writer lock. The synchronize_rcu()
-primitive write-acquires this same lock, then releases it. This means
-that once synchronize_rcu() exits, all RCU read-side critical sections
-that were in progress before synchronize_rcu() was called are guaranteed
-to have completed -- there is no way that synchronize_rcu() would have
-been able to write-acquire the lock otherwise. The smp_mb__after_spinlock()
-promotes synchronize_rcu() to a full memory barrier in compliance with
-the "Memory-Barrier Guarantees" listed in:
-
- Documentation/RCU/Design/Requirements/Requirements.html.
-
-It is possible to nest rcu_read_lock(), since reader-writer locks may
-be recursively acquired. Note also that rcu_read_lock() is immune
-from deadlock (an important property of RCU). The reason for this is
-that the only thing that can block rcu_read_lock() is a synchronize_rcu().
-But synchronize_rcu() does not acquire any locks while holding rcu_gp_mutex,
-so there can be no deadlock cycle.
-
-Quick Quiz #1: Why is this argument naive? How could a deadlock
- occur when using this algorithm in a real-world Linux
- kernel? How could this deadlock be avoided?
-
-
-5B. "TOY" EXAMPLE #2: CLASSIC RCU
-
-This section presents a "toy" RCU implementation that is based on
-"classic RCU". It is also short on performance (but only for updates) and
-on features such as hotplug CPU and the ability to run in CONFIG_PREEMPT
-kernels. The definitions of rcu_dereference() and rcu_assign_pointer()
-are the same as those shown in the preceding section, so they are omitted.
-
- void rcu_read_lock(void) { }
-
- void rcu_read_unlock(void) { }
-
- void synchronize_rcu(void)
- {
- int cpu;
-
- for_each_possible_cpu(cpu)
- run_on(cpu);
- }
-
-Note that rcu_read_lock() and rcu_read_unlock() do absolutely nothing.
-This is the great strength of classic RCU in a non-preemptive kernel:
-read-side overhead is precisely zero, at least on non-Alpha CPUs.
-And there is absolutely no way that rcu_read_lock() can possibly
-participate in a deadlock cycle!
-
-The implementation of synchronize_rcu() simply schedules itself on each
-CPU in turn. The run_on() primitive can be implemented straightforwardly
-in terms of the sched_setaffinity() primitive. Of course, a somewhat less
-"toy" implementation would restore the affinity upon completion rather
-than just leaving all tasks running on the last CPU, but when I said
-"toy", I meant -toy-!
-
-So how the heck is this supposed to work???
-
-Remember that it is illegal to block while in an RCU read-side critical
-section. Therefore, if a given CPU executes a context switch, we know
-that it must have completed all preceding RCU read-side critical sections.
-Once -all- CPUs have executed a context switch, then -all- preceding
-RCU read-side critical sections will have completed.
-
-So, suppose that we remove a data item from its structure and then invoke
-synchronize_rcu(). Once synchronize_rcu() returns, we are guaranteed
-that there are no RCU read-side critical sections holding a reference
-to that data item, so we can safely reclaim it.
-
-Quick Quiz #2: Give an example where Classic RCU's read-side
- overhead is -negative-.
-
-Quick Quiz #3: If it is illegal to block in an RCU read-side
- critical section, what the heck do you do in
- PREEMPT_RT, where normal spinlocks can block???
-
-
-6. ANALOGY WITH READER-WRITER LOCKING
-
-Although RCU can be used in many different ways, a very common use of
-RCU is analogous to reader-writer locking. The following unified
-diff shows how closely related RCU and reader-writer locking can be.
-
- @@ -5,5 +5,5 @@ struct el {
- int data;
- /* Other data fields */
- };
- -rwlock_t listmutex;
- +spinlock_t listmutex;
- struct el head;
-
- @@ -13,15 +14,15 @@
- struct list_head *lp;
- struct el *p;
-
- - read_lock(&listmutex);
- - list_for_each_entry(p, head, lp) {
- + rcu_read_lock();
- + list_for_each_entry_rcu(p, head, lp) {
- if (p->key == key) {
- *result = p->data;
- - read_unlock(&listmutex);
- + rcu_read_unlock();
- return 1;
- }
- }
- - read_unlock(&listmutex);
- + rcu_read_unlock();
- return 0;
- }
-
- @@ -29,15 +30,16 @@
- {
- struct el *p;
-
- - write_lock(&listmutex);
- + spin_lock(&listmutex);
- list_for_each_entry(p, head, lp) {
- if (p->key == key) {
- - list_del(&p->list);
- - write_unlock(&listmutex);
- + list_del_rcu(&p->list);
- + spin_unlock(&listmutex);
- + synchronize_rcu();
- kfree(p);
- return 1;
- }
- }
- - write_unlock(&listmutex);
- + spin_unlock(&listmutex);
- return 0;
- }
-
-Or, for those who prefer a side-by-side listing:
-
- 1 struct el { 1 struct el {
- 2 struct list_head list; 2 struct list_head list;
- 3 long key; 3 long key;
- 4 spinlock_t mutex; 4 spinlock_t mutex;
- 5 int data; 5 int data;
- 6 /* Other data fields */ 6 /* Other data fields */
- 7 }; 7 };
- 8 rwlock_t listmutex; 8 spinlock_t listmutex;
- 9 struct el head; 9 struct el head;
-
- 1 int search(long key, int *result) 1 int search(long key, int *result)
- 2 { 2 {
- 3 struct list_head *lp; 3 struct list_head *lp;
- 4 struct el *p; 4 struct el *p;
- 5 5
- 6 read_lock(&listmutex); 6 rcu_read_lock();
- 7 list_for_each_entry(p, head, lp) { 7 list_for_each_entry_rcu(p, head, lp) {
- 8 if (p->key == key) { 8 if (p->key == key) {
- 9 *result = p->data; 9 *result = p->data;
-10 read_unlock(&listmutex); 10 rcu_read_unlock();
-11 return 1; 11 return 1;
-12 } 12 }
-13 } 13 }
-14 read_unlock(&listmutex); 14 rcu_read_unlock();
-15 return 0; 15 return 0;
-16 } 16 }
-
- 1 int delete(long key) 1 int delete(long key)
- 2 { 2 {
- 3 struct el *p; 3 struct el *p;
- 4 4
- 5 write_lock(&listmutex); 5 spin_lock(&listmutex);
- 6 list_for_each_entry(p, head, lp) { 6 list_for_each_entry(p, head, lp) {
- 7 if (p->key == key) { 7 if (p->key == key) {
- 8 list_del(&p->list); 8 list_del_rcu(&p->list);
- 9 write_unlock(&listmutex); 9 spin_unlock(&listmutex);
- 10 synchronize_rcu();
-10 kfree(p); 11 kfree(p);
-11 return 1; 12 return 1;
-12 } 13 }
-13 } 14 }
-14 write_unlock(&listmutex); 15 spin_unlock(&listmutex);
-15 return 0; 16 return 0;
-16 } 17 }
-
-Either way, the differences are quite small. Read-side locking moves
-to rcu_read_lock() and rcu_read_unlock, update-side locking moves from
-a reader-writer lock to a simple spinlock, and a synchronize_rcu()
-precedes the kfree().
-
-However, there is one potential catch: the read-side and update-side
-critical sections can now run concurrently. In many cases, this will
-not be a problem, but it is necessary to check carefully regardless.
-For example, if multiple independent list updates must be seen as
-a single atomic update, converting to RCU will require special care.
-
-Also, the presence of synchronize_rcu() means that the RCU version of
-delete() can now block. If this is a problem, there is a callback-based
-mechanism that never blocks, namely call_rcu() or kfree_rcu(), that can
-be used in place of synchronize_rcu().
-
-
-7. FULL LIST OF RCU APIs
-
-The RCU APIs are documented in docbook-format header comments in the
-Linux-kernel source code, but it helps to have a full list of the
-APIs, since there does not appear to be a way to categorize them
-in docbook. Here is the list, by category.
-
-RCU list traversal:
-
- list_entry_rcu
- list_first_entry_rcu
- list_next_rcu
- list_for_each_entry_rcu
- list_for_each_entry_continue_rcu
- list_for_each_entry_from_rcu
- hlist_first_rcu
- hlist_next_rcu
- hlist_pprev_rcu
- hlist_for_each_entry_rcu
- hlist_for_each_entry_rcu_bh
- hlist_for_each_entry_from_rcu
- hlist_for_each_entry_continue_rcu
- hlist_for_each_entry_continue_rcu_bh
- hlist_nulls_first_rcu
- hlist_nulls_for_each_entry_rcu
- hlist_bl_first_rcu
- hlist_bl_for_each_entry_rcu
-
-RCU pointer/list update:
-
- rcu_assign_pointer
- list_add_rcu
- list_add_tail_rcu
- list_del_rcu
- list_replace_rcu
- hlist_add_behind_rcu
- hlist_add_before_rcu
- hlist_add_head_rcu
- hlist_del_rcu
- hlist_del_init_rcu
- hlist_replace_rcu
- list_splice_init_rcu()
- hlist_nulls_del_init_rcu
- hlist_nulls_del_rcu
- hlist_nulls_add_head_rcu
- hlist_bl_add_head_rcu
- hlist_bl_del_init_rcu
- hlist_bl_del_rcu
- hlist_bl_set_first_rcu
-
-RCU: Critical sections Grace period Barrier
-
- rcu_read_lock synchronize_net rcu_barrier
- rcu_read_unlock synchronize_rcu
- rcu_dereference synchronize_rcu_expedited
- rcu_read_lock_held call_rcu
- rcu_dereference_check kfree_rcu
- rcu_dereference_protected
-
-bh: Critical sections Grace period Barrier
-
- rcu_read_lock_bh call_rcu_bh rcu_barrier_bh
- rcu_read_unlock_bh synchronize_rcu_bh
- rcu_dereference_bh synchronize_rcu_bh_expedited
- rcu_dereference_bh_check
- rcu_dereference_bh_protected
- rcu_read_lock_bh_held
-
-sched: Critical sections Grace period Barrier
-
- rcu_read_lock_sched synchronize_sched rcu_barrier_sched
- rcu_read_unlock_sched call_rcu_sched
- [preempt_disable] synchronize_sched_expedited
- [and friends]
- rcu_read_lock_sched_notrace
- rcu_read_unlock_sched_notrace
- rcu_dereference_sched
- rcu_dereference_sched_check
- rcu_dereference_sched_protected
- rcu_read_lock_sched_held
-
-
-SRCU: Critical sections Grace period Barrier
-
- srcu_read_lock synchronize_srcu srcu_barrier
- srcu_read_unlock call_srcu
- srcu_dereference synchronize_srcu_expedited
- srcu_dereference_check
- srcu_read_lock_held
-
-SRCU: Initialization/cleanup
- DEFINE_SRCU
- DEFINE_STATIC_SRCU
- init_srcu_struct
- cleanup_srcu_struct
-
-All: lockdep-checked RCU-protected pointer access
-
- rcu_access_pointer
- rcu_dereference_raw
- RCU_LOCKDEP_WARN
- rcu_sleep_check
- RCU_NONIDLE
-
-See the comment headers in the source code (or the docbook generated
-from them) for more information.
-
-However, given that there are no fewer than four families of RCU APIs
-in the Linux kernel, how do you choose which one to use? The following
-list can be helpful:
-
-a. Will readers need to block? If so, you need SRCU.
-
-b. What about the -rt patchset? If readers would need to block
- in an non-rt kernel, you need SRCU. If readers would block
- in a -rt kernel, but not in a non-rt kernel, SRCU is not
- necessary. (The -rt patchset turns spinlocks into sleeplocks,
- hence this distinction.)
-
-c. Do you need to treat NMI handlers, hardirq handlers,
- and code segments with preemption disabled (whether
- via preempt_disable(), local_irq_save(), local_bh_disable(),
- or some other mechanism) as if they were explicit RCU readers?
- If so, RCU-sched is the only choice that will work for you.
-
-d. Do you need RCU grace periods to complete even in the face
- of softirq monopolization of one or more of the CPUs? For
- example, is your code subject to network-based denial-of-service
- attacks? If so, you should disable softirq across your readers,
- for example, by using rcu_read_lock_bh().
-
-e. Is your workload too update-intensive for normal use of
- RCU, but inappropriate for other synchronization mechanisms?
- If so, consider SLAB_TYPESAFE_BY_RCU (which was originally
- named SLAB_DESTROY_BY_RCU). But please be careful!
-
-f. Do you need read-side critical sections that are respected
- even though they are in the middle of the idle loop, during
- user-mode execution, or on an offlined CPU? If so, SRCU is the
- only choice that will work for you.
-
-g. Otherwise, use RCU.
-
-Of course, this all assumes that you have determined that RCU is in fact
-the right tool for your job.
-
-
-8. ANSWERS TO QUICK QUIZZES
-
-Quick Quiz #1: Why is this argument naive? How could a deadlock
- occur when using this algorithm in a real-world Linux
- kernel? [Referring to the lock-based "toy" RCU
- algorithm.]
-
-Answer: Consider the following sequence of events:
-
- 1. CPU 0 acquires some unrelated lock, call it
- "problematic_lock", disabling irq via
- spin_lock_irqsave().
-
- 2. CPU 1 enters synchronize_rcu(), write-acquiring
- rcu_gp_mutex.
-
- 3. CPU 0 enters rcu_read_lock(), but must wait
- because CPU 1 holds rcu_gp_mutex.
-
- 4. CPU 1 is interrupted, and the irq handler
- attempts to acquire problematic_lock.
-
- The system is now deadlocked.
-
- One way to avoid this deadlock is to use an approach like
- that of CONFIG_PREEMPT_RT, where all normal spinlocks
- become blocking locks, and all irq handlers execute in
- the context of special tasks. In this case, in step 4
- above, the irq handler would block, allowing CPU 1 to
- release rcu_gp_mutex, avoiding the deadlock.
-
- Even in the absence of deadlock, this RCU implementation
- allows latency to "bleed" from readers to other
- readers through synchronize_rcu(). To see this,
- consider task A in an RCU read-side critical section
- (thus read-holding rcu_gp_mutex), task B blocked
- attempting to write-acquire rcu_gp_mutex, and
- task C blocked in rcu_read_lock() attempting to
- read_acquire rcu_gp_mutex. Task A's RCU read-side
- latency is holding up task C, albeit indirectly via
- task B.
-
- Realtime RCU implementations therefore use a counter-based
- approach where tasks in RCU read-side critical sections
- cannot be blocked by tasks executing synchronize_rcu().
-
-Quick Quiz #2: Give an example where Classic RCU's read-side
- overhead is -negative-.
-
-Answer: Imagine a single-CPU system with a non-CONFIG_PREEMPT
- kernel where a routing table is used by process-context
- code, but can be updated by irq-context code (for example,
- by an "ICMP REDIRECT" packet). The usual way of handling
- this would be to have the process-context code disable
- interrupts while searching the routing table. Use of
- RCU allows such interrupt-disabling to be dispensed with.
- Thus, without RCU, you pay the cost of disabling interrupts,
- and with RCU you don't.
-
- One can argue that the overhead of RCU in this
- case is negative with respect to the single-CPU
- interrupt-disabling approach. Others might argue that
- the overhead of RCU is merely zero, and that replacing
- the positive overhead of the interrupt-disabling scheme
- with the zero-overhead RCU scheme does not constitute
- negative overhead.
-
- In real life, of course, things are more complex. But
- even the theoretical possibility of negative overhead for
- a synchronization primitive is a bit unexpected. ;-)
-
-Quick Quiz #3: If it is illegal to block in an RCU read-side
- critical section, what the heck do you do in
- PREEMPT_RT, where normal spinlocks can block???
-
-Answer: Just as PREEMPT_RT permits preemption of spinlock
- critical sections, it permits preemption of RCU
- read-side critical sections. It also permits
- spinlocks blocking while in RCU read-side critical
- sections.
-
- Why the apparent inconsistency? Because it is it
- possible to use priority boosting to keep the RCU
- grace periods short if need be (for example, if running
- short of memory). In contrast, if blocking waiting
- for (say) network reception, there is no way to know
- what should be boosted. Especially given that the
- process we need to boost might well be a human being
- who just went out for a pizza or something. And although
- a computer-operated cattle prod might arouse serious
- interest, it might also provoke serious objections.
- Besides, how does the computer know what pizza parlor
- the human being went to???
-
-
-ACKNOWLEDGEMENTS
-
-My thanks to the people who helped make this human-readable, including
-Jon Walpole, Josh Triplett, Serge Hallyn, Suzanne Wood, and Alan Stern.
-
-
-For more information, see http://www.rdrop.com/users/paulmck/RCU.
diff --git a/Documentation/accel/amdxdna/amdnpu.rst b/Documentation/accel/amdxdna/amdnpu.rst
new file mode 100644
index 000000000000..42e54904f9a8
--- /dev/null
+++ b/Documentation/accel/amdxdna/amdnpu.rst
@@ -0,0 +1,281 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+
+.. include:: <isonum.txt>
+
+=========
+ AMD NPU
+=========
+
+:Copyright: |copy| 2024 Advanced Micro Devices, Inc.
+:Author: Sonal Santan <sonal.santan@amd.com>
+
+Overview
+========
+
+AMD NPU (Neural Processing Unit) is a multi-user AI inference accelerator
+integrated into AMD client APU. NPU enables efficient execution of Machine
+Learning applications like CNN, LLM, etc. NPU is based on
+`AMD XDNA Architecture`_. NPU is managed by **amdxdna** driver.
+
+
+Hardware Description
+====================
+
+AMD NPU consists of the following hardware components:
+
+AMD XDNA Array
+--------------
+
+AMD XDNA Array comprises of 2D array of compute and memory tiles built with
+`AMD AI Engine Technology`_. Each column has 4 rows of compute tiles and 1
+row of memory tile. Each compute tile contains a VLIW processor with its own
+dedicated program and data memory. The memory tile acts as L2 memory. The 2D
+array can be partitioned at a column boundary creating a spatially isolated
+partition which can be bound to a workload context.
+
+Each column also has dedicated DMA engines to move data between host DDR and
+memory tile.
+
+AMD Phoenix and AMD Hawk Point client NPU have a 4x5 topology, i.e., 4 rows of
+compute tiles arranged into 5 columns. AMD Strix Point client APU have 4x8
+topology, i.e., 4 rows of compute tiles arranged into 8 columns.
+
+Shared L2 Memory
+----------------
+
+The single row of memory tiles create a pool of software managed on chip L2
+memory. DMA engines are used to move data between host DDR and memory tiles.
+AMD Phoenix and AMD Hawk Point NPUs have a total of 2560 KB of L2 memory.
+AMD Strix Point NPU has a total of 4096 KB of L2 memory.
+
+Microcontroller
+---------------
+
+A microcontroller runs NPU Firmware which is responsible for command processing,
+XDNA Array partition setup, XDNA Array configuration, workload context
+management and workload orchestration.
+
+NPU Firmware uses a dedicated instance of an isolated non-privileged context
+called ERT to service each workload context. ERT is also used to execute user
+provided ``ctrlcode`` associated with the workload context.
+
+NPU Firmware uses a single isolated privileged context called MERT to service
+management commands from the amdxdna driver.
+
+Mailboxes
+---------
+
+The microcontroller and amdxdna driver use a privileged channel for management
+tasks like setting up of contexts, telemetry, query, error handling, setting up
+user channel, etc. As mentioned before, privileged channel requests are
+serviced by MERT. The privileged channel is bound to a single mailbox.
+
+The microcontroller and amdxdna driver use a dedicated user channel per
+workload context. The user channel is primarily used for submitting work to
+the NPU. As mentioned before, a user channel requests are serviced by an
+instance of ERT. Each user channel is bound to its own dedicated mailbox.
+
+PCIe EP
+-------
+
+NPU is visible to the x86 host CPU as a PCIe device with multiple BARs and some
+MSI-X interrupt vectors. NPU uses a dedicated high bandwidth SoC level fabric
+for reading or writing into host memory. Each instance of ERT gets its own
+dedicated MSI-X interrupt. MERT gets a single instance of MSI-X interrupt.
+
+The number of PCIe BARs varies depending on the specific device. Based on their
+functions, PCIe BARs can generally be categorized into the following types.
+
+* PSP BAR: Expose the AMD PSP (Platform Security Processor) function
+* SMU BAR: Expose the AMD SMU (System Management Unit) function
+* SRAM BAR: Expose ring buffers for the mailbox
+* Mailbox BAR: Expose the mailbox control registers (head, tail and ISR
+ registers etc.)
+* Public Register BAR: Expose public registers
+
+On specific devices, the above-mentioned BAR type might be combined into a
+single physical PCIe BAR. Or a module might require two physical PCIe BARs to
+be fully functional. For example,
+
+* On AMD Phoenix device, PSP, SMU, Public Register BARs are on PCIe BAR index 0.
+* On AMD Strix Point device, Mailbox and Public Register BARs are on PCIe BAR
+ index 0. The PSP has some registers in PCIe BAR index 0 (Public Register BAR)
+ and PCIe BAR index 4 (PSP BAR).
+
+Process Isolation Hardware
+--------------------------
+
+As explained before, XDNA Array can be dynamically divided into isolated
+spatial partitions, each of which may have one or more columns. The spatial
+partition is setup by programming the column isolation registers by the
+microcontroller. Each spatial partition is associated with a PASID which is
+also programmed by the microcontroller. Hence multiple spatial partitions in
+the NPU can make concurrent host access protected by PASID.
+
+The NPU FW itself uses microcontroller MMU enforced isolated contexts for
+servicing user and privileged channel requests.
+
+
+Mixed Spatial and Temporal Scheduling
+=====================================
+
+AMD XDNA architecture supports mixed spatial and temporal (time sharing)
+scheduling of 2D array. This means that spatial partitions may be setup and
+torn down dynamically to accommodate various workloads. A *spatial* partition
+may be *exclusively* bound to one workload context while another partition may
+be *temporarily* bound to more than one workload contexts. The microcontroller
+updates the PASID for a temporarily shared partition to match the context that
+has been bound to the partition at any moment.
+
+Resource Solver
+---------------
+
+The Resource Solver component of the amdxdna driver manages the allocation
+of 2D array among various workloads. Every workload describes the number
+of columns required to run the NPU binary in its metadata. The Resource Solver
+component uses hints passed by the workload and its own heuristics to
+decide 2D array (re)partition strategy and mapping of workloads for spatial and
+temporal sharing of columns. The FW enforces the context-to-column(s) resource
+binding decisions made by the Resource Solver.
+
+AMD Phoenix and AMD Hawk Point client NPU can support 6 concurrent workload
+contexts. AMD Strix Point can support 16 concurrent workload contexts.
+
+
+Application Binaries
+====================
+
+A NPU application workload is comprised of two separate binaries which are
+generated by the NPU compiler.
+
+1. AMD XDNA Array overlay, which is used to configure a NPU spatial partition.
+ The overlay contains instructions for setting up the stream switch
+ configuration and ELF for the compute tiles. The overlay is loaded on the
+ spatial partition bound to the workload by the associated ERT instance.
+ Refer to the
+ `Versal Adaptive SoC AIE-ML Architecture Manual (AM020)`_ for more details.
+
+2. ``ctrlcode``, used for orchestrating the overlay loaded on the spatial
+ partition. ``ctrlcode`` is executed by the ERT running in protected mode on
+ the microcontroller in the context of the workload. ``ctrlcode`` is made up
+ of a sequence of opcodes named ``XAie_TxnOpcode``. Refer to the
+ `AI Engine Run Time`_ for more details.
+
+
+Special Host Buffers
+====================
+
+Per-context Instruction Buffer
+------------------------------
+
+Every workload context uses a host resident 64 MB buffer which is memory
+mapped into the ERT instance created to service the workload. The ``ctrlcode``
+used by the workload is copied into this special memory. This buffer is
+protected by PASID like all other input/output buffers used by that workload.
+Instruction buffer is also mapped into the user space of the workload.
+
+Global Privileged Buffer
+------------------------
+
+In addition, the driver also allocates a single buffer for maintenance tasks
+like recording errors from MERT. This global buffer uses the global IOMMU
+domain and is only accessible by MERT.
+
+
+High-level Use Flow
+===================
+
+Here are the steps to run a workload on AMD NPU:
+
+1. Compile the workload into an overlay and a ``ctrlcode`` binary.
+2. Userspace opens a context in the driver and provides the overlay.
+3. The driver checks with the Resource Solver for provisioning a set of columns
+ for the workload.
+4. The driver then asks MERT to create a context on the device with the desired
+ columns.
+5. MERT then creates an instance of ERT. MERT also maps the Instruction Buffer
+ into ERT memory.
+6. The userspace then copies the ``ctrlcode`` to the Instruction Buffer.
+7. Userspace then creates a command buffer with pointers to input, output, and
+ instruction buffer; it then submits command buffer with the driver and goes
+ to sleep waiting for completion.
+8. The driver sends the command over the Mailbox to ERT.
+9. ERT *executes* the ``ctrlcode`` in the instruction buffer.
+10. Execution of the ``ctrlcode`` kicks off DMAs to and from the host DDR while
+ AMD XDNA Array is running.
+11. When ERT reaches end of ``ctrlcode``, it raises an MSI-X to send completion
+ signal to the driver which then wakes up the waiting workload.
+
+
+Boot Flow
+=========
+
+amdxdna driver uses PSP to securely load signed NPU FW and kick off the boot
+of the NPU microcontroller. amdxdna driver then waits for the alive signal in
+a special location on BAR 0. The NPU is switched off during SoC suspend and
+turned on after resume where the NPU FW is reloaded, and the handshake is
+performed again.
+
+
+Userspace components
+====================
+
+Compiler
+--------
+
+Peano is an LLVM based open-source single core compiler for AMD XDNA Array
+compute tile. Peano is available at:
+https://github.com/Xilinx/llvm-aie
+
+IRON is an open-source array compiler for AMD XDNA Array based NPU which uses
+Peano underneath. IRON is available at:
+https://github.com/Xilinx/mlir-aie
+
+Usermode Driver (UMD)
+---------------------
+
+The open-source XRT runtime stack interfaces with amdxdna kernel driver. XRT
+can be found at:
+https://github.com/Xilinx/XRT
+
+The open-source XRT shim for NPU is can be found at:
+https://github.com/amd/xdna-driver
+
+
+DMA Operation
+=============
+
+DMA operation instructions are encoded in the ``ctrlcode`` as
+``XAIE_IO_BLOCKWRITE`` opcode. When ERT executes ``XAIE_IO_BLOCKWRITE``, DMA
+operations between host DDR and L2 memory are effected.
+
+
+Error Handling
+==============
+
+When MERT detects an error in AMD XDNA Array, it pauses execution for that
+workload context and sends an asynchronous message to the driver over the
+privileged channel. The driver then sends a buffer pointer to MERT to capture
+the register states for the partition bound to faulting workload context. The
+driver then decodes the error by reading the contents of the buffer pointer.
+
+
+Telemetry
+=========
+
+MERT can report various kinds of telemetry information like the following:
+
+* L1 interrupt counter
+* DMA counter
+* Deep Sleep counter
+* etc.
+
+
+References
+==========
+
+- `AMD XDNA Architecture <https://www.amd.com/en/technologies/xdna.html>`_
+- `AMD AI Engine Technology <https://www.xilinx.com/products/technology/ai-engine.html>`_
+- `Peano <https://github.com/Xilinx/llvm-aie>`_
+- `Versal Adaptive SoC AIE-ML Architecture Manual (AM020) <https://docs.amd.com/r/en-US/am020-versal-aie-ml>`_
+- `AI Engine Run Time <https://github.com/Xilinx/aie-rt/tree/release/main_aig>`_
diff --git a/Documentation/accel/amdxdna/index.rst b/Documentation/accel/amdxdna/index.rst
new file mode 100644
index 000000000000..38c16939f1fc
--- /dev/null
+++ b/Documentation/accel/amdxdna/index.rst
@@ -0,0 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+
+=====================================
+ accel/amdxdna NPU driver
+=====================================
+
+The accel/amdxdna driver supports the AMD NPU (Neural Processing Unit).
+
+.. toctree::
+
+ amdnpu
diff --git a/Documentation/accel/index.rst b/Documentation/accel/index.rst
new file mode 100644
index 000000000000..d8fa332d60a8
--- /dev/null
+++ b/Documentation/accel/index.rst
@@ -0,0 +1,20 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================
+Compute Accelerators
+====================
+
+.. toctree::
+ :maxdepth: 1
+
+ introduction
+ amdxdna/index
+ qaic/index
+ rocket/index
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
diff --git a/Documentation/accel/introduction.rst b/Documentation/accel/introduction.rst
new file mode 100644
index 000000000000..ae3030136637
--- /dev/null
+++ b/Documentation/accel/introduction.rst
@@ -0,0 +1,110 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============
+Introduction
+============
+
+The Linux compute accelerators subsystem is designed to expose compute
+accelerators in a common way to user-space and provide a common set of
+functionality.
+
+These devices can be either stand-alone ASICs or IP blocks inside an SoC/GPU.
+Although these devices are typically designed to accelerate
+Machine-Learning (ML) and/or Deep-Learning (DL) computations, the accel layer
+is not limited to handling these types of accelerators.
+
+Typically, a compute accelerator will belong to one of the following
+categories:
+
+- Edge AI - doing inference at an edge device. It can be an embedded ASIC/FPGA,
+ or an IP inside a SoC (e.g. laptop web camera). These devices
+ are typically configured using registers and can work with or without DMA.
+
+- Inference data-center - single/multi user devices in a large server. This
+ type of device can be stand-alone or an IP inside a SoC or a GPU. It will
+ have on-board DRAM (to hold the DL topology), DMA engines and
+ command submission queues (either kernel or user-space queues).
+ It might also have an MMU to manage multiple users and might also enable
+ virtualization (SR-IOV) to support multiple VMs on the same device. In
+ addition, these devices will usually have some tools, such as profiler and
+ debugger.
+
+- Training data-center - Similar to Inference data-center cards, but typically
+ have more computational power and memory b/w (e.g. HBM) and will likely have
+ a method of scaling-up/out, i.e. connecting to other training cards inside
+ the server or in other servers, respectively.
+
+All these devices typically have different runtime user-space software stacks,
+that are tailored-made to their h/w. In addition, they will also probably
+include a compiler to generate programs to their custom-made computational
+engines. Typically, the common layer in user-space will be the DL frameworks,
+such as PyTorch and TensorFlow.
+
+Sharing code with DRM
+=====================
+
+Because this type of devices can be an IP inside GPUs or have similar
+characteristics as those of GPUs, the accel subsystem will use the
+DRM subsystem's code and functionality. i.e. the accel core code will
+be part of the DRM subsystem and an accel device will be a new type of DRM
+device.
+
+This will allow us to leverage the extensive DRM code-base and
+collaborate with DRM developers that have experience with this type of
+devices. In addition, new features that will be added for the accelerator
+drivers can be of use to GPU drivers as well.
+
+Differentiation from GPUs
+=========================
+
+Because we want to prevent the extensive user-space graphic software stack
+from trying to use an accelerator as a GPU, the compute accelerators will be
+differentiated from GPUs by using a new major number and new device char files.
+
+Furthermore, the drivers will be located in a separate place in the kernel
+tree - drivers/accel/.
+
+The accelerator devices will be exposed to the user space with the dedicated
+261 major number and will have the following convention:
+
+- device char files - /dev/accel/accel\*
+- sysfs - /sys/class/accel/accel\*/
+- debugfs - /sys/kernel/debug/accel/\*/
+
+Getting Started
+===============
+
+First, read the DRM documentation at Documentation/gpu/index.rst.
+Not only it will explain how to write a new DRM driver but it will also
+contain all the information on how to contribute, the Code Of Conduct and
+what is the coding style/documentation. All of that is the same for the
+accel subsystem.
+
+Second, make sure the kernel is configured with CONFIG_DRM_ACCEL.
+
+To expose your device as an accelerator, two changes are needed to
+be done in your driver (as opposed to a standard DRM driver):
+
+- Add the DRIVER_COMPUTE_ACCEL feature flag in your drm_driver's
+ driver_features field. It is important to note that this driver feature is
+ mutually exclusive with DRIVER_RENDER and DRIVER_MODESET. Devices that want
+ to expose both graphics and compute device char files should be handled by
+ two drivers that are connected using the auxiliary bus framework.
+
+- Change the open callback in your driver fops structure to accel_open().
+ Alternatively, your driver can use DEFINE_DRM_ACCEL_FOPS macro to easily
+ set the correct function operations pointers structure.
+
+External References
+===================
+
+email threads
+-------------
+
+* `Initial discussion on the New subsystem for acceleration devices <https://lore.kernel.org/lkml/CAFCwf11=9qpNAepL7NL+YAV_QO=Wv6pnWPhKHKAepK3fNn+2Dg@mail.gmail.com/>`_ - Oded Gabbay (2022)
+* `patch-set to add the new subsystem <https://lore.kernel.org/lkml/20221022214622.18042-1-ogabbay@kernel.org/>`_ - Oded Gabbay (2022)
+
+Conference talks
+----------------
+
+* `LPC 2022 Accelerators BOF outcomes summary <https://airlied.blogspot.com/2022/09/accelerators-bof-outcomes-summary.html>`_ - Dave Airlie (2022)
diff --git a/Documentation/accel/qaic/aic080.rst b/Documentation/accel/qaic/aic080.rst
new file mode 100644
index 000000000000..d563771ea6ce
--- /dev/null
+++ b/Documentation/accel/qaic/aic080.rst
@@ -0,0 +1,14 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+
+===============================
+ Qualcomm Cloud AI 80 (AIC080)
+===============================
+
+Overview
+========
+
+The Qualcomm Cloud AI 80/AIC080 family of products are a derivative of AIC100.
+The number of NSPs and clock rates are reduced to fit within resource
+constrained solutions. The PCIe Product ID is 0xa080.
+
+As a derivative product, all AIC100 documentation applies.
diff --git a/Documentation/accel/qaic/aic100.rst b/Documentation/accel/qaic/aic100.rst
new file mode 100644
index 000000000000..273da6192fb3
--- /dev/null
+++ b/Documentation/accel/qaic/aic100.rst
@@ -0,0 +1,517 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+
+===============================
+ Qualcomm Cloud AI 100 (AIC100)
+===============================
+
+Overview
+========
+
+The Qualcomm Cloud AI 100/AIC100 family of products (including SA9000P - part of
+Snapdragon Ride) are PCIe adapter cards which contain a dedicated SoC ASIC for
+the purpose of efficiently running Artificial Intelligence (AI) Deep Learning
+inference workloads. They are AI accelerators.
+
+The PCIe interface of AIC100 is capable of PCIe Gen4 speeds over eight lanes
+(x8). An individual SoC on a card can have up to 16 NSPs for running workloads.
+Each SoC has an A53 management CPU. On card, there can be up to 32 GB of DDR.
+
+Multiple AIC100 cards can be hosted in a single system to scale overall
+performance. AIC100 cards are multi-user capable and able to execute workloads
+from multiple users in a concurrent manner.
+
+Hardware Description
+====================
+
+An AIC100 card consists of an AIC100 SoC, on-card DDR, and a set of misc
+peripherals (PMICs, etc).
+
+An AIC100 card can either be a PCIe HHHL form factor (a traditional PCIe card),
+or a Dual M.2 card. Both use PCIe to connect to the host system.
+
+As a PCIe endpoint/adapter, AIC100 uses the standard VendorID(VID)/
+DeviceID(DID) combination to uniquely identify itself to the host. AIC100
+uses the standard Qualcomm VID (0x17cb). All AIC100 SKUs use the same
+AIC100 DID (0xa100).
+
+AIC100 does not implement FLR (function level reset).
+
+AIC100 implements MSI but does not implement MSI-X. AIC100 prefers 17 MSIs to
+operate (1 for MHI, 16 for the DMA Bridge). Falling back to 1 MSI is possible in
+scenarios where reserving 32 MSIs isn't feasible.
+
+As a PCIe device, AIC100 utilizes BARs to provide host interfaces to the device
+hardware. AIC100 provides 3, 64-bit BARs.
+
+* The first BAR is 4K in size, and exposes the MHI interface to the host.
+
+* The second BAR is 2M in size, and exposes the DMA Bridge interface to the
+ host.
+
+* The third BAR is variable in size based on an individual AIC100's
+ configuration, but defaults to 64K. This BAR currently has no purpose.
+
+From the host perspective, AIC100 has several key hardware components -
+
+* MHI (Modem Host Interface)
+* QSM (QAIC Service Manager)
+* NSPs (Neural Signal Processor)
+* DMA Bridge
+* DDR
+
+MHI
+---
+
+AIC100 has one MHI interface over PCIe. MHI itself is documented at
+Documentation/mhi/index.rst MHI is the mechanism the host uses to communicate
+with the QSM. Except for workload data via the DMA Bridge, all interaction with
+the device occurs via MHI.
+
+QSM
+---
+
+QAIC Service Manager. This is an ARM A53 CPU that runs the primary
+firmware of the card and performs on-card management tasks. It also
+communicates with the host via MHI. Each AIC100 has one of
+these.
+
+NSP
+---
+
+Neural Signal Processor. Each AIC100 has up to 16 of these. These are
+the processors that run the workloads on AIC100. Each NSP is a Qualcomm Hexagon
+(Q6) DSP with HVX and HMX. Each NSP can only run one workload at a time, but
+multiple NSPs may be assigned to a single workload. Since each NSP can only run
+one workload, AIC100 is limited to 16 concurrent workloads. Workload
+"scheduling" is under the purview of the host. AIC100 does not automatically
+timeslice.
+
+DMA Bridge
+----------
+
+The DMA Bridge is custom DMA engine that manages the flow of data
+in and out of workloads. AIC100 has one of these. The DMA Bridge has 16
+channels, each consisting of a set of request/response FIFOs. Each active
+workload is assigned a single DMA Bridge channel. The DMA Bridge exposes
+hardware registers to manage the FIFOs (head/tail pointers), but requires host
+memory to store the FIFOs.
+
+DDR
+---
+
+AIC100 has on-card DDR. In total, an AIC100 can have up to 32 GB of DDR.
+This DDR is used to store workloads, data for the workloads, and is used by the
+QSM for managing the device. NSPs are granted access to sections of the DDR by
+the QSM. The host does not have direct access to the DDR, and must make
+requests to the QSM to transfer data to the DDR.
+
+High-level Use Flow
+===================
+
+AIC100 is a multi-user, programmable accelerator typically used for running
+neural networks in inferencing mode to efficiently perform AI operations.
+AIC100 is not intended for training neural networks. AIC100 can be utilized
+for generic compute workloads.
+
+Assuming a user wants to utilize AIC100, they would follow these steps:
+
+1. Compile the workload into an ELF targeting the NSP(s)
+2. Make requests to the QSM to load the workload and related artifacts into the
+ device DDR
+3. Make a request to the QSM to activate the workload onto a set of idle NSPs
+4. Make requests to the DMA Bridge to send input data to the workload to be
+ processed, and other requests to receive processed output data from the
+ workload.
+5. Once the workload is no longer required, make a request to the QSM to
+ deactivate the workload, thus putting the NSPs back into an idle state.
+6. Once the workload and related artifacts are no longer needed for future
+ sessions, make requests to the QSM to unload the data from DDR. This frees
+ the DDR to be used by other users.
+
+
+Boot Flow
+=========
+
+AIC100 uses a flashless boot flow, derived from Qualcomm MSMs.
+
+When AIC100 is first powered on, it begins executing PBL (Primary Bootloader)
+from ROM. PBL enumerates the PCIe link, and initializes the BHI (Boot Host
+Interface) component of MHI.
+
+Using BHI, the host points PBL to the location of the SBL (Secondary Bootloader)
+image. The PBL pulls the image from the host, validates it, and begins
+execution of SBL.
+
+SBL initializes MHI, and uses MHI to notify the host that the device has entered
+the SBL stage. SBL performs a number of operations:
+
+* SBL initializes the majority of hardware (anything PBL left uninitialized),
+ including DDR.
+* SBL offloads the bootlog to the host.
+* SBL synchronizes timestamps with the host for future logging.
+* SBL uses the Sahara protocol to obtain the runtime firmware images from the
+ host.
+
+Once SBL has obtained and validated the runtime firmware, it brings the NSPs out
+of reset, and jumps into the QSM.
+
+The QSM uses MHI to notify the host that the device has entered the QSM stage
+(AMSS in MHI terms). At this point, the AIC100 device is fully functional, and
+ready to process workloads.
+
+Userspace components
+====================
+
+Compiler
+--------
+
+An open compiler for AIC100 based on upstream LLVM can be found at:
+https://github.com/quic/software-kit-for-qualcomm-cloud-ai-100-cc
+
+Usermode Driver (UMD)
+---------------------
+
+An open UMD that interfaces with the qaic kernel driver can be found at:
+https://github.com/quic/software-kit-for-qualcomm-cloud-ai-100
+
+Sahara loader
+-------------
+
+An open implementation of the Sahara protocol called kickstart can be found at:
+https://github.com/andersson/qdl
+
+MHI Channels
+============
+
+AIC100 defines a number of MHI channels for different purposes. This is a list
+of the defined channels, and their uses.
+
++----------------+---------+----------+----------------------------------------+
+| Channel name | IDs | EEs | Purpose |
++================+=========+==========+========================================+
+| QAIC_LOOPBACK | 0 & 1 | AMSS | Any data sent to the device on this |
+| | | | channel is sent back to the host. |
++----------------+---------+----------+----------------------------------------+
+| QAIC_SAHARA | 2 & 3 | SBL | Used by SBL to obtain the runtime |
+| | | | firmware from the host. |
++----------------+---------+----------+----------------------------------------+
+| QAIC_DIAG | 4 & 5 | AMSS | Used to communicate with QSM via the |
+| | | | DIAG protocol. |
++----------------+---------+----------+----------------------------------------+
+| QAIC_SSR | 6 & 7 | AMSS | Used to notify the host of subsystem |
+| | | | restart events, and to offload SSR |
+| | | | crashdumps. |
++----------------+---------+----------+----------------------------------------+
+| QAIC_QDSS | 8 & 9 | AMSS | Used for the Qualcomm Debug Subsystem. |
++----------------+---------+----------+----------------------------------------+
+| QAIC_CONTROL | 10 & 11 | AMSS | Used for the Neural Network Control |
+| | | | (NNC) protocol. This is the primary |
+| | | | channel between host and QSM for |
+| | | | managing workloads. |
++----------------+---------+----------+----------------------------------------+
+| QAIC_LOGGING | 12 & 13 | SBL | Used by the SBL to send the bootlog to |
+| | | | the host. |
++----------------+---------+----------+----------------------------------------+
+| QAIC_STATUS | 14 & 15 | AMSS | Used to notify the host of Reliability,|
+| | | | Accessibility, Serviceability (RAS) |
+| | | | events. |
++----------------+---------+----------+----------------------------------------+
+| QAIC_TELEMETRY | 16 & 17 | AMSS | Used to get/set power/thermal/etc |
+| | | | attributes. |
++----------------+---------+----------+----------------------------------------+
+| QAIC_DEBUG | 18 & 19 | AMSS | Not used. |
++----------------+---------+----------+----------------------------------------+
+| QAIC_TIMESYNC | 20 & 21 | SBL | Used to synchronize timestamps in the |
+| | | | device side logs with the host time |
+| | | | source. |
++----------------+---------+----------+----------------------------------------+
+| QAIC_TIMESYNC | 22 & 23 | AMSS | Used to periodically synchronize |
+| _PERIODIC | | | timestamps in the device side logs with|
+| | | | the host time source. |
++----------------+---------+----------+----------------------------------------+
+| IPCR | 24 & 25 | AMSS | AF_QIPCRTR clients and servers. |
++----------------+---------+----------+----------------------------------------+
+
+DMA Bridge
+==========
+
+Overview
+--------
+
+The DMA Bridge is one of the main interfaces to the host from the device
+(the other being MHI). As part of activating a workload to run on NSPs, the QSM
+assigns that network a DMA Bridge channel. A workload's DMA Bridge channel
+(DBC for short) is solely for the use of that workload and is not shared with
+other workloads.
+
+Each DBC is a pair of FIFOs that manage data in and out of the workload. One
+FIFO is the request FIFO. The other FIFO is the response FIFO.
+
+Each DBC contains 4 registers in hardware:
+
+* Request FIFO head pointer (offset 0x0). Read only by the host. Indicates the
+ latest item in the FIFO the device has consumed.
+* Request FIFO tail pointer (offset 0x4). Read/write by the host. Host
+ increments this register to add new items to the FIFO.
+* Response FIFO head pointer (offset 0x8). Read/write by the host. Indicates
+ the latest item in the FIFO the host has consumed.
+* Response FIFO tail pointer (offset 0xc). Read only by the host. Device
+ increments this register to add new items to the FIFO.
+
+The values in each register are indexes in the FIFO. To get the location of the
+FIFO element pointed to by the register: FIFO base address + register * element
+size.
+
+DBC registers are exposed to the host via the second BAR. Each DBC consumes
+4KB of space in the BAR.
+
+The actual FIFOs are backed by host memory. When sending a request to the QSM
+to activate a network, the host must donate memory to be used for the FIFOs.
+Due to internal mapping limitations of the device, a single contiguous chunk of
+memory must be provided per DBC, which hosts both FIFOs. The request FIFO will
+consume the beginning of the memory chunk, and the response FIFO will consume
+the end of the memory chunk.
+
+Request FIFO
+------------
+
+A request FIFO element has the following structure:
+
+.. code-block:: c
+
+ struct request_elem {
+ u16 req_id;
+ u8 seq_id;
+ u8 pcie_dma_cmd;
+ u32 reserved;
+ u64 pcie_dma_source_addr;
+ u64 pcie_dma_dest_addr;
+ u32 pcie_dma_len;
+ u32 reserved;
+ u64 doorbell_addr;
+ u8 doorbell_attr;
+ u8 reserved;
+ u16 reserved;
+ u32 doorbell_data;
+ u32 sem_cmd0;
+ u32 sem_cmd1;
+ u32 sem_cmd2;
+ u32 sem_cmd3;
+ };
+
+Request field descriptions:
+
+req_id
+ request ID. A request FIFO element and a response FIFO element with
+ the same request ID refer to the same command.
+
+seq_id
+ sequence ID within a request. Ignored by the DMA Bridge.
+
+pcie_dma_cmd
+ describes the DMA element of this request.
+
+ * Bit(7) is the force msi flag, which overrides the DMA Bridge MSI logic
+ and generates a MSI when this request is complete, and QSM
+ configures the DMA Bridge to look at this bit.
+ * Bits(6:5) are reserved.
+ * Bit(4) is the completion code flag, and indicates that the DMA Bridge
+ shall generate a response FIFO element when this request is
+ complete.
+ * Bit(3) indicates if this request is a linked list transfer(0) or a bulk
+ transfer(1).
+ * Bit(2) is reserved.
+ * Bits(1:0) indicate the type of transfer. No transfer(0), to device(1),
+ from device(2). Value 3 is illegal.
+
+pcie_dma_source_addr
+ source address for a bulk transfer, or the address of the linked list.
+
+pcie_dma_dest_addr
+ destination address for a bulk transfer.
+
+pcie_dma_len
+ length of the bulk transfer. Note that the size of this field
+ limits transfers to 4G in size.
+
+doorbell_addr
+ address of the doorbell to ring when this request is complete.
+
+doorbell_attr
+ doorbell attributes.
+
+ * Bit(7) indicates if a write to a doorbell is to occur.
+ * Bits(6:2) are reserved.
+ * Bits(1:0) contain the encoding of the doorbell length. 0 is 32-bit,
+ 1 is 16-bit, 2 is 8-bit, 3 is reserved. The doorbell address
+ must be naturally aligned to the specified length.
+
+doorbell_data
+ data to write to the doorbell. Only the bits corresponding to
+ the doorbell length are valid.
+
+sem_cmdN
+ semaphore command.
+
+ * Bit(31) indicates this semaphore command is enabled.
+ * Bit(30) is the to-device DMA fence. Block this request until all
+ to-device DMA transfers are complete.
+ * Bit(29) is the from-device DMA fence. Block this request until all
+ from-device DMA transfers are complete.
+ * Bits(28:27) are reserved.
+ * Bits(26:24) are the semaphore command. 0 is NOP. 1 is init with the
+ specified value. 2 is increment. 3 is decrement. 4 is wait
+ until the semaphore is equal to the specified value. 5 is wait
+ until the semaphore is greater or equal to the specified value.
+ 6 is "P", wait until semaphore is greater than 0, then
+ decrement by 1. 7 is reserved.
+ * Bit(23) is reserved.
+ * Bit(22) is the semaphore sync. 0 is post sync, which means that the
+ semaphore operation is done after the DMA transfer. 1 is
+ presync, which gates the DMA transfer. Only one presync is
+ allowed per request.
+ * Bit(21) is reserved.
+ * Bits(20:16) is the index of the semaphore to operate on.
+ * Bits(15:12) are reserved.
+ * Bits(11:0) are the semaphore value to use in operations.
+
+Overall, a request is processed in 4 steps:
+
+1. If specified, the presync semaphore condition must be true
+2. If enabled, the DMA transfer occurs
+3. If specified, the postsync semaphore conditions must be true
+4. If enabled, the doorbell is written
+
+By using the semaphores in conjunction with the workload running on the NSPs,
+the data pipeline can be synchronized such that the host can queue multiple
+requests of data for the workload to process, but the DMA Bridge will only copy
+the data into the memory of the workload when the workload is ready to process
+the next input.
+
+Response FIFO
+-------------
+
+Once a request is fully processed, a response FIFO element is generated if
+specified in pcie_dma_cmd. The structure of a response FIFO element:
+
+.. code-block:: c
+
+ struct response_elem {
+ u16 req_id;
+ u16 completion_code;
+ };
+
+req_id
+ matches the req_id of the request that generated this element.
+
+completion_code
+ status of this request. 0 is success. Non-zero is an error.
+
+The DMA Bridge will generate a MSI to the host as a reaction to activity in the
+response FIFO of a DBC. The DMA Bridge hardware has an IRQ storm mitigation
+algorithm, where it will only generate a MSI when the response FIFO transitions
+from empty to non-empty (unless force MSI is enabled and triggered). In
+response to this MSI, the host is expected to drain the response FIFO, and must
+take care to handle any race conditions between draining the FIFO, and the
+device inserting elements into the FIFO.
+
+Neural Network Control (NNC) Protocol
+=====================================
+
+The NNC protocol is how the host makes requests to the QSM to manage workloads.
+It uses the QAIC_CONTROL MHI channel.
+
+Each NNC request is packaged into a message. Each message is a series of
+transactions. A passthrough type transaction can contain elements known as
+commands.
+
+QSM requires NNC messages be little endian encoded and the fields be naturally
+aligned. Since there are 64-bit elements in some NNC messages, 64-bit alignment
+must be maintained.
+
+A message contains a header and then a series of transactions. A message may be
+at most 4K in size from QSM to the host. From the host to the QSM, a message
+can be at most 64K (maximum size of a single MHI packet), but there is a
+continuation feature where message N+1 can be marked as a continuation of
+message N. This is used for exceedingly large DMA xfer transactions.
+
+Transaction descriptions
+------------------------
+
+passthrough
+ Allows userspace to send an opaque payload directly to the QSM.
+ This is used for NNC commands. Userspace is responsible for managing
+ the QSM message requirements in the payload.
+
+dma_xfer
+ DMA transfer. Describes an object that the QSM should DMA into the
+ device via address and size tuples.
+
+activate
+ Activate a workload onto NSPs. The host must provide memory to be
+ used by the DBC.
+
+deactivate
+ Deactivate an active workload and return the NSPs to idle.
+
+status
+ Query the QSM about it's NNC implementation. Returns the NNC version,
+ and if CRC is used.
+
+terminate
+ Release a user's resources.
+
+dma_xfer_cont
+ Continuation of a previous DMA transfer. If a DMA transfer
+ cannot be specified in a single message (highly fragmented), this
+ transaction can be used to specify more ranges.
+
+validate_partition
+ Query to QSM to determine if a partition identifier is valid.
+
+Each message is tagged with a user id, and a partition id. The user id allows
+QSM to track resources, and release them when the user goes away (eg the process
+crashes). A partition id identifies the resource partition that QSM manages,
+which this message applies to.
+
+Messages may have CRCs. Messages should have CRCs applied until the QSM
+reports via the status transaction that CRCs are not needed. The QSM on the
+SA9000P requires CRCs for black channel safing.
+
+Subsystem Restart (SSR)
+=======================
+
+SSR is the concept of limiting the impact of an error. An AIC100 device may
+have multiple users, each with their own workload running. If the workload of
+one user crashes, the fallout of that should be limited to that workload and not
+impact other workloads. SSR accomplishes this.
+
+If a particular workload crashes, QSM notifies the host via the QAIC_SSR MHI
+channel. This notification identifies the workload by it's assigned DBC. A
+multi-stage recovery process is then used to cleanup both sides, and get the
+DBC/NSPs into a working state.
+
+When SSR occurs, any state in the workload is lost. Any inputs that were in
+process, or queued by not yet serviced, are lost. The loaded artifacts will
+remain in on-card DDR, but the host will need to re-activate the workload if
+it desires to recover the workload.
+
+Reliability, Accessibility, Serviceability (RAS)
+================================================
+
+AIC100 is expected to be deployed in server systems where RAS ideology is
+applied. Simply put, RAS is the concept of detecting, classifying, and
+reporting errors. While PCIe has AER (Advanced Error Reporting) which factors
+into RAS, AER does not allow for a device to report details about internal
+errors. Therefore, AIC100 implements a custom RAS mechanism. When a RAS event
+occurs, QSM will report the event with appropriate details via the QAIC_STATUS
+MHI channel. A sysadmin may determine that a particular device needs
+additional service based on RAS reports.
+
+Telemetry
+=========
+
+QSM has the ability to report various physical attributes of the device, and in
+some cases, to allow the host to control them. Examples include thermal limits,
+thermal readings, and power readings. These items are communicated via the
+QAIC_TELEMETRY MHI channel.
diff --git a/Documentation/accel/qaic/index.rst b/Documentation/accel/qaic/index.rst
new file mode 100644
index 000000000000..967b9dd8bace
--- /dev/null
+++ b/Documentation/accel/qaic/index.rst
@@ -0,0 +1,14 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+
+=====================================
+ accel/qaic Qualcomm Cloud AI driver
+=====================================
+
+The accel/qaic driver supports the Qualcomm Cloud AI machine learning
+accelerator cards.
+
+.. toctree::
+
+ qaic
+ aic080
+ aic100
diff --git a/Documentation/accel/qaic/qaic.rst b/Documentation/accel/qaic/qaic.rst
new file mode 100644
index 000000000000..018d6cc173d7
--- /dev/null
+++ b/Documentation/accel/qaic/qaic.rst
@@ -0,0 +1,209 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+
+=============
+ QAIC driver
+=============
+
+The QAIC driver is the Kernel Mode Driver (KMD) for the AIC100 family of AI
+accelerator products.
+
+Interrupts
+==========
+
+IRQ Storm Mitigation
+--------------------
+
+While the AIC100 DMA Bridge hardware implements an IRQ storm mitigation
+mechanism, it is still possible for an IRQ storm to occur. A storm can happen
+if the workload is particularly quick, and the host is responsive. If the host
+can drain the response FIFO as quickly as the device can insert elements into
+it, then the device will frequently transition the response FIFO from empty to
+non-empty and generate MSIs at a rate equivalent to the speed of the
+workload's ability to process inputs. The lprnet (license plate reader network)
+workload is known to trigger this condition, and can generate in excess of 100k
+MSIs per second. It has been observed that most systems cannot tolerate this
+for long, and will crash due to some form of watchdog due to the overhead of
+the interrupt controller interrupting the host CPU.
+
+To mitigate this issue, the QAIC driver implements specific IRQ handling. When
+QAIC receives an IRQ, it disables that line. This prevents the interrupt
+controller from interrupting the CPU. Then AIC drains the FIFO. Once the FIFO
+is drained, QAIC implements a "last chance" polling algorithm where QAIC will
+sleep for a time to see if the workload will generate more activity. The IRQ
+line remains disabled during this time. If no activity is detected, QAIC exits
+polling mode and reenables the IRQ line.
+
+This mitigation in QAIC is very effective. The same lprnet usecase that
+generates 100k IRQs per second (per /proc/interrupts) is reduced to roughly 64
+IRQs over 5 minutes while keeping the host system stable, and having the same
+workload throughput performance (within run to run noise variation).
+
+Single MSI Mode
+---------------
+
+MultiMSI is not well supported on all systems; virtualized ones even less so
+(circa 2023). Between hypervisors masking the PCIe MSI capability structure to
+large memory requirements for vIOMMUs (required for supporting MultiMSI), it is
+useful to be able to fall back to a single MSI when needed.
+
+To support this fallback, we allow the case where only one MSI is able to be
+allocated, and share that one MSI between MHI and the DBCs. The device detects
+when only one MSI has been configured and directs the interrupts for the DBCs
+to the interrupt normally used for MHI. Unfortunately this means that the
+interrupt handlers for every DBC and MHI wake up for every interrupt that
+arrives; however, the DBC threaded irq handlers only are started when work to be
+done is detected (MHI will always start its threaded handler).
+
+If the DBC is configured to force MSI interrupts, this can circumvent the
+software IRQ storm mitigation mentioned above. Since the MSI is shared it is
+never disabled, allowing each new entry to the FIFO to trigger a new interrupt.
+
+
+Neural Network Control (NNC) Protocol
+=====================================
+
+The implementation of NNC is split between the KMD (QAIC) and UMD. In general
+QAIC understands how to encode/decode NNC wire protocol, and elements of the
+protocol which require kernel space knowledge to process (for example, mapping
+host memory to device IOVAs). QAIC understands the structure of a message, and
+all of the transactions. QAIC does not understand commands (the payload of a
+passthrough transaction).
+
+QAIC handles and enforces the required little endianness and 64-bit alignment,
+to the degree that it can. Since QAIC does not know the contents of a
+passthrough transaction, it relies on the UMD to satisfy the requirements.
+
+The terminate transaction is of particular use to QAIC. QAIC is not aware of
+the resources that are loaded onto a device since the majority of that activity
+occurs within NNC commands. As a result, QAIC does not have the means to
+roll back userspace activity. To ensure that a userspace client's resources
+are fully released in the case of a process crash, or a bug, QAIC uses the
+terminate command to let QSM know when a user has gone away, and the resources
+can be released.
+
+QSM can report a version number of the NNC protocol it supports. This is in the
+form of a Major number and a Minor number.
+
+Major number updates indicate changes to the NNC protocol which impact the
+message format, or transactions (impacts QAIC).
+
+Minor number updates indicate changes to the NNC protocol which impact the
+commands (does not impact QAIC).
+
+uAPI
+====
+
+QAIC creates an accel device per physical PCIe device. This accel device exists
+for as long as the PCIe device is known to Linux.
+
+The PCIe device may not be in the state to accept requests from userspace at
+all times. QAIC will trigger KOBJ_ONLINE/OFFLINE uevents to advertise when the
+device can accept requests (ONLINE) and when the device is no longer accepting
+requests (OFFLINE) because of a reset or other state transition.
+
+QAIC defines a number of driver specific IOCTLs as part of the userspace API.
+
+DRM_IOCTL_QAIC_MANAGE
+ This IOCTL allows userspace to send a NNC request to the QSM. The call will
+ block until a response is received, or the request has timed out.
+
+DRM_IOCTL_QAIC_CREATE_BO
+ This IOCTL allows userspace to allocate a buffer object (BO) which can send
+ or receive data from a workload. The call will return a GEM handle that
+ represents the allocated buffer. The BO is not usable until it has been
+ sliced (see DRM_IOCTL_QAIC_ATTACH_SLICE_BO).
+
+DRM_IOCTL_QAIC_MMAP_BO
+ This IOCTL allows userspace to prepare an allocated BO to be mmap'd into the
+ userspace process.
+
+DRM_IOCTL_QAIC_ATTACH_SLICE_BO
+ This IOCTL allows userspace to slice a BO in preparation for sending the BO
+ to the device. Slicing is the operation of describing what portions of a BO
+ get sent where to a workload. This requires a set of DMA transfers for the
+ DMA Bridge, and as such, locks the BO to a specific DBC.
+
+DRM_IOCTL_QAIC_EXECUTE_BO
+ This IOCTL allows userspace to submit a set of sliced BOs to the device. The
+ call is non-blocking. Success only indicates that the BOs have been queued
+ to the device, but does not guarantee they have been executed.
+
+DRM_IOCTL_QAIC_PARTIAL_EXECUTE_BO
+ This IOCTL operates like DRM_IOCTL_QAIC_EXECUTE_BO, but it allows userspace
+ to shrink the BOs sent to the device for this specific call. If a BO
+ typically has N inputs, but only a subset of those is available, this IOCTL
+ allows userspace to indicate that only the first M bytes of the BO should be
+ sent to the device to minimize data transfer overhead. This IOCTL dynamically
+ recomputes the slicing, and therefore has some processing overhead before the
+ BOs can be queued to the device.
+
+DRM_IOCTL_QAIC_WAIT_BO
+ This IOCTL allows userspace to determine when a particular BO has been
+ processed by the device. The call will block until either the BO has been
+ processed and can be re-queued to the device, or a timeout occurs.
+
+DRM_IOCTL_QAIC_PERF_STATS_BO
+ This IOCTL allows userspace to collect performance statistics on the most
+ recent execution of a BO. This allows userspace to construct an end to end
+ timeline of the BO processing for a performance analysis.
+
+DRM_IOCTL_QAIC_DETACH_SLICE_BO
+ This IOCTL allows userspace to remove the slicing information from a BO that
+ was originally provided by a call to DRM_IOCTL_QAIC_ATTACH_SLICE_BO. This
+ is the inverse of DRM_IOCTL_QAIC_ATTACH_SLICE_BO. The BO must be idle for
+ DRM_IOCTL_QAIC_DETACH_SLICE_BO to be called. After a successful detach slice
+ operation the BO may have new slicing information attached with a new call
+ to DRM_IOCTL_QAIC_ATTACH_SLICE_BO. After detach slice, the BO cannot be
+ executed until after a new attach slice operation. Combining attach slice
+ and detach slice calls allows userspace to use a BO with multiple workloads.
+
+Userspace Client Isolation
+==========================
+
+AIC100 supports multiple clients. Multiple DBCs can be consumed by a single
+client, and multiple clients can each consume one or more DBCs. Workloads
+may contain sensitive information therefore only the client that owns the
+workload should be allowed to interface with the DBC.
+
+Clients are identified by the instance associated with their open(). A client
+may only use memory they allocate, and DBCs that are assigned to their
+workloads. Attempts to access resources assigned to other clients will be
+rejected.
+
+Module parameters
+=================
+
+QAIC supports the following module parameters:
+
+**datapath_polling (bool)**
+
+Configures QAIC to use a polling thread for datapath events instead of relying
+on the device interrupts. Useful for platforms with broken multiMSI. Must be
+set at QAIC driver initialization. Default is 0 (off).
+
+**mhi_timeout_ms (unsigned int)**
+
+Sets the timeout value for MHI operations in milliseconds (ms). Must be set
+at the time the driver detects a device. Default is 2000 (2 seconds).
+
+**control_resp_timeout_s (unsigned int)**
+
+Sets the timeout value for QSM responses to NNC messages in seconds (s). Must
+be set at the time the driver is sending a request to QSM. Default is 60 (one
+minute).
+
+**wait_exec_default_timeout_ms (unsigned int)**
+
+Sets the default timeout for the wait_exec ioctl in milliseconds (ms). Must be
+set prior to the waic_exec ioctl call. A value specified in the ioctl call
+overrides this for that call. Default is 5000 (5 seconds).
+
+**datapath_poll_interval_us (unsigned int)**
+
+Sets the polling interval in microseconds (us) when datapath polling is active.
+Takes effect at the next polling interval. Default is 100 (100 us).
+
+**timesync_delay_ms (unsigned int)**
+
+Sets the time interval in milliseconds (ms) between two consecutive timesync
+operations. Default is 1000 (1000 ms).
diff --git a/Documentation/accel/rocket/index.rst b/Documentation/accel/rocket/index.rst
new file mode 100644
index 000000000000..70f97bccf100
--- /dev/null
+++ b/Documentation/accel/rocket/index.rst
@@ -0,0 +1,19 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+
+=====================================
+ accel/rocket Rockchip NPU driver
+=====================================
+
+The accel/rocket driver supports the Neural Processing Units (NPUs) inside some
+Rockchip SoCs such as the RK3588. Rockchip calls it RKNN and sometimes RKNPU.
+
+The hardware is described in chapter 36 in the RK3588 TRM.
+
+This driver just powers the hardware on and off, allocates and maps buffers to
+the device and submits jobs to the frontend unit. Everything else is done in
+userspace, as a Gallium driver (also called rocket) that is part of the Mesa3D
+project.
+
+Hardware currently supported:
+
+* RK3588
diff --git a/Documentation/accelerators/ocxl.rst b/Documentation/accelerators/ocxl.rst
deleted file mode 100644
index 14cefc020e2d..000000000000
--- a/Documentation/accelerators/ocxl.rst
+++ /dev/null
@@ -1,176 +0,0 @@
-========================================================
-OpenCAPI (Open Coherent Accelerator Processor Interface)
-========================================================
-
-OpenCAPI is an interface between processors and accelerators. It aims
-at being low-latency and high-bandwidth. The specification is
-developed by the `OpenCAPI Consortium <http://opencapi.org/>`_.
-
-It allows an accelerator (which could be a FPGA, ASICs, ...) to access
-the host memory coherently, using virtual addresses. An OpenCAPI
-device can also host its own memory, that can be accessed from the
-host.
-
-OpenCAPI is known in linux as 'ocxl', as the open, processor-agnostic
-evolution of 'cxl' (the driver for the IBM CAPI interface for
-powerpc), which was named that way to avoid confusion with the ISDN
-CAPI subsystem.
-
-
-High-level view
-===============
-
-OpenCAPI defines a Data Link Layer (DL) and Transaction Layer (TL), to
-be implemented on top of a physical link. Any processor or device
-implementing the DL and TL can start sharing memory.
-
-::
-
- +-----------+ +-------------+
- | | | |
- | | | Accelerated |
- | Processor | | Function |
- | | +--------+ | Unit | +--------+
- | |--| Memory | | (AFU) |--| Memory |
- | | +--------+ | | +--------+
- +-----------+ +-------------+
- | |
- +-----------+ +-------------+
- | TL | | TLX |
- +-----------+ +-------------+
- | |
- +-----------+ +-------------+
- | DL | | DLX |
- +-----------+ +-------------+
- | |
- | PHY |
- +---------------------------------------+
-
-
-
-Device discovery
-================
-
-OpenCAPI relies on a PCI-like configuration space, implemented on the
-device. So the host can discover AFUs by querying the config space.
-
-OpenCAPI devices in Linux are treated like PCI devices (with a few
-caveats). The firmware is expected to abstract the hardware as if it
-was a PCI link. A lot of the existing PCI infrastructure is reused:
-devices are scanned and BARs are assigned during the standard PCI
-enumeration. Commands like 'lspci' can therefore be used to see what
-devices are available.
-
-The configuration space defines the AFU(s) that can be found on the
-physical adapter, such as its name, how many memory contexts it can
-work with, the size of its MMIO areas, ...
-
-
-
-MMIO
-====
-
-OpenCAPI defines two MMIO areas for each AFU:
-
-* the global MMIO area, with registers pertinent to the whole AFU.
-* a per-process MMIO area, which has a fixed size for each context.
-
-
-
-AFU interrupts
-==============
-
-OpenCAPI includes the possibility for an AFU to send an interrupt to a
-host process. It is done through a 'intrp_req' defined in the
-Transaction Layer, specifying a 64-bit object handle which defines the
-interrupt.
-
-The driver allows a process to allocate an interrupt and obtain its
-64-bit object handle, that can be passed to the AFU.
-
-
-
-char devices
-============
-
-The driver creates one char device per AFU found on the physical
-device. A physical device may have multiple functions and each
-function can have multiple AFUs. At the time of this writing though,
-it has only been tested with devices exporting only one AFU.
-
-Char devices can be found in /dev/ocxl/ and are named as:
-/dev/ocxl/<AFU name>.<location>.<index>
-
-where <AFU name> is a max 20-character long name, as found in the
-config space of the AFU.
-<location> is added by the driver and can help distinguish devices
-when a system has more than one instance of the same OpenCAPI device.
-<index> is also to help distinguish AFUs in the unlikely case where a
-device carries multiple copies of the same AFU.
-
-
-
-Sysfs class
-===========
-
-An ocxl class is added for the devices representing the AFUs. See
-/sys/class/ocxl. The layout is described in
-Documentation/ABI/testing/sysfs-class-ocxl
-
-
-
-User API
-========
-
-open
-----
-
-Based on the AFU definition found in the config space, an AFU may
-support working with more than one memory context, in which case the
-associated char device may be opened multiple times by different
-processes.
-
-
-ioctl
------
-
-OCXL_IOCTL_ATTACH:
-
- Attach the memory context of the calling process to the AFU so that
- the AFU can access its memory.
-
-OCXL_IOCTL_IRQ_ALLOC:
-
- Allocate an AFU interrupt and return an identifier.
-
-OCXL_IOCTL_IRQ_FREE:
-
- Free a previously allocated AFU interrupt.
-
-OCXL_IOCTL_IRQ_SET_FD:
-
- Associate an event fd to an AFU interrupt so that the user process
- can be notified when the AFU sends an interrupt.
-
-OCXL_IOCTL_GET_METADATA:
-
- Obtains configuration information from the card, such at the size of
- MMIO areas, the AFU version, and the PASID for the current context.
-
-OCXL_IOCTL_ENABLE_P9_WAIT:
-
- Allows the AFU to wake a userspace thread executing 'wait'. Returns
- information to userspace to allow it to configure the AFU. Note that
- this is only available on POWER9.
-
-OCXL_IOCTL_GET_FEATURES:
-
- Reports on which CPU features that affect OpenCAPI are usable from
- userspace.
-
-
-mmap
-----
-
-A process can mmap the per-process MMIO area for interactions with the
-AFU.
diff --git a/Documentation/accounting/cgroupstats.rst b/Documentation/accounting/cgroupstats.rst
new file mode 100644
index 000000000000..85186e7d4035
--- /dev/null
+++ b/Documentation/accounting/cgroupstats.rst
@@ -0,0 +1,31 @@
+==================
+Control Groupstats
+==================
+
+Control Groupstats is inspired by the discussion at
+https://lore.kernel.org/r/461CF883.2030308@sw.ru and implements per cgroup statistics as
+suggested by Andrew Morton in https://lore.kernel.org/r/20070411114927.1277d7c9.akpm@linux-foundation.org.
+
+Per cgroup statistics infrastructure re-uses code from the taskstats
+interface. A new set of cgroup operations are registered with commands
+and attributes specific to cgroups. It should be very easy to
+extend per cgroup statistics, by adding members to the cgroupstats
+structure.
+
+The current model for cgroupstats is a pull, a push model (to post
+statistics on interesting events), should be very easy to add. Currently
+user space requests for statistics by passing the cgroup path.
+Statistics about the state of all the tasks in the cgroup is returned to
+user space.
+
+NOTE: We currently rely on delay accounting for extracting information
+about tasks blocked on I/O. If CONFIG_TASK_DELAY_ACCT is disabled, this
+information will not be available.
+
+To extract cgroup statistics a utility very similar to getdelays.c
+has been developed, the sample output of the utility is shown below::
+
+ ~/balbir/cgroupstats # ./getdelays -C "/sys/fs/cgroup/a"
+ sleeping 1, blocked 0, running 1, stopped 0, uninterruptible 0
+ ~/balbir/cgroupstats # ./getdelays -C "/sys/fs/cgroup"
+ sleeping 155, blocked 0, running 1, stopped 0, uninterruptible 2
diff --git a/Documentation/accounting/cgroupstats.txt b/Documentation/accounting/cgroupstats.txt
deleted file mode 100644
index d16a9849e60e..000000000000
--- a/Documentation/accounting/cgroupstats.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-Control Groupstats is inspired by the discussion at
-http://lkml.org/lkml/2007/4/11/187 and implements per cgroup statistics as
-suggested by Andrew Morton in http://lkml.org/lkml/2007/4/11/263.
-
-Per cgroup statistics infrastructure re-uses code from the taskstats
-interface. A new set of cgroup operations are registered with commands
-and attributes specific to cgroups. It should be very easy to
-extend per cgroup statistics, by adding members to the cgroupstats
-structure.
-
-The current model for cgroupstats is a pull, a push model (to post
-statistics on interesting events), should be very easy to add. Currently
-user space requests for statistics by passing the cgroup path.
-Statistics about the state of all the tasks in the cgroup is returned to
-user space.
-
-NOTE: We currently rely on delay accounting for extracting information
-about tasks blocked on I/O. If CONFIG_TASK_DELAY_ACCT is disabled, this
-information will not be available.
-
-To extract cgroup statistics a utility very similar to getdelays.c
-has been developed, the sample output of the utility is shown below
-
-~/balbir/cgroupstats # ./getdelays -C "/sys/fs/cgroup/a"
-sleeping 1, blocked 0, running 1, stopped 0, uninterruptible 0
-~/balbir/cgroupstats # ./getdelays -C "/sys/fs/cgroup"
-sleeping 155, blocked 0, running 1, stopped 0, uninterruptible 2
diff --git a/Documentation/accounting/delay-accounting.rst b/Documentation/accounting/delay-accounting.rst
new file mode 100644
index 000000000000..86d7902a657f
--- /dev/null
+++ b/Documentation/accounting/delay-accounting.rst
@@ -0,0 +1,214 @@
+================
+Delay accounting
+================
+
+Tasks encounter delays in execution when they wait
+for some kernel resource to become available e.g. a
+runnable task may wait for a free CPU to run on.
+
+The per-task delay accounting functionality measures
+the delays experienced by a task while
+
+a) waiting for a CPU (while being runnable)
+b) completion of synchronous block I/O initiated by the task
+c) swapping in pages
+d) memory reclaim
+e) thrashing
+f) direct compact
+g) write-protect copy
+h) IRQ/SOFTIRQ
+
+and makes these statistics available to userspace through
+the taskstats interface.
+
+Such delays provide feedback for setting a task's cpu priority,
+io priority and rss limit values appropriately. Long delays for
+important tasks could be a trigger for raising its corresponding priority.
+
+The functionality, through its use of the taskstats interface, also provides
+delay statistics aggregated for all tasks (or threads) belonging to a
+thread group (corresponding to a traditional Unix process). This is a commonly
+needed aggregation that is more efficiently done by the kernel.
+
+Userspace utilities, particularly resource management applications, can also
+aggregate delay statistics into arbitrary groups. To enable this, delay
+statistics of a task are available both during its lifetime as well as on its
+exit, ensuring continuous and complete monitoring can be done.
+
+
+Interface
+---------
+
+Delay accounting uses the taskstats interface which is described
+in detail in a separate document in this directory. Taskstats returns a
+generic data structure to userspace corresponding to per-pid and per-tgid
+statistics. The delay accounting functionality populates specific fields of
+this structure. See
+
+ include/uapi/linux/taskstats.h
+
+for a description of the fields pertaining to delay accounting.
+It will generally be in the form of counters returning the cumulative
+delay seen for cpu, sync block I/O, swapin, memory reclaim, thrash page
+cache, direct compact, write-protect copy, IRQ/SOFTIRQ etc.
+
+Taking the difference of two successive readings of a given
+counter (say cpu_delay_total) for a task will give the delay
+experienced by the task waiting for the corresponding resource
+in that interval.
+
+When a task exits, records containing the per-task statistics
+are sent to userspace without requiring a command. If it is the last exiting
+task of a thread group, the per-tgid statistics are also sent. More details
+are given in the taskstats interface description.
+
+The getdelays.c userspace utility in tools/accounting directory allows simple
+commands to be run and the corresponding delay statistics to be displayed. It
+also serves as an example of using the taskstats interface.
+
+Usage
+-----
+
+Compile the kernel with::
+
+ CONFIG_TASK_DELAY_ACCT=y
+ CONFIG_TASKSTATS=y
+
+Delay accounting is disabled by default at boot up.
+To enable, add::
+
+ delayacct
+
+to the kernel boot options. The rest of the instructions below assume this has
+been done. Alternatively, use sysctl kernel.task_delayacct to switch the state
+at runtime. Note however that only tasks started after enabling it will have
+delayacct information.
+
+After the system has booted up, use a utility
+similar to getdelays.c to access the delays
+seen by a given task or a task group (tgid).
+The utility also allows a given command to be
+executed and the corresponding delays to be
+seen.
+
+General format of the getdelays command::
+
+ getdelays [-dilv] [-t tgid] [-p pid]
+
+Get delays, since system boot, for pid 10::
+
+ # ./getdelays -d -p 10
+ (output similar to next case)
+
+Get sum and peak of delays, since system boot, for all pids with tgid 242::
+
+ bash-4.4# ./getdelays -d -t 242
+ print delayacct stats ON
+ TGID 242
+
+
+ CPU count real total virtual total delay total delay average delay max delay min
+ 39 156000000 156576579 2111069 0.054ms 0.212296ms 0.031307ms
+ IO count delay total delay average delay max delay min
+ 0 0 0.000ms 0.000000ms 0.000000ms
+ SWAP count delay total delay average delay max delay min
+ 0 0 0.000ms 0.000000ms 0.000000ms
+ RECLAIM count delay total delay average delay max delay min
+ 0 0 0.000ms 0.000000ms 0.000000ms
+ THRASHING count delay total delay average delay max delay min
+ 0 0 0.000ms 0.000000ms 0.000000ms
+ COMPACT count delay total delay average delay max delay min
+ 0 0 0.000ms 0.000000ms 0.000000ms
+ WPCOPY count delay total delay average delay max delay min
+ 156 11215873 0.072ms 0.207403ms 0.033913ms
+ IRQ count delay total delay average delay max delay min
+ 0 0 0.000ms 0.000000ms 0.000000ms
+
+Get IO accounting for pid 1, it works only with -p::
+
+ # ./getdelays -i -p 1
+ printing IO accounting
+ linuxrc: read=65536, write=0, cancelled_write=0
+
+The above command can be used with -v to get more debug information.
+
+After the system starts, use `delaytop` to get the system-wide delay information,
+which includes system-wide PSI information and Top-N high-latency tasks.
+Note: PSI support requires `CONFIG_PSI=y` and `psi=1` for full functionality.
+
+`delaytop` is an interactive tool for monitoring system pressure and task delays.
+It supports multiple sorting options, display modes, and real-time keyboard controls.
+
+Basic usage with default settings (sorts by CPU delay, shows top 20 tasks, refreshes every 2 seconds)::
+
+ bash# ./delaytop
+ System Pressure Information: (avg10/avg60vg300/total)
+ CPU some: 0.0%/ 0.0%/ 0.0%/ 106137(ms)
+ CPU full: 0.0%/ 0.0%/ 0.0%/ 0(ms)
+ Memory full: 0.0%/ 0.0%/ 0.0%/ 0(ms)
+ Memory some: 0.0%/ 0.0%/ 0.0%/ 0(ms)
+ IO full: 0.0%/ 0.0%/ 0.0%/ 2240(ms)
+ IO some: 0.0%/ 0.0%/ 0.0%/ 2783(ms)
+ IRQ full: 0.0%/ 0.0%/ 0.0%/ 0(ms)
+ [o]sort [M]memverbose [q]quit
+ Top 20 processes (sorted by cpu delay):
+ PID TGID COMMAND CPU(ms) IO(ms) IRQ(ms) MEM(ms)
+ ------------------------------------------------------------------------
+ 110 110 kworker/15:0H-s 27.91 0.00 0.00 0.00
+ 57 57 cpuhp/7 3.18 0.00 0.00 0.00
+ 99 99 cpuhp/14 2.97 0.00 0.00 0.00
+ 51 51 cpuhp/6 0.90 0.00 0.00 0.00
+ 44 44 kworker/4:0H-sy 0.80 0.00 0.00 0.00
+ 60 60 ksoftirqd/7 0.74 0.00 0.00 0.00
+ 76 76 idle_inject/10 0.31 0.00 0.00 0.00
+ 100 100 idle_inject/14 0.30 0.00 0.00 0.00
+ 1309 1309 systemsettings 0.29 0.00 0.00 0.00
+ 45 45 cpuhp/5 0.22 0.00 0.00 0.00
+ 63 63 cpuhp/8 0.20 0.00 0.00 0.00
+ 87 87 cpuhp/12 0.18 0.00 0.00 0.00
+ 93 93 cpuhp/13 0.17 0.00 0.00 0.00
+ 1265 1265 acpid 0.17 0.00 0.00 0.00
+ 1552 1552 sshd 0.17 0.00 0.00 0.00
+ 2584 2584 sddm-helper 0.16 0.00 0.00 0.00
+ 1284 1284 rtkit-daemon 0.15 0.00 0.00 0.00
+ 1326 1326 nde-netfilter 0.14 0.00 0.00 0.00
+ 27 27 cpuhp/2 0.13 0.00 0.00 0.00
+ 631 631 kworker/11:2-rc 0.11 0.00 0.00 0.00
+
+Interactive keyboard controls during runtime::
+
+ o - Select sort field (CPU, IO, IRQ, Memory, etc.)
+ M - Toggle display mode (Default/Memory Verbose)
+ q - Quit
+
+Available sort fields(use -s/--sort or interactive command)::
+
+ cpu(c) - CPU delay
+ blkio(i) - I/O delay
+ irq(q) - IRQ delay
+ mem(m) - Total memory delay
+ swapin(s) - Swapin delay (memory verbose mode only)
+ freepages(r) - Freepages reclaim delay (memory verbose mode only)
+ thrashing(t) - Thrashing delay (memory verbose mode only)
+ compact(p) - Compaction delay (memory verbose mode only)
+ wpcopy(w) - Write page copy delay (memory verbose mode only)
+
+Advanced usage examples::
+
+ # ./delaytop -s blkio
+ Sorted by IO delay
+
+ # ./delaytop -s mem -M
+ Sorted by memory delay in memory verbose mode
+
+ # ./delaytop -p pid
+ Print delayacct stats
+
+ # ./delaytop -P num
+ Display the top N tasks
+
+ # ./delaytop -n num
+ Set delaytop refresh frequency (num times)
+
+ # ./delaytop -d secs
+ Specify refresh interval as secs
diff --git a/Documentation/accounting/delay-accounting.txt b/Documentation/accounting/delay-accounting.txt
deleted file mode 100644
index 042ea59b5853..000000000000
--- a/Documentation/accounting/delay-accounting.txt
+++ /dev/null
@@ -1,117 +0,0 @@
-Delay accounting
-----------------
-
-Tasks encounter delays in execution when they wait
-for some kernel resource to become available e.g. a
-runnable task may wait for a free CPU to run on.
-
-The per-task delay accounting functionality measures
-the delays experienced by a task while
-
-a) waiting for a CPU (while being runnable)
-b) completion of synchronous block I/O initiated by the task
-c) swapping in pages
-d) memory reclaim
-
-and makes these statistics available to userspace through
-the taskstats interface.
-
-Such delays provide feedback for setting a task's cpu priority,
-io priority and rss limit values appropriately. Long delays for
-important tasks could be a trigger for raising its corresponding priority.
-
-The functionality, through its use of the taskstats interface, also provides
-delay statistics aggregated for all tasks (or threads) belonging to a
-thread group (corresponding to a traditional Unix process). This is a commonly
-needed aggregation that is more efficiently done by the kernel.
-
-Userspace utilities, particularly resource management applications, can also
-aggregate delay statistics into arbitrary groups. To enable this, delay
-statistics of a task are available both during its lifetime as well as on its
-exit, ensuring continuous and complete monitoring can be done.
-
-
-Interface
----------
-
-Delay accounting uses the taskstats interface which is described
-in detail in a separate document in this directory. Taskstats returns a
-generic data structure to userspace corresponding to per-pid and per-tgid
-statistics. The delay accounting functionality populates specific fields of
-this structure. See
- include/linux/taskstats.h
-for a description of the fields pertaining to delay accounting.
-It will generally be in the form of counters returning the cumulative
-delay seen for cpu, sync block I/O, swapin, memory reclaim etc.
-
-Taking the difference of two successive readings of a given
-counter (say cpu_delay_total) for a task will give the delay
-experienced by the task waiting for the corresponding resource
-in that interval.
-
-When a task exits, records containing the per-task statistics
-are sent to userspace without requiring a command. If it is the last exiting
-task of a thread group, the per-tgid statistics are also sent. More details
-are given in the taskstats interface description.
-
-The getdelays.c userspace utility in tools/accounting directory allows simple
-commands to be run and the corresponding delay statistics to be displayed. It
-also serves as an example of using the taskstats interface.
-
-Usage
------
-
-Compile the kernel with
- CONFIG_TASK_DELAY_ACCT=y
- CONFIG_TASKSTATS=y
-
-Delay accounting is enabled by default at boot up.
-To disable, add
- nodelayacct
-to the kernel boot options. The rest of the instructions
-below assume this has not been done.
-
-After the system has booted up, use a utility
-similar to getdelays.c to access the delays
-seen by a given task or a task group (tgid).
-The utility also allows a given command to be
-executed and the corresponding delays to be
-seen.
-
-General format of the getdelays command
-
-getdelays [-t tgid] [-p pid] [-c cmd...]
-
-
-Get delays, since system boot, for pid 10
-# ./getdelays -p 10
-(output similar to next case)
-
-Get sum of delays, since system boot, for all pids with tgid 5
-# ./getdelays -t 5
-
-
-CPU count real total virtual total delay total
- 7876 92005750 100000000 24001500
-IO count delay total
- 0 0
-SWAP count delay total
- 0 0
-RECLAIM count delay total
- 0 0
-
-Get delays seen in executing a given simple command
-# ./getdelays -c ls /
-
-bin data1 data3 data5 dev home media opt root srv sys usr
-boot data2 data4 data6 etc lib mnt proc sbin subdomain tmp var
-
-
-CPU count real total virtual total delay total
- 6 4000250 4000000 0
-IO count delay total
- 0 0
-SWAP count delay total
- 0 0
-RECLAIM count delay total
- 0 0
diff --git a/Documentation/accounting/index.rst b/Documentation/accounting/index.rst
new file mode 100644
index 000000000000..9369d8bf32be
--- /dev/null
+++ b/Documentation/accounting/index.rst
@@ -0,0 +1,14 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========
+Accounting
+==========
+
+.. toctree::
+ :maxdepth: 1
+
+ cgroupstats
+ delay-accounting
+ psi
+ taskstats
+ taskstats-struct
diff --git a/Documentation/accounting/psi.rst b/Documentation/accounting/psi.rst
new file mode 100644
index 000000000000..d455db3e5808
--- /dev/null
+++ b/Documentation/accounting/psi.rst
@@ -0,0 +1,188 @@
+.. _psi:
+
+================================
+PSI - Pressure Stall Information
+================================
+
+:Date: April, 2018
+:Author: Johannes Weiner <hannes@cmpxchg.org>
+
+When CPU, memory or IO devices are contended, workloads experience
+latency spikes, throughput losses, and run the risk of OOM kills.
+
+Without an accurate measure of such contention, users are forced to
+either play it safe and under-utilize their hardware resources, or
+roll the dice and frequently suffer the disruptions resulting from
+excessive overcommit.
+
+The psi feature identifies and quantifies the disruptions caused by
+such resource crunches and the time impact it has on complex workloads
+or even entire systems.
+
+Having an accurate measure of productivity losses caused by resource
+scarcity aids users in sizing workloads to hardware--or provisioning
+hardware according to workload demand.
+
+As psi aggregates this information in realtime, systems can be managed
+dynamically using techniques such as load shedding, migrating jobs to
+other systems or data centers, or strategically pausing or killing low
+priority or restartable batch jobs.
+
+This allows maximizing hardware utilization without sacrificing
+workload health or risking major disruptions such as OOM kills.
+
+Pressure interface
+==================
+
+Pressure information for each resource is exported through the
+respective file in /proc/pressure/ -- cpu, memory, and io.
+
+The format is as such::
+
+ some avg10=0.00 avg60=0.00 avg300=0.00 total=0
+ full avg10=0.00 avg60=0.00 avg300=0.00 total=0
+
+The "some" line indicates the share of time in which at least some
+tasks are stalled on a given resource.
+
+The "full" line indicates the share of time in which all non-idle
+tasks are stalled on a given resource simultaneously. In this state
+actual CPU cycles are going to waste, and a workload that spends
+extended time in this state is considered to be thrashing. This has
+severe impact on performance, and it's useful to distinguish this
+situation from a state where some tasks are stalled but the CPU is
+still doing productive work. As such, time spent in this subset of the
+stall state is tracked separately and exported in the "full" averages.
+
+CPU full is undefined at the system level, but has been reported
+since 5.13, so it is set to zero for backward compatibility.
+
+The ratios (in %) are tracked as recent trends over ten, sixty, and
+three hundred second windows, which gives insight into short term events
+as well as medium and long term trends. The total absolute stall time
+(in us) is tracked and exported as well, to allow detection of latency
+spikes which wouldn't necessarily make a dent in the time averages,
+or to average trends over custom time frames.
+
+Monitoring for pressure thresholds
+==================================
+
+Users can register triggers and use poll() to be woken up when resource
+pressure exceeds certain thresholds.
+
+A trigger describes the maximum cumulative stall time over a specific
+time window, e.g. 100ms of total stall time within any 500ms window to
+generate a wakeup event.
+
+To register a trigger user has to open psi interface file under
+/proc/pressure/ representing the resource to be monitored and write the
+desired threshold and time window. The open file descriptor should be
+used to wait for trigger events using select(), poll() or epoll().
+The following format is used::
+
+ <some|full> <stall amount in us> <time window in us>
+
+For example writing "some 150000 1000000" into /proc/pressure/memory
+would add 150ms threshold for partial memory stall measured within
+1sec time window. Writing "full 50000 1000000" into /proc/pressure/io
+would add 50ms threshold for full io stall measured within 1sec time window.
+
+Triggers can be set on more than one psi metric and more than one trigger
+for the same psi metric can be specified. However for each trigger a separate
+file descriptor is required to be able to poll it separately from others,
+therefore for each trigger a separate open() syscall should be made even
+when opening the same psi interface file. Write operations to a file descriptor
+with an already existing psi trigger will fail with EBUSY.
+
+Monitors activate only when system enters stall state for the monitored
+psi metric and deactivates upon exit from the stall state. While system is
+in the stall state psi signal growth is monitored at a rate of 10 times per
+tracking window.
+
+The kernel accepts window sizes ranging from 500ms to 10s, therefore min
+monitoring update interval is 50ms and max is 1s. Min limit is set to
+prevent overly frequent polling. Max limit is chosen as a high enough number
+after which monitors are most likely not needed and psi averages can be used
+instead.
+
+Unprivileged users can also create monitors, with the only limitation that the
+window size must be a multiple of 2s, in order to prevent excessive resource
+usage.
+
+When activated, psi monitor stays active for at least the duration of one
+tracking window to avoid repeated activations/deactivations when system is
+bouncing in and out of the stall state.
+
+Notifications to the userspace are rate-limited to one per tracking window.
+
+The trigger will de-register when the file descriptor used to define the
+trigger is closed.
+
+Userspace monitor usage example
+===============================
+
+::
+
+ #include <errno.h>
+ #include <fcntl.h>
+ #include <stdio.h>
+ #include <poll.h>
+ #include <string.h>
+ #include <unistd.h>
+
+ /*
+ * Monitor memory partial stall with 1s tracking window size
+ * and 150ms threshold.
+ */
+ int main() {
+ const char trig[] = "some 150000 1000000";
+ struct pollfd fds;
+ int n;
+
+ fds.fd = open("/proc/pressure/memory", O_RDWR | O_NONBLOCK);
+ if (fds.fd < 0) {
+ printf("/proc/pressure/memory open error: %s\n",
+ strerror(errno));
+ return 1;
+ }
+ fds.events = POLLPRI;
+
+ if (write(fds.fd, trig, strlen(trig) + 1) < 0) {
+ printf("/proc/pressure/memory write error: %s\n",
+ strerror(errno));
+ return 1;
+ }
+
+ printf("waiting for events...\n");
+ while (1) {
+ n = poll(&fds, 1, -1);
+ if (n < 0) {
+ printf("poll error: %s\n", strerror(errno));
+ return 1;
+ }
+ if (fds.revents & POLLERR) {
+ printf("got POLLERR, event source is gone\n");
+ return 0;
+ }
+ if (fds.revents & POLLPRI) {
+ printf("event triggered!\n");
+ } else {
+ printf("unknown event received: 0x%x\n", fds.revents);
+ return 1;
+ }
+ }
+
+ return 0;
+ }
+
+Cgroup2 interface
+=================
+
+In a system with a CONFIG_CGROUPS=y kernel and the cgroup2 filesystem
+mounted, pressure stall information is also tracked for tasks grouped
+into cgroups. Each subdirectory in the cgroupfs mountpoint contains
+cpu.pressure, memory.pressure, and io.pressure files; the format is
+the same as the /proc/pressure/ files.
+
+Per-cgroup psi monitors can be specified and used the same way as
+system-wide ones.
diff --git a/Documentation/accounting/psi.txt b/Documentation/accounting/psi.txt
deleted file mode 100644
index b8ca28b60215..000000000000
--- a/Documentation/accounting/psi.txt
+++ /dev/null
@@ -1,73 +0,0 @@
-================================
-PSI - Pressure Stall Information
-================================
-
-:Date: April, 2018
-:Author: Johannes Weiner <hannes@cmpxchg.org>
-
-When CPU, memory or IO devices are contended, workloads experience
-latency spikes, throughput losses, and run the risk of OOM kills.
-
-Without an accurate measure of such contention, users are forced to
-either play it safe and under-utilize their hardware resources, or
-roll the dice and frequently suffer the disruptions resulting from
-excessive overcommit.
-
-The psi feature identifies and quantifies the disruptions caused by
-such resource crunches and the time impact it has on complex workloads
-or even entire systems.
-
-Having an accurate measure of productivity losses caused by resource
-scarcity aids users in sizing workloads to hardware--or provisioning
-hardware according to workload demand.
-
-As psi aggregates this information in realtime, systems can be managed
-dynamically using techniques such as load shedding, migrating jobs to
-other systems or data centers, or strategically pausing or killing low
-priority or restartable batch jobs.
-
-This allows maximizing hardware utilization without sacrificing
-workload health or risking major disruptions such as OOM kills.
-
-Pressure interface
-==================
-
-Pressure information for each resource is exported through the
-respective file in /proc/pressure/ -- cpu, memory, and io.
-
-The format for CPU is as such:
-
-some avg10=0.00 avg60=0.00 avg300=0.00 total=0
-
-and for memory and IO:
-
-some avg10=0.00 avg60=0.00 avg300=0.00 total=0
-full avg10=0.00 avg60=0.00 avg300=0.00 total=0
-
-The "some" line indicates the share of time in which at least some
-tasks are stalled on a given resource.
-
-The "full" line indicates the share of time in which all non-idle
-tasks are stalled on a given resource simultaneously. In this state
-actual CPU cycles are going to waste, and a workload that spends
-extended time in this state is considered to be thrashing. This has
-severe impact on performance, and it's useful to distinguish this
-situation from a state where some tasks are stalled but the CPU is
-still doing productive work. As such, time spent in this subset of the
-stall state is tracked separately and exported in the "full" averages.
-
-The ratios are tracked as recent trends over ten, sixty, and three
-hundred second windows, which gives insight into short term events as
-well as medium and long term trends. The total absolute stall time is
-tracked and exported as well, to allow detection of latency spikes
-which wouldn't necessarily make a dent in the time averages, or to
-average trends over custom time frames.
-
-Cgroup2 interface
-=================
-
-In a system with a CONFIG_CGROUP=y kernel and the cgroup2 filesystem
-mounted, pressure stall information is also tracked for tasks grouped
-into cgroups. Each subdirectory in the cgroupfs mountpoint contains
-cpu.pressure, memory.pressure, and io.pressure files; the format is
-the same as the /proc/pressure/ files.
diff --git a/Documentation/accounting/taskstats-struct.rst b/Documentation/accounting/taskstats-struct.rst
new file mode 100644
index 000000000000..acca51c34157
--- /dev/null
+++ b/Documentation/accounting/taskstats-struct.rst
@@ -0,0 +1,199 @@
+====================
+The struct taskstats
+====================
+
+This document contains an explanation of the struct taskstats fields.
+
+There are three different groups of fields in the struct taskstats:
+
+1) Common and basic accounting fields
+ If CONFIG_TASKSTATS is set, the taskstats interface is enabled and
+ the common fields and basic accounting fields are collected for
+ delivery at do_exit() of a task.
+2) Delay accounting fields
+ These fields are placed between::
+
+ /* Delay accounting fields start */
+
+ and::
+
+ /* Delay accounting fields end */
+
+ Their values are collected if CONFIG_TASK_DELAY_ACCT is set.
+3) Extended accounting fields
+ These fields are placed between::
+
+ /* Extended accounting fields start */
+
+ and::
+
+ /* Extended accounting fields end */
+
+ Their values are collected if CONFIG_TASK_XACCT is set.
+
+4) Per-task and per-thread context switch count statistics
+
+5) Time accounting for SMT machines
+
+6) Extended delay accounting fields for memory reclaim
+
+Future extension should add fields to the end of the taskstats struct, and
+should not change the relative position of each field within the struct.
+
+::
+
+ struct taskstats {
+
+1) Common and basic accounting fields::
+
+ /* The version number of this struct. This field is always set to
+ * TASKSTATS_VERSION, which is defined in <linux/taskstats.h>.
+ * Each time the struct is changed, the value should be incremented.
+ */
+ __u16 version;
+
+ /* The exit code of a task. */
+ __u32 ac_exitcode; /* Exit status */
+
+ /* The accounting flags of a task as defined in <linux/acct.h>
+ * Defined values are AFORK, ASU, ACOMPAT, ACORE, and AXSIG.
+ */
+ __u8 ac_flag; /* Record flags */
+
+ /* The value of task_nice() of a task. */
+ __u8 ac_nice; /* task_nice */
+
+ /* The name of the command that started this task. */
+ char ac_comm[TS_COMM_LEN]; /* Command name */
+
+ /* The scheduling discipline as set in task->policy field. */
+ __u8 ac_sched; /* Scheduling discipline */
+
+ __u8 ac_pad[3];
+ __u32 ac_uid; /* User ID */
+ __u32 ac_gid; /* Group ID */
+ __u32 ac_pid; /* Process ID */
+ __u32 ac_ppid; /* Parent process ID */
+
+ /* The time when a task begins, in [secs] since 1970. */
+ __u32 ac_btime; /* Begin time [sec since 1970] */
+
+ /* The elapsed time of a task, in [usec]. */
+ __u64 ac_etime; /* Elapsed time [usec] */
+
+ /* The user CPU time of a task, in [usec]. */
+ __u64 ac_utime; /* User CPU time [usec] */
+
+ /* The system CPU time of a task, in [usec]. */
+ __u64 ac_stime; /* System CPU time [usec] */
+
+ /* The minor page fault count of a task, as set in task->min_flt. */
+ __u64 ac_minflt; /* Minor Page Fault Count */
+
+ /* The major page fault count of a task, as set in task->maj_flt. */
+ __u64 ac_majflt; /* Major Page Fault Count */
+
+
+2) Delay accounting fields::
+
+ /* Delay accounting fields start
+ *
+ * All values, until the comment "Delay accounting fields end" are
+ * available only if delay accounting is enabled, even though the last
+ * few fields are not delays
+ *
+ * xxx_count is the number of delay values recorded
+ * xxx_delay_total is the corresponding cumulative delay in nanoseconds
+ *
+ * xxx_delay_total wraps around to zero on overflow
+ * xxx_count incremented regardless of overflow
+ */
+
+ /* Delay waiting for cpu, while runnable
+ * count, delay_total NOT updated atomically
+ */
+ __u64 cpu_count;
+ __u64 cpu_delay_total;
+
+ /* Following four fields atomically updated using task->delays->lock */
+
+ /* Delay waiting for synchronous block I/O to complete
+ * does not account for delays in I/O submission
+ */
+ __u64 blkio_count;
+ __u64 blkio_delay_total;
+
+ /* Delay waiting for page fault I/O (swap in only) */
+ __u64 swapin_count;
+ __u64 swapin_delay_total;
+
+ /* cpu "wall-clock" running time
+ * On some architectures, value will adjust for cpu time stolen
+ * from the kernel in involuntary waits due to virtualization.
+ * Value is cumulative, in nanoseconds, without a corresponding count
+ * and wraps around to zero silently on overflow
+ */
+ __u64 cpu_run_real_total;
+
+ /* cpu "virtual" running time
+ * Uses time intervals seen by the kernel i.e. no adjustment
+ * for kernel's involuntary waits due to virtualization.
+ * Value is cumulative, in nanoseconds, without a corresponding count
+ * and wraps around to zero silently on overflow
+ */
+ __u64 cpu_run_virtual_total;
+ /* Delay accounting fields end */
+ /* version 1 ends here */
+
+
+3) Extended accounting fields::
+
+ /* Extended accounting fields start */
+
+ /* Accumulated RSS usage in duration of a task, in MBytes-usecs.
+ * The current rss usage is added to this counter every time
+ * a tick is charged to a task's system time. So, at the end we
+ * will have memory usage multiplied by system time. Thus an
+ * average usage per system time unit can be calculated.
+ */
+ __u64 coremem; /* accumulated RSS usage in MB-usec */
+
+ /* Accumulated virtual memory usage in duration of a task.
+ * Same as acct_rss_mem1 above except that we keep track of VM usage.
+ */
+ __u64 virtmem; /* accumulated VM usage in MB-usec */
+
+ /* High watermark of RSS usage in duration of a task, in KBytes. */
+ __u64 hiwater_rss; /* High-watermark of RSS usage */
+
+ /* High watermark of VM usage in duration of a task, in KBytes. */
+ __u64 hiwater_vm; /* High-water virtual memory usage */
+
+ /* The following four fields are I/O statistics of a task. */
+ __u64 read_char; /* bytes read */
+ __u64 write_char; /* bytes written */
+ __u64 read_syscalls; /* read syscalls */
+ __u64 write_syscalls; /* write syscalls */
+
+ /* Extended accounting fields end */
+
+4) Per-task and per-thread statistics::
+
+ __u64 nvcsw; /* Context voluntary switch counter */
+ __u64 nivcsw; /* Context involuntary switch counter */
+
+5) Time accounting for SMT machines::
+
+ __u64 ac_utimescaled; /* utime scaled on frequency etc */
+ __u64 ac_stimescaled; /* stime scaled on frequency etc */
+ __u64 cpu_scaled_run_real_total; /* scaled cpu_run_real_total */
+
+6) Extended delay accounting fields for memory reclaim::
+
+ /* Delay waiting for memory reclaim */
+ __u64 freepages_count;
+ __u64 freepages_delay_total;
+
+::
+
+ }
diff --git a/Documentation/accounting/taskstats-struct.txt b/Documentation/accounting/taskstats-struct.txt
deleted file mode 100644
index e7512c061c15..000000000000
--- a/Documentation/accounting/taskstats-struct.txt
+++ /dev/null
@@ -1,180 +0,0 @@
-The struct taskstats
---------------------
-
-This document contains an explanation of the struct taskstats fields.
-
-There are three different groups of fields in the struct taskstats:
-
-1) Common and basic accounting fields
- If CONFIG_TASKSTATS is set, the taskstats interface is enabled and
- the common fields and basic accounting fields are collected for
- delivery at do_exit() of a task.
-2) Delay accounting fields
- These fields are placed between
- /* Delay accounting fields start */
- and
- /* Delay accounting fields end */
- Their values are collected if CONFIG_TASK_DELAY_ACCT is set.
-3) Extended accounting fields
- These fields are placed between
- /* Extended accounting fields start */
- and
- /* Extended accounting fields end */
- Their values are collected if CONFIG_TASK_XACCT is set.
-
-4) Per-task and per-thread context switch count statistics
-
-5) Time accounting for SMT machines
-
-6) Extended delay accounting fields for memory reclaim
-
-Future extension should add fields to the end of the taskstats struct, and
-should not change the relative position of each field within the struct.
-
-
-struct taskstats {
-
-1) Common and basic accounting fields:
- /* The version number of this struct. This field is always set to
- * TAKSTATS_VERSION, which is defined in <linux/taskstats.h>.
- * Each time the struct is changed, the value should be incremented.
- */
- __u16 version;
-
- /* The exit code of a task. */
- __u32 ac_exitcode; /* Exit status */
-
- /* The accounting flags of a task as defined in <linux/acct.h>
- * Defined values are AFORK, ASU, ACOMPAT, ACORE, and AXSIG.
- */
- __u8 ac_flag; /* Record flags */
-
- /* The value of task_nice() of a task. */
- __u8 ac_nice; /* task_nice */
-
- /* The name of the command that started this task. */
- char ac_comm[TS_COMM_LEN]; /* Command name */
-
- /* The scheduling discipline as set in task->policy field. */
- __u8 ac_sched; /* Scheduling discipline */
-
- __u8 ac_pad[3];
- __u32 ac_uid; /* User ID */
- __u32 ac_gid; /* Group ID */
- __u32 ac_pid; /* Process ID */
- __u32 ac_ppid; /* Parent process ID */
-
- /* The time when a task begins, in [secs] since 1970. */
- __u32 ac_btime; /* Begin time [sec since 1970] */
-
- /* The elapsed time of a task, in [usec]. */
- __u64 ac_etime; /* Elapsed time [usec] */
-
- /* The user CPU time of a task, in [usec]. */
- __u64 ac_utime; /* User CPU time [usec] */
-
- /* The system CPU time of a task, in [usec]. */
- __u64 ac_stime; /* System CPU time [usec] */
-
- /* The minor page fault count of a task, as set in task->min_flt. */
- __u64 ac_minflt; /* Minor Page Fault Count */
-
- /* The major page fault count of a task, as set in task->maj_flt. */
- __u64 ac_majflt; /* Major Page Fault Count */
-
-
-2) Delay accounting fields:
- /* Delay accounting fields start
- *
- * All values, until the comment "Delay accounting fields end" are
- * available only if delay accounting is enabled, even though the last
- * few fields are not delays
- *
- * xxx_count is the number of delay values recorded
- * xxx_delay_total is the corresponding cumulative delay in nanoseconds
- *
- * xxx_delay_total wraps around to zero on overflow
- * xxx_count incremented regardless of overflow
- */
-
- /* Delay waiting for cpu, while runnable
- * count, delay_total NOT updated atomically
- */
- __u64 cpu_count;
- __u64 cpu_delay_total;
-
- /* Following four fields atomically updated using task->delays->lock */
-
- /* Delay waiting for synchronous block I/O to complete
- * does not account for delays in I/O submission
- */
- __u64 blkio_count;
- __u64 blkio_delay_total;
-
- /* Delay waiting for page fault I/O (swap in only) */
- __u64 swapin_count;
- __u64 swapin_delay_total;
-
- /* cpu "wall-clock" running time
- * On some architectures, value will adjust for cpu time stolen
- * from the kernel in involuntary waits due to virtualization.
- * Value is cumulative, in nanoseconds, without a corresponding count
- * and wraps around to zero silently on overflow
- */
- __u64 cpu_run_real_total;
-
- /* cpu "virtual" running time
- * Uses time intervals seen by the kernel i.e. no adjustment
- * for kernel's involuntary waits due to virtualization.
- * Value is cumulative, in nanoseconds, without a corresponding count
- * and wraps around to zero silently on overflow
- */
- __u64 cpu_run_virtual_total;
- /* Delay accounting fields end */
- /* version 1 ends here */
-
-
-3) Extended accounting fields
- /* Extended accounting fields start */
-
- /* Accumulated RSS usage in duration of a task, in MBytes-usecs.
- * The current rss usage is added to this counter every time
- * a tick is charged to a task's system time. So, at the end we
- * will have memory usage multiplied by system time. Thus an
- * average usage per system time unit can be calculated.
- */
- __u64 coremem; /* accumulated RSS usage in MB-usec */
-
- /* Accumulated virtual memory usage in duration of a task.
- * Same as acct_rss_mem1 above except that we keep track of VM usage.
- */
- __u64 virtmem; /* accumulated VM usage in MB-usec */
-
- /* High watermark of RSS usage in duration of a task, in KBytes. */
- __u64 hiwater_rss; /* High-watermark of RSS usage */
-
- /* High watermark of VM usage in duration of a task, in KBytes. */
- __u64 hiwater_vm; /* High-water virtual memory usage */
-
- /* The following four fields are I/O statistics of a task. */
- __u64 read_char; /* bytes read */
- __u64 write_char; /* bytes written */
- __u64 read_syscalls; /* read syscalls */
- __u64 write_syscalls; /* write syscalls */
-
- /* Extended accounting fields end */
-
-4) Per-task and per-thread statistics
- __u64 nvcsw; /* Context voluntary switch counter */
- __u64 nivcsw; /* Context involuntary switch counter */
-
-5) Time accounting for SMT machines
- __u64 ac_utimescaled; /* utime scaled on frequency etc */
- __u64 ac_stimescaled; /* stime scaled on frequency etc */
- __u64 cpu_scaled_run_real_total; /* scaled cpu_run_real_total */
-
-6) Extended delay accounting fields for memory reclaim
- /* Delay waiting for memory reclaim */
- __u64 freepages_count;
- __u64 freepages_delay_total;
-}
diff --git a/Documentation/accounting/taskstats.rst b/Documentation/accounting/taskstats.rst
new file mode 100644
index 000000000000..2a28b7f55c10
--- /dev/null
+++ b/Documentation/accounting/taskstats.rst
@@ -0,0 +1,180 @@
+=============================
+Per-task statistics interface
+=============================
+
+
+Taskstats is a netlink-based interface for sending per-task and
+per-process statistics from the kernel to userspace.
+
+Taskstats was designed for the following benefits:
+
+- efficiently provide statistics during lifetime of a task and on its exit
+- unified interface for multiple accounting subsystems
+- extensibility for use by future accounting patches
+
+Terminology
+-----------
+
+"pid", "tid" and "task" are used interchangeably and refer to the standard
+Linux task defined by struct task_struct. per-pid stats are the same as
+per-task stats.
+
+"tgid", "process" and "thread group" are used interchangeably and refer to the
+tasks that share an mm_struct i.e. the traditional Unix process. Despite the
+use of tgid, there is no special treatment for the task that is thread group
+leader - a process is deemed alive as long as it has any task belonging to it.
+
+Usage
+-----
+
+To get statistics during a task's lifetime, userspace opens a unicast netlink
+socket (NETLINK_GENERIC family) and sends commands specifying a pid or a tgid.
+The response contains statistics for a task (if pid is specified) or the sum of
+statistics for all tasks of the process (if tgid is specified).
+
+To obtain statistics for tasks which are exiting, the userspace listener
+sends a register command and specifies a cpumask. Whenever a task exits on
+one of the cpus in the cpumask, its per-pid statistics are sent to the
+registered listener. Using cpumasks allows the data received by one listener
+to be limited and assists in flow control over the netlink interface and is
+explained in more detail below.
+
+If the exiting task is the last thread exiting its thread group,
+an additional record containing the per-tgid stats is also sent to userspace.
+The latter contains the sum of per-pid stats for all threads in the thread
+group, both past and present.
+
+getdelays.c is a simple utility demonstrating usage of the taskstats interface
+for reporting delay accounting statistics. Users can register cpumasks,
+send commands and process responses, listen for per-tid/tgid exit data,
+write the data received to a file and do basic flow control by increasing
+receive buffer sizes.
+
+Interface
+---------
+
+The user-kernel interface is encapsulated in include/linux/taskstats.h
+
+To avoid this documentation becoming obsolete as the interface evolves, only
+an outline of the current version is given. taskstats.h always overrides the
+description here.
+
+struct taskstats is the common accounting structure for both per-pid and
+per-tgid data. It is versioned and can be extended by each accounting subsystem
+that is added to the kernel. The fields and their semantics are defined in the
+taskstats.h file.
+
+The data exchanged between user and kernel space is a netlink message belonging
+to the NETLINK_GENERIC family and using the netlink attributes interface.
+The messages are in the format::
+
+ +----------+- - -+-------------+-------------------+
+ | nlmsghdr | Pad | genlmsghdr | taskstats payload |
+ +----------+- - -+-------------+-------------------+
+
+
+The taskstats payload is one of the following three kinds:
+
+1. Commands: Sent from user to kernel. Commands to get data on
+a pid/tgid consist of one attribute, of type TASKSTATS_CMD_ATTR_PID/TGID,
+containing a u32 pid or tgid in the attribute payload. The pid/tgid denotes
+the task/process for which userspace wants statistics.
+
+Commands to register/deregister interest in exit data from a set of cpus
+consist of one attribute, of type
+TASKSTATS_CMD_ATTR_REGISTER/DEREGISTER_CPUMASK and contain a cpumask in the
+attribute payload. The cpumask is specified as an ascii string of
+comma-separated cpu ranges e.g. to listen to exit data from cpus 1,2,3,5,7,8
+the cpumask would be "1-3,5,7-8". If userspace forgets to deregister interest
+in cpus before closing the listening socket, the kernel cleans up its interest
+set over time. However, for the sake of efficiency, an explicit deregistration
+is advisable.
+
+2. Response for a command: sent from the kernel in response to a userspace
+command. The payload is a series of three attributes of type:
+
+a) TASKSTATS_TYPE_AGGR_PID/TGID : attribute containing no payload but indicates
+a pid/tgid will be followed by some stats.
+
+b) TASKSTATS_TYPE_PID/TGID: attribute whose payload is the pid/tgid whose stats
+are being returned.
+
+c) TASKSTATS_TYPE_STATS: attribute with a struct taskstats as payload. The
+same structure is used for both per-pid and per-tgid stats.
+
+3. New message sent by kernel whenever a task exits. The payload consists of a
+ series of attributes of the following type:
+
+a) TASKSTATS_TYPE_AGGR_PID: indicates next two attributes will be pid+stats
+b) TASKSTATS_TYPE_PID: contains exiting task's pid
+c) TASKSTATS_TYPE_STATS: contains the exiting task's per-pid stats
+d) TASKSTATS_TYPE_AGGR_TGID: indicates next two attributes will be tgid+stats
+e) TASKSTATS_TYPE_TGID: contains tgid of process to which task belongs
+f) TASKSTATS_TYPE_STATS: contains the per-tgid stats for exiting task's process
+
+
+per-tgid stats
+--------------
+
+Taskstats provides per-process stats, in addition to per-task stats, since
+resource management is often done at a process granularity and aggregating task
+stats in userspace alone is inefficient and potentially inaccurate (due to lack
+of atomicity).
+
+However, maintaining per-process, in addition to per-task stats, within the
+kernel has space and time overheads. To address this, the taskstats code
+accumulates each exiting task's statistics into a process-wide data structure.
+When the last task of a process exits, the process level data accumulated also
+gets sent to userspace (along with the per-task data).
+
+When a user queries to get per-tgid data, the sum of all other live threads in
+the group is added up and added to the accumulated total for previously exited
+threads of the same thread group.
+
+Extending taskstats
+-------------------
+
+There are two ways to extend the taskstats interface to export more
+per-task/process stats as patches to collect them get added to the kernel
+in future:
+
+1. Adding more fields to the end of the existing struct taskstats. Backward
+ compatibility is ensured by the version number within the
+ structure. Userspace will use only the fields of the struct that correspond
+ to the version its using.
+
+2. Defining separate statistic structs and using the netlink attributes
+ interface to return them. Since userspace processes each netlink attribute
+ independently, it can always ignore attributes whose type it does not
+ understand (because it is using an older version of the interface).
+
+
+Choosing between 1. and 2. is a matter of trading off flexibility and
+overhead. If only a few fields need to be added, then 1. is the preferable
+path since the kernel and userspace don't need to incur the overhead of
+processing new netlink attributes. But if the new fields expand the existing
+struct too much, requiring disparate userspace accounting utilities to
+unnecessarily receive large structures whose fields are of no interest, then
+extending the attributes structure would be worthwhile.
+
+Flow control for taskstats
+--------------------------
+
+When the rate of task exits becomes large, a listener may not be able to keep
+up with the kernel's rate of sending per-tid/tgid exit data leading to data
+loss. This possibility gets compounded when the taskstats structure gets
+extended and the number of cpus grows large.
+
+To avoid losing statistics, userspace should do one or more of the following:
+
+- increase the receive buffer sizes for the netlink sockets opened by
+ listeners to receive exit data.
+
+- create more listeners and reduce the number of cpus being listened to by
+ each listener. In the extreme case, there could be one listener for each cpu.
+ Users may also consider setting the cpu affinity of the listener to the subset
+ of cpus to which it listens, especially if they are listening to just one cpu.
+
+Despite these measures, if the userspace receives ENOBUFS error messages
+indicated overflow of receive buffers, it should take measures to handle the
+loss of data.
diff --git a/Documentation/accounting/taskstats.txt b/Documentation/accounting/taskstats.txt
deleted file mode 100644
index ff06b738bb88..000000000000
--- a/Documentation/accounting/taskstats.txt
+++ /dev/null
@@ -1,181 +0,0 @@
-Per-task statistics interface
------------------------------
-
-
-Taskstats is a netlink-based interface for sending per-task and
-per-process statistics from the kernel to userspace.
-
-Taskstats was designed for the following benefits:
-
-- efficiently provide statistics during lifetime of a task and on its exit
-- unified interface for multiple accounting subsystems
-- extensibility for use by future accounting patches
-
-Terminology
------------
-
-"pid", "tid" and "task" are used interchangeably and refer to the standard
-Linux task defined by struct task_struct. per-pid stats are the same as
-per-task stats.
-
-"tgid", "process" and "thread group" are used interchangeably and refer to the
-tasks that share an mm_struct i.e. the traditional Unix process. Despite the
-use of tgid, there is no special treatment for the task that is thread group
-leader - a process is deemed alive as long as it has any task belonging to it.
-
-Usage
------
-
-To get statistics during a task's lifetime, userspace opens a unicast netlink
-socket (NETLINK_GENERIC family) and sends commands specifying a pid or a tgid.
-The response contains statistics for a task (if pid is specified) or the sum of
-statistics for all tasks of the process (if tgid is specified).
-
-To obtain statistics for tasks which are exiting, the userspace listener
-sends a register command and specifies a cpumask. Whenever a task exits on
-one of the cpus in the cpumask, its per-pid statistics are sent to the
-registered listener. Using cpumasks allows the data received by one listener
-to be limited and assists in flow control over the netlink interface and is
-explained in more detail below.
-
-If the exiting task is the last thread exiting its thread group,
-an additional record containing the per-tgid stats is also sent to userspace.
-The latter contains the sum of per-pid stats for all threads in the thread
-group, both past and present.
-
-getdelays.c is a simple utility demonstrating usage of the taskstats interface
-for reporting delay accounting statistics. Users can register cpumasks,
-send commands and process responses, listen for per-tid/tgid exit data,
-write the data received to a file and do basic flow control by increasing
-receive buffer sizes.
-
-Interface
----------
-
-The user-kernel interface is encapsulated in include/linux/taskstats.h
-
-To avoid this documentation becoming obsolete as the interface evolves, only
-an outline of the current version is given. taskstats.h always overrides the
-description here.
-
-struct taskstats is the common accounting structure for both per-pid and
-per-tgid data. It is versioned and can be extended by each accounting subsystem
-that is added to the kernel. The fields and their semantics are defined in the
-taskstats.h file.
-
-The data exchanged between user and kernel space is a netlink message belonging
-to the NETLINK_GENERIC family and using the netlink attributes interface.
-The messages are in the format
-
- +----------+- - -+-------------+-------------------+
- | nlmsghdr | Pad | genlmsghdr | taskstats payload |
- +----------+- - -+-------------+-------------------+
-
-
-The taskstats payload is one of the following three kinds:
-
-1. Commands: Sent from user to kernel. Commands to get data on
-a pid/tgid consist of one attribute, of type TASKSTATS_CMD_ATTR_PID/TGID,
-containing a u32 pid or tgid in the attribute payload. The pid/tgid denotes
-the task/process for which userspace wants statistics.
-
-Commands to register/deregister interest in exit data from a set of cpus
-consist of one attribute, of type
-TASKSTATS_CMD_ATTR_REGISTER/DEREGISTER_CPUMASK and contain a cpumask in the
-attribute payload. The cpumask is specified as an ascii string of
-comma-separated cpu ranges e.g. to listen to exit data from cpus 1,2,3,5,7,8
-the cpumask would be "1-3,5,7-8". If userspace forgets to deregister interest
-in cpus before closing the listening socket, the kernel cleans up its interest
-set over time. However, for the sake of efficiency, an explicit deregistration
-is advisable.
-
-2. Response for a command: sent from the kernel in response to a userspace
-command. The payload is a series of three attributes of type:
-
-a) TASKSTATS_TYPE_AGGR_PID/TGID : attribute containing no payload but indicates
-a pid/tgid will be followed by some stats.
-
-b) TASKSTATS_TYPE_PID/TGID: attribute whose payload is the pid/tgid whose stats
-are being returned.
-
-c) TASKSTATS_TYPE_STATS: attribute with a struct taskstats as payload. The
-same structure is used for both per-pid and per-tgid stats.
-
-3. New message sent by kernel whenever a task exits. The payload consists of a
- series of attributes of the following type:
-
-a) TASKSTATS_TYPE_AGGR_PID: indicates next two attributes will be pid+stats
-b) TASKSTATS_TYPE_PID: contains exiting task's pid
-c) TASKSTATS_TYPE_STATS: contains the exiting task's per-pid stats
-d) TASKSTATS_TYPE_AGGR_TGID: indicates next two attributes will be tgid+stats
-e) TASKSTATS_TYPE_TGID: contains tgid of process to which task belongs
-f) TASKSTATS_TYPE_STATS: contains the per-tgid stats for exiting task's process
-
-
-per-tgid stats
---------------
-
-Taskstats provides per-process stats, in addition to per-task stats, since
-resource management is often done at a process granularity and aggregating task
-stats in userspace alone is inefficient and potentially inaccurate (due to lack
-of atomicity).
-
-However, maintaining per-process, in addition to per-task stats, within the
-kernel has space and time overheads. To address this, the taskstats code
-accumulates each exiting task's statistics into a process-wide data structure.
-When the last task of a process exits, the process level data accumulated also
-gets sent to userspace (along with the per-task data).
-
-When a user queries to get per-tgid data, the sum of all other live threads in
-the group is added up and added to the accumulated total for previously exited
-threads of the same thread group.
-
-Extending taskstats
--------------------
-
-There are two ways to extend the taskstats interface to export more
-per-task/process stats as patches to collect them get added to the kernel
-in future:
-
-1. Adding more fields to the end of the existing struct taskstats. Backward
- compatibility is ensured by the version number within the
- structure. Userspace will use only the fields of the struct that correspond
- to the version its using.
-
-2. Defining separate statistic structs and using the netlink attributes
- interface to return them. Since userspace processes each netlink attribute
- independently, it can always ignore attributes whose type it does not
- understand (because it is using an older version of the interface).
-
-
-Choosing between 1. and 2. is a matter of trading off flexibility and
-overhead. If only a few fields need to be added, then 1. is the preferable
-path since the kernel and userspace don't need to incur the overhead of
-processing new netlink attributes. But if the new fields expand the existing
-struct too much, requiring disparate userspace accounting utilities to
-unnecessarily receive large structures whose fields are of no interest, then
-extending the attributes structure would be worthwhile.
-
-Flow control for taskstats
---------------------------
-
-When the rate of task exits becomes large, a listener may not be able to keep
-up with the kernel's rate of sending per-tid/tgid exit data leading to data
-loss. This possibility gets compounded when the taskstats structure gets
-extended and the number of cpus grows large.
-
-To avoid losing statistics, userspace should do one or more of the following:
-
-- increase the receive buffer sizes for the netlink sockets opened by
-listeners to receive exit data.
-
-- create more listeners and reduce the number of cpus being listened to by
-each listener. In the extreme case, there could be one listener for each cpu.
-Users may also consider setting the cpu affinity of the listener to the subset
-of cpus to which it listens, especially if they are listening to just one cpu.
-
-Despite these measures, if the userspace receives ENOBUFS error messages
-indicated overflow of receive buffers, it should take measures to handle the
-loss of data.
-
-----
diff --git a/Documentation/acpi/DSD-properties-rules.txt b/Documentation/acpi/DSD-properties-rules.txt
deleted file mode 100644
index 3e4862bdad98..000000000000
--- a/Documentation/acpi/DSD-properties-rules.txt
+++ /dev/null
@@ -1,97 +0,0 @@
-_DSD Device Properties Usage Rules
-----------------------------------
-
-Properties, Property Sets and Property Subsets
-----------------------------------------------
-
-The _DSD (Device Specific Data) configuration object, introduced in ACPI 5.1,
-allows any type of device configuration data to be provided via the ACPI
-namespace. In principle, the format of the data may be arbitrary, but it has to
-be identified by a UUID which must be recognized by the driver processing the
-_DSD output. However, there are generic UUIDs defined for _DSD recognized by
-the ACPI subsystem in the Linux kernel which automatically processes the data
-packages associated with them and makes those data available to device drivers
-as "device properties".
-
-A device property is a data item consisting of a string key and a value (of a
-specific type) associated with it.
-
-In the ACPI _DSD context it is an element of the sub-package following the
-generic Device Properties UUID in the _DSD return package as specified in the
-Device Properties UUID definition document [1].
-
-It also may be regarded as the definition of a key and the associated data type
-that can be returned by _DSD in the Device Properties UUID sub-package for a
-given device.
-
-A property set is a collection of properties applicable to a hardware entity
-like a device. In the ACPI _DSD context it is the set of all properties that
-can be returned in the Device Properties UUID sub-package for the device in
-question.
-
-Property subsets are nested collections of properties. Each of them is
-associated with an additional key (name) allowing the subset to be referred
-to as a whole (and to be treated as a separate entity). The canonical
-representation of property subsets is via the mechanism specified in the
-Hierarchical Properties Extension UUID definition document [2].
-
-Property sets may be hierarchical. That is, a property set may contain
-multiple property subsets that each may contain property subsets of its
-own and so on.
-
-General Validity Rule for Property Sets
----------------------------------------
-
-Valid property sets must follow the guidance given by the Device Properties UUID
-definition document [1].
-
-_DSD properties are intended to be used in addition to, and not instead of, the
-existing mechanisms defined by the ACPI specification. Therefore, as a rule,
-they should only be used if the ACPI specification does not make direct
-provisions for handling the underlying use case. It generally is invalid to
-return property sets which do not follow that rule from _DSD in data packages
-associated with the Device Properties UUID.
-
-Additional Considerations
--------------------------
-
-There are cases in which, even if the general rule given above is followed in
-principle, the property set may still not be regarded as a valid one.
-
-For example, that applies to device properties which may cause kernel code
-(either a device driver or a library/subsystem) to access hardware in a way
-possibly leading to a conflict with AML methods in the ACPI namespace. In
-particular, that may happen if the kernel code uses device properties to
-manipulate hardware normally controlled by ACPI methods related to power
-management, like _PSx and _DSW (for device objects) or _ON and _OFF (for power
-resource objects), or by ACPI device disabling/enabling methods, like _DIS and
-_SRS.
-
-In all cases in which kernel code may do something that will confuse AML as a
-result of using device properties, the device properties in question are not
-suitable for the ACPI environment and consequently they cannot belong to a valid
-property set.
-
-Property Sets and Device Tree Bindings
---------------------------------------
-
-It often is useful to make _DSD return property sets that follow Device Tree
-bindings.
-
-In those cases, however, the above validity considerations must be taken into
-account in the first place and returning invalid property sets from _DSD must be
-avoided. For this reason, it may not be possible to make _DSD return a property
-set following the given DT binding literally and completely. Still, for the
-sake of code re-use, it may make sense to provide as much of the configuration
-data as possible in the form of device properties and complement that with an
-ACPI-specific mechanism suitable for the use case at hand.
-
-In any case, property sets following DT bindings literally should not be
-expected to automatically work in the ACPI environment regardless of their
-contents.
-
-References
-----------
-
-[1] http://www.uefi.org/sites/default/files/resources/_DSD-device-properties-UUID.pdf
-[2] http://www.uefi.org/sites/default/files/resources/_DSD-hierarchical-data-extension-UUID-v1.1.pdf
diff --git a/Documentation/acpi/acpi-lid.txt b/Documentation/acpi/acpi-lid.txt
deleted file mode 100644
index effe7af3a5af..000000000000
--- a/Documentation/acpi/acpi-lid.txt
+++ /dev/null
@@ -1,96 +0,0 @@
-Special Usage Model of the ACPI Control Method Lid Device
-
-Copyright (C) 2016, Intel Corporation
-Author: Lv Zheng <lv.zheng@intel.com>
-
-
-Abstract:
-
-Platforms containing lids convey lid state (open/close) to OSPMs using a
-control method lid device. To implement this, the AML tables issue
-Notify(lid_device, 0x80) to notify the OSPMs whenever the lid state has
-changed. The _LID control method for the lid device must be implemented to
-report the "current" state of the lid as either "opened" or "closed".
-
-For most platforms, both the _LID method and the lid notifications are
-reliable. However, there are exceptions. In order to work with these
-exceptional buggy platforms, special restrictions and expections should be
-taken into account. This document describes the restrictions and the
-expections of the Linux ACPI lid device driver.
-
-
-1. Restrictions of the returning value of the _LID control method
-
-The _LID control method is described to return the "current" lid state.
-However the word of "current" has ambiguity, some buggy AML tables return
-the lid state upon the last lid notification instead of returning the lid
-state upon the last _LID evaluation. There won't be difference when the
-_LID control method is evaluated during the runtime, the problem is its
-initial returning value. When the AML tables implement this control method
-with cached value, the initial returning value is likely not reliable.
-There are platforms always retun "closed" as initial lid state.
-
-2. Restrictions of the lid state change notifications
-
-There are buggy AML tables never notifying when the lid device state is
-changed to "opened". Thus the "opened" notification is not guaranteed. But
-it is guaranteed that the AML tables always notify "closed" when the lid
-state is changed to "closed". The "closed" notification is normally used to
-trigger some system power saving operations on Windows. Since it is fully
-tested, it is reliable from all AML tables.
-
-3. Expections for the userspace users of the ACPI lid device driver
-
-The ACPI button driver exports the lid state to the userspace via the
-following file:
- /proc/acpi/button/lid/LID0/state
-This file actually calls the _LID control method described above. And given
-the previous explanation, it is not reliable enough on some platforms. So
-it is advised for the userspace program to not to solely rely on this file
-to determine the actual lid state.
-
-The ACPI button driver emits the following input event to the userspace:
- SW_LID
-The ACPI lid device driver is implemented to try to deliver the platform
-triggered events to the userspace. However, given the fact that the buggy
-firmware cannot make sure "opened"/"closed" events are paired, the ACPI
-button driver uses the following 3 modes in order not to trigger issues.
-
-If the userspace hasn't been prepared to ignore the unreliable "opened"
-events and the unreliable initial state notification, Linux users can use
-the following kernel parameters to handle the possible issues:
-A. button.lid_init_state=method:
- When this option is specified, the ACPI button driver reports the
- initial lid state using the returning value of the _LID control method
- and whether the "opened"/"closed" events are paired fully relies on the
- firmware implementation.
- This option can be used to fix some platforms where the returning value
- of the _LID control method is reliable but the initial lid state
- notification is missing.
- This option is the default behavior during the period the userspace
- isn't ready to handle the buggy AML tables.
-B. button.lid_init_state=open:
- When this option is specified, the ACPI button driver always reports the
- initial lid state as "opened" and whether the "opened"/"closed" events
- are paired fully relies on the firmware implementation.
- This may fix some platforms where the returning value of the _LID
- control method is not reliable and the initial lid state notification is
- missing.
-
-If the userspace has been prepared to ignore the unreliable "opened" events
-and the unreliable initial state notification, Linux users should always
-use the following kernel parameter:
-C. button.lid_init_state=ignore:
- When this option is specified, the ACPI button driver never reports the
- initial lid state and there is a compensation mechanism implemented to
- ensure that the reliable "closed" notifications can always be delievered
- to the userspace by always pairing "closed" input events with complement
- "opened" input events. But there is still no guarantee that the "opened"
- notifications can be delivered to the userspace when the lid is actually
- opens given that some AML tables do not send "opened" notifications
- reliably.
- In this mode, if everything is correctly implemented by the platform
- firmware, the old userspace programs should still work. Otherwise, the
- new userspace programs are required to work with the ACPI button driver.
- This option will be the default behavior after the userspace is ready to
- handle the buggy AML tables.
diff --git a/Documentation/acpi/aml-debugger.txt b/Documentation/acpi/aml-debugger.txt
deleted file mode 100644
index e851cc5de63f..000000000000
--- a/Documentation/acpi/aml-debugger.txt
+++ /dev/null
@@ -1,66 +0,0 @@
-The AML Debugger
-
-Copyright (C) 2016, Intel Corporation
-Author: Lv Zheng <lv.zheng@intel.com>
-
-
-This document describes the usage of the AML debugger embedded in the Linux
-kernel.
-
-1. Build the debugger
-
- The following kernel configuration items are required to enable the AML
- debugger interface from the Linux kernel:
-
- CONFIG_ACPI_DEBUGGER=y
- CONFIG_ACPI_DEBUGGER_USER=m
-
- The userspace utilities can be built from the kernel source tree using
- the following commands:
-
- $ cd tools
- $ make acpi
-
- The resultant userspace tool binary is then located at:
-
- tools/acpi/power/acpi/acpidbg/acpidbg
-
- It can be installed to system directories by running "make install" (as a
- sufficiently privileged user).
-
-2. Start the userspace debugger interface
-
- After booting the kernel with the debugger built-in, the debugger can be
- started by using the following commands:
-
- # mount -t debugfs none /sys/kernel/debug
- # modprobe acpi_dbg
- # tools/acpi/power/acpi/acpidbg/acpidbg
-
- That spawns the interactive AML debugger environment where you can execute
- debugger commands.
-
- The commands are documented in the "ACPICA Overview and Programmer Reference"
- that can be downloaded from
-
- https://acpica.org/documentation
-
- The detailed debugger commands reference is located in Chapter 12 "ACPICA
- Debugger Reference". The "help" command can be used for a quick reference.
-
-3. Stop the userspace debugger interface
-
- The interactive debugger interface can be closed by pressing Ctrl+C or using
- the "quit" or "exit" commands. When finished, unload the module with:
-
- # rmmod acpi_dbg
-
- The module unloading may fail if there is an acpidbg instance running.
-
-4. Run the debugger in a script
-
- It may be useful to run the AML debugger in a test script. "acpidbg" supports
- this in a special "batch" mode. For example, the following command outputs
- the entire ACPI namespace:
-
- # acpidbg -b "namespace"
diff --git a/Documentation/acpi/apei/einj.txt b/Documentation/acpi/apei/einj.txt
deleted file mode 100644
index e550c8b98139..000000000000
--- a/Documentation/acpi/apei/einj.txt
+++ /dev/null
@@ -1,177 +0,0 @@
- APEI Error INJection
- ~~~~~~~~~~~~~~~~~~~~
-
-EINJ provides a hardware error injection mechanism. It is very useful
-for debugging and testing APEI and RAS features in general.
-
-You need to check whether your BIOS supports EINJ first. For that, look
-for early boot messages similar to this one:
-
-ACPI: EINJ 0x000000007370A000 000150 (v01 INTEL 00000001 INTL 00000001)
-
-which shows that the BIOS is exposing an EINJ table - it is the
-mechanism through which the injection is done.
-
-Alternatively, look in /sys/firmware/acpi/tables for an "EINJ" file,
-which is a different representation of the same thing.
-
-It doesn't necessarily mean that EINJ is not supported if those above
-don't exist: before you give up, go into BIOS setup to see if the BIOS
-has an option to enable error injection. Look for something called WHEA
-or similar. Often, you need to enable an ACPI5 support option prior, in
-order to see the APEI,EINJ,... functionality supported and exposed by
-the BIOS menu.
-
-To use EINJ, make sure the following are options enabled in your kernel
-configuration:
-
-CONFIG_DEBUG_FS
-CONFIG_ACPI_APEI
-CONFIG_ACPI_APEI_EINJ
-
-The EINJ user interface is in <debugfs mount point>/apei/einj.
-
-The following files belong to it:
-
-- available_error_type
-
- This file shows which error types are supported:
-
- Error Type Value Error Description
- ================ =================
- 0x00000001 Processor Correctable
- 0x00000002 Processor Uncorrectable non-fatal
- 0x00000004 Processor Uncorrectable fatal
- 0x00000008 Memory Correctable
- 0x00000010 Memory Uncorrectable non-fatal
- 0x00000020 Memory Uncorrectable fatal
- 0x00000040 PCI Express Correctable
- 0x00000080 PCI Express Uncorrectable fatal
- 0x00000100 PCI Express Uncorrectable non-fatal
- 0x00000200 Platform Correctable
- 0x00000400 Platform Uncorrectable non-fatal
- 0x00000800 Platform Uncorrectable fatal
-
- The format of the file contents are as above, except present are only
- the available error types.
-
-- error_type
-
- Set the value of the error type being injected. Possible error types
- are defined in the file available_error_type above.
-
-- error_inject
-
- Write any integer to this file to trigger the error injection. Make
- sure you have specified all necessary error parameters, i.e. this
- write should be the last step when injecting errors.
-
-- flags
-
- Present for kernel versions 3.13 and above. Used to specify which
- of param{1..4} are valid and should be used by the firmware during
- injection. Value is a bitmask as specified in ACPI5.0 spec for the
- SET_ERROR_TYPE_WITH_ADDRESS data structure:
-
- Bit 0 - Processor APIC field valid (see param3 below).
- Bit 1 - Memory address and mask valid (param1 and param2).
- Bit 2 - PCIe (seg,bus,dev,fn) valid (see param4 below).
-
- If set to zero, legacy behavior is mimicked where the type of
- injection specifies just one bit set, and param1 is multiplexed.
-
-- param1
-
- This file is used to set the first error parameter value. Its effect
- depends on the error type specified in error_type. For example, if
- error type is memory related type, the param1 should be a valid
- physical memory address. [Unless "flag" is set - see above]
-
-- param2
-
- Same use as param1 above. For example, if error type is of memory
- related type, then param2 should be a physical memory address mask.
- Linux requires page or narrower granularity, say, 0xfffffffffffff000.
-
-- param3
-
- Used when the 0x1 bit is set in "flags" to specify the APIC id
-
-- param4
- Used when the 0x4 bit is set in "flags" to specify target PCIe device
-
-- notrigger
-
- The error injection mechanism is a two-step process. First inject the
- error, then perform some actions to trigger it. Setting "notrigger"
- to 1 skips the trigger phase, which *may* allow the user to cause the
- error in some other context by a simple access to the CPU, memory
- location, or device that is the target of the error injection. Whether
- this actually works depends on what operations the BIOS actually
- includes in the trigger phase.
-
-BIOS versions based on the ACPI 4.0 specification have limited options
-in controlling where the errors are injected. Your BIOS may support an
-extension (enabled with the param_extension=1 module parameter, or boot
-command line einj.param_extension=1). This allows the address and mask
-for memory injections to be specified by the param1 and param2 files in
-apei/einj.
-
-BIOS versions based on the ACPI 5.0 specification have more control over
-the target of the injection. For processor-related errors (type 0x1, 0x2
-and 0x4), you can set flags to 0x3 (param3 for bit 0, and param1 and
-param2 for bit 1) so that you have more information added to the error
-signature being injected. The actual data passed is this:
-
- memory_address = param1;
- memory_address_range = param2;
- apicid = param3;
- pcie_sbdf = param4;
-
-For memory errors (type 0x8, 0x10 and 0x20) the address is set using
-param1 with a mask in param2 (0x0 is equivalent to all ones). For PCI
-express errors (type 0x40, 0x80 and 0x100) the segment, bus, device and
-function are specified using param1:
-
- 31 24 23 16 15 11 10 8 7 0
- +-------------------------------------------------+
- | segment | bus | device | function | reserved |
- +-------------------------------------------------+
-
-Anyway, you get the idea, if there's doubt just take a look at the code
-in drivers/acpi/apei/einj.c.
-
-An ACPI 5.0 BIOS may also allow vendor-specific errors to be injected.
-In this case a file named vendor will contain identifying information
-from the BIOS that hopefully will allow an application wishing to use
-the vendor-specific extension to tell that they are running on a BIOS
-that supports it. All vendor extensions have the 0x80000000 bit set in
-error_type. A file vendor_flags controls the interpretation of param1
-and param2 (1 = PROCESSOR, 2 = MEMORY, 4 = PCI). See your BIOS vendor
-documentation for details (and expect changes to this API if vendors
-creativity in using this feature expands beyond our expectations).
-
-
-An error injection example:
-
-# cd /sys/kernel/debug/apei/einj
-# cat available_error_type # See which errors can be injected
-0x00000002 Processor Uncorrectable non-fatal
-0x00000008 Memory Correctable
-0x00000010 Memory Uncorrectable non-fatal
-# echo 0x12345000 > param1 # Set memory address for injection
-# echo $((-1 << 12)) > param2 # Mask 0xfffffffffffff000 - anywhere in this page
-# echo 0x8 > error_type # Choose correctable memory error
-# echo 1 > error_inject # Inject now
-
-You should see something like this in dmesg:
-
-[22715.830801] EDAC sbridge MC3: HANDLING MCE MEMORY ERROR
-[22715.834759] EDAC sbridge MC3: CPU 0: Machine Check Event: 0 Bank 7: 8c00004000010090
-[22715.834759] EDAC sbridge MC3: TSC 0
-[22715.834759] EDAC sbridge MC3: ADDR 12345000 EDAC sbridge MC3: MISC 144780c86
-[22715.834759] EDAC sbridge MC3: PROCESSOR 0:306e7 TIME 1422553404 SOCKET 0 APIC 0
-[22716.616173] EDAC MC3: 1 CE memory read error on CPU_SrcID#0_Channel#0_DIMM#0 (channel:0 slot:0 page:0x12345 offset:0x0 grain:32 syndrome:0x0 - area:DRAM err_code:0001:0090 socket:0 channel_mask:1 rank:0)
-
-For more information about EINJ, please refer to ACPI specification
-version 4.0, section 17.5 and ACPI 5.0, section 18.6.
diff --git a/Documentation/acpi/apei/output_format.txt b/Documentation/acpi/apei/output_format.txt
deleted file mode 100644
index 0c49c197c47a..000000000000
--- a/Documentation/acpi/apei/output_format.txt
+++ /dev/null
@@ -1,147 +0,0 @@
- APEI output format
- ~~~~~~~~~~~~~~~~~~
-
-APEI uses printk as hardware error reporting interface, the output
-format is as follow.
-
-<error record> :=
-APEI generic hardware error status
-severity: <integer>, <severity string>
-section: <integer>, severity: <integer>, <severity string>
-flags: <integer>
-<section flags strings>
-fru_id: <uuid string>
-fru_text: <string>
-section_type: <section type string>
-<section data>
-
-<severity string>* := recoverable | fatal | corrected | info
-
-<section flags strings># :=
-[primary][, containment warning][, reset][, threshold exceeded]\
-[, resource not accessible][, latent error]
-
-<section type string> := generic processor error | memory error | \
-PCIe error | unknown, <uuid string>
-
-<section data> :=
-<generic processor section data> | <memory section data> | \
-<pcie section data> | <null>
-
-<generic processor section data> :=
-[processor_type: <integer>, <proc type string>]
-[processor_isa: <integer>, <proc isa string>]
-[error_type: <integer>
-<proc error type strings>]
-[operation: <integer>, <proc operation string>]
-[flags: <integer>
-<proc flags strings>]
-[level: <integer>]
-[version_info: <integer>]
-[processor_id: <integer>]
-[target_address: <integer>]
-[requestor_id: <integer>]
-[responder_id: <integer>]
-[IP: <integer>]
-
-<proc type string>* := IA32/X64 | IA64
-
-<proc isa string>* := IA32 | IA64 | X64
-
-<processor error type strings># :=
-[cache error][, TLB error][, bus error][, micro-architectural error]
-
-<proc operation string>* := unknown or generic | data read | data write | \
-instruction execution
-
-<proc flags strings># :=
-[restartable][, precise IP][, overflow][, corrected]
-
-<memory section data> :=
-[error_status: <integer>]
-[physical_address: <integer>]
-[physical_address_mask: <integer>]
-[node: <integer>]
-[card: <integer>]
-[module: <integer>]
-[bank: <integer>]
-[device: <integer>]
-[row: <integer>]
-[column: <integer>]
-[bit_position: <integer>]
-[requestor_id: <integer>]
-[responder_id: <integer>]
-[target_id: <integer>]
-[error_type: <integer>, <mem error type string>]
-
-<mem error type string>* :=
-unknown | no error | single-bit ECC | multi-bit ECC | \
-single-symbol chipkill ECC | multi-symbol chipkill ECC | master abort | \
-target abort | parity error | watchdog timeout | invalid address | \
-mirror Broken | memory sparing | scrub corrected error | \
-scrub uncorrected error
-
-<pcie section data> :=
-[port_type: <integer>, <pcie port type string>]
-[version: <integer>.<integer>]
-[command: <integer>, status: <integer>]
-[device_id: <integer>:<integer>:<integer>.<integer>
-slot: <integer>
-secondary_bus: <integer>
-vendor_id: <integer>, device_id: <integer>
-class_code: <integer>]
-[serial number: <integer>, <integer>]
-[bridge: secondary_status: <integer>, control: <integer>]
-[aer_status: <integer>, aer_mask: <integer>
-<aer status string>
-[aer_uncor_severity: <integer>]
-aer_layer=<aer layer string>, aer_agent=<aer agent string>
-aer_tlp_header: <integer> <integer> <integer> <integer>]
-
-<pcie port type string>* := PCIe end point | legacy PCI end point | \
-unknown | unknown | root port | upstream switch port | \
-downstream switch port | PCIe to PCI/PCI-X bridge | \
-PCI/PCI-X to PCIe bridge | root complex integrated endpoint device | \
-root complex event collector
-
-if section severity is fatal or recoverable
-<aer status string># :=
-unknown | unknown | unknown | unknown | Data Link Protocol | \
-unknown | unknown | unknown | unknown | unknown | unknown | unknown | \
-Poisoned TLP | Flow Control Protocol | Completion Timeout | \
-Completer Abort | Unexpected Completion | Receiver Overflow | \
-Malformed TLP | ECRC | Unsupported Request
-else
-<aer status string># :=
-Receiver Error | unknown | unknown | unknown | unknown | unknown | \
-Bad TLP | Bad DLLP | RELAY_NUM Rollover | unknown | unknown | unknown | \
-Replay Timer Timeout | Advisory Non-Fatal
-fi
-
-<aer layer string> :=
-Physical Layer | Data Link Layer | Transaction Layer
-
-<aer agent string> :=
-Receiver ID | Requester ID | Completer ID | Transmitter ID
-
-Where, [] designate corresponding content is optional
-
-All <field string> description with * has the following format:
-
-field: <integer>, <field string>
-
-Where value of <integer> should be the position of "string" in <field
-string> description. Otherwise, <field string> will be "unknown".
-
-All <field strings> description with # has the following format:
-
-field: <integer>
-<field strings>
-
-Where each string in <fields strings> corresponding to one set bit of
-<integer>. The bit position is the position of "string" in <field
-strings> description.
-
-For more detailed explanation of every field, please refer to UEFI
-specification version 2.3 or later, section Appendix N: Common
-Platform Error Record.
diff --git a/Documentation/acpi/cppc_sysfs.txt b/Documentation/acpi/cppc_sysfs.txt
deleted file mode 100644
index f20fb445135d..000000000000
--- a/Documentation/acpi/cppc_sysfs.txt
+++ /dev/null
@@ -1,69 +0,0 @@
-
- Collaborative Processor Performance Control (CPPC)
-
-CPPC defined in the ACPI spec describes a mechanism for the OS to manage the
-performance of a logical processor on a contigious and abstract performance
-scale. CPPC exposes a set of registers to describe abstract performance scale,
-to request performance levels and to measure per-cpu delivered performance.
-
-For more details on CPPC please refer to the ACPI specification at:
-
-http://uefi.org/specifications
-
-Some of the CPPC registers are exposed via sysfs under:
-
-/sys/devices/system/cpu/cpuX/acpi_cppc/
-
-for each cpu X
-
---------------------------------------------------------------------------------
-
-$ ls -lR /sys/devices/system/cpu/cpu0/acpi_cppc/
-/sys/devices/system/cpu/cpu0/acpi_cppc/:
-total 0
--r--r--r-- 1 root root 65536 Mar 5 19:38 feedback_ctrs
--r--r--r-- 1 root root 65536 Mar 5 19:38 highest_perf
--r--r--r-- 1 root root 65536 Mar 5 19:38 lowest_freq
--r--r--r-- 1 root root 65536 Mar 5 19:38 lowest_nonlinear_perf
--r--r--r-- 1 root root 65536 Mar 5 19:38 lowest_perf
--r--r--r-- 1 root root 65536 Mar 5 19:38 nominal_freq
--r--r--r-- 1 root root 65536 Mar 5 19:38 nominal_perf
--r--r--r-- 1 root root 65536 Mar 5 19:38 reference_perf
--r--r--r-- 1 root root 65536 Mar 5 19:38 wraparound_time
-
---------------------------------------------------------------------------------
-
-* highest_perf : Highest performance of this processor (abstract scale).
-* nominal_perf : Highest sustained performance of this processor (abstract scale).
-* lowest_nonlinear_perf : Lowest performance of this processor with nonlinear
- power savings (abstract scale).
-* lowest_perf : Lowest performance of this processor (abstract scale).
-
-* lowest_freq : CPU frequency corresponding to lowest_perf (in MHz).
-* nominal_freq : CPU frequency corresponding to nominal_perf (in MHz).
- The above frequencies should only be used to report processor performance in
- freqency instead of abstract scale. These values should not be used for any
- functional decisions.
-
-* feedback_ctrs : Includes both Reference and delivered performance counter.
- Reference counter ticks up proportional to processor's reference performance.
- Delivered counter ticks up proportional to processor's delivered performance.
-* wraparound_time: Minimum time for the feedback counters to wraparound (seconds).
-* reference_perf : Performance level at which reference performance counter
- accumulates (abstract scale).
-
---------------------------------------------------------------------------------
-
- Computing Average Delivered Performance
-
-Below describes the steps to compute the average performance delivered by taking
-two different snapshots of feedback counters at time T1 and T2.
-
-T1: Read feedback_ctrs as fbc_t1
- Wait or run some workload
-T2: Read feedback_ctrs as fbc_t2
-
-delivered_counter_delta = fbc_t2[del] - fbc_t1[del]
-reference_counter_delta = fbc_t2[ref] - fbc_t1[ref]
-
-delivered_perf = (refernce_perf x delivered_counter_delta) / reference_counter_delta
diff --git a/Documentation/acpi/debug.txt b/Documentation/acpi/debug.txt
deleted file mode 100644
index 65bf47c46b6d..000000000000
--- a/Documentation/acpi/debug.txt
+++ /dev/null
@@ -1,148 +0,0 @@
- ACPI Debug Output
-
-
-The ACPI CA, the Linux ACPI core, and some ACPI drivers can generate debug
-output. This document describes how to use this facility.
-
-Compile-time configuration
---------------------------
-
-ACPI debug output is globally enabled by CONFIG_ACPI_DEBUG. If this config
-option is turned off, the debug messages are not even built into the
-kernel.
-
-Boot- and run-time configuration
---------------------------------
-
-When CONFIG_ACPI_DEBUG=y, you can select the component and level of messages
-you're interested in. At boot-time, use the acpi.debug_layer and
-acpi.debug_level kernel command line options. After boot, you can use the
-debug_layer and debug_level files in /sys/module/acpi/parameters/ to control
-the debug messages.
-
-debug_layer (component)
------------------------
-
-The "debug_layer" is a mask that selects components of interest, e.g., a
-specific driver or part of the ACPI interpreter. To build the debug_layer
-bitmask, look for the "#define _COMPONENT" in an ACPI source file.
-
-You can set the debug_layer mask at boot-time using the acpi.debug_layer
-command line argument, and you can change it after boot by writing values
-to /sys/module/acpi/parameters/debug_layer.
-
-The possible components are defined in include/acpi/acoutput.h and
-include/acpi/acpi_drivers.h. Reading /sys/module/acpi/parameters/debug_layer
-shows the supported mask values, currently these:
-
- ACPI_UTILITIES 0x00000001
- ACPI_HARDWARE 0x00000002
- ACPI_EVENTS 0x00000004
- ACPI_TABLES 0x00000008
- ACPI_NAMESPACE 0x00000010
- ACPI_PARSER 0x00000020
- ACPI_DISPATCHER 0x00000040
- ACPI_EXECUTER 0x00000080
- ACPI_RESOURCES 0x00000100
- ACPI_CA_DEBUGGER 0x00000200
- ACPI_OS_SERVICES 0x00000400
- ACPI_CA_DISASSEMBLER 0x00000800
- ACPI_COMPILER 0x00001000
- ACPI_TOOLS 0x00002000
- ACPI_BUS_COMPONENT 0x00010000
- ACPI_AC_COMPONENT 0x00020000
- ACPI_BATTERY_COMPONENT 0x00040000
- ACPI_BUTTON_COMPONENT 0x00080000
- ACPI_SBS_COMPONENT 0x00100000
- ACPI_FAN_COMPONENT 0x00200000
- ACPI_PCI_COMPONENT 0x00400000
- ACPI_POWER_COMPONENT 0x00800000
- ACPI_CONTAINER_COMPONENT 0x01000000
- ACPI_SYSTEM_COMPONENT 0x02000000
- ACPI_THERMAL_COMPONENT 0x04000000
- ACPI_MEMORY_DEVICE_COMPONENT 0x08000000
- ACPI_VIDEO_COMPONENT 0x10000000
- ACPI_PROCESSOR_COMPONENT 0x20000000
-
-debug_level
------------
-
-The "debug_level" is a mask that selects different types of messages, e.g.,
-those related to initialization, method execution, informational messages, etc.
-To build debug_level, look at the level specified in an ACPI_DEBUG_PRINT()
-statement.
-
-The ACPI interpreter uses several different levels, but the Linux
-ACPI core and ACPI drivers generally only use ACPI_LV_INFO.
-
-You can set the debug_level mask at boot-time using the acpi.debug_level
-command line argument, and you can change it after boot by writing values
-to /sys/module/acpi/parameters/debug_level.
-
-The possible levels are defined in include/acpi/acoutput.h. Reading
-/sys/module/acpi/parameters/debug_level shows the supported mask values,
-currently these:
-
- ACPI_LV_INIT 0x00000001
- ACPI_LV_DEBUG_OBJECT 0x00000002
- ACPI_LV_INFO 0x00000004
- ACPI_LV_INIT_NAMES 0x00000020
- ACPI_LV_PARSE 0x00000040
- ACPI_LV_LOAD 0x00000080
- ACPI_LV_DISPATCH 0x00000100
- ACPI_LV_EXEC 0x00000200
- ACPI_LV_NAMES 0x00000400
- ACPI_LV_OPREGION 0x00000800
- ACPI_LV_BFIELD 0x00001000
- ACPI_LV_TABLES 0x00002000
- ACPI_LV_VALUES 0x00004000
- ACPI_LV_OBJECTS 0x00008000
- ACPI_LV_RESOURCES 0x00010000
- ACPI_LV_USER_REQUESTS 0x00020000
- ACPI_LV_PACKAGE 0x00040000
- ACPI_LV_ALLOCATIONS 0x00100000
- ACPI_LV_FUNCTIONS 0x00200000
- ACPI_LV_OPTIMIZATIONS 0x00400000
- ACPI_LV_MUTEX 0x01000000
- ACPI_LV_THREADS 0x02000000
- ACPI_LV_IO 0x04000000
- ACPI_LV_INTERRUPTS 0x08000000
- ACPI_LV_AML_DISASSEMBLE 0x10000000
- ACPI_LV_VERBOSE_INFO 0x20000000
- ACPI_LV_FULL_TABLES 0x40000000
- ACPI_LV_EVENTS 0x80000000
-
-Examples
---------
-
-For example, drivers/acpi/bus.c contains this:
-
- #define _COMPONENT ACPI_BUS_COMPONENT
- ...
- ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Device insertion detected\n"));
-
-To turn on this message, set the ACPI_BUS_COMPONENT bit in acpi.debug_layer
-and the ACPI_LV_INFO bit in acpi.debug_level. (The ACPI_DEBUG_PRINT
-statement uses ACPI_DB_INFO, which is macro based on the ACPI_LV_INFO
-definition.)
-
-Enable all AML "Debug" output (stores to the Debug object while interpreting
-AML) during boot:
-
- acpi.debug_layer=0xffffffff acpi.debug_level=0x2
-
-Enable PCI and PCI interrupt routing debug messages:
-
- acpi.debug_layer=0x400000 acpi.debug_level=0x4
-
-Enable all ACPI hardware-related messages:
-
- acpi.debug_layer=0x2 acpi.debug_level=0xffffffff
-
-Enable all ACPI_DB_INFO messages after boot:
-
- # echo 0x4 > /sys/module/acpi/parameters/debug_level
-
-Show all valid component values:
-
- # cat /sys/module/acpi/parameters/debug_layer
diff --git a/Documentation/acpi/dsd/data-node-references.txt b/Documentation/acpi/dsd/data-node-references.txt
deleted file mode 100644
index c3871565c8cf..000000000000
--- a/Documentation/acpi/dsd/data-node-references.txt
+++ /dev/null
@@ -1,89 +0,0 @@
-Copyright (C) 2018 Intel Corporation
-Author: Sakari Ailus <sakari.ailus@linux.intel.com>
-
-
-Referencing hierarchical data nodes
------------------------------------
-
-ACPI in general allows referring to device objects in the tree only.
-Hierarchical data extension nodes may not be referred to directly, hence this
-document defines a scheme to implement such references.
-
-A reference consist of the device object name followed by one or more
-hierarchical data extension [1] keys. Specifically, the hierarchical data
-extension node which is referred to by the key shall lie directly under the
-parent object i.e. either the device object or another hierarchical data
-extension node.
-
-The keys in the hierarchical data nodes shall consist of the name of the node,
-"@" character and the number of the node in hexadecimal notation (without pre-
-or postfixes). The same ACPI object shall include the _DSD property extension
-with a property "reg" that shall have the same numerical value as the number of
-the node.
-
-In case a hierarchical data extensions node has no numerical value, then the
-"reg" property shall be omitted from the ACPI object's _DSD properties and the
-"@" character and the number shall be omitted from the hierarchical data
-extension key.
-
-
-Example
--------
-
- In the ASL snippet below, the "reference" _DSD property [2] contains a
- device object reference to DEV0 and under that device object, a
- hierarchical data extension key "node@1" referring to the NOD1 object
- and lastly, a hierarchical data extension key "anothernode" referring to
- the ANOD object which is also the final target node of the reference.
-
- Device (DEV0)
- {
- Name (_DSD, Package () {
- ToUUID("dbb8e3e6-5886-4ba6-8795-1319f52a966b"),
- Package () {
- Package () { "node@0", NOD0 },
- Package () { "node@1", NOD1 },
- }
- })
- Name (NOD0, Package() {
- ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
- Package () {
- Package () { "random-property", 3 },
- }
- })
- Name (NOD1, Package() {
- ToUUID("dbb8e3e6-5886-4ba6-8795-1319f52a966b"),
- Package () {
- Package () { "anothernode", ANOD },
- }
- })
- Name (ANOD, Package() {
- ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
- Package () {
- Package () { "random-property", 0 },
- }
- })
- }
-
- Device (DEV1)
- {
- Name (_DSD, Package () {
- ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
- Package () {
- Package () { "reference", ^DEV0, "node@1", "anothernode" },
- }
- })
- }
-
-Please also see a graph example in graph.txt .
-
-References
-----------
-
-[1] Hierarchical Data Extension UUID For _DSD.
- <URL:http://www.uefi.org/sites/default/files/resources/_DSD-hierarchical-data-extension-UUID-v1.1.pdf>,
- referenced 2018-07-17.
-
-[2] Device Properties UUID For _DSD.
- <URL:http://www.uefi.org/sites/default/files/resources/_DSD-device-properties-UUID.pdf>,
- referenced 2016-10-04.
diff --git a/Documentation/acpi/dsd/graph.txt b/Documentation/acpi/dsd/graph.txt
deleted file mode 100644
index b9ce910781dc..000000000000
--- a/Documentation/acpi/dsd/graph.txt
+++ /dev/null
@@ -1,174 +0,0 @@
-Graphs
-
-
-_DSD
-----
-
-_DSD (Device Specific Data) [7] is a predefined ACPI device
-configuration object that can be used to convey information on
-hardware features which are not specifically covered by the ACPI
-specification [1][6]. There are two _DSD extensions that are relevant
-for graphs: property [4] and hierarchical data extensions [5]. The
-property extension provides generic key-value pairs whereas the
-hierarchical data extension supports nodes with references to other
-nodes, forming a tree. The nodes in the tree may contain properties as
-defined by the property extension. The two extensions together provide
-a tree-like structure with zero or more properties (key-value pairs)
-in each node of the tree.
-
-The data structure may be accessed at runtime by using the device_*
-and fwnode_* functions defined in include/linux/fwnode.h .
-
-Fwnode represents a generic firmware node object. It is independent on
-the firmware type. In ACPI, fwnodes are _DSD hierarchical data
-extensions objects. A device's _DSD object is represented by an
-fwnode.
-
-The data structure may be referenced to elsewhere in the ACPI tables
-by using a hard reference to the device itself and an index to the
-hierarchical data extension array on each depth.
-
-
-Ports and endpoints
--------------------
-
-The port and endpoint concepts are very similar to those in Devicetree
-[3]. A port represents an interface in a device, and an endpoint
-represents a connection to that interface.
-
-All port nodes are located under the device's "_DSD" node in the hierarchical
-data extension tree. The data extension related to each port node must begin
-with "port" and must be followed by the "@" character and the number of the port
-as its key. The target object it refers to should be called "PRTX", where "X" is
-the number of the port. An example of such a package would be:
-
- Package() { "port@4", PRT4 }
-
-Further on, endpoints are located under the port nodes. The hierarchical
-data extension key of the endpoint nodes must begin with
-"endpoint" and must be followed by the "@" character and the number of the
-endpoint. The object it refers to should be called "EPXY", where "X" is the
-number of the port and "Y" is the number of the endpoint. An example of such a
-package would be:
-
- Package() { "endpoint@0", EP40 }
-
-Each port node contains a property extension key "port", the value of which is
-the number of the port. Each endpoint is similarly numbered with a property
-extension key "reg", the value of which is the number of the endpoint. Port
-numbers must be unique within a device and endpoint numbers must be unique
-within a port. If a device object may only has a single port, then the number
-of that port shall be zero. Similarly, if a port may only have a single
-endpoint, the number of that endpoint shall be zero.
-
-The endpoint reference uses property extension with "remote-endpoint" property
-name followed by a reference in the same package. Such references consist of the
-the remote device reference, the first package entry of the port data extension
-reference under the device and finally the first package entry of the endpoint
-data extension reference under the port. Individual references thus appear as:
-
- Package() { device, "port@X", "endpoint@Y" }
-
-In the above example, "X" is the number of the port and "Y" is the number of the
-endpoint.
-
-The references to endpoints must be always done both ways, to the
-remote endpoint and back from the referred remote endpoint node.
-
-A simple example of this is show below:
-
- Scope (\_SB.PCI0.I2C2)
- {
- Device (CAM0)
- {
- Name (_DSD, Package () {
- ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
- Package () {
- Package () { "compatible", Package () { "nokia,smia" } },
- },
- ToUUID("dbb8e3e6-5886-4ba6-8795-1319f52a966b"),
- Package () {
- Package () { "port@0", PRT0 },
- }
- })
- Name (PRT0, Package() {
- ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
- Package () {
- Package () { "reg", 0 },
- },
- ToUUID("dbb8e3e6-5886-4ba6-8795-1319f52a966b"),
- Package () {
- Package () { "endpoint@0", EP00 },
- }
- })
- Name (EP00, Package() {
- ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
- Package () {
- Package () { "reg", 0 },
- Package () { "remote-endpoint", Package() { \_SB.PCI0.ISP, "port@4", "endpoint@0" } },
- }
- })
- }
- }
-
- Scope (\_SB.PCI0)
- {
- Device (ISP)
- {
- Name (_DSD, Package () {
- ToUUID("dbb8e3e6-5886-4ba6-8795-1319f52a966b"),
- Package () {
- Package () { "port@4", PRT4 },
- }
- })
-
- Name (PRT4, Package() {
- ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
- Package () {
- Package () { "reg", 4 }, /* CSI-2 port number */
- },
- ToUUID("dbb8e3e6-5886-4ba6-8795-1319f52a966b"),
- Package () {
- Package () { "endpoint@0", EP40 },
- }
- })
-
- Name (EP40, Package() {
- ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
- Package () {
- Package () { "reg", 0 },
- Package () { "remote-endpoint", Package () { \_SB.PCI0.I2C2.CAM0, "port@0", "endpoint@0" } },
- }
- })
- }
- }
-
-Here, the port 0 of the "CAM0" device is connected to the port 4 of
-the "ISP" device and vice versa.
-
-
-References
-----------
-
-[1] _DSD (Device Specific Data) Implementation Guide.
- <URL:http://www.uefi.org/sites/default/files/resources/_DSD-implementation-guide-toplevel-1_1.htm>,
- referenced 2016-10-03.
-
-[2] Devicetree. <URL:http://www.devicetree.org>, referenced 2016-10-03.
-
-[3] Documentation/devicetree/bindings/graph.txt
-
-[4] Device Properties UUID For _DSD.
- <URL:http://www.uefi.org/sites/default/files/resources/_DSD-device-properties-UUID.pdf>,
- referenced 2016-10-04.
-
-[5] Hierarchical Data Extension UUID For _DSD.
- <URL:http://www.uefi.org/sites/default/files/resources/_DSD-hierarchical-data-extension-UUID-v1.1.pdf>,
- referenced 2016-10-04.
-
-[6] Advanced Configuration and Power Interface Specification.
- <URL:http://www.uefi.org/sites/default/files/resources/ACPI_6_1.pdf>,
- referenced 2016-10-04.
-
-[7] _DSD Device Properties Usage Rules.
- Documentation/acpi/DSD-properties-rules.txt
diff --git a/Documentation/acpi/dsdt-override.txt b/Documentation/acpi/dsdt-override.txt
deleted file mode 100644
index 784841caa6e6..000000000000
--- a/Documentation/acpi/dsdt-override.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-Linux supports a method of overriding the BIOS DSDT:
-
-CONFIG_ACPI_CUSTOM_DSDT builds the image into the kernel.
-
-When to use this method is described in detail on the
-Linux/ACPI home page:
-https://01.org/linux-acpi/documentation/overriding-dsdt
diff --git a/Documentation/acpi/enumeration.txt b/Documentation/acpi/enumeration.txt
deleted file mode 100644
index 7bcf9c3d9fbe..000000000000
--- a/Documentation/acpi/enumeration.txt
+++ /dev/null
@@ -1,426 +0,0 @@
-ACPI based device enumeration
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-ACPI 5 introduced a set of new resources (UartTSerialBus, I2cSerialBus,
-SpiSerialBus, GpioIo and GpioInt) which can be used in enumerating slave
-devices behind serial bus controllers.
-
-In addition we are starting to see peripherals integrated in the
-SoC/Chipset to appear only in ACPI namespace. These are typically devices
-that are accessed through memory-mapped registers.
-
-In order to support this and re-use the existing drivers as much as
-possible we decided to do following:
-
- o Devices that have no bus connector resource are represented as
- platform devices.
-
- o Devices behind real busses where there is a connector resource
- are represented as struct spi_device or struct i2c_device
- (standard UARTs are not busses so there is no struct uart_device).
-
-As both ACPI and Device Tree represent a tree of devices (and their
-resources) this implementation follows the Device Tree way as much as
-possible.
-
-The ACPI implementation enumerates devices behind busses (platform, SPI and
-I2C), creates the physical devices and binds them to their ACPI handle in
-the ACPI namespace.
-
-This means that when ACPI_HANDLE(dev) returns non-NULL the device was
-enumerated from ACPI namespace. This handle can be used to extract other
-device-specific configuration. There is an example of this below.
-
-Platform bus support
-~~~~~~~~~~~~~~~~~~~~
-Since we are using platform devices to represent devices that are not
-connected to any physical bus we only need to implement a platform driver
-for the device and add supported ACPI IDs. If this same IP-block is used on
-some other non-ACPI platform, the driver might work out of the box or needs
-some minor changes.
-
-Adding ACPI support for an existing driver should be pretty
-straightforward. Here is the simplest example:
-
- #ifdef CONFIG_ACPI
- static const struct acpi_device_id mydrv_acpi_match[] = {
- /* ACPI IDs here */
- { }
- };
- MODULE_DEVICE_TABLE(acpi, mydrv_acpi_match);
- #endif
-
- static struct platform_driver my_driver = {
- ...
- .driver = {
- .acpi_match_table = ACPI_PTR(mydrv_acpi_match),
- },
- };
-
-If the driver needs to perform more complex initialization like getting and
-configuring GPIOs it can get its ACPI handle and extract this information
-from ACPI tables.
-
-DMA support
-~~~~~~~~~~~
-DMA controllers enumerated via ACPI should be registered in the system to
-provide generic access to their resources. For example, a driver that would
-like to be accessible to slave devices via generic API call
-dma_request_slave_channel() must register itself at the end of the probe
-function like this:
-
- err = devm_acpi_dma_controller_register(dev, xlate_func, dw);
- /* Handle the error if it's not a case of !CONFIG_ACPI */
-
-and implement custom xlate function if needed (usually acpi_dma_simple_xlate()
-is enough) which converts the FixedDMA resource provided by struct
-acpi_dma_spec into the corresponding DMA channel. A piece of code for that case
-could look like:
-
- #ifdef CONFIG_ACPI
- struct filter_args {
- /* Provide necessary information for the filter_func */
- ...
- };
-
- static bool filter_func(struct dma_chan *chan, void *param)
- {
- /* Choose the proper channel */
- ...
- }
-
- static struct dma_chan *xlate_func(struct acpi_dma_spec *dma_spec,
- struct acpi_dma *adma)
- {
- dma_cap_mask_t cap;
- struct filter_args args;
-
- /* Prepare arguments for filter_func */
- ...
- return dma_request_channel(cap, filter_func, &args);
- }
- #else
- static struct dma_chan *xlate_func(struct acpi_dma_spec *dma_spec,
- struct acpi_dma *adma)
- {
- return NULL;
- }
- #endif
-
-dma_request_slave_channel() will call xlate_func() for each registered DMA
-controller. In the xlate function the proper channel must be chosen based on
-information in struct acpi_dma_spec and the properties of the controller
-provided by struct acpi_dma.
-
-Clients must call dma_request_slave_channel() with the string parameter that
-corresponds to a specific FixedDMA resource. By default "tx" means the first
-entry of the FixedDMA resource array, "rx" means the second entry. The table
-below shows a layout:
-
- Device (I2C0)
- {
- ...
- Method (_CRS, 0, NotSerialized)
- {
- Name (DBUF, ResourceTemplate ()
- {
- FixedDMA (0x0018, 0x0004, Width32bit, _Y48)
- FixedDMA (0x0019, 0x0005, Width32bit, )
- })
- ...
- }
- }
-
-So, the FixedDMA with request line 0x0018 is "tx" and next one is "rx" in
-this example.
-
-In robust cases the client unfortunately needs to call
-acpi_dma_request_slave_chan_by_index() directly and therefore choose the
-specific FixedDMA resource by its index.
-
-SPI serial bus support
-~~~~~~~~~~~~~~~~~~~~~~
-Slave devices behind SPI bus have SpiSerialBus resource attached to them.
-This is extracted automatically by the SPI core and the slave devices are
-enumerated once spi_register_master() is called by the bus driver.
-
-Here is what the ACPI namespace for a SPI slave might look like:
-
- Device (EEP0)
- {
- Name (_ADR, 1)
- Name (_CID, Package() {
- "ATML0025",
- "AT25",
- })
- ...
- Method (_CRS, 0, NotSerialized)
- {
- SPISerialBus(1, PolarityLow, FourWireMode, 8,
- ControllerInitiated, 1000000, ClockPolarityLow,
- ClockPhaseFirst, "\\_SB.PCI0.SPI1",)
- }
- ...
-
-The SPI device drivers only need to add ACPI IDs in a similar way than with
-the platform device drivers. Below is an example where we add ACPI support
-to at25 SPI eeprom driver (this is meant for the above ACPI snippet):
-
- #ifdef CONFIG_ACPI
- static const struct acpi_device_id at25_acpi_match[] = {
- { "AT25", 0 },
- { },
- };
- MODULE_DEVICE_TABLE(acpi, at25_acpi_match);
- #endif
-
- static struct spi_driver at25_driver = {
- .driver = {
- ...
- .acpi_match_table = ACPI_PTR(at25_acpi_match),
- },
- };
-
-Note that this driver actually needs more information like page size of the
-eeprom etc. but at the time writing this there is no standard way of
-passing those. One idea is to return this in _DSM method like:
-
- Device (EEP0)
- {
- ...
- Method (_DSM, 4, NotSerialized)
- {
- Store (Package (6)
- {
- "byte-len", 1024,
- "addr-mode", 2,
- "page-size, 32
- }, Local0)
-
- // Check UUIDs etc.
-
- Return (Local0)
- }
-
-Then the at25 SPI driver can get this configuration by calling _DSM on its
-ACPI handle like:
-
- struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
- struct acpi_object_list input;
- acpi_status status;
-
- /* Fill in the input buffer */
-
- status = acpi_evaluate_object(ACPI_HANDLE(&spi->dev), "_DSM",
- &input, &output);
- if (ACPI_FAILURE(status))
- /* Handle the error */
-
- /* Extract the data here */
-
- kfree(output.pointer);
-
-I2C serial bus support
-~~~~~~~~~~~~~~~~~~~~~~
-The slaves behind I2C bus controller only need to add the ACPI IDs like
-with the platform and SPI drivers. The I2C core automatically enumerates
-any slave devices behind the controller device once the adapter is
-registered.
-
-Below is an example of how to add ACPI support to the existing mpu3050
-input driver:
-
- #ifdef CONFIG_ACPI
- static const struct acpi_device_id mpu3050_acpi_match[] = {
- { "MPU3050", 0 },
- { },
- };
- MODULE_DEVICE_TABLE(acpi, mpu3050_acpi_match);
- #endif
-
- static struct i2c_driver mpu3050_i2c_driver = {
- .driver = {
- .name = "mpu3050",
- .owner = THIS_MODULE,
- .pm = &mpu3050_pm,
- .of_match_table = mpu3050_of_match,
- .acpi_match_table = ACPI_PTR(mpu3050_acpi_match),
- },
- .probe = mpu3050_probe,
- .remove = mpu3050_remove,
- .id_table = mpu3050_ids,
- };
-
-GPIO support
-~~~~~~~~~~~~
-ACPI 5 introduced two new resources to describe GPIO connections: GpioIo
-and GpioInt. These resources can be used to pass GPIO numbers used by
-the device to the driver. ACPI 5.1 extended this with _DSD (Device
-Specific Data) which made it possible to name the GPIOs among other things.
-
-For example:
-
-Device (DEV)
-{
- Method (_CRS, 0, NotSerialized)
- {
- Name (SBUF, ResourceTemplate()
- {
- ...
- // Used to power on/off the device
- GpioIo (Exclusive, PullDefault, 0x0000, 0x0000,
- IoRestrictionOutputOnly, "\\_SB.PCI0.GPI0",
- 0x00, ResourceConsumer,,)
- {
- // Pin List
- 0x0055
- }
-
- // Interrupt for the device
- GpioInt (Edge, ActiveHigh, ExclusiveAndWake, PullNone,
- 0x0000, "\\_SB.PCI0.GPI0", 0x00, ResourceConsumer,,)
- {
- // Pin list
- 0x0058
- }
-
- ...
-
- }
-
- Return (SBUF)
- }
-
- // ACPI 5.1 _DSD used for naming the GPIOs
- Name (_DSD, Package ()
- {
- ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
- Package ()
- {
- Package () {"power-gpios", Package() {^DEV, 0, 0, 0 }},
- Package () {"irq-gpios", Package() {^DEV, 1, 0, 0 }},
- }
- })
- ...
-
-These GPIO numbers are controller relative and path "\\_SB.PCI0.GPI0"
-specifies the path to the controller. In order to use these GPIOs in Linux
-we need to translate them to the corresponding Linux GPIO descriptors.
-
-There is a standard GPIO API for that and is documented in
-Documentation/gpio/.
-
-In the above example we can get the corresponding two GPIO descriptors with
-a code like this:
-
- #include <linux/gpio/consumer.h>
- ...
-
- struct gpio_desc *irq_desc, *power_desc;
-
- irq_desc = gpiod_get(dev, "irq");
- if (IS_ERR(irq_desc))
- /* handle error */
-
- power_desc = gpiod_get(dev, "power");
- if (IS_ERR(power_desc))
- /* handle error */
-
- /* Now we can use the GPIO descriptors */
-
-There are also devm_* versions of these functions which release the
-descriptors once the device is released.
-
-See Documentation/acpi/gpio-properties.txt for more information about the
-_DSD binding related to GPIOs.
-
-MFD devices
-~~~~~~~~~~~
-The MFD devices register their children as platform devices. For the child
-devices there needs to be an ACPI handle that they can use to reference
-parts of the ACPI namespace that relate to them. In the Linux MFD subsystem
-we provide two ways:
-
- o The children share the parent ACPI handle.
- o The MFD cell can specify the ACPI id of the device.
-
-For the first case, the MFD drivers do not need to do anything. The
-resulting child platform device will have its ACPI_COMPANION() set to point
-to the parent device.
-
-If the ACPI namespace has a device that we can match using an ACPI id or ACPI
-adr, the cell should be set like:
-
- static struct mfd_cell_acpi_match my_subdevice_cell_acpi_match = {
- .pnpid = "XYZ0001",
- .adr = 0,
- };
-
- static struct mfd_cell my_subdevice_cell = {
- .name = "my_subdevice",
- /* set the resources relative to the parent */
- .acpi_match = &my_subdevice_cell_acpi_match,
- };
-
-The ACPI id "XYZ0001" is then used to lookup an ACPI device directly under
-the MFD device and if found, that ACPI companion device is bound to the
-resulting child platform device.
-
-Device Tree namespace link device ID
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The Device Tree protocol uses device identification based on the "compatible"
-property whose value is a string or an array of strings recognized as device
-identifiers by drivers and the driver core. The set of all those strings may be
-regarded as a device identification namespace analogous to the ACPI/PNP device
-ID namespace. Consequently, in principle it should not be necessary to allocate
-a new (and arguably redundant) ACPI/PNP device ID for a devices with an existing
-identification string in the Device Tree (DT) namespace, especially if that ID
-is only needed to indicate that a given device is compatible with another one,
-presumably having a matching driver in the kernel already.
-
-In ACPI, the device identification object called _CID (Compatible ID) is used to
-list the IDs of devices the given one is compatible with, but those IDs must
-belong to one of the namespaces prescribed by the ACPI specification (see
-Section 6.1.2 of ACPI 6.0 for details) and the DT namespace is not one of them.
-Moreover, the specification mandates that either a _HID or an _ADR identification
-object be present for all ACPI objects representing devices (Section 6.1 of ACPI
-6.0). For non-enumerable bus types that object must be _HID and its value must
-be a device ID from one of the namespaces prescribed by the specification too.
-
-The special DT namespace link device ID, PRP0001, provides a means to use the
-existing DT-compatible device identification in ACPI and to satisfy the above
-requirements following from the ACPI specification at the same time. Namely,
-if PRP0001 is returned by _HID, the ACPI subsystem will look for the
-"compatible" property in the device object's _DSD and will use the value of that
-property to identify the corresponding device in analogy with the original DT
-device identification algorithm. If the "compatible" property is not present
-or its value is not valid, the device will not be enumerated by the ACPI
-subsystem. Otherwise, it will be enumerated automatically as a platform device
-(except when an I2C or SPI link from the device to its parent is present, in
-which case the ACPI core will leave the device enumeration to the parent's
-driver) and the identification strings from the "compatible" property value will
-be used to find a driver for the device along with the device IDs listed by _CID
-(if present).
-
-Analogously, if PRP0001 is present in the list of device IDs returned by _CID,
-the identification strings listed by the "compatible" property value (if present
-and valid) will be used to look for a driver matching the device, but in that
-case their relative priority with respect to the other device IDs listed by
-_HID and _CID depends on the position of PRP0001 in the _CID return package.
-Specifically, the device IDs returned by _HID and preceding PRP0001 in the _CID
-return package will be checked first. Also in that case the bus type the device
-will be enumerated to depends on the device ID returned by _HID.
-
-It is valid to define device objects with a _HID returning PRP0001 and without
-the "compatible" property in the _DSD or a _CID as long as one of their
-ancestors provides a _DSD with a valid "compatible" property. Such device
-objects are then simply regarded as additional "blocks" providing hierarchical
-configuration information to the driver of the composite ancestor device.
-
-However, PRP0001 can only be returned from either _HID or _CID of a device
-object if all of the properties returned by the _DSD associated with it (either
-the _DSD of the device object itself or the _DSD of its ancestor in the
-"composite device" case described above) can be used in the ACPI environment.
-Otherwise, the _DSD itself is regarded as invalid and therefore the "compatible"
-property returned by it is meaningless.
-
-Refer to DSD-properties-rules.txt for more information.
diff --git a/Documentation/acpi/gpio-properties.txt b/Documentation/acpi/gpio-properties.txt
deleted file mode 100644
index 88c65cb5bf0a..000000000000
--- a/Documentation/acpi/gpio-properties.txt
+++ /dev/null
@@ -1,223 +0,0 @@
-_DSD Device Properties Related to GPIO
---------------------------------------
-
-With the release of ACPI 5.1, the _DSD configuration object finally
-allows names to be given to GPIOs (and other things as well) returned
-by _CRS. Previously, we were only able to use an integer index to find
-the corresponding GPIO, which is pretty error prone (it depends on
-the _CRS output ordering, for example).
-
-With _DSD we can now query GPIOs using a name instead of an integer
-index, like the ASL example below shows:
-
- // Bluetooth device with reset and shutdown GPIOs
- Device (BTH)
- {
- Name (_HID, ...)
-
- Name (_CRS, ResourceTemplate ()
- {
- GpioIo (Exclusive, PullUp, 0, 0, IoRestrictionInputOnly,
- "\\_SB.GPO0", 0, ResourceConsumer) {15}
- GpioIo (Exclusive, PullUp, 0, 0, IoRestrictionInputOnly,
- "\\_SB.GPO0", 0, ResourceConsumer) {27, 31}
- })
-
- Name (_DSD, Package ()
- {
- ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
- Package ()
- {
- Package () {"reset-gpios", Package() {^BTH, 1, 1, 0 }},
- Package () {"shutdown-gpios", Package() {^BTH, 0, 0, 0 }},
- }
- })
- }
-
-The format of the supported GPIO property is:
-
- Package () { "name", Package () { ref, index, pin, active_low }}
-
- ref - The device that has _CRS containing GpioIo()/GpioInt() resources,
- typically this is the device itself (BTH in our case).
- index - Index of the GpioIo()/GpioInt() resource in _CRS starting from zero.
- pin - Pin in the GpioIo()/GpioInt() resource. Typically this is zero.
- active_low - If 1 the GPIO is marked as active_low.
-
-Since ACPI GpioIo() resource does not have a field saying whether it is
-active low or high, the "active_low" argument can be used here. Setting
-it to 1 marks the GPIO as active low.
-
-In our Bluetooth example the "reset-gpios" refers to the second GpioIo()
-resource, second pin in that resource with the GPIO number of 31.
-
-It is possible to leave holes in the array of GPIOs. This is useful in
-cases like with SPI host controllers where some chip selects may be
-implemented as GPIOs and some as native signals. For example a SPI host
-controller can have chip selects 0 and 2 implemented as GPIOs and 1 as
-native:
-
- Package () {
- "cs-gpios",
- Package () {
- ^GPIO, 19, 0, 0, // chip select 0: GPIO
- 0, // chip select 1: native signal
- ^GPIO, 20, 0, 0, // chip select 2: GPIO
- }
- }
-
-Other supported properties
---------------------------
-
-Following Device Tree compatible device properties are also supported by
-_DSD device properties for GPIO controllers:
-
-- gpio-hog
-- output-high
-- output-low
-- input
-- line-name
-
-Example:
-
- Name (_DSD, Package () {
- // _DSD Hierarchical Properties Extension UUID
- ToUUID("dbb8e3e6-5886-4ba6-8795-1319f52a966b"),
- Package () {
- Package () {"hog-gpio8", "G8PU"}
- }
- })
-
- Name (G8PU, Package () {
- ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
- Package () {
- Package () {"gpio-hog", 1},
- Package () {"gpios", Package () {8, 0}},
- Package () {"output-high", 1},
- Package () {"line-name", "gpio8-pullup"},
- }
- })
-
-- gpio-line-names
-
-Example:
-
- Package () {
- "gpio-line-names",
- Package () {
- "SPI0_CS_N", "EXP2_INT", "MUX6_IO", "UART0_RXD", "MUX7_IO",
- "LVL_C_A1", "MUX0_IO", "SPI1_MISO"
- }
- }
-
-See Documentation/devicetree/bindings/gpio/gpio.txt for more information
-about these properties.
-
-ACPI GPIO Mappings Provided by Drivers
---------------------------------------
-
-There are systems in which the ACPI tables do not contain _DSD but provide _CRS
-with GpioIo()/GpioInt() resources and device drivers still need to work with
-them.
-
-In those cases ACPI device identification objects, _HID, _CID, _CLS, _SUB, _HRV,
-available to the driver can be used to identify the device and that is supposed
-to be sufficient to determine the meaning and purpose of all of the GPIO lines
-listed by the GpioIo()/GpioInt() resources returned by _CRS. In other words,
-the driver is supposed to know what to use the GpioIo()/GpioInt() resources for
-once it has identified the device. Having done that, it can simply assign names
-to the GPIO lines it is going to use and provide the GPIO subsystem with a
-mapping between those names and the ACPI GPIO resources corresponding to them.
-
-To do that, the driver needs to define a mapping table as a NULL-terminated
-array of struct acpi_gpio_mapping objects that each contain a name, a pointer
-to an array of line data (struct acpi_gpio_params) objects and the size of that
-array. Each struct acpi_gpio_params object consists of three fields,
-crs_entry_index, line_index, active_low, representing the index of the target
-GpioIo()/GpioInt() resource in _CRS starting from zero, the index of the target
-line in that resource starting from zero, and the active-low flag for that line,
-respectively, in analogy with the _DSD GPIO property format specified above.
-
-For the example Bluetooth device discussed previously the data structures in
-question would look like this:
-
-static const struct acpi_gpio_params reset_gpio = { 1, 1, false };
-static const struct acpi_gpio_params shutdown_gpio = { 0, 0, false };
-
-static const struct acpi_gpio_mapping bluetooth_acpi_gpios[] = {
- { "reset-gpios", &reset_gpio, 1 },
- { "shutdown-gpios", &shutdown_gpio, 1 },
- { },
-};
-
-Next, the mapping table needs to be passed as the second argument to
-acpi_dev_add_driver_gpios() that will register it with the ACPI device object
-pointed to by its first argument. That should be done in the driver's .probe()
-routine. On removal, the driver should unregister its GPIO mapping table by
-calling acpi_dev_remove_driver_gpios() on the ACPI device object where that
-table was previously registered.
-
-Using the _CRS fallback
------------------------
-
-If a device does not have _DSD or the driver does not create ACPI GPIO
-mapping, the Linux GPIO framework refuses to return any GPIOs. This is
-because the driver does not know what it actually gets. For example if we
-have a device like below:
-
- Device (BTH)
- {
- Name (_HID, ...)
-
- Name (_CRS, ResourceTemplate () {
- GpioIo (Exclusive, PullNone, 0, 0, IoRestrictionNone,
- "\\_SB.GPO0", 0, ResourceConsumer) {15}
- GpioIo (Exclusive, PullNone, 0, 0, IoRestrictionNone,
- "\\_SB.GPO0", 0, ResourceConsumer) {27}
- })
- }
-
-The driver might expect to get the right GPIO when it does:
-
- desc = gpiod_get(dev, "reset", GPIOD_OUT_LOW);
-
-but since there is no way to know the mapping between "reset" and
-the GpioIo() in _CRS desc will hold ERR_PTR(-ENOENT).
-
-The driver author can solve this by passing the mapping explictly
-(the recommended way and documented in the above chapter).
-
-The ACPI GPIO mapping tables should not contaminate drivers that are not
-knowing about which exact device they are servicing on. It implies that
-the ACPI GPIO mapping tables are hardly linked to ACPI ID and certain
-objects, as listed in the above chapter, of the device in question.
-
-Getting GPIO descriptor
------------------------
-
-There are two main approaches to get GPIO resource from ACPI:
- desc = gpiod_get(dev, connection_id, flags);
- desc = gpiod_get_index(dev, connection_id, index, flags);
-
-We may consider two different cases here, i.e. when connection ID is
-provided and otherwise.
-
-Case 1:
- desc = gpiod_get(dev, "non-null-connection-id", flags);
- desc = gpiod_get_index(dev, "non-null-connection-id", index, flags);
-
-Case 2:
- desc = gpiod_get(dev, NULL, flags);
- desc = gpiod_get_index(dev, NULL, index, flags);
-
-Case 1 assumes that corresponding ACPI device description must have
-defined device properties and will prevent to getting any GPIO resources
-otherwise.
-
-Case 2 explicitly tells GPIO core to look for resources in _CRS.
-
-Be aware that gpiod_get_index() in cases 1 and 2, assuming that there
-are two versions of ACPI device description provided and no mapping is
-present in the driver, will return different resources. That's why a
-certain driver has to handle them carefully as explained in previous
-chapter.
diff --git a/Documentation/acpi/i2c-muxes.txt b/Documentation/acpi/i2c-muxes.txt
deleted file mode 100644
index 9fcc4f0b885e..000000000000
--- a/Documentation/acpi/i2c-muxes.txt
+++ /dev/null
@@ -1,58 +0,0 @@
-ACPI I2C Muxes
---------------
-
-Describing an I2C device hierarchy that includes I2C muxes requires an ACPI
-Device () scope per mux channel.
-
-Consider this topology:
-
-+------+ +------+
-| SMB1 |-->| MUX0 |--CH00--> i2c client A (0x50)
-| | | 0x70 |--CH01--> i2c client B (0x50)
-+------+ +------+
-
-which corresponds to the following ASL:
-
-Device (SMB1)
-{
- Name (_HID, ...)
- Device (MUX0)
- {
- Name (_HID, ...)
- Name (_CRS, ResourceTemplate () {
- I2cSerialBus (0x70, ControllerInitiated, I2C_SPEED,
- AddressingMode7Bit, "^SMB1", 0x00,
- ResourceConsumer,,)
- }
-
- Device (CH00)
- {
- Name (_ADR, 0)
-
- Device (CLIA)
- {
- Name (_HID, ...)
- Name (_CRS, ResourceTemplate () {
- I2cSerialBus (0x50, ControllerInitiated, I2C_SPEED,
- AddressingMode7Bit, "^CH00", 0x00,
- ResourceConsumer,,)
- }
- }
- }
-
- Device (CH01)
- {
- Name (_ADR, 1)
-
- Device (CLIB)
- {
- Name (_HID, ...)
- Name (_CRS, ResourceTemplate () {
- I2cSerialBus (0x50, ControllerInitiated, I2C_SPEED,
- AddressingMode7Bit, "^CH01", 0x00,
- ResourceConsumer,,)
- }
- }
- }
- }
-}
diff --git a/Documentation/acpi/initrd_table_override.txt b/Documentation/acpi/initrd_table_override.txt
deleted file mode 100644
index eb651a6aa285..000000000000
--- a/Documentation/acpi/initrd_table_override.txt
+++ /dev/null
@@ -1,107 +0,0 @@
-Upgrading ACPI tables via initrd
-================================
-
-1) Introduction (What is this about)
-2) What is this for
-3) How does it work
-4) References (Where to retrieve userspace tools)
-
-1) What is this about
----------------------
-
-If the ACPI_TABLE_UPGRADE compile option is true, it is possible to
-upgrade the ACPI execution environment that is defined by the ACPI tables
-via upgrading the ACPI tables provided by the BIOS with an instrumented,
-modified, more recent version one, or installing brand new ACPI tables.
-
-For a full list of ACPI tables that can be upgraded/installed, take a look
-at the char *table_sigs[MAX_ACPI_SIGNATURE]; definition in
-drivers/acpi/tables.c.
-All ACPI tables iasl (Intel's ACPI compiler and disassembler) knows should
-be overridable, except:
- - ACPI_SIG_RSDP (has a signature of 6 bytes)
- - ACPI_SIG_FACS (does not have an ordinary ACPI table header)
-Both could get implemented as well.
-
-
-2) What is this for
--------------------
-
-Complain to your platform/BIOS vendor if you find a bug which is so severe
-that a workaround is not accepted in the Linux kernel. And this facility
-allows you to upgrade the buggy tables before your platform/BIOS vendor
-releases an upgraded BIOS binary.
-
-This facility can be used by platform/BIOS vendors to provide a Linux
-compatible environment without modifying the underlying platform firmware.
-
-This facility also provides a powerful feature to easily debug and test
-ACPI BIOS table compatibility with the Linux kernel by modifying old
-platform provided ACPI tables or inserting new ACPI tables.
-
-It can and should be enabled in any kernel because there is no functional
-change with not instrumented initrds.
-
-
-3) How does it work
--------------------
-
-# Extract the machine's ACPI tables:
-cd /tmp
-acpidump >acpidump
-acpixtract -a acpidump
-# Disassemble, modify and recompile them:
-iasl -d *.dat
-# For example add this statement into a _PRT (PCI Routing Table) function
-# of the DSDT:
-Store("HELLO WORLD", debug)
-# And increase the OEM Revision. For example, before modification:
-DefinitionBlock ("DSDT.aml", "DSDT", 2, "INTEL ", "TEMPLATE", 0x00000000)
-# After modification:
-DefinitionBlock ("DSDT.aml", "DSDT", 2, "INTEL ", "TEMPLATE", 0x00000001)
-iasl -sa dsdt.dsl
-# Add the raw ACPI tables to an uncompressed cpio archive.
-# They must be put into a /kernel/firmware/acpi directory inside the cpio
-# archive. Note that if the table put here matches a platform table
-# (similar Table Signature, and similar OEMID, and similar OEM Table ID)
-# with a more recent OEM Revision, the platform table will be upgraded by
-# this table. If the table put here doesn't match a platform table
-# (dissimilar Table Signature, or dissimilar OEMID, or dissimilar OEM Table
-# ID), this table will be appended.
-mkdir -p kernel/firmware/acpi
-cp dsdt.aml kernel/firmware/acpi
-# A maximum of "NR_ACPI_INITRD_TABLES (64)" tables are currently allowed
-# (see osl.c):
-iasl -sa facp.dsl
-iasl -sa ssdt1.dsl
-cp facp.aml kernel/firmware/acpi
-cp ssdt1.aml kernel/firmware/acpi
-# The uncompressed cpio archive must be the first. Other, typically
-# compressed cpio archives, must be concatenated on top of the uncompressed
-# one. Following command creates the uncompressed cpio archive and
-# concatenates the original initrd on top:
-find kernel | cpio -H newc --create > /boot/instrumented_initrd
-cat /boot/initrd >>/boot/instrumented_initrd
-# reboot with increased acpi debug level, e.g. boot params:
-acpi.debug_level=0x2 acpi.debug_layer=0xFFFFFFFF
-# and check your syslog:
-[ 1.268089] ACPI: PCI Interrupt Routing Table [\_SB_.PCI0._PRT]
-[ 1.272091] [ACPI Debug] String [0x0B] "HELLO WORLD"
-
-iasl is able to disassemble and recompile quite a lot different,
-also static ACPI tables.
-
-
-4) Where to retrieve userspace tools
-------------------------------------
-
-iasl and acpixtract are part of Intel's ACPICA project:
-http://acpica.org/
-and should be packaged by distributions (for example in the acpica package
-on SUSE).
-
-acpidump can be found in Len Browns pmtools:
-ftp://kernel.org/pub/linux/kernel/people/lenb/acpi/utils/pmtools/acpidump
-This tool is also part of the acpica package on SUSE.
-Alternatively, used ACPI tables can be retrieved via sysfs in latest kernels:
-/sys/firmware/acpi/tables
diff --git a/Documentation/acpi/linuxized-acpica.txt b/Documentation/acpi/linuxized-acpica.txt
deleted file mode 100644
index 3ad7b0dfb083..000000000000
--- a/Documentation/acpi/linuxized-acpica.txt
+++ /dev/null
@@ -1,262 +0,0 @@
-Linuxized ACPICA - Introduction to ACPICA Release Automation
-
-Copyright (C) 2013-2016, Intel Corporation
-Author: Lv Zheng <lv.zheng@intel.com>
-
-
-Abstract:
-
-This document describes the ACPICA project and the relationship between
-ACPICA and Linux. It also describes how ACPICA code in drivers/acpi/acpica,
-include/acpi and tools/power/acpi is automatically updated to follow the
-upstream.
-
-
-1. ACPICA Project
-
- The ACPI Component Architecture (ACPICA) project provides an operating
- system (OS)-independent reference implementation of the Advanced
- Configuration and Power Interface Specification (ACPI). It has been
- adapted by various host OSes. By directly integrating ACPICA, Linux can
- also benefit from the application experiences of ACPICA from other host
- OSes.
-
- The homepage of ACPICA project is: www.acpica.org, it is maintained and
- supported by Intel Corporation.
-
- The following figure depicts the Linux ACPI subsystem where the ACPICA
- adaptation is included:
-
- +---------------------------------------------------------+
- | |
- | +---------------------------------------------------+ |
- | | +------------------+ | |
- | | | Table Management | | |
- | | +------------------+ | |
- | | +----------------------+ | |
- | | | Namespace Management | | |
- | | +----------------------+ | |
- | | +------------------+ ACPICA Components | |
- | | | Event Management | | |
- | | +------------------+ | |
- | | +---------------------+ | |
- | | | Resource Management | | |
- | | +---------------------+ | |
- | | +---------------------+ | |
- | | | Hardware Management | | |
- | | +---------------------+ | |
- | +---------------------------------------------------+ | |
- | | | +------------------+ | | |
- | | | | OS Service Layer | | | |
- | | | +------------------+ | | |
- | | +-------------------------------------------------|-+ |
- | | +--------------------+ | |
- | | | Device Enumeration | | |
- | | +--------------------+ | |
- | | +------------------+ | |
- | | | Power Management | | |
- | | +------------------+ Linux/ACPI Components | |
- | | +--------------------+ | |
- | | | Thermal Management | | |
- | | +--------------------+ | |
- | | +--------------------------+ | |
- | | | Drivers for ACPI Devices | | |
- | | +--------------------------+ | |
- | | +--------+ | |
- | | | ...... | | |
- | | +--------+ | |
- | +---------------------------------------------------+ |
- | |
- +---------------------------------------------------------+
-
- Figure 1. Linux ACPI Software Components
-
- NOTE:
- A. OS Service Layer - Provided by Linux to offer OS dependent
- implementation of the predefined ACPICA interfaces (acpi_os_*).
- include/acpi/acpiosxf.h
- drivers/acpi/osl.c
- include/acpi/platform
- include/asm/acenv.h
- B. ACPICA Functionality - Released from ACPICA code base to offer
- OS independent implementation of the ACPICA interfaces (acpi_*).
- drivers/acpi/acpica
- include/acpi/ac*.h
- tools/power/acpi
- C. Linux/ACPI Functionality - Providing Linux specific ACPI
- functionality to the other Linux kernel subsystems and user space
- programs.
- drivers/acpi
- include/linux/acpi.h
- include/linux/acpi*.h
- include/acpi
- tools/power/acpi
- D. Architecture Specific ACPICA/ACPI Functionalities - Provided by the
- ACPI subsystem to offer architecture specific implementation of the
- ACPI interfaces. They are Linux specific components and are out of
- the scope of this document.
- include/asm/acpi.h
- include/asm/acpi*.h
- arch/*/acpi
-
-2. ACPICA Release
-
- The ACPICA project maintains its code base at the following repository URL:
- https://github.com/acpica/acpica.git. As a rule, a release is made every
- month.
-
- As the coding style adopted by the ACPICA project is not acceptable by
- Linux, there is a release process to convert the ACPICA git commits into
- Linux patches. The patches generated by this process are referred to as
- "linuxized ACPICA patches". The release process is carried out on a local
- copy the ACPICA git repository. Each commit in the monthly release is
- converted into a linuxized ACPICA patch. Together, they form the monthly
- ACPICA release patchset for the Linux ACPI community. This process is
- illustrated in the following figure:
-
- +-----------------------------+
- | acpica / master (-) commits |
- +-----------------------------+
- /|\ |
- | \|/
- | /---------------------\ +----------------------+
- | < Linuxize repo Utility >-->| old linuxized acpica |--+
- | \---------------------/ +----------------------+ |
- | |
- /---------\ |
- < git reset > \
- \---------/ \
- /|\ /+-+
- | / |
- +-----------------------------+ | |
- | acpica / master (+) commits | | |
- +-----------------------------+ | |
- | | |
- \|/ | |
- /-----------------------\ +----------------------+ | |
- < Linuxize repo Utilities >-->| new linuxized acpica |--+ |
- \-----------------------/ +----------------------+ |
- \|/
- +--------------------------+ /----------------------\
- | Linuxized ACPICA Patches |<----------------< Linuxize patch Utility >
- +--------------------------+ \----------------------/
- |
- \|/
- /---------------------------\
- < Linux ACPI Community Review >
- \---------------------------/
- |
- \|/
- +-----------------------+ /------------------\ +----------------+
- | linux-pm / linux-next |-->< Linux Merge Window >-->| linux / master |
- +-----------------------+ \------------------/ +----------------+
-
- Figure 2. ACPICA -> Linux Upstream Process
-
- NOTE:
- A. Linuxize Utilities - Provided by the ACPICA repository, including a
- utility located in source/tools/acpisrc folder and a number of
- scripts located in generate/linux folder.
- B. acpica / master - "master" branch of the git repository at
- <https://github.com/acpica/acpica.git>.
- C. linux-pm / linux-next - "linux-next" branch of the git repository at
- <http://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git>.
- D. linux / master - "master" branch of the git repository at
- <http://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git>.
-
- Before the linuxized ACPICA patches are sent to the Linux ACPI community
- for review, there is a quality assurance build test process to reduce
- porting issues. Currently this build process only takes care of the
- following kernel configuration options:
- CONFIG_ACPI/CONFIG_ACPI_DEBUG/CONFIG_ACPI_DEBUGGER
-
-3. ACPICA Divergences
-
- Ideally, all of the ACPICA commits should be converted into Linux patches
- automatically without manual modifications, the "linux / master" tree should
- contain the ACPICA code that exactly corresponds to the ACPICA code
- contained in "new linuxized acpica" tree and it should be possible to run
- the release process fully automatically.
-
- As a matter of fact, however, there are source code differences between
- the ACPICA code in Linux and the upstream ACPICA code, referred to as
- "ACPICA Divergences".
-
- The various sources of ACPICA divergences include:
- 1. Legacy divergences - Before the current ACPICA release process was
- established, there already had been divergences between Linux and
- ACPICA. Over the past several years those divergences have been greatly
- reduced, but there still are several ones and it takes time to figure
- out the underlying reasons for their existence.
- 2. Manual modifications - Any manual modification (eg. coding style fixes)
- made directly in the Linux sources obviously hurts the ACPICA release
- automation. Thus it is recommended to fix such issues in the ACPICA
- upstream source code and generate the linuxized fix using the ACPICA
- release utilities (please refer to Section 4 below for the details).
- 3. Linux specific features - Sometimes it's impossible to use the
- current ACPICA APIs to implement features required by the Linux kernel,
- so Linux developers occasionally have to change ACPICA code directly.
- Those changes may not be acceptable by ACPICA upstream and in such cases
- they are left as committed ACPICA divergences unless the ACPICA side can
- implement new mechanisms as replacements for them.
- 4. ACPICA release fixups - ACPICA only tests commits using a set of the
- user space simulation utilities, thus the linuxized ACPICA patches may
- break the Linux kernel, leaving us build/boot failures. In order to
- avoid breaking Linux bisection, fixes are applied directly to the
- linuxized ACPICA patches during the release process. When the release
- fixups are backported to the upstream ACPICA sources, they must follow
- the upstream ACPICA rules and so further modifications may appear.
- That may result in the appearance of new divergences.
- 5. Fast tracking of ACPICA commits - Some ACPICA commits are regression
- fixes or stable-candidate material, so they are applied in advance with
- respect to the ACPICA release process. If such commits are reverted or
- rebased on the ACPICA side in order to offer better solutions, new ACPICA
- divergences are generated.
-
-4. ACPICA Development
-
- This paragraph guides Linux developers to use the ACPICA upstream release
- utilities to obtain Linux patches corresponding to upstream ACPICA commits
- before they become available from the ACPICA release process.
-
- 1. Cherry-pick an ACPICA commit
-
- First you need to git clone the ACPICA repository and the ACPICA change
- you want to cherry pick must be committed into the local repository.
-
- Then the gen-patch.sh command can help to cherry-pick an ACPICA commit
- from the ACPICA local repository:
-
- $ git clone https://github.com/acpica/acpica
- $ cd acpica
- $ generate/linux/gen-patch.sh -u [commit ID]
-
- Here the commit ID is the ACPICA local repository commit ID you want to
- cherry pick. It can be omitted if the commit is "HEAD".
-
- 2. Cherry-pick recent ACPICA commits
-
- Sometimes you need to rebase your code on top of the most recent ACPICA
- changes that haven't been applied to Linux yet.
-
- You can generate the ACPICA release series yourself and rebase your code on
- top of the generated ACPICA release patches:
-
- $ git clone https://github.com/acpica/acpica
- $ cd acpica
- $ generate/linux/make-patches.sh -u [commit ID]
-
- The commit ID should be the last ACPICA commit accepted by Linux. Usually,
- it is the commit modifying ACPI_CA_VERSION. It can be found by executing
- "git blame source/include/acpixf.h" and referencing the line that contains
- "ACPI_CA_VERSION".
-
- 3. Inspect the current divergences
-
- If you have local copies of both Linux and upstream ACPICA, you can generate
- a diff file indicating the state of the current divergences:
-
- # git clone https://github.com/acpica/acpica
- # git clone http://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
- # cd acpica
- # generate/linux/divergences.sh -s ../linux
diff --git a/Documentation/acpi/lpit.txt b/Documentation/acpi/lpit.txt
deleted file mode 100644
index b426398d2e97..000000000000
--- a/Documentation/acpi/lpit.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-To enumerate platform Low Power Idle states, Intel platforms are using
-“Low Power Idle Table” (LPIT). More details about this table can be
-downloaded from:
-http://www.uefi.org/sites/default/files/resources/Intel_ACPI_Low_Power_S0_Idle.pdf
-
-Residencies for each low power state can be read via FFH
-(Function fixed hardware) or a memory mapped interface.
-
-On platforms supporting S0ix sleep states, there can be two types of
-residencies:
-- CPU PKG C10 (Read via FFH interface)
-- Platform Controller Hub (PCH) SLP_S0 (Read via memory mapped interface)
-
-The following attributes are added dynamically to the cpuidle
-sysfs attribute group:
- /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us
- /sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us
-
-The "low_power_idle_cpu_residency_us" attribute shows time spent
-by the CPU package in PKG C10
-
-The "low_power_idle_system_residency_us" attribute shows SLP_S0
-residency, or system time spent with the SLP_S0# signal asserted.
-This is the lowest possible system power state, achieved only when CPU is in
-PKG C10 and all functional blocks in PCH are in a low power state.
diff --git a/Documentation/acpi/method-customizing.txt b/Documentation/acpi/method-customizing.txt
deleted file mode 100644
index 7235da975f23..000000000000
--- a/Documentation/acpi/method-customizing.txt
+++ /dev/null
@@ -1,73 +0,0 @@
-Linux ACPI Custom Control Method How To
-=======================================
-
-Written by Zhang Rui <rui.zhang@intel.com>
-
-
-Linux supports customizing ACPI control methods at runtime.
-
-Users can use this to
-1. override an existing method which may not work correctly,
- or just for debugging purposes.
-2. insert a completely new method in order to create a missing
- method such as _OFF, _ON, _STA, _INI, etc.
-For these cases, it is far simpler to dynamically install a single
-control method rather than override the entire DSDT, because kernel
-rebuild/reboot is not needed and test result can be got in minutes.
-
-Note: Only ACPI METHOD can be overridden, any other object types like
- "Device", "OperationRegion", are not recognized. Methods
- declared inside scope operators are also not supported.
-Note: The same ACPI control method can be overridden for many times,
- and it's always the latest one that used by Linux/kernel.
-Note: To get the ACPI debug object output (Store (AAAA, Debug)),
- please run "echo 1 > /sys/module/acpi/parameters/aml_debug_output".
-
-1. override an existing method
- a) get the ACPI table via ACPI sysfs I/F. e.g. to get the DSDT,
- just run "cat /sys/firmware/acpi/tables/DSDT > /tmp/dsdt.dat"
- b) disassemble the table by running "iasl -d dsdt.dat".
- c) rewrite the ASL code of the method and save it in a new file,
- d) package the new file (psr.asl) to an ACPI table format.
- Here is an example of a customized \_SB._AC._PSR method,
-
- DefinitionBlock ("", "SSDT", 1, "", "", 0x20080715)
- {
- Method (\_SB_.AC._PSR, 0, NotSerialized)
- {
- Store ("In AC _PSR", Debug)
- Return (ACON)
- }
- }
- Note that the full pathname of the method in ACPI namespace
- should be used.
- e) assemble the file to generate the AML code of the method.
- e.g. "iasl -vw 6084 psr.asl" (psr.aml is generated as a result)
- If parameter "-vw 6084" is not supported by your iASL compiler,
- please try a newer version.
- f) mount debugfs by "mount -t debugfs none /sys/kernel/debug"
- g) override the old method via the debugfs by running
- "cat /tmp/psr.aml > /sys/kernel/debug/acpi/custom_method"
-
-2. insert a new method
- This is easier than overriding an existing method.
- We just need to create the ASL code of the method we want to
- insert and then follow the step c) ~ g) in section 1.
-
-3. undo your changes
- The "undo" operation is not supported for a new inserted method
- right now, i.e. we can not remove a method currently.
- For an overridden method, in order to undo your changes, please
- save a copy of the method original ASL code in step c) section 1,
- and redo step c) ~ g) to override the method with the original one.
-
-
-Note: We can use a kernel with multiple custom ACPI method running,
- But each individual write to debugfs can implement a SINGLE
- method override. i.e. if we want to insert/override multiple
- ACPI methods, we need to redo step c) ~ g) for multiple times.
-
-Note: Be aware that root can mis-use this driver to modify arbitrary
- memory and gain additional rights, if root's privileges got
- restricted (for example if root is not allowed to load additional
- modules after boot).
diff --git a/Documentation/acpi/method-tracing.txt b/Documentation/acpi/method-tracing.txt
deleted file mode 100644
index 0aba14c8f459..000000000000
--- a/Documentation/acpi/method-tracing.txt
+++ /dev/null
@@ -1,192 +0,0 @@
-ACPICA Trace Facility
-
-Copyright (C) 2015, Intel Corporation
-Author: Lv Zheng <lv.zheng@intel.com>
-
-
-Abstract:
-
-This document describes the functions and the interfaces of the method
-tracing facility.
-
-1. Functionalities and usage examples:
-
- ACPICA provides method tracing capability. And two functions are
- currently implemented using this capability.
-
- A. Log reducer
- ACPICA subsystem provides debugging outputs when CONFIG_ACPI_DEBUG is
- enabled. The debugging messages which are deployed via
- ACPI_DEBUG_PRINT() macro can be reduced at 2 levels - per-component
- level (known as debug layer, configured via
- /sys/module/acpi/parameters/debug_layer) and per-type level (known as
- debug level, configured via /sys/module/acpi/parameters/debug_level).
-
- But when the particular layer/level is applied to the control method
- evaluations, the quantity of the debugging outputs may still be too
- large to be put into the kernel log buffer. The idea thus is worked out
- to only enable the particular debug layer/level (normally more detailed)
- logs when the control method evaluation is started, and disable the
- detailed logging when the control method evaluation is stopped.
-
- The following command examples illustrate the usage of the "log reducer"
- functionality:
- a. Filter out the debug layer/level matched logs when control methods
- are being evaluated:
- # cd /sys/module/acpi/parameters
- # echo "0xXXXXXXXX" > trace_debug_layer
- # echo "0xYYYYYYYY" > trace_debug_level
- # echo "enable" > trace_state
- b. Filter out the debug layer/level matched logs when the specified
- control method is being evaluated:
- # cd /sys/module/acpi/parameters
- # echo "0xXXXXXXXX" > trace_debug_layer
- # echo "0xYYYYYYYY" > trace_debug_level
- # echo "\PPPP.AAAA.TTTT.HHHH" > trace_method_name
- # echo "method" > /sys/module/acpi/parameters/trace_state
- c. Filter out the debug layer/level matched logs when the specified
- control method is being evaluated for the first time:
- # cd /sys/module/acpi/parameters
- # echo "0xXXXXXXXX" > trace_debug_layer
- # echo "0xYYYYYYYY" > trace_debug_level
- # echo "\PPPP.AAAA.TTTT.HHHH" > trace_method_name
- # echo "method-once" > /sys/module/acpi/parameters/trace_state
- Where:
- 0xXXXXXXXX/0xYYYYYYYY: Refer to Documentation/acpi/debug.txt for
- possible debug layer/level masking values.
- \PPPP.AAAA.TTTT.HHHH: Full path of a control method that can be found
- in the ACPI namespace. It needn't be an entry
- of a control method evaluation.
-
- B. AML tracer
-
- There are special log entries added by the method tracing facility at
- the "trace points" the AML interpreter starts/stops to execute a control
- method, or an AML opcode. Note that the format of the log entries are
- subject to change:
- [ 0.186427] exdebug-0398 ex_trace_point : Method Begin [0xf58394d8:\_SB.PCI0.LPCB.ECOK] execution.
- [ 0.186630] exdebug-0398 ex_trace_point : Opcode Begin [0xf5905c88:If] execution.
- [ 0.186820] exdebug-0398 ex_trace_point : Opcode Begin [0xf5905cc0:LEqual] execution.
- [ 0.187010] exdebug-0398 ex_trace_point : Opcode Begin [0xf5905a20:-NamePath-] execution.
- [ 0.187214] exdebug-0398 ex_trace_point : Opcode End [0xf5905a20:-NamePath-] execution.
- [ 0.187407] exdebug-0398 ex_trace_point : Opcode Begin [0xf5905f60:One] execution.
- [ 0.187594] exdebug-0398 ex_trace_point : Opcode End [0xf5905f60:One] execution.
- [ 0.187789] exdebug-0398 ex_trace_point : Opcode End [0xf5905cc0:LEqual] execution.
- [ 0.187980] exdebug-0398 ex_trace_point : Opcode Begin [0xf5905cc0:Return] execution.
- [ 0.188146] exdebug-0398 ex_trace_point : Opcode Begin [0xf5905f60:One] execution.
- [ 0.188334] exdebug-0398 ex_trace_point : Opcode End [0xf5905f60:One] execution.
- [ 0.188524] exdebug-0398 ex_trace_point : Opcode End [0xf5905cc0:Return] execution.
- [ 0.188712] exdebug-0398 ex_trace_point : Opcode End [0xf5905c88:If] execution.
- [ 0.188903] exdebug-0398 ex_trace_point : Method End [0xf58394d8:\_SB.PCI0.LPCB.ECOK] execution.
-
- Developers can utilize these special log entries to track the AML
- interpretion, thus can aid issue debugging and performance tuning. Note
- that, as the "AML tracer" logs are implemented via ACPI_DEBUG_PRINT()
- macro, CONFIG_ACPI_DEBUG is also required to be enabled for enabling
- "AML tracer" logs.
-
- The following command examples illustrate the usage of the "AML tracer"
- functionality:
- a. Filter out the method start/stop "AML tracer" logs when control
- methods are being evaluated:
- # cd /sys/module/acpi/parameters
- # echo "0x80" > trace_debug_layer
- # echo "0x10" > trace_debug_level
- # echo "enable" > trace_state
- b. Filter out the method start/stop "AML tracer" when the specified
- control method is being evaluated:
- # cd /sys/module/acpi/parameters
- # echo "0x80" > trace_debug_layer
- # echo "0x10" > trace_debug_level
- # echo "\PPPP.AAAA.TTTT.HHHH" > trace_method_name
- # echo "method" > trace_state
- c. Filter out the method start/stop "AML tracer" logs when the specified
- control method is being evaluated for the first time:
- # cd /sys/module/acpi/parameters
- # echo "0x80" > trace_debug_layer
- # echo "0x10" > trace_debug_level
- # echo "\PPPP.AAAA.TTTT.HHHH" > trace_method_name
- # echo "method-once" > trace_state
- d. Filter out the method/opcode start/stop "AML tracer" when the
- specified control method is being evaluated:
- # cd /sys/module/acpi/parameters
- # echo "0x80" > trace_debug_layer
- # echo "0x10" > trace_debug_level
- # echo "\PPPP.AAAA.TTTT.HHHH" > trace_method_name
- # echo "opcode" > trace_state
- e. Filter out the method/opcode start/stop "AML tracer" when the
- specified control method is being evaluated for the first time:
- # cd /sys/module/acpi/parameters
- # echo "0x80" > trace_debug_layer
- # echo "0x10" > trace_debug_level
- # echo "\PPPP.AAAA.TTTT.HHHH" > trace_method_name
- # echo "opcode-opcode" > trace_state
-
- Note that all above method tracing facility related module parameters can
- be used as the boot parameters, for example:
- acpi.trace_debug_layer=0x80 acpi.trace_debug_level=0x10 \
- acpi.trace_method_name=\_SB.LID0._LID acpi.trace_state=opcode-once
-
-2. Interface descriptions:
-
- All method tracing functions can be configured via ACPI module
- parameters that are accessible at /sys/module/acpi/parameters/:
-
- trace_method_name
- The full path of the AML method that the user wants to trace.
- Note that the full path shouldn't contain the trailing "_"s in its
- name segments but may contain "\" to form an absolute path.
-
- trace_debug_layer
- The temporary debug_layer used when the tracing feature is enabled.
- Using ACPI_EXECUTER (0x80) by default, which is the debug_layer
- used to match all "AML tracer" logs.
-
- trace_debug_level
- The temporary debug_level used when the tracing feature is enabled.
- Using ACPI_LV_TRACE_POINT (0x10) by default, which is the
- debug_level used to match all "AML tracer" logs.
-
- trace_state
- The status of the tracing feature.
- Users can enable/disable this debug tracing feature by executing
- the following command:
- # echo string > /sys/module/acpi/parameters/trace_state
- Where "string" should be one of the following:
- "disable"
- Disable the method tracing feature.
- "enable"
- Enable the method tracing feature.
- ACPICA debugging messages matching
- "trace_debug_layer/trace_debug_level" during any method
- execution will be logged.
- "method"
- Enable the method tracing feature.
- ACPICA debugging messages matching
- "trace_debug_layer/trace_debug_level" during method execution
- of "trace_method_name" will be logged.
- "method-once"
- Enable the method tracing feature.
- ACPICA debugging messages matching
- "trace_debug_layer/trace_debug_level" during method execution
- of "trace_method_name" will be logged only once.
- "opcode"
- Enable the method tracing feature.
- ACPICA debugging messages matching
- "trace_debug_layer/trace_debug_level" during method/opcode
- execution of "trace_method_name" will be logged.
- "opcode-once"
- Enable the method tracing feature.
- ACPICA debugging messages matching
- "trace_debug_layer/trace_debug_level" during method/opcode
- execution of "trace_method_name" will be logged only once.
- Note that, the difference between the "enable" and other feature
- enabling options are:
- 1. When "enable" is specified, since
- "trace_debug_layer/trace_debug_level" shall apply to all control
- method evaluations, after configuring "trace_state" to "enable",
- "trace_method_name" will be reset to NULL.
- 2. When "method/opcode" is specified, if
- "trace_method_name" is NULL when "trace_state" is configured to
- these options, the "trace_debug_layer/trace_debug_level" will
- apply to all control method evaluations.
diff --git a/Documentation/acpi/namespace.txt b/Documentation/acpi/namespace.txt
deleted file mode 100644
index 1860cb3865c6..000000000000
--- a/Documentation/acpi/namespace.txt
+++ /dev/null
@@ -1,388 +0,0 @@
-ACPI Device Tree - Representation of ACPI Namespace
-
-Copyright (C) 2013, Intel Corporation
-Author: Lv Zheng <lv.zheng@intel.com>
-
-
-Abstract:
-
-The Linux ACPI subsystem converts ACPI namespace objects into a Linux
-device tree under the /sys/devices/LNXSYSTEM:00 and updates it upon
-receiving ACPI hotplug notification events. For each device object in this
-hierarchy there is a corresponding symbolic link in the
-/sys/bus/acpi/devices.
-This document illustrates the structure of the ACPI device tree.
-
-
-Credit:
-
-Thanks for the help from Zhang Rui <rui.zhang@intel.com> and Rafael J.
-Wysocki <rafael.j.wysocki@intel.com>.
-
-
-1. ACPI Definition Blocks
-
- The ACPI firmware sets up RSDP (Root System Description Pointer) in the
- system memory address space pointing to the XSDT (Extended System
- Description Table). The XSDT always points to the FADT (Fixed ACPI
- Description Table) using its first entry, the data within the FADT
- includes various fixed-length entries that describe fixed ACPI features
- of the hardware. The FADT contains a pointer to the DSDT
- (Differentiated System Descripition Table). The XSDT also contains
- entries pointing to possibly multiple SSDTs (Secondary System
- Description Table).
-
- The DSDT and SSDT data is organized in data structures called definition
- blocks that contain definitions of various objects, including ACPI
- control methods, encoded in AML (ACPI Machine Language). The data block
- of the DSDT along with the contents of SSDTs represents a hierarchical
- data structure called the ACPI namespace whose topology reflects the
- structure of the underlying hardware platform.
-
- The relationships between ACPI System Definition Tables described above
- are illustrated in the following diagram.
-
- +---------+ +-------+ +--------+ +------------------------+
- | RSDP | +->| XSDT | +->| FADT | | +-------------------+ |
- +---------+ | +-------+ | +--------+ +-|->| DSDT | |
- | Pointer | | | Entry |-+ | ...... | | | +-------------------+ |
- +---------+ | +-------+ | X_DSDT |--+ | | Definition Blocks | |
- | Pointer |-+ | ..... | | ...... | | +-------------------+ |
- +---------+ +-------+ +--------+ | +-------------------+ |
- | Entry |------------------|->| SSDT | |
- +- - - -+ | +-------------------| |
- | Entry | - - - - - - - -+ | | Definition Blocks | |
- +- - - -+ | | +-------------------+ |
- | | +- - - - - - - - - -+ |
- +-|->| SSDT | |
- | +-------------------+ |
- | | Definition Blocks | |
- | +- - - - - - - - - -+ |
- +------------------------+
- |
- OSPM Loading |
- \|/
- +----------------+
- | ACPI Namespace |
- +----------------+
-
- Figure 1. ACPI Definition Blocks
-
- NOTE: RSDP can also contain a pointer to the RSDT (Root System
- Description Table). Platforms provide RSDT to enable
- compatibility with ACPI 1.0 operating systems. The OS is expected
- to use XSDT, if present.
-
-
-2. Example ACPI Namespace
-
- All definition blocks are loaded into a single namespace. The namespace
- is a hierarchy of objects identified by names and paths.
- The following naming conventions apply to object names in the ACPI
- namespace:
- 1. All names are 32 bits long.
- 2. The first byte of a name must be one of 'A' - 'Z', '_'.
- 3. Each of the remaining bytes of a name must be one of 'A' - 'Z', '0'
- - '9', '_'.
- 4. Names starting with '_' are reserved by the ACPI specification.
- 5. The '\' symbol represents the root of the namespace (i.e. names
- prepended with '\' are relative to the namespace root).
- 6. The '^' symbol represents the parent of the current namespace node
- (i.e. names prepended with '^' are relative to the parent of the
- current namespace node).
-
- The figure below shows an example ACPI namespace.
-
- +------+
- | \ | Root
- +------+
- |
- | +------+
- +-| _PR | Scope(_PR): the processor namespace
- | +------+
- | |
- | | +------+
- | +-| CPU0 | Processor(CPU0): the first processor
- | +------+
- |
- | +------+
- +-| _SB | Scope(_SB): the system bus namespace
- | +------+
- | |
- | | +------+
- | +-| LID0 | Device(LID0); the lid device
- | | +------+
- | | |
- | | | +------+
- | | +-| _HID | Name(_HID, "PNP0C0D"): the hardware ID
- | | | +------+
- | | |
- | | | +------+
- | | +-| _STA | Method(_STA): the status control method
- | | +------+
- | |
- | | +------+
- | +-| PCI0 | Device(PCI0); the PCI root bridge
- | +------+
- | |
- | | +------+
- | +-| _HID | Name(_HID, "PNP0A08"): the hardware ID
- | | +------+
- | |
- | | +------+
- | +-| _CID | Name(_CID, "PNP0A03"): the compatible ID
- | | +------+
- | |
- | | +------+
- | +-| RP03 | Scope(RP03): the PCI0 power scope
- | | +------+
- | | |
- | | | +------+
- | | +-| PXP3 | PowerResource(PXP3): the PCI0 power resource
- | | +------+
- | |
- | | +------+
- | +-| GFX0 | Device(GFX0): the graphics adapter
- | +------+
- | |
- | | +------+
- | +-| _ADR | Name(_ADR, 0x00020000): the PCI bus address
- | | +------+
- | |
- | | +------+
- | +-| DD01 | Device(DD01): the LCD output device
- | +------+
- | |
- | | +------+
- | +-| _BCL | Method(_BCL): the backlight control method
- | +------+
- |
- | +------+
- +-| _TZ | Scope(_TZ): the thermal zone namespace
- | +------+
- | |
- | | +------+
- | +-| FN00 | PowerResource(FN00): the FAN0 power resource
- | | +------+
- | |
- | | +------+
- | +-| FAN0 | Device(FAN0): the FAN0 cooling device
- | | +------+
- | | |
- | | | +------+
- | | +-| _HID | Name(_HID, "PNP0A0B"): the hardware ID
- | | +------+
- | |
- | | +------+
- | +-| TZ00 | ThermalZone(TZ00); the FAN thermal zone
- | +------+
- |
- | +------+
- +-| _GPE | Scope(_GPE): the GPE namespace
- +------+
-
- Figure 2. Example ACPI Namespace
-
-
-3. Linux ACPI Device Objects
-
- The Linux kernel's core ACPI subsystem creates struct acpi_device
- objects for ACPI namespace objects representing devices, power resources
- processors, thermal zones. Those objects are exported to user space via
- sysfs as directories in the subtree under /sys/devices/LNXSYSTM:00. The
- format of their names is <bus_id:instance>, where 'bus_id' refers to the
- ACPI namespace representation of the given object and 'instance' is used
- for distinguishing different object of the same 'bus_id' (it is
- two-digit decimal representation of an unsigned integer).
-
- The value of 'bus_id' depends on the type of the object whose name it is
- part of as listed in the table below.
-
- +---+-----------------+-------+----------+
- | | Object/Feature | Table | bus_id |
- +---+-----------------+-------+----------+
- | N | Root | xSDT | LNXSYSTM |
- +---+-----------------+-------+----------+
- | N | Device | xSDT | _HID |
- +---+-----------------+-------+----------+
- | N | Processor | xSDT | LNXCPU |
- +---+-----------------+-------+----------+
- | N | ThermalZone | xSDT | LNXTHERM |
- +---+-----------------+-------+----------+
- | N | PowerResource | xSDT | LNXPOWER |
- +---+-----------------+-------+----------+
- | N | Other Devices | xSDT | device |
- +---+-----------------+-------+----------+
- | F | PWR_BUTTON | FADT | LNXPWRBN |
- +---+-----------------+-------+----------+
- | F | SLP_BUTTON | FADT | LNXSLPBN |
- +---+-----------------+-------+----------+
- | M | Video Extension | xSDT | LNXVIDEO |
- +---+-----------------+-------+----------+
- | M | ATA Controller | xSDT | LNXIOBAY |
- +---+-----------------+-------+----------+
- | M | Docking Station | xSDT | LNXDOCK |
- +---+-----------------+-------+----------+
-
- Table 1. ACPI Namespace Objects Mapping
-
- The following rules apply when creating struct acpi_device objects on
- the basis of the contents of ACPI System Description Tables (as
- indicated by the letter in the first column and the notation in the
- second column of the table above):
- N:
- The object's source is an ACPI namespace node (as indicated by the
- named object's type in the second column). In that case the object's
- directory in sysfs will contain the 'path' attribute whose value is
- the full path to the node from the namespace root.
- F:
- The struct acpi_device object is created for a fixed hardware
- feature (as indicated by the fixed feature flag's name in the second
- column), so its sysfs directory will not contain the 'path'
- attribute.
- M:
- The struct acpi_device object is created for an ACPI namespace node
- with specific control methods (as indicated by the ACPI defined
- device's type in the second column). The 'path' attribute containing
- its namespace path will be present in its sysfs directory. For
- example, if the _BCL method is present for an ACPI namespace node, a
- struct acpi_device object with LNXVIDEO 'bus_id' will be created for
- it.
-
- The third column of the above table indicates which ACPI System
- Description Tables contain information used for the creation of the
- struct acpi_device objects represented by the given row (xSDT means DSDT
- or SSDT).
-
- The forth column of the above table indicates the 'bus_id' generation
- rule of the struct acpi_device object:
- _HID:
- _HID in the last column of the table means that the object's bus_id
- is derived from the _HID/_CID identification objects present under
- the corresponding ACPI namespace node. The object's sysfs directory
- will then contain the 'hid' and 'modalias' attributes that can be
- used to retrieve the _HID and _CIDs of that object.
- LNXxxxxx:
- The 'modalias' attribute is also present for struct acpi_device
- objects having bus_id of the "LNXxxxxx" form (pseudo devices), in
- which cases it contains the bus_id string itself.
- device:
- 'device' in the last column of the table indicates that the object's
- bus_id cannot be determined from _HID/_CID of the corresponding
- ACPI namespace node, although that object represents a device (for
- example, it may be a PCI device with _ADR defined and without _HID
- or _CID). In that case the string 'device' will be used as the
- object's bus_id.
-
-
-4. Linux ACPI Physical Device Glue
-
- ACPI device (i.e. struct acpi_device) objects may be linked to other
- objects in the Linux' device hierarchy that represent "physical" devices
- (for example, devices on the PCI bus). If that happens, it means that
- the ACPI device object is a "companion" of a device otherwise
- represented in a different way and is used (1) to provide configuration
- information on that device which cannot be obtained by other means and
- (2) to do specific things to the device with the help of its ACPI
- control methods. One ACPI device object may be linked this way to
- multiple "physical" devices.
-
- If an ACPI device object is linked to a "physical" device, its sysfs
- directory contains the "physical_node" symbolic link to the sysfs
- directory of the target device object. In turn, the target device's
- sysfs directory will then contain the "firmware_node" symbolic link to
- the sysfs directory of the companion ACPI device object.
- The linking mechanism relies on device identification provided by the
- ACPI namespace. For example, if there's an ACPI namespace object
- representing a PCI device (i.e. a device object under an ACPI namespace
- object representing a PCI bridge) whose _ADR returns 0x00020000 and the
- bus number of the parent PCI bridge is 0, the sysfs directory
- representing the struct acpi_device object created for that ACPI
- namespace object will contain the 'physical_node' symbolic link to the
- /sys/devices/pci0000:00/0000:00:02:0/ sysfs directory of the
- corresponding PCI device.
-
- The linking mechanism is generally bus-specific. The core of its
- implementation is located in the drivers/acpi/glue.c file, but there are
- complementary parts depending on the bus types in question located
- elsewhere. For example, the PCI-specific part of it is located in
- drivers/pci/pci-acpi.c.
-
-
-5. Example Linux ACPI Device Tree
-
- The sysfs hierarchy of struct acpi_device objects corresponding to the
- example ACPI namespace illustrated in Figure 2 with the addition of
- fixed PWR_BUTTON/SLP_BUTTON devices is shown below.
-
- +--------------+---+-----------------+
- | LNXSYSTEM:00 | \ | acpi:LNXSYSTEM: |
- +--------------+---+-----------------+
- |
- | +-------------+-----+----------------+
- +-| LNXPWRBN:00 | N/A | acpi:LNXPWRBN: |
- | +-------------+-----+----------------+
- |
- | +-------------+-----+----------------+
- +-| LNXSLPBN:00 | N/A | acpi:LNXSLPBN: |
- | +-------------+-----+----------------+
- |
- | +-----------+------------+--------------+
- +-| LNXCPU:00 | \_PR_.CPU0 | acpi:LNXCPU: |
- | +-----------+------------+--------------+
- |
- | +-------------+-------+----------------+
- +-| LNXSYBUS:00 | \_SB_ | acpi:LNXSYBUS: |
- | +-------------+-------+----------------+
- | |
- | | +- - - - - - - +- - - - - - +- - - - - - - -+
- | +-| PNP0C0D:00 | \_SB_.LID0 | acpi:PNP0C0D: |
- | | +- - - - - - - +- - - - - - +- - - - - - - -+
- | |
- | | +------------+------------+-----------------------+
- | +-| PNP0A08:00 | \_SB_.PCI0 | acpi:PNP0A08:PNP0A03: |
- | +------------+------------+-----------------------+
- | |
- | | +-----------+-----------------+-----+
- | +-| device:00 | \_SB_.PCI0.RP03 | N/A |
- | | +-----------+-----------------+-----+
- | | |
- | | | +-------------+----------------------+----------------+
- | | +-| LNXPOWER:00 | \_SB_.PCI0.RP03.PXP3 | acpi:LNXPOWER: |
- | | +-------------+----------------------+----------------+
- | |
- | | +-------------+-----------------+----------------+
- | +-| LNXVIDEO:00 | \_SB_.PCI0.GFX0 | acpi:LNXVIDEO: |
- | +-------------+-----------------+----------------+
- | |
- | | +-----------+-----------------+-----+
- | +-| device:01 | \_SB_.PCI0.DD01 | N/A |
- | +-----------+-----------------+-----+
- |
- | +-------------+-------+----------------+
- +-| LNXSYBUS:01 | \_TZ_ | acpi:LNXSYBUS: |
- +-------------+-------+----------------+
- |
- | +-------------+------------+----------------+
- +-| LNXPOWER:0a | \_TZ_.FN00 | acpi:LNXPOWER: |
- | +-------------+------------+----------------+
- |
- | +------------+------------+---------------+
- +-| PNP0C0B:00 | \_TZ_.FAN0 | acpi:PNP0C0B: |
- | +------------+------------+---------------+
- |
- | +-------------+------------+----------------+
- +-| LNXTHERM:00 | \_TZ_.TZ00 | acpi:LNXTHERM: |
- +-------------+------------+----------------+
-
- Figure 3. Example Linux ACPI Device Tree
-
- NOTE: Each node is represented as "object/path/modalias", where:
- 1. 'object' is the name of the object's directory in sysfs.
- 2. 'path' is the ACPI namespace path of the corresponding
- ACPI namespace object, as returned by the object's 'path'
- sysfs attribute.
- 3. 'modalias' is the value of the object's 'modalias' sysfs
- attribute (as described earlier in this document).
- NOTE: N/A indicates the device object does not have the 'path' or the
- 'modalias' attribute.
diff --git a/Documentation/acpi/osi.txt b/Documentation/acpi/osi.txt
deleted file mode 100644
index 50cde0ceb9b0..000000000000
--- a/Documentation/acpi/osi.txt
+++ /dev/null
@@ -1,187 +0,0 @@
-ACPI _OSI and _REV methods
---------------------------
-
-An ACPI BIOS can use the "Operating System Interfaces" method (_OSI)
-to find out what the operating system supports. Eg. If BIOS
-AML code includes _OSI("XYZ"), the kernel's AML interpreter
-can evaluate that method, look to see if it supports 'XYZ'
-and answer YES or NO to the BIOS.
-
-The ACPI _REV method returns the "Revision of the ACPI specification
-that OSPM supports"
-
-This document explains how and why the BIOS and Linux should use these methods.
-It also explains how and why they are widely misused.
-
-How to use _OSI
----------------
-
-Linux runs on two groups of machines -- those that are tested by the OEM
-to be compatible with Linux, and those that were never tested with Linux,
-but where Linux was installed to replace the original OS (Windows or OSX).
-
-The larger group is the systems tested to run only Windows. Not only that,
-but many were tested to run with just one specific version of Windows.
-So even though the BIOS may use _OSI to query what version of Windows is running,
-only a single path through the BIOS has actually been tested.
-Experience shows that taking untested paths through the BIOS
-exposes Linux to an entire category of BIOS bugs.
-For this reason, Linux _OSI defaults must continue to claim compatibility
-with all versions of Windows.
-
-But Linux isn't actually compatible with Windows, and the Linux community
-has also been hurt with regressions when Linux adds the latest version of
-Windows to its list of _OSI strings. So it is possible that additional strings
-will be more thoroughly vetted before shipping upstream in the future.
-But it is likely that they will all eventually be added.
-
-What should an OEM do if they want to support Linux and Windows
-using the same BIOS image? Often they need to do something different
-for Linux to deal with how Linux is different from Windows.
-Here the BIOS should ask exactly what it wants to know:
-
-_OSI("Linux-OEM-my_interface_name")
-where 'OEM' is needed if this is an OEM-specific hook,
-and 'my_interface_name' describes the hook, which could be a
-quirk, a bug, or a bug-fix.
-
-In addition, the OEM should send a patch to upstream Linux
-via the linux-acpi@vger.kernel.org mailing list. When that patch
-is checked into Linux, the OS will answer "YES" when the BIOS
-on the OEM's system uses _OSI to ask if the interface is supported
-by the OS. Linux distributors can back-port that patch for Linux
-pre-installs, and it will be included by all distributions that
-re-base to upstream. If the distribution can not update the kernel binary,
-they can also add an acpi_osi=Linux-OEM-my_interface_name
-cmdline parameter to the boot loader, as needed.
-
-If the string refers to a feature where the upstream kernel
-eventually grows support, a patch should be sent to remove
-the string when that support is added to the kernel.
-
-That was easy. Read on, to find out how to do it wrong.
-
-Before _OSI, there was _OS
---------------------------
-
-ACPI 1.0 specified "_OS" as an
-"object that evaluates to a string that identifies the operating system."
-
-The ACPI BIOS flow would include an evaluation of _OS, and the AML
-interpreter in the kernel would return to it a string identifying the OS:
-
-Windows 98, SE: "Microsoft Windows"
-Windows ME: "Microsoft WindowsME:Millenium Edition"
-Windows NT: "Microsoft Windows NT"
-
-The idea was on a platform tasked with running multiple OS's,
-the BIOS could use _OS to enable devices that an OS
-might support, or enable quirks or bug workarounds
-necessary to make the platform compatible with that pre-existing OS.
-
-But _OS had fundamental problems. First, the BIOS needed to know the name
-of every possible version of the OS that would run on it, and needed to know
-all the quirks of those OS's. Certainly it would make more sense
-for the BIOS to ask *specific* things of the OS, such
-"do you support a specific interface", and thus in ACPI 3.0,
-_OSI was born to replace _OS.
-
-_OS was abandoned, though even today, many BIOS look for
-_OS "Microsoft Windows NT", though it seems somewhat far-fetched
-that anybody would install those old operating systems
-over what came with the machine.
-
-Linux answers "Microsoft Windows NT" to please that BIOS idiom.
-That is the *only* viable strategy, as that is what modern Windows does,
-and so doing otherwise could steer the BIOS down an untested path.
-
-_OSI is born, and immediately misused
---------------------------------------
-
-With _OSI, the *BIOS* provides the string describing an interface,
-and asks the OS: "YES/NO, are you compatible with this interface?"
-
-eg. _OSI("3.0 Thermal Model") would return TRUE if the OS knows how
-to deal with the thermal extensions made to the ACPI 3.0 specification.
-An old OS that doesn't know about those extensions would answer FALSE,
-and a new OS may be able to return TRUE.
-
-For an OS-specific interface, the ACPI spec said that the BIOS and the OS
-were to agree on a string of the form such as "Windows-interface_name".
-
-But two bad things happened. First, the Windows ecosystem used _OSI
-not as designed, but as a direct replacement for _OS -- identifying
-the OS version, rather than an OS supported interface. Indeed, right
-from the start, the ACPI 3.0 spec itself codified this misuse
-in example code using _OSI("Windows 2001").
-
-This misuse was adopted and continues today.
-
-Linux had no choice but to also return TRUE to _OSI("Windows 2001")
-and its successors. To do otherwise would virtually guarantee breaking
-a BIOS that has been tested only with that _OSI returning TRUE.
-
-This strategy is problematic, as Linux is never completely compatible with
-the latest version of Windows, and sometimes it takes more than a year
-to iron out incompatibilities.
-
-Not to be out-done, the Linux community made things worse by returning TRUE
-to _OSI("Linux"). Doing so is even worse than the Windows misuse
-of _OSI, as "Linux" does not even contain any version information.
-_OSI("Linux") led to some BIOS' malfunctioning due to BIOS writer's
-using it in untested BIOS flows. But some OEM's used _OSI("Linux")
-in tested flows to support real Linux features. In 2009, Linux
-removed _OSI("Linux"), and added a cmdline parameter to restore it
-for legacy systems still needed it. Further a BIOS_BUG warning prints
-for all BIOS's that invoke it.
-
-No BIOS should use _OSI("Linux").
-
-The result is a strategy for Linux to maximize compatibility with
-ACPI BIOS that are tested on Windows machines. There is a real risk
-of over-stating that compatibility; but the alternative has often been
-catastrophic failure resulting from the BIOS taking paths that
-were never validated under *any* OS.
-
-Do not use _REV
----------------
-
-Since _OSI("Linux") went away, some BIOS writers used _REV
-to support Linux and Windows differences in the same BIOS.
-
-_REV was defined in ACPI 1.0 to return the version of ACPI
-supported by the OS and the OS AML interpreter.
-
-Modern Windows returns _REV = 2. Linux used ACPI_CA_SUPPORT_LEVEL,
-which would increment, based on the version of the spec supported.
-
-Unfortunately, _REV was also misused. eg. some BIOS would check
-for _REV = 3, and do something for Linux, but when Linux returned
-_REV = 4, that support broke.
-
-In response to this problem, Linux returns _REV = 2 always,
-from mid-2015 onward. The ACPI specification will also be updated
-to reflect that _REV is deprecated, and always returns 2.
-
-Apple Mac and _OSI("Darwin")
-----------------------------
-
-On Apple's Mac platforms, the ACPI BIOS invokes _OSI("Darwin")
-to determine if the machine is running Apple OSX.
-
-Like Linux's _OSI("*Windows*") strategy, Linux defaults to
-answering YES to _OSI("Darwin") to enable full access
-to the hardware and validated BIOS paths seen by OSX.
-Just like on Windows-tested platforms, this strategy has risks.
-
-Starting in Linux-3.18, the kernel answered YES to _OSI("Darwin")
-for the purpose of enabling Mac Thunderbolt support. Further,
-if the kernel noticed _OSI("Darwin") being invoked, it additionally
-disabled all _OSI("*Windows*") to keep poorly written Mac BIOS
-from going down untested combinations of paths.
-
-The Linux-3.18 change in default caused power regressions on Mac
-laptops, and the 3.18 implementation did not allow changing
-the default via cmdline "acpi_osi=!Darwin". Linux-4.7 fixed
-the ability to use acpi_osi=!Darwin as a workaround, and
-we hope to see Mac Thunderbolt power management support in Linux-4.11.
diff --git a/Documentation/acpi/scan_handlers.txt b/Documentation/acpi/scan_handlers.txt
deleted file mode 100644
index 3246ccf15992..000000000000
--- a/Documentation/acpi/scan_handlers.txt
+++ /dev/null
@@ -1,77 +0,0 @@
-ACPI Scan Handlers
-
-Copyright (C) 2012, Intel Corporation
-Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
-
-During system initialization and ACPI-based device hot-add, the ACPI namespace
-is scanned in search of device objects that generally represent various pieces
-of hardware. This causes a struct acpi_device object to be created and
-registered with the driver core for every device object in the ACPI namespace
-and the hierarchy of those struct acpi_device objects reflects the namespace
-layout (i.e. parent device objects in the namespace are represented by parent
-struct acpi_device objects and analogously for their children). Those struct
-acpi_device objects are referred to as "device nodes" in what follows, but they
-should not be confused with struct device_node objects used by the Device Trees
-parsing code (although their role is analogous to the role of those objects).
-
-During ACPI-based device hot-remove device nodes representing pieces of hardware
-being removed are unregistered and deleted.
-
-The core ACPI namespace scanning code in drivers/acpi/scan.c carries out basic
-initialization of device nodes, such as retrieving common configuration
-information from the device objects represented by them and populating them with
-appropriate data, but some of them require additional handling after they have
-been registered. For example, if the given device node represents a PCI host
-bridge, its registration should cause the PCI bus under that bridge to be
-enumerated and PCI devices on that bus to be registered with the driver core.
-Similarly, if the device node represents a PCI interrupt link, it is necessary
-to configure that link so that the kernel can use it.
-
-Those additional configuration tasks usually depend on the type of the hardware
-component represented by the given device node which can be determined on the
-basis of the device node's hardware ID (HID). They are performed by objects
-called ACPI scan handlers represented by the following structure:
-
-struct acpi_scan_handler {
- const struct acpi_device_id *ids;
- struct list_head list_node;
- int (*attach)(struct acpi_device *dev, const struct acpi_device_id *id);
- void (*detach)(struct acpi_device *dev);
-};
-
-where ids is the list of IDs of device nodes the given handler is supposed to
-take care of, list_node is the hook to the global list of ACPI scan handlers
-maintained by the ACPI core and the .attach() and .detach() callbacks are
-executed, respectively, after registration of new device nodes and before
-unregistration of device nodes the handler attached to previously.
-
-The namespace scanning function, acpi_bus_scan(), first registers all of the
-device nodes in the given namespace scope with the driver core. Then, it tries
-to match a scan handler against each of them using the ids arrays of the
-available scan handlers. If a matching scan handler is found, its .attach()
-callback is executed for the given device node. If that callback returns 1,
-that means that the handler has claimed the device node and is now responsible
-for carrying out any additional configuration tasks related to it. It also will
-be responsible for preparing the device node for unregistration in that case.
-The device node's handler field is then populated with the address of the scan
-handler that has claimed it.
-
-If the .attach() callback returns 0, it means that the device node is not
-interesting to the given scan handler and may be matched against the next scan
-handler in the list. If it returns a (negative) error code, that means that
-the namespace scan should be terminated due to a serious error. The error code
-returned should then reflect the type of the error.
-
-The namespace trimming function, acpi_bus_trim(), first executes .detach()
-callbacks from the scan handlers of all device nodes in the given namespace
-scope (if they have scan handlers). Next, it unregisters all of the device
-nodes in that scope.
-
-ACPI scan handlers can be added to the list maintained by the ACPI core with the
-help of the acpi_scan_add_handler() function taking a pointer to the new scan
-handler as an argument. The order in which scan handlers are added to the list
-is the order in which they are matched against device nodes during namespace
-scans.
-
-All scan handles must be added to the list before acpi_bus_scan() is run for the
-first time and they cannot be removed from it.
diff --git a/Documentation/acpi/ssdt-overlays.txt b/Documentation/acpi/ssdt-overlays.txt
deleted file mode 100644
index 5ae13f161ea2..000000000000
--- a/Documentation/acpi/ssdt-overlays.txt
+++ /dev/null
@@ -1,172 +0,0 @@
-
-In order to support ACPI open-ended hardware configurations (e.g. development
-boards) we need a way to augment the ACPI configuration provided by the firmware
-image. A common example is connecting sensors on I2C / SPI buses on development
-boards.
-
-Although this can be accomplished by creating a kernel platform driver or
-recompiling the firmware image with updated ACPI tables, neither is practical:
-the former proliferates board specific kernel code while the latter requires
-access to firmware tools which are often not publicly available.
-
-Because ACPI supports external references in AML code a more practical
-way to augment firmware ACPI configuration is by dynamically loading
-user defined SSDT tables that contain the board specific information.
-
-For example, to enumerate a Bosch BMA222E accelerometer on the I2C bus of the
-Minnowboard MAX development board exposed via the LSE connector [1], the
-following ASL code can be used:
-
-DefinitionBlock ("minnowmax.aml", "SSDT", 1, "Vendor", "Accel", 0x00000003)
-{
- External (\_SB.I2C6, DeviceObj)
-
- Scope (\_SB.I2C6)
- {
- Device (STAC)
- {
- Name (_ADR, Zero)
- Name (_HID, "BMA222E")
-
- Method (_CRS, 0, Serialized)
- {
- Name (RBUF, ResourceTemplate ()
- {
- I2cSerialBus (0x0018, ControllerInitiated, 0x00061A80,
- AddressingMode7Bit, "\\_SB.I2C6", 0x00,
- ResourceConsumer, ,)
- GpioInt (Edge, ActiveHigh, Exclusive, PullDown, 0x0000,
- "\\_SB.GPO2", 0x00, ResourceConsumer, , )
- { // Pin list
- 0
- }
- })
- Return (RBUF)
- }
- }
- }
-}
-
-which can then be compiled to AML binary format:
-
-$ iasl minnowmax.asl
-
-Intel ACPI Component Architecture
-ASL Optimizing Compiler version 20140214-64 [Mar 29 2014]
-Copyright (c) 2000 - 2014 Intel Corporation
-
-ASL Input: minnomax.asl - 30 lines, 614 bytes, 7 keywords
-AML Output: minnowmax.aml - 165 bytes, 6 named objects, 1 executable opcodes
-
-[1] http://wiki.minnowboard.org/MinnowBoard_MAX#Low_Speed_Expansion_Connector_.28Top.29
-
-The resulting AML code can then be loaded by the kernel using one of the methods
-below.
-
-== Loading ACPI SSDTs from initrd ==
-
-This option allows loading of user defined SSDTs from initrd and it is useful
-when the system does not support EFI or when there is not enough EFI storage.
-
-It works in a similar way with initrd based ACPI tables override/upgrade: SSDT
-aml code must be placed in the first, uncompressed, initrd under the
-"kernel/firmware/acpi" path. Multiple files can be used and this will translate
-in loading multiple tables. Only SSDT and OEM tables are allowed. See
-initrd_table_override.txt for more details.
-
-Here is an example:
-
-# Add the raw ACPI tables to an uncompressed cpio archive.
-# They must be put into a /kernel/firmware/acpi directory inside the
-# cpio archive.
-# The uncompressed cpio archive must be the first.
-# Other, typically compressed cpio archives, must be
-# concatenated on top of the uncompressed one.
-mkdir -p kernel/firmware/acpi
-cp ssdt.aml kernel/firmware/acpi
-
-# Create the uncompressed cpio archive and concatenate the original initrd
-# on top:
-find kernel | cpio -H newc --create > /boot/instrumented_initrd
-cat /boot/initrd >>/boot/instrumented_initrd
-
-== Loading ACPI SSDTs from EFI variables ==
-
-This is the preferred method, when EFI is supported on the platform, because it
-allows a persistent, OS independent way of storing the user defined SSDTs. There
-is also work underway to implement EFI support for loading user defined SSDTs
-and using this method will make it easier to convert to the EFI loading
-mechanism when that will arrive.
-
-In order to load SSDTs from an EFI variable the efivar_ssdt kernel command line
-parameter can be used. The argument for the option is the variable name to
-use. If there are multiple variables with the same name but with different
-vendor GUIDs, all of them will be loaded.
-
-In order to store the AML code in an EFI variable the efivarfs filesystem can be
-used. It is enabled and mounted by default in /sys/firmware/efi/efivars in all
-recent distribution.
-
-Creating a new file in /sys/firmware/efi/efivars will automatically create a new
-EFI variable. Updating a file in /sys/firmware/efi/efivars will update the EFI
-variable. Please note that the file name needs to be specially formatted as
-"Name-GUID" and that the first 4 bytes in the file (little-endian format)
-represent the attributes of the EFI variable (see EFI_VARIABLE_MASK in
-include/linux/efi.h). Writing to the file must also be done with one write
-operation.
-
-For example, you can use the following bash script to create/update an EFI
-variable with the content from a given file:
-
-#!/bin/sh -e
-
-while ! [ -z "$1" ]; do
- case "$1" in
- "-f") filename="$2"; shift;;
- "-g") guid="$2"; shift;;
- *) name="$1";;
- esac
- shift
-done
-
-usage()
-{
- echo "Syntax: ${0##*/} -f filename [ -g guid ] name"
- exit 1
-}
-
-[ -n "$name" -a -f "$filename" ] || usage
-
-EFIVARFS="/sys/firmware/efi/efivars"
-
-[ -d "$EFIVARFS" ] || exit 2
-
-if stat -tf $EFIVARFS | grep -q -v de5e81e4; then
- mount -t efivarfs none $EFIVARFS
-fi
-
-# try to pick up an existing GUID
-[ -n "$guid" ] || guid=$(find "$EFIVARFS" -name "$name-*" | head -n1 | cut -f2- -d-)
-
-# use a randomly generated GUID
-[ -n "$guid" ] || guid="$(cat /proc/sys/kernel/random/uuid)"
-
-# efivarfs expects all of the data in one write
-tmp=$(mktemp)
-/bin/echo -ne "\007\000\000\000" | cat - $filename > $tmp
-dd if=$tmp of="$EFIVARFS/$name-$guid" bs=$(stat -c %s $tmp)
-rm $tmp
-
-== Loading ACPI SSDTs from configfs ==
-
-This option allows loading of user defined SSDTs from userspace via the configfs
-interface. The CONFIG_ACPI_CONFIGFS option must be select and configfs must be
-mounted. In the following examples, we assume that configfs has been mounted in
-/config.
-
-New tables can be loading by creating new directories in /config/acpi/table/ and
-writing the SSDT aml code in the aml attribute:
-
-cd /config/acpi/table
-mkdir my_ssdt
-cat ~/ssdt.aml > my_ssdt/aml
diff --git a/Documentation/acpi/video_extension.txt b/Documentation/acpi/video_extension.txt
deleted file mode 100644
index 79bf6a4921be..000000000000
--- a/Documentation/acpi/video_extension.txt
+++ /dev/null
@@ -1,106 +0,0 @@
-ACPI video extensions
-~~~~~~~~~~~~~~~~~~~~~
-
-This driver implement the ACPI Extensions For Display Adapters for
-integrated graphics devices on motherboard, as specified in ACPI 2.0
-Specification, Appendix B, allowing to perform some basic control like
-defining the video POST device, retrieving EDID information or to
-setup a video output, etc. Note that this is an ref. implementation
-only. It may or may not work for your integrated video device.
-
-The ACPI video driver does 3 things regarding backlight control:
-
-1 Export a sysfs interface for user space to control backlight level
-
-If the ACPI table has a video device, and acpi_backlight=vendor kernel
-command line is not present, the driver will register a backlight device
-and set the required backlight operation structure for it for the sysfs
-interface control. For every registered class device, there will be a
-directory named acpi_videoX under /sys/class/backlight.
-
-The backlight sysfs interface has a standard definition here:
-Documentation/ABI/stable/sysfs-class-backlight.
-
-And what ACPI video driver does is:
-actual_brightness: on read, control method _BQC will be evaluated to
-get the brightness level the firmware thinks it is at;
-bl_power: not implemented, will set the current brightness instead;
-brightness: on write, control method _BCM will run to set the requested
-brightness level;
-max_brightness: Derived from the _BCL package(see below);
-type: firmware
-
-Note that ACPI video backlight driver will always use index for
-brightness, actual_brightness and max_brightness. So if we have
-the following _BCL package:
-
-Method (_BCL, 0, NotSerialized)
-{
- Return (Package (0x0C)
- {
- 0x64,
- 0x32,
- 0x0A,
- 0x14,
- 0x1E,
- 0x28,
- 0x32,
- 0x3C,
- 0x46,
- 0x50,
- 0x5A,
- 0x64
- })
-}
-
-The first two levels are for when laptop are on AC or on battery and are
-not used by Linux currently. The remaining 10 levels are supported levels
-that we can choose from. The applicable index values are from 0 (that
-corresponds to the 0x0A brightness value) to 9 (that corresponds to the
-0x64 brightness value) inclusive. Each of those index values is regarded
-as a "brightness level" indicator. Thus from the user space perspective
-the range of available brightness levels is from 0 to 9 (max_brightness)
-inclusive.
-
-2 Notify user space about hotkey event
-
-There are generally two cases for hotkey event reporting:
-i) For some laptops, when user presses the hotkey, a scancode will be
- generated and sent to user space through the input device created by
- the keyboard driver as a key type input event, with proper remap, the
- following key code will appear to user space:
-
- EV_KEY, KEY_BRIGHTNESSUP
- EV_KEY, KEY_BRIGHTNESSDOWN
- etc.
-
-For this case, ACPI video driver does not need to do anything(actually,
-it doesn't even know this happened).
-
-ii) For some laptops, the press of the hotkey will not generate the
- scancode, instead, firmware will notify the video device ACPI node
- about the event. The event value is defined in the ACPI spec. ACPI
- video driver will generate an key type input event according to the
- notify value it received and send the event to user space through the
- input device it created:
-
- event keycode
- 0x86 KEY_BRIGHTNESSUP
- 0x87 KEY_BRIGHTNESSDOWN
- etc.
-
-so this would lead to the same effect as case i) now.
-
-Once user space tool receives this event, it can modify the backlight
-level through the sysfs interface.
-
-3 Change backlight level in the kernel
-
-This works for machines covered by case ii) in Section 2. Once the driver
-received a notification, it will set the backlight level accordingly. This does
-not affect the sending of event to user space, they are always sent to user
-space regardless of whether or not the video module controls the backlight level
-directly. This behaviour can be controlled through the brightness_switch_enabled
-module parameter as documented in admin-guide/kernel-parameters.rst. It is recommended to
-disable this behaviour once a GUI environment starts up and wants to have full
-control of the backlight level.
diff --git a/Documentation/admin-guide/LSM/LoadPin.rst b/Documentation/admin-guide/LSM/LoadPin.rst
index 32070762d24c..dd3ca68b5df1 100644
--- a/Documentation/admin-guide/LSM/LoadPin.rst
+++ b/Documentation/admin-guide/LSM/LoadPin.rst
@@ -11,11 +11,21 @@ restrictions without needing to sign the files individually.
The LSM is selectable at build-time with ``CONFIG_SECURITY_LOADPIN``, and
can be controlled at boot-time with the kernel command line option
-"``loadpin.enabled``". By default, it is enabled, but can be disabled at
-boot ("``loadpin.enabled=0``").
+"``loadpin.enforce``". By default, it is enabled, but can be disabled at
+boot ("``loadpin.enforce=0``").
LoadPin starts pinning when it sees the first file loaded. If the
block device backing the filesystem is not read-only, a sysctl is
created to toggle pinning: ``/proc/sys/kernel/loadpin/enabled``. (Having
a mutable filesystem means pinning is mutable too, but having the
sysctl allows for easy testing on systems with a mutable filesystem.)
+
+It's also possible to exclude specific file types from LoadPin using kernel
+command line option "``loadpin.exclude``". By default, all files are
+included, but they can be excluded using kernel command line option such
+as "``loadpin.exclude=kernel-module,kexec-image``". This allows to use
+different mechanisms such as ``CONFIG_MODULE_SIG`` and
+``CONFIG_KEXEC_VERIFY_SIG`` to verify kernel module and kernel image while
+still use LoadPin to protect the integrity of other files kernel loads. The
+full list of valid file types can be found in ``kernel_read_file_str``
+defined in ``include/linux/kernel_read_file.h``.
diff --git a/Documentation/admin-guide/LSM/SELinux.rst b/Documentation/admin-guide/LSM/SELinux.rst
index f722c9b4173a..cdd65164ca96 100644
--- a/Documentation/admin-guide/LSM/SELinux.rst
+++ b/Documentation/admin-guide/LSM/SELinux.rst
@@ -2,11 +2,22 @@
SELinux
=======
+Information about the SELinux kernel subsystem can be found at the
+following links:
+
+ https://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/selinux.git/tree/README.md
+
+ https://github.com/selinuxproject/selinux-kernel/wiki
+
+Information about the SELinux userspace can be found at:
+
+ https://github.com/SELinuxProject/selinux/wiki
+
If you want to use SELinux, chances are you will want
to use the distro-provided policies, or install the
latest reference policy release from
- http://oss.tresys.com/projects/refpolicy
+ https://github.com/SELinuxProject/refpolicy
However, if you want to install a dummy policy for
testing, you can do using ``mdp`` provided under
diff --git a/Documentation/admin-guide/LSM/SafeSetID.rst b/Documentation/admin-guide/LSM/SafeSetID.rst
new file mode 100644
index 000000000000..6d439c987563
--- /dev/null
+++ b/Documentation/admin-guide/LSM/SafeSetID.rst
@@ -0,0 +1,118 @@
+=========
+SafeSetID
+=========
+SafeSetID is an LSM module that gates the setid family of syscalls to restrict
+UID/GID transitions from a given UID/GID to only those approved by a
+system-wide allowlist. These restrictions also prohibit the given UIDs/GIDs
+from obtaining auxiliary privileges associated with CAP_SET{U/G}ID, such as
+allowing a user to set up user namespace UID/GID mappings.
+
+
+Background
+==========
+In absence of file capabilities, processes spawned on a Linux system that need
+to switch to a different user must be spawned with CAP_SETUID privileges.
+CAP_SETUID is granted to programs running as root or those running as a non-root
+user that have been explicitly given the CAP_SETUID runtime capability. It is
+often preferable to use Linux runtime capabilities rather than file
+capabilities, since using file capabilities to run a program with elevated
+privileges opens up possible security holes since any user with access to the
+file can exec() that program to gain the elevated privileges.
+
+While it is possible to implement a tree of processes by giving full
+CAP_SET{U/G}ID capabilities, this is often at odds with the goals of running a
+tree of processes under non-root user(s) in the first place. Specifically,
+since CAP_SETUID allows changing to any user on the system, including the root
+user, it is an overpowered capability for what is needed in this scenario,
+especially since programs often only call setuid() to drop privileges to a
+lesser-privileged user -- not elevate privileges. Unfortunately, there is no
+generally feasible way in Linux to restrict the potential UIDs that a user can
+switch to through setuid() beyond allowing a switch to any user on the system.
+This SafeSetID LSM seeks to provide a solution for restricting setid
+capabilities in such a way.
+
+The main use case for this LSM is to allow a non-root program to transition to
+other untrusted uids without full blown CAP_SETUID capabilities. The non-root
+program would still need CAP_SETUID to do any kind of transition, but the
+additional restrictions imposed by this LSM would mean it is a "safer" version
+of CAP_SETUID since the non-root program cannot take advantage of CAP_SETUID to
+do any unapproved actions (e.g. setuid to uid 0 or create/enter new user
+namespace). The higher level goal is to allow for uid-based sandboxing of system
+services without having to give out CAP_SETUID all over the place just so that
+non-root programs can drop to even-lesser-privileged uids. This is especially
+relevant when one non-root daemon on the system should be allowed to spawn other
+processes as different uids, but it's undesirable to give the daemon a
+basically-root-equivalent CAP_SETUID.
+
+
+Other Approaches Considered
+===========================
+
+Solve this problem in userspace
+-------------------------------
+For candidate applications that would like to have restricted setid capabilities
+as implemented in this LSM, an alternative option would be to simply take away
+setid capabilities from the application completely and refactor the process
+spawning semantics in the application (e.g. by using a privileged helper program
+to do process spawning and UID/GID transitions). Unfortunately, there are a
+number of semantics around process spawning that would be affected by this, such
+as fork() calls where the program doesn't immediately call exec() after the
+fork(), parent processes specifying custom environment variables or command line
+args for spawned child processes, or inheritance of file handles across a
+fork()/exec(). Because of this, as solution that uses a privileged helper in
+userspace would likely be less appealing to incorporate into existing projects
+that rely on certain process-spawning semantics in Linux.
+
+Use user namespaces
+-------------------
+Another possible approach would be to run a given process tree in its own user
+namespace and give programs in the tree setid capabilities. In this way,
+programs in the tree could change to any desired UID/GID in the context of their
+own user namespace, and only approved UIDs/GIDs could be mapped back to the
+initial system user namespace, affectively preventing privilege escalation.
+Unfortunately, it is not generally feasible to use user namespaces in isolation,
+without pairing them with other namespace types, which is not always an option.
+Linux checks for capabilities based off of the user namespace that "owns" some
+entity. For example, Linux has the notion that network namespaces are owned by
+the user namespace in which they were created. A consequence of this is that
+capability checks for access to a given network namespace are done by checking
+whether a task has the given capability in the context of the user namespace
+that owns the network namespace -- not necessarily the user namespace under
+which the given task runs. Therefore spawning a process in a new user namespace
+effectively prevents it from accessing the network namespace owned by the
+initial namespace. This is a deal-breaker for any application that expects to
+retain the CAP_NET_ADMIN capability for the purpose of adjusting network
+configurations. Using user namespaces in isolation causes problems regarding
+other system interactions, including use of pid namespaces and device creation.
+
+Use an existing LSM
+-------------------
+None of the other in-tree LSMs have the capability to gate setid transitions, or
+even employ the security_task_fix_setuid hook at all. SELinux says of that hook:
+"Since setuid only affects the current process, and since the SELinux controls
+are not based on the Linux identity attributes, SELinux does not need to control
+this operation."
+
+
+Directions for use
+==================
+This LSM hooks the setid syscalls to make sure transitions are allowed if an
+applicable restriction policy is in place. Policies are configured through
+securityfs by writing to the safesetid/uid_allowlist_policy and
+safesetid/gid_allowlist_policy files at the location where securityfs is
+mounted. The format for adding a policy is '<UID>:<UID>' or '<GID>:<GID>',
+using literal numbers, and ending with a newline character such as '123:456\n'.
+Writing an empty string "" will flush the policy. Again, configuring a policy
+for a UID/GID will prevent that UID/GID from obtaining auxiliary setid
+privileges, such as allowing a user to set up user namespace UID/GID mappings.
+
+Note on GID policies and setgroups()
+====================================
+In v5.9 we are adding support for limiting CAP_SETGID privileges as was done
+previously for CAP_SETUID. However, for compatibility with common sandboxing
+related code conventions in userspace, we currently allow arbitrary
+setgroups() calls for processes with CAP_SETGID restrictions. Until we add
+support in a future release for restricting setgroups() calls, these GID
+policies add no meaningful security. setgroups() restrictions will be enforced
+once we have the policy checking code in place, which will rely on GID policy
+configuration code added in v5.9.
diff --git a/Documentation/admin-guide/LSM/Smack.rst b/Documentation/admin-guide/LSM/Smack.rst
index 6a5826a13aea..6d44f4fdbf59 100644
--- a/Documentation/admin-guide/LSM/Smack.rst
+++ b/Documentation/admin-guide/LSM/Smack.rst
@@ -818,6 +818,10 @@ Smack supports some mount options:
specifies a label to which all labels set on the
filesystem must have read access. Not yet enforced.
+ smackfstransmute=label:
+ behaves exactly like smackfsroot except that it also
+ sets the transmute flag on the root of the mount
+
These mount options apply to all file system types.
Smack auditing
diff --git a/Documentation/admin-guide/LSM/Yama.rst b/Documentation/admin-guide/LSM/Yama.rst
index d0a060de3973..d9cd937ebd2d 100644
--- a/Documentation/admin-guide/LSM/Yama.rst
+++ b/Documentation/admin-guide/LSM/Yama.rst
@@ -19,9 +19,10 @@ attach to other running processes (e.g. Firefox, SSH sessions, GPG agent,
etc) to extract additional credentials and continue to expand the scope
of their attack without resorting to user-assisted phishing.
-This is not a theoretical problem. SSH session hijacking
-(http://www.storm.net.nz/projects/7) and arbitrary code injection
-(http://c-skills.blogspot.com/2007/05/injectso.html) attacks already
+This is not a theoretical problem. `SSH session hijacking
+<https://www.blackhat.com/presentations/bh-usa-05/bh-us-05-boileau.pdf>`_
+and `arbitrary code injection
+<https://c-skills.blogspot.com/2007/05/injectso.html>`_ attacks already
exist and remain possible if ptrace is allowed to operate as before.
Since ptrace is not commonly used by non-developers and non-admins, system
builders should be allowed the option to disable this debugging system.
diff --git a/Documentation/admin-guide/LSM/apparmor.rst b/Documentation/admin-guide/LSM/apparmor.rst
index 6cf81bbd7ce8..47939ee89d74 100644
--- a/Documentation/admin-guide/LSM/apparmor.rst
+++ b/Documentation/admin-guide/LSM/apparmor.rst
@@ -18,8 +18,11 @@ set ``CONFIG_SECURITY_APPARMOR=y``
If AppArmor should be selected as the default security module then set::
- CONFIG_DEFAULT_SECURITY="apparmor"
- CONFIG_SECURITY_APPARMOR_BOOTPARAM_VALUE=1
+ CONFIG_DEFAULT_SECURITY_APPARMOR=y
+
+The CONFIG_LSM parameter manages the order and selection of LSMs.
+Specify apparmor as the first "major" module (e.g. AppArmor, SELinux, Smack)
+in the list.
Build the kernel
diff --git a/Documentation/admin-guide/LSM/index.rst b/Documentation/admin-guide/LSM/index.rst
index c980dfe9abf1..b44ef68f6e4d 100644
--- a/Documentation/admin-guide/LSM/index.rst
+++ b/Documentation/admin-guide/LSM/index.rst
@@ -17,9 +17,8 @@ MAC extensions, other extensions can be built using the LSM to provide
specific changes to system operation when these tweaks are not available
in the core functionality of Linux itself.
-Without a specific LSM built into the kernel, the default LSM will be the
-Linux capabilities system. Most LSMs choose to extend the capabilities
-system, building their checks on top of the defined capability hooks.
+The Linux capabilities modules will always be included. This may be
+followed by any number of "minor" modules and at most one "major" module.
For more details on capabilities, see ``capabilities(7)`` in the Linux
man-pages project.
@@ -30,6 +29,14 @@ order in which checks are made. The capability module will always
be first, followed by any "minor" modules (e.g. Yama) and then
the one "major" module (e.g. SELinux) if there is one configured.
+Process attributes associated with "major" security modules should
+be accessed and maintained using the special files in ``/proc/.../attr``.
+A security module may maintain a module specific subdirectory there,
+named after the module. ``/proc/.../attr/smack`` is provided by the Smack
+security module and contains all its special files. The files directly
+in ``/proc/.../attr`` remain as legacy interfaces for modules that provide
+subdirectories.
+
.. toctree::
:maxdepth: 1
@@ -39,3 +46,6 @@ the one "major" module (e.g. SELinux) if there is one configured.
Smack
tomoyo
Yama
+ SafeSetID
+ ipe
+ landlock
diff --git a/Documentation/admin-guide/LSM/ipe.rst b/Documentation/admin-guide/LSM/ipe.rst
new file mode 100644
index 000000000000..dc7088451f9d
--- /dev/null
+++ b/Documentation/admin-guide/LSM/ipe.rst
@@ -0,0 +1,824 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Integrity Policy Enforcement (IPE)
+==================================
+
+.. NOTE::
+
+ This is the documentation for admins, system builders, or individuals
+ attempting to use IPE. If you're looking for more developer-focused
+ documentation about IPE please see :doc:`the design docs </security/ipe>`.
+
+Overview
+--------
+
+Integrity Policy Enforcement (IPE) is a Linux Security Module that takes a
+complementary approach to access control. Unlike traditional access control
+mechanisms that rely on labels and paths for decision-making, IPE focuses
+on the immutable security properties inherent to system components. These
+properties are fundamental attributes or features of a system component
+that cannot be altered, ensuring a consistent and reliable basis for
+security decisions.
+
+To elaborate, in the context of IPE, system components primarily refer to
+files or the devices these files reside on. However, this is just a
+starting point. The concept of system components is flexible and can be
+extended to include new elements as the system evolves. The immutable
+properties include the origin of a file, which remains constant and
+unchangeable over time. For example, IPE policies can be crafted to trust
+files originating from the initramfs. Since initramfs is typically verified
+by the bootloader, its files are deemed trustworthy; "file is from
+initramfs" becomes an immutable property under IPE's consideration.
+
+The immutable property concept extends to the security features enabled on
+a file's origin, such as dm-verity or fs-verity, which provide a layer of
+integrity and trust. For example, IPE allows the definition of policies
+that trust files from a dm-verity protected device. dm-verity ensures the
+integrity of an entire device by providing a verifiable and immutable state
+of its contents. Similarly, fs-verity offers filesystem-level integrity
+checks, allowing IPE to enforce policies that trust files protected by
+fs-verity. These two features cannot be turned off once established, so
+they are considered immutable properties. These examples demonstrate how
+IPE leverages immutable properties, such as a file's origin and its
+integrity protection mechanisms, to make access control decisions.
+
+For the IPE policy, specifically, it grants the ability to enforce
+stringent access controls by assessing security properties against
+reference values defined within the policy. This assessment can be based on
+the existence of a security property (e.g., verifying if a file originates
+from initramfs) or evaluating the internal state of an immutable security
+property. The latter includes checking the roothash of a dm-verity
+protected device, determining whether dm-verity possesses a valid
+signature, assessing the digest of a fs-verity protected file, or
+determining whether fs-verity possesses a valid built-in signature. This
+nuanced approach to policy enforcement enables a highly secure and
+customizable system defense mechanism, tailored to specific security
+requirements and trust models.
+
+To enable IPE, ensure that ``CONFIG_SECURITY_IPE`` (under
+:menuselection:`Security -> Integrity Policy Enforcement (IPE)`) config
+option is enabled.
+
+Use Cases
+---------
+
+IPE works best in fixed-function devices: devices in which their purpose
+is clearly defined and not supposed to be changed (e.g. network firewall
+device in a data center, an IoT device, etcetera), where all software and
+configuration is built and provisioned by the system owner.
+
+IPE is a long-way off for use in general-purpose computing: the Linux
+community as a whole tends to follow a decentralized trust model (known as
+the web of trust), which IPE has no support for it yet. Instead, IPE
+supports PKI (public key infrastructure), which generally designates a
+set of trusted entities that provide a measure of absolute trust.
+
+Additionally, while most packages are signed today, the files inside
+the packages (for instance, the executables), tend to be unsigned. This
+makes it difficult to utilize IPE in systems where a package manager is
+expected to be functional, without major changes to the package manager
+and ecosystem behind it.
+
+The digest_cache LSM [#digest_cache_lsm]_ is a system that when combined with IPE,
+could be used to enable and support general-purpose computing use cases.
+
+Known Limitations
+-----------------
+
+IPE cannot verify the integrity of anonymous executable memory, such as
+the trampolines created by gcc closures and libffi (<3.4.2), or JIT'd code.
+Unfortunately, as this is dynamically generated code, there is no way
+for IPE to ensure the integrity of this code to form a trust basis.
+
+IPE cannot verify the integrity of programs written in interpreted
+languages when these scripts are invoked by passing these program files
+to the interpreter. This is because the way interpreters execute these
+files; the scripts themselves are not evaluated as executable code
+through one of IPE's hooks, but they are merely text files that are read
+(as opposed to compiled executables) [#interpreters]_.
+
+Threat Model
+------------
+
+IPE specifically targets the risk of tampering with user-space executable
+code after the kernel has initially booted, including the kernel modules
+loaded from userspace via ``modprobe`` or ``insmod``.
+
+To illustrate, consider a scenario where an untrusted binary, possibly
+malicious, is downloaded along with all necessary dependencies, including a
+loader and libc. The primary function of IPE in this context is to prevent
+the execution of such binaries and their dependencies.
+
+IPE achieves this by verifying the integrity and authenticity of all
+executable code before allowing them to run. It conducts a thorough
+check to ensure that the code's integrity is intact and that they match an
+authorized reference value (digest, signature, etc) as per the defined
+policy. If a binary does not pass this verification process, either
+because its integrity has been compromised or it does not meet the
+authorization criteria, IPE will deny its execution. Additionally, IPE
+generates audit logs which may be utilized to detect and analyze failures
+resulting from policy violation.
+
+Tampering threat scenarios include modification or replacement of
+executable code by a range of actors including:
+
+- Actors with physical access to the hardware
+- Actors with local network access to the system
+- Actors with access to the deployment system
+- Compromised internal systems under external control
+- Malicious end users of the system
+- Compromised end users of the system
+- Remote (external) compromise of the system
+
+IPE does not mitigate threats arising from malicious but authorized
+developers (with access to a signing certificate), or compromised
+developer tools used by them (i.e. return-oriented programming attacks).
+Additionally, IPE draws hard security boundary between userspace and
+kernelspace. As a result, kernel-level exploits are considered outside
+the scope of IPE and mitigation is left to other mechanisms.
+
+Policy
+------
+
+IPE policy is a plain-text [#devdoc]_ policy composed of multiple statements
+over several lines. There is one required line, at the top of the
+policy, indicating the policy name, and the policy version, for
+instance::
+
+ policy_name=Ex_Policy policy_version=0.0.0
+
+The policy name is a unique key identifying this policy in a human
+readable name. This is used to create nodes under securityfs as well as
+uniquely identify policies to deploy new policies vs update existing
+policies.
+
+The policy version indicates the current version of the policy (NOT the
+policy syntax version). This is used to prevent rollback of policy to
+potentially insecure previous versions of the policy.
+
+The next portion of IPE policy are rules. Rules are formed by key=value
+pairs, known as properties. IPE rules require two properties: ``action``,
+which determines what IPE does when it encounters a match against the
+rule, and ``op``, which determines when the rule should be evaluated.
+The ordering is significant, a rule must start with ``op``, and end with
+``action``. Thus, a minimal rule is::
+
+ op=EXECUTE action=ALLOW
+
+This example will allow any execution. Additional properties are used to
+assess immutable security properties about the files being evaluated.
+These properties are intended to be descriptions of systems within the
+kernel that can provide a measure of integrity verification, such that IPE
+can determine the trust of the resource based on the value of the property.
+
+Rules are evaluated top-to-bottom. As a result, any revocation rules,
+or denies should be placed early in the file to ensure that these rules
+are evaluated before a rule with ``action=ALLOW``.
+
+IPE policy supports comments. The character '#' will function as a
+comment, ignoring all characters to the right of '#' until the newline.
+
+The default behavior of IPE evaluations can also be expressed in policy,
+through the ``DEFAULT`` statement. This can be done at a global level,
+or a per-operation level::
+
+ # Global
+ DEFAULT action=ALLOW
+
+ # Operation Specific
+ DEFAULT op=EXECUTE action=ALLOW
+
+A default must be set for all known operations in IPE. If you want to
+preserve older policies being compatible with newer kernels that can introduce
+new operations, set a global default of ``ALLOW``, then override the
+defaults on a per-operation basis (as above).
+
+With configurable policy-based LSMs, there's several issues with
+enforcing the configurable policies at startup, around reading and
+parsing the policy:
+
+1. The kernel *should* not read files from userspace, so directly reading
+ the policy file is prohibited.
+2. The kernel command line has a character limit, and one kernel module
+ should not reserve the entire character limit for its own
+ configuration.
+3. There are various boot loaders in the kernel ecosystem, so handing
+ off a memory block would be costly to maintain.
+
+As a result, IPE has addressed this problem through a concept of a "boot
+policy". A boot policy is a minimal policy which is compiled into the
+kernel. This policy is intended to get the system to a state where
+userspace is set up and ready to receive commands, at which point a more
+complex policy can be deployed via securityfs. The boot policy can be
+specified via ``SECURITY_IPE_BOOT_POLICY`` config option, which accepts
+a path to a plain-text version of the IPE policy to apply. This policy
+will be compiled into the kernel. If not specified, IPE will be disabled
+until a policy is deployed and activated through securityfs.
+
+Deploying Policies
+~~~~~~~~~~~~~~~~~~
+
+Policies can be deployed from userspace through securityfs. These policies
+are signed through the PKCS#7 message format to enforce some level of
+authorization of the policies (prohibiting an attacker from gaining
+unconstrained root, and deploying an "allow all" policy). These
+policies must be signed by a certificate that chains to the
+``SYSTEM_TRUSTED_KEYRING``, or to the secondary and/or platform keyrings if
+``CONFIG_IPE_POLICY_SIG_SECONDARY_KEYRING`` and/or
+``CONFIG_IPE_POLICY_SIG_PLATFORM_KEYRING`` are enabled, respectively.
+With openssl, the policy can be signed by::
+
+ openssl smime -sign \
+ -in "$MY_POLICY" \
+ -signer "$MY_CERTIFICATE" \
+ -inkey "$MY_PRIVATE_KEY" \
+ -noattr \
+ -nodetach \
+ -nosmimecap \
+ -outform der \
+ -out "$MY_POLICY.p7b"
+
+Deploying the policies is done through securityfs, through the
+``new_policy`` node. To deploy a policy, simply cat the file into the
+securityfs node::
+
+ cat "$MY_POLICY.p7b" > /sys/kernel/security/ipe/new_policy
+
+Upon success, this will create one subdirectory under
+``/sys/kernel/security/ipe/policies/``. The subdirectory will be the
+``policy_name`` field of the policy deployed, so for the example above,
+the directory will be ``/sys/kernel/security/ipe/policies/Ex_Policy``.
+Within this directory, there will be seven files: ``pkcs7``, ``policy``,
+``name``, ``version``, ``active``, ``update``, and ``delete``.
+
+The ``pkcs7`` file is read-only. Reading it returns the raw PKCS#7 data
+that was provided to the kernel, representing the policy. If the policy being
+read is the boot policy, this will return ``ENOENT``, as it is not signed.
+
+The ``policy`` file is read only. Reading it returns the PKCS#7 inner
+content of the policy, which will be the plain text policy.
+
+The ``active`` file is used to set a policy as the currently active policy.
+This file is rw, and accepts a value of ``"1"`` to set the policy as active.
+Since only a single policy can be active at one time, all other policies
+will be marked inactive. The policy being marked active must have a policy
+version greater or equal to the currently-running version.
+
+The ``update`` file is used to update a policy that is already present
+in the kernel. This file is write-only and accepts a PKCS#7 signed
+policy. Two checks will always be performed on this policy: First, the
+``policy_names`` must match with the updated version and the existing
+version. Second the updated policy must have a policy version greater than
+the currently-running version. This is to prevent rollback attacks.
+
+The ``delete`` file is used to remove a policy that is no longer needed.
+This file is write-only and accepts a value of ``1`` to delete the policy.
+On deletion, the securityfs node representing the policy will be removed.
+However, delete the current active policy is not allowed and will return
+an operation not permitted error.
+
+Similarly, writing to both ``update`` and ``new_policy`` could result in
+bad message(policy syntax error) or file exists error. The latter error happens
+when trying to deploy a policy with a ``policy_name`` while the kernel already
+has a deployed policy with the same ``policy_name``.
+
+Deploying a policy will *not* cause IPE to start enforcing the policy. IPE will
+only enforce the policy marked active. Note that only one policy can be active
+at a time.
+
+Once deployment is successful, the policy can be activated, by writing file
+``/sys/kernel/security/ipe/policies/$policy_name/active``.
+For example, the ``Ex_Policy`` can be activated by::
+
+ echo 1 > "/sys/kernel/security/ipe/policies/Ex_Policy/active"
+
+From above point on, ``Ex_Policy`` is now the enforced policy on the
+system.
+
+IPE also provides a way to delete policies. This can be done via the
+``delete`` securityfs node,
+``/sys/kernel/security/ipe/policies/$policy_name/delete``.
+Writing ``1`` to that file deletes the policy::
+
+ echo 1 > "/sys/kernel/security/ipe/policies/$policy_name/delete"
+
+There is only one requirement to delete a policy: the policy being deleted
+must be inactive.
+
+.. NOTE::
+
+ If a traditional MAC system is enabled (SELinux, apparmor, smack), all
+ writes to ipe's securityfs nodes require ``CAP_MAC_ADMIN``.
+
+Modes
+~~~~~
+
+IPE supports two modes of operation: permissive (similar to SELinux's
+permissive mode) and enforced. In permissive mode, all events are
+checked and policy violations are logged, but the policy is not really
+enforced. This allows users to test policies before enforcing them.
+
+The default mode is enforce, and can be changed via the kernel command
+line parameter ``ipe.enforce=(0|1)``, or the securityfs node
+``/sys/kernel/security/ipe/enforce``.
+
+.. NOTE::
+
+ If a traditional MAC system is enabled (SELinux, apparmor, smack, etcetera),
+ all writes to ipe's securityfs nodes require ``CAP_MAC_ADMIN``.
+
+Audit Events
+~~~~~~~~~~~~
+
+1420 AUDIT_IPE_ACCESS
+^^^^^^^^^^^^^^^^^^^^^
+Event Examples::
+
+ type=1420 audit(1653364370.067:61): ipe_op=EXECUTE ipe_hook=MMAP enforcing=1 pid=2241 comm="ld-linux.so" path="/deny/lib/libc.so.6" dev="sda2" ino=14549020 rule="DEFAULT action=DENY"
+ type=1300 audit(1653364370.067:61): SYSCALL arch=c000003e syscall=9 success=no exit=-13 a0=7f1105a28000 a1=195000 a2=5 a3=812 items=0 ppid=2219 pid=2241 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts0 ses=2 comm="ld-linux.so" exe="/tmp/ipe-test/lib/ld-linux.so" subj=unconfined key=(null)
+ type=1327 audit(1653364370.067:61): 707974686F6E3300746573742F6D61696E2E7079002D6E00
+
+ type=1420 audit(1653364735.161:64): ipe_op=EXECUTE ipe_hook=MMAP enforcing=1 pid=2472 comm="mmap_test" path=? dev=? ino=? rule="DEFAULT action=DENY"
+ type=1300 audit(1653364735.161:64): SYSCALL arch=c000003e syscall=9 success=no exit=-13 a0=0 a1=1000 a2=4 a3=21 items=0 ppid=2219 pid=2472 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts0 ses=2 comm="mmap_test" exe="/root/overlake_test/upstream_test/vol_fsverity/bin/mmap_test" subj=unconfined key=(null)
+ type=1327 audit(1653364735.161:64): 707974686F6E3300746573742F6D61696E2E7079002D6E00
+
+This event indicates that IPE made an access control decision; the IPE
+specific record (1420) is always emitted in conjunction with a
+``AUDITSYSCALL`` record.
+
+Determining whether IPE is in permissive or enforced mode can be derived
+from ``success`` property and exit code of the ``AUDITSYSCALL`` record.
+
+
+Field descriptions:
+
++-----------+------------+-----------+---------------------------------------------------------------------------------+
+| Field | Value Type | Optional? | Description of Value |
++===========+============+===========+=================================================================================+
+| ipe_op | string | No | The IPE operation name associated with the log |
++-----------+------------+-----------+---------------------------------------------------------------------------------+
+| ipe_hook | string | No | The name of the LSM hook that triggered the IPE event |
++-----------+------------+-----------+---------------------------------------------------------------------------------+
+| enforcing | integer | No | The current IPE enforcing state 1 is in enforcing mode, 0 is in permissive mode |
++-----------+------------+-----------+---------------------------------------------------------------------------------+
+| pid | integer | No | The pid of the process that triggered the IPE event. |
++-----------+------------+-----------+---------------------------------------------------------------------------------+
+| comm | string | No | The command line program name of the process that triggered the IPE event |
++-----------+------------+-----------+---------------------------------------------------------------------------------+
+| path | string | Yes | The absolute path to the evaluated file |
++-----------+------------+-----------+---------------------------------------------------------------------------------+
+| ino | integer | Yes | The inode number of the evaluated file |
++-----------+------------+-----------+---------------------------------------------------------------------------------+
+| dev | string | Yes | The device name of the evaluated file, e.g. vda |
++-----------+------------+-----------+---------------------------------------------------------------------------------+
+| rule | string | No | The matched policy rule |
++-----------+------------+-----------+---------------------------------------------------------------------------------+
+
+1421 AUDIT_IPE_CONFIG_CHANGE
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Event Example::
+
+ type=1421 audit(1653425583.136:54): old_active_pol_name="Allow_All" old_active_pol_version=0.0.0 old_policy_digest=sha256:E3B0C44298FC1C149AFBF4C8996FB92427AE41E4649B934CA495991B7852B855 new_active_pol_name="boot_verified" new_active_pol_version=0.0.0 new_policy_digest=sha256:820EEA5B40CA42B51F68962354BA083122A20BB846F26765076DD8EED7B8F4DB auid=4294967295 ses=4294967295 lsm=ipe res=1
+ type=1300 audit(1653425583.136:54): SYSCALL arch=c000003e syscall=1 success=yes exit=2 a0=3 a1=5596fcae1fb0 a2=2 a3=2 items=0 ppid=184 pid=229 auid=4294967295 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts0 ses=4294967295 comm="python3" exe="/usr/bin/python3.10" key=(null)
+ type=1327 audit(1653425583.136:54): PROCTITLE proctitle=707974686F6E3300746573742F6D61696E2E7079002D66002E2
+
+This event indicates that IPE switched the active poliy from one to another
+along with the version and the hash digest of the two policies.
+Note IPE can only have one policy active at a time, all access decision
+evaluation is based on the current active policy.
+The normal procedure to deploy a new policy is loading the policy to deploy
+into the kernel first, then switch the active policy to it.
+
+This record will always be emitted in conjunction with a ``AUDITSYSCALL`` record for the ``write`` syscall.
+
+Field descriptions:
+
++------------------------+------------+-----------+---------------------------------------------------+
+| Field | Value Type | Optional? | Description of Value |
++========================+============+===========+===================================================+
+| old_active_pol_name | string | Yes | The name of previous active policy |
++------------------------+------------+-----------+---------------------------------------------------+
+| old_active_pol_version | string | Yes | The version of previous active policy |
++------------------------+------------+-----------+---------------------------------------------------+
+| old_policy_digest | string | Yes | The hash of previous active policy |
++------------------------+------------+-----------+---------------------------------------------------+
+| new_active_pol_name | string | No | The name of current active policy |
++------------------------+------------+-----------+---------------------------------------------------+
+| new_active_pol_version | string | No | The version of current active policy |
++------------------------+------------+-----------+---------------------------------------------------+
+| new_policy_digest | string | No | The hash of current active policy |
++------------------------+------------+-----------+---------------------------------------------------+
+| auid | integer | No | The login user ID |
++------------------------+------------+-----------+---------------------------------------------------+
+| ses | integer | No | The login session ID |
++------------------------+------------+-----------+---------------------------------------------------+
+| lsm | string | No | The lsm name associated with the event |
++------------------------+------------+-----------+---------------------------------------------------+
+| res | integer | No | The result of the audited operation(success/fail) |
++------------------------+------------+-----------+---------------------------------------------------+
+
+1422 AUDIT_IPE_POLICY_LOAD
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Event Example::
+
+ type=1422 audit(1653425529.927:53): policy_name="boot_verified" policy_version=0.0.0 policy_digest=sha256:820EEA5B40CA42B51F68962354BA083122A20BB846F26765076DD8EED7B8F4DB auid=4294967295 ses=4294967295 lsm=ipe res=1 errno=0
+ type=1300 audit(1653425529.927:53): arch=c000003e syscall=1 success=yes exit=2567 a0=3 a1=5596fcae1fb0 a2=a07 a3=2 items=0 ppid=184 pid=229 auid=4294967295 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts0 ses=4294967295 comm="python3" exe="/usr/bin/python3.10" key=(null)
+ type=1327 audit(1653425529.927:53): PROCTITLE proctitle=707974686F6E3300746573742F6D61696E2E7079002D66002E2E
+
+This record indicates a new policy has been loaded into the kernel with the policy name, policy version and policy hash.
+
+This record will always be emitted in conjunction with a ``AUDITSYSCALL`` record for the ``write`` syscall.
+
+Field descriptions:
+
++----------------+------------+-----------+-------------------------------------------------------------+
+| Field | Value Type | Optional? | Description of Value |
++================+============+===========+=============================================================+
+| policy_name | string | Yes | The policy_name |
++----------------+------------+-----------+-------------------------------------------------------------+
+| policy_version | string | Yes | The policy_version |
++----------------+------------+-----------+-------------------------------------------------------------+
+| policy_digest | string | Yes | The policy hash |
++----------------+------------+-----------+-------------------------------------------------------------+
+| auid | integer | No | The login user ID |
++----------------+------------+-----------+-------------------------------------------------------------+
+| ses | integer | No | The login session ID |
++----------------+------------+-----------+-------------------------------------------------------------+
+| lsm | string | No | The lsm name associated with the event |
++----------------+------------+-----------+-------------------------------------------------------------+
+| res | integer | No | The result of the audited operation(success/fail) |
++----------------+------------+-----------+-------------------------------------------------------------+
+| errno | integer | No | Error code from policy loading operations (see table below) |
++----------------+------------+-----------+-------------------------------------------------------------+
+
+Policy error codes (errno):
+
+The following table lists the error codes that may appear in the errno field while loading or updating the policy:
+
++----------------+--------------------------------------------------------+
+| Error Code | Description |
++================+========================================================+
+| 0 | Success |
++----------------+--------------------------------------------------------+
+| -EPERM | Insufficient permission |
++----------------+--------------------------------------------------------+
+| -EEXIST | Same name policy already deployed |
++----------------+--------------------------------------------------------+
+| -EBADMSG | Policy is invalid |
++----------------+--------------------------------------------------------+
+| -ENOMEM | Out of memory (OOM) |
++----------------+--------------------------------------------------------+
+| -ERANGE | Policy version number overflow |
++----------------+--------------------------------------------------------+
+| -EINVAL | Policy version parsing error |
++----------------+--------------------------------------------------------+
+| -ENOKEY | Key used to sign the IPE policy not found in keyring |
++----------------+--------------------------------------------------------+
+| -EKEYREJECTED | Policy signature verification failed |
++----------------+--------------------------------------------------------+
+| -ESTALE | Attempting to update an IPE policy with older version |
++----------------+--------------------------------------------------------+
+| -ENOENT | Policy was deleted while updating |
++----------------+--------------------------------------------------------+
+
+1404 AUDIT_MAC_STATUS
+^^^^^^^^^^^^^^^^^^^^^
+
+Event Examples::
+
+ type=1404 audit(1653425689.008:55): enforcing=0 old_enforcing=1 auid=4294967295 ses=4294967295 enabled=1 old-enabled=1 lsm=ipe res=1
+ type=1300 audit(1653425689.008:55): arch=c000003e syscall=1 success=yes exit=2 a0=1 a1=55c1065e5c60 a2=2 a3=0 items=0 ppid=405 pid=441 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=)
+ type=1327 audit(1653425689.008:55): proctitle="-bash"
+
+ type=1404 audit(1653425689.008:55): enforcing=1 old_enforcing=0 auid=4294967295 ses=4294967295 enabled=1 old-enabled=1 lsm=ipe res=1
+ type=1300 audit(1653425689.008:55): arch=c000003e syscall=1 success=yes exit=2 a0=1 a1=55c1065e5c60 a2=2 a3=0 items=0 ppid=405 pid=441 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=)
+ type=1327 audit(1653425689.008:55): proctitle="-bash"
+
+This record will always be emitted in conjunction with a ``AUDITSYSCALL`` record for the ``write`` syscall.
+
+Field descriptions:
+
++---------------+------------+-----------+-------------------------------------------------------------------------------------------------+
+| Field | Value Type | Optional? | Description of Value |
++===============+============+===========+=================================================================================================+
+| enforcing | integer | No | The enforcing state IPE is being switched to, 1 is in enforcing mode, 0 is in permissive mode |
++---------------+------------+-----------+-------------------------------------------------------------------------------------------------+
+| old_enforcing | integer | No | The enforcing state IPE is being switched from, 1 is in enforcing mode, 0 is in permissive mode |
++---------------+------------+-----------+-------------------------------------------------------------------------------------------------+
+| auid | integer | No | The login user ID |
++---------------+------------+-----------+-------------------------------------------------------------------------------------------------+
+| ses | integer | No | The login session ID |
++---------------+------------+-----------+-------------------------------------------------------------------------------------------------+
+| enabled | integer | No | The new TTY audit enabled setting |
++---------------+------------+-----------+-------------------------------------------------------------------------------------------------+
+| old-enabled | integer | No | The old TTY audit enabled setting |
++---------------+------------+-----------+-------------------------------------------------------------------------------------------------+
+| lsm | string | No | The lsm name associated with the event |
++---------------+------------+-----------+-------------------------------------------------------------------------------------------------+
+| res | integer | No | The result of the audited operation(success/fail) |
++---------------+------------+-----------+-------------------------------------------------------------------------------------------------+
+
+
+Success Auditing
+^^^^^^^^^^^^^^^^
+
+IPE supports success auditing. When enabled, all events that pass IPE
+policy and are not blocked will emit an audit event. This is disabled by
+default, and can be enabled via the kernel command line
+``ipe.success_audit=(0|1)`` or
+``/sys/kernel/security/ipe/success_audit`` securityfs file.
+
+This is *very* noisy, as IPE will check every userspace binary on the
+system, but is useful for debugging policies.
+
+.. NOTE::
+
+ If a traditional MAC system is enabled (SELinux, apparmor, smack, etcetera),
+ all writes to ipe's securityfs nodes require ``CAP_MAC_ADMIN``.
+
+Properties
+----------
+
+As explained above, IPE properties are ``key=value`` pairs expressed in IPE
+policy. Two properties are built-into the policy parser: 'op' and 'action'.
+The other properties are used to restrict immutable security properties
+about the files being evaluated. Currently those properties are:
+'``boot_verified``', '``dmverity_signature``', '``dmverity_roothash``',
+'``fsverity_signature``', '``fsverity_digest``'. A description of all
+properties supported by IPE are listed below:
+
+op
+~~
+
+Indicates the operation for a rule to apply to. Must be in every rule,
+as the first token. IPE supports the following operations:
+
+ ``EXECUTE``
+
+ Pertains to any file attempting to be executed, or loaded as an
+ executable.
+
+ ``FIRMWARE``:
+
+ Pertains to firmware being loaded via the firmware_class interface.
+ This covers both the preallocated buffer and the firmware file
+ itself.
+
+ ``KMODULE``:
+
+ Pertains to loading kernel modules via ``modprobe`` or ``insmod``.
+
+ ``KEXEC_IMAGE``:
+
+ Pertains to kernel images loading via ``kexec``.
+
+ ``KEXEC_INITRAMFS``
+
+ Pertains to initrd images loading via ``kexec --initrd``.
+
+ ``POLICY``:
+
+ Controls loading policies via reading a kernel-space initiated read.
+
+ An example of such is loading IMA policies by writing the path
+ to the policy file to ``$securityfs/ima/policy``
+
+ ``X509_CERT``:
+
+ Controls loading IMA certificates through the Kconfigs,
+ ``CONFIG_IMA_X509_PATH`` and ``CONFIG_EVM_X509_PATH``.
+
+action
+~~~~~~
+
+ Determines what IPE should do when a rule matches. Must be in every
+ rule, as the final clause. Can be one of:
+
+ ``ALLOW``:
+
+ If the rule matches, explicitly allow access to the resource to proceed
+ without executing any more rules.
+
+ ``DENY``:
+
+ If the rule matches, explicitly prohibit access to the resource to
+ proceed without executing any more rules.
+
+boot_verified
+~~~~~~~~~~~~~
+
+ This property can be utilized for authorization of files from initramfs.
+ The format of this property is::
+
+ boot_verified=(TRUE|FALSE)
+
+
+ .. WARNING::
+
+ This property will trust files from initramfs(rootfs). It should
+ only be used during early booting stage. Before mounting the real
+ rootfs on top of the initramfs, initramfs script will recursively
+ remove all files and directories on the initramfs. This is typically
+ implemented by using switch_root(8) [#switch_root]_. Therefore the
+ initramfs will be empty and not accessible after the real
+ rootfs takes over. It is advised to switch to a different policy
+ that doesn't rely on the property after this point.
+ This ensures that the trust policies remain relevant and effective
+ throughout the system's operation.
+
+dmverity_roothash
+~~~~~~~~~~~~~~~~~
+
+ This property can be utilized for authorization or revocation of
+ specific dm-verity volumes, identified via their root hashes. It has a
+ dependency on the DM_VERITY module. This property is controlled by
+ the ``IPE_PROP_DM_VERITY`` config option, it will be automatically
+ selected when ``SECURITY_IPE`` and ``DM_VERITY`` are all enabled.
+ The format of this property is::
+
+ dmverity_roothash=DigestName:HexadecimalString
+
+ The supported DigestNames for dmverity_roothash are [#dmveritydigests]_
+
+ + blake2b-512
+ + blake2s-256
+ + sha256
+ + sha384
+ + sha512
+ + sha3-224
+ + sha3-256
+ + sha3-384
+ + sha3-512
+ + sm3
+ + rmd160
+
+dmverity_signature
+~~~~~~~~~~~~~~~~~~
+
+ This property can be utilized for authorization of all dm-verity
+ volumes that have a signed roothash that validated by a keyring
+ specified by dm-verity's configuration, either the system trusted
+ keyring, or the secondary keyring. It depends on
+ ``DM_VERITY_VERIFY_ROOTHASH_SIG`` config option and is controlled by
+ the ``IPE_PROP_DM_VERITY_SIGNATURE`` config option, it will be automatically
+ selected when ``SECURITY_IPE``, ``DM_VERITY`` and
+ ``DM_VERITY_VERIFY_ROOTHASH_SIG`` are all enabled.
+ The format of this property is::
+
+ dmverity_signature=(TRUE|FALSE)
+
+fsverity_digest
+~~~~~~~~~~~~~~~
+
+ This property can be utilized for authorization of specific fsverity
+ enabled files, identified via their fsverity digests.
+ It depends on ``FS_VERITY`` config option and is controlled by
+ the ``IPE_PROP_FS_VERITY`` config option, it will be automatically
+ selected when ``SECURITY_IPE`` and ``FS_VERITY`` are all enabled.
+ The format of this property is::
+
+ fsverity_digest=DigestName:HexadecimalString
+
+ The supported DigestNames for fsverity_digest are [#fsveritydigest]_
+
+ + sha256
+ + sha512
+
+fsverity_signature
+~~~~~~~~~~~~~~~~~~
+
+ This property is used to authorize all fs-verity enabled files that have
+ been verified by fs-verity's built-in signature mechanism. The signature
+ verification relies on a key stored within the ".fs-verity" keyring. It
+ depends on ``FS_VERITY_BUILTIN_SIGNATURES`` config option and
+ it is controlled by the ``IPE_PROP_FS_VERITY`` config option,
+ it will be automatically selected when ``SECURITY_IPE``, ``FS_VERITY``
+ and ``FS_VERITY_BUILTIN_SIGNATURES`` are all enabled.
+ The format of this property is::
+
+ fsverity_signature=(TRUE|FALSE)
+
+Policy Examples
+---------------
+
+Allow all
+~~~~~~~~~
+
+::
+
+ policy_name=Allow_All policy_version=0.0.0
+ DEFAULT action=ALLOW
+
+Allow only initramfs
+~~~~~~~~~~~~~~~~~~~~
+
+::
+
+ policy_name=Allow_Initramfs policy_version=0.0.0
+ DEFAULT action=DENY
+
+ op=EXECUTE boot_verified=TRUE action=ALLOW
+
+Allow any signed and validated dm-verity volume and the initramfs
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+ policy_name=Allow_Signed_DMV_And_Initramfs policy_version=0.0.0
+ DEFAULT action=DENY
+
+ op=EXECUTE boot_verified=TRUE action=ALLOW
+ op=EXECUTE dmverity_signature=TRUE action=ALLOW
+
+Prohibit execution from a specific dm-verity volume
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+ policy_name=Deny_DMV_By_Roothash policy_version=0.0.0
+ DEFAULT action=DENY
+
+ op=EXECUTE dmverity_roothash=sha256:cd2c5bae7c6c579edaae4353049d58eb5f2e8be0244bf05345bc8e5ed257baff action=DENY
+
+ op=EXECUTE boot_verified=TRUE action=ALLOW
+ op=EXECUTE dmverity_signature=TRUE action=ALLOW
+
+Allow only a specific dm-verity volume
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+ policy_name=Allow_DMV_By_Roothash policy_version=0.0.0
+ DEFAULT action=DENY
+
+ op=EXECUTE dmverity_roothash=sha256:401fcec5944823ae12f62726e8184407a5fa9599783f030dec146938 action=ALLOW
+
+Allow any fs-verity file with a valid built-in signature
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+ policy_name=Allow_Signed_And_Validated_FSVerity policy_version=0.0.0
+ DEFAULT action=DENY
+
+ op=EXECUTE fsverity_signature=TRUE action=ALLOW
+
+Allow execution of a specific fs-verity file
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+ policy_name=ALLOW_FSV_By_Digest policy_version=0.0.0
+ DEFAULT action=DENY
+
+ op=EXECUTE fsverity_digest=sha256:fd88f2b8824e197f850bf4c5109bea5cf0ee38104f710843bb72da796ba5af9e action=ALLOW
+
+Additional Information
+----------------------
+
+- `Github Repository <https://github.com/microsoft/ipe>`_
+- :doc:`Developer and design docs for IPE </security/ipe>`
+
+FAQ
+---
+
+Q:
+ What's the difference between other LSMs which provide a measure of
+ trust-based access control?
+
+A:
+
+ In general, there's two other LSMs that can provide similar functionality:
+ IMA, and Loadpin.
+
+ IMA and IPE are functionally very similar. The significant difference between
+ the two is the policy. [#devdoc]_
+
+ Loadpin and IPE differ fairly dramatically, as Loadpin only covers the IPE's
+ kernel read operations, whereas IPE is capable of controlling execution
+ on top of kernel read. The trust model is also different; Loadpin roots its
+ trust in the initial super-block, whereas trust in IPE is stemmed from kernel
+ itself (via ``SYSTEM_TRUSTED_KEYS``).
+
+-----------
+
+.. [#digest_cache_lsm] https://lore.kernel.org/lkml/20240415142436.2545003-1-roberto.sassu@huaweicloud.com/
+
+.. [#interpreters] There is `some interest in solving this issue <https://lore.kernel.org/lkml/20220321161557.495388-1-mic@digikod.net/>`_.
+
+.. [#devdoc] Please see :doc:`the design docs </security/ipe>` for more on
+ this topic.
+
+.. [#switch_root] https://man7.org/linux/man-pages/man8/switch_root.8.html
+
+.. [#dmveritydigests] These hash algorithms are based on values accepted by
+ the Linux crypto API; IPE does not impose any
+ restrictions on the digest algorithm itself;
+ thus, this list may be out of date.
+
+.. [#fsveritydigest] These hash algorithms are based on values accepted by the
+ kernel's fsverity support; IPE does not impose any
+ restrictions on the digest algorithm itself;
+ thus, this list may be out of date.
diff --git a/Documentation/admin-guide/LSM/landlock.rst b/Documentation/admin-guide/LSM/landlock.rst
new file mode 100644
index 000000000000..9e61607def08
--- /dev/null
+++ b/Documentation/admin-guide/LSM/landlock.rst
@@ -0,0 +1,158 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. Copyright © 2025 Microsoft Corporation
+
+================================
+Landlock: system-wide management
+================================
+
+:Author: Mickaël Salaün
+:Date: March 2025
+
+Landlock can leverage the audit framework to log events.
+
+User space documentation can be found here:
+Documentation/userspace-api/landlock.rst.
+
+Audit
+=====
+
+Denied access requests are logged by default for a sandboxed program if `audit`
+is enabled. This default behavior can be changed with the
+sys_landlock_restrict_self() flags (cf.
+Documentation/userspace-api/landlock.rst). Landlock logs can also be masked
+thanks to audit rules. Landlock can generate 2 audit record types.
+
+Record types
+------------
+
+AUDIT_LANDLOCK_ACCESS
+ This record type identifies a denied access request to a kernel resource.
+ The ``domain`` field indicates the ID of the domain that blocked the
+ request. The ``blockers`` field indicates the cause(s) of this denial
+ (separated by a comma), and the following fields identify the kernel object
+ (similar to SELinux). There may be more than one of this record type per
+ audit event.
+
+ Example with a file link request generating two records in the same event::
+
+ domain=195ba459b blockers=fs.refer path="/usr/bin" dev="vda2" ino=351
+ domain=195ba459b blockers=fs.make_reg,fs.refer path="/usr/local" dev="vda2" ino=365
+
+AUDIT_LANDLOCK_DOMAIN
+ This record type describes the status of a Landlock domain. The ``status``
+ field can be either ``allocated`` or ``deallocated``.
+
+ The ``allocated`` status is part of the same audit event and follows
+ the first logged ``AUDIT_LANDLOCK_ACCESS`` record of a domain. It identifies
+ Landlock domain information at the time of the sys_landlock_restrict_self()
+ call with the following fields:
+
+ - the ``domain`` ID
+ - the enforcement ``mode``
+ - the domain creator's ``pid``
+ - the domain creator's ``uid``
+ - the domain creator's executable path (``exe``)
+ - the domain creator's command line (``comm``)
+
+ Example::
+
+ domain=195ba459b status=allocated mode=enforcing pid=300 uid=0 exe="/root/sandboxer" comm="sandboxer"
+
+ The ``deallocated`` status is an event on its own and it identifies a
+ Landlock domain release. After such event, it is guarantee that the
+ related domain ID will never be reused during the lifetime of the system.
+ The ``domain`` field indicates the ID of the domain which is released, and
+ the ``denials`` field indicates the total number of denied access request,
+ which might not have been logged according to the audit rules and
+ sys_landlock_restrict_self()'s flags.
+
+ Example::
+
+ domain=195ba459b status=deallocated denials=3
+
+
+Event samples
+--------------
+
+Here are two examples of log events (see serial numbers).
+
+In this example a sandboxed program (``kill``) tries to send a signal to the
+init process, which is denied because of the signal scoping restriction
+(``LL_SCOPED=s``)::
+
+ $ LL_FS_RO=/ LL_FS_RW=/ LL_SCOPED=s LL_FORCE_LOG=1 ./sandboxer kill 1
+
+This command generates two events, each identified with a unique serial
+number following a timestamp (``msg=audit(1729738800.268:30)``). The first
+event (serial ``30``) contains 4 records. The first record
+(``type=LANDLOCK_ACCESS``) shows an access denied by the domain `1a6fdc66f`.
+The cause of this denial is signal scopping restriction
+(``blockers=scope.signal``). The process that would have receive this signal
+is the init process (``opid=1 ocomm="systemd"``).
+
+The second record (``type=LANDLOCK_DOMAIN``) describes (``status=allocated``)
+domain `1a6fdc66f`. This domain was created by process ``286`` executing the
+``/root/sandboxer`` program launched by the root user.
+
+The third record (``type=SYSCALL``) describes the syscall, its provided
+arguments, its result (``success=no exit=-1``), and the process that called it.
+
+The fourth record (``type=PROCTITLE``) shows the command's name as an
+hexadecimal value. This can be translated with ``python -c
+'print(bytes.fromhex("6B696C6C0031"))'``.
+
+Finally, the last record (``type=LANDLOCK_DOMAIN``) is also the only one from
+the second event (serial ``31``). It is not tied to a direct user space action
+but an asynchronous one to free resources tied to a Landlock domain
+(``status=deallocated``). This can be useful to know that the following logs
+will not concern the domain ``1a6fdc66f`` anymore. This record also summarize
+the number of requests this domain denied (``denials=1``), whether they were
+logged or not.
+
+.. code-block::
+
+ type=LANDLOCK_ACCESS msg=audit(1729738800.268:30): domain=1a6fdc66f blockers=scope.signal opid=1 ocomm="systemd"
+ type=LANDLOCK_DOMAIN msg=audit(1729738800.268:30): domain=1a6fdc66f status=allocated mode=enforcing pid=286 uid=0 exe="/root/sandboxer" comm="sandboxer"
+ type=SYSCALL msg=audit(1729738800.268:30): arch=c000003e syscall=62 success=no exit=-1 [..] ppid=272 pid=286 auid=0 uid=0 gid=0 [...] comm="kill" [...]
+ type=PROCTITLE msg=audit(1729738800.268:30): proctitle=6B696C6C0031
+ type=LANDLOCK_DOMAIN msg=audit(1729738800.324:31): domain=1a6fdc66f status=deallocated denials=1
+
+Here is another example showcasing filesystem access control::
+
+ $ LL_FS_RO=/ LL_FS_RW=/tmp LL_FORCE_LOG=1 ./sandboxer sh -c "echo > /etc/passwd"
+
+The related audit logs contains 8 records from 3 different events (serials 33,
+34 and 35) created by the same domain `1a6fdc679`::
+
+ type=LANDLOCK_ACCESS msg=audit(1729738800.221:33): domain=1a6fdc679 blockers=fs.write_file path="/dev/tty" dev="devtmpfs" ino=9
+ type=LANDLOCK_DOMAIN msg=audit(1729738800.221:33): domain=1a6fdc679 status=allocated mode=enforcing pid=289 uid=0 exe="/root/sandboxer" comm="sandboxer"
+ type=SYSCALL msg=audit(1729738800.221:33): arch=c000003e syscall=257 success=no exit=-13 [...] ppid=272 pid=289 auid=0 uid=0 gid=0 [...] comm="sh" [...]
+ type=PROCTITLE msg=audit(1729738800.221:33): proctitle=7368002D63006563686F203E202F6574632F706173737764
+ type=LANDLOCK_ACCESS msg=audit(1729738800.221:34): domain=1a6fdc679 blockers=fs.write_file path="/etc/passwd" dev="vda2" ino=143821
+ type=SYSCALL msg=audit(1729738800.221:34): arch=c000003e syscall=257 success=no exit=-13 [...] ppid=272 pid=289 auid=0 uid=0 gid=0 [...] comm="sh" [...]
+ type=PROCTITLE msg=audit(1729738800.221:34): proctitle=7368002D63006563686F203E202F6574632F706173737764
+ type=LANDLOCK_DOMAIN msg=audit(1729738800.261:35): domain=1a6fdc679 status=deallocated denials=2
+
+
+Event filtering
+---------------
+
+If you get spammed with audit logs related to Landlock, this is either an
+attack attempt or a bug in the security policy. We can put in place some
+filters to limit noise with two complementary ways:
+
+- with sys_landlock_restrict_self()'s flags if we can fix the sandboxed
+ programs,
+- or with audit rules (see :manpage:`auditctl(8)`).
+
+Additional documentation
+========================
+
+* `Linux Audit Documentation`_
+* Documentation/userspace-api/landlock.rst
+* Documentation/security/landlock.rst
+* https://landlock.io
+
+.. Links
+.. _Linux Audit Documentation:
+ https://github.com/linux-audit/audit-documentation/wiki
diff --git a/Documentation/admin-guide/LSM/tomoyo.rst b/Documentation/admin-guide/LSM/tomoyo.rst
index e2d6b6e15082..bdb2c2e2a1b2 100644
--- a/Documentation/admin-guide/LSM/tomoyo.rst
+++ b/Documentation/admin-guide/LSM/tomoyo.rst
@@ -9,8 +9,8 @@ TOMOYO is a name-based MAC extension (LSM module) for the Linux kernel.
LiveCD-based tutorials are available at
-http://tomoyo.sourceforge.jp/1.8/ubuntu12.04-live.html
-http://tomoyo.sourceforge.jp/1.8/centos6-live.html
+https://tomoyo.sourceforge.net/1.8/ubuntu12.04-live.html
+https://tomoyo.sourceforge.net/1.8/centos6-live.html
Though these tutorials use non-LSM version of TOMOYO, they are useful for you
to know what TOMOYO is.
@@ -21,45 +21,32 @@ How to enable TOMOYO?
Build the kernel with ``CONFIG_SECURITY_TOMOYO=y`` and pass ``security=tomoyo`` on
kernel's command line.
-Please see http://tomoyo.osdn.jp/2.5/ for details.
+Please see https://tomoyo.sourceforge.net/2.6/ for details.
Where is documentation?
=======================
User <-> Kernel interface documentation is available at
-http://tomoyo.osdn.jp/2.5/policy-specification/index.html .
+https://tomoyo.sourceforge.net/2.6/policy-specification/index.html .
Materials we prepared for seminars and symposiums are available at
-http://osdn.jp/projects/tomoyo/docs/?category_id=532&language_id=1 .
+https://sourceforge.net/projects/tomoyo/files/docs/ .
Below lists are chosen from three aspects.
What is TOMOYO?
TOMOYO Linux Overview
- http://osdn.jp/projects/tomoyo/docs/lca2009-takeda.pdf
+ https://sourceforge.net/projects/tomoyo/files/docs/lca2009-takeda.pdf
TOMOYO Linux: pragmatic and manageable security for Linux
- http://osdn.jp/projects/tomoyo/docs/freedomhectaipei-tomoyo.pdf
+ https://sourceforge.net/projects/tomoyo/files/docs/freedomhectaipei-tomoyo.pdf
TOMOYO Linux: A Practical Method to Understand and Protect Your Own Linux Box
- http://osdn.jp/projects/tomoyo/docs/PacSec2007-en-no-demo.pdf
+ https://sourceforge.net/projects/tomoyo/files/docs/PacSec2007-en-no-demo.pdf
What can TOMOYO do?
Deep inside TOMOYO Linux
- http://osdn.jp/projects/tomoyo/docs/lca2009-kumaneko.pdf
+ https://sourceforge.net/projects/tomoyo/files/docs/lca2009-kumaneko.pdf
The role of "pathname based access control" in security.
- http://osdn.jp/projects/tomoyo/docs/lfj2008-bof.pdf
+ https://sourceforge.net/projects/tomoyo/files/docs/lfj2008-bof.pdf
History of TOMOYO?
Realities of Mainlining
- http://osdn.jp/projects/tomoyo/docs/lfj2008.pdf
-
-What is future plan?
-====================
-
-We believe that inode based security and name based security are complementary
-and both should be used together. But unfortunately, so far, we cannot enable
-multiple LSM modules at the same time. We feel sorry that you have to give up
-SELinux/SMACK/AppArmor etc. when you want to use TOMOYO.
-
-We hope that LSM becomes stackable in future. Meanwhile, you can use non-LSM
-version of TOMOYO, available at http://tomoyo.osdn.jp/1.8/ .
-LSM version of TOMOYO is a subset of non-LSM version of TOMOYO. We are planning
-to port non-LSM version's functionalities to LSM versions.
+ https://sourceforge.net/projects/tomoyo/files/docs/lfj2008.pdf
diff --git a/Documentation/admin-guide/RAS/address-translation.rst b/Documentation/admin-guide/RAS/address-translation.rst
new file mode 100644
index 000000000000..f0ca17b43cd3
--- /dev/null
+++ b/Documentation/admin-guide/RAS/address-translation.rst
@@ -0,0 +1,24 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Address translation
+===================
+
+x86 AMD
+-------
+
+Zen-based AMD systems include a Data Fabric that manages the layout of
+physical memory. Devices attached to the Fabric, like memory controllers,
+I/O, etc., may not have a complete view of the system physical memory map.
+These devices may provide a "normalized", i.e. device physical, address
+when reporting memory errors. Normalized addresses must be translated to
+a system physical address for the kernel to action on the memory.
+
+AMD Address Translation Library (CONFIG_AMD_ATL) provides translation for
+this case.
+
+Glossary of acronyms used in address translation for Zen-based systems
+
+* CCM = Cache Coherent Moderator
+* COD = Cluster-on-Die
+* COH_ST = Coherent Station
+* DF = Data Fabric
diff --git a/Documentation/admin-guide/RAS/error-decoding.rst b/Documentation/admin-guide/RAS/error-decoding.rst
new file mode 100644
index 000000000000..26a72f3fe5de
--- /dev/null
+++ b/Documentation/admin-guide/RAS/error-decoding.rst
@@ -0,0 +1,21 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Error decoding
+==============
+
+x86
+---
+
+Error decoding on AMD systems should be done using the rasdaemon tool:
+https://github.com/mchehab/rasdaemon/
+
+While the daemon is running, it would automatically log and decode
+errors. If not, one can still decode such errors by supplying the
+hardware information from the error::
+
+ $ rasdaemon -p --status <STATUS> --ipid <IPID> --smca
+
+Also, the user can pass particular family and model to decode the error
+string::
+
+ $ rasdaemon -p --status <STATUS> --ipid <IPID> --smca --family <CPU Family> --model <CPU Model> --bank <BANK_NUM>
diff --git a/Documentation/admin-guide/RAS/index.rst b/Documentation/admin-guide/RAS/index.rst
new file mode 100644
index 000000000000..f4087040a7c0
--- /dev/null
+++ b/Documentation/admin-guide/RAS/index.rst
@@ -0,0 +1,7 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. toctree::
+ :maxdepth: 2
+
+ main
+ error-decoding
+ address-translation
diff --git a/Documentation/admin-guide/RAS/main.rst b/Documentation/admin-guide/RAS/main.rst
new file mode 100644
index 000000000000..447bfde509fb
--- /dev/null
+++ b/Documentation/admin-guide/RAS/main.rst
@@ -0,0 +1,1223 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+==================================================
+Reliability, Availability and Serviceability (RAS)
+==================================================
+
+This documents different aspects of the RAS functionality present in the
+kernel.
+
+RAS concepts
+************
+
+Reliability, Availability and Serviceability (RAS) is a concept used on
+servers meant to measure their robustness.
+
+Reliability
+ is the probability that a system will produce correct outputs.
+
+ * Generally measured as Mean Time Between Failures (MTBF)
+ * Enhanced by features that help to avoid, detect and repair hardware faults
+
+Availability
+ is the probability that a system is operational at a given time
+
+ * Generally measured as a percentage of downtime per a period of time
+ * Often uses mechanisms to detect and correct hardware faults in
+ runtime;
+
+Serviceability (or maintainability)
+ is the simplicity and speed with which a system can be repaired or
+ maintained
+
+ * Generally measured on Mean Time Between Repair (MTBR)
+
+Improving RAS
+-------------
+
+In order to reduce systems downtime, a system should be capable of detecting
+hardware errors, and, when possible correcting them in runtime. It should
+also provide mechanisms to detect hardware degradation, in order to warn
+the system administrator to take the action of replacing a component before
+it causes data loss or system downtime.
+
+Among the monitoring measures, the most usual ones include:
+
+* CPU – detect errors at instruction execution and at L1/L2/L3 caches;
+* Memory – add error correction logic (ECC) to detect and correct errors;
+* I/O – add CRC checksums for transferred data;
+* Storage – RAID, journal file systems, checksums,
+ Self-Monitoring, Analysis and Reporting Technology (SMART).
+
+By monitoring the number of occurrences of error detections, it is possible
+to identify if the probability of hardware errors is increasing, and, on such
+case, do a preventive maintenance to replace a degraded component while
+those errors are correctable.
+
+Types of errors
+---------------
+
+Most mechanisms used on modern systems use technologies like Hamming
+Codes that allow error correction when the number of errors on a bit packet
+is below a threshold. If the number of errors is above, those mechanisms
+can indicate with a high degree of confidence that an error happened, but
+they can't correct.
+
+Also, sometimes an error occur on a component that it is not used. For
+example, a part of the memory that it is not currently allocated.
+
+That defines some categories of errors:
+
+* **Correctable Error (CE)** - the error detection mechanism detected and
+ corrected the error. Such errors are usually not fatal, although some
+ Kernel mechanisms allow the system administrator to consider them as fatal.
+
+* **Uncorrected Error (UE)** - the amount of errors happened above the error
+ correction threshold, and the system was unable to auto-correct.
+
+* **Fatal Error** - when an UE error happens on a critical component of the
+ system (for example, a piece of the Kernel got corrupted by an UE), the
+ only reliable way to avoid data corruption is to hang or reboot the machine.
+
+* **Non-fatal Error** - when an UE error happens on an unused component,
+ like a CPU in power down state or an unused memory bank, the system may
+ still run, eventually replacing the affected hardware by a hot spare,
+ if available.
+
+ Also, when an error happens on a userspace process, it is also possible to
+ kill such process and let userspace restart it.
+
+The mechanism for handling non-fatal errors is usually complex and may
+require the help of some userspace application, in order to apply the
+policy desired by the system administrator.
+
+Identifying a bad hardware component
+------------------------------------
+
+Just detecting a hardware flaw is usually not enough, as the system needs
+to pinpoint to the minimal replaceable unit (MRU) that should be exchanged
+to make the hardware reliable again.
+
+So, it requires not only error logging facilities, but also mechanisms that
+will translate the error message to the silkscreen or component label for
+the MRU.
+
+Typically, it is very complex for memory, as modern CPUs interlace memory
+from different memory modules, in order to provide a better performance. The
+DMI BIOS usually have a list of memory module labels, with can be obtained
+using the ``dmidecode`` tool. For example, on a desktop machine, it shows::
+
+ Memory Device
+ Total Width: 64 bits
+ Data Width: 64 bits
+ Size: 16384 MB
+ Form Factor: SODIMM
+ Set: None
+ Locator: ChannelA-DIMM0
+ Bank Locator: BANK 0
+ Type: DDR4
+ Type Detail: Synchronous
+ Speed: 2133 MHz
+ Rank: 2
+ Configured Clock Speed: 2133 MHz
+
+On the above example, a DDR4 SO-DIMM memory module is located at the
+system's memory labeled as "BANK 0", as given by the *bank locator* field.
+Please notice that, on such system, the *total width* is equal to the
+*data width*. It means that such memory module doesn't have error
+detection/correction mechanisms.
+
+Unfortunately, not all systems use the same field to specify the memory
+bank. On this example, from an older server, ``dmidecode`` shows::
+
+ Memory Device
+ Array Handle: 0x1000
+ Error Information Handle: Not Provided
+ Total Width: 72 bits
+ Data Width: 64 bits
+ Size: 8192 MB
+ Form Factor: DIMM
+ Set: 1
+ Locator: DIMM_A1
+ Bank Locator: Not Specified
+ Type: DDR3
+ Type Detail: Synchronous Registered (Buffered)
+ Speed: 1600 MHz
+ Rank: 2
+ Configured Clock Speed: 1600 MHz
+
+There, the DDR3 RDIMM memory module is located at the system's memory labeled
+as "DIMM_A1", as given by the *locator* field. Please notice that this
+memory module has 64 bits of *data width* and 72 bits of *total width*. So,
+it has 8 extra bits to be used by error detection and correction mechanisms.
+Such kind of memory is called Error-correcting code memory (ECC memory).
+
+To make things even worse, it is not uncommon that systems with different
+labels on their system's board to use exactly the same BIOS, meaning that
+the labels provided by the BIOS won't match the real ones.
+
+ECC memory
+----------
+
+As mentioned in the previous section, ECC memory has extra bits to be
+used for error correction. In the above example, a memory module has
+64 bits of *data width*, and 72 bits of *total width*. The extra 8
+bits which are used for the error detection and correction mechanisms
+are referred to as the *syndrome*\ [#f1]_\ [#f2]_.
+
+So, when the cpu requests the memory controller to write a word with
+*data width*, the memory controller calculates the *syndrome* in real time,
+using Hamming code, or some other error correction code, like SECDED+,
+producing a code with *total width* size. Such code is then written
+on the memory modules.
+
+At read, the *total width* bits code is converted back, using the same
+ECC code used on write, producing a word with *data width* and a *syndrome*.
+The word with *data width* is sent to the CPU, even when errors happen.
+
+The memory controller also looks at the *syndrome* in order to check if
+there was an error, and if the ECC code was able to fix such error.
+If the error was corrected, a Corrected Error (CE) happened. If not, an
+Uncorrected Error (UE) happened.
+
+The information about the CE/UE errors is stored on some special registers
+at the memory controller and can be accessed by reading such registers,
+either by BIOS, by some special CPUs or by Linux EDAC driver. On x86 64
+bit CPUs, such errors can also be retrieved via the Machine Check
+Architecture (MCA)\ [#f3]_.
+
+.. [#f1] Please notice that several memory controllers allow operation on a
+ mode called "Lock-Step", where it groups two memory modules together,
+ doing 128-bit reads/writes. That gives 16 bits for error correction, with
+ significantly improves the error correction mechanism, at the expense
+ that, when an error happens, there's no way to know what memory module is
+ to blame. So, it has to blame both memory modules.
+
+.. [#f2] Some memory controllers also allow using memory in mirror mode.
+ On such mode, the same data is written to two memory modules. At read,
+ the system checks both memory modules, in order to check if both provide
+ identical data. On such configuration, when an error happens, there's no
+ way to know what memory module is to blame. So, it has to blame both
+ memory modules (or 4 memory modules, if the system is also on Lock-step
+ mode).
+
+.. [#f3] For more details about the Machine Check Architecture (MCA),
+ please read Documentation/arch/x86/x86_64/machinecheck.rst at the Kernel tree.
+
+EDAC - Error Detection And Correction
+*************************************
+
+.. note::
+
+ "bluesmoke" was the name for this device driver subsystem when it
+ was "out-of-tree" and maintained at http://bluesmoke.sourceforge.net.
+ That site is mostly archaic now and can be used only for historical
+ purposes.
+
+ When the subsystem was pushed upstream for the first time, on
+ Kernel 2.6.16, it was renamed to ``EDAC``.
+
+Purpose
+-------
+
+The ``edac`` kernel module's goal is to detect and report hardware errors
+that occur within the computer system running under linux.
+
+Memory
+------
+
+Memory Correctable Errors (CE) and Uncorrectable Errors (UE) are the
+primary errors being harvested. These types of errors are harvested by
+the ``edac_mc`` device.
+
+Detecting CE events, then harvesting those events and reporting them,
+**can** but must not necessarily be a predictor of future UE events. With
+CE events only, the system can and will continue to operate as no data
+has been damaged yet.
+
+However, preventive maintenance and proactive part replacement of memory
+modules exhibiting CEs can reduce the likelihood of the dreaded UE events
+and system panics.
+
+Other hardware elements
+-----------------------
+
+A new feature for EDAC, the ``edac_device`` class of device, was added in
+the 2.6.23 version of the kernel.
+
+This new device type allows for non-memory type of ECC hardware detectors
+to have their states harvested and presented to userspace via the sysfs
+interface.
+
+Some architectures have ECC detectors for L1, L2 and L3 caches,
+along with DMA engines, fabric switches, main data path switches,
+interconnections, and various other hardware data paths. If the hardware
+reports it, then an edac_device device probably can be constructed to
+harvest and present that to userspace.
+
+
+PCI bus scanning
+----------------
+
+In addition, PCI devices are scanned for PCI Bus Parity and SERR Errors
+in order to determine if errors are occurring during data transfers.
+
+The presence of PCI Parity errors must be examined with a grain of salt.
+There are several add-in adapters that do **not** follow the PCI specification
+with regards to Parity generation and reporting. The specification says
+the vendor should tie the parity status bits to 0 if they do not intend
+to generate parity. Some vendors do not do this, and thus the parity bit
+can "float" giving false positives.
+
+There is a PCI device attribute located in sysfs that is checked by
+the EDAC PCI scanning code. If that attribute is set, PCI parity/error
+scanning is skipped for that device. The attribute is::
+
+ broken_parity_status
+
+and is located in ``/sys/devices/pci<XXX>/0000:XX:YY.Z`` directories for
+PCI devices.
+
+
+Versioning
+----------
+
+EDAC is composed of a "core" module (``edac_core.ko``) and several Memory
+Controller (MC) driver modules. On a given system, the CORE is loaded
+and one MC driver will be loaded. Both the CORE and the MC driver (or
+``edac_device`` driver) have individual versions that reflect current
+release level of their respective modules.
+
+Thus, to "report" on what version a system is running, one must report
+both the CORE's and the MC driver's versions.
+
+
+Loading
+-------
+
+If ``edac`` was statically linked with the kernel then no loading
+is necessary. If ``edac`` was built as modules then simply modprobe
+the ``edac`` pieces that you need. You should be able to modprobe
+hardware-specific modules and have the dependencies load the necessary
+core modules.
+
+Example::
+
+ $ modprobe amd76x_edac
+
+loads both the ``amd76x_edac.ko`` memory controller module and the
+``edac_mc.ko`` core module.
+
+
+Sysfs interface
+---------------
+
+EDAC presents a ``sysfs`` interface for control and reporting purposes. It
+lives in the /sys/devices/system/edac directory.
+
+Within this directory there currently reside 2 components:
+
+ ======= ==============================
+ mc memory controller(s) system
+ pci PCI control and status system
+ ======= ==============================
+
+
+
+Memory Controller (mc) Model
+----------------------------
+
+Each ``mc`` device controls a set of memory modules [#f4]_. These modules
+are laid out in a Chip-Select Row (``csrowX``) and Channel table (``chX``).
+There can be multiple csrows and multiple channels.
+
+.. [#f4] Nowadays, the term DIMM (Dual In-line Memory Module) is widely
+ used to refer to a memory module, although there are other memory
+ packaging alternatives, like SO-DIMM, SIMM, etc. The UEFI
+ specification (Version 2.7) defines a memory module in the Common
+ Platform Error Record (CPER) section to be an SMBIOS Memory Device
+ (Type 17). Along this document, and inside the EDAC subsystem, the term
+ "dimm" is used for all memory modules, even when they use a
+ different kind of packaging.
+
+Memory controllers allow for several csrows, with 8 csrows being a
+typical value. Yet, the actual number of csrows depends on the layout of
+a given motherboard, memory controller and memory module characteristics.
+
+Dual channels allow for dual data length (e. g. 128 bits, on 64 bit systems)
+data transfers to/from the CPU from/to memory. Some newer chipsets allow
+for more than 2 channels, like Fully Buffered DIMMs (FB-DIMMs) memory
+controllers. The following example will assume 2 channels:
+
+ +------------+-----------------------+
+ | CS Rows | Channels |
+ +------------+-----------+-----------+
+ | | ``ch0`` | ``ch1`` |
+ +============+===========+===========+
+ | |**DIMM_A0**|**DIMM_B0**|
+ +------------+-----------+-----------+
+ | ``csrow0`` | rank0 | rank0 |
+ +------------+-----------+-----------+
+ | ``csrow1`` | rank1 | rank1 |
+ +------------+-----------+-----------+
+ | |**DIMM_A1**|**DIMM_B1**|
+ +------------+-----------+-----------+
+ | ``csrow2`` | rank0 | rank0 |
+ +------------+-----------+-----------+
+ | ``csrow3`` | rank1 | rank1 |
+ +------------+-----------+-----------+
+
+In the above example, there are 4 physical slots on the motherboard
+for memory DIMMs:
+
+ +---------+---------+
+ | DIMM_A0 | DIMM_B0 |
+ +---------+---------+
+ | DIMM_A1 | DIMM_B1 |
+ +---------+---------+
+
+Labels for these slots are usually silk-screened on the motherboard.
+Slots labeled ``A`` are channel 0 in this example. Slots labeled ``B`` are
+channel 1. Notice that there are two csrows possible on a physical DIMM.
+These csrows are allocated their csrow assignment based on the slot into
+which the memory DIMM is placed. Thus, when 1 DIMM is placed in each
+Channel, the csrows cross both DIMMs.
+
+Memory DIMMs come single or dual "ranked". A rank is a populated csrow.
+In the example above 2 dual ranked DIMMs are similarly placed. Thus,
+both csrow0 and csrow1 are populated. On the other hand, when 2 single
+ranked DIMMs are placed in slots DIMM_A0 and DIMM_B0, then they will
+have just one csrow (csrow0) and csrow1 will be empty. The pattern
+repeats itself for csrow2 and csrow3. Also note that some memory
+controllers don't have any logic to identify the memory module, see
+``rankX`` directories below.
+
+The representation of the above is reflected in the directory
+tree in EDAC's sysfs interface. Starting in directory
+``/sys/devices/system/edac/mc``, each memory controller will be
+represented by its own ``mcX`` directory, where ``X`` is the
+index of the MC::
+
+ ..../edac/mc/
+ |
+ |->mc0
+ |->mc1
+ |->mc2
+ ....
+
+Under each ``mcX`` directory each ``csrowX`` is again represented by a
+``csrowX``, where ``X`` is the csrow index::
+
+ .../mc/mc0/
+ |
+ |->csrow0
+ |->csrow2
+ |->csrow3
+ ....
+
+Notice that there is no csrow1, which indicates that csrow0 is composed
+of a single ranked DIMMs. This should also apply in both Channels, in
+order to have dual-channel mode be operational. Since both csrow2 and
+csrow3 are populated, this indicates a dual ranked set of DIMMs for
+channels 0 and 1.
+
+Within each of the ``mcX`` and ``csrowX`` directories are several EDAC
+control and attribute files.
+
+``mcX`` directories
+-------------------
+
+In ``mcX`` directories are EDAC control and attribute files for
+this ``X`` instance of the memory controllers.
+
+For a description of the sysfs API, please see:
+
+ Documentation/ABI/testing/sysfs-devices-edac
+
+
+``dimmX`` or ``rankX`` directories
+----------------------------------
+
+The recommended way to use the EDAC subsystem is to look at the information
+provided by the ``dimmX`` or ``rankX`` directories [#f5]_.
+
+A typical EDAC system has the following structure under
+``/sys/devices/system/edac/``\ [#f6]_::
+
+ /sys/devices/system/edac/
+ ├── mc
+ │   ├── mc0
+ │   │   ├── ce_count
+ │   │   ├── ce_noinfo_count
+ │   │   ├── dimm0
+ │   │   │   ├── dimm_ce_count
+ │   │   │   ├── dimm_dev_type
+ │   │   │   ├── dimm_edac_mode
+ │   │   │   ├── dimm_label
+ │   │   │   ├── dimm_location
+ │   │   │   ├── dimm_mem_type
+ │   │   │   ├── dimm_ue_count
+ │   │   │   ├── size
+ │   │   │   └── uevent
+ │   │   ├── max_location
+ │   │   ├── mc_name
+ │   │   ├── reset_counters
+ │   │   ├── seconds_since_reset
+ │   │   ├── size_mb
+ │   │   ├── ue_count
+ │   │   ├── ue_noinfo_count
+ │   │   └── uevent
+ │   ├── mc1
+ │   │   ├── ce_count
+ │   │   ├── ce_noinfo_count
+ │   │   ├── dimm0
+ │   │   │   ├── dimm_ce_count
+ │   │   │   ├── dimm_dev_type
+ │   │   │   ├── dimm_edac_mode
+ │   │   │   ├── dimm_label
+ │   │   │   ├── dimm_location
+ │   │   │   ├── dimm_mem_type
+ │   │   │   ├── dimm_ue_count
+ │   │   │   ├── size
+ │   │   │   └── uevent
+ │   │   ├── max_location
+ │   │   ├── mc_name
+ │   │   ├── reset_counters
+ │   │   ├── seconds_since_reset
+ │   │   ├── size_mb
+ │   │   ├── ue_count
+ │   │   ├── ue_noinfo_count
+ │   │   └── uevent
+ │   └── uevent
+ └── uevent
+
+In the ``dimmX`` directories are EDAC control and attribute files for
+this ``X`` memory module:
+
+- ``size`` - Total memory managed by this csrow attribute file
+
+ This attribute file displays, in count of megabytes, the memory
+ that this csrow contains.
+
+- ``dimm_ue_count`` - Uncorrectable Errors count attribute file
+
+ This attribute file displays the total count of uncorrectable
+ errors that have occurred on this DIMM. If panic_on_ue is set
+ this counter will not have a chance to increment, since EDAC
+ will panic the system.
+
+- ``dimm_ce_count`` - Correctable Errors count attribute file
+
+ This attribute file displays the total count of correctable
+ errors that have occurred on this DIMM. This count is very
+ important to examine. CEs provide early indications that a
+ DIMM is beginning to fail. This count field should be
+ monitored for non-zero values and report such information
+ to the system administrator.
+
+- ``dimm_dev_type`` - Device type attribute file
+
+ This attribute file will display what type of DRAM device is
+ being utilized on this DIMM.
+ Examples:
+
+ - x1
+ - x2
+ - x4
+ - x8
+
+- ``dimm_edac_mode`` - EDAC Mode of operation attribute file
+
+ This attribute file will display what type of Error detection
+ and correction is being utilized.
+
+- ``dimm_label`` - memory module label control file
+
+ This control file allows this DIMM to have a label assigned
+ to it. With this label in the module, when errors occur
+ the output can provide the DIMM label in the system log.
+ This becomes vital for panic events to isolate the
+ cause of the UE event.
+
+ DIMM Labels must be assigned after booting, with information
+ that correctly identifies the physical slot with its
+ silk screen label. This information is currently very
+ motherboard specific and determination of this information
+ must occur in userland at this time.
+
+- ``dimm_location`` - location of the memory module
+
+ The location can have up to 3 levels, and describe how the
+ memory controller identifies the location of a memory module.
+ Depending on the type of memory and memory controller, it
+ can be:
+
+ - *csrow* and *channel* - used when the memory controller
+ doesn't identify a single DIMM - e. g. in ``rankX`` dir;
+ - *branch*, *channel*, *slot* - typically used on FB-DIMM memory
+ controllers;
+ - *channel*, *slot* - used on Nehalem and newer Intel drivers.
+
+- ``dimm_mem_type`` - Memory Type attribute file
+
+ This attribute file will display what type of memory is currently
+ on this csrow. Normally, either buffered or unbuffered memory.
+ Examples:
+
+ - Registered-DDR
+ - Unbuffered-DDR
+
+.. [#f5] On some systems, the memory controller doesn't have any logic
+ to identify the memory module. On such systems, the directory is called ``rankX`` and works on a similar way as the ``csrowX`` directories.
+ On modern Intel memory controllers, the memory controller identifies the
+ memory modules directly. On such systems, the directory is called ``dimmX``.
+
+.. [#f6] There are also some ``power`` directories and ``subsystem``
+ symlinks inside the sysfs mapping that are automatically created by
+ the sysfs subsystem. Currently, they serve no purpose.
+
+``csrowX`` directories
+----------------------
+
+When CONFIG_EDAC_LEGACY_SYSFS is enabled, sysfs will contain the ``csrowX``
+directories. As this API doesn't work properly for Rambus, FB-DIMMs and
+modern Intel Memory Controllers, this is being deprecated in favor of
+``dimmX`` directories.
+
+In the ``csrowX`` directories are EDAC control and attribute files for
+this ``X`` instance of csrow:
+
+
+- ``ue_count`` - Total Uncorrectable Errors count attribute file
+
+ This attribute file displays the total count of uncorrectable
+ errors that have occurred on this csrow. If panic_on_ue is set
+ this counter will not have a chance to increment, since EDAC
+ will panic the system.
+
+
+- ``ce_count`` - Total Correctable Errors count attribute file
+
+ This attribute file displays the total count of correctable
+ errors that have occurred on this csrow. This count is very
+ important to examine. CEs provide early indications that a
+ DIMM is beginning to fail. This count field should be
+ monitored for non-zero values and report such information
+ to the system administrator.
+
+
+- ``size_mb`` - Total memory managed by this csrow attribute file
+
+ This attribute file displays, in count of megabytes, the memory
+ that this csrow contains.
+
+
+- ``mem_type`` - Memory Type attribute file
+
+ This attribute file will display what type of memory is currently
+ on this csrow. Normally, either buffered or unbuffered memory.
+ Examples:
+
+ - Registered-DDR
+ - Unbuffered-DDR
+
+
+- ``edac_mode`` - EDAC Mode of operation attribute file
+
+ This attribute file will display what type of Error detection
+ and correction is being utilized.
+
+
+- ``dev_type`` - Device type attribute file
+
+ This attribute file will display what type of DRAM device is
+ being utilized on this DIMM.
+ Examples:
+
+ - x1
+ - x2
+ - x4
+ - x8
+
+
+- ``ch0_ce_count`` - Channel 0 CE Count attribute file
+
+ This attribute file will display the count of CEs on this
+ DIMM located in channel 0.
+
+
+- ``ch0_ue_count`` - Channel 0 UE Count attribute file
+
+ This attribute file will display the count of UEs on this
+ DIMM located in channel 0.
+
+
+- ``ch0_dimm_label`` - Channel 0 DIMM Label control file
+
+
+ This control file allows this DIMM to have a label assigned
+ to it. With this label in the module, when errors occur
+ the output can provide the DIMM label in the system log.
+ This becomes vital for panic events to isolate the
+ cause of the UE event.
+
+ DIMM Labels must be assigned after booting, with information
+ that correctly identifies the physical slot with its
+ silk screen label. This information is currently very
+ motherboard specific and determination of this information
+ must occur in userland at this time.
+
+
+- ``ch1_ce_count`` - Channel 1 CE Count attribute file
+
+
+ This attribute file will display the count of CEs on this
+ DIMM located in channel 1.
+
+
+- ``ch1_ue_count`` - Channel 1 UE Count attribute file
+
+
+ This attribute file will display the count of UEs on this
+ DIMM located in channel 0.
+
+
+- ``ch1_dimm_label`` - Channel 1 DIMM Label control file
+
+ This control file allows this DIMM to have a label assigned
+ to it. With this label in the module, when errors occur
+ the output can provide the DIMM label in the system log.
+ This becomes vital for panic events to isolate the
+ cause of the UE event.
+
+ DIMM Labels must be assigned after booting, with information
+ that correctly identifies the physical slot with its
+ silk screen label. This information is currently very
+ motherboard specific and determination of this information
+ must occur in userland at this time.
+
+
+System Logging
+--------------
+
+If logging for UEs and CEs is enabled, then system logs will contain
+information indicating that errors have been detected::
+
+ EDAC MC0: CE page 0x283, offset 0xce0, grain 8, syndrome 0x6ec3, row 0, channel 1 "DIMM_B1": amd76x_edac
+ EDAC MC0: CE page 0x1e5, offset 0xfb0, grain 8, syndrome 0xb741, row 0, channel 1 "DIMM_B1": amd76x_edac
+
+
+The structure of the message is:
+
+ +---------------------------------------+-------------+
+ | Content | Example |
+ +=======================================+=============+
+ | The memory controller | MC0 |
+ +---------------------------------------+-------------+
+ | Error type | CE |
+ +---------------------------------------+-------------+
+ | Memory page | 0x283 |
+ +---------------------------------------+-------------+
+ | Offset in the page | 0xce0 |
+ +---------------------------------------+-------------+
+ | The byte granularity | grain 8 |
+ | or resolution of the error | |
+ +---------------------------------------+-------------+
+ | The error syndrome | 0xb741 |
+ +---------------------------------------+-------------+
+ | Memory row | row 0 |
+ +---------------------------------------+-------------+
+ | Memory channel | channel 1 |
+ +---------------------------------------+-------------+
+ | DIMM label, if set prior | DIMM B1 |
+ +---------------------------------------+-------------+
+ | And then an optional, driver-specific | |
+ | message that may have additional | |
+ | information. | |
+ +---------------------------------------+-------------+
+
+Both UEs and CEs with no info will lack all but memory controller, error
+type, a notice of "no info" and then an optional, driver-specific error
+message.
+
+
+PCI Bus Parity Detection
+------------------------
+
+On Header Type 00 devices, the primary status is looked at for any
+parity error regardless of whether parity is enabled on the device or
+not. (The spec indicates parity is generated in some cases). On Header
+Type 01 bridges, the secondary status register is also looked at to see
+if parity occurred on the bus on the other side of the bridge.
+
+
+Sysfs configuration
+-------------------
+
+Under ``/sys/devices/system/edac/pci`` are control and attribute files as
+follows:
+
+
+- ``check_pci_parity`` - Enable/Disable PCI Parity checking control file
+
+ This control file enables or disables the PCI Bus Parity scanning
+ operation. Writing a 1 to this file enables the scanning. Writing
+ a 0 to this file disables the scanning.
+
+ Enable::
+
+ echo "1" >/sys/devices/system/edac/pci/check_pci_parity
+
+ Disable::
+
+ echo "0" >/sys/devices/system/edac/pci/check_pci_parity
+
+
+- ``pci_parity_count`` - Parity Count
+
+ This attribute file will display the number of parity errors that
+ have been detected.
+
+
+Module parameters
+-----------------
+
+- ``edac_mc_panic_on_ue`` - Panic on UE control file
+
+ An uncorrectable error will cause a machine panic. This is usually
+ desirable. It is a bad idea to continue when an uncorrectable error
+ occurs - it is indeterminate what was uncorrected and the operating
+ system context might be so mangled that continuing will lead to further
+ corruption. If the kernel has MCE configured, then EDAC will never
+ notice the UE.
+
+ LOAD TIME::
+
+ module/kernel parameter: edac_mc_panic_on_ue=[0|1]
+
+ RUN TIME::
+
+ echo "1" > /sys/module/edac_core/parameters/edac_mc_panic_on_ue
+
+
+- ``edac_mc_log_ue`` - Log UE control file
+
+
+ Generate kernel messages describing uncorrectable errors. These errors
+ are reported through the system message log system. UE statistics
+ will be accumulated even when UE logging is disabled.
+
+ LOAD TIME::
+
+ module/kernel parameter: edac_mc_log_ue=[0|1]
+
+ RUN TIME::
+
+ echo "1" > /sys/module/edac_core/parameters/edac_mc_log_ue
+
+
+- ``edac_mc_log_ce`` - Log CE control file
+
+
+ Generate kernel messages describing correctable errors. These
+ errors are reported through the system message log system.
+ CE statistics will be accumulated even when CE logging is disabled.
+
+ LOAD TIME::
+
+ module/kernel parameter: edac_mc_log_ce=[0|1]
+
+ RUN TIME::
+
+ echo "1" > /sys/module/edac_core/parameters/edac_mc_log_ce
+
+
+- ``edac_mc_poll_msec`` - Polling period control file
+
+
+ The time period, in milliseconds, for polling for error information.
+ Too small a value wastes resources. Too large a value might delay
+ necessary handling of errors and might loose valuable information for
+ locating the error. 1000 milliseconds (once each second) is the current
+ default. Systems which require all the bandwidth they can get, may
+ increase this.
+
+ LOAD TIME::
+
+ module/kernel parameter: edac_mc_poll_msec=[0|1]
+
+ RUN TIME::
+
+ echo "1000" > /sys/module/edac_core/parameters/edac_mc_poll_msec
+
+
+- ``panic_on_pci_parity`` - Panic on PCI PARITY Error
+
+
+ This control file enables or disables panicking when a parity
+ error has been detected.
+
+
+ module/kernel parameter::
+
+ edac_panic_on_pci_pe=[0|1]
+
+ Enable::
+
+ echo "1" > /sys/module/edac_core/parameters/edac_panic_on_pci_pe
+
+ Disable::
+
+ echo "0" > /sys/module/edac_core/parameters/edac_panic_on_pci_pe
+
+
+
+EDAC device type
+----------------
+
+In the header file, edac_pci.h, there is a series of edac_device structures
+and APIs for the EDAC_DEVICE.
+
+User space access to an edac_device is through the sysfs interface.
+
+At the location ``/sys/devices/system/edac`` (sysfs) new edac_device devices
+will appear.
+
+There is a three level tree beneath the above ``edac`` directory. For example,
+the ``test_device_edac`` device (found at the http://bluesmoke.sourceforget.net
+website) installs itself as::
+
+ /sys/devices/system/edac/test-instance
+
+in this directory are various controls, a symlink and one or more ``instance``
+directories.
+
+The standard default controls are:
+
+ ============== =======================================================
+ log_ce boolean to log CE events
+ log_ue boolean to log UE events
+ panic_on_ue boolean to ``panic`` the system if an UE is encountered
+ (default off, can be set true via startup script)
+ poll_msec time period between POLL cycles for events
+ ============== =======================================================
+
+The test_device_edac device adds at least one of its own custom control:
+
+ ============== ==================================================
+ test_bits which in the current test driver does nothing but
+ show how it is installed. A ported driver can
+ add one or more such controls and/or attributes
+ for specific uses.
+ One out-of-tree driver uses controls here to allow
+ for ERROR INJECTION operations to hardware
+ injection registers
+ ============== ==================================================
+
+The symlink points to the 'struct dev' that is registered for this edac_device.
+
+Instances
+---------
+
+One or more instance directories are present. For the ``test_device_edac``
+case:
+
+ +----------------+
+ | test-instance0 |
+ +----------------+
+
+
+In this directory there are two default counter attributes, which are totals of
+counter in deeper subdirectories.
+
+ ============== ====================================
+ ce_count total of CE events of subdirectories
+ ue_count total of UE events of subdirectories
+ ============== ====================================
+
+Blocks
+------
+
+At the lowest directory level is the ``block`` directory. There can be 0, 1
+or more blocks specified in each instance:
+
+ +-------------+
+ | test-block0 |
+ +-------------+
+
+In this directory the default attributes are:
+
+ ============== ================================================
+ ce_count which is counter of CE events for this ``block``
+ of hardware being monitored
+ ue_count which is counter of UE events for this ``block``
+ of hardware being monitored
+ ============== ================================================
+
+
+The ``test_device_edac`` device adds 4 attributes and 1 control:
+
+ ================== ====================================================
+ test-block-bits-0 for every POLL cycle this counter
+ is incremented
+ test-block-bits-1 every 10 cycles, this counter is bumped once,
+ and test-block-bits-0 is set to 0
+ test-block-bits-2 every 100 cycles, this counter is bumped once,
+ and test-block-bits-1 is set to 0
+ test-block-bits-3 every 1000 cycles, this counter is bumped once,
+ and test-block-bits-2 is set to 0
+ ================== ====================================================
+
+
+ ================== ====================================================
+ reset-counters writing ANY thing to this control will
+ reset all the above counters.
+ ================== ====================================================
+
+
+Use of the ``test_device_edac`` driver should enable any others to create their own
+unique drivers for their hardware systems.
+
+The ``test_device_edac`` sample driver is located at the
+http://bluesmoke.sourceforge.net project site for EDAC.
+
+
+Usage of EDAC APIs on Nehalem and newer Intel CPUs
+--------------------------------------------------
+
+On older Intel architectures, the memory controller was part of the North
+Bridge chipset. Nehalem, Sandy Bridge, Ivy Bridge, Haswell, Sky Lake and
+newer Intel architectures integrated an enhanced version of the memory
+controller (MC) inside the CPUs.
+
+This chapter will cover the differences of the enhanced memory controllers
+found on newer Intel CPUs, such as ``i7core_edac``, ``sb_edac`` and
+``sbx_edac`` drivers.
+
+.. note::
+
+ The Xeon E7 processor families use a separate chip for the memory
+ controller, called Intel Scalable Memory Buffer. This section doesn't
+ apply for such families.
+
+1) There is one Memory Controller per Quick Patch Interconnect
+ (QPI). At the driver, the term "socket" means one QPI. This is
+ associated with a physical CPU socket.
+
+ Each MC have 3 physical read channels, 3 physical write channels and
+ 3 logic channels. The driver currently sees it as just 3 channels.
+ Each channel can have up to 3 DIMMs.
+
+ The minimum known unity is DIMMs. There are no information about csrows.
+ As EDAC API maps the minimum unity is csrows, the driver sequentially
+ maps channel/DIMM into different csrows.
+
+ For example, supposing the following layout::
+
+ Ch0 phy rd0, wr0 (0x063f4031): 2 ranks, UDIMMs
+ dimm 0 1024 Mb offset: 0, bank: 8, rank: 1, row: 0x4000, col: 0x400
+ dimm 1 1024 Mb offset: 4, bank: 8, rank: 1, row: 0x4000, col: 0x400
+ Ch1 phy rd1, wr1 (0x063f4031): 2 ranks, UDIMMs
+ dimm 0 1024 Mb offset: 0, bank: 8, rank: 1, row: 0x4000, col: 0x400
+ Ch2 phy rd3, wr3 (0x063f4031): 2 ranks, UDIMMs
+ dimm 0 1024 Mb offset: 0, bank: 8, rank: 1, row: 0x4000, col: 0x400
+
+ The driver will map it as::
+
+ csrow0: channel 0, dimm0
+ csrow1: channel 0, dimm1
+ csrow2: channel 1, dimm0
+ csrow3: channel 2, dimm0
+
+ exports one DIMM per csrow.
+
+ Each QPI is exported as a different memory controller.
+
+2) The MC has the ability to inject errors to test drivers. The drivers
+ implement this functionality via some error injection nodes:
+
+ For injecting a memory error, there are some sysfs nodes, under
+ ``/sys/devices/system/edac/mc/mc?/``:
+
+ - ``inject_addrmatch/*``:
+ Controls the error injection mask register. It is possible to specify
+ several characteristics of the address to match an error code::
+
+ dimm = the affected dimm. Numbers are relative to a channel;
+ rank = the memory rank;
+ channel = the channel that will generate an error;
+ bank = the affected bank;
+ page = the page address;
+ column (or col) = the address column.
+
+ each of the above values can be set to "any" to match any valid value.
+
+ At driver init, all values are set to any.
+
+ For example, to generate an error at rank 1 of dimm 2, for any channel,
+ any bank, any page, any column::
+
+ echo 2 >/sys/devices/system/edac/mc/mc0/inject_addrmatch/dimm
+ echo 1 >/sys/devices/system/edac/mc/mc0/inject_addrmatch/rank
+
+ To return to the default behaviour of matching any, you can do::
+
+ echo any >/sys/devices/system/edac/mc/mc0/inject_addrmatch/dimm
+ echo any >/sys/devices/system/edac/mc/mc0/inject_addrmatch/rank
+
+ - ``inject_eccmask``:
+ specifies what bits will have troubles,
+
+ - ``inject_section``:
+ specifies what ECC cache section will get the error::
+
+ 3 for both
+ 2 for the highest
+ 1 for the lowest
+
+ - ``inject_type``:
+ specifies the type of error, being a combination of the following bits::
+
+ bit 0 - repeat
+ bit 1 - ecc
+ bit 2 - parity
+
+ - ``inject_enable``:
+ starts the error generation when something different than 0 is written.
+
+ All inject vars can be read. root permission is needed for write.
+
+ Datasheet states that the error will only be generated after a write on an
+ address that matches inject_addrmatch. It seems, however, that reading will
+ also produce an error.
+
+ For example, the following code will generate an error for any write access
+ at socket 0, on any DIMM/address on channel 2::
+
+ echo 2 >/sys/devices/system/edac/mc/mc0/inject_addrmatch/channel
+ echo 2 >/sys/devices/system/edac/mc/mc0/inject_type
+ echo 64 >/sys/devices/system/edac/mc/mc0/inject_eccmask
+ echo 3 >/sys/devices/system/edac/mc/mc0/inject_section
+ echo 1 >/sys/devices/system/edac/mc/mc0/inject_enable
+ dd if=/dev/mem of=/dev/null seek=16k bs=4k count=1 >& /dev/null
+
+ For socket 1, it is needed to replace "mc0" by "mc1" at the above
+ commands.
+
+ The generated error message will look like::
+
+ EDAC MC0: UE row 0, channel-a= 0 channel-b= 0 labels "-": NON_FATAL (addr = 0x0075b980, socket=0, Dimm=0, Channel=2, syndrome=0x00000040, count=1, Err=8c0000400001009f:4000080482 (read error: read ECC error))
+
+3) Corrected Error memory register counters
+
+ Those newer MCs have some registers to count memory errors. The driver
+ uses those registers to report Corrected Errors on devices with Registered
+ DIMMs.
+
+ However, those counters don't work with Unregistered DIMM. As the chipset
+ offers some counters that also work with UDIMMs (but with a worse level of
+ granularity than the default ones), the driver exposes those registers for
+ UDIMM memories.
+
+ They can be read by looking at the contents of ``all_channel_counts/``::
+
+ $ for i in /sys/devices/system/edac/mc/mc0/all_channel_counts/*; do echo $i; cat $i; done
+ /sys/devices/system/edac/mc/mc0/all_channel_counts/udimm0
+ 0
+ /sys/devices/system/edac/mc/mc0/all_channel_counts/udimm1
+ 0
+ /sys/devices/system/edac/mc/mc0/all_channel_counts/udimm2
+ 0
+
+ What happens here is that errors on different csrows, but at the same
+ dimm number will increment the same counter.
+ So, in this memory mapping::
+
+ csrow0: channel 0, dimm0
+ csrow1: channel 0, dimm1
+ csrow2: channel 1, dimm0
+ csrow3: channel 2, dimm0
+
+ The hardware will increment udimm0 for an error at the first dimm at either
+ csrow0, csrow2 or csrow3;
+
+ The hardware will increment udimm1 for an error at the second dimm at either
+ csrow0, csrow2 or csrow3;
+
+ The hardware will increment udimm2 for an error at the third dimm at either
+ csrow0, csrow2 or csrow3;
+
+4) Standard error counters
+
+ The standard error counters are generated when an mcelog error is received
+ by the driver. Since, with UDIMM, this is counted by software, it is
+ possible that some errors could be lost. With RDIMM's, they display the
+ contents of the registers
+
+Reference documents used on ``amd64_edac``
+------------------------------------------
+
+``amd64_edac`` module is based on the following documents
+(available from http://support.amd.com/en-us/search/tech-docs):
+
+1. :Title: BIOS and Kernel Developer's Guide for AMD Athlon 64 and AMD
+ Opteron Processors
+ :AMD publication #: 26094
+ :Revision: 3.26
+ :Link: http://support.amd.com/TechDocs/26094.PDF
+
+2. :Title: BIOS and Kernel Developer's Guide for AMD NPT Family 0Fh
+ Processors
+ :AMD publication #: 32559
+ :Revision: 3.00
+ :Issue Date: May 2006
+ :Link: http://support.amd.com/TechDocs/32559.pdf
+
+3. :Title: BIOS and Kernel Developer's Guide (BKDG) For AMD Family 10h
+ Processors
+ :AMD publication #: 31116
+ :Revision: 3.00
+ :Issue Date: September 07, 2007
+ :Link: http://support.amd.com/TechDocs/31116.pdf
+
+4. :Title: BIOS and Kernel Developer's Guide (BKDG) for AMD Family 15h
+ Models 30h-3Fh Processors
+ :AMD publication #: 49125
+ :Revision: 3.06
+ :Issue Date: 2/12/2015 (latest release)
+ :Link: http://support.amd.com/TechDocs/49125_15h_Models_30h-3Fh_BKDG.pdf
+
+5. :Title: BIOS and Kernel Developer's Guide (BKDG) for AMD Family 15h
+ Models 60h-6Fh Processors
+ :AMD publication #: 50742
+ :Revision: 3.01
+ :Issue Date: 7/23/2015 (latest release)
+ :Link: http://support.amd.com/TechDocs/50742_15h_Models_60h-6Fh_BKDG.pdf
+
+6. :Title: BIOS and Kernel Developer's Guide (BKDG) for AMD Family 16h
+ Models 00h-0Fh Processors
+ :AMD publication #: 48751
+ :Revision: 3.03
+ :Issue Date: 2/23/2015 (latest release)
+ :Link: http://support.amd.com/TechDocs/48751_16h_bkdg.pdf
+
+Credits
+=======
+
+* Written by Doug Thompson <dougthompson@xmission.com>
+
+ - 7 Dec 2005
+ - 17 Jul 2007 Updated
+
+* |copy| Mauro Carvalho Chehab
+
+ - 05 Aug 2009 Nehalem interface
+ - 26 Oct 2016 Converted to ReST and cleanups at the Nehalem section
+
+* EDAC authors/maintainers:
+
+ - Doug Thompson, Dave Jiang, Dave Peterson et al,
+ - Mauro Carvalho Chehab
+ - Borislav Petkov
+ - original author: Thayne Harbaugh
diff --git a/Documentation/admin-guide/README.rst b/Documentation/admin-guide/README.rst
index 0797eec76be1..05301f03b717 100644
--- a/Documentation/admin-guide/README.rst
+++ b/Documentation/admin-guide/README.rst
@@ -1,9 +1,9 @@
.. _readme:
-Linux kernel release 4.x <http://kernel.org/>
+Linux kernel release 6.x <http://kernel.org/>
=============================================
-These are the release notes for Linux version 4. Read them carefully,
+These are the release notes for Linux version 6. Read them carefully,
as they tell you what this is all about, explain how to install the
kernel, and what to do if something goes wrong.
@@ -63,7 +63,7 @@ Installing the kernel source
directory where you have permissions (e.g. your home directory) and
unpack it::
- xz -cd linux-4.X.tar.xz | tar xvf -
+ xz -cd linux-6.x.tar.xz | tar xvf -
Replace "X" with the version number of the latest kernel.
@@ -72,26 +72,26 @@ Installing the kernel source
files. They should match the library, and not get messed up by
whatever the kernel-du-jour happens to be.
- - You can also upgrade between 4.x releases by patching. Patches are
+ - You can also upgrade between 6.x releases by patching. Patches are
distributed in the xz format. To install by patching, get all the
newer patch files, enter the top level directory of the kernel source
- (linux-4.X) and execute::
+ (linux-6.x) and execute::
- xz -cd ../patch-4.x.xz | patch -p1
+ xz -cd ../patch-6.x.xz | patch -p1
- Replace "x" for all versions bigger than the version "X" of your current
+ Replace "x" for all versions bigger than the version "x" of your current
source tree, **in_order**, and you should be ok. You may want to remove
the backup files (some-file-name~ or some-file-name.orig), and make sure
that there are no failed patches (some-file-name# or some-file-name.rej).
If there are, either you or I have made a mistake.
- Unlike patches for the 4.x kernels, patches for the 4.x.y kernels
+ Unlike patches for the 6.x kernels, patches for the 6.x.y kernels
(also known as the -stable kernels) are not incremental but instead apply
- directly to the base 4.x kernel. For example, if your base kernel is 4.0
- and you want to apply the 4.0.3 patch, you must not first apply the 4.0.1
- and 4.0.2 patches. Similarly, if you are running kernel version 4.0.2 and
- want to jump to 4.0.3, you must first reverse the 4.0.2 patch (that is,
- patch -R) **before** applying the 4.0.3 patch. You can read more on this in
+ directly to the base 6.x kernel. For example, if your base kernel is 6.0
+ and you want to apply the 6.0.3 patch, you must not first apply the 6.0.1
+ and 6.0.2 patches. Similarly, if you are running kernel version 6.0.2 and
+ want to jump to 6.0.3, you must first reverse the 6.0.2 patch (that is,
+ patch -R) **before** applying the 6.0.3 patch. You can read more on this in
:ref:`Documentation/process/applying-patches.rst <applying_patches>`.
Alternatively, the script patch-kernel can be used to automate this
@@ -114,7 +114,7 @@ Installing the kernel source
Software requirements
---------------------
- Compiling and running the 4.x kernels requires up-to-date
+ Compiling and running the 6.x kernels requires up-to-date
versions of various software packages. Consult
:ref:`Documentation/process/changes.rst <changes>` for the minimum version numbers
required and how to get updates for these packages. Beware that using
@@ -132,12 +132,12 @@ Build directory for the kernel
place for the output files (including .config).
Example::
- kernel source code: /usr/src/linux-4.X
+ kernel source code: /usr/src/linux-6.x
build directory: /home/name/build/kernel
To configure and build the kernel, use::
- cd /usr/src/linux-4.X
+ cd /usr/src/linux-6.x
make O=/home/name/build/kernel menuconfig
make O=/home/name/build/kernel
sudo make O=/home/name/build/kernel modules_install install
@@ -165,7 +165,7 @@ Configuring the kernel
"make xconfig" Qt based configuration tool.
- "make gconfig" GTK+ based configuration tool.
+ "make gconfig" GTK based configuration tool.
"make oldconfig" Default all questions based on the contents of
your existing ./.config file and asking about
@@ -176,7 +176,7 @@ Configuring the kernel
values without prompting.
"make defconfig" Create a ./.config file by using the default
- symbol values from either arch/$ARCH/defconfig
+ symbol values from either arch/$ARCH/configs/defconfig
or arch/$ARCH/configs/${PLATFORM}_defconfig,
depending on the architecture.
@@ -209,25 +209,33 @@ Configuring the kernel
store the lsmod of that machine into a file
and pass it in as a LSMOD parameter.
+ Also, you can preserve modules in certain folders
+ or kconfig files by specifying their paths in
+ parameter LMC_KEEP.
+
target$ lsmod > /tmp/mylsmod
target$ scp /tmp/mylsmod host:/tmp
- host$ make LSMOD=/tmp/mylsmod localmodconfig
+ host$ make LSMOD=/tmp/mylsmod \
+ LMC_KEEP="drivers/usb:drivers/gpu:fs" \
+ localmodconfig
The above also works when cross compiling.
"make localyesconfig" Similar to localmodconfig, except it will convert
- all module options to built in (=y) options.
+ all module options to built in (=y) options. You can
+ also preserve modules by LMC_KEEP.
- "make kvmconfig" Enable additional options for kvm guest kernel support.
+ "make kvm_guest.config" Enable additional options for kvm guest kernel
+ support.
- "make xenconfig" Enable additional options for xen dom0 guest kernel
- support.
+ "make xen.config" Enable additional options for xen dom0 guest kernel
+ support.
"make tinyconfig" Configure the tiniest possible kernel.
You can find more information on using the Linux kernel config tools
- in Documentation/kbuild/kconfig.txt.
+ in Documentation/kbuild/kconfig.rst.
- NOTES on ``make config``:
@@ -251,14 +259,14 @@ Configuring the kernel
Compiling the kernel
--------------------
- - Make sure you have at least gcc 3.2 available.
+ - Make sure you have at least gcc 8.1 available.
For more information, refer to :ref:`Documentation/process/changes.rst <changes>`.
- Please note that you can still run a.out user programs with this kernel.
-
- - Do a ``make`` to create a compressed kernel image. It is also
- possible to do ``make install`` if you have lilo installed to suit the
- kernel makefiles, but you may want to check your particular lilo setup first.
+ - Do a ``make`` to create a compressed kernel image. It is also possible to do
+ ``make install`` if you have lilo installed or if your distribution has an
+ install script recognised by the kernel's installer. Most popular
+ distributions will have a recognized install script. You may want to
+ check your distribution's setup first.
To do the actual install, you have to be root, but none of the normal
build should require that. Don't take the name of root in vain.
@@ -295,114 +303,58 @@ Compiling the kernel
image (e.g. .../linux/arch/x86/boot/bzImage after compilation)
to the place where your regular bootable kernel is found.
- - Booting a kernel directly from a floppy without the assistance of a
- bootloader such as LILO, is no longer supported.
-
- If you boot Linux from the hard drive, chances are you use LILO, which
- uses the kernel image as specified in the file /etc/lilo.conf. The
- kernel image file is usually /vmlinuz, /boot/vmlinuz, /bzImage or
- /boot/bzImage. To use the new kernel, save a copy of the old image
- and copy the new image over the old one. Then, you MUST RERUN LILO
- to update the loading map! If you don't, you won't be able to boot
- the new kernel image.
-
- Reinstalling LILO is usually a matter of running /sbin/lilo.
- You may wish to edit /etc/lilo.conf to specify an entry for your
- old kernel image (say, /vmlinux.old) in case the new one does not
- work. See the LILO docs for more information.
-
- After reinstalling LILO, you should be all set. Shutdown the system,
+ - Booting a kernel directly from a storage device without the assistance
+ of a bootloader such as LILO or GRUB, is no longer supported in BIOS
+ (non-EFI systems). On UEFI/EFI systems, however, you can use EFISTUB
+ which allows the motherboard to boot directly to the kernel.
+ On modern workstations and desktops, it's generally recommended to use a
+ bootloader as difficulties can arise with multiple kernels and secure boot.
+ For more details on EFISTUB,
+ see "Documentation/admin-guide/efi-stub.rst".
+
+ - It's important to note that as of 2016 LILO (LInux LOader) is no longer in
+ active development, though as it was extremely popular, it often comes up
+ in documentation. Popular alternatives include GRUB2, rEFInd, Syslinux,
+ systemd-boot, or EFISTUB. For various reasons, it's not recommended to use
+ software that's no longer in active development.
+
+ - Chances are your distribution includes an install script and running
+ ``make install`` will be all that's needed. Should that not be the case
+ you'll have to identify your bootloader and reference its documentation or
+ configure your EFI.
+
+Legacy LILO Instructions
+------------------------
+
+
+ - If you use LILO the kernel images are specified in the file /etc/lilo.conf.
+ The kernel image file is usually /vmlinuz, /boot/vmlinuz, /bzImage or
+ /boot/bzImage. To use the new kernel, save a copy of the old image and copy
+ the new image over the old one. Then, you MUST RERUN LILO to update the
+ loading map! If you don't, you won't be able to boot the new kernel image.
+
+ - Reinstalling LILO is usually a matter of running /sbin/lilo. You may wish
+ to edit /etc/lilo.conf to specify an entry for your old kernel image
+ (say, /vmlinux.old) in case the new one does not work. See the LILO docs
+ for more information.
+
+ - After reinstalling LILO, you should be all set. Shutdown the system,
reboot, and enjoy!
- If you ever need to change the default root device, video mode,
- ramdisk size, etc. in the kernel image, use the ``rdev`` program (or
- alternatively the LILO boot options when appropriate). No need to
- recompile the kernel to change these parameters.
+ - If you ever need to change the default root device, video mode, etc. in the
+ kernel image, use your bootloader's boot options where appropriate. No need
+ to recompile the kernel to change these parameters.
- Reboot with the new kernel and enjoy.
+
If something goes wrong
-----------------------
- - If you have problems that seem to be due to kernel bugs, please check
- the file MAINTAINERS to see if there is a particular person associated
- with the part of the kernel that you are having trouble with. If there
- isn't anyone listed there, then the second best thing is to mail
- them to me (torvalds@linux-foundation.org), and possibly to any other
- relevant mailing-list or to the newsgroup.
-
- - In all bug-reports, *please* tell what kernel you are talking about,
- how to duplicate the problem, and what your setup is (use your common
- sense). If the problem is new, tell me so, and if the problem is
- old, please try to tell me when you first noticed it.
-
- - If the bug results in a message like::
-
- unable to handle kernel paging request at address C0000010
- Oops: 0002
- EIP: 0010:XXXXXXXX
- eax: xxxxxxxx ebx: xxxxxxxx ecx: xxxxxxxx edx: xxxxxxxx
- esi: xxxxxxxx edi: xxxxxxxx ebp: xxxxxxxx
- ds: xxxx es: xxxx fs: xxxx gs: xxxx
- Pid: xx, process nr: xx
- xx xx xx xx xx xx xx xx xx xx
-
- or similar kernel debugging information on your screen or in your
- system log, please duplicate it *exactly*. The dump may look
- incomprehensible to you, but it does contain information that may
- help debugging the problem. The text above the dump is also
- important: it tells something about why the kernel dumped code (in
- the above example, it's due to a bad kernel pointer). More information
- on making sense of the dump is in Documentation/admin-guide/bug-hunting.rst
-
- - If you compiled the kernel with CONFIG_KALLSYMS you can send the dump
- as is, otherwise you will have to use the ``ksymoops`` program to make
- sense of the dump (but compiling with CONFIG_KALLSYMS is usually preferred).
- This utility can be downloaded from
- https://www.kernel.org/pub/linux/utils/kernel/ksymoops/ .
- Alternatively, you can do the dump lookup by hand:
-
- - In debugging dumps like the above, it helps enormously if you can
- look up what the EIP value means. The hex value as such doesn't help
- me or anybody else very much: it will depend on your particular
- kernel setup. What you should do is take the hex value from the EIP
- line (ignore the ``0010:``), and look it up in the kernel namelist to
- see which kernel function contains the offending address.
-
- To find out the kernel function name, you'll need to find the system
- binary associated with the kernel that exhibited the symptom. This is
- the file 'linux/vmlinux'. To extract the namelist and match it against
- the EIP from the kernel crash, do::
-
- nm vmlinux | sort | less
-
- This will give you a list of kernel addresses sorted in ascending
- order, from which it is simple to find the function that contains the
- offending address. Note that the address given by the kernel
- debugging messages will not necessarily match exactly with the
- function addresses (in fact, that is very unlikely), so you can't
- just 'grep' the list: the list will, however, give you the starting
- point of each kernel function, so by looking for the function that
- has a starting address lower than the one you are searching for but
- is followed by a function with a higher address you will find the one
- you want. In fact, it may be a good idea to include a bit of
- "context" in your problem report, giving a few lines around the
- interesting one.
-
- If you for some reason cannot do the above (you have a pre-compiled
- kernel image or similar), telling me as much about your setup as
- possible will help. Please read the :ref:`admin-guide/reporting-bugs.rst <reportingbugs>`
- document for details.
-
- - Alternatively, you can use gdb on a running kernel. (read-only; i.e. you
- cannot change values or set break points.) To do this, first compile the
- kernel with -g; edit arch/x86/Makefile appropriately, then do a ``make
- clean``. You'll also need to enable CONFIG_PROC_FS (via ``make config``).
-
- After you've rebooted with the new kernel, do ``gdb vmlinux /proc/kcore``.
- You can now use all the usual gdb commands. The command to look up the
- point where your system crashed is ``l *0xXXXXXXXX``. (Replace the XXXes
- with the EIP value.)
-
- gdb'ing a non-running kernel currently fails because ``gdb`` (wrongly)
- disregards the starting offset for which the kernel is compiled.
+If you have problems that seem to be due to kernel bugs, please follow the
+instructions at 'Documentation/admin-guide/reporting-issues.rst'.
+
+Hints on understanding kernel bug reports are in
+'Documentation/admin-guide/bug-hunting.rst'. More on debugging the kernel
+with gdb is in 'Documentation/process/debugging/gdb-kernel-debugging.rst' and
+'Documentation/process/debugging/kgdb.rst'.
diff --git a/Documentation/admin-guide/abi-obsolete-files.rst b/Documentation/admin-guide/abi-obsolete-files.rst
new file mode 100644
index 000000000000..3061a916b4b5
--- /dev/null
+++ b/Documentation/admin-guide/abi-obsolete-files.rst
@@ -0,0 +1,7 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Obsolete ABI Files
+==================
+
+.. kernel-abi:: obsolete
+ :no-symbols:
diff --git a/Documentation/admin-guide/abi-obsolete.rst b/Documentation/admin-guide/abi-obsolete.rst
new file mode 100644
index 000000000000..640f3903e847
--- /dev/null
+++ b/Documentation/admin-guide/abi-obsolete.rst
@@ -0,0 +1,13 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+ABI obsolete symbols
+====================
+
+Documents interfaces that are still remaining in the kernel, but are
+marked to be removed at some later point in time.
+
+The description of the interface will document the reason why it is
+obsolete and when it can be expected to be removed.
+
+.. kernel-abi:: obsolete
+ :no-files:
diff --git a/Documentation/admin-guide/abi-removed-files.rst b/Documentation/admin-guide/abi-removed-files.rst
new file mode 100644
index 000000000000..f1bdfadd2ec4
--- /dev/null
+++ b/Documentation/admin-guide/abi-removed-files.rst
@@ -0,0 +1,7 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Removed ABI Files
+=================
+
+.. kernel-abi:: removed
+ :no-symbols:
diff --git a/Documentation/admin-guide/abi-removed.rst b/Documentation/admin-guide/abi-removed.rst
new file mode 100644
index 000000000000..88832d3eacd6
--- /dev/null
+++ b/Documentation/admin-guide/abi-removed.rst
@@ -0,0 +1,7 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+ABI removed symbols
+===================
+
+.. kernel-abi:: removed
+ :no-files:
diff --git a/Documentation/admin-guide/abi-stable-files.rst b/Documentation/admin-guide/abi-stable-files.rst
new file mode 100644
index 000000000000..f867738fc178
--- /dev/null
+++ b/Documentation/admin-guide/abi-stable-files.rst
@@ -0,0 +1,7 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Stable ABI Files
+================
+
+.. kernel-abi:: stable
+ :no-symbols:
diff --git a/Documentation/admin-guide/abi-stable.rst b/Documentation/admin-guide/abi-stable.rst
new file mode 100644
index 000000000000..528c68401f4b
--- /dev/null
+++ b/Documentation/admin-guide/abi-stable.rst
@@ -0,0 +1,16 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+ABI stable symbols
+==================
+
+Documents the interfaces that the developer has defined to be stable.
+
+Userspace programs are free to use these interfaces with no
+restrictions, and backward compatibility for them will be guaranteed
+for at least 2 years.
+
+Most interfaces (like syscalls) are expected to never change and always
+be available.
+
+.. kernel-abi:: stable
+ :no-files:
diff --git a/Documentation/admin-guide/abi-testing-files.rst b/Documentation/admin-guide/abi-testing-files.rst
new file mode 100644
index 000000000000..1da868e42fdb
--- /dev/null
+++ b/Documentation/admin-guide/abi-testing-files.rst
@@ -0,0 +1,7 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Testing ABI Files
+=================
+
+.. kernel-abi:: testing
+ :no-symbols:
diff --git a/Documentation/admin-guide/abi-testing.rst b/Documentation/admin-guide/abi-testing.rst
new file mode 100644
index 000000000000..6153ebd38e2d
--- /dev/null
+++ b/Documentation/admin-guide/abi-testing.rst
@@ -0,0 +1,22 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+ABI testing symbols
+===================
+
+Documents interfaces that are felt to be stable,
+as the main development of this interface has been completed.
+
+The interface can be changed to add new features, but the
+current interface will not break by doing this, unless grave
+errors or security problems are found in them.
+
+Userspace programs can start to rely on these interfaces, but they must
+be aware of changes that can occur before these interfaces move to
+be marked stable.
+
+Programs that use these interfaces are strongly encouraged to add their
+name to the description of these interfaces, so that the kernel
+developers can easily notify them if any changes occur.
+
+.. kernel-abi:: testing
+ :no-files:
diff --git a/Documentation/admin-guide/abi.rst b/Documentation/admin-guide/abi.rst
new file mode 100644
index 000000000000..c6039359e585
--- /dev/null
+++ b/Documentation/admin-guide/abi.rst
@@ -0,0 +1,29 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================
+Linux ABI description
+=====================
+
+.. kernel-abi:: README
+
+ABI symbols
+-----------
+
+.. toctree::
+ :maxdepth: 2
+
+ abi-stable
+ abi-testing
+ abi-obsolete
+ abi-removed
+
+ABI files
+---------
+
+.. toctree::
+ :maxdepth: 2
+
+ abi-stable-files
+ abi-testing-files
+ abi-obsolete-files
+ abi-removed-files
diff --git a/Documentation/admin-guide/acpi/cppc_sysfs.rst b/Documentation/admin-guide/acpi/cppc_sysfs.rst
new file mode 100644
index 000000000000..36981c667823
--- /dev/null
+++ b/Documentation/admin-guide/acpi/cppc_sysfs.rst
@@ -0,0 +1,78 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==================================================
+Collaborative Processor Performance Control (CPPC)
+==================================================
+
+.. _cppc_sysfs:
+
+CPPC
+====
+
+CPPC defined in the ACPI spec describes a mechanism for the OS to manage the
+performance of a logical processor on a contiguous and abstract performance
+scale. CPPC exposes a set of registers to describe abstract performance scale,
+to request performance levels and to measure per-cpu delivered performance.
+
+For more details on CPPC please refer to the ACPI specification at:
+
+http://uefi.org/specifications
+
+Some of the CPPC registers are exposed via sysfs under::
+
+ /sys/devices/system/cpu/cpuX/acpi_cppc/
+
+for each cpu X::
+
+ $ ls -lR /sys/devices/system/cpu/cpu0/acpi_cppc/
+ /sys/devices/system/cpu/cpu0/acpi_cppc/:
+ total 0
+ -r--r--r-- 1 root root 65536 Mar 5 19:38 feedback_ctrs
+ -r--r--r-- 1 root root 65536 Mar 5 19:38 highest_perf
+ -r--r--r-- 1 root root 65536 Mar 5 19:38 lowest_freq
+ -r--r--r-- 1 root root 65536 Mar 5 19:38 lowest_nonlinear_perf
+ -r--r--r-- 1 root root 65536 Mar 5 19:38 lowest_perf
+ -r--r--r-- 1 root root 65536 Mar 5 19:38 nominal_freq
+ -r--r--r-- 1 root root 65536 Mar 5 19:38 nominal_perf
+ -r--r--r-- 1 root root 65536 Mar 5 19:38 reference_perf
+ -r--r--r-- 1 root root 65536 Mar 5 19:38 wraparound_time
+
+* highest_perf : Highest performance of this processor (abstract scale).
+* nominal_perf : Highest sustained performance of this processor
+ (abstract scale).
+* lowest_nonlinear_perf : Lowest performance of this processor with nonlinear
+ power savings (abstract scale).
+* lowest_perf : Lowest performance of this processor (abstract scale).
+
+* lowest_freq : CPU frequency corresponding to lowest_perf (in MHz).
+* nominal_freq : CPU frequency corresponding to nominal_perf (in MHz).
+ The above frequencies should only be used to report processor performance in
+ frequency instead of abstract scale. These values should not be used for any
+ functional decisions.
+
+* feedback_ctrs : Includes both Reference and delivered performance counter.
+ Reference counter ticks up proportional to processor's reference performance.
+ Delivered counter ticks up proportional to processor's delivered performance.
+* wraparound_time: Minimum time for the feedback counters to wraparound
+ (seconds).
+* reference_perf : Performance level at which reference performance counter
+ accumulates (abstract scale).
+
+
+Computing Average Delivered Performance
+=======================================
+
+Below describes the steps to compute the average performance delivered by
+taking two different snapshots of feedback counters at time T1 and T2.
+
+ T1: Read feedback_ctrs as fbc_t1
+ Wait or run some workload
+
+ T2: Read feedback_ctrs as fbc_t2
+
+::
+
+ delivered_counter_delta = fbc_t2[del] - fbc_t1[del]
+ reference_counter_delta = fbc_t2[ref] - fbc_t1[ref]
+
+ delivered_perf = (reference_perf x delivered_counter_delta) / reference_counter_delta
diff --git a/Documentation/admin-guide/acpi/fan_performance_states.rst b/Documentation/admin-guide/acpi/fan_performance_states.rst
new file mode 100644
index 000000000000..b9e4b4d146c1
--- /dev/null
+++ b/Documentation/admin-guide/acpi/fan_performance_states.rst
@@ -0,0 +1,90 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================
+ACPI Fan Performance States
+===========================
+
+When the optional _FPS object is present under an ACPI device representing a
+fan (for example, PNP0C0B or INT3404), the ACPI fan driver creates additional
+"state*" attributes in the sysfs directory of the ACPI device in question.
+These attributes list properties of fan performance states.
+
+For more information on _FPS refer to the ACPI specification at:
+
+http://uefi.org/specifications
+
+For instance, the contents of the INT3404 ACPI device sysfs directory
+may look as follows::
+
+ $ ls -l /sys/bus/acpi/devices/INT3404:00/
+ total 0
+ ...
+ -r--r--r-- 1 root root 4096 Dec 13 20:38 state0
+ -r--r--r-- 1 root root 4096 Dec 13 20:38 state1
+ -r--r--r-- 1 root root 4096 Dec 13 20:38 state10
+ -r--r--r-- 1 root root 4096 Dec 13 20:38 state11
+ -r--r--r-- 1 root root 4096 Dec 13 20:38 state2
+ -r--r--r-- 1 root root 4096 Dec 13 20:38 state3
+ -r--r--r-- 1 root root 4096 Dec 13 20:38 state4
+ -r--r--r-- 1 root root 4096 Dec 13 20:38 state5
+ -r--r--r-- 1 root root 4096 Dec 13 20:38 state6
+ -r--r--r-- 1 root root 4096 Dec 13 20:38 state7
+ -r--r--r-- 1 root root 4096 Dec 13 20:38 state8
+ -r--r--r-- 1 root root 4096 Dec 13 20:38 state9
+ -r--r--r-- 1 root root 4096 Dec 13 01:00 status
+ ...
+
+where each of the "state*" files represents one performance state of the fan
+and contains a colon-separated list of 5 integer numbers (fields) with the
+following interpretation::
+
+ control_percent:trip_point_index:speed_rpm:noise_level_mdb:power_mw
+
+* ``control_percent``: The percent value to be used to set the fan speed to a
+ specific level using the _FSL object (0-100).
+
+* ``trip_point_index``: The active cooling trip point number that corresponds
+ to this performance state (0-9).
+
+* ``speed_rpm``: Speed of the fan in rotations per minute.
+
+* ``noise_level_mdb``: Audible noise emitted by the fan in this state in
+ millidecibels.
+
+* ``power_mw``: Power draw of the fan in this state in milliwatts.
+
+For example::
+
+ $cat /sys/bus/acpi/devices/INT3404:00/state1
+ 25:0:3200:12500:1250
+
+When a given field is not populated or its value provided by the platform
+firmware is invalid, the "not-defined" string is shown instead of the value.
+
+ACPI Fan Fine Grain Control
+=============================
+
+When _FIF object specifies support for fine grain control, then fan speed
+can be set from 0 to 100% with the recommended minimum "step size" via
+_FSL object. User can adjust fan speed using thermal sysfs cooling device.
+
+Here use can look at fan performance states for a reference speed (speed_rpm)
+and set it by changing cooling device cur_state. If the fine grain control
+is supported then user can also adjust to some other speeds which are
+not defined in the performance states.
+
+The support of fine grain control is presented via sysfs attribute
+"fine_grain_control". If fine grain control is present, this attribute
+will show "1" otherwise "0".
+
+This sysfs attribute is presented in the same directory as performance states.
+
+ACPI Fan Performance Feedback
+=============================
+
+The optional _FST object provides status information for the fan device.
+This includes field to provide current fan speed in revolutions per minute
+at which the fan is rotating.
+
+This speed is presented in the sysfs using the attribute "fan_speed_rpm",
+in the same directory as performance states.
diff --git a/Documentation/admin-guide/acpi/index.rst b/Documentation/admin-guide/acpi/index.rst
new file mode 100644
index 000000000000..b078fdb8f4c9
--- /dev/null
+++ b/Documentation/admin-guide/acpi/index.rst
@@ -0,0 +1,14 @@
+============
+ACPI Support
+============
+
+Here we document in detail how to interact with various mechanisms in
+the Linux ACPI support.
+
+.. toctree::
+ :maxdepth: 1
+
+ initrd_table_override
+ ssdt-overlays
+ cppc_sysfs
+ fan_performance_states
diff --git a/Documentation/admin-guide/acpi/initrd_table_override.rst b/Documentation/admin-guide/acpi/initrd_table_override.rst
new file mode 100644
index 000000000000..bb24fa6b5fbe
--- /dev/null
+++ b/Documentation/admin-guide/acpi/initrd_table_override.rst
@@ -0,0 +1,115 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+================================
+Upgrading ACPI tables via initrd
+================================
+
+What is this about
+==================
+
+If the ACPI_TABLE_UPGRADE compile option is true, it is possible to
+upgrade the ACPI execution environment that is defined by the ACPI tables
+via upgrading the ACPI tables provided by the BIOS with an instrumented,
+modified, more recent version one, or installing brand new ACPI tables.
+
+When building initrd with kernel in a single image, option
+ACPI_TABLE_OVERRIDE_VIA_BUILTIN_INITRD should also be true for this
+feature to work.
+
+For a full list of ACPI tables that can be upgraded/installed, take a look
+at the char `*table_sigs[MAX_ACPI_SIGNATURE];` definition in
+drivers/acpi/tables.c.
+
+All ACPI tables iasl (Intel's ACPI compiler and disassembler) knows should
+be overridable, except:
+
+ - ACPI_SIG_RSDP (has a signature of 6 bytes)
+ - ACPI_SIG_FACS (does not have an ordinary ACPI table header)
+
+Both could get implemented as well.
+
+
+What is this for
+================
+
+Complain to your platform/BIOS vendor if you find a bug which is so severe
+that a workaround is not accepted in the Linux kernel. And this facility
+allows you to upgrade the buggy tables before your platform/BIOS vendor
+releases an upgraded BIOS binary.
+
+This facility can be used by platform/BIOS vendors to provide a Linux
+compatible environment without modifying the underlying platform firmware.
+
+This facility also provides a powerful feature to easily debug and test
+ACPI BIOS table compatibility with the Linux kernel by modifying old
+platform provided ACPI tables or inserting new ACPI tables.
+
+It can and should be enabled in any kernel because there is no functional
+change with not instrumented initrds.
+
+
+How does it work
+================
+::
+
+ # Extract the machine's ACPI tables:
+ cd /tmp
+ acpidump >acpidump
+ acpixtract -a acpidump
+ # Disassemble, modify and recompile them:
+ iasl -d *.dat
+ # For example add this statement into a _PRT (PCI Routing Table) function
+ # of the DSDT:
+ Store("HELLO WORLD", debug)
+ # And increase the OEM Revision. For example, before modification:
+ DefinitionBlock ("DSDT.aml", "DSDT", 2, "INTEL ", "TEMPLATE", 0x00000000)
+ # After modification:
+ DefinitionBlock ("DSDT.aml", "DSDT", 2, "INTEL ", "TEMPLATE", 0x00000001)
+ iasl -sa dsdt.dsl
+ # Add the raw ACPI tables to an uncompressed cpio archive.
+ # They must be put into a /kernel/firmware/acpi directory inside the cpio
+ # archive. Note that if the table put here matches a platform table
+ # (similar Table Signature, and similar OEMID, and similar OEM Table ID)
+ # with a more recent OEM Revision, the platform table will be upgraded by
+ # this table. If the table put here doesn't match a platform table
+ # (dissimilar Table Signature, or dissimilar OEMID, or dissimilar OEM Table
+ # ID), this table will be appended.
+ mkdir -p kernel/firmware/acpi
+ cp dsdt.aml kernel/firmware/acpi
+ # A maximum of "NR_ACPI_INITRD_TABLES (64)" tables are currently allowed
+ # (see osl.c):
+ iasl -sa facp.dsl
+ iasl -sa ssdt1.dsl
+ cp facp.aml kernel/firmware/acpi
+ cp ssdt1.aml kernel/firmware/acpi
+ # The uncompressed cpio archive must be the first. Other, typically
+ # compressed cpio archives, must be concatenated on top of the uncompressed
+ # one. Following command creates the uncompressed cpio archive and
+ # concatenates the original initrd on top:
+ find kernel | cpio -H newc --create > /boot/instrumented_initrd
+ cat /boot/initrd >>/boot/instrumented_initrd
+ # reboot with increased acpi debug level, e.g. boot params:
+ acpi.debug_level=0x2 acpi.debug_layer=0xFFFFFFFF
+ # and check your syslog:
+ [ 1.268089] ACPI: PCI Interrupt Routing Table [\_SB_.PCI0._PRT]
+ [ 1.272091] [ACPI Debug] String [0x0B] "HELLO WORLD"
+
+iasl is able to disassemble and recompile quite a lot different,
+also static ACPI tables.
+
+
+Where to retrieve userspace tools
+=================================
+
+iasl and acpixtract are part of Intel's ACPICA project:
+https://acpica.org/
+
+and should be packaged by distributions (for example in the acpica package
+on SUSE).
+
+acpidump can be found in Len Browns pmtools:
+ftp://kernel.org/pub/linux/kernel/people/lenb/acpi/utils/pmtools/acpidump
+
+This tool is also part of the acpica package on SUSE.
+Alternatively, used ACPI tables can be retrieved via sysfs in latest kernels:
+/sys/firmware/acpi/tables
diff --git a/Documentation/admin-guide/acpi/ssdt-overlays.rst b/Documentation/admin-guide/acpi/ssdt-overlays.rst
new file mode 100644
index 000000000000..5ea9f4a3b76e
--- /dev/null
+++ b/Documentation/admin-guide/acpi/ssdt-overlays.rst
@@ -0,0 +1,181 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============
+SSDT Overlays
+=============
+
+In order to support ACPI open-ended hardware configurations (e.g. development
+boards) we need a way to augment the ACPI configuration provided by the firmware
+image. A common example is connecting sensors on I2C / SPI buses on development
+boards.
+
+Although this can be accomplished by creating a kernel platform driver or
+recompiling the firmware image with updated ACPI tables, neither is practical:
+the former proliferates board specific kernel code while the latter requires
+access to firmware tools which are often not publicly available.
+
+Because ACPI supports external references in AML code a more practical
+way to augment firmware ACPI configuration is by dynamically loading
+user defined SSDT tables that contain the board specific information.
+
+For example, to enumerate a Bosch BMA222E accelerometer on the I2C bus of the
+Minnowboard MAX development board exposed via the LSE connector [1], the
+following ASL code can be used::
+
+ DefinitionBlock ("minnowmax.aml", "SSDT", 1, "Vendor", "Accel", 0x00000003)
+ {
+ External (\_SB.I2C6, DeviceObj)
+
+ Scope (\_SB.I2C6)
+ {
+ Device (STAC)
+ {
+ Name (_HID, "BMA222E")
+ Name (RBUF, ResourceTemplate ()
+ {
+ I2cSerialBus (0x0018, ControllerInitiated, 0x00061A80,
+ AddressingMode7Bit, "\\_SB.I2C6", 0x00,
+ ResourceConsumer, ,)
+ GpioInt (Edge, ActiveHigh, Exclusive, PullDown, 0x0000,
+ "\\_SB.GPO2", 0x00, ResourceConsumer, , )
+ { // Pin list
+ 0
+ }
+ })
+
+ Method (_CRS, 0, Serialized)
+ {
+ Return (RBUF)
+ }
+ }
+ }
+ }
+
+which can then be compiled to AML binary format::
+
+ $ iasl minnowmax.asl
+
+ Intel ACPI Component Architecture
+ ASL Optimizing Compiler version 20140214-64 [Mar 29 2014]
+ Copyright (c) 2000 - 2014 Intel Corporation
+
+ ASL Input: minnomax.asl - 30 lines, 614 bytes, 7 keywords
+ AML Output: minnowmax.aml - 165 bytes, 6 named objects, 1 executable opcodes
+
+[1] https://www.elinux.org/Minnowboard:MinnowMax#Low_Speed_Expansion_.28Top.29
+
+The resulting AML code can then be loaded by the kernel using one of the methods
+below.
+
+Loading ACPI SSDTs from initrd
+==============================
+
+This option allows loading of user defined SSDTs from initrd and it is useful
+when the system does not support EFI or when there is not enough EFI storage.
+
+It works in a similar way with initrd based ACPI tables override/upgrade: SSDT
+AML code must be placed in the first, uncompressed, initrd under the
+"kernel/firmware/acpi" path. Multiple files can be used and this will translate
+in loading multiple tables. Only SSDT and OEM tables are allowed. See
+initrd_table_override.txt for more details.
+
+Here is an example::
+
+ # Add the raw ACPI tables to an uncompressed cpio archive.
+ # They must be put into a /kernel/firmware/acpi directory inside the
+ # cpio archive.
+ # The uncompressed cpio archive must be the first.
+ # Other, typically compressed cpio archives, must be
+ # concatenated on top of the uncompressed one.
+ mkdir -p kernel/firmware/acpi
+ cp ssdt.aml kernel/firmware/acpi
+
+ # Create the uncompressed cpio archive and concatenate the original initrd
+ # on top:
+ find kernel | cpio -H newc --create > /boot/instrumented_initrd
+ cat /boot/initrd >>/boot/instrumented_initrd
+
+Loading ACPI SSDTs from EFI variables
+=====================================
+
+This is the preferred method, when EFI is supported on the platform, because it
+allows a persistent, OS independent way of storing the user defined SSDTs. There
+is also work underway to implement EFI support for loading user defined SSDTs
+and using this method will make it easier to convert to the EFI loading
+mechanism when that will arrive. To enable it, the
+CONFIG_EFI_CUSTOM_SSDT_OVERLAYS should be chosen to y.
+
+In order to load SSDTs from an EFI variable the ``"efivar_ssdt=..."`` kernel
+command line parameter can be used (the name has a limitation of 16 characters).
+The argument for the option is the variable name to use. If there are multiple
+variables with the same name but with different vendor GUIDs, all of them will
+be loaded.
+
+In order to store the AML code in an EFI variable the efivarfs filesystem can be
+used. It is enabled and mounted by default in /sys/firmware/efi/efivars in all
+recent distribution.
+
+Creating a new file in /sys/firmware/efi/efivars will automatically create a new
+EFI variable. Updating a file in /sys/firmware/efi/efivars will update the EFI
+variable. Please note that the file name needs to be specially formatted as
+"Name-GUID" and that the first 4 bytes in the file (little-endian format)
+represent the attributes of the EFI variable (see EFI_VARIABLE_MASK in
+include/linux/efi.h). Writing to the file must also be done with one write
+operation.
+
+For example, you can use the following bash script to create/update an EFI
+variable with the content from a given file::
+
+ #!/bin/sh -e
+
+ while [ -n "$1" ]; do
+ case "$1" in
+ "-f") filename="$2"; shift;;
+ "-g") guid="$2"; shift;;
+ *) name="$1";;
+ esac
+ shift
+ done
+
+ usage()
+ {
+ echo "Syntax: ${0##*/} -f filename [ -g guid ] name"
+ exit 1
+ }
+
+ [ -n "$name" -a -f "$filename" ] || usage
+
+ EFIVARFS="/sys/firmware/efi/efivars"
+
+ [ -d "$EFIVARFS" ] || exit 2
+
+ if stat -tf $EFIVARFS | grep -q -v de5e81e4; then
+ mount -t efivarfs none $EFIVARFS
+ fi
+
+ # try to pick up an existing GUID
+ [ -n "$guid" ] || guid=$(find "$EFIVARFS" -name "$name-*" | head -n1 | cut -f2- -d-)
+
+ # use a randomly generated GUID
+ [ -n "$guid" ] || guid="$(cat /proc/sys/kernel/random/uuid)"
+
+ # efivarfs expects all of the data in one write
+ tmp=$(mktemp)
+ /bin/echo -ne "\007\000\000\000" | cat - $filename > $tmp
+ dd if=$tmp of="$EFIVARFS/$name-$guid" bs=$(stat -c %s $tmp)
+ rm $tmp
+
+Loading ACPI SSDTs from configfs
+================================
+
+This option allows loading of user defined SSDTs from user space via the configfs
+interface. The CONFIG_ACPI_CONFIGFS option must be select and configfs must be
+mounted. In the following examples, we assume that configfs has been mounted in
+/sys/kernel/config.
+
+New tables can be loading by creating new directories in /sys/kernel/config/acpi/table
+and writing the SSDT AML code in the aml attribute::
+
+ cd /sys/kernel/config/acpi/table
+ mkdir my_ssdt
+ cat ~/ssdt.aml > my_ssdt/aml
diff --git a/Documentation/admin-guide/aoe/aoe.rst b/Documentation/admin-guide/aoe/aoe.rst
new file mode 100644
index 000000000000..a05e751363a0
--- /dev/null
+++ b/Documentation/admin-guide/aoe/aoe.rst
@@ -0,0 +1,150 @@
+Introduction
+============
+
+ATA over Ethernet is a network protocol that provides simple access to
+block storage on the LAN.
+
+ http://support.coraid.com/documents/AoEr11.txt
+
+The EtherDrive (R) HOWTO for 2.6 and 3.x kernels is found at ...
+
+ http://support.coraid.com/support/linux/EtherDrive-2.6-HOWTO.html
+
+It has many tips and hints! Please see, especially, recommended
+tunings for virtual memory:
+
+ http://support.coraid.com/support/linux/EtherDrive-2.6-HOWTO-5.html#ss5.19
+
+The aoetools are userland programs that are designed to work with this
+driver. The aoetools are on sourceforge.
+
+ http://aoetools.sourceforge.net/
+
+The scripts in this Documentation/admin-guide/aoe directory are intended to
+document the use of the driver and are not necessary if you install
+the aoetools.
+
+
+Creating Device Nodes
+=====================
+
+ Users of udev should find the block device nodes created
+ automatically, but to create all the necessary device nodes, use the
+ udev configuration rules provided in udev.txt (in this directory).
+
+ There is a udev-install.sh script that shows how to install these
+ rules on your system.
+
+ There is also an autoload script that shows how to edit
+ /etc/modprobe.d/aoe.conf to ensure that the aoe module is loaded when
+ necessary. Preloading the aoe module is preferable to autoloading,
+ however, because AoE discovery takes a few seconds. It can be
+ confusing when an AoE device is not present the first time the a
+ command is run but appears a second later.
+
+Using Device Nodes
+==================
+
+ "cat /dev/etherd/err" blocks, waiting for error diagnostic output,
+ like any retransmitted packets.
+
+ "echo eth2 eth4 > /dev/etherd/interfaces" tells the aoe driver to
+ limit ATA over Ethernet traffic to eth2 and eth4. AoE traffic from
+ untrusted networks should be ignored as a matter of security. See
+ also the aoe_iflist driver option described below.
+
+ "echo > /dev/etherd/discover" tells the driver to find out what AoE
+ devices are available.
+
+ In the future these character devices may disappear and be replaced
+ by sysfs counterparts. Using the commands in aoetools insulates
+ users from these implementation details.
+
+ The block devices are named like this::
+
+ e{shelf}.{slot}
+ e{shelf}.{slot}p{part}
+
+ ... so that "e0.2" is the third blade from the left (slot 2) in the
+ first shelf (shelf address zero). That's the whole disk. The first
+ partition on that disk would be "e0.2p1".
+
+Using sysfs
+===========
+
+ Each aoe block device in /sys/block has the extra attributes of
+ state, mac, and netif. The state attribute is "up" when the device
+ is ready for I/O and "down" if detected but unusable. The
+ "down,closewait" state shows that the device is still open and
+ cannot come up again until it has been closed.
+
+ The mac attribute is the ethernet address of the remote AoE device.
+ The netif attribute is the network interface on the localhost
+ through which we are communicating with the remote AoE device.
+
+ There is a script in this directory that formats this information in
+ a convenient way. Users with aoetools should use the aoe-stat
+ command::
+
+ root@makki root# sh Documentation/admin-guide/aoe/status.sh
+ e10.0 eth3 up
+ e10.1 eth3 up
+ e10.2 eth3 up
+ e10.3 eth3 up
+ e10.4 eth3 up
+ e10.5 eth3 up
+ e10.6 eth3 up
+ e10.7 eth3 up
+ e10.8 eth3 up
+ e10.9 eth3 up
+ e4.0 eth1 up
+ e4.1 eth1 up
+ e4.2 eth1 up
+ e4.3 eth1 up
+ e4.4 eth1 up
+ e4.5 eth1 up
+ e4.6 eth1 up
+ e4.7 eth1 up
+ e4.8 eth1 up
+ e4.9 eth1 up
+
+ Use /sys/module/aoe/parameters/aoe_iflist (or better, the driver
+ option discussed below) instead of /dev/etherd/interfaces to limit
+ AoE traffic to the network interfaces in the given
+ whitespace-separated list. Unlike the old character device, the
+ sysfs entry can be read from as well as written to.
+
+ It's helpful to trigger discovery after setting the list of allowed
+ interfaces. The aoetools package provides an aoe-discover script
+ for this purpose. You can also directly use the
+ /dev/etherd/discover special file described above.
+
+Driver Options
+==============
+
+ There is a boot option for the built-in aoe driver and a
+ corresponding module parameter, aoe_iflist. Without this option,
+ all network interfaces may be used for ATA over Ethernet. Here is a
+ usage example for the module parameter::
+
+ modprobe aoe_iflist="eth1 eth3"
+
+ The aoe_deadsecs module parameter determines the maximum number of
+ seconds that the driver will wait for an AoE device to provide a
+ response to an AoE command. After aoe_deadsecs seconds have
+ elapsed, the AoE device will be marked as "down". A value of zero
+ is supported for testing purposes and makes the aoe driver keep
+ trying AoE commands forever.
+
+ The aoe_maxout module parameter has a default of 128. This is the
+ maximum number of unresponded packets that will be sent to an AoE
+ target at one time.
+
+ The aoe_dyndevs module parameter defaults to 1, meaning that the
+ driver will assign a block device minor number to a discovered AoE
+ target based on the order of its discovery. With dynamic minor
+ device numbers in use, a greater range of AoE shelf and slot
+ addresses can be supported. Users with udev will never have to
+ think about minor numbers. Using aoe_dyndevs=0 allows device nodes
+ to be pre-created using a static minor-number scheme with the
+ aoe-mkshelf script in the aoetools.
diff --git a/Documentation/aoe/autoload.sh b/Documentation/admin-guide/aoe/autoload.sh
index 815dff4691c9..815dff4691c9 100644
--- a/Documentation/aoe/autoload.sh
+++ b/Documentation/admin-guide/aoe/autoload.sh
diff --git a/Documentation/admin-guide/aoe/examples.rst b/Documentation/admin-guide/aoe/examples.rst
new file mode 100644
index 000000000000..91f3198e52c1
--- /dev/null
+++ b/Documentation/admin-guide/aoe/examples.rst
@@ -0,0 +1,23 @@
+Example of udev rules
+---------------------
+
+ .. include:: udev.txt
+ :literal:
+
+Example of udev install rules script
+------------------------------------
+
+ .. literalinclude:: udev-install.sh
+ :language: shell
+
+Example script to get status
+----------------------------
+
+ .. literalinclude:: status.sh
+ :language: shell
+
+Example of AoE autoload script
+------------------------------
+
+ .. literalinclude:: autoload.sh
+ :language: shell
diff --git a/Documentation/admin-guide/aoe/index.rst b/Documentation/admin-guide/aoe/index.rst
new file mode 100644
index 000000000000..d71c5df15922
--- /dev/null
+++ b/Documentation/admin-guide/aoe/index.rst
@@ -0,0 +1,17 @@
+=======================
+ATA over Ethernet (AoE)
+=======================
+
+.. toctree::
+ :maxdepth: 1
+
+ aoe
+ todo
+ examples
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
diff --git a/Documentation/aoe/status.sh b/Documentation/admin-guide/aoe/status.sh
index eeec7baae57a..eeec7baae57a 100644
--- a/Documentation/aoe/status.sh
+++ b/Documentation/admin-guide/aoe/status.sh
diff --git a/Documentation/admin-guide/aoe/todo.rst b/Documentation/admin-guide/aoe/todo.rst
new file mode 100644
index 000000000000..dea8db5a33e1
--- /dev/null
+++ b/Documentation/admin-guide/aoe/todo.rst
@@ -0,0 +1,17 @@
+TODO
+====
+
+There is a potential for deadlock when allocating a struct sk_buff for
+data that needs to be written out to aoe storage. If the data is
+being written from a dirty page in order to free that page, and if
+there are no other pages available, then deadlock may occur when a
+free page is needed for the sk_buff allocation. This situation has
+not been observed, but it would be nice to eliminate any potential for
+deadlock under memory pressure.
+
+Because ATA over Ethernet is not fragmented by the kernel's IP code,
+the destructor member of the struct sk_buff is available to the aoe
+driver. By using a mempool for allocating all but the first few
+sk_buffs, and by registering a destructor, we should be able to
+efficiently allocate sk_buffs without introducing any potential for
+deadlock.
diff --git a/Documentation/aoe/udev-install.sh b/Documentation/admin-guide/aoe/udev-install.sh
index 15e86f58c036..15e86f58c036 100644
--- a/Documentation/aoe/udev-install.sh
+++ b/Documentation/admin-guide/aoe/udev-install.sh
diff --git a/Documentation/aoe/udev.txt b/Documentation/admin-guide/aoe/udev.txt
index 1f06daf03f5b..d55ecb411c21 100644
--- a/Documentation/aoe/udev.txt
+++ b/Documentation/admin-guide/aoe/udev.txt
@@ -2,7 +2,7 @@
# They may be installed along the following lines. Check the section
# 8 udev manpage to see whether your udev supports SUBSYSTEM, and
# whether it uses one or two equal signs for SUBSYSTEM and KERNEL.
-#
+#
# ecashin@makki ~$ su
# Password:
# bash# find /etc -type f -name udev.conf
@@ -11,9 +11,9 @@
# udev_rules="/etc/udev/rules.d/"
# bash# ls /etc/udev/rules.d/
# 10-wacom.rules 50-udev.rules
-# bash# cp /path/to/linux-2.6.xx/Documentation/aoe/udev.txt \
+# bash# cp /path/to/linux/Documentation/admin-guide/aoe/udev.txt \
# /etc/udev/rules.d/60-aoe.rules
-#
+#
# aoe char devices
SUBSYSTEM=="aoe", KERNEL=="discover", NAME="etherd/%k", GROUP="disk", MODE="0220"
@@ -22,5 +22,5 @@ SUBSYSTEM=="aoe", KERNEL=="interfaces", NAME="etherd/%k", GROUP="disk", MODE="02
SUBSYSTEM=="aoe", KERNEL=="revalidate", NAME="etherd/%k", GROUP="disk", MODE="0220"
SUBSYSTEM=="aoe", KERNEL=="flush", NAME="etherd/%k", GROUP="disk", MODE="0220"
-# aoe block devices
+# aoe block devices
KERNEL=="etherd*", GROUP="disk"
diff --git a/Documentation/admin-guide/auxdisplay/cfag12864b.rst b/Documentation/admin-guide/auxdisplay/cfag12864b.rst
new file mode 100644
index 000000000000..da385d851acc
--- /dev/null
+++ b/Documentation/admin-guide/auxdisplay/cfag12864b.rst
@@ -0,0 +1,98 @@
+===================================
+cfag12864b LCD Driver Documentation
+===================================
+
+:License: GPLv2
+:Author & Maintainer: Miguel Ojeda <ojeda@kernel.org>
+:Date: 2006-10-27
+
+
+
+.. INDEX
+
+ 1. DRIVER INFORMATION
+ 2. DEVICE INFORMATION
+ 3. WIRING
+ 4. USERSPACE PROGRAMMING
+
+1. Driver Information
+---------------------
+
+This driver supports a cfag12864b LCD.
+
+
+2. Device Information
+---------------------
+
+:Manufacturer: Crystalfontz
+:Device Name: Crystalfontz 12864b LCD Series
+:Device Code: cfag12864b
+:Webpage: http://www.crystalfontz.com
+:Device Webpage: http://www.crystalfontz.com/products/12864b/
+:Type: LCD (Liquid Crystal Display)
+:Width: 128
+:Height: 64
+:Colors: 2 (B/N)
+:Controller: ks0108
+:Controllers: 2
+:Pages: 8 each controller
+:Addresses: 64 each page
+:Data size: 1 byte each address
+:Memory size: 2 * 8 * 64 * 1 = 1024 bytes = 1 Kbyte
+
+
+3. Wiring
+---------
+
+The cfag12864b LCD Series don't have official wiring.
+
+The common wiring is done to the parallel port as shown::
+
+ Parallel Port cfag12864b
+
+ Name Pin# Pin# Name
+
+ Strobe ( 1)------------------------------(17) Enable
+ Data 0 ( 2)------------------------------( 4) Data 0
+ Data 1 ( 3)------------------------------( 5) Data 1
+ Data 2 ( 4)------------------------------( 6) Data 2
+ Data 3 ( 5)------------------------------( 7) Data 3
+ Data 4 ( 6)------------------------------( 8) Data 4
+ Data 5 ( 7)------------------------------( 9) Data 5
+ Data 6 ( 8)------------------------------(10) Data 6
+ Data 7 ( 9)------------------------------(11) Data 7
+ (10) [+5v]---( 1) Vdd
+ (11) [GND]---( 2) Ground
+ (12) [+5v]---(14) Reset
+ (13) [GND]---(15) Read / Write
+ Line (14)------------------------------(13) Controller Select 1
+ (15)
+ Init (16)------------------------------(12) Controller Select 2
+ Select (17)------------------------------(16) Data / Instruction
+ Ground (18)---[GND] [+5v]---(19) LED +
+ Ground (19)---[GND]
+ Ground (20)---[GND] E A Values:
+ Ground (21)---[GND] [GND]---[P1]---(18) Vee - R = Resistor = 22 ohm
+ Ground (22)---[GND] | - P1 = Preset = 10 Kohm
+ Ground (23)---[GND] ---- S ------( 3) V0 - P2 = Preset = 1 Kohm
+ Ground (24)---[GND] | |
+ Ground (25)---[GND] [GND]---[P2]---[R]---(20) LED -
+
+
+4. Userspace Programming
+------------------------
+
+The cfag12864bfb describes a framebuffer device (/dev/fbX).
+
+It has a size of 1024 bytes = 1 Kbyte.
+Each bit represents one pixel. If the bit is high, the pixel will
+turn on. If the pixel is low, the pixel will turn off.
+
+You can use the framebuffer as a file: fopen, fwrite, fclose...
+Although the LCD won't get updated until the next refresh time arrives.
+
+Also, you can mmap the framebuffer: open & mmap, munmap & close...
+which is the best option for most uses.
+
+Check samples/auxdisplay/cfag12864b-example.c
+for a real working userspace complete program with usage examples.
diff --git a/Documentation/admin-guide/auxdisplay/index.rst b/Documentation/admin-guide/auxdisplay/index.rst
new file mode 100644
index 000000000000..e466f0595248
--- /dev/null
+++ b/Documentation/admin-guide/auxdisplay/index.rst
@@ -0,0 +1,16 @@
+=========================
+Auxiliary Display Support
+=========================
+
+.. toctree::
+ :maxdepth: 1
+
+ ks0108.rst
+ cfag12864b.rst
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
diff --git a/Documentation/admin-guide/auxdisplay/ks0108.rst b/Documentation/admin-guide/auxdisplay/ks0108.rst
new file mode 100644
index 000000000000..a7d3fe509373
--- /dev/null
+++ b/Documentation/admin-guide/auxdisplay/ks0108.rst
@@ -0,0 +1,50 @@
+==========================================
+ks0108 LCD Controller Driver Documentation
+==========================================
+
+:License: GPLv2
+:Author & Maintainer: Miguel Ojeda <ojeda@kernel.org>
+:Date: 2006-10-27
+
+
+
+.. INDEX
+
+ 1. DRIVER INFORMATION
+ 2. DEVICE INFORMATION
+ 3. WIRING
+
+
+1. Driver Information
+---------------------
+
+This driver supports the ks0108 LCD controller.
+
+
+2. Device Information
+---------------------
+
+:Manufacturer: Samsung
+:Device Name: KS0108 LCD Controller
+:Device Code: ks0108
+:Webpage: -
+:Device Webpage: -
+:Type: LCD Controller (Liquid Crystal Display Controller)
+:Width: 64
+:Height: 64
+:Colors: 2 (B/N)
+:Pages: 8
+:Addresses: 64 each page
+:Data size: 1 byte each address
+:Memory size: 8 * 64 * 1 = 512 bytes
+
+
+3. Wiring
+---------
+
+The driver supports data parallel port wiring.
+
+If you aren't building LCD related hardware, you should check
+your LCD specific wiring information in the same folder.
+
+For example, check Documentation/admin-guide/auxdisplay/cfag12864b.rst
diff --git a/Documentation/admin-guide/bcache.rst b/Documentation/admin-guide/bcache.rst
index c0ce64d75bbf..6fdb495ac466 100644
--- a/Documentation/admin-guide/bcache.rst
+++ b/Documentation/admin-guide/bcache.rst
@@ -5,11 +5,14 @@ A block layer cache (bcache)
Say you've got a big slow raid 6, and an ssd or three. Wouldn't it be
nice if you could use them as cache... Hence bcache.
-Wiki and git repositories are at:
+The bcache wiki can be found at:
+ https://bcache.evilpiepirate.org
- - http://bcache.evilpiepirate.org
- - http://evilpiepirate.org/git/linux-bcache.git
- - http://evilpiepirate.org/git/bcache-tools.git
+This is the git repository of bcache-tools:
+ https://git.kernel.org/pub/scm/linux/kernel/git/colyli/bcache-tools.git/
+
+The latest bcache kernel code can be found from mainline Linux kernel:
+ https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/
It's designed around the performance characteristics of SSDs - it only allocates
in erase block sized buckets, and it uses a hybrid btree/log to track cached
@@ -41,17 +44,21 @@ in the cache it first disables writeback caching and waits for all dirty data
to be flushed.
Getting started:
-You'll need make-bcache from the bcache-tools repository. Both the cache device
+You'll need bcache util from the bcache-tools repository. Both the cache device
and backing device must be formatted before use::
- make-bcache -B /dev/sdb
- make-bcache -C /dev/sdc
+ bcache make -B /dev/sdb
+ bcache make -C /dev/sdc
-make-bcache has the ability to format multiple devices at the same time - if
+`bcache make` has the ability to format multiple devices at the same time - if
you format your backing devices and cache device at the same time, you won't
have to manually attach::
- make-bcache -B /dev/sda /dev/sdb -C /dev/sdc
+ bcache make -B /dev/sda /dev/sdb -C /dev/sdc
+
+If your bcache-tools is not updated to latest version and does not have the
+unified `bcache` utility, you may use the legacy `make-bcache` utility to format
+bcache device with same -B and -C parameters.
bcache-tools now ships udev rules, and bcache devices are known to the kernel
immediately. Without udev, you can manually register devices like this::
@@ -188,7 +195,7 @@ D) Recovering data without bcache:
If bcache is not available in the kernel, a filesystem on the backing
device is still available at an 8KiB offset. So either via a loopdev
of the backing device created with --offset 8K, or any value defined by
---data-offset when you originally formatted bcache with `make-bcache`.
+--data-offset when you originally formatted bcache with `bcache make`.
For example::
@@ -197,7 +204,7 @@ For example::
This should present your unmodified backing device data in /dev/loop0
If your cache is in writethrough mode, then you can safely discard the
-cache device without loosing data.
+cache device without losing data.
E) Wiping a cache device
@@ -210,7 +217,7 @@ E) Wiping a cache device
After you boot back with bcache enabled, you recreate the cache and attach it::
- host:~# make-bcache -C /dev/sdh2
+ host:~# bcache make -C /dev/sdh2
UUID: 7be7e175-8f4c-4f99-94b2-9c904d227045
Set UUID: 5bc072a8-ab17-446d-9744-e247949913c1
version: 0
@@ -318,7 +325,7 @@ want for getting the best possible numbers when benchmarking.
The default metadata size in bcache is 8k. If your backing device is
RAID based, then be sure to align this by a multiple of your stride
- width using `make-bcache --data-offset`. If you intend to expand your
+ width using `bcache make --data-offset`. If you intend to expand your
disk array in the future, then multiply a series of primes by your
raid stripe size to get the disk multiples that you would like.
@@ -501,9 +508,6 @@ cache_miss_collisions
cache miss, but raced with a write and data was already present (usually 0
since the synchronization for cache misses was rewritten)
-cache_readaheads
- Count of times readahead occurred.
-
Sysfs - cache set
~~~~~~~~~~~~~~~~~
diff --git a/Documentation/admin-guide/binderfs.rst b/Documentation/admin-guide/binderfs.rst
new file mode 100644
index 000000000000..41a4db00df8d
--- /dev/null
+++ b/Documentation/admin-guide/binderfs.rst
@@ -0,0 +1,87 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+The Android binderfs Filesystem
+===============================
+
+Android binderfs is a filesystem for the Android binder IPC mechanism. It
+allows to dynamically add and remove binder devices at runtime. Binder devices
+located in a new binderfs instance are independent of binder devices located in
+other binderfs instances. Mounting a new binderfs instance makes it possible
+to get a set of private binder devices.
+
+Mounting binderfs
+-----------------
+
+Android binderfs can be mounted with::
+
+ mkdir /dev/binderfs
+ mount -t binder binder /dev/binderfs
+
+at which point a new instance of binderfs will show up at ``/dev/binderfs``.
+In a fresh instance of binderfs no binder devices will be present. There will
+only be a ``binder-control`` device which serves as the request handler for
+binderfs. Mounting another binderfs instance at a different location will
+create a new and separate instance from all other binderfs mounts. This is
+identical to the behavior of e.g. ``devpts`` and ``tmpfs``. The Android
+binderfs filesystem can be mounted in user namespaces.
+
+Options
+-------
+max
+ binderfs instances can be mounted with a limit on the number of binder
+ devices that can be allocated. The ``max=<count>`` mount option serves as
+ a per-instance limit. If ``max=<count>`` is set then only ``<count>`` number
+ of binder devices can be allocated in this binderfs instance.
+
+stats
+ Using ``stats=global`` enables global binder statistics.
+ ``stats=global`` is only available for a binderfs instance mounted in the
+ initial user namespace. An attempt to use the option to mount a binderfs
+ instance in another user namespace will return a permission error.
+
+Allocating binder Devices
+-------------------------
+
+.. _ioctl: http://man7.org/linux/man-pages/man2/ioctl.2.html
+
+To allocate a new binder device in a binderfs instance a request needs to be
+sent through the ``binder-control`` device node. A request is sent in the form
+of an `ioctl() <ioctl_>`_.
+
+What a program needs to do is to open the ``binder-control`` device node and
+send a ``BINDER_CTL_ADD`` request to the kernel. Users of binderfs need to
+tell the kernel which name the new binder device should get. By default a name
+can only contain up to ``BINDERFS_MAX_NAME`` chars including the terminating
+zero byte.
+
+Once the request is made via an `ioctl() <ioctl_>`_ passing a ``struct
+binder_device`` with the name to the kernel it will allocate a new binder
+device and return the major and minor number of the new device in the struct
+(This is necessary because binderfs allocates a major device number
+dynamically.). After the `ioctl() <ioctl_>`_ returns there will be a new
+binder device located under /dev/binderfs with the chosen name.
+
+Deleting binder Devices
+-----------------------
+
+.. _unlink: http://man7.org/linux/man-pages/man2/unlink.2.html
+.. _rm: http://man7.org/linux/man-pages/man1/rm.1.html
+
+Binderfs binder devices can be deleted via `unlink() <unlink_>`_. This means
+that the `rm() <rm_>`_ tool can be used to delete them. Note that the
+``binder-control`` device cannot be deleted since this would make the binderfs
+instance unusable. The ``binder-control`` device will be deleted when the
+binderfs instance is unmounted and all references to it have been dropped.
+
+Binder features
+---------------
+
+Assuming an instance of binderfs has been mounted at ``/dev/binderfs``, the
+features supported by the binder driver can be located under
+``/dev/binderfs/features/``. The presence of individual files can be tested
+to determine whether a particular feature is supported by the driver.
+
+Example::
+
+ cat /dev/binderfs/features/oneway_spam_detection
+ 1
diff --git a/Documentation/admin-guide/binfmt-misc.rst b/Documentation/admin-guide/binfmt-misc.rst
index 97b0d7927078..59cd902e3549 100644
--- a/Documentation/admin-guide/binfmt-misc.rst
+++ b/Documentation/admin-guide/binfmt-misc.rst
@@ -1,5 +1,5 @@
-Kernel Support for miscellaneous (your favourite) Binary Formats v1.1
-=====================================================================
+Kernel Support for miscellaneous Binary Formats (binfmt_misc)
+=============================================================
This Kernel feature allows you to invoke almost (for restrictions see below)
every program by simply typing its name in the shell.
@@ -23,7 +23,7 @@ Here is what the fields mean:
- ``name``
is an identifier string. A new /proc file will be created with this
- ``name below /proc/sys/fs/binfmt_misc``; cannot contain slashes ``/`` for
+ name below ``/proc/sys/fs/binfmt_misc``; cannot contain slashes ``/`` for
obvious reasons.
- ``type``
is the type of recognition. Give ``M`` for magic and ``E`` for extension.
@@ -83,7 +83,7 @@ Here is what the fields mean:
``F`` - fix binary
The usual behaviour of binfmt_misc is to spawn the
binary lazily when the misc format file is invoked. However,
- this doesn``t work very well in the face of mount namespaces and
+ this doesn't work very well in the face of mount namespaces and
changeroots, so the ``F`` mode opens the binary as soon as the
emulation is installed and uses the opened image to spawn the
emulator, meaning it is always available once installed,
@@ -140,8 +140,8 @@ Hints
-----
If you want to pass special arguments to your interpreter, you can
-write a wrapper script for it. See Documentation/admin-guide/java.rst for an
-example.
+write a wrapper script for it.
+See :doc:`Documentation/admin-guide/java.rst <./java>` for an example.
Your interpreter should NOT look in the PATH for the filename; the kernel
passes it the full filename (or the file descriptor) to use. Using ``$PATH`` can
diff --git a/Documentation/blockdev/drbd/DRBD-8.3-data-packets.svg b/Documentation/admin-guide/blockdev/drbd/DRBD-8.3-data-packets.svg
index f87cfa0dc2fb..f87cfa0dc2fb 100644
--- a/Documentation/blockdev/drbd/DRBD-8.3-data-packets.svg
+++ b/Documentation/admin-guide/blockdev/drbd/DRBD-8.3-data-packets.svg
diff --git a/Documentation/blockdev/drbd/DRBD-data-packets.svg b/Documentation/admin-guide/blockdev/drbd/DRBD-data-packets.svg
index 48a1e2165fec..48a1e2165fec 100644
--- a/Documentation/blockdev/drbd/DRBD-data-packets.svg
+++ b/Documentation/admin-guide/blockdev/drbd/DRBD-data-packets.svg
diff --git a/Documentation/blockdev/drbd/conn-states-8.dot b/Documentation/admin-guide/blockdev/drbd/conn-states-8.dot
index 025e8cf5e64a..025e8cf5e64a 100644
--- a/Documentation/blockdev/drbd/conn-states-8.dot
+++ b/Documentation/admin-guide/blockdev/drbd/conn-states-8.dot
diff --git a/Documentation/admin-guide/blockdev/drbd/data-structure-v9.rst b/Documentation/admin-guide/blockdev/drbd/data-structure-v9.rst
new file mode 100644
index 000000000000..66036b901644
--- /dev/null
+++ b/Documentation/admin-guide/blockdev/drbd/data-structure-v9.rst
@@ -0,0 +1,42 @@
+================================
+kernel data structure for DRBD-9
+================================
+
+This describes the in kernel data structure for DRBD-9. Starting with
+Linux v3.14 we are reorganizing DRBD to use this data structure.
+
+Basic Data Structure
+====================
+
+A node has a number of DRBD resources. Each such resource has a number of
+devices (aka volumes) and connections to other nodes ("peer nodes"). Each DRBD
+device is represented by a block device locally.
+
+The DRBD objects are interconnected to form a matrix as depicted below; a
+drbd_peer_device object sits at each intersection between a drbd_device and a
+drbd_connection::
+
+ /--------------+---------------+.....+---------------\
+ | resource | device | | device |
+ +--------------+---------------+.....+---------------+
+ | connection | peer_device | | peer_device |
+ +--------------+---------------+.....+---------------+
+ : : : : :
+ : : : : :
+ +--------------+---------------+.....+---------------+
+ | connection | peer_device | | peer_device |
+ \--------------+---------------+.....+---------------/
+
+In this table, horizontally, devices can be accessed from resources by their
+volume number. Likewise, peer_devices can be accessed from connections by
+their volume number. Objects in the vertical direction are connected by double
+linked lists. There are back pointers from peer_devices to their connections a
+devices, and from connections and devices to their resource.
+
+All resources are in the drbd_resources double-linked list. In addition, all
+devices can be accessed by their minor device number via the drbd_devices idr.
+
+The drbd_resource, drbd_connection, and drbd_device objects are reference
+counted. The peer_device objects only serve to establish the links between
+devices and connections; their lifetime is determined by the lifetime of the
+device and connection which they reference.
diff --git a/Documentation/blockdev/drbd/disk-states-8.dot b/Documentation/admin-guide/blockdev/drbd/disk-states-8.dot
index d06cfb46fb98..d06cfb46fb98 100644
--- a/Documentation/blockdev/drbd/disk-states-8.dot
+++ b/Documentation/admin-guide/blockdev/drbd/disk-states-8.dot
diff --git a/Documentation/blockdev/drbd/drbd-connection-state-overview.dot b/Documentation/admin-guide/blockdev/drbd/drbd-connection-state-overview.dot
index 6d9cf0a7b11d..6d9cf0a7b11d 100644
--- a/Documentation/blockdev/drbd/drbd-connection-state-overview.dot
+++ b/Documentation/admin-guide/blockdev/drbd/drbd-connection-state-overview.dot
diff --git a/Documentation/admin-guide/blockdev/drbd/figures.rst b/Documentation/admin-guide/blockdev/drbd/figures.rst
new file mode 100644
index 000000000000..9f73253ea353
--- /dev/null
+++ b/Documentation/admin-guide/blockdev/drbd/figures.rst
@@ -0,0 +1,30 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. The here included files are intended to help understand the implementation
+
+Data flows that Relate some functions, and write packets
+========================================================
+
+.. kernel-figure:: DRBD-8.3-data-packets.svg
+ :alt: DRBD-8.3-data-packets.svg
+ :align: center
+
+.. kernel-figure:: DRBD-data-packets.svg
+ :alt: DRBD-data-packets.svg
+ :align: center
+
+
+Sub graphs of DRBD's state transitions
+======================================
+
+.. kernel-figure:: conn-states-8.dot
+ :alt: conn-states-8.dot
+ :align: center
+
+.. kernel-figure:: disk-states-8.dot
+ :alt: disk-states-8.dot
+ :align: center
+
+.. kernel-figure:: peer-states-8.dot
+ :alt: peer-states-8.dot
+ :align: center
diff --git a/Documentation/admin-guide/blockdev/drbd/index.rst b/Documentation/admin-guide/blockdev/drbd/index.rst
new file mode 100644
index 000000000000..561fd1e35917
--- /dev/null
+++ b/Documentation/admin-guide/blockdev/drbd/index.rst
@@ -0,0 +1,19 @@
+==========================================
+Distributed Replicated Block Device - DRBD
+==========================================
+
+Description
+===========
+
+ DRBD is a shared-nothing, synchronously replicated block device. It
+ is designed to serve as a building block for high availability
+ clusters and in this context, is a "drop-in" replacement for shared
+ storage. Simplistically, you could see it as a network RAID 1.
+
+ Please visit https://www.drbd.org to find out more.
+
+.. toctree::
+ :maxdepth: 1
+
+ data-structure-v9
+ figures
diff --git a/Documentation/admin-guide/blockdev/drbd/peer-states-8.dot b/Documentation/admin-guide/blockdev/drbd/peer-states-8.dot
new file mode 100644
index 000000000000..6dc3954954d6
--- /dev/null
+++ b/Documentation/admin-guide/blockdev/drbd/peer-states-8.dot
@@ -0,0 +1,8 @@
+digraph peer_states {
+ Secondary -> Primary [ label = "recv state packet" ]
+ Primary -> Secondary [ label = "recv state packet" ]
+ Primary -> Unknown [ label = "connection lost" ]
+ Secondary -> Unknown [ label = "connection lost" ]
+ Unknown -> Primary [ label = "connected" ]
+ Unknown -> Secondary [ label = "connected" ]
+}
diff --git a/Documentation/admin-guide/blockdev/floppy.rst b/Documentation/admin-guide/blockdev/floppy.rst
new file mode 100644
index 000000000000..0328438ebe2c
--- /dev/null
+++ b/Documentation/admin-guide/blockdev/floppy.rst
@@ -0,0 +1,255 @@
+=============
+Floppy Driver
+=============
+
+FAQ list:
+=========
+
+A FAQ list may be found in the fdutils package (see below), and also
+at <https://fdutils.linux.lu/faq.html>.
+
+
+LILO configuration options (Thinkpad users, read this)
+======================================================
+
+The floppy driver is configured using the 'floppy=' option in
+lilo. This option can be typed at the boot prompt, or entered in the
+lilo configuration file.
+
+Example: If your kernel is called linux-2.6.9, type the following line
+at the lilo boot prompt (if you have a thinkpad)::
+
+ linux-2.6.9 floppy=thinkpad
+
+You may also enter the following line in /etc/lilo.conf, in the description
+of linux-2.6.9::
+
+ append = "floppy=thinkpad"
+
+Several floppy related options may be given, example::
+
+ linux-2.6.9 floppy=daring floppy=two_fdc
+ append = "floppy=daring floppy=two_fdc"
+
+If you give options both in the lilo config file and on the boot
+prompt, the option strings of both places are concatenated, the boot
+prompt options coming last. That's why there are also options to
+restore the default behavior.
+
+
+Module configuration options
+============================
+
+If you use the floppy driver as a module, use the following syntax::
+
+ modprobe floppy floppy="<options>"
+
+Example::
+
+ modprobe floppy floppy="omnibook messages"
+
+If you need certain options enabled every time you load the floppy driver,
+you can put::
+
+ options floppy floppy="omnibook messages"
+
+in a configuration file in /etc/modprobe.d/.
+
+
+The floppy driver related options are:
+
+ floppy=asus_pci
+ Sets the bit mask to allow only units 0 and 1. (default)
+
+ floppy=daring
+ Tells the floppy driver that you have a well behaved floppy controller.
+ This allows more efficient and smoother operation, but may fail on
+ certain controllers. This may speed up certain operations.
+
+ floppy=0,daring
+ Tells the floppy driver that your floppy controller should be used
+ with caution.
+
+ floppy=one_fdc
+ Tells the floppy driver that you have only one floppy controller.
+ (default)
+
+ floppy=two_fdc / floppy=<address>,two_fdc
+ Tells the floppy driver that you have two floppy controllers.
+ The second floppy controller is assumed to be at <address>.
+ This option is not needed if the second controller is at address
+ 0x370, and if you use the 'cmos' option.
+
+ floppy=thinkpad
+ Tells the floppy driver that you have a Thinkpad. Thinkpads use an
+ inverted convention for the disk change line.
+
+ floppy=0,thinkpad
+ Tells the floppy driver that you don't have a Thinkpad.
+
+ floppy=omnibook / floppy=nodma
+ Tells the floppy driver not to use Dma for data transfers.
+ This is needed on HP Omnibooks, which don't have a workable
+ DMA channel for the floppy driver. This option is also useful
+ if you frequently get "Unable to allocate DMA memory" messages.
+ Indeed, dma memory needs to be continuous in physical memory,
+ and is thus harder to find, whereas non-dma buffers may be
+ allocated in virtual memory. However, I advise against this if
+ you have an FDC without a FIFO (8272A or 82072). 82072A and
+ later are OK. You also need at least a 486 to use nodma.
+ If you use nodma mode, I suggest you also set the FIFO
+ threshold to 10 or lower, in order to limit the number of data
+ transfer interrupts.
+
+ If you have a FIFO-able FDC, the floppy driver automatically
+ falls back on non DMA mode if no DMA-able memory can be found.
+ If you want to avoid this, explicitly ask for 'yesdma'.
+
+ floppy=yesdma
+ Tells the floppy driver that a workable DMA channel is available.
+ (default)
+
+ floppy=nofifo
+ Disables the FIFO entirely. This is needed if you get "Bus
+ master arbitration error" messages from your Ethernet card (or
+ from other devices) while accessing the floppy.
+
+ floppy=usefifo
+ Enables the FIFO. (default)
+
+ floppy=<threshold>,fifo_depth
+ Sets the FIFO threshold. This is mostly relevant in DMA
+ mode. If this is higher, the floppy driver tolerates more
+ interrupt latency, but it triggers more interrupts (i.e. it
+ imposes more load on the rest of the system). If this is
+ lower, the interrupt latency should be lower too (faster
+ processor). The benefit of a lower threshold is less
+ interrupts.
+
+ To tune the fifo threshold, switch on over/underrun messages
+ using 'floppycontrol --messages'. Then access a floppy
+ disk. If you get a huge amount of "Over/Underrun - retrying"
+ messages, then the fifo threshold is too low. Try with a
+ higher value, until you only get an occasional Over/Underrun.
+ It is a good idea to compile the floppy driver as a module
+ when doing this tuning. Indeed, it allows to try different
+ fifo values without rebooting the machine for each test. Note
+ that you need to do 'floppycontrol --messages' every time you
+ re-insert the module.
+
+ Usually, tuning the fifo threshold should not be needed, as
+ the default (0xa) is reasonable.
+
+ floppy=<drive>,<type>,cmos
+ Sets the CMOS type of <drive> to <type>. This is mandatory if
+ you have more than two floppy drives (only two can be
+ described in the physical CMOS), or if your BIOS uses
+ non-standard CMOS types. The CMOS types are:
+
+ == ==================================
+ 0 Use the value of the physical CMOS
+ 1 5 1/4 DD
+ 2 5 1/4 HD
+ 3 3 1/2 DD
+ 4 3 1/2 HD
+ 5 3 1/2 ED
+ 6 3 1/2 ED
+ 16 unknown or not installed
+ == ==================================
+
+ (Note: there are two valid types for ED drives. This is because 5 was
+ initially chosen to represent floppy *tapes*, and 6 for ED drives.
+ AMI ignored this, and used 5 for ED drives. That's why the floppy
+ driver handles both.)
+
+ floppy=unexpected_interrupts
+ Print a warning message when an unexpected interrupt is received.
+ (default)
+
+ floppy=no_unexpected_interrupts / floppy=L40SX
+ Don't print a message when an unexpected interrupt is received. This
+ is needed on IBM L40SX laptops in certain video modes. (There seems
+ to be an interaction between video and floppy. The unexpected
+ interrupts affect only performance, and can be safely ignored.)
+
+ floppy=broken_dcl
+ Don't use the disk change line, but assume that the disk was
+ changed whenever the device node is reopened. Needed on some
+ boxes where the disk change line is broken or unsupported.
+ This should be regarded as a stopgap measure, indeed it makes
+ floppy operation less efficient due to unneeded cache
+ flushings, and slightly more unreliable. Please verify your
+ cable, connection and jumper settings if you have any DCL
+ problems. However, some older drives, and also some laptops
+ are known not to have a DCL.
+
+ floppy=debug
+ Print debugging messages.
+
+ floppy=messages
+ Print informational messages for some operations (disk change
+ notifications, warnings about over and underruns, and about
+ autodetection).
+
+ floppy=silent_dcl_clear
+ Uses a less noisy way to clear the disk change line (which
+ doesn't involve seeks). Implied by 'daring' option.
+
+ floppy=<nr>,irq
+ Sets the floppy IRQ to <nr> instead of 6.
+
+ floppy=<nr>,dma
+ Sets the floppy DMA channel to <nr> instead of 2.
+
+ floppy=slow
+ Use PS/2 stepping rate::
+
+ PS/2 floppies have much slower step rates than regular floppies.
+ It's been recommended that take about 1/4 of the default speed
+ in some more extreme cases.
+
+
+Supporting utilities and additional documentation:
+==================================================
+
+Additional parameters of the floppy driver can be configured at
+runtime. Utilities which do this can be found in the fdutils package.
+This package also contains a new version of mtools which allows to
+access high capacity disks (up to 1992K on a high density 3 1/2 disk!).
+It also contains additional documentation about the floppy driver.
+
+The latest version can be found at fdutils homepage:
+
+ https://fdutils.linux.lu
+
+The fdutils releases can be found at:
+
+ https://fdutils.linux.lu/download.html
+
+ http://www.tux.org/pub/knaff/fdutils/
+
+ ftp://metalab.unc.edu/pub/Linux/utils/disk-management/
+
+Reporting problems about the floppy driver
+==========================================
+
+If you have a question or a bug report about the floppy driver, mail
+me at Alain.Knaff@poboxes.com . If you post to Usenet, preferably use
+comp.os.linux.hardware. As the volume in these groups is rather high,
+be sure to include the word "floppy" (or "FLOPPY") in the subject
+line. If the reported problem happens when mounting floppy disks, be
+sure to mention also the type of the filesystem in the subject line.
+
+Be sure to read the FAQ before mailing/posting any bug reports!
+
+Alain
+
+Changelog
+=========
+
+10-30-2004 :
+ Cleanup, updating, add reference to module configuration.
+ James Nelson <james4765@gmail.com>
+
+6-3-2000 :
+ Original Document
diff --git a/Documentation/admin-guide/blockdev/index.rst b/Documentation/admin-guide/blockdev/index.rst
new file mode 100644
index 000000000000..3262397ebe8f
--- /dev/null
+++ b/Documentation/admin-guide/blockdev/index.rst
@@ -0,0 +1,17 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============
+Block Devices
+=============
+
+.. toctree::
+ :maxdepth: 1
+
+ floppy
+ nbd
+ paride
+ ramdisk
+ zoned_loop
+ zram
+
+ drbd/index
diff --git a/Documentation/admin-guide/blockdev/nbd.rst b/Documentation/admin-guide/blockdev/nbd.rst
new file mode 100644
index 000000000000..faf2ac4b1509
--- /dev/null
+++ b/Documentation/admin-guide/blockdev/nbd.rst
@@ -0,0 +1,31 @@
+==================================
+Network Block Device (TCP version)
+==================================
+
+1) Overview
+-----------
+
+What is it: With this compiled in the kernel (or as a module), Linux
+can use a remote server as one of its block devices. So every time
+the client computer wants to read, e.g., /dev/nb0, it sends a
+request over TCP to the server, which will reply with the data read.
+This can be used for stations with low disk space (or even diskless)
+to borrow disk space from another computer.
+Unlike NFS, it is possible to put any filesystem on it, etc.
+
+For more information, or to download the nbd-client and nbd-server
+tools, go to https://github.com/NetworkBlockDevice/nbd.
+
+The nbd kernel module need only be installed on the client
+system, as the nbd-server is completely in userspace. In fact,
+the nbd-server has been successfully ported to other operating
+systems, including Windows.
+
+A) NBD parameters
+-----------------
+
+max_part
+ Number of partitions per device (default: 0).
+
+nbds_max
+ Number of block devices that should be initialized (default: 16).
diff --git a/Documentation/admin-guide/blockdev/paride.rst b/Documentation/admin-guide/blockdev/paride.rst
new file mode 100644
index 000000000000..b2f627d4c2f8
--- /dev/null
+++ b/Documentation/admin-guide/blockdev/paride.rst
@@ -0,0 +1,207 @@
+===================================
+Linux and parallel port IDE devices
+===================================
+
+PARIDE v1.03 (c) 1997-8 Grant Guenther <grant@torque.net>
+PATA_PARPORT (c) 2023 Ondrej Zary
+
+1. Introduction
+===============
+
+Owing to the simplicity and near universality of the parallel port interface
+to personal computers, many external devices such as portable hard-disk,
+CD-ROM, LS-120 and tape drives use the parallel port to connect to their
+host computer. While some devices (notably scanners) use ad-hoc methods
+to pass commands and data through the parallel port interface, most
+external devices are actually identical to an internal model, but with
+a parallel-port adapter chip added in. Some of the original parallel port
+adapters were little more than mechanisms for multiplexing a SCSI bus.
+(The Iomega PPA-3 adapter used in the ZIP drives is an example of this
+approach). Most current designs, however, take a different approach.
+The adapter chip reproduces a small ISA or IDE bus in the external device
+and the communication protocol provides operations for reading and writing
+device registers, as well as data block transfer functions. Sometimes,
+the device being addressed via the parallel cable is a standard SCSI
+controller like an NCR 5380. The "ditto" family of external tape
+drives use the ISA replicator to interface a floppy disk controller,
+which is then connected to a floppy-tape mechanism. The vast majority
+of external parallel port devices, however, are now based on standard
+IDE type devices, which require no intermediate controller. If one
+were to open up a parallel port CD-ROM drive, for instance, one would
+find a standard ATAPI CD-ROM drive, a power supply, and a single adapter
+that interconnected a standard PC parallel port cable and a standard
+IDE cable. It is usually possible to exchange the CD-ROM device with
+any other device using the IDE interface.
+
+The document describes the support in Linux for parallel port IDE
+devices. It does not cover parallel port SCSI devices, "ditto" tape
+drives or scanners. Many different devices are supported by the
+parallel port IDE subsystem, including:
+
+ - MicroSolutions backpack CD-ROM
+ - MicroSolutions backpack PD/CD
+ - MicroSolutions backpack hard-drives
+ - MicroSolutions backpack 8000t tape drive
+ - SyQuest EZ-135, EZ-230 & SparQ drives
+ - Avatar Shark
+ - Imation Superdisk LS-120
+ - Maxell Superdisk LS-120
+ - FreeCom Power CD
+ - Hewlett-Packard 5GB and 8GB tape drives
+ - Hewlett-Packard 7100 and 7200 CD-RW drives
+
+as well as most of the clone and no-name products on the market.
+
+To support such a wide range of devices, pata_parport is actually structured
+in two parts. There is a base pata_parport module which provides an interface
+to kernel libata subsystem, registry and some common methods for accessing
+the parallel ports.
+
+The second component is a set of low-level protocol drivers for each of the
+parallel port IDE adapter chips. Thanks to the interest and encouragement of
+Linux users from many parts of the world, support is available for almost all
+known adapter protocols:
+
+ ==== ====================================== ====
+ aten ATEN EH-100 (HK)
+ bpck Microsolutions backpack (US)
+ comm DataStor (old-type) "commuter" adapter (TW)
+ dstr DataStor EP-2000 (TW)
+ epat Shuttle EPAT (UK)
+ epia Shuttle EPIA (UK)
+ fit2 FIT TD-2000 (US)
+ fit3 FIT TD-3000 (US)
+ friq Freecom IQ cable (DE)
+ frpw Freecom Power (DE)
+ kbic KingByte KBIC-951A and KBIC-971A (TW)
+ ktti KT Technology PHd adapter (SG)
+ on20 OnSpec 90c20 (US)
+ on26 OnSpec 90c26 (US)
+ ==== ====================================== ====
+
+
+2. Using pata_parport subsystem
+===============================
+
+While configuring the Linux kernel, you may choose either to build
+the pata_parport drivers into your kernel, or to build them as modules.
+
+In either case, you will need to select "Parallel port IDE device support"
+and at least one of the parallel port communication protocols.
+If you do not know what kind of parallel port adapter is used in your drive,
+you could begin by checking the file names and any text files on your DOS
+installation floppy. Alternatively, you can look at the markings on
+the adapter chip itself. That's usually sufficient to identify the
+correct device.
+
+You can actually select all the protocol modules, and allow the pata_parport
+subsystem to try them all for you.
+
+For the "brand-name" products listed above, here are the protocol
+and high-level drivers that you would use:
+
+ ================ ============ ========
+ Manufacturer Model Protocol
+ ================ ============ ========
+ MicroSolutions CD-ROM bpck
+ MicroSolutions PD drive bpck
+ MicroSolutions hard-drive bpck
+ MicroSolutions 8000t tape bpck
+ SyQuest EZ, SparQ epat
+ Imation Superdisk epat
+ Maxell Superdisk friq
+ Avatar Shark epat
+ FreeCom CD-ROM frpw
+ Hewlett-Packard 5GB Tape epat
+ Hewlett-Packard 7200e (CD) epat
+ Hewlett-Packard 7200e (CD-R) epat
+ ================ ============ ========
+
+All parports and all protocol drivers are probed automatically unless probe=0
+parameter is used. So just "modprobe epat" is enough for an Imation SuperDisk
+drive to work.
+
+Manual device creation::
+
+ # echo "port protocol mode unit delay" >/sys/bus/pata_parport/new_device
+
+where:
+
+ ======== ================================================
+ port parport name (or "auto" for all parports)
+ protocol protocol name (or "auto" for all protocols)
+ mode mode number (protocol-specific) or -1 for probe
+ unit unit number (for backpack only, see below)
+ delay I/O delay (see troubleshooting section below)
+ ======== ================================================
+
+If you happen to be using a MicroSolutions backpack device, you will
+also need to know the unit ID number for each drive. This is usually
+the last two digits of the drive's serial number (but read MicroSolutions'
+documentation about this).
+
+If you omit the parameters from the end, defaults will be used, e.g.:
+
+Probe all parports with all protocols::
+
+ # echo auto >/sys/bus/pata_parport/new_device
+
+Probe parport0 using protocol epat and mode 4 (EPP-16)::
+
+ # echo "parport0 epat 4" >/sys/bus/pata_parport/new_device
+
+Probe parport0 using all protocols::
+
+ # echo "parport0 auto" >/sys/bus/pata_parport/new_device
+
+Probe all parports using protoocol epat::
+
+ # echo "auto epat" >/sys/bus/pata_parport/new_device
+
+Deleting devices::
+
+ # echo pata_parport.0 >/sys/bus/pata_parport/delete_device
+
+
+3. Troubleshooting
+==================
+
+3.1 Use EPP mode if you can
+----------------------------
+
+The most common problems that people report with the pata_parport drivers
+concern the parallel port CMOS settings. At this time, none of the
+protocol modules support ECP mode, or any ECP combination modes.
+If you are able to do so, please set your parallel port into EPP mode
+using your CMOS setup procedure.
+
+3.2 Check the port delay
+-------------------------
+
+Some parallel ports cannot reliably transfer data at full speed. To
+offset the errors, the protocol modules introduce a "port
+delay" between each access to the i/o ports. Each protocol sets
+a default value for this delay. In most cases, the user can override
+the default and set it to 0 - resulting in somewhat higher transfer
+rates. In some rare cases (especially with older 486 systems) the
+default delays are not long enough. if you experience corrupt data
+transfers, or unexpected failures, you may wish to increase the
+port delay.
+
+3.3 Some drives need a printer reset
+-------------------------------------
+
+There appear to be a number of "noname" external drives on the market
+that do not always power up correctly. We have noticed this with some
+drives based on OnSpec and older Freecom adapters. In these rare cases,
+the adapter can often be reinitialised by issuing a "printer reset" on
+the parallel port. As the reset operation is potentially disruptive in
+multiple device environments, the pata_parport drivers will not do it
+automatically. You can however, force a printer reset by doing::
+
+ insmod lp reset=1
+ rmmod lp
+
+If you have one of these marginal cases, you should probably build
+your pata_parport drivers as modules, and arrange to do the printer reset
+before loading the pata_parport drivers.
diff --git a/Documentation/admin-guide/blockdev/ramdisk.rst b/Documentation/admin-guide/blockdev/ramdisk.rst
new file mode 100644
index 000000000000..9ce6101e8dd9
--- /dev/null
+++ b/Documentation/admin-guide/blockdev/ramdisk.rst
@@ -0,0 +1,153 @@
+==========================================
+Using the RAM disk block device with Linux
+==========================================
+
+.. Contents:
+
+ 1) Overview
+ 2) Kernel Command Line Parameters
+ 3) Using "rdev"
+ 4) An Example of Creating a Compressed RAM Disk
+
+
+1) Overview
+-----------
+
+The RAM disk driver is a way to use main system memory as a block device. It
+is required for initrd, an initial filesystem used if you need to load modules
+in order to access the root filesystem (see Documentation/admin-guide/initrd.rst). It can
+also be used for a temporary filesystem for crypto work, since the contents
+are erased on reboot.
+
+The RAM disk dynamically grows as more space is required. It does this by using
+RAM from the buffer cache. The driver marks the buffers it is using as dirty
+so that the VM subsystem does not try to reclaim them later.
+
+The RAM disk supports up to 16 RAM disks by default, and can be reconfigured
+to support an unlimited number of RAM disks (at your own risk). Just change
+the configuration symbol BLK_DEV_RAM_COUNT in the Block drivers config menu
+and (re)build the kernel.
+
+To use RAM disk support with your system, run './MAKEDEV ram' from the /dev
+directory. RAM disks are all major number 1, and start with minor number 0
+for /dev/ram0, etc. If used, modern kernels use /dev/ram0 for an initrd.
+
+The new RAM disk also has the ability to load compressed RAM disk images,
+allowing one to squeeze more programs onto an average installation or
+rescue floppy disk.
+
+
+2) Parameters
+---------------------------------
+
+2a) Kernel Command Line Parameters
+
+ ramdisk_size=N
+ Size of the ramdisk.
+
+This parameter tells the RAM disk driver to set up RAM disks of N k size. The
+default is 4096 (4 MB).
+
+2b) Module parameters
+
+ rd_nr
+ /dev/ramX devices created.
+
+ max_part
+ Maximum partition number.
+
+ rd_size
+ See ramdisk_size.
+
+3) Using "rdev"
+---------------
+
+"rdev" is an obsolete, deprecated, antiquated utility that could be used
+to set the boot device in a Linux kernel image.
+
+Instead of using rdev, just place the boot device information on the
+kernel command line and pass it to the kernel from the bootloader.
+
+You can also pass arguments to the kernel by setting FDARGS in
+arch/x86/boot/Makefile and specify in initrd image by setting FDINITRD in
+arch/x86/boot/Makefile.
+
+Some of the kernel command line boot options that may apply here are::
+
+ ramdisk_start=N
+ ramdisk_size=M
+
+If you make a boot disk that has LILO, then for the above, you would use::
+
+ append = "ramdisk_start=N ramdisk_size=M"
+
+4) An Example of Creating a Compressed RAM Disk
+-----------------------------------------------
+
+To create a RAM disk image, you will need a spare block device to
+construct it on. This can be the RAM disk device itself, or an
+unused disk partition (such as an unmounted swap partition). For this
+example, we will use the RAM disk device, "/dev/ram0".
+
+Note: This technique should not be done on a machine with less than 8 MB
+of RAM. If using a spare disk partition instead of /dev/ram0, then this
+restriction does not apply.
+
+a) Decide on the RAM disk size that you want. Say 2 MB for this example.
+ Create it by writing to the RAM disk device. (This step is not currently
+ required, but may be in the future.) It is wise to zero out the
+ area (esp. for disks) so that maximal compression is achieved for
+ the unused blocks of the image that you are about to create::
+
+ dd if=/dev/zero of=/dev/ram0 bs=1k count=2048
+
+b) Make a filesystem on it. Say ext2fs for this example::
+
+ mke2fs -vm0 /dev/ram0 2048
+
+c) Mount it, copy the files you want to it (eg: /etc/* /dev/* ...)
+ and unmount it again.
+
+d) Compress the contents of the RAM disk. The level of compression
+ will be approximately 50% of the space used by the files. Unused
+ space on the RAM disk will compress to almost nothing::
+
+ dd if=/dev/ram0 bs=1k count=2048 | gzip -v9 > /tmp/ram_image.gz
+
+e) Put the kernel onto the floppy::
+
+ dd if=zImage of=/dev/fd0 bs=1k
+
+f) Put the RAM disk image onto the floppy, after the kernel. Use an offset
+ that is slightly larger than the kernel, so that you can put another
+ (possibly larger) kernel onto the same floppy later without overlapping
+ the RAM disk image. An offset of 400 kB for kernels about 350 kB in
+ size would be reasonable. Make sure offset+size of ram_image.gz is
+ not larger than the total space on your floppy (usually 1440 kB)::
+
+ dd if=/tmp/ram_image.gz of=/dev/fd0 bs=1k seek=400
+
+g) Make sure that you have already specified the boot information in
+ FDARGS and FDINITRD or that you use a bootloader to pass kernel
+ command line boot options to the kernel.
+
+That is it. You now have your boot/root compressed RAM disk floppy. Some
+users may wish to combine steps (d) and (f) by using a pipe.
+
+
+ Paul Gortmaker 12/95
+
+Changelog:
+----------
+
+SEPT-2020 :
+
+ Removed usage of "rdev"
+
+10-22-04 :
+ Updated to reflect changes in command line options, remove
+ obsolete references, general cleanup.
+ James Nelson (james4765@gmail.com)
+
+12-95 :
+ Original Document
diff --git a/Documentation/admin-guide/blockdev/zoned_loop.rst b/Documentation/admin-guide/blockdev/zoned_loop.rst
new file mode 100644
index 000000000000..64dcfde7450a
--- /dev/null
+++ b/Documentation/admin-guide/blockdev/zoned_loop.rst
@@ -0,0 +1,169 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======================
+Zoned Loop Block Device
+=======================
+
+.. Contents:
+
+ 1) Overview
+ 2) Creating a Zoned Device
+ 3) Deleting a Zoned Device
+ 4) Example
+
+
+1) Overview
+-----------
+
+The zoned loop block device driver (zloop) allows a user to create a zoned block
+device using one regular file per zone as backing storage. This driver does not
+directly control any hardware and uses read, write and truncate operations to
+regular files of a file system to emulate a zoned block device.
+
+Using zloop, zoned block devices with a configurable capacity, zone size and
+number of conventional zones can be created. The storage for each zone of the
+device is implemented using a regular file with a maximum size equal to the zone
+size. The size of a file backing a conventional zone is always equal to the zone
+size. The size of a file backing a sequential zone indicates the amount of data
+sequentially written to the file, that is, the size of the file directly
+indicates the position of the write pointer of the zone.
+
+When resetting a sequential zone, its backing file size is truncated to zero.
+Conversely, for a zone finish operation, the backing file is truncated to the
+zone size. With this, the maximum capacity of a zloop zoned block device created
+can be larger configured to be larger than the storage space available on the
+backing file system. Of course, for such configuration, writing more data than
+the storage space available on the backing file system will result in write
+errors.
+
+The zoned loop block device driver implements a complete zone transition state
+machine. That is, zones can be empty, implicitly opened, explicitly opened,
+closed or full. The current implementation does not support any limits on the
+maximum number of open and active zones.
+
+No user tools are necessary to create and delete zloop devices.
+
+2) Creating a Zoned Device
+--------------------------
+
+Once the zloop module is loaded (or if zloop is compiled in the kernel), the
+character device file /dev/zloop-control can be used to add a zloop device.
+This is done by writing an "add" command directly to the /dev/zloop-control
+device::
+
+ $ modprobe zloop
+ $ ls -l /dev/zloop*
+ crw-------. 1 root root 10, 123 Jan 6 19:18 /dev/zloop-control
+
+ $ mkdir -p <base directory/<device ID>
+ $ echo "add [options]" > /dev/zloop-control
+
+The options available for the add command can be listed by reading the
+/dev/zloop-control device::
+
+ $ cat /dev/zloop-control
+ add id=%d,capacity_mb=%u,zone_size_mb=%u,zone_capacity_mb=%u,conv_zones=%u,base_dir=%s,nr_queues=%u,queue_depth=%u,buffered_io
+ remove id=%d
+
+In more details, the options that can be used with the "add" command are as
+follows.
+
+================ ===========================================================
+id Device number (the X in /dev/zloopX).
+ Default: automatically assigned.
+capacity_mb Device total capacity in MiB. This is always rounded up to
+ the nearest higher multiple of the zone size.
+ Default: 16384 MiB (16 GiB).
+zone_size_mb Device zone size in MiB. Default: 256 MiB.
+zone_capacity_mb Device zone capacity (must always be equal to or lower than
+ the zone size. Default: zone size.
+conv_zones Total number of conventioanl zones starting from sector 0.
+ Default: 8.
+base_dir Path to the base directory where to create the directory
+ containing the zone files of the device.
+ Default=/var/local/zloop.
+ The device directory containing the zone files is always
+ named with the device ID. E.g. the default zone file
+ directory for /dev/zloop0 is /var/local/zloop/0.
+nr_queues Number of I/O queues of the zoned block device. This value is
+ always capped by the number of online CPUs
+ Default: 1
+queue_depth Maximum I/O queue depth per I/O queue.
+ Default: 64
+buffered_io Do buffered IOs instead of direct IOs (default: false)
+================ ===========================================================
+
+3) Deleting a Zoned Device
+--------------------------
+
+Deleting an unused zoned loop block device is done by issuing the "remove"
+command to /dev/zloop-control, specifying the ID of the device to remove::
+
+ $ echo "remove id=X" > /dev/zloop-control
+
+The remove command does not have any option.
+
+A zoned device that was removed can be re-added again without any change to the
+state of the device zones: the device zones are restored to their last state
+before the device was removed. Adding again a zoned device after it was removed
+must always be done using the same configuration as when the device was first
+added. If a zone configuration change is detected, an error will be returned and
+the zoned device will not be created.
+
+To fully delete a zoned device, after executing the remove operation, the device
+base directory containing the backing files of the device zones must be deleted.
+
+4) Example
+----------
+
+The following sequence of commands creates a 2GB zoned device with zones of 64
+MB and a zone capacity of 63 MB::
+
+ $ modprobe zloop
+ $ mkdir -p /var/local/zloop/0
+ $ echo "add capacity_mb=2048,zone_size_mb=64,zone_capacity=63MB" > /dev/zloop-control
+
+For the device created (/dev/zloop0), the zone backing files are all created
+under the default base directory (/var/local/zloop)::
+
+ $ ls -l /var/local/zloop/0
+ total 0
+ -rw-------. 1 root root 67108864 Jan 6 22:23 cnv-000000
+ -rw-------. 1 root root 67108864 Jan 6 22:23 cnv-000001
+ -rw-------. 1 root root 67108864 Jan 6 22:23 cnv-000002
+ -rw-------. 1 root root 67108864 Jan 6 22:23 cnv-000003
+ -rw-------. 1 root root 67108864 Jan 6 22:23 cnv-000004
+ -rw-------. 1 root root 67108864 Jan 6 22:23 cnv-000005
+ -rw-------. 1 root root 67108864 Jan 6 22:23 cnv-000006
+ -rw-------. 1 root root 67108864 Jan 6 22:23 cnv-000007
+ -rw-------. 1 root root 0 Jan 6 22:23 seq-000008
+ -rw-------. 1 root root 0 Jan 6 22:23 seq-000009
+ ...
+
+The zoned device created (/dev/zloop0) can then be used normally::
+
+ $ lsblk -z
+ NAME ZONED ZONE-SZ ZONE-NR ZONE-AMAX ZONE-OMAX ZONE-APP ZONE-WGRAN
+ zloop0 host-managed 64M 32 0 0 1M 4K
+ $ blkzone report /dev/zloop0
+ start: 0x000000000, len 0x020000, cap 0x020000, wptr 0x000000 reset:0 non-seq:0, zcond: 0(nw) [type: 1(CONVENTIONAL)]
+ start: 0x000020000, len 0x020000, cap 0x020000, wptr 0x000000 reset:0 non-seq:0, zcond: 0(nw) [type: 1(CONVENTIONAL)]
+ start: 0x000040000, len 0x020000, cap 0x020000, wptr 0x000000 reset:0 non-seq:0, zcond: 0(nw) [type: 1(CONVENTIONAL)]
+ start: 0x000060000, len 0x020000, cap 0x020000, wptr 0x000000 reset:0 non-seq:0, zcond: 0(nw) [type: 1(CONVENTIONAL)]
+ start: 0x000080000, len 0x020000, cap 0x020000, wptr 0x000000 reset:0 non-seq:0, zcond: 0(nw) [type: 1(CONVENTIONAL)]
+ start: 0x0000a0000, len 0x020000, cap 0x020000, wptr 0x000000 reset:0 non-seq:0, zcond: 0(nw) [type: 1(CONVENTIONAL)]
+ start: 0x0000c0000, len 0x020000, cap 0x020000, wptr 0x000000 reset:0 non-seq:0, zcond: 0(nw) [type: 1(CONVENTIONAL)]
+ start: 0x0000e0000, len 0x020000, cap 0x020000, wptr 0x000000 reset:0 non-seq:0, zcond: 0(nw) [type: 1(CONVENTIONAL)]
+ start: 0x000100000, len 0x020000, cap 0x01f800, wptr 0x000000 reset:0 non-seq:0, zcond: 1(em) [type: 2(SEQ_WRITE_REQUIRED)]
+ start: 0x000120000, len 0x020000, cap 0x01f800, wptr 0x000000 reset:0 non-seq:0, zcond: 1(em) [type: 2(SEQ_WRITE_REQUIRED)]
+ ...
+
+Deleting this device is done using the command::
+
+ $ echo "remove id=0" > /dev/zloop-control
+
+The removed device can be re-added again using the same "add" command as when
+the device was first created. To fully delete a zoned device, its backing files
+should also be deleted after executing the remove command::
+
+ $ rm -r /var/local/zloop/0
diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst
new file mode 100644
index 000000000000..3e273c1bb749
--- /dev/null
+++ b/Documentation/admin-guide/blockdev/zram.rst
@@ -0,0 +1,569 @@
+========================================
+zram: Compressed RAM-based block devices
+========================================
+
+Introduction
+============
+
+The zram module creates RAM-based block devices named /dev/zram<id>
+(<id> = 0, 1, ...). Pages written to these disks are compressed and stored
+in memory itself. These disks allow very fast I/O and compression provides
+good amounts of memory savings. Some of the use cases include /tmp storage,
+use as swap disks, various caches under /var and maybe many more. :)
+
+Statistics for individual zram devices are exported through sysfs nodes at
+/sys/block/zram<id>/
+
+Usage
+=====
+
+There are several ways to configure and manage zram device(-s):
+
+a) using zram and zram_control sysfs attributes
+b) using zramctl utility, provided by util-linux (util-linux@vger.kernel.org).
+
+In this document we will describe only 'manual' zram configuration steps,
+IOW, zram and zram_control sysfs attributes.
+
+In order to get a better idea about zramctl please consult util-linux
+documentation, zramctl man-page or `zramctl --help`. Please be informed
+that zram maintainers do not develop/maintain util-linux or zramctl, should
+you have any questions please contact util-linux@vger.kernel.org
+
+Following shows a typical sequence of steps for using zram.
+
+WARNING
+=======
+
+For the sake of simplicity we skip error checking parts in most of the
+examples below. However, it is your sole responsibility to handle errors.
+
+zram sysfs attributes always return negative values in case of errors.
+The list of possible return codes:
+
+======== =============================================================
+-EBUSY an attempt to modify an attribute that cannot be changed once
+ the device has been initialised. Please reset device first.
+-ENOMEM zram was not able to allocate enough memory to fulfil your
+ needs.
+-EINVAL invalid input has been provided.
+-EAGAIN re-try operation later (e.g. when attempting to run recompress
+ and writeback simultaneously).
+======== =============================================================
+
+If you use 'echo', the returned value is set by the 'echo' utility,
+and, in general case, something like::
+
+ echo foo > /sys/block/zram0/comp_algorithm
+ if [ $? -ne 0 ]; then
+ handle_error
+ fi
+
+should suffice.
+
+1) Load Module
+==============
+
+::
+
+ modprobe zram num_devices=4
+
+This creates 4 devices: /dev/zram{0,1,2,3}
+
+num_devices parameter is optional and tells zram how many devices should be
+pre-created. Default: 1.
+
+2) Select compression algorithm
+===============================
+
+Using comp_algorithm device attribute one can see available and
+currently selected (shown in square brackets) compression algorithms,
+or change the selected compression algorithm (once the device is initialised
+there is no way to change compression algorithm).
+
+Examples::
+
+ #show supported compression algorithms
+ cat /sys/block/zram0/comp_algorithm
+ lzo [lz4]
+
+ #select lzo compression algorithm
+ echo lzo > /sys/block/zram0/comp_algorithm
+
+For the time being, the `comp_algorithm` content shows only compression
+algorithms that are supported by zram.
+
+3) Set compression algorithm parameters: Optional
+=================================================
+
+Compression algorithms may support specific parameters which can be
+tweaked for particular dataset. ZRAM has an `algorithm_params` device
+attribute which provides a per-algorithm params configuration.
+
+For example, several compression algorithms support `level` parameter.
+In addition, certain compression algorithms support pre-trained dictionaries,
+which significantly change algorithms' characteristics. In order to configure
+compression algorithm to use external pre-trained dictionary, pass full
+path to the `dict` along with other parameters::
+
+ #pass path to pre-trained zstd dictionary
+ echo "algo=zstd dict=/etc/dictionary" > /sys/block/zram0/algorithm_params
+
+ #same, but using algorithm priority
+ echo "priority=1 dict=/etc/dictionary" > \
+ /sys/block/zram0/algorithm_params
+
+ #pass path to pre-trained zstd dictionary and compression level
+ echo "algo=zstd level=8 dict=/etc/dictionary" > \
+ /sys/block/zram0/algorithm_params
+
+Parameters are algorithm specific: not all algorithms support pre-trained
+dictionaries, not all algorithms support `level`. Furthermore, for certain
+algorithms `level` controls the compression level (the higher the value the
+better the compression ratio, it even can take negatives values for some
+algorithms), for other algorithms `level` is acceleration level (the higher
+the value the lower the compression ratio).
+
+4) Set Disksize
+===============
+
+Set disk size by writing the value to sysfs node 'disksize'.
+The value can be either in bytes or you can use mem suffixes.
+Examples::
+
+ # Initialize /dev/zram0 with 50MB disksize
+ echo $((50*1024*1024)) > /sys/block/zram0/disksize
+
+ # Using mem suffixes
+ echo 256K > /sys/block/zram0/disksize
+ echo 512M > /sys/block/zram0/disksize
+ echo 1G > /sys/block/zram0/disksize
+
+Note:
+There is little point creating a zram of greater than twice the size of memory
+since we expect a 2:1 compression ratio. Note that zram uses about 0.1% of the
+size of the disk when not in use so a huge zram is wasteful.
+
+5) Set memory limit: Optional
+=============================
+
+Set memory limit by writing the value to sysfs node 'mem_limit'.
+The value can be either in bytes or you can use mem suffixes.
+In addition, you could change the value in runtime.
+Examples::
+
+ # limit /dev/zram0 with 50MB memory
+ echo $((50*1024*1024)) > /sys/block/zram0/mem_limit
+
+ # Using mem suffixes
+ echo 256K > /sys/block/zram0/mem_limit
+ echo 512M > /sys/block/zram0/mem_limit
+ echo 1G > /sys/block/zram0/mem_limit
+
+ # To disable memory limit
+ echo 0 > /sys/block/zram0/mem_limit
+
+6) Activate
+===========
+
+::
+
+ mkswap /dev/zram0
+ swapon /dev/zram0
+
+ mkfs.ext4 /dev/zram1
+ mount /dev/zram1 /tmp
+
+7) Add/remove zram devices
+==========================
+
+zram provides a control interface, which enables dynamic (on-demand) device
+addition and removal.
+
+In order to add a new /dev/zramX device, perform a read operation on the hot_add
+attribute. This will return either the new device's device id (meaning that you
+can use /dev/zram<id>) or an error code.
+
+Example::
+
+ cat /sys/class/zram-control/hot_add
+ 1
+
+To remove the existing /dev/zramX device (where X is a device id)
+execute::
+
+ echo X > /sys/class/zram-control/hot_remove
+
+8) Stats
+========
+
+Per-device statistics are exported as various nodes under /sys/block/zram<id>/
+
+A brief description of exported device attributes follows. For more details
+please read Documentation/ABI/testing/sysfs-block-zram.
+
+====================== ====== ===============================================
+Name access description
+====================== ====== ===============================================
+disksize RW show and set the device's disk size
+initstate RO shows the initialization state of the device
+reset WO trigger device reset
+mem_used_max WO reset the `mem_used_max` counter (see later)
+mem_limit WO specifies the maximum amount of memory ZRAM can
+ use to store the compressed data
+writeback_limit WO specifies the maximum amount of write IO zram
+ can write out to backing device as 4KB unit
+writeback_limit_enable RW show and set writeback_limit feature
+comp_algorithm RW show and change the compression algorithm
+algorithm_params WO setup compression algorithm parameters
+compact WO trigger memory compaction
+debug_stat RO this file is used for zram debugging purposes
+backing_dev RW set up backend storage for zram to write out
+idle WO mark allocated slot as idle
+====================== ====== ===============================================
+
+
+User space is advised to use the following files to read the device statistics.
+
+File /sys/block/zram<id>/stat
+
+Represents block layer statistics. Read Documentation/block/stat.rst for
+details.
+
+File /sys/block/zram<id>/io_stat
+
+The stat file represents device's I/O statistics not accounted by block
+layer and, thus, not available in zram<id>/stat file. It consists of a
+single line of text and contains the following stats separated by
+whitespace:
+
+ ============= =============================================================
+ failed_reads The number of failed reads
+ failed_writes The number of failed writes
+ invalid_io The number of non-page-size-aligned I/O requests
+ notify_free Depending on device usage scenario it may account
+
+ a) the number of pages freed because of swap slot free
+ notifications
+ b) the number of pages freed because of
+ REQ_OP_DISCARD requests sent by bio. The former ones are
+ sent to a swap block device when a swap slot is freed,
+ which implies that this disk is being used as a swap disk.
+
+ The latter ones are sent by filesystem mounted with
+ discard option, whenever some data blocks are getting
+ discarded.
+ ============= =============================================================
+
+File /sys/block/zram<id>/mm_stat
+
+The mm_stat file represents the device's mm statistics. It consists of a single
+line of text and contains the following stats separated by whitespace:
+
+ ================ =============================================================
+ orig_data_size uncompressed size of data stored in this disk.
+ Unit: bytes
+ compr_data_size compressed size of data stored in this disk
+ mem_used_total the amount of memory allocated for this disk. This
+ includes allocator fragmentation and metadata overhead,
+ allocated for this disk. So, allocator space efficiency
+ can be calculated using compr_data_size and this statistic.
+ Unit: bytes
+ mem_limit the maximum amount of memory ZRAM can use to store
+ the compressed data
+ mem_used_max the maximum amount of memory zram has consumed to
+ store the data
+ same_pages the number of same element filled pages written to this disk.
+ No memory is allocated for such pages.
+ pages_compacted the number of pages freed during compaction
+ huge_pages the number of incompressible pages
+ huge_pages_since the number of incompressible pages since zram set up
+ ================ =============================================================
+
+File /sys/block/zram<id>/bd_stat
+
+The bd_stat file represents a device's backing device statistics. It consists of
+a single line of text and contains the following stats separated by whitespace:
+
+ ============== =============================================================
+ bd_count size of data written in backing device.
+ Unit: 4K bytes
+ bd_reads the number of reads from backing device
+ Unit: 4K bytes
+ bd_writes the number of writes to backing device
+ Unit: 4K bytes
+ ============== =============================================================
+
+9) Deactivate
+==============
+
+::
+
+ swapoff /dev/zram0
+ umount /dev/zram1
+
+10) Reset
+=========
+
+ Write any positive value to 'reset' sysfs node::
+
+ echo 1 > /sys/block/zram0/reset
+ echo 1 > /sys/block/zram1/reset
+
+ This frees all the memory allocated for the given device and
+ resets the disksize to zero. You must set the disksize again
+ before reusing the device.
+
+Optional Feature
+================
+
+IDLE pages tracking
+-------------------
+
+zram has built-in support for idle pages tracking (that is, allocated but
+not used pages). This feature is useful for e.g. zram writeback and
+recompression. In order to mark pages as idle, execute the following command::
+
+ echo all > /sys/block/zramX/idle
+
+This will mark all allocated zram pages as idle. The idle mark will be
+removed only when the page (block) is accessed (e.g. overwritten or freed).
+Additionally, when CONFIG_ZRAM_TRACK_ENTRY_ACTIME is enabled, pages can be
+marked as idle based on how many seconds have passed since the last access to
+a particular zram page::
+
+ echo 86400 > /sys/block/zramX/idle
+
+In this example, all pages which haven't been accessed in more than 86400
+seconds (one day) will be marked idle.
+
+writeback
+---------
+
+With CONFIG_ZRAM_WRITEBACK, zram can write idle/incompressible page
+to backing storage rather than keeping it in memory.
+To use the feature, admin should set up backing device via::
+
+ echo /dev/sda5 > /sys/block/zramX/backing_dev
+
+before disksize setting. It supports only partitions at this moment.
+If admin wants to use incompressible page writeback, they could do it via::
+
+ echo huge > /sys/block/zramX/writeback
+
+Admin can request writeback of idle pages at right timing via::
+
+ echo idle > /sys/block/zramX/writeback
+
+With the command, zram will writeback idle pages from memory to the storage.
+
+Additionally, if a user choose to writeback only huge and idle pages
+this can be accomplished with::
+
+ echo huge_idle > /sys/block/zramX/writeback
+
+If a user chooses to writeback only incompressible pages (pages that none of
+algorithms can compress) this can be accomplished with::
+
+ echo incompressible > /sys/block/zramX/writeback
+
+If an admin wants to write a specific page in zram device to the backing device,
+they could write a page index into the interface::
+
+ echo "page_index=1251" > /sys/block/zramX/writeback
+
+In Linux 6.16 this interface underwent some rework. First, the interface
+now supports `key=value` format for all of its parameters (`type=huge_idle`,
+etc.) Second, the support for `page_indexes` was introduced, which specify
+`LOW-HIGH` range (or ranges) of pages to be written-back. This reduces the
+number of syscalls, but more importantly this enables optimal post-processing
+target selection strategy. Usage example::
+
+ echo "type=idle" > /sys/block/zramX/writeback
+ echo "page_indexes=1-100 page_indexes=200-300" > \
+ /sys/block/zramX/writeback
+
+We also now permit multiple page_index params per call and a mix of
+single pages and page ranges::
+
+ echo page_index=42 page_index=99 page_indexes=100-200 \
+ page_indexes=500-700 > /sys/block/zramX/writeback
+
+If there are lots of write IO with flash device, potentially, it has
+flash wearout problem so that admin needs to design write limitation
+to guarantee storage health for entire product life.
+
+To overcome the concern, zram supports "writeback_limit" feature.
+The "writeback_limit_enable"'s default value is 0 so that it doesn't limit
+any writeback. IOW, if admin wants to apply writeback budget, they should
+enable writeback_limit_enable via::
+
+ $ echo 1 > /sys/block/zramX/writeback_limit_enable
+
+Once writeback_limit_enable is set, zram doesn't allow any writeback
+until admin sets the budget via /sys/block/zramX/writeback_limit.
+
+(If admin doesn't enable writeback_limit_enable, writeback_limit's value
+assigned via /sys/block/zramX/writeback_limit is meaningless.)
+
+If admin wants to limit writeback as per-day 400M, they could do it
+like below::
+
+ $ MB_SHIFT=20
+ $ 4K_SHIFT=12
+ $ echo $((400<<MB_SHIFT>>4K_SHIFT)) > \
+ /sys/block/zram0/writeback_limit.
+ $ echo 1 > /sys/block/zram0/writeback_limit_enable
+
+If admins want to allow further write again once the budget is exhausted,
+they could do it like below::
+
+ $ echo $((400<<MB_SHIFT>>4K_SHIFT)) > \
+ /sys/block/zram0/writeback_limit
+
+If an admin wants to see the remaining writeback budget since last set::
+
+ $ cat /sys/block/zramX/writeback_limit
+
+If an admin wants to disable writeback limit, they could do::
+
+ $ echo 0 > /sys/block/zramX/writeback_limit_enable
+
+The writeback_limit count will reset whenever you reset zram (e.g.,
+system reboot, echo 1 > /sys/block/zramX/reset) so keeping how many of
+writeback happened until you reset the zram to allocate extra writeback
+budget in next setting is user's job.
+
+If admin wants to measure writeback count in a certain period, they could
+know it via /sys/block/zram0/bd_stat's 3rd column.
+
+recompression
+-------------
+
+With CONFIG_ZRAM_MULTI_COMP, zram can recompress pages using alternative
+(secondary) compression algorithms. The basic idea is that alternative
+compression algorithm can provide better compression ratio at a price of
+(potentially) slower compression/decompression speeds. Alternative compression
+algorithm can, for example, be more successful compressing huge pages (those
+that default algorithm failed to compress). Another application is idle pages
+recompression - pages that are cold and sit in the memory can be recompressed
+using more effective algorithm and, hence, reduce zsmalloc memory usage.
+
+With CONFIG_ZRAM_MULTI_COMP, zram supports up to 4 compression algorithms:
+one primary and up to 3 secondary ones. Primary zram compressor is explained
+in "3) Select compression algorithm", secondary algorithms are configured
+using recomp_algorithm device attribute.
+
+Example:::
+
+ #show supported recompression algorithms
+ cat /sys/block/zramX/recomp_algorithm
+ #1: lzo lzo-rle lz4 lz4hc [zstd]
+ #2: lzo lzo-rle lz4 [lz4hc] zstd
+
+Alternative compression algorithms are sorted by priority. In the example
+above, zstd is used as the first alternative algorithm, which has priority
+of 1, while lz4hc is configured as a compression algorithm with priority 2.
+Alternative compression algorithm's priority is provided during algorithms
+configuration:::
+
+ #select zstd recompression algorithm, priority 1
+ echo "algo=zstd priority=1" > /sys/block/zramX/recomp_algorithm
+
+ #select deflate recompression algorithm, priority 2
+ echo "algo=deflate priority=2" > /sys/block/zramX/recomp_algorithm
+
+Another device attribute that CONFIG_ZRAM_MULTI_COMP enables is recompress,
+which controls recompression.
+
+Examples:::
+
+ #IDLE pages recompression is activated by `idle` mode
+ echo "type=idle" > /sys/block/zramX/recompress
+
+ #HUGE pages recompression is activated by `huge` mode
+ echo "type=huge" > /sys/block/zram0/recompress
+
+ #HUGE_IDLE pages recompression is activated by `huge_idle` mode
+ echo "type=huge_idle" > /sys/block/zramX/recompress
+
+The number of idle pages can be significant, so user-space can pass a size
+threshold (in bytes) to the recompress knob: zram will recompress only pages
+of equal or greater size:::
+
+ #recompress all pages larger than 3000 bytes
+ echo "threshold=3000" > /sys/block/zramX/recompress
+
+ #recompress idle pages larger than 2000 bytes
+ echo "type=idle threshold=2000" > /sys/block/zramX/recompress
+
+It is also possible to limit the number of pages zram re-compression will
+attempt to recompress:::
+
+ echo "type=huge_idle max_pages=42" > /sys/block/zramX/recompress
+
+During re-compression for every page, that matches re-compression criteria,
+ZRAM iterates the list of registered alternative compression algorithms in
+order of their priorities. ZRAM stops either when re-compression was
+successful (re-compressed object is smaller in size than the original one)
+and matches re-compression criteria (e.g. size threshold) or when there are
+no secondary algorithms left to try. If none of the secondary algorithms can
+successfully re-compressed the page such a page is marked as incompressible,
+so ZRAM will not attempt to re-compress it in the future.
+
+This re-compression behaviour, when it iterates through the list of
+registered compression algorithms, increases our chances of finding the
+algorithm that successfully compresses a particular page. Sometimes, however,
+it is convenient (and sometimes even necessary) to limit recompression to
+only one particular algorithm so that it will not try any other algorithms.
+This can be achieved by providing a `algo` or `priority` parameter:::
+
+ #use zstd algorithm only (if registered)
+ echo "type=huge algo=zstd" > /sys/block/zramX/recompress
+
+ #use zstd algorithm only (if zstd was registered under priority 1)
+ echo "type=huge priority=1" > /sys/block/zramX/recompress
+
+memory tracking
+===============
+
+With CONFIG_ZRAM_MEMORY_TRACKING, user can know information of the
+zram block. It could be useful to catch cold or incompressible
+pages of the process with*pagemap.
+
+If you enable the feature, you could see block state via
+/sys/kernel/debug/zram/zram0/block_state". The output is as follows::
+
+ 300 75.033841 .wh...
+ 301 63.806904 s.....
+ 302 63.806919 ..hi..
+ 303 62.801919 ....r.
+ 304 146.781902 ..hi.n
+
+First column
+ zram's block index.
+Second column
+ access time since the system was booted
+Third column
+ state of the block:
+
+ s:
+ same page
+ w:
+ written page to backing store
+ h:
+ huge page
+ i:
+ idle page
+ r:
+ recompressed page (secondary compression algorithm)
+ n:
+ none (including secondary) of algorithms could compress it
+
+First line of above example says 300th block is accessed at 75.033841sec
+and the block's state is huge so it is written back to the backing
+storage. It's a debugging feature so anyone shouldn't rely on it to work
+properly.
+
+Nitin Gupta
+ngupta@vflare.org
diff --git a/Documentation/admin-guide/bootconfig.rst b/Documentation/admin-guide/bootconfig.rst
new file mode 100644
index 000000000000..7a86042c9b6d
--- /dev/null
+++ b/Documentation/admin-guide/bootconfig.rst
@@ -0,0 +1,327 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. _bootconfig:
+
+==================
+Boot Configuration
+==================
+
+:Author: Masami Hiramatsu <mhiramat@kernel.org>
+
+Overview
+========
+
+The boot configuration expands the current kernel command line to support
+additional key-value data when booting the kernel in an efficient way.
+This allows administrators to pass a structured-Key config file.
+
+Config File Syntax
+==================
+
+The boot config syntax is a simple structured key-value. Each key consists
+of dot-connected-words, and key and value are connected by ``=``. The value
+has to be terminated by semi-colon (``;``) or newline (``\n``).
+For array value, array entries are separated by comma (``,``). ::
+
+ KEY[.WORD[...]] = VALUE[, VALUE2[...]][;]
+
+Unlike the kernel command line syntax, spaces are OK around the comma and ``=``.
+
+Each key word must contain only alphabets, numbers, dash (``-``) or underscore
+(``_``). And each value only contains printable characters or spaces except
+for delimiters such as semi-colon (``;``), new-line (``\n``), comma (``,``),
+hash (``#``) and closing brace (``}``).
+
+If you want to use those delimiters in a value, you can use either double-
+quotes (``"VALUE"``) or single-quotes (``'VALUE'``) to quote it. Note that
+you can not escape these quotes.
+
+There can be a key which doesn't have value or has an empty value. Those keys
+are used for checking if the key exists or not (like a boolean).
+
+Key-Value Syntax
+----------------
+
+The boot config file syntax allows user to merge partially same word keys
+by brace. For example::
+
+ foo.bar.baz = value1
+ foo.bar.qux.quux = value2
+
+These can be written also in::
+
+ foo.bar {
+ baz = value1
+ qux.quux = value2
+ }
+
+Or more shorter, written as following::
+
+ foo.bar { baz = value1; qux.quux = value2 }
+
+In both styles, same key words are automatically merged when parsing it
+at boot time. So you can append similar trees or key-values.
+
+Same-key Values
+---------------
+
+It is prohibited that two or more values or arrays share a same-key.
+For example,::
+
+ foo = bar, baz
+ foo = qux # !ERROR! we can not re-define same key
+
+If you want to update the value, you must use the override operator
+``:=`` explicitly. For example::
+
+ foo = bar, baz
+ foo := qux
+
+then, the ``qux`` is assigned to ``foo`` key. This is useful for
+overriding the default value by adding (partial) custom bootconfigs
+without parsing the default bootconfig.
+
+If you want to append the value to existing key as an array member,
+you can use ``+=`` operator. For example::
+
+ foo = bar, baz
+ foo += qux
+
+In this case, the key ``foo`` has ``bar``, ``baz`` and ``qux``.
+
+Moreover, sub-keys and a value can coexist under a parent key.
+For example, following config is allowed.::
+
+ foo = value1
+ foo.bar = value2
+ foo := value3 # This will update foo's value.
+
+Note, since there is no syntax to put a raw value directly under a
+structured key, you have to define it outside of the brace. For example::
+
+ foo {
+ bar = value1
+ bar {
+ baz = value2
+ qux = value3
+ }
+ }
+
+Also, the order of the value node under a key is fixed. If there
+are a value and subkeys, the value is always the first child node
+of the key. Thus if user specifies subkeys first, e.g.::
+
+ foo.bar = value1
+ foo = value2
+
+In the program (and /proc/bootconfig), it will be shown as below::
+
+ foo = value2
+ foo.bar = value1
+
+Comments
+--------
+
+The config syntax accepts shell-script style comments. The comments starting
+with hash ("#") until newline ("\n") will be ignored.
+
+::
+
+ # comment line
+ foo = value # value is set to foo.
+ bar = 1, # 1st element
+ 2, # 2nd element
+ 3 # 3rd element
+
+This is parsed as below::
+
+ foo = value
+ bar = 1, 2, 3
+
+Note that you can not put a comment between value and delimiter(``,`` or
+``;``). This means following config has a syntax error ::
+
+ key = 1 # comment
+ ,2
+
+
+/proc/bootconfig
+================
+
+/proc/bootconfig is a user-space interface of the boot config.
+Unlike /proc/cmdline, this file shows the key-value style list.
+Each key-value pair is shown in each line with following style::
+
+ KEY[.WORDS...] = "[VALUE]"[,"VALUE2"...]
+
+
+Boot Kernel With a Boot Config
+==============================
+
+There are two options to boot the kernel with bootconfig: attaching the
+bootconfig to the initrd image or embedding it in the kernel itself.
+
+Attaching a Boot Config to Initrd
+---------------------------------
+
+Since the boot configuration file is loaded with initrd by default,
+it will be added to the end of the initrd (initramfs) image file with
+padding, size, checksum and 12-byte magic word as below.
+
+[initrd][bootconfig][padding][size(le32)][checksum(le32)][#BOOTCONFIG\n]
+
+The size and checksum fields are unsigned 32bit little endian value.
+
+When the boot configuration is added to the initrd image, the total
+file size is aligned to 4 bytes. To fill the gap, null characters
+(``\0``) will be added. Thus the ``size`` is the length of the bootconfig
+file + padding bytes.
+
+The Linux kernel decodes the last part of the initrd image in memory to
+get the boot configuration data.
+Because of this "piggyback" method, there is no need to change or
+update the boot loader and the kernel image itself as long as the boot
+loader passes the correct initrd file size. If by any chance, the boot
+loader passes a longer size, the kernel fails to find the bootconfig data.
+
+To do this operation, Linux kernel provides ``bootconfig`` command under
+tools/bootconfig, which allows admin to apply or delete the config file
+to/from initrd image. You can build it by the following command::
+
+ # make -C tools/bootconfig
+
+To add your boot config file to initrd image, run bootconfig as below
+(Old data is removed automatically if exists)::
+
+ # tools/bootconfig/bootconfig -a your-config /boot/initrd.img-X.Y.Z
+
+To remove the config from the image, you can use -d option as below::
+
+ # tools/bootconfig/bootconfig -d /boot/initrd.img-X.Y.Z
+
+Then add "bootconfig" on the normal kernel command line to tell the
+kernel to look for the bootconfig at the end of the initrd file.
+Alternatively, build your kernel with the ``CONFIG_BOOT_CONFIG_FORCE``
+Kconfig option selected.
+
+Embedding a Boot Config into Kernel
+-----------------------------------
+
+If you can not use initrd, you can also embed the bootconfig file in the
+kernel by Kconfig options. In this case, you need to recompile the kernel
+with the following configs::
+
+ CONFIG_BOOT_CONFIG_EMBED=y
+ CONFIG_BOOT_CONFIG_EMBED_FILE="/PATH/TO/BOOTCONFIG/FILE"
+
+``CONFIG_BOOT_CONFIG_EMBED_FILE`` requires an absolute path or a relative
+path to the bootconfig file from source tree or object tree.
+The kernel will embed it as the default bootconfig.
+
+Just as when attaching the bootconfig to the initrd, you need ``bootconfig``
+option on the kernel command line to enable the embedded bootconfig, or,
+alternatively, build your kernel with the ``CONFIG_BOOT_CONFIG_FORCE``
+Kconfig option selected.
+
+Note that even if you set this option, you can override the embedded
+bootconfig by another bootconfig which attached to the initrd.
+
+Kernel parameters via Boot Config
+=================================
+
+In addition to the kernel command line, the boot config can be used for
+passing the kernel parameters. All the key-value pairs under ``kernel``
+key will be passed to kernel cmdline directly. Moreover, the key-value
+pairs under ``init`` will be passed to init process via the cmdline.
+The parameters are concatenated with user-given kernel cmdline string
+as the following order, so that the command line parameter can override
+bootconfig parameters (this depends on how the subsystem handles parameters
+but in general, earlier parameter will be overwritten by later one.)::
+
+ [bootconfig params][cmdline params] -- [bootconfig init params][cmdline init params]
+
+Here is an example of the bootconfig file for kernel/init parameters.::
+
+ kernel {
+ root = 01234567-89ab-cdef-0123-456789abcd
+ }
+ init {
+ splash
+ }
+
+This will be copied into the kernel cmdline string as the following::
+
+ root="01234567-89ab-cdef-0123-456789abcd" -- splash
+
+If user gives some other command line like,::
+
+ ro bootconfig -- quiet
+
+The final kernel cmdline will be the following::
+
+ root="01234567-89ab-cdef-0123-456789abcd" ro bootconfig -- splash quiet
+
+
+Config File Limitation
+======================
+
+Currently the maximum config size is 32KB and the total key-words (not
+key-value entries) must be under 1024 nodes.
+Note: this is not the number of entries but nodes, an entry must consume
+more than 2 nodes (a key-word and a value). So theoretically, it will be
+up to 512 key-value pairs. If keys contains 3 words in average, it can
+contain 256 key-value pairs. In most cases, the number of config items
+will be under 100 entries and smaller than 8KB, so it would be enough.
+If the node number exceeds 1024, parser returns an error even if the file
+size is smaller than 32KB. (Note that this maximum size is not including
+the padding null characters.)
+Anyway, since bootconfig command verifies it when appending a boot config
+to initrd image, user can notice it before boot.
+
+
+Bootconfig APIs
+===============
+
+User can query or loop on key-value pairs, also it is possible to find
+a root (prefix) key node and find key-values under that node.
+
+If you have a key string, you can query the value directly with the key
+using xbc_find_value(). If you want to know what keys exist in the boot
+config, you can use xbc_for_each_key_value() to iterate key-value pairs.
+Note that you need to use xbc_array_for_each_value() for accessing
+each array's value, e.g.::
+
+ vnode = NULL;
+ xbc_find_value("key.word", &vnode);
+ if (vnode && xbc_node_is_array(vnode))
+ xbc_array_for_each_value(vnode, value) {
+ printk("%s ", value);
+ }
+
+If you want to focus on keys which have a prefix string, you can use
+xbc_find_node() to find a node by the prefix string, and iterate
+keys under the prefix node with xbc_node_for_each_key_value().
+
+But the most typical usage is to get the named value under prefix
+or get the named array under prefix as below::
+
+ root = xbc_find_node("key.prefix");
+ value = xbc_node_find_value(root, "option", &vnode);
+ ...
+ xbc_node_for_each_array_value(root, "array-option", value, anode) {
+ ...
+ }
+
+This accesses a value of "key.prefix.option" and an array of
+"key.prefix.array-option".
+
+Locking is not needed, since after initialization, the config becomes
+read-only. All data and keys must be copied if you need to modify it.
+
+
+Functions and structures
+========================
+
+.. kernel-doc:: include/linux/bootconfig.h
+.. kernel-doc:: lib/bootconfig.c
+
diff --git a/Documentation/admin-guide/braille-console.rst b/Documentation/admin-guide/braille-console.rst
index 18e79337dcfd..153472e93cae 100644
--- a/Documentation/admin-guide/braille-console.rst
+++ b/Documentation/admin-guide/braille-console.rst
@@ -21,8 +21,8 @@ override the baud rate to 115200, etc.
By default, the braille device will just show the last kernel message (console
mode). To review previous messages, press the Insert key to switch to the VT
review mode. In review mode, the arrow keys permit to browse in the VT content,
-:kbd:`PAGE-UP`/:kbd:`PAGE-DOWN` keys go at the top/bottom of the screen, and
-the :kbd:`HOME` key goes back
+`PAGE-UP`/`PAGE-DOWN` keys go at the top/bottom of the screen, and
+the `HOME` key goes back
to the cursor, hence providing very basic screen reviewing facility.
Sound feedback can be obtained by adding the ``braille_console.sound=1`` kernel
diff --git a/Documentation/btmrvl.txt b/Documentation/admin-guide/btmrvl.rst
index ec57740ead0c..ec57740ead0c 100644
--- a/Documentation/btmrvl.txt
+++ b/Documentation/admin-guide/btmrvl.rst
diff --git a/Documentation/admin-guide/bug-bisect.rst b/Documentation/admin-guide/bug-bisect.rst
index 59567da344e8..f4f867cabb17 100644
--- a/Documentation/admin-guide/bug-bisect.rst
+++ b/Documentation/admin-guide/bug-bisect.rst
@@ -1,76 +1,165 @@
-Bisecting a bug
-+++++++++++++++
+.. SPDX-License-Identifier: (GPL-2.0+ OR CC-BY-4.0)
+.. [see the bottom of this file for redistribution information]
-Last updated: 28 October 2016
+======================
+Bisecting a regression
+======================
-Introduction
-============
+This document describes how to use a ``git bisect`` to find the source code
+change that broke something -- for example when some functionality stopped
+working after upgrading from Linux 6.0 to 6.1.
-Always try the latest kernel from kernel.org and build from source. If you are
-not confident in doing that please report the bug to your distribution vendor
-instead of to a kernel developer.
+The text focuses on the gist of the process. If you are new to bisecting the
+kernel, better follow Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst
+instead: it depicts everything from start to finish while covering multiple
+aspects even kernel developers occasionally forget. This includes detecting
+situations early where a bisection would be a waste of time, as nobody would
+care about the result -- for example, because the problem happens after the
+kernel marked itself as 'tainted', occurs in an abandoned version, was already
+fixed, or is caused by a .config change you or your Linux distributor performed.
-Finding bugs is not always easy. Have a go though. If you can't find it don't
-give up. Report as much as you have found to the relevant maintainer. See
-MAINTAINERS for who that is for the subsystem you have worked on.
+Finding the change causing a kernel issue using a bisection
+===========================================================
-Before you submit a bug report read
-:ref:`Documentation/admin-guide/reporting-bugs.rst <reportingbugs>`.
+*Note: the following process assumes you prepared everything for a bisection.
+This includes having a Git clone with the appropriate sources, installing the
+software required to build and install kernels, as well as a .config file stored
+in a safe place (the following example assumes '~/prepared_kernel_.config') to
+use as pristine base at each bisection step; ideally, you have also worked out
+a fully reliable and straight-forward way to reproduce the regression, too.*
-Devices not appearing
-=====================
+* Preparation: start the bisection and tell Git about the points in the history
+ you consider to be working and broken, which Git calls 'good' and 'bad'::
-Often this is caused by udev/systemd. Check that first before blaming it
-on the kernel.
+ git bisect start
+ git bisect good v6.0
+ git bisect bad v6.1
-Finding patch that caused a bug
-===============================
+ Instead of Git tags like 'v6.0' and 'v6.1' you can specify commit-ids, too.
-Using the provided tools with ``git`` makes finding bugs easy provided the bug
-is reproducible.
+1. Copy your prepared .config into the build directory and adjust it to the
+ needs of the codebase Git checked out for testing::
-Steps to do it:
+ cp ~/prepared_kernel_.config .config
+ make olddefconfig
-- build the Kernel from its git source
-- start bisect with [#f1]_::
-
- $ git bisect start
-
-- mark the broken changeset with::
-
- $ git bisect bad [commit]
-
-- mark a changeset where the code is known to work with::
-
- $ git bisect good [commit]
-
-- rebuild the Kernel and test
-- interact with git bisect by using either::
-
- $ git bisect good
-
- or::
-
- $ git bisect bad
-
- depending if the bug happened on the changeset you're testing
-- After some interactions, git bisect will give you the changeset that
- likely caused the bug.
-
-- For example, if you know that the current version is bad, and version
- 4.8 is good, you could do::
-
- $ git bisect start
- $ git bisect bad # Current version is bad
- $ git bisect good v4.8
-
-
-.. [#f1] You can, optionally, provide both good and bad arguments at git
- start with ``git bisect start [BAD] [GOOD]``
-
-For further references, please read:
-
-- The man page for ``git-bisect``
-- `Fighting regressions with git bisect <https://www.kernel.org/pub/software/scm/git/docs/git-bisect-lk2009.html>`_
-- `Fully automated bisecting with "git bisect run" <https://lwn.net/Articles/317154>`_
-- `Using Git bisect to figure out when brokenness was introduced <http://webchick.net/node/99>`_
+2. Now build, install, and boot a kernel. This might fail for unrelated reasons,
+ for example, when a compile error happens at the current stage of the
+ bisection a later change resolves. In such cases run ``git bisect skip`` and
+ go back to step 1.
+
+3. Check if the functionality that regressed works in the kernel you just built.
+
+ If it works, execute::
+
+ git bisect good
+
+ If it is broken, run::
+
+ git bisect bad
+
+ Note, getting this wrong just once will send the rest of the bisection
+ totally off course. To prevent having to start anew later you thus want to
+ ensure what you tell Git is correct; it is thus often wise to spend a few
+ minutes more on testing in case your reproducer is unreliable.
+
+ After issuing one of these two commands, Git will usually check out another
+ bisection point and print something like 'Bisecting: 675 revisions left to
+ test after this (roughly 10 steps)'. In that case go back to step 1.
+
+ If Git instead prints something like 'cafecaca0c0dacafecaca0c0dacafecaca0c0da
+ is the first bad commit', then you have finished the bisection. In that case
+ move to the next point below. Note, right after displaying that line Git will
+ show some details about the culprit including its patch description; this can
+ easily fill your terminal, so you might need to scroll up to see the message
+ mentioning the culprit's commit-id.
+
+ In case you missed Git's output, you can always run ``git bisect log`` to
+ print the status: it will show how many steps remain or mention the result of
+ the bisection.
+
+* Recommended complementary task: put the bisection log and the current .config
+ file aside for the bug report; furthermore tell Git to reset the sources to
+ the state before the bisection::
+
+ git bisect log > ~/bisection-log
+ cp .config ~/bisection-config-culprit
+ git bisect reset
+
+* Recommended optional task: try reverting the culprit on top of the latest
+ codebase and check if that fixes your bug; if that is the case, it validates
+ the bisection and enables developers to resolve the regression through a
+ revert.
+
+ To try this, update your clone and check out latest mainline. Then tell Git
+ to revert the change by specifying its commit-id::
+
+ git revert --no-edit cafec0cacaca0
+
+ Git might reject this, for example when the bisection landed on a merge
+ commit. In that case, abandon the attempt. Do the same, if Git fails to revert
+ the culprit on its own because later changes depend on it -- at least unless
+ you bisected a stable or longterm kernel series, in which case you want to
+ check out its latest codebase and try a revert there.
+
+ If a revert succeeds, build and test another kernel to check if reverting
+ resolved your regression.
+
+With that the process is complete. Now report the regression as described by
+Documentation/admin-guide/reporting-issues.rst.
+
+Bisecting linux-next
+--------------------
+
+If you face a problem only happening in linux-next, bisect between the
+linux-next branches 'stable' and 'master'. The following commands will start
+the process for a linux-next tree you added as a remote called 'next'::
+
+ git bisect start
+ git bisect good next/stable
+ git bisect bad next/master
+
+The 'stable' branch refers to the state of linux-mainline that the current
+linux-next release (found in the 'master' branch) is based on -- the former
+thus should be free of any problems that show up in -next, but not in Linus'
+tree.
+
+This will bisect across a wide range of changes, some of which you might have
+used in earlier linux-next releases without problems. Sadly there is no simple
+way to avoid checking them: bisecting from one linux-next release to a later
+one (say between 'next-20241020' and 'next-20241021') is impossible, as they
+share no common history.
+
+Additional reading material
+---------------------------
+
+* The `man page for 'git bisect' <https://git-scm.com/docs/git-bisect>`_ and
+ `fighting regressions with 'git bisect' <https://git-scm.com/docs/git-bisect-lk2009.html>`_
+ in the Git documentation.
+* `Working with git bisect <https://nathanchance.dev/posts/working-with-git-bisect/>`_
+ from kernel developer Nathan Chancellor.
+* `Using Git bisect to figure out when brokenness was introduced <http://webchick.net/node/99>`_.
+* `Fully automated bisecting with 'git bisect run' <https://lwn.net/Articles/317154>`_.
+
+..
+ end-of-content
+..
+ This document is maintained by Thorsten Leemhuis <linux@leemhuis.info>. If
+ you spot a typo or small mistake, feel free to let him know directly and
+ he'll fix it. You are free to do the same in a mostly informal way if you
+ want to contribute changes to the text -- but for copyright reasons please CC
+ linux-doc@vger.kernel.org and 'sign-off' your contribution as
+ Documentation/process/submitting-patches.rst explains in the section 'Sign
+ your work - the Developer's Certificate of Origin'.
+..
+ This text is available under GPL-2.0+ or CC-BY-4.0, as stated at the top
+ of the file. If you want to distribute this text under CC-BY-4.0 only,
+ please use 'The Linux kernel development community' for author attribution
+ and link this as source:
+ https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/admin-guide/bug-bisect.rst
+
+..
+ Note: Only the content of this RST file as found in the Linux kernel sources
+ is available under CC-BY-4.0, as versions of this text that were processed
+ (for example by the kernel's build system) might contain content taken from
+ files which use a more restrictive license.
diff --git a/Documentation/admin-guide/bug-hunting.rst b/Documentation/admin-guide/bug-hunting.rst
index f278b289e260..7da0504388ec 100644
--- a/Documentation/admin-guide/bug-hunting.rst
+++ b/Documentation/admin-guide/bug-hunting.rst
@@ -49,15 +49,19 @@ the issue, it may also contain the word **Oops**, as on this one::
Despite being an **Oops** or some other sort of stack trace, the offended
line is usually required to identify and handle the bug. Along this chapter,
-we'll refer to "Oops" for all kinds of stack traces that need to be analized.
+we'll refer to "Oops" for all kinds of stack traces that need to be analyzed.
-.. note::
+If the kernel is compiled with ``CONFIG_DEBUG_INFO``, you can enhance the
+quality of the stack trace by using file:`scripts/decode_stacktrace.sh`.
+
+Modules linked in
+-----------------
+
+Modules that are tainted or are being loaded or unloaded are marked with
+"(...)", where the taint flags are described in
+file:`Documentation/admin-guide/tainted-kernels.rst`, "being loaded" is
+annotated with "+", and "being unloaded" is annotated with "-".
- ``ksymoops`` is useless on 2.6 or upper. Please use the Oops in its original
- format (from ``dmesg``, etc). Ignore any references in this or other docs to
- "decoding the Oops" or "running it through ksymoops".
- If you post an Oops from 2.6+ that has been run through ``ksymoops``,
- people will just tell you to repost it.
Where is the Oops message is located?
-------------------------------------
@@ -71,7 +75,7 @@ by running ``journalctl`` command.
Sometimes ``klogd`` dies, in which case you can run ``dmesg > file`` to
read the data from the kernel buffers and save it. Or you can
``cat /proc/kmsg > file``, however you have to break in to stop the transfer,
-``kmsg`` is a "never ending file".
+since ``kmsg`` is a "never ending file".
If the machine has crashed so badly that you cannot enter commands or
the disk is not available then you have three options:
@@ -81,18 +85,18 @@ the disk is not available then you have three options:
planned for a crash. Alternatively, you can take a picture of
the screen with a digital camera - not nice, but better than
nothing. If the messages scroll off the top of the console, you
- may find that booting with a higher resolution (eg, ``vga=791``)
+ may find that booting with a higher resolution (e.g., ``vga=791``)
will allow you to read more of the text. (Caveat: This needs ``vesafb``,
- so won't help for 'early' oopses)
+ so won't help for 'early' oopses.)
(2) Boot with a serial console (see
:ref:`Documentation/admin-guide/serial-console.rst <serial_console>`),
run a null modem to a second machine and capture the output there
using your favourite communication program. Minicom works well.
-(3) Use Kdump (see Documentation/kdump/kdump.txt),
+(3) Use Kdump (see Documentation/admin-guide/kdump/kdump.rst),
extract the kernel ring buffer from old memory with using dmesg
- gdbmacro in Documentation/kdump/gdbmacros.txt.
+ gdbmacro in Documentation/admin-guide/kdump/gdbmacros.txt.
Finding the bug's location
--------------------------
@@ -104,7 +108,7 @@ Kernel source file. There are two methods for doing that. Usually, using
gdb
^^^
-The GNU debug (``gdb``) is the best way to figure out the exact file and line
+The GNU debugger (``gdb``) is the best way to figure out the exact file and line
number of the OOPS from the ``vmlinux`` file.
The usage of gdb works best on a kernel compiled with ``CONFIG_DEBUG_INFO``.
@@ -165,7 +169,7 @@ If you have a call trace, such as::
[<ffffffff8802770b>] :jbd:journal_stop+0x1be/0x1ee
...
-this shows the problem likely in the :jbd: module. You can load that module
+this shows the problem likely is in the :jbd: module. You can load that module
in gdb and list the relevant code::
$ gdb fs/jbd/jbd.ko
@@ -192,15 +196,16 @@ will see the assembler code for the routine shown, but if your kernel has
debug symbols the C code will also be available. (Debug symbols can be enabled
in the kernel hacking menu of the menu configuration.) For example::
- $ objdump -r -S -l --disassemble net/dccp/ipv4.o
+ $ objdump -r -S -l --disassemble net/ipv4/tcp.o
.. note::
You need to be at the top level of the kernel tree for this to pick up
your C files.
-If you don't have access to the code you can also debug on some crash dumps
-e.g. crash dump output as shown by Dave Miller::
+If you don't have access to the source code you can still debug some crash
+dumps using the following method (example crash dump output as shown by
+Dave Miller)::
EIP is at +0x14/0x4c0
...
@@ -230,21 +235,24 @@ e.g. crash dump output as shown by Dave Miller::
mov 0x8(%ebp), %ebx ! %ebx = skb->sk
mov 0x13c(%ebx), %eax ! %eax = inet_sk(sk)->opt
+file:`scripts/decodecode` can be used to automate most of this, depending
+on what CPU architecture is being debugged.
+
Reporting the bug
-----------------
Once you find where the bug happened, by inspecting its location,
you could either try to fix it yourself or report it upstream.
-In order to report it upstream, you should identify the mailing list
-used for the development of the affected code. This can be done by using
-the ``get_maintainer.pl`` script.
+In order to report it upstream, you should identify the bug tracker, if any, or
+mailing list used for the development of the affected code. This can be done by
+using the ``get_maintainer.pl`` script.
For example, if you find a bug at the gspca's sonixj.c file, you can get
-their maintainers with::
+its maintainers with::
- $ ./scripts/get_maintainer.pl -f drivers/media/usb/gspca/sonixj.c
- Hans Verkuil <hverkuil@xs4all.nl> (odd fixer:GSPCA USB WEBCAM DRIVER,commit_signer:1/1=100%)
+ $ ./scripts/get_maintainer.pl --bug -f drivers/media/usb/gspca/sonixj.c
+ Hans Verkuil <hverkuil@kernel.org> (odd fixer:GSPCA USB WEBCAM DRIVER,commit_signer:1/1=100%)
Mauro Carvalho Chehab <mchehab@kernel.org> (maintainer:MEDIA INPUT INFRASTRUCTURE (V4L/DVB),commit_signer:1/1=100%)
Tejun Heo <tj@kernel.org> (commit_signer:1/1=100%)
Bhaktipriya Shridhar <bhaktipriya96@gmail.com> (commit_signer:1/1=100%,authored:1/1=100%,added_lines:4/4=100%,removed_lines:9/9=100%)
@@ -253,16 +261,18 @@ their maintainers with::
Please notice that it will point to:
-- The last developers that touched on the source code. On the above example,
- Tejun and Bhaktipriya (in this specific case, none really envolved on the
- development of this file);
+- The last developers that touched the source code (if this is done inside
+ a git tree). On the above example, Tejun and Bhaktipriya (in this
+ specific case, none really involved on the development of this file);
- The driver maintainer (Hans Verkuil);
- The subsystem maintainer (Mauro Carvalho Chehab);
- The driver and/or subsystem mailing list (linux-media@vger.kernel.org);
-- the Linux Kernel mailing list (linux-kernel@vger.kernel.org).
+- The Linux Kernel mailing list (linux-kernel@vger.kernel.org);
+- The bug reporting URIs for the driver/subsystem (none in the above example).
-Usually, the fastest way to have your bug fixed is to report it to mailing
-list used for the development of the code (linux-media ML) copying the driver maintainer (Hans).
+If the listing contains bug reporting URIs at the end, please prefer them over
+email. Otherwise, please report bugs to the mailing list used for the
+development of the code (linux-media ML) copying the driver maintainer (Hans).
If you are totally stumped as to whom to send the report, and
``get_maintainer.pl`` didn't provide you anything useful, send it to
@@ -303,9 +313,9 @@ protection fault message can be simply cut out of the message files
and forwarded to the kernel developers.
Two types of address resolution are performed by ``klogd``. The first is
-static translation and the second is dynamic translation. Static
-translation uses the System.map file in much the same manner that
-ksymoops does. In order to do static translation the ``klogd`` daemon
+static translation and the second is dynamic translation.
+Static translation uses the System.map file.
+In order to do static translation the ``klogd`` daemon
must be able to find a system map file at daemon initialization time.
See the klogd man page for information on how ``klogd`` searches for map
files.
@@ -358,12 +368,3 @@ processed by ``klogd``::
Aug 29 09:51:01 blizard kernel: Call Trace: [oops:_oops_ioctl+48/80] [_sys_ioctl+254/272] [_system_call+82/128]
Aug 29 09:51:01 blizard kernel: Code: c7 00 05 00 00 00 eb 08 90 90 90 90 90 90 90 90 89 ec 5d c3
----------------------------------------------------------------------------
-
-::
-
- Dr. G.W. Wettstein Oncology Research Div. Computing Facility
- Roger Maris Cancer Center INTERNET: greg@wind.rmcc.com
- 820 4th St. N.
- Fargo, ND 58122
- Phone: 701-234-7556
diff --git a/Documentation/admin-guide/cgroup-v1/blkio-controller.rst b/Documentation/admin-guide/cgroup-v1/blkio-controller.rst
new file mode 100644
index 000000000000..dabb80cdd25a
--- /dev/null
+++ b/Documentation/admin-guide/cgroup-v1/blkio-controller.rst
@@ -0,0 +1,301 @@
+===================
+Block IO Controller
+===================
+
+Overview
+========
+cgroup subsys "blkio" implements the block io controller. There seems to be
+a need of various kinds of IO control policies (like proportional BW, max BW)
+both at leaf nodes as well as at intermediate nodes in a storage hierarchy.
+Plan is to use the same cgroup based management interface for blkio controller
+and based on user options switch IO policies in the background.
+
+One IO control policy is throttling policy which can be used to
+specify upper IO rate limits on devices. This policy is implemented in
+generic block layer and can be used on leaf nodes as well as higher
+level logical devices like device mapper.
+
+HOWTO
+=====
+
+Throttling/Upper Limit policy
+-----------------------------
+Enable Block IO controller::
+
+ CONFIG_BLK_CGROUP=y
+
+Enable throttling in block layer::
+
+ CONFIG_BLK_DEV_THROTTLING=y
+
+Mount blkio controller (see cgroups.txt, Why are cgroups needed?)::
+
+ mount -t cgroup -o blkio none /sys/fs/cgroup/blkio
+
+Specify a bandwidth rate on particular device for root group. The format
+for policy is "<major>:<minor> <bytes_per_second>"::
+
+ echo "8:16 1048576" > /sys/fs/cgroup/blkio/blkio.throttle.read_bps_device
+
+This will put a limit of 1MB/second on reads happening for root group
+on device having major/minor number 8:16.
+
+Run dd to read a file and see if rate is throttled to 1MB/s or not::
+
+ # dd iflag=direct if=/mnt/common/zerofile of=/dev/null bs=4K count=1024
+ 1024+0 records in
+ 1024+0 records out
+ 4194304 bytes (4.2 MB) copied, 4.0001 s, 1.0 MB/s
+
+Limits for writes can be put using blkio.throttle.write_bps_device file.
+
+Hierarchical Cgroups
+====================
+
+Throttling implements hierarchy support; however,
+throttling's hierarchy support is enabled iff "sane_behavior" is
+enabled from cgroup side, which currently is a development option and
+not publicly available.
+
+If somebody created a hierarchy like as follows::
+
+ root
+ / \
+ test1 test2
+ |
+ test3
+
+Throttling with "sane_behavior" will handle the
+hierarchy correctly. For throttling, all limits apply
+to the whole subtree while all statistics are local to the IOs
+directly generated by tasks in that cgroup.
+
+Throttling without "sane_behavior" enabled from cgroup side will
+practically treat all groups at same level as if it looks like the
+following::
+
+ pivot
+ / / \ \
+ root test1 test2 test3
+
+Various user visible config options
+===================================
+
+ CONFIG_BLK_CGROUP
+ Block IO controller.
+
+ CONFIG_BFQ_CGROUP_DEBUG
+ Debug help. Right now some additional stats file show up in cgroup
+ if this option is enabled.
+
+ CONFIG_BLK_DEV_THROTTLING
+ Enable block device throttling support in block layer.
+
+Details of cgroup files
+=======================
+
+Proportional weight policy files
+--------------------------------
+
+ blkio.bfq.weight
+ Specifies per cgroup weight. This is default weight of the group
+ on all the devices until and unless overridden by per device rule
+ (see `blkio.bfq.weight_device` below).
+
+ Currently allowed range of weights is from 1 to 1000. For more details,
+ see Documentation/block/bfq-iosched.rst.
+
+ blkio.bfq.weight_device
+ Specifies per cgroup per device weights, overriding the default group
+ weight. For more details, see Documentation/block/bfq-iosched.rst.
+
+ Following is the format::
+
+ # echo dev_maj:dev_minor weight > blkio.bfq.weight_device
+
+ Configure weight=300 on /dev/sdb (8:16) in this cgroup::
+
+ # echo 8:16 300 > blkio.bfq.weight_device
+ # cat blkio.bfq.weight_device
+ dev weight
+ 8:16 300
+
+ Configure weight=500 on /dev/sda (8:0) in this cgroup::
+
+ # echo 8:0 500 > blkio.bfq.weight_device
+ # cat blkio.bfq.weight_device
+ dev weight
+ 8:0 500
+ 8:16 300
+
+ Remove specific weight for /dev/sda in this cgroup::
+
+ # echo 8:0 0 > blkio.bfq.weight_device
+ # cat blkio.bfq.weight_device
+ dev weight
+ 8:16 300
+
+ blkio.time
+ Disk time allocated to cgroup per device in milliseconds. First
+ two fields specify the major and minor number of the device and
+ third field specifies the disk time allocated to group in
+ milliseconds.
+
+ blkio.sectors
+ Number of sectors transferred to/from disk by the group. First
+ two fields specify the major and minor number of the device and
+ third field specifies the number of sectors transferred by the
+ group to/from the device.
+
+ blkio.io_service_bytes
+ Number of bytes transferred to/from the disk by the group. These
+ are further divided by the type of operation - read or write, sync
+ or async. First two fields specify the major and minor number of the
+ device, third field specifies the operation type and the fourth field
+ specifies the number of bytes.
+
+ blkio.io_serviced
+ Number of IOs (bio) issued to the disk by the group. These
+ are further divided by the type of operation - read or write, sync
+ or async. First two fields specify the major and minor number of the
+ device, third field specifies the operation type and the fourth field
+ specifies the number of IOs.
+
+ blkio.io_service_time
+ Total amount of time between request dispatch and request completion
+ for the IOs done by this cgroup. This is in nanoseconds to make it
+ meaningful for flash devices too. For devices with queue depth of 1,
+ this time represents the actual service time. When queue_depth > 1,
+ that is no longer true as requests may be served out of order. This
+ may cause the service time for a given IO to include the service time
+ of multiple IOs when served out of order which may result in total
+ io_service_time > actual time elapsed. This time is further divided by
+ the type of operation - read or write, sync or async. First two fields
+ specify the major and minor number of the device, third field
+ specifies the operation type and the fourth field specifies the
+ io_service_time in ns.
+
+ blkio.io_wait_time
+ Total amount of time the IOs for this cgroup spent waiting in the
+ scheduler queues for service. This can be greater than the total time
+ elapsed since it is cumulative io_wait_time for all IOs. It is not a
+ measure of total time the cgroup spent waiting but rather a measure of
+ the wait_time for its individual IOs. For devices with queue_depth > 1
+ this metric does not include the time spent waiting for service once
+ the IO is dispatched to the device but till it actually gets serviced
+ (there might be a time lag here due to re-ordering of requests by the
+ device). This is in nanoseconds to make it meaningful for flash
+ devices too. This time is further divided by the type of operation -
+ read or write, sync or async. First two fields specify the major and
+ minor number of the device, third field specifies the operation type
+ and the fourth field specifies the io_wait_time in ns.
+
+ blkio.io_merged
+ Total number of bios/requests merged into requests belonging to this
+ cgroup. This is further divided by the type of operation - read or
+ write, sync or async.
+
+ blkio.io_queued
+ Total number of requests queued up at any given instant for this
+ cgroup. This is further divided by the type of operation - read or
+ write, sync or async.
+
+ blkio.avg_queue_size
+ Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y.
+ The average queue size for this cgroup over the entire time of this
+ cgroup's existence. Queue size samples are taken each time one of the
+ queues of this cgroup gets a timeslice.
+
+ blkio.group_wait_time
+ Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y.
+ This is the amount of time the cgroup had to wait since it became busy
+ (i.e., went from 0 to 1 request queued) to get a timeslice for one of
+ its queues. This is different from the io_wait_time which is the
+ cumulative total of the amount of time spent by each IO in that cgroup
+ waiting in the scheduler queue. This is in nanoseconds. If this is
+ read when the cgroup is in a waiting (for timeslice) state, the stat
+ will only report the group_wait_time accumulated till the last time it
+ got a timeslice and will not include the current delta.
+
+ blkio.empty_time
+ Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y.
+ This is the amount of time a cgroup spends without any pending
+ requests when not being served, i.e., it does not include any time
+ spent idling for one of the queues of the cgroup. This is in
+ nanoseconds. If this is read when the cgroup is in an empty state,
+ the stat will only report the empty_time accumulated till the last
+ time it had a pending request and will not include the current delta.
+
+ blkio.idle_time
+ Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y.
+ This is the amount of time spent by the IO scheduler idling for a
+ given cgroup in anticipation of a better request than the existing ones
+ from other queues/cgroups. This is in nanoseconds. If this is read
+ when the cgroup is in an idling state, the stat will only report the
+ idle_time accumulated till the last idle period and will not include
+ the current delta.
+
+ blkio.dequeue
+ Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y. This
+ gives the statistics about how many a times a group was dequeued
+ from service tree of the device. First two fields specify the major
+ and minor number of the device and third field specifies the number
+ of times a group was dequeued from a particular device.
+
+ blkio.*_recursive
+ Recursive version of various stats. These files show the
+ same information as their non-recursive counterparts but
+ include stats from all the descendant cgroups.
+
+Throttling/Upper limit policy files
+-----------------------------------
+ blkio.throttle.read_bps_device
+ Specifies upper limit on READ rate from the device. IO rate is
+ specified in bytes per second. Rules are per device. Following is
+ the format::
+
+ echo "<major>:<minor> <rate_bytes_per_second>" > /cgrp/blkio.throttle.read_bps_device
+
+ blkio.throttle.write_bps_device
+ Specifies upper limit on WRITE rate to the device. IO rate is
+ specified in bytes per second. Rules are per device. Following is
+ the format::
+
+ echo "<major>:<minor> <rate_bytes_per_second>" > /cgrp/blkio.throttle.write_bps_device
+
+ blkio.throttle.read_iops_device
+ Specifies upper limit on READ rate from the device. IO rate is
+ specified in IO per second. Rules are per device. Following is
+ the format::
+
+ echo "<major>:<minor> <rate_io_per_second>" > /cgrp/blkio.throttle.read_iops_device
+
+ blkio.throttle.write_iops_device
+ Specifies upper limit on WRITE rate to the device. IO rate is
+ specified in io per second. Rules are per device. Following is
+ the format::
+
+ echo "<major>:<minor> <rate_io_per_second>" > /cgrp/blkio.throttle.write_iops_device
+
+ Note: If both BW and IOPS rules are specified for a device, then IO is
+ subjected to both the constraints.
+
+ blkio.throttle.io_serviced
+ Number of IOs (bio) issued to the disk by the group. These
+ are further divided by the type of operation - read or write, sync
+ or async. First two fields specify the major and minor number of the
+ device, third field specifies the operation type and the fourth field
+ specifies the number of IOs.
+
+ blkio.throttle.io_service_bytes
+ Number of bytes transferred to/from the disk by the group. These
+ are further divided by the type of operation - read or write, sync
+ or async. First two fields specify the major and minor number of the
+ device, third field specifies the operation type and the fourth field
+ specifies the number of bytes.
+
+Common files among various policies
+-----------------------------------
+ blkio.reset_stats
+ Writing an int to this file will result in resetting all the stats
+ for that cgroup.
diff --git a/Documentation/admin-guide/cgroup-v1/cgroups.rst b/Documentation/admin-guide/cgroup-v1/cgroups.rst
new file mode 100644
index 000000000000..463f98453323
--- /dev/null
+++ b/Documentation/admin-guide/cgroup-v1/cgroups.rst
@@ -0,0 +1,697 @@
+==============
+Control Groups
+==============
+
+Written by Paul Menage <menage@google.com> based on
+Documentation/admin-guide/cgroup-v1/cpusets.rst
+
+Original copyright statements from cpusets.txt:
+
+Portions Copyright (C) 2004 BULL SA.
+
+Portions Copyright (c) 2004-2006 Silicon Graphics, Inc.
+
+Modified by Paul Jackson <pj@sgi.com>
+
+Modified by Christoph Lameter <cl@gentwo.org>
+
+.. CONTENTS:
+
+ 1. Control Groups
+ 1.1 What are cgroups ?
+ 1.2 Why are cgroups needed ?
+ 1.3 How are cgroups implemented ?
+ 1.4 What does notify_on_release do ?
+ 1.5 What does clone_children do ?
+ 1.6 How do I use cgroups ?
+ 2. Usage Examples and Syntax
+ 2.1 Basic Usage
+ 2.2 Attaching processes
+ 2.3 Mounting hierarchies by name
+ 3. Kernel API
+ 3.1 Overview
+ 3.2 Synchronization
+ 3.3 Subsystem API
+ 4. Extended attributes usage
+ 5. Questions
+
+1. Control Groups
+=================
+
+1.1 What are cgroups ?
+----------------------
+
+Control Groups provide a mechanism for aggregating/partitioning sets of
+tasks, and all their future children, into hierarchical groups with
+specialized behaviour.
+
+Definitions:
+
+A *cgroup* associates a set of tasks with a set of parameters for one
+or more subsystems.
+
+A *subsystem* is a module that makes use of the task grouping
+facilities provided by cgroups to treat groups of tasks in
+particular ways. A subsystem is typically a "resource controller" that
+schedules a resource or applies per-cgroup limits, but it may be
+anything that wants to act on a group of processes, e.g. a
+virtualization subsystem.
+
+A *hierarchy* is a set of cgroups arranged in a tree, such that
+every task in the system is in exactly one of the cgroups in the
+hierarchy, and a set of subsystems; each subsystem has system-specific
+state attached to each cgroup in the hierarchy. Each hierarchy has
+an instance of the cgroup virtual filesystem associated with it.
+
+At any one time there may be multiple active hierarchies of task
+cgroups. Each hierarchy is a partition of all tasks in the system.
+
+User-level code may create and destroy cgroups by name in an
+instance of the cgroup virtual file system, specify and query to
+which cgroup a task is assigned, and list the task PIDs assigned to
+a cgroup. Those creations and assignments only affect the hierarchy
+associated with that instance of the cgroup file system.
+
+On their own, the only use for cgroups is for simple job
+tracking. The intention is that other subsystems hook into the generic
+cgroup support to provide new attributes for cgroups, such as
+accounting/limiting the resources which processes in a cgroup can
+access. For example, cpusets (see Documentation/admin-guide/cgroup-v1/cpusets.rst) allow
+you to associate a set of CPUs and a set of memory nodes with the
+tasks in each cgroup.
+
+.. _cgroups-why-needed:
+
+1.2 Why are cgroups needed ?
+----------------------------
+
+There are multiple efforts to provide process aggregations in the
+Linux kernel, mainly for resource-tracking purposes. Such efforts
+include cpusets, CKRM/ResGroups, UserBeanCounters, and virtual server
+namespaces. These all require the basic notion of a
+grouping/partitioning of processes, with newly forked processes ending
+up in the same group (cgroup) as their parent process.
+
+The kernel cgroup patch provides the minimum essential kernel
+mechanisms required to efficiently implement such groups. It has
+minimal impact on the system fast paths, and provides hooks for
+specific subsystems such as cpusets to provide additional behaviour as
+desired.
+
+Multiple hierarchy support is provided to allow for situations where
+the division of tasks into cgroups is distinctly different for
+different subsystems - having parallel hierarchies allows each
+hierarchy to be a natural division of tasks, without having to handle
+complex combinations of tasks that would be present if several
+unrelated subsystems needed to be forced into the same tree of
+cgroups.
+
+At one extreme, each resource controller or subsystem could be in a
+separate hierarchy; at the other extreme, all subsystems
+would be attached to the same hierarchy.
+
+As an example of a scenario (originally proposed by vatsa@in.ibm.com)
+that can benefit from multiple hierarchies, consider a large
+university server with various users - students, professors, system
+tasks etc. The resource planning for this server could be along the
+following lines::
+
+ CPU : "Top cpuset"
+ / \
+ CPUSet1 CPUSet2
+ | |
+ (Professors) (Students)
+
+ In addition (system tasks) are attached to topcpuset (so
+ that they can run anywhere) with a limit of 20%
+
+ Memory : Professors (50%), Students (30%), system (20%)
+
+ Disk : Professors (50%), Students (30%), system (20%)
+
+ Network : WWW browsing (20%), Network File System (60%), others (20%)
+ / \
+ Professors (15%) students (5%)
+
+Browsers like Firefox/Lynx go into the WWW network class, while (k)nfsd goes
+into the NFS network class.
+
+At the same time Firefox/Lynx will share an appropriate CPU/Memory class
+depending on who launched it (prof/student).
+
+With the ability to classify tasks differently for different resources
+(by putting those resource subsystems in different hierarchies),
+the admin can easily set up a script which receives exec notifications
+and depending on who is launching the browser he can::
+
+ # echo browser_pid > /sys/fs/cgroup/<restype>/<userclass>/tasks
+
+With only a single hierarchy, he now would potentially have to create
+a separate cgroup for every browser launched and associate it with
+appropriate network and other resource class. This may lead to
+proliferation of such cgroups.
+
+Also let's say that the administrator would like to give enhanced network
+access temporarily to a student's browser (since it is night and the user
+wants to do online gaming :)) OR give one of the student's simulation
+apps enhanced CPU power.
+
+With ability to write PIDs directly to resource classes, it's just a
+matter of::
+
+ # echo pid > /sys/fs/cgroup/network/<new_class>/tasks
+ (after some time)
+ # echo pid > /sys/fs/cgroup/network/<orig_class>/tasks
+
+Without this ability, the administrator would have to split the cgroup into
+multiple separate ones and then associate the new cgroups with the
+new resource classes.
+
+
+
+1.3 How are cgroups implemented ?
+---------------------------------
+
+Control Groups extends the kernel as follows:
+
+ - Each task in the system has a reference-counted pointer to a
+ css_set.
+
+ - A css_set contains a set of reference-counted pointers to
+ cgroup_subsys_state objects, one for each cgroup subsystem
+ registered in the system. There is no direct link from a task to
+ the cgroup of which it's a member in each hierarchy, but this
+ can be determined by following pointers through the
+ cgroup_subsys_state objects. This is because accessing the
+ subsystem state is something that's expected to happen frequently
+ and in performance-critical code, whereas operations that require a
+ task's actual cgroup assignments (in particular, moving between
+ cgroups) are less common. A linked list runs through the cg_list
+ field of each task_struct using the css_set, anchored at
+ css_set->tasks.
+
+ - A cgroup hierarchy filesystem can be mounted for browsing and
+ manipulation from user space.
+
+ - You can list all the tasks (by PID) attached to any cgroup.
+
+The implementation of cgroups requires a few, simple hooks
+into the rest of the kernel, none in performance-critical paths:
+
+ - in init/main.c, to initialize the root cgroups and initial
+ css_set at system boot.
+
+ - in fork and exit, to attach and detach a task from its css_set.
+
+In addition, a new file system of type "cgroup" may be mounted, to
+enable browsing and modifying the cgroups presently known to the
+kernel. When mounting a cgroup hierarchy, you may specify a
+comma-separated list of subsystems to mount as the filesystem mount
+options. By default, mounting the cgroup filesystem attempts to
+mount a hierarchy containing all registered subsystems.
+
+If an active hierarchy with exactly the same set of subsystems already
+exists, it will be reused for the new mount. If no existing hierarchy
+matches, and any of the requested subsystems are in use in an existing
+hierarchy, the mount will fail with -EBUSY. Otherwise, a new hierarchy
+is activated, associated with the requested subsystems.
+
+It's not currently possible to bind a new subsystem to an active
+cgroup hierarchy, or to unbind a subsystem from an active cgroup
+hierarchy. This may be possible in future, but is fraught with nasty
+error-recovery issues.
+
+When a cgroup filesystem is unmounted, if there are any
+child cgroups created below the top-level cgroup, that hierarchy
+will remain active even though unmounted; if there are no
+child cgroups then the hierarchy will be deactivated.
+
+No new system calls are added for cgroups - all support for
+querying and modifying cgroups is via this cgroup file system.
+
+Each task under /proc has an added file named 'cgroup' displaying,
+for each active hierarchy, the subsystem names and the cgroup name
+as the path relative to the root of the cgroup file system.
+
+Each cgroup is represented by a directory in the cgroup file system
+containing the following files describing that cgroup:
+
+ - tasks: list of tasks (by PID) attached to that cgroup. This list
+ is not guaranteed to be sorted. Writing a thread ID into this file
+ moves the thread into this cgroup.
+ - cgroup.procs: list of thread group IDs in the cgroup. This list is
+ not guaranteed to be sorted or free of duplicate TGIDs, and userspace
+ should sort/uniquify the list if this property is required.
+ Writing a thread group ID into this file moves all threads in that
+ group into this cgroup.
+ - notify_on_release flag: run the release agent on exit?
+ - release_agent: the path to use for release notifications (this file
+ exists in the top cgroup only)
+
+Other subsystems such as cpusets may add additional files in each
+cgroup dir.
+
+New cgroups are created using the mkdir system call or shell
+command. The properties of a cgroup, such as its flags, are
+modified by writing to the appropriate file in that cgroups
+directory, as listed above.
+
+The named hierarchical structure of nested cgroups allows partitioning
+a large system into nested, dynamically changeable, "soft-partitions".
+
+The attachment of each task, automatically inherited at fork by any
+children of that task, to a cgroup allows organizing the work load
+on a system into related sets of tasks. A task may be re-attached to
+any other cgroup, if allowed by the permissions on the necessary
+cgroup file system directories.
+
+When a task is moved from one cgroup to another, it gets a new
+css_set pointer - if there's an already existing css_set with the
+desired collection of cgroups then that group is reused, otherwise a new
+css_set is allocated. The appropriate existing css_set is located by
+looking into a hash table.
+
+To allow access from a cgroup to the css_sets (and hence tasks)
+that comprise it, a set of cg_cgroup_link objects form a lattice;
+each cg_cgroup_link is linked into a list of cg_cgroup_links for
+a single cgroup on its cgrp_link_list field, and a list of
+cg_cgroup_links for a single css_set on its cg_link_list.
+
+Thus the set of tasks in a cgroup can be listed by iterating over
+each css_set that references the cgroup, and sub-iterating over
+each css_set's task set.
+
+The use of a Linux virtual file system (vfs) to represent the
+cgroup hierarchy provides for a familiar permission and name space
+for cgroups, with a minimum of additional kernel code.
+
+1.4 What does notify_on_release do ?
+------------------------------------
+
+If the notify_on_release flag is enabled (1) in a cgroup, then
+whenever the last task in the cgroup leaves (exits or attaches to
+some other cgroup) and the last child cgroup of that cgroup
+is removed, then the kernel runs the command specified by the contents
+of the "release_agent" file in that hierarchy's root directory,
+supplying the pathname (relative to the mount point of the cgroup
+file system) of the abandoned cgroup. This enables automatic
+removal of abandoned cgroups. The default value of
+notify_on_release in the root cgroup at system boot is disabled
+(0). The default value of other cgroups at creation is the current
+value of their parents' notify_on_release settings. The default value of
+a cgroup hierarchy's release_agent path is empty.
+
+1.5 What does clone_children do ?
+---------------------------------
+
+This flag only affects the cpuset controller. If the clone_children
+flag is enabled (1) in a cgroup, a new cpuset cgroup will copy its
+configuration from the parent during initialization.
+
+1.6 How do I use cgroups ?
+--------------------------
+
+To start a new job that is to be contained within a cgroup, using
+the "cpuset" cgroup subsystem, the steps are something like::
+
+ 1) mount -t tmpfs cgroup_root /sys/fs/cgroup
+ 2) mkdir /sys/fs/cgroup/cpuset
+ 3) mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset
+ 4) Create the new cgroup by doing mkdir's and write's (or echo's) in
+ the /sys/fs/cgroup/cpuset virtual file system.
+ 5) Start a task that will be the "founding father" of the new job.
+ 6) Attach that task to the new cgroup by writing its PID to the
+ /sys/fs/cgroup/cpuset tasks file for that cgroup.
+ 7) fork, exec or clone the job tasks from this founding father task.
+
+For example, the following sequence of commands will setup a cgroup
+named "Charlie", containing just CPUs 2 and 3, and Memory Node 1,
+and then start a subshell 'sh' in that cgroup::
+
+ mount -t tmpfs cgroup_root /sys/fs/cgroup
+ mkdir /sys/fs/cgroup/cpuset
+ mount -t cgroup cpuset -ocpuset /sys/fs/cgroup/cpuset
+ cd /sys/fs/cgroup/cpuset
+ mkdir Charlie
+ cd Charlie
+ /bin/echo 2-3 > cpuset.cpus
+ /bin/echo 1 > cpuset.mems
+ /bin/echo $$ > tasks
+ sh
+ # The subshell 'sh' is now running in cgroup Charlie
+ # The next line should display '/Charlie'
+ cat /proc/self/cgroup
+
+2. Usage Examples and Syntax
+============================
+
+2.1 Basic Usage
+---------------
+
+Creating, modifying, using cgroups can be done through the cgroup
+virtual filesystem.
+
+To mount a cgroup hierarchy with all available subsystems, type::
+
+ # mount -t cgroup xxx /sys/fs/cgroup
+
+The "xxx" is not interpreted by the cgroup code, but will appear in
+/proc/mounts so may be any useful identifying string that you like.
+
+Note: Some subsystems do not work without some user input first. For instance,
+if cpusets are enabled the user will have to populate the cpus and mems files
+for each new cgroup created before that group can be used.
+
+As explained in section `1.2 Why are cgroups needed?` you should create
+different hierarchies of cgroups for each single resource or group of
+resources you want to control. Therefore, you should mount a tmpfs on
+/sys/fs/cgroup and create directories for each cgroup resource or resource
+group::
+
+ # mount -t tmpfs cgroup_root /sys/fs/cgroup
+ # mkdir /sys/fs/cgroup/rg1
+
+To mount a cgroup hierarchy with just the cpuset and memory
+subsystems, type::
+
+ # mount -t cgroup -o cpuset,memory hier1 /sys/fs/cgroup/rg1
+
+While remounting cgroups is currently supported, it is not recommend
+to use it. Remounting allows changing bound subsystems and
+release_agent. Rebinding is hardly useful as it only works when the
+hierarchy is empty and release_agent itself should be replaced with
+conventional fsnotify. The support for remounting will be removed in
+the future.
+
+To Specify a hierarchy's release_agent::
+
+ # mount -t cgroup -o cpuset,release_agent="/sbin/cpuset_release_agent" \
+ xxx /sys/fs/cgroup/rg1
+
+Note that specifying 'release_agent' more than once will return failure.
+
+Note that changing the set of subsystems is currently only supported
+when the hierarchy consists of a single (root) cgroup. Supporting
+the ability to arbitrarily bind/unbind subsystems from an existing
+cgroup hierarchy is intended to be implemented in the future.
+
+Then under /sys/fs/cgroup/rg1 you can find a tree that corresponds to the
+tree of the cgroups in the system. For instance, /sys/fs/cgroup/rg1
+is the cgroup that holds the whole system.
+
+If you want to change the value of release_agent::
+
+ # echo "/sbin/new_release_agent" > /sys/fs/cgroup/rg1/release_agent
+
+It can also be changed via remount.
+
+If you want to create a new cgroup under /sys/fs/cgroup/rg1::
+
+ # cd /sys/fs/cgroup/rg1
+ # mkdir my_cgroup
+
+Now you want to do something with this cgroup:
+
+ # cd my_cgroup
+
+In this directory you can find several files::
+
+ # ls
+ cgroup.procs notify_on_release tasks
+ (plus whatever files added by the attached subsystems)
+
+Now attach your shell to this cgroup::
+
+ # /bin/echo $$ > tasks
+
+You can also create cgroups inside your cgroup by using mkdir in this
+directory::
+
+ # mkdir my_sub_cs
+
+To remove a cgroup, just use rmdir::
+
+ # rmdir my_sub_cs
+
+This will fail if the cgroup is in use (has cgroups inside, or
+has processes attached, or is held alive by other subsystem-specific
+reference).
+
+2.2 Attaching processes
+-----------------------
+
+::
+
+ # /bin/echo PID > tasks
+
+Note that it is PID, not PIDs. You can only attach ONE task at a time.
+If you have several tasks to attach, you have to do it one after another::
+
+ # /bin/echo PID1 > tasks
+ # /bin/echo PID2 > tasks
+ ...
+ # /bin/echo PIDn > tasks
+
+You can attach the current shell task by echoing 0::
+
+ # echo 0 > tasks
+
+You can use the cgroup.procs file instead of the tasks file to move all
+threads in a threadgroup at once. Echoing the PID of any task in a
+threadgroup to cgroup.procs causes all tasks in that threadgroup to be
+attached to the cgroup. Writing 0 to cgroup.procs moves all tasks
+in the writing task's threadgroup.
+
+Note: Since every task is always a member of exactly one cgroup in each
+mounted hierarchy, to remove a task from its current cgroup you must
+move it into a new cgroup (possibly the root cgroup) by writing to the
+new cgroup's tasks file.
+
+Note: Due to some restrictions enforced by some cgroup subsystems, moving
+a process to another cgroup can fail.
+
+2.3 Mounting hierarchies by name
+--------------------------------
+
+Passing the name=<x> option when mounting a cgroups hierarchy
+associates the given name with the hierarchy. This can be used when
+mounting a pre-existing hierarchy, in order to refer to it by name
+rather than by its set of active subsystems. Each hierarchy is either
+nameless, or has a unique name.
+
+The name should match [\w.-]+
+
+When passing a name=<x> option for a new hierarchy, you need to
+specify subsystems manually; the legacy behaviour of mounting all
+subsystems when none are explicitly specified is not supported when
+you give a subsystem a name.
+
+The name of the subsystem appears as part of the hierarchy description
+in /proc/mounts and /proc/<pid>/cgroups.
+
+
+3. Kernel API
+=============
+
+3.1 Overview
+------------
+
+Each kernel subsystem that wants to hook into the generic cgroup
+system needs to create a cgroup_subsys object. This contains
+various methods, which are callbacks from the cgroup system, along
+with a subsystem ID which will be assigned by the cgroup system.
+
+Other fields in the cgroup_subsys object include:
+
+- subsys_id: a unique array index for the subsystem, indicating which
+ entry in cgroup->subsys[] this subsystem should be managing.
+
+- name: should be initialized to a unique subsystem name. Should be
+ no longer than MAX_CGROUP_TYPE_NAMELEN.
+
+- early_init: indicate if the subsystem needs early initialization
+ at system boot.
+
+Each cgroup object created by the system has an array of pointers,
+indexed by subsystem ID; this pointer is entirely managed by the
+subsystem; the generic cgroup code will never touch this pointer.
+
+3.2 Synchronization
+-------------------
+
+There is a global mutex, cgroup_mutex, used by the cgroup
+system. This should be taken by anything that wants to modify a
+cgroup. It may also be taken to prevent cgroups from being
+modified, but more specific locks may be more appropriate in that
+situation.
+
+See kernel/cgroup.c for more details.
+
+Subsystems can take/release the cgroup_mutex via the functions
+cgroup_lock()/cgroup_unlock().
+
+Accessing a task's cgroup pointer may be done in the following ways:
+- while holding cgroup_mutex
+- while holding the task's alloc_lock (via task_lock())
+- inside an rcu_read_lock() section via rcu_dereference()
+
+3.3 Subsystem API
+-----------------
+
+Each subsystem should:
+
+- add an entry in linux/cgroup_subsys.h
+- define a cgroup_subsys object called <name>_cgrp_subsys
+
+Each subsystem may export the following methods. The only mandatory
+methods are css_alloc/free. Any others that are null are presumed to
+be successful no-ops.
+
+``struct cgroup_subsys_state *css_alloc(struct cgroup *cgrp)``
+(cgroup_mutex held by caller)
+
+Called to allocate a subsystem state object for a cgroup. The
+subsystem should allocate its subsystem state object for the passed
+cgroup, returning a pointer to the new object on success or a
+ERR_PTR() value. On success, the subsystem pointer should point to
+a structure of type cgroup_subsys_state (typically embedded in a
+larger subsystem-specific object), which will be initialized by the
+cgroup system. Note that this will be called at initialization to
+create the root subsystem state for this subsystem; this case can be
+identified by the passed cgroup object having a NULL parent (since
+it's the root of the hierarchy) and may be an appropriate place for
+initialization code.
+
+``int css_online(struct cgroup *cgrp)``
+(cgroup_mutex held by caller)
+
+Called after @cgrp successfully completed all allocations and made
+visible to cgroup_for_each_child/descendant_*() iterators. The
+subsystem may choose to fail creation by returning -errno. This
+callback can be used to implement reliable state sharing and
+propagation along the hierarchy. See the comment on
+cgroup_for_each_live_descendant_pre() for details.
+
+``void css_offline(struct cgroup *cgrp);``
+(cgroup_mutex held by caller)
+
+This is the counterpart of css_online() and called iff css_online()
+has succeeded on @cgrp. This signifies the beginning of the end of
+@cgrp. @cgrp is being removed and the subsystem should start dropping
+all references it's holding on @cgrp. When all references are dropped,
+cgroup removal will proceed to the next step - css_free(). After this
+callback, @cgrp should be considered dead to the subsystem.
+
+``void css_free(struct cgroup *cgrp)``
+(cgroup_mutex held by caller)
+
+The cgroup system is about to free @cgrp; the subsystem should free
+its subsystem state object. By the time this method is called, @cgrp
+is completely unused; @cgrp->parent is still valid. (Note - can also
+be called for a newly-created cgroup if an error occurs after this
+subsystem's create() method has been called for the new cgroup).
+
+``int can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)``
+(cgroup_mutex held by caller)
+
+Called prior to moving one or more tasks into a cgroup; if the
+subsystem returns an error, this will abort the attach operation.
+@tset contains the tasks to be attached and is guaranteed to have at
+least one task in it.
+
+If there are multiple tasks in the taskset, then:
+ - it's guaranteed that all are from the same thread group
+ - @tset contains all tasks from the thread group whether or not
+ they're switching cgroups
+ - the first task is the leader
+
+Each @tset entry also contains the task's old cgroup and tasks which
+aren't switching cgroup can be skipped easily using the
+cgroup_taskset_for_each() iterator. Note that this isn't called on a
+fork. If this method returns 0 (success) then this should remain valid
+while the caller holds cgroup_mutex and it is ensured that either
+attach() or cancel_attach() will be called in future.
+
+``void css_reset(struct cgroup_subsys_state *css)``
+(cgroup_mutex held by caller)
+
+An optional operation which should restore @css's configuration to the
+initial state. This is currently only used on the unified hierarchy
+when a subsystem is disabled on a cgroup through
+"cgroup.subtree_control" but should remain enabled because other
+subsystems depend on it. cgroup core makes such a css invisible by
+removing the associated interface files and invokes this callback so
+that the hidden subsystem can return to the initial neutral state.
+This prevents unexpected resource control from a hidden css and
+ensures that the configuration is in the initial state when it is made
+visible again later.
+
+``void cancel_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)``
+(cgroup_mutex held by caller)
+
+Called when a task attach operation has failed after can_attach() has succeeded.
+A subsystem whose can_attach() has some side-effects should provide this
+function, so that the subsystem can implement a rollback. If not, not necessary.
+This will be called only about subsystems whose can_attach() operation have
+succeeded. The parameters are identical to can_attach().
+
+``void attach(struct cgroup *cgrp, struct cgroup_taskset *tset)``
+(cgroup_mutex held by caller)
+
+Called after the task has been attached to the cgroup, to allow any
+post-attachment activity that requires memory allocations or blocking.
+The parameters are identical to can_attach().
+
+``void fork(struct task_struct *task)``
+
+Called when a task is forked into a cgroup.
+
+``void exit(struct task_struct *task)``
+
+Called during task exit.
+
+``void free(struct task_struct *task)``
+
+Called when the task_struct is freed.
+
+``void bind(struct cgroup *root)``
+(cgroup_mutex held by caller)
+
+Called when a cgroup subsystem is rebound to a different hierarchy
+and root cgroup. Currently this will only involve movement between
+the default hierarchy (which never has sub-cgroups) and a hierarchy
+that is being created/destroyed (and hence has no sub-cgroups).
+
+4. Extended attribute usage
+===========================
+
+cgroup filesystem supports certain types of extended attributes in its
+directories and files. The current supported types are:
+
+ - Trusted (XATTR_TRUSTED)
+ - Security (XATTR_SECURITY)
+
+Both require CAP_SYS_ADMIN capability to set.
+
+Like in tmpfs, the extended attributes in cgroup filesystem are stored
+using kernel memory and it's advised to keep the usage at minimum. This
+is the reason why user defined extended attributes are not supported, since
+any user can do it and there's no limit in the value size.
+
+The current known users for this feature are SELinux to limit cgroup usage
+in containers and systemd for assorted meta data like main PID in a cgroup
+(systemd creates a cgroup per service).
+
+5. Questions
+============
+
+::
+
+ Q: what's up with this '/bin/echo' ?
+ A: bash's builtin 'echo' command does not check calls to write() against
+ errors. If you use it in the cgroup file system, you won't be
+ able to tell whether a command succeeded or failed.
+
+ Q: When I attach processes, only the first of the line gets really attached !
+ A: We can only return one error code per call to write(). So you should also
+ put only ONE PID.
diff --git a/Documentation/admin-guide/cgroup-v1/cpuacct.rst b/Documentation/admin-guide/cgroup-v1/cpuacct.rst
new file mode 100644
index 000000000000..d30ed81d2ad7
--- /dev/null
+++ b/Documentation/admin-guide/cgroup-v1/cpuacct.rst
@@ -0,0 +1,50 @@
+=========================
+CPU Accounting Controller
+=========================
+
+The CPU accounting controller is used to group tasks using cgroups and
+account the CPU usage of these groups of tasks.
+
+The CPU accounting controller supports multi-hierarchy groups. An accounting
+group accumulates the CPU usage of all of its child groups and the tasks
+directly present in its group.
+
+Accounting groups can be created by first mounting the cgroup filesystem::
+
+ # mount -t cgroup -ocpuacct none /sys/fs/cgroup
+
+With the above step, the initial or the parent accounting group becomes
+visible at /sys/fs/cgroup. At bootup, this group includes all the tasks in
+the system. /sys/fs/cgroup/tasks lists the tasks in this cgroup.
+/sys/fs/cgroup/cpuacct.usage gives the CPU time (in nanoseconds) obtained
+by this group which is essentially the CPU time obtained by all the tasks
+in the system.
+
+New accounting groups can be created under the parent group /sys/fs/cgroup::
+
+ # cd /sys/fs/cgroup
+ # mkdir g1
+ # echo $$ > g1/tasks
+
+The above steps create a new group g1 and move the current shell
+process (bash) into it. CPU time consumed by this bash and its children
+can be obtained from g1/cpuacct.usage and the same is accumulated in
+/sys/fs/cgroup/cpuacct.usage also.
+
+cpuacct.stat file lists a few statistics which further divide the
+CPU time obtained by the cgroup into user and system times. Currently
+the following statistics are supported:
+
+user: Time spent by tasks of the cgroup in user mode.
+system: Time spent by tasks of the cgroup in kernel mode.
+
+user and system are in USER_HZ unit.
+
+cpuacct controller uses percpu_counter interface to collect user and
+system times. This has two side effects:
+
+- It is theoretically possible to see wrong values for user and system times.
+ This is because percpu_counter_read() on 32bit systems isn't safe
+ against concurrent writes.
+- It is possible to see slightly outdated values for user and system times
+ due to the batch processing nature of percpu_counter.
diff --git a/Documentation/admin-guide/cgroup-v1/cpusets.rst b/Documentation/admin-guide/cgroup-v1/cpusets.rst
new file mode 100644
index 000000000000..c7909e5ac136
--- /dev/null
+++ b/Documentation/admin-guide/cgroup-v1/cpusets.rst
@@ -0,0 +1,884 @@
+.. _cpusets:
+
+=======
+CPUSETS
+=======
+
+Copyright (C) 2004 BULL SA.
+
+Written by Simon.Derr@bull.net
+
+- Portions Copyright (c) 2004-2006 Silicon Graphics, Inc.
+- Modified by Paul Jackson <pj@sgi.com>
+- Modified by Christoph Lameter <cl@gentwo.org>
+- Modified by Paul Menage <menage@google.com>
+- Modified by Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
+
+.. CONTENTS:
+
+ 1. Cpusets
+ 1.1 What are cpusets ?
+ 1.2 Why are cpusets needed ?
+ 1.3 How are cpusets implemented ?
+ 1.4 What are exclusive cpusets ?
+ 1.5 What is memory_pressure ?
+ 1.6 What is memory spread ?
+ 1.7 What is sched_load_balance ?
+ 1.8 What is sched_relax_domain_level ?
+ 1.9 How do I use cpusets ?
+ 2. Usage Examples and Syntax
+ 2.1 Basic Usage
+ 2.2 Adding/removing cpus
+ 2.3 Setting flags
+ 2.4 Attaching processes
+ 3. Questions
+ 4. Contact
+
+1. Cpusets
+==========
+
+1.1 What are cpusets ?
+----------------------
+
+Cpusets provide a mechanism for assigning a set of CPUs and Memory
+Nodes to a set of tasks. In this document "Memory Node" refers to
+an on-line node that contains memory.
+
+Cpusets constrain the CPU and Memory placement of tasks to only
+the resources within a task's current cpuset. They form a nested
+hierarchy visible in a virtual file system. These are the essential
+hooks, beyond what is already present, required to manage dynamic
+job placement on large systems.
+
+Cpusets use the generic cgroup subsystem described in
+Documentation/admin-guide/cgroup-v1/cgroups.rst.
+
+Requests by a task, using the sched_setaffinity(2) system call to
+include CPUs in its CPU affinity mask, and using the mbind(2) and
+set_mempolicy(2) system calls to include Memory Nodes in its memory
+policy, are both filtered through that task's cpuset, filtering out any
+CPUs or Memory Nodes not in that cpuset. The scheduler will not
+schedule a task on a CPU that is not allowed in its cpus_allowed
+vector, and the kernel page allocator will not allocate a page on a
+node that is not allowed in the requesting task's mems_allowed vector.
+
+User level code may create and destroy cpusets by name in the cgroup
+virtual file system, manage the attributes and permissions of these
+cpusets and which CPUs and Memory Nodes are assigned to each cpuset,
+specify and query to which cpuset a task is assigned, and list the
+task pids assigned to a cpuset.
+
+
+1.2 Why are cpusets needed ?
+----------------------------
+
+The management of large computer systems, with many processors (CPUs),
+complex memory cache hierarchies and multiple Memory Nodes having
+non-uniform access times (NUMA) presents additional challenges for
+the efficient scheduling and memory placement of processes.
+
+Frequently more modest sized systems can be operated with adequate
+efficiency just by letting the operating system automatically share
+the available CPU and Memory resources amongst the requesting tasks.
+
+But larger systems, which benefit more from careful processor and
+memory placement to reduce memory access times and contention,
+and which typically represent a larger investment for the customer,
+can benefit from explicitly placing jobs on properly sized subsets of
+the system.
+
+This can be especially valuable on:
+
+ * Web Servers running multiple instances of the same web application,
+ * Servers running different applications (for instance, a web server
+ and a database), or
+ * NUMA systems running large HPC applications with demanding
+ performance characteristics.
+
+These subsets, or "soft partitions" must be able to be dynamically
+adjusted, as the job mix changes, without impacting other concurrently
+executing jobs. The location of the running jobs pages may also be moved
+when the memory locations are changed.
+
+The kernel cpuset patch provides the minimum essential kernel
+mechanisms required to efficiently implement such subsets. It
+leverages existing CPU and Memory Placement facilities in the Linux
+kernel to avoid any additional impact on the critical scheduler or
+memory allocator code.
+
+
+1.3 How are cpusets implemented ?
+---------------------------------
+
+Cpusets provide a Linux kernel mechanism to constrain which CPUs and
+Memory Nodes are used by a process or set of processes.
+
+The Linux kernel already has a pair of mechanisms to specify on which
+CPUs a task may be scheduled (sched_setaffinity) and on which Memory
+Nodes it may obtain memory (mbind, set_mempolicy).
+
+Cpusets extends these two mechanisms as follows:
+
+ - Cpusets are sets of allowed CPUs and Memory Nodes, known to the
+ kernel.
+ - Each task in the system is attached to a cpuset, via a pointer
+ in the task structure to a reference counted cgroup structure.
+ - Calls to sched_setaffinity are filtered to just those CPUs
+ allowed in that task's cpuset.
+ - Calls to mbind and set_mempolicy are filtered to just
+ those Memory Nodes allowed in that task's cpuset.
+ - The root cpuset contains all the systems CPUs and Memory
+ Nodes.
+ - For any cpuset, one can define child cpusets containing a subset
+ of the parents CPU and Memory Node resources.
+ - The hierarchy of cpusets can be mounted at /dev/cpuset, for
+ browsing and manipulation from user space.
+ - A cpuset may be marked exclusive, which ensures that no other
+ cpuset (except direct ancestors and descendants) may contain
+ any overlapping CPUs or Memory Nodes.
+ - You can list all the tasks (by pid) attached to any cpuset.
+
+The implementation of cpusets requires a few, simple hooks
+into the rest of the kernel, none in performance critical paths:
+
+ - in init/main.c, to initialize the root cpuset at system boot.
+ - in fork and exit, to attach and detach a task from its cpuset.
+ - in sched_setaffinity, to mask the requested CPUs by what's
+ allowed in that task's cpuset.
+ - in sched.c migrate_live_tasks(), to keep migrating tasks within
+ the CPUs allowed by their cpuset, if possible.
+ - in the mbind and set_mempolicy system calls, to mask the requested
+ Memory Nodes by what's allowed in that task's cpuset.
+ - in page_alloc.c, to restrict memory to allowed nodes.
+ - in vmscan.c, to restrict page recovery to the current cpuset.
+
+You should mount the "cgroup" filesystem type in order to enable
+browsing and modifying the cpusets presently known to the kernel. No
+new system calls are added for cpusets - all support for querying and
+modifying cpusets is via this cpuset file system.
+
+The /proc/<pid>/status file for each task has four added lines,
+displaying the task's cpus_allowed (on which CPUs it may be scheduled)
+and mems_allowed (on which Memory Nodes it may obtain memory),
+in the two formats seen in the following example::
+
+ Cpus_allowed: ffffffff,ffffffff,ffffffff,ffffffff
+ Cpus_allowed_list: 0-127
+ Mems_allowed: ffffffff,ffffffff
+ Mems_allowed_list: 0-63
+
+Each cpuset is represented by a directory in the cgroup file system
+containing (on top of the standard cgroup files) the following
+files describing that cpuset:
+
+ - cpuset.cpus: list of CPUs in that cpuset
+ - cpuset.mems: list of Memory Nodes in that cpuset
+ - cpuset.memory_migrate flag: if set, move pages to cpusets nodes
+ - cpuset.cpu_exclusive flag: is cpu placement exclusive?
+ - cpuset.mem_exclusive flag: is memory placement exclusive?
+ - cpuset.mem_hardwall flag: is memory allocation hardwalled
+ - cpuset.memory_pressure: measure of how much paging pressure in cpuset
+ - cpuset.memory_spread_page flag: if set, spread page cache evenly on allowed nodes
+ - cpuset.memory_spread_slab flag: OBSOLETE. Doesn't have any function.
+ - cpuset.sched_load_balance flag: if set, load balance within CPUs on that cpuset
+ - cpuset.sched_relax_domain_level: the searching range when migrating tasks
+
+In addition, only the root cpuset has the following file:
+
+ - cpuset.memory_pressure_enabled flag: compute memory_pressure?
+
+New cpusets are created using the mkdir system call or shell
+command. The properties of a cpuset, such as its flags, allowed
+CPUs and Memory Nodes, and attached tasks, are modified by writing
+to the appropriate file in that cpusets directory, as listed above.
+
+The named hierarchical structure of nested cpusets allows partitioning
+a large system into nested, dynamically changeable, "soft-partitions".
+
+The attachment of each task, automatically inherited at fork by any
+children of that task, to a cpuset allows organizing the work load
+on a system into related sets of tasks such that each set is constrained
+to using the CPUs and Memory Nodes of a particular cpuset. A task
+may be re-attached to any other cpuset, if allowed by the permissions
+on the necessary cpuset file system directories.
+
+Such management of a system "in the large" integrates smoothly with
+the detailed placement done on individual tasks and memory regions
+using the sched_setaffinity, mbind and set_mempolicy system calls.
+
+The following rules apply to each cpuset:
+
+ - Its CPUs and Memory Nodes must be a subset of its parents.
+ - It can't be marked exclusive unless its parent is.
+ - If its cpu or memory is exclusive, they may not overlap any sibling.
+
+These rules, and the natural hierarchy of cpusets, enable efficient
+enforcement of the exclusive guarantee, without having to scan all
+cpusets every time any of them change to ensure nothing overlaps a
+exclusive cpuset. Also, the use of a Linux virtual file system (vfs)
+to represent the cpuset hierarchy provides for a familiar permission
+and name space for cpusets, with a minimum of additional kernel code.
+
+The cpus and mems files in the root (top_cpuset) cpuset are
+read-only. The cpus file automatically tracks the value of
+cpu_online_mask using a CPU hotplug notifier, and the mems file
+automatically tracks the value of node_states[N_MEMORY]--i.e.,
+nodes with memory--using the cpuset_track_online_nodes() hook.
+
+The cpuset.effective_cpus and cpuset.effective_mems files are
+normally read-only copies of cpuset.cpus and cpuset.mems files
+respectively. If the cpuset cgroup filesystem is mounted with the
+special "cpuset_v2_mode" option, the behavior of these files will become
+similar to the corresponding files in cpuset v2. In other words, hotplug
+events will not change cpuset.cpus and cpuset.mems. Those events will
+only affect cpuset.effective_cpus and cpuset.effective_mems which show
+the actual cpus and memory nodes that are currently used by this cpuset.
+See Documentation/admin-guide/cgroup-v2.rst for more information about
+cpuset v2 behavior.
+
+
+1.4 What are exclusive cpusets ?
+--------------------------------
+
+If a cpuset is cpu or mem exclusive, no other cpuset, other than
+a direct ancestor or descendant, may share any of the same CPUs or
+Memory Nodes.
+
+A cpuset that is cpuset.mem_exclusive *or* cpuset.mem_hardwall is "hardwalled",
+i.e. it restricts kernel allocations for page, buffer and other data
+commonly shared by the kernel across multiple users. All cpusets,
+whether hardwalled or not, restrict allocations of memory for user
+space. This enables configuring a system so that several independent
+jobs can share common kernel data, such as file system pages, while
+isolating each job's user allocation in its own cpuset. To do this,
+construct a large mem_exclusive cpuset to hold all the jobs, and
+construct child, non-mem_exclusive cpusets for each individual job.
+Only a small amount of typical kernel memory, such as requests from
+interrupt handlers, is allowed to be taken outside even a
+mem_exclusive cpuset.
+
+
+1.5 What is memory_pressure ?
+-----------------------------
+The memory_pressure of a cpuset provides a simple per-cpuset metric
+of the rate that the tasks in a cpuset are attempting to free up in
+use memory on the nodes of the cpuset to satisfy additional memory
+requests.
+
+This enables batch managers monitoring jobs running in dedicated
+cpusets to efficiently detect what level of memory pressure that job
+is causing.
+
+This is useful both on tightly managed systems running a wide mix of
+submitted jobs, which may choose to terminate or re-prioritize jobs that
+are trying to use more memory than allowed on the nodes assigned to them,
+and with tightly coupled, long running, massively parallel scientific
+computing jobs that will dramatically fail to meet required performance
+goals if they start to use more memory than allowed to them.
+
+This mechanism provides a very economical way for the batch manager
+to monitor a cpuset for signs of memory pressure. It's up to the
+batch manager or other user code to decide what to do about it and
+take action.
+
+==>
+ Unless this feature is enabled by writing "1" to the special file
+ /dev/cpuset/memory_pressure_enabled, the hook in the rebalance
+ code of __alloc_pages() for this metric reduces to simply noticing
+ that the cpuset_memory_pressure_enabled flag is zero. So only
+ systems that enable this feature will compute the metric.
+
+Why a per-cpuset, running average:
+
+ Because this meter is per-cpuset, rather than per-task or mm,
+ the system load imposed by a batch scheduler monitoring this
+ metric is sharply reduced on large systems, because a scan of
+ the tasklist can be avoided on each set of queries.
+
+ Because this meter is a running average, instead of an accumulating
+ counter, a batch scheduler can detect memory pressure with a
+ single read, instead of having to read and accumulate results
+ for a period of time.
+
+ Because this meter is per-cpuset rather than per-task or mm,
+ the batch scheduler can obtain the key information, memory
+ pressure in a cpuset, with a single read, rather than having to
+ query and accumulate results over all the (dynamically changing)
+ set of tasks in the cpuset.
+
+A per-cpuset simple digital filter (requires a spinlock and 3 words
+of data per-cpuset) is kept, and updated by any task attached to that
+cpuset, if it enters the synchronous (direct) page reclaim code.
+
+A per-cpuset file provides an integer number representing the recent
+(half-life of 10 seconds) rate of direct page reclaims caused by
+the tasks in the cpuset, in units of reclaims attempted per second,
+times 1000.
+
+
+1.6 What is memory spread ?
+---------------------------
+There are two boolean flag files per cpuset that control where the
+kernel allocates pages for the file system buffers and related in
+kernel data structures. They are called 'cpuset.memory_spread_page' and
+'cpuset.memory_spread_slab'.
+
+If the per-cpuset boolean flag file 'cpuset.memory_spread_page' is set, then
+the kernel will spread the file system buffers (page cache) evenly
+over all the nodes that the faulting task is allowed to use, instead
+of preferring to put those pages on the node where the task is running.
+
+If the per-cpuset boolean flag file 'cpuset.memory_spread_slab' is set,
+then the kernel will spread some file system related slab caches,
+such as for inodes and dentries evenly over all the nodes that the
+faulting task is allowed to use, instead of preferring to put those
+pages on the node where the task is running.
+
+The setting of these flags does not affect anonymous data segment or
+stack segment pages of a task.
+
+By default, both kinds of memory spreading are off, and memory
+pages are allocated on the node local to where the task is running,
+except perhaps as modified by the task's NUMA mempolicy or cpuset
+configuration, so long as sufficient free memory pages are available.
+
+When new cpusets are created, they inherit the memory spread settings
+of their parent.
+
+Setting memory spreading causes allocations for the affected page
+or slab caches to ignore the task's NUMA mempolicy and be spread
+instead. Tasks using mbind() or set_mempolicy() calls to set NUMA
+mempolicies will not notice any change in these calls as a result of
+their containing task's memory spread settings. If memory spreading
+is turned off, then the currently specified NUMA mempolicy once again
+applies to memory page allocations.
+
+Both 'cpuset.memory_spread_page' and 'cpuset.memory_spread_slab' are boolean flag
+files. By default they contain "0", meaning that the feature is off
+for that cpuset. If a "1" is written to that file, then that turns
+the named feature on.
+
+The implementation is simple.
+
+Setting the flag 'cpuset.memory_spread_page' turns on a per-process flag
+PFA_SPREAD_PAGE for each task that is in that cpuset or subsequently
+joins that cpuset. The page allocation calls for the page cache
+is modified to perform an inline check for this PFA_SPREAD_PAGE task
+flag, and if set, a call to a new routine cpuset_mem_spread_node()
+returns the node to prefer for the allocation.
+
+Similarly, setting 'cpuset.memory_spread_slab' turns on the flag
+PFA_SPREAD_SLAB, and appropriately marked slab caches will allocate
+pages from the node returned by cpuset_mem_spread_node().
+
+The cpuset_mem_spread_node() routine is also simple. It uses the
+value of a per-task rotor cpuset_mem_spread_rotor to select the next
+node in the current task's mems_allowed to prefer for the allocation.
+
+This memory placement policy is also known (in other contexts) as
+round-robin or interleave.
+
+This policy can provide substantial improvements for jobs that need
+to place thread local data on the corresponding node, but that need
+to access large file system data sets that need to be spread across
+the several nodes in the jobs cpuset in order to fit. Without this
+policy, especially for jobs that might have one thread reading in the
+data set, the memory allocation across the nodes in the jobs cpuset
+can become very uneven.
+
+1.7 What is sched_load_balance ?
+--------------------------------
+
+The kernel scheduler (kernel/sched/core.c) automatically load balances
+tasks. If one CPU is underutilized, kernel code running on that
+CPU will look for tasks on other more overloaded CPUs and move those
+tasks to itself, within the constraints of such placement mechanisms
+as cpusets and sched_setaffinity.
+
+The algorithmic cost of load balancing and its impact on key shared
+kernel data structures such as the task list increases more than
+linearly with the number of CPUs being balanced. So the scheduler
+has support to partition the systems CPUs into a number of sched
+domains such that it only load balances within each sched domain.
+Each sched domain covers some subset of the CPUs in the system;
+no two sched domains overlap; some CPUs might not be in any sched
+domain and hence won't be load balanced.
+
+Put simply, it costs less to balance between two smaller sched domains
+than one big one, but doing so means that overloads in one of the
+two domains won't be load balanced to the other one.
+
+By default, there is one sched domain covering all CPUs, including those
+marked isolated using the kernel boot time "isolcpus=" argument. However,
+the isolated CPUs will not participate in load balancing, and will not
+have tasks running on them unless explicitly assigned.
+
+This default load balancing across all CPUs is not well suited for
+the following two situations:
+
+ 1) On large systems, load balancing across many CPUs is expensive.
+ If the system is managed using cpusets to place independent jobs
+ on separate sets of CPUs, full load balancing is unnecessary.
+ 2) Systems supporting realtime on some CPUs need to minimize
+ system overhead on those CPUs, including avoiding task load
+ balancing if that is not needed.
+
+When the per-cpuset flag "cpuset.sched_load_balance" is enabled (the default
+setting), it requests that all the CPUs in that cpusets allowed 'cpuset.cpus'
+be contained in a single sched domain, ensuring that load balancing
+can move a task (not otherwised pinned, as by sched_setaffinity)
+from any CPU in that cpuset to any other.
+
+When the per-cpuset flag "cpuset.sched_load_balance" is disabled, then the
+scheduler will avoid load balancing across the CPUs in that cpuset,
+--except-- in so far as is necessary because some overlapping cpuset
+has "sched_load_balance" enabled.
+
+So, for example, if the top cpuset has the flag "cpuset.sched_load_balance"
+enabled, then the scheduler will have one sched domain covering all
+CPUs, and the setting of the "cpuset.sched_load_balance" flag in any other
+cpusets won't matter, as we're already fully load balancing.
+
+Therefore in the above two situations, the top cpuset flag
+"cpuset.sched_load_balance" should be disabled, and only some of the smaller,
+child cpusets have this flag enabled.
+
+When doing this, you don't usually want to leave any unpinned tasks in
+the top cpuset that might use non-trivial amounts of CPU, as such tasks
+may be artificially constrained to some subset of CPUs, depending on
+the particulars of this flag setting in descendant cpusets. Even if
+such a task could use spare CPU cycles in some other CPUs, the kernel
+scheduler might not consider the possibility of load balancing that
+task to that underused CPU.
+
+Of course, tasks pinned to a particular CPU can be left in a cpuset
+that disables "cpuset.sched_load_balance" as those tasks aren't going anywhere
+else anyway.
+
+There is an impedance mismatch here, between cpusets and sched domains.
+Cpusets are hierarchical and nest. Sched domains are flat; they don't
+overlap and each CPU is in at most one sched domain.
+
+It is necessary for sched domains to be flat because load balancing
+across partially overlapping sets of CPUs would risk unstable dynamics
+that would be beyond our understanding. So if each of two partially
+overlapping cpusets enables the flag 'cpuset.sched_load_balance', then we
+form a single sched domain that is a superset of both. We won't move
+a task to a CPU outside its cpuset, but the scheduler load balancing
+code might waste some compute cycles considering that possibility.
+
+This mismatch is why there is not a simple one-to-one relation
+between which cpusets have the flag "cpuset.sched_load_balance" enabled,
+and the sched domain configuration. If a cpuset enables the flag, it
+will get balancing across all its CPUs, but if it disables the flag,
+it will only be assured of no load balancing if no other overlapping
+cpuset enables the flag.
+
+If two cpusets have partially overlapping 'cpuset.cpus' allowed, and only
+one of them has this flag enabled, then the other may find its
+tasks only partially load balanced, just on the overlapping CPUs.
+This is just the general case of the top_cpuset example given a few
+paragraphs above. In the general case, as in the top cpuset case,
+don't leave tasks that might use non-trivial amounts of CPU in
+such partially load balanced cpusets, as they may be artificially
+constrained to some subset of the CPUs allowed to them, for lack of
+load balancing to the other CPUs.
+
+CPUs in "cpuset.isolcpus" were excluded from load balancing by the
+isolcpus= kernel boot option, and will never be load balanced regardless
+of the value of "cpuset.sched_load_balance" in any cpuset.
+
+1.7.1 sched_load_balance implementation details.
+------------------------------------------------
+
+The per-cpuset flag 'cpuset.sched_load_balance' defaults to enabled (contrary
+to most cpuset flags.) When enabled for a cpuset, the kernel will
+ensure that it can load balance across all the CPUs in that cpuset
+(makes sure that all the CPUs in the cpus_allowed of that cpuset are
+in the same sched domain.)
+
+If two overlapping cpusets both have 'cpuset.sched_load_balance' enabled,
+then they will be (must be) both in the same sched domain.
+
+If, as is the default, the top cpuset has 'cpuset.sched_load_balance' enabled,
+then by the above that means there is a single sched domain covering
+the whole system, regardless of any other cpuset settings.
+
+The kernel commits to user space that it will avoid load balancing
+where it can. It will pick as fine a granularity partition of sched
+domains as it can while still providing load balancing for any set
+of CPUs allowed to a cpuset having 'cpuset.sched_load_balance' enabled.
+
+The internal kernel cpuset to scheduler interface passes from the
+cpuset code to the scheduler code a partition of the load balanced
+CPUs in the system. This partition is a set of subsets (represented
+as an array of struct cpumask) of CPUs, pairwise disjoint, that cover
+all the CPUs that must be load balanced.
+
+The cpuset code builds a new such partition and passes it to the
+scheduler sched domain setup code, to have the sched domains rebuilt
+as necessary, whenever:
+
+ - the 'cpuset.sched_load_balance' flag of a cpuset with non-empty CPUs changes,
+ - or CPUs come or go from a cpuset with this flag enabled,
+ - or 'cpuset.sched_relax_domain_level' value of a cpuset with non-empty CPUs
+ and with this flag enabled changes,
+ - or a cpuset with non-empty CPUs and with this flag enabled is removed,
+ - or a cpu is offlined/onlined.
+
+This partition exactly defines what sched domains the scheduler should
+setup - one sched domain for each element (struct cpumask) in the
+partition.
+
+The scheduler remembers the currently active sched domain partitions.
+When the scheduler routine partition_sched_domains() is invoked from
+the cpuset code to update these sched domains, it compares the new
+partition requested with the current, and updates its sched domains,
+removing the old and adding the new, for each change.
+
+
+1.8 What is sched_relax_domain_level ?
+--------------------------------------
+
+In sched domain, the scheduler migrates tasks in 2 ways; periodic load
+balance on tick, and at time of some schedule events.
+
+When a task is woken up, scheduler try to move the task on idle CPU.
+For example, if a task A running on CPU X activates another task B
+on the same CPU X, and if CPU Y is X's sibling and performing idle,
+then scheduler migrate task B to CPU Y so that task B can start on
+CPU Y without waiting task A on CPU X.
+
+And if a CPU run out of tasks in its runqueue, the CPU try to pull
+extra tasks from other busy CPUs to help them before it is going to
+be idle.
+
+Of course it takes some searching cost to find movable tasks and/or
+idle CPUs, the scheduler might not search all CPUs in the domain
+every time. In fact, in some architectures, the searching ranges on
+events are limited in the same socket or node where the CPU locates,
+while the load balance on tick searches all.
+
+For example, assume CPU Z is relatively far from CPU X. Even if CPU Z
+is idle while CPU X and the siblings are busy, scheduler can't migrate
+woken task B from X to Z since it is out of its searching range.
+As the result, task B on CPU X need to wait task A or wait load balance
+on the next tick. For some applications in special situation, waiting
+1 tick may be too long.
+
+The 'cpuset.sched_relax_domain_level' file allows you to request changing
+this searching range as you like. This file takes int value which
+indicates size of searching range in levels approximately as follows,
+otherwise initial value -1 that indicates the cpuset has no request.
+
+====== ===========================================================
+ -1 no request. use system default or follow request of others.
+ 0 no search.
+ 1 search siblings (hyperthreads in a core).
+ 2 search cores in a package.
+ 3 search cpus in a node [= system wide on non-NUMA system]
+ 4 search nodes in a chunk of node [on NUMA system]
+ 5 search system wide [on NUMA system]
+====== ===========================================================
+
+Not all levels can be present and values can change depending on the
+system architecture and kernel configuration. Check
+/sys/kernel/debug/sched/domains/cpu*/domain*/ for system-specific
+details.
+
+The system default is architecture dependent. The system default
+can be changed using the relax_domain_level= boot parameter.
+
+This file is per-cpuset and affect the sched domain where the cpuset
+belongs to. Therefore if the flag 'cpuset.sched_load_balance' of a cpuset
+is disabled, then 'cpuset.sched_relax_domain_level' have no effect since
+there is no sched domain belonging the cpuset.
+
+If multiple cpusets are overlapping and hence they form a single sched
+domain, the largest value among those is used. Be careful, if one
+requests 0 and others are -1 then 0 is used.
+
+Note that modifying this file will have both good and bad effects,
+and whether it is acceptable or not depends on your situation.
+Don't modify this file if you are not sure.
+
+If your situation is:
+
+ - The migration costs between each cpu can be assumed considerably
+ small(for you) due to your special application's behavior or
+ special hardware support for CPU cache etc.
+ - The searching cost doesn't have impact(for you) or you can make
+ the searching cost enough small by managing cpuset to compact etc.
+ - The latency is required even it sacrifices cache hit rate etc.
+ then increasing 'sched_relax_domain_level' would benefit you.
+
+
+1.9 How do I use cpusets ?
+--------------------------
+
+In order to minimize the impact of cpusets on critical kernel
+code, such as the scheduler, and due to the fact that the kernel
+does not support one task updating the memory placement of another
+task directly, the impact on a task of changing its cpuset CPU
+or Memory Node placement, or of changing to which cpuset a task
+is attached, is subtle.
+
+If a cpuset has its Memory Nodes modified, then for each task attached
+to that cpuset, the next time that the kernel attempts to allocate
+a page of memory for that task, the kernel will notice the change
+in the task's cpuset, and update its per-task memory placement to
+remain within the new cpusets memory placement. If the task was using
+mempolicy MPOL_BIND, and the nodes to which it was bound overlap with
+its new cpuset, then the task will continue to use whatever subset
+of MPOL_BIND nodes are still allowed in the new cpuset. If the task
+was using MPOL_BIND and now none of its MPOL_BIND nodes are allowed
+in the new cpuset, then the task will be essentially treated as if it
+was MPOL_BIND bound to the new cpuset (even though its NUMA placement,
+as queried by get_mempolicy(), doesn't change). If a task is moved
+from one cpuset to another, then the kernel will adjust the task's
+memory placement, as above, the next time that the kernel attempts
+to allocate a page of memory for that task.
+
+If a cpuset has its 'cpuset.cpus' modified, then each task in that cpuset
+will have its allowed CPU placement changed immediately. Similarly,
+if a task's pid is written to another cpuset's 'tasks' file, then its
+allowed CPU placement is changed immediately. If such a task had been
+bound to some subset of its cpuset using the sched_setaffinity() call,
+the task will be allowed to run on any CPU allowed in its new cpuset,
+negating the effect of the prior sched_setaffinity() call.
+
+In summary, the memory placement of a task whose cpuset is changed is
+updated by the kernel, on the next allocation of a page for that task,
+and the processor placement is updated immediately.
+
+Normally, once a page is allocated (given a physical page
+of main memory) then that page stays on whatever node it
+was allocated, so long as it remains allocated, even if the
+cpusets memory placement policy 'cpuset.mems' subsequently changes.
+If the cpuset flag file 'cpuset.memory_migrate' is set true, then when
+tasks are attached to that cpuset, any pages that task had
+allocated to it on nodes in its previous cpuset are migrated
+to the task's new cpuset. The relative placement of the page within
+the cpuset is preserved during these migration operations if possible.
+For example if the page was on the second valid node of the prior cpuset
+then the page will be placed on the second valid node of the new cpuset.
+
+Also if 'cpuset.memory_migrate' is set true, then if that cpuset's
+'cpuset.mems' file is modified, pages allocated to tasks in that
+cpuset, that were on nodes in the previous setting of 'cpuset.mems',
+will be moved to nodes in the new setting of 'mems.'
+Pages that were not in the task's prior cpuset, or in the cpuset's
+prior 'cpuset.mems' setting, will not be moved.
+
+There is an exception to the above. If hotplug functionality is used
+to remove all the CPUs that are currently assigned to a cpuset,
+then all the tasks in that cpuset will be moved to the nearest ancestor
+with non-empty cpus. But the moving of some (or all) tasks might fail if
+cpuset is bound with another cgroup subsystem which has some restrictions
+on task attaching. In this failing case, those tasks will stay
+in the original cpuset, and the kernel will automatically update
+their cpus_allowed to allow all online CPUs. When memory hotplug
+functionality for removing Memory Nodes is available, a similar exception
+is expected to apply there as well. In general, the kernel prefers to
+violate cpuset placement, over starving a task that has had all
+its allowed CPUs or Memory Nodes taken offline.
+
+There is a second exception to the above. GFP_ATOMIC requests are
+kernel internal allocations that must be satisfied, immediately.
+The kernel may drop some request, in rare cases even panic, if a
+GFP_ATOMIC alloc fails. If the request cannot be satisfied within
+the current task's cpuset, then we relax the cpuset, and look for
+memory anywhere we can find it. It's better to violate the cpuset
+than stress the kernel.
+
+To start a new job that is to be contained within a cpuset, the steps are:
+
+ 1) mkdir /sys/fs/cgroup/cpuset
+ 2) mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset
+ 3) Create the new cpuset by doing mkdir's and write's (or echo's) in
+ the /sys/fs/cgroup/cpuset virtual file system.
+ 4) Start a task that will be the "founding father" of the new job.
+ 5) Attach that task to the new cpuset by writing its pid to the
+ /sys/fs/cgroup/cpuset tasks file for that cpuset.
+ 6) fork, exec or clone the job tasks from this founding father task.
+
+For example, the following sequence of commands will setup a cpuset
+named "Charlie", containing just CPUs 2 and 3, and Memory Node 1,
+and then start a subshell 'sh' in that cpuset::
+
+ mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset
+ cd /sys/fs/cgroup/cpuset
+ mkdir Charlie
+ cd Charlie
+ /bin/echo 2-3 > cpuset.cpus
+ /bin/echo 1 > cpuset.mems
+ /bin/echo $$ > tasks
+ sh
+ # The subshell 'sh' is now running in cpuset Charlie
+ # The next line should display '/Charlie'
+ cat /proc/self/cpuset
+
+There are ways to query or modify cpusets:
+
+ - via the cpuset file system directly, using the various cd, mkdir, echo,
+ cat, rmdir commands from the shell, or their equivalent from C.
+ - via the C library libcpuset.
+ - via the C library libcgroup.
+ (https://github.com/libcgroup/libcgroup/)
+ - via the python application cset.
+ (http://code.google.com/p/cpuset/)
+
+The sched_setaffinity calls can also be done at the shell prompt using
+SGI's runon or Robert Love's taskset. The mbind and set_mempolicy
+calls can be done at the shell prompt using the numactl command
+(part of Andi Kleen's numa package).
+
+2. Usage Examples and Syntax
+============================
+
+2.1 Basic Usage
+---------------
+
+Creating, modifying, using the cpusets can be done through the cpuset
+virtual filesystem.
+
+To mount it, type:
+# mount -t cgroup -o cpuset cpuset /sys/fs/cgroup/cpuset
+
+Then under /sys/fs/cgroup/cpuset you can find a tree that corresponds to the
+tree of the cpusets in the system. For instance, /sys/fs/cgroup/cpuset
+is the cpuset that holds the whole system.
+
+If you want to create a new cpuset under /sys/fs/cgroup/cpuset::
+
+ # cd /sys/fs/cgroup/cpuset
+ # mkdir my_cpuset
+
+Now you want to do something with this cpuset::
+
+ # cd my_cpuset
+
+In this directory you can find several files::
+
+ # ls
+ cgroup.clone_children cpuset.memory_pressure
+ cgroup.event_control cpuset.memory_spread_page
+ cgroup.procs cpuset.memory_spread_slab
+ cpuset.cpu_exclusive cpuset.mems
+ cpuset.cpus cpuset.sched_load_balance
+ cpuset.mem_exclusive cpuset.sched_relax_domain_level
+ cpuset.mem_hardwall notify_on_release
+ cpuset.memory_migrate tasks
+
+Reading them will give you information about the state of this cpuset:
+the CPUs and Memory Nodes it can use, the processes that are using
+it, its properties. By writing to these files you can manipulate
+the cpuset.
+
+Set some flags::
+
+ # /bin/echo 1 > cpuset.cpu_exclusive
+
+Add some cpus::
+
+ # /bin/echo 0-7 > cpuset.cpus
+
+Add some mems::
+
+ # /bin/echo 0-7 > cpuset.mems
+
+Now attach your shell to this cpuset::
+
+ # /bin/echo $$ > tasks
+
+You can also create cpusets inside your cpuset by using mkdir in this
+directory::
+
+ # mkdir my_sub_cs
+
+To remove a cpuset, just use rmdir::
+
+ # rmdir my_sub_cs
+
+This will fail if the cpuset is in use (has cpusets inside, or has
+processes attached).
+
+Note that for legacy reasons, the "cpuset" filesystem exists as a
+wrapper around the cgroup filesystem.
+
+The command::
+
+ mount -t cpuset X /sys/fs/cgroup/cpuset
+
+is equivalent to::
+
+ mount -t cgroup -ocpuset,noprefix X /sys/fs/cgroup/cpuset
+ echo "/sbin/cpuset_release_agent" > /sys/fs/cgroup/cpuset/release_agent
+
+2.2 Adding/removing cpus
+------------------------
+
+This is the syntax to use when writing in the cpus or mems files
+in cpuset directories::
+
+ # /bin/echo 1-4 > cpuset.cpus -> set cpus list to cpus 1,2,3,4
+ # /bin/echo 1,2,3,4 > cpuset.cpus -> set cpus list to cpus 1,2,3,4
+
+To add a CPU to a cpuset, write the new list of CPUs including the
+CPU to be added. To add 6 to the above cpuset::
+
+ # /bin/echo 1-4,6 > cpuset.cpus -> set cpus list to cpus 1,2,3,4,6
+
+Similarly to remove a CPU from a cpuset, write the new list of CPUs
+without the CPU to be removed.
+
+To remove all the CPUs::
+
+ # /bin/echo "" > cpuset.cpus -> clear cpus list
+
+2.3 Setting flags
+-----------------
+
+The syntax is very simple::
+
+ # /bin/echo 1 > cpuset.cpu_exclusive -> set flag 'cpuset.cpu_exclusive'
+ # /bin/echo 0 > cpuset.cpu_exclusive -> unset flag 'cpuset.cpu_exclusive'
+
+2.4 Attaching processes
+-----------------------
+
+::
+
+ # /bin/echo PID > tasks
+
+Note that it is PID, not PIDs. You can only attach ONE task at a time.
+If you have several tasks to attach, you have to do it one after another::
+
+ # /bin/echo PID1 > tasks
+ # /bin/echo PID2 > tasks
+ ...
+ # /bin/echo PIDn > tasks
+
+
+3. Questions
+============
+
+Q:
+ what's up with this '/bin/echo' ?
+
+A:
+ bash's builtin 'echo' command does not check calls to write() against
+ errors. If you use it in the cpuset file system, you won't be
+ able to tell whether a command succeeded or failed.
+
+Q:
+ When I attach processes, only the first of the line gets really attached !
+
+A:
+ We can only return one error code per call to write(). So you should also
+ put only ONE pid.
+
+4. Contact
+==========
+
+Web: http://www.bullopensource.org/cpuset
diff --git a/Documentation/admin-guide/cgroup-v1/devices.rst b/Documentation/admin-guide/cgroup-v1/devices.rst
new file mode 100644
index 000000000000..e1886783961e
--- /dev/null
+++ b/Documentation/admin-guide/cgroup-v1/devices.rst
@@ -0,0 +1,132 @@
+===========================
+Device Whitelist Controller
+===========================
+
+1. Description
+==============
+
+Implement a cgroup to track and enforce open and mknod restrictions
+on device files. A device cgroup associates a device access
+whitelist with each cgroup. A whitelist entry has 4 fields.
+'type' is a (all), c (char), or b (block). 'all' means it applies
+to all types and all major and minor numbers. Major and minor are
+either an integer or * for all. Access is a composition of r
+(read), w (write), and m (mknod).
+
+The root device cgroup starts with rwm to 'all'. A child device
+cgroup gets a copy of the parent. Administrators can then remove
+devices from the whitelist or add new entries. A child cgroup can
+never receive a device access which is denied by its parent.
+
+2. User Interface
+=================
+
+An entry is added using devices.allow, and removed using
+devices.deny. For instance::
+
+ echo 'c 1:3 mr' > /sys/fs/cgroup/1/devices.allow
+
+allows cgroup 1 to read and mknod the device usually known as
+/dev/null. Doing::
+
+ echo a > /sys/fs/cgroup/1/devices.deny
+
+will remove the default 'a *:* rwm' entry. Doing::
+
+ echo a > /sys/fs/cgroup/1/devices.allow
+
+will add the 'a *:* rwm' entry to the whitelist.
+
+3. Security
+===========
+
+Any task can move itself between cgroups. This clearly won't
+suffice, but we can decide the best way to adequately restrict
+movement as people get some experience with this. We may just want
+to require CAP_SYS_ADMIN, which at least is a separate bit from
+CAP_MKNOD. We may want to just refuse moving to a cgroup which
+isn't a descendant of the current one. Or we may want to use
+CAP_MAC_ADMIN, since we really are trying to lock down root.
+
+CAP_SYS_ADMIN is needed to modify the whitelist or move another
+task to a new cgroup. (Again we'll probably want to change that).
+
+A cgroup may not be granted more permissions than the cgroup's
+parent has.
+
+4. Hierarchy
+============
+
+device cgroups maintain hierarchy by making sure a cgroup never has more
+access permissions than its parent. Every time an entry is written to
+a cgroup's devices.deny file, all its children will have that entry removed
+from their whitelist and all the locally set whitelist entries will be
+re-evaluated. In case one of the locally set whitelist entries would provide
+more access than the cgroup's parent, it'll be removed from the whitelist.
+
+Example::
+
+ A
+ / \
+ B
+
+ group behavior exceptions
+ A allow "b 8:* rwm", "c 116:1 rw"
+ B deny "c 1:3 rwm", "c 116:2 rwm", "b 3:* rwm"
+
+If a device is denied in group A::
+
+ # echo "c 116:* r" > A/devices.deny
+
+it'll propagate down and after revalidating B's entries, the whitelist entry
+"c 116:2 rwm" will be removed::
+
+ group whitelist entries denied devices
+ A all "b 8:* rwm", "c 116:* rw"
+ B "c 1:3 rwm", "b 3:* rwm" all the rest
+
+In case parent's exceptions change and local exceptions are not allowed
+anymore, they'll be deleted.
+
+Notice that new whitelist entries will not be propagated::
+
+ A
+ / \
+ B
+
+ group whitelist entries denied devices
+ A "c 1:3 rwm", "c 1:5 r" all the rest
+ B "c 1:3 rwm", "c 1:5 r" all the rest
+
+when adding ``c *:3 rwm``::
+
+ # echo "c *:3 rwm" >A/devices.allow
+
+the result::
+
+ group whitelist entries denied devices
+ A "c *:3 rwm", "c 1:5 r" all the rest
+ B "c 1:3 rwm", "c 1:5 r" all the rest
+
+but now it'll be possible to add new entries to B::
+
+ # echo "c 2:3 rwm" >B/devices.allow
+ # echo "c 50:3 r" >B/devices.allow
+
+or even::
+
+ # echo "c *:3 rwm" >B/devices.allow
+
+Allowing or denying all by writing 'a' to devices.allow or devices.deny will
+not be possible once the device cgroups has children.
+
+4.1 Hierarchy (internal implementation)
+---------------------------------------
+
+device cgroups is implemented internally using a behavior (ALLOW, DENY) and a
+list of exceptions. The internal state is controlled using the same user
+interface to preserve compatibility with the previous whitelist-only
+implementation. Removal or addition of exceptions that will reduce the access
+to devices will be propagated down the hierarchy.
+For every propagated exception, the effective rules will be re-evaluated based
+on current parent's access rules.
diff --git a/Documentation/admin-guide/cgroup-v1/freezer-subsystem.rst b/Documentation/admin-guide/cgroup-v1/freezer-subsystem.rst
new file mode 100644
index 000000000000..a964aff373b1
--- /dev/null
+++ b/Documentation/admin-guide/cgroup-v1/freezer-subsystem.rst
@@ -0,0 +1,131 @@
+==============
+Cgroup Freezer
+==============
+
+The cgroup freezer is useful to batch job management system which start
+and stop sets of tasks in order to schedule the resources of a machine
+according to the desires of a system administrator. This sort of program
+is often used on HPC clusters to schedule access to the cluster as a
+whole. The cgroup freezer uses cgroups to describe the set of tasks to
+be started/stopped by the batch job management system. It also provides
+a means to start and stop the tasks composing the job.
+
+The cgroup freezer will also be useful for checkpointing running groups
+of tasks. The freezer allows the checkpoint code to obtain a consistent
+image of the tasks by attempting to force the tasks in a cgroup into a
+quiescent state. Once the tasks are quiescent another task can
+walk /proc or invoke a kernel interface to gather information about the
+quiesced tasks. Checkpointed tasks can be restarted later should a
+recoverable error occur. This also allows the checkpointed tasks to be
+migrated between nodes in a cluster by copying the gathered information
+to another node and restarting the tasks there.
+
+Sequences of SIGSTOP and SIGCONT are not always sufficient for stopping
+and resuming tasks in userspace. Both of these signals are observable
+from within the tasks we wish to freeze. While SIGSTOP cannot be caught,
+blocked, or ignored it can be seen by waiting or ptracing parent tasks.
+SIGCONT is especially unsuitable since it can be caught by the task. Any
+programs designed to watch for SIGSTOP and SIGCONT could be broken by
+attempting to use SIGSTOP and SIGCONT to stop and resume tasks. We can
+demonstrate this problem using nested bash shells::
+
+ $ echo $$
+ 16644
+ $ bash
+ $ echo $$
+ 16690
+
+ From a second, unrelated bash shell:
+ $ kill -SIGSTOP 16690
+ $ kill -SIGCONT 16690
+
+ <at this point 16690 exits and causes 16644 to exit too>
+
+This happens because bash can observe both signals and choose how it
+responds to them.
+
+Another example of a program which catches and responds to these
+signals is gdb. In fact any program designed to use ptrace is likely to
+have a problem with this method of stopping and resuming tasks.
+
+In contrast, the cgroup freezer uses the kernel freezer code to
+prevent the freeze/unfreeze cycle from becoming visible to the tasks
+being frozen. This allows the bash example above and gdb to run as
+expected.
+
+The cgroup freezer is hierarchical. Freezing a cgroup freezes all
+tasks belonging to the cgroup and all its descendant cgroups. Each
+cgroup has its own state (self-state) and the state inherited from the
+parent (parent-state). Iff both states are THAWED, the cgroup is
+THAWED.
+
+The following cgroupfs files are created by cgroup freezer.
+
+* freezer.state: Read-write.
+
+ When read, returns the effective state of the cgroup - "THAWED",
+ "FREEZING" or "FROZEN". This is the combined self and parent-states.
+ If any is freezing, the cgroup is freezing (FREEZING or FROZEN).
+
+ FREEZING cgroup transitions into FROZEN state when all tasks
+ belonging to the cgroup and its descendants become frozen. Note that
+ a cgroup reverts to FREEZING from FROZEN after a new task is added
+ to the cgroup or one of its descendant cgroups until the new task is
+ frozen.
+
+ When written, sets the self-state of the cgroup. Two values are
+ allowed - "FROZEN" and "THAWED". If FROZEN is written, the cgroup,
+ if not already freezing, enters FREEZING state along with all its
+ descendant cgroups.
+
+ If THAWED is written, the self-state of the cgroup is changed to
+ THAWED. Note that the effective state may not change to THAWED if
+ the parent-state is still freezing. If a cgroup's effective state
+ becomes THAWED, all its descendants which are freezing because of
+ the cgroup also leave the freezing state.
+
+* freezer.self_freezing: Read only.
+
+ Shows the self-state. 0 if the self-state is THAWED; otherwise, 1.
+ This value is 1 iff the last write to freezer.state was "FROZEN".
+
+* freezer.parent_freezing: Read only.
+
+ Shows the parent-state. 0 if none of the cgroup's ancestors is
+ frozen; otherwise, 1.
+
+The root cgroup is non-freezable and the above interface files don't
+exist.
+
+* Examples of usage::
+
+ # mkdir /sys/fs/cgroup/freezer
+ # mount -t cgroup -ofreezer freezer /sys/fs/cgroup/freezer
+ # mkdir /sys/fs/cgroup/freezer/0
+ # echo $some_pid > /sys/fs/cgroup/freezer/0/tasks
+
+to get status of the freezer subsystem::
+
+ # cat /sys/fs/cgroup/freezer/0/freezer.state
+ THAWED
+
+to freeze all tasks in the container::
+
+ # echo FROZEN > /sys/fs/cgroup/freezer/0/freezer.state
+ # cat /sys/fs/cgroup/freezer/0/freezer.state
+ FREEZING
+ # cat /sys/fs/cgroup/freezer/0/freezer.state
+ FROZEN
+
+to unfreeze all tasks in the container::
+
+ # echo THAWED > /sys/fs/cgroup/freezer/0/freezer.state
+ # cat /sys/fs/cgroup/freezer/0/freezer.state
+ THAWED
+
+This is the basic mechanism which should do the right thing for user space task
+in a simple scenario.
+
+This freezer implementation is affected by shortcomings (see commit
+76f969e8948d8 ("cgroup: cgroup v2 freezer")) and cgroup v2 freezer is
+recommended.
diff --git a/Documentation/admin-guide/cgroup-v1/hugetlb.rst b/Documentation/admin-guide/cgroup-v1/hugetlb.rst
new file mode 100644
index 000000000000..493a8e386700
--- /dev/null
+++ b/Documentation/admin-guide/cgroup-v1/hugetlb.rst
@@ -0,0 +1,139 @@
+==================
+HugeTLB Controller
+==================
+
+HugeTLB controller can be created by first mounting the cgroup filesystem.
+
+# mount -t cgroup -o hugetlb none /sys/fs/cgroup
+
+With the above step, the initial or the parent HugeTLB group becomes
+visible at /sys/fs/cgroup. At bootup, this group includes all the tasks in
+the system. /sys/fs/cgroup/tasks lists the tasks in this cgroup.
+
+New groups can be created under the parent group /sys/fs/cgroup::
+
+ # cd /sys/fs/cgroup
+ # mkdir g1
+ # echo $$ > g1/tasks
+
+The above steps create a new group g1 and move the current shell
+process (bash) into it.
+
+Brief summary of control files::
+
+ hugetlb.<hugepagesize>.rsvd.limit_in_bytes # set/show limit of "hugepagesize" hugetlb reservations
+ hugetlb.<hugepagesize>.rsvd.max_usage_in_bytes # show max "hugepagesize" hugetlb reservations and no-reserve faults
+ hugetlb.<hugepagesize>.rsvd.usage_in_bytes # show current reservations and no-reserve faults for "hugepagesize" hugetlb
+ hugetlb.<hugepagesize>.rsvd.failcnt # show the number of allocation failure due to HugeTLB reservation limit
+ hugetlb.<hugepagesize>.limit_in_bytes # set/show limit of "hugepagesize" hugetlb faults
+ hugetlb.<hugepagesize>.max_usage_in_bytes # show max "hugepagesize" hugetlb usage recorded
+ hugetlb.<hugepagesize>.usage_in_bytes # show current usage for "hugepagesize" hugetlb
+ hugetlb.<hugepagesize>.failcnt # show the number of allocation failure due to HugeTLB usage limit
+ hugetlb.<hugepagesize>.numa_stat # show the numa information of the hugetlb memory charged to this cgroup
+
+For a system supporting three hugepage sizes (64k, 32M and 1G), the control
+files include::
+
+ hugetlb.1GB.limit_in_bytes
+ hugetlb.1GB.max_usage_in_bytes
+ hugetlb.1GB.numa_stat
+ hugetlb.1GB.usage_in_bytes
+ hugetlb.1GB.failcnt
+ hugetlb.1GB.rsvd.limit_in_bytes
+ hugetlb.1GB.rsvd.max_usage_in_bytes
+ hugetlb.1GB.rsvd.usage_in_bytes
+ hugetlb.1GB.rsvd.failcnt
+ hugetlb.64KB.limit_in_bytes
+ hugetlb.64KB.max_usage_in_bytes
+ hugetlb.64KB.numa_stat
+ hugetlb.64KB.usage_in_bytes
+ hugetlb.64KB.failcnt
+ hugetlb.64KB.rsvd.limit_in_bytes
+ hugetlb.64KB.rsvd.max_usage_in_bytes
+ hugetlb.64KB.rsvd.usage_in_bytes
+ hugetlb.64KB.rsvd.failcnt
+ hugetlb.32MB.limit_in_bytes
+ hugetlb.32MB.max_usage_in_bytes
+ hugetlb.32MB.numa_stat
+ hugetlb.32MB.usage_in_bytes
+ hugetlb.32MB.failcnt
+ hugetlb.32MB.rsvd.limit_in_bytes
+ hugetlb.32MB.rsvd.max_usage_in_bytes
+ hugetlb.32MB.rsvd.usage_in_bytes
+ hugetlb.32MB.rsvd.failcnt
+
+
+1. Page fault accounting
+
+::
+
+ hugetlb.<hugepagesize>.limit_in_bytes
+ hugetlb.<hugepagesize>.max_usage_in_bytes
+ hugetlb.<hugepagesize>.usage_in_bytes
+ hugetlb.<hugepagesize>.failcnt
+
+The HugeTLB controller allows users to limit the HugeTLB usage (page fault) per
+control group and enforces the limit during page fault. Since HugeTLB
+doesn't support page reclaim, enforcing the limit at page fault time implies
+that, the application will get SIGBUS signal if it tries to fault in HugeTLB
+pages beyond its limit. Therefore the application needs to know exactly how many
+HugeTLB pages it uses before hand, and the sysadmin needs to make sure that
+there are enough available on the machine for all the users to avoid processes
+getting SIGBUS.
+
+
+2. Reservation accounting
+
+::
+
+ hugetlb.<hugepagesize>.rsvd.limit_in_bytes
+ hugetlb.<hugepagesize>.rsvd.max_usage_in_bytes
+ hugetlb.<hugepagesize>.rsvd.usage_in_bytes
+ hugetlb.<hugepagesize>.rsvd.failcnt
+
+The HugeTLB controller allows to limit the HugeTLB reservations per control
+group and enforces the controller limit at reservation time and at the fault of
+HugeTLB memory for which no reservation exists. Since reservation limits are
+enforced at reservation time (on mmap or shget), reservation limits never causes
+the application to get SIGBUS signal if the memory was reserved before hand. For
+MAP_NORESERVE allocations, the reservation limit behaves the same as the fault
+limit, enforcing memory usage at fault time and causing the application to
+receive a SIGBUS if it's crossing its limit.
+
+Reservation limits are superior to page fault limits described above, since
+reservation limits are enforced at reservation time (on mmap or shget), and
+never causes the application to get SIGBUS signal if the memory was reserved
+before hand. This allows for easier fallback to alternatives such as
+non-HugeTLB memory for example. In the case of page fault accounting, it's very
+hard to avoid processes getting SIGBUS since the sysadmin needs precisely know
+the HugeTLB usage of all the tasks in the system and make sure there is enough
+pages to satisfy all requests. Avoiding tasks getting SIGBUS on overcommited
+systems is practically impossible with page fault accounting.
+
+
+3. Caveats with shared memory
+
+For shared HugeTLB memory, both HugeTLB reservation and page faults are charged
+to the first task that causes the memory to be reserved or faulted, and all
+subsequent uses of this reserved or faulted memory is done without charging.
+
+Shared HugeTLB memory is only uncharged when it is unreserved or deallocated.
+This is usually when the HugeTLB file is deleted, and not when the task that
+caused the reservation or fault has exited.
+
+
+4. Caveats with HugeTLB cgroup offline.
+
+When a HugeTLB cgroup goes offline with some reservations or faults still
+charged to it, the behavior is as follows:
+
+- The fault charges are charged to the parent HugeTLB cgroup (reparented),
+- the reservation charges remain on the offline HugeTLB cgroup.
+
+This means that if a HugeTLB cgroup gets offlined while there is still HugeTLB
+reservations charged to it, that cgroup persists as a zombie until all HugeTLB
+reservations are uncharged. HugeTLB reservations behave in this manner to match
+the memory controller whose cgroups also persist as zombie until all charged
+memory is uncharged. Also, the tracking of HugeTLB reservations is a bit more
+complex compared to the tracking of HugeTLB faults, so it is significantly
+harder to reparent reservations at offline time.
diff --git a/Documentation/admin-guide/cgroup-v1/index.rst b/Documentation/admin-guide/cgroup-v1/index.rst
new file mode 100644
index 000000000000..99fbc8a64ba9
--- /dev/null
+++ b/Documentation/admin-guide/cgroup-v1/index.rst
@@ -0,0 +1,31 @@
+.. _cgroup-v1:
+
+========================
+Control Groups version 1
+========================
+
+.. toctree::
+ :maxdepth: 1
+
+ cgroups
+
+ blkio-controller
+ cpuacct
+ cpusets
+ devices
+ freezer-subsystem
+ hugetlb
+ memcg_test
+ memory
+ misc
+ net_cls
+ net_prio
+ pids
+ rdma
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
diff --git a/Documentation/admin-guide/cgroup-v1/memcg_test.rst b/Documentation/admin-guide/cgroup-v1/memcg_test.rst
new file mode 100644
index 000000000000..9f8e27355cba
--- /dev/null
+++ b/Documentation/admin-guide/cgroup-v1/memcg_test.rst
@@ -0,0 +1,344 @@
+=====================================================
+Memory Resource Controller(Memcg) Implementation Memo
+=====================================================
+
+Last Updated: 2010/2
+
+Base Kernel Version: based on 2.6.33-rc7-mm(candidate for 34).
+
+Because VM is getting complex (one of reasons is memcg...), memcg's behavior
+is complex. This is a document for memcg's internal behavior.
+Please note that implementation details can be changed.
+
+(*) Topics on API should be in Documentation/admin-guide/cgroup-v1/memory.rst)
+
+0. How to record usage ?
+========================
+
+ 2 objects are used.
+
+ page_cgroup ....an object per page.
+
+ Allocated at boot or memory hotplug. Freed at memory hot removal.
+
+ swap_cgroup ... an entry per swp_entry.
+
+ Allocated at swapon(). Freed at swapoff().
+
+ The page_cgroup has USED bit and double count against a page_cgroup never
+ occurs. swap_cgroup is used only when a charged page is swapped-out.
+
+1. Charge
+=========
+
+ a page/swp_entry may be charged (usage += PAGE_SIZE) at
+
+ mem_cgroup_try_charge()
+
+2. Uncharge
+===========
+
+ a page/swp_entry may be uncharged (usage -= PAGE_SIZE) by
+
+ mem_cgroup_uncharge()
+ Called when a page's refcount goes down to 0.
+
+ mem_cgroup_uncharge_swap()
+ Called when swp_entry's refcnt goes down to 0. A charge against swap
+ disappears.
+
+3. charge-commit-cancel
+=======================
+
+ Memcg pages are charged in two steps:
+
+ - mem_cgroup_try_charge()
+ - mem_cgroup_commit_charge() or mem_cgroup_cancel_charge()
+
+ At try_charge(), there are no flags to say "this page is charged".
+ at this point, usage += PAGE_SIZE.
+
+ At commit(), the page is associated with the memcg.
+
+ At cancel(), simply usage -= PAGE_SIZE.
+
+Under below explanation, we assume CONFIG_SWAP=y.
+
+4. Anonymous
+============
+
+ Anonymous page is newly allocated at
+ - page fault into MAP_ANONYMOUS mapping.
+ - Copy-On-Write.
+
+ 4.1 Swap-in.
+ At swap-in, the page is taken from swap-cache. There are 2 cases.
+
+ (a) If the SwapCache is newly allocated and read, it has no charges.
+ (b) If the SwapCache has been mapped by processes, it has been
+ charged already.
+
+ 4.2 Swap-out.
+ At swap-out, typical state transition is below.
+
+ (a) add to swap cache. (marked as SwapCache)
+ swp_entry's refcnt += 1.
+ (b) fully unmapped.
+ swp_entry's refcnt += # of ptes.
+ (c) write back to swap.
+ (d) delete from swap cache. (remove from SwapCache)
+ swp_entry's refcnt -= 1.
+
+
+ Finally, at task exit,
+ (e) zap_pte() is called and swp_entry's refcnt -=1 -> 0.
+
+5. Page Cache
+=============
+
+ Page Cache is charged at
+ - filemap_add_folio().
+
+ The logic is very clear. (About migration, see below)
+
+ Note:
+ __filemap_remove_folio() is called by filemap_remove_folio()
+ and __remove_mapping().
+
+6. Shmem(tmpfs) Page Cache
+===========================
+
+ The best way to understand shmem's page state transition is to read
+ mm/shmem.c.
+
+ But brief explanation of the behavior of memcg around shmem will be
+ helpful to understand the logic.
+
+ Shmem's page (just leaf page, not direct/indirect block) can be on
+
+ - radix-tree of shmem's inode.
+ - SwapCache.
+ - Both on radix-tree and SwapCache. This happens at swap-in
+ and swap-out,
+
+ It's charged when...
+
+ - A new page is added to shmem's radix-tree.
+ - A swp page is read. (move a charge from swap_cgroup to page_cgroup)
+
+7. Page Migration
+=================
+
+ mem_cgroup_migrate()
+
+8. LRU
+======
+ Each memcg has its own vector of LRUs (inactive anon, active anon,
+ inactive file, active file, unevictable) of pages from each node,
+ each LRU handled under a single lru_lock for that memcg and node.
+
+9. Typical Tests.
+=================
+
+ Tests for racy cases.
+
+9.1 Small limit to memcg.
+-------------------------
+
+ When you do test to do racy case, it's good test to set memcg's limit
+ to be very small rather than GB. Many races found in the test under
+ xKB or xxMB limits.
+
+ (Memory behavior under GB and Memory behavior under MB shows very
+ different situation.)
+
+9.2 Shmem
+---------
+
+ Historically, memcg's shmem handling was poor and we saw some amount
+ of troubles here. This is because shmem is page-cache but can be
+ SwapCache. Test with shmem/tmpfs is always good test.
+
+9.3 Migration
+-------------
+
+ For NUMA, migration is an another special case. To do easy test, cpuset
+ is useful. Following is a sample script to do migration::
+
+ mount -t cgroup -o cpuset none /opt/cpuset
+
+ mkdir /opt/cpuset/01
+ echo 1 > /opt/cpuset/01/cpuset.cpus
+ echo 0 > /opt/cpuset/01/cpuset.mems
+ echo 1 > /opt/cpuset/01/cpuset.memory_migrate
+ mkdir /opt/cpuset/02
+ echo 1 > /opt/cpuset/02/cpuset.cpus
+ echo 1 > /opt/cpuset/02/cpuset.mems
+ echo 1 > /opt/cpuset/02/cpuset.memory_migrate
+
+ In above set, when you moves a task from 01 to 02, page migration to
+ node 0 to node 1 will occur. Following is a script to migrate all
+ under cpuset.::
+
+ --
+ move_task()
+ {
+ for pid in $1
+ do
+ /bin/echo $pid >$2/tasks 2>/dev/null
+ echo -n $pid
+ echo -n " "
+ done
+ echo END
+ }
+
+ G1_TASK=`cat ${G1}/tasks`
+ G2_TASK=`cat ${G2}/tasks`
+ move_task "${G1_TASK}" ${G2} &
+ --
+
+9.4 Memory hotplug
+------------------
+
+ memory hotplug test is one of good test.
+
+ to offline memory, do following::
+
+ # echo offline > /sys/devices/system/memory/memoryXXX/state
+
+ (XXX is the place of memory)
+
+ This is an easy way to test page migration, too.
+
+9.5 nested cgroups
+------------------
+
+ Use tests like the following for testing nested cgroups::
+
+ mkdir /opt/cgroup/01/child_a
+ mkdir /opt/cgroup/01/child_b
+
+ set limit to 01.
+ add limit to 01/child_b
+ run jobs under child_a and child_b
+
+ create/delete following groups at random while jobs are running::
+
+ /opt/cgroup/01/child_a/child_aa
+ /opt/cgroup/01/child_b/child_bb
+ /opt/cgroup/01/child_c
+
+ running new jobs in new group is also good.
+
+9.6 Mount with other subsystems
+-------------------------------
+
+ Mounting with other subsystems is a good test because there is a
+ race and lock dependency with other cgroup subsystems.
+
+ example::
+
+ # mount -t cgroup none /cgroup -o cpuset,memory,cpu,devices
+
+ and do task move, mkdir, rmdir etc...under this.
+
+9.7 swapoff
+-----------
+
+ Besides management of swap is one of complicated parts of memcg,
+ call path of swap-in at swapoff is not same as usual swap-in path..
+ It's worth to be tested explicitly.
+
+ For example, test like following is good:
+
+ (Shell-A)::
+
+ # mount -t cgroup none /cgroup -o memory
+ # mkdir /cgroup/test
+ # echo 40M > /cgroup/test/memory.limit_in_bytes
+ # echo 0 > /cgroup/test/tasks
+
+ Run malloc(100M) program under this. You'll see 60M of swaps.
+
+ (Shell-B)::
+
+ # move all tasks in /cgroup/test to /cgroup
+ # /sbin/swapoff -a
+ # rmdir /cgroup/test
+ # kill malloc task.
+
+ Of course, tmpfs v.s. swapoff test should be tested, too.
+
+9.8 OOM-Killer
+--------------
+
+ Out-of-memory caused by memcg's limit will kill tasks under
+ the memcg. When hierarchy is used, a task under hierarchy
+ will be killed by the kernel.
+
+ In this case, panic_on_oom shouldn't be invoked and tasks
+ in other groups shouldn't be killed.
+
+ It's not difficult to cause OOM under memcg as following.
+
+ Case A) when you can swapoff::
+
+ #swapoff -a
+ #echo 50M > /memory.limit_in_bytes
+
+ run 51M of malloc
+
+ Case B) when you use mem+swap limitation::
+
+ #echo 50M > memory.limit_in_bytes
+ #echo 50M > memory.memsw.limit_in_bytes
+
+ run 51M of malloc
+
+9.9 Move charges at task migration
+----------------------------------
+
+ Charges associated with a task can be moved along with task migration.
+
+ (Shell-A)::
+
+ #mkdir /cgroup/A
+ #echo $$ >/cgroup/A/tasks
+
+ run some programs which uses some amount of memory in /cgroup/A.
+
+ (Shell-B)::
+
+ #mkdir /cgroup/B
+ #echo 1 >/cgroup/B/memory.move_charge_at_immigrate
+ #echo "pid of the program running in group A" >/cgroup/B/tasks
+
+ You can see charges have been moved by reading ``*.usage_in_bytes`` or
+ memory.stat of both A and B.
+
+ See 8.2 of Documentation/admin-guide/cgroup-v1/memory.rst to see what value should
+ be written to move_charge_at_immigrate.
+
+9.10 Memory thresholds
+----------------------
+
+ Memory controller implements memory thresholds using cgroups notification
+ API. You can use tools/cgroup/cgroup_event_listener.c to test it.
+
+ (Shell-A) Create cgroup and run event listener::
+
+ # mkdir /cgroup/A
+ # ./cgroup_event_listener /cgroup/A/memory.usage_in_bytes 5M
+
+ (Shell-B) Add task to cgroup and try to allocate and free memory::
+
+ # echo $$ >/cgroup/A/tasks
+ # a="$(dd if=/dev/zero bs=1M count=10)"
+ # a=
+
+ You will see message from cgroup_event_listener every time you cross
+ the thresholds.
+
+ Use /cgroup/A/memory.memsw.usage_in_bytes to test memsw thresholds.
+
+ It's good idea to test root cgroup as well.
diff --git a/Documentation/admin-guide/cgroup-v1/memory.rst b/Documentation/admin-guide/cgroup-v1/memory.rst
new file mode 100644
index 000000000000..d6b1db8cc7eb
--- /dev/null
+++ b/Documentation/admin-guide/cgroup-v1/memory.rst
@@ -0,0 +1,964 @@
+==========================
+Memory Resource Controller
+==========================
+
+.. caution::
+ This document is hopelessly outdated and it asks for a complete
+ rewrite. It still contains a useful information so we are keeping it
+ here but make sure to check the current code if you need a deeper
+ understanding.
+
+.. note::
+ The Memory Resource Controller has generically been referred to as the
+ memory controller in this document. Do not confuse memory controller
+ used here with the memory controller that is used in hardware.
+
+.. hint::
+ When we mention a cgroup (cgroupfs's directory) with memory controller,
+ we call it "memory cgroup". When you see git-log and source code, you'll
+ see patch's title and function names tend to use "memcg".
+ In this document, we avoid using it.
+
+Benefits and Purpose of the memory controller
+=============================================
+
+The memory controller isolates the memory behaviour of a group of tasks
+from the rest of the system. The article on LWN [12]_ mentions some probable
+uses of the memory controller. The memory controller can be used to
+
+a. Isolate an application or a group of applications
+ Memory-hungry applications can be isolated and limited to a smaller
+ amount of memory.
+b. Create a cgroup with a limited amount of memory; this can be used
+ as a good alternative to booting with mem=XXXX.
+c. Virtualization solutions can control the amount of memory they want
+ to assign to a virtual machine instance.
+d. A CD/DVD burner could control the amount of memory used by the
+ rest of the system to ensure that burning does not fail due to lack
+ of available memory.
+e. There are several other use cases; find one or use the controller just
+ for fun (to learn and hack on the VM subsystem).
+
+Current Status: linux-2.6.34-mmotm(development version of 2010/April)
+
+Features:
+
+ - accounting anonymous pages, file caches, swap caches usage and limiting them.
+ - pages are linked to per-memcg LRU exclusively, and there is no global LRU.
+ - optionally, memory+swap usage can be accounted and limited.
+ - hierarchical accounting
+ - soft limit
+ - moving (recharging) account at moving a task is selectable.
+ - usage threshold notifier
+ - memory pressure notifier
+ - oom-killer disable knob and oom-notifier
+ - Root cgroup has no limit controls.
+
+ Kernel memory support is a work in progress, and the current version provides
+ basically functionality. (See :ref:`section 2.7
+ <cgroup-v1-memory-kernel-extension>`)
+
+Brief summary of control files.
+
+==================================== ==========================================
+ tasks attach a task(thread) and show list of
+ threads
+ cgroup.procs show list of processes
+ cgroup.event_control an interface for event_fd()
+ This knob is not available on CONFIG_PREEMPT_RT systems.
+ memory.usage_in_bytes show current usage for memory
+ (See 5.5 for details)
+ memory.memsw.usage_in_bytes show current usage for memory+Swap
+ (See 5.5 for details)
+ memory.limit_in_bytes set/show limit of memory usage
+ memory.memsw.limit_in_bytes set/show limit of memory+Swap usage
+ memory.failcnt show the number of memory usage hits limits
+ memory.memsw.failcnt show the number of memory+Swap hits limits
+ memory.max_usage_in_bytes show max memory usage recorded
+ memory.memsw.max_usage_in_bytes show max memory+Swap usage recorded
+ memory.soft_limit_in_bytes set/show soft limit of memory usage
+ This knob is not available on CONFIG_PREEMPT_RT systems.
+ This knob is deprecated and shouldn't be
+ used.
+ memory.stat show various statistics
+ memory.use_hierarchy set/show hierarchical account enabled
+ This knob is deprecated and shouldn't be
+ used.
+ memory.force_empty trigger forced page reclaim
+ memory.pressure_level set memory pressure notifications
+ This knob is deprecated and shouldn't be
+ used.
+ memory.swappiness set/show swappiness parameter of vmscan
+ (See sysctl's vm.swappiness)
+ Per memcg knob does not exist in cgroup v2.
+ memory.move_charge_at_immigrate This knob is deprecated.
+ memory.oom_control set/show oom controls.
+ This knob is deprecated and shouldn't be
+ used.
+ memory.numa_stat show the number of memory usage per numa
+ node
+ memory.kmem.limit_in_bytes Deprecated knob to set and read the kernel
+ memory hard limit. Kernel hard limit is not
+ supported since 5.16. Writing any value to
+ do file will not have any effect same as if
+ nokmem kernel parameter was specified.
+ Kernel memory is still charged and reported
+ by memory.kmem.usage_in_bytes.
+ memory.kmem.usage_in_bytes show current kernel memory allocation
+ memory.kmem.failcnt show the number of kernel memory usage
+ hits limits
+ memory.kmem.max_usage_in_bytes show max kernel memory usage recorded
+
+ memory.kmem.tcp.limit_in_bytes set/show hard limit for tcp buf memory
+ This knob is deprecated and shouldn't be
+ used.
+ memory.kmem.tcp.usage_in_bytes show current tcp buf memory allocation
+ This knob is deprecated and shouldn't be
+ used.
+ memory.kmem.tcp.failcnt show the number of tcp buf memory usage
+ hits limits
+ This knob is deprecated and shouldn't be
+ used.
+ memory.kmem.tcp.max_usage_in_bytes show max tcp buf memory usage recorded
+ This knob is deprecated and shouldn't be
+ used.
+==================================== ==========================================
+
+1. History
+==========
+
+The memory controller has a long history. A request for comments for the memory
+controller was posted by Balbir Singh [1]_. At the time the RFC was posted
+there were several implementations for memory control. The goal of the
+RFC was to build consensus and agreement for the minimal features required
+for memory control. The first RSS controller was posted by Balbir Singh [2]_
+in Feb 2007. Pavel Emelianov [3]_ [4]_ [5]_ has since posted three versions
+of the RSS controller. At OLS, at the resource management BoF, everyone
+suggested that we handle both page cache and RSS together. Another request was
+raised to allow user space handling of OOM. The current memory controller is
+at version 6; it combines both mapped (RSS) and unmapped Page
+Cache Control [11]_.
+
+2. Memory Control
+=================
+
+Memory is a unique resource in the sense that it is present in a limited
+amount. If a task requires a lot of CPU processing, the task can spread
+its processing over a period of hours, days, months or years, but with
+memory, the same physical memory needs to be reused to accomplish the task.
+
+The memory controller implementation has been divided into phases. These
+are:
+
+1. Memory controller
+2. mlock(2) controller
+3. Kernel user memory accounting and slab control
+4. user mappings length controller
+
+The memory controller is the first controller developed.
+
+2.1. Design
+-----------
+
+The core of the design is a counter called the page_counter. The
+page_counter tracks the current memory usage and limit of the group of
+processes associated with the controller. Each cgroup has a memory controller
+specific data structure (mem_cgroup) associated with it.
+
+2.2. Accounting
+---------------
+
+.. code-block::
+ :caption: Figure 1: Hierarchy of Accounting
+
+ +--------------------+
+ | mem_cgroup |
+ | (page_counter) |
+ +--------------------+
+ / ^ \
+ / | \
+ +---------------+ | +---------------+
+ | mm_struct | |.... | mm_struct |
+ | | | | |
+ +---------------+ | +---------------+
+ |
+ + --------------+
+ |
+ +---------------+ +------+--------+
+ | page +----------> page_cgroup|
+ | | | |
+ +---------------+ +---------------+
+
+
+
+Figure 1 shows the important aspects of the controller
+
+1. Accounting happens per cgroup
+2. Each mm_struct knows about which cgroup it belongs to
+3. Each page has a pointer to the page_cgroup, which in turn knows the
+ cgroup it belongs to
+
+The accounting is done as follows: mem_cgroup_charge_common() is invoked to
+set up the necessary data structures and check if the cgroup that is being
+charged is over its limit. If it is, then reclaim is invoked on the cgroup.
+More details can be found in the reclaim section of this document.
+If everything goes well, a page meta-data-structure called page_cgroup is
+updated. page_cgroup has its own LRU on cgroup.
+(*) page_cgroup structure is allocated at boot/memory-hotplug time.
+
+2.2.1 Accounting details
+------------------------
+
+All mapped anon pages (RSS) and cache pages (Page Cache) are accounted.
+Some pages which are never reclaimable and will not be on the LRU
+are not accounted. We just account pages under usual VM management.
+
+RSS pages are accounted at page_fault unless they've already been accounted
+for earlier. A file page will be accounted for as Page Cache when it's
+inserted into inode (xarray). While it's mapped into the page tables of
+processes, duplicate accounting is carefully avoided.
+
+An RSS page is unaccounted when it's fully unmapped. A PageCache page is
+unaccounted when it's removed from xarray. Even if RSS pages are fully
+unmapped (by kswapd), they may exist as SwapCache in the system until they
+are really freed. Such SwapCaches are also accounted.
+A swapped-in page is accounted after adding into swapcache.
+
+Note: The kernel does swapin-readahead and reads multiple swaps at once.
+Since page's memcg recorded into swap whatever memsw enabled, the page will
+be accounted after swapin.
+
+At page migration, accounting information is kept.
+
+Note: we just account pages-on-LRU because our purpose is to control amount
+of used pages; not-on-LRU pages tend to be out-of-control from VM view.
+
+2.3 Shared Page Accounting
+--------------------------
+
+Shared pages are accounted on the basis of the first touch approach. The
+cgroup that first touches a page is accounted for the page. The principle
+behind this approach is that a cgroup that aggressively uses a shared
+page will eventually get charged for it (once it is uncharged from
+the cgroup that brought it in -- this will happen on memory pressure).
+
+2.4 Swap Extension
+--------------------------------------
+
+Swap usage is always recorded for each of cgroup. Swap Extension allows you to
+read and limit it.
+
+When CONFIG_SWAP is enabled, following files are added.
+
+ - memory.memsw.usage_in_bytes.
+ - memory.memsw.limit_in_bytes.
+
+memsw means memory+swap. Usage of memory+swap is limited by
+memsw.limit_in_bytes.
+
+Example: Assume a system with 4G of swap. A task which allocates 6G of memory
+(by mistake) under 2G memory limitation will use all swap.
+In this case, setting memsw.limit_in_bytes=3G will prevent bad use of swap.
+By using the memsw limit, you can avoid system OOM which can be caused by swap
+shortage.
+
+2.4.1 why 'memory+swap' rather than swap
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The global LRU(kswapd) can swap out arbitrary pages. Swap-out means
+to move account from memory to swap...there is no change in usage of
+memory+swap. In other words, when we want to limit the usage of swap without
+affecting global LRU, memory+swap limit is better than just limiting swap from
+an OS point of view.
+
+2.4.2. What happens when a cgroup hits memory.memsw.limit_in_bytes
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When a cgroup hits memory.memsw.limit_in_bytes, it's useless to do swap-out
+in this cgroup. Then, swap-out will not be done by cgroup routine and file
+caches are dropped. But as mentioned above, global LRU can do swapout memory
+from it for sanity of the system's memory management state. You can't forbid
+it by cgroup.
+
+2.5 Reclaim
+-----------
+
+Each cgroup maintains a per cgroup LRU which has the same structure as
+global VM. When a cgroup goes over its limit, we first try
+to reclaim memory from the cgroup so as to make space for the new
+pages that the cgroup has touched. If the reclaim is unsuccessful,
+an OOM routine is invoked to select and kill the bulkiest task in the
+cgroup. (See :ref:`10. OOM Control <cgroup-v1-memory-oom-control>` below.)
+
+The reclaim algorithm has not been modified for cgroups, except that
+pages that are selected for reclaiming come from the per-cgroup LRU
+list.
+
+.. note::
+ Reclaim does not work for the root cgroup, since we cannot set any
+ limits on the root cgroup.
+
+.. note::
+ When panic_on_oom is set to "2", the whole system will panic.
+
+When oom event notifier is registered, event will be delivered.
+(See :ref:`oom_control <cgroup-v1-memory-oom-control>` section)
+
+2.6 Locking
+-----------
+
+Lock order is as follows::
+
+ folio_lock
+ mm->page_table_lock or split pte_lock
+ folio_memcg_lock (memcg->move_lock)
+ mapping->i_pages lock
+ lruvec->lru_lock.
+
+Per-node-per-memcgroup LRU (cgroup's private LRU) is guarded by
+lruvec->lru_lock; the folio LRU flag is cleared before
+isolating a page from its LRU under lruvec->lru_lock.
+
+.. _cgroup-v1-memory-kernel-extension:
+
+2.7 Kernel Memory Extension
+-----------------------------------------------
+
+With the Kernel memory extension, the Memory Controller is able to limit
+the amount of kernel memory used by the system. Kernel memory is fundamentally
+different than user memory, since it can't be swapped out, which makes it
+possible to DoS the system by consuming too much of this precious resource.
+
+Kernel memory accounting is enabled for all memory cgroups by default. But
+it can be disabled system-wide by passing cgroup.memory=nokmem to the kernel
+at boot time. In this case, kernel memory will not be accounted at all.
+
+Kernel memory limits are not imposed for the root cgroup. Usage for the root
+cgroup may or may not be accounted. The memory used is accumulated into
+memory.kmem.usage_in_bytes, or in a separate counter when it makes sense.
+(currently only for tcp).
+
+The main "kmem" counter is fed into the main counter, so kmem charges will
+also be visible from the user counter.
+
+Currently no soft limit is implemented for kernel memory. It is future work
+to trigger slab reclaim when those limits are reached.
+
+2.7.1 Current Kernel Memory resources accounted
+-----------------------------------------------
+
+stack pages:
+ every process consumes some stack pages. By accounting into
+ kernel memory, we prevent new processes from being created when the kernel
+ memory usage is too high.
+
+slab pages:
+ pages allocated by the SLAB or SLUB allocator are tracked. A copy
+ of each kmem_cache is created every time the cache is touched by the first time
+ from inside the memcg. The creation is done lazily, so some objects can still be
+ skipped while the cache is being created. All objects in a slab page should
+ belong to the same memcg. This only fails to hold when a task is migrated to a
+ different memcg during the page allocation by the cache.
+
+sockets memory pressure:
+ some sockets protocols have memory pressure
+ thresholds. The Memory Controller allows them to be controlled individually
+ per cgroup, instead of globally.
+
+tcp memory pressure:
+ sockets memory pressure for the tcp protocol.
+
+2.7.2 Common use cases
+----------------------
+
+Because the "kmem" counter is fed to the main user counter, kernel memory can
+never be limited completely independently of user memory. Say "U" is the user
+limit, and "K" the kernel limit. There are three possible ways limits can be
+set:
+
+U != 0, K = unlimited:
+ This is the standard memcg limitation mechanism already present before kmem
+ accounting. Kernel memory is completely ignored.
+
+U != 0, K < U:
+ Kernel memory is a subset of the user memory. This setup is useful in
+ deployments where the total amount of memory per-cgroup is overcommitted.
+ Overcommitting kernel memory limits is definitely not recommended, since the
+ box can still run out of non-reclaimable memory.
+ In this case, the admin could set up K so that the sum of all groups is
+ never greater than the total memory, and freely set U at the cost of his
+ QoS.
+
+ .. warning::
+ In the current implementation, memory reclaim will NOT be triggered for
+ a cgroup when it hits K while staying below U, which makes this setup
+ impractical.
+
+U != 0, K >= U:
+ Since kmem charges will also be fed to the user counter and reclaim will be
+ triggered for the cgroup for both kinds of memory. This setup gives the
+ admin a unified view of memory, and it is also useful for people who just
+ want to track kernel memory usage.
+
+3. User Interface
+=================
+
+To use the user interface:
+
+1. Enable CONFIG_CGROUPS and CONFIG_MEMCG options
+2. Prepare the cgroups (see :ref:`Why are cgroups needed?
+ <cgroups-why-needed>` for the background information)::
+
+ # mount -t tmpfs none /sys/fs/cgroup
+ # mkdir /sys/fs/cgroup/memory
+ # mount -t cgroup none /sys/fs/cgroup/memory -o memory
+
+3. Make the new group and move bash into it::
+
+ # mkdir /sys/fs/cgroup/memory/0
+ # echo $$ > /sys/fs/cgroup/memory/0/tasks
+
+4. Since now we're in the 0 cgroup, we can alter the memory limit::
+
+ # echo 4M > /sys/fs/cgroup/memory/0/memory.limit_in_bytes
+
+ The limit can now be queried::
+
+ # cat /sys/fs/cgroup/memory/0/memory.limit_in_bytes
+ 4194304
+
+.. note::
+ We can use a suffix (k, K, m, M, g or G) to indicate values in kilo,
+ mega or gigabytes. (Here, Kilo, Mega, Giga are Kibibytes, Mebibytes,
+ Gibibytes.)
+
+.. note::
+ We can write "-1" to reset the ``*.limit_in_bytes(unlimited)``.
+
+.. note::
+ We cannot set limits on the root cgroup any more.
+
+
+We can check the usage::
+
+ # cat /sys/fs/cgroup/memory/0/memory.usage_in_bytes
+ 1216512
+
+A successful write to this file does not guarantee a successful setting of
+this limit to the value written into the file. This can be due to a
+number of factors, such as rounding up to page boundaries or the total
+availability of memory on the system. The user is required to re-read
+this file after a write to guarantee the value committed by the kernel::
+
+ # echo 1 > memory.limit_in_bytes
+ # cat memory.limit_in_bytes
+ 4096
+
+The memory.failcnt field gives the number of times that the cgroup limit was
+exceeded.
+
+The memory.stat file gives accounting information. Now, the number of
+caches, RSS and Active pages/Inactive pages are shown.
+
+4. Testing
+==========
+
+For testing features and implementation, see memcg_test.txt.
+
+Performance test is also important. To see pure memory controller's overhead,
+testing on tmpfs will give you good numbers of small overheads.
+Example: do kernel make on tmpfs.
+
+Page-fault scalability is also important. At measuring parallel
+page fault test, multi-process test may be better than multi-thread
+test because it has noise of shared objects/status.
+
+But the above two are testing extreme situations.
+Trying usual test under memory controller is always helpful.
+
+.. _cgroup-v1-memory-test-troubleshoot:
+
+4.1 Troubleshooting
+-------------------
+
+Sometimes a user might find that the application under a cgroup is
+terminated by the OOM killer. There are several causes for this:
+
+1. The cgroup limit is too low (just too low to do anything useful)
+2. The user is using anonymous memory and swap is turned off or too low
+
+A sync followed by echo 1 > /proc/sys/vm/drop_caches will help get rid of
+some of the pages cached in the cgroup (page cache pages).
+
+To know what happens, disabling OOM_Kill as per :ref:`"10. OOM Control"
+<cgroup-v1-memory-oom-control>` (below) and seeing what happens will be
+helpful.
+
+.. _cgroup-v1-memory-test-task-migration:
+
+4.2 Task migration
+------------------
+
+When a task migrates from one cgroup to another, its charge is not
+carried forward by default. The pages allocated from the original cgroup still
+remain charged to it, the charge is dropped when the page is freed or
+reclaimed.
+
+You can move charges of a task along with task migration.
+See :ref:`8. "Move charges at task migration" <cgroup-v1-memory-move-charges>`
+
+4.3 Removing a cgroup
+---------------------
+
+A cgroup can be removed by rmdir, but as discussed in :ref:`sections 4.1
+<cgroup-v1-memory-test-troubleshoot>` and :ref:`4.2
+<cgroup-v1-memory-test-task-migration>`, a cgroup might have some charge
+associated with it, even though all tasks have migrated away from it. (because
+we charge against pages, not against tasks.)
+
+We move the stats to parent, and no change on the charge except uncharging
+from the child.
+
+Charges recorded in swap information is not updated at removal of cgroup.
+Recorded information is discarded and a cgroup which uses swap (swapcache)
+will be charged as a new owner of it.
+
+5. Misc. interfaces
+===================
+
+5.1 force_empty
+---------------
+ memory.force_empty interface is provided to make cgroup's memory usage empty.
+ When writing anything to this::
+
+ # echo 0 > memory.force_empty
+
+ the cgroup will be reclaimed and as many pages reclaimed as possible.
+
+ The typical use case for this interface is before calling rmdir().
+ Though rmdir() offlines memcg, but the memcg may still stay there due to
+ charged file caches. Some out-of-use page caches may keep charged until
+ memory pressure happens. If you want to avoid that, force_empty will be useful.
+
+5.2 stat file
+-------------
+
+memory.stat file includes following statistics:
+
+ * per-memory cgroup local status
+
+ =============== ===============================================================
+ cache # of bytes of page cache memory.
+ rss # of bytes of anonymous and swap cache memory (includes
+ transparent hugepages).
+ rss_huge # of bytes of anonymous transparent hugepages.
+ mapped_file # of bytes of mapped file (includes tmpfs/shmem)
+ pgpgin # of charging events to the memory cgroup. The charging
+ event happens each time a page is accounted as either mapped
+ anon page(RSS) or cache page(Page Cache) to the cgroup.
+ pgpgout # of uncharging events to the memory cgroup. The uncharging
+ event happens each time a page is unaccounted from the
+ cgroup.
+ swap # of bytes of swap usage
+ swapcached # of bytes of swap cached in memory
+ dirty # of bytes that are waiting to get written back to the disk.
+ writeback # of bytes of file/anon cache that are queued for syncing to
+ disk.
+ inactive_anon # of bytes of anonymous and swap cache memory on inactive
+ LRU list.
+ active_anon # of bytes of anonymous and swap cache memory on active
+ LRU list.
+ inactive_file # of bytes of file-backed memory and MADV_FREE anonymous
+ memory (LazyFree pages) on inactive LRU list.
+ active_file # of bytes of file-backed memory on active LRU list.
+ unevictable # of bytes of memory that cannot be reclaimed (mlocked etc).
+ =============== ===============================================================
+
+ * status considering hierarchy (see memory.use_hierarchy settings):
+
+ ========================= ===================================================
+ hierarchical_memory_limit # of bytes of memory limit with regard to
+ hierarchy
+ under which the memory cgroup is
+ hierarchical_memsw_limit # of bytes of memory+swap limit with regard to
+ hierarchy under which memory cgroup is.
+
+ total_<counter> # hierarchical version of <counter>, which in
+ addition to the cgroup's own value includes the
+ sum of all hierarchical children's values of
+ <counter>, i.e. total_cache
+ ========================= ===================================================
+
+ * additional vm parameters (depends on CONFIG_DEBUG_VM):
+
+ ========================= ========================================
+ recent_rotated_anon VM internal parameter. (see mm/vmscan.c)
+ recent_rotated_file VM internal parameter. (see mm/vmscan.c)
+ recent_scanned_anon VM internal parameter. (see mm/vmscan.c)
+ recent_scanned_file VM internal parameter. (see mm/vmscan.c)
+ ========================= ========================================
+
+.. hint::
+ recent_rotated means recent frequency of LRU rotation.
+ recent_scanned means recent # of scans to LRU.
+ showing for better debug please see the code for meanings.
+
+.. note::
+ Only anonymous and swap cache memory is listed as part of 'rss' stat.
+ This should not be confused with the true 'resident set size' or the
+ amount of physical memory used by the cgroup.
+
+ 'rss + mapped_file" will give you resident set size of cgroup.
+
+ Note that some kernel configurations might account complete larger
+ allocations (e.g., THP) towards 'rss' and 'mapped_file', even if
+ only some, but not all that memory is mapped.
+
+ (Note: file and shmem may be shared among other cgroups. In that case,
+ mapped_file is accounted only when the memory cgroup is owner of page
+ cache.)
+
+5.3 swappiness
+--------------
+
+Overrides /proc/sys/vm/swappiness for the particular group. The tunable
+in the root cgroup corresponds to the global swappiness setting.
+
+Please note that unlike during the global reclaim, limit reclaim
+enforces that 0 swappiness really prevents from any swapping even if
+there is a swap storage available. This might lead to memcg OOM killer
+if there are no file pages to reclaim.
+
+5.4 failcnt
+-----------
+
+A memory cgroup provides memory.failcnt and memory.memsw.failcnt files.
+This failcnt(== failure count) shows the number of times that a usage counter
+hit its limit. When a memory cgroup hits a limit, failcnt increases and
+memory under it will be reclaimed.
+
+You can reset failcnt by writing 0 to failcnt file::
+
+ # echo 0 > .../memory.failcnt
+
+5.5 usage_in_bytes
+------------------
+
+For efficiency, as other kernel components, memory cgroup uses some optimization
+to avoid unnecessary cacheline false sharing. usage_in_bytes is affected by the
+method and doesn't show 'exact' value of memory (and swap) usage, it's a fuzz
+value for efficient access. (Of course, when necessary, it's synchronized.)
+If you want to know more exact memory usage, you should use RSS+CACHE(+SWAP)
+value in memory.stat(see 5.2).
+
+5.6 numa_stat
+-------------
+
+This is similar to numa_maps but operates on a per-memcg basis. This is
+useful for providing visibility into the numa locality information within
+an memcg since the pages are allowed to be allocated from any physical
+node. One of the use cases is evaluating application performance by
+combining this information with the application's CPU allocation.
+
+Each memcg's numa_stat file includes "total", "file", "anon" and "unevictable"
+per-node page counts including "hierarchical_<counter>" which sums up all
+hierarchical children's values in addition to the memcg's own value.
+
+The output format of memory.numa_stat is::
+
+ total=<total pages> N0=<node 0 pages> N1=<node 1 pages> ...
+ file=<total file pages> N0=<node 0 pages> N1=<node 1 pages> ...
+ anon=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
+ unevictable=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
+ hierarchical_<counter>=<counter pages> N0=<node 0 pages> N1=<node 1 pages> ...
+
+The "total" count is sum of file + anon + unevictable.
+
+6. Hierarchy support
+====================
+
+The memory controller supports a deep hierarchy and hierarchical accounting.
+The hierarchy is created by creating the appropriate cgroups in the
+cgroup filesystem. Consider for example, the following cgroup filesystem
+hierarchy::
+
+ root
+ / | \
+ / | \
+ a b c
+ | \
+ | \
+ d e
+
+In the diagram above, with hierarchical accounting enabled, all memory
+usage of e, is accounted to its ancestors up until the root (i.e, c and root).
+If one of the ancestors goes over its limit, the reclaim algorithm reclaims
+from the tasks in the ancestor and the children of the ancestor.
+
+6.1 Hierarchical accounting and reclaim
+---------------------------------------
+
+Hierarchical accounting is enabled by default. Disabling the hierarchical
+accounting is deprecated. An attempt to do it will result in a failure
+and a warning printed to dmesg.
+
+For compatibility reasons writing 1 to memory.use_hierarchy will always pass::
+
+ # echo 1 > memory.use_hierarchy
+
+7. Soft limits (DEPRECATED)
+===========================
+
+THIS IS DEPRECATED!
+
+Soft limits allow for greater sharing of memory. The idea behind soft limits
+is to allow control groups to use as much of the memory as needed, provided
+
+a. There is no memory contention
+b. They do not exceed their hard limit
+
+When the system detects memory contention or low memory, control groups
+are pushed back to their soft limits. If the soft limit of each control
+group is very high, they are pushed back as much as possible to make
+sure that one control group does not starve the others of memory.
+
+Please note that soft limits is a best-effort feature; it comes with
+no guarantees, but it does its best to make sure that when memory is
+heavily contended for, memory is allocated based on the soft limit
+hints/setup. Currently soft limit based reclaim is set up such that
+it gets invoked from balance_pgdat (kswapd).
+
+7.1 Interface
+-------------
+
+Soft limits can be setup by using the following commands (in this example we
+assume a soft limit of 256 MiB)::
+
+ # echo 256M > memory.soft_limit_in_bytes
+
+If we want to change this to 1G, we can at any time use::
+
+ # echo 1G > memory.soft_limit_in_bytes
+
+.. note::
+ Soft limits take effect over a long period of time, since they involve
+ reclaiming memory for balancing between memory cgroups
+
+.. note::
+ It is recommended to set the soft limit always below the hard limit,
+ otherwise the hard limit will take precedence.
+
+.. _cgroup-v1-memory-move-charges:
+
+8. Move charges at task migration (DEPRECATED!)
+===============================================
+
+THIS IS DEPRECATED!
+
+Reading memory.move_charge_at_immigrate will always return 0 and writing
+to it will always return -EINVAL.
+
+9. Memory thresholds
+====================
+
+Memory cgroup implements memory thresholds using the cgroups notification
+API (see cgroups.txt). It allows to register multiple memory and memsw
+thresholds and gets notifications when it crosses.
+
+To register a threshold, an application must:
+
+- create an eventfd using eventfd(2);
+- open memory.usage_in_bytes or memory.memsw.usage_in_bytes;
+- write string like "<event_fd> <fd of memory.usage_in_bytes> <threshold>" to
+ cgroup.event_control.
+
+Application will be notified through eventfd when memory usage crosses
+threshold in any direction.
+
+It's applicable for root and non-root cgroup.
+
+.. _cgroup-v1-memory-oom-control:
+
+10. OOM Control (DEPRECATED)
+============================
+
+THIS IS DEPRECATED!
+
+memory.oom_control file is for OOM notification and other controls.
+
+Memory cgroup implements OOM notifier using the cgroup notification
+API (See cgroups.txt). It allows to register multiple OOM notification
+delivery and gets notification when OOM happens.
+
+To register a notifier, an application must:
+
+ - create an eventfd using eventfd(2)
+ - open memory.oom_control file
+ - write string like "<event_fd> <fd of memory.oom_control>" to
+ cgroup.event_control
+
+The application will be notified through eventfd when OOM happens.
+OOM notification doesn't work for the root cgroup.
+
+You can disable the OOM-killer by writing "1" to memory.oom_control file, as:
+
+ #echo 1 > memory.oom_control
+
+If OOM-killer is disabled, tasks under cgroup will hang/sleep
+in memory cgroup's OOM-waitqueue when they request accountable memory.
+
+For running them, you have to relax the memory cgroup's OOM status by
+
+ * enlarge limit or reduce usage.
+
+To reduce usage,
+
+ * kill some tasks.
+ * move some tasks to other group with account migration.
+ * remove some files (on tmpfs?)
+
+Then, stopped tasks will work again.
+
+At reading, current status of OOM is shown.
+
+ - oom_kill_disable 0 or 1
+ (if 1, oom-killer is disabled)
+ - under_oom 0 or 1
+ (if 1, the memory cgroup is under OOM, tasks may be stopped.)
+ - oom_kill integer counter
+ The number of processes belonging to this cgroup killed by any
+ kind of OOM killer.
+
+11. Memory Pressure (DEPRECATED)
+================================
+
+THIS IS DEPRECATED!
+
+The pressure level notifications can be used to monitor the memory
+allocation cost; based on the pressure, applications can implement
+different strategies of managing their memory resources. The pressure
+levels are defined as following:
+
+The "low" level means that the system is reclaiming memory for new
+allocations. Monitoring this reclaiming activity might be useful for
+maintaining cache level. Upon notification, the program (typically
+"Activity Manager") might analyze vmstat and act in advance (i.e.
+prematurely shutdown unimportant services).
+
+The "medium" level means that the system is experiencing medium memory
+pressure, the system might be making swap, paging out active file caches,
+etc. Upon this event applications may decide to further analyze
+vmstat/zoneinfo/memcg or internal memory usage statistics and free any
+resources that can be easily reconstructed or re-read from a disk.
+
+The "critical" level means that the system is actively thrashing, it is
+about to out of memory (OOM) or even the in-kernel OOM killer is on its
+way to trigger. Applications should do whatever they can to help the
+system. It might be too late to consult with vmstat or any other
+statistics, so it's advisable to take an immediate action.
+
+By default, events are propagated upward until the event is handled, i.e. the
+events are not pass-through. For example, you have three cgroups: A->B->C. Now
+you set up an event listener on cgroups A, B and C, and suppose group C
+experiences some pressure. In this situation, only group C will receive the
+notification, i.e. groups A and B will not receive it. This is done to avoid
+excessive "broadcasting" of messages, which disturbs the system and which is
+especially bad if we are low on memory or thrashing. Group B, will receive
+notification only if there are no event listeners for group C.
+
+There are three optional modes that specify different propagation behavior:
+
+ - "default": this is the default behavior specified above. This mode is the
+ same as omitting the optional mode parameter, preserved by backwards
+ compatibility.
+
+ - "hierarchy": events always propagate up to the root, similar to the default
+ behavior, except that propagation continues regardless of whether there are
+ event listeners at each level, with the "hierarchy" mode. In the above
+ example, groups A, B, and C will receive notification of memory pressure.
+
+ - "local": events are pass-through, i.e. they only receive notifications when
+ memory pressure is experienced in the memcg for which the notification is
+ registered. In the above example, group C will receive notification if
+ registered for "local" notification and the group experiences memory
+ pressure. However, group B will never receive notification, regardless if
+ there is an event listener for group C or not, if group B is registered for
+ local notification.
+
+The level and event notification mode ("hierarchy" or "local", if necessary) are
+specified by a comma-delimited string, i.e. "low,hierarchy" specifies
+hierarchical, pass-through, notification for all ancestor memcgs. Notification
+that is the default, non pass-through behavior, does not specify a mode.
+"medium,local" specifies pass-through notification for the medium level.
+
+The file memory.pressure_level is only used to setup an eventfd. To
+register a notification, an application must:
+
+- create an eventfd using eventfd(2);
+- open memory.pressure_level;
+- write string as "<event_fd> <fd of memory.pressure_level> <level[,mode]>"
+ to cgroup.event_control.
+
+Application will be notified through eventfd when memory pressure is at
+the specific level (or higher). Read/write operations to
+memory.pressure_level are no implemented.
+
+Test:
+
+ Here is a small script example that makes a new cgroup, sets up a
+ memory limit, sets up a notification in the cgroup and then makes child
+ cgroup experience a critical pressure::
+
+ # cd /sys/fs/cgroup/memory/
+ # mkdir foo
+ # cd foo
+ # cgroup_event_listener memory.pressure_level low,hierarchy &
+ # echo 8000000 > memory.limit_in_bytes
+ # echo 8000000 > memory.memsw.limit_in_bytes
+ # echo $$ > tasks
+ # dd if=/dev/zero | read x
+
+ (Expect a bunch of notifications, and eventually, the oom-killer will
+ trigger.)
+
+12. TODO
+========
+
+1. Make per-cgroup scanner reclaim not-shared pages first
+2. Teach controller to account for shared-pages
+3. Start reclamation in the background when the limit is
+ not yet hit but the usage is getting closer
+
+Summary
+=======
+
+Overall, the memory controller has been a stable controller and has been
+commented and discussed quite extensively in the community.
+
+References
+==========
+
+.. [1] Singh, Balbir. RFC: Memory Controller, http://lwn.net/Articles/206697/
+.. [2] Singh, Balbir. Memory Controller (RSS Control),
+ http://lwn.net/Articles/222762/
+.. [3] Emelianov, Pavel. Resource controllers based on process cgroups
+ https://lore.kernel.org/r/45ED7DEC.7010403@sw.ru
+.. [4] Emelianov, Pavel. RSS controller based on process cgroups (v2)
+ https://lore.kernel.org/r/461A3010.90403@sw.ru
+.. [5] Emelianov, Pavel. RSS controller based on process cgroups (v3)
+ https://lore.kernel.org/r/465D9739.8070209@openvz.org
+
+6. Menage, Paul. Control Groups v10, http://lwn.net/Articles/236032/
+7. Vaidyanathan, Srinivasan, Control Groups: Pagecache accounting and control
+ subsystem (v3), http://lwn.net/Articles/235534/
+8. Singh, Balbir. RSS controller v2 test results (lmbench),
+ https://lore.kernel.org/r/464C95D4.7070806@linux.vnet.ibm.com
+9. Singh, Balbir. RSS controller v2 AIM9 results
+ https://lore.kernel.org/r/464D267A.50107@linux.vnet.ibm.com
+10. Singh, Balbir. Memory controller v6 test results,
+ https://lore.kernel.org/r/20070819094658.654.84837.sendpatchset@balbir-laptop
+
+.. [11] Singh, Balbir. Memory controller introduction (v6),
+ https://lore.kernel.org/r/20070817084228.26003.12568.sendpatchset@balbir-laptop
+.. [12] Corbet, Jonathan, Controlling memory use in cgroups,
+ http://lwn.net/Articles/243795/
diff --git a/Documentation/admin-guide/cgroup-v1/misc.rst b/Documentation/admin-guide/cgroup-v1/misc.rst
new file mode 100644
index 000000000000..661614c24df3
--- /dev/null
+++ b/Documentation/admin-guide/cgroup-v1/misc.rst
@@ -0,0 +1,4 @@
+===============
+Misc controller
+===============
+Please refer "Misc" documentation in Documentation/admin-guide/cgroup-v2.rst
diff --git a/Documentation/admin-guide/cgroup-v1/net_cls.rst b/Documentation/admin-guide/cgroup-v1/net_cls.rst
new file mode 100644
index 000000000000..a2cf272af7a0
--- /dev/null
+++ b/Documentation/admin-guide/cgroup-v1/net_cls.rst
@@ -0,0 +1,44 @@
+=========================
+Network classifier cgroup
+=========================
+
+The Network classifier cgroup provides an interface to
+tag network packets with a class identifier (classid).
+
+The Traffic Controller (tc) can be used to assign
+different priorities to packets from different cgroups.
+Also, Netfilter (iptables) can use this tag to perform
+actions on such packets.
+
+Creating a net_cls cgroups instance creates a net_cls.classid file.
+This net_cls.classid value is initialized to 0.
+
+You can write hexadecimal values to net_cls.classid; the format for these
+values is 0xAAAABBBB; AAAA is the major handle number and BBBB
+is the minor handle number.
+Reading net_cls.classid yields a decimal result.
+
+Example::
+
+ mkdir /sys/fs/cgroup/net_cls
+ mount -t cgroup -onet_cls net_cls /sys/fs/cgroup/net_cls
+ mkdir /sys/fs/cgroup/net_cls/0
+ echo 0x100001 > /sys/fs/cgroup/net_cls/0/net_cls.classid
+
+- setting a 10:1 handle::
+
+ cat /sys/fs/cgroup/net_cls/0/net_cls.classid
+ 1048577
+
+- configuring tc::
+
+ tc qdisc add dev eth0 root handle 10: htb
+ tc class add dev eth0 parent 10: classid 10:1 htb rate 40mbit
+
+- creating traffic class 10:1::
+
+ tc filter add dev eth0 parent 10: protocol ip prio 10 handle 1: cgroup
+
+configuring iptables, basic example::
+
+ iptables -A OUTPUT -m cgroup ! --cgroup 0x100001 -j DROP
diff --git a/Documentation/admin-guide/cgroup-v1/net_prio.rst b/Documentation/admin-guide/cgroup-v1/net_prio.rst
new file mode 100644
index 000000000000..b40905871c64
--- /dev/null
+++ b/Documentation/admin-guide/cgroup-v1/net_prio.rst
@@ -0,0 +1,57 @@
+=======================
+Network priority cgroup
+=======================
+
+The Network priority cgroup provides an interface to allow an administrator to
+dynamically set the priority of network traffic generated by various
+applications
+
+Nominally, an application would set the priority of its traffic via the
+SO_PRIORITY socket option. This however, is not always possible because:
+
+1) The application may not have been coded to set this value
+2) The priority of application traffic is often a site-specific administrative
+ decision rather than an application defined one.
+
+This cgroup allows an administrator to assign a process to a group which defines
+the priority of egress traffic on a given interface. Network priority groups can
+be created by first mounting the cgroup filesystem::
+
+ # mount -t cgroup -onet_prio none /sys/fs/cgroup/net_prio
+
+With the above step, the initial group acting as the parent accounting group
+becomes visible at '/sys/fs/cgroup/net_prio'. This group includes all tasks in
+the system. '/sys/fs/cgroup/net_prio/tasks' lists the tasks in this cgroup.
+
+Each net_prio cgroup contains two files that are subsystem specific
+
+net_prio.prioidx
+ This file is read-only, and is simply informative. It contains a unique
+ integer value that the kernel uses as an internal representation of this
+ cgroup.
+
+net_prio.ifpriomap
+ This file contains a map of the priorities assigned to traffic originating
+ from processes in this group and egressing the system on various interfaces.
+ It contains a list of tuples in the form <ifname priority>. Contents of this
+ file can be modified by echoing a string into the file using the same tuple
+ format. For example::
+
+ echo "eth0 5" > /sys/fs/cgroups/net_prio/iscsi/net_prio.ifpriomap
+
+This command would force any traffic originating from processes belonging to the
+iscsi net_prio cgroup and egressing on interface eth0 to have the priority of
+said traffic set to the value 5. The parent accounting group also has a
+writeable 'net_prio.ifpriomap' file that can be used to set a system default
+priority.
+
+Priorities are set immediately prior to queueing a frame to the device
+queueing discipline (qdisc) so priorities will be assigned prior to the hardware
+queue selection being made.
+
+One usage for the net_prio cgroup is with mqprio qdisc allowing application
+traffic to be steered to hardware/driver based traffic classes. These mappings
+can then be managed by administrators or other networking protocols such as
+DCBX.
+
+A new net_prio cgroup inherits the parent's configuration.
diff --git a/Documentation/admin-guide/cgroup-v1/pids.rst b/Documentation/admin-guide/cgroup-v1/pids.rst
new file mode 100644
index 000000000000..0f9f9a7b1f6c
--- /dev/null
+++ b/Documentation/admin-guide/cgroup-v1/pids.rst
@@ -0,0 +1,93 @@
+=========================
+Process Number Controller
+=========================
+
+Abstract
+--------
+
+The process number controller is used to allow a cgroup hierarchy to stop any
+new tasks from being fork()'d or clone()'d after a certain limit is reached.
+
+Since it is trivial to hit the task limit without hitting any kmemcg limits in
+place, PIDs are a fundamental resource. As such, PID exhaustion must be
+preventable in the scope of a cgroup hierarchy by allowing resource limiting of
+the number of tasks in a cgroup.
+
+Usage
+-----
+
+In order to use the `pids` controller, set the maximum number of tasks in
+pids.max (this is not available in the root cgroup for obvious reasons). The
+number of processes currently in the cgroup is given by pids.current.
+
+Organisational operations are not blocked by cgroup policies, so it is possible
+to have pids.current > pids.max. This can be done by either setting the limit to
+be smaller than pids.current, or attaching enough processes to the cgroup such
+that pids.current > pids.max. However, it is not possible to violate a cgroup
+policy through fork() or clone(). fork() and clone() will return -EAGAIN if the
+creation of a new process would cause a cgroup policy to be violated.
+
+To set a cgroup to have no limit, set pids.max to "max". This is the default for
+all new cgroups (N.B. that PID limits are hierarchical, so the most stringent
+limit in the hierarchy is followed).
+
+pids.current tracks all child cgroup hierarchies, so parent/pids.current is a
+superset of parent/child/pids.current.
+
+The pids.events file contains event counters:
+
+ - max: Number of times fork failed in the cgroup because limit was hit in
+ self or ancestors.
+
+Example
+-------
+
+First, we mount the pids controller::
+
+ # mkdir -p /sys/fs/cgroup/pids
+ # mount -t cgroup -o pids none /sys/fs/cgroup/pids
+
+Then we create a hierarchy, set limits and attach processes to it::
+
+ # mkdir -p /sys/fs/cgroup/pids/parent/child
+ # echo 2 > /sys/fs/cgroup/pids/parent/pids.max
+ # echo $$ > /sys/fs/cgroup/pids/parent/cgroup.procs
+ # cat /sys/fs/cgroup/pids/parent/pids.current
+ 2
+ #
+
+It should be noted that attempts to overcome the set limit (2 in this case) will
+fail::
+
+ # cat /sys/fs/cgroup/pids/parent/pids.current
+ 2
+ # ( /bin/echo "Here's some processes for you." | cat )
+ sh: fork: Resource temporary unavailable
+ #
+
+Even if we migrate to a child cgroup (which doesn't have a set limit), we will
+not be able to overcome the most stringent limit in the hierarchy (in this case,
+parent's)::
+
+ # echo $$ > /sys/fs/cgroup/pids/parent/child/cgroup.procs
+ # cat /sys/fs/cgroup/pids/parent/pids.current
+ 2
+ # cat /sys/fs/cgroup/pids/parent/child/pids.current
+ 2
+ # cat /sys/fs/cgroup/pids/parent/child/pids.max
+ max
+ # ( /bin/echo "Here's some processes for you." | cat )
+ sh: fork: Resource temporary unavailable
+ #
+
+We can set a limit that is smaller than pids.current, which will stop any new
+processes from being forked at all (note that the shell itself counts towards
+pids.current)::
+
+ # echo 1 > /sys/fs/cgroup/pids/parent/pids.max
+ # /bin/echo "We can't even spawn a single process now."
+ sh: fork: Resource temporary unavailable
+ # echo 0 > /sys/fs/cgroup/pids/parent/pids.max
+ # /bin/echo "We can't even spawn a single process now."
+ sh: fork: Resource temporary unavailable
+ #
diff --git a/Documentation/admin-guide/cgroup-v1/rdma.rst b/Documentation/admin-guide/cgroup-v1/rdma.rst
new file mode 100644
index 000000000000..e69369b7252e
--- /dev/null
+++ b/Documentation/admin-guide/cgroup-v1/rdma.rst
@@ -0,0 +1,117 @@
+===============
+RDMA Controller
+===============
+
+.. Contents
+
+ 1. Overview
+ 1-1. What is RDMA controller?
+ 1-2. Why RDMA controller needed?
+ 1-3. How is RDMA controller implemented?
+ 2. Usage Examples
+
+1. Overview
+===========
+
+1-1. What is RDMA controller?
+-----------------------------
+
+RDMA controller allows user to limit RDMA/IB specific resources that a given
+set of processes can use. These processes are grouped using RDMA controller.
+
+RDMA controller defines two resources which can be limited for processes of a
+cgroup.
+
+1-2. Why RDMA controller needed?
+--------------------------------
+
+Currently user space applications can easily take away all the rdma verb
+specific resources such as AH, CQ, QP, MR etc. Due to which other applications
+in other cgroup or kernel space ULPs may not even get chance to allocate any
+rdma resources. This can lead to service unavailability.
+
+Therefore RDMA controller is needed through which resource consumption
+of processes can be limited. Through this controller different rdma
+resources can be accounted.
+
+1-3. How is RDMA controller implemented?
+----------------------------------------
+
+RDMA cgroup allows limit configuration of resources. Rdma cgroup maintains
+resource accounting per cgroup, per device using resource pool structure.
+Each such resource pool is limited up to 64 resources in given resource pool
+by rdma cgroup, which can be extended later if required.
+
+This resource pool object is linked to the cgroup css. Typically there
+are 0 to 4 resource pool instances per cgroup, per device in most use cases.
+But nothing limits to have it more. At present hundreds of RDMA devices per
+single cgroup may not be handled optimally, however there is no
+known use case or requirement for such configuration either.
+
+Since RDMA resources can be allocated from any process and can be freed by any
+of the child processes which shares the address space, rdma resources are
+always owned by the creator cgroup css. This allows process migration from one
+to other cgroup without major complexity of transferring resource ownership;
+because such ownership is not really present due to shared nature of
+rdma resources. Linking resources around css also ensures that cgroups can be
+deleted after processes migrated. This allow progress migration as well with
+active resources, even though that is not a primary use case.
+
+Whenever RDMA resource charging occurs, owner rdma cgroup is returned to
+the caller. Same rdma cgroup should be passed while uncharging the resource.
+This also allows process migrated with active RDMA resource to charge
+to new owner cgroup for new resource. It also allows to uncharge resource of
+a process from previously charged cgroup which is migrated to new cgroup,
+even though that is not a primary use case.
+
+Resource pool object is created in following situations.
+(a) User sets the limit and no previous resource pool exist for the device
+of interest for the cgroup.
+(b) No resource limits were configured, but IB/RDMA stack tries to
+charge the resource. So that it correctly uncharge them when applications are
+running without limits and later on when limits are enforced during uncharging,
+otherwise usage count will drop to negative.
+
+Resource pool is destroyed if all the resource limits are set to max and
+it is the last resource getting deallocated.
+
+User should set all the limit to max value if it intents to remove/unconfigure
+the resource pool for a particular device.
+
+IB stack honors limits enforced by the rdma controller. When application
+query about maximum resource limits of IB device, it returns minimum of
+what is configured by user for a given cgroup and what is supported by
+IB device.
+
+Following resources can be accounted by rdma controller.
+
+ ========== =============================
+ hca_handle Maximum number of HCA Handles
+ hca_object Maximum number of HCA Objects
+ ========== =============================
+
+2. Usage Examples
+=================
+
+(a) Configure resource limit::
+
+ echo mlx4_0 hca_handle=2 hca_object=2000 > /sys/fs/cgroup/rdma/1/rdma.max
+ echo ocrdma1 hca_handle=3 > /sys/fs/cgroup/rdma/2/rdma.max
+
+(b) Query resource limit::
+
+ cat /sys/fs/cgroup/rdma/2/rdma.max
+ #Output:
+ mlx4_0 hca_handle=2 hca_object=2000
+ ocrdma1 hca_handle=3 hca_object=max
+
+(c) Query current usage::
+
+ cat /sys/fs/cgroup/rdma/2/rdma.current
+ #Output:
+ mlx4_0 hca_handle=1 hca_object=20
+ ocrdma1 hca_handle=1 hca_object=23
+
+(d) Delete resource limit::
+
+ echo mlx4_0 hca_handle=max hca_object=max > /sys/fs/cgroup/rdma/1/rdma.max
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 476722b7b636..0e6c67ac585a 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1,3 +1,5 @@
+.. _cgroup-v2:
+
================
Control Group v2
================
@@ -9,10 +11,13 @@ This is the authoritative documentation on the design, interface and
conventions of cgroup v2. It describes all userland-visible aspects
of cgroup including core and specific controller behaviors. All
future changes must be reflected in this document. Documentation for
-v1 is available under Documentation/cgroup-v1/.
+v1 is available under :ref:`Documentation/admin-guide/cgroup-v1/index.rst <cgroup-v1>`.
.. CONTENTS
+ [Whenever any new section is added to this document, please also add
+ an entry here.]
+
1. Introduction
1-1. Terminology
1-2. What is cgroup?
@@ -23,9 +28,10 @@ v1 is available under Documentation/cgroup-v1/.
2-2-2. Threads
2-3. [Un]populated Notification
2-4. Controlling Controllers
- 2-4-1. Enabling and Disabling
- 2-4-2. Top-down Constraint
- 2-4-3. No Internal Process Constraint
+ 2-4-1. Availability
+ 2-4-2. Enabling and Disabling
+ 2-4-3. Top-down Constraint
+ 2-4-4. No Internal Process Constraint
2-5. Delegation
2-5-1. Model of Delegation
2-5-2. Delegation Containment
@@ -54,13 +60,23 @@ v1 is available under Documentation/cgroup-v1/.
5-3-3. IO Latency
5-3-3-1. How IO Latency Throttling Works
5-3-3-2. IO Latency Interface Files
+ 5-3-4. IO Priority
5-4. PID
5-4-1. PID Interface Files
- 5-5. Device
- 5-6. RDMA
- 5-6-1. RDMA Interface Files
- 5-7. Misc
- 5-7-1. perf_event
+ 5-5. Cpuset
+ 5.5-1. Cpuset Interface Files
+ 5-6. Device controller
+ 5-7. RDMA
+ 5-7-1. RDMA Interface Files
+ 5-8. DMEM
+ 5-8-1. DMEM Interface Files
+ 5-9. HugeTLB
+ 5.9-1. HugeTLB Interface Files
+ 5-10. Misc
+ 5.10-1 Misc Interface Files
+ 5.10-2 Migration and Ownership
+ 5-11. Others
+ 5-11-1. perf_event
5-N. Non-normative information
5-N-1. CPU controller root cgroup process behaviour
5-N-2. IO controller root cgroup process behaviour
@@ -168,13 +184,74 @@ disabling controllers in v1 and make them always available in v2.
cgroup v2 currently supports the following mount options.
nsdelegate
-
Consider cgroup namespaces as delegation boundaries. This
option is system wide and can only be set on mount or modified
through remount from the init namespace. The mount option is
ignored on non-init namespace mounts. Please refer to the
Delegation section for details.
+ favordynmods
+ Reduce the latencies of dynamic cgroup modifications such as
+ task migrations and controller on/offs at the cost of making
+ hot path operations such as forks and exits more expensive.
+ The static usage pattern of creating a cgroup, enabling
+ controllers, and then seeding it with CLONE_INTO_CGROUP is
+ not affected by this option.
+
+ memory_localevents
+ Only populate memory.events with data for the current cgroup,
+ and not any subtrees. This is legacy behaviour, the default
+ behaviour without this option is to include subtree counts.
+ This option is system wide and can only be set on mount or
+ modified through remount from the init namespace. The mount
+ option is ignored on non-init namespace mounts.
+
+ memory_recursiveprot
+ Recursively apply memory.min and memory.low protection to
+ entire subtrees, without requiring explicit downward
+ propagation into leaf cgroups. This allows protecting entire
+ subtrees from one another, while retaining free competition
+ within those subtrees. This should have been the default
+ behavior but is a mount-option to avoid regressing setups
+ relying on the original semantics (e.g. specifying bogusly
+ high 'bypass' protection values at higher tree levels).
+
+ memory_hugetlb_accounting
+ Count HugeTLB memory usage towards the cgroup's overall
+ memory usage for the memory controller (for the purpose of
+ statistics reporting and memory protetion). This is a new
+ behavior that could regress existing setups, so it must be
+ explicitly opted in with this mount option.
+
+ A few caveats to keep in mind:
+
+ * There is no HugeTLB pool management involved in the memory
+ controller. The pre-allocated pool does not belong to anyone.
+ Specifically, when a new HugeTLB folio is allocated to
+ the pool, it is not accounted for from the perspective of the
+ memory controller. It is only charged to a cgroup when it is
+ actually used (for e.g at page fault time). Host memory
+ overcommit management has to consider this when configuring
+ hard limits. In general, HugeTLB pool management should be
+ done via other mechanisms (such as the HugeTLB controller).
+ * Failure to charge a HugeTLB folio to the memory controller
+ results in SIGBUS. This could happen even if the HugeTLB pool
+ still has pages available (but the cgroup limit is hit and
+ reclaim attempt fails).
+ * Charging HugeTLB memory towards the memory controller affects
+ memory protection and reclaim dynamics. Any userspace tuning
+ (of low, min limits for e.g) needs to take this into account.
+ * HugeTLB pages utilized while this option is not selected
+ will not be tracked by the memory controller (even if cgroup
+ v2 is remounted later on).
+
+ pids_localevents
+ The option restores v1-like behavior of pids.events:max, that is only
+ local (inside cgroup proper) fork failures are counted. Without this
+ option pids.events.max represents any pids.max enforcemnt across
+ cgroup's subtree.
+
+
Organizing Processes and Threads
--------------------------------
@@ -329,6 +406,13 @@ constraint, a threaded controller must be able to handle competition
between threads in a non-leaf cgroup and its child cgroups. Each
threaded controller defines how such competitions are handled.
+Currently, the following controllers are threaded and can be enabled
+in a threaded cgroup::
+
+- cpu
+- cpuset
+- perf_event
+- pids
[Un]populated Notification
--------------------------
@@ -356,6 +440,15 @@ both cgroups.
Controlling Controllers
-----------------------
+Availability
+~~~~~~~~~~~~
+
+A controller is available in a cgroup when it is supported by the kernel (i.e.,
+compiled in, not disabled and not attached to a v1 hierarchy) and listed in the
+"cgroup.controllers" file. Availability means the controller's interface files
+are exposed in the cgroup’s directory, allowing the distribution of the target
+resource to be observed or controlled within that cgroup.
+
Enabling and Disabling
~~~~~~~~~~~~~~~~~~~~~~
@@ -455,10 +548,12 @@ cgroup namespace on namespace creation.
Because the resource control interface files in a given directory
control the distribution of the parent's resources, the delegatee
shouldn't be allowed to write to them. For the first method, this is
-achieved by not granting access to these files. For the second, the
-kernel rejects writes to all files other than "cgroup.procs" and
-"cgroup.subtree_control" on a namespace root from inside the
-namespace.
+achieved by not granting access to these files. For the second, files
+outside the namespace should be hidden from the delegatee by the means
+of at least mount namespacing, and the kernel rejects writes to all
+files on a namespace root from inside the cgroup namespace, except for
+those files listed in "/sys/kernel/cgroup/delegate" (including
+"cgroup.procs", "cgroup.threads", "cgroup.subtree_control", etc.).
The end results are equivalent for both delegation types. Once
delegated, the user can build sub-hierarchy under the directory,
@@ -584,10 +679,12 @@ process migrations.
and is an example of this type.
+.. _cgroupv2-limits-distributor:
+
Limits
------
-A child can only consume upto the configured amount of the resource.
+A child can only consume up to the configured amount of the resource.
Limits can be over-committed - the sum of the limits of children can
exceed the amount of resource available to the parent.
@@ -600,15 +697,16 @@ process migrations.
"io.max" limits the maximum BPS and/or IOPS that a cgroup can consume
on an IO device and is an example of this type.
+.. _cgroupv2-protections-distributor:
Protections
-----------
-A cgroup is protected to be allocated upto the configured amount of
-the resource if the usages of all its ancestors are under their
+A cgroup is protected up to the configured amount of the resource
+as long as the usages of all its ancestors are under their
protected levels. Protections can be hard guarantees or best effort
soft boundaries. Protections can also be over-committed in which case
-only upto the amount available to the parent is protected among
+only up to the amount available to the parent is protected among
children.
Protections are in the range [0, max] and defaults to 0, which is
@@ -690,9 +788,13 @@ Conventions
- Settings for a single feature should be contained in a single file.
- The root cgroup should be exempt from resource control and thus
- shouldn't have resource control interface files. Also,
- informational files on the root cgroup which end up showing global
- information available elsewhere shouldn't exist.
+ shouldn't have resource control interface files.
+
+- The default time unit is microseconds. If a different unit is ever
+ used, an explicit unit suffix must be present.
+
+- A parts-per quantity should use a percentage decimal with at least
+ two digit fractional part - e.g. 13.40.
- If a controller implements weight based resource distribution, its
interface file should be named "weight" and have the range [1,
@@ -758,7 +860,6 @@ Core Interface Files
All cgroup core files are prefixed with "cgroup."
cgroup.type
-
A read-write single value file which exists on non-root
cgroups.
@@ -862,6 +963,8 @@ All cgroup core files are prefixed with "cgroup."
populated
1 if the cgroup or its descendants contains any live
processes; otherwise, 0.
+ frozen
+ 1 if the cgroup is frozen; otherwise, 0.
cgroup.max.descendants
A read-write single value files. The default is "max".
@@ -895,10 +998,101 @@ All cgroup core files are prefixed with "cgroup."
A dying cgroup can consume system resources not exceeding
limits, which were active at the moment of cgroup deletion.
+ nr_subsys_<cgroup_subsys>
+ Total number of live cgroup subsystems (e.g memory
+ cgroup) at and beneath the current cgroup.
+
+ nr_dying_subsys_<cgroup_subsys>
+ Total number of dying cgroup subsystems (e.g. memory
+ cgroup) at and beneath the current cgroup.
+
+ cgroup.stat.local
+ A read-only flat-keyed file which exists in non-root cgroups.
+ The following entry is defined:
+
+ frozen_usec
+ Cumulative time that this cgroup has spent between freezing and
+ thawing, regardless of whether by self or ancestor groups.
+ NB: (not) reaching "frozen" state is not accounted here.
+
+ Using the following ASCII representation of a cgroup's freezer
+ state, ::
+
+ 1 _____
+ frozen 0 __/ \__
+ ab cd
+
+ the duration being measured is the span between a and c.
+
+ cgroup.freeze
+ A read-write single value file which exists on non-root cgroups.
+ Allowed values are "0" and "1". The default is "0".
+
+ Writing "1" to the file causes freezing of the cgroup and all
+ descendant cgroups. This means that all belonging processes will
+ be stopped and will not run until the cgroup will be explicitly
+ unfrozen. Freezing of the cgroup may take some time; when this action
+ is completed, the "frozen" value in the cgroup.events control file
+ will be updated to "1" and the corresponding notification will be
+ issued.
+
+ A cgroup can be frozen either by its own settings, or by settings
+ of any ancestor cgroups. If any of ancestor cgroups is frozen, the
+ cgroup will remain frozen.
+
+ Processes in the frozen cgroup can be killed by a fatal signal.
+ They also can enter and leave a frozen cgroup: either by an explicit
+ move by a user, or if freezing of the cgroup races with fork().
+ If a process is moved to a frozen cgroup, it stops. If a process is
+ moved out of a frozen cgroup, it becomes running.
+
+ Frozen status of a cgroup doesn't affect any cgroup tree operations:
+ it's possible to delete a frozen (and empty) cgroup, as well as
+ create new sub-cgroups.
+
+ cgroup.kill
+ A write-only single value file which exists in non-root cgroups.
+ The only allowed value is "1".
+
+ Writing "1" to the file causes the cgroup and all descendant cgroups to
+ be killed. This means that all processes located in the affected cgroup
+ tree will be killed via SIGKILL.
+
+ Killing a cgroup tree will deal with concurrent forks appropriately and
+ is protected against migrations.
+
+ In a threaded cgroup, writing this file fails with EOPNOTSUPP as
+ killing cgroups is a process directed operation, i.e. it affects
+ the whole thread-group.
+
+ cgroup.pressure
+ A read-write single value file that allowed values are "0" and "1".
+ The default is "1".
+
+ Writing "0" to the file will disable the cgroup PSI accounting.
+ Writing "1" to the file will re-enable the cgroup PSI accounting.
+
+ This control attribute is not hierarchical, so disable or enable PSI
+ accounting in a cgroup does not affect PSI accounting in descendants
+ and doesn't need pass enablement via ancestors from root.
+
+ The reason this control attribute exists is that PSI accounts stalls for
+ each cgroup separately and aggregates it at each level of the hierarchy.
+ This may cause non-negligible overhead for some workloads when under
+ deep level of the hierarchy, in which case this control attribute can
+ be used to disable PSI accounting in the non-leaf cgroups.
+
+ irq.pressure
+ A read-write nested-keyed file.
+
+ Shows pressure stall information for IRQ/SOFTIRQ. See
+ :ref:`Documentation/accounting/psi.rst <psi>` for details.
Controllers
===========
+.. _cgroup-v2-cpu:
+
CPU
---
@@ -907,40 +1101,80 @@ controller implements weight and absolute bandwidth limit models for
normal scheduling policy and absolute bandwidth allocation model for
realtime scheduling policy.
-WARNING: cgroup2 doesn't yet support control of realtime processes and
-the cpu controller can only be enabled when all RT processes are in
-the root cgroup. Be aware that system management software may already
-have placed RT processes into nonroot cgroups during the system boot
-process, and these processes may need to be moved to the root cgroup
-before the cpu controller can be enabled.
+In all the above models, cycles distribution is defined only on a temporal
+base and it does not account for the frequency at which tasks are executed.
+The (optional) utilization clamping support allows to hint the schedutil
+cpufreq governor about the minimum desired frequency which should always be
+provided by a CPU, as well as the maximum desired frequency, which should not
+be exceeded by a CPU.
+
+WARNING: cgroup2 cpu controller doesn't yet support the (bandwidth) control of
+realtime processes. For a kernel built with the CONFIG_RT_GROUP_SCHED option
+enabled for group scheduling of realtime processes, the cpu controller can only
+be enabled when all RT processes are in the root cgroup. Be aware that system
+management software may already have placed RT processes into non-root cgroups
+during the system boot process, and these processes may need to be moved to the
+root cgroup before the cpu controller can be enabled with a
+CONFIG_RT_GROUP_SCHED enabled kernel.
+
+With CONFIG_RT_GROUP_SCHED disabled, this limitation does not apply and some of
+the interface files either affect realtime processes or account for them. See
+the following section for details. Only the cpu controller is affected by
+CONFIG_RT_GROUP_SCHED. Other controllers can be used for the resource control of
+realtime processes irrespective of CONFIG_RT_GROUP_SCHED.
CPU Interface Files
~~~~~~~~~~~~~~~~~~~
-All time durations are in microseconds.
+The interaction of a process with the cpu controller depends on its scheduling
+policy and the underlying scheduler. From the point of view of the cpu controller,
+processes can be categorized as follows:
+
+* Processes under the fair-class scheduler
+* Processes under a BPF scheduler with the ``cgroup_set_weight`` callback
+* Everything else: ``SCHED_{FIFO,RR,DEADLINE}`` and processes under a BPF scheduler
+ without the ``cgroup_set_weight`` callback
+
+For details on when a process is under the fair-class scheduler or a BPF scheduler,
+check out :ref:`Documentation/scheduler/sched-ext.rst <sched-ext>`.
+
+For each of the following interface files, the above categories
+will be referred to. All time durations are in microseconds.
cpu.stat
- A read-only flat-keyed file which exists on non-root cgroups.
+ A read-only flat-keyed file.
This file exists whether the controller is enabled or not.
- It always reports the following three stats:
+ It always reports the following three stats, which account for all the
+ processes in the cgroup:
- usage_usec
- user_usec
- system_usec
- and the following three when the controller is enabled:
+ and the following five when the controller is enabled, which account for
+ only the processes under the fair-class scheduler:
- nr_periods
- nr_throttled
- throttled_usec
+ - nr_bursts
+ - burst_usec
cpu.weight
A read-write single value file which exists on non-root
cgroups. The default is "100".
- The weight in the range [1, 10000].
+ For non idle groups (cpu.idle = 0), the weight is in the
+ range [1, 10000].
+
+ If the cgroup has been configured to be SCHED_IDLE (cpu.idle = 1),
+ then the weight will show as a 0.
+
+ This file affects only processes under the fair-class scheduler and a BPF
+ scheduler with the ``cgroup_set_weight`` callback depending on what the
+ callback actually does.
cpu.weight.nice
A read-write single value file which exists on non-root
@@ -954,6 +1188,10 @@ All time durations are in microseconds.
granularity is coarser for the nice values, the read value is
the closest approximation of the current weight.
+ This file affects only processes under the fair-class scheduler and a BPF
+ scheduler with the ``cgroup_set_weight`` callback depending on what the
+ callback actually does.
+
cpu.max
A read-write two value file which exists on non-root cgroups.
The default is "max 100000".
@@ -962,16 +1200,71 @@ All time durations are in microseconds.
$MAX $PERIOD
- which indicates that the group may consume upto $MAX in each
+ which indicates that the group may consume up to $MAX in each
$PERIOD duration. "max" for $MAX indicates no limit. If only
one number is written, $MAX is updated.
+ This file affects only processes under the fair-class scheduler.
+
+ cpu.max.burst
+ A read-write single value file which exists on non-root
+ cgroups. The default is "0".
+
+ The burst in the range [0, $MAX].
+
+ This file affects only processes under the fair-class scheduler.
+
cpu.pressure
- A read-only nested-key file which exists on non-root cgroups.
+ A read-write nested-keyed file.
Shows pressure stall information for CPU. See
- Documentation/accounting/psi.txt for details.
+ :ref:`Documentation/accounting/psi.rst <psi>` for details.
+
+ This file accounts for all the processes in the cgroup.
+ cpu.uclamp.min
+ A read-write single value file which exists on non-root cgroups.
+ The default is "0", i.e. no utilization boosting.
+
+ The requested minimum utilization (protection) as a percentage
+ rational number, e.g. 12.34 for 12.34%.
+
+ This interface allows reading and setting minimum utilization clamp
+ values similar to the sched_setattr(2). This minimum utilization
+ value is used to clamp the task specific minimum utilization clamp,
+ including those of realtime processes.
+
+ The requested minimum utilization (protection) is always capped by
+ the current value for the maximum utilization (limit), i.e.
+ `cpu.uclamp.max`.
+
+ This file affects all the processes in the cgroup.
+
+ cpu.uclamp.max
+ A read-write single value file which exists on non-root cgroups.
+ The default is "max". i.e. no utilization capping
+
+ The requested maximum utilization (limit) as a percentage rational
+ number, e.g. 98.76 for 98.76%.
+
+ This interface allows reading and setting maximum utilization clamp
+ values similar to the sched_setattr(2). This maximum utilization
+ value is used to clamp the task specific maximum utilization clamp,
+ including those of realtime processes.
+
+ This file affects all the processes in the cgroup.
+
+ cpu.idle
+ A read-write single value file which exists on non-root cgroups.
+ The default is 0.
+
+ This is the cgroup analog of the per-task SCHED_IDLE sched policy.
+ Setting this value to a 1 will make the scheduling policy of the
+ cgroup SCHED_IDLE. The threads inside the cgroup will retain their
+ own relative priorities, but the cgroup itself will be treated as
+ very low priority relative to its peers.
+
+ This file affects only processes under the fair-class scheduler.
Memory
------
@@ -1018,9 +1311,12 @@ PAGE_SIZE multiple when read back.
is within its effective min boundary, the cgroup's memory
won't be reclaimed under any conditions. If there is no
unprotected reclaimable memory available, OOM killer
- is invoked.
+ is invoked. Above the effective min boundary (or
+ effective low boundary if it is higher), pages are reclaimed
+ proportionally to the overage, reducing reclaim pressure for
+ smaller overages.
- Effective min boundary is limited by memory.min values of
+ Effective min boundary is limited by memory.min values of
all ancestor cgroups. If there is memory.min overcommitment
(child cgroup or cgroups are requiring more protected memory
than parent will allow), then each child cgroup will get
@@ -1039,8 +1335,12 @@ PAGE_SIZE multiple when read back.
Best-effort memory protection. If the memory usage of a
cgroup is within its effective low boundary, the cgroup's
- memory won't be reclaimed unless memory can be reclaimed
- from unprotected cgroups.
+ memory won't be reclaimed unless there is no reclaimable
+ memory available in unprotected cgroups.
+ Above the effective low boundary (or
+ effective min boundary if it is higher), pages are reclaimed
+ proportionally to the overage, reducing reclaim pressure for
+ smaller overages.
Effective low boundary is limited by memory.low values of
all ancestor cgroups. If there is memory.low overcommitment
@@ -1056,27 +1356,101 @@ PAGE_SIZE multiple when read back.
A read-write single value file which exists on non-root
cgroups. The default is "max".
- Memory usage throttle limit. This is the main mechanism to
- control memory usage of a cgroup. If a cgroup's usage goes
+ Memory usage throttle limit. If a cgroup's usage goes
over the high boundary, the processes of the cgroup are
throttled and put under heavy reclaim pressure.
Going over the high limit never invokes the OOM killer and
- under extreme conditions the limit may be breached.
+ under extreme conditions the limit may be breached. The high
+ limit should be used in scenarios where an external process
+ monitors the limited cgroup to alleviate heavy reclaim
+ pressure.
+
+ If memory.high is opened with O_NONBLOCK then the synchronous
+ reclaim is bypassed. This is useful for admin processes that
+ need to dynamically adjust the job's memory limits without
+ expending their own CPU resources on memory reclamation. The
+ job will trigger the reclaim and/or get throttled on its
+ next charge request.
+
+ Please note that with O_NONBLOCK, there is a chance that the
+ target memory cgroup may take indefinite amount of time to
+ reduce usage below the limit due to delayed charge request or
+ busy-hitting its memory to slow down reclaim.
memory.max
A read-write single value file which exists on non-root
cgroups. The default is "max".
- Memory usage hard limit. This is the final protection
- mechanism. If a cgroup's memory usage reaches this limit and
- can't be reduced, the OOM killer is invoked in the cgroup.
- Under certain circumstances, the usage may go over the limit
- temporarily.
+ Memory usage hard limit. This is the main mechanism to limit
+ memory usage of a cgroup. If a cgroup's memory usage reaches
+ this limit and can't be reduced, the OOM killer is invoked in
+ the cgroup. Under certain circumstances, the usage may go
+ over the limit temporarily.
+
+ In default configuration regular 0-order allocations always
+ succeed unless OOM killer chooses current task as a victim.
+
+ Some kinds of allocations don't invoke the OOM killer.
+ Caller could retry them differently, return into userspace
+ as -ENOMEM or silently ignore in cases like disk readahead.
+
+ If memory.max is opened with O_NONBLOCK, then the synchronous
+ reclaim and oom-kill are bypassed. This is useful for admin
+ processes that need to dynamically adjust the job's memory limits
+ without expending their own CPU resources on memory reclamation.
+ The job will trigger the reclaim and/or oom-kill on its next
+ charge request.
+
+ Please note that with O_NONBLOCK, there is a chance that the
+ target memory cgroup may take indefinite amount of time to
+ reduce usage below the limit due to delayed charge request or
+ busy-hitting its memory to slow down reclaim.
- This is the ultimate protection mechanism. As long as the
- high limit is used and monitored properly, this limit's
- utility is limited to providing the final safety net.
+ memory.reclaim
+ A write-only nested-keyed file which exists for all cgroups.
+
+ This is a simple interface to trigger memory reclaim in the
+ target cgroup.
+
+ Example::
+
+ echo "1G" > memory.reclaim
+
+ Please note that the kernel can over or under reclaim from
+ the target cgroup. If less bytes are reclaimed than the
+ specified amount, -EAGAIN is returned.
+
+ Please note that the proactive reclaim (triggered by this
+ interface) is not meant to indicate memory pressure on the
+ memory cgroup. Therefore socket memory balancing triggered by
+ the memory reclaim normally is not exercised in this case.
+ This means that the networking layer will not adapt based on
+ reclaim induced by memory.reclaim.
+
+The following nested keys are defined.
+
+ ========== ================================
+ swappiness Swappiness value to reclaim with
+ ========== ================================
+
+ Specifying a swappiness value instructs the kernel to perform
+ the reclaim with that swappiness value. Note that this has the
+ same semantics as vm.swappiness applied to memcg reclaim with
+ all the existing limitations and potential future extensions.
+
+ The valid range for swappiness is [0-200, max], setting
+ swappiness=max exclusively reclaims anonymous memory.
+
+ memory.peak
+ A read-write single value file which exists on non-root cgroups.
+
+ The max memory usage recorded for the cgroup and its descendants since
+ either the creation of the cgroup or the most recent reset for that FD.
+
+ A write of any non-empty string to this file resets it to the
+ current memory usage for subsequent reads through the same
+ file descriptor.
memory.oom.group
A read-write single value file which exists on non-root
@@ -1102,6 +1476,11 @@ PAGE_SIZE multiple when read back.
otherwise, a value change in this file generates a file
modified event.
+ Note that all fields in this file are hierarchical and the
+ file modified event can be generated due to an event down the
+ hierarchy. For the local events at the cgroup level see
+ memory.events.local.
+
low
The number of times the cgroup is reclaimed due to
high memory pressure even though its usage is under
@@ -1125,22 +1504,22 @@ PAGE_SIZE multiple when read back.
The number of time the cgroup's memory usage was
reached the limit and allocation was about to fail.
- Depending on context result could be invocation of OOM
- killer and retrying allocation or failing allocation.
-
- Failed allocation in its turn could be returned into
- userspace as -ENOMEM or silently ignored in cases like
- disk readahead. For now OOM in memory cgroup kills
- tasks iff shortage has happened inside page fault.
-
This event is not raised if the OOM killer is not
considered as an option, e.g. for failed high-order
- allocations.
+ allocations or if caller asked to not retry attempts.
oom_kill
The number of processes belonging to this cgroup
killed by any kind of OOM killer.
+ oom_group_kill
+ The number of times a group OOM has occurred.
+
+ memory.events.local
+ Similar to memory.events but the fields in the file are local
+ to the cgroup i.e. not hierarchical. The file modified event
+ generated on this file reflects only the local events.
+
memory.stat
A read-only flat-keyed file which exists on non-root cgroups.
@@ -1154,30 +1533,62 @@ PAGE_SIZE multiple when read back.
can show up in the middle. Don't rely on items remaining in a
fixed position; use the keys to look up specific values!
+ If the entry has no per-node counter (or not show in the
+ memory.numa_stat). We use 'npn' (non-per-node) as the tag
+ to indicate that it will not show in the memory.numa_stat.
+
anon
Amount of memory used in anonymous mappings such as
- brk(), sbrk(), and mmap(MAP_ANONYMOUS)
+ brk(), sbrk(), and mmap(MAP_ANONYMOUS). Note that
+ some kernel configurations might account complete larger
+ allocations (e.g., THP) if only some, but not all the
+ memory of such an allocation is mapped anymore.
file
Amount of memory used to cache filesystem data,
including tmpfs and shared memory.
+ kernel (npn)
+ Amount of total kernel memory, including
+ (kernel_stack, pagetables, percpu, vmalloc, slab) in
+ addition to other kernel memory use cases.
+
kernel_stack
Amount of memory allocated to kernel stacks.
- slab
- Amount of memory used for storing in-kernel data
- structures.
+ pagetables
+ Amount of memory allocated for page tables.
+
+ sec_pagetables
+ Amount of memory allocated for secondary page tables,
+ this currently includes KVM mmu allocations on x86
+ and arm64 and IOMMU page tables.
- sock
+ percpu (npn)
+ Amount of memory used for storing per-cpu kernel
+ data structures.
+
+ sock (npn)
Amount of memory used in network transmission buffers
+ vmalloc (npn)
+ Amount of memory used for vmap backed memory.
+
shmem
Amount of cached filesystem data that is swap-backed,
such as tmpfs, shm segments, shared anonymous mmap()s
+ zswap
+ Amount of memory consumed by the zswap compression backend.
+
+ zswapped
+ Amount of application memory swapped out to zswap.
+
file_mapped
- Amount of cached filesystem data mapped with mmap()
+ Amount of cached filesystem data mapped with mmap(). Note
+ that some kernel configurations might account complete
+ larger allocations (e.g., THP) if only some, but not
+ not all the memory of such an allocation is mapped.
file_dirty
Amount of cached filesystem data that was modified but
@@ -1187,10 +1598,31 @@ PAGE_SIZE multiple when read back.
Amount of cached filesystem data that was modified and
is currently being written back to disk
+ swapcached
+ Amount of swap cached in memory. The swapcache is accounted
+ against both memory and swap usage.
+
+ anon_thp
+ Amount of memory used in anonymous mappings backed by
+ transparent hugepages
+
+ file_thp
+ Amount of cached filesystem data backed by transparent
+ hugepages
+
+ shmem_thp
+ Amount of shm, tmpfs, shared anonymous mmap()s backed by
+ transparent hugepages
+
inactive_anon, active_anon, inactive_file, active_file, unevictable
Amount of memory, swap-backed and filesystem-backed,
on the internal memory management lists used by the
- page reclaim algorithm
+ page reclaim algorithm.
+
+ As these represent internal list state (eg. shmem pages are on anon
+ memory management lists), inactive_foo + active_foo may not be equal to
+ the value for the foo counter, since the foo counter is type-based, not
+ list-based.
slab_reclaimable
Part of "slab" that might be reclaimed, such as
@@ -1200,52 +1632,180 @@ PAGE_SIZE multiple when read back.
Part of "slab" that cannot be reclaimed on memory
pressure.
- pgfault
- Total number of page faults incurred
+ slab (npn)
+ Amount of memory used for storing in-kernel data
+ structures.
- pgmajfault
- Number of major page faults incurred
+ workingset_refault_anon
+ Number of refaults of previously evicted anonymous pages.
- workingset_refault
+ workingset_refault_file
+ Number of refaults of previously evicted file pages.
- Number of refaults of previously evicted pages
+ workingset_activate_anon
+ Number of refaulted anonymous pages that were immediately
+ activated.
- workingset_activate
+ workingset_activate_file
+ Number of refaulted file pages that were immediately activated.
- Number of refaulted pages that were immediately activated
+ workingset_restore_anon
+ Number of restored anonymous pages which have been detected as
+ an active workingset before they got reclaimed.
- workingset_nodereclaim
+ workingset_restore_file
+ Number of restored file pages which have been detected as an
+ active workingset before they got reclaimed.
+ workingset_nodereclaim
Number of times a shadow node has been reclaimed
- pgrefill
-
- Amount of scanned pages (in an active LRU list)
+ pswpin (npn)
+ Number of pages swapped into memory
- pgscan
+ pswpout (npn)
+ Number of pages swapped out of memory
+ pgscan (npn)
Amount of scanned pages (in an inactive LRU list)
- pgsteal
-
+ pgsteal (npn)
Amount of reclaimed pages
- pgactivate
+ pgscan_kswapd (npn)
+ Amount of scanned pages by kswapd (in an inactive LRU list)
- Amount of pages moved to the active LRU list
+ pgscan_direct (npn)
+ Amount of scanned pages directly (in an inactive LRU list)
- pgdeactivate
+ pgscan_khugepaged (npn)
+ Amount of scanned pages by khugepaged (in an inactive LRU list)
- Amount of pages moved to the inactive LRU lis
+ pgscan_proactive (npn)
+ Amount of scanned pages proactively (in an inactive LRU list)
- pglazyfree
+ pgsteal_kswapd (npn)
+ Amount of reclaimed pages by kswapd
- Amount of pages postponed to be freed under memory pressure
+ pgsteal_direct (npn)
+ Amount of reclaimed pages directly
+
+ pgsteal_khugepaged (npn)
+ Amount of reclaimed pages by khugepaged
+
+ pgsteal_proactive (npn)
+ Amount of reclaimed pages proactively
- pglazyfreed
+ pgfault (npn)
+ Total number of page faults incurred
+
+ pgmajfault (npn)
+ Number of major page faults incurred
+ pgrefill (npn)
+ Amount of scanned pages (in an active LRU list)
+
+ pgactivate (npn)
+ Amount of pages moved to the active LRU list
+
+ pgdeactivate (npn)
+ Amount of pages moved to the inactive LRU list
+
+ pglazyfree (npn)
+ Amount of pages postponed to be freed under memory pressure
+
+ pglazyfreed (npn)
Amount of reclaimed lazyfree pages
+ swpin_zero
+ Number of pages swapped into memory and filled with zero, where I/O
+ was optimized out because the page content was detected to be zero
+ during swapout.
+
+ swpout_zero
+ Number of zero-filled pages swapped out with I/O skipped due to the
+ content being detected as zero.
+
+ zswpin
+ Number of pages moved in to memory from zswap.
+
+ zswpout
+ Number of pages moved out of memory to zswap.
+
+ zswpwb
+ Number of pages written from zswap to swap.
+
+ thp_fault_alloc (npn)
+ Number of transparent hugepages which were allocated to satisfy
+ a page fault. This counter is not present when CONFIG_TRANSPARENT_HUGEPAGE
+ is not set.
+
+ thp_collapse_alloc (npn)
+ Number of transparent hugepages which were allocated to allow
+ collapsing an existing range of pages. This counter is not
+ present when CONFIG_TRANSPARENT_HUGEPAGE is not set.
+
+ thp_swpout (npn)
+ Number of transparent hugepages which are swapout in one piece
+ without splitting.
+
+ thp_swpout_fallback (npn)
+ Number of transparent hugepages which were split before swapout.
+ Usually because failed to allocate some continuous swap space
+ for the huge page.
+
+ numa_pages_migrated (npn)
+ Number of pages migrated by NUMA balancing.
+
+ numa_pte_updates (npn)
+ Number of pages whose page table entries are modified by
+ NUMA balancing to produce NUMA hinting faults on access.
+
+ numa_hint_faults (npn)
+ Number of NUMA hinting faults.
+
+ pgdemote_kswapd
+ Number of pages demoted by kswapd.
+
+ pgdemote_direct
+ Number of pages demoted directly.
+
+ pgdemote_khugepaged
+ Number of pages demoted by khugepaged.
+
+ pgdemote_proactive
+ Number of pages demoted by proactively.
+
+ hugetlb
+ Amount of memory used by hugetlb pages. This metric only shows
+ up if hugetlb usage is accounted for in memory.current (i.e.
+ cgroup is mounted with the memory_hugetlb_accounting option).
+
+ memory.numa_stat
+ A read-only nested-keyed file which exists on non-root cgroups.
+
+ This breaks down the cgroup's memory footprint into different
+ types of memory, type-specific details, and other information
+ per node on the state of the memory management system.
+
+ This is useful for providing visibility into the NUMA locality
+ information within an memcg since the pages are allowed to be
+ allocated from any physical node. One of the use case is evaluating
+ application performance by combining this information with the
+ application's CPU allocation.
+
+ All memory amounts are in bytes.
+
+ The output format of memory.numa_stat is::
+
+ type N0=<bytes in node 0> N1=<bytes in node 1> ...
+
+ The entries are ordered to be human readable, and new entries
+ can show up in the middle. Don't rely on items remaining in a
+ fixed position; use the keys to look up specific values!
+
+ The entries can refer to the memory.stat.
+
memory.swap.current
A read-only single value file which exists on non-root
cgroups.
@@ -1253,6 +1813,32 @@ PAGE_SIZE multiple when read back.
The total amount of swap currently being used by the cgroup
and its descendants.
+ memory.swap.high
+ A read-write single value file which exists on non-root
+ cgroups. The default is "max".
+
+ Swap usage throttle limit. If a cgroup's swap usage exceeds
+ this limit, all its further allocations will be throttled to
+ allow userspace to implement custom out-of-memory procedures.
+
+ This limit marks a point of no return for the cgroup. It is NOT
+ designed to manage the amount of swapping a workload does
+ during regular operation. Compare to memory.swap.max, which
+ prohibits swapping past a set amount, but lets the cgroup
+ continue unimpeded as long as other memory can be reclaimed.
+
+ Healthy workloads are not expected to reach this limit.
+
+ memory.swap.peak
+ A read-write single value file which exists on non-root cgroups.
+
+ The max swap usage recorded for the cgroup and its descendants since
+ the creation of the cgroup or the most recent reset for that FD.
+
+ A write of any non-empty string to this file resets it to the
+ current memory usage for subsequent reads through the same
+ file descriptor.
+
memory.swap.max
A read-write single value file which exists on non-root
cgroups. The default is "max".
@@ -1266,6 +1852,10 @@ PAGE_SIZE multiple when read back.
otherwise, a value change in this file generates a file
modified event.
+ high
+ The number of times the cgroup's swap usage was over
+ the high threshold.
+
max
The number of times the cgroup's swap usage was about
to go over the max boundary and swap allocation
@@ -1281,11 +1871,44 @@ PAGE_SIZE multiple when read back.
higher than the limit for an extended period of time. This
reduces the impact on the workload and memory management.
+ memory.zswap.current
+ A read-only single value file which exists on non-root
+ cgroups.
+
+ The total amount of memory consumed by the zswap compression
+ backend.
+
+ memory.zswap.max
+ A read-write single value file which exists on non-root
+ cgroups. The default is "max".
+
+ Zswap usage hard limit. If a cgroup's zswap pool reaches this
+ limit, it will refuse to take any more stores before existing
+ entries fault back in or are written out to disk.
+
+ memory.zswap.writeback
+ A read-write single value file. The default value is "1".
+ Note that this setting is hierarchical, i.e. the writeback would be
+ implicitly disabled for child cgroups if the upper hierarchy
+ does so.
+
+ When this is set to 0, all swapping attempts to swapping devices
+ are disabled. This included both zswap writebacks, and swapping due
+ to zswap store failures. If the zswap store failures are recurring
+ (for e.g if the pages are incompressible), users can observe
+ reclaim inefficiency after disabling writeback (because the same
+ pages might be rejected again and again).
+
+ Note that this is subtly different from setting memory.swap.max to
+ 0, as it still allows for pages to be written to the zswap pool.
+ This setting has no effect if zswap is disabled, and swapping
+ is allowed unless memory.swap.max is set to 0.
+
memory.pressure
- A read-only nested-key file which exists on non-root cgroups.
+ A read-only nested-keyed file.
Shows pressure stall information for memory. See
- Documentation/accounting/psi.txt for details.
+ :ref:`Documentation/accounting/psi.rst <psi>` for details.
Usage Guidelines
@@ -1345,8 +1968,7 @@ IO Interface Files
~~~~~~~~~~~~~~~~~~
io.stat
- A read-only nested-keyed file which exists on non-root
- cgroups.
+ A read-only nested-keyed file.
Lines are keyed by $MAJ:$MIN device numbers and not ordered.
The following nested keys are defined.
@@ -1360,11 +1982,108 @@ IO Interface Files
dios Number of discard IOs
====== =====================
- An example read output follows:
+ An example read output follows::
8:16 rbytes=1459200 wbytes=314773504 rios=192 wios=353 dbytes=0 dios=0
8:0 rbytes=90430464 wbytes=299008000 rios=8950 wios=1252 dbytes=50331648 dios=3021
+ io.cost.qos
+ A read-write nested-keyed file which exists only on the root
+ cgroup.
+
+ This file configures the Quality of Service of the IO cost
+ model based controller (CONFIG_BLK_CGROUP_IOCOST) which
+ currently implements "io.weight" proportional control. Lines
+ are keyed by $MAJ:$MIN device numbers and not ordered. The
+ line for a given device is populated on the first write for
+ the device on "io.cost.qos" or "io.cost.model". The following
+ nested keys are defined.
+
+ ====== =====================================
+ enable Weight-based control enable
+ ctrl "auto" or "user"
+ rpct Read latency percentile [0, 100]
+ rlat Read latency threshold
+ wpct Write latency percentile [0, 100]
+ wlat Write latency threshold
+ min Minimum scaling percentage [1, 10000]
+ max Maximum scaling percentage [1, 10000]
+ ====== =====================================
+
+ The controller is disabled by default and can be enabled by
+ setting "enable" to 1. "rpct" and "wpct" parameters default
+ to zero and the controller uses internal device saturation
+ state to adjust the overall IO rate between "min" and "max".
+
+ When a better control quality is needed, latency QoS
+ parameters can be configured. For example::
+
+ 8:16 enable=1 ctrl=auto rpct=95.00 rlat=75000 wpct=95.00 wlat=150000 min=50.00 max=150.0
+
+ shows that on sdb, the controller is enabled, will consider
+ the device saturated if the 95th percentile of read completion
+ latencies is above 75ms or write 150ms, and adjust the overall
+ IO issue rate between 50% and 150% accordingly.
+
+ The lower the saturation point, the better the latency QoS at
+ the cost of aggregate bandwidth. The narrower the allowed
+ adjustment range between "min" and "max", the more conformant
+ to the cost model the IO behavior. Note that the IO issue
+ base rate may be far off from 100% and setting "min" and "max"
+ blindly can lead to a significant loss of device capacity or
+ control quality. "min" and "max" are useful for regulating
+ devices which show wide temporary behavior changes - e.g. a
+ ssd which accepts writes at the line speed for a while and
+ then completely stalls for multiple seconds.
+
+ When "ctrl" is "auto", the parameters are controlled by the
+ kernel and may change automatically. Setting "ctrl" to "user"
+ or setting any of the percentile and latency parameters puts
+ it into "user" mode and disables the automatic changes. The
+ automatic mode can be restored by setting "ctrl" to "auto".
+
+ io.cost.model
+ A read-write nested-keyed file which exists only on the root
+ cgroup.
+
+ This file configures the cost model of the IO cost model based
+ controller (CONFIG_BLK_CGROUP_IOCOST) which currently
+ implements "io.weight" proportional control. Lines are keyed
+ by $MAJ:$MIN device numbers and not ordered. The line for a
+ given device is populated on the first write for the device on
+ "io.cost.qos" or "io.cost.model". The following nested keys
+ are defined.
+
+ ===== ================================
+ ctrl "auto" or "user"
+ model The cost model in use - "linear"
+ ===== ================================
+
+ When "ctrl" is "auto", the kernel may change all parameters
+ dynamically. When "ctrl" is set to "user" or any other
+ parameters are written to, "ctrl" become "user" and the
+ automatic changes are disabled.
+
+ When "model" is "linear", the following model parameters are
+ defined.
+
+ ============= ========================================
+ [r|w]bps The maximum sequential IO throughput
+ [r|w]seqiops The maximum 4k sequential IOs per second
+ [r|w]randiops The maximum 4k random IOs per second
+ ============= ========================================
+
+ From the above, the builtin linear model determines the base
+ costs of a sequential and random IO and the cost coefficient
+ for the IO size. While simple, this model can cover most
+ common device classes acceptably.
+
+ The IO cost model isn't expected to be accurate in absolute
+ sense and is scaled to the device behavior dynamically.
+
+ If needed, tools/cgroup/iocost_coef_gen.py can be used to
+ generate device-specific coefficients.
+
io.weight
A read-write flat-keyed file which exists on non-root cgroups.
The default is "default 100".
@@ -1425,10 +2144,10 @@ IO Interface Files
8:16 rbps=2097152 wbps=max riops=max wiops=max
io.pressure
- A read-only nested-key file which exists on non-root cgroups.
+ A read-only nested-keyed file.
Shows pressure stall information for IO. See
- Documentation/accounting/psi.txt for details.
+ :ref:`Documentation/accounting/psi.rst <psi>` for details.
Writeback
@@ -1449,9 +2168,9 @@ per-cgroup dirty memory states are examined and the more restrictive
of the two is enforced.
cgroup writeback requires explicit support from the underlying
-filesystem. Currently, cgroup writeback is implemented on ext2, ext4
-and btrfs. On other filesystems, all writeback IOs are attributed to
-the root cgroup.
+filesystem. Currently, cgroup writeback is implemented on ext2, ext4,
+btrfs, f2fs, and xfs. On other filesystems, all writeback IOs are
+attributed to the root cgroup.
There are inherent differences in memory and writeback management
which affects how cgroup ownership is tracked. Memory is tracked per
@@ -1501,7 +2220,7 @@ protected workload.
The limits are only applied at the peer level in the hierarchy. This means that
in the diagram below, only groups A, B, and C will influence each other, and
-groups D and F will influence each other. Group G will influence nobody.
+groups D and F will influence each other. Group G will influence nobody::
[root]
/ | \
@@ -1550,7 +2269,7 @@ IO Latency Interface Files
io.latency
This takes a similar format as the other controllers.
- "MAJOR:MINOR target=<target time in microseconds"
+ "MAJOR:MINOR target=<target time in microseconds>"
io.stat
If the controller is enabled you will see extra stats in io.stat in
@@ -1570,6 +2289,68 @@ IO Latency Interface Files
duration of time between evaluation events. Windows only elapse
with IO activity. Idle periods extend the most recent window.
+IO Priority
+~~~~~~~~~~~
+
+A single attribute controls the behavior of the I/O priority cgroup policy,
+namely the io.prio.class attribute. The following values are accepted for
+that attribute:
+
+ no-change
+ Do not modify the I/O priority class.
+
+ promote-to-rt
+ For requests that have a non-RT I/O priority class, change it into RT.
+ Also change the priority level of these requests to 4. Do not modify
+ the I/O priority of requests that have priority class RT.
+
+ restrict-to-be
+ For requests that do not have an I/O priority class or that have I/O
+ priority class RT, change it into BE. Also change the priority level
+ of these requests to 0. Do not modify the I/O priority class of
+ requests that have priority class IDLE.
+
+ idle
+ Change the I/O priority class of all requests into IDLE, the lowest
+ I/O priority class.
+
+ none-to-rt
+ Deprecated. Just an alias for promote-to-rt.
+
+The following numerical values are associated with the I/O priority policies:
+
++----------------+---+
+| no-change | 0 |
++----------------+---+
+| promote-to-rt | 1 |
++----------------+---+
+| restrict-to-be | 2 |
++----------------+---+
+| idle | 3 |
++----------------+---+
+
+The numerical value that corresponds to each I/O priority class is as follows:
+
++-------------------------------+---+
+| IOPRIO_CLASS_NONE | 0 |
++-------------------------------+---+
+| IOPRIO_CLASS_RT (real-time) | 1 |
++-------------------------------+---+
+| IOPRIO_CLASS_BE (best effort) | 2 |
++-------------------------------+---+
+| IOPRIO_CLASS_IDLE | 3 |
++-------------------------------+---+
+
+The algorithm to set the I/O priority class for a request is as follows:
+
+- If I/O priority class policy is promote-to-rt, change the request I/O
+ priority class to IOPRIO_CLASS_RT and change the request I/O priority
+ level to 4.
+- If I/O priority class policy is not promote-to-rt, translate the I/O priority
+ class policy into a number, then change the request I/O priority class
+ into the maximum of the I/O priority class policy number and the numerical
+ I/O priority class.
+
PID
---
@@ -1596,11 +2377,31 @@ PID Interface Files
Hard limit of number of processes.
pids.current
- A read-only single value file which exists on all cgroups.
+ A read-only single value file which exists on non-root cgroups.
The number of processes currently in the cgroup and its
descendants.
+ pids.peak
+ A read-only single value file which exists on non-root cgroups.
+
+ The maximum value that the number of processes in the cgroup and its
+ descendants has ever reached.
+
+ pids.events
+ A read-only flat-keyed file which exists on non-root cgroups. Unless
+ specified otherwise, a value change in this file generates a file
+ modified event. The following entries are defined.
+
+ max
+ The number of times the cgroup's total number of processes hit the pids.max
+ limit (see also pids_localevents).
+
+ pids.events.local
+ Similar to pids.events but the fields in the file are local
+ to the cgroup i.e. not hierarchical. The file modified event
+ generated on this file reflects only the local events.
+
Organisational operations are not blocked by cgroup policies, so it is
possible to have pids.current > pids.max. This can be done by either
setting the limit to be smaller than pids.current, or attaching enough
@@ -1610,6 +2411,279 @@ through fork() or clone(). These will return -EAGAIN if the creation
of a new process would cause a cgroup policy to be violated.
+Cpuset
+------
+
+The "cpuset" controller provides a mechanism for constraining
+the CPU and memory node placement of tasks to only the resources
+specified in the cpuset interface files in a task's current cgroup.
+This is especially valuable on large NUMA systems where placing jobs
+on properly sized subsets of the systems with careful processor and
+memory placement to reduce cross-node memory access and contention
+can improve overall system performance.
+
+The "cpuset" controller is hierarchical. That means the controller
+cannot use CPUs or memory nodes not allowed in its parent.
+
+
+Cpuset Interface Files
+~~~~~~~~~~~~~~~~~~~~~~
+
+ cpuset.cpus
+ A read-write multiple values file which exists on non-root
+ cpuset-enabled cgroups.
+
+ It lists the requested CPUs to be used by tasks within this
+ cgroup. The actual list of CPUs to be granted, however, is
+ subjected to constraints imposed by its parent and can differ
+ from the requested CPUs.
+
+ The CPU numbers are comma-separated numbers or ranges.
+ For example::
+
+ # cat cpuset.cpus
+ 0-4,6,8-10
+
+ An empty value indicates that the cgroup is using the same
+ setting as the nearest cgroup ancestor with a non-empty
+ "cpuset.cpus" or all the available CPUs if none is found.
+
+ The value of "cpuset.cpus" stays constant until the next update
+ and won't be affected by any CPU hotplug events.
+
+ cpuset.cpus.effective
+ A read-only multiple values file which exists on all
+ cpuset-enabled cgroups.
+
+ It lists the onlined CPUs that are actually granted to this
+ cgroup by its parent. These CPUs are allowed to be used by
+ tasks within the current cgroup.
+
+ If "cpuset.cpus" is empty, the "cpuset.cpus.effective" file shows
+ all the CPUs from the parent cgroup that can be available to
+ be used by this cgroup. Otherwise, it should be a subset of
+ "cpuset.cpus" unless none of the CPUs listed in "cpuset.cpus"
+ can be granted. In this case, it will be treated just like an
+ empty "cpuset.cpus".
+
+ Its value will be affected by CPU hotplug events.
+
+ cpuset.mems
+ A read-write multiple values file which exists on non-root
+ cpuset-enabled cgroups.
+
+ It lists the requested memory nodes to be used by tasks within
+ this cgroup. The actual list of memory nodes granted, however,
+ is subjected to constraints imposed by its parent and can differ
+ from the requested memory nodes.
+
+ The memory node numbers are comma-separated numbers or ranges.
+ For example::
+
+ # cat cpuset.mems
+ 0-1,3
+
+ An empty value indicates that the cgroup is using the same
+ setting as the nearest cgroup ancestor with a non-empty
+ "cpuset.mems" or all the available memory nodes if none
+ is found.
+
+ The value of "cpuset.mems" stays constant until the next update
+ and won't be affected by any memory nodes hotplug events.
+
+ Setting a non-empty value to "cpuset.mems" causes memory of
+ tasks within the cgroup to be migrated to the designated nodes if
+ they are currently using memory outside of the designated nodes.
+
+ There is a cost for this memory migration. The migration
+ may not be complete and some memory pages may be left behind.
+ So it is recommended that "cpuset.mems" should be set properly
+ before spawning new tasks into the cpuset. Even if there is
+ a need to change "cpuset.mems" with active tasks, it shouldn't
+ be done frequently.
+
+ cpuset.mems.effective
+ A read-only multiple values file which exists on all
+ cpuset-enabled cgroups.
+
+ It lists the onlined memory nodes that are actually granted to
+ this cgroup by its parent. These memory nodes are allowed to
+ be used by tasks within the current cgroup.
+
+ If "cpuset.mems" is empty, it shows all the memory nodes from the
+ parent cgroup that will be available to be used by this cgroup.
+ Otherwise, it should be a subset of "cpuset.mems" unless none of
+ the memory nodes listed in "cpuset.mems" can be granted. In this
+ case, it will be treated just like an empty "cpuset.mems".
+
+ Its value will be affected by memory nodes hotplug events.
+
+ cpuset.cpus.exclusive
+ A read-write multiple values file which exists on non-root
+ cpuset-enabled cgroups.
+
+ It lists all the exclusive CPUs that are allowed to be used
+ to create a new cpuset partition. Its value is not used
+ unless the cgroup becomes a valid partition root. See the
+ "cpuset.cpus.partition" section below for a description of what
+ a cpuset partition is.
+
+ When the cgroup becomes a partition root, the actual exclusive
+ CPUs that are allocated to that partition are listed in
+ "cpuset.cpus.exclusive.effective" which may be different
+ from "cpuset.cpus.exclusive". If "cpuset.cpus.exclusive"
+ has previously been set, "cpuset.cpus.exclusive.effective"
+ is always a subset of it.
+
+ Users can manually set it to a value that is different from
+ "cpuset.cpus". One constraint in setting it is that the list of
+ CPUs must be exclusive with respect to "cpuset.cpus.exclusive"
+ of its sibling. If "cpuset.cpus.exclusive" of a sibling cgroup
+ isn't set, its "cpuset.cpus" value, if set, cannot be a subset
+ of it to leave at least one CPU available when the exclusive
+ CPUs are taken away.
+
+ For a parent cgroup, any one of its exclusive CPUs can only
+ be distributed to at most one of its child cgroups. Having an
+ exclusive CPU appearing in two or more of its child cgroups is
+ not allowed (the exclusivity rule). A value that violates the
+ exclusivity rule will be rejected with a write error.
+
+ The root cgroup is a partition root and all its available CPUs
+ are in its exclusive CPU set.
+
+ cpuset.cpus.exclusive.effective
+ A read-only multiple values file which exists on all non-root
+ cpuset-enabled cgroups.
+
+ This file shows the effective set of exclusive CPUs that
+ can be used to create a partition root. The content
+ of this file will always be a subset of its parent's
+ "cpuset.cpus.exclusive.effective" if its parent is not the root
+ cgroup. It will also be a subset of "cpuset.cpus.exclusive"
+ if it is set. If "cpuset.cpus.exclusive" is not set, it is
+ treated to have an implicit value of "cpuset.cpus" in the
+ formation of local partition.
+
+ cpuset.cpus.isolated
+ A read-only and root cgroup only multiple values file.
+
+ This file shows the set of all isolated CPUs used in existing
+ isolated partitions. It will be empty if no isolated partition
+ is created.
+
+ cpuset.cpus.partition
+ A read-write single value file which exists on non-root
+ cpuset-enabled cgroups. This flag is owned by the parent cgroup
+ and is not delegatable.
+
+ It accepts only the following input values when written to.
+
+ ========== =====================================
+ "member" Non-root member of a partition
+ "root" Partition root
+ "isolated" Partition root without load balancing
+ ========== =====================================
+
+ A cpuset partition is a collection of cpuset-enabled cgroups with
+ a partition root at the top of the hierarchy and its descendants
+ except those that are separate partition roots themselves and
+ their descendants. A partition has exclusive access to the
+ set of exclusive CPUs allocated to it. Other cgroups outside
+ of that partition cannot use any CPUs in that set.
+
+ There are two types of partitions - local and remote. A local
+ partition is one whose parent cgroup is also a valid partition
+ root. A remote partition is one whose parent cgroup is not a
+ valid partition root itself. Writing to "cpuset.cpus.exclusive"
+ is optional for the creation of a local partition as its
+ "cpuset.cpus.exclusive" file will assume an implicit value that
+ is the same as "cpuset.cpus" if it is not set. Writing the
+ proper "cpuset.cpus.exclusive" values down the cgroup hierarchy
+ before the target partition root is mandatory for the creation
+ of a remote partition.
+
+ Currently, a remote partition cannot be created under a local
+ partition. All the ancestors of a remote partition root except
+ the root cgroup cannot be a partition root.
+
+ The root cgroup is always a partition root and its state cannot
+ be changed. All other non-root cgroups start out as "member".
+
+ When set to "root", the current cgroup is the root of a new
+ partition or scheduling domain. The set of exclusive CPUs is
+ determined by the value of its "cpuset.cpus.exclusive.effective".
+
+ When set to "isolated", the CPUs in that partition will be in
+ an isolated state without any load balancing from the scheduler
+ and excluded from the unbound workqueues. Tasks placed in such
+ a partition with multiple CPUs should be carefully distributed
+ and bound to each of the individual CPUs for optimal performance.
+
+ A partition root ("root" or "isolated") can be in one of the
+ two possible states - valid or invalid. An invalid partition
+ root is in a degraded state where some state information may
+ be retained, but behaves more like a "member".
+
+ All possible state transitions among "member", "root" and
+ "isolated" are allowed.
+
+ On read, the "cpuset.cpus.partition" file can show the following
+ values.
+
+ ============================= =====================================
+ "member" Non-root member of a partition
+ "root" Partition root
+ "isolated" Partition root without load balancing
+ "root invalid (<reason>)" Invalid partition root
+ "isolated invalid (<reason>)" Invalid isolated partition root
+ ============================= =====================================
+
+ In the case of an invalid partition root, a descriptive string on
+ why the partition is invalid is included within parentheses.
+
+ For a local partition root to be valid, the following conditions
+ must be met.
+
+ 1) The parent cgroup is a valid partition root.
+ 2) The "cpuset.cpus.exclusive.effective" file cannot be empty,
+ though it may contain offline CPUs.
+ 3) The "cpuset.cpus.effective" cannot be empty unless there is
+ no task associated with this partition.
+
+ For a remote partition root to be valid, all the above conditions
+ except the first one must be met.
+
+ External events like hotplug or changes to "cpuset.cpus" or
+ "cpuset.cpus.exclusive" can cause a valid partition root to
+ become invalid and vice versa. Note that a task cannot be
+ moved to a cgroup with empty "cpuset.cpus.effective".
+
+ A valid non-root parent partition may distribute out all its CPUs
+ to its child local partitions when there is no task associated
+ with it.
+
+ Care must be taken to change a valid partition root to "member"
+ as all its child local partitions, if present, will become
+ invalid causing disruption to tasks running in those child
+ partitions. These inactivated partitions could be recovered if
+ their parent is switched back to a partition root with a proper
+ value in "cpuset.cpus" or "cpuset.cpus.exclusive".
+
+ Poll and inotify events are triggered whenever the state of
+ "cpuset.cpus.partition" changes. That includes changes caused
+ by write to "cpuset.cpus.partition", cpu hotplug or other
+ changes that modify the validity status of the partition.
+ This will allow user space agents to monitor unexpected changes
+ to "cpuset.cpus.partition" without the need to do continuous
+ polling.
+
+ A user can pre-configure certain CPUs to an isolated state
+ with load balancing disabled at boot time with the "isolcpus"
+ kernel boot command line option. If those CPUs are to be put
+ into a partition, they have to be used in an isolated partition.
+
+
Device controller
-----------------
@@ -1619,26 +2693,26 @@ existing device files.
Cgroup v2 device controller has no interface files and is implemented
on top of cgroup BPF. To control access to device files, a user may
-create bpf programs of the BPF_CGROUP_DEVICE type and attach them
-to cgroups. On an attempt to access a device file, corresponding
-BPF programs will be executed, and depending on the return value
-the attempt will succeed or fail with -EPERM.
+create bpf programs of type BPF_PROG_TYPE_CGROUP_DEVICE and attach
+them to cgroups with BPF_CGROUP_DEVICE flag. On an attempt to access a
+device file, corresponding BPF programs will be executed, and depending
+on the return value the attempt will succeed or fail with -EPERM.
-A BPF_CGROUP_DEVICE program takes a pointer to the bpf_cgroup_dev_ctx
-structure, which describes the device access attempt: access type
-(mknod/read/write) and device (type, major and minor numbers).
-If the program returns 0, the attempt fails with -EPERM, otherwise
-it succeeds.
+A BPF_PROG_TYPE_CGROUP_DEVICE program takes a pointer to the
+bpf_cgroup_dev_ctx structure, which describes the device access attempt:
+access type (mknod/read/write) and device (type, major and minor numbers).
+If the program returns 0, the attempt fails with -EPERM, otherwise it
+succeeds.
-An example of BPF_CGROUP_DEVICE program may be found in the kernel
-source tree in the tools/testing/selftests/bpf/dev_cgroup.c file.
+An example of BPF_PROG_TYPE_CGROUP_DEVICE program may be found in
+tools/testing/selftests/bpf/progs/dev_cgroup.c in the kernel source tree.
RDMA
----
The "rdma" controller regulates the distribution and accounting of
-of RDMA resources.
+RDMA resources.
RDMA Interface Files
~~~~~~~~~~~~~~~~~~~~
@@ -1673,10 +2747,175 @@ RDMA Interface Files
mlx4_0 hca_handle=1 hca_object=20
ocrdma1 hca_handle=1 hca_object=23
+DMEM
+----
+
+The "dmem" controller regulates the distribution and accounting of
+device memory regions. Because each memory region may have its own page size,
+which does not have to be equal to the system page size, the units are always bytes.
+
+DMEM Interface Files
+~~~~~~~~~~~~~~~~~~~~
+
+ dmem.max, dmem.min, dmem.low
+ A readwrite nested-keyed file that exists for all the cgroups
+ except root that describes current configured resource limit
+ for a region.
+
+ An example for xe follows::
+
+ drm/0000:03:00.0/vram0 1073741824
+ drm/0000:03:00.0/stolen max
+
+ The semantics are the same as for the memory cgroup controller, and are
+ calculated in the same way.
+
+ dmem.capacity
+ A read-only file that describes maximum region capacity.
+ It only exists on the root cgroup. Not all memory can be
+ allocated by cgroups, as the kernel reserves some for
+ internal use.
+
+ An example for xe follows::
+
+ drm/0000:03:00.0/vram0 8514437120
+ drm/0000:03:00.0/stolen 67108864
+
+ dmem.current
+ A read-only file that describes current resource usage.
+ It exists for all the cgroup except root.
+
+ An example for xe follows::
+
+ drm/0000:03:00.0/vram0 12550144
+ drm/0000:03:00.0/stolen 8650752
+
+HugeTLB
+-------
+
+The HugeTLB controller allows to limit the HugeTLB usage per control group and
+enforces the controller limit during page fault.
+
+HugeTLB Interface Files
+~~~~~~~~~~~~~~~~~~~~~~~
+
+ hugetlb.<hugepagesize>.current
+ Show current usage for "hugepagesize" hugetlb. It exists for all
+ the cgroup except root.
+
+ hugetlb.<hugepagesize>.max
+ Set/show the hard limit of "hugepagesize" hugetlb usage.
+ The default value is "max". It exists for all the cgroup except root.
+
+ hugetlb.<hugepagesize>.events
+ A read-only flat-keyed file which exists on non-root cgroups.
+
+ max
+ The number of allocation failure due to HugeTLB limit
+
+ hugetlb.<hugepagesize>.events.local
+ Similar to hugetlb.<hugepagesize>.events but the fields in the file
+ are local to the cgroup i.e. not hierarchical. The file modified event
+ generated on this file reflects only the local events.
+
+ hugetlb.<hugepagesize>.numa_stat
+ Similar to memory.numa_stat, it shows the numa information of the
+ hugetlb pages of <hugepagesize> in this cgroup. Only active in
+ use hugetlb pages are included. The per-node values are in bytes.
Misc
----
+The Miscellaneous cgroup provides the resource limiting and tracking
+mechanism for the scalar resources which cannot be abstracted like the other
+cgroup resources. Controller is enabled by the CONFIG_CGROUP_MISC config
+option.
+
+A resource can be added to the controller via enum misc_res_type{} in the
+include/linux/misc_cgroup.h file and the corresponding name via misc_res_name[]
+in the kernel/cgroup/misc.c file. Provider of the resource must set its
+capacity prior to using the resource by calling misc_cg_set_capacity().
+
+Once a capacity is set then the resource usage can be updated using charge and
+uncharge APIs. All of the APIs to interact with misc controller are in
+include/linux/misc_cgroup.h.
+
+Misc Interface Files
+~~~~~~~~~~~~~~~~~~~~
+
+Miscellaneous controller provides 3 interface files. If two misc resources (res_a and res_b) are registered then:
+
+ misc.capacity
+ A read-only flat-keyed file shown only in the root cgroup. It shows
+ miscellaneous scalar resources available on the platform along with
+ their quantities::
+
+ $ cat misc.capacity
+ res_a 50
+ res_b 10
+
+ misc.current
+ A read-only flat-keyed file shown in the all cgroups. It shows
+ the current usage of the resources in the cgroup and its children.::
+
+ $ cat misc.current
+ res_a 3
+ res_b 0
+
+ misc.peak
+ A read-only flat-keyed file shown in all cgroups. It shows the
+ historical maximum usage of the resources in the cgroup and its
+ children.::
+
+ $ cat misc.peak
+ res_a 10
+ res_b 8
+
+ misc.max
+ A read-write flat-keyed file shown in the non root cgroups. Allowed
+ maximum usage of the resources in the cgroup and its children.::
+
+ $ cat misc.max
+ res_a max
+ res_b 4
+
+ Limit can be set by::
+
+ # echo res_a 1 > misc.max
+
+ Limit can be set to max by::
+
+ # echo res_a max > misc.max
+
+ Limits can be set higher than the capacity value in the misc.capacity
+ file.
+
+ misc.events
+ A read-only flat-keyed file which exists on non-root cgroups. The
+ following entries are defined. Unless specified otherwise, a value
+ change in this file generates a file modified event. All fields in
+ this file are hierarchical.
+
+ max
+ The number of times the cgroup's resource usage was
+ about to go over the max boundary.
+
+ misc.events.local
+ Similar to misc.events but the fields in the file are local to the
+ cgroup i.e. not hierarchical. The file modified event generated on
+ this file reflects only the local events.
+
+Migration and Ownership
+~~~~~~~~~~~~~~~~~~~~~~~
+
+A miscellaneous scalar resource is charged to the cgroup in which it is used
+first, and stays charged to that cgroup until that resource is freed. Migrating
+a process to a different cgroup does not move the charge to the destination
+cgroup where the process has moved.
+
+Others
+------
+
perf_event
~~~~~~~~~~
@@ -1733,7 +2972,7 @@ Without cgroup namespace, the "/proc/$PID/cgroup" file shows the
complete path of the cgroup of a process. In a container setup where
a set of cgroups and namespaces are intended to isolate processes the
"/proc/$PID/cgroup" file may leak potential system level information
-to the isolated processes. For Example::
+to the isolated processes. For example::
# cat /proc/self/cgroup
0::/batchjobs/container_id1
@@ -1874,15 +3113,17 @@ Filesystem Support for Writeback
--------------------------------
A filesystem can support cgroup writeback by updating
-address_space_operations->writepage[s]() to annotate bio's using the
+address_space_operations->writepages() to annotate bio's using the
following two functions.
wbc_init_bio(@wbc, @bio)
Should be called for each bio carrying writeback data and
- associates the bio with the inode's owner cgroup. Can be
- called anytime between bio allocation and submission.
+ associates the bio with the inode's owner cgroup and the
+ corresponding request queue. This must be called after
+ a queue (device) has been associated with the bio and
+ before submission.
- wbc_account_io(@wbc, @page, @bytes)
+ wbc_account_cgroup_owner(@wbc, @folio, @bytes)
Should be called for each data segment being written out.
While this function doesn't care exactly when it's called
during the writeback session, it's the easiest and most
@@ -1899,7 +3140,7 @@ the configuration, the bio may be executed at a lower priority and if
the writeback session is holding shared resources, e.g. a journal
entry, may lead to priority inversion. There is no one easy solution
for the problem. Filesystems can try to work around specific problem
-cases by skipping wbc_init_bio() or using bio_associate_blkcg()
+cases by skipping wbc_init_bio() and using bio_associate_blkg()
directly.
@@ -1914,8 +3155,8 @@ Deprecated v1 Core Features
- "cgroup.clone_children" is removed.
-- /proc/cgroups is meaningless for v2. Use "cgroup.controllers" file
- at the root instead.
+- /proc/cgroups is meaningless for v2. Use "cgroup.controllers" or
+ "cgroup.stat" files at the root instead.
Issues with v1 and Rationales for v2
@@ -2109,8 +3350,10 @@ system performance due to overreclaim, to the point where the feature
becomes self-defeating.
The memory.low boundary on the other hand is a top-down allocated
-reserve. A cgroup enjoys reclaim protection when it's within its low,
-which makes delegation of subtrees possible.
+reserve. A cgroup enjoys reclaim protection when it's within its
+effective low, which makes delegation of subtrees possible. It also
+enjoys having reclaim pressure proportional to its overage when
+above its effective low.
The original high boundary, the hard limit, is defined as a strict
limit that can not budge, even if the OOM killer has to be called.
diff --git a/Documentation/admin-guide/cifs/authors.rst b/Documentation/admin-guide/cifs/authors.rst
new file mode 100644
index 000000000000..5c1d2f0fa7d1
--- /dev/null
+++ b/Documentation/admin-guide/cifs/authors.rst
@@ -0,0 +1,69 @@
+=======
+Authors
+=======
+
+Original Author
+---------------
+
+Steve French (smfrench@gmail.com, sfrench@samba.org)
+
+The author wishes to express his appreciation and thanks to:
+Andrew Tridgell (Samba team) for his early suggestions about SMB/CIFS VFS
+improvements. Thanks to IBM for allowing me time and test resources to pursue
+this project, to Jim McDonough from IBM (and the Samba Team) for his help, to
+the IBM Linux JFS team for explaining many esoteric Linux filesystem features.
+Jeremy Allison of the Samba team has done invaluable work in adding the server
+side of the original CIFS Unix extensions and reviewing and implementing
+portions of the newer CIFS POSIX extensions into the Samba 3 file server. Thank
+Dave Boutcher of IBM Rochester (author of the OS/400 smb/cifs filesystem client)
+for proving years ago that very good smb/cifs clients could be done on Unix-like
+operating systems. Volker Lendecke, Andrew Tridgell, Urban Widmark, John
+Newbigin and others for their work on the Linux smbfs module. Thanks to
+the other members of the Storage Network Industry Association CIFS Technical
+Workgroup for their work specifying this highly complex protocol and finally
+thanks to the Samba team for their technical advice and encouragement.
+
+Patch Contributors
+------------------
+
+- Zwane Mwaikambo
+- Andi Kleen
+- Amrut Joshi
+- Shobhit Dayal
+- Sergey Vlasov
+- Richard Hughes
+- Yury Umanets
+- Mark Hamzy (for some of the early cifs IPv6 work)
+- Domen Puncer
+- Jesper Juhl (in particular for lots of whitespace/formatting cleanup)
+- Vince Negri and Dave Stahl (for finding an important caching bug)
+- Adrian Bunk (kcalloc cleanups)
+- Miklos Szeredi
+- Kazeon team for various fixes especially for 2.4 version.
+- Asser Ferno (Change Notify support)
+- Shaggy (Dave Kleikamp) for innumerable small fs suggestions and some good cleanup
+- Gunter Kukkukk (testing and suggestions for support of old servers)
+- Igor Mammedov (DFS support)
+- Jeff Layton (many, many fixes, as well as great work on the cifs Kerberos code)
+- Scott Lovenberg
+- Pavel Shilovsky (for great work adding SMB2 support, and various SMB3 features)
+- Aurelien Aptel (for DFS SMB3 work and some key bug fixes)
+- Ronnie Sahlberg (for SMB3 xattr work, bug fixes, and lots of great work on compounding)
+- Shirish Pargaonkar (for many ACL patches over the years)
+- Sachin Prabhu (many bug fixes, including for reconnect, copy offload and security)
+- Paulo Alcantara (for some excellent work in DFS, and in booting from SMB3)
+- Long Li (some great work on RDMA, SMB Direct)
+
+
+Test case and Bug Report contributors
+-------------------------------------
+Thanks to those in the community who have submitted detailed bug reports
+and debug of problems they have found: Jochen Dolze, David Blaine,
+Rene Scharfe, Martin Josefsson, Alexander Wild, Anthony Liguori,
+Lars Muller, Urban Widmark, Massimiliano Ferrero, Howard Owen,
+Olaf Kirch, Kieron Briggs, Nick Millington and others. Also special
+mention to the Stanford Checker (SWAT) which pointed out many minor
+bugs in error paths. Valuable suggestions also have come from Al Viro
+and Dave Miller.
+
+And thanks to the IBM LTC and Power test teams and SuSE and Citrix and RedHat testers for finding multiple bugs during excellent stress test runs.
diff --git a/Documentation/admin-guide/cifs/changes.rst b/Documentation/admin-guide/cifs/changes.rst
new file mode 100644
index 000000000000..8c42c4de510b
--- /dev/null
+++ b/Documentation/admin-guide/cifs/changes.rst
@@ -0,0 +1,9 @@
+=======
+Changes
+=======
+
+See https://wiki.samba.org/index.php/LinuxCIFSKernel for summary
+information about fixes/improvements to CIFS/SMB2/SMB3 support (changes
+to cifs.ko module) by kernel version (and cifs internal module version).
+This may be easier to read than parsing the output of
+"git log fs/smb/client" by release.
diff --git a/Documentation/admin-guide/cifs/index.rst b/Documentation/admin-guide/cifs/index.rst
new file mode 100644
index 000000000000..fad5268635f5
--- /dev/null
+++ b/Documentation/admin-guide/cifs/index.rst
@@ -0,0 +1,21 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====
+CIFS
+====
+
+.. toctree::
+ :maxdepth: 2
+
+ introduction
+ usage
+ todo
+ changes
+ authors
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
diff --git a/Documentation/admin-guide/cifs/introduction.rst b/Documentation/admin-guide/cifs/introduction.rst
new file mode 100644
index 000000000000..ffc6e2564dd5
--- /dev/null
+++ b/Documentation/admin-guide/cifs/introduction.rst
@@ -0,0 +1,53 @@
+============
+Introduction
+============
+
+ This is the client VFS module for the SMB3 NAS protocol as well
+ as for older dialects such as the Common Internet File System (CIFS)
+ protocol which was the successor to the Server Message Block
+ (SMB) protocol, the native file sharing mechanism for most early
+ PC operating systems. New and improved versions of CIFS are now
+ called SMB2 and SMB3. Use of SMB3 (and later, including SMB3.1.1
+ the most current dialect) is strongly preferred over using older
+ dialects like CIFS due to security reasons. All modern dialects,
+ including the most recent, SMB3.1.1, are supported by the CIFS VFS
+ module. The SMB3 protocol is implemented and supported by all major
+ file servers such as Windows (including Windows 2019 Server), as
+ well as by Samba (which provides excellent CIFS/SMB2/SMB3 server
+ support and tools for Linux and many other operating systems).
+ Apple systems also support SMB3 well, as do most Network Attached
+ Storage vendors, so this network filesystem client can mount to a
+ wide variety of systems. It also supports mounting to the cloud
+ (for example Microsoft Azure), including the necessary security
+ features.
+
+ The intent of this module is to provide the most advanced network
+ file system function for SMB3 compliant servers, including advanced
+ security features, excellent parallelized high performance i/o, better
+ POSIX compliance, secure per-user session establishment, encryption,
+ high performance safe distributed caching (leases/oplocks), optional packet
+ signing, large files, Unicode support and other internationalization
+ improvements. Since both Samba server and this filesystem client support the
+ CIFS Unix extensions, and the Linux client also supports SMB3 POSIX extensions,
+ the combination can provide a reasonable alternative to other network and
+ cluster file systems for fileserving in some Linux to Linux environments,
+ not just in Linux to Windows (or Linux to Mac) environments.
+
+ This filesystem has a mount utility (mount.cifs) and various user space
+ tools (including smbinfo and setcifsacl) that can be obtained from
+
+ https://git.samba.org/?p=cifs-utils.git
+
+ or
+
+ git://git.samba.org/cifs-utils.git
+
+ mount.cifs should be installed in the directory with the other mount helpers.
+
+ For more information on the module see the project wiki page at
+
+ https://wiki.samba.org/index.php/LinuxCIFS
+
+ and
+
+ https://wiki.samba.org/index.php/LinuxCIFS_utils
diff --git a/Documentation/admin-guide/cifs/todo.rst b/Documentation/admin-guide/cifs/todo.rst
new file mode 100644
index 000000000000..9a65c670774e
--- /dev/null
+++ b/Documentation/admin-guide/cifs/todo.rst
@@ -0,0 +1,133 @@
+====
+TODO
+====
+
+As of 6.7 kernel. See https://wiki.samba.org/index.php/LinuxCIFSKernel
+for list of features added by release
+
+A Partial List of Missing Features
+==================================
+
+Contributions are welcome. There are plenty of opportunities
+for visible, important contributions to this module. Here
+is a partial list of the known problems and missing features:
+
+a) SMB3 (and SMB3.1.1) missing optional features:
+ multichannel performance optimizations, algorithmic channel selection,
+ directory leases optimizations,
+ support for faster packet signing (GMAC),
+ support for compression over the network,
+ T10 copy offload ie "ODX" (copy chunk, and "Duplicate Extents" ioctl
+ are currently the only two server side copy mechanisms supported)
+
+b) Better optimized compounding and error handling for sparse file support,
+ perhaps addition of new optional SMB3.1.1 fsctls to make collapse range
+ and insert range more atomic
+
+c) Support for SMB3.1.1 over QUIC (and perhaps other socket based protocols
+ like SCTP)
+
+d) quota support (needs minor kernel change since quota calls otherwise
+ won't make it to network filesystems or deviceless filesystems).
+
+e) Additional use cases can be optimized to use "compounding" (e.g.
+ open/query/close and open/setinfo/close) to reduce the number of
+ roundtrips to the server and improve performance. Various cases
+ (stat, statfs, create, unlink, mkdir, xattrs) already have been improved by
+ using compounding but more can be done. In addition we could
+ significantly reduce redundant opens by using deferred close (with
+ handle caching leases) and better using reference counters on file
+ handles.
+
+f) Finish inotify support so kde and gnome file list windows
+ will autorefresh (partially complete by Asser). Needs minor kernel
+ vfs change to support removing D_NOTIFY on a file.
+
+g) Add GUI tool to configure /proc/fs/cifs settings and for display of
+ the CIFS statistics (started)
+
+h) implement support for security and trusted categories of xattrs
+ (requires minor protocol extension) to enable better support for SELINUX
+
+i) Add support for tree connect contexts (see MS-SMB2) a new SMB3.1.1 protocol
+ feature (may be especially useful for virtualization).
+
+j) Create UID mapping facility so server UIDs can be mapped on a per
+ mount or a per server basis to client UIDs or nobody if no mapping
+ exists. Also better integration with winbind for resolving SID owners
+
+k) Add tools to take advantage of more smb3 specific ioctls and features
+ (passthrough ioctl/fsctl is now implemented in cifs.ko to allow
+ sending various SMB3 fsctls and query info and set info calls
+ directly from user space) Add tools to make setting various non-POSIX
+ metadata attributes easier from tools (e.g. extending what was done
+ in smb-info tool).
+
+l) encrypted file support (currently the attribute showing the file is
+ encrypted on the server is reported, but changing the attribute is not
+ supported).
+
+m) improved stats gathering tools (perhaps integration with nfsometer?)
+ to extend and make easier to use what is currently in /proc/fs/cifs/Stats
+
+n) Add support for claims based ACLs ("DAC")
+
+o) mount helper GUI (to simplify the various configuration options on mount)
+
+p) Expand support for witness protocol to allow for notification of share
+ move, and server network adapter changes. Currently only notifications by
+ the witness protocol for server move is supported by the Linux client.
+
+q) Allow mount.cifs to be more verbose in reporting errors with dialect
+ or unsupported feature errors. This would now be easier due to the
+ implementation of the new mount API.
+
+r) updating cifs documentation, and user guide.
+
+s) Addressing bugs found by running a broader set of xfstests in standard
+ file system xfstest suite.
+
+t) split cifs and smb3 support into separate modules so legacy (and less
+ secure) CIFS dialect can be disabled in environments that don't need it
+ and simplify the code.
+
+v) Additional testing of POSIX Extensions for SMB3.1.1
+
+w) Support for the Mac SMB3.1.1 extensions to improve interop with Apple servers
+
+x) Support for additional authentication options (e.g. IAKERB, peer-to-peer
+ Kerberos, SCRAM and others supported by existing servers)
+
+y) Improved tracing, more eBPF trace points, better scripts for performance
+ analysis
+
+Known Bugs
+==========
+
+See https://bugzilla.samba.org - search on product "CifsVFS" for
+current bug list. Also check http://bugzilla.kernel.org (Product = File System, Component = CIFS)
+and xfstest results e.g. https://wiki.samba.org/index.php/Xfstest-results-smb3
+
+Misc testing to do
+==================
+1) check out max path names and max path name components against various server
+ types. Try nested symlinks (8 deep). Return max path name in stat -f information
+
+2) Improve xfstest's cifs/smb3 enablement and adapt xfstests where needed to test
+ cifs/smb3 better
+
+3) Additional performance testing and optimization using iozone and similar -
+ there are some easy changes that can be done to parallelize sequential writes,
+ and when signing is disabled to request larger read sizes (larger than
+ negotiated size) and send larger write sizes to modern servers.
+
+4) More exhaustively test against less common servers
+
+5) Continue to extend the smb3 "buildbot" which does automated xfstesting
+ against Windows, Samba and Azure currently - to add additional tests and
+ to allow the buildbot to execute the tests faster. The URL for the
+ buildbot is: http://smb3-test-rhel-75.southcentralus.cloudapp.azure.com
+
+6) Address various coverity warnings (most are not bugs per-se, but
+ the more warnings are addressed, the easier it is to spot real
+ problems that static analyzers will point out in the future).
diff --git a/Documentation/admin-guide/cifs/usage.rst b/Documentation/admin-guide/cifs/usage.rst
new file mode 100644
index 000000000000..d989ae5778ba
--- /dev/null
+++ b/Documentation/admin-guide/cifs/usage.rst
@@ -0,0 +1,864 @@
+=====
+Usage
+=====
+
+This module supports the SMB3 family of advanced network protocols (as well
+as older dialects, originally called "CIFS" or SMB1).
+
+The CIFS VFS module for Linux supports many advanced network filesystem
+features such as hierarchical DFS like namespace, hardlinks, locking and more.
+It was designed to comply with the SNIA CIFS Technical Reference (which
+supersedes the 1992 X/Open SMB Standard) as well as to perform best practice
+practical interoperability with Windows 2000, Windows XP, Samba and equivalent
+servers. This code was developed in participation with the Protocol Freedom
+Information Foundation. CIFS and now SMB3 has now become a defacto
+standard for interoperating between Macs and Windows and major NAS appliances.
+
+Please see
+MS-SMB2 (for detailed SMB2/SMB3/SMB3.1.1 protocol specification)
+or https://samba.org/samba/PFIF/
+for more details.
+
+
+For questions or bug reports please contact:
+
+ smfrench@gmail.com
+
+See the project page at: https://wiki.samba.org/index.php/LinuxCIFS_utils
+
+Build instructions
+==================
+
+For Linux:
+
+1) Download the kernel (e.g. from https://www.kernel.org)
+ and change directory into the top of the kernel directory tree
+ (e.g. /usr/src/linux-2.5.73)
+2) make menuconfig (or make xconfig)
+3) select cifs from within the network filesystem choices
+4) save and exit
+5) make
+
+
+Installation instructions
+=========================
+
+If you have built the CIFS vfs as module (successfully) simply
+type ``make modules_install`` (or if you prefer, manually copy the file to
+the modules directory e.g. /lib/modules/6.3.0-060300-generic/kernel/fs/smb/client/cifs.ko).
+
+If you have built the CIFS vfs into the kernel itself, follow the instructions
+for your distribution on how to install a new kernel (usually you
+would simply type ``make install``).
+
+If you do not have the utility mount.cifs (in the Samba 4.x source tree and on
+the CIFS VFS web site) copy it to the same directory in which mount helpers
+reside (usually /sbin). Although the helper software is not
+required, mount.cifs is recommended. Most distros include a ``cifs-utils``
+package that includes this utility so it is recommended to install this.
+
+Note that running the Winbind pam/nss module (logon service) on all of your
+Linux clients is useful in mapping Uids and Gids consistently across the
+domain to the proper network user. The mount.cifs mount helper can be
+found at cifs-utils.git on git.samba.org
+
+If cifs is built as a module, then the size and number of network buffers
+and maximum number of simultaneous requests to one server can be configured.
+Changing these from their defaults is not recommended. By executing modinfo::
+
+ modinfo <path to cifs.ko>
+
+on kernel/fs/smb/client/cifs.ko the list of configuration changes that can be made
+at module initialization time (by running insmod cifs.ko) can be seen.
+
+Recommendations
+===============
+
+To improve security the SMB2.1 dialect or later (usually will get SMB3.1.1) is now
+the new default. To use old dialects (e.g. to mount Windows XP) use "vers=1.0"
+on mount (or vers=2.0 for Windows Vista). Note that the CIFS (vers=1.0) is
+much older and less secure than the default dialect SMB3 which includes
+many advanced security features such as downgrade attack detection
+and encrypted shares and stronger signing and authentication algorithms.
+There are additional mount options that may be helpful for SMB3 to get
+improved POSIX behavior (NB: can use vers=3 to force SMB3 or later, never 2.1):
+
+ ``mfsymlinks`` and either ``cifsacl`` or ``modefromsid`` (usually with ``idsfromsid``)
+
+Allowing User Mounts
+====================
+
+To permit users to mount and unmount over directories they own is possible
+with the cifs vfs. A way to enable such mounting is to mark the mount.cifs
+utility as suid (e.g. ``chmod +s /sbin/mount.cifs``). To enable users to
+umount shares they mount requires
+
+1) mount.cifs version 1.4 or later
+2) an entry for the share in /etc/fstab indicating that a user may
+ unmount it e.g.::
+
+ //server/usersharename /mnt/username cifs user 0 0
+
+Note that when the mount.cifs utility is run suid (allowing user mounts),
+in order to reduce risks, the ``nosuid`` mount flag is passed in on mount to
+disallow execution of an suid program mounted on the remote target.
+When mount is executed as root, nosuid is not passed in by default,
+and execution of suid programs on the remote target would be enabled
+by default. This can be changed, as with nfs and other filesystems,
+by simply specifying ``nosuid`` among the mount options. For user mounts
+though to be able to pass the suid flag to mount requires rebuilding
+mount.cifs with the following flag: CIFS_ALLOW_USR_SUID
+
+There is a corresponding manual page for cifs mounting in the Samba 3.0 and
+later source tree in docs/manpages/mount.cifs.8
+
+Allowing User Unmounts
+======================
+
+To permit users to unmount directories that they have user mounted (see above),
+the utility umount.cifs may be used. It may be invoked directly, or if
+umount.cifs is placed in /sbin, umount can invoke the cifs umount helper
+(at least for most versions of the umount utility) for umount of cifs
+mounts, unless umount is invoked with -i (which will avoid invoking a umount
+helper). As with mount.cifs, to enable user unmounts umount.cifs must be marked
+as suid (e.g. ``chmod +s /sbin/umount.cifs``) or equivalent (some distributions
+allow adding entries to a file to the /etc/permissions file to achieve the
+equivalent suid effect). For this utility to succeed the target path
+must be a cifs mount, and the uid of the current user must match the uid
+of the user who mounted the resource.
+
+Also note that the customary way of allowing user mounts and unmounts is
+(instead of using mount.cifs and unmount.cifs as suid) to add a line
+to the file /etc/fstab for each //server/share you wish to mount, but
+this can become unwieldy when potential mount targets include many
+or unpredictable UNC names.
+
+Samba Considerations
+====================
+
+Most current servers support SMB2.1 and SMB3 which are more secure,
+but there are useful protocol extensions for the older less secure CIFS
+dialect, so to get the maximum benefit if mounting using the older dialect
+(CIFS/SMB1), we recommend using a server that supports the SNIA CIFS
+Unix Extensions standard (e.g. almost any version of Samba ie version
+2.2.5 or later) but the CIFS vfs works fine with a wide variety of CIFS servers.
+Note that uid, gid and file permissions will display default values if you do
+not have a server that supports the Unix extensions for CIFS (such as Samba
+2.2.5 or later). To enable the Unix CIFS Extensions in the Samba server, add
+the line::
+
+ unix extensions = yes
+
+to your smb.conf file on the server. Note that the following smb.conf settings
+are also useful (on the Samba server) when the majority of clients are Unix or
+Linux::
+
+ case sensitive = yes
+ delete readonly = yes
+ ea support = yes
+
+Note that server ea support is required for supporting xattrs from the Linux
+cifs client, and that EA support is present in later versions of Samba (e.g.
+3.0.6 and later (also EA support works in all versions of Windows, at least to
+shares on NTFS filesystems). Extended Attribute (xattr) support is an optional
+feature of most Linux filesystems which may require enabling via
+make menuconfig. Client support for extended attributes (user xattr) can be
+disabled on a per-mount basis by specifying ``nouser_xattr`` on mount.
+
+The CIFS client can get and set POSIX ACLs (getfacl, setfacl) to Samba servers
+version 3.10 and later. Setting POSIX ACLs requires enabling both XATTR and
+then POSIX support in the CIFS configuration options when building the cifs
+module. POSIX ACL support can be disabled on a per mount basic by specifying
+``noacl`` on mount.
+
+Some administrators may want to change Samba's smb.conf ``map archive`` and
+``create mask`` parameters from the default. Unless the create mask is changed
+newly created files can end up with an unnecessarily restrictive default mode,
+which may not be what you want, although if the CIFS Unix extensions are
+enabled on the server and client, subsequent setattr calls (e.g. chmod) can
+fix the mode. Note that creating special devices (mknod) remotely
+may require specifying a mkdev function to Samba if you are not using
+Samba 3.0.6 or later. For more information on these see the manual pages
+(``man smb.conf``) on the Samba server system. Note that the cifs vfs,
+unlike the smbfs vfs, does not read the smb.conf on the client system
+(the few optional settings are passed in on mount via -o parameters instead).
+Note that Samba 2.2.7 or later includes a fix that allows the CIFS VFS to delete
+open files (required for strict POSIX compliance). Windows Servers already
+supported this feature. Samba server does not allow symlinks that refer to files
+outside of the share, so in Samba versions prior to 3.0.6, most symlinks to
+files with absolute paths (ie beginning with slash) such as::
+
+ ln -s /mnt/foo bar
+
+would be forbidden. Samba 3.0.6 server or later includes the ability to create
+such symlinks safely by converting unsafe symlinks (ie symlinks to server
+files that are outside of the share) to a samba specific format on the server
+that is ignored by local server applications and non-cifs clients and that will
+not be traversed by the Samba server). This is opaque to the Linux client
+application using the cifs vfs. Absolute symlinks will work to Samba 3.0.5 or
+later, but only for remote clients using the CIFS Unix extensions, and will
+be invisible to Windows clients and typically will not affect local
+applications running on the same server as Samba.
+
+Use instructions
+================
+
+Once the CIFS VFS support is built into the kernel or installed as a module
+(cifs.ko), you can use mount syntax like the following to access Samba or
+Mac or Windows servers::
+
+ mount -t cifs //9.53.216.11/e$ /mnt -o username=myname,password=mypassword
+
+Before -o the option -v may be specified to make the mount.cifs
+mount helper display the mount steps more verbosely.
+After -o the following commonly used cifs vfs specific options
+are supported::
+
+ username=<username>
+ password=<password>
+ domain=<domain name>
+
+Other cifs mount options are described below. Use of TCP names (in addition to
+ip addresses) is available if the mount helper (mount.cifs) is installed. If
+you do not trust the server to which are mounted, or if you do not have
+cifs signing enabled (and the physical network is insecure), consider use
+of the standard mount options ``noexec`` and ``nosuid`` to reduce the risk of
+running an altered binary on your local system (downloaded from a hostile server
+or altered by a hostile router).
+
+Although mounting using format corresponding to the CIFS URL specification is
+not possible in mount.cifs yet, it is possible to use an alternate format
+for the server and sharename (which is somewhat similar to NFS style mount
+syntax) instead of the more widely used UNC format (i.e. \\server\share)::
+
+ mount -t cifs tcp_name_of_server:share_name /mnt -o user=myname,pass=mypasswd
+
+When using the mount helper mount.cifs, passwords may be specified via alternate
+mechanisms, instead of specifying it after -o using the normal ``pass=`` syntax
+on the command line:
+1) By including it in a credential file. Specify credentials=filename as one
+of the mount options. Credential files contain two lines::
+
+ username=someuser
+ password=your_password
+
+2) By specifying the password in the PASSWD environment variable (similarly
+ the user name can be taken from the USER environment variable).
+3) By specifying the password in a file by name via PASSWD_FILE
+4) By specifying the password in a file by file descriptor via PASSWD_FD
+
+If no password is provided, mount.cifs will prompt for password entry
+
+Restrictions
+============
+
+Servers must support either "pure-TCP" (port 445 TCP/IP CIFS connections) or RFC
+1001/1002 support for "Netbios-Over-TCP/IP." This is not likely to be a
+problem as most servers support this.
+
+Valid filenames differ between Windows and Linux. Windows typically restricts
+filenames which contain certain reserved characters (e.g.the character :
+which is used to delimit the beginning of a stream name by Windows), while
+Linux allows a slightly wider set of valid characters in filenames. Windows
+servers can remap such characters when an explicit mapping is specified in
+the Server's registry. Samba starting with version 3.10 will allow such
+filenames (ie those which contain valid Linux characters, which normally
+would be forbidden for Windows/CIFS semantics) as long as the server is
+configured for Unix Extensions (and the client has not disabled
+/proc/fs/cifs/LinuxExtensionsEnabled). In addition the mount option
+``mapposix`` can be used on CIFS (vers=1.0) to force the mapping of
+illegal Windows/NTFS/SMB characters to a remap range (this mount parameter
+is the default for SMB3). This remap (``mapposix``) range is also
+compatible with Mac (and "Services for Mac" on some older Windows).
+When POSIX Extensions for SMB 3.1.1 are negotiated, remapping is automatically
+disabled.
+
+CIFS VFS Mount Options
+======================
+A partial list of the supported mount options follows:
+
+ username
+ The user name to use when trying to establish
+ the CIFS session.
+ password
+ The user password. If the mount helper is
+ installed, the user will be prompted for password
+ if not supplied.
+ ip
+ The ip address of the target server
+ unc
+ The target server Universal Network Name (export) to
+ mount.
+ domain
+ Set the SMB/CIFS workgroup name prepended to the
+ username during CIFS session establishment
+ forceuid
+ Set the default uid for inodes to the uid
+ passed in on mount. For mounts to servers
+ which do support the CIFS Unix extensions, such as a
+ properly configured Samba server, the server provides
+ the uid, gid and mode so this parameter should not be
+ specified unless the server and clients uid and gid
+ numbering differ. If the server and client are in the
+ same domain (e.g. running winbind or nss_ldap) and
+ the server supports the Unix Extensions then the uid
+ and gid can be retrieved from the server (and uid
+ and gid would not have to be specified on the mount.
+ For servers which do not support the CIFS Unix
+ extensions, the default uid (and gid) returned on lookup
+ of existing files will be the uid (gid) of the person
+ who executed the mount (root, except when mount.cifs
+ is configured setuid for user mounts) unless the ``uid=``
+ (gid) mount option is specified. Also note that permission
+ checks (authorization checks) on accesses to a file occur
+ at the server, but there are cases in which an administrator
+ may want to restrict at the client as well. For those
+ servers which do not report a uid/gid owner
+ (such as Windows), permissions can also be checked at the
+ client, and a crude form of client side permission checking
+ can be enabled by specifying file_mode and dir_mode on
+ the client. (default)
+ forcegid
+ (similar to above but for the groupid instead of uid) (default)
+ noforceuid
+ Fill in file owner information (uid) by requesting it from
+ the server if possible. With this option, the value given in
+ the uid= option (on mount) will only be used if the server
+ can not support returning uids on inodes.
+ noforcegid
+ (similar to above but for the group owner, gid, instead of uid)
+ uid
+ Set the default uid for inodes, and indicate to the
+ cifs kernel driver which local user mounted. If the server
+ supports the unix extensions the default uid is
+ not used to fill in the owner fields of inodes (files)
+ unless the ``forceuid`` parameter is specified.
+ gid
+ Set the default gid for inodes (similar to above).
+ file_mode
+ If CIFS Unix extensions are not supported by the server
+ this overrides the default mode for file inodes.
+ fsc
+ Enable local disk caching using FS-Cache (off by default). This
+ option could be useful to improve performance on a slow link,
+ heavily loaded server and/or network where reading from the
+ disk is faster than reading from the server (over the network).
+ This could also impact scalability positively as the
+ number of calls to the server are reduced. However, local
+ caching is not suitable for all workloads for e.g. read-once
+ type workloads. So, you need to consider carefully your
+ workload/scenario before using this option. Currently, local
+ disk caching is functional for CIFS files opened as read-only.
+ dir_mode
+ If CIFS Unix extensions are not supported by the server
+ this overrides the default mode for directory inodes.
+ port
+ attempt to contact the server on this tcp port, before
+ trying the usual ports (port 445, then 139).
+ iocharset
+ Codepage used to convert local path names to and from
+ Unicode. Unicode is used by default for network path
+ names if the server supports it. If iocharset is
+ not specified then the nls_default specified
+ during the local client kernel build will be used.
+ If server does not support Unicode, this parameter is
+ unused.
+ rsize
+ default read size (usually 16K). The client currently
+ can not use rsize larger than CIFSMaxBufSize. CIFSMaxBufSize
+ defaults to 16K and may be changed (from 8K to the maximum
+ kmalloc size allowed by your kernel) at module install time
+ for cifs.ko. Setting CIFSMaxBufSize to a very large value
+ will cause cifs to use more memory and may reduce performance
+ in some cases. To use rsize greater than 127K (the original
+ cifs protocol maximum) also requires that the server support
+ a new Unix Capability flag (for very large read) which some
+ newer servers (e.g. Samba 3.0.26 or later) do. rsize can be
+ set from a minimum of 2048 to a maximum of 130048 (127K or
+ CIFSMaxBufSize, whichever is smaller)
+ wsize
+ default write size (default 57344)
+ maximum wsize currently allowed by CIFS is 57344 (fourteen
+ 4096 byte pages)
+ actimeo=n
+ attribute cache timeout in seconds (default 1 second).
+ After this timeout, the cifs client requests fresh attribute
+ information from the server. This option allows to tune the
+ attribute cache timeout to suit the workload needs. Shorter
+ timeouts mean better the cache coherency, but increased number
+ of calls to the server. Longer timeouts mean reduced number
+ of calls to the server at the expense of less stricter cache
+ coherency checks (i.e. incorrect attribute cache for a short
+ period of time).
+ rw
+ mount the network share read-write (note that the
+ server may still consider the share read-only)
+ ro
+ mount network share read-only
+ version
+ used to distinguish different versions of the
+ mount helper utility (not typically needed)
+ sep
+ if first mount option (after the -o), overrides
+ the comma as the separator between the mount
+ parameters. e.g.::
+
+ -o user=myname,password=mypassword,domain=mydom
+
+ could be passed instead with period as the separator by::
+
+ -o sep=.user=myname.password=mypassword.domain=mydom
+
+ this might be useful when comma is contained within username
+ or password or domain. This option is less important
+ when the cifs mount helper cifs.mount (version 1.1 or later)
+ is used.
+ nosuid
+ Do not allow remote executables with the suid bit
+ program to be executed. This is only meaningful for mounts
+ to servers such as Samba which support the CIFS Unix Extensions.
+ If you do not trust the servers in your network (your mount
+ targets) it is recommended that you specify this option for
+ greater security.
+ exec
+ Permit execution of binaries on the mount.
+ noexec
+ Do not permit execution of binaries on the mount.
+ dev
+ Recognize block devices on the remote mount.
+ nodev
+ Do not recognize devices on the remote mount.
+ suid
+ Allow remote files on this mountpoint with suid enabled to
+ be executed (default for mounts when executed as root,
+ nosuid is default for user mounts).
+ credentials
+ Although ignored by the cifs kernel component, it is used by
+ the mount helper, mount.cifs. When mount.cifs is installed it
+ opens and reads the credential file specified in order
+ to obtain the userid and password arguments which are passed to
+ the cifs vfs.
+ guest
+ Although ignored by the kernel component, the mount.cifs
+ mount helper will not prompt the user for a password
+ if guest is specified on the mount options. If no
+ password is specified a null password will be used.
+ perm
+ Client does permission checks (vfs_permission check of uid
+ and gid of the file against the mode and desired operation),
+ Note that this is in addition to the normal ACL check on the
+ target machine done by the server software.
+ Client permission checking is enabled by default.
+ noperm
+ Client does not do permission checks. This can expose
+ files on this mount to access by other users on the local
+ client system. It is typically only needed when the server
+ supports the CIFS Unix Extensions but the UIDs/GIDs on the
+ client and server system do not match closely enough to allow
+ access by the user doing the mount, but it may be useful with
+ non CIFS Unix Extension mounts for cases in which the default
+ mode is specified on the mount but is not to be enforced on the
+ client (e.g. perhaps when MultiUserMount is enabled)
+ Note that this does not affect the normal ACL check on the
+ target machine done by the server software (of the server
+ ACL against the user name provided at mount time).
+ serverino
+ Use server's inode numbers instead of generating automatically
+ incrementing inode numbers on the client. Although this will
+ make it easier to spot hardlinked files (as they will have
+ the same inode numbers) and inode numbers may be persistent,
+ note that the server does not guarantee that the inode numbers
+ are unique if multiple server side mounts are exported under a
+ single share (since inode numbers on the servers might not
+ be unique if multiple filesystems are mounted under the same
+ shared higher level directory). Note that some older
+ (e.g. pre-Windows 2000) do not support returning UniqueIDs
+ or the CIFS Unix Extensions equivalent and for those
+ this mount option will have no effect. Exporting cifs mounts
+ under nfsd requires this mount option on the cifs mount.
+ This is now the default if server supports the
+ required network operation.
+ noserverino
+ Client generates inode numbers (rather than using the actual one
+ from the server). These inode numbers will vary after
+ unmount or reboot which can confuse some applications,
+ but not all server filesystems support unique inode
+ numbers.
+ setuids
+ If the CIFS Unix extensions are negotiated with the server
+ the client will attempt to set the effective uid and gid of
+ the local process on newly created files, directories, and
+ devices (create, mkdir, mknod). If the CIFS Unix Extensions
+ are not negotiated, for newly created files and directories
+ instead of using the default uid and gid specified on
+ the mount, cache the new file's uid and gid locally which means
+ that the uid for the file can change when the inode is
+ reloaded (or the user remounts the share).
+ nosetuids
+ The client will not attempt to set the uid and gid on
+ on newly created files, directories, and devices (create,
+ mkdir, mknod) which will result in the server setting the
+ uid and gid to the default (usually the server uid of the
+ user who mounted the share). Letting the server (rather than
+ the client) set the uid and gid is the default. If the CIFS
+ Unix Extensions are not negotiated then the uid and gid for
+ new files will appear to be the uid (gid) of the mounter or the
+ uid (gid) parameter specified on the mount.
+ netbiosname
+ When mounting to servers via port 139, specifies the RFC1001
+ source name to use to represent the client netbios machine
+ name when doing the RFC1001 netbios session initialize.
+ direct
+ Do not do inode data caching on files opened on this mount.
+ This precludes mmapping files on this mount. In some cases
+ with fast networks and little or no caching benefits on the
+ client (e.g. when the application is doing large sequential
+ reads bigger than page size without rereading the same data)
+ this can provide better performance than the default
+ behavior which caches reads (readahead) and writes
+ (writebehind) through the local Linux client pagecache
+ if oplock (caching token) is granted and held. Note that
+ direct allows write operations larger than page size
+ to be sent to the server.
+ strictcache
+ Use for switching on strict cache mode. In this mode the
+ client read from the cache all the time it has Oplock Level II,
+ otherwise - read from the server. All written data are stored
+ in the cache, but if the client doesn't have Exclusive Oplock,
+ it writes the data to the server.
+ rwpidforward
+ Forward pid of a process who opened a file to any read or write
+ operation on that file. This prevent applications like WINE
+ from failing on read and write if we use mandatory brlock style.
+ acl
+ Allow setfacl and getfacl to manage posix ACLs if server
+ supports them. (default)
+ noacl
+ Do not allow setfacl and getfacl calls on this mount
+ user_xattr
+ Allow getting and setting user xattrs (those attributes whose
+ name begins with ``user.`` or ``os2.``) as OS/2 EAs (extended
+ attributes) to the server. This allows support of the
+ setfattr and getfattr utilities. (default)
+ nouser_xattr
+ Do not allow getfattr/setfattr to get/set/list xattrs
+ mapchars
+ Translate six of the seven reserved characters (not backslash)::
+
+ *?<>|:
+
+ to the remap range (above 0xF000), which also
+ allows the CIFS client to recognize files created with
+ such characters by Windows's POSIX emulation. This can
+ also be useful when mounting to most versions of Samba
+ (which also forbids creating and opening files
+ whose names contain any of these seven characters).
+ This has no effect if the server does not support
+ Unicode on the wire.
+ nomapchars
+ Do not translate any of these seven characters (default).
+ nocase
+ Request case insensitive path name matching (case
+ sensitive is the default if the server supports it).
+ (mount option ``ignorecase`` is identical to ``nocase``)
+ posixpaths
+ If CIFS Unix extensions are supported, attempt to
+ negotiate posix path name support which allows certain
+ characters forbidden in typical CIFS filenames, without
+ requiring remapping. (default)
+ noposixpaths
+ If CIFS Unix extensions are supported, do not request
+ posix path name support (this may cause servers to
+ reject creatingfile with certain reserved characters).
+ nounix
+ Disable the CIFS Unix Extensions for this mount (tree
+ connection). This is rarely needed, but it may be useful
+ in order to turn off multiple settings all at once (ie
+ posix acls, posix locks, posix paths, symlink support
+ and retrieving uids/gids/mode from the server) or to
+ work around a bug in server which implement the Unix
+ Extensions.
+ nobrl
+ Do not send byte range lock requests to the server.
+ This is necessary for certain applications that break
+ with cifs style mandatory byte range locks (and most
+ cifs servers do not yet support requesting advisory
+ byte range locks).
+ forcemandatorylock
+ Even if the server supports posix (advisory) byte range
+ locking, send only mandatory lock requests. For some
+ (presumably rare) applications, originally coded for
+ DOS/Windows, which require Windows style mandatory byte range
+ locking, they may be able to take advantage of this option,
+ forcing the cifs client to only send mandatory locks
+ even if the cifs server would support posix advisory locks.
+ ``forcemand`` is accepted as a shorter form of this mount
+ option.
+ nostrictsync
+ If this mount option is set, when an application does an
+ fsync call then the cifs client does not send an SMB Flush
+ to the server (to force the server to write all dirty data
+ for this file immediately to disk), although cifs still sends
+ all dirty (cached) file data to the server and waits for the
+ server to respond to the write. Since SMB Flush can be
+ very slow, and some servers may be reliable enough (to risk
+ delaying slightly flushing the data to disk on the server),
+ turning on this option may be useful to improve performance for
+ applications that fsync too much, at a small risk of server
+ crash. If this mount option is not set, by default cifs will
+ send an SMB flush request (and wait for a response) on every
+ fsync call.
+ nodfs
+ Disable DFS (global name space support) even if the
+ server claims to support it. This can help work around
+ a problem with parsing of DFS paths with Samba server
+ versions 3.0.24 and 3.0.25.
+ remount
+ remount the share (often used to change from ro to rw mounts
+ or vice versa)
+ cifsacl
+ Report mode bits (e.g. on stat) based on the Windows ACL for
+ the file. (EXPERIMENTAL)
+ servern
+ Specify the server 's netbios name (RFC1001 name) to use
+ when attempting to setup a session to the server.
+ This is needed for mounting to some older servers (such
+ as OS/2 or Windows 98 and Windows ME) since they do not
+ support a default server name. A server name can be up
+ to 15 characters long and is usually uppercased.
+ sfu
+ When the CIFS Unix Extensions are not negotiated, attempt to
+ create device files and fifos in a format compatible with
+ Services for Unix (SFU). In addition retrieve bits 10-12
+ of the mode via the SETFILEBITS extended attribute (as
+ SFU does). In the future the bottom 9 bits of the
+ mode also will be emulated using queries of the security
+ descriptor (ACL).
+ mfsymlinks
+ Enable support for Minshall+French symlinks
+ (see http://wiki.samba.org/index.php/UNIX_Extensions#Minshall.2BFrench_symlinks)
+ This option is ignored when specified together with the
+ 'sfu' option. Minshall+French symlinks are used even if
+ the server supports the CIFS Unix Extensions.
+ sign
+ Must use packet signing (helps avoid unwanted data modification
+ by intermediate systems in the route). Note that signing
+ does not work with lanman or plaintext authentication.
+ seal
+ Must seal (encrypt) all data on this mounted share before
+ sending on the network. Requires support for Unix Extensions.
+ Note that this differs from the sign mount option in that it
+ causes encryption of data sent over this mounted share but other
+ shares mounted to the same server are unaffected.
+ locallease
+ This option is rarely needed. Fcntl F_SETLEASE is
+ used by some applications such as Samba and NFSv4 server to
+ check to see whether a file is cacheable. CIFS has no way
+ to explicitly request a lease, but can check whether a file
+ is cacheable (oplocked). Unfortunately, even if a file
+ is not oplocked, it could still be cacheable (ie cifs client
+ could grant fcntl leases if no other local processes are using
+ the file) for cases for example such as when the server does not
+ support oplocks and the user is sure that the only updates to
+ the file will be from this client. Specifying this mount option
+ will allow the cifs client to check for leases (only) locally
+ for files which are not oplocked instead of denying leases
+ in that case. (EXPERIMENTAL)
+ sec
+ Security mode. Allowed values are:
+
+ none
+ attempt to connection as a null user (no name)
+ krb5
+ Use Kerberos version 5 authentication
+ krb5i
+ Use Kerberos authentication and packet signing
+ ntlm
+ Use NTLM password hashing (default)
+ ntlmi
+ Use NTLM password hashing with signing (if
+ /proc/fs/cifs/PacketSigningEnabled on or if
+ server requires signing also can be the default)
+ ntlmv2
+ Use NTLMv2 password hashing
+ ntlmv2i
+ Use NTLMv2 password hashing with packet signing
+ lanman
+ (if configured in kernel config) use older
+ lanman hash
+ hard
+ Retry file operations if server is not responding
+ soft
+ Limit retries to unresponsive servers (usually only
+ one retry) before returning an error. (default)
+
+The mount.cifs mount helper also accepts a few mount options before -o
+including:
+
+=============== ===============================================================
+ -S take password from stdin (equivalent to setting the environment
+ variable ``PASSWD_FD=0``
+ -V print mount.cifs version
+ -? display simple usage information
+=============== ===============================================================
+
+With most 2.6 kernel versions of modutils, the version of the cifs kernel
+module can be displayed via modinfo.
+
+Misc /proc/fs/cifs Flags and Debug Info
+=======================================
+
+Informational pseudo-files:
+
+======================= =======================================================
+DebugData Displays information about active CIFS sessions and
+ shares, features enabled as well as the cifs.ko
+ version.
+Stats Lists summary resource usage information as well as per
+ share statistics.
+open_files List all the open file handles on all active SMB sessions.
+mount_params List of all mount parameters available for the module
+======================= =======================================================
+
+Configuration pseudo-files:
+
+======================= =======================================================
+SecurityFlags Flags which control security negotiation and
+ also packet signing. Authentication (may/must)
+ flags (e.g. for NTLMv2) may be combined with
+ the signing flags. Specifying two different password
+ hashing mechanisms (as "must use") on the other hand
+ does not make much sense. Default flags are::
+
+ 0x00C5
+
+ (NTLMv2 and packet signing allowed). Some SecurityFlags
+ may require enabling a corresponding menuconfig option.
+
+ may use packet signing 0x00001
+ must use packet signing 0x01001
+ may use NTLMv2 0x00004
+ must use NTLMv2 0x04004
+ may use Kerberos security (krb5) 0x00008
+ must use Kerberos 0x08008
+ may use NTLMSSP 0x00080
+ must use NTLMSSP 0x80080
+ seal (packet encryption) 0x00040
+ must seal 0x40040
+
+cifsFYI If set to non-zero value, additional debug information
+ will be logged to the system error log. This field
+ contains three flags controlling different classes of
+ debugging entries. The maximum value it can be set
+ to is 7 which enables all debugging points (default 0).
+ Some debugging statements are not compiled into the
+ cifs kernel unless CONFIG_CIFS_DEBUG2 is enabled in the
+ kernel configuration. cifsFYI may be set to one or
+ more of the following flags (7 sets them all)::
+
+ +-----------------------------------------------+------+
+ | log cifs informational messages | 0x01 |
+ +-----------------------------------------------+------+
+ | log return codes from cifs entry points | 0x02 |
+ +-----------------------------------------------+------+
+ | log slow responses | 0x04 |
+ | (ie which take longer than 1 second) | |
+ | | |
+ | CONFIG_CIFS_STATS2 must be enabled in .config | |
+ +-----------------------------------------------+------+
+
+traceSMB If set to one, debug information is logged to the
+ system error log with the start of smb requests
+ and responses (default 0)
+LookupCacheEnable If set to one, inode information is kept cached
+ for one second improving performance of lookups
+ (default 1)
+LinuxExtensionsEnabled If set to one then the client will attempt to
+ use the CIFS "UNIX" extensions which are optional
+ protocol enhancements that allow CIFS servers
+ to return accurate UID/GID information as well
+ as support symbolic links. If you use servers
+ such as Samba that support the CIFS Unix
+ extensions but do not want to use symbolic link
+ support and want to map the uid and gid fields
+ to values supplied at mount (rather than the
+ actual values, then set this to zero. (default 1)
+dfscache List the content of the DFS cache.
+ If set to 0, the client will clear the cache.
+======================= =======================================================
+
+These experimental features and tracing can be enabled by changing flags in
+/proc/fs/cifs (after the cifs module has been installed or built into the
+kernel, e.g. insmod cifs). To enable a feature set it to 1 e.g. to enable
+tracing to the kernel message log type::
+
+ echo 7 > /proc/fs/cifs/cifsFYI
+
+cifsFYI functions as a bit mask. Setting it to 1 enables additional kernel
+logging of various informational messages. 2 enables logging of non-zero
+SMB return codes while 4 enables logging of requests that take longer
+than one second to complete (except for byte range lock requests).
+Setting it to 4 requires CONFIG_CIFS_STATS2 to be set in kernel configuration
+(.config). Setting it to seven enables all three. Finally, tracing
+the start of smb requests and responses can be enabled via::
+
+ echo 1 > /proc/fs/cifs/traceSMB
+
+Per share (per client mount) statistics are available in /proc/fs/cifs/Stats.
+Additional information is available if CONFIG_CIFS_STATS2 is enabled in the
+kernel configuration (.config). The statistics returned include counters which
+represent the number of attempted and failed (ie non-zero return code from the
+server) SMB3 (or cifs) requests grouped by request type (read, write, close etc.).
+Also recorded is the total bytes read and bytes written to the server for
+that share. Note that due to client caching effects this can be less than the
+number of bytes read and written by the application running on the client.
+Statistics can be reset to zero by ``echo 0 > /proc/fs/cifs/Stats`` which may be
+useful if comparing performance of two different scenarios.
+
+Also note that ``cat /proc/fs/cifs/DebugData`` will display information about
+the active sessions and the shares that are mounted.
+
+Enabling Kerberos (extended security) works but requires version 1.2 or later
+of the helper program cifs.upcall to be present and to be configured in the
+/etc/request-key.conf file. The cifs.upcall helper program is from the Samba
+project(https://www.samba.org). NTLM and NTLMv2 and LANMAN support do not
+require this helper. Note that NTLMv2 security (which does not require the
+cifs.upcall helper program), instead of using Kerberos, is sufficient for
+some use cases.
+
+DFS support allows transparent redirection to shares in an MS-DFS name space.
+In addition, DFS support for target shares which are specified as UNC
+names which begin with host names (rather than IP addresses) requires
+a user space helper (such as cifs.upcall) to be present in order to
+translate host names to ip address, and the user space helper must also
+be configured in the file /etc/request-key.conf. Samba, Windows servers and
+many NAS appliances support DFS as a way of constructing a global name
+space to ease network configuration and improve reliability.
+
+To use cifs Kerberos and DFS support, the Linux keyutils package should be
+installed and something like the following lines should be added to the
+/etc/request-key.conf file::
+
+ create cifs.spnego * * /usr/local/sbin/cifs.upcall %k
+ create dns_resolver * * /usr/local/sbin/cifs.upcall %k
+
+CIFS kernel module parameters
+=============================
+These module parameters can be specified or modified either during the time of
+module loading or during the runtime by using the interface::
+
+ /sys/module/cifs/parameters/<param>
+
+i.e.::
+
+ echo "value" > /sys/module/cifs/parameters/<param>
+
+More detailed descriptions of the available module parameters and their values
+can be seen by doing:
+
+ modinfo cifs (or modinfo smb3)
+
+================= ==========================================================
+1. enable_oplocks Enable or disable oplocks. Oplocks are enabled by default.
+ [Y/y/1]. To disable use any of [N/n/0].
+================= ==========================================================
diff --git a/Documentation/filesystems/cifs/winucase_convert.pl b/Documentation/admin-guide/cifs/winucase_convert.pl
index 322a9c833f23..993186beea20 100755
--- a/Documentation/filesystems/cifs/winucase_convert.pl
+++ b/Documentation/admin-guide/cifs/winucase_convert.pl
@@ -16,7 +16,7 @@
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
while(<>) {
diff --git a/Documentation/admin-guide/clearing-warn-once.rst b/Documentation/admin-guide/clearing-warn-once.rst
new file mode 100644
index 000000000000..211fd926cf00
--- /dev/null
+++ b/Documentation/admin-guide/clearing-warn-once.rst
@@ -0,0 +1,9 @@
+Clearing WARN_ONCE
+------------------
+
+WARN_ONCE / WARN_ON_ONCE / printk_once only emit a message once.
+
+echo 1 > /sys/kernel/debug/clear_warn_once
+
+clears the state and allows the warnings to print once again.
+This can be useful after test suite runs to reproduce problems.
diff --git a/Documentation/admin-guide/conf.py b/Documentation/admin-guide/conf.py
deleted file mode 100644
index 86f738953799..000000000000
--- a/Documentation/admin-guide/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = 'Linux Kernel User Documentation'
-
-tags.add("subproject")
-
-latex_documents = [
- ('index', 'linux-user.tex', 'Linux Kernel User Documentation',
- 'The kernel development community', 'manual'),
-]
diff --git a/Documentation/admin-guide/cpu-load.rst b/Documentation/admin-guide/cpu-load.rst
new file mode 100644
index 000000000000..21a984337080
--- /dev/null
+++ b/Documentation/admin-guide/cpu-load.rst
@@ -0,0 +1,117 @@
+========
+CPU load
+========
+
+Linux exports various bits of information via ``/proc/stat`` and
+``/proc/uptime`` that userland tools, such as top(1), use to calculate
+the average time system spent in a particular state, for example::
+
+ $ iostat
+ Linux 2.6.18.3-exp (linmac) 02/20/2007
+
+ avg-cpu: %user %nice %system %iowait %steal %idle
+ 10.01 0.00 2.92 5.44 0.00 81.63
+
+ ...
+
+Here the system thinks that over the default sampling period the
+system spent 10.01% of the time doing work in user space, 2.92% in the
+kernel, and was overall 81.63% of the time idle.
+
+In most cases the ``/proc/stat`` information reflects the reality quite
+closely, however due to the nature of how/when the kernel collects
+this data sometimes it can not be trusted at all.
+
+So how is this information collected? Whenever timer interrupt is
+signalled the kernel looks what kind of task was running at this
+moment and increments the counter that corresponds to this tasks
+kind/state. The problem with this is that the system could have
+switched between various states multiple times between two timer
+interrupts yet the counter is incremented only for the last state.
+
+
+Example
+-------
+
+If we imagine the system with one task that periodically burns cycles
+in the following manner::
+
+ time line between two timer interrupts
+ |--------------------------------------|
+ ^ ^
+ |_ something begins working |
+ |_ something goes to sleep
+ (only to be awaken quite soon)
+
+In the above situation the system will be 0% loaded according to the
+``/proc/stat`` (since the timer interrupt will always happen when the
+system is executing the idle handler), but in reality the load is
+closer to 99%.
+
+One can imagine many more situations where this behavior of the kernel
+will lead to quite erratic information inside ``/proc/stat``::
+
+
+ /* gcc -o hog smallhog.c */
+ #include <time.h>
+ #include <limits.h>
+ #include <signal.h>
+ #include <sys/time.h>
+ #define HIST 10
+
+ static volatile sig_atomic_t stop;
+
+ static void sighandler(int signr)
+ {
+ (void) signr;
+ stop = 1;
+ }
+
+ static unsigned long hog (unsigned long niters)
+ {
+ stop = 0;
+ while (!stop && --niters);
+ return niters;
+ }
+
+ int main (void)
+ {
+ int i;
+ struct itimerval it = {
+ .it_interval = { .tv_sec = 0, .tv_usec = 1 },
+ .it_value = { .tv_sec = 0, .tv_usec = 1 } };
+ sigset_t set;
+ unsigned long v[HIST];
+ double tmp = 0.0;
+ unsigned long n;
+ signal(SIGALRM, &sighandler);
+ setitimer(ITIMER_REAL, &it, NULL);
+
+ hog (ULONG_MAX);
+ for (i = 0; i < HIST; ++i) v[i] = ULONG_MAX - hog(ULONG_MAX);
+ for (i = 0; i < HIST; ++i) tmp += v[i];
+ tmp /= HIST;
+ n = tmp - (tmp / 3.0);
+
+ sigemptyset(&set);
+ sigaddset(&set, SIGALRM);
+
+ for (;;) {
+ hog(n);
+ sigwait(&set, &i);
+ }
+ return 0;
+ }
+
+
+References
+----------
+
+- https://lore.kernel.org/r/loom.20070212T063225-663@post.gmane.org
+- Documentation/filesystems/proc.rst (1.8)
+
+
+Thanks
+------
+
+Con Kolivas, Pavel Machek
diff --git a/Documentation/admin-guide/cputopology.rst b/Documentation/admin-guide/cputopology.rst
new file mode 100644
index 000000000000..d29cacc9b3c3
--- /dev/null
+++ b/Documentation/admin-guide/cputopology.rst
@@ -0,0 +1,101 @@
+===========================================
+How CPU topology info is exported via sysfs
+===========================================
+
+CPU topology info is exported via sysfs. Items (attributes) are similar
+to /proc/cpuinfo output of some architectures. They reside in
+/sys/devices/system/cpu/cpuX/topology/. Please refer to the ABI file:
+Documentation/ABI/stable/sysfs-devices-system-cpu.
+
+Architecture-neutral, drivers/base/topology.c, exports these attributes.
+However the die, cluster, book, and drawer hierarchy related sysfs files will
+only be created if an architecture provides the related macros as described
+below.
+
+For an architecture to support this feature, it must define some of
+these macros in include/asm-XXX/topology.h::
+
+ #define topology_physical_package_id(cpu)
+ #define topology_die_id(cpu)
+ #define topology_cluster_id(cpu)
+ #define topology_core_id(cpu)
+ #define topology_book_id(cpu)
+ #define topology_drawer_id(cpu)
+ #define topology_sibling_cpumask(cpu)
+ #define topology_core_cpumask(cpu)
+ #define topology_cluster_cpumask(cpu)
+ #define topology_die_cpumask(cpu)
+ #define topology_book_cpumask(cpu)
+ #define topology_drawer_cpumask(cpu)
+
+The type of ``**_id macros`` is int.
+The type of ``**_cpumask macros`` is ``(const) struct cpumask *``. The latter
+correspond with appropriate ``**_siblings`` sysfs attributes (except for
+topology_sibling_cpumask() which corresponds with thread_siblings).
+
+To be consistent on all architectures, include/linux/topology.h
+provides default definitions for any of the above macros that are
+not defined by include/asm-XXX/topology.h:
+
+1) topology_physical_package_id: -1
+2) topology_die_id: -1
+3) topology_cluster_id: -1
+4) topology_core_id: 0
+5) topology_book_id: -1
+6) topology_drawer_id: -1
+7) topology_sibling_cpumask: just the given CPU
+8) topology_core_cpumask: just the given CPU
+9) topology_cluster_cpumask: just the given CPU
+10) topology_die_cpumask: just the given CPU
+11) topology_book_cpumask: just the given CPU
+12) topology_drawer_cpumask: just the given CPU
+
+Additionally, CPU topology information is provided under
+/sys/devices/system/cpu and includes these files. The internal
+source for the output is in brackets ("[]").
+
+ =========== ==========================================================
+ kernel_max: the maximum CPU index allowed by the kernel configuration.
+ [NR_CPUS-1]
+
+ offline: CPUs that are not online because they have been
+ HOTPLUGGED off or exceed the limit of CPUs allowed by the
+ kernel configuration (kernel_max above).
+ [~cpu_online_mask + cpus >= NR_CPUS]
+
+ online: CPUs that are online and being scheduled [cpu_online_mask]
+
+ possible: CPUs that have been allocated resources and can be
+ brought online if they are present. [cpu_possible_mask]
+
+ present: CPUs that have been identified as being present in the
+ system. [cpu_present_mask]
+ =========== ==========================================================
+
+The format for the above output is compatible with cpulist_parse()
+[see <linux/cpumask.h>]. Some examples follow.
+
+In this example, there are 64 CPUs in the system but cpus 32-63 exceed
+the kernel max which is limited to 0..31 by the NR_CPUS config option
+being 32. Note also that CPUs 2 and 4-31 are not online but could be
+brought online as they are both present and possible::
+
+ kernel_max: 31
+ offline: 2,4-31,32-63
+ online: 0-1,3
+ possible: 0-31
+ present: 0-31
+
+In this example, the NR_CPUS config option is 128, but the kernel was
+started with possible_cpus=144. There are 4 CPUs in the system and cpu2
+was manually taken offline (and is the only CPU that can be brought
+online.)::
+
+ kernel_max: 127
+ offline: 2,4-127,128-143
+ online: 0-1,3
+ possible: 0-127
+ present: 0-3
+
+See Documentation/core-api/cpu_hotplug.rst for the possible_cpus=NUM
+kernel start parameter as well as more information on the various cpumasks.
diff --git a/Documentation/admin-guide/dell_rbu.rst b/Documentation/admin-guide/dell_rbu.rst
new file mode 100644
index 000000000000..2196caf1b939
--- /dev/null
+++ b/Documentation/admin-guide/dell_rbu.rst
@@ -0,0 +1,128 @@
+=========================================
+Dell Remote BIOS Update driver (dell_rbu)
+=========================================
+
+Purpose
+=======
+
+Document demonstrating the use of the Dell Remote BIOS Update driver
+for updating BIOS images on Dell servers and desktops.
+
+Scope
+=====
+
+This document discusses the functionality of the rbu driver only.
+It does not cover the support needed from applications to enable the BIOS to
+update itself with the image downloaded in to the memory.
+
+Overview
+========
+
+This driver works with Dell OpenManage or Dell Update Packages for updating
+the BIOS on Dell servers (starting from servers sold since 1999), desktops
+and notebooks (starting from those sold in 2005).
+
+Please go to http://support.dell.com register and you can find info on
+OpenManage and Dell Update packages (DUP).
+
+Libsmbios can also be used to update BIOS on Dell systems go to
+https://linux.dell.com/libsmbios/ for details.
+
+Dell_RBU driver supports BIOS update using the monolithic image and packetized
+image methods. In case of monolithic the driver allocates a contiguous chunk
+of physical pages having the BIOS image. In case of packetized the app
+using the driver breaks the image in to packets of fixed sizes and the driver
+would place each packet in contiguous physical memory. The driver also
+maintains a link list of packets for reading them back.
+
+If the dell_rbu driver is unloaded all the allocated memory is freed.
+
+The rbu driver needs to have an application (as mentioned above) which will
+inform the BIOS to enable the update in the next system reboot.
+
+The user should not unload the rbu driver after downloading the BIOS image
+or updating.
+
+The driver load creates the following directories under the /sys file system::
+
+ /sys/class/firmware/dell_rbu/loading
+ /sys/class/firmware/dell_rbu/data
+ /sys/devices/platform/dell_rbu/image_type
+ /sys/devices/platform/dell_rbu/data
+ /sys/devices/platform/dell_rbu/packet_size
+
+The driver supports two types of update mechanism; monolithic and packetized.
+These update mechanism depends upon the BIOS currently running on the system.
+Most of the Dell systems support a monolithic update where the BIOS image is
+copied to a single contiguous block of physical memory.
+
+In case of packet mechanism the single memory can be broken in smaller chunks
+of contiguous memory and the BIOS image is scattered in these packets.
+
+By default the driver uses monolithic memory for the update type. This can be
+changed to packets during the driver load time by specifying the load
+parameter image_type=packet. This can also be changed later as below::
+
+ echo packet > /sys/devices/platform/dell_rbu/image_type
+
+In packet update mode the packet size has to be given before any packets can
+be downloaded. It is done as below::
+
+ echo XXXX > /sys/devices/platform/dell_rbu/packet_size
+
+In the packet update mechanism, the user needs to create a new file having
+packets of data arranged back to back. It can be done as follows:
+The user creates packets header, gets the chunk of the BIOS image and
+places it next to the packetheader; now, the packetheader + BIOS image chunk
+added together should match the specified packet_size. This makes one
+packet, the user needs to create more such packets out of the entire BIOS
+image file and then arrange all these packets back to back in to one single
+file.
+
+This file is then copied to /sys/class/firmware/dell_rbu/data.
+Once this file gets to the driver, the driver extracts packet_size data from
+the file and spreads it across the physical memory in contiguous packet_sized
+space.
+
+This method makes sure that all the packets get to the driver in a single operation.
+
+In monolithic update the user simply get the BIOS image (.hdr file) and copies
+to the data file as is without any change to the BIOS image itself.
+
+Do the steps below to download the BIOS image.
+
+1) echo 1 > /sys/class/firmware/dell_rbu/loading
+2) cp bios_image.hdr /sys/class/firmware/dell_rbu/data
+3) echo 0 > /sys/class/firmware/dell_rbu/loading
+
+The /sys/class/firmware/dell_rbu/ entries will remain till the following is
+done.
+
+::
+
+ echo -1 > /sys/class/firmware/dell_rbu/loading
+
+Until this step is completed the driver cannot be unloaded.
+
+Also echoing either mono, packet or init in to image_type will free up the
+memory allocated by the driver.
+
+If a user by accident executes steps 1 and 3 above without executing step 2;
+it will make the /sys/class/firmware/dell_rbu/ entries disappear.
+
+The entries can be recreated by doing the following::
+
+ echo init > /sys/devices/platform/dell_rbu/image_type
+
+.. note:: echoing init in image_type does not change its original value.
+
+Also the driver provides /sys/devices/platform/dell_rbu/data readonly file to
+read back the image downloaded.
+
+.. note::
+
+ After updating the BIOS image a user mode application needs to execute
+ code which sends the BIOS update request to the BIOS. So on the next reboot
+ the BIOS knows about the new image downloaded and it updates itself.
+ Also don't unload the rbu driver if the image has to be updated.
+
diff --git a/Documentation/admin-guide/device-mapper/cache-policies.rst b/Documentation/admin-guide/device-mapper/cache-policies.rst
new file mode 100644
index 000000000000..13da4d831d46
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/cache-policies.rst
@@ -0,0 +1,131 @@
+=============================
+Guidance for writing policies
+=============================
+
+Try to keep transactionality out of it. The core is careful to
+avoid asking about anything that is migrating. This is a pain, but
+makes it easier to write the policies.
+
+Mappings are loaded into the policy at construction time.
+
+Every bio that is mapped by the target is referred to the policy.
+The policy can return a simple HIT or MISS or issue a migration.
+
+Currently there's no way for the policy to issue background work,
+e.g. to start writing back dirty blocks that are going to be evicted
+soon.
+
+Because we map bios, rather than requests it's easy for the policy
+to get fooled by many small bios. For this reason the core target
+issues periodic ticks to the policy. It's suggested that the policy
+doesn't update states (eg, hit counts) for a block more than once
+for each tick. The core ticks by watching bios complete, and so
+trying to see when the io scheduler has let the ios run.
+
+
+Overview of supplied cache replacement policies
+===============================================
+
+multiqueue (mq)
+---------------
+
+This policy is now an alias for smq (see below).
+
+The following tunables are accepted, but have no effect::
+
+ 'sequential_threshold <#nr_sequential_ios>'
+ 'random_threshold <#nr_random_ios>'
+ 'read_promote_adjustment <value>'
+ 'write_promote_adjustment <value>'
+ 'discard_promote_adjustment <value>'
+
+Stochastic multiqueue (smq)
+---------------------------
+
+This policy is the default.
+
+The stochastic multi-queue (smq) policy addresses some of the problems
+with the multiqueue (mq) policy.
+
+The smq policy (vs mq) offers the promise of less memory utilization,
+improved performance and increased adaptability in the face of changing
+workloads. smq also does not have any cumbersome tuning knobs.
+
+Users may switch from "mq" to "smq" simply by appropriately reloading a
+DM table that is using the cache target. Doing so will cause all of the
+mq policy's hints to be dropped. Also, performance of the cache may
+degrade slightly until smq recalculates the origin device's hotspots
+that should be cached.
+
+Memory usage
+^^^^^^^^^^^^
+
+The mq policy used a lot of memory; 88 bytes per cache block on a 64
+bit machine.
+
+smq uses 28bit indexes to implement its data structures rather than
+pointers. It avoids storing an explicit hit count for each block. It
+has a 'hotspot' queue, rather than a pre-cache, which uses a quarter of
+the entries (each hotspot block covers a larger area than a single
+cache block).
+
+All this means smq uses ~25bytes per cache block. Still a lot of
+memory, but a substantial improvement nonetheless.
+
+Level balancing
+^^^^^^^^^^^^^^^
+
+mq placed entries in different levels of the multiqueue structures
+based on their hit count (~ln(hit count)). This meant the bottom
+levels generally had the most entries, and the top ones had very
+few. Having unbalanced levels like this reduced the efficacy of the
+multiqueue.
+
+smq does not maintain a hit count, instead it swaps hit entries with
+the least recently used entry from the level above. The overall
+ordering being a side effect of this stochastic process. With this
+scheme we can decide how many entries occupy each multiqueue level,
+resulting in better promotion/demotion decisions.
+
+Adaptability:
+The mq policy maintained a hit count for each cache block. For a
+different block to get promoted to the cache its hit count has to
+exceed the lowest currently in the cache. This meant it could take a
+long time for the cache to adapt between varying IO patterns.
+
+smq doesn't maintain hit counts, so a lot of this problem just goes
+away. In addition it tracks performance of the hotspot queue, which
+is used to decide which blocks to promote. If the hotspot queue is
+performing badly then it starts moving entries more quickly between
+levels. This lets it adapt to new IO patterns very quickly.
+
+Performance
+^^^^^^^^^^^
+
+Testing smq shows substantially better performance than mq.
+
+cleaner
+-------
+
+The cleaner writes back all dirty blocks in a cache to decommission it.
+
+Examples
+========
+
+The syntax for a table is::
+
+ cache <metadata dev> <cache dev> <origin dev> <block size>
+ <#feature_args> [<feature arg>]*
+ <policy> <#policy_args> [<policy arg>]*
+
+The syntax to send a message using the dmsetup command is::
+
+ dmsetup message <mapped device> 0 sequential_threshold 1024
+ dmsetup message <mapped device> 0 random_threshold 8
+
+Using dmsetup::
+
+ dmsetup create blah --table "0 268435456 cache /dev/sdb /dev/sdc \
+ /dev/sdd 512 0 mq 4 sequential_threshold 1024 random_threshold 8"
+ creates a 128GB large mapped device named 'blah' with the
+ sequential threshold set to 1024 and the random_threshold set to 8.
diff --git a/Documentation/admin-guide/device-mapper/cache.rst b/Documentation/admin-guide/device-mapper/cache.rst
new file mode 100644
index 000000000000..f15e5254d05b
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/cache.rst
@@ -0,0 +1,337 @@
+=====
+Cache
+=====
+
+Introduction
+============
+
+dm-cache is a device mapper target written by Joe Thornber, Heinz
+Mauelshagen, and Mike Snitzer.
+
+It aims to improve performance of a block device (eg, a spindle) by
+dynamically migrating some of its data to a faster, smaller device
+(eg, an SSD).
+
+This device-mapper solution allows us to insert this caching at
+different levels of the dm stack, for instance above the data device for
+a thin-provisioning pool. Caching solutions that are integrated more
+closely with the virtual memory system should give better performance.
+
+The target reuses the metadata library used in the thin-provisioning
+library.
+
+The decision as to what data to migrate and when is left to a plug-in
+policy module. Several of these have been written as we experiment,
+and we hope other people will contribute others for specific io
+scenarios (eg. a vm image server).
+
+Glossary
+========
+
+ Migration
+ Movement of the primary copy of a logical block from one
+ device to the other.
+ Promotion
+ Migration from slow device to fast device.
+ Demotion
+ Migration from fast device to slow device.
+
+The origin device always contains a copy of the logical block, which
+may be out of date or kept in sync with the copy on the cache device
+(depending on policy).
+
+Design
+======
+
+Sub-devices
+-----------
+
+The target is constructed by passing three devices to it (along with
+other parameters detailed later):
+
+1. An origin device - the big, slow one.
+
+2. A cache device - the small, fast one.
+
+3. A small metadata device - records which blocks are in the cache,
+ which are dirty, and extra hints for use by the policy object.
+ This information could be put on the cache device, but having it
+ separate allows the volume manager to configure it differently,
+ e.g. as a mirror for extra robustness. This metadata device may only
+ be used by a single cache device.
+
+Fixed block size
+----------------
+
+The origin is divided up into blocks of a fixed size. This block size
+is configurable when you first create the cache. Typically we've been
+using block sizes of 256KB - 1024KB. The block size must be between 64
+sectors (32KB) and 2097152 sectors (1GB) and a multiple of 64 sectors (32KB).
+
+Having a fixed block size simplifies the target a lot. But it is
+something of a compromise. For instance, a small part of a block may be
+getting hit a lot, yet the whole block will be promoted to the cache.
+So large block sizes are bad because they waste cache space. And small
+block sizes are bad because they increase the amount of metadata (both
+in core and on disk).
+
+Cache operating modes
+---------------------
+
+The cache has three operating modes: writeback, writethrough and
+passthrough.
+
+If writeback, the default, is selected then a write to a block that is
+cached will go only to the cache and the block will be marked dirty in
+the metadata.
+
+If writethrough is selected then a write to a cached block will not
+complete until it has hit both the origin and cache devices. Clean
+blocks should remain clean.
+
+If passthrough is selected, useful when the cache contents are not known
+to be coherent with the origin device, then all reads are served from
+the origin device (all reads miss the cache) and all writes are
+forwarded to the origin device; additionally, write hits cause cache
+block invalidates. To enable passthrough mode the cache must be clean.
+Passthrough mode allows a cache device to be activated without having to
+worry about coherency. Coherency that exists is maintained, although
+the cache will gradually cool as writes take place. If the coherency of
+the cache can later be verified, or established through use of the
+"invalidate_cblocks" message, the cache device can be transitioned to
+writethrough or writeback mode while still warm. Otherwise, the cache
+contents can be discarded prior to transitioning to the desired
+operating mode.
+
+A simple cleaner policy is provided, which will clean (write back) all
+dirty blocks in a cache. Useful for decommissioning a cache or when
+shrinking a cache. Shrinking the cache's fast device requires all cache
+blocks, in the area of the cache being removed, to be clean. If the
+area being removed from the cache still contains dirty blocks the resize
+will fail. Care must be taken to never reduce the volume used for the
+cache's fast device until the cache is clean. This is of particular
+importance if writeback mode is used. Writethrough and passthrough
+modes already maintain a clean cache. Future support to partially clean
+the cache, above a specified threshold, will allow for keeping the cache
+warm and in writeback mode during resize.
+
+Migration throttling
+--------------------
+
+Migrating data between the origin and cache device uses bandwidth.
+The user can set a throttle to prevent more than a certain amount of
+migration occurring at any one time. Currently we're not taking any
+account of normal io traffic going to the devices. More work needs
+doing here to avoid migrating during those peak io moments.
+
+For the time being, a message "migration_threshold <#sectors>"
+can be used to set the maximum number of sectors being migrated,
+the default being 2048 sectors (1MB).
+
+Updating on-disk metadata
+-------------------------
+
+On-disk metadata is committed every time a FLUSH or FUA bio is written.
+If no such requests are made then commits will occur every second. This
+means the cache behaves like a physical disk that has a volatile write
+cache. If power is lost you may lose some recent writes. The metadata
+should always be consistent in spite of any crash.
+
+The 'dirty' state for a cache block changes far too frequently for us
+to keep updating it on the fly. So we treat it as a hint. In normal
+operation it will be written when the dm device is suspended. If the
+system crashes all cache blocks will be assumed dirty when restarted.
+
+Per-block policy hints
+----------------------
+
+Policy plug-ins can store a chunk of data per cache block. It's up to
+the policy how big this chunk is, but it should be kept small. Like the
+dirty flags this data is lost if there's a crash so a safe fallback
+value should always be possible.
+
+Policy hints affect performance, not correctness.
+
+Policy messaging
+----------------
+
+Policies will have different tunables, specific to each one, so we
+need a generic way of getting and setting these. Device-mapper
+messages are used. Refer to cache-policies.txt.
+
+Discard bitset resolution
+-------------------------
+
+We can avoid copying data during migration if we know the block has
+been discarded. A prime example of this is when mkfs discards the
+whole block device. We store a bitset tracking the discard state of
+blocks. However, we allow this bitset to have a different block size
+from the cache blocks. This is because we need to track the discard
+state for all of the origin device (compare with the dirty bitset
+which is just for the smaller cache device).
+
+Target interface
+================
+
+Constructor
+-----------
+
+ ::
+
+ cache <metadata dev> <cache dev> <origin dev> <block size>
+ <#feature args> [<feature arg>]*
+ <policy> <#policy args> [policy args]*
+
+ ================ =======================================================
+ metadata dev fast device holding the persistent metadata
+ cache dev fast device holding cached data blocks
+ origin dev slow device holding original data blocks
+ block size cache unit size in sectors
+
+ #feature args number of feature arguments passed
+ feature args writethrough or passthrough (The default is writeback.)
+
+ policy the replacement policy to use
+ #policy args an even number of arguments corresponding to
+ key/value pairs passed to the policy
+ policy args key/value pairs passed to the policy
+ E.g. 'sequential_threshold 1024'
+ See cache-policies.txt for details.
+ ================ =======================================================
+
+Optional feature arguments are:
+
+
+ ==================== ========================================================
+ writethrough write through caching that prohibits cache block
+ content from being different from origin block content.
+ Without this argument, the default behaviour is to write
+ back cache block contents later for performance reasons,
+ so they may differ from the corresponding origin blocks.
+
+ passthrough a degraded mode useful for various cache coherency
+ situations (e.g., rolling back snapshots of
+ underlying storage). Reads and writes always go to
+ the origin. If a write goes to a cached origin
+ block, then the cache block is invalidated.
+ To enable passthrough mode the cache must be clean.
+
+ metadata2 use version 2 of the metadata. This stores the dirty
+ bits in a separate btree, which improves speed of
+ shutting down the cache.
+
+ no_discard_passdown disable passing down discards from the cache
+ to the origin's data device.
+ ==================== ========================================================
+
+A policy called 'default' is always registered. This is an alias for
+the policy we currently think is giving best all round performance.
+
+As the default policy could vary between kernels, if you are relying on
+the characteristics of a specific policy, always request it by name.
+
+Status
+------
+
+::
+
+ <metadata block size> <#used metadata blocks>/<#total metadata blocks>
+ <cache block size> <#used cache blocks>/<#total cache blocks>
+ <#read hits> <#read misses> <#write hits> <#write misses>
+ <#demotions> <#promotions> <#dirty> <#features> <features>*
+ <#core args> <core args>* <policy name> <#policy args> <policy args>*
+ <cache metadata mode>
+
+
+========================= =====================================================
+metadata block size Fixed block size for each metadata block in
+ sectors
+#used metadata blocks Number of metadata blocks used
+#total metadata blocks Total number of metadata blocks
+cache block size Configurable block size for the cache device
+ in sectors
+#used cache blocks Number of blocks resident in the cache
+#total cache blocks Total number of cache blocks
+#read hits Number of times a READ bio has been mapped
+ to the cache
+#read misses Number of times a READ bio has been mapped
+ to the origin
+#write hits Number of times a WRITE bio has been mapped
+ to the cache
+#write misses Number of times a WRITE bio has been
+ mapped to the origin
+#demotions Number of times a block has been removed
+ from the cache
+#promotions Number of times a block has been moved to
+ the cache
+#dirty Number of blocks in the cache that differ
+ from the origin
+#feature args Number of feature args to follow
+feature args 'writethrough' (optional)
+#core args Number of core arguments (must be even)
+core args Key/value pairs for tuning the core
+ e.g. migration_threshold
+policy name Name of the policy
+#policy args Number of policy arguments to follow (must be even)
+policy args Key/value pairs e.g. sequential_threshold
+cache metadata mode ro if read-only, rw if read-write
+
+ In serious cases where even a read-only mode is
+ deemed unsafe no further I/O will be permitted and
+ the status will just contain the string 'Fail'.
+ The userspace recovery tools should then be used.
+needs_check 'needs_check' if set, '-' if not set
+ A metadata operation has failed, resulting in the
+ needs_check flag being set in the metadata's
+ superblock. The metadata device must be
+ deactivated and checked/repaired before the
+ cache can be made fully operational again.
+ '-' indicates needs_check is not set.
+========================= =====================================================
+
+Messages
+--------
+
+Policies will have different tunables, specific to each one, so we
+need a generic way of getting and setting these. Device-mapper
+messages are used. (A sysfs interface would also be possible.)
+
+The message format is::
+
+ <key> <value>
+
+E.g.::
+
+ dmsetup message my_cache 0 sequential_threshold 1024
+
+
+Invalidation is removing an entry from the cache without writing it
+back. Cache blocks can be invalidated via the invalidate_cblocks
+message, which takes an arbitrary number of cblock ranges. Each cblock
+range's end value is "one past the end", meaning 5-10 expresses a range
+of values from 5 to 9. Each cblock must be expressed as a decimal
+value, in the future a variant message that takes cblock ranges
+expressed in hexadecimal may be needed to better support efficient
+invalidation of larger caches. The cache must be in passthrough mode
+when invalidate_cblocks is used::
+
+ invalidate_cblocks [<cblock>|<cblock begin>-<cblock end>]*
+
+E.g.::
+
+ dmsetup message my_cache 0 invalidate_cblocks 2345 3456-4567 5678-6789
+
+Examples
+========
+
+The test suite can be found here:
+
+https://github.com/jthornber/device-mapper-test-suite
+
+::
+
+ dmsetup create my_cache --table '0 41943040 cache /dev/mapper/metadata \
+ /dev/mapper/ssd /dev/mapper/origin 512 1 writeback default 0'
+ dmsetup create my_cache --table '0 41943040 cache /dev/mapper/metadata \
+ /dev/mapper/ssd /dev/mapper/origin 1024 1 writeback \
+ mq 4 sequential_threshold 1024 random_threshold 8'
diff --git a/Documentation/admin-guide/device-mapper/delay.rst b/Documentation/admin-guide/device-mapper/delay.rst
new file mode 100644
index 000000000000..a1e673c0e782
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/delay.rst
@@ -0,0 +1,54 @@
+========
+dm-delay
+========
+
+Device-Mapper's "delay" target delays reads and/or writes
+and/or flushes and optionally maps them to different devices.
+
+Arguments::
+
+ <device> <offset> <delay> [<write_device> <write_offset> <write_delay>
+ [<flush_device> <flush_offset> <flush_delay>]]
+
+Table line has to either have 3, 6 or 9 arguments:
+
+3: apply offset and delay to read, write and flush operations on device
+
+6: apply offset and delay to device, also apply write_offset and write_delay
+ to write and flush operations on optionally different write_device with
+ optionally different sector offset
+
+9: same as 6 arguments plus define flush_offset and flush_delay explicitly
+ on/with optionally different flush_device/flush_offset.
+
+Offsets are specified in sectors.
+
+Delays are specified in milliseconds.
+
+
+Example scripts
+===============
+
+::
+ #!/bin/sh
+ #
+ # Create mapped device named "delayed" delaying read, write and flush operations for 500ms.
+ #
+ dmsetup create delayed --table "0 `blockdev --getsz $1` delay $1 0 500"
+
+::
+ #!/bin/sh
+ #
+ # Create mapped device delaying write and flush operations for 400ms and
+ # splitting reads to device $1 but writes and flushes to different device $2
+ # to different offsets of 2048 and 4096 sectors respectively.
+ #
+ dmsetup create delayed --table "0 `blockdev --getsz $1` delay $1 2048 0 $2 4096 400"
+
+::
+ #!/bin/sh
+ #
+ # Create mapped device delaying reads for 50ms, writes for 100ms and flushes for 333ms
+ # onto the same backing device at offset 0 sectors.
+ #
+ dmsetup create delayed --table "0 `blockdev --getsz $1` delay $1 0 50 $2 0 100 $1 0 333"
diff --git a/Documentation/admin-guide/device-mapper/dm-clone.rst b/Documentation/admin-guide/device-mapper/dm-clone.rst
new file mode 100644
index 000000000000..b43a34c1430a
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/dm-clone.rst
@@ -0,0 +1,333 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+
+========
+dm-clone
+========
+
+Introduction
+============
+
+dm-clone is a device mapper target which produces a one-to-one copy of an
+existing, read-only source device into a writable destination device: It
+presents a virtual block device which makes all data appear immediately, and
+redirects reads and writes accordingly.
+
+The main use case of dm-clone is to clone a potentially remote, high-latency,
+read-only, archival-type block device into a writable, fast, primary-type device
+for fast, low-latency I/O. The cloned device is visible/mountable immediately
+and the copy of the source device to the destination device happens in the
+background, in parallel with user I/O.
+
+For example, one could restore an application backup from a read-only copy,
+accessible through a network storage protocol (NBD, Fibre Channel, iSCSI, AoE,
+etc.), into a local SSD or NVMe device, and start using the device immediately,
+without waiting for the restore to complete.
+
+When the cloning completes, the dm-clone table can be removed altogether and be
+replaced, e.g., by a linear table, mapping directly to the destination device.
+
+The dm-clone target reuses the metadata library used by the thin-provisioning
+target.
+
+Glossary
+========
+
+ Hydration
+ The process of filling a region of the destination device with data from
+ the same region of the source device, i.e., copying the region from the
+ source to the destination device.
+
+Once a region gets hydrated we redirect all I/O regarding it to the destination
+device.
+
+Design
+======
+
+Sub-devices
+-----------
+
+The target is constructed by passing three devices to it (along with other
+parameters detailed later):
+
+1. A source device - the read-only device that gets cloned and source of the
+ hydration.
+
+2. A destination device - the destination of the hydration, which will become a
+ clone of the source device.
+
+3. A small metadata device - it records which regions are already valid in the
+ destination device, i.e., which regions have already been hydrated, or have
+ been written to directly, via user I/O.
+
+The size of the destination device must be at least equal to the size of the
+source device.
+
+Regions
+-------
+
+dm-clone divides the source and destination devices in fixed sized regions.
+Regions are the unit of hydration, i.e., the minimum amount of data copied from
+the source to the destination device.
+
+The region size is configurable when you first create the dm-clone device. The
+recommended region size is the same as the file system block size, which usually
+is 4KB. The region size must be between 8 sectors (4KB) and 2097152 sectors
+(1GB) and a power of two.
+
+Reads and writes from/to hydrated regions are serviced from the destination
+device.
+
+A read to a not yet hydrated region is serviced directly from the source device.
+
+A write to a not yet hydrated region will be delayed until the corresponding
+region has been hydrated and the hydration of the region starts immediately.
+
+Note that a write request with size equal to region size will skip copying of
+the corresponding region from the source device and overwrite the region of the
+destination device directly.
+
+Discards
+--------
+
+dm-clone interprets a discard request to a range that hasn't been hydrated yet
+as a hint to skip hydration of the regions covered by the request, i.e., it
+skips copying the region's data from the source to the destination device, and
+only updates its metadata.
+
+If the destination device supports discards, then by default dm-clone will pass
+down discard requests to it.
+
+Background Hydration
+--------------------
+
+dm-clone copies continuously from the source to the destination device, until
+all of the device has been copied.
+
+Copying data from the source to the destination device uses bandwidth. The user
+can set a throttle to prevent more than a certain amount of copying occurring at
+any one time. Moreover, dm-clone takes into account user I/O traffic going to
+the devices and pauses the background hydration when there is I/O in-flight.
+
+A message `hydration_threshold <#regions>` can be used to set the maximum number
+of regions being copied, the default being 1 region.
+
+dm-clone employs dm-kcopyd for copying portions of the source device to the
+destination device. By default, we issue copy requests of size equal to the
+region size. A message `hydration_batch_size <#regions>` can be used to tune the
+size of these copy requests. Increasing the hydration batch size results in
+dm-clone trying to batch together contiguous regions, so we copy the data in
+batches of this many regions.
+
+When the hydration of the destination device finishes, a dm event will be sent
+to user space.
+
+Updating on-disk metadata
+-------------------------
+
+On-disk metadata is committed every time a FLUSH or FUA bio is written. If no
+such requests are made then commits will occur every second. This means the
+dm-clone device behaves like a physical disk that has a volatile write cache. If
+power is lost you may lose some recent writes. The metadata should always be
+consistent in spite of any crash.
+
+Target Interface
+================
+
+Constructor
+-----------
+
+ ::
+
+ clone <metadata dev> <destination dev> <source dev> <region size>
+ [<#feature args> [<feature arg>]* [<#core args> [<core arg>]*]]
+
+ ================ ==============================================================
+ metadata dev Fast device holding the persistent metadata
+ destination dev The destination device, where the source will be cloned
+ source dev Read only device containing the data that gets cloned
+ region size The size of a region in sectors
+
+ #feature args Number of feature arguments passed
+ feature args no_hydration or no_discard_passdown
+
+ #core args An even number of arguments corresponding to key/value pairs
+ passed to dm-clone
+ core args Key/value pairs passed to dm-clone, e.g. `hydration_threshold
+ 256`
+ ================ ==============================================================
+
+Optional feature arguments are:
+
+ ==================== =========================================================
+ no_hydration Create a dm-clone instance with background hydration
+ disabled
+ no_discard_passdown Disable passing down discards to the destination device
+ ==================== =========================================================
+
+Optional core arguments are:
+
+ ================================ ==============================================
+ hydration_threshold <#regions> Maximum number of regions being copied from
+ the source to the destination device at any
+ one time, during background hydration.
+ hydration_batch_size <#regions> During background hydration, try to batch
+ together contiguous regions, so we copy data
+ from the source to the destination device in
+ batches of this many regions.
+ ================================ ==============================================
+
+Status
+------
+
+ ::
+
+ <metadata block size> <#used metadata blocks>/<#total metadata blocks>
+ <region size> <#hydrated regions>/<#total regions> <#hydrating regions>
+ <#feature args> <feature args>* <#core args> <core args>*
+ <clone metadata mode>
+
+ ======================= =======================================================
+ metadata block size Fixed block size for each metadata block in sectors
+ #used metadata blocks Number of metadata blocks used
+ #total metadata blocks Total number of metadata blocks
+ region size Configurable region size for the device in sectors
+ #hydrated regions Number of regions that have finished hydrating
+ #total regions Total number of regions to hydrate
+ #hydrating regions Number of regions currently hydrating
+ #feature args Number of feature arguments to follow
+ feature args Feature arguments, e.g. `no_hydration`
+ #core args Even number of core arguments to follow
+ core args Key/value pairs for tuning the core, e.g.
+ `hydration_threshold 256`
+ clone metadata mode ro if read-only, rw if read-write
+
+ In serious cases where even a read-only mode is deemed
+ unsafe no further I/O will be permitted and the status
+ will just contain the string 'Fail'. If the metadata
+ mode changes, a dm event will be sent to user space.
+ ======================= =======================================================
+
+Messages
+--------
+
+ `disable_hydration`
+ Disable the background hydration of the destination device.
+
+ `enable_hydration`
+ Enable the background hydration of the destination device.
+
+ `hydration_threshold <#regions>`
+ Set background hydration threshold.
+
+ `hydration_batch_size <#regions>`
+ Set background hydration batch size.
+
+Examples
+========
+
+Clone a device containing a file system
+---------------------------------------
+
+1. Create the dm-clone device.
+
+ ::
+
+ dmsetup create clone --table "0 1048576000 clone $metadata_dev $dest_dev \
+ $source_dev 8 1 no_hydration"
+
+2. Mount the device and trim the file system. dm-clone interprets the discards
+ sent by the file system and it will not hydrate the unused space.
+
+ ::
+
+ mount /dev/mapper/clone /mnt/cloned-fs
+ fstrim /mnt/cloned-fs
+
+3. Enable background hydration of the destination device.
+
+ ::
+
+ dmsetup message clone 0 enable_hydration
+
+4. When the hydration finishes, we can replace the dm-clone table with a linear
+ table.
+
+ ::
+
+ dmsetup suspend clone
+ dmsetup load clone --table "0 1048576000 linear $dest_dev 0"
+ dmsetup resume clone
+
+ The metadata device is no longer needed and can be safely discarded or reused
+ for other purposes.
+
+Known issues
+============
+
+1. We redirect reads, to not-yet-hydrated regions, to the source device. If
+ reading the source device has high latency and the user repeatedly reads from
+ the same regions, this behaviour could degrade performance. We should use
+ these reads as hints to hydrate the relevant regions sooner. Currently, we
+ rely on the page cache to cache these regions, so we hopefully don't end up
+ reading them multiple times from the source device.
+
+2. Release in-core resources, i.e., the bitmaps tracking which regions are
+ hydrated, after the hydration has finished.
+
+3. During background hydration, if we fail to read the source or write to the
+ destination device, we print an error message, but the hydration process
+ continues indefinitely, until it succeeds. We should stop the background
+ hydration after a number of failures and emit a dm event for user space to
+ notice.
+
+Why not...?
+===========
+
+We explored the following alternatives before implementing dm-clone:
+
+1. Use dm-cache with cache size equal to the source device and implement a new
+ cloning policy:
+
+ * The resulting cache device is not a one-to-one mirror of the source device
+ and thus we cannot remove the cache device once cloning completes.
+
+ * dm-cache writes to the source device, which violates our requirement that
+ the source device must be treated as read-only.
+
+ * Caching is semantically different from cloning.
+
+2. Use dm-snapshot with a COW device equal to the source device:
+
+ * dm-snapshot stores its metadata in the COW device, so the resulting device
+ is not a one-to-one mirror of the source device.
+
+ * No background copying mechanism.
+
+ * dm-snapshot needs to commit its metadata whenever a pending exception
+ completes, to ensure snapshot consistency. In the case of cloning, we don't
+ need to be so strict and can rely on committing metadata every time a FLUSH
+ or FUA bio is written, or periodically, like dm-thin and dm-cache do. This
+ improves the performance significantly.
+
+3. Use dm-mirror: The mirror target has a background copying/mirroring
+ mechanism, but it writes to all mirrors, thus violating our requirement that
+ the source device must be treated as read-only.
+
+4. Use dm-thin's external snapshot functionality. This approach is the most
+ promising among all alternatives, as the thinly-provisioned volume is a
+ one-to-one mirror of the source device and handles reads and writes to
+ un-provisioned/not-yet-cloned areas the same way as dm-clone does.
+
+ Still:
+
+ * There is no background copying mechanism, though one could be implemented.
+
+ * Most importantly, we want to support arbitrary block devices as the
+ destination of the cloning process and not restrict ourselves to
+ thinly-provisioned volumes. Thin-provisioning has an inherent metadata
+ overhead, for maintaining the thin volume mappings, which significantly
+ degrades performance.
+
+ Moreover, cloning a device shouldn't force the use of thin-provisioning. On
+ the other hand, if we wish to use thin provisioning, we can just use a thin
+ LV as dm-clone's destination device.
diff --git a/Documentation/admin-guide/device-mapper/dm-crypt.rst b/Documentation/admin-guide/device-mapper/dm-crypt.rst
new file mode 100644
index 000000000000..4467f6d4b632
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/dm-crypt.rst
@@ -0,0 +1,212 @@
+========
+dm-crypt
+========
+
+Device-Mapper's "crypt" target provides transparent encryption of block devices
+using the kernel crypto API.
+
+For a more detailed description of supported parameters see:
+https://gitlab.com/cryptsetup/cryptsetup/wikis/DMCrypt
+
+Parameters::
+
+ <cipher> <key> <iv_offset> <device path> \
+ <offset> [<#opt_params> <opt_params>]
+
+<cipher>
+ Encryption cipher, encryption mode and Initial Vector (IV) generator.
+
+ The cipher specifications format is::
+
+ cipher[:keycount]-chainmode-ivmode[:ivopts]
+
+ Examples::
+
+ aes-cbc-essiv:sha256
+ aes-xts-plain64
+ serpent-xts-plain64
+
+ Cipher format also supports direct specification with kernel crypt API
+ format (selected by capi: prefix). The IV specification is the same
+ as for the first format type.
+ This format is mainly used for specification of authenticated modes.
+
+ The crypto API cipher specifications format is::
+
+ capi:cipher_api_spec-ivmode[:ivopts]
+
+ Examples::
+
+ capi:cbc(aes)-essiv:sha256
+ capi:xts(aes)-plain64
+
+ Examples of authenticated modes::
+
+ capi:gcm(aes)-random
+ capi:authenc(hmac(sha256),xts(aes))-random
+ capi:rfc7539(chacha20,poly1305)-random
+
+ The /proc/crypto contains a list of currently loaded crypto modes.
+
+<key>
+ Key used for encryption. It is encoded either as a hexadecimal number
+ or it can be passed as <key_string> prefixed with single colon
+ character (':') for keys residing in kernel keyring service.
+ You can only use key sizes that are valid for the selected cipher
+ in combination with the selected iv mode.
+ Note that for some iv modes the key string can contain additional
+ keys (for example IV seed) so the key contains more parts concatenated
+ into a single string.
+
+<key_string>
+ The kernel keyring key is identified by string in following format:
+ <key_size>:<key_type>:<key_description>.
+
+<key_size>
+ The encryption key size in bytes. The kernel key payload size must match
+ the value passed in <key_size>.
+
+<key_type>
+ Either 'logon', 'user', 'encrypted' or 'trusted' kernel key type.
+
+<key_description>
+ The kernel keyring key description crypt target should look for
+ when loading key of <key_type>.
+
+<keycount>
+ Multi-key compatibility mode. You can define <keycount> keys and
+ then sectors are encrypted according to their offsets (sector 0 uses key0;
+ sector 1 uses key1 etc.). <keycount> must be a power of two.
+
+<iv_offset>
+ The IV offset is a sector count that is added to the sector number
+ before creating the IV.
+
+<device path>
+ This is the device that is going to be used as backend and contains the
+ encrypted data. You can specify it as a path like /dev/xxx or a device
+ number <major>:<minor>.
+
+<offset>
+ Starting sector within the device where the encrypted data begins.
+
+<#opt_params>
+ Number of optional parameters. If there are no optional parameters,
+ the optional parameters section can be skipped or #opt_params can be zero.
+ Otherwise #opt_params is the number of following arguments.
+
+ Example of optional parameters section:
+ 3 allow_discards same_cpu_crypt submit_from_crypt_cpus
+
+allow_discards
+ Block discard requests (a.k.a. TRIM) are passed through the crypt device.
+ The default is to ignore discard requests.
+
+ WARNING: Assess the specific security risks carefully before enabling this
+ option. For example, allowing discards on encrypted devices may lead to
+ the leak of information about the ciphertext device (filesystem type,
+ used space etc.) if the discarded blocks can be located easily on the
+ device later.
+
+same_cpu_crypt
+ Perform encryption using the same cpu that IO was submitted on.
+ The default is to use an unbound workqueue so that encryption work
+ is automatically balanced between available CPUs.
+
+high_priority
+ Set dm-crypt workqueues and the writer thread to high priority. This
+ improves throughput and latency of dm-crypt while degrading general
+ responsiveness of the system.
+
+submit_from_crypt_cpus
+ Disable offloading writes to a separate thread after encryption.
+ There are some situations where offloading write bios from the
+ encryption threads to a single thread degrades performance
+ significantly. The default is to offload write bios to the same
+ thread because it benefits CFQ to have writes submitted using the
+ same context.
+
+no_read_workqueue
+ Bypass dm-crypt internal workqueue and process read requests synchronously.
+
+no_write_workqueue
+ Bypass dm-crypt internal workqueue and process write requests synchronously.
+ This option is automatically enabled for host-managed zoned block devices
+ (e.g. host-managed SMR hard-disks).
+
+integrity:<bytes>:<type>
+ The device requires additional <bytes> metadata per-sector stored
+ in per-bio integrity structure. This metadata must by provided
+ by underlying dm-integrity target.
+
+ The <type> can be "none" if metadata is used only for persistent IV.
+
+ For Authenticated Encryption with Additional Data (AEAD)
+ the <type> is "aead". An AEAD mode additionally calculates and verifies
+ integrity for the encrypted device. The additional space is then
+ used for storing authentication tag (and persistent IV if needed).
+
+integrity_key_size:<bytes>
+ Optionally set the integrity key size if it differs from the digest size.
+ It allows the use of wrapped key algorithms where the key size is
+ independent of the cryptographic key size.
+
+sector_size:<bytes>
+ Use <bytes> as the encryption unit instead of 512 bytes sectors.
+ This option can be in range 512 - 4096 bytes and must be power of two.
+ Virtual device will announce this size as a minimal IO and logical sector.
+
+iv_large_sectors
+ IV generators will use sector number counted in <sector_size> units
+ instead of default 512 bytes sectors.
+
+ For example, if <sector_size> is 4096 bytes, plain64 IV for the second
+ sector will be 8 (without flag) and 1 if iv_large_sectors is present.
+ The <iv_offset> must be multiple of <sector_size> (in 512 bytes units)
+ if this flag is specified.
+
+integrity_key_size:<bytes>
+ Use an integrity key of <bytes> size instead of using an integrity key size
+ of the digest size of the used HMAC algorithm.
+
+
+Module parameters::
+ max_read_size
+ Maximum size of read requests. When a request larger than this size
+ is received, dm-crypt will split the request. The splitting improves
+ concurrency (the split requests could be encrypted in parallel by multiple
+ cores), but it also causes overhead. The user should tune this parameters to
+ fit the actual workload.
+
+ max_write_size
+ Maximum size of write requests. When a request larger than this size
+ is received, dm-crypt will split the request. The splitting improves
+ concurrency (the split requests could be encrypted in parallel by multiple
+ cores), but it also causes overhead. The user should tune this parameters to
+ fit the actual workload.
+
+
+Example scripts
+===============
+LUKS (Linux Unified Key Setup) is now the preferred way to set up disk
+encryption with dm-crypt using the 'cryptsetup' utility, see
+https://gitlab.com/cryptsetup/cryptsetup
+
+::
+
+ #!/bin/sh
+ # Create a crypt device using dmsetup
+ dmsetup create crypt1 --table "0 `blockdev --getsz $1` crypt aes-cbc-essiv:sha256 babebabebabebabebabebabebabebabe 0 $1 0"
+
+::
+
+ #!/bin/sh
+ # Create a crypt device using dmsetup when encryption key is stored in keyring service
+ dmsetup create crypt2 --table "0 `blockdev --getsize $1` crypt aes-cbc-essiv:sha256 :32:logon:my_prefix:my_key 0 $1 0"
+
+::
+
+ #!/bin/sh
+ # Create a crypt device using cryptsetup and LUKS header with default cipher
+ cryptsetup luksFormat $1
+ cryptsetup luksOpen $1 crypt1
diff --git a/Documentation/admin-guide/device-mapper/dm-dust.rst b/Documentation/admin-guide/device-mapper/dm-dust.rst
new file mode 100644
index 000000000000..e35ec8cd2f88
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/dm-dust.rst
@@ -0,0 +1,305 @@
+dm-dust
+=======
+
+This target emulates the behavior of bad sectors at arbitrary
+locations, and the ability to enable the emulation of the failures
+at an arbitrary time.
+
+This target behaves similarly to a linear target. At a given time,
+the user can send a message to the target to start failing read
+requests on specific blocks (to emulate the behavior of a hard disk
+drive with bad sectors).
+
+When the failure behavior is enabled (i.e.: when the output of
+"dmsetup status" displays "fail_read_on_bad_block"), reads of blocks
+in the "bad block list" will fail with EIO ("Input/output error").
+
+Writes of blocks in the "bad block list will result in the following:
+
+1. Remove the block from the "bad block list".
+2. Successfully complete the write.
+
+This emulates the "remapped sector" behavior of a drive with bad
+sectors.
+
+Normally, a drive that is encountering bad sectors will most likely
+encounter more bad sectors, at an unknown time or location.
+With dm-dust, the user can use the "addbadblock" and "removebadblock"
+messages to add arbitrary bad blocks at new locations, and the
+"enable" and "disable" messages to modulate the state of whether the
+configured "bad blocks" will be treated as bad, or bypassed.
+This allows the pre-writing of test data and metadata prior to
+simulating a "failure" event where bad sectors start to appear.
+
+Table parameters
+----------------
+<device_path> <offset> <blksz>
+
+Mandatory parameters:
+ <device_path>:
+ Path to the block device.
+
+ <offset>:
+ Offset to data area from start of device_path
+
+ <blksz>:
+ Block size in bytes
+
+ (minimum 512, maximum 1073741824, must be a power of 2)
+
+Usage instructions
+------------------
+
+First, find the size (in 512-byte sectors) of the device to be used::
+
+ $ sudo blockdev --getsz /dev/vdb1
+ 33552384
+
+Create the dm-dust device:
+(For a device with a block size of 512 bytes)
+
+::
+
+ $ sudo dmsetup create dust1 --table '0 33552384 dust /dev/vdb1 0 512'
+
+(For a device with a block size of 4096 bytes)
+
+::
+
+ $ sudo dmsetup create dust1 --table '0 33552384 dust /dev/vdb1 0 4096'
+
+Check the status of the read behavior ("bypass" indicates that all I/O
+will be passed through to the underlying device; "verbose" indicates that
+bad block additions, removals, and remaps will be verbosely logged)::
+
+ $ sudo dmsetup status dust1
+ 0 33552384 dust 252:17 bypass verbose
+
+ $ sudo dd if=/dev/mapper/dust1 of=/dev/null bs=512 count=128 iflag=direct
+ 128+0 records in
+ 128+0 records out
+
+ $ sudo dd if=/dev/zero of=/dev/mapper/dust1 bs=512 count=128 oflag=direct
+ 128+0 records in
+ 128+0 records out
+
+Adding and removing bad blocks
+------------------------------
+
+At any time (i.e.: whether the device has the "bad block" emulation
+enabled or disabled), bad blocks may be added or removed from the
+device via the "addbadblock" and "removebadblock" messages::
+
+ $ sudo dmsetup message dust1 0 addbadblock 60
+ kernel: device-mapper: dust: badblock added at block 60
+
+ $ sudo dmsetup message dust1 0 addbadblock 67
+ kernel: device-mapper: dust: badblock added at block 67
+
+ $ sudo dmsetup message dust1 0 addbadblock 72
+ kernel: device-mapper: dust: badblock added at block 72
+
+These bad blocks will be stored in the "bad block list".
+While the device is in "bypass" mode, reads and writes will succeed::
+
+ $ sudo dmsetup status dust1
+ 0 33552384 dust 252:17 bypass
+
+Enabling block read failures
+----------------------------
+
+To enable the "fail read on bad block" behavior, send the "enable" message::
+
+ $ sudo dmsetup message dust1 0 enable
+ kernel: device-mapper: dust: enabling read failures on bad sectors
+
+ $ sudo dmsetup status dust1
+ 0 33552384 dust 252:17 fail_read_on_bad_block
+
+With the device in "fail read on bad block" mode, attempting to read a
+block will encounter an "Input/output error"::
+
+ $ sudo dd if=/dev/mapper/dust1 of=/dev/null bs=512 count=1 skip=67 iflag=direct
+ dd: error reading '/dev/mapper/dust1': Input/output error
+ 0+0 records in
+ 0+0 records out
+ 0 bytes copied, 0.00040651 s, 0.0 kB/s
+
+...and writing to the bad blocks will remove the blocks from the list,
+therefore emulating the "remap" behavior of hard disk drives::
+
+ $ sudo dd if=/dev/zero of=/dev/mapper/dust1 bs=512 count=128 oflag=direct
+ 128+0 records in
+ 128+0 records out
+
+ kernel: device-mapper: dust: block 60 removed from badblocklist by write
+ kernel: device-mapper: dust: block 67 removed from badblocklist by write
+ kernel: device-mapper: dust: block 72 removed from badblocklist by write
+ kernel: device-mapper: dust: block 87 removed from badblocklist by write
+
+Bad block add/remove error handling
+-----------------------------------
+
+Attempting to add a bad block that already exists in the list will
+result in an "Invalid argument" error, as well as a helpful message::
+
+ $ sudo dmsetup message dust1 0 addbadblock 88
+ device-mapper: message ioctl on dust1 failed: Invalid argument
+ kernel: device-mapper: dust: block 88 already in badblocklist
+
+Attempting to remove a bad block that doesn't exist in the list will
+result in an "Invalid argument" error, as well as a helpful message::
+
+ $ sudo dmsetup message dust1 0 removebadblock 87
+ device-mapper: message ioctl on dust1 failed: Invalid argument
+ kernel: device-mapper: dust: block 87 not found in badblocklist
+
+Counting the number of bad blocks in the bad block list
+-------------------------------------------------------
+
+To count the number of bad blocks configured in the device, run the
+following message command::
+
+ $ sudo dmsetup message dust1 0 countbadblocks
+
+A message will print with the number of bad blocks currently
+configured on the device::
+
+ countbadblocks: 895 badblock(s) found
+
+Querying for specific bad blocks
+--------------------------------
+
+To find out if a specific block is in the bad block list, run the
+following message command::
+
+ $ sudo dmsetup message dust1 0 queryblock 72
+
+The following message will print if the block is in the list::
+
+ dust_query_block: block 72 found in badblocklist
+
+The following message will print if the block is not in the list::
+
+ dust_query_block: block 72 not found in badblocklist
+
+The "queryblock" message command will work in both the "enabled"
+and "disabled" modes, allowing the verification of whether a block
+will be treated as "bad" without having to issue I/O to the device,
+or having to "enable" the bad block emulation.
+
+Clearing the bad block list
+---------------------------
+
+To clear the bad block list (without needing to individually run
+a "removebadblock" message command for every block), run the
+following message command::
+
+ $ sudo dmsetup message dust1 0 clearbadblocks
+
+After clearing the bad block list, the following message will appear::
+
+ dust_clear_badblocks: badblocks cleared
+
+If there were no bad blocks to clear, the following message will
+appear::
+
+ dust_clear_badblocks: no badblocks found
+
+Listing the bad block list
+--------------------------
+
+To list all bad blocks in the bad block list (using an example device
+with blocks 1 and 2 in the bad block list), run the following message
+command::
+
+ $ sudo dmsetup message dust1 0 listbadblocks
+ 1
+ 2
+
+If there are no bad blocks in the bad block list, the command will
+execute with no output::
+
+ $ sudo dmsetup message dust1 0 listbadblocks
+
+Message commands list
+---------------------
+
+Below is a list of the messages that can be sent to a dust device:
+
+Operations on blocks (requires a <blknum> argument)::
+
+ addbadblock <blknum>
+ queryblock <blknum>
+ removebadblock <blknum>
+
+...where <blknum> is a block number within range of the device
+(corresponding to the block size of the device.)
+
+Single argument message commands::
+
+ countbadblocks
+ clearbadblocks
+ listbadblocks
+ disable
+ enable
+ quiet
+
+Device removal
+--------------
+
+When finished, remove the device via the "dmsetup remove" command::
+
+ $ sudo dmsetup remove dust1
+
+Quiet mode
+----------
+
+On test runs with many bad blocks, it may be desirable to avoid
+excessive logging (from bad blocks added, removed, or "remapped").
+This can be done by enabling "quiet mode" via the following message::
+
+ $ sudo dmsetup message dust1 0 quiet
+
+This will suppress log messages from add / remove / removed by write
+operations. Log messages from "countbadblocks" or "queryblock"
+message commands will still print in quiet mode.
+
+The status of quiet mode can be seen by running "dmsetup status"::
+
+ $ sudo dmsetup status dust1
+ 0 33552384 dust 252:17 fail_read_on_bad_block quiet
+
+To disable quiet mode, send the "quiet" message again::
+
+ $ sudo dmsetup message dust1 0 quiet
+
+ $ sudo dmsetup status dust1
+ 0 33552384 dust 252:17 fail_read_on_bad_block verbose
+
+(The presence of "verbose" indicates normal logging.)
+
+"Why not...?"
+-------------
+
+scsi_debug has a "medium error" mode that can fail reads on one
+specified sector (sector 0x1234, hardcoded in the source code), but
+it uses RAM for the persistent storage, which drastically decreases
+the potential device size.
+
+dm-flakey fails all I/O from all block locations at a specified time
+frequency, and not a given point in time.
+
+When a bad sector occurs on a hard disk drive, reads to that sector
+are failed by the device, usually resulting in an error code of EIO
+("I/O error") or ENODATA ("No data available"). However, a write to
+the sector may succeed, and result in the sector becoming readable
+after the device controller no longer experiences errors reading the
+sector (or after a reallocation of the sector). However, there may
+be bad sectors that occur on the device in the future, in a different,
+unpredictable location.
+
+This target seeks to provide a device that can exhibit the behavior
+of a bad sector at a known sector location, at a known time, based
+on a large storage device (at least tens of gigabytes, not occupying
+system memory).
diff --git a/Documentation/admin-guide/device-mapper/dm-ebs.rst b/Documentation/admin-guide/device-mapper/dm-ebs.rst
new file mode 100644
index 000000000000..c09f66db5621
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/dm-ebs.rst
@@ -0,0 +1,51 @@
+======
+dm-ebs
+======
+
+
+This target is similar to the linear target except that it emulates
+a smaller logical block size on a device with a larger logical block
+size. Its main purpose is to provide emulation of 512 byte sectors on
+devices that do not provide this emulation (i.e. 4K native disks).
+
+Supported emulated logical block sizes 512, 1024, 2048 and 4096.
+
+Underlying block size can be set to > 4K to test buffering larger units.
+
+
+Table parameters
+----------------
+ <dev path> <offset> <emulated sectors> [<underlying sectors>]
+
+Mandatory parameters:
+
+ <dev path>:
+ Full pathname to the underlying block-device,
+ or a "major:minor" device-number.
+ <offset>:
+ Starting sector within the device;
+ has to be a multiple of <emulated sectors>.
+ <emulated sectors>:
+ Number of sectors defining the logical block size to be emulated;
+ 1, 2, 4, 8 sectors of 512 bytes supported.
+
+Optional parameter:
+
+ <underlying sectors>:
+ Number of sectors defining the logical block size of <dev path>.
+ 2^N supported, e.g. 8 = emulate 8 sectors of 512 bytes = 4KiB.
+ If not provided, the logical block size of <dev path> will be used.
+
+
+Examples:
+
+Emulate 1 sector = 512 bytes logical block size on /dev/sda starting at
+offset 1024 sectors with underlying devices block size automatically set:
+
+ebs /dev/sda 1024 1
+
+Emulate 2 sector = 1KiB logical block size on /dev/sda starting at
+offset 128 sectors, enforce 2KiB underlying device block size.
+This presumes 2KiB logical blocksize on /dev/sda or less to work:
+
+ebs /dev/sda 128 2 4
diff --git a/Documentation/admin-guide/device-mapper/dm-flakey.rst b/Documentation/admin-guide/device-mapper/dm-flakey.rst
new file mode 100644
index 000000000000..f967c5fea219
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/dm-flakey.rst
@@ -0,0 +1,88 @@
+=========
+dm-flakey
+=========
+
+This target is the same as the linear target except that it exhibits
+unreliable behaviour periodically. It's been found useful in simulating
+failing devices for testing purposes.
+
+Starting from the time the table is loaded, the device is available for
+<up interval> seconds, then exhibits unreliable behaviour for <down
+interval> seconds, and then this cycle repeats.
+
+Also, consider using this in combination with the dm-delay target too,
+which can delay reads and writes and/or send them to different
+underlying devices.
+
+Table parameters
+----------------
+
+::
+
+ <dev path> <offset> <up interval> <down interval> \
+ [<num_features> [<feature arguments>]]
+
+Mandatory parameters:
+
+ <dev path>:
+ Full pathname to the underlying block-device, or a
+ "major:minor" device-number.
+ <offset>:
+ Starting sector within the device.
+ <up interval>:
+ Number of seconds device is available.
+ <down interval>:
+ Number of seconds device returns errors.
+
+Optional feature parameters:
+
+ If no feature parameters are present, during the periods of
+ unreliability, all I/O returns errors.
+
+ error_reads:
+ All read I/O is failed with an error signalled.
+ Write I/O is handled correctly.
+
+ drop_writes:
+ All write I/O is silently ignored.
+ Read I/O is handled correctly.
+
+ error_writes:
+ All write I/O is failed with an error signalled.
+ Read I/O is handled correctly.
+
+ corrupt_bio_byte <Nth_byte> <direction> <value> <flags>:
+ During <down interval>, replace <Nth_byte> of the data of
+ each matching bio with <value>.
+
+ <Nth_byte>:
+ The offset of the byte to replace.
+ Counting starts at 1, to replace the first byte.
+ <direction>:
+ Either 'r' to corrupt reads or 'w' to corrupt writes.
+ 'w' is incompatible with drop_writes.
+ <value>:
+ The value (from 0-255) to write.
+ <flags>:
+ Perform the replacement only if bio->bi_opf has all the
+ selected flags set.
+
+ random_read_corrupt <probability>
+ During <down interval>, replace random byte in a read bio
+ with a random value. probability is an integer between
+ 0 and 1000000000 meaning 0% to 100% probability of corruption.
+
+ random_write_corrupt <probability>
+ During <down interval>, replace random byte in a write bio
+ with a random value. probability is an integer between
+ 0 and 1000000000 meaning 0% to 100% probability of corruption.
+
+Examples:
+
+Replaces the 32nd byte of READ bios with the value 1::
+
+ corrupt_bio_byte 32 r 1 0
+
+Replaces the 224th byte of REQ_META (=32) bios with the value 0::
+
+ corrupt_bio_byte 224 w 0 32
diff --git a/Documentation/admin-guide/device-mapper/dm-ima.rst b/Documentation/admin-guide/device-mapper/dm-ima.rst
new file mode 100644
index 000000000000..a4aa50a828e0
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/dm-ima.rst
@@ -0,0 +1,715 @@
+======
+dm-ima
+======
+
+For a given system, various external services/infrastructure tools
+(including the attestation service) interact with it - both during the
+setup and during rest of the system run-time. They share sensitive data
+and/or execute critical workload on that system. The external services
+may want to verify the current run-time state of the relevant kernel
+subsystems before fully trusting the system with business-critical
+data/workload.
+
+Device mapper plays a critical role on a given system by providing
+various important functionalities to the block devices using various
+target types like crypt, verity, integrity etc. Each of these target
+types’ functionalities can be configured with various attributes.
+The attributes chosen to configure these target types can significantly
+impact the security profile of the block device, and in-turn, of the
+system itself. For instance, the type of encryption algorithm and the
+key size determines the strength of encryption for a given block device.
+
+Therefore, verifying the current state of various block devices as well
+as their various target attributes is crucial for external services before
+fully trusting the system with business-critical data/workload.
+
+IMA kernel subsystem provides the necessary functionality for
+device mapper to measure the state and configuration of
+various block devices -
+
+- by device mapper itself, from within the kernel,
+- in a tamper resistant way,
+- and re-measured - triggered on state/configuration change.
+
+Setting the IMA Policy:
+=======================
+For IMA to measure the data on a given system, the IMA policy on the
+system needs to be updated to have following line, and the system needs
+to be restarted for the measurements to take effect.
+
+::
+
+ /etc/ima/ima-policy
+ measure func=CRITICAL_DATA label=device-mapper template=ima-buf
+
+The measurements will be reflected in the IMA logs, which are located at:
+
+::
+
+ /sys/kernel/security/integrity/ima/ascii_runtime_measurements
+ /sys/kernel/security/integrity/ima/binary_runtime_measurements
+
+Then IMA ASCII measurement log has the following format:
+
+::
+
+ <PCR> <TEMPLATE_DATA_DIGEST> <TEMPLATE_NAME> <TEMPLATE_DATA>
+
+ PCR := Platform Configuration Register, in which the values are registered.
+ This is applicable if TPM chip is in use.
+
+ TEMPLATE_DATA_DIGEST := Template data digest of the IMA record.
+ TEMPLATE_NAME := Template name that registered the integrity value (e.g. ima-buf).
+
+ TEMPLATE_DATA := <ALG> ":" <EVENT_DIGEST> <EVENT_NAME> <EVENT_DATA>
+ It contains data for the specific event to be measured,
+ in a given template data format.
+
+ ALG := Algorithm to compute event digest
+ EVENT_DIGEST := Digest of the event data
+ EVENT_NAME := Description of the event (e.g. 'dm_table_load').
+ EVENT_DATA := The event data to be measured.
+
+|
+
+| *NOTE #1:*
+| The DM target data measured by IMA subsystem can alternatively
+ be queried from userspace by setting DM_IMA_MEASUREMENT_FLAG with
+ DM_TABLE_STATUS_CMD.
+
+|
+
+| *NOTE #2:*
+| The Kernel configuration CONFIG_IMA_DISABLE_HTABLE allows measurement of duplicate records.
+| To support recording duplicate IMA events in the IMA log, the Kernel needs to be configured with
+ CONFIG_IMA_DISABLE_HTABLE=y.
+
+Supported Device States:
+========================
+Following device state changes will trigger IMA measurements:
+
+ 1. Table load
+ #. Device resume
+ #. Device remove
+ #. Table clear
+ #. Device rename
+
+1. Table load:
+---------------
+When a new table is loaded in a device's inactive table slot,
+the device information and target specific details from the
+targets in the table are measured.
+
+The IMA measurement log has the following format for 'dm_table_load':
+
+::
+
+ EVENT_NAME := "dm_table_load"
+ EVENT_DATA := <dm_version_str> ";" <device_metadata> ";" <table_load_data>
+
+ dm_version_str := "dm_version=" <N> "." <N> "." <N>
+ Same as Device Mapper driver version.
+ device_metadata := <device_name> "," <device_uuid> "," <device_major> "," <device_minor> ","
+ <minor_count> "," <num_device_targets> ";"
+
+ device_name := "name=" <dm-device-name>
+ device_uuid := "uuid=" <dm-device-uuid>
+ device_major := "major=" <N>
+ device_minor := "minor=" <N>
+ minor_count := "minor_count=" <N>
+ num_device_targets := "num_targets=" <N>
+ dm-device-name := Name of the device. If it contains special characters like '\', ',', ';',
+ they are prefixed with '\'.
+ dm-device-uuid := UUID of the device. If it contains special characters like '\', ',', ';',
+ they are prefixed with '\'.
+
+ table_load_data := <target_data>
+ Represents the data (as name=value pairs) from various targets in the table,
+ which is being loaded into the DM device's inactive table slot.
+ target_data := <target_data_row> | <target_data><target_data_row>
+
+ target_data_row := <target_index> "," <target_begin> "," <target_len> "," <target_name> ","
+ <target_version> "," <target_attributes> ";"
+ target_index := "target_index=" <N>
+ Represents nth target in the table (from 0 to N-1 targets specified in <num_device_targets>)
+ If all the data for N targets doesn't fit in the given buffer - then the data that fits
+ in the buffer (say from target 0 to x) is measured in a given IMA event.
+ The remaining data from targets x+1 to N-1 is measured in the subsequent IMA events,
+ with the same format as that of 'dm_table_load'
+ i.e. <dm_version_str> ";" <device_metadata> ";" <table_load_data>.
+
+ target_begin := "target_begin=" <N>
+ target_len := "target_len=" <N>
+ target_name := Name of the target. 'linear', 'crypt', 'integrity' etc.
+ The targets that are supported for IMA measurements are documented below in the
+ 'Supported targets' section.
+ target_version := "target_version=" <N> "." <N> "." <N>
+ target_attributes := Data containing comma separated list of name=value pairs of target specific attributes.
+
+ For instance, if a linear device is created with the following table entries,
+ # dmsetup create linear1
+ 0 2 linear /dev/loop0 512
+ 2 2 linear /dev/loop0 512
+ 4 2 linear /dev/loop0 512
+ 6 2 linear /dev/loop0 512
+
+ Then IMA ASCII measurement log will have the following entry:
+ (converted from ASCII to text for readability)
+
+ 10 a8c5ff755561c7a28146389d1514c318592af49a ima-buf sha256:4d73481ecce5eadba8ab084640d85bb9ca899af4d0a122989252a76efadc5b72
+ dm_table_load
+ dm_version=4.45.0;
+ name=linear1,uuid=,major=253,minor=0,minor_count=1,num_targets=4;
+ target_index=0,target_begin=0,target_len=2,target_name=linear,target_version=1.4.0,device_name=7:0,start=512;
+ target_index=1,target_begin=2,target_len=2,target_name=linear,target_version=1.4.0,device_name=7:0,start=512;
+ target_index=2,target_begin=4,target_len=2,target_name=linear,target_version=1.4.0,device_name=7:0,start=512;
+ target_index=3,target_begin=6,target_len=2,target_name=linear,target_version=1.4.0,device_name=7:0,start=512;
+
+2. Device resume:
+------------------
+When a suspended device is resumed, the device information and the hash of the
+data from previous load of an active table are measured.
+
+The IMA measurement log has the following format for 'dm_device_resume':
+
+::
+
+ EVENT_NAME := "dm_device_resume"
+ EVENT_DATA := <dm_version_str> ";" <device_metadata> ";" <active_table_hash> ";" <current_device_capacity> ";"
+
+ dm_version_str := As described in the 'Table load' section above.
+ device_metadata := As described in the 'Table load' section above.
+ active_table_hash := "active_table_hash=" <table_hash_alg> ":" <table_hash>
+ Rerpresents the hash of the IMA data being measured for the
+ active table for the device.
+ table_hash_alg := Algorithm used to compute the hash.
+ table_hash := Hash of the (<dm_version_str> ";" <device_metadata> ";" <table_load_data> ";")
+ as described in the 'dm_table_load' above.
+ Note: If the table_load data spans across multiple IMA 'dm_table_load'
+ events for a given device, the hash is computed combining all the event data
+ i.e. (<dm_version_str> ";" <device_metadata> ";" <table_load_data> ";")
+ across all those events.
+ current_device_capacity := "current_device_capacity=" <N>
+
+ For instance, if a linear device is resumed with the following command,
+ #dmsetup resume linear1
+
+ then IMA ASCII measurement log will have an entry with:
+ (converted from ASCII to text for readability)
+
+ 10 56c00cc062ffc24ccd9ac2d67d194af3282b934e ima-buf sha256:e7d12c03b958b4e0e53e7363a06376be88d98a1ac191fdbd3baf5e4b77f329b6
+ dm_device_resume
+ dm_version=4.45.0;
+ name=linear1,uuid=,major=253,minor=0,minor_count=1,num_targets=4;
+ active_table_hash=sha256:4d73481ecce5eadba8ab084640d85bb9ca899af4d0a122989252a76efadc5b72;current_device_capacity=8;
+
+3. Device remove:
+------------------
+When a device is removed, the device information and a sha256 hash of the
+data from an active and inactive table are measured.
+
+The IMA measurement log has the following format for 'dm_device_remove':
+
+::
+
+ EVENT_NAME := "dm_device_remove"
+ EVENT_DATA := <dm_version_str> ";" <device_active_metadata> ";" <device_inactive_metadata> ";"
+ <active_table_hash> "," <inactive_table_hash> "," <remove_all> ";" <current_device_capacity> ";"
+
+ dm_version_str := As described in the 'Table load' section above.
+ device_active_metadata := Device metadata that reflects the currently loaded active table.
+ The format is same as 'device_metadata' described in the 'Table load' section above.
+ device_inactive_metadata := Device metadata that reflects the inactive table.
+ The format is same as 'device_metadata' described in the 'Table load' section above.
+ active_table_hash := Hash of the currently loaded active table.
+ The format is same as 'active_table_hash' described in the 'Device resume' section above.
+ inactive_table_hash := Hash of the inactive table.
+ The format is same as 'active_table_hash' described in the 'Device resume' section above.
+ remove_all := "remove_all=" <yes_no>
+ yes_no := "y" | "n"
+ current_device_capacity := "current_device_capacity=" <N>
+
+ For instance, if a linear device is removed with the following command,
+ #dmsetup remove l1
+
+ then IMA ASCII measurement log will have the following entry:
+ (converted from ASCII to text for readability)
+
+ 10 790e830a3a7a31590824ac0642b3b31c2d0e8b38 ima-buf sha256:ab9f3c959367a8f5d4403d6ce9c3627dadfa8f9f0e7ec7899299782388de3840
+ dm_device_remove
+ dm_version=4.45.0;
+ device_active_metadata=name=l1,uuid=,major=253,minor=2,minor_count=1,num_targets=2;
+ device_inactive_metadata=name=l1,uuid=,major=253,minor=2,minor_count=1,num_targets=1;
+ active_table_hash=sha256:4a7e62efaebfc86af755831998b7db6f59b60d23c9534fb16a4455907957953a,
+ inactive_table_hash=sha256:9d79c175bc2302d55a183e8f50ad4bafd60f7692fd6249e5fd213e2464384b86,remove_all=n;
+ current_device_capacity=2048;
+
+4. Table clear:
+----------------
+When an inactive table is cleared from the device, the device information and a sha256 hash of the
+data from an inactive table are measured.
+
+The IMA measurement log has the following format for 'dm_table_clear':
+
+::
+
+ EVENT_NAME := "dm_table_clear"
+ EVENT_DATA := <dm_version_str> ";" <device_inactive_metadata> ";" <inactive_table_hash> ";" <current_device_capacity> ";"
+
+ dm_version_str := As described in the 'Table load' section above.
+ device_inactive_metadata := Device metadata that was captured during the load time inactive table being cleared.
+ The format is same as 'device_metadata' described in the 'Table load' section above.
+ inactive_table_hash := Hash of the inactive table being cleared from the device.
+ The format is same as 'active_table_hash' described in the 'Device resume' section above.
+ current_device_capacity := "current_device_capacity=" <N>
+
+ For instance, if a linear device's inactive table is cleared,
+ #dmsetup clear l1
+
+ then IMA ASCII measurement log will have an entry with:
+ (converted from ASCII to text for readability)
+
+ 10 77d347408f557f68f0041acb0072946bb2367fe5 ima-buf sha256:42f9ca22163fdfa548e6229dece2959bc5ce295c681644240035827ada0e1db5
+ dm_table_clear
+ dm_version=4.45.0;
+ name=l1,uuid=,major=253,minor=2,minor_count=1,num_targets=1;
+ inactive_table_hash=sha256:75c0dc347063bf474d28a9907037eba060bfe39d8847fc0646d75e149045d545;current_device_capacity=1024;
+
+5. Device rename:
+------------------
+When an device's NAME or UUID is changed, the device information and the new NAME and UUID
+are measured.
+
+The IMA measurement log has the following format for 'dm_device_rename':
+
+::
+
+ EVENT_NAME := "dm_device_rename"
+ EVENT_DATA := <dm_version_str> ";" <device_active_metadata> ";" <new_device_name> "," <new_device_uuid> ";" <current_device_capacity> ";"
+
+ dm_version_str := As described in the 'Table load' section above.
+ device_active_metadata := Device metadata that reflects the currently loaded active table.
+ The format is same as 'device_metadata' described in the 'Table load' section above.
+ new_device_name := "new_name=" <dm-device-name>
+ dm-device-name := Same as <dm-device-name> described in 'Table load' section above
+ new_device_uuid := "new_uuid=" <dm-device-uuid>
+ dm-device-uuid := Same as <dm-device-uuid> described in 'Table load' section above
+ current_device_capacity := "current_device_capacity=" <N>
+
+ E.g 1: if a linear device's name is changed with the following command,
+ #dmsetup rename linear1 --setuuid 1234-5678
+
+ then IMA ASCII measurement log will have an entry with:
+ (converted from ASCII to text for readability)
+
+ 10 8b0423209b4c66ac1523f4c9848c9b51ee332f48 ima-buf sha256:6847b7258134189531db593e9230b257c84f04038b5a18fd2e1473860e0569ac
+ dm_device_rename
+ dm_version=4.45.0;
+ name=linear1,uuid=,major=253,minor=2,minor_count=1,num_targets=1;new_name=linear1,new_uuid=1234-5678;
+ current_device_capacity=1024;
+
+ E.g 2: if a linear device's name is changed with the following command,
+ # dmsetup rename linear1 linear=2
+
+ then IMA ASCII measurement log will have an entry with:
+ (converted from ASCII to text for readability)
+
+ 10 bef70476b99c2bdf7136fae033aa8627da1bf76f ima-buf sha256:8c6f9f53b9ef9dc8f92a2f2cca8910e622543d0f0d37d484870cb16b95111402
+ dm_device_rename
+ dm_version=4.45.0;
+ name=linear1,uuid=1234-5678,major=253,minor=2,minor_count=1,num_targets=1;
+ new_name=linear\=2,new_uuid=1234-5678;
+ current_device_capacity=1024;
+
+Supported targets:
+==================
+
+Following targets are supported to measure their data using IMA:
+
+ 1. cache
+ #. crypt
+ #. integrity
+ #. linear
+ #. mirror
+ #. multipath
+ #. raid
+ #. snapshot
+ #. striped
+ #. verity
+
+1. cache
+---------
+The 'target_attributes' (described as part of EVENT_DATA in 'Table load'
+section above) has the following data format for 'cache' target.
+
+::
+
+ target_attributes := <target_name> "," <target_version> "," <metadata_mode> "," <cache_metadata_device> ","
+ <cache_device> "," <cache_origin_device> "," <writethrough> "," <writeback> ","
+ <passthrough> "," <no_discard_passdown> ";"
+
+ target_name := "target_name=cache"
+ target_version := "target_version=" <N> "." <N> "." <N>
+ metadata_mode := "metadata_mode=" <cache_metadata_mode>
+ cache_metadata_mode := "fail" | "ro" | "rw"
+ cache_device := "cache_device=" <cache_device_name_string>
+ cache_origin_device := "cache_origin_device=" <cache_origin_device_string>
+ writethrough := "writethrough=" <yes_no>
+ writeback := "writeback=" <yes_no>
+ passthrough := "passthrough=" <yes_no>
+ no_discard_passdown := "no_discard_passdown=" <yes_no>
+ yes_no := "y" | "n"
+
+ E.g.
+ When a 'cache' target is loaded, then IMA ASCII measurement log will have an entry
+ similar to the following, depicting what 'cache' attributes are measured in EVENT_DATA
+ for 'dm_table_load' event.
+ (converted from ASCII to text for readability)
+
+ dm_version=4.45.0;name=cache1,uuid=cache_uuid,major=253,minor=2,minor_count=1,num_targets=1;
+ target_index=0,target_begin=0,target_len=28672,target_name=cache,target_version=2.2.0,metadata_mode=rw,
+ cache_metadata_device=253:4,cache_device=253:3,cache_origin_device=253:5,writethrough=y,writeback=n,
+ passthrough=n,metadata2=y,no_discard_passdown=n;
+
+
+2. crypt
+---------
+The 'target_attributes' (described as part of EVENT_DATA in 'Table load'
+section above) has the following data format for 'crypt' target.
+
+::
+
+ target_attributes := <target_name> "," <target_version> "," <allow_discards> "," <same_cpu_crypt> ","
+ <submit_from_crypt_cpus> "," <no_read_workqueue> "," <no_write_workqueue> ","
+ <iv_large_sectors> "," <iv_large_sectors> "," [<integrity_tag_size> ","] [<cipher_auth> ","]
+ [<sector_size> ","] [<cipher_string> ","] <key_size> "," <key_parts> ","
+ <key_extra_size> "," <key_mac_size> ";"
+
+ target_name := "target_name=crypt"
+ target_version := "target_version=" <N> "." <N> "." <N>
+ allow_discards := "allow_discards=" <yes_no>
+ same_cpu_crypt := "same_cpu_crypt=" <yes_no>
+ submit_from_crypt_cpus := "submit_from_crypt_cpus=" <yes_no>
+ no_read_workqueue := "no_read_workqueue=" <yes_no>
+ no_write_workqueue := "no_write_workqueue=" <yes_no>
+ iv_large_sectors := "iv_large_sectors=" <yes_no>
+ integrity_tag_size := "integrity_tag_size=" <N>
+ cipher_auth := "cipher_auth=" <string>
+ sector_size := "sector_size=" <N>
+ cipher_string := "cipher_string="
+ key_size := "key_size=" <N>
+ key_parts := "key_parts=" <N>
+ key_extra_size := "key_extra_size=" <N>
+ key_mac_size := "key_mac_size=" <N>
+ yes_no := "y" | "n"
+
+ E.g.
+ When a 'crypt' target is loaded, then IMA ASCII measurement log will have an entry
+ similar to the following, depicting what 'crypt' attributes are measured in EVENT_DATA
+ for 'dm_table_load' event.
+ (converted from ASCII to text for readability)
+
+ dm_version=4.45.0;
+ name=crypt1,uuid=crypt_uuid1,major=253,minor=0,minor_count=1,num_targets=1;
+ target_index=0,target_begin=0,target_len=1953125,target_name=crypt,target_version=1.23.0,
+ allow_discards=y,same_cpu=n,submit_from_crypt_cpus=n,no_read_workqueue=n,no_write_workqueue=n,
+ iv_large_sectors=n,cipher_string=aes-xts-plain64,key_size=32,key_parts=1,key_extra_size=0,key_mac_size=0;
+
+3. integrity
+-------------
+The 'target_attributes' (described as part of EVENT_DATA in 'Table load'
+section above) has the following data format for 'integrity' target.
+
+::
+
+ target_attributes := <target_name> "," <target_version> "," <dev_name> "," <start>
+ <tag_size> "," <mode> "," [<meta_device> ","] [<block_size> ","] <recalculate> ","
+ <allow_discards> "," <fix_padding> "," <fix_hmac> "," <legacy_recalculate> ","
+ <journal_sectors> "," <interleave_sectors> "," <buffer_sectors> ";"
+
+ target_name := "target_name=integrity"
+ target_version := "target_version=" <N> "." <N> "." <N>
+ dev_name := "dev_name=" <device_name_str>
+ start := "start=" <N>
+ tag_size := "tag_size=" <N>
+ mode := "mode=" <integrity_mode_str>
+ integrity_mode_str := "J" | "B" | "D" | "R"
+ meta_device := "meta_device=" <meta_device_str>
+ block_size := "block_size=" <N>
+ recalculate := "recalculate=" <yes_no>
+ allow_discards := "allow_discards=" <yes_no>
+ fix_padding := "fix_padding=" <yes_no>
+ fix_hmac := "fix_hmac=" <yes_no>
+ legacy_recalculate := "legacy_recalculate=" <yes_no>
+ journal_sectors := "journal_sectors=" <N>
+ interleave_sectors := "interleave_sectors=" <N>
+ buffer_sectors := "buffer_sectors=" <N>
+ yes_no := "y" | "n"
+
+ E.g.
+ When a 'integrity' target is loaded, then IMA ASCII measurement log will have an entry
+ similar to the following, depicting what 'integrity' attributes are measured in EVENT_DATA
+ for 'dm_table_load' event.
+ (converted from ASCII to text for readability)
+
+ dm_version=4.45.0;
+ name=integrity1,uuid=,major=253,minor=1,minor_count=1,num_targets=1;
+ target_index=0,target_begin=0,target_len=7856,target_name=integrity,target_version=1.10.0,
+ dev_name=253:0,start=0,tag_size=32,mode=J,recalculate=n,allow_discards=n,fix_padding=n,
+ fix_hmac=n,legacy_recalculate=n,journal_sectors=88,interleave_sectors=32768,buffer_sectors=128;
+
+
+4. linear
+----------
+The 'target_attributes' (described as part of EVENT_DATA in 'Table load'
+section above) has the following data format for 'linear' target.
+
+::
+
+ target_attributes := <target_name> "," <target_version> "," <device_name> <,> <start> ";"
+
+ target_name := "target_name=linear"
+ target_version := "target_version=" <N> "." <N> "." <N>
+ device_name := "device_name=" <linear_device_name_str>
+ start := "start=" <N>
+
+ E.g.
+ When a 'linear' target is loaded, then IMA ASCII measurement log will have an entry
+ similar to the following, depicting what 'linear' attributes are measured in EVENT_DATA
+ for 'dm_table_load' event.
+ (converted from ASCII to text for readability)
+
+ dm_version=4.45.0;
+ name=linear1,uuid=linear_uuid1,major=253,minor=2,minor_count=1,num_targets=1;
+ target_index=0,target_begin=0,target_len=28672,target_name=linear,target_version=1.4.0,
+ device_name=253:1,start=2048;
+
+5. mirror
+----------
+The 'target_attributes' (described as part of EVENT_DATA in 'Table load'
+section above) has the following data format for 'mirror' target.
+
+::
+
+ target_attributes := <target_name> "," <target_version> "," <nr_mirrors> ","
+ <mirror_device_data> "," <handle_errors> "," <keep_log> "," <log_type_status> ";"
+
+ target_name := "target_name=mirror"
+ target_version := "target_version=" <N> "." <N> "." <N>
+ nr_mirrors := "nr_mirrors=" <NR>
+ mirror_device_data := <mirror_device_row> | <mirror_device_data><mirror_device_row>
+ mirror_device_row is repeated <NR> times - for <NR> described in <nr_mirrors>.
+ mirror_device_row := <mirror_device_name> "," <mirror_device_status>
+ mirror_device_name := "mirror_device_" <X> "=" <mirror_device_name_str>
+ where <X> ranges from 0 to (<NR> -1) - for <NR> described in <nr_mirrors>.
+ mirror_device_status := "mirror_device_" <X> "_status=" <mirror_device_status_char>
+ where <X> ranges from 0 to (<NR> -1) - for <NR> described in <nr_mirrors>.
+ mirror_device_status_char := "A" | "F" | "D" | "S" | "R" | "U"
+ handle_errors := "handle_errors=" <yes_no>
+ keep_log := "keep_log=" <yes_no>
+ log_type_status := "log_type_status=" <log_type_status_str>
+ yes_no := "y" | "n"
+
+ E.g.
+ When a 'mirror' target is loaded, then IMA ASCII measurement log will have an entry
+ similar to the following, depicting what 'mirror' attributes are measured in EVENT_DATA
+ for 'dm_table_load' event.
+ (converted from ASCII to text for readability)
+
+ dm_version=4.45.0;
+ name=mirror1,uuid=mirror_uuid1,major=253,minor=6,minor_count=1,num_targets=1;
+ target_index=0,target_begin=0,target_len=2048,target_name=mirror,target_version=1.14.0,nr_mirrors=2,
+ mirror_device_0=253:4,mirror_device_0_status=A,
+ mirror_device_1=253:5,mirror_device_1_status=A,
+ handle_errors=y,keep_log=n,log_type_status=;
+
+6. multipath
+-------------
+The 'target_attributes' (described as part of EVENT_DATA in 'Table load'
+section above) has the following data format for 'multipath' target.
+
+::
+
+ target_attributes := <target_name> "," <target_version> "," <nr_priority_groups>
+ ["," <pg_state> "," <priority_groups> "," <priority_group_paths>] ";"
+
+ target_name := "target_name=multipath"
+ target_version := "target_version=" <N> "." <N> "." <N>
+ nr_priority_groups := "nr_priority_groups=" <NPG>
+ priority_groups := <priority_groups_row>|<priority_groups_row><priority_groups>
+ priority_groups_row := "pg_state_" <X> "=" <pg_state_str> "," "nr_pgpaths_" <X> "=" <NPGP> ","
+ "path_selector_name_" <X> "=" <string> "," <priority_group_paths>
+ where <X> ranges from 0 to (<NPG> -1) - for <NPG> described in <nr_priority_groups>.
+ pg_state_str := "E" | "A" | "D"
+ <priority_group_paths> := <priority_group_paths_row> | <priority_group_paths_row><priority_group_paths>
+ priority_group_paths_row := "path_name_" <X> "_" <Y> "=" <string> "," "is_active_" <X> "_" <Y> "=" <is_active_str>
+ "fail_count_" <X> "_" <Y> "=" <N> "," "path_selector_status_" <X> "_" <Y> "=" <path_selector_status_str>
+ where <X> ranges from 0 to (<NPG> -1) - for <NPG> described in <nr_priority_groups>,
+ and <Y> ranges from 0 to (<NPGP> -1) - for <NPGP> described in <priority_groups_row>.
+ is_active_str := "A" | "F"
+
+ E.g.
+ When a 'multipath' target is loaded, then IMA ASCII measurement log will have an entry
+ similar to the following, depicting what 'multipath' attributes are measured in EVENT_DATA
+ for 'dm_table_load' event.
+ (converted from ASCII to text for readability)
+
+ dm_version=4.45.0;
+ name=mp,uuid=,major=253,minor=0,minor_count=1,num_targets=1;
+ target_index=0,target_begin=0,target_len=2097152,target_name=multipath,target_version=1.14.0,nr_priority_groups=2,
+ pg_state_0=E,nr_pgpaths_0=2,path_selector_name_0=queue-length,
+ path_name_0_0=8:16,is_active_0_0=A,fail_count_0_0=0,path_selector_status_0_0=,
+ path_name_0_1=8:32,is_active_0_1=A,fail_count_0_1=0,path_selector_status_0_1=,
+ pg_state_1=E,nr_pgpaths_1=2,path_selector_name_1=queue-length,
+ path_name_1_0=8:48,is_active_1_0=A,fail_count_1_0=0,path_selector_status_1_0=,
+ path_name_1_1=8:64,is_active_1_1=A,fail_count_1_1=0,path_selector_status_1_1=;
+
+7. raid
+--------
+The 'target_attributes' (described as part of EVENT_DATA in 'Table load'
+section above) has the following data format for 'raid' target.
+
+::
+
+ target_attributes := <target_name> "," <target_version> "," <raid_type> "," <raid_disks> "," <raid_state>
+ <raid_device_status> ["," journal_dev_mode] ";"
+
+ target_name := "target_name=raid"
+ target_version := "target_version=" <N> "." <N> "." <N>
+ raid_type := "raid_type=" <raid_type_str>
+ raid_disks := "raid_disks=" <NRD>
+ raid_state := "raid_state=" <raid_state_str>
+ raid_state_str := "frozen" | "reshape" |"resync" | "check" | "repair" | "recover" | "idle" |"undef"
+ raid_device_status := <raid_device_status_row> | <raid_device_status_row><raid_device_status>
+ <raid_device_status_row> is repeated <NRD> times - for <NRD> described in <raid_disks>.
+ raid_device_status_row := "raid_device_" <X> "_status=" <raid_device_status_str>
+ where <X> ranges from 0 to (<NRD> -1) - for <NRD> described in <raid_disks>.
+ raid_device_status_str := "A" | "D" | "a" | "-"
+ journal_dev_mode := "journal_dev_mode=" <journal_dev_mode_str>
+ journal_dev_mode_str := "writethrough" | "writeback" | "invalid"
+
+ E.g.
+ When a 'raid' target is loaded, then IMA ASCII measurement log will have an entry
+ similar to the following, depicting what 'raid' attributes are measured in EVENT_DATA
+ for 'dm_table_load' event.
+ (converted from ASCII to text for readability)
+
+ dm_version=4.45.0;
+ name=raid_LV1,uuid=uuid_raid_LV1,major=253,minor=12,minor_count=1,num_targets=1;
+ target_index=0,target_begin=0,target_len=2048,target_name=raid,target_version=1.15.1,
+ raid_type=raid10,raid_disks=4,raid_state=idle,
+ raid_device_0_status=A,
+ raid_device_1_status=A,
+ raid_device_2_status=A,
+ raid_device_3_status=A;
+
+
+8. snapshot
+------------
+The 'target_attributes' (described as part of EVENT_DATA in 'Table load'
+section above) has the following data format for 'snapshot' target.
+
+::
+
+ target_attributes := <target_name> "," <target_version> "," <snap_origin_name> ","
+ <snap_cow_name> "," <snap_valid> "," <snap_merge_failed> "," <snapshot_overflowed> ";"
+
+ target_name := "target_name=snapshot"
+ target_version := "target_version=" <N> "." <N> "." <N>
+ snap_origin_name := "snap_origin_name=" <string>
+ snap_cow_name := "snap_cow_name=" <string>
+ snap_valid := "snap_valid=" <yes_no>
+ snap_merge_failed := "snap_merge_failed=" <yes_no>
+ snapshot_overflowed := "snapshot_overflowed=" <yes_no>
+ yes_no := "y" | "n"
+
+ E.g.
+ When a 'snapshot' target is loaded, then IMA ASCII measurement log will have an entry
+ similar to the following, depicting what 'snapshot' attributes are measured in EVENT_DATA
+ for 'dm_table_load' event.
+ (converted from ASCII to text for readability)
+
+ dm_version=4.45.0;
+ name=snap1,uuid=snap_uuid1,major=253,minor=13,minor_count=1,num_targets=1;
+ target_index=0,target_begin=0,target_len=4096,target_name=snapshot,target_version=1.16.0,
+ snap_origin_name=253:11,snap_cow_name=253:12,snap_valid=y,snap_merge_failed=n,snapshot_overflowed=n;
+
+9. striped
+-----------
+The 'target_attributes' (described as part of EVENT_DATA in 'Table load'
+section above) has the following data format for 'striped' target.
+
+::
+
+ target_attributes := <target_name> "," <target_version> "," <stripes> "," <chunk_size> ","
+ <stripe_data> ";"
+
+ target_name := "target_name=striped"
+ target_version := "target_version=" <N> "." <N> "." <N>
+ stripes := "stripes=" <NS>
+ chunk_size := "chunk_size=" <N>
+ stripe_data := <stripe_data_row>|<stripe_data><stripe_data_row>
+ stripe_data_row := <stripe_device_name> "," <stripe_physical_start> "," <stripe_status>
+ stripe_device_name := "stripe_" <X> "_device_name=" <stripe_device_name_str>
+ where <X> ranges from 0 to (<NS> -1) - for <NS> described in <stripes>.
+ stripe_physical_start := "stripe_" <X> "_physical_start=" <N>
+ where <X> ranges from 0 to (<NS> -1) - for <NS> described in <stripes>.
+ stripe_status := "stripe_" <X> "_status=" <stripe_status_str>
+ where <X> ranges from 0 to (<NS> -1) - for <NS> described in <stripes>.
+ stripe_status_str := "D" | "A"
+
+ E.g.
+ When a 'striped' target is loaded, then IMA ASCII measurement log will have an entry
+ similar to the following, depicting what 'striped' attributes are measured in EVENT_DATA
+ for 'dm_table_load' event.
+ (converted from ASCII to text for readability)
+
+ dm_version=4.45.0;
+ name=striped1,uuid=striped_uuid1,major=253,minor=5,minor_count=1,num_targets=1;
+ target_index=0,target_begin=0,target_len=640,target_name=striped,target_version=1.6.0,stripes=2,chunk_size=64,
+ stripe_0_device_name=253:0,stripe_0_physical_start=2048,stripe_0_status=A,
+ stripe_1_device_name=253:3,stripe_1_physical_start=2048,stripe_1_status=A;
+
+10. verity
+----------
+The 'target_attributes' (described as part of EVENT_DATA in 'Table load'
+section above) has the following data format for 'verity' target.
+
+::
+
+ target_attributes := <target_name> "," <target_version> "," <hash_failed> "," <verity_version> ","
+ <data_device_name> "," <hash_device_name> "," <verity_algorithm> "," <root_digest> ","
+ <salt> "," <ignore_zero_blocks> "," <check_at_most_once> ["," <root_hash_sig_key_desc>]
+ ["," <verity_mode>] ";"
+
+ target_name := "target_name=verity"
+ target_version := "target_version=" <N> "." <N> "." <N>
+ hash_failed := "hash_failed=" <hash_failed_str>
+ hash_failed_str := "C" | "V"
+ verity_version := "verity_version=" <verity_version_str>
+ data_device_name := "data_device_name=" <data_device_name_str>
+ hash_device_name := "hash_device_name=" <hash_device_name_str>
+ verity_algorithm := "verity_algorithm=" <verity_algorithm_str>
+ root_digest := "root_digest=" <root_digest_str>
+ salt := "salt=" <salt_str>
+ salt_str := "-" <verity_salt_str>
+ ignore_zero_blocks := "ignore_zero_blocks=" <yes_no>
+ check_at_most_once := "check_at_most_once=" <yes_no>
+ root_hash_sig_key_desc := "root_hash_sig_key_desc="
+ verity_mode := "verity_mode=" <verity_mode_str>
+ verity_mode_str := "ignore_corruption" | "restart_on_corruption" | "panic_on_corruption" | "invalid"
+ yes_no := "y" | "n"
+
+ E.g.
+ When a 'verity' target is loaded, then IMA ASCII measurement log will have an entry
+ similar to the following, depicting what 'verity' attributes are measured in EVENT_DATA
+ for 'dm_table_load' event.
+ (converted from ASCII to text for readability)
+
+ dm_version=4.45.0;
+ name=test-verity,uuid=,major=253,minor=2,minor_count=1,num_targets=1;
+ target_index=0,target_begin=0,target_len=1953120,target_name=verity,target_version=1.8.0,hash_failed=V,
+ verity_version=1,data_device_name=253:1,hash_device_name=253:0,verity_algorithm=sha256,
+ root_digest=29cb87e60ce7b12b443ba6008266f3e41e93e403d7f298f8e3f316b29ff89c5e,
+ salt=e48da609055204e89ae53b655ca2216dd983cf3cb829f34f63a297d106d53e2d,
+ ignore_zero_blocks=n,check_at_most_once=n;
diff --git a/Documentation/admin-guide/device-mapper/dm-init.rst b/Documentation/admin-guide/device-mapper/dm-init.rst
new file mode 100644
index 000000000000..981d6a907699
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/dm-init.rst
@@ -0,0 +1,133 @@
+================================
+Early creation of mapped devices
+================================
+
+It is possible to configure a device-mapper device to act as the root device for
+your system in two ways.
+
+The first is to build an initial ramdisk which boots to a minimal userspace
+which configures the device, then pivot_root(8) in to it.
+
+The second is to create one or more device-mappers using the module parameter
+"dm-mod.create=" through the kernel boot command line argument.
+
+The format is specified as a string of data separated by commas and optionally
+semi-colons, where:
+
+ - a comma is used to separate fields like name, uuid, flags and table
+ (specifies one device)
+ - a semi-colon is used to separate devices.
+
+So the format will look like this::
+
+ dm-mod.create=<name>,<uuid>,<minor>,<flags>,<table>[,<table>+][;<name>,<uuid>,<minor>,<flags>,<table>[,<table>+]+]
+
+Where::
+
+ <name> ::= The device name.
+ <uuid> ::= xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx | ""
+ <minor> ::= The device minor number | ""
+ <flags> ::= "ro" | "rw"
+ <table> ::= <start_sector> <num_sectors> <target_type> <target_args>
+ <target_type> ::= "verity" | "linear" | ... (see list below)
+
+The dm line should be equivalent to the one used by the dmsetup tool with the
+`--concise` argument.
+
+Target types
+============
+
+Not all target types are available as there are serious risks in allowing
+activation of certain DM targets without first using userspace tools to check
+the validity of associated metadata.
+
+======================= =======================================================
+`cache` constrained, userspace should verify cache device
+`crypt` allowed
+`delay` allowed
+`era` constrained, userspace should verify metadata device
+`flakey` constrained, meant for test
+`linear` allowed
+`log-writes` constrained, userspace should verify metadata device
+`mirror` constrained, userspace should verify main/mirror device
+`raid` constrained, userspace should verify metadata device
+`snapshot` constrained, userspace should verify src/dst device
+`snapshot-origin` allowed
+`snapshot-merge` constrained, userspace should verify src/dst device
+`striped` allowed
+`switch` constrained, userspace should verify dev path
+`thin` constrained, requires dm target message from userspace
+`thin-pool` constrained, requires dm target message from userspace
+`verity` allowed
+`writecache` constrained, userspace should verify cache device
+`zero` constrained, not meant for rootfs
+======================= =======================================================
+
+If the target is not listed above, it is constrained by default (not tested).
+
+Examples
+========
+An example of booting to a linear array made up of user-mode linux block
+devices::
+
+ dm-mod.create="lroot,,,rw, 0 4096 linear 98:16 0, 4096 4096 linear 98:32 0" root=/dev/dm-0
+
+This will boot to a rw dm-linear target of 8192 sectors split across two block
+devices identified by their major:minor numbers. After boot, udev will rename
+this target to /dev/mapper/lroot (depending on the rules). No uuid was assigned.
+
+An example of multiple device-mappers, with the dm-mod.create="..." contents
+is shown here split on multiple lines for readability::
+
+ dm-linear,,1,rw,
+ 0 32768 linear 8:1 0,
+ 32768 1024000 linear 8:2 0;
+ dm-verity,,3,ro,
+ 0 1638400 verity 1 /dev/sdc1 /dev/sdc2 4096 4096 204800 1 sha256
+ ac87db56303c9c1da433d7209b5a6ef3e4779df141200cbd7c157dcb8dd89c42
+ 5ebfe87f7df3235b80a117ebc4078e44f55045487ad4a96581d1adb564615b51
+
+Other examples (per target):
+
+"crypt"::
+
+ dm-crypt,,8,ro,
+ 0 1048576 crypt aes-xts-plain64
+ babebabebabebabebabebabebabebabebabebabebabebabebabebabebabebabe 0
+ /dev/sda 0 1 allow_discards
+
+"delay"::
+
+ dm-delay,,4,ro,0 409600 delay /dev/sda1 0 500
+
+"linear"::
+
+ dm-linear,,,rw,
+ 0 32768 linear /dev/sda1 0,
+ 32768 1024000 linear /dev/sda2 0,
+ 1056768 204800 linear /dev/sda3 0,
+ 1261568 512000 linear /dev/sda4 0
+
+"snapshot-origin"::
+
+ dm-snap-orig,,4,ro,0 409600 snapshot-origin 8:2
+
+"striped"::
+
+ dm-striped,,4,ro,0 1638400 striped 4 4096
+ /dev/sda1 0 /dev/sda2 0 /dev/sda3 0 /dev/sda4 0
+
+"verity"::
+
+ dm-verity,,4,ro,
+ 0 1638400 verity 1 8:1 8:2 4096 4096 204800 1 sha256
+ fb1a5a0f00deb908d8b53cb270858975e76cf64105d412ce764225d53b8f3cfd
+ 51934789604d1b92399c52e7cb149d1b3a1b74bbbcb103b2a0aaacbed5c08584
+
+For setups using device-mapper on top of asynchronously probed block
+devices (MMC, USB, ..), it may be necessary to tell dm-init to
+explicitly wait for them to become available before setting up the
+device-mapper tables. This can be done with the "dm-mod.waitfor="
+module parameter, which takes a list of devices to wait for::
+
+ dm-mod.waitfor=<device1>[,..,<deviceN>]
diff --git a/Documentation/admin-guide/device-mapper/dm-integrity.rst b/Documentation/admin-guide/device-mapper/dm-integrity.rst
new file mode 100644
index 000000000000..c2e18ecc065c
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/dm-integrity.rst
@@ -0,0 +1,308 @@
+============
+dm-integrity
+============
+
+The dm-integrity target emulates a block device that has additional
+per-sector tags that can be used for storing integrity information.
+
+A general problem with storing integrity tags with every sector is that
+writing the sector and the integrity tag must be atomic - i.e. in case of
+crash, either both sector and integrity tag or none of them is written.
+
+To guarantee write atomicity, the dm-integrity target uses journal, it
+writes sector data and integrity tags into a journal, commits the journal
+and then copies the data and integrity tags to their respective location.
+
+The dm-integrity target can be used with the dm-crypt target - in this
+situation the dm-crypt target creates the integrity data and passes them
+to the dm-integrity target via bio_integrity_payload attached to the bio.
+In this mode, the dm-crypt and dm-integrity targets provide authenticated
+disk encryption - if the attacker modifies the encrypted device, an I/O
+error is returned instead of random data.
+
+The dm-integrity target can also be used as a standalone target, in this
+mode it calculates and verifies the integrity tag internally. In this
+mode, the dm-integrity target can be used to detect silent data
+corruption on the disk or in the I/O path.
+
+There's an alternate mode of operation where dm-integrity uses a bitmap
+instead of a journal. If a bit in the bitmap is 1, the corresponding
+region's data and integrity tags are not synchronized - if the machine
+crashes, the unsynchronized regions will be recalculated. The bitmap mode
+is faster than the journal mode, because we don't have to write the data
+twice, but it is also less reliable, because if data corruption happens
+when the machine crashes, it may not be detected.
+
+When loading the target for the first time, the kernel driver will format
+the device. But it will only format the device if the superblock contains
+zeroes. If the superblock is neither valid nor zeroed, the dm-integrity
+target can't be loaded.
+
+Accesses to the on-disk metadata area containing checksums (aka tags) are
+buffered using dm-bufio. When an access to any given metadata area
+occurs, each unique metadata area gets its own buffer(s). The buffer size
+is capped at the size of the metadata area, but may be smaller, thereby
+requiring multiple buffers to represent the full metadata area. A smaller
+buffer size will produce a smaller resulting read/write operation to the
+metadata area for small reads/writes. The metadata is still read even in
+a full write to the data covered by a single buffer.
+
+To use the target for the first time:
+
+1. overwrite the superblock with zeroes
+2. load the dm-integrity target with one-sector size, the kernel driver
+ will format the device
+3. unload the dm-integrity target
+4. read the "provided_data_sectors" value from the superblock
+5. load the dm-integrity target with the target size
+ "provided_data_sectors"
+6. if you want to use dm-integrity with dm-crypt, load the dm-crypt target
+ with the size "provided_data_sectors"
+
+
+Target arguments:
+
+1. the underlying block device
+
+2. the number of reserved sector at the beginning of the device - the
+ dm-integrity won't read of write these sectors
+
+3. the size of the integrity tag (if "-" is used, the size is taken from
+ the internal-hash algorithm)
+
+4. mode:
+
+ D - direct writes (without journal)
+ in this mode, journaling is
+ not used and data sectors and integrity tags are written
+ separately. In case of crash, it is possible that the data
+ and integrity tag doesn't match.
+ J - journaled writes
+ data and integrity tags are written to the
+ journal and atomicity is guaranteed. In case of crash,
+ either both data and tag or none of them are written. The
+ journaled mode degrades write throughput twice because the
+ data have to be written twice.
+ B - bitmap mode - data and metadata are written without any
+ synchronization, the driver maintains a bitmap of dirty
+ regions where data and metadata don't match. This mode can
+ only be used with internal hash.
+ R - recovery mode - in this mode, journal is not replayed,
+ checksums are not checked and writes to the device are not
+ allowed. This mode is useful for data recovery if the
+ device cannot be activated in any of the other standard
+ modes.
+ I - inline mode - in this mode, dm-integrity will store integrity
+ data directly in the underlying device sectors.
+ The underlying device must have an integrity profile that
+ allows storing user integrity data and provides enough
+ space for the selected integrity tag.
+
+5. the number of additional arguments
+
+Additional arguments:
+
+journal_sectors:number
+ The size of journal, this argument is used only if formatting the
+ device. If the device is already formatted, the value from the
+ superblock is used.
+
+interleave_sectors:number (default 32768)
+ The number of interleaved sectors. This values is rounded down to
+ a power of two. If the device is already formatted, the value from
+ the superblock is used.
+
+meta_device:device
+ Don't interleave the data and metadata on the device. Use a
+ separate device for metadata.
+
+buffer_sectors:number (default 128)
+ The number of sectors in one metadata buffer. The value is rounded
+ down to a power of two.
+
+journal_watermark:number (default 50)
+ The journal watermark in percents. When the size of the journal
+ exceeds this watermark, the thread that flushes the journal will
+ be started.
+
+commit_time:number (default 10000)
+ Commit time in milliseconds. When this time passes, the journal is
+ written. The journal is also written immediately if the FLUSH
+ request is received.
+
+internal_hash:algorithm(:key) (the key is optional)
+ Use internal hash or crc.
+ When this argument is used, the dm-integrity target won't accept
+ integrity tags from the upper target, but it will automatically
+ generate and verify the integrity tags.
+
+ You can use a crc algorithm (such as crc32), then integrity target
+ will protect the data against accidental corruption.
+ You can also use a hmac algorithm (for example
+ "hmac(sha256):0123456789abcdef"), in this mode it will provide
+ cryptographic authentication of the data without encryption.
+
+ When this argument is not used, the integrity tags are accepted
+ from an upper layer target, such as dm-crypt. The upper layer
+ target should check the validity of the integrity tags.
+
+recalculate
+ Recalculate the integrity tags automatically. It is only valid
+ when using internal hash.
+
+journal_crypt:algorithm(:key) (the key is optional)
+ Encrypt the journal using given algorithm to make sure that the
+ attacker can't read the journal. You can use a block cipher here
+ (such as "cbc(aes)") or a stream cipher (for example "chacha20"
+ or "ctr(aes)").
+
+ The journal contains history of last writes to the block device,
+ an attacker reading the journal could see the last sector numbers
+ that were written. From the sector numbers, the attacker can infer
+ the size of files that were written. To protect against this
+ situation, you can encrypt the journal.
+
+journal_mac:algorithm(:key) (the key is optional)
+ Protect sector numbers in the journal from accidental or malicious
+ modification. To protect against accidental modification, use a
+ crc algorithm, to protect against malicious modification, use a
+ hmac algorithm with a key.
+
+ This option is not needed when using internal-hash because in this
+ mode, the integrity of journal entries is checked when replaying
+ the journal. Thus, modified sector number would be detected at
+ this stage.
+
+block_size:number (default 512)
+ The size of a data block in bytes. The larger the block size the
+ less overhead there is for per-block integrity metadata.
+ Supported values are 512, 1024, 2048 and 4096 bytes.
+
+sectors_per_bit:number
+ In the bitmap mode, this parameter specifies the number of
+ 512-byte sectors that corresponds to one bitmap bit.
+
+bitmap_flush_interval:number
+ The bitmap flush interval in milliseconds. The metadata buffers
+ are synchronized when this interval expires.
+
+allow_discards
+ Allow block discard requests (a.k.a. TRIM) for the integrity device.
+ Discards are only allowed to devices using internal hash.
+
+fix_padding
+ Use a smaller padding of the tag area that is more
+ space-efficient. If this option is not present, large padding is
+ used - that is for compatibility with older kernels.
+
+fix_hmac
+ Improve security of internal_hash and journal_mac:
+
+ - the section number is mixed to the mac, so that an attacker can't
+ copy sectors from one journal section to another journal section
+ - the superblock is protected by journal_mac
+ - a 16-byte salt stored in the superblock is mixed to the mac, so
+ that the attacker can't detect that two disks have the same hmac
+ key and also to disallow the attacker to move sectors from one
+ disk to another
+
+legacy_recalculate
+ Allow recalculating of volumes with HMAC keys. This is disabled by
+ default for security reasons - an attacker could modify the volume,
+ set recalc_sector to zero, and the kernel would not detect the
+ modification.
+
+The journal mode (D/J), buffer_sectors, journal_watermark, commit_time and
+allow_discards can be changed when reloading the target (load an inactive
+table and swap the tables with suspend and resume). The other arguments
+should not be changed when reloading the target because the layout of disk
+data depend on them and the reloaded target would be non-functional.
+
+For example, on a device using the default interleave_sectors of 32768, a
+block_size of 512, and an internal_hash of crc32c with a tag size of 4
+bytes, it will take 128 KiB of tags to track a full data area, requiring
+256 sectors of metadata per data area. With the default buffer_sectors of
+128, that means there will be 2 buffers per metadata area, or 2 buffers
+per 16 MiB of data.
+
+Status line:
+
+1. the number of integrity mismatches
+2. provided data sectors - that is the number of sectors that the user
+ could use
+3. the current recalculating position (or '-' if we didn't recalculate)
+
+
+The layout of the formatted block device:
+
+* reserved sectors
+ (they are not used by this target, they can be used for
+ storing LUKS metadata or for other purpose), the size of the reserved
+ area is specified in the target arguments
+
+* superblock (4kiB)
+ * magic string - identifies that the device was formatted
+ * version
+ * log2(interleave sectors)
+ * integrity tag size
+ * the number of journal sections
+ * provided data sectors - the number of sectors that this target
+ provides (i.e. the size of the device minus the size of all
+ metadata and padding). The user of this target should not send
+ bios that access data beyond the "provided data sectors" limit.
+ * flags
+ SB_FLAG_HAVE_JOURNAL_MAC
+ - a flag is set if journal_mac is used
+ SB_FLAG_RECALCULATING
+ - recalculating is in progress
+ SB_FLAG_DIRTY_BITMAP
+ - journal area contains the bitmap of dirty
+ blocks
+ * log2(sectors per block)
+ * a position where recalculating finished
+* journal
+ The journal is divided into sections, each section contains:
+
+ * metadata area (4kiB), it contains journal entries
+
+ - every journal entry contains:
+
+ * logical sector (specifies where the data and tag should
+ be written)
+ * last 8 bytes of data
+ * integrity tag (the size is specified in the superblock)
+
+ - every metadata sector ends with
+
+ * mac (8-bytes), all the macs in 8 metadata sectors form a
+ 64-byte value. It is used to store hmac of sector
+ numbers in the journal section, to protect against a
+ possibility that the attacker tampers with sector
+ numbers in the journal.
+ * commit id
+
+ * data area (the size is variable; it depends on how many journal
+ entries fit into the metadata area)
+
+ - every sector in the data area contains:
+
+ * data (504 bytes of data, the last 8 bytes are stored in
+ the journal entry)
+ * commit id
+
+ To test if the whole journal section was written correctly, every
+ 512-byte sector of the journal ends with 8-byte commit id. If the
+ commit id matches on all sectors in a journal section, then it is
+ assumed that the section was written correctly. If the commit id
+ doesn't match, the section was written partially and it should not
+ be replayed.
+
+* one or more runs of interleaved tags and data.
+ Each run contains:
+
+ * tag area - it contains integrity tags. There is one tag for each
+ sector in the data area. The size of this area is always 4KiB or
+ greater.
+ * data area - it contains data sectors. The number of data sectors
+ in one run must be a power of two. log2 of this value is stored
+ in the superblock.
diff --git a/Documentation/admin-guide/device-mapper/dm-io.rst b/Documentation/admin-guide/device-mapper/dm-io.rst
new file mode 100644
index 000000000000..d2492917a1f5
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/dm-io.rst
@@ -0,0 +1,75 @@
+=====
+dm-io
+=====
+
+Dm-io provides synchronous and asynchronous I/O services. There are three
+types of I/O services available, and each type has a sync and an async
+version.
+
+The user must set up an io_region structure to describe the desired location
+of the I/O. Each io_region indicates a block-device along with the starting
+sector and size of the region::
+
+ struct io_region {
+ struct block_device *bdev;
+ sector_t sector;
+ sector_t count;
+ };
+
+Dm-io can read from one io_region or write to one or more io_regions. Writes
+to multiple regions are specified by an array of io_region structures.
+
+The first I/O service type takes a list of memory pages as the data buffer for
+the I/O, along with an offset into the first page::
+
+ struct page_list {
+ struct page_list *next;
+ struct page *page;
+ };
+
+ int dm_io_sync(unsigned int num_regions, struct io_region *where, int rw,
+ struct page_list *pl, unsigned int offset,
+ unsigned long *error_bits);
+ int dm_io_async(unsigned int num_regions, struct io_region *where, int rw,
+ struct page_list *pl, unsigned int offset,
+ io_notify_fn fn, void *context);
+
+The second I/O service type takes an array of bio vectors as the data buffer
+for the I/O. This service can be handy if the caller has a pre-assembled bio,
+but wants to direct different portions of the bio to different devices::
+
+ int dm_io_sync_bvec(unsigned int num_regions, struct io_region *where,
+ int rw, struct bio_vec *bvec,
+ unsigned long *error_bits);
+ int dm_io_async_bvec(unsigned int num_regions, struct io_region *where,
+ int rw, struct bio_vec *bvec,
+ io_notify_fn fn, void *context);
+
+The third I/O service type takes a pointer to a vmalloc'd memory buffer as the
+data buffer for the I/O. This service can be handy if the caller needs to do
+I/O to a large region but doesn't want to allocate a large number of individual
+memory pages::
+
+ int dm_io_sync_vm(unsigned int num_regions, struct io_region *where, int rw,
+ void *data, unsigned long *error_bits);
+ int dm_io_async_vm(unsigned int num_regions, struct io_region *where, int rw,
+ void *data, io_notify_fn fn, void *context);
+
+Callers of the asynchronous I/O services must include the name of a completion
+callback routine and a pointer to some context data for the I/O::
+
+ typedef void (*io_notify_fn)(unsigned long error, void *context);
+
+The "error" parameter in this callback, as well as the `*error` parameter in
+all of the synchronous versions, is a bitset (instead of a simple error value).
+In the case of an write-I/O to multiple regions, this bitset allows dm-io to
+indicate success or failure on each individual region.
+
+Before using any of the dm-io services, the user should call dm_io_get()
+and specify the number of pages they expect to perform I/O on concurrently.
+Dm-io will attempt to resize its mempool to make sure enough pages are
+always available in order to avoid unnecessary waiting while performing I/O.
+
+When the user is finished using the dm-io services, they should call
+dm_io_put() and specify the same number of pages that were given on the
+dm_io_get() call.
diff --git a/Documentation/admin-guide/device-mapper/dm-log.rst b/Documentation/admin-guide/device-mapper/dm-log.rst
new file mode 100644
index 000000000000..ba4fce39bc27
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/dm-log.rst
@@ -0,0 +1,57 @@
+=====================
+Device-Mapper Logging
+=====================
+The device-mapper logging code is used by some of the device-mapper
+RAID targets to track regions of the disk that are not consistent.
+A region (or portion of the address space) of the disk may be
+inconsistent because a RAID stripe is currently being operated on or
+a machine died while the region was being altered. In the case of
+mirrors, a region would be considered dirty/inconsistent while you
+are writing to it because the writes need to be replicated for all
+the legs of the mirror and may not reach the legs at the same time.
+Once all writes are complete, the region is considered clean again.
+
+There is a generic logging interface that the device-mapper RAID
+implementations use to perform logging operations (see
+dm_dirty_log_type in include/linux/dm-dirty-log.h). Various different
+logging implementations are available and provide different
+capabilities. The list includes:
+
+============== ==============================================================
+Type Files
+============== ==============================================================
+disk drivers/md/dm-log.c
+core drivers/md/dm-log.c
+userspace drivers/md/dm-log-userspace* include/linux/dm-log-userspace.h
+============== ==============================================================
+
+The "disk" log type
+-------------------
+This log implementation commits the log state to disk. This way, the
+logging state survives reboots/crashes.
+
+The "core" log type
+-------------------
+This log implementation keeps the log state in memory. The log state
+will not survive a reboot or crash, but there may be a small boost in
+performance. This method can also be used if no storage device is
+available for storing log state.
+
+The "userspace" log type
+------------------------
+This log type simply provides a way to export the log API to userspace,
+so log implementations can be done there. This is done by forwarding most
+logging requests to userspace, where a daemon receives and processes the
+request.
+
+The structure used for communication between kernel and userspace are
+located in include/linux/dm-log-userspace.h. Due to the frequency,
+diversity, and 2-way communication nature of the exchanges between
+kernel and userspace, 'connector' is used as the interface for
+communication.
+
+There are currently two userspace log implementations that leverage this
+framework - "clustered-disk" and "clustered-core". These implementations
+provide a cluster-coherent log for shared-storage. Device-mapper mirroring
+can be used in a shared-storage environment when the cluster log implementations
+are employed.
diff --git a/Documentation/admin-guide/device-mapper/dm-pcache.rst b/Documentation/admin-guide/device-mapper/dm-pcache.rst
new file mode 100644
index 000000000000..09d327ef4b14
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/dm-pcache.rst
@@ -0,0 +1,202 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=================================
+dm-pcache — Persistent Cache
+=================================
+
+*Author: Dongsheng Yang <dongsheng.yang@linux.dev>*
+
+This document describes *dm-pcache*, a Device-Mapper target that lets a
+byte-addressable *DAX* (persistent-memory, “pmem”) region act as a
+high-performance, crash-persistent cache in front of a slower block
+device. The code lives in `drivers/md/dm-pcache/`.
+
+Quick feature summary
+=====================
+
+* *Write-back* caching (only mode currently supported).
+* *16 MiB segments* allocated on the pmem device.
+* *Data CRC32* verification (optional, per cache).
+* Crash-safe: every metadata structure is duplicated (`PCACHE_META_INDEX_MAX
+ == 2`) and protected with CRC+sequence numbers.
+* *Multi-tree indexing* (indexing trees sharded by logical address) for high PMem parallelism
+* Pure *DAX path* I/O – no extra BIO round-trips
+* *Log-structured write-back* that preserves backend crash-consistency
+
+
+Constructor
+===========
+
+::
+
+ pcache <cache_dev> <backing_dev> [<number_of_optional_arguments> <cache_mode writeback> <data_crc true|false>]
+
+========================= ====================================================
+``cache_dev`` Any DAX-capable block device (``/dev/pmem0``…).
+ All metadata *and* cached blocks are stored here.
+
+``backing_dev`` The slow block device to be cached.
+
+``cache_mode`` Optional, Only ``writeback`` is accepted at the
+ moment.
+
+``data_crc`` Optional, default to ``false``
+
+ * ``true`` – store CRC32 for every cached entry
+ and verify on reads
+ * ``false`` – skip CRC (faster)
+========================= ====================================================
+
+Example
+-------
+
+.. code-block:: shell
+
+ dmsetup create pcache_sdb --table \
+ "0 $(blockdev --getsz /dev/sdb) pcache /dev/pmem0 /dev/sdb 4 cache_mode writeback data_crc true"
+
+The first time a pmem device is used, dm-pcache formats it automatically
+(super-block, cache_info, etc.).
+
+
+Status line
+===========
+
+``dmsetup status <device>`` (``STATUSTYPE_INFO``) prints:
+
+::
+
+ <sb_flags> <seg_total> <cache_segs> <segs_used> \
+ <gc_percent> <cache_flags> \
+ <key_head_seg>:<key_head_off> \
+ <dirty_tail_seg>:<dirty_tail_off> \
+ <key_tail_seg>:<key_tail_off>
+
+Field meanings
+--------------
+
+=============================== =============================================
+``sb_flags`` Super-block flags (e.g. endian marker).
+
+``seg_total`` Number of physical *pmem* segments.
+
+``cache_segs`` Number of segments used for cache.
+
+``segs_used`` Segments currently allocated (bitmap weight).
+
+``gc_percent`` Current GC high-water mark (0-90).
+
+``cache_flags`` Bit 0 – DATA_CRC enabled
+ Bit 1 – INIT_DONE (cache initialised)
+ Bits 2-5 – cache mode (0 == WB).
+
+``key_head`` Where new key-sets are being written.
+
+``dirty_tail`` First dirty key-set that still needs
+ write-back to the backing device.
+
+``key_tail`` First key-set that may be reclaimed by GC.
+=============================== =============================================
+
+
+Messages
+========
+
+*Change GC trigger*
+
+::
+
+ dmsetup message <dev> 0 gc_percent <0-90>
+
+
+Theory of operation
+===================
+
+Sub-devices
+-----------
+
+==================== =========================================================
+backing_dev Any block device (SSD/HDD/loop/LVM, etc.).
+cache_dev DAX device; must expose direct-access memory.
+==================== =========================================================
+
+Segments and key-sets
+---------------------
+
+* The pmem space is divided into *16 MiB segments*.
+* Each write allocates space from a per-CPU *data_head* inside a segment.
+* A *cache-key* records a logical range on the origin and where it lives
+ inside pmem (segment + offset + generation).
+* 128 keys form a *key-set* (kset); ksets are written sequentially in pmem
+ and are themselves crash-safe (CRC).
+* The pair *(key_tail, dirty_tail)* delimit clean/dirty and live/dead ksets.
+
+Write-back
+----------
+
+Dirty keys are queued into a tree; a background worker copies data
+back to the backing_dev and advances *dirty_tail*. A FLUSH/FUA bio from the
+upper layers forces an immediate metadata commit.
+
+Garbage collection
+------------------
+
+GC starts when ``segs_used >= seg_total * gc_percent / 100``. It walks
+from *key_tail*, frees segments whose every key has been invalidated, and
+advances *key_tail*.
+
+CRC verification
+----------------
+
+If ``data_crc is enabled`` dm-pcache computes a CRC32 over every cached data
+range when it is inserted and stores it in the on-media key. Reads
+validate the CRC before copying to the caller.
+
+
+Failure handling
+================
+
+* *pmem media errors* – all metadata copies are read with
+ ``copy_mc_to_kernel``; an uncorrectable error logs and aborts initialisation.
+* *Cache full* – if no free segment can be found, writes return ``-EBUSY``;
+ dm-pcache retries internally (request deferral).
+* *System crash* – on attach, the driver replays ksets from *key_tail* to
+ rebuild the in-core trees; every segment’s generation guards against
+ use-after-free keys.
+
+
+Limitations & TODO
+==================
+
+* Only *write-back* mode; other modes planned.
+* Only FIFO cache invalidate; other (LRU, ARC...) planned.
+* Table reload is not supported currently.
+* Discard planned.
+
+
+Example workflow
+================
+
+.. code-block:: shell
+
+ # 1. Create devices
+ dmsetup create pcache_sdb --table \
+ "0 $(blockdev --getsz /dev/sdb) pcache /dev/pmem0 /dev/sdb 4 cache_mode writeback data_crc true"
+
+ # 2. Put a filesystem on top
+ mkfs.ext4 /dev/mapper/pcache_sdb
+ mount /dev/mapper/pcache_sdb /mnt
+
+ # 3. Tune GC threshold to 80 %
+ dmsetup message pcache_sdb 0 gc_percent 80
+
+ # 4. Observe status
+ watch -n1 'dmsetup status pcache_sdb'
+
+ # 5. Shutdown
+ umount /mnt
+ dmsetup remove pcache_sdb
+
+
+``dm-pcache`` is under active development; feedback, bug reports and patches
+are very welcome!
diff --git a/Documentation/admin-guide/device-mapper/dm-queue-length.rst b/Documentation/admin-guide/device-mapper/dm-queue-length.rst
new file mode 100644
index 000000000000..d8e381c1cb02
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/dm-queue-length.rst
@@ -0,0 +1,48 @@
+===============
+dm-queue-length
+===============
+
+dm-queue-length is a path selector module for device-mapper targets,
+which selects a path with the least number of in-flight I/Os.
+The path selector name is 'queue-length'.
+
+Table parameters for each path: [<repeat_count>]
+
+::
+
+ <repeat_count>: The number of I/Os to dispatch using the selected
+ path before switching to the next path.
+ If not given, internal default is used. To check
+ the default value, see the activated table.
+
+Status for each path: <status> <fail-count> <in-flight>
+
+::
+
+ <status>: 'A' if the path is active, 'F' if the path is failed.
+ <fail-count>: The number of path failures.
+ <in-flight>: The number of in-flight I/Os on the path.
+
+
+Algorithm
+=========
+
+dm-queue-length increments/decrements 'in-flight' when an I/O is
+dispatched/completed respectively.
+dm-queue-length selects a path with the minimum 'in-flight'.
+
+
+Examples
+========
+In case that 2 paths (sda and sdb) are used with repeat_count == 128.
+
+::
+
+ # echo "0 10 multipath 0 0 1 1 queue-length 0 2 1 8:0 128 8:16 128" \
+ dmsetup create test
+ #
+ # dmsetup table
+ test: 0 10 multipath 0 0 1 1 queue-length 0 2 1 8:0 128 8:16 128
+ #
+ # dmsetup status
+ test: 0 10 multipath 2 0 0 0 1 1 E 0 2 1 8:0 A 0 0 8:16 A 0 0
diff --git a/Documentation/admin-guide/device-mapper/dm-raid.rst b/Documentation/admin-guide/device-mapper/dm-raid.rst
new file mode 100644
index 000000000000..bb17e26e3c1b
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/dm-raid.rst
@@ -0,0 +1,423 @@
+=======
+dm-raid
+=======
+
+The device-mapper RAID (dm-raid) target provides a bridge from DM to MD.
+It allows the MD RAID drivers to be accessed using a device-mapper
+interface.
+
+
+Mapping Table Interface
+-----------------------
+The target is named "raid" and it accepts the following parameters::
+
+ <raid_type> <#raid_params> <raid_params> \
+ <#raid_devs> <metadata_dev0> <dev0> [.. <metadata_devN> <devN>]
+
+<raid_type>:
+
+ ============= ===============================================================
+ raid0 RAID0 striping (no resilience)
+ raid1 RAID1 mirroring
+ raid4 RAID4 with dedicated last parity disk
+ raid5_n RAID5 with dedicated last parity disk supporting takeover
+ Same as raid4
+
+ - Transitory layout
+ raid5_la RAID5 left asymmetric
+
+ - rotating parity 0 with data continuation
+ raid5_ra RAID5 right asymmetric
+
+ - rotating parity N with data continuation
+ raid5_ls RAID5 left symmetric
+
+ - rotating parity 0 with data restart
+ raid5_rs RAID5 right symmetric
+
+ - rotating parity N with data restart
+ raid6_zr RAID6 zero restart
+
+ - rotating parity zero (left-to-right) with data restart
+ raid6_nr RAID6 N restart
+
+ - rotating parity N (right-to-left) with data restart
+ raid6_nc RAID6 N continue
+
+ - rotating parity N (right-to-left) with data continuation
+ raid6_n_6 RAID6 with dedicate parity disks
+
+ - parity and Q-syndrome on the last 2 disks;
+ layout for takeover from/to raid4/raid5_n
+ raid6_la_6 Same as "raid_la" plus dedicated last Q-syndrome disk
+
+ - layout for takeover from raid5_la from/to raid6
+ raid6_ra_6 Same as "raid5_ra" dedicated last Q-syndrome disk
+
+ - layout for takeover from raid5_ra from/to raid6
+ raid6_ls_6 Same as "raid5_ls" dedicated last Q-syndrome disk
+
+ - layout for takeover from raid5_ls from/to raid6
+ raid6_rs_6 Same as "raid5_rs" dedicated last Q-syndrome disk
+
+ - layout for takeover from raid5_rs from/to raid6
+ raid10 Various RAID10 inspired algorithms chosen by additional params
+ (see raid10_format and raid10_copies below)
+
+ - RAID10: Striped Mirrors (aka 'Striping on top of mirrors')
+ - RAID1E: Integrated Adjacent Stripe Mirroring
+ - RAID1E: Integrated Offset Stripe Mirroring
+ - and other similar RAID10 variants
+ ============= ===============================================================
+
+ Reference: Chapter 4 of
+ https://www.snia.org/sites/default/files/SNIA_DDF_Technical_Position_v2.0.pdf
+
+<#raid_params>: The number of parameters that follow.
+
+<raid_params> consists of
+
+ Mandatory parameters:
+ <chunk_size>:
+ Chunk size in sectors. This parameter is often known as
+ "stripe size". It is the only mandatory parameter and
+ is placed first.
+
+ followed by optional parameters (in any order):
+ [sync|nosync]
+ Force or prevent RAID initialization.
+
+ [rebuild <idx>]
+ Rebuild drive number 'idx' (first drive is 0).
+
+ [daemon_sleep <ms>]
+ Interval between runs of the bitmap daemon that
+ clear bits. A longer interval means less bitmap I/O but
+ resyncing after a failure is likely to take longer.
+
+ [min_recovery_rate <kB/sec/disk>]
+ Throttle RAID initialization
+ [max_recovery_rate <kB/sec/disk>]
+ Throttle RAID initialization
+ [write_mostly <idx>]
+ Mark drive index 'idx' write-mostly.
+ [max_write_behind <sectors>]
+ See '--write-behind=' (man mdadm)
+ [stripe_cache <sectors>]
+ Stripe cache size (RAID 4/5/6 only)
+ [region_size <sectors>]
+ The region_size multiplied by the number of regions is the
+ logical size of the array. The bitmap records the device
+ synchronisation state for each region.
+
+ [raid10_copies <# copies>], [raid10_format <near|far|offset>]
+ These two options are used to alter the default layout of
+ a RAID10 configuration. The number of copies is can be
+ specified, but the default is 2. There are also three
+ variations to how the copies are laid down - the default
+ is "near". Near copies are what most people think of with
+ respect to mirroring. If these options are left unspecified,
+ or 'raid10_copies 2' and/or 'raid10_format near' are given,
+ then the layouts for 2, 3 and 4 devices are:
+
+ ======== ========== ==============
+ 2 drives 3 drives 4 drives
+ ======== ========== ==============
+ A1 A1 A1 A1 A2 A1 A1 A2 A2
+ A2 A2 A2 A3 A3 A3 A3 A4 A4
+ A3 A3 A4 A4 A5 A5 A5 A6 A6
+ A4 A4 A5 A6 A6 A7 A7 A8 A8
+ .. .. .. .. .. .. .. .. ..
+ ======== ========== ==============
+
+ The 2-device layout is equivalent 2-way RAID1. The 4-device
+ layout is what a traditional RAID10 would look like. The
+ 3-device layout is what might be called a 'RAID1E - Integrated
+ Adjacent Stripe Mirroring'.
+
+ If 'raid10_copies 2' and 'raid10_format far', then the layouts
+ for 2, 3 and 4 devices are:
+
+ ======== ============ ===================
+ 2 drives 3 drives 4 drives
+ ======== ============ ===================
+ A1 A2 A1 A2 A3 A1 A2 A3 A4
+ A3 A4 A4 A5 A6 A5 A6 A7 A8
+ A5 A6 A7 A8 A9 A9 A10 A11 A12
+ .. .. .. .. .. .. .. .. ..
+ A2 A1 A3 A1 A2 A2 A1 A4 A3
+ A4 A3 A6 A4 A5 A6 A5 A8 A7
+ A6 A5 A9 A7 A8 A10 A9 A12 A11
+ .. .. .. .. .. .. .. .. ..
+ ======== ============ ===================
+
+ If 'raid10_copies 2' and 'raid10_format offset', then the
+ layouts for 2, 3 and 4 devices are:
+
+ ======== ========== ================
+ 2 drives 3 drives 4 drives
+ ======== ========== ================
+ A1 A2 A1 A2 A3 A1 A2 A3 A4
+ A2 A1 A3 A1 A2 A2 A1 A4 A3
+ A3 A4 A4 A5 A6 A5 A6 A7 A8
+ A4 A3 A6 A4 A5 A6 A5 A8 A7
+ A5 A6 A7 A8 A9 A9 A10 A11 A12
+ A6 A5 A9 A7 A8 A10 A9 A12 A11
+ .. .. .. .. .. .. .. .. ..
+ ======== ========== ================
+
+ Here we see layouts closely akin to 'RAID1E - Integrated
+ Offset Stripe Mirroring'.
+
+ [delta_disks <N>]
+ The delta_disks option value (-251 < N < +251) triggers
+ device removal (negative value) or device addition (positive
+ value) to any reshape supporting raid levels 4/5/6 and 10.
+ RAID levels 4/5/6 allow for addition of devices (metadata
+ and data device tuple), raid10_near and raid10_offset only
+ allow for device addition. raid10_far does not support any
+ reshaping at all.
+ A minimum of devices have to be kept to enforce resilience,
+ which is 3 devices for raid4/5 and 4 devices for raid6.
+
+ [data_offset <sectors>]
+ This option value defines the offset into each data device
+ where the data starts. This is used to provide out-of-place
+ reshaping space to avoid writing over data while
+ changing the layout of stripes, hence an interruption/crash
+ may happen at any time without the risk of losing data.
+ E.g. when adding devices to an existing raid set during
+ forward reshaping, the out-of-place space will be allocated
+ at the beginning of each raid device. The kernel raid4/5/6/10
+ MD personalities supporting such device addition will read the data from
+ the existing first stripes (those with smaller number of stripes)
+ starting at data_offset to fill up a new stripe with the larger
+ number of stripes, calculate the redundancy blocks (CRC/Q-syndrome)
+ and write that new stripe to offset 0. Same will be applied to all
+ N-1 other new stripes. This out-of-place scheme is used to change
+ the RAID type (i.e. the allocation algorithm) as well, e.g.
+ changing from raid5_ls to raid5_n.
+
+ [journal_dev <dev>]
+ This option adds a journal device to raid4/5/6 raid sets and
+ uses it to close the 'write hole' caused by the non-atomic updates
+ to the component devices which can cause data loss during recovery.
+ The journal device is used as writethrough thus causing writes to
+ be throttled versus non-journaled raid4/5/6 sets.
+ Takeover/reshape is not possible with a raid4/5/6 journal device;
+ it has to be deconfigured before requesting these.
+
+ [journal_mode <mode>]
+ This option sets the caching mode on journaled raid4/5/6 raid sets
+ (see 'journal_dev <dev>' above) to 'writethrough' or 'writeback'.
+ If 'writeback' is selected the journal device has to be resilient
+ and must not suffer from the 'write hole' problem itself (e.g. use
+ raid1 or raid10) to avoid a single point of failure.
+
+<#raid_devs>: The number of devices composing the array.
+ Each device consists of two entries. The first is the device
+ containing the metadata (if any); the second is the one containing the
+ data. A Maximum of 64 metadata/data device entries are supported
+ up to target version 1.8.0.
+ 1.9.0 supports up to 253 which is enforced by the used MD kernel runtime.
+
+ If a drive has failed or is missing at creation time, a '-' can be
+ given for both the metadata and data drives for a given position.
+
+
+Example Tables
+--------------
+
+::
+
+ # RAID4 - 4 data drives, 1 parity (no metadata devices)
+ # No metadata devices specified to hold superblock/bitmap info
+ # Chunk size of 1MiB
+ # (Lines separated for easy reading)
+
+ 0 1960893648 raid \
+ raid4 1 2048 \
+ 5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81
+
+ # RAID4 - 4 data drives, 1 parity (with metadata devices)
+ # Chunk size of 1MiB, force RAID initialization,
+ # min recovery rate at 20 kiB/sec/disk
+
+ 0 1960893648 raid \
+ raid4 4 2048 sync min_recovery_rate 20 \
+ 5 8:17 8:18 8:33 8:34 8:49 8:50 8:65 8:66 8:81 8:82
+
+
+Status Output
+-------------
+'dmsetup table' displays the table used to construct the mapping.
+The optional parameters are always printed in the order listed
+above with "sync" or "nosync" always output ahead of the other
+arguments, regardless of the order used when originally loading the table.
+Arguments that can be repeated are ordered by value.
+
+
+'dmsetup status' yields information on the state and health of the array.
+The output is as follows (normally a single line, but expanded here for
+clarity)::
+
+ 1: <s> <l> raid \
+ 2: <raid_type> <#devices> <health_chars> \
+ 3: <sync_ratio> <sync_action> <mismatch_cnt>
+
+Line 1 is the standard output produced by device-mapper.
+
+Line 2 & 3 are produced by the raid target and are best explained by example::
+
+ 0 1960893648 raid raid4 5 AAAAA 2/490221568 init 0
+
+Here we can see the RAID type is raid4, there are 5 devices - all of
+which are 'A'live, and the array is 2/490221568 complete with its initial
+recovery. Here is a fuller description of the individual fields:
+
+ =============== =========================================================
+ <raid_type> Same as the <raid_type> used to create the array.
+ <health_chars> One char for each device, indicating:
+
+ - 'A' = alive and in-sync
+ - 'a' = alive but not in-sync
+ - 'D' = dead/failed.
+ <sync_ratio> The ratio indicating how much of the array has undergone
+ the process described by 'sync_action'. If the
+ 'sync_action' is "check" or "repair", then the process
+ of "resync" or "recover" can be considered complete.
+ <sync_action> One of the following possible states:
+
+ idle
+ - No synchronization action is being performed.
+ frozen
+ - The current action has been halted.
+ resync
+ - Array is undergoing its initial synchronization
+ or is resynchronizing after an unclean shutdown
+ (possibly aided by a bitmap).
+ recover
+ - A device in the array is being rebuilt or
+ replaced.
+ check
+ - A user-initiated full check of the array is
+ being performed. All blocks are read and
+ checked for consistency. The number of
+ discrepancies found are recorded in
+ <mismatch_cnt>. No changes are made to the
+ array by this action.
+ repair
+ - The same as "check", but discrepancies are
+ corrected.
+ reshape
+ - The array is undergoing a reshape.
+ <mismatch_cnt> The number of discrepancies found between mirror copies
+ in RAID1/10 or wrong parity values found in RAID4/5/6.
+ This value is valid only after a "check" of the array
+ is performed. A healthy array has a 'mismatch_cnt' of 0.
+ <data_offset> The current data offset to the start of the user data on
+ each component device of a raid set (see the respective
+ raid parameter to support out-of-place reshaping).
+ <journal_char> - 'A' - active write-through journal device.
+ - 'a' - active write-back journal device.
+ - 'D' - dead journal device.
+ - '-' - no journal device.
+ =============== =========================================================
+
+
+Message Interface
+-----------------
+The dm-raid target will accept certain actions through the 'message' interface.
+('man dmsetup' for more information on the message interface.) These actions
+include:
+
+ ========= ================================================
+ "idle" Halt the current sync action.
+ "frozen" Freeze the current sync action.
+ "resync" Initiate/continue a resync.
+ "recover" Initiate/continue a recover process.
+ "check" Initiate a check (i.e. a "scrub") of the array.
+ "repair" Initiate a repair of the array.
+ ========= ================================================
+
+
+Discard Support
+---------------
+The implementation of discard support among hardware vendors varies.
+When a block is discarded, some storage devices will return zeroes when
+the block is read. These devices set the 'discard_zeroes_data'
+attribute. Other devices will return random data. Confusingly, some
+devices that advertise 'discard_zeroes_data' will not reliably return
+zeroes when discarded blocks are read! Since RAID 4/5/6 uses blocks
+from a number of devices to calculate parity blocks and (for performance
+reasons) relies on 'discard_zeroes_data' being reliable, it is important
+that the devices be consistent. Blocks may be discarded in the middle
+of a RAID 4/5/6 stripe and if subsequent read results are not
+consistent, the parity blocks may be calculated differently at any time;
+making the parity blocks useless for redundancy. It is important to
+understand how your hardware behaves with discards if you are going to
+enable discards with RAID 4/5/6.
+
+Since the behavior of storage devices is unreliable in this respect,
+even when reporting 'discard_zeroes_data', by default RAID 4/5/6
+discard support is disabled -- this ensures data integrity at the
+expense of losing some performance.
+
+Storage devices that properly support 'discard_zeroes_data' are
+increasingly whitelisted in the kernel and can thus be trusted.
+
+For trusted devices, the following dm-raid module parameter can be set
+to safely enable discard support for RAID 4/5/6:
+
+ 'devices_handle_discards_safely'
+
+
+Version History
+---------------
+
+::
+
+ 1.0.0 Initial version. Support for RAID 4/5/6
+ 1.1.0 Added support for RAID 1
+ 1.2.0 Handle creation of arrays that contain failed devices.
+ 1.3.0 Added support for RAID 10
+ 1.3.1 Allow device replacement/rebuild for RAID 10
+ 1.3.2 Fix/improve redundancy checking for RAID10
+ 1.4.0 Non-functional change. Removes arg from mapping function.
+ 1.4.1 RAID10 fix redundancy validation checks (commit 55ebbb5).
+ 1.4.2 Add RAID10 "far" and "offset" algorithm support.
+ 1.5.0 Add message interface to allow manipulation of the sync_action.
+ New status (STATUSTYPE_INFO) fields: sync_action and mismatch_cnt.
+ 1.5.1 Add ability to restore transiently failed devices on resume.
+ 1.5.2 'mismatch_cnt' is zero unless [last_]sync_action is "check".
+ 1.6.0 Add discard support (and devices_handle_discard_safely module param).
+ 1.7.0 Add support for MD RAID0 mappings.
+ 1.8.0 Explicitly check for compatible flags in the superblock metadata
+ and reject to start the raid set if any are set by a newer
+ target version, thus avoiding data corruption on a raid set
+ with a reshape in progress.
+ 1.9.0 Add support for RAID level takeover/reshape/region size
+ and set size reduction.
+ 1.9.1 Fix activation of existing RAID 4/10 mapped devices
+ 1.9.2 Don't emit '- -' on the status table line in case the constructor
+ fails reading a superblock. Correctly emit 'maj:min1 maj:min2' and
+ 'D' on the status line. If '- -' is passed into the constructor, emit
+ '- -' on the table line and '-' as the status line health character.
+ 1.10.0 Add support for raid4/5/6 journal device
+ 1.10.1 Fix data corruption on reshape request
+ 1.11.0 Fix table line argument order
+ (wrong raid10_copies/raid10_format sequence)
+ 1.11.1 Add raid4/5/6 journal write-back support via journal_mode option
+ 1.12.1 Fix for MD deadlock between mddev_suspend() and md_write_start() available
+ 1.13.0 Fix dev_health status at end of "recover" (was 'a', now 'A')
+ 1.13.1 Fix deadlock caused by early md_stop_writes(). Also fix size an
+ state races.
+ 1.13.2 Fix raid redundancy validation and avoid keeping raid set frozen
+ 1.14.0 Fix reshape race on small devices. Fix stripe adding reshape
+ deadlock/potential data corruption. Update superblock when
+ specific devices are requested via rebuild. Fix RAID leg
+ rebuild errors.
+ 1.15.0 Fix size extensions not being synchronized in case of new MD bitmap
+ pages allocated; also fix those not occurring after previous reductions
+ 1.15.1 Fix argument count and arguments for rebuild/write_mostly/journal_(dev|mode)
+ on the status line.
diff --git a/Documentation/admin-guide/device-mapper/dm-service-time.rst b/Documentation/admin-guide/device-mapper/dm-service-time.rst
new file mode 100644
index 000000000000..facf277fc13c
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/dm-service-time.rst
@@ -0,0 +1,101 @@
+===============
+dm-service-time
+===============
+
+dm-service-time is a path selector module for device-mapper targets,
+which selects a path with the shortest estimated service time for
+the incoming I/O.
+
+The service time for each path is estimated by dividing the total size
+of in-flight I/Os on a path with the performance value of the path.
+The performance value is a relative throughput value among all paths
+in a path-group, and it can be specified as a table argument.
+
+The path selector name is 'service-time'.
+
+Table parameters for each path:
+
+ [<repeat_count> [<relative_throughput>]]
+ <repeat_count>:
+ The number of I/Os to dispatch using the selected
+ path before switching to the next path.
+ If not given, internal default is used. To check
+ the default value, see the activated table.
+ <relative_throughput>:
+ The relative throughput value of the path
+ among all paths in the path-group.
+ The valid range is 0-100.
+ If not given, minimum value '1' is used.
+ If '0' is given, the path isn't selected while
+ other paths having a positive value are available.
+
+Status for each path:
+
+ <status> <fail-count> <in-flight-size> <relative_throughput>
+ <status>:
+ 'A' if the path is active, 'F' if the path is failed.
+ <fail-count>:
+ The number of path failures.
+ <in-flight-size>:
+ The size of in-flight I/Os on the path.
+ <relative_throughput>:
+ The relative throughput value of the path
+ among all paths in the path-group.
+
+
+Algorithm
+=========
+
+dm-service-time adds the I/O size to 'in-flight-size' when the I/O is
+dispatched and subtracts when completed.
+Basically, dm-service-time selects a path having minimum service time
+which is calculated by::
+
+ ('in-flight-size' + 'size-of-incoming-io') / 'relative_throughput'
+
+However, some optimizations below are used to reduce the calculation
+as much as possible.
+
+ 1. If the paths have the same 'relative_throughput', skip
+ the division and just compare the 'in-flight-size'.
+
+ 2. If the paths have the same 'in-flight-size', skip the division
+ and just compare the 'relative_throughput'.
+
+ 3. If some paths have non-zero 'relative_throughput' and others
+ have zero 'relative_throughput', ignore those paths with zero
+ 'relative_throughput'.
+
+If such optimizations can't be applied, calculate service time, and
+compare service time.
+If calculated service time is equal, the path having maximum
+'relative_throughput' may be better. So compare 'relative_throughput'
+then.
+
+
+Examples
+========
+In case that 2 paths (sda and sdb) are used with repeat_count == 128
+and sda has an average throughput 1GB/s and sdb has 4GB/s,
+'relative_throughput' value may be '1' for sda and '4' for sdb::
+
+ # echo "0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 1 8:16 128 4" \
+ dmsetup create test
+ #
+ # dmsetup table
+ test: 0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 1 8:16 128 4
+ #
+ # dmsetup status
+ test: 0 10 multipath 2 0 0 0 1 1 E 0 2 2 8:0 A 0 0 1 8:16 A 0 0 4
+
+
+Or '2' for sda and '8' for sdb would be also true::
+
+ # echo "0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 2 8:16 128 8" \
+ dmsetup create test
+ #
+ # dmsetup table
+ test: 0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 2 8:16 128 8
+ #
+ # dmsetup status
+ test: 0 10 multipath 2 0 0 0 1 1 E 0 2 2 8:0 A 0 0 2 8:16 A 0 0 8
diff --git a/Documentation/admin-guide/device-mapper/dm-uevent.rst b/Documentation/admin-guide/device-mapper/dm-uevent.rst
new file mode 100644
index 000000000000..4a8ee8d069c9
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/dm-uevent.rst
@@ -0,0 +1,110 @@
+====================
+device-mapper uevent
+====================
+
+The device-mapper uevent code adds the capability to device-mapper to create
+and send kobject uevents (uevents). Previously device-mapper events were only
+available through the ioctl interface. The advantage of the uevents interface
+is the event contains environment attributes providing increased context for
+the event avoiding the need to query the state of the device-mapper device after
+the event is received.
+
+There are two functions currently for device-mapper events. The first function
+listed creates the event and the second function sends the event(s)::
+
+ void dm_path_uevent(enum dm_uevent_type event_type, struct dm_target *ti,
+ const char *path, unsigned nr_valid_paths)
+
+ void dm_send_uevents(struct list_head *events, struct kobject *kobj)
+
+
+The variables added to the uevent environment are:
+
+Variable Name: DM_TARGET
+------------------------
+:Uevent Action(s): KOBJ_CHANGE
+:Type: string
+:Description:
+:Value: Name of device-mapper target that generated the event.
+
+Variable Name: DM_ACTION
+------------------------
+:Uevent Action(s): KOBJ_CHANGE
+:Type: string
+:Description:
+:Value: Device-mapper specific action that caused the uevent action.
+ PATH_FAILED - A path has failed;
+ PATH_REINSTATED - A path has been reinstated.
+
+Variable Name: DM_SEQNUM
+------------------------
+:Uevent Action(s): KOBJ_CHANGE
+:Type: unsigned integer
+:Description: A sequence number for this specific device-mapper device.
+:Value: Valid unsigned integer range.
+
+Variable Name: DM_PATH
+----------------------
+:Uevent Action(s): KOBJ_CHANGE
+:Type: string
+:Description: Major and minor number of the path device pertaining to this
+ event.
+:Value: Path name in the form of "Major:Minor"
+
+Variable Name: DM_NR_VALID_PATHS
+--------------------------------
+:Uevent Action(s): KOBJ_CHANGE
+:Type: unsigned integer
+:Description:
+:Value: Valid unsigned integer range.
+
+Variable Name: DM_NAME
+----------------------
+:Uevent Action(s): KOBJ_CHANGE
+:Type: string
+:Description: Name of the device-mapper device.
+:Value: Name
+
+Variable Name: DM_UUID
+----------------------
+:Uevent Action(s): KOBJ_CHANGE
+:Type: string
+:Description: UUID of the device-mapper device.
+:Value: UUID. (Empty string if there isn't one.)
+
+An example of the uevents generated as captured by udevmonitor is shown
+below
+
+1.) Path failure::
+
+ UEVENT[1192521009.711215] change@/block/dm-3
+ ACTION=change
+ DEVPATH=/block/dm-3
+ SUBSYSTEM=block
+ DM_TARGET=multipath
+ DM_ACTION=PATH_FAILED
+ DM_SEQNUM=1
+ DM_PATH=8:32
+ DM_NR_VALID_PATHS=0
+ DM_NAME=mpath2
+ DM_UUID=mpath-35333333000002328
+ MINOR=3
+ MAJOR=253
+ SEQNUM=1130
+
+2.) Path reinstate::
+
+ UEVENT[1192521132.989927] change@/block/dm-3
+ ACTION=change
+ DEVPATH=/block/dm-3
+ SUBSYSTEM=block
+ DM_TARGET=multipath
+ DM_ACTION=PATH_REINSTATED
+ DM_SEQNUM=2
+ DM_PATH=8:32
+ DM_NR_VALID_PATHS=1
+ DM_NAME=mpath2
+ DM_UUID=mpath-35333333000002328
+ MINOR=3
+ MAJOR=253
+ SEQNUM=1131
diff --git a/Documentation/admin-guide/device-mapper/dm-zoned.rst b/Documentation/admin-guide/device-mapper/dm-zoned.rst
new file mode 100644
index 000000000000..932383fe6e88
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/dm-zoned.rst
@@ -0,0 +1,194 @@
+========
+dm-zoned
+========
+
+The dm-zoned device mapper target exposes a zoned block device (ZBC and
+ZAC compliant devices) as a regular block device without any write
+pattern constraints. In effect, it implements a drive-managed zoned
+block device which hides from the user (a file system or an application
+doing raw block device accesses) the sequential write constraints of
+host-managed zoned block devices and can mitigate the potential
+device-side performance degradation due to excessive random writes on
+host-aware zoned block devices.
+
+For a more detailed description of the zoned block device models and
+their constraints see (for SCSI devices):
+
+https://www.t10.org/drafts.htm#ZBC_Family
+
+and (for ATA devices):
+
+http://www.t13.org/Documents/UploadedDocuments/docs2015/di537r05-Zoned_Device_ATA_Command_Set_ZAC.pdf
+
+The dm-zoned implementation is simple and minimizes system overhead (CPU
+and memory usage as well as storage capacity loss). For a 10TB
+host-managed disk with 256 MB zones, dm-zoned memory usage per disk
+instance is at most 4.5 MB and as little as 5 zones will be used
+internally for storing metadata and performing reclaim operations.
+
+dm-zoned target devices are formatted and checked using the dmzadm
+utility available at:
+
+https://github.com/hgst/dm-zoned-tools
+
+Algorithm
+=========
+
+dm-zoned implements an on-disk buffering scheme to handle non-sequential
+write accesses to the sequential zones of a zoned block device.
+Conventional zones are used for caching as well as for storing internal
+metadata. It can also use a regular block device together with the zoned
+block device; in that case the regular block device will be split logically
+in zones with the same size as the zoned block device. These zones will be
+placed in front of the zones from the zoned block device and will be handled
+just like conventional zones.
+
+The zones of the device(s) are separated into 2 types:
+
+1) Metadata zones: these are conventional zones used to store metadata.
+Metadata zones are not reported as usable capacity to the user.
+
+2) Data zones: all remaining zones, the vast majority of which will be
+sequential zones used exclusively to store user data. The conventional
+zones of the device may be used also for buffering user random writes.
+Data in these zones may be directly mapped to the conventional zone, but
+later moved to a sequential zone so that the conventional zone can be
+reused for buffering incoming random writes.
+
+dm-zoned exposes a logical device with a sector size of 4096 bytes,
+irrespective of the physical sector size of the backend zoned block
+device being used. This allows reducing the amount of metadata needed to
+manage valid blocks (blocks written).
+
+The on-disk metadata format is as follows:
+
+1) The first block of the first conventional zone found contains the
+super block which describes the on disk amount and position of metadata
+blocks.
+
+2) Following the super block, a set of blocks is used to describe the
+mapping of the logical device blocks. The mapping is done per chunk of
+blocks, with the chunk size equal to the zoned block device size. The
+mapping table is indexed by chunk number and each mapping entry
+indicates the zone number of the device storing the chunk of data. Each
+mapping entry may also indicate if the zone number of a conventional
+zone used to buffer random modification to the data zone.
+
+3) A set of blocks used to store bitmaps indicating the validity of
+blocks in the data zones follows the mapping table. A valid block is
+defined as a block that was written and not discarded. For a buffered
+data chunk, a block is always valid only in the data zone mapping the
+chunk or in the buffer zone of the chunk.
+
+For a logical chunk mapped to a conventional zone, all write operations
+are processed by directly writing to the zone. If the mapping zone is a
+sequential zone, the write operation is processed directly only if the
+write offset within the logical chunk is equal to the write pointer
+offset within of the sequential data zone (i.e. the write operation is
+aligned on the zone write pointer). Otherwise, write operations are
+processed indirectly using a buffer zone. In that case, an unused
+conventional zone is allocated and assigned to the chunk being
+accessed. Writing a block to the buffer zone of a chunk will
+automatically invalidate the same block in the sequential zone mapping
+the chunk. If all blocks of the sequential zone become invalid, the zone
+is freed and the chunk buffer zone becomes the primary zone mapping the
+chunk, resulting in native random write performance similar to a regular
+block device.
+
+Read operations are processed according to the block validity
+information provided by the bitmaps. Valid blocks are read either from
+the sequential zone mapping a chunk, or if the chunk is buffered, from
+the buffer zone assigned. If the accessed chunk has no mapping, or the
+accessed blocks are invalid, the read buffer is zeroed and the read
+operation terminated.
+
+After some time, the limited number of conventional zones available may
+be exhausted (all used to map chunks or buffer sequential zones) and
+unaligned writes to unbuffered chunks become impossible. To avoid this
+situation, a reclaim process regularly scans used conventional zones and
+tries to reclaim the least recently used zones by copying the valid
+blocks of the buffer zone to a free sequential zone. Once the copy
+completes, the chunk mapping is updated to point to the sequential zone
+and the buffer zone freed for reuse.
+
+Metadata Protection
+===================
+
+To protect metadata against corruption in case of sudden power loss or
+system crash, 2 sets of metadata zones are used. One set, the primary
+set, is used as the main metadata region, while the secondary set is
+used as a staging area. Modified metadata is first written to the
+secondary set and validated by updating the super block in the secondary
+set, a generation counter is used to indicate that this set contains the
+newest metadata. Once this operation completes, in place of metadata
+block updates can be done in the primary metadata set. This ensures that
+one of the set is always consistent (all modifications committed or none
+at all). Flush operations are used as a commit point. Upon reception of
+a flush request, metadata modification activity is temporarily blocked
+(for both incoming BIO processing and reclaim process) and all dirty
+metadata blocks are staged and updated. Normal operation is then
+resumed. Flushing metadata thus only temporarily delays write and
+discard requests. Read requests can be processed concurrently while
+metadata flush is being executed.
+
+If a regular device is used in conjunction with the zoned block device,
+a third set of metadata (without the zone bitmaps) is written to the
+start of the zoned block device. This metadata has a generation counter of
+'0' and will never be updated during normal operation; it just serves for
+identification purposes. The first and second copy of the metadata
+are located at the start of the regular block device.
+
+Usage
+=====
+
+A zoned block device must first be formatted using the dmzadm tool. This
+will analyze the device zone configuration, determine where to place the
+metadata sets on the device and initialize the metadata sets.
+
+Ex::
+
+ dmzadm --format /dev/sdxx
+
+
+If two drives are to be used, both devices must be specified, with the
+regular block device as the first device.
+
+Ex::
+
+ dmzadm --format /dev/sdxx /dev/sdyy
+
+
+Formatted device(s) can be started with the dmzadm utility, too.:
+
+Ex::
+
+ dmzadm --start /dev/sdxx /dev/sdyy
+
+
+Information about the internal layout and current usage of the zones can
+be obtained with the 'status' callback from dmsetup:
+
+Ex::
+
+ dmsetup status /dev/dm-X
+
+will return a line
+
+ 0 <size> zoned <nr_zones> zones <nr_unmap_rnd>/<nr_rnd> random <nr_unmap_seq>/<nr_seq> sequential
+
+where <nr_zones> is the total number of zones, <nr_unmap_rnd> is the number
+of unmapped (ie free) random zones, <nr_rnd> the total number of zones,
+<nr_unmap_seq> the number of unmapped sequential zones, and <nr_seq> the
+total number of sequential zones.
+
+Normally the reclaim process will be started once there are less than 50
+percent free random zones. In order to start the reclaim process manually
+even before reaching this threshold the 'dmsetup message' function can be
+used:
+
+Ex::
+
+ dmsetup message /dev/dm-X 0 reclaim
+
+will start the reclaim process and random zones will be moved to sequential
+zones.
diff --git a/Documentation/admin-guide/device-mapper/era.rst b/Documentation/admin-guide/device-mapper/era.rst
new file mode 100644
index 000000000000..90dd5c670b9f
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/era.rst
@@ -0,0 +1,116 @@
+======
+dm-era
+======
+
+Introduction
+============
+
+dm-era is a target that behaves similar to the linear target. In
+addition it keeps track of which blocks were written within a user
+defined period of time called an 'era'. Each era target instance
+maintains the current era as a monotonically increasing 32-bit
+counter.
+
+Use cases include tracking changed blocks for backup software, and
+partially invalidating the contents of a cache to restore cache
+coherency after rolling back a vendor snapshot.
+
+Constructor
+===========
+
+era <metadata dev> <origin dev> <block size>
+
+ ================ ======================================================
+ metadata dev fast device holding the persistent metadata
+ origin dev device holding data blocks that may change
+ block size block size of origin data device, granularity that is
+ tracked by the target
+ ================ ======================================================
+
+Messages
+========
+
+None of the dm messages take any arguments.
+
+checkpoint
+----------
+
+Possibly move to a new era. You shouldn't assume the era has
+incremented. After sending this message, you should check the
+current era via the status line.
+
+take_metadata_snap
+------------------
+
+Create a clone of the metadata, to allow a userland process to read it.
+
+drop_metadata_snap
+------------------
+
+Drop the metadata snapshot.
+
+Status
+======
+
+<metadata block size> <#used metadata blocks>/<#total metadata blocks>
+<current era> <held metadata root | '-'>
+
+========================= ==============================================
+metadata block size Fixed block size for each metadata block in
+ sectors
+#used metadata blocks Number of metadata blocks used
+#total metadata blocks Total number of metadata blocks
+current era The current era
+held metadata root The location, in blocks, of the metadata root
+ that has been 'held' for userspace read
+ access. '-' indicates there is no held root
+========================= ==============================================
+
+Detailed use case
+=================
+
+The scenario of invalidating a cache when rolling back a vendor
+snapshot was the primary use case when developing this target:
+
+Taking a vendor snapshot
+------------------------
+
+- Send a checkpoint message to the era target
+- Make a note of the current era in its status line
+- Take vendor snapshot (the era and snapshot should be forever
+ associated now).
+
+Rolling back to an vendor snapshot
+----------------------------------
+
+- Cache enters passthrough mode (see: dm-cache's docs in cache.txt)
+- Rollback vendor storage
+- Take metadata snapshot
+- Ascertain which blocks have been written since the snapshot was taken
+ by checking each block's era
+- Invalidate those blocks in the caching software
+- Cache returns to writeback/writethrough mode
+
+Memory usage
+============
+
+The target uses a bitset to record writes in the current era. It also
+has a spare bitset ready for switching over to a new era. Other than
+that it uses a few 4k blocks for updating metadata::
+
+ (4 * nr_blocks) bytes + buffers
+
+Resilience
+==========
+
+Metadata is updated on disk before a write to a previously unwritten
+block is performed. As such dm-era should not be effected by a hard
+crash such as power failure.
+
+Userland tools
+==============
+
+Userland tools are found in the increasingly poorly named
+thin-provisioning-tools project:
+
+ https://github.com/jthornber/thin-provisioning-tools
diff --git a/Documentation/admin-guide/device-mapper/index.rst b/Documentation/admin-guide/device-mapper/index.rst
new file mode 100644
index 000000000000..f1c1f4b824ba
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/index.rst
@@ -0,0 +1,49 @@
+=============
+Device Mapper
+=============
+
+.. toctree::
+ :maxdepth: 1
+
+ cache-policies
+ cache
+ delay
+ dm-clone
+ dm-crypt
+ dm-dust
+ dm-ebs
+ dm-flakey
+ dm-ima
+ dm-init
+ dm-integrity
+ dm-io
+ dm-log
+ dm-pcache
+ dm-queue-length
+ dm-raid
+ dm-service-time
+ dm-uevent
+ dm-zoned
+ era
+ kcopyd
+ linear
+ log-writes
+ persistent-data
+ snapshot
+ statistics
+ striped
+ switch
+ thin-provisioning
+ unstriped
+ vdo-design
+ vdo
+ verity
+ writecache
+ zero
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
diff --git a/Documentation/admin-guide/device-mapper/kcopyd.rst b/Documentation/admin-guide/device-mapper/kcopyd.rst
new file mode 100644
index 000000000000..7651d395127f
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/kcopyd.rst
@@ -0,0 +1,47 @@
+======
+kcopyd
+======
+
+Kcopyd provides the ability to copy a range of sectors from one block-device
+to one or more other block-devices, with an asynchronous completion
+notification. It is used by dm-snapshot and dm-mirror.
+
+Users of kcopyd must first create a client and indicate how many memory pages
+to set aside for their copy jobs. This is done with a call to
+kcopyd_client_create()::
+
+ int kcopyd_client_create(unsigned int num_pages,
+ struct kcopyd_client **result);
+
+To start a copy job, the user must set up io_region structures to describe
+the source and destinations of the copy. Each io_region indicates a
+block-device along with the starting sector and size of the region. The source
+of the copy is given as one io_region structure, and the destinations of the
+copy are given as an array of io_region structures::
+
+ struct io_region {
+ struct block_device *bdev;
+ sector_t sector;
+ sector_t count;
+ };
+
+To start the copy, the user calls kcopyd_copy(), passing in the client
+pointer, pointers to the source and destination io_regions, the name of a
+completion callback routine, and a pointer to some context data for the copy::
+
+ int kcopyd_copy(struct kcopyd_client *kc, struct io_region *from,
+ unsigned int num_dests, struct io_region *dests,
+ unsigned int flags, kcopyd_notify_fn fn, void *context);
+
+ typedef void (*kcopyd_notify_fn)(int read_err, unsigned int write_err,
+ void *context);
+
+When the copy completes, kcopyd will call the user's completion routine,
+passing back the user's context pointer. It will also indicate if a read or
+write error occurred during the copy.
+
+When a user is done with all their copy jobs, they should call
+kcopyd_client_destroy() to delete the kcopyd client, which will release the
+associated memory pages::
+
+ void kcopyd_client_destroy(struct kcopyd_client *kc);
diff --git a/Documentation/admin-guide/device-mapper/linear.rst b/Documentation/admin-guide/device-mapper/linear.rst
new file mode 100644
index 000000000000..9d17fc6e64a9
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/linear.rst
@@ -0,0 +1,63 @@
+=========
+dm-linear
+=========
+
+Device-Mapper's "linear" target maps a linear range of the Device-Mapper
+device onto a linear range of another device. This is the basic building
+block of logical volume managers.
+
+Parameters: <dev path> <offset>
+ <dev path>:
+ Full pathname to the underlying block-device, or a
+ "major:minor" device-number.
+ <offset>:
+ Starting sector within the device.
+
+
+Example scripts
+===============
+
+::
+
+ #!/bin/sh
+ # Create an identity mapping for a device
+ echo "0 `blockdev --getsz $1` linear $1 0" | dmsetup create identity
+
+::
+
+ #!/bin/sh
+ # Join 2 devices together
+ size1=`blockdev --getsz $1`
+ size2=`blockdev --getsz $2`
+ echo "0 $size1 linear $1 0
+ $size1 $size2 linear $2 0" | dmsetup create joined
+
+::
+
+ #!/usr/bin/perl -w
+ # Split a device into 4M chunks and then join them together in reverse order.
+
+ my $name = "reverse";
+ my $extent_size = 4 * 1024 * 2;
+ my $dev = $ARGV[0];
+ my $table = "";
+ my $count = 0;
+
+ if (!defined($dev)) {
+ die("Please specify a device.\n");
+ }
+
+ my $dev_size = `blockdev --getsz $dev`;
+ my $extents = int($dev_size / $extent_size) -
+ (($dev_size % $extent_size) ? 1 : 0);
+
+ while ($extents > 0) {
+ my $this_start = $count * $extent_size;
+ $extents--;
+ $count++;
+ my $this_offset = $extents * $extent_size;
+
+ $table .= "$this_start $extent_size linear $dev $this_offset\n";
+ }
+
+ `echo \"$table\" | dmsetup create $name`;
diff --git a/Documentation/admin-guide/device-mapper/log-writes.rst b/Documentation/admin-guide/device-mapper/log-writes.rst
new file mode 100644
index 000000000000..23141f2ffb7c
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/log-writes.rst
@@ -0,0 +1,145 @@
+=============
+dm-log-writes
+=============
+
+This target takes 2 devices, one to pass all IO to normally, and one to log all
+of the write operations to. This is intended for file system developers wishing
+to verify the integrity of metadata or data as the file system is written to.
+There is a log_write_entry written for every WRITE request and the target is
+able to take arbitrary data from userspace to insert into the log. The data
+that is in the WRITE requests is copied into the log to make the replay happen
+exactly as it happened originally.
+
+Log Ordering
+============
+
+We log things in order of completion once we are sure the write is no longer in
+cache. This means that normal WRITE requests are not actually logged until the
+next REQ_PREFLUSH request. This is to make it easier for userspace to replay
+the log in a way that correlates to what is on disk and not what is in cache,
+to make it easier to detect improper waiting/flushing.
+
+This works by attaching all WRITE requests to a list once the write completes.
+Once we see a REQ_PREFLUSH request we splice this list onto the request and once
+the FLUSH request completes we log all of the WRITEs and then the FLUSH. Only
+completed WRITEs, at the time the REQ_PREFLUSH is issued, are added in order to
+simulate the worst case scenario with regard to power failures. Consider the
+following example (W means write, C means complete):
+
+ W1,W2,W3,C3,C2,Wflush,C1,Cflush
+
+The log would show the following:
+
+ W3,W2,flush,W1....
+
+Again this is to simulate what is actually on disk, this allows us to detect
+cases where a power failure at a particular point in time would create an
+inconsistent file system.
+
+Any REQ_FUA requests bypass this flushing mechanism and are logged as soon as
+they complete as those requests will obviously bypass the device cache.
+
+Any REQ_OP_DISCARD requests are treated like WRITE requests. Otherwise we would
+have all the DISCARD requests, and then the WRITE requests and then the FLUSH
+request. Consider the following example:
+
+ WRITE block 1, DISCARD block 1, FLUSH
+
+If we logged DISCARD when it completed, the replay would look like this:
+
+ DISCARD 1, WRITE 1, FLUSH
+
+which isn't quite what happened and wouldn't be caught during the log replay.
+
+Target interface
+================
+
+i) Constructor
+
+ log-writes <dev_path> <log_dev_path>
+
+ ============= ==============================================
+ dev_path Device that all of the IO will go to normally.
+ log_dev_path Device where the log entries are written to.
+ ============= ==============================================
+
+ii) Status
+
+ <#logged entries> <highest allocated sector>
+
+ =========================== ========================
+ #logged entries Number of logged entries
+ highest allocated sector Highest allocated sector
+ =========================== ========================
+
+iii) Messages
+
+ mark <description>
+
+ You can use a dmsetup message to set an arbitrary mark in a log.
+ For example say you want to fsck a file system after every
+ write, but first you need to replay up to the mkfs to make sure
+ we're fsck'ing something reasonable, you would do something like
+ this::
+
+ mkfs.btrfs -f /dev/mapper/log
+ dmsetup message log 0 mark mkfs
+ <run test>
+
+ This would allow you to replay the log up to the mkfs mark and
+ then replay from that point on doing the fsck check in the
+ interval that you want.
+
+ Every log has a mark at the end labeled "dm-log-writes-end".
+
+Userspace component
+===================
+
+There is a userspace tool that will replay the log for you in various ways.
+It can be found here: https://github.com/josefbacik/log-writes
+
+Example usage
+=============
+
+Say you want to test fsync on your file system. You would do something like
+this::
+
+ TABLE="0 $(blockdev --getsz /dev/sdb) log-writes /dev/sdb /dev/sdc"
+ dmsetup create log --table "$TABLE"
+ mkfs.btrfs -f /dev/mapper/log
+ dmsetup message log 0 mark mkfs
+
+ mount /dev/mapper/log /mnt/btrfs-test
+ <some test that does fsync at the end>
+ dmsetup message log 0 mark fsync
+ md5sum /mnt/btrfs-test/foo
+ umount /mnt/btrfs-test
+
+ dmsetup remove log
+ replay-log --log /dev/sdc --replay /dev/sdb --end-mark fsync
+ mount /dev/sdb /mnt/btrfs-test
+ md5sum /mnt/btrfs-test/foo
+ <verify md5sum's are correct>
+
+ Another option is to do a complicated file system operation and verify the file
+ system is consistent during the entire operation. You could do this with:
+
+ TABLE="0 $(blockdev --getsz /dev/sdb) log-writes /dev/sdb /dev/sdc"
+ dmsetup create log --table "$TABLE"
+ mkfs.btrfs -f /dev/mapper/log
+ dmsetup message log 0 mark mkfs
+
+ mount /dev/mapper/log /mnt/btrfs-test
+ <fsstress to dirty the fs>
+ btrfs filesystem balance /mnt/btrfs-test
+ umount /mnt/btrfs-test
+ dmsetup remove log
+
+ replay-log --log /dev/sdc --replay /dev/sdb --end-mark mkfs
+ btrfsck /dev/sdb
+ replay-log --log /dev/sdc --replay /dev/sdb --start-mark mkfs \
+ --fsck "btrfsck /dev/sdb" --check fua
+
+And that will replay the log until it sees a FUA request, run the fsck command
+and if the fsck passes it will replay to the next FUA, until it is completed or
+the fsck command exists abnormally.
diff --git a/Documentation/admin-guide/device-mapper/persistent-data.rst b/Documentation/admin-guide/device-mapper/persistent-data.rst
new file mode 100644
index 000000000000..2065c3c5a091
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/persistent-data.rst
@@ -0,0 +1,88 @@
+===============
+Persistent data
+===============
+
+Introduction
+============
+
+The more-sophisticated device-mapper targets require complex metadata
+that is managed in kernel. In late 2010 we were seeing that various
+different targets were rolling their own data structures, for example:
+
+- Mikulas Patocka's multisnap implementation
+- Heinz Mauelshagen's thin provisioning target
+- Another btree-based caching target posted to dm-devel
+- Another multi-snapshot target based on a design of Daniel Phillips
+
+Maintaining these data structures takes a lot of work, so if possible
+we'd like to reduce the number.
+
+The persistent-data library is an attempt to provide a re-usable
+framework for people who want to store metadata in device-mapper
+targets. It's currently used by the thin-provisioning target and an
+upcoming hierarchical storage target.
+
+Overview
+========
+
+The main documentation is in the header files which can all be found
+under drivers/md/persistent-data.
+
+The block manager
+-----------------
+
+dm-block-manager.[hc]
+
+This provides access to the data on disk in fixed sized-blocks. There
+is a read/write locking interface to prevent concurrent accesses, and
+keep data that is being used in the cache.
+
+Clients of persistent-data are unlikely to use this directly.
+
+The transaction manager
+-----------------------
+
+dm-transaction-manager.[hc]
+
+This restricts access to blocks and enforces copy-on-write semantics.
+The only way you can get hold of a writable block through the
+transaction manager is by shadowing an existing block (ie. doing
+copy-on-write) or allocating a fresh one. Shadowing is elided within
+the same transaction so performance is reasonable. The commit method
+ensures that all data is flushed before it writes the superblock.
+On power failure your metadata will be as it was when last committed.
+
+The Space Maps
+--------------
+
+dm-space-map.h
+dm-space-map-metadata.[hc]
+dm-space-map-disk.[hc]
+
+On-disk data structures that keep track of reference counts of blocks.
+Also acts as the allocator of new blocks. Currently two
+implementations: a simpler one for managing blocks on a different
+device (eg. thinly-provisioned data blocks); and one for managing
+the metadata space. The latter is complicated by the need to store
+its own data within the space it's managing.
+
+The data structures
+-------------------
+
+dm-btree.[hc]
+dm-btree-remove.c
+dm-btree-spine.c
+dm-btree-internal.h
+
+Currently there is only one data structure, a hierarchical btree.
+There are plans to add more. For example, something with an
+array-like interface would see a lot of use.
+
+The btree is 'hierarchical' in that you can define it to be composed
+of nested btrees, and take multiple keys. For example, the
+thin-provisioning target uses a btree with two levels of nesting.
+The first maps a device id to a mapping tree, and that in turn maps a
+virtual block to a physical block.
+
+Values stored in the btrees can have arbitrary size. Keys are always
+64bits, although nesting allows you to use multiple keys.
diff --git a/Documentation/admin-guide/device-mapper/snapshot.rst b/Documentation/admin-guide/device-mapper/snapshot.rst
new file mode 100644
index 000000000000..ccdd8b587a74
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/snapshot.rst
@@ -0,0 +1,196 @@
+==============================
+Device-mapper snapshot support
+==============================
+
+Device-mapper allows you, without massive data copying:
+
+- To create snapshots of any block device i.e. mountable, saved states of
+ the block device which are also writable without interfering with the
+ original content;
+- To create device "forks", i.e. multiple different versions of the
+ same data stream.
+- To merge a snapshot of a block device back into the snapshot's origin
+ device.
+
+In the first two cases, dm copies only the chunks of data that get
+changed and uses a separate copy-on-write (COW) block device for
+storage.
+
+For snapshot merge the contents of the COW storage are merged back into
+the origin device.
+
+
+There are three dm targets available:
+snapshot, snapshot-origin, and snapshot-merge.
+
+- snapshot-origin <origin>
+
+which will normally have one or more snapshots based on it.
+Reads will be mapped directly to the backing device. For each write, the
+original data will be saved in the <COW device> of each snapshot to keep
+its visible content unchanged, at least until the <COW device> fills up.
+
+
+- snapshot <origin> <COW device> <persistent?> <chunksize>
+ [<# feature args> [<arg>]*]
+
+A snapshot of the <origin> block device is created. Changed chunks of
+<chunksize> sectors will be stored on the <COW device>. Writes will
+only go to the <COW device>. Reads will come from the <COW device> or
+from <origin> for unchanged data. <COW device> will often be
+smaller than the origin and if it fills up the snapshot will become
+useless and be disabled, returning errors. So it is important to monitor
+the amount of free space and expand the <COW device> before it fills up.
+
+<persistent?> is P (Persistent) or N (Not persistent - will not survive
+after reboot). O (Overflow) can be added as a persistent store option
+to allow userspace to advertise its support for seeing "Overflow" in the
+snapshot status. So supported store types are "P", "PO" and "N".
+
+The difference between persistent and transient is with transient
+snapshots less metadata must be saved on disk - they can be kept in
+memory by the kernel.
+
+When loading or unloading the snapshot target, the corresponding
+snapshot-origin or snapshot-merge target must be suspended. A failure to
+suspend the origin target could result in data corruption.
+
+Optional features:
+
+ discard_zeroes_cow - a discard issued to the snapshot device that
+ maps to entire chunks to will zero the corresponding exception(s) in
+ the snapshot's exception store.
+
+ discard_passdown_origin - a discard to the snapshot device is passed
+ down to the snapshot-origin's underlying device. This doesn't cause
+ copy-out to the snapshot exception store because the snapshot-origin
+ target is bypassed.
+
+ The discard_passdown_origin feature depends on the discard_zeroes_cow
+ feature being enabled.
+
+
+- snapshot-merge <origin> <COW device> <persistent> <chunksize>
+ [<# feature args> [<arg>]*]
+
+takes the same table arguments as the snapshot target except it only
+works with persistent snapshots. This target assumes the role of the
+"snapshot-origin" target and must not be loaded if the "snapshot-origin"
+is still present for <origin>.
+
+Creates a merging snapshot that takes control of the changed chunks
+stored in the <COW device> of an existing snapshot, through a handover
+procedure, and merges these chunks back into the <origin>. Once merging
+has started (in the background) the <origin> may be opened and the merge
+will continue while I/O is flowing to it. Changes to the <origin> are
+deferred until the merging snapshot's corresponding chunk(s) have been
+merged. Once merging has started the snapshot device, associated with
+the "snapshot" target, will return -EIO when accessed.
+
+
+How snapshot is used by LVM2
+============================
+When you create the first LVM2 snapshot of a volume, four dm devices are used:
+
+1) a device containing the original mapping table of the source volume;
+2) a device used as the <COW device>;
+3) a "snapshot" device, combining #1 and #2, which is the visible snapshot
+ volume;
+4) the "original" volume (which uses the device number used by the original
+ source volume), whose table is replaced by a "snapshot-origin" mapping
+ from device #1.
+
+A fixed naming scheme is used, so with the following commands::
+
+ lvcreate -L 1G -n base volumeGroup
+ lvcreate -L 100M --snapshot -n snap volumeGroup/base
+
+we'll have this situation (with volumes in above order)::
+
+ # dmsetup table|grep volumeGroup
+
+ volumeGroup-base-real: 0 2097152 linear 8:19 384
+ volumeGroup-snap-cow: 0 204800 linear 8:19 2097536
+ volumeGroup-snap: 0 2097152 snapshot 254:11 254:12 P 16
+ volumeGroup-base: 0 2097152 snapshot-origin 254:11
+
+ # ls -lL /dev/mapper/volumeGroup-*
+ brw------- 1 root root 254, 11 29 ago 18:15 /dev/mapper/volumeGroup-base-real
+ brw------- 1 root root 254, 12 29 ago 18:15 /dev/mapper/volumeGroup-snap-cow
+ brw------- 1 root root 254, 13 29 ago 18:15 /dev/mapper/volumeGroup-snap
+ brw------- 1 root root 254, 10 29 ago 18:14 /dev/mapper/volumeGroup-base
+
+
+How snapshot-merge is used by LVM2
+==================================
+A merging snapshot assumes the role of the "snapshot-origin" while
+merging. As such the "snapshot-origin" is replaced with
+"snapshot-merge". The "-real" device is not changed and the "-cow"
+device is renamed to <origin name>-cow to aid LVM2's cleanup of the
+merging snapshot after it completes. The "snapshot" that hands over its
+COW device to the "snapshot-merge" is deactivated (unless using lvchange
+--refresh); but if it is left active it will simply return I/O errors.
+
+A snapshot will merge into its origin with the following command::
+
+ lvconvert --merge volumeGroup/snap
+
+we'll now have this situation::
+
+ # dmsetup table|grep volumeGroup
+
+ volumeGroup-base-real: 0 2097152 linear 8:19 384
+ volumeGroup-base-cow: 0 204800 linear 8:19 2097536
+ volumeGroup-base: 0 2097152 snapshot-merge 254:11 254:12 P 16
+
+ # ls -lL /dev/mapper/volumeGroup-*
+ brw------- 1 root root 254, 11 29 ago 18:15 /dev/mapper/volumeGroup-base-real
+ brw------- 1 root root 254, 12 29 ago 18:16 /dev/mapper/volumeGroup-base-cow
+ brw------- 1 root root 254, 10 29 ago 18:16 /dev/mapper/volumeGroup-base
+
+
+How to determine when a merging is complete
+===========================================
+The snapshot-merge and snapshot status lines end with:
+
+ <sectors_allocated>/<total_sectors> <metadata_sectors>
+
+Both <sectors_allocated> and <total_sectors> include both data and metadata.
+During merging, the number of sectors allocated gets smaller and
+smaller. Merging has finished when the number of sectors holding data
+is zero, in other words <sectors_allocated> == <metadata_sectors>.
+
+Here is a practical example (using a hybrid of lvm and dmsetup commands)::
+
+ # lvs
+ LV VG Attr LSize Origin Snap% Move Log Copy% Convert
+ base volumeGroup owi-a- 4.00g
+ snap volumeGroup swi-a- 1.00g base 18.97
+
+ # dmsetup status volumeGroup-snap
+ 0 8388608 snapshot 397896/2097152 1560
+ ^^^^ metadata sectors
+
+ # lvconvert --merge -b volumeGroup/snap
+ Merging of volume snap started.
+
+ # lvs volumeGroup/snap
+ LV VG Attr LSize Origin Snap% Move Log Copy% Convert
+ base volumeGroup Owi-a- 4.00g 17.23
+
+ # dmsetup status volumeGroup-base
+ 0 8388608 snapshot-merge 281688/2097152 1104
+
+ # dmsetup status volumeGroup-base
+ 0 8388608 snapshot-merge 180480/2097152 712
+
+ # dmsetup status volumeGroup-base
+ 0 8388608 snapshot-merge 16/2097152 16
+
+Merging has finished.
+
+::
+
+ # lvs
+ LV VG Attr LSize Origin Snap% Move Log Copy% Convert
+ base volumeGroup owi-a- 4.00g
diff --git a/Documentation/admin-guide/device-mapper/statistics.rst b/Documentation/admin-guide/device-mapper/statistics.rst
new file mode 100644
index 000000000000..41ded0bc5933
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/statistics.rst
@@ -0,0 +1,225 @@
+=============
+DM statistics
+=============
+
+Device Mapper supports the collection of I/O statistics on user-defined
+regions of a DM device. If no regions are defined no statistics are
+collected so there isn't any performance impact. Only bio-based DM
+devices are currently supported.
+
+Each user-defined region specifies a starting sector, length and step.
+Individual statistics will be collected for each step-sized area within
+the range specified.
+
+The I/O statistics counters for each step-sized area of a region are
+in the same format as `/sys/block/*/stat` or `/proc/diskstats` (see:
+Documentation/admin-guide/iostats.rst). But two extra counters (12 and 13) are
+provided: total time spent reading and writing. When the histogram
+argument is used, the 14th parameter is reported that represents the
+histogram of latencies. All these counters may be accessed by sending
+the @stats_print message to the appropriate DM device via dmsetup.
+
+The reported times are in milliseconds and the granularity depends on
+the kernel ticks. When the option precise_timestamps is used, the
+reported times are in nanoseconds.
+
+Each region has a corresponding unique identifier, which we call a
+region_id, that is assigned when the region is created. The region_id
+must be supplied when querying statistics about the region, deleting the
+region, etc. Unique region_ids enable multiple userspace programs to
+request and process statistics for the same DM device without stepping
+on each other's data.
+
+The creation of DM statistics will allocate memory via kmalloc or
+fallback to using vmalloc space. At most, 1/4 of the overall system
+memory may be allocated by DM statistics. The admin can see how much
+memory is used by reading:
+
+ /sys/module/dm_mod/parameters/stats_current_allocated_bytes
+
+Messages
+========
+
+ @stats_create <range> <step> [<number_of_optional_arguments> <optional_arguments>...] [<program_id> [<aux_data>]]
+ Create a new region and return the region_id.
+
+ <range>
+ "-"
+ whole device
+ "<start_sector>+<length>"
+ a range of <length> 512-byte sectors
+ starting with <start_sector>.
+
+ <step>
+ "<area_size>"
+ the range is subdivided into areas each containing
+ <area_size> sectors.
+ "/<number_of_areas>"
+ the range is subdivided into the specified
+ number of areas.
+
+ <number_of_optional_arguments>
+ The number of optional arguments
+
+ <optional_arguments>
+ The following optional arguments are supported:
+
+ precise_timestamps
+ use precise timer with nanosecond resolution
+ instead of the "jiffies" variable. When this argument is
+ used, the resulting times are in nanoseconds instead of
+ milliseconds. Precise timestamps are a little bit slower
+ to obtain than jiffies-based timestamps.
+ histogram:n1,n2,n3,n4,...
+ collect histogram of latencies. The
+ numbers n1, n2, etc are times that represent the boundaries
+ of the histogram. If precise_timestamps is not used, the
+ times are in milliseconds, otherwise they are in
+ nanoseconds. For each range, the kernel will report the
+ number of requests that completed within this range. For
+ example, if we use "histogram:10,20,30", the kernel will
+ report four numbers a:b:c:d. a is the number of requests
+ that took 0-10 ms to complete, b is the number of requests
+ that took 10-20 ms to complete, c is the number of requests
+ that took 20-30 ms to complete and d is the number of
+ requests that took more than 30 ms to complete.
+
+ <program_id>
+ An optional parameter. A name that uniquely identifies
+ the userspace owner of the range. This groups ranges together
+ so that userspace programs can identify the ranges they
+ created and ignore those created by others.
+ The kernel returns this string back in the output of
+ @stats_list message, but it doesn't use it for anything else.
+ If we omit the number of optional arguments, program id must not
+ be a number, otherwise it would be interpreted as the number of
+ optional arguments.
+
+ <aux_data>
+ An optional parameter. A word that provides auxiliary data
+ that is useful to the client program that created the range.
+ The kernel returns this string back in the output of
+ @stats_list message, but it doesn't use this value for anything.
+
+ @stats_delete <region_id>
+ Delete the region with the specified id.
+
+ <region_id>
+ region_id returned from @stats_create
+
+ @stats_clear <region_id>
+ Clear all the counters except the in-flight i/o counters.
+
+ <region_id>
+ region_id returned from @stats_create
+
+ @stats_list [<program_id>]
+ List all regions registered with @stats_create.
+
+ <program_id>
+ An optional parameter.
+ If this parameter is specified, only matching regions
+ are returned.
+ If it is not specified, all regions are returned.
+
+ Output format:
+ <region_id>: <start_sector>+<length> <step> <program_id> <aux_data>
+ precise_timestamps histogram:n1,n2,n3,...
+
+ The strings "precise_timestamps" and "histogram" are printed only
+ if they were specified when creating the region.
+
+ @stats_print <region_id> [<starting_line> <number_of_lines>]
+ Print counters for each step-sized area of a region.
+
+ <region_id>
+ region_id returned from @stats_create
+
+ <starting_line>
+ The index of the starting line in the output.
+ If omitted, all lines are returned.
+
+ <number_of_lines>
+ The number of lines to include in the output.
+ If omitted, all lines are returned.
+
+ Output format for each step-sized area of a region:
+
+ <start_sector>+<length>
+ counters
+
+ The first 11 counters have the same meaning as
+ `/sys/block/*/stat or /proc/diskstats`.
+
+ Please refer to Documentation/admin-guide/iostats.rst for details.
+
+ 1. the number of reads completed
+ 2. the number of reads merged
+ 3. the number of sectors read
+ 4. the number of milliseconds spent reading
+ 5. the number of writes completed
+ 6. the number of writes merged
+ 7. the number of sectors written
+ 8. the number of milliseconds spent writing
+ 9. the number of I/Os currently in progress
+ 10. the number of milliseconds spent doing I/Os
+ 11. the weighted number of milliseconds spent doing I/Os
+
+ Additional counters:
+
+ 12. the total time spent reading in milliseconds
+ 13. the total time spent writing in milliseconds
+
+ @stats_print_clear <region_id> [<starting_line> <number_of_lines>]
+ Atomically print and then clear all the counters except the
+ in-flight i/o counters. Useful when the client consuming the
+ statistics does not want to lose any statistics (those updated
+ between printing and clearing).
+
+ <region_id>
+ region_id returned from @stats_create
+
+ <starting_line>
+ The index of the starting line in the output.
+ If omitted, all lines are printed and then cleared.
+
+ <number_of_lines>
+ The number of lines to process.
+ If omitted, all lines are printed and then cleared.
+
+ @stats_set_aux <region_id> <aux_data>
+ Store auxiliary data aux_data for the specified region.
+
+ <region_id>
+ region_id returned from @stats_create
+
+ <aux_data>
+ The string that identifies data which is useful to the client
+ program that created the range. The kernel returns this
+ string back in the output of @stats_list message, but it
+ doesn't use this value for anything.
+
+Examples
+========
+
+Subdivide the DM device 'vol' into 100 pieces and start collecting
+statistics on them::
+
+ dmsetup message vol 0 @stats_create - /100
+
+Set the auxiliary data string to "foo bar baz" (the escape for each
+space must also be escaped, otherwise the shell will consume them)::
+
+ dmsetup message vol 0 @stats_set_aux 0 foo\\ bar\\ baz
+
+List the statistics::
+
+ dmsetup message vol 0 @stats_list
+
+Print the statistics::
+
+ dmsetup message vol 0 @stats_print 0
+
+Delete the statistics::
+
+ dmsetup message vol 0 @stats_delete 0
diff --git a/Documentation/admin-guide/device-mapper/striped.rst b/Documentation/admin-guide/device-mapper/striped.rst
new file mode 100644
index 000000000000..e9a8da192ae1
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/striped.rst
@@ -0,0 +1,61 @@
+=========
+dm-stripe
+=========
+
+Device-Mapper's "striped" target is used to create a striped (i.e. RAID-0)
+device across one or more underlying devices. Data is written in "chunks",
+with consecutive chunks rotating among the underlying devices. This can
+potentially provide improved I/O throughput by utilizing several physical
+devices in parallel.
+
+Parameters: <num devs> <chunk size> [<dev path> <offset>]+
+ <num devs>:
+ Number of underlying devices.
+ <chunk size>:
+ Size of each chunk of data. Must be at least as
+ large as the system's PAGE_SIZE.
+ <dev path>:
+ Full pathname to the underlying block-device, or a
+ "major:minor" device-number.
+ <offset>:
+ Starting sector within the device.
+
+One or more underlying devices can be specified. The striped device size must
+be a multiple of the chunk size multiplied by the number of underlying devices.
+
+
+Example scripts
+===============
+
+::
+
+ #!/usr/bin/perl -w
+ # Create a striped device across any number of underlying devices. The device
+ # will be called "stripe_dev" and have a chunk-size of 128k.
+
+ my $chunk_size = 128 * 2;
+ my $dev_name = "stripe_dev";
+ my $num_devs = @ARGV;
+ my @devs = @ARGV;
+ my ($min_dev_size, $stripe_dev_size, $i);
+
+ if (!$num_devs) {
+ die("Specify at least one device\n");
+ }
+
+ $min_dev_size = `blockdev --getsz $devs[0]`;
+ for ($i = 1; $i < $num_devs; $i++) {
+ my $this_size = `blockdev --getsz $devs[$i]`;
+ $min_dev_size = ($min_dev_size < $this_size) ?
+ $min_dev_size : $this_size;
+ }
+
+ $stripe_dev_size = $min_dev_size * $num_devs;
+ $stripe_dev_size -= $stripe_dev_size % ($chunk_size * $num_devs);
+
+ $table = "0 $stripe_dev_size striped $num_devs $chunk_size";
+ for ($i = 0; $i < $num_devs; $i++) {
+ $table .= " $devs[$i] 0";
+ }
+
+ `echo $table | dmsetup create $dev_name`;
diff --git a/Documentation/admin-guide/device-mapper/switch.rst b/Documentation/admin-guide/device-mapper/switch.rst
new file mode 100644
index 000000000000..7dde06be1a4f
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/switch.rst
@@ -0,0 +1,141 @@
+=========
+dm-switch
+=========
+
+The device-mapper switch target creates a device that supports an
+arbitrary mapping of fixed-size regions of I/O across a fixed set of
+paths. The path used for any specific region can be switched
+dynamically by sending the target a message.
+
+It maps I/O to underlying block devices efficiently when there is a large
+number of fixed-sized address regions but there is no simple pattern
+that would allow for a compact representation of the mapping such as
+dm-stripe.
+
+Background
+----------
+
+Dell EqualLogic and some other iSCSI storage arrays use a distributed
+frameless architecture. In this architecture, the storage group
+consists of a number of distinct storage arrays ("members") each having
+independent controllers, disk storage and network adapters. When a LUN
+is created it is spread across multiple members. The details of the
+spreading are hidden from initiators connected to this storage system.
+The storage group exposes a single target discovery portal, no matter
+how many members are being used. When iSCSI sessions are created, each
+session is connected to an eth port on a single member. Data to a LUN
+can be sent on any iSCSI session, and if the blocks being accessed are
+stored on another member the I/O will be forwarded as required. This
+forwarding is invisible to the initiator. The storage layout is also
+dynamic, and the blocks stored on disk may be moved from member to
+member as needed to balance the load.
+
+This architecture simplifies the management and configuration of both
+the storage group and initiators. In a multipathing configuration, it
+is possible to set up multiple iSCSI sessions to use multiple network
+interfaces on both the host and target to take advantage of the
+increased network bandwidth. An initiator could use a simple round
+robin algorithm to send I/O across all paths and let the storage array
+members forward it as necessary, but there is a performance advantage to
+sending data directly to the correct member.
+
+A device-mapper table already lets you map different regions of a
+device onto different targets. However in this architecture the LUN is
+spread with an address region size on the order of 10s of MBs, which
+means the resulting table could have more than a million entries and
+consume far too much memory.
+
+Using this device-mapper switch target we can now build a two-layer
+device hierarchy:
+
+ Upper Tier - Determine which array member the I/O should be sent to.
+ Lower Tier - Load balance amongst paths to a particular member.
+
+The lower tier consists of a single dm multipath device for each member.
+Each of these multipath devices contains the set of paths directly to
+the array member in one priority group, and leverages existing path
+selectors to load balance amongst these paths. We also build a
+non-preferred priority group containing paths to other array members for
+failover reasons.
+
+The upper tier consists of a single dm-switch device. This device uses
+a bitmap to look up the location of the I/O and choose the appropriate
+lower tier device to route the I/O. By using a bitmap we are able to
+use 4 bits for each address range in a 16 member group (which is very
+large for us). This is a much denser representation than the dm table
+b-tree can achieve.
+
+Construction Parameters
+=======================
+
+ <num_paths> <region_size> <num_optional_args> [<optional_args>...] [<dev_path> <offset>]+
+ <num_paths>
+ The number of paths across which to distribute the I/O.
+
+ <region_size>
+ The number of 512-byte sectors in a region. Each region can be redirected
+ to any of the available paths.
+
+ <num_optional_args>
+ The number of optional arguments. Currently, no optional arguments
+ are supported and so this must be zero.
+
+ <dev_path>
+ The block device that represents a specific path to the device.
+
+ <offset>
+ The offset of the start of data on the specific <dev_path> (in units
+ of 512-byte sectors). This number is added to the sector number when
+ forwarding the request to the specific path. Typically it is zero.
+
+Messages
+========
+
+set_region_mappings <index>:<path_nr> [<index>]:<path_nr> [<index>]:<path_nr>...
+
+Modify the region table by specifying which regions are redirected to
+which paths.
+
+<index>
+ The region number (region size was specified in constructor parameters).
+ If index is omitted, the next region (previous index + 1) is used.
+ Expressed in hexadecimal (WITHOUT any prefix like 0x).
+
+<path_nr>
+ The path number in the range 0 ... (<num_paths> - 1).
+ Expressed in hexadecimal (WITHOUT any prefix like 0x).
+
+R<n>,<m>
+ This parameter allows repetitive patterns to be loaded quickly. <n> and <m>
+ are hexadecimal numbers. The last <n> mappings are repeated in the next <m>
+ slots.
+
+Status
+======
+
+No status line is reported.
+
+Example
+=======
+
+Assume that you have volumes vg1/switch0 vg1/switch1 vg1/switch2 with
+the same size.
+
+Create a switch device with 64kB region size::
+
+ dmsetup create switch --table "0 `blockdev --getsz /dev/vg1/switch0`
+ switch 3 128 0 /dev/vg1/switch0 0 /dev/vg1/switch1 0 /dev/vg1/switch2 0"
+
+Set mappings for the first 7 entries to point to devices switch0, switch1,
+switch2, switch0, switch1, switch2, switch1::
+
+ dmsetup message switch 0 set_region_mappings 0:0 :1 :2 :0 :1 :2 :1
+
+Set repetitive mapping. This command::
+
+ dmsetup message switch 0 set_region_mappings 1000:1 :2 R2,10
+
+is equivalent to::
+
+ dmsetup message switch 0 set_region_mappings 1000:1 :2 :1 :2 :1 :2 :1 :2 \
+ :1 :2 :1 :2 :1 :2 :1 :2 :1 :2
diff --git a/Documentation/admin-guide/device-mapper/thin-provisioning.rst b/Documentation/admin-guide/device-mapper/thin-provisioning.rst
new file mode 100644
index 000000000000..b2fa49a5608a
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/thin-provisioning.rst
@@ -0,0 +1,427 @@
+=================
+Thin provisioning
+=================
+
+Introduction
+============
+
+This document describes a collection of device-mapper targets that
+between them implement thin-provisioning and snapshots.
+
+The main highlight of this implementation, compared to the previous
+implementation of snapshots, is that it allows many virtual devices to
+be stored on the same data volume. This simplifies administration and
+allows the sharing of data between volumes, thus reducing disk usage.
+
+Another significant feature is support for an arbitrary depth of
+recursive snapshots (snapshots of snapshots of snapshots ...). The
+previous implementation of snapshots did this by chaining together
+lookup tables, and so performance was O(depth). This new
+implementation uses a single data structure to avoid this degradation
+with depth. Fragmentation may still be an issue, however, in some
+scenarios.
+
+Metadata is stored on a separate device from data, giving the
+administrator some freedom, for example to:
+
+- Improve metadata resilience by storing metadata on a mirrored volume
+ but data on a non-mirrored one.
+
+- Improve performance by storing the metadata on SSD.
+
+Status
+======
+
+These targets are considered safe for production use. But different use
+cases will have different performance characteristics, for example due
+to fragmentation of the data volume.
+
+If you find this software is not performing as expected please mail
+dm-devel@redhat.com with details and we'll try our best to improve
+things for you.
+
+Userspace tools for checking and repairing the metadata have been fully
+developed and are available as 'thin_check' and 'thin_repair'. The name
+of the package that provides these utilities varies by distribution (on
+a Red Hat distribution it is named 'device-mapper-persistent-data').
+
+Cookbook
+========
+
+This section describes some quick recipes for using thin provisioning.
+They use the dmsetup program to control the device-mapper driver
+directly. End users will be advised to use a higher-level volume
+manager such as LVM2 once support has been added.
+
+Pool device
+-----------
+
+The pool device ties together the metadata volume and the data volume.
+It maps I/O linearly to the data volume and updates the metadata via
+two mechanisms:
+
+- Function calls from the thin targets
+
+- Device-mapper 'messages' from userspace which control the creation of new
+ virtual devices amongst other things.
+
+Setting up a fresh pool device
+------------------------------
+
+Setting up a pool device requires a valid metadata device, and a
+data device. If you do not have an existing metadata device you can
+make one by zeroing the first 4k to indicate empty metadata.
+
+ dd if=/dev/zero of=$metadata_dev bs=4096 count=1
+
+The amount of metadata you need will vary according to how many blocks
+are shared between thin devices (i.e. through snapshots). If you have
+less sharing than average you'll need a larger-than-average metadata device.
+
+As a guide, we suggest you calculate the number of bytes to use in the
+metadata device as 48 * $data_dev_size / $data_block_size but round it up
+to 2MiB if the answer is smaller. If you're creating large numbers of
+snapshots which are recording large amounts of change, you may find you
+need to increase this.
+
+The largest size supported is 16GiB: If the device is larger,
+a warning will be issued and the excess space will not be used.
+
+Reloading a pool table
+----------------------
+
+You may reload a pool's table, indeed this is how the pool is resized
+if it runs out of space. (N.B. While specifying a different metadata
+device when reloading is not forbidden at the moment, things will go
+wrong if it does not route I/O to exactly the same on-disk location as
+previously.)
+
+Using an existing pool device
+-----------------------------
+
+::
+
+ dmsetup create pool \
+ --table "0 20971520 thin-pool $metadata_dev $data_dev \
+ $data_block_size $low_water_mark"
+
+$data_block_size gives the smallest unit of disk space that can be
+allocated at a time expressed in units of 512-byte sectors.
+$data_block_size must be between 128 (64KiB) and 2097152 (1GiB) and a
+multiple of 128 (64KiB). $data_block_size cannot be changed after the
+thin-pool is created. People primarily interested in thin provisioning
+may want to use a value such as 1024 (512KiB). People doing lots of
+snapshotting may want a smaller value such as 128 (64KiB). If you are
+not zeroing newly-allocated data, a larger $data_block_size in the
+region of 262144 (128MiB) is suggested.
+
+$low_water_mark is expressed in blocks of size $data_block_size. If
+free space on the data device drops below this level then a dm event
+will be triggered which a userspace daemon should catch allowing it to
+extend the pool device. Only one such event will be sent.
+
+No special event is triggered if a just resumed device's free space is below
+the low water mark. However, resuming a device always triggers an
+event; a userspace daemon should verify that free space exceeds the low
+water mark when handling this event.
+
+A low water mark for the metadata device is maintained in the kernel and
+will trigger a dm event if free space on the metadata device drops below
+it.
+
+Updating on-disk metadata
+-------------------------
+
+On-disk metadata is committed every time a FLUSH or FUA bio is written.
+If no such requests are made then commits will occur every second. This
+means the thin-provisioning target behaves like a physical disk that has
+a volatile write cache. If power is lost you may lose some recent
+writes. The metadata should always be consistent in spite of any crash.
+
+If data space is exhausted the pool will either error or queue IO
+according to the configuration (see: error_if_no_space). If metadata
+space is exhausted or a metadata operation fails: the pool will error IO
+until the pool is taken offline and repair is performed to 1) fix any
+potential inconsistencies and 2) clear the flag that imposes repair.
+Once the pool's metadata device is repaired it may be resized, which
+will allow the pool to return to normal operation. Note that if a pool
+is flagged as needing repair, the pool's data and metadata devices
+cannot be resized until repair is performed. It should also be noted
+that when the pool's metadata space is exhausted the current metadata
+transaction is aborted. Given that the pool will cache IO whose
+completion may have already been acknowledged to upper IO layers
+(e.g. filesystem) it is strongly suggested that consistency checks
+(e.g. fsck) be performed on those layers when repair of the pool is
+required.
+
+Thin provisioning
+-----------------
+
+i) Creating a new thinly-provisioned volume.
+
+ To create a new thinly- provisioned volume you must send a message to an
+ active pool device, /dev/mapper/pool in this example::
+
+ dmsetup message /dev/mapper/pool 0 "create_thin 0"
+
+ Here '0' is an identifier for the volume, a 24-bit number. It's up
+ to the caller to allocate and manage these identifiers. If the
+ identifier is already in use, the message will fail with -EEXIST.
+
+ii) Using a thinly-provisioned volume.
+
+ Thinly-provisioned volumes are activated using the 'thin' target::
+
+ dmsetup create thin --table "0 2097152 thin /dev/mapper/pool 0"
+
+ The last parameter is the identifier for the thinp device.
+
+Internal snapshots
+------------------
+
+i) Creating an internal snapshot.
+
+ Snapshots are created with another message to the pool.
+
+ N.B. If the origin device that you wish to snapshot is active, you
+ must suspend it before creating the snapshot to avoid corruption.
+ This is NOT enforced at the moment, so please be careful!
+
+ ::
+
+ dmsetup suspend /dev/mapper/thin
+ dmsetup message /dev/mapper/pool 0 "create_snap 1 0"
+ dmsetup resume /dev/mapper/thin
+
+ Here '1' is the identifier for the volume, a 24-bit number. '0' is the
+ identifier for the origin device.
+
+ii) Using an internal snapshot.
+
+ Once created, the user doesn't have to worry about any connection
+ between the origin and the snapshot. Indeed the snapshot is no
+ different from any other thinly-provisioned device and can be
+ snapshotted itself via the same method. It's perfectly legal to
+ have only one of them active, and there's no ordering requirement on
+ activating or removing them both. (This differs from conventional
+ device-mapper snapshots.)
+
+ Activate it exactly the same way as any other thinly-provisioned volume::
+
+ dmsetup create snap --table "0 2097152 thin /dev/mapper/pool 1"
+
+External snapshots
+------------------
+
+You can use an external **read only** device as an origin for a
+thinly-provisioned volume. Any read to an unprovisioned area of the
+thin device will be passed through to the origin. Writes trigger
+the allocation of new blocks as usual.
+
+One use case for this is VM hosts that want to run guests on
+thinly-provisioned volumes but have the base image on another device
+(possibly shared between many VMs).
+
+You must not write to the origin device if you use this technique!
+Of course, you may write to the thin device and take internal snapshots
+of the thin volume.
+
+i) Creating a snapshot of an external device
+
+ This is the same as creating a thin device.
+ You don't mention the origin at this stage.
+
+ ::
+
+ dmsetup message /dev/mapper/pool 0 "create_thin 0"
+
+ii) Using a snapshot of an external device.
+
+ Append an extra parameter to the thin target specifying the origin::
+
+ dmsetup create snap --table "0 2097152 thin /dev/mapper/pool 0 /dev/image"
+
+ N.B. All descendants (internal snapshots) of this snapshot require the
+ same extra origin parameter.
+
+Deactivation
+------------
+
+All devices using a pool must be deactivated before the pool itself
+can be.
+
+::
+
+ dmsetup remove thin
+ dmsetup remove snap
+ dmsetup remove pool
+
+Reference
+=========
+
+'thin-pool' target
+------------------
+
+i) Constructor
+
+ ::
+
+ thin-pool <metadata dev> <data dev> <data block size (sectors)> \
+ <low water mark (blocks)> [<number of feature args> [<arg>]*]
+
+ Optional feature arguments:
+
+ skip_block_zeroing:
+ Skip the zeroing of newly-provisioned blocks.
+
+ ignore_discard:
+ Disable discard support.
+
+ no_discard_passdown:
+ Don't pass discards down to the underlying
+ data device, but just remove the mapping.
+
+ read_only:
+ Don't allow any changes to be made to the pool
+ metadata. This mode is only available after the
+ thin-pool has been created and first used in full
+ read/write mode. It cannot be specified on initial
+ thin-pool creation.
+
+ error_if_no_space:
+ Error IOs, instead of queueing, if no space.
+
+ Data block size must be between 64KiB (128 sectors) and 1GiB
+ (2097152 sectors) inclusive.
+
+
+ii) Status
+
+ ::
+
+ <transaction id> <used metadata blocks>/<total metadata blocks>
+ <used data blocks>/<total data blocks> <held metadata root>
+ ro|rw|out_of_data_space [no_]discard_passdown [error|queue]_if_no_space
+ needs_check|- metadata_low_watermark
+
+ transaction id:
+ A 64-bit number used by userspace to help synchronise with metadata
+ from volume managers.
+
+ used data blocks / total data blocks
+ If the number of free blocks drops below the pool's low water mark a
+ dm event will be sent to userspace. This event is edge-triggered and
+ it will occur only once after each resume so volume manager writers
+ should register for the event and then check the target's status.
+
+ held metadata root:
+ The location, in blocks, of the metadata root that has been
+ 'held' for userspace read access. '-' indicates there is no
+ held root.
+
+ discard_passdown|no_discard_passdown
+ Whether or not discards are actually being passed down to the
+ underlying device. When this is enabled when loading the table,
+ it can get disabled if the underlying device doesn't support it.
+
+ ro|rw|out_of_data_space
+ If the pool encounters certain types of device failures it will
+ drop into a read-only metadata mode in which no changes to
+ the pool metadata (like allocating new blocks) are permitted.
+
+ In serious cases where even a read-only mode is deemed unsafe
+ no further I/O will be permitted and the status will just
+ contain the string 'Fail'. The userspace recovery tools
+ should then be used.
+
+ error_if_no_space|queue_if_no_space
+ If the pool runs out of data or metadata space, the pool will
+ either queue or error the IO destined to the data device. The
+ default is to queue the IO until more space is added or the
+ 'no_space_timeout' expires. The 'no_space_timeout' dm-thin-pool
+ module parameter can be used to change this timeout -- it
+ defaults to 60 seconds but may be disabled using a value of 0.
+
+ needs_check
+ A metadata operation has failed, resulting in the needs_check
+ flag being set in the metadata's superblock. The metadata
+ device must be deactivated and checked/repaired before the
+ thin-pool can be made fully operational again. '-' indicates
+ needs_check is not set.
+
+ metadata_low_watermark:
+ Value of metadata low watermark in blocks. The kernel sets this
+ value internally but userspace needs to know this value to
+ determine if an event was caused by crossing this threshold.
+
+iii) Messages
+
+ create_thin <dev id>
+ Create a new thinly-provisioned device.
+ <dev id> is an arbitrary unique 24-bit identifier chosen by
+ the caller.
+
+ create_snap <dev id> <origin id>
+ Create a new snapshot of another thinly-provisioned device.
+ <dev id> is an arbitrary unique 24-bit identifier chosen by
+ the caller.
+ <origin id> is the identifier of the thinly-provisioned device
+ of which the new device will be a snapshot.
+
+ delete <dev id>
+ Deletes a thin device. Irreversible.
+
+ set_transaction_id <current id> <new id>
+ Userland volume managers, such as LVM, need a way to
+ synchronise their external metadata with the internal metadata of the
+ pool target. The thin-pool target offers to store an
+ arbitrary 64-bit transaction id and return it on the target's
+ status line. To avoid races you must provide what you think
+ the current transaction id is when you change it with this
+ compare-and-swap message.
+
+ reserve_metadata_snap
+ Reserve a copy of the data mapping btree for use by userland.
+ This allows userland to inspect the mappings as they were when
+ this message was executed. Use the pool's status command to
+ get the root block associated with the metadata snapshot.
+
+ release_metadata_snap
+ Release a previously reserved copy of the data mapping btree.
+
+'thin' target
+-------------
+
+i) Constructor
+
+ ::
+
+ thin <pool dev> <dev id> [<external origin dev>]
+
+ pool dev:
+ the thin-pool device, e.g. /dev/mapper/my_pool or 253:0
+
+ dev id:
+ the internal device identifier of the device to be
+ activated.
+
+ external origin dev:
+ an optional block device outside the pool to be treated as a
+ read-only snapshot origin: reads to unprovisioned areas of the
+ thin target will be mapped to this device.
+
+The pool doesn't store any size against the thin devices. If you
+load a thin target that is smaller than you've been using previously,
+then you'll have no access to blocks mapped beyond the end. If you
+load a target that is bigger than before, then extra blocks will be
+provisioned as and when needed.
+
+ii) Status
+
+ <nr mapped sectors> <highest mapped sector>
+ If the pool has encountered device errors and failed, the status
+ will just contain the string 'Fail'. The userspace recovery
+ tools should then be used.
+
+ In the case where <nr mapped sectors> is 0, there is no highest
+ mapped sector and the value of <highest mapped sector> is unspecified.
diff --git a/Documentation/admin-guide/device-mapper/unstriped.rst b/Documentation/admin-guide/device-mapper/unstriped.rst
new file mode 100644
index 000000000000..5772ccdd1f5f
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/unstriped.rst
@@ -0,0 +1,135 @@
+================================
+Device-mapper "unstriped" target
+================================
+
+Introduction
+============
+
+The device-mapper "unstriped" target provides a transparent mechanism to
+unstripe a device-mapper "striped" target to access the underlying disks
+without having to touch the true backing block-device. It can also be
+used to unstripe a hardware RAID-0 to access backing disks.
+
+Parameters:
+<number of stripes> <chunk size> <stripe #> <dev_path> <offset>
+
+<number of stripes>
+ The number of stripes in the RAID 0.
+
+<chunk size>
+ The amount of 512B sectors in the chunk striping.
+
+<dev_path>
+ The block device you wish to unstripe.
+
+<stripe #>
+ The stripe number within the device that corresponds to physical
+ drive you wish to unstripe. This must be 0 indexed.
+
+
+Why use this module?
+====================
+
+An example of undoing an existing dm-stripe
+-------------------------------------------
+
+This small bash script will setup 4 loop devices and use the existing
+striped target to combine the 4 devices into one. It then will use
+the unstriped target on top of the striped device to access the
+individual backing loop devices. We write data to the newly exposed
+unstriped devices and verify the data written matches the correct
+underlying device on the striped array::
+
+ #!/bin/bash
+
+ MEMBER_SIZE=$((128 * 1024 * 1024))
+ NUM=4
+ SEQ_END=$((${NUM}-1))
+ CHUNK=256
+ BS=4096
+
+ RAID_SIZE=$((${MEMBER_SIZE}*${NUM}/512))
+ DM_PARMS="0 ${RAID_SIZE} striped ${NUM} ${CHUNK}"
+ COUNT=$((${MEMBER_SIZE} / ${BS}))
+
+ for i in $(seq 0 ${SEQ_END}); do
+ dd if=/dev/zero of=member-${i} bs=${MEMBER_SIZE} count=1 oflag=direct
+ losetup /dev/loop${i} member-${i}
+ DM_PARMS+=" /dev/loop${i} 0"
+ done
+
+ echo $DM_PARMS | dmsetup create raid0
+ for i in $(seq 0 ${SEQ_END}); do
+ echo "0 1 unstriped ${NUM} ${CHUNK} ${i} /dev/mapper/raid0 0" | dmsetup create set-${i}
+ done;
+
+ for i in $(seq 0 ${SEQ_END}); do
+ dd if=/dev/urandom of=/dev/mapper/set-${i} bs=${BS} count=${COUNT} oflag=direct
+ diff /dev/mapper/set-${i} member-${i}
+ done;
+
+ for i in $(seq 0 ${SEQ_END}); do
+ dmsetup remove set-${i}
+ done
+
+ dmsetup remove raid0
+
+ for i in $(seq 0 ${SEQ_END}); do
+ losetup -d /dev/loop${i}
+ rm -f member-${i}
+ done
+
+Another example
+---------------
+
+Intel NVMe drives contain two cores on the physical device.
+Each core of the drive has segregated access to its LBA range.
+The current LBA model has a RAID 0 128k chunk on each core, resulting
+in a 256k stripe across the two cores::
+
+ Core 0: Core 1:
+ __________ __________
+ | LBA 512| | LBA 768|
+ | LBA 0 | | LBA 256|
+ ---------- ----------
+
+The purpose of this unstriping is to provide better QoS in noisy
+neighbor environments. When two partitions are created on the
+aggregate drive without this unstriping, reads on one partition
+can affect writes on another partition. This is because the partitions
+are striped across the two cores. When we unstripe this hardware RAID 0
+and make partitions on each new exposed device the two partitions are now
+physically separated.
+
+With the dm-unstriped target we're able to segregate an fio script that
+has read and write jobs that are independent of each other. Compared to
+when we run the test on a combined drive with partitions, we were able
+to get a 92% reduction in read latency using this device mapper target.
+
+
+Example dmsetup usage
+=====================
+
+unstriped on top of Intel NVMe device that has 2 cores
+------------------------------------------------------
+
+::
+
+ dmsetup create nvmset0 --table '0 512 unstriped 2 256 0 /dev/nvme0n1 0'
+ dmsetup create nvmset1 --table '0 512 unstriped 2 256 1 /dev/nvme0n1 0'
+
+There will now be two devices that expose Intel NVMe core 0 and 1
+respectively::
+
+ /dev/mapper/nvmset0
+ /dev/mapper/nvmset1
+
+unstriped on top of striped with 4 drives using 128K chunk size
+---------------------------------------------------------------
+
+::
+
+ dmsetup create raid_disk0 --table '0 512 unstriped 4 256 0 /dev/mapper/striped 0'
+ dmsetup create raid_disk1 --table '0 512 unstriped 4 256 1 /dev/mapper/striped 0'
+ dmsetup create raid_disk2 --table '0 512 unstriped 4 256 2 /dev/mapper/striped 0'
+ dmsetup create raid_disk3 --table '0 512 unstriped 4 256 3 /dev/mapper/striped 0'
diff --git a/Documentation/admin-guide/device-mapper/vdo-design.rst b/Documentation/admin-guide/device-mapper/vdo-design.rst
new file mode 100644
index 000000000000..faa0ecd4a5ae
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/vdo-design.rst
@@ -0,0 +1,633 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+
+================
+Design of dm-vdo
+================
+
+The dm-vdo (virtual data optimizer) target provides inline deduplication,
+compression, zero-block elimination, and thin provisioning. A dm-vdo target
+can be backed by up to 256TB of storage, and can present a logical size of
+up to 4PB. This target was originally developed at Permabit Technology
+Corp. starting in 2009. It was first released in 2013 and has been used in
+production environments ever since. It was made open-source in 2017 after
+Permabit was acquired by Red Hat. This document describes the design of
+dm-vdo. For usage, see vdo.rst in the same directory as this file.
+
+Because deduplication rates fall drastically as the block size increases, a
+vdo target has a maximum block size of 4K. However, it can achieve
+deduplication rates of 254:1, i.e. up to 254 copies of a given 4K block can
+reference a single 4K of actual storage. It can achieve compression rates
+of 14:1. All zero blocks consume no storage at all.
+
+Theory of Operation
+===================
+
+The design of dm-vdo is based on the idea that deduplication is a two-part
+problem. The first is to recognize duplicate data. The second is to avoid
+storing multiple copies of those duplicates. Therefore, dm-vdo has two main
+parts: a deduplication index (called UDS) that is used to discover
+duplicate data, and a data store with a reference counted block map that
+maps from logical block addresses to the actual storage location of the
+data.
+
+Zones and Threading
+-------------------
+
+Due to the complexity of data optimization, the number of metadata
+structures involved in a single write operation to a vdo target is larger
+than most other targets. Furthermore, because vdo must operate on small
+block sizes in order to achieve good deduplication rates, acceptable
+performance can only be achieved through parallelism. Therefore, vdo's
+design attempts to be lock-free.
+
+Most of a vdo's main data structures are designed to be easily divided into
+"zones" such that any given bio must only access a single zone of any zoned
+structure. Safety with minimal locking is achieved by ensuring that during
+normal operation, each zone is assigned to a specific thread, and only that
+thread will access the portion of the data structure in that zone.
+Associated with each thread is a work queue. Each bio is associated with a
+request object (the "data_vio") which will be added to a work queue when
+the next phase of its operation requires access to the structures in the
+zone associated with that queue.
+
+Another way of thinking about this arrangement is that the work queue for
+each zone has an implicit lock on the structures it manages for all its
+operations, because vdo guarantees that no other thread will alter those
+structures.
+
+Although each structure is divided into zones, this division is not
+reflected in the on-disk representation of each data structure. Therefore,
+the number of zones for each structure, and hence the number of threads,
+can be reconfigured each time a vdo target is started.
+
+The Deduplication Index
+-----------------------
+
+In order to identify duplicate data efficiently, vdo was designed to
+leverage some common characteristics of duplicate data. From empirical
+observations, we gathered two key insights. The first is that in most data
+sets with significant amounts of duplicate data, the duplicates tend to
+have temporal locality. When a duplicate appears, it is more likely that
+other duplicates will be detected, and that those duplicates will have been
+written at about the same time. This is why the index keeps records in
+temporal order. The second insight is that new data is more likely to
+duplicate recent data than it is to duplicate older data and in general,
+there are diminishing returns to looking further back in time. Therefore,
+when the index is full, it should cull its oldest records to make space for
+new ones. Another important idea behind the design of the index is that the
+ultimate goal of deduplication is to reduce storage costs. Since there is a
+trade-off between the storage saved and the resources expended to achieve
+those savings, vdo does not attempt to find every last duplicate block. It
+is sufficient to find and eliminate most of the redundancy.
+
+Each block of data is hashed to produce a 16-byte block name. An index
+record consists of this block name paired with the presumed location of
+that data on the underlying storage. However, it is not possible to
+guarantee that the index is accurate. In the most common case, this occurs
+because it is too costly to update the index when a block is over-written
+or discarded. Doing so would require either storing the block name along
+with the blocks, which is difficult to do efficiently in block-based
+storage, or reading and rehashing each block before overwriting it.
+Inaccuracy can also result from a hash collision where two different blocks
+have the same name. In practice, this is extremely unlikely, but because
+vdo does not use a cryptographic hash, a malicious workload could be
+constructed. Because of these inaccuracies, vdo treats the locations in the
+index as hints, and reads each indicated block to verify that it is indeed
+a duplicate before sharing the existing block with a new one.
+
+Records are collected into groups called chapters. New records are added to
+the newest chapter, called the open chapter. This chapter is stored in a
+format optimized for adding and modifying records, and the content of the
+open chapter is not finalized until it runs out of space for new records.
+When the open chapter fills up, it is closed and a new open chapter is
+created to collect new records.
+
+Closing a chapter converts it to a different format which is optimized for
+reading. The records are written to a series of record pages based on the
+order in which they were received. This means that records with temporal
+locality should be on a small number of pages, reducing the I/O required to
+retrieve them. The chapter also compiles an index that indicates which
+record page contains any given name. This index means that a request for a
+name can determine exactly which record page may contain that record,
+without having to load the entire chapter from storage. This index uses
+only a subset of the block name as its key, so it cannot guarantee that an
+index entry refers to the desired block name. It can only guarantee that if
+there is a record for this name, it will be on the indicated page. Closed
+chapters are read-only structures and their contents are never altered in
+any way.
+
+Once enough records have been written to fill up all the available index
+space, the oldest chapter is removed to make space for new chapters. Any
+time a request finds a matching record in the index, that record is copied
+into the open chapter. This ensures that useful block names remain available
+in the index, while unreferenced block names are forgotten over time.
+
+In order to find records in older chapters, the index also maintains a
+higher level structure called the volume index, which contains entries
+mapping each block name to the chapter containing its newest record. This
+mapping is updated as records for the block name are copied or updated,
+ensuring that only the newest record for a given block name can be found.
+An older record for a block name will no longer be found even though it has
+not been deleted from its chapter. Like the chapter index, the volume index
+uses only a subset of the block name as its key and can not definitively
+say that a record exists for a name. It can only say which chapter would
+contain the record if a record exists. The volume index is stored entirely
+in memory and is saved to storage only when the vdo target is shut down.
+
+From the viewpoint of a request for a particular block name, it will first
+look up the name in the volume index. This search will either indicate that
+the name is new, or which chapter to search. If it returns a chapter, the
+request looks up its name in the chapter index. This will indicate either
+that the name is new, or which record page to search. Finally, if it is not
+new, the request will look for its name in the indicated record page.
+This process may require up to two page reads per request (one for the
+chapter index page and one for the request page). However, recently
+accessed pages are cached so that these page reads can be amortized across
+many block name requests.
+
+The volume index and the chapter indexes are implemented using a
+memory-efficient structure called a delta index. Instead of storing the
+entire block name (the key) for each entry, the entries are sorted by name
+and only the difference between adjacent keys (the delta) is stored.
+Because we expect the hashes to be randomly distributed, the size of the
+deltas follows an exponential distribution. Because of this distribution,
+the deltas are expressed using a Huffman code to take up even less space.
+The entire sorted list of keys is called a delta list. This structure
+allows the index to use many fewer bytes per entry than a traditional hash
+table, but it is slightly more expensive to look up entries, because a
+request must read every entry in a delta list to add up the deltas in order
+to find the record it needs. The delta index reduces this lookup cost by
+splitting its key space into many sub-lists, each starting at a fixed key
+value, so that each individual list is short.
+
+The default index size can hold 64 million records, corresponding to about
+256GB of data. This means that the index can identify duplicate data if the
+original data was written within the last 256GB of writes. This range is
+called the deduplication window. If new writes duplicate data that is older
+than that, the index will not be able to find it because the records of the
+older data have been removed. This means that if an application writes a
+200 GB file to a vdo target and then immediately writes it again, the two
+copies will deduplicate perfectly. Doing the same with a 500 GB file will
+result in no deduplication, because the beginning of the file will no
+longer be in the index by the time the second write begins (assuming there
+is no duplication within the file itself).
+
+If an application anticipates a data workload that will see useful
+deduplication beyond the 256GB threshold, vdo can be configured to use a
+larger index with a correspondingly larger deduplication window. (This
+configuration can only be set when the target is created, not altered
+later. It is important to consider the expected workload for a vdo target
+before configuring it.) There are two ways to do this.
+
+One way is to increase the memory size of the index, which also increases
+the amount of backing storage required. Doubling the size of the index will
+double the length of the deduplication window at the expense of doubling
+the storage size and the memory requirements.
+
+The other option is to enable sparse indexing. Sparse indexing increases
+the deduplication window by a factor of 10, at the expense of also
+increasing the storage size by a factor of 10. However with sparse
+indexing, the memory requirements do not increase. The trade-off is
+slightly more computation per request and a slight decrease in the amount
+of deduplication detected. For most workloads with significant amounts of
+duplicate data, sparse indexing will detect 97-99% of the deduplication
+that a standard index will detect.
+
+The vio and data_vio Structures
+-------------------------------
+
+A vio (short for Vdo I/O) is conceptually similar to a bio, with additional
+fields and data to track vdo-specific information. A struct vio maintains a
+pointer to a bio but also tracks other fields specific to the operation of
+vdo. The vio is kept separate from its related bio because there are many
+circumstances where vdo completes the bio but must continue to do work
+related to deduplication or compression.
+
+Metadata reads and writes, and other writes that originate within vdo, use
+a struct vio directly. Application reads and writes use a larger structure
+called a data_vio to track information about their progress. A struct
+data_vio contain a struct vio and also includes several other fields
+related to deduplication and other vdo features. The data_vio is the
+primary unit of application work in vdo. Each data_vio proceeds through a
+set of steps to handle the application data, after which it is reset and
+returned to a pool of data_vios for reuse.
+
+There is a fixed pool of 2048 data_vios. This number was chosen to bound
+the amount of work that is required to recover from a crash. In addition,
+benchmarks have indicated that increasing the size of the pool does not
+significantly improve performance.
+
+The Data Store
+--------------
+
+The data store is implemented by three main data structures, all of which
+work in concert to reduce or amortize metadata updates across as many data
+writes as possible.
+
+*The Slab Depot*
+
+Most of the vdo volume belongs to the slab depot. The depot contains a
+collection of slabs. The slabs can be up to 32GB, and are divided into
+three sections. Most of a slab consists of a linear sequence of 4K blocks.
+These blocks are used either to store data, or to hold portions of the
+block map (see below). In addition to the data blocks, each slab has a set
+of reference counters, using 1 byte for each data block. Finally each slab
+has a journal.
+
+Reference updates are written to the slab journal. Slab journal blocks are
+written out either when they are full, or when the recovery journal
+requests they do so in order to allow the main recovery journal (see below)
+to free up space. The slab journal is used both to ensure that the main
+recovery journal can regularly free up space, and also to amortize the cost
+of updating individual reference blocks. The reference counters are kept in
+memory and are written out, a block at a time in oldest-dirtied-order, only
+when there is a need to reclaim slab journal space. The write operations
+are performed in the background as needed so they do not add latency to
+particular I/O operations.
+
+Each slab is independent of every other. They are assigned to "physical
+zones" in round-robin fashion. If there are P physical zones, then slab n
+is assigned to zone n mod P.
+
+The slab depot maintains an additional small data structure, the "slab
+summary," which is used to reduce the amount of work needed to come back
+online after a crash. The slab summary maintains an entry for each slab
+indicating whether or not the slab has ever been used, whether all of its
+reference count updates have been persisted to storage, and approximately
+how full it is. During recovery, each physical zone will attempt to recover
+at least one slab, stopping whenever it has recovered a slab which has some
+free blocks. Once each zone has some space, or has determined that none is
+available, the target can resume normal operation in a degraded mode. Read
+and write requests can be serviced, perhaps with degraded performance,
+while the remainder of the dirty slabs are recovered.
+
+*The Block Map*
+
+The block map contains the logical to physical mapping. It can be thought
+of as an array with one entry per logical address. Each entry is 5 bytes,
+36 bits of which contain the physical block number which holds the data for
+the given logical address. The other 4 bits are used to indicate the nature
+of the mapping. Of the 16 possible states, one represents a logical address
+which is unmapped (i.e. it has never been written, or has been discarded),
+one represents an uncompressed block, and the other 14 states are used to
+indicate that the mapped data is compressed, and which of the compression
+slots in the compressed block contains the data for this logical address.
+
+In practice, the array of mapping entries is divided into "block map
+pages," each of which fits in a single 4K block. Each block map page
+consists of a header and 812 mapping entries. Each mapping page is actually
+a leaf of a radix tree which consists of block map pages at each level.
+There are 60 radix trees which are assigned to "logical zones" in round
+robin fashion. (If there are L logical zones, tree n will belong to zone n
+mod L.) At each level, the trees are interleaved, so logical addresses
+0-811 belong to tree 0, logical addresses 812-1623 belong to tree 1, and so
+on. The interleaving is maintained all the way up to the 60 root nodes.
+Choosing 60 trees results in an evenly distributed number of trees per zone
+for a large number of possible logical zone counts. The storage for the 60
+tree roots is allocated at format time. All other block map pages are
+allocated out of the slabs as needed. This flexible allocation avoids the
+need to pre-allocate space for the entire set of logical mappings and also
+makes growing the logical size of a vdo relatively easy.
+
+In operation, the block map maintains two caches. It is prohibitive to keep
+the entire leaf level of the trees in memory, so each logical zone
+maintains its own cache of leaf pages. The size of this cache is
+configurable at target start time. The second cache is allocated at start
+time, and is large enough to hold all the non-leaf pages of the entire
+block map. This cache is populated as pages are needed.
+
+*The Recovery Journal*
+
+The recovery journal is used to amortize updates across the block map and
+slab depot. Each write request causes an entry to be made in the journal.
+Entries are either "data remappings" or "block map remappings." For a data
+remapping, the journal records the logical address affected and its old and
+new physical mappings. For a block map remapping, the journal records the
+block map page number and the physical block allocated for it. Block map
+pages are never reclaimed or repurposed, so the old mapping is always 0.
+
+Each journal entry is an intent record summarizing the metadata updates
+that are required for a data_vio. The recovery journal issues a flush
+before each journal block write to ensure that the physical data for the
+new block mappings in that block are stable on storage, and journal block
+writes are all issued with the FUA bit set to ensure the recovery journal
+entries themselves are stable. The journal entry and the data write it
+represents must be stable on disk before the other metadata structures may
+be updated to reflect the operation. These entries allow the vdo device to
+reconstruct the logical to physical mappings after an unexpected
+interruption such as a loss of power.
+
+*Write Path*
+
+All write I/O to vdo is asynchronous. Each bio will be acknowledged as soon
+as vdo has done enough work to guarantee that it can complete the write
+eventually. Generally, the data for acknowledged but unflushed write I/O
+can be treated as though it is cached in memory. If an application
+requires data to be stable on storage, it must issue a flush or write the
+data with the FUA bit set like any other asynchronous I/O. Shutting down
+the vdo target will also flush any remaining I/O.
+
+Application write bios follow the steps outlined below.
+
+1. A data_vio is obtained from the data_vio pool and associated with the
+ application bio. If there are no data_vios available, the incoming bio
+ will block until a data_vio is available. This provides back pressure
+ to the application. The data_vio pool is protected by a spin lock.
+
+ The newly acquired data_vio is reset and the bio's data is copied into
+ the data_vio if it is a write and the data is not all zeroes. The data
+ must be copied because the application bio can be acknowledged before
+ the data_vio processing is complete, which means later processing steps
+ will no longer have access to the application bio. The application bio
+ may also be smaller than 4K, in which case the data_vio will have
+ already read the underlying block and the data is instead copied over
+ the relevant portion of the larger block.
+
+2. The data_vio places a claim (the "logical lock") on the logical address
+ of the bio. It is vital to prevent simultaneous modifications of the
+ same logical address, because deduplication involves sharing blocks.
+ This claim is implemented as an entry in a hashtable where the key is
+ the logical address and the value is a pointer to the data_vio
+ currently handling that address.
+
+ If a data_vio looks in the hashtable and finds that another data_vio is
+ already operating on that logical address, it waits until the previous
+ operation finishes. It also sends a message to inform the current
+ lock holder that it is waiting. Most notably, a new data_vio waiting
+ for a logical lock will flush the previous lock holder out of the
+ compression packer (step 8d) rather than allowing it to continue
+ waiting to be packed.
+
+ This stage requires the data_vio to get an implicit lock on the
+ appropriate logical zone to prevent concurrent modifications of the
+ hashtable. This implicit locking is handled by the zone divisions
+ described above.
+
+3. The data_vio traverses the block map tree to ensure that all the
+ necessary internal tree nodes have been allocated, by trying to find
+ the leaf page for its logical address. If any interior tree page is
+ missing, it is allocated at this time out of the same physical storage
+ pool used to store application data.
+
+ a. If any page-node in the tree has not yet been allocated, it must be
+ allocated before the write can continue. This step requires the
+ data_vio to lock the page-node that needs to be allocated. This
+ lock, like the logical block lock in step 2, is a hashtable entry
+ that causes other data_vios to wait for the allocation process to
+ complete.
+
+ The implicit logical zone lock is released while the allocation is
+ happening, in order to allow other operations in the same logical
+ zone to proceed. The details of allocation are the same as in
+ step 4. Once a new node has been allocated, that node is added to
+ the tree using a similar process to adding a new data block mapping.
+ The data_vio journals the intent to add the new node to the block
+ map tree (step 10), updates the reference count of the new block
+ (step 11), and reacquires the implicit logical zone lock to add the
+ new mapping to the parent tree node (step 12). Once the tree is
+ updated, the data_vio proceeds down the tree. Any other data_vios
+ waiting on this allocation also proceed.
+
+ b. In the steady-state case, the block map tree nodes will already be
+ allocated, so the data_vio just traverses the tree until it finds
+ the required leaf node. The location of the mapping (the "block map
+ slot") is recorded in the data_vio so that later steps do not need
+ to traverse the tree again. The data_vio then releases the implicit
+ logical zone lock.
+
+4. If the block is a zero block, skip to step 9. Otherwise, an attempt is
+ made to allocate a free data block. This allocation ensures that the
+ data_vio can write its data somewhere even if deduplication and
+ compression are not possible. This stage gets an implicit lock on a
+ physical zone to search for free space within that zone.
+
+ The data_vio will search each slab in a zone until it finds a free
+ block or decides there are none. If the first zone has no free space,
+ it will proceed to search the next physical zone by taking the implicit
+ lock for that zone and releasing the previous one until it finds a
+ free block or runs out of zones to search. The data_vio will acquire a
+ struct pbn_lock (the "physical block lock") on the free block. The
+ struct pbn_lock also has several fields to record the various kinds of
+ claims that data_vios can have on physical blocks. The pbn_lock is
+ added to a hashtable like the logical block locks in step 2. This
+ hashtable is also covered by the implicit physical zone lock. The
+ reference count of the free block is updated to prevent any other
+ data_vio from considering it free. The reference counters are a
+ sub-component of the slab and are thus also covered by the implicit
+ physical zone lock.
+
+5. If an allocation was obtained, the data_vio has all the resources it
+ needs to complete the write. The application bio can safely be
+ acknowledged at this point. The acknowledgment happens on a separate
+ thread to prevent the application callback from blocking other data_vio
+ operations.
+
+ If an allocation could not be obtained, the data_vio continues to
+ attempt to deduplicate or compress the data, but the bio is not
+ acknowledged because the vdo device may be out of space.
+
+6. At this point vdo must determine where to store the application data.
+ The data_vio's data is hashed and the hash (the "record name") is
+ recorded in the data_vio.
+
+7. The data_vio reserves or joins a struct hash_lock, which manages all of
+ the data_vios currently writing the same data. Active hash locks are
+ tracked in a hashtable similar to the way logical block locks are
+ tracked in step 2. This hashtable is covered by the implicit lock on
+ the hash zone.
+
+ If there is no existing hash lock for this data_vio's record_name, the
+ data_vio obtains a hash lock from the pool, adds it to the hashtable,
+ and sets itself as the new hash lock's "agent." The hash_lock pool is
+ also covered by the implicit hash zone lock. The hash lock agent will
+ do all the work to decide where the application data will be
+ written. If a hash lock for the data_vio's record_name already exists,
+ and the data_vio's data is the same as the agent's data, the new
+ data_vio will wait for the agent to complete its work and then share
+ its result.
+
+ In the rare case that a hash lock exists for the data_vio's hash but
+ the data does not match the hash lock's agent, the data_vio skips to
+ step 8h and attempts to write its data directly. This can happen if two
+ different data blocks produce the same hash, for example.
+
+8. The hash lock agent attempts to deduplicate or compress its data with
+ the following steps.
+
+ a. The agent initializes and sends its embedded deduplication request
+ (struct uds_request) to the deduplication index. This does not
+ require the data_vio to get any locks because the index components
+ manage their own locking. The data_vio waits until it either gets a
+ response from the index or times out.
+
+ b. If the deduplication index returns advice, the data_vio attempts to
+ obtain a physical block lock on the indicated physical address, in
+ order to read the data and verify that it is the same as the
+ data_vio's data, and that it can accept more references. If the
+ physical address is already locked by another data_vio, the data at
+ that address may soon be overwritten so it is not safe to use the
+ address for deduplication.
+
+ c. If the data matches and the physical block can add references, the
+ agent and any other data_vios waiting on it will record this
+ physical block as their new physical address and proceed to step 9
+ to record their new mapping. If there are more data_vios in the hash
+ lock than there are references available, one of the remaining
+ data_vios becomes the new agent and continues to step 8d as if no
+ valid advice was returned.
+
+ d. If no usable duplicate block was found, the agent first checks that
+ it has an allocated physical block (from step 3) that it can write
+ to. If the agent does not have an allocation, some other data_vio in
+ the hash lock that does have an allocation takes over as agent. If
+ none of the data_vios have an allocated physical block, these writes
+ are out of space, so they proceed to step 13 for cleanup.
+
+ e. The agent attempts to compress its data. If the data does not
+ compress, the data_vio will continue to step 8h to write its data
+ directly.
+
+ If the compressed size is small enough, the agent will release the
+ implicit hash zone lock and go to the packer (struct packer) where
+ it will be placed in a bin (struct packer_bin) along with other
+ data_vios. All compression operations require the implicit lock on
+ the packer zone.
+
+ The packer can combine up to 14 compressed blocks in a single 4k
+ data block. Compression is only helpful if vdo can pack at least 2
+ data_vios into a single data block. This means that a data_vio may
+ wait in the packer for an arbitrarily long time for other data_vios
+ to fill out the compressed block. There is a mechanism for vdo to
+ evict waiting data_vios when continuing to wait would cause
+ problems. Circumstances causing an eviction include an application
+ flush, device shutdown, or a subsequent data_vio trying to overwrite
+ the same logical block address. A data_vio may also be evicted from
+ the packer if it cannot be paired with any other compressed block
+ before more compressible blocks need to use its bin. An evicted
+ data_vio will proceed to step 8h to write its data directly.
+
+ f. If the agent fills a packer bin, either because all 14 of its slots
+ are used or because it has no remaining space, it is written out
+ using the allocated physical block from one of its data_vios. Step
+ 8d has already ensured that an allocation is available.
+
+ g. Each data_vio sets the compressed block as its new physical address.
+ The data_vio obtains an implicit lock on the physical zone and
+ acquires the struct pbn_lock for the compressed block, which is
+ modified to be a shared lock. Then it releases the implicit physical
+ zone lock and proceeds to step 8i.
+
+ h. Any data_vio evicted from the packer will have an allocation from
+ step 3. It will write its data to that allocated physical block.
+
+ i. After the data is written, if the data_vio is the agent of a hash
+ lock, it will reacquire the implicit hash zone lock and share its
+ physical address with as many other data_vios in the hash lock as
+ possible. Each data_vio will then proceed to step 9 to record its
+ new mapping.
+
+ j. If the agent actually wrote new data (whether compressed or not),
+ the deduplication index is updated to reflect the location of the
+ new data. The agent then releases the implicit hash zone lock.
+
+9. The data_vio determines the previous mapping of the logical address.
+ There is a cache for block map leaf pages (the "block map cache"),
+ because there are usually too many block map leaf nodes to store
+ entirely in memory. If the desired leaf page is not in the cache, the
+ data_vio will reserve a slot in the cache and load the desired page
+ into it, possibly evicting an older cached page. The data_vio then
+ finds the current physical address for this logical address (the "old
+ physical mapping"), if any, and records it. This step requires a lock
+ on the block map cache structures, covered by the implicit logical zone
+ lock.
+
+10. The data_vio makes an entry in the recovery journal containing the
+ logical block address, the old physical mapping, and the new physical
+ mapping. Making this journal entry requires holding the implicit
+ recovery journal lock. The data_vio will wait in the journal until all
+ recovery blocks up to the one containing its entry have been written
+ and flushed to ensure the transaction is stable on storage.
+
+11. Once the recovery journal entry is stable, the data_vio makes two slab
+ journal entries: an increment entry for the new mapping, and a
+ decrement entry for the old mapping. These two operations each require
+ holding a lock on the affected physical slab, covered by its implicit
+ physical zone lock. For correctness during recovery, the slab journal
+ entries in any given slab journal must be in the same order as the
+ corresponding recovery journal entries. Therefore, if the two entries
+ are in different zones, they are made concurrently, and if they are in
+ the same zone, the increment is always made before the decrement in
+ order to avoid underflow. After each slab journal entry is made in
+ memory, the associated reference count is also updated in memory.
+
+12. Once both of the reference count updates are done, the data_vio
+ acquires the implicit logical zone lock and updates the
+ logical-to-physical mapping in the block map to point to the new
+ physical block. At this point the write operation is complete.
+
+13. If the data_vio has a hash lock, it acquires the implicit hash zone
+ lock and releases its hash lock to the pool.
+
+ The data_vio then acquires the implicit physical zone lock and releases
+ the struct pbn_lock it holds for its allocated block. If it had an
+ allocation that it did not use, it also sets the reference count for
+ that block back to zero to free it for use by subsequent data_vios.
+
+ The data_vio then acquires the implicit logical zone lock and releases
+ the logical block lock acquired in step 2.
+
+ The application bio is then acknowledged if it has not previously been
+ acknowledged, and the data_vio is returned to the pool.
+
+*Read Path*
+
+An application read bio follows a much simpler set of steps. It does steps
+1 and 2 in the write path to obtain a data_vio and lock its logical
+address. If there is already a write data_vio in progress for that logical
+address that is guaranteed to complete, the read data_vio will copy the
+data from the write data_vio and return it. Otherwise, it will look up the
+logical-to-physical mapping by traversing the block map tree as in step 3,
+and then read and possibly decompress the indicated data at the indicated
+physical block address. A read data_vio will not allocate block map tree
+nodes if they are missing. If the interior block map nodes do not exist
+yet, the logical block map address must still be unmapped and the read
+data_vio will return all zeroes. A read data_vio handles cleanup and
+acknowledgment as in step 13, although it only needs to release the logical
+lock and return itself to the pool.
+
+*Small Writes*
+
+All storage within vdo is managed as 4KB blocks, but it can accept writes
+as small as 512 bytes. Processing a write that is smaller than 4K requires
+a read-modify-write operation that reads the relevant 4K block, copies the
+new data over the appropriate sectors of the block, and then launches a
+write operation for the modified data block. The read and write stages of
+this operation are nearly identical to the normal read and write
+operations, and a single data_vio is used throughout this operation.
+
+*Recovery*
+
+When a vdo is restarted after a crash, it will attempt to recover from the
+recovery journal. During the pre-resume phase of the next start, the
+recovery journal is read. The increment portion of valid entries are played
+into the block map. Next, valid entries are played, in order as required,
+into the slab journals. Finally, each physical zone attempts to replay at
+least one slab journal to reconstruct the reference counts of one slab.
+Once each zone has some free space (or has determined that it has none),
+the vdo comes back online, while the remainder of the slab journals are
+used to reconstruct the rest of the reference counts in the background.
+
+*Read-only Rebuild*
+
+If a vdo encounters an unrecoverable error, it will enter read-only mode.
+This mode indicates that some previously acknowledged data may have been
+lost. The vdo may be instructed to rebuild as best it can in order to
+return to a writable state. However, this is never done automatically due
+to the possibility that data has been lost. During a read-only rebuild, the
+block map is recovered from the recovery journal as before. However, the
+reference counts are not rebuilt from the slab journals. Instead, the
+reference counts are zeroed, the entire block map is traversed, and the
+reference counts are updated from the block mappings. While this may lose
+some data, it ensures that the block map and reference counts are
+consistent with each other. This allows vdo to resume normal operation and
+accept further writes.
diff --git a/Documentation/admin-guide/device-mapper/vdo.rst b/Documentation/admin-guide/device-mapper/vdo.rst
new file mode 100644
index 000000000000..8a67b320a97b
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/vdo.rst
@@ -0,0 +1,413 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+
+======
+dm-vdo
+======
+
+The dm-vdo (virtual data optimizer) device mapper target provides
+block-level deduplication, compression, and thin provisioning. As a device
+mapper target, it can add these features to the storage stack, compatible
+with any file system. The vdo target does not protect against data
+corruption, relying instead on integrity protection of the storage below
+it. It is strongly recommended that lvm be used to manage vdo volumes. See
+lvmvdo(7).
+
+Userspace component
+===================
+
+Formatting a vdo volume requires the use of the 'vdoformat' tool, available
+at:
+
+https://github.com/dm-vdo/vdo/
+
+In most cases, a vdo target will recover from a crash automatically the
+next time it is started. In cases where it encountered an unrecoverable
+error (either during normal operation or crash recovery) the target will
+enter or come up in read-only mode. Because read-only mode is indicative of
+data-loss, a positive action must be taken to bring vdo out of read-only
+mode. The 'vdoforcerebuild' tool, available from the same repo, is used to
+prepare a read-only vdo to exit read-only mode. After running this tool,
+the vdo target will rebuild its metadata the next time it is
+started. Although some data may be lost, the rebuilt vdo's metadata will be
+internally consistent and the target will be writable again.
+
+The repo also contains additional userspace tools which can be used to
+inspect a vdo target's on-disk metadata. Fortunately, these tools are
+rarely needed except by dm-vdo developers.
+
+Metadata requirements
+=====================
+
+Each vdo volume reserves 3GB of space for metadata, or more depending on
+its configuration. It is helpful to check that the space saved by
+deduplication and compression is not cancelled out by the metadata
+requirements. An estimation of the space saved for a specific dataset can
+be computed with the vdo estimator tool, which is available at:
+
+https://github.com/dm-vdo/vdoestimator/
+
+Target interface
+================
+
+Table line
+----------
+
+::
+
+ <offset> <logical device size> vdo V4 <storage device>
+ <storage device size> <minimum I/O size> <block map cache size>
+ <block map era length> [optional arguments]
+
+
+Required parameters:
+
+ offset:
+ The offset, in sectors, at which the vdo volume's logical
+ space begins.
+
+ logical device size:
+ The size of the device which the vdo volume will service,
+ in sectors. Must match the current logical size of the vdo
+ volume.
+
+ storage device:
+ The device holding the vdo volume's data and metadata.
+
+ storage device size:
+ The size of the device holding the vdo volume, as a number
+ of 4096-byte blocks. Must match the current size of the vdo
+ volume.
+
+ minimum I/O size:
+ The minimum I/O size for this vdo volume to accept, in
+ bytes. Valid values are 512 or 4096. The recommended value
+ is 4096.
+
+ block map cache size:
+ The size of the block map cache, as a number of 4096-byte
+ blocks. The minimum and recommended value is 32768 blocks.
+ If the logical thread count is non-zero, the cache size
+ must be at least 4096 blocks per logical thread.
+
+ block map era length:
+ The speed with which the block map cache writes out
+ modified block map pages. A smaller era length is likely to
+ reduce the amount of time spent rebuilding, at the cost of
+ increased block map writes during normal operation. The
+ maximum and recommended value is 16380; the minimum value
+ is 1.
+
+Optional parameters:
+--------------------
+Some or all of these parameters may be specified as <key> <value> pairs.
+
+Thread related parameters:
+
+Different categories of work are assigned to separate thread groups, and
+the number of threads in each group can be configured separately.
+
+If <hash>, <logical>, and <physical> are all set to 0, the work handled by
+all three thread types will be handled by a single thread. If any of these
+values are non-zero, all of them must be non-zero.
+
+ ack:
+ The number of threads used to complete bios. Since
+ completing a bio calls an arbitrary completion function
+ outside the vdo volume, threads of this type allow the vdo
+ volume to continue processing requests even when bio
+ completion is slow. The default is 1.
+
+ bio:
+ The number of threads used to issue bios to the underlying
+ storage. Threads of this type allow the vdo volume to
+ continue processing requests even when bio submission is
+ slow. The default is 4.
+
+ bioRotationInterval:
+ The number of bios to enqueue on each bio thread before
+ switching to the next thread. The value must be greater
+ than 0 and not more than 1024; the default is 64.
+
+ cpu:
+ The number of threads used to do CPU-intensive work, such
+ as hashing and compression. The default is 1.
+
+ hash:
+ The number of threads used to manage data comparisons for
+ deduplication based on the hash value of data blocks. The
+ default is 0.
+
+ logical:
+ The number of threads used to manage caching and locking
+ based on the logical address of incoming bios. The default
+ is 0; the maximum is 60.
+
+ physical:
+ The number of threads used to manage administration of the
+ underlying storage device. At format time, a slab size for
+ the vdo is chosen; the vdo storage device must be large
+ enough to have at least 1 slab per physical thread. The
+ default is 0; the maximum is 16.
+
+Miscellaneous parameters:
+
+ maxDiscard:
+ The maximum size of discard bio accepted, in 4096-byte
+ blocks. I/O requests to a vdo volume are normally split
+ into 4096-byte blocks, and processed up to 2048 at a time.
+ However, discard requests to a vdo volume can be
+ automatically split to a larger size, up to <maxDiscard>
+ 4096-byte blocks in a single bio, and are limited to 1500
+ at a time. Increasing this value may provide better overall
+ performance, at the cost of increased latency for the
+ individual discard requests. The default and minimum is 1;
+ the maximum is UINT_MAX / 4096.
+
+ deduplication:
+ Whether deduplication is enabled. The default is 'on'; the
+ acceptable values are 'on' and 'off'.
+
+ compression:
+ Whether compression is enabled. The default is 'off'; the
+ acceptable values are 'on' and 'off'.
+
+Device modification
+-------------------
+
+A modified table may be loaded into a running, non-suspended vdo volume.
+The modifications will take effect when the device is next resumed. The
+modifiable parameters are <logical device size>, <physical device size>,
+<maxDiscard>, <compression>, and <deduplication>.
+
+If the logical device size or physical device size are changed, upon
+successful resume vdo will store the new values and require them on future
+startups. These two parameters may not be decreased. The logical device
+size may not exceed 4 PB. The physical device size must increase by at
+least 32832 4096-byte blocks if at all, and must not exceed the size of the
+underlying storage device. Additionally, when formatting the vdo device, a
+slab size is chosen: the physical device size may never increase above the
+size which provides 8192 slabs, and each increase must be large enough to
+add at least one new slab.
+
+Examples:
+
+Start a previously-formatted vdo volume with 1 GB logical space and 1 GB
+physical space, storing to /dev/dm-1 which has more than 1 GB of space.
+
+::
+
+ dmsetup create vdo0 --table \
+ "0 2097152 vdo V4 /dev/dm-1 262144 4096 32768 16380"
+
+Grow the logical size to 4 GB.
+
+::
+
+ dmsetup reload vdo0 --table \
+ "0 8388608 vdo V4 /dev/dm-1 262144 4096 32768 16380"
+ dmsetup resume vdo0
+
+Grow the physical size to 2 GB.
+
+::
+
+ dmsetup reload vdo0 --table \
+ "0 8388608 vdo V4 /dev/dm-1 524288 4096 32768 16380"
+ dmsetup resume vdo0
+
+Grow the physical size by 1 GB more and increase max discard sectors.
+
+::
+
+ dmsetup reload vdo0 --table \
+ "0 10485760 vdo V4 /dev/dm-1 786432 4096 32768 16380 maxDiscard 8"
+ dmsetup resume vdo0
+
+Stop the vdo volume.
+
+::
+
+ dmsetup remove vdo0
+
+Start the vdo volume again. Note that the logical and physical device sizes
+must still match, but other parameters can change.
+
+::
+
+ dmsetup create vdo1 --table \
+ "0 10485760 vdo V4 /dev/dm-1 786432 512 65550 5000 hash 1 logical 3 physical 2"
+
+Messages
+--------
+All vdo devices accept messages in the form:
+
+::
+
+ dmsetup message <target-name> 0 <message-name> <message-parameters>
+
+The messages are:
+
+ stats:
+ Outputs the current view of the vdo statistics. Mostly used
+ by the vdostats userspace program to interpret the output
+ buffer.
+
+ config:
+ Outputs useful vdo configuration information. Mostly used
+ by users who want to recreate a similar VDO volume and
+ want to know the creation configuration used.
+
+ dump:
+ Dumps many internal structures to the system log. This is
+ not always safe to run, so it should only be used to debug
+ a hung vdo. Optional parameters to specify structures to
+ dump are:
+
+ viopool: The pool of I/O requests incoming bios
+ pools: A synonym of 'viopool'
+ vdo: Most of the structures managing on-disk data
+ queues: Basic information about each vdo thread
+ threads: A synonym of 'queues'
+ default: Equivalent to 'queues vdo'
+ all: All of the above.
+
+ dump-on-shutdown:
+ Perform a default dump next time vdo shuts down.
+
+
+Status
+------
+
+::
+
+ <device> <operating mode> <in recovery> <index state>
+ <compression state> <physical blocks used> <total physical blocks>
+
+ device:
+ The name of the vdo volume.
+
+ operating mode:
+ The current operating mode of the vdo volume; values may be
+ 'normal', 'recovering' (the volume has detected an issue
+ with its metadata and is attempting to repair itself), and
+ 'read-only' (an error has occurred that forces the vdo
+ volume to only support read operations and not writes).
+
+ in recovery:
+ Whether the vdo volume is currently in recovery mode;
+ values may be 'recovering' or '-' which indicates not
+ recovering.
+
+ index state:
+ The current state of the deduplication index in the vdo
+ volume; values may be 'closed', 'closing', 'error',
+ 'offline', 'online', 'opening', and 'unknown'.
+
+ compression state:
+ The current state of compression in the vdo volume; values
+ may be 'offline' and 'online'.
+
+ used physical blocks:
+ The number of physical blocks in use by the vdo volume.
+
+ total physical blocks:
+ The total number of physical blocks the vdo volume may use;
+ the difference between this value and the
+ <used physical blocks> is the number of blocks the vdo
+ volume has left before being full.
+
+Memory Requirements
+===================
+
+A vdo target requires a fixed 38 MB of RAM along with the following amounts
+that scale with the target:
+
+- 1.15 MB of RAM for each 1 MB of configured block map cache size. The
+ block map cache requires a minimum of 150 MB.
+- 1.6 MB of RAM for each 1 TB of logical space.
+- 268 MB of RAM for each 1 TB of physical storage managed by the volume.
+
+The deduplication index requires additional memory which scales with the
+size of the deduplication window. For dense indexes, the index requires 1
+GB of RAM per 1 TB of window. For sparse indexes, the index requires 1 GB
+of RAM per 10 TB of window. The index configuration is set when the target
+is formatted and may not be modified.
+
+Module Parameters
+=================
+
+The vdo driver has a numeric parameter 'log_level' which controls the
+verbosity of logging from the driver. The default setting is 6
+(LOGLEVEL_INFO and more severe messages).
+
+Run-time Usage
+==============
+
+When using dm-vdo, it is important to be aware of the ways in which its
+behavior differs from other storage targets.
+
+- There is no guarantee that over-writes of existing blocks will succeed.
+ Because the underlying storage may be multiply referenced, over-writing
+ an existing block generally requires a vdo to have a free block
+ available.
+
+- When blocks are no longer in use, sending a discard request for those
+ blocks lets the vdo release references for those blocks. If the vdo is
+ thinly provisioned, discarding unused blocks is essential to prevent the
+ target from running out of space. However, due to the sharing of
+ duplicate blocks, no discard request for any given logical block is
+ guaranteed to reclaim space.
+
+- Assuming the underlying storage properly implements flush requests, vdo
+ is resilient against crashes, however, unflushed writes may or may not
+ persist after a crash.
+
+- Each write to a vdo target entails a significant amount of processing.
+ However, much of the work is paralellizable. Therefore, vdo targets
+ achieve better throughput at higher I/O depths, and can support up 2048
+ requests in parallel.
+
+Tuning
+======
+
+The vdo device has many options, and it can be difficult to make optimal
+choices without perfect knowledge of the workload. Additionally, most
+configuration options must be set when a vdo target is started, and cannot
+be changed without shutting it down completely; the configuration cannot be
+changed while the target is active. Ideally, tuning with simulated
+workloads should be performed before deploying vdo in production
+environments.
+
+The most important value to adjust is the block map cache size. In order to
+service a request for any logical address, a vdo must load the portion of
+the block map which holds the relevant mapping. These mappings are cached.
+Performance will suffer when the working set does not fit in the cache. By
+default, a vdo allocates 128 MB of metadata cache in RAM to support
+efficient access to 100 GB of logical space at a time. It should be scaled
+up proportionally for larger working sets.
+
+The logical and physical thread counts should also be adjusted. A logical
+thread controls a disjoint section of the block map, so additional logical
+threads increase parallelism and can increase throughput. Physical threads
+control a disjoint section of the data blocks, so additional physical
+threads can also increase throughput. However, excess threads can waste
+resources and increase contention.
+
+Bio submission threads control the parallelism involved in sending I/O to
+the underlying storage; fewer threads mean there is more opportunity to
+reorder I/O requests for performance benefit, but also that each I/O
+request has to wait longer before being submitted.
+
+Bio acknowledgment threads are used for finishing I/O requests. This is
+done on dedicated threads since the amount of work required to execute a
+bio's callback can not be controlled by the vdo itself. Usually one thread
+is sufficient but additional threads may be beneficial, particularly when
+bios have CPU-heavy callbacks.
+
+CPU threads are used for hashing and for compression; in workloads with
+compression enabled, more threads may result in higher throughput.
+
+Hash threads are used to sort active requests by hash and determine whether
+they should deduplicate; the most CPU intensive actions done by these
+threads are comparison of 4096-byte data blocks. In most cases, a single
+hash thread is sufficient.
diff --git a/Documentation/admin-guide/device-mapper/verity.rst b/Documentation/admin-guide/device-mapper/verity.rst
new file mode 100644
index 000000000000..8c3f1f967a3c
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/verity.rst
@@ -0,0 +1,265 @@
+=========
+dm-verity
+=========
+
+Device-Mapper's "verity" target provides transparent integrity checking of
+block devices using a cryptographic digest provided by the kernel crypto API.
+This target is read-only.
+
+Construction Parameters
+=======================
+
+::
+
+ <version> <dev> <hash_dev>
+ <data_block_size> <hash_block_size>
+ <num_data_blocks> <hash_start_block>
+ <algorithm> <digest> <salt>
+ [<#opt_params> <opt_params>]
+
+<version>
+ This is the type of the on-disk hash format.
+
+ 0 is the original format used in the Chromium OS.
+ The salt is appended when hashing, digests are stored continuously and
+ the rest of the block is padded with zeroes.
+
+ 1 is the current format that should be used for new devices.
+ The salt is prepended when hashing and each digest is
+ padded with zeroes to the power of two.
+
+<dev>
+ This is the device containing data, the integrity of which needs to be
+ checked. It may be specified as a path, like /dev/sdaX, or a device number,
+ <major>:<minor>.
+
+<hash_dev>
+ This is the device that supplies the hash tree data. It may be
+ specified similarly to the device path and may be the same device. If the
+ same device is used, the hash_start should be outside the configured
+ dm-verity device.
+
+<data_block_size>
+ The block size on a data device in bytes.
+ Each block corresponds to one digest on the hash device.
+
+<hash_block_size>
+ The size of a hash block in bytes.
+
+<num_data_blocks>
+ The number of data blocks on the data device. Additional blocks are
+ inaccessible. You can place hashes to the same partition as data, in this
+ case hashes are placed after <num_data_blocks>.
+
+<hash_start_block>
+ This is the offset, in <hash_block_size>-blocks, from the start of hash_dev
+ to the root block of the hash tree.
+
+<algorithm>
+ The cryptographic hash algorithm used for this device. This should
+ be the name of the algorithm, like "sha1".
+
+<digest>
+ The hexadecimal encoding of the cryptographic hash of the root hash block
+ and the salt. This hash should be trusted as there is no other authenticity
+ beyond this point.
+
+<salt>
+ The hexadecimal encoding of the salt value.
+
+<#opt_params>
+ Number of optional parameters. If there are no optional parameters,
+ the optional parameters section can be skipped or #opt_params can be zero.
+ Otherwise #opt_params is the number of following arguments.
+
+ Example of optional parameters section:
+ 1 ignore_corruption
+
+ignore_corruption
+ Log corrupted blocks, but allow read operations to proceed normally.
+
+restart_on_corruption
+ Restart the system when a corrupted block is discovered. This option is
+ not compatible with ignore_corruption and requires user space support to
+ avoid restart loops.
+
+panic_on_corruption
+ Panic the device when a corrupted block is discovered. This option is
+ not compatible with ignore_corruption and restart_on_corruption.
+
+restart_on_error
+ Restart the system when an I/O error is detected.
+ This option can be combined with the restart_on_corruption option.
+
+panic_on_error
+ Panic the device when an I/O error is detected. This option is
+ not compatible with the restart_on_error option but can be combined
+ with the panic_on_corruption option.
+
+ignore_zero_blocks
+ Do not verify blocks that are expected to contain zeroes and always return
+ zeroes instead. This may be useful if the partition contains unused blocks
+ that are not guaranteed to contain zeroes.
+
+use_fec_from_device <fec_dev>
+ Use forward error correction (FEC) to recover from corruption if hash
+ verification fails. Use encoding data from the specified device. This
+ may be the same device where data and hash blocks reside, in which case
+ fec_start must be outside data and hash areas.
+
+ If the encoding data covers additional metadata, it must be accessible
+ on the hash device after the hash blocks.
+
+ Note: block sizes for data and hash devices must match. Also, if the
+ verity <dev> is encrypted the <fec_dev> should be too.
+
+fec_roots <num>
+ Number of generator roots. This equals to the number of parity bytes in
+ the encoding data. For example, in RS(M, N) encoding, the number of roots
+ is M-N.
+
+fec_blocks <num>
+ The number of encoding data blocks on the FEC device. The block size for
+ the FEC device is <data_block_size>.
+
+fec_start <offset>
+ This is the offset, in <data_block_size> blocks, from the start of the
+ FEC device to the beginning of the encoding data.
+
+check_at_most_once
+ Verify data blocks only the first time they are read from the data device,
+ rather than every time. This reduces the overhead of dm-verity so that it
+ can be used on systems that are memory and/or CPU constrained. However, it
+ provides a reduced level of security because only offline tampering of the
+ data device's content will be detected, not online tampering.
+
+ Hash blocks are still verified each time they are read from the hash device,
+ since verification of hash blocks is less performance critical than data
+ blocks, and a hash block will not be verified any more after all the data
+ blocks it covers have been verified anyway.
+
+root_hash_sig_key_desc <key_description>
+ This is the description of the USER_KEY that the kernel will lookup to get
+ the pkcs7 signature of the roothash. The pkcs7 signature is used to validate
+ the root hash during the creation of the device mapper block device.
+ Verification of roothash depends on the config DM_VERITY_VERIFY_ROOTHASH_SIG
+ being set in the kernel. The signatures are checked against the builtin
+ trusted keyring by default, or the secondary trusted keyring if
+ DM_VERITY_VERIFY_ROOTHASH_SIG_SECONDARY_KEYRING is set. The secondary
+ trusted keyring includes by default the builtin trusted keyring, and it can
+ also gain new certificates at run time if they are signed by a certificate
+ already in the secondary trusted keyring.
+
+try_verify_in_tasklet
+ If verity hashes are in cache and the IO size does not exceed the limit,
+ verify data blocks in bottom half instead of workqueue. This option can
+ reduce IO latency. The size limits can be configured via
+ /sys/module/dm_verity/parameters/use_bh_bytes. The four parameters
+ correspond to limits for IOPRIO_CLASS_NONE, IOPRIO_CLASS_RT,
+ IOPRIO_CLASS_BE and IOPRIO_CLASS_IDLE in turn.
+ For example:
+ <none>,<rt>,<be>,<idle>
+ 4096,4096,4096,4096
+
+Theory of operation
+===================
+
+dm-verity is meant to be set up as part of a verified boot path. This
+may be anything ranging from a boot using tboot or trustedgrub to just
+booting from a known-good device (like a USB drive or CD).
+
+When a dm-verity device is configured, it is expected that the caller
+has been authenticated in some way (cryptographic signatures, etc).
+After instantiation, all hashes will be verified on-demand during
+disk access. If they cannot be verified up to the root node of the
+tree, the root hash, then the I/O will fail. This should detect
+tampering with any data on the device and the hash data.
+
+Cryptographic hashes are used to assert the integrity of the device on a
+per-block basis. This allows for a lightweight hash computation on first read
+into the page cache. Block hashes are stored linearly, aligned to the nearest
+block size.
+
+If forward error correction (FEC) support is enabled any recovery of
+corrupted data will be verified using the cryptographic hash of the
+corresponding data. This is why combining error correction with
+integrity checking is essential.
+
+Hash Tree
+---------
+
+Each node in the tree is a cryptographic hash. If it is a leaf node, the hash
+of some data block on disk is calculated. If it is an intermediary node,
+the hash of a number of child nodes is calculated.
+
+Each entry in the tree is a collection of neighboring nodes that fit in one
+block. The number is determined based on block_size and the size of the
+selected cryptographic digest algorithm. The hashes are linearly-ordered in
+this entry and any unaligned trailing space is ignored but included when
+calculating the parent node.
+
+The tree looks something like:
+
+ alg = sha256, num_blocks = 32768, block_size = 4096
+
+::
+
+ [ root ]
+ / . . . \
+ [entry_0] [entry_1]
+ / . . . \ . . . \
+ [entry_0_0] . . . [entry_0_127] . . . . [entry_1_127]
+ / ... \ / . . . \ / \
+ blk_0 ... blk_127 blk_16256 blk_16383 blk_32640 . . . blk_32767
+
+
+On-disk format
+==============
+
+The verity kernel code does not read the verity metadata on-disk header.
+It only reads the hash blocks which directly follow the header.
+It is expected that a user-space tool will verify the integrity of the
+verity header.
+
+Alternatively, the header can be omitted and the dmsetup parameters can
+be passed via the kernel command-line in a rooted chain of trust where
+the command-line is verified.
+
+Directly following the header (and with sector number padded to the next hash
+block boundary) are the hash blocks which are stored a depth at a time
+(starting from the root), sorted in order of increasing index.
+
+The full specification of kernel parameters and on-disk metadata format
+is available at the cryptsetup project's wiki page
+
+ https://gitlab.com/cryptsetup/cryptsetup/wikis/DMVerity
+
+Status
+======
+V (for Valid) is returned if every check performed so far was valid.
+If any check failed, C (for Corruption) is returned.
+
+Example
+=======
+Set up a device::
+
+ # dmsetup create vroot --readonly --table \
+ "0 2097152 verity 1 /dev/sda1 /dev/sda2 4096 4096 262144 1 sha256 "\
+ "4392712ba01368efdf14b05c76f9e4df0d53664630b5d48632ed17a137f39076 "\
+ "1234000000000000000000000000000000000000000000000000000000000000"
+
+A command line tool veritysetup is available to compute or verify
+the hash tree or activate the kernel device. This is available from
+the cryptsetup upstream repository https://gitlab.com/cryptsetup/cryptsetup/
+(as a libcryptsetup extension).
+
+Create hash on the device::
+
+ # veritysetup format /dev/sda1 /dev/sda2
+ ...
+ Root hash: 4392712ba01368efdf14b05c76f9e4df0d53664630b5d48632ed17a137f39076
+
+Activate the device::
+
+ # veritysetup create vroot /dev/sda1 /dev/sda2 \
+ 4392712ba01368efdf14b05c76f9e4df0d53664630b5d48632ed17a137f39076
diff --git a/Documentation/admin-guide/device-mapper/writecache.rst b/Documentation/admin-guide/device-mapper/writecache.rst
new file mode 100644
index 000000000000..60c16b7fd5ac
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/writecache.rst
@@ -0,0 +1,114 @@
+=================
+Writecache target
+=================
+
+The writecache target caches writes on persistent memory or on SSD. It
+doesn't cache reads because reads are supposed to be cached in page cache
+in normal RAM.
+
+When the device is constructed, the first sector should be zeroed or the
+first sector should contain valid superblock from previous invocation.
+
+Constructor parameters:
+
+1. type of the cache device - "p" or "s"
+ - p - persistent memory
+ - s - SSD
+2. the underlying device that will be cached
+3. the cache device
+4. block size (4096 is recommended; the maximum block size is the page
+ size)
+5. the number of optional parameters (the parameters with an argument
+ count as two)
+
+ start_sector n (default: 0)
+ offset from the start of cache device in 512-byte sectors
+ high_watermark n (default: 50)
+ start writeback when the number of used blocks reach this
+ watermark
+ low_watermark x (default: 45)
+ stop writeback when the number of used blocks drops below
+ this watermark
+ writeback_jobs n (default: unlimited)
+ limit the number of blocks that are in flight during
+ writeback. Setting this value reduces writeback
+ throughput, but it may improve latency of read requests
+ autocommit_blocks n (default: 64 for pmem, 65536 for ssd)
+ when the application writes this amount of blocks without
+ issuing the FLUSH request, the blocks are automatically
+ committed
+ autocommit_time ms (default: 1000)
+ autocommit time in milliseconds. The data is automatically
+ committed if this time passes and no FLUSH request is
+ received
+ fua (by default on)
+ applicable only to persistent memory - use the FUA flag
+ when writing data from persistent memory back to the
+ underlying device
+ nofua
+ applicable only to persistent memory - don't use the FUA
+ flag when writing back data and send the FLUSH request
+ afterwards
+
+ - some underlying devices perform better with fua, some
+ with nofua. The user should test it
+ cleaner
+ when this option is activated (either in the constructor
+ arguments or by a message), the cache will not promote
+ new writes (however, writes to already cached blocks are
+ promoted, to avoid data corruption due to misordered
+ writes) and it will gradually writeback any cached
+ data. The userspace can then monitor the cleaning
+ process with "dmsetup status". When the number of cached
+ blocks drops to zero, userspace can unload the
+ dm-writecache target and replace it with dm-linear or
+ other targets.
+ max_age n
+ specifies the maximum age of a block in milliseconds. If
+ a block is stored in the cache for too long, it will be
+ written to the underlying device and cleaned up.
+ metadata_only
+ only metadata is promoted to the cache. This option
+ improves performance for heavier REQ_META workloads.
+ pause_writeback n (default: 3000)
+ pause writeback if there was some write I/O redirected to
+ the origin volume in the last n milliseconds
+
+Status:
+
+1. error indicator - 0 if there was no error, otherwise error number
+2. the number of blocks
+3. the number of free blocks
+4. the number of blocks under writeback
+5. the number of read blocks
+6. the number of read blocks that hit the cache
+7. the number of write blocks
+8. the number of write blocks that hit uncommitted block
+9. the number of write blocks that hit committed block
+10. the number of write blocks that bypass the cache
+11. the number of write blocks that are allocated in the cache
+12. the number of write requests that are blocked on the freelist
+13. the number of flush requests
+14. the number of discarded blocks
+
+Messages:
+ flush
+ Flush the cache device. The message returns successfully
+ if the cache device was flushed without an error
+ flush_on_suspend
+ Flush the cache device on next suspend. Use this message
+ when you are going to remove the cache device. The proper
+ sequence for removing the cache device is:
+
+ 1. send the "flush_on_suspend" message
+ 2. load an inactive table with a linear target that maps
+ to the underlying device
+ 3. suspend the device
+ 4. ask for status and verify that there are no errors
+ 5. resume the device, so that it will use the linear
+ target
+ 6. the cache device is now inactive and it can be deleted
+ cleaner
+ See above "cleaner" constructor documentation.
+ clear_stats
+ Clear the statistics that are reported on the status line
diff --git a/Documentation/admin-guide/device-mapper/zero.rst b/Documentation/admin-guide/device-mapper/zero.rst
new file mode 100644
index 000000000000..11fb5cf4597c
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/zero.rst
@@ -0,0 +1,37 @@
+=======
+dm-zero
+=======
+
+Device-Mapper's "zero" target provides a block-device that always returns
+zero'd data on reads and silently drops writes. This is similar behavior to
+/dev/zero, but as a block-device instead of a character-device.
+
+Dm-zero has no target-specific parameters.
+
+One very interesting use of dm-zero is for creating "sparse" devices in
+conjunction with dm-snapshot. A sparse device reports a device-size larger
+than the amount of actual storage space available for that device. A user can
+write data anywhere within the sparse device and read it back like a normal
+device. Reads to previously unwritten areas will return a zero'd buffer. When
+enough data has been written to fill up the actual storage space, the sparse
+device is deactivated. This can be very useful for testing device and
+filesystem limitations.
+
+To create a sparse device, start by creating a dm-zero device that's the
+desired size of the sparse device. For this example, we'll assume a 10TB
+sparse device::
+
+ TEN_TERABYTES=`expr 10 \* 1024 \* 1024 \* 1024 \* 2` # 10 TB in sectors
+ echo "0 $TEN_TERABYTES zero" | dmsetup create zero1
+
+Then create a snapshot of the zero device, using any available block-device as
+the COW device. The size of the COW device will determine the amount of real
+space available to the sparse device. For this example, we'll assume /dev/sdb1
+is an available 10GB partition::
+
+ echo "0 $TEN_TERABYTES snapshot /dev/mapper/zero1 /dev/sdb1 p 128" | \
+ dmsetup create sparse1
+
+This will create a 10TB sparse device called /dev/mapper/sparse1 that has
+10GB of actual storage space available. If more than 10GB of data is written
+to this device, it will start returning I/O errors.
diff --git a/Documentation/admin-guide/devices.rst b/Documentation/admin-guide/devices.rst
index 7fadc05330dd..e3776d77374b 100644
--- a/Documentation/admin-guide/devices.rst
+++ b/Documentation/admin-guide/devices.rst
@@ -1,3 +1,4 @@
+.. _admin_devices:
Linux allocated devices (4.x+ version)
======================================
@@ -6,17 +7,16 @@ This list is the Linux Device List, the official registry of allocated
device numbers and ``/dev`` directory nodes for the Linux operating
system.
-The LaTeX version of this document is no longer maintained, nor is
-the document that used to reside at lanana.org. This version in the
-mainline Linux kernel is the master document. Updates shall be sent
-as patches to the kernel maintainers (see the
+The version of this document at lanana.org is no longer maintained. This
+version in the mainline Linux kernel is the master document. Updates
+shall be sent as patches to the kernel maintainers (see the
:ref:`Documentation/process/submitting-patches.rst <submittingpatches>` document).
Specifically explore the sections titled "CHAR and MISC DRIVERS", and
"BLOCK LAYER" in the MAINTAINERS file to find the right maintainers
to involve for character and block devices.
This document is included by reference into the Filesystem Hierarchy
-Standard (FHS). The FHS is available from http://www.pathname.com/fhs/.
+Standard (FHS). The FHS is available from https://www.pathname.com/fhs/.
Allocations marked (68k/Amiga) apply to Linux/68k on the Amiga
platform only. Allocations marked (68k/Atari) apply to Linux/68k on
diff --git a/Documentation/admin-guide/devices.txt b/Documentation/admin-guide/devices.txt
index 1649117e6087..94c98be1329a 100644
--- a/Documentation/admin-guide/devices.txt
+++ b/Documentation/admin-guide/devices.txt
@@ -4,7 +4,7 @@
1 char Memory devices
1 = /dev/mem Physical memory access
- 2 = /dev/kmem Kernel virtual memory access
+ 2 = /dev/kmem OBSOLETE - replaced by /proc/kcore
3 = /dev/null Null device
4 = /dev/port I/O port access
5 = /dev/zero Null byte source
@@ -289,7 +289,7 @@
152 = /dev/kpoll Kernel Poll Driver
153 = /dev/mergemem Memory merge device
154 = /dev/pmu Macintosh PowerBook power manager
- 155 = /dev/isictl MultiTech ISICom serial control
+ 155 =
156 = /dev/lcd Front panel LCD display
157 = /dev/ac Applicom Intl Profibus card
158 = /dev/nwbutton Netwinder external button
@@ -319,7 +319,7 @@
182 = /dev/perfctr Performance-monitoring counters
183 = /dev/hwrng Generic random number generator
184 = /dev/cpu/microcode CPU microcode update interface
- 186 = /dev/atomicps Atomic shapshot of process state data
+ 186 = /dev/atomicps Atomic snapshot of process state data
187 = /dev/irnet IrNET device
188 = /dev/smbusbios SMBus BIOS
189 = /dev/ussp_ctl User space serial port control
@@ -375,8 +375,9 @@
239 = /dev/uhid User-space I/O driver support for HID subsystem
240 = /dev/userio Serio driver testing device
241 = /dev/vhost-vsock Host kernel driver for virtio vsock
+ 242 = /dev/rfkill Turning off radio transmissions (rfkill)
- 242-254 Reserved for local use
+ 243-254 Reserved for local use
255 Reserved for MISC_DYNAMIC_MINOR
11 char Raw keyboard device (Linux/SPARC only)
@@ -476,11 +477,6 @@
18 block Sanyo CD-ROM
0 = /dev/sjcd Sanyo CD-ROM
- 19 char Cyclades serial card
- 0 = /dev/ttyC0 First Cyclades port
- ...
- 31 = /dev/ttyC31 32nd Cyclades port
-
19 block "Double" compressed disk
0 = /dev/double0 First compressed disk
...
@@ -492,11 +488,6 @@
See the Double documentation for the meaning of the
mirror devices.
- 20 char Cyclades serial card - alternate devices
- 0 = /dev/cub0 Callout device for ttyC0
- ...
- 31 = /dev/cub31 Callout device for ttyC31
-
20 block Hitachi CD-ROM (under development)
0 = /dev/hitcd Hitachi CD-ROM
@@ -1442,7 +1433,7 @@
...
The driver and documentation may be obtained from
- http://www.winradio.com/
+ https://www.winradio.com/
82 block I2O hard disk
0 = /dev/i2o/hdag 33rd I2O hard disk, whole disk
@@ -1647,12 +1638,21 @@
0 = /dev/comedi0 First comedi device
1 = /dev/comedi1 Second comedi device
...
+ 47 = /dev/comedi47 48th comedi device
- See http://stm.lbl.gov/comedi.
+ Minors 48 to 255 are reserved for comedi subdevices with
+ pathnames of the form "/dev/comediX_subdY", where "X" is the
+ minor number of the associated comedi device and "Y" is the
+ subdevice number. These subdevice minors are assigned
+ dynamically, so there is no fixed mapping from subdevice
+ pathnames to minor numbers.
+
+ See https://www.comedi.org/ for information about the Comedi
+ project.
98 block User-mode virtual block device
0 = /dev/ubda First user-mode block device
- 16 = /dev/udbb Second user-mode block device
+ 16 = /dev/ubdb Second user-mode block device
...
Partitions are handled in the same way as for IDE
@@ -1714,7 +1714,7 @@
implementations a kernel presence for caching and easy
mounting. For more information about the project,
write to <arla-drinkers@stacken.kth.se> or see
- http://www.stacken.kth.se/project/arla/
+ https://www.stacken.kth.se/project/arla/
103 block Audit device
0 = /dev/audit Audit device
@@ -1933,7 +1933,7 @@
...
255= /dev/umem/d15p15 15th partition of 16th board.
- 117 char COSA/SRP synchronous serial card
+ 117 char [REMOVED] COSA/SRP synchronous serial card
0 = /dev/cosa0c0 1st board, 1st channel
1 = /dev/cosa0c1 1st board, 2nd channel
...
@@ -2339,13 +2339,7 @@
disks (see major number 3) except that the limit on
partitions is 31.
- 162 char Raw block device interface
- 0 = /dev/rawctl Raw I/O control device
- 1 = /dev/raw/raw1 First raw I/O device
- 2 = /dev/raw/raw2 Second raw I/O device
- ...
- max minor number of raw device is set by kernel config
- MAX_RAW_DEVS or raw module parameter 'max_raw_devs'
+ 162 char Used for (now removed) raw block device interface
163 char
@@ -2693,22 +2687,13 @@
41 = /dev/ttySMX0 Motorola i.MX - port 0
42 = /dev/ttySMX1 Motorola i.MX - port 1
43 = /dev/ttySMX2 Motorola i.MX - port 2
- 44 = /dev/ttyMM0 Marvell MPSC - port 0
- 45 = /dev/ttyMM1 Marvell MPSC - port 1
+ 44 = /dev/ttyMM0 Marvell MPSC - port 0 (obsolete unused)
+ 45 = /dev/ttyMM1 Marvell MPSC - port 1 (obsolete unused)
46 = /dev/ttyCPM0 PPC CPM (SCC or SMC) - port 0
...
- 47 = /dev/ttyCPM5 PPC CPM (SCC or SMC) - port 5
- 50 = /dev/ttyIOC0 Altix serial card
- ...
- 81 = /dev/ttyIOC31 Altix serial card
+ 51 = /dev/ttyCPM5 PPC CPM (SCC or SMC) - port 5
82 = /dev/ttyVR0 NEC VR4100 series SIU
83 = /dev/ttyVR1 NEC VR4100 series DSIU
- 84 = /dev/ttyIOC84 Altix ioc4 serial card
- ...
- 115 = /dev/ttyIOC115 Altix ioc4 serial card
- 116 = /dev/ttySIOC0 Altix ioc3 serial card
- ...
- 147 = /dev/ttySIOC31 Altix ioc3 serial card
148 = /dev/ttyPSC0 PPC PSC - port 0
...
153 = /dev/ttyPSC5 PPC PSC - port 5
@@ -2719,6 +2704,9 @@
...
185 = /dev/ttyNX15 Hilscher netX serial port 15
186 = /dev/ttyJ0 JTAG1 DCC protocol based serial port emulation
+
+ If maximum number of uartlite serial ports is more than 4, then the driver
+ uses dynamic allocation instead of static allocation for major number.
187 = /dev/ttyUL0 Xilinx uartlite - port 0
...
190 = /dev/ttyUL3 Xilinx uartlite - port 3
@@ -2767,10 +2755,7 @@
43 = /dev/ttycusmx2 Callout device for ttySMX2
46 = /dev/cucpm0 Callout device for ttyCPM0
...
- 49 = /dev/cucpm5 Callout device for ttyCPM5
- 50 = /dev/cuioc40 Callout device for ttyIOC40
- ...
- 81 = /dev/cuioc431 Callout device for ttyIOC431
+ 51 = /dev/cucpm5 Callout device for ttyCPM5
82 = /dev/cuvr0 Callout device for ttyVR0
83 = /dev/cuvr1 Callout device for ttyVR1
@@ -2993,10 +2978,10 @@
65 = /dev/infiniband/issm1 Second InfiniBand IsSM device
...
127 = /dev/infiniband/issm63 63rd InfiniBand IsSM device
- 128 = /dev/infiniband/uverbs0 First InfiniBand verbs device
- 129 = /dev/infiniband/uverbs1 Second InfiniBand verbs device
+ 192 = /dev/infiniband/uverbs0 First InfiniBand verbs device
+ 193 = /dev/infiniband/uverbs1 Second InfiniBand verbs device
...
- 159 = /dev/infiniband/uverbs31 31st InfiniBand verbs device
+ 223 = /dev/infiniband/uverbs31 31st InfiniBand verbs device
232 char Biometric Devices
0 = /dev/biometric/sensor0/fingerprint first fingerprint sensor on first device
@@ -3086,6 +3071,11 @@
...
255 = /dev/osd255 256th OSD Device
+ 261 char Compute Acceleration Devices
+ 0 = /dev/accel/accel0 First acceleration device
+ 1 = /dev/accel/accel1 Second acceleration device
+ ...
+
384-511 char RESERVED FOR DYNAMIC ASSIGNMENT
Character devices that request a dynamic allocation of major
number will take numbers starting from 511 and downward,
diff --git a/Documentation/admin-guide/dynamic-debug-howto.rst b/Documentation/admin-guide/dynamic-debug-howto.rst
index fdf72429f801..7c036590cd07 100644
--- a/Documentation/admin-guide/dynamic-debug-howto.rst
+++ b/Documentation/admin-guide/dynamic-debug-howto.rst
@@ -5,135 +5,120 @@ Dynamic debug
Introduction
============
-This document describes how to use the dynamic debug (dyndbg) feature.
+Dynamic debug allows you to dynamically enable/disable kernel
+debug-print code to obtain additional kernel information.
-Dynamic debug is designed to allow you to dynamically enable/disable
-kernel code to obtain additional kernel information. Currently, if
-``CONFIG_DYNAMIC_DEBUG`` is set, then all ``pr_debug()``/``dev_dbg()`` and
-``print_hex_dump_debug()``/``print_hex_dump_bytes()`` calls can be dynamically
-enabled per-callsite.
+If ``/proc/dynamic_debug/control`` exists, your kernel has dynamic
+debug. You'll need root access (sudo su) to use this.
-If ``CONFIG_DYNAMIC_DEBUG`` is not set, ``print_hex_dump_debug()`` is just
-shortcut for ``print_hex_dump(KERN_DEBUG)``.
+Dynamic debug provides:
-For ``print_hex_dump_debug()``/``print_hex_dump_bytes()``, format string is
-its ``prefix_str`` argument, if it is constant string; or ``hexdump``
-in case ``prefix_str`` is built dynamically.
+ * a Catalog of all *prdbgs* in your kernel.
+ ``cat /proc/dynamic_debug/control`` to see them.
-Dynamic debug has even more useful features:
-
- * Simple query language allows turning on and off debugging
- statements by matching any combination of 0 or 1 of:
+ * a Simple query/command language to alter *prdbgs* by selecting on
+ any combination of 0 or 1 of:
- source filename
- function name
- line number (including ranges of line numbers)
- module name
- format string
+ - class name (as known/declared by each module)
- * Provides a debugfs control file: ``<debugfs>/dynamic_debug/control``
- which can be read to display the complete list of known debug
- statements, to help guide you
-
-Controlling dynamic debug Behaviour
-===================================
-
-The behaviour of ``pr_debug()``/``dev_dbg()`` are controlled via writing to a
-control file in the 'debugfs' filesystem. Thus, you must first mount
-the debugfs filesystem, in order to make use of this feature.
-Subsequently, we refer to the control file as:
-``<debugfs>/dynamic_debug/control``. For example, if you want to enable
-printing from source file ``svcsock.c``, line 1603 you simply do::
-
- nullarbor:~ # echo 'file svcsock.c line 1603 +p' >
- <debugfs>/dynamic_debug/control
-
-If you make a mistake with the syntax, the write will fail thus::
-
- nullarbor:~ # echo 'file svcsock.c wtf 1 +p' >
- <debugfs>/dynamic_debug/control
- -bash: echo: write error: Invalid argument
+NOTE: To actually get the debug-print output on the console, you may
+need to adjust the kernel ``loglevel=``, or use ``ignore_loglevel``.
+Read about these kernel parameters in
+Documentation/admin-guide/kernel-parameters.rst.
Viewing Dynamic Debug Behaviour
===============================
-You can view the currently configured behaviour of all the debug
-statements via::
+You can view the currently configured behaviour in the *prdbg* catalog::
- nullarbor:~ # cat <debugfs>/dynamic_debug/control
+ :#> head -n7 /proc/dynamic_debug/control
# filename:lineno [module]function flags format
- /usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svc_rdma.c:323 [svcxprt_rdma]svc_rdma_cleanup =_ "SVCRDMA Module Removed, deregister RPC RDMA transport\012"
- /usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svc_rdma.c:341 [svcxprt_rdma]svc_rdma_init =_ "\011max_inline : %d\012"
- /usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svc_rdma.c:340 [svcxprt_rdma]svc_rdma_init =_ "\011sq_depth : %d\012"
- /usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svc_rdma.c:338 [svcxprt_rdma]svc_rdma_init =_ "\011max_requests : %d\012"
- ...
+ init/main.c:1179 [main]initcall_blacklist =_ "blacklisting initcall %s\012
+ init/main.c:1218 [main]initcall_blacklisted =_ "initcall %s blacklisted\012"
+ init/main.c:1424 [main]run_init_process =_ " with arguments:\012"
+ init/main.c:1426 [main]run_init_process =_ " %s\012"
+ init/main.c:1427 [main]run_init_process =_ " with environment:\012"
+ init/main.c:1429 [main]run_init_process =_ " %s\012"
+The 3rd space-delimited column shows the current flags, preceded by
+a ``=`` for easy use with grep/cut. ``=p`` shows enabled callsites.
-You can also apply standard Unix text manipulation filters to this
-data, e.g.::
+Controlling dynamic debug Behaviour
+===================================
- nullarbor:~ # grep -i rdma <debugfs>/dynamic_debug/control | wc -l
- 62
+The behaviour of *prdbg* sites are controlled by writing
+query/commands to the control file. Example::
- nullarbor:~ # grep -i tcp <debugfs>/dynamic_debug/control | wc -l
- 42
+ # grease the interface
+ :#> alias ddcmd='echo $* > /proc/dynamic_debug/control'
-The third column shows the currently enabled flags for each debug
-statement callsite (see below for definitions of the flags). The
-default value, with no flags enabled, is ``=_``. So you can view all
-the debug statement callsites with any non-default flags::
+ :#> ddcmd '-p; module main func run* +p'
+ :#> grep =p /proc/dynamic_debug/control
+ init/main.c:1424 [main]run_init_process =p " with arguments:\012"
+ init/main.c:1426 [main]run_init_process =p " %s\012"
+ init/main.c:1427 [main]run_init_process =p " with environment:\012"
+ init/main.c:1429 [main]run_init_process =p " %s\012"
- nullarbor:~ # awk '$3 != "=_"' <debugfs>/dynamic_debug/control
- # filename:lineno [module]function flags format
- /usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svcsock.c:1603 [sunrpc]svc_send p "svc_process: st_sendto returned %d\012"
+Error messages go to console/syslog::
+
+ :#> ddcmd mode foo +p
+ dyndbg: unknown keyword "mode"
+ dyndbg: query parse failed
+ bash: echo: write error: Invalid argument
+
+If debugfs is also enabled and mounted, ``dynamic_debug/control`` is
+also under the mount-dir, typically ``/sys/kernel/debug/``.
Command Language Reference
==========================
-At the lexical level, a command comprises a sequence of words separated
+At the basic lexical level, a command is a sequence of words separated
by spaces or tabs. So these are all equivalent::
- nullarbor:~ # echo -n 'file svcsock.c line 1603 +p' >
- <debugfs>/dynamic_debug/control
- nullarbor:~ # echo -n ' file svcsock.c line 1603 +p ' >
- <debugfs>/dynamic_debug/control
- nullarbor:~ # echo -n 'file svcsock.c line 1603 +p' >
- <debugfs>/dynamic_debug/control
+ :#> ddcmd file svcsock.c line 1603 +p
+ :#> ddcmd "file svcsock.c line 1603 +p"
+ :#> ddcmd ' file svcsock.c line 1603 +p '
Command submissions are bounded by a write() system call.
Multiple commands can be written together, separated by ``;`` or ``\n``::
- ~# echo "func pnpacpi_get_resources +p; func pnp_assign_mem +p" \
- > <debugfs>/dynamic_debug/control
-
-If your query set is big, you can batch them too::
-
- ~# cat query-batch-file > <debugfs>/dynamic_debug/control
+ :#> ddcmd "func pnpacpi_get_resources +p; func pnp_assign_mem +p"
+ :#> ddcmd <<"EOC"
+ func pnpacpi_get_resources +p
+ func pnp_assign_mem +p
+ EOC
+ :#> cat query-batch-file > /proc/dynamic_debug/control
-A another way is to use wildcard. The match rule support ``*`` (matches
-zero or more characters) and ``?`` (matches exactly one character).For
-example, you can match all usb drivers::
+You can also use wildcards in each query term. The match rule supports
+``*`` (matches zero or more characters) and ``?`` (matches exactly one
+character). For example, you can match all usb drivers::
- ~# echo "file drivers/usb/* +p" > <debugfs>/dynamic_debug/control
+ :#> ddcmd file "drivers/usb/*" +p # "" to suppress shell expansion
-At the syntactical level, a command comprises a sequence of match
-specifications, followed by a flags change specification::
+Syntactically, a command is pairs of keyword values, followed by a
+flags change or setting::
command ::= match-spec* flags-spec
-The match-spec's are used to choose a subset of the known pr_debug()
-callsites to which to apply the flags-spec. Think of them as a query
-with implicit ANDs between each pair. Note that an empty list of
-match-specs will select all debug statement callsites.
+The match-spec's select *prdbgs* from the catalog, upon which to apply
+the flags-spec, all constraints are ANDed together. An absent keyword
+is the same as keyword "*".
-A match specification comprises a keyword, which controls the
-attribute of the callsite to be compared, and a value to compare
-against. Possible keywords are:::
+
+A match specification is a keyword, which selects the attribute of
+the callsite to be compared, and a value to compare against. Possible
+keywords are:::
match-spec ::= 'func' string |
'file' string |
'module' string |
'format' string |
+ 'class' string |
'line' line-range
line-range ::= lineno |
@@ -156,15 +141,18 @@ func
of each callsite. Example::
func svc_tcp_accept
+ func *recv* # in rfcomm, bluetooth, ping, tcp
file
- The given string is compared against either the full pathname, the
- src-root relative pathname, or the basename of the source file of
- each callsite. Examples::
+ The given string is compared against either the src-root relative
+ pathname, or the basename of the source file of each callsite.
+ Examples::
file svcsock.c
- file kernel/freezer.c
- file /usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svcsock.c
+ file kernel/freezer.c # ie column 1 of control file
+ file drivers/usb/* # all callsites under it
+ file inode.c:start_* # parse :tail as a func (above)
+ file inode.c:1-100 # parse :tail as a line-range (above)
module
The given string is compared against the module name
@@ -174,6 +162,7 @@ module
module sunrpc
module nfsd
+ module drm* # both drm, drm_kms_helper
format
The given string is searched for in the dynamic debug format
@@ -191,6 +180,16 @@ format
format "nfsd: SETATTR" // a neater way to match a format with whitespace
format 'nfsd: SETATTR' // yet another way to match a format with whitespace
+class
+ The given class_name is validated against each module, which may
+ have declared a list of known class_names. If the class_name is
+ found for a module, callsite & class matching and adjustment
+ proceeds. Examples::
+
+ class DRM_UT_KMS # a DRM.debug category
+ class JUNK # silent non-match
+ // class TLD_* # NOTICE: no wildcard in class names
+
line
The given line number or range of line numbers is compared
against the line number of each ``pr_debug()`` callsite. A single
@@ -216,20 +215,20 @@ of the characters::
The flags are::
p enables the pr_debug() callsite.
- f Include the function name in the printed message
- l Include line number in the printed message
- m Include module name in the printed message
- t Include thread ID in messages not generated from interrupt context
- _ No flags are set. (Or'd with others on input)
+ _ enables no flags.
-For ``print_hex_dump_debug()`` and ``print_hex_dump_bytes()``, only ``p`` flag
-have meaning, other flags ignored.
+ Decorator flags add to the message-prefix, in order:
+ t Include thread ID, or <intr>
+ m Include module name
+ f Include the function name
+ s Include the source file name
+ l Include line number
-For display, the flags are preceded by ``=``
-(mnemonic: what the flags are currently equal to).
+For ``print_hex_dump_debug()`` and ``print_hex_dump_bytes()``, only
+the ``p`` flag has meaning, other flags are ignored.
-Note the regexp ``^[-+=][flmpt_]+$`` matches a flags specification.
-To clear all flags at once, use ``=_`` or ``-flmpt``.
+Note the regexp ``^[-+=][fslmpt_]+$`` matches a flags specification.
+To clear all flags at once, use ``=_`` or ``-fslmpt``.
Debug messages during Boot Process
@@ -237,14 +236,13 @@ Debug messages during Boot Process
To activate debug messages for core code and built-in modules during
the boot process, even before userspace and debugfs exists, use
-``dyndbg="QUERY"``, ``module.dyndbg="QUERY"``, or ``ddebug_query="QUERY"``
-(``ddebug_query`` is obsoleted by ``dyndbg``, and deprecated). QUERY follows
+``dyndbg="QUERY"`` or ``module.dyndbg="QUERY"``. QUERY follows
the syntax described above, but must not exceed 1023 characters. Your
bootloader may impose lower limits.
These ``dyndbg`` params are processed just after the ddebug tables are
-processed, as part of the arch_initcall. Thus you can enable debug
-messages in all code run after this arch_initcall via this boot
+processed, as part of the early_initcall. Thus you can enable debug
+messages in all code run after this early_initcall via this boot
parameter.
On an x86 system for example ACPI enablement is a subsys_initcall and::
@@ -258,8 +256,7 @@ this boot parameter for debugging purposes.
If ``foo`` module is not built-in, ``foo.dyndbg`` will still be processed at
boot time, without effect, but will be reprocessed when module is
-loaded later. ``dyndbg_query=`` and bare ``dyndbg=`` are only processed at
-boot.
+loaded later. Bare ``dyndbg=`` is only processed at boot.
Debug Messages at Module Initialization Time
@@ -267,7 +264,7 @@ Debug Messages at Module Initialization Time
When ``modprobe foo`` is called, modprobe scans ``/proc/cmdline`` for
``foo.params``, strips ``foo.``, and passes them to the kernel along with
-params given in modprobe args or ``/etc/modprob.d/*.conf`` files,
+params given in modprobe args or ``/etc/modprobe.d/*.conf`` files,
in the following order:
1. parameters given via ``/etc/modprobe.d/*.conf``::
@@ -301,9 +298,9 @@ The ``dyndbg`` option is a "fake" module parameter, which means:
For ``CONFIG_DYNAMIC_DEBUG`` kernels, any settings given at boot-time (or
enabled by ``-DDEBUG`` flag during compilation) can be disabled later via
-the sysfs interface if the debug messages are no longer needed::
+the debugfs interface if the debug messages are no longer needed::
- echo "module module_name -p" > <debugfs>/dynamic_debug/control
+ echo "module module_name -p" > /proc/dynamic_debug/control
Examples
========
@@ -311,43 +308,75 @@ Examples
::
// enable the message at line 1603 of file svcsock.c
- nullarbor:~ # echo -n 'file svcsock.c line 1603 +p' >
- <debugfs>/dynamic_debug/control
+ :#> ddcmd 'file svcsock.c line 1603 +p'
// enable all the messages in file svcsock.c
- nullarbor:~ # echo -n 'file svcsock.c +p' >
- <debugfs>/dynamic_debug/control
+ :#> ddcmd 'file svcsock.c +p'
// enable all the messages in the NFS server module
- nullarbor:~ # echo -n 'module nfsd +p' >
- <debugfs>/dynamic_debug/control
+ :#> ddcmd 'module nfsd +p'
// enable all 12 messages in the function svc_process()
- nullarbor:~ # echo -n 'func svc_process +p' >
- <debugfs>/dynamic_debug/control
+ :#> ddcmd 'func svc_process +p'
// disable all 12 messages in the function svc_process()
- nullarbor:~ # echo -n 'func svc_process -p' >
- <debugfs>/dynamic_debug/control
+ :#> ddcmd 'func svc_process -p'
// enable messages for NFS calls READ, READLINK, READDIR and READDIR+.
- nullarbor:~ # echo -n 'format "nfsd: READ" +p' >
- <debugfs>/dynamic_debug/control
+ :#> ddcmd 'format "nfsd: READ" +p'
// enable messages in files of which the paths include string "usb"
- nullarbor:~ # echo -n '*usb* +p' > <debugfs>/dynamic_debug/control
+ :#> ddcmd 'file *usb* +p'
// enable all messages
- nullarbor:~ # echo -n '+p' > <debugfs>/dynamic_debug/control
+ :#> ddcmd '+p'
// add module, function to all enabled messages
- nullarbor:~ # echo -n '+mf' > <debugfs>/dynamic_debug/control
+ :#> ddcmd '+mf'
// boot-args example, with newlines and comments for readability
Kernel command line: ...
- // see whats going on in dyndbg=value processing
- dynamic_debug.verbose=1
- // enable pr_debugs in 2 builtins, #cmt is stripped
- dyndbg="module params +p #cmt ; module sys +p"
+ // see what's going on in dyndbg=value processing
+ dynamic_debug.verbose=3
+ // enable pr_debugs in the btrfs module (can be builtin or loadable)
+ btrfs.dyndbg="+p"
+ // enable pr_debugs in all files under init/
+ // and the function parse_one, #cmt is stripped
+ dyndbg="file init/* +p #cmt ; func parse_one +p"
// enable pr_debugs in 2 functions in a module loaded later
pc87360.dyndbg="func pc87360_init_device +p; func pc87360_find +p"
+
+Kernel Configuration
+====================
+
+Dynamic Debug is enabled via kernel config items::
+
+ CONFIG_DYNAMIC_DEBUG=y # build catalog, enables CORE
+ CONFIG_DYNAMIC_DEBUG_CORE=y # enable mechanics only, skip catalog
+
+If you do not want to enable dynamic debug globally (i.e. in some embedded
+system), you may set ``CONFIG_DYNAMIC_DEBUG_CORE`` as basic support of dynamic
+debug and add ``ccflags := -DDYNAMIC_DEBUG_MODULE`` into the Makefile of any
+modules which you'd like to dynamically debug later.
+
+
+Kernel *prdbg* API
+==================
+
+The following functions are cataloged and controllable when dynamic
+debug is enabled::
+
+ pr_debug()
+ dev_dbg()
+ print_hex_dump_debug()
+ print_hex_dump_bytes()
+
+Otherwise, they are off by default; ``ccflags += -DDEBUG`` or
+``#define DEBUG`` in a source file will enable them appropriately.
+
+If ``CONFIG_DYNAMIC_DEBUG`` is not set, ``print_hex_dump_debug()`` is
+just a shortcut for ``print_hex_dump(KERN_DEBUG)``.
+
+For ``print_hex_dump_debug()``/``print_hex_dump_bytes()``, format string is
+its ``prefix_str`` argument, if it is constant string; or ``hexdump``
+in case ``prefix_str`` is built dynamically.
diff --git a/Documentation/admin-guide/edid.rst b/Documentation/admin-guide/edid.rst
new file mode 100644
index 000000000000..1a9b965aa486
--- /dev/null
+++ b/Documentation/admin-guide/edid.rst
@@ -0,0 +1,27 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====
+EDID
+====
+
+In the good old days when graphics parameters were configured explicitly
+in a file called xorg.conf, even broken hardware could be managed.
+
+Today, with the advent of Kernel Mode Setting, a graphics board is
+either correctly working because all components follow the standards -
+or the computer is unusable, because the screen remains dark after
+booting or it displays the wrong area. Cases when this happens are:
+
+- The graphics board does not recognize the monitor.
+- The graphics board is unable to detect any EDID data.
+- The graphics board incorrectly forwards EDID data to the driver.
+- The monitor sends no or bogus EDID data.
+- A KVM sends its own EDID data instead of querying the connected monitor.
+
+Adding the kernel parameter "nomodeset" helps in most cases, but causes
+restrictions later on.
+
+As a remedy for such situations, the kernel configuration item
+CONFIG_DRM_LOAD_EDID_FIRMWARE was introduced. It allows to provide an
+individually prepared or corrected EDID data set in the /lib/firmware
+directory from where it is loaded via the firmware interface.
diff --git a/Documentation/admin-guide/efi-stub.rst b/Documentation/admin-guide/efi-stub.rst
new file mode 100644
index 000000000000..090f3a185e18
--- /dev/null
+++ b/Documentation/admin-guide/efi-stub.rst
@@ -0,0 +1,100 @@
+=================
+The EFI Boot Stub
+=================
+
+On the x86 and ARM platforms, a kernel zImage/bzImage can masquerade
+as a PE/COFF image, thereby convincing EFI firmware loaders to load
+it as an EFI executable. The code that modifies the bzImage header,
+along with the EFI-specific entry point that the firmware loader
+jumps to are collectively known as the "EFI boot stub", and live in
+arch/x86/boot/header.S and drivers/firmware/efi/libstub/x86-stub.c,
+respectively. For ARM the EFI stub is implemented in
+arch/arm/boot/compressed/efi-header.S and
+drivers/firmware/efi/libstub/arm32-stub.c. EFI stub code that is shared
+between architectures is in drivers/firmware/efi/libstub.
+
+For arm64, there is no compressed kernel support, so the Image itself
+masquerades as a PE/COFF image and the EFI stub is linked into the
+kernel. The arm64 EFI stub lives in drivers/firmware/efi/libstub/arm64.c
+and drivers/firmware/efi/libstub/arm64-stub.c.
+
+By using the EFI boot stub it's possible to boot a Linux kernel
+without the use of a conventional EFI boot loader, such as grub or
+elilo. Since the EFI boot stub performs the jobs of a boot loader, in
+a certain sense it *IS* the boot loader.
+
+The EFI boot stub is enabled with the CONFIG_EFI_STUB kernel option.
+
+
+How to install bzImage.efi
+--------------------------
+
+The bzImage located in arch/x86/boot/bzImage must be copied to the EFI
+System Partition (ESP) and renamed with the extension ".efi". Without
+the extension the EFI firmware loader will refuse to execute it. It's
+not possible to execute bzImage.efi from the usual Linux file systems
+because EFI firmware doesn't have support for them. For ARM the
+arch/arm/boot/zImage should be copied to the system partition, and it
+may not need to be renamed. Similarly for arm64, arch/arm64/boot/Image
+should be copied but not necessarily renamed.
+
+
+Passing kernel parameters from the EFI shell
+--------------------------------------------
+
+Arguments to the kernel can be passed after bzImage.efi, e.g.::
+
+ fs0:> bzImage.efi console=ttyS0 root=/dev/sda4
+
+
+The "initrd=" option
+--------------------
+
+Like most boot loaders, the EFI stub allows the user to specify
+multiple initrd files using the "initrd=" option. This is the only EFI
+stub-specific command line parameter, everything else is passed to the
+kernel when it boots.
+
+The path to the initrd file must be an absolute path from the
+beginning of the ESP, relative path names do not work. Also, the path
+is an EFI-style path and directory elements must be separated with
+backslashes (\). For example, given the following directory layout::
+
+ fs0:>
+ Kernels\
+ bzImage.efi
+ initrd-large.img
+
+ Ramdisks\
+ initrd-small.img
+ initrd-medium.img
+
+to boot with the initrd-large.img file if the current working
+directory is fs0:\Kernels, the following command must be used::
+
+ fs0:\Kernels> bzImage.efi initrd=\Kernels\initrd-large.img
+
+Notice how bzImage.efi can be specified with a relative path. That's
+because the image we're executing is interpreted by the EFI shell,
+which understands relative paths, whereas the rest of the command line
+is passed to bzImage.efi.
+
+
+The "dtb=" option
+-----------------
+
+For the ARM and arm64 architectures, a device tree must be provided to
+the kernel. Normally firmware shall supply the device tree via the
+EFI CONFIGURATION TABLE. However, the "dtb=" command line option can
+be used to override the firmware supplied device tree, or to supply
+one when firmware is unable to.
+
+Please note: Firmware adds runtime configuration information to the
+device tree before booting the kernel. If dtb= is used to override
+the device tree, then any runtime data provided by firmware will be
+lost. The dtb= option should only be used either as a debug tool, or
+as a last resort when a device tree is not provided in the EFI
+CONFIGURATION TABLE.
+
+"dtb=" is processed in the same manner as the "initrd=" option that is
+described above.
diff --git a/Documentation/admin-guide/ext4.rst b/Documentation/admin-guide/ext4.rst
index e506d3dae510..ac0c709ea9e7 100644
--- a/Documentation/admin-guide/ext4.rst
+++ b/Documentation/admin-guide/ext4.rst
@@ -91,10 +91,50 @@ Currently Available
* large block (up to pagesize) support
* efficient new ordered mode in JBD2 and ext4 (avoid using buffer head to force
the ordering)
+* Case-insensitive file name lookups
+* file-based encryption support (fscrypt)
+* file-based verity support (fsverity)
[1] Filesystems with a block size of 1k may see a limit imposed by the
directory hash tree having a maximum depth of two.
+case-insensitive file name lookups
+======================================================
+
+The case-insensitive file name lookup feature is supported on a
+per-directory basis, allowing the user to mix case-insensitive and
+case-sensitive directories in the same filesystem. It is enabled by
+flipping the +F inode attribute of an empty directory. The
+case-insensitive string match operation is only defined when we know how
+text in encoded in a byte sequence. For that reason, in order to enable
+case-insensitive directories, the filesystem must have the
+casefold feature, which stores the filesystem-wide encoding
+model used. By default, the charset adopted is the latest version of
+Unicode (12.1.0, by the time of this writing), encoded in the UTF-8
+form. The comparison algorithm is implemented by normalizing the
+strings to the Canonical decomposition form, as defined by Unicode,
+followed by a byte per byte comparison.
+
+The case-awareness is name-preserving on the disk, meaning that the file
+name provided by userspace is a byte-per-byte match to what is actually
+written in the disk. The Unicode normalization format used by the
+kernel is thus an internal representation, and not exposed to the
+userspace nor to the disk, with the important exception of disk hashes,
+used on large case-insensitive directories with DX feature. On DX
+directories, the hash must be calculated using the casefolded version of
+the filename, meaning that the normalization format used actually has an
+impact on where the directory entry is stored.
+
+When we change from viewing filenames as opaque byte sequences to seeing
+them as encoded strings we need to address what happens when a program
+tries to create a file with an invalid name. The Unicode subsystem
+within the kernel leaves the decision of what to do in this case to the
+filesystem, which select its preferred behavior by enabling/disabling
+the strict mode. When Ext4 encounters one of those strings and the
+filesystem did not require strict mode, it falls back to considering the
+entire string as an opaque byte sequence, which still allows the user to
+operate on that file, but the case-insensitive lookups won't work.
+
Options
=======
@@ -143,14 +183,17 @@ When mounting an ext4 filesystem, the following option are accepted:
system after its metadata has been committed to the journal.
commit=nrsec (*)
- Ext4 can be told to sync all its data and metadata every 'nrsec'
- seconds. The default value is 5 seconds. This means that if you lose
- your power, you will lose as much as the latest 5 seconds of work (your
- filesystem will not be damaged though, thanks to the journaling). This
- default value (or any low value) will hurt performance, but it's good
- for data-safety. Setting it to 0 will have the same effect as leaving
- it at the default (5 seconds). Setting it to very large values will
- improve performance.
+ This setting limits the maximum age of the running transaction to
+ 'nrsec' seconds. The default value is 5 seconds. This means that if
+ you lose your power, you will lose as much as the latest 5 seconds of
+ metadata changes (your filesystem will not be damaged though, thanks
+ to the journaling). This default value (or any low value) will hurt
+ performance, but it's good for data-safety. Setting it to 0 will have
+ the same effect as leaving it at the default (5 seconds). Setting it
+ to very large values will improve performance. Note that due to
+ delayed allocation even older data can be lost on power failure since
+ writeback of those data begins only after time set in
+ /proc/sys/vm/dirty_expire_centisecs.
barrier=<0|1(*)>, barrier(*), nobarrier
This enables/disables the use of write barriers in the jbd code.
@@ -169,16 +212,6 @@ When mounting an ext4 filesystem, the following option are accepted:
that ext4's inode table readahead algorithm will pre-read into the
buffer cache. The default value is 32 blocks.
- nouser_xattr
- Disables Extended User Attributes. See the attr(5) manual page for
- more information about extended attributes.
-
- noacl
- This option disables POSIX Access Control List support. If ACL support
- is enabled in the kernel configuration (CONFIG_EXT4_FS_POSIX_ACL), ACL
- is enabled by default on mount. See the acl(5) manual page for more
- information about acl.
-
bsddf (*)
Make 'df' act like BSD.
@@ -205,11 +238,10 @@ When mounting an ext4 filesystem, the following option are accepted:
configured using tune2fs)
data_err=ignore(*)
- Just print an error message if an error occurs in a file data buffer in
- ordered mode.
+ Just print an error message if an error occurs in a file data buffer.
+
data_err=abort
- Abort the journal if an error occurs in a file data buffer in ordered
- mode.
+ Abort the journal if an error occurs in a file data buffer.
grpid | bsdgroups
New objects have the group ID of their parent.
@@ -349,9 +381,16 @@ When mounting an ext4 filesystem, the following option are accepted:
dax
Use direct access (no page cache). See
- Documentation/filesystems/dax.txt. Note that this option is
+ Documentation/filesystems/dax.rst. Note that this option is
incompatible with data=journal.
+ inlinecrypt
+ When possible, encrypt/decrypt the contents of encrypted files using the
+ blk-crypto framework rather than filesystem-layer encryption. This
+ allows the use of inline encryption hardware. The on-disk format is
+ unaffected. For more details, see
+ Documentation/block/inline-encryption.rst.
+
Data Mode
=========
There are 3 different data modes:
@@ -359,7 +398,7 @@ There are 3 different data modes:
* writeback mode
In data=writeback mode, ext4 does not journal data at all. This mode provides
- a similar level of journaling as that of XFS, JFS, and ReiserFS in its default
+ a similar level of journaling as that of XFS and JFS in its default
mode - metadata journaling. A crash+recovery can cause incorrect data to
appear in files which were written shortly before the crash. This mode will
typically provide the best ext4 performance.
@@ -479,21 +518,21 @@ Files in /sys/fs/ext4/<devname>:
Ioctls
======
-There is some Ext4 specific functionality which can be accessed by applications
-through the system call interfaces. The list of all Ext4 specific ioctls are
-shown in the table below.
+Ext4 implements various ioctls which can be used by applications to access
+ext4-specific functionality. An incomplete list of these ioctls is shown in the
+table below. This list includes truly ext4-specific ioctls (``EXT4_IOC_*``) as
+well as ioctls that may have been ext4-specific originally but are now supported
+by some other filesystem(s) too (``FS_IOC_*``).
-Table of Ext4 specific ioctls
+Table of Ext4 ioctls
- EXT4_IOC_GETFLAGS
+ FS_IOC_GETFLAGS
Get additional attributes associated with inode. The ioctl argument is
- an integer bitfield, with bit values described in ext4.h. This ioctl is
- an alias for FS_IOC_GETFLAGS.
+ an integer bitfield, with bit values described in ext4.h.
- EXT4_IOC_SETFLAGS
+ FS_IOC_SETFLAGS
Set additional attributes associated with inode. The ioctl argument is
- an integer bitfield, with bit values described in ext4.h. This ioctl is
- an alias for FS_IOC_SETFLAGS.
+ an integer bitfield, with bit values described in ext4.h.
EXT4_IOC_GETVERSION, EXT4_IOC_GETVERSION_OLD
Get the inode i_generation number stored for each inode. The
@@ -568,7 +607,7 @@ kernel source: <file:fs/ext4/>
programs: http://e2fsprogs.sourceforge.net/
-useful links: http://fedoraproject.org/wiki/ext3-devel
+useful links: https://fedoraproject.org/wiki/ext3-devel
http://www.bullopensource.org/ext4/
http://ext4.wiki.kernel.org/index.php/Main_Page
- http://fedoraproject.org/wiki/Features/Ext4
+ https://fedoraproject.org/wiki/Features/Ext4
diff --git a/Documentation/admin-guide/features.rst b/Documentation/admin-guide/features.rst
new file mode 100644
index 000000000000..7651eca38227
--- /dev/null
+++ b/Documentation/admin-guide/features.rst
@@ -0,0 +1,3 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. kernel-feat:: features
diff --git a/Documentation/admin-guide/filesystem-monitoring.rst b/Documentation/admin-guide/filesystem-monitoring.rst
new file mode 100644
index 000000000000..ab8dba76283c
--- /dev/null
+++ b/Documentation/admin-guide/filesystem-monitoring.rst
@@ -0,0 +1,78 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================================
+File system Monitoring with fanotify
+====================================
+
+File system Error Reporting
+===========================
+
+Fanotify supports the FAN_FS_ERROR event type for file system-wide error
+reporting. It is meant to be used by file system health monitoring
+daemons, which listen for these events and take actions (notify
+sysadmin, start recovery) when a file system problem is detected.
+
+By design, a FAN_FS_ERROR notification exposes sufficient information
+for a monitoring tool to know a problem in the file system has happened.
+It doesn't necessarily provide a user space application with semantics
+to verify an IO operation was successfully executed. That is out of
+scope for this feature. Instead, it is only meant as a framework for
+early file system problem detection and reporting recovery tools.
+
+When a file system operation fails, it is common for dozens of kernel
+errors to cascade after the initial failure, hiding the original failure
+log, which is usually the most useful debug data to troubleshoot the
+problem. For this reason, FAN_FS_ERROR tries to report only the first
+error that occurred for a file system since the last notification, and
+it simply counts additional errors. This ensures that the most
+important pieces of information are never lost.
+
+FAN_FS_ERROR requires the fanotify group to be setup with the
+FAN_REPORT_FID flag.
+
+At the time of this writing, the only file system that emits FAN_FS_ERROR
+notifications is Ext4.
+
+A FAN_FS_ERROR Notification has the following format::
+
+ ::
+
+ [ Notification Metadata (Mandatory) ]
+ [ Generic Error Record (Mandatory) ]
+ [ FID record (Mandatory) ]
+
+The order of records is not guaranteed, and new records might be added
+in the future. Therefore, applications must not rely on the order and
+must be prepared to skip over unknown records. Please refer to
+``samples/fanotify/fs-monitor.c`` for an example parser.
+
+Generic error record
+--------------------
+
+The generic error record provides enough information for a file system
+agnostic tool to learn about a problem in the file system, without
+providing any additional details about the problem. This record is
+identified by ``struct fanotify_event_info_header.info_type`` being set
+to FAN_EVENT_INFO_TYPE_ERROR.
+
+ ::
+
+ struct fanotify_event_info_error {
+ struct fanotify_event_info_header hdr;
+ __s32 error;
+ __u32 error_count;
+ };
+
+The `error` field identifies the type of error using errno values.
+`error_count` tracks the number of errors that occurred and were
+suppressed to preserve the original error information, since the last
+notification.
+
+FID record
+----------
+
+The FID record can be used to uniquely identify the inode that triggered
+the error through the combination of fsid and file handle. A file system
+specific application can use that information to attempt a recovery
+procedure. Errors that are not related to an inode are reported with an
+empty file handle of type FILEID_INVALID.
diff --git a/Documentation/admin-guide/gpio/gpio-aggregator.rst b/Documentation/admin-guide/gpio/gpio-aggregator.rst
new file mode 100644
index 000000000000..8374a9df9105
--- /dev/null
+++ b/Documentation/admin-guide/gpio/gpio-aggregator.rst
@@ -0,0 +1,218 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+
+GPIO Aggregator
+===============
+
+The GPIO Aggregator provides a mechanism to aggregate GPIOs, and expose them as
+a new gpio_chip. This supports the following use cases.
+
+
+Aggregating GPIOs using Sysfs
+-----------------------------
+
+GPIO controllers are exported to userspace using /dev/gpiochip* character
+devices. Access control to these devices is provided by standard UNIX file
+system permissions, on an all-or-nothing basis: either a GPIO controller is
+accessible for a user, or it is not.
+
+The GPIO Aggregator provides access control for a set of one or more GPIOs, by
+aggregating them into a new gpio_chip, which can be assigned to a group or user
+using standard UNIX file ownership and permissions. Furthermore, this
+simplifies and hardens exporting GPIOs to a virtual machine, as the VM can just
+grab the full GPIO controller, and no longer needs to care about which GPIOs to
+grab and which not, reducing the attack surface.
+
+Aggregated GPIO controllers are instantiated and destroyed by writing to
+write-only attribute files in sysfs.
+
+ /sys/bus/platform/drivers/gpio-aggregator/
+
+ "new_device" ...
+ Userspace may ask the kernel to instantiate an aggregated GPIO
+ controller by writing a string describing the GPIOs to
+ aggregate to the "new_device" file, using the format
+
+ .. code-block:: none
+
+ [<gpioA>] [<gpiochipB> <offsets>] ...
+
+ Where:
+
+ "<gpioA>" ...
+ is a GPIO line name,
+
+ "<gpiochipB>" ...
+ is a GPIO chip label, and
+
+ "<offsets>" ...
+ is a comma-separated list of GPIO offsets and/or
+ GPIO offset ranges denoted by dashes.
+
+ Example: Instantiate a new GPIO aggregator by aggregating GPIO
+ line 19 of "e6052000.gpio" and GPIO lines 20-21 of
+ "e6050000.gpio" into a new gpio_chip:
+
+ .. code-block:: sh
+
+ $ echo 'e6052000.gpio 19 e6050000.gpio 20-21' > new_device
+
+ "delete_device" ...
+ Userspace may ask the kernel to destroy an aggregated GPIO
+ controller after use by writing its device name to the
+ "delete_device" file.
+
+ Example: Destroy the previously-created aggregated GPIO
+ controller, assumed to be "gpio-aggregator.0":
+
+ .. code-block:: sh
+
+ $ echo gpio-aggregator.0 > delete_device
+
+
+Aggregating GPIOs using Configfs
+--------------------------------
+
+**Group:** ``/config/gpio-aggregator``
+
+ This is the root directory of the gpio-aggregator configfs tree.
+
+**Group:** ``/config/gpio-aggregator/<example-name>``
+
+ This directory represents a GPIO aggregator device. You can assign any
+ name to ``<example-name>`` (e.g. ``agg0``), except names starting with
+ ``_sysfs`` prefix, which are reserved for auto-generated configfs
+ entries corresponding to devices created via Sysfs.
+
+**Attribute:** ``/config/gpio-aggregator/<example-name>/live``
+
+ The ``live`` attribute allows to trigger the actual creation of the device
+ once it's fully configured. Accepted values are:
+
+ * ``1``, ``yes``, ``true`` : enable the virtual device
+ * ``0``, ``no``, ``false`` : disable the virtual device
+
+**Attribute:** ``/config/gpio-aggregator/<example-name>/dev_name``
+
+ The read-only ``dev_name`` attribute exposes the name of the device as it
+ will appear in the system on the platform bus (e.g. ``gpio-aggregator.0``).
+ This is useful for identifying a character device for the newly created
+ aggregator. If it's ``gpio-aggregator.0``,
+ ``/sys/devices/platform/gpio-aggregator.0/gpiochipX`` path tells you that the
+ GPIO device id is ``X``.
+
+You must create subdirectories for each virtual line you want to
+instantiate, named exactly as ``line0``, ``line1``, ..., ``lineY``, when
+you want to instantiate ``Y+1`` (Y >= 0) lines. Configure all lines before
+activating the device by setting ``live`` to 1.
+
+**Group:** ``/config/gpio-aggregator/<example-name>/<lineY>/``
+
+ This directory represents a GPIO line to include in the aggregator.
+
+**Attribute:** ``/config/gpio-aggregator/<example-name>/<lineY>/key``
+
+**Attribute:** ``/config/gpio-aggregator/<example-name>/<lineY>/offset``
+
+ The default values after creating the ``<lineY>`` directory are:
+
+ * ``key`` : <empty>
+ * ``offset`` : -1
+
+ ``key`` must always be explicitly configured, while ``offset`` depends.
+ Two configuration patterns exist for each ``<lineY>``:
+
+ (a). For lookup by GPIO line name:
+
+ * Set ``key`` to the line name.
+ * Ensure ``offset`` remains -1 (the default).
+
+ (b). For lookup by GPIO chip name and the line offset within the chip:
+
+ * Set ``key`` to the chip name.
+ * Set ``offset`` to the line offset (0 <= ``offset`` < 65535).
+
+**Attribute:** ``/config/gpio-aggregator/<example-name>/<lineY>/name``
+
+ The ``name`` attribute sets a custom name for lineY. If left unset, the
+ line will remain unnamed.
+
+Once the configuration is done, the ``'live'`` attribute must be set to 1
+in order to instantiate the aggregator device. It can be set back to 0 to
+destroy the virtual device. The module will synchronously wait for the new
+aggregator device to be successfully probed and if this doesn't happen, writing
+to ``'live'`` will result in an error. This is a different behaviour from the
+case when you create it using sysfs ``new_device`` interface.
+
+.. note::
+
+ For aggregators created via Sysfs, the configfs entries are
+ auto-generated and appear as ``/config/gpio-aggregator/_sysfs.<N>/``. You
+ cannot add or remove line directories with mkdir(2)/rmdir(2). To modify
+ lines, you must use the "delete_device" interface to tear down the
+ existing device and reconfigure it from scratch. However, you can still
+ toggle the aggregator with the ``live`` attribute and adjust the
+ ``key``, ``offset``, and ``name`` attributes for each line when ``live``
+ is set to 0 by hand (i.e. it's not waiting for deferred probe).
+
+Sample configuration commands
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: sh
+
+ # Create a directory for an aggregator device
+ $ mkdir /sys/kernel/config/gpio-aggregator/agg0
+
+ # Configure each line
+ $ mkdir /sys/kernel/config/gpio-aggregator/agg0/line0
+ $ echo gpiochip0 > /sys/kernel/config/gpio-aggregator/agg0/line0/key
+ $ echo 6 > /sys/kernel/config/gpio-aggregator/agg0/line0/offset
+ $ echo test0 > /sys/kernel/config/gpio-aggregator/agg0/line0/name
+ $ mkdir /sys/kernel/config/gpio-aggregator/agg0/line1
+ $ echo gpiochip0 > /sys/kernel/config/gpio-aggregator/agg0/line1/key
+ $ echo 7 > /sys/kernel/config/gpio-aggregator/agg0/line1/offset
+ $ echo test1 > /sys/kernel/config/gpio-aggregator/agg0/line1/name
+
+ # Activate the aggregator device
+ $ echo 1 > /sys/kernel/config/gpio-aggregator/agg0/live
+
+
+Generic GPIO Driver
+-------------------
+
+The GPIO Aggregator can also be used as a generic driver for a simple
+GPIO-operated device described in DT, without a dedicated in-kernel driver.
+This is useful in industrial control, and is not unlike e.g. spidev, which
+allows the user to communicate with an SPI device from userspace.
+
+Binding a device to the GPIO Aggregator is performed either by modifying the
+gpio-aggregator driver, or by writing to the "driver_override" file in Sysfs.
+
+Example: If "door" is a GPIO-operated device described in DT, using its own
+compatible value::
+
+ door {
+ compatible = "myvendor,mydoor";
+
+ gpios = <&gpio2 19 GPIO_ACTIVE_HIGH>,
+ <&gpio2 20 GPIO_ACTIVE_LOW>;
+ gpio-line-names = "open", "lock";
+ };
+
+it can be bound to the GPIO Aggregator by either:
+
+1. Adding its compatible value to ``gpio_aggregator_dt_ids[]``,
+2. Binding manually using "driver_override":
+
+.. code-block:: sh
+
+ $ echo gpio-aggregator > /sys/bus/platform/devices/door/driver_override
+ $ echo door > /sys/bus/platform/drivers/gpio-aggregator/bind
+
+After that, a new gpiochip "door" has been created:
+
+.. code-block:: sh
+
+ $ gpioinfo door
+ gpiochip12 - 2 lines:
+ line 0: "open" unused input active-high
+ line 1: "lock" unused input active-high
diff --git a/Documentation/admin-guide/gpio/gpio-mockup.rst b/Documentation/admin-guide/gpio/gpio-mockup.rst
new file mode 100644
index 000000000000..d6e7438a7550
--- /dev/null
+++ b/Documentation/admin-guide/gpio/gpio-mockup.rst
@@ -0,0 +1,59 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+
+GPIO Testing Driver
+===================
+
+.. note::
+
+ This module has been obsoleted by the more flexible gpio-sim.rst.
+ New developments should use that API and existing developments are
+ encouraged to migrate as soon as possible.
+ This module will continue to be maintained but no new features will be
+ added.
+
+The GPIO Testing Driver (gpio-mockup) provides a way to create simulated GPIO
+chips for testing purposes. The lines exposed by these chips can be accessed
+using the standard GPIO character device interface as well as manipulated
+using the dedicated debugfs directory structure.
+
+Creating simulated chips using module params
+--------------------------------------------
+
+When loading the gpio-mockup driver a number of parameters can be passed to the
+module.
+
+ gpio_mockup_ranges
+
+ This parameter takes an argument in the form of an array of integer
+ pairs. Each pair defines the base GPIO number (non-negative integer)
+ and the first number after the last of this chip. If the base GPIO
+ is -1, the gpiolib will assign it automatically. while the following
+ parameter is the number of lines exposed by the chip.
+
+ Example: gpio_mockup_ranges=-1,8,-1,16,405,409
+
+ The line above creates three chips. The first one will expose 8 lines,
+ the second 16 and the third 4. The base GPIO for the third chip is set
+ to 405 while for two first chips it will be assigned automatically.
+
+ gpio_mockup_named_lines
+
+ This parameter doesn't take any arguments. It lets the driver know that
+ GPIO lines exposed by it should be named.
+
+ The name format is: gpio-mockup-X-Y where X is mockup chip's ID
+ and Y is the line offset.
+
+Manipulating simulated lines
+----------------------------
+
+Each mockup chip creates its own subdirectory in /sys/kernel/debug/gpio-mockup/.
+The directory is named after the chip's label. A symlink is also created, named
+after the chip's name, which points to the label directory.
+
+Inside each subdirectory, there's a separate attribute for each GPIO line. The
+name of the attribute represents the line's offset in the chip.
+
+Reading from a line attribute returns the current value. Writing to it (0 or 1)
+changes the configuration of the simulated pull-up/pull-down resistor
+(1 - pull-up, 0 - pull-down).
diff --git a/Documentation/admin-guide/gpio/gpio-sim.rst b/Documentation/admin-guide/gpio/gpio-sim.rst
new file mode 100644
index 000000000000..f5135a14ef2e
--- /dev/null
+++ b/Documentation/admin-guide/gpio/gpio-sim.rst
@@ -0,0 +1,137 @@
+.. SPDX-License-Identifier: GPL-2.0-or-later
+
+Configfs GPIO Simulator
+=======================
+
+The configfs GPIO Simulator (gpio-sim) provides a way to create simulated GPIO
+chips for testing purposes. The lines exposed by these chips can be accessed
+using the standard GPIO character device interface as well as manipulated
+using sysfs attributes.
+
+Creating simulated chips
+------------------------
+
+The gpio-sim module registers a configfs subsystem called ``'gpio-sim'``. For
+details of the configfs filesystem, please refer to the configfs documentation.
+
+The user can create a hierarchy of configfs groups and items as well as modify
+values of exposed attributes. Once the chip is instantiated, this hierarchy
+will be translated to appropriate device properties. The general structure is:
+
+**Group:** ``/config/gpio-sim``
+
+This is the top directory of the gpio-sim configfs tree.
+
+**Group:** ``/config/gpio-sim/gpio-device``
+
+**Attribute:** ``/config/gpio-sim/gpio-device/dev_name``
+
+**Attribute:** ``/config/gpio-sim/gpio-device/live``
+
+This is a directory representing a GPIO platform device. The ``'dev_name'``
+attribute is read-only and allows the user-space to read the platform device
+name (e.g. ``'gpio-sim.0'``). The ``'live'`` attribute allows to trigger the
+actual creation of the device once it's fully configured. The accepted values
+are: ``'1'`` to enable the simulated device and ``'0'`` to disable and tear
+it down.
+
+**Group:** ``/config/gpio-sim/gpio-device/gpio-bankX``
+
+**Attribute:** ``/config/gpio-sim/gpio-device/gpio-bankX/chip_name``
+
+**Attribute:** ``/config/gpio-sim/gpio-device/gpio-bankX/num_lines``
+
+This group represents a bank of GPIOs under the top platform device. The
+``'chip_name'`` attribute is read-only and allows the user-space to read the
+device name of the bank device. The ``'num_lines'`` attribute allows to specify
+the number of lines exposed by this bank.
+
+**Group:** ``/config/gpio-sim/gpio-device/gpio-bankX/lineY``
+
+**Attribute:** ``/config/gpio-sim/gpio-device/gpio-bankX/lineY/name``
+
+**Attribute:** ``/config/gpio-sim/gpio-device/gpio-bankX/lineY/valid``
+
+This group represents a single line at the offset Y. The ``valid`` attribute
+indicates whether the line can be used as GPIO. The ``name`` attribute allows
+to set the line name as represented by the 'gpio-line-names' property.
+
+**Item:** ``/config/gpio-sim/gpio-device/gpio-bankX/lineY/hog``
+
+**Attribute:** ``/config/gpio-sim/gpio-device/gpio-bankX/lineY/hog/name``
+
+**Attribute:** ``/config/gpio-sim/gpio-device/gpio-bankX/lineY/hog/direction``
+
+This item makes the gpio-sim module hog the associated line. The ``'name'``
+attribute specifies the in-kernel consumer name to use. The ``'direction'``
+attribute specifies the hog direction and must be one of: ``'input'``,
+``'output-high'`` and ``'output-low'``.
+
+Inside each bank directory, there's a set of attributes that can be used to
+configure the new chip. Additionally the user can ``mkdir()`` subdirectories
+inside the chip's directory that allow to pass additional configuration for
+specific lines. The name of those subdirectories must take the form of:
+``'line<offset>'`` (e.g. ``'line0'``, ``'line20'``, etc.) as the name will be
+used by the module to assign the config to the specific line at given offset.
+
+Once the configuration is complete, the ``'live'`` attribute must be set to 1 in
+order to instantiate the chip. It can be set back to 0 to destroy the simulated
+chip. The module will synchronously wait for the new simulated device to be
+successfully probed and if this doesn't happen, writing to ``'live'`` will
+result in an error.
+
+Simulated GPIO chips can also be defined in device-tree. The compatible string
+must be: ``"gpio-simulator"``. Supported properties are:
+
+ ``"gpio-sim,label"`` - chip label
+
+Other standard GPIO properties (like ``"gpio-line-names"``, ``"ngpios"`` or
+``"gpio-hog"``) are also supported. Please refer to the GPIO documentation for
+details.
+
+An example device-tree code defining a GPIO simulator:
+
+.. code-block :: none
+
+ gpio-sim {
+ compatible = "gpio-simulator";
+
+ bank0 {
+ gpio-controller;
+ #gpio-cells = <2>;
+ ngpios = <16>;
+ gpio-sim,label = "dt-bank0";
+ gpio-line-names = "", "sim-foo", "", "sim-bar";
+ };
+
+ bank1 {
+ gpio-controller;
+ #gpio-cells = <2>;
+ ngpios = <8>;
+ gpio-sim,label = "dt-bank1";
+
+ line3 {
+ gpio-hog;
+ gpios = <3 0>;
+ output-high;
+ line-name = "sim-hog-from-dt";
+ };
+ };
+ };
+
+Manipulating simulated lines
+----------------------------
+
+Each simulated GPIO chip creates a separate sysfs group under its device
+directory for each exposed line
+(e.g. ``/sys/devices/platform/gpio-sim.X/gpiochipY/``). The name of each group
+is of the form: ``'sim_gpioX'`` where X is the offset of the line. Inside each
+group there are two attributes:
+
+ ``pull`` - allows to read and set the current simulated pull setting for
+ every line, when writing the value must be one of: ``'pull-up'``,
+ ``'pull-down'``
+
+ ``value`` - allows to read the current value of the line which may be
+ different from the pull if the line is being driven from
+ user-space
diff --git a/Documentation/admin-guide/gpio/gpio-virtuser.rst b/Documentation/admin-guide/gpio/gpio-virtuser.rst
new file mode 100644
index 000000000000..7e7c0df51640
--- /dev/null
+++ b/Documentation/admin-guide/gpio/gpio-virtuser.rst
@@ -0,0 +1,177 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+
+Virtual GPIO Consumer
+=====================
+
+The virtual GPIO Consumer module allows users to instantiate virtual devices
+that request GPIOs and then control their behavior over debugfs. Virtual
+consumer devices can be instantiated from device-tree or over configfs.
+
+A virtual consumer uses the driver-facing GPIO APIs and allows to cover it with
+automated tests driven by user-space. The GPIOs are requested using
+``gpiod_get_array()`` and so we support multiple GPIOs per connector ID.
+
+Creating GPIO consumers
+-----------------------
+
+The gpio-consumer module registers a configfs subsystem called
+``'gpio-virtuser'``. For details of the configfs filesystem, please refer to
+the configfs documentation.
+
+The user can create a hierarchy of configfs groups and items as well as modify
+values of exposed attributes. Once the consumer is instantiated, this hierarchy
+will be translated to appropriate device properties. The general structure is:
+
+**Group:** ``/config/gpio-virtuser``
+
+This is the top directory of the gpio-consumer configfs tree.
+
+**Group:** ``/config/gpio-consumer/example-name``
+
+**Attribute:** ``/config/gpio-consumer/example-name/live``
+
+**Attribute:** ``/config/gpio-consumer/example-name/dev_name``
+
+This is a directory representing a GPIO consumer device.
+
+The read-only ``dev_name`` attribute exposes the name of the device as it will
+appear in the system on the platform bus. This is useful for locating the
+associated debugfs directory under
+``/sys/kernel/debug/gpio-virtuser/$dev_name``.
+
+The ``'live'`` attribute allows to trigger the actual creation of the device
+once it's fully configured. The accepted values are: ``'1'`` to enable the
+virtual device and ``'0'`` to disable and tear it down.
+
+Creating GPIO lookup tables
+---------------------------
+
+Users can create a number of configfs groups under the device group:
+
+**Group:** ``/config/gpio-consumer/example-name/con_id``
+
+The ``'con_id'`` directory represents a single GPIO lookup and its value maps
+to the ``'con_id'`` argument of the ``gpiod_get()`` function. For example:
+``con_id`` == ``'reset'`` maps to the ``reset-gpios`` device property.
+
+Users can assign a number of GPIOs to each lookup. Each GPIO is a sub-directory
+with a user-defined name under the ``'con_id'`` group.
+
+**Attribute:** ``/config/gpio-consumer/example-name/con_id/0/key``
+
+**Attribute:** ``/config/gpio-consumer/example-name/con_id/0/offset``
+
+**Attribute:** ``/config/gpio-consumer/example-name/con_id/0/drive``
+
+**Attribute:** ``/config/gpio-consumer/example-name/con_id/0/pull``
+
+**Attribute:** ``/config/gpio-consumer/example-name/con_id/0/active_low``
+
+**Attribute:** ``/config/gpio-consumer/example-name/con_id/0/transitory``
+
+This is a group describing a single GPIO in the ``con_id-gpios`` property.
+
+For virtual consumers created using configfs we use machine lookup tables so
+this group can be considered as a mapping between the filesystem and the fields
+of a single entry in ``'struct gpiod_lookup'``.
+
+The ``'key'`` attribute represents either the name of the chip this GPIO
+belongs to or the GPIO line name. This depends on the value of the ``'offset'``
+attribute: if its value is >= 0, then ``'key'`` represents the label of the
+chip to lookup while ``'offset'`` represents the offset of the line in that
+chip. If ``'offset'`` is < 0, then ``'key'`` represents the name of the line.
+
+The remaining attributes map to the ``'flags'`` field of the GPIO lookup
+struct. The first two take string values as arguments:
+
+**``'drive'``:** ``'push-pull'``, ``'open-drain'``, ``'open-source'``
+**``'pull'``:** ``'pull-up'``, ``'pull-down'``, ``'pull-disabled'``, ``'as-is'``
+
+``'active_low'`` and ``'transitory'`` are boolean attributes.
+
+Activating GPIO consumers
+-------------------------
+
+Once the configuration is complete, the ``'live'`` attribute must be set to 1 in
+order to instantiate the consumer. It can be set back to 0 to destroy the
+virtual device. The module will synchronously wait for the new simulated device
+to be successfully probed and if this doesn't happen, writing to ``'live'`` will
+result in an error.
+
+Device-tree
+-----------
+
+Virtual GPIO consumers can also be defined in device-tree. The compatible string
+must be: ``"gpio-virtuser"`` with at least one property following the
+standardized GPIO pattern.
+
+An example device-tree code defining a virtual GPIO consumer:
+
+.. code-block :: none
+
+ gpio-virt-consumer {
+ compatible = "gpio-virtuser";
+
+ foo-gpios = <&gpio0 5 GPIO_ACTIVE_LOW>, <&gpio1 2 0>;
+ bar-gpios = <&gpio0 6 0>;
+ };
+
+Controlling virtual GPIO consumers
+----------------------------------
+
+Once active, the device will export debugfs attributes for controlling GPIO
+arrays as well as each requested GPIO line separately. Let's consider the
+following device property: ``foo-gpios = <&gpio0 0 0>, <&gpio0 4 0>;``.
+
+The following debugfs attribute groups will be created:
+
+**Group:** ``/sys/kernel/debug/gpio-virtuser/$dev_name/gpiod:foo/``
+
+This is the group that will contain the attributes for the entire GPIO array.
+
+**Attribute:** ``/sys/kernel/debug/gpio-virtuser/$dev_name/gpiod:foo/values``
+
+**Attribute:** ``/sys/kernel/debug/gpio-virtuser/$dev_name/gpiod:foo/values_atomic``
+
+Both attributes allow to read and set arrays of GPIO values. User must pass
+exactly the number of values that the array contains in the form of a string
+containing zeroes and ones representing inactive and active GPIO states
+respectively. In this example: ``echo 11 > values``.
+
+The ``values_atomic`` attribute works the same as ``values`` but the kernel
+will execute the GPIO driver callbacks in interrupt context.
+
+**Group:** ``/sys/kernel/debug/gpio-virtuser/$dev_name/gpiod:foo:$index/``
+
+This is a group that represents a single GPIO with ``$index`` being its offset
+in the array.
+
+**Attribute:** ``/sys/kernel/debug/gpio-virtuser/$dev_name/gpiod:foo:$index/consumer``
+
+Allows to set and read the consumer label of the GPIO line.
+
+**Attribute:** ``/sys/kernel/debug/gpio-virtuser/$dev_name/gpiod:foo:$index/debounce``
+
+Allows to set and read the debounce period of the GPIO line.
+
+**Attribute:** ``/sys/kernel/debug/gpio-virtuser/$dev_name/gpiod:foo:$index/direction``
+
+**Attribute:** ``/sys/kernel/debug/gpio-virtuser/$dev_name/gpiod:foo:$index/direction_atomic``
+
+These two attributes allow to set the direction of the GPIO line. They accept
+"input" and "output" as values. The atomic variant executes the driver callback
+in interrupt context.
+
+**Attribute:** ``/sys/kernel/debug/gpio-virtuser/$dev_name/gpiod:foo:$index/interrupts``
+
+If the line is requested in input mode, writing ``1`` to this attribute will
+make the module listen for edge interrupts on the GPIO. Writing ``0`` disables
+the monitoring. Reading this attribute returns the current number of registered
+interrupts (both edges).
+
+**Attribute:** ``/sys/kernel/debug/gpio-virtuser/$dev_name/gpiod:foo:$index/value``
+
+**Attribute:** ``/sys/kernel/debug/gpio-virtuser/$dev_name/gpiod:foo:$index/value_atomic``
+
+Both attributes allow to read and set values of individual requested GPIO lines.
+They accept the following values: ``1`` and ``0``.
diff --git a/Documentation/admin-guide/gpio/index.rst b/Documentation/admin-guide/gpio/index.rst
new file mode 100644
index 000000000000..712f379731cb
--- /dev/null
+++ b/Documentation/admin-guide/gpio/index.rst
@@ -0,0 +1,21 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====
+GPIO
+====
+
+.. toctree::
+ :maxdepth: 1
+
+ Character Device Userspace API <../../userspace-api/gpio/chardev>
+ gpio-aggregator
+ gpio-sim
+ gpio-virtuser
+ Obsolete APIs <obsolete>
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
diff --git a/Documentation/admin-guide/gpio/obsolete.rst b/Documentation/admin-guide/gpio/obsolete.rst
new file mode 100644
index 000000000000..5adbff02d61f
--- /dev/null
+++ b/Documentation/admin-guide/gpio/obsolete.rst
@@ -0,0 +1,13 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==================
+Obsolete GPIO APIs
+==================
+
+.. toctree::
+ :maxdepth: 1
+
+ Character Device Userspace API (v1) <../../userspace-api/gpio/chardev_v1>
+ Sysfs Interface <../../userspace-api/gpio/sysfs>
+ Mockup Testing Module <gpio-mockup>
+
diff --git a/Documentation/admin-guide/hw-vuln/attack_vector_controls.rst b/Documentation/admin-guide/hw-vuln/attack_vector_controls.rst
new file mode 100644
index 000000000000..d0bdbd81dcf9
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/attack_vector_controls.rst
@@ -0,0 +1,236 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Attack Vector Controls
+======================
+
+Attack vector controls provide a simple method to configure only the mitigations
+for CPU vulnerabilities which are relevant given the intended use of a system.
+Administrators are encouraged to consider which attack vectors are relevant and
+disable all others in order to recoup system performance.
+
+When new relevant CPU vulnerabilities are found, they will be added to these
+attack vector controls so administrators will likely not need to reconfigure
+their command line parameters as mitigations will continue to be correctly
+applied based on the chosen attack vector controls.
+
+Attack Vectors
+--------------
+
+There are 5 sets of attack-vector mitigations currently supported by the kernel:
+
+#. :ref:`user_kernel`
+#. :ref:`user_user`
+#. :ref:`guest_host`
+#. :ref:`guest_guest`
+#. :ref:`smt`
+
+To control the enabled attack vectors, see :ref:`cmdline`.
+
+.. _user_kernel:
+
+User-to-Kernel
+^^^^^^^^^^^^^^
+
+The user-to-kernel attack vector involves a malicious userspace program
+attempting to leak kernel data into userspace by exploiting a CPU vulnerability.
+The kernel data involved might be limited to certain kernel memory, or include
+all memory in the system, depending on the vulnerability exploited.
+
+If no untrusted userspace applications are being run, such as with single-user
+systems, consider disabling user-to-kernel mitigations.
+
+Note that the CPU vulnerabilities mitigated by Linux have generally not been
+shown to be exploitable from browser-based sandboxes. User-to-kernel
+mitigations are therefore mostly relevant if unknown userspace applications may
+be run by untrusted users.
+
+*user-to-kernel mitigations are enabled by default*
+
+.. _user_user:
+
+User-to-User
+^^^^^^^^^^^^
+
+The user-to-user attack vector involves a malicious userspace program attempting
+to influence the behavior of another unsuspecting userspace program in order to
+exfiltrate data. The vulnerability of a userspace program is based on the
+program itself and the interfaces it provides.
+
+If no untrusted userspace applications are being run, consider disabling
+user-to-user mitigations.
+
+Note that because the Linux kernel contains a mapping of all physical memory,
+preventing a malicious userspace program from leaking data from another
+userspace program requires mitigating user-to-kernel attacks as well for
+complete protection.
+
+*user-to-user mitigations are enabled by default*
+
+.. _guest_host:
+
+Guest-to-Host
+^^^^^^^^^^^^^
+
+The guest-to-host attack vector involves a malicious VM attempting to leak
+hypervisor data into the VM. The data involved may be limited, or may
+potentially include all memory in the system, depending on the vulnerability
+exploited.
+
+If no untrusted VMs are being run, consider disabling guest-to-host mitigations.
+
+*guest-to-host mitigations are enabled by default if KVM support is present*
+
+.. _guest_guest:
+
+Guest-to-Guest
+^^^^^^^^^^^^^^
+
+The guest-to-guest attack vector involves a malicious VM attempting to influence
+the behavior of another unsuspecting VM in order to exfiltrate data. The
+vulnerability of a VM is based on the code inside the VM itself and the
+interfaces it provides.
+
+If no untrusted VMs, or only a single VM is being run, consider disabling
+guest-to-guest mitigations.
+
+Similar to the user-to-user attack vector, preventing a malicious VM from
+leaking data from another VM requires mitigating guest-to-host attacks as well
+due to the Linux kernel phys map.
+
+*guest-to-guest mitigations are enabled by default if KVM support is present*
+
+.. _smt:
+
+Cross-Thread
+^^^^^^^^^^^^
+
+The cross-thread attack vector involves a malicious userspace program or
+malicious VM either observing or attempting to influence the behavior of code
+running on the SMT sibling thread in order to exfiltrate data.
+
+Many cross-thread attacks can only be mitigated if SMT is disabled, which will
+result in reduced CPU core count and reduced performance.
+
+If cross-thread mitigations are fully enabled ('auto,nosmt'), all mitigations
+for cross-thread attacks will be enabled. SMT may be disabled depending on
+which vulnerabilities are present in the CPU.
+
+If cross-thread mitigations are partially enabled ('auto'), mitigations for
+cross-thread attacks will be enabled but SMT will not be disabled.
+
+If cross-thread mitigations are disabled, no mitigations for cross-thread
+attacks will be enabled.
+
+Cross-thread mitigation may not be required if core-scheduling or similar
+techniques are used to prevent untrusted workloads from running on SMT siblings.
+
+*cross-thread mitigations default to partially enabled*
+
+.. _cmdline:
+
+Command Line Controls
+---------------------
+
+Attack vectors are controlled through the mitigations= command line option. The
+value provided begins with a global option and then may optionally include one
+or more options to disable various attack vectors.
+
+Format:
+ | ``mitigations=[global]``
+ | ``mitigations=[global],[attack vectors]``
+
+Global options:
+
+============ =============================================================
+Option Description
+============ =============================================================
+'off' All attack vectors disabled.
+'auto' All attack vectors enabled, partial cross-thread mitigations.
+'auto,nosmt' All attack vectors enabled, full cross-thread mitigations.
+============ =============================================================
+
+Attack vector options:
+
+================= =======================================
+Option Description
+================= =======================================
+'no_user_kernel' Disables user-to-kernel mitigations.
+'no_user_user' Disables user-to-user mitigations.
+'no_guest_host' Disables guest-to-host mitigations.
+'no_guest_guest' Disables guest-to-guest mitigations
+'no_cross_thread' Disables all cross-thread mitigations.
+================= =======================================
+
+Multiple attack vector options may be specified in a comma-separated list. If
+the global option is not specified, it defaults to 'auto'. The global option
+'off' is equivalent to disabling all attack vectors.
+
+Examples:
+ | ``mitigations=auto,no_user_kernel``
+
+ Enable all attack vectors except user-to-kernel. Partial cross-thread
+ mitigations.
+
+ | ``mitigations=auto,nosmt,no_guest_host,no_guest_guest``
+
+ Enable all attack vectors and cross-thread mitigations except for
+ guest-to-host and guest-to-guest mitigations.
+
+ | ``mitigations=,no_cross_thread``
+
+ Enable all attack vectors but not cross-thread mitigations.
+
+Interactions with command-line options
+--------------------------------------
+
+Vulnerability-specific controls (e.g. "retbleed=off") take precedence over all
+attack vector controls. Mitigations for individual vulnerabilities may be
+turned on or off via their command-line options regardless of the attack vector
+controls.
+
+Summary of attack-vector mitigations
+------------------------------------
+
+When a vulnerability is mitigated due to an attack-vector control, the default
+mitigation option for that particular vulnerability is used. To use a different
+mitigation, please use the vulnerability-specific command line option.
+
+The table below summarizes which vulnerabilities are mitigated when different
+attack vectors are enabled and assuming the CPU is vulnerable.
+
+=============== ============== ============ ============= ============== ============ ========
+Vulnerability User-to-Kernel User-to-User Guest-to-Host Guest-to-Guest Cross-Thread Notes
+=============== ============== ============ ============= ============== ============ ========
+BHI X X
+ITS X X
+GDS X X X X * (Note 1)
+L1TF X X * (Note 2)
+MDS X X X X * (Note 2)
+MMIO X X X X * (Note 2)
+Meltdown X
+Retbleed X X * (Note 3)
+RFDS X X X X
+Spectre_v1 X
+Spectre_v2 X X
+Spectre_v2_user X X * (Note 1)
+SRBDS X X X X
+SRSO X X X X
+SSB X
+TAA X X X X * (Note 2)
+TSA X X X X
+VMSCAPE X
+=============== ============== ============ ============= ============== ============ ========
+
+Notes:
+ 1 -- Can be mitigated without disabling SMT.
+
+ 2 -- Disables SMT if cross-thread mitigations are fully enabled and the CPU
+ is vulnerable
+
+ 3 -- Disables SMT if cross-thread mitigations are fully enabled, the CPU is
+ vulnerable, and STIBP is not supported
+
+When an attack-vector is disabled, all mitigations for the vulnerabilities
+listed in the above table are disabled, unless mitigation is required for a
+different enabled attack-vector or a mitigation is explicitly selected via a
+vulnerability-specific command line option.
diff --git a/Documentation/admin-guide/hw-vuln/core-scheduling.rst b/Documentation/admin-guide/hw-vuln/core-scheduling.rst
new file mode 100644
index 000000000000..a92e10ec402e
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/core-scheduling.rst
@@ -0,0 +1,226 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============
+Core Scheduling
+===============
+Core scheduling support allows userspace to define groups of tasks that can
+share a core. These groups can be specified either for security usecases (one
+group of tasks don't trust another), or for performance usecases (some
+workloads may benefit from running on the same core as they don't need the same
+hardware resources of the shared core, or may prefer different cores if they
+do share hardware resource needs). This document only describes the security
+usecase.
+
+Security usecase
+----------------
+A cross-HT attack involves the attacker and victim running on different Hyper
+Threads of the same core. MDS and L1TF are examples of such attacks. The only
+full mitigation of cross-HT attacks is to disable Hyper Threading (HT). Core
+scheduling is a scheduler feature that can mitigate some (not all) cross-HT
+attacks. It allows HT to be turned on safely by ensuring that only tasks in a
+user-designated trusted group can share a core. This increase in core sharing
+can also improve performance, however it is not guaranteed that performance
+will always improve, though that is seen to be the case with a number of real
+world workloads. In theory, core scheduling aims to perform at least as good as
+when Hyper Threading is disabled. In practice, this is mostly the case though
+not always: as synchronizing scheduling decisions across 2 or more CPUs in a
+core involves additional overhead - especially when the system is lightly
+loaded. When ``total_threads <= N_CPUS/2``, the extra overhead may cause core
+scheduling to perform more poorly compared to SMT-disabled, where N_CPUS is the
+total number of CPUs. Please measure the performance of your workloads always.
+
+Usage
+-----
+Core scheduling support is enabled via the ``CONFIG_SCHED_CORE`` config option.
+Using this feature, userspace defines groups of tasks that can be co-scheduled
+on the same core. The core scheduler uses this information to make sure that
+tasks that are not in the same group never run simultaneously on a core, while
+doing its best to satisfy the system's scheduling requirements.
+
+Core scheduling can be enabled via the ``PR_SCHED_CORE`` prctl interface.
+This interface provides support for the creation of core scheduling groups, as
+well as admission and removal of tasks from created groups::
+
+ #include <sys/prctl.h>
+
+ int prctl(int option, unsigned long arg2, unsigned long arg3,
+ unsigned long arg4, unsigned long arg5);
+
+option:
+ ``PR_SCHED_CORE``
+
+arg2:
+ Command for operation, must be one off:
+
+ - ``PR_SCHED_CORE_GET`` -- get core_sched cookie of ``pid``.
+ - ``PR_SCHED_CORE_CREATE`` -- create a new unique cookie for ``pid``.
+ - ``PR_SCHED_CORE_SHARE_TO`` -- push core_sched cookie to ``pid``.
+ - ``PR_SCHED_CORE_SHARE_FROM`` -- pull core_sched cookie from ``pid``.
+
+arg3:
+ ``pid`` of the task for which the operation applies.
+
+arg4:
+ ``pid_type`` for which the operation applies. It is one of
+ ``PR_SCHED_CORE_SCOPE_``-prefixed macro constants. For example, if arg4
+ is ``PR_SCHED_CORE_SCOPE_THREAD_GROUP``, then the operation of this command
+ will be performed for all tasks in the task group of ``pid``.
+
+arg5:
+ userspace pointer to an unsigned long long for storing the cookie returned
+ by ``PR_SCHED_CORE_GET`` command. Should be 0 for all other commands.
+
+In order for a process to push a cookie to, or pull a cookie from a process, it
+is required to have the ptrace access mode: `PTRACE_MODE_READ_REALCREDS` to the
+process.
+
+Building hierarchies of tasks
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The simplest way to build hierarchies of threads/processes which share a
+cookie and thus a core is to rely on the fact that the core-sched cookie is
+inherited across forks/clones and execs, thus setting a cookie for the
+'initial' script/executable/daemon will place every spawned child in the
+same core-sched group.
+
+Cookie Transferral
+~~~~~~~~~~~~~~~~~~
+Transferring a cookie between the current and other tasks is possible using
+PR_SCHED_CORE_SHARE_FROM and PR_SCHED_CORE_SHARE_TO to inherit a cookie from a
+specified task or a share a cookie with a task. In combination this allows a
+simple helper program to pull a cookie from a task in an existing core
+scheduling group and share it with already running tasks.
+
+Design/Implementation
+---------------------
+Each task that is tagged is assigned a cookie internally in the kernel. As
+mentioned in `Usage`_, tasks with the same cookie value are assumed to trust
+each other and share a core.
+
+The basic idea is that, every schedule event tries to select tasks for all the
+siblings of a core such that all the selected tasks running on a core are
+trusted (same cookie) at any point in time. Kernel threads are assumed trusted.
+The idle task is considered special, as it trusts everything and everything
+trusts it.
+
+During a schedule() event on any sibling of a core, the highest priority task on
+the sibling's core is picked and assigned to the sibling calling schedule(), if
+the sibling has the task enqueued. For rest of the siblings in the core,
+highest priority task with the same cookie is selected if there is one runnable
+in their individual run queues. If a task with same cookie is not available,
+the idle task is selected. Idle task is globally trusted.
+
+Once a task has been selected for all the siblings in the core, an IPI is sent to
+siblings for whom a new task was selected. Siblings on receiving the IPI will
+switch to the new task immediately. If an idle task is selected for a sibling,
+then the sibling is considered to be in a `forced idle` state. I.e., it may
+have tasks on its on runqueue to run, however it will still have to run idle.
+More on this in the next section.
+
+Forced-idling of hyperthreads
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The scheduler tries its best to find tasks that trust each other such that all
+tasks selected to be scheduled are of the highest priority in a core. However,
+it is possible that some runqueues had tasks that were incompatible with the
+highest priority ones in the core. Favoring security over fairness, one or more
+siblings could be forced to select a lower priority task if the highest
+priority task is not trusted with respect to the core wide highest priority
+task. If a sibling does not have a trusted task to run, it will be forced idle
+by the scheduler (idle thread is scheduled to run).
+
+When the highest priority task is selected to run, a reschedule-IPI is sent to
+the sibling to force it into idle. This results in 4 cases which need to be
+considered depending on whether a VM or a regular usermode process was running
+on either HT::
+
+ HT1 (attack) HT2 (victim)
+ A idle -> user space user space -> idle
+ B idle -> user space guest -> idle
+ C idle -> guest user space -> idle
+ D idle -> guest guest -> idle
+
+Note that for better performance, we do not wait for the destination CPU
+(victim) to enter idle mode. This is because the sending of the IPI would bring
+the destination CPU immediately into kernel mode from user space, or VMEXIT
+in the case of guests. At best, this would only leak some scheduler metadata
+which may not be worth protecting. It is also possible that the IPI is received
+too late on some architectures, but this has not been observed in the case of
+x86.
+
+Trust model
+~~~~~~~~~~~
+Core scheduling maintains trust relationships amongst groups of tasks by
+assigning them a tag that is the same cookie value.
+When a system with core scheduling boots, all tasks are considered to trust
+each other. This is because the core scheduler does not have information about
+trust relationships until userspace uses the above mentioned interfaces, to
+communicate them. In other words, all tasks have a default cookie value of 0.
+and are considered system-wide trusted. The forced-idling of siblings running
+cookie-0 tasks is also avoided.
+
+Once userspace uses the above mentioned interfaces to group sets of tasks, tasks
+within such groups are considered to trust each other, but do not trust those
+outside. Tasks outside the group also don't trust tasks within.
+
+Limitations of core-scheduling
+------------------------------
+Core scheduling tries to guarantee that only trusted tasks run concurrently on a
+core. But there could be small window of time during which untrusted tasks run
+concurrently or kernel could be running concurrently with a task not trusted by
+kernel.
+
+IPI processing delays
+~~~~~~~~~~~~~~~~~~~~~
+Core scheduling selects only trusted tasks to run together. IPI is used to notify
+the siblings to switch to the new task. But there could be hardware delays in
+receiving of the IPI on some arch (on x86, this has not been observed). This may
+cause an attacker task to start running on a CPU before its siblings receive the
+IPI. Even though cache is flushed on entry to user mode, victim tasks on siblings
+may populate data in the cache and micro architectural buffers after the attacker
+starts to run and this is a possibility for data leak.
+
+Open cross-HT issues that core scheduling does not solve
+--------------------------------------------------------
+1. For MDS
+~~~~~~~~~~
+Core scheduling cannot protect against MDS attacks between the siblings
+running in user mode and the others running in kernel mode. Even though all
+siblings run tasks which trust each other, when the kernel is executing
+code on behalf of a task, it cannot trust the code running in the
+sibling. Such attacks are possible for any combination of sibling CPU modes
+(host or guest mode).
+
+2. For L1TF
+~~~~~~~~~~~
+Core scheduling cannot protect against an L1TF guest attacker exploiting a
+guest or host victim. This is because the guest attacker can craft invalid
+PTEs which are not inverted due to a vulnerable guest kernel. The only
+solution is to disable EPT (Extended Page Tables).
+
+For both MDS and L1TF, if the guest vCPU is configured to not trust each
+other (by tagging separately), then the guest to guest attacks would go away.
+Or it could be a system admin policy which considers guest to guest attacks as
+a guest problem.
+
+Another approach to resolve these would be to make every untrusted task on the
+system to not trust every other untrusted task. While this could reduce
+parallelism of the untrusted tasks, it would still solve the above issues while
+allowing system processes (trusted tasks) to share a core.
+
+3. Protecting the kernel (IRQ, syscall, VMEXIT)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Unfortunately, core scheduling does not protect kernel contexts running on
+sibling hyperthreads from one another. Prototypes of mitigations have been posted
+to LKML to solve this, but it is debatable whether such windows are practically
+exploitable, and whether the performance overhead of the prototypes are worth
+it (not to mention, the added code complexity).
+
+Other Use cases
+---------------
+The main use case for Core scheduling is mitigating the cross-HT vulnerabilities
+with SMT enabled. There are other use cases where this feature could be used:
+
+- Isolating tasks that needs a whole core: Examples include realtime tasks, tasks
+ that uses SIMD instructions etc.
+- Gang scheduling: Requirements for a group of tasks that needs to be scheduled
+ together could also be realized using core scheduling. One example is vCPUs of
+ a VM.
diff --git a/Documentation/admin-guide/hw-vuln/cross-thread-rsb.rst b/Documentation/admin-guide/hw-vuln/cross-thread-rsb.rst
new file mode 100644
index 000000000000..875616d675fe
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/cross-thread-rsb.rst
@@ -0,0 +1,91 @@
+
+.. SPDX-License-Identifier: GPL-2.0
+
+Cross-Thread Return Address Predictions
+=======================================
+
+Certain AMD and Hygon processors are subject to a cross-thread return address
+predictions vulnerability. When running in SMT mode and one sibling thread
+transitions out of C0 state, the other sibling thread could use return target
+predictions from the sibling thread that transitioned out of C0.
+
+The Spectre v2 mitigations protect the Linux kernel, as it fills the return
+address prediction entries with safe targets when context switching to the idle
+thread. However, KVM does allow a VMM to prevent exiting guest mode when
+transitioning out of C0. This could result in a guest-controlled return target
+being consumed by the sibling thread.
+
+Affected processors
+-------------------
+
+The following CPUs are vulnerable:
+
+ - AMD Family 17h processors
+ - Hygon Family 18h processors
+
+Related CVEs
+------------
+
+The following CVE entry is related to this issue:
+
+ ============== =======================================
+ CVE-2022-27672 Cross-Thread Return Address Predictions
+ ============== =======================================
+
+Problem
+-------
+
+Affected SMT-capable processors support 1T and 2T modes of execution when SMT
+is enabled. In 2T mode, both threads in a core are executing code. For the
+processor core to enter 1T mode, it is required that one of the threads
+requests to transition out of the C0 state. This can be communicated with the
+HLT instruction or with an MWAIT instruction that requests non-C0.
+When the thread re-enters the C0 state, the processor transitions back
+to 2T mode, assuming the other thread is also still in C0 state.
+
+In affected processors, the return address predictor (RAP) is partitioned
+depending on the SMT mode. For instance, in 2T mode each thread uses a private
+16-entry RAP, but in 1T mode, the active thread uses a 32-entry RAP. Upon
+transition between 1T/2T mode, the RAP contents are not modified but the RAP
+pointers (which control the next return target to use for predictions) may
+change. This behavior may result in return targets from one SMT thread being
+used by RET predictions in the sibling thread following a 1T/2T switch. In
+particular, a RET instruction executed immediately after a transition to 1T may
+use a return target from the thread that just became idle. In theory, this
+could lead to information disclosure if the return targets used do not come
+from trustworthy code.
+
+Attack scenarios
+----------------
+
+An attack can be mounted on affected processors by performing a series of CALL
+instructions with targeted return locations and then transitioning out of C0
+state.
+
+Mitigation mechanism
+--------------------
+
+Before entering idle state, the kernel context switches to the idle thread. The
+context switch fills the RAP entries (referred to as the RSB in Linux) with safe
+targets by performing a sequence of CALL instructions.
+
+Prevent a guest VM from directly putting the processor into an idle state by
+intercepting HLT and MWAIT instructions.
+
+Both mitigations are required to fully address this issue.
+
+Mitigation control on the kernel command line
+---------------------------------------------
+
+Use existing Spectre v2 mitigations that will fill the RSB on context switch.
+
+Mitigation control for KVM - module parameter
+---------------------------------------------
+
+By default, the KVM hypervisor mitigates this issue by intercepting guest
+attempts to transition out of C0. A VMM can use the KVM_CAP_X86_DISABLE_EXITS
+capability to override those interceptions, but since this is not common, the
+mitigation that covers this path is not enabled by default.
+
+The mitigation for the KVM_CAP_X86_DISABLE_EXITS capability can be turned on
+using the boolean module parameter mitigate_smt_rsb, e.g. ``kvm.mitigate_smt_rsb=1``.
diff --git a/Documentation/admin-guide/hw-vuln/gather_data_sampling.rst b/Documentation/admin-guide/hw-vuln/gather_data_sampling.rst
new file mode 100644
index 000000000000..264bfa937f7d
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/gather_data_sampling.rst
@@ -0,0 +1,109 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+GDS - Gather Data Sampling
+==========================
+
+Gather Data Sampling is a hardware vulnerability which allows unprivileged
+speculative access to data which was previously stored in vector registers.
+
+Problem
+-------
+When a gather instruction performs loads from memory, different data elements
+are merged into the destination vector register. However, when a gather
+instruction that is transiently executed encounters a fault, stale data from
+architectural or internal vector registers may get transiently forwarded to the
+destination vector register instead. This will allow a malicious attacker to
+infer stale data using typical side channel techniques like cache timing
+attacks. GDS is a purely sampling-based attack.
+
+The attacker uses gather instructions to infer the stale vector register data.
+The victim does not need to do anything special other than use the vector
+registers. The victim does not need to use gather instructions to be
+vulnerable.
+
+Because the buffers are shared between Hyper-Threads cross Hyper-Thread attacks
+are possible.
+
+Attack scenarios
+----------------
+Without mitigation, GDS can infer stale data across virtually all
+permission boundaries:
+
+ Non-enclaves can infer SGX enclave data
+ Userspace can infer kernel data
+ Guests can infer data from hosts
+ Guest can infer guest from other guests
+ Users can infer data from other users
+
+Because of this, it is important to ensure that the mitigation stays enabled in
+lower-privilege contexts like guests and when running outside SGX enclaves.
+
+The hardware enforces the mitigation for SGX. Likewise, VMMs should ensure
+that guests are not allowed to disable the GDS mitigation. If a host erred and
+allowed this, a guest could theoretically disable GDS mitigation, mount an
+attack, and re-enable it.
+
+Mitigation mechanism
+--------------------
+This issue is mitigated in microcode. The microcode defines the following new
+bits:
+
+ ================================ === ============================
+ IA32_ARCH_CAPABILITIES[GDS_CTRL] R/O Enumerates GDS vulnerability
+ and mitigation support.
+ IA32_ARCH_CAPABILITIES[GDS_NO] R/O Processor is not vulnerable.
+ IA32_MCU_OPT_CTRL[GDS_MITG_DIS] R/W Disables the mitigation
+ 0 by default.
+ IA32_MCU_OPT_CTRL[GDS_MITG_LOCK] R/W Locks GDS_MITG_DIS=0. Writes
+ to GDS_MITG_DIS are ignored
+ Can't be cleared once set.
+ ================================ === ============================
+
+GDS can also be mitigated on systems that don't have updated microcode by
+disabling AVX. This can be done by setting gather_data_sampling="force" or
+"clearcpuid=avx" on the kernel command-line.
+
+If used, these options will disable AVX use by turning off XSAVE YMM support.
+However, the processor will still enumerate AVX support. Userspace that
+does not follow proper AVX enumeration to check both AVX *and* XSAVE YMM
+support will break.
+
+Mitigation control on the kernel command line
+---------------------------------------------
+The mitigation can be disabled by setting "gather_data_sampling=off" or
+"mitigations=off" on the kernel command line. Not specifying either will default
+to the mitigation being enabled. Specifying "gather_data_sampling=force" will
+use the microcode mitigation when available or disable AVX on affected systems
+where the microcode hasn't been updated to include the mitigation.
+
+GDS System Information
+------------------------
+The kernel provides vulnerability status information through sysfs. For
+GDS this can be accessed by the following sysfs file:
+
+/sys/devices/system/cpu/vulnerabilities/gather_data_sampling
+
+The possible values contained in this file are:
+
+ ============================== =============================================
+ Not affected Processor not vulnerable.
+ Vulnerable Processor vulnerable and mitigation disabled.
+ Vulnerable: No microcode Processor vulnerable and microcode is missing
+ mitigation.
+ Mitigation: AVX disabled,
+ no microcode Processor is vulnerable and microcode is missing
+ mitigation. AVX disabled as mitigation.
+ Mitigation: Microcode Processor is vulnerable and mitigation is in
+ effect.
+ Mitigation: Microcode (locked) Processor is vulnerable and mitigation is in
+ effect and cannot be disabled.
+ Unknown: Dependent on
+ hypervisor status Running on a virtual guest processor that is
+ affected but with no way to know if host
+ processor is mitigated or vulnerable.
+ ============================== =============================================
+
+GDS Default mitigation
+----------------------
+The updated microcode will enable the mitigation by default. The kernel's
+default action is to leave the mitigation enabled.
diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst
new file mode 100644
index 000000000000..55d747511f83
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/index.rst
@@ -0,0 +1,29 @@
+========================
+Hardware vulnerabilities
+========================
+
+This section describes CPU vulnerabilities and provides an overview of the
+possible mitigations along with guidance for selecting mitigations if they
+are configurable at compile, boot or run time.
+
+.. toctree::
+ :maxdepth: 1
+
+ attack_vector_controls
+ spectre
+ l1tf
+ mds
+ tsx_async_abort
+ multihit
+ special-register-buffer-data-sampling
+ core-scheduling
+ l1d_flush
+ processor_mmio_stale_data
+ cross-thread-rsb
+ srso
+ gather_data_sampling
+ reg-file-data-sampling
+ rsb
+ old_microcode
+ indirect-target-selection
+ vmscape
diff --git a/Documentation/admin-guide/hw-vuln/indirect-target-selection.rst b/Documentation/admin-guide/hw-vuln/indirect-target-selection.rst
new file mode 100644
index 000000000000..d9ca64108d23
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/indirect-target-selection.rst
@@ -0,0 +1,168 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Indirect Target Selection (ITS)
+===============================
+
+ITS is a vulnerability in some Intel CPUs that support Enhanced IBRS and were
+released before Alder Lake. ITS may allow an attacker to control the prediction
+of indirect branches and RETs located in the lower half of a cacheline.
+
+ITS is assigned CVE-2024-28956 with a CVSS score of 4.7 (Medium).
+
+Scope of Impact
+---------------
+- **eIBRS Guest/Host Isolation**: Indirect branches in KVM/kernel may still be
+ predicted with unintended target corresponding to a branch in the guest.
+
+- **Intra-Mode BTI**: In-kernel training such as through cBPF or other native
+ gadgets.
+
+- **Indirect Branch Prediction Barrier (IBPB)**: After an IBPB, indirect
+ branches may still be predicted with targets corresponding to direct branches
+ executed prior to the IBPB. This is fixed by the IPU 2025.1 microcode, which
+ should be available via distro updates. Alternatively microcode can be
+ obtained from Intel's github repository [#f1]_.
+
+Affected CPUs
+-------------
+Below is the list of ITS affected CPUs [#f2]_ [#f3]_:
+
+ ======================== ============ ==================== ===============
+ Common name Family_Model eIBRS Intra-mode BTI
+ Guest/Host Isolation
+ ======================== ============ ==================== ===============
+ SKYLAKE_X (step >= 6) 06_55H Affected Affected
+ ICELAKE_X 06_6AH Not affected Affected
+ ICELAKE_D 06_6CH Not affected Affected
+ ICELAKE_L 06_7EH Not affected Affected
+ TIGERLAKE_L 06_8CH Not affected Affected
+ TIGERLAKE 06_8DH Not affected Affected
+ KABYLAKE_L (step >= 12) 06_8EH Affected Affected
+ KABYLAKE (step >= 13) 06_9EH Affected Affected
+ COMETLAKE 06_A5H Affected Affected
+ COMETLAKE_L 06_A6H Affected Affected
+ ROCKETLAKE 06_A7H Not affected Affected
+ ======================== ============ ==================== ===============
+
+- All affected CPUs enumerate Enhanced IBRS feature.
+- IBPB isolation is affected on all ITS affected CPUs, and need a microcode
+ update for mitigation.
+- None of the affected CPUs enumerate BHI_CTRL which was introduced in Golden
+ Cove (Alder Lake and Sapphire Rapids). This can help guests to determine the
+ host's affected status.
+- Intel Atom CPUs are not affected by ITS.
+
+Mitigation
+----------
+As only the indirect branches and RETs that have their last byte of instruction
+in the lower half of the cacheline are vulnerable to ITS, the basic idea behind
+the mitigation is to not allow indirect branches in the lower half.
+
+This is achieved by relying on existing retpoline support in the kernel, and in
+compilers. ITS-vulnerable retpoline sites are runtime patched to point to newly
+added ITS-safe thunks. These safe thunks consists of indirect branch in the
+second half of the cacheline. Not all retpoline sites are patched to thunks, if
+a retpoline site is evaluated to be ITS-safe, it is replaced with an inline
+indirect branch.
+
+Dynamic thunks
+~~~~~~~~~~~~~~
+From a dynamically allocated pool of safe-thunks, each vulnerable site is
+replaced with a new thunk, such that they get a unique address. This could
+improve the branch prediction accuracy. Also, it is a defense-in-depth measure
+against aliasing.
+
+Note, for simplicity, indirect branches in eBPF programs are always replaced
+with a jump to a static thunk in __x86_indirect_its_thunk_array. If required,
+in future this can be changed to use dynamic thunks.
+
+All vulnerable RETs are replaced with a static thunk, they do not use dynamic
+thunks. This is because RETs get their prediction from RSB mostly that does not
+depend on source address. RETs that underflow RSB may benefit from dynamic
+thunks. But, RETs significantly outnumber indirect branches, and any benefit
+from a unique source address could be outweighed by the increased icache
+footprint and iTLB pressure.
+
+Retpoline
+~~~~~~~~~
+Retpoline sequence also mitigates ITS-unsafe indirect branches. For this
+reason, when retpoline is enabled, ITS mitigation only relocates the RETs to
+safe thunks. Unless user requested the RSB-stuffing mitigation.
+
+RSB Stuffing
+~~~~~~~~~~~~
+RSB-stuffing via Call Depth Tracking is a mitigation for Retbleed RSB-underflow
+attacks. And it also mitigates RETs that are vulnerable to ITS.
+
+Mitigation in guests
+^^^^^^^^^^^^^^^^^^^^
+All guests deploy ITS mitigation by default, irrespective of eIBRS enumeration
+and Family/Model of the guest. This is because eIBRS feature could be hidden
+from a guest. One exception to this is when a guest enumerates BHI_DIS_S, which
+indicates that the guest is running on an unaffected host.
+
+To prevent guests from unnecessarily deploying the mitigation on unaffected
+platforms, Intel has defined ITS_NO bit(62) in MSR IA32_ARCH_CAPABILITIES. When
+a guest sees this bit set, it should not enumerate the ITS bug. Note, this bit
+is not set by any hardware, but is **intended for VMMs to synthesize** it for
+guests as per the host's affected status.
+
+Mitigation options
+^^^^^^^^^^^^^^^^^^
+The ITS mitigation can be controlled using the "indirect_target_selection"
+kernel parameter. The available options are:
+
+ ======== ===================================================================
+ on (default) Deploy the "Aligned branch/return thunks" mitigation.
+ If spectre_v2 mitigation enables retpoline, aligned-thunks are only
+ deployed for the affected RET instructions. Retpoline mitigates
+ indirect branches.
+
+ off Disable ITS mitigation.
+
+ vmexit Equivalent to "=on" if the CPU is affected by guest/host isolation
+ part of ITS. Otherwise, mitigation is not deployed. This option is
+ useful when host userspace is not in the threat model, and only
+ attacks from guest to host are considered.
+
+ stuff Deploy RSB-fill mitigation when retpoline is also deployed.
+ Otherwise, deploy the default mitigation. When retpoline mitigation
+ is enabled, RSB-stuffing via Call-Depth-Tracking also mitigates
+ ITS.
+
+ force Force the ITS bug and deploy the default mitigation.
+ ======== ===================================================================
+
+Sysfs reporting
+---------------
+
+The sysfs file showing ITS mitigation status is:
+
+ /sys/devices/system/cpu/vulnerabilities/indirect_target_selection
+
+Note, microcode mitigation status is not reported in this file.
+
+The possible values in this file are:
+
+.. list-table::
+
+ * - Not affected
+ - The processor is not vulnerable.
+ * - Vulnerable
+ - System is vulnerable and no mitigation has been applied.
+ * - Vulnerable, KVM: Not affected
+ - System is vulnerable to intra-mode BTI, but not affected by eIBRS
+ guest/host isolation.
+ * - Mitigation: Aligned branch/return thunks
+ - The mitigation is enabled, affected indirect branches and RETs are
+ relocated to safe thunks.
+ * - Mitigation: Retpolines, Stuffing RSB
+ - The mitigation is enabled using retpoline and RSB stuffing.
+
+References
+----------
+.. [#f1] Microcode repository - https://github.com/intel/Intel-Linux-Processor-Microcode-Data-Files
+
+.. [#f2] Affected Processors list - https://www.intel.com/content/www/us/en/developer/topic-technology/software-security-guidance/processors-affected-consolidated-product-cpu-model.html
+
+.. [#f3] Affected Processors list (machine readable) - https://github.com/intel/Intel-affected-processor-list
diff --git a/Documentation/admin-guide/hw-vuln/l1d_flush.rst b/Documentation/admin-guide/hw-vuln/l1d_flush.rst
new file mode 100644
index 000000000000..210020bc3f56
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/l1d_flush.rst
@@ -0,0 +1,69 @@
+L1D Flushing
+============
+
+With an increasing number of vulnerabilities being reported around data
+leaks from the Level 1 Data cache (L1D) the kernel provides an opt-in
+mechanism to flush the L1D cache on context switch.
+
+This mechanism can be used to address e.g. CVE-2020-0550. For applications
+the mechanism keeps them safe from vulnerabilities, related to leaks
+(snooping of) from the L1D cache.
+
+
+Related CVEs
+------------
+The following CVEs can be addressed by this
+mechanism
+
+ ============= ======================== ==================
+ CVE-2020-0550 Improper Data Forwarding OS related aspects
+ ============= ======================== ==================
+
+Usage Guidelines
+----------------
+
+Please see document: :ref:`Documentation/userspace-api/spec_ctrl.rst
+<set_spec_ctrl>` for details.
+
+**NOTE**: The feature is disabled by default, applications need to
+specifically opt into the feature to enable it.
+
+Mitigation
+----------
+
+When PR_SET_L1D_FLUSH is enabled for a task a flush of the L1D cache is
+performed when the task is scheduled out and the incoming task belongs to a
+different process and therefore to a different address space.
+
+If the underlying CPU supports L1D flushing in hardware, the hardware
+mechanism is used, software fallback for the mitigation, is not supported.
+
+Mitigation control on the kernel command line
+---------------------------------------------
+
+The kernel command line allows to control the L1D flush mitigations at boot
+time with the option "l1d_flush=". The valid arguments for this option are:
+
+ ============ =============================================================
+ on Enables the prctl interface, applications trying to use
+ the prctl() will fail with an error if l1d_flush is not
+ enabled
+ ============ =============================================================
+
+By default the mechanism is disabled.
+
+Limitations
+-----------
+
+The mechanism does not mitigate L1D data leaks between tasks belonging to
+different processes which are concurrently executing on sibling threads of
+a physical CPU core when SMT is enabled on the system.
+
+This can be addressed by controlled placement of processes on physical CPU
+cores or by disabling SMT. See the relevant chapter in the L1TF mitigation
+document: :ref:`Documentation/admin-guide/hw-vuln/l1tf.rst <smt_control>`.
+
+**NOTE** : The opt-in of a task for L1D flushing works only when the task's
+affinity is limited to cores running in non-SMT mode. If a task which
+requested L1D flushing is scheduled on a SMT-enabled core the kernel sends
+a SIGBUS to the task.
diff --git a/Documentation/admin-guide/l1tf.rst b/Documentation/admin-guide/hw-vuln/l1tf.rst
index b85dd80510b0..3eeeb488d955 100644
--- a/Documentation/admin-guide/l1tf.rst
+++ b/Documentation/admin-guide/hw-vuln/l1tf.rst
@@ -241,7 +241,7 @@ Guest mitigation mechanisms
For further information about confining guests to a single or to a group
of cores consult the cpusets documentation:
- https://www.kernel.org/doc/Documentation/cgroup-v1/cpusets.txt
+ https://www.kernel.org/doc/Documentation/admin-guide/cgroup-v1/cpusets.rst
.. _interrupt_isolation:
@@ -268,7 +268,7 @@ Guest mitigation mechanisms
/proc/irq/$NR/smp_affinity[_list] files. Limited documentation is
available at:
- https://www.kernel.org/doc/Documentation/IRQ-affinity.txt
+ https://www.kernel.org/doc/Documentation/core-api/irq/irq-affinity.rst
.. _smt_control:
@@ -405,6 +405,9 @@ time with the option "l1tf=". The valid arguments for this option are:
off Disables hypervisor mitigations and doesn't emit any
warnings.
+ It also drops the swap size and available RAM limit restrictions
+ on both hypervisor and bare metal.
+
============ =============================================================
The default is 'flush'. For details about L1D flushing see :ref:`l1d_flush`.
@@ -442,6 +445,7 @@ The default is 'cond'. If 'l1tf=full,force' is given on the kernel command
line, then 'always' is enforced and the kvm-intel.vmentry_l1d_flush
module parameter is ignored and writes to the sysfs file are rejected.
+.. _mitigation_selection:
Mitigation selection guide
--------------------------
@@ -576,7 +580,8 @@ Default mitigations
The kernel default mitigations for vulnerable processors are:
- PTE inversion to protect against malicious user space. This is done
- unconditionally and cannot be controlled.
+ unconditionally and cannot be controlled. The swap storage is limited
+ to ~16TB.
- L1D conditional flushing on VMENTER when EPT is enabled for
a guest.
diff --git a/Documentation/admin-guide/hw-vuln/mds.rst b/Documentation/admin-guide/hw-vuln/mds.rst
new file mode 100644
index 000000000000..754679db0ce8
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/mds.rst
@@ -0,0 +1,303 @@
+MDS - Microarchitectural Data Sampling
+======================================
+
+Microarchitectural Data Sampling is a hardware vulnerability which allows
+unprivileged speculative access to data which is available in various CPU
+internal buffers.
+
+Affected processors
+-------------------
+
+This vulnerability affects a wide range of Intel processors. The
+vulnerability is not present on:
+
+ - Processors from AMD, Centaur and other non Intel vendors
+
+ - Older processor models, where the CPU family is < 6
+
+ - Some Atoms (Bonnell, Saltwell, Goldmont, GoldmontPlus)
+
+ - Intel processors which have the ARCH_CAP_MDS_NO bit set in the
+ IA32_ARCH_CAPABILITIES MSR.
+
+Whether a processor is affected or not can be read out from the MDS
+vulnerability file in sysfs. See :ref:`mds_sys_info`.
+
+Not all processors are affected by all variants of MDS, but the mitigation
+is identical for all of them so the kernel treats them as a single
+vulnerability.
+
+Related CVEs
+------------
+
+The following CVE entries are related to the MDS vulnerability:
+
+ ============== ===== ===================================================
+ CVE-2018-12126 MSBDS Microarchitectural Store Buffer Data Sampling
+ CVE-2018-12130 MFBDS Microarchitectural Fill Buffer Data Sampling
+ CVE-2018-12127 MLPDS Microarchitectural Load Port Data Sampling
+ CVE-2019-11091 MDSUM Microarchitectural Data Sampling Uncacheable Memory
+ ============== ===== ===================================================
+
+Problem
+-------
+
+When performing store, load, L1 refill operations, processors write data
+into temporary microarchitectural structures (buffers). The data in the
+buffer can be forwarded to load operations as an optimization.
+
+Under certain conditions, usually a fault/assist caused by a load
+operation, data unrelated to the load memory address can be speculatively
+forwarded from the buffers. Because the load operation causes a fault or
+assist and its result will be discarded, the forwarded data will not cause
+incorrect program execution or state changes. But a malicious operation
+may be able to forward this speculative data to a disclosure gadget which
+allows in turn to infer the value via a cache side channel attack.
+
+Because the buffers are potentially shared between Hyper-Threads cross
+Hyper-Thread attacks are possible.
+
+Deeper technical information is available in the MDS specific x86
+architecture section: :ref:`Documentation/arch/x86/mds.rst <mds>`.
+
+
+Attack scenarios
+----------------
+
+Attacks against the MDS vulnerabilities can be mounted from malicious non-
+privileged user space applications running on hosts or guest. Malicious
+guest OSes can obviously mount attacks as well.
+
+Contrary to other speculation based vulnerabilities the MDS vulnerability
+does not allow the attacker to control the memory target address. As a
+consequence the attacks are purely sampling based, but as demonstrated with
+the TLBleed attack samples can be postprocessed successfully.
+
+Web-Browsers
+^^^^^^^^^^^^
+
+ It's unclear whether attacks through Web-Browsers are possible at
+ all. The exploitation through Java-Script is considered very unlikely,
+ but other widely used web technologies like Webassembly could possibly be
+ abused.
+
+
+.. _mds_sys_info:
+
+MDS system information
+-----------------------
+
+The Linux kernel provides a sysfs interface to enumerate the current MDS
+status of the system: whether the system is vulnerable, and which
+mitigations are active. The relevant sysfs file is:
+
+/sys/devices/system/cpu/vulnerabilities/mds
+
+The possible values in this file are:
+
+ .. list-table::
+
+ * - 'Not affected'
+ - The processor is not vulnerable
+ * - 'Vulnerable'
+ - The processor is vulnerable, but no mitigation enabled
+ * - 'Vulnerable: Clear CPU buffers attempted, no microcode'
+ - The processor is vulnerable but microcode is not updated. The
+ mitigation is enabled on a best effort basis.
+
+ If the processor is vulnerable but the availability of the microcode
+ based mitigation mechanism is not advertised via CPUID, the kernel
+ selects a best effort mitigation mode. This mode invokes the mitigation
+ instructions without a guarantee that they clear the CPU buffers.
+
+ This is done to address virtualization scenarios where the host has the
+ microcode update applied, but the hypervisor is not yet updated to
+ expose the CPUID to the guest. If the host has updated microcode the
+ protection takes effect; otherwise a few CPU cycles are wasted
+ pointlessly.
+ * - 'Mitigation: Clear CPU buffers'
+ - The processor is vulnerable and the CPU buffer clearing mitigation is
+ enabled.
+
+If the processor is vulnerable then the following information is appended
+to the above information:
+
+ ======================== ============================================
+ 'SMT vulnerable' SMT is enabled
+ 'SMT mitigated' SMT is enabled and mitigated
+ 'SMT disabled' SMT is disabled
+ 'SMT Host state unknown' Kernel runs in a VM, Host SMT state unknown
+ ======================== ============================================
+
+Mitigation mechanism
+-------------------------
+
+The kernel detects the affected CPUs and the presence of the microcode
+which is required.
+
+If a CPU is affected and the microcode is available, then the kernel
+enables the mitigation by default. The mitigation can be controlled at boot
+time via a kernel command line option. See
+:ref:`mds_mitigation_control_command_line`.
+
+.. _cpu_buffer_clear:
+
+CPU buffer clearing
+^^^^^^^^^^^^^^^^^^^
+
+ The mitigation for MDS clears the affected CPU buffers on return to user
+ space and when entering a guest.
+
+ If SMT is enabled it also clears the buffers on idle entry when the CPU
+ is only affected by MSBDS and not any other MDS variant, because the
+ other variants cannot be protected against cross Hyper-Thread attacks.
+
+ For CPUs which are only affected by MSBDS the user space, guest and idle
+ transition mitigations are sufficient and SMT is not affected.
+
+.. _virt_mechanism:
+
+Virtualization mitigation
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ The protection for host to guest transition depends on the L1TF
+ vulnerability of the CPU:
+
+ - CPU is affected by L1TF:
+
+ If the L1D flush mitigation is enabled and up to date microcode is
+ available, the L1D flush mitigation is automatically protecting the
+ guest transition.
+
+ If the L1D flush mitigation is disabled then the MDS mitigation is
+ invoked explicit when the host MDS mitigation is enabled.
+
+ For details on L1TF and virtualization see:
+ :ref:`Documentation/admin-guide/hw-vuln//l1tf.rst <mitigation_control_kvm>`.
+
+ - CPU is not affected by L1TF:
+
+ CPU buffers are flushed before entering the guest when the host MDS
+ mitigation is enabled.
+
+ The resulting MDS protection matrix for the host to guest transition:
+
+ ============ ===== ============= ============ =================
+ L1TF MDS VMX-L1FLUSH Host MDS MDS-State
+
+ Don't care No Don't care N/A Not affected
+
+ Yes Yes Disabled Off Vulnerable
+
+ Yes Yes Disabled Full Mitigated
+
+ Yes Yes Enabled Don't care Mitigated
+
+ No Yes N/A Off Vulnerable
+
+ No Yes N/A Full Mitigated
+ ============ ===== ============= ============ =================
+
+ This only covers the host to guest transition, i.e. prevents leakage from
+ host to guest, but does not protect the guest internally. Guests need to
+ have their own protections.
+
+.. _xeon_phi:
+
+XEON PHI specific considerations
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ The XEON PHI processor family is affected by MSBDS which can be exploited
+ cross Hyper-Threads when entering idle states. Some XEON PHI variants allow
+ to use MWAIT in user space (Ring 3) which opens an potential attack vector
+ for malicious user space. The exposure can be disabled on the kernel
+ command line with the 'ring3mwait=disable' command line option.
+
+ XEON PHI is not affected by the other MDS variants and MSBDS is mitigated
+ before the CPU enters an idle state. As XEON PHI is not affected by L1TF
+ either disabling SMT is not required for full protection.
+
+.. _mds_smt_control:
+
+SMT control
+^^^^^^^^^^^
+
+ All MDS variants except MSBDS can be attacked cross Hyper-Threads. That
+ means on CPUs which are affected by MFBDS or MLPDS it is necessary to
+ disable SMT for full protection. These are most of the affected CPUs; the
+ exception is XEON PHI, see :ref:`xeon_phi`.
+
+ Disabling SMT can have a significant performance impact, but the impact
+ depends on the type of workloads.
+
+ See the relevant chapter in the L1TF mitigation documentation for details:
+ :ref:`Documentation/admin-guide/hw-vuln/l1tf.rst <smt_control>`.
+
+
+.. _mds_mitigation_control_command_line:
+
+Mitigation control on the kernel command line
+---------------------------------------------
+
+The kernel command line allows to control the MDS mitigations at boot
+time with the option "mds=". The valid arguments for this option are:
+
+ ============ =============================================================
+ full If the CPU is vulnerable, enable all available mitigations
+ for the MDS vulnerability, CPU buffer clearing on exit to
+ userspace and when entering a VM. Idle transitions are
+ protected as well if SMT is enabled.
+
+ It does not automatically disable SMT.
+
+ full,nosmt The same as mds=full, with SMT disabled on vulnerable
+ CPUs. This is the complete mitigation.
+
+ off Disables MDS mitigations completely.
+
+ ============ =============================================================
+
+Not specifying this option is equivalent to "mds=full". For processors
+that are affected by both TAA (TSX Asynchronous Abort) and MDS,
+specifying just "mds=off" without an accompanying "tsx_async_abort=off"
+will have no effect as the same mitigation is used for both
+vulnerabilities.
+
+Mitigation selection guide
+--------------------------
+
+1. Trusted userspace
+^^^^^^^^^^^^^^^^^^^^
+
+ If all userspace applications are from a trusted source and do not
+ execute untrusted code which is supplied externally, then the mitigation
+ can be disabled.
+
+
+2. Virtualization with trusted guests
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ The same considerations as above versus trusted user space apply.
+
+3. Virtualization with untrusted guests
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ The protection depends on the state of the L1TF mitigations.
+ See :ref:`virt_mechanism`.
+
+ If the MDS mitigation is enabled and SMT is disabled, guest to host and
+ guest to guest attacks are prevented.
+
+.. _mds_default_mitigations:
+
+Default mitigations
+-------------------
+
+ The kernel default mitigations for vulnerable processors are:
+
+ - Enable CPU buffer clearing
+
+ The kernel does not by default enforce the disabling of SMT, which leaves
+ SMT systems vulnerable when running untrusted code. The same rationale as
+ for L1TF applies.
+ See :ref:`Documentation/admin-guide/hw-vuln//l1tf.rst <default_mitigations>`.
diff --git a/Documentation/admin-guide/hw-vuln/multihit.rst b/Documentation/admin-guide/hw-vuln/multihit.rst
new file mode 100644
index 000000000000..140e4cec38c3
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/multihit.rst
@@ -0,0 +1,167 @@
+iTLB multihit
+=============
+
+iTLB multihit is an erratum where some processors may incur a machine check
+error, possibly resulting in an unrecoverable CPU lockup, when an
+instruction fetch hits multiple entries in the instruction TLB. This can
+occur when the page size is changed along with either the physical address
+or cache type. A malicious guest running on a virtualized system can
+exploit this erratum to perform a denial of service attack.
+
+
+Affected processors
+-------------------
+
+Variations of this erratum are present on most Intel Core and Xeon processor
+models. The erratum is not present on:
+
+ - non-Intel processors
+
+ - Some Atoms (Airmont, Bonnell, Goldmont, GoldmontPlus, Saltwell, Silvermont)
+
+ - Intel processors that have the PSCHANGE_MC_NO bit set in the
+ IA32_ARCH_CAPABILITIES MSR.
+
+
+Related CVEs
+------------
+
+The following CVE entry is related to this issue:
+
+ ============== =================================================
+ CVE-2018-12207 Machine Check Error Avoidance on Page Size Change
+ ============== =================================================
+
+
+Problem
+-------
+
+Privileged software, including OS and virtual machine managers (VMM), are in
+charge of memory management. A key component in memory management is the control
+of the page tables. Modern processors use virtual memory, a technique that creates
+the illusion of a very large memory for processors. This virtual space is split
+into pages of a given size. Page tables translate virtual addresses to physical
+addresses.
+
+To reduce latency when performing a virtual to physical address translation,
+processors include a structure, called TLB, that caches recent translations.
+There are separate TLBs for instruction (iTLB) and data (dTLB).
+
+Under this errata, instructions are fetched from a linear address translated
+using a 4 KB translation cached in the iTLB. Privileged software modifies the
+paging structure so that the same linear address using large page size (2 MB, 4
+MB, 1 GB) with a different physical address or memory type. After the page
+structure modification but before the software invalidates any iTLB entries for
+the linear address, a code fetch that happens on the same linear address may
+cause a machine-check error which can result in a system hang or shutdown.
+
+
+Attack scenarios
+----------------
+
+Attacks against the iTLB multihit erratum can be mounted from malicious
+guests in a virtualized system.
+
+
+iTLB multihit system information
+--------------------------------
+
+The Linux kernel provides a sysfs interface to enumerate the current iTLB
+multihit status of the system:whether the system is vulnerable and which
+mitigations are active. The relevant sysfs file is:
+
+/sys/devices/system/cpu/vulnerabilities/itlb_multihit
+
+The possible values in this file are:
+
+.. list-table::
+
+ * - Not affected
+ - The processor is not vulnerable.
+ * - KVM: Mitigation: Split huge pages
+ - Software changes mitigate this issue.
+ * - KVM: Mitigation: VMX unsupported
+ - KVM is not vulnerable because Virtual Machine Extensions (VMX) is not supported.
+ * - KVM: Mitigation: VMX disabled
+ - KVM is not vulnerable because Virtual Machine Extensions (VMX) is disabled.
+ * - KVM: Vulnerable
+ - The processor is vulnerable, but no mitigation enabled
+
+
+Enumeration of the erratum
+--------------------------------
+
+A new bit has been allocated in the IA32_ARCH_CAPABILITIES (PSCHANGE_MC_NO) msr
+and will be set on CPU's which are mitigated against this issue.
+
+ ======================================= =========== ===============================
+ IA32_ARCH_CAPABILITIES MSR Not present Possibly vulnerable,check model
+ IA32_ARCH_CAPABILITIES[PSCHANGE_MC_NO] '0' Likely vulnerable,check model
+ IA32_ARCH_CAPABILITIES[PSCHANGE_MC_NO] '1' Not vulnerable
+ ======================================= =========== ===============================
+
+
+Mitigation mechanism
+-------------------------
+
+This erratum can be mitigated by restricting the use of large page sizes to
+non-executable pages. This forces all iTLB entries to be 4K, and removes
+the possibility of multiple hits.
+
+In order to mitigate the vulnerability, KVM initially marks all huge pages
+as non-executable. If the guest attempts to execute in one of those pages,
+the page is broken down into 4K pages, which are then marked executable.
+
+If EPT is disabled or not available on the host, KVM is in control of TLB
+flushes and the problematic situation cannot happen. However, the shadow
+EPT paging mechanism used by nested virtualization is vulnerable, because
+the nested guest can trigger multiple iTLB hits by modifying its own
+(non-nested) page tables. For simplicity, KVM will make large pages
+non-executable in all shadow paging modes.
+
+Mitigation control on the kernel command line and KVM - module parameter
+------------------------------------------------------------------------
+
+The KVM hypervisor mitigation mechanism for marking huge pages as
+non-executable can be controlled with a module parameter "nx_huge_pages=".
+The kernel command line allows to control the iTLB multihit mitigations at
+boot time with the option "kvm.nx_huge_pages=".
+
+The valid arguments for these options are:
+
+ ========== ================================================================
+ force Mitigation is enabled. In this case, the mitigation implements
+ non-executable huge pages in Linux kernel KVM module. All huge
+ pages in the EPT are marked as non-executable.
+ If a guest attempts to execute in one of those pages, the page is
+ broken down into 4K pages, which are then marked executable.
+
+ off Mitigation is disabled.
+
+ auto Enable mitigation only if the platform is affected and the kernel
+ was not booted with the "mitigations=off" command line parameter.
+ This is the default option.
+ ========== ================================================================
+
+
+Mitigation selection guide
+--------------------------
+
+1. No virtualization in use
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ The system is protected by the kernel unconditionally and no further
+ action is required.
+
+2. Virtualization with trusted guests
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ If the guest comes from a trusted source, you may assume that the guest will
+ not attempt to maliciously exploit these errata and no further action is
+ required.
+
+3. Virtualization with untrusted guests
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ If the guest comes from an untrusted source, the guest host kernel will need
+ to apply iTLB multihit mitigation via the kernel command line or kvm
+ module parameter.
diff --git a/Documentation/admin-guide/hw-vuln/old_microcode.rst b/Documentation/admin-guide/hw-vuln/old_microcode.rst
new file mode 100644
index 000000000000..6ded8f86b8d0
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/old_microcode.rst
@@ -0,0 +1,21 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============
+Old Microcode
+=============
+
+The kernel keeps a table of released microcode. Systems that had
+microcode older than this at boot will say "Vulnerable". This means
+that the system was vulnerable to some known CPU issue. It could be
+security or functional, the kernel does not know or care.
+
+You should update the CPU microcode to mitigate any exposure. This is
+usually accomplished by updating the files in
+/lib/firmware/intel-ucode/ via normal distribution updates. Intel also
+distributes these files in a github repo:
+
+ https://github.com/intel/Intel-Linux-Processor-Microcode-Data-Files.git
+
+Just like all the other hardware vulnerabilities, exposure is
+determined at boot. Runtime microcode updates do not change the status
+of this vulnerability.
diff --git a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
new file mode 100644
index 000000000000..6dba18dbb9ab
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
@@ -0,0 +1,269 @@
+=========================================
+Processor MMIO Stale Data Vulnerabilities
+=========================================
+
+Processor MMIO Stale Data Vulnerabilities are a class of memory-mapped I/O
+(MMIO) vulnerabilities that can expose data. The sequences of operations for
+exposing data range from simple to very complex. Because most of the
+vulnerabilities require the attacker to have access to MMIO, many environments
+are not affected. System environments using virtualization where MMIO access is
+provided to untrusted guests may need mitigation. These vulnerabilities are
+not transient execution attacks. However, these vulnerabilities may propagate
+stale data into core fill buffers where the data can subsequently be inferred
+by an unmitigated transient execution attack. Mitigation for these
+vulnerabilities includes a combination of microcode update and software
+changes, depending on the platform and usage model. Some of these mitigations
+are similar to those used to mitigate Microarchitectural Data Sampling (MDS) or
+those used to mitigate Special Register Buffer Data Sampling (SRBDS).
+
+Data Propagators
+================
+Propagators are operations that result in stale data being copied or moved from
+one microarchitectural buffer or register to another. Processor MMIO Stale Data
+Vulnerabilities are operations that may result in stale data being directly
+read into an architectural, software-visible state or sampled from a buffer or
+register.
+
+Fill Buffer Stale Data Propagator (FBSDP)
+-----------------------------------------
+Stale data may propagate from fill buffers (FB) into the non-coherent portion
+of the uncore on some non-coherent writes. Fill buffer propagation by itself
+does not make stale data architecturally visible. Stale data must be propagated
+to a location where it is subject to reading or sampling.
+
+Sideband Stale Data Propagator (SSDP)
+-------------------------------------
+The sideband stale data propagator (SSDP) is limited to the client (including
+Intel Xeon server E3) uncore implementation. The sideband response buffer is
+shared by all client cores. For non-coherent reads that go to sideband
+destinations, the uncore logic returns 64 bytes of data to the core, including
+both requested data and unrequested stale data, from a transaction buffer and
+the sideband response buffer. As a result, stale data from the sideband
+response and transaction buffers may now reside in a core fill buffer.
+
+Primary Stale Data Propagator (PSDP)
+------------------------------------
+The primary stale data propagator (PSDP) is limited to the client (including
+Intel Xeon server E3) uncore implementation. Similar to the sideband response
+buffer, the primary response buffer is shared by all client cores. For some
+processors, MMIO primary reads will return 64 bytes of data to the core fill
+buffer including both requested data and unrequested stale data. This is
+similar to the sideband stale data propagator.
+
+Vulnerabilities
+===============
+Device Register Partial Write (DRPW) (CVE-2022-21166)
+-----------------------------------------------------
+Some endpoint MMIO registers incorrectly handle writes that are smaller than
+the register size. Instead of aborting the write or only copying the correct
+subset of bytes (for example, 2 bytes for a 2-byte write), more bytes than
+specified by the write transaction may be written to the register. On
+processors affected by FBSDP, this may expose stale data from the fill buffers
+of the core that created the write transaction.
+
+Shared Buffers Data Sampling (SBDS) (CVE-2022-21125)
+----------------------------------------------------
+After propagators may have moved data around the uncore and copied stale data
+into client core fill buffers, processors affected by MFBDS can leak data from
+the fill buffer. It is limited to the client (including Intel Xeon server E3)
+uncore implementation.
+
+Shared Buffers Data Read (SBDR) (CVE-2022-21123)
+------------------------------------------------
+It is similar to Shared Buffer Data Sampling (SBDS) except that the data is
+directly read into the architectural software-visible state. It is limited to
+the client (including Intel Xeon server E3) uncore implementation.
+
+Affected Processors
+===================
+Not all the CPUs are affected by all the variants. For instance, most
+processors for the server market (excluding Intel Xeon E3 processors) are
+impacted by only Device Register Partial Write (DRPW).
+
+Below is the list of affected Intel processors [#f1]_:
+
+ =================== ============ =========
+ Common name Family_Model Steppings
+ =================== ============ =========
+ HASWELL_X 06_3FH 2,4
+ SKYLAKE_L 06_4EH 3
+ BROADWELL_X 06_4FH All
+ SKYLAKE_X 06_55H 3,4,6,7,11
+ BROADWELL_D 06_56H 3,4,5
+ SKYLAKE 06_5EH 3
+ ICELAKE_X 06_6AH 4,5,6
+ ICELAKE_D 06_6CH 1
+ ICELAKE_L 06_7EH 5
+ ATOM_TREMONT_D 06_86H All
+ LAKEFIELD 06_8AH 1
+ KABYLAKE_L 06_8EH 9 to 12
+ ATOM_TREMONT 06_96H 1
+ ATOM_TREMONT_L 06_9CH 0
+ KABYLAKE 06_9EH 9 to 13
+ COMETLAKE 06_A5H 2,3,5
+ COMETLAKE_L 06_A6H 0,1
+ ROCKETLAKE 06_A7H 1
+ =================== ============ =========
+
+If a CPU is in the affected processor list, but not affected by a variant, it
+is indicated by new bits in MSR IA32_ARCH_CAPABILITIES. As described in a later
+section, mitigation largely remains the same for all the variants, i.e. to
+clear the CPU fill buffers via VERW instruction.
+
+New bits in MSRs
+================
+Newer processors and microcode update on existing affected processors added new
+bits to IA32_ARCH_CAPABILITIES MSR. These bits can be used to enumerate
+specific variants of Processor MMIO Stale Data vulnerabilities and mitigation
+capability.
+
+MSR IA32_ARCH_CAPABILITIES
+--------------------------
+Bit 13 - SBDR_SSDP_NO - When set, processor is not affected by either the
+ Shared Buffers Data Read (SBDR) vulnerability or the sideband stale
+ data propagator (SSDP).
+Bit 14 - FBSDP_NO - When set, processor is not affected by the Fill Buffer
+ Stale Data Propagator (FBSDP).
+Bit 15 - PSDP_NO - When set, processor is not affected by Primary Stale Data
+ Propagator (PSDP).
+Bit 17 - FB_CLEAR - When set, VERW instruction will overwrite CPU fill buffer
+ values as part of MD_CLEAR operations. Processors that do not
+ enumerate MDS_NO (meaning they are affected by MDS) but that do
+ enumerate support for both L1D_FLUSH and MD_CLEAR implicitly enumerate
+ FB_CLEAR as part of their MD_CLEAR support.
+Bit 18 - FB_CLEAR_CTRL - Processor supports read and write to MSR
+ IA32_MCU_OPT_CTRL[FB_CLEAR_DIS]. On such processors, the FB_CLEAR_DIS
+ bit can be set to cause the VERW instruction to not perform the
+ FB_CLEAR action. Not all processors that support FB_CLEAR will support
+ FB_CLEAR_CTRL.
+
+MSR IA32_MCU_OPT_CTRL
+---------------------
+Bit 3 - FB_CLEAR_DIS - When set, VERW instruction does not perform the FB_CLEAR
+action. This may be useful to reduce the performance impact of FB_CLEAR in
+cases where system software deems it warranted (for example, when performance
+is more critical, or the untrusted software has no MMIO access). Note that
+FB_CLEAR_DIS has no impact on enumeration (for example, it does not change
+FB_CLEAR or MD_CLEAR enumeration) and it may not be supported on all processors
+that enumerate FB_CLEAR.
+
+Mitigation
+==========
+Like MDS, all variants of Processor MMIO Stale Data vulnerabilities have the
+same mitigation strategy to force the CPU to clear the affected buffers before
+an attacker can extract the secrets.
+
+This is achieved by using the otherwise unused and obsolete VERW instruction in
+combination with a microcode update. The microcode clears the affected CPU
+buffers when the VERW instruction is executed.
+
+Kernel does the buffer clearing with x86_clear_cpu_buffers().
+
+On MDS affected CPUs, the kernel already invokes CPU buffer clear on
+kernel/userspace, hypervisor/guest and C-state (idle) transitions. No
+additional mitigation is needed on such CPUs.
+
+For CPUs not affected by MDS or TAA, mitigation is needed only for the attacker
+with MMIO capability. Therefore, VERW is not required for kernel/userspace. For
+virtualization case, VERW is only needed at VMENTER for a guest with MMIO
+capability.
+
+Mitigation points
+-----------------
+Return to user space
+^^^^^^^^^^^^^^^^^^^^
+Same mitigation as MDS when affected by MDS/TAA, otherwise no mitigation
+needed.
+
+C-State transition
+^^^^^^^^^^^^^^^^^^
+Control register writes by CPU during C-state transition can propagate data
+from fill buffer to uncore buffers. Execute VERW before C-state transition to
+clear CPU fill buffers.
+
+Guest entry point
+^^^^^^^^^^^^^^^^^
+Same mitigation as MDS when processor is also affected by MDS/TAA, otherwise
+execute VERW at VMENTER only for MMIO capable guests. On CPUs not affected by
+MDS/TAA, guest without MMIO access cannot extract secrets using Processor MMIO
+Stale Data vulnerabilities, so there is no need to execute VERW for such guests.
+
+Mitigation control on the kernel command line
+---------------------------------------------
+The kernel command line allows to control the Processor MMIO Stale Data
+mitigations at boot time with the option "mmio_stale_data=". The valid
+arguments for this option are:
+
+ ========== =================================================================
+ full If the CPU is vulnerable, enable mitigation; CPU buffer clearing
+ on exit to userspace and when entering a VM. Idle transitions are
+ protected as well. It does not automatically disable SMT.
+ full,nosmt Same as full, with SMT disabled on vulnerable CPUs. This is the
+ complete mitigation.
+ off Disables mitigation completely.
+ ========== =================================================================
+
+If the CPU is affected and mmio_stale_data=off is not supplied on the kernel
+command line, then the kernel selects the appropriate mitigation.
+
+Mitigation status information
+-----------------------------
+The Linux kernel provides a sysfs interface to enumerate the current
+vulnerability status of the system: whether the system is vulnerable, and
+which mitigations are active. The relevant sysfs file is:
+
+ /sys/devices/system/cpu/vulnerabilities/mmio_stale_data
+
+The possible values in this file are:
+
+ .. list-table::
+
+ * - 'Not affected'
+ - The processor is not vulnerable
+ * - 'Vulnerable'
+ - The processor is vulnerable, but no mitigation enabled
+ * - 'Vulnerable: Clear CPU buffers attempted, no microcode'
+ - The processor is vulnerable but microcode is not updated. The
+ mitigation is enabled on a best effort basis.
+
+ If the processor is vulnerable but the availability of the microcode
+ based mitigation mechanism is not advertised via CPUID, the kernel
+ selects a best effort mitigation mode. This mode invokes the mitigation
+ instructions without a guarantee that they clear the CPU buffers.
+
+ This is done to address virtualization scenarios where the host has the
+ microcode update applied, but the hypervisor is not yet updated to
+ expose the CPUID to the guest. If the host has updated microcode the
+ protection takes effect; otherwise a few CPU cycles are wasted
+ pointlessly.
+ * - 'Mitigation: Clear CPU buffers'
+ - The processor is vulnerable and the CPU buffer clearing mitigation is
+ enabled.
+ * - 'Unknown: No mitigations'
+ - The processor vulnerability status is unknown because it is
+ out of Servicing period. Mitigation is not attempted.
+
+Definitions:
+------------
+
+Servicing period: The process of providing functional and security updates to
+Intel processors or platforms, utilizing the Intel Platform Update (IPU)
+process or other similar mechanisms.
+
+End of Servicing Updates (ESU): ESU is the date at which Intel will no
+longer provide Servicing, such as through IPU or other similar update
+processes. ESU dates will typically be aligned to end of quarter.
+
+If the processor is vulnerable then the following information is appended to
+the above information:
+
+ ======================== ===========================================
+ 'SMT vulnerable' SMT is enabled
+ 'SMT disabled' SMT is disabled
+ 'SMT Host state unknown' Kernel runs in a VM, Host SMT state unknown
+ ======================== ===========================================
+
+References
+----------
+.. [#f1] Affected Processors
+ https://www.intel.com/content/www/us/en/developer/topic-technology/software-security-guidance/processors-affected-consolidated-product-cpu-model.html
diff --git a/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst b/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst
new file mode 100644
index 000000000000..ad15417d39f9
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst
@@ -0,0 +1,96 @@
+==================================
+Register File Data Sampling (RFDS)
+==================================
+
+Register File Data Sampling (RFDS) is a microarchitectural vulnerability that
+only affects Intel Atom parts(also branded as E-cores). RFDS may allow
+a malicious actor to infer data values previously used in floating point
+registers, vector registers, or integer registers. RFDS does not provide the
+ability to choose which data is inferred. CVE-2023-28746 is assigned to RFDS.
+
+Affected Processors
+===================
+Below is the list of affected Intel processors [#f1]_:
+
+ =================== ============
+ Common name Family_Model
+ =================== ============
+ ATOM_GOLDMONT 06_5CH
+ ATOM_GOLDMONT_D 06_5FH
+ ATOM_GOLDMONT_PLUS 06_7AH
+ ATOM_TREMONT_D 06_86H
+ ATOM_TREMONT 06_96H
+ ALDERLAKE 06_97H
+ ALDERLAKE_L 06_9AH
+ ATOM_TREMONT_L 06_9CH
+ RAPTORLAKE 06_B7H
+ RAPTORLAKE_P 06_BAH
+ ATOM_GRACEMONT 06_BEH
+ RAPTORLAKE_S 06_BFH
+ =================== ============
+
+Mitigation
+==========
+Intel released a microcode update that enables software to clear sensitive
+information using the VERW instruction. Like MDS, RFDS deploys the same
+mitigation strategy to force the CPU to clear the affected buffers before an
+attacker can extract the secrets. This is achieved by using the otherwise
+unused and obsolete VERW instruction in combination with a microcode update.
+The microcode clears the affected CPU buffers when the VERW instruction is
+executed.
+
+Mitigation points
+-----------------
+VERW is executed by the kernel before returning to user space, and by KVM
+before VMentry. None of the affected cores support SMT, so VERW is not required
+at C-state transitions.
+
+New bits in IA32_ARCH_CAPABILITIES
+----------------------------------
+Newer processors and microcode update on existing affected processors added new
+bits to IA32_ARCH_CAPABILITIES MSR. These bits can be used to enumerate
+vulnerability and mitigation capability:
+
+- Bit 27 - RFDS_NO - When set, processor is not affected by RFDS.
+- Bit 28 - RFDS_CLEAR - When set, processor is affected by RFDS, and has the
+ microcode that clears the affected buffers on VERW execution.
+
+Mitigation control on the kernel command line
+---------------------------------------------
+The kernel command line allows to control RFDS mitigation at boot time with the
+parameter "reg_file_data_sampling=". The valid arguments are:
+
+ ========== =================================================================
+ on If the CPU is vulnerable, enable mitigation; CPU buffer clearing
+ on exit to userspace and before entering a VM.
+ off Disables mitigation.
+ ========== =================================================================
+
+Mitigation default is selected by CONFIG_MITIGATION_RFDS.
+
+Mitigation status information
+-----------------------------
+The Linux kernel provides a sysfs interface to enumerate the current
+vulnerability status of the system: whether the system is vulnerable, and
+which mitigations are active. The relevant sysfs file is:
+
+ /sys/devices/system/cpu/vulnerabilities/reg_file_data_sampling
+
+The possible values in this file are:
+
+ .. list-table::
+
+ * - 'Not affected'
+ - The processor is not vulnerable
+ * - 'Vulnerable'
+ - The processor is vulnerable, but no mitigation enabled
+ * - 'Vulnerable: No microcode'
+ - The processor is vulnerable but microcode is not updated.
+ * - 'Mitigation: Clear Register File'
+ - The processor is vulnerable and the CPU buffer clearing mitigation is
+ enabled.
+
+References
+----------
+.. [#f1] Affected Processors
+ https://www.intel.com/content/www/us/en/developer/topic-technology/software-security-guidance/processors-affected-consolidated-product-cpu-model.html
diff --git a/Documentation/admin-guide/hw-vuln/rsb.rst b/Documentation/admin-guide/hw-vuln/rsb.rst
new file mode 100644
index 000000000000..21dbf9cf25f8
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/rsb.rst
@@ -0,0 +1,268 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======================
+RSB-related mitigations
+=======================
+
+.. warning::
+ Please keep this document up-to-date, otherwise you will be
+ volunteered to update it and convert it to a very long comment in
+ bugs.c!
+
+Since 2018 there have been many Spectre CVEs related to the Return Stack
+Buffer (RSB) (sometimes referred to as the Return Address Stack (RAS) or
+Return Address Predictor (RAP) on AMD).
+
+Information about these CVEs and how to mitigate them is scattered
+amongst a myriad of microarchitecture-specific documents.
+
+This document attempts to consolidate all the relevant information in
+once place and clarify the reasoning behind the current RSB-related
+mitigations. It's meant to be as concise as possible, focused only on
+the current kernel mitigations: what are the RSB-related attack vectors
+and how are they currently being mitigated?
+
+It's *not* meant to describe how the RSB mechanism operates or how the
+exploits work. More details about those can be found in the references
+below.
+
+Rather, this is basically a glorified comment, but too long to actually
+be one. So when the next CVE comes along, a kernel developer can
+quickly refer to this as a refresher to see what we're actually doing
+and why.
+
+At a high level, there are two classes of RSB attacks: RSB poisoning
+(Intel and AMD) and RSB underflow (Intel only). They must each be
+considered individually for each attack vector (and microarchitecture
+where applicable).
+
+----
+
+RSB poisoning (Intel and AMD)
+=============================
+
+SpectreRSB
+~~~~~~~~~~
+
+RSB poisoning is a technique used by SpectreRSB [#spectre-rsb]_ where
+an attacker poisons an RSB entry to cause a victim's return instruction
+to speculate to an attacker-controlled address. This can happen when
+there are unbalanced CALLs/RETs after a context switch or VMEXIT.
+
+* All attack vectors can potentially be mitigated by flushing out any
+ poisoned RSB entries using an RSB filling sequence
+ [#intel-rsb-filling]_ [#amd-rsb-filling]_ when transitioning between
+ untrusted and trusted domains. But this has a performance impact and
+ should be avoided whenever possible.
+
+ .. DANGER::
+ **FIXME**: Currently we're flushing 32 entries. However, some CPU
+ models have more than 32 entries. The loop count needs to be
+ increased for those. More detailed information is needed about RSB
+ sizes.
+
+* On context switch, the user->user mitigation requires ensuring the
+ RSB gets filled or cleared whenever IBPB gets written [#cond-ibpb]_
+ during a context switch:
+
+ * AMD:
+ On Zen 4+, IBPB (or SBPB [#amd-sbpb]_ if used) clears the RSB.
+ This is indicated by IBPB_RET in CPUID [#amd-ibpb-rsb]_.
+
+ On Zen < 4, the RSB filling sequence [#amd-rsb-filling]_ must be
+ always be done in addition to IBPB [#amd-ibpb-no-rsb]_. This is
+ indicated by X86_BUG_IBPB_NO_RET.
+
+ * Intel:
+ IBPB always clears the RSB:
+
+ "Software that executed before the IBPB command cannot control
+ the predicted targets of indirect branches executed after the
+ command on the same logical processor. The term indirect branch
+ in this context includes near return instructions, so these
+ predicted targets may come from the RSB." [#intel-ibpb-rsb]_
+
+* On context switch, user->kernel attacks are prevented by SMEP. User
+ space can only insert user space addresses into the RSB. Even
+ non-canonical addresses can't be inserted due to the page gap at the
+ end of the user canonical address space reserved by TASK_SIZE_MAX.
+ A SMEP #PF at instruction fetch prevents the kernel from speculatively
+ executing user space.
+
+ * AMD:
+ "Finally, branches that are predicted as 'ret' instructions get
+ their predicted targets from the Return Address Predictor (RAP).
+ AMD recommends software use a RAP stuffing sequence (mitigation
+ V2-3 in [2]) and/or Supervisor Mode Execution Protection (SMEP)
+ to ensure that the addresses in the RAP are safe for
+ speculation. Collectively, we refer to these mitigations as "RAP
+ Protection"." [#amd-smep-rsb]_
+
+ * Intel:
+ "On processors with enhanced IBRS, an RSB overwrite sequence may
+ not suffice to prevent the predicted target of a near return
+ from using an RSB entry created in a less privileged predictor
+ mode. Software can prevent this by enabling SMEP (for
+ transitions from user mode to supervisor mode) and by having
+ IA32_SPEC_CTRL.IBRS set during VM exits." [#intel-smep-rsb]_
+
+* On VMEXIT, guest->host attacks are mitigated by eIBRS (and PBRSB
+ mitigation if needed):
+
+ * AMD:
+ "When Automatic IBRS is enabled, the internal return address
+ stack used for return address predictions is cleared on VMEXIT."
+ [#amd-eibrs-vmexit]_
+
+ * Intel:
+ "On processors with enhanced IBRS, an RSB overwrite sequence may
+ not suffice to prevent the predicted target of a near return
+ from using an RSB entry created in a less privileged predictor
+ mode. Software can prevent this by enabling SMEP (for
+ transitions from user mode to supervisor mode) and by having
+ IA32_SPEC_CTRL.IBRS set during VM exits. Processors with
+ enhanced IBRS still support the usage model where IBRS is set
+ only in the OS/VMM for OSes that enable SMEP. To do this, such
+ processors will ensure that guest behavior cannot control the
+ RSB after a VM exit once IBRS is set, even if IBRS was not set
+ at the time of the VM exit." [#intel-eibrs-vmexit]_
+
+ Note that some Intel CPUs are susceptible to Post-barrier Return
+ Stack Buffer Predictions (PBRSB) [#intel-pbrsb]_, where the last
+ CALL from the guest can be used to predict the first unbalanced RET.
+ In this case the PBRSB mitigation is needed in addition to eIBRS.
+
+AMD RETBleed / SRSO / Branch Type Confusion
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+On AMD, poisoned RSB entries can also be created by the AMD RETBleed
+variant [#retbleed-paper]_ [#amd-btc]_ or by Speculative Return Stack
+Overflow [#amd-srso]_ (Inception [#inception-paper]_). The kernel
+protects itself by replacing every RET in the kernel with a branch to a
+single safe RET.
+
+----
+
+RSB underflow (Intel only)
+==========================
+
+RSB Alternate (RSBA) ("Intel Retbleed")
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Some Intel Skylake-generation CPUs are susceptible to the Intel variant
+of RETBleed [#retbleed-paper]_ (Return Stack Buffer Underflow
+[#intel-rsbu]_). If a RET is executed when the RSB buffer is empty due
+to mismatched CALLs/RETs or returning from a deep call stack, the branch
+predictor can fall back to using the Branch Target Buffer (BTB). If a
+user forces a BTB collision then the RET can speculatively branch to a
+user-controlled address.
+
+* Note that RSB filling doesn't fully mitigate this issue. If there
+ are enough unbalanced RETs, the RSB may still underflow and fall back
+ to using a poisoned BTB entry.
+
+* On context switch, user->user underflow attacks are mitigated by the
+ conditional IBPB [#cond-ibpb]_ on context switch which effectively
+ clears the BTB:
+
+ * "The indirect branch predictor barrier (IBPB) is an indirect branch
+ control mechanism that establishes a barrier, preventing software
+ that executed before the barrier from controlling the predicted
+ targets of indirect branches executed after the barrier on the same
+ logical processor." [#intel-ibpb-btb]_
+
+* On context switch and VMEXIT, user->kernel and guest->host RSB
+ underflows are mitigated by IBRS or eIBRS:
+
+ * "Enabling IBRS (including enhanced IBRS) will mitigate the "RSBU"
+ attack demonstrated by the researchers. As previously documented,
+ Intel recommends the use of enhanced IBRS, where supported. This
+ includes any processor that enumerates RRSBA but not RRSBA_DIS_S."
+ [#intel-rsbu]_
+
+ However, note that eIBRS and IBRS do not mitigate intra-mode attacks.
+ Like RRSBA below, this is mitigated by clearing the BHB on kernel
+ entry.
+
+ As an alternative to classic IBRS, call depth tracking (combined with
+ retpolines) can be used to track kernel returns and fill the RSB when
+ it gets close to being empty.
+
+Restricted RSB Alternate (RRSBA)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Some newer Intel CPUs have Restricted RSB Alternate (RRSBA) behavior,
+which, similar to RSBA described above, also falls back to using the BTB
+on RSB underflow. The only difference is that the predicted targets are
+restricted to the current domain when eIBRS is enabled:
+
+* "Restricted RSB Alternate (RRSBA) behavior allows alternate branch
+ predictors to be used by near RET instructions when the RSB is
+ empty. When eIBRS is enabled, the predicted targets of these
+ alternate predictors are restricted to those belonging to the
+ indirect branch predictor entries of the current prediction domain.
+ [#intel-eibrs-rrsba]_
+
+When a CPU with RRSBA is vulnerable to Branch History Injection
+[#bhi-paper]_ [#intel-bhi]_, an RSB underflow could be used for an
+intra-mode BTI attack. This is mitigated by clearing the BHB on
+kernel entry.
+
+However if the kernel uses retpolines instead of eIBRS, it needs to
+disable RRSBA:
+
+* "Where software is using retpoline as a mitigation for BHI or
+ intra-mode BTI, and the processor both enumerates RRSBA and
+ enumerates RRSBA_DIS controls, it should disable this behavior."
+ [#intel-retpoline-rrsba]_
+
+----
+
+References
+==========
+
+.. [#spectre-rsb] `Spectre Returns! Speculation Attacks using the Return Stack Buffer <https://arxiv.org/pdf/1807.07940.pdf>`_
+
+.. [#intel-rsb-filling] "Empty RSB Mitigation on Skylake-generation" in `Retpoline: A Branch Target Injection Mitigation <https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/retpoline-branch-target-injection-mitigation.html#inpage-nav-5-1>`_
+
+.. [#amd-rsb-filling] "Mitigation V2-3" in `Software Techniques for Managing Speculation <https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/programmer-references/software-techniques-for-managing-speculation.pdf>`_
+
+.. [#cond-ibpb] Whether IBPB is written depends on whether the prev and/or next task is protected from Spectre attacks. It typically requires opting in per task or system-wide. For more details see the documentation for the ``spectre_v2_user`` cmdline option in Documentation/admin-guide/kernel-parameters.txt.
+
+.. [#amd-sbpb] IBPB without flushing of branch type predictions. Only exists for AMD.
+
+.. [#amd-ibpb-rsb] "Function 8000_0008h -- Processor Capacity Parameters and Extended Feature Identification" in `AMD64 Architecture Programmer's Manual Volume 3: General-Purpose and System Instructions <https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/programmer-references/24594.pdf>`_. SBPB behaves the same way according to `this email <https://lore.kernel.org/5175b163a3736ca5fd01cedf406735636c99a>`_.
+
+.. [#amd-ibpb-no-rsb] `Spectre Attacks: Exploiting Speculative Execution <https://comsec.ethz.ch/wp-content/files/ibpb_sp25.pdf>`_
+
+.. [#intel-ibpb-rsb] "Introduction" in `Post-barrier Return Stack Buffer Predictions / CVE-2022-26373 / INTEL-SA-00706 <https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/advisory-guidance/post-barrier-return-stack-buffer-predictions.html>`_
+
+.. [#amd-smep-rsb] "Existing Mitigations" in `Technical Guidance for Mitigating Branch Type Confusion <https://www.amd.com/content/dam/amd/en/documents/resources/technical-guidance-for-mitigating-branch-type-confusion.pdf>`_
+
+.. [#intel-smep-rsb] "Enhanced IBRS" in `Indirect Branch Restricted Speculation <https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/indirect-branch-restricted-speculation.html>`_
+
+.. [#amd-eibrs-vmexit] "Extended Feature Enable Register (EFER)" in `AMD64 Architecture Programmer's Manual Volume 2: System Programming <https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/programmer-references/24593.pdf>`_
+
+.. [#intel-eibrs-vmexit] "Enhanced IBRS" in `Indirect Branch Restricted Speculation <https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/indirect-branch-restricted-speculation.html>`_
+
+.. [#intel-pbrsb] `Post-barrier Return Stack Buffer Predictions / CVE-2022-26373 / INTEL-SA-00706 <https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/advisory-guidance/post-barrier-return-stack-buffer-predictions.html>`_
+
+.. [#retbleed-paper] `RETBleed: Arbitrary Speculative Code Execution with Return Instruction <https://comsec.ethz.ch/wp-content/files/retbleed_sec22.pdf>`_
+
+.. [#amd-btc] `Technical Guidance for Mitigating Branch Type Confusion <https://www.amd.com/content/dam/amd/en/documents/resources/technical-guidance-for-mitigating-branch-type-confusion.pdf>`_
+
+.. [#amd-srso] `Technical Update Regarding Speculative Return Stack Overflow <https://www.amd.com/content/dam/amd/en/documents/corporate/cr/speculative-return-stack-overflow-whitepaper.pdf>`_
+
+.. [#inception-paper] `Inception: Exposing New Attack Surfaces with Training in Transient Execution <https://comsec.ethz.ch/wp-content/files/inception_sec23.pdf>`_
+
+.. [#intel-rsbu] `Return Stack Buffer Underflow / Return Stack Buffer Underflow / CVE-2022-29901, CVE-2022-28693 / INTEL-SA-00702 <https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/advisory-guidance/return-stack-buffer-underflow.html>`_
+
+.. [#intel-ibpb-btb] `Indirect Branch Predictor Barrier' <https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/indirect-branch-predictor-barrier.html>`_
+
+.. [#intel-eibrs-rrsba] "Guidance for RSBU" in `Return Stack Buffer Underflow / Return Stack Buffer Underflow / CVE-2022-29901, CVE-2022-28693 / INTEL-SA-00702 <https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/advisory-guidance/return-stack-buffer-underflow.html>`_
+
+.. [#bhi-paper] `Branch History Injection: On the Effectiveness of Hardware Mitigations Against Cross-Privilege Spectre-v2 Attacks <http://download.vusec.net/papers/bhi-spectre-bhb_sec22.pdf>`_
+
+.. [#intel-bhi] `Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598 <https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/branch-history-injection.html>`_
+
+.. [#intel-retpoline-rrsba] "Retpoline" in `Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598 <https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/branch-history-injection.html>`_
diff --git a/Documentation/admin-guide/hw-vuln/special-register-buffer-data-sampling.rst b/Documentation/admin-guide/hw-vuln/special-register-buffer-data-sampling.rst
new file mode 100644
index 000000000000..966c9b3296ea
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/special-register-buffer-data-sampling.rst
@@ -0,0 +1,150 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+SRBDS - Special Register Buffer Data Sampling
+=============================================
+
+SRBDS is a hardware vulnerability that allows MDS
+Documentation/admin-guide/hw-vuln/mds.rst techniques to
+infer values returned from special register accesses. Special register
+accesses are accesses to off core registers. According to Intel's evaluation,
+the special register reads that have a security expectation of privacy are
+RDRAND, RDSEED and SGX EGETKEY.
+
+When RDRAND, RDSEED and EGETKEY instructions are used, the data is moved
+to the core through the special register mechanism that is susceptible
+to MDS attacks.
+
+Affected processors
+-------------------
+Core models (desktop, mobile, Xeon-E3) that implement RDRAND and/or RDSEED may
+be affected.
+
+A processor is affected by SRBDS if its Family_Model and stepping is
+in the following list, with the exception of the listed processors
+exporting MDS_NO while Intel TSX is available yet not enabled. The
+latter class of processors are only affected when Intel TSX is enabled
+by software using TSX_CTRL_MSR otherwise they are not affected.
+
+ ============= ============ ========
+ common name Family_Model Stepping
+ ============= ============ ========
+ IvyBridge 06_3AH All
+
+ Haswell 06_3CH All
+ Haswell_L 06_45H All
+ Haswell_G 06_46H All
+
+ Broadwell_G 06_47H All
+ Broadwell 06_3DH All
+
+ Skylake_L 06_4EH All
+ Skylake 06_5EH All
+
+ Kabylake_L 06_8EH <= 0xC
+ Kabylake 06_9EH <= 0xD
+ ============= ============ ========
+
+Related CVEs
+------------
+
+The following CVE entry is related to this SRBDS issue:
+
+ ============== ===== =====================================
+ CVE-2020-0543 SRBDS Special Register Buffer Data Sampling
+ ============== ===== =====================================
+
+Attack scenarios
+----------------
+An unprivileged user can extract values returned from RDRAND and RDSEED
+executed on another core or sibling thread using MDS techniques.
+
+
+Mitigation mechanism
+--------------------
+Intel will release microcode updates that modify the RDRAND, RDSEED, and
+EGETKEY instructions to overwrite secret special register data in the shared
+staging buffer before the secret data can be accessed by another logical
+processor.
+
+During execution of the RDRAND, RDSEED, or EGETKEY instructions, off-core
+accesses from other logical processors will be delayed until the special
+register read is complete and the secret data in the shared staging buffer is
+overwritten.
+
+This has three effects on performance:
+
+#. RDRAND, RDSEED, or EGETKEY instructions have higher latency.
+
+#. Executing RDRAND at the same time on multiple logical processors will be
+ serialized, resulting in an overall reduction in the maximum RDRAND
+ bandwidth.
+
+#. Executing RDRAND, RDSEED or EGETKEY will delay memory accesses from other
+ logical processors that miss their core caches, with an impact similar to
+ legacy locked cache-line-split accesses.
+
+The microcode updates provide an opt-out mechanism (RNGDS_MITG_DIS) to disable
+the mitigation for RDRAND and RDSEED instructions executed outside of Intel
+Software Guard Extensions (Intel SGX) enclaves. On logical processors that
+disable the mitigation using this opt-out mechanism, RDRAND and RDSEED do not
+take longer to execute and do not impact performance of sibling logical
+processors memory accesses. The opt-out mechanism does not affect Intel SGX
+enclaves (including execution of RDRAND or RDSEED inside an enclave, as well
+as EGETKEY execution).
+
+IA32_MCU_OPT_CTRL MSR Definition
+--------------------------------
+Along with the mitigation for this issue, Intel added a new thread-scope
+IA32_MCU_OPT_CTRL MSR, (address 0x123). The presence of this MSR and
+RNGDS_MITG_DIS (bit 0) is enumerated by CPUID.(EAX=07H,ECX=0).EDX[SRBDS_CTRL =
+9]==1. This MSR is introduced through the microcode update.
+
+Setting IA32_MCU_OPT_CTRL[0] (RNGDS_MITG_DIS) to 1 for a logical processor
+disables the mitigation for RDRAND and RDSEED executed outside of an Intel SGX
+enclave on that logical processor. Opting out of the mitigation for a
+particular logical processor does not affect the RDRAND and RDSEED mitigations
+for other logical processors.
+
+Note that inside of an Intel SGX enclave, the mitigation is applied regardless
+of the value of RNGDS_MITG_DS.
+
+Mitigation control on the kernel command line
+---------------------------------------------
+The kernel command line allows control over the SRBDS mitigation at boot time
+with the option "srbds=". The option for this is:
+
+ ============= =============================================================
+ off This option disables SRBDS mitigation for RDRAND and RDSEED on
+ affected platforms.
+ ============= =============================================================
+
+SRBDS System Information
+------------------------
+The Linux kernel provides vulnerability status information through sysfs. For
+SRBDS this can be accessed by the following sysfs file:
+/sys/devices/system/cpu/vulnerabilities/srbds
+
+The possible values contained in this file are:
+
+ ============================== =============================================
+ Not affected Processor not vulnerable
+ Vulnerable Processor vulnerable and mitigation disabled
+ Vulnerable: No microcode Processor vulnerable and microcode is missing
+ mitigation
+ Mitigation: Microcode Processor is vulnerable and mitigation is in
+ effect.
+ Mitigation: TSX disabled Processor is only vulnerable when TSX is
+ enabled while this system was booted with TSX
+ disabled.
+ Unknown: Dependent on
+ hypervisor status Running on virtual guest processor that is
+ affected but with no way to know if host
+ processor is mitigated or vulnerable.
+ ============================== =============================================
+
+SRBDS Default mitigation
+------------------------
+This new microcode serializes processor access during execution of RDRAND,
+RDSEED ensures that the shared buffer is overwritten before it is released for
+reuse. Use the "srbds=off" kernel command line to disable the mitigation for
+RDRAND and RDSEED.
diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst
new file mode 100644
index 000000000000..991f12adef8d
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/spectre.rst
@@ -0,0 +1,725 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Spectre Side Channels
+=====================
+
+Spectre is a class of side channel attacks that exploit branch prediction
+and speculative execution on modern CPUs to read memory, possibly
+bypassing access controls. Speculative execution side channel exploits
+do not modify memory but attempt to infer privileged data in the memory.
+
+This document covers Spectre variant 1 and Spectre variant 2.
+
+Affected processors
+-------------------
+
+Speculative execution side channel methods affect a wide range of modern
+high performance processors, since most modern high speed processors
+use branch prediction and speculative execution.
+
+The following CPUs are vulnerable:
+
+ - Intel Core, Atom, Pentium, and Xeon processors
+
+ - AMD Phenom, EPYC, and Zen processors
+
+ - IBM POWER and zSeries processors
+
+ - Higher end ARM processors
+
+ - Apple CPUs
+
+ - Higher end MIPS CPUs
+
+ - Likely most other high performance CPUs. Contact your CPU vendor for details.
+
+Whether a processor is affected or not can be read out from the Spectre
+vulnerability files in sysfs. See :ref:`spectre_sys_info`.
+
+Related CVEs
+------------
+
+The following CVE entries describe Spectre variants:
+
+ ============= ======================= ==========================
+ CVE-2017-5753 Bounds check bypass Spectre variant 1
+ CVE-2017-5715 Branch target injection Spectre variant 2
+ CVE-2019-1125 Spectre v1 swapgs Spectre variant 1 (swapgs)
+ ============= ======================= ==========================
+
+Problem
+-------
+
+CPUs use speculative operations to improve performance. That may leave
+traces of memory accesses or computations in the processor's caches,
+buffers, and branch predictors. Malicious software may be able to
+influence the speculative execution paths, and then use the side effects
+of the speculative execution in the CPUs' caches and buffers to infer
+privileged data touched during the speculative execution.
+
+Spectre variant 1 attacks take advantage of speculative execution of
+conditional branches, while Spectre variant 2 attacks use speculative
+execution of indirect branches to leak privileged memory.
+See :ref:`[1] <spec_ref1>` :ref:`[5] <spec_ref5>` :ref:`[6] <spec_ref6>`
+:ref:`[7] <spec_ref7>` :ref:`[10] <spec_ref10>` :ref:`[11] <spec_ref11>`.
+
+Spectre variant 1 (Bounds Check Bypass)
+---------------------------------------
+
+The bounds check bypass attack :ref:`[2] <spec_ref2>` takes advantage
+of speculative execution that bypasses conditional branch instructions
+used for memory access bounds check (e.g. checking if the index of an
+array results in memory access within a valid range). This results in
+memory accesses to invalid memory (with out-of-bound index) that are
+done speculatively before validation checks resolve. Such speculative
+memory accesses can leave side effects, creating side channels which
+leak information to the attacker.
+
+There are some extensions of Spectre variant 1 attacks for reading data
+over the network, see :ref:`[12] <spec_ref12>`. However such attacks
+are difficult, low bandwidth, fragile, and are considered low risk.
+
+Note that, despite "Bounds Check Bypass" name, Spectre variant 1 is not
+only about user-controlled array bounds checks. It can affect any
+conditional checks. The kernel entry code interrupt, exception, and NMI
+handlers all have conditional swapgs checks. Those may be problematic
+in the context of Spectre v1, as kernel code can speculatively run with
+a user GS.
+
+Spectre variant 2 (Branch Target Injection)
+-------------------------------------------
+
+The branch target injection attack takes advantage of speculative
+execution of indirect branches :ref:`[3] <spec_ref3>`. The indirect
+branch predictors inside the processor used to guess the target of
+indirect branches can be influenced by an attacker, causing gadget code
+to be speculatively executed, thus exposing sensitive data touched by
+the victim. The side effects left in the CPU's caches during speculative
+execution can be measured to infer data values.
+
+.. _poison_btb:
+
+In Spectre variant 2 attacks, the attacker can steer speculative indirect
+branches in the victim to gadget code by poisoning the branch target
+buffer of a CPU used for predicting indirect branch addresses. Such
+poisoning could be done by indirect branching into existing code,
+with the address offset of the indirect branch under the attacker's
+control. Since the branch prediction on impacted hardware does not
+fully disambiguate branch address and uses the offset for prediction,
+this could cause privileged code's indirect branch to jump to a gadget
+code with the same offset.
+
+The most useful gadgets take an attacker-controlled input parameter (such
+as a register value) so that the memory read can be controlled. Gadgets
+without input parameters might be possible, but the attacker would have
+very little control over what memory can be read, reducing the risk of
+the attack revealing useful data.
+
+One other variant 2 attack vector is for the attacker to poison the
+return stack buffer (RSB) :ref:`[13] <spec_ref13>` to cause speculative
+subroutine return instruction execution to go to a gadget. An attacker's
+imbalanced subroutine call instructions might "poison" entries in the
+return stack buffer which are later consumed by a victim's subroutine
+return instructions. This attack can be mitigated by flushing the return
+stack buffer on context switch, or virtual machine (VM) exit.
+
+On systems with simultaneous multi-threading (SMT), attacks are possible
+from the sibling thread, as level 1 cache and branch target buffer
+(BTB) may be shared between hardware threads in a CPU core. A malicious
+program running on the sibling thread may influence its peer's BTB to
+steer its indirect branch speculations to gadget code, and measure the
+speculative execution's side effects left in level 1 cache to infer the
+victim's data.
+
+Yet another variant 2 attack vector is for the attacker to poison the
+Branch History Buffer (BHB) to speculatively steer an indirect branch
+to a specific Branch Target Buffer (BTB) entry, even if the entry isn't
+associated with the source address of the indirect branch. Specifically,
+the BHB might be shared across privilege levels even in the presence of
+Enhanced IBRS.
+
+Previously the only known real-world BHB attack vector was via unprivileged
+eBPF. Further research has found attacks that don't require unprivileged eBPF.
+For a full mitigation against BHB attacks it is recommended to set BHI_DIS_S or
+use the BHB clearing sequence.
+
+Attack scenarios
+----------------
+
+The following list of attack scenarios have been anticipated, but may
+not cover all possible attack vectors.
+
+1. A user process attacking the kernel
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Spectre variant 1
+~~~~~~~~~~~~~~~~~
+
+ The attacker passes a parameter to the kernel via a register or
+ via a known address in memory during a syscall. Such parameter may
+ be used later by the kernel as an index to an array or to derive
+ a pointer for a Spectre variant 1 attack. The index or pointer
+ is invalid, but bound checks are bypassed in the code branch taken
+ for speculative execution. This could cause privileged memory to be
+ accessed and leaked.
+
+ For kernel code that has been identified where data pointers could
+ potentially be influenced for Spectre attacks, new "nospec" accessor
+ macros are used to prevent speculative loading of data.
+
+Spectre variant 1 (swapgs)
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ An attacker can train the branch predictor to speculatively skip the
+ swapgs path for an interrupt or exception. If they initialize
+ the GS register to a user-space value, if the swapgs is speculatively
+ skipped, subsequent GS-related percpu accesses in the speculation
+ window will be done with the attacker-controlled GS value. This
+ could cause privileged memory to be accessed and leaked.
+
+ For example:
+
+ ::
+
+ if (coming from user space)
+ swapgs
+ mov %gs:<percpu_offset>, %reg
+ mov (%reg), %reg1
+
+ When coming from user space, the CPU can speculatively skip the
+ swapgs, and then do a speculative percpu load using the user GS
+ value. So the user can speculatively force a read of any kernel
+ value. If a gadget exists which uses the percpu value as an address
+ in another load/store, then the contents of the kernel value may
+ become visible via an L1 side channel attack.
+
+ A similar attack exists when coming from kernel space. The CPU can
+ speculatively do the swapgs, causing the user GS to get used for the
+ rest of the speculative window.
+
+Spectre variant 2
+~~~~~~~~~~~~~~~~~
+
+ A spectre variant 2 attacker can :ref:`poison <poison_btb>` the branch
+ target buffer (BTB) before issuing syscall to launch an attack.
+ After entering the kernel, the kernel could use the poisoned branch
+ target buffer on indirect jump and jump to gadget code in speculative
+ execution.
+
+ If an attacker tries to control the memory addresses leaked during
+ speculative execution, he would also need to pass a parameter to the
+ gadget, either through a register or a known address in memory. After
+ the gadget has executed, he can measure the side effect.
+
+ The kernel can protect itself against consuming poisoned branch
+ target buffer entries by using return trampolines (also known as
+ "retpoline") :ref:`[3] <spec_ref3>` :ref:`[9] <spec_ref9>` for all
+ indirect branches. Return trampolines trap speculative execution paths
+ to prevent jumping to gadget code during speculative execution.
+ x86 CPUs with Enhanced Indirect Branch Restricted Speculation
+ (Enhanced IBRS) available in hardware should use the feature to
+ mitigate Spectre variant 2 instead of retpoline. Enhanced IBRS is
+ more efficient than retpoline.
+
+ There may be gadget code in firmware which could be exploited with
+ Spectre variant 2 attack by a rogue user process. To mitigate such
+ attacks on x86, Indirect Branch Restricted Speculation (IBRS) feature
+ is turned on before the kernel invokes any firmware code.
+
+2. A user process attacking another user process
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ A malicious user process can try to attack another user process,
+ either via a context switch on the same hardware thread, or from the
+ sibling hyperthread sharing a physical processor core on simultaneous
+ multi-threading (SMT) system.
+
+ Spectre variant 1 attacks generally require passing parameters
+ between the processes, which needs a data passing relationship, such
+ as remote procedure calls (RPC). Those parameters are used in gadget
+ code to derive invalid data pointers accessing privileged memory in
+ the attacked process.
+
+ Spectre variant 2 attacks can be launched from a rogue process by
+ :ref:`poisoning <poison_btb>` the branch target buffer. This can
+ influence the indirect branch targets for a victim process that either
+ runs later on the same hardware thread, or running concurrently on
+ a sibling hardware thread sharing the same physical core.
+
+ A user process can protect itself against Spectre variant 2 attacks
+ by using the prctl() syscall to disable indirect branch speculation
+ for itself. An administrator can also cordon off an unsafe process
+ from polluting the branch target buffer by disabling the process's
+ indirect branch speculation. This comes with a performance cost
+ from not using indirect branch speculation and clearing the branch
+ target buffer. When SMT is enabled on x86, for a process that has
+ indirect branch speculation disabled, Single Threaded Indirect Branch
+ Predictors (STIBP) :ref:`[4] <spec_ref4>` are turned on to prevent the
+ sibling thread from controlling branch target buffer. In addition,
+ the Indirect Branch Prediction Barrier (IBPB) is issued to clear the
+ branch target buffer when context switching to and from such process.
+
+ On x86, the return stack buffer is stuffed on context switch.
+ This prevents the branch target buffer from being used for branch
+ prediction when the return stack buffer underflows while switching to
+ a deeper call stack. Any poisoned entries in the return stack buffer
+ left by the previous process will also be cleared.
+
+ User programs should use address space randomization to make attacks
+ more difficult (Set /proc/sys/kernel/randomize_va_space = 1 or 2).
+
+3. A virtualized guest attacking the host
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ The attack mechanism is similar to how user processes attack the
+ kernel. The kernel is entered via hyper-calls or other virtualization
+ exit paths.
+
+ For Spectre variant 1 attacks, rogue guests can pass parameters
+ (e.g. in registers) via hyper-calls to derive invalid pointers to
+ speculate into privileged memory after entering the kernel. For places
+ where such kernel code has been identified, nospec accessor macros
+ are used to stop speculative memory access.
+
+ For Spectre variant 2 attacks, rogue guests can :ref:`poison
+ <poison_btb>` the branch target buffer or return stack buffer, causing
+ the kernel to jump to gadget code in the speculative execution paths.
+
+ To mitigate variant 2, the host kernel can use return trampolines
+ for indirect branches to bypass the poisoned branch target buffer,
+ and flushing the return stack buffer on VM exit. This prevents rogue
+ guests from affecting indirect branching in the host kernel.
+
+ To protect host processes from rogue guests, host processes can have
+ indirect branch speculation disabled via prctl(). The branch target
+ buffer is cleared before context switching to such processes.
+
+4. A virtualized guest attacking other guest
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ A rogue guest may attack another guest to get data accessible by the
+ other guest.
+
+ Spectre variant 1 attacks are possible if parameters can be passed
+ between guests. This may be done via mechanisms such as shared memory
+ or message passing. Such parameters could be used to derive data
+ pointers to privileged data in guest. The privileged data could be
+ accessed by gadget code in the victim's speculation paths.
+
+ Spectre variant 2 attacks can be launched from a rogue guest by
+ :ref:`poisoning <poison_btb>` the branch target buffer or the return
+ stack buffer. Such poisoned entries could be used to influence
+ speculation execution paths in the victim guest.
+
+ Linux kernel mitigates attacks to other guests running in the same
+ CPU hardware thread by flushing the return stack buffer on VM exit,
+ and clearing the branch target buffer before switching to a new guest.
+
+ If SMT is used, Spectre variant 2 attacks from an untrusted guest
+ in the sibling hyperthread can be mitigated by the administrator,
+ by turning off the unsafe guest's indirect branch speculation via
+ prctl(). A guest can also protect itself by turning on microcode
+ based mitigations (such as IBPB or STIBP on x86) within the guest.
+
+.. _spectre_sys_info:
+
+Spectre system information
+--------------------------
+
+The Linux kernel provides a sysfs interface to enumerate the current
+mitigation status of the system for Spectre: whether the system is
+vulnerable, and which mitigations are active.
+
+The sysfs file showing Spectre variant 1 mitigation status is:
+
+ /sys/devices/system/cpu/vulnerabilities/spectre_v1
+
+The possible values in this file are:
+
+ .. list-table::
+
+ * - 'Not affected'
+ - The processor is not vulnerable.
+ * - 'Vulnerable: __user pointer sanitization and usercopy barriers only; no swapgs barriers'
+ - The swapgs protections are disabled; otherwise it has
+ protection in the kernel on a case by case base with explicit
+ pointer sanitation and usercopy LFENCE barriers.
+ * - 'Mitigation: usercopy/swapgs barriers and __user pointer sanitization'
+ - Protection in the kernel on a case by case base with explicit
+ pointer sanitation, usercopy LFENCE barriers, and swapgs LFENCE
+ barriers.
+
+However, the protections are put in place on a case by case basis,
+and there is no guarantee that all possible attack vectors for Spectre
+variant 1 are covered.
+
+The spectre_v2 kernel file reports if the kernel has been compiled with
+retpoline mitigation or if the CPU has hardware mitigation, and if the
+CPU has support for additional process-specific mitigation.
+
+This file also reports CPU features enabled by microcode to mitigate
+attack between user processes:
+
+1. Indirect Branch Prediction Barrier (IBPB) to add additional
+ isolation between processes of different users.
+2. Single Thread Indirect Branch Predictors (STIBP) to add additional
+ isolation between CPU threads running on the same core.
+
+These CPU features may impact performance when used and can be enabled
+per process on a case-by-case base.
+
+The sysfs file showing Spectre variant 2 mitigation status is:
+
+ /sys/devices/system/cpu/vulnerabilities/spectre_v2
+
+The possible values in this file are:
+
+ - Kernel status:
+
+ ======================================== =================================
+ 'Not affected' The processor is not vulnerable
+ 'Mitigation: None' Vulnerable, no mitigation
+ 'Mitigation: Retpolines' Use Retpoline thunks
+ 'Mitigation: LFENCE' Use LFENCE instructions
+ 'Mitigation: Enhanced IBRS' Hardware-focused mitigation
+ 'Mitigation: Enhanced IBRS + Retpolines' Hardware-focused + Retpolines
+ 'Mitigation: Enhanced IBRS + LFENCE' Hardware-focused + LFENCE
+ ======================================== =================================
+
+ - Firmware status: Show if Indirect Branch Restricted Speculation (IBRS) is
+ used to protect against Spectre variant 2 attacks when calling firmware (x86 only).
+
+ ========== =============================================================
+ 'IBRS_FW' Protection against user program attacks when calling firmware
+ ========== =============================================================
+
+ - Indirect branch prediction barrier (IBPB) status for protection between
+ processes of different users. This feature can be controlled through
+ prctl() per process, or through kernel command line options. This is
+ an x86 only feature. For more details see below.
+
+ =================== ========================================================
+ 'IBPB: disabled' IBPB unused
+ 'IBPB: always-on' Use IBPB on all tasks
+ 'IBPB: conditional' Use IBPB on SECCOMP or indirect branch restricted tasks
+ =================== ========================================================
+
+ - Single threaded indirect branch prediction (STIBP) status for protection
+ between different hyper threads. This feature can be controlled through
+ prctl per process, or through kernel command line options. This is x86
+ only feature. For more details see below.
+
+ ==================== ========================================================
+ 'STIBP: disabled' STIBP unused
+ 'STIBP: forced' Use STIBP on all tasks
+ 'STIBP: conditional' Use STIBP on SECCOMP or indirect branch restricted tasks
+ ==================== ========================================================
+
+ - Return stack buffer (RSB) protection status:
+
+ ============= ===========================================
+ 'RSB filling' Protection of RSB on context switch enabled
+ ============= ===========================================
+
+ - EIBRS Post-barrier Return Stack Buffer (PBRSB) protection status:
+
+ =========================== =======================================================
+ 'PBRSB-eIBRS: SW sequence' CPU is affected and protection of RSB on VMEXIT enabled
+ 'PBRSB-eIBRS: Vulnerable' CPU is vulnerable
+ 'PBRSB-eIBRS: Not affected' CPU is not affected by PBRSB
+ =========================== =======================================================
+
+ - Branch History Injection (BHI) protection status:
+
+.. list-table::
+
+ * - BHI: Not affected
+ - System is not affected
+ * - BHI: Retpoline
+ - System is protected by retpoline
+ * - BHI: BHI_DIS_S
+ - System is protected by BHI_DIS_S
+ * - BHI: SW loop, KVM SW loop
+ - System is protected by software clearing sequence
+ * - BHI: Vulnerable
+ - System is vulnerable to BHI
+ * - BHI: Vulnerable, KVM: SW loop
+ - System is vulnerable; KVM is protected by software clearing sequence
+
+Full mitigation might require a microcode update from the CPU
+vendor. When the necessary microcode is not available, the kernel will
+report vulnerability.
+
+Turning on mitigation for Spectre variant 1 and Spectre variant 2
+-----------------------------------------------------------------
+
+1. Kernel mitigation
+^^^^^^^^^^^^^^^^^^^^
+
+Spectre variant 1
+~~~~~~~~~~~~~~~~~
+
+ For the Spectre variant 1, vulnerable kernel code (as determined
+ by code audit or scanning tools) is annotated on a case by case
+ basis to use nospec accessor macros for bounds clipping :ref:`[2]
+ <spec_ref2>` to avoid any usable disclosure gadgets. However, it may
+ not cover all attack vectors for Spectre variant 1.
+
+ Copy-from-user code has an LFENCE barrier to prevent the access_ok()
+ check from being mis-speculated. The barrier is done by the
+ barrier_nospec() macro.
+
+ For the swapgs variant of Spectre variant 1, LFENCE barriers are
+ added to interrupt, exception and NMI entry where needed. These
+ barriers are done by the FENCE_SWAPGS_KERNEL_ENTRY and
+ FENCE_SWAPGS_USER_ENTRY macros.
+
+Spectre variant 2
+~~~~~~~~~~~~~~~~~
+
+ For Spectre variant 2 mitigation, the compiler turns indirect calls or
+ jumps in the kernel into equivalent return trampolines (retpolines)
+ :ref:`[3] <spec_ref3>` :ref:`[9] <spec_ref9>` to go to the target
+ addresses. Speculative execution paths under retpolines are trapped
+ in an infinite loop to prevent any speculative execution jumping to
+ a gadget.
+
+ To turn on retpoline mitigation on a vulnerable CPU, the kernel
+ needs to be compiled with a gcc compiler that supports the
+ -mindirect-branch=thunk-extern -mindirect-branch-register options.
+ If the kernel is compiled with a Clang compiler, the compiler needs
+ to support -mretpoline-external-thunk option. The kernel config
+ CONFIG_MITIGATION_RETPOLINE needs to be turned on, and the CPU needs
+ to run with the latest updated microcode.
+
+ On Intel Skylake-era systems the mitigation covers most, but not all,
+ cases. See :ref:`[3] <spec_ref3>` for more details.
+
+ On CPUs with hardware mitigation for Spectre variant 2 (e.g. IBRS
+ or enhanced IBRS on x86), retpoline is automatically disabled at run time.
+
+ Systems which support enhanced IBRS (eIBRS) enable IBRS protection once at
+ boot, by setting the IBRS bit, and they're automatically protected against
+ some Spectre v2 variant attacks. The BHB can still influence the choice of
+ indirect branch predictor entry, and although branch predictor entries are
+ isolated between modes when eIBRS is enabled, the BHB itself is not isolated
+ between modes. Systems which support BHI_DIS_S will set it to protect against
+ BHI attacks.
+
+ On Intel's enhanced IBRS systems, this includes cross-thread branch target
+ injections on SMT systems (STIBP). In other words, Intel eIBRS enables
+ STIBP, too.
+
+ AMD Automatic IBRS does not protect userspace, and Legacy IBRS systems clear
+ the IBRS bit on exit to userspace, therefore both explicitly enable STIBP.
+
+ The retpoline mitigation is turned on by default on vulnerable
+ CPUs. It can be forced on or off by the administrator
+ via the kernel command line and sysfs control files. See
+ :ref:`spectre_mitigation_control_command_line`.
+
+ On x86, indirect branch restricted speculation is turned on by default
+ before invoking any firmware code to prevent Spectre variant 2 exploits
+ using the firmware.
+
+ Using kernel address space randomization (CONFIG_RANDOMIZE_BASE=y
+ and CONFIG_SLAB_FREELIST_RANDOM=y in the kernel configuration) makes
+ attacks on the kernel generally more difficult.
+
+2. User program mitigation
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ User programs can mitigate Spectre variant 1 using LFENCE or "bounds
+ clipping". For more details see :ref:`[2] <spec_ref2>`.
+
+ For Spectre variant 2 mitigation, individual user programs
+ can be compiled with return trampolines for indirect branches.
+ This protects them from consuming poisoned entries in the branch
+ target buffer left by malicious software.
+
+ On legacy IBRS systems, at return to userspace, implicit STIBP is disabled
+ because the kernel clears the IBRS bit. In this case, the userspace programs
+ can disable indirect branch speculation via prctl() (See
+ :ref:`Documentation/userspace-api/spec_ctrl.rst <set_spec_ctrl>`).
+ On x86, this will turn on STIBP to guard against attacks from the
+ sibling thread when the user program is running, and use IBPB to
+ flush the branch target buffer when switching to/from the program.
+
+ Restricting indirect branch speculation on a user program will
+ also prevent the program from launching a variant 2 attack
+ on x86. Administrators can change that behavior via the kernel
+ command line and sysfs control files.
+ See :ref:`spectre_mitigation_control_command_line`.
+
+ Programs that disable their indirect branch speculation will have
+ more overhead and run slower.
+
+ User programs should use address space randomization
+ (/proc/sys/kernel/randomize_va_space = 1 or 2) to make attacks more
+ difficult.
+
+3. VM mitigation
+^^^^^^^^^^^^^^^^
+
+ Within the kernel, Spectre variant 1 attacks from rogue guests are
+ mitigated on a case by case basis in VM exit paths. Vulnerable code
+ uses nospec accessor macros for "bounds clipping", to avoid any
+ usable disclosure gadgets. However, this may not cover all variant
+ 1 attack vectors.
+
+ For Spectre variant 2 attacks from rogue guests to the kernel, the
+ Linux kernel uses retpoline or Enhanced IBRS to prevent consumption of
+ poisoned entries in branch target buffer left by rogue guests. It also
+ flushes the return stack buffer on every VM exit to prevent a return
+ stack buffer underflow so poisoned branch target buffer could be used,
+ or attacker guests leaving poisoned entries in the return stack buffer.
+
+ To mitigate guest-to-guest attacks in the same CPU hardware thread,
+ the branch target buffer is sanitized by flushing before switching
+ to a new guest on a CPU.
+
+ The above mitigations are turned on by default on vulnerable CPUs.
+
+ To mitigate guest-to-guest attacks from sibling thread when SMT is
+ in use, an untrusted guest running in the sibling thread can have
+ its indirect branch speculation disabled by administrator via prctl().
+
+ The kernel also allows guests to use any microcode based mitigation
+ they choose to use (such as IBPB or STIBP on x86) to protect themselves.
+
+.. _spectre_mitigation_control_command_line:
+
+Mitigation control on the kernel command line
+---------------------------------------------
+
+In general the kernel selects reasonable default mitigations for the
+current CPU.
+
+Spectre default mitigations can be disabled or changed at the kernel
+command line with the following options:
+
+ - nospectre_v1
+ - nospectre_v2
+ - spectre_v2={option}
+ - spectre_v2_user={option}
+ - spectre_bhi={option}
+
+For more details on the available options, refer to Documentation/admin-guide/kernel-parameters.txt
+
+Mitigation selection guide
+--------------------------
+
+1. Trusted userspace
+^^^^^^^^^^^^^^^^^^^^
+
+ If all userspace applications are from trusted sources and do not
+ execute externally supplied untrusted code, then the mitigations can
+ be disabled.
+
+2. Protect sensitive programs
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ For security-sensitive programs that have secrets (e.g. crypto
+ keys), protection against Spectre variant 2 can be put in place by
+ disabling indirect branch speculation when the program is running
+ (See :ref:`Documentation/userspace-api/spec_ctrl.rst <set_spec_ctrl>`).
+
+3. Sandbox untrusted programs
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ Untrusted programs that could be a source of attacks can be cordoned
+ off by disabling their indirect branch speculation when they are run
+ (See :ref:`Documentation/userspace-api/spec_ctrl.rst <set_spec_ctrl>`).
+ This prevents untrusted programs from polluting the branch target
+ buffer. This behavior can be changed via the kernel command line
+ and sysfs control files. See
+ :ref:`spectre_mitigation_control_command_line`.
+
+3. High security mode
+^^^^^^^^^^^^^^^^^^^^^
+
+ All Spectre variant 2 mitigations can be forced on
+ at boot time for all programs (See the "on" option in
+ :ref:`spectre_mitigation_control_command_line`). This will add
+ overhead as indirect branch speculations for all programs will be
+ restricted.
+
+ On x86, branch target buffer will be flushed with IBPB when switching
+ to a new program. STIBP is left on all the time to protect programs
+ against variant 2 attacks originating from programs running on
+ sibling threads.
+
+ Alternatively, STIBP can be used only when running programs
+ whose indirect branch speculation is explicitly disabled,
+ while IBPB is still used all the time when switching to a new
+ program to clear the branch target buffer (See "ibpb" option in
+ :ref:`spectre_mitigation_control_command_line`). This "ibpb" option
+ has less performance cost than the "on" option, which leaves STIBP
+ on all the time.
+
+References on Spectre
+---------------------
+
+Intel white papers:
+
+.. _spec_ref1:
+
+[1] `Intel analysis of speculative execution side channels <https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/analysis-of-speculative-execution-side-channels-white-paper.pdf>`_.
+
+.. _spec_ref2:
+
+[2] `Bounds check bypass <https://software.intel.com/security-software-guidance/software-guidance/bounds-check-bypass>`_.
+
+.. _spec_ref3:
+
+[3] `Deep dive: Retpoline: A branch target injection mitigation <https://software.intel.com/security-software-guidance/insights/deep-dive-retpoline-branch-target-injection-mitigation>`_.
+
+.. _spec_ref4:
+
+[4] `Deep Dive: Single Thread Indirect Branch Predictors <https://software.intel.com/security-software-guidance/insights/deep-dive-single-thread-indirect-branch-predictors>`_.
+
+AMD white papers:
+
+.. _spec_ref5:
+
+[5] `AMD64 technology indirect branch control extension <https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/white-papers/111006-architecture-guidelines-update-amd64-technology-indirect-branch-control-extension.pdf>`_.
+
+.. _spec_ref6:
+
+[6] `Software techniques for managing speculation on AMD processors <https://developer.amd.com/wp-content/resources/Managing-Speculation-on-AMD-Processors.pdf>`_.
+
+ARM white papers:
+
+.. _spec_ref7:
+
+[7] `Cache speculation side-channels <https://developer.arm.com/support/arm-security-updates/speculative-processor-vulnerability/download-the-whitepaper>`_.
+
+.. _spec_ref8:
+
+[8] `Cache speculation issues update <https://developer.arm.com/support/arm-security-updates/speculative-processor-vulnerability/latest-updates/cache-speculation-issues-update>`_.
+
+Google white paper:
+
+.. _spec_ref9:
+
+[9] `Retpoline: a software construct for preventing branch-target-injection <https://support.google.com/faqs/answer/7625886>`_.
+
+MIPS white paper:
+
+.. _spec_ref10:
+
+[10] `MIPS: response on speculative execution and side channel vulnerabilities <https://web.archive.org/web/20220512003005if_/https://www.mips.com/blog/mips-response-on-speculative-execution-and-side-channel-vulnerabilities/>`_.
+
+Academic papers:
+
+.. _spec_ref11:
+
+[11] `Spectre Attacks: Exploiting Speculative Execution <https://spectreattack.com/spectre.pdf>`_.
+
+.. _spec_ref12:
+
+[12] `NetSpectre: Read Arbitrary Memory over Network <https://arxiv.org/abs/1807.10535>`_.
+
+.. _spec_ref13:
+
+[13] `Spectre Returns! Speculation Attacks using the Return Stack Buffer <https://www.usenix.org/system/files/conference/woot18/woot18-paper-koruyeh.pdf>`_.
diff --git a/Documentation/admin-guide/hw-vuln/srso.rst b/Documentation/admin-guide/hw-vuln/srso.rst
new file mode 100644
index 000000000000..66af95251a3d
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/srso.rst
@@ -0,0 +1,242 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Speculative Return Stack Overflow (SRSO)
+========================================
+
+This is a mitigation for the speculative return stack overflow (SRSO)
+vulnerability found on AMD processors. The mechanism is by now the well
+known scenario of poisoning CPU functional units - the Branch Target
+Buffer (BTB) and Return Address Predictor (RAP) in this case - and then
+tricking the elevated privilege domain (the kernel) into leaking
+sensitive data.
+
+AMD CPUs predict RET instructions using a Return Address Predictor (aka
+Return Address Stack/Return Stack Buffer). In some cases, a non-architectural
+CALL instruction (i.e., an instruction predicted to be a CALL but is
+not actually a CALL) can create an entry in the RAP which may be used
+to predict the target of a subsequent RET instruction.
+
+The specific circumstances that lead to this varies by microarchitecture
+but the concern is that an attacker can mis-train the CPU BTB to predict
+non-architectural CALL instructions in kernel space and use this to
+control the speculative target of a subsequent kernel RET, potentially
+leading to information disclosure via a speculative side-channel.
+
+The issue is tracked under CVE-2023-20569.
+
+Affected processors
+-------------------
+
+AMD Zen, generations 1-4. That is, all families 0x17 and 0x19. Older
+processors have not been investigated.
+
+System information and options
+------------------------------
+
+First of all, it is required that the latest microcode be loaded for
+mitigations to be effective.
+
+The sysfs file showing SRSO mitigation status is:
+
+ /sys/devices/system/cpu/vulnerabilities/spec_rstack_overflow
+
+The possible values in this file are:
+
+ * 'Not affected':
+
+ The processor is not vulnerable
+
+* 'Vulnerable':
+
+ The processor is vulnerable and no mitigations have been applied.
+
+ * 'Vulnerable: No microcode':
+
+ The processor is vulnerable, no microcode extending IBPB
+ functionality to address the vulnerability has been applied.
+
+ * 'Vulnerable: Safe RET, no microcode':
+
+ The "Safe RET" mitigation (see below) has been applied to protect the
+ kernel, but the IBPB-extending microcode has not been applied. User
+ space tasks may still be vulnerable.
+
+ * 'Vulnerable: Microcode, no safe RET':
+
+ Extended IBPB functionality microcode patch has been applied. It does
+ not address User->Kernel and Guest->Host transitions protection but it
+ does address User->User and VM->VM attack vectors.
+
+ Note that User->User mitigation is controlled by how the IBPB aspect in
+ the Spectre v2 mitigation is selected:
+
+ * conditional IBPB:
+
+ where each process can select whether it needs an IBPB issued
+ around it PR_SPEC_DISABLE/_ENABLE etc, see :doc:`spectre`
+
+ * strict:
+
+ i.e., always on - by supplying spectre_v2_user=on on the kernel
+ command line
+
+ (spec_rstack_overflow=microcode)
+
+ * 'Mitigation: Safe RET':
+
+ Combined microcode/software mitigation. It complements the
+ extended IBPB microcode patch functionality by addressing
+ User->Kernel and Guest->Host transitions protection.
+
+ Selected by default or by spec_rstack_overflow=safe-ret
+
+ * 'Mitigation: IBPB':
+
+ Similar protection as "safe RET" above but employs an IBPB barrier on
+ privilege domain crossings (User->Kernel, Guest->Host).
+
+ (spec_rstack_overflow=ibpb)
+
+ * 'Mitigation: IBPB on VMEXIT':
+
+ Mitigation addressing the cloud provider scenario - the Guest->Host
+ transitions only.
+
+ (spec_rstack_overflow=ibpb-vmexit)
+
+ * 'Mitigation: Reduced Speculation':
+
+ This mitigation gets automatically enabled when the above one "IBPB on
+ VMEXIT" has been selected and the CPU supports the BpSpecReduce bit.
+
+ It gets automatically enabled on machines which have the
+ SRSO_USER_KERNEL_NO=1 CPUID bit. In that case, the code logic is to switch
+ to the above =ibpb-vmexit mitigation because the user/kernel boundary is
+ not affected anymore and thus "safe RET" is not needed.
+
+ After enabling the IBPB on VMEXIT mitigation option, the BpSpecReduce bit
+ is detected (functionality present on all such machines) and that
+ practically overrides IBPB on VMEXIT as it has a lot less performance
+ impact and takes care of the guest->host attack vector too.
+
+In order to exploit vulnerability, an attacker needs to:
+
+ - gain local access on the machine
+
+ - break kASLR
+
+ - find gadgets in the running kernel in order to use them in the exploit
+
+ - potentially create and pin an additional workload on the sibling
+ thread, depending on the microarchitecture (not necessary on fam 0x19)
+
+ - run the exploit
+
+Considering the performance implications of each mitigation type, the
+default one is 'Mitigation: safe RET' which should take care of most
+attack vectors, including the local User->Kernel one.
+
+As always, the user is advised to keep her/his system up-to-date by
+applying software updates regularly.
+
+The default setting will be reevaluated when needed and especially when
+new attack vectors appear.
+
+As one can surmise, 'Mitigation: safe RET' does come at the cost of some
+performance depending on the workload. If one trusts her/his userspace
+and does not want to suffer the performance impact, one can always
+disable the mitigation with spec_rstack_overflow=off.
+
+Similarly, 'Mitigation: IBPB' is another full mitigation type employing
+an indirect branch prediction barrier after having applied the required
+microcode patch for one's system. This mitigation comes also at
+a performance cost.
+
+Mitigation: Safe RET
+--------------------
+
+The mitigation works by ensuring all RET instructions speculate to
+a controlled location, similar to how speculation is controlled in the
+retpoline sequence. To accomplish this, the __x86_return_thunk forces
+the CPU to mispredict every function return using a 'safe return'
+sequence.
+
+To ensure the safety of this mitigation, the kernel must ensure that the
+safe return sequence is itself free from attacker interference. In Zen3
+and Zen4, this is accomplished by creating a BTB alias between the
+untraining function srso_alias_untrain_ret() and the safe return
+function srso_alias_safe_ret() which results in evicting a potentially
+poisoned BTB entry and using that safe one for all function returns.
+
+In older Zen1 and Zen2, this is accomplished using a reinterpretation
+technique similar to Retbleed one: srso_untrain_ret() and
+srso_safe_ret().
+
+Checking the safe RET mitigation actually works
+-----------------------------------------------
+
+In case one wants to validate whether the SRSO safe RET mitigation works
+on a kernel, one could use two performance counters
+
+* PMC_0xc8 - Count of RET/RET lw retired
+* PMC_0xc9 - Count of RET/RET lw retired mispredicted
+
+and compare the number of RETs retired properly vs those retired
+mispredicted, in kernel mode. Another way of specifying those events
+is::
+
+ # perf list ex_ret_near_ret
+
+ List of pre-defined events (to be used in -e or -M):
+
+ core:
+ ex_ret_near_ret
+ [Retired Near Returns]
+ ex_ret_near_ret_mispred
+ [Retired Near Returns Mispredicted]
+
+Either the command using the event mnemonics::
+
+ # perf stat -e ex_ret_near_ret:k -e ex_ret_near_ret_mispred:k sleep 10s
+
+or using the raw PMC numbers::
+
+ # perf stat -e cpu/event=0xc8,umask=0/k -e cpu/event=0xc9,umask=0/k sleep 10s
+
+should give the same amount. I.e., every RET retired should be
+mispredicted::
+
+ [root@brent: ~/kernel/linux/tools/perf> ./perf stat -e cpu/event=0xc8,umask=0/k -e cpu/event=0xc9,umask=0/k sleep 10s
+
+ Performance counter stats for 'sleep 10s':
+
+ 137,167 cpu/event=0xc8,umask=0/k
+ 137,173 cpu/event=0xc9,umask=0/k
+
+ 10.004110303 seconds time elapsed
+
+ 0.000000000 seconds user
+ 0.004462000 seconds sys
+
+vs the case when the mitigation is disabled (spec_rstack_overflow=off)
+or not functioning properly, showing usually a lot smaller number of
+mispredicted retired RETs vs the overall count of retired RETs during
+a workload::
+
+ [root@brent: ~/kernel/linux/tools/perf> ./perf stat -e cpu/event=0xc8,umask=0/k -e cpu/event=0xc9,umask=0/k sleep 10s
+
+ Performance counter stats for 'sleep 10s':
+
+ 201,627 cpu/event=0xc8,umask=0/k
+ 4,074 cpu/event=0xc9,umask=0/k
+
+ 10.003267252 seconds time elapsed
+
+ 0.002729000 seconds user
+ 0.000000000 seconds sys
+
+Also, there is a selftest which performs the above, go to
+tools/testing/selftests/x86/ and do::
+
+ make srso
+ ./srso
diff --git a/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst b/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
new file mode 100644
index 000000000000..444f84e22a91
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
@@ -0,0 +1,270 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+TAA - TSX Asynchronous Abort
+======================================
+
+TAA is a hardware vulnerability that allows unprivileged speculative access to
+data which is available in various CPU internal buffers by using asynchronous
+aborts within an Intel TSX transactional region.
+
+Affected processors
+-------------------
+
+This vulnerability only affects Intel processors that support Intel
+Transactional Synchronization Extensions (TSX) when the TAA_NO bit (bit 8)
+is 0 in the IA32_ARCH_CAPABILITIES MSR. On processors where the MDS_NO bit
+(bit 5) is 0 in the IA32_ARCH_CAPABILITIES MSR, the existing MDS mitigations
+also mitigate against TAA.
+
+Whether a processor is affected or not can be read out from the TAA
+vulnerability file in sysfs. See :ref:`tsx_async_abort_sys_info`.
+
+Related CVEs
+------------
+
+The following CVE entry is related to this TAA issue:
+
+ ============== ===== ===================================================
+ CVE-2019-11135 TAA TSX Asynchronous Abort (TAA) condition on some
+ microprocessors utilizing speculative execution may
+ allow an authenticated user to potentially enable
+ information disclosure via a side channel with
+ local access.
+ ============== ===== ===================================================
+
+Problem
+-------
+
+When performing store, load or L1 refill operations, processors write
+data into temporary microarchitectural structures (buffers). The data in
+those buffers can be forwarded to load operations as an optimization.
+
+Intel TSX is an extension to the x86 instruction set architecture that adds
+hardware transactional memory support to improve performance of multi-threaded
+software. TSX lets the processor expose and exploit concurrency hidden in an
+application due to dynamically avoiding unnecessary synchronization.
+
+TSX supports atomic memory transactions that are either committed (success) or
+aborted. During an abort, operations that happened within the transactional region
+are rolled back. An asynchronous abort takes place, among other options, when a
+different thread accesses a cache line that is also used within the transactional
+region when that access might lead to a data race.
+
+Immediately after an uncompleted asynchronous abort, certain speculatively
+executed loads may read data from those internal buffers and pass it to dependent
+operations. This can be then used to infer the value via a cache side channel
+attack.
+
+Because the buffers are potentially shared between Hyper-Threads cross
+Hyper-Thread attacks are possible.
+
+The victim of a malicious actor does not need to make use of TSX. Only the
+attacker needs to begin a TSX transaction and raise an asynchronous abort
+which in turn potentially leaks data stored in the buffers.
+
+More detailed technical information is available in the TAA specific x86
+architecture section: :ref:`Documentation/arch/x86/tsx_async_abort.rst <tsx_async_abort>`.
+
+
+Attack scenarios
+----------------
+
+Attacks against the TAA vulnerability can be implemented from unprivileged
+applications running on hosts or guests.
+
+As for MDS, the attacker has no control over the memory addresses that can
+be leaked. Only the victim is responsible for bringing data to the CPU. As
+a result, the malicious actor has to sample as much data as possible and
+then postprocess it to try to infer any useful information from it.
+
+A potential attacker only has read access to the data. Also, there is no direct
+privilege escalation by using this technique.
+
+
+.. _tsx_async_abort_sys_info:
+
+TAA system information
+-----------------------
+
+The Linux kernel provides a sysfs interface to enumerate the current TAA status
+of mitigated systems. The relevant sysfs file is:
+
+/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
+
+The possible values in this file are:
+
+.. list-table::
+
+ * - 'Vulnerable'
+ - The CPU is affected by this vulnerability and the microcode and kernel mitigation are not applied.
+ * - 'Vulnerable: Clear CPU buffers attempted, no microcode'
+ - The processor is vulnerable but microcode is not updated. The
+ mitigation is enabled on a best effort basis.
+
+ If the processor is vulnerable but the availability of the microcode
+ based mitigation mechanism is not advertised via CPUID, the kernel
+ selects a best effort mitigation mode. This mode invokes the mitigation
+ instructions without a guarantee that they clear the CPU buffers.
+
+ This is done to address virtualization scenarios where the host has the
+ microcode update applied, but the hypervisor is not yet updated to
+ expose the CPUID to the guest. If the host has updated microcode the
+ protection takes effect; otherwise a few CPU cycles are wasted
+ pointlessly.
+ * - 'Mitigation: Clear CPU buffers'
+ - The microcode has been updated to clear the buffers. TSX is still enabled.
+ * - 'Mitigation: TSX disabled'
+ - TSX is disabled.
+ * - 'Not affected'
+ - The CPU is not affected by this issue.
+
+Mitigation mechanism
+--------------------
+
+The kernel detects the affected CPUs and the presence of the microcode which is
+required. If a CPU is affected and the microcode is available, then the kernel
+enables the mitigation by default.
+
+
+The mitigation can be controlled at boot time via a kernel command line option.
+See :ref:`taa_mitigation_control_command_line`.
+
+Virtualization mitigation
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Affected systems where the host has TAA microcode and TAA is mitigated by
+having disabled TSX previously, are not vulnerable regardless of the status
+of the VMs.
+
+In all other cases, if the host either does not have the TAA microcode or
+the kernel is not mitigated, the system might be vulnerable.
+
+
+.. _taa_mitigation_control_command_line:
+
+Mitigation control on the kernel command line
+---------------------------------------------
+
+The kernel command line allows to control the TAA mitigations at boot time with
+the option "tsx_async_abort=". The valid arguments for this option are:
+
+ ============ =============================================================
+ off This option disables the TAA mitigation on affected platforms.
+ If the system has TSX enabled (see next parameter) and the CPU
+ is affected, the system is vulnerable.
+
+ full TAA mitigation is enabled. If TSX is enabled, on an affected
+ system it will clear CPU buffers on ring transitions. On
+ systems which are MDS-affected and deploy MDS mitigation,
+ TAA is also mitigated. Specifying this option on those
+ systems will have no effect.
+
+ full,nosmt The same as tsx_async_abort=full, with SMT disabled on
+ vulnerable CPUs that have TSX enabled. This is the complete
+ mitigation. When TSX is disabled, SMT is not disabled because
+ CPU is not vulnerable to cross-thread TAA attacks.
+ ============ =============================================================
+
+Not specifying this option is equivalent to "tsx_async_abort=full". For
+processors that are affected by both TAA and MDS, specifying just
+"tsx_async_abort=off" without an accompanying "mds=off" will have no
+effect as the same mitigation is used for both vulnerabilities.
+
+The kernel command line also allows to control the TSX feature using the
+parameter "tsx=" on CPUs which support TSX control. MSR_IA32_TSX_CTRL is used
+to control the TSX feature and the enumeration of the TSX feature bits (RTM
+and HLE) in CPUID.
+
+The valid options are:
+
+ ============ =============================================================
+ off Disables TSX on the system.
+
+ Note that this option takes effect only on newer CPUs which are
+ not vulnerable to MDS, i.e., have MSR_IA32_ARCH_CAPABILITIES.MDS_NO=1
+ and which get the new IA32_TSX_CTRL MSR through a microcode
+ update. This new MSR allows for the reliable deactivation of
+ the TSX functionality.
+
+ on Enables TSX.
+
+ Although there are mitigations for all known security
+ vulnerabilities, TSX has been known to be an accelerator for
+ several previous speculation-related CVEs, and so there may be
+ unknown security risks associated with leaving it enabled.
+
+ auto Disables TSX if X86_BUG_TAA is present, otherwise enables TSX
+ on the system.
+ ============ =============================================================
+
+Not specifying this option is equivalent to "tsx=off".
+
+The following combinations of the "tsx_async_abort" and "tsx" are possible. For
+affected platforms tsx=auto is equivalent to tsx=off and the result will be:
+
+ ========= ========================== =========================================
+ tsx=on tsx_async_abort=full The system will use VERW to clear CPU
+ buffers. Cross-thread attacks are still
+ possible on SMT machines.
+ tsx=on tsx_async_abort=full,nosmt As above, cross-thread attacks on SMT
+ mitigated.
+ tsx=on tsx_async_abort=off The system is vulnerable.
+ tsx=off tsx_async_abort=full TSX might be disabled if microcode
+ provides a TSX control MSR. If so,
+ system is not vulnerable.
+ tsx=off tsx_async_abort=full,nosmt Ditto
+ tsx=off tsx_async_abort=off ditto
+ ========= ========================== =========================================
+
+
+For unaffected platforms "tsx=on" and "tsx_async_abort=full" does not clear CPU
+buffers. For platforms without TSX control (MSR_IA32_ARCH_CAPABILITIES.MDS_NO=0)
+"tsx" command line argument has no effect.
+
+For the affected platforms below table indicates the mitigation status for the
+combinations of CPUID bit MD_CLEAR and IA32_ARCH_CAPABILITIES MSR bits MDS_NO
+and TSX_CTRL_MSR.
+
+ ======= ========= ============= ========================================
+ MDS_NO MD_CLEAR TSX_CTRL_MSR Status
+ ======= ========= ============= ========================================
+ 0 0 0 Vulnerable (needs microcode)
+ 0 1 0 MDS and TAA mitigated via VERW
+ 1 1 0 MDS fixed, TAA vulnerable if TSX enabled
+ because MD_CLEAR has no meaning and
+ VERW is not guaranteed to clear buffers
+ 1 X 1 MDS fixed, TAA can be mitigated by
+ VERW or TSX_CTRL_MSR
+ ======= ========= ============= ========================================
+
+Mitigation selection guide
+--------------------------
+
+1. Trusted userspace and guests
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If all user space applications are from a trusted source and do not execute
+untrusted code which is supplied externally, then the mitigation can be
+disabled. The same applies to virtualized environments with trusted guests.
+
+
+2. Untrusted userspace and guests
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If there are untrusted applications or guests on the system, enabling TSX
+might allow a malicious actor to leak data from the host or from other
+processes running on the same physical core.
+
+If the microcode is available and the TSX is disabled on the host, attacks
+are prevented in a virtualized environment as well, even if the VMs do not
+explicitly enable the mitigation.
+
+
+.. _taa_default_mitigations:
+
+Default mitigations
+-------------------
+
+The kernel's default action for vulnerable processors is:
+
+ - Deploy TSX disable mitigation (tsx_async_abort=full tsx=off).
diff --git a/Documentation/admin-guide/hw-vuln/vmscape.rst b/Documentation/admin-guide/hw-vuln/vmscape.rst
new file mode 100644
index 000000000000..d9b9a2b6c114
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/vmscape.rst
@@ -0,0 +1,110 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+VMSCAPE
+=======
+
+VMSCAPE is a vulnerability that may allow a guest to influence the branch
+prediction in host userspace. It particularly affects hypervisors like QEMU.
+
+Even if a hypervisor may not have any sensitive data like disk encryption keys,
+guest-userspace may be able to attack the guest-kernel using the hypervisor as
+a confused deputy.
+
+Affected processors
+-------------------
+
+The following CPU families are affected by VMSCAPE:
+
+**Intel processors:**
+ - Skylake generation (Parts without Enhanced-IBRS)
+ - Cascade Lake generation - (Parts affected by ITS guest/host separation)
+ - Alder Lake and newer (Parts affected by BHI)
+
+Note that, BHI affected parts that use BHB clearing software mitigation e.g.
+Icelake are not vulnerable to VMSCAPE.
+
+**AMD processors:**
+ - Zen series (families 0x17, 0x19, 0x1a)
+
+** Hygon processors:**
+ - Family 0x18
+
+Mitigation
+----------
+
+Conditional IBPB
+----------------
+
+Kernel tracks when a CPU has run a potentially malicious guest and issues an
+IBPB before the first exit to userspace after VM-exit. If userspace did not run
+between VM-exit and the next VM-entry, no IBPB is issued.
+
+Note that the existing userspace mitigation against Spectre-v2 is effective in
+protecting the userspace. They are insufficient to protect the userspace VMMs
+from a malicious guest. This is because Spectre-v2 mitigations are applied at
+context switch time, while the userspace VMM can run after a VM-exit without a
+context switch.
+
+Vulnerability enumeration and mitigation is not applied inside a guest. This is
+because nested hypervisors should already be deploying IBPB to isolate
+themselves from nested guests.
+
+SMT considerations
+------------------
+
+When Simultaneous Multi-Threading (SMT) is enabled, hypervisors can be
+vulnerable to cross-thread attacks. For complete protection against VMSCAPE
+attacks in SMT environments, STIBP should be enabled.
+
+The kernel will issue a warning if SMT is enabled without adequate STIBP
+protection. Warning is not issued when:
+
+- SMT is disabled
+- STIBP is enabled system-wide
+- Intel eIBRS is enabled (which implies STIBP protection)
+
+System information and options
+------------------------------
+
+The sysfs file showing VMSCAPE mitigation status is:
+
+ /sys/devices/system/cpu/vulnerabilities/vmscape
+
+The possible values in this file are:
+
+ * 'Not affected':
+
+ The processor is not vulnerable to VMSCAPE attacks.
+
+ * 'Vulnerable':
+
+ The processor is vulnerable and no mitigation has been applied.
+
+ * 'Mitigation: IBPB before exit to userspace':
+
+ Conditional IBPB mitigation is enabled. The kernel tracks when a CPU has
+ run a potentially malicious guest and issues an IBPB before the first
+ exit to userspace after VM-exit.
+
+ * 'Mitigation: IBPB on VMEXIT':
+
+ IBPB is issued on every VM-exit. This occurs when other mitigations like
+ RETBLEED or SRSO are already issuing IBPB on VM-exit.
+
+Mitigation control on the kernel command line
+----------------------------------------------
+
+The mitigation can be controlled via the ``vmscape=`` command line parameter:
+
+ * ``vmscape=off``:
+
+ Disable the VMSCAPE mitigation.
+
+ * ``vmscape=ibpb``:
+
+ Enable conditional IBPB mitigation (default when CONFIG_MITIGATION_VMSCAPE=y).
+
+ * ``vmscape=force``:
+
+ Force vulnerability detection and mitigation even on processors that are
+ not known to be affected.
diff --git a/Documentation/admin-guide/hw_random.rst b/Documentation/admin-guide/hw_random.rst
new file mode 100644
index 000000000000..bfc39f1cf470
--- /dev/null
+++ b/Documentation/admin-guide/hw_random.rst
@@ -0,0 +1,104 @@
+=================================
+Hardware random number generators
+=================================
+
+Introduction
+============
+
+The hw_random framework is software that makes use of a
+special hardware feature on your CPU or motherboard,
+a Random Number Generator (RNG). The software has two parts:
+a core providing the /dev/hwrng character device and its
+sysfs support, plus a hardware-specific driver that plugs
+into that core.
+
+To make the most effective use of these mechanisms, you
+should download the support software as well. Download the
+latest version of the "rng-tools" package from:
+
+ https://github.com/nhorman/rng-tools
+
+Those tools use /dev/hwrng to fill the kernel entropy pool,
+which is used internally and exported by the /dev/urandom and
+/dev/random special files.
+
+Theory of operation
+===================
+
+CHARACTER DEVICE. Using the standard open()
+and read() system calls, you can read random data from
+the hardware RNG device. This data is NOT CHECKED by any
+fitness tests, and could potentially be bogus (if the
+hardware is faulty or has been tampered with). Data is only
+output if the hardware "has-data" flag is set, but nevertheless
+a security-conscious person would run fitness tests on the
+data before assuming it is truly random.
+
+The rng-tools package uses such tests in "rngd", and lets you
+run them by hand with a "rngtest" utility.
+
+/dev/hwrng is char device major 10, minor 183.
+
+CLASS DEVICE. There is a /sys/class/misc/hw_random node with
+two unique attributes, "rng_available" and "rng_current". The
+"rng_available" attribute lists the hardware-specific drivers
+available, while "rng_current" lists the one which is currently
+connected to /dev/hwrng. If your system has more than one
+RNG available, you may change the one used by writing a name from
+the list in "rng_available" into "rng_current".
+
+==========================================================================
+
+
+Hardware driver for Intel/AMD/VIA Random Number Generators (RNG)
+ - Copyright 2000,2001 Jeff Garzik <jgarzik@pobox.com>
+ - Copyright 2000,2001 Philipp Rumpf <prumpf@mandrakesoft.com>
+
+
+About the Intel RNG hardware, from the firmware hub datasheet
+=============================================================
+
+The Firmware Hub integrates a Random Number Generator (RNG)
+using thermal noise generated from inherently random quantum
+mechanical properties of silicon. When not generating new random
+bits the RNG circuitry will enter a low power state. Intel will
+provide a binary software driver to give third party software
+access to our RNG for use as a security feature. At this time,
+the RNG is only to be used with a system in an OS-present state.
+
+Intel RNG Driver notes
+======================
+
+FIXME: support poll(2)
+
+.. note::
+
+ request_mem_region was removed, for three reasons:
+
+ 1) Only one RNG is supported by this driver;
+ 2) The location used by the RNG is a fixed location in
+ MMIO-addressable memory;
+ 3) users with properly working BIOS e820 handling will always
+ have the region in which the RNG is located reserved, so
+ request_mem_region calls always fail for proper setups.
+ However, for people who use mem=XX, BIOS e820 information is
+ **not** in /proc/iomem, and request_mem_region(RNG_ADDR) can
+ succeed.
+
+Driver details
+==============
+
+Based on:
+ Intel 82802AB/82802AC Firmware Hub (FWH) Datasheet
+ May 1999 Order Number: 290658-002 R
+
+Intel 82802 Firmware Hub:
+ Random Number Generator
+ Programmer's Reference Manual
+ December 1999 Order Number: 298029-001 R
+
+Intel 82802 Firmware HUB Random Number Generator Driver
+ Copyright (c) 2000 Matt Sottek <msottek@quiknet.com>
+
+Special thanks to Matt Sottek. I did the "guts", he
+did the "brains" and all the testing.
diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst
index 965745d5fb9a..259d79fbeb94 100644
--- a/Documentation/admin-guide/index.rst
+++ b/Documentation/admin-guide/index.rst
@@ -1,3 +1,4 @@
+=================================================
The Linux kernel user's and administrator's guide
=================================================
@@ -6,6 +7,9 @@ added to the kernel over time. There is, as yet, little overall order or
organization here — this material was not written to be a single, coherent
document! With luck things will improve quickly over time.
+General guides to kernel administration
+---------------------------------------
+
This initial section contains overall information, including the README
file describing the kernel as a whole, documentation on kernel parameters,
etc.
@@ -14,17 +18,44 @@ etc.
:maxdepth: 1
README
- kernel-parameters
devices
-This section describes CPU vulnerabilities and provides an overview of the
-possible mitigations along with guidance for selecting mitigations if they
-are configurable at compile, boot or run time.
+ features
+
+A big part of the kernel's administrative interface is the /proc and sysfs
+virtual filesystems; these documents describe how to interact with tem
+
+.. toctree::
+ :maxdepth: 1
+
+ sysfs-rules
+ sysctl/index
+ cputopology
+ abi
+
+Security-related documentation:
.. toctree::
:maxdepth: 1
- l1tf
+ hw-vuln/index
+ LSM/index
+ perf-security
+
+Booting the kernel
+------------------
+
+.. toctree::
+ :maxdepth: 1
+
+ bootconfig
+ kernel-parameters
+ efi-stub
+ initrd
+
+
+Tracking down and identifying problems
+--------------------------------------
Here is a set of documents aimed at users who are trying to track down
problems and bugs in particular.
@@ -32,50 +63,132 @@ problems and bugs in particular.
.. toctree::
:maxdepth: 1
- reporting-bugs
- security-bugs
+ reporting-issues
+ reporting-regressions
+ quickly-build-trimmed-linux
+ verify-bugs-and-bisect-regressions
bug-hunting
bug-bisect
tainted-kernels
ramoops
dynamic-debug-howto
init
+ kdump/index
+ perf/index
+ pstore-blk
+ clearing-warn-once
+ kernel-per-CPU-kthreads
+ lockup-watchdogs
+ RAS/index
+ sysrq
-This is the beginning of a section with information of interest to
-application developers. Documents covering various aspects of the kernel
-ABI will be found here.
+
+Core-kernel subsystems
+----------------------
+
+These documents describe core-kernel administration interfaces that are
+likely to be of interest on almost any system.
.. toctree::
:maxdepth: 1
- sysfs-rules
+ cgroup-v2
+ cgroup-v1/index
+ cpu-load
+ mm/index
+ module-signing
+ namespaces/index
+ numastat
+ pm/index
+ syscall-user-dispatch
-The rest of this manual consists of various unordered guides on how to
-configure specific aspects of kernel behavior to your liking.
+Support for non-native binary formats. Note that some of these
+documents are ... old ...
.. toctree::
:maxdepth: 1
- initrd
- cgroup-v2
- serial-console
- braille-console
- parport
- md
- module-signing
- sysrq
- unicode
- vga-softcursor
binfmt-misc
- mono
java
- ras
+ mono
+
+
+Block-layer and filesystem administration
+-----------------------------------------
+
+.. toctree::
+ :maxdepth: 1
+
bcache
+ binderfs
+ blockdev/index
+ cifs/index
+ device-mapper/index
ext4
- pm/index
+ filesystem-monitoring
+ nfs/index
+ iostats
+ jfs
+ md
+ ufs
+ xfs
+
+Device-specific guides
+----------------------
+
+How to configure your hardware within your Linux system.
+
+.. toctree::
+ :maxdepth: 1
+
+ acpi/index
+ aoe/index
+ auxdisplay/index
+ braille-console
+ btmrvl
+ dell_rbu
+ edid
+ gpio/index
+ hw_random
+ laptops/index
+ lcd-panel-cgram
+ media/index
+ nvme-multipath
+ parport
+ pnp
+ rapidio
+ rtc
+ serial-console
+ svga
+ thermal/index
thunderbolt
- LSM/index
- mm/index
+ vga-softcursor
+ video-output
+
+Workload analysis
+-----------------
+
+This is the beginning of a section with information of interest to
+application developers and system integrators doing analysis of the
+Linux kernel for safety critical applications. Documents supporting
+analysis of kernel interactions with applications, and key kernel
+subsystems expectations will be found here.
+
+.. toctree::
+ :maxdepth: 1
+
+ workload-tracing
+
+Everything else
+---------------
+
+A few hard-to-categorize and generally obsolete documents.
+
+.. toctree::
+ :maxdepth: 1
+
+ ldm
+ unicode
.. only:: subproject and html
diff --git a/Documentation/admin-guide/init.rst b/Documentation/admin-guide/init.rst
index e89d97f31eaf..41f06a09152e 100644
--- a/Documentation/admin-guide/init.rst
+++ b/Documentation/admin-guide/init.rst
@@ -1,52 +1,48 @@
-Explaining the dreaded "No init found." boot hang message
+Explaining the "No working init found." boot hang message
=========================================================
+:Authors: Andreas Mohr <andi at lisas period de>
+ Cristian Souza <cristianmsbr at gmail period com>
-OK, so you've got this pretty unintuitive message (currently located
-in init/main.c) and are wondering what the H*** went wrong.
-Some high-level reasons for failure (listed roughly in order of execution)
-to load the init binary are:
-
-A) Unable to mount root FS
-B) init binary doesn't exist on rootfs
-C) broken console device
-D) binary exists but dependencies not available
-E) binary cannot be loaded
-
-Detailed explanations:
-
-A) Set "debug" kernel parameter (in bootloader config file or CONFIG_CMDLINE)
- to get more detailed kernel messages.
-B) make sure you have the correct root FS type
- (and ``root=`` kernel parameter points to the correct partition),
- required drivers such as storage hardware (such as SCSI or USB!)
- and filesystem (ext3, jffs2 etc.) are builtin (alternatively as modules,
- to be pre-loaded by an initrd)
-C) Possibly a conflict in ``console= setup`` --> initial console unavailable.
- E.g. some serial consoles are unreliable due to serial IRQ issues (e.g.
- missing interrupt-based configuration).
+This document provides some high-level reasons for failure
+(listed roughly in order of execution) to load the init binary.
+
+1) **Unable to mount root FS**: Set "debug" kernel parameter (in bootloader
+ config file or CONFIG_CMDLINE) to get more detailed kernel messages.
+
+2) **init binary doesn't exist on rootfs**: Make sure you have the correct
+ root FS type (and ``root=`` kernel parameter points to the correct
+ partition), required drivers such as storage hardware (such as SCSI or
+ USB!) and filesystem (ext3, jffs2, etc.) are builtin (alternatively as
+ modules, to be pre-loaded by an initrd).
+
+3) **Broken console device**: Possibly a conflict in ``console= setup``
+ --> initial console unavailable. E.g. some serial consoles are unreliable
+ due to serial IRQ issues (e.g. missing interrupt-based configuration).
Try using a different ``console= device`` or e.g. ``netconsole=``.
-D) e.g. required library dependencies of the init binary such as
- ``/lib/ld-linux.so.2`` missing or broken. Use
- ``readelf -d <INIT>|grep NEEDED`` to find out which libraries are required.
-E) make sure the binary's architecture matches your hardware.
- E.g. i386 vs. x86_64 mismatch, or trying to load x86 on ARM hardware.
- In case you tried loading a non-binary file here (shell script?),
- you should make sure that the script specifies an interpreter in its shebang
- header line (``#!/...``) that is fully working (including its library
- dependencies). And before tackling scripts, better first test a simple
- non-script binary such as ``/bin/sh`` and confirm its successful execution.
- To find out more, add code ``to init/main.c`` to display kernel_execve()s
- return values.
+
+4) **Binary exists but dependencies not available**: E.g. required library
+ dependencies of the init binary such as ``/lib/ld-linux.so.2`` missing or
+ broken. Use ``readelf -d <INIT>|grep NEEDED`` to find out which libraries
+ are required.
+
+5) **Binary cannot be loaded**: Make sure the binary's architecture matches
+ your hardware. E.g. i386 vs. x86_64 mismatch, or trying to load x86 on ARM
+ hardware. In case you tried loading a non-binary file here (shell script?),
+ you should make sure that the script specifies an interpreter in its
+ shebang header line (``#!/...``) that is fully working (including its
+ library dependencies). And before tackling scripts, better first test a
+ simple non-script binary such as ``/bin/sh`` and confirm its successful
+ execution. To find out more, add code ``to init/main.c`` to display
+ kernel_execve()s return values.
Please extend this explanation whenever you find new failure causes
(after all loading the init binary is a CRITICAL and hard transition step
-which needs to be made as painless as possible), then submit patch to LKML.
+which needs to be made as painless as possible), then submit a patch to LKML.
Further TODOs:
- Implement the various ``run_init_process()`` invocations via a struct array
which can then store the ``kernel_execve()`` result value and on failure
log it all by iterating over **all** results (very important usability fix).
-- try to make the implementation itself more helpful in general,
- e.g. by providing additional error messages at affected places.
+- Try to make the implementation itself more helpful in general, e.g. by
+ providing additional error messages at affected places.
-Andreas Mohr <andi at lisas period de>
diff --git a/Documentation/admin-guide/initrd.rst b/Documentation/admin-guide/initrd.rst
index a03dabaaf3a3..67bbad8806e8 100644
--- a/Documentation/admin-guide/initrd.rst
+++ b/Documentation/admin-guide/initrd.rst
@@ -376,7 +376,7 @@ Resources
---------
.. [#f1] Almesberger, Werner; "Booting Linux: The History and the Future"
- http://www.almesberger.net/cv/papers/ols2k-9.ps.gz
+ https://www.almesberger.net/cv/papers/ols2k-9.ps.gz
.. [#f2] newlib package (experimental), with initrd example
https://www.sourceware.org/newlib/
.. [#f3] util-linux: Miscellaneous utilities for Linux
diff --git a/Documentation/admin-guide/iostats.rst b/Documentation/admin-guide/iostats.rst
new file mode 100644
index 000000000000..9453196ade51
--- /dev/null
+++ b/Documentation/admin-guide/iostats.rst
@@ -0,0 +1,187 @@
+=====================
+I/O statistics fields
+=====================
+
+The kernel exposes disk statistics via ``/proc/diskstats`` and
+``/sys/block/<device>/stat``. These stats are usually accessed via tools
+such as ``sar`` and ``iostat``.
+
+Here are examples using a disk with two partitions::
+
+ /proc/diskstats:
+ 259 0 nvme0n1 255999 814 12369153 47919 996852 81 36123024 425995 0 301795 580470 0 0 0 0 60602 106555
+ 259 1 nvme0n1p1 492 813 17572 96 848 81 108288 210 0 76 307 0 0 0 0 0 0
+ 259 2 nvme0n1p2 255401 1 12343477 47799 996004 0 36014736 425784 0 344336 473584 0 0 0 0 0 0
+
+ /sys/block/nvme0n1/stat:
+ 255999 814 12369153 47919 996858 81 36123056 426009 0 301809 580491 0 0 0 0 60605 106562
+
+ /sys/block/nvme0n1/nvme0n1p1/stat:
+ 492 813 17572 96 848 81 108288 210 0 76 307 0 0 0 0 0 0
+
+Both files contain the same 17 statistics. ``/sys/block/<device>/stat``
+contains the fields for ``<device>``. In ``/proc/diskstats`` the fields
+are prefixed with the major and minor device numbers and the device
+name. In the example above, the first stat value for ``nvme0n1`` is
+255999 in both files.
+
+The sysfs ``stat`` file is efficient for monitoring a small, known set
+of disks. If you're tracking a large number of devices,
+``/proc/diskstats`` is often the better choice since it avoids the
+overhead of opening and closing multiple files for each snapshot.
+
+All fields are cumulative, monotonic counters, except for field 9, which
+resets to zero as I/Os complete. The remaining fields reset at boot, on
+device reattachment or reinitialization, or when the underlying counter
+overflows. Applications reading these counters should detect and handle
+resets when comparing stat snapshots.
+
+Each set of stats only applies to the indicated device; if you want
+system-wide stats you'll have to find all the devices and sum them all up.
+
+Field 1 -- # of reads completed (unsigned long)
+ This is the total number of reads completed successfully.
+
+Field 2 -- # of reads merged, field 6 -- # of writes merged (unsigned long)
+ Reads and writes which are adjacent to each other may be merged for
+ efficiency. Thus two 4K reads may become one 8K read before it is
+ ultimately handed to the disk, and so it will be counted (and queued)
+ as only one I/O. This field lets you know how often this was done.
+
+Field 3 -- # of sectors read (unsigned long)
+ This is the total number of sectors read successfully.
+
+Field 4 -- # of milliseconds spent reading (unsigned int)
+ This is the total number of milliseconds spent by all reads (as
+ measured from blk_mq_alloc_request() to __blk_mq_end_request()).
+
+Field 5 -- # of writes completed (unsigned long)
+ This is the total number of writes completed successfully.
+
+Field 6 -- # of writes merged (unsigned long)
+ See the description of field 2.
+
+Field 7 -- # of sectors written (unsigned long)
+ This is the total number of sectors written successfully.
+
+Field 8 -- # of milliseconds spent writing (unsigned int)
+ This is the total number of milliseconds spent by all writes (as
+ measured from blk_mq_alloc_request() to __blk_mq_end_request()).
+
+Field 9 -- # of I/Os currently in progress (unsigned int)
+ The only field that should go to zero. Incremented as requests are
+ given to appropriate struct request_queue and decremented as they finish.
+
+Field 10 -- # of milliseconds spent doing I/Os (unsigned int)
+ This field increases so long as field 9 is nonzero.
+
+ Since 5.0 this field counts jiffies when at least one request was
+ started or completed. If request runs more than 2 jiffies then some
+ I/O time might be not accounted in case of concurrent requests.
+
+Field 11 -- weighted # of milliseconds spent doing I/Os (unsigned int)
+ This field is incremented at each I/O start, I/O completion, I/O
+ merge, or read of these stats by the number of I/Os in progress
+ (field 9) times the number of milliseconds spent doing I/O since the
+ last update of this field. This can provide an easy measure of both
+ I/O completion time and the backlog that may be accumulating.
+
+Field 12 -- # of discards completed (unsigned long)
+ This is the total number of discards completed successfully.
+
+Field 13 -- # of discards merged (unsigned long)
+ See the description of field 2
+
+Field 14 -- # of sectors discarded (unsigned long)
+ This is the total number of sectors discarded successfully.
+
+Field 15 -- # of milliseconds spent discarding (unsigned int)
+ This is the total number of milliseconds spent by all discards (as
+ measured from blk_mq_alloc_request() to __blk_mq_end_request()).
+
+Field 16 -- # of flush requests completed
+ This is the total number of flush requests completed successfully.
+
+ Block layer combines flush requests and executes at most one at a time.
+ This counts flush requests executed by disk. Not tracked for partitions.
+
+Field 17 -- # of milliseconds spent flushing
+ This is the total number of milliseconds spent by all flush requests.
+
+To avoid introducing performance bottlenecks, no locks are held while
+modifying these counters. This implies that minor inaccuracies may be
+introduced when changes collide, so (for instance) adding up all the
+read I/Os issued per partition should equal those made to the disks ...
+but due to the lack of locking it may only be very close.
+
+In 2.6+, there are counters for each CPU, which make the lack of locking
+almost a non-issue. When the statistics are read, the per-CPU counters
+are summed (possibly overflowing the unsigned long variable they are
+summed to) and the result given to the user. There is no convenient
+user interface for accessing the per-CPU counters themselves.
+
+Since 4.19 request times are measured with nanoseconds precision and
+truncated to milliseconds before showing in this interface.
+
+Disks vs Partitions
+-------------------
+
+There were significant changes between 2.4 and 2.6+ in the I/O subsystem.
+As a result, some statistic information disappeared. The translation from
+a disk address relative to a partition to the disk address relative to
+the host disk happens much earlier. All merges and timings now happen
+at the disk level rather than at both the disk and partition level as
+in 2.4. Consequently, you'll see a different statistics output on 2.6+ for
+partitions from that for disks. There are only *four* fields available
+for partitions on 2.6+ machines. This is reflected in the examples above.
+
+Field 1 -- # of reads issued
+ This is the total number of reads issued to this partition.
+
+Field 2 -- # of sectors read
+ This is the total number of sectors requested to be read from this
+ partition.
+
+Field 3 -- # of writes issued
+ This is the total number of writes issued to this partition.
+
+Field 4 -- # of sectors written
+ This is the total number of sectors requested to be written to
+ this partition.
+
+Note that since the address is translated to a disk-relative one, and no
+record of the partition-relative address is kept, the subsequent success
+or failure of the read cannot be attributed to the partition. In other
+words, the number of reads for partitions is counted slightly before time
+of queuing for partitions, and at completion for whole disks. This is
+a subtle distinction that is probably uninteresting for most cases.
+
+More significant is the error induced by counting the numbers of
+reads/writes before merges for partitions and after for disks. Since a
+typical workload usually contains a lot of successive and adjacent requests,
+the number of reads/writes issued can be several times higher than the
+number of reads/writes completed.
+
+In 2.6.25, the full statistic set is again available for partitions and
+disk and partition statistics are consistent again. Since we still don't
+keep record of the partition-relative address, an operation is attributed to
+the partition which contains the first sector of the request after the
+eventual merges. As requests can be merged across partition, this could lead
+to some (probably insignificant) inaccuracy.
+
+Additional notes
+----------------
+
+In 2.6+, sysfs is not mounted by default. If your distribution of
+Linux hasn't added it already, here's the line you'll want to add to
+your ``/etc/fstab``::
+
+ none /sys sysfs defaults 0 0
+
+
+In 2.6+, all disk statistics were removed from ``/proc/stat``. In 2.4, they
+appear in both ``/proc/partitions`` and ``/proc/stat``, although the ones in
+``/proc/stat`` take a very different format from those in ``/proc/partitions``
+(see proc(5), if your system has it.)
+
+-- ricklind@us.ibm.com
diff --git a/Documentation/admin-guide/jfs.rst b/Documentation/admin-guide/jfs.rst
new file mode 100644
index 000000000000..9e12d936bc90
--- /dev/null
+++ b/Documentation/admin-guide/jfs.rst
@@ -0,0 +1,66 @@
+===========================================
+IBM's Journaled File System (JFS) for Linux
+===========================================
+
+JFS Homepage: http://jfs.sourceforge.net/
+
+The following mount options are supported:
+
+(*) == default
+
+iocharset=name
+ Character set to use for converting from Unicode to
+ ASCII. The default is to do no conversion. Use
+ iocharset=utf8 for UTF-8 translations. This requires
+ CONFIG_NLS_UTF8 to be set in the kernel .config file.
+ iocharset=none specifies the default behavior explicitly.
+
+resize=value
+ Resize the volume to <value> blocks. JFS only supports
+ growing a volume, not shrinking it. This option is only
+ valid during a remount, when the volume is mounted
+ read-write. The resize keyword with no value will grow
+ the volume to the full size of the partition.
+
+nointegrity
+ Do not write to the journal. The primary use of this option
+ is to allow for higher performance when restoring a volume
+ from backup media. The integrity of the volume is not
+ guaranteed if the system abnormally abends.
+
+integrity(*)
+ Commit metadata changes to the journal. Use this option to
+ remount a volume where the nointegrity option was
+ previously specified in order to restore normal behavior.
+
+errors=continue
+ Keep going on a filesystem error.
+errors=remount-ro(*)
+ Remount the filesystem read-only on an error.
+errors=panic
+ Panic and halt the machine if an error occurs.
+
+uid=value
+ Override on-disk uid with specified value
+gid=value
+ Override on-disk gid with specified value
+umask=value
+ Override on-disk umask with specified octal value. For
+ directories, the execute bit will be set if the corresponding
+ read bit is set.
+
+discard=minlen, discard/nodiscard(*)
+ This enables/disables the use of discard/TRIM commands.
+ The discard/TRIM commands are sent to the underlying
+ block device when blocks are freed. This is useful for SSD
+ devices and sparse/thinly-provisioned LUNs. The FITRIM ioctl
+ command is also available together with the nodiscard option.
+ The value of minlen specifies the minimum blockcount, when
+ a TRIM command to the block device is considered useful.
+ When no value is given to the discard option, it defaults to
+ 64 blocks, which means 256KiB in JFS.
+ The minlen value of discard overrides the minlen value given
+ on an FITRIM ioctl().
+
+The JFS mailing list can be subscribed to by using the link labeled
+"Mail list Subscribe" at our web page http://jfs.sourceforge.net/
diff --git a/Documentation/admin-guide/kdump/gdbmacros.txt b/Documentation/admin-guide/kdump/gdbmacros.txt
new file mode 100644
index 000000000000..030de95e3e6b
--- /dev/null
+++ b/Documentation/admin-guide/kdump/gdbmacros.txt
@@ -0,0 +1,323 @@
+#
+# This file contains a few gdb macros (user defined commands) to extract
+# useful information from kernel crashdump (kdump) like stack traces of
+# all the processes or a particular process and trapinfo.
+#
+# These macros can be used by copying this file in .gdbinit (put in home
+# directory or current directory) or by invoking gdb command with
+# --command=<command-file-name> option
+#
+# Credits:
+# Alexander Nyberg <alexn@telia.com>
+# V Srivatsa <vatsa@in.ibm.com>
+# Maneesh Soni <maneesh@in.ibm.com>
+#
+
+define bttnobp
+ set $tasks_off=((size_t)&((struct task_struct *)0)->tasks)
+ set $pid_off=((size_t)&((struct task_struct *)0)->thread_group.next)
+ set $init_t=&init_task
+ set $next_t=(((char *)($init_t->tasks).next) - $tasks_off)
+ set var $stacksize = sizeof(union thread_union)
+ while ($next_t != $init_t)
+ set $next_t=(struct task_struct *)$next_t
+ printf "\npid %d; comm %s:\n", $next_t.pid, $next_t.comm
+ printf "===================\n"
+ set var $stackp = $next_t.thread.sp
+ set var $stack_top = ($stackp & ~($stacksize - 1)) + $stacksize
+
+ while ($stackp < $stack_top)
+ if (*($stackp) > _stext && *($stackp) < _sinittext)
+ info symbol *($stackp)
+ end
+ set $stackp += 4
+ end
+ set $next_th=(((char *)$next_t->thread_group.next) - $pid_off)
+ while ($next_th != $next_t)
+ set $next_th=(struct task_struct *)$next_th
+ printf "\npid %d; comm %s:\n", $next_t.pid, $next_t.comm
+ printf "===================\n"
+ set var $stackp = $next_t.thread.sp
+ set var $stack_top = ($stackp & ~($stacksize - 1)) + stacksize
+
+ while ($stackp < $stack_top)
+ if (*($stackp) > _stext && *($stackp) < _sinittext)
+ info symbol *($stackp)
+ end
+ set $stackp += 4
+ end
+ set $next_th=(((char *)$next_th->thread_group.next) - $pid_off)
+ end
+ set $next_t=(char *)($next_t->tasks.next) - $tasks_off
+ end
+end
+document bttnobp
+ dump all thread stack traces on a kernel compiled with !CONFIG_FRAME_POINTER
+end
+
+define btthreadstack
+ set var $pid_task = $arg0
+
+ printf "\npid %d; comm %s:\n", $pid_task.pid, $pid_task.comm
+ printf "task struct: "
+ print $pid_task
+ printf "===================\n"
+ set var $stackp = $pid_task.thread.sp
+ set var $stacksize = sizeof(union thread_union)
+ set var $stack_top = ($stackp & ~($stacksize - 1)) + $stacksize
+ set var $stack_bot = ($stackp & ~($stacksize - 1))
+
+ set $stackp = *((unsigned long *) $stackp)
+ while (($stackp < $stack_top) && ($stackp > $stack_bot))
+ set var $addr = *(((unsigned long *) $stackp) + 1)
+ info symbol $addr
+ set $stackp = *((unsigned long *) $stackp)
+ end
+end
+document btthreadstack
+ dump a thread stack using the given task structure pointer
+end
+
+
+define btt
+ set $tasks_off=((size_t)&((struct task_struct *)0)->tasks)
+ set $pid_off=((size_t)&((struct task_struct *)0)->thread_group.next)
+ set $init_t=&init_task
+ set $next_t=(((char *)($init_t->tasks).next) - $tasks_off)
+ while ($next_t != $init_t)
+ set $next_t=(struct task_struct *)$next_t
+ btthreadstack $next_t
+
+ set $next_th=(((char *)$next_t->thread_group.next) - $pid_off)
+ while ($next_th != $next_t)
+ set $next_th=(struct task_struct *)$next_th
+ btthreadstack $next_th
+ set $next_th=(((char *)$next_th->thread_group.next) - $pid_off)
+ end
+ set $next_t=(char *)($next_t->tasks.next) - $tasks_off
+ end
+end
+document btt
+ dump all thread stack traces on a kernel compiled with CONFIG_FRAME_POINTER
+end
+
+define btpid
+ set var $pid = $arg0
+ set $tasks_off=((size_t)&((struct task_struct *)0)->tasks)
+ set $pid_off=((size_t)&((struct task_struct *)0)->thread_group.next)
+ set $init_t=&init_task
+ set $next_t=(((char *)($init_t->tasks).next) - $tasks_off)
+ set var $pid_task = 0
+
+ while ($next_t != $init_t)
+ set $next_t=(struct task_struct *)$next_t
+
+ if ($next_t.pid == $pid)
+ set $pid_task = $next_t
+ end
+
+ set $next_th=(((char *)$next_t->thread_group.next) - $pid_off)
+ while ($next_th != $next_t)
+ set $next_th=(struct task_struct *)$next_th
+ if ($next_th.pid == $pid)
+ set $pid_task = $next_th
+ end
+ set $next_th=(((char *)$next_th->thread_group.next) - $pid_off)
+ end
+ set $next_t=(char *)($next_t->tasks.next) - $tasks_off
+ end
+
+ btthreadstack $pid_task
+end
+document btpid
+ backtrace of pid
+end
+
+
+define trapinfo
+ set var $pid = $arg0
+ set $tasks_off=((size_t)&((struct task_struct *)0)->tasks)
+ set $pid_off=((size_t)&((struct task_struct *)0)->thread_group.next)
+ set $init_t=&init_task
+ set $next_t=(((char *)($init_t->tasks).next) - $tasks_off)
+ set var $pid_task = 0
+
+ while ($next_t != $init_t)
+ set $next_t=(struct task_struct *)$next_t
+
+ if ($next_t.pid == $pid)
+ set $pid_task = $next_t
+ end
+
+ set $next_th=(((char *)$next_t->thread_group.next) - $pid_off)
+ while ($next_th != $next_t)
+ set $next_th=(struct task_struct *)$next_th
+ if ($next_th.pid == $pid)
+ set $pid_task = $next_th
+ end
+ set $next_th=(((char *)$next_th->thread_group.next) - $pid_off)
+ end
+ set $next_t=(char *)($next_t->tasks.next) - $tasks_off
+ end
+
+ printf "Trapno %ld, cr2 0x%lx, error_code %ld\n", $pid_task.thread.trap_no, \
+ $pid_task.thread.cr2, $pid_task.thread.error_code
+
+end
+document trapinfo
+ Run info threads and lookup pid of thread #1
+ 'trapinfo <pid>' will tell you by which trap & possibly
+ address the kernel panicked.
+end
+
+define dump_record
+ set var $desc = $arg0
+ set var $info = $arg1
+ if ($argc > 2)
+ set var $prev_flags = $arg2
+ else
+ set var $prev_flags = 0
+ end
+
+ set var $prefix = 1
+ set var $newline = 1
+
+ set var $begin = $desc->text_blk_lpos.begin % (1U << prb->text_data_ring.size_bits)
+ set var $next = $desc->text_blk_lpos.next % (1U << prb->text_data_ring.size_bits)
+
+ # handle data-less record
+ if ($begin & 1)
+ set var $text_len = 0
+ set var $log = ""
+ else
+ # handle wrapping data block
+ if ($begin > $next)
+ set var $begin = 0
+ end
+
+ # skip over descriptor id
+ set var $begin = $begin + sizeof(long)
+
+ # handle truncated message
+ if ($next - $begin < $info->text_len)
+ set var $text_len = $next - $begin
+ else
+ set var $text_len = $info->text_len
+ end
+
+ set var $log = &prb->text_data_ring.data[$begin]
+ end
+
+ # prev & LOG_CONT && !(info->flags & LOG_PREIX)
+ if (($prev_flags & 8) && !($info->flags & 4))
+ set var $prefix = 0
+ end
+
+ # info->flags & LOG_CONT
+ if ($info->flags & 8)
+ # (prev & LOG_CONT && !(prev & LOG_NEWLINE))
+ if (($prev_flags & 8) && !($prev_flags & 2))
+ set var $prefix = 0
+ end
+ # (!(info->flags & LOG_NEWLINE))
+ if (!($info->flags & 2))
+ set var $newline = 0
+ end
+ end
+
+ if ($prefix)
+ printf "[%5lu.%06lu] ", $info->ts_nsec / 1000000000, $info->ts_nsec % 1000000000
+ end
+ if ($text_len)
+ eval "printf \"%%%d.%ds\", $log", $text_len, $text_len
+ end
+ if ($newline)
+ printf "\n"
+ end
+
+ # handle dictionary data
+
+ set var $dict = &$info->dev_info.subsystem[0]
+ set var $dict_len = sizeof($info->dev_info.subsystem)
+ if ($dict[0] != '\0')
+ printf " SUBSYSTEM="
+ set var $idx = 0
+ while ($idx < $dict_len)
+ set var $c = $dict[$idx]
+ if ($c == '\0')
+ loop_break
+ else
+ if ($c < ' ' || $c >= 127 || $c == '\\')
+ printf "\\x%02x", $c
+ else
+ printf "%c", $c
+ end
+ end
+ set var $idx = $idx + 1
+ end
+ printf "\n"
+ end
+
+ set var $dict = &$info->dev_info.device[0]
+ set var $dict_len = sizeof($info->dev_info.device)
+ if ($dict[0] != '\0')
+ printf " DEVICE="
+ set var $idx = 0
+ while ($idx < $dict_len)
+ set var $c = $dict[$idx]
+ if ($c == '\0')
+ loop_break
+ else
+ if ($c < ' ' || $c >= 127 || $c == '\\')
+ printf "\\x%02x", $c
+ else
+ printf "%c", $c
+ end
+ end
+ set var $idx = $idx + 1
+ end
+ printf "\n"
+ end
+end
+document dump_record
+ Dump a single record. The first parameter is the descriptor,
+ the second parameter is the info, the third parameter is
+ optional and specifies the previous record's flags, used for
+ properly formatting continued lines.
+end
+
+define dmesg
+ # definitions from kernel/printk/printk_ringbuffer.h
+ set var $desc_committed = 1
+ set var $desc_finalized = 2
+ set var $desc_sv_bits = sizeof(long) * 8
+ set var $desc_flags_shift = $desc_sv_bits - 2
+ set var $desc_flags_mask = 3 << $desc_flags_shift
+ set var $id_mask = ~$desc_flags_mask
+
+ set var $desc_count = 1U << prb->desc_ring.count_bits
+ set var $prev_flags = 0
+
+ set var $id = prb->desc_ring.tail_id.counter
+ set var $end_id = prb->desc_ring.head_id.counter
+
+ while (1)
+ set var $desc = &prb->desc_ring.descs[$id % $desc_count]
+ set var $info = &prb->desc_ring.infos[$id % $desc_count]
+
+ # skip non-committed record
+ set var $state = 3 & ($desc->state_var.counter >> $desc_flags_shift)
+ if ($state == $desc_committed || $state == $desc_finalized)
+ dump_record $desc $info $prev_flags
+ set var $prev_flags = $info->flags
+ end
+
+ if ($id == $end_id)
+ loop_break
+ end
+ set var $id = ($id + 1) & $id_mask
+ end
+end
+document dmesg
+ print the kernel ring buffer
+end
diff --git a/Documentation/admin-guide/kdump/index.rst b/Documentation/admin-guide/kdump/index.rst
new file mode 100644
index 000000000000..8e2ebd0383cd
--- /dev/null
+++ b/Documentation/admin-guide/kdump/index.rst
@@ -0,0 +1,20 @@
+
+================================================================
+Documentation for Kdump - The kexec-based Crash Dumping Solution
+================================================================
+
+This document includes overview, setup and installation, and analysis
+information.
+
+.. toctree::
+ :maxdepth: 1
+
+ kdump
+ vmcoreinfo
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
diff --git a/Documentation/admin-guide/kdump/kdump.rst b/Documentation/admin-guide/kdump/kdump.rst
new file mode 100644
index 000000000000..7b011eb116a7
--- /dev/null
+++ b/Documentation/admin-guide/kdump/kdump.rst
@@ -0,0 +1,612 @@
+================================================================
+Documentation for Kdump - The kexec-based Crash Dumping Solution
+================================================================
+
+This document includes overview, setup, installation, and analysis
+information.
+
+Overview
+========
+
+Kdump uses kexec to quickly boot to a dump-capture kernel whenever a
+dump of the system kernel's memory needs to be taken (for example, when
+the system panics). The system kernel's memory image is preserved across
+the reboot and is accessible to the dump-capture kernel.
+
+You can use common commands, such as cp, scp or makedumpfile to copy
+the memory image to a dump file on the local disk, or across the network
+to a remote system.
+
+Kdump and kexec are currently supported on the x86, x86_64, ppc64,
+s390x, arm and arm64 architectures.
+
+When the system kernel boots, it reserves a small section of memory for
+the dump-capture kernel. This ensures that ongoing Direct Memory Access
+(DMA) from the system kernel does not corrupt the dump-capture kernel.
+The kexec -p command loads the dump-capture kernel into this reserved
+memory.
+
+On x86 machines, the first 640 KB of physical memory is needed for boot,
+regardless of where the kernel loads. For simpler handling, the whole
+low 1M is reserved to avoid any later kernel or device driver writing
+data into this area. Like this, the low 1M can be reused as system RAM
+by kdump kernel without extra handling.
+
+On PPC64 machines first 32KB of physical memory is needed for booting
+regardless of where the kernel is loaded and to support 64K page size
+kexec backs up the first 64KB memory.
+
+For s390x, when kdump is triggered, the crashkernel region is exchanged
+with the region [0, crashkernel region size] and then the kdump kernel
+runs in [0, crashkernel region size]. Therefore no relocatable kernel is
+needed for s390x.
+
+All of the necessary information about the system kernel's core image is
+encoded in the ELF format, and stored in a reserved area of memory
+before a crash. The physical address of the start of the ELF header is
+passed to the dump-capture kernel through the elfcorehdr= boot
+parameter. Optionally the size of the ELF header can also be passed
+when using the elfcorehdr=[size[KMG]@]offset[KMG] syntax.
+
+With the dump-capture kernel, you can access the memory image through
+/proc/vmcore. This exports the dump as an ELF-format file that you can
+write out using file copy commands such as cp or scp. You can also use
+makedumpfile utility to analyze and write out filtered contents with
+options, e.g with '-d 31' it will only write out kernel data. Further,
+you can use analysis tools such as the GNU Debugger (GDB) and the Crash
+tool to debug the dump file. This method ensures that the dump pages are
+correctly ordered.
+
+Setup and Installation
+======================
+
+Install kexec-tools
+-------------------
+
+1) Login as the root user.
+
+2) Download the kexec-tools user-space package from the following URL:
+
+http://kernel.org/pub/linux/utils/kernel/kexec/kexec-tools.tar.gz
+
+This is a symlink to the latest version.
+
+The latest kexec-tools git tree is available at:
+
+- git://git.kernel.org/pub/scm/utils/kernel/kexec/kexec-tools.git
+- http://www.kernel.org/pub/scm/utils/kernel/kexec/kexec-tools.git
+
+There is also a gitweb interface available at
+http://www.kernel.org/git/?p=utils/kernel/kexec/kexec-tools.git
+
+More information about kexec-tools can be found at
+http://horms.net/projects/kexec/
+
+3) Unpack the tarball with the tar command, as follows::
+
+ tar xvpzf kexec-tools.tar.gz
+
+4) Change to the kexec-tools directory, as follows::
+
+ cd kexec-tools-VERSION
+
+5) Configure the package, as follows::
+
+ ./configure
+
+6) Compile the package, as follows::
+
+ make
+
+7) Install the package, as follows::
+
+ make install
+
+
+Build the system and dump-capture kernels
+-----------------------------------------
+There are two possible methods of using Kdump.
+
+1) Build a separate custom dump-capture kernel for capturing the
+ kernel core dump.
+
+2) Or use the system kernel binary itself as dump-capture kernel and there is
+ no need to build a separate dump-capture kernel. This is possible
+ only with the architectures which support a relocatable kernel. As
+ of today, i386, x86_64, ppc64, arm and arm64 architectures support
+ relocatable kernel.
+
+Building a relocatable kernel is advantageous from the point of view that
+one does not have to build a second kernel for capturing the dump. But
+at the same time one might want to build a custom dump capture kernel
+suitable to his needs.
+
+Following are the configuration setting required for system and
+dump-capture kernels for enabling kdump support.
+
+System kernel config options
+----------------------------
+
+1) Enable "kexec system call" or "kexec file based system call" in
+ "Processor type and features."::
+
+ CONFIG_KEXEC=y or CONFIG_KEXEC_FILE=y
+
+ And both of them will select KEXEC_CORE::
+
+ CONFIG_KEXEC_CORE=y
+
+2) Enable "sysfs file system support" in "Filesystem" -> "Pseudo
+ filesystems." This is usually enabled by default::
+
+ CONFIG_SYSFS=y
+
+ Note that "sysfs file system support" might not appear in the "Pseudo
+ filesystems" menu if "Configure standard kernel features (expert users)"
+ is not enabled in "General Setup." In this case, check the .config file
+ itself to ensure that sysfs is turned on, as follows::
+
+ grep 'CONFIG_SYSFS' .config
+
+3) Enable "Compile the kernel with debug info" in "Kernel hacking."::
+
+ CONFIG_DEBUG_INFO=Y
+
+ This causes the kernel to be built with debug symbols. The dump
+ analysis tools require a vmlinux with debug symbols in order to read
+ and analyze a dump file.
+
+Dump-capture kernel config options (Arch Independent)
+-----------------------------------------------------
+
+1) Enable "kernel crash dumps" support under "Processor type and
+ features"::
+
+ CONFIG_CRASH_DUMP=y
+
+ And this will select VMCORE_INFO and CRASH_RESERVE::
+ CONFIG_VMCORE_INFO=y
+ CONFIG_CRASH_RESERVE=y
+
+2) Enable "/proc/vmcore support" under "Filesystems" -> "Pseudo filesystems"::
+
+ CONFIG_PROC_VMCORE=y
+
+ (CONFIG_PROC_VMCORE is set by default when CONFIG_CRASH_DUMP is selected.)
+
+Dump-capture kernel config options (Arch Dependent, i386 and x86_64)
+--------------------------------------------------------------------
+
+1) On i386, enable high memory support under "Processor type and
+ features"::
+
+ CONFIG_HIGHMEM4G
+
+2) With CONFIG_SMP=y, usually nr_cpus=1 need specified on the kernel
+ command line when loading the dump-capture kernel because one
+ CPU is enough for kdump kernel to dump vmcore on most of systems.
+
+ However, you can also specify nr_cpus=X to enable multiple processors
+ in kdump kernel.
+
+ With CONFIG_SMP=n, the above things are not related.
+
+3) A relocatable kernel is suggested to be built by default. If not yet,
+ enable "Build a relocatable kernel" support under "Processor type and
+ features"::
+
+ CONFIG_RELOCATABLE=y
+
+4) Use a suitable value for "Physical address where the kernel is
+ loaded" (under "Processor type and features"). This only appears when
+ "kernel crash dumps" is enabled. A suitable value depends upon
+ whether kernel is relocatable or not.
+
+ If you are using a relocatable kernel use CONFIG_PHYSICAL_START=0x100000
+ This will compile the kernel for physical address 1MB, but given the fact
+ kernel is relocatable, it can be run from any physical address hence
+ kexec boot loader will load it in memory region reserved for dump-capture
+ kernel.
+
+ Otherwise it should be the start of memory region reserved for
+ second kernel using boot parameter "crashkernel=Y@X". Here X is
+ start of memory region reserved for dump-capture kernel.
+ Generally X is 16MB (0x1000000). So you can set
+ CONFIG_PHYSICAL_START=0x1000000
+
+5) Make and install the kernel and its modules. DO NOT add this kernel
+ to the boot loader configuration files.
+
+Dump-capture kernel config options (Arch Dependent, ppc64)
+----------------------------------------------------------
+
+1) Enable "Build a kdump crash kernel" support under "Kernel" options::
+
+ CONFIG_CRASH_DUMP=y
+
+2) Enable "Build a relocatable kernel" support::
+
+ CONFIG_RELOCATABLE=y
+
+ Make and install the kernel and its modules.
+
+Dump-capture kernel config options (Arch Dependent, arm)
+----------------------------------------------------------
+
+- To use a relocatable kernel,
+ Enable "AUTO_ZRELADDR" support under "Boot" options::
+
+ AUTO_ZRELADDR=y
+
+Dump-capture kernel config options (Arch Dependent, arm64)
+----------------------------------------------------------
+
+- Please note that kvm of the dump-capture kernel will not be enabled
+ on non-VHE systems even if it is configured. This is because the CPU
+ will not be reset to EL2 on panic.
+
+crashkernel syntax
+===========================
+1) crashkernel=size@offset
+
+ Here 'size' specifies how much memory to reserve for the dump-capture kernel
+ and 'offset' specifies the beginning of this reserved memory. For example,
+ "crashkernel=64M@16M" tells the system kernel to reserve 64 MB of memory
+ starting at physical address 0x01000000 (16MB) for the dump-capture kernel.
+
+ The crashkernel region can be automatically placed by the system
+ kernel at run time. This is done by specifying the base address as 0,
+ or omitting it all together::
+
+ crashkernel=256M@0
+
+ or::
+
+ crashkernel=256M
+
+ If the start address is specified, note that the start address of the
+ kernel will be aligned to a value (which is Arch dependent), so if the
+ start address is not then any space below the alignment point will be
+ wasted.
+
+2) range1:size1[,range2:size2,...][@offset]
+
+ While the "crashkernel=size[@offset]" syntax is sufficient for most
+ configurations, sometimes it's handy to have the reserved memory dependent
+ on the value of System RAM -- that's mostly for distributors that pre-setup
+ the kernel command line to avoid a unbootable system after some memory has
+ been removed from the machine.
+
+ The syntax is::
+
+ crashkernel=<range1>:<size1>[,<range2>:<size2>,...][@offset]
+ range=start-[end]
+
+ For example::
+
+ crashkernel=512M-2G:64M,2G-:128M
+
+ This would mean:
+
+ 1) if the RAM is smaller than 512M, then don't reserve anything
+ (this is the "rescue" case)
+ 2) if the RAM size is between 512M and 2G (exclusive), then reserve 64M
+ 3) if the RAM size is larger than 2G, then reserve 128M
+
+3) crashkernel=size,high and crashkernel=size,low
+
+ If memory above 4G is preferred, crashkernel=size,high can be used to
+ fulfill that. With it, physical memory is allowed to be allocated from top,
+ so could be above 4G if system has more than 4G RAM installed. Otherwise,
+ memory region will be allocated below 4G if available.
+
+ When crashkernel=X,high is passed, kernel could allocate physical memory
+ region above 4G, low memory under 4G is needed in this case. There are
+ three ways to get low memory:
+
+ 1) Kernel will allocate at least 256M memory below 4G automatically
+ if crashkernel=Y,low is not specified.
+ 2) Let user specify low memory size instead.
+ 3) Specified value 0 will disable low memory allocation::
+
+ crashkernel=0,low
+
+4) crashkernel=size,cma
+
+ Reserve additional crash kernel memory from CMA. This reservation is
+ usable by the first system's userspace memory and kernel movable
+ allocations (memory balloon, zswap). Pages allocated from this memory
+ range will not be included in the vmcore so this should not be used if
+ dumping of userspace memory is intended and it has to be expected that
+ some movable kernel pages may be missing from the dump.
+
+ A standard crashkernel reservation, as described above, is still needed
+ to hold the crash kernel and initrd.
+
+ This option increases the risk of a kdump failure: DMA transfers
+ configured by the first kernel may end up corrupting the second
+ kernel's memory.
+
+ This reservation method is intended for systems that can't afford to
+ sacrifice enough memory for standard crashkernel reservation and where
+ less reliable and possibly incomplete kdump is preferable to no kdump at
+ all.
+
+Boot into System Kernel
+-----------------------
+1) Update the boot loader (such as grub, yaboot, or lilo) configuration
+ files as necessary.
+
+2) Boot the system kernel with the boot parameter "crashkernel=Y@X".
+
+ On x86 and x86_64, use "crashkernel=Y[@X]". Most of the time, the
+ start address 'X' is not necessary, kernel will search a suitable
+ area. Unless an explicit start address is expected.
+
+ On ppc64, use "crashkernel=128M@32M".
+
+ On s390x, typically use "crashkernel=xxM". The value of xx is dependent
+ on the memory consumption of the kdump system. In general this is not
+ dependent on the memory size of the production system.
+
+ On arm, the use of "crashkernel=Y@X" is no longer necessary; the
+ kernel will automatically locate the crash kernel image within the
+ first 512MB of RAM if X is not given.
+
+ On arm64, use "crashkernel=Y[@X]". Note that the start address of
+ the kernel, X if explicitly specified, must be aligned to 2MiB (0x200000).
+
+Load the Dump-capture Kernel
+============================
+
+After booting to the system kernel, dump-capture kernel needs to be
+loaded.
+
+Based on the architecture and type of image (relocatable or not), one
+can choose to load the uncompressed vmlinux or compressed bzImage/vmlinuz
+of dump-capture kernel. Following is the summary.
+
+For i386 and x86_64:
+
+ - Use bzImage/vmlinuz if kernel is relocatable.
+ - Use vmlinux if kernel is not relocatable.
+
+For ppc64:
+
+ - Use vmlinux
+
+For s390x:
+
+ - Use image or bzImage
+
+For arm:
+
+ - Use zImage
+
+For arm64:
+
+ - Use vmlinux or Image
+
+If you are using an uncompressed vmlinux image then use following command
+to load dump-capture kernel::
+
+ kexec -p <dump-capture-kernel-vmlinux-image> \
+ --initrd=<initrd-for-dump-capture-kernel> --args-linux \
+ --append="root=<root-dev> <arch-specific-options>"
+
+If you are using a compressed bzImage/vmlinuz, then use following command
+to load dump-capture kernel::
+
+ kexec -p <dump-capture-kernel-bzImage> \
+ --initrd=<initrd-for-dump-capture-kernel> \
+ --append="root=<root-dev> <arch-specific-options>"
+
+If you are using a compressed zImage, then use following command
+to load dump-capture kernel::
+
+ kexec --type zImage -p <dump-capture-kernel-bzImage> \
+ --initrd=<initrd-for-dump-capture-kernel> \
+ --dtb=<dtb-for-dump-capture-kernel> \
+ --append="root=<root-dev> <arch-specific-options>"
+
+If you are using an uncompressed Image, then use following command
+to load dump-capture kernel::
+
+ kexec -p <dump-capture-kernel-Image> \
+ --initrd=<initrd-for-dump-capture-kernel> \
+ --append="root=<root-dev> <arch-specific-options>"
+
+Following are the arch specific command line options to be used while
+loading dump-capture kernel.
+
+For i386 and x86_64:
+
+ "1 irqpoll nr_cpus=1 reset_devices"
+
+For ppc64:
+
+ "1 maxcpus=1 noirqdistrib reset_devices"
+
+For s390x:
+
+ "1 nr_cpus=1 cgroup_disable=memory"
+
+For arm:
+
+ "1 maxcpus=1 reset_devices"
+
+For arm64:
+
+ "1 nr_cpus=1 reset_devices"
+
+Notes on loading the dump-capture kernel:
+
+* By default, the ELF headers are stored in ELF64 format to support
+ systems with more than 4GB memory. On i386, kexec automatically checks if
+ the physical RAM size exceeds the 4 GB limit and if not, uses ELF32.
+ So, on non-PAE systems, ELF32 is always used.
+
+ The --elf32-core-headers option can be used to force the generation of ELF32
+ headers. This is necessary because GDB currently cannot open vmcore files
+ with ELF64 headers on 32-bit systems.
+
+* The "irqpoll" boot parameter reduces driver initialization failures
+ due to shared interrupts in the dump-capture kernel.
+
+* You must specify <root-dev> in the format corresponding to the root
+ device name in the output of mount command.
+
+* Boot parameter "1" boots the dump-capture kernel into single-user
+ mode without networking. If you want networking, use "3".
+
+* We generally don't have to bring up a SMP kernel just to capture the
+ dump. Hence generally it is useful either to build a UP dump-capture
+ kernel or specify maxcpus=1 option while loading dump-capture kernel.
+ Note, though maxcpus always works, you had better replace it with
+ nr_cpus to save memory if supported by the current ARCH, such as x86.
+
+* You should enable multi-cpu support in dump-capture kernel if you intend
+ to use multi-thread programs with it, such as parallel dump feature of
+ makedumpfile. Otherwise, the multi-thread program may have a great
+ performance degradation. To enable multi-cpu support, you should bring up an
+ SMP dump-capture kernel and specify maxcpus/nr_cpus options while loading it.
+
+* For s390x there are two kdump modes: If an ELF header is specified with
+ the elfcorehdr= kernel parameter, it is used by the kdump kernel as it
+ is done on all other architectures. If no elfcorehdr= kernel parameter is
+ specified, the s390x kdump kernel dynamically creates the header. The
+ second mode has the advantage that for CPU and memory hotplug, kdump has
+ not to be reloaded with kexec_load().
+
+* For s390x systems with many attached devices the "cio_ignore" kernel
+ parameter should be used for the kdump kernel in order to prevent allocation
+ of kernel memory for devices that are not relevant for kdump. The same
+ applies to systems that use SCSI/FCP devices. In that case the
+ "allow_lun_scan" zfcp module parameter should be set to zero before
+ setting FCP devices online.
+
+Kernel Panic
+============
+
+After successfully loading the dump-capture kernel as previously
+described, the system will reboot into the dump-capture kernel if a
+system crash is triggered. Trigger points are located in panic(),
+die(), die_nmi() and in the sysrq handler (ALT-SysRq-c).
+
+The following conditions will execute a crash trigger point:
+
+If a hard lockup is detected and "NMI watchdog" is configured, the system
+will boot into the dump-capture kernel ( die_nmi() ).
+
+If die() is called, and it happens to be a thread with pid 0 or 1, or die()
+is called inside interrupt context or die() is called and panic_on_oops is set,
+the system will boot into the dump-capture kernel.
+
+On powerpc systems when a soft-reset is generated, die() is called by all cpus
+and the system will boot into the dump-capture kernel.
+
+For testing purposes, you can trigger a crash by using "ALT-SysRq-c",
+"echo c > /proc/sysrq-trigger" or write a module to force the panic.
+
+Write Out the Dump File
+=======================
+
+After the dump-capture kernel is booted, write out the dump file with
+the following command::
+
+ cp /proc/vmcore <dump-file>
+
+or use scp to write out the dump file between hosts on a network, e.g::
+
+ scp /proc/vmcore remote_username@remote_ip:<dump-file>
+
+You can also use makedumpfile utility to write out the dump file
+with specified options to filter out unwanted contents, e.g::
+
+ makedumpfile -l --message-level 1 -d 31 /proc/vmcore <dump-file>
+
+Analysis
+========
+
+Before analyzing the dump image, you should reboot into a stable kernel.
+
+You can do limited analysis using GDB on the dump file copied out of
+/proc/vmcore. Use the debug vmlinux built with -g and run the following
+command::
+
+ gdb vmlinux <dump-file>
+
+Stack trace for the task on processor 0, register display, and memory
+display work fine.
+
+Note: GDB cannot analyze core files generated in ELF64 format for x86.
+On systems with a maximum of 4GB of memory, you can generate
+ELF32-format headers using the --elf32-core-headers kernel option on the
+dump kernel.
+
+You can also use the Crash utility to analyze dump files in Kdump
+format. Crash is available at the following URL:
+
+ https://github.com/crash-utility/crash
+
+Crash document can be found at:
+ https://crash-utility.github.io/
+
+Trigger Kdump on WARN()
+=======================
+
+The kernel parameter, panic_on_warn, calls panic() in all WARN() paths. This
+will cause a kdump to occur at the panic() call. In cases where a user wants
+to specify this during runtime, /proc/sys/kernel/panic_on_warn can be set to 1
+to achieve the same behaviour.
+
+Trigger Kdump on add_taint()
+============================
+
+The kernel parameter panic_on_taint facilitates a conditional call to panic()
+from within add_taint() whenever the value set in this bitmask matches with the
+bit flag being set by add_taint().
+This will cause a kdump to occur at the add_taint()->panic() call.
+
+Write the dump file to encrypted disk volume
+============================================
+
+CONFIG_CRASH_DM_CRYPT can be enabled to support saving the dump file to an
+encrypted disk volume (only x86_64 supported for now). User space can interact
+with /sys/kernel/config/crash_dm_crypt_keys for setup,
+
+1. Tell the first kernel what logon keys are needed to unlock the disk volumes,
+ # Add key #1
+ mkdir /sys/kernel/config/crash_dm_crypt_keys/7d26b7b4-e342-4d2d-b660-7426b0996720
+ # Add key #1's description
+ echo cryptsetup:7d26b7b4-e342-4d2d-b660-7426b0996720 > /sys/kernel/config/crash_dm_crypt_keys/description
+
+ # how many keys do we have now?
+ cat /sys/kernel/config/crash_dm_crypt_keys/count
+ 1
+
+ # Add key #2 in the same way
+
+ # how many keys do we have now?
+ cat /sys/kernel/config/crash_dm_crypt_keys/count
+ 2
+
+ # To support CPU/memory hot-plugging, re-use keys already saved to reserved
+ # memory
+ echo true > /sys/kernel/config/crash_dm_crypt_key/reuse
+
+2. Load the dump-capture kernel
+
+3. After the dump-capture kerne get booted, restore the keys to user keyring
+ echo yes > /sys/kernel/crash_dm_crypt_keys/restore
+
+Contact
+=======
+
+- kexec@lists.infradead.org
+
+GDB macros
+==========
+
+.. include:: gdbmacros.txt
+ :literal:
diff --git a/Documentation/admin-guide/kdump/vmcoreinfo.rst b/Documentation/admin-guide/kdump/vmcoreinfo.rst
new file mode 100644
index 000000000000..404a15f6782c
--- /dev/null
+++ b/Documentation/admin-guide/kdump/vmcoreinfo.rst
@@ -0,0 +1,596 @@
+==========
+VMCOREINFO
+==========
+
+What is it?
+===========
+
+VMCOREINFO is a special ELF note section. It contains various
+information from the kernel like structure size, page size, symbol
+values, field offsets, etc. These data are packed into an ELF note
+section and used by user-space tools like crash and makedumpfile to
+analyze a kernel's memory layout.
+
+Common variables
+================
+
+init_uts_ns.name.release
+------------------------
+
+The version of the Linux kernel. Used to find the corresponding source
+code from which the kernel has been built. For example, crash uses it to
+find the corresponding vmlinux in order to process vmcore.
+
+PAGE_SIZE
+---------
+
+The size of a page. It is the smallest unit of data used by the memory
+management facilities. It is usually 4096 bytes of size and a page is
+aligned on 4096 bytes. Used for computing page addresses.
+
+init_uts_ns
+-----------
+
+The UTS namespace which is used to isolate two specific elements of the
+system that relate to the uname(2) system call. It is named after the
+data structure used to store information returned by the uname(2) system
+call.
+
+User-space tools can get the kernel name, host name, kernel release
+number, kernel version, architecture name and OS type from it.
+
+(uts_namespace, name)
+---------------------
+
+Offset of the name's member. Crash Utility and Makedumpfile get
+the start address of the init_uts_ns.name from this.
+
+node_online_map
+---------------
+
+An array node_states[N_ONLINE] which represents the set of online nodes
+in a system, one bit position per node number. Used to keep track of
+which nodes are in the system and online.
+
+swapper_pg_dir
+--------------
+
+The global page directory pointer of the kernel. Used to translate
+virtual to physical addresses.
+
+_stext
+------
+
+Defines the beginning of the text section. In general, _stext indicates
+the kernel start address. Used to convert a virtual address from the
+direct kernel map to a physical address.
+
+VMALLOC_START
+-------------
+
+Stores the base address of vmalloc area. makedumpfile gets this value
+since is necessary for vmalloc translation.
+
+mem_map
+-------
+
+Physical addresses are translated to struct pages by treating them as
+an index into the mem_map array. Right-shifting a physical address
+PAGE_SHIFT bits converts it into a page frame number which is an index
+into that mem_map array.
+
+Used to map an address to the corresponding struct page.
+
+contig_page_data
+----------------
+
+Makedumpfile gets the pglist_data structure from this symbol, which is
+used to describe the memory layout.
+
+User-space tools use this to exclude free pages when dumping memory.
+
+mem_section|(mem_section, NR_SECTION_ROOTS)|(mem_section, section_mem_map)
+--------------------------------------------------------------------------
+
+The address of the mem_section array, its length, structure size, and
+the section_mem_map offset.
+
+It exists in the sparse memory mapping model, and it is also somewhat
+similar to the mem_map variable, both of them are used to translate an
+address.
+
+MAX_PHYSMEM_BITS
+----------------
+
+Defines the maximum supported physical address space memory.
+
+page
+----
+
+The size of a page structure. struct page is an important data structure
+and it is widely used to compute contiguous memory.
+
+pglist_data
+-----------
+
+The size of a pglist_data structure. This value is used to check if the
+pglist_data structure is valid. It is also used for checking the memory
+type.
+
+zone
+----
+
+The size of a zone structure. This value is used to check if the zone
+structure has been found. It is also used for excluding free pages.
+
+free_area
+---------
+
+The size of a free_area structure. It indicates whether the free_area
+structure is valid or not. Useful when excluding free pages.
+
+list_head
+---------
+
+The size of a list_head structure. Used when iterating lists in a
+post-mortem analysis session.
+
+nodemask_t
+----------
+
+The size of a nodemask_t type. Used to compute the number of online
+nodes.
+
+(page, flags|_refcount|mapping|lru|_mapcount|private|compound_order|compound_head)
+----------------------------------------------------------------------------------
+
+User-space tools compute their values based on the offset of these
+variables. The variables are used when excluding unnecessary pages.
+
+(pglist_data, node_zones|nr_zones|node_mem_map|node_start_pfn|node_spanned_pages|node_id)
+-----------------------------------------------------------------------------------------
+
+On NUMA machines, each NUMA node has a pg_data_t to describe its memory
+layout. On UMA machines there is a single pglist_data which describes the
+whole memory.
+
+These values are used to check the memory type and to compute the
+virtual address for memory map.
+
+(zone, free_area|vm_stat|spanned_pages)
+---------------------------------------
+
+Each node is divided into a number of blocks called zones which
+represent ranges within memory. A zone is described by a structure zone.
+
+User-space tools compute required values based on the offset of these
+variables.
+
+(free_area, free_list)
+----------------------
+
+Offset of the free_list's member. This value is used to compute the number
+of free pages.
+
+Each zone has a free_area structure array called free_area[NR_PAGE_ORDERS].
+The free_list represents a linked list of free page blocks.
+
+(list_head, next|prev)
+----------------------
+
+Offsets of the list_head's members. list_head is used to define a
+circular linked list. User-space tools need these in order to traverse
+lists.
+
+(vmap_area, va_start|list)
+--------------------------
+
+Offsets of the vmap_area's members. They carry vmalloc-specific
+information. Makedumpfile gets the start address of the vmalloc region
+from this.
+
+(zone.free_area, NR_PAGE_ORDERS)
+--------------------------------
+
+Free areas descriptor. User-space tools use this value to iterate the
+free_area ranges. NR_PAGE_ORDERS is used by the zone buddy allocator.
+
+prb
+---
+
+A pointer to the printk ringbuffer (struct printk_ringbuffer). This
+may be pointing to the static boot ringbuffer or the dynamically
+allocated ringbuffer, depending on when the core dump occurred.
+Used by user-space tools to read the active kernel log buffer.
+
+printk_rb_static
+----------------
+
+A pointer to the static boot printk ringbuffer. If @prb has a
+different value, this is useful for viewing the initial boot messages,
+which may have been overwritten in the dynamically allocated
+ringbuffer.
+
+clear_seq
+---------
+
+The sequence number of the printk() record after the last clear
+command. It indicates the first record after the last
+SYSLOG_ACTION_CLEAR, like issued by 'dmesg -c'. Used by user-space
+tools to dump a subset of the dmesg log.
+
+printk_ringbuffer
+-----------------
+
+The size of a printk_ringbuffer structure. This structure contains all
+information required for accessing the various components of the
+kernel log buffer.
+
+(printk_ringbuffer, desc_ring|text_data_ring|dict_data_ring|fail)
+-----------------------------------------------------------------
+
+Offsets for the various components of the printk ringbuffer. Used by
+user-space tools to view the kernel log buffer without requiring the
+declaration of the structure.
+
+prb_desc_ring
+-------------
+
+The size of the prb_desc_ring structure. This structure contains
+information about the set of record descriptors.
+
+(prb_desc_ring, count_bits|descs|head_id|tail_id)
+-------------------------------------------------
+
+Offsets for the fields describing the set of record descriptors. Used
+by user-space tools to be able to traverse the descriptors without
+requiring the declaration of the structure.
+
+prb_desc
+--------
+
+The size of the prb_desc structure. This structure contains
+information about a single record descriptor.
+
+(prb_desc, info|state_var|text_blk_lpos|dict_blk_lpos)
+------------------------------------------------------
+
+Offsets for the fields describing a record descriptors. Used by
+user-space tools to be able to read descriptors without requiring
+the declaration of the structure.
+
+prb_data_blk_lpos
+-----------------
+
+The size of the prb_data_blk_lpos structure. This structure contains
+information about where the text or dictionary data (data block) is
+located within the respective data ring.
+
+(prb_data_blk_lpos, begin|next)
+-------------------------------
+
+Offsets for the fields describing the location of a data block. Used
+by user-space tools to be able to locate data blocks without
+requiring the declaration of the structure.
+
+printk_info
+-----------
+
+The size of the printk_info structure. This structure contains all
+the meta-data for a record.
+
+(printk_info, seq|ts_nsec|text_len|dict_len|caller_id)
+------------------------------------------------------
+
+Offsets for the fields providing the meta-data for a record. Used by
+user-space tools to be able to read the information without requiring
+the declaration of the structure.
+
+prb_data_ring
+-------------
+
+The size of the prb_data_ring structure. This structure contains
+information about a set of data blocks.
+
+(prb_data_ring, size_bits|data|head_lpos|tail_lpos)
+---------------------------------------------------
+
+Offsets for the fields describing a set of data blocks. Used by
+user-space tools to be able to access the data blocks without
+requiring the declaration of the structure.
+
+atomic_long_t
+-------------
+
+The size of the atomic_long_t structure. Used by user-space tools to
+be able to copy the full structure, regardless of its
+architecture-specific implementation.
+
+(atomic_long_t, counter)
+------------------------
+
+Offset for the long value of an atomic_long_t variable. Used by
+user-space tools to access the long value without requiring the
+architecture-specific declaration.
+
+(free_area.free_list, MIGRATE_TYPES)
+------------------------------------
+
+The number of migrate types for pages. The free_list is described by the
+array. Used by tools to compute the number of free pages.
+
+NR_FREE_PAGES
+-------------
+
+On linux-2.6.21 or later, the number of free pages is in
+vm_stat[NR_FREE_PAGES]. Used to get the number of free pages.
+
+PG_lru|PG_private|PG_swapcache|PG_swapbacked|PG_hwpoison|PG_head_mask
+--------------------------------------------------------------------------
+
+Page attributes. These flags are used to filter various unnecessary for
+dumping pages.
+
+PAGE_SLAB_MAPCOUNT_VALUE|PAGE_BUDDY_MAPCOUNT_VALUE|PAGE_OFFLINE_MAPCOUNT_VALUE|PAGE_HUGETLB_MAPCOUNT_VALUE|PAGE_UNACCEPTED_MAPCOUNT_VALUE
+------------------------------------------------------------------------------------------------------------------------------------------
+
+More page attributes. These flags are used to filter various unnecessary for
+dumping pages.
+
+
+x86_64
+======
+
+phys_base
+---------
+
+Used to convert the virtual address of an exported kernel symbol to its
+corresponding physical address.
+
+init_top_pgt
+------------
+
+Used to walk through the whole page table and convert virtual addresses
+to physical addresses. The init_top_pgt is somewhat similar to
+swapper_pg_dir, but it is only used in x86_64.
+
+pgtable_l5_enabled
+------------------
+
+User-space tools need to know whether the crash kernel was in 5-level
+paging mode.
+
+node_data
+---------
+
+This is a struct pglist_data array and stores all NUMA nodes
+information. Makedumpfile gets the pglist_data structure from it.
+
+(node_data, MAX_NUMNODES)
+-------------------------
+
+The maximum number of nodes in system.
+
+KERNELOFFSET
+------------
+
+The kernel randomization offset. Used to compute the page offset. If
+KASLR is disabled, this value is zero.
+
+KERNEL_IMAGE_SIZE
+-----------------
+
+Currently unused by Makedumpfile. Used to compute the module virtual
+address by Crash.
+
+sme_mask
+--------
+
+AMD-specific with SME support: it indicates the secure memory encryption
+mask. Makedumpfile tools need to know whether the crash kernel was
+encrypted. If SME is enabled in the first kernel, the crash kernel's
+page table entries (pgd/pud/pmd/pte) contain the memory encryption
+mask. This is used to remove the SME mask and obtain the true physical
+address.
+
+Currently, sme_mask stores the value of the C-bit position. If needed,
+additional SME-relevant info can be placed in that variable.
+
+For example::
+
+ [ misc ][ enc bit ][ other misc SME info ]
+ 0000_0000_0000_0000_1000_0000_0000_0000_0000_0000_..._0000
+ 63 59 55 51 47 43 39 35 31 27 ... 3
+
+x86_32
+======
+
+X86_PAE
+-------
+
+Denotes whether physical address extensions are enabled. It has the cost
+of a higher page table lookup overhead, and also consumes more page
+table space per process. Used to check whether PAE was enabled in the
+crash kernel when converting virtual addresses to physical addresses.
+
+ARM64
+=====
+
+VA_BITS
+-------
+
+The maximum number of bits for virtual addresses. Used to compute the
+virtual memory ranges.
+
+kimage_voffset
+--------------
+
+The offset between the kernel virtual and physical mappings. Used to
+translate virtual to physical addresses.
+
+PHYS_OFFSET
+-----------
+
+Indicates the physical address of the start of memory. Similar to
+kimage_voffset, which is used to translate virtual to physical
+addresses.
+
+KERNELOFFSET
+------------
+
+The kernel randomization offset. Used to compute the page offset. If
+KASLR is disabled, this value is zero.
+
+KERNELPACMASK
+-------------
+
+The mask to extract the Pointer Authentication Code from a kernel virtual
+address.
+
+TCR_EL1.T1SZ
+------------
+
+Indicates the size offset of the memory region addressed by TTBR1_EL1.
+The region size is 2^(64-T1SZ) bytes.
+
+TTBR1_EL1 is the table base address register specified by ARMv8-A
+architecture which is used to lookup the page-tables for the Virtual
+addresses in the higher VA range (refer to ARMv8 ARM document for
+more details).
+
+MODULES_VADDR|MODULES_END|VMALLOC_START|VMALLOC_END|VMEMMAP_START|VMEMMAP_END
+-----------------------------------------------------------------------------
+
+Used to get the correct ranges:
+ MODULES_VADDR ~ MODULES_END-1 : Kernel module space.
+ VMALLOC_START ~ VMALLOC_END-1 : vmalloc() / ioremap() space.
+ VMEMMAP_START ~ VMEMMAP_END-1 : vmemmap region, used for struct page array.
+
+arm
+===
+
+ARM_LPAE
+--------
+
+It indicates whether the crash kernel supports large physical address
+extensions. Used to translate virtual to physical addresses.
+
+s390
+====
+
+lowcore_ptr
+-----------
+
+An array with a pointer to the lowcore of every CPU. Used to print the
+psw and all registers information.
+
+high_memory
+-----------
+
+Used to get the vmalloc_start address from the high_memory symbol.
+
+(lowcore_ptr, NR_CPUS)
+----------------------
+
+The maximum number of CPUs.
+
+powerpc
+=======
+
+
+node_data|(node_data, MAX_NUMNODES)
+-----------------------------------
+
+See above.
+
+contig_page_data
+----------------
+
+See above.
+
+vmemmap_list
+------------
+
+The vmemmap_list maintains the entire vmemmap physical mapping. Used
+to get vmemmap list count and populated vmemmap regions info. If the
+vmemmap address translation information is stored in the crash kernel,
+it is used to translate vmemmap kernel virtual addresses.
+
+mmu_vmemmap_psize
+-----------------
+
+The size of a page. Used to translate virtual to physical addresses.
+
+mmu_psize_defs
+--------------
+
+Page size definitions, i.e. 4k, 64k, or 16M.
+
+Used to make vtop translations.
+
+vmemmap_backing|(vmemmap_backing, list)|(vmemmap_backing, phys)|(vmemmap_backing, virt_addr)
+--------------------------------------------------------------------------------------------
+
+The vmemmap virtual address space management does not have a traditional
+page table to track which virtual struct pages are backed by a physical
+mapping. The virtual to physical mappings are tracked in a simple linked
+list format.
+
+User-space tools need to know the offset of list, phys and virt_addr
+when computing the count of vmemmap regions.
+
+mmu_psize_def|(mmu_psize_def, shift)
+------------------------------------
+
+The size of a struct mmu_psize_def and the offset of mmu_psize_def's
+member.
+
+Used in vtop translations.
+
+sh
+==
+
+node_data|(node_data, MAX_NUMNODES)
+-----------------------------------
+
+See above.
+
+X2TLB
+-----
+
+Indicates whether the crashed kernel enabled SH extended mode.
+
+RISCV64
+=======
+
+VA_BITS
+-------
+
+The maximum number of bits for virtual addresses. Used to compute the
+virtual memory ranges.
+
+PAGE_OFFSET
+-----------
+
+Indicates the virtual kernel start address of the direct-mapped RAM region.
+
+phys_ram_base
+-------------
+
+Indicates the start physical RAM address.
+
+MODULES_VADDR|MODULES_END|VMALLOC_START|VMALLOC_END|VMEMMAP_START|VMEMMAP_END|KERNEL_LINK_ADDR
+----------------------------------------------------------------------------------------------
+
+Used to get the correct ranges:
+
+ * MODULES_VADDR ~ MODULES_END : Kernel module space.
+ * VMALLOC_START ~ VMALLOC_END : vmalloc() / ioremap() space.
+ * VMEMMAP_START ~ VMEMMAP_END : vmemmap space, used for struct page array.
+ * KERNEL_LINK_ADDR : start address of Kernel link and BPF
+
+va_kernel_pa_offset
+-------------------
+
+Indicates the offset between the kernel virtual and physical mappings.
+Used to translate virtual to physical addresses.
diff --git a/Documentation/admin-guide/kernel-parameters.rst b/Documentation/admin-guide/kernel-parameters.rst
index b8d0bc07ed0a..7bf8cc7df6b5 100644
--- a/Documentation/admin-guide/kernel-parameters.rst
+++ b/Documentation/admin-guide/kernel-parameters.rst
@@ -1,19 +1,21 @@
+.. SPDX-License-Identifier: GPL-2.0
+
.. _kernelparameters:
The kernel's command-line parameters
====================================
-The following is a consolidated list of the kernel parameters as
-implemented by the __setup(), core_param() and module_param() macros
+The following is a consolidated list of the kernel parameters as implemented
+by the __setup(), early_param(), core_param() and module_param() macros
and sorted into English Dictionary order (defined as ignoring all
punctuation and sorting digits before letters in a case insensitive
manner), and with descriptions where known.
-The kernel parses parameters from the kernel command line up to "--";
+The kernel parses parameters from the kernel command line up to "``--``";
if it doesn't recognize a parameter and it doesn't contain a '.', the
parameter gets passed to init: parameters with '=' go into init's
environment, others are passed as command line arguments to init.
-Everything after "--" is passed as an argument to init.
+Everything after "``--``" is passed as an argument to init.
Module parameters can be specified in two ways: via the kernel command
line with a module name prefix, or via modprobe, e.g.::
@@ -27,6 +29,16 @@ kernel command line (/proc/cmdline) and collects module parameters
when it loads a module, so the kernel command line can be used for
loadable modules too.
+This document may not be entirely up to date and comprehensive. The command
+"modinfo -p ${modulename}" shows a current list of all parameters of a loadable
+module. Loadable modules, after being loaded into the running kernel, also
+reveal their parameters in /sys/module/${modulename}/parameters/. Some of these
+parameters may be changed at runtime by the command
+``echo -n ${value} > /sys/module/${modulename}/parameters/${parm}``.
+
+Special handling
+----------------
+
Hyphens (dashes) and underscores are equivalent in parameter names, so::
log_buf_len=1M print-fatal-signals=1
@@ -39,8 +51,8 @@ Double-quotes can be used to protect spaces in values, e.g.::
param="spaces in here"
-cpu lists:
-----------
+cpu lists
+~~~~~~~~~
Some kernel parameters take a list of CPUs as a value, e.g. isolcpus,
nohz_full, irqaffinity, rcu_nocbs. The format of this list is:
@@ -60,7 +72,7 @@ Note that for the special case of a range one can split the range into equal
sized groups and for each group use some amount from the beginning of that
group:
- <cpu number>-cpu number>:<used size>/<group size>
+ <cpu number>-<cpu number>:<used size>/<group size>
For example one can add to the command line following parameter:
@@ -68,42 +80,62 @@ For example one can add to the command line following parameter:
where the final item represents CPUs 100,101,125,126,150,151,...
+The value "N" can be used to represent the numerically last CPU on the system,
+i.e "foo_cpus=16-N" would be equivalent to "16-31" on a 32 core system.
+Keep in mind that "N" is dynamic, so if system changes cause the bitmap width
+to change, such as less cores in the CPU list, then N and any ranges using N
+will also change. Use the same on a small 4 core system, and "16-N" becomes
+"16-3" and now the same boot input will be flagged as invalid (start > end).
-This document may not be entirely up to date and comprehensive. The command
-"modinfo -p ${modulename}" shows a current list of all parameters of a loadable
-module. Loadable modules, after being loaded into the running kernel, also
-reveal their parameters in /sys/module/${modulename}/parameters/. Some of these
-parameters may be changed at runtime by the command
-``echo -n ${value} > /sys/module/${modulename}/parameters/${parm}``.
+The special case-tolerant group name "all" has a meaning of selecting all CPUs,
+so that "nohz_full=all" is the equivalent of "nohz_full=0-N".
+
+The semantics of "N" and "all" is supported on a level of bitmaps and holds for
+all users of bitmap_parselist().
+
+Metric suffixes
+~~~~~~~~~~~~~~~
+
+The [KMG] suffix is commonly described after a number of kernel
+parameter values. 'K', 'M', 'G', 'T', 'P', and 'E' suffixes are allowed.
+These letters represent the _binary_ multipliers 'Kilo', 'Mega', 'Giga',
+'Tera', 'Peta', and 'Exa', equaling 2^10, 2^20, 2^30, 2^40, 2^50, and
+2^60 bytes respectively. Such letter suffixes can also be entirely omitted.
-The parameters listed below are only valid if certain kernel build options were
-enabled and if respective hardware is present. The text in square brackets at
-the beginning of each description states the restrictions within which a
-parameter is applicable::
+Kernel Build Options
+--------------------
+
+The parameters listed below are only valid if certain kernel build options
+were enabled and if respective hardware is present. This list should be kept
+in alphabetical order. The text in square brackets at the beginning
+of each description states the restrictions within which a parameter
+is applicable::
ACPI ACPI support is enabled.
AGP AGP (Accelerated Graphics Port) is enabled.
ALSA ALSA sound support is enabled.
APIC APIC support is enabled.
APM Advanced Power Management support is enabled.
+ APPARMOR AppArmor support is enabled.
ARM ARM architecture is enabled.
+ ARM64 ARM64 architecture is enabled.
AX25 Appropriate AX.25 support is enabled.
CLK Common clock infrastructure is enabled.
CMA Contiguous Memory Area support is enabled.
DRM Direct Rendering Management support is enabled.
DYNAMIC_DEBUG Build in debug messages and enable them at runtime
+ EARLY Parameter processed too early to be embedded in initrd.
EDD BIOS Enhanced Disk Drive Services (EDD) is enabled
EFI EFI Partitioning (GPT) is enabled
- EIDE EIDE/ATAPI support is enabled.
EVM Extended Verification Module
FB The frame buffer device is enabled.
FTRACE Function tracing enabled.
GCOV GCOV profiling is enabled.
+ HIBERNATION HIBERNATION is enabled.
HW Appropriate hardware is enabled.
- IA-64 IA-64 architecture is enabled.
+ HYPER_V HYPERV support is enabled.
IMA Integrity measurement architecture is enabled.
- IOSCHED More than one I/O scheduler is enabled.
IP_PNP IP DHCP, BOOTP, or RARP is enabled.
IPV6 IPv6 support is enabled.
ISAPNP ISA PnP code is enabled.
@@ -113,22 +145,21 @@ parameter is applicable::
KGDB Kernel debugger support is enabled.
KVM Kernel Virtual Machine support is enabled.
LIBATA Libata driver is enabled
- LP Printer support is enabled.
+ LOONGARCH LoongArch architecture is enabled.
LOOP Loopback device support is enabled.
+ LP Printer support is enabled.
M68k M68k architecture is enabled.
These options have more detailed description inside of
- Documentation/m68k/kernel-options.txt.
+ Documentation/arch/m68k/kernel-options.rst.
MDA MDA console support is enabled.
MIPS MIPS architecture is enabled.
MOUSE Appropriate mouse support is enabled.
MSI Message Signaled Interrupts (PCI).
MTD MTD (Memory Technology Device) support is enabled.
NET Appropriate network support is enabled.
- NUMA NUMA support is enabled.
NFS Appropriate NFS support is enabled.
- OSS OSS sound support is enabled.
- PV_OPS A paravirtualized kernel is enabled.
- PARIDE The ParIDE (parallel port IDE) subsystem is enabled.
+ NUMA NUMA support is enabled.
+ OF Devicetree is enabled.
PARISC The PA-RISC architecture is enabled.
PCI PCI bus support is enabled.
PCIE PCI Express support is enabled.
@@ -137,56 +168,54 @@ parameter is applicable::
PPC PowerPC architecture is enabled.
PPT Parallel port support is enabled.
PS2 Appropriate PS/2 support is enabled.
+ PV_OPS A paravirtualized kernel is enabled.
RAM RAM disk support is enabled.
RDT Intel Resource Director Technology.
+ RISCV RISCV architecture is enabled.
S390 S390 architecture is enabled.
SCSI Appropriate SCSI support is enabled.
A lot of drivers have their options described inside
the Documentation/scsi/ sub-directory.
+ SDW SoundWire support is enabled.
SECURITY Different security models are enabled.
SELINUX SELinux support is enabled.
- APPARMOR AppArmor support is enabled.
SERIAL Serial support is enabled.
SH SuperH architecture is enabled.
SMP The kernel is an SMP kernel.
SPARC Sparc architecture is enabled.
- SWSUSP Software suspend (hibernation) is enabled.
SUSPEND System suspend states are enabled.
+ SWSUSP Software suspend (hibernation) is enabled.
TPM TPM drivers are enabled.
- TS Appropriate touchscreen support is enabled.
UMS USB Mass Storage support is enabled.
USB USB support is enabled.
USBHID USB Human Interface Device support is enabled.
V4L Video For Linux support is enabled.
- VMMIO Driver for memory mapped virtio devices is enabled.
VGA The VGA console has been enabled.
+ VMMIO Driver for memory mapped virtio devices is enabled.
VT Virtual terminal support is enabled.
WDT Watchdog support is enabled.
- XT IBM PC/XT MFM hard disk support is enabled.
X86-32 X86-32, aka i386 architecture is enabled.
X86-64 X86-64 architecture is enabled.
- More X86-64 boot options can be found in
- Documentation/x86/x86_64/boot-options.txt .
X86 Either 32-bit or 64-bit x86 (same as X86-32+X86-64)
X86_UV SGI UV support is enabled.
XEN Xen support is enabled
+ XTENSA xtensa architecture is enabled.
In addition, the following text indicates that the option::
+ BOOT Is a boot loader parameter.
BUGS= Relates to possible processor bugs on the said processor.
KNL Is a kernel start-up parameter.
- BOOT Is a boot loader parameter.
Parameters denoted with BOOT are actually interpreted by the boot
loader, and have no meaning to the kernel directly.
Do not modify the syntax of boot loader parameters without extreme
-need or coordination with <Documentation/x86/boot.txt>.
+need or coordination with <Documentation/arch/x86/boot.rst>.
There are also arch-specific kernel-parameters not documented here.
-See for example <Documentation/x86/x86_64/boot-options.txt>.
Note that ALL kernel parameters listed below are CASE SENSITIVE, and that
-a trailing = on the name of any parameter states that that parameter will
+a trailing = on the name of any parameter states that the parameter will
be entered as an environment variable, whereas its absence indicates that
it will appear as a kernel argument readable via /proc/cmdline by programs
running once the system is up.
@@ -195,17 +224,7 @@ The number of kernel parameters is not limited, but the length of the
complete command line (parameters including spaces etc.) is limited to
a fixed number of characters. This limit depends on the architecture
and is between 256 and 4096 characters. It is defined in the file
-./include/asm/setup.h as COMMAND_LINE_SIZE.
-
-Finally, the [KMG] suffix is commonly described after a number of kernel
-parameter values. These 'K', 'M', and 'G' letters represent the _binary_
-multipliers 'Kilo', 'Mega', and 'Giga', equaling 2^10, 2^20, and 2^30
-bytes respectively. Such letter suffixes can also be entirely omitted:
+./include/uapi/asm-generic/setup.h as COMMAND_LINE_SIZE.
.. include:: kernel-parameters.txt
:literal:
-
-Todo
-----
-
- Add more DRM drivers.
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 19f4423e70d9..6c42061ca20e 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1,34 +1,55 @@
- acpi= [HW,ACPI,X86,ARM64]
+ accept_memory= [MM]
+ Format: { eager | lazy }
+ default: lazy
+ By default, unaccepted memory is accepted lazily to
+ avoid prolonged boot times. The lazy option will add
+ some runtime overhead until all memory is eventually
+ accepted. In most cases the overhead is negligible.
+ For some workloads or for debugging purposes
+ accept_memory=eager can be used to accept all memory
+ at once during boot.
+
+ acpi= [HW,ACPI,X86,ARM64,RISCV64,EARLY]
Advanced Configuration and Power Interface
Format: { force | on | off | strict | noirq | rsdt |
- copy_dsdt }
+ copy_dsdt | nospcr }
force -- enable ACPI if default was off
- on -- enable ACPI but allow fallback to DT [arm64]
+ on -- enable ACPI but allow fallback to DT [arm64,riscv64]
off -- disable ACPI if default was on
noirq -- do not use ACPI for IRQ routing
strict -- Be less tolerant of platforms that are not
strictly ACPI specification compliant.
rsdt -- prefer RSDT over (default) XSDT
copy_dsdt -- copy DSDT to memory
- For ARM64, ONLY "acpi=off", "acpi=on" or "acpi=force"
+ nocmcff -- Disable firmware first mode for corrected
+ errors. This disables parsing the HEST CMC error
+ source to check if firmware has set the FF flag. This
+ may result in duplicate corrected error reports.
+ nospcr -- disable console in ACPI SPCR table as
+ default _serial_ console on ARM64
+ For ARM64, ONLY "acpi=off", "acpi=on", "acpi=force" or
+ "acpi=nospcr" are available
+ For RISCV64, ONLY "acpi=off", "acpi=on" or "acpi=force"
are available
- See also Documentation/power/runtime_pm.txt, pci=noacpi
+ See also Documentation/power/runtime_pm.rst, pci=noacpi
- acpi_apic_instance= [ACPI, IOAPIC]
+ acpi_apic_instance= [ACPI,IOAPIC,EARLY]
Format: <int>
2: use 2nd APIC table, if available
1,0: use 1st APIC table
default: 0
acpi_backlight= [HW,ACPI]
- acpi_backlight=vendor
- acpi_backlight=video
- If set to vendor, prefer vendor specific driver
+ { vendor | video | native | none }
+ If set to vendor, prefer vendor-specific driver
(e.g. thinkpad_acpi, sony_acpi, etc.) instead
of the ACPI video.ko driver.
+ If set to video, use the ACPI video.ko driver.
+ If set to native, use the device's native backlight mode.
+ If set to none, disable the ACPI backlight interface.
- acpi_force_32bit_fadt_addr
+ acpi_force_32bit_fadt_addr [ACPI,EARLY]
force FADT to use 32 bit addresses rather than the
64 bit X_* addresses. Some firmware have broken 64
bit addresses for force ACPI ignore these and use
@@ -48,18 +69,16 @@
CONFIG_ACPI_DEBUG must be enabled to produce any ACPI
debug output. Bits in debug_layer correspond to a
_COMPONENT in an ACPI source file, e.g.,
- #define _COMPONENT ACPI_PCI_COMPONENT
+ #define _COMPONENT ACPI_EVENTS
Bits in debug_level correspond to a level in
ACPI_DEBUG_PRINT statements, e.g.,
ACPI_DEBUG_PRINT((ACPI_DB_INFO, ...
The debug_level mask defaults to "info". See
- Documentation/acpi/debug.txt for more information about
+ Documentation/firmware-guide/acpi/debug.rst for more information about
debug layers and levels.
Enable processor driver info messages:
acpi.debug_layer=0x20000000
- Enable PCI/PCI interrupt routing info messages:
- acpi.debug_layer=0x400000
Enable AML "Debug" output, i.e., stores to the Debug
object while interpreting AML:
acpi.debug_layer=0xffffffff acpi.debug_level=0x2
@@ -86,7 +105,7 @@
no: ACPI OperationRegions are not marked as reserved,
no further checks are performed.
- acpi_force_table_verification [HW,ACPI]
+ acpi_force_table_verification [HW,ACPI,EARLY]
Enable table checksum verification during early stage.
By default, this is disabled due to x86 early mapping
size limitation.
@@ -113,7 +132,7 @@
the GPE dispatcher.
This facility can be used to prevent such uncontrolled
GPE floodings.
- Format: <int>
+ Format: <byte> or <bitmap-list>
acpi_no_auto_serialize [HW,ACPI]
Disable auto-serialization of AML methods
@@ -126,7 +145,7 @@
acpi_no_memhotplug [ACPI] Disable memory hotplug. Useful for kdump
kernels.
- acpi_no_static_ssdt [HW,ACPI]
+ acpi_no_static_ssdt [HW,ACPI,EARLY]
Disable installation of static SSDTs at early boot time
By default, SSDTs contained in the RSDT/XSDT will be
installed automatically and they will appear under
@@ -136,7 +155,11 @@
dynamic table installation which will install SSDT
tables to /sys/firmware/acpi/tables/dynamic.
- acpi_rsdp= [ACPI,EFI,KEXEC]
+ acpi_no_watchdog [HW,ACPI,WDT]
+ Ignore the ACPI-based watchdog interface (WDAT) and let
+ a native driver control the watchdog device instead.
+
+ acpi_rsdp= [ACPI,EFI,KEXEC,EARLY]
Pass the RSDP address to the kernel, mostly used
on machines running EFI runtime service to boot the
second kernel for kdump.
@@ -213,22 +236,31 @@
to assume that this machine's pmtimer latches its value
and always returns good values.
- acpi_sci= [HW,ACPI] ACPI System Control Interrupt trigger mode
+ acpi_sci= [HW,ACPI,EARLY] ACPI System Control Interrupt trigger mode
Format: { level | edge | high | low }
- acpi_skip_timer_override [HW,ACPI]
+ acpi_skip_timer_override [HW,ACPI,EARLY]
Recognize and ignore IRQ0/pin2 Interrupt Override.
For broken nForce2 BIOS resulting in XT-PIC timer.
acpi_sleep= [HW,ACPI] Sleep options
- Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig,
- old_ordering, nonvs, sci_force_enable, nobl }
- See Documentation/power/video.txt for information on
+ Format: { s3_bios, s3_mode, s3_beep, s4_hwsig,
+ s4_nohwsig, old_ordering, nonvs,
+ sci_force_enable, nobl }
+ See Documentation/power/video.rst for information on
s3_bios and s3_mode.
s3_beep is for debugging; it makes the PC's speaker beep
as soon as the kernel's real-mode entry point is called.
+ s4_hwsig causes the kernel to check the ACPI hardware
+ signature during resume from hibernation, and gracefully
+ refuse to resume if it has changed. This complies with
+ the ACPI specification but not with reality, since
+ Windows does not do this and many laptops do change it
+ on docking. So the default behaviour is to allow resume
+ and simply warn when the signature changes, unless the
+ s4_hwsig option is enabled.
s4_nohwsig prevents ACPI hardware signature from being
- used during resume from hibernation.
+ used (or even warned about) during resume.
old_ordering causes the ACPI 1.0 ordering of the _PTS
control method, with respect to putting devices into
low power states, to be enforced (the ACPI 2.0 ordering
@@ -242,11 +274,11 @@
behave incorrectly in some ways with respect to system
suspend and resume to be ignored (use wisely).
- acpi_use_timer_override [HW,ACPI]
+ acpi_use_timer_override [HW,ACPI,EARLY]
Use timer override. For some broken Nvidia NF5 boards
that require a timer override, but don't have HPET
- add_efi_memmap [EFI; X86] Include EFI memory map in
+ add_efi_memmap [EFI,X86,EARLY] Include EFI memory map in
kernel's map of available physical RAM.
agp= [AGP]
@@ -283,13 +315,21 @@
do not want to use tracing_snapshot_alloc() as it needs
to be done where GFP_KERNEL allocations are allowed.
+ allow_mismatched_32bit_el0 [ARM64,EARLY]
+ Allow execve() of 32-bit applications and setting of the
+ PER_LINUX32 personality on systems where only a strict
+ subset of the CPUs support 32-bit EL0. When this
+ parameter is present, the set of CPUs supporting 32-bit
+ EL0 is indicated by /sys/devices/system/cpu/aarch32_el0
+ and hot-unplug operations may be restricted.
+
+ See Documentation/arch/arm64/asymmetric-32bit.rst for more
+ information.
+
amd_iommu= [HW,X86-64]
Pass parameters to the AMD IOMMU driver in the system.
Possible values are:
- fullflush - enable flushing of IO/TLB entries when
- they are unmapped. Otherwise they are
- flushed before they will be reused, which
- is a lot of faster
+ fullflush - Deprecated, equivalent to iommu.strict=1
off - do not initialize any AMD IOMMU found in
the system
force_isolation - Force device isolation for all
@@ -297,6 +337,17 @@
allowed anymore to lift isolation
requirements as needed. This option
does not override iommu=pt
+ force_enable - Force enable the IOMMU on platforms known
+ to be buggy with IOMMU enabled. Use this
+ option with care.
+ pgtbl_v1 - Use v1 page table for DMA-API (Default).
+ pgtbl_v2 - Use v2 page table for DMA-API.
+ irtcachedis - Disable Interrupt Remapping Table (IRT) caching.
+ nohugepages - Limit page-sizes used for v1 page-tables
+ to 4 KiB.
+ v2_pgsizes_only - Limit page-sizes used for v1 page-tables
+ to 4KiB/2Mib/1GiB.
+
amd_iommu_dump= [HW,X86-64]
Enable AMD IOMMU driver option to dump the ACPI table
@@ -313,6 +364,34 @@
This mode requires kvm-amd.avic=1.
(Default when IOMMU HW support is present.)
+ amd_pstate= [X86,EARLY]
+ disable
+ Do not enable amd_pstate as the default
+ scaling driver for the supported processors
+ passive
+ Use amd_pstate with passive mode as a scaling driver.
+ In this mode autonomous selection is disabled.
+ Driver requests a desired performance level and platform
+ tries to match the same performance level if it is
+ satisfied by guaranteed performance level.
+ active
+ Use amd_pstate_epp driver instance as the scaling driver,
+ driver provides a hint to the hardware if software wants
+ to bias toward performance (0x0) or energy efficiency (0xff)
+ to the CPPC firmware. then CPPC power algorithm will
+ calculate the runtime workload and adjust the realtime cores
+ frequency.
+ guided
+ Activate guided autonomous mode. Driver requests minimum and
+ maximum performance level and the platform autonomously
+ selects a performance level in this range and appropriate
+ to the current workload.
+
+ amd_prefcore=
+ [X86]
+ disable
+ Disable amd-pstate preferred core.
+
amijoy.map= [HW,JOY] Amiga joystick support
Map of devices attached to JOY0DAT and JOY1DAT
Format: <a>,<b>
@@ -330,17 +409,15 @@
not play well with APC CPU idle - disable it if you have
APC and your system crashes randomly.
- apic= [APIC,X86] Advanced Programmable Interrupt Controller
- Change the output verbosity whilst booting
+ apic [APIC,X86-64] Use IO-APIC. Default.
+
+ apic= [APIC,X86,EARLY] Advanced Programmable Interrupt Controller
+ Change the output verbosity while booting
Format: { quiet (default) | verbose | debug }
Change the amount of debugging information output
when initialising the APIC and IO-APIC components.
- For X86-32, this can also be used to specify an APIC
- driver name.
- Format: apic=driver_name
- Examples: apic=bigsmp
- apic_extnmi= [APIC,X86] External NMI delivery setting
+ apic_extnmi= [APIC,X86,EARLY] External NMI delivery setting
Format: { bsp (default) | all | none }
bsp: External NMI is delivered only to CPU 0
all: External NMIs are broadcast to all CPUs as a
@@ -349,24 +426,53 @@
useful so that a dump capture kernel won't be
shot down by NMI
- autoconf= [IPV6]
- See Documentation/networking/ipv6.txt.
+ apicpmtimer Do APIC timer calibration using the pmtimer. Implies
+ apicmaintimer. Useful when your PIT timer is totally
+ broken.
- show_lapic= [APIC,X86] Advanced Programmable Interrupt Controller
- Limit apic dumping. The parameter defines the maximal
- number of local apics being dumped. Also it is possible
- to set it to "all" by meaning -- no limit here.
- Format: { 1 (default) | 2 | ... | all }.
- The parameter valid if only apic=debug or
- apic=verbose is specified.
- Example: apic=debug show_lapic=all
+ autoconf= [IPV6]
+ See Documentation/networking/ipv6.rst.
apm= [APM] Advanced Power Management
See header of arch/x86/kernel/apm_32.c.
+ apparmor= [APPARMOR] Disable or enable AppArmor at boot time
+ Format: { "0" | "1" }
+ See security/apparmor/Kconfig help text
+ 0 -- disable.
+ 1 -- enable.
+ Default value is set via kernel config option.
+
arcrimi= [HW,NET] ARCnet - "RIM I" (entirely mem-mapped) cards
Format: <io>,<irq>,<nodeID>
+ arm64.no32bit_el0 [ARM64] Unconditionally disable the execution of
+ 32 bit applications.
+
+ arm64.nobti [ARM64] Unconditionally disable Branch Target
+ Identification support
+
+ arm64.nogcs [ARM64] Unconditionally disable Guarded Control Stack
+ support
+
+ arm64.nomops [ARM64] Unconditionally disable Memory Copy and Memory
+ Set instructions support
+
+ arm64.nompam [ARM64] Unconditionally disable Memory Partitioning And
+ Monitoring support
+
+ arm64.nomte [ARM64] Unconditionally disable Memory Tagging Extension
+ support
+
+ arm64.nopauth [ARM64] Unconditionally disable Pointer Authentication
+ support
+
+ arm64.nosme [ARM64] Unconditionally disable Scalable Matrix
+ Extension support
+
+ arm64.nosve [ARM64] Unconditionally disable Scalable Vector
+ Extension support
+
ataflop= [HW,M68k]
atarimouse= [HW,MOUSE] Atari Mouse
@@ -428,24 +534,46 @@
Format: <io>,<irq>,<mode>
See header of drivers/net/hamradio/baycom_ser_hdx.c.
+ bdev_allow_write_mounted=
+ Format: <bool>
+ Control the ability to open a mounted block device
+ for writing, i.e., allow / disallow writes that bypass
+ the FS. This was implemented as a means to prevent
+ fuzzers from crashing the kernel by overwriting the
+ metadata underneath a mounted FS without its awareness.
+ This also prevents destructive formatting of mounted
+ filesystems by naive storage tooling that don't use
+ O_EXCL. Default is Y and can be changed through the
+ Kconfig option CONFIG_BLK_DEV_WRITE_MOUNTED.
+
+ bert_disable [ACPI]
+ Disable BERT OS support on buggy BIOSes.
+
+ bgrt_disable [ACPI,X86,EARLY]
+ Disable BGRT to avoid flickering OEM logo.
+
blkdevparts= Manual partition parsing of block device(s) for
embedded devices based on command line input.
- See Documentation/block/cmdline-partition.txt
-
- boot_delay= Milliseconds to delay each printk during boot.
- Values larger than 10 seconds (10000) are changed to
- no delay (0).
+ See Documentation/block/cmdline-partition.rst
+
+ boot_delay= [KNL,EARLY]
+ Milliseconds to delay each printk during boot.
+ Only works if CONFIG_BOOT_PRINTK_DELAY is enabled,
+ and you may also have to specify "lpj=". Boot_delay
+ values larger than 10 seconds (10000) are assumed
+ erroneous and ignored.
Format: integer
- bootmem_debug [KNL] Enable bootmem allocator debug messages.
+ bootconfig [KNL,EARLY]
+ Extended command line options can be added to an initrd
+ and this will cause the kernel to look for it.
- bert_disable [ACPI]
- Disable BERT OS support on buggy BIOSes.
+ See Documentation/admin-guide/bootconfig.rst
bttv.card= [HW,V4L] bttv (bt848 + bt878 based grabber cards)
bttv.radio= Most important insmod options are available as
kernel args too.
- bttv.pll= See Documentation/media/v4l-drivers/bttv.rst
+ bttv.pll= See Documentation/admin-guide/media/bttv.rst
bttv.tuner=
bulk_remove=off [PPC] This parameter disables the use of the pSeries
@@ -461,42 +589,87 @@
possible to determine what the correct size should be.
This option provides an override for these situations.
+ carrier_timeout=
+ [NET] Specifies amount of time (in seconds) that
+ the kernel should wait for a network carrier. By default
+ it waits 120 seconds.
+
ca_keys= [KEYS] This parameter identifies a specific key(s) on
the system trusted keyring to be used for certificate
trust validation.
format: { id:<keyid> | builtin }
- cca= [MIPS] Override the kernel pages' cache coherency
+ cca= [MIPS,EARLY] Override the kernel pages' cache coherency
algorithm. Accepted values range from 0 to 7
inclusive. See arch/mips/include/asm/pgtable-bits.h
for platform specific values (SB1, Loongson3 and
others).
ccw_timeout_log [S390]
- See Documentation/s390/CommonIO for details.
-
- cgroup_disable= [KNL] Disable a particular controller
- Format: {name of the controller(s) to disable}
+ See Documentation/arch/s390/common_io.rst for details.
+
+ cfi= [X86-64] Set Control Flow Integrity checking features
+ when CONFIG_FINEIBT is enabled.
+ Format: feature[,feature...]
+ Default: auto
+
+ auto: Use FineIBT if IBT available, otherwise kCFI.
+ Under FineIBT, enable "paranoid" mode when
+ FRED is not available.
+ off: Turn off CFI checking.
+ kcfi: Use kCFI (disable FineIBT).
+ fineibt: Use FineIBT (even if IBT not available).
+ norand: Do not re-randomize CFI hashes.
+ paranoid: Add caller hash checking under FineIBT.
+ bhi: Enable register poisoning to stop speculation
+ across FineIBT. (Disabled by default.)
+ warn: Do not enforce CFI checking: warn only.
+ debug: Report CFI initialization details.
+
+ cgroup_disable= [KNL] Disable a particular controller or optional feature
+ Format: {name of the controller(s) or feature(s) to disable}
The effects of cgroup_disable=foo are:
- foo isn't auto-mounted if you mount all cgroups in
a single hierarchy
- foo isn't visible as an individually mountable
subsystem
+ - if foo is an optional feature then the feature is
+ disabled and corresponding cgroup files are not
+ created
{Currently only "memory" controller deal with this and
cut the overhead, others just disable the usage. So
only cgroup_disable=memory is actually worthy}
+ Specifying "pressure" disables per-cgroup pressure
+ stall information accounting feature
- cgroup_no_v1= [KNL] Disable one, multiple, all cgroup controllers in v1
- Format: { controller[,controller...] | "all" }
+ cgroup_no_v1= [KNL] Disable cgroup controllers and named hierarchies in v1
+ Format: { { controller | "all" | "named" }
+ [,{ controller | "all" | "named" }...] }
Like cgroup_disable, but only applies to cgroup v1;
the blacklisted controllers remain available in cgroup2.
+ "all" blacklists all controllers and "named" disables
+ named mounts. Specifying both "all" and "named" disables
+ all v1 hierarchies.
+
+ cgroup_v1_proc= [KNL] Show also missing controllers in /proc/cgroups
+ Format: { "true" | "false" }
+ /proc/cgroups lists only v1 controllers by default.
+ This compatibility option enables listing also v2
+ controllers (whose v1 code is not compiled!), so that
+ semi-legacy software can check this file to decide
+ about usage of v2 (sic) controllers.
+
+ cgroup_favordynmods= [KNL] Enable or Disable favordynmods.
+ Format: { "true" | "false" }
+ Defaults to the value of CONFIG_CGROUP_FAVOR_DYNMODS.
cgroup.memory= [KNL] Pass options to the cgroup memory controller.
Format: <string>
nosocket -- Disable socket memory accounting.
nokmem -- Disable kernel memory accounting.
+ nobpf -- Disable BPF memory accounting.
- checkreqprot [SELINUX] Set initial checkreqprot flag value.
+ checkreqprot= [SELINUX] Set initial checkreqprot flag value.
Format: { "0" | "1" }
See security/selinux/Kconfig help text.
0 -- check protection applied by kernel (includes
@@ -504,10 +677,30 @@
1 -- check protection requested by application.
Default value is set via a kernel config option.
Value can be changed at runtime via
- /selinux/checkreqprot.
+ /sys/fs/selinux/checkreqprot.
+ Setting checkreqprot to 1 is deprecated.
cio_ignore= [S390]
- See Documentation/s390/CommonIO for details.
+ See Documentation/arch/s390/common_io.rst for details.
+
+ clearcpuid=X[,X...] [X86]
+ Disable CPUID feature X for the kernel. See
+ arch/x86/include/asm/cpufeatures.h for the valid bit
+ numbers X. Note the Linux-specific bits are not necessarily
+ stable over kernel options, but the vendor-specific
+ ones should be.
+ X can also be a string as appearing in the flags: line
+ in /proc/cpuinfo which does not have the above
+ instability issue. However, not all features have names
+ in /proc/cpuinfo.
+ Note that using this option will taint your kernel.
+ Also note that user programs calling CPUID directly
+ or using the feature without checking anything
+ will still see it. This just prevents it from
+ being used by the kernel or shown in /proc/cpuinfo.
+ Also note the kernel might malfunction if you disable
+ some critical bits.
+
clk_ignore_unused
[CLK]
Prevents the clock framework from automatically gating
@@ -547,34 +740,59 @@
[X86-64] hpet,tsc
clocksource.arm_arch_timer.evtstrm=
- [ARM,ARM64]
+ [ARM,ARM64,EARLY]
Format: <bool>
Enable/disable the eventstream feature of the ARM
architected timer so that code using WFE-based polling
loops can be debugged more effectively on production
systems.
- clearcpuid=BITNUM [X86]
- Disable CPUID feature X for the kernel. See
- arch/x86/include/asm/cpufeatures.h for the valid bit
- numbers. Note the Linux specific bits are not necessarily
- stable over kernel options, but the vendor specific
- ones should be.
- Also note that user programs calling CPUID directly
- or using the feature without checking anything
- will still see it. This just prevents it from
- being used by the kernel or shown in /proc/cpuinfo.
- Also note the kernel might malfunction if you disable
- some critical bits.
+ clocksource.verify_n_cpus= [KNL]
+ Limit the number of CPUs checked for clocksources
+ marked with CLOCK_SOURCE_VERIFY_PERCPU that
+ are marked unstable due to excessive skew.
+ A negative value says to check all CPUs, while
+ zero says not to check any. Values larger than
+ nr_cpu_ids are silently truncated to nr_cpu_ids.
+ The actual CPUs are chosen randomly, with
+ no replacement if the same CPU is chosen twice.
+
+ clocksource-wdtest.holdoff= [KNL]
+ Set the time in seconds that the clocksource
+ watchdog test waits before commencing its tests.
+ Defaults to zero when built as a module and to
+ 10 seconds when built into the kernel.
cma=nn[MG]@[start[MG][-end[MG]]]
- [ARM,X86,KNL]
+ [KNL,CMA,EARLY]
Sets the size of kernel global memory area for
contiguous memory allocations and optionally the
placement constraint by the physical address range of
memory allocations. A value of 0 disables CMA
altogether. For more information, see
- include/linux/dma-contiguous.h
+ kernel/dma/contiguous.c
+
+ cma_pernuma=nn[MG]
+ [KNL,CMA,EARLY]
+ Sets the size of kernel per-numa memory area for
+ contiguous memory allocations. A value of 0 disables
+ per-numa CMA altogether. And If this option is not
+ specified, the default value is 0.
+ With per-numa CMA enabled, DMA users on node nid will
+ first try to allocate buffer from the pernuma area
+ which is located in node nid, if the allocation fails,
+ they will fallback to the global default memory area.
+
+ numa_cma=<node>:nn[MG][,<node>:nn[MG]]
+ [KNL,CMA,EARLY]
+ Sets the size of kernel numa memory area for
+ contiguous memory allocations. It will reserve CMA
+ area for the specified node.
+
+ With numa CMA enabled, DMA users on node nid will
+ first try to allocate buffer from the numa area
+ which is located in node nid, if the allocation fails,
+ they will fallback to the global default memory area.
cmo_free_hint= [PPC] Format: { yes | no }
Specify whether pages are marked as being inactive
@@ -583,7 +801,7 @@
a hypervisor.
Default: yes
- coherent_pool=nn[KMG] [ARM,KNL]
+ coherent_pool=nn[KMG] [ARM,KNL,EARLY]
Sets the size of memory pool for coherent, atomic dma
allocations, by default set to 256K.
@@ -601,6 +819,17 @@
condev= [HW,S390] console device
conmode=
+ con3215_drop= [S390,EARLY] 3215 console drop mode.
+ Format: y|n|Y|N|1|0
+ When set to true, drop data on the 3215 console when
+ the console buffer is full. In this case the
+ operator using a 3270 terminal emulator (for example
+ x3270) does not have to enter the clear key for the
+ console output to advance and the kernel to continue.
+ This leads to a much faster boot time when a 3270
+ terminal emulator is active. If no 3270 terminal
+ emulator is used, this parameter has no effect.
+
console= [KNL] Output console device and options.
tty<n> Use the virtual console device <n>.
@@ -615,9 +844,28 @@
See Documentation/admin-guide/serial-console.rst for more
information. See
- Documentation/networking/netconsole.txt for an
+ Documentation/networking/netconsole.rst for an
alternative.
+ <DEVNAME>:<n>.<n>[,options]
+ Use the specified serial port on the serial core bus.
+ The addressing uses DEVNAME of the physical serial port
+ device, followed by the serial core controller instance,
+ and the serial port instance. The options are the same
+ as documented for the ttyS addressing above.
+
+ The mapping of the serial ports to the tty instances
+ can be viewed with:
+
+ $ ls -d /sys/bus/serial-base/devices/*:*.*/tty/*
+ /sys/bus/serial-base/devices/00:04:0.0/tty/ttyS0
+
+ In the above example, the console can be addressed with
+ console=00:04:0.0. Note that a console addressed this
+ way will only get added when the related device driver
+ is ready. The use of an earlycon parameter in addition to
+ the console may be desired for console output early on.
+
uart[8250],io,<addr>[,options]
uart[8250],mmio,<addr>[,options]
uart[8250],mmio16,<addr>[,options]
@@ -636,6 +884,12 @@
hvc<n> Use the hypervisor console device <n>. This is for
both Xen and PowerPC hypervisors.
+ { null | "" }
+ Use to disable console output, i.e., to have kernel
+ console messages discarded.
+ This must be the only console= parameter used on the
+ kernel command line.
+
If the device connected to the port is not a TTY but a braille
device, prepend "brl," before the device type, for instance
console=brl,ttyS0
@@ -662,7 +916,7 @@
coredump_filter=
[KNL] Change the default value for
/proc/<pid>/coredump_filter.
- See also Documentation/filesystems/proc.txt.
+ See also Documentation/filesystems/proc.rst.
coresight_cpu_debug.enable
[ARM,ARM64]
@@ -671,58 +925,115 @@
0: default value, disable debugging
1: enable debugging at boot time
+ cpcihp_generic= [HW,PCI] Generic port I/O CompactPCI driver
+ Format:
+ <first_slot>,<last_slot>,<port>,<enum_bit>[,<debug>]
+
cpuidle.off=1 [CPU_IDLE]
disable the cpuidle sub-system
+ cpuidle.governor=
+ [CPU_IDLE] Name of the cpuidle governor to use.
+
cpufreq.off=1 [CPU_FREQ]
disable the cpufreq sub-system
+ cpufreq.default_governor=
+ [CPU_FREQ] Name of the default cpufreq governor or
+ policy to use. This governor must be registered in the
+ kernel before the cpufreq driver probes.
+
cpu_init_udelay=N
- [X86] Delay for N microsec between assert and de-assert
+ [X86,EARLY] Delay for N microsec between assert and de-assert
of APIC INIT to start processors. This delay occurs
on every CPU online, such as boot, and resume from suspend.
Default: 10000
- cpcihp_generic= [HW,PCI] Generic port I/O CompactPCI driver
- Format:
- <first_slot>,<last_slot>,<port>,<enum_bit>[,<debug>]
+ cpuhp.parallel=
+ [SMP] Enable/disable parallel bringup of secondary CPUs
+ Format: <bool>
+ Default is enabled if CONFIG_HOTPLUG_PARALLEL=y. Otherwise
+ the parameter has no effect.
+
+ crash_kexec_post_notifiers
+ Only jump to kdump kernel after running the panic
+ notifiers and dumping kmsg. This option increases
+ the risks of a kdump failure, since some panic
+ notifiers can make the crashed kernel more unstable.
+ In configurations where kdump may not be reliable,
+ running the panic notifiers could allow collecting
+ more data on dmesg, like stack traces from other CPUS
+ or extra data dumped by panic_print. Note that some
+ configurations enable this option unconditionally,
+ like Hyper-V, PowerPC (fadump) and AMD SEV-SNP.
crashkernel=size[KMG][@offset[KMG]]
- [KNL] Using kexec, Linux can switch to a 'crash kernel'
+ [KNL,EARLY] Using kexec, Linux can switch to a 'crash kernel'
upon panic. This parameter reserves the physical
memory region [offset, offset + size] for that kernel
image. If '@offset' is omitted, then a suitable offset
- is selected automatically. Check
- Documentation/kdump/kdump.txt for further details.
+ is selected automatically.
+ [KNL, X86-64, ARM64, RISCV, LoongArch] Select a region
+ under 4G first, and fall back to reserve region above
+ 4G when '@offset' hasn't been specified.
+ See Documentation/admin-guide/kdump/kdump.rst for further details.
crashkernel=range1:size1[,range2:size2,...][@offset]
[KNL] Same as above, but depends on the memory
in the running system. The syntax of range is
start-[end] where start and end are both
a memory unit (amount[KMG]). See also
- Documentation/kdump/kdump.txt for an example.
+ Documentation/admin-guide/kdump/kdump.rst for an example.
crashkernel=size[KMG],high
- [KNL, x86_64] range could be above 4G. Allow kernel
- to allocate physical memory region from top, so could
- be above 4G if system have more than 4G ram installed.
- Otherwise memory region will be allocated below 4G, if
- available.
+ [KNL, X86-64, ARM64, RISCV, LoongArch] range could be
+ above 4G.
+ Allow kernel to allocate physical memory region from top,
+ so could be above 4G if system have more than 4G ram
+ installed. Otherwise memory region will be allocated
+ below 4G, if available.
It will be ignored if crashkernel=X is specified.
crashkernel=size[KMG],low
- [KNL, x86_64] range under 4G. When crashkernel=X,high
- is passed, kernel could allocate physical memory region
- above 4G, that cause second kernel crash on system
- that require some amount of low memory, e.g. swiotlb
- requires at least 64M+32K low memory, also enough extra
- low memory is needed to make sure DMA buffers for 32-bit
- devices won't run out. Kernel would try to allocate at
- at least 256M below 4G automatically.
- This one let user to specify own low range under 4G
+ [KNL, X86-64, ARM64, RISCV, LoongArch] range under 4G.
+ When crashkernel=X,high is passed, kernel could allocate
+ physical memory region above 4G, that cause second kernel
+ crash on system that require some amount of low memory,
+ e.g. swiotlb requires at least 64M+32K low memory, also
+ enough extra low memory is needed to make sure DMA buffers
+ for 32-bit devices won't run out. Kernel would try to allocate
+ default size of memory below 4G automatically. The default
+ size is platform dependent.
+ --> x86: max(swiotlb_size_or_default() + 8MiB, 256MiB)
+ --> arm64: 128MiB
+ --> riscv: 128MiB
+ --> loongarch: 128MiB
+ This one lets the user specify own low range under 4G
for second kernel instead.
0: to disable low allocation.
It will be ignored when crashkernel=X,high is not used
or memory reserved is below 4G.
+ crashkernel=size[KMG],cma
+ [KNL, X86] Reserve additional crash kernel memory from
+ CMA. This reservation is usable by the first system's
+ userspace memory and kernel movable allocations (memory
+ balloon, zswap). Pages allocated from this memory range
+ will not be included in the vmcore so this should not
+ be used if dumping of userspace memory is intended and
+ it has to be expected that some movable kernel pages
+ may be missing from the dump.
+
+ A standard crashkernel reservation, as described above,
+ is still needed to hold the crash kernel and initrd.
+
+ This option increases the risk of a kdump failure: DMA
+ transfers configured by the first kernel may end up
+ corrupting the second kernel's memory.
+
+ This reservation method is intended for systems that
+ can't afford to sacrifice enough memory for standard
+ crashkernel reservation and where less reliable and
+ possibly incomplete kdump is preferable to no kdump at
+ all.
cryptomgr.notests
[KNL] Disable crypto self-tests
@@ -733,6 +1044,15 @@
cs89x0_media= [HW,NET]
Format: { rj45 | aui | bnc }
+ csdlock_debug= [KNL] Enable or disable debug add-ons of cross-CPU
+ function call handling. When switched on,
+ additional debug data is printed to the console
+ in case a hanging CPU is detected, and that
+ CPU is pinged again in order to try to resolve
+ the hang situation. The default value of this
+ option depends on the CSD_LOCK_WAIT_DEBUG_DEFAULT
+ Kconfig option.
+
dasd= [HW,NET]
See header of drivers/s390/block/dasd_devmap.c.
@@ -741,15 +1061,10 @@
Format: <port#>,<type>
See also Documentation/input/devices/joystick-parport.rst
- ddebug_query= [KNL,DYNAMIC_DEBUG] Enable debug messages at early boot
- time. See
- Documentation/admin-guide/dynamic-debug-howto.rst for
- details. Deprecated, see dyndbg.
-
- debug [KNL] Enable kernel debugging (events log level).
+ debug [KNL,EARLY] Enable kernel debugging (events log level).
debug_boot_weak_hash
- [KNL] Enable printing [hashed] pointers early in the
+ [KNL,EARLY] Enable printing [hashed] pointers early in the
boot sequence. If enabled, we use a weak hash instead
of siphash to hash pointers. Use this option if you are
seeing instances of '(___ptrval___)') and need to see a
@@ -757,129 +1072,160 @@
insecure, please do not use on production kernels.
debug_locks_verbose=
- [KNL] verbose self-tests
- Format=<0|1>
+ [KNL] verbose locking self-tests
+ Format: <int>
Print debugging info while doing the locking API
self-tests.
- We default to 0 (no extra messages), setting it to
- 1 will print _a lot_ more information - normally
- only useful to kernel developers.
-
- debug_objects [KNL] Enable object debugging
+ Bitmask for the various LOCKTYPE_ tests. Defaults to 0
+ (no extra messages), setting it to -1 (all bits set)
+ will print _a_lot_ more information - normally only
+ useful to lockdep developers.
- no_debug_objects
- [KNL] Disable object debugging
+ debug_objects [KNL,EARLY] Enable object debugging
debug_guardpage_minorder=
- [KNL] When CONFIG_DEBUG_PAGEALLOC is set, this
+ [KNL,EARLY] When CONFIG_DEBUG_PAGEALLOC is set, this
parameter allows control of the order of pages that will
be intentionally kept free (and hence protected) by the
buddy allocator. Bigger value increase the probability
of catching random memory corruption, but reduce the
amount of memory for normal system use. The maximum
- possible value is MAX_ORDER/2. Setting this parameter
- to 1 or 2 should be enough to identify most random
- memory corruption problems caused by bugs in kernel or
- driver code when a CPU writes to (or reads from) a
- random memory location. Note that there exists a class
- of memory corruptions problems caused by buggy H/W or
- F/W or by drivers badly programing DMA (basically when
- memory is written at bus level and the CPU MMU is
- bypassed) which are not detectable by
- CONFIG_DEBUG_PAGEALLOC, hence this option will not help
- tracking down these problems.
+ possible value is MAX_PAGE_ORDER/2. Setting this
+ parameter to 1 or 2 should be enough to identify most
+ random memory corruption problems caused by bugs in
+ kernel or driver code when a CPU writes to (or reads
+ from) a random memory location. Note that there exists
+ a class of memory corruptions problems caused by buggy
+ H/W or F/W or by drivers badly programming DMA
+ (basically when memory is written at bus level and the
+ CPU MMU is bypassed) which are not detectable by
+ CONFIG_DEBUG_PAGEALLOC, hence this option will not
+ help tracking down these problems.
debug_pagealloc=
- [KNL] When CONFIG_DEBUG_PAGEALLOC is set, this
- parameter enables the feature at boot time. In
- default, it is disabled. We can avoid allocating huge
- chunk of memory for debug pagealloc if we don't enable
- it at boot time and the system will work mostly same
- with the kernel built without CONFIG_DEBUG_PAGEALLOC.
+ [KNL,EARLY] When CONFIG_DEBUG_PAGEALLOC is set, this parameter
+ enables the feature at boot time. By default, it is
+ disabled and the system will work mostly the same as a
+ kernel built without CONFIG_DEBUG_PAGEALLOC.
+ Note: to get most of debug_pagealloc error reports, it's
+ useful to also enable the page_owner functionality.
on: enable the feature
- debugpat [X86] Enable PAT debugging
+ debugfs= [KNL,EARLY] This parameter enables what is exposed to
+ userspace and debugfs internal clients.
+ Format: { on, no-mount, off }
+ on: All functions are enabled.
+ no-mount:
+ Filesystem is not registered but kernel clients can
+ access APIs and a crashkernel can be used to read
+ its content. There is nothing to mount.
+ off: Filesystem is not registered and clients
+ get a -EPERM as result when trying to register files
+ or directories within debugfs.
+ This is equivalent of the runtime functionality if
+ debugfs was not enabled in the kernel at all.
+ Default value is set in build-time with a kernel configuration.
- decnet.addr= [HW,NET]
- Format: <area>[,<node>]
- See also Documentation/networking/decnet.txt.
+ debugpat [X86] Enable PAT debugging
default_hugepagesz=
- [same as hugepagesz=] The size of the default
- HugeTLB page size. This is the size represented by
- the legacy /proc/ hugepages APIs, used for SHM, and
- default size when mounting hugetlbfs filesystems.
- Defaults to the default architecture's huge page size
- if not specified.
+ [HW] The size of the default HugeTLB page. This is
+ the size represented by the legacy /proc/ hugepages
+ APIs. In addition, this is the default hugetlb size
+ used for shmget(), mmap() and mounting hugetlbfs
+ filesystems. If not specified, defaults to the
+ architecture's default huge page size. Huge page
+ sizes are architecture dependent. See also
+ Documentation/admin-guide/mm/hugetlbpage.rst.
+ Format: size[KMG]
deferred_probe_timeout=
[KNL] Debugging option to set a timeout in seconds for
deferred probe to give up waiting on dependencies to
probe. Only specific dependencies (subsystems or
- drivers) that have opted in will be ignored. A timeout of 0
- will timeout at the end of initcalls. This option will also
+ drivers) that have opted in will be ignored. A timeout
+ of 0 will timeout at the end of initcalls. If the time
+ out hasn't expired, it'll be restarted by each
+ successful driver registration. This option will also
dump out devices still on the deferred probe list after
retrying.
+ delayacct [KNL] Enable per-task delay accounting
+
+ dell_smm_hwmon.ignore_dmi=
+ [HW] Continue probing hardware even if DMI data
+ indicates that the driver is running on unsupported
+ hardware.
+
+ dell_smm_hwmon.force=
+ [HW] Activate driver even if SMM BIOS signature does
+ not match list of supported models and enable otherwise
+ blacklisted features.
+
+ dell_smm_hwmon.power_status=
+ [HW] Report power status in /proc/i8k
+ (disabled by default).
+
+ dell_smm_hwmon.restricted=
+ [HW] Allow controlling fans only if SYS_ADMIN
+ capability is set.
+
+ dell_smm_hwmon.fan_mult=
+ [HW] Factor to multiply fan speed with.
+
+ dell_smm_hwmon.fan_max=
+ [HW] Maximum configurable fan speed.
+
+ dfltcc= [HW,S390]
+ Format: { on | off | def_only | inf_only | always }
+ on: s390 zlib hardware support for compression on
+ level 1 and decompression (default)
+ off: No s390 zlib hardware support
+ def_only: s390 zlib hardware support for deflate
+ only (compression on level 1)
+ inf_only: s390 zlib hardware support for inflate
+ only (decompression)
+ always: Same as 'on' but ignores the selected compression
+ level always using hardware support (used for debugging)
+
dhash_entries= [KNL]
Set number of hash buckets for dentry cache.
- disable_1tb_segments [PPC]
+ disable_1tb_segments [PPC,EARLY]
Disables the use of 1TB hash page table segments. This
causes the kernel to fall back to 256MB segments which
can be useful when debugging issues that require an SLB
miss to occur.
disable= [IPV6]
- See Documentation/networking/ipv6.txt.
+ See Documentation/networking/ipv6.rst.
- hardened_usercopy=
- [KNL] Under CONFIG_HARDENED_USERCOPY, whether
- hardening is enabled for this boot. Hardened
- usercopy checking is used to protect the kernel
- from reading or writing beyond known memory
- allocation boundaries as a proactive defense
- against bounds-checking flaws in the kernel's
- copy_to_user()/copy_from_user() interface.
- on Perform hardened usercopy checks (default).
- off Disable hardened usercopy checks.
-
- disable_radix [PPC]
+ disable_radix [PPC,EARLY]
Disable RADIX MMU mode on POWER9
- disable_cpu_apicid= [X86,APIC,SMP]
- Format: <int>
- The number of initial APIC ID for the
- corresponding CPU to be disabled at boot,
- mostly used for the kdump 2nd kernel to
- disable BSP to wake up multiple CPUs without
- causing system reset or hang due to sending
- INIT from AP to BSP.
-
- disable_counter_freezing [HW]
- Disable Intel PMU counter freezing feature.
- The feature only exists starting from
- Arch Perfmon v4 (Skylake and newer).
-
- disable_ddw [PPC/PSERIES]
- Disable Dynamic DMA Window support. Use this if
+ disable_tlbie [PPC]
+ Disable TLBIE instruction. Currently does not work
+ with KVM, with HASH MMU, or with coherent accelerators.
+
+ disable_ddw [PPC/PSERIES,EARLY]
+ Disable Dynamic DMA Window support. Use this
to workaround buggy firmware.
disable_ipv6= [IPV6]
- See Documentation/networking/ipv6.txt.
+ See Documentation/networking/ipv6.rst.
- disable_mtrr_cleanup [X86]
+ disable_mtrr_cleanup [X86,EARLY]
The kernel tries to adjust MTRR layout from continuous
to discrete, to make X server driver able to add WB
entry later. This parameter disables that.
- disable_mtrr_trim [X86, Intel and AMD only]
+ disable_mtrr_trim [X86, Intel and AMD only,EARLY]
By default the kernel will trim any uncacheable
memory out of your available memory pool based on
MTRR settings. This parameter disables that behavior,
possibly causing your machine to run very slowly.
- disable_timer_pin_1 [X86]
+ disable_timer_pin_1 [X86,EARLY]
Disable PIN 1 of APIC timer
Can be useful to work around chipset bugs.
@@ -902,27 +1248,48 @@
The filter can be disabled or changed to another
driver later using sysfs.
+ reg_file_data_sampling=
+ [X86] Controls mitigation for Register File Data
+ Sampling (RFDS) vulnerability. RFDS is a CPU
+ vulnerability which may allow userspace to infer
+ kernel data values previously stored in floating point
+ registers, vector registers, or integer registers.
+ RFDS only affects Intel Atom processors.
+
+ on: Turns ON the mitigation.
+ off: Turns OFF the mitigation.
+
+ This parameter overrides the compile time default set
+ by CONFIG_MITIGATION_RFDS. Mitigation cannot be
+ disabled when other VERW based mitigations (like MDS)
+ are enabled. In order to disable RFDS mitigation all
+ VERW based mitigations need to be disabled.
+
+ For details see:
+ Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst
+
+ driver_async_probe= [KNL]
+ List of driver names to be probed asynchronously. *
+ matches with all driver names. If * is specified, the
+ rest of the listed driver names are those that will NOT
+ match the *.
+ Format: <driver_name1>,<driver_name2>...
+
drm.edid_firmware=[<connector>:]<file>[,[<connector>:]<file>]
Broken monitors, graphic adapters, KVMs and EDIDless
panels may send no or incorrect EDID data sets.
This parameter allows to specify an EDID data sets
in the /lib/firmware directory that are used instead.
- Generic built-in EDID data sets are used, if one of
- edid/1024x768.bin, edid/1280x1024.bin,
- edid/1680x1050.bin, or edid/1920x1080.bin is given
- and no file with the same name exists. Details and
- instructions how to build your own EDID data are
- available in Documentation/EDID/HOWTO.txt. An EDID
- data set will only be used for a particular connector,
- if its name and a colon are prepended to the EDID
- name. Each connector may use a unique EDID data
- set by separating the files with a comma. An EDID
+ An EDID data set will only be used for a particular
+ connector, if its name and a colon are prepended to
+ the EDID name. Each connector may use a unique EDID
+ data set by separating the files with a comma. An EDID
data set with no connector name will be used for
any connectors not explicitly specified.
dscc4.setup= [NET]
- dt_cpu_ftrs= [PPC]
+ dt_cpu_ftrs= [PPC,EARLY]
Format: {"off" | "known"}
Control how the dt_cpu_ftrs device-tree binding is
used for CPU feature discovery and setup (if it
@@ -937,34 +1304,22 @@
what data is available or for reverse-engineering.
dyndbg[="val"] [KNL,DYNAMIC_DEBUG]
- module.dyndbg[="val"]
+ <module>.dyndbg[="val"]
Enable debug messages at boot time. See
Documentation/admin-guide/dynamic-debug-howto.rst
for details.
- nompx [X86] Disables Intel Memory Protection Extensions.
- See Documentation/x86/intel_mpx.txt for more
- information about the feature.
-
- nopku [X86] Disable Memory Protection Keys CPU feature found
- in some Intel CPUs.
-
- module.async_probe [KNL]
- Enable asynchronous probe on this module.
-
- early_ioremap_debug [KNL]
+ early_ioremap_debug [KNL,EARLY]
Enable debug messages in early_ioremap support. This
is useful for tracking down temporary early mappings
which are not unmapped.
- earlycon= [KNL] Output early console device and options.
+ earlycon= [KNL,EARLY] Output early console device and options.
- [ARM64] The early console is determined by the
- stdout-path property in device tree's chosen node,
- or determined by the ACPI SPCR table.
-
- [X86] When used with no options the early console is
- determined by the ACPI SPCR table.
+ When used with no options, the early console is
+ determined by stdout-path property in device tree's
+ chosen node or the ACPI SPCR table if supported by
+ the platform.
cdns,<addr>[,options]
Start an early, polled-mode console on a Cadence
@@ -973,10 +1328,10 @@
specified, the serial port must already be setup and
configured.
- uart[8250],io,<addr>[,options]
- uart[8250],mmio,<addr>[,options]
- uart[8250],mmio32,<addr>[,options]
- uart[8250],mmio32be,<addr>[,options]
+ uart[8250],io,<addr>[,options[,uartclk]]
+ uart[8250],mmio,<addr>[,options[,uartclk]]
+ uart[8250],mmio32,<addr>[,options[,uartclk]]
+ uart[8250],mmio32be,<addr>[,options[,uartclk]]
uart[8250],0x<addr>[,options]
Start an early, polled-mode console on the 8250/16550
UART at the specified I/O port or MMIO address.
@@ -985,7 +1340,9 @@
If none of [io|mmio|mmio32|mmio32be], <addr> is assumed
to be equivalent to 'mmio'. 'options' are specified
in the same format described for "console=ttyS<n>"; if
- unspecified, the h/w is not initialized.
+ unspecified, the h/w is not initialized. 'uartclk' is
+ the uart clock frequency; if unspecified, it is set
+ to 'BASE_BAUD' * 16.
pl011,<addr>
pl011,mmio32,<addr>
@@ -996,6 +1353,11 @@
the driver will use only 32-bit accessors to read/write
the device registers.
+ liteuart,<addr>
+ Start an early console on a litex serial port at the
+ specified address. The serial port must already be
+ setup and configured. Options are not yet supported.
+
meson,<addr>
Start an early, polled-mode console on a meson serial
port at the specified address. The serial port must
@@ -1020,6 +1382,16 @@
specified address. The serial port must already be
setup and configured. Options are not yet supported.
+ rda,<addr>
+ Start an early, polled-mode console on a serial port
+ of an RDA Micro SoC, such as RDA8810PL, at the
+ specified address. The serial port must already be
+ setup and configured. Options are not yet supported.
+
+ sbi
+ Use RISC-V SBI (Supervisor Binary Interface) for early
+ console.
+
smh Use ARM semihosting calls for early console.
s3c2410,<addr>
@@ -1047,6 +1419,12 @@
A valid base address must be provided, and the serial
port must already be setup and configured.
+ ec_imx21,<addr>
+ ec_imx6q,<addr>
+ Start an early, polled-mode, output-only console on the
+ Freescale i.MX UART at the specified address. The UART
+ must already be setup and configured.
+
ar3700_uart,<addr>
Start an early, polled-mode console on the
Armada 3700 serial port at the specified
@@ -1059,26 +1437,43 @@
specified address. The serial port must already be
setup and configured. Options are not yet supported.
- earlyprintk= [X86,SH,ARM,M68k,S390]
+ efifb,[options]
+ Start an early, unaccelerated console on the EFI
+ memory mapped framebuffer (if available). On cache
+ coherent non-x86 systems that use system memory for
+ the framebuffer, pass the 'ram' option so that it is
+ mapped with the correct attributes.
+
+ linflex,<addr>
+ Use early console provided by Freescale LINFlexD UART
+ serial driver for NXP S32V234 SoCs. A valid base
+ address must be provided, and the serial port must
+ already be setup and configured.
+
+ earlyprintk= [X86,SH,ARM,M68k,S390,UM,EARLY]
earlyprintk=vga
- earlyprintk=efi
earlyprintk=sclp
earlyprintk=xen
earlyprintk=serial[,ttySn[,baudrate]]
earlyprintk=serial[,0x...[,baudrate]]
earlyprintk=ttySn[,baudrate]
earlyprintk=dbgp[debugController#]
- earlyprintk=pciserial[,force],bus:device.function[,baudrate]
+ earlyprintk=mmio32,membase[,{nocfg|baudrate}]
+ earlyprintk=pciserial[,force],bus:device.function[,{nocfg|baudrate}]
earlyprintk=xdbc[xhciController#]
+ earlyprintk=bios
earlyprintk is useful when the kernel crashes before
the normal console is initialized. It is not enabled by
default because it has some cosmetic problems.
+ Use "nocfg" to skip UART configuration, assume
+ BIOS/firmware has configured UART correctly.
+
Append ",keep" to not disable it when the real console
takes over.
- Only one of vga, efi, serial, or usb debug port can
+ Only one of vga, serial, or usb debug port can
be used at a time.
Currently only ttyS0 and ttyS1 may be specified by
@@ -1093,13 +1488,15 @@
Interaction with the standard serial driver is not
very good.
- The VGA and EFI output is eventually overwritten by
+ The VGA output is eventually overwritten by
the real console.
- The xen output can only be used by Xen PV guests.
+ The xen option can only be used in Xen domains.
The sclp output can only be used on s390.
+ The bios output can only be used on SuperH.
+
The optional "force" to "pciserial" enables use of a
PCI device even when its classcode is not of the
UART class.
@@ -1112,73 +1509,68 @@
force: enforce the use of EDAC to report H/W event.
default: on.
- ekgdboc= [X86,KGDB] Allow early kernel console debugging
- ekgdboc=kbd
-
- This is designed to be used in conjunction with
- the boot argument: earlyprintk=vga
-
edd= [EDD]
Format: {"off" | "on" | "skip[mbr]"}
- efi= [EFI]
- Format: { "old_map", "nochunk", "noruntime", "debug" }
- old_map [X86-64]: switch to the old ioremap-based EFI
- runtime services mapping. 32-bit still uses this one by
- default.
+ efi= [EFI,EARLY]
+ Format: { "debug", "disable_early_pci_dma",
+ "nochunk", "noruntime", "nosoftreserve",
+ "novamap", "no_disable_early_pci_dma" }
+ debug: enable misc debug output.
+ disable_early_pci_dma: disable the busmaster bit on all
+ PCI bridges while in the EFI boot stub.
nochunk: disable reading files in "chunks" in the EFI
boot stub, as chunking can cause problems with some
firmware implementations.
noruntime : disable EFI runtime services support
- debug: enable misc debug output
-
- efi_no_storage_paranoia [EFI; X86]
+ nosoftreserve: The EFI_MEMORY_SP (Specific Purpose)
+ attribute may cause the kernel to reserve the
+ memory range for a memory mapping driver to
+ claim. Specify efi=nosoftreserve to disable this
+ reservation and treat the memory by its base type
+ (i.e. EFI_CONVENTIONAL_MEMORY / "System RAM").
+ novamap: do not call SetVirtualAddressMap().
+ no_disable_early_pci_dma: Leave the busmaster bit set
+ on all PCI bridges while in the EFI boot stub
+
+ efi_no_storage_paranoia [EFI,X86,EARLY]
Using this parameter you can use more than 50% of
your efi variable storage. Use this parameter only if
you are really sure that your UEFI does sane gc and
fulfills the spec otherwise your board may brick.
- efi_fake_mem= nn[KMG]@ss[KMG]:aa[,nn[KMG]@ss[KMG]:aa,..] [EFI; X86]
- Add arbitrary attribute to specific memory range by
- updating original EFI memory map.
- Region of memory which aa attribute is added to is
- from ss to ss+nn.
- If efi_fake_mem=2G@4G:0x10000,2G@0x10a0000000:0x10000
- is specified, EFI_MEMORY_MORE_RELIABLE(0x10000)
- attribute is added to range 0x100000000-0x180000000 and
- 0x10a0000000-0x1120000000.
-
- Using this parameter you can do debugging of EFI memmap
- related feature. For example, you can do debugging of
- Address Range Mirroring feature even if your box
- doesn't support it.
-
efivar_ssdt= [EFI; X86] Name of an EFI variable that contains an SSDT
that is to be dynamically loaded by Linux. If there are
multiple variables with the same name but with different
vendor GUIDs, all of them will be loaded. See
- Documentation/acpi/ssdt-overlays.txt for details.
+ Documentation/admin-guide/acpi/ssdt-overlays.rst for details.
eisa_irq_edge= [PARISC,HW]
See header of drivers/parisc/eisa.c.
+ ekgdboc= [X86,KGDB,EARLY] Allow early kernel console debugging
+ Format: ekgdboc=kbd
+
+ This is designed to be used in conjunction with
+ the boot argument: earlyprintk=vga
+
+ This parameter works in place of the kgdboc parameter
+ but can only be used if the backing tty is available
+ very early in the boot process. For early debugging
+ via a serial port see kgdboc_earlycon instead.
+
elanfreq= [X86-32]
See comment before function elanfreq_setup() in
arch/x86/kernel/cpu/cpufreq/elanfreq.c.
- elevator= [IOSCHED]
- Format: {"cfq" | "deadline" | "noop"}
- See Documentation/block/cfq-iosched.txt and
- Documentation/block/deadline-iosched.txt for details.
-
- elfcorehdr=[size[KMG]@]offset[KMG] [IA64,PPC,SH,X86,S390]
+ elfcorehdr=[size[KMG]@]offset[KMG] [PPC,SH,X86,S390,EARLY]
Specifies physical address of start of kernel core
image elf header and optionally the size. Generally
kexec loader will pass this option to capture kernel.
- See Documentation/kdump/kdump.txt for details.
+ See Documentation/admin-guide/kdump/kdump.rst for details.
- enable_mtrr_cleanup [X86]
+ enable_mtrr_cleanup [X86,EARLY]
The kernel tries to adjust MTRR layout from continuous
to discrete, to make X server driver able to add WB
entry later. This parameter enables that.
@@ -1189,13 +1581,14 @@
(in particular on some ATI chipsets).
The kernel tries to set a reasonable default.
- enforcing [SELINUX] Set initial enforcing status.
+ enforcing= [SELINUX] Set initial enforcing status.
Format: {"0" | "1"}
See security/selinux/Kconfig help text.
0 -- permissive (log only, no denials).
1 -- enforcing (deny and log).
Default value is 0.
- Value can be changed at runtime via /selinux/enforce.
+ Value can be changed at runtime via
+ /sys/fs/selinux/enforce.
erst_disable [ACPI]
Disable Error Record Serialization Table (ERST)
@@ -1210,21 +1603,30 @@
Permit 'security.evm' to be updated regardless of
current integrity status.
+ early_page_ext [KNL,EARLY] Enforces page_ext initialization to earlier
+ stages so cover more early boot allocations.
+ Please note that as side effect some optimizations
+ might be disabled to achieve that (e.g. parallelized
+ memory initialization is disabled) so the boot process
+ might take longer, especially on systems with a lot of
+ memory. Available with CONFIG_PAGE_EXTENSION=y.
+
failslab=
+ fail_usercopy=
fail_page_alloc=
+ fail_skb_realloc=
fail_make_request=[KNL]
General fault injection mechanism.
Format: <interval>,<probability>,<space>,<times>
See also Documentation/fault-injection/.
- floppy= [HW]
- See Documentation/blockdev/floppy.txt.
+ fb_tunnels= [NET]
+ Format: { initns | none }
+ See Documentation/admin-guide/sysctl/net.rst for
+ fb_tunnels_only_for_init_ns
- force_pal_cache_flush
- [IA-64] Avoid check_sal_cache_flush which may hang on
- buggy SAL_CACHE_FLUSH implementations. Using this
- parameter will force ia64_sal_cache_flush to call
- ia64_pal_cache_flush instead of SAL_CACHE_FLUSH.
+ floppy= [HW]
+ See Documentation/admin-guide/blockdev/floppy.rst.
forcepae [X86-32]
Forcefully enable Physical Address Extension (PAE).
@@ -1233,21 +1635,60 @@
Warning: use of this parameter will taint the kernel
and may cause unknown problems.
+ fred= [X86-64]
+ Enable/disable Flexible Return and Event Delivery.
+ Format: { on | off }
+ on: enable FRED when it's present.
+ off: disable FRED, the default setting.
+
ftrace=[tracer]
[FTRACE] will set and start the specified tracer
as early as possible in order to facilitate early
boot debugging.
- ftrace_dump_on_oops[=orig_cpu]
+ ftrace_boot_snapshot
+ [FTRACE] On boot up, a snapshot will be taken of the
+ ftrace ring buffer that can be read at:
+ /sys/kernel/tracing/snapshot.
+ This is useful if you need tracing information from kernel
+ boot up that is likely to be overridden by user space
+ start up functionality.
+
+ Optionally, the snapshot can also be defined for a tracing
+ instance that was created by the trace_instance= command
+ line parameter.
+
+ trace_instance=foo,sched_switch ftrace_boot_snapshot=foo
+
+ The above will cause the "foo" tracing instance to trigger
+ a snapshot at the end of boot up.
+
+ ftrace_dump_on_oops[=2(orig_cpu) | =<instance>][,<instance> |
+ ,<instance>=2(orig_cpu)]
[FTRACE] will dump the trace buffers on oops.
- If no parameter is passed, ftrace will dump
- buffers of all CPUs, but if you pass orig_cpu, it will
- dump only the buffer of the CPU that triggered the
- oops.
+ If no parameter is passed, ftrace will dump global
+ buffers of all CPUs, if you pass 2 or orig_cpu, it
+ will dump only the buffer of the CPU that triggered
+ the oops, or the specific instance will be dumped if
+ its name is passed. Multiple instance dump is also
+ supported, and instances are separated by commas. Each
+ instance supports only dump on CPU that triggered the
+ oops by passing 2 or orig_cpu to it.
+
+ ftrace_dump_on_oops=foo=orig_cpu
+
+ The above will dump only the buffer of "foo" instance
+ on CPU that triggered the oops.
+
+ ftrace_dump_on_oops,foo,bar=orig_cpu
+
+ The above will dump global buffer on all CPUs, the
+ buffer of "foo" instance on all CPUs and the buffer
+ of "bar" instance on CPU that triggered the oops.
ftrace_filter=[function-list]
[FTRACE] Limit the functions traced by the function
- tracer at boot up. function-list is a comma separated
+ tracer at boot up. function-list is a comma-separated
list of functions. This list can be changed at run
time by the set_ftrace_filter file in the debugfs
tracing directory.
@@ -1261,13 +1702,13 @@
ftrace_graph_filter=[function-list]
[FTRACE] Limit the top level callers functions traced
by the function graph tracer at boot up.
- function-list is a comma separated list of functions
+ function-list is a comma-separated list of functions
that can be changed at run time by the
set_graph_function file in the debugfs tracing directory.
ftrace_graph_notrace=[function-list]
[FTRACE] Do not trace from the functions specified in
- function-list. This list is a comma separated list of
+ function-list. This list is a comma-separated list of
functions that can be changed at run time by the
set_graph_notrace file in the debugfs tracing directory.
@@ -1277,6 +1718,43 @@
can be changed at run time by the max_graph_depth file
in the tracefs tracing directory. default: 0 (no limit)
+ fw_devlink= [KNL,EARLY] Create device links between consumer and supplier
+ devices by scanning the firmware to infer the
+ consumer/supplier relationships. This feature is
+ especially useful when drivers are loaded as modules as
+ it ensures proper ordering of tasks like device probing
+ (suppliers first, then consumers), supplier boot state
+ clean up (only after all consumers have probed),
+ suspend/resume & runtime PM (consumers first, then
+ suppliers).
+ Format: { off | permissive | on | rpm }
+ off -- Don't create device links from firmware info.
+ permissive -- Create device links from firmware info
+ but use it only for ordering boot state clean
+ up (sync_state() calls).
+ on -- Create device links from firmware info and use it
+ to enforce probe and suspend/resume ordering.
+ rpm -- Like "on", but also use to order runtime PM.
+
+ fw_devlink.strict=<bool>
+ [KNL,EARLY] Treat all inferred dependencies as mandatory
+ dependencies. This only applies for fw_devlink=on|rpm.
+ Format: <bool>
+
+ fw_devlink.sync_state =
+ [KNL,EARLY] When all devices that could probe have finished
+ probing, this parameter controls what to do with
+ devices that haven't yet received their sync_state()
+ calls.
+ Format: { strict | timeout }
+ strict -- Default. Continue waiting on consumers to
+ probe successfully.
+ timeout -- Give up waiting on consumers and call
+ sync_state() on any devices that haven't yet
+ received their sync_state() calls after
+ deferred_probe_timeout has expired or by
+ late_initcall() if !CONFIG_MODULES.
+
gamecon.map[2|3]=
[HW,JOY] Multisystem joystick and NES/SNES/PSX pad
support via parallel port (up to 5 devices per port)
@@ -1285,10 +1763,32 @@
gamma= [HW,DRM]
- gart_fix_e820= [X86_64] disable the fix e820 for K8 GART
+ gart_fix_e820= [X86-64,EARLY] disable the fix e820 for K8 GART
Format: off | on
default: on
+ gather_data_sampling=
+ [X86,INTEL,EARLY] Control the Gather Data Sampling (GDS)
+ mitigation.
+
+ Gather Data Sampling is a hardware vulnerability which
+ allows unprivileged speculative access to data which was
+ previously stored in vector registers.
+
+ This issue is mitigated by default in updated microcode.
+ The mitigation may have a performance impact but can be
+ disabled. On systems without the microcode mitigation
+ disabling AVX serves as a mitigation.
+
+ force: Disable AVX to mitigate systems without
+ microcode mitigation. No effect if the microcode
+ mitigation is present. Known to cause crashes in
+ userspace with buggy AVX enumeration.
+
+ off: Disable GDS mitigation.
+
+ gbpages [X86] Use GB pages for kernel direct mappings.
+
gcov_persist= [GCOV] When non-zero (default), profiling data for
kernel modules is saved and remains accessible via
debugfs, even when the module is unloaded/reloaded.
@@ -1299,6 +1799,12 @@
Don't use this when you are not running on the
android emulator
+ gpio-mockup.gpio_mockup_ranges
+ [HW] Sets the ranges of gpiochip of for this device.
+ Format: <start1>,<end1>,<start2>,<end2>...
+ gpio-mockup.gpio_mockup_named_lines
+ [HW] Let the driver know GPIO lines should be named.
+
gpt [EFI] Forces disk with valid GPT signature but
invalid Protective MBR to be treated as GPT. If the
primary GPT is corrupted, it enables the backup/alternate
@@ -1322,22 +1828,50 @@
Format: <unsigned int> such that (rxsize & ~0x1fffc0) == 0.
Default: 1024
- gpio-mockup.gpio_mockup_ranges
- [HW] Sets the ranges of gpiochip of for this device.
- Format: <start1>,<end1>,<start2>,<end2>...
+ hardened_usercopy=
+ [KNL] Under CONFIG_HARDENED_USERCOPY, whether
+ hardening is enabled for this boot. Hardened
+ usercopy checking is used to protect the kernel
+ from reading or writing beyond known memory
+ allocation boundaries as a proactive defense
+ against bounds-checking flaws in the kernel's
+ copy_to_user()/copy_from_user() interface.
+ The default is determined by
+ CONFIG_HARDENED_USERCOPY_DEFAULT_ON.
+ on Perform hardened usercopy checks.
+ off Disable hardened usercopy checks.
hardlockup_all_cpu_backtrace=
[KNL] Should the hard-lockup detector generate
backtraces on all cpus.
- Format: <integer>
+ Format: 0 | 1
+
+ hash_pointers=
+ [KNL,EARLY]
+ By default, when pointers are printed to the console
+ or buffers via the %p format string, that pointer is
+ "hashed", i.e. obscured by hashing the pointer value.
+ This is a security feature that hides actual kernel
+ addresses from unprivileged users, but it also makes
+ debugging the kernel more difficult since unequal
+ pointers can no longer be compared. The choices are:
+ Format: { auto | always | never }
+ Default: auto
+
+ auto - Hash pointers unless slab_debug is enabled.
+ always - Always hash pointers (even if slab_debug is
+ enabled).
+ never - Never hash pointers. This option should only
+ be specified when debugging the kernel. Do
+ not use on production kernels. The boot
+ param "no_hash_pointers" is an alias for
+ this mode.
hashdist= [KNL,NUMA] Large hashes allocated during boot
are distributed across NUMA nodes. Defaults on
for 64-bit NUMA, off otherwise.
Format: 0 | 1 (for off | on)
- hcl= [IA-64] SGI's Hardware Graph compatibility layer
-
hd= [EIDE] (E)IDE hard drive subsystem geometry
Format: <cyl>,<head>,<sect>
@@ -1346,7 +1880,34 @@
corresponding firmware-first mode error processing
logic will be disabled.
- highmem=nn[KMG] [KNL,BOOT] forces the highmem zone to have an exact
+ hibernate= [HIBERNATION]
+ noresume Don't check if there's a hibernation image
+ present during boot.
+ nocompress Don't compress/decompress hibernation images.
+ no Disable hibernation and resume.
+ protect_image Turn on image protection during restoration
+ (that will set all pages holding image data
+ during restoration read-only).
+
+ hibernate.compressor= [HIBERNATION] Compression algorithm to be
+ used with hibernation.
+ Format: { lzo | lz4 }
+ Default: lzo
+
+ lzo: Select LZO compression algorithm to
+ compress/decompress hibernation image.
+
+ lz4: Select LZ4 compression algorithm to
+ compress/decompress hibernation image.
+
+ hibernate.pm_test_delay=
+ [HIBERNATION]
+ Sets the number of seconds to remain in a hibernation test
+ mode before resuming the system (see
+ /sys/power/pm_test). Only available when CONFIG_PM_DEBUG
+ is set. Default value is 5.
+
+ highmem=nn[KMG] [KNL,BOOT,EARLY] forces the highmem zone to have an exact
size of <nn>. This works even on boxes that have no
highmem otherwise. This also works to reduce highmem
size on bigger boxes.
@@ -1355,11 +1916,21 @@
Valid parameters: "on", "off"
Default: "on"
- hisax= [HW,ISDN]
- See Documentation/isdn/README.HiSax.
-
hlt [BUGS=ARM,SH]
+ hostname= [KNL,EARLY] Set the hostname (aka UTS nodename).
+ Format: <string>
+ This allows setting the system's hostname during early
+ startup. This sets the name returned by gethostname.
+ Using this parameter to set the hostname makes it
+ possible to ensure the hostname is correctly set before
+ any userspace processes run, avoiding the possibility
+ that a process may call gethostname before the hostname
+ has been explicitly set, resulting in the calling
+ process getting an incorrect result. The string must
+ not exceed the maximum allowed hostname length (usually
+ 64 characters) and will be truncated otherwise.
+
hpet= [X86-32,HPET] option to control HPET usage
Format: { enable (default) | disable | force |
verbose }
@@ -1371,19 +1942,78 @@
hpet_mmap= [X86, HPET_MMAP] Allow userspace to mmap HPET
registers. Default set by CONFIG_HPET_MMAP_DEFAULT.
- hugepages= [HW,X86-32,IA-64] HugeTLB pages to allocate at boot.
- hugepagesz= [HW,IA-64,PPC,X86-64] The size of the HugeTLB pages.
- On x86-64 and powerpc, this option can be specified
- multiple times interleaved with hugepages= to reserve
- huge pages of different sizes. Valid pages sizes on
- x86-64 are 2M (when the CPU supports "pse") and 1G
- (when the CPU supports the "pdpe1gb" cpuinfo flag).
+ hugepages= [HW,EARLY] Number of HugeTLB pages to allocate at boot.
+ If this follows hugepagesz (below), it specifies
+ the number of pages of hugepagesz to be allocated.
+ If this is the first HugeTLB parameter on the command
+ line, it specifies the number of pages to allocate for
+ the default huge page size. If using node format, the
+ number of pages to allocate per-node can be specified.
+ See also Documentation/admin-guide/mm/hugetlbpage.rst.
+ Format: <integer> or (node format)
+ <node>:<integer>[,<node>:<integer>]
+
+ hugepagesz=
+ [HW,EARLY] The size of the HugeTLB pages. This is
+ used in conjunction with hugepages (above) to
+ allocate huge pages of a specific size at boot. The
+ pair hugepagesz=X hugepages=Y can be specified once
+ for each supported huge page size. Huge page sizes
+ are architecture dependent. See also
+ Documentation/admin-guide/mm/hugetlbpage.rst.
+ Format: size[KMG]
+
+ hugepage_alloc_threads=
+ [HW] The number of threads that should be used to
+ allocate hugepages during boot. This option can be
+ used to improve system bootup time when allocating
+ a large amount of huge pages.
+ The default value is 25% of the available hardware threads.
+
+ Note that this parameter only applies to non-gigantic huge pages.
+
+ hugetlb_cma= [HW,CMA,EARLY] The size of a CMA area used for allocation
+ of gigantic hugepages. Or using node format, the size
+ of a CMA area per node can be specified.
+ Format: nn[KMGTPE] or (node format)
+ <node>:nn[KMGTPE][,<node>:nn[KMGTPE]]
+
+ Reserve a CMA area of given size and allocate gigantic
+ hugepages using the CMA allocator. If enabled, the
+ boot-time allocation of gigantic hugepages is skipped.
+
+ hugetlb_cma_only=
+ [HW,CMA,EARLY] When allocating new HugeTLB pages, only
+ try to allocate from the CMA areas.
+
+ This option does nothing if hugetlb_cma= is not also
+ specified.
+
+ hugetlb_free_vmemmap=
+ [KNL] Requires CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
+ enabled.
+ Control if HugeTLB Vmemmap Optimization (HVO) is enabled.
+ Allows heavy hugetlb users to free up some more
+ memory (7 * PAGE_SIZE for each 2MB hugetlb page).
+ Format: { on | off (default) }
+
+ on: enable HVO
+ off: disable HVO
+
+ Built with CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON=y,
+ the default is on.
+
+ Note that the vmemmap pages may be allocated from the added
+ memory block itself when memory_hotplug.memmap_on_memory is
+ enabled, those vmemmap pages cannot be optimized even if this
+ feature is enabled. Other vmemmap pages not allocated from
+ the added memory block itself do not be affected.
hung_task_panic=
[KNL] Should the hung task detector generate panics.
- Format: <integer>
+ Format: 0 | 1
- A nonzero value instructs the kernel to panic when a
+ A value of 1 instructs the kernel to panic when a
hung task is detected. The default value is controlled
by the CONFIG_BOOTPARAM_HUNG_TASK_PANIC build-time
option. The value selected by this boot parameter can
@@ -1395,15 +2025,16 @@
If specified, z/VM IUCV HVC accepts connections
from listed z/VM user IDs only.
- hv_nopvspin [X86,HYPER_V] Disables the paravirt spinlock optimizations
- which allow the hypervisor to 'idle' the
- guest on lock contention.
+ hv_nopvspin [X86,HYPER_V,EARLY]
+ Disables the paravirt spinlock optimizations
+ which allow the hypervisor to 'idle' the guest
+ on lock contention.
- keep_bootcon [KNL]
- Do not unregister boot console at start. This is only
- useful for debugging when something happens in the window
- between unregistering the boot console and initializing
- the real console.
+ hw_protection= [HW]
+ Format: reboot | shutdown
+
+ Hardware protection action taken on critical events like
+ overtemperature or imminent voltage loss.
i2c_bus= [HW] Override the default board specific I2C bus speed
or register an additional I2C bus that is not
@@ -1411,6 +2042,28 @@
Format:
<bus_id>,<clkrate>
+ i2c_touchscreen_props= [HW,ACPI,X86]
+ Set device-properties for ACPI-enumerated I2C-attached
+ touchscreen, to e.g. fix coordinates of upside-down
+ mounted touchscreens. If you need this option please
+ submit a drivers/platform/x86/touchscreen_dmi.c patch
+ adding a DMI quirk for this.
+
+ Format:
+ <ACPI_HW_ID>:<prop_name>=<val>[:prop_name=val][:...]
+ Where <val> is one of:
+ Omit "=<val>" entirely Set a boolean device-property
+ Unsigned number Set a u32 device-property
+ Anything else Set a string device-property
+
+ Examples (split over multiple lines):
+ i2c_touchscreen_props=GDIX1001:touchscreen-inverted-x:
+ touchscreen-inverted-y
+
+ i2c_touchscreen_props=MSSL1680:touchscreen-size-x=1920:
+ touchscreen-size-y=1080:touchscreen-inverted-y:
+ firmware-name=gsl1680-vendor-model.fw:silead,home-button
+
i8042.debug [HW] Toggle i8042 debug mode
i8042.unmask_kbd_data
[HW] Enable printing of interrupt data from the KBD port
@@ -1439,20 +2092,11 @@
architectures force reset to be always executed
i8042.unlock [HW] Unlock (ignore) the keylock
i8042.kbdreset [HW] Reset device connected to KBD port
+ i8042.probe_defer
+ [HW] Allow deferred probing upon i8042 probe errors
i810= [HW,DRM]
- i8k.ignore_dmi [HW] Continue probing hardware even if DMI data
- indicates that the driver is running on unsupported
- hardware.
- i8k.force [HW] Activate i8k driver even if SMM BIOS signature
- does not match list of supported models.
- i8k.power_status
- [HW] Report power status in /proc/i8k
- (disabled by default)
- i8k.restricted [HW] Allow controlling fans only if SYS_ADMIN
- capability is set.
-
i915.invert_brightness=
[DRM] Invert the sense of the variable that is used to
set the brightness of the panel backlight. Normally a
@@ -1467,42 +2111,48 @@
0 -- machine default
1 -- force brightness inversion
+ ia32_emulation= [X86-64]
+ Format: <bool>
+ When true, allows loading 32-bit programs and executing 32-bit
+ syscalls, essentially overriding IA32_EMULATION_DEFAULT_DISABLED at
+ boot time. When false, unconditionally disables IA32 emulation.
+
icn= [HW,ISDN]
Format: <io>[,<membase>[,<icn_id>[,<icn_id2>]]]
- ide-core.nodma= [HW] (E)IDE subsystem
- Format: =0.0 to prevent dma on hda, =0.1 hdb =1.0 hdc
- .vlb_clock .pci_clock .noflush .nohpa .noprobe .nowerr
- .cdrom .chs .ignore_cable are additional options
- See Documentation/ide/ide.txt.
- ide-generic.probe-mask= [HW] (E)IDE subsystem
- Format: <int>
- Probe mask for legacy ISA IDE ports. Depending on
- platform up to 6 ports are supported, enabled by
- setting corresponding bits in the mask to 1. The
- default value is 0x0, which has a special meaning.
- On systems that have PCI, it triggers scanning the
- PCI bus for the first and the second port, which
- are then probed. On systems without PCI the value
- of 0x0 enables probing the two first ports as if it
- was 0x3.
-
- ide-pci-generic.all-generic-ide [HW] (E)IDE subsystem
- Claim all unknown PCI IDE storage controllers.
-
- idle= [X86]
+ idle= [X86,EARLY]
Format: idle=poll, idle=halt, idle=nomwait
- Poll forces a polling idle loop that can slightly
- improve the performance of waking up a idle CPU, but
- will use a lot of power and make the system run hot.
- Not recommended.
+
+ idle=poll: Don't do power saving in the idle loop
+ using HLT, but poll for rescheduling event. This will
+ make the CPUs eat a lot more power, but may be useful
+ to get slightly better performance in multiprocessor
+ benchmarks. It also makes some profiling using
+ performance counters more accurate. Please note that
+ on systems with MONITOR/MWAIT support (like Intel
+ EM64T CPUs) this option has no performance advantage
+ over the normal idle loop. It may also interact badly
+ with hyperthreading.
+
idle=halt: Halt is forced to be used for CPU idle.
In such case C2/C3 won't be used again.
+
idle=nomwait: Disable mwait for CPU C-states
+ idxd.sva= [HW]
+ Format: <bool>
+ Allow force disabling of Shared Virtual Memory (SVA)
+ support for the idxd driver. By default it is set to
+ true (1).
+
+ idxd.tc_override= [HW]
+ Format: <bool>
+ Allow override of default traffic class configuration
+ for the device. By default it is set to false (0).
+
ieee754= [MIPS] Select IEEE Std 754 conformance mode
- Format: { strict | legacy | 2008 | relaxed }
+ Format: { strict | legacy | 2008 | relaxed | emulated }
Default: strict
Choose which programs will be accepted for execution
@@ -1522,6 +2172,8 @@
by the FPU
relaxed accept any binaries regardless of whether
supported by the FPU
+ emulated accept any binaries but enable FPU emulator
+ if binary mode is unsupported by the FPU.
The FPU emulator is always able to support both NaN
encodings, so if no FPU hardware is present or it has
@@ -1536,7 +2188,7 @@
mode generally follows that for the NaN encoding,
except where unsupported by hardware.
- ignore_loglevel [KNL]
+ ignore_loglevel [KNL,EARLY]
Ignore loglevel setting - this will print /all/
kernel messages to the console. Useful for debugging.
We also add it as printk module parameter, so users
@@ -1555,7 +2207,7 @@
Format: { "off" | "enforce" | "fix" | "log" }
default: "enforce"
- ima_appraise_tcb [IMA]
+ ima_appraise_tcb [IMA] Deprecated. Use ima_policy= instead.
The builtin appraise policy appraises all files
owned by uid=0.
@@ -1574,7 +2226,7 @@
ima_policy= [IMA]
The builtin policies to load during IMA setup.
Format: "tcb | appraise_tcb | secure_boot |
- fail_securely"
+ fail_securely | critical_data"
The "tcb" policy measures all programs exec'd, files
mmap'd for exec, and all files opened with the read
@@ -1582,8 +2234,7 @@
uid=0.
The "appraise_tcb" policy appraises the integrity of
- all files owned by root. (This is the equivalent
- of ima_appraise_tcb.)
+ all files owned by root.
The "secure_boot" policy appraises the integrity
of files (eg. kexec kernel image, kernel modules,
@@ -1594,6 +2245,9 @@
filesystems with the SB_I_UNVERIFIABLE_SIGNATURE
flag.
+ The "critical_data" policy measures kernel integrity
+ critical data.
+
ima_tcb [IMA] Deprecated. Use ima_policy= instead.
Load a policy which meets the needs of the Trusted
Computing Base. This means IMA will measure all
@@ -1602,7 +2256,8 @@
ima_template= [IMA]
Select one of defined IMA measurements template formats.
- Formats: { "ima" | "ima-ng" | "ima-sig" }
+ Formats: { "ima" | "ima-ng" | "ima-ngv2" | "ima-sig" |
+ "ima-sigv2" }
Default: "ima-ng"
ima_template_fmt=
@@ -1626,6 +2281,28 @@
different crypto accelerators. This option can be used
to achieve best performance for particular HW.
+ ima= [IMA] Enable or disable IMA
+ Format: { "off" | "on" }
+ Default: "on"
+ Note that disabling IMA is limited to kdump kernel.
+
+ indirect_target_selection= [X86,Intel] Mitigation control for Indirect
+ Target Selection(ITS) bug in Intel CPUs. Updated
+ microcode is also required for a fix in IBPB.
+
+ on: Enable mitigation (default).
+ off: Disable mitigation.
+ force: Force the ITS bug and deploy default
+ mitigation.
+ vmexit: Only deploy mitigation if CPU is affected by
+ guest/host isolation part of ITS.
+ stuff: Deploy RSB-fill mitigation when retpoline is
+ also deployed. Otherwise, deploy the default
+ mitigation.
+
+ For details see:
+ Documentation/admin-guide/hw-vuln/indirect-target-selection.rst
+
init= [KNL]
Format: <full_path>
Run specified binary instead of /sbin/init as init
@@ -1639,9 +2316,37 @@
initcall functions. Useful for debugging built-in
modules and initcalls.
- initrd= [BOOT] Specify the location of the initial ramdisk
+ initramfs_async= [KNL]
+ Format: <bool>
+ Default: 1
+ This parameter controls whether the initramfs
+ image is unpacked asynchronously, concurrently
+ with devices being probed and
+ initialized. This should normally just work,
+ but as a debugging aid, one can get the
+ historical behaviour of the initramfs
+ unpacking being completed before device_ and
+ late_ initcalls.
+
+ initrd= [BOOT,EARLY] Specify the location of the initial ramdisk
+
+ initrdmem= [KNL,EARLY] Specify a physical address and size from which to
+ load the initrd. If an initrd is compiled in or
+ specified in the bootparams, it takes priority over this
+ setting.
+ Format: ss[KMG],nn[KMG]
+ Default is 0, 0
+
+ init_on_alloc= [MM,EARLY] Fill newly allocated pages and heap objects with
+ zeroes.
+ Format: 0 | 1
+ Default set by CONFIG_INIT_ON_ALLOC_DEFAULT_ON.
+
+ init_on_free= [MM,EARLY] Fill freed pages and heap objects with zeroes.
+ Format: 0 | 1
+ Default set by CONFIG_INIT_ON_FREE_DEFAULT_ON.
- init_pkru= [x86] Specify the default memory protection keys rights
+ init_pkru= [X86] Specify the default memory protection keys rights
register contents for all processes. 0x55555554 by
default (disallow access to all but pkey 0). Can
override in debugfs after boot.
@@ -1649,7 +2354,7 @@
inport.irq= [HW] Inport (ATI XL and Microsoft) busmouse driver
Format: <irq>
- int_pln_enable [x86] Enable power limit notification interrupt
+ int_pln_enable [X86] Enable power limit notification interrupt
integrity_audit=[IMA]
Format: { "0" | "1" }
@@ -1667,27 +2372,18 @@
bypassed by not enabling DMAR with this option. In
this case, gfx device will use physical address for
DMA.
- forcedac [x86_64]
- With this option iommu will not optimize to look
- for io virtual address below 32-bit forcing dual
- address cycle on pci bus for cards supporting greater
- than 32-bit addressing. The default is to look
- for translation below 32-bit and if not available
- then look in the higher range.
strict [Default Off]
- With this option on every unmap_single operation will
- result in a hardware IOTLB flush operation as opposed
- to batching them for performance.
+ Deprecated, equivalent to iommu.strict=1.
sp_off [Default Off]
By default, super page will be supported if Intel IOMMU
has the capability. With this option, super page will
not be supported.
- ecs_off [Default Off]
- By default, extended context tables will be supported if
- the hardware advertises that it has support both for the
- extended tables themselves, and also PASID support. With
- this option set, extended tables will not be used even
- on hardware which claims to support them.
+ sm_on
+ Enable the Intel IOMMU scalable mode if the hardware
+ advertises that it has support for the scalable mode
+ translation.
+ sm_off
+ Disallow use of the Intel IOMMU scalable mode.
tboot_noforce [Default Off]
Do not force the Intel IOMMU enabled under tboot.
By default, tboot will force Intel IOMMU on, which
@@ -1702,10 +2398,20 @@
0 disables intel_idle and fall back on acpi_idle.
1 to 9 specify maximum depth of C-state.
- intel_pstate= [X86]
+ intel_pstate= [X86,EARLY]
disable
Do not enable intel_pstate as the default
scaling driver for the supported processors
+ active
+ Use intel_pstate driver to bypass the scaling
+ governors layer of cpufreq and provides it own
+ algorithms for p-state selection. There are two
+ P-state selection algorithms provided by
+ intel_pstate in the active mode: powersave and
+ performance. The way they both operate depends
+ on whether or not the hardware managed P-states
+ (HWP) feature has been enabled in the processor
+ and possibly on the processor model.
passive
Use intel_pstate as a scaling driver, but configure it
to work with generic cpufreq governors (instead of
@@ -1735,35 +2441,101 @@
per_cpu_perf_limits
Allow per-logical-CPU P-State performance control limits using
cpufreq sysfs interface
+ no_cas
+ Do not enable capacity-aware scheduling (CAS) on
+ hybrid systems
- intremap= [X86-64, Intel-IOMMU]
+ intremap= [X86-64,Intel-IOMMU,EARLY]
on enable Interrupt Remapping (default)
off disable Interrupt Remapping
nosid disable Source ID checking
no_x2apic_optout
BIOS x2APIC opt-out request will be ignored
nopost disable Interrupt Posting
+ posted_msi
+ enable MSIs delivered as posted interrupts
iomem= Disable strict checking of access to MMIO memory
strict regions from userspace.
relaxed
- iommu= [x86]
+ iommu= [X86,EARLY]
+
off
+ Don't initialize and use any kind of IOMMU.
+
force
+ Force the use of the hardware IOMMU even when
+ it is not actually needed (e.g. because < 3 GB
+ memory).
+
noforce
+ Don't force hardware IOMMU usage when it is not
+ needed. (default).
+
biomerge
panic
nopanic
merge
nomerge
+
soft
- pt [x86]
- nopt [x86]
- nobypass [PPC/POWERNV]
+ Use software bounce buffering (SWIOTLB) (default for
+ Intel machines). This can be used to prevent the usage
+ of an available hardware IOMMU.
+
+ [X86]
+ pt
+ [X86]
+ nopt
+ [PPC/POWERNV]
+ nobypass
Disable IOMMU bypass, using IOMMU for PCI devices.
- iommu.strict= [ARM64] Configure TLB invalidation behaviour
+ [X86]
+ AMD Gart HW IOMMU-specific options:
+
+ <size>
+ Set the size of the remapping area in bytes.
+
+ allowed
+ Overwrite iommu off workarounds for specific chipsets
+
+ fullflush
+ Flush IOMMU on each allocation (default).
+
+ nofullflush
+ Don't use IOMMU fullflush.
+
+ memaper[=<order>]
+ Allocate an own aperture over RAM with size
+ 32MB<<order. (default: order=1, i.e. 64MB)
+
+ merge
+ Do scatter-gather (SG) merging. Implies "force"
+ (experimental).
+
+ nomerge
+ Don't do scatter-gather (SG) merging.
+
+ noaperture
+ Ask the IOMMU not to touch the aperture for AGP.
+
+ noagp
+ Don't initialize the AGP driver and use full aperture.
+
+ panic
+ Always panic when IOMMU overflows.
+
+ iommu.forcedac= [ARM64,X86,EARLY] Control IOVA allocation for PCI devices.
+ Format: { "0" | "1" }
+ 0 - Try to allocate a 32-bit DMA address first, before
+ falling back to the full range if needed.
+ 1 - Allocate directly from the full usable range,
+ forcing Dual Address Cycle for PCI cards supporting
+ greater than 32-bit addressing.
+
+ iommu.strict= [ARM64,X86,S390,EARLY] Configure TLB invalidation behaviour
Format: { "0" | "1" }
0 - Lazy mode.
Request that DMA unmap operations use deferred
@@ -1771,22 +2543,25 @@
throughput at the cost of reduced device isolation.
Will fall back to strict mode if not supported by
the relevant IOMMU driver.
- 1 - Strict mode (default).
+ 1 - Strict mode.
DMA unmap operations invalidate IOMMU hardware TLBs
synchronously.
+ unset - Use value of CONFIG_IOMMU_DEFAULT_DMA_{LAZY,STRICT}.
+ Note: on x86, strict mode specified via one of the
+ legacy driver-specific options takes precedence.
iommu.passthrough=
- [ARM64] Configure DMA to bypass the IOMMU by default.
+ [ARM64,X86,EARLY] Configure DMA to bypass the IOMMU by default.
Format: { "0" | "1" }
0 - Use IOMMU translation for DMA.
1 - Bypass the IOMMU for DMA.
unset - Use value of CONFIG_IOMMU_DEFAULT_PASSTHROUGH.
- io7= [HW] IO7 for Marvel based alpha systems
+ io7= [HW] IO7 for Marvel-based Alpha systems
See comment before marvel_specify_io7 in
arch/alpha/kernel/core_marvel.c.
- io_delay= [X86] I/O delay method
+ io_delay= [X86,EARLY] I/O delay method
0x80
Standard port 0x80 based delay
0xed
@@ -1797,31 +2572,63 @@
No delay
ip= [IP_PNP]
- See Documentation/filesystems/nfs/nfsroot.txt.
+ See Documentation/admin-guide/nfs/nfsroot.rst.
+
+ ipcmni_extend [KNL,EARLY] Extend the maximum number of unique System V
+ IPC identifiers from 32,768 to 16,777,216.
+
+ ipe.enforce= [IPE]
+ Format: <bool>
+ Determine whether IPE starts in permissive (0) or
+ enforce (1) mode. The default is enforce.
+
+ ipe.success_audit=
+ [IPE]
+ Format: <bool>
+ Start IPE with success auditing enabled, emitting
+ an audit event when a binary is allowed. The default
+ is 0.
irqaffinity= [SMP] Set the default irq affinity mask
The argument is a cpu list, as described above.
irqchip.gicv2_force_probe=
- [ARM, ARM64]
+ [ARM,ARM64,EARLY]
Format: <bool>
Force the kernel to look for the second 4kB page
of a GICv2 controller even if the memory range
exposed by the device tree is too small.
irqchip.gicv3_nolpi=
- [ARM, ARM64]
+ [ARM,ARM64,EARLY]
Force the kernel to ignore the availability of
LPIs (and by consequence ITSs). Intended for system
that use the kernel as a bootloader, and thus want
to let secondary kernels in charge of setting up
LPIs.
+ irqchip.gicv3_pseudo_nmi= [ARM64,EARLY]
+ Enables support for pseudo-NMIs in the kernel. This
+ requires the kernel to be built with
+ CONFIG_ARM64_PSEUDO_NMI.
+
+ irqchip.riscv_imsic_noipi
+ [RISC-V,EARLY]
+ Force the kernel to not use IMSIC software injected MSIs
+ as IPIs. Intended for system where IMSIC is trap-n-emulated,
+ and thus want to reduce MMIO traps when triggering IPIs
+ to multiple harts.
+
irqfixup [HW]
When an interrupt is not handled search all handlers
for it. Intended to get systems with badly broken
firmware running.
+ irqhandler.duration_warn_us= [KNL]
+ Warn if an IRQ handler exceeds the specified duration
+ threshold in microseconds. Useful for identifying
+ long-running IRQs in the system.
+
irqpoll [HW]
When an interrupt is not handled search all handlers
for it. Also check all handlers each timer
@@ -1839,7 +2646,9 @@
specified in the flag list (default: domain):
nohz
- Disable the tick when a single task runs.
+ Disable the tick when a single task runs as well as
+ disabling other kernel noises like having RCU callbacks
+ offloaded. This is equivalent to the nohz_full parameter.
A residual 1Hz tick is offloaded to workqueues, which you
need to affine to housekeeping through the global
@@ -1866,50 +2675,106 @@
<cpu number> begins at 0 and the maximum value is
"number of CPUs in system - 1".
- The format of <cpu-list> is described above.
-
+ managed_irq
+
+ Isolate from being targeted by managed interrupts
+ which have an interrupt mask containing isolated
+ CPUs. The affinity of managed interrupts is
+ handled by the kernel and cannot be changed via
+ the /proc/irq/* interfaces.
+
+ This isolation is best effort and only effective
+ if the automatically assigned interrupt mask of a
+ device queue contains isolated and housekeeping
+ CPUs. If housekeeping CPUs are online then such
+ interrupts are directed to the housekeeping CPU
+ so that IO submitted on the housekeeping CPU
+ cannot disturb the isolated CPU.
+
+ If a queue's affinity mask contains only isolated
+ CPUs then this parameter has no effect on the
+ interrupt routing decision, though interrupts are
+ only delivered when tasks running on those
+ isolated CPUs submit IO. IO submitted on
+ housekeeping CPUs has no influence on those
+ queues.
+ The format of <cpu-list> is described above.
iucv= [HW,NET]
- ivrs_ioapic [HW,X86_64]
+ ivrs_ioapic [HW,X86-64]
Provide an override to the IOAPIC-ID<->DEVICE-ID
- mapping provided in the IVRS ACPI table. For
- example, to map IOAPIC-ID decimal 10 to
- PCI device 00:14.0 write the parameter as:
+ mapping provided in the IVRS ACPI table.
+ By default, PCI segment is 0, and can be omitted.
+
+ For example, to map IOAPIC-ID decimal 10 to
+ PCI segment 0x1 and PCI device 00:14.0,
+ write the parameter as:
+ ivrs_ioapic=10@0001:00:14.0
+
+ Deprecated formats:
+ * To map IOAPIC-ID decimal 10 to PCI device 00:14.0
+ write the parameter as:
ivrs_ioapic[10]=00:14.0
+ * To map IOAPIC-ID decimal 10 to PCI segment 0x1 and
+ PCI device 00:14.0 write the parameter as:
+ ivrs_ioapic[10]=0001:00:14.0
- ivrs_hpet [HW,X86_64]
+ ivrs_hpet [HW,X86-64]
Provide an override to the HPET-ID<->DEVICE-ID
- mapping provided in the IVRS ACPI table. For
- example, to map HPET-ID decimal 0 to
- PCI device 00:14.0 write the parameter as:
+ mapping provided in the IVRS ACPI table.
+ By default, PCI segment is 0, and can be omitted.
+
+ For example, to map HPET-ID decimal 10 to
+ PCI segment 0x1 and PCI device 00:14.0,
+ write the parameter as:
+ ivrs_hpet=10@0001:00:14.0
+
+ Deprecated formats:
+ * To map HPET-ID decimal 0 to PCI device 00:14.0
+ write the parameter as:
ivrs_hpet[0]=00:14.0
+ * To map HPET-ID decimal 10 to PCI segment 0x1 and
+ PCI device 00:14.0 write the parameter as:
+ ivrs_ioapic[10]=0001:00:14.0
- ivrs_acpihid [HW,X86_64]
+ ivrs_acpihid [HW,X86-64]
Provide an override to the ACPI-HID:UID<->DEVICE-ID
- mapping provided in the IVRS ACPI table. For
- example, to map UART-HID:UID AMD0020:0 to
- PCI device 00:14.5 write the parameter as:
+ mapping provided in the IVRS ACPI table.
+ By default, PCI segment is 0, and can be omitted.
+
+ For example, to map UART-HID:UID AMD0020:0 to
+ PCI segment 0x1 and PCI device ID 00:14.5,
+ write the parameter as:
+ ivrs_acpihid=AMD0020:0@0001:00:14.5
+
+ Deprecated formats:
+ * To map UART-HID:UID AMD0020:0 to PCI segment is 0,
+ PCI device ID 00:14.5, write the parameter as:
ivrs_acpihid[00:14.5]=AMD0020:0
+ * To map UART-HID:UID AMD0020:0 to PCI segment 0x1 and
+ PCI device ID 00:14.5, write the parameter as:
+ ivrs_acpihid[0001:00:14.5]=AMD0020:0
js= [HW,JOY] Analog joystick
See Documentation/input/joydev/joystick.rst.
- nokaslr [KNL]
- When CONFIG_RANDOMIZE_BASE is set, this disables
- kernel and module base offset ASLR (Address Space
- Layout Randomization).
-
kasan_multi_shot
[KNL] Enforce KASAN (Kernel Address Sanitizer) to print
report on every invalid memory access. Without this
parameter KASAN will print report only for the first
invalid access.
- keepinitrd [HW,ARM]
+ keep_bootcon [KNL,EARLY]
+ Do not unregister boot console at start. This is only
+ useful for debugging when something happens in the window
+ between unregistering the boot console and initializing
+ the real console.
- kernelcore= [KNL,X86,IA-64,PPC]
+ keepinitrd [HW,ARM] See retain_initrd.
+
+ kernelcore= [KNL,X86,PPC,EARLY]
Format: nn[KMGTPE] | nn% | "mirror"
This parameter specifies the amount of memory usable by
the kernel for non-movable allocations. The requested
@@ -1934,7 +2799,7 @@
for Movable pages. "nn[KMGTPE]", "nn%", and "mirror"
are exclusive, so you cannot specify multiple forms.
- kgdbdbgp= [KGDB,HW] kgdb over EHCI usb debug port.
+ kgdbdbgp= [KGDB,HW,EARLY] kgdb over EHCI usb debug port.
Format: <Controller#>[,poll interval]
The controller # is the number of the ehci usb debug
port as it is probed via PCI. The poll interval is
@@ -1955,72 +2820,281 @@
kms, kbd format: kms,kbd
kms, kbd and serial format: kms,kbd,<ser_dev>[,baud]
- kgdbwait [KGDB] Stop kernel execution and enter the
+ kgdboc_earlycon= [KGDB,HW,EARLY]
+ If the boot console provides the ability to read
+ characters and can work in polling mode, you can use
+ this parameter to tell kgdb to use it as a backend
+ until the normal console is registered. Intended to
+ be used together with the kgdboc parameter which
+ specifies the normal console to transition to.
+
+ The name of the early console should be specified
+ as the value of this parameter. Note that the name of
+ the early console might be different than the tty
+ name passed to kgdboc. It's OK to leave the value
+ blank and the first boot console that implements
+ read() will be picked.
+
+ kgdbwait [KGDB,EARLY] Stop kernel execution and enter the
kernel debugger at the earliest opportunity.
- kmac= [MIPS] korina ethernet MAC address.
+ kho= [KEXEC,EARLY]
+ Format: { "0" | "1" | "off" | "on" | "y" | "n" }
+ Enables or disables Kexec HandOver.
+ "0" | "off" | "n" - kexec handover is disabled
+ "1" | "on" | "y" - kexec handover is enabled
+
+ kho_scratch= [KEXEC,EARLY]
+ Format: ll[KMG],mm[KMG],nn[KMG] | nn%
+ Defines the size of the KHO scratch region. The KHO
+ scratch regions are physically contiguous memory
+ ranges that can only be used for non-kernel
+ allocations. That way, even when memory is heavily
+ fragmented with handed over memory, the kexeced
+ kernel will always have enough contiguous ranges to
+ bootstrap itself.
+
+ It is possible to specify the exact amount of
+ memory in the form of "ll[KMG],mm[KMG],nn[KMG]"
+ where the first parameter defines the size of a low
+ memory scratch area, the second parameter defines
+ the size of a global scratch area and the third
+ parameter defines the size of additional per-node
+ scratch areas. The form "nn%" defines scale factor
+ (in percents) of memory that was used during boot.
+
+ kmac= [MIPS] Korina ethernet MAC address.
Configure the RouterBoard 532 series on-chip
Ethernet adapter MAC address.
- kmemleak= [KNL] Boot-time kmemleak enable/disable
+ kmemleak= [KNL,EARLY] Boot-time kmemleak enable/disable
Valid arguments: on, off
Default: on
Built with CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF=y,
the default is off.
+ kprobe_event=[probe-list]
+ [FTRACE] Add kprobe events and enable at boot time.
+ The probe-list is a semicolon delimited list of probe
+ definitions. Each definition is same as kprobe_events
+ interface, but the parameters are comma delimited.
+ For example, to add a kprobe event on vfs_read with
+ arg1 and arg2, add to the command line;
+
+ kprobe_event=p,vfs_read,$arg1,$arg2
+
+ See also Documentation/trace/kprobetrace.rst "Kernel
+ Boot Parameter" section.
+
+ kpti= [ARM64,EARLY] Control page table isolation of
+ user and kernel address spaces.
+ Default: enabled on cores which need mitigation.
+ 0: force disabled
+ 1: force enabled
+
+ kunit.enable= [KUNIT] Enable executing KUnit tests. Requires
+ CONFIG_KUNIT to be set to be fully enabled. The
+ default value can be overridden via
+ KUNIT_DEFAULT_ENABLED.
+ Default is 1 (enabled)
+
kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs.
Default is 0 (don't ignore, but inject #GP)
+ kvm.eager_page_split=
+ [KVM,X86] Controls whether or not KVM will try to
+ proactively split all huge pages during dirty logging.
+ Eager page splitting reduces interruptions to vCPU
+ execution by eliminating the write-protection faults
+ and MMU lock contention that would otherwise be
+ required to split huge pages lazily.
+
+ VM workloads that rarely perform writes or that write
+ only to a small region of VM memory may benefit from
+ disabling eager page splitting to allow huge pages to
+ still be used for reads.
+
+ The behavior of eager page splitting depends on whether
+ KVM_DIRTY_LOG_INITIALLY_SET is enabled or disabled. If
+ disabled, all huge pages in a memslot will be eagerly
+ split when dirty logging is enabled on that memslot. If
+ enabled, eager page splitting will be performed during
+ the KVM_CLEAR_DIRTY ioctl, and only for the pages being
+ cleared.
+
+ Eager page splitting is only supported when kvm.tdp_mmu=Y.
+
+ Default is Y (on).
+
+ kvm.enable_virt_at_load=[KVM,ARM64,LOONGARCH,MIPS,RISCV,X86]
+ If enabled, KVM will enable virtualization in hardware
+ when KVM is loaded, and disable virtualization when KVM
+ is unloaded (if KVM is built as a module).
+
+ If disabled, KVM will dynamically enable and disable
+ virtualization on-demand when creating and destroying
+ VMs, i.e. on the 0=>1 and 1=>0 transitions of the
+ number of VMs.
+
+ Enabling virtualization at module load avoids potential
+ latency for creation of the 0=>1 VM, as KVM serializes
+ virtualization enabling across all online CPUs. The
+ "cost" of enabling virtualization when KVM is loaded,
+ is that doing so may interfere with using out-of-tree
+ hypervisors that want to "own" virtualization hardware.
+
kvm.enable_vmware_backdoor=[KVM] Support VMware backdoor PV interface.
Default is false (don't support).
- kvm.mmu_audit= [KVM] This is a R/W parameter which allows audit
- KVM MMU at runtime.
- Default is 0 (off)
-
- kvm-amd.nested= [KVM,AMD] Allow nested virtualization in KVM/SVM.
- Default is 1 (enabled)
-
- kvm-amd.npt= [KVM,AMD] Disable nested paging (virtualized MMU)
- for all guests.
- Default is 1 (enabled) if in 64-bit or 32-bit PAE mode.
+ kvm.nx_huge_pages=
+ [KVM] Controls the software workaround for the
+ X86_BUG_ITLB_MULTIHIT bug.
+ force : Always deploy workaround.
+ off : Never deploy workaround.
+ auto : Deploy workaround based on the presence of
+ X86_BUG_ITLB_MULTIHIT.
+
+ Default is 'auto'.
+
+ If the software workaround is enabled for the host,
+ guests do need not to enable it for nested guests.
+
+ kvm.nx_huge_pages_recovery_ratio=
+ [KVM] Controls how many 4KiB pages are periodically zapped
+ back to huge pages. 0 disables the recovery, otherwise if
+ the value is N KVM will zap 1/Nth of the 4KiB pages every
+ period (see below). The default is 60.
+
+ kvm.nx_huge_pages_recovery_period_ms=
+ [KVM] Controls the time period at which KVM zaps 4KiB pages
+ back to huge pages. If the value is a non-zero N, KVM will
+ zap a portion (see ratio above) of the pages every N msecs.
+ If the value is 0 (the default), KVM will pick a period based
+ on the ratio, such that a page is zapped after 1 hour on average.
+
+ kvm-amd.nested= [KVM,AMD] Control nested virtualization feature in
+ KVM/SVM. Default is 1 (enabled).
+
+ kvm-amd.npt= [KVM,AMD] Control KVM's use of Nested Page Tables,
+ a.k.a. Two-Dimensional Page Tables. Default is 1
+ (enabled). Disable by KVM if hardware lacks support
+ for NPT.
+
+ kvm-amd.ciphertext_hiding_asids=
+ [KVM,AMD] Ciphertext hiding prevents disallowed accesses
+ to SNP private memory from reading ciphertext. Instead,
+ reads will see constant default values (0xff).
+
+ If ciphertext hiding is enabled, the joint SEV-ES and
+ SEV-SNP ASID space is partitioned into separate SEV-ES
+ and SEV-SNP ASID ranges, with the SEV-SNP range being
+ [1..max_snp_asid] and the SEV-ES range being
+ (max_snp_asid..min_sev_asid), where min_sev_asid is
+ enumerated by CPUID.0x.8000_001F[EDX].
+
+ A non-zero value enables SEV-SNP ciphertext hiding and
+ adjusts the ASID ranges for SEV-ES and SEV-SNP guests.
+ KVM caps the number of SEV-SNP ASIDs at the maximum
+ possible value, e.g. specifying -1u will assign all
+ joint SEV-ES and SEV-SNP ASIDs to SEV-SNP. Note,
+ assigning all joint ASIDs to SEV-SNP, i.e. configuring
+ max_snp_asid == min_sev_asid-1, will effectively make
+ SEV-ES unusable.
+
+ kvm-arm.mode=
+ [KVM,ARM,EARLY] Select one of KVM/arm64's modes of
+ operation.
+
+ none: Forcefully disable KVM.
+
+ nvhe: Standard nVHE-based mode, without support for
+ protected guests.
+
+ protected: Mode with support for guests whose state is
+ kept private from the host, using VHE or
+ nVHE depending on HW support.
+
+ nested: VHE-based mode with support for nested
+ virtualization. Requires at least ARMv8.4
+ hardware (with FEAT_NV2).
+
+ Defaults to VHE/nVHE based on hardware support. Setting
+ mode to "protected" will disable kexec and hibernation
+ for the host. To force nVHE on VHE hardware, add
+ "arm64_sw.hvhe=0 id_aa64mmfr1.vh=0" to the
+ command-line.
+ "nested" is experimental and should be used with
+ extreme caution.
kvm-arm.vgic_v3_group0_trap=
- [KVM,ARM] Trap guest accesses to GICv3 group-0
+ [KVM,ARM,EARLY] Trap guest accesses to GICv3 group-0
system registers
kvm-arm.vgic_v3_group1_trap=
- [KVM,ARM] Trap guest accesses to GICv3 group-1
+ [KVM,ARM,EARLY] Trap guest accesses to GICv3 group-1
system registers
kvm-arm.vgic_v3_common_trap=
- [KVM,ARM] Trap guest accesses to GICv3 common
+ [KVM,ARM,EARLY] Trap guest accesses to GICv3 common
system registers
kvm-arm.vgic_v4_enable=
- [KVM,ARM] Allow use of GICv4 for direct injection of
- LPIs.
+ [KVM,ARM,EARLY] Allow use of GICv4 for direct
+ injection of LPIs.
- kvm-intel.ept= [KVM,Intel] Disable extended page tables
- (virtualized MMU) support on capable Intel chips.
- Default is 1 (enabled)
+ kvm-arm.wfe_trap_policy=
+ [KVM,ARM] Control when to set WFE instruction trap for
+ KVM VMs. Traps are allowed but not guaranteed by the
+ CPU architecture.
+
+ trap: set WFE instruction trap
+
+ notrap: clear WFE instruction trap
+
+ kvm-arm.wfi_trap_policy=
+ [KVM,ARM] Control when to set WFI instruction trap for
+ KVM VMs. Traps are allowed but not guaranteed by the
+ CPU architecture.
+
+ trap: set WFI instruction trap
+
+ notrap: clear WFI instruction trap
+
+ kvm_cma_resv_ratio=n [PPC,EARLY]
+ Reserves given percentage from system memory area for
+ contiguous memory allocation for KVM hash pagetable
+ allocation.
+ By default it reserves 5% of total system memory.
+ Format: <integer>
+ Default: 5
+
+ kvm-intel.ept= [KVM,Intel] Control KVM's use of Extended Page Tables,
+ a.k.a. Two-Dimensional Page Tables. Default is 1
+ (enabled). Disable by KVM if hardware lacks support
+ for EPT.
kvm-intel.emulate_invalid_guest_state=
- [KVM,Intel] Enable emulation of invalid guest states
- Default is 0 (disabled)
+ [KVM,Intel] Control whether to emulate invalid guest
+ state. Ignored if kvm-intel.enable_unrestricted_guest=1,
+ as guest state is never invalid for unrestricted
+ guests. This param doesn't apply to nested guests (L2),
+ as KVM never emulates invalid L2 guest state.
+ Default is 1 (enabled).
kvm-intel.flexpriority=
- [KVM,Intel] Disable FlexPriority feature (TPR shadow).
- Default is 1 (enabled)
+ [KVM,Intel] Control KVM's use of FlexPriority feature
+ (TPR shadow). Default is 1 (enabled). Disable by KVM if
+ hardware lacks support for it.
kvm-intel.nested=
- [KVM,Intel] Enable VMX nesting (nVMX).
- Default is 0 (disabled)
+ [KVM,Intel] Control nested virtualization feature in
+ KVM/VMX. Default is 1 (enabled).
kvm-intel.unrestricted_guest=
- [KVM,Intel] Disable unrestricted guest feature
- (virtualized real and unpaged mode) on capable
- Intel chips. Default is 1 (enabled)
+ [KVM,Intel] Control KVM's use of unrestricted guest
+ feature (virtualized real and unpaged mode). Default
+ is 1 (enabled). Disable by KVM if EPT is disabled or
+ hardware lacks support for it.
kvm-intel.vmentry_l1d_flush=[KVM,Intel] Mitigation for L1 Terminal Fault
CVE-2018-3620.
@@ -2034,11 +3108,29 @@
Default is cond (do L1 cache flush in specific instances)
- kvm-intel.vpid= [KVM,Intel] Disable Virtual Processor Identification
- feature (tagged TLBs) on capable Intel chips.
- Default is 1 (enabled)
+ kvm-intel.vpid= [KVM,Intel] Control KVM's use of Virtual Processor
+ Identification feature (tagged TLBs). Default is 1
+ (enabled). Disable by KVM if hardware lacks support
+ for it.
+
+ l1d_flush= [X86,INTEL,EARLY]
+ Control mitigation for L1D based snooping vulnerability.
+
+ Certain CPUs are vulnerable to an exploit against CPU
+ internal buffers which can forward information to a
+ disclosure gadget under certain conditions.
+
+ In vulnerable processors, the speculatively
+ forwarded data can be used in a cache side channel
+ attack, to access data to which the attacker does
+ not have direct access.
- l1tf= [X86] Control mitigation of the L1TF vulnerability on
+ This parameter controls the mitigation. The
+ options are:
+
+ on - enable the interface for the mitigation
+
+ l1tf= [X86,EARLY] Control mitigation of the L1TF vulnerability on
affected CPUs
The kernel PTE inversion protection is unconditionally
@@ -2095,23 +3187,27 @@
off
Disables hypervisor mitigations and doesn't
emit any warnings.
+ It also drops the swap size and available
+ RAM limit restriction on both hypervisor and
+ bare metal.
Default is 'flush'.
- For details see: Documentation/admin-guide/l1tf.rst
+ For details see: Documentation/admin-guide/hw-vuln/l1tf.rst
l2cr= [PPC]
l3cr= [PPC]
- lapic [X86-32,APIC] Enable the local APIC even if BIOS
+ lapic [X86-32,APIC,EARLY] Enable the local APIC even if BIOS
disabled it.
- lapic= [x86,APIC] "notscdeadline" Do not use TSC deadline
+ lapic= [X86,APIC] Do not use TSC deadline
value for LAPIC timer one-shot implementation. Default
back to the programmable timer unit in the LAPIC.
+ Format: notscdeadline
- lapic_timer_c2_ok [X86,APIC] trust the local apic timer
+ lapic_timer_c2_ok [X86,APIC,EARLY] trust the local apic timer
in C2 power state.
libata.dma= [LIBATA] DMA control
@@ -2130,14 +3226,14 @@
when set.
Format: <int>
- libata.force= [LIBATA] Force configurations. The format is comma
- separated list of "[ID:]VAL" where ID is
- PORT[.DEVICE]. PORT and DEVICE are decimal numbers
- matching port, link or device. Basically, it matches
- the ATA ID string printed on console by libata. If
- the whole ID part is omitted, the last PORT and DEVICE
- values are used. If ID hasn't been specified yet, the
- configuration applies to all ports, links and devices.
+ libata.force= [LIBATA] Force configurations. The format is a comma-
+ separated list of "[ID:]VAL" where ID is PORT[.DEVICE].
+ PORT and DEVICE are decimal numbers matching port, link
+ or device. Basically, it matches the ATA ID string
+ printed on console by libata. If the whole ID part is
+ omitted, the last PORT and DEVICE values are used. If
+ ID hasn't been specified yet, the configuration applies
+ to all ports, links and devices.
If only DEVICE is omitted, the parameter applies to
the port and all links and devices behind it. DEVICE
@@ -2147,7 +3243,7 @@
host link and device attached to it.
The VAL specifies the configuration to force. As long
- as there's no ambiguity shortcut notation is allowed.
+ as there is no ambiguity, shortcut notation is allowed.
For example, both 1.5 and 1.5G would work for 1.5Gbps.
The following configurations can be forced.
@@ -2160,29 +3256,70 @@
udma[/][16,25,33,44,66,100,133] notation is also
allowed.
+ * nohrst, nosrst, norst: suppress hard, soft and both
+ resets.
+
+ * rstonce: only attempt one reset during hot-unplug
+ link recovery.
+
+ * [no]dbdelay: Enable or disable the extra 200ms delay
+ before debouncing a link PHY and device presence
+ detection.
+
* [no]ncq: Turn on or off NCQ.
- * [no]ncqtrim: Turn off queued DSM TRIM.
+ * [no]ncqtrim: Enable or disable queued DSM TRIM.
+
+ * [no]ncqati: Enable or disable NCQ trim on ATI chipset.
+
+ * [no]trim: Enable or disable (unqueued) TRIM.
- * nohrst, nosrst, norst: suppress hard, soft
- and both resets.
+ * trim_zero: Indicate that TRIM command zeroes data.
- * rstonce: only attempt one reset during
- hot-unplug link recovery
+ * max_trim_128m: Set 128M maximum trim size limit.
- * dump_id: dump IDENTIFY data.
+ * [no]dma: Turn on or off DMA transfers.
- * atapi_dmadir: Enable ATAPI DMADIR bridge support
+ * atapi_dmadir: Enable ATAPI DMADIR bridge support.
+
+ * atapi_mod16_dma: Enable the use of ATAPI DMA for
+ commands that are not a multiple of 16 bytes.
+
+ * [no]dmalog: Enable or disable the use of the
+ READ LOG DMA EXT command to access logs.
+
+ * [no]iddevlog: Enable or disable access to the
+ identify device data log.
+
+ * [no]logdir: Enable or disable access to the general
+ purpose log directory.
+
+ * max_sec_128: Set transfer size limit to 128 sectors.
+
+ * max_sec_1024: Set or clear transfer size limit to
+ 1024 sectors.
+
+ * max_sec_lba48: Set or clear transfer size limit to
+ 65535 sectors.
+
+ * external: Mark port as external (hotplug-capable).
+
+ * [no]lpm: Enable or disable link power management.
+
+ * [no]setxfer: Indicate if transfer speed mode setting
+ should be skipped.
+
+ * [no]fua: Disable or enable FUA (Force Unit Access)
+ support for devices supporting this feature.
+
+ * dump_id: Dump IDENTIFY data.
* disable: Disable this device.
If there are multiple matching configurations changing
the same attribute, the last one is used.
- memblock=debug [KNL] Enable memblock debug messages.
-
- load_ramdisk= [RAM] List of ramdisks to load from floppy
- See Documentation/blockdev/ramdisk.txt.
+ load_ramdisk= [RAM] [Deprecated]
lockd.nlm_grace_period=P [NFS] Assign grace period.
Format: <integer>
@@ -2196,6 +3333,47 @@
lockd.nlm_udpport=M [NFS] Assign UDP port.
Format: <integer>
+ lockdown= [SECURITY,EARLY]
+ { integrity | confidentiality }
+ Enable the kernel lockdown feature. If set to
+ integrity, kernel features that allow userland to
+ modify the running kernel are disabled. If set to
+ confidentiality, kernel features that allow userland
+ to extract confidential information from the kernel
+ are also disabled.
+
+ locktorture.acq_writer_lim= [KNL]
+ Set the time limit in jiffies for a lock
+ acquisition. Acquisitions exceeding this limit
+ will result in a splat once they do complete.
+
+ locktorture.bind_readers= [KNL]
+ Specify the list of CPUs to which the readers are
+ to be bound.
+
+ locktorture.bind_writers= [KNL]
+ Specify the list of CPUs to which the writers are
+ to be bound.
+
+ locktorture.call_rcu_chains= [KNL]
+ Specify the number of self-propagating call_rcu()
+ chains to set up. These are used to ensure that
+ there is a high probability of an RCU grace period
+ in progress at any given time. Defaults to 0,
+ which disables these call_rcu() chains.
+
+ locktorture.long_hold= [KNL]
+ Specify the duration in milliseconds for the
+ occasional long-duration lock hold time. Defaults
+ to 100 milliseconds. Select 0 to disable.
+
+ locktorture.nested_locks= [KNL]
+ Specify the maximum lock nesting depth that
+ locktorture is to exercise, up to a limit of 8
+ (MAX_NESTED_LOCKS). Specify zero to disable.
+ Note that this parameter is ineffective on types
+ of locks that do not support nested acquisition.
+
locktorture.nreaders_stress= [KNL]
Set the number of locking read-acquisition kthreads.
Defaults to being automatically set based on the
@@ -2211,6 +3389,25 @@
Set time (s) between CPU-hotplug operations, or
zero to disable CPU-hotplug testing.
+ locktorture.rt_boost= [KNL]
+ Do periodic testing of real-time lock priority
+ boosting. Select 0 to disable, 1 to boost
+ only rt_mutex, and 2 to boost unconditionally.
+ Defaults to 2, which might seem to be an
+ odd choice, but which should be harmless for
+ non-real-time spinlocks, due to their disabling
+ of preemption. Note that non-realtime mutexes
+ disable boosting.
+
+ locktorture.rt_boost_factor= [KNL]
+ Number that determines how often and for how
+ long priority boosting is exercised. This is
+ scaled down by the number of writers, so that the
+ number of boosts per unit time remains roughly
+ constant as the number of writers increases.
+ On the other hand, the duration of each boost
+ increases with the number of writers.
+
locktorture.shuffle_interval= [KNL]
Set task-shuffle interval (jiffies). Shuffling
tasks allows some CPUs to go into dyntick-idle
@@ -2236,10 +3433,15 @@
locktorture.verbose= [KNL]
Enable additional printk() statements.
+ locktorture.writer_fifo= [KNL]
+ Run the write-side locktorture kthreads at
+ sched_set_fifo() real-time priority.
+
logibm.irq= [HW,MOUSE] Logitech Bus Mouse Driver
Format: <irq>
- loglevel= All Kernel Messages with a loglevel smaller than the
+ loglevel= [KNL,EARLY]
+ All Kernel Messages with a loglevel smaller than the
console loglevel will be printed to the console. It can
also be changed with klogd or other programs. The
loglevels are defined as follows:
@@ -2253,13 +3455,15 @@
6 (KERN_INFO) informational
7 (KERN_DEBUG) debug-level messages
- log_buf_len=n[KMG] Sets the size of the printk ring buffer,
- in bytes. n must be a power of two and greater
- than the minimal size. The minimal size is defined
- by LOG_BUF_SHIFT kernel config parameter. There is
- also CONFIG_LOG_CPU_MAX_BUF_SHIFT config parameter
- that allows to increase the default size depending on
- the number of CPUs. See init/Kconfig for more details.
+ log_buf_len=n[KMG] [KNL,EARLY]
+ Sets the size of the printk ring buffer, in bytes.
+ n must be a power of two and greater than the
+ minimal size. The minimal size is defined by
+ LOG_BUF_SHIFT kernel config parameter. There
+ is also CONFIG_LOG_CPU_MAX_BUF_SHIFT config
+ parameter that allows to increase the default size
+ depending on the number of CPUs. See init/Kconfig
+ for more details.
logo.nologo [FB] Disables display of the built-in Linux logo.
This may be used to provide more screen space for
@@ -2297,23 +3501,17 @@
unlikely, in the extreme case this might damage your
hardware.
- ltpc= [NET]
- Format: <io>,<irq>,<dma>
-
lsm.debug [SECURITY] Enable LSM initialization debugging output.
- machvec= [IA-64] Force the use of a particular machine-vector
- (machvec) in a generic kernel.
- Example: machvec=hpzx1_swiotlb
+ lsm=lsm1,...,lsmN
+ [SECURITY] Choose order of LSM initialization. This
+ overrides CONFIG_LSM, and the "security=" parameter.
- machtype= [Loongson] Share the same kernel image file between different
- yeeloong laptop.
+ machtype= [Loongson] Share the same kernel image file between
+ different yeeloong laptops.
Example: machtype=lemote-yeeloong-2f-7inch
- max_addr=nn[KMG] [KNL,BOOT,ia64] All physical memory greater
- than or equal to this physical address is ignored.
-
- maxcpus= [SMP] Maximum number of processors that an SMP kernel
+ maxcpus= [SMP,EARLY] Maximum number of processors that an SMP kernel
will bring up during bootup. maxcpus=n : n >= 0 limits
the kernel to bring up 'n' processors. Surely after
bootup you can bring up the other plugged cpu by executing
@@ -2329,9 +3527,77 @@
devices can be requested on-demand with the
/dev/loop-control interface.
- mce [X86-32] Machine Check Exception
+ mce= [X86-{32,64}]
+
+ Please see Documentation/arch/x86/x86_64/machinecheck.rst for sysfs runtime tunables.
+
+ off
+ disable machine check
+
+ no_cmci
+ disable CMCI(Corrected Machine Check Interrupt) that
+ Intel processor supports. Usually this disablement is
+ not recommended, but it might be handy if your
+ hardware is misbehaving.
+
+ Note that you'll get more problems without CMCI than
+ with due to the shared banks, i.e. you might get
+ duplicated error logs.
+
+ dont_log_ce
+ don't make logs for corrected errors. All events
+ reported as corrected are silently cleared by OS. This
+ option will be useful if you have no interest in any
+ of corrected errors.
+
+ ignore_ce
+ disable features for corrected errors, e.g.
+ polling timer and CMCI. All events reported as
+ corrected are not cleared by OS and remained in its
+ error banks.
+
+ Usually this disablement is not recommended, however
+ if there is an agent checking/clearing corrected
+ errors (e.g. BIOS or hardware monitoring
+ applications), conflicting with OS's error handling,
+ and you cannot deactivate the agent, then this option
+ will be a help.
+
+ no_lmce
+ do not opt-in to Local MCE delivery. Use legacy method
+ to broadcast MCEs.
+
+ bootlog
+ enable logging of machine checks left over from
+ booting. Disabled by default on AMD Fam10h and older
+ because some BIOS leave bogus ones.
+
+ If your BIOS doesn't do that it's a good idea to
+ enable though to make sure you log even machine check
+ events that result in a reboot. On Intel systems it is
+ enabled by default.
+
+ nobootlog
+ disable boot machine check logging.
+
+ monarchtimeout (number)
+ sets the time in us to wait for other CPUs on machine
+ checks. 0 to disable.
+
+ bios_cmci_threshold
+ don't overwrite the bios-set CMCI threshold. This boot
+ option prevents Linux from overwriting the CMCI
+ threshold set by the bios. Without this option, Linux
+ always sets the CMCI threshold to 1. Enabling this may
+ make memory predictive failure analysis less effective
+ if the bios sets thresholds for memory errors since we
+ will not see details for all errors.
+
+ recovery
+ force-enable recoverable machine check code paths
+
+ Everything else is in sysfs now.
- mce=option [X86-64] See Documentation/x86/x86_64/boot-options.txt
md= [HW] RAID subsystems devices and level
See Documentation/admin-guide/md.rst.
@@ -2340,37 +3606,99 @@
Format: <first>,<last>
Specifies range of consoles to be captured by the MDA.
- mem=nn[KMG] [KNL,BOOT] Force usage of a specific amount of memory
- Amount of memory to be used when the kernel is not able
- to see the whole system memory or for test.
+ mds= [X86,INTEL,EARLY]
+ Control mitigation for the Micro-architectural Data
+ Sampling (MDS) vulnerability.
+
+ Certain CPUs are vulnerable to an exploit against CPU
+ internal buffers which can forward information to a
+ disclosure gadget under certain conditions.
+
+ In vulnerable processors, the speculatively
+ forwarded data can be used in a cache side channel
+ attack, to access data to which the attacker does
+ not have direct access.
+
+ This parameter controls the MDS mitigation. The
+ options are:
+
+ full - Enable MDS mitigation on vulnerable CPUs
+ full,nosmt - Enable MDS mitigation and disable
+ SMT on vulnerable CPUs
+ off - Unconditionally disable MDS mitigation
+
+ On TAA-affected machines, mds=off can be prevented by
+ an active TAA mitigation as both vulnerabilities are
+ mitigated with the same mechanism so in order to disable
+ this mitigation, you need to specify tsx_async_abort=off
+ too.
+
+ Not specifying this option is equivalent to
+ mds=full.
+
+ For details see: Documentation/admin-guide/hw-vuln/mds.rst
+
+ mem=nn[KMG] [HEXAGON,EARLY] Set the memory size.
+ Must be specified, otherwise memory size will be 0.
+
+ mem=nn[KMG] [KNL,BOOT,EARLY] Force usage of a specific amount
+ of memory Amount of memory to be used in cases
+ as follows:
+
+ 1 for test;
+ 2 when the kernel is not able to see the whole system memory;
+ 3 memory that lies after 'mem=' boundary is excluded from
+ the hypervisor, then assigned to KVM guests.
+ 4 to limit the memory available for kdump kernel.
+
+ [ARC,MICROBLAZE] - the limit applies only to low memory,
+ high memory is not affected.
+
+ [ARM64] - only limits memory covered by the linear
+ mapping. The NOMAP regions are not affected.
+
[X86] Work as limiting max address. Use together
with memmap= to avoid physical address space collisions.
Without memmap= PCI devices could be placed at addresses
belonging to unused RAM.
+ Note that this only takes effects during boot time since
+ in above case 3, memory may need be hot added after boot
+ if system memory of hypervisor is not sufficient.
+
+ mem=nn[KMG]@ss[KMG]
+ [ARM,MIPS,EARLY] - override the memory layout
+ reported by firmware.
+ Define a memory region of size nn[KMG] starting at
+ ss[KMG].
+ Multiple different regions can be specified with
+ multiple mem= parameters on the command line.
+
mem=nopentium [BUGS=X86-32] Disable usage of 4MB pages for kernel
memory.
+ memblock=debug [KNL,EARLY] Enable memblock debug messages.
+
memchunk=nn[KMG]
[KNL,SH] Allow user to override the default size for
per-device physically contiguous DMA buffers.
- memhp_default_state=online/offline
+ memhp_default_state=online/offline/online_kernel/online_movable
[KNL] Set the initial state for the memory hotplug
onlining policy. If not specified, the default value is
set according to the
- CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE kernel config
- option.
- See Documentation/memory-hotplug.txt.
+ CONFIG_MHP_DEFAULT_ONLINE_TYPE kernel config
+ options.
+ See Documentation/admin-guide/mm/memory-hotplug.rst.
- memmap=exactmap [KNL,X86] Enable setting of an exact
+ memmap=exactmap [KNL,X86,EARLY] Enable setting of an exact
E820 memory map, as specified by the user.
Such memmap=exactmap lines can be constructed based on
BIOS output or other requirements. See the memmap=nn@ss
option description.
memmap=nn[KMG]@ss[KMG]
- [KNL] Force usage of a specific region of memory.
+ [KNL, X86,MIPS,XTENSA,EARLY] Force usage of a specific region of memory.
Region of memory to be used is from ss to ss+nn.
If @ss[KMG] is omitted, it is equivalent to mem=nn[KMG],
which limits max address to nn[KMG].
@@ -2380,11 +3708,11 @@
memmap=100M@2G,100M#3G,1G!1024G
memmap=nn[KMG]#ss[KMG]
- [KNL,ACPI] Mark specific memory as ACPI data.
+ [KNL,ACPI,EARLY] Mark specific memory as ACPI data.
Region of memory to be marked is from ss to ss+nn.
memmap=nn[KMG]$ss[KMG]
- [KNL,ACPI] Mark specific memory as reserved.
+ [KNL,ACPI,EARLY] Mark specific memory as reserved.
Region of memory to be reserved is from ss to ss+nn.
Example: Exclude memory from 0x18690000-0x1869ffff
memmap=64K$0x18690000
@@ -2394,14 +3722,14 @@
like Grub2, otherwise '$' and the following number
will be eaten.
- memmap=nn[KMG]!ss[KMG]
+ memmap=nn[KMG]!ss[KMG,EARLY]
[KNL,X86] Mark specific memory as protected.
Region of memory to be used, from ss to ss+nn.
The memory region may be marked as e820 type 12 (0xc)
and is NVDIMM or ADR memory.
memmap=<size>%<offset>-<oldtype>+<newtype>
- [KNL,ACPI] Convert memory within the specified region
+ [KNL,ACPI,EARLY] Convert memory within the specified region
from <oldtype> to <newtype>. If "-<oldtype>" is left
out, the whole region will be marked as <newtype>,
even if previously unavailable. If "+<newtype>" is left
@@ -2409,30 +3737,49 @@
specified as e820 types, e.g., 1 = RAM, 2 = reserved,
3 = ACPI, 12 = PRAM.
- memory_corruption_check=0/1 [X86]
+ memory_corruption_check=0/1 [X86,EARLY]
Some BIOSes seem to corrupt the first 64k of
memory when doing things like suspend/resume.
Setting this option will scan the memory
looking for corruption. Enabling this will
both detect corruption and prevent the kernel
from using the memory being corrupted.
- However, its intended as a diagnostic tool; if
+ However, it's intended as a diagnostic tool; if
repeatable BIOS-originated corruption always
affects the same memory, you can use memmap=
to prevent the kernel from using that memory.
- memory_corruption_check_size=size [X86]
+ memory_corruption_check_size=size [X86,EARLY]
By default it checks for corruption in the low
64k, making this memory unavailable for normal
use. Use this parameter to scan for
corruption in more or less memory.
- memory_corruption_check_period=seconds [X86]
+ memory_corruption_check_period=seconds [X86,EARLY]
By default it checks for corruption every 60
seconds. Use this parameter to check at some
other rate. 0 disables periodic checking.
- memtest= [KNL,X86,ARM,PPC] Enable memtest
+ memory_hotplug.memmap_on_memory
+ [KNL,X86,ARM] Boolean flag to enable this feature.
+ Format: {on | off (default)}
+ When enabled, runtime hotplugged memory will
+ allocate its internal metadata (struct pages,
+ those vmemmap pages cannot be optimized even
+ if hugetlb_free_vmemmap is enabled) from the
+ hotadded memory which will allow to hotadd a
+ lot of memory without requiring additional
+ memory to do so.
+ This feature is disabled by default because it
+ has some implication on large (e.g. GB)
+ allocations in some configurations (e.g. small
+ memory blocks).
+ The state of the flag can be read in
+ /sys/module/memory_hotplug/parameters/memmap_on_memory.
+ Note that even when enabled, there are a few cases where
+ the feature is not effective.
+
+ memtest= [KNL,X86,ARM,M68K,PPC,RISCV,EARLY] Enable memtest
Format: <integer>
default : 0 <disable>
Specifies the number of memtest passes to be
@@ -2444,13 +3791,11 @@
mem_encrypt= [X86-64] AMD Secure Memory Encryption (SME) control
Valid arguments: on, off
- Default (depends on kernel configuration option):
- on (CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT=y)
- off (CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT=n)
+ Default: off
mem_encrypt=on: Activate SME
mem_encrypt=off: Do not activate SME
- Refer to Documentation/x86/amd-memory-encryption.txt
+ Refer to Documentation/virt/kvm/x86/amd-memory-encryption.rst
for details on when memory encryption can be activated.
mem_sleep_default= [SUSPEND] Default system suspend mode:
@@ -2459,13 +3804,6 @@
deep - Suspend-To-RAM or equivalent (if supported)
See Documentation/admin-guide/pm/sleep-states.rst.
- meye.*= [HW] Set MotionEye Camera parameters
- See Documentation/media/v4l-drivers/meye.rst.
-
- mfgpt_irq= [IA-32] Specify the IRQ to use for the
- Multi-Function General Purpose Timers on AMD Geode
- platforms.
-
mfgptfix [X86-32] Fix MFGPT timers on AMD Geode platforms when
the BIOS has incorrectly applied a workaround. TinyBIOS
version 0.98 is known to be affected, 0.99 fixes the
@@ -2473,8 +3811,18 @@
mga= [HW,DRM]
- min_addr=nn[KMG] [KNL,BOOT,ia64] All physical memory below this
- physical address is ignored.
+ microcode= [X86] Control the behavior of the microcode loader.
+ Available options, comma separated:
+
+ base_rev=X - with <X> with format: <u32>
+ Set the base microcode revision of each thread when in
+ debug mode.
+
+ dis_ucode_ldr: disable the microcode loader
+
+ force_minrev:
+ Enable or disable the microcode minimal revision
+ enforcement for the runtime microcode loader.
mini2440= [ARM,HW,KNL]
Format:[0..2][b][c][t]
@@ -2495,16 +3843,137 @@
touchscreen support is not enabled in the mainstream
kernel as of 2.6.30, a preliminary port can be found
in the "bleeding edge" mini2440 support kernel at
- http://repo.or.cz/w/linux-2.6/mini2440.git
+ https://repo.or.cz/w/linux-2.6/mini2440.git
+
+ mitigations=
+ [X86,PPC,S390,ARM64,EARLY] Control optional mitigations for
+ CPU vulnerabilities. This is a set of curated,
+ arch-independent options, each of which is an
+ aggregation of existing arch-specific options.
+
+ Note, "mitigations" is supported if and only if the
+ kernel was built with CPU_MITIGATIONS=y.
+
+ off
+ Disable all optional CPU mitigations. This
+ improves system performance, but it may also
+ expose users to several CPU vulnerabilities.
+ Equivalent to: if nokaslr then kpti=0 [ARM64]
+ gather_data_sampling=off [X86]
+ indirect_target_selection=off [X86]
+ kvm.nx_huge_pages=off [X86]
+ l1tf=off [X86]
+ mds=off [X86]
+ mmio_stale_data=off [X86]
+ no_entry_flush [PPC]
+ no_uaccess_flush [PPC]
+ nobp=0 [S390]
+ nopti [X86,PPC]
+ nospectre_bhb [ARM64]
+ nospectre_v1 [X86,PPC]
+ nospectre_v2 [X86,PPC,S390,ARM64]
+ reg_file_data_sampling=off [X86]
+ retbleed=off [X86]
+ spec_rstack_overflow=off [X86]
+ spec_store_bypass_disable=off [X86,PPC]
+ spectre_bhi=off [X86]
+ spectre_v2_user=off [X86]
+ srbds=off [X86,INTEL]
+ ssbd=force-off [ARM64]
+ tsx_async_abort=off [X86]
+ vmscape=off [X86]
+
+ Exceptions:
+ This does not have any effect on
+ kvm.nx_huge_pages when
+ kvm.nx_huge_pages=force.
+
+ auto (default)
+ Mitigate all CPU vulnerabilities, but leave SMT
+ enabled, even if it's vulnerable. This is for
+ users who don't want to be surprised by SMT
+ getting disabled across kernel upgrades, or who
+ have other ways of avoiding SMT-based attacks.
+ Equivalent to: (default behavior)
+
+ auto,nosmt
+ Mitigate all CPU vulnerabilities, disabling SMT
+ if needed. This is for users who always want to
+ be fully mitigated, even if it means losing SMT.
+ Equivalent to: l1tf=flush,nosmt [X86]
+ mds=full,nosmt [X86]
+ tsx_async_abort=full,nosmt [X86]
+ mmio_stale_data=full,nosmt [X86]
+ retbleed=auto,nosmt [X86]
+
+ [X86] After one of the above options, additionally
+ supports attack-vector based controls as documented in
+ Documentation/admin-guide/hw-vuln/attack_vector_controls.rst
mminit_loglevel=
- [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this
+ [KNL,EARLY] When CONFIG_DEBUG_MEMORY_INIT is set, this
parameter allows control of the logging verbosity for
the additional memory initialisation checks. A value
of 0 disables mminit logging and a level of 4 will
log everything. Information is printed at KERN_DEBUG
so loglevel=8 may also need to be specified.
+ mmio_stale_data=
+ [X86,INTEL,EARLY] Control mitigation for the Processor
+ MMIO Stale Data vulnerabilities.
+
+ Processor MMIO Stale Data is a class of
+ vulnerabilities that may expose data after an MMIO
+ operation. Exposed data could originate or end in
+ the same CPU buffers as affected by MDS and TAA.
+ Therefore, similar to MDS and TAA, the mitigation
+ is to clear the affected CPU buffers.
+
+ This parameter controls the mitigation. The
+ options are:
+
+ full - Enable mitigation on vulnerable CPUs
+
+ full,nosmt - Enable mitigation and disable SMT on
+ vulnerable CPUs.
+
+ off - Unconditionally disable mitigation
+
+ On MDS or TAA affected machines,
+ mmio_stale_data=off can be prevented by an active
+ MDS or TAA mitigation as these vulnerabilities are
+ mitigated with the same mechanism so in order to
+ disable this mitigation, you need to specify
+ mds=off and tsx_async_abort=off too.
+
+ Not specifying this option is equivalent to
+ mmio_stale_data=full.
+
+ For details see:
+ Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
+
+ <module>.async_probe[=<bool>] [KNL]
+ If no <bool> value is specified or if the value
+ specified is not a valid <bool>, enable asynchronous
+ probe on this module. Otherwise, enable/disable
+ asynchronous probe on this module as indicated by the
+ <bool> value. See also: module.async_probe
+
+ module.async_probe=<bool>
+ [KNL] When set to true, modules will use async probing
+ by default. To enable/disable async probing for a
+ specific module, use the module specific control that
+ is documented under <module>.async_probe. When both
+ module.async_probe and <module>.async_probe are
+ specified, <module>.async_probe takes precedence for
+ the specific module.
+
+ module.enable_dups_trace
+ [KNL] When CONFIG_MODULE_DEBUG_AUTOLOAD_DUPS is set,
+ this means that duplicate request_module() calls will
+ trigger a WARN_ON() instead of a pr_warn(). Note that
+ if MODULE_DEBUG_AUTOLOAD_DUPS_TRACE is set, WARN_ON()s
+ will always be issued and this option does nothing.
module.sig_enforce
[KNL] When CONFIG_MODULE_SIG is set, this means that
modules without (valid) signatures will fail to load.
@@ -2525,7 +3994,7 @@
mousedev.yres= [MOUSE] Vertical screen resolution, used for devices
reporting absolute coordinates, such as tablets
- movablecore= [KNL,X86,IA-64,PPC]
+ movablecore= [KNL,X86,PPC,EARLY]
Format: nn[KMGTPE] | nn%
This parameter is the complement to kernelcore=, it
specifies the amount of memory used for migratable
@@ -2536,7 +4005,7 @@
that the amount of memory usable for all allocations
is not too small.
- movable_node [KNL] Boot-time switch to make hotplugable memory
+ movable_node [KNL,EARLY] Boot-time switch to make hotplugable memory
NUMA nodes to be movable. This means that the memory
of such nodes will be usable only for movable
allocations which rules out almost all kernel
@@ -2549,48 +4018,37 @@
<name>,<region-number>[,<base>,<size>,<buswidth>,<altbuswidth>]
mtdparts= [MTD]
- See drivers/mtd/cmdlinepart.c.
-
- multitce=off [PPC] This parameter disables the use of the pSeries
- firmware feature for updating multiple TCE entries
- at a time.
-
- onenand.bdry= [HW,MTD] Flex-OneNAND Boundary Configuration
-
- Format: [die0_boundary][,die0_lock][,die1_boundary][,die1_lock]
-
- boundary - index of last SLC block on Flex-OneNAND.
- The remaining blocks are configured as MLC blocks.
- lock - Configure if Flex-OneNAND boundary should be locked.
- Once locked, the boundary cannot be changed.
- 1 indicates lock status, 0 indicates unlock status.
-
- mtdset= [ARM]
- ARM/S3C2412 JIVE boot control
-
- See arch/arm/mach-s3c2412/mach-jive.c
+ See drivers/mtd/parsers/cmdlinepart.c
mtouchusb.raw_coordinates=
[HW] Make the MicroTouch USB driver use raw coordinates
('y', default) or cooked coordinates ('n')
- mtrr_chunk_size=nn[KMG] [X86]
+ mtrr=debug [X86,EARLY]
+ Enable printing debug information related to MTRR
+ registers at boot time.
+
+ mtrr_chunk_size=nn[KMG,X86,EARLY]
used for mtrr cleanup. It is largest continuous chunk
that could hold holes aka. UC entries.
- mtrr_gran_size=nn[KMG] [X86]
+ mtrr_gran_size=nn[KMG,X86,EARLY]
Used for mtrr cleanup. It is granularity of mtrr block.
Default is 1.
Large value could prevent small alignment from
using up MTRRs.
- mtrr_spare_reg_nr=n [X86]
+ mtrr_spare_reg_nr=n [X86,EARLY]
Format: <integer>
Range: 0,7 : spare reg number
Default : 1
Used for mtrr cleanup. It is spare mtrr entries number.
Set to 2 or more if your graphical card needs more.
+ multitce=off [PPC] This parameter disables the use of the pSeries
+ firmware feature for updating multiple TCE entries
+ at a time.
+
n2= [NET] SDL Inc. RISCom/N2 synchronous serial card
netdev= [NET] Network devices parameters
@@ -2600,20 +4058,24 @@
This usage is only documented in each driver source
file if at all.
+ netpoll.carrier_timeout=
+ [NET] Specifies amount of time (in seconds) that
+ netpoll should wait for a carrier. By default netpoll
+ waits 4 seconds.
+
nf_conntrack.acct=
[NETFILTER] Enable connection tracking flow accounting
0 to disable accounting
1 to enable accounting
Default value is 0.
- nfsaddrs= [NFS] Deprecated. Use ip= instead.
- See Documentation/filesystems/nfs/nfsroot.txt.
-
- nfsroot= [NFS] nfs root filesystem for disk-less boxes.
- See Documentation/filesystems/nfs/nfsroot.txt.
+ nfs.cache_getent=
+ [NFS] sets the pathname to the program which is used
+ to update the NFS client cache entries.
- nfsrootdebug [NFS] enable nfsroot debugging messages.
- See Documentation/filesystems/nfs/nfsroot.txt.
+ nfs.cache_getent_timeout=
+ [NFS] sets the timeout after which an attempt to
+ update a cache entry is deemed to have failed.
nfs.callback_nr_threads=
[NFSv4] set the total number of threads that the
@@ -2624,17 +4086,12 @@
[NFS] set the TCP port on which the NFSv4 callback
channel should listen.
- nfs.cache_getent=
- [NFS] sets the pathname to the program which is used
- to update the NFS client cache entries.
-
- nfs.cache_getent_timeout=
- [NFS] sets the timeout after which an attempt to
- update a cache entry is deemed to have failed.
-
- nfs.idmap_cache_timeout=
- [NFS] set the maximum lifetime for idmapper cache
- entries.
+ nfs.delay_retrans=
+ [NFS] specifies the number of times the NFSv4 client
+ retries the request before returning an EAGAIN error,
+ after a reply of NFS4ERR_DELAY from the server.
+ Only applies if the softerr mount option is enabled,
+ and the specified value is >= 0.
nfs.enable_ino64=
[NFS] enable 64-bit inode numbers.
@@ -2643,6 +4100,10 @@
of returning the full 64-bit number.
The default is to return 64-bit inode numbers.
+ nfs.idmap_cache_timeout=
+ [NFS] set the maximum lifetime for idmapper cache
+ entries.
+
nfs.max_session_cb_slots=
[NFSv4.1] Sets the maximum number of session
slots the client will assign to the callback
@@ -2670,21 +4131,14 @@
will be autodetected by the client, and it will fall
back to using the idmapper.
To turn off this behaviour, set the value to '0'.
+
nfs.nfs4_unique_id=
[NFS4] Specify an additional fixed unique ident-
ification string that NFSv4 clients can insert into
their nfs_client_id4 string. This is typically a
UUID that is generated at system install time.
- nfs.send_implementation_id =
- [NFSv4.1] Send client implementation identification
- information in exchange_id requests.
- If zero, no implementation identification information
- will be sent.
- The default is to send the implementation identification
- information.
-
- nfs.recover_lost_locks =
+ nfs.recover_lost_locks=
[NFSv4] Attempt to recover locks that were lost due
to a lease timeout on the server. Please note that
doing this risks data corruption, since there are
@@ -2696,7 +4150,15 @@
The default parameter value of '0' causes the kernel
not to attempt recovery of lost locks.
- nfs4.layoutstats_timer =
+ nfs.send_implementation_id=
+ [NFSv4.1] Send client implementation identification
+ information in exchange_id requests.
+ If zero, no implementation identification information
+ will be sent.
+ The default is to send the implementation identification
+ information.
+
+ nfs4.layoutstats_timer=
[NFSv4.2] Change the rate at which the kernel sends
layoutstats to the pNFS metadata server.
@@ -2705,6 +4167,11 @@
driver. A non-zero value sets the minimum interval
in seconds between layoutstats transmissions.
+ nfsd.inter_copy_offload_enable=
+ [NFSv4.2] When set to 1, the server will support
+ server-to-server copies for which this server is
+ the destination of the copy.
+
nfsd.nfs4_disable_idmapping=
[NFSv4] When set to the default of '1', the NFSv4
server will return only numeric uids and gids to
@@ -2712,18 +4179,42 @@
and gids from such clients. This is intended to ease
migration from NFSv2/v3.
+ nfsd.nfsd4_ssc_umount_timeout=
+ [NFSv4.2] When used as the destination of a
+ server-to-server copy, knfsd temporarily mounts
+ the source server. It caches the mount in case
+ it will be needed again, and discards it if not
+ used for the number of milliseconds specified by
+ this parameter.
+
+ nfsaddrs= [NFS] Deprecated. Use ip= instead.
+ See Documentation/admin-guide/nfs/nfsroot.rst.
+
+ nfsroot= [NFS] nfs root filesystem for disk-less boxes.
+ See Documentation/admin-guide/nfs/nfsroot.rst.
+
+ nfsrootdebug [NFS] enable nfsroot debugging messages.
+ See Documentation/admin-guide/nfs/nfsroot.rst.
+
+ nmi_backtrace.backtrace_idle [KNL]
+ Dump stacks even of idle CPUs in response to an
+ NMI stack-backtrace request.
+
nmi_debug= [KNL,SH] Specify one or more actions to take
when a NMI is triggered.
Format: [state][,regs][,debounce][,die]
nmi_watchdog= [KNL,BUGS=X86] Debugging features for SMP kernels
- Format: [panic,][nopanic,][num]
+ Format: [panic,][nopanic,][rNNN,][num]
Valid num: 0 or 1
0 - turn hardlockup detector in nmi_watchdog off
1 - turn hardlockup detector in nmi_watchdog on
+ rNNN - configure the watchdog with raw perf event 0xNNN
+
When panic is specified, panic when an NMI watchdog
- timeout occurs (or 'nopanic' to override the opposite
- default). To disable both hard and soft lockup detectors,
+ timeout occurs (or 'nopanic' to not panic on an NMI
+ watchdog, if CONFIG_BOOTPARAM_HARDLOCKUP_PANIC is set)
+ To disable both hard and soft lockup detectors,
please see 'nowatchdog'.
This is useful when you use a panic=... timeout and
need the box quickly up again.
@@ -2731,18 +4222,27 @@
These settings can be accessed at runtime via
the nmi_watchdog and hardlockup_panic sysctls.
- netpoll.carrier_timeout=
- [NET] Specifies amount of time (in seconds) that
- netpoll should wait for a carrier. By default netpoll
- waits 4 seconds.
-
no387 [BUGS=X86-32] Tells the kernel to use the 387 maths
emulation library even if a 387 maths coprocessor
is present.
- no5lvl [X86-64] Disable 5-level paging mode. Forces
+ no4lvl [RISCV,EARLY] Disable 4-level and 5-level paging modes.
+ Forces kernel to use 3-level paging instead.
+
+ no5lvl [X86-64,RISCV,EARLY] Disable 5-level paging mode. Forces
kernel to use 4-level paging instead.
+ noalign [KNL,ARM]
+
+ noapic [SMP,APIC,EARLY] Tells the kernel to not make use of any
+ IOAPICs that may be present in the system.
+
+ noapictimer [APIC,X86] Don't set up the APIC timer
+
+ noautogroup Disable scheduler automatic task group creation.
+
+ nocache [ARM,EARLY]
+
no_console_suspend
[HW] Never suspend the console
Disable suspending of consoles during suspend and
@@ -2758,47 +4258,14 @@
/sys/module/printk/parameters/console_suspend) to
turn on/off it dynamically.
- noaliencache [MM, NUMA, SLAB] Disables the allocation of alien
- caches in the slab allocator. Saves per-node memory,
- but will impact performance.
-
- noalign [KNL,ARM]
-
- noaltinstr [S390] Disables alternative instructions patching
- (CPU alternatives feature).
-
- noapic [SMP,APIC] Tells the kernel to not make use of any
- IOAPICs that may be present in the system.
-
- noautogroup Disable scheduler automatic task group creation.
-
- nobats [PPC] Do not use BATs for mapping kernel lowmem
- on "Classic" PPC cores.
-
- nocache [ARM]
-
- noclflush [BUGS=X86] Don't use the CLFLUSH instruction
-
- nodelayacct [KNL] Disable per-task delay accounting
+ no_debug_objects
+ [KNL,EARLY] Disable object debugging
nodsp [SH] Disable hardware DSP at boot time.
- noefi Disable EFI runtime services support.
-
- noexec [IA-64]
-
- noexec [X86]
- On X86-32 available only on PAE configured kernels.
- noexec=on: enable non-executable mappings (default)
- noexec=off: disable non-executable mappings
-
- nosmap [X86]
- Disable SMAP (Supervisor Mode Access Prevention)
- even if it is supported by processor.
+ noefi [EFI,EARLY] Disable EFI runtime services support.
- nosmep [X86]
- Disable SMEP (Supervisor Mode Execution Prevention)
- even if it is supported by processor.
+ no_entry_flush [PPC,EARLY] Don't flush the L1-D cache when entering the kernel.
noexec32 [X86-64]
This affects only 32-bit executables.
@@ -2807,68 +4274,40 @@
noexec32=off: disable non-executable mappings
read implies executable mappings
+ no_file_caps Tells the kernel not to honor file capabilities. The
+ only way then for a file to be executed with privilege
+ is to be setuid root or executed by root.
+
nofpu [MIPS,SH] Disable hardware FPU at boot time.
+ nofsgsbase [X86] Disables FSGSBASE instructions.
+
nofxsr [BUGS=X86-32] Disables x86 floating point extended
register save and restore. The kernel will only save
legacy floating-point registers on task switch.
- nohugeiomap [KNL,x86] Disable kernel huge I/O mappings.
+ nogbpages [X86] Do not use GB pages for kernel direct mappings.
- nosmt [KNL,S390] Disable symmetric multithreading (SMT).
- Equivalent to smt=1.
+ no_hash_pointers
+ [KNL,EARLY]
+ Alias for "hash_pointers=never".
- [KNL,x86] Disable symmetric multithreading (SMT).
- nosmt=force: Force disable SMT, cannot be undone
- via the sysfs control file.
-
- nospectre_v1 [PPC] Disable mitigations for Spectre Variant 1 (bounds
- check bypass). With this option data leaks are possible
- in the system.
-
- nospectre_v2 [X86] Disable all mitigations for the Spectre variant 2
- (indirect branch prediction) vulnerability. System may
- allow data leaks with this option, which is equivalent
- to spectre_v2=off.
-
- nospec_store_bypass_disable
- [HW] Disable all mitigations for the Speculative Store Bypass vulnerability
-
- noxsave [BUGS=X86] Disables x86 extended register state save
- and restore using xsave. The kernel will fallback to
- enabling legacy floating-point and sse state.
-
- noxsaveopt [X86] Disables xsaveopt used in saving x86 extended
- register states. The kernel will fall back to use
- xsave to save the states. By using this parameter,
- performance of saving the states is degraded because
- xsave doesn't support modified optimization while
- xsaveopt supports it on xsaveopt enabled systems.
-
- noxsaves [X86] Disables xsaves and xrstors used in saving and
- restoring x86 extended register state in compacted
- form of xsave area. The kernel will fall back to use
- xsaveopt and xrstor to save and restore the states
- in standard form of xsave area. By using this
- parameter, xsave area per process might occupy more
- memory on xsaves enabled systems.
+ nohibernate [HIBERNATION] Disable hibernation and resume.
- nohlt [BUGS=ARM,SH] Tells the kernel that the sleep(SH) or
- wfi(ARM) instruction doesn't work correctly and not to
- use it. This is also useful when using JTAG debugger.
+ nohlt [ARM,ARM64,MICROBLAZE,MIPS,PPC,RISCV,SH] Forces the kernel to
+ busy wait in do_idle() and not use the arch_cpu_idle()
+ implementation; requires CONFIG_GENERIC_IDLE_POLL_SETUP
+ to be effective. This is useful on platforms where the
+ sleep(SH) or wfi(ARM,ARM64) instructions do not work
+ correctly or when doing power measurements to evaluate
+ the impact of the sleep instructions. This is also
+ useful when using JTAG debugger.
- no_file_caps Tells the kernel not to honor file capabilities. The
- only way then for a file to be executed with privilege
- is to be setuid root or executed by root.
+ nohpet [X86] Don't use the HPET timer.
- nohalt [IA-64] Tells the kernel not to use the power saving
- function PAL_HALT_LIGHT when idle. This increases
- power-consumption. On the positive side, it reduces
- interrupt wake-up latency, which may improve performance
- in certain environments such as networked servers or
- real-time systems.
+ nohugeiomap [KNL,X86,PPC,ARM64,EARLY] Disable kernel huge I/O mappings.
- nohibernate [HIBERNATION] Disable hibernation and resume.
+ nohugevmalloc [KNL,X86,PPC,ARM64,EARLY] Disable kernel huge vmalloc mappings.
nohz= [KNL] Boottime enable/disable dynamic ticks
Valid arguments: on, off
@@ -2884,66 +4323,80 @@
just as if they had also been called out in the
rcu_nocbs= boot parameter.
- noiotrap [SH] Disables trapped I/O port accesses.
-
- noirqdebug [X86-32] Disables the code which attempts to detect and
- disable unhandled interrupt sources.
-
- no_timer_check [X86,APIC] Disables the code which tests for
- broken timer IRQ sources.
-
- noisapnp [ISAPNP] Disables ISA PnP code.
+ Note that this argument takes precedence over
+ the CONFIG_RCU_NOCB_CPU_DEFAULT_ALL option.
noinitrd [RAM] Tells the kernel not to load any configured
initial RAM disk.
- nointremap [X86-64, Intel-IOMMU] Do not enable interrupt
+ nointremap [X86-64,Intel-IOMMU,EARLY] Do not enable interrupt
remapping.
[Deprecated - use intremap=off]
- nointroute [IA-64]
-
- noinvpcid [X86] Disable the INVPCID cpu feature.
-
- nojitter [IA-64] Disables jitter checking for ITC timers.
+ noinvpcid [X86,EARLY] Disable the INVPCID cpu feature.
- no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver
+ noiotrap [SH] Disables trapped I/O port accesses.
- no-kvmapf [X86,KVM] Disable paravirtualized asynchronous page
- fault handling.
+ noirqdebug [X86-32] Disables the code which attempts to detect and
+ disable unhandled interrupt sources.
- no-vmw-sched-clock
- [X86,PV_OPS] Disable paravirtualized VMware scheduler
- clock and use the default one.
+ noisapnp [ISAPNP] Disables ISA PnP code.
- no-steal-acc [X86,KVM] Disable paravirtualized steal time accounting.
- steal time is computed, but won't influence scheduler
- behaviour
+ nokaslr [KNL,EARLY]
+ When CONFIG_RANDOMIZE_BASE is set, this disables
+ kernel and module base offset ASLR (Address Space
+ Layout Randomization).
- nolapic [X86-32,APIC] Do not enable or use the local APIC.
+ no-kvmapf [X86,KVM,EARLY] Disable paravirtualized asynchronous page
+ fault handling.
- nolapic_timer [X86-32,APIC] Do not use the local APIC timer.
+ no-kvmclock [X86,KVM,EARLY] Disable paravirtualized KVM clock driver
- noltlbs [PPC] Do not use large page/tlb entries for kernel
- lowmem mapping on PPC40x and PPC8xx
+ nolapic [X86-32,APIC,EARLY] Do not enable or use the local APIC.
- nomca [IA-64] Disable machine check abort handling
+ nolapic_timer [X86-32,APIC,EARLY] Do not use the local APIC timer.
nomce [X86-32] Disable Machine Check Exception
nomfgpt [X86-32] Disable Multi-Function General Purpose
Timer usage (for AMD Geode machines).
+ nomodeset Disable kernel modesetting. Most systems' firmware
+ sets up a display mode and provides framebuffer memory
+ for output. With nomodeset, DRM and fbdev drivers will
+ not load if they could possibly displace the pre-
+ initialized output. Only the system framebuffer will
+ be available for use. The respective drivers will not
+ perform display-mode changes or accelerated rendering.
+
+ Useful as error fallback, or for testing and debugging.
+
+ nomodule Disable module load
+
nonmi_ipi [X86] Disable using NMI IPIs during panic/reboot to
shutdown the other cpus. Instead use the REBOOT_VECTOR
irq.
- nomodule Disable module load
-
- nopat [X86] Disable PAT (page attribute table extension of
+ nopat [X86,EARLY] Disable PAT (page attribute table extension of
pagetables) support.
- nopcid [X86-64] Disable the PCID cpu feature.
+ nopcid [X86-64,EARLY] Disable the PCID cpu feature.
+
+ nopku [X86] Disable Memory Protection Keys CPU feature found
+ in some Intel CPUs.
+
+ nopti [X86-64,EARLY]
+ Equivalent to pti=off
+
+ nopv= [X86,XEN,KVM,HYPER_V,VMWARE,EARLY]
+ Disables the PV optimizations forcing the guest to run
+ as generic guest with no PV drivers. Currently support
+ XEN HVM, KVM, HYPER_V and VMWARE guest.
+
+ nopvspin [X86,XEN,KVM,EARLY]
+ Disables the qspinlock slow path using PV optimizations
+ which allow the hypervisor to 'idle' the guest on lock
+ contention.
norandmaps Don't use address space randomization. Equivalent to
echo 0 > /proc/sys/kernel/randomize_va_space
@@ -2951,11 +4404,6 @@
noreplace-smp [X86-32,SMP] Don't replace SMP instructions
with UP alternatives
- nordrand [X86] Disable kernel use of the RDRAND and
- RDSEED instructions even if they are supported
- by the processor. RDRAND and RDSEED are still
- available to user space applications.
-
noresume [SWSUSP] Disables resume and restores original swap
space.
@@ -2963,52 +4411,105 @@
This is required for the Braillex ib80-piezo Braille
reader made by F.H. Papenmeier (Germany).
- nosbagart [IA-64]
+ nosgx [X86-64,SGX,EARLY] Disables Intel SGX kernel support.
- nosep [BUGS=X86-32] Disables x86 SYSENTER/SYSEXIT support.
+ nosmap [PPC,EARLY]
+ Disable SMAP (Supervisor Mode Access Prevention)
+ even if it is supported by processor.
+
+ nosmep [PPC64s,EARLY]
+ Disable SMEP (Supervisor Mode Execution Prevention)
+ even if it is supported by processor.
- nosmp [SMP] Tells an SMP kernel to act as a UP kernel,
+ nosmp [SMP,EARLY] Tells an SMP kernel to act as a UP kernel,
and disable the IO APIC. legacy for "maxcpus=0".
+ nosmt [KNL,MIPS,PPC,EARLY] Disable symmetric multithreading (SMT).
+ Equivalent to smt=1.
+
+ [KNL,X86,PPC,S390] Disable symmetric multithreading (SMT).
+ nosmt=force: Force disable SMT, cannot be undone
+ via the sysfs control file.
+
nosoftlockup [KNL] Disable the soft-lockup detector.
+ nospec_store_bypass_disable
+ [HW,EARLY] Disable all mitigations for the Speculative
+ Store Bypass vulnerability
+
+ nospectre_bhb [ARM64,EARLY] Disable all mitigations for Spectre-BHB (branch
+ history injection) vulnerability. System may allow data leaks
+ with this option.
+
+ nospectre_v1 [X86,PPC,EARLY] Disable mitigations for Spectre Variant 1
+ (bounds check bypass). With this option data leaks are
+ possible in the system.
+
+ nospectre_v2 [X86,PPC_E500,ARM64,EARLY] Disable all mitigations
+ for the Spectre variant 2 (indirect branch
+ prediction) vulnerability. System may allow data
+ leaks with this option.
+
+ no-steal-acc [X86,PV_OPS,ARM64,PPC/PSERIES,RISCV,LOONGARCH,EARLY]
+ Disable paravirtualized steal time accounting. steal time
+ is computed, but won't influence scheduler behaviour
+
nosync [HW,M68K] Disables sync negotiation for all devices.
+ no_timer_check [X86,APIC] Disables the code which tests for broken
+ timer IRQ sources, i.e., the IO-APIC timer. This can
+ work around problems with incorrect timer
+ initialization on some boards.
+
+ no_uaccess_flush
+ [PPC,EARLY] Don't flush the L1-D cache after accessing user data.
+
+ novmcoredd [KNL,KDUMP]
+ Disable device dump. Device dump allows drivers to
+ append dump data to vmcore so you can collect driver
+ specified debug info. Drivers can append the data
+ without any limit and this data is stored in memory,
+ so this may cause significant memory stress. Disabling
+ device dump can help save memory but the driver debug
+ data will be no longer available. This parameter
+ is only available when CONFIG_PROC_VMCORE_DEVICE_DUMP
+ is set.
+
+ no-vmw-sched-clock
+ [X86,PV_OPS,EARLY] Disable paravirtualized VMware
+ scheduler clock and use the default one.
+
nowatchdog [KNL] Disable both lockup detectors, i.e.
soft-lockup and NMI watchdog (hard-lockup).
- nowb [ARM]
-
- nox2apic [X86-64,APIC] Do not enable x2APIC mode.
-
- cpu0_hotplug [X86] Turn on CPU0 hotplug feature when
- CONFIG_BOOTPARAM_HOTPLUG_CPU0 is off.
- Some features depend on CPU0. Known dependencies are:
- 1. Resume from suspend/hibernate depends on CPU0.
- Suspend/hibernate will fail if CPU0 is offline and you
- need to online CPU0 before suspend/hibernate.
- 2. PIC interrupts also depend on CPU0. CPU0 can't be
- removed if a PIC interrupt is detected.
- It's said poweroff/reboot may depend on CPU0 on some
- machines although I haven't seen such issues so far
- after CPU0 is offline on a few tested machines.
- If the dependencies are under your control, you can
- turn on cpu0_hotplug.
-
- nps_mtm_hs_ctr= [KNL,ARC]
- This parameter sets the maximum duration, in
- cycles, each HW thread of the CTOP can run
- without interruptions, before HW switches it.
- The actual maximum duration is 16 times this
- parameter's value.
- Format: integer between 1 and 255
- Default: 255
-
- nptcg= [IA-64] Override max number of concurrent global TLB
- purges which is reported from either PAL_VM_SUMMARY or
- SAL PALO.
-
- nr_cpus= [SMP] Maximum number of processors that an SMP kernel
+ nowb [ARM,EARLY]
+
+ nox2apic [X86-64,APIC,EARLY] Do not enable x2APIC mode.
+
+ NOTE: this parameter will be ignored on systems with the
+ LEGACY_XAPIC_DISABLED bit set in the
+ IA32_XAPIC_DISABLE_STATUS MSR.
+
+ noxsave [BUGS=X86] Disables x86 extended register state save
+ and restore using xsave. The kernel will fallback to
+ enabling legacy floating-point and sse state.
+
+ noxsaveopt [X86] Disables xsaveopt used in saving x86 extended
+ register states. The kernel will fall back to use
+ xsave to save the states. By using this parameter,
+ performance of saving the states is degraded because
+ xsave doesn't support modified optimization while
+ xsaveopt supports it on xsaveopt enabled systems.
+
+ noxsaves [X86] Disables xsaves and xrstors used in saving and
+ restoring x86 extended register state in compacted
+ form of xsave area. The kernel will fall back to use
+ xsaveopt and xrstor to save and restore the states
+ in standard form of xsave area. By using this
+ parameter, xsave area per process might occupy more
+ memory on xsaves enabled systems.
+
+ nr_cpus= [SMP,EARLY] Maximum number of processors that an SMP kernel
could support. nr_cpus=n : n >= 1 limits the kernel to
support 'n' processors. It could be larger than the
number of already plugged CPU during bootup, later in
@@ -3019,16 +4520,41 @@
nr_uarts= [SERIAL] maximum number of UARTs to be registered.
- numa_balancing= [KNL,X86] Enable or disable automatic NUMA balancing.
+ numa=off [KNL, ARM64, PPC, RISCV, SPARC, X86, EARLY]
+ Disable NUMA, Only set up a single NUMA node
+ spanning all memory.
+
+ numa=fake=<size>[MG]
+ [KNL, ARM64, RISCV, X86, EARLY]
+ If given as a memory unit, fills all system RAM with
+ nodes of size interleaved over physical nodes.
+
+ numa=fake=<N>
+ [KNL, ARM64, RISCV, X86, EARLY]
+ If given as an integer, fills all system RAM with N
+ fake nodes interleaved over physical nodes.
+
+ numa=fake=<N>U
+ [KNL, ARM64, RISCV, X86, EARLY]
+ If given as an integer followed by 'U', it will
+ divide each physical node into N emulated nodes.
+
+ numa=noacpi [X86] Don't parse the SRAT table for NUMA setup
+
+ numa=nohmat [X86] Don't parse the HMAT table for NUMA setup, or
+ soft-reserved memory partitioning.
+
+ numa_balancing= [KNL,ARM64,PPC,RISCV,S390,X86] Enable or disable automatic
+ NUMA balancing.
Allowed values are enable and disable
numa_zonelist_order= [KNL, BOOT] Select zonelist order for NUMA.
'node', 'default' can be specified
This can be set from sysctl after boot.
- See Documentation/sysctl/vm.txt for details.
+ See Documentation/admin-guide/sysctl/vm.rst for details.
- ohci1394_dma=early [HW] enable debugging via the ohci1394 driver.
- See Documentation/debugging-via-ohci1394.txt for more
+ ohci1394_dma=early [HW,EARLY] enable debugging via the ohci1394 driver.
+ See Documentation/core-api/debugging-via-ohci1394.rst for more
info.
olpc_ec_timeout= [OLPC] ms delay when issuing EC commands
@@ -3043,54 +4569,105 @@
For example, to override I2C bus2:
omap_mux=i2c2_scl.i2c2_scl=0x100,i2c2_sda.i2c2_sda=0x100
- oprofile.timer= [HW]
- Use timer interrupt instead of performance counters
-
- oprofile.cpu_type= Force an oprofile cpu type
- This might be useful if you have an older oprofile
- userland or if you want common events.
- Format: { arch_perfmon }
- arch_perfmon: [X86] Force use of architectural
- perfmon on Intel CPUs instead of the
- CPU specific event set.
- timer: [X86] Force use of architectural NMI
- timer mode (see also oprofile.timer
- for generic hr timer mode)
-
- oops=panic Always panic on oopses. Default is to just kill the
+ onenand.bdry= [HW,MTD] Flex-OneNAND Boundary Configuration
+
+ Format: [die0_boundary][,die0_lock][,die1_boundary][,die1_lock]
+
+ boundary - index of last SLC block on Flex-OneNAND.
+ The remaining blocks are configured as MLC blocks.
+ lock - Configure if Flex-OneNAND boundary should be locked.
+ Once locked, the boundary cannot be changed.
+ 1 indicates lock status, 0 indicates unlock status.
+
+ oops=panic [KNL,EARLY]
+ Always panic on oopses. Default is to just kill the
process, but there is a small probability of
deadlocking the machine.
This will also cause panics on machine check exceptions.
Useful together with panic=30 to trigger a reboot.
- page_owner= [KNL] Boot-time page_owner enabling option.
+ page_alloc.shuffle=
+ [KNL] Boolean flag to control whether the page allocator
+ should randomize its free lists. This parameter can be
+ used to enable/disable page randomization. The state of
+ the flag can be read from sysfs at:
+ /sys/module/page_alloc/parameters/shuffle.
+ This parameter is only available if CONFIG_SHUFFLE_PAGE_ALLOCATOR=y.
+
+ page_owner= [KNL,EARLY] Boot-time page_owner enabling option.
Storage of the information about who allocated
each page is disabled in default. With this switch,
we can turn it on.
on: enable the feature
- page_poison= [KNL] Boot-time parameter changing the state of
+ page_poison= [KNL,EARLY] Boot-time parameter changing the state of
poisoning on the buddy allocator, available with
CONFIG_PAGE_POISONING=y.
off: turn off poisoning (default)
on: turn on poisoning
+ page_reporting.page_reporting_order=
+ [KNL] Minimal page reporting order
+ Format: <integer>
+ Adjust the minimal page reporting order. The page
+ reporting is disabled when it exceeds MAX_PAGE_ORDER.
+
panic= [KNL] Kernel behaviour on panic: delay <timeout>
timeout > 0: seconds before rebooting
timeout = 0: wait forever
timeout < 0: reboot immediately
Format: <timeout>
- panic_on_warn panic() instead of WARN(). Useful to cause kdump
+ panic_on_taint= [KNL,EARLY]
+ Bitmask for conditionally calling panic() in add_taint()
+ Format: <hex>[,nousertaint]
+ Hexadecimal bitmask representing the set of TAINT flags
+ that will cause the kernel to panic when add_taint() is
+ called with any of the flags in this set.
+ The optional switch "nousertaint" can be utilized to
+ prevent userspace forced crashes by writing to sysctl
+ /proc/sys/kernel/tainted any flagset matching with the
+ bitmask set on panic_on_taint.
+ See Documentation/admin-guide/tainted-kernels.rst for
+ extra details on the taint flags that users can pick
+ to compose the bitmask to assign to panic_on_taint.
+
+ panic_on_warn=1 panic() instead of WARN(). Useful to cause kdump
on a WARN().
- crash_kexec_post_notifiers
- Run kdump after running panic-notifiers and dumping
- kmsg. This only for the users who doubt kdump always
- succeeds in any situation.
- Note that this also increases risks of kdump failure,
- because some panic notifiers can make the crashed
- kernel more unstable.
+ panic_print= Bitmask for printing system info when panic happens.
+ User can chose combination of the following bits:
+ bit 0: print all tasks info
+ bit 1: print system memory info
+ bit 2: print timer info
+ bit 3: print locks info if CONFIG_LOCKDEP is on
+ bit 4: print ftrace buffer
+ bit 5: replay all kernel messages on consoles at the end of panic
+ bit 6: print all CPUs backtrace (if available in the arch)
+ bit 7: print only tasks in uninterruptible (blocked) state
+ *Be aware* that this option may print a _lot_ of lines,
+ so there are risks of losing older messages in the log.
+ Use this option carefully, maybe worth to setup a
+ bigger log buffer with "log_buf_len" along with this.
+
+ panic_sys_info= A comma separated list of extra information to be dumped
+ on panic.
+ Format: val[,val...]
+ Where @val can be any of the following:
+
+ tasks: print all tasks info
+ mem: print system memory info
+ timers: print timers info
+ locks: print locks info if CONFIG_LOCKDEP is on
+ ftrace: print ftrace buffer
+ all_bt: print all CPUs backtrace (if available in the arch)
+ blocked_tasks: print only tasks in uninterruptible (blocked) state
+
+ This is a human readable alternative to the 'panic_print' option.
+
+ panic_console_replay
+ When panic happens, replay all kernel messages on
+ consoles at the end of panic.
parkbd.port= [HW] Parallel port number the keyboard adapter is
connected to, default is 0.
@@ -3121,18 +4698,104 @@
Currently this function knows 686a and 8231 chips.
Format: [spp|ps2|epp|ecp|ecpepp]
- pause_on_oops=
+ pata_legacy.all= [HW,LIBATA]
+ Format: <int>
+ Set to non-zero to probe primary and secondary ISA
+ port ranges on PCI systems where no PCI PATA device
+ has been found at either range. Disabled by default.
+
+ pata_legacy.autospeed= [HW,LIBATA]
+ Format: <int>
+ Set to non-zero if a chip is present that snoops speed
+ changes. Disabled by default.
+
+ pata_legacy.ht6560a= [HW,LIBATA]
+ Format: <int>
+ Set to 1, 2, or 3 for HT 6560A on the primary channel,
+ the secondary channel, or both channels respectively.
+ Disabled by default.
+
+ pata_legacy.ht6560b= [HW,LIBATA]
+ Format: <int>
+ Set to 1, 2, or 3 for HT 6560B on the primary channel,
+ the secondary channel, or both channels respectively.
+ Disabled by default.
+
+ pata_legacy.iordy_mask= [HW,LIBATA]
+ Format: <int>
+ IORDY enable mask. Set individual bits to allow IORDY
+ for the respective channel. Bit 0 is for the first
+ legacy channel handled by this driver, bit 1 is for
+ the second channel, and so on. The sequence will often
+ correspond to the primary legacy channel, the secondary
+ legacy channel, and so on, but the handling of a PCI
+ bus and the use of other driver options may interfere
+ with the sequence. By default IORDY is allowed across
+ all channels.
+
+ pata_legacy.opti82c46x= [HW,LIBATA]
+ Format: <int>
+ Set to 1, 2, or 3 for Opti 82c611A on the primary
+ channel, the secondary channel, or both channels
+ respectively. Disabled by default.
+
+ pata_legacy.opti82c611a= [HW,LIBATA]
+ Format: <int>
+ Set to 1, 2, or 3 for Opti 82c465MV on the primary
+ channel, the secondary channel, or both channels
+ respectively. Disabled by default.
+
+ pata_legacy.pio_mask= [HW,LIBATA]
+ Format: <int>
+ PIO mode mask for autospeed devices. Set individual
+ bits to allow the use of the respective PIO modes.
+ Bit 0 is for mode 0, bit 1 is for mode 1, and so on.
+ All modes allowed by default.
+
+ pata_legacy.probe_all= [HW,LIBATA]
+ Format: <int>
+ Set to non-zero to probe tertiary and further ISA
+ port ranges on PCI systems. Disabled by default.
+
+ pata_legacy.probe_mask= [HW,LIBATA]
+ Format: <int>
+ Probe mask for legacy ISA PATA ports. Depending on
+ platform configuration and the use of other driver
+ options up to 6 legacy ports are supported: 0x1f0,
+ 0x170, 0x1e8, 0x168, 0x1e0, 0x160, however probing
+ of individual ports can be disabled by setting the
+ corresponding bits in the mask to 1. Bit 0 is for
+ the first port in the list above (0x1f0), and so on.
+ By default all supported ports are probed.
+
+ pata_legacy.qdi= [HW,LIBATA]
+ Format: <int>
+ Set to non-zero to probe QDI controllers. By default
+ set to 1 if CONFIG_PATA_QDI_MODULE, 0 otherwise.
+
+ pata_legacy.winbond= [HW,LIBATA]
+ Format: <int>
+ Set to non-zero to probe Winbond controllers. Use
+ the standard I/O port (0x130) if 1, otherwise the
+ value given is the I/O port to use (typically 0x1b0).
+ By default set to 1 if CONFIG_PATA_WINBOND_VLB_MODULE,
+ 0 otherwise.
+
+ pata_platform.pio_mask= [HW,LIBATA]
+ Format: <int>
+ Supported PIO mode mask. Set individual bits to allow
+ the use of the respective PIO modes. Bit 0 is for
+ mode 0, bit 1 is for mode 1, and so on. Mode 0 only
+ allowed by default.
+
+ pause_on_oops=<int>
Halt all CPUs after the first oops has been printed for
the specified number of seconds. This is to be used if
your oopses keep scrolling off the screen.
pcbit= [HW,ISDN]
- pcd. [PARIDE]
- See header of drivers/block/paride/pcd.c.
- See also Documentation/blockdev/paride.txt.
-
- pci=option[,option...] [PCI] various PCI subsystem options.
+ pci=option[,option...] [PCI,EARLY] various PCI subsystem options.
Some options herein operate on a specific device
or a set of devices (<pci_dev>). These are
@@ -3246,6 +4909,15 @@
please report a bug.
nocrs [X86] Ignore PCI host bridge windows from ACPI.
If you need to use this, please report a bug.
+ use_e820 [X86] Use E820 reservations to exclude parts of
+ PCI host bridge windows. This is a workaround
+ for BIOS defects in host bridge _CRS methods.
+ If you need to use this, please report a bug to
+ <linux-pci@vger.kernel.org>.
+ no_e820 [X86] Ignore E820 reservations for PCI host
+ bridge windows. This is the default on modern
+ hardware. If you need to use this, please report
+ a bug to <linux-pci@vger.kernel.org>.
routeirq Do IRQ routing for all PCI devices.
This is normally done in pci_enable_device(),
so this option is a temporary workaround
@@ -3290,14 +4962,17 @@
specify the device is described above.
If <order of align> is not specified,
PAGE_SIZE is used as alignment.
- PCI-PCI bridge can be specified, if resource
+ A PCI-PCI bridge can be specified if resource
windows need to be expanded.
To specify the alignment for several
instances of a device, the PCI vendor,
device, subvendor, and subdevice may be
- specified, e.g., 4096@pci:8086:9c22:103c:198f
+ specified, e.g., 12@pci:8086:9c22:103c:198f
+ for 4096-byte alignment.
ecrc= Enable/disable PCIe ECRC (transaction layer
- end-to-end CRC checking).
+ end-to-end CRC checking). Only effective if
+ OS has native AER control (either granted by
+ ACPI _OSC or forced via "pcie_ports=native")
bios: Use BIOS/firmware settings. This is the
the default.
off: Turn ECRC off
@@ -3305,8 +4980,15 @@
hpiosize=nn[KMG] The fixed amount of bus space which is
reserved for hotplug bridge's IO window.
Default size is 256 bytes.
+ hpmmiosize=nn[KMG] The fixed amount of bus space which is
+ reserved for hotplug bridge's MMIO window.
+ Default size is 2 megabytes.
+ hpmmioprefsize=nn[KMG] The fixed amount of bus space which is
+ reserved for hotplug bridge's MMIO_PREF window.
+ Default size is 2 megabytes.
hpmemsize=nn[KMG] The fixed amount of bus space which is
- reserved for hotplug bridge's memory window.
+ reserved for hotplug bridge's MMIO and
+ MMIO_PREF window.
Default size is 2 megabytes.
hpbussize=nn The minimum amount of additional bus numbers
reserved for buses below a hotplug bridge.
@@ -3339,10 +5021,51 @@
bridges without forcing it upstream. Note:
this removes isolation between devices and
may put more devices in an IOMMU group.
-
- pcie_aspm= [PCIE] Forcibly enable or disable PCIe Active State Power
+ config_acs=
+ Format:
+ <ACS flags>@<pci_dev>[; ...]
+ Specify one or more PCI devices (in the format
+ specified above) optionally prepended with flags
+ and separated by semicolons. The respective
+ capabilities will be enabled, disabled or
+ unchanged based on what is specified in
+ flags.
+
+ ACS Flags is defined as follows:
+ bit-0 : ACS Source Validation
+ bit-1 : ACS Translation Blocking
+ bit-2 : ACS P2P Request Redirect
+ bit-3 : ACS P2P Completion Redirect
+ bit-4 : ACS Upstream Forwarding
+ bit-5 : ACS P2P Egress Control
+ bit-6 : ACS Direct Translated P2P
+ Each bit can be marked as:
+ '0' – force disabled
+ '1' – force enabled
+ 'x' – unchanged
+ For example,
+ pci=config_acs=10x@pci:0:0
+ would configure all devices that support
+ ACS to enable P2P Request Redirect, disable
+ Translation Blocking, and leave Source
+ Validation unchanged from whatever power-up
+ or firmware set it to.
+
+ Note: this may remove isolation between devices
+ and may put more devices in an IOMMU group.
+ force_floating [S390] Force usage of floating interrupts.
+ nomio [S390] Do not use MIO instructions.
+ norid [S390] ignore the RID field and force use of
+ one PCI domain per PCI function
+ notph [PCIE] If the PCIE_TPH kernel config parameter
+ is enabled, this kernel boot option can be used
+ to disable PCIe TLP Processing Hints support
+ system-wide.
+
+ pcie_aspm= [PCIE] Forcibly enable or ignore PCIe Active State Power
Management.
- off Disable ASPM.
+ off Don't touch ASPM configuration at all. Leave any
+ configuration done by firmware unchanged.
force Enable ASPM even on devices that claim not to support it.
WARNING: Forcing ASPM on may cause system lockups.
@@ -3351,6 +5074,8 @@
even if the platform doesn't give the OS permission to
use them. This may cause conflicts if the platform
also tries to use these services.
+ dpc-native Use native PCIe service for DPC only. May
+ cause conflicts if firmware uses AER or DPC.
compat Disable native PCIe services (PME, AER, DPC, PCIe
hotplug).
@@ -3371,29 +5096,21 @@
for debug and development, but should not be
needed on a platform with proper driver support.
- pd. [PARIDE]
- See Documentation/blockdev/paride.txt.
-
pdcchassis= [PARISC,HW] Disable/Enable PDC Chassis Status codes at
boot time.
Format: { 0 | 1 }
See arch/parisc/kernel/pdc_chassis.c
- percpu_alloc= Select which percpu first chunk allocator to use.
+ percpu_alloc= [MM,EARLY]
+ Select which percpu first chunk allocator to use.
Currently supported values are "embed" and "page".
Archs may support subset or none of the selections.
See comments in mm/percpu.c for details on each
allocator. This parameter is primarily for debugging
and performance comparison.
- pf. [PARIDE]
- See Documentation/blockdev/paride.txt.
-
- pg. [PARIDE]
- See Documentation/blockdev/paride.txt.
-
pirq= [SMP,APIC] Manual mp-table setup
- See Documentation/x86/i386/IO-APIC.txt.
+ See Documentation/arch/x86/i386/IO-APIC.rst.
plip= [PPT,NET] Parallel port network link
Format: { parport<nr> | timid | 0 }
@@ -3403,6 +5120,29 @@
Override pmtimer IOPort with a hex value.
e.g. pmtmr=0x508
+ pmu_override= [PPC] Override the PMU.
+ This option takes over the PMU facility, so it is no
+ longer usable by perf. Setting this option starts the
+ PMU counters by setting MMCR0 to 0 (the FC bit is
+ cleared). If a number is given, then MMCR1 is set to
+ that number, otherwise (e.g., 'pmu_override=on'), MMCR1
+ remains 0.
+
+ pm_async= [PM]
+ Format: off
+ This parameter sets the initial value of the
+ /sys/power/pm_async sysfs knob at boot time.
+ If set to "off", disables asynchronous suspend and
+ resume of devices during system-wide power transitions.
+ This can be useful on platforms where device
+ dependencies are not well-defined, or for debugging
+ power management issues. Asynchronous operations are
+ enabled by default.
+
+
+ pm_debug_messages [SUSPEND,KNL]
+ Enable suspend/resume debug messages during boot up.
+
pnp.debug=1 [PNP]
Enable PNP debug messages (depends on the
CONFIG_PNP_DEBUG_MESSAGES option). Change at run-time
@@ -3436,6 +5176,11 @@
may be specified.
Format: <port>,<port>....
+ possible_cpus= [SMP,S390,X86]
+ Format: <unsigned int>
+ Set the number of possible CPUs, overriding the
+ regular discovery mechanisms (such as ACPI/FW, etc).
+
powersave=off [PPC] This option disables power saving features.
It specifically disables cpuidle and sets the
platform machine description specific power_save
@@ -3443,15 +5188,29 @@
execution priority.
ppc_strict_facility_enable
- [PPC] This option catches any kernel floating point,
+ [PPC,ENABLE] This option catches any kernel floating point,
Altivec, VSX and SPE outside of regions specifically
allowed (eg kernel_enable_fpu()/kernel_disable_fpu()).
There is some performance impact when enabling this.
- ppc_tm= [PPC]
+ ppc_tm= [PPC,EARLY]
Format: {"off"}
Disable Hardware Transactional Memory
+ preempt= [KNL]
+ Select preemption mode if you have CONFIG_PREEMPT_DYNAMIC
+ none - Limited to cond_resched() calls
+ voluntary - Limited to cond_resched() and might_sleep() calls
+ full - Any section that isn't explicitly preempt disabled
+ can be preempted anytime. Tasks will also yield
+ contended spinlocks (if the critical section isn't
+ explicitly preempt disabled beyond the lock itself).
+ lazy - Scheduler controlled. Similar to full but instead
+ of preempting the task immediately, the task gets
+ one HZ tick time to yield itself before the
+ preemption will be forced. One preemption is when the
+ task returns to user space.
+
print-fatal-signals=
[KNL] debug: print fatal signals
@@ -3471,6 +5230,23 @@
Format: <bool> (1/Y/y=enable, 0/N/n=disable)
default: disabled
+ printk.console_no_auto_verbose=
+ Disable console loglevel raise on oops, panic
+ or lockdep-detected issues (only if lock debug is on).
+ With an exception to setups with low baudrate on
+ serial console, keeping this 0 is a good choice
+ in order to provide more debug information.
+ Format: <bool>
+ default: 0 (auto_verbose is enabled)
+
+ printk.debug_non_panic_cpus=
+ Allows storing messages from non-panic CPUs into
+ the printk log buffer during panic(). They are
+ flushed to consoles by the panic-CPU on
+ a best-effort basis.
+ Format: <bool> (1/Y/y=enable, 0/N/n=disable)
+ Default: disabled
+
printk.devkmsg={on,off,ratelimit}
Control writing to /dev/kmsg.
on - unlimited logging to /dev/kmsg from userspace
@@ -3481,6 +5257,16 @@
printk.time= Show timing data prefixed to each printk message line
Format: <bool> (1/Y/y=enable, 0/N/n=disable)
+ proc_mem.force_override= [KNL]
+ Format: {always | ptrace | never}
+ Traditionally /proc/pid/mem allows memory permissions to be
+ overridden without restrictions. This option may be set to
+ restrict that. Can be one of:
+ - 'always': traditional behavior always allows mem overrides.
+ - 'ptrace': only allow mem overrides for active ptracers.
+ - 'never': never allow mem overrides.
+ If not specified, default is the CONFIG_PROC_MEM_* choice.
+
processor.max_cstate= [HW,ACPI]
Limit processor to maximum C-state
max_cstate=9 overrides any DMI blacklist limit.
@@ -3491,18 +5277,25 @@
profile= [KNL] Enable kernel profiling via /proc/profile
Format: [<profiletype>,]<number>
- Param: <profiletype>: "schedule", "sleep", or "kvm"
+ Param: <profiletype>: "schedule" or "kvm"
[defaults to kernel profiling]
Param: "schedule" - profile schedule points.
- Param: "sleep" - profile D-state sleeping (millisecs).
- Requires CONFIG_SCHEDSTATS
Param: "kvm" - profile VM exits.
Param: <number> - step/bucket size as a power of 2 for
statistical time based profiling.
- prompt_ramdisk= [RAM] List of RAM disks to prompt for floppy disk
- before loading.
- See Documentation/blockdev/ramdisk.txt.
+ prompt_ramdisk= [RAM] [Deprecated]
+
+ prot_virt= [S390] enable hosting protected virtual machines
+ isolated from the hypervisor (if hardware supports
+ that). If enabled, the default kernel base address
+ might be overridden even when Kernel Address Space
+ Layout Randomization is disabled.
+ Format: <bool>
+
+ psi= [KNL] Enable or disable pressure stall information
+ tracking.
+ Format: <bool>
psmouse.proto= [HW,MOUSE] Highest PS2 mouse protocol extension to
probe for; one of (bare|imps|exps|lifebook|any).
@@ -3519,10 +5312,7 @@
pstore.backend= Specify the name of the pstore backend to use
- pt. [PARIDE]
- See Documentation/blockdev/paride.txt.
-
- pti= [X86_64] Control Page Table Isolation of user and
+ pti= [X86-64] Control Page Table Isolation of user and
kernel address spaces. Disabling this feature
removes hardening, but improves performance of
system calls and interrupts.
@@ -3534,28 +5324,46 @@
Not specifying this option is equivalent to pti=auto.
- nopti [X86_64]
- Equivalent to pti=off
-
pty.legacy_count=
[KNL] Number of legacy pty's. Overwrites compiled-in
default number.
- quiet [KNL] Disable most log messages
+ quiet [KNL,EARLY] Disable most log messages
r128= [HW,DRM]
+ radix_hcall_invalidate=on [PPC/PSERIES]
+ Disable RADIX GTSE feature and use hcall for TLB
+ invalidate.
+
raid= [HW,RAID]
See Documentation/admin-guide/md.rst.
ramdisk_size= [RAM] Sizes of RAM disks in kilobytes
- See Documentation/blockdev/ramdisk.txt.
-
- random.trust_cpu={on,off}
- [KNL] Enable or disable trusting the use of the
- CPU's random number generator (if available) to
- fully seed the kernel's CRNG. Default is controlled
- by CONFIG_RANDOM_TRUST_CPU.
+ See Documentation/admin-guide/blockdev/ramdisk.rst.
+
+ ramdisk_start= [RAM] RAM disk image start address
+
+ random.trust_cpu=off
+ [KNL,EARLY] Disable trusting the use of the CPU's
+ random number generator (if available) to
+ initialize the kernel's RNG.
+
+ random.trust_bootloader=off
+ [KNL,EARLY] Disable trusting the use of the a seed
+ passed by the bootloader (if available) to
+ initialize the kernel's RNG.
+
+ randomize_kstack_offset=
+ [KNL,EARLY] Enable or disable kernel stack offset
+ randomization, which provides roughly 5 bits of
+ entropy, frustrating memory corruption attacks
+ that depend on stack address determinism or
+ cross-syscall address exposures. This is only
+ available on architectures that have defined
+ CONFIG_HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET.
+ Format: <bool> (1/Y/y=enable, 0/N/n=disable)
+ Default is CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT.
ras=option[,option,...] [KNL] RAS-specific options
@@ -3563,19 +5371,33 @@
Disable the Correctable Errors Collector,
see CONFIG_RAS_CEC help text.
- rcu_nocbs= [KNL]
- The argument is a cpu list, as described above.
-
- In kernels built with CONFIG_RCU_NOCB_CPU=y, set
- the specified list of CPUs to be no-callback CPUs.
- Invocation of these CPUs' RCU callbacks will be
- offloaded to "rcuox/N" kthreads created for that
- purpose, where "x" is "p" for RCU-preempt, and
- "s" for RCU-sched, and "N" is the CPU number.
- This reduces OS jitter on the offloaded CPUs,
- which can be useful for HPC and real-time
- workloads. It can also improve energy efficiency
- for asymmetric multiprocessors.
+ rcu_nocbs[=cpu-list]
+ [KNL] The optional argument is a cpu list,
+ as described above.
+
+ In kernels built with CONFIG_RCU_NOCB_CPU=y,
+ enable the no-callback CPU mode, which prevents
+ such CPUs' callbacks from being invoked in
+ softirq context. Invocation of such CPUs' RCU
+ callbacks will instead be offloaded to "rcuox/N"
+ kthreads created for that purpose, where "x" is
+ "p" for RCU-preempt, "s" for RCU-sched, and "g"
+ for the kthreads that mediate grace periods; and
+ "N" is the CPU number. This reduces OS jitter on
+ the offloaded CPUs, which can be useful for HPC
+ and real-time workloads. It can also improve
+ energy efficiency for asymmetric multiprocessors.
+
+ If a cpulist is passed as an argument, the specified
+ list of CPUs is set to no-callback mode from boot.
+
+ Otherwise, if the '=' sign and the cpulist
+ arguments are omitted, no CPU will be set to
+ no-callback mode from boot but the mode may be
+ toggled at runtime via cpusets.
+
+ Note that this argument takes precedence over
+ the CONFIG_RCU_NOCB_CPU_DEFAULT_ALL option.
rcu_nocb_poll [KNL]
Rather than requiring that offloaded CPUs
@@ -3592,6 +5414,17 @@
Set maximum number of finished RCU callbacks to
process in one batch.
+ rcutree.csd_lock_suppress_rcu_stall= [KNL]
+ Do only a one-line RCU CPU stall warning when
+ there is an ongoing too-long CSD-lock wait.
+
+ rcutree.do_rcu_barrier= [KNL]
+ Request a call to rcu_barrier(). This is
+ throttled so that userspace tests can safely
+ hammer on the sysfs variable if they so choose.
+ If triggered before the RCU grace-period machinery
+ is fully active, this will error out with EAGAIN.
+
rcutree.dump_tree= [KNL]
Dump the structure of the rcu_node combining tree
out at early boot. This is used for diagnostic
@@ -3611,33 +5444,6 @@
the propagation of recent CPU-hotplug changes up
the rcu_node combining tree.
- rcutree.rcu_fanout_exact= [KNL]
- Disable autobalancing of the rcu_node combining
- tree. This is used by rcutorture, and might
- possibly be useful for architectures having high
- cache-to-cache transfer latencies.
-
- rcutree.rcu_fanout_leaf= [KNL]
- Change the number of CPUs assigned to each
- leaf rcu_node structure. Useful for very
- large systems, which will choose the value 64,
- and for NUMA systems with large remote-access
- latencies, which will choose a value aligned
- with the appropriate hardware boundaries.
-
- rcutree.jiffies_till_sched_qs= [KNL]
- Set required age in jiffies for a
- given grace period before RCU starts
- soliciting quiescent-state help from
- rcu_note_context_switch(). If not specified, the
- kernel will calculate a value based on the most
- recent settings of rcutree.jiffies_till_first_fqs
- and rcutree.jiffies_till_next_fqs.
- This calculated value may be viewed in
- rcutree.jiffies_to_sched_qs. Any attempt to
- set rcutree.jiffies_to_sched_qs will be
- cheerfully overwritten.
-
rcutree.jiffies_till_first_fqs= [KNL]
Set delay from grace-period initialization to
first attempt to force quiescent states.
@@ -3649,6 +5455,20 @@
quiescent states. Units are jiffies, minimum
value is one, and maximum value is HZ.
+ rcutree.jiffies_till_sched_qs= [KNL]
+ Set required age in jiffies for a
+ given grace period before RCU starts
+ soliciting quiescent-state help from
+ rcu_note_context_switch() and cond_resched().
+ If not specified, the kernel will calculate
+ a value based on the most recent settings
+ of rcutree.jiffies_till_first_fqs
+ and rcutree.jiffies_till_next_fqs.
+ This calculated value may be viewed in
+ rcutree.jiffies_to_sched_qs. Any attempt to set
+ rcutree.jiffies_to_sched_qs will be cheerfully
+ overwritten.
+
rcutree.kthread_prio= [KNL,BOOT]
Set the SCHED_FIFO priority of the RCU per-CPU
kthreads (rcuc/N). This value is also used for
@@ -3659,13 +5479,29 @@
(the least-favored priority). Otherwise, when
RCU_BOOST is not set, valid values are 0-99 and
the default is zero (non-realtime operation).
-
- rcutree.rcu_nocb_leader_stride= [KNL]
- Set the number of NOCB kthread groups, which
- defaults to the square root of the number of
- CPUs. Larger numbers reduces the wakeup overhead
- on the per-CPU grace-period kthreads, but increases
- that same overhead on each group's leader.
+ When RCU_NOCB_CPU is set, also adjust the
+ priority of NOCB callback kthreads.
+
+ rcutree.nocb_nobypass_lim_per_jiffy= [KNL]
+ On callback-offloaded (rcu_nocbs) CPUs,
+ RCU reduces the lock contention that would
+ otherwise be caused by callback floods through
+ use of the ->nocb_bypass list. However, in the
+ common non-flooded case, RCU queues directly to
+ the main ->cblist in order to avoid the extra
+ overhead of the ->nocb_bypass list and its lock.
+ But if there are too many callbacks queued during
+ a single jiffy, RCU pre-queues the callbacks into
+ the ->nocb_bypass queue. The definition of "too
+ many" is supplied by this kernel boot parameter.
+
+ rcutree.nohz_full_patience_delay= [KNL]
+ On callback-offloaded (rcu_nocbs) CPUs, avoid
+ disturbing RCU unless the grace period has
+ reached the specified age in milliseconds.
+ Defaults to zero. Large values will be capped
+ at five seconds. All values will be rounded down
+ to the nearest value representable by jiffies.
rcutree.qhimark= [KNL]
Set threshold of queued RCU callbacks beyond which
@@ -3675,15 +5511,64 @@
Set threshold of queued RCU callbacks below which
batch limiting is re-enabled.
- rcutree.rcu_idle_gp_delay= [KNL]
- Set wakeup interval for idle CPUs that have
- RCU callbacks (RCU_FAST_NO_HZ=y).
+ rcutree.qovld= [KNL]
+ Set threshold of queued RCU callbacks beyond which
+ RCU's force-quiescent-state scan will aggressively
+ enlist help from cond_resched() and sched IPIs to
+ help CPUs more quickly reach quiescent states.
+ Set to less than zero to make this be set based
+ on rcutree.qhimark at boot time and to zero to
+ disable more aggressive help enlistment.
+
+ rcutree.rcu_delay_page_cache_fill_msec= [KNL]
+ Set the page-cache refill delay (in milliseconds)
+ in response to low-memory conditions. The range
+ of permitted values is in the range 0:100000.
+
+ rcutree.rcu_divisor= [KNL]
+ Set the shift-right count to use to compute
+ the callback-invocation batch limit bl from
+ the number of callbacks queued on this CPU.
+ The result will be bounded below by the value of
+ the rcutree.blimit kernel parameter. Every bl
+ callbacks, the softirq handler will exit in
+ order to allow the CPU to do other work.
+
+ Please note that this callback-invocation batch
+ limit applies only to non-offloaded callback
+ invocation. Offloaded callbacks are instead
+ invoked in the context of an rcuoc kthread, which
+ scheduler will preempt as it does any other task.
- rcutree.rcu_idle_lazy_gp_delay= [KNL]
- Set wakeup interval for idle CPUs that have
- only "lazy" RCU callbacks (RCU_FAST_NO_HZ=y).
- Lazy RCU callbacks are those which RCU can
- prove do nothing more than free memory.
+ rcutree.rcu_fanout_exact= [KNL]
+ Disable autobalancing of the rcu_node combining
+ tree. This is used by rcutorture, and might
+ possibly be useful for architectures having high
+ cache-to-cache transfer latencies.
+
+ rcutree.rcu_fanout_leaf= [KNL]
+ Change the number of CPUs assigned to each
+ leaf rcu_node structure. Useful for very
+ large systems, which will choose the value 64,
+ and for NUMA systems with large remote-access
+ latencies, which will choose a value aligned
+ with the appropriate hardware boundaries.
+
+ rcutree.rcu_min_cached_objs= [KNL]
+ Minimum number of objects which are cached and
+ maintained per one CPU. Object size is equal
+ to PAGE_SIZE. The cache allows to reduce the
+ pressure to page allocator, also it makes the
+ whole algorithm to behave better in low memory
+ condition.
+
+ rcutree.rcu_nocb_gp_stride= [KNL]
+ Set the number of NOCB callback kthreads in
+ each group, which defaults to the square root
+ of the number of CPUs. Larger numbers reduce
+ the wakeup overhead on the global grace-period
+ kthread, but increases that same overhead on
+ each group's NOCB grace-period kthread.
rcutree.rcu_kick_kthreads= [KNL]
Cause the grace-period kthread to get an extra
@@ -3692,28 +5577,119 @@
This wake_up() will be accompanied by a
WARN_ONCE() splat and an ftrace_dump().
- rcuperf.gp_async= [KNL]
+ rcutree.rcu_resched_ns= [KNL]
+ Limit the time spend invoking a batch of RCU
+ callbacks to the specified number of nanoseconds.
+ By default, this limit is checked only once
+ every 32 callbacks in order to limit the pain
+ inflicted by local_clock() overhead.
+
+ rcutree.rcu_unlock_delay= [KNL]
+ In CONFIG_RCU_STRICT_GRACE_PERIOD=y kernels,
+ this specifies an rcu_read_unlock()-time delay
+ in microseconds. This defaults to zero.
+ Larger delays increase the probability of
+ catching RCU pointer leaks, that is, buggy use
+ of RCU-protected pointers after the relevant
+ rcu_read_unlock() has completed.
+
+ rcutree.sysrq_rcu= [KNL]
+ Commandeer a sysrq key to dump out Tree RCU's
+ rcu_node tree with an eye towards determining
+ why a new grace period has not yet started.
+
+ rcutree.use_softirq= [KNL]
+ If set to zero, move all RCU_SOFTIRQ processing to
+ per-CPU rcuc kthreads. Defaults to a non-zero
+ value, meaning that RCU_SOFTIRQ is used by default.
+ Specify rcutree.use_softirq=0 to use rcuc kthreads.
+
+ But note that CONFIG_PREEMPT_RT=y kernels disable
+ this kernel boot parameter, forcibly setting it
+ to zero.
+
+ rcutree.enable_rcu_lazy= [KNL]
+ To save power, batch RCU callbacks and flush after
+ delay, memory pressure or callback list growing too
+ big.
+
+ rcutree.rcu_normal_wake_from_gp= [KNL]
+ Reduces a latency of synchronize_rcu() call. This approach
+ maintains its own track of synchronize_rcu() callers, so it
+ does not interact with regular callbacks because it does not
+ use a call_rcu[_hurry]() path. Please note, this is for a
+ normal grace period.
+
+ How to enable it:
+
+ echo 1 > /sys/module/rcutree/parameters/rcu_normal_wake_from_gp
+ or pass a boot parameter "rcutree.rcu_normal_wake_from_gp=1"
+
+ Default is 1 if num_possible_cpus() <= 16 and it is not explicitly
+ disabled by the boot parameter passing 0.
+
+ rcuscale.gp_async= [KNL]
Measure performance of asynchronous
grace-period primitives such as call_rcu().
- rcuperf.gp_async_max= [KNL]
+ rcuscale.gp_async_max= [KNL]
Specify the maximum number of outstanding
callbacks per writer thread. When a writer
thread exceeds this limit, it invokes the
corresponding flavor of rcu_barrier() to allow
previously posted callbacks to drain.
- rcuperf.gp_exp= [KNL]
+ rcuscale.gp_exp= [KNL]
Measure performance of expedited synchronous
grace-period primitives.
- rcuperf.holdoff= [KNL]
+ rcuscale.holdoff= [KNL]
Set test-start holdoff period. The purpose of
this parameter is to delay the start of the
test until boot completes in order to avoid
interference.
- rcuperf.nreaders= [KNL]
+ rcuscale.kfree_by_call_rcu= [KNL]
+ In kernels built with CONFIG_RCU_LAZY=y, test
+ call_rcu() instead of kfree_rcu().
+
+ rcuscale.kfree_mult= [KNL]
+ Instead of allocating an object of size kfree_obj,
+ allocate one of kfree_mult * sizeof(kfree_obj).
+ Defaults to 1.
+
+ rcuscale.kfree_rcu_test= [KNL]
+ Set to measure performance of kfree_rcu() flooding.
+
+ rcuscale.kfree_rcu_test_double= [KNL]
+ Test the double-argument variant of kfree_rcu().
+ If this parameter has the same value as
+ rcuscale.kfree_rcu_test_single, both the single-
+ and double-argument variants are tested.
+
+ rcuscale.kfree_rcu_test_single= [KNL]
+ Test the single-argument variant of kfree_rcu().
+ If this parameter has the same value as
+ rcuscale.kfree_rcu_test_double, both the single-
+ and double-argument variants are tested.
+
+ rcuscale.kfree_nthreads= [KNL]
+ The number of threads running loops of kfree_rcu().
+
+ rcuscale.kfree_alloc_num= [KNL]
+ Number of allocations and frees done in an iteration.
+
+ rcuscale.kfree_loops= [KNL]
+ Number of loops doing rcuscale.kfree_alloc_num number
+ of allocations and frees.
+
+ rcuscale.minruntime= [KNL]
+ Set the minimum test run time in seconds. This
+ does not affect the data-collection interval,
+ but instead allows better measurement of things
+ like CPU consumption.
+
+ rcuscale.nreaders= [KNL]
Set number of RCU readers. The value -1 selects
N, where N is the number of CPUs. A value
"n" less than -1 selects N-n+1, where N is again
@@ -3722,44 +5698,31 @@
A value of "n" less than or equal to -N selects
a single reader.
- rcuperf.nwriters= [KNL]
+ rcuscale.nwriters= [KNL]
Set number of RCU writers. The values operate
- the same as for rcuperf.nreaders.
+ the same as for rcuscale.nreaders.
N, where N is the number of CPUs
- rcuperf.perf_type= [KNL]
+ rcuscale.scale_type= [KNL]
Specify the RCU implementation to test.
- rcuperf.shutdown= [KNL]
+ rcuscale.shutdown= [KNL]
Shut the system down after performance tests
complete. This is useful for hands-off automated
testing.
- rcuperf.verbose= [KNL]
+ rcuscale.verbose= [KNL]
Enable additional printk() statements.
- rcuperf.writer_holdoff= [KNL]
+ rcuscale.writer_holdoff= [KNL]
Write-side holdoff between grace periods,
in microseconds. The default of zero says
no holdoff.
- rcutorture.cbflood_inter_holdoff= [KNL]
- Set holdoff time (jiffies) between successive
- callback-flood tests.
-
- rcutorture.cbflood_intra_holdoff= [KNL]
- Set holdoff time (jiffies) between successive
- bursts of callbacks within a given callback-flood
- test.
-
- rcutorture.cbflood_n_burst= [KNL]
- Set the number of bursts making up a given
- callback-flood test. Set this to zero to
- disable callback-flood testing.
-
- rcutorture.cbflood_n_per_burst= [KNL]
- Set the number of callbacks to be registered
- in a given burst of a callback-flood test.
+ rcuscale.writer_holdoff_jiffies= [KNL]
+ Additional write-side holdoff between grace
+ periods, but in jiffies. The default of zero
+ says no holdoff.
rcutorture.fqs_duration= [KNL]
Set duration of force_quiescent_state bursts
@@ -3773,9 +5736,65 @@
Set wait time between force_quiescent_state bursts
in seconds.
+ rcutorture.fwd_progress= [KNL]
+ Specifies the number of kthreads to be used
+ for RCU grace-period forward-progress testing
+ for the types of RCU supporting this notion.
+ Defaults to 1 kthread, values less than zero or
+ greater than the number of CPUs cause the number
+ of CPUs to be used.
+
+ rcutorture.fwd_progress_div= [KNL]
+ Specify the fraction of a CPU-stall-warning
+ period to do tight-loop forward-progress testing.
+
+ rcutorture.fwd_progress_holdoff= [KNL]
+ Number of seconds to wait between successive
+ forward-progress tests.
+
+ rcutorture.fwd_progress_need_resched= [KNL]
+ Enclose cond_resched() calls within checks for
+ need_resched() during tight-loop forward-progress
+ testing.
+
rcutorture.gp_cond= [KNL]
Use conditional/asynchronous update-side
- primitives, if available.
+ normal-grace-period primitives, if available.
+
+ rcutorture.gp_cond_exp= [KNL]
+ Use conditional/asynchronous update-side
+ expedited-grace-period primitives, if available.
+
+ rcutorture.gp_cond_full= [KNL]
+ Use conditional/asynchronous update-side
+ normal-grace-period primitives that also take
+ concurrent expedited grace periods into account,
+ if available.
+
+ rcutorture.gp_cond_exp_full= [KNL]
+ Use conditional/asynchronous update-side
+ expedited-grace-period primitives that also take
+ concurrent normal grace periods into account,
+ if available.
+
+ rcutorture.gp_cond_wi= [KNL]
+ Nominal wait interval for normal conditional
+ grace periods (specified by rcutorture's
+ gp_cond and gp_cond_full module parameters),
+ in microseconds. The actual wait interval will
+ be randomly selected to nanosecond granularity up
+ to this wait interval. Defaults to 16 jiffies,
+ for example, 16,000 microseconds on a system
+ with HZ=1000.
+
+ rcutorture.gp_cond_wi_exp= [KNL]
+ Nominal wait interval for expedited conditional
+ grace periods (specified by rcutorture's
+ gp_cond_exp and gp_cond_exp_full module
+ parameters), in microseconds. The actual wait
+ interval will be randomly selected to nanosecond
+ granularity up to this wait interval. Defaults to
+ 128 microseconds.
rcutorture.gp_exp= [KNL]
Use expedited update-side primitives, if available.
@@ -3784,6 +5803,43 @@
Use normal (non-expedited) asynchronous
update-side primitives, if available.
+ rcutorture.gp_poll= [KNL]
+ Use polled update-side normal-grace-period
+ primitives, if available.
+
+ rcutorture.gp_poll_exp= [KNL]
+ Use polled update-side expedited-grace-period
+ primitives, if available.
+
+ rcutorture.gp_poll_full= [KNL]
+ Use polled update-side normal-grace-period
+ primitives that also take concurrent expedited
+ grace periods into account, if available.
+
+ rcutorture.gp_poll_exp_full= [KNL]
+ Use polled update-side expedited-grace-period
+ primitives that also take concurrent normal
+ grace periods into account, if available.
+
+ rcutorture.gp_poll_wi= [KNL]
+ Nominal wait interval for normal conditional
+ grace periods (specified by rcutorture's
+ gp_poll and gp_poll_full module parameters),
+ in microseconds. The actual wait interval will
+ be randomly selected to nanosecond granularity up
+ to this wait interval. Defaults to 16 jiffies,
+ for example, 16,000 microseconds on a system
+ with HZ=1000.
+
+ rcutorture.gp_poll_wi_exp= [KNL]
+ Nominal wait interval for expedited conditional
+ grace periods (specified by rcutorture's
+ gp_poll_exp and gp_poll_exp_full module
+ parameters), in microseconds. The actual wait
+ interval will be randomly selected to nanosecond
+ granularity up to this wait interval. Defaults to
+ 128 microseconds.
+
rcutorture.gp_sync= [KNL]
Use normal (non-expedited) synchronous
update-side primitives, if available. If all
@@ -3792,6 +5848,43 @@
are zero, rcutorture acts as if is interpreted
they are all non-zero.
+ rcutorture.gpwrap_lag= [KNL]
+ Enable grace-period wrap lag testing. Setting
+ to false prevents the gpwrap lag test from
+ running. Default is true.
+
+ rcutorture.gpwrap_lag_gps= [KNL]
+ Set the value for grace-period wrap lag during
+ active lag testing periods. This controls how many
+ grace periods differences we tolerate between
+ rdp and rnp's gp_seq before setting overflow flag.
+ The default is always set to 8.
+
+ rcutorture.gpwrap_lag_cycle_mins= [KNL]
+ Set the total cycle duration for gpwrap lag
+ testing in minutes. This is the total time for
+ one complete cycle of active and inactive
+ testing periods. Default is 30 minutes.
+
+ rcutorture.gpwrap_lag_active_mins= [KNL]
+ Set the duration for which gpwrap lag is active
+ within each cycle, in minutes. During this time,
+ the grace-period wrap lag will be set to the
+ value specified by gpwrap_lag_gps. Default is
+ 5 minutes.
+
+ rcutorture.irqreader= [KNL]
+ Run RCU readers from irq handlers, or, more
+ accurately, from a timer handler. Not all RCU
+ flavors take kindly to this sort of thing.
+
+ rcutorture.leakpointer= [KNL]
+ Leak an RCU-protected pointer out of the reader.
+ This can of course result in splats, and is
+ intended to test the ability of things like
+ CONFIG_RCU_STRICT_GRACE_PERIOD=y to detect
+ such leaks.
+
rcutorture.n_barrier_cbs= [KNL]
Set callbacks/threads for rcu_barrier() testing.
@@ -3800,6 +5893,14 @@
stress RCU, they don't participate in the actual
test, hence the "fake".
+ rcutorture.nocbs_nthreads= [KNL]
+ Set number of RCU callback-offload togglers.
+ Zero (the default) disables toggling.
+
+ rcutorture.nocbs_toggle= [KNL]
+ Set the delay in milliseconds between successive
+ callback-offload toggling attempts.
+
rcutorture.nreaders= [KNL]
Set number of RCU readers. The value -1 selects
N-1, where N is the number of CPUs. A value
@@ -3817,6 +5918,39 @@
Set time (jiffies) between CPU-hotplug operations,
or zero to disable CPU-hotplug testing.
+ rcutorture.preempt_duration= [KNL]
+ Set duration (in milliseconds) of preemptions
+ by a high-priority FIFO real-time task. Set to
+ zero (the default) to disable. The CPUs to
+ preempt are selected randomly from the set that
+ are online at a given point in time. Races with
+ CPUs going offline are ignored, with that attempt
+ at preemption skipped.
+
+ rcutorture.preempt_interval= [KNL]
+ Set interval (in milliseconds, defaulting to one
+ second) between preemptions by a high-priority
+ FIFO real-time task. This delay is mediated
+ by an hrtimer and is further fuzzed to avoid
+ inadvertent synchronizations.
+
+ rcutorture.read_exit_burst= [KNL]
+ The number of times in a given read-then-exit
+ episode that a set of read-then-exit kthreads
+ is spawned.
+
+ rcutorture.read_exit_delay= [KNL]
+ The delay, in seconds, between successive
+ read-then-exit testing episodes.
+
+ rcutorture.reader_flavor= [KNL]
+ A bit mask indicating which readers to use.
+ If there is more than one bit set, the readers
+ are entered from low-order bit up, and are
+ exited in the opposite order. For SRCU, the
+ 0x1 bit is normal readers, 0x2 NMI-safe readers,
+ and 0x4 light-weight readers.
+
rcutorture.shuffle_interval= [KNL]
Set task-shuffle interval (s). Shuffling tasks
allows some CPUs to go into dyntick-idle mode
@@ -3830,11 +5964,38 @@
Duration of CPU stall (s) to test RCU CPU stall
warnings, zero to disable.
+ rcutorture.stall_cpu_block= [KNL]
+ Sleep while stalling if set. This will result
+ in warnings from preemptible RCU in addition to
+ any other stall-related activity. Note that
+ in kernels built with CONFIG_PREEMPTION=n and
+ CONFIG_PREEMPT_COUNT=y, this parameter will
+ cause the CPU to pass through a quiescent state.
+ Given CONFIG_PREEMPTION=n, this will suppress
+ RCU CPU stall warnings, but will instead result
+ in scheduling-while-atomic splats.
+
+ Use of this module parameter results in splats.
+
+
rcutorture.stall_cpu_holdoff= [KNL]
Time to wait (s) after boot before inducing stall.
rcutorture.stall_cpu_irqsoff= [KNL]
- Disable interrupts while stalling if set.
+ Disable interrupts while stalling if set, but only
+ on the first stall in the set.
+
+ rcutorture.stall_cpu_repeat= [KNL]
+ Number of times to repeat the stall sequence,
+ so that rcutorture.stall_cpu_repeat=3 will result
+ in four stall sequences.
+
+ rcutorture.stall_gp_kthread= [KNL]
+ Duration (s) of forced sleep within RCU
+ grace-period kthread to test RCU CPU stall
+ warnings, zero to disable. If both stall_cpu
+ and stall_gp_kthread are specified, the
+ kthread is starved first, then the CPU.
rcutorture.stat_interval= [KNL]
Time (s) between statistics printk()s.
@@ -3853,6 +6014,11 @@
rcutorture.test_boost_duration= [KNL]
Duration (s) of each individual boost test.
+ rcutorture.test_boost_holdoff= [KNL]
+ Holdoff time (s) from start of test to the start
+ of RCU priority-boost testing. Defaults to zero,
+ that is, no holdoff.
+
rcutorture.test_boost_interval= [KNL]
Interval (s) between each boost test.
@@ -3866,11 +6032,50 @@
rcutorture.verbose= [KNL]
Enable additional printk() statements.
+ rcupdate.rcu_cpu_stall_ftrace_dump= [KNL]
+ Dump ftrace buffer after reporting RCU CPU
+ stall warning.
+
+ rcupdate.rcu_cpu_stall_notifiers= [KNL]
+ Provide RCU CPU stall notifiers, but see the
+ warnings in the RCU_CPU_STALL_NOTIFIER Kconfig
+ option's help text. TL;DR: You almost certainly
+ do not want rcupdate.rcu_cpu_stall_notifiers.
+
rcupdate.rcu_cpu_stall_suppress= [KNL]
Suppress RCU CPU stall warning messages.
+ rcupdate.rcu_cpu_stall_suppress_at_boot= [KNL]
+ Suppress RCU CPU stall warning messages and
+ rcutorture writer stall warnings that occur
+ during early boot, that is, during the time
+ before the init task is spawned.
+
rcupdate.rcu_cpu_stall_timeout= [KNL]
Set timeout for RCU CPU stall warning messages.
+ The value is in seconds and the maximum allowed
+ value is 300 seconds.
+
+ rcupdate.rcu_exp_cpu_stall_timeout= [KNL]
+ Set timeout for expedited RCU CPU stall warning
+ messages. The value is in milliseconds
+ and the maximum allowed value is 21000
+ milliseconds. Please note that this value is
+ adjusted to an arch timer tick resolution.
+ Setting this to zero causes the value from
+ rcupdate.rcu_cpu_stall_timeout to be used (after
+ conversion from seconds to milliseconds).
+
+ rcupdate.rcu_cpu_stall_cputime= [KNL]
+ Provide statistics on the cputime and count of
+ interrupts and tasks during the sampling period. For
+ multiple continuous RCU stalls, all sampling periods
+ begin at half of the first RCU stall timeout.
+
+ rcupdate.rcu_exp_stall_task_details= [KNL]
+ Print stack dumps of any tasks blocking the
+ current expedited RCU grace period during an
+ expedited RCU CPU stall warning.
rcupdate.rcu_expedited= [KNL]
Use expedited grace-period primitives, for
@@ -3896,10 +6101,93 @@
only normal grace-period primitives. No effect
on CONFIG_TINY_RCU kernels.
+ But note that CONFIG_PREEMPT_RT=y kernels enables
+ this kernel boot parameter, forcibly setting
+ it to the value one, that is, converting any
+ post-boot attempt at an expedited RCU grace
+ period to instead use normal non-expedited
+ grace-period processing.
+
+ rcupdate.rcu_task_collapse_lim= [KNL]
+ Set the maximum number of callbacks present
+ at the beginning of a grace period that allows
+ the RCU Tasks flavors to collapse back to using
+ a single callback queue. This switching only
+ occurs when rcupdate.rcu_task_enqueue_lim is
+ set to the default value of -1.
+
+ rcupdate.rcu_task_contend_lim= [KNL]
+ Set the minimum number of callback-queuing-time
+ lock-contention events per jiffy required to
+ cause the RCU Tasks flavors to switch to per-CPU
+ callback queuing. This switching only occurs
+ when rcupdate.rcu_task_enqueue_lim is set to
+ the default value of -1.
+
+ rcupdate.rcu_task_enqueue_lim= [KNL]
+ Set the number of callback queues to use for the
+ RCU Tasks family of RCU flavors. The default
+ of -1 allows this to be automatically (and
+ dynamically) adjusted. This parameter is intended
+ for use in testing.
+
+ rcupdate.rcu_task_ipi_delay= [KNL]
+ Set time in jiffies during which RCU tasks will
+ avoid sending IPIs, starting with the beginning
+ of a given grace period. Setting a large
+ number avoids disturbing real-time workloads,
+ but lengthens grace periods.
+
+ rcupdate.rcu_task_lazy_lim= [KNL]
+ Number of callbacks on a given CPU that will
+ cancel laziness on that CPU. Use -1 to disable
+ cancellation of laziness, but be advised that
+ doing so increases the danger of OOM due to
+ callback flooding.
+
+ rcupdate.rcu_task_stall_info= [KNL]
+ Set initial timeout in jiffies for RCU task stall
+ informational messages, which give some indication
+ of the problem for those not patient enough to
+ wait for ten minutes. Informational messages are
+ only printed prior to the stall-warning message
+ for a given grace period. Disable with a value
+ less than or equal to zero. Defaults to ten
+ seconds. A change in value does not take effect
+ until the beginning of the next grace period.
+
+ rcupdate.rcu_task_stall_info_mult= [KNL]
+ Multiplier for time interval between successive
+ RCU task stall informational messages for a given
+ RCU tasks grace period. This value is clamped
+ to one through ten, inclusive. It defaults to
+ the value three, so that the first informational
+ message is printed 10 seconds into the grace
+ period, the second at 40 seconds, the third at
+ 160 seconds, and then the stall warning at 600
+ seconds would prevent a fourth at 640 seconds.
+
rcupdate.rcu_task_stall_timeout= [KNL]
- Set timeout in jiffies for RCU task stall warning
- messages. Disable with a value less than or equal
- to zero.
+ Set timeout in jiffies for RCU task stall
+ warning messages. Disable with a value less
+ than or equal to zero. Defaults to ten minutes.
+ A change in value does not take effect until
+ the beginning of the next grace period.
+
+ rcupdate.rcu_tasks_lazy_ms= [KNL]
+ Set timeout in milliseconds RCU Tasks asynchronous
+ callback batching for call_rcu_tasks().
+ A negative value will take the default. A value
+ of zero will disable batching. Batching is
+ always disabled for synchronize_rcu_tasks().
+
+ rcupdate.rcu_tasks_trace_lazy_ms= [KNL]
+ Set timeout in milliseconds RCU Tasks
+ Trace asynchronous callback batching for
+ call_rcu_tasks_trace(). A negative value
+ will take the default. A value of zero will
+ disable batching. Batching is always disabled
+ for synchronize_rcu_tasks_trace().
rcupdate.rcu_self_test= [KNL]
Run the RCU early boot self tests
@@ -3909,28 +6197,144 @@
Run specified binary instead of /init from the ramdisk,
used for early userspace startup. See initrd.
+ rdrand= [X86,EARLY]
+ force - Override the decision by the kernel to hide the
+ advertisement of RDRAND support (this affects
+ certain AMD processors because of buggy BIOS
+ support, specifically around the suspend/resume
+ path).
+
rdt= [HW,X86,RDT]
Turn on/off individual RDT features. List is:
cmt, mbmtotal, mbmlocal, l3cat, l3cdp, l2cat, l2cdp,
- mba.
+ mba, smba, bmec, abmc.
E.g. to turn on cmt and turn off mba use:
rdt=cmt,!mba
reboot= [KNL]
Format (x86 or x86_64):
- [w[arm] | c[old] | h[ard] | s[oft] | g[pio]] \
+ [w[arm] | c[old] | h[ard] | s[oft] | g[pio]] | d[efault] \
[[,]s[mp]#### \
[[,]b[ios] | a[cpi] | k[bd] | t[riple] | e[fi] | p[ci]] \
[[,]f[orce]
- Where reboot_mode is one of warm (soft) or cold (hard) or gpio,
+ Where reboot_mode is one of warm (soft) or cold (hard) or gpio
+ (prefix with 'panic_' to set mode for panic
+ reboot only),
reboot_type is one of bios, acpi, kbd, triple, efi, or pci,
reboot_force is either force or not specified,
reboot_cpu is s[mp]#### with #### being the processor
to be used for rebooting.
+ acpi
+ Use the ACPI RESET_REG in the FADT. If ACPI is not
+ configured or the ACPI reset does not work, the reboot
+ path attempts the reset using the keyboard controller.
+
+ bios
+ Use the CPU reboot vector for warm reset
+
+ cold
+ Set the cold reboot flag
+
+ default
+ There are some built-in platform specific "quirks"
+ - you may see: "reboot: <name> series board detected.
+ Selecting <type> for reboots." In the case where you
+ think the quirk is in error (e.g. you have newer BIOS,
+ or newer board) using this option will ignore the
+ built-in quirk table, and use the generic default
+ reboot actions.
+
+ efi
+ Use efi reset_system runtime service. If EFI is not
+ configured or the EFI reset does not work, the reboot
+ path attempts the reset using the keyboard controller.
+
+ force
+ Don't stop other CPUs on reboot. This can make reboot
+ more reliable in some cases.
+
+ kbd
+ Use the keyboard controller. cold reset (default)
+
+ pci
+ Use a write to the PCI config space register 0xcf9 to
+ trigger reboot.
+
+ triple
+ Force a triple fault (init)
+
+ warm
+ Don't set the cold reboot flag
+
+ Using warm reset will be much faster especially on big
+ memory systems because the BIOS will not go through
+ the memory check. Disadvantage is that not all
+ hardware will be completely reinitialized on reboot so
+ there may be boot problems on some systems.
+
+
+ refscale.holdoff= [KNL]
+ Set test-start holdoff period. The purpose of
+ this parameter is to delay the start of the
+ test until boot completes in order to avoid
+ interference.
+
+ refscale.lookup_instances= [KNL]
+ Number of data elements to use for the forms of
+ SLAB_TYPESAFE_BY_RCU testing. A negative number
+ is negated and multiplied by nr_cpu_ids, while
+ zero specifies nr_cpu_ids.
+
+ refscale.loops= [KNL]
+ Set the number of loops over the synchronization
+ primitive under test. Increasing this number
+ reduces noise due to loop start/end overhead,
+ but the default has already reduced the per-pass
+ noise to a handful of picoseconds on ca. 2020
+ x86 laptops.
+
+ refscale.nreaders= [KNL]
+ Set number of readers. The default value of -1
+ selects N, where N is roughly 75% of the number
+ of CPUs. A value of zero is an interesting choice.
+
+ refscale.nruns= [KNL]
+ Set number of runs, each of which is dumped onto
+ the console log.
+
+ refscale.readdelay= [KNL]
+ Set the read-side critical-section duration,
+ measured in microseconds.
+
+ refscale.scale_type= [KNL]
+ Specify the read-protection implementation to test.
+
+ refscale.shutdown= [KNL]
+ Shut down the system at the end of the performance
+ test. This defaults to 1 (shut it down) when
+ refscale is built into the kernel and to 0 (leave
+ it running) when refscale is built as a module.
+
+ refscale.verbose= [KNL]
+ Enable additional printk() statements.
+
+ refscale.verbose_batched= [KNL]
+ Batch the additional printk() statements. If zero
+ (the default) or negative, print everything. Otherwise,
+ print every Nth verbose statement, where N is the value
+ specified.
+
+ regulator_ignore_unused
+ [REGULATOR]
+ Prevents regulator framework from disabling regulators
+ that are unused, due no driver claiming them. This may
+ be useful for debug and development, but should not be
+ needed on a platform with proper driver support.
+
relax_domain_level=
[KNL, SMP] Set scheduler's default relax_domain_level.
- See Documentation/cgroup-v1/cpusets.txt.
+ See Documentation/admin-guide/cgroup-v1/cpusets.rst.
reserve= [KNL,BUGS] Force kernel to ignore I/O ports or memory
Format: <base1>,<size1>[,<base2>,<size2>,...]
@@ -3938,16 +6342,33 @@
them. If <base> is less than 0x10000, the region
is assumed to be I/O ports; otherwise it is memory.
- reservetop= [X86-32]
+ reserve_mem= [RAM]
+ Format: nn[KMG]:<align>:<label>
+ Reserve physical memory and label it with a name that
+ other subsystems can use to access it. This is typically
+ used for systems that do not wipe the RAM, and this command
+ line will try to reserve the same physical memory on
+ soft reboots. Note, it is not guaranteed to be the same
+ location. For example, if anything about the system changes
+ or if booting a different kernel. It can also fail if KASLR
+ places the kernel at the location of where the RAM reservation
+ was from a previous boot, the new reservation will be at a
+ different location.
+ Any subsystem using this feature must add a way to verify
+ that the contents of the physical memory is from a previous
+ boot, as there may be cases where the memory will not be
+ located at the same location.
+
+ The format is size:align:label for example, to request
+ 12 megabytes of 4096 alignment for ramoops:
+
+ reserve_mem=12M:4096:oops ramoops.mem_name=oops
+
+ reservetop= [X86-32,EARLY]
Format: nn[KMG]
Reserves a hole at the top of the kernel virtual
address space.
- reservelow= [X86]
- Format: nn[K]
- Set the amount of memory to reserve for BIOS at
- the bottom of the address space.
-
reset_devices [KNL] Force drivers to reset the underlying device
during initialization.
@@ -3960,7 +6381,7 @@
Specify the offset from the beginning of the partition
given by "resume=" at which the swap header is located,
in <PAGE_SIZE> units (needed only for swap files).
- See Documentation/power/swsusp-and-swap-files.txt
+ See Documentation/power/swsusp-and-swap-files.rst
resumedelay= [HIBERNATION] Delay (in seconds) to pause before attempting to
read the resume files
@@ -3969,16 +6390,45 @@
Useful for devices that are detected asynchronously
(e.g. USB and MMC devices).
- hibernate= [HIBERNATION]
- noresume Don't check if there's a hibernation image
- present during boot.
- nocompress Don't compress/decompress hibernation images.
- no Disable hibernation and resume.
- protect_image Turn on image protection during restoration
- (that will set all pages holding image data
- during restoration read-only).
-
- retain_initrd [RAM] Keep initrd memory after extraction
+ retain_initrd [RAM] Keep initrd memory after extraction. After boot, it will
+ be accessible via /sys/firmware/initrd.
+
+ retbleed= [X86] Control mitigation of RETBleed (Arbitrary
+ Speculative Code Execution with Return Instructions)
+ vulnerability.
+
+ AMD-based UNRET and IBPB mitigations alone do not stop
+ sibling threads from influencing the predictions of other
+ sibling threads. For that reason, STIBP is used on pro-
+ cessors that support it, and mitigate SMT on processors
+ that don't.
+
+ off - no mitigation
+ auto - automatically select a migitation
+ auto,nosmt - automatically select a mitigation,
+ disabling SMT if necessary for
+ the full mitigation (only on Zen1
+ and older without STIBP).
+ ibpb - On AMD, mitigate short speculation
+ windows on basic block boundaries too.
+ Safe, highest perf impact. It also
+ enables STIBP if present. Not suitable
+ on Intel.
+ ibpb,nosmt - Like "ibpb" above but will disable SMT
+ when STIBP is not available. This is
+ the alternative for systems which do not
+ have STIBP.
+ unret - Force enable untrained return thunks,
+ only effective on AMD f15h-f17h based
+ systems.
+ unret,nosmt - Like unret, but will disable SMT when STIBP
+ is not available. This is the alternative for
+ systems which do not have STIBP.
+
+ Selecting 'auto' will choose a mitigation method at run
+ time according to the CPU.
+
+ Not specifying this option is equivalent to retbleed=auto.
rfkill.default_state=
0 "airplane mode". All wifi, bluetooth, wimax, gps, fm,
@@ -3992,44 +6442,69 @@
2 The "airplane mode" button toggles between everything
blocked and everything unblocked.
- rhash_entries= [KNL,NET]
- Set number of hash buckets for route cache
-
ring3mwait=disable
[KNL] Disable ring 3 MONITOR/MWAIT feature on supported
CPUs.
+ riscv_isa_fallback [RISCV,EARLY]
+ When CONFIG_RISCV_ISA_FALLBACK is not enabled, permit
+ falling back to detecting extension support by parsing
+ "riscv,isa" property on devicetree systems when the
+ replacement properties are not found. See the Kconfig
+ entry for RISCV_ISA_FALLBACK.
+
ro [KNL] Mount root device read-only on boot
- rodata= [KNL]
+ rodata= [KNL,EARLY]
on Mark read-only kernel memory as read-only (default).
off Leave read-only kernel memory writable for debugging.
+ noalias Mark read-only kernel memory as read-only but retain
+ writable aliases in the direct map for regions outside
+ of the kernel image. [arm64]
rockchip.usb_uart
+ [EARLY]
Enable the uart passthrough on the designated usb port
on Rockchip SoCs. When active, the signals of the
debug-uart get routed to the D+ and D- pins of the usb
port and the regular usb controller gets disabled.
root= [KNL] Root filesystem
- See name_to_dev_t comment in init/do_mounts.c.
+ Usually this is a block device specifier of some kind,
+ see the early_lookup_bdev comment in
+ block/early-lookup.c for details.
+ Alternatively this can be "ram" for the legacy initial
+ ramdisk, "nfs" and "cifs" for root on a network file
+ system, or "mtd" and "ubi" for mounting from raw flash.
rootdelay= [KNL] Delay (in seconds) to pause before attempting to
mount the root filesystem
rootflags= [KNL] Set root filesystem mount option string
+ initramfs_options= [KNL]
+ Specify mount options for for the initramfs mount.
+
rootfstype= [KNL] Set root filesystem type
rootwait [KNL] Wait (indefinitely) for root device to show up.
Useful for devices that are detected asynchronously
(e.g. USB and MMC devices).
+ rootwait= [KNL] Maximum time (in seconds) to wait for root device
+ to show up before attempting to mount the root
+ filesystem.
+
rproc_mem=nn[KMG][@address]
[KNL,ARM,CMA] Remoteproc physical memory block.
Memory area to be used by remote processor image,
managed by CMA.
+ rt_group_sched= [KNL] Enable or disable SCHED_RR/FIFO group scheduling
+ when CONFIG_RT_GROUP_SCHED=y. Defaults to
+ !CONFIG_RT_GROUP_SCHED_DEFAULT_DISABLED.
+ Format: <bool>
+
rw [KNL] Mount root device read-write on boot
S [KNL] Run init in single mode
@@ -4037,23 +6512,152 @@
s390_iommu= [HW,S390]
Set s390 IOTLB flushing mode
strict
- With strict flushing every unmap operation will result in
- an IOTLB flush. Default is lazy flushing before reuse,
- which is faster.
+ With strict flushing every unmap operation will result
+ in an IOTLB flush. Default is lazy flushing before
+ reuse, which is faster. Deprecated, equivalent to
+ iommu.strict=1.
+
+ s390_iommu_aperture= [KNL,S390]
+ Specifies the size of the per device DMA address space
+ accessible through the DMA and IOMMU APIs as a decimal
+ factor of the size of main memory.
+ The default is 1 meaning that one can concurrently use
+ as many DMA addresses as physical memory is installed,
+ if supported by hardware, and thus map all of memory
+ once. With a value of 2 one can map all of memory twice
+ and so on. As a special case a factor of 0 imposes no
+ restrictions other than those given by hardware at the
+ cost of significant additional memory use for tables.
sa1100ir [NET]
See drivers/net/irda/sa1100_ir.c.
- sbni= [NET] Granch SBNI12 leased line adapter
+ sched_proxy_exec= [KNL]
+ Enables or disables "proxy execution" style
+ solution to mutex-based priority inversion.
+ Format: <bool>
- sched_debug [KNL] Enables verbose scheduler debug messages.
+ sched_verbose [KNL,EARLY] Enables verbose scheduler debug messages.
schedstats= [KNL,X86] Enable or disable scheduled statistics.
Allowed values are enable and disable. This feature
incurs a small amount of overhead in the scheduler
but is useful for debugging and performance tuning.
- skew_tick= [KNL] Offset the periodic timer tick per cpu to mitigate
+ sched_thermal_decay_shift=
+ [Deprecated]
+ [KNL, SMP] Set a decay shift for scheduler thermal
+ pressure signal. Thermal pressure signal follows the
+ default decay period of other scheduler pelt
+ signals(usually 32 ms but configurable). Setting
+ sched_thermal_decay_shift will left shift the decay
+ period for the thermal pressure signal by the shift
+ value.
+ i.e. with the default pelt decay period of 32 ms
+ sched_thermal_decay_shift thermal pressure decay pr
+ 1 64 ms
+ 2 128 ms
+ and so on.
+ Format: integer between 0 and 10
+ Default is 0.
+
+ scftorture.holdoff= [KNL]
+ Number of seconds to hold off before starting
+ test. Defaults to zero for module insertion and
+ to 10 seconds for built-in smp_call_function()
+ tests.
+
+ scftorture.longwait= [KNL]
+ Request ridiculously long waits randomly selected
+ up to the chosen limit in seconds. Zero (the
+ default) disables this feature. Please note
+ that requesting even small non-zero numbers of
+ seconds can result in RCU CPU stall warnings,
+ softlockup complaints, and so on.
+
+ scftorture.nthreads= [KNL]
+ Number of kthreads to spawn to invoke the
+ smp_call_function() family of functions.
+ The default of -1 specifies a number of kthreads
+ equal to the number of CPUs.
+
+ scftorture.onoff_holdoff= [KNL]
+ Number seconds to wait after the start of the
+ test before initiating CPU-hotplug operations.
+
+ scftorture.onoff_interval= [KNL]
+ Number seconds to wait between successive
+ CPU-hotplug operations. Specifying zero (which
+ is the default) disables CPU-hotplug operations.
+
+ scftorture.shutdown_secs= [KNL]
+ The number of seconds following the start of the
+ test after which to shut down the system. The
+ default of zero avoids shutting down the system.
+ Non-zero values are useful for automated tests.
+
+ scftorture.stat_interval= [KNL]
+ The number of seconds between outputting the
+ current test statistics to the console. A value
+ of zero disables statistics output.
+
+ scftorture.stutter_cpus= [KNL]
+ The number of jiffies to wait between each change
+ to the set of CPUs under test.
+
+ scftorture.use_cpus_read_lock= [KNL]
+ Use use_cpus_read_lock() instead of the default
+ preempt_disable() to disable CPU hotplug
+ while invoking one of the smp_call_function*()
+ functions.
+
+ scftorture.verbose= [KNL]
+ Enable additional printk() statements.
+
+ scftorture.weight_single= [KNL]
+ The probability weighting to use for the
+ smp_call_function_single() function with a zero
+ "wait" parameter. A value of -1 selects the
+ default if all other weights are -1. However,
+ if at least one weight has some other value, a
+ value of -1 will instead select a weight of zero.
+
+ scftorture.weight_single_wait= [KNL]
+ The probability weighting to use for the
+ smp_call_function_single() function with a
+ non-zero "wait" parameter. See weight_single.
+
+ scftorture.weight_many= [KNL]
+ The probability weighting to use for the
+ smp_call_function_many() function with a zero
+ "wait" parameter. See weight_single.
+ Note well that setting a high probability for
+ this weighting can place serious IPI load
+ on the system.
+
+ scftorture.weight_many_wait= [KNL]
+ The probability weighting to use for the
+ smp_call_function_many() function with a
+ non-zero "wait" parameter. See weight_single
+ and weight_many.
+
+ scftorture.weight_all= [KNL]
+ The probability weighting to use for the
+ smp_call_function_all() function with a zero
+ "wait" parameter. See weight_single and
+ weight_many.
+
+ scftorture.weight_all_wait= [KNL]
+ The probability weighting to use for the
+ smp_call_function_all() function with a
+ non-zero "wait" parameter. See weight_single
+ and weight_many.
+
+ sdw_mclk_divider=[SDW]
+ Specify the MCLK divider for Intel SoundWire buses in
+ case the BIOS does not provide the clock rate properly.
+
+ skew_tick= [KNL,EARLY] Offset the periodic timer tick per cpu to mitigate
xtime_lock contention on larger systems, and/or RCU lock
contention on all systems with CONFIG_MAXSMP set.
Format: { "0" | "1" }
@@ -4062,100 +6666,132 @@
Note: increases power consumption, thus should only be
enabled if running jitter sensitive (HPC/RT) workloads.
- security= [SECURITY] Choose a security module to enable at boot.
- If this boot parameter is not specified, only the first
- security module asking for security registration will be
- loaded. An invalid security module name will be treated
- as if no module has been chosen.
+ security= [SECURITY] Choose a legacy "major" security module to
+ enable at boot. This has been deprecated by the
+ "lsm=" parameter.
selinux= [SELINUX] Disable or enable SELinux at boot time.
Format: { "0" | "1" }
See security/selinux/Kconfig help text.
0 -- disable.
1 -- enable.
- Default value is set via kernel config option.
- If enabled at boot time, /selinux/disable can be used
- later to disable prior to initial policy load.
-
- apparmor= [APPARMOR] Disable or enable AppArmor at boot time
- Format: { "0" | "1" }
- See security/apparmor/Kconfig help text
- 0 -- disable.
- 1 -- enable.
- Default value is set via kernel config option.
+ Default value is 1.
serialnumber [BUGS=X86-32]
- shapers= [NET]
- Maximal number of shapers.
+ sev=option[,option...] [X86-64]
- simeth= [IA-64]
- simscsi=
+ debug
+ Enable debug messages.
- slram= [HW,MTD]
+ nosnp
+ Do not enable SEV-SNP (applies to host/hypervisor
+ only). Setting 'nosnp' avoids the RMP check overhead
+ in memory accesses when users do not want to run
+ SEV-SNP guests.
- slab_nomerge [MM]
- Disable merging of slabs with similar size. May be
- necessary if there is some reason to distinguish
- allocs to different slabs, especially in hardened
- environments where the risk of heap overflows and
- layout control by attackers can usually be
- frustrated by disabling merging. This will reduce
- most of the exposure of a heap attack to a single
- cache (risks via metadata attacks are mostly
- unchanged). Debug options disable merging on their
- own.
- For more information see Documentation/vm/slub.rst.
+ shapers= [NET]
+ Maximal number of shapers.
- slab_max_order= [MM, SLAB]
- Determines the maximum allowed order for slabs.
- A high setting may cause OOMs due to memory
- fragmentation. Defaults to 1 for systems with
- more than 32MB of RAM, 0 otherwise.
+ show_lapic= [APIC,X86] Advanced Programmable Interrupt Controller
+ Limit apic dumping. The parameter defines the maximal
+ number of local apics being dumped. Also it is possible
+ to set it to "all" by meaning -- no limit here.
+ Format: { 1 (default) | 2 | ... | all }.
+ The parameter valid if only apic=debug or
+ apic=verbose is specified.
+ Example: apic=debug show_lapic=all
- slub_debug[=options[,slabs]] [MM, SLUB]
- Enabling slub_debug allows one to determine the
+ slab_debug[=options[,slabs][;[options[,slabs]]...] [MM]
+ Enabling slab_debug allows one to determine the
culprit if slab objects become corrupted. Enabling
- slub_debug can create guard zones around objects and
+ slab_debug can create guard zones around objects and
may poison objects when not in use. Also tracks the
last alloc / free. For more information see
- Documentation/vm/slub.rst.
+ Documentation/admin-guide/mm/slab.rst.
+ (slub_debug legacy name also accepted for now)
- slub_memcg_sysfs= [MM, SLUB]
- Determines whether to enable sysfs directories for
- memory cgroup sub-caches. 1 to enable, 0 to disable.
- The default is determined by CONFIG_SLUB_MEMCG_SYSFS_ON.
- Enabling this can lead to a very high number of debug
- directories and files being created under
- /sys/kernel/slub.
+ Using this option implies the "no_hash_pointers"
+ option which can be undone by adding the
+ "hash_pointers=always" option.
- slub_max_order= [MM, SLUB]
+ slab_max_order= [MM]
Determines the maximum allowed order for slabs.
A high setting may cause OOMs due to memory
fragmentation. For more information see
- Documentation/vm/slub.rst.
+ Documentation/admin-guide/mm/slab.rst.
+ (slub_max_order legacy name also accepted for now)
+
+ slab_merge [MM]
+ Enable merging of slabs with similar size when the
+ kernel is built without CONFIG_SLAB_MERGE_DEFAULT.
+ (slub_merge legacy name also accepted for now)
- slub_min_objects= [MM, SLUB]
+ slab_min_objects= [MM]
The minimum number of objects per slab. SLUB will
- increase the slab order up to slub_max_order to
+ increase the slab order up to slab_max_order to
generate a sufficiently large slab able to contain
the number of objects indicated. The higher the number
of objects the smaller the overhead of tracking slabs
and the less frequently locks need to be acquired.
- For more information see Documentation/vm/slub.rst.
+ For more information see
+ Documentation/admin-guide/mm/slab.rst.
+ (slub_min_objects legacy name also accepted for now)
- slub_min_order= [MM, SLUB]
+ slab_min_order= [MM]
Determines the minimum page order for slabs. Must be
- lower than slub_max_order.
- For more information see Documentation/vm/slub.rst.
+ lower or equal to slab_max_order. For more information see
+ Documentation/admin-guide/mm/slab.rst.
+ (slub_min_order legacy name also accepted for now)
+
+ slab_nomerge [MM]
+ Disable merging of slabs with similar size. May be
+ necessary if there is some reason to distinguish
+ allocs to different slabs, especially in hardened
+ environments where the risk of heap overflows and
+ layout control by attackers can usually be
+ frustrated by disabling merging. This will reduce
+ most of the exposure of a heap attack to a single
+ cache (risks via metadata attacks are mostly
+ unchanged). Debug options disable merging on their
+ own.
+ For more information see
+ Documentation/admin-guide/mm/slab.rst.
+ (slub_nomerge legacy name also accepted for now)
+
+ slab_strict_numa [MM]
+ Support memory policies on a per object level
+ in the slab allocator. The default is for memory
+ policies to be applied at the folio level when
+ a new folio is needed or a partial folio is
+ retrieved from the lists. Increases overhead
+ in the slab fastpaths but gains more accurate
+ NUMA kernel object placement which helps with slow
+ interconnects in NUMA systems.
- slub_nomerge [MM, SLUB]
- Same with slab_nomerge. This is supported for legacy.
- See slab_nomerge for more information.
+ slram= [HW,MTD]
smart2= [HW]
Format: <io1>[,<io2>[,...,<io8>]]
+ smp.csd_lock_timeout= [KNL]
+ Specify the period of time in milliseconds
+ that smp_call_function() and friends will wait
+ for a CPU to release the CSD lock. This is
+ useful when diagnosing bugs involving CPUs
+ disabling interrupts for extended periods
+ of time. Defaults to 5,000 milliseconds, and
+ setting a value of zero disables this feature.
+ This feature may be more efficiently disabled
+ using the csdlock_debug- kernel parameter.
+
+ smp.panic_on_ipistall= [KNL]
+ If a csd_lock_timeout extends for more than
+ the specified number of milliseconds, panic the
+ system. By default, let CSD-lock acquisition
+ take as long as they take. Specifying 300,000
+ for this value provides a 5-minute timeout.
+
smsc-ircc2.nopnp [HW] Don't use PNP to discover SMC devices
smsc-ircc2.ircc_cfg= [HW] Device configuration I/O port
smsc-ircc2.ircc_sir= [HW] SIR base I/O port
@@ -4167,56 +6803,140 @@
1: Fast pin select (default)
2: ATC IRMode
- smt [KNL,S390] Set the maximum number of threads (logical
- CPUs) to use per physical CPU on systems capable of
- symmetric multithreading (SMT). Will be capped to the
- actual hardware limit.
+ smt= [KNL,MIPS,S390,EARLY] Set the maximum number of threads
+ (logical CPUs) to use per physical CPU on systems
+ capable of symmetric multithreading (SMT). Will
+ be capped to the actual hardware limit.
Format: <integer>
Default: -1 (no limit)
softlockup_panic=
[KNL] Should the soft-lockup detector generate panics.
- Format: <integer>
+ Format: 0 | 1
- A nonzero value instructs the soft-lockup detector
- to panic the machine when a soft-lockup occurs. This
- is also controlled by CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC
- which is the respective build-time switch to that
- functionality.
+ A value of 1 instructs the soft-lockup detector
+ to panic the machine when a soft-lockup occurs. It is
+ also controlled by the kernel.softlockup_panic sysctl
+ and CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC, which is the
+ respective build-time switch to that functionality.
softlockup_all_cpu_backtrace=
[KNL] Should the soft-lockup detector generate
backtraces on all cpus.
- Format: <integer>
+ Format: 0 | 1
sonypi.*= [HW] Sony Programmable I/O Control Device driver
- See Documentation/laptops/sonypi.txt
-
- spectre_v2= [X86] Control mitigation of Spectre variant 2
+ See Documentation/admin-guide/laptops/sonypi.rst
+
+ spectre_bhi= [X86] Control mitigation of Branch History Injection
+ (BHI) vulnerability. This setting affects the
+ deployment of the HW BHI control and the SW BHB
+ clearing sequence.
+
+ on - (default) Enable the HW or SW mitigation as
+ needed. This protects the kernel from
+ both syscalls and VMs.
+ vmexit - On systems which don't have the HW mitigation
+ available, enable the SW mitigation on vmexit
+ ONLY. On such systems, the host kernel is
+ protected from VM-originated BHI attacks, but
+ may still be vulnerable to syscall attacks.
+ off - Disable the mitigation.
+
+ spectre_v2= [X86,EARLY] Control mitigation of Spectre variant 2
(indirect branch speculation) vulnerability.
+ The default operation protects the kernel from
+ user space attacks.
- on - unconditionally enable
- off - unconditionally disable
+ on - unconditionally enable, implies
+ spectre_v2_user=on
+ off - unconditionally disable, implies
+ spectre_v2_user=off
auto - kernel detects whether your CPU model is
vulnerable
Selecting 'on' will, and 'auto' may, choose a
mitigation method at run time according to the
CPU, the available microcode, the setting of the
- CONFIG_RETPOLINE configuration option, and the
- compiler with which the kernel was built.
+ CONFIG_MITIGATION_RETPOLINE configuration option,
+ and the compiler with which the kernel was built.
+
+ Selecting 'on' will also enable the mitigation
+ against user space to user space task attacks.
+ Selecting specific mitigation does not force enable
+ user mitigations.
+
+ Selecting 'off' will disable both the kernel and
+ the user space protections.
Specific mitigations can also be selected manually:
retpoline - replace indirect branches
- retpoline,generic - google's original retpoline
- retpoline,amd - AMD-specific minimal thunk
+ retpoline,generic - Retpolines
+ retpoline,lfence - LFENCE; indirect branch
+ retpoline,amd - alias for retpoline,lfence
+ eibrs - Enhanced/Auto IBRS
+ eibrs,retpoline - Enhanced/Auto IBRS + Retpolines
+ eibrs,lfence - Enhanced/Auto IBRS + LFENCE
+ ibrs - use IBRS to protect kernel
Not specifying this option is equivalent to
spectre_v2=auto.
+ spectre_v2_user=
+ [X86] Control mitigation of Spectre variant 2
+ (indirect branch speculation) vulnerability between
+ user space tasks
+
+ on - Unconditionally enable mitigations. Is
+ enforced by spectre_v2=on
+
+ off - Unconditionally disable mitigations. Is
+ enforced by spectre_v2=off
+
+ prctl - Indirect branch speculation is enabled,
+ but mitigation can be enabled via prctl
+ per thread. The mitigation control state
+ is inherited on fork.
+
+ prctl,ibpb
+ - Like "prctl" above, but only STIBP is
+ controlled per thread. IBPB is issued
+ always when switching between different user
+ space processes.
+
+ seccomp
+ - Same as "prctl" above, but all seccomp
+ threads will enable the mitigation unless
+ they explicitly opt out.
+
+ seccomp,ibpb
+ - Like "seccomp" above, but only STIBP is
+ controlled per thread. IBPB is issued
+ always when switching between different
+ user space processes.
+
+ auto - Kernel selects the mitigation depending on
+ the available CPU features and vulnerability.
+
+ Default mitigation: "prctl"
+
+ Not specifying this option is equivalent to
+ spectre_v2_user=auto.
+
+ spec_rstack_overflow=
+ [X86,EARLY] Control RAS overflow mitigation on AMD Zen CPUs
+
+ off - Disable mitigation
+ microcode - Enable microcode mitigation only
+ safe-ret - Enable sw-only safe RET mitigation (default)
+ ibpb - Enable mitigation by issuing IBPB on
+ kernel entry
+ ibpb-vmexit - Issue IBPB only on VMEXIT
+ (cloud-specific mitigation)
+
spec_store_bypass_disable=
- [HW] Control Speculative Store Bypass (SSB) Disable mitigation
+ [HW,EARLY] Control Speculative Store Bypass (SSB) Disable mitigation
(Speculative Store Bypass vulnerability)
Certain CPUs are vulnerable to an exploit against a
@@ -4254,7 +6974,7 @@
will disable SSB unless they explicitly opt out.
Default mitigations:
- X86: If CONFIG_SECCOMP=y "seccomp", otherwise "prctl"
+ X86: "prctl"
On powerpc the options are:
@@ -4267,10 +6987,89 @@
Not specifying this option is equivalent to
spec_store_bypass_disable=auto.
- spia_io_base= [HW,MTD]
- spia_fio_base=
- spia_pedr=
- spia_peddr=
+ split_lock_detect=
+ [X86] Enable split lock detection or bus lock detection
+
+ When enabled (and if hardware support is present), atomic
+ instructions that access data across cache line
+ boundaries will result in an alignment check exception
+ for split lock detection or a debug exception for
+ bus lock detection.
+
+ off - not enabled
+
+ warn - the kernel will emit rate-limited warnings
+ about applications triggering the #AC
+ exception or the #DB exception. This mode is
+ the default on CPUs that support split lock
+ detection or bus lock detection. Default
+ behavior is by #AC if both features are
+ enabled in hardware.
+
+ fatal - the kernel will send SIGBUS to applications
+ that trigger the #AC exception or the #DB
+ exception. Default behavior is by #AC if
+ both features are enabled in hardware.
+
+ ratelimit:N -
+ Set system wide rate limit to N bus locks
+ per second for bus lock detection.
+ 0 < N <= 1000.
+
+ N/A for split lock detection.
+
+
+ If an #AC exception is hit in the kernel or in
+ firmware (i.e. not while executing in user mode)
+ the kernel will oops in either "warn" or "fatal"
+ mode.
+
+ #DB exception for bus lock is triggered only when
+ CPL > 0.
+
+ srbds= [X86,INTEL,EARLY]
+ Control the Special Register Buffer Data Sampling
+ (SRBDS) mitigation.
+
+ Certain CPUs are vulnerable to an MDS-like
+ exploit which can leak bits from the random
+ number generator.
+
+ By default, this issue is mitigated by
+ microcode. However, the microcode fix can cause
+ the RDRAND and RDSEED instructions to become
+ much slower. Among other effects, this will
+ result in reduced throughput from /dev/urandom.
+
+ The microcode mitigation can be disabled with
+ the following option:
+
+ off: Disable mitigation and remove
+ performance impact to RDRAND and RDSEED
+
+ srcutree.big_cpu_lim [KNL]
+ Specifies the number of CPUs constituting a
+ large system, such that srcu_struct structures
+ should immediately allocate an srcu_node array.
+ This kernel-boot parameter defaults to 128,
+ but takes effect only when the low-order four
+ bits of srcutree.convert_to_big is equal to 3
+ (decide at boot).
+
+ srcutree.convert_to_big [KNL]
+ Specifies under what conditions an SRCU tree
+ srcu_struct structure will be converted to big
+ form, that is, with an rcu_node tree:
+
+ 0: Never.
+ 1: At init_srcu_struct() time.
+ 2: When rcutorture decides to.
+ 3: Decide at boot time (default).
+ 0x1X: Above plus if high contention.
+
+ Either way, the srcu_node tree will be sized based
+ on the actual runtime number of CPUs (nr_cpu_ids)
+ instead of the compile-time CONFIG_NR_CPUS.
srcutree.counter_wrap_check [KNL]
Specifies how frequently to check for
@@ -4289,7 +7088,33 @@
expediting. Set to zero to disable automatic
expediting.
- ssbd= [ARM64,HW]
+ srcutree.srcu_max_nodelay [KNL]
+ Specifies the number of no-delay instances
+ per jiffy for which the SRCU grace period
+ worker thread will be rescheduled with zero
+ delay. Beyond this limit, worker thread will
+ be rescheduled with a sleep delay of one jiffy.
+
+ srcutree.srcu_max_nodelay_phase [KNL]
+ Specifies the per-grace-period phase, number of
+ non-sleeping polls of readers. Beyond this limit,
+ grace period worker thread will be rescheduled
+ with a sleep delay of one jiffy, between each
+ rescan of the readers, for a grace period phase.
+
+ srcutree.srcu_retry_check_delay [KNL]
+ Specifies number of microseconds of non-sleeping
+ delay between each non-sleeping poll of readers.
+
+ srcutree.small_contention_lim [KNL]
+ Specifies the number of update-side contention
+ events per jiffy will be tolerated before
+ initiating a conversion of an srcu_struct
+ structure to big form. Note that the value of
+ srcutree.convert_to_big must have the 0x10 bit
+ set for contention-based conversions to occur.
+
+ ssbd= [ARM64,HW,EARLY]
Speculative Store Bypass Disable control
On CPUs that are vulnerable to the Speculative
@@ -4313,12 +7138,23 @@
growing up) the main stack are reserved for no other
mapping. Default value is 256 pages.
+ stack_depot_disable= [KNL,EARLY]
+ Setting this to true through kernel command line will
+ disable the stack depot thereby saving the static memory
+ consumed by the stack hash table. By default this is set
+ to false.
+
+ stack_depot_max_pools= [KNL,EARLY]
+ Specify the maximum number of pools to use for storing
+ stack traces. Pools are allocated on-demand up to this
+ limit. Default value is 8191 pools.
+
stacktrace [FTRACE]
Enabled the stack tracer on boot up.
stacktrace_filter=[function-list]
[FTRACE] Limit the functions that the stack tracer
- will trace at boot up. function-list is a comma separated
+ will trace at boot up. function-list is a comma-separated
list of functions. This list can be changed at run
time by the stack_trace_filter file in the debugfs
tracing directory. Note, this enables stack tracing
@@ -4337,6 +7173,25 @@
stifb= [HW]
Format: bpp:<bpp1>[:<bpp2>[:<bpp3>...]]
+ strict_sas_size=
+ [X86]
+ Format: <bool>
+ Enable or disable strict sigaltstack size checks
+ against the required signal frame size which
+ depends on the supported FPU features. This can
+ be used to filter out binaries which have
+ not yet been made aware of AT_MINSIGSTKSZ.
+
+ stress_hpt [PPC,EARLY]
+ Limits the number of kernel HPT entries in the hash
+ page table to increase the rate of hash page table
+ faults on kernel addresses.
+
+ stress_slb [PPC,EARLY]
+ Limits the number of kernel SLB entries, and flushes
+ them frequently to increase the rate of SLB faults
+ on kernel addresses.
+
sunrpc.min_resvport=
sunrpc.max_resvport=
[NFS,SUNRPC]
@@ -4387,28 +7242,31 @@
/sys/power/pm_test). Only available when CONFIG_PM_DEBUG
is set. Default value is 5.
- swapaccount=[0|1]
- [KNL] Enable accounting of swap in memory resource
- controller if no parameter or 1 is given or disable
- it if 0 is given (See Documentation/cgroup-v1/memory.txt)
+ svm= [PPC]
+ Format: { on | off | y | n | 1 | 0 }
+ This parameter controls use of the Protected
+ Execution Facility on pSeries.
- swiotlb= [ARM,IA-64,PPC,MIPS,X86]
- Format: { <int> | force | noforce }
+ swiotlb= [ARM,PPC,MIPS,X86,S390,EARLY]
+ Format: { <int> [,<int>] | force | noforce }
<int> -- Number of I/O TLB slabs
+ <int> -- Second integer after comma. Number of swiotlb
+ areas with their own lock. Will be rounded up
+ to a power of 2.
force -- force using of bounce buffers even if they
wouldn't be automatically used by the kernel
noforce -- Never use bounce buffers (for debugging)
- switches= [HW,M68k]
+ switches= [HW,M68k,EARLY]
- sysfs.deprecated=0|1 [KNL]
- Enable/disable old style sysfs layout for old udev
- on older distributions. When this option is enabled
- very new udev will not work anymore. When this option
- is disabled (or CONFIG_SYSFS_DEPRECATED not compiled)
- in older udev will not work anymore.
- Default depends on CONFIG_SYSFS_DEPRECATED_V2 set in
- the kernel configuration.
+ sysctl.*= [KNL]
+ Set a sysctl parameter, right before loading the init
+ process, as if the value was written to the respective
+ /proc/sys/... file. Both '.' and '/' are recognized as
+ separators. Unrecognized parameters and invalid values
+ are reported in the kernel log. Sysctls registered
+ later by a loaded module cannot be set this way.
+ Example: sysctl.vm.swappiness=40
sysrq_always_enabled
[KNL]
@@ -4420,12 +7278,13 @@
Set the number of tcp_metrics_hash slots.
Default value is 8192 or 16384 depending on total
ram pages. This is used to specify the TCP metrics
- cache size. See Documentation/networking/ip-sysctl.txt
+ cache size. See Documentation/networking/ip-sysctl.rst
"tcp_no_metrics_save" section for more details.
tdfx= [HW,DRM]
- test_suspend= [SUSPEND][,N]
+ test_suspend= [SUSPEND]
+ Format: { "mem" | "standby" | "freeze" }[,N]
Specify "mem" (for Suspend-to-RAM) or "standby" (for
standby suspend) or "freeze" (for suspend type freeze)
as the system sleep state during system startup with
@@ -4444,10 +7303,6 @@
-1: disable all critical trip points in all thermal zones
<degrees C>: override all critical trip points
- thermal.nocrt= [HW,ACPI]
- Set to disable actions on ACPI thermal zone
- critical and hot trip points.
-
thermal.off= [HW,ACPI]
1: disable ACPI thermal control
@@ -4461,32 +7316,30 @@
<deci-seconds>: poll all this frequency
0: no polling (default)
- threadirqs [KNL]
+ thp_anon= [KNL]
+ Format: <size>[KMG],<size>[KMG]:<state>;<size>[KMG]-<size>[KMG]:<state>
+ state is one of "always", "madvise", "never" or "inherit".
+ Control the default behavior of the system with respect
+ to anonymous transparent hugepages.
+ Can be used multiple times for multiple anon THP sizes.
+ See Documentation/admin-guide/mm/transhuge.rst for more
+ details.
+
+ threadirqs [KNL,EARLY]
Force threading of all interrupt handlers except those
marked explicitly IRQF_NO_THREAD.
- tmem [KNL,XEN]
- Enable the Transcendent memory driver if built-in.
-
- tmem.cleancache=0|1 [KNL, XEN]
- Default is on (1). Disable the usage of the cleancache
- API to send anonymous pages to the hypervisor.
-
- tmem.frontswap=0|1 [KNL, XEN]
- Default is on (1). Disable the usage of the frontswap
- API to send swap pages to the hypervisor. If disabled
- the selfballooning and selfshrinking are force disabled.
-
- tmem.selfballooning=0|1 [KNL, XEN]
- Default is on (1). Disable the driving of swap pages
- to the hypervisor.
-
- tmem.selfshrinking=0|1 [KNL, XEN]
- Default is on (1). Partial swapoff that immediately
- transfers pages from Xen hypervisor back to the
- kernel based on different criteria.
-
- topology= [S390]
+ thp_shmem= [KNL]
+ Format: <size>[KMG],<size>[KMG]:<policy>;<size>[KMG]-<size>[KMG]:<policy>
+ Control the default policy of each hugepage size for the
+ internal shmem mount. <policy> is one of policies available
+ for the shmem mount ("always", "inherit", "never", "within_size",
+ and "advise").
+ It can be used multiple times for multiple shmem THP sizes.
+ See Documentation/admin-guide/mm/transhuge.rst for more
+ details.
+
+ topology= [S390,EARLY]
Format: {off | on}
Specify if the kernel should make use of the cpu
topology information if the hardware supports this.
@@ -4494,13 +7347,41 @@
e.g. base its process migration decisions on it.
Default is on.
- topology_updates= [KNL, PPC, NUMA]
- Format: {off}
- Specify if the kernel should ignore (off)
- topology updates sent by the hypervisor to this
- LPAR.
-
- tp720= [HW,PS2]
+ torture.disable_onoff_at_boot= [KNL]
+ Prevent the CPU-hotplug component of torturing
+ until after init has spawned.
+
+ torture.ftrace_dump_at_shutdown= [KNL]
+ Dump the ftrace buffer at torture-test shutdown,
+ even if there were no errors. This can be a
+ very costly operation when many torture tests
+ are running concurrently, especially on systems
+ with rotating-rust storage.
+
+ torture.verbose_sleep_frequency= [KNL]
+ Specifies how many verbose printk()s should be
+ emitted between each sleep. The default of zero
+ disables verbose-printk() sleeping.
+
+ torture.verbose_sleep_duration= [KNL]
+ Duration of each verbose-printk() sleep in jiffies.
+
+ tpm.disable_pcr_integrity= [HW,TPM]
+ Do not protect PCR registers from unintended physical
+ access, or interposers in the bus by the means of
+ having an integrity protected session wrapped around
+ TPM2_PCR_Extend command. Consider this in a situation
+ where TPM is heavily utilized by IMA, thus protection
+ causing a major performance hit, and the space where
+ machines are deployed is by other means guarded.
+
+ tpm_crb_ffa.busy_timeout_ms= [ARM64,TPM]
+ Maximum time in milliseconds to retry sending a message
+ to the TPM service before giving up. This parameter controls
+ how long the system will continue retrying when the TPM
+ service is busy.
+ Format: <unsigned int>
+ Default: 2000 (2 seconds)
tpm_suspend_pcr=[HW,TPM]
Format: integer pcr id
@@ -4511,22 +7392,163 @@
This will guarantee that all the other pcrs
are saved.
+ tpm_tis.interrupts= [HW,TPM]
+ Enable interrupts for the MMIO based physical layer
+ for the FIFO interface. By default it is set to false
+ (0). For more information about TPM hardware interfaces
+ defined by Trusted Computing Group (TCG) see
+ https://trustedcomputinggroup.org/resource/pc-client-platform-tpm-profile-ptp-specification/
+
+ tp_printk [FTRACE]
+ Have the tracepoints sent to printk as well as the
+ tracing ring buffer. This is useful for early boot up
+ where the system hangs or reboots and does not give the
+ option for reading the tracing buffer or performing a
+ ftrace_dump_on_oops.
+
+ To turn off having tracepoints sent to printk,
+ echo 0 > /proc/sys/kernel/tracepoint_printk
+ Note, echoing 1 into this file without the
+ tp_printk kernel cmdline option has no effect.
+
+ The tp_printk_stop_on_boot (see below) can also be used
+ to stop the printing of events to console at
+ late_initcall_sync.
+
+ ** CAUTION **
+
+ Having tracepoints sent to printk() and activating high
+ frequency tracepoints such as irq or sched, can cause
+ the system to live lock.
+
+ tp_printk_stop_on_boot [FTRACE]
+ When tp_printk (above) is set, it can cause a lot of noise
+ on the console. It may be useful to only include the
+ printing of events during boot up, as user space may
+ make the system inoperable.
+
+ This command line option will stop the printing of events
+ to console at the late_initcall_sync() time frame.
+
trace_buf_size=nn[KMG]
[FTRACE] will set tracing buffer size on each cpu.
+ trace_clock= [FTRACE] Set the clock used for tracing events
+ at boot up.
+ local - Use the per CPU time stamp counter
+ (converted into nanoseconds). Fast, but
+ depending on the architecture, may not be
+ in sync between CPUs.
+ global - Event time stamps are synchronized across
+ CPUs. May be slower than the local clock,
+ but better for some race conditions.
+ counter - Simple counting of events (1, 2, ..)
+ note, some counts may be skipped due to the
+ infrastructure grabbing the clock more than
+ once per event.
+ uptime - Use jiffies as the time stamp.
+ perf - Use the same clock that perf uses.
+ mono - Use ktime_get_mono_fast_ns() for time stamps.
+ mono_raw - Use ktime_get_raw_fast_ns() for time
+ stamps.
+ boot - Use ktime_get_boot_fast_ns() for time stamps.
+ Architectures may add more clocks. See
+ Documentation/trace/ftrace.rst for more details.
+
trace_event=[event-list]
[FTRACE] Set and start specified trace events in order
to facilitate early boot debugging. The event-list is a
- comma separated list of trace events to enable. See
+ comma-separated list of trace events to enable. See
also Documentation/trace/events.rst
+ To enable modules, use :mod: keyword:
+
+ trace_event=:mod:<module>
+
+ The value before :mod: will only enable specific events
+ that are part of the module. See the above mentioned
+ document for more information.
+
+ trace_instance=[instance-info]
+ [FTRACE] Create a ring buffer instance early in boot up.
+ This will be listed in:
+
+ /sys/kernel/tracing/instances
+
+ Events can be enabled at the time the instance is created
+ via:
+
+ trace_instance=<name>,<system1>:<event1>,<system2>:<event2>
+
+ Note, the "<system*>:" portion is optional if the event is
+ unique.
+
+ trace_instance=foo,sched:sched_switch,irq_handler_entry,initcall
+
+ will enable the "sched_switch" event (note, the "sched:" is optional, and
+ the same thing would happen if it was left off). The irq_handler_entry
+ event, and all events under the "initcall" system.
+
+ Flags can be added to the instance to modify its behavior when it is
+ created. The flags are separated by '^'.
+
+ The available flags are:
+
+ traceoff - Have the tracing instance tracing disabled after it is created.
+ traceprintk - Have trace_printk() write into this trace instance
+ (note, "printk" and "trace_printk" can also be used)
+
+ trace_instance=foo^traceoff^traceprintk,sched,irq
+
+ The flags must come before the defined events.
+
+ If memory has been reserved (see memmap for x86), the instance
+ can use that memory:
+
+ memmap=12M$0x284500000 trace_instance=boot_map@0x284500000:12M
+
+ The above will create a "boot_map" instance that uses the physical
+ memory at 0x284500000 that is 12Megs. The per CPU buffers of that
+ instance will be split up accordingly.
+
+ Alternatively, the memory can be reserved by the reserve_mem option:
+
+ reserve_mem=12M:4096:trace trace_instance=boot_map@trace
+
+ This will reserve 12 megabytes at boot up with a 4096 byte alignment
+ and place the ring buffer in this memory. Note that due to KASLR, the
+ memory may not be the same location each time, which will not preserve
+ the buffer content.
+
+ Also note that the layout of the ring buffer data may change between
+ kernel versions where the validator will fail and reset the ring buffer
+ if the layout is not the same as the previous kernel.
+
+ If the ring buffer is used for persistent bootups and has events enabled,
+ it is recommend to disable tracing so that events from a previous boot do not
+ mix with events of the current boot (unless you are debugging a random crash
+ at boot up).
+
+ reserve_mem=12M:4096:trace trace_instance=boot_map^traceoff^traceprintk@trace,sched,irq
+
+ Note, saving the trace buffer across reboots does require that the system
+ is set up to not wipe memory. For instance, CONFIG_RESET_ATTACK_MITIGATION
+ can force a memory reset on boot which will clear any trace that was stored.
+ This is just one of many ways that can clear memory. Make sure your system
+ keeps the content of memory across reboots before relying on this option.
+
+ NB: Both the mapped address and size must be page aligned for the architecture.
+
+ See also Documentation/trace/debugging.rst
+
+
trace_options=[option-list]
[FTRACE] Enable or disable tracer options at boot.
The option-list is a comma delimited list of options
that can be enabled or disabled just as if you were
to echo the option name into
- /sys/kernel/debug/tracing/trace_options
+ /sys/kernel/tracing/trace_options
For example, to enable stacktrace option (to dump the
stack trace of each event), add to the command line:
@@ -4536,29 +7558,39 @@
See also Documentation/trace/ftrace.rst "trace options"
section.
- tp_printk[FTRACE]
- Have the tracepoints sent to printk as well as the
- tracing ring buffer. This is useful for early boot up
- where the system hangs or reboots and does not give the
- option for reading the tracing buffer or performing a
- ftrace_dump_on_oops.
+ trace_trigger=[trigger-list]
+ [FTRACE] Add an event trigger on specific events.
+ Set a trigger on top of a specific event, with an optional
+ filter.
- To turn off having tracepoints sent to printk,
- echo 0 > /proc/sys/kernel/tracepoint_printk
- Note, echoing 1 into this file without the
- tracepoint_printk kernel cmdline option has no effect.
+ The format is "trace_trigger=<event>.<trigger>[ if <filter>],..."
+ Where more than one trigger may be specified that are comma delimited.
- ** CAUTION **
+ For example:
- Having tracepoints sent to printk() and activating high
- frequency tracepoints such as irq or sched, can cause
- the system to live lock.
+ trace_trigger="sched_switch.stacktrace if prev_state == 2"
+
+ The above will enable the "stacktrace" trigger on the "sched_switch"
+ event but only trigger it if the "prev_state" of the "sched_switch"
+ event is "2" (TASK_UNINTERRUPTIBLE).
+
+ See also "Event triggers" in Documentation/trace/events.rst
+
+
+ traceoff_after_boot
+ [FTRACE] Sometimes tracing is used to debug issues
+ during the boot process. Since the trace buffer has a
+ limited amount of storage, it may be prudent to
+ disable tracing after the boot is finished, otherwise
+ the critical information may be overwritten. With this
+ option, the main tracing buffer will be turned off at
+ the end of the boot process.
traceoff_on_warning
[FTRACE] enable this option to disable tracing when a
warning is hit. This turns off "tracing_on". Tracing can
be enabled again by echoing '1' into the "tracing_on"
- file located in /sys/kernel/debug/tracing/
+ file located in /sys/kernel/tracing/
This option is useful, as it disables the trace before
the WARNING dump is called, which prevents the trace to
@@ -4575,6 +7607,69 @@
See Documentation/admin-guide/mm/transhuge.rst
for more details.
+ transparent_hugepage_shmem= [KNL]
+ Format: [always|within_size|advise|never|deny|force]
+ Can be used to control the hugepage allocation policy for
+ the internal shmem mount.
+ See Documentation/admin-guide/mm/transhuge.rst
+ for more details.
+
+ transparent_hugepage_tmpfs= [KNL]
+ Format: [always|within_size|advise|never]
+ Can be used to control the default hugepage allocation policy
+ for the tmpfs mount.
+ See Documentation/admin-guide/mm/transhuge.rst
+ for more details.
+
+ trusted.source= [KEYS]
+ Format: <string>
+ This parameter identifies the trust source as a backend
+ for trusted keys implementation. Supported trust
+ sources:
+ - "tpm"
+ - "tee"
+ - "caam"
+ - "dcp"
+ If not specified then it defaults to iterating through
+ the trust source list starting with TPM and assigns the
+ first trust source as a backend which is initialized
+ successfully during iteration.
+
+ trusted.rng= [KEYS]
+ Format: <string>
+ The RNG used to generate key material for trusted keys.
+ Can be one of:
+ - "kernel"
+ - the same value as trusted.source: "tpm" or "tee"
+ - "default"
+ If not specified, "default" is used. In this case,
+ the RNG's choice is left to each individual trust source.
+
+ trusted.dcp_use_otp_key
+ This is intended to be used in combination with
+ trusted.source=dcp and will select the DCP OTP key
+ instead of the DCP UNIQUE key blob encryption.
+
+ trusted.dcp_skip_zk_test
+ This is intended to be used in combination with
+ trusted.source=dcp and will disable the check if the
+ blob key is all zeros. This is helpful for situations where
+ having this key zero'ed is acceptable. E.g. in testing
+ scenarios.
+
+ tsa= [X86] Control mitigation for Transient Scheduler
+ Attacks on AMD CPUs. Search the following in your
+ favourite search engine for more details:
+
+ "Technical guidance for mitigating transient scheduler
+ attacks".
+
+ off - disable the mitigation
+ on - enable the mitigation (default)
+ user - mitigate only user/kernel transitions
+ vm - mitigate only guest/host transitions
+
+
tsc= Disable clocksource stability checks for TSC.
Format: <string>
[x86] reliable: mark tsc clocksource as reliable, this
@@ -4589,6 +7684,96 @@
[x86] unstable: mark the TSC clocksource as unstable, this
marks the TSC unconditionally unstable at bootup and
avoids any further wobbles once the TSC watchdog notices.
+ [x86] nowatchdog: disable clocksource watchdog. Used
+ in situations with strict latency requirements (where
+ interruptions from clocksource watchdog are not
+ acceptable).
+ [x86] recalibrate: force recalibration against a HW timer
+ (HPET or PM timer) on systems whose TSC frequency was
+ obtained from HW or FW using either an MSR or CPUID(0x15).
+ Warn if the difference is more than 500 ppm.
+ [x86] watchdog: Use TSC as the watchdog clocksource with
+ which to check other HW timers (HPET or PM timer), but
+ only on systems where TSC has been deemed trustworthy.
+ This will be suppressed by an earlier tsc=nowatchdog and
+ can be overridden by a later tsc=nowatchdog. A console
+ message will flag any such suppression or overriding.
+
+ tsc_early_khz= [X86,EARLY] Skip early TSC calibration and use the given
+ value instead. Useful when the early TSC frequency discovery
+ procedure is not reliable, such as on overclocked systems
+ with CPUID.16h support and partial CPUID.15h support.
+ Format: <unsigned int>
+
+ tsx= [X86] Control Transactional Synchronization
+ Extensions (TSX) feature in Intel processors that
+ support TSX control.
+
+ This parameter controls the TSX feature. The options are:
+
+ on - Enable TSX on the system. Although there are
+ mitigations for all known security vulnerabilities,
+ TSX has been known to be an accelerator for
+ several previous speculation-related CVEs, and
+ so there may be unknown security risks associated
+ with leaving it enabled.
+
+ off - Disable TSX on the system. (Note that this
+ option takes effect only on newer CPUs which are
+ not vulnerable to MDS, i.e., have
+ MSR_IA32_ARCH_CAPABILITIES.MDS_NO=1 and which get
+ the new IA32_TSX_CTRL MSR through a microcode
+ update. This new MSR allows for the reliable
+ deactivation of the TSX functionality.)
+
+ auto - Disable TSX if X86_BUG_TAA is present,
+ otherwise enable TSX on the system.
+
+ Not specifying this option is equivalent to tsx=off.
+
+ See Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
+ for more details.
+
+ tsx_async_abort= [X86,INTEL,EARLY] Control mitigation for the TSX Async
+ Abort (TAA) vulnerability.
+
+ Similar to Micro-architectural Data Sampling (MDS)
+ certain CPUs that support Transactional
+ Synchronization Extensions (TSX) are vulnerable to an
+ exploit against CPU internal buffers which can forward
+ information to a disclosure gadget under certain
+ conditions.
+
+ In vulnerable processors, the speculatively forwarded
+ data can be used in a cache side channel attack, to
+ access data to which the attacker does not have direct
+ access.
+
+ This parameter controls the TAA mitigation. The
+ options are:
+
+ full - Enable TAA mitigation on vulnerable CPUs
+ if TSX is enabled.
+
+ full,nosmt - Enable TAA mitigation and disable SMT on
+ vulnerable CPUs. If TSX is disabled, SMT
+ is not disabled because CPU is not
+ vulnerable to cross-thread TAA attacks.
+ off - Unconditionally disable TAA mitigation
+
+ On MDS-affected machines, tsx_async_abort=off can be
+ prevented by an active MDS mitigation as both vulnerabilities
+ are mitigated with the same mechanism so in order to disable
+ this mitigation, you need to specify mds=off too.
+
+ Not specifying this option is equivalent to
+ tsx_async_abort=full. On CPUs which are MDS affected
+ and deploy MDS mitigation, TAA mitigation is not
+ required and doesn't provide any additional
+ mitigation.
+
+ For details see:
+ Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
turbografx.map[2|3]= [HW,JOY]
TurboGraFX parallel port interface
@@ -4612,13 +7797,36 @@
Note that genuine overcurrent events won't be
reported either.
+ unaligned_scalar_speed=
+ [RISCV]
+ Format: {slow | fast | unsupported}
+ Allow skipping scalar unaligned access speed tests. This
+ is useful for testing alternative code paths and to skip
+ the tests in environments where they run too slowly. All
+ CPUs must have the same scalar unaligned access speed.
+
+ unaligned_vector_speed=
+ [RISCV]
+ Format: {slow | fast | unsupported}
+ Allow skipping vector unaligned access speed tests. This
+ is useful for testing alternative code paths and to skip
+ the tests in environments where they run too slowly. All
+ CPUs must have the same vector unaligned access speed.
+
unknown_nmi_panic
[X86] Cause panic on unknown NMI.
+ unwind_debug [X86-64,EARLY]
+ Enable unwinder debug output. This can be
+ useful for debugging certain unwinder error
+ conditions, including corrupt stacks and
+ bad/missing unwinder metadata.
+
usbcore.authorized_default=
[USB] Default USB device authorization:
- (default -1 = authorized except for wireless USB,
- 0 = not authorized, 1 = authorized)
+ (default -1 = authorized (same as 1),
+ 0 = not authorized, 1 = authorized, 2 = authorized
+ if device connected to internal port)
usbcore.autosuspend=
[USB] The autosuspend time delay (in seconds) used
@@ -4639,8 +7847,7 @@
usbcore.old_scheme_first=
[USB] Start with the old device initialization
- scheme, applies only to low and full-speed devices
- (default 0 = off).
+ scheme (default 0 = off).
usbcore.usbfs_memory_mb=
[USB] Memory limit (in MB) for buffers allocated by
@@ -4715,6 +7922,9 @@
pause after every control message);
o = USB_QUIRK_HUB_SLOW_RESET (Hub needs extra
delay after resetting its port);
+ p = USB_QUIRK_SHORT_SET_ADDRESS_REQ_TIMEOUT
+ (Reduce timeout of the SET_ADDRESS
+ request from 5000 ms to 500 ms);
Example: quirks=0781:5580:bk,0a5c:5834:gij
usbhid.mousepoll=
@@ -4729,6 +7939,9 @@
usb-storage.delay_use=
[UMS] The delay in seconds before a new device is
scanned for Logical Units (default 1).
+ Optionally the delay in milliseconds if the value has
+ suffix with "ms".
+ Example: delay_use=2567ms
usb-storage.quirks=
[UMS] A list of quirks entries to supplement or
@@ -4739,13 +7952,13 @@
Flags is a set of characters, each corresponding
to a common usb-storage quirk flag as follows:
a = SANE_SENSE (collect more than 18 bytes
- of sense data);
+ of sense data, not on uas);
b = BAD_SENSE (don't collect more than 18
- bytes of sense data);
+ bytes of sense data, not on uas);
c = FIX_CAPACITY (decrease the reported
device capacity by one sector);
d = NO_READ_DISC_INFO (don't use
- READ_DISC_INFO command);
+ READ_DISC_INFO command, not on uas);
e = NO_READ_CAPACITY_16 (don't use
READ_CAPACITY_16 command);
f = NO_REPORT_OPCODES (don't use report opcodes
@@ -4759,18 +7972,20 @@
device);
j = NO_REPORT_LUNS (don't use report luns
command, uas only);
+ k = NO_SAME (do not use WRITE_SAME, uas only)
l = NOT_LOCKABLE (don't try to lock and
- unlock ejectable media);
+ unlock ejectable media, not on uas);
m = MAX_SECTORS_64 (don't transfer more
- than 64 sectors = 32 KB at a time);
+ than 64 sectors = 32 KB at a time,
+ not on uas);
n = INITIAL_READ10 (force a retry of the
- initial READ(10) command);
+ initial READ(10) command, not on uas);
o = CAPACITY_OK (accept the capacity
- reported by the device);
+ reported by the device, not on uas);
p = WRITE_CACHE (the device cache is ON
- by default);
+ by default, not on uas);
r = IGNORE_RESIDUE (the device reports
- bogus residue values);
+ bogus residue values, not on uas);
s = SINGLE_LUN (the device has only one
Logical Unit);
t = NO_ATA_1X (don't allow ATA(12) and ATA(16)
@@ -4779,7 +7994,8 @@
w = NO_WP_DETECT (don't test whether the
medium is write-protected).
y = ALWAYS_SYNC (issue a SYNCHRONIZE_CACHE
- even if the device claims no cache)
+ even if the device claims no cache,
+ not on uas)
Example: quirks=0419:aaf5:rl,0421:0433:rc
user_debug= [KNL,ARM]
@@ -4792,14 +8008,7 @@
16 - SIGBUS faults
Example: user_debug=31
- userpte=
- [X86] Flags controlling user PTE allocations.
-
- nohigh = do not allocate PTE pages in
- HIGHMEM regardless of setting
- of CONFIG_HIGHPTE.
-
- vdso= [X86,SH]
+ vdso= [X86,SH,SPARC]
On X86_32, this is an alias for vdso32=. Otherwise:
vdso=1: enable VDSO (the default)
@@ -4819,17 +8028,15 @@
Try vdso32=0 if you encounter an error that says:
dl_main: Assertion `(void *) ph->p_vaddr == _rtld_local._dl_sysinfo_dso' failed!
- vector= [IA-64,SMP]
- vector=percpu: enable percpu vector domain
-
- video= [FB] Frame buffer configuration
- See Documentation/fb/modedb.txt.
+ video= [FB,EARLY] Frame buffer configuration
+ See Documentation/fb/modedb.rst.
- video.brightness_switch_enabled= [0,1]
+ video.brightness_switch_enabled= [ACPI]
+ Format: [0|1]
If set to 1, on receiving an ACPI notify event
generated by hotkey, video driver will adjust brightness
level and then send out the event to user space through
- the allocated input device; If set to 0, video driver
+ the allocated input device. If set to 0, video driver
will only send out the event without touching backlight
brightness level.
default: 1
@@ -4851,8 +8058,8 @@
Can be used multiple times for multiple devices.
vga= [BOOT,X86-32] Select a particular video mode
- See Documentation/x86/boot.txt and
- Documentation/svga.txt.
+ See Documentation/arch/x86/boot.rst and
+ Documentation/admin-guide/svga.rst.
Use vga=ask for menu.
This is actually a boot loader parameter; the value is
passed to the kernel using a special protocol.
@@ -4869,13 +8076,16 @@
P Enable page structure init time poisoning
- Disable all of the above options
- vmalloc=nn[KMG] [KNL,BOOT] Forces the vmalloc area to have an exact
- size of <nn>. This can be used to increase the
- minimum size (128MB on x86). It can also be used to
- decrease the size and leave more room for directly
- mapped kernel RAM.
+ vmalloc=nn[KMG] [KNL,BOOT,EARLY] Forces the vmalloc area to have an
+ exact size of <nn>. This can be used to increase
+ the minimum size (128MB on x86, arm32 platforms).
+ It can also be used to decrease the size and leave more room
+ for directly mapped kernel RAM. Note that this parameter does
+ not exist on many other platforms (including arm64, alpha,
+ loongarch, arc, csky, hexagon, microblaze, mips, nios2, openrisc,
+ parisc, m64k, powerpc, riscv, sh, um, xtensa, s390, sparc).
- vmcp_cma=nn[MG] [KNL,S390]
+ vmcp_cma=nn[MG] [KNL,S390,EARLY]
Sets the memory size reserved for contiguous memory
allocations for the vmcp device driver.
@@ -4888,7 +8098,17 @@
vmpoff= [KNL,S390] Perform z/VM CP command after power off.
Format: <command>
- vsyscall= [X86-64]
+ vmscape= [X86] Controls mitigation for VMscape attacks.
+ VMscape attacks can leak information from a userspace
+ hypervisor to a guest via speculative side-channels.
+
+ off - disable the mitigation
+ ibpb - use Indirect Branch Prediction Barrier
+ (IBPB) mitigation (default)
+ force - force vulnerability detection even on
+ unaffected processors
+
+ vsyscall= [X86-64,EARLY]
Controls the behavior of vsyscalls (i.e. calls to
fixed addresses of 0xffffffffff600x00 from legacy
code). Most statically-linked binaries and older
@@ -4896,14 +8116,13 @@
functions are at fixed addresses, they make nice
targets for exploits that can control RIP.
- emulate [default] Vsyscalls turn into traps and are
- emulated reasonably safely.
+ emulate Vsyscalls turn into traps and are emulated
+ reasonably safely. The vsyscall page is
+ readable.
- native Vsyscalls are native syscall instructions.
- This is a little bit faster than trapping
- and makes a few dynamic recompilers work
- better than they would in emulation mode.
- It also makes exploits much easier to write.
+ xonly [default] Vsyscalls turn into traps and are
+ emulated reasonably safely. The vsyscall
+ page is not readable.
none Vsyscalls don't work at all. This makes
them quite hard to use for exploits but
@@ -4916,7 +8135,7 @@
vt.cur_default= [VT] Default cursor shape.
Format: 0xCCBBAA, where AA, BB, and CC are the same as
the parameters of the <Esc>[?A;B;Cc escape sequence;
- see VGA-softcursor.txt. Default: 2 = underline.
+ see vga-softcursor.rst. Default: 2 = underline.
vt.default_blu= [VT]
Format: <blue0>,<blue1>,<blue2>,...,<blue15>
@@ -4959,10 +8178,25 @@
Default: 3 = cyan.
watchdog timers [HW,WDT] For information on watchdog timers,
- see Documentation/watchdog/watchdog-parameters.txt
+ see Documentation/watchdog/watchdog-parameters.rst
or other driver-specific files in the
Documentation/watchdog/ directory.
+ watchdog_thresh=
+ [KNL]
+ Set the hard lockup detector stall duration
+ threshold in seconds. The soft lockup detector
+ threshold is set to twice the value. A value of 0
+ disables both lockup detectors. Default is 10
+ seconds.
+
+ workqueue.unbound_cpus=
+ [KNL,SMP] Specify to constrain one or some CPUs
+ to use in unbound workqueues.
+ Format: <cpu-list>
+ By default, all online CPUs are available for
+ unbound workqueues.
+
workqueue.watchdog_thresh=
If CONFIG_WQ_WATCHDOG is configured, workqueue can
warn stall conditions and dump internal state to
@@ -4972,14 +8206,33 @@
it can be updated at runtime by writing to the
corresponding sysfs file.
- workqueue.disable_numa
- By default, all work items queued to unbound
- workqueues are affine to the NUMA nodes they're
- issued on, which results in better behavior in
- general. If NUMA affinity needs to be disabled for
- whatever reason, this option can be used. Note
- that this also can be controlled per-workqueue for
- workqueues visible under /sys/bus/workqueue/.
+ workqueue.panic_on_stall=<uint>
+ Panic when workqueue stall is detected by
+ CONFIG_WQ_WATCHDOG. It sets the number times of the
+ stall to trigger panic.
+
+ The default is 0, which disables the panic on stall.
+
+ workqueue.cpu_intensive_thresh_us=
+ Per-cpu work items which run for longer than this
+ threshold are automatically considered CPU intensive
+ and excluded from concurrency management to prevent
+ them from noticeably delaying other per-cpu work
+ items. Default is 10000 (10ms).
+
+ If CONFIG_WQ_CPU_INTENSIVE_REPORT is set, the kernel
+ will report the work functions which violate this
+ threshold repeatedly. They are likely good
+ candidates for using WQ_UNBOUND workqueues instead.
+
+ workqueue.cpu_intensive_warning_thresh=<uint>
+ If CONFIG_WQ_CPU_INTENSIVE_REPORT is set, the kernel
+ will report the work functions which violate the
+ intensive_threshold_us repeatedly. In order to prevent
+ spurious warnings, start printing only after a work
+ function has violated this threshold number of times.
+
+ The default is 4 times. 0 disables the warning.
workqueue.power_efficient
Per-cpu workqueues are generally preferred because
@@ -4996,6 +8249,18 @@
The default value of this parameter is determined by
the config option CONFIG_WQ_POWER_EFFICIENT_DEFAULT.
+ workqueue.default_affinity_scope=
+ Select the default affinity scope to use for unbound
+ workqueues. Can be one of "cpu", "smt", "cache",
+ "numa" and "system". Default is "cache". For more
+ information, see the Affinity Scopes section in
+ Documentation/core-api/workqueue.rst.
+
+ This can be changed after boot by writing to the
+ matching /sys/module/workqueue/parameters file. All
+ workqueues with the "default" affinity scope will be
+ updated accordingly.
+
workqueue.debug_force_rr_cpu
Workqueue used to implicitly guarantee that work
items queued without explicit CPU specified are put
@@ -5007,16 +8272,16 @@
When enabled, memory and cache locality will be
impacted.
- x2apic_phys [X86-64,APIC] Use x2apic physical mode instead of
+ writecombine= [LOONGARCH,EARLY] Control the MAT (Memory Access
+ Type) of ioremap_wc().
+
+ on - Enable writecombine, use WUC for ioremap_wc()
+ off - Disable writecombine, use SUC for ioremap_wc()
+
+ x2apic_phys [X86-64,APIC,EARLY] Use x2apic physical mode instead of
default x2apic cluster mode on platforms
supporting x2apic.
- x86_intel_mid_timer= [X86-32,APBT]
- Choose timer option for x86 Intel MID platform.
- Two valid options are apbt timer only and lapic timer
- plus one apbt timer for broadcast timer.
- x86_intel_mid_timer=apbt_only | lapic_and_apbt
-
xen_512gb_limit [KNL,X86-64,XEN]
Restricts the kernel running paravirtualized under Xen
to use only up to 512 GB of RAM. The reason to do so is
@@ -5024,7 +8289,7 @@
save/restore/migration must be enabled to handle larger
domains.
- xen_emul_unplug= [HW,X86,XEN]
+ xen_emul_unplug= [HW,X86,XEN,EARLY]
Unplug Xen emulated devices
Format: [unplug0,][unplug1]
ide-disks -- unplug primary master IDE devices
@@ -5036,13 +8301,31 @@
the unplug protocol
never -- do not unplug even if version check succeeds
- xen_nopvspin [X86,XEN]
- Disables the ticketlock slowpath using Xen PV
- optimizations.
+ xen_legacy_crash [X86,XEN,EARLY]
+ Crash from Xen panic notifier, without executing late
+ panic() code such as dumping handler.
+
+ xen_mc_debug [X86,XEN,EARLY]
+ Enable multicall debugging when running as a Xen PV guest.
+ Enabling this feature will reduce performance a little
+ bit, so it should only be enabled for obtaining extended
+ debug data in case of multicall errors.
+
+ xen_msr_safe= [X86,XEN,EARLY]
+ Format: <bool>
+ Select whether to always use non-faulting (safe) MSR
+ access functions when running as Xen PV guest. The
+ default value is controlled by CONFIG_XEN_PV_MSR_SAFE.
xen_nopv [X86]
Disables the PV optimizations forcing the HVM guest to
run as generic HVM guest with no PV drivers.
+ This option is obsoleted by the "nopv" option, which
+ has equivalent effect for XEN platform.
+
+ xen_no_vector_callback
+ [KNL,X86,XEN,EARLY] Disable the vector callback for Xen
+ event channel interrupts.
xen_scrub_pages= [XEN]
Boolean option to control scrubbing pages before giving them back
@@ -5050,11 +8333,74 @@
with /sys/devices/system/xen_memory/xen_memory0/scrub_pages.
Default value controlled with CONFIG_XEN_SCRUB_PAGES_DEFAULT.
+ xen_timer_slop= [X86-64,XEN,EARLY]
+ Set the timer slop (in nanoseconds) for the virtual Xen
+ timers (default is 100000). This adjusts the minimum
+ delta of virtualized Xen timers, where lower values
+ improve timer resolution at the expense of processing
+ more timer interrupts.
+
+ xen.balloon_boot_timeout= [XEN]
+ The time (in seconds) to wait before giving up to boot
+ in case initial ballooning fails to free enough memory.
+ Applies only when running as HVM or PVH guest and
+ started with less memory configured than allowed at
+ max. Default is 180.
+
+ xen.event_eoi_delay= [XEN]
+ How long to delay EOI handling in case of event
+ storms (jiffies). Default is 10.
+
+ xen.event_loop_timeout= [XEN]
+ After which time (jiffies) the event handling loop
+ should start to delay EOI handling. Default is 2.
+
+ xen.fifo_events= [XEN]
+ Boolean parameter to disable using fifo event handling
+ even if available. Normally fifo event handling is
+ preferred over the 2-level event handling, as it is
+ fairer and the number of possible event channels is
+ much higher. Default is on (use fifo events).
+
xirc2ps_cs= [NET,PCMCIA]
Format:
<irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]]
+ xive= [PPC]
+ By default on POWER9 and above, the kernel will
+ natively use the XIVE interrupt controller. This option
+ allows the fallback firmware mode to be used:
+
+ off Fallback to firmware control of XIVE interrupt
+ controller on both pseries and powernv
+ platforms. Only useful on POWER9 and above.
+
+ xive.store-eoi=off [PPC]
+ By default on POWER10 and above, the kernel will use
+ stores for EOI handling when the XIVE interrupt mode
+ is active. This option allows the XIVE driver to use
+ loads instead, as on POWER9.
+
xhci-hcd.quirks [USB,KNL]
A hex value specifying bitmask with supplemental xhci
host controller quirks. Meaning of each bit can be
consulted in header drivers/usb/host/xhci.h.
+
+ xmon [PPC,EARLY]
+ Format: { early | on | rw | ro | off }
+ Controls if xmon debugger is enabled. Default is off.
+ Passing only "xmon" is equivalent to "xmon=early".
+ early Call xmon as early as possible on boot; xmon
+ debugger is called from setup_arch().
+ on xmon debugger hooks will be installed so xmon
+ is only called on a kernel crash. Default mode,
+ i.e. either "ro" or "rw" mode, is controlled
+ with CONFIG_XMON_DEFAULT_RO_MODE.
+ rw xmon debugger hooks will be installed so xmon
+ is called only on a kernel crash, mode is write,
+ meaning SPR registers, memory and, other data
+ can be written using xmon commands.
+ ro same as "rw" option above but SPR registers,
+ memory, and other data can't be written using
+ xmon commands.
+ off xmon is disabled.
diff --git a/Documentation/admin-guide/kernel-per-CPU-kthreads.rst b/Documentation/admin-guide/kernel-per-CPU-kthreads.rst
new file mode 100644
index 000000000000..ee9a6c94f383
--- /dev/null
+++ b/Documentation/admin-guide/kernel-per-CPU-kthreads.rst
@@ -0,0 +1,325 @@
+==========================================
+Reducing OS jitter due to per-cpu kthreads
+==========================================
+
+This document lists per-CPU kthreads in the Linux kernel and presents
+options to control their OS jitter. Note that non-per-CPU kthreads are
+not listed here. To reduce OS jitter from non-per-CPU kthreads, bind
+them to a "housekeeping" CPU dedicated to such work.
+
+References
+==========
+
+- Documentation/core-api/irq/irq-affinity.rst: Binding interrupts to sets of CPUs.
+
+- Documentation/admin-guide/cgroup-v1: Using cgroups to bind tasks to sets of CPUs.
+
+- man taskset: Using the taskset command to bind tasks to sets
+ of CPUs.
+
+- man sched_setaffinity: Using the sched_setaffinity() system
+ call to bind tasks to sets of CPUs.
+
+- /sys/devices/system/cpu/cpuN/online: Control CPU N's hotplug state,
+ writing "0" to offline and "1" to online.
+
+- In order to locate kernel-generated OS jitter on CPU N:
+
+ cd /sys/kernel/tracing
+ echo 1 > max_graph_depth # Increase the "1" for more detail
+ echo function_graph > current_tracer
+ # run workload
+ cat per_cpu/cpuN/trace
+
+kthreads
+========
+
+Name:
+ ehca_comp/%u
+
+Purpose:
+ Periodically process Infiniband-related work.
+
+To reduce its OS jitter, do any of the following:
+
+1. Don't use eHCA Infiniband hardware, instead choosing hardware
+ that does not require per-CPU kthreads. This will prevent these
+ kthreads from being created in the first place. (This will
+ work for most people, as this hardware, though important, is
+ relatively old and is produced in relatively low unit volumes.)
+2. Do all eHCA-Infiniband-related work on other CPUs, including
+ interrupts.
+3. Rework the eHCA driver so that its per-CPU kthreads are
+ provisioned only on selected CPUs.
+
+
+Name:
+ irq/%d-%s
+
+Purpose:
+ Handle threaded interrupts.
+
+To reduce its OS jitter, do the following:
+
+1. Use irq affinity to force the irq threads to execute on
+ some other CPU.
+
+Name:
+ kcmtpd_ctr_%d
+
+Purpose:
+ Handle Bluetooth work.
+
+To reduce its OS jitter, do one of the following:
+
+1. Don't use Bluetooth, in which case these kthreads won't be
+ created in the first place.
+2. Use irq affinity to force Bluetooth-related interrupts to
+ occur on some other CPU and furthermore initiate all
+ Bluetooth activity on some other CPU.
+
+Name:
+ ksoftirqd/%u
+
+Purpose:
+ Execute softirq handlers when threaded or when under heavy load.
+
+To reduce its OS jitter, each softirq vector must be handled
+separately as follows:
+
+TIMER_SOFTIRQ
+-------------
+
+Do all of the following:
+
+1. To the extent possible, keep the CPU out of the kernel when it
+ is non-idle, for example, by avoiding system calls and by forcing
+ both kernel threads and interrupts to execute elsewhere.
+2. Build with CONFIG_HOTPLUG_CPU=y. After boot completes, force
+ the CPU offline, then bring it back online. This forces
+ recurring timers to migrate elsewhere. If you are concerned
+ with multiple CPUs, force them all offline before bringing the
+ first one back online. Once you have onlined the CPUs in question,
+ do not offline any other CPUs, because doing so could force the
+ timer back onto one of the CPUs in question.
+
+NET_TX_SOFTIRQ and NET_RX_SOFTIRQ
+---------------------------------
+
+Do all of the following:
+
+1. Force networking interrupts onto other CPUs.
+2. Initiate any network I/O on other CPUs.
+3. Once your application has started, prevent CPU-hotplug operations
+ from being initiated from tasks that might run on the CPU to
+ be de-jittered. (It is OK to force this CPU offline and then
+ bring it back online before you start your application.)
+
+BLOCK_SOFTIRQ
+-------------
+
+Do all of the following:
+
+1. Force block-device interrupts onto some other CPU.
+2. Initiate any block I/O on other CPUs.
+3. Once your application has started, prevent CPU-hotplug operations
+ from being initiated from tasks that might run on the CPU to
+ be de-jittered. (It is OK to force this CPU offline and then
+ bring it back online before you start your application.)
+
+IRQ_POLL_SOFTIRQ
+----------------
+
+Do all of the following:
+
+1. Force block-device interrupts onto some other CPU.
+2. Initiate any block I/O and block-I/O polling on other CPUs.
+3. Once your application has started, prevent CPU-hotplug operations
+ from being initiated from tasks that might run on the CPU to
+ be de-jittered. (It is OK to force this CPU offline and then
+ bring it back online before you start your application.)
+
+TASKLET_SOFTIRQ
+---------------
+
+Do one or more of the following:
+
+1. Avoid use of drivers that use tasklets. (Such drivers will contain
+ calls to things like tasklet_schedule().)
+2. Convert all drivers that you must use from tasklets to workqueues.
+3. Force interrupts for drivers using tasklets onto other CPUs,
+ and also do I/O involving these drivers on other CPUs.
+
+SCHED_SOFTIRQ
+-------------
+
+Do all of the following:
+
+1. Avoid sending scheduler IPIs to the CPU to be de-jittered,
+ for example, ensure that at most one runnable kthread is present
+ on that CPU. If a thread that expects to run on the de-jittered
+ CPU awakens, the scheduler will send an IPI that can result in
+ a subsequent SCHED_SOFTIRQ.
+2. CONFIG_NO_HZ_FULL=y and ensure that the CPU to be de-jittered
+ is marked as an adaptive-ticks CPU using the "nohz_full="
+ boot parameter. This reduces the number of scheduler-clock
+ interrupts that the de-jittered CPU receives, minimizing its
+ chances of being selected to do the load balancing work that
+ runs in SCHED_SOFTIRQ context.
+3. To the extent possible, keep the CPU out of the kernel when it
+ is non-idle, for example, by avoiding system calls and by
+ forcing both kernel threads and interrupts to execute elsewhere.
+ This further reduces the number of scheduler-clock interrupts
+ received by the de-jittered CPU.
+
+HRTIMER_SOFTIRQ
+---------------
+
+Do all of the following:
+
+1. To the extent possible, keep the CPU out of the kernel when it
+ is non-idle. For example, avoid system calls and force both
+ kernel threads and interrupts to execute elsewhere.
+2. Build with CONFIG_HOTPLUG_CPU=y. Once boot completes, force the
+ CPU offline, then bring it back online. This forces recurring
+ timers to migrate elsewhere. If you are concerned with multiple
+ CPUs, force them all offline before bringing the first one
+ back online. Once you have onlined the CPUs in question, do not
+ offline any other CPUs, because doing so could force the timer
+ back onto one of the CPUs in question.
+
+RCU_SOFTIRQ
+-----------
+
+Do at least one of the following:
+
+1. Offload callbacks and keep the CPU in either dyntick-idle or
+ adaptive-ticks state by doing all of the following:
+
+ a. CONFIG_NO_HZ_FULL=y and ensure that the CPU to be
+ de-jittered is marked as an adaptive-ticks CPU using the
+ "nohz_full=" boot parameter. Bind the rcuo kthreads to
+ housekeeping CPUs, which can tolerate OS jitter.
+ b. To the extent possible, keep the CPU out of the kernel
+ when it is non-idle, for example, by avoiding system
+ calls and by forcing both kernel threads and interrupts
+ to execute elsewhere.
+
+2. Enable RCU to do its processing remotely via dyntick-idle by
+ doing all of the following:
+
+ a. Build with CONFIG_NO_HZ=y.
+ b. Ensure that the CPU goes idle frequently, allowing other
+ CPUs to detect that it has passed through an RCU quiescent
+ state. If the kernel is built with CONFIG_NO_HZ_FULL=y,
+ userspace execution also allows other CPUs to detect that
+ the CPU in question has passed through a quiescent state.
+ c. To the extent possible, keep the CPU out of the kernel
+ when it is non-idle, for example, by avoiding system
+ calls and by forcing both kernel threads and interrupts
+ to execute elsewhere.
+
+Name:
+ kworker/%u:%d%s (cpu, id, priority)
+
+Purpose:
+ Execute workqueue requests
+
+To reduce its OS jitter, do any of the following:
+
+1. Run your workload at a real-time priority, which will allow
+ preempting the kworker daemons.
+2. A given workqueue can be made visible in the sysfs filesystem
+ by passing the WQ_SYSFS to that workqueue's alloc_workqueue().
+ Such a workqueue can be confined to a given subset of the
+ CPUs using the ``/sys/devices/virtual/workqueue/*/cpumask`` sysfs
+ files. The set of WQ_SYSFS workqueues can be displayed using
+ "ls /sys/devices/virtual/workqueue". That said, the workqueues
+ maintainer would like to caution people against indiscriminately
+ sprinkling WQ_SYSFS across all the workqueues. The reason for
+ caution is that it is easy to add WQ_SYSFS, but because sysfs is
+ part of the formal user/kernel API, it can be nearly impossible
+ to remove it, even if its addition was a mistake.
+3. Do any of the following needed to avoid jitter that your
+ application cannot tolerate:
+
+ a. Avoid using oprofile, thus avoiding OS jitter from
+ wq_sync_buffer().
+ b. Limit your CPU frequency so that a CPU-frequency
+ governor is not required, possibly enlisting the aid of
+ special heatsinks or other cooling technologies. If done
+ correctly, and if you CPU architecture permits, you should
+ be able to build your kernel with CONFIG_CPU_FREQ=n to
+ avoid the CPU-frequency governor periodically running
+ on each CPU, including cs_dbs_timer() and od_dbs_timer().
+
+ WARNING: Please check your CPU specifications to
+ make sure that this is safe on your particular system.
+ c. As of v3.18, Christoph Lameter's on-demand vmstat workers
+ commit prevents OS jitter due to vmstat_update() on
+ CONFIG_SMP=y systems. Before v3.18, is not possible
+ to entirely get rid of the OS jitter, but you can
+ decrease its frequency by writing a large value to
+ /proc/sys/vm/stat_interval. The default value is HZ,
+ for an interval of one second. Of course, larger values
+ will make your virtual-memory statistics update more
+ slowly. Of course, you can also run your workload at
+ a real-time priority, thus preempting vmstat_update(),
+ but if your workload is CPU-bound, this is a bad idea.
+ However, there is an RFC patch from Christoph Lameter
+ (based on an earlier one from Gilad Ben-Yossef) that
+ reduces or even eliminates vmstat overhead for some
+ workloads at https://lore.kernel.org/r/00000140e9dfd6bd-40db3d4f-c1be-434f-8132-7820f81bb586-000000@email.amazonses.com.
+ d. If running on high-end powerpc servers, build with
+ CONFIG_PPC_RTAS_DAEMON=n. This prevents the RTAS
+ daemon from running on each CPU every second or so.
+ (This will require editing Kconfig files and will defeat
+ this platform's RAS functionality.) This avoids jitter
+ due to the rtas_event_scan() function.
+ WARNING: Please check your CPU specifications to
+ make sure that this is safe on your particular system.
+ e. If running on PowerMAC, build your kernel with
+ CONFIG_PMAC_RACKMETER=n to disable the CPU-meter,
+ avoiding OS jitter from rackmeter_do_timer().
+
+Name:
+ rcuc/%u
+
+Purpose:
+ Execute RCU callbacks in CONFIG_RCU_BOOST=y kernels.
+
+To reduce its OS jitter, do at least one of the following:
+
+1. Build the kernel with CONFIG_PREEMPT=n. This prevents these
+ kthreads from being created in the first place, and also obviates
+ the need for RCU priority boosting. This approach is feasible
+ for workloads that do not require high degrees of responsiveness.
+2. Build the kernel with CONFIG_RCU_BOOST=n. This prevents these
+ kthreads from being created in the first place. This approach
+ is feasible only if your workload never requires RCU priority
+ boosting, for example, if you ensure frequent idle time on all
+ CPUs that might execute within the kernel.
+3. Build with CONFIG_RCU_NOCB_CPU=y and boot with the rcu_nocbs=
+ boot parameter offloading RCU callbacks from all CPUs susceptible
+ to OS jitter. This approach prevents the rcuc/%u kthreads from
+ having any work to do, so that they are never awakened.
+4. Ensure that the CPU never enters the kernel, and, in particular,
+ avoid initiating any CPU hotplug operations on this CPU. This is
+ another way of preventing any callbacks from being queued on the
+ CPU, again preventing the rcuc/%u kthreads from having any work
+ to do.
+
+Name:
+ rcuop/%d, rcuos/%d, and rcuog/%d
+
+Purpose:
+ Offload RCU callbacks from the corresponding CPU.
+
+To reduce its OS jitter, do at least one of the following:
+
+1. Use affinity, cgroups, or other mechanism to force these kthreads
+ to execute on some other CPU.
+2. Build with CONFIG_RCU_NOCB_CPU=n, which will prevent these
+ kthreads from being created in the first place. However, please
+ note that this will not eliminate OS jitter, but will instead
+ shift it to RCU_SOFTIRQ.
diff --git a/Documentation/admin-guide/laptops/alienware-wmi.rst b/Documentation/admin-guide/laptops/alienware-wmi.rst
new file mode 100644
index 000000000000..27a32a8057da
--- /dev/null
+++ b/Documentation/admin-guide/laptops/alienware-wmi.rst
@@ -0,0 +1,127 @@
+.. SPDX-License-Identifier: GPL-2.0-or-later
+
+====================
+Alienware WMI Driver
+====================
+
+Kurt Borja <kuurtb@gmail.com>
+
+This is a driver for the "WMAX" WMI device, which is found in most Dell gaming
+laptops and controls various special features.
+
+Before the launch of M-Series laptops (~2018), the "WMAX" device controlled
+basic RGB lighting, deep sleep mode, HDMI mode and amplifier status.
+
+Later, this device was completely repurpused. Now it mostly deals with thermal
+profiles, sensor monitoring and overclocking. This interface is named "AWCC" and
+is known to be used by the AWCC OEM application to control these features.
+
+The alienware-wmi driver controls both interfaces.
+
+AWCC Interface
+==============
+
+WMI device documentation: Documentation/wmi/devices/alienware-wmi.rst
+
+Supported devices
+-----------------
+
+- Alienware M-Series laptops
+- Alienware X-Series laptops
+- Alienware Aurora Desktops
+- Dell G-Series laptops
+
+If you believe your device supports the AWCC interface and you don't have any of
+the features described in this document, try the following alienware-wmi module
+parameters:
+
+- ``force_platform_profile=1``: Forces probing for platform profile support
+- ``force_hwmon=1``: Forces probing for HWMON support
+
+If the module loads successfully with these parameters, consider submitting a
+patch adding your model to the ``awcc_dmi_table`` located in
+``drivers/platform/x86/dell/alienware-wmi-wmax.c`` or contacting the maintainer
+for further guidance.
+
+Status
+------
+
+The following features are currently supported:
+
+- :ref:`Platform Profile <platform-profile>`:
+
+ - Thermal profile control
+
+ - G-Mode toggling
+
+- :ref:`HWMON <hwmon>`:
+
+ - Sensor monitoring
+
+ - Manual fan control
+
+.. _platform-profile:
+
+Platform Profile
+----------------
+
+The AWCC interface exposes various firmware defined thermal profiles. These are
+exposed to user-space through the Platform Profile class interface. Refer to
+:ref:`sysfs-class-platform-profile <abi_file_testing_sysfs_class_platform_profile>`
+for more information.
+
+The name of the platform-profile class device exported by this driver is
+"alienware-wmi" and it's path can be found with:
+
+::
+
+ grep -l "alienware-wmi" /sys/class/platform-profile/platform-profile-*/name | sed 's|/[^/]*$||'
+
+If the device supports G-Mode, it is also toggled when selecting the
+``performance`` profile.
+
+.. note::
+ You may set the ``force_gmode`` module parameter to always try to toggle this
+ feature, without checking if your model supports it.
+
+.. _hwmon:
+
+HWMON
+-----
+
+The AWCC interface also supports sensor monitoring and manual fan control. Both
+of these features are exposed to user-space through the HWMON interface.
+
+The name of the hwmon class device exported by this driver is "alienware_wmi"
+and it's path can be found with:
+
+::
+
+ grep -l "alienware_wmi" /sys/class/hwmon/hwmon*/name | sed 's|/[^/]*$||'
+
+Sensor monitoring is done through the standard HWMON interface. Refer to
+:ref:`sysfs-class-hwmon <abi_file_testing_sysfs_class_hwmon>` for more
+information.
+
+Manual fan control on the other hand, is not exposed directly by the AWCC
+interface. Instead it let's us control a fan `boost` value. This `boost` value
+has the following aproximate behavior over the fan pwm:
+
+::
+
+ pwm = pwm_base + (fan_boost / 255) * (pwm_max - pwm_base)
+
+Due to the above behavior, the fan `boost` control is exposed to user-space
+through the following, custom hwmon sysfs attribute:
+
+=============================== ======= =======================================
+Name Perm Description
+=============================== ======= =======================================
+fan[1-4]_boost RW Fan boost value.
+
+ Integer value between 0 and 255
+=============================== ======= =======================================
+
+.. note::
+ In some devices, manual fan control only works reliably if the ``custom``
+ platform profile is selected.
diff --git a/Documentation/admin-guide/laptops/asus-laptop.rst b/Documentation/admin-guide/laptops/asus-laptop.rst
new file mode 100644
index 000000000000..95176321a25a
--- /dev/null
+++ b/Documentation/admin-guide/laptops/asus-laptop.rst
@@ -0,0 +1,271 @@
+==================
+Asus Laptop Extras
+==================
+
+Version 0.1
+
+August 6, 2009
+
+Corentin Chary <corentincj@iksaif.net>
+http://acpi4asus.sf.net/
+
+ This driver provides support for extra features of ACPI-compatible ASUS laptops.
+ It may also support some MEDION, JVC or VICTOR laptops (such as MEDION 9675 or
+ VICTOR XP7210 for example). It makes all the extra buttons generate input
+ events (like keyboards).
+
+ On some models adds support for changing the display brightness and output,
+ switching the LCD backlight on and off, and most importantly, allows you to
+ blink those fancy LEDs intended for reporting mail and wireless status.
+
+This driver supersedes the old asus_acpi driver.
+
+Requirements
+------------
+
+ Kernel 2.6.X sources, configured for your computer, with ACPI support.
+ You also need CONFIG_INPUT and CONFIG_ACPI.
+
+Status
+------
+
+ The features currently supported are the following (see below for
+ detailed description):
+
+ - Fn key combinations
+ - Bluetooth enable and disable
+ - Wlan enable and disable
+ - GPS enable and disable
+ - Video output switching
+ - Ambient Light Sensor on and off
+ - LED control
+ - LED Display control
+ - LCD brightness control
+ - LCD on and off
+
+ A compatibility table by model and feature is maintained on the web
+ site, http://acpi4asus.sf.net/.
+
+Usage
+-----
+
+ Try "modprobe asus-laptop". Check your dmesg (simply type dmesg). You should
+ see some lines like this :
+
+ Asus Laptop Extras version 0.42
+ - L2D model detected.
+
+ If it is not the output you have on your laptop, send it (and the laptop's
+ DSDT) to me.
+
+ That's all, now, all the events generated by the hotkeys of your laptop
+ should be reported via netlink events. You can check with
+ "acpi_genl monitor" (part of the acpica project).
+
+ Hotkeys are also reported as input keys (like keyboards) you can check
+ which key are supported using "xev" under X11.
+
+ You can get information on the version of your DSDT table by reading the
+ /sys/devices/platform/asus-laptop/infos entry. If you have a question or a
+ bug report to do, please include the output of this entry.
+
+LEDs
+----
+
+ You can modify LEDs be echoing values to `/sys/class/leds/asus/*/brightness`::
+
+ echo 1 > /sys/class/leds/asus::mail/brightness
+
+ will switch the mail LED on.
+
+ You can also know if they are on/off by reading their content and use
+ kernel triggers like disk-activity or heartbeat.
+
+Backlight
+---------
+
+ You can control lcd backlight power and brightness with
+ /sys/class/backlight/asus-laptop/. Brightness Values are between 0 and 15.
+
+Wireless devices
+----------------
+
+ You can turn the internal Bluetooth adapter on/off with the bluetooth entry
+ (only on models with Bluetooth). This usually controls the associated LED.
+ Same for Wlan adapter.
+
+Display switching
+-----------------
+
+ Note: the display switching code is currently considered EXPERIMENTAL.
+
+ Switching works for the following models:
+
+ - L3800C
+ - A2500H
+ - L5800C
+ - M5200N
+ - W1000N (albeit with some glitches)
+ - M6700R
+ - A6JC
+ - F3J
+
+ Switching doesn't work for the following:
+
+ - M3700N
+ - L2X00D (locks the laptop under certain conditions)
+
+ To switch the displays, echo values from 0 to 15 to
+ /sys/devices/platform/asus-laptop/display. The significance of those values
+ is as follows:
+
+ +-------+-----+-----+-----+-----+-----+
+ | Bin | Val | DVI | TV | CRT | LCD |
+ +-------+-----+-----+-----+-----+-----+
+ | 0000 | 0 | | | | |
+ +-------+-----+-----+-----+-----+-----+
+ | 0001 | 1 | | | | X |
+ +-------+-----+-----+-----+-----+-----+
+ | 0010 | 2 | | | X | |
+ +-------+-----+-----+-----+-----+-----+
+ | 0011 | 3 | | | X | X |
+ +-------+-----+-----+-----+-----+-----+
+ | 0100 | 4 | | X | | |
+ +-------+-----+-----+-----+-----+-----+
+ | 0101 | 5 | | X | | X |
+ +-------+-----+-----+-----+-----+-----+
+ | 0110 | 6 | | X | X | |
+ +-------+-----+-----+-----+-----+-----+
+ | 0111 | 7 | | X | X | X |
+ +-------+-----+-----+-----+-----+-----+
+ | 1000 | 8 | X | | | |
+ +-------+-----+-----+-----+-----+-----+
+ | 1001 | 9 | X | | | X |
+ +-------+-----+-----+-----+-----+-----+
+ | 1010 | 10 | X | | X | |
+ +-------+-----+-----+-----+-----+-----+
+ | 1011 | 11 | X | | X | X |
+ +-------+-----+-----+-----+-----+-----+
+ | 1100 | 12 | X | X | | |
+ +-------+-----+-----+-----+-----+-----+
+ | 1101 | 13 | X | X | | X |
+ +-------+-----+-----+-----+-----+-----+
+ | 1110 | 14 | X | X | X | |
+ +-------+-----+-----+-----+-----+-----+
+ | 1111 | 15 | X | X | X | X |
+ +-------+-----+-----+-----+-----+-----+
+
+ In most cases, the appropriate displays must be plugged in for the above
+ combinations to work. TV-Out may need to be initialized at boot time.
+
+ Debugging:
+
+ 1) Check whether the Fn+F8 key:
+
+ a) does not lock the laptop (try a boot with noapic / nolapic if it does)
+ b) generates events (0x6n, where n is the value corresponding to the
+ configuration above)
+ c) actually works
+
+ Record the disp value at every configuration.
+ 2) Echo values from 0 to 15 to /sys/devices/platform/asus-laptop/display.
+ Record its value, note any change. If nothing changes, try a broader range,
+ up to 65535.
+ 3) Send ANY output (both positive and negative reports are needed, unless your
+ machine is already listed above) to the acpi4asus-user mailing list.
+
+ Note: on some machines (e.g. L3C), after the module has been loaded, only 0x6n
+ events are generated and no actual switching occurs. In such a case, a line
+ like::
+
+ echo $((10#$arg-60)) > /sys/devices/platform/asus-laptop/display
+
+ will usually do the trick ($arg is the 0000006n-like event passed to acpid).
+
+ Note: there is currently no reliable way to read display status on xxN
+ (Centrino) models.
+
+LED display
+-----------
+
+ Some models like the W1N have a LED display that can be used to display
+ several items of information.
+
+ LED display works for the following models:
+
+ - W1000N
+ - W1J
+
+ To control the LED display, use the following::
+
+ echo 0x0T000DDD > /sys/devices/platform/asus-laptop/
+
+ where T control the 3 letters display, and DDD the 3 digits display,
+ according to the tables below::
+
+ DDD (digits)
+ 000 to 999 = display digits
+ AAA = ---
+ BBB to FFF = turn-off
+
+ T (type)
+ 0 = off
+ 1 = dvd
+ 2 = vcd
+ 3 = mp3
+ 4 = cd
+ 5 = tv
+ 6 = cpu
+ 7 = vol
+
+ For example "echo 0x01000001 >/sys/devices/platform/asus-laptop/ledd"
+ would display "DVD001".
+
+Driver options
+--------------
+
+ Options can be passed to the asus-laptop driver using the standard
+ module argument syntax (<param>=<value> when passing the option to the
+ module or asus-laptop.<param>=<value> on the kernel boot line when
+ asus-laptop is statically linked into the kernel).
+
+ wapf: WAPF defines the behavior of the Fn+Fx wlan key
+ The significance of values is yet to be found, but
+ most of the time:
+
+ - 0x0 should do nothing
+ - 0x1 should allow to control the device with Fn+Fx key.
+ - 0x4 should send an ACPI event (0x88) while pressing the Fn+Fx key
+ - 0x5 like 0x1 or 0x4
+
+ The default value is 0x1.
+
+Unsupported models
+------------------
+
+ These models will never be supported by this module, as they use a completely
+ different mechanism to handle LEDs and extra stuff (meaning we have no clue
+ how it works):
+
+ - ASUS A1300 (A1B), A1370D
+ - ASUS L7300G
+ - ASUS L8400
+
+Patches, Errors, Questions
+--------------------------
+
+ I appreciate any success or failure
+ reports, especially if they add to or correct the compatibility table.
+ Please include the following information in your report:
+
+ - Asus model name
+ - a copy of your ACPI tables, using the "acpidump" utility
+ - a copy of /sys/devices/platform/asus-laptop/infos
+ - which driver features work and which don't
+ - the observed behavior of non-working features
+
+ Any other comments or patches are also more than welcome.
+
+ acpi4asus-user@lists.sourceforge.net
+
+ http://sourceforge.net/projects/acpi4asus
diff --git a/Documentation/admin-guide/laptops/disk-shock-protection.rst b/Documentation/admin-guide/laptops/disk-shock-protection.rst
new file mode 100644
index 000000000000..22c7ec3e84cf
--- /dev/null
+++ b/Documentation/admin-guide/laptops/disk-shock-protection.rst
@@ -0,0 +1,151 @@
+==========================
+Hard disk shock protection
+==========================
+
+Author: Elias Oltmanns <eo@nebensachen.de>
+
+Last modified: 2008-10-03
+
+
+.. 0. Contents
+
+ 1. Intro
+ 2. The interface
+ 3. References
+ 4. CREDITS
+
+
+1. Intro
+--------
+
+ATA/ATAPI-7 specifies the IDLE IMMEDIATE command with unload feature.
+Issuing this command should cause the drive to switch to idle mode and
+unload disk heads. This feature is being used in modern laptops in
+conjunction with accelerometers and appropriate software to implement
+a shock protection facility. The idea is to stop all I/O operations on
+the internal hard drive and park its heads on the ramp when critical
+situations are anticipated. The desire to have such a feature
+available on GNU/Linux systems has been the original motivation to
+implement a generic disk head parking interface in the Linux kernel.
+Please note, however, that other components have to be set up on your
+system in order to get disk shock protection working (see
+section 3. References below for pointers to more information about
+that).
+
+
+2. The interface
+----------------
+
+For each ATA device, the kernel exports the file
+`block/*/device/unload_heads` in sysfs (here assumed to be mounted under
+/sys). Access to `/sys/block/*/device/unload_heads` is denied with
+-EOPNOTSUPP if the device does not support the unload feature.
+Otherwise, writing an integer value to this file will take the heads
+of the respective drive off the platter and block all I/O operations
+for the specified number of milliseconds. When the timeout expires and
+no further disk head park request has been issued in the meantime,
+normal operation will be resumed. The maximal value accepted for a
+timeout is 30000 milliseconds. Exceeding this limit will return
+-EOVERFLOW, but heads will be parked anyway and the timeout will be
+set to 30 seconds. However, you can always change a timeout to any
+value between 0 and 30000 by issuing a subsequent head park request
+before the timeout of the previous one has expired. In particular, the
+total timeout can exceed 30 seconds and, more importantly, you can
+cancel a previously set timeout and resume normal operation
+immediately by specifying a timeout of 0. Values below -2 are rejected
+with -EINVAL (see below for the special meaning of -1 and -2). If the
+timeout specified for a recent head park request has not yet expired,
+reading from `/sys/block/*/device/unload_heads` will report the number
+of milliseconds remaining until normal operation will be resumed;
+otherwise, reading the unload_heads attribute will return 0.
+
+For example, do the following in order to park the heads of drive
+/dev/sda and stop all I/O operations for five seconds::
+
+ # echo 5000 > /sys/block/sda/device/unload_heads
+
+A simple::
+
+ # cat /sys/block/sda/device/unload_heads
+
+will show you how many milliseconds are left before normal operation
+will be resumed.
+
+A word of caution: The fact that the interface operates on a basis of
+milliseconds may raise expectations that cannot be satisfied in
+reality. In fact, the ATA specs clearly state that the time for an
+unload operation to complete is vendor specific. The hint in ATA-7
+that this will typically be within 500 milliseconds apparently has
+been dropped in ATA-8.
+
+There is a technical detail of this implementation that may cause some
+confusion and should be discussed here. When a head park request has
+been issued to a device successfully, all I/O operations on the
+controller port this device is attached to will be deferred. That is
+to say, any other device that may be connected to the same port will
+be affected too. The only exception is that a subsequent head unload
+request to that other device will be executed immediately. Further
+operations on that port will be deferred until the timeout specified
+for either device on the port has expired. As far as PATA (old style
+IDE) configurations are concerned, there can only be two devices
+attached to any single port. In SATA world we have port multipliers
+which means that a user-issued head parking request to one device may
+actually result in stopping I/O to a whole bunch of devices. However,
+since this feature is supposed to be used on laptops and does not seem
+to be very useful in any other environment, there will be mostly one
+device per port. Even if the CD/DVD writer happens to be connected to
+the same port as the hard drive, it generally *should* recover just
+fine from the occasional buffer under-run incurred by a head park
+request to the HD. Actually, when you are using an ide driver rather
+than its libata counterpart (i.e. your disk is called /dev/hda
+instead of /dev/sda), then parking the heads of one drive (drive X)
+will generally not affect the mode of operation of another drive
+(drive Y) on the same port as described above. It is only when a port
+reset is required to recover from an exception on drive Y that further
+I/O operations on that drive (and the reset itself) will be delayed
+until drive X is no longer in the parked state.
+
+Finally, there are some hard drives that only comply with an earlier
+version of the ATA standard than ATA-7, but do support the unload
+feature nonetheless. Unfortunately, there is no safe way Linux can
+detect these devices, so you won't be able to write to the
+unload_heads attribute. If you know that your device really does
+support the unload feature (for instance, because the vendor of your
+laptop or the hard drive itself told you so), then you can tell the
+kernel to enable the usage of this feature for that drive by writing
+the special value -1 to the unload_heads attribute::
+
+ # echo -1 > /sys/block/sda/device/unload_heads
+
+will enable the feature for /dev/sda, and giving -2 instead of -1 will
+disable it again.
+
+
+3. References
+-------------
+
+There are several laptops from different vendors featuring shock
+protection capabilities. As manufacturers have refused to support open
+source development of the required software components so far, Linux
+support for shock protection varies considerably between different
+hardware implementations. Ideally, this section should contain a list
+of pointers at different projects aiming at an implementation of shock
+protection on different systems. Unfortunately, I only know of a
+single project which, although still considered experimental, is fit
+for use. Please feel free to add projects that have been the victims
+of my ignorance.
+
+- https://www.thinkwiki.org/wiki/HDAPS
+
+ See this page for information about Linux support of the hard disk
+ active protection system as implemented in IBM/Lenovo Thinkpads.
+
+
+4. CREDITS
+----------
+
+This implementation of disk head parking has been inspired by a patch
+originally published by Jon Escombe <lists@dresco.co.uk>. My efforts
+to develop an implementation of this feature that is fit to be merged
+into mainline have been aided by various kernel developers, in
+particular by Tejun Heo and Bartlomiej Zolnierkiewicz.
diff --git a/Documentation/admin-guide/laptops/index.rst b/Documentation/admin-guide/laptops/index.rst
new file mode 100644
index 000000000000..db842b629303
--- /dev/null
+++ b/Documentation/admin-guide/laptops/index.rst
@@ -0,0 +1,19 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============
+Laptop Drivers
+==============
+
+.. toctree::
+ :maxdepth: 1
+
+ alienware-wmi
+ asus-laptop
+ disk-shock-protection
+ laptop-mode
+ lg-laptop
+ samsung-galaxybook
+ sony-laptop
+ sonypi
+ thinkpad-acpi
+ toshiba_haps
diff --git a/Documentation/admin-guide/laptops/laptop-mode.rst b/Documentation/admin-guide/laptops/laptop-mode.rst
new file mode 100644
index 000000000000..66eb9cd918b5
--- /dev/null
+++ b/Documentation/admin-guide/laptops/laptop-mode.rst
@@ -0,0 +1,770 @@
+===============================================
+How to conserve battery power using laptop-mode
+===============================================
+
+Document Author: Bart Samwel (bart@samwel.tk)
+
+Date created: January 2, 2004
+
+Last modified: December 06, 2004
+
+Introduction
+------------
+
+Laptop mode is used to minimize the time that the hard disk needs to be spun up,
+to conserve battery power on laptops. It has been reported to cause significant
+power savings.
+
+.. Contents
+
+ * Introduction
+ * Installation
+ * Caveats
+ * The Details
+ * Tips & Tricks
+ * Control script
+ * ACPI integration
+ * Monitoring tool
+
+
+Installation
+------------
+
+To use laptop mode, you don't need to set any kernel configuration options
+or anything. Simply install all the files included in this document, and
+laptop mode will automatically be started when you're on battery. For
+your convenience, a tarball containing an installer can be downloaded at:
+
+ http://www.samwel.tk/laptop_mode/laptop_mode/
+
+To configure laptop mode, you need to edit the configuration file, which is
+located in /etc/default/laptop-mode on Debian-based systems, or in
+/etc/sysconfig/laptop-mode on other systems.
+
+Unfortunately, automatic enabling of laptop mode does not work for
+laptops that don't have ACPI. On those laptops, you need to start laptop
+mode manually. To start laptop mode, run "laptop_mode start", and to
+stop it, run "laptop_mode stop". (Note: The laptop mode tools package now
+has experimental support for APM, you might want to try that first.)
+
+
+Caveats
+-------
+
+* The downside of laptop mode is that you have a chance of losing up to 10
+ minutes of work. If you cannot afford this, don't use it! The supplied ACPI
+ scripts automatically turn off laptop mode when the battery almost runs out,
+ so that you won't lose any data at the end of your battery life.
+
+* Most desktop hard drives have a very limited lifetime measured in spindown
+ cycles, typically about 50.000 times (it's usually listed on the spec sheet).
+ Check your drive's rating, and don't wear down your drive's lifetime if you
+ don't need to.
+
+* If you mount some of your ext3 filesystems with the -n option, then
+ the control script will not be able to remount them correctly. You must set
+ DO_REMOUNTS=0 in the control script, otherwise it will remount them with the
+ wrong options -- or it will fail because it cannot write to /etc/mtab.
+
+* If you have your filesystems listed as type "auto" in fstab, like I did, then
+ the control script will not recognize them as filesystems that need remounting.
+ You must list the filesystems with their true type instead.
+
+* It has been reported that some versions of the mutt mail client use file access
+ times to determine whether a folder contains new mail. If you use mutt and
+ experience this, you must disable the noatime remounting by setting the option
+ DO_REMOUNT_NOATIME to 0 in the configuration file.
+
+
+The Details
+-----------
+
+Laptop mode is controlled by the knob /proc/sys/vm/laptop_mode. This knob is
+present for all kernels that have the laptop mode patch, regardless of any
+configuration options. When the knob is set, any physical disk I/O (that might
+have caused the hard disk to spin up) causes Linux to flush all dirty blocks. The
+result of this is that after a disk has spun down, it will not be spun up
+anymore to write dirty blocks, because those blocks had already been written
+immediately after the most recent read operation. The value of the laptop_mode
+knob determines the time between the occurrence of disk I/O and when the flush
+is triggered. A sensible value for the knob is 5 seconds. Setting the knob to
+0 disables laptop mode.
+
+To increase the effectiveness of the laptop_mode strategy, the laptop_mode
+control script increases dirty_expire_centisecs and dirty_writeback_centisecs in
+/proc/sys/vm to about 10 minutes (by default), which means that pages that are
+dirtied are not forced to be written to disk as often. The control script also
+changes the dirty background ratio, so that background writeback of dirty pages
+is not done anymore. Combined with a higher commit value (also 10 minutes) for
+ext3 filesystem (also done automatically by the control script),
+this results in concentration of disk activity in a small time interval which
+occurs only once every 10 minutes, or whenever the disk is forced to spin up by
+a cache miss. The disk can then be spun down in the periods of inactivity.
+
+
+Configuration
+-------------
+
+The laptop mode configuration file is located in /etc/default/laptop-mode on
+Debian-based systems, or in /etc/sysconfig/laptop-mode on other systems. It
+contains the following options:
+
+MAX_AGE:
+
+Maximum time, in seconds, of hard drive spindown time that you are
+comfortable with. Worst case, it's possible that you could lose this
+amount of work if your battery fails while you're in laptop mode.
+
+MINIMUM_BATTERY_MINUTES:
+
+Automatically disable laptop mode if the remaining number of minutes of
+battery power is less than this value. Default is 10 minutes.
+
+AC_HD/BATT_HD:
+
+The idle timeout that should be set on your hard drive when laptop mode
+is active (BATT_HD) and when it is not active (AC_HD). The defaults are
+20 seconds (value 4) for BATT_HD and 2 hours (value 244) for AC_HD. The
+possible values are those listed in the manual page for "hdparm" for the
+"-S" option.
+
+HD:
+
+The devices for which the spindown timeout should be adjusted by laptop mode.
+Default is /dev/hda. If you specify multiple devices, separate them by a space.
+
+READAHEAD:
+
+Disk readahead, in 512-byte sectors, while laptop mode is active. A large
+readahead can prevent disk accesses for things like executable pages (which are
+loaded on demand while the application executes) and sequentially accessed data
+(MP3s).
+
+DO_REMOUNTS:
+
+The control script automatically remounts any mounted journaled filesystems
+with appropriate commit interval options. When this option is set to 0, this
+feature is disabled.
+
+DO_REMOUNT_NOATIME:
+
+When remounting, should the filesystems be remounted with the noatime option?
+Normally, this is set to "1" (enabled), but there may be programs that require
+access time recording.
+
+DIRTY_RATIO:
+
+The percentage of memory that is allowed to contain "dirty" or unsaved data
+before a writeback is forced, while laptop mode is active. Corresponds to
+the /proc/sys/vm/dirty_ratio sysctl.
+
+DIRTY_BACKGROUND_RATIO:
+
+The percentage of memory that is allowed to contain "dirty" or unsaved data
+after a forced writeback is done due to an exceeding of DIRTY_RATIO. Set
+this nice and low. This corresponds to the /proc/sys/vm/dirty_background_ratio
+sysctl.
+
+Note that the behaviour of dirty_background_ratio is quite different
+when laptop mode is active and when it isn't. When laptop mode is inactive,
+dirty_background_ratio is the threshold percentage at which background writeouts
+start taking place. When laptop mode is active, however, background writeouts
+are disabled, and the dirty_background_ratio only determines how much writeback
+is done when dirty_ratio is reached.
+
+DO_CPU:
+
+Enable CPU frequency scaling when in laptop mode. (Requires CPUFreq to be setup.
+See Documentation/admin-guide/pm/cpufreq.rst for more info. Disabled by default.)
+
+CPU_MAXFREQ:
+
+When on battery, what is the maximum CPU speed that the system should use? Legal
+values are "slowest" for the slowest speed that your CPU is able to operate at,
+or a value listed in /sys/devices/system/cpu/cpu0/cpufreq/scaling_available_frequencies.
+
+
+Tips & Tricks
+-------------
+
+* Bartek Kania reports getting up to 50 minutes of extra battery life (on top
+ of his regular 3 to 3.5 hours) using a spindown time of 5 seconds (BATT_HD=1).
+
+* You can spin down the disk while playing MP3, by setting disk readahead
+ to 8MB (READAHEAD=16384). Effectively, the disk will read a complete MP3 at
+ once, and will then spin down while the MP3 is playing. (Thanks to Bartek
+ Kania.)
+
+* Drew Scott Daniels observed: "I don't know why, but when I decrease the number
+ of colours that my display uses it consumes less battery power. I've seen
+ this on powerbooks too. I hope that this is a piece of information that
+ might be useful to the Laptop Mode patch or its users."
+
+* In syslog.conf, you can prefix entries with a dash `-` to omit syncing the
+ file after every logging. When you're using laptop-mode and your disk doesn't
+ spin down, this is a likely culprit.
+
+* Richard Atterer observed that laptop mode does not work well with noflushd
+ (http://noflushd.sourceforge.net/), it seems that noflushd prevents laptop-mode
+ from doing its thing.
+
+* If you're worried about your data, you might want to consider using a USB
+ memory stick or something like that as a "working area". (Be aware though
+ that flash memory can only handle a limited number of writes, and overuse
+ may wear out your memory stick pretty quickly. Do _not_ use journalling
+ filesystems on flash memory sticks.)
+
+
+Configuration file for control and ACPI battery scripts
+-------------------------------------------------------
+
+This allows the tunables to be changed for the scripts via an external
+configuration file
+
+It should be installed as /etc/default/laptop-mode on Debian, and as
+/etc/sysconfig/laptop-mode on Red Hat, SUSE, Mandrake, and other work-alikes.
+
+Config file::
+
+ # Maximum time, in seconds, of hard drive spindown time that you are
+ # comfortable with. Worst case, it's possible that you could lose this
+ # amount of work if your battery fails you while in laptop mode.
+ #MAX_AGE=600
+
+ # Automatically disable laptop mode when the number of minutes of battery
+ # that you have left goes below this threshold.
+ MINIMUM_BATTERY_MINUTES=10
+
+ # Read-ahead, in 512-byte sectors. You can spin down the disk while playing MP3/OGG
+ # by setting the disk readahead to 8MB (READAHEAD=16384). Effectively, the disk
+ # will read a complete MP3 at once, and will then spin down while the MP3/OGG is
+ # playing.
+ #READAHEAD=4096
+
+ # Shall we remount journaled fs. with appropriate commit interval? (1=yes)
+ #DO_REMOUNTS=1
+
+ # And shall we add the "noatime" option to that as well? (1=yes)
+ #DO_REMOUNT_NOATIME=1
+
+ # Dirty synchronous ratio. At this percentage of dirty pages the process
+ # which
+ # calls write() does its own writeback
+ #DIRTY_RATIO=40
+
+ #
+ # Allowed dirty background ratio, in percent. Once DIRTY_RATIO has been
+ # exceeded, the kernel will wake flusher threads which will then reduce the
+ # amount of dirty memory to dirty_background_ratio. Set this nice and low,
+ # so once some writeout has commenced, we do a lot of it.
+ #
+ #DIRTY_BACKGROUND_RATIO=5
+
+ # kernel default dirty buffer age
+ #DEF_AGE=30
+ #DEF_UPDATE=5
+ #DEF_DIRTY_BACKGROUND_RATIO=10
+ #DEF_DIRTY_RATIO=40
+ #DEF_XFS_AGE_BUFFER=15
+ #DEF_XFS_SYNC_INTERVAL=30
+ #DEF_XFS_BUFD_INTERVAL=1
+
+ # This must be adjusted manually to the value of HZ in the running kernel
+ # on 2.4, until the XFS people change their 2.4 external interfaces to work in
+ # centisecs. This can be automated, but it's a work in progress that still
+ # needs# some fixes. On 2.6 kernels, XFS uses USER_HZ instead of HZ for
+ # external interfaces, and that is currently always set to 100. So you don't
+ # need to change this on 2.6.
+ #XFS_HZ=100
+
+ # Should the maximum CPU frequency be adjusted down while on battery?
+ # Requires CPUFreq to be setup.
+ # See Documentation/admin-guide/pm/cpufreq.rst for more info
+ #DO_CPU=0
+
+ # When on battery what is the maximum CPU speed that the system should
+ # use? Legal values are "slowest" for the slowest speed that your
+ # CPU is able to operate at, or a value listed in:
+ # /sys/devices/system/cpu/cpu0/cpufreq/scaling_available_frequencies
+ # Only applicable if DO_CPU=1.
+ #CPU_MAXFREQ=slowest
+
+ # Idle timeout for your hard drive (man hdparm for valid values, -S option)
+ # Default is 2 hours on AC (AC_HD=244) and 20 seconds for battery (BATT_HD=4).
+ #AC_HD=244
+ #BATT_HD=4
+
+ # The drives for which to adjust the idle timeout. Separate them by a space,
+ # e.g. HD="/dev/hda /dev/hdb".
+ #HD="/dev/hda"
+
+ # Set the spindown timeout on a hard drive?
+ #DO_HD=1
+
+
+Control script
+--------------
+
+Please note that this control script works for the Linux 2.4 and 2.6 series (thanks
+to Kiko Piris).
+
+Control script::
+
+ #!/bin/bash
+
+ # start or stop laptop_mode, best run by a power management daemon when
+ # ac gets connected/disconnected from a laptop
+ #
+ # install as /sbin/laptop_mode
+ #
+ # Contributors to this script: Kiko Piris
+ # Bart Samwel
+ # Micha Feigin
+ # Andrew Morton
+ # Herve Eychenne
+ # Dax Kelson
+ #
+ # Original Linux 2.4 version by: Jens Axboe
+
+ #############################################################################
+
+ # Source config
+ if [ -f /etc/default/laptop-mode ] ; then
+ # Debian
+ . /etc/default/laptop-mode
+ elif [ -f /etc/sysconfig/laptop-mode ] ; then
+ # Others
+ . /etc/sysconfig/laptop-mode
+ fi
+
+ # Don't raise an error if the config file is incomplete
+ # set defaults instead:
+
+ # Maximum time, in seconds, of hard drive spindown time that you are
+ # comfortable with. Worst case, it's possible that you could lose this
+ # amount of work if your battery fails you while in laptop mode.
+ MAX_AGE=${MAX_AGE:-'600'}
+
+ # Read-ahead, in kilobytes
+ READAHEAD=${READAHEAD:-'4096'}
+
+ # Shall we remount journaled fs. with appropriate commit interval? (1=yes)
+ DO_REMOUNTS=${DO_REMOUNTS:-'1'}
+
+ # And shall we add the "noatime" option to that as well? (1=yes)
+ DO_REMOUNT_NOATIME=${DO_REMOUNT_NOATIME:-'1'}
+
+ # Shall we adjust the idle timeout on a hard drive?
+ DO_HD=${DO_HD:-'1'}
+
+ # Adjust idle timeout on which hard drive?
+ HD="${HD:-'/dev/hda'}"
+
+ # spindown time for HD (hdparm -S values)
+ AC_HD=${AC_HD:-'244'}
+ BATT_HD=${BATT_HD:-'4'}
+
+ # Dirty synchronous ratio. At this percentage of dirty pages the process which
+ # calls write() does its own writeback
+ DIRTY_RATIO=${DIRTY_RATIO:-'40'}
+
+ # cpu frequency scaling
+ # See Documentation/admin-guide/pm/cpufreq.rst for more info
+ DO_CPU=${CPU_MANAGE:-'0'}
+ CPU_MAXFREQ=${CPU_MAXFREQ:-'slowest'}
+
+ #
+ # Allowed dirty background ratio, in percent. Once DIRTY_RATIO has been
+ # exceeded, the kernel will wake flusher threads which will then reduce the
+ # amount of dirty memory to dirty_background_ratio. Set this nice and low,
+ # so once some writeout has commenced, we do a lot of it.
+ #
+ DIRTY_BACKGROUND_RATIO=${DIRTY_BACKGROUND_RATIO:-'5'}
+
+ # kernel default dirty buffer age
+ DEF_AGE=${DEF_AGE:-'30'}
+ DEF_UPDATE=${DEF_UPDATE:-'5'}
+ DEF_DIRTY_BACKGROUND_RATIO=${DEF_DIRTY_BACKGROUND_RATIO:-'10'}
+ DEF_DIRTY_RATIO=${DEF_DIRTY_RATIO:-'40'}
+ DEF_XFS_AGE_BUFFER=${DEF_XFS_AGE_BUFFER:-'15'}
+ DEF_XFS_SYNC_INTERVAL=${DEF_XFS_SYNC_INTERVAL:-'30'}
+ DEF_XFS_BUFD_INTERVAL=${DEF_XFS_BUFD_INTERVAL:-'1'}
+
+ # This must be adjusted manually to the value of HZ in the running kernel
+ # on 2.4, until the XFS people change their 2.4 external interfaces to work in
+ # centisecs. This can be automated, but it's a work in progress that still needs
+ # some fixes. On 2.6 kernels, XFS uses USER_HZ instead of HZ for external
+ # interfaces, and that is currently always set to 100. So you don't need to
+ # change this on 2.6.
+ XFS_HZ=${XFS_HZ:-'100'}
+
+ #############################################################################
+
+ KLEVEL="$(uname -r |
+ {
+ IFS='.' read a b c
+ echo $a.$b
+ }
+ )"
+ case "$KLEVEL" in
+ "2.4"|"2.6")
+ ;;
+ *)
+ echo "Unhandled kernel version: $KLEVEL ('uname -r' = '$(uname -r)')" >&2
+ exit 1
+ ;;
+ esac
+
+ if [ ! -e /proc/sys/vm/laptop_mode ] ; then
+ echo "Kernel is not patched with laptop_mode patch." >&2
+ exit 1
+ fi
+
+ if [ ! -w /proc/sys/vm/laptop_mode ] ; then
+ echo "You do not have enough privileges to enable laptop_mode." >&2
+ exit 1
+ fi
+
+ # Remove an option (the first parameter) of the form option=<number> from
+ # a mount options string (the rest of the parameters).
+ parse_mount_opts () {
+ OPT="$1"
+ shift
+ echo ",$*," | sed \
+ -e 's/,'"$OPT"'=[0-9]*,/,/g' \
+ -e 's/,,*/,/g' \
+ -e 's/^,//' \
+ -e 's/,$//'
+ }
+
+ # Remove an option (the first parameter) without any arguments from
+ # a mount option string (the rest of the parameters).
+ parse_nonumber_mount_opts () {
+ OPT="$1"
+ shift
+ echo ",$*," | sed \
+ -e 's/,'"$OPT"',/,/g' \
+ -e 's/,,*/,/g' \
+ -e 's/^,//' \
+ -e 's/,$//'
+ }
+
+ # Find out the state of a yes/no option (e.g. "atime"/"noatime") in
+ # fstab for a given filesystem, and use this state to replace the
+ # value of the option in another mount options string. The device
+ # is the first argument, the option name the second, and the default
+ # value the third. The remainder is the mount options string.
+ #
+ # Example:
+ # parse_yesno_opts_wfstab /dev/hda1 atime atime defaults,noatime
+ #
+ # If fstab contains, say, "rw" for this filesystem, then the result
+ # will be "defaults,atime".
+ parse_yesno_opts_wfstab () {
+ L_DEV="$1"
+ OPT="$2"
+ DEF_OPT="$3"
+ shift 3
+ L_OPTS="$*"
+ PARSEDOPTS1="$(parse_nonumber_mount_opts $OPT $L_OPTS)"
+ PARSEDOPTS1="$(parse_nonumber_mount_opts no$OPT $PARSEDOPTS1)"
+ # Watch for a default atime in fstab
+ FSTAB_OPTS="$(awk '$1 == "'$L_DEV'" { print $4 }' /etc/fstab)"
+ if echo "$FSTAB_OPTS" | grep "$OPT" > /dev/null ; then
+ # option specified in fstab: extract the value and use it
+ if echo "$FSTAB_OPTS" | grep "no$OPT" > /dev/null ; then
+ echo "$PARSEDOPTS1,no$OPT"
+ else
+ # no$OPT not found -- so we must have $OPT.
+ echo "$PARSEDOPTS1,$OPT"
+ fi
+ else
+ # option not specified in fstab -- choose the default.
+ echo "$PARSEDOPTS1,$DEF_OPT"
+ fi
+ }
+
+ # Find out the state of a numbered option (e.g. "commit=NNN") in
+ # fstab for a given filesystem, and use this state to replace the
+ # value of the option in another mount options string. The device
+ # is the first argument, and the option name the second. The
+ # remainder is the mount options string in which the replacement
+ # must be done.
+ #
+ # Example:
+ # parse_mount_opts_wfstab /dev/hda1 commit defaults,commit=7
+ #
+ # If fstab contains, say, "commit=3,rw" for this filesystem, then the
+ # result will be "rw,commit=3".
+ parse_mount_opts_wfstab () {
+ L_DEV="$1"
+ OPT="$2"
+ shift 2
+ L_OPTS="$*"
+ PARSEDOPTS1="$(parse_mount_opts $OPT $L_OPTS)"
+ # Watch for a default commit in fstab
+ FSTAB_OPTS="$(awk '$1 == "'$L_DEV'" { print $4 }' /etc/fstab)"
+ if echo "$FSTAB_OPTS" | grep "$OPT=" > /dev/null ; then
+ # option specified in fstab: extract the value, and use it
+ echo -n "$PARSEDOPTS1,$OPT="
+ echo ",$FSTAB_OPTS," | sed \
+ -e 's/.*,'"$OPT"'=//' \
+ -e 's/,.*//'
+ else
+ # option not specified in fstab: set it to 0
+ echo "$PARSEDOPTS1,$OPT=0"
+ fi
+ }
+
+ deduce_fstype () {
+ MP="$1"
+ # My root filesystem unfortunately has
+ # type "unknown" in /etc/mtab. If we encounter
+ # "unknown", we try to get the type from fstab.
+ cat /etc/fstab |
+ grep -v '^#' |
+ while read FSTAB_DEV FSTAB_MP FSTAB_FST FSTAB_OPTS FSTAB_DUMP FSTAB_DUMP ; do
+ if [ "$FSTAB_MP" = "$MP" ]; then
+ echo $FSTAB_FST
+ exit 0
+ fi
+ done
+ }
+
+ if [ $DO_REMOUNT_NOATIME -eq 1 ] ; then
+ NOATIME_OPT=",noatime"
+ fi
+
+ case "$1" in
+ start)
+ AGE=$((100*$MAX_AGE))
+ XFS_AGE=$(($XFS_HZ*$MAX_AGE))
+ echo -n "Starting laptop_mode"
+
+ if [ -d /proc/sys/vm/pagebuf ] ; then
+ # (For 2.4 and early 2.6.)
+ # This only needs to be set, not reset -- it is only used when
+ # laptop mode is enabled.
+ echo $XFS_AGE > /proc/sys/vm/pagebuf/lm_flush_age
+ echo $XFS_AGE > /proc/sys/fs/xfs/lm_sync_interval
+ elif [ -f /proc/sys/fs/xfs/lm_age_buffer ] ; then
+ # (A couple of early 2.6 laptop mode patches had these.)
+ # The same goes for these.
+ echo $XFS_AGE > /proc/sys/fs/xfs/lm_age_buffer
+ echo $XFS_AGE > /proc/sys/fs/xfs/lm_sync_interval
+ elif [ -f /proc/sys/fs/xfs/age_buffer ] ; then
+ # (2.6.6)
+ # But not for these -- they are also used in normal
+ # operation.
+ echo $XFS_AGE > /proc/sys/fs/xfs/age_buffer
+ echo $XFS_AGE > /proc/sys/fs/xfs/sync_interval
+ elif [ -f /proc/sys/fs/xfs/age_buffer_centisecs ] ; then
+ # (2.6.7 upwards)
+ # And not for these either. These are in centisecs,
+ # not USER_HZ, so we have to use $AGE, not $XFS_AGE.
+ echo $AGE > /proc/sys/fs/xfs/age_buffer_centisecs
+ echo $AGE > /proc/sys/fs/xfs/xfssyncd_centisecs
+ echo 3000 > /proc/sys/fs/xfs/xfsbufd_centisecs
+ fi
+
+ case "$KLEVEL" in
+ "2.4")
+ echo 1 > /proc/sys/vm/laptop_mode
+ echo "30 500 0 0 $AGE $AGE 60 20 0" > /proc/sys/vm/bdflush
+ ;;
+ "2.6")
+ echo 5 > /proc/sys/vm/laptop_mode
+ echo "$AGE" > /proc/sys/vm/dirty_writeback_centisecs
+ echo "$AGE" > /proc/sys/vm/dirty_expire_centisecs
+ echo "$DIRTY_RATIO" > /proc/sys/vm/dirty_ratio
+ echo "$DIRTY_BACKGROUND_RATIO" > /proc/sys/vm/dirty_background_ratio
+ ;;
+ esac
+ if [ $DO_REMOUNTS -eq 1 ]; then
+ cat /etc/mtab | while read DEV MP FST OPTS DUMP PASS ; do
+ PARSEDOPTS="$(parse_mount_opts "$OPTS")"
+ if [ "$FST" = 'unknown' ]; then
+ FST=$(deduce_fstype $MP)
+ fi
+ case "$FST" in
+ "ext3")
+ PARSEDOPTS="$(parse_mount_opts commit "$OPTS")"
+ mount $DEV -t $FST $MP -o remount,$PARSEDOPTS,commit=$MAX_AGE$NOATIME_OPT
+ ;;
+ "xfs")
+ mount $DEV -t $FST $MP -o remount,$OPTS$NOATIME_OPT
+ ;;
+ esac
+ if [ -b $DEV ] ; then
+ blockdev --setra $(($READAHEAD * 2)) $DEV
+ fi
+ done
+ fi
+ if [ $DO_HD -eq 1 ] ; then
+ for THISHD in $HD ; do
+ /sbin/hdparm -S $BATT_HD $THISHD > /dev/null 2>&1
+ /sbin/hdparm -B 1 $THISHD > /dev/null 2>&1
+ done
+ fi
+ if [ $DO_CPU -eq 1 -a -e /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_min_freq ]; then
+ if [ $CPU_MAXFREQ = 'slowest' ]; then
+ CPU_MAXFREQ=`cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_min_freq`
+ fi
+ echo $CPU_MAXFREQ > /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
+ fi
+ echo "."
+ ;;
+ stop)
+ U_AGE=$((100*$DEF_UPDATE))
+ B_AGE=$((100*$DEF_AGE))
+ echo -n "Stopping laptop_mode"
+ echo 0 > /proc/sys/vm/laptop_mode
+ if [ -f /proc/sys/fs/xfs/age_buffer -a ! -f /proc/sys/fs/xfs/lm_age_buffer ] ; then
+ # These need to be restored, if there are no lm_*.
+ echo $(($XFS_HZ*$DEF_XFS_AGE_BUFFER)) > /proc/sys/fs/xfs/age_buffer
+ echo $(($XFS_HZ*$DEF_XFS_SYNC_INTERVAL)) > /proc/sys/fs/xfs/sync_interval
+ elif [ -f /proc/sys/fs/xfs/age_buffer_centisecs ] ; then
+ # These need to be restored as well.
+ echo $((100*$DEF_XFS_AGE_BUFFER)) > /proc/sys/fs/xfs/age_buffer_centisecs
+ echo $((100*$DEF_XFS_SYNC_INTERVAL)) > /proc/sys/fs/xfs/xfssyncd_centisecs
+ echo $((100*$DEF_XFS_BUFD_INTERVAL)) > /proc/sys/fs/xfs/xfsbufd_centisecs
+ fi
+ case "$KLEVEL" in
+ "2.4")
+ echo "30 500 0 0 $U_AGE $B_AGE 60 20 0" > /proc/sys/vm/bdflush
+ ;;
+ "2.6")
+ echo "$U_AGE" > /proc/sys/vm/dirty_writeback_centisecs
+ echo "$B_AGE" > /proc/sys/vm/dirty_expire_centisecs
+ echo "$DEF_DIRTY_RATIO" > /proc/sys/vm/dirty_ratio
+ echo "$DEF_DIRTY_BACKGROUND_RATIO" > /proc/sys/vm/dirty_background_ratio
+ ;;
+ esac
+ if [ $DO_REMOUNTS -eq 1 ] ; then
+ cat /etc/mtab | while read DEV MP FST OPTS DUMP PASS ; do
+ # Reset commit and atime options to defaults.
+ if [ "$FST" = 'unknown' ]; then
+ FST=$(deduce_fstype $MP)
+ fi
+ case "$FST" in
+ "ext3")
+ PARSEDOPTS="$(parse_mount_opts_wfstab $DEV commit $OPTS)"
+ PARSEDOPTS="$(parse_yesno_opts_wfstab $DEV atime atime $PARSEDOPTS)"
+ mount $DEV -t $FST $MP -o remount,$PARSEDOPTS
+ ;;
+ "xfs")
+ PARSEDOPTS="$(parse_yesno_opts_wfstab $DEV atime atime $OPTS)"
+ mount $DEV -t $FST $MP -o remount,$PARSEDOPTS
+ ;;
+ esac
+ if [ -b $DEV ] ; then
+ blockdev --setra 256 $DEV
+ fi
+ done
+ fi
+ if [ $DO_HD -eq 1 ] ; then
+ for THISHD in $HD ; do
+ /sbin/hdparm -S $AC_HD $THISHD > /dev/null 2>&1
+ /sbin/hdparm -B 255 $THISHD > /dev/null 2>&1
+ done
+ fi
+ if [ $DO_CPU -eq 1 -a -e /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_min_freq ]; then
+ echo `cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq` > /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
+ fi
+ echo "."
+ ;;
+ *)
+ echo "Usage: $0 {start|stop}" 2>&1
+ exit 1
+ ;;
+
+ esac
+
+ exit 0
+
+
+ACPI integration
+----------------
+
+Dax Kelson submitted this so that the ACPI acpid daemon will
+kick off the laptop_mode script and run hdparm. The part that
+automatically disables laptop mode when the battery is low was
+written by Jan Topinski.
+
+/etc/acpi/events/ac_adapter::
+
+ event=ac_adapter
+ action=/etc/acpi/actions/ac.sh %e
+
+/etc/acpi/events/battery::
+
+ event=battery.*
+ action=/etc/acpi/actions/battery.sh %e
+
+/etc/acpi/actions/ac.sh::
+
+ #!/bin/bash
+
+ # ac on/offline event handler
+
+ status=`awk '/^state: / { print $2 }' /proc/acpi/ac_adapter/$2/state`
+
+ case $status in
+ "on-line")
+ /sbin/laptop_mode stop
+ exit 0
+ ;;
+ "off-line")
+ /sbin/laptop_mode start
+ exit 0
+ ;;
+ esac
+
+
+/etc/acpi/actions/battery.sh::
+
+ #! /bin/bash
+
+ # Automatically disable laptop mode when the battery almost runs out.
+
+ BATT_INFO=/proc/acpi/battery/$2/state
+
+ if [[ -f /proc/sys/vm/laptop_mode ]]
+ then
+ LM=`cat /proc/sys/vm/laptop_mode`
+ if [[ $LM -gt 0 ]]
+ then
+ if [[ -f $BATT_INFO ]]
+ then
+ # Source the config file only now that we know we need
+ if [ -f /etc/default/laptop-mode ] ; then
+ # Debian
+ . /etc/default/laptop-mode
+ elif [ -f /etc/sysconfig/laptop-mode ] ; then
+ # Others
+ . /etc/sysconfig/laptop-mode
+ fi
+ MINIMUM_BATTERY_MINUTES=${MINIMUM_BATTERY_MINUTES:-'10'}
+
+ ACTION="`cat $BATT_INFO | grep charging | cut -c 26-`"
+ if [[ ACTION -eq "discharging" ]]
+ then
+ PRESENT_RATE=`cat $BATT_INFO | grep "present rate:" | sed "s/.* \([0-9][0-9]* \).*/\1/" `
+ REMAINING=`cat $BATT_INFO | grep "remaining capacity:" | sed "s/.* \([0-9][0-9]* \).*/\1/" `
+ fi
+ if (($REMAINING * 60 / $PRESENT_RATE < $MINIMUM_BATTERY_MINUTES))
+ then
+ /sbin/laptop_mode stop
+ fi
+ else
+ logger -p daemon.warning "You are using laptop mode and your battery interface $BATT_INFO is missing. This may lead to loss of data when the battery runs out. Check kernel ACPI support and /proc/acpi/battery folder, and edit /etc/acpi/battery.sh to set BATT_INFO to the correct path."
+ fi
+ fi
+ fi
+
+
+Monitoring tool
+---------------
+
+Bartek Kania submitted this, it can be used to measure how much time your disk
+spends spun up/down. See tools/laptop/dslm/dslm.c
diff --git a/Documentation/laptops/lg-laptop.rst b/Documentation/admin-guide/laptops/lg-laptop.rst
index e486fe7ddc35..c4dd534f91ed 100644
--- a/Documentation/laptops/lg-laptop.rst
+++ b/Documentation/admin-guide/laptops/lg-laptop.rst
@@ -1,4 +1,6 @@
.. SPDX-License-Identifier: GPL-2.0+
+
+
LG Gram laptop extra features
=============================
@@ -9,14 +11,13 @@ Hotkeys
-------
The following FN keys are ignored by the kernel without this driver:
+
- FN-F1 (LG control panel) - Generates F15
-- FN-F5 (Touchpad toggle) - Generates F13
+- FN-F5 (Touchpad toggle) - Generates F21
- FN-F6 (Airplane mode) - Generates RFKILL
-- FN-F8 (Keyboard backlight) - Generates F16.
- This key also changes keyboard backlight mode.
- FN-F9 (Reader mode) - Generates F14
-The rest of the FN key work without a need for a special driver.
+The rest of the FN keys work without a need for a special driver.
Reader mode
@@ -37,7 +38,7 @@ FN lock.
Battery care limit
------------------
-Writing 80/100 to /sys/devices/platform/lg-laptop/battery_care_limit
+Writing 80/100 to /sys/class/power_supply/CMB0/charge_control_end_threshold
sets the maximum capacity to charge the battery. Limiting the charge
reduces battery capacity loss over time.
@@ -47,8 +48,8 @@ This value is reset to 100 when the kernel boots.
Fan mode
--------
-Writing 1/0 to /sys/devices/platform/lg-laptop/fan_mode disables/enables
-the fan silent mode.
+Writing 0/1/2 to /sys/devices/platform/lg-laptop/fan_mode sets fan mode to
+Optimal/Silent/Performance respectively.
USB charge
diff --git a/Documentation/admin-guide/laptops/samsung-galaxybook.rst b/Documentation/admin-guide/laptops/samsung-galaxybook.rst
new file mode 100644
index 000000000000..752b8f1a4a74
--- /dev/null
+++ b/Documentation/admin-guide/laptops/samsung-galaxybook.rst
@@ -0,0 +1,174 @@
+.. SPDX-License-Identifier: GPL-2.0-or-later
+
+==========================
+Samsung Galaxy Book Driver
+==========================
+
+Joshua Grisham <josh@joshuagrisham.com>
+
+This is a Linux x86 platform driver for Samsung Galaxy Book series notebook
+devices which utilizes Samsung's ``SCAI`` ACPI device in order to control
+extra features and receive various notifications.
+
+Supported devices
+=================
+
+Any device with one of the supported ACPI device IDs should be supported. This
+covers most of the "Samsung Galaxy Book" series notebooks that are currently
+available as of this writing, and could include other Samsung notebook devices
+as well.
+
+Status
+======
+
+The following features are currently supported:
+
+- :ref:`Keyboard backlight <keyboard-backlight>` control
+- :ref:`Performance mode <performance-mode>` control implemented using the
+ platform profile interface
+- :ref:`Battery charge control end threshold
+ <battery-charge-control-end-threshold>` (stop charging battery at given
+ percentage value) implemented as a battery hook
+- :ref:`Firmware Attributes <firmware-attributes>` to allow control of various
+ device settings
+- :ref:`Handling of Fn hotkeys <keyboard-hotkey-actions>` for various actions
+- :ref:`Handling of ACPI notifications and hotkeys
+ <acpi-notifications-and-hotkey-actions>`
+
+Because different models of these devices can vary in their features, there is
+logic built within the driver which attempts to test each implemented feature
+for a valid response before enabling its support (registering additional devices
+or extensions, adding sysfs attributes, etc). Therefore, it can be important to
+note that not all features may be supported for your particular device.
+
+The following features might be possible to implement but will require
+additional investigation and are therefore not supported at this time:
+
+- "Dolby Atmos" mode for the speakers
+- "Outdoor Mode" for increasing screen brightness on models with ``SAM0427``
+- "Silent Mode" on models with ``SAM0427``
+
+.. _keyboard-backlight:
+
+Keyboard backlight
+==================
+
+A new LED class named ``samsung-galaxybook::kbd_backlight`` is created which
+will then expose the device using the standard sysfs-based LED interface at
+``/sys/class/leds/samsung-galaxybook::kbd_backlight``. Brightness can be
+controlled by writing the desired value to the ``brightness`` sysfs attribute or
+with any other desired userspace utility.
+
+.. note::
+ Most of these devices have an ambient light sensor which also turns
+ off the keyboard backlight under well-lit conditions. This behavior does not
+ seem possible to control at this time, but can be good to be aware of.
+
+.. _performance-mode:
+
+Performance mode
+================
+
+This driver implements the
+Documentation/userspace-api/sysfs-platform_profile.rst interface for working
+with the "performance mode" function of the Samsung ACPI device.
+
+Mapping of each Samsung "performance mode" to its respective platform profile is
+performed dynamically by the driver, as not all models support all of the same
+performance modes. Your device might have one or more of the following mappings:
+
+- "Silent" maps to ``low-power``
+- "Quiet" maps to ``quiet``
+- "Optimized" maps to ``balanced``
+- "High performance" maps to ``performance``
+
+The result of the mapping can be printed in the kernel log when the module is
+loaded. Supported profiles can also be retrieved from
+``/sys/firmware/acpi/platform_profile_choices``, while
+``/sys/firmware/acpi/platform_profile`` can be used to read or write the
+currently selected profile.
+
+The ``balanced`` platform profile will be set during module load if no profile
+has been previously set.
+
+.. _battery-charge-control-end-threshold:
+
+Battery charge control end threshold
+====================================
+
+This platform driver will add the ability to set the battery's charge control
+end threshold, but does not have the ability to set a start threshold.
+
+This feature is typically called "Battery Saver" by the various Samsung
+applications in Windows, but in Linux we have implemented the standardized
+"charge control threshold" sysfs interface on the battery device to allow for
+controlling this functionality from the userspace.
+
+The sysfs attribute
+``/sys/class/power_supply/BAT1/charge_control_end_threshold`` can be used to
+read or set the desired charge end threshold.
+
+If you wish to maintain interoperability with the Samsung Settings application
+in Windows, then you should set the value to 100 to represent "off", or enable
+the feature using only one of the following values: 50, 60, 70, 80, or 90.
+Otherwise, the driver will accept any value between 1 and 100 as the percentage
+that you wish the battery to stop charging at.
+
+.. note::
+ Some devices have been observed as automatically "turning off" the charge
+ control end threshold if an input value of less than 30 is given.
+
+.. _firmware-attributes:
+
+Firmware Attributes
+===================
+
+The following enumeration-typed firmware attributes are set up by this driver
+and should be accessible under
+``/sys/class/firmware-attributes/samsung-galaxybook/attributes/`` if your device
+supports them:
+
+- ``power_on_lid_open`` (device should power on when the lid is opened)
+- ``usb_charging`` (USB ports can deliver power to connected devices even when
+ the device is powered off or in a low sleep state)
+- ``block_recording`` (blocks access to camera and microphone)
+
+All of these attributes are simple boolean-like enumeration values which use 0
+to represent "off" and 1 to represent "on". Use the ``current_value`` attribute
+to get or change the setting on the device.
+
+Note that when ``block_recording`` is updated, the input device "Samsung Galaxy
+Book Lens Cover" will receive a ``SW_CAMERA_LENS_COVER`` switch event which
+reflects the current state.
+
+.. _keyboard-hotkey-actions:
+
+Keyboard hotkey actions (i8042 filter)
+======================================
+
+The i8042 filter will swallow the keyboard events for the Fn+F9 hotkey (Multi-
+level keyboard backlight toggle) and Fn+F10 hotkey (Block recording toggle)
+and instead execute their actions within the driver itself.
+
+Fn+F9 will cycle through the brightness levels of the keyboard backlight. A
+notification will be sent using ``led_classdev_notify_brightness_hw_changed``
+so that the userspace can be aware of the change. This mimics the behavior of
+other existing devices where the brightness level is cycled internally by the
+embedded controller and then reported via a notification.
+
+Fn+F10 will toggle the value of the "block recording" setting, which blocks
+or allows usage of the built-in camera and microphone (and generates the same
+Lens Cover switch event mentioned above).
+
+.. _acpi-notifications-and-hotkey-actions:
+
+ACPI notifications and hotkey actions
+=====================================
+
+ACPI notifications will generate ACPI netlink events under the device class
+``samsung-galaxybook`` and bus ID matching the Samsung ACPI device ID found on
+your device. The events can be received using userspace tools such as
+``acpi_listen`` and ``acpid``.
+
+The Fn+F11 Performance mode hotkey will be handled by the driver; each keypress
+will cycle to the next available platform profile.
diff --git a/Documentation/admin-guide/laptops/sony-laptop.rst b/Documentation/admin-guide/laptops/sony-laptop.rst
new file mode 100644
index 000000000000..9edcc7f6612f
--- /dev/null
+++ b/Documentation/admin-guide/laptops/sony-laptop.rst
@@ -0,0 +1,174 @@
+=========================================
+Sony Notebook Control Driver (SNC) Readme
+=========================================
+
+ - Copyright (C) 2004- 2005 Stelian Pop <stelian@popies.net>
+ - Copyright (C) 2007 Mattia Dongili <malattia@linux.it>
+
+This mini-driver drives the SNC and SPIC device present in the ACPI BIOS of the
+Sony Vaio laptops. This driver mixes both devices functions under the same
+(hopefully consistent) interface. This also means that the sonypi driver is
+obsoleted by sony-laptop now.
+
+Fn keys (hotkeys):
+------------------
+
+Some models report hotkeys through the SNC or SPIC devices, such events are
+reported both through the ACPI subsystem as acpi events and through the INPUT
+subsystem. See the logs of /proc/bus/input/devices to find out what those
+events are and which input devices are created by the driver.
+Additionally, loading the driver with the debug option will report all events
+in the kernel log.
+
+The "scancodes" passed to the input system (that can be remapped with udev)
+are indexes to the table "sony_laptop_input_keycode_map" in the sony-laptop.c
+module. For example the "FN/E" key combination (EJECTCD on some models)
+generates the scancode 20 (0x14).
+
+Backlight control:
+------------------
+If your laptop model supports it, you will find sysfs files in the
+/sys/class/backlight/sony/
+directory. You will be able to query and set the current screen
+brightness:
+
+ ====================== =========================================
+ brightness get/set screen brightness (an integer
+ between 0 and 7)
+ actual_brightness reading from this file will query the HW
+ to get real brightness value
+ max_brightness the maximum brightness value
+ ====================== =========================================
+
+
+Platform specific:
+------------------
+Loading the sony-laptop module will create a
+/sys/devices/platform/sony-laptop/
+directory populated with some files.
+
+You then read/write integer values from/to those files by using
+standard UNIX tools.
+
+The files are:
+
+ ====================== ==========================================
+ brightness_default screen brightness which will be set
+ when the laptop will be rebooted
+ cdpower power on/off the internal CD drive
+ audiopower power on/off the internal sound card
+ lanpower power on/off the internal ethernet card
+ (only in debug mode)
+ bluetoothpower power on/off the internal bluetooth device
+ fanspeed get/set the fan speed
+ ====================== ==========================================
+
+Note that some files may be missing if they are not supported
+by your particular laptop model.
+
+Example usage::
+
+ # echo "1" > /sys/devices/platform/sony-laptop/brightness_default
+
+sets the lowest screen brightness for the next and later reboots
+
+::
+
+ # echo "8" > /sys/devices/platform/sony-laptop/brightness_default
+
+sets the highest screen brightness for the next and later reboots
+
+::
+
+ # cat /sys/devices/platform/sony-laptop/brightness_default
+
+retrieves the value
+
+::
+
+ # echo "0" > /sys/devices/platform/sony-laptop/audiopower
+
+powers off the sound card
+
+::
+
+ # echo "1" > /sys/devices/platform/sony-laptop/audiopower
+
+powers on the sound card.
+
+
+RFkill control:
+---------------
+More recent Vaio models expose a consistent set of ACPI methods to
+control radio frequency emitting devices. If you are a lucky owner of
+such a laptop you will find the necessary rfkill devices under
+/sys/class/rfkill. Check those starting with sony-* in::
+
+ # grep . /sys/class/rfkill/*/{state,name}
+
+
+Development:
+------------
+
+If you want to help with the development of this driver (and
+you are not afraid of any side effects doing strange things with
+your ACPI BIOS could have on your laptop), load the driver and
+pass the option 'debug=1'.
+
+REPEAT:
+ **DON'T DO THIS IF YOU DON'T LIKE RISKY BUSINESS.**
+
+In your kernel logs you will find the list of all ACPI methods
+the SNC device has on your laptop.
+
+* For new models you will see a long list of meaningless method names,
+ reading the DSDT table source should reveal that:
+
+(1) the SNC device uses an internal capability lookup table
+(2) SN00 is used to find values in the lookup table
+(3) SN06 and SN07 are used to call into the real methods based on
+ offsets you can obtain iterating the table using SN00
+(4) SN02 used to enable events.
+
+Some values in the capability lookup table are more or less known, see
+the code for all sony_call_snc_handle calls, others are more obscure.
+
+* For old models you can see the GCDP/GCDP methods used to pwer on/off
+ the CD drive, but there are others and they are usually different from
+ model to model.
+
+**I HAVE NO IDEA WHAT THOSE METHODS DO.**
+
+The sony-laptop driver creates, for some of those methods (the most
+current ones found on several Vaio models), an entry under
+/sys/devices/platform/sony-laptop, just like the 'cdpower' one.
+You can create other entries corresponding to your own laptop methods by
+further editing the source (see the 'sony_nc_values' table, and add a new
+entry to this table with your get/set method names using the
+SNC_HANDLE_NAMES macro).
+
+Your mission, should you accept it, is to try finding out what
+those entries are for, by reading/writing random values from/to those
+files and find out what is the impact on your laptop.
+
+Should you find anything interesting, please report it back to me,
+I will not disavow all knowledge of your actions :)
+
+See also http://www.linux.it/~malattia/wiki/index.php/Sony_drivers for other
+useful info.
+
+Bugs/Limitations:
+-----------------
+
+* This driver is not based on official documentation from Sony
+ (because there is none), so there is no guarantee this driver
+ will work at all, or do the right thing. Although this hasn't
+ happened to me, this driver could do very bad things to your
+ laptop, including permanent damage.
+
+* The sony-laptop and sonypi drivers do not interact at all. In the
+ future, sonypi will be removed and replaced by sony-laptop.
+
+* spicctrl, which is the userspace tool used to communicate with the
+ sonypi driver (through /dev/sonypi) is deprecated as well since all
+ its features are now available under the sysfs tree via sony-laptop.
diff --git a/Documentation/admin-guide/laptops/sonypi.rst b/Documentation/admin-guide/laptops/sonypi.rst
new file mode 100644
index 000000000000..7541f56e0007
--- /dev/null
+++ b/Documentation/admin-guide/laptops/sonypi.rst
@@ -0,0 +1,158 @@
+==================================================
+Sony Programmable I/O Control Device Driver Readme
+==================================================
+
+ - Copyright (C) 2001-2004 Stelian Pop <stelian@popies.net>
+ - Copyright (C) 2001-2002 Alcôve <www.alcove.com>
+ - Copyright (C) 2001 Michael Ashley <m.ashley@unsw.edu.au>
+ - Copyright (C) 2001 Junichi Morita <jun1m@mars.dti.ne.jp>
+ - Copyright (C) 2000 Takaya Kinjo <t-kinjo@tc4.so-net.ne.jp>
+ - Copyright (C) 2000 Andrew Tridgell <tridge@samba.org>
+
+This driver enables access to the Sony Programmable I/O Control Device which
+can be found in many Sony Vaio laptops. Some newer Sony laptops (seems to be
+limited to new FX series laptops, at least the FX501 and the FX702) lack a
+sonypi device and are not supported at all by this driver.
+
+It will give access (through a user space utility) to some events those laptops
+generate, like:
+
+ - jogdial events (the small wheel on the side of Vaios)
+ - capture button events (only on Vaio Picturebook series)
+ - Fn keys
+ - bluetooth button (only on C1VR model)
+ - programmable keys, back, help, zoom, thumbphrase buttons, etc.
+ (when available)
+
+Those events (see linux/sonypi.h) can be polled using the character device node
+/dev/sonypi (major 10, minor auto allocated or specified as an option).
+A simple daemon which translates the jogdial movements into mouse wheel events
+can be downloaded at: <http://popies.net/sonypi/>
+
+Another option to intercept the events is to get them directly through the
+input layer.
+
+This driver supports also some ioctl commands for setting the LCD screen
+brightness and querying the batteries charge information (some more
+commands may be added in the future).
+
+This driver can also be used to set the camera controls on Picturebook series
+(brightness, contrast etc), and is used by the video4linux driver for the
+Motion Eye camera.
+
+Please note that this driver was created by reverse engineering the Windows
+driver and the ACPI BIOS, because Sony doesn't agree to release any programming
+specs for its laptops. If someone convinces them to do so, drop me a note.
+
+Driver options:
+---------------
+
+Several options can be passed to the sonypi driver using the standard
+module argument syntax (<param>=<value> when passing the option to the
+module or sonypi.<param>=<value> on the kernel boot line when sonypi is
+statically linked into the kernel). Those options are:
+
+ =============== =======================================================
+ minor: minor number of the misc device /dev/sonypi,
+ default is -1 (automatic allocation, see /proc/misc
+ or kernel logs)
+
+ camera: if you have a PictureBook series Vaio (with the
+ integrated MotionEye camera), set this parameter to 1
+ in order to let the driver access to the camera
+
+ fnkeyinit: on some Vaios (C1VE, C1VR etc), the Fn key events don't
+ get enabled unless you set this parameter to 1.
+ Do not use this option unless it's actually necessary,
+ some Vaio models don't deal well with this option.
+ This option is available only if the kernel is
+ compiled without ACPI support (since it conflicts
+ with it and it shouldn't be required anyway if
+ ACPI is already enabled).
+
+ verbose: set to 1 to print unknown events received from the
+ sonypi device.
+ set to 2 to print all events received from the
+ sonypi device.
+
+ compat: uses some compatibility code for enabling the sonypi
+ events. If the driver worked for you in the past
+ (prior to version 1.5) and does not work anymore,
+ add this option and report to the author.
+
+ mask: event mask telling the driver what events will be
+ reported to the user. This parameter is required for
+ some Vaio models where the hardware reuses values
+ used in other Vaio models (like the FX series who does
+ not have a jogdial but reuses the jogdial events for
+ programmable keys events). The default event mask is
+ set to 0xffffffff, meaning that all possible events
+ will be tried. You can use the following bits to
+ construct your own event mask (from
+ drivers/char/sonypi.h)::
+
+ SONYPI_JOGGER_MASK 0x0001
+ SONYPI_CAPTURE_MASK 0x0002
+ SONYPI_FNKEY_MASK 0x0004
+ SONYPI_BLUETOOTH_MASK 0x0008
+ SONYPI_PKEY_MASK 0x0010
+ SONYPI_BACK_MASK 0x0020
+ SONYPI_HELP_MASK 0x0040
+ SONYPI_LID_MASK 0x0080
+ SONYPI_ZOOM_MASK 0x0100
+ SONYPI_THUMBPHRASE_MASK 0x0200
+ SONYPI_MEYE_MASK 0x0400
+ SONYPI_MEMORYSTICK_MASK 0x0800
+ SONYPI_BATTERY_MASK 0x1000
+ SONYPI_WIRELESS_MASK 0x2000
+
+ useinput: if set (which is the default) two input devices are
+ created, one which interprets the jogdial events as
+ mouse events, the other one which acts like a
+ keyboard reporting the pressing of the special keys.
+ =============== =======================================================
+
+Module use:
+-----------
+
+In order to automatically load the sonypi module on use, you can put those
+lines a configuration file in /etc/modprobe.d/::
+
+ alias char-major-10-250 sonypi
+ options sonypi minor=250
+
+This supposes the use of minor 250 for the sonypi device::
+
+ # mknod /dev/sonypi c 10 250
+
+Bugs:
+-----
+
+ - several users reported that this driver disables the BIOS-managed
+ Fn-keys which put the laptop in sleeping state, or switch the
+ external monitor on/off. There is no workaround yet, since this
+ driver disables all APM management for those keys, by enabling the
+ ACPI management (and the ACPI core stuff is not complete yet). If
+ you have one of those laptops with working Fn keys and want to
+ continue to use them, don't use this driver.
+
+ - some users reported that the laptop speed is lower (dhrystone
+ tested) when using the driver with the fnkeyinit parameter. I cannot
+ reproduce it on my laptop and not all users have this problem.
+ This happens because the fnkeyinit parameter enables the ACPI
+ mode (but without additional ACPI control, like processor
+ speed handling etc). Use ACPI instead of APM if it works on your
+ laptop.
+
+ - sonypi lacks the ability to distinguish between certain key
+ events on some models.
+
+ - some models with the nvidia card (geforce go 6200 tc) uses a
+ different way to adjust the backlighting of the screen. There
+ is a userspace utility to adjust the brightness on those models,
+ which can be downloaded from
+ https://www.acc.umu.se/~erikw/program/smartdimmer-0.1.tar.bz2
+
+ - since all development was done by reverse engineering, there is
+ *absolutely no guarantee* that this driver will not crash your
+ laptop. Permanently.
diff --git a/Documentation/admin-guide/laptops/thinkpad-acpi.rst b/Documentation/admin-guide/laptops/thinkpad-acpi.rst
new file mode 100644
index 000000000000..4ab0fef7d440
--- /dev/null
+++ b/Documentation/admin-guide/laptops/thinkpad-acpi.rst
@@ -0,0 +1,1691 @@
+===========================
+ThinkPad ACPI Extras Driver
+===========================
+
+Version 0.25
+
+October 16th, 2013
+
+- Borislav Deianov <borislav@users.sf.net>
+- Henrique de Moraes Holschuh <hmh@hmh.eng.br>
+
+http://ibm-acpi.sf.net/
+
+This is a Linux driver for the IBM and Lenovo ThinkPad laptops. It
+supports various features of these laptops which are accessible
+through the ACPI and ACPI EC framework, but not otherwise fully
+supported by the generic Linux ACPI drivers.
+
+This driver used to be named ibm-acpi until kernel 2.6.21 and release
+0.13-20070314. It used to be in the drivers/acpi tree, but it was
+moved to the drivers/misc tree and renamed to thinkpad-acpi for kernel
+2.6.22, and release 0.14. It was moved to drivers/platform/x86 for
+kernel 2.6.29 and release 0.22.
+
+The driver is named "thinkpad-acpi". In some places, like module
+names and log messages, "thinkpad_acpi" is used because of userspace
+issues.
+
+"tpacpi" is used as a shorthand where "thinkpad-acpi" would be too
+long due to length limitations on some Linux kernel versions.
+
+Status
+------
+
+The features currently supported are the following (see below for
+detailed description):
+
+ - Fn key combinations
+ - Bluetooth enable and disable
+ - video output switching, expansion control
+ - ThinkLight on and off
+ - CMOS/UCMS control
+ - LED control
+ - ACPI sounds
+ - temperature sensors
+ - Experimental: embedded controller register dump
+ - LCD brightness control
+ - Volume control
+ - Fan control and monitoring: fan speed, fan enable/disable
+ - WAN enable and disable
+ - UWB enable and disable
+ - LCD Shadow (PrivacyGuard) enable and disable
+ - Lap mode sensor
+ - Setting keyboard language
+ - WWAN Antenna type
+ - Auxmac
+
+A compatibility table by model and feature is maintained on the web
+site, http://ibm-acpi.sf.net/. I appreciate any success or failure
+reports, especially if they add to or correct the compatibility table.
+Please include the following information in your report:
+
+ - ThinkPad model name
+ - a copy of your ACPI tables, using the "acpidump" utility
+ - a copy of the output of dmidecode, with serial numbers
+ and UUIDs masked off
+ - which driver features work and which don't
+ - the observed behavior of non-working features
+
+Any other comments or patches are also more than welcome.
+
+
+Installation
+------------
+
+If you are compiling this driver as included in the Linux kernel
+sources, look for the CONFIG_THINKPAD_ACPI Kconfig option.
+It is located on the menu path: "Device Drivers" -> "X86 Platform
+Specific Device Drivers" -> "ThinkPad ACPI Laptop Extras".
+
+
+Features
+--------
+
+The driver exports two different interfaces to userspace, which can be
+used to access the features it provides. One is a legacy procfs-based
+interface, which will be removed at some time in the future. The other
+is a new sysfs-based interface which is not complete yet.
+
+The procfs interface creates the /proc/acpi/ibm directory. There is a
+file under that directory for each feature it supports. The procfs
+interface is mostly frozen, and will change very little if at all: it
+will not be extended to add any new functionality in the driver, instead
+all new functionality will be implemented on the sysfs interface.
+
+The sysfs interface tries to blend in the generic Linux sysfs subsystems
+and classes as much as possible. Since some of these subsystems are not
+yet ready or stabilized, it is expected that this interface will change,
+and any and all userspace programs must deal with it.
+
+
+Notes about the sysfs interface
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Unlike what was done with the procfs interface, correctness when talking
+to the sysfs interfaces will be enforced, as will correctness in the
+thinkpad-acpi's implementation of sysfs interfaces.
+
+Also, any bugs in the thinkpad-acpi sysfs driver code or in the
+thinkpad-acpi's implementation of the sysfs interfaces will be fixed for
+maximum correctness, even if that means changing an interface in
+non-compatible ways. As these interfaces mature both in the kernel and
+in thinkpad-acpi, such changes should become quite rare.
+
+Applications interfacing to the thinkpad-acpi sysfs interfaces must
+follow all sysfs guidelines and correctly process all errors (the sysfs
+interface makes extensive use of errors). File descriptors and open /
+close operations to the sysfs inodes must also be properly implemented.
+
+The version of thinkpad-acpi's sysfs interface is exported by the driver
+as a driver attribute (see below).
+
+Sysfs driver attributes are on the driver's sysfs attribute space,
+for 2.6.23+ this is /sys/bus/platform/drivers/thinkpad_acpi/ and
+/sys/bus/platform/drivers/thinkpad_hwmon/
+
+Sysfs device attributes are on the thinkpad_acpi device sysfs attribute
+space, for 2.6.23+ this is /sys/devices/platform/thinkpad_acpi/.
+
+Sysfs device attributes for the sensors and fan are on the
+thinkpad_hwmon device's sysfs attribute space, but you should locate it
+looking for a hwmon device with the name attribute of "thinkpad", or
+better yet, through libsensors. For 4.14+ sysfs attributes were moved to the
+hwmon device (/sys/bus/platform/devices/thinkpad_hwmon/hwmon/hwmon? or
+/sys/class/hwmon/hwmon?).
+
+Driver version
+--------------
+
+procfs: /proc/acpi/ibm/driver
+
+sysfs driver attribute: version
+
+The driver name and version. No commands can be written to this file.
+
+
+Sysfs interface version
+-----------------------
+
+sysfs driver attribute: interface_version
+
+Version of the thinkpad-acpi sysfs interface, as an unsigned long
+(output in hex format: 0xAAAABBCC), where:
+
+ AAAA
+ - major revision
+ BB
+ - minor revision
+ CC
+ - bugfix revision
+
+The sysfs interface version changelog for the driver can be found at the
+end of this document. Changes to the sysfs interface done by the kernel
+subsystems are not documented here, nor are they tracked by this
+attribute.
+
+Changes to the thinkpad-acpi sysfs interface are only considered
+non-experimental when they are submitted to Linux mainline, at which
+point the changes in this interface are documented and interface_version
+may be updated. If you are using any thinkpad-acpi features not yet
+sent to mainline for merging, you do so on your own risk: these features
+may disappear, or be implemented in a different and incompatible way by
+the time they are merged in Linux mainline.
+
+Changes that are backwards-compatible by nature (e.g. the addition of
+attributes that do not change the way the other attributes work) do not
+always warrant an update of interface_version. Therefore, one must
+expect that an attribute might not be there, and deal with it properly
+(an attribute not being there *is* a valid way to make it clear that a
+feature is not available in sysfs).
+
+
+Hot keys
+--------
+
+procfs: /proc/acpi/ibm/hotkey
+
+sysfs device attribute: hotkey_*
+
+In a ThinkPad, the ACPI HKEY handler is responsible for communicating
+some important events and also keyboard hot key presses to the operating
+system. Enabling the hotkey functionality of thinkpad-acpi signals the
+firmware that such a driver is present, and modifies how the ThinkPad
+firmware will behave in many situations.
+
+The driver enables the HKEY ("hot key") event reporting automatically
+when loaded, and disables it when it is removed.
+
+The driver will report HKEY events in the following format::
+
+ ibm/hotkey HKEY 00000080 0000xxxx
+
+Some of these events refer to hot key presses, but not all of them.
+
+The driver will generate events over the input layer for hot keys and
+radio switches, and over the ACPI netlink layer for other events. The
+input layer support accepts the standard IOCTLs to remap the keycodes
+assigned to each hot key.
+
+The hot key bit mask allows some control over which hot keys generate
+events. If a key is "masked" (bit set to 0 in the mask), the firmware
+will handle it. If it is "unmasked", it signals the firmware that
+thinkpad-acpi would prefer to handle it, if the firmware would be so
+kind to allow it (and it often doesn't!).
+
+Not all bits in the mask can be modified. Not all bits that can be
+modified do anything. Not all hot keys can be individually controlled
+by the mask. Some models do not support the mask at all. The behaviour
+of the mask is, therefore, highly dependent on the ThinkPad model.
+
+The driver will filter out any unmasked hotkeys, so even if the firmware
+doesn't allow disabling an specific hotkey, the driver will not report
+events for unmasked hotkeys.
+
+Note that unmasking some keys prevents their default behavior. For
+example, if Fn+F5 is unmasked, that key will no longer enable/disable
+Bluetooth by itself in firmware.
+
+Note also that not all Fn key combinations are supported through ACPI
+depending on the ThinkPad model and firmware version. On those
+ThinkPads, it is still possible to support some extra hotkeys by
+polling the "CMOS NVRAM" at least 10 times per second. The driver
+attempts to enables this functionality automatically when required.
+
+procfs notes
+^^^^^^^^^^^^
+
+The following commands can be written to the /proc/acpi/ibm/hotkey file::
+
+ echo 0xffffffff > /proc/acpi/ibm/hotkey -- enable all hot keys
+ echo 0 > /proc/acpi/ibm/hotkey -- disable all possible hot keys
+ ... any other 8-hex-digit mask ...
+ echo reset > /proc/acpi/ibm/hotkey -- restore the recommended mask
+
+The following commands have been deprecated and will cause the kernel
+to log a warning::
+
+ echo enable > /proc/acpi/ibm/hotkey -- does nothing
+ echo disable > /proc/acpi/ibm/hotkey -- returns an error
+
+The procfs interface does not support NVRAM polling control. So as to
+maintain maximum bug-to-bug compatibility, it does not report any masks,
+nor does it allow one to manipulate the hot key mask when the firmware
+does not support masks at all, even if NVRAM polling is in use.
+
+sysfs notes
+^^^^^^^^^^^
+
+ hotkey_bios_enabled:
+ DEPRECATED, WILL BE REMOVED SOON.
+
+ Returns 0.
+
+ hotkey_bios_mask:
+ DEPRECATED, DON'T USE, WILL BE REMOVED IN THE FUTURE.
+
+ Returns the hot keys mask when thinkpad-acpi was loaded.
+ Upon module unload, the hot keys mask will be restored
+ to this value. This is always 0x80c, because those are
+ the hotkeys that were supported by ancient firmware
+ without mask support.
+
+ hotkey_enable:
+ DEPRECATED, WILL BE REMOVED SOON.
+
+ 0: returns -EPERM
+ 1: does nothing
+
+ hotkey_mask:
+ bit mask to enable reporting (and depending on
+ the firmware, ACPI event generation) for each hot key
+ (see above). Returns the current status of the hot keys
+ mask, and allows one to modify it.
+
+ hotkey_all_mask:
+ bit mask that should enable event reporting for all
+ supported hot keys, when echoed to hotkey_mask above.
+ Unless you know which events need to be handled
+ passively (because the firmware *will* handle them
+ anyway), do *not* use hotkey_all_mask. Use
+ hotkey_recommended_mask, instead. You have been warned.
+
+ hotkey_recommended_mask:
+ bit mask that should enable event reporting for all
+ supported hot keys, except those which are always
+ handled by the firmware anyway. Echo it to
+ hotkey_mask above, to use. This is the default mask
+ used by the driver.
+
+ hotkey_source_mask:
+ bit mask that selects which hot keys will the driver
+ poll the NVRAM for. This is auto-detected by the driver
+ based on the capabilities reported by the ACPI firmware,
+ but it can be overridden at runtime.
+
+ Hot keys whose bits are set in hotkey_source_mask are
+ polled for in NVRAM, and reported as hotkey events if
+ enabled in hotkey_mask. Only a few hot keys are
+ available through CMOS NVRAM polling.
+
+ Warning: when in NVRAM mode, the volume up/down/mute
+ keys are synthesized according to changes in the mixer,
+ which uses a single volume up or volume down hotkey
+ press to unmute, as per the ThinkPad volume mixer user
+ interface. When in ACPI event mode, volume up/down/mute
+ events are reported by the firmware and can behave
+ differently (and that behaviour changes with firmware
+ version -- not just with firmware models -- as well as
+ OSI(Linux) state).
+
+ hotkey_poll_freq:
+ frequency in Hz for hot key polling. It must be between
+ 0 and 25 Hz. Polling is only carried out when strictly
+ needed.
+
+ Setting hotkey_poll_freq to zero disables polling, and
+ will cause hot key presses that require NVRAM polling
+ to never be reported.
+
+ Setting hotkey_poll_freq too low may cause repeated
+ pressings of the same hot key to be misreported as a
+ single key press, or to not even be detected at all.
+ The recommended polling frequency is 10Hz.
+
+ hotkey_radio_sw:
+ If the ThinkPad has a hardware radio switch, this
+ attribute will read 0 if the switch is in the "radios
+ disabled" position, and 1 if the switch is in the
+ "radios enabled" position.
+
+ This attribute has poll()/select() support.
+
+ hotkey_tablet_mode:
+ If the ThinkPad has tablet capabilities, this attribute
+ will read 0 if the ThinkPad is in normal mode, and
+ 1 if the ThinkPad is in tablet mode.
+
+ This attribute has poll()/select() support.
+
+ wakeup_reason:
+ Set to 1 if the system is waking up because the user
+ requested a bay ejection. Set to 2 if the system is
+ waking up because the user requested the system to
+ undock. Set to zero for normal wake-ups or wake-ups
+ due to unknown reasons.
+
+ This attribute has poll()/select() support.
+
+ wakeup_hotunplug_complete:
+ Set to 1 if the system was waken up because of an
+ undock or bay ejection request, and that request
+ was successfully completed. At this point, it might
+ be useful to send the system back to sleep, at the
+ user's choice. Refer to HKEY events 0x4003 and
+ 0x3003, below.
+
+ This attribute has poll()/select() support.
+
+input layer notes
+^^^^^^^^^^^^^^^^^
+
+A Hot key is mapped to a single input layer EV_KEY event, possibly
+followed by an EV_MSC MSC_SCAN event that shall contain that key's scan
+code. An EV_SYN event will always be generated to mark the end of the
+event block.
+
+Do not use the EV_MSC MSC_SCAN events to process keys. They are to be
+used as a helper to remap keys, only. They are particularly useful when
+remapping KEY_UNKNOWN keys.
+
+The events are available in an input device, with the following id:
+
+ ============== ==============================
+ Bus BUS_HOST
+ vendor 0x1014 (PCI_VENDOR_ID_IBM) or
+ 0x17aa (PCI_VENDOR_ID_LENOVO)
+ product 0x5054 ("TP")
+ version 0x4101
+ ============== ==============================
+
+The version will have its LSB incremented if the keymap changes in a
+backwards-compatible way. The MSB shall always be 0x41 for this input
+device. If the MSB is not 0x41, do not use the device as described in
+this section, as it is either something else (e.g. another input device
+exported by a thinkpad driver, such as HDAPS) or its functionality has
+been changed in a non-backwards compatible way.
+
+Adding other event types for other functionalities shall be considered a
+backwards-compatible change for this input device.
+
+Thinkpad-acpi Hot Key event map (version 0x4101):
+
+======= ======= ============== ==============================================
+ACPI Scan
+event code Key Notes
+======= ======= ============== ==============================================
+0x1001 0x00 FN+F1 -
+
+0x1002 0x01 FN+F2 IBM: battery (rare)
+ Lenovo: Screen lock
+
+0x1003 0x02 FN+F3 Many IBM models always report
+ this hot key, even with hot keys
+ disabled or with Fn+F3 masked
+ off
+ IBM: screen lock, often turns
+ off the ThinkLight as side-effect
+ Lenovo: battery
+
+0x1004 0x03 FN+F4 Sleep button (ACPI sleep button
+ semantics, i.e. sleep-to-RAM).
+ It always generates some kind
+ of event, either the hot key
+ event or an ACPI sleep button
+ event. The firmware may
+ refuse to generate further FN+F4
+ key presses until a S3 or S4 ACPI
+ sleep cycle is performed or some
+ time passes.
+
+0x1005 0x04 FN+F5 Radio. Enables/disables
+ the internal Bluetooth hardware
+ and W-WAN card if left in control
+ of the firmware. Does not affect
+ the WLAN card.
+ Should be used to turn on/off all
+ radios (Bluetooth+W-WAN+WLAN),
+ really.
+
+0x1006 0x05 FN+F6 -
+
+0x1007 0x06 FN+F7 Video output cycle.
+ Do you feel lucky today?
+
+0x1008 0x07 FN+F8 IBM: toggle screen expand
+ Lenovo: configure UltraNav,
+ or toggle screen expand.
+ On 2024 platforms replaced by
+ 0x131f (see below) and on newer
+ platforms (2025 +) keycode is
+ replaced by 0x1401 (see below).
+
+0x1009 0x08 FN+F9 -
+
+... ... ... ...
+
+0x100B 0x0A FN+F11 -
+
+0x100C 0x0B FN+F12 Sleep to disk. You are always
+ supposed to handle it yourself,
+ either through the ACPI event,
+ or through a hotkey event.
+ The firmware may refuse to
+ generate further FN+F12 key
+ press events until a S3 or S4
+ ACPI sleep cycle is performed,
+ or some time passes.
+
+0x100D 0x0C FN+BACKSPACE -
+0x100E 0x0D FN+INSERT -
+0x100F 0x0E FN+DELETE -
+
+0x1010 0x0F FN+HOME Brightness up. This key is
+ always handled by the firmware
+ in IBM ThinkPads, even when
+ unmasked. Just leave it alone.
+ For Lenovo ThinkPads with a new
+ BIOS, it has to be handled either
+ by the ACPI OSI, or by userspace.
+ The driver does the right thing,
+ never mess with this.
+0x1011 0x10 FN+END Brightness down. See brightness
+ up for details.
+
+0x1012 0x11 FN+PGUP ThinkLight toggle. This key is
+ always handled by the firmware,
+ even when unmasked.
+
+0x1013 0x12 FN+PGDOWN -
+
+0x1014 0x13 FN+SPACE Zoom key
+
+0x1015 0x14 VOLUME UP Internal mixer volume up. This
+ key is always handled by the
+ firmware, even when unmasked.
+ NOTE: Lenovo seems to be changing
+ this.
+0x1016 0x15 VOLUME DOWN Internal mixer volume up. This
+ key is always handled by the
+ firmware, even when unmasked.
+ NOTE: Lenovo seems to be changing
+ this.
+0x1017 0x16 MUTE Mute internal mixer. This
+ key is always handled by the
+ firmware, even when unmasked.
+
+0x1018 0x17 THINKPAD ThinkPad/Access IBM/Lenovo key
+
+0x1019 0x18 unknown
+
+0x131f ... FN+F8 Platform Mode change (2024 systems).
+ Implemented in driver.
+
+0x1401 ... FN+F8 Platform Mode change (2025 + systems).
+ Implemented in driver.
+... ... ...
+
+0x1020 0x1F unknown
+======= ======= ============== ==============================================
+
+The ThinkPad firmware does not allow one to differentiate when most hot
+keys are pressed or released (either that, or we don't know how to, yet).
+For these keys, the driver generates a set of events for a key press and
+immediately issues the same set of events for a key release. It is
+unknown by the driver if the ThinkPad firmware triggered these events on
+hot key press or release, but the firmware will do it for either one, not
+both.
+
+If a key is mapped to KEY_RESERVED, it generates no input events at all.
+If a key is mapped to KEY_UNKNOWN, it generates an input event that
+includes an scan code. If a key is mapped to anything else, it will
+generate input device EV_KEY events.
+
+In addition to the EV_KEY events, thinkpad-acpi may also issue EV_SW
+events for switches:
+
+============== ==============================================
+SW_RFKILL_ALL T60 and later hardware rfkill rocker switch
+SW_TABLET_MODE Tablet ThinkPads HKEY events 0x5009 and 0x500A
+============== ==============================================
+
+Non hotkey ACPI HKEY event map
+------------------------------
+
+Events that are never propagated by the driver:
+
+====== ==================================================
+0x2304 System is waking up from suspend to undock
+0x2305 System is waking up from suspend to eject bay
+0x2404 System is waking up from hibernation to undock
+0x2405 System is waking up from hibernation to eject bay
+0x5001 Lid closed
+0x5002 Lid opened
+0x5009 Tablet swivel: switched to tablet mode
+0x500A Tablet swivel: switched to normal mode
+0x5010 Brightness level changed/control event
+0x6000 KEYBOARD: Numlock key pressed
+0x6005 KEYBOARD: Fn key pressed (TO BE VERIFIED)
+0x7000 Radio Switch may have changed state
+====== ==================================================
+
+
+Events that are propagated by the driver to userspace:
+
+====== =====================================================
+0x2313 ALARM: System is waking up from suspend because
+ the battery is nearly empty
+0x2413 ALARM: System is waking up from hibernation because
+ the battery is nearly empty
+0x3003 Bay ejection (see 0x2x05) complete, can sleep again
+0x3006 Bay hotplug request (hint to power up SATA link when
+ the optical drive tray is ejected)
+0x4003 Undocked (see 0x2x04), can sleep again
+0x4010 Docked into hotplug port replicator (non-ACPI dock)
+0x4011 Undocked from hotplug port replicator (non-ACPI dock)
+0x500B Tablet pen inserted into its storage bay
+0x500C Tablet pen removed from its storage bay
+0x6011 ALARM: battery is too hot
+0x6012 ALARM: battery is extremely hot
+0x6021 ALARM: a sensor is too hot
+0x6022 ALARM: a sensor is extremely hot
+0x6030 System thermal table changed
+0x6032 Thermal Control command set completion (DYTC, Windows)
+0x6040 Nvidia Optimus/AC adapter related (TO BE VERIFIED)
+0x60C0 X1 Yoga 2016, Tablet mode status changed
+0x60F0 Thermal Transformation changed (GMTS, Windows)
+====== =====================================================
+
+Battery nearly empty alarms are a last resort attempt to get the
+operating system to hibernate or shutdown cleanly (0x2313), or shutdown
+cleanly (0x2413) before power is lost. They must be acted upon, as the
+wake up caused by the firmware will have negated most safety nets...
+
+When any of the "too hot" alarms happen, according to Lenovo the user
+should suspend or hibernate the laptop (and in the case of battery
+alarms, unplug the AC adapter) to let it cool down. These alarms do
+signal that something is wrong, they should never happen on normal
+operating conditions.
+
+The "extremely hot" alarms are emergencies. According to Lenovo, the
+operating system is to force either an immediate suspend or hibernate
+cycle, or a system shutdown. Obviously, something is very wrong if this
+happens.
+
+
+Brightness hotkey notes
+^^^^^^^^^^^^^^^^^^^^^^^
+
+Don't mess with the brightness hotkeys in a Thinkpad. If you want
+notifications for OSD, use the sysfs backlight class event support.
+
+The driver will issue KEY_BRIGHTNESS_UP and KEY_BRIGHTNESS_DOWN events
+automatically for the cases were userspace has to do something to
+implement brightness changes. When you override these events, you will
+either fail to handle properly the ThinkPads that require explicit
+action to change backlight brightness, or the ThinkPads that require
+that no action be taken to work properly.
+
+
+Bluetooth
+---------
+
+procfs: /proc/acpi/ibm/bluetooth
+
+sysfs device attribute: bluetooth_enable (deprecated)
+
+sysfs rfkill class: switch "tpacpi_bluetooth_sw"
+
+This feature shows the presence and current state of a ThinkPad
+Bluetooth device in the internal ThinkPad CDC slot.
+
+If the ThinkPad supports it, the Bluetooth state is stored in NVRAM,
+so it is kept across reboots and power-off.
+
+Procfs notes
+^^^^^^^^^^^^
+
+If Bluetooth is installed, the following commands can be used::
+
+ echo enable > /proc/acpi/ibm/bluetooth
+ echo disable > /proc/acpi/ibm/bluetooth
+
+Sysfs notes
+^^^^^^^^^^^
+
+ If the Bluetooth CDC card is installed, it can be enabled /
+ disabled through the "bluetooth_enable" thinkpad-acpi device
+ attribute, and its current status can also be queried.
+
+ enable:
+
+ - 0: disables Bluetooth / Bluetooth is disabled
+ - 1: enables Bluetooth / Bluetooth is enabled.
+
+ Note: this interface has been superseded by the generic rfkill
+ class. It has been deprecated, and it will be removed in year
+ 2010.
+
+ rfkill controller switch "tpacpi_bluetooth_sw": refer to
+ Documentation/driver-api/rfkill.rst for details.
+
+
+Video output control -- /proc/acpi/ibm/video
+--------------------------------------------
+
+This feature allows control over the devices used for video output -
+LCD, CRT or DVI (if available). The following commands are available::
+
+ echo lcd_enable > /proc/acpi/ibm/video
+ echo lcd_disable > /proc/acpi/ibm/video
+ echo crt_enable > /proc/acpi/ibm/video
+ echo crt_disable > /proc/acpi/ibm/video
+ echo dvi_enable > /proc/acpi/ibm/video
+ echo dvi_disable > /proc/acpi/ibm/video
+ echo auto_enable > /proc/acpi/ibm/video
+ echo auto_disable > /proc/acpi/ibm/video
+ echo expand_toggle > /proc/acpi/ibm/video
+ echo video_switch > /proc/acpi/ibm/video
+
+NOTE:
+ Access to this feature is restricted to processes owning the
+ CAP_SYS_ADMIN capability for safety reasons, as it can interact badly
+ enough with some versions of X.org to crash it.
+
+Each video output device can be enabled or disabled individually.
+Reading /proc/acpi/ibm/video shows the status of each device.
+
+Automatic video switching can be enabled or disabled. When automatic
+video switching is enabled, certain events (e.g. opening the lid,
+docking or undocking) cause the video output device to change
+automatically. While this can be useful, it also causes flickering
+and, on the X40, video corruption. By disabling automatic switching,
+the flickering or video corruption can be avoided.
+
+The video_switch command cycles through the available video outputs
+(it simulates the behavior of Fn-F7).
+
+Video expansion can be toggled through this feature. This controls
+whether the display is expanded to fill the entire LCD screen when a
+mode with less than full resolution is used. Note that the current
+video expansion status cannot be determined through this feature.
+
+Note that on many models (particularly those using Radeon graphics
+chips) the X driver configures the video card in a way which prevents
+Fn-F7 from working. This also disables the video output switching
+features of this driver, as it uses the same ACPI methods as
+Fn-F7. Video switching on the console should still work.
+
+UPDATE: refer to https://bugs.freedesktop.org/show_bug.cgi?id=2000
+
+
+ThinkLight control
+------------------
+
+procfs: /proc/acpi/ibm/light
+
+sysfs attributes: as per LED class, for the "tpacpi::thinklight" LED
+
+procfs notes
+^^^^^^^^^^^^
+
+The ThinkLight status can be read and set through the procfs interface. A
+few models which do not make the status available will show the ThinkLight
+status as "unknown". The available commands are::
+
+ echo on > /proc/acpi/ibm/light
+ echo off > /proc/acpi/ibm/light
+
+sysfs notes
+^^^^^^^^^^^
+
+The ThinkLight sysfs interface is documented by the LED class
+documentation, in Documentation/leds/leds-class.rst. The ThinkLight LED name
+is "tpacpi::thinklight".
+
+Due to limitations in the sysfs LED class, if the status of the ThinkLight
+cannot be read or if it is unknown, thinkpad-acpi will report it as "off".
+It is impossible to know if the status returned through sysfs is valid.
+
+
+CMOS/UCMS control
+-----------------
+
+procfs: /proc/acpi/ibm/cmos
+
+sysfs device attribute: cmos_command
+
+This feature is mostly used internally by the ACPI firmware to keep the legacy
+CMOS NVRAM bits in sync with the current machine state, and to record this
+state so that the ThinkPad will retain such settings across reboots.
+
+Some of these commands actually perform actions in some ThinkPad models, but
+this is expected to disappear more and more in newer models. As an example, in
+a T43 and in a X40, commands 12 and 13 still control the ThinkLight state for
+real, but commands 0 to 2 don't control the mixer anymore (they have been
+phased out) and just update the NVRAM.
+
+The range of valid cmos command numbers is 0 to 21, but not all have an
+effect and the behavior varies from model to model. Here is the behavior
+on the X40 (tpb is the ThinkPad Buttons utility):
+
+ - 0 - Related to "Volume down" key press
+ - 1 - Related to "Volume up" key press
+ - 2 - Related to "Mute on" key press
+ - 3 - Related to "Access IBM" key press
+ - 4 - Related to "LCD brightness up" key press
+ - 5 - Related to "LCD brightness down" key press
+ - 11 - Related to "toggle screen expansion" key press/function
+ - 12 - Related to "ThinkLight on"
+ - 13 - Related to "ThinkLight off"
+ - 14 - Related to "ThinkLight" key press (toggle ThinkLight)
+
+The cmos command interface is prone to firmware split-brain problems, as
+in newer ThinkPads it is just a compatibility layer. Do not use it, it is
+exported just as a debug tool.
+
+
+LED control
+-----------
+
+procfs: /proc/acpi/ibm/led
+sysfs attributes: as per LED class, see below for names
+
+Some of the LED indicators can be controlled through this feature. On
+some older ThinkPad models, it is possible to query the status of the
+LED indicators as well. Newer ThinkPads cannot query the real status
+of the LED indicators.
+
+Because misuse of the LEDs could induce an unaware user to perform
+dangerous actions (like undocking or ejecting a bay device while the
+buses are still active), or mask an important alarm (such as a nearly
+empty battery, or a broken battery), access to most LEDs is
+restricted.
+
+Unrestricted access to all LEDs requires that thinkpad-acpi be
+compiled with the CONFIG_THINKPAD_ACPI_UNSAFE_LEDS option enabled.
+Distributions must never enable this option. Individual users that
+are aware of the consequences are welcome to enabling it.
+
+Audio mute and microphone mute LEDs are supported, but currently not
+visible to userspace. They are used by the snd-hda-intel audio driver.
+
+procfs notes
+^^^^^^^^^^^^
+
+The available commands are::
+
+ echo '<LED number> on' >/proc/acpi/ibm/led
+ echo '<LED number> off' >/proc/acpi/ibm/led
+ echo '<LED number> blink' >/proc/acpi/ibm/led
+
+The <LED number> range is 0 to 15. The set of LEDs that can be
+controlled varies from model to model. Here is the common ThinkPad
+mapping:
+
+ - 0 - power
+ - 1 - battery (orange)
+ - 2 - battery (green)
+ - 3 - UltraBase/dock
+ - 4 - UltraBay
+ - 5 - UltraBase battery slot
+ - 6 - (unknown)
+ - 7 - standby
+ - 8 - dock status 1
+ - 9 - dock status 2
+ - 10, 11 - (unknown)
+ - 12 - thinkvantage
+ - 13, 14, 15 - (unknown)
+
+All of the above can be turned on and off and can be made to blink.
+
+sysfs notes
+^^^^^^^^^^^
+
+The ThinkPad LED sysfs interface is described in detail by the LED class
+documentation, in Documentation/leds/leds-class.rst.
+
+The LEDs are named (in LED ID order, from 0 to 12):
+"tpacpi::power", "tpacpi:orange:batt", "tpacpi:green:batt",
+"tpacpi::dock_active", "tpacpi::bay_active", "tpacpi::dock_batt",
+"tpacpi::unknown_led", "tpacpi::standby", "tpacpi::dock_status1",
+"tpacpi::dock_status2", "tpacpi::unknown_led2", "tpacpi::unknown_led3",
+"tpacpi::thinkvantage".
+
+Due to limitations in the sysfs LED class, if the status of the LED
+indicators cannot be read due to an error, thinkpad-acpi will report it as
+a brightness of zero (same as LED off).
+
+If the thinkpad firmware doesn't support reading the current status,
+trying to read the current LED brightness will just return whatever
+brightness was last written to that attribute.
+
+These LEDs can blink using hardware acceleration. To request that a
+ThinkPad indicator LED should blink in hardware accelerated mode, use the
+"timer" trigger, and leave the delay_on and delay_off parameters set to
+zero (to request hardware acceleration autodetection).
+
+LEDs that are known not to exist in a given ThinkPad model are not
+made available through the sysfs interface. If you have a dock and you
+notice there are LEDs listed for your ThinkPad that do not exist (and
+are not in the dock), or if you notice that there are missing LEDs,
+a report to ibm-acpi-devel@lists.sourceforge.net is appreciated.
+
+
+ACPI sounds -- /proc/acpi/ibm/beep
+----------------------------------
+
+The BEEP method is used internally by the ACPI firmware to provide
+audible alerts in various situations. This feature allows the same
+sounds to be triggered manually.
+
+The commands are non-negative integer numbers::
+
+ echo <number> >/proc/acpi/ibm/beep
+
+The valid <number> range is 0 to 17. Not all numbers trigger sounds
+and the sounds vary from model to model. Here is the behavior on the
+X40:
+
+ - 0 - stop a sound in progress (but use 17 to stop 16)
+ - 2 - two beeps, pause, third beep ("low battery")
+ - 3 - single beep
+ - 4 - high, followed by low-pitched beep ("unable")
+ - 5 - single beep
+ - 6 - very high, followed by high-pitched beep ("AC/DC")
+ - 7 - high-pitched beep
+ - 9 - three short beeps
+ - 10 - very long beep
+ - 12 - low-pitched beep
+ - 15 - three high-pitched beeps repeating constantly, stop with 0
+ - 16 - one medium-pitched beep repeating constantly, stop with 17
+ - 17 - stop 16
+
+
+Temperature sensors
+-------------------
+
+procfs: /proc/acpi/ibm/thermal
+
+sysfs device attributes: (hwmon "thinkpad") temp*_input
+
+Most ThinkPads include six or more separate temperature sensors but only
+expose the CPU temperature through the standard ACPI methods. This
+feature shows readings from up to eight different sensors on older
+ThinkPads, and up to sixteen different sensors on newer ThinkPads.
+
+For example, on the X40, a typical output may be:
+
+temperatures:
+ 42 42 45 41 36 -128 33 -128
+
+On the T43/p, a typical output may be:
+
+temperatures:
+ 48 48 36 52 38 -128 31 -128 48 52 48 -128 -128 -128 -128 -128
+
+The mapping of thermal sensors to physical locations varies depending on
+system-board model (and thus, on ThinkPad model).
+
+https://thinkwiki.org/wiki/Thermal_Sensors is a public wiki page that
+tries to track down these locations for various models.
+
+Most (newer?) models seem to follow this pattern:
+
+- 1: CPU
+- 2: (depends on model)
+- 3: (depends on model)
+- 4: GPU
+- 5: Main battery: main sensor
+- 6: Bay battery: main sensor
+- 7: Main battery: secondary sensor
+- 8: Bay battery: secondary sensor
+- 9-15: (depends on model)
+
+For the R51 (source: Thomas Gruber):
+
+- 2: Mini-PCI
+- 3: Internal HDD
+
+For the T43, T43/p (source: Shmidoax/Thinkwiki.org)
+https://thinkwiki.org/wiki/Thermal_Sensors#ThinkPad_T43.2C_T43p
+
+- 2: System board, left side (near PCMCIA slot), reported as HDAPS temp
+- 3: PCMCIA slot
+- 9: MCH (northbridge) to DRAM Bus
+- 10: Clock-generator, mini-pci card and ICH (southbridge), under Mini-PCI
+ card, under touchpad
+- 11: Power regulator, underside of system board, below F2 key
+
+The A31 has a very atypical layout for the thermal sensors
+(source: Milos Popovic, https://thinkwiki.org/wiki/Thermal_Sensors#ThinkPad_A31)
+
+- 1: CPU
+- 2: Main Battery: main sensor
+- 3: Power Converter
+- 4: Bay Battery: main sensor
+- 5: MCH (northbridge)
+- 6: PCMCIA/ambient
+- 7: Main Battery: secondary sensor
+- 8: Bay Battery: secondary sensor
+
+
+Procfs notes
+^^^^^^^^^^^^
+
+ Readings from sensors that are not available return -128.
+ No commands can be written to this file.
+
+Sysfs notes
+^^^^^^^^^^^
+
+ Sensors that are not available return the ENXIO error. This
+ status may change at runtime, as there are hotplug thermal
+ sensors, like those inside the batteries and docks.
+
+ thinkpad-acpi thermal sensors are reported through the hwmon
+ subsystem, and follow all of the hwmon guidelines at
+ Documentation/hwmon.
+
+EXPERIMENTAL: Embedded controller register dump
+-----------------------------------------------
+
+This feature is not included in the thinkpad driver anymore.
+Instead the EC can be accessed through /sys/kernel/debug/ec with
+a userspace tool which can be found here:
+ftp://ftp.suse.com/pub/people/trenn/sources/ec
+
+Use it to determine the register holding the fan
+speed on some models. To do that, do the following:
+
+ - make sure the battery is fully charged
+ - make sure the fan is running
+ - use above mentioned tool to read out the EC
+
+Often fan and temperature values vary between
+readings. Since temperatures don't change vary fast, you can take
+several quick dumps to eliminate them.
+
+You can use a similar method to figure out the meaning of other
+embedded controller registers - e.g. make sure nothing else changes
+except the charging or discharging battery to determine which
+registers contain the current battery capacity, etc. If you experiment
+with this, do send me your results (including some complete dumps with
+a description of the conditions when they were taken.)
+
+
+LCD brightness control
+----------------------
+
+procfs: /proc/acpi/ibm/brightness
+
+sysfs backlight device "thinkpad_screen"
+
+This feature allows software control of the LCD brightness on ThinkPad
+models which don't have a hardware brightness slider.
+
+It has some limitations: the LCD backlight cannot be actually turned
+on or off by this interface, it just controls the backlight brightness
+level.
+
+On IBM (and some of the earlier Lenovo) ThinkPads, the backlight control
+has eight brightness levels, ranging from 0 to 7. Some of the levels
+may not be distinct. Later Lenovo models that implement the ACPI
+display backlight brightness control methods have 16 levels, ranging
+from 0 to 15.
+
+For IBM ThinkPads, there are two interfaces to the firmware for direct
+brightness control, EC and UCMS (or CMOS). To select which one should be
+used, use the brightness_mode module parameter: brightness_mode=1 selects
+EC mode, brightness_mode=2 selects UCMS mode, brightness_mode=3 selects EC
+mode with NVRAM backing (so that brightness changes are remembered across
+shutdown/reboot).
+
+The driver tries to select which interface to use from a table of
+defaults for each ThinkPad model. If it makes a wrong choice, please
+report this as a bug, so that we can fix it.
+
+Lenovo ThinkPads only support brightness_mode=2 (UCMS).
+
+When display backlight brightness controls are available through the
+standard ACPI interface, it is best to use it instead of this direct
+ThinkPad-specific interface. The driver will disable its native
+backlight brightness control interface if it detects that the standard
+ACPI interface is available in the ThinkPad.
+
+If you want to use the thinkpad-acpi backlight brightness control
+instead of the generic ACPI video backlight brightness control for some
+reason, you should use the acpi_backlight=vendor kernel parameter.
+
+The brightness_enable module parameter can be used to control whether
+the LCD brightness control feature will be enabled when available.
+brightness_enable=0 forces it to be disabled. brightness_enable=1
+forces it to be enabled when available, even if the standard ACPI
+interface is also available.
+
+Procfs notes
+^^^^^^^^^^^^
+
+The available commands are::
+
+ echo up >/proc/acpi/ibm/brightness
+ echo down >/proc/acpi/ibm/brightness
+ echo 'level <level>' >/proc/acpi/ibm/brightness
+
+Sysfs notes
+^^^^^^^^^^^
+
+The interface is implemented through the backlight sysfs class, which is
+poorly documented at this time.
+
+Locate the thinkpad_screen device under /sys/class/backlight, and inside
+it there will be the following attributes:
+
+ max_brightness:
+ Reads the maximum brightness the hardware can be set to.
+ The minimum is always zero.
+
+ actual_brightness:
+ Reads what brightness the screen is set to at this instant.
+
+ brightness:
+ Writes request the driver to change brightness to the
+ given value. Reads will tell you what brightness the
+ driver is trying to set the display to when "power" is set
+ to zero and the display has not been dimmed by a kernel
+ power management event.
+
+ power:
+ power management mode, where 0 is "display on", and 1 to 3
+ will dim the display backlight to brightness level 0
+ because thinkpad-acpi cannot really turn the backlight
+ off. Kernel power management events can temporarily
+ increase the current power management level, i.e. they can
+ dim the display.
+
+
+WARNING:
+
+ Whatever you do, do NOT ever call thinkpad-acpi backlight-level change
+ interface and the ACPI-based backlight level change interface
+ (available on newer BIOSes, and driven by the Linux ACPI video driver)
+ at the same time. The two will interact in bad ways, do funny things,
+ and maybe reduce the life of the backlight lamps by needlessly kicking
+ its level up and down at every change.
+
+
+Volume control (Console Audio control)
+--------------------------------------
+
+procfs: /proc/acpi/ibm/volume
+
+ALSA: "ThinkPad Console Audio Control", default ID: "ThinkPadEC"
+
+NOTE: by default, the volume control interface operates in read-only
+mode, as it is supposed to be used for on-screen-display purposes.
+The read/write mode can be enabled through the use of the
+"volume_control=1" module parameter.
+
+NOTE: distros are urged to not enable volume_control by default, this
+should be done by the local admin only. The ThinkPad UI is for the
+console audio control to be done through the volume keys only, and for
+the desktop environment to just provide on-screen-display feedback.
+Software volume control should be done only in the main AC97/HDA
+mixer.
+
+
+About the ThinkPad Console Audio control
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ThinkPads have a built-in amplifier and muting circuit that drives the
+console headphone and speakers. This circuit is after the main AC97
+or HDA mixer in the audio path, and under exclusive control of the
+firmware.
+
+ThinkPads have three special hotkeys to interact with the console
+audio control: volume up, volume down and mute.
+
+It is worth noting that the normal way the mute function works (on
+ThinkPads that do not have a "mute LED") is:
+
+1. Press mute to mute. It will *always* mute, you can press it as
+ many times as you want, and the sound will remain mute.
+
+2. Press either volume key to unmute the ThinkPad (it will _not_
+ change the volume, it will just unmute).
+
+This is a very superior design when compared to the cheap software-only
+mute-toggle solution found on normal consumer laptops: you can be
+absolutely sure the ThinkPad will not make noise if you press the mute
+button, no matter the previous state.
+
+The IBM ThinkPads, and the earlier Lenovo ThinkPads have variable-gain
+amplifiers driving the speakers and headphone output, and the firmware
+also handles volume control for the headphone and speakers on these
+ThinkPads without any help from the operating system (this volume
+control stage exists after the main AC97 or HDA mixer in the audio
+path).
+
+The newer Lenovo models only have firmware mute control, and depend on
+the main HDA mixer to do volume control (which is done by the operating
+system). In this case, the volume keys are filtered out for unmute
+key press (there are some firmware bugs in this area) and delivered as
+normal key presses to the operating system (thinkpad-acpi is not
+involved).
+
+
+The ThinkPad-ACPI volume control
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The preferred way to interact with the Console Audio control is the
+ALSA interface.
+
+The legacy procfs interface allows one to read the current state,
+and if volume control is enabled, accepts the following commands::
+
+ echo up >/proc/acpi/ibm/volume
+ echo down >/proc/acpi/ibm/volume
+ echo mute >/proc/acpi/ibm/volume
+ echo unmute >/proc/acpi/ibm/volume
+ echo 'level <level>' >/proc/acpi/ibm/volume
+
+The <level> number range is 0 to 14 although not all of them may be
+distinct. To unmute the volume after the mute command, use either the
+up or down command (the level command will not unmute the volume), or
+the unmute command.
+
+You can use the volume_capabilities parameter to tell the driver
+whether your thinkpad has volume control or mute-only control:
+volume_capabilities=1 for mixers with mute and volume control,
+volume_capabilities=2 for mixers with only mute control.
+
+If the driver misdetects the capabilities for your ThinkPad model,
+please report this to ibm-acpi-devel@lists.sourceforge.net, so that we
+can update the driver.
+
+There are two strategies for volume control. To select which one
+should be used, use the volume_mode module parameter: volume_mode=1
+selects EC mode, and volume_mode=3 selects EC mode with NVRAM backing
+(so that volume/mute changes are remembered across shutdown/reboot).
+
+The driver will operate in volume_mode=3 by default. If that does not
+work well on your ThinkPad model, please report this to
+ibm-acpi-devel@lists.sourceforge.net.
+
+The driver supports the standard ALSA module parameters. If the ALSA
+mixer is disabled, the driver will disable all volume functionality.
+
+
+Fan control and monitoring: fan speed, fan enable/disable
+---------------------------------------------------------
+
+procfs: /proc/acpi/ibm/fan
+
+sysfs device attributes: (hwmon "thinkpad") fan1_input, pwm1, pwm1_enable, fan2_input
+
+sysfs hwmon driver attributes: fan_watchdog
+
+NOTE NOTE NOTE:
+ fan control operations are disabled by default for
+ safety reasons. To enable them, the module parameter "fan_control=1"
+ must be given to thinkpad-acpi.
+
+This feature attempts to show the current fan speed, control mode and
+other fan data that might be available. The speed is read directly
+from the hardware registers of the embedded controller. This is known
+to work on later R, T, X and Z series ThinkPads but may show a bogus
+value on other models.
+
+Some Lenovo ThinkPads support a secondary fan. This fan cannot be
+controlled separately, it shares the main fan control.
+
+Fan levels
+^^^^^^^^^^
+
+Most ThinkPad fans work in "levels" at the firmware interface. Level 0
+stops the fan. The higher the level, the higher the fan speed, although
+adjacent levels often map to the same fan speed. 7 is the highest
+level, where the fan reaches the maximum recommended speed.
+
+Level "auto" means the EC changes the fan level according to some
+internal algorithm, usually based on readings from the thermal sensors.
+
+There is also a "full-speed" level, also known as "disengaged" level.
+In this level, the EC disables the speed-locked closed-loop fan control,
+and drives the fan as fast as it can go, which might exceed hardware
+limits, so use this level with caution.
+
+The fan usually ramps up or down slowly from one speed to another, and
+it is normal for the EC to take several seconds to react to fan
+commands. The full-speed level may take up to two minutes to ramp up to
+maximum speed, and in some ThinkPads, the tachometer readings go stale
+while the EC is transitioning to the full-speed level.
+
+WARNING WARNING WARNING: do not leave the fan disabled unless you are
+monitoring all of the temperature sensor readings and you are ready to
+enable it if necessary to avoid overheating.
+
+An enabled fan in level "auto" may stop spinning if the EC decides the
+ThinkPad is cool enough and doesn't need the extra airflow. This is
+normal, and the EC will spin the fan up if the various thermal readings
+rise too much.
+
+On the X40, this seems to depend on the CPU and HDD temperatures.
+Specifically, the fan is turned on when either the CPU temperature
+climbs to 56 degrees or the HDD temperature climbs to 46 degrees. The
+fan is turned off when the CPU temperature drops to 49 degrees and the
+HDD temperature drops to 41 degrees. These thresholds cannot
+currently be controlled.
+
+The ThinkPad's ACPI DSDT code will reprogram the fan on its own when
+certain conditions are met. It will override any fan programming done
+through thinkpad-acpi.
+
+The thinkpad-acpi kernel driver can be programmed to revert the fan
+level to a safe setting if userspace does not issue one of the procfs
+fan commands: "enable", "disable", "level" or "watchdog", or if there
+are no writes to pwm1_enable (or to pwm1 *if and only if* pwm1_enable is
+set to 1, manual mode) within a configurable amount of time of up to
+120 seconds. This functionality is called fan safety watchdog.
+
+Note that the watchdog timer stops after it enables the fan. It will be
+rearmed again automatically (using the same interval) when one of the
+above mentioned fan commands is received. The fan watchdog is,
+therefore, not suitable to protect against fan mode changes made through
+means other than the "enable", "disable", and "level" procfs fan
+commands, or the hwmon fan control sysfs interface.
+
+Procfs notes
+^^^^^^^^^^^^
+
+The fan may be enabled or disabled with the following commands::
+
+ echo enable >/proc/acpi/ibm/fan
+ echo disable >/proc/acpi/ibm/fan
+
+Placing a fan on level 0 is the same as disabling it. Enabling a fan
+will try to place it in a safe level if it is too slow or disabled.
+
+The fan level can be controlled with the command::
+
+ echo 'level <level>' > /proc/acpi/ibm/fan
+
+Where <level> is an integer from 0 to 7, or one of the words "auto" or
+"full-speed" (without the quotes). Not all ThinkPads support the "auto"
+and "full-speed" levels. The driver accepts "disengaged" as an alias for
+"full-speed", and reports it as "disengaged" for backwards
+compatibility.
+
+On the X31 and X40 (and ONLY on those models), the fan speed can be
+controlled to a certain degree. Once the fan is running, it can be
+forced to run faster or slower with the following command::
+
+ echo 'speed <speed>' > /proc/acpi/ibm/fan
+
+The sustainable range of fan speeds on the X40 appears to be from about
+3700 to about 7350. Values outside this range either do not have any
+effect or the fan speed eventually settles somewhere in that range. The
+fan cannot be stopped or started with this command. This functionality
+is incomplete, and not available through the sysfs interface.
+
+To program the safety watchdog, use the "watchdog" command::
+
+ echo 'watchdog <interval in seconds>' > /proc/acpi/ibm/fan
+
+If you want to disable the watchdog, use 0 as the interval.
+
+Sysfs notes
+^^^^^^^^^^^
+
+The sysfs interface follows the hwmon subsystem guidelines for the most
+part, and the exception is the fan safety watchdog.
+
+Writes to any of the sysfs attributes may return the EINVAL error if
+that operation is not supported in a given ThinkPad or if the parameter
+is out-of-bounds, and EPERM if it is forbidden. They may also return
+EINTR (interrupted system call), and EIO (I/O error while trying to talk
+to the firmware).
+
+Features not yet implemented by the driver return ENOSYS.
+
+hwmon device attribute pwm1_enable:
+ - 0: PWM offline (fan is set to full-speed mode)
+ - 1: Manual PWM control (use pwm1 to set fan level)
+ - 2: Hardware PWM control (EC "auto" mode)
+ - 3: reserved (Software PWM control, not implemented yet)
+
+ Modes 0 and 2 are not supported by all ThinkPads, and the
+ driver is not always able to detect this. If it does know a
+ mode is unsupported, it will return -EINVAL.
+
+hwmon device attribute pwm1:
+ Fan level, scaled from the firmware values of 0-7 to the hwmon
+ scale of 0-255. 0 means fan stopped, 255 means highest normal
+ speed (level 7).
+
+ This attribute only commands the fan if pmw1_enable is set to 1
+ (manual PWM control).
+
+hwmon device attribute fan1_input:
+ Fan tachometer reading, in RPM. May go stale on certain
+ ThinkPads while the EC transitions the PWM to offline mode,
+ which can take up to two minutes. May return rubbish on older
+ ThinkPads.
+
+hwmon device attribute fan2_input:
+ Fan tachometer reading, in RPM, for the secondary fan.
+ Available only on some ThinkPads. If the secondary fan is
+ not installed, will always read 0.
+
+hwmon driver attribute fan_watchdog:
+ Fan safety watchdog timer interval, in seconds. Minimum is
+ 1 second, maximum is 120 seconds. 0 disables the watchdog.
+
+To stop the fan: set pwm1 to zero, and pwm1_enable to 1.
+
+To start the fan in a safe mode: set pwm1_enable to 2. If that fails
+with EINVAL, try to set pwm1_enable to 1 and pwm1 to at least 128 (255
+would be the safest choice, though).
+
+
+WAN
+---
+
+procfs: /proc/acpi/ibm/wan
+
+sysfs device attribute: wwan_enable (deprecated)
+
+sysfs rfkill class: switch "tpacpi_wwan_sw"
+
+This feature shows the presence and current state of the built-in
+Wireless WAN device.
+
+If the ThinkPad supports it, the WWAN state is stored in NVRAM,
+so it is kept across reboots and power-off.
+
+It was tested on a Lenovo ThinkPad X60. It should probably work on other
+ThinkPad models which come with this module installed.
+
+Procfs notes
+^^^^^^^^^^^^
+
+If the W-WAN card is installed, the following commands can be used::
+
+ echo enable > /proc/acpi/ibm/wan
+ echo disable > /proc/acpi/ibm/wan
+
+Sysfs notes
+^^^^^^^^^^^
+
+ If the W-WAN card is installed, it can be enabled /
+ disabled through the "wwan_enable" thinkpad-acpi device
+ attribute, and its current status can also be queried.
+
+ enable:
+ - 0: disables WWAN card / WWAN card is disabled
+ - 1: enables WWAN card / WWAN card is enabled.
+
+ Note: this interface has been superseded by the generic rfkill
+ class. It has been deprecated, and it will be removed in year
+ 2010.
+
+ rfkill controller switch "tpacpi_wwan_sw": refer to
+ Documentation/driver-api/rfkill.rst for details.
+
+
+LCD Shadow control
+------------------
+
+procfs: /proc/acpi/ibm/lcdshadow
+
+Some newer T480s and T490s ThinkPads provide a feature called
+PrivacyGuard. By turning this feature on, the usable vertical and
+horizontal viewing angles of the LCD can be limited (as if some privacy
+screen was applied manually in front of the display).
+
+procfs notes
+^^^^^^^^^^^^
+
+The available commands are::
+
+ echo '0' >/proc/acpi/ibm/lcdshadow
+ echo '1' >/proc/acpi/ibm/lcdshadow
+
+The first command ensures the best viewing angle and the latter one turns
+on the feature, restricting the viewing angles.
+
+
+DYTC Lapmode sensor
+-------------------
+
+sysfs: dytc_lapmode
+
+Newer thinkpads and mobile workstations have the ability to determine if
+the device is in deskmode or lapmode. This feature is used by user space
+to decide if WWAN transmission can be increased to maximum power and is
+also useful for understanding the different thermal modes available as
+they differ between desk and lap mode.
+
+The property is read-only. If the platform doesn't have support the sysfs
+class is not created.
+
+EXPERIMENTAL: UWB
+-----------------
+
+This feature is considered EXPERIMENTAL because it has not been extensively
+tested and validated in various ThinkPad models yet. The feature may not
+work as expected. USE WITH CAUTION! To use this feature, you need to supply
+the experimental=1 parameter when loading the module.
+
+sysfs rfkill class: switch "tpacpi_uwb_sw"
+
+This feature exports an rfkill controller for the UWB device, if one is
+present and enabled in the BIOS.
+
+Sysfs notes
+^^^^^^^^^^^
+
+ rfkill controller switch "tpacpi_uwb_sw": refer to
+ Documentation/driver-api/rfkill.rst for details.
+
+
+Setting keyboard language
+-------------------------
+
+sysfs: keyboard_lang
+
+This feature is used to set keyboard language to ECFW using ASL interface.
+Fewer thinkpads models like T580 , T590 , T15 Gen 1 etc.. has "=", "(',
+")" numeric keys, which are not displaying correctly, when keyboard language
+is other than "english". This is because the default keyboard language in ECFW
+is set as "english". Hence using this sysfs, user can set the correct keyboard
+language to ECFW and then these key's will work correctly.
+
+Example of command to set keyboard language is mentioned below::
+
+ echo jp > /sys/devices/platform/thinkpad_acpi/keyboard_lang
+
+Text corresponding to keyboard layout to be set in sysfs are: be(Belgian),
+cz(Czech), da(Danish), de(German), en(English), es(Spain), et(Estonian),
+fr(French), fr-ch(French(Switzerland)), hu(Hungarian), it(Italy), jp (Japan),
+nl(Dutch), nn(Norway), pl(Polish), pt(portuguese), sl(Slovenian), sv(Sweden),
+tr(Turkey)
+
+WWAN Antenna type
+-----------------
+
+sysfs: wwan_antenna_type
+
+On some newer Thinkpads we need to set SAR value based on the antenna
+type. This interface will be used by userspace to get the antenna type
+and set the corresponding SAR value, as is required for FCC certification.
+
+The available commands are::
+
+ cat /sys/devices/platform/thinkpad_acpi/wwan_antenna_type
+
+Currently 2 antenna types are supported as mentioned below:
+- type a
+- type b
+
+The property is read-only. If the platform doesn't have support the sysfs
+class is not created.
+
+Auxmac
+------
+
+sysfs: auxmac
+
+Some newer Thinkpads have a feature called MAC Address Pass-through. This
+feature is implemented by the system firmware to provide a system unique MAC,
+that can override a dock or USB ethernet dongle MAC, when connected to a
+network. This property enables user-space to easily determine the MAC address
+if the feature is enabled.
+
+The values of this auxiliary MAC are:
+
+ cat /sys/devices/platform/thinkpad_acpi/auxmac
+
+If the feature is disabled, the value will be 'disabled'.
+
+This property is read-only.
+
+Adaptive keyboard
+-----------------
+
+sysfs device attribute: adaptive_kbd_mode
+
+This sysfs attribute controls the keyboard "face" that will be shown on the
+Lenovo X1 Carbon 2nd gen (2014)'s adaptive keyboard. The value can be read
+and set.
+
+- 0 = Home mode
+- 1 = Web-browser mode
+- 2 = Web-conference mode
+- 3 = Function mode
+- 4 = Layflat mode
+
+For more details about which buttons will appear depending on the mode, please
+review the laptop's user guide:
+https://download.lenovo.com/ibmdl/pub/pc/pccbbs/mobiles_pdf/x1carbon_2_ug_en.pdf
+
+Battery charge control
+----------------------
+
+sysfs attributes:
+/sys/class/power_supply/BAT*/charge_control_{start,end}_threshold
+
+These two attributes are created for those batteries that are supported by the
+driver. They enable the user to control the battery charge thresholds of the
+given battery. Both values may be read and set. `charge_control_start_threshold`
+accepts an integer between 0 and 99 (inclusive); this value represents a battery
+percentage level, below which charging will begin. `charge_control_end_threshold`
+accepts an integer between 1 and 100 (inclusive); this value represents a battery
+percentage level, above which charging will stop.
+
+The exact semantics of the attributes may be found in
+Documentation/ABI/testing/sysfs-class-power.
+
+Multiple Commands, Module Parameters
+------------------------------------
+
+Multiple commands can be written to the proc files in one shot by
+separating them with commas, for example::
+
+ echo enable,0xffff > /proc/acpi/ibm/hotkey
+ echo lcd_disable,crt_enable > /proc/acpi/ibm/video
+
+Commands can also be specified when loading the thinkpad-acpi module,
+for example::
+
+ modprobe thinkpad_acpi hotkey=enable,0xffff video=auto_disable
+
+
+Enabling debugging output
+-------------------------
+
+The module takes a debug parameter which can be used to selectively
+enable various classes of debugging output, for example::
+
+ modprobe thinkpad_acpi debug=0xffff
+
+will enable all debugging output classes. It takes a bitmask, so
+to enable more than one output class, just add their values.
+
+ ============= ======================================
+ Debug bitmask Description
+ ============= ======================================
+ 0x8000 Disclose PID of userspace programs
+ accessing some functions of the driver
+ 0x0001 Initialization and probing
+ 0x0002 Removal
+ 0x0004 RF Transmitter control (RFKILL)
+ (bluetooth, WWAN, UWB...)
+ 0x0008 HKEY event interface, hotkeys
+ 0x0010 Fan control
+ 0x0020 Backlight brightness
+ 0x0040 Audio mixer/volume control
+ ============= ======================================
+
+There is also a kernel build option to enable more debugging
+information, which may be necessary to debug driver problems.
+
+The level of debugging information output by the driver can be changed
+at runtime through sysfs, using the driver attribute debug_level. The
+attribute takes the same bitmask as the debug module parameter above.
+
+
+Force loading of module
+-----------------------
+
+If thinkpad-acpi refuses to detect your ThinkPad, you can try to specify
+the module parameter force_load=1. Regardless of whether this works or
+not, please contact ibm-acpi-devel@lists.sourceforge.net with a report.
+
+
+Sysfs interface changelog
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+========= ===============================================================
+0x000100: Initial sysfs support, as a single platform driver and
+ device.
+0x000200: Hot key support for 32 hot keys, and radio slider switch
+ support.
+0x010000: Hot keys are now handled by default over the input
+ layer, the radio switch generates input event EV_RADIO,
+ and the driver enables hot key handling by default in
+ the firmware.
+
+0x020000: ABI fix: added a separate hwmon platform device and
+ driver, which must be located by name (thinkpad)
+ and the hwmon class for libsensors4 (lm-sensors 3)
+ compatibility. Moved all hwmon attributes to this
+ new platform device.
+
+0x020100: Marker for thinkpad-acpi with hot key NVRAM polling
+ support. If you must, use it to know you should not
+ start a userspace NVRAM poller (allows to detect when
+ NVRAM is compiled out by the user because it is
+ unneeded/undesired in the first place).
+0x020101: Marker for thinkpad-acpi with hot key NVRAM polling
+ and proper hotkey_mask semantics (version 8 of the
+ NVRAM polling patch). Some development snapshots of
+ 0.18 had an earlier version that did strange things
+ to hotkey_mask.
+
+0x020200: Add poll()/select() support to the following attributes:
+ hotkey_radio_sw, wakeup_hotunplug_complete, wakeup_reason
+
+0x020300: hotkey enable/disable support removed, attributes
+ hotkey_bios_enabled and hotkey_enable deprecated and
+ marked for removal.
+
+0x020400: Marker for 16 LEDs support. Also, LEDs that are known
+ to not exist in a given model are not registered with
+ the LED sysfs class anymore.
+
+0x020500: Updated hotkey driver, hotkey_mask is always available
+ and it is always able to disable hot keys. Very old
+ thinkpads are properly supported. hotkey_bios_mask
+ is deprecated and marked for removal.
+
+0x020600: Marker for backlight change event support.
+
+0x020700: Support for mute-only mixers.
+ Volume control in read-only mode by default.
+ Marker for ALSA mixer support.
+
+0x030000: Thermal and fan sysfs attributes were moved to the hwmon
+ device instead of being attached to the backing platform
+ device.
+========= ===============================================================
diff --git a/Documentation/admin-guide/laptops/toshiba_haps.rst b/Documentation/admin-guide/laptops/toshiba_haps.rst
new file mode 100644
index 000000000000..d28b6c3f2849
--- /dev/null
+++ b/Documentation/admin-guide/laptops/toshiba_haps.rst
@@ -0,0 +1,87 @@
+====================================
+Toshiba HDD Active Protection Sensor
+====================================
+
+Kernel driver: toshiba_haps
+
+Author: Azael Avalos <coproscefalo@gmail.com>
+
+
+.. 0. Contents
+
+ 1. Description
+ 2. Interface
+ 3. Accelerometer axes
+ 4. Supported devices
+ 5. Usage
+
+
+1. Description
+--------------
+
+This driver provides support for the accelerometer found in various Toshiba
+laptops, being called "Toshiba HDD Protection - Shock Sensor" officially,
+and detects laptops automatically with this device.
+On Windows, Toshiba provided software monitors this device and provides
+automatic HDD protection (head unload) on sudden moves or harsh vibrations,
+however, this driver only provides a notification via a sysfs file to let
+userspace tools or daemons act accordingly, as well as providing a sysfs
+file to set the desired protection level or sensor sensibility.
+
+
+2. Interface
+------------
+
+This device comes with 3 methods:
+
+==== =====================================================================
+_STA Checks existence of the device, returning Zero if the device does not
+ exists or is not supported.
+PTLV Sets the desired protection level.
+RSSS Shuts down the HDD protection interface for a few seconds,
+ then restores normal operation.
+==== =====================================================================
+
+Note:
+ The presence of Solid State Drives (SSD) can make this driver to fail loading,
+ given the fact that such drives have no movable parts, and thus, not requiring
+ any "protection" as well as failing during the evaluation of the _STA method
+ found under this device.
+
+
+3. Accelerometer axes
+---------------------
+
+This device does not report any axes, however, to query the sensor position
+a couple HCI (Hardware Configuration Interface) calls (0x6D and 0xA6) are
+provided to query such information, handled by the kernel module toshiba_acpi
+since kernel version 3.15.
+
+
+4. Supported devices
+--------------------
+
+This driver binds itself to the ACPI device TOS620A, and any Toshiba laptop
+with this device is supported, given the fact that they have the presence of
+conventional HDD and not only SSD, or a combination of both HDD and SSD.
+
+
+5. Usage
+--------
+
+The sysfs files under /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS620A:00/ are:
+
+================ ============================================================
+protection_level The protection_level is readable and writeable, and
+ provides a way to let userspace query the current protection
+ level, as well as set the desired protection level, the
+ available protection levels are::
+
+ ============ ======= ========== ========
+ 0 - Disabled 1 - Low 2 - Medium 3 - High
+ ============ ======= ========== ========
+
+reset_protection The reset_protection entry is writeable only, being "1"
+ the only parameter it accepts, it is used to trigger
+ a reset of the protection interface.
+================ ============================================================
diff --git a/Documentation/admin-guide/lcd-panel-cgram.rst b/Documentation/admin-guide/lcd-panel-cgram.rst
new file mode 100644
index 000000000000..a3eb00c62f53
--- /dev/null
+++ b/Documentation/admin-guide/lcd-panel-cgram.rst
@@ -0,0 +1,27 @@
+======================================
+Parallel port LCD/Keypad Panel support
+======================================
+
+Some LCDs allow you to define up to 8 characters, mapped to ASCII
+characters 0 to 7. The escape code to define a new character is
+'\e[LG' followed by one digit from 0 to 7, representing the character
+number, and up to 8 couples of hex digits terminated by a semi-colon
+(';'). Each couple of digits represents a line, with 1-bits for each
+illuminated pixel with LSB on the right. Lines are numbered from the
+top of the character to the bottom. On a 5x7 matrix, only the 5 lower
+bits of the 7 first bytes are used for each character. If the string
+is incomplete, only complete lines will be redefined. Here are some
+examples::
+
+ printf "\e[LG0010101050D1F0C04;" => 0 = [enter]
+ printf "\e[LG1040E1F0000000000;" => 1 = [up]
+ printf "\e[LG2000000001F0E0400;" => 2 = [down]
+ printf "\e[LG3040E1F001F0E0400;" => 3 = [up-down]
+ printf "\e[LG40002060E1E0E0602;" => 4 = [left]
+ printf "\e[LG500080C0E0F0E0C08;" => 5 = [right]
+ printf "\e[LG60016051516141400;" => 6 = "IP"
+
+ printf "\e[LG00103071F1F070301;" => big speaker
+ printf "\e[LG00002061E1E060200;" => small speaker
+
+Willy
diff --git a/Documentation/ldm.txt b/Documentation/admin-guide/ldm.rst
index 12c571368e73..12c571368e73 100644
--- a/Documentation/ldm.txt
+++ b/Documentation/admin-guide/ldm.rst
diff --git a/Documentation/admin-guide/lockup-watchdogs.rst b/Documentation/admin-guide/lockup-watchdogs.rst
new file mode 100644
index 000000000000..3e09284a8b9b
--- /dev/null
+++ b/Documentation/admin-guide/lockup-watchdogs.rst
@@ -0,0 +1,83 @@
+===============================================================
+Softlockup detector and hardlockup detector (aka nmi_watchdog)
+===============================================================
+
+The Linux kernel can act as a watchdog to detect both soft and hard
+lockups.
+
+A 'softlockup' is defined as a bug that causes the kernel to loop in
+kernel mode for more than 20 seconds (see "Implementation" below for
+details), without giving other tasks a chance to run. The current
+stack trace is displayed upon detection and, by default, the system
+will stay locked up. Alternatively, the kernel can be configured to
+panic; a sysctl, "kernel.softlockup_panic", a kernel parameter,
+"softlockup_panic" (see "Documentation/admin-guide/kernel-parameters.rst" for
+details), and a compile option, "BOOTPARAM_SOFTLOCKUP_PANIC", are
+provided for this.
+
+A 'hardlockup' is defined as a bug that causes the CPU to loop in
+kernel mode for more than 10 seconds (see "Implementation" below for
+details), without letting other interrupts have a chance to run.
+Similarly to the softlockup case, the current stack trace is displayed
+upon detection and the system will stay locked up unless the default
+behavior is changed, which can be done through a sysctl,
+'hardlockup_panic', a compile time knob, "BOOTPARAM_HARDLOCKUP_PANIC",
+and a kernel parameter, "nmi_watchdog"
+(see "Documentation/admin-guide/kernel-parameters.rst" for details).
+
+The panic option can be used in combination with panic_timeout (this
+timeout is set through the confusingly named "kernel.panic" sysctl),
+to cause the system to reboot automatically after a specified amount
+of time.
+
+Implementation
+==============
+
+The soft and hard lockup detectors are built on top of the hrtimer and
+perf subsystems, respectively. A direct consequence of this is that,
+in principle, they should work in any architecture where these
+subsystems are present.
+
+A periodic hrtimer runs to generate interrupts and kick the watchdog
+job. An NMI perf event is generated every "watchdog_thresh"
+(compile-time initialized to 10 and configurable through sysctl of the
+same name) seconds to check for hardlockups. If any CPU in the system
+does not receive any hrtimer interrupt during that time the
+'hardlockup detector' (the handler for the NMI perf event) will
+generate a kernel warning or call panic, depending on the
+configuration.
+
+The watchdog job runs in a stop scheduling thread that updates a
+timestamp every time it is scheduled. If that timestamp is not updated
+for 2*watchdog_thresh seconds (the softlockup threshold) the
+'softlockup detector' (coded inside the hrtimer callback function)
+will dump useful debug information to the system log, after which it
+will call panic if it was instructed to do so or resume execution of
+other kernel code.
+
+The period of the hrtimer is 2*watchdog_thresh/5, which means it has
+two or three chances to generate an interrupt before the hardlockup
+detector kicks in.
+
+As explained above, a kernel knob is provided that allows
+administrators to configure the period of the hrtimer and the perf
+event. The right value for a particular environment is a trade-off
+between fast response to lockups and detection overhead.
+
+By default, the watchdog runs on all online cores. However, on a
+kernel configured with NO_HZ_FULL, by default the watchdog runs only
+on the housekeeping cores, not the cores specified in the "nohz_full"
+boot argument. If we allowed the watchdog to run by default on
+the "nohz_full" cores, we would have to run timer ticks to activate
+the scheduler, which would prevent the "nohz_full" functionality
+from protecting the user code on those cores from the kernel.
+Of course, disabling it by default on the nohz_full cores means that
+when those cores do enter the kernel, by default we will not be
+able to detect if they lock up. However, allowing the watchdog
+to continue to run on the housekeeping (non-tickless) cores means
+that we will continue to detect lockups properly on those cores.
+
+In either case, the set of cores excluded from running the watchdog
+may be adjusted via the kernel.watchdog_cpumask sysctl. For
+nohz_full cores, this may be useful for debugging a case where the
+kernel seems to be hanging on the nohz_full cores.
diff --git a/Documentation/admin-guide/md.rst b/Documentation/admin-guide/md.rst
index 84de718f24a4..deed823eab01 100644
--- a/Documentation/admin-guide/md.rst
+++ b/Documentation/admin-guide/md.rst
@@ -5,7 +5,7 @@ Boot time assembly of RAID arrays
---------------------------------
Tools that manage md devices can be found at
- http://www.kernel.org/pub/linux/utils/raid/
+ https://www.kernel.org/pub/linux/utils/raid/
You can boot with your md device with the following kernel command
@@ -221,7 +221,7 @@ All md devices contain:
layout
The ``layout`` for the array for the particular level. This is
- simply a number that is interpretted differently by different
+ simply a number that is interpreted differently by different
levels. It can be written while assembling an array.
array_size
@@ -317,7 +317,7 @@ All md devices contain:
suspended (not supported yet)
All IO requests will block. The array can be reconfigured.
- Writing this, if accepted, will block until array is quiessent
+ Writing this, if accepted, will block until array is quiescent
readonly
no resync can happen. no superblocks get written.
@@ -347,6 +347,54 @@ All md devices contain:
active-idle
like active, but no writes have been seen for a while (safe_mode_delay).
+ consistency_policy
+ This indicates how the array maintains consistency in case of unexpected
+ shutdown. It can be:
+
+ none
+ Array has no redundancy information, e.g. raid0, linear.
+
+ resync
+ Full resync is performed and all redundancy is regenerated when the
+ array is started after unclean shutdown.
+
+ bitmap
+ Resync assisted by a write-intent bitmap.
+
+ journal
+ For raid4/5/6, journal device is used to log transactions and replay
+ after unclean shutdown.
+
+ ppl
+ For raid5 only, Partial Parity Log is used to close the write hole and
+ eliminate resync.
+
+ The accepted values when writing to this file are ``ppl`` and ``resync``,
+ used to enable and disable PPL.
+
+ uuid
+ This indicates the UUID of the array in the following format:
+ xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
+
+ bitmap_type
+ [RW] When read, this file will display the current and available
+ bitmap for this array. The currently active bitmap will be enclosed
+ in [] brackets. Writing an bitmap name or ID to this file will switch
+ control of this array to that new bitmap. Note that writing a new
+ bitmap for created array is forbidden.
+
+ none
+ No bitmap
+ bitmap
+ The default internal bitmap
+ llbitmap
+ The lockless internal bitmap
+
+If bitmap_type is not none, then additional bitmap attributes bitmap/xxx or
+llbitmap/xxx will be created after md device KOBJ_CHANGE event.
+
+If bitmap_type is bitmap, then the md device will also contain:
+
bitmap/location
This indicates where the write-intent bitmap for the array is
stored.
@@ -401,31 +449,23 @@ All md devices contain:
once the array becomes non-degraded, and this fact has been
recorded in the metadata.
- consistency_policy
- This indicates how the array maintains consistency in case of unexpected
- shutdown. It can be:
+If bitmap_type is llbitmap, then the md device will also contain:
- none
- Array has no redundancy information, e.g. raid0, linear.
+ llbitmap/bits
+ This is read-only, show status of bitmap bits, the number of each
+ value.
- resync
- Full resync is performed and all redundancy is regenerated when the
- array is started after unclean shutdown.
+ llbitmap/metadata
+ This is read-only, show bitmap metadata, include chunksize, chunkshift,
+ chunks, offset and daemon_sleep.
- bitmap
- Resync assisted by a write-intent bitmap.
-
- journal
- For raid4/5/6, journal device is used to log transactions and replay
- after unclean shutdown.
-
- ppl
- For raid5 only, Partial Parity Log is used to close the write hole and
- eliminate resync.
-
- The accepted values when writing to this file are ``ppl`` and ``resync``,
- used to enable and disable PPL.
+ llbitmap/daemon_sleep
+ This is read-write, time in seconds that daemon function will be
+ triggered to clear dirty bits.
+ llbitmap/barrier_idle
+ This is read-write, time in seconds that page barrier will be idled,
+ means dirty bits in the page will be cleared.
As component devices are added to an md array, they appear in the ``md``
directory as new directories named::
@@ -754,5 +794,8 @@ These currently include:
journal_mode (currently raid5 only)
The cache mode for raid5. raid5 could include an extra disk for
- caching. The mode can be "write-throuth" and "write-back". The
+ caching. The mode can be "write-through" or "write-back". The
default is "write-through".
+
+ ppl_write_hint
+ NVMe stream ID to be set for each PPL write request.
diff --git a/Documentation/media/v4l-drivers/au0828-cardlist.rst b/Documentation/admin-guide/media/au0828-cardlist.rst
index bb87b7b36a83..aaaadc934e7a 100644
--- a/Documentation/media/v4l-drivers/au0828-cardlist.rst
+++ b/Documentation/admin-guide/media/au0828-cardlist.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
AU0828 cards list
=================
diff --git a/Documentation/admin-guide/media/avermedia.rst b/Documentation/admin-guide/media/avermedia.rst
new file mode 100644
index 000000000000..93ff74002d20
--- /dev/null
+++ b/Documentation/admin-guide/media/avermedia.rst
@@ -0,0 +1,94 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================================
+Avermedia DVB-T on BT878 Release Notes
+======================================
+
+February 14th 2006
+
+.. note::
+
+ Several other Avermedia devices are supported. For a more
+ broader and updated content about that, please check:
+
+ https://linuxtv.org/wiki/index.php/AVerMedia
+
+The Avermedia DVB-T
+~~~~~~~~~~~~~~~~~~~
+
+The Avermedia DVB-T is a budget PCI DVB card. It has 3 inputs:
+
+* RF Tuner Input
+* Composite Video Input (RCA Jack)
+* SVIDEO Input (Mini-DIN)
+
+The RF Tuner Input is the input to the tuner module of the
+card. The Tuner is otherwise known as the "Frontend" . The
+Frontend of the Avermedia DVB-T is a Microtune 7202D. A timely
+post to the linux-dvb mailing list ascertained that the
+Microtune 7202D is supported by the sp887x driver which is
+found in the dvb-hw CVS module.
+
+The DVB-T card is based around the BT878 chip which is a very
+common multimedia bridge and often found on Analogue TV cards.
+There is no on-board MPEG2 decoder, which means that all MPEG2
+decoding must be done in software, or if you have one, on an
+MPEG2 hardware decoding card or chipset.
+
+
+Getting the card going
+~~~~~~~~~~~~~~~~~~~~~~
+
+At this stage, it has not been able to ascertain the
+functionality of the remaining device nodes in respect of the
+Avermedia DVBT. However, full functionality in respect of
+tuning, receiving and supplying the MPEG2 data stream is
+possible with the currently available versions of the driver.
+It may be possible that additional functionality is available
+from the card (i.e. viewing the additional analogue inputs
+that the card presents), but this has not been tested yet. If
+I get around to this, I'll update the document with whatever I
+find.
+
+To power up the card, load the following modules in the
+following order:
+
+* modprobe bttv (normally loaded automatically)
+* modprobe dvb-bt8xx (or place dvb-bt8xx in /etc/modules)
+
+Insertion of these modules into the running kernel will
+activate the appropriate DVB device nodes. It is then possible
+to start accessing the card with utilities such as scan, tzap,
+dvbstream etc.
+
+The frontend module sp887x.o, requires an external firmware.
+Please use the command "get_dvb_firmware sp887x" to download
+it. Then copy it to /usr/lib/hotplug/firmware or /lib/firmware/
+(depending on configuration of firmware hotplug).
+
+Known Limitations
+~~~~~~~~~~~~~~~~~
+
+At present I can say with confidence that the frontend tunes
+via /dev/dvb/adapter{x}/frontend0 and supplies an MPEG2 stream
+via /dev/dvb/adapter{x}/dvr0. I have not tested the
+functionality of any other part of the card yet. I will do so
+over time and update this document.
+
+There are some limitations in the i2c layer due to a returned
+error message inconsistency. Although this generates errors in
+dmesg and the system logs, it does not appear to affect the
+ability of the frontend to function correctly.
+
+Further Update
+~~~~~~~~~~~~~~
+
+dvbstream and VideoLAN Client on windows works a treat with
+DVB, in fact this is currently serving as my main way of
+viewing DVB-T at the moment. Additionally, VLC is happily
+decoding HDTV signals, although the PC is dropping the odd
+frame here and there - I assume due to processing capability -
+as all the decoding is being done under windows in software.
+
+Many thanks to Nigel Pearson for the updates to this document
+since the recent revision of the driver.
diff --git a/Documentation/admin-guide/media/bt8xx.rst b/Documentation/admin-guide/media/bt8xx.rst
new file mode 100644
index 000000000000..3589f6ab7e46
--- /dev/null
+++ b/Documentation/admin-guide/media/bt8xx.rst
@@ -0,0 +1,157 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==================================
+How to get the bt8xx cards working
+==================================
+
+Authors:
+ Richard Walker,
+ Jamie Honan,
+ Michael Hunold,
+ Manu Abraham,
+ Uwe Bugla,
+ Michael Krufky
+
+General information
+-------------------
+
+This class of cards has a bt878a as the PCI interface, and require the bttv
+driver for accessing the i2c bus and the gpio pins of the bt8xx chipset.
+
+Please see Documentation/admin-guide/media/bttv-cardlist.rst for a complete
+list of Cards based on the Conexant Bt8xx PCI bridge supported by the
+Linux Kernel.
+
+In order to be able to compile the kernel, some config options should be
+enabled::
+
+ ./scripts/config -e PCI
+ ./scripts/config -e INPUT
+ ./scripts/config -m I2C
+ ./scripts/config -m MEDIA_SUPPORT
+ ./scripts/config -e MEDIA_PCI_SUPPORT
+ ./scripts/config -e MEDIA_ANALOG_TV_SUPPORT
+ ./scripts/config -e MEDIA_DIGITAL_TV_SUPPORT
+ ./scripts/config -e MEDIA_RADIO_SUPPORT
+ ./scripts/config -e RC_CORE
+ ./scripts/config -m VIDEO_BT848
+ ./scripts/config -m DVB_BT8XX
+
+If you want to automatically support all possible variants of the Bt8xx
+cards, you should also do::
+
+ ./scripts/config -e MEDIA_SUBDRV_AUTOSELECT
+
+.. note::
+
+ Please use the following options with care as deselection of drivers which
+ are in fact necessary may result in DVB devices that cannot be tuned due
+ to lack of driver support.
+
+If your goal is to just support an specific board, you may, instead,
+disable MEDIA_SUBDRV_AUTOSELECT and manually select the frontend drivers
+required by your board. With that, you can save some RAM.
+
+You can do that by calling make xconfig/qconfig/menuconfig and look at
+the options on those menu options (only enabled if
+``Autoselect ancillary drivers`` is disabled:
+
+#) ``Device drivers`` => ``Multimedia support`` => ``Customize TV tuners``
+#) ``Device drivers`` => ``Multimedia support`` => ``Customize DVB frontends``
+
+Then, on each of the above menu, please select your card-specific
+frontend and tuner modules.
+
+
+Loading Modules
+---------------
+
+Regular case: If the bttv driver detects a bt8xx-based DVB card, all
+frontend and backend modules will be loaded automatically.
+
+Exceptions are:
+
+- Old TV cards without EEPROMs, sharing a common PCI subsystem ID;
+- Old TwinHan DST cards or clones with or without CA slot and not
+ containing an Eeprom.
+
+In the following cases overriding the PCI type detection for bttv and
+for dvb-bt8xx drivers by passing modprobe parameters may be necessary.
+
+Running TwinHan and Clones
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+As shown at Documentation/admin-guide/media/bttv-cardlist.rst, TwinHan and
+clones use ``card=113`` modprobe parameter. So, in order to properly
+detect it for devices without EEPROM, you should use::
+
+ $ modprobe bttv card=113
+ $ modprobe dst
+
+Useful parameters for verbosity level and debugging the dst module::
+
+ verbose=0: messages are disabled
+ 1: only error messages are displayed
+ 2: notifications are displayed
+ 3: other useful messages are displayed
+ 4: debug setting
+ dst_addons=0: card is a free to air (FTA) card only
+ 0x20: card has a conditional access slot for scrambled channels
+ dst_algo=0: (default) Software tuning algorithm
+ 1: Hardware tuning algorithm
+
+
+The autodetected values are determined by the cards' "response string".
+
+In your logs see f. ex.: dst_get_device_id: Recognize [DSTMCI].
+
+For bug reports please send in a complete log with verbose=4 activated.
+Please also see Documentation/admin-guide/media/ci.rst.
+
+Running multiple cards
+~~~~~~~~~~~~~~~~~~~~~~
+
+See Documentation/admin-guide/media/bttv-cardlist.rst for a complete list of
+Card ID. Some examples:
+
+ =========================== ===
+ Brand name ID
+ =========================== ===
+ Pinnacle PCTV Sat 94
+ Nebula Electronics Digi TV 104
+ pcHDTV HD-2000 TV 112
+ Twinhan DST and clones 113
+ Avermedia AverTV DVB-T 77: 123
+ Avermedia AverTV DVB-T 761 124
+ DViCO FusionHDTV DVB-T Lite 128
+ DViCO FusionHDTV 5 Lite 135
+ =========================== ===
+
+.. note::
+
+ When you have multiple cards, the order of the card ID should
+ match the order where they're detected by the system. Please notice
+ that removing/inserting other PCI cards may change the detection
+ order.
+
+Example::
+
+ $ modprobe bttv card=113 card=135
+
+In case of further problems please subscribe and send questions to
+the mailing list: linux-media@vger.kernel.org.
+
+Probing the cards with broken PCI subsystem ID
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+There are some TwinHan cards whose EEPROM has become corrupted for some
+reason. The cards do not have a correct PCI subsystem ID.
+Still, it is possible to force probing the cards with::
+
+ $ echo 109e 0878 $subvendor $subdevice > \
+ /sys/bus/pci/drivers/bt878/new_id
+
+The two numbers there are::
+
+ 109e: PCI_VENDOR_ID_BROOKTREE
+ 0878: PCI_DEVICE_ID_BROOKTREE_878
diff --git a/Documentation/media/v4l-drivers/bttv-cardlist.rst b/Documentation/admin-guide/media/bttv-cardlist.rst
index 8da27b924e01..8671d4f7ba7b 100644
--- a/Documentation/media/v4l-drivers/bttv-cardlist.rst
+++ b/Documentation/admin-guide/media/bttv-cardlist.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
BTTV cards list
===============
@@ -10,7 +12,7 @@ BTTV cards list
* - Card number
- Card name
- - PCI IDs
+ - PCI subsystem IDs
* - 0
- *** UNKNOWN/GENERIC ***
diff --git a/Documentation/media/v4l-drivers/bttv.rst b/Documentation/admin-guide/media/bttv.rst
index 5f35e2fb5afa..58cbaf6df694 100644
--- a/Documentation/media/v4l-drivers/bttv.rst
+++ b/Documentation/admin-guide/media/bttv.rst
@@ -1,46 +1,65 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============
The bttv driver
===============
Release notes for bttv
----------------------
-You'll need at least these config options for bttv:
+You'll need at least these config options for bttv::
-.. code-block:: none
+ ./scripts/config -e PCI
+ ./scripts/config -m I2C
+ ./scripts/config -m INPUT
+ ./scripts/config -m MEDIA_SUPPORT
+ ./scripts/config -e MEDIA_PCI_SUPPORT
+ ./scripts/config -e MEDIA_ANALOG_TV_SUPPORT
+ ./scripts/config -e MEDIA_DIGITAL_TV_SUPPORT
+ ./scripts/config -e MEDIA_RADIO_SUPPORT
+ ./scripts/config -e RC_CORE
+ ./scripts/config -m VIDEO_BT848
- CONFIG_I2C=m
- CONFIG_I2C_ALGOBIT=m
- CONFIG_VIDEO_DEV=m
+If your board has digital TV, you'll also need::
-The latest bttv version is available from http://bytesex.org/bttv/
+ ./scripts/config -m DVB_BT8XX
+In this case, please see Documentation/admin-guide/media/bt8xx.rst
+for additional notes.
Make bttv work with your card
-----------------------------
-Just try "modprobe bttv" and see if that works.
+If you have bttv compiled and installed, just booting the Kernel
+should be enough for it to try probing it. However, depending
+on the model, the Kernel may require additional information about
+the hardware, as the device may not be able to provide such info
+directly to the Kernel.
If it doesn't bttv likely could not autodetect your card and needs some
insmod options. The most important insmod option for bttv is "card=n"
to select the correct card type. If you get video but no sound you've
very likely specified the wrong (or no) card type. A list of supported
-cards is in CARDLIST.bttv
+cards is in Documentation/admin-guide/media/bttv-cardlist.rst.
If bttv takes very long to load (happens sometimes with the cheap
-cards which have no tuner), try adding this to your modules.conf:
-
-.. code-block:: none
+cards which have no tuner), try adding this to your modules configuration
+file (usually, it is either ``/etc/modules.conf`` or some file at
+``/etc/modules-load.d/``, but the actual place depends on your
+distribution)::
options i2c-algo-bit bit_test=1
-For the WinTV/PVR you need one firmware file from the driver CD:
-hcwamc.rbf. The file is in the pvr45xxx.exe archive (self-extracting
-zip file, unzip can unpack it). Put it into the /etc/pvr directory or
-use the firm_altera=<path> insmod option to point the driver to the
-location of the file.
+Some cards may require an extra firmware file to work. For example,
+for the WinTV/PVR you need one firmware file from its driver CD,
+called: ``hcwamc.rbf``. It is inside a self-extracting zip file
+called ``pvr45xxx.exe``. Just placing it at the ``/etc/firmware``
+directory should be enough for it to be autoload during the driver's
+probing mode (e. g. when the Kernel boots or when the driver is
+manually loaded via ``modprobe`` command).
-If your card isn't listed in CARDLIST.bttv or if you have trouble making
-audio work, you should read the Sound-FAQ.
+If your card isn't listed in Documentation/admin-guide/media/bttv-cardlist.rst
+or if you have trouble making audio work, please read :ref:`still_doesnt_work`.
Autodetecting cards
@@ -59,16 +78,19 @@ the Subsystem ID in the second line, looks like this:
only bt878-based cards can have a subsystem ID (which does not mean
that every card really has one). bt848 cards can't have a Subsystem
ID and therefore can't be autodetected. There is a list with the ID's
-in bttv-cards.c (in case you are intrested or want to mail patches
-with updates).
+at Documentation/admin-guide/media/bttv-cardlist.rst
+(in case you are interested or want to mail patches with updates).
+.. _still_doesnt_work:
+
Still doesn't work?
-------------------
I do NOT have a lab with 30+ different grabber boards and a
PAL/NTSC/SECAM test signal generator at home, so I often can't
reproduce your problems. This makes debugging very difficult for me.
+
If you have some knowledge and spare time, please try to fix this
yourself (patches very welcome of course...) You know: The linux
slogan is "Do it yourself".
@@ -83,109 +105,110 @@ same card listens there is much higher...
For problems with sound: There are a lot of different systems used
for TV sound all over the world. And there are also different chips
which decode the audio signal. Reports about sound problems ("stereo
-does'nt work") are pretty useless unless you include some details
+doesn't work") are pretty useless unless you include some details
about your hardware and the TV sound scheme used in your country (or
at least the country you are living in).
Modprobe options
----------------
-Note: "modinfo <module>" prints various information about a kernel
-module, among them a complete and up-to-date list of insmod options.
-This list tends to be outdated because it is updated manually ...
-
-==========================================================================
+.. note::
-bttv.o
-.. code-block:: none
-
- the bt848/878 (grabber chip) driver
-
- insmod args:
- card=n card type, see CARDLIST for a list.
- tuner=n tuner type, see CARDLIST for a list.
- radio=0/1 card supports radio
- pll=0/1/2 pll settings
- 0: don't use PLL
- 1: 28 MHz crystal installed
- 2: 35 MHz crystal installed
-
- triton1=0/1 for Triton1 (+others) compatibility
- vsfx=0/1 yet another chipset bug compatibility bit
- see README.quirks for details on these two.
-
- bigendian=n Set the endianness of the gfx framebuffer.
- Default is native endian.
- fieldnr=0/1 Count fields. Some TV descrambling software
- needs this, for others it only generates
- 50 useless IRQs/sec. default is 0 (off).
- autoload=0/1 autoload helper modules (tuner, audio).
- default is 1 (on).
- bttv_verbose=0/1/2 verbose level (at insmod time, while
- looking at the hardware). default is 1.
- bttv_debug=0/1 debug messages (for capture).
- default is 0 (off).
- irq_debug=0/1 irq handler debug messages.
- default is 0 (off).
- gbuffers=2-32 number of capture buffers for mmap'ed capture.
- default is 4.
- gbufsize= size of capture buffers. default and
- maximum value is 0x208000 (~2MB)
- no_overlay=0 Enable overlay on broken hardware. There
- are some chipsets (SIS for example) which
- are known to have problems with the PCI DMA
- push used by bttv. bttv will disable overlay
- by default on this hardware to avoid crashes.
- With this insmod option you can override this.
- no_overlay=1 Disable overlay. It should be used by broken
- hardware that doesn't support PCI2PCI direct
- transfers.
- automute=0/1 Automatically mutes the sound if there is
- no TV signal, on by default. You might try
- to disable this if you have bad input signal
- quality which leading to unwanted sound
- dropouts.
- chroma_agc=0/1 AGC of chroma signal, off by default.
- adc_crush=0/1 Luminance ADC crush, on by default.
- i2c_udelay= Allow reduce I2C speed. Default is 5 usecs
- (meaning 66,67 Kbps). The default is the
- maximum supported speed by kernel bitbang
- algorithm. You may use lower numbers, if I2C
- messages are lost (16 is known to work on
- all supported cards).
-
- bttv_gpio=0/1
- gpiomask=
- audioall=
- audiomux=
- See Sound-FAQ for a detailed description.
+ The following argument list can be outdated, as we might add more
+ options if ever needed. In case of doubt, please check with
+ ``modinfo <module>``.
+
+ This command prints various information about a kernel
+ module, among them a complete and up-to-date list of insmod options.
+
+
+
+bttv
+ The bt848/878 (grabber chip) driver
+
+ insmod args::
+
+ card=n card type, see CARDLIST for a list.
+ tuner=n tuner type, see CARDLIST for a list.
+ radio=0/1 card supports radio
+ pll=0/1/2 pll settings
+
+ 0: don't use PLL
+ 1: 28 MHz crystal installed
+ 2: 35 MHz crystal installed
+
+ triton1=0/1 for Triton1 (+others) compatibility
+ vsfx=0/1 yet another chipset bug compatibility bit
+ see README.quirks for details on these two.
+
+ bigendian=n Set the endianness of the gfx framebuffer.
+ Default is native endian.
+ fieldnr=0/1 Count fields. Some TV descrambling software
+ needs this, for others it only generates
+ 50 useless IRQs/sec. default is 0 (off).
+ autoload=0/1 autoload helper modules (tuner, audio).
+ default is 1 (on).
+ bttv_verbose=0/1/2 verbose level (at insmod time, while
+ looking at the hardware). default is 1.
+ bttv_debug=0/1 debug messages (for capture).
+ default is 0 (off).
+ irq_debug=0/1 irq handler debug messages.
+ default is 0 (off).
+ gbuffers=2-32 number of capture buffers for mmap'ed capture.
+ default is 4.
+ gbufsize= size of capture buffers. default and
+ maximum value is 0x208000 (~2MB)
+ no_overlay=0 Enable overlay on broken hardware. There
+ are some chipsets (SIS for example) which
+ are known to have problems with the PCI DMA
+ push used by bttv. bttv will disable overlay
+ by default on this hardware to avoid crashes.
+ With this insmod option you can override this.
+ no_overlay=1 Disable overlay. It should be used by broken
+ hardware that doesn't support PCI2PCI direct
+ transfers.
+ automute=0/1 Automatically mutes the sound if there is
+ no TV signal, on by default. You might try
+ to disable this if you have bad input signal
+ quality which leading to unwanted sound
+ dropouts.
+ chroma_agc=0/1 AGC of chroma signal, off by default.
+ adc_crush=0/1 Luminance ADC crush, on by default.
+ i2c_udelay= Allow reduce I2C speed. Default is 5 usecs
+ (meaning 66,67 Kbps). The default is the
+ maximum supported speed by kernel bitbang
+ algorithm. You may use lower numbers, if I2C
+ messages are lost (16 is known to work on
+ all supported cards).
+
+ bttv_gpio=0/1
+ gpiomask=
+ audioall=
+ audiomux=
+ See Sound-FAQ for a detailed description.
remap, card, radio and pll accept up to four comma-separated arguments
(for multiple boards).
-tuner.o
-
-.. code-block:: none
-
+tuner
The tuner driver. You need this unless you want to use only
- with a camera or external tuner ...
+ with a camera or the board doesn't provide analog TV tuning.
+
+ insmod args::
- insmod args:
debug=1 print some debug info to the syslog
type=n type of the tuner chip. n as follows:
see CARDLIST for a complete list.
pal=[bdgil] select PAL variant (used for some tuners
only, important for the audio carrier).
-tvaudio.o
+tvaudio
+ Provide a single driver for all simple i2c audio control
+ chips (tda/tea*).
-.. code-block:: none
-
- new, experimental module which is supported to provide a single
- driver for all simple i2c audio control chips (tda/tea*).
+ insmod args::
- insmod args:
tda8425 = 1 enable/disable the support for the
tda9840 = 1 various chips.
tda9850 = 1 The tea6300 can't be autodetected and is
@@ -198,45 +221,12 @@ tvaudio.o
the wrong one.
debug = 1 print debug messages
- insmod args for tda9874a:
- tda9874a_SIF=1/2 select sound IF input pin (1 or 2)
- (default is pin 1)
- tda9874a_AMSEL=0/1 auto-mute select for NICAM (default=0)
- Please read note 3 below!
- tda9874a_STD=n select TV sound standard (0..8):
- 0 - A2, B/G
- 1 - A2, M (Korea)
- 2 - A2, D/K (1)
- 3 - A2, D/K (2)
- 4 - A2, D/K (3)
- 5 - NICAM, I
- 6 - NICAM, B/G
- 7 - NICAM, D/K (default)
- 8 - NICAM, L
-
- Note 1: tda9874a supports both tda9874h (old) and tda9874a (new) chips.
- Note 2: tda9874h/a and tda9875 (which is supported separately by
- tda9875.o) use the same i2c address so both modules should not be
- used at the same time.
- Note 3: Using tda9874a_AMSEL option depends on your TV card design!
- AMSEL=0: auto-mute will switch between NICAM sound
- and the sound on 1st carrier (i.e. FM mono or AM).
- AMSEL=1: auto-mute will switch between NICAM sound
- and the analog mono input (MONOIN pin).
- If tda9874a decoder on your card has MONOIN pin not connected, then
- use only tda9874_AMSEL=0 or don't specify this option at all.
- For example:
- card=65 (FlyVideo 2000S) - set AMSEL=1 or AMSEL=0
- card=72 (Prolink PV-BT878P rev.9B) - set AMSEL=0 only
-
-msp3400.o
-
-.. code-block:: none
-
+msp3400
The driver for the msp34xx sound processor chips. If you have a
stereo card, you probably want to insmod this one.
- insmod args:
+ insmod args::
+
debug=1/2 print some debug info to the syslog,
2 is more verbose.
simple=1 Use the "short programming" method. Newer
@@ -250,40 +240,6 @@ msp3400.o
should improve things for french people, the
carrier autoscan seems to work with FM only...
-tea6300.o - OBSOLETE (use tvaudio instead)
-
-.. code-block:: none
-
- The driver for the tea6300 fader chip. If you have a stereo
- card and the msp3400.o doesn't work, you might want to try this
- one. This chip is seen on most STB TV/FM cards (usually from
- Gateway OEM sold surplus on auction sites).
-
- insmod args:
- debug=1 print some debug info to the syslog.
-
-tda8425.o - OBSOLETE (use tvaudio instead)
-
-.. code-block:: none
-
- The driver for the tda8425 fader chip. This driver used to be
- part of bttv.c, so if your sound used to work but does not
- anymore, try loading this module.
-
- insmod args:
- debug=1 print some debug info to the syslog.
-
-tda985x.o - OBSOLETE (use tvaudio instead)
-
-.. code-block:: none
-
- The driver for the tda9850/55 audio chips.
-
- insmod args:
- debug=1 print some debug info to the syslog.
- chip=9850/9855 set the chip type.
-
-
If the box freezes hard with bttv
---------------------------------
@@ -304,8 +260,8 @@ bug. It is very helpful if you can tell where exactly it broke
With a hard freeze you probably doesn't find anything in the logfiles.
The only way to capture any kernel messages is to hook up a serial
console and let some terminal application log the messages. /me uses
-screen. See Documentation/admin-guide/serial-console.rst for details on setting
-up a serial console.
+screen. See Documentation/admin-guide/serial-console.rst for details on
+setting up a serial console.
Read Documentation/admin-guide/bug-hunting.rst to learn how to get any useful
information out of a register+stack dump printed by the kernel on
@@ -436,134 +392,12 @@ parking, thus lowering arbitration performance. The Bt879 drivers must
query for these non-compliant devices, and set the EN_VSFX bit only if
required.
-bttv and sound mini howto
--------------------------
-
-There are a lot of different bt848/849/878/879 based boards available.
-Making video work often is not a big deal, because this is handled
-completely by the bt8xx chip, which is common on all boards. But
-sound is handled in slightly different ways on each board.
-
-To handle the grabber boards correctly, there is a array tvcards[] in
-bttv-cards.c, which holds the information required for each board.
-Sound will work only, if the correct entry is used (for video it often
-makes no difference). The bttv driver prints a line to the kernel
-log, telling which card type is used. Like this one:
-
-.. code-block:: none
-
- bttv0: model: BT848(Hauppauge old) [autodetected]
-
-You should verify this is correct. If it isn't, you have to pass the
-correct board type as insmod argument, "insmod bttv card=2" for
-example. The file CARDLIST has a list of valid arguments for card.
-If your card isn't listed there, you might check the source code for
-new entries which are not listed yet. If there isn't one for your
-card, you can check if one of the existing entries does work for you
-(just trial and error...).
-
-Some boards have an extra processor for sound to do stereo decoding
-and other nice features. The msp34xx chips are used by Hauppauge for
-example. If your board has one, you might have to load a helper
-module like msp3400.o to make sound work. If there isn't one for the
-chip used on your board: Bad luck. Start writing a new one. Well,
-you might want to check the video4linux mailing list archive first...
-
-Of course you need a correctly installed soundcard unless you have the
-speakers connected directly to the grabber board. Hint: check the
-mixer settings too. ALSA for example has everything muted by default.
-
-
-How sound works in detail
-~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Still doesn't work? Looks like some driver hacking is required.
-Below is a do-it-yourself description for you.
-
-The bt8xx chips have 32 general purpose pins, and registers to control
-these pins. One register is the output enable register
-(BT848_GPIO_OUT_EN), it says which pins are actively driven by the
-bt848 chip. Another one is the data register (BT848_GPIO_DATA), where
-you can get/set the status if these pins. They can be used for input
-and output.
-
-Most grabber board vendors use these pins to control an external chip
-which does the sound routing. But every board is a little different.
-These pins are also used by some companies to drive remote control
-receiver chips. Some boards use the i2c bus instead of the gpio pins
-to connect the mux chip.
-
-As mentioned above, there is a array which holds the required
-information for each known board. You basically have to create a new
-line for your board. The important fields are these two:
-
-.. code-block:: c
-
- struct tvcard
- {
- [ ... ]
- u32 gpiomask;
- u32 audiomux[6]; /* Tuner, Radio, external, internal, mute, stereo */
- };
-
-gpiomask specifies which pins are used to control the audio mux chip.
-The corresponding bits in the output enable register
-(BT848_GPIO_OUT_EN) will be set as these pins must be driven by the
-bt848 chip.
-
-The audiomux\[\] array holds the data values for the different inputs
-(i.e. which pins must be high/low for tuner/mute/...). This will be
-written to the data register (BT848_GPIO_DATA) to switch the audio
-mux.
-
-
-What you have to do is figure out the correct values for gpiomask and
-the audiomux array. If you have Windows and the drivers four your
-card installed, you might to check out if you can read these registers
-values used by the windows driver. A tool to do this is available
-from ftp://telepresence.dmem.strath.ac.uk/pub/bt848/winutil, but it
-doesn't work with bt878 boards according to some reports I received.
-Another one with bt878 support is available from
-http://btwincap.sourceforge.net/Files/btspy2.00.zip
-
-You might also dig around in the \*.ini files of the Windows applications.
-You can have a look at the board to see which of the gpio pins are
-connected at all and then start trial-and-error ...
-
-
-Starting with release 0.7.41 bttv has a number of insmod options to
-make the gpio debugging easier:
-
-.. code-block:: none
-
- bttv_gpio=0/1 enable/disable gpio debug messages
- gpiomask=n set the gpiomask value
- audiomux=i,j,... set the values of the audiomux array
- audioall=a set the values of the audiomux array (one
- value for all array elements, useful to check
- out which effect the particular value has).
-
-The messages printed with bttv_gpio=1 look like this:
-
-.. code-block:: none
-
- bttv0: gpio: en=00000027, out=00000024 in=00ffffd8 [audio: off]
-
- en = output _en_able register (BT848_GPIO_OUT_EN)
- out = _out_put bits of the data register (BT848_GPIO_DATA),
- i.e. BT848_GPIO_DATA & BT848_GPIO_OUT_EN
- in = _in_put bits of the data register,
- i.e. BT848_GPIO_DATA & ~BT848_GPIO_OUT_EN
-
-
Other elements of the tvcards array
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
If you are trying to make a new card work you might find it useful to
-know what the other elements in the tvcards array are good for:
-
-.. code-block:: none
+know what the other elements in the tvcards array are good for::
video_inputs - # of video inputs the card has
audio_inputs - historical cruft, not used any more.
@@ -769,7 +603,7 @@ Identifying:
- Lifeview.com.tw states (Feb. 2002):
"The FlyVideo2000 and FlyVideo2000s product name have renamed to FlyVideo98."
Their Bt8x8 cards are listed as discontinued.
- - Flyvideo 2000S was probably sold as Flyvideo 3000 in some contries(Europe?).
+ - Flyvideo 2000S was probably sold as Flyvideo 3000 in some countries(Europe?).
The new Flyvideo 2000/3000 are SAA7130/SAA7134 based.
"Flyvideo II" had been the name for the 848 cards, nowadays (in Germany)
@@ -796,7 +630,9 @@ Typhoon TV card series:
~~~~~~~~~~~~~~~~~~~~~~~
These can be CPH, Flyvideo, Pixelview or KNC1 series.
+
Typhoon is the brand of Anubis.
+
Model 50680 got re-used, some model no. had different contents over time.
Models:
@@ -948,12 +784,13 @@ is wrong. If it doesn't work, send me email.
on their server are the full data-sheets, but don't ask how I found it.
To use the driver I use the following options, the tuner and pll settings might
-be different in your country
+be different in your country. You can force it via modprobe parameters.
+For example::
+
+ modprobe bttv tuner=1 pll=28 radio=1 card=17
-insmod videodev
-insmod i2c scan=1 i2c_debug=0 verbose=0
-insmod tuner type=1 debug=0
-insmod bttv pll=1 radio=1 card=17
+Sets tuner type 1 (Philips PAL_I), PLL with a 28 MHz crystal, enables
+FM radio and selects bttv card ID 17 (Leadtek WinView 601).
KNC One
@@ -972,15 +809,16 @@ KNC One
Provideo
~~~~~~~~
-- PV951 or PV-951 (also are sold as:
+- PV951 or PV-951, now named PV-951T
+ (also are sold as:
Boeder TV-FM Video Capture Card,
Titanmedia Supervision TV-2400,
Provideo PV951 TF,
3DeMon PV951,
MediaForte TV-Vision PV951,
Yoko PV951,
- Vivanco Tuner Card PCI Art.-Nr.: 68404,
- ) now named PV-951T
+ Vivanco Tuner Card PCI Art.-Nr.: 68404
+ )
- Surveillance Series:
@@ -1071,7 +909,7 @@ DE hat diverse Treiber fuer diese Modelle (Stand 09/2002):
- TVPhone98 (Bt878)
- AVerTV und TVCapture98 w/VCR (Bt 878)
- AVerTVStudio und TVPhone98 w/VCR (Bt878)
- - AVerTV GO Serie (Kein SVideo Input)
+ - AVerTV GO Series (Kein SVideo Input)
- AVerTV98 (BT-878 chip)
- AVerTV98 mit Fernbedienung (BT-878 chip)
- AVerTV/FM98 (BT-878 chip)
diff --git a/Documentation/admin-guide/media/building.rst b/Documentation/admin-guide/media/building.rst
new file mode 100644
index 000000000000..7a413ba07f93
--- /dev/null
+++ b/Documentation/admin-guide/media/building.rst
@@ -0,0 +1,357 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================================
+Building support for a media device
+===================================
+
+The first step is to download the Kernel's source code, either via a
+distribution-specific source file or via the Kernel's main git tree\ [1]_.
+
+Please notice, however, that, if:
+
+- you're a braveheart and want to experiment with new stuff;
+- if you want to report a bug;
+- if you're developing new patches
+
+you should use the main media development tree ``master`` branch:
+
+ https://git.linuxtv.org/media.git/
+
+In this case, you may find some useful information at the
+`LinuxTv wiki pages <https://linuxtv.org/wiki>`_:
+
+ https://linuxtv.org/wiki/index.php/How_to_Obtain,_Build_and_Install_V4L-DVB_Device_Drivers
+
+.. [1] The upstream Linux Kernel development tree is located at
+
+ https://git.kernel.org/pub/scm/li nux/kernel/git/torvalds/linux.git/
+
+Configuring the Linux Kernel
+============================
+
+You can access a menu of Kernel building options with::
+
+ $ make menuconfig
+
+Then, select all desired options and exit it, saving the configuration.
+
+The changed configuration will be at the ``.config`` file. It would
+look like::
+
+ ...
+ # CONFIG_RC_CORE is not set
+ # CONFIG_CEC_CORE is not set
+ CONFIG_MEDIA_SUPPORT=m
+ CONFIG_MEDIA_SUPPORT_FILTER=y
+ ...
+
+The media subsystem is controlled by those menu configuration options::
+
+ Device Drivers --->
+ <M> Remote Controller support --->
+ [ ] HDMI CEC RC integration
+ [ ] Enable CEC error injection support
+ [*] HDMI CEC drivers --->
+ <*> Multimedia support --->
+
+The ``Remote Controller support`` option enables the core support for
+remote controllers\ [2]_.
+
+The ``HDMI CEC RC integration`` option enables integration of HDMI CEC
+with Linux, allowing to receive data via HDMI CEC as if it were produced
+by a remote controller directly connected to the machine.
+
+The ``HDMI CEC drivers`` option allow selecting platform and USB drivers
+that receives and/or transmits CEC codes via HDMI interfaces\ [3]_.
+
+The last option (``Multimedia support``) enables support for cameras,
+audio/video grabbers and TV.
+
+The media subsystem support can either be built together with the main
+Kernel or as a module. For most use cases, it is preferred to have it
+built as modules.
+
+.. note::
+
+ Instead of using a menu, the Kernel provides a script with allows
+ enabling configuration options directly. To enable media support
+ and remote controller support using Kernel modules, you could use::
+
+ $ scripts/config -m RC_CORE
+ $ scripts/config -m MEDIA_SUPPORT
+
+.. [2] ``Remote Controller support`` should also be enabled if you
+ want to use some TV card drivers that may depend on the remote
+ controller core support.
+
+.. [3] Please notice that the DRM subsystem also have drivers for GPUs
+ that use the media HDMI CEC support.
+
+ Those GPU-specific drivers are selected via the ``Graphics support``
+ menu, under ``Device Drivers``.
+
+ When a GPU driver supports HDMI CEC, it will automatically
+ enable the CEC core support at the media subsystem.
+
+Media dependencies
+------------------
+
+It should be noticed that enabling the above from a clean config is
+usually not enough. The media subsystem depends on several other Linux
+core support in order to work.
+
+For example, most media devices use a serial communication bus in
+order to talk with some peripherals. Such bus is called I²C
+(Inter-Integrated Circuit). In order to be able to build support
+for such hardware, the I²C bus support should be enabled, either via
+menu or with::
+
+ ./scripts/config -m I2C
+
+Another example: the remote controller core requires support for
+input devices, with can be enabled with::
+
+ ./scripts/config -m INPUT
+
+Other core functionality may also be needed (like PCI and/or USB support),
+depending on the specific driver(s) you would like to enable.
+
+Enabling Remote Controller Support
+----------------------------------
+
+The remote controller menu allows selecting drivers for specific devices.
+It's menu looks like this::
+
+ --- Remote Controller support
+ <M> Compile Remote Controller keymap modules
+ [*] LIRC user interface
+ [*] Support for eBPF programs attached to lirc devices
+ [*] Remote controller decoders --->
+ [*] Remote Controller devices --->
+
+The ``Compile Remote Controller keymap modules`` option creates key maps for
+several popular remote controllers.
+
+The ``LIRC user interface`` option adds enhanced functionality when using the
+``lirc`` program, by enabling an API that allows userspace to receive raw data
+from remote controllers.
+
+The ``Support for eBPF programs attached to lirc devices`` option allows
+the usage of special programs (called eBPF) that would allow applications
+to add extra remote controller decoding functionality to the Linux Kernel.
+
+The ``Remote controller decoders`` option allows selecting the
+protocols that will be recognized by the Linux Kernel. Except if you
+want to disable some specific decoder, it is suggested to keep all
+sub-options enabled.
+
+The ``Remote Controller devices`` allows you to select the drivers
+that would be needed to support your device.
+
+The same configuration can also be set via the ``script/config``
+script. So, for instance, in order to support the ITE remote controller
+driver (found on Intel NUCs and on some ASUS x86 desktops), you could do::
+
+ $ scripts/config -e INPUT
+ $ scripts/config -e ACPI
+ $ scripts/config -e MODULES
+ $ scripts/config -m RC_CORE
+ $ scripts/config -e RC_DEVICES
+ $ scripts/config -e RC_DECODERS
+ $ scripts/config -m IR_RC5_DECODER
+ $ scripts/config -m IR_ITE_CIR
+
+Enabling HDMI CEC Support
+-------------------------
+
+The HDMI CEC support is set automatically when a driver requires it. So,
+all you need to do is to enable support either for a graphics card
+that needs it or by one of the existing HDMI drivers.
+
+The HDMI-specific drivers are available at the ``HDMI CEC drivers``
+menu\ [4]_::
+
+ --- HDMI CEC drivers
+ < > ChromeOS EC CEC driver
+ < > Amlogic Meson AO CEC driver
+ < > Amlogic Meson G12A AO CEC driver
+ < > Generic GPIO-based CEC driver
+ < > Samsung S5P CEC driver
+ < > STMicroelectronics STiH4xx HDMI CEC driver
+ < > STMicroelectronics STM32 HDMI CEC driver
+ < > Tegra HDMI CEC driver
+ < > SECO Boards HDMI CEC driver
+ [ ] SECO Boards IR RC5 support
+ < > Pulse Eight HDMI CEC
+ < > RainShadow Tech HDMI CEC
+
+.. [4] The above contents is just an example. The actual options for
+ HDMI devices depends on the system's architecture and may vary
+ on new Kernels.
+
+Enabling Media Support
+----------------------
+
+The Media menu has a lot more options than the remote controller menu.
+Once selected, you should see the following options::
+
+ --- Media support
+ [ ] Filter media drivers
+ [*] Autoselect ancillary drivers
+ Media device types --->
+ Media core support --->
+ Video4Linux options --->
+ Media controller options --->
+ Digital TV options --->
+ HDMI CEC options --->
+ Media drivers --->
+ Media ancillary drivers --->
+
+Except if you know exactly what you're doing, or if you want to build
+a driver for a SoC platform, it is strongly recommended to keep the
+``Autoselect ancillary drivers`` option turned on, as it will auto-select
+the needed I²C ancillary drivers.
+
+There are now two ways to select media device drivers, as described
+below.
+
+``Filter media drivers`` menu
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This menu is meant to easy setup for PC and Laptop hardware. It works
+by letting the user to specify what kind of media drivers are desired,
+with those options::
+
+ [ ] Cameras and video grabbers
+ [ ] Analog TV
+ [ ] Digital TV
+ [ ] AM/FM radio receivers/transmitters
+ [ ] Software defined radio
+ [ ] Platform-specific devices
+ [ ] Test drivers
+
+So, if you want to add support to a camera or video grabber only,
+select just the first option. Multiple options are allowed.
+
+Once the options on this menu are selected, the building system will
+auto-select the needed core drivers in order to support the selected
+functionality.
+
+.. note::
+
+ Most TV cards are hybrid: they support both Analog TV and Digital TV.
+
+ If you have an hybrid card, you may need to enable both ``Analog TV``
+ and ``Digital TV`` at the menu.
+
+When using this option, the defaults for the media support core
+functionality are usually good enough to provide the basic functionality
+for the driver. Yet, you could manually enable some desired extra (optional)
+functionality using the settings under each of the following
+``Media support`` sub-menus::
+
+ Media core support --->
+ Video4Linux options --->
+ Media controller options --->
+ Digital TV options --->
+ HDMI CEC options --->
+
+Once you select the desired filters, the drivers that matches the filtering
+criteria will be available at the ``Media support->Media drivers`` sub-menu.
+
+``Media Core Support`` menu without filtering
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If you disable the ``Filter media drivers`` menu, all drivers available
+for your system whose dependencies are met should be shown at the
+``Media drivers`` menu.
+
+Please notice, however, that you should first ensure that the
+``Media Core Support`` menu has all the core functionality your drivers
+would need, as otherwise the corresponding device drivers won't be shown.
+
+Example
+-------
+
+In order to enable modular support for one of the boards listed on
+:doc:`this table <cx231xx-cardlist>`, with modular media core modules, the
+``.config`` file should contain those lines::
+
+ CONFIG_MODULES=y
+ CONFIG_USB=y
+ CONFIG_I2C=y
+ CONFIG_INPUT=y
+ CONFIG_RC_CORE=m
+ CONFIG_MEDIA_SUPPORT=m
+ CONFIG_MEDIA_SUPPORT_FILTER=y
+ CONFIG_MEDIA_ANALOG_TV_SUPPORT=y
+ CONFIG_MEDIA_DIGITAL_TV_SUPPORT=y
+ CONFIG_MEDIA_USB_SUPPORT=y
+ CONFIG_VIDEO_CX231XX=y
+ CONFIG_VIDEO_CX231XX_DVB=y
+
+Building and installing a new Kernel
+====================================
+
+Once the ``.config`` file has everything needed, all it takes to build
+is to run the ``make`` command::
+
+ $ make
+
+And then install the new Kernel and its modules::
+
+ $ sudo make modules_install
+ $ sudo make install
+
+Building just the new media drivers and core
+============================================
+
+Running a new development Kernel from the development tree is usually risky,
+because it may have experimental changes that may have bugs. So, there are
+some ways to build just the new drivers, using alternative trees.
+
+There is the `Linux Kernel backports project
+<https://backports.wiki.kernel.org/index.php/Main_Page>`_, with contains
+newer drivers meant to be compiled against stable Kernels.
+
+The LinuxTV developers, with are responsible for maintaining the media
+subsystem also maintains a backport tree, with just the media drivers
+daily updated from the newest kernel. Such tree is available at:
+
+https://git.linuxtv.org/media_build.git/
+
+It should be noticed that, while it should be relatively safe to use the
+``media_build`` tree for testing purposes, there are not warranties that
+it would work (or even build) on a random Kernel. This tree is maintained
+using a "best-efforts" principle, as time permits us to fix issues there.
+
+If you notice anything wrong on it, feel free to submit patches at the
+Linux media subsystem's mailing list: media@vger.kernel.org. Please
+add ``[PATCH media-build]`` at the e-mail's subject if you submit a new
+patch for the media-build.
+
+Before using it, you should run::
+
+ $ ./build
+
+.. note::
+
+ 1) you may need to run it twice if the ``media-build`` tree gets
+ updated;
+ 2) you may need to do a ``make distclean`` if you had built it
+ in the past for a different Kernel version than the one you're
+ currently using;
+ 3) by default, it will use the same config options for media as
+ the ones defined on the Kernel you're running.
+
+In order to select different drivers or different config options,
+use::
+
+ $ make menuconfig
+
+Then, you can build and install the new drivers::
+
+ $ make && sudo make install
+
+This will override the previous media drivers that your Kernel were
+using.
diff --git a/Documentation/admin-guide/media/c3-isp.dot b/Documentation/admin-guide/media/c3-isp.dot
new file mode 100644
index 000000000000..42dc931ee84a
--- /dev/null
+++ b/Documentation/admin-guide/media/c3-isp.dot
@@ -0,0 +1,26 @@
+digraph board {
+ rankdir=TB
+ n00000001 [label="{{<port0> 0 | <port1> 1} | c3-isp-core\n/dev/v4l-subdev0 | {<port2> 2 | <port3> 3 | <port4> 4 | <port5> 5}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000001:port3 -> n00000008:port0
+ n00000001:port4 -> n0000000b:port0
+ n00000001:port5 -> n0000000e:port0
+ n00000001:port2 -> n00000027
+ n00000008 [label="{{<port0> 0} | c3-isp-resizer0\n/dev/v4l-subdev1 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000008:port1 -> n00000016 [style=bold]
+ n0000000b [label="{{<port0> 0} | c3-isp-resizer1\n/dev/v4l-subdev2 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
+ n0000000b:port1 -> n0000001a [style=bold]
+ n0000000e [label="{{<port0> 0} | c3-isp-resizer2\n/dev/v4l-subdev3 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
+ n0000000e:port1 -> n00000023 [style=bold]
+ n00000011 [label="{{<port0> 0} | c3-mipi-adapter\n/dev/v4l-subdev4 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000011:port1 -> n00000001:port0 [style=bold]
+ n00000016 [label="c3-isp-cap0\n/dev/video0", shape=box, style=filled, fillcolor=yellow]
+ n0000001a [label="c3-isp-cap1\n/dev/video1", shape=box, style=filled, fillcolor=yellow]
+ n0000001e [label="{{<port0> 0} | c3-mipi-csi2\n/dev/v4l-subdev5 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
+ n0000001e:port1 -> n00000011:port0 [style=bold]
+ n00000023 [label="c3-isp-cap2\n/dev/video2", shape=box, style=filled, fillcolor=yellow]
+ n00000027 [label="c3-isp-stats\n/dev/video3", shape=box, style=filled, fillcolor=yellow]
+ n0000002b [label="c3-isp-params\n/dev/video4", shape=box, style=filled, fillcolor=yellow]
+ n0000002b -> n00000001:port1
+ n0000003f [label="{{} | imx290 2-001a\n/dev/v4l-subdev6 | {<port0> 0}}", shape=Mrecord, style=filled, fillcolor=green]
+ n0000003f:port0 -> n0000001e:port0 [style=bold]
+}
diff --git a/Documentation/admin-guide/media/c3-isp.rst b/Documentation/admin-guide/media/c3-isp.rst
new file mode 100644
index 000000000000..ac508b8c6831
--- /dev/null
+++ b/Documentation/admin-guide/media/c3-isp.rst
@@ -0,0 +1,101 @@
+.. SPDX-License-Identifier: (GPL-2.0-only OR MIT)
+
+.. include:: <isonum.txt>
+
+=================================================
+Amlogic C3 Image Signal Processing (C3ISP) driver
+=================================================
+
+Introduction
+============
+
+This file documents the Amlogic C3ISP driver located under
+drivers/media/platform/amlogic/c3/isp.
+
+The current version of the driver supports the C3ISP found on
+Amlogic C308L processor.
+
+The driver implements V4L2, Media controller and V4L2 subdev interfaces.
+Camera sensor using V4L2 subdev interface in the kernel is supported.
+
+The driver has been tested on AW419-C308L-Socket platform.
+
+Amlogic C3 ISP
+==============
+
+The Camera hardware found on C308L processors and supported by
+the driver consists of:
+
+- 1 MIPI-CSI-2 module: handles the physical layer of the MIPI CSI-2 receiver and
+ receives data from the connected camera sensor.
+- 1 MIPI-ADAPTER module: organizes MIPI data to meet ISP input requirements and
+ send MIPI data to ISP.
+- 1 ISP (Image Signal Processing) module: contains a pipeline of image processing
+ hardware blocks. The ISP pipeline contains three resizers at the end each of
+ them connected to a DMA interface which writes the output data to memory.
+
+A high-level functional view of the C3 ISP is presented below.::
+
+ +----------+ +-------+
+ | Resizer |--->| WRMIF |
+ +---------+ +------------+ +--------------+ +-------+ |----------+ +-------+
+ | Sensor |--->| MIPI CSI-2 |--->| MIPI ADAPTER |--->| ISP |---|----------+ +-------+
+ +---------+ +------------+ +--------------+ +-------+ | Resizer |--->| WRMIF |
+ +----------+ +-------+
+ |----------+ +-------+
+ | Resizer |--->| WRMIF |
+ +----------+ +-------+
+
+Driver architecture and design
+==============================
+
+With the goal to model the hardware links between the modules and to expose a
+clean, logical and usable interface, the driver registers the following V4L2
+sub-devices:
+
+- 1 `c3-mipi-csi2` sub-device - the MIPI CSI-2 receiver
+- 1 `c3-mipi-adapter` sub-device - the MIPI adapter
+- 1 `c3-isp-core` sub-device - the ISP core
+- 3 `c3-isp-resizer` sub-devices - the ISP resizers
+
+The `c3-isp-core` sub-device is linked to 2 video device nodes for statistics
+capture and parameters programming:
+
+- the `c3-isp-stats` capture video device node for statistics capture
+- the `c3-isp-params` output video device for parameters programming
+
+Each `c3-isp-resizer` sub-device is linked to a capture video device node where
+frames are captured from:
+
+- `c3-isp-resizer0` is linked to the `c3-isp-cap0` capture video device
+- `c3-isp-resizer1` is linked to the `c3-isp-cap1` capture video device
+- `c3-isp-resizer2` is linked to the `c3-isp-cap2` capture video device
+
+The media controller pipeline graph is as follows (with connected a
+IMX290 camera sensor):
+
+.. _isp_topology_graph:
+
+.. kernel-figure:: c3-isp.dot
+ :alt: c3-isp.dot
+ :align: center
+
+ Media pipeline topology
+
+Implementation
+==============
+
+Runtime configuration of the ISP hardware is performed on the `c3-isp-params`
+video device node using the :ref:`V4L2_META_FMT_C3ISP_PARAMS
+<v4l2-meta-fmt-c3isp-params>` as data format. The buffer structure is defined by
+:c:type:`c3_isp_params_cfg`.
+
+Statistics are captured from the `c3-isp-stats` video device node using the
+:ref:`V4L2_META_FMT_C3ISP_STATS <v4l2-meta-fmt-c3isp-stats>` data format.
+
+The final picture size and format is configured using the V4L2 video
+capture interface on the `c3-isp-cap[0, 2]` video device nodes.
+
+The Amlogic C3 ISP is supported by `libcamera <https://libcamera.org>`_ with a
+dedicated pipeline handler and algorithms that perform run-time image correction
+and enhancement.
diff --git a/Documentation/media/v4l-drivers/cafe_ccic.rst b/Documentation/admin-guide/media/cafe_ccic.rst
index 94f0f58ebe37..ff7fbce1342a 100644
--- a/Documentation/media/v4l-drivers/cafe_ccic.rst
+++ b/Documentation/admin-guide/media/cafe_ccic.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
The cafe_ccic driver
====================
diff --git a/Documentation/admin-guide/media/cardlist.rst b/Documentation/admin-guide/media/cardlist.rst
new file mode 100644
index 000000000000..5b38bfd6a19d
--- /dev/null
+++ b/Documentation/admin-guide/media/cardlist.rst
@@ -0,0 +1,29 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========
+Cards List
+==========
+
+The media subsystem provide support for lots of PCI and USB drivers, plus
+platform-specific drivers. It also contains several ancillary I²C drivers.
+
+The platform-specific drivers are usually present on embedded systems,
+or are supported by the main board. Usually, setting them is done via
+OpenFirmware or ACPI.
+
+The PCI and USB drivers, however, are independent of the system's board,
+and may be added/removed by the user.
+
+You may also take a look at
+https://linuxtv.org/wiki/index.php/Hardware_Device_Information
+for more details about supported cards.
+
+.. toctree::
+ :maxdepth: 2
+
+ usb-cardlist
+ pci-cardlist
+ platform-cardlist
+ radio-cardlist
+ i2c-cardlist
+ misc-cardlist
diff --git a/Documentation/admin-guide/media/cec.rst b/Documentation/admin-guide/media/cec.rst
new file mode 100644
index 000000000000..b2e7a300494a
--- /dev/null
+++ b/Documentation/admin-guide/media/cec.rst
@@ -0,0 +1,467 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+========
+HDMI CEC
+========
+
+Supported hardware in mainline
+==============================
+
+HDMI Transmitters:
+
+- Exynos4
+- Exynos5
+- STIH4xx HDMI CEC
+- V4L2 adv7511 (same HW, but a different driver from the drm adv7511)
+- stm32
+- Allwinner A10 (sun4i)
+- Raspberry Pi
+- dw-hdmi (Synopsis IP)
+- amlogic (meson ao-cec and ao-cec-g12a)
+- drm adv7511/adv7533
+- omap4
+- tegra
+- rk3288, rk3399
+- tda998x
+- DisplayPort CEC-Tunneling-over-AUX on i915, nouveau and amdgpu
+- ChromeOS EC CEC
+- CEC for SECO boards (UDOO x86).
+- Chrontel CH7322
+
+
+HDMI Receivers:
+
+- adv7604/11/12
+- adv7842
+- tc358743
+
+USB Dongles (see below for additional information on how to use these
+dongles):
+
+- Pulse-Eight: the pulse8-cec driver implements the following module option:
+ ``persistent_config``: by default this is off, but when set to 1 the driver
+ will store the current settings to the device's internal eeprom and restore
+ it the next time the device is connected to the USB port.
+
+- RainShadow Tech. Note: this driver does not support the persistent_config
+ module option of the Pulse-Eight driver. The hardware supports it, but I
+ have no plans to add this feature. But I accept patches :-)
+
+- Extron DA HD 4K PLUS HDMI Distribution Amplifier. See
+ :ref:`extron_da_hd_4k_plus` for more information.
+
+Miscellaneous:
+
+- vivid: emulates a CEC receiver and CEC transmitter.
+ Can be used to test CEC applications without actual CEC hardware.
+
+- cec-gpio. If the CEC pin is hooked up to a GPIO pin then
+ you can control the CEC line through this driver. This supports error
+ injection as well.
+
+- cec-gpio and Allwinner A10 (or any other driver that uses the CEC pin
+ framework to drive the CEC pin directly): the CEC pin framework uses
+ high-resolution timers. These timers are affected by NTP daemons that
+ speed up or slow down the clock to sync with the official time. The
+ chronyd server will by default increase or decrease the clock by
+ 1/12th. This will cause the CEC timings to go out of spec. To fix this,
+ add a 'maxslewrate 40000' line to chronyd.conf. This limits the clock
+ frequency change to 1/25th, which keeps the CEC timings within spec.
+
+
+Utilities
+=========
+
+Utilities are available here: https://git.linuxtv.org/v4l-utils.git
+
+``utils/cec-ctl``: control a CEC device
+
+``utils/cec-compliance``: test compliance of a remote CEC device
+
+``utils/cec-follower``: emulate a CEC follower device
+
+Note that ``cec-ctl`` has support for the CEC Hospitality Profile as is
+used in some hotel displays. See http://www.htng.org.
+
+Note that the libcec library (https://github.com/Pulse-Eight/libcec) supports
+the linux CEC framework.
+
+If you want to get the CEC specification, then look at the References of
+the HDMI wikipedia page: https://en.wikipedia.org/wiki/HDMI. CEC is part
+of the HDMI specification. HDMI 1.3 is freely available (very similar to
+HDMI 1.4 w.r.t. CEC) and should be good enough for most things.
+
+
+DisplayPort to HDMI Adapters with working CEC
+=============================================
+
+Background: most adapters do not support the CEC Tunneling feature,
+and of those that do many did not actually connect the CEC pin.
+Unfortunately, this means that while a CEC device is created, it
+is actually all alone in the world and will never be able to see other
+CEC devices.
+
+This is a list of known working adapters that have CEC Tunneling AND
+that properly connected the CEC pin. If you find adapters that work
+but are not in this list, then drop me a note.
+
+To test: hook up your DP-to-HDMI adapter to a CEC capable device
+(typically a TV), then run::
+
+ cec-ctl --playback # Configure the PC as a CEC Playback device
+ cec-ctl -S # Show the CEC topology
+
+The ``cec-ctl -S`` command should show at least two CEC devices,
+ourselves and the CEC device you are connected to (i.e. typically the TV).
+
+General note: I have only seen this work with the Parade PS175, PS176 and
+PS186 chipsets and the MegaChips 2900. While MegaChips 28x0 claims CEC support,
+I have never seen it work.
+
+USB-C to HDMI
+-------------
+
+Samsung Multiport Adapter EE-PW700: https://www.samsung.com/ie/support/model/EE-PW700BBEGWW/
+
+Kramer ADC-U31C/HF: https://www.kramerav.com/product/ADC-U31C/HF
+
+Club3D CAC-2504: https://www.club-3d.com/en/detail/2449/usb_3.1_type_c_to_hdmi_2.0_uhd_4k_60hz_active_adapter/
+
+DisplayPort to HDMI
+-------------------
+
+Club3D CAC-1080: https://www.club-3d.com/en/detail/2442/displayport_1.4_to_hdmi_2.0b_hdr/
+
+CableCreation (SKU: CD0712): https://www.cablecreation.com/products/active-displayport-to-hdmi-adapter-4k-hdr
+
+HP DisplayPort to HDMI True 4k Adapter (P/N 2JA63AA): https://www.hp.com/us-en/shop/pdp/hp-displayport-to-hdmi-true-4k-adapter
+
+Mini-DisplayPort to HDMI
+------------------------
+
+Club3D CAC-1180: https://www.club-3d.com/en/detail/2443/mini_displayport_1.4_to_hdmi_2.0b_hdr/
+
+Note that passive adapters will never work, you need an active adapter.
+
+The Club3D adapters in this list are all MegaChips 2900 based. Other Club3D adapters
+are PS176 based and do NOT have the CEC pin hooked up, so only the three Club3D
+adapters above are known to work.
+
+I suspect that MegaChips 2900 based designs in general are likely to work
+whereas with the PS176 it is more hit-and-miss (mostly miss). The PS186 is
+likely to have the CEC pin hooked up, it looks like they changed the reference
+design for that chipset.
+
+
+USB CEC Dongles
+===============
+
+These dongles appear as ``/dev/ttyACMX`` devices and need the ``inputattach``
+utility to create the ``/dev/cecX`` devices. Support for the Pulse-Eight
+has been added to ``inputattach`` 1.6.0. Support for the Rainshadow Tech has
+been added to ``inputattach`` 1.6.1.
+
+You also need udev rules to automatically start systemd services::
+
+ SUBSYSTEM=="tty", KERNEL=="ttyACM[0-9]*", ATTRS{idVendor}=="2548", ATTRS{idProduct}=="1002", ACTION=="add", TAG+="systemd", ENV{SYSTEMD_WANTS}+="pulse8-cec-inputattach@%k.service"
+ SUBSYSTEM=="tty", KERNEL=="ttyACM[0-9]*", ATTRS{idVendor}=="2548", ATTRS{idProduct}=="1001", ACTION=="add", TAG+="systemd", ENV{SYSTEMD_WANTS}+="pulse8-cec-inputattach@%k.service"
+ SUBSYSTEM=="tty", KERNEL=="ttyACM[0-9]*", ATTRS{idVendor}=="04d8", ATTRS{idProduct}=="ff59", ACTION=="add", TAG+="systemd", ENV{SYSTEMD_WANTS}+="rainshadow-cec-inputattach@%k.service"
+
+and these systemd services:
+
+For Pulse-Eight make /lib/systemd/system/pulse8-cec-inputattach@.service::
+
+ [Unit]
+ Description=inputattach for pulse8-cec device on %I
+
+ [Service]
+ Type=simple
+ ExecStart=/usr/bin/inputattach --pulse8-cec /dev/%I
+
+For the RainShadow Tech make /lib/systemd/system/rainshadow-cec-inputattach@.service::
+
+ [Unit]
+ Description=inputattach for rainshadow-cec device on %I
+
+ [Service]
+ Type=simple
+ ExecStart=/usr/bin/inputattach --rainshadow-cec /dev/%I
+
+
+For proper suspend/resume support create: /lib/systemd/system/restart-cec-inputattach.service::
+
+ [Unit]
+ Description=restart inputattach for cec devices
+ After=suspend.target
+
+ [Service]
+ Type=forking
+ ExecStart=/bin/bash -c 'for d in /dev/serial/by-id/usb-Pulse-Eight*; do /usr/bin/inputattach --daemon --pulse8-cec $d; done; for d in /dev/serial/by-id/usb-RainShadow_Tech*; do /usr/bin/inputattach --daemon --rainshadow-cec $d; done'
+
+ [Install]
+ WantedBy=suspend.target
+
+And run ``systemctl enable restart-cec-inputattach``.
+
+To automatically set the physical address of the CEC device whenever the
+EDID changes, you can use ``cec-ctl`` with the ``-E`` option::
+
+ cec-ctl -E /sys/class/drm/card0-DP-1/edid
+
+This assumes the dongle is connected to the card0-DP-1 output (``xrandr`` will tell
+you which output is used) and it will poll for changes to the EDID and update
+the Physical Address whenever they occur.
+
+To automatically run this command you can use cron. Edit crontab with
+``crontab -e`` and add this line::
+
+ @reboot /usr/local/bin/cec-ctl -E /sys/class/drm/card0-DP-1/edid
+
+This only works for display drivers that expose the EDID in ``/sys/class/drm``,
+such as the i915 driver.
+
+
+CEC Without HPD
+===============
+
+Some displays when in standby mode have no HDMI Hotplug Detect signal, but
+CEC is still enabled so connected devices can send an <Image View On> CEC
+message in order to wake up such displays. Unfortunately, not all CEC
+adapters can support this. An example is the Odroid-U3 SBC that has a
+level-shifter that is powered off when the HPD signal is low, thus
+blocking the CEC pin. Even though the SoC can use CEC without a HPD,
+the level-shifter will prevent this from functioning.
+
+There is a CEC capability flag to signal this: ``CEC_CAP_NEEDS_HPD``.
+If set, then the hardware cannot wake up displays with this behavior.
+
+Note for CEC application implementers: the <Image View On> message must
+be the first message you send, don't send any other messages before.
+Certain very bad but unfortunately not uncommon CEC implementations
+get very confused if they receive anything else but this message and
+they won't wake up.
+
+When writing a driver it can be tricky to test this. There are two
+ways to do this:
+
+1) Get a Pulse-Eight USB CEC dongle, connect an HDMI cable from your
+ device to the Pulse-Eight, but do not connect the Pulse-Eight to
+ the display.
+
+ Now configure the Pulse-Eight dongle::
+
+ cec-ctl -p0.0.0.0 --tv
+
+ and start monitoring::
+
+ sudo cec-ctl -M
+
+ On the device you are testing run::
+
+ cec-ctl --playback
+
+ It should report a physical address of f.f.f.f. Now run this
+ command::
+
+ cec-ctl -t0 --image-view-on
+
+ The Pulse-Eight should see the <Image View On> message. If not,
+ then something (hardware and/or software) is preventing the CEC
+ message from going out.
+
+ To make sure you have the wiring correct just connect the
+ Pulse-Eight to a CEC-enabled display and run the same command
+ on your device: now there is a HPD, so you should see the command
+ arriving at the Pulse-Eight.
+
+2) If you have another linux device supporting CEC without HPD, then
+ you can just connect your device to that device. Yes, you can connect
+ two HDMI outputs together. You won't have a HPD (which is what we
+ want for this test), but the second device can monitor the CEC pin.
+
+ Otherwise use the same commands as in 1.
+
+If CEC messages do not come through when there is no HPD, then you
+need to figure out why. Typically it is either a hardware restriction
+or the software powers off the CEC core when the HPD goes low. The
+first cannot be corrected of course, the second will likely required
+driver changes.
+
+
+Microcontrollers & CEC
+======================
+
+We have seen some CEC implementations in displays that use a microcontroller
+to sample the bus. This does not have to be a problem, but some implementations
+have timing issues. This is hard to discover unless you can hook up a low-level
+CEC debugger (see the next section).
+
+You will see cases where the CEC transmitter holds the CEC line high or low for
+a longer time than is allowed. For directed messages this is not a problem since
+if that happens the message will not be Acked and it will be retransmitted.
+For broadcast messages no such mechanism exists.
+
+It's not clear what to do about this. It is probably wise to transmit some
+broadcast messages twice to reduce the chance of them being lost. Specifically
+<Standby> and <Active Source> are candidates for that.
+
+
+Making a CEC debugger
+=====================
+
+By using a Raspberry Pi 4B and some cheap components you can make
+your own low-level CEC debugger.
+
+The critical component is one of these HDMI female-female passthrough connectors
+(full soldering type 1):
+
+https://elabbay.myshopify.com/collections/camera/products/hdmi-af-af-v1a-hdmi-type-a-female-to-hdmi-type-a-female-pass-through-adapter-breakout-board?variant=45533926147
+
+The video quality is variable and certainly not enough to pass-through 4kp60
+(594 MHz) video. You might be able to support 4kp30, but more likely you will
+be limited to 1080p60 (148.5 MHz). But for CEC testing that is fine.
+
+You need a breadboard and some breadboard wires:
+
+http://www.dx.com/p/diy-40p-male-to-female-male-to-male-female-to-female-dupont-line-wire-3pcs-356089#.WYLOOXWGN7I
+
+If you want to monitor the HPD and/or 5V lines as well, then you need one of
+these 5V to 3.3V level shifters:
+
+https://www.adafruit.com/product/757
+
+(This is just where I got these components, there are many other places you
+can get similar things).
+
+The ground pin of the HDMI connector needs to be connected to a ground
+pin of the Raspberry Pi, of course.
+
+The CEC pin of the HDMI connector needs to be connected to these pins:
+GPIO 6 and GPIO 7. The optional HPD pin of the HDMI connector should
+be connected via the level shifter to these pins: GPIO 23 and GPIO 12.
+The optional 5V pin of the HDMI connector should be connected via the
+level shifter to these pins: GPIO 25 and GPIO 22. Monitoring the HPD and
+5V lines is not necessary, but it is helpful.
+
+This device tree addition in ``arch/arm/boot/dts/bcm2711-rpi-4-b.dts``
+will hook up the cec-gpio driver correctly::
+
+ cec@6 {
+ compatible = "cec-gpio";
+ cec-gpios = <&gpio 6 (GPIO_ACTIVE_HIGH|GPIO_OPEN_DRAIN)>;
+ hpd-gpios = <&gpio 23 GPIO_ACTIVE_HIGH>;
+ v5-gpios = <&gpio 25 GPIO_ACTIVE_HIGH>;
+ };
+
+ cec@7 {
+ compatible = "cec-gpio";
+ cec-gpios = <&gpio 7 (GPIO_ACTIVE_HIGH|GPIO_OPEN_DRAIN)>;
+ hpd-gpios = <&gpio 12 GPIO_ACTIVE_HIGH>;
+ v5-gpios = <&gpio 22 GPIO_ACTIVE_HIGH>;
+ };
+
+If you haven't hooked up the HPD and/or 5V lines, then just delete those
+lines.
+
+This dts change will enable two cec GPIO devices: I typically use one to
+send/receive CEC commands and the other to monitor. If you monitor using
+an unconfigured CEC adapter then it will use GPIO interrupts which makes
+monitoring very accurate.
+
+If you just want to monitor traffic, then a single instance is sufficient.
+The minimum configuration is one HDMI female-female passthrough connector
+and two female-female breadboard wires: one for connecting the HDMI ground
+pin to a ground pin on the Raspberry Pi, and the other to connect the HDMI
+CEC pin to GPIO 6 on the Raspberry Pi.
+
+The documentation on how to use the error injection is here: :ref:`cec_pin_error_inj`.
+
+``cec-ctl --monitor-pin`` will do low-level CEC bus sniffing and analysis.
+You can also store the CEC traffic to file using ``--store-pin`` and analyze
+it later using ``--analyze-pin``.
+
+You can also use this as a full-fledged CEC device by configuring it
+using ``cec-ctl --tv -p0.0.0.0`` or ``cec-ctl --playback -p1.0.0.0``.
+
+.. _extron_da_hd_4k_plus:
+
+Extron DA HD 4K PLUS CEC Adapter driver
+=======================================
+
+This driver is for the Extron DA HD 4K PLUS series of HDMI Distribution
+Amplifiers: https://www.extron.com/product/dahd4kplusseries
+
+The 2, 4 and 6 port models are supported.
+
+Firmware version 1.02.0001 or higher is required.
+
+Note that older Extron hardware revisions have a problem with the CEC voltage,
+which may mean that CEC will not work. This is fixed in hardware revisions
+E34814 and up.
+
+The CEC support has two modes: the first is a manual mode where userspace has
+to manually control CEC for the HDMI Input and all HDMI Outputs. While this gives
+full control, it is also complicated.
+
+The second mode is an automatic mode, which is selected if the module option
+``vendor_id`` is set. In that case the driver controls CEC and CEC messages
+received in the input will be distributed to the outputs. It is still possible
+to use the /dev/cecX devices to talk to the connected devices directly, but it is
+the driver that configures everything and deals with things like Hotplug Detect
+changes.
+
+The driver also takes care of the EDIDs: /dev/videoX devices are created to
+read the EDIDs and (for the HDMI Input port) to set the EDID.
+
+By default userspace is responsible to set the EDID for the HDMI Input
+according to the EDIDs of the connected displays. But if the ``manufacturer_name``
+module option is set, then the driver will take care of setting the EDID
+of the HDMI Input based on the supported resolutions of the connected displays.
+Currently the driver only supports resolutions 1080p60 and 4kp60: if all connected
+displays support 4kp60, then it will advertise 4kp60 on the HDMI input, otherwise
+it will fall back to an EDID that just reports 1080p60.
+
+The status of the Extron is reported in ``/sys/kernel/debug/cec/cecX/status``.
+
+The extron-da-hd-4k-plus driver implements the following module options:
+
+``debug``
+---------
+
+If set to 1, then all serial port traffic is shown.
+
+``vendor_id``
+-------------
+
+The CEC Vendor ID to report to connected displays.
+
+If set, then the driver will take care of distributing CEC messages received
+on the input to the HDMI outputs. This is done for the following CEC messages:
+
+- <Standby>
+- <Image View On> and <Text View On>
+- <Give Device Power Status>
+- <Set System Audio Mode>
+- <Request Current Latency>
+
+If not set, then userspace is responsible for this, and it will have to
+configure the CEC devices for HDMI Input and the HDMI Outputs manually.
+
+``manufacturer_name``
+---------------------
+
+A three character manufacturer name that is used in the EDID for the HDMI
+Input. If not set, then userspace is responsible for configuring an EDID.
+If set, then the driver will update the EDID automatically based on the
+resolutions supported by the connected displays, and it will not be possible
+anymore to manually set the EDID for the HDMI Input.
+
+``hpd_never_low``
+-----------------
+
+If set, then the Hotplug Detect pin of the HDMI Input will always be high,
+even if nothing is connected to the HDMI Outputs. If not set (the default)
+then the Hotplug Detect pin of the HDMI input will go low if all the detected
+Hotplug Detect pins of the HDMI Outputs are also low.
+
+This option may be changed dynamically.
diff --git a/Documentation/admin-guide/media/ci.rst b/Documentation/admin-guide/media/ci.rst
new file mode 100644
index 000000000000..ded4d8fbbf92
--- /dev/null
+++ b/Documentation/admin-guide/media/ci.rst
@@ -0,0 +1,77 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Digital TV Conditional Access Interface
+=======================================
+
+
+.. note::
+
+ This documentation is outdated.
+
+This document describes the usage of the high level CI API as
+in accordance to the Linux DVB API. This is a not a documentation for the,
+existing low level CI API.
+
+.. note::
+
+ For the Twinhan/Twinhan clones, the dst_ca module handles the CI
+ hardware handling. This module is loaded automatically if a CI
+ (Common Interface, that holds the CAM (Conditional Access Module)
+ is detected.
+
+ca_zap
+~~~~~~
+
+A userspace application, like ``ca_zap`` is required to handle encrypted
+MPEG-TS streams.
+
+The ``ca_zap`` userland application is in charge of sending the
+descrambling related information to the Conditional Access Module (CAM).
+
+This application requires the following to function properly as of now.
+
+a) Tune to a valid channel, with szap.
+
+ eg: $ szap -c channels.conf -r "TMC" -x
+
+b) a channels.conf containing a valid PMT PID
+
+ eg: TMC:11996:h:0:27500:278:512:650:321
+
+ here 278 is a valid PMT PID. the rest of the values are the
+ same ones that szap uses.
+
+c) after running a szap, you have to run ca_zap, for the
+ descrambler to function,
+
+ eg: $ ca_zap channels.conf "TMC"
+
+d) Hopefully enjoy your favourite subscribed channel as you do with
+ a FTA card.
+
+.. note::
+
+ Currently ca_zap, and dst_test, both are meant for demonstration
+ purposes only, they can become full fledged applications if necessary.
+
+
+Cards that fall in this category
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+At present the cards that fall in this category are the Twinhan and its
+clones, these cards are available as VVMER, Tomato, Hercules, Orange and
+so on.
+
+CI modules that are supported
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The CI module support is largely dependent upon the firmware on the cards
+Some cards do support almost all of the available CI modules. There is
+nothing much that can be done in order to make additional CI modules
+working with these cards.
+
+Modules that have been tested by this driver at present are
+
+(1) Irdeto 1 and 2 from SCM
+(2) Viaccess from SCM
+(3) Dragoncam
diff --git a/Documentation/admin-guide/media/cx18-cardlist.rst b/Documentation/admin-guide/media/cx18-cardlist.rst
new file mode 100644
index 000000000000..26f2da9aa542
--- /dev/null
+++ b/Documentation/admin-guide/media/cx18-cardlist.rst
@@ -0,0 +1,17 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+CX18 cards list
+===============
+
+Those cards are supported by cx18 driver:
+
+- Hauppauge HVR-1600 (ESMT memory)
+- Hauppauge HVR-1600 (Samsung memory)
+- Compro VideoMate H900
+- Yuan MPC718 MiniPCI DVB-T/Analog
+- Conexant Raptor PAL/SECAM
+- Toshiba Qosmio DVB-T/Analog
+- Leadtek WinFast PVR2100
+- Leadtek WinFast DVR3100
+- GoTView PCI DVD3 Hybrid
+- Hauppauge HVR-1600 (s5h1411/tda18271)
diff --git a/Documentation/admin-guide/media/cx231xx-cardlist.rst b/Documentation/admin-guide/media/cx231xx-cardlist.rst
new file mode 100644
index 000000000000..d374101be047
--- /dev/null
+++ b/Documentation/admin-guide/media/cx231xx-cardlist.rst
@@ -0,0 +1,99 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+cx231xx cards list
+==================
+
+.. tabularcolumns:: |p{1.4cm}|p{10.0cm}|p{6.5cm}|
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 2 12 19
+ :stub-columns: 0
+
+ * - Card number
+ - Card name
+ - USB IDs
+ * - 0
+ - Unknown CX231xx video grabber
+ - 0572:5A3C
+ * - 1
+ - Conexant Hybrid TV - CARRAERA
+ - 0572:58A2
+ * - 2
+ - Conexant Hybrid TV - SHELBY
+ - 0572:58A1
+ * - 3
+ - Conexant Hybrid TV - RDE253S
+ - 0572:58A4
+ * - 4
+ - Conexant Hybrid TV - RDU253S
+ - 0572:58A5
+ * - 5
+ - Conexant VIDEO GRABBER
+ - 0572:58A6, 07ca:c039
+ * - 6
+ - Conexant Hybrid TV - rde 250
+ - 0572:589E
+ * - 7
+ - Conexant Hybrid TV - RDU 250
+ - 0572:58A0
+ * - 8
+ - Hauppauge EXETER
+ - 2040:b120, 2040:b140
+ * - 9
+ - Hauppauge USB Live 2
+ - 2040:c200
+ * - 10
+ - Pixelview PlayTV USB Hybrid
+ - 4000:4001
+ * - 11
+ - Pixelview Xcapture USB
+ - 1D19:6109, 4000:4001
+ * - 12
+ - Kworld UB430 USB Hybrid
+ - 1b80:e424
+ * - 13
+ - Iconbit Analog Stick U100 FM
+ - 1f4d:0237
+ * - 14
+ - Hauppauge WinTV USB2 FM (PAL)
+ - 2040:b110
+ * - 15
+ - Hauppauge WinTV USB2 FM (NTSC)
+ - 2040:b111
+ * - 16
+ - Elgato Video Capture V2
+ - 0fd9:0037
+ * - 17
+ - Geniatech OTG102
+ - 1f4d:0102
+ * - 18
+ - Kworld UB445 USB Hybrid
+ - 1b80:e421
+ * - 19
+ - Hauppauge WinTV 930C-HD (1113xx) / HVR-900H (111xxx) / PCTV QuatroStick 521e
+ - 2040:b130, 2040:b138, 2013:0259
+ * - 20
+ - Hauppauge WinTV 930C-HD (1114xx) / HVR-901H (1114xx) / PCTV QuatroStick 522e
+ - 2040:b131, 2040:b139, 2013:025e
+ * - 21
+ - Hauppauge WinTV-HVR-955Q (111401)
+ - 2040:b123, 2040:b124
+ * - 22
+ - Terratec Grabby
+ - 1f4d:0102
+ * - 23
+ - Evromedia USB Full Hybrid Full HD
+ - 1b80:d3b2
+ * - 24
+ - Astrometa T2hybrid
+ - 15f4:0135
+ * - 25
+ - The Imaging Source DFG/USB2pro
+ - 199e:8002
+ * - 26
+ - Hauppauge WinTV-HVR-935C
+ - 2040:b151
+ * - 27
+ - Hauppauge WinTV-HVR-975
+ - 2040:b150
diff --git a/Documentation/media/v4l-drivers/cx23885-cardlist.rst b/Documentation/admin-guide/media/cx23885-cardlist.rst
index 8c24df8e0423..c47514fead33 100644
--- a/Documentation/media/v4l-drivers/cx23885-cardlist.rst
+++ b/Documentation/admin-guide/media/cx23885-cardlist.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
cx23885 cards list
==================
@@ -10,7 +12,7 @@ cx23885 cards list
* - Card number
- Card name
- - PCI IDs
+ - PCI subsystem IDs
* - 0
- UNKNOWN/GENERIC
@@ -259,3 +261,7 @@ cx23885 cards list
* - 61
- Hauppauge WinTV-QuadHD-ATSC(885)
-
+
+ * - 62
+ - AVerMedia CE310B
+ - 1461:3100
diff --git a/Documentation/media/v4l-drivers/cx88-cardlist.rst b/Documentation/admin-guide/media/cx88-cardlist.rst
index 21648b8c2e83..76dc9a14cf91 100644
--- a/Documentation/media/v4l-drivers/cx88-cardlist.rst
+++ b/Documentation/admin-guide/media/cx88-cardlist.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
CX88 cards list
===============
@@ -10,7 +12,7 @@ CX88 cards list
* - Card number
- Card name
- - PCI IDs
+ - PCI subsystem IDs
* - 0
- UNKNOWN/GENERIC
@@ -375,3 +377,7 @@ CX88 cards list
* - 90
- Leadtek TV2000 XP Global (XC4100)
- 107d:6f43
+
+ * - 91
+ - NotOnlyTV LV3H
+ -
diff --git a/Documentation/admin-guide/media/cx88.rst b/Documentation/admin-guide/media/cx88.rst
new file mode 100644
index 000000000000..e4badb18199d
--- /dev/null
+++ b/Documentation/admin-guide/media/cx88.rst
@@ -0,0 +1,58 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+The cx88 driver
+===============
+
+Author: Gerd Hoffmann
+
+This is a v4l2 device driver for the cx2388x chip.
+
+
+Current status
+--------------
+
+video
+ - Works.
+ - Overlay isn't supported.
+
+audio
+ - Works. The TV standard detection is made by the driver, as the
+ hardware has bugs to auto-detect.
+ - audio data dma (i.e. recording without loopback cable to the
+ sound card) is supported via cx88-alsa.
+
+vbi
+ - Works.
+
+
+How to add support for new cards
+--------------------------------
+
+The driver needs some config info for the TV cards. This stuff is in
+cx88-cards.c. If the driver doesn't work well you likely need a new
+entry for your card in that file. Check the kernel log (using dmesg)
+to see whenever the driver knows your card or not. There is a line
+like this one:
+
+.. code-block:: none
+
+ cx8800[0]: subsystem: 0070:3400, board: Hauppauge WinTV \
+ 34xxx models [card=1,autodetected]
+
+If your card is listed as "board: UNKNOWN/GENERIC" it is unknown to
+the driver. What to do then?
+
+1) Try upgrading to the latest snapshot, maybe it has been added
+ meanwhile.
+2) You can try to create a new entry yourself, have a look at
+ cx88-cards.c. If that worked, mail me your changes as unified
+ diff ("diff -u").
+3) Or you can mail me the config information. We need at least the
+ following information to add the card:
+
+ - the PCI Subsystem ID ("0070:3400" from the line above,
+ "lspci -v" output is fine too).
+ - the tuner type used by the card. You can try to find one by
+ trial-and-error using the tuner=<n> insmod option. If you
+ know which one the card has you can also have a look at the
+ list in CARDLIST.tuner
diff --git a/Documentation/admin-guide/media/dvb-drivers.rst b/Documentation/admin-guide/media/dvb-drivers.rst
new file mode 100644
index 000000000000..66fa4edd0606
--- /dev/null
+++ b/Documentation/admin-guide/media/dvb-drivers.rst
@@ -0,0 +1,15 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+========================================
+Digital TV driver-specific documentation
+========================================
+
+.. toctree::
+ :maxdepth: 2
+
+ avermedia
+ bt8xx
+ lmedm04
+ opera-firmware
+ technisat
+ ttusb-dec
diff --git a/Documentation/admin-guide/media/dvb-usb-a800-cardlist.rst b/Documentation/admin-guide/media/dvb-usb-a800-cardlist.rst
new file mode 100644
index 000000000000..2ec8bb8230ff
--- /dev/null
+++ b/Documentation/admin-guide/media/dvb-usb-a800-cardlist.rst
@@ -0,0 +1,16 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+dvb-usb-a800 cards list
+=======================
+
+.. tabularcolumns:: |p{7.0cm}|p{10.5cm}|
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 7 13
+ :stub-columns: 0
+
+ * - Card name
+ - USB IDs
+ * - AVerMedia AverTV DVB-T USB 2.0 (A800)
+ - 07ca:a800, 07ca:a801
diff --git a/Documentation/admin-guide/media/dvb-usb-af9005-cardlist.rst b/Documentation/admin-guide/media/dvb-usb-af9005-cardlist.rst
new file mode 100644
index 000000000000..285160ee82e8
--- /dev/null
+++ b/Documentation/admin-guide/media/dvb-usb-af9005-cardlist.rst
@@ -0,0 +1,20 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+dvb-usb-af9005 cards list
+=========================
+
+.. tabularcolumns:: |p{7.0cm}|p{10.5cm}|
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 7 13
+ :stub-columns: 0
+
+ * - Card name
+ - USB IDs
+ * - Afatech DVB-T USB1.1 stick
+ - 15a4:9020
+ * - Ansonic DVB-T USB1.1 stick
+ - 10b9:6000
+ * - TerraTec Cinergy T USB XE
+ - 0ccd:0055
diff --git a/Documentation/admin-guide/media/dvb-usb-af9015-cardlist.rst b/Documentation/admin-guide/media/dvb-usb-af9015-cardlist.rst
new file mode 100644
index 000000000000..c557994f796a
--- /dev/null
+++ b/Documentation/admin-guide/media/dvb-usb-af9015-cardlist.rst
@@ -0,0 +1,80 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+dvb-usb-af9015 cards list
+=========================
+
+.. tabularcolumns:: |p{7.0cm}|p{10.5cm}|
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 7 13
+ :stub-columns: 0
+
+ * - Card name
+ - USB IDs
+ * - AVerMedia A309
+ - 07ca:a309
+ * - AVerMedia AVerTV DVB-T Volar X
+ - 07ca:a815
+ * - Afatech AF9015 reference design
+ - 15a4:9015, 15a4:9016
+ * - AverMedia AVerTV Red HD+ (A850T)
+ - 07ca:850b
+ * - AverMedia AVerTV Volar Black HD (A850)
+ - 07ca:850a
+ * - AverMedia AVerTV Volar GPS 805 (A805)
+ - 07ca:a805
+ * - AverMedia AVerTV Volar M (A815Mac)
+ - 07ca:815a
+ * - Conceptronic USB2.0 DVB-T CTVDIGRCU V3.0
+ - 1b80:e397
+ * - DigitalNow TinyTwin
+ - 13d3:3226
+ * - DigitalNow TinyTwin v2
+ - 1b80:e402
+ * - DigitalNow TinyTwin v3
+ - 1f4d:9016
+ * - Fujitsu-Siemens Slim Mobile USB DVB-T
+ - 07ca:8150
+ * - Genius TVGo DVB-T03
+ - 0458:4012
+ * - KWorld Digital MC-810
+ - 1b80:c810
+ * - KWorld PlusTV DVB-T PCI Pro Card (DVB-T PC160-T)
+ - 1b80:c161
+ * - KWorld PlusTV Dual DVB-T PCI (DVB-T PC160-2T)
+ - 1b80:c160
+ * - KWorld PlusTV Dual DVB-T Stick (DVB-T 399U)
+ - 1b80:e399, 1b80:e400
+ * - KWorld USB DVB-T Stick Mobile (UB383-T)
+ - 1b80:e383
+ * - KWorld USB DVB-T TV Stick II (VS-DVB-T 395U)
+ - 1b80:e396, 1b80:e39b, 1b80:e395, 1b80:e39a
+ * - Leadtek WinFast DTV Dongle Gold
+ - 0413:6029
+ * - Leadtek WinFast DTV2000DS
+ - 0413:6a04
+ * - MSI DIGIVOX Duo
+ - 1462:8801
+ * - MSI Digi VOX mini III
+ - 1462:8807
+ * - Pinnacle PCTV 71e
+ - 2304:022b
+ * - Sveon STV20 Tuner USB DVB-T HDTV
+ - 1b80:e39d
+ * - Sveon STV22 Dual USB DVB-T Tuner HDTV
+ - 1b80:e401
+ * - Telestar Starstick 2
+ - 10b9:8000
+ * - TerraTec Cinergy T Stick Dual RC
+ - 0ccd:0099
+ * - TerraTec Cinergy T Stick RC
+ - 0ccd:0097
+ * - TerraTec Cinergy T USB XE
+ - 0ccd:0069
+ * - TrekStor DVB-T USB Stick
+ - 15a4:901b
+ * - TwinHan AzureWave AD-TU700(704J)
+ - 13d3:3237
+ * - Xtensions XD-380
+ - 1ae7:0381
diff --git a/Documentation/admin-guide/media/dvb-usb-af9035-cardlist.rst b/Documentation/admin-guide/media/dvb-usb-af9035-cardlist.rst
new file mode 100644
index 000000000000..63e4170777c4
--- /dev/null
+++ b/Documentation/admin-guide/media/dvb-usb-af9035-cardlist.rst
@@ -0,0 +1,74 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+dvb-usb-af9035 cards list
+=========================
+
+.. tabularcolumns:: |p{7.0cm}|p{10.5cm}|
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 7 13
+ :stub-columns: 0
+
+ * - Card name
+ - USB IDs
+ * - AVerMedia AVerTV Volar HD/PRO (A835)
+ - 07ca:a835, 07ca:b835
+ * - AVerMedia HD Volar (A867)
+ - 07ca:1867, 07ca:a867, 07ca:0337
+ * - AVerMedia TD310 DVB-T2
+ - 07ca:1871
+ * - AVerMedia Twinstar (A825)
+ - 07ca:0825
+ * - Afatech AF9035 reference design
+ - 15a4:9035, 15a4:1000, 15a4:1001, 15a4:1002, 15a4:1003
+ * - Asus U3100Mini Plus
+ - 0b05:1779
+ * - Avermedia A835B(1835)
+ - 07ca:1835
+ * - Avermedia A835B(2835)
+ - 07ca:2835
+ * - Avermedia A835B(3835)
+ - 07ca:3835
+ * - Avermedia A835B(4835)
+ - 07ca:4835
+ * - Avermedia AverTV Volar HD 2 (TD110)
+ - 07ca:a110
+ * - Avermedia H335
+ - 07ca:0335
+ * - Digital Dual TV Receiver CTVDIGDUAL_V2
+ - 1b80:e410
+ * - EVOLVEO XtraTV stick
+ - 1f4d:a115
+ * - Hauppauge WinTV-MiniStick 2
+ - 2040:f900
+ * - ITE 9135 Generic
+ - 048d:9135
+ * - ITE 9135(9005) Generic
+ - 048d:9005
+ * - ITE 9135(9006) Generic
+ - 048d:9006
+ * - ITE 9303 Generic
+ - 048d:9306
+ * - Kworld UB499-2T T09
+ - 1b80:e409
+ * - Leadtek WinFast DTV Dongle Dual
+ - 0413:6a05
+ * - Logilink VG0022A
+ - 1d19:0100
+ * - PCTV AndroiDTV (78e)
+ - 2013:025a
+ * - PCTV microStick (79e)
+ - 2013:0262
+ * - Sveon STV22 Dual DVB-T HDTV
+ - 1b80:e411
+ * - TerraTec Cinergy T Stick
+ - 0ccd:0093
+ * - TerraTec Cinergy T Stick (rev. 2)
+ - 0ccd:00aa
+ * - TerraTec Cinergy T Stick Dual RC (rev. 2)
+ - 0ccd:0099
+ * - TerraTec Cinergy TC2 Stick
+ - 0ccd:10b2
+ * - TerraTec T1
+ - 0ccd:10ae
diff --git a/Documentation/admin-guide/media/dvb-usb-anysee-cardlist.rst b/Documentation/admin-guide/media/dvb-usb-anysee-cardlist.rst
new file mode 100644
index 000000000000..1fb5d22a00dc
--- /dev/null
+++ b/Documentation/admin-guide/media/dvb-usb-anysee-cardlist.rst
@@ -0,0 +1,16 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+dvb-usb-anysee cards list
+=========================
+
+.. tabularcolumns:: |p{7.0cm}|p{10.5cm}|
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 7 13
+ :stub-columns: 0
+
+ * - Card name
+ - USB IDs
+ * - Anysee
+ - 04b4:861f, 1c73:861f
diff --git a/Documentation/admin-guide/media/dvb-usb-au6610-cardlist.rst b/Documentation/admin-guide/media/dvb-usb-au6610-cardlist.rst
new file mode 100644
index 000000000000..02b2b742710b
--- /dev/null
+++ b/Documentation/admin-guide/media/dvb-usb-au6610-cardlist.rst
@@ -0,0 +1,16 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+dvb-usb-au6610 cards list
+=========================
+
+.. tabularcolumns:: |p{7.0cm}|p{10.5cm}|
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 7 13
+ :stub-columns: 0
+
+ * - Card name
+ - USB IDs
+ * - Sigmatek DVB-110
+ - 058f:6610
diff --git a/Documentation/admin-guide/media/dvb-usb-az6007-cardlist.rst b/Documentation/admin-guide/media/dvb-usb-az6007-cardlist.rst
new file mode 100644
index 000000000000..db27eb47cc8f
--- /dev/null
+++ b/Documentation/admin-guide/media/dvb-usb-az6007-cardlist.rst
@@ -0,0 +1,20 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+dvb-usb-az6007 cards list
+=========================
+
+.. tabularcolumns:: |p{7.0cm}|p{10.5cm}|
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 7 13
+ :stub-columns: 0
+
+ * - Card name
+ - USB IDs
+ * - Azurewave 6007
+ - 13d3:0ccd
+ * - Technisat CableStar Combo HD CI
+ - 14f7:0003
+ * - Terratec H7
+ - 0ccd:10b4, 0ccd:10a3
diff --git a/Documentation/admin-guide/media/dvb-usb-az6027-cardlist.rst b/Documentation/admin-guide/media/dvb-usb-az6027-cardlist.rst
new file mode 100644
index 000000000000..6d8575e9d90c
--- /dev/null
+++ b/Documentation/admin-guide/media/dvb-usb-az6027-cardlist.rst
@@ -0,0 +1,24 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+dvb-usb-az6027 cards list
+=========================
+
+.. tabularcolumns:: |p{7.0cm}|p{10.5cm}|
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 7 13
+ :stub-columns: 0
+
+ * - Card name
+ - USB IDs
+ * - AZUREWAVE DVB-S/S2 USB2.0 (AZ6027)
+ - 13d3:3275
+ * - Elgato EyeTV Sat
+ - 0fd9:002a, 0fd9:0025, 0fd9:0036
+ * - TERRATEC S7
+ - 0ccd:10a4
+ * - TERRATEC S7 MKII
+ - 0ccd:10ac
+ * - Technisat SkyStar USB 2 HD CI
+ - 14f7:0001, 14f7:0002
diff --git a/Documentation/admin-guide/media/dvb-usb-ce6230-cardlist.rst b/Documentation/admin-guide/media/dvb-usb-ce6230-cardlist.rst
new file mode 100644
index 000000000000..09750e8ac139
--- /dev/null
+++ b/Documentation/admin-guide/media/dvb-usb-ce6230-cardlist.rst
@@ -0,0 +1,18 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+dvb-usb-ce6230 cards list
+=========================
+
+.. tabularcolumns:: |p{7.0cm}|p{10.5cm}|
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 7 13
+ :stub-columns: 0
+
+ * - Card name
+ - USB IDs
+ * - AVerMedia A310 USB 2.0 DVB-T tuner
+ - 07ca:a310
+ * - Intel CE9500 reference design
+ - 8086:9500
diff --git a/Documentation/admin-guide/media/dvb-usb-cinergyT2-cardlist.rst b/Documentation/admin-guide/media/dvb-usb-cinergyT2-cardlist.rst
new file mode 100644
index 000000000000..0ee753929eca
--- /dev/null
+++ b/Documentation/admin-guide/media/dvb-usb-cinergyT2-cardlist.rst
@@ -0,0 +1,16 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+dvb-usb-cinergyT2 cards list
+============================
+
+.. tabularcolumns:: |p{7.0cm}|p{10.5cm}|
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 7 13
+ :stub-columns: 0
+
+ * - Card name
+ - USB IDs
+ * - TerraTec/qanu USB2.0 Highspeed DVB-T Receiver
+ - 0ccd:0x0038
diff --git a/Documentation/admin-guide/media/dvb-usb-cxusb-cardlist.rst b/Documentation/admin-guide/media/dvb-usb-cxusb-cardlist.rst
new file mode 100644
index 000000000000..a73f15d1acf5
--- /dev/null
+++ b/Documentation/admin-guide/media/dvb-usb-cxusb-cardlist.rst
@@ -0,0 +1,40 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+dvb-usb-cxusb cards list
+========================
+
+.. tabularcolumns:: |p{7.0cm}|p{10.5cm}|
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 7 13
+ :stub-columns: 0
+
+ * - Card name
+ - USB IDs
+ * - AVerMedia AVerTVHD Volar (A868R)
+ -
+ * - Conexant DMB-TH Stick
+ -
+ * - DViCO FusionHDTV DVB-T Dual Digital 2
+ -
+ * - DViCO FusionHDTV DVB-T Dual Digital 4
+ -
+ * - DViCO FusionHDTV DVB-T Dual Digital 4 (rev 2)
+ -
+ * - DViCO FusionHDTV DVB-T Dual USB
+ -
+ * - DViCO FusionHDTV DVB-T NANO2
+ -
+ * - DViCO FusionHDTV DVB-T USB (LGZ201)
+ -
+ * - DViCO FusionHDTV DVB-T USB (TH7579)
+ -
+ * - DViCO FusionHDTV5 USB Gold
+ -
+ * - DigitalNow DVB-T Dual USB
+ -
+ * - Medion MD95700 (MDUSBTV-HYBRID)
+ -
+ * - Mygica D689 DMB-TH
+ -
diff --git a/Documentation/admin-guide/media/dvb-usb-dib0700-cardlist.rst b/Documentation/admin-guide/media/dvb-usb-dib0700-cardlist.rst
new file mode 100644
index 000000000000..4b76b6f1089b
--- /dev/null
+++ b/Documentation/admin-guide/media/dvb-usb-dib0700-cardlist.rst
@@ -0,0 +1,162 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+dvb-usb-dib0700 cards list
+==========================
+
+.. tabularcolumns:: |p{7.0cm}|p{10.5cm}|
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 7 13
+ :stub-columns: 0
+
+ * - Card name
+ - USB IDs
+ * - ASUS My Cinema U3000 Mini DVBT Tuner
+ - 0b05:171f
+ * - ASUS My Cinema U3100 Mini DVBT Tuner
+ - 0b05:173f
+ * - AVerMedia AVerTV DVB-T Express
+ - 07ca:b568
+ * - AVerMedia AVerTV DVB-T Volar
+ - 07ca:a807, 07ca:b808
+ * - Artec T14BR DVB-T
+ - 05d8:810f
+ * - Asus My Cinema-U3000Hybrid
+ - 0b05:1736
+ * - Compro Videomate U500
+ - 185b:1e78, 185b:1e80
+ * - DiBcom NIM7090 reference design
+ - 10b8:1bb2
+ * - DiBcom NIM8096MD reference design
+ - 10b8:1fa8
+ * - DiBcom NIM9090MD reference design
+ - 10b8:2384
+ * - DiBcom STK7070P reference design
+ - 10b8:1ebc
+ * - DiBcom STK7070PD reference design
+ - 10b8:1ebe
+ * - DiBcom STK7700D reference design
+ - 10b8:1ef0
+ * - DiBcom STK7700P reference design
+ - 10b8:1e14, 10b8:1e78
+ * - DiBcom STK7770P reference design
+ - 10b8:1e80
+ * - DiBcom STK807xP reference design
+ - 10b8:1f90
+ * - DiBcom STK807xPVR reference design
+ - 10b8:1f98
+ * - DiBcom STK8096-PVR reference design
+ - 2013:1faa, 10b8:1faa
+ * - DiBcom STK8096GP reference design
+ - 10b8:1fa0
+ * - DiBcom STK9090M reference design
+ - 10b8:2383
+ * - DiBcom TFE7090PVR reference design
+ - 10b8:1bb4
+ * - DiBcom TFE7790P reference design
+ - 10b8:1e6e
+ * - DiBcom TFE8096P reference design
+ - 10b8:1f9C
+ * - Elgato EyeTV DTT
+ - 0fd9:0021
+ * - Elgato EyeTV DTT rev. 2
+ - 0fd9:003f
+ * - Elgato EyeTV Diversity
+ - 0fd9:0011
+ * - Elgato EyeTV Dtt Dlx PD378S
+ - 0fd9:0020
+ * - EvolutePC TVWay+
+ - 1e59:0002
+ * - Gigabyte U7000
+ - 1044:7001
+ * - Gigabyte U8000-RH
+ - 1044:7002
+ * - Hama DVB=T Hybrid USB Stick
+ - 147f:2758
+ * - Hauppauge ATSC MiniCard (B200)
+ - 2040:b200
+ * - Hauppauge ATSC MiniCard (B210)
+ - 2040:b210
+ * - Hauppauge Nova-T 500 Dual DVB-T
+ - 2040:9941, 2040:9950
+ * - Hauppauge Nova-T MyTV.t
+ - 2040:7080
+ * - Hauppauge Nova-T Stick
+ - 2040:7050, 2040:7060, 2040:7070
+ * - Hauppauge Nova-TD Stick (52009)
+ - 2040:5200
+ * - Hauppauge Nova-TD Stick/Elgato Eye-TV Diversity
+ - 2040:9580
+ * - Hauppauge Nova-TD-500 (84xxx)
+ - 2040:8400
+ * - Leadtek WinFast DTV Dongle H
+ - 0413:60f6
+ * - Leadtek Winfast DTV Dongle (STK7700P based)
+ - 0413:6f00, 0413:6f01
+ * - Medion CTX1921 DVB-T USB
+ - 1660:1921
+ * - Microsoft Xbox One Digital TV Tuner
+ - 045e:02d5
+ * - PCTV 2002e
+ - 2013:025c
+ * - PCTV 2002e SE
+ - 2013:025d
+ * - Pinnacle Expresscard 320cx
+ - 2304:022e
+ * - Pinnacle PCTV 2000e
+ - 2304:022c
+ * - Pinnacle PCTV 282e
+ - 2013:0248, 2304:0248
+ * - Pinnacle PCTV 340e HD Pro USB Stick
+ - 2304:023d
+ * - Pinnacle PCTV 72e
+ - 2304:0236
+ * - Pinnacle PCTV 73A
+ - 2304:0243
+ * - Pinnacle PCTV 73e
+ - 2304:0237
+ * - Pinnacle PCTV 73e SE
+ - 2013:0245, 2304:0245
+ * - Pinnacle PCTV DVB-T Flash Stick
+ - 2304:0228
+ * - Pinnacle PCTV Dual DVB-T Diversity Stick
+ - 2304:0229
+ * - Pinnacle PCTV HD Pro USB Stick
+ - 2304:023a
+ * - Pinnacle PCTV HD USB Stick
+ - 2304:023b
+ * - Pinnacle PCTV Hybrid Stick Solo
+ - 2304:023e
+ * - Prolink Pixelview SBTVD
+ - 1554:5010
+ * - Sony PlayTV
+ - 1415:0003
+ * - TechniSat AirStar TeleStick 2
+ - 14f7:0004
+ * - Terratec Cinergy DT USB XS Diversity/ T5
+ - 0ccd:0081, 0ccd:10a1
+ * - Terratec Cinergy DT XS Diversity
+ - 0ccd:005a
+ * - Terratec Cinergy HT Express
+ - 0ccd:0060
+ * - Terratec Cinergy HT USB XE
+ - 0ccd:0058
+ * - Terratec Cinergy T Express
+ - 0ccd:0062
+ * - Terratec Cinergy T USB XXS (HD)/ T3
+ - 0ccd:0078, 0ccd:10a0, 0ccd:00ab
+ * - Uniwill STK7700P based (Hama and others)
+ - 1584:6003
+ * - YUAN High-Tech DiBcom STK7700D
+ - 1164:1e8c
+ * - YUAN High-Tech MC770
+ - 1164:0871
+ * - YUAN High-Tech STK7700D
+ - 1164:1efc
+ * - YUAN High-Tech STK7700PH
+ - 1164:1f08
+ * - Yuan EC372S
+ - 1164:1edc
+ * - Yuan PD378S
+ - 1164:2edc
diff --git a/Documentation/admin-guide/media/dvb-usb-dibusb-mb-cardlist.rst b/Documentation/admin-guide/media/dvb-usb-dibusb-mb-cardlist.rst
new file mode 100644
index 000000000000..f25a54721f0d
--- /dev/null
+++ b/Documentation/admin-guide/media/dvb-usb-dibusb-mb-cardlist.rst
@@ -0,0 +1,42 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+dvb-usb-dibusb-mb cards list
+============================
+
+.. tabularcolumns:: |p{7.0cm}|p{10.5cm}|
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 7 13
+ :stub-columns: 0
+
+ * - Card name
+ - USB IDs
+ * - AVerMedia AverTV DVBT USB1.1
+ - 14aa:0001, 14aa:0002
+ * - Artec T1 USB1.1 TVBOX with AN2135
+ - 05d8:8105, 05d8:8106
+ * - Artec T1 USB1.1 TVBOX with AN2235
+ - 05d8:8107, 05d8:8108
+ * - Artec T1 USB1.1 TVBOX with AN2235 (faulty USB IDs)
+ - 0547:2235
+ * - Artec T1 USB2.0
+ - 05d8:8109, 05d8:810a
+ * - Compro Videomate DVB-U2000 - DVB-T USB1.1 (please confirm to linux-dvb)
+ - 185b:d000, 145f:010c, 185b:d001
+ * - DiBcom USB1.1 DVB-T reference design (MOD3000)
+ - 10b8:0bb8, 10b8:0bb9
+ * - Grandtec USB1.1 DVB-T
+ - 5032:0fa0, 5032:0bb8, 5032:0fa1, 5032:0bb9
+ * - KWorld V-Stream XPERT DTV - DVB-T USB1.1
+ - eb1a:17de, eb1a:17df
+ * - KWorld Xpert DVB-T USB2.0
+ - eb2a:17de
+ * - KWorld/ADSTech Instant DVB-T USB2.0
+ - 06e1:a333, 06e1:a334
+ * - TwinhanDTV USB-Ter USB1.1 / Magic Box I / HAMA USB1.1 DVB-T device
+ - 13d3:3201, 1822:3201, 13d3:3202, 1822:3202
+ * - Unknown USB1.1 DVB-T device ???? please report the name to the author
+ - 1025:005e, 1025:005f
+ * - VideoWalker DVB-T USB
+ - 0458:701e, 0458:701f
diff --git a/Documentation/admin-guide/media/dvb-usb-dibusb-mc-cardlist.rst b/Documentation/admin-guide/media/dvb-usb-dibusb-mc-cardlist.rst
new file mode 100644
index 000000000000..8d03bae0e084
--- /dev/null
+++ b/Documentation/admin-guide/media/dvb-usb-dibusb-mc-cardlist.rst
@@ -0,0 +1,30 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+dvb-usb-dibusb-mc cards list
+============================
+
+.. tabularcolumns:: |p{7.0cm}|p{10.5cm}|
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 7 13
+ :stub-columns: 0
+
+ * - Card name
+ - USB IDs
+ * - Artec T1 USB2.0 TVBOX (please check the warm ID)
+ - 05d8:8109, 05d8:810a
+ * - Artec T14 - USB2.0 DVB-T
+ - 05d8:810b, 05d8:810c
+ * - DiBcom USB2.0 DVB-T reference design (MOD3000P)
+ - 10b8:0bc6, 10b8:0bc7
+ * - GRAND - USB2.0 DVB-T adapter
+ - 5032:0bc6, 5032:0bc7
+ * - Humax/Coex DVB-T USB Stick 2.0 High Speed
+ - 10b9:5000, 10b9:5001
+ * - LITE-ON USB2.0 DVB-T Tuner
+ - 04ca:f000, 04ca:f001
+ * - Leadtek - USB2.0 Winfast DTV dongle
+ - 0413:6025, 0413:6026
+ * - MSI Digivox Mini SL
+ - eb1a:e360, eb1a:e361
diff --git a/Documentation/admin-guide/media/dvb-usb-digitv-cardlist.rst b/Documentation/admin-guide/media/dvb-usb-digitv-cardlist.rst
new file mode 100644
index 000000000000..2b4d8325e8e9
--- /dev/null
+++ b/Documentation/admin-guide/media/dvb-usb-digitv-cardlist.rst
@@ -0,0 +1,16 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+dvb-usb-digitv cards list
+=========================
+
+.. tabularcolumns:: |p{7.0cm}|p{10.5cm}|
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 7 13
+ :stub-columns: 0
+
+ * - Card name
+ - USB IDs
+ * - Nebula Electronics uDigiTV DVB-T USB2.0)
+ - 0547:0201
diff --git a/Documentation/admin-guide/media/dvb-usb-dtt200u-cardlist.rst b/Documentation/admin-guide/media/dvb-usb-dtt200u-cardlist.rst
new file mode 100644
index 000000000000..b4150a7bf31f
--- /dev/null
+++ b/Documentation/admin-guide/media/dvb-usb-dtt200u-cardlist.rst
@@ -0,0 +1,22 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+dvb-usb-dtt200u cards list
+==========================
+
+.. tabularcolumns:: |p{7.0cm}|p{10.5cm}|
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 7 13
+ :stub-columns: 0
+
+ * - Card name
+ - USB IDs
+ * - WideView WT-220U PenType Receiver (Miglia)
+ - 18f3:0220
+ * - WideView WT-220U PenType Receiver (Typhoon/Freecom)
+ - 14aa:0222, 14aa:0220, 14aa:0221, 14aa:0225, 14aa:0226
+ * - WideView WT-220U PenType Receiver (based on ZL353)
+ - 14aa:022a, 14aa:022b
+ * - WideView/Yuan/Yakumo/Hama/Typhoon DVB-T USB2.0 (WT-200U)
+ - 14aa:0201, 14aa:0301
diff --git a/Documentation/admin-guide/media/dvb-usb-dtv5100-cardlist.rst b/Documentation/admin-guide/media/dvb-usb-dtv5100-cardlist.rst
new file mode 100644
index 000000000000..91d6e35e6f9d
--- /dev/null
+++ b/Documentation/admin-guide/media/dvb-usb-dtv5100-cardlist.rst
@@ -0,0 +1,16 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+dvb-usb-dtv5100 cards list
+==========================
+
+.. tabularcolumns:: |p{7.0cm}|p{10.5cm}|
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 7 13
+ :stub-columns: 0
+
+ * - Card name
+ - USB IDs
+ * - AME DTV-5100 USB2.0 DVB-T
+ - 0x06be:0xa232
diff --git a/Documentation/admin-guide/media/dvb-usb-dvbsky-cardlist.rst b/Documentation/admin-guide/media/dvb-usb-dvbsky-cardlist.rst
new file mode 100644
index 000000000000..9f7b619f35f7
--- /dev/null
+++ b/Documentation/admin-guide/media/dvb-usb-dvbsky-cardlist.rst
@@ -0,0 +1,42 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+dvb-usb-dvbsky cards list
+=========================
+
+.. tabularcolumns:: |p{7.0cm}|p{10.5cm}|
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 7 13
+ :stub-columns: 0
+
+ * - Card name
+ - USB IDs
+ * - DVBSky S960/S860
+ - 0572:6831
+ * - DVBSky S960CI
+ - 0572:960c
+ * - DVBSky T330
+ - 0572:0320
+ * - DVBSky T680CI
+ - 0572:680c
+ * - MyGica Mini DVB-(T/T2/C) USB Stick T230
+ - 0572:c688
+ * - MyGica Mini DVB-(T/T2/C) USB Stick T230C
+ - 0572:c689
+ * - MyGica Mini DVB-(T/T2/C) USB Stick T230C Lite
+ - 0572:c699
+ * - MyGica Mini DVB-(T/T2/C) USB Stick T230C v2
+ - 0572:c68a
+ * - TechnoTrend TT-connect CT2-4650 CI
+ - 0b48:3012
+ * - TechnoTrend TT-connect CT2-4650 CI v1.1
+ - 0b48:3015
+ * - TechnoTrend TT-connect S2-4650 CI
+ - 0b48:3017
+ * - TechnoTrend TVStick CT2-4400
+ - 0b48:3014
+ * - Terratec Cinergy S2 Rev.4
+ - 0ccd:0105
+ * - Terratec H7 Rev.4
+ - 0ccd:10a5
diff --git a/Documentation/admin-guide/media/dvb-usb-dw2102-cardlist.rst b/Documentation/admin-guide/media/dvb-usb-dw2102-cardlist.rst
new file mode 100644
index 000000000000..e39bc8e4bffe
--- /dev/null
+++ b/Documentation/admin-guide/media/dvb-usb-dw2102-cardlist.rst
@@ -0,0 +1,56 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+dvb-usb-dw2102 cards list
+=========================
+
+.. tabularcolumns:: |p{7.0cm}|p{10.5cm}|
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 7 13
+ :stub-columns: 0
+
+ * - Card name
+ - USB IDs
+ * - DVBWorld DVB-C 3101 USB2.0
+ - 04b4:3101
+ * - DVBWorld DVB-S 2101 USB2.0
+ - 04b4:0x2101
+ * - DVBWorld DVB-S 2102 USB2.0
+ - 04b4:2102
+ * - DVBWorld DW2104 USB2.0
+ - 04b4:2104
+ * - GOTVIEW Satellite HD
+ - 0x1FE1:5456
+ * - Geniatech T220 DVB-T/T2 USB2.0
+ - 0x1f4d:0xD220
+ * - SU3000HD DVB-S USB2.0
+ - 0x1f4d:0x3000
+ * - TeVii S482 (tuner 1)
+ - 0x9022:0xd483
+ * - TeVii S482 (tuner 2)
+ - 0x9022:0xd484
+ * - TeVii S630 USB
+ - 0x9022:d630
+ * - TeVii S650 USB2.0
+ - 0x9022:d650
+ * - TeVii S662
+ - 0x9022:d662
+ * - TechnoTrend TT-connect S2-4600
+ - 0b48:3011
+ * - TerraTec Cinergy S USB
+ - 0ccd:0064
+ * - Terratec Cinergy S2 PCIe Dual Port 1
+ - 153b:1181
+ * - Terratec Cinergy S2 PCIe Dual Port 2
+ - 153b:1182
+ * - Terratec Cinergy S2 USB BOX
+ - 0ccd:0x0105
+ * - Terratec Cinergy S2 USB HD
+ - 0ccd:00a8
+ * - Terratec Cinergy S2 USB HD Rev.2
+ - 0ccd:00b0
+ * - Terratec Cinergy S2 USB HD Rev.3
+ - 0ccd:0102
+ * - X3M TV SPC1400HD PCI
+ - 0x1f4d:0x3100
diff --git a/Documentation/admin-guide/media/dvb-usb-ec168-cardlist.rst b/Documentation/admin-guide/media/dvb-usb-ec168-cardlist.rst
new file mode 100644
index 000000000000..a3660dfa5dcc
--- /dev/null
+++ b/Documentation/admin-guide/media/dvb-usb-ec168-cardlist.rst
@@ -0,0 +1,16 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+dvb-usb-ec168 cards list
+========================
+
+.. tabularcolumns:: |p{7.0cm}|p{10.5cm}|
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 7 13
+ :stub-columns: 0
+
+ * - Card name
+ - USB IDs
+ * - E3C EC168 reference design
+ - 18b4:1689, 18b4:fffa, 18b4:fffb, 18b4:1001, 18b4:1002
diff --git a/Documentation/admin-guide/media/dvb-usb-gl861-cardlist.rst b/Documentation/admin-guide/media/dvb-usb-gl861-cardlist.rst
new file mode 100644
index 000000000000..5ec62fe03d64
--- /dev/null
+++ b/Documentation/admin-guide/media/dvb-usb-gl861-cardlist.rst
@@ -0,0 +1,20 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+dvb-usb-gl861 cards list
+========================
+
+.. tabularcolumns:: |p{7.0cm}|p{10.5cm}|
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 7 13
+ :stub-columns: 0
+
+ * - Card name
+ - USB IDs
+ * - 774 Friio White ISDB-T USB2.0
+ - 7a69:0001
+ * - A-LINK DTU DVB-T USB2.0
+ - 05e3:f170
+ * - MSI Mega Sky 55801 DVB-T USB2.0
+ - 0db0:5581
diff --git a/Documentation/admin-guide/media/dvb-usb-gp8psk-cardlist.rst b/Documentation/admin-guide/media/dvb-usb-gp8psk-cardlist.rst
new file mode 100644
index 000000000000..150fa9f7810a
--- /dev/null
+++ b/Documentation/admin-guide/media/dvb-usb-gp8psk-cardlist.rst
@@ -0,0 +1,22 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+dvb-usb-gp8psk cards list
+=========================
+
+.. tabularcolumns:: |p{7.0cm}|p{10.5cm}|
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 7 13
+ :stub-columns: 0
+
+ * - Card name
+ - USB IDs
+ * - Genpix 8PSK-to-USB2 Rev.1 DVB-S receiver
+ - 09c0:0200, 09c0:0201
+ * - Genpix 8PSK-to-USB2 Rev.2 DVB-S receiver
+ - 09c0:0202
+ * - Genpix SkyWalker-1 DVB-S receiver
+ - 09c0:0203
+ * - Genpix SkyWalker-2 DVB-S receiver
+ - 09c0:0206
diff --git a/Documentation/admin-guide/media/dvb-usb-lmedm04-cardlist.rst b/Documentation/admin-guide/media/dvb-usb-lmedm04-cardlist.rst
new file mode 100644
index 000000000000..2050fbf03d4a
--- /dev/null
+++ b/Documentation/admin-guide/media/dvb-usb-lmedm04-cardlist.rst
@@ -0,0 +1,20 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+dvb-usb-lmedm04 cards list
+==========================
+
+.. tabularcolumns:: |p{7.0cm}|p{10.5cm}|
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 7 13
+ :stub-columns: 0
+
+ * - Card name
+ - USB IDs
+ * - DM04_LME2510C_DVB-S
+ - 3344:1120
+ * - DM04_LME2510C_DVB-S RS2000
+ - 3344:22f0
+ * - DM04_LME2510_DVB-S
+ - 3344:1122
diff --git a/Documentation/admin-guide/media/dvb-usb-m920x-cardlist.rst b/Documentation/admin-guide/media/dvb-usb-m920x-cardlist.rst
new file mode 100644
index 000000000000..73145940b5c5
--- /dev/null
+++ b/Documentation/admin-guide/media/dvb-usb-m920x-cardlist.rst
@@ -0,0 +1,26 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+dvb-usb-m920x cards list
+========================
+
+.. tabularcolumns:: |p{7.0cm}|p{10.5cm}|
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 7 13
+ :stub-columns: 0
+
+ * - Card name
+ - USB IDs
+ * - DTV-DVB UDTT7049
+ - 13d3:3219
+ * - Dposh DVB-T USB2.0
+ - 1498:9206, 1498:a090
+ * - LifeView TV Walker Twin DVB-T USB2.0
+ - 10fd:0514, 10fd:0513
+ * - MSI DIGI VOX mini II DVB-T USB2.0
+ - 10fd:1513
+ * - MSI Mega Sky 580 DVB-T USB2.0
+ - 0db0:5580
+ * - Pinnacle PCTV 310e
+ - 13d3:3211
diff --git a/Documentation/admin-guide/media/dvb-usb-mxl111sf-cardlist.rst b/Documentation/admin-guide/media/dvb-usb-mxl111sf-cardlist.rst
new file mode 100644
index 000000000000..6974801c43b6
--- /dev/null
+++ b/Documentation/admin-guide/media/dvb-usb-mxl111sf-cardlist.rst
@@ -0,0 +1,36 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+dvb-usb-mxl111sf cards list
+===========================
+
+.. tabularcolumns:: |p{7.0cm}|p{10.5cm}|
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 7 13
+ :stub-columns: 0
+
+ * - Card name
+ - USB IDs
+ * - HCW 117xxx
+ - 2040:b702
+ * - HCW 126xxx
+ - 2040:c602, 2040:c60a
+ * - Hauppauge 117xxx ATSC+
+ - 2040:b700, 2040:b703, 2040:b753, 2040:b763, 2040:b757, 2040:b767
+ * - Hauppauge 117xxx DVBT
+ - 2040:b704, 2040:b764
+ * - Hauppauge 126xxx
+ - 2040:c612, 2040:c61a
+ * - Hauppauge 126xxx ATSC
+ - 2040:c601, 2040:c609, 2040:b701
+ * - Hauppauge 126xxx ATSC+
+ - 2040:c600, 2040:c603, 2040:c60b, 2040:c653, 2040:c65b
+ * - Hauppauge 126xxx DVBT
+ - 2040:c604, 2040:c60c
+ * - Hauppauge 138xxx DVBT
+ - 2040:d854, 2040:d864, 2040:d8d4, 2040:d8e4
+ * - Hauppauge Mercury
+ - 2040:d853, 2040:d863, 2040:d8d3, 2040:d8e3, 2040:d8ff
+ * - Hauppauge WinTV-Aero-M
+ - 2040:c613, 2040:c61b
diff --git a/Documentation/admin-guide/media/dvb-usb-nova-t-usb2-cardlist.rst b/Documentation/admin-guide/media/dvb-usb-nova-t-usb2-cardlist.rst
new file mode 100644
index 000000000000..e295f912a585
--- /dev/null
+++ b/Documentation/admin-guide/media/dvb-usb-nova-t-usb2-cardlist.rst
@@ -0,0 +1,16 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+dvb-usb-nova-t-usb2 cards list
+==============================
+
+.. tabularcolumns:: |p{7.0cm}|p{10.5cm}|
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 7 13
+ :stub-columns: 0
+
+ * - Card name
+ - USB IDs
+ * - Hauppauge WinTV-NOVA-T usb2
+ - 2040:9300, 2040:9301
diff --git a/Documentation/admin-guide/media/dvb-usb-opera1-cardlist.rst b/Documentation/admin-guide/media/dvb-usb-opera1-cardlist.rst
new file mode 100644
index 000000000000..362245f5a46a
--- /dev/null
+++ b/Documentation/admin-guide/media/dvb-usb-opera1-cardlist.rst
@@ -0,0 +1,16 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+dvb-usb-opera1 cards list
+=========================
+
+.. tabularcolumns:: |p{7.0cm}|p{10.5cm}|
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 7 13
+ :stub-columns: 0
+
+ * - Card name
+ - USB IDs
+ * - Opera1 DVB-S USB2.0
+ - 04b4:2830, 695c:3829
diff --git a/Documentation/admin-guide/media/dvb-usb-pctv452e-cardlist.rst b/Documentation/admin-guide/media/dvb-usb-pctv452e-cardlist.rst
new file mode 100644
index 000000000000..886d8cc18acb
--- /dev/null
+++ b/Documentation/admin-guide/media/dvb-usb-pctv452e-cardlist.rst
@@ -0,0 +1,20 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+dvb-usb-pctv452e cards list
+===========================
+
+.. tabularcolumns:: |p{7.0cm}|p{10.5cm}|
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 7 13
+ :stub-columns: 0
+
+ * - Card name
+ - USB IDs
+ * - PCTV HDTV USB
+ - 2304:021f
+ * - Technotrend TT Connect S2-3600
+ - 0b48:3007
+ * - Technotrend TT Connect S2-3650-CI
+ - 0b48:300a
diff --git a/Documentation/admin-guide/media/dvb-usb-rtl28xxu-cardlist.rst b/Documentation/admin-guide/media/dvb-usb-rtl28xxu-cardlist.rst
new file mode 100644
index 000000000000..9f4295331a15
--- /dev/null
+++ b/Documentation/admin-guide/media/dvb-usb-rtl28xxu-cardlist.rst
@@ -0,0 +1,80 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+dvb-usb-rtl28xxu cards list
+===========================
+
+.. tabularcolumns:: |p{7.0cm}|p{10.5cm}|
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 7 13
+ :stub-columns: 0
+
+ * - Card name
+ - USB IDs
+ * - ASUS My Cinema-U3100Mini Plus V2
+ - 1b80:d3a8
+ * - Astrometa DVB-T2
+ - 15f4:0131
+ * - Compro VideoMate U620F
+ - 185b:0620
+ * - Compro VideoMate U650F
+ - 185b:0650
+ * - Crypto ReDi PC 50 A
+ - 1f4d:a803
+ * - Dexatek DK DVB-T Dongle
+ - 1d19:1101
+ * - Dexatek DK mini DVB-T Dongle
+ - 1d19:1102
+ * - DigitalNow Quad DVB-T Receiver
+ - 0413:6680
+ * - Freecom USB2.0 DVB-T
+ - 14aa:0160, 14aa:0161
+ * - G-Tek Electronics Group Lifeview LV5TDLX DVB-T
+ - 1f4d:b803
+ * - GIGABYTE U7300
+ - 1b80:d393
+ * - Genius TVGo DVB-T03
+ - 0458:707f
+ * - GoTView MasterHD 3
+ - 5654:ca42
+ * - Leadtek WinFast DTV Dongle mini
+ - 0413:6a03
+ * - Leadtek WinFast DTV2000DS Plus
+ - 0413:6f12
+ * - Leadtek Winfast DTV Dongle Mini D
+ - 0413:6f0f
+ * - MSI DIGIVOX Micro HD
+ - 1d19:1104
+ * - MaxMedia HU394-T
+ - 1b80:d394
+ * - PROlectrix DV107669
+ - 1f4d:d803
+ * - Peak DVB-T USB
+ - 1b80:d395
+ * - Realtek RTL2831U reference design
+ - 0bda:2831
+ * - Realtek RTL2832U reference design
+ - 0bda:2832, 0bda:2838
+ * - Sveon STV20
+ - 1b80:d39d
+ * - Sveon STV21
+ - 1b80:d3b0
+ * - Sveon STV27
+ - 1b80:d3af
+ * - TURBO-X Pure TV Tuner DTT-2000
+ - 1b80:d3a4
+ * - TerraTec Cinergy T Stick Black
+ - 0ccd:00a9
+ * - TerraTec Cinergy T Stick RC (Rev. 3)
+ - 0ccd:00d3
+ * - TerraTec Cinergy T Stick+
+ - 0ccd:00d7
+ * - TerraTec NOXON DAB Stick
+ - 0ccd:00b3
+ * - TerraTec NOXON DAB Stick (rev 2)
+ - 0ccd:00e0
+ * - TerraTec NOXON DAB Stick (rev 3)
+ - 0ccd:00b4
+ * - Trekstor DVB-T Stick Terres 2.0
+ - 1f4d:C803
diff --git a/Documentation/admin-guide/media/dvb-usb-technisat-usb2-cardlist.rst b/Documentation/admin-guide/media/dvb-usb-technisat-usb2-cardlist.rst
new file mode 100644
index 000000000000..30ee92ada134
--- /dev/null
+++ b/Documentation/admin-guide/media/dvb-usb-technisat-usb2-cardlist.rst
@@ -0,0 +1,16 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+dvb-usb-technisat-usb2 cards list
+=================================
+
+.. tabularcolumns:: |p{7.0cm}|p{10.5cm}|
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 7 13
+ :stub-columns: 0
+
+ * - Card name
+ - USB IDs
+ * - Technisat SkyStar USB HD (DVB-S/S2)
+ - 14f7:0500
diff --git a/Documentation/admin-guide/media/dvb-usb-ttusb2-cardlist.rst b/Documentation/admin-guide/media/dvb-usb-ttusb2-cardlist.rst
new file mode 100644
index 000000000000..faa78e5f3f5d
--- /dev/null
+++ b/Documentation/admin-guide/media/dvb-usb-ttusb2-cardlist.rst
@@ -0,0 +1,24 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+dvb-usb-ttusb2 cards list
+=========================
+
+.. tabularcolumns:: |p{7.0cm}|p{10.5cm}|
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 7 13
+ :stub-columns: 0
+
+ * - Card name
+ - USB IDs
+ * - Pinnacle 400e DVB-S USB2.0
+ - 2304:020f
+ * - Pinnacle 450e DVB-S USB2.0
+ - 2304:0222
+ * - Technotrend TT-connect CT-3650
+ - 0b48:300d
+ * - Technotrend TT-connect S-2400
+ - 0b48:3006
+ * - Technotrend TT-connect S-2400 (8kB EEPROM)
+ - 0b48:3009
diff --git a/Documentation/admin-guide/media/dvb-usb-umt-010-cardlist.rst b/Documentation/admin-guide/media/dvb-usb-umt-010-cardlist.rst
new file mode 100644
index 000000000000..ce7ce901b5ac
--- /dev/null
+++ b/Documentation/admin-guide/media/dvb-usb-umt-010-cardlist.rst
@@ -0,0 +1,16 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+dvb-usb-umt-010 cards list
+==========================
+
+.. tabularcolumns:: |p{7.0cm}|p{10.5cm}|
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 7 13
+ :stub-columns: 0
+
+ * - Card name
+ - USB IDs
+ * - Hanftek UMT-010 DVB-T USB2.0
+ - 15f4:0001, 15f4:0015
diff --git a/Documentation/admin-guide/media/dvb-usb-vp702x-cardlist.rst b/Documentation/admin-guide/media/dvb-usb-vp702x-cardlist.rst
new file mode 100644
index 000000000000..101442434268
--- /dev/null
+++ b/Documentation/admin-guide/media/dvb-usb-vp702x-cardlist.rst
@@ -0,0 +1,16 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+dvb-usb-vp702x cards list
+=========================
+
+.. tabularcolumns:: |p{7.0cm}|p{10.5cm}|
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 7 13
+ :stub-columns: 0
+
+ * - Card name
+ - USB IDs
+ * - TwinhanDTV StarBox DVB-S USB2.0 (VP7021)
+ - 13d3:3207
diff --git a/Documentation/admin-guide/media/dvb-usb-vp7045-cardlist.rst b/Documentation/admin-guide/media/dvb-usb-vp7045-cardlist.rst
new file mode 100644
index 000000000000..2fc8fc4ecc32
--- /dev/null
+++ b/Documentation/admin-guide/media/dvb-usb-vp7045-cardlist.rst
@@ -0,0 +1,18 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+dvb-usb-vp7045 cards list
+=========================
+
+.. tabularcolumns:: |p{7.0cm}|p{10.5cm}|
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 7 13
+ :stub-columns: 0
+
+ * - Card name
+ - USB IDs
+ * - DigitalNow TinyUSB 2 DVB-t Receiver
+ - 13d3:3223, 13d3:3224
+ * - Twinhan USB2.0 DVB-T receiver (TwinhanDTV Alpha/MagicBox II)
+ - 13d3:3205, 13d3:3206
diff --git a/Documentation/admin-guide/media/dvb-usb-zd1301-cardlist.rst b/Documentation/admin-guide/media/dvb-usb-zd1301-cardlist.rst
new file mode 100644
index 000000000000..9ca446184753
--- /dev/null
+++ b/Documentation/admin-guide/media/dvb-usb-zd1301-cardlist.rst
@@ -0,0 +1,16 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+dvb-usb-zd1301 cards list
+=========================
+
+.. tabularcolumns:: |p{7.0cm}|p{10.5cm}|
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 7 13
+ :stub-columns: 0
+
+ * - Card name
+ - USB IDs
+ * - ZyDAS ZD1301 reference design
+ - 0ace:13a1
diff --git a/Documentation/admin-guide/media/dvb.rst b/Documentation/admin-guide/media/dvb.rst
new file mode 100644
index 000000000000..e5258bfa5cd9
--- /dev/null
+++ b/Documentation/admin-guide/media/dvb.rst
@@ -0,0 +1,12 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========
+Digital TV
+==========
+
+.. toctree::
+
+ dvb_intro
+ ci
+ faq
+ dvb_references
diff --git a/Documentation/admin-guide/media/dvb_intro.rst b/Documentation/admin-guide/media/dvb_intro.rst
new file mode 100644
index 000000000000..44eac9b3be6c
--- /dev/null
+++ b/Documentation/admin-guide/media/dvb_intro.rst
@@ -0,0 +1,616 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============================
+Using the Digital TV Framework
+==============================
+
+Introduction
+~~~~~~~~~~~~
+
+One significant difference between Digital TV and Analogue TV that the
+unwary (like myself) should consider is that, although the component
+structure of DVB-T cards are substantially similar to Analogue TV cards,
+they function in substantially different ways.
+
+The purpose of an Analogue TV is to receive and display an Analogue
+Television signal. An Analogue TV signal (otherwise known as composite
+video) is an analogue encoding of a sequence of image frames (25 frames
+per second in Europe) rasterised using an interlacing technique.
+Interlacing takes two fields to represent one frame. Therefore, an
+Analogue TV card for a PC has the following purpose:
+
+* Tune the receiver to receive a broadcast signal
+* demodulate the broadcast signal
+* demultiplex the analogue video signal and analogue audio
+ signal.
+
+ .. note::
+
+ some countries employ a digital audio signal
+ embedded within the modulated composite analogue signal -
+ using NICAM signaling.)
+
+* digitize the analogue video signal and make the resulting datastream
+ available to the data bus.
+
+The digital datastream from an Analogue TV card is generated by
+circuitry on the card and is often presented uncompressed. For a PAL TV
+signal encoded at a resolution of 768x576 24-bit color pixels over 25
+frames per second - a fair amount of data is generated and must be
+processed by the PC before it can be displayed on the video monitor
+screen. Some Analogue TV cards for PCs have onboard MPEG2 encoders which
+permit the raw digital data stream to be presented to the PC in an
+encoded and compressed form - similar to the form that is used in
+Digital TV.
+
+The purpose of a simple budget digital TV card (DVB-T,C or S) is to
+simply:
+
+* Tune the received to receive a broadcast signal. * Extract the encoded
+ digital datastream from the broadcast signal.
+* Make the encoded digital datastream (MPEG2) available to the data bus.
+
+The significant difference between the two is that the tuner on the
+analogue TV card spits out an Analogue signal, whereas the tuner on the
+digital TV card spits out a compressed encoded digital datastream. As
+the signal is already digitised, it is trivial to pass this datastream
+to the PC databus with minimal additional processing and then extract
+the digital video and audio datastreams passing them to the appropriate
+software or hardware for decoding and viewing.
+
+Getting the card going
+~~~~~~~~~~~~~~~~~~~~~~
+
+The Device Driver API for DVB under Linux will the following
+device nodes via the devfs filesystem:
+
+* /dev/dvb/adapter0/demux0
+* /dev/dvb/adapter0/dvr0
+* /dev/dvb/adapter0/frontend0
+
+The ``/dev/dvb/adapter0/dvr0`` device node is used to read the MPEG2
+Data Stream and the ``/dev/dvb/adapter0/frontend0`` device node is used
+to tune the frontend tuner module. The ``/dev/dvb/adapter0/demux0`` is
+used to control what programs will be received.
+
+Depending on the card's feature set, the Device Driver API could also
+expose other device nodes:
+
+* /dev/dvb/adapter0/ca0
+* /dev/dvb/adapter0/audio0
+* /dev/dvb/adapter0/net0
+* /dev/dvb/adapter0/osd0
+* /dev/dvb/adapter0/video0
+
+The ``/dev/dvb/adapter0/ca0`` is used to decode encrypted channels. The
+other device nodes are found only on devices that use the av7110
+driver, with is now obsoleted, together with the extra API whose such
+devices use.
+
+Receiving a digital TV channel
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This section attempts to explain how it works and how this affects the
+configuration of a Digital TV card.
+
+On this example, we're considering tuning into DVB-T channels in
+Australia, at the Melbourne region.
+
+The frequencies broadcast by Mount Dandenong transmitters are,
+currently:
+
+Table 1. Transponder Frequencies Mount Dandenong, Vic, Aus.
+
+=========== ===========
+Broadcaster Frequency
+=========== ===========
+Seven 177.500 Mhz
+SBS 184.500 Mhz
+Nine 191.625 Mhz
+Ten 219.500 Mhz
+ABC 226.500 Mhz
+Channel 31 557.625 Mhz
+=========== ===========
+
+The digital TV Scan utilities (like dvbv5-scan) have use a set of
+compiled-in defaults for various countries and regions. Those are
+currently provided as a separate package, called dtv-scan-tables. It's
+git tree is located at LinuxTV.org:
+
+ https://git.linuxtv.org/dtv-scan-tables.git/
+
+If none of the tables there suit, you can specify a data file on the
+command line which contains the transponder frequencies. Here is a
+sample file for the above channel transponders, in the old "channel"
+format::
+
+ # Data file for DVB scan program
+ #
+ # C Frequency SymbolRate FEC QAM
+ # S Frequency Polarisation SymbolRate FEC
+ # T Frequency Bandwidth FEC FEC2 QAM Mode Guard Hier
+
+ T 177500000 7MHz AUTO AUTO QAM64 8k 1/16 NONE
+ T 184500000 7MHz AUTO AUTO QAM64 8k 1/8 NONE
+ T 191625000 7MHz AUTO AUTO QAM64 8k 1/16 NONE
+ T 219500000 7MHz AUTO AUTO QAM64 8k 1/16 NONE
+ T 226500000 7MHz AUTO AUTO QAM64 8k 1/16 NONE
+ T 557625000 7MHz AUTO AUTO QPSK 8k 1/16 NONE
+
+Nowadays, we prefer to use a newer format, with is more verbose and easier
+to understand. With the new format, the "Seven" channel transponder's
+data is represented by::
+
+ [Seven]
+ DELIVERY_SYSTEM = DVBT
+ FREQUENCY = 177500000
+ BANDWIDTH_HZ = 7000000
+ CODE_RATE_HP = AUTO
+ CODE_RATE_LP = AUTO
+ MODULATION = QAM/64
+ TRANSMISSION_MODE = 8K
+ GUARD_INTERVAL = 1/16
+ HIERARCHY = NONE
+ INVERSION = AUTO
+
+For an updated version of the complete table, please see:
+
+ https://git.linuxtv.org/dtv-scan-tables.git/tree/dvb-t/au-Melbourne
+
+When the Digital TV scanning utility runs, it will output a file
+containing the information for all the audio and video programs that
+exists into each channel's transponders which the card's frontend can
+lock onto. (i.e. any whose signal is strong enough at your antenna).
+
+Here's the output of the dvbv5 tools from a channel scan took from
+Melburne::
+
+ [ABC HDTV]
+ SERVICE_ID = 560
+ VIDEO_PID = 2307
+ AUDIO_PID = 0
+ DELIVERY_SYSTEM = DVBT
+ FREQUENCY = 226500000
+ INVERSION = OFF
+ BANDWIDTH_HZ = 7000000
+ CODE_RATE_HP = 3/4
+ CODE_RATE_LP = 3/4
+ MODULATION = QAM/64
+ TRANSMISSION_MODE = 8K
+ GUARD_INTERVAL = 1/16
+ HIERARCHY = NONE
+
+ [ABC TV Melbourne]
+ SERVICE_ID = 561
+ VIDEO_PID = 512
+ AUDIO_PID = 650
+ DELIVERY_SYSTEM = DVBT
+ FREQUENCY = 226500000
+ INVERSION = OFF
+ BANDWIDTH_HZ = 7000000
+ CODE_RATE_HP = 3/4
+ CODE_RATE_LP = 3/4
+ MODULATION = QAM/64
+ TRANSMISSION_MODE = 8K
+ GUARD_INTERVAL = 1/16
+ HIERARCHY = NONE
+
+ [ABC TV 2]
+ SERVICE_ID = 562
+ VIDEO_PID = 512
+ AUDIO_PID = 650
+ DELIVERY_SYSTEM = DVBT
+ FREQUENCY = 226500000
+ INVERSION = OFF
+ BANDWIDTH_HZ = 7000000
+ CODE_RATE_HP = 3/4
+ CODE_RATE_LP = 3/4
+ MODULATION = QAM/64
+ TRANSMISSION_MODE = 8K
+ GUARD_INTERVAL = 1/16
+ HIERARCHY = NONE
+
+ [ABC TV 3]
+ SERVICE_ID = 563
+ VIDEO_PID = 512
+ AUDIO_PID = 650
+ DELIVERY_SYSTEM = DVBT
+ FREQUENCY = 226500000
+ INVERSION = OFF
+ BANDWIDTH_HZ = 7000000
+ CODE_RATE_HP = 3/4
+ CODE_RATE_LP = 3/4
+ MODULATION = QAM/64
+ TRANSMISSION_MODE = 8K
+ GUARD_INTERVAL = 1/16
+ HIERARCHY = NONE
+
+ [ABC TV 4]
+ SERVICE_ID = 564
+ VIDEO_PID = 512
+ AUDIO_PID = 650
+ DELIVERY_SYSTEM = DVBT
+ FREQUENCY = 226500000
+ INVERSION = OFF
+ BANDWIDTH_HZ = 7000000
+ CODE_RATE_HP = 3/4
+ CODE_RATE_LP = 3/4
+ MODULATION = QAM/64
+ TRANSMISSION_MODE = 8K
+ GUARD_INTERVAL = 1/16
+ HIERARCHY = NONE
+
+ [ABC DiG Radio]
+ SERVICE_ID = 566
+ VIDEO_PID = 0
+ AUDIO_PID = 2311
+ DELIVERY_SYSTEM = DVBT
+ FREQUENCY = 226500000
+ INVERSION = OFF
+ BANDWIDTH_HZ = 7000000
+ CODE_RATE_HP = 3/4
+ CODE_RATE_LP = 3/4
+ MODULATION = QAM/64
+ TRANSMISSION_MODE = 8K
+ GUARD_INTERVAL = 1/16
+ HIERARCHY = NONE
+
+ [TEN Digital]
+ SERVICE_ID = 1585
+ VIDEO_PID = 512
+ AUDIO_PID = 650
+ DELIVERY_SYSTEM = DVBT
+ FREQUENCY = 219500000
+ INVERSION = OFF
+ BANDWIDTH_HZ = 7000000
+ CODE_RATE_HP = 3/4
+ CODE_RATE_LP = 1/2
+ MODULATION = QAM/64
+ TRANSMISSION_MODE = 8K
+ GUARD_INTERVAL = 1/16
+ HIERARCHY = NONE
+
+ [TEN Digital 1]
+ SERVICE_ID = 1586
+ VIDEO_PID = 512
+ AUDIO_PID = 650
+ DELIVERY_SYSTEM = DVBT
+ FREQUENCY = 219500000
+ INVERSION = OFF
+ BANDWIDTH_HZ = 7000000
+ CODE_RATE_HP = 3/4
+ CODE_RATE_LP = 1/2
+ MODULATION = QAM/64
+ TRANSMISSION_MODE = 8K
+ GUARD_INTERVAL = 1/16
+ HIERARCHY = NONE
+
+ [TEN Digital 2]
+ SERVICE_ID = 1587
+ VIDEO_PID = 512
+ AUDIO_PID = 650
+ DELIVERY_SYSTEM = DVBT
+ FREQUENCY = 219500000
+ INVERSION = OFF
+ BANDWIDTH_HZ = 7000000
+ CODE_RATE_HP = 3/4
+ CODE_RATE_LP = 1/2
+ MODULATION = QAM/64
+ TRANSMISSION_MODE = 8K
+ GUARD_INTERVAL = 1/16
+ HIERARCHY = NONE
+
+ [TEN Digital 3]
+ SERVICE_ID = 1588
+ VIDEO_PID = 512
+ AUDIO_PID = 650
+ DELIVERY_SYSTEM = DVBT
+ FREQUENCY = 219500000
+ INVERSION = OFF
+ BANDWIDTH_HZ = 7000000
+ CODE_RATE_HP = 3/4
+ CODE_RATE_LP = 1/2
+ MODULATION = QAM/64
+ TRANSMISSION_MODE = 8K
+ GUARD_INTERVAL = 1/16
+ HIERARCHY = NONE
+
+ [TEN Digital]
+ SERVICE_ID = 1589
+ VIDEO_PID = 512
+ AUDIO_PID = 650
+ DELIVERY_SYSTEM = DVBT
+ FREQUENCY = 219500000
+ INVERSION = OFF
+ BANDWIDTH_HZ = 7000000
+ CODE_RATE_HP = 3/4
+ CODE_RATE_LP = 1/2
+ MODULATION = QAM/64
+ TRANSMISSION_MODE = 8K
+ GUARD_INTERVAL = 1/16
+ HIERARCHY = NONE
+
+ [TEN Digital 4]
+ SERVICE_ID = 1590
+ VIDEO_PID = 512
+ AUDIO_PID = 650
+ DELIVERY_SYSTEM = DVBT
+ FREQUENCY = 219500000
+ INVERSION = OFF
+ BANDWIDTH_HZ = 7000000
+ CODE_RATE_HP = 3/4
+ CODE_RATE_LP = 1/2
+ MODULATION = QAM/64
+ TRANSMISSION_MODE = 8K
+ GUARD_INTERVAL = 1/16
+ HIERARCHY = NONE
+
+ [TEN Digital]
+ SERVICE_ID = 1591
+ VIDEO_PID = 512
+ AUDIO_PID = 650
+ DELIVERY_SYSTEM = DVBT
+ FREQUENCY = 219500000
+ INVERSION = OFF
+ BANDWIDTH_HZ = 7000000
+ CODE_RATE_HP = 3/4
+ CODE_RATE_LP = 1/2
+ MODULATION = QAM/64
+ TRANSMISSION_MODE = 8K
+ GUARD_INTERVAL = 1/16
+ HIERARCHY = NONE
+
+ [TEN HD]
+ SERVICE_ID = 1592
+ VIDEO_PID = 514
+ AUDIO_PID = 0
+ DELIVERY_SYSTEM = DVBT
+ FREQUENCY = 219500000
+ INVERSION = OFF
+ BANDWIDTH_HZ = 7000000
+ CODE_RATE_HP = 3/4
+ CODE_RATE_LP = 1/2
+ MODULATION = QAM/64
+ TRANSMISSION_MODE = 8K
+ GUARD_INTERVAL = 1/16
+ HIERARCHY = NONE
+
+ [TEN Digital]
+ SERVICE_ID = 1593
+ VIDEO_PID = 512
+ AUDIO_PID = 650
+ DELIVERY_SYSTEM = DVBT
+ FREQUENCY = 219500000
+ INVERSION = OFF
+ BANDWIDTH_HZ = 7000000
+ CODE_RATE_HP = 3/4
+ CODE_RATE_LP = 1/2
+ MODULATION = QAM/64
+ TRANSMISSION_MODE = 8K
+ GUARD_INTERVAL = 1/16
+ HIERARCHY = NONE
+
+ [Nine Digital]
+ SERVICE_ID = 1072
+ VIDEO_PID = 513
+ AUDIO_PID = 660
+ DELIVERY_SYSTEM = DVBT
+ FREQUENCY = 191625000
+ INVERSION = OFF
+ BANDWIDTH_HZ = 7000000
+ CODE_RATE_HP = 3/4
+ CODE_RATE_LP = 1/2
+ MODULATION = QAM/64
+ TRANSMISSION_MODE = 8K
+ GUARD_INTERVAL = 1/16
+ HIERARCHY = NONE
+
+ [Nine Digital HD]
+ SERVICE_ID = 1073
+ VIDEO_PID = 512
+ AUDIO_PID = 0
+ DELIVERY_SYSTEM = DVBT
+ FREQUENCY = 191625000
+ INVERSION = OFF
+ BANDWIDTH_HZ = 7000000
+ CODE_RATE_HP = 3/4
+ CODE_RATE_LP = 1/2
+ MODULATION = QAM/64
+ TRANSMISSION_MODE = 8K
+ GUARD_INTERVAL = 1/16
+ HIERARCHY = NONE
+
+ [Nine Guide]
+ SERVICE_ID = 1074
+ VIDEO_PID = 514
+ AUDIO_PID = 670
+ DELIVERY_SYSTEM = DVBT
+ FREQUENCY = 191625000
+ INVERSION = OFF
+ BANDWIDTH_HZ = 7000000
+ CODE_RATE_HP = 3/4
+ CODE_RATE_LP = 1/2
+ MODULATION = QAM/64
+ TRANSMISSION_MODE = 8K
+ GUARD_INTERVAL = 1/16
+ HIERARCHY = NONE
+
+ [7 Digital]
+ SERVICE_ID = 1328
+ VIDEO_PID = 769
+ AUDIO_PID = 770
+ DELIVERY_SYSTEM = DVBT
+ FREQUENCY = 177500000
+ INVERSION = OFF
+ BANDWIDTH_HZ = 7000000
+ CODE_RATE_HP = 2/3
+ CODE_RATE_LP = 2/3
+ MODULATION = QAM/64
+ TRANSMISSION_MODE = 8K
+ GUARD_INTERVAL = 1/8
+ HIERARCHY = NONE
+
+ [7 Digital 1]
+ SERVICE_ID = 1329
+ VIDEO_PID = 769
+ AUDIO_PID = 770
+ DELIVERY_SYSTEM = DVBT
+ FREQUENCY = 177500000
+ INVERSION = OFF
+ BANDWIDTH_HZ = 7000000
+ CODE_RATE_HP = 2/3
+ CODE_RATE_LP = 2/3
+ MODULATION = QAM/64
+ TRANSMISSION_MODE = 8K
+ GUARD_INTERVAL = 1/8
+ HIERARCHY = NONE
+
+ [7 Digital 2]
+ SERVICE_ID = 1330
+ VIDEO_PID = 769
+ AUDIO_PID = 770
+ DELIVERY_SYSTEM = DVBT
+ FREQUENCY = 177500000
+ INVERSION = OFF
+ BANDWIDTH_HZ = 7000000
+ CODE_RATE_HP = 2/3
+ CODE_RATE_LP = 2/3
+ MODULATION = QAM/64
+ TRANSMISSION_MODE = 8K
+ GUARD_INTERVAL = 1/8
+ HIERARCHY = NONE
+
+ [7 Digital 3]
+ SERVICE_ID = 1331
+ VIDEO_PID = 769
+ AUDIO_PID = 770
+ DELIVERY_SYSTEM = DVBT
+ FREQUENCY = 177500000
+ INVERSION = OFF
+ BANDWIDTH_HZ = 7000000
+ CODE_RATE_HP = 2/3
+ CODE_RATE_LP = 2/3
+ MODULATION = QAM/64
+ TRANSMISSION_MODE = 8K
+ GUARD_INTERVAL = 1/8
+ HIERARCHY = NONE
+
+ [7 HD Digital]
+ SERVICE_ID = 1332
+ VIDEO_PID = 833
+ AUDIO_PID = 834
+ DELIVERY_SYSTEM = DVBT
+ FREQUENCY = 177500000
+ INVERSION = OFF
+ BANDWIDTH_HZ = 7000000
+ CODE_RATE_HP = 2/3
+ CODE_RATE_LP = 2/3
+ MODULATION = QAM/64
+ TRANSMISSION_MODE = 8K
+ GUARD_INTERVAL = 1/8
+ HIERARCHY = NONE
+
+ [7 Program Guide]
+ SERVICE_ID = 1334
+ VIDEO_PID = 865
+ AUDIO_PID = 866
+ DELIVERY_SYSTEM = DVBT
+ FREQUENCY = 177500000
+ INVERSION = OFF
+ BANDWIDTH_HZ = 7000000
+ CODE_RATE_HP = 2/3
+ CODE_RATE_LP = 2/3
+ MODULATION = QAM/64
+ TRANSMISSION_MODE = 8K
+ GUARD_INTERVAL = 1/8
+ HIERARCHY = NONE
+
+ [SBS HD]
+ SERVICE_ID = 784
+ VIDEO_PID = 102
+ AUDIO_PID = 103
+ DELIVERY_SYSTEM = DVBT
+ FREQUENCY = 536500000
+ INVERSION = OFF
+ BANDWIDTH_HZ = 7000000
+ CODE_RATE_HP = 2/3
+ CODE_RATE_LP = 2/3
+ MODULATION = QAM/64
+ TRANSMISSION_MODE = 8K
+ GUARD_INTERVAL = 1/8
+ HIERARCHY = NONE
+
+ [SBS DIGITAL 1]
+ SERVICE_ID = 785
+ VIDEO_PID = 161
+ AUDIO_PID = 81
+ DELIVERY_SYSTEM = DVBT
+ FREQUENCY = 536500000
+ INVERSION = OFF
+ BANDWIDTH_HZ = 7000000
+ CODE_RATE_HP = 2/3
+ CODE_RATE_LP = 2/3
+ MODULATION = QAM/64
+ TRANSMISSION_MODE = 8K
+ GUARD_INTERVAL = 1/8
+ HIERARCHY = NONE
+
+ [SBS DIGITAL 2]
+ SERVICE_ID = 786
+ VIDEO_PID = 162
+ AUDIO_PID = 83
+ DELIVERY_SYSTEM = DVBT
+ FREQUENCY = 536500000
+ INVERSION = OFF
+ BANDWIDTH_HZ = 7000000
+ CODE_RATE_HP = 2/3
+ CODE_RATE_LP = 2/3
+ MODULATION = QAM/64
+ TRANSMISSION_MODE = 8K
+ GUARD_INTERVAL = 1/8
+ HIERARCHY = NONE
+
+ [SBS EPG]
+ SERVICE_ID = 787
+ VIDEO_PID = 163
+ AUDIO_PID = 85
+ DELIVERY_SYSTEM = DVBT
+ FREQUENCY = 536500000
+ INVERSION = OFF
+ BANDWIDTH_HZ = 7000000
+ CODE_RATE_HP = 2/3
+ CODE_RATE_LP = 2/3
+ MODULATION = QAM/64
+ TRANSMISSION_MODE = 8K
+ GUARD_INTERVAL = 1/8
+ HIERARCHY = NONE
+
+ [SBS RADIO 1]
+ SERVICE_ID = 798
+ VIDEO_PID = 0
+ AUDIO_PID = 201
+ DELIVERY_SYSTEM = DVBT
+ FREQUENCY = 536500000
+ INVERSION = OFF
+ BANDWIDTH_HZ = 7000000
+ CODE_RATE_HP = 2/3
+ CODE_RATE_LP = 2/3
+ MODULATION = QAM/64
+ TRANSMISSION_MODE = 8K
+ GUARD_INTERVAL = 1/8
+ HIERARCHY = NONE
+
+ [SBS RADIO 2]
+ SERVICE_ID = 799
+ VIDEO_PID = 0
+ AUDIO_PID = 202
+ DELIVERY_SYSTEM = DVBT
+ FREQUENCY = 536500000
+ INVERSION = OFF
+ BANDWIDTH_HZ = 7000000
+ CODE_RATE_HP = 2/3
+ CODE_RATE_LP = 2/3
+ MODULATION = QAM/64
+ TRANSMISSION_MODE = 8K
+ GUARD_INTERVAL = 1/8
+ HIERARCHY = NONE
diff --git a/Documentation/admin-guide/media/dvb_references.rst b/Documentation/admin-guide/media/dvb_references.rst
new file mode 100644
index 000000000000..4f0fd4259cfa
--- /dev/null
+++ b/Documentation/admin-guide/media/dvb_references.rst
@@ -0,0 +1,29 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+References
+==========
+
+The main development site and GIT repository for Digital TV
+drivers is https://linuxtv.org.
+
+The DVB mailing list linux-dvb is hosted at vger. Please see
+http://vger.kernel.org/vger-lists.html#linux-media for details.
+
+There are also some other old lists hosted at:
+https://linuxtv.org/lists.php. If you're interested on that for historic
+reasons, please check the archive at https://linuxtv.org/pipermail/linux-dvb/.
+
+The media subsystem Wiki is hosted at https://linuxtv.org/wiki/.
+There, you'll find lots of information, from both development and usage
+of media boards. Please check it before asking newbie questions on the
+mailing list or IRC channels.
+
+The API documentation is documented at the Kernel tree. You can find it
+in both html and pdf formats, together with other useful documentation at:
+
+ - https://linuxtv.org/docs.php.
+
+You may also find useful material at https://linuxtv.org/downloads/.
+
+In order to get the needed firmware for some drivers to work, there's
+a script at the kernel tree, at scripts/get_dvb_firmware.
diff --git a/Documentation/media/v4l-drivers/em28xx-cardlist.rst b/Documentation/admin-guide/media/em28xx-cardlist.rst
index dfe882ca945f..7dac07986d91 100644
--- a/Documentation/media/v4l-drivers/em28xx-cardlist.rst
+++ b/Documentation/admin-guide/media/em28xx-cardlist.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
EM28xx cards list
=================
@@ -233,7 +235,7 @@ EM28xx cards list
- em2882
- eb1a:e323
* - 55
- - Terratec Cinnergy Hybrid T USB XS (em2882)
+ - Terratec Cinergy Hybrid T USB XS (em2882)
- em2882
- 0ccd:005e, 0ccd:0042
* - 56
@@ -391,7 +393,7 @@ EM28xx cards list
* - 94
- PCTV tripleStick (292e)
- em28178
- - 2013:025f, 2013:0264, 2040:0264, 2040:8264, 2040:8268, 2040:8268
+ - 2013:025f, 2013:0264, 2040:0264, 2040:8264, 2040:8268
* - 95
- Leadtek VC100
- em2861
@@ -424,3 +426,23 @@ EM28xx cards list
- :ZOLID HYBRID TV STICK
- em2882
-
+ * - 103
+ - Magix USB Videowandler-2
+ - em2861
+ - 1b80:e349
+ * - 104
+ - PCTV DVB-S2 Stick (461e v2)
+ - em28178
+ - 2013:0461, 2013:0259
+ * - 105
+ - MyGica iGrabber
+ - em2860
+ - 1f4d:1abe
+ * - 106
+ - Hauppauge USB QuadHD ATSC
+ - em28274
+ - 2040:846d
+ * - 107
+ - MyGica UTV3 Analog USB2.0 TV Box
+ - em2860
+ - eb1a:2860
diff --git a/Documentation/admin-guide/media/faq.rst b/Documentation/admin-guide/media/faq.rst
new file mode 100644
index 000000000000..b63548b6f313
--- /dev/null
+++ b/Documentation/admin-guide/media/faq.rst
@@ -0,0 +1,216 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+FAQ
+===
+
+.. note::
+
+ 1. With Digital TV, a single physical channel may have different
+ contents inside it. The specs call each one as a *service*.
+ This is what a TV user would call "channel". So, in order to
+ avoid confusion, we're calling *transponders* as the physical
+ channel on this FAQ, and *services* for the logical channel.
+ 2. The LinuxTV community maintains some Wiki pages with contain
+ a lot of information related to the media subsystem. If you
+ don't find an answer for your needs here, it is likely that
+ you'll be able to get something useful there. It is hosted
+ at:
+
+ https://www.linuxtv.org/wiki/
+
+Some very frequently asked questions about Linux Digital TV support
+
+1. The signal seems to die a few seconds after tuning.
+
+ It's not a bug, it's a feature. Because the frontends have
+ significant power requirements (and hence get very hot), they
+ are powered down if they are unused (i.e. if the frontend device
+ is closed). The ``dvb-core`` module parameter ``dvb_shutdown_timeout``
+ allow you to change the timeout (default 5 seconds). Setting the
+ timeout to 0 disables the timeout feature.
+
+2. How can I watch TV?
+
+ Together with the Linux Kernel, the Digital TV developers support
+ some simple utilities which are mainly intended for testing
+ and to demonstrate how the DVB API works. This is called DVB v5
+ tools and are grouped together with the ``v4l-utils`` git repository:
+
+ https://git.linuxtv.org/v4l-utils.git/
+
+ You can find more information at the LinuxTV wiki:
+
+ https://www.linuxtv.org/wiki/index.php/DVBv5_Tools
+
+ The first step is to get a list of services that are transmitted.
+
+ This is done by using several existing tools. You can use
+ for example the ``dvbv5-scan`` tool. You can find more information
+ about it at:
+
+ https://www.linuxtv.org/wiki/index.php/Dvbv5-scan
+
+ There are some other applications like ``w_scan`` [#]_ that do a
+ blind scan, trying hard to find all possible channels, but
+ those consumes a large amount of time to run.
+
+ .. [#] https://www.linuxtv.org/wiki/index.php/W_scan
+
+ Also, some applications like ``kaffeine`` have their own code
+ to scan for services. So, you don't need to use an external
+ application to obtain such list.
+
+ Most of such tools need a file containing a list of channel
+ transponders available on your area. So, LinuxTV developers
+ maintain tables of Digital TV channel transponders, receiving
+ patches from the community to keep them updated.
+
+ This list is hosted at:
+
+ https://git.linuxtv.org/dtv-scan-tables.git
+
+ And packaged on several distributions.
+
+ Kaffeine has some blind scan support for some terrestrial standards.
+ It also relies on DTV scan tables, although it contains a copy
+ of it internally (and, if requested by the user, it will download
+ newer versions of it).
+
+ If you are lucky you can just use one of the supplied channel
+ transponders. If not, you may need to seek for such info at
+ the Internet and create a new file. There are several sites with
+ contains physical channel lists. For cable and satellite, usually
+ knowing how to tune into a single channel is enough for the
+ scanning tool to identify the other channels. On some places,
+ this could also work for terrestrial transmissions.
+
+ Once you have a transponders list, you need to generate a services
+ list with a tool like ``dvbv5-scan``.
+
+ Almost all modern Digital TV cards don't have built-in hardware
+ MPEG-decoders. So, it is up to the application to get a MPEG-TS
+ stream provided by the board, split it into audio, video and other
+ data and decode.
+
+3. Which Digital TV applications exist?
+
+ Several media player applications are capable of tuning into
+ digital TV channels, including Kaffeine, Vlc, mplayer and MythTV.
+
+ Kaffeine aims to be very user-friendly, and it is maintained
+ by one of the Kernel driver developers.
+
+ A comprehensive list of those and other apps can be found at:
+
+ https://www.linuxtv.org/wiki/index.php/TV_Related_Software
+
+ Some of the most popular ones are linked below:
+
+ https://kde.org/applications/multimedia/org.kde.kaffeine
+ KDE media player, focused on Digital TV support
+
+ https://www.linuxtv.org/vdrwiki/index.php/Main_Page
+ Klaus Schmidinger's Video Disk Recorder
+
+ https://linuxtv.org/downloads and https://git.linuxtv.org/
+ Digital TV and other media-related applications and
+ Kernel drivers. The ``v4l-utils`` package there contains
+ several swiss knife tools for using with Digital TV.
+
+ http://sourceforge.net/projects/dvbtools/
+ Dave Chapman's dvbtools package, including
+ dvbstream and dvbtune
+
+ http://www.dbox2.info/
+ LinuxDVB on the dBox2
+
+ http://www.tuxbox.org/
+ the TuxBox CVS many interesting DVB applications and the dBox2
+ DVB source
+
+ http://www.nenie.org/misc/mpsys/
+ MPSYS: a MPEG2 system library and tools
+
+ https://www.videolan.org/vlc/index.pt.html
+ Vlc
+
+ http://mplayerhq.hu/
+ MPlayer
+
+ http://xine.sourceforge.net/ and http://xinehq.de/
+ Xine
+
+ http://www.mythtv.org/
+ MythTV - analog TV and digital TV PVR
+
+ http://dvbsnoop.sourceforge.net/
+ DVB sniffer program to monitor, analyze, debug, dump
+ or view dvb/mpeg/dsm-cc/mhp stream information (TS,
+ PES, SECTION)
+
+4. Can't get a signal tuned correctly
+
+ That could be due to a lot of problems. On my personal experience,
+ usually TV cards need stronger signals than TV sets, and are more
+ sensitive to noise. So, perhaps you just need a better antenna or
+ cabling. Yet, it could also be some hardware or driver issue.
+
+ For example, if you are using a Technotrend/Hauppauge DVB-C card
+ *without* analog module, you might have to use module parameter
+ adac=-1 (dvb-ttpci.o).
+
+ Please see the FAQ page at linuxtv.org, as it could contain some
+ valuable information:
+
+ https://www.linuxtv.org/wiki/index.php/FAQ_%26_Troubleshooting
+
+ If that doesn't work, check at the linux-media ML archives, to
+ see if someone else had a similar problem with your hardware
+ and/or digital TV service provider:
+
+ https://lore.kernel.org/linux-media/
+
+ If none of this works, you can try sending an e-mail to the
+ linux-media ML and see if someone else could shed some light.
+ The e-mail is linux-media AT vger.kernel.org.
+
+5. The dvb_net device doesn't give me any packets at all
+
+ Run ``tcpdump`` on the ``dvb0_0`` interface. This sets the interface
+ into promiscuous mode so it accepts any packets from the PID
+ you have configured with the ``dvbnet`` utility. Check if there
+ are any packets with the IP addr and MAC addr you have
+ configured with ``ifconfig`` or with ``ip addr``.
+
+ If ``tcpdump`` doesn't give you any output, check the statistics
+ which ``ifconfig`` or ``netstat -ni`` outputs. (Note: If the MAC
+ address is wrong, ``dvb_net`` won't get any input; thus you have to
+ run ``tcpdump`` before checking the statistics.) If there are no
+ packets at all then maybe the PID is wrong. If there are error packets,
+ then either the PID is wrong or the stream does not conform to
+ the MPE standard (EN 301 192, http://www.etsi.org/). You can
+ use e.g. ``dvbsnoop`` for debugging.
+
+6. The ``dvb_net`` device doesn't give me any multicast packets
+
+ Check your routes if they include the multicast address range.
+ Additionally make sure that "source validation by reversed path
+ lookup" is disabled::
+
+ $ "echo 0 > /proc/sys/net/ipv4/conf/dvb0/rp_filter"
+
+7. What are all those modules that need to be loaded?
+
+ In order to make it more flexible and support different hardware
+ combinations, the media subsystem is written on a modular way.
+
+ So, besides the Digital TV hardware module for the main chipset,
+ it also needs to load a frontend driver, plus the Digital TV
+ core. If the board also has remote controller, it will also
+ need the remote controller core and the remote controller tables.
+ The same happens if the board has support for analog TV: the
+ core support for video4linux need to be loaded.
+
+ The actual module names are Linux-kernel version specific, as,
+ from time to time, things change, in order to make the media
+ support more flexible.
diff --git a/Documentation/media/v4l-drivers/fimc.rst b/Documentation/admin-guide/media/fimc.rst
index 3adc19bcf039..267ef52fe387 100644
--- a/Documentation/media/v4l-drivers/fimc.rst
+++ b/Documentation/admin-guide/media/fimc.rst
@@ -1,6 +1,8 @@
+.. SPDX-License-Identifier: GPL-2.0
+
.. include:: <isonum.txt>
-The Samsung S5P/EXYNOS4 FIMC driver
+The Samsung S5P/Exynos4 FIMC driver
===================================
Copyright |copy| 2012 - 2013 Samsung Electronics Co., Ltd.
@@ -12,12 +14,12 @@ data from LCD controller (FIMD) through the SoC internal writeback data
path. There are multiple FIMC instances in the SoCs (up to 4), having
slightly different capabilities, like pixel alignment constraints, rotator
availability, LCD writeback support, etc. The driver is located at
-drivers/media/platform/exynos4-is directory.
+drivers/media/platform/samsung/exynos4-is directory.
Supported SoCs
--------------
-S5PC100 (mem-to-mem only), S5PV210, EXYNOS4210
+S5PC100 (mem-to-mem only), S5PV210, Exynos4210
Supported features
------------------
@@ -36,26 +38,6 @@ Not currently supported
- LCD writeback input
- per frame clock gating (mem-to-mem)
-Files partitioning
-------------------
-
-- media device driver
- drivers/media/platform/exynos4-is/media-dev.[ch]
-
-- camera capture video device driver
- drivers/media/platform/exynos4-is/fimc-capture.c
-
-- MIPI-CSI2 receiver subdev
- drivers/media/platform/exynos4-is/mipi-csis.[ch]
-
-- video post-processor (mem-to-mem)
- drivers/media/platform/exynos4-is/fimc-core.c
-
-- common files
- drivers/media/platform/exynos4-is/fimc-core.h
- drivers/media/platform/exynos4-is/fimc-reg.h
- drivers/media/platform/exynos4-is/regs-fimc.h
-
User space interfaces
---------------------
@@ -63,7 +45,7 @@ Media device interface
~~~~~~~~~~~~~~~~~~~~~~
The driver supports Media Controller API as defined at :ref:`media_controller`.
-The media device driver name is "SAMSUNG S5P FIMC".
+The media device driver name is "Samsung S5P FIMC".
The purpose of this interface is to allow changing assignment of FIMC instances
to the SoC peripheral camera input at runtime and optionally to control internal
@@ -72,6 +54,7 @@ connections of the MIPI-CSIS device(s) to the FIMC entities.
The media device interface allows to configure the SoC for capturing image
data from the sensor through more than one FIMC instance (e.g. for simultaneous
viewfinder and still capture setup).
+
Reconfiguration is done by enabling/disabling media links created by the driver
during initialization. The internal device topology can be easily discovered
through media entity and links enumeration.
@@ -114,6 +97,7 @@ sensor subdev -> mipi-csi subdev -> fimc subdev -> video node
When we configure these devices through sub-device API at user space, the
configuration flow must be from left to right, and the video node is
configured as last one.
+
When we don't use sub-device user space API the whole configuration of all
devices belonging to the pipeline is done at the video node driver.
The sysfs entry allows to instruct the capture node driver not to configure
diff --git a/Documentation/admin-guide/media/frontend-cardlist.rst b/Documentation/admin-guide/media/frontend-cardlist.rst
new file mode 100644
index 000000000000..ba5b7c69a978
--- /dev/null
+++ b/Documentation/admin-guide/media/frontend-cardlist.rst
@@ -0,0 +1,226 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+================
+Frontend drivers
+================
+
+.. note::
+
+ #) There is no guarantee that every frontend driver works
+ out of the box with every card, because of different wiring.
+
+ #) The demodulator chips can be used with a variety of
+ tuner/PLL chips, and not all combinations are supported. Often
+ the demodulator and tuner/PLL chip are inside a metal box for
+ shielding, and the whole metal box has its own part number.
+
+
+Common Interface (EN50221) controller drivers
+=============================================
+
+============== =========================================================
+Driver Name
+============== =========================================================
+cxd2099 Sony CXD2099AR Common Interface driver
+sp2 CIMaX SP2
+============== =========================================================
+
+ATSC (North American/Korean Terrestrial/Cable DTV) frontends
+============================================================
+
+============== =========================================================
+Driver Name
+============== =========================================================
+au8522_dig Auvitek AU8522 based DTV demod
+au8522_decoder Auvitek AU8522 based ATV demod
+bcm3510 Broadcom BCM3510
+lg2160 LG Electronics LG216x based
+lgdt3305 LG Electronics LGDT3304 and LGDT3305 based
+lgdt3306a LG Electronics LGDT3306A based
+lgdt330x LG Electronics LGDT3302/LGDT3303 based
+nxt200x NxtWave Communications NXT2002/NXT2004 based
+or51132 Oren OR51132 based
+or51211 Oren OR51211 based
+s5h1409 Samsung S5H1409 based
+s5h1411 Samsung S5H1411 based
+============== =========================================================
+
+DVB-C (cable) frontends
+=======================
+
+============== =========================================================
+Driver Name
+============== =========================================================
+stv0297 ST STV0297 based
+tda10021 Philips TDA10021 based
+tda10023 Philips TDA10023 based
+ves1820 VLSI VES1820 based
+============== =========================================================
+
+DVB-S (satellite) frontends
+===========================
+
+============== =========================================================
+Driver Name
+============== =========================================================
+cx24110 Conexant CX24110 based
+cx24116 Conexant CX24116 based
+cx24117 Conexant CX24117 based
+cx24120 Conexant CX24120 based
+cx24123 Conexant CX24123 based
+ds3000 Montage Technology DS3000 based
+mb86a16 Fujitsu MB86A16 based
+mt312 Zarlink VP310/MT312/ZL10313 based
+s5h1420 Samsung S5H1420 based
+si21xx Silicon Labs SI21XX based
+stb6000 ST STB6000 silicon tuner
+stv0288 ST STV0288 based
+stv0299 ST STV0299 based
+stv0900 ST STV0900 based
+stv6110 ST STV6110 silicon tuner
+tda10071 NXP TDA10071
+tda10086 Philips TDA10086 based
+tda8083 Philips TDA8083 based
+tda8261 Philips TDA8261 based
+tda826x Philips TDA826X silicon tuner
+ts2020 Montage Technology TS2020 based tuners
+tua6100 Infineon TUA6100 PLL
+cx24113 Conexant CX24113/CX24128 tuner for DVB-S/DSS
+itd1000 Integrant ITD1000 Zero IF tuner for DVB-S/DSS
+ves1x93 VLSI VES1893 or VES1993 based
+zl10036 Zarlink ZL10036 silicon tuner
+zl10039 Zarlink ZL10039 silicon tuner
+============== =========================================================
+
+DVB-T (terrestrial) frontends
+=============================
+
+============== =========================================================
+Driver Name
+============== =========================================================
+af9013 Afatech AF9013 demodulator
+cx22700 Conexant CX22700 based
+cx22702 Conexant cx22702 demodulator (OFDM)
+cxd2820r Sony CXD2820R
+cxd2841er Sony CXD2841ER
+cxd2880 Sony CXD2880 DVB-T2/T tuner + demodulator
+dib3000mb DiBcom 3000M-B
+dib3000mc DiBcom 3000P/M-C
+dib7000m DiBcom 7000MA/MB/PA/PB/MC
+dib7000p DiBcom 7000PC
+dib9000 DiBcom 9000
+drxd Micronas DRXD driver
+ec100 E3C EC100
+l64781 LSI L64781
+mt352 Zarlink MT352 based
+nxt6000 NxtWave Communications NXT6000 based
+rtl2830 Realtek RTL2830 DVB-T
+rtl2832 Realtek RTL2832 DVB-T
+rtl2832_sdr Realtek RTL2832 SDR
+s5h1432 Samsung s5h1432 demodulator (OFDM)
+si2168 Silicon Labs Si2168
+sp8870 Spase sp8870 based
+sp887x Spase sp887x based
+stv0367 ST STV0367 based
+tda10048 Philips TDA10048HN based
+tda1004x Philips TDA10045H/TDA10046H based
+zd1301_demod ZyDAS ZD1301
+zl10353 Zarlink ZL10353 based
+============== =========================================================
+
+Digital terrestrial only tuners/PLL
+===================================
+
+============== =========================================================
+Driver Name
+============== =========================================================
+dvb-pll Generic I2C PLL based tuners
+dib0070 DiBcom DiB0070 silicon base-band tuner
+dib0090 DiBcom DiB0090 silicon base-band tuner
+============== =========================================================
+
+ISDB-S (satellite) & ISDB-T (terrestrial) frontends
+===================================================
+
+============== =========================================================
+Driver Name
+============== =========================================================
+mn88443x Socionext MN88443x
+tc90522 Toshiba TC90522
+============== =========================================================
+
+ISDB-T (terrestrial) frontends
+==============================
+
+============== =========================================================
+Driver Name
+============== =========================================================
+dib8000 DiBcom 8000MB/MC
+mb86a20s Fujitsu mb86a20s
+s921 Sharp S921 frontend
+============== =========================================================
+
+Multistandard (cable + terrestrial) frontends
+=============================================
+
+============== =========================================================
+Driver Name
+============== =========================================================
+drxk Micronas DRXK based
+mn88472 Panasonic MN88472
+mn88473 Panasonic MN88473
+si2165 Silicon Labs si2165 based
+tda18271c2dd NXP TDA18271C2 silicon tuner
+============== =========================================================
+
+Multistandard (satellite) frontends
+===================================
+
+============== =========================================================
+Driver Name
+============== =========================================================
+m88ds3103 Montage Technology M88DS3103
+mxl5xx MaxLinear MxL5xx based tuner-demodulators
+stb0899 STB0899 based
+stb6100 STB6100 based tuners
+stv090x STV0900/STV0903(A/B) based
+stv0910 STV0910 based
+stv6110x STV6110/(A) based tuners
+stv6111 STV6111 based tuners
+============== =========================================================
+
+SEC control devices for DVB-S
+=============================
+
+============== =========================================================
+Driver Name
+============== =========================================================
+a8293 Allegro A8293
+af9033 Afatech AF9033 DVB-T demodulator
+ascot2e Sony Ascot2E tuner
+atbm8830 AltoBeam ATBM8830/8831 DMB-TH demodulator
+drx39xyj Micronas DRX-J demodulator
+helene Sony HELENE Sat/Ter tuner (CXD2858ER)
+horus3a Sony Horus3A tuner
+isl6405 ISL6405 SEC controller
+isl6421 ISL6421 SEC controller
+isl6423 ISL6423 SEC controller
+ix2505v Sharp IX2505V silicon tuner
+lgs8gl5 Silicon Legend LGS-8GL5 demodulator (OFDM)
+lgs8gxx Legend Silicon LGS8913/LGS8GL5/LGS8GXX DMB-TH demodulator
+lnbh25 LNBH25 SEC controller
+lnbh29 LNBH29 SEC controller
+lnbp21 LNBP21/LNBH24 SEC controllers
+lnbp22 LNBP22 SEC controllers
+m88rs2000 M88RS2000 DVB-S demodulator and tuner
+tda665x TDA665x tuner
+============== =========================================================
+
+Tools to develop new frontends
+==============================
+
+============== =========================================================
+Driver Name
+============== =========================================================
+dvb_dummy_fe Dummy frontend driver
+============== =========================================================
diff --git a/Documentation/media/v4l-drivers/gspca-cardlist.rst b/Documentation/admin-guide/media/gspca-cardlist.rst
index e18d87e80d78..e3404d1589da 100644
--- a/Documentation/media/v4l-drivers/gspca-cardlist.rst
+++ b/Documentation/admin-guide/media/gspca-cardlist.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
The gspca cards list
====================
@@ -303,7 +305,7 @@ pac7302 093a:2625 Genius iSlim 310
pac7302 093a:2626 Labtec 2200
pac7302 093a:2627 Genius FaceCam 300
pac7302 093a:2628 Genius iLook 300
-pac7302 093a:2629 Genious iSlim 300
+pac7302 093a:2629 Genius iSlim 300
pac7302 093a:262a Webcam 300k
pac7302 093a:262c Philips SPC 230 NC
jl2005bcd 0979:0227 Various brands, 19 known cameras supported
diff --git a/Documentation/admin-guide/media/i2c-cardlist.rst b/Documentation/admin-guide/media/i2c-cardlist.rst
new file mode 100644
index 000000000000..fff962558cd5
--- /dev/null
+++ b/Documentation/admin-guide/media/i2c-cardlist.rst
@@ -0,0 +1,287 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+I²C drivers
+===========
+
+The I²C (Inter-Integrated Circuit) bus is a three-wires bus used internally
+at the media cards for communication between different chips. While the bus
+is not visible to the Linux Kernel, drivers need to send and receive
+commands via the bus. The Linux Kernel driver abstraction has support to
+implement different drivers for each component inside an I²C bus, as if
+the bus were visible to the main system board.
+
+One of the problems with I²C devices is that sometimes the same device may
+work with different I²C hardware. This is common, for example, on devices
+that comes with a tuner for North America market, and another one for
+Europe. Some drivers have a ``tuner=`` modprobe parameter to allow using a
+different tuner number in order to address such issue.
+
+The current supported of I²C drivers (not including staging drivers) are
+listed below.
+
+Audio decoders, processors and mixers
+-------------------------------------
+
+============ ==========================================================
+Driver Name
+============ ==========================================================
+cs3308 Cirrus Logic CS3308 audio ADC
+cs5345 Cirrus Logic CS5345 audio ADC
+cs53l32a Cirrus Logic CS53L32A audio ADC
+msp3400 Micronas MSP34xx audio decoders
+sony-btf-mpx Sony BTF's internal MPX
+tda1997x NXP TDA1997x HDMI receiver
+tda7432 Philips TDA7432 audio processor
+tda9840 Philips TDA9840 audio processor
+tea6415c Philips TEA6415C audio processor
+tea6420 Philips TEA6420 audio processor
+tlv320aic23b Texas Instruments TLV320AIC23B audio codec
+tvaudio Simple audio decoder chips
+uda1342 Philips UDA1342 audio codec
+vp27smpx Panasonic VP27's internal MPX
+wm8739 Wolfson Microelectronics WM8739 stereo audio ADC
+wm8775 Wolfson Microelectronics WM8775 audio ADC with input mixer
+============ ==========================================================
+
+Audio/Video compression chips
+-----------------------------
+
+============ ==========================================================
+Driver Name
+============ ==========================================================
+saa6752hs Philips SAA6752HS MPEG-2 Audio/Video Encoder
+============ ==========================================================
+
+Camera sensor devices
+---------------------
+
+============ ==========================================================
+Driver Name
+============ ==========================================================
+ccs MIPI CCS compliant camera sensors (also SMIA++ and SMIA)
+et8ek8 ET8EK8 camera sensor
+hi556 Hynix Hi-556 sensor
+hi846 Hynix Hi-846 sensor
+imx208 Sony IMX208 sensor
+imx214 Sony IMX214 sensor
+imx219 Sony IMX219 sensor
+imx258 Sony IMX258 sensor
+imx274 Sony IMX274 sensor
+imx290 Sony IMX290 sensor
+imx319 Sony IMX319 sensor
+imx334 Sony IMX334 sensor
+imx355 Sony IMX355 sensor
+imx412 Sony IMX412 sensor
+mt9m001 mt9m001
+mt9m111 mt9m111, mt9m112 and mt9m131
+mt9p031 Aptina MT9P031
+mt9t112 Aptina MT9T111/MT9T112
+mt9v011 Micron mt9v011 sensor
+mt9v032 Micron MT9V032 sensor
+mt9v111 Aptina MT9V111 sensor
+ov13858 OmniVision OV13858 sensor
+ov13b10 OmniVision OV13B10 sensor
+ov2640 OmniVision OV2640 sensor
+ov2659 OmniVision OV2659 sensor
+ov2680 OmniVision OV2680 sensor
+ov2685 OmniVision OV2685 sensor
+ov5640 OmniVision OV5640 sensor
+ov5645 OmniVision OV5645 sensor
+ov5647 OmniVision OV5647 sensor
+ov5670 OmniVision OV5670 sensor
+ov5675 OmniVision OV5675 sensor
+ov5695 OmniVision OV5695 sensor
+ov7251 OmniVision OV7251 sensor
+ov7640 OmniVision OV7640 sensor
+ov7670 OmniVision OV7670 sensor
+ov772x OmniVision OV772x sensor
+ov7740 OmniVision OV7740 sensor
+ov8856 OmniVision OV8856 sensor
+ov9640 OmniVision OV9640 sensor
+ov9650 OmniVision OV9650/OV9652 sensor
+rj54n1cb0c Sharp RJ54N1CB0C sensor
+s5c73m3 Samsung S5C73M3 sensor
+s5k4ecgx Samsung S5K4ECGX sensor
+s5k5baf Samsung S5K5BAF sensor
+s5k6a3 Samsung S5K6A3 sensor
+============ ==========================================================
+
+Flash devices
+-------------
+
+============ ==========================================================
+Driver Name
+============ ==========================================================
+adp1653 ADP1653 flash
+lm3560 LM3560 dual flash driver
+lm3646 LM3646 dual flash driver
+============ ==========================================================
+
+IR I2C driver
+-------------
+
+============ ==========================================================
+Driver Name
+============ ==========================================================
+ir-kbd-i2c I2C module for IR
+============ ==========================================================
+
+Lens drivers
+------------
+
+============ ==========================================================
+Driver Name
+============ ==========================================================
+ad5820 AD5820 lens voice coil
+ak7375 AK7375 lens voice coil
+dw9714 DW9714 lens voice coil
+dw9768 DW9768 lens voice coil
+dw9807-vcm DW9807 lens voice coil
+============ ==========================================================
+
+Miscellaneous helper chips
+--------------------------
+
+============ ==========================================================
+Driver Name
+============ ==========================================================
+video-i2c I2C transport video
+m52790 Mitsubishi M52790 A/V switch
+st-mipid02 STMicroelectronics MIPID02 CSI-2 to PARALLEL bridge
+ths7303 THS7303/53 Video Amplifier
+============ ==========================================================
+
+RDS decoders
+------------
+
+============ ==========================================================
+Driver Name
+============ ==========================================================
+saa6588 SAA6588 Radio Chip RDS decoder
+============ ==========================================================
+
+SDR tuner chips
+---------------
+
+============ ==========================================================
+Driver Name
+============ ==========================================================
+max2175 Maxim 2175 RF to Bits tuner
+============ ==========================================================
+
+Video and audio decoders
+------------------------
+
+============ ==========================================================
+Driver Name
+============ ==========================================================
+cx25840 Conexant CX2584x audio/video decoders
+saa717x Philips SAA7171/3/4 audio/video decoders
+============ ==========================================================
+
+Video decoders
+--------------
+
+============ ==========================================================
+Driver Name
+============ ==========================================================
+adv7180 Analog Devices ADV7180 decoder
+adv7183 Analog Devices ADV7183 decoder
+adv748x Analog Devices ADV748x decoder
+adv7604 Analog Devices ADV7604 decoder
+adv7842 Analog Devices ADV7842 decoder
+bt819 BT819A VideoStream decoder
+bt856 BT856 VideoStream decoder
+bt866 BT866 VideoStream decoder
+ks0127 KS0127 video decoder
+ml86v7667 OKI ML86V7667 video decoder
+saa7110 Philips SAA7110 video decoder
+saa7115 Philips SAA7111/3/4/5 video decoders
+tc358743 Toshiba TC358743 decoder
+tvp514x Texas Instruments TVP514x video decoder
+tvp5150 Texas Instruments TVP5150 video decoder
+tvp7002 Texas Instruments TVP7002 video decoder
+tw2804 Techwell TW2804 multiple video decoder
+tw9903 Techwell TW9903 video decoder
+tw9906 Techwell TW9906 video decoder
+tw9910 Techwell TW9910 video decoder
+vpx3220 vpx3220a, vpx3216b & vpx3214c video decoders
+============ ==========================================================
+
+Video encoders
+--------------
+
+============ ==========================================================
+Driver Name
+============ ==========================================================
+adv7170 Analog Devices ADV7170 video encoder
+adv7175 Analog Devices ADV7175 video encoder
+adv7343 ADV7343 video encoder
+adv7393 ADV7393 video encoder
+adv7511-v4l2 Analog Devices ADV7511 encoder
+ak881x AK8813/AK8814 video encoders
+saa7127 Philips SAA7127/9 digital video encoders
+saa7185 Philips SAA7185 video encoder
+ths8200 Texas Instruments THS8200 video encoder
+============ ==========================================================
+
+Video improvement chips
+-----------------------
+
+============ ==========================================================
+Driver Name
+============ ==========================================================
+upd64031a NEC Electronics uPD64031A Ghost Reduction
+upd64083 NEC Electronics uPD64083 3-Dimensional Y/C separation
+============ ==========================================================
+
+Tuner drivers
+-------------
+
+============ ==================================================
+Driver Name
+============ ==================================================
+e4000 Elonics E4000 silicon tuner
+fc0011 Fitipower FC0011 silicon tuner
+fc0012 Fitipower FC0012 silicon tuner
+fc0013 Fitipower FC0013 silicon tuner
+fc2580 FCI FC2580 silicon tuner
+it913x ITE Tech IT913x silicon tuner
+m88rs6000t Montage M88RS6000 internal tuner
+max2165 Maxim MAX2165 silicon tuner
+mc44s803 Freescale MC44S803 Low Power CMOS Broadband tuners
+msi001 Mirics MSi001
+mt2060 Microtune MT2060 silicon IF tuner
+mt2063 Microtune MT2063 silicon IF tuner
+mt20xx Microtune 2032 / 2050 tuners
+mt2131 Microtune MT2131 silicon tuner
+mt2266 Microtune MT2266 silicon tuner
+mxl301rf MaxLinear MxL301RF tuner
+mxl5005s MaxLinear MSL5005S silicon tuner
+mxl5007t MaxLinear MxL5007T silicon tuner
+qm1d1b0004 Sharp QM1D1B0004 tuner
+qm1d1c0042 Sharp QM1D1C0042 tuner
+qt1010 Quantek QT1010 silicon tuner
+r820t Rafael Micro R820T silicon tuner
+si2157 Silicon Labs Si2157 silicon tuner
+tuner-types Simple tuner support
+tda18212 NXP TDA18212 silicon tuner
+tda18218 NXP TDA18218 silicon tuner
+tda18250 NXP TDA18250 silicon tuner
+tda18271 NXP TDA18271 silicon tuner
+tda827x Philips TDA827X silicon tuner
+tda8290 TDA 8290/8295 + 8275(a)/18271 tuner combo
+tda9887 TDA 9885/6/7 analog IF demodulator
+tea5761 TEA 5761 radio tuner
+tea5767 TEA 5767 radio tuner
+tua9001 Infineon TUA9001 silicon tuner
+xc2028 XCeive xc2028/xc3028 tuners
+xc4000 Xceive XC4000 silicon tuner
+xc5000 Xceive XC5000 silicon tuner
+============ ==================================================
+
+.. toctree::
+ :maxdepth: 1
+
+ tuner-cardlist
+ frontend-cardlist
diff --git a/Documentation/admin-guide/media/imx.rst b/Documentation/admin-guide/media/imx.rst
new file mode 100644
index 000000000000..bb68100d8acb
--- /dev/null
+++ b/Documentation/admin-guide/media/imx.rst
@@ -0,0 +1,714 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+i.MX Video Capture Driver
+=========================
+
+Introduction
+------------
+
+The Freescale i.MX5/6 contains an Image Processing Unit (IPU), which
+handles the flow of image frames to and from capture devices and
+display devices.
+
+For image capture, the IPU contains the following internal subunits:
+
+- Image DMA Controller (IDMAC)
+- Camera Serial Interface (CSI)
+- Image Converter (IC)
+- Sensor Multi-FIFO Controller (SMFC)
+- Image Rotator (IRT)
+- Video De-Interlacing or Combining Block (VDIC)
+
+The IDMAC is the DMA controller for transfer of image frames to and from
+memory. Various dedicated DMA channels exist for both video capture and
+display paths. During transfer, the IDMAC is also capable of vertical
+image flip, 8x8 block transfer (see IRT description), pixel component
+re-ordering (for example UYVY to YUYV) within the same colorspace, and
+packed <--> planar conversion. The IDMAC can also perform a simple
+de-interlacing by interweaving even and odd lines during transfer
+(without motion compensation which requires the VDIC).
+
+The CSI is the backend capture unit that interfaces directly with
+camera sensors over Parallel, BT.656/1120, and MIPI CSI-2 buses.
+
+The IC handles color-space conversion, resizing (downscaling and
+upscaling), horizontal flip, and 90/270 degree rotation operations.
+
+There are three independent "tasks" within the IC that can carry out
+conversions concurrently: pre-process encoding, pre-process viewfinder,
+and post-processing. Within each task, conversions are split into three
+sections: downsizing section, main section (upsizing, flip, colorspace
+conversion, and graphics plane combining), and rotation section.
+
+The IPU time-shares the IC task operations. The time-slice granularity
+is one burst of eight pixels in the downsizing section, one image line
+in the main processing section, one image frame in the rotation section.
+
+The SMFC is composed of four independent FIFOs that each can transfer
+captured frames from sensors directly to memory concurrently via four
+IDMAC channels.
+
+The IRT carries out 90 and 270 degree image rotation operations. The
+rotation operation is carried out on 8x8 pixel blocks at a time. This
+operation is supported by the IDMAC which handles the 8x8 block transfer
+along with block reordering, in coordination with vertical flip.
+
+The VDIC handles the conversion of interlaced video to progressive, with
+support for different motion compensation modes (low, medium, and high
+motion). The deinterlaced output frames from the VDIC can be sent to the
+IC pre-process viewfinder task for further conversions. The VDIC also
+contains a Combiner that combines two image planes, with alpha blending
+and color keying.
+
+In addition to the IPU internal subunits, there are also two units
+outside the IPU that are also involved in video capture on i.MX:
+
+- MIPI CSI-2 Receiver for camera sensors with the MIPI CSI-2 bus
+ interface. This is a Synopsys DesignWare core.
+- Two video multiplexers for selecting among multiple sensor inputs
+ to send to a CSI.
+
+For more info, refer to the latest versions of the i.MX5/6 reference
+manuals [#f1]_ and [#f2]_.
+
+
+Features
+--------
+
+Some of the features of this driver include:
+
+- Many different pipelines can be configured via media controller API,
+ that correspond to the hardware video capture pipelines supported in
+ the i.MX.
+
+- Supports parallel, BT.565, and MIPI CSI-2 interfaces.
+
+- Concurrent independent streams, by configuring pipelines to multiple
+ video capture interfaces using independent entities.
+
+- Scaling, color-space conversion, horizontal and vertical flip, and
+ image rotation via IC task subdevs.
+
+- Many pixel formats supported (RGB, packed and planar YUV, partial
+ planar YUV).
+
+- The VDIC subdev supports motion compensated de-interlacing, with three
+ motion compensation modes: low, medium, and high motion. Pipelines are
+ defined that allow sending frames to the VDIC subdev directly from the
+ CSI. There is also support in the future for sending frames to the
+ VDIC from memory buffers via output/mem2mem devices.
+
+- Includes a Frame Interval Monitor (FIM) that can correct vertical sync
+ problems with the ADV718x video decoders.
+
+
+Topology
+--------
+
+The following shows the media topologies for the i.MX6Q SabreSD and
+i.MX6Q SabreAuto. Refer to these diagrams in the entity descriptions
+in the next section.
+
+The i.MX5/6 topologies can differ upstream from the IPUv3 CSI video
+multiplexers, but the internal IPUv3 topology downstream from there
+is common to all i.MX5/6 platforms. For example, the SabreSD, with the
+MIPI CSI-2 OV5640 sensor, requires the i.MX6 MIPI CSI-2 receiver. But
+the SabreAuto has only the ADV7180 decoder on a parallel bt.656 bus, and
+therefore does not require the MIPI CSI-2 receiver, so it is missing in
+its graph.
+
+.. _imx6q_topology_graph:
+
+.. kernel-figure:: imx6q-sabresd.dot
+ :alt: Diagram of the i.MX6Q SabreSD media pipeline topology
+ :align: center
+
+ Media pipeline graph on i.MX6Q SabreSD
+
+.. kernel-figure:: imx6q-sabreauto.dot
+ :alt: Diagram of the i.MX6Q SabreAuto media pipeline topology
+ :align: center
+
+ Media pipeline graph on i.MX6Q SabreAuto
+
+Entities
+--------
+
+imx6-mipi-csi2
+--------------
+
+This is the MIPI CSI-2 receiver entity. It has one sink pad to receive
+the MIPI CSI-2 stream (usually from a MIPI CSI-2 camera sensor). It has
+four source pads, corresponding to the four MIPI CSI-2 demuxed virtual
+channel outputs. Multiple source pads can be enabled to independently
+stream from multiple virtual channels.
+
+This entity actually consists of two sub-blocks. One is the MIPI CSI-2
+core. This is a Synopsys Designware MIPI CSI-2 core. The other sub-block
+is a "CSI-2 to IPU gasket". The gasket acts as a demultiplexer of the
+four virtual channels streams, providing four separate parallel buses
+containing each virtual channel that are routed to CSIs or video
+multiplexers as described below.
+
+On i.MX6 solo/dual-lite, all four virtual channel buses are routed to
+two video multiplexers. Both CSI0 and CSI1 can receive any virtual
+channel, as selected by the video multiplexers.
+
+On i.MX6 Quad, virtual channel 0 is routed to IPU1-CSI0 (after selected
+by a video mux), virtual channels 1 and 2 are hard-wired to IPU1-CSI1
+and IPU2-CSI0, respectively, and virtual channel 3 is routed to
+IPU2-CSI1 (again selected by a video mux).
+
+ipuX_csiY_mux
+-------------
+
+These are the video multiplexers. They have two or more sink pads to
+select from either camera sensors with a parallel interface, or from
+MIPI CSI-2 virtual channels from imx6-mipi-csi2 entity. They have a
+single source pad that routes to a CSI (ipuX_csiY entities).
+
+On i.MX6 solo/dual-lite, there are two video mux entities. One sits
+in front of IPU1-CSI0 to select between a parallel sensor and any of
+the four MIPI CSI-2 virtual channels (a total of five sink pads). The
+other mux sits in front of IPU1-CSI1, and again has five sink pads to
+select between a parallel sensor and any of the four MIPI CSI-2 virtual
+channels.
+
+On i.MX6 Quad, there are two video mux entities. One sits in front of
+IPU1-CSI0 to select between a parallel sensor and MIPI CSI-2 virtual
+channel 0 (two sink pads). The other mux sits in front of IPU2-CSI1 to
+select between a parallel sensor and MIPI CSI-2 virtual channel 3 (two
+sink pads).
+
+ipuX_csiY
+---------
+
+These are the CSI entities. They have a single sink pad receiving from
+either a video mux or from a MIPI CSI-2 virtual channel as described
+above.
+
+This entity has two source pads. The first source pad can link directly
+to the ipuX_vdic entity or the ipuX_ic_prp entity, using hardware links
+that require no IDMAC memory buffer transfer.
+
+When the direct source pad is routed to the ipuX_ic_prp entity, frames
+from the CSI can be processed by one or both of the IC pre-processing
+tasks.
+
+When the direct source pad is routed to the ipuX_vdic entity, the VDIC
+will carry out motion-compensated de-interlace using "high motion" mode
+(see description of ipuX_vdic entity).
+
+The second source pad sends video frames directly to memory buffers
+via the SMFC and an IDMAC channel, bypassing IC pre-processing. This
+source pad is routed to a capture device node, with a node name of the
+format "ipuX_csiY capture".
+
+Note that since the IDMAC source pad makes use of an IDMAC channel,
+pixel reordering within the same colorspace can be carried out by the
+IDMAC channel. For example, if the CSI sink pad is receiving in UYVY
+order, the capture device linked to the IDMAC source pad can capture
+in YUYV order. Also, if the CSI sink pad is receiving a packed YUV
+format, the capture device can capture a planar YUV format such as
+YUV420.
+
+The IDMAC channel at the IDMAC source pad also supports simple
+interweave without motion compensation, which is activated if the source
+pad's field type is sequential top-bottom or bottom-top, and the
+requested capture interface field type is set to interlaced (t-b, b-t,
+or unqualified interlaced). The capture interface will enforce the same
+field order as the source pad field order (interlaced-bt if source pad
+is seq-bt, interlaced-tb if source pad is seq-tb).
+
+For events produced by ipuX_csiY, see ref:`imx_api_ipuX_csiY`.
+
+Cropping in ipuX_csiY
+---------------------
+
+The CSI supports cropping the incoming raw sensor frames. This is
+implemented in the ipuX_csiY entities at the sink pad, using the
+crop selection subdev API.
+
+The CSI also supports fixed divide-by-two downscaling independently in
+width and height. This is implemented in the ipuX_csiY entities at
+the sink pad, using the compose selection subdev API.
+
+The output rectangle at the ipuX_csiY source pad is the same as
+the compose rectangle at the sink pad. So the source pad rectangle
+cannot be negotiated, it must be set using the compose selection
+API at sink pad (if /2 downscale is desired, otherwise source pad
+rectangle is equal to incoming rectangle).
+
+To give an example of crop and /2 downscale, this will crop a
+1280x960 input frame to 640x480, and then /2 downscale in both
+dimensions to 320x240 (assumes ipu1_csi0 is linked to ipu1_csi0_mux):
+
+.. code-block:: none
+
+ media-ctl -V "'ipu1_csi0_mux':2[fmt:UYVY2X8/1280x960]"
+ media-ctl -V "'ipu1_csi0':0[crop:(0,0)/640x480]"
+ media-ctl -V "'ipu1_csi0':0[compose:(0,0)/320x240]"
+
+Frame Skipping in ipuX_csiY
+---------------------------
+
+The CSI supports frame rate decimation, via frame skipping. Frame
+rate decimation is specified by setting the frame intervals at
+sink and source pads. The ipuX_csiY entity then applies the best
+frame skip setting to the CSI to achieve the desired frame rate
+at the source pad.
+
+The following example reduces an assumed incoming 60 Hz frame
+rate by half at the IDMAC output source pad:
+
+.. code-block:: none
+
+ media-ctl -V "'ipu1_csi0':0[fmt:UYVY2X8/640x480@1/60]"
+ media-ctl -V "'ipu1_csi0':2[fmt:UYVY2X8/640x480@1/30]"
+
+Frame Interval Monitor in ipuX_csiY
+-----------------------------------
+
+See ref:`imx_api_FIM`.
+
+ipuX_vdic
+---------
+
+The VDIC carries out motion compensated de-interlacing, with three
+motion compensation modes: low, medium, and high motion. The mode is
+specified with the menu control V4L2_CID_DEINTERLACING_MODE. The VDIC
+has two sink pads and a single source pad.
+
+The direct sink pad receives from an ipuX_csiY direct pad. With this
+link the VDIC can only operate in high motion mode.
+
+When the IDMAC sink pad is activated, it receives from an output
+or mem2mem device node. With this pipeline, the VDIC can also operate
+in low and medium modes, because these modes require receiving
+frames from memory buffers. Note that an output or mem2mem device
+is not implemented yet, so this sink pad currently has no links.
+
+The source pad routes to the IC pre-processing entity ipuX_ic_prp.
+
+ipuX_ic_prp
+-----------
+
+This is the IC pre-processing entity. It acts as a router, routing
+data from its sink pad to one or both of its source pads.
+
+This entity has a single sink pad. The sink pad can receive from the
+ipuX_csiY direct pad, or from ipuX_vdic.
+
+This entity has two source pads. One source pad routes to the
+pre-process encode task entity (ipuX_ic_prpenc), the other to the
+pre-process viewfinder task entity (ipuX_ic_prpvf). Both source pads
+can be activated at the same time if the sink pad is receiving from
+ipuX_csiY. Only the source pad to the pre-process viewfinder task entity
+can be activated if the sink pad is receiving from ipuX_vdic (frames
+from the VDIC can only be processed by the pre-process viewfinder task).
+
+ipuX_ic_prpenc
+--------------
+
+This is the IC pre-processing encode entity. It has a single sink
+pad from ipuX_ic_prp, and a single source pad. The source pad is
+routed to a capture device node, with a node name of the format
+"ipuX_ic_prpenc capture".
+
+This entity performs the IC pre-process encode task operations:
+color-space conversion, resizing (downscaling and upscaling),
+horizontal and vertical flip, and 90/270 degree rotation. Flip
+and rotation are provided via standard V4L2 controls.
+
+Like the ipuX_csiY IDMAC source, this entity also supports simple
+de-interlace without motion compensation, and pixel reordering.
+
+ipuX_ic_prpvf
+-------------
+
+This is the IC pre-processing viewfinder entity. It has a single sink
+pad from ipuX_ic_prp, and a single source pad. The source pad is routed
+to a capture device node, with a node name of the format
+"ipuX_ic_prpvf capture".
+
+This entity is identical in operation to ipuX_ic_prpenc, with the same
+resizing and CSC operations and flip/rotation controls. It will receive
+and process de-interlaced frames from the ipuX_vdic if ipuX_ic_prp is
+receiving from ipuX_vdic.
+
+Like the ipuX_csiY IDMAC source, this entity supports simple
+interweaving without motion compensation. However, note that if the
+ipuX_vdic is included in the pipeline (ipuX_ic_prp is receiving from
+ipuX_vdic), it's not possible to use interweave in ipuX_ic_prpvf,
+since the ipuX_vdic has already carried out de-interlacing (with
+motion compensation) and therefore the field type output from
+ipuX_vdic can only be none (progressive).
+
+Capture Pipelines
+-----------------
+
+The following describe the various use-cases supported by the pipelines.
+
+The links shown do not include the backend sensor, video mux, or mipi
+csi-2 receiver links. This depends on the type of sensor interface
+(parallel or mipi csi-2). So these pipelines begin with:
+
+sensor -> ipuX_csiY_mux -> ...
+
+for parallel sensors, or:
+
+sensor -> imx6-mipi-csi2 -> (ipuX_csiY_mux) -> ...
+
+for mipi csi-2 sensors. The imx6-mipi-csi2 receiver may need to route
+to the video mux (ipuX_csiY_mux) before sending to the CSI, depending
+on the mipi csi-2 virtual channel, hence ipuX_csiY_mux is shown in
+parenthesis.
+
+Unprocessed Video Capture:
+--------------------------
+
+Send frames directly from sensor to camera device interface node, with
+no conversions, via ipuX_csiY IDMAC source pad:
+
+-> ipuX_csiY:2 -> ipuX_csiY capture
+
+IC Direct Conversions:
+----------------------
+
+This pipeline uses the preprocess encode entity to route frames directly
+from the CSI to the IC, to carry out scaling up to 1024x1024 resolution,
+CSC, flipping, and image rotation:
+
+-> ipuX_csiY:1 -> 0:ipuX_ic_prp:1 -> 0:ipuX_ic_prpenc:1 -> ipuX_ic_prpenc capture
+
+Motion Compensated De-interlace:
+--------------------------------
+
+This pipeline routes frames from the CSI direct pad to the VDIC entity to
+support motion-compensated de-interlacing (high motion mode only),
+scaling up to 1024x1024, CSC, flip, and rotation:
+
+-> ipuX_csiY:1 -> 0:ipuX_vdic:2 -> 0:ipuX_ic_prp:2 -> 0:ipuX_ic_prpvf:1 -> ipuX_ic_prpvf capture
+
+
+Usage Notes
+-----------
+
+To aid in configuration and for backward compatibility with V4L2
+applications that access controls only from video device nodes, the
+capture device interfaces inherit controls from the active entities
+in the current pipeline, so controls can be accessed either directly
+from the subdev or from the active capture device interface. For
+example, the FIM controls are available either from the ipuX_csiY
+subdevs or from the active capture device.
+
+The following are specific usage notes for the Sabre* reference
+boards:
+
+
+i.MX6Q SabreLite with OV5642 and OV5640
+---------------------------------------
+
+This platform requires the OmniVision OV5642 module with a parallel
+camera interface, and the OV5640 module with a MIPI CSI-2
+interface. Both modules are available from Boundary Devices:
+
+- https://boundarydevices.com/product/nit6x_5mp
+- https://boundarydevices.com/product/nit6x_5mp_mipi
+
+Note that if only one camera module is available, the other sensor
+node can be disabled in the device tree.
+
+The OV5642 module is connected to the parallel bus input on the i.MX
+internal video mux to IPU1 CSI0. It's i2c bus connects to i2c bus 2.
+
+The MIPI CSI-2 OV5640 module is connected to the i.MX internal MIPI CSI-2
+receiver, and the four virtual channel outputs from the receiver are
+routed as follows: vc0 to the IPU1 CSI0 mux, vc1 directly to IPU1 CSI1,
+vc2 directly to IPU2 CSI0, and vc3 to the IPU2 CSI1 mux. The OV5640 is
+also connected to i2c bus 2 on the SabreLite, therefore the OV5642 and
+OV5640 must not share the same i2c slave address.
+
+The following basic example configures unprocessed video capture
+pipelines for both sensors. The OV5642 is routed to ipu1_csi0, and
+the OV5640, transmitting on MIPI CSI-2 virtual channel 1 (which is
+imx6-mipi-csi2 pad 2), is routed to ipu1_csi1. Both sensors are
+configured to output 640x480, and the OV5642 outputs YUYV2X8, the
+OV5640 UYVY2X8:
+
+.. code-block:: none
+
+ # Setup links for OV5642
+ media-ctl -l "'ov5642 1-0042':0 -> 'ipu1_csi0_mux':1[1]"
+ media-ctl -l "'ipu1_csi0_mux':2 -> 'ipu1_csi0':0[1]"
+ media-ctl -l "'ipu1_csi0':2 -> 'ipu1_csi0 capture':0[1]"
+ # Setup links for OV5640
+ media-ctl -l "'ov5640 1-0040':0 -> 'imx6-mipi-csi2':0[1]"
+ media-ctl -l "'imx6-mipi-csi2':2 -> 'ipu1_csi1':0[1]"
+ media-ctl -l "'ipu1_csi1':2 -> 'ipu1_csi1 capture':0[1]"
+ # Configure pads for OV5642 pipeline
+ media-ctl -V "'ov5642 1-0042':0 [fmt:YUYV2X8/640x480 field:none]"
+ media-ctl -V "'ipu1_csi0_mux':2 [fmt:YUYV2X8/640x480 field:none]"
+ media-ctl -V "'ipu1_csi0':2 [fmt:AYUV32/640x480 field:none]"
+ # Configure pads for OV5640 pipeline
+ media-ctl -V "'ov5640 1-0040':0 [fmt:UYVY2X8/640x480 field:none]"
+ media-ctl -V "'imx6-mipi-csi2':2 [fmt:UYVY2X8/640x480 field:none]"
+ media-ctl -V "'ipu1_csi1':2 [fmt:AYUV32/640x480 field:none]"
+
+Streaming can then begin independently on the capture device nodes
+"ipu1_csi0 capture" and "ipu1_csi1 capture". The v4l2-ctl tool can
+be used to select any supported YUV pixelformat on the capture device
+nodes, including planar.
+
+i.MX6Q SabreAuto with ADV7180 decoder
+-------------------------------------
+
+On the i.MX6Q SabreAuto, an on-board ADV7180 SD decoder is connected to the
+parallel bus input on the internal video mux to IPU1 CSI0.
+
+The following example configures a pipeline to capture from the ADV7180
+video decoder, assuming NTSC 720x480 input signals, using simple
+interweave (unconverted and without motion compensation). The adv7180
+must output sequential or alternating fields (field type 'seq-bt' for
+NTSC, or 'alternate'):
+
+.. code-block:: none
+
+ # Setup links
+ media-ctl -l "'adv7180 3-0021':0 -> 'ipu1_csi0_mux':1[1]"
+ media-ctl -l "'ipu1_csi0_mux':2 -> 'ipu1_csi0':0[1]"
+ media-ctl -l "'ipu1_csi0':2 -> 'ipu1_csi0 capture':0[1]"
+ # Configure pads
+ media-ctl -V "'adv7180 3-0021':0 [fmt:UYVY2X8/720x480 field:seq-bt]"
+ media-ctl -V "'ipu1_csi0_mux':2 [fmt:UYVY2X8/720x480]"
+ media-ctl -V "'ipu1_csi0':2 [fmt:AYUV32/720x480]"
+ # Configure "ipu1_csi0 capture" interface (assumed at /dev/video4)
+ v4l2-ctl -d4 --set-fmt-video=field=interlaced_bt
+
+Streaming can then begin on /dev/video4. The v4l2-ctl tool can also be
+used to select any supported YUV pixelformat on /dev/video4.
+
+This example configures a pipeline to capture from the ADV7180
+video decoder, assuming PAL 720x576 input signals, with Motion
+Compensated de-interlacing. The adv7180 must output sequential or
+alternating fields (field type 'seq-tb' for PAL, or 'alternate').
+
+.. code-block:: none
+
+ # Setup links
+ media-ctl -l "'adv7180 3-0021':0 -> 'ipu1_csi0_mux':1[1]"
+ media-ctl -l "'ipu1_csi0_mux':2 -> 'ipu1_csi0':0[1]"
+ media-ctl -l "'ipu1_csi0':1 -> 'ipu1_vdic':0[1]"
+ media-ctl -l "'ipu1_vdic':2 -> 'ipu1_ic_prp':0[1]"
+ media-ctl -l "'ipu1_ic_prp':2 -> 'ipu1_ic_prpvf':0[1]"
+ media-ctl -l "'ipu1_ic_prpvf':1 -> 'ipu1_ic_prpvf capture':0[1]"
+ # Configure pads
+ media-ctl -V "'adv7180 3-0021':0 [fmt:UYVY2X8/720x576 field:seq-tb]"
+ media-ctl -V "'ipu1_csi0_mux':2 [fmt:UYVY2X8/720x576]"
+ media-ctl -V "'ipu1_csi0':1 [fmt:AYUV32/720x576]"
+ media-ctl -V "'ipu1_vdic':2 [fmt:AYUV32/720x576 field:none]"
+ media-ctl -V "'ipu1_ic_prp':2 [fmt:AYUV32/720x576 field:none]"
+ media-ctl -V "'ipu1_ic_prpvf':1 [fmt:AYUV32/720x576 field:none]"
+ # Configure "ipu1_ic_prpvf capture" interface (assumed at /dev/video2)
+ v4l2-ctl -d2 --set-fmt-video=field=none
+
+Streaming can then begin on /dev/video2. The v4l2-ctl tool can also be
+used to select any supported YUV pixelformat on /dev/video2.
+
+This platform accepts Composite Video analog inputs to the ADV7180 on
+Ain1 (connector J42).
+
+i.MX6DL SabreAuto with ADV7180 decoder
+--------------------------------------
+
+On the i.MX6DL SabreAuto, an on-board ADV7180 SD decoder is connected to the
+parallel bus input on the internal video mux to IPU1 CSI0.
+
+The following example configures a pipeline to capture from the ADV7180
+video decoder, assuming NTSC 720x480 input signals, using simple
+interweave (unconverted and without motion compensation). The adv7180
+must output sequential or alternating fields (field type 'seq-bt' for
+NTSC, or 'alternate'):
+
+.. code-block:: none
+
+ # Setup links
+ media-ctl -l "'adv7180 4-0021':0 -> 'ipu1_csi0_mux':4[1]"
+ media-ctl -l "'ipu1_csi0_mux':5 -> 'ipu1_csi0':0[1]"
+ media-ctl -l "'ipu1_csi0':2 -> 'ipu1_csi0 capture':0[1]"
+ # Configure pads
+ media-ctl -V "'adv7180 4-0021':0 [fmt:UYVY2X8/720x480 field:seq-bt]"
+ media-ctl -V "'ipu1_csi0_mux':5 [fmt:UYVY2X8/720x480]"
+ media-ctl -V "'ipu1_csi0':2 [fmt:AYUV32/720x480]"
+ # Configure "ipu1_csi0 capture" interface (assumed at /dev/video0)
+ v4l2-ctl -d0 --set-fmt-video=field=interlaced_bt
+
+Streaming can then begin on /dev/video0. The v4l2-ctl tool can also be
+used to select any supported YUV pixelformat on /dev/video0.
+
+This example configures a pipeline to capture from the ADV7180
+video decoder, assuming PAL 720x576 input signals, with Motion
+Compensated de-interlacing. The adv7180 must output sequential or
+alternating fields (field type 'seq-tb' for PAL, or 'alternate').
+
+.. code-block:: none
+
+ # Setup links
+ media-ctl -l "'adv7180 4-0021':0 -> 'ipu1_csi0_mux':4[1]"
+ media-ctl -l "'ipu1_csi0_mux':5 -> 'ipu1_csi0':0[1]"
+ media-ctl -l "'ipu1_csi0':1 -> 'ipu1_vdic':0[1]"
+ media-ctl -l "'ipu1_vdic':2 -> 'ipu1_ic_prp':0[1]"
+ media-ctl -l "'ipu1_ic_prp':2 -> 'ipu1_ic_prpvf':0[1]"
+ media-ctl -l "'ipu1_ic_prpvf':1 -> 'ipu1_ic_prpvf capture':0[1]"
+ # Configure pads
+ media-ctl -V "'adv7180 4-0021':0 [fmt:UYVY2X8/720x576 field:seq-tb]"
+ media-ctl -V "'ipu1_csi0_mux':5 [fmt:UYVY2X8/720x576]"
+ media-ctl -V "'ipu1_csi0':1 [fmt:AYUV32/720x576]"
+ media-ctl -V "'ipu1_vdic':2 [fmt:AYUV32/720x576 field:none]"
+ media-ctl -V "'ipu1_ic_prp':2 [fmt:AYUV32/720x576 field:none]"
+ media-ctl -V "'ipu1_ic_prpvf':1 [fmt:AYUV32/720x576 field:none]"
+ # Configure "ipu1_ic_prpvf capture" interface (assumed at /dev/video2)
+ v4l2-ctl -d2 --set-fmt-video=field=none
+
+Streaming can then begin on /dev/video2. The v4l2-ctl tool can also be
+used to select any supported YUV pixelformat on /dev/video2.
+
+This platform accepts Composite Video analog inputs to the ADV7180 on
+Ain1 (connector J42).
+
+i.MX6Q SabreSD with MIPI CSI-2 OV5640
+-------------------------------------
+
+Similarly to i.MX6Q SabreLite, the i.MX6Q SabreSD supports a parallel
+interface OV5642 module on IPU1 CSI0, and a MIPI CSI-2 OV5640
+module. The OV5642 connects to i2c bus 1 and the OV5640 to i2c bus 2.
+
+The device tree for SabreSD includes OF graphs for both the parallel
+OV5642 and the MIPI CSI-2 OV5640, but as of this writing only the MIPI
+CSI-2 OV5640 has been tested, so the OV5642 node is currently disabled.
+The OV5640 module connects to MIPI connector J5. The NXP part number
+for the OV5640 module that connects to the SabreSD board is H120729.
+
+The following example configures unprocessed video capture pipeline to
+capture from the OV5640, transmitting on MIPI CSI-2 virtual channel 0:
+
+.. code-block:: none
+
+ # Setup links
+ media-ctl -l "'ov5640 1-003c':0 -> 'imx6-mipi-csi2':0[1]"
+ media-ctl -l "'imx6-mipi-csi2':1 -> 'ipu1_csi0_mux':0[1]"
+ media-ctl -l "'ipu1_csi0_mux':2 -> 'ipu1_csi0':0[1]"
+ media-ctl -l "'ipu1_csi0':2 -> 'ipu1_csi0 capture':0[1]"
+ # Configure pads
+ media-ctl -V "'ov5640 1-003c':0 [fmt:UYVY2X8/640x480]"
+ media-ctl -V "'imx6-mipi-csi2':1 [fmt:UYVY2X8/640x480]"
+ media-ctl -V "'ipu1_csi0_mux':0 [fmt:UYVY2X8/640x480]"
+ media-ctl -V "'ipu1_csi0':0 [fmt:AYUV32/640x480]"
+
+Streaming can then begin on "ipu1_csi0 capture" node. The v4l2-ctl
+tool can be used to select any supported pixelformat on the capture
+device node.
+
+To determine what is the /dev/video node correspondent to
+"ipu1_csi0 capture":
+
+.. code-block:: none
+
+ media-ctl -e "ipu1_csi0 capture"
+ /dev/video0
+
+/dev/video0 is the streaming element in this case.
+
+Starting the streaming via v4l2-ctl:
+
+.. code-block:: none
+
+ v4l2-ctl --stream-mmap -d /dev/video0
+
+Starting the streaming via Gstreamer and sending the content to the display:
+
+.. code-block:: none
+
+ gst-launch-1.0 v4l2src device=/dev/video0 ! kmssink
+
+The following example configures a direct conversion pipeline to capture
+from the OV5640, transmitting on MIPI CSI-2 virtual channel 0. It also
+shows colorspace conversion and scaling at IC output.
+
+.. code-block:: none
+
+ # Setup links
+ media-ctl -l "'ov5640 1-003c':0 -> 'imx6-mipi-csi2':0[1]"
+ media-ctl -l "'imx6-mipi-csi2':1 -> 'ipu1_csi0_mux':0[1]"
+ media-ctl -l "'ipu1_csi0_mux':2 -> 'ipu1_csi0':0[1]"
+ media-ctl -l "'ipu1_csi0':1 -> 'ipu1_ic_prp':0[1]"
+ media-ctl -l "'ipu1_ic_prp':1 -> 'ipu1_ic_prpenc':0[1]"
+ media-ctl -l "'ipu1_ic_prpenc':1 -> 'ipu1_ic_prpenc capture':0[1]"
+ # Configure pads
+ media-ctl -V "'ov5640 1-003c':0 [fmt:UYVY2X8/640x480]"
+ media-ctl -V "'imx6-mipi-csi2':1 [fmt:UYVY2X8/640x480]"
+ media-ctl -V "'ipu1_csi0_mux':2 [fmt:UYVY2X8/640x480]"
+ media-ctl -V "'ipu1_csi0':1 [fmt:AYUV32/640x480]"
+ media-ctl -V "'ipu1_ic_prp':1 [fmt:AYUV32/640x480]"
+ media-ctl -V "'ipu1_ic_prpenc':1 [fmt:ARGB8888_1X32/800x600]"
+ # Set a format at the capture interface
+ v4l2-ctl -d /dev/video1 --set-fmt-video=pixelformat=RGB3
+
+Streaming can then begin on "ipu1_ic_prpenc capture" node.
+
+To determine what is the /dev/video node correspondent to
+"ipu1_ic_prpenc capture":
+
+.. code-block:: none
+
+ media-ctl -e "ipu1_ic_prpenc capture"
+ /dev/video1
+
+
+/dev/video1 is the streaming element in this case.
+
+Starting the streaming via v4l2-ctl:
+
+.. code-block:: none
+
+ v4l2-ctl --stream-mmap -d /dev/video1
+
+Starting the streaming via Gstreamer and sending the content to the display:
+
+.. code-block:: none
+
+ gst-launch-1.0 v4l2src device=/dev/video1 ! kmssink
+
+Known Issues
+------------
+
+1. When using 90 or 270 degree rotation control at capture resolutions
+ near the IC resizer limit of 1024x1024, and combined with planar
+ pixel formats (YUV420, YUV422p), frame capture will often fail with
+ no end-of-frame interrupts from the IDMAC channel. To work around
+ this, use lower resolution and/or packed formats (YUYV, RGB3, etc.)
+ when 90 or 270 rotations are needed.
+
+
+File list
+---------
+
+drivers/staging/media/imx/
+include/media/imx.h
+include/linux/imx-media.h
+
+References
+----------
+
+.. [#f1] http://www.nxp.com/assets/documents/data/en/reference-manuals/IMX6DQRM.pdf
+.. [#f2] http://www.nxp.com/assets/documents/data/en/reference-manuals/IMX6SDLRM.pdf
+
+
+Authors
+-------
+
+- Steve Longerbeam <steve_longerbeam@mentor.com>
+- Philipp Zabel <kernel@pengutronix.de>
+- Russell King <linux@armlinux.org.uk>
+
+Copyright (C) 2012-2017 Mentor Graphics Inc.
diff --git a/Documentation/admin-guide/media/imx6q-sabreauto.dot b/Documentation/admin-guide/media/imx6q-sabreauto.dot
new file mode 100644
index 000000000000..bd6cf0b358c0
--- /dev/null
+++ b/Documentation/admin-guide/media/imx6q-sabreauto.dot
@@ -0,0 +1,51 @@
+digraph board {
+ rankdir=TB
+ n00000001 [label="{{<port0> 0} | ipu1_csi0\n/dev/v4l-subdev0 | {<port1> 1 | <port2> 2}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000001:port2 -> n00000005 [style=dashed]
+ n00000001:port1 -> n0000000f:port0 [style=dashed]
+ n00000001:port1 -> n0000000b:port0 [style=dashed]
+ n00000005 [label="ipu1_csi0 capture\n/dev/video0", shape=box, style=filled, fillcolor=yellow]
+ n0000000b [label="{{<port0> 0 | <port1> 1} | ipu1_vdic\n/dev/v4l-subdev1 | {<port2> 2}}", shape=Mrecord, style=filled, fillcolor=green]
+ n0000000b:port2 -> n0000000f:port0 [style=dashed]
+ n0000000f [label="{{<port0> 0} | ipu1_ic_prp\n/dev/v4l-subdev2 | {<port1> 1 | <port2> 2}}", shape=Mrecord, style=filled, fillcolor=green]
+ n0000000f:port1 -> n00000013:port0 [style=dashed]
+ n0000000f:port2 -> n0000001c:port0 [style=dashed]
+ n00000013 [label="{{<port0> 0} | ipu1_ic_prpenc\n/dev/v4l-subdev3 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000013:port1 -> n00000016 [style=dashed]
+ n00000016 [label="ipu1_ic_prpenc capture\n/dev/video1", shape=box, style=filled, fillcolor=yellow]
+ n0000001c [label="{{<port0> 0} | ipu1_ic_prpvf\n/dev/v4l-subdev4 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
+ n0000001c:port1 -> n0000001f [style=dashed]
+ n0000001f [label="ipu1_ic_prpvf capture\n/dev/video2", shape=box, style=filled, fillcolor=yellow]
+ n0000002f [label="{{<port0> 0} | ipu1_csi1\n/dev/v4l-subdev5 | {<port1> 1 | <port2> 2}}", shape=Mrecord, style=filled, fillcolor=green]
+ n0000002f:port2 -> n00000033 [style=dashed]
+ n0000002f:port1 -> n0000000f:port0 [style=dashed]
+ n0000002f:port1 -> n0000000b:port0 [style=dashed]
+ n00000033 [label="ipu1_csi1 capture\n/dev/video3", shape=box, style=filled, fillcolor=yellow]
+ n0000003d [label="{{<port0> 0} | ipu2_csi0\n/dev/v4l-subdev6 | {<port1> 1 | <port2> 2}}", shape=Mrecord, style=filled, fillcolor=green]
+ n0000003d:port2 -> n00000041 [style=dashed]
+ n0000003d:port1 -> n0000004b:port0 [style=dashed]
+ n0000003d:port1 -> n00000047:port0 [style=dashed]
+ n00000041 [label="ipu2_csi0 capture\n/dev/video4", shape=box, style=filled, fillcolor=yellow]
+ n00000047 [label="{{<port0> 0 | <port1> 1} | ipu2_vdic\n/dev/v4l-subdev7 | {<port2> 2}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000047:port2 -> n0000004b:port0 [style=dashed]
+ n0000004b [label="{{<port0> 0} | ipu2_ic_prp\n/dev/v4l-subdev8 | {<port1> 1 | <port2> 2}}", shape=Mrecord, style=filled, fillcolor=green]
+ n0000004b:port1 -> n0000004f:port0 [style=dashed]
+ n0000004b:port2 -> n00000058:port0 [style=dashed]
+ n0000004f [label="{{<port0> 0} | ipu2_ic_prpenc\n/dev/v4l-subdev9 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
+ n0000004f:port1 -> n00000052 [style=dashed]
+ n00000052 [label="ipu2_ic_prpenc capture\n/dev/video5", shape=box, style=filled, fillcolor=yellow]
+ n00000058 [label="{{<port0> 0} | ipu2_ic_prpvf\n/dev/v4l-subdev10 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000058:port1 -> n0000005b [style=dashed]
+ n0000005b [label="ipu2_ic_prpvf capture\n/dev/video6", shape=box, style=filled, fillcolor=yellow]
+ n0000006b [label="{{<port0> 0} | ipu2_csi1\n/dev/v4l-subdev11 | {<port1> 1 | <port2> 2}}", shape=Mrecord, style=filled, fillcolor=green]
+ n0000006b:port2 -> n0000006f [style=dashed]
+ n0000006b:port1 -> n0000004b:port0 [style=dashed]
+ n0000006b:port1 -> n00000047:port0 [style=dashed]
+ n0000006f [label="ipu2_csi1 capture\n/dev/video7", shape=box, style=filled, fillcolor=yellow]
+ n00000079 [label="{{<port0> 0 | <port1> 1} | ipu1_csi0_mux\n/dev/v4l-subdev12 | {<port2> 2}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000079:port2 -> n00000001:port0 [style=dashed]
+ n0000007d [label="{{<port0> 0 | <port1> 1} | ipu2_csi1_mux\n/dev/v4l-subdev13 | {<port2> 2}}", shape=Mrecord, style=filled, fillcolor=green]
+ n0000007d:port2 -> n0000006b:port0 [style=dashed]
+ n00000081 [label="{{} | adv7180 3-0021\n/dev/v4l-subdev14 | {<port0> 0}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000081:port0 -> n00000079:port1 [style=dashed]
+}
diff --git a/Documentation/admin-guide/media/imx6q-sabresd.dot b/Documentation/admin-guide/media/imx6q-sabresd.dot
new file mode 100644
index 000000000000..7d56cafa1944
--- /dev/null
+++ b/Documentation/admin-guide/media/imx6q-sabresd.dot
@@ -0,0 +1,56 @@
+digraph board {
+ rankdir=TB
+ n00000001 [label="{{<port0> 0} | ipu1_csi0\n/dev/v4l-subdev0 | {<port1> 1 | <port2> 2}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000001:port2 -> n00000005 [style=dashed]
+ n00000001:port1 -> n0000000f:port0 [style=dashed]
+ n00000001:port1 -> n0000000b:port0 [style=dashed]
+ n00000005 [label="ipu1_csi0 capture\n/dev/video0", shape=box, style=filled, fillcolor=yellow]
+ n0000000b [label="{{<port0> 0 | <port1> 1} | ipu1_vdic\n/dev/v4l-subdev1 | {<port2> 2}}", shape=Mrecord, style=filled, fillcolor=green]
+ n0000000b:port2 -> n0000000f:port0 [style=dashed]
+ n0000000f [label="{{<port0> 0} | ipu1_ic_prp\n/dev/v4l-subdev2 | {<port1> 1 | <port2> 2}}", shape=Mrecord, style=filled, fillcolor=green]
+ n0000000f:port1 -> n00000013:port0 [style=dashed]
+ n0000000f:port2 -> n0000001c:port0 [style=dashed]
+ n00000013 [label="{{<port0> 0} | ipu1_ic_prpenc\n/dev/v4l-subdev3 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000013:port1 -> n00000016 [style=dashed]
+ n00000016 [label="ipu1_ic_prpenc capture\n/dev/video1", shape=box, style=filled, fillcolor=yellow]
+ n0000001c [label="{{<port0> 0} | ipu1_ic_prpvf\n/dev/v4l-subdev4 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
+ n0000001c:port1 -> n0000001f [style=dashed]
+ n0000001f [label="ipu1_ic_prpvf capture\n/dev/video2", shape=box, style=filled, fillcolor=yellow]
+ n0000002f [label="{{<port0> 0} | ipu1_csi1\n/dev/v4l-subdev5 | {<port1> 1 | <port2> 2}}", shape=Mrecord, style=filled, fillcolor=green]
+ n0000002f:port2 -> n00000033 [style=dashed]
+ n0000002f:port1 -> n0000000f:port0 [style=dashed]
+ n0000002f:port1 -> n0000000b:port0 [style=dashed]
+ n00000033 [label="ipu1_csi1 capture\n/dev/video3", shape=box, style=filled, fillcolor=yellow]
+ n0000003d [label="{{<port0> 0} | ipu2_csi0\n/dev/v4l-subdev6 | {<port1> 1 | <port2> 2}}", shape=Mrecord, style=filled, fillcolor=green]
+ n0000003d:port2 -> n00000041 [style=dashed]
+ n0000003d:port1 -> n0000004b:port0 [style=dashed]
+ n0000003d:port1 -> n00000047:port0 [style=dashed]
+ n00000041 [label="ipu2_csi0 capture\n/dev/video4", shape=box, style=filled, fillcolor=yellow]
+ n00000047 [label="{{<port0> 0 | <port1> 1} | ipu2_vdic\n/dev/v4l-subdev7 | {<port2> 2}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000047:port2 -> n0000004b:port0 [style=dashed]
+ n0000004b [label="{{<port0> 0} | ipu2_ic_prp\n/dev/v4l-subdev8 | {<port1> 1 | <port2> 2}}", shape=Mrecord, style=filled, fillcolor=green]
+ n0000004b:port1 -> n0000004f:port0 [style=dashed]
+ n0000004b:port2 -> n00000058:port0 [style=dashed]
+ n0000004f [label="{{<port0> 0} | ipu2_ic_prpenc\n/dev/v4l-subdev9 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
+ n0000004f:port1 -> n00000052 [style=dashed]
+ n00000052 [label="ipu2_ic_prpenc capture\n/dev/video5", shape=box, style=filled, fillcolor=yellow]
+ n00000058 [label="{{<port0> 0} | ipu2_ic_prpvf\n/dev/v4l-subdev10 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000058:port1 -> n0000005b [style=dashed]
+ n0000005b [label="ipu2_ic_prpvf capture\n/dev/video6", shape=box, style=filled, fillcolor=yellow]
+ n0000006b [label="{{<port0> 0} | ipu2_csi1\n/dev/v4l-subdev11 | {<port1> 1 | <port2> 2}}", shape=Mrecord, style=filled, fillcolor=green]
+ n0000006b:port2 -> n0000006f [style=dashed]
+ n0000006b:port1 -> n0000004b:port0 [style=dashed]
+ n0000006b:port1 -> n00000047:port0 [style=dashed]
+ n0000006f [label="ipu2_csi1 capture\n/dev/video7", shape=box, style=filled, fillcolor=yellow]
+ n00000079 [label="{{<port0> 0} | imx6-mipi-csi2\n/dev/v4l-subdev12 | {<port1> 1 | <port2> 2 | <port3> 3 | <port4> 4}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000079:port2 -> n0000002f:port0 [style=dashed]
+ n00000079:port3 -> n0000003d:port0 [style=dashed]
+ n00000079:port1 -> n0000007f:port0 [style=dashed]
+ n00000079:port4 -> n00000083:port0 [style=dashed]
+ n0000007f [label="{{<port0> 0 | <port1> 1} | ipu1_csi0_mux\n/dev/v4l-subdev13 | {<port2> 2}}", shape=Mrecord, style=filled, fillcolor=green]
+ n0000007f:port2 -> n00000001:port0 [style=dashed]
+ n00000083 [label="{{<port0> 0 | <port1> 1} | ipu2_csi1_mux\n/dev/v4l-subdev14 | {<port2> 2}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000083:port2 -> n0000006b:port0 [style=dashed]
+ n00000087 [label="{{} | ov5640 1-003c\n/dev/v4l-subdev15 | {<port0> 0}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000087:port0 -> n00000079:port0 [style=dashed]
+}
diff --git a/Documentation/admin-guide/media/imx7.rst b/Documentation/admin-guide/media/imx7.rst
new file mode 100644
index 000000000000..2fa27718f52a
--- /dev/null
+++ b/Documentation/admin-guide/media/imx7.rst
@@ -0,0 +1,221 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+i.MX7 Video Capture Driver
+==========================
+
+Introduction
+------------
+
+The i.MX7 contrary to the i.MX5/6 family does not contain an Image Processing
+Unit (IPU); because of that the capabilities to perform operations or
+manipulation of the capture frames are less feature rich.
+
+For image capture the i.MX7 has three units:
+- CMOS Sensor Interface (CSI)
+- Video Multiplexer
+- MIPI CSI-2 Receiver
+
+.. code-block:: none
+
+ MIPI Camera Input ---> MIPI CSI-2 --- > |\
+ | \
+ | \
+ | M |
+ | U | ------> CSI ---> Capture
+ | X |
+ | /
+ Parallel Camera Input ----------------> | /
+ |/
+
+For additional information, please refer to the latest versions of the i.MX7
+reference manual [#f1]_.
+
+Entities
+--------
+
+imx-mipi-csi2
+--------------
+
+This is the MIPI CSI-2 receiver entity. It has one sink pad to receive the pixel
+data from MIPI CSI-2 camera sensor. It has one source pad, corresponding to the
+virtual channel 0. This module is compliant to previous version of Samsung
+D-phy, and supports two D-PHY Rx Data lanes.
+
+csi-mux
+-------
+
+This is the video multiplexer. It has two sink pads to select from either camera
+sensor with a parallel interface or from MIPI CSI-2 virtual channel 0. It has
+a single source pad that routes to the CSI.
+
+csi
+---
+
+The CSI enables the chip to connect directly to external CMOS image sensor. CSI
+can interface directly with Parallel and MIPI CSI-2 buses. It has 256 x 64 FIFO
+to store received image pixel data and embedded DMA controllers to transfer data
+from the FIFO through AHB bus.
+
+This entity has one sink pad that receives from the csi-mux entity and a single
+source pad that routes video frames directly to memory buffers. This pad is
+routed to a capture device node.
+
+Usage Notes
+-----------
+
+To aid in configuration and for backward compatibility with V4L2 applications
+that access controls only from video device nodes, the capture device interfaces
+inherit controls from the active entities in the current pipeline, so controls
+can be accessed either directly from the subdev or from the active capture
+device interface. For example, the sensor controls are available either from the
+sensor subdevs or from the active capture device.
+
+Warp7 with OV2680
+-----------------
+
+On this platform an OV2680 MIPI CSI-2 module is connected to the internal MIPI
+CSI-2 receiver. The following example configures a video capture pipeline with
+an output of 800x600, and BGGR 10 bit bayer format:
+
+.. code-block:: none
+
+ # Setup links
+ media-ctl -l "'ov2680 1-0036':0 -> 'imx7-mipi-csis.0':0[1]"
+ media-ctl -l "'imx7-mipi-csis.0':1 -> 'csi-mux':1[1]"
+ media-ctl -l "'csi-mux':2 -> 'csi':0[1]"
+ media-ctl -l "'csi':1 -> 'csi capture':0[1]"
+
+ # Configure pads for pipeline
+ media-ctl -V "'ov2680 1-0036':0 [fmt:SBGGR10_1X10/800x600 field:none]"
+ media-ctl -V "'csi-mux':1 [fmt:SBGGR10_1X10/800x600 field:none]"
+ media-ctl -V "'csi-mux':2 [fmt:SBGGR10_1X10/800x600 field:none]"
+ media-ctl -V "'imx7-mipi-csis.0':0 [fmt:SBGGR10_1X10/800x600 field:none]"
+ media-ctl -V "'csi':0 [fmt:SBGGR10_1X10/800x600 field:none]"
+
+After this streaming can start. The v4l2-ctl tool can be used to select any of
+the resolutions supported by the sensor.
+
+.. code-block:: none
+
+ # media-ctl -p
+ Media controller API version 5.2.0
+
+ Media device information
+ ------------------------
+ driver imx7-csi
+ model imx-media
+ serial
+ bus info
+ hw revision 0x0
+ driver version 5.2.0
+
+ Device topology
+ - entity 1: csi (2 pads, 2 links)
+ type V4L2 subdev subtype Unknown flags 0
+ device node name /dev/v4l-subdev0
+ pad0: Sink
+ [fmt:SBGGR10_1X10/800x600 field:none colorspace:srgb xfer:srgb ycbcr:601 quantization:full-range]
+ <- "csi-mux":2 [ENABLED]
+ pad1: Source
+ [fmt:SBGGR10_1X10/800x600 field:none colorspace:srgb xfer:srgb ycbcr:601 quantization:full-range]
+ -> "csi capture":0 [ENABLED]
+
+ - entity 4: csi capture (1 pad, 1 link)
+ type Node subtype V4L flags 0
+ device node name /dev/video0
+ pad0: Sink
+ <- "csi":1 [ENABLED]
+
+ - entity 10: csi-mux (3 pads, 2 links)
+ type V4L2 subdev subtype Unknown flags 0
+ device node name /dev/v4l-subdev1
+ pad0: Sink
+ [fmt:Y8_1X8/1x1 field:none]
+ pad1: Sink
+ [fmt:SBGGR10_1X10/800x600 field:none]
+ <- "imx7-mipi-csis.0":1 [ENABLED]
+ pad2: Source
+ [fmt:SBGGR10_1X10/800x600 field:none]
+ -> "csi":0 [ENABLED]
+
+ - entity 14: imx7-mipi-csis.0 (2 pads, 2 links)
+ type V4L2 subdev subtype Unknown flags 0
+ device node name /dev/v4l-subdev2
+ pad0: Sink
+ [fmt:SBGGR10_1X10/800x600 field:none]
+ <- "ov2680 1-0036":0 [ENABLED]
+ pad1: Source
+ [fmt:SBGGR10_1X10/800x600 field:none]
+ -> "csi-mux":1 [ENABLED]
+
+ - entity 17: ov2680 1-0036 (1 pad, 1 link)
+ type V4L2 subdev subtype Sensor flags 0
+ device node name /dev/v4l-subdev3
+ pad0: Source
+ [fmt:SBGGR10_1X10/800x600@1/30 field:none colorspace:srgb]
+ -> "imx7-mipi-csis.0":0 [ENABLED]
+
+i.MX6ULL-EVK with OV5640
+------------------------
+
+On this platform a parallel OV5640 sensor is connected to the CSI port.
+The following example configures a video capture pipeline with an output
+of 640x480 and UYVY8_2X8 format:
+
+.. code-block:: none
+
+ # Setup links
+ media-ctl -l "'ov5640 1-003c':0 -> 'csi':0[1]"
+ media-ctl -l "'csi':1 -> 'csi capture':0[1]"
+
+ # Configure pads for pipeline
+ media-ctl -v -V "'ov5640 1-003c':0 [fmt:UYVY8_2X8/640x480 field:none]"
+
+After this streaming can start:
+
+.. code-block:: none
+
+ gst-launch-1.0 -v v4l2src device=/dev/video1 ! video/x-raw,format=UYVY,width=640,height=480 ! v4l2convert ! fbdevsink
+
+.. code-block:: none
+
+ # media-ctl -p
+ Media controller API version 5.14.0
+
+ Media device information
+ ------------------------
+ driver imx7-csi
+ model imx-media
+ serial
+ bus info
+ hw revision 0x0
+ driver version 5.14.0
+
+ Device topology
+ - entity 1: csi (2 pads, 2 links)
+ type V4L2 subdev subtype Unknown flags 0
+ device node name /dev/v4l-subdev0
+ pad0: Sink
+ [fmt:UYVY8_2X8/640x480 field:none colorspace:srgb xfer:srgb ycbcr:601 quantization:full-range]
+ <- "ov5640 1-003c":0 [ENABLED,IMMUTABLE]
+ pad1: Source
+ [fmt:UYVY8_2X8/640x480 field:none colorspace:srgb xfer:srgb ycbcr:601 quantization:full-range]
+ -> "csi capture":0 [ENABLED,IMMUTABLE]
+
+ - entity 4: csi capture (1 pad, 1 link)
+ type Node subtype V4L flags 0
+ device node name /dev/video1
+ pad0: Sink
+ <- "csi":1 [ENABLED,IMMUTABLE]
+
+ - entity 10: ov5640 1-003c (1 pad, 1 link)
+ type V4L2 subdev subtype Sensor flags 0
+ device node name /dev/v4l-subdev1
+ pad0: Source
+ [fmt:UYVY8_2X8/640x480@1/30 field:none colorspace:srgb xfer:srgb ycbcr:601 quantization:full-range]
+ -> "csi":0 [ENABLED,IMMUTABLE]
+
+References
+----------
+
+.. [#f1] https://www.nxp.com/docs/en/reference-manual/IMX7SRM.pdf
diff --git a/Documentation/admin-guide/media/index.rst b/Documentation/admin-guide/media/index.rst
new file mode 100644
index 000000000000..b11737ae6c04
--- /dev/null
+++ b/Documentation/admin-guide/media/index.rst
@@ -0,0 +1,61 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: <isonum.txt>
+
+====================================
+Media subsystem admin and user guide
+====================================
+
+This section contains usage information about media subsystem and
+its supported drivers.
+
+Please see:
+
+Documentation/userspace-api/media/index.rst
+
+ - for the userspace APIs used on media devices.
+
+Documentation/driver-api/media/index.rst
+
+ - for driver development information and Kernel APIs used by
+ media devices;
+
+Documentation/process/debugging/media_specific_debugging_guide.rst
+
+ - for advice about essential tools and techniques to debug drivers on this
+ subsystem
+
+.. toctree::
+ :caption: Table of Contents
+ :maxdepth: 2
+ :numbered:
+
+ intro
+ building
+
+ remote-controller
+
+ cec
+
+ dvb
+
+ cardlist
+
+ v4l-drivers
+ dvb-drivers
+
+**Copyright** |copy| 1999-2020 : LinuxTV Developers
+
+::
+
+ This documentation is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ For more details see the file COPYING in the source distribution of Linux.
diff --git a/Documentation/admin-guide/media/intro.rst b/Documentation/admin-guide/media/intro.rst
new file mode 100644
index 000000000000..fec8122f2412
--- /dev/null
+++ b/Documentation/admin-guide/media/intro.rst
@@ -0,0 +1,27 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============
+Introduction
+============
+
+The media subsystem consists on Linux support for several different types
+of devices:
+
+- Audio and video grabbers;
+- PC and Laptop Cameras;
+- Complex cameras found on Embedded hardware;
+- Analog and digital TV;
+- HDMI Customer Electronics Control (CEC);
+- Multi-touch input devices;
+- Remote Controllers;
+- Media encoders and decoders.
+
+Due to the diversity of devices, the subsystem provides several different
+APIs:
+
+- Remote Controller API;
+- HDMI CEC API;
+- Video4Linux API;
+- Media controller API;
+- Video4Linux Request API (experimental);
+- Digital TV API (also known as DVB API).
diff --git a/Documentation/admin-guide/media/ipu3.rst b/Documentation/admin-guide/media/ipu3.rst
new file mode 100644
index 000000000000..9c190942932e
--- /dev/null
+++ b/Documentation/admin-guide/media/ipu3.rst
@@ -0,0 +1,596 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: <isonum.txt>
+
+===============================================================
+Intel Image Processing Unit 3 (IPU3) Imaging Unit (ImgU) driver
+===============================================================
+
+Copyright |copy| 2018 Intel Corporation
+
+Introduction
+============
+
+This file documents the Intel IPU3 (3rd generation Image Processing Unit)
+Imaging Unit drivers located under drivers/media/pci/intel/ipu3 (CIO2) as well
+as under drivers/staging/media/ipu3 (ImgU).
+
+The Intel IPU3 found in certain Kaby Lake (as well as certain Sky Lake)
+platforms (U/Y processor lines) is made up of two parts namely the Imaging Unit
+(ImgU) and the CIO2 device (MIPI CSI2 receiver).
+
+The CIO2 device receives the raw Bayer data from the sensors and outputs the
+frames in a format that is specific to the IPU3 (for consumption by the IPU3
+ImgU). The CIO2 driver is available as drivers/media/pci/intel/ipu3/ipu3-cio2*
+and is enabled through the CONFIG_VIDEO_IPU3_CIO2 config option.
+
+The Imaging Unit (ImgU) is responsible for processing images captured
+by the IPU3 CIO2 device. The ImgU driver sources can be found under
+drivers/staging/media/ipu3 directory. The driver is enabled through the
+CONFIG_VIDEO_IPU3_IMGU config option.
+
+The two driver modules are named ipu3_csi2 and ipu3_imgu, respectively.
+
+The drivers has been tested on Kaby Lake platforms (U/Y processor lines).
+
+Both of the drivers implement V4L2, Media Controller and V4L2 sub-device
+interfaces. The IPU3 CIO2 driver supports camera sensors connected to the CIO2
+MIPI CSI-2 interfaces through V4L2 sub-device sensor drivers.
+
+CIO2
+====
+
+The CIO2 is represented as a single V4L2 subdev, which provides a V4L2 subdev
+interface to the user space. There is a video node for each CSI-2 receiver,
+with a single media controller interface for the entire device.
+
+The CIO2 contains four independent capture channel, each with its own MIPI CSI-2
+receiver and DMA engine. Each channel is modelled as a V4L2 sub-device exposed
+to userspace as a V4L2 sub-device node and has two pads:
+
+.. tabularcolumns:: |p{0.8cm}|p{4.0cm}|p{4.0cm}|
+
+.. flat-table::
+ :header-rows: 1
+
+ * - Pad
+ - Direction
+ - Purpose
+
+ * - 0
+ - sink
+ - MIPI CSI-2 input, connected to the sensor subdev
+
+ * - 1
+ - source
+ - Raw video capture, connected to the V4L2 video interface
+
+The V4L2 video interfaces model the DMA engines. They are exposed to userspace
+as V4L2 video device nodes.
+
+Capturing frames in raw Bayer format
+------------------------------------
+
+CIO2 MIPI CSI2 receiver is used to capture frames (in packed raw Bayer format)
+from the raw sensors connected to the CSI2 ports. The captured frames are used
+as input to the ImgU driver.
+
+Image processing using IPU3 ImgU requires tools such as raw2pnm [#f1]_, and
+yavta [#f2]_ due to the following unique requirements and / or features specific
+to IPU3.
+
+-- The IPU3 CSI2 receiver outputs the captured frames from the sensor in packed
+raw Bayer format that is specific to IPU3.
+
+-- Multiple video nodes have to be operated simultaneously.
+
+Let us take the example of ov5670 sensor connected to CSI2 port 0, for a
+2592x1944 image capture.
+
+Using the media controller APIs, the ov5670 sensor is configured to send
+frames in packed raw Bayer format to IPU3 CSI2 receiver.
+
+.. code-block:: none
+
+ # This example assumes /dev/media0 as the CIO2 media device
+ export MDEV=/dev/media0
+
+ # and that ov5670 sensor is connected to i2c bus 10 with address 0x36
+ export SDEV=$(media-ctl -d $MDEV -e "ov5670 10-0036")
+
+ # Establish the link for the media devices using media-ctl
+ media-ctl -d $MDEV -l "ov5670:0 -> ipu3-csi2 0:0[1]"
+
+ # Set the format for the media devices
+ media-ctl -d $MDEV -V "ov5670:0 [fmt:SGRBG10/2592x1944]"
+ media-ctl -d $MDEV -V "ipu3-csi2 0:0 [fmt:SGRBG10/2592x1944]"
+ media-ctl -d $MDEV -V "ipu3-csi2 0:1 [fmt:SGRBG10/2592x1944]"
+
+Once the media pipeline is configured, desired sensor specific settings
+(such as exposure and gain settings) can be set, using the yavta tool.
+
+e.g
+
+.. code-block:: none
+
+ yavta -w 0x009e0903 444 $SDEV
+ yavta -w 0x009e0913 1024 $SDEV
+ yavta -w 0x009e0911 2046 $SDEV
+
+Once the desired sensor settings are set, frame captures can be done as below.
+
+e.g
+
+.. code-block:: none
+
+ yavta --data-prefix -u -c10 -n5 -I -s2592x1944 --file=/tmp/frame-#.bin \
+ -f IPU3_SGRBG10 $(media-ctl -d $MDEV -e "ipu3-cio2 0")
+
+With the above command, 10 frames are captured at 2592x1944 resolution, with
+sGRBG10 format and output as IPU3_SGRBG10 format.
+
+The captured frames are available as /tmp/frame-#.bin files.
+
+ImgU
+====
+
+The ImgU is represented as two V4L2 subdevs, each of which provides a V4L2
+subdev interface to the user space.
+
+Each V4L2 subdev represents a pipe, which can support a maximum of 2 streams.
+This helps to support advanced camera features like Continuous View Finder (CVF)
+and Snapshot During Video(SDV).
+
+The ImgU contains two independent pipes, each modelled as a V4L2 sub-device
+exposed to userspace as a V4L2 sub-device node.
+
+Each pipe has two sink pads and three source pads for the following purpose:
+
+.. tabularcolumns:: |p{0.8cm}|p{4.0cm}|p{4.0cm}|
+
+.. flat-table::
+ :header-rows: 1
+
+ * - Pad
+ - Direction
+ - Purpose
+
+ * - 0
+ - sink
+ - Input raw video stream
+
+ * - 1
+ - sink
+ - Processing parameters
+
+ * - 2
+ - source
+ - Output processed video stream
+
+ * - 3
+ - source
+ - Output viewfinder video stream
+
+ * - 4
+ - source
+ - 3A statistics
+
+Each pad is connected to a corresponding V4L2 video interface, exposed to
+userspace as a V4L2 video device node.
+
+Device operation
+----------------
+
+With ImgU, once the input video node ("ipu3-imgu 0/1":0, in
+<entity>:<pad-number> format) is queued with buffer (in packed raw Bayer
+format), ImgU starts processing the buffer and produces the video output in YUV
+format and statistics output on respective output nodes. The driver is expected
+to have buffers ready for all of parameter, output and statistics nodes, when
+input video node is queued with buffer.
+
+At a minimum, all of input, main output, 3A statistics and viewfinder
+video nodes should be enabled for IPU3 to start image processing.
+
+Each ImgU V4L2 subdev has the following set of video nodes.
+
+input, output and viewfinder video nodes
+----------------------------------------
+
+The frames (in packed raw Bayer format specific to the IPU3) received by the
+input video node is processed by the IPU3 Imaging Unit and are output to 2 video
+nodes, with each targeting a different purpose (main output and viewfinder
+output).
+
+Details onand the Bayer format specific to the IPU3 can be found in
+:ref:`v4l2-pix-fmt-ipu3-sbggr10`.
+
+The driver supports V4L2 Video Capture Interface as defined at :ref:`devices`.
+
+Only the multi-planar API is supported. More details can be found at
+:ref:`planar-apis`.
+
+Parameters video node
+---------------------
+
+The parameters video node receives the ImgU algorithm parameters that are used
+to configure how the ImgU algorithms process the image.
+
+Details on processing parameters specific to the IPU3 can be found in
+:ref:`v4l2-meta-fmt-params`.
+
+3A statistics video node
+------------------------
+
+3A statistics video node is used by the ImgU driver to output the 3A (auto
+focus, auto exposure and auto white balance) statistics for the frames that are
+being processed by the ImgU to user space applications. User space applications
+can use this statistics data to compute the desired algorithm parameters for
+the ImgU.
+
+Configuring the Intel IPU3
+==========================
+
+The IPU3 ImgU pipelines can be configured using the Media Controller, defined at
+:ref:`media_controller`.
+
+Running mode and firmware binary selection
+------------------------------------------
+
+ImgU works based on firmware, currently the ImgU firmware support run 2 pipes
+in time-sharing with single input frame data. Each pipe can run at certain mode
+- "VIDEO" or "STILL", "VIDEO" mode is commonly used for video frames capture,
+and "STILL" is used for still frame capture. However, you can also select
+"VIDEO" to capture still frames if you want to capture images with less system
+load and power. For "STILL" mode, ImgU will try to use smaller BDS factor and
+output larger bayer frame for further YUV processing than "VIDEO" mode to get
+high quality images. Besides, "STILL" mode need XNR3 to do noise reduction,
+hence "STILL" mode will need more power and memory bandwidth than "VIDEO" mode.
+TNR will be enabled in "VIDEO" mode and bypassed by "STILL" mode. ImgU is
+running at "VIDEO" mode by default, the user can use v4l2 control
+V4L2_CID_INTEL_IPU3_MODE (currently defined in
+drivers/staging/media/ipu3/include/uapi/intel-ipu3.h) to query and set the
+running mode. For user, there is no difference for buffer queueing between the
+"VIDEO" and "STILL" mode, mandatory input and main output node should be
+enabled and buffers need be queued, the statistics and the view-finder queues
+are optional.
+
+The firmware binary will be selected according to current running mode, such log
+"using binary if_to_osys_striped " or "using binary if_to_osys_primary_striped"
+could be observed if you enable the ImgU dynamic debug, the binary
+if_to_osys_striped is selected for "VIDEO" and the binary
+"if_to_osys_primary_striped" is selected for "STILL".
+
+
+Processing the image in raw Bayer format
+----------------------------------------
+
+Configuring ImgU V4L2 subdev for image processing
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The ImgU V4L2 subdevs have to be configured with media controller APIs to have
+all the video nodes setup correctly.
+
+Let us take "ipu3-imgu 0" subdev as an example.
+
+.. code-block:: none
+
+ media-ctl -d $MDEV -r
+ media-ctl -d $MDEV -l "ipu3-imgu 0 input":0 -> "ipu3-imgu 0":0[1]
+ media-ctl -d $MDEV -l "ipu3-imgu 0":2 -> "ipu3-imgu 0 output":0[1]
+ media-ctl -d $MDEV -l "ipu3-imgu 0":3 -> "ipu3-imgu 0 viewfinder":0[1]
+ media-ctl -d $MDEV -l "ipu3-imgu 0":4 -> "ipu3-imgu 0 3a stat":0[1]
+
+Also the pipe mode of the corresponding V4L2 subdev should be set as desired
+(e.g 0 for video mode or 1 for still mode) through the control id 0x009819a1 as
+below.
+
+.. code-block:: none
+
+ yavta -w "0x009819A1 1" /dev/v4l-subdev7
+
+Certain hardware blocks in ImgU pipeline can change the frame resolution by
+cropping or scaling, these hardware blocks include Input Feeder(IF), Bayer Down
+Scaler (BDS) and Geometric Distortion Correction (GDC).
+There is also a block which can change the frame resolution - YUV Scaler, it is
+only applicable to the secondary output.
+
+RAW Bayer frames go through these ImgU pipeline hardware blocks and the final
+processed image output to the DDR memory.
+
+.. kernel-figure:: ipu3_rcb.svg
+ :alt: ipu3 resolution blocks image
+
+ IPU3 resolution change hardware blocks
+
+**Input Feeder**
+
+Input Feeder gets the Bayer frame data from the sensor, it can enable cropping
+of lines and columns from the frame and then store pixels into device's internal
+pixel buffer which are ready to readout by following blocks.
+
+**Bayer Down Scaler**
+
+Bayer Down Scaler is capable of performing image scaling in Bayer domain, the
+downscale factor can be configured from 1X to 1/4X in each axis with
+configuration steps of 0.03125 (1/32).
+
+**Geometric Distortion Correction**
+
+Geometric Distortion Correction is used to perform correction of distortions
+and image filtering. It needs some extra filter and envelope padding pixels to
+work, so the input resolution of GDC should be larger than the output
+resolution.
+
+**YUV Scaler**
+
+YUV Scaler which similar with BDS, but it is mainly do image down scaling in
+YUV domain, it can support up to 1/12X down scaling, but it can not be applied
+to the main output.
+
+The ImgU V4L2 subdev has to be configured with the supported resolutions in all
+the above hardware blocks, for a given input resolution.
+For a given supported resolution for an input frame, the Input Feeder, Bayer
+Down Scaler and GDC blocks should be configured with the supported resolutions
+as each hardware block has its own alignment requirement.
+
+You must configure the output resolution of the hardware blocks smartly to meet
+the hardware requirement along with keeping the maximum field of view. The
+intermediate resolutions can be generated by specific tool -
+
+https://github.com/intel/intel-ipu3-pipecfg
+
+This tool can be used to generate intermediate resolutions. More information can
+be obtained by looking at the following IPU3 ImgU configuration table.
+
+https://chromium.googlesource.com/chromiumos/overlays/board-overlays/+/master
+
+Under baseboard-poppy/media-libs/cros-camera-hal-configs-poppy/files/gcss
+directory, graph_settings_ov5670.xml can be used as an example.
+
+The following steps prepare the ImgU pipeline for the image processing.
+
+1. The ImgU V4L2 subdev data format should be set by using the
+VIDIOC_SUBDEV_S_FMT on pad 0, using the GDC width and height obtained above.
+
+2. The ImgU V4L2 subdev cropping should be set by using the
+VIDIOC_SUBDEV_S_SELECTION on pad 0, with V4L2_SEL_TGT_CROP as the target,
+using the input feeder height and width.
+
+3. The ImgU V4L2 subdev composing should be set by using the
+VIDIOC_SUBDEV_S_SELECTION on pad 0, with V4L2_SEL_TGT_COMPOSE as the target,
+using the BDS height and width.
+
+For the ov5670 example, for an input frame with a resolution of 2592x1944
+(which is input to the ImgU subdev pad 0), the corresponding resolutions
+for input feeder, BDS and GDC are 2592x1944, 2592x1944 and 2560x1920
+respectively.
+
+Once this is done, the received raw Bayer frames can be input to the ImgU
+V4L2 subdev as below, using the open source application v4l2n [#f1]_.
+
+For an image captured with 2592x1944 [#f4]_ resolution, with desired output
+resolution as 2560x1920 and viewfinder resolution as 2560x1920, the following
+v4l2n command can be used. This helps process the raw Bayer frames and produces
+the desired results for the main output image and the viewfinder output, in NV12
+format.
+
+.. code-block:: none
+
+ v4l2n --pipe=4 --load=/tmp/frame-#.bin --open=/dev/video4
+ --fmt=type:VIDEO_OUTPUT_MPLANE,width=2592,height=1944,pixelformat=0X47337069 \
+ --reqbufs=type:VIDEO_OUTPUT_MPLANE,count:1 --pipe=1 \
+ --output=/tmp/frames.out --open=/dev/video5 \
+ --fmt=type:VIDEO_CAPTURE_MPLANE,width=2560,height=1920,pixelformat=NV12 \
+ --reqbufs=type:VIDEO_CAPTURE_MPLANE,count:1 --pipe=2 \
+ --output=/tmp/frames.vf --open=/dev/video6 \
+ --fmt=type:VIDEO_CAPTURE_MPLANE,width=2560,height=1920,pixelformat=NV12 \
+ --reqbufs=type:VIDEO_CAPTURE_MPLANE,count:1 --pipe=3 --open=/dev/video7 \
+ --output=/tmp/frames.3A --fmt=type:META_CAPTURE,? \
+ --reqbufs=count:1,type:META_CAPTURE --pipe=1,2,3,4 --stream=5
+
+You can also use yavta [#f2]_ command to do same thing as above:
+
+.. code-block:: none
+
+ yavta --data-prefix -Bcapture-mplane -c10 -n5 -I -s2592x1944 \
+ --file=frame-#.out-f NV12 /dev/video5 & \
+ yavta --data-prefix -Bcapture-mplane -c10 -n5 -I -s2592x1944 \
+ --file=frame-#.vf -f NV12 /dev/video6 & \
+ yavta --data-prefix -Bmeta-capture -c10 -n5 -I \
+ --file=frame-#.3a /dev/video7 & \
+ yavta --data-prefix -Boutput-mplane -c10 -n5 -I -s2592x1944 \
+ --file=/tmp/frame-in.cio2 -f IPU3_SGRBG10 /dev/video4
+
+where /dev/video4, /dev/video5, /dev/video6 and /dev/video7 devices point to
+input, output, viewfinder and 3A statistics video nodes respectively.
+
+Converting the raw Bayer image into YUV domain
+----------------------------------------------
+
+The processed images after the above step, can be converted to YUV domain
+as below.
+
+Main output frames
+~~~~~~~~~~~~~~~~~~
+
+.. code-block:: none
+
+ raw2pnm -x2560 -y1920 -fNV12 /tmp/frames.out /tmp/frames.out.ppm
+
+where 2560x1920 is output resolution, NV12 is the video format, followed
+by input frame and output PNM file.
+
+Viewfinder output frames
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: none
+
+ raw2pnm -x2560 -y1920 -fNV12 /tmp/frames.vf /tmp/frames.vf.ppm
+
+where 2560x1920 is output resolution, NV12 is the video format, followed
+by input frame and output PNM file.
+
+Example user space code for IPU3
+================================
+
+User space code that configures and uses IPU3 is available here.
+
+https://chromium.googlesource.com/chromiumos/platform/arc-camera/+/master/
+
+The source can be located under hal/intel directory.
+
+Overview of IPU3 pipeline
+=========================
+
+IPU3 pipeline has a number of image processing stages, each of which takes a
+set of parameters as input. The major stages of pipelines are shown here:
+
+.. kernel-render:: DOT
+ :alt: IPU3 ImgU Pipeline
+ :caption: IPU3 ImgU Pipeline Diagram
+
+ digraph "IPU3 ImgU" {
+ node [shape=box]
+ splines="ortho"
+ rankdir="LR"
+
+ a [label="Raw pixels"]
+ b [label="Bayer Downscaling"]
+ c [label="Optical Black Correction"]
+ d [label="Linearization"]
+ e [label="Lens Shading Correction"]
+ f [label="White Balance / Exposure / Focus Apply"]
+ g [label="Bayer Noise Reduction"]
+ h [label="ANR"]
+ i [label="Demosaicing"]
+ j [label="Color Correction Matrix"]
+ k [label="Gamma correction"]
+ l [label="Color Space Conversion"]
+ m [label="Chroma Down Scaling"]
+ n [label="Chromatic Noise Reduction"]
+ o [label="Total Color Correction"]
+ p [label="XNR3"]
+ q [label="TNR"]
+ r [label="DDR", style=filled, fillcolor=yellow, shape=cylinder]
+ s [label="YUV Downscaling"]
+ t [label="DDR", style=filled, fillcolor=yellow, shape=cylinder]
+
+ { rank=same; a -> b -> c -> d -> e -> f -> g -> h -> i }
+ { rank=same; j -> k -> l -> m -> n -> o -> p -> q -> s -> t}
+
+ a -> j [style=invis, weight=10]
+ i -> j
+ q -> r
+ }
+
+The table below presents a description of the above algorithms.
+
+======================== =======================================================
+Name Description
+======================== =======================================================
+Optical Black Correction Optical Black Correction block subtracts a pre-defined
+ value from the respective pixel values to obtain better
+ image quality.
+ Defined in struct ipu3_uapi_obgrid_param.
+Linearization This algo block uses linearization parameters to
+ address non-linearity sensor effects. The Lookup table
+ table is defined in
+ struct ipu3_uapi_isp_lin_vmem_params.
+SHD Lens shading correction is used to correct spatial
+ non-uniformity of the pixel response due to optical
+ lens shading. This is done by applying a different gain
+ for each pixel. The gain, black level etc are
+ configured in struct ipu3_uapi_shd_config_static.
+BNR Bayer noise reduction block removes image noise by
+ applying a bilateral filter.
+ See struct ipu3_uapi_bnr_static_config for details.
+ANR Advanced Noise Reduction is a block based algorithm
+ that performs noise reduction in the Bayer domain. The
+ convolution matrix etc can be found in
+ struct ipu3_uapi_anr_config.
+DM Demosaicing converts raw sensor data in Bayer format
+ into RGB (Red, Green, Blue) presentation. Then add
+ outputs of estimation of Y channel for following stream
+ processing by Firmware. The struct is defined as
+ struct ipu3_uapi_dm_config.
+Color Correction Color Correction algo transforms sensor specific color
+ space to the standard "sRGB" color space. This is done
+ by applying 3x3 matrix defined in
+ struct ipu3_uapi_ccm_mat_config.
+Gamma correction Gamma correction struct ipu3_uapi_gamma_config is a
+ basic non-linear tone mapping correction that is
+ applied per pixel for each pixel component.
+CSC Color space conversion transforms each pixel from the
+ RGB primary presentation to YUV (Y: brightness,
+ UV: Luminance) presentation. This is done by applying
+ a 3x3 matrix defined in
+ struct ipu3_uapi_csc_mat_config
+CDS Chroma down sampling
+ After the CSC is performed, the Chroma Down Sampling
+ is applied for a UV plane down sampling by a factor
+ of 2 in each direction for YUV 4:2:0 using a 4x2
+ configurable filter struct ipu3_uapi_cds_params.
+CHNR Chroma noise reduction
+ This block processes only the chrominance pixels and
+ performs noise reduction by cleaning the high
+ frequency noise.
+ See struct struct ipu3_uapi_yuvp1_chnr_config.
+TCC Total color correction as defined in struct
+ struct ipu3_uapi_yuvp2_tcc_static_config.
+XNR3 eXtreme Noise Reduction V3 is the third revision of
+ noise reduction algorithm used to improve image
+ quality. This removes the low frequency noise in the
+ captured image. Two related structs are being defined,
+ struct ipu3_uapi_isp_xnr3_params for ISP data memory
+ and struct ipu3_uapi_isp_xnr3_vmem_params for vector
+ memory.
+TNR Temporal Noise Reduction block compares successive
+ frames in time to remove anomalies / noise in pixel
+ values. struct ipu3_uapi_isp_tnr3_vmem_params and
+ struct ipu3_uapi_isp_tnr3_params are defined for ISP
+ vector and data memory respectively.
+======================== =======================================================
+
+Other often encountered acronyms not listed in above table:
+
+ ACC
+ Accelerator cluster
+ AWB_FR
+ Auto white balance filter response statistics
+ BDS
+ Bayer downscaler parameters
+ CCM
+ Color correction matrix coefficients
+ IEFd
+ Image enhancement filter directed
+ Obgrid
+ Optical black level compensation
+ OSYS
+ Output system configuration
+ ROI
+ Region of interest
+ YDS
+ Y down sampling
+ YTM
+ Y-tone mapping
+
+A few stages of the pipeline will be executed by firmware running on the ISP
+processor, while many others will use a set of fixed hardware blocks also
+called accelerator cluster (ACC) to crunch pixel data and produce statistics.
+
+ACC parameters of individual algorithms, as defined by
+struct ipu3_uapi_acc_param, can be chosen to be applied by the user
+space through struct struct ipu3_uapi_flags embedded in
+struct ipu3_uapi_params structure. For parameters that are configured as
+not enabled by the user space, the corresponding structs are ignored by the
+driver, in which case the existing configuration of the algorithm will be
+preserved.
+
+References
+==========
+
+.. [#f1] https://github.com/intel/nvt
+
+.. [#f2] http://git.ideasonboard.org/yavta.git
+
+.. [#f4] ImgU limitation requires an additional 16x16 for all input resolutions
diff --git a/Documentation/admin-guide/media/ipu3_rcb.svg b/Documentation/admin-guide/media/ipu3_rcb.svg
new file mode 100644
index 000000000000..d878421b42a0
--- /dev/null
+++ b/Documentation/admin-guide/media/ipu3_rcb.svg
@@ -0,0 +1,331 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="774pt" height="152pt" viewBox="0 0 774 152" version="1.1">
+<defs>
+<g>
+<symbol overflow="visible" id="glyph0-0">
+<path style="stroke:none;" d="M 1 0 L 1 -15 L 9 -15 L 9 0 Z M 8 -1 L 8 -14 L 2 -14 L 2 -1 Z M 8 -1 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-1">
+<path style="stroke:none;" d="M 4.6875 -1.15625 C 5.519531 -1.15625 6.15625 -1.316406 6.59375 -1.640625 C 7.039062 -1.960938 7.265625 -2.441406 7.265625 -3.078125 C 7.265625 -3.460938 7.179688 -3.789062 7.015625 -4.0625 C 6.859375 -4.34375 6.644531 -4.582031 6.375 -4.78125 C 6.113281 -4.988281 5.816406 -5.171875 5.484375 -5.328125 C 5.148438 -5.484375 4.804688 -5.628906 4.453125 -5.765625 C 4.054688 -5.921875 3.675781 -6.097656 3.3125 -6.296875 C 2.945312 -6.492188 2.617188 -6.726562 2.328125 -7 C 2.046875 -7.269531 1.820312 -7.582031 1.65625 -7.9375 C 1.488281 -8.300781 1.40625 -8.726562 1.40625 -9.21875 C 1.40625 -10.300781 1.742188 -11.144531 2.421875 -11.75 C 3.097656 -12.351562 4.046875 -12.65625 5.265625 -12.65625 C 5.597656 -12.65625 5.925781 -12.628906 6.25 -12.578125 C 6.570312 -12.535156 6.875 -12.476562 7.15625 -12.40625 C 7.4375 -12.34375 7.6875 -12.265625 7.90625 -12.171875 C 8.125 -12.085938 8.300781 -12 8.4375 -11.90625 L 7.921875 -10.515625 C 7.648438 -10.679688 7.28125 -10.84375 6.8125 -11 C 6.351562 -11.15625 5.835938 -11.234375 5.265625 -11.234375 C 4.660156 -11.234375 4.140625 -11.082031 3.703125 -10.78125 C 3.265625 -10.488281 3.046875 -10.039062 3.046875 -9.4375 C 3.046875 -9.09375 3.109375 -8.800781 3.234375 -8.5625 C 3.359375 -8.320312 3.53125 -8.109375 3.75 -7.921875 C 3.96875 -7.742188 4.222656 -7.582031 4.515625 -7.4375 C 4.804688 -7.289062 5.128906 -7.144531 5.484375 -7 C 5.984375 -6.789062 6.441406 -6.578125 6.859375 -6.359375 C 7.285156 -6.148438 7.648438 -5.894531 7.953125 -5.59375 C 8.253906 -5.300781 8.488281 -4.953125 8.65625 -4.546875 C 8.820312 -4.148438 8.90625 -3.664062 8.90625 -3.09375 C 8.90625 -2.019531 8.539062 -1.191406 7.8125 -0.609375 C 7.082031 -0.0234375 6.039062 0.265625 4.6875 0.265625 C 4.238281 0.265625 3.820312 0.234375 3.4375 0.171875 C 3.050781 0.109375 2.707031 0.03125 2.40625 -0.0625 C 2.101562 -0.15625 1.835938 -0.25 1.609375 -0.34375 C 1.390625 -0.4375 1.21875 -0.519531 1.09375 -0.59375 L 1.59375 -1.953125 C 1.863281 -1.804688 2.257812 -1.632812 2.78125 -1.4375 C 3.300781 -1.25 3.9375 -1.15625 4.6875 -1.15625 Z M 4.6875 -1.15625 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-2">
+<path style="stroke:none;" d="M 5.1875 -9.5 C 6.4375 -9.5 7.398438 -9.109375 8.078125 -8.328125 C 8.753906 -7.546875 9.09375 -6.363281 9.09375 -4.78125 L 9.09375 -4.203125 L 2.453125 -4.203125 C 2.523438 -3.242188 2.84375 -2.515625 3.40625 -2.015625 C 3.976562 -1.515625 4.773438 -1.265625 5.796875 -1.265625 C 6.390625 -1.265625 6.890625 -1.3125 7.296875 -1.40625 C 7.710938 -1.5 8.023438 -1.597656 8.234375 -1.703125 L 8.453125 -0.296875 C 8.253906 -0.191406 7.894531 -0.0820312 7.375 0.03125 C 6.851562 0.15625 6.269531 0.21875 5.625 0.21875 C 4.820312 0.21875 4.113281 0.0976562 3.5 -0.140625 C 2.894531 -0.390625 2.394531 -0.726562 2 -1.15625 C 1.601562 -1.582031 1.300781 -2.09375 1.09375 -2.6875 C 0.894531 -3.28125 0.796875 -3.925781 0.796875 -4.625 C 0.796875 -5.445312 0.921875 -6.164062 1.171875 -6.78125 C 1.429688 -7.394531 1.765625 -7.898438 2.171875 -8.296875 C 2.585938 -8.703125 3.054688 -9.003906 3.578125 -9.203125 C 4.097656 -9.398438 4.632812 -9.5 5.1875 -9.5 Z M 7.421875 -5.546875 C 7.421875 -6.328125 7.210938 -6.945312 6.796875 -7.40625 C 6.390625 -7.863281 5.84375 -8.09375 5.15625 -8.09375 C 4.769531 -8.09375 4.421875 -8.019531 4.109375 -7.875 C 3.796875 -7.726562 3.523438 -7.535156 3.296875 -7.296875 C 3.066406 -7.054688 2.882812 -6.78125 2.75 -6.46875 C 2.625 -6.164062 2.539062 -5.859375 2.5 -5.546875 Z M 7.421875 -5.546875 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-3">
+<path style="stroke:none;" d="M 1.421875 -9.015625 C 2.015625 -9.160156 2.609375 -9.273438 3.203125 -9.359375 C 3.796875 -9.441406 4.351562 -9.484375 4.875 -9.484375 C 6.113281 -9.484375 7.050781 -9.160156 7.6875 -8.515625 C 8.320312 -7.878906 8.640625 -6.851562 8.640625 -5.4375 L 8.640625 0 L 7 0 L 7 -5.140625 C 7 -5.742188 6.945312 -6.226562 6.84375 -6.59375 C 6.738281 -6.96875 6.585938 -7.257812 6.390625 -7.46875 C 6.191406 -7.675781 5.957031 -7.816406 5.6875 -7.890625 C 5.414062 -7.972656 5.117188 -8.015625 4.796875 -8.015625 C 4.535156 -8.015625 4.253906 -8 3.953125 -7.96875 C 3.648438 -7.9375 3.359375 -7.894531 3.078125 -7.84375 L 3.078125 0 L 1.421875 0 Z M 1.421875 -9.015625 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-4">
+<path style="stroke:none;" d="M 7.015625 -2.3125 C 7.015625 -2.644531 6.878906 -2.914062 6.609375 -3.125 C 6.335938 -3.34375 6 -3.53125 5.59375 -3.6875 C 5.1875 -3.851562 4.742188 -4.015625 4.265625 -4.171875 C 3.785156 -4.328125 3.335938 -4.515625 2.921875 -4.734375 C 2.515625 -4.960938 2.175781 -5.242188 1.90625 -5.578125 C 1.632812 -5.910156 1.5 -6.34375 1.5 -6.875 C 1.5 -7.625 1.800781 -8.25 2.40625 -8.75 C 3.007812 -9.25 3.960938 -9.5 5.265625 -9.5 C 5.765625 -9.5 6.285156 -9.460938 6.828125 -9.390625 C 7.367188 -9.316406 7.832031 -9.21875 8.21875 -9.09375 L 7.921875 -7.625 C 7.816406 -7.675781 7.671875 -7.726562 7.484375 -7.78125 C 7.296875 -7.84375 7.082031 -7.894531 6.84375 -7.9375 C 6.601562 -7.988281 6.34375 -8.023438 6.0625 -8.046875 C 5.789062 -8.078125 5.53125 -8.09375 5.28125 -8.09375 C 3.84375 -8.09375 3.125 -7.703125 3.125 -6.921875 C 3.125 -6.640625 3.257812 -6.398438 3.53125 -6.203125 C 3.800781 -6.015625 4.144531 -5.835938 4.5625 -5.671875 C 4.976562 -5.515625 5.425781 -5.351562 5.90625 -5.1875 C 6.382812 -5.019531 6.828125 -4.816406 7.234375 -4.578125 C 7.648438 -4.335938 7.992188 -4.046875 8.265625 -3.703125 C 8.546875 -3.367188 8.6875 -2.941406 8.6875 -2.421875 C 8.6875 -1.578125 8.359375 -0.925781 7.703125 -0.46875 C 7.046875 -0.0078125 6.007812 0.21875 4.59375 0.21875 C 3.957031 0.21875 3.375 0.164062 2.84375 0.0625 C 2.3125 -0.0390625 1.800781 -0.203125 1.3125 -0.421875 L 1.640625 -1.921875 C 2.109375 -1.703125 2.597656 -1.523438 3.109375 -1.390625 C 3.617188 -1.253906 4.171875 -1.1875 4.765625 -1.1875 C 6.265625 -1.1875 7.015625 -1.5625 7.015625 -2.3125 Z M 7.015625 -2.3125 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-5">
+<path style="stroke:none;" d="M 9.203125 -4.640625 C 9.203125 -3.910156 9.097656 -3.25 8.890625 -2.65625 C 8.679688 -2.0625 8.390625 -1.550781 8.015625 -1.125 C 7.640625 -0.695312 7.191406 -0.363281 6.671875 -0.125 C 6.160156 0.101562 5.597656 0.21875 4.984375 0.21875 C 4.378906 0.21875 3.820312 0.101562 3.3125 -0.125 C 2.800781 -0.363281 2.359375 -0.695312 1.984375 -1.125 C 1.609375 -1.550781 1.316406 -2.0625 1.109375 -2.65625 C 0.898438 -3.25 0.796875 -3.910156 0.796875 -4.640625 C 0.796875 -5.367188 0.898438 -6.035156 1.109375 -6.640625 C 1.316406 -7.242188 1.609375 -7.753906 1.984375 -8.171875 C 2.359375 -8.585938 2.800781 -8.910156 3.3125 -9.140625 C 3.820312 -9.378906 4.378906 -9.5 4.984375 -9.5 C 5.597656 -9.5 6.160156 -9.378906 6.671875 -9.140625 C 7.191406 -8.910156 7.640625 -8.585938 8.015625 -8.171875 C 8.390625 -7.753906 8.679688 -7.242188 8.890625 -6.640625 C 9.097656 -6.035156 9.203125 -5.367188 9.203125 -4.640625 Z M 7.5 -4.640625 C 7.5 -5.691406 7.269531 -6.519531 6.8125 -7.125 C 6.363281 -7.738281 5.753906 -8.046875 4.984375 -8.046875 C 4.222656 -8.046875 3.617188 -7.738281 3.171875 -7.125 C 2.722656 -6.519531 2.5 -5.691406 2.5 -4.640625 C 2.5 -3.597656 2.722656 -2.773438 3.171875 -2.171875 C 3.617188 -1.566406 4.222656 -1.265625 4.984375 -1.265625 C 5.753906 -1.265625 6.363281 -1.566406 6.8125 -2.171875 C 7.269531 -2.773438 7.5 -3.597656 7.5 -4.640625 Z M 7.5 -4.640625 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-6">
+<path style="stroke:none;" d="M 2.140625 0 L 2.140625 -8.78125 C 3.503906 -9.25 4.878906 -9.484375 6.265625 -9.484375 C 6.691406 -9.484375 7.097656 -9.460938 7.484375 -9.421875 C 7.867188 -9.390625 8.296875 -9.320312 8.765625 -9.21875 L 8.453125 -7.765625 C 8.023438 -7.878906 7.648438 -7.953125 7.328125 -7.984375 C 7.003906 -8.023438 6.648438 -8.046875 6.265625 -8.046875 C 5.453125 -8.046875 4.625 -7.929688 3.78125 -7.703125 L 3.78125 0 Z M 2.140625 0 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-7">
+<path style="stroke:none;" d="M 5.8125 -10.984375 L 5.8125 -1.40625 L 8.21875 -1.40625 L 8.21875 0 L 1.78125 0 L 1.78125 -1.40625 L 4.1875 -1.40625 L 4.1875 -10.984375 L 1.78125 -10.984375 L 1.78125 -12.375 L 8.21875 -12.375 L 8.21875 -10.984375 Z M 5.8125 -10.984375 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-8">
+<path style="stroke:none;" d="M 1.8125 0 L 1.8125 -12.375 L 8.84375 -12.375 L 8.84375 -10.984375 L 3.453125 -10.984375 L 3.453125 -7.125 L 8.203125 -7.125 L 8.203125 -5.734375 L 3.453125 -5.734375 L 3.453125 0 Z M 1.8125 0 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-9">
+<path style="stroke:none;" d="M 4.078125 0.09375 C 3.878906 0.09375 3.644531 0.0859375 3.375 0.078125 C 3.113281 0.0664062 2.847656 0.0507812 2.578125 0.03125 C 2.316406 0.0078125 2.050781 -0.0195312 1.78125 -0.0625 C 1.507812 -0.101562 1.273438 -0.148438 1.078125 -0.203125 L 1.078125 -12.203125 C 1.273438 -12.253906 1.503906 -12.300781 1.765625 -12.34375 C 2.023438 -12.382812 2.289062 -12.410156 2.5625 -12.421875 C 2.84375 -12.441406 3.113281 -12.457031 3.375 -12.46875 C 3.632812 -12.488281 3.867188 -12.5 4.078125 -12.5 C 4.691406 -12.5 5.265625 -12.445312 5.796875 -12.34375 C 6.328125 -12.238281 6.789062 -12.054688 7.1875 -11.796875 C 7.582031 -11.546875 7.890625 -11.210938 8.109375 -10.796875 C 8.328125 -10.390625 8.4375 -9.878906 8.4375 -9.265625 C 8.4375 -8.960938 8.390625 -8.675781 8.296875 -8.40625 C 8.203125 -8.132812 8.070312 -7.878906 7.90625 -7.640625 C 7.738281 -7.398438 7.546875 -7.1875 7.328125 -7 C 7.109375 -6.820312 6.875 -6.6875 6.625 -6.59375 C 7.300781 -6.40625 7.867188 -6.0625 8.328125 -5.5625 C 8.785156 -5.0625 9.015625 -4.414062 9.015625 -3.625 C 9.015625 -2.394531 8.617188 -1.46875 7.828125 -0.84375 C 7.046875 -0.21875 5.796875 0.09375 4.078125 0.09375 Z M 2.71875 -5.78125 L 2.71875 -1.359375 C 2.75 -1.347656 2.898438 -1.332031 3.171875 -1.3125 C 3.441406 -1.289062 3.785156 -1.28125 4.203125 -1.28125 C 4.609375 -1.28125 5 -1.3125 5.375 -1.375 C 5.757812 -1.445312 6.097656 -1.570312 6.390625 -1.75 C 6.691406 -1.925781 6.929688 -2.160156 7.109375 -2.453125 C 7.285156 -2.753906 7.375 -3.132812 7.375 -3.59375 C 7.375 -4.007812 7.289062 -4.359375 7.125 -4.640625 C 6.957031 -4.921875 6.738281 -5.144531 6.46875 -5.3125 C 6.195312 -5.476562 5.878906 -5.597656 5.515625 -5.671875 C 5.160156 -5.742188 4.789062 -5.78125 4.40625 -5.78125 Z M 2.71875 -7.140625 L 4.015625 -7.140625 C 4.347656 -7.140625 4.679688 -7.171875 5.015625 -7.234375 C 5.347656 -7.304688 5.644531 -7.414062 5.90625 -7.5625 C 6.175781 -7.707031 6.390625 -7.90625 6.546875 -8.15625 C 6.710938 -8.414062 6.796875 -8.738281 6.796875 -9.125 C 6.796875 -9.476562 6.722656 -9.78125 6.578125 -10.03125 C 6.429688 -10.289062 6.238281 -10.5 6 -10.65625 C 5.757812 -10.820312 5.484375 -10.9375 5.171875 -11 C 4.859375 -11.0625 4.53125 -11.09375 4.1875 -11.09375 C 3.832031 -11.09375 3.523438 -11.085938 3.265625 -11.078125 C 3.003906 -11.078125 2.820312 -11.066406 2.71875 -11.046875 Z M 2.71875 -7.140625 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-10">
+<path style="stroke:none;" d="M 9.203125 -6.203125 C 9.203125 -5.054688 9.054688 -4.082031 8.765625 -3.28125 C 8.484375 -2.476562 8.09375 -1.828125 7.59375 -1.328125 C 7.09375 -0.828125 6.5 -0.460938 5.8125 -0.234375 C 5.125 -0.015625 4.378906 0.09375 3.578125 0.09375 C 2.753906 0.09375 1.921875 -0.00390625 1.078125 -0.203125 L 1.078125 -12.203125 C 1.921875 -12.398438 2.753906 -12.5 3.578125 -12.5 C 4.378906 -12.5 5.125 -12.382812 5.8125 -12.15625 C 6.5 -11.925781 7.09375 -11.554688 7.59375 -11.046875 C 8.09375 -10.546875 8.484375 -9.894531 8.765625 -9.09375 C 9.054688 -8.300781 9.203125 -7.335938 9.203125 -6.203125 Z M 2.71875 -1.375 C 3.050781 -1.332031 3.390625 -1.3125 3.734375 -1.3125 C 4.335938 -1.3125 4.875 -1.398438 5.34375 -1.578125 C 5.8125 -1.765625 6.203125 -2.054688 6.515625 -2.453125 C 6.835938 -2.847656 7.082031 -3.351562 7.25 -3.96875 C 7.425781 -4.59375 7.515625 -5.335938 7.515625 -6.203125 C 7.515625 -7.878906 7.191406 -9.109375 6.546875 -9.890625 C 5.898438 -10.679688 4.945312 -11.078125 3.6875 -11.078125 C 3.507812 -11.078125 3.335938 -11.070312 3.171875 -11.0625 C 3.003906 -11.0625 2.851562 -11.046875 2.71875 -11.015625 Z M 2.71875 -1.375 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-11">
+<path style="stroke:none;" d="M 7.453125 -6.09375 L 9.09375 -6.09375 L 9.09375 -0.296875 C 8.84375 -0.203125 8.4375 -0.0859375 7.875 0.046875 C 7.320312 0.191406 6.664062 0.265625 5.90625 0.265625 C 5.15625 0.265625 4.472656 0.125 3.859375 -0.15625 C 3.242188 -0.445312 2.71875 -0.863281 2.28125 -1.40625 C 1.851562 -1.957031 1.519531 -2.632812 1.28125 -3.4375 C 1.039062 -4.25 0.921875 -5.171875 0.921875 -6.203125 C 0.921875 -7.242188 1.050781 -8.160156 1.3125 -8.953125 C 1.582031 -9.753906 1.945312 -10.425781 2.40625 -10.96875 C 2.863281 -11.519531 3.398438 -11.9375 4.015625 -12.21875 C 4.628906 -12.507812 5.289062 -12.65625 6 -12.65625 C 6.457031 -12.65625 6.859375 -12.617188 7.203125 -12.546875 C 7.546875 -12.484375 7.835938 -12.40625 8.078125 -12.3125 C 8.328125 -12.226562 8.53125 -12.132812 8.6875 -12.03125 C 8.851562 -11.925781 8.976562 -11.847656 9.0625 -11.796875 L 8.515625 -10.421875 C 8.210938 -10.660156 7.847656 -10.851562 7.421875 -11 C 7.003906 -11.15625 6.5625 -11.234375 6.09375 -11.234375 C 5.59375 -11.234375 5.125 -11.113281 4.6875 -10.875 C 4.257812 -10.632812 3.890625 -10.296875 3.578125 -9.859375 C 3.273438 -9.421875 3.035156 -8.890625 2.859375 -8.265625 C 2.679688 -7.648438 2.59375 -6.960938 2.59375 -6.203125 C 2.59375 -5.453125 2.671875 -4.769531 2.828125 -4.15625 C 2.984375 -3.539062 3.207031 -3.015625 3.5 -2.578125 C 3.789062 -2.140625 4.148438 -1.796875 4.578125 -1.546875 C 5.015625 -1.304688 5.515625 -1.1875 6.078125 -1.1875 C 6.460938 -1.1875 6.757812 -1.210938 6.96875 -1.265625 C 7.1875 -1.316406 7.347656 -1.367188 7.453125 -1.421875 Z M 7.453125 -6.09375 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-12">
+<path style="stroke:none;" d="M 9.203125 -0.515625 C 8.734375 -0.253906 8.234375 -0.0625 7.703125 0.0625 C 7.179688 0.195312 6.617188 0.265625 6.015625 0.265625 C 5.285156 0.265625 4.609375 0.132812 3.984375 -0.125 C 3.367188 -0.382812 2.832031 -0.773438 2.375 -1.296875 C 1.925781 -1.828125 1.570312 -2.5 1.3125 -3.3125 C 1.050781 -4.132812 0.921875 -5.097656 0.921875 -6.203125 C 0.921875 -7.253906 1.054688 -8.179688 1.328125 -8.984375 C 1.597656 -9.785156 1.96875 -10.457031 2.4375 -11 C 2.90625 -11.539062 3.453125 -11.953125 4.078125 -12.234375 C 4.703125 -12.515625 5.367188 -12.65625 6.078125 -12.65625 C 6.566406 -12.65625 7.066406 -12.585938 7.578125 -12.453125 C 8.097656 -12.328125 8.601562 -12.109375 9.09375 -11.796875 L 8.625 -10.4375 C 7.738281 -10.945312 6.910156 -11.203125 6.140625 -11.203125 C 5.585938 -11.203125 5.09375 -11.082031 4.65625 -10.84375 C 4.226562 -10.613281 3.859375 -10.28125 3.546875 -9.84375 C 3.242188 -9.40625 3.007812 -8.878906 2.84375 -8.265625 C 2.675781 -7.648438 2.59375 -6.960938 2.59375 -6.203125 C 2.59375 -5.347656 2.679688 -4.609375 2.859375 -3.984375 C 3.046875 -3.359375 3.296875 -2.835938 3.609375 -2.421875 C 3.929688 -2.003906 4.316406 -1.695312 4.765625 -1.5 C 5.210938 -1.300781 5.695312 -1.203125 6.21875 -1.203125 C 6.601562 -1.203125 7.007812 -1.25 7.4375 -1.34375 C 7.863281 -1.445312 8.304688 -1.625 8.765625 -1.875 Z M 9.203125 -0.515625 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-0">
+<path style="stroke:none;" d="M 0.59375 0 L 0.59375 -9 L 5.40625 -9 L 5.40625 0 Z M 4.796875 -0.59375 L 4.796875 -8.40625 L 1.203125 -8.40625 L 1.203125 -0.59375 Z M 4.796875 -0.59375 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-1">
+<path style="stroke:none;" d="M 2.515625 0 L 2.515625 -2.765625 C 2.023438 -3.554688 1.582031 -4.332031 1.1875 -5.09375 C 0.789062 -5.851562 0.445312 -6.628906 0.15625 -7.421875 L 1.265625 -7.421875 C 1.492188 -6.753906 1.757812 -6.113281 2.0625 -5.5 C 2.363281 -4.882812 2.6875 -4.253906 3.03125 -3.609375 C 3.394531 -4.285156 3.71875 -4.929688 4 -5.546875 C 4.28125 -6.160156 4.539062 -6.785156 4.78125 -7.421875 L 5.859375 -7.421875 C 5.554688 -6.640625 5.207031 -5.875 4.8125 -5.125 C 4.414062 -4.382812 3.976562 -3.601562 3.5 -2.78125 L 3.5 0 Z M 2.515625 0 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-2">
+<path style="stroke:none;" d="M 3 0.15625 C 2.5625 0.15625 2.1875 0.09375 1.875 -0.03125 C 1.570312 -0.164062 1.320312 -0.347656 1.125 -0.578125 C 0.9375 -0.804688 0.796875 -1.085938 0.703125 -1.421875 C 0.617188 -1.765625 0.578125 -2.144531 0.578125 -2.5625 L 0.578125 -7.421875 L 1.5625 -7.421875 L 1.5625 -2.65625 C 1.5625 -2.28125 1.59375 -1.96875 1.65625 -1.71875 C 1.726562 -1.46875 1.828125 -1.265625 1.953125 -1.109375 C 2.078125 -0.960938 2.222656 -0.859375 2.390625 -0.796875 C 2.566406 -0.734375 2.769531 -0.703125 3 -0.703125 C 3.226562 -0.703125 3.425781 -0.734375 3.59375 -0.796875 C 3.769531 -0.859375 3.921875 -0.960938 4.046875 -1.109375 C 4.171875 -1.265625 4.265625 -1.46875 4.328125 -1.71875 C 4.398438 -1.96875 4.4375 -2.28125 4.4375 -2.65625 L 4.4375 -7.421875 L 5.421875 -7.421875 L 5.421875 -2.5625 C 5.421875 -2.144531 5.375 -1.765625 5.28125 -1.421875 C 5.195312 -1.085938 5.054688 -0.804688 4.859375 -0.578125 C 4.671875 -0.347656 4.421875 -0.164062 4.109375 -0.03125 C 3.804688 0.09375 3.4375 0.15625 3 0.15625 Z M 3 0.15625 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-3">
+<path style="stroke:none;" d="M 1.21875 -7.421875 C 1.320312 -6.921875 1.445312 -6.375 1.59375 -5.78125 C 1.738281 -5.1875 1.890625 -4.585938 2.046875 -3.984375 C 2.210938 -3.390625 2.378906 -2.820312 2.546875 -2.28125 C 2.722656 -1.738281 2.882812 -1.265625 3.03125 -0.859375 C 3.15625 -1.265625 3.300781 -1.742188 3.46875 -2.296875 C 3.644531 -2.847656 3.816406 -3.421875 3.984375 -4.015625 C 4.148438 -4.609375 4.304688 -5.203125 4.453125 -5.796875 C 4.609375 -6.390625 4.734375 -6.929688 4.828125 -7.421875 L 5.859375 -7.421875 C 5.796875 -7.109375 5.691406 -6.679688 5.546875 -6.140625 C 5.398438 -5.597656 5.226562 -4.992188 5.03125 -4.328125 C 4.832031 -3.660156 4.609375 -2.953125 4.359375 -2.203125 C 4.117188 -1.453125 3.863281 -0.71875 3.59375 0 L 2.375 0 C 2.125 -0.71875 1.878906 -1.445312 1.640625 -2.1875 C 1.410156 -2.9375 1.195312 -3.644531 1 -4.3125 C 0.800781 -4.976562 0.628906 -5.582031 0.484375 -6.125 C 0.335938 -6.675781 0.226562 -7.109375 0.15625 -7.421875 Z M 1.21875 -7.421875 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-4">
+<path style="stroke:none;" d=""/>
+</symbol>
+<symbol overflow="visible" id="glyph1-5">
+<path style="stroke:none;" d="M 5.515625 -3.71875 C 5.515625 -3.03125 5.425781 -2.445312 5.25 -1.96875 C 5.082031 -1.488281 4.847656 -1.097656 4.546875 -0.796875 C 4.253906 -0.492188 3.898438 -0.273438 3.484375 -0.140625 C 3.078125 -0.00390625 2.628906 0.0625 2.140625 0.0625 C 1.648438 0.0625 1.148438 0 0.640625 -0.125 L 0.640625 -7.3125 C 1.148438 -7.4375 1.648438 -7.5 2.140625 -7.5 C 2.628906 -7.5 3.078125 -7.429688 3.484375 -7.296875 C 3.898438 -7.160156 4.253906 -6.941406 4.546875 -6.640625 C 4.847656 -6.335938 5.082031 -5.941406 5.25 -5.453125 C 5.425781 -4.972656 5.515625 -4.394531 5.515625 -3.71875 Z M 1.625 -0.828125 C 1.832031 -0.804688 2.039062 -0.796875 2.25 -0.796875 C 2.601562 -0.796875 2.921875 -0.847656 3.203125 -0.953125 C 3.484375 -1.054688 3.71875 -1.226562 3.90625 -1.46875 C 4.101562 -1.707031 4.253906 -2.007812 4.359375 -2.375 C 4.460938 -2.75 4.515625 -3.195312 4.515625 -3.71875 C 4.515625 -4.726562 4.316406 -5.46875 3.921875 -5.9375 C 3.535156 -6.40625 2.960938 -6.640625 2.203125 -6.640625 C 2.097656 -6.640625 1.992188 -6.640625 1.890625 -6.640625 C 1.796875 -6.640625 1.707031 -6.628906 1.625 -6.609375 Z M 1.625 -0.828125 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-6">
+<path style="stroke:none;" d="M 5.515625 -2.78125 C 5.515625 -2.34375 5.453125 -1.945312 5.328125 -1.59375 C 5.203125 -1.238281 5.023438 -0.929688 4.796875 -0.671875 C 4.578125 -0.410156 4.3125 -0.210938 4 -0.078125 C 3.695312 0.0546875 3.359375 0.125 2.984375 0.125 C 2.628906 0.125 2.296875 0.0546875 1.984375 -0.078125 C 1.679688 -0.210938 1.414062 -0.410156 1.1875 -0.671875 C 0.96875 -0.929688 0.796875 -1.238281 0.671875 -1.59375 C 0.546875 -1.945312 0.484375 -2.34375 0.484375 -2.78125 C 0.484375 -3.21875 0.546875 -3.617188 0.671875 -3.984375 C 0.796875 -4.347656 0.96875 -4.65625 1.1875 -4.90625 C 1.414062 -5.15625 1.679688 -5.347656 1.984375 -5.484375 C 2.296875 -5.628906 2.628906 -5.703125 2.984375 -5.703125 C 3.359375 -5.703125 3.695312 -5.628906 4 -5.484375 C 4.3125 -5.347656 4.578125 -5.15625 4.796875 -4.90625 C 5.023438 -4.65625 5.203125 -4.347656 5.328125 -3.984375 C 5.453125 -3.617188 5.515625 -3.21875 5.515625 -2.78125 Z M 4.5 -2.78125 C 4.5 -3.414062 4.363281 -3.914062 4.09375 -4.28125 C 3.820312 -4.644531 3.453125 -4.828125 2.984375 -4.828125 C 2.523438 -4.828125 2.160156 -4.644531 1.890625 -4.28125 C 1.628906 -3.914062 1.5 -3.414062 1.5 -2.78125 C 1.5 -2.15625 1.628906 -1.660156 1.890625 -1.296875 C 2.160156 -0.929688 2.523438 -0.75 2.984375 -0.75 C 3.453125 -0.75 3.820312 -0.929688 4.09375 -1.296875 C 4.363281 -1.660156 4.5 -2.15625 4.5 -2.78125 Z M 4.5 -2.78125 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-7">
+<path style="stroke:none;" d="M 4.109375 0 C 3.992188 -0.269531 3.890625 -0.515625 3.796875 -0.734375 C 3.710938 -0.960938 3.628906 -1.1875 3.546875 -1.40625 C 3.460938 -1.632812 3.378906 -1.867188 3.296875 -2.109375 C 3.210938 -2.359375 3.113281 -2.640625 3 -2.953125 C 2.882812 -2.640625 2.78125 -2.359375 2.6875 -2.109375 C 2.601562 -1.867188 2.519531 -1.632812 2.4375 -1.40625 C 2.351562 -1.1875 2.265625 -0.960938 2.171875 -0.734375 C 2.085938 -0.515625 1.984375 -0.269531 1.859375 0 L 1.109375 0 C 0.890625 -0.976562 0.707031 -1.953125 0.5625 -2.921875 C 0.414062 -3.890625 0.304688 -4.769531 0.234375 -5.5625 L 1.15625 -5.5625 C 1.1875 -5.25 1.210938 -4.941406 1.234375 -4.640625 C 1.265625 -4.347656 1.300781 -4.035156 1.34375 -3.703125 C 1.382812 -3.378906 1.429688 -3.023438 1.484375 -2.640625 C 1.535156 -2.253906 1.59375 -1.820312 1.65625 -1.34375 C 1.78125 -1.664062 1.882812 -1.945312 1.96875 -2.1875 C 2.0625 -2.425781 2.144531 -2.648438 2.21875 -2.859375 C 2.289062 -3.078125 2.359375 -3.296875 2.421875 -3.515625 C 2.492188 -3.742188 2.570312 -4 2.65625 -4.28125 L 3.390625 -4.28125 C 3.472656 -4 3.546875 -3.742188 3.609375 -3.515625 C 3.671875 -3.296875 3.738281 -3.078125 3.8125 -2.859375 C 3.882812 -2.648438 3.957031 -2.425781 4.03125 -2.1875 C 4.113281 -1.945312 4.21875 -1.671875 4.34375 -1.359375 C 4.414062 -1.796875 4.476562 -2.203125 4.53125 -2.578125 C 4.59375 -2.953125 4.640625 -3.304688 4.671875 -3.640625 C 4.710938 -3.972656 4.75 -4.296875 4.78125 -4.609375 C 4.820312 -4.921875 4.851562 -5.238281 4.875 -5.5625 L 5.765625 -5.5625 C 5.734375 -5.164062 5.6875 -4.738281 5.625 -4.28125 C 5.570312 -3.820312 5.503906 -3.351562 5.421875 -2.875 C 5.335938 -2.394531 5.25 -1.910156 5.15625 -1.421875 C 5.0625 -0.929688 4.960938 -0.457031 4.859375 0 Z M 4.109375 0 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-8">
+<path style="stroke:none;" d="M 0.859375 -5.40625 C 1.210938 -5.5 1.566406 -5.566406 1.921875 -5.609375 C 2.273438 -5.660156 2.609375 -5.6875 2.921875 -5.6875 C 3.671875 -5.6875 4.234375 -5.492188 4.609375 -5.109375 C 4.992188 -4.722656 5.1875 -4.109375 5.1875 -3.265625 L 5.1875 0 L 4.203125 0 L 4.203125 -3.078125 C 4.203125 -3.441406 4.171875 -3.734375 4.109375 -3.953125 C 4.046875 -4.179688 3.953125 -4.359375 3.828125 -4.484375 C 3.710938 -4.609375 3.570312 -4.691406 3.40625 -4.734375 C 3.25 -4.785156 3.070312 -4.8125 2.875 -4.8125 C 2.71875 -4.8125 2.546875 -4.800781 2.359375 -4.78125 C 2.179688 -4.757812 2.007812 -4.734375 1.84375 -4.703125 L 1.84375 0 L 0.859375 0 Z M 0.859375 -5.40625 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-9">
+<path style="stroke:none;" d="M 4.21875 -1.390625 C 4.21875 -1.585938 4.132812 -1.75 3.96875 -1.875 C 3.800781 -2.007812 3.59375 -2.125 3.34375 -2.21875 C 3.101562 -2.3125 2.835938 -2.40625 2.546875 -2.5 C 2.265625 -2.59375 2 -2.707031 1.75 -2.84375 C 1.507812 -2.976562 1.304688 -3.144531 1.140625 -3.34375 C 0.984375 -3.539062 0.90625 -3.800781 0.90625 -4.125 C 0.90625 -4.570312 1.082031 -4.945312 1.4375 -5.25 C 1.800781 -5.550781 2.375 -5.703125 3.15625 -5.703125 C 3.457031 -5.703125 3.769531 -5.675781 4.09375 -5.625 C 4.414062 -5.582031 4.695312 -5.523438 4.9375 -5.453125 L 4.75 -4.578125 C 4.6875 -4.609375 4.597656 -4.640625 4.484375 -4.671875 C 4.367188 -4.710938 4.238281 -4.742188 4.09375 -4.765625 C 3.957031 -4.796875 3.804688 -4.816406 3.640625 -4.828125 C 3.472656 -4.847656 3.316406 -4.859375 3.171875 -4.859375 C 2.304688 -4.859375 1.875 -4.625 1.875 -4.15625 C 1.875 -3.988281 1.953125 -3.84375 2.109375 -3.71875 C 2.273438 -3.601562 2.484375 -3.5 2.734375 -3.40625 C 2.984375 -3.3125 3.25 -3.210938 3.53125 -3.109375 C 3.820312 -3.015625 4.09375 -2.894531 4.34375 -2.75 C 4.59375 -2.601562 4.796875 -2.425781 4.953125 -2.21875 C 5.117188 -2.019531 5.203125 -1.765625 5.203125 -1.453125 C 5.203125 -0.953125 5.003906 -0.5625 4.609375 -0.28125 C 4.222656 -0.0078125 3.609375 0.125 2.765625 0.125 C 2.378906 0.125 2.023438 0.09375 1.703125 0.03125 C 1.378906 -0.03125 1.078125 -0.125 0.796875 -0.25 L 0.984375 -1.15625 C 1.265625 -1.019531 1.554688 -0.910156 1.859375 -0.828125 C 2.171875 -0.742188 2.503906 -0.703125 2.859375 -0.703125 C 3.765625 -0.703125 4.21875 -0.929688 4.21875 -1.390625 Z M 4.21875 -1.390625 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-10">
+<path style="stroke:none;" d="M 0.59375 -2.765625 C 0.59375 -3.273438 0.671875 -3.710938 0.828125 -4.078125 C 0.984375 -4.441406 1.203125 -4.742188 1.484375 -4.984375 C 1.765625 -5.234375 2.09375 -5.414062 2.46875 -5.53125 C 2.84375 -5.644531 3.238281 -5.703125 3.65625 -5.703125 C 3.925781 -5.703125 4.195312 -5.679688 4.46875 -5.640625 C 4.738281 -5.609375 5.023438 -5.546875 5.328125 -5.453125 L 5.09375 -4.59375 C 4.832031 -4.6875 4.59375 -4.75 4.375 -4.78125 C 4.15625 -4.8125 3.929688 -4.828125 3.703125 -4.828125 C 3.421875 -4.828125 3.148438 -4.785156 2.890625 -4.703125 C 2.640625 -4.628906 2.414062 -4.507812 2.21875 -4.34375 C 2.03125 -4.1875 1.878906 -3.976562 1.765625 -3.71875 C 1.660156 -3.457031 1.609375 -3.140625 1.609375 -2.765625 C 1.609375 -2.421875 1.660156 -2.117188 1.765625 -1.859375 C 1.867188 -1.609375 2.015625 -1.398438 2.203125 -1.234375 C 2.390625 -1.078125 2.613281 -0.957031 2.875 -0.875 C 3.144531 -0.789062 3.4375 -0.75 3.75 -0.75 C 4.007812 -0.75 4.253906 -0.765625 4.484375 -0.796875 C 4.722656 -0.828125 4.984375 -0.890625 5.265625 -0.984375 L 5.40625 -0.15625 C 5.125 -0.0507812 4.835938 0.0195312 4.546875 0.0625 C 4.265625 0.101562 3.957031 0.125 3.625 0.125 C 3.175781 0.125 2.765625 0.0664062 2.390625 -0.046875 C 2.023438 -0.171875 1.707031 -0.351562 1.4375 -0.59375 C 1.164062 -0.832031 0.957031 -1.132812 0.8125 -1.5 C 0.664062 -1.863281 0.59375 -2.285156 0.59375 -2.765625 Z M 0.59375 -2.765625 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-11">
+<path style="stroke:none;" d="M 3.0625 -0.703125 C 3.3125 -0.703125 3.53125 -0.707031 3.71875 -0.71875 C 3.914062 -0.738281 4.082031 -0.765625 4.21875 -0.796875 L 4.21875 -2.453125 C 4.082031 -2.492188 3.925781 -2.523438 3.75 -2.546875 C 3.570312 -2.566406 3.382812 -2.578125 3.1875 -2.578125 C 3 -2.578125 2.816406 -2.5625 2.640625 -2.53125 C 2.460938 -2.507812 2.304688 -2.460938 2.171875 -2.390625 C 2.035156 -2.316406 1.921875 -2.222656 1.828125 -2.109375 C 1.742188 -1.992188 1.703125 -1.847656 1.703125 -1.671875 C 1.703125 -1.304688 1.820312 -1.050781 2.0625 -0.90625 C 2.3125 -0.769531 2.644531 -0.703125 3.0625 -0.703125 Z M 2.96875 -5.703125 C 3.382812 -5.703125 3.734375 -5.648438 4.015625 -5.546875 C 4.296875 -5.441406 4.523438 -5.296875 4.703125 -5.109375 C 4.878906 -4.929688 5.003906 -4.707031 5.078125 -4.4375 C 5.148438 -4.175781 5.1875 -3.890625 5.1875 -3.578125 L 5.1875 -0.09375 C 4.957031 -0.0507812 4.648438 -0.00390625 4.265625 0.046875 C 3.890625 0.0976562 3.5 0.125 3.09375 0.125 C 2.789062 0.125 2.492188 0.0976562 2.203125 0.046875 C 1.921875 -0.00390625 1.664062 -0.09375 1.4375 -0.21875 C 1.21875 -0.351562 1.039062 -0.535156 0.90625 -0.765625 C 0.769531 -0.992188 0.703125 -1.289062 0.703125 -1.65625 C 0.703125 -1.976562 0.769531 -2.25 0.90625 -2.46875 C 1.039062 -2.6875 1.21875 -2.863281 1.4375 -3 C 1.664062 -3.132812 1.921875 -3.234375 2.203125 -3.296875 C 2.484375 -3.359375 2.769531 -3.390625 3.0625 -3.390625 C 3.445312 -3.390625 3.832031 -3.34375 4.21875 -3.25 L 4.21875 -3.53125 C 4.21875 -3.695312 4.195312 -3.859375 4.15625 -4.015625 C 4.125 -4.171875 4.054688 -4.3125 3.953125 -4.4375 C 3.847656 -4.5625 3.707031 -4.660156 3.53125 -4.734375 C 3.363281 -4.816406 3.144531 -4.859375 2.875 -4.859375 C 2.53125 -4.859375 2.226562 -4.832031 1.96875 -4.78125 C 1.71875 -4.738281 1.523438 -4.691406 1.390625 -4.640625 L 1.265625 -5.453125 C 1.398438 -5.523438 1.625 -5.582031 1.9375 -5.625 C 2.257812 -5.675781 2.601562 -5.703125 2.96875 -5.703125 Z M 2.96875 -5.703125 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-12">
+<path style="stroke:none;" d="M 4.0625 0.125 C 3.707031 0.125 3.410156 0.078125 3.171875 -0.015625 C 2.941406 -0.109375 2.757812 -0.25 2.625 -0.4375 C 2.488281 -0.632812 2.390625 -0.875 2.328125 -1.15625 C 2.273438 -1.4375 2.25 -1.765625 2.25 -2.140625 L 2.25 -7.421875 L 0.640625 -7.421875 L 0.640625 -8.25 L 3.234375 -8.25 L 3.234375 -2.140625 C 3.234375 -1.867188 3.25 -1.644531 3.28125 -1.46875 C 3.320312 -1.289062 3.378906 -1.144531 3.453125 -1.03125 C 3.535156 -0.925781 3.628906 -0.851562 3.734375 -0.8125 C 3.847656 -0.769531 3.984375 -0.75 4.140625 -0.75 C 4.367188 -0.75 4.582031 -0.773438 4.78125 -0.828125 C 4.988281 -0.890625 5.144531 -0.953125 5.25 -1.015625 L 5.40625 -0.1875 C 5.351562 -0.15625 5.28125 -0.117188 5.1875 -0.078125 C 5.101562 -0.046875 5 -0.015625 4.875 0.015625 C 4.757812 0.046875 4.628906 0.0703125 4.484375 0.09375 C 4.347656 0.113281 4.207031 0.125 4.0625 0.125 Z M 4.0625 0.125 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-13">
+<path style="stroke:none;" d="M 2.515625 -6.4375 C 2.304688 -6.4375 2.125 -6.503906 1.96875 -6.640625 C 1.8125 -6.785156 1.734375 -6.984375 1.734375 -7.234375 C 1.734375 -7.484375 1.8125 -7.679688 1.96875 -7.828125 C 2.125 -7.972656 2.304688 -8.046875 2.515625 -8.046875 C 2.722656 -8.046875 2.898438 -7.972656 3.046875 -7.828125 C 3.203125 -7.679688 3.28125 -7.484375 3.28125 -7.234375 C 3.28125 -6.984375 3.203125 -6.785156 3.046875 -6.640625 C 2.898438 -6.503906 2.722656 -6.4375 2.515625 -6.4375 Z M 2.25 -4.734375 L 0.640625 -4.734375 L 0.640625 -5.5625 L 3.234375 -5.5625 L 3.234375 -2.140625 C 3.234375 -1.585938 3.3125 -1.21875 3.46875 -1.03125 C 3.625 -0.84375 3.851562 -0.75 4.15625 -0.75 C 4.382812 -0.75 4.597656 -0.773438 4.796875 -0.828125 C 4.992188 -0.890625 5.144531 -0.953125 5.25 -1.015625 L 5.40625 -0.1875 C 5.351562 -0.15625 5.28125 -0.117188 5.1875 -0.078125 C 5.101562 -0.046875 5.003906 -0.015625 4.890625 0.015625 C 4.773438 0.046875 4.644531 0.0703125 4.5 0.09375 C 4.363281 0.113281 4.21875 0.125 4.0625 0.125 C 3.71875 0.125 3.425781 0.078125 3.1875 -0.015625 C 2.957031 -0.109375 2.769531 -0.25 2.625 -0.4375 C 2.488281 -0.632812 2.390625 -0.875 2.328125 -1.15625 C 2.273438 -1.4375 2.25 -1.765625 2.25 -2.140625 Z M 2.25 -4.734375 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-14">
+<path style="stroke:none;" d="M 4.15625 -0.515625 C 4.039062 -0.453125 3.863281 -0.382812 3.625 -0.3125 C 3.394531 -0.238281 3.128906 -0.203125 2.828125 -0.203125 C 2.503906 -0.203125 2.195312 -0.253906 1.90625 -0.359375 C 1.625 -0.472656 1.378906 -0.640625 1.171875 -0.859375 C 0.960938 -1.078125 0.796875 -1.351562 0.671875 -1.6875 C 0.546875 -2.03125 0.484375 -2.4375 0.484375 -2.90625 C 0.484375 -3.3125 0.539062 -3.679688 0.65625 -4.015625 C 0.769531 -4.359375 0.9375 -4.65625 1.15625 -4.90625 C 1.375 -5.15625 1.644531 -5.347656 1.96875 -5.484375 C 2.289062 -5.628906 2.65625 -5.703125 3.0625 -5.703125 C 3.539062 -5.703125 3.945312 -5.664062 4.28125 -5.59375 C 4.625 -5.53125 4.910156 -5.46875 5.140625 -5.40625 L 5.140625 -0.4375 C 5.140625 0.425781 4.921875 1.050781 4.484375 1.4375 C 4.054688 1.820312 3.398438 2.015625 2.515625 2.015625 C 2.160156 2.015625 1.835938 1.984375 1.546875 1.921875 C 1.253906 1.867188 0.992188 1.804688 0.765625 1.734375 L 0.953125 0.859375 C 1.160156 0.941406 1.394531 1.007812 1.65625 1.0625 C 1.925781 1.125 2.222656 1.15625 2.546875 1.15625 C 3.117188 1.15625 3.53125 1.035156 3.78125 0.796875 C 4.03125 0.566406 4.15625 0.191406 4.15625 -0.328125 Z M 4.15625 -4.6875 C 4.0625 -4.71875 3.925781 -4.75 3.75 -4.78125 C 3.582031 -4.8125 3.359375 -4.828125 3.078125 -4.828125 C 2.554688 -4.828125 2.160156 -4.648438 1.890625 -4.296875 C 1.628906 -3.941406 1.5 -3.472656 1.5 -2.890625 C 1.5 -2.566406 1.535156 -2.289062 1.609375 -2.0625 C 1.691406 -1.84375 1.796875 -1.65625 1.921875 -1.5 C 2.054688 -1.351562 2.207031 -1.242188 2.375 -1.171875 C 2.539062 -1.109375 2.722656 -1.078125 2.921875 -1.078125 C 3.160156 -1.078125 3.390625 -1.113281 3.609375 -1.1875 C 3.835938 -1.257812 4.019531 -1.34375 4.15625 -1.4375 Z M 4.15625 -4.6875 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-15">
+<path style="stroke:none;" d="M 2.8125 -0.703125 C 3.3125 -0.703125 3.691406 -0.796875 3.953125 -0.984375 C 4.222656 -1.171875 4.359375 -1.457031 4.359375 -1.84375 C 4.359375 -2.082031 4.304688 -2.28125 4.203125 -2.4375 C 4.109375 -2.601562 3.984375 -2.75 3.828125 -2.875 C 3.671875 -3 3.488281 -3.109375 3.28125 -3.203125 C 3.082031 -3.296875 2.878906 -3.378906 2.671875 -3.453125 C 2.429688 -3.546875 2.203125 -3.648438 1.984375 -3.765625 C 1.765625 -3.890625 1.566406 -4.03125 1.390625 -4.1875 C 1.222656 -4.351562 1.085938 -4.546875 0.984375 -4.765625 C 0.890625 -4.984375 0.84375 -5.238281 0.84375 -5.53125 C 0.84375 -6.175781 1.046875 -6.679688 1.453125 -7.046875 C 1.859375 -7.410156 2.425781 -7.59375 3.15625 -7.59375 C 3.351562 -7.59375 3.550781 -7.578125 3.75 -7.546875 C 3.945312 -7.523438 4.128906 -7.492188 4.296875 -7.453125 C 4.460938 -7.410156 4.609375 -7.359375 4.734375 -7.296875 C 4.867188 -7.242188 4.976562 -7.191406 5.0625 -7.140625 L 4.75 -6.3125 C 4.59375 -6.40625 4.375 -6.5 4.09375 -6.59375 C 3.8125 -6.695312 3.5 -6.75 3.15625 -6.75 C 2.789062 -6.75 2.476562 -6.65625 2.21875 -6.46875 C 1.957031 -6.289062 1.828125 -6.019531 1.828125 -5.65625 C 1.828125 -5.457031 1.863281 -5.285156 1.9375 -5.140625 C 2.007812 -4.992188 2.113281 -4.863281 2.25 -4.75 C 2.382812 -4.644531 2.535156 -4.546875 2.703125 -4.453125 C 2.878906 -4.367188 3.070312 -4.285156 3.28125 -4.203125 C 3.59375 -4.078125 3.875 -3.945312 4.125 -3.8125 C 4.375 -3.6875 4.585938 -3.535156 4.765625 -3.359375 C 4.953125 -3.179688 5.09375 -2.972656 5.1875 -2.734375 C 5.289062 -2.492188 5.34375 -2.203125 5.34375 -1.859375 C 5.34375 -1.210938 5.125 -0.710938 4.6875 -0.359375 C 4.25 -0.015625 3.625 0.15625 2.8125 0.15625 C 2.539062 0.15625 2.289062 0.132812 2.0625 0.09375 C 1.832031 0.0625 1.625 0.0195312 1.4375 -0.03125 C 1.257812 -0.09375 1.101562 -0.148438 0.96875 -0.203125 C 0.832031 -0.253906 0.726562 -0.304688 0.65625 -0.359375 L 0.953125 -1.171875 C 1.117188 -1.085938 1.359375 -0.988281 1.671875 -0.875 C 1.984375 -0.757812 2.363281 -0.703125 2.8125 -0.703125 Z M 2.8125 -0.703125 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-16">
+<path style="stroke:none;" d="M 3.109375 -5.703125 C 3.859375 -5.703125 4.4375 -5.46875 4.84375 -5 C 5.25 -4.53125 5.453125 -3.820312 5.453125 -2.875 L 5.453125 -2.515625 L 1.46875 -2.515625 C 1.507812 -1.941406 1.703125 -1.503906 2.046875 -1.203125 C 2.390625 -0.898438 2.867188 -0.75 3.484375 -0.75 C 3.835938 -0.75 4.132812 -0.773438 4.375 -0.828125 C 4.625 -0.890625 4.8125 -0.953125 4.9375 -1.015625 L 5.078125 -0.1875 C 4.953125 -0.113281 4.734375 -0.046875 4.421875 0.015625 C 4.109375 0.0859375 3.757812 0.125 3.375 0.125 C 2.894531 0.125 2.472656 0.0507812 2.109375 -0.09375 C 1.742188 -0.238281 1.441406 -0.4375 1.203125 -0.6875 C 0.960938 -0.945312 0.78125 -1.253906 0.65625 -1.609375 C 0.539062 -1.960938 0.484375 -2.347656 0.484375 -2.765625 C 0.484375 -3.265625 0.554688 -3.695312 0.703125 -4.0625 C 0.859375 -4.4375 1.0625 -4.742188 1.3125 -4.984375 C 1.5625 -5.222656 1.835938 -5.398438 2.140625 -5.515625 C 2.453125 -5.640625 2.773438 -5.703125 3.109375 -5.703125 Z M 4.453125 -3.328125 C 4.453125 -3.796875 4.328125 -4.164062 4.078125 -4.4375 C 3.828125 -4.71875 3.5 -4.859375 3.09375 -4.859375 C 2.863281 -4.859375 2.65625 -4.8125 2.46875 -4.71875 C 2.28125 -4.632812 2.117188 -4.519531 1.984375 -4.375 C 1.847656 -4.226562 1.738281 -4.0625 1.65625 -3.875 C 1.570312 -3.695312 1.519531 -3.515625 1.5 -3.328125 Z M 4.453125 -3.328125 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-17">
+<path style="stroke:none;" d="M 4.15625 -4.390625 C 4.039062 -4.492188 3.875 -4.59375 3.65625 -4.6875 C 3.445312 -4.78125 3.222656 -4.828125 2.984375 -4.828125 C 2.722656 -4.828125 2.5 -4.773438 2.3125 -4.671875 C 2.125 -4.566406 1.96875 -4.421875 1.84375 -4.234375 C 1.726562 -4.054688 1.640625 -3.84375 1.578125 -3.59375 C 1.523438 -3.34375 1.5 -3.070312 1.5 -2.78125 C 1.5 -2.132812 1.648438 -1.632812 1.953125 -1.28125 C 2.253906 -0.925781 2.648438 -0.75 3.140625 -0.75 C 3.390625 -0.75 3.597656 -0.757812 3.765625 -0.78125 C 3.941406 -0.8125 4.070312 -0.835938 4.15625 -0.859375 Z M 4.15625 -8.140625 L 5.140625 -8.3125 L 5.140625 -0.15625 C 4.929688 -0.09375 4.65625 -0.03125 4.3125 0.03125 C 3.976562 0.09375 3.585938 0.125 3.140625 0.125 C 2.742188 0.125 2.378906 0.0546875 2.046875 -0.078125 C 1.722656 -0.210938 1.441406 -0.40625 1.203125 -0.65625 C 0.972656 -0.90625 0.796875 -1.207031 0.671875 -1.5625 C 0.546875 -1.925781 0.484375 -2.332031 0.484375 -2.78125 C 0.484375 -3.21875 0.535156 -3.613281 0.640625 -3.96875 C 0.742188 -4.320312 0.898438 -4.625 1.109375 -4.875 C 1.316406 -5.132812 1.566406 -5.335938 1.859375 -5.484375 C 2.148438 -5.628906 2.488281 -5.703125 2.875 -5.703125 C 3.164062 -5.703125 3.421875 -5.664062 3.640625 -5.59375 C 3.867188 -5.519531 4.039062 -5.441406 4.15625 -5.359375 Z M 4.15625 -8.140625 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-18">
+<path style="stroke:none;" d="M 1.28125 0 L 1.28125 -5.265625 C 2.101562 -5.546875 2.925781 -5.6875 3.75 -5.6875 C 4.007812 -5.6875 4.253906 -5.675781 4.484375 -5.65625 C 4.722656 -5.632812 4.976562 -5.59375 5.25 -5.53125 L 5.078125 -4.65625 C 4.816406 -4.726562 4.585938 -4.773438 4.390625 -4.796875 C 4.203125 -4.816406 3.988281 -4.828125 3.75 -4.828125 C 3.269531 -4.828125 2.773438 -4.757812 2.265625 -4.625 L 2.265625 0 Z M 1.28125 0 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-19">
+<path style="stroke:none;" d="M 0.609375 1.046875 C 0.679688 1.085938 0.78125 1.117188 0.90625 1.140625 C 1.039062 1.160156 1.164062 1.171875 1.28125 1.171875 C 1.675781 1.171875 1.984375 1.082031 2.203125 0.90625 C 2.421875 0.738281 2.625 0.460938 2.8125 0.078125 C 2.363281 -0.773438 1.941406 -1.6875 1.546875 -2.65625 C 1.148438 -3.625 0.828125 -4.59375 0.578125 -5.5625 L 1.65625 -5.5625 C 1.738281 -5.25 1.832031 -4.90625 1.9375 -4.53125 C 2.039062 -4.15625 2.160156 -3.769531 2.296875 -3.375 C 2.441406 -2.988281 2.585938 -2.597656 2.734375 -2.203125 C 2.890625 -1.804688 3.054688 -1.425781 3.234375 -1.0625 C 3.367188 -1.4375 3.488281 -1.800781 3.59375 -2.15625 C 3.707031 -2.519531 3.8125 -2.882812 3.90625 -3.25 C 4.007812 -3.613281 4.109375 -3.984375 4.203125 -4.359375 C 4.296875 -4.742188 4.394531 -5.144531 4.5 -5.5625 L 5.53125 -5.5625 C 5.269531 -4.53125 4.984375 -3.523438 4.671875 -2.546875 C 4.359375 -1.566406 4.019531 -0.660156 3.65625 0.171875 C 3.519531 0.484375 3.375 0.753906 3.21875 0.984375 C 3.0625 1.222656 2.890625 1.414062 2.703125 1.5625 C 2.523438 1.71875 2.316406 1.832031 2.078125 1.90625 C 1.847656 1.976562 1.585938 2.015625 1.296875 2.015625 C 1.140625 2.015625 0.972656 1.992188 0.796875 1.953125 C 0.617188 1.910156 0.5 1.875 0.4375 1.84375 Z M 0.609375 1.046875 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-20">
+<path style="stroke:none;" d="M 0.34375 -3.71875 C 0.34375 -4.382812 0.40625 -4.960938 0.53125 -5.453125 C 0.664062 -5.941406 0.847656 -6.34375 1.078125 -6.65625 C 1.304688 -6.96875 1.582031 -7.203125 1.90625 -7.359375 C 2.238281 -7.515625 2.601562 -7.59375 3 -7.59375 C 3.394531 -7.59375 3.753906 -7.515625 4.078125 -7.359375 C 4.410156 -7.203125 4.691406 -6.96875 4.921875 -6.65625 C 5.148438 -6.34375 5.328125 -5.941406 5.453125 -5.453125 C 5.585938 -4.960938 5.65625 -4.382812 5.65625 -3.71875 C 5.65625 -3.050781 5.585938 -2.472656 5.453125 -1.984375 C 5.328125 -1.503906 5.148438 -1.101562 4.921875 -0.78125 C 4.691406 -0.457031 4.410156 -0.21875 4.078125 -0.0625 C 3.753906 0.0820312 3.394531 0.15625 3 0.15625 C 2.601562 0.15625 2.238281 0.0820312 1.90625 -0.0625 C 1.582031 -0.21875 1.304688 -0.457031 1.078125 -0.78125 C 0.847656 -1.101562 0.664062 -1.503906 0.53125 -1.984375 C 0.40625 -2.472656 0.34375 -3.050781 0.34375 -3.71875 Z M 1.359375 -3.71875 C 1.359375 -2.738281 1.488281 -1.988281 1.75 -1.46875 C 2.007812 -0.957031 2.414062 -0.703125 2.96875 -0.703125 C 3.53125 -0.703125 3.953125 -0.957031 4.234375 -1.46875 C 4.515625 -1.988281 4.65625 -2.738281 4.65625 -3.71875 C 4.65625 -4.695312 4.515625 -5.445312 4.234375 -5.96875 C 3.953125 -6.488281 3.53125 -6.75 2.96875 -6.75 C 2.414062 -6.75 2.007812 -6.488281 1.75 -5.96875 C 1.488281 -5.445312 1.359375 -4.695312 1.359375 -3.71875 Z M 1.359375 -3.71875 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-21">
+<path style="stroke:none;" d="M 5.140625 -0.15625 C 4.929688 -0.101562 4.644531 -0.046875 4.28125 0.015625 C 3.925781 0.0859375 3.507812 0.125 3.03125 0.125 C 2.613281 0.125 2.265625 0.0625 1.984375 -0.0625 C 1.703125 -0.1875 1.472656 -0.363281 1.296875 -0.59375 C 1.117188 -0.820312 0.992188 -1.09375 0.921875 -1.40625 C 0.847656 -1.71875 0.8125 -2.0625 0.8125 -2.4375 L 0.8125 -5.5625 L 1.796875 -5.5625 L 1.796875 -2.65625 C 1.796875 -1.96875 1.894531 -1.476562 2.09375 -1.1875 C 2.300781 -0.894531 2.644531 -0.75 3.125 -0.75 C 3.226562 -0.75 3.332031 -0.753906 3.4375 -0.765625 C 3.550781 -0.773438 3.65625 -0.785156 3.75 -0.796875 C 3.851562 -0.804688 3.9375 -0.816406 4 -0.828125 C 4.070312 -0.847656 4.125 -0.859375 4.15625 -0.859375 L 4.15625 -5.5625 L 5.140625 -5.5625 Z M 5.140625 -0.15625 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-22">
+<path style="stroke:none;" d="M 2.921875 -5.5625 L 5.265625 -5.5625 L 5.265625 -4.734375 L 2.921875 -4.734375 L 2.921875 -2.140625 C 2.921875 -1.867188 2.9375 -1.644531 2.96875 -1.46875 C 3.007812 -1.289062 3.078125 -1.144531 3.171875 -1.03125 C 3.265625 -0.925781 3.382812 -0.851562 3.53125 -0.8125 C 3.675781 -0.769531 3.851562 -0.75 4.0625 -0.75 C 4.34375 -0.75 4.570312 -0.773438 4.75 -0.828125 C 4.925781 -0.878906 5.09375 -0.941406 5.25 -1.015625 L 5.40625 -0.1875 C 5.289062 -0.132812 5.109375 -0.0703125 4.859375 0 C 4.617188 0.0820312 4.316406 0.125 3.953125 0.125 C 3.546875 0.125 3.207031 0.078125 2.9375 -0.015625 C 2.675781 -0.109375 2.46875 -0.25 2.3125 -0.4375 C 2.164062 -0.632812 2.066406 -0.875 2.015625 -1.15625 C 1.960938 -1.4375 1.9375 -1.765625 1.9375 -2.140625 L 1.9375 -4.734375 L 0.75 -4.734375 L 0.75 -5.5625 L 1.9375 -5.5625 L 1.9375 -7.125 L 2.921875 -7.296875 Z M 2.921875 -5.5625 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-23">
+<path style="stroke:none;" d="M 4.5 -2.765625 C 4.5 -3.421875 4.347656 -3.925781 4.046875 -4.28125 C 3.742188 -4.632812 3.347656 -4.8125 2.859375 -4.8125 C 2.585938 -4.8125 2.375 -4.796875 2.21875 -4.765625 C 2.0625 -4.742188 1.9375 -4.71875 1.84375 -4.6875 L 1.84375 -1.171875 C 1.957031 -1.066406 2.117188 -0.96875 2.328125 -0.875 C 2.546875 -0.789062 2.773438 -0.75 3.015625 -0.75 C 3.273438 -0.75 3.5 -0.800781 3.6875 -0.90625 C 3.875 -1.007812 4.023438 -1.148438 4.140625 -1.328125 C 4.265625 -1.515625 4.351562 -1.726562 4.40625 -1.96875 C 4.46875 -2.21875 4.5 -2.484375 4.5 -2.765625 Z M 5.515625 -2.765625 C 5.515625 -2.347656 5.460938 -1.957031 5.359375 -1.59375 C 5.253906 -1.238281 5.097656 -0.929688 4.890625 -0.671875 C 4.679688 -0.421875 4.429688 -0.222656 4.140625 -0.078125 C 3.847656 0.0546875 3.507812 0.125 3.125 0.125 C 2.832031 0.125 2.570312 0.0859375 2.34375 0.015625 C 2.125 -0.046875 1.957031 -0.125 1.84375 -0.21875 L 1.84375 1.984375 L 0.859375 1.984375 L 0.859375 -5.40625 C 1.066406 -5.46875 1.34375 -5.53125 1.6875 -5.59375 C 2.03125 -5.65625 2.421875 -5.6875 2.859375 -5.6875 C 3.253906 -5.6875 3.613281 -5.617188 3.9375 -5.484375 C 4.269531 -5.347656 4.550781 -5.15625 4.78125 -4.90625 C 5.019531 -4.65625 5.203125 -4.347656 5.328125 -3.984375 C 5.453125 -3.617188 5.515625 -3.210938 5.515625 -2.765625 Z M 5.515625 -2.765625 "/>
+</symbol>
+<symbol overflow="visible" id="glyph1-24">
+<path style="stroke:none;" d="M 3.015625 -3.734375 L 4.15625 -7.421875 L 5.09375 -7.421875 C 5.25 -6.253906 5.359375 -5.054688 5.421875 -3.828125 C 5.492188 -2.609375 5.554688 -1.332031 5.609375 0 L 4.65625 0 C 4.644531 -0.425781 4.632812 -0.890625 4.625 -1.390625 C 4.625 -1.898438 4.613281 -2.421875 4.59375 -2.953125 C 4.582031 -3.492188 4.570312 -4.039062 4.5625 -4.59375 C 4.550781 -5.144531 4.539062 -5.679688 4.53125 -6.203125 L 3.4375 -2.8125 L 2.578125 -2.8125 L 1.46875 -6.203125 C 1.46875 -5.679688 1.457031 -5.144531 1.4375 -4.59375 C 1.425781 -4.050781 1.414062 -3.507812 1.40625 -2.96875 C 1.394531 -2.425781 1.382812 -1.898438 1.375 -1.390625 C 1.363281 -0.890625 1.351562 -0.425781 1.34375 0 L 0.390625 0 C 0.410156 -0.601562 0.4375 -1.222656 0.46875 -1.859375 C 0.5 -2.503906 0.535156 -3.144531 0.578125 -3.78125 C 0.617188 -4.414062 0.671875 -5.039062 0.734375 -5.65625 C 0.796875 -6.269531 0.863281 -6.859375 0.9375 -7.421875 L 1.84375 -7.421875 Z M 3.015625 -3.734375 "/>
+</symbol>
+<symbol overflow="visible" id="glyph2-0">
+<path style="stroke:none;" d="M 0.640625 2.296875 L 0.640625 -9.171875 L 7.140625 -9.171875 L 7.140625 2.296875 Z M 1.375 1.578125 L 6.421875 1.578125 L 6.421875 -8.4375 L 1.375 -8.4375 Z M 1.375 1.578125 "/>
+</symbol>
+<symbol overflow="visible" id="glyph2-1">
+<path style="stroke:none;" d="M 6.34375 -6.84375 L 6.34375 -5.75 C 6.007812 -5.925781 5.675781 -6.0625 5.34375 -6.15625 C 5.007812 -6.25 4.675781 -6.296875 4.34375 -6.296875 C 3.582031 -6.296875 2.992188 -6.050781 2.578125 -5.5625 C 2.160156 -5.082031 1.953125 -4.410156 1.953125 -3.546875 C 1.953125 -2.679688 2.160156 -2.007812 2.578125 -1.53125 C 2.992188 -1.050781 3.582031 -0.8125 4.34375 -0.8125 C 4.675781 -0.8125 5.007812 -0.851562 5.34375 -0.9375 C 5.675781 -1.03125 6.007812 -1.171875 6.34375 -1.359375 L 6.34375 -0.265625 C 6.019531 -0.117188 5.679688 -0.0078125 5.328125 0.0625 C 4.984375 0.144531 4.613281 0.1875 4.21875 0.1875 C 3.144531 0.1875 2.289062 -0.144531 1.65625 -0.8125 C 1.03125 -1.488281 0.71875 -2.398438 0.71875 -3.546875 C 0.71875 -4.703125 1.035156 -5.613281 1.671875 -6.28125 C 2.304688 -6.945312 3.179688 -7.28125 4.296875 -7.28125 C 4.648438 -7.28125 5 -7.242188 5.34375 -7.171875 C 5.6875 -7.097656 6.019531 -6.988281 6.34375 -6.84375 Z M 6.34375 -6.84375 "/>
+</symbol>
+<symbol overflow="visible" id="glyph2-2">
+<path style="stroke:none;" d="M 5.34375 -6.015625 C 5.207031 -6.085938 5.0625 -6.140625 4.90625 -6.171875 C 4.757812 -6.210938 4.59375 -6.234375 4.40625 -6.234375 C 3.75 -6.234375 3.242188 -6.019531 2.890625 -5.59375 C 2.535156 -5.164062 2.359375 -4.550781 2.359375 -3.75 L 2.359375 0 L 1.1875 0 L 1.1875 -7.109375 L 2.359375 -7.109375 L 2.359375 -6 C 2.597656 -6.4375 2.914062 -6.757812 3.3125 -6.96875 C 3.707031 -7.175781 4.1875 -7.28125 4.75 -7.28125 C 4.832031 -7.28125 4.921875 -7.273438 5.015625 -7.265625 C 5.109375 -7.253906 5.21875 -7.238281 5.34375 -7.21875 Z M 5.34375 -6.015625 "/>
+</symbol>
+<symbol overflow="visible" id="glyph2-3">
+<path style="stroke:none;" d="M 3.984375 -6.296875 C 3.359375 -6.296875 2.863281 -6.050781 2.5 -5.5625 C 2.132812 -5.070312 1.953125 -4.398438 1.953125 -3.546875 C 1.953125 -2.691406 2.128906 -2.019531 2.484375 -1.53125 C 2.847656 -1.050781 3.347656 -0.8125 3.984375 -0.8125 C 4.597656 -0.8125 5.085938 -1.054688 5.453125 -1.546875 C 5.816406 -2.035156 6 -2.703125 6 -3.546875 C 6 -4.390625 5.816406 -5.054688 5.453125 -5.546875 C 5.085938 -6.046875 4.597656 -6.296875 3.984375 -6.296875 Z M 3.984375 -7.28125 C 4.992188 -7.28125 5.789062 -6.945312 6.375 -6.28125 C 6.957031 -5.625 7.25 -4.710938 7.25 -3.546875 C 7.25 -2.378906 6.957031 -1.460938 6.375 -0.796875 C 5.789062 -0.140625 4.992188 0.1875 3.984375 0.1875 C 2.960938 0.1875 2.160156 -0.140625 1.578125 -0.796875 C 1.003906 -1.460938 0.71875 -2.378906 0.71875 -3.546875 C 0.71875 -4.710938 1.003906 -5.625 1.578125 -6.28125 C 2.160156 -6.945312 2.960938 -7.28125 3.984375 -7.28125 Z M 3.984375 -7.28125 "/>
+</symbol>
+<symbol overflow="visible" id="glyph2-4">
+<path style="stroke:none;" d="M 2.359375 -1.0625 L 2.359375 2.703125 L 1.1875 2.703125 L 1.1875 -7.109375 L 2.359375 -7.109375 L 2.359375 -6.03125 C 2.597656 -6.457031 2.90625 -6.769531 3.28125 -6.96875 C 3.65625 -7.175781 4.101562 -7.28125 4.625 -7.28125 C 5.488281 -7.28125 6.191406 -6.9375 6.734375 -6.25 C 7.273438 -5.5625 7.546875 -4.660156 7.546875 -3.546875 C 7.546875 -2.429688 7.273438 -1.53125 6.734375 -0.84375 C 6.191406 -0.15625 5.488281 0.1875 4.625 0.1875 C 4.101562 0.1875 3.65625 0.0820312 3.28125 -0.125 C 2.90625 -0.332031 2.597656 -0.644531 2.359375 -1.0625 Z M 6.328125 -3.546875 C 6.328125 -4.410156 6.148438 -5.082031 5.796875 -5.5625 C 5.441406 -6.050781 4.957031 -6.296875 4.34375 -6.296875 C 3.726562 -6.296875 3.242188 -6.050781 2.890625 -5.5625 C 2.535156 -5.082031 2.359375 -4.410156 2.359375 -3.546875 C 2.359375 -2.691406 2.535156 -2.019531 2.890625 -1.53125 C 3.242188 -1.039062 3.726562 -0.796875 4.34375 -0.796875 C 4.957031 -0.796875 5.441406 -1.039062 5.796875 -1.53125 C 6.148438 -2.019531 6.328125 -2.691406 6.328125 -3.546875 Z M 6.328125 -3.546875 "/>
+</symbol>
+<symbol overflow="visible" id="glyph2-5">
+<path style="stroke:none;" d="M 5.75 -6.90625 L 5.75 -5.796875 C 5.425781 -5.960938 5.085938 -6.085938 4.734375 -6.171875 C 4.378906 -6.253906 4.007812 -6.296875 3.625 -6.296875 C 3.039062 -6.296875 2.601562 -6.207031 2.3125 -6.03125 C 2.03125 -5.851562 1.890625 -5.585938 1.890625 -5.234375 C 1.890625 -4.960938 1.988281 -4.75 2.1875 -4.59375 C 2.394531 -4.445312 2.816406 -4.300781 3.453125 -4.15625 L 3.84375 -4.0625 C 4.675781 -3.882812 5.265625 -3.632812 5.609375 -3.3125 C 5.960938 -2.988281 6.140625 -2.539062 6.140625 -1.96875 C 6.140625 -1.300781 5.878906 -0.773438 5.359375 -0.390625 C 4.835938 -0.00390625 4.117188 0.1875 3.203125 0.1875 C 2.816406 0.1875 2.414062 0.148438 2 0.078125 C 1.59375 0.00390625 1.160156 -0.109375 0.703125 -0.265625 L 0.703125 -1.46875 C 1.140625 -1.238281 1.566406 -1.066406 1.984375 -0.953125 C 2.398438 -0.847656 2.8125 -0.796875 3.21875 -0.796875 C 3.769531 -0.796875 4.191406 -0.890625 4.484375 -1.078125 C 4.785156 -1.265625 4.9375 -1.53125 4.9375 -1.875 C 4.9375 -2.1875 4.828125 -2.425781 4.609375 -2.59375 C 4.398438 -2.769531 3.9375 -2.9375 3.21875 -3.09375 L 2.8125 -3.1875 C 2.082031 -3.34375 1.554688 -3.578125 1.234375 -3.890625 C 0.910156 -4.203125 0.75 -4.632812 0.75 -5.1875 C 0.75 -5.851562 0.984375 -6.367188 1.453125 -6.734375 C 1.929688 -7.097656 2.609375 -7.28125 3.484375 -7.28125 C 3.910156 -7.28125 4.316406 -7.25 4.703125 -7.1875 C 5.085938 -7.125 5.4375 -7.03125 5.75 -6.90625 Z M 5.75 -6.90625 "/>
+</symbol>
+<symbol overflow="visible" id="glyph2-6">
+<path style="stroke:none;" d="M 4.453125 -3.578125 C 3.515625 -3.578125 2.863281 -3.46875 2.5 -3.25 C 2.132812 -3.03125 1.953125 -2.660156 1.953125 -2.140625 C 1.953125 -1.734375 2.085938 -1.40625 2.359375 -1.15625 C 2.628906 -0.914062 3 -0.796875 3.46875 -0.796875 C 4.113281 -0.796875 4.632812 -1.023438 5.03125 -1.484375 C 5.425781 -1.941406 5.625 -2.550781 5.625 -3.3125 L 5.625 -3.578125 Z M 6.78125 -4.0625 L 6.78125 0 L 5.625 0 L 5.625 -1.078125 C 5.351562 -0.648438 5.019531 -0.332031 4.625 -0.125 C 4.226562 0.0820312 3.738281 0.1875 3.15625 0.1875 C 2.425781 0.1875 1.847656 -0.015625 1.421875 -0.421875 C 0.992188 -0.835938 0.78125 -1.382812 0.78125 -2.0625 C 0.78125 -2.863281 1.046875 -3.46875 1.578125 -3.875 C 2.117188 -4.28125 2.921875 -4.484375 3.984375 -4.484375 L 5.625 -4.484375 L 5.625 -4.609375 C 5.625 -5.140625 5.445312 -5.550781 5.09375 -5.84375 C 4.738281 -6.144531 4.238281 -6.296875 3.59375 -6.296875 C 3.1875 -6.296875 2.789062 -6.242188 2.40625 -6.140625 C 2.019531 -6.046875 1.648438 -5.898438 1.296875 -5.703125 L 1.296875 -6.78125 C 1.722656 -6.945312 2.140625 -7.070312 2.546875 -7.15625 C 2.953125 -7.238281 3.34375 -7.28125 3.71875 -7.28125 C 4.75 -7.28125 5.515625 -7.015625 6.015625 -6.484375 C 6.523438 -5.953125 6.78125 -5.144531 6.78125 -4.0625 Z M 6.78125 -4.0625 "/>
+</symbol>
+<symbol overflow="visible" id="glyph2-7">
+<path style="stroke:none;" d="M 1.21875 -9.875 L 2.390625 -9.875 L 2.390625 0 L 1.21875 0 Z M 1.21875 -9.875 "/>
+</symbol>
+<symbol overflow="visible" id="glyph2-8">
+<path style="stroke:none;" d="M 7.3125 -3.84375 L 7.3125 -3.28125 L 1.9375 -3.28125 C 1.988281 -2.46875 2.226562 -1.851562 2.65625 -1.4375 C 3.09375 -1.019531 3.695312 -0.8125 4.46875 -0.8125 C 4.914062 -0.8125 5.347656 -0.863281 5.765625 -0.96875 C 6.191406 -1.082031 6.613281 -1.25 7.03125 -1.46875 L 7.03125 -0.359375 C 6.613281 -0.179688 6.179688 -0.046875 5.734375 0.046875 C 5.296875 0.140625 4.851562 0.1875 4.40625 0.1875 C 3.269531 0.1875 2.367188 -0.140625 1.703125 -0.796875 C 1.046875 -1.460938 0.71875 -2.359375 0.71875 -3.484375 C 0.71875 -4.648438 1.03125 -5.570312 1.65625 -6.25 C 2.289062 -6.9375 3.140625 -7.28125 4.203125 -7.28125 C 5.160156 -7.28125 5.914062 -6.972656 6.46875 -6.359375 C 7.03125 -5.742188 7.3125 -4.90625 7.3125 -3.84375 Z M 6.140625 -4.1875 C 6.128906 -4.820312 5.945312 -5.332031 5.59375 -5.71875 C 5.25 -6.101562 4.789062 -6.296875 4.21875 -6.296875 C 3.5625 -6.296875 3.035156 -6.109375 2.640625 -5.734375 C 2.253906 -5.367188 2.03125 -4.851562 1.96875 -4.1875 Z M 6.140625 -4.1875 "/>
+</symbol>
+<symbol overflow="visible" id="glyph2-9">
+<path style="stroke:none;" d="M 6.328125 -3.546875 C 6.328125 -4.410156 6.148438 -5.082031 5.796875 -5.5625 C 5.441406 -6.050781 4.957031 -6.296875 4.34375 -6.296875 C 3.726562 -6.296875 3.242188 -6.050781 2.890625 -5.5625 C 2.535156 -5.082031 2.359375 -4.410156 2.359375 -3.546875 C 2.359375 -2.691406 2.535156 -2.019531 2.890625 -1.53125 C 3.242188 -1.039062 3.726562 -0.796875 4.34375 -0.796875 C 4.957031 -0.796875 5.441406 -1.039062 5.796875 -1.53125 C 6.148438 -2.019531 6.328125 -2.691406 6.328125 -3.546875 Z M 2.359375 -6.03125 C 2.597656 -6.457031 2.90625 -6.769531 3.28125 -6.96875 C 3.65625 -7.175781 4.101562 -7.28125 4.625 -7.28125 C 5.488281 -7.28125 6.191406 -6.9375 6.734375 -6.25 C 7.273438 -5.5625 7.546875 -4.660156 7.546875 -3.546875 C 7.546875 -2.429688 7.273438 -1.53125 6.734375 -0.84375 C 6.191406 -0.15625 5.488281 0.1875 4.625 0.1875 C 4.101562 0.1875 3.65625 0.0820312 3.28125 -0.125 C 2.90625 -0.332031 2.597656 -0.644531 2.359375 -1.0625 L 2.359375 0 L 1.1875 0 L 1.1875 -9.875 L 2.359375 -9.875 Z M 2.359375 -6.03125 "/>
+</symbol>
+<symbol overflow="visible" id="glyph2-10">
+<path style="stroke:none;" d="M 1.21875 -7.109375 L 2.390625 -7.109375 L 2.390625 0 L 1.21875 0 Z M 1.21875 -9.875 L 2.390625 -9.875 L 2.390625 -8.390625 L 1.21875 -8.390625 Z M 1.21875 -9.875 "/>
+</symbol>
+<symbol overflow="visible" id="glyph2-11">
+<path style="stroke:none;" d="M 7.140625 -4.296875 L 7.140625 0 L 5.96875 0 L 5.96875 -4.25 C 5.96875 -4.925781 5.835938 -5.429688 5.578125 -5.765625 C 5.316406 -6.097656 4.921875 -6.265625 4.390625 -6.265625 C 3.765625 -6.265625 3.269531 -6.0625 2.90625 -5.65625 C 2.539062 -5.257812 2.359375 -4.710938 2.359375 -4.015625 L 2.359375 0 L 1.1875 0 L 1.1875 -7.109375 L 2.359375 -7.109375 L 2.359375 -6 C 2.640625 -6.425781 2.96875 -6.742188 3.34375 -6.953125 C 3.71875 -7.171875 4.15625 -7.28125 4.65625 -7.28125 C 5.46875 -7.28125 6.082031 -7.023438 6.5 -6.515625 C 6.925781 -6.015625 7.140625 -5.273438 7.140625 -4.296875 Z M 7.140625 -4.296875 "/>
+</symbol>
+<symbol overflow="visible" id="glyph2-12">
+<path style="stroke:none;" d="M 5.90625 -3.640625 C 5.90625 -4.484375 5.726562 -5.132812 5.375 -5.59375 C 5.03125 -6.0625 4.539062 -6.296875 3.90625 -6.296875 C 3.28125 -6.296875 2.789062 -6.0625 2.4375 -5.59375 C 2.09375 -5.132812 1.921875 -4.484375 1.921875 -3.640625 C 1.921875 -2.796875 2.09375 -2.140625 2.4375 -1.671875 C 2.789062 -1.210938 3.28125 -0.984375 3.90625 -0.984375 C 4.539062 -0.984375 5.03125 -1.210938 5.375 -1.671875 C 5.726562 -2.140625 5.90625 -2.796875 5.90625 -3.640625 Z M 7.078125 -0.875 C 7.078125 0.332031 6.804688 1.226562 6.265625 1.8125 C 5.722656 2.40625 4.898438 2.703125 3.796875 2.703125 C 3.390625 2.703125 3.003906 2.671875 2.640625 2.609375 C 2.273438 2.546875 1.921875 2.453125 1.578125 2.328125 L 1.578125 1.1875 C 1.921875 1.375 2.257812 1.507812 2.59375 1.59375 C 2.925781 1.6875 3.265625 1.734375 3.609375 1.734375 C 4.378906 1.734375 4.953125 1.535156 5.328125 1.140625 C 5.710938 0.742188 5.90625 0.140625 5.90625 -0.671875 L 5.90625 -1.25 C 5.664062 -0.832031 5.351562 -0.519531 4.96875 -0.3125 C 4.59375 -0.101562 4.144531 0 3.625 0 C 2.75 0 2.046875 -0.332031 1.515625 -1 C 0.984375 -1.664062 0.71875 -2.546875 0.71875 -3.640625 C 0.71875 -4.734375 0.984375 -5.613281 1.515625 -6.28125 C 2.046875 -6.945312 2.75 -7.28125 3.625 -7.28125 C 4.144531 -7.28125 4.59375 -7.175781 4.96875 -6.96875 C 5.351562 -6.757812 5.664062 -6.445312 5.90625 -6.03125 L 5.90625 -7.109375 L 7.078125 -7.109375 Z M 7.078125 -0.875 "/>
+</symbol>
+<symbol overflow="visible" id="glyph2-13">
+<path style="stroke:none;" d="M 5.125 -8.609375 C 4.1875 -8.609375 3.441406 -8.257812 2.890625 -7.5625 C 2.347656 -6.875 2.078125 -5.929688 2.078125 -4.734375 C 2.078125 -3.535156 2.347656 -2.585938 2.890625 -1.890625 C 3.441406 -1.203125 4.1875 -0.859375 5.125 -0.859375 C 6.050781 -0.859375 6.785156 -1.203125 7.328125 -1.890625 C 7.878906 -2.585938 8.15625 -3.535156 8.15625 -4.734375 C 8.15625 -5.929688 7.878906 -6.875 7.328125 -7.5625 C 6.785156 -8.257812 6.050781 -8.609375 5.125 -8.609375 Z M 5.125 -9.65625 C 6.445312 -9.65625 7.503906 -9.207031 8.296875 -8.3125 C 9.097656 -7.414062 9.5 -6.222656 9.5 -4.734375 C 9.5 -3.234375 9.097656 -2.035156 8.296875 -1.140625 C 7.503906 -0.253906 6.445312 0.1875 5.125 0.1875 C 3.789062 0.1875 2.722656 -0.253906 1.921875 -1.140625 C 1.128906 -2.035156 0.734375 -3.234375 0.734375 -4.734375 C 0.734375 -6.222656 1.128906 -7.414062 1.921875 -8.3125 C 2.722656 -9.207031 3.789062 -9.65625 5.125 -9.65625 Z M 5.125 -9.65625 "/>
+</symbol>
+<symbol overflow="visible" id="glyph2-14">
+<path style="stroke:none;" d="M 1.109375 -2.8125 L 1.109375 -7.109375 L 2.265625 -7.109375 L 2.265625 -2.84375 C 2.265625 -2.175781 2.394531 -1.671875 2.65625 -1.328125 C 2.925781 -0.992188 3.320312 -0.828125 3.84375 -0.828125 C 4.476562 -0.828125 4.976562 -1.023438 5.34375 -1.421875 C 5.707031 -1.828125 5.890625 -2.378906 5.890625 -3.078125 L 5.890625 -7.109375 L 7.0625 -7.109375 L 7.0625 0 L 5.890625 0 L 5.890625 -1.09375 C 5.609375 -0.65625 5.28125 -0.332031 4.90625 -0.125 C 4.53125 0.0820312 4.09375 0.1875 3.59375 0.1875 C 2.78125 0.1875 2.160156 -0.0664062 1.734375 -0.578125 C 1.316406 -1.085938 1.109375 -1.832031 1.109375 -2.8125 Z M 4.046875 -7.28125 Z M 4.046875 -7.28125 "/>
+</symbol>
+<symbol overflow="visible" id="glyph2-15">
+<path style="stroke:none;" d="M 2.375 -9.125 L 2.375 -7.109375 L 4.78125 -7.109375 L 4.78125 -6.203125 L 2.375 -6.203125 L 2.375 -2.34375 C 2.375 -1.757812 2.453125 -1.382812 2.609375 -1.21875 C 2.773438 -1.0625 3.101562 -0.984375 3.59375 -0.984375 L 4.78125 -0.984375 L 4.78125 0 L 3.59375 0 C 2.6875 0 2.0625 -0.164062 1.71875 -0.5 C 1.375 -0.84375 1.203125 -1.457031 1.203125 -2.34375 L 1.203125 -6.203125 L 0.34375 -6.203125 L 0.34375 -7.109375 L 1.203125 -7.109375 L 1.203125 -9.125 Z M 2.375 -9.125 "/>
+</symbol>
+<symbol overflow="visible" id="glyph2-16">
+<path style="stroke:none;" d=""/>
+</symbol>
+<symbol overflow="visible" id="glyph2-17">
+<path style="stroke:none;" d="M 1.28125 -9.484375 L 6.71875 -9.484375 L 6.71875 -8.390625 L 2.5625 -8.390625 L 2.5625 -5.609375 L 6.3125 -5.609375 L 6.3125 -4.53125 L 2.5625 -4.53125 L 2.5625 0 L 1.28125 0 Z M 1.28125 -9.484375 "/>
+</symbol>
+<symbol overflow="visible" id="glyph2-18">
+<path style="stroke:none;" d="M 6.765625 -5.75 C 7.054688 -6.269531 7.40625 -6.65625 7.8125 -6.90625 C 8.21875 -7.15625 8.695312 -7.28125 9.25 -7.28125 C 9.988281 -7.28125 10.554688 -7.019531 10.953125 -6.5 C 11.359375 -5.976562 11.5625 -5.242188 11.5625 -4.296875 L 11.5625 0 L 10.390625 0 L 10.390625 -4.25 C 10.390625 -4.9375 10.265625 -5.441406 10.015625 -5.765625 C 9.773438 -6.097656 9.410156 -6.265625 8.921875 -6.265625 C 8.316406 -6.265625 7.835938 -6.0625 7.484375 -5.65625 C 7.128906 -5.257812 6.953125 -4.710938 6.953125 -4.015625 L 6.953125 0 L 5.78125 0 L 5.78125 -4.25 C 5.78125 -4.9375 5.660156 -5.441406 5.421875 -5.765625 C 5.179688 -6.097656 4.804688 -6.265625 4.296875 -6.265625 C 3.703125 -6.265625 3.226562 -6.0625 2.875 -5.65625 C 2.53125 -5.25 2.359375 -4.703125 2.359375 -4.015625 L 2.359375 0 L 1.1875 0 L 1.1875 -7.109375 L 2.359375 -7.109375 L 2.359375 -6 C 2.617188 -6.4375 2.9375 -6.757812 3.3125 -6.96875 C 3.6875 -7.175781 4.128906 -7.28125 4.640625 -7.28125 C 5.160156 -7.28125 5.597656 -7.148438 5.953125 -6.890625 C 6.316406 -6.628906 6.585938 -6.25 6.765625 -5.75 Z M 6.765625 -5.75 "/>
+</symbol>
+<symbol overflow="visible" id="glyph2-19">
+<path style="stroke:none;" d="M 6.953125 -9.171875 L 6.953125 -7.921875 C 6.472656 -8.148438 6.015625 -8.320312 5.578125 -8.4375 C 5.148438 -8.550781 4.734375 -8.609375 4.328125 -8.609375 C 3.628906 -8.609375 3.085938 -8.472656 2.703125 -8.203125 C 2.328125 -7.929688 2.140625 -7.546875 2.140625 -7.046875 C 2.140625 -6.628906 2.265625 -6.3125 2.515625 -6.09375 C 2.773438 -5.882812 3.253906 -5.710938 3.953125 -5.578125 L 4.734375 -5.421875 C 5.679688 -5.234375 6.382812 -4.910156 6.84375 -4.453125 C 7.300781 -3.992188 7.53125 -3.378906 7.53125 -2.609375 C 7.53125 -1.691406 7.222656 -0.992188 6.609375 -0.515625 C 5.992188 -0.046875 5.085938 0.1875 3.890625 0.1875 C 3.441406 0.1875 2.960938 0.132812 2.453125 0.03125 C 1.953125 -0.0703125 1.429688 -0.222656 0.890625 -0.421875 L 0.890625 -1.734375 C 1.410156 -1.441406 1.921875 -1.222656 2.421875 -1.078125 C 2.921875 -0.929688 3.410156 -0.859375 3.890625 -0.859375 C 4.628906 -0.859375 5.195312 -1 5.59375 -1.28125 C 5.988281 -1.570312 6.1875 -1.984375 6.1875 -2.515625 C 6.1875 -2.984375 6.039062 -3.347656 5.75 -3.609375 C 5.46875 -3.867188 5.003906 -4.066406 4.359375 -4.203125 L 3.578125 -4.359375 C 2.617188 -4.546875 1.925781 -4.84375 1.5 -5.25 C 1.070312 -5.65625 0.859375 -6.21875 0.859375 -6.9375 C 0.859375 -7.78125 1.148438 -8.441406 1.734375 -8.921875 C 2.328125 -9.410156 3.144531 -9.65625 4.1875 -9.65625 C 4.625 -9.65625 5.070312 -9.613281 5.53125 -9.53125 C 6 -9.445312 6.472656 -9.328125 6.953125 -9.171875 Z M 6.953125 -9.171875 "/>
+</symbol>
+<symbol overflow="visible" id="glyph2-20">
+<path style="stroke:none;" d="M 4.1875 0.65625 C 3.851562 1.507812 3.53125 2.0625 3.21875 2.3125 C 2.90625 2.570312 2.488281 2.703125 1.96875 2.703125 L 1.03125 2.703125 L 1.03125 1.734375 L 1.71875 1.734375 C 2.039062 1.734375 2.289062 1.65625 2.46875 1.5 C 2.644531 1.34375 2.835938 0.984375 3.046875 0.421875 L 3.265625 -0.109375 L 0.390625 -7.109375 L 1.625 -7.109375 L 3.84375 -1.546875 L 6.0625 -7.109375 L 7.3125 -7.109375 Z M 4.1875 0.65625 "/>
+</symbol>
+</g>
+</defs>
+<g id="surface268880">
+<rect x="0" y="0" width="774" height="152" style="fill:rgb(100%,100%,100%);fill-opacity:1;stroke:none;"/>
+<path style="fill-rule:evenodd;fill:rgb(100%,100%,100%);fill-opacity:1;stroke-width:0.1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M 21.75297 10.408118 L 26.433829 10.408118 L 26.433829 12.281165 L 21.75297 12.281165 Z M 21.75297 10.408118 " transform="matrix(20,0,0,20,-434.059401,-172.47877)"/>
+<path style="fill-rule:evenodd;fill:rgb(100%,100%,100%);fill-opacity:1;stroke-width:0.1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M 29.079728 10.51222 L 32.829728 10.51222 L 32.829728 12.149915 L 29.079728 12.149915 Z M 29.079728 10.51222 " transform="matrix(20,0,0,20,-434.059401,-172.47877)"/>
+<g style="fill:rgb(0%,0%,0%);fill-opacity:1;">
+ <use xlink:href="#glyph0-1" x="20.171875" y="57.705621"/>
+ <use xlink:href="#glyph0-2" x="30.171875" y="57.705621"/>
+ <use xlink:href="#glyph0-3" x="40.171875" y="57.705621"/>
+ <use xlink:href="#glyph0-4" x="50.171875" y="57.705621"/>
+ <use xlink:href="#glyph0-5" x="60.171875" y="57.705621"/>
+ <use xlink:href="#glyph0-6" x="70.171875" y="57.705621"/>
+</g>
+<g style="fill:rgb(0%,0%,0%);fill-opacity:1;">
+ <use xlink:href="#glyph0-7" x="174.203125" y="60.053277"/>
+ <use xlink:href="#glyph0-8" x="184.203125" y="60.053277"/>
+</g>
+<path style="fill-rule:evenodd;fill:rgb(100%,100%,100%);fill-opacity:1;stroke-width:0.1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M 40.925236 10.544446 L 44.675236 10.544446 L 44.675236 12.090345 L 40.925236 12.090345 Z M 40.925236 10.544446 " transform="matrix(20,0,0,20,-434.059401,-172.47877)"/>
+<path style="fill-rule:evenodd;fill:rgb(100%,100%,100%);fill-opacity:1;stroke-width:0.1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M 34.883439 10.536634 L 38.633439 10.536634 L 38.633439 12.120032 L 34.883439 12.120032 Z M 34.883439 10.536634 " transform="matrix(20,0,0,20,-434.059401,-172.47877)"/>
+<path style="fill-rule:evenodd;fill:rgb(100%,100%,100%);fill-opacity:1;stroke-width:0.1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M 47.084806 10.484876 L 52.045743 10.484876 L 52.045743 12.130774 L 47.084806 12.130774 Z M 47.084806 10.484876 " transform="matrix(20,0,0,20,-434.059401,-172.47877)"/>
+<path style="fill-rule:evenodd;fill:rgb(100%,100%,100%);fill-opacity:1;stroke-width:0.1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M 53.980118 10.376868 L 59.866642 10.376868 L 59.866642 12.279603 L 53.980118 12.279603 Z M 53.980118 10.376868 " transform="matrix(20,0,0,20,-434.059401,-172.47877)"/>
+<path style="fill-rule:evenodd;fill:rgb(100%,100%,100%);fill-opacity:1;stroke-width:0.1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M 54.048478 13.825501 L 59.868009 13.825501 L 59.868009 15.490345 L 54.048478 15.490345 Z M 54.048478 13.825501 " transform="matrix(20,0,0,20,-434.059401,-172.47877)"/>
+<path style="fill:none;stroke-width:0.1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M 26.481876 11.338001 L 28.593009 11.332337 " transform="matrix(20,0,0,20,-434.059401,-172.47877)"/>
+<path style="fill-rule:evenodd;fill:rgb(0%,0%,0%);fill-opacity:1;stroke-width:0.1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M 28.968009 11.33136 L 28.468595 11.582728 L 28.593009 11.332337 L 28.467228 11.082728 Z M 28.968009 11.33136 " transform="matrix(20,0,0,20,-434.059401,-172.47877)"/>
+<path style="fill:none;stroke-width:0.1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M 32.876798 11.329798 L 34.396525 11.328626 " transform="matrix(20,0,0,20,-434.059401,-172.47877)"/>
+<path style="fill-rule:evenodd;fill:rgb(0%,0%,0%);fill-opacity:1;stroke-width:0.1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M 34.771525 11.328431 L 34.27172 11.578821 L 34.396525 11.328626 L 34.271329 11.078821 Z M 34.771525 11.328431 " transform="matrix(20,0,0,20,-434.059401,-172.47877)"/>
+<path style="fill:none;stroke-width:0.1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M 38.633439 11.328431 L 40.438517 11.319642 " transform="matrix(20,0,0,20,-434.059401,-172.47877)"/>
+<path style="fill-rule:evenodd;fill:rgb(0%,0%,0%);fill-opacity:1;stroke-width:0.1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M 40.813517 11.317884 L 40.314689 11.570228 L 40.438517 11.319642 L 40.312345 11.070423 Z M 40.813517 11.317884 " transform="matrix(20,0,0,20,-434.059401,-172.47877)"/>
+<path style="fill:none;stroke-width:0.1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M 44.675236 11.317298 L 46.597892 11.309876 " transform="matrix(20,0,0,20,-434.059401,-172.47877)"/>
+<path style="fill-rule:evenodd;fill:rgb(0%,0%,0%);fill-opacity:1;stroke-width:0.1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M 46.972892 11.308313 L 46.473868 11.560267 L 46.597892 11.309876 L 46.471915 11.060267 Z M 46.972892 11.308313 " transform="matrix(20,0,0,20,-434.059401,-172.47877)"/>
+<path style="fill:none;stroke-width:0.1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M 52.045743 11.307923 L 53.4934 11.323157 " transform="matrix(20,0,0,20,-434.059401,-172.47877)"/>
+<path style="fill-rule:evenodd;fill:rgb(0%,0%,0%);fill-opacity:1;stroke-width:0.1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M 53.8684 11.327063 L 53.365861 11.57179 L 53.4934 11.323157 L 53.370939 11.07179 Z M 53.8684 11.327063 " transform="matrix(20,0,0,20,-434.059401,-172.47877)"/>
+<g style="fill:rgb(0%,0%,0%);fill-opacity:1;">
+ <use xlink:href="#glyph0-9" x="286.757813" y="60.611871"/>
+ <use xlink:href="#glyph0-10" x="296.757813" y="60.611871"/>
+ <use xlink:href="#glyph0-1" x="306.757813" y="60.611871"/>
+</g>
+<g style="fill:rgb(0%,0%,0%);fill-opacity:1;">
+ <use xlink:href="#glyph0-11" x="405.660156" y="59.904839"/>
+ <use xlink:href="#glyph0-10" x="415.660156" y="59.904839"/>
+ <use xlink:href="#glyph0-12" x="425.660156" y="59.904839"/>
+</g>
+<g style="fill:rgb(0%,0%,0%);fill-opacity:1;">
+ <use xlink:href="#glyph1-1" x="511.308594" y="58.064616"/>
+ <use xlink:href="#glyph1-2" x="517.308757" y="58.064616"/>
+ <use xlink:href="#glyph1-3" x="523.308919" y="58.064616"/>
+ <use xlink:href="#glyph1-4" x="529.309082" y="58.064616"/>
+ <use xlink:href="#glyph1-5" x="535.309245" y="58.064616"/>
+ <use xlink:href="#glyph1-6" x="541.309408" y="58.064616"/>
+ <use xlink:href="#glyph1-7" x="547.30957" y="58.064616"/>
+ <use xlink:href="#glyph1-8" x="553.309733" y="58.064616"/>
+ <use xlink:href="#glyph1-9" x="559.309896" y="58.064616"/>
+ <use xlink:href="#glyph1-10" x="565.310059" y="58.064616"/>
+ <use xlink:href="#glyph1-11" x="571.310221" y="58.064616"/>
+ <use xlink:href="#glyph1-12" x="577.310384" y="58.064616"/>
+ <use xlink:href="#glyph1-13" x="583.310547" y="58.064616"/>
+ <use xlink:href="#glyph1-8" x="589.31071" y="58.064616"/>
+ <use xlink:href="#glyph1-14" x="595.310872" y="58.064616"/>
+</g>
+<path style="fill:none;stroke-width:0.1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M 45.671915 11.342298 L 45.655704 11.342298 L 45.655704 14.657923 L 53.561759 14.657923 " transform="matrix(20,0,0,20,-434.059401,-172.47877)"/>
+<path style="fill-rule:evenodd;fill:rgb(0%,0%,0%);fill-opacity:1;stroke-width:0.1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M 53.936759 14.657923 L 53.436759 14.907923 L 53.561759 14.657923 L 53.436759 14.407923 Z M 53.936759 14.657923 " transform="matrix(20,0,0,20,-434.059401,-172.47877)"/>
+<g style="fill:rgb(0%,0%,0%);fill-opacity:1;">
+ <use xlink:href="#glyph1-15" x="657.078125" y="57.724772"/>
+ <use xlink:href="#glyph1-16" x="663.078288" y="57.724772"/>
+ <use xlink:href="#glyph1-10" x="669.078451" y="57.724772"/>
+ <use xlink:href="#glyph1-6" x="675.078613" y="57.724772"/>
+ <use xlink:href="#glyph1-8" x="681.078776" y="57.724772"/>
+ <use xlink:href="#glyph1-17" x="687.078939" y="57.724772"/>
+ <use xlink:href="#glyph1-11" x="693.079102" y="57.724772"/>
+ <use xlink:href="#glyph1-18" x="699.079264" y="57.724772"/>
+ <use xlink:href="#glyph1-19" x="705.079427" y="57.724772"/>
+ <use xlink:href="#glyph1-4" x="711.07959" y="57.724772"/>
+ <use xlink:href="#glyph1-20" x="717.079753" y="57.724772"/>
+ <use xlink:href="#glyph1-21" x="723.079915" y="57.724772"/>
+ <use xlink:href="#glyph1-22" x="729.080078" y="57.724772"/>
+ <use xlink:href="#glyph1-23" x="735.080241" y="57.724772"/>
+ <use xlink:href="#glyph1-21" x="741.080404" y="57.724772"/>
+ <use xlink:href="#glyph1-22" x="747.080566" y="57.724772"/>
+</g>
+<g style="fill:rgb(0%,0%,0%);fill-opacity:1;">
+ <use xlink:href="#glyph1-24" x="673.335938" y="124.170085"/>
+ <use xlink:href="#glyph1-11" x="679.3361" y="124.170085"/>
+ <use xlink:href="#glyph1-13" x="685.336263" y="124.170085"/>
+ <use xlink:href="#glyph1-8" x="691.336426" y="124.170085"/>
+ <use xlink:href="#glyph1-4" x="697.336589" y="124.170085"/>
+ <use xlink:href="#glyph1-20" x="703.336751" y="124.170085"/>
+ <use xlink:href="#glyph1-21" x="709.336914" y="124.170085"/>
+ <use xlink:href="#glyph1-22" x="715.337077" y="124.170085"/>
+ <use xlink:href="#glyph1-23" x="721.33724" y="124.170085"/>
+ <use xlink:href="#glyph1-21" x="727.337402" y="124.170085"/>
+ <use xlink:href="#glyph1-22" x="733.337565" y="124.170085"/>
+</g>
+<g style="fill:rgb(0%,0%,0%);fill-opacity:1;">
+ <use xlink:href="#glyph2-1" x="168.71875" y="31.959093"/>
+ <use xlink:href="#glyph2-2" x="175.866102" y="31.959093"/>
+ <use xlink:href="#glyph2-3" x="180.92551" y="31.959093"/>
+ <use xlink:href="#glyph2-4" x="188.879069" y="31.959093"/>
+</g>
+<g style="fill:rgb(0%,0%,0%);fill-opacity:1;">
+ <use xlink:href="#glyph2-5" x="288.109375" y="31.681749"/>
+ <use xlink:href="#glyph2-1" x="294.882378" y="31.681749"/>
+ <use xlink:href="#glyph2-6" x="302.029731" y="31.681749"/>
+ <use xlink:href="#glyph2-7" x="309.996039" y="31.681749"/>
+ <use xlink:href="#glyph2-8" x="313.607964" y="31.681749"/>
+</g>
+<g style="fill:rgb(0%,0%,0%);fill-opacity:1;">
+ <use xlink:href="#glyph2-5" x="535.988281" y="33.365343"/>
+ <use xlink:href="#glyph2-1" x="542.761285" y="33.365343"/>
+ <use xlink:href="#glyph2-6" x="549.908637" y="33.365343"/>
+ <use xlink:href="#glyph2-7" x="557.874946" y="33.365343"/>
+ <use xlink:href="#glyph2-8" x="561.486871" y="33.365343"/>
+</g>
+<g style="fill:rgb(0%,0%,0%);fill-opacity:1;">
+ <use xlink:href="#glyph2-9" x="26.695313" y="32.365343"/>
+ <use xlink:href="#glyph2-10" x="34.947266" y="32.365343"/>
+ <use xlink:href="#glyph2-11" x="38.559191" y="32.365343"/>
+ <use xlink:href="#glyph2-11" x="46.798394" y="32.365343"/>
+ <use xlink:href="#glyph2-10" x="55.037598" y="32.365343"/>
+ <use xlink:href="#glyph2-11" x="58.649523" y="32.365343"/>
+ <use xlink:href="#glyph2-12" x="66.888726" y="32.365343"/>
+</g>
+<path style="fill:none;stroke-width:0.1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-dasharray:0.14,0.14;stroke-miterlimit:10;" d="M 45.300431 9.486438 L 60.373478 9.486438 L 60.373478 16.175696 L 45.300431 16.175696 Z M 45.300431 9.486438 " transform="matrix(20,0,0,20,-434.059401,-172.47877)"/>
+<g style="fill:rgb(0%,0%,0%);fill-opacity:1;">
+ <use xlink:href="#glyph2-13" x="532.003906" y="11.904405"/>
+ <use xlink:href="#glyph2-14" x="542.236382" y="11.904405"/>
+ <use xlink:href="#glyph2-15" x="550.475586" y="11.904405"/>
+ <use xlink:href="#glyph2-4" x="555.5727" y="11.904405"/>
+ <use xlink:href="#glyph2-14" x="563.824653" y="11.904405"/>
+ <use xlink:href="#glyph2-15" x="572.063856" y="11.904405"/>
+ <use xlink:href="#glyph2-16" x="577.16097" y="11.904405"/>
+ <use xlink:href="#glyph2-17" x="581.293186" y="11.904405"/>
+ <use xlink:href="#glyph2-3" x="588.307346" y="11.904405"/>
+ <use xlink:href="#glyph2-2" x="596.260905" y="11.904405"/>
+ <use xlink:href="#glyph2-18" x="601.377279" y="11.904405"/>
+ <use xlink:href="#glyph2-6" x="614.040853" y="11.904405"/>
+ <use xlink:href="#glyph2-15" x="622.007161" y="11.904405"/>
+ <use xlink:href="#glyph2-15" x="627.104275" y="11.904405"/>
+ <use xlink:href="#glyph2-8" x="632.201389" y="11.904405"/>
+ <use xlink:href="#glyph2-2" x="640.199436" y="11.904405"/>
+ <use xlink:href="#glyph2-16" x="645.544217" y="11.904405"/>
+ <use xlink:href="#glyph2-19" x="649.676432" y="11.904405"/>
+ <use xlink:href="#glyph2-20" x="657.928385" y="11.904405"/>
+ <use xlink:href="#glyph2-5" x="665.621799" y="11.904405"/>
+ <use xlink:href="#glyph2-15" x="672.394803" y="11.904405"/>
+ <use xlink:href="#glyph2-8" x="677.491916" y="11.904405"/>
+ <use xlink:href="#glyph2-18" x="685.489963" y="11.904405"/>
+</g>
+</g>
+</svg>
diff --git a/Documentation/admin-guide/media/ipu6-isys.rst b/Documentation/admin-guide/media/ipu6-isys.rst
new file mode 100644
index 000000000000..d05086824a74
--- /dev/null
+++ b/Documentation/admin-guide/media/ipu6-isys.rst
@@ -0,0 +1,161 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: <isonum.txt>
+
+========================================================
+Intel Image Processing Unit 6 (IPU6) Input System driver
+========================================================
+
+Copyright |copy| 2023--2024 Intel Corporation
+
+Introduction
+============
+
+This file documents the Intel IPU6 (6th generation Image Processing Unit)
+Input System (MIPI CSI2 receiver) drivers located under
+drivers/media/pci/intel/ipu6.
+
+The Intel IPU6 can be found in certain Intel SoCs but not in all SKUs:
+
+* Tiger Lake
+* Jasper Lake
+* Alder Lake
+* Raptor Lake
+* Meteor Lake
+
+Intel IPU6 is made up of two components - Input System (ISYS) and Processing
+System (PSYS).
+
+The Input System mainly works as MIPI CSI-2 receiver which receives and
+processes the image data from the sensors and outputs the frames to memory.
+
+There are 2 driver modules - intel-ipu6 and intel-ipu6-isys. intel-ipu6 is an
+IPU6 common driver which does PCI configuration, firmware loading and parsing,
+firmware authentication, DMA mapping and IPU-MMU (internal Memory mapping Unit)
+configuration. intel_ipu6_isys implements V4L2, Media Controller and V4L2
+sub-device interfaces. The IPU6 ISYS driver supports camera sensors connected
+to the IPU6 ISYS through V4L2 sub-device sensor drivers.
+
+.. Note:: See Documentation/driver-api/media/drivers/ipu6.rst for more
+ information about the IPU6 hardware.
+
+Input system driver
+===================
+
+The Input System driver mainly configures CSI-2 D-PHY, constructs the firmware
+stream configuration, sends commands to firmware, gets response from hardware
+and firmware and then returns buffers to user. The ISYS is represented as
+several V4L2 sub-devices as well as video nodes.
+
+.. kernel-figure:: ipu6_isys_graph.svg
+ :alt: ipu6 isys media graph with multiple streams support
+
+ IPU6 ISYS media graph with multiple streams support
+
+The graph has been produced using the following command:
+
+.. code-block:: none
+
+ fdp -Gsplines=true -Tsvg < dot > dot.svg
+
+Capturing frames with IPU6 ISYS
+-------------------------------
+
+IPU6 ISYS is used to capture frames from the camera sensors connected to the
+CSI2 ports. The supported input formats of ISYS are listed in table below:
+
+.. tabularcolumns:: |p{0.8cm}|p{4.0cm}|p{4.0cm}|
+
+.. flat-table::
+ :header-rows: 1
+
+ * - IPU6 ISYS supported input formats
+
+ * - RGB565, RGB888
+
+ * - UYVY8, YUYV8
+
+ * - RAW8, RAW10, RAW12
+
+.. _ipu6_isys_capture_examples:
+
+Examples
+~~~~~~~~
+
+Here is an example of IPU6 ISYS raw capture on Dell XPS 9315 laptop. On this
+machine, ov01a10 sensor is connected to IPU ISYS CSI-2 port 2, which can
+generate images at sBGGR10 with resolution 1280x800.
+
+Using the media controller APIs, we can configure ov01a10 sensor by
+media-ctl [#f1]_ and yavta [#f2]_ to transmit frames to IPU6 ISYS.
+
+.. code-block:: none
+
+ # Example 1 capture frame from ov01a10 camera sensor
+ # This example assumes /dev/media0 as the IPU ISYS media device
+ export MDEV=/dev/media0
+
+ # Establish the link for the media devices using media-ctl
+ media-ctl -d $MDEV -l "\"ov01a10 3-0036\":0 -> \"Intel IPU6 CSI2 2\":0[1]"
+
+ # Set the format for the media devices
+ media-ctl -d $MDEV -V "ov01a10:0 [fmt:SBGGR10/1280x800]"
+ media-ctl -d $MDEV -V "Intel IPU6 CSI2 2:0 [fmt:SBGGR10/1280x800]"
+ media-ctl -d $MDEV -V "Intel IPU6 CSI2 2:1 [fmt:SBGGR10/1280x800]"
+
+Once the media pipeline is configured, desired sensor specific settings
+(such as exposure and gain settings) can be set, using the yavta tool.
+
+e.g
+
+.. code-block:: none
+
+ # and that ov01a10 sensor is connected to i2c bus 3 with address 0x36
+ export SDEV=$(media-ctl -d $MDEV -e "ov01a10 3-0036")
+
+ yavta -w 0x009e0903 400 $SDEV
+ yavta -w 0x009e0913 1000 $SDEV
+ yavta -w 0x009e0911 2000 $SDEV
+
+Once the desired sensor settings are set, frame captures can be done as below.
+
+e.g
+
+.. code-block:: none
+
+ yavta --data-prefix -u -c10 -n5 -I -s 1280x800 --file=/tmp/frame-#.bin \
+ -f SBGGR10 $(media-ctl -d $MDEV -e "Intel IPU6 ISYS Capture 0")
+
+With the above command, 10 frames are captured at 1280x800 resolution with
+sBGGR10 format. The captured frames are available as /tmp/frame-#.bin files.
+
+Here is another example of IPU6 ISYS RAW and metadata capture from camera
+sensor ov2740 on Lenovo X1 Yoga laptop.
+
+.. code-block:: none
+
+ media-ctl -l "\"ov2740 14-0036\":0 -> \"Intel IPU6 CSI2 1\":0[1]"
+ media-ctl -l "\"Intel IPU6 CSI2 1\":1 -> \"Intel IPU6 ISYS Capture 0\":0[1]"
+ media-ctl -l "\"Intel IPU6 CSI2 1\":2 -> \"Intel IPU6 ISYS Capture 1\":0[1]"
+
+ # set routing
+ media-ctl -R "\"Intel IPU6 CSI2 1\" [0/0->1/0[1],0/1->2/1[1]]"
+
+ media-ctl -V "\"Intel IPU6 CSI2 1\":0/0 [fmt:SGRBG10/1932x1092]"
+ media-ctl -V "\"Intel IPU6 CSI2 1\":0/1 [fmt:GENERIC_8/97x1]"
+ media-ctl -V "\"Intel IPU6 CSI2 1\":1/0 [fmt:SGRBG10/1932x1092]"
+ media-ctl -V "\"Intel IPU6 CSI2 1\":2/1 [fmt:GENERIC_8/97x1]"
+
+ CAPTURE_DEV=$(media-ctl -e "Intel IPU6 ISYS Capture 0")
+ ./yavta --data-prefix -c100 -n5 -I -s1932x1092 --file=/tmp/frame-#.bin \
+ -f SGRBG10 ${CAPTURE_DEV}
+
+ CAPTURE_META=$(media-ctl -e "Intel IPU6 ISYS Capture 1")
+ ./yavta --data-prefix -c100 -n5 -I -s97x1 -B meta-capture \
+ --file=/tmp/meta-#.bin -f GENERIC_8 ${CAPTURE_META}
+
+References
+==========
+
+.. [#f1] https://git.ideasonboard.org/media-ctl.git
+.. [#f2] https://git.ideasonboard.org/yavta.git
diff --git a/Documentation/admin-guide/media/ipu6_isys_graph.svg b/Documentation/admin-guide/media/ipu6_isys_graph.svg
new file mode 100644
index 000000000000..c8539ef320d2
--- /dev/null
+++ b/Documentation/admin-guide/media/ipu6_isys_graph.svg
@@ -0,0 +1,548 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Generated by graphviz version 2.43.0 (0)
+ -->
+<!-- Title: board Pages: 1 -->
+<svg width="1703pt" height="1473pt"
+ viewBox="0.00 0.00 1703.00 1473.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 1469)">
+<title>board</title>
+<polygon fill="white" stroke="transparent" points="-4,4 -4,-1469 1699,-1469 1699,4 -4,4"/>
+<!-- n00000001 -->
+<g id="node1" class="node">
+<title>n00000001</title>
+<polygon fill="yellow" stroke="black" points="832.99,-750.08 629.99,-750.08 629.99,-712.08 832.99,-712.08 832.99,-750.08"/>
+<text text-anchor="middle" x="731.49" y="-734.88" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 0</text>
+<text text-anchor="middle" x="731.49" y="-719.88" font-family="Times,serif" font-size="14.00">/dev/video0</text>
+</g>
+<!-- n00000005 -->
+<g id="node2" class="node">
+<title>n00000005</title>
+<polygon fill="yellow" stroke="black" points="1396.59,-771.88 1193.59,-771.88 1193.59,-733.88 1396.59,-733.88 1396.59,-771.88"/>
+<text text-anchor="middle" x="1295.09" y="-756.68" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 1</text>
+<text text-anchor="middle" x="1295.09" y="-741.68" font-family="Times,serif" font-size="14.00">/dev/video1</text>
+</g>
+<!-- n00000009 -->
+<g id="node3" class="node">
+<title>n00000009</title>
+<polygon fill="yellow" stroke="black" points="1118.52,-690.47 915.52,-690.47 915.52,-652.47 1118.52,-652.47 1118.52,-690.47"/>
+<text text-anchor="middle" x="1017.02" y="-675.27" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 2</text>
+<text text-anchor="middle" x="1017.02" y="-660.27" font-family="Times,serif" font-size="14.00">/dev/video2</text>
+</g>
+<!-- n0000000d -->
+<g id="node4" class="node">
+<title>n0000000d</title>
+<polygon fill="yellow" stroke="black" points="1105.89,-838.84 902.89,-838.84 902.89,-800.84 1105.89,-800.84 1105.89,-838.84"/>
+<text text-anchor="middle" x="1004.39" y="-823.64" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 3</text>
+<text text-anchor="middle" x="1004.39" y="-808.64" font-family="Times,serif" font-size="14.00">/dev/video3</text>
+</g>
+<!-- n00000011 -->
+<g id="node5" class="node">
+<title>n00000011</title>
+<polygon fill="yellow" stroke="black" points="1279.22,-992.95 1076.22,-992.95 1076.22,-954.95 1279.22,-954.95 1279.22,-992.95"/>
+<text text-anchor="middle" x="1177.72" y="-977.75" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 4</text>
+<text text-anchor="middle" x="1177.72" y="-962.75" font-family="Times,serif" font-size="14.00">/dev/video4</text>
+</g>
+<!-- n00000015 -->
+<g id="node6" class="node">
+<title>n00000015</title>
+<polygon fill="yellow" stroke="black" points="939.18,-984.91 736.18,-984.91 736.18,-946.91 939.18,-946.91 939.18,-984.91"/>
+<text text-anchor="middle" x="837.68" y="-969.71" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 5</text>
+<text text-anchor="middle" x="837.68" y="-954.71" font-family="Times,serif" font-size="14.00">/dev/video5</text>
+</g>
+<!-- n00000019 -->
+<g id="node7" class="node">
+<title>n00000019</title>
+<polygon fill="yellow" stroke="black" points="957.87,-527.99 754.87,-527.99 754.87,-489.99 957.87,-489.99 957.87,-527.99"/>
+<text text-anchor="middle" x="856.37" y="-512.79" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 6</text>
+<text text-anchor="middle" x="856.37" y="-497.79" font-family="Times,serif" font-size="14.00">/dev/video6</text>
+</g>
+<!-- n0000001d -->
+<g id="node8" class="node">
+<title>n0000001d</title>
+<polygon fill="yellow" stroke="black" points="1291.02,-542.15 1088.02,-542.15 1088.02,-504.15 1291.02,-504.15 1291.02,-542.15"/>
+<text text-anchor="middle" x="1189.52" y="-526.95" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 7</text>
+<text text-anchor="middle" x="1189.52" y="-511.95" font-family="Times,serif" font-size="14.00">/dev/video7</text>
+</g>
+<!-- n00000021 -->
+<g id="node9" class="node">
+<title>n00000021</title>
+<polygon fill="yellow" stroke="black" points="202.74,-611.46 -0.26,-611.46 -0.26,-573.46 202.74,-573.46 202.74,-611.46"/>
+<text text-anchor="middle" x="101.24" y="-596.26" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 8</text>
+<text text-anchor="middle" x="101.24" y="-581.26" font-family="Times,serif" font-size="14.00">/dev/video8</text>
+</g>
+<!-- n00000025 -->
+<g id="node10" class="node">
+<title>n00000025</title>
+<polygon fill="yellow" stroke="black" points="764.86,-637.89 561.86,-637.89 561.86,-599.89 764.86,-599.89 764.86,-637.89"/>
+<text text-anchor="middle" x="663.36" y="-622.69" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 9</text>
+<text text-anchor="middle" x="663.36" y="-607.69" font-family="Times,serif" font-size="14.00">/dev/video9</text>
+</g>
+<!-- n00000029 -->
+<g id="node11" class="node">
+<title>n00000029</title>
+<polygon fill="yellow" stroke="black" points="358.62,-519.5 146.62,-519.5 146.62,-481.5 358.62,-481.5 358.62,-519.5"/>
+<text text-anchor="middle" x="252.62" y="-504.3" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 10</text>
+<text text-anchor="middle" x="252.62" y="-489.3" font-family="Times,serif" font-size="14.00">/dev/video10</text>
+</g>
+<!-- n0000002d -->
+<g id="node12" class="node">
+<title>n0000002d</title>
+<polygon fill="yellow" stroke="black" points="481.4,-662.59 269.4,-662.59 269.4,-624.59 481.4,-624.59 481.4,-662.59"/>
+<text text-anchor="middle" x="375.4" y="-647.39" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 11</text>
+<text text-anchor="middle" x="375.4" y="-632.39" font-family="Times,serif" font-size="14.00">/dev/video11</text>
+</g>
+<!-- n00000031 -->
+<g id="node13" class="node">
+<title>n00000031</title>
+<polygon fill="yellow" stroke="black" points="637.17,-837.47 425.17,-837.47 425.17,-799.47 637.17,-799.47 637.17,-837.47"/>
+<text text-anchor="middle" x="531.17" y="-822.27" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 12</text>
+<text text-anchor="middle" x="531.17" y="-807.27" font-family="Times,serif" font-size="14.00">/dev/video12</text>
+</g>
+<!-- n00000035 -->
+<g id="node14" class="node">
+<title>n00000035</title>
+<polygon fill="yellow" stroke="black" points="337.75,-833.67 125.75,-833.67 125.75,-795.67 337.75,-795.67 337.75,-833.67"/>
+<text text-anchor="middle" x="231.75" y="-818.47" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 13</text>
+<text text-anchor="middle" x="231.75" y="-803.47" font-family="Times,serif" font-size="14.00">/dev/video13</text>
+</g>
+<!-- n00000039 -->
+<g id="node15" class="node">
+<title>n00000039</title>
+<polygon fill="yellow" stroke="black" points="393.07,-317.96 181.07,-317.96 181.07,-279.96 393.07,-279.96 393.07,-317.96"/>
+<text text-anchor="middle" x="287.07" y="-302.76" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 14</text>
+<text text-anchor="middle" x="287.07" y="-287.76" font-family="Times,serif" font-size="14.00">/dev/video14</text>
+</g>
+<!-- n0000003d -->
+<g id="node16" class="node">
+<title>n0000003d</title>
+<polygon fill="yellow" stroke="black" points="701.46,-391.04 489.46,-391.04 489.46,-353.04 701.46,-353.04 701.46,-391.04"/>
+<text text-anchor="middle" x="595.46" y="-375.84" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 15</text>
+<text text-anchor="middle" x="595.46" y="-360.84" font-family="Times,serif" font-size="14.00">/dev/video15</text>
+</g>
+<!-- n00000041 -->
+<g id="node17" class="node">
+<title>n00000041</title>
+<polygon fill="yellow" stroke="black" points="212.45,-1228.8 0.45,-1228.8 0.45,-1190.8 212.45,-1190.8 212.45,-1228.8"/>
+<text text-anchor="middle" x="106.45" y="-1213.6" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 16</text>
+<text text-anchor="middle" x="106.45" y="-1198.6" font-family="Times,serif" font-size="14.00">/dev/video16</text>
+</g>
+<!-- n00000045 -->
+<g id="node18" class="node">
+<title>n00000045</title>
+<polygon fill="yellow" stroke="black" points="784.86,-1252.38 572.86,-1252.38 572.86,-1214.38 784.86,-1214.38 784.86,-1252.38"/>
+<text text-anchor="middle" x="678.86" y="-1237.18" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 17</text>
+<text text-anchor="middle" x="678.86" y="-1222.18" font-family="Times,serif" font-size="14.00">/dev/video17</text>
+</g>
+<!-- n00000049 -->
+<g id="node19" class="node">
+<title>n00000049</title>
+<polygon fill="yellow" stroke="black" points="503.14,-1169.96 291.14,-1169.96 291.14,-1131.96 503.14,-1131.96 503.14,-1169.96"/>
+<text text-anchor="middle" x="397.14" y="-1154.76" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 18</text>
+<text text-anchor="middle" x="397.14" y="-1139.76" font-family="Times,serif" font-size="14.00">/dev/video18</text>
+</g>
+<!-- n0000004d -->
+<g id="node20" class="node">
+<title>n0000004d</title>
+<polygon fill="yellow" stroke="black" points="492.62,-1319.4 280.62,-1319.4 280.62,-1281.4 492.62,-1281.4 492.62,-1319.4"/>
+<text text-anchor="middle" x="386.62" y="-1304.2" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 19</text>
+<text text-anchor="middle" x="386.62" y="-1289.2" font-family="Times,serif" font-size="14.00">/dev/video19</text>
+</g>
+<!-- n00000051 -->
+<g id="node21" class="node">
+<title>n00000051</title>
+<polygon fill="yellow" stroke="black" points="680.74,-1464.66 468.74,-1464.66 468.74,-1426.66 680.74,-1426.66 680.74,-1464.66"/>
+<text text-anchor="middle" x="574.74" y="-1449.46" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 20</text>
+<text text-anchor="middle" x="574.74" y="-1434.46" font-family="Times,serif" font-size="14.00">/dev/video20</text>
+</g>
+<!-- n00000055 -->
+<g id="node22" class="node">
+<title>n00000055</title>
+<polygon fill="yellow" stroke="black" points="302.42,-1452.56 90.42,-1452.56 90.42,-1414.56 302.42,-1414.56 302.42,-1452.56"/>
+<text text-anchor="middle" x="196.42" y="-1437.36" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 21</text>
+<text text-anchor="middle" x="196.42" y="-1422.36" font-family="Times,serif" font-size="14.00">/dev/video21</text>
+</g>
+<!-- n00000059 -->
+<g id="node23" class="node">
+<title>n00000059</title>
+<polygon fill="yellow" stroke="black" points="319.89,-1018.32 107.89,-1018.32 107.89,-980.32 319.89,-980.32 319.89,-1018.32"/>
+<text text-anchor="middle" x="213.89" y="-1003.12" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 22</text>
+<text text-anchor="middle" x="213.89" y="-988.12" font-family="Times,serif" font-size="14.00">/dev/video22</text>
+</g>
+<!-- n0000005d -->
+<g id="node24" class="node">
+<title>n0000005d</title>
+<polygon fill="yellow" stroke="black" points="692.62,-1031.39 480.62,-1031.39 480.62,-993.39 692.62,-993.39 692.62,-1031.39"/>
+<text text-anchor="middle" x="586.62" y="-1016.19" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 23</text>
+<text text-anchor="middle" x="586.62" y="-1001.19" font-family="Times,serif" font-size="14.00">/dev/video23</text>
+</g>
+<!-- n00000061 -->
+<g id="node25" class="node">
+<title>n00000061</title>
+<polygon fill="yellow" stroke="black" points="1122.45,-248.8 910.45,-248.8 910.45,-210.8 1122.45,-210.8 1122.45,-248.8"/>
+<text text-anchor="middle" x="1016.45" y="-233.6" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 24</text>
+<text text-anchor="middle" x="1016.45" y="-218.6" font-family="Times,serif" font-size="14.00">/dev/video24</text>
+</g>
+<!-- n00000065 -->
+<g id="node26" class="node">
+<title>n00000065</title>
+<polygon fill="yellow" stroke="black" points="1694.86,-272.38 1482.86,-272.38 1482.86,-234.38 1694.86,-234.38 1694.86,-272.38"/>
+<text text-anchor="middle" x="1588.86" y="-257.18" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 25</text>
+<text text-anchor="middle" x="1588.86" y="-242.18" font-family="Times,serif" font-size="14.00">/dev/video25</text>
+</g>
+<!-- n00000069 -->
+<g id="node27" class="node">
+<title>n00000069</title>
+<polygon fill="yellow" stroke="black" points="1413.14,-189.96 1201.14,-189.96 1201.14,-151.96 1413.14,-151.96 1413.14,-189.96"/>
+<text text-anchor="middle" x="1307.14" y="-174.76" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 26</text>
+<text text-anchor="middle" x="1307.14" y="-159.76" font-family="Times,serif" font-size="14.00">/dev/video26</text>
+</g>
+<!-- n0000006d -->
+<g id="node28" class="node">
+<title>n0000006d</title>
+<polygon fill="yellow" stroke="black" points="1402.62,-339.4 1190.62,-339.4 1190.62,-301.4 1402.62,-301.4 1402.62,-339.4"/>
+<text text-anchor="middle" x="1296.62" y="-324.2" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 27</text>
+<text text-anchor="middle" x="1296.62" y="-309.2" font-family="Times,serif" font-size="14.00">/dev/video27</text>
+</g>
+<!-- n00000071 -->
+<g id="node29" class="node">
+<title>n00000071</title>
+<polygon fill="yellow" stroke="black" points="1590.74,-484.66 1378.74,-484.66 1378.74,-446.66 1590.74,-446.66 1590.74,-484.66"/>
+<text text-anchor="middle" x="1484.74" y="-469.46" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 28</text>
+<text text-anchor="middle" x="1484.74" y="-454.46" font-family="Times,serif" font-size="14.00">/dev/video28</text>
+</g>
+<!-- n00000075 -->
+<g id="node30" class="node">
+<title>n00000075</title>
+<polygon fill="yellow" stroke="black" points="1212.42,-472.56 1000.42,-472.56 1000.42,-434.56 1212.42,-434.56 1212.42,-472.56"/>
+<text text-anchor="middle" x="1106.42" y="-457.36" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 29</text>
+<text text-anchor="middle" x="1106.42" y="-442.36" font-family="Times,serif" font-size="14.00">/dev/video29</text>
+</g>
+<!-- n00000079 -->
+<g id="node31" class="node">
+<title>n00000079</title>
+<polygon fill="yellow" stroke="black" points="1229.89,-38.32 1017.89,-38.32 1017.89,-0.32 1229.89,-0.32 1229.89,-38.32"/>
+<text text-anchor="middle" x="1123.89" y="-23.12" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 30</text>
+<text text-anchor="middle" x="1123.89" y="-8.12" font-family="Times,serif" font-size="14.00">/dev/video30</text>
+</g>
+<!-- n0000007d -->
+<g id="node32" class="node">
+<title>n0000007d</title>
+<polygon fill="yellow" stroke="black" points="1602.62,-51.39 1390.62,-51.39 1390.62,-13.39 1602.62,-13.39 1602.62,-51.39"/>
+<text text-anchor="middle" x="1496.62" y="-36.19" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 31</text>
+<text text-anchor="middle" x="1496.62" y="-21.19" font-family="Times,serif" font-size="14.00">/dev/video31</text>
+</g>
+<!-- n00000081 -->
+<g id="node33" class="node">
+<title>n00000081</title>
+<path fill="green" stroke="black" d="M924.28,-700.28C924.28,-700.28 1108.28,-700.28 1108.28,-700.28 1114.28,-700.28 1120.28,-706.28 1120.28,-712.28 1120.28,-712.28 1120.28,-772.28 1120.28,-772.28 1120.28,-778.28 1114.28,-784.28 1108.28,-784.28 1108.28,-784.28 924.28,-784.28 924.28,-784.28 918.28,-784.28 912.28,-778.28 912.28,-772.28 912.28,-772.28 912.28,-712.28 912.28,-712.28 912.28,-706.28 918.28,-700.28 924.28,-700.28"/>
+<text text-anchor="middle" x="1016.28" y="-769.08" font-family="Times,serif" font-size="14.00">0</text>
+<polyline fill="none" stroke="black" points="912.28,-761.28 1120.28,-761.28 "/>
+<text text-anchor="middle" x="1016.28" y="-746.08" font-family="Times,serif" font-size="14.00">Intel IPU6 CSI2 0</text>
+<text text-anchor="middle" x="1016.28" y="-731.08" font-family="Times,serif" font-size="14.00">/dev/v4l&#45;subdev0</text>
+<polyline fill="none" stroke="black" points="912.28,-723.28 1120.28,-723.28 "/>
+<text text-anchor="middle" x="925.28" y="-708.08" font-family="Times,serif" font-size="14.00">1</text>
+<polyline fill="none" stroke="black" points="938.28,-700.28 938.28,-723.28 "/>
+<text text-anchor="middle" x="951.28" y="-708.08" font-family="Times,serif" font-size="14.00">2</text>
+<polyline fill="none" stroke="black" points="964.28,-700.28 964.28,-723.28 "/>
+<text text-anchor="middle" x="977.28" y="-708.08" font-family="Times,serif" font-size="14.00">3</text>
+<polyline fill="none" stroke="black" points="990.28,-700.28 990.28,-723.28 "/>
+<text text-anchor="middle" x="1003.28" y="-708.08" font-family="Times,serif" font-size="14.00">4</text>
+<polyline fill="none" stroke="black" points="1016.28,-700.28 1016.28,-723.28 "/>
+<text text-anchor="middle" x="1029.28" y="-708.08" font-family="Times,serif" font-size="14.00">5</text>
+<polyline fill="none" stroke="black" points="1042.28,-700.28 1042.28,-723.28 "/>
+<text text-anchor="middle" x="1055.28" y="-708.08" font-family="Times,serif" font-size="14.00">6</text>
+<polyline fill="none" stroke="black" points="1068.28,-700.28 1068.28,-723.28 "/>
+<text text-anchor="middle" x="1081.28" y="-708.08" font-family="Times,serif" font-size="14.00">7</text>
+<polyline fill="none" stroke="black" points="1094.28,-700.28 1094.28,-723.28 "/>
+<text text-anchor="middle" x="1107.28" y="-708.08" font-family="Times,serif" font-size="14.00">8</text>
+</g>
+<!-- n00000081&#45;&gt;n00000001 -->
+<g id="edge1" class="edge">
+<title>n00000081:port1&#45;&gt;n00000001</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M912.28,-711.28C912.28,-711.28 880.33,-714.78 843.28,-718.84"/>
+<polygon fill="black" stroke="black" points="842.81,-715.37 833.25,-719.94 843.57,-722.33 842.81,-715.37"/>
+</g>
+<!-- n00000081&#45;&gt;n00000005 -->
+<g id="edge2" class="edge">
+<title>n00000081:port2&#45;&gt;n00000005</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M951.38,-700.28C951.38,-700.28 1086.18,-688.61 1123.48,-697.08 1155.93,-704.45 1158.99,-719.67 1190.39,-730.68 1190.49,-730.71 1190.59,-730.75 1190.69,-730.78"/>
+<polygon fill="black" stroke="black" points="1189.45,-734.06 1200.05,-733.86 1191.64,-727.41 1189.45,-734.06"/>
+</g>
+<!-- n00000081&#45;&gt;n00000009 -->
+<g id="edge3" class="edge">
+<title>n00000081:port3&#45;&gt;n00000009</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M977.28,-700.28C977.28,-700.28 979.31,-698.81 982.45,-696.54"/>
+<polygon fill="black" stroke="black" points="984.7,-699.23 990.74,-690.53 980.59,-693.56 984.7,-699.23"/>
+</g>
+<!-- n00000081&#45;&gt;n0000000d -->
+<g id="edge4" class="edge">
+<title>n00000081:port4&#45;&gt;n0000000d</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M1003.38,-700.26C1003.38,-700.26 916.62,-689.8 909.08,-697.08 880.2,-725.01 885.68,-754.82 909.08,-787.48 910.88,-789.99 918.96,-793.59 929.7,-797.47"/>
+<polygon fill="black" stroke="black" points="928.69,-800.82 939.28,-800.79 930.98,-794.21 928.69,-800.82"/>
+</g>
+<!-- n00000081&#45;&gt;n00000011 -->
+<g id="edge5" class="edge">
+<title>n00000081:port5&#45;&gt;n00000011</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M1029.19,-700.26C1029.19,-700.26 1115.28,-690.56 1123.48,-697.08 1198.37,-756.64 1190.55,-886.51 1182.64,-944.71"/>
+<polygon fill="black" stroke="black" points="1179.16,-944.31 1181.18,-954.71 1186.09,-945.32 1179.16,-944.31"/>
+</g>
+<!-- n00000081&#45;&gt;n00000015 -->
+<g id="edge6" class="edge">
+<title>n00000081:port6&#45;&gt;n00000015</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M1055.18,-700.28C1055.18,-700.28 915.57,-692.2 909.08,-697.08 834.02,-753.51 831.79,-879.34 835.06,-936.56"/>
+<polygon fill="black" stroke="black" points="831.58,-936.99 835.74,-946.73 838.56,-936.52 831.58,-936.99"/>
+</g>
+<!-- n00000081&#45;&gt;n00000019 -->
+<g id="edge7" class="edge">
+<title>n00000081:port7&#45;&gt;n00000019</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M1081.28,-700.28C1081.28,-700.28 916.04,-696.54 912.32,-693.67 864.52,-656.73 856.3,-580.22 855.62,-538.2"/>
+<polygon fill="black" stroke="black" points="859.11,-538.05 855.59,-528.06 852.11,-538.07 859.11,-538.05"/>
+</g>
+<!-- n00000081&#45;&gt;n0000001d -->
+<g id="edge8" class="edge">
+<title>n00000081:port8&#45;&gt;n0000001d</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M1107.28,-700.28C1107.28,-700.28 1119.29,-696.23 1121.72,-693.67 1159.76,-653.62 1177.38,-589.6 1184.78,-552.46"/>
+<polygon fill="black" stroke="black" points="1188.29,-552.76 1186.69,-542.29 1181.41,-551.47 1188.29,-552.76"/>
+</g>
+<!-- n0000008b -->
+<g id="node34" class="node">
+<title>n0000008b</title>
+<path fill="green" stroke="black" d="M293.1,-532.08C293.1,-532.08 477.1,-532.08 477.1,-532.08 483.1,-532.08 489.1,-538.08 489.1,-544.08 489.1,-544.08 489.1,-604.08 489.1,-604.08 489.1,-610.08 483.1,-616.08 477.1,-616.08 477.1,-616.08 293.1,-616.08 293.1,-616.08 287.1,-616.08 281.1,-610.08 281.1,-604.08 281.1,-604.08 281.1,-544.08 281.1,-544.08 281.1,-538.08 287.1,-532.08 293.1,-532.08"/>
+<text text-anchor="middle" x="385.1" y="-600.88" font-family="Times,serif" font-size="14.00">0</text>
+<polyline fill="none" stroke="black" points="281.1,-593.08 489.1,-593.08 "/>
+<text text-anchor="middle" x="385.1" y="-577.88" font-family="Times,serif" font-size="14.00">Intel IPU6 CSI2 1</text>
+<text text-anchor="middle" x="385.1" y="-562.88" font-family="Times,serif" font-size="14.00">/dev/v4l&#45;subdev1</text>
+<polyline fill="none" stroke="black" points="281.1,-555.08 489.1,-555.08 "/>
+<text text-anchor="middle" x="294.1" y="-539.88" font-family="Times,serif" font-size="14.00">1</text>
+<polyline fill="none" stroke="black" points="307.1,-532.08 307.1,-555.08 "/>
+<text text-anchor="middle" x="320.1" y="-539.88" font-family="Times,serif" font-size="14.00">2</text>
+<polyline fill="none" stroke="black" points="333.1,-532.08 333.1,-555.08 "/>
+<text text-anchor="middle" x="346.1" y="-539.88" font-family="Times,serif" font-size="14.00">3</text>
+<polyline fill="none" stroke="black" points="359.1,-532.08 359.1,-555.08 "/>
+<text text-anchor="middle" x="372.1" y="-539.88" font-family="Times,serif" font-size="14.00">4</text>
+<polyline fill="none" stroke="black" points="385.1,-532.08 385.1,-555.08 "/>
+<text text-anchor="middle" x="398.1" y="-539.88" font-family="Times,serif" font-size="14.00">5</text>
+<polyline fill="none" stroke="black" points="411.1,-532.08 411.1,-555.08 "/>
+<text text-anchor="middle" x="424.1" y="-539.88" font-family="Times,serif" font-size="14.00">6</text>
+<polyline fill="none" stroke="black" points="437.1,-532.08 437.1,-555.08 "/>
+<text text-anchor="middle" x="450.1" y="-539.88" font-family="Times,serif" font-size="14.00">7</text>
+<polyline fill="none" stroke="black" points="463.1,-532.08 463.1,-555.08 "/>
+<text text-anchor="middle" x="476.1" y="-539.88" font-family="Times,serif" font-size="14.00">8</text>
+</g>
+<!-- n0000008b&#45;&gt;n00000021 -->
+<g id="edge9" class="edge">
+<title>n0000008b:port1&#45;&gt;n00000021</title>
+<path fill="none" stroke="black" d="M281.1,-543.08C281.1,-543.08 240.1,-560.51 205.94,-570.26 205.35,-570.43 204.77,-570.59 204.18,-570.76"/>
+<polygon fill="black" stroke="black" points="203.2,-567.39 194.47,-573.39 205.03,-574.15 203.2,-567.39"/>
+</g>
+<!-- n0000008b&#45;&gt;n00000025 -->
+<g id="edge10" class="edge">
+<title>n0000008b:port2&#45;&gt;n00000025</title>
+<path fill="none" stroke="black" d="M320.2,-532.07C320.2,-532.07 456.9,-514.37 492.3,-528.88 528.42,-543.68 522.86,-571.78 556.11,-594.53"/>
+<polygon fill="black" stroke="black" points="554.54,-597.67 564.9,-599.88 558.18,-591.69 554.54,-597.67"/>
+</g>
+<!-- n0000008b&#45;&gt;n00000029 -->
+<g id="edge11" class="edge">
+<title>n0000008b:port3&#45;&gt;n00000029</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M346.1,-532.08C346.1,-532.08 333.93,-527.96 318.37,-522.71"/>
+<polygon fill="black" stroke="black" points="319.48,-519.39 308.88,-519.5 317.24,-526.02 319.48,-519.39"/>
+</g>
+<!-- n0000008b&#45;&gt;n0000002d -->
+<g id="edge12" class="edge">
+<title>n0000008b:port4&#45;&gt;n0000002d</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M372.19,-532.05C372.19,-532.05 292.97,-514.3 277.9,-528.88 249.01,-556.8 253.16,-587.62 277.9,-619.28 278.34,-619.85 280.33,-620.69 283.45,-621.71"/>
+<polygon fill="black" stroke="black" points="282.71,-625.14 293.29,-624.58 284.67,-618.42 282.71,-625.14"/>
+</g>
+<!-- n0000008b&#45;&gt;n00000031 -->
+<g id="edge13" class="edge">
+<title>n0000008b:port5&#45;&gt;n00000031</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M398,-532.05C398,-532.05 476.28,-515.34 492.3,-528.88 568.49,-593.29 550.55,-729.67 538.14,-789.41"/>
+<polygon fill="black" stroke="black" points="534.69,-788.79 535.99,-799.31 541.53,-790.28 534.69,-788.79"/>
+</g>
+<!-- n0000008b&#45;&gt;n00000035 -->
+<g id="edge14" class="edge">
+<title>n0000008b:port6&#45;&gt;n00000035</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M424,-532.07C424,-532.07 290.37,-518.48 277.9,-528.88 202.27,-591.86 215.34,-725.69 225.66,-785.15"/>
+<polygon fill="black" stroke="black" points="222.29,-786.14 227.54,-795.35 229.17,-784.88 222.29,-786.14"/>
+</g>
+<!-- n0000008b&#45;&gt;n00000039 -->
+<g id="edge15" class="edge">
+<title>n0000008b:port7&#45;&gt;n00000039</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M450.1,-532.08C450.1,-532.08 395.22,-528.13 383.45,-518.65 375.46,-512.21 322.64,-385.46 298.76,-327.47"/>
+<polygon fill="black" stroke="black" points="301.96,-326.05 294.92,-318.14 295.49,-328.72 301.96,-326.05"/>
+</g>
+<!-- n0000008b&#45;&gt;n0000003d -->
+<g id="edge16" class="edge">
+<title>n0000008b:port8&#45;&gt;n0000003d</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M476.1,-532.08C476.1,-532.08 522.37,-522.39 526.85,-518.65 563.15,-488.33 581.38,-434.52 589.6,-401.2"/>
+<polygon fill="black" stroke="black" points="593.08,-401.69 591.93,-391.16 586.26,-400.11 593.08,-401.69"/>
+</g>
+<!-- n00000095 -->
+<g id="node35" class="node">
+<title>n00000095</title>
+<path fill="green" stroke="black" d="M301.38,-1180.11C301.38,-1180.11 485.38,-1180.11 485.38,-1180.11 491.38,-1180.11 497.38,-1186.11 497.38,-1192.11 497.38,-1192.11 497.38,-1252.11 497.38,-1252.11 497.38,-1258.11 491.38,-1264.11 485.38,-1264.11 485.38,-1264.11 301.38,-1264.11 301.38,-1264.11 295.38,-1264.11 289.38,-1258.11 289.38,-1252.11 289.38,-1252.11 289.38,-1192.11 289.38,-1192.11 289.38,-1186.11 295.38,-1180.11 301.38,-1180.11"/>
+<text text-anchor="middle" x="393.38" y="-1248.91" font-family="Times,serif" font-size="14.00">0</text>
+<polyline fill="none" stroke="black" points="289.38,-1241.11 497.38,-1241.11 "/>
+<text text-anchor="middle" x="393.38" y="-1225.91" font-family="Times,serif" font-size="14.00">Intel IPU6 CSI2 2</text>
+<text text-anchor="middle" x="393.38" y="-1210.91" font-family="Times,serif" font-size="14.00">/dev/v4l&#45;subdev2</text>
+<polyline fill="none" stroke="black" points="289.38,-1203.11 497.38,-1203.11 "/>
+<text text-anchor="middle" x="302.38" y="-1187.91" font-family="Times,serif" font-size="14.00">1</text>
+<polyline fill="none" stroke="black" points="315.38,-1180.11 315.38,-1203.11 "/>
+<text text-anchor="middle" x="328.38" y="-1187.91" font-family="Times,serif" font-size="14.00">2</text>
+<polyline fill="none" stroke="black" points="341.38,-1180.11 341.38,-1203.11 "/>
+<text text-anchor="middle" x="354.38" y="-1187.91" font-family="Times,serif" font-size="14.00">3</text>
+<polyline fill="none" stroke="black" points="367.38,-1180.11 367.38,-1203.11 "/>
+<text text-anchor="middle" x="380.38" y="-1187.91" font-family="Times,serif" font-size="14.00">4</text>
+<polyline fill="none" stroke="black" points="393.38,-1180.11 393.38,-1203.11 "/>
+<text text-anchor="middle" x="406.38" y="-1187.91" font-family="Times,serif" font-size="14.00">5</text>
+<polyline fill="none" stroke="black" points="419.38,-1180.11 419.38,-1203.11 "/>
+<text text-anchor="middle" x="432.38" y="-1187.91" font-family="Times,serif" font-size="14.00">6</text>
+<polyline fill="none" stroke="black" points="445.38,-1180.11 445.38,-1203.11 "/>
+<text text-anchor="middle" x="458.38" y="-1187.91" font-family="Times,serif" font-size="14.00">7</text>
+<polyline fill="none" stroke="black" points="471.38,-1180.11 471.38,-1203.11 "/>
+<text text-anchor="middle" x="484.38" y="-1187.91" font-family="Times,serif" font-size="14.00">8</text>
+</g>
+<!-- n00000095&#45;&gt;n00000041 -->
+<g id="edge17" class="edge">
+<title>n00000095:port1&#45;&gt;n00000041</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M289.38,-1191.11C289.38,-1191.11 258.94,-1194.22 222.89,-1197.91"/>
+<polygon fill="black" stroke="black" points="222.19,-1194.46 212.6,-1198.96 222.9,-1201.42 222.19,-1194.46"/>
+</g>
+<!-- n00000095&#45;&gt;n00000045 -->
+<g id="edge18" class="edge">
+<title>n00000095:port2&#45;&gt;n00000045</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M328.48,-1180.11C328.48,-1180.11 463.26,-1168.53 500.58,-1176.91 534.02,-1184.43 537.24,-1200.06 569.66,-1211.18 569.76,-1211.22 569.86,-1211.25 569.96,-1211.29"/>
+<polygon fill="black" stroke="black" points="568.86,-1214.61 579.45,-1214.34 571,-1207.95 568.86,-1214.61"/>
+</g>
+<!-- n00000095&#45;&gt;n00000049 -->
+<g id="edge19" class="edge">
+<title>n00000095:port3&#45;&gt;n00000049</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M354.38,-1180.11C354.38,-1180.11 356.8,-1178.46 360.49,-1175.94"/>
+<polygon fill="black" stroke="black" points="362.56,-1178.77 368.86,-1170.24 358.62,-1172.98 362.56,-1178.77"/>
+</g>
+<!-- n00000095&#45;&gt;n0000004d -->
+<g id="edge20" class="edge">
+<title>n00000095:port4&#45;&gt;n0000004d</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M380.47,-1180.09C380.47,-1180.09 293.71,-1169.63 286.18,-1176.91 257.29,-1204.84 262.63,-1234.76 286.18,-1267.31 288.16,-1270.05 297.33,-1273.96 309.38,-1278.13"/>
+<polygon fill="black" stroke="black" points="308.49,-1281.53 319.09,-1281.36 310.7,-1274.88 308.49,-1281.53"/>
+</g>
+<!-- n00000095&#45;&gt;n00000051 -->
+<g id="edge21" class="edge">
+<title>n00000095:port5&#45;&gt;n00000051</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M406.28,-1180.09C406.28,-1180.09 492.13,-1170.7 500.58,-1176.91 576.41,-1232.66 579.83,-1358.79 577.09,-1416.2"/>
+<polygon fill="black" stroke="black" points="573.59,-1416.23 576.51,-1426.41 580.58,-1416.63 573.59,-1416.23"/>
+</g>
+<!-- n00000095&#45;&gt;n00000055 -->
+<g id="edge22" class="edge">
+<title>n00000095:port6&#45;&gt;n00000055</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M432.28,-1180.11C432.28,-1180.11 292.85,-1172.29 286.18,-1176.91 211.26,-1228.86 198.3,-1348.49 196.45,-1404.12"/>
+<polygon fill="black" stroke="black" points="192.94,-1404.28 196.21,-1414.36 199.94,-1404.44 192.94,-1404.28"/>
+</g>
+<!-- n00000095&#45;&gt;n00000059 -->
+<g id="edge23" class="edge">
+<title>n00000095:port7&#45;&gt;n00000059</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M458.38,-1180.11C458.38,-1180.11 291.84,-1175.85 287.94,-1173.16 239.87,-1139.96 222.85,-1068.83 216.94,-1028.6"/>
+<polygon fill="black" stroke="black" points="220.39,-1028.06 215.6,-1018.61 213.46,-1028.98 220.39,-1028.06"/>
+</g>
+<!-- n00000095&#45;&gt;n0000005d -->
+<g id="edge24" class="edge">
+<title>n00000095:port8&#45;&gt;n0000005d</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M484.38,-1180.11C484.38,-1180.11 502.45,-1176.49 506.34,-1173.16 547.25,-1138.2 569.47,-1077.38 579.62,-1041.41"/>
+<polygon fill="black" stroke="black" points="583.06,-1042.09 582.28,-1031.53 576.3,-1040.27 583.06,-1042.09"/>
+</g>
+<!-- n0000009f -->
+<g id="node36" class="node">
+<title>n0000009f</title>
+<path fill="green" stroke="black" d="M1211.38,-200.11C1211.38,-200.11 1395.38,-200.11 1395.38,-200.11 1401.38,-200.11 1407.38,-206.11 1407.38,-212.11 1407.38,-212.11 1407.38,-272.11 1407.38,-272.11 1407.38,-278.11 1401.38,-284.11 1395.38,-284.11 1395.38,-284.11 1211.38,-284.11 1211.38,-284.11 1205.38,-284.11 1199.38,-278.11 1199.38,-272.11 1199.38,-272.11 1199.38,-212.11 1199.38,-212.11 1199.38,-206.11 1205.38,-200.11 1211.38,-200.11"/>
+<text text-anchor="middle" x="1303.38" y="-268.91" font-family="Times,serif" font-size="14.00">0</text>
+<polyline fill="none" stroke="black" points="1199.38,-261.11 1407.38,-261.11 "/>
+<text text-anchor="middle" x="1303.38" y="-245.91" font-family="Times,serif" font-size="14.00">Intel IPU6 CSI2 3</text>
+<text text-anchor="middle" x="1303.38" y="-230.91" font-family="Times,serif" font-size="14.00">/dev/v4l&#45;subdev3</text>
+<polyline fill="none" stroke="black" points="1199.38,-223.11 1407.38,-223.11 "/>
+<text text-anchor="middle" x="1212.38" y="-207.91" font-family="Times,serif" font-size="14.00">1</text>
+<polyline fill="none" stroke="black" points="1225.38,-200.11 1225.38,-223.11 "/>
+<text text-anchor="middle" x="1238.38" y="-207.91" font-family="Times,serif" font-size="14.00">2</text>
+<polyline fill="none" stroke="black" points="1251.38,-200.11 1251.38,-223.11 "/>
+<text text-anchor="middle" x="1264.38" y="-207.91" font-family="Times,serif" font-size="14.00">3</text>
+<polyline fill="none" stroke="black" points="1277.38,-200.11 1277.38,-223.11 "/>
+<text text-anchor="middle" x="1290.38" y="-207.91" font-family="Times,serif" font-size="14.00">4</text>
+<polyline fill="none" stroke="black" points="1303.38,-200.11 1303.38,-223.11 "/>
+<text text-anchor="middle" x="1316.38" y="-207.91" font-family="Times,serif" font-size="14.00">5</text>
+<polyline fill="none" stroke="black" points="1329.38,-200.11 1329.38,-223.11 "/>
+<text text-anchor="middle" x="1342.38" y="-207.91" font-family="Times,serif" font-size="14.00">6</text>
+<polyline fill="none" stroke="black" points="1355.38,-200.11 1355.38,-223.11 "/>
+<text text-anchor="middle" x="1368.38" y="-207.91" font-family="Times,serif" font-size="14.00">7</text>
+<polyline fill="none" stroke="black" points="1381.38,-200.11 1381.38,-223.11 "/>
+<text text-anchor="middle" x="1394.38" y="-207.91" font-family="Times,serif" font-size="14.00">8</text>
+</g>
+<!-- n0000009f&#45;&gt;n00000061 -->
+<g id="edge25" class="edge">
+<title>n0000009f:port1&#45;&gt;n00000061</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M1199.38,-211.11C1199.38,-211.11 1168.94,-214.22 1132.89,-217.91"/>
+<polygon fill="black" stroke="black" points="1132.19,-214.46 1122.6,-218.96 1132.9,-221.42 1132.19,-214.46"/>
+</g>
+<!-- n0000009f&#45;&gt;n00000065 -->
+<g id="edge26" class="edge">
+<title>n0000009f:port2&#45;&gt;n00000065</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M1238.48,-200.11C1238.48,-200.11 1373.26,-188.53 1410.58,-196.91 1444.02,-204.43 1447.24,-220.06 1479.66,-231.18 1479.76,-231.22 1479.86,-231.25 1479.96,-231.29"/>
+<polygon fill="black" stroke="black" points="1478.86,-234.61 1489.45,-234.34 1481,-227.95 1478.86,-234.61"/>
+</g>
+<!-- n0000009f&#45;&gt;n00000069 -->
+<g id="edge27" class="edge">
+<title>n0000009f:port3&#45;&gt;n00000069</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M1264.38,-200.11C1264.38,-200.11 1266.8,-198.46 1270.49,-195.94"/>
+<polygon fill="black" stroke="black" points="1272.56,-198.77 1278.86,-190.24 1268.62,-192.98 1272.56,-198.77"/>
+</g>
+<!-- n0000009f&#45;&gt;n0000006d -->
+<g id="edge28" class="edge">
+<title>n0000009f:port4&#45;&gt;n0000006d</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M1290.47,-200.09C1290.47,-200.09 1203.71,-189.63 1196.18,-196.91 1167.29,-224.84 1172.63,-254.76 1196.18,-287.31 1198.16,-290.05 1207.33,-293.96 1219.38,-298.13"/>
+<polygon fill="black" stroke="black" points="1218.49,-301.53 1229.09,-301.36 1220.7,-294.88 1218.49,-301.53"/>
+</g>
+<!-- n0000009f&#45;&gt;n00000071 -->
+<g id="edge29" class="edge">
+<title>n0000009f:port5&#45;&gt;n00000071</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M1316.28,-200.09C1316.28,-200.09 1402.13,-190.7 1410.58,-196.91 1486.41,-252.66 1489.83,-378.79 1487.09,-436.2"/>
+<polygon fill="black" stroke="black" points="1483.59,-436.23 1486.51,-446.41 1490.58,-436.63 1483.59,-436.23"/>
+</g>
+<!-- n0000009f&#45;&gt;n00000075 -->
+<g id="edge30" class="edge">
+<title>n0000009f:port6&#45;&gt;n00000075</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M1342.28,-200.11C1342.28,-200.11 1202.85,-192.29 1196.18,-196.91 1121.26,-248.86 1108.3,-368.49 1106.45,-424.12"/>
+<polygon fill="black" stroke="black" points="1102.94,-424.28 1106.21,-434.36 1109.94,-424.44 1102.94,-424.28"/>
+</g>
+<!-- n0000009f&#45;&gt;n00000079 -->
+<g id="edge31" class="edge">
+<title>n0000009f:port7&#45;&gt;n00000079</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M1368.38,-200.11C1368.38,-200.11 1201.84,-195.85 1197.94,-193.16 1149.87,-159.96 1132.85,-88.83 1126.94,-48.6"/>
+<polygon fill="black" stroke="black" points="1130.39,-48.06 1125.6,-38.61 1123.46,-48.98 1130.39,-48.06"/>
+</g>
+<!-- n0000009f&#45;&gt;n0000007d -->
+<g id="edge32" class="edge">
+<title>n0000009f:port8&#45;&gt;n0000007d</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M1394.38,-200.11C1394.38,-200.11 1412.45,-196.49 1416.34,-193.16 1457.25,-158.2 1479.47,-97.38 1489.62,-61.41"/>
+<polygon fill="black" stroke="black" points="1493.06,-62.09 1492.28,-51.53 1486.3,-60.27 1493.06,-62.09"/>
+</g>
+<!-- n000000e9 -->
+<g id="node37" class="node">
+<title>n000000e9</title>
+<path fill="green" stroke="black" d="M398.65,-431.45C398.65,-431.45 511.65,-431.45 511.65,-431.45 517.65,-431.45 523.65,-437.45 523.65,-443.45 523.65,-443.45 523.65,-503.45 523.65,-503.45 523.65,-509.45 517.65,-515.45 511.65,-515.45 511.65,-515.45 398.65,-515.45 398.65,-515.45 392.65,-515.45 386.65,-509.45 386.65,-503.45 386.65,-503.45 386.65,-443.45 386.65,-443.45 386.65,-437.45 392.65,-431.45 398.65,-431.45"/>
+<text text-anchor="middle" x="420.65" y="-500.25" font-family="Times,serif" font-size="14.00">1</text>
+<polyline fill="none" stroke="black" points="454.65,-492.45 454.65,-515.45 "/>
+<text text-anchor="middle" x="489.15" y="-500.25" font-family="Times,serif" font-size="14.00">2</text>
+<polyline fill="none" stroke="black" points="386.65,-492.45 523.65,-492.45 "/>
+<text text-anchor="middle" x="455.15" y="-477.25" font-family="Times,serif" font-size="14.00">ov2740 4&#45;0036</text>
+<text text-anchor="middle" x="455.15" y="-462.25" font-family="Times,serif" font-size="14.00">/dev/v4l&#45;subdev4</text>
+<polyline fill="none" stroke="black" points="386.65,-454.45 523.65,-454.45 "/>
+<text text-anchor="middle" x="455.15" y="-439.25" font-family="Times,serif" font-size="14.00">0</text>
+</g>
+<!-- n000000e9&#45;&gt;n0000008b -->
+<g id="edge33" class="edge">
+<title>n000000e9:port0&#45;&gt;n0000008b:port0</title>
+<path fill="none" stroke="black" stroke-width="2" d="M386.14,-442.55C386.14,-442.55 361.11,-493.23 383.45,-518.65 391.47,-527.78 484.31,-519.72 492.3,-528.88 508.64,-547.6 499.26,-579.87 493.12,-595.68"/>
+<polygon fill="black" stroke="black" stroke-width="2" points="489.86,-594.41 489.11,-604.98 496.29,-597.19 489.86,-594.41"/>
+</g>
+</g>
+</svg>
diff --git a/Documentation/media/v4l-drivers/ivtv-cardlist.rst b/Documentation/admin-guide/media/ivtv-cardlist.rst
index 022dca80c2c8..0ffc3b71ae60 100644
--- a/Documentation/media/v4l-drivers/ivtv-cardlist.rst
+++ b/Documentation/admin-guide/media/ivtv-cardlist.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
IVTV cards list
===============
@@ -10,7 +12,7 @@ IVTV cards list
* - Card number
- Card name
- - PCI IDs
+ - PCI subsystem IDs
* - 0
- Hauppauge WinTV PVR-250
diff --git a/Documentation/media/v4l-drivers/ivtv.rst b/Documentation/admin-guide/media/ivtv.rst
index 3ba464c4f9bf..8b65ac3f5321 100644
--- a/Documentation/media/v4l-drivers/ivtv.rst
+++ b/Documentation/admin-guide/media/ivtv.rst
@@ -1,8 +1,9 @@
+.. SPDX-License-Identifier: GPL-2.0
The ivtv driver
===============
-Author: Hans Verkuil <hverkuil@xs4all.nl>
+Author: Hans Verkuil <hverkuil@kernel.org>
This is a v4l2 device driver for the Conexant cx23415/6 MPEG encoder/decoder.
The cx23415 can do both encoding and decoding, the cx23416 can only do MPEG
@@ -158,7 +159,7 @@ whatever). Otherwise the device numbers can get confusing. The ivtv
Read-only
The raw YUV video output from the current video input. The YUV format
- is non-standard (V4L2_PIX_FMT_HM12).
+ is a 16x16 linear tiled NV12 format (V4L2_PIX_FMT_NV12_16L16)
Note that the YUV and PCM streams are not synchronized, so they are of
limited use.
diff --git a/Documentation/media/dvb-drivers/lmedm04.rst b/Documentation/admin-guide/media/lmedm04.rst
index e8913d4481a0..a6ee33413748 100644
--- a/Documentation/media/dvb-drivers/lmedm04.rst
+++ b/Documentation/admin-guide/media/lmedm04.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
Firmware files for lmedm04 cards
================================
diff --git a/Documentation/admin-guide/media/mgb4.rst b/Documentation/admin-guide/media/mgb4.rst
new file mode 100644
index 000000000000..5ac69b833a7a
--- /dev/null
+++ b/Documentation/admin-guide/media/mgb4.rst
@@ -0,0 +1,385 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: <isonum.txt>
+
+The mgb4 driver
+===============
+
+Copyright |copy| 2023 - 2025 Digiteq Automotive
+ author: Martin Tůma <martin.tuma@digiteqautomotive.com>
+
+This is a v4l2 device driver for the Digiteq Automotive FrameGrabber 4, a PCIe
+card capable of capturing and generating FPD-Link III and GMSL2/3 video streams
+as used in the automotive industry.
+
+sysfs interface
+---------------
+
+The mgb4 driver provides a sysfs interface, that is used to configure video
+stream related parameters (some of them must be set properly before the v4l2
+device can be opened) and obtain the video device/stream status.
+
+There are two types of parameters - global / PCI card related, found under
+``/sys/class/video4linux/videoX/device`` and module specific found under
+``/sys/class/video4linux/videoX``.
+
+Global (PCI card) parameters
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+**module_type** (R):
+ Module type.
+
+ | 0 - No module present
+ | 1 - FPDL3
+ | 2 - GMSL (one serializer, two daisy chained deserializers)
+ | 3 - GMSL (one serializer, two deserializers)
+ | 4 - GMSL (two deserializers with two daisy chain outputs)
+
+**module_version** (R):
+ Module version number. Zero in case of a missing module.
+
+**fw_type** (R):
+ Firmware type.
+
+ | 1 - FPDL3
+ | 2 - GMSL
+
+**fw_version** (R):
+ Firmware version number.
+
+**serial_number** (R):
+ Card serial number. The format is::
+
+ PRODUCT-REVISION-SERIES-SERIAL
+
+ where each component is a 8b number.
+
+Common FPDL3/GMSL input parameters
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+**input_id** (R):
+ Input number ID, zero based.
+
+**oldi_lane_width** (RW):
+ Number of deserializer output lanes.
+
+ | 0 - single
+ | 1 - dual (default)
+
+**color_mapping** (RW):
+ Mapping of the incoming bits in the signal to the colour bits of the pixels.
+
+ | 0 - OLDI/JEIDA
+ | 1 - SPWG/VESA (default)
+
+**link_status** (R):
+ Video link status. If the link is locked, chips are properly connected and
+ communicating at the same speed and protocol. The link can be locked without
+ an active video stream.
+
+ A value of 0 is equivalent to the V4L2_IN_ST_NO_SYNC flag of the V4L2
+ VIDIOC_ENUMINPUT status bits.
+
+ | 0 - unlocked
+ | 1 - locked
+
+**stream_status** (R):
+ Video stream status. A stream is detected if the link is locked, the input
+ pixel clock is running and the DE signal is moving.
+
+ A value of 0 is equivalent to the V4L2_IN_ST_NO_SIGNAL flag of the V4L2
+ VIDIOC_ENUMINPUT status bits.
+
+ | 0 - not detected
+ | 1 - detected
+
+**video_width** (R):
+ Video stream width. This is the actual width as detected by the HW.
+
+ The value is identical to what VIDIOC_QUERY_DV_TIMINGS returns in the width
+ field of the v4l2_bt_timings struct.
+
+**video_height** (R):
+ Video stream height. This is the actual height as detected by the HW.
+
+ The value is identical to what VIDIOC_QUERY_DV_TIMINGS returns in the height
+ field of the v4l2_bt_timings struct.
+
+**vsync_status** (R):
+ The type of VSYNC pulses as detected by the video format detector.
+
+ The value is equivalent to the flags returned by VIDIOC_QUERY_DV_TIMINGS in
+ the polarities field of the v4l2_bt_timings struct.
+
+ | 0 - active low
+ | 1 - active high
+ | 2 - not available
+
+**hsync_status** (R):
+ The type of HSYNC pulses as detected by the video format detector.
+
+ The value is equivalent to the flags returned by VIDIOC_QUERY_DV_TIMINGS in
+ the polarities field of the v4l2_bt_timings struct.
+
+ | 0 - active low
+ | 1 - active high
+ | 2 - not available
+
+**vsync_gap_length** (RW):
+ If the incoming video signal does not contain synchronization VSYNC and
+ HSYNC pulses, these must be generated internally in the FPGA to achieve
+ the correct frame ordering. This value indicates, how many "empty" pixels
+ (pixels with deasserted Data Enable signal) are necessary to generate the
+ internal VSYNC pulse.
+
+**hsync_gap_length** (RW):
+ If the incoming video signal does not contain synchronization VSYNC and
+ HSYNC pulses, these must be generated internally in the FPGA to achieve
+ the correct frame ordering. This value indicates, how many "empty" pixels
+ (pixels with deasserted Data Enable signal) are necessary to generate the
+ internal HSYNC pulse. The value must be greater than 1 and smaller than
+ vsync_gap_length.
+
+**pclk_frequency** (R):
+ Input pixel clock frequency in kHz.
+
+ The value is identical to what VIDIOC_QUERY_DV_TIMINGS returns in
+ the pixelclock field of the v4l2_bt_timings struct.
+
+ *Note: The frequency_range parameter must be set properly first to get
+ a valid frequency here.*
+
+**hsync_width** (R):
+ Width of the HSYNC signal in PCLK clock ticks.
+
+ The value is identical to what VIDIOC_QUERY_DV_TIMINGS returns in
+ the hsync field of the v4l2_bt_timings struct.
+
+**vsync_width** (R):
+ Width of the VSYNC signal in PCLK clock ticks.
+
+ The value is identical to what VIDIOC_QUERY_DV_TIMINGS returns in
+ the vsync field of the v4l2_bt_timings struct.
+
+**hback_porch** (R):
+ Number of PCLK pulses between deassertion of the HSYNC signal and the first
+ valid pixel in the video line (marked by DE=1).
+
+ The value is identical to what VIDIOC_QUERY_DV_TIMINGS returns in
+ the hbackporch field of the v4l2_bt_timings struct.
+
+**hfront_porch** (R):
+ Number of PCLK pulses between the end of the last valid pixel in the video
+ line (marked by DE=1) and assertion of the HSYNC signal.
+
+ The value is identical to what VIDIOC_QUERY_DV_TIMINGS returns in
+ the hfrontporch field of the v4l2_bt_timings struct.
+
+**vback_porch** (R):
+ Number of video lines between deassertion of the VSYNC signal and the video
+ line with the first valid pixel (marked by DE=1).
+
+ The value is identical to what VIDIOC_QUERY_DV_TIMINGS returns in
+ the vbackporch field of the v4l2_bt_timings struct.
+
+**vfront_porch** (R):
+ Number of video lines between the end of the last valid pixel line (marked
+ by DE=1) and assertion of the VSYNC signal.
+
+ The value is identical to what VIDIOC_QUERY_DV_TIMINGS returns in
+ the vfrontporch field of the v4l2_bt_timings struct.
+
+**frequency_range** (RW)
+ PLL frequency range of the OLDI input clock generator. The PLL frequency is
+ derived from the Pixel Clock Frequency (PCLK) and is equal to PCLK if
+ oldi_lane_width is set to "single" and PCLK/2 if oldi_lane_width is set to
+ "dual".
+
+ | 0 - PLL < 50MHz (default)
+ | 1 - PLL >= 50MHz
+
+ *Note: This parameter can not be changed while the input v4l2 device is
+ open.*
+
+Common FPDL3/GMSL output parameters
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+**output_id** (R):
+ Output number ID, zero based.
+
+**video_source** (RW):
+ Output video source. If set to 0 or 1, the source is the corresponding card
+ input and the v4l2 output devices are disabled. If set to 2 or 3, the source
+ is the corresponding v4l2 video output device. The default is
+ the corresponding v4l2 output, i.e. 2 for OUT1 and 3 for OUT2.
+
+ | 0 - input 0
+ | 1 - input 1
+ | 2 - v4l2 output 0
+ | 3 - v4l2 output 1
+
+ *Note: This parameter can not be changed while ANY of the input/output v4l2
+ devices is open.*
+
+**display_width** (RW):
+ Display width. There is no autodetection of the connected display, so the
+ proper value must be set before the start of streaming. The default width
+ is 1280.
+
+ *Note: This parameter can not be changed while the output v4l2 device is
+ open.*
+
+**display_height** (RW):
+ Display height. There is no autodetection of the connected display, so the
+ proper value must be set before the start of streaming. The default height
+ is 640.
+
+ *Note: This parameter can not be changed while the output v4l2 device is
+ open.*
+
+**frame_rate** (RW):
+ Output video signal frame rate limit in frames per second. Due to
+ the limited output pixel clock steps, the card can not always generate
+ a frame rate perfectly matching the value required by the connected display.
+ Using this parameter one can limit the frame rate by "crippling" the signal
+ so that the lines are not equal (the porches of the last line differ) but
+ the signal appears like having the exact frame rate to the connected display.
+ The default frame rate limit is 60Hz.
+
+**hsync_polarity** (RW):
+ HSYNC signal polarity.
+
+ | 0 - active low (default)
+ | 1 - active high
+
+**vsync_polarity** (RW):
+ VSYNC signal polarity.
+
+ | 0 - active low (default)
+ | 1 - active high
+
+**de_polarity** (RW):
+ DE signal polarity.
+
+ | 0 - active low
+ | 1 - active high (default)
+
+**pclk_frequency** (RW):
+ Output pixel clock frequency. Allowed values are between 25000-190000(kHz)
+ and there is a non-linear stepping between two consecutive allowed
+ frequencies. The driver finds the nearest allowed frequency to the given
+ value and sets it. When reading this property, you get the exact
+ frequency set by the driver. The default frequency is 61150kHz.
+
+ *Note: This parameter can not be changed while the output v4l2 device is
+ open.*
+
+**hsync_width** (RW):
+ Width of the HSYNC signal in pixels. The default value is 40.
+
+**vsync_width** (RW):
+ Width of the VSYNC signal in video lines. The default value is 20.
+
+**hback_porch** (RW):
+ Number of PCLK pulses between deassertion of the HSYNC signal and the first
+ valid pixel in the video line (marked by DE=1). The default value is 50.
+
+**hfront_porch** (RW):
+ Number of PCLK pulses between the end of the last valid pixel in the video
+ line (marked by DE=1) and assertion of the HSYNC signal. The default value
+ is 50.
+
+**vback_porch** (RW):
+ Number of video lines between deassertion of the VSYNC signal and the video
+ line with the first valid pixel (marked by DE=1). The default value is 31.
+
+**vfront_porch** (RW):
+ Number of video lines between the end of the last valid pixel line (marked
+ by DE=1) and assertion of the VSYNC signal. The default value is 30.
+
+FPDL3 specific input parameters
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+**fpdl3_input_width** (RW):
+ Number of deserializer input lines.
+
+ | 0 - auto (default)
+ | 1 - single
+ | 2 - dual
+
+FPDL3 specific output parameters
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+**fpdl3_output_width** (RW):
+ Number of serializer output lines.
+
+ | 0 - auto (default)
+ | 1 - single
+ | 2 - dual
+
+GMSL specific input parameters
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+**gmsl_mode** (RW):
+ GMSL speed mode.
+
+ | 0 - 12Gb/s (default)
+ | 1 - 6Gb/s
+ | 2 - 3Gb/s
+ | 3 - 1.5Gb/s
+
+**gmsl_stream_id** (RW):
+ The GMSL multi-stream contains up to four video streams. This parameter
+ selects which stream is captured by the video input. The value is the
+ zero-based index of the stream. The default stream id is 0.
+
+ *Note: This parameter can not be changed while the input v4l2 device is
+ open.*
+
+**gmsl_fec** (RW):
+ GMSL Forward Error Correction (FEC).
+
+ | 0 - disabled
+ | 1 - enabled (default)
+
+MTD partitions
+--------------
+
+The mgb4 driver creates a MTD device with two partitions:
+ - mgb4-fw.X - FPGA firmware.
+ - mgb4-data.X - Factory settings, e.g. card serial number.
+
+The *mgb4-fw* partition is writable and is used for FW updates, *mgb4-data* is
+read-only. The *X* attached to the partition name represents the card number.
+Depending on the CONFIG_MTD_PARTITIONED_MASTER kernel configuration, you may
+also have a third partition named *mgb4-flash* available in the system. This
+partition represents the whole, unpartitioned, card's FLASH memory and one should
+not fiddle with it...
+
+IIO (triggers)
+--------------
+
+The mgb4 driver creates an Industrial I/O (IIO) device that provides trigger and
+signal level status capability. The following scan elements are available:
+
+**activity**:
+ The trigger levels and pending status.
+
+ | bit 1 - trigger 1 pending
+ | bit 2 - trigger 2 pending
+ | bit 5 - trigger 1 level
+ | bit 6 - trigger 2 level
+
+**timestamp**:
+ The trigger event timestamp.
+
+The iio device can operate either in "raw" mode where you can fetch the signal
+levels (activity bits 5 and 6) using sysfs access or in triggered buffer mode.
+In the triggered buffer mode you can follow the signal level changes (activity
+bits 1 and 2) using the iio device in /dev. If you enable the timestamps, you
+will also get the exact trigger event time that can be matched to a video frame
+(every mgb4 video frame has a timestamp with the same clock source).
+
+*Note: although the activity sample always contains all the status bits, it makes
+no sense to get the pending bits in raw mode or the level bits in the triggered
+buffer mode - the values do not represent valid data in such case.*
diff --git a/Documentation/admin-guide/media/misc-cardlist.rst b/Documentation/admin-guide/media/misc-cardlist.rst
new file mode 100644
index 000000000000..4c26bcfccd61
--- /dev/null
+++ b/Documentation/admin-guide/media/misc-cardlist.rst
@@ -0,0 +1,28 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Firewire driver
+===============
+
+The media subsystem also provides a firewire driver for digital TV:
+
+======= =====================
+Driver Name
+======= =====================
+firedtv FireDTV and FloppyDTV
+======= =====================
+
+Test drivers
+============
+
+In order to test userspace applications, there's a number of virtual
+drivers, with provide test functionality, simulating real hardware
+devices:
+
+======= ======================================
+Driver Name
+======= ======================================
+vicodec Virtual Codec Driver
+vim2m Virtual Memory-to-Memory Driver
+vimc Virtual Media Controller Driver (VIMC)
+vivid Virtual Video Test Driver
+======= ======================================
diff --git a/Documentation/admin-guide/media/omap3isp.rst b/Documentation/admin-guide/media/omap3isp.rst
new file mode 100644
index 000000000000..f32e7375a1a2
--- /dev/null
+++ b/Documentation/admin-guide/media/omap3isp.rst
@@ -0,0 +1,92 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: <isonum.txt>
+
+OMAP 3 Image Signal Processor (ISP) driver
+==========================================
+
+Copyright |copy| 2010 Nokia Corporation
+
+Copyright |copy| 2009 Texas Instruments, Inc.
+
+Contacts: Laurent Pinchart <laurent.pinchart@ideasonboard.com>,
+Sakari Ailus <sakari.ailus@iki.fi>, David Cohen <dacohen@gmail.com>
+
+
+Introduction
+------------
+
+This file documents the Texas Instruments OMAP 3 Image Signal Processor (ISP)
+driver located under drivers/media/platform/ti/omap3isp. The original driver was
+written by Texas Instruments but since that it has been rewritten (twice) at
+Nokia.
+
+The driver has been successfully used on the following versions of OMAP 3:
+
+- 3430
+- 3530
+- 3630
+
+The driver implements V4L2, Media controller and v4l2_subdev interfaces.
+Sensor, lens and flash drivers using the v4l2_subdev interface in the kernel
+are supported.
+
+
+Split to subdevs
+----------------
+
+The OMAP 3 ISP is split into V4L2 subdevs, each of the blocks inside the ISP
+having one subdev to represent it. Each of the subdevs provide a V4L2 subdev
+interface to userspace.
+
+- OMAP3 ISP CCP2
+- OMAP3 ISP CSI2a
+- OMAP3 ISP CCDC
+- OMAP3 ISP preview
+- OMAP3 ISP resizer
+- OMAP3 ISP AEWB
+- OMAP3 ISP AF
+- OMAP3 ISP histogram
+
+Each possible link in the ISP is modelled by a link in the Media controller
+interface. For an example program see [#]_.
+
+
+Controlling the OMAP 3 ISP
+--------------------------
+
+In general, the settings given to the OMAP 3 ISP take effect at the beginning
+of the following frame. This is done when the module becomes idle during the
+vertical blanking period on the sensor. In memory-to-memory operation the pipe
+is run one frame at a time. Applying the settings is done between the frames.
+
+All the blocks in the ISP, excluding the CSI-2 and possibly the CCP2 receiver,
+insist on receiving complete frames. Sensors must thus never send the ISP
+partial frames.
+
+Autoidle does have issues with some ISP blocks on the 3430, at least.
+Autoidle is only enabled on 3630 when the omap3isp module parameter autoidle
+is non-zero.
+
+Technical reference manuals (TRMs) and other documentation
+----------------------------------------------------------
+
+OMAP 3430 TRM:
+<URL:http://focus.ti.com/pdfs/wtbu/OMAP34xx_ES3.1.x_PUBLIC_TRM_vZM.zip>
+Referenced 2011-03-05.
+
+OMAP 35xx TRM:
+<URL:http://www.ti.com/litv/pdf/spruf98o> Referenced 2011-03-05.
+
+OMAP 3630 TRM:
+<URL:http://focus.ti.com/pdfs/wtbu/OMAP36xx_ES1.x_PUBLIC_TRM_vQ.zip>
+Referenced 2011-03-05.
+
+DM 3730 TRM:
+<URL:http://www.ti.com/litv/pdf/sprugn4h> Referenced 2011-03-06.
+
+
+References
+----------
+
+.. [#] http://git.ideasonboard.org/?p=media-ctl.git;a=summary
diff --git a/Documentation/media/dvb-drivers/opera-firmware.rst b/Documentation/admin-guide/media/opera-firmware.rst
index 41236b43c124..fab3581551de 100644
--- a/Documentation/media/dvb-drivers/opera-firmware.rst
+++ b/Documentation/admin-guide/media/opera-firmware.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
Opera firmware
==============
diff --git a/Documentation/admin-guide/media/other-usb-cardlist.rst b/Documentation/admin-guide/media/other-usb-cardlist.rst
new file mode 100644
index 000000000000..fb88db50e861
--- /dev/null
+++ b/Documentation/admin-guide/media/other-usb-cardlist.rst
@@ -0,0 +1,78 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Other USB cards list
+====================
+
+================ ====================================== =====================
+Driver Card name USB IDs
+================ ====================================== =====================
+airspy Airspy 1d50:60a1
+dvb-as102 Abilis Systems DVB-Titan 1BA6:0001
+dvb-as102 PCTV Systems picoStick (74e) 2013:0246
+dvb-as102 Elgato EyeTV DTT Deluxe 0fd9:002c
+dvb-as102 nBox DVB-T Dongle 0b89:0007
+dvb-as102 Sky IT Digital Key (green led) 2137:0001
+b2c2-flexcop-usb Technisat/B2C2 FlexCop II/IIb/III 0af7:0101
+ Digital TV
+go7007 WIS GO7007 MPEG encoder 1943:a250, 093b:a002,
+ 093b:a004, 0eb1:6666,
+ 0eb1:6668
+hackrf HackRF Software Decoder Radio 1d50:6089
+hdpvr Hauppauge HD PVR 2040:4900, 2040:4901,
+ 2040:4902, 2040:4982,
+ 2040:4903
+msi2500 Mirics MSi3101 SDR Dongle 1df7:2500, 2040:d300
+pvrusb2 Hauppauge WinTV-PVR USB2 2040:2900, 2040:2950,
+ 2040:2400, 1164:0622,
+ 1164:0602, 11ba:1003,
+ 11ba:1001, 2040:7300,
+ 2040:7500, 2040:7501,
+ 0ccd:0039, 2040:7502,
+ 2040:7510
+pwc Creative Webcam 5 041E:400C
+pwc Creative Webcam Pro Ex 041E:4011
+pwc Logitech QuickCam 3000 Pro 046D:08B0
+pwc Logitech QuickCam Notebook Pro 046D:08B1
+pwc Logitech QuickCam 4000 Pro 046D:08B2
+pwc Logitech QuickCam Zoom (old model) 046D:08B3
+pwc Logitech QuickCam Zoom (new model) 046D:08B4
+pwc Logitech QuickCam Orbit/Sphere 046D:08B5
+pwc Logitech/Cisco VT Camera 046D:08B6
+pwc Logitech ViewPort AV 100 046D:08B7
+pwc Logitech QuickCam 046D:08B8
+pwc Philips PCA645VC 0471:0302
+pwc Philips PCA646VC 0471:0303
+pwc Askey VC010 type 2 0471:0304
+pwc Philips PCVC675K (Vesta) 0471:0307
+pwc Philips PCVC680K (Vesta Pro) 0471:0308
+pwc Philips PCVC690K (Vesta Pro Scan) 0471:030C
+pwc Philips PCVC730K (ToUCam Fun), 0471:0310
+ PCVC830 (ToUCam II)
+pwc Philips PCVC740K (ToUCam Pro), 0471:0311
+ PCVC840 (ToUCam II)
+pwc Philips PCVC750K (ToUCam Pro Scan) 0471:0312
+pwc Philips PCVC720K/40 (ToUCam XS) 0471:0313
+pwc Philips SPC 900NC 0471:0329
+pwc Philips SPC 880NC 0471:032C
+pwc Sotec Afina Eye 04CC:8116
+pwc Samsung MPC-C10 055D:9000
+pwc Samsung MPC-C30 055D:9001
+pwc Samsung SNC-35E (Ver3.0) 055D:9002
+pwc Askey VC010 type 1 069A:0001
+pwc AME Co. Afina Eye 06BE:8116
+pwc Visionite VCS-UC300 0d81:1900
+pwc Visionite VCS-UM100 0d81:1910
+s2255drv Sensoray 2255 1943:2255, 1943:2257
+stk1160 STK1160 USB video capture dongle 05e1:0408
+dvb-ttusb-budget Technotrend/Hauppauge Nova-USB devices 0b48:1003, 0b48:1004,
+ 0b48:1005
+dvb-ttusb_dec Technotrend/Hauppauge MPEG decoder 0b48:1006
+ DEC3000-s
+dvb-ttusb_dec Technotrend/Hauppauge MPEG decoder 0b48:1007
+dvb-ttusb_dec Technotrend/Hauppauge MPEG decoder 0b48:1008
+ DEC2000-t
+dvb-ttusb_dec Technotrend/Hauppauge MPEG decoder
+ DEC2540-t 0b48:1009
+usbtv Fushicai USBTV007 Audio-Video Grabber 1b71:3002, 1f71:3301,
+ 1f71:3306
+================ ====================================== =====================
diff --git a/Documentation/admin-guide/media/pci-cardlist.rst b/Documentation/admin-guide/media/pci-cardlist.rst
new file mode 100644
index 000000000000..239879634ea5
--- /dev/null
+++ b/Documentation/admin-guide/media/pci-cardlist.rst
@@ -0,0 +1,108 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+PCI drivers
+===========
+
+The PCI boards are identified by an identification called PCI ID. The PCI ID
+is actually composed by two parts:
+
+ - Vendor ID and device ID;
+ - Subsystem ID and Subsystem device ID;
+
+The ``lspci -nn`` command allows identifying the vendor/device PCI IDs:
+
+.. code-block:: none
+ :emphasize-lines: 3
+
+ $ lspci -nn
+ ...
+ 00:0a.0 Multimedia controller [0480]: Philips Semiconductors SAA7131/SAA7133/SAA7135 Video Broadcast Decoder [1131:7133] (rev d1)
+ 00:0b.0 Multimedia controller [0480]: Brooktree Corporation Bt878 Audio Capture [109e:0878] (rev 11)
+ 01:00.0 Multimedia video controller [0400]: Conexant Systems, Inc. CX23887/8 PCIe Broadcast Audio and Video Decoder with 3D Comb [14f1:8880] (rev 0f)
+ 02:01.0 Multimedia video controller [0400]: Internext Compression Inc iTVC15 (CX23415) Video Decoder [4444:0803] (rev 01)
+ 02:02.0 Multimedia video controller [0400]: Conexant Systems, Inc. CX23418 Single-Chip MPEG-2 Encoder with Integrated Analog Video/Broadcast Audio Decoder [14f1:5b7a]
+ 02:03.0 Multimedia video controller [0400]: Brooktree Corporation Bt878 Video Capture [109e:036e] (rev 11)
+ ...
+
+The subsystem IDs can be obtained using ``lspci -vn``
+
+.. code-block:: none
+ :emphasize-lines: 4
+
+ $ lspci -vn
+ ...
+ 00:0a.0 0480: 1131:7133 (rev d1)
+ Subsystem: 1461:f01d
+ Flags: bus master, medium devsel, latency 32, IRQ 209
+ Memory at e2002000 (32-bit, non-prefetchable) [size=2K]
+ Capabilities: [40] Power Management version 2
+ ...
+
+At the above example, the first card uses the ``saa7134`` driver, and
+has a vendor/device PCI ID equal to ``1131:7133`` and a PCI subsystem
+ID equal to ``1461:f01d`` (see :doc:`Saa7134 card list<saa7134-cardlist>`).
+
+Unfortunately, sometimes the same PCI subsystem ID is used by different
+products. So, several media drivers allow passing a ``card=`` parameter,
+in order to setup a card number that would match the correct settings for
+an specific board.
+
+The current supported PCI/PCIe cards (not including staging drivers) are
+listed below\ [#]_.
+
+.. [#] some of the drivers have sub-drivers, not shown at this table
+
+================ ========================================================
+Driver Name
+================ ========================================================
+altera-ci Altera FPGA based CI module
+b2c2-flexcop-pci Technisat/B2C2 Air/Sky/Cable2PC PCI
+bt878 DVB/ATSC Support for bt878 based TV cards
+bttv BT8x8 Video For Linux
+cobalt Cisco Cobalt
+cx18 Conexant cx23418 MPEG encoder
+cx23885 Conexant cx23885 (2388x successor)
+cx25821 Conexant cx25821
+cx88xx Conexant 2388x (bt878 successor)
+ddbridge Digital Devices bridge
+dm1105 SDMC DM1105 based PCI cards
+dt3155 DT3155 frame grabber
+dvb-ttpci AV7110 cards
+earth-pt1 PT1 cards
+earth-pt3 Earthsoft PT3 cards
+hexium_gemini Hexium Gemini frame grabber
+hexium_orion Hexium HV-PCI6 and Orion frame grabber
+hopper HOPPER based cards
+ipu3-cio2 Intel ipu3-cio2 driver
+ivtv Conexant cx23416/cx23415 MPEG encoder/decoder
+ivtvfb Conexant cx23415 framebuffer
+mantis MANTIS based cards
+mgb4 Digiteq Automotive MGB4 frame grabber
+mxb Siemens-Nixdorf 'Multimedia eXtension Board'
+netup-unidvb NetUP Universal DVB card
+ngene Micronas nGene
+pluto2 Pluto2 cards
+saa7134 Philips SAA7134
+saa7164 NXP SAA7164
+smipcie SMI PCIe DVBSky cards
+solo6x10 Bluecherry / Softlogic 6x10 capture cards (MPEG-4/H.264)
+tw5864 Techwell TW5864 video/audio grabber and encoder
+tw686x Intersil/Techwell TW686x
+tw68 Techwell tw68x Video For Linux
+zoran Zoran-36057/36067 JPEG codec
+================ ========================================================
+
+Some of those drivers support multiple devices, as shown at the card
+lists below:
+
+.. toctree::
+ :maxdepth: 1
+
+ bttv-cardlist
+ cx18-cardlist
+ cx23885-cardlist
+ cx88-cardlist
+ ivtv-cardlist
+ saa7134-cardlist
+ saa7164-cardlist
+ zoran-cardlist
diff --git a/Documentation/media/v4l-drivers/philips.rst b/Documentation/admin-guide/media/philips.rst
index 4f68947e6a13..e2840be10d08 100644
--- a/Documentation/media/v4l-drivers/philips.rst
+++ b/Documentation/admin-guide/media/philips.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
Philips webcams (pwc driver)
============================
diff --git a/Documentation/admin-guide/media/platform-cardlist.rst b/Documentation/admin-guide/media/platform-cardlist.rst
new file mode 100644
index 000000000000..1230ae4037ad
--- /dev/null
+++ b/Documentation/admin-guide/media/platform-cardlist.rst
@@ -0,0 +1,89 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Platform drivers
+================
+
+There are several drivers that are focused on providing support for
+functionality that are already included at the main board, and don't
+use neither USB nor PCI bus. Those drivers are called platform
+drivers, and are very popular on embedded devices.
+
+The current supported of platform drivers (not including staging drivers) are
+listed below
+
+================= ============================================================
+Driver Name
+================= ============================================================
+am437x-vpfe TI AM437x VPFE
+aspeed-video Aspeed AST2400 and AST2500
+atmel-isc ATMEL Image Sensor Controller (ISC)
+atmel-isi ATMEL Image Sensor Interface (ISI)
+c8sectpfe SDR platform devices
+c8sectpfe SDR platform devices
+cafe_ccic Marvell 88ALP01 (Cafe) CMOS Camera Controller
+cdns-csi2rx Cadence MIPI-CSI2 RX Controller
+cdns-csi2tx Cadence MIPI-CSI2 TX Controller
+coda-vpu Chips&Media Coda multi-standard codec IP
+dm355_ccdc TI DM355 CCDC video capture
+dm644x_ccdc TI DM6446 CCDC video capture
+exynos-fimc-is EXYNOS4x12 FIMC-IS (Imaging Subsystem)
+exynos-fimc-lite EXYNOS FIMC-LITE camera interface
+exynos-gsc Samsung Exynos G-Scaler
+exy Samsung S5P/EXYNOS4 SoC series Camera Subsystem
+imx-pxp i.MX Pixel Pipeline (PXP)
+isdf TI DM365 ISIF video capture
+mmp_camera Marvell Armada 610 integrated camera controller
+mtk_jpeg Mediatek JPEG Codec
+mtk-mdp Mediatek MDP
+mtk-vcodec-dec Mediatek Video Codec
+mtk-vpu Mediatek Video Processor Unit
+mx2_emmaprp MX2 eMMa-PrP
+omap3-isp OMAP 3 Camera
+omap-vout OMAP2/OMAP3 V4L2-Display
+pxa_camera PXA27x Quick Capture Interface
+qcom-camss Qualcomm V4L2 Camera Subsystem
+rcar-csi2 R-Car MIPI CSI-2 Receiver
+rcar_drif Renesas Digital Radio Interface (DRIF)
+rcar-fcp Renesas Frame Compression Processor
+rcar_fdp1 Renesas Fine Display Processor
+rcar_jpu Renesas JPEG Processing Unit
+rcar-vin R-Car Video Input (VIN)
+renesas-ceu Renesas Capture Engine Unit (CEU)
+rockchip-rga Rockchip Raster 2d Graphic Acceleration Unit
+s3c-camif Samsung S3C24XX/S3C64XX SoC Camera Interface
+s5p-csis S5P/EXYNOS MIPI-CSI2 receiver (MIPI-CSIS)
+s5p-fimc S5P/EXYNOS4 FIMC/CAMIF camera interface
+s5p-g2d Samsung S5P and EXYNOS4 G2D 2d graphics accelerator
+s5p-jpeg Samsung S5P/Exynos3250/Exynos4 JPEG codec
+s5p-mfc Samsung S5P MFC Video Codec
+sh_veu SuperH VEU mem2mem video processing
+sh_vou SuperH VOU video output
+stm32-dcmi STM32 Digital Camera Memory Interface (DCMI)
+stm32-dma2d STM32 Chrom-Art Accelerator Unit
+sun4i-csi Allwinner A10 CMOS Sensor Interface Support
+sun6i-csi Allwinner V3s Camera Sensor Interface
+sun8i-di Allwinner Deinterlace
+sun8i-rotate Allwinner DE2 rotation
+ti-cal TI Memory-to-memory multimedia devices
+ti-csc TI DVB platform devices
+ti-vpe TI VPE (Video Processing Engine)
+venus-enc Qualcomm Venus V4L2 encoder/decoder
+via-camera VIAFB camera controller
+video-mux Video Multiplexer
+vpif_display TI DaVinci VPIF V4L2-Display
+vpif_capture TI DaVinci VPIF video capture
+vsp1 Renesas VSP1 Video Processing Engine
+xilinx-tpg Xilinx Video Test Pattern Generator
+xilinx-video Xilinx Video IP (EXPERIMENTAL)
+xilinx-vtc Xilinx Video Timing Controller
+================= ============================================================
+
+MMC/SDIO DVB adapters
+---------------------
+
+======= ===========================================
+Driver Name
+======= ===========================================
+smssdio Siano SMS1xxx based MDTV via SDIO interface
+======= ===========================================
+
diff --git a/Documentation/media/v4l-drivers/qcom_camss.rst b/Documentation/admin-guide/media/qcom_camss.rst
index f27c8df20b2b..8a8f3ff40105 100644
--- a/Documentation/media/v4l-drivers/qcom_camss.rst
+++ b/Documentation/admin-guide/media/qcom_camss.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
.. include:: <isonum.txt>
Qualcomm Camera Subsystem driver
@@ -16,7 +18,7 @@ The driver implements V4L2, Media controller and V4L2 subdev interfaces.
Camera sensor using V4L2 subdev interface in the kernel is supported.
The driver is implemented using as a reference the Qualcomm Camera Subsystem
-driver for Android as found in Code Aurora [#f1]_ [#f2]_.
+driver for Android as found in Code Linaro [#f1]_ [#f2]_.
Qualcomm Camera Subsystem hardware
@@ -121,7 +123,7 @@ The considerations to split the driver in this particular way are as follows:
- representing CSIPHY and CSID modules by a separate sub-device for each module
allows to model the hardware links between these modules;
- representing VFE by a separate sub-devices for each input interface allows
- to use the input interfaces concurently and independently as this is
+ to use the input interfaces concurrently and independently as this is
supported by the hardware;
- representing ISPIF by a number of sub-devices equal to the number of CSID
sub-devices allows to create linear media controller pipelines when using two
@@ -179,5 +181,5 @@ Referenced 2018-06-22.
References
----------
-.. [#f1] https://source.codeaurora.org/quic/la/kernel/msm-3.10/
-.. [#f2] https://source.codeaurora.org/quic/la/kernel/msm-3.18/
+.. [#f1] https://git.codelinaro.org/clo/la/kernel/msm-3.10/
+.. [#f2] https://git.codelinaro.org/clo/la/kernel/msm-3.18/
diff --git a/Documentation/media/v4l-drivers/qcom_camss_8x96_graph.dot b/Documentation/admin-guide/media/qcom_camss_8x96_graph.dot
index de34f0a7afdc..7ed243b41b67 100644
--- a/Documentation/media/v4l-drivers/qcom_camss_8x96_graph.dot
+++ b/Documentation/admin-guide/media/qcom_camss_8x96_graph.dot
@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+
digraph board {
rankdir=TB
n00000001 [label="{{<port0> 0} | msm_csiphy0\n/dev/v4l-subdev0 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
diff --git a/Documentation/media/v4l-drivers/qcom_camss_graph.dot b/Documentation/admin-guide/media/qcom_camss_graph.dot
index 827fc7112c1e..ef7dca92fd0b 100644
--- a/Documentation/media/v4l-drivers/qcom_camss_graph.dot
+++ b/Documentation/admin-guide/media/qcom_camss_graph.dot
@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+
digraph board {
rankdir=TB
n00000001 [label="{{<port0> 0} | msm_csiphy0\n/dev/v4l-subdev0 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
diff --git a/Documentation/admin-guide/media/radio-cardlist.rst b/Documentation/admin-guide/media/radio-cardlist.rst
new file mode 100644
index 000000000000..a82a146bf912
--- /dev/null
+++ b/Documentation/admin-guide/media/radio-cardlist.rst
@@ -0,0 +1,44 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Radio drivers
+=============
+
+There is also support for pure AM/FM radio, and even for some FM radio
+transmitters:
+
+===================== =========================================================
+Driver Name
+===================== =========================================================
+si4713 Silicon Labs Si4713 FM Radio Transmitter
+radio-aztech Aztech/Packard Bell Radio
+radio-cadet ADS Cadet AM/FM Tuner
+radio-gemtek GemTek Radio card (or compatible)
+radio-maxiradio Guillemot MAXI Radio FM 2000 radio
+radio-miropcm20 miroSOUND PCM20 radio
+radio-aimslab AIMSlab RadioTrack (aka RadioReveal)
+radio-rtrack2 AIMSlab RadioTrack II
+saa7706h SAA7706H Car Radio DSP
+radio-sf16fmi SF16-FMI/SF16-FMP/SF16-FMD Radio
+radio-sf16fmr2 SF16-FMR2/SF16-FMD2 Radio
+radio-shark Griffin radioSHARK USB radio receiver
+shark2 Griffin radioSHARK2 USB radio receiver
+radio-si470x-common Silicon Labs Si470x FM Radio Receiver
+radio-si476x Silicon Laboratories Si476x I2C FM Radio
+radio-tea5764 TEA5764 I2C FM radio
+tef6862 TEF6862 Car Radio Enhanced Selectivity Tuner
+radio-terratec TerraTec ActiveRadio ISA Standalone
+radio-timb Enable the Timberdale radio driver
+radio-trust Trust FM radio card
+radio-typhoon Typhoon Radio (a.k.a. EcoRadio)
+radio-wl1273 Texas Instruments WL1273 I2C FM Radio
+fm_drv ISA radio devices
+fm_drv ISA radio devices
+radio-zoltrix Zoltrix Radio
+dsbr100 D-Link/GemTek USB FM radio
+radio-keene Keene FM Transmitter USB
+radio-ma901 Masterkit MA901 USB FM radio
+radio-mr800 AverMedia MR 800 USB FM radio
+radio-raremono Thanko's Raremono AM/FM/SW radio
+radio-si470x-usb Silicon Labs Si470x FM Radio Receiver support with USB
+radio-usb-si4713 Silicon Labs Si4713 FM Radio Transmitter support with USB
+===================== =========================================================
diff --git a/Documentation/admin-guide/media/raspberrypi-pisp-be.dot b/Documentation/admin-guide/media/raspberrypi-pisp-be.dot
new file mode 100644
index 000000000000..55671dc1d443
--- /dev/null
+++ b/Documentation/admin-guide/media/raspberrypi-pisp-be.dot
@@ -0,0 +1,20 @@
+digraph board {
+ rankdir=TB
+ n00000001 [label="{{<port0> 0 | <port1> 1 | <port2> 2 | <port7> 7} | pispbe\n | {<port3> 3 | <port4> 4 | <port5> 5 | <port6> 6}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000001:port3 -> n0000001c [style=bold]
+ n00000001:port4 -> n00000022 [style=bold]
+ n00000001:port5 -> n00000028 [style=bold]
+ n00000001:port6 -> n0000002e [style=bold]
+ n0000000a [label="pispbe-input\n/dev/video0", shape=box, style=filled, fillcolor=yellow]
+ n0000000a -> n00000001:port0 [style=bold]
+ n00000010 [label="pispbe-tdn_input\n/dev/video1", shape=box, style=filled, fillcolor=yellow]
+ n00000010 -> n00000001:port1 [style=bold]
+ n00000016 [label="pispbe-stitch_input\n/dev/video2", shape=box, style=filled, fillcolor=yellow]
+ n00000016 -> n00000001:port2 [style=bold]
+ n0000001c [label="pispbe-output0\n/dev/video3", shape=box, style=filled, fillcolor=yellow]
+ n00000022 [label="pispbe-output1\n/dev/video4", shape=box, style=filled, fillcolor=yellow]
+ n00000028 [label="pispbe-tdn_output\n/dev/video5", shape=box, style=filled, fillcolor=yellow]
+ n0000002e [label="pispbe-stitch_output\n/dev/video6", shape=box, style=filled, fillcolor=yellow]
+ n00000034 [label="pispbe-config\n/dev/video7", shape=box, style=filled, fillcolor=yellow]
+ n00000034 -> n00000001:port7 [style=bold]
+}
diff --git a/Documentation/admin-guide/media/raspberrypi-pisp-be.rst b/Documentation/admin-guide/media/raspberrypi-pisp-be.rst
new file mode 100644
index 000000000000..0fcf46f26276
--- /dev/null
+++ b/Documentation/admin-guide/media/raspberrypi-pisp-be.rst
@@ -0,0 +1,109 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================================================
+Raspberry Pi PiSP Back End Memory-to-Memory ISP (pisp-be)
+=========================================================
+
+The PiSP Back End
+=================
+
+The PiSP Back End is a memory-to-memory Image Signal Processor (ISP) which reads
+image data from DRAM memory and performs image processing as specified by the
+application through the parameters in a configuration buffer, before writing
+pixel data back to memory through two distinct output channels.
+
+The ISP registers and programming model are documented in the `Raspberry Pi
+Image Signal Processor (PiSP) Specification document`_
+
+The PiSP Back End ISP processes images in tiles. The handling of image
+tessellation and the computation of low-level configuration parameters is
+realized by a free software library called `libpisp
+<https://github.com/raspberrypi/libpisp>`_.
+
+The full image processing pipeline, which involves capturing RAW Bayer data from
+an image sensor through a MIPI CSI-2 compatible capture interface, storing them
+in DRAM memory and processing them in the PiSP Back End to obtain images usable
+by an application is implemented in `libcamera <https://libcamera.org>`_ as
+part of the Raspberry Pi platform support.
+
+The pisp-be driver
+==================
+
+The Raspberry Pi PiSP Back End (pisp-be) driver is located under
+drivers/media/platform/raspberrypi/pisp-be. It uses the `V4L2 API` to register
+a number of video capture and output devices, the `V4L2 subdev API` to register
+a subdevice for the ISP that connects the video devices in a single media graph
+realized using the `Media Controller (MC) API`.
+
+The media topology registered by the `pisp-be` driver is represented below:
+
+.. _pips-be-topology:
+
+.. kernel-figure:: raspberrypi-pisp-be.dot
+ :alt: Diagram of the default media pipeline topology
+ :align: center
+
+
+The media graph registers the following video device nodes:
+
+- pispbe-input: output device for images to be submitted to the ISP for
+ processing.
+- pispbe-tdn_input: output device for temporal denoise.
+- pispbe-stitch_input: output device for image stitching (HDR).
+- pispbe-output0: first capture device for processed images.
+- pispbe-output1: second capture device for processed images.
+- pispbe-tdn_output: capture device for temporal denoise.
+- pispbe-stitch_output: capture device for image stitching (HDR).
+- pispbe-config: output device for ISP configuration parameters.
+
+pispbe-input
+------------
+
+Images to be processed by the ISP are queued to the `pispbe-input` output device
+node. For a list of image formats supported as input to the ISP refer to the
+`Raspberry Pi Image Signal Processor (PiSP) Specification document`_.
+
+pispbe-tdn_input, pispbe-tdn_output
+-----------------------------------
+
+The `pispbe-tdn_input` output video device receives images to be processed by
+the temporal denoise block which are captured from the `pispbe-tdn_output`
+capture video device. Userspace is responsible for maintaining queues on both
+devices, and ensuring that buffers completed on the output are queued to the
+input.
+
+pispbe-stitch_input, pispbe-stitch_output
+-----------------------------------------
+
+To realize HDR (high dynamic range) image processing the image stitching and
+tonemapping blocks are used. The `pispbe-stitch_output` writes images to memory
+and the `pispbe-stitch_input` receives the previously written frame to process
+it along with the current input image. Userspace is responsible for maintaining
+queues on both devices, and ensuring that buffers completed on the output are
+queued to the input.
+
+pispbe-output0, pispbe-output1
+------------------------------
+
+The two capture devices write to memory the pixel data as processed by the ISP.
+
+pispbe-config
+-------------
+
+The `pispbe-config` output video devices receives a buffer of configuration
+parameters that define the desired image processing to be performed by the ISP.
+
+The format of the ISP configuration parameter is defined by
+:c:type:`pisp_be_tiles_config` C structure and the meaning of each parameter is
+described in the `Raspberry Pi Image Signal Processor (PiSP) Specification
+document`_.
+
+ISP configuration
+=================
+
+The ISP configuration is described solely by the content of the parameters
+buffer. The only parameter that userspace needs to configure using the V4L2 API
+is the image format on the output and capture video devices for validation of
+the content of the parameters buffer.
+
+.. _Raspberry Pi Image Signal Processor (PiSP) Specification document: https://datasheets.raspberrypi.com/camera/raspberry-pi-image-signal-processor-specification.pdf
diff --git a/Documentation/admin-guide/media/raspberrypi-rp1-cfe.dot b/Documentation/admin-guide/media/raspberrypi-rp1-cfe.dot
new file mode 100644
index 000000000000..7717f2291049
--- /dev/null
+++ b/Documentation/admin-guide/media/raspberrypi-rp1-cfe.dot
@@ -0,0 +1,27 @@
+digraph board {
+ rankdir=TB
+ n00000001 [label="{{<port0> 0} | csi2\n/dev/v4l-subdev0 | {<port1> 1 | <port2> 2 | <port3> 3 | <port4> 4}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000001:port1 -> n00000011 [style=dashed]
+ n00000001:port1 -> n00000007:port0
+ n00000001:port2 -> n00000015
+ n00000001:port2 -> n00000007:port0 [style=dashed]
+ n00000001:port3 -> n00000019 [style=dashed]
+ n00000001:port3 -> n00000007:port0 [style=dashed]
+ n00000001:port4 -> n0000001d [style=dashed]
+ n00000001:port4 -> n00000007:port0 [style=dashed]
+ n00000007 [label="{{<port0> 0 | <port1> 1} | pisp-fe\n/dev/v4l-subdev1 | {<port2> 2 | <port3> 3 | <port4> 4}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000007:port2 -> n00000021
+ n00000007:port3 -> n00000025 [style=dashed]
+ n00000007:port4 -> n00000029
+ n0000000d [label="{imx219 6-0010\n/dev/v4l-subdev2 | {<port0> 0}}", shape=Mrecord, style=filled, fillcolor=green]
+ n0000000d:port0 -> n00000001:port0 [style=bold]
+ n00000011 [label="rp1-cfe-csi2-ch0\n/dev/video0", shape=box, style=filled, fillcolor=yellow]
+ n00000015 [label="rp1-cfe-csi2-ch1\n/dev/video1", shape=box, style=filled, fillcolor=yellow]
+ n00000019 [label="rp1-cfe-csi2-ch2\n/dev/video2", shape=box, style=filled, fillcolor=yellow]
+ n0000001d [label="rp1-cfe-csi2-ch3\n/dev/video3", shape=box, style=filled, fillcolor=yellow]
+ n00000021 [label="rp1-cfe-fe-image0\n/dev/video4", shape=box, style=filled, fillcolor=yellow]
+ n00000025 [label="rp1-cfe-fe-image1\n/dev/video5", shape=box, style=filled, fillcolor=yellow]
+ n00000029 [label="rp1-cfe-fe-stats\n/dev/video6", shape=box, style=filled, fillcolor=yellow]
+ n0000002d [label="rp1-cfe-fe-config\n/dev/video7", shape=box, style=filled, fillcolor=yellow]
+ n0000002d -> n00000007:port1
+}
diff --git a/Documentation/admin-guide/media/raspberrypi-rp1-cfe.rst b/Documentation/admin-guide/media/raspberrypi-rp1-cfe.rst
new file mode 100644
index 000000000000..668d978a9875
--- /dev/null
+++ b/Documentation/admin-guide/media/raspberrypi-rp1-cfe.rst
@@ -0,0 +1,78 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============================================
+Raspberry Pi PiSP Camera Front End (rp1-cfe)
+============================================
+
+The PiSP Camera Front End
+=========================
+
+The PiSP Camera Front End (CFE) is a module which combines a CSI-2 receiver with
+a simple ISP, called the Front End (FE).
+
+The CFE has four DMA engines and can write frames from four separate streams
+received from the CSI-2 to the memory. One of those streams can also be routed
+directly to the FE, which can do minimal image processing, write two versions
+(e.g. non-scaled and downscaled versions) of the received frames to memory and
+provide statistics of the received frames.
+
+The FE registers are documented in the `Raspberry Pi Image Signal Processor
+(ISP) Specification document
+<https://datasheets.raspberrypi.com/camera/raspberry-pi-image-signal-processor-specification.pdf>`_,
+and example code for FE can be found in `libpisp
+<https://github.com/raspberrypi/libpisp>`_.
+
+The rp1-cfe driver
+==================
+
+The Raspberry Pi PiSP Camera Front End (rp1-cfe) driver is located under
+drivers/media/platform/raspberrypi/rp1-cfe. It uses the `V4L2 API` to register
+a number of video capture and output devices, the `V4L2 subdev API` to register
+subdevices for the CSI-2 received and the FE that connects the video devices in
+a single media graph realized using the `Media Controller (MC) API`.
+
+The media topology registered by the `rp1-cfe` driver, in this particular
+example connected to an imx219 sensor, is the following one:
+
+.. _rp1-cfe-topology:
+
+.. kernel-figure:: raspberrypi-rp1-cfe.dot
+ :alt: Diagram of an example media pipeline topology
+ :align: center
+
+The media graph contains the following video device nodes:
+
+- rp1-cfe-csi2-ch0: capture device for the first CSI-2 stream
+- rp1-cfe-csi2-ch1: capture device for the second CSI-2 stream
+- rp1-cfe-csi2-ch2: capture device for the third CSI-2 stream
+- rp1-cfe-csi2-ch3: capture device for the fourth CSI-2 stream
+- rp1-cfe-fe-image0: capture device for the first FE output
+- rp1-cfe-fe-image1: capture device for the second FE output
+- rp1-cfe-fe-stats: capture device for the FE statistics
+- rp1-cfe-fe-config: output device for FE configuration
+
+rp1-cfe-csi2-chX
+----------------
+
+The rp1-cfe-csi2-chX capture devices are normal V4L2 capture devices which
+can be used to capture video frames or metadata received from the CSI-2.
+
+rp1-cfe-fe-image0, rp1-cfe-fe-image1
+------------------------------------
+
+The rp1-cfe-fe-image0 and rp1-cfe-fe-image1 capture devices are used to write
+the processed frames to memory.
+
+rp1-cfe-fe-stats
+----------------
+
+The format of the FE statistics buffer is defined by
+:c:type:`pisp_statistics` C structure and the meaning of each parameter is
+described in the `PiSP specification` document.
+
+rp1-cfe-fe-config
+-----------------
+
+The format of the FE configuration buffer is defined by
+:c:type:`pisp_fe_config` C structure and the meaning of each parameter is
+described in the `PiSP specification` document.
diff --git a/Documentation/media/v4l-drivers/rcar-fdp1.rst b/Documentation/admin-guide/media/rcar-fdp1.rst
index a59b1e8e3e9c..88b0edcf9046 100644
--- a/Documentation/media/v4l-drivers/rcar-fdp1.rst
+++ b/Documentation/admin-guide/media/rcar-fdp1.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
Renesas R-Car Fine Display Processor (FDP1) Driver
==================================================
diff --git a/Documentation/admin-guide/media/remote-controller.rst b/Documentation/admin-guide/media/remote-controller.rst
new file mode 100644
index 000000000000..188944b00f4f
--- /dev/null
+++ b/Documentation/admin-guide/media/remote-controller.rst
@@ -0,0 +1,76 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================================================
+Infrared remote control support in video4linux drivers
+======================================================
+
+Authors: Gerd Hoffmann, Mauro Carvalho Chehab
+
+Basics
+======
+
+Most analog and digital TV boards support remote controllers. Several of
+them have a microprocessor that receives the IR carriers, convert into
+pulse/space sequences and then to scan codes, returning such codes to
+userspace ("scancode mode"). Other boards return just the pulse/space
+sequences ("raw mode").
+
+The support for remote controller in scancode mode is provided by the
+standard Linux input layer. The support for raw mode is provided via LIRC.
+
+In order to check the support and test it, it is suggested to download
+the `v4l-utils <https://git.linuxtv.org/v4l-utils.git/>`_. It provides
+two tools to handle remote controllers:
+
+- ir-keytable: provides a way to query the remote controller, list the
+ protocols it supports, enable in-kernel support for IR decoder or
+ switch the protocol and to test the reception of scan codes;
+
+- ir-ctl: provide tools to handle remote controllers that support raw mode
+ via LIRC interface.
+
+Usually, the remote controller module is auto-loaded when the TV card is
+detected. However, for a few devices, you need to manually load the
+ir-kbd-i2c module.
+
+How it works
+============
+
+The modules register the remote as keyboard within the linux input
+layer, i.e. you'll see the keys of the remote as normal key strokes
+(if CONFIG_INPUT_KEYBOARD is enabled).
+
+Using the event devices (CONFIG_INPUT_EVDEV) it is possible for
+applications to access the remote via /dev/input/event<n> devices.
+The udev/systemd will automatically create the devices. If you install
+the `v4l-utils <https://git.linuxtv.org/v4l-utils.git/>`_, it may also
+automatically load a different keytable than the default one. Please see
+`v4l-utils <https://git.linuxtv.org/v4l-utils.git/>`_ ir-keytable.1
+man page for details.
+
+The ir-keytable tool is nice for trouble shooting, i.e. to check
+whenever the input device is really present, which of the devices it
+is, check whenever pressing keys on the remote actually generates
+events and the like. You can also use any other input utility that changes
+the keymaps, like the input kbd utility.
+
+
+Using with lircd
+----------------
+
+The latest versions of the lircd daemon supports reading events from the
+linux input layer (via event device). It also supports receiving IR codes
+in lirc mode.
+
+
+Using without lircd
+-------------------
+
+Xorg recognizes several IR keycodes that have its numerical value lower
+than 247. With the advent of Wayland, the input driver got updated too,
+and should now accept all keycodes. Yet, you may want to just reassign
+the keycodes to something that your favorite media application likes.
+
+This can be done by setting
+`v4l-utils <https://git.linuxtv.org/v4l-utils.git/>`_ to load your own
+keytable in runtime. Please read ir-keytable.1 man page for details.
diff --git a/Documentation/admin-guide/media/rkisp1.dot b/Documentation/admin-guide/media/rkisp1.dot
new file mode 100644
index 000000000000..54c1953a6130
--- /dev/null
+++ b/Documentation/admin-guide/media/rkisp1.dot
@@ -0,0 +1,18 @@
+digraph board {
+ rankdir=TB
+ n00000001 [label="{{<port0> 0 | <port1> 1} | rkisp1_isp\n/dev/v4l-subdev0 | {<port2> 2 | <port3> 3}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000001:port2 -> n00000006:port0
+ n00000001:port2 -> n00000009:port0
+ n00000001:port3 -> n00000014 [style=bold]
+ n00000006 [label="{{<port0> 0} | rkisp1_resizer_mainpath\n/dev/v4l-subdev1 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000006:port1 -> n0000000c [style=bold]
+ n00000009 [label="{{<port0> 0} | rkisp1_resizer_selfpath\n/dev/v4l-subdev2 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000009:port1 -> n00000010 [style=bold]
+ n0000000c [label="rkisp1_mainpath\n/dev/video0", shape=box, style=filled, fillcolor=yellow]
+ n00000010 [label="rkisp1_selfpath\n/dev/video1", shape=box, style=filled, fillcolor=yellow]
+ n00000014 [label="rkisp1_stats\n/dev/video2", shape=box, style=filled, fillcolor=yellow]
+ n00000018 [label="rkisp1_params\n/dev/video3", shape=box, style=filled, fillcolor=yellow]
+ n00000018 -> n00000001:port1 [style=bold]
+ n0000001c [label="{{} | imx219 4-0010\n/dev/v4l-subdev3 | {<port0> 0}}", shape=Mrecord, style=filled, fillcolor=green]
+ n0000001c:port0 -> n00000001:port0
+}
diff --git a/Documentation/admin-guide/media/rkisp1.rst b/Documentation/admin-guide/media/rkisp1.rst
new file mode 100644
index 000000000000..6c878c71442f
--- /dev/null
+++ b/Documentation/admin-guide/media/rkisp1.rst
@@ -0,0 +1,204 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: <isonum.txt>
+
+=========================================
+Rockchip Image Signal Processor (rkisp1)
+=========================================
+
+Introduction
+============
+
+This file documents the driver for the Rockchip ISP1 that is part of RK3288
+and RK3399 SoCs. The driver is located under drivers/media/platform/rockchip/
+rkisp1 and uses the Media-Controller API.
+
+Revisions
+=========
+
+There exist multiple smaller revisions to this ISP that got introduced in
+later SoCs. Revisions can be found in the enum :c:type:`rkisp1_cif_isp_version`
+in the UAPI and the revision of the ISP inside the running SoC can be read
+in the field hw_revision of struct media_device_info as returned by
+ioctl MEDIA_IOC_DEVICE_INFO.
+
+Versions in use are:
+
+- RKISP1_V10: used at least in rk3288 and rk3399
+- RKISP1_V11: declared in the original vendor code, but not used
+- RKISP1_V12: used at least in rk3326 and px30
+- RKISP1_V13: used at least in rk1808
+
+Topology
+========
+.. _rkisp1_topology_graph:
+
+.. kernel-figure:: rkisp1.dot
+ :alt: Diagram of the default media pipeline topology
+ :align: center
+
+
+The driver has 4 video devices:
+
+- rkisp1_mainpath: capture device for retrieving images, usually in higher
+ resolution.
+- rkisp1_selfpath: capture device for retrieving images.
+- rkisp1_stats: a metadata capture device that sends statistics.
+- rkisp1_params: a metadata output device that receives parameters
+ configurations from userspace.
+
+The driver has 3 subdevices:
+
+- rkisp1_resizer_mainpath: used to resize and downsample frames for the
+ mainpath capture device.
+- rkisp1_resizer_selfpath: used to resize and downsample frames for the
+ selfpath capture device.
+- rkisp1_isp: is connected to the sensor and is responsible for all the isp
+ operations.
+
+
+rkisp1_mainpath, rkisp1_selfpath - Frames Capture Video Nodes
+-------------------------------------------------------------
+Those are the `mainpath` and `selfpath` capture devices to capture frames.
+Those entities are the DMA engines that write the frames to memory.
+The selfpath video device can capture YUV/RGB formats. Its input is YUV encoded
+stream and it is able to convert it to RGB. The selfpath is not able to
+capture bayer formats.
+The mainpath can capture both bayer and YUV formats but it is not able to
+capture RGB formats.
+Both capture videos support
+the ``V4L2_CAP_IO_MC`` :ref:`capability <device-capabilities>`.
+
+
+rkisp1_resizer_mainpath, rkisp1_resizer_selfpath - Resizers Subdevices Nodes
+----------------------------------------------------------------------------
+Those are resizer entities for the mainpath and the selfpath. Those entities
+can scale the frames up and down and also change the YUV sampling (for example
+YUV4:2:2 -> YUV4:2:0). They also have cropping capability on the sink pad.
+The resizers entities can only operate on YUV:4:2:2 format
+(MEDIA_BUS_FMT_YUYV8_2X8).
+The mainpath capture device supports capturing video in bayer formats. In that
+case the resizer of the mainpath is set to 'bypass' mode - it just forward the
+frame without operating on it.
+
+rkisp1_isp - Image Signal Processing Subdevice Node
+---------------------------------------------------
+This is the isp entity. It is connected to the sensor on sink pad 0 and
+receives the frames using the CSI-2 protocol. It is responsible of configuring
+the CSI-2 protocol. It has a cropping capability on sink pad 0 that is
+connected to the sensor and on source pad 2 connected to the resizer entities.
+Cropping on sink pad 0 defines the image region from the sensor.
+Cropping on source pad 2 defines the region for the Image Stabilizer (IS).
+
+.. _rkisp1_stats:
+
+rkisp1_stats - Statistics Video Node
+------------------------------------
+The statistics video node outputs the 3A (auto focus, auto exposure and auto
+white balance) statistics, and also histogram statistics for the frames that
+are being processed by the rkisp1 to userspace applications.
+Using these data, applications can implement algorithms and re-parameterize
+the driver through the rkisp_params node to improve image quality during a
+video stream.
+The buffer format is defined by struct :c:type:`rkisp1_stat_buffer`, and
+userspace should set
+:ref:`V4L2_META_FMT_RK_ISP1_STAT_3A <v4l2-meta-fmt-rk-isp1-stat-3a>` as the
+dataformat.
+
+.. _rkisp1_params:
+
+rkisp1_params - Parameters Video Node
+-------------------------------------
+The rkisp1_params video node receives a set of parameters from userspace
+to be applied to the hardware during a video stream, allowing userspace
+to dynamically modify values such as black level, cross talk corrections
+and others.
+
+The ISP driver supports two different parameters configuration methods, the
+`fixed parameters format` or the `extensible parameters format`.
+
+When using the `fixed parameters` method the buffer format is defined by struct
+:c:type:`rkisp1_params_cfg`, and userspace should set
+:ref:`V4L2_META_FMT_RK_ISP1_PARAMS <v4l2-meta-fmt-rk-isp1-params>` as the
+dataformat.
+
+When using the `extensible parameters` method the buffer format is defined by
+struct :c:type:`rkisp1_ext_params_cfg`, and userspace should set
+:ref:`V4L2_META_FMT_RK_ISP1_EXT_PARAMS <v4l2-meta-fmt-rk-isp1-ext-params>` as
+the dataformat.
+
+Capturing Video Frames Example
+==============================
+
+In the following example, the sensor connected to pad 0 of 'rkisp1_isp' is
+imx219.
+
+The following commands can be used to capture video from the selfpath video
+node with dimension 900x800 planar format YUV 4:2:2. It uses all cropping
+capabilities possible, (see explanation right below)
+
+.. code-block:: bash
+
+ # set the links
+ "media-ctl" "-d" "platform:rkisp1" "-r"
+ "media-ctl" "-d" "platform:rkisp1" "-l" "'imx219 4-0010':0 -> 'rkisp1_isp':0 [1]"
+ "media-ctl" "-d" "platform:rkisp1" "-l" "'rkisp1_isp':2 -> 'rkisp1_resizer_selfpath':0 [1]"
+ "media-ctl" "-d" "platform:rkisp1" "-l" "'rkisp1_isp':2 -> 'rkisp1_resizer_mainpath':0 [0]"
+
+ # set format for imx219 4-0010:0
+ "media-ctl" "-d" "platform:rkisp1" "--set-v4l2" '"imx219 4-0010":0 [fmt:SRGGB10_1X10/1640x1232]'
+
+ # set format for rkisp1_isp pads:
+ "media-ctl" "-d" "platform:rkisp1" "--set-v4l2" '"rkisp1_isp":0 [fmt:SRGGB10_1X10/1640x1232 crop: (0,0)/1600x1200]'
+ "media-ctl" "-d" "platform:rkisp1" "--set-v4l2" '"rkisp1_isp":2 [fmt:YUYV8_2X8/1600x1200 crop: (0,0)/1500x1100]'
+
+ # set format for rkisp1_resizer_selfpath pads:
+ "media-ctl" "-d" "platform:rkisp1" "--set-v4l2" '"rkisp1_resizer_selfpath":0 [fmt:YUYV8_2X8/1500x1100 crop: (300,400)/1400x1000]'
+ "media-ctl" "-d" "platform:rkisp1" "--set-v4l2" '"rkisp1_resizer_selfpath":1 [fmt:YUYV8_2X8/900x800]'
+
+ # set format for rkisp1_selfpath:
+ "v4l2-ctl" "-z" "platform:rkisp1" "-d" "rkisp1_selfpath" "-v" "width=900,height=800,"
+ "v4l2-ctl" "-z" "platform:rkisp1" "-d" "rkisp1_selfpath" "-v" "pixelformat=422P"
+
+ # start streaming:
+ v4l2-ctl "-z" "platform:rkisp1" "-d" "rkisp1_selfpath" "--stream-mmap" "--stream-count" "10"
+
+
+In the above example the sensor is configured to bayer format:
+`SRGGB10_1X10/1640x1232`. The rkisp1_isp:0 pad should be configured to the
+same mbus format and dimensions as the sensor, otherwise streaming will fail
+with 'EPIPE' error. So it is also configured to `SRGGB10_1X10/1640x1232`.
+In addition, the rkisp1_isp:0 pad is configured to cropping `(0,0)/1600x1200`.
+
+The cropping dimensions are automatically propagated to be the format of the
+isp source pad `rkisp1_isp:2`. Another cropping operation is configured on
+the isp source pad: `(0,0)/1500x1100`.
+
+The resizer's sink pad `rkisp1_resizer_selfpath` should be configured to format
+`YUYV8_2X8/1500x1100` in order to match the format on the other side of the
+link. In addition a cropping `(300,400)/1400x1000` is configured on it.
+
+The source pad of the resizer, `rkisp1_resizer_selfpath:1` is configured to
+format `YUYV8_2X8/900x800`. That means that the resizer first crop a window
+of `(300,400)/1400x100` from the received frame and then scales this window
+to dimension `900x800`.
+
+Note that the above example does not uses the stats-params control loop.
+Therefore the capture frames will not go through the 3A algorithms and
+probably won't have a good quality, and can even look dark and greenish.
+
+Configuring Quantization
+========================
+
+The driver supports limited and full range quantization on YUV formats,
+where limited is the default.
+To switch between one or the other, userspace should use the Colorspace
+Conversion API (CSC) for subdevices on source pad 2 of the
+isp (`rkisp1_isp:2`). The quantization configured on this pad is the
+quantization of the captured video frames on the mainpath and selfpath
+video nodes.
+Note that the resizer and capture entities will always report
+``V4L2_QUANTIZATION_DEFAULT`` even if the quantization is configured to full
+range on `rkisp1_isp:2`. So in order to get the configured quantization,
+application should get it from pad `rkisp1_isp:2`.
+
diff --git a/Documentation/media/v4l-drivers/saa7134-cardlist.rst b/Documentation/admin-guide/media/saa7134-cardlist.rst
index 6e4c35cbaabf..3ef8fab6bcad 100644
--- a/Documentation/media/v4l-drivers/saa7134-cardlist.rst
+++ b/Documentation/admin-guide/media/saa7134-cardlist.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
SAA7134 cards list
==================
@@ -10,7 +12,7 @@ SAA7134 cards list
* - Card number
- Card name
- - PCI IDs
+ - PCI subsystem IDs
* - 0
- UNKNOWN/GENERIC
diff --git a/Documentation/admin-guide/media/saa7134.rst b/Documentation/admin-guide/media/saa7134.rst
new file mode 100644
index 000000000000..18d7cbc897db
--- /dev/null
+++ b/Documentation/admin-guide/media/saa7134.rst
@@ -0,0 +1,89 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+The saa7134 driver
+==================
+
+Author Gerd Hoffmann
+
+
+This is a v4l2/oss device driver for saa7130/33/34/35 based capture / TV
+boards.
+
+
+Status
+------
+
+Almost everything is working. video, sound, tuner, radio, mpeg ts, ...
+
+As with bttv, card-specific tweaks are needed. Check CARDLIST for a
+list of known TV cards and saa7134-cards.c for the drivers card
+configuration info.
+
+
+Build
+-----
+
+Once you pick up a Kernel source, you should configure, build,
+install and boot the new kernel. You'll need at least
+these config options::
+
+ ./scripts/config -e PCI
+ ./scripts/config -e INPUT
+ ./scripts/config -m I2C
+ ./scripts/config -m MEDIA_SUPPORT
+ ./scripts/config -e MEDIA_PCI_SUPPORT
+ ./scripts/config -e MEDIA_ANALOG_TV_SUPPORT
+ ./scripts/config -e MEDIA_DIGITAL_TV_SUPPORT
+ ./scripts/config -e MEDIA_RADIO_SUPPORT
+ ./scripts/config -e RC_CORE
+ ./scripts/config -e MEDIA_SUBDRV_AUTOSELECT
+ ./scripts/config -m VIDEO_SAA7134
+ ./scripts/config -e SAA7134_ALSA
+ ./scripts/config -e VIDEO_SAA7134_RC
+ ./scripts/config -e VIDEO_SAA7134_DVB
+ ./scripts/config -e VIDEO_SAA7134_GO7007
+
+To build and install, you should run::
+
+ make && make modules_install && make install
+
+Once the new Kernel is booted, saa7134 driver should be loaded automatically.
+
+Depending on the card you might have to pass ``card=<nr>`` as insmod option.
+If so, please check Documentation/admin-guide/media/saa7134-cardlist.rst
+for valid choices.
+
+Once you have your card type number, you can pass a modules configuration
+via a file (usually, it is either ``/etc/modules.conf`` or some file at
+``/etc/modules-load.d/``, but the actual place depends on your
+distribution), with this content::
+
+ options saa7134 card=13 # Assuming that your card type is #13
+
+
+Changes / Fixes
+---------------
+
+Please mail to linux-media AT vger.kernel.org unified diffs against
+the linux media git tree:
+
+ https://git.linuxtv.org/media.git/
+
+This is done by committing a patch at a clone of the git tree and
+submitting the patch using ``git send-email``. Don't forget to
+describe at the lots what it changes / which problem it fixes / whatever
+it is good for ...
+
+
+Known Problems
+--------------
+
+* The tuner for the flyvideos isn't detected automatically and the
+ default might not work for you depending on which version you have.
+ There is a ``tuner=`` insmod option to override the driver's default.
+
+Credits
+-------
+
+andrew.stevens@philips.com + werner.leeb@philips.com for providing
+saa7134 hardware specs and sample board.
diff --git a/Documentation/media/v4l-drivers/saa7164-cardlist.rst b/Documentation/admin-guide/media/saa7164-cardlist.rst
index e28382ba82e6..7949c09aa900 100644
--- a/Documentation/media/v4l-drivers/saa7164-cardlist.rst
+++ b/Documentation/admin-guide/media/saa7164-cardlist.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
SAA7164 cards list
==================
@@ -10,7 +12,7 @@ SAA7164 cards list
* - Card number
- Card name
- - PCI IDs
+ - PCI subsystem IDs
* - 0
- Unknown
diff --git a/Documentation/media/v4l-drivers/si470x.rst b/Documentation/admin-guide/media/si470x.rst
index 955d8ca159fe..d53bf5f95200 100644
--- a/Documentation/media/v4l-drivers/si470x.rst
+++ b/Documentation/admin-guide/media/si470x.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
.. include:: <isonum.txt>
The Silicon Labs Si470x FM Radio Receivers driver
diff --git a/Documentation/media/v4l-drivers/si4713.rst b/Documentation/admin-guide/media/si4713.rst
index 3022e7cfe9a8..85dcf1cd2df8 100644
--- a/Documentation/media/v4l-drivers/si4713.rst
+++ b/Documentation/admin-guide/media/si4713.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
.. include:: <isonum.txt>
The Silicon Labs Si4713 FM Radio Transmitter Driver
@@ -11,7 +13,7 @@ Contact: Eduardo Valentin <eduardo.valentin@nokia.com>
Information about the Device
----------------------------
-This chip is a Silicon Labs product. It is a I2C device, currently on 0x63 address.
+This chip is a Silicon Labs product. It is an I2C device, currently on 0x63 address.
Basically, it has transmission and signal noise level measurement features.
The Si4713 integrates transmit functions for FM broadcast stereo transmission.
@@ -26,7 +28,7 @@ Users must comply with local regulations on radio frequency (RF) transmission.
Device driver description
-------------------------
-There are two modules to handle this device. One is a I2C device driver
+There are two modules to handle this device. One is an I2C device driver
and the other is a platform driver.
The I2C device driver exports a v4l2-subdev interface to the kernel.
@@ -111,7 +113,7 @@ Here is a summary of them:
- acomp_attack_time - Sets the attack time for audio dynamic range control.
- acomp_release_time - Sets the release time for audio dynamic range control.
-* Limiter setups audio deviation limiter feature. Once a over deviation occurs,
+* Limiter sets up the audio deviation limiter feature. Once an over deviation occurs,
it is possible to adjust the front-end gain of the audio input and always
prevent over deviation.
diff --git a/Documentation/media/v4l-drivers/si476x.rst b/Documentation/admin-guide/media/si476x.rst
index 677512566f15..c8882ee9f208 100644
--- a/Documentation/media/v4l-drivers/si476x.rst
+++ b/Documentation/admin-guide/media/si476x.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
.. include:: <isonum.txt>
@@ -140,7 +142,7 @@ The drivers exposes following files:
indicator
0x18 lassi Signed Low side adjacent Channel
Strength indicator
- 0x19 hassi ditto fpr High side
+ 0x19 hassi ditto for High side
0x20 mult Multipath indicator
0x21 dev Frequency deviation
0x24 assi Adjacent channel SSI
diff --git a/Documentation/admin-guide/media/siano-cardlist.rst b/Documentation/admin-guide/media/siano-cardlist.rst
new file mode 100644
index 000000000000..bb731a953878
--- /dev/null
+++ b/Documentation/admin-guide/media/siano-cardlist.rst
@@ -0,0 +1,56 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Siano cards list
+================
+
+.. tabularcolumns:: p{13.3cm}|p{4.2cm}|
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 17 16
+ :stub-columns: 0
+
+ * - Card name
+ - USB IDs
+ * - Hauppauge Catamount
+ - 2040:1700
+ * - Hauppauge Okemo-A
+ - 2040:1800
+ * - Hauppauge Okemo-B
+ - 2040:1801
+ * - Hauppauge WinTV MiniCard
+ - 2040:2000, 2040:200a, 2040:2010, 2040:2011, 2040:2019
+ * - Hauppauge WinTV MiniCard Rev 2
+ - 2040:2009
+ * - Hauppauge WinTV MiniStick
+ - 2040:5500, 2040:5510, 2040:5520, 2040:5530, 2040:5580, 2040:5590, 2040:b900, 2040:b910, 2040:b980, 2040:b990, 2040:c000, 2040:c010, 2040:c080, 2040:c090, 2040:c0a0, 2040:f5a0
+ * - Hauppauge microStick 77e
+ - 2013:0257
+ * - ONDA Data Card Digital Receiver
+ - 19D2:0078
+ * - Siano Denver (ATSC-M/H) Digital Receiver
+ - 187f:0800
+ * - Siano Denver (TDMB) Digital Receiver
+ - 187f:0700
+ * - Siano Ming Digital Receiver
+ - 187f:0310
+ * - Siano Nice Digital Receiver
+ - 187f:0202, 187f:0202
+ * - Siano Nova A Digital Receiver
+ - 187f:0200
+ * - Siano Nova B Digital Receiver
+ - 187f:0201
+ * - Siano Pele Digital Receiver
+ - 187f:0500
+ * - Siano Rio Digital Receiver
+ - 187f:0600, 3275:0080
+ * - Siano Stellar Digital Receiver
+ - 187f:0100
+ * - Siano Stellar Digital Receiver ROM
+ - 187f:0010
+ * - Siano Vega Digital Receiver
+ - 187f:0300
+ * - Siano Venice Digital Receiver
+ - 187f:0301, 187f:0301, 187f:0302
+ * - ZTE Data Card Digital Receiver
+ - 19D2:0086
diff --git a/Documentation/admin-guide/media/starfive_camss.rst b/Documentation/admin-guide/media/starfive_camss.rst
new file mode 100644
index 000000000000..ca42e9447c47
--- /dev/null
+++ b/Documentation/admin-guide/media/starfive_camss.rst
@@ -0,0 +1,72 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: <isonum.txt>
+
+================================
+Starfive Camera Subsystem driver
+================================
+
+Introduction
+------------
+
+This file documents the driver for the Starfive Camera Subsystem found on
+Starfive JH7110 SoC. The driver is located under drivers/staging/media/starfive/
+camss.
+
+The driver implements V4L2, Media controller and v4l2_subdev interfaces. Camera
+sensor using V4L2 subdev interface in the kernel is supported.
+
+The driver has been successfully used on the Gstreamer 1.18.5 with v4l2src
+plugin.
+
+
+Starfive Camera Subsystem hardware
+----------------------------------
+
+The Starfive Camera Subsystem hardware consists of::
+
+ |\ +---------------+ +-----------+
+ +----------+ | \ | | | |
+ | | | | | | | |
+ | MIPI |----->| |----->| ISP |----->| |
+ | | | | | | | |
+ +----------+ | | | | | Memory |
+ |MUX| +---------------+ | Interface |
+ +----------+ | | | |
+ | | | |---------------------------->| |
+ | Parallel |----->| | | |
+ | | | | | |
+ +----------+ | / | |
+ |/ +-----------+
+
+- MIPI: The MIPI interface, receiving data from a MIPI CSI-2 camera sensor.
+
+- Parallel: The parallel interface, receiving data from a parallel sensor.
+
+- ISP: The ISP, processing raw Bayer data from an image sensor and producing
+ YUV frames.
+
+
+Topology
+--------
+
+The media controller pipeline graph is as follows:
+
+.. _starfive_camss_graph:
+
+.. kernel-figure:: starfive_camss_graph.dot
+ :alt: starfive_camss_graph.dot
+ :align: center
+
+The driver has 2 video devices:
+
+- capture_raw: The capture device, capturing image data directly from a sensor.
+- capture_yuv: The capture device, capturing YUV frame data processed by the
+ ISP module
+
+The driver has 3 subdevices:
+
+- stf_isp: is responsible for all the isp operations, outputs YUV frames.
+- cdns_csi2rx: a CSI-2 bridge supporting up to 4 CSI lanes in input, and 4
+ different pixel streams in output.
+- imx219: an image sensor, image data is sent through MIPI CSI-2.
diff --git a/Documentation/admin-guide/media/starfive_camss_graph.dot b/Documentation/admin-guide/media/starfive_camss_graph.dot
new file mode 100644
index 000000000000..8eff1f161ac7
--- /dev/null
+++ b/Documentation/admin-guide/media/starfive_camss_graph.dot
@@ -0,0 +1,12 @@
+digraph board {
+ rankdir=TB
+ n00000001 [label="{{<port0> 0} | stf_isp\n/dev/v4l-subdev0 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000001:port1 -> n00000008 [style=dashed]
+ n00000004 [label="capture_raw\n/dev/video0", shape=box, style=filled, fillcolor=yellow]
+ n00000008 [label="capture_yuv\n/dev/video1", shape=box, style=filled, fillcolor=yellow]
+ n0000000e [label="{{<port0> 0} | cdns_csi2rx.19800000.csi-bridge\n | {<port1> 1 | <port2> 2 | <port3> 3 | <port4> 4}}", shape=Mrecord, style=filled, fillcolor=green]
+ n0000000e:port1 -> n00000001:port0 [style=dashed]
+ n0000000e:port1 -> n00000004 [style=dashed]
+ n00000018 [label="{{} | imx219 6-0010\n/dev/v4l-subdev1 | {<port0> 0}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000018:port0 -> n0000000e:port0 [style=bold]
+}
diff --git a/Documentation/media/dvb-drivers/technisat.rst b/Documentation/admin-guide/media/technisat.rst
index f80f4ecc1560..9eaa12366bbf 100644
--- a/Documentation/media/dvb-drivers/technisat.rst
+++ b/Documentation/admin-guide/media/technisat.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
How to set up the Technisat/B2C2 Flexcop devices
================================================
diff --git a/Documentation/media/dvb-drivers/ttusb-dec.rst b/Documentation/admin-guide/media/ttusb-dec.rst
index 84fc2199dc29..516bbab8a872 100644
--- a/Documentation/media/dvb-drivers/ttusb-dec.rst
+++ b/Documentation/admin-guide/media/ttusb-dec.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
TechnoTrend/Hauppauge DEC USB Driver
====================================
diff --git a/Documentation/media/v4l-drivers/tuner-cardlist.rst b/Documentation/admin-guide/media/tuner-cardlist.rst
index 276dd90e0c59..65ecf48ddf24 100644
--- a/Documentation/media/v4l-drivers/tuner-cardlist.rst
+++ b/Documentation/admin-guide/media/tuner-cardlist.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
Tuner cards list
================
@@ -95,4 +97,6 @@ Tuner number Card name
89 Sony BTF-PG472Z PAL/SECAM
90 Sony BTF-PK467Z NTSC-M-JP
91 Sony BTF-PB463Z NTSC-M
+92 Silicon Labs Si2157 tuner
+93 Tena TNF931D-DFDR1
============ =====================================================
diff --git a/Documentation/admin-guide/media/usb-cardlist.rst b/Documentation/admin-guide/media/usb-cardlist.rst
new file mode 100644
index 000000000000..5f5ab0723e48
--- /dev/null
+++ b/Documentation/admin-guide/media/usb-cardlist.rst
@@ -0,0 +1,149 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+USB drivers
+===========
+
+The USB boards are identified by an identification called USB ID.
+
+The ``lsusb`` command allows identifying the USB IDs::
+
+ $ lsusb
+ ...
+ Bus 001 Device 015: ID 046d:082d Logitech, Inc. HD Pro Webcam C920
+ Bus 001 Device 074: ID 2040:b131 Hauppauge
+ Bus 001 Device 075: ID 2013:024f PCTV Systems nanoStick T2 290e
+ ...
+
+Newer camera devices use a standard way to expose themselves as such,
+via USB Video Class. Those cameras are automatically supported by the
+``uvc-driver``.
+
+Older cameras and TV USB devices uses USB Vendor Classes: each vendor
+defines its own way to access the device. This section contains
+card lists for such vendor-class devices.
+
+While this is not as common as on PCI, sometimes the same USB ID is used
+by different products. So, several media drivers allow passing a ``card=``
+parameter, in order to setup a card number that would match the correct
+settings for an specific product type.
+
+The current supported USB cards (not including staging drivers) are
+listed below\ [#]_.
+
+.. [#]
+
+ some of the drivers have sub-drivers, not shown at this table.
+ In particular, gspca driver has lots of sub-drivers,
+ for cameras not supported by the USB Video Class (UVC) driver,
+ as shown at :doc:`gspca card list <gspca-cardlist>`.
+
+====================== =========================================================
+Driver Name
+====================== =========================================================
+airspy AirSpy
+au0828 Auvitek AU0828
+b2c2-flexcop-usb Technisat/B2C2 Air/Sky/Cable2PC USB
+cx231xx Conexant cx231xx USB video capture
+dvb-as102 Abilis AS102 DVB receiver
+dvb-ttusb-budget Technotrend/Hauppauge Nova - USB devices
+dvb-usb-a800 AVerMedia AverTV DVB-T USB 2.0 (A800)
+dvb-usb-af9005 Afatech AF9005 DVB-T USB1.1
+dvb-usb-af9015 Afatech AF9015 DVB-T USB2.0
+dvb-usb-af9035 Afatech AF9035 DVB-T USB2.0
+dvb-usb-anysee Anysee DVB-T/C USB2.0
+dvb-usb-au6610 Alcor Micro AU6610 USB2.0
+dvb-usb-az6007 AzureWave 6007 and clones DVB-T/C USB2.0
+dvb-usb-az6027 Azurewave DVB-S/S2 USB2.0 AZ6027
+dvb-usb-ce6230 Intel CE6230 DVB-T USB2.0
+dvb-usb-cinergyT2 Terratec CinergyT2/qanu USB 2.0 DVB-T
+dvb-usb-cxusb Conexant USB2.0 hybrid
+dvb-usb-dib0700 DiBcom DiB0700
+dvb-usb-dibusb-common DiBcom DiB3000M-B
+dvb-usb-dibusb-mc DiBcom DiB3000M-C/P
+dvb-usb-digitv Nebula Electronics uDigiTV DVB-T USB2.0
+dvb-usb-dtt200u WideView WT-200U and WT-220U (pen) DVB-T
+dvb-usb-dtv5100 AME DTV-5100 USB2.0 DVB-T
+dvb-usb-dvbsky DVBSky USB
+dvb-usb-dw2102 DvbWorld & TeVii DVB-S/S2 USB2.0
+dvb-usb-ec168 E3C EC168 DVB-T USB2.0
+dvb-usb-gl861 Genesys Logic GL861 USB2.0
+dvb-usb-gp8psk GENPIX 8PSK->USB module
+dvb-usb-lmedm04 LME DM04/QQBOX DVB-S USB2.0
+dvb-usb-m920x Uli m920x DVB-T USB2.0
+dvb-usb-nova-t-usb2 Hauppauge WinTV-NOVA-T usb2 DVB-T USB2.0
+dvb-usb-opera Opera1 DVB-S USB2.0 receiver
+dvb-usb-pctv452e Pinnacle PCTV HDTV Pro USB device/TT Connect S2-3600
+dvb-usb-rtl28xxu Realtek RTL28xxU DVB USB
+dvb-usb-technisat-usb2 Technisat DVB-S/S2 USB2.0
+dvb-usb-ttusb2 Pinnacle 400e DVB-S USB2.0
+dvb-usb-umt-010 HanfTek UMT-010 DVB-T USB2.0
+dvb_usb_v2 Support for various USB DVB devices v2
+dvb-usb-vp702x TwinhanDTV StarBox and clones DVB-S USB2.0
+dvb-usb-vp7045 TwinhanDTV Alpha/MagicBoxII, DNTV tinyUSB2, Beetle USB2.0
+em28xx Empia EM28xx USB devices
+go7007 WIS GO7007 MPEG encoder
+gspca Drivers for several USB Cameras
+hackrf HackRF
+hdpvr Hauppauge HD PVR
+msi2500 Mirics MSi2500
+mxl111sf-tuner MxL111SF DTV USB2.0
+pvrusb2 Hauppauge WinTV-PVR USB2
+pwc USB Philips Cameras
+s2250 Sensoray 2250/2251
+s2255drv USB Sensoray 2255 video capture device
+smsusb Siano SMS1xxx based MDTV receiver
+ttusb_dec Technotrend/Hauppauge USB DEC devices
+usbtv USBTV007 video capture
+uvcvideo USB Video Class (UVC)
+zd1301 ZyDAS ZD1301
+====================== =========================================================
+
+.. toctree::
+ :maxdepth: 1
+
+ au0828-cardlist
+ cx231xx-cardlist
+ em28xx-cardlist
+ siano-cardlist
+
+ gspca-cardlist
+
+ dvb-usb-dib0700-cardlist
+ dvb-usb-dibusb-mb-cardlist
+ dvb-usb-dibusb-mc-cardlist
+
+ dvb-usb-a800-cardlist
+ dvb-usb-af9005-cardlist
+ dvb-usb-az6027-cardlist
+ dvb-usb-cinergyT2-cardlist
+ dvb-usb-cxusb-cardlist
+ dvb-usb-digitv-cardlist
+ dvb-usb-dtt200u-cardlist
+ dvb-usb-dtv5100-cardlist
+ dvb-usb-dw2102-cardlist
+ dvb-usb-gp8psk-cardlist
+ dvb-usb-m920x-cardlist
+ dvb-usb-nova-t-usb2-cardlist
+ dvb-usb-opera1-cardlist
+ dvb-usb-pctv452e-cardlist
+ dvb-usb-technisat-usb2-cardlist
+ dvb-usb-ttusb2-cardlist
+ dvb-usb-umt-010-cardlist
+ dvb-usb-vp702x-cardlist
+ dvb-usb-vp7045-cardlist
+
+ dvb-usb-af9015-cardlist
+ dvb-usb-af9035-cardlist
+ dvb-usb-anysee-cardlist
+ dvb-usb-au6610-cardlist
+ dvb-usb-az6007-cardlist
+ dvb-usb-ce6230-cardlist
+ dvb-usb-dvbsky-cardlist
+ dvb-usb-ec168-cardlist
+ dvb-usb-gl861-cardlist
+ dvb-usb-lmedm04-cardlist
+ dvb-usb-mxl111sf-cardlist
+ dvb-usb-rtl28xxu-cardlist
+ dvb-usb-zd1301-cardlist
+
+ other-usb-cardlist
diff --git a/Documentation/admin-guide/media/v4l-drivers.rst b/Documentation/admin-guide/media/v4l-drivers.rst
new file mode 100644
index 000000000000..3bac5165b134
--- /dev/null
+++ b/Documentation/admin-guide/media/v4l-drivers.rst
@@ -0,0 +1,37 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. _uapi-v4l-drivers:
+
+===============================================
+Video4Linux (V4L) driver-specific documentation
+===============================================
+
+.. toctree::
+ :maxdepth: 2
+
+ bttv
+ c3-isp
+ cafe_ccic
+ cx88
+ fimc
+ imx
+ imx7
+ ipu3
+ ipu6-isys
+ ivtv
+ mgb4
+ omap3isp
+ philips
+ qcom_camss
+ raspberrypi-pisp-be
+ rcar-fdp1
+ rkisp1
+ raspberrypi-rp1-cfe
+ saa7134
+ si470x
+ si4713
+ si476x
+ starfive_camss
+ vimc
+ visl
+ vivid
diff --git a/Documentation/admin-guide/media/vimc.dot b/Documentation/admin-guide/media/vimc.dot
new file mode 100644
index 000000000000..92a5bb631235
--- /dev/null
+++ b/Documentation/admin-guide/media/vimc.dot
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: GPL-2.0
+
+digraph board {
+ rankdir=TB
+ n00000001 [label="{{} | Sensor A\n/dev/v4l-subdev0 | {<port0> 0}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000001:port0 -> n00000005:port0 [style=bold]
+ n00000001:port0 -> n0000000b [style=bold]
+ n00000001 -> n00000002
+ n00000002 [label="{{} | Lens A\n/dev/v4l-subdev5 | {<port0>}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000003 [label="{{} | Sensor B\n/dev/v4l-subdev1 | {<port0> 0}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000003:port0 -> n00000008:port0 [style=bold]
+ n00000003:port0 -> n0000000f [style=bold]
+ n00000003 -> n00000004
+ n00000004 [label="{{} | Lens B\n/dev/v4l-subdev6 | {<port0>}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000005 [label="{{<port0> 0} | Debayer A\n/dev/v4l-subdev2 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000005:port1 -> n00000015:port0
+ n00000008 [label="{{<port0> 0} | Debayer B\n/dev/v4l-subdev3 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000008:port1 -> n00000015:port0 [style=dashed]
+ n0000000b [label="Raw Capture 0\n/dev/video0", shape=box, style=filled, fillcolor=yellow]
+ n0000000f [label="Raw Capture 1\n/dev/video1", shape=box, style=filled, fillcolor=yellow]
+ n00000013 [label="{{} | RGB/YUV Input\n/dev/v4l-subdev4 | {<port0> 0}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000013:port0 -> n00000015:port0 [style=dashed]
+ n00000015 [label="{{<port0> 0} | Scaler\n/dev/v4l-subdev5 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000015:port1 -> n00000018 [style=bold]
+ n00000018 [label="RGB/YUV Capture\n/dev/video2", shape=box, style=filled, fillcolor=yellow]
+}
diff --git a/Documentation/admin-guide/media/vimc.rst b/Documentation/admin-guide/media/vimc.rst
new file mode 100644
index 000000000000..29d843a8ddb1
--- /dev/null
+++ b/Documentation/admin-guide/media/vimc.rst
@@ -0,0 +1,110 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+The Virtual Media Controller Driver (vimc)
+==========================================
+
+The vimc driver emulates complex video hardware using the V4L2 API and the Media
+API. It has a capture device and three subdevices: sensor, debayer and scaler.
+
+Topology
+--------
+
+The topology is hardcoded, although you could modify it in vimc-core and
+recompile the driver to achieve your own topology. This is the default topology:
+
+.. _vimc_topology_graph:
+
+.. kernel-figure:: vimc.dot
+ :alt: Diagram of the default media pipeline topology
+ :align: center
+
+ Media pipeline graph on vimc
+
+Configuring the topology
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+Each subdevice will come with its default configuration (pixelformat, height,
+width, ...). One needs to configure the topology in order to match the
+configuration on each linked subdevice to stream frames through the pipeline.
+If the configuration doesn't match, the stream will fail. The ``v4l-utils``
+package is a bundle of user-space applications, that comes with ``media-ctl`` and
+``v4l2-ctl`` that can be used to configure the vimc configuration. This sequence
+of commands fits for the default topology:
+
+.. code-block:: bash
+
+ media-ctl -d platform:vimc -V '"Sensor A":0[fmt:SBGGR8_1X8/640x480]'
+ media-ctl -d platform:vimc -V '"Debayer A":0[fmt:SBGGR8_1X8/640x480]'
+ media-ctl -d platform:vimc -V '"Scaler":0[fmt:RGB888_1X24/640x480]'
+ media-ctl -d platform:vimc -V '"Scaler":0[crop:(100,50)/400x150]'
+ media-ctl -d platform:vimc -V '"Scaler":1[fmt:RGB888_1X24/300x700]'
+ v4l2-ctl -z platform:vimc -d "RGB/YUV Capture" -v width=300,height=700
+ v4l2-ctl -z platform:vimc -d "Raw Capture 0" -v pixelformat=BA81
+
+Subdevices
+----------
+
+Subdevices define the behavior of an entity in the topology. Depending on the
+subdevice, the entity can have multiple pads of type source or sink.
+
+vimc-sensor:
+ Generates images in several formats using video test pattern generator.
+ Exposes:
+
+ * 1 Pad source
+
+vimc-lens:
+ Ancillary lens for a sensor. Supports auto focus control. Linked to
+ a vimc-sensor using an ancillary link. The lens supports FOCUS_ABSOLUTE
+ control.
+
+.. code-block:: bash
+
+ media-ctl -p
+ ...
+ - entity 28: Lens A (0 pad, 0 link)
+ type V4L2 subdev subtype Lens flags 0
+ device node name /dev/v4l-subdev6
+ - entity 29: Lens B (0 pad, 0 link)
+ type V4L2 subdev subtype Lens flags 0
+ device node name /dev/v4l-subdev7
+ v4l2-ctl -d /dev/v4l-subdev7 -C focus_absolute
+ focus_absolute: 0
+
+
+vimc-debayer:
+ Transforms images in bayer format into a non-bayer format.
+ Exposes:
+
+ * 1 Pad sink
+ * 1 Pad source
+
+vimc-scaler:
+ Re-size the image to meet the source pad resolution. E.g.: if the sync
+ pad is configured to 360x480 and the source to 1280x720, the image will
+ be stretched to fit the source resolution. Works for any resolution
+ within the vimc limitations (even shrinking the image if necessary).
+ Exposes:
+
+ * 1 Pad sink
+ * 1 Pad source
+
+vimc-capture:
+ Exposes node /dev/videoX to allow userspace to capture the stream.
+ Exposes:
+
+ * 1 Pad sink
+ * 1 Pad source
+
+Module options
+--------------
+
+Vimc has a module parameter to configure the driver.
+
+* ``allocator=<unsigned int>``
+
+ memory allocator selection, default is 0. It specifies the way buffers
+ will be allocated.
+
+ - 0: vmalloc
+ - 1: dma-contig
diff --git a/Documentation/admin-guide/media/visl.rst b/Documentation/admin-guide/media/visl.rst
new file mode 100644
index 000000000000..cd45145cde68
--- /dev/null
+++ b/Documentation/admin-guide/media/visl.rst
@@ -0,0 +1,185 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+The Virtual Stateless Decoder Driver (visl)
+===========================================
+
+A virtual stateless decoder device for stateless uAPI development
+purposes.
+
+This tool's objective is to help the development and testing of
+userspace applications that use the V4L2 stateless API to decode media.
+
+A userspace implementation can use visl to run a decoding loop even when
+no hardware is available or when the kernel uAPI for the codec has not
+been upstreamed yet. This can reveal bugs at an early stage.
+
+This driver can also trace the contents of the V4L2 controls submitted
+to it. It can also dump the contents of the vb2 buffers through a
+debugfs interface. This is in many ways similar to the tracing
+infrastructure available for other popular encode/decode APIs out there
+and can help develop a userspace application by using another (working)
+one as a reference.
+
+.. note::
+
+ No actual decoding of video frames is performed by visl. The
+ V4L2 test pattern generator is used to write various debug information
+ to the capture buffers instead.
+
+Module parameters
+-----------------
+
+- visl_debug: Activates debug info, printing various debug messages through
+ dprintk. Also controls whether per-frame debug info is shown. Defaults to off.
+ Note that enabling this feature can result in slow performance through serial.
+
+- visl_transtime_ms: Simulated process time in milliseconds. Slowing down the
+ decoding speed can be useful for debugging.
+
+- visl_dprintk_frame_start, visl_dprintk_frame_nframes: Dictates a range of
+ frames where dprintk is activated. This only controls the dprintk tracing on a
+ per-frame basis. Note that printing a lot of data can be slow through serial.
+
+- keep_bitstream_buffers: Controls whether bitstream (i.e. OUTPUT) buffers are
+ kept after a decoding session. Defaults to false so as to reduce the amount of
+ clutter. keep_bitstream_buffers == false works well when live debugging the
+ client program with GDB.
+
+- bitstream_trace_frame_start, bitstream_trace_nframes: Similar to
+ visl_dprintk_frame_start, visl_dprintk_nframes, but controls the dumping of
+ buffer data through debugfs instead.
+
+- tpg_verbose: Write extra information on each output frame to ease debugging
+ the API. When set to true, the output frames are not stable for a given input
+ as some information like pointers or queue status will be added to them.
+
+What is the default use case for this driver?
+---------------------------------------------
+
+This driver can be used as a way to compare different userspace implementations.
+This assumes that a working client is run against visl and that the ftrace and
+OUTPUT buffer data is subsequently used to debug a work-in-progress
+implementation.
+
+Even though no video decoding is actually done, the output frames can be used
+against a reference for a given input, except if tpg_verbose is set to true.
+
+Depending on the tpg_verbose parameter value, information on reference frames,
+their timestamps, the status of the OUTPUT and CAPTURE queues and more can be
+read directly from the CAPTURE buffers.
+
+Supported codecs
+----------------
+
+The following codecs are supported:
+
+- FWHT
+- MPEG2
+- VP8
+- VP9
+- H.264
+- HEVC
+- AV1
+
+visl trace events
+-----------------
+The trace events are defined on a per-codec basis, e.g.:
+
+.. code-block:: bash
+
+ $ ls /sys/kernel/tracing/events/ | grep visl
+ visl_av1_controls
+ visl_fwht_controls
+ visl_h264_controls
+ visl_hevc_controls
+ visl_mpeg2_controls
+ visl_vp8_controls
+ visl_vp9_controls
+
+For example, in order to dump HEVC SPS data:
+
+.. code-block:: bash
+
+ $ echo 1 > /sys/kernel/tracing/events/visl_hevc_controls/v4l2_ctrl_hevc_sps/enable
+
+The SPS data will be dumped to the trace buffer, i.e.:
+
+.. code-block:: bash
+
+ $ cat /sys/kernel/tracing/trace
+ video_parameter_set_id 0
+ seq_parameter_set_id 0
+ pic_width_in_luma_samples 1920
+ pic_height_in_luma_samples 1080
+ bit_depth_luma_minus8 0
+ bit_depth_chroma_minus8 0
+ log2_max_pic_order_cnt_lsb_minus4 4
+ sps_max_dec_pic_buffering_minus1 6
+ sps_max_num_reorder_pics 2
+ sps_max_latency_increase_plus1 0
+ log2_min_luma_coding_block_size_minus3 0
+ log2_diff_max_min_luma_coding_block_size 3
+ log2_min_luma_transform_block_size_minus2 0
+ log2_diff_max_min_luma_transform_block_size 3
+ max_transform_hierarchy_depth_inter 2
+ max_transform_hierarchy_depth_intra 2
+ pcm_sample_bit_depth_luma_minus1 0
+ pcm_sample_bit_depth_chroma_minus1 0
+ log2_min_pcm_luma_coding_block_size_minus3 0
+ log2_diff_max_min_pcm_luma_coding_block_size 0
+ num_short_term_ref_pic_sets 0
+ num_long_term_ref_pics_sps 0
+ chroma_format_idc 1
+ sps_max_sub_layers_minus1 0
+ flags AMP_ENABLED|SAMPLE_ADAPTIVE_OFFSET|TEMPORAL_MVP_ENABLED|STRONG_INTRA_SMOOTHING_ENABLED
+
+
+Dumping OUTPUT buffer data through debugfs
+------------------------------------------
+
+If the **VISL_DEBUGFS** Kconfig is enabled, visl will populate
+**/sys/kernel/debug/visl/bitstream** with OUTPUT buffer data according to the
+values of bitstream_trace_frame_start and bitstream_trace_nframes. This can
+highlight errors as broken clients may fail to fill the buffers properly.
+
+A single file is created for each processed OUTPUT buffer. Its name contains an
+integer that denotes the buffer sequence, i.e.:
+
+.. code-block:: c
+
+ snprintf(name, 32, "bitstream%d", run->src->sequence);
+
+Dumping the values is simply a matter of reading from the file, i.e.:
+
+For the buffer with sequence == 0:
+
+.. code-block:: bash
+
+ $ xxd /sys/kernel/debug/visl/bitstream/bitstream0
+ 00000000: 2601 af04 d088 bc25 a173 0e41 a4f2 3274 &......%.s.A..2t
+ 00000010: c668 cb28 e775 b4ac f53a ba60 f8fd 3aa1 .h.(.u...:.`..:.
+ 00000020: 46b4 bcfc 506c e227 2372 e5f5 d7ea 579f F...Pl.'#r....W.
+ 00000030: 6371 5eb5 0eb8 23b5 ca6a 5de5 983a 19e4 cq^...#..j]..:..
+ 00000040: e8c3 4320 b4ba a226 cbc1 4138 3a12 32d6 ..C ...&..A8:.2.
+ 00000050: fef3 247b 3523 4e90 9682 ac8e eb0c a389 ..${5#N.........
+ 00000060: ddd0 6cfc 0187 0e20 7aae b15b 1812 3d33 ..l.... z..[..=3
+ 00000070: e1c5 f425 a83a 00b7 4f18 8127 3c4c aefb ...%.:..O..'<L..
+
+For the buffer with sequence == 1:
+
+.. code-block:: bash
+
+ $ xxd /sys/kernel/debug/visl/bitstream/bitstream1
+ 00000000: 0201 d021 49e1 0c40 aa11 1449 14a6 01dc ...!I..@...I....
+ 00000010: 7023 889a c8cd 2cd0 13b4 dab0 e8ca 21fe p#....,.......!.
+ 00000020: c4c8 ab4c 486e 4e2f b0df 96cc c74e 8dde ...LHnN/.....N..
+ 00000030: 8ce7 ee36 d880 4095 4d64 30a0 ff4f 0c5e ...6..@.Md0..O.^
+ 00000040: f16b a6a1 d806 ca2a 0ece a673 7bea 1f37 .k.....*...s{..7
+ 00000050: 370f 5bb9 1dc4 ba21 6434 bc53 0173 cba0 7.[....!d4.S.s..
+ 00000060: dfe6 bc99 01ea b6e0 346b 92b5 c8de 9f5d ........4k.....]
+ 00000070: e7cc 3484 1769 fef2 a693 a945 2c8b 31da ..4..i.....E,.1.
+
+And so on.
+
+By default, the files are removed during STREAMOFF. This is to reduce the amount
+of clutter.
diff --git a/Documentation/media/v4l-drivers/vivid.rst b/Documentation/admin-guide/media/vivid.rst
index 089595ce11c5..034ca7c77fb9 100644
--- a/Documentation/media/v4l-drivers/vivid.rst
+++ b/Documentation/admin-guide/media/vivid.rst
@@ -1,10 +1,12 @@
+.. SPDX-License-Identifier: GPL-2.0
+
The Virtual Video Test Driver (vivid)
=====================================
This driver emulates video4linux hardware of various types: video capture, video
-output, vbi capture and output, radio receivers and transmitters and a software
-defined radio receiver. In addition a simple framebuffer device is available for
-testing capture and output overlays.
+output, vbi capture and output, metadata capture and output, radio receivers and
+transmitters, touch capture and a software defined radio receiver. In addition a
+simple framebuffer device is available for testing capture and output overlays.
Up to 64 vivid instances can be created, each with up to 16 inputs and 16 outputs.
@@ -34,6 +36,8 @@ This document describes the features implemented by this driver:
- Radio receiver and transmitter support, including RDS support
- Software defined radio (SDR) support
- Capture and output overlay support
+- Metadata capture and output support
+- Touch capture support
These features will be described in more detail below.
@@ -56,7 +60,7 @@ all configurable using the following module options:
- node_types:
which devices should each driver instance create. An array of
- hexadecimal values, one for each instance. The default is 0x1d3d.
+ hexadecimal values, one for each instance. The default is 0xe1d3d.
Each value is a bitmask with the following meaning:
- bit 0: Video Capture node
@@ -67,6 +71,9 @@ all configurable using the following module options:
- bit 10-11: VBI Output node: 0 = none, 1 = raw vbi, 2 = sliced vbi, 3 = both
- bit 12: Radio Transmitter node
- bit 16: Framebuffer for testing overlays
+ - bit 17: Metadata Capture node
+ - bit 18: Metadata Output node
+ - bit 19: Touch Capture node
So to create four instances, the first two with just one video capture
device, the second two with just one video output device you would pass
@@ -173,6 +180,21 @@ all configurable using the following module options:
give the desired swradioX start number for each SDR capture device.
The default is -1 which will just take the first free number.
+- meta_cap_nr:
+
+ give the desired videoX start number for each metadata capture device.
+ The default is -1 which will just take the first free number.
+
+- meta_out_nr:
+
+ give the desired videoX start number for each metadata output device.
+ The default is -1 which will just take the first free number.
+
+- touch_cap_nr:
+
+ give the desired v4l-touchX start number for each touch capture device.
+ The default is -1 which will just take the first free number.
+
- ccs_cap_mode:
specify the allowed video capture crop/compose/scaling combination
@@ -271,6 +293,24 @@ all configurable using the following module options:
- 0: vmalloc
- 1: dma-contig
+- cache_hints:
+
+ specifies if the device should set queues' user-space cache and memory
+ consistency hint capability (V4L2_BUF_CAP_SUPPORTS_MMAP_CACHE_HINTS).
+ The hints are valid only when using MMAP streaming I/O. Default is 0.
+
+ - 0: forbid hints
+ - 1: allow hints
+
+- supports_requests:
+
+ specifies if the device should support the Request API. There are
+ three possible values, default is 1:
+
+ - 0: no request
+ - 1: supports requests
+ - 2: requires requests
+
Taken together, all these module options allow you to precisely customize
the driver behavior and test your application with all sorts of permutations.
It is also very suitable to emulate hardware that is not yet available, e.g.
@@ -282,13 +322,13 @@ Video Capture
This is probably the most frequently used feature. The video capture device
can be configured by using the module options num_inputs, input_types and
-ccs_cap_mode (see section 1 for more detailed information), but by default
-four inputs are configured: a webcam, a TV tuner, an S-Video and an HDMI
-input, one input for each input type. Those are described in more detail
-below.
+ccs_cap_mode (see "Configuring the driver" for more detailed information),
+but by default four inputs are configured: a webcam, a TV tuner, an S-Video
+and an HDMI input, one input for each input type. Those are described in more
+detail below.
Special attention has been given to the rate at which new frames become
-available. The jitter will be around 1 jiffie (that depends on the HZ
+available. The jitter will be around 1 jiffy (that depends on the HZ
configuration of your kernel, so usually 1/100, 1/250 or 1/1000 of a second),
but the long-term behavior is exactly following the framerate. So a
framerate of 59.94 Hz is really different from 60 Hz. If the framerate
@@ -361,7 +401,7 @@ Which one is returned depends on the chosen channel, each next valid channel
will cycle through the possible audio subchannel combinations. This allows
you to test the various combinations by just switching channels..
-Finally, for these inputs the v4l2_timecode struct is filled in in the
+Finally, for these inputs the v4l2_timecode struct is filled in the
dequeued v4l2_buffer struct.
@@ -403,10 +443,10 @@ Video Output
------------
The video output device can be configured by using the module options
-num_outputs, output_types and ccs_out_mode (see section 1 for more detailed
-information), but by default two outputs are configured: an S-Video and an
-HDMI input, one output for each output type. Those are described in more detail
-below.
+num_outputs, output_types and ccs_out_mode (see "Configuring the driver"
+for more detailed information), but by default two outputs are configured:
+an S-Video and an HDMI input, one output for each output type. Those are
+described in more detail below.
Like with video capture the framerate is also exact in the long term.
@@ -545,6 +585,33 @@ The generated data contains the In-phase and Quadrature components of a
1 kHz tone that has an amplitude of sqrt(2).
+Metadata Capture
+----------------
+
+The Metadata capture generates UVC format metadata. The PTS and SCR are
+transmitted based on the values set in vivid controls.
+
+The Metadata device will only work for the Webcam input, it will give
+back an error for all other inputs.
+
+
+Metadata Output
+---------------
+
+The Metadata output can be used to set brightness, contrast, saturation and hue.
+
+The Metadata device will only work for the Webcam output, it will give
+back an error for all other outputs.
+
+
+Touch Capture
+-------------
+
+The Touch capture generates touch patterns simulating single tap, double tap,
+triple tap, move from left to right, zoom in, zoom out, palm press (simulating
+a large area being pressed on a touchpad), and simulating 16 simultaneous
+touch points.
+
Controls
--------
@@ -656,6 +723,20 @@ The Test Pattern Controls are all specific to video capture.
does the same for the EAV (End of Active Video) code.
+- Insert Video Guard Band
+
+ adds 4 columns of pixels with the HDMI Video Guard Band code at the
+ left hand side of the image. This only works with 3 or 4 byte RGB pixel
+ formats. The RGB pixel value 0xab/0x55/0xab turns out to be equivalent
+ to the HDMI Video Guard Band code that precedes each active video line
+ (see section 5.2.2.1 in the HDMI 1.3 Specification). To test if a video
+ receiver has correct HDMI Video Guard Band processing, enable this
+ control and then move the image to the left hand side of the screen.
+ That will result in video lines that start with multiple pixels that
+ have the same value as the Video Guard Band that precedes them.
+ Receivers that will just keep skipping Video Guard Band values will
+ now fail and either loose sync or these video lines will shift.
+
Capture Feature Selection Controls
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -1042,36 +1123,45 @@ FM Radio Modulator Controls
to pass the RDS blocks to the driver, or "Controls" where the RDS data
is Provided by the RDS controls mentioned above.
+Metadata Capture Controls
+~~~~~~~~~~~~~~~~~~~~~~~~~~
-Video, VBI and RDS Looping
---------------------------
+- Generate PTS
-The vivid driver supports looping of video output to video input, VBI output
-to VBI input and RDS output to RDS input. For video/VBI looping this emulates
-as if a cable was hooked up between the output and input connector. So video
-and VBI looping is only supported between S-Video and HDMI inputs and outputs.
-VBI is only valid for S-Video as it makes no sense for HDMI.
+ if set, then the generated metadata stream contains Presentation timestamp.
-Since radio is wireless this looping always happens if the radio receiver
-frequency is close to the radio transmitter frequency. In that case the radio
-transmitter will 'override' the emulated radio stations.
+- Generate SCR
-Looping is currently supported only between devices created by the same
-vivid driver instance.
+ if set, then the generated metadata stream contains Source Clock information.
-Video and Sliced VBI looping
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Video, Sliced VBI and HDMI CEC Looping
+--------------------------------------
-The way to enable video/VBI looping is currently fairly crude. A 'Loop Video'
-control is available in the "Vivid" control class of the video
-capture and VBI capture devices. When checked the video looping will be enabled.
-Once enabled any video S-Video or HDMI input will show a static test pattern
-until the video output has started. At that time the video output will be
-looped to the video input provided that:
+Video Looping functionality is supported for devices created by the same
+vivid driver instance, as well as across multiple instances of the vivid driver.
+The vivid driver supports looping of video and Sliced VBI data between an S-Video output
+and an S-Video input. It also supports looping of video and HDMI CEC data between an
+HDMI output and an HDMI input.
-- the input type matches the output type. So the HDMI input cannot receive
- video from the S-Video output.
+To enable looping, set the 'HDMI/S-Video XXX-N Is Connected To' control(s) to select
+whether an input uses the Test Pattern Generator, or is disconnected, or is connected
+to an output. An input can be connected to an output from any vivid instance.
+The inputs and outputs are numbered XXX-N where XXX is the vivid instance number
+(see module option n_devs). If there is only one vivid instance (the default), then
+XXX will be 000. And N is the Nth S-Video/HDMI input or output of that instance.
+If vivid is loaded without module options, then you can connect the S-Video 000-0 input
+to the S-Video 000-0 output, or the HDMI 000-0 input to the HDMI 000-0 output.
+This is the equivalent of connecting or disconnecting a cable between an input and an
+output in a physical device.
+
+If an 'HDMI/S-Video XXX-N Is Connected To' control selected an output, then the video
+output will be looped to the video input provided that:
+
+- the currently selected input matches the input indicated by the control name.
+
+- in the vivid instance of the output connector, the currently selected output matches
+ the output indicated by the control's value.
- the video resolution of the video input must match that of the video output.
So it is not possible to loop a 50 Hz (720x576) S-Video output to a 60 Hz
@@ -1098,6 +1188,8 @@ looped to the video input provided that:
"DV Timings Signal Mode" for the HDMI input should be configured so that a
valid signal is passed to the video input.
+If any condition is not valid, then the 'Noise' test pattern is shown.
+
The framerates do not have to match, although this might change in the future.
By default you will see the OSD text superimposed on top of the looped video.
@@ -1111,17 +1203,26 @@ and WSS (50 Hz formats) VBI data is looped. Teletext VBI data is not looped.
Radio & RDS Looping
-~~~~~~~~~~~~~~~~~~~
-
-As mentioned in section 6 the radio receiver emulates stations are regular
-frequency intervals. Depending on the frequency of the radio receiver a
-signal strength value is calculated (this is returned by VIDIOC_G_TUNER).
-However, it will also look at the frequency set by the radio transmitter and
-if that results in a higher signal strength than the settings of the radio
-transmitter will be used as if it was a valid station. This also includes
-the RDS data (if any) that the transmitter 'transmits'. This is received
-faithfully on the receiver side. Note that when the driver is loaded the
-frequencies of the radio receiver and transmitter are not identical, so
+-------------------
+
+The vivid driver supports looping of RDS output to RDS input.
+
+Since radio is wireless this looping always happens if the radio receiver
+frequency is close to the radio transmitter frequency. In that case the radio
+transmitter will 'override' the emulated radio stations.
+
+RDS looping is currently supported only between devices created by the same
+vivid driver instance.
+
+As mentioned in the "Radio Receiver" section, the radio receiver emulates
+stations at regular frequency intervals. Depending on the frequency of the
+radio receiver a signal strength value is calculated (this is returned by
+VIDIOC_G_TUNER). However, it will also look at the frequency set by the radio
+transmitter and if that results in a higher signal strength than the settings
+of the radio transmitter will be used as if it was a valid station. This also
+includes the RDS data (if any) that the transmitter 'transmits'. This is
+received faithfully on the receiver side. Note that when the driver is loaded
+the frequencies of the radio receiver and transmitter are not identical, so
initially no looping takes place.
@@ -1131,8 +1232,8 @@ Cropping, Composing, Scaling
This driver supports cropping, composing and scaling in any combination. Normally
which features are supported can be selected through the Vivid controls,
but it is also possible to hardcode it when the module is loaded through the
-ccs_cap_mode and ccs_out_mode module options. See section 1 on the details of
-these module options.
+ccs_cap_mode and ccs_out_mode module options. See "Configuring the driver" on
+the details of these module options.
This allows you to test your application for all these variations.
@@ -1173,7 +1274,8 @@ is set, then the alpha component is only used for the color red and set to
The driver has to be configured to support the multiplanar formats. By default
the driver instances are single-planar. This can be changed by setting the
-multiplanar module option, see section 1 for more details on that option.
+multiplanar module option, see "Configuring the driver" for more details on that
+option.
If the driver instance is using the multiplanar formats/API, then the first
single planar format (YUYV) and the multiplanar NV16M and NV61M formats the
@@ -1183,74 +1285,6 @@ data_offset to be non-zero, so this is a useful feature for testing applications
Video output will also honor any data_offset that the application set.
-Capture Overlay
----------------
-
-Note: capture overlay support is implemented primarily to test the existing
-V4L2 capture overlay API. In practice few if any GPUs support such overlays
-anymore, and neither are they generally needed anymore since modern hardware
-is so much more capable. By setting flag 0x10000 in the node_types module
-option the vivid driver will create a simple framebuffer device that can be
-used for testing this API. Whether this API should be used for new drivers is
-questionable.
-
-This driver has support for a destructive capture overlay with bitmap clipping
-and list clipping (up to 16 rectangles) capabilities. Overlays are not
-supported for multiplanar formats. It also honors the struct v4l2_window field
-setting: if it is set to FIELD_TOP or FIELD_BOTTOM and the capture setting is
-FIELD_ALTERNATE, then only the top or bottom fields will be copied to the overlay.
-
-The overlay only works if you are also capturing at that same time. This is a
-vivid limitation since it copies from a buffer to the overlay instead of
-filling the overlay directly. And if you are not capturing, then no buffers
-are available to fill.
-
-In addition, the pixelformat of the capture format and that of the framebuffer
-must be the same for the overlay to work. Otherwise VIDIOC_OVERLAY will return
-an error.
-
-In order to really see what it going on you will need to create two vivid
-instances: the first with a framebuffer enabled. You configure the capture
-overlay of the second instance to use the framebuffer of the first, then
-you start capturing in the second instance. For the first instance you setup
-the output overlay for the video output, turn on video looping and capture
-to see the blended framebuffer overlay that's being written to by the second
-instance. This setup would require the following commands:
-
-.. code-block:: none
-
- $ sudo modprobe vivid n_devs=2 node_types=0x10101,0x1
- $ v4l2-ctl -d1 --find-fb
- /dev/fb1 is the framebuffer associated with base address 0x12800000
- $ sudo v4l2-ctl -d2 --set-fbuf fb=1
- $ v4l2-ctl -d1 --set-fbuf fb=1
- $ v4l2-ctl -d0 --set-fmt-video=pixelformat='AR15'
- $ v4l2-ctl -d1 --set-fmt-video-out=pixelformat='AR15'
- $ v4l2-ctl -d2 --set-fmt-video=pixelformat='AR15'
- $ v4l2-ctl -d0 -i2
- $ v4l2-ctl -d2 -i2
- $ v4l2-ctl -d2 -c horizontal_movement=4
- $ v4l2-ctl -d1 --overlay=1
- $ v4l2-ctl -d1 -c loop_video=1
- $ v4l2-ctl -d2 --stream-mmap --overlay=1
-
-And from another console:
-
-.. code-block:: none
-
- $ v4l2-ctl -d1 --stream-out-mmap
-
-And yet another console:
-
-.. code-block:: none
-
- $ qv4l2
-
-and start streaming.
-
-As you can see, this is not for the faint of heart...
-
-
Output Overlay
--------------
@@ -1309,7 +1343,7 @@ Some Future Improvements
Just as a reminder and in no particular order:
- Add a virtual alsa driver to test audio
-- Add virtual sub-devices and media controller support
+- Add virtual sub-devices
- Some support for testing compressed video
- Add support to loop raw VBI output to raw VBI input
- Add support to loop teletext sliced VBI output to VBI input
@@ -1318,12 +1352,10 @@ Just as a reminder and in no particular order:
- Add ARGB888 overlay support: better testing of the alpha channel
- Improve pixel aspect support in the tpg code by passing a real v4l2_fract
- Use per-queue locks and/or per-device locks to improve throughput
-- Add support to loop from a specific output to a specific input across
- vivid instances
- The SDR radio should use the same 'frequencies' for stations as the normal
radio receiver, and give back noise if the frequency doesn't match up with
a station frequency
- Make a thread for the RDS generation, that would help in particular for the
"Controls" RDS Rx I/O Mode as the read-only RDS controls could be updated
in real-time.
-- Changing the EDID should cause hotplug detect emulation to happen.
+- Changing the EDID doesn't wait 100 ms before setting the HPD signal.
diff --git a/Documentation/admin-guide/media/zoran-cardlist.rst b/Documentation/admin-guide/media/zoran-cardlist.rst
new file mode 100644
index 000000000000..d7fc8bed62ff
--- /dev/null
+++ b/Documentation/admin-guide/media/zoran-cardlist.rst
@@ -0,0 +1,51 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Zoran cards list
+================
+
+.. tabularcolumns:: |p{1.4cm}|p{11.1cm}|p{4.2cm}|
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 2 19 18
+ :stub-columns: 0
+
+ * - Card number
+ - Card name
+ - PCI subsystem IDs
+
+ * - 0
+ - DC10(old)
+ - <any>
+
+ * - 1
+ - DC10(new)
+ - <any>
+
+ * - 2
+ - DC10_PLUS
+ - 1031:7efe
+
+ * - 3
+ - DC30
+ - <any>
+
+ * - 4
+ - DC30_PLUS
+ - 1031:d801
+
+ * - 5
+ - LML33
+ - <any>
+
+ * - 6
+ - LML33R10
+ - 12f8:8a02
+
+ * - 7
+ - Buz
+ - 13ca:4231
+
+ * - 8
+ - 6-Eyes
+ - <any>
diff --git a/Documentation/admin-guide/mm/cma_debugfs.rst b/Documentation/admin-guide/mm/cma_debugfs.rst
new file mode 100644
index 000000000000..4120e9cb0cd5
--- /dev/null
+++ b/Documentation/admin-guide/mm/cma_debugfs.rst
@@ -0,0 +1,31 @@
+=====================
+CMA Debugfs Interface
+=====================
+
+The CMA debugfs interface is useful to retrieve basic information out of the
+different CMA areas and to test allocation/release in each of the areas.
+
+Each CMA area represents a directory under <debugfs>/cma/, represented by
+its CMA name like below:
+
+ <debugfs>/cma/<cma_name>
+
+The structure of the files created under that directory is as follows:
+
+ - [RO] base_pfn: The base PFN (Page Frame Number) of the CMA area.
+ This is the same as ranges/0/base_pfn.
+ - [RO] count: Amount of memory in the CMA area.
+ - [RO] order_per_bit: Order of pages represented by one bit.
+ - [RO] bitmap: The bitmap of allocated pages in the area.
+ This is the same as ranges/0/base_pfn.
+ - [RO] ranges/N/base_pfn: The base PFN of contiguous range N
+ in the CMA area.
+ - [RO] ranges/N/bitmap: The bit map of allocated pages in
+ range N in the CMA area.
+ - [WO] alloc: Allocate N pages from that CMA area. For example::
+
+ echo 5 > <debugfs>/cma/<cma_name>/alloc
+
+would try to allocate 5 pages from the 'cma_name' area.
+
+ - [WO] free: Free N pages from that CMA area, similar to the above.
diff --git a/Documentation/admin-guide/mm/concepts.rst b/Documentation/admin-guide/mm/concepts.rst
index 291699c810d4..e796b0a7e4a5 100644
--- a/Documentation/admin-guide/mm/concepts.rst
+++ b/Documentation/admin-guide/mm/concepts.rst
@@ -1,16 +1,14 @@
-.. _mm_concepts:
-
=================
Concepts overview
=================
-The memory management in Linux is complex system that evolved over the
-years and included more and more functionality to support variety of
+The memory management in Linux is a complex system that evolved over the
+years and included more and more functionality to support a variety of
systems from MMU-less microcontrollers to supercomputers. The memory
-management for systems without MMU is called ``nommu`` and it
+management for systems without an MMU is called ``nommu`` and it
definitely deserves a dedicated document, which hopefully will be
eventually written. Yet, although some of the concepts are the same,
-here we assume that MMU is available and CPU can translate a virtual
+here we assume that an MMU is available and a CPU can translate a virtual
address to a physical address.
.. contents:: :local:
@@ -21,10 +19,10 @@ Virtual Memory Primer
The physical memory in a computer system is a limited resource and
even for systems that support memory hotplug there is a hard limit on
the amount of memory that can be installed. The physical memory is not
-necessary contiguous, it might be accessible as a set of distinct
+necessarily contiguous; it might be accessible as a set of distinct
address ranges. Besides, different CPU architectures, and even
-different implementations of the same architecture have different view
-how these address ranges defined.
+different implementations of the same architecture have different views
+of how these address ranges are defined.
All this makes dealing directly with physical memory quite complex and
to avoid this complexity a concept of virtual memory was developed.
@@ -35,7 +33,7 @@ physical memory (demand paging) and provides a mechanism for the
protection and controlled sharing of data between processes.
With virtual memory, each and every memory access uses a virtual
-address. When the CPU decodes the an instruction that reads (or
+address. When the CPU decodes an instruction that reads (or
writes) from (or to) the system memory, it translates the `virtual`
address encoded in that instruction to a `physical` address that the
memory controller can understand.
@@ -48,8 +46,8 @@ appropriate kernel configuration option.
Each physical memory page can be mapped as one or more virtual
pages. These mappings are described by page tables that allow
-translation from virtual address used by programs to real address in
-the physical memory. The page tables organized hierarchically.
+translation from a virtual address used by programs to the physical
+memory address. The page tables are organized hierarchically.
The tables at the lowest level of the hierarchy contain physical
addresses of actual pages used by the software. The tables at higher
@@ -86,16 +84,15 @@ memory with the huge pages. The first one is `HugeTLB filesystem`, or
hugetlbfs. It is a pseudo filesystem that uses RAM as its backing
store. For the files created in this filesystem the data resides in
the memory and mapped using huge pages. The hugetlbfs is described at
-:ref:`Documentation/admin-guide/mm/hugetlbpage.rst <hugetlbpage>`.
+Documentation/admin-guide/mm/hugetlbpage.rst.
Another, more recent, mechanism that enables use of the huge pages is
called `Transparent HugePages`, or THP. Unlike the hugetlbfs that
requires users and/or system administrators to configure what parts of
the system memory should and can be mapped by the huge pages, THP
manages such mappings transparently to the user and hence the
-name. See
-:ref:`Documentation/admin-guide/mm/transhuge.rst <admin_guide_transhuge>`
-for more details about THP.
+name. See Documentation/admin-guide/mm/transhuge.rst for more details
+about THP.
Zones
=====
@@ -121,12 +118,12 @@ Nodes
Many multi-processor machines are NUMA - Non-Uniform Memory Access -
systems. In such systems the memory is arranged into banks that have
different access latency depending on the "distance" from the
-processor. Each bank is referred as `node` and for each node Linux
-constructs an independent memory management subsystem. A node has it's
+processor. Each bank is referred to as a `node` and for each node Linux
+constructs an independent memory management subsystem. A node has its
own set of zones, lists of free and used pages and various statistics
counters. You can find more details about NUMA in
-:ref:`Documentation/vm/numa.rst <numa>` and in
-:ref:`Documentation/admin-guide/mm/numa_memory_policy.rst <numa_memory_policy>`.
+Documentation/mm/numa.rst` and in
+Documentation/admin-guide/mm/numa_memory_policy.rst.
Page cache
==========
@@ -149,9 +146,9 @@ for program's stack and heap or by explicit calls to mmap(2) system
call. Usually, the anonymous mappings only define virtual memory areas
that the program is allowed to access. The read accesses will result
in creation of a page table entry that references a special physical
-page filled with zeroes. When the program performs a write, regular
+page filled with zeroes. When the program performs a write, a regular
physical page will be allocated to hold the written data. The page
-will be marked dirty and if the kernel will decide to repurpose it,
+will be marked dirty and if the kernel decides to repurpose it,
the dirty page will be swapped out.
Reclaim
@@ -181,16 +178,16 @@ pressure.
The process of freeing the reclaimable physical memory pages and
repurposing them is called (surprise!) `reclaim`. Linux can reclaim
pages either asynchronously or synchronously, depending on the state
-of the system. When system is not loaded, most of the memory is free
-and allocation request will be satisfied immediately from the free
+of the system. When the system is not loaded, most of the memory is free
+and allocation requests will be satisfied immediately from the free
pages supply. As the load increases, the amount of the free pages goes
-down and when it reaches a certain threshold (high watermark), an
+down and when it reaches a certain threshold (low watermark), an
allocation request will awaken the ``kswapd`` daemon. It will
asynchronously scan memory pages and either just free them if the data
they contain is available elsewhere, or evict to the backing storage
device (remember those dirty pages?). As memory usage increases even
more and reaches another threshold - min watermark - an allocation
-will trigger the `direct reclaim`. In this case allocation is stalled
+will trigger `direct reclaim`. In this case allocation is stalled
until enough memory pages are reclaimed to satisfy the request.
Compaction
@@ -200,7 +197,7 @@ As the system runs, tasks allocate and free the memory and it becomes
fragmented. Although with virtual memory it is possible to present
scattered physical pages as virtually contiguous range, sometimes it is
necessary to allocate large physically contiguous memory areas. Such
-need may arise, for instance, when a device driver requires large
+need may arise, for instance, when a device driver requires a large
buffer for DMA, or when THP allocates a huge page. Memory `compaction`
addresses the fragmentation issue. This mechanism moves occupied pages
from the lower part of a memory zone to free pages in the upper part
@@ -208,15 +205,16 @@ of the zone. When a compaction scan is finished free pages are grouped
together at the beginning of the zone and allocations of large
physically contiguous areas become possible.
-Like reclaim, the compaction may happen asynchronously in ``kcompactd``
-daemon or synchronously as a result of memory allocation request.
+Like reclaim, the compaction may happen asynchronously in the ``kcompactd``
+daemon or synchronously as a result of a memory allocation request.
OOM killer
==========
-It may happen, that on a loaded machine memory will be exhausted. When
-the kernel detects that the system runs out of memory (OOM) it invokes
-`OOM killer`. Its mission is simple: all it has to do is to select a
-task to sacrifice for the sake of the overall system health. The
-selected task is killed in a hope that after it exits enough memory
-will be freed to continue normal operation.
+It is possible that on a loaded machine memory will be exhausted and the
+kernel will be unable to reclaim enough memory to continue to operate. In
+order to save the rest of the system, it invokes the `OOM killer`.
+
+The `OOM killer` selects a task to sacrifice for the sake of the overall
+system health. The selected task is killed in a hope that after it exits
+enough memory will be freed to continue normal operation.
diff --git a/Documentation/admin-guide/mm/damon/index.rst b/Documentation/admin-guide/mm/damon/index.rst
new file mode 100644
index 000000000000..3ce3164480c7
--- /dev/null
+++ b/Documentation/admin-guide/mm/damon/index.rst
@@ -0,0 +1,17 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+================================================================
+DAMON: Data Access MONitoring and Access-aware System Operations
+================================================================
+
+:doc:`DAMON </mm/damon/index>` is a Linux kernel subsystem for efficient data
+access monitoring and access-aware system operations.
+
+.. toctree::
+ :maxdepth: 2
+
+ start
+ usage
+ reclaim
+ lru_sort
+ stat
diff --git a/Documentation/admin-guide/mm/damon/lru_sort.rst b/Documentation/admin-guide/mm/damon/lru_sort.rst
new file mode 100644
index 000000000000..7b0775d281b4
--- /dev/null
+++ b/Documentation/admin-guide/mm/damon/lru_sort.rst
@@ -0,0 +1,294 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============================
+DAMON-based LRU-lists Sorting
+=============================
+
+DAMON-based LRU-lists Sorting (DAMON_LRU_SORT) is a static kernel module that
+aimed to be used for proactive and lightweight data access pattern based
+(de)prioritization of pages on their LRU-lists for making LRU-lists a more
+trusworthy data access pattern source.
+
+Where Proactive LRU-lists Sorting is Required?
+==============================================
+
+As page-granularity access checking overhead could be significant on huge
+systems, LRU lists are normally not proactively sorted but partially and
+reactively sorted for special events including specific user requests, system
+calls and memory pressure. As a result, LRU lists are sometimes not so
+perfectly prepared to be used as a trustworthy access pattern source for some
+situations including reclamation target pages selection under sudden memory
+pressure.
+
+Because DAMON can identify access patterns of best-effort accuracy while
+inducing only user-specified range of overhead, proactively running
+DAMON_LRU_SORT could be helpful for making LRU lists more trustworthy access
+pattern source with low and controlled overhead.
+
+How It Works?
+=============
+
+DAMON_LRU_SORT finds hot pages (pages of memory regions that showing access
+rates that higher than a user-specified threshold) and cold pages (pages of
+memory regions that showing no access for a time that longer than a
+user-specified threshold) using DAMON, and prioritizes hot pages while
+deprioritizing cold pages on their LRU-lists. To avoid it consuming too much
+CPU for the prioritizations, a CPU time usage limit can be configured. Under
+the limit, it prioritizes and deprioritizes more hot and cold pages first,
+respectively. System administrators can also configure under what situation
+this scheme should automatically activated and deactivated with three memory
+pressure watermarks.
+
+Its default parameters for hotness/coldness thresholds and CPU quota limit are
+conservatively chosen. That is, the module under its default parameters could
+be widely used without harm for common situations while providing a level of
+benefits for systems having clear hot/cold access patterns under memory
+pressure while consuming only a limited small portion of CPU time.
+
+Interface: Module Parameters
+============================
+
+To use this feature, you should first ensure your system is running on a kernel
+that is built with ``CONFIG_DAMON_LRU_SORT=y``.
+
+To let sysadmins enable or disable it and tune for the given system,
+DAMON_LRU_SORT utilizes module parameters. That is, you can put
+``damon_lru_sort.<parameter>=<value>`` on the kernel boot command line or write
+proper values to ``/sys/module/damon_lru_sort/parameters/<parameter>`` files.
+
+Below are the description of each parameter.
+
+enabled
+-------
+
+Enable or disable DAMON_LRU_SORT.
+
+You can enable DAMON_LRU_SORT by setting the value of this parameter as ``Y``.
+Setting it as ``N`` disables DAMON_LRU_SORT. Note that DAMON_LRU_SORT could do
+no real monitoring and LRU-lists sorting due to the watermarks-based activation
+condition. Refer to below descriptions for the watermarks parameter for this.
+
+commit_inputs
+-------------
+
+Make DAMON_LRU_SORT reads the input parameters again, except ``enabled``.
+
+Input parameters that updated while DAMON_LRU_SORT is running are not applied
+by default. Once this parameter is set as ``Y``, DAMON_LRU_SORT reads values
+of parametrs except ``enabled`` again. Once the re-reading is done, this
+parameter is set as ``N``. If invalid parameters are found while the
+re-reading, DAMON_LRU_SORT will be disabled.
+
+hot_thres_access_freq
+---------------------
+
+Access frequency threshold for hot memory regions identification in permil.
+
+If a memory region is accessed in frequency of this or higher, DAMON_LRU_SORT
+identifies the region as hot, and mark it as accessed on the LRU list, so that
+it could not be reclaimed under memory pressure. 50% by default.
+
+cold_min_age
+------------
+
+Time threshold for cold memory regions identification in microseconds.
+
+If a memory region is not accessed for this or longer time, DAMON_LRU_SORT
+identifies the region as cold, and mark it as unaccessed on the LRU list, so
+that it could be reclaimed first under memory pressure. 120 seconds by
+default.
+
+quota_ms
+--------
+
+Limit of time for trying the LRU lists sorting in milliseconds.
+
+DAMON_LRU_SORT tries to use only up to this time within a time window
+(quota_reset_interval_ms) for trying LRU lists sorting. This can be used
+for limiting CPU consumption of DAMON_LRU_SORT. If the value is zero, the
+limit is disabled.
+
+10 ms by default.
+
+quota_reset_interval_ms
+-----------------------
+
+The time quota charge reset interval in milliseconds.
+
+The charge reset interval for the quota of time (quota_ms). That is,
+DAMON_LRU_SORT does not try LRU-lists sorting for more than quota_ms
+milliseconds or quota_sz bytes within quota_reset_interval_ms milliseconds.
+
+1 second by default.
+
+wmarks_interval
+---------------
+
+The watermarks check time interval in microseconds.
+
+Minimal time to wait before checking the watermarks, when DAMON_LRU_SORT is
+enabled but inactive due to its watermarks rule. 5 seconds by default.
+
+wmarks_high
+-----------
+
+Free memory rate (per thousand) for the high watermark.
+
+If free memory of the system in bytes per thousand bytes is higher than this,
+DAMON_LRU_SORT becomes inactive, so it does nothing but periodically checks the
+watermarks. 200 (20%) by default.
+
+wmarks_mid
+----------
+
+Free memory rate (per thousand) for the middle watermark.
+
+If free memory of the system in bytes per thousand bytes is between this and
+the low watermark, DAMON_LRU_SORT becomes active, so starts the monitoring and
+the LRU-lists sorting. 150 (15%) by default.
+
+wmarks_low
+----------
+
+Free memory rate (per thousand) for the low watermark.
+
+If free memory of the system in bytes per thousand bytes is lower than this,
+DAMON_LRU_SORT becomes inactive, so it does nothing but periodically checks the
+watermarks. 50 (5%) by default.
+
+sample_interval
+---------------
+
+Sampling interval for the monitoring in microseconds.
+
+The sampling interval of DAMON for the cold memory monitoring. Please refer to
+the DAMON documentation (:doc:`usage`) for more detail. 5ms by default.
+
+aggr_interval
+-------------
+
+Aggregation interval for the monitoring in microseconds.
+
+The aggregation interval of DAMON for the cold memory monitoring. Please
+refer to the DAMON documentation (:doc:`usage`) for more detail. 100ms by
+default.
+
+min_nr_regions
+--------------
+
+Minimum number of monitoring regions.
+
+The minimal number of monitoring regions of DAMON for the cold memory
+monitoring. This can be used to set lower-bound of the monitoring quality.
+But, setting this too high could result in increased monitoring overhead.
+Please refer to the DAMON documentation (:doc:`usage`) for more detail. 10 by
+default.
+
+max_nr_regions
+--------------
+
+Maximum number of monitoring regions.
+
+The maximum number of monitoring regions of DAMON for the cold memory
+monitoring. This can be used to set upper-bound of the monitoring overhead.
+However, setting this too low could result in bad monitoring quality. Please
+refer to the DAMON documentation (:doc:`usage`) for more detail. 1000 by
+defaults.
+
+monitor_region_start
+--------------------
+
+Start of target memory region in physical address.
+
+The start physical address of memory region that DAMON_LRU_SORT will do work
+against. By default, biggest System RAM is used as the region.
+
+monitor_region_end
+------------------
+
+End of target memory region in physical address.
+
+The end physical address of memory region that DAMON_LRU_SORT will do work
+against. By default, biggest System RAM is used as the region.
+
+kdamond_pid
+-----------
+
+PID of the DAMON thread.
+
+If DAMON_LRU_SORT is enabled, this becomes the PID of the worker thread. Else,
+-1.
+
+nr_lru_sort_tried_hot_regions
+-----------------------------
+
+Number of hot memory regions that tried to be LRU-sorted.
+
+bytes_lru_sort_tried_hot_regions
+--------------------------------
+
+Total bytes of hot memory regions that tried to be LRU-sorted.
+
+nr_lru_sorted_hot_regions
+-------------------------
+
+Number of hot memory regions that successfully be LRU-sorted.
+
+bytes_lru_sorted_hot_regions
+----------------------------
+
+Total bytes of hot memory regions that successfully be LRU-sorted.
+
+nr_hot_quota_exceeds
+--------------------
+
+Number of times that the time quota limit for hot regions have exceeded.
+
+nr_lru_sort_tried_cold_regions
+------------------------------
+
+Number of cold memory regions that tried to be LRU-sorted.
+
+bytes_lru_sort_tried_cold_regions
+---------------------------------
+
+Total bytes of cold memory regions that tried to be LRU-sorted.
+
+nr_lru_sorted_cold_regions
+--------------------------
+
+Number of cold memory regions that successfully be LRU-sorted.
+
+bytes_lru_sorted_cold_regions
+-----------------------------
+
+Total bytes of cold memory regions that successfully be LRU-sorted.
+
+nr_cold_quota_exceeds
+---------------------
+
+Number of times that the time quota limit for cold regions have exceeded.
+
+Example
+=======
+
+Below runtime example commands make DAMON_LRU_SORT to find memory regions
+having >=50% access frequency and LRU-prioritize while LRU-deprioritizing
+memory regions that not accessed for 120 seconds. The prioritization and
+deprioritization is limited to be done using only up to 1% CPU time to avoid
+DAMON_LRU_SORT consuming too much CPU time for the (de)prioritization. It also
+asks DAMON_LRU_SORT to do nothing if the system's free memory rate is more than
+50%, but start the real works if it becomes lower than 40%. If DAMON_RECLAIM
+doesn't make progress and therefore the free memory rate becomes lower than
+20%, it asks DAMON_LRU_SORT to do nothing again, so that we can fall back to
+the LRU-list based page granularity reclamation. ::
+
+ # cd /sys/module/damon_lru_sort/parameters
+ # echo 500 > hot_thres_access_freq
+ # echo 120000000 > cold_min_age
+ # echo 10 > quota_ms
+ # echo 1000 > quota_reset_interval_ms
+ # echo 500 > wmarks_high
+ # echo 400 > wmarks_mid
+ # echo 200 > wmarks_low
+ # echo Y > enabled
diff --git a/Documentation/admin-guide/mm/damon/reclaim.rst b/Documentation/admin-guide/mm/damon/reclaim.rst
new file mode 100644
index 000000000000..af05ae617018
--- /dev/null
+++ b/Documentation/admin-guide/mm/damon/reclaim.rst
@@ -0,0 +1,301 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======================
+DAMON-based Reclamation
+=======================
+
+DAMON-based Reclamation (DAMON_RECLAIM) is a static kernel module that aimed to
+be used for proactive and lightweight reclamation under light memory pressure.
+It doesn't aim to replace the LRU-list based page_granularity reclamation, but
+to be selectively used for different level of memory pressure and requirements.
+
+Where Proactive Reclamation is Required?
+========================================
+
+On general memory over-committed systems, proactively reclaiming cold pages
+helps saving memory and reducing latency spikes that incurred by the direct
+reclaim of the process or CPU consumption of kswapd, while incurring only
+minimal performance degradation [1]_ [2]_ .
+
+Free Pages Reporting [3]_ based memory over-commit virtualization systems are
+good example of the cases. In such systems, the guest VMs reports their free
+memory to host, and the host reallocates the reported memory to other guests.
+As a result, the memory of the systems are fully utilized. However, the
+guests could be not so memory-frugal, mainly because some kernel subsystems and
+user-space applications are designed to use as much memory as available. Then,
+guests could report only small amount of memory as free to host, results in
+memory utilization drop of the systems. Running the proactive reclamation in
+guests could mitigate this problem.
+
+How It Works?
+=============
+
+DAMON_RECLAIM finds memory regions that didn't accessed for specific time
+duration and page out. To avoid it consuming too much CPU for the paging out
+operation, a speed limit can be configured. Under the speed limit, it pages
+out memory regions that didn't accessed longer time first. System
+administrators can also configure under what situation this scheme should
+automatically activated and deactivated with three memory pressure watermarks.
+
+Interface: Module Parameters
+============================
+
+To use this feature, you should first ensure your system is running on a kernel
+that is built with ``CONFIG_DAMON_RECLAIM=y``.
+
+To let sysadmins enable or disable it and tune for the given system,
+DAMON_RECLAIM utilizes module parameters. That is, you can put
+``damon_reclaim.<parameter>=<value>`` on the kernel boot command line or write
+proper values to ``/sys/module/damon_reclaim/parameters/<parameter>`` files.
+
+Below are the description of each parameter.
+
+enabled
+-------
+
+Enable or disable DAMON_RECLAIM.
+
+You can enable DAMON_RCLAIM by setting the value of this parameter as ``Y``.
+Setting it as ``N`` disables DAMON_RECLAIM. Note that DAMON_RECLAIM could do
+no real monitoring and reclamation due to the watermarks-based activation
+condition. Refer to below descriptions for the watermarks parameter for this.
+
+commit_inputs
+-------------
+
+Make DAMON_RECLAIM reads the input parameters again, except ``enabled``.
+
+Input parameters that updated while DAMON_RECLAIM is running are not applied
+by default. Once this parameter is set as ``Y``, DAMON_RECLAIM reads values
+of parametrs except ``enabled`` again. Once the re-reading is done, this
+parameter is set as ``N``. If invalid parameters are found while the
+re-reading, DAMON_RECLAIM will be disabled.
+
+min_age
+-------
+
+Time threshold for cold memory regions identification in microseconds.
+
+If a memory region is not accessed for this or longer time, DAMON_RECLAIM
+identifies the region as cold, and reclaims it.
+
+120 seconds by default.
+
+quota_ms
+--------
+
+Limit of time for the reclamation in milliseconds.
+
+DAMON_RECLAIM tries to use only up to this time within a time window
+(quota_reset_interval_ms) for trying reclamation of cold pages. This can be
+used for limiting CPU consumption of DAMON_RECLAIM. If the value is zero, the
+limit is disabled.
+
+10 ms by default.
+
+quota_sz
+--------
+
+Limit of size of memory for the reclamation in bytes.
+
+DAMON_RECLAIM charges amount of memory which it tried to reclaim within a time
+window (quota_reset_interval_ms) and makes no more than this limit is tried.
+This can be used for limiting consumption of CPU and IO. If this value is
+zero, the limit is disabled.
+
+128 MiB by default.
+
+quota_reset_interval_ms
+-----------------------
+
+The time/size quota charge reset interval in milliseconds.
+
+The charget reset interval for the quota of time (quota_ms) and size
+(quota_sz). That is, DAMON_RECLAIM does not try reclamation for more than
+quota_ms milliseconds or quota_sz bytes within quota_reset_interval_ms
+milliseconds.
+
+1 second by default.
+
+quota_mem_pressure_us
+---------------------
+
+Desired level of memory pressure-stall time in microseconds.
+
+While keeping the caps that set by other quotas, DAMON_RECLAIM automatically
+increases and decreases the effective level of the quota aiming this level of
+memory pressure is incurred. System-wide ``some`` memory PSI in microseconds
+per quota reset interval (``quota_reset_interval_ms``) is collected and
+compared to this value to see if the aim is satisfied. Value zero means
+disabling this auto-tuning feature.
+
+Disabled by default.
+
+quota_autotune_feedback
+-----------------------
+
+User-specifiable feedback for auto-tuning of the effective quota.
+
+While keeping the caps that set by other quotas, DAMON_RECLAIM automatically
+increases and decreases the effective level of the quota aiming receiving this
+feedback of value ``10,000`` from the user. DAMON_RECLAIM assumes the feedback
+value and the quota are positively proportional. Value zero means disabling
+this auto-tuning feature.
+
+Disabled by default.
+
+wmarks_interval
+---------------
+
+Minimal time to wait before checking the watermarks, when DAMON_RECLAIM is
+enabled but inactive due to its watermarks rule.
+
+wmarks_high
+-----------
+
+Free memory rate (per thousand) for the high watermark.
+
+If free memory of the system in bytes per thousand bytes is higher than this,
+DAMON_RECLAIM becomes inactive, so it does nothing but only periodically checks
+the watermarks.
+
+wmarks_mid
+----------
+
+Free memory rate (per thousand) for the middle watermark.
+
+If free memory of the system in bytes per thousand bytes is between this and
+the low watermark, DAMON_RECLAIM becomes active, so starts the monitoring and
+the reclaiming.
+
+wmarks_low
+----------
+
+Free memory rate (per thousand) for the low watermark.
+
+If free memory of the system in bytes per thousand bytes is lower than this,
+DAMON_RECLAIM becomes inactive, so it does nothing but periodically checks the
+watermarks. In the case, the system falls back to the LRU-list based page
+granularity reclamation logic.
+
+sample_interval
+---------------
+
+Sampling interval for the monitoring in microseconds.
+
+The sampling interval of DAMON for the cold memory monitoring. Please refer to
+the DAMON documentation (:doc:`usage`) for more detail.
+
+aggr_interval
+-------------
+
+Aggregation interval for the monitoring in microseconds.
+
+The aggregation interval of DAMON for the cold memory monitoring. Please
+refer to the DAMON documentation (:doc:`usage`) for more detail.
+
+min_nr_regions
+--------------
+
+Minimum number of monitoring regions.
+
+The minimal number of monitoring regions of DAMON for the cold memory
+monitoring. This can be used to set lower-bound of the monitoring quality.
+But, setting this too high could result in increased monitoring overhead.
+Please refer to the DAMON documentation (:doc:`usage`) for more detail.
+
+max_nr_regions
+--------------
+
+Maximum number of monitoring regions.
+
+The maximum number of monitoring regions of DAMON for the cold memory
+monitoring. This can be used to set upper-bound of the monitoring overhead.
+However, setting this too low could result in bad monitoring quality. Please
+refer to the DAMON documentation (:doc:`usage`) for more detail.
+
+monitor_region_start
+--------------------
+
+Start of target memory region in physical address.
+
+The start physical address of memory region that DAMON_RECLAIM will do work
+against. That is, DAMON_RECLAIM will find cold memory regions in this region
+and reclaims. By default, biggest System RAM is used as the region.
+
+monitor_region_end
+------------------
+
+End of target memory region in physical address.
+
+The end physical address of memory region that DAMON_RECLAIM will do work
+against. That is, DAMON_RECLAIM will find cold memory regions in this region
+and reclaims. By default, biggest System RAM is used as the region.
+
+skip_anon
+---------
+
+Skip anonymous pages reclamation.
+
+If this parameter is set as ``Y``, DAMON_RECLAIM does not reclaim anonymous
+pages. By default, ``N``.
+
+
+kdamond_pid
+-----------
+
+PID of the DAMON thread.
+
+If DAMON_RECLAIM is enabled, this becomes the PID of the worker thread. Else,
+-1.
+
+nr_reclaim_tried_regions
+------------------------
+
+Number of memory regions that tried to be reclaimed by DAMON_RECLAIM.
+
+bytes_reclaim_tried_regions
+---------------------------
+
+Total bytes of memory regions that tried to be reclaimed by DAMON_RECLAIM.
+
+nr_reclaimed_regions
+--------------------
+
+Number of memory regions that successfully be reclaimed by DAMON_RECLAIM.
+
+bytes_reclaimed_regions
+-----------------------
+
+Total bytes of memory regions that successfully be reclaimed by DAMON_RECLAIM.
+
+nr_quota_exceeds
+----------------
+
+Number of times that the time/space quota limits have exceeded.
+
+Example
+=======
+
+Below runtime example commands make DAMON_RECLAIM to find memory regions that
+not accessed for 30 seconds or more and pages out. The reclamation is limited
+to be done only up to 1 GiB per second to avoid DAMON_RECLAIM consuming too
+much CPU time for the paging out operation. It also asks DAMON_RECLAIM to do
+nothing if the system's free memory rate is more than 50%, but start the real
+works if it becomes lower than 40%. If DAMON_RECLAIM doesn't make progress and
+therefore the free memory rate becomes lower than 20%, it asks DAMON_RECLAIM to
+do nothing again, so that we can fall back to the LRU-list based page
+granularity reclamation. ::
+
+ # cd /sys/module/damon_reclaim/parameters
+ # echo 30000000 > min_age
+ # echo $((1 * 1024 * 1024 * 1024)) > quota_sz
+ # echo 1000 > quota_reset_interval_ms
+ # echo 500 > wmarks_high
+ # echo 400 > wmarks_mid
+ # echo 200 > wmarks_low
+ # echo Y > enabled
+
+.. [1] https://research.google/pubs/pub48551/
+.. [2] https://lwn.net/Articles/787611/
+.. [3] https://www.kernel.org/doc/html/latest/mm/free_page_reporting.html
diff --git a/Documentation/admin-guide/mm/damon/start.rst b/Documentation/admin-guide/mm/damon/start.rst
new file mode 100644
index 000000000000..ec8c34b2d32f
--- /dev/null
+++ b/Documentation/admin-guide/mm/damon/start.rst
@@ -0,0 +1,178 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============
+Getting Started
+===============
+
+This document briefly describes how you can use DAMON by demonstrating its
+default user space tool. Please note that this document describes only a part
+of its features for brevity. Please refer to the usage `doc
+<https://github.com/damonitor/damo/blob/next/USAGE.md>`_ of the tool for more
+details.
+
+
+Prerequisites
+=============
+
+Kernel
+------
+
+You should first ensure your system is running on a kernel built with
+``CONFIG_DAMON_*=y``.
+
+
+User Space Tool
+---------------
+
+For the demonstration, we will use the default user space tool for DAMON,
+called DAMON Operator (DAMO). It is available at
+https://github.com/damonitor/damo. The examples below assume that ``damo`` is on
+your ``$PATH``. It's not mandatory, though.
+
+Because DAMO is using the sysfs interface (refer to :doc:`usage` for the
+detail) of DAMON, you should ensure :doc:`sysfs </filesystems/sysfs>` is
+mounted.
+
+
+Snapshot Data Access Patterns
+=============================
+
+The commands below show the memory access pattern of a program at the moment of
+the execution. ::
+
+ $ git clone https://github.com/sjp38/masim; cd masim; make
+ $ sudo damo start "./masim ./configs/stairs.cfg --quiet"
+ $ sudo damo report access
+ heatmap: 641111111000000000000000000000000000000000000000000000[...]33333333333333335557984444[...]7
+ # min/max temperatures: -1,840,000,000, 370,010,000, column size: 3.925 MiB
+ 0 addr 86.182 TiB size 8.000 KiB access 0 % age 14.900 s
+ 1 addr 86.182 TiB size 8.000 KiB access 60 % age 0 ns
+ 2 addr 86.182 TiB size 3.422 MiB access 0 % age 4.100 s
+ 3 addr 86.182 TiB size 2.004 MiB access 95 % age 2.200 s
+ 4 addr 86.182 TiB size 29.688 MiB access 0 % age 14.100 s
+ 5 addr 86.182 TiB size 29.516 MiB access 0 % age 16.700 s
+ 6 addr 86.182 TiB size 29.633 MiB access 0 % age 17.900 s
+ 7 addr 86.182 TiB size 117.652 MiB access 0 % age 18.400 s
+ 8 addr 126.990 TiB size 62.332 MiB access 0 % age 9.500 s
+ 9 addr 126.990 TiB size 13.980 MiB access 0 % age 5.200 s
+ 10 addr 126.990 TiB size 9.539 MiB access 100 % age 3.700 s
+ 11 addr 126.990 TiB size 16.098 MiB access 0 % age 6.400 s
+ 12 addr 127.987 TiB size 132.000 KiB access 0 % age 2.900 s
+ total size: 314.008 MiB
+ $ sudo damo stop
+
+The first command of the above example downloads and builds an artificial
+memory access generator program called ``masim``. The second command asks DAMO
+to start the program via the given command and make DAMON monitors the newly
+started process. The third command retrieves the current snapshot of the
+monitored access pattern of the process from DAMON and shows the pattern in a
+human readable format.
+
+The first line of the output shows the relative access temperature (hotness) of
+the regions in a single row hetmap format. Each column on the heatmap
+represents regions of same size on the monitored virtual address space. The
+position of the colun on the row and the number on the column represents the
+relative location and access temperature of the region. ``[...]`` means
+unmapped huge regions on the virtual address spaces. The second line shows
+additional information for better understanding the heatmap.
+
+Each line of the output from the third line shows which virtual address range
+(``addr XX size XX``) of the process is how frequently (``access XX %``)
+accessed for how long time (``age XX``). For example, the evelenth region of
+~9.5 MiB size is being most frequently accessed for last 3.7 seconds. Finally,
+the fourth command stops DAMON.
+
+Note that DAMON can monitor not only virtual address spaces but multiple types
+of address spaces including the physical address space.
+
+
+Recording Data Access Patterns
+==============================
+
+The commands below record the memory access patterns of a program and save the
+monitoring results to a file. ::
+
+ $ ./masim ./configs/zigzag.cfg &
+ $ sudo damo record -o damon.data $(pidof masim)
+
+The line of the commands run the artificial memory access
+generator program again. The generator will repeatedly
+access two 100 MiB sized memory regions one by one. You can substitute this
+with your real workload. The last line asks ``damo`` to record the access
+pattern in the ``damon.data`` file.
+
+
+Visualizing Recorded Patterns
+=============================
+
+You can visualize the pattern in a heatmap, showing which memory region
+(x-axis) got accessed when (y-axis) and how frequently (number).::
+
+ $ sudo damo report heatmap
+ 22222222222222222222222222222222222222211111111111111111111111111111111111111100
+ 44444444444444444444444444444444444444434444444444444444444444444444444444443200
+ 44444444444444444444444444444444444444433444444444444444444444444444444444444200
+ 33333333333333333333333333333333333333344555555555555555555555555555555555555200
+ 33333333333333333333333333333333333344444444444444444444444444444444444444444200
+ 22222222222222222222222222222222222223355555555555555555555555555555555555555200
+ 00000000000000000000000000000000000000288888888888888888888888888888888888888400
+ 00000000000000000000000000000000000000288888888888888888888888888888888888888400
+ 33333333333333333333333333333333333333355555555555555555555555555555555555555200
+ 88888888888888888888888888888888888888600000000000000000000000000000000000000000
+ 88888888888888888888888888888888888888600000000000000000000000000000000000000000
+ 33333333333333333333333333333333333333444444444444444444444444444444444444443200
+ 00000000000000000000000000000000000000288888888888888888888888888888888888888400
+ [...]
+ # access_frequency: 0 1 2 3 4 5 6 7 8 9
+ # x-axis: space (139728247021568-139728453431248: 196.848 MiB)
+ # y-axis: time (15256597248362-15326899978162: 1 m 10.303 s)
+ # resolution: 80x40 (2.461 MiB and 1.758 s for each character)
+
+You can also visualize the distribution of the working set size, sorted by the
+size.::
+
+ $ sudo damo report wss --range 0 101 10
+ # <percentile> <wss>
+ # target_id 18446632103789443072
+ # avr: 107.708 MiB
+ 0 0 B | |
+ 10 95.328 MiB |**************************** |
+ 20 95.332 MiB |**************************** |
+ 30 95.340 MiB |**************************** |
+ 40 95.387 MiB |**************************** |
+ 50 95.387 MiB |**************************** |
+ 60 95.398 MiB |**************************** |
+ 70 95.398 MiB |**************************** |
+ 80 95.504 MiB |**************************** |
+ 90 190.703 MiB |********************************************************* |
+ 100 196.875 MiB |***********************************************************|
+
+Using ``--sortby`` option with the above command, you can show how the working
+set size has chronologically changed.::
+
+ $ sudo damo report wss --range 0 101 10 --sortby time
+ # <percentile> <wss>
+ # target_id 18446632103789443072
+ # avr: 107.708 MiB
+ 0 3.051 MiB | |
+ 10 190.703 MiB |***********************************************************|
+ 20 95.336 MiB |***************************** |
+ 30 95.328 MiB |***************************** |
+ 40 95.387 MiB |***************************** |
+ 50 95.332 MiB |***************************** |
+ 60 95.320 MiB |***************************** |
+ 70 95.398 MiB |***************************** |
+ 80 95.398 MiB |***************************** |
+ 90 95.340 MiB |***************************** |
+ 100 95.398 MiB |***************************** |
+
+
+Data Access Pattern Aware Memory Management
+===========================================
+
+Below command makes every memory region of size >=4K that has not accessed for
+>=60 seconds in your workload to be swapped out. ::
+
+ $ sudo damo start --damos_access_rate 0 0 --damos_sz_region 4K max \
+ --damos_age 60s max --damos_action pageout \
+ --target_pid <pid of your workload>
diff --git a/Documentation/admin-guide/mm/damon/stat.rst b/Documentation/admin-guide/mm/damon/stat.rst
new file mode 100644
index 000000000000..4c517c2c219a
--- /dev/null
+++ b/Documentation/admin-guide/mm/damon/stat.rst
@@ -0,0 +1,69 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================================
+Data Access Monitoring Results Stat
+===================================
+
+Data Access Monitoring Results Stat (DAMON_STAT) is a static kernel module that
+is aimed to be used for simple access pattern monitoring. It monitors accesses
+on the system's entire physical memory using DAMON, and provides simplified
+access monitoring results statistics, namely idle time percentiles and
+estimated memory bandwidth.
+
+Monitoring Accuracy and Overhead
+================================
+
+DAMON_STAT uses monitoring intervals :ref:`auto-tuning
+<damon_design_monitoring_intervals_autotuning>` to make its accuracy high and
+overhead minimum. It auto-tunes the intervals aiming 4 % of observable access
+events to be captured in each snapshot, while limiting the resulting sampling
+events to be 5 milliseconds in minimum and 10 seconds in maximum. On a few
+production server systems, it resulted in consuming only 0.x % single CPU time,
+while capturing reasonable quality of access patterns.
+
+Interface: Module Parameters
+============================
+
+To use this feature, you should first ensure your system is running on a kernel
+that is built with ``CONFIG_DAMON_STAT=y``. The feature can be enabled by
+default at build time, by setting ``CONFIG_DAMON_STAT_ENABLED_DEFAULT`` true.
+
+To let sysadmins enable or disable it at boot and/or runtime, and read the
+monitoring results, DAMON_STAT provides module parameters. Following
+sections are descriptions of the parameters.
+
+enabled
+-------
+
+Enable or disable DAMON_STAT.
+
+You can enable DAMON_STAT by setting the value of this parameter as ``Y``.
+Setting it as ``N`` disables DAMON_STAT. The default value is set by
+``CONFIG_DAMON_STAT_ENABLED_DEFAULT`` build config option.
+
+estimated_memory_bandwidth
+--------------------------
+
+Estimated memory bandwidth consumption (bytes per second) of the system.
+
+DAMON_STAT reads observed access events on the current DAMON results snapshot
+and converts it to memory bandwidth consumption estimation in bytes per second.
+The resulting metric is exposed to user via this read-only parameter. Because
+DAMON uses sampling, this is only an estimation of the access intensity rather
+than accurate memory bandwidth.
+
+memory_idle_ms_percentiles
+--------------------------
+
+Per-byte idle time (milliseconds) percentiles of the system.
+
+DAMON_STAT calculates how long each byte of the memory was not accessed until
+now (idle time), based on the current DAMON results snapshot. If DAMON found a
+region of access frequency (nr_accesses) larger than zero, every byte of the
+region gets zero idle time. If a region has zero access frequency
+(nr_accesses), how long the region was keeping the zero access frequency (age)
+becomes the idle time of every byte of the region. Then, DAMON_STAT exposes
+the percentiles of the idle time values via this read-only parameter. Reading
+the parameter returns 101 idle time values in milliseconds, separated by comma.
+Each value represents 0-th, 1st, 2nd, 3rd, ..., 99th and 100th percentile idle
+times.
diff --git a/Documentation/admin-guide/mm/damon/usage.rst b/Documentation/admin-guide/mm/damon/usage.rst
new file mode 100644
index 000000000000..eae534bc1bee
--- /dev/null
+++ b/Documentation/admin-guide/mm/damon/usage.rst
@@ -0,0 +1,671 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============
+Detailed Usages
+===============
+
+DAMON provides below interfaces for different users.
+
+- *DAMON user space tool.*
+ `This <https://github.com/damonitor/damo>`_ is for privileged people such as
+ system administrators who want a just-working human-friendly interface.
+ Using this, users can use the DAMON’s major features in a human-friendly way.
+ It may not be highly tuned for special cases, though. For more detail,
+ please refer to its `usage document
+ <https://github.com/damonitor/damo/blob/next/USAGE.md>`_.
+- *sysfs interface.*
+ :ref:`This <sysfs_interface>` is for privileged user space programmers who
+ want more optimized use of DAMON. Using this, users can use DAMON’s major
+ features by reading from and writing to special sysfs files. Therefore,
+ you can write and use your personalized DAMON sysfs wrapper programs that
+ reads/writes the sysfs files instead of you. The `DAMON user space tool
+ <https://github.com/damonitor/damo>`_ is one example of such programs.
+- *Kernel Space Programming Interface.*
+ :doc:`This </mm/damon/api>` is for kernel space programmers. Using this,
+ users can utilize every feature of DAMON most flexibly and efficiently by
+ writing kernel space DAMON application programs for you. You can even extend
+ DAMON for various address spaces. For detail, please refer to the interface
+ :doc:`document </mm/damon/api>`.
+
+.. _sysfs_interface:
+
+sysfs Interface
+===============
+
+DAMON sysfs interface is built when ``CONFIG_DAMON_SYSFS`` is defined. It
+creates multiple directories and files under its sysfs directory,
+``<sysfs>/kernel/mm/damon/``. You can control DAMON by writing to and reading
+from the files under the directory.
+
+For a short example, users can monitor the virtual address space of a given
+workload as below. ::
+
+ # cd /sys/kernel/mm/damon/admin/
+ # echo 1 > kdamonds/nr_kdamonds && echo 1 > kdamonds/0/contexts/nr_contexts
+ # echo vaddr > kdamonds/0/contexts/0/operations
+ # echo 1 > kdamonds/0/contexts/0/targets/nr_targets
+ # echo $(pidof <workload>) > kdamonds/0/contexts/0/targets/0/pid_target
+ # echo on > kdamonds/0/state
+
+Files Hierarchy
+---------------
+
+The files hierarchy of DAMON sysfs interface is shown below. In the below
+figure, parents-children relations are represented with indentations, each
+directory is having ``/`` suffix, and files in each directory are separated by
+comma (",").
+
+.. parsed-literal::
+
+ :ref:`/sys/kernel/mm/damon <sysfs_root>`/admin
+ │ :ref:`kdamonds <sysfs_kdamonds>`/nr_kdamonds
+ │ │ :ref:`0 <sysfs_kdamond>`/state,pid,refresh_ms
+ │ │ │ :ref:`contexts <sysfs_contexts>`/nr_contexts
+ │ │ │ │ :ref:`0 <sysfs_context>`/avail_operations,operations,addr_unit
+ │ │ │ │ │ :ref:`monitoring_attrs <sysfs_monitoring_attrs>`/
+ │ │ │ │ │ │ intervals/sample_us,aggr_us,update_us
+ │ │ │ │ │ │ │ intervals_goal/access_bp,aggrs,min_sample_us,max_sample_us
+ │ │ │ │ │ │ nr_regions/min,max
+ │ │ │ │ │ :ref:`targets <sysfs_targets>`/nr_targets
+ │ │ │ │ │ │ :ref:`0 <sysfs_target>`/pid_target
+ │ │ │ │ │ │ │ :ref:`regions <sysfs_regions>`/nr_regions
+ │ │ │ │ │ │ │ │ :ref:`0 <sysfs_region>`/start,end
+ │ │ │ │ │ │ │ │ ...
+ │ │ │ │ │ │ ...
+ │ │ │ │ │ :ref:`schemes <sysfs_schemes>`/nr_schemes
+ │ │ │ │ │ │ :ref:`0 <sysfs_scheme>`/action,target_nid,apply_interval_us
+ │ │ │ │ │ │ │ :ref:`access_pattern <sysfs_access_pattern>`/
+ │ │ │ │ │ │ │ │ sz/min,max
+ │ │ │ │ │ │ │ │ nr_accesses/min,max
+ │ │ │ │ │ │ │ │ age/min,max
+ │ │ │ │ │ │ │ :ref:`quotas <sysfs_quotas>`/ms,bytes,reset_interval_ms,effective_bytes
+ │ │ │ │ │ │ │ │ weights/sz_permil,nr_accesses_permil,age_permil
+ │ │ │ │ │ │ │ │ :ref:`goals <sysfs_schemes_quota_goals>`/nr_goals
+ │ │ │ │ │ │ │ │ │ 0/target_metric,target_value,current_value,nid
+ │ │ │ │ │ │ │ :ref:`watermarks <sysfs_watermarks>`/metric,interval_us,high,mid,low
+ │ │ │ │ │ │ │ :ref:`{core_,ops_,}filters <sysfs_filters>`/nr_filters
+ │ │ │ │ │ │ │ │ 0/type,matching,allow,memcg_path,addr_start,addr_end,target_idx,min,max
+ │ │ │ │ │ │ │ :ref:`dests <damon_sysfs_dests>`/nr_dests
+ │ │ │ │ │ │ │ │ 0/id,weight
+ │ │ │ │ │ │ │ :ref:`stats <sysfs_schemes_stats>`/nr_tried,sz_tried,nr_applied,sz_applied,sz_ops_filter_passed,qt_exceeds
+ │ │ │ │ │ │ │ :ref:`tried_regions <sysfs_schemes_tried_regions>`/total_bytes
+ │ │ │ │ │ │ │ │ 0/start,end,nr_accesses,age,sz_filter_passed
+ │ │ │ │ │ │ │ │ ...
+ │ │ │ │ │ │ ...
+ │ │ │ │ ...
+ │ │ ...
+
+.. _sysfs_root:
+
+Root
+----
+
+The root of the DAMON sysfs interface is ``<sysfs>/kernel/mm/damon/``, and it
+has one directory named ``admin``. The directory contains the files for
+privileged user space programs' control of DAMON. User space tools or daemons
+having the root permission could use this directory.
+
+.. _sysfs_kdamonds:
+
+kdamonds/
+---------
+
+Under the ``admin`` directory, one directory, ``kdamonds``, which has files for
+controlling the kdamonds (refer to
+:ref:`design <damon_design_execution_model_and_data_structures>` for more
+details) exists. In the beginning, this directory has only one file,
+``nr_kdamonds``. Writing a number (``N``) to the file creates the number of
+child directories named ``0`` to ``N-1``. Each directory represents each
+kdamond.
+
+.. _sysfs_kdamond:
+
+kdamonds/<N>/
+-------------
+
+In each kdamond directory, three files (``state``, ``pid`` and ``refresh_ms``)
+and one directory (``contexts``) exist.
+
+Reading ``state`` returns ``on`` if the kdamond is currently running, or
+``off`` if it is not running.
+
+Users can write below commands for the kdamond to the ``state`` file.
+
+- ``on``: Start running.
+- ``off``: Stop running.
+- ``commit``: Read the user inputs in the sysfs files except ``state`` file
+ again.
+- ``update_tuned_intervals``: Update the contents of ``sample_us`` and
+ ``aggr_us`` files of the kdamond with the auto-tuning applied ``sampling
+ interval`` and ``aggregation interval`` for the files. Please refer to
+ :ref:`intervals_goal section <damon_usage_sysfs_monitoring_intervals_goal>`
+ for more details.
+- ``commit_schemes_quota_goals``: Read the DAMON-based operation schemes'
+ :ref:`quota goals <sysfs_schemes_quota_goals>`.
+- ``update_schemes_stats``: Update the contents of stats files for each
+ DAMON-based operation scheme of the kdamond. For details of the stats,
+ please refer to :ref:`stats section <sysfs_schemes_stats>`.
+- ``update_schemes_tried_regions``: Update the DAMON-based operation scheme
+ action tried regions directory for each DAMON-based operation scheme of the
+ kdamond. For details of the DAMON-based operation scheme action tried
+ regions directory, please refer to
+ :ref:`tried_regions section <sysfs_schemes_tried_regions>`.
+- ``update_schemes_tried_bytes``: Update only ``.../tried_regions/total_bytes``
+ files.
+- ``clear_schemes_tried_regions``: Clear the DAMON-based operating scheme
+ action tried regions directory for each DAMON-based operation scheme of the
+ kdamond.
+- ``update_schemes_effective_quotas``: Update the contents of
+ ``effective_bytes`` files for each DAMON-based operation scheme of the
+ kdamond. For more details, refer to :ref:`quotas directory <sysfs_quotas>`.
+
+If the state is ``on``, reading ``pid`` shows the pid of the kdamond thread.
+
+Users can ask the kernel to periodically update files showing auto-tuned
+parameters and DAMOS stats instead of manually writing
+``update_tuned_intervals`` like keywords to ``state`` file. For this, users
+should write the desired update time interval in milliseconds to ``refresh_ms``
+file. If the interval is zero, the periodic update is disabled. Reading the
+file shows currently set time interval.
+
+``contexts`` directory contains files for controlling the monitoring contexts
+that this kdamond will execute.
+
+.. _sysfs_contexts:
+
+kdamonds/<N>/contexts/
+----------------------
+
+In the beginning, this directory has only one file, ``nr_contexts``. Writing a
+number (``N``) to the file creates the number of child directories named as
+``0`` to ``N-1``. Each directory represents each monitoring context (refer to
+:ref:`design <damon_design_execution_model_and_data_structures>` for more
+details). At the moment, only one context per kdamond is supported, so only
+``0`` or ``1`` can be written to the file.
+
+.. _sysfs_context:
+
+contexts/<N>/
+-------------
+
+In each context directory, three files (``avail_operations``, ``operations``
+and ``addr_unit``) and three directories (``monitoring_attrs``, ``targets``,
+and ``schemes``) exist.
+
+DAMON supports multiple types of :ref:`monitoring operations
+<damon_design_configurable_operations_set>`, including those for virtual address
+space and the physical address space. You can get the list of available
+monitoring operations set on the currently running kernel by reading
+``avail_operations`` file. Based on the kernel configuration, the file will
+list different available operation sets. Please refer to the :ref:`design
+<damon_operations_set>` for the list of all available operation sets and their
+brief explanations.
+
+You can set and get what type of monitoring operations DAMON will use for the
+context by writing one of the keywords listed in ``avail_operations`` file and
+reading from the ``operations`` file.
+
+``addr_unit`` file is for setting and getting the :ref:`address unit
+<damon_design_addr_unit>` parameter of the operations set.
+
+.. _sysfs_monitoring_attrs:
+
+contexts/<N>/monitoring_attrs/
+------------------------------
+
+Files for specifying attributes of the monitoring including required quality
+and efficiency of the monitoring are in ``monitoring_attrs`` directory.
+Specifically, two directories, ``intervals`` and ``nr_regions`` exist in this
+directory.
+
+Under ``intervals`` directory, three files for DAMON's sampling interval
+(``sample_us``), aggregation interval (``aggr_us``), and update interval
+(``update_us``) exist. You can set and get the values in micro-seconds by
+writing to and reading from the files.
+
+Under ``nr_regions`` directory, two files for the lower-bound and upper-bound
+of DAMON's monitoring regions (``min`` and ``max``, respectively), which
+controls the monitoring overhead, exist. You can set and get the values by
+writing to and rading from the files.
+
+For more details about the intervals and monitoring regions range, please refer
+to the Design document (:doc:`/mm/damon/design`).
+
+.. _damon_usage_sysfs_monitoring_intervals_goal:
+
+contexts/<N>/monitoring_attrs/intervals/intervals_goal/
+-------------------------------------------------------
+
+Under the ``intervals`` directory, one directory for automated tuning of
+``sample_us`` and ``aggr_us``, namely ``intervals_goal`` directory also exists.
+Under the directory, four files for the auto-tuning control, namely
+``access_bp``, ``aggrs``, ``min_sample_us`` and ``max_sample_us`` exist.
+Please refer to the :ref:`design document of the feature
+<damon_design_monitoring_intervals_autotuning>` for the internal of the tuning
+mechanism. Reading and writing the four files under ``intervals_goal``
+directory shows and updates the tuning parameters that described in the
+:ref:design doc <damon_design_monitoring_intervals_autotuning>` with the same
+names. The tuning starts with the user-set ``sample_us`` and ``aggr_us``. The
+tuning-applied current values of the two intervals can be read from the
+``sample_us`` and ``aggr_us`` files after writing ``update_tuned_intervals`` to
+the ``state`` file.
+
+.. _sysfs_targets:
+
+contexts/<N>/targets/
+---------------------
+
+In the beginning, this directory has only one file, ``nr_targets``. Writing a
+number (``N``) to the file creates the number of child directories named ``0``
+to ``N-1``. Each directory represents each monitoring target.
+
+.. _sysfs_target:
+
+targets/<N>/
+------------
+
+In each target directory, one file (``pid_target``) and one directory
+(``regions``) exist.
+
+If you wrote ``vaddr`` to the ``contexts/<N>/operations``, each target should
+be a process. You can specify the process to DAMON by writing the pid of the
+process to the ``pid_target`` file.
+
+.. _sysfs_regions:
+
+targets/<N>/regions
+-------------------
+
+In case of ``fvaddr`` or ``paddr`` monitoring operations sets, users are
+required to set the monitoring target address ranges. In case of ``vaddr``
+operations set, it is not mandatory, but users can optionally set the initial
+monitoring region to specific address ranges. Please refer to the :ref:`design
+<damon_design_vaddr_target_regions_construction>` for more details.
+
+For such cases, users can explicitly set the initial monitoring target regions
+as they want, by writing proper values to the files under this directory.
+
+In the beginning, this directory has only one file, ``nr_regions``. Writing a
+number (``N``) to the file creates the number of child directories named ``0``
+to ``N-1``. Each directory represents each initial monitoring target region.
+
+.. _sysfs_region:
+
+regions/<N>/
+------------
+
+In each region directory, you will find two files (``start`` and ``end``). You
+can set and get the start and end addresses of the initial monitoring target
+region by writing to and reading from the files, respectively.
+
+Each region should not overlap with others. ``end`` of directory ``N`` should
+be equal or smaller than ``start`` of directory ``N+1``.
+
+.. _sysfs_schemes:
+
+contexts/<N>/schemes/
+---------------------
+
+The directory for DAMON-based Operation Schemes (:ref:`DAMOS
+<damon_design_damos>`). Users can get and set the schemes by reading from and
+writing to files under this directory.
+
+In the beginning, this directory has only one file, ``nr_schemes``. Writing a
+number (``N``) to the file creates the number of child directories named ``0``
+to ``N-1``. Each directory represents each DAMON-based operation scheme.
+
+.. _sysfs_scheme:
+
+schemes/<N>/
+------------
+
+In each scheme directory, eight directories (``access_pattern``, ``quotas``,
+``watermarks``, ``core_filters``, ``ops_filters``, ``filters``, ``dests``,
+``stats``, and ``tried_regions``) and three files (``action``, ``target_nid``
+and ``apply_interval``) exist.
+
+The ``action`` file is for setting and getting the scheme's :ref:`action
+<damon_design_damos_action>`. The keywords that can be written to and read
+from the file and their meaning are same to those of the list on
+:ref:`design doc <damon_design_damos_action>`.
+
+The ``target_nid`` file is for setting the migration target node, which is
+only meaningful when the ``action`` is either ``migrate_hot`` or
+``migrate_cold``.
+
+The ``apply_interval_us`` file is for setting and getting the scheme's
+:ref:`apply_interval <damon_design_damos>` in microseconds.
+
+.. _sysfs_access_pattern:
+
+schemes/<N>/access_pattern/
+---------------------------
+
+The directory for the target access :ref:`pattern
+<damon_design_damos_access_pattern>` of the given DAMON-based operation scheme.
+
+Under the ``access_pattern`` directory, three directories (``sz``,
+``nr_accesses``, and ``age``) each having two files (``min`` and ``max``)
+exist. You can set and get the access pattern for the given scheme by writing
+to and reading from the ``min`` and ``max`` files under ``sz``,
+``nr_accesses``, and ``age`` directories, respectively. Note that the ``min``
+and the ``max`` form a closed interval.
+
+.. _sysfs_quotas:
+
+schemes/<N>/quotas/
+-------------------
+
+The directory for the :ref:`quotas <damon_design_damos_quotas>` of the given
+DAMON-based operation scheme.
+
+Under ``quotas`` directory, four files (``ms``, ``bytes``,
+``reset_interval_ms``, ``effective_bytes``) and two directories (``weights`` and
+``goals``) exist.
+
+You can set the ``time quota`` in milliseconds, ``size quota`` in bytes, and
+``reset interval`` in milliseconds by writing the values to the three files,
+respectively. Then, DAMON tries to use only up to ``time quota`` milliseconds
+for applying the ``action`` to memory regions of the ``access_pattern``, and to
+apply the action to only up to ``bytes`` bytes of memory regions within the
+``reset_interval_ms``. Setting both ``ms`` and ``bytes`` zero disables the
+quota limits unless at least one :ref:`goal <sysfs_schemes_quota_goals>` is
+set.
+
+The time quota is internally transformed to a size quota. Between the
+transformed size quota and user-specified size quota, smaller one is applied.
+Based on the user-specified :ref:`goal <sysfs_schemes_quota_goals>`, the
+effective size quota is further adjusted. Reading ``effective_bytes`` returns
+the current effective size quota. The file is not updated in real time, so
+users should ask DAMON sysfs interface to update the content of the file for
+the stats by writing a special keyword, ``update_schemes_effective_quotas`` to
+the relevant ``kdamonds/<N>/state`` file.
+
+Under ``weights`` directory, three files (``sz_permil``,
+``nr_accesses_permil``, and ``age_permil``) exist.
+You can set the :ref:`prioritization weights
+<damon_design_damos_quotas_prioritization>` for size, access frequency, and age
+in per-thousand unit by writing the values to the three files under the
+``weights`` directory.
+
+.. _sysfs_schemes_quota_goals:
+
+schemes/<N>/quotas/goals/
+-------------------------
+
+The directory for the :ref:`automatic quota tuning goals
+<damon_design_damos_quotas_auto_tuning>` of the given DAMON-based operation
+scheme.
+
+In the beginning, this directory has only one file, ``nr_goals``. Writing a
+number (``N``) to the file creates the number of child directories named ``0``
+to ``N-1``. Each directory represents each goal and current achievement.
+Among the multiple feedback, the best one is used.
+
+Each goal directory contains four files, namely ``target_metric``,
+``target_value``, ``current_value`` and ``nid``. Users can set and get the
+four parameters for the quota auto-tuning goals that specified on the
+:ref:`design doc <damon_design_damos_quotas_auto_tuning>` by writing to and
+reading from each of the files. Note that users should further write
+``commit_schemes_quota_goals`` to the ``state`` file of the :ref:`kdamond
+directory <sysfs_kdamond>` to pass the feedback to DAMON.
+
+.. _sysfs_watermarks:
+
+schemes/<N>/watermarks/
+-----------------------
+
+The directory for the :ref:`watermarks <damon_design_damos_watermarks>` of the
+given DAMON-based operation scheme.
+
+Under the watermarks directory, five files (``metric``, ``interval_us``,
+``high``, ``mid``, and ``low``) for setting the metric, the time interval
+between check of the metric, and the three watermarks exist. You can set and
+get the five values by writing to the files, respectively.
+
+Keywords and meanings of those that can be written to the ``metric`` file are
+as below.
+
+ - none: Ignore the watermarks
+ - free_mem_rate: System's free memory rate (per thousand)
+
+The ``interval`` should written in microseconds unit.
+
+.. _sysfs_filters:
+
+schemes/<N>/{core\_,ops\_,}filters/
+-----------------------------------
+
+Directories for :ref:`filters <damon_design_damos_filters>` of the given
+DAMON-based operation scheme.
+
+``core_filters`` and ``ops_filters`` directories are for the filters handled by
+the DAMON core layer and operations set layer, respectively. ``filters``
+directory can be used for installing filters regardless of their handled
+layers. Filters that requested by ``core_filters`` and ``ops_filters`` will be
+installed before those of ``filters``. All three directories have same files.
+
+Use of ``filters`` directory can make expecting evaluation orders of given
+filters with the files under directory bit confusing. Users are hence
+recommended to use ``core_filters`` and ``ops_filters`` directories. The
+``filters`` directory could be deprecated in future.
+
+In the beginning, the directory has only one file, ``nr_filters``. Writing a
+number (``N``) to the file creates the number of child directories named ``0``
+to ``N-1``. Each directory represents each filter. The filters are evaluated
+in the numeric order.
+
+Each filter directory contains nine files, namely ``type``, ``matching``,
+``allow``, ``memcg_path``, ``addr_start``, ``addr_end``, ``min``, ``max``
+and ``target_idx``. To ``type`` file, you can write the type of the filter.
+Refer to :ref:`the design doc <damon_design_damos_filters>` for available type
+names, their meaning and on what layer those are handled.
+
+For ``memcg`` type, you can specify the memory cgroup of the interest by
+writing the path of the memory cgroup from the cgroups mount point to
+``memcg_path`` file. For ``addr`` type, you can specify the start and end
+address of the range (open-ended interval) to ``addr_start`` and ``addr_end``
+files, respectively. For ``hugepage_size`` type, you can specify the minimum
+and maximum size of the range (closed interval) to ``min`` and ``max`` files,
+respectively. For ``target`` type, you can specify the index of the target
+between the list of the DAMON context's monitoring targets list to
+``target_idx`` file.
+
+You can write ``Y`` or ``N`` to ``matching`` file to specify whether the filter
+is for memory that matches the ``type``. You can write ``Y`` or ``N`` to
+``allow`` file to specify if applying the action to the memory that satisfies
+the ``type`` and ``matching`` should be allowed or not.
+
+For example, below restricts a DAMOS action to be applied to only non-anonymous
+pages of all memory cgroups except ``/having_care_already``.::
+
+ # cd ops_filters/0/
+ # echo 2 > nr_filters
+ # # disallow anonymous pages
+ echo anon > 0/type
+ echo Y > 0/matching
+ echo N > 0/allow
+ # # further filter out all cgroups except one at '/having_care_already'
+ echo memcg > 1/type
+ echo /having_care_already > 1/memcg_path
+ echo Y > 1/matching
+ echo N > 1/allow
+
+Refer to the :ref:`DAMOS filters design documentation
+<damon_design_damos_filters>` for more details including how multiple filters
+of different ``allow`` works, when each of the filters are supported, and
+differences on stats.
+
+.. _damon_sysfs_dests:
+
+schemes/<N>/dests/
+------------------
+
+Directory for specifying the destinations of given DAMON-based operation
+scheme's action. This directory is ignored if the action of the given scheme
+is not supporting multiple destinations. Only ``DAMOS_MIGRATE_{HOT,COLD}``
+actions are supporting multiple destinations.
+
+In the beginning, the directory has only one file, ``nr_dests``. Writing a
+number (``N``) to the file creates the number of child directories named ``0``
+to ``N-1``. Each directory represents each action destination.
+
+Each destination directory contains two files, namely ``id`` and ``weight``.
+Users can write and read the identifier of the destination to ``id`` file.
+For ``DAMOS_MIGRATE_{HOT,COLD}`` actions, the migrate destination node's node
+id should be written to ``id`` file. Users can write and read the weight of
+the destination among the given destinations to the ``weight`` file. The
+weight can be an arbitrary integer. When DAMOS apply the action to each entity
+of the memory region, it will select the destination of the action based on the
+relative weights of the destinations.
+
+.. _sysfs_schemes_stats:
+
+schemes/<N>/stats/
+------------------
+
+DAMON counts statistics for each scheme. This statistics can be used for
+online analysis or tuning of the schemes. Refer to :ref:`design doc
+<damon_design_damos_stat>` for more details about the stats.
+
+The statistics can be retrieved by reading the files under ``stats`` directory
+(``nr_tried``, ``sz_tried``, ``nr_applied``, ``sz_applied``,
+``sz_ops_filter_passed``, and ``qt_exceeds``), respectively. The files are not
+updated in real time, so you should ask DAMON sysfs interface to update the
+content of the files for the stats by writing a special keyword,
+``update_schemes_stats`` to the relevant ``kdamonds/<N>/state`` file.
+
+.. _sysfs_schemes_tried_regions:
+
+schemes/<N>/tried_regions/
+--------------------------
+
+This directory initially has one file, ``total_bytes``.
+
+When a special keyword, ``update_schemes_tried_regions``, is written to the
+relevant ``kdamonds/<N>/state`` file, DAMON updates the ``total_bytes`` file so
+that reading it returns the total size of the scheme tried regions, and creates
+directories named integer starting from ``0`` under this directory. Each
+directory contains files exposing detailed information about each of the memory
+region that the corresponding scheme's ``action`` has tried to be applied under
+this directory, during next :ref:`apply interval <damon_design_damos>` of the
+corresponding scheme. The information includes address range, ``nr_accesses``,
+and ``age`` of the region.
+
+Writing ``update_schemes_tried_bytes`` to the relevant ``kdamonds/<N>/state``
+file will only update the ``total_bytes`` file, and will not create the
+subdirectories.
+
+The directories will be removed when another special keyword,
+``clear_schemes_tried_regions``, is written to the relevant
+``kdamonds/<N>/state`` file.
+
+The expected usage of this directory is investigations of schemes' behaviors,
+and query-like efficient data access monitoring results retrievals. For the
+latter use case, in particular, users can set the ``action`` as ``stat`` and
+set the ``access pattern`` as their interested pattern that they want to query.
+
+.. _sysfs_schemes_tried_region:
+
+tried_regions/<N>/
+------------------
+
+In each region directory, you will find five files (``start``, ``end``,
+``nr_accesses``, ``age``, and ``sz_filter_passed``). Reading the files will
+show the properties of the region that corresponding DAMON-based operation
+scheme ``action`` has tried to be applied.
+
+Example
+~~~~~~~
+
+Below commands applies a scheme saying "If a memory region of size in [4KiB,
+8KiB] is showing accesses per aggregate interval in [0, 5] for aggregate
+interval in [10, 20], page out the region. For the paging out, use only up to
+10ms per second, and also don't page out more than 1GiB per second. Under the
+limitation, page out memory regions having longer age first. Also, check the
+free memory rate of the system every 5 seconds, start the monitoring and paging
+out when the free memory rate becomes lower than 50%, but stop it if the free
+memory rate becomes larger than 60%, or lower than 30%". ::
+
+ # cd <sysfs>/kernel/mm/damon/admin
+ # # populate directories
+ # echo 1 > kdamonds/nr_kdamonds; echo 1 > kdamonds/0/contexts/nr_contexts;
+ # echo 1 > kdamonds/0/contexts/0/schemes/nr_schemes
+ # cd kdamonds/0/contexts/0/schemes/0
+ # # set the basic access pattern and the action
+ # echo 4096 > access_pattern/sz/min
+ # echo 8192 > access_pattern/sz/max
+ # echo 0 > access_pattern/nr_accesses/min
+ # echo 5 > access_pattern/nr_accesses/max
+ # echo 10 > access_pattern/age/min
+ # echo 20 > access_pattern/age/max
+ # echo pageout > action
+ # # set quotas
+ # echo 10 > quotas/ms
+ # echo $((1024*1024*1024)) > quotas/bytes
+ # echo 1000 > quotas/reset_interval_ms
+ # # set watermark
+ # echo free_mem_rate > watermarks/metric
+ # echo 5000000 > watermarks/interval_us
+ # echo 600 > watermarks/high
+ # echo 500 > watermarks/mid
+ # echo 300 > watermarks/low
+
+Please note that it's highly recommended to use user space tools like `damo
+<https://github.com/damonitor/damo>`_ rather than manually reading and writing
+the files as above. Above is only for an example.
+
+.. _tracepoint:
+
+Tracepoints for Monitoring Results
+==================================
+
+Users can get the monitoring results via the :ref:`tried_regions
+<sysfs_schemes_tried_regions>`. The interface is useful for getting a
+snapshot, but it could be inefficient for fully recording all the monitoring
+results. For the purpose, two trace points, namely ``damon:damon_aggregated``
+and ``damon:damos_before_apply``, are provided. ``damon:damon_aggregated``
+provides the whole monitoring results, while ``damon:damos_before_apply``
+provides the monitoring results for regions that each DAMON-based Operation
+Scheme (:ref:`DAMOS <damon_design_damos>`) is gonna be applied. Hence,
+``damon:damos_before_apply`` is more useful for recording internal behavior of
+DAMOS, or DAMOS target access
+:ref:`pattern <damon_design_damos_access_pattern>` based query-like efficient
+monitoring results recording.
+
+While the monitoring is turned on, you could record the tracepoint events and
+show results using tracepoint supporting tools like ``perf``. For example::
+
+ # echo on > kdamonds/0/state
+ # perf record -e damon:damon_aggregated &
+ # sleep 5
+ # kill 9 $(pidof perf)
+ # echo off > kdamonds/0/state
+ # perf script
+ kdamond.0 46568 [027] 79357.842179: damon:damon_aggregated: target_id=0 nr_regions=11 122509119488-135708762112: 0 864
+ [...]
+
+Each line of the perf script output represents each monitoring region. The
+first five fields are as usual other tracepoint outputs. The sixth field
+(``target_id=X``) shows the ide of the monitoring target of the region. The
+seventh field (``nr_regions=X``) shows the total number of monitoring regions
+for the target. The eighth field (``X-Y:``) shows the start (``X``) and end
+(``Y``) addresses of the region in bytes. The ninth field (``X``) shows the
+``nr_accesses`` of the region (refer to
+:ref:`design <damon_design_region_based_sampling>` for more details of the
+counter). Finally the tenth field (``X``) shows the ``age`` of the region
+(refer to :ref:`design <damon_design_age_tracking>` for more details of the
+counter).
+
+If the event was ``damon:damos_beofre_apply``, the ``perf script`` output would
+be somewhat like below::
+
+ kdamond.0 47293 [000] 80801.060214: damon:damos_before_apply: ctx_idx=0 scheme_idx=0 target_idx=0 nr_regions=11 121932607488-135128711168: 0 136
+ [...]
+
+Each line of the output represents each monitoring region that each DAMON-based
+Operation Scheme was about to be applied at the traced time. The first five
+fields are as usual. It shows the index of the DAMON context (``ctx_idx=X``)
+of the scheme in the list of the contexts of the context's kdamond, the index
+of the scheme (``scheme_idx=X``) in the list of the schemes of the context, in
+addition to the output of ``damon_aggregated`` tracepoint.
diff --git a/Documentation/admin-guide/mm/hugetlbpage.rst b/Documentation/admin-guide/mm/hugetlbpage.rst
index 1cc0bc78d10e..67a941903fd2 100644
--- a/Documentation/admin-guide/mm/hugetlbpage.rst
+++ b/Documentation/admin-guide/mm/hugetlbpage.rst
@@ -1,5 +1,3 @@
-.. _hugetlbpage:
-
=============
HugeTLB Pages
=============
@@ -60,8 +58,12 @@ HugePages_Surp
the pool above the value in ``/proc/sys/vm/nr_hugepages``. The
maximum number of surplus huge pages is controlled by
``/proc/sys/vm/nr_overcommit_hugepages``.
+ Note: When the feature of freeing unused vmemmap pages associated
+ with each hugetlb page is enabled, the number of surplus huge pages
+ may be temporarily larger than the maximum number of surplus huge
+ pages when the system is under memory pressure.
Hugepagesize
- is the default hugepage size (in Kb).
+ is the default hugepage size (in kB).
Hugetlb
is the total amount of memory (in kB), consumed by huge
pages of all sizes.
@@ -80,6 +82,10 @@ returned to the huge page pool when freed by a task. A user with root
privileges can dynamically allocate more or free some persistent huge pages
by increasing or decreasing the value of ``nr_hugepages``.
+Note: When the feature of freeing unused vmemmap pages associated with each
+hugetlb page is enabled, we can fail to free the huge pages triggered by
+the user when the system is under memory pressure. Please try again later.
+
Pages that are used as huge pages are reserved inside the kernel and cannot
be used for other purposes. Huge pages cannot be swapped out under
memory pressure.
@@ -100,6 +106,75 @@ with a huge page size selection parameter "hugepagesz=<size>". <size> must
be specified in bytes with optional scale suffix [kKmMgG]. The default huge
page size may be selected with the "default_hugepagesz=<size>" boot parameter.
+Hugetlb boot command line parameter semantics
+
+hugepagesz
+ Specify a huge page size. Used in conjunction with hugepages
+ parameter to preallocate a number of huge pages of the specified
+ size. Hence, hugepagesz and hugepages are typically specified in
+ pairs such as::
+
+ hugepagesz=2M hugepages=512
+
+ hugepagesz can only be specified once on the command line for a
+ specific huge page size. Valid huge page sizes are architecture
+ dependent.
+hugepages
+ Specify the number of huge pages to preallocate. This typically
+ follows a valid hugepagesz or default_hugepagesz parameter. However,
+ if hugepages is the first or only hugetlb command line parameter it
+ implicitly specifies the number of huge pages of default size to
+ allocate. If the number of huge pages of default size is implicitly
+ specified, it can not be overwritten by a hugepagesz,hugepages
+ parameter pair for the default size. This parameter also has a
+ node format. The node format specifies the number of huge pages
+ to allocate on specific nodes.
+
+ For example, on an architecture with 2M default huge page size::
+
+ hugepages=256 hugepagesz=2M hugepages=512
+
+ will result in 256 2M huge pages being allocated and a warning message
+ indicating that the hugepages=512 parameter is ignored. If a hugepages
+ parameter is preceded by an invalid hugepagesz parameter, it will
+ be ignored.
+
+ Node format example::
+
+ hugepagesz=2M hugepages=0:1,1:2
+
+ It will allocate 1 2M hugepage on node0 and 2 2M hugepages on node1.
+ If the node number is invalid, the parameter will be ignored.
+hugepage_alloc_threads
+ Specify the number of threads that should be used to allocate hugepages
+ during boot. This parameter can be used to improve system bootup time
+ when allocating a large amount of huge pages.
+
+ The default value is 25% of the available hardware threads.
+ Example to use 8 allocation threads::
+
+ hugepage_alloc_threads=8
+
+ Note that this parameter only applies to non-gigantic huge pages.
+default_hugepagesz
+ Specify the default huge page size. This parameter can
+ only be specified once on the command line. default_hugepagesz can
+ optionally be followed by the hugepages parameter to preallocate a
+ specific number of huge pages of default size. The number of default
+ sized huge pages to preallocate can also be implicitly specified as
+ mentioned in the hugepages section above. Therefore, on an
+ architecture with 2M default huge page size::
+
+ hugepages=256
+ default_hugepagesz=2M hugepages=256
+ hugepages=256 default_hugepagesz=2M
+
+ will all result in 256 2M huge pages being allocated. Valid default
+ huge page size is architecture dependent.
+hugetlb_free_vmemmap
+ When CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP is set, this enables HugeTLB
+ Vmemmap Optimization (HVO).
+
When multiple huge page sizes are supported, ``/proc/sys/vm/nr_hugepages``
indicates the current number of pre-allocated huge pages of the default size.
Thus, one can use the following command to dynamically allocate/deallocate
@@ -177,8 +252,12 @@ will exist, of the form::
hugepages-${size}kB
-Inside each of these directories, the same set of files will exist::
+Inside each of these directories, the set of files contained in ``/proc``
+will exist. In addition, two additional interfaces for demoting huge
+pages may exist::
+ demote
+ demote_size
nr_hugepages
nr_hugepages_mempolicy
nr_overcommit_hugepages
@@ -186,7 +265,29 @@ Inside each of these directories, the same set of files will exist::
resv_hugepages
surplus_hugepages
-which function as described above for the default huge page-sized case.
+The demote interfaces provide the ability to split a huge page into
+smaller huge pages. For example, the x86 architecture supports both
+1GB and 2MB huge pages sizes. A 1GB huge page can be split into 512
+2MB huge pages. Demote interfaces are not available for the smallest
+huge page size. The demote interfaces are:
+
+demote_size
+ is the size of demoted pages. When a page is demoted a corresponding
+ number of huge pages of demote_size will be created. By default,
+ demote_size is set to the next smaller huge page size. If there are
+ multiple smaller huge page sizes, demote_size can be set to any of
+ these smaller sizes. Only huge page sizes less than the current huge
+ pages size are allowed.
+
+demote
+ is used to demote a number of huge pages. A user with root privileges
+ can write to this file. It may not be possible to demote the
+ requested number of huge pages. To determine how many pages were
+ actually demoted, compare the value of nr_hugepages before and after
+ writing to the demote interface. demote is a write only interface.
+
+The interfaces which are the same as in ``/proc`` (all except demote and
+demote_size) function as described above for the default huge page-sized case.
.. _mem_policy_and_hp_alloc:
@@ -220,7 +321,7 @@ memory policy mode--bind, preferred, local or interleave--may be used. The
resulting effect on persistent huge page allocation is as follows:
#. Regardless of mempolicy mode [see
- :ref:`Documentation/admin-guide/mm/numa_memory_policy.rst <numa_memory_policy>`],
+ Documentation/admin-guide/mm/numa_memory_policy.rst],
persistent huge pages will be distributed across the node or nodes
specified in the mempolicy as if "interleave" had been specified.
However, if a node in the policy does not contain sufficient contiguous
@@ -285,6 +386,13 @@ Note that the number of overcommit and reserve pages remain global quantities,
as we don't know until fault time, when the faulting task's mempolicy is
applied, from which node the huge page allocation will be attempted.
+The hugetlb may be migrated between the per-node hugepages pool in the following
+scenarios: memory offline, memory failure, longterm pinning, syscalls(mbind,
+migrate_pages and move_pages), alloc_contig_range() and alloc_contig_pages().
+Now only memory offline, memory failure and syscalls allow fallbacking to allocate
+a new hugetlb on a different node if the current node is unable to allocate during
+hugetlb migration, that means these 3 cases can break the per-node hugepages pool.
+
.. _using_huge_pages:
Using Huge Pages
@@ -368,13 +476,13 @@ Examples
.. _map_hugetlb:
``map_hugetlb``
- see tools/testing/selftests/vm/map_hugetlb.c
+ see tools/testing/selftests/mm/map_hugetlb.c
``hugepage-shm``
- see tools/testing/selftests/vm/hugepage-shm.c
+ see tools/testing/selftests/mm/hugepage-shm.c
``hugepage-mmap``
- see tools/testing/selftests/vm/hugepage-mmap.c
+ see tools/testing/selftests/mm/hugepage-mmap.c
The `libhugetlbfs`_ library provides a wide range of userspace tools
to help with huge page usability, environment setup, and control.
diff --git a/Documentation/admin-guide/mm/idle_page_tracking.rst b/Documentation/admin-guide/mm/idle_page_tracking.rst
index df9394fb39c2..16fcf38dac56 100644
--- a/Documentation/admin-guide/mm/idle_page_tracking.rst
+++ b/Documentation/admin-guide/mm/idle_page_tracking.rst
@@ -1,5 +1,3 @@
-.. _idle_page_tracking:
-
==================
Idle Page Tracking
==================
@@ -65,14 +63,13 @@ workload one should:
are not reclaimable, he or she can filter them out using
``/proc/kpageflags``.
-The page-types tool in the tools/vm directory can be used to assist in this.
+The page-types tool in the tools/mm directory can be used to assist in this.
If the tool is run initially with the appropriate option, it will mark all the
queried pages as idle. Subsequent runs of the tool can then show which pages have
their idle flag cleared in the interim.
-See :ref:`Documentation/admin-guide/mm/pagemap.rst <pagemap>` for more
-information about ``/proc/pid/pagemap``, ``/proc/kpageflags``, and
-``/proc/kpagecgroup``.
+See Documentation/admin-guide/mm/pagemap.rst for more information about
+``/proc/pid/pagemap``, ``/proc/kpageflags``, and ``/proc/kpagecgroup``.
.. _impl_details:
diff --git a/Documentation/admin-guide/mm/index.rst b/Documentation/admin-guide/mm/index.rst
index 8edb35f11317..ebc83ca20fdc 100644
--- a/Documentation/admin-guide/mm/index.rst
+++ b/Documentation/admin-guide/mm/index.rst
@@ -3,21 +3,20 @@ Memory Management
=================
Linux memory management subsystem is responsible, as the name implies,
-for managing the memory in the system. This includes implemnetation of
+for managing the memory in the system. This includes implementation of
virtual memory and demand paging, memory allocation both for kernel
-internal structures and user space programms, mapping of files into
+internal structures and user space programs, mapping of files into
processes address space and many other cool things.
Linux memory management is a complex system with many configurable
settings. Most of these settings are available via ``/proc``
-filesystem and can be quired and adjusted using ``sysctl``. These APIs
-are described in Documentation/sysctl/vm.txt and in `man 5 proc`_.
+filesystem and can be queried and adjusted using ``sysctl``. These APIs
+are described in Documentation/admin-guide/sysctl/vm.rst and in `man 5 proc`_.
.. _man 5 proc: http://man7.org/linux/man-pages/man5/proc.5.html
Linux memory management has its own jargon and if you are not yet
-familiar with it, consider reading
-:ref:`Documentation/admin-guide/mm/concepts.rst <mm_concepts>`.
+familiar with it, consider reading Documentation/admin-guide/mm/concepts.rst.
Here we document in detail how to interact with various mechanisms in
the Linux memory management.
@@ -26,12 +25,22 @@ the Linux memory management.
:maxdepth: 1
concepts
+ cma_debugfs
+ damon/index
hugetlbpage
idle_page_tracking
ksm
memory-hotplug
+ multigen_lru
+ nommu-mmap
numa_memory_policy
+ numaperf
pagemap
+ shrinker_debugfs
+ slab
soft-dirty
+ swap_numa
transhuge
userfaultfd
+ zswap
+ kho
diff --git a/Documentation/admin-guide/mm/kho.rst b/Documentation/admin-guide/mm/kho.rst
new file mode 100644
index 000000000000..6dc18ed4b886
--- /dev/null
+++ b/Documentation/admin-guide/mm/kho.rst
@@ -0,0 +1,115 @@
+.. SPDX-License-Identifier: GPL-2.0-or-later
+
+====================
+Kexec Handover Usage
+====================
+
+Kexec HandOver (KHO) is a mechanism that allows Linux to preserve memory
+regions, which could contain serialized system states, across kexec.
+
+This document expects that you are familiar with the base KHO
+:ref:`concepts <kho-concepts>`. If you have not read
+them yet, please do so now.
+
+Prerequisites
+=============
+
+KHO is available when the kernel is compiled with ``CONFIG_KEXEC_HANDOVER``
+set to y. Every KHO producer may have its own config option that you
+need to enable if you would like to preserve their respective state across
+kexec.
+
+To use KHO, please boot the kernel with the ``kho=on`` command line
+parameter. You may use ``kho_scratch`` parameter to define size of the
+scratch regions. For example ``kho_scratch=16M,512M,256M`` will reserve a
+16 MiB low memory scratch area, a 512 MiB global scratch region, and 256 MiB
+per NUMA node scratch regions on boot.
+
+Perform a KHO kexec
+===================
+
+First, before you perform a KHO kexec, you need to move the system into
+the :ref:`KHO finalization phase <kho-finalization-phase>` ::
+
+ $ echo 1 > /sys/kernel/debug/kho/out/finalize
+
+After this command, the KHO FDT is available in
+``/sys/kernel/debug/kho/out/fdt``. Other subsystems may also register
+their own preserved sub FDTs under
+``/sys/kernel/debug/kho/out/sub_fdts/``.
+
+Next, load the target payload and kexec into it. It is important that you
+use the ``-s`` parameter to use the in-kernel kexec file loader, as user
+space kexec tooling currently has no support for KHO with the user space
+based file loader ::
+
+ # kexec -l /path/to/bzImage --initrd /path/to/initrd -s
+ # kexec -e
+
+The new kernel will boot up and contain some of the previous kernel's state.
+
+For example, if you used ``reserve_mem`` command line parameter to create
+an early memory reservation, the new kernel will have that memory at the
+same physical address as the old kernel.
+
+Abort a KHO exec
+================
+
+You can move the system out of KHO finalization phase again by calling ::
+
+ $ echo 0 > /sys/kernel/debug/kho/out/active
+
+After this command, the KHO FDT is no longer available in
+``/sys/kernel/debug/kho/out/fdt``.
+
+debugfs Interfaces
+==================
+
+Currently KHO creates the following debugfs interfaces. Notice that these
+interfaces may change in the future. They will be moved to sysfs once KHO is
+stabilized.
+
+``/sys/kernel/debug/kho/out/finalize``
+ Kexec HandOver (KHO) allows Linux to transition the state of
+ compatible drivers into the next kexec'ed kernel. To do so,
+ device drivers will instruct KHO to preserve memory regions,
+ which could contain serialized kernel state.
+ While the state is serialized, they are unable to perform
+ any modifications to state that was serialized, such as
+ handed over memory allocations.
+
+ When this file contains "1", the system is in the transition
+ state. When contains "0", it is not. To switch between the
+ two states, echo the respective number into this file.
+
+``/sys/kernel/debug/kho/out/fdt``
+ When KHO state tree is finalized, the kernel exposes the
+ flattened device tree blob that carries its current KHO
+ state in this file. Kexec user space tooling can use this
+ as input file for the KHO payload image.
+
+``/sys/kernel/debug/kho/out/scratch_len``
+ Lengths of KHO scratch regions, which are physically contiguous
+ memory regions that will always stay available for future kexec
+ allocations. Kexec user space tools can use this file to determine
+ where it should place its payload images.
+
+``/sys/kernel/debug/kho/out/scratch_phys``
+ Physical locations of KHO scratch regions. Kexec user space tools
+ can use this file in conjunction to scratch_phys to determine where
+ it should place its payload images.
+
+``/sys/kernel/debug/kho/out/sub_fdts/``
+ In the KHO finalization phase, KHO producers register their own
+ FDT blob under this directory.
+
+``/sys/kernel/debug/kho/in/fdt``
+ When the kernel was booted with Kexec HandOver (KHO),
+ the state tree that carries metadata about the previous
+ kernel's state is in this file in the format of flattened
+ device tree. This file may disappear when all consumers of
+ it finished to interpret their metadata.
+
+``/sys/kernel/debug/kho/in/sub_fdts/``
+ Similar to ``kho/out/sub_fdts/``, but contains sub FDT blobs
+ of KHO producers passed from the old kernel.
diff --git a/Documentation/admin-guide/mm/ksm.rst b/Documentation/admin-guide/mm/ksm.rst
index 9303786632d1..ad8e7a41f3b5 100644
--- a/Documentation/admin-guide/mm/ksm.rst
+++ b/Documentation/admin-guide/mm/ksm.rst
@@ -1,5 +1,3 @@
-.. _admin_guide_ksm:
-
=======================
Kernel Samepage Merging
=======================
@@ -9,7 +7,7 @@ Overview
KSM is a memory-saving de-duplication feature, enabled by CONFIG_KSM=y,
added to the Linux kernel in 2.6.32. See ``mm/ksm.c`` for its implementation,
-and http://lwn.net/Articles/306704/ and http://lwn.net/Articles/330589/
+and http://lwn.net/Articles/306704/ and https://lwn.net/Articles/330589/
KSM was originally developed for use with KVM (where it was known as
Kernel Shared Memory), to fit more virtual machines into physical memory,
@@ -22,7 +20,7 @@ content which can be replaced by a single write-protected page (which
is automatically copied if a process later wants to update its
content). The amount of pages that KSM daemon scans in a single pass
and the time between the passes are configured using :ref:`sysfs
-intraface <ksm_sysfs>`
+interface <ksm_sysfs>`
KSM only merges anonymous (private) pages, never pagecache (file) pages.
KSM's merged pages were originally locked into kernel memory, but can now
@@ -52,14 +50,14 @@ with EAGAIN, but more probably arousing the Out-Of-Memory killer.
If KSM is not configured into the running kernel, madvise MADV_MERGEABLE
and MADV_UNMERGEABLE simply fail with EINVAL. If the running kernel was
built with CONFIG_KSM=y, those calls will normally succeed: even if the
-the KSM daemon is not currently running, MADV_MERGEABLE still registers
+KSM daemon is not currently running, MADV_MERGEABLE still registers
the range for whenever the KSM daemon is started; even if the range
cannot contain any pages which KSM could actually merge; even if
MADV_UNMERGEABLE is applied to a range which was never MADV_MERGEABLE.
If a region of memory must be split into at least one new MADV_MERGEABLE
or MADV_UNMERGEABLE region, the madvise may return ENOMEM if the process
-will exceed ``vm.max_map_count`` (see Documentation/sysctl/vm.txt).
+will exceed ``vm.max_map_count`` (see Documentation/admin-guide/sysctl/vm.rst).
Like other madvise calls, they are intended for use on mapped areas of
the user address space: they will report ENOMEM if the specified range
@@ -82,6 +80,9 @@ pages_to_scan
how many pages to scan before ksmd goes to sleep
e.g. ``echo 100 > /sys/kernel/mm/ksm/pages_to_scan``.
+ The pages_to_scan value cannot be changed if ``advisor_mode`` has
+ been set to scan-time.
+
Default: 100 (chosen for demonstration purposes)
sleep_millisecs
@@ -157,8 +158,44 @@ stable_node_chains_prune_millisecs
scan. It's a noop if not a single KSM page hit the
``max_page_sharing`` yet.
+smart_scan
+ Historically KSM checked every candidate page for each scan. It did
+ not take into account historic information. When smart scan is
+ enabled, pages that have previously not been de-duplicated get
+ skipped. How often these pages are skipped depends on how often
+ de-duplication has already been tried and failed. By default this
+ optimization is enabled. The ``pages_skipped`` metric shows how
+ effective the setting is.
+
+advisor_mode
+ The ``advisor_mode`` selects the current advisor. Two modes are
+ supported: none and scan-time. The default is none. By setting
+ ``advisor_mode`` to scan-time, the scan time advisor is enabled.
+ The section about ``advisor`` explains in detail how the scan time
+ advisor works.
+
+adivsor_max_cpu
+ specifies the upper limit of the cpu percent usage of the ksmd
+ background thread. The default is 70.
+
+advisor_target_scan_time
+ specifies the target scan time in seconds to scan all the candidate
+ pages. The default value is 200 seconds.
+
+advisor_min_pages_to_scan
+ specifies the lower limit of the ``pages_to_scan`` parameter of the
+ scan time advisor. The default is 500.
+
+adivsor_max_pages_to_scan
+ specifies the upper limit of the ``pages_to_scan`` parameter of the
+ scan time advisor. The default is 30000.
+
The effectiveness of KSM and MADV_MERGEABLE is shown in ``/sys/kernel/mm/ksm/``:
+general_profit
+ how effective is KSM. The calculation is explained below.
+pages_scanned
+ how many pages are being scanned for ksm
pages_shared
how many shared pages are being used
pages_sharing
@@ -167,12 +204,21 @@ pages_unshared
how many pages unique but repeatedly checked for merging
pages_volatile
how many pages changing too fast to be placed in a tree
+pages_skipped
+ how many pages did the "smart" page scanning algorithm skip
full_scans
how many times all mergeable areas have been scanned
stable_node_chains
the number of KSM pages that hit the ``max_page_sharing`` limit
stable_node_dups
number of duplicated KSM pages
+ksm_zero_pages
+ how many zero pages that are still mapped into processes were mapped by
+ KSM when deduplicating.
+
+When ``use_zero_pages`` is/was enabled, the sum of ``pages_sharing`` +
+``ksm_zero_pages`` represents the actual number of pages saved by KSM.
+if ``use_zero_pages`` has never been enabled, ``ksm_zero_pages`` is 0.
A high ratio of ``pages_sharing`` to ``pages_shared`` indicates good
sharing, but a high ratio of ``pages_unshared`` to ``pages_sharing``
@@ -184,6 +230,94 @@ The maximum possible ``pages_sharing/pages_shared`` ratio is limited by the
``max_page_sharing`` tunable. To increase the ratio ``max_page_sharing`` must
be increased accordingly.
+Monitoring KSM profit
+=====================
+
+KSM can save memory by merging identical pages, but also can consume
+additional memory, because it needs to generate a number of rmap_items to
+save each scanned page's brief rmap information. Some of these pages may
+be merged, but some may not be abled to be merged after being checked
+several times, which are unprofitable memory consumed.
+
+1) How to determine whether KSM save memory or consume memory in system-wide
+ range? Here is a simple approximate calculation for reference::
+
+ general_profit =~ ksm_saved_pages * sizeof(page) - (all_rmap_items) *
+ sizeof(rmap_item);
+
+ where ksm_saved_pages equals to the sum of ``pages_sharing`` +
+ ``ksm_zero_pages`` of the system, and all_rmap_items can be easily
+ obtained by summing ``pages_sharing``, ``pages_shared``, ``pages_unshared``
+ and ``pages_volatile``.
+
+2) The KSM profit inner a single process can be similarly obtained by the
+ following approximate calculation::
+
+ process_profit =~ ksm_saved_pages * sizeof(page) -
+ ksm_rmap_items * sizeof(rmap_item).
+
+ where ksm_saved_pages equals to the sum of ``ksm_merging_pages`` and
+ ``ksm_zero_pages``, both of which are shown under the directory
+ ``/proc/<pid>/ksm_stat``, and ksm_rmap_items is also shown in
+ ``/proc/<pid>/ksm_stat``. The process profit is also shown in
+ ``/proc/<pid>/ksm_stat`` as ksm_process_profit.
+
+From the perspective of application, a high ratio of ``ksm_rmap_items`` to
+``ksm_merging_pages`` means a bad madvise-applied policy, so developers or
+administrators have to rethink how to change madvise policy. Giving an example
+for reference, a page's size is usually 4K, and the rmap_item's size is
+separately 32B on 32-bit CPU architecture and 64B on 64-bit CPU architecture.
+so if the ``ksm_rmap_items/ksm_merging_pages`` ratio exceeds 64 on 64-bit CPU
+or exceeds 128 on 32-bit CPU, then the app's madvise policy should be dropped,
+because the ksm profit is approximately zero or negative.
+
+Monitoring KSM events
+=====================
+
+There are some counters in /proc/vmstat that may be used to monitor KSM events.
+KSM might help save memory, it's a tradeoff by may suffering delay on KSM COW
+or on swapping in copy. Those events could help users evaluate whether or how
+to use KSM. For example, if cow_ksm increases too fast, user may decrease the
+range of madvise(, , MADV_MERGEABLE).
+
+cow_ksm
+ is incremented every time a KSM page triggers copy on write (COW)
+ when users try to write to a KSM page, we have to make a copy.
+
+ksm_swpin_copy
+ is incremented every time a KSM page is copied when swapping in
+ note that KSM page might be copied when swapping in because do_swap_page()
+ cannot do all the locking needed to reconstitute a cross-anon_vma KSM page.
+
+Advisor
+=======
+
+The number of candidate pages for KSM is dynamic. It can be often observed
+that during the startup of an application more candidate pages need to be
+processed. Without an advisor the ``pages_to_scan`` parameter needs to be
+sized for the maximum number of candidate pages. The scan time advisor can
+changes the ``pages_to_scan`` parameter based on demand.
+
+The advisor can be enabled, so KSM can automatically adapt to changes in the
+number of candidate pages to scan. Two advisors are implemented: none and
+scan-time. With none, no advisor is enabled. The default is none.
+
+The scan time advisor changes the ``pages_to_scan`` parameter based on the
+observed scan times. The possible values for the ``pages_to_scan`` parameter is
+limited by the ``advisor_max_cpu`` parameter. In addition there is also the
+``advisor_target_scan_time`` parameter. This parameter sets the target time to
+scan all the KSM candidate pages. The parameter ``advisor_target_scan_time``
+decides how aggressive the scan time advisor scans candidate pages. Lower
+values make the scan time advisor to scan more aggressively. This is the most
+important parameter for the configuration of the scan time advisor.
+
+The initial value and the maximum value can be changed with
+``advisor_min_pages_to_scan`` and ``advisor_max_pages_to_scan``. The default
+values are sufficient for most workloads and use cases.
+
+The ``pages_to_scan`` parameter is re-calculated after a scan has been completed.
+
+
--
Izik Eidus,
Hugh Dickins, 17 Nov 2009
diff --git a/Documentation/admin-guide/mm/memory-hotplug.rst b/Documentation/admin-guide/mm/memory-hotplug.rst
index 5c4432c96c4b..33c886f3d198 100644
--- a/Documentation/admin-guide/mm/memory-hotplug.rst
+++ b/Documentation/admin-guide/mm/memory-hotplug.rst
@@ -1,444 +1,696 @@
-.. _admin_guide_memory_hotplug:
+==================
+Memory Hot(Un)Plug
+==================
-==============
-Memory Hotplug
-==============
+This document describes generic Linux support for memory hot(un)plug with
+a focus on System RAM, including ZONE_MOVABLE support.
-:Created: Jul 28 2007
-:Updated: Add some details about locking internals: Aug 20 2018
+.. contents:: :local:
-This document is about memory hotplug including how-to-use and current status.
-Because Memory Hotplug is still under development, contents of this text will
-be changed often.
+Introduction
+============
-.. contents:: :local:
+Memory hot(un)plug allows for increasing and decreasing the size of physical
+memory available to a machine at runtime. In the simplest case, it consists of
+physically plugging or unplugging a DIMM at runtime, coordinated with the
+operating system.
-.. note::
+Memory hot(un)plug is used for various purposes:
- (1) x86_64's has special implementation for memory hotplug.
- This text does not describe it.
- (2) This text assumes that sysfs is mounted at ``/sys``.
+- The physical memory available to a machine can be adjusted at runtime, up- or
+ downgrading the memory capacity. This dynamic memory resizing, sometimes
+ referred to as "capacity on demand", is frequently used with virtual machines
+ and logical partitions.
+- Replacing hardware, such as DIMMs or whole NUMA nodes, without downtime. One
+ example is replacing failing memory modules.
-Introduction
-============
+- Reducing energy consumption either by physically unplugging memory modules or
+ by logically unplugging (parts of) memory modules from Linux.
+
+Further, the basic memory hot(un)plug infrastructure in Linux is nowadays also
+used to expose persistent memory, other performance-differentiated memory and
+reserved memory regions as ordinary system RAM to Linux.
-Purpose of memory hotplug
--------------------------
+Linux only supports memory hot(un)plug on selected 64 bit architectures, such as
+x86_64, arm64, ppc64 and s390x.
-Memory Hotplug allows users to increase/decrease the amount of memory.
-Generally, there are two purposes.
+Memory Hot(Un)Plug Granularity
+------------------------------
-(A) For changing the amount of memory.
- This is to allow a feature like capacity on demand.
-(B) For installing/removing DIMMs or NUMA-nodes physically.
- This is to exchange DIMMs/NUMA-nodes, reduce power consumption, etc.
+Memory hot(un)plug in Linux uses the SPARSEMEM memory model, which divides the
+physical memory address space into chunks of the same size: memory sections. The
+size of a memory section is architecture dependent. For example, x86_64 uses
+128 MiB and ppc64 uses 16 MiB.
-(A) is required by highly virtualized environments and (B) is required by
-hardware which supports memory power management.
+Memory sections are combined into chunks referred to as "memory blocks". The
+size of a memory block is architecture dependent and corresponds to the smallest
+granularity that can be hot(un)plugged. The default size of a memory block is
+the same as memory section size, unless an architecture specifies otherwise.
-Linux memory hotplug is designed for both purpose.
+All memory blocks have the same size.
-Phases of memory hotplug
+Phases of Memory Hotplug
------------------------
-There are 2 phases in Memory Hotplug:
+Memory hotplug consists of two phases:
- 1) Physical Memory Hotplug phase
- 2) Logical Memory Hotplug phase.
+(1) Adding the memory to Linux
+(2) Onlining memory blocks
-The First phase is to communicate hardware/firmware and make/erase
-environment for hotplugged memory. Basically, this phase is necessary
-for the purpose (B), but this is good phase for communication between
-highly virtualized environments too.
+In the first phase, metadata, such as the memory map ("memmap") and page tables
+for the direct mapping, is allocated and initialized, and memory blocks are
+created; the latter also creates sysfs files for managing newly created memory
+blocks.
-When memory is hotplugged, the kernel recognizes new memory, makes new memory
-management tables, and makes sysfs files for new memory's operation.
+In the second phase, added memory is exposed to the page allocator. After this
+phase, the memory is visible in memory statistics, such as free and total
+memory, of the system.
-If firmware supports notification of connection of new memory to OS,
-this phase is triggered automatically. ACPI can notify this event. If not,
-"probe" operation by system administration is used instead.
-(see :ref:`memory_hotplug_physical_mem`).
+Phases of Memory Hotunplug
+--------------------------
-Logical Memory Hotplug phase is to change memory state into
-available/unavailable for users. Amount of memory from user's view is
-changed by this phase. The kernel makes all memory in it as free pages
-when a memory range is available.
+Memory hotunplug consists of two phases:
-In this document, this phase is described as online/offline.
+(1) Offlining memory blocks
+(2) Removing the memory from Linux
-Logical Memory Hotplug phase is triggered by write of sysfs file by system
-administrator. For the hot-add case, it must be executed after Physical Hotplug
-phase by hand.
-(However, if you writes udev's hotplug scripts for memory hotplug, these
-phases can be execute in seamless way.)
+In the first phase, memory is "hidden" from the page allocator again, for
+example, by migrating busy memory to other memory locations and removing all
+relevant free pages from the page allocator After this phase, the memory is no
+longer visible in memory statistics of the system.
-Unit of Memory online/offline operation
----------------------------------------
+In the second phase, the memory blocks are removed and metadata is freed.
-Memory hotplug uses SPARSEMEM memory model which allows memory to be divided
-into chunks of the same size. These chunks are called "sections". The size of
-a memory section is architecture dependent. For example, power uses 16MiB, ia64
-uses 1GiB.
+Memory Hotplug Notifications
+============================
-Memory sections are combined into chunks referred to as "memory blocks". The
-size of a memory block is architecture dependent and represents the logical
-unit upon which memory online/offline operations are to be performed. The
-default size of a memory block is the same as memory section size unless an
-architecture specifies otherwise. (see :ref:`memory_hotplug_sysfs_files`.)
+There are various ways how Linux is notified about memory hotplug events such
+that it can start adding hotplugged memory. This description is limited to
+systems that support ACPI; mechanisms specific to other firmware interfaces or
+virtual machines are not described.
-To determine the size (in bytes) of a memory block please read this file::
+ACPI Notifications
+------------------
- /sys/devices/system/memory/block_size_bytes
+Platforms that support ACPI, such as x86_64, can support memory hotplug
+notifications via ACPI.
-Kernel Configuration
-====================
+In general, a firmware supporting memory hotplug defines a memory class object
+HID "PNP0C80". When notified about hotplug of a new memory device, the ACPI
+driver will hotplug the memory to Linux.
-To use memory hotplug feature, kernel must be compiled with following
-config options.
+If the firmware supports hotplug of NUMA nodes, it defines an object _HID
+"ACPI0004", "PNP0A05", or "PNP0A06". When notified about an hotplug event, all
+assigned memory devices are added to Linux by the ACPI driver.
-- For all memory hotplug:
- - Memory model -> Sparse Memory (``CONFIG_SPARSEMEM``)
- - Allow for memory hot-add (``CONFIG_MEMORY_HOTPLUG``)
+Similarly, Linux can be notified about requests to hotunplug a memory device or
+a NUMA node via ACPI. The ACPI driver will try offlining all relevant memory
+blocks, and, if successful, hotunplug the memory from Linux.
-- To enable memory removal, the following are also necessary:
- - Allow for memory hot remove (``CONFIG_MEMORY_HOTREMOVE``)
- - Page Migration (``CONFIG_MIGRATION``)
+Manual Probing
+--------------
-- For ACPI memory hotplug, the following are also necessary:
- - Memory hotplug (under ACPI Support menu) (``CONFIG_ACPI_HOTPLUG_MEMORY``)
- - This option can be kernel module.
+On some architectures, the firmware may not be able to notify the operating
+system about a memory hotplug event. Instead, the memory has to be manually
+probed from user space.
-- As a related configuration, if your box has a feature of NUMA-node hotplug
- via ACPI, then this option is necessary too.
+The probe interface is located at::
- - ACPI0004,PNP0A05 and PNP0A06 Container Driver (under ACPI Support menu)
- (``CONFIG_ACPI_CONTAINER``).
+ /sys/devices/system/memory/probe
- This option can be kernel module too.
+Only complete memory blocks can be probed. Individual memory blocks are probed
+by providing the physical start address of the memory block::
+ % echo addr > /sys/devices/system/memory/probe
-.. _memory_hotplug_sysfs_files:
+Which results in a memory block for the range [addr, addr + memory_block_size)
+being created.
-sysfs files for memory hotplug
-==============================
+.. note::
-All memory blocks have their device information in sysfs. Each memory block
-is described under ``/sys/devices/system/memory`` as::
+ Using the probe interface is discouraged as it is easy to crash the kernel,
+ because Linux cannot validate user input; this interface might be removed in
+ the future.
- /sys/devices/system/memory/memoryXXX
+Onlining and Offlining Memory Blocks
+====================================
-where XXX is the memory block id.
+After a memory block has been created, Linux has to be instructed to actually
+make use of that memory: the memory block has to be "online".
-For the memory block covered by the sysfs directory. It is expected that all
-memory sections in this range are present and no memory holes exist in the
-range. Currently there is no way to determine if there is a memory hole, but
-the existence of one should not affect the hotplug capabilities of the memory
-block.
+Before a memory block can be removed, Linux has to stop using any memory part of
+the memory block: the memory block has to be "offlined".
-For example, assume 1GiB memory block size. A device for a memory starting at
-0x100000000 is ``/sys/device/system/memory/memory4``::
+The Linux kernel can be configured to automatically online added memory blocks
+and drivers automatically trigger offlining of memory blocks when trying
+hotunplug of memory. Memory blocks can only be removed once offlining succeeded
+and drivers may trigger offlining of memory blocks when attempting hotunplug of
+memory.
- (0x100000000 / 1Gib = 4)
+Onlining Memory Blocks Manually
+-------------------------------
-This device covers address range [0x100000000 ... 0x140000000)
+If auto-onlining of memory blocks isn't enabled, user-space has to manually
+trigger onlining of memory blocks. Often, udev rules are used to automate this
+task in user space.
-Under each memory block, you can see 5 files:
+Onlining of a memory block can be triggered via::
-- ``/sys/devices/system/memory/memoryXXX/phys_index``
-- ``/sys/devices/system/memory/memoryXXX/phys_device``
-- ``/sys/devices/system/memory/memoryXXX/state``
-- ``/sys/devices/system/memory/memoryXXX/removable``
-- ``/sys/devices/system/memory/memoryXXX/valid_zones``
+ % echo online > /sys/devices/system/memory/memoryXXX/state
-=================== ============================================================
-``phys_index`` read-only and contains memory block id, same as XXX.
-``state`` read-write
-
- - at read: contains online/offline state of memory.
- - at write: user can specify "online_kernel",
-
- "online_movable", "online", "offline" command
- which will be performed on all sections in the block.
-``phys_device`` read-only: designed to show the name of physical memory
- device. This is not well implemented now.
-``removable`` read-only: contains an integer value indicating
- whether the memory block is removable or not
- removable. A value of 1 indicates that the memory
- block is removable and a value of 0 indicates that
- it is not removable. A memory block is removable only if
- every section in the block is removable.
-``valid_zones`` read-only: designed to show which zones this memory block
- can be onlined to.
-
- The first column shows it`s default zone.
-
- "memory6/valid_zones: Normal Movable" shows this memoryblock
- can be onlined to ZONE_NORMAL by default and to ZONE_MOVABLE
- by online_movable.
-
- "memory7/valid_zones: Movable Normal" shows this memoryblock
- can be onlined to ZONE_MOVABLE by default and to ZONE_NORMAL
- by online_kernel.
-=================== ============================================================
+Or alternatively::
+
+ % echo 1 > /sys/devices/system/memory/memoryXXX/online
+
+The kernel will select the target zone automatically, depending on the
+configured ``online_policy``.
+
+One can explicitly request to associate an offline memory block with
+ZONE_MOVABLE by::
+
+ % echo online_movable > /sys/devices/system/memory/memoryXXX/state
+
+Or one can explicitly request a kernel zone (usually ZONE_NORMAL) by::
+
+ % echo online_kernel > /sys/devices/system/memory/memoryXXX/state
+
+In any case, if onlining succeeds, the state of the memory block is changed to
+be "online". If it fails, the state of the memory block will remain unchanged
+and the above commands will fail.
+
+Onlining Memory Blocks Automatically
+------------------------------------
+
+The kernel can be configured to try auto-onlining of newly added memory blocks.
+If this feature is disabled, the memory blocks will stay offline until
+explicitly onlined from user space.
+
+The configured auto-online behavior can be observed via::
+
+ % cat /sys/devices/system/memory/auto_online_blocks
+
+Auto-onlining can be enabled by writing ``online``, ``online_kernel`` or
+``online_movable`` to that file, like::
+
+ % echo online > /sys/devices/system/memory/auto_online_blocks
+
+Similarly to manual onlining, with ``online`` the kernel will select the
+target zone automatically, depending on the configured ``online_policy``.
+
+Modifying the auto-online behavior will only affect all subsequently added
+memory blocks only.
.. note::
- These directories/files appear after physical memory hotplug phase.
+ In corner cases, auto-onlining can fail. The kernel won't retry. Note that
+ auto-onlining is not expected to fail in default configurations.
-If CONFIG_NUMA is enabled the memoryXXX/ directories can also be accessed
-via symbolic links located in the ``/sys/devices/system/node/node*`` directories.
+.. note::
-For example::
+ DLPAR on ppc64 ignores the ``offline`` setting and will still online added
+ memory blocks; if onlining fails, memory blocks are removed again.
- /sys/devices/system/node/node0/memory9 -> ../../memory/memory9
+Offlining Memory Blocks
+-----------------------
-A backlink will also be created::
+In the current implementation, Linux's memory offlining will try migrating all
+movable pages off the affected memory block. As most kernel allocations, such as
+page tables, are unmovable, page migration can fail and, therefore, inhibit
+memory offlining from succeeding.
- /sys/devices/system/memory/memory9/node0 -> ../../node/node0
+Having the memory provided by memory block managed by ZONE_MOVABLE significantly
+increases memory offlining reliability; still, memory offlining can fail in
+some corner cases.
-.. _memory_hotplug_physical_mem:
+Further, memory offlining might retry for a long time (or even forever), until
+aborted by the user.
-Physical memory hot-add phase
-=============================
+Offlining of a memory block can be triggered via::
-Hardware(Firmware) Support
---------------------------
+ % echo offline > /sys/devices/system/memory/memoryXXX/state
-On x86_64/ia64 platform, memory hotplug by ACPI is supported.
+Or alternatively::
-In general, the firmware (ACPI) which supports memory hotplug defines
-memory class object of _HID "PNP0C80". When a notify is asserted to PNP0C80,
-Linux's ACPI handler does hot-add memory to the system and calls a hotplug udev
-script. This will be done automatically.
+ % echo 0 > /sys/devices/system/memory/memoryXXX/online
-But scripts for memory hotplug are not contained in generic udev package(now).
-You may have to write it by yourself or online/offline memory by hand.
-Please see :ref:`memory_hotplug_how_to_online_memory` and
-:ref:`memory_hotplug_how_to_offline_memory`.
+If offlining succeeds, the state of the memory block is changed to be "offline".
+If it fails, the state of the memory block will remain unchanged and the above
+commands will fail, for example, via::
-If firmware supports NUMA-node hotplug, and defines an object _HID "ACPI0004",
-"PNP0A05", or "PNP0A06", notification is asserted to it, and ACPI handler
-calls hotplug code for all of objects which are defined in it.
-If memory device is found, memory hotplug code will be called.
+ bash: echo: write error: Device or resource busy
-Notify memory hot-add event by hand
------------------------------------
+or via::
-On some architectures, the firmware may not notify the kernel of a memory
-hotplug event. Therefore, the memory "probe" interface is supported to
-explicitly notify the kernel. This interface depends on
-CONFIG_ARCH_MEMORY_PROBE and can be configured on powerpc, sh, and x86
-if hotplug is supported, although for x86 this should be handled by ACPI
-notification.
+ bash: echo: write error: Invalid argument
-Probe interface is located at::
+Observing the State of Memory Blocks
+------------------------------------
- /sys/devices/system/memory/probe
+The state (online/offline/going-offline) of a memory block can be observed
+either via::
-You can tell the physical address of new memory to the kernel by::
+ % cat /sys/devices/system/memory/memoryXXX/state
- % echo start_address_of_new_memory > /sys/devices/system/memory/probe
+Or alternatively (1/0) via::
-Then, [start_address_of_new_memory, start_address_of_new_memory +
-memory_block_size] memory range is hot-added. In this case, hotplug script is
-not called (in current implementation). You'll have to online memory by
-yourself. Please see :ref:`memory_hotplug_how_to_online_memory`.
+ % cat /sys/devices/system/memory/memoryXXX/online
-Logical Memory hot-add phase
-============================
+For an online memory block, the managing zone can be observed via::
-State of memory
----------------
+ % cat /sys/devices/system/memory/memoryXXX/valid_zones
-To see (online/offline) state of a memory block, read 'state' file::
+Configuring Memory Hot(Un)Plug
+==============================
- % cat /sys/device/system/memory/memoryXXX/state
+There are various ways how system administrators can configure memory
+hot(un)plug and interact with memory blocks, especially, to online them.
+Memory Hot(Un)Plug Configuration via Sysfs
+------------------------------------------
-- If the memory block is online, you'll read "online".
-- If the memory block is offline, you'll read "offline".
+Some memory hot(un)plug properties can be configured or inspected via sysfs in::
+ /sys/devices/system/memory/
-.. _memory_hotplug_how_to_online_memory:
+The following files are currently defined:
-How to online memory
---------------------
+====================== =========================================================
+``auto_online_blocks`` read-write: set or get the default state of new memory
+ blocks; configure auto-onlining.
-When the memory is hot-added, the kernel decides whether or not to "online"
-it according to the policy which can be read from "auto_online_blocks" file::
+ The default value depends on the
+ CONFIG_MHP_DEFAULT_ONLINE_TYPE kernel configuration
+ options.
- % cat /sys/devices/system/memory/auto_online_blocks
+ See the ``state`` property of memory blocks for details.
+``block_size_bytes`` read-only: the size in bytes of a memory block.
+``probe`` write-only: add (probe) selected memory blocks manually
+ from user space by supplying the physical start address.
-The default depends on the CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE kernel config
-option. If it is disabled the default is "offline" which means the newly added
-memory is not in a ready-to-use state and you have to "online" the newly added
-memory blocks manually. Automatic onlining can be requested by writing "online"
-to "auto_online_blocks" file::
+ Availability depends on the CONFIG_ARCH_MEMORY_PROBE
+ kernel configuration option.
+``uevent`` read-write: generic udev file for device subsystems.
+``crash_hotplug`` read-only: when changes to the system memory map
+ occur due to hot un/plug of memory, this file contains
+ '1' if the kernel updates the kdump capture kernel memory
+ map itself (via elfcorehdr and other relevant kexec
+ segments), or '0' if userspace must update the kdump
+ capture kernel memory map.
- % echo online > /sys/devices/system/memory/auto_online_blocks
+ Availability depends on the CONFIG_MEMORY_HOTPLUG kernel
+ configuration option.
+====================== =========================================================
-This sets a global policy and impacts all memory blocks that will subsequently
-be hotplugged. Currently offline blocks keep their state. It is possible, under
-certain circumstances, that some memory blocks will be added but will fail to
-online. User space tools can check their "state" files
-(``/sys/devices/system/memory/memoryXXX/state``) and try to online them manually.
+.. note::
-If the automatic onlining wasn't requested, failed, or some memory block was
-offlined it is possible to change the individual block's state by writing to the
-"state" file::
+ When the CONFIG_MEMORY_FAILURE kernel configuration option is enabled, two
+ additional files ``hard_offline_page`` and ``soft_offline_page`` are available
+ to trigger hwpoisoning of pages, for example, for testing purposes. Note that
+ this functionality is not really related to memory hot(un)plug or actual
+ offlining of memory blocks.
- % echo online > /sys/devices/system/memory/memoryXXX/state
+Memory Block Configuration via Sysfs
+------------------------------------
-This onlining will not change the ZONE type of the target memory block,
-If the memory block doesn't belong to any zone an appropriate kernel zone
-(usually ZONE_NORMAL) will be used unless movable_node kernel command line
-option is specified when ZONE_MOVABLE will be used.
+Each memory block is represented as a memory block device that can be
+onlined or offlined. All memory blocks have their device information located in
+sysfs. Each present memory block is listed under
+``/sys/devices/system/memory`` as::
-You can explicitly request to associate it with ZONE_MOVABLE by::
+ /sys/devices/system/memory/memoryXXX
- % echo online_movable > /sys/devices/system/memory/memoryXXX/state
+where XXX is the memory block id; the number of digits is variable.
-.. note:: current limit: this memory block must be adjacent to ZONE_MOVABLE
+A present memory block indicates that some memory in the range is present;
+however, a memory block might span memory holes. A memory block spanning memory
+holes cannot be offlined.
-Or you can explicitly request a kernel zone (usually ZONE_NORMAL) by::
+For example, assume 1 GiB memory block size. A device for a memory starting at
+0x100000000 is ``/sys/devices/system/memory/memory4``::
- % echo online_kernel > /sys/devices/system/memory/memoryXXX/state
+ (0x100000000 / 1Gib = 4)
+
+This device covers address range [0x100000000 ... 0x140000000)
-.. note:: current limit: this memory block must be adjacent to ZONE_NORMAL
+The following files are currently defined:
-An explicit zone onlining can fail (e.g. when the range is already within
-and existing and incompatible zone already).
+=================== ============================================================
+``online`` read-write: simplified interface to trigger onlining /
+ offlining and to observe the state of a memory block.
+ When onlining, the zone is selected automatically.
+``phys_device`` read-only: legacy interface only ever used on s390x to
+ expose the covered storage increment.
+``phys_index`` read-only: the memory block id (XXX).
+``removable`` read-only: legacy interface that indicated whether a memory
+ block was likely to be offlineable or not. Nowadays, the
+ kernel return ``1`` if and only if it supports memory
+ offlining.
+``state`` read-write: advanced interface to trigger onlining /
+ offlining and to observe the state of a memory block.
+
+ When writing, ``online``, ``offline``, ``online_kernel`` and
+ ``online_movable`` are supported.
+
+ ``online_movable`` specifies onlining to ZONE_MOVABLE.
+ ``online_kernel`` specifies onlining to the default kernel
+ zone for the memory block, such as ZONE_NORMAL.
+ ``online`` let's the kernel select the zone automatically.
+
+ When reading, ``online``, ``offline`` and ``going-offline``
+ may be returned.
+``uevent`` read-write: generic uevent file for devices.
+``valid_zones`` read-only: when a block is online, shows the zone it
+ belongs to; when a block is offline, shows what zone will
+ manage it when the block will be onlined.
+
+ For online memory blocks, ``DMA``, ``DMA32``, ``Normal``,
+ ``Movable`` and ``none`` may be returned. ``none`` indicates
+ that memory provided by a memory block is managed by
+ multiple zones or spans multiple nodes; such memory blocks
+ cannot be offlined. ``Movable`` indicates ZONE_MOVABLE.
+ Other values indicate a kernel zone.
+
+ For offline memory blocks, the first column shows the
+ zone the kernel would select when onlining the memory block
+ right now without further specifying a zone.
+
+ Availability depends on the CONFIG_MEMORY_HOTREMOVE
+ kernel configuration option.
+=================== ============================================================
+
+.. note::
-After this, memory block XXX's state will be 'online' and the amount of
-available memory will be increased.
+ If the CONFIG_NUMA kernel configuration option is enabled, the memoryXXX/
+ directories can also be accessed via symbolic links located in the
+ ``/sys/devices/system/node/node*`` directories.
-This may be changed in future.
+ For example::
-Logical memory remove
-=====================
+ /sys/devices/system/node/node0/memory9 -> ../../memory/memory9
-Memory offline and ZONE_MOVABLE
--------------------------------
+ A backlink will also be created::
-Memory offlining is more complicated than memory online. Because memory offline
-has to make the whole memory block be unused, memory offline can fail if
-the memory block includes memory which cannot be freed.
+ /sys/devices/system/memory/memory9/node0 -> ../../node/node0
+
+Command Line Parameters
+-----------------------
+
+Some command line parameters affect memory hot(un)plug handling. The following
+command line parameters are relevant:
+
+======================== =======================================================
+``memhp_default_state`` configure auto-onlining by essentially setting
+ ``/sys/devices/system/memory/auto_online_blocks``.
+``movable_node`` configure automatic zone selection in the kernel when
+ using the ``contig-zones`` online policy. When
+ set, the kernel will default to ZONE_MOVABLE when
+ onlining a memory block, unless other zones can be kept
+ contiguous.
+======================== =======================================================
+
+See Documentation/admin-guide/kernel-parameters.txt for a more generic
+description of these command line parameters.
+
+Module Parameters
+------------------
+
+Instead of additional command line parameters or sysfs files, the
+``memory_hotplug`` subsystem now provides a dedicated namespace for module
+parameters. Module parameters can be set via the command line by predicating
+them with ``memory_hotplug.`` such as::
+
+ memory_hotplug.memmap_on_memory=1
+
+and they can be observed (and some even modified at runtime) via::
+
+ /sys/module/memory_hotplug/parameters/
+
+The following module parameters are currently defined:
+
+================================ ===============================================
+``memmap_on_memory`` read-write: Allocate memory for the memmap from
+ the added memory block itself. Even if enabled,
+ actual support depends on various other system
+ properties and should only be regarded as a
+ hint whether the behavior would be desired.
+
+ While allocating the memmap from the memory
+ block itself makes memory hotplug less likely
+ to fail and keeps the memmap on the same NUMA
+ node in any case, it can fragment physical
+ memory in a way that huge pages in bigger
+ granularity cannot be formed on hotplugged
+ memory.
+
+ With value "force" it could result in memory
+ wastage due to memmap size limitations. For
+ example, if the memmap for a memory block
+ requires 1 MiB, but the pageblock size is 2
+ MiB, 1 MiB of hotplugged memory will be wasted.
+ Note that there are still cases where the
+ feature cannot be enforced: for example, if the
+ memmap is smaller than a single page, or if the
+ architecture does not support the forced mode
+ in all configurations.
+
+``online_policy`` read-write: Set the basic policy used for
+ automatic zone selection when onlining memory
+ blocks without specifying a target zone.
+ ``contig-zones`` has been the kernel default
+ before this parameter was added. After an
+ online policy was configured and memory was
+ online, the policy should not be changed
+ anymore.
+
+ When set to ``contig-zones``, the kernel will
+ try keeping zones contiguous. If a memory block
+ intersects multiple zones or no zone, the
+ behavior depends on the ``movable_node`` kernel
+ command line parameter: default to ZONE_MOVABLE
+ if set, default to the applicable kernel zone
+ (usually ZONE_NORMAL) if not set.
+
+ When set to ``auto-movable``, the kernel will
+ try onlining memory blocks to ZONE_MOVABLE if
+ possible according to the configuration and
+ memory device details. With this policy, one
+ can avoid zone imbalances when eventually
+ hotplugging a lot of memory later and still
+ wanting to be able to hotunplug as much as
+ possible reliably, very desirable in
+ virtualized environments. This policy ignores
+ the ``movable_node`` kernel command line
+ parameter and isn't really applicable in
+ environments that require it (e.g., bare metal
+ with hotunpluggable nodes) where hotplugged
+ memory might be exposed via the
+ firmware-provided memory map early during boot
+ to the system instead of getting detected,
+ added and onlined later during boot (such as
+ done by virtio-mem or by some hypervisors
+ implementing emulated DIMMs). As one example, a
+ hotplugged DIMM will be onlined either
+ completely to ZONE_MOVABLE or completely to
+ ZONE_NORMAL, not a mixture.
+ As another example, as many memory blocks
+ belonging to a virtio-mem device will be
+ onlined to ZONE_MOVABLE as possible,
+ special-casing units of memory blocks that can
+ only get hotunplugged together. *This policy
+ does not protect from setups that are
+ problematic with ZONE_MOVABLE and does not
+ change the zone of memory blocks dynamically
+ after they were onlined.*
+``auto_movable_ratio`` read-write: Set the maximum MOVABLE:KERNEL
+ memory ratio in % for the ``auto-movable``
+ online policy. Whether the ratio applies only
+ for the system across all NUMA nodes or also
+ per NUMA nodes depends on the
+ ``auto_movable_numa_aware`` configuration.
+
+ All accounting is based on present memory pages
+ in the zones combined with accounting per
+ memory device. Memory dedicated to the CMA
+ allocator is accounted as MOVABLE, although
+ residing on one of the kernel zones. The
+ possible ratio depends on the actual workload.
+ The kernel default is "301" %, for example,
+ allowing for hotplugging 24 GiB to a 8 GiB VM
+ and automatically onlining all hotplugged
+ memory to ZONE_MOVABLE in many setups. The
+ additional 1% deals with some pages being not
+ present, for example, because of some firmware
+ allocations.
+
+ Note that ZONE_NORMAL memory provided by one
+ memory device does not allow for more
+ ZONE_MOVABLE memory for a different memory
+ device. As one example, onlining memory of a
+ hotplugged DIMM to ZONE_NORMAL will not allow
+ for another hotplugged DIMM to get onlined to
+ ZONE_MOVABLE automatically. In contrast, memory
+ hotplugged by a virtio-mem device that got
+ onlined to ZONE_NORMAL will allow for more
+ ZONE_MOVABLE memory within *the same*
+ virtio-mem device.
+``auto_movable_numa_aware`` read-write: Configure whether the
+ ``auto_movable_ratio`` in the ``auto-movable``
+ online policy also applies per NUMA
+ node in addition to the whole system across all
+ NUMA nodes. The kernel default is "Y".
+
+ Disabling NUMA awareness can be helpful when
+ dealing with NUMA nodes that should be
+ completely hotunpluggable, onlining the memory
+ completely to ZONE_MOVABLE automatically if
+ possible.
+
+ Parameter availability depends on CONFIG_NUMA.
+================================ ===============================================
+
+ZONE_MOVABLE
+============
-In general, memory offline can use 2 techniques.
+ZONE_MOVABLE is an important mechanism for more reliable memory offlining.
+Further, having system RAM managed by ZONE_MOVABLE instead of one of the
+kernel zones can increase the number of possible transparent huge pages and
+dynamically allocated huge pages.
-(1) reclaim and free all memory in the memory block.
-(2) migrate all pages in the memory block.
+Most kernel allocations are unmovable. Important examples include the memory
+map (usually 1/64ths of memory), page tables, and kmalloc(). Such allocations
+can only be served from the kernel zones.
-In the current implementation, Linux's memory offline uses method (2), freeing
-all pages in the memory block by page migration. But not all pages are
-migratable. Under current Linux, migratable pages are anonymous pages and
-page caches. For offlining a memory block by migration, the kernel has to
-guarantee that the memory block contains only migratable pages.
+Most user space pages, such as anonymous memory, and page cache pages are
+movable. Such allocations can be served from ZONE_MOVABLE and the kernel zones.
-Now, a boot option for making a memory block which consists of migratable pages
-is supported. By specifying "kernelcore=" or "movablecore=" boot option, you can
-create ZONE_MOVABLE...a zone which is just used for movable pages.
-(See also Documentation/admin-guide/kernel-parameters.rst)
+Only movable allocations are served from ZONE_MOVABLE, resulting in unmovable
+allocations being limited to the kernel zones. Without ZONE_MOVABLE, there is
+absolutely no guarantee whether a memory block can be offlined successfully.
-Assume the system has "TOTAL" amount of memory at boot time, this boot option
-creates ZONE_MOVABLE as following.
+Zone Imbalances
+---------------
+
+Having too much system RAM managed by ZONE_MOVABLE is called a zone imbalance,
+which can harm the system or degrade performance. As one example, the kernel
+might crash because it runs out of free memory for unmovable allocations,
+although there is still plenty of free memory left in ZONE_MOVABLE.
-1) When kernelcore=YYYY boot option is used,
- Size of memory not for movable pages (not for offline) is YYYY.
- Size of memory for movable pages (for offline) is TOTAL-YYYY.
+Usually, MOVABLE:KERNEL ratios of up to 3:1 or even 4:1 are fine. Ratios of 63:1
+are definitely impossible due to the overhead for the memory map.
-2) When movablecore=ZZZZ boot option is used,
- Size of memory not for movable pages (not for offline) is TOTAL - ZZZZ.
- Size of memory for movable pages (for offline) is ZZZZ.
+Actual safe zone ratios depend on the workload. Extreme cases, like excessive
+long-term pinning of pages, might not be able to deal with ZONE_MOVABLE at all.
.. note::
- Unfortunately, there is no information to show which memory block belongs
- to ZONE_MOVABLE. This is TBD.
+ CMA memory part of a kernel zone essentially behaves like memory in
+ ZONE_MOVABLE and similar considerations apply, especially when combining
+ CMA with ZONE_MOVABLE.
-.. _memory_hotplug_how_to_offline_memory:
+ZONE_MOVABLE Sizing Considerations
+----------------------------------
-How to offline memory
----------------------
+We usually expect that a large portion of available system RAM will actually
+be consumed by user space, either directly or indirectly via the page cache. In
+the normal case, ZONE_MOVABLE can be used when allocating such pages just fine.
-You can offline a memory block by using the same sysfs interface that was used
-in memory onlining::
+With that in mind, it makes sense that we can have a big portion of system RAM
+managed by ZONE_MOVABLE. However, there are some things to consider when using
+ZONE_MOVABLE, especially when fine-tuning zone ratios:
- % echo offline > /sys/devices/system/memory/memoryXXX/state
+- Having a lot of offline memory blocks. Even offline memory blocks consume
+ memory for metadata and page tables in the direct map; having a lot of offline
+ memory blocks is not a typical case, though.
+
+- Memory ballooning without balloon compaction is incompatible with
+ ZONE_MOVABLE. Only some implementations, such as virtio-balloon and
+ pseries CMM, fully support balloon compaction.
+
+ Further, the CONFIG_BALLOON_COMPACTION kernel configuration option might be
+ disabled. In that case, balloon inflation will only perform unmovable
+ allocations and silently create a zone imbalance, usually triggered by
+ inflation requests from the hypervisor.
+
+- Gigantic pages are unmovable, resulting in user space consuming a
+ lot of unmovable memory.
+
+- Huge pages are unmovable when an architectures does not support huge
+ page migration, resulting in a similar issue as with gigantic pages.
+
+- Page tables are unmovable. Excessive swapping, mapping extremely large
+ files or ZONE_DEVICE memory can be problematic, although only really relevant
+ in corner cases. When we manage a lot of user space memory that has been
+ swapped out or is served from a file/persistent memory/... we still need a lot
+ of page tables to manage that memory once user space accessed that memory.
+
+- In certain DAX configurations the memory map for the device memory will be
+ allocated from the kernel zones.
+
+- KASAN can have a significant memory overhead, for example, consuming 1/8th of
+ the total system memory size as (unmovable) tracking metadata.
+
+- Long-term pinning of pages. Techniques that rely on long-term pinnings
+ (especially, RDMA and vfio/mdev) are fundamentally problematic with
+ ZONE_MOVABLE, and therefore, memory offlining. Pinned pages cannot reside
+ on ZONE_MOVABLE as that would turn these pages unmovable. Therefore, they
+ have to be migrated off that zone while pinning. Pinning a page can fail
+ even if there is plenty of free memory in ZONE_MOVABLE.
+
+ In addition, using ZONE_MOVABLE might make page pinning more expensive,
+ because of the page migration overhead.
+
+By default, all the memory configured at boot time is managed by the kernel
+zones and ZONE_MOVABLE is not used.
+
+To enable ZONE_MOVABLE to include the memory present at boot and to control the
+ratio between movable and kernel zones there are two command line options:
+``kernelcore=`` and ``movablecore=``. See
+Documentation/admin-guide/kernel-parameters.rst for their description.
+
+Memory Offlining and ZONE_MOVABLE
+---------------------------------
+
+Even with ZONE_MOVABLE, there are some corner cases where offlining a memory
+block might fail:
+
+- Memory blocks with memory holes; this applies to memory blocks present during
+ boot and can apply to memory blocks hotplugged via the XEN balloon and the
+ Hyper-V balloon.
+
+- Mixed NUMA nodes and mixed zones within a single memory block prevent memory
+ offlining; this applies to memory blocks present during boot only.
+
+- Special memory blocks prevented by the system from getting offlined. Examples
+ include any memory available during boot on arm64 or memory blocks spanning
+ the crashkernel area on s390x; this usually applies to memory blocks present
+ during boot only.
+
+- Memory blocks overlapping with CMA areas cannot be offlined, this applies to
+ memory blocks present during boot only.
+
+- Concurrent activity that operates on the same physical memory area, such as
+ allocating gigantic pages, can result in temporary offlining failures.
+
+- Out of memory when dissolving huge pages, especially when HugeTLB Vmemmap
+ Optimization (HVO) is enabled.
+
+ Offlining code may be able to migrate huge page contents, but may not be able
+ to dissolve the source huge page because it fails allocating (unmovable) pages
+ for the vmemmap, because the system might not have free memory in the kernel
+ zones left.
+
+ Users that depend on memory offlining to succeed for movable zones should
+ carefully consider whether the memory savings gained from this feature are
+ worth the risk of possibly not being able to offline memory in certain
+ situations.
+
+Further, when running into out of memory situations while migrating pages, or
+when still encountering permanently unmovable pages within ZONE_MOVABLE
+(-> BUG), memory offlining will keep retrying until it eventually succeeds.
+
+When offlining is triggered from user space, the offlining context can be
+terminated by sending a signal. A timeout based offlining can easily be
+implemented via::
-If offline succeeds, the state of the memory block is changed to be "offline".
-If it fails, some error core (like -EBUSY) will be returned by the kernel.
-Even if a memory block does not belong to ZONE_MOVABLE, you can try to offline
-it. If it doesn't contain 'unmovable' memory, you'll get success.
-
-A memory block under ZONE_MOVABLE is considered to be able to be offlined
-easily. But under some busy state, it may return -EBUSY. Even if a memory
-block cannot be offlined due to -EBUSY, you can retry offlining it and may be
-able to offline it (or not). (For example, a page is referred to by some kernel
-internal call and released soon.)
-
-Consideration:
- Memory hotplug's design direction is to make the possibility of memory
- offlining higher and to guarantee unplugging memory under any situation. But
- it needs more work. Returning -EBUSY under some situation may be good because
- the user can decide to retry more or not by himself. Currently, memory
- offlining code does some amount of retry with 120 seconds timeout.
-
-Physical memory remove
-======================
-
-Need more implementation yet....
- - Notification completion of remove works by OS to firmware.
- - Guard from remove if not yet.
-
-
-Locking Internals
-=================
-
-When adding/removing memory that uses memory block devices (i.e. ordinary RAM),
-the device_hotplug_lock should be held to:
-
-- synchronize against online/offline requests (e.g. via sysfs). This way, memory
- block devices can only be accessed (.online/.state attributes) by user
- space once memory has been fully added. And when removing memory, we
- know nobody is in critical sections.
-- synchronize against CPU hotplug and similar (e.g. relevant for ACPI and PPC)
-
-Especially, there is a possible lock inversion that is avoided using
-device_hotplug_lock when adding memory and user space tries to online that
-memory faster than expected:
-
-- device_online() will first take the device_lock(), followed by
- mem_hotplug_lock
-- add_memory_resource() will first take the mem_hotplug_lock, followed by
- the device_lock() (while creating the devices, during bus_add_device()).
-
-As the device is visible to user space before taking the device_lock(), this
-can result in a lock inversion.
-
-onlining/offlining of memory should be done via device_online()/
-device_offline() - to make sure it is properly synchronized to actions
-via sysfs. Holding device_hotplug_lock is advised (to e.g. protect online_type)
-
-When adding/removing/onlining/offlining memory or adding/removing
-heterogeneous/device memory, we should always hold the mem_hotplug_lock in
-write mode to serialise memory hotplug (e.g. access to global/zone
-variables).
-
-In addition, mem_hotplug_lock (in contrast to device_hotplug_lock) in read
-mode allows for a quite efficient get_online_mems/put_online_mems
-implementation, so code accessing memory can protect from that memory
-vanishing.
-
-
-Future Work
-===========
-
- - allowing memory hot-add to ZONE_MOVABLE. maybe we need some switch like
- sysctl or new control file.
- - showing memory block and physical device relationship.
- - test and make it better memory offlining.
- - support HugeTLB page migration and offlining.
- - memmap removing at memory offline.
- - physical remove memory.
+ % timeout $TIMEOUT offline_block | failure_handling
diff --git a/Documentation/admin-guide/mm/multigen_lru.rst b/Documentation/admin-guide/mm/multigen_lru.rst
new file mode 100644
index 000000000000..9cb54b4ff5d9
--- /dev/null
+++ b/Documentation/admin-guide/mm/multigen_lru.rst
@@ -0,0 +1,163 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============
+Multi-Gen LRU
+=============
+The multi-gen LRU is an alternative LRU implementation that optimizes
+page reclaim and improves performance under memory pressure. Page
+reclaim decides the kernel's caching policy and ability to overcommit
+memory. It directly impacts the kswapd CPU usage and RAM efficiency.
+
+Quick start
+===========
+Build the kernel with the following configurations.
+
+* ``CONFIG_LRU_GEN=y``
+* ``CONFIG_LRU_GEN_ENABLED=y``
+
+All set!
+
+Runtime options
+===============
+``/sys/kernel/mm/lru_gen/`` contains stable ABIs described in the
+following subsections.
+
+Kill switch
+-----------
+``enabled`` accepts different values to enable or disable the
+following components. Its default value depends on
+``CONFIG_LRU_GEN_ENABLED``. All the components should be enabled
+unless some of them have unforeseen side effects. Writing to
+``enabled`` has no effect when a component is not supported by the
+hardware, and valid values will be accepted even when the main switch
+is off.
+
+====== ===============================================================
+Values Components
+====== ===============================================================
+0x0001 The main switch for the multi-gen LRU.
+0x0002 Clearing the accessed bit in leaf page table entries in large
+ batches, when MMU sets it (e.g., on x86). This behavior can
+ theoretically worsen lock contention (mmap_lock). If it is
+ disabled, the multi-gen LRU will suffer a minor performance
+ degradation for workloads that contiguously map hot pages,
+ whose accessed bits can be otherwise cleared by fewer larger
+ batches.
+0x0004 Clearing the accessed bit in non-leaf page table entries as
+ well, when MMU sets it (e.g., on x86). This behavior was not
+ verified on x86 varieties other than Intel and AMD. If it is
+ disabled, the multi-gen LRU will suffer a negligible
+ performance degradation.
+[yYnN] Apply to all the components above.
+====== ===============================================================
+
+E.g.,
+::
+
+ echo y >/sys/kernel/mm/lru_gen/enabled
+ cat /sys/kernel/mm/lru_gen/enabled
+ 0x0007
+ echo 5 >/sys/kernel/mm/lru_gen/enabled
+ cat /sys/kernel/mm/lru_gen/enabled
+ 0x0005
+
+Thrashing prevention
+--------------------
+Personal computers are more sensitive to thrashing because it can
+cause janks (lags when rendering UI) and negatively impact user
+experience. The multi-gen LRU offers thrashing prevention to the
+majority of laptop and desktop users who do not have ``oomd``.
+
+Users can write ``N`` to ``min_ttl_ms`` to prevent the working set of
+``N`` milliseconds from getting evicted. The OOM killer is triggered
+if this working set cannot be kept in memory. In other words, this
+option works as an adjustable pressure relief valve, and when open, it
+terminates applications that are hopefully not being used.
+
+Based on the average human detectable lag (~100ms), ``N=1000`` usually
+eliminates intolerable janks due to thrashing. Larger values like
+``N=3000`` make janks less noticeable at the risk of premature OOM
+kills.
+
+The default value ``0`` means disabled.
+
+Experimental features
+=====================
+``/sys/kernel/debug/lru_gen`` accepts commands described in the
+following subsections. Multiple command lines are supported, so does
+concatenation with delimiters ``,`` and ``;``.
+
+``/sys/kernel/debug/lru_gen_full`` provides additional stats for
+debugging. ``CONFIG_LRU_GEN_STATS=y`` keeps historical stats from
+evicted generations in this file.
+
+Working set estimation
+----------------------
+Working set estimation measures how much memory an application needs
+in a given time interval, and it is usually done with little impact on
+the performance of the application. E.g., data centers want to
+optimize job scheduling (bin packing) to improve memory utilizations.
+When a new job comes in, the job scheduler needs to find out whether
+each server it manages can allocate a certain amount of memory for
+this new job before it can pick a candidate. To do so, the job
+scheduler needs to estimate the working sets of the existing jobs.
+
+When it is read, ``lru_gen`` returns a histogram of numbers of pages
+accessed over different time intervals for each memcg and node.
+``MAX_NR_GENS`` decides the number of bins for each histogram. The
+histograms are noncumulative.
+::
+
+ memcg memcg_id memcg_path
+ node node_id
+ min_gen_nr age_in_ms nr_anon_pages nr_file_pages
+ ...
+ max_gen_nr age_in_ms nr_anon_pages nr_file_pages
+
+Each bin contains an estimated number of pages that have been accessed
+within ``age_in_ms``. E.g., ``min_gen_nr`` contains the coldest pages
+and ``max_gen_nr`` contains the hottest pages, since ``age_in_ms`` of
+the former is the largest and that of the latter is the smallest.
+
+Users can write the following command to ``lru_gen`` to create a new
+generation ``max_gen_nr+1``:
+
+ ``+ memcg_id node_id max_gen_nr [can_swap [force_scan]]``
+
+``can_swap`` defaults to the swap setting and, if it is set to ``1``,
+it forces the scan of anon pages when swap is off, and vice versa.
+``force_scan`` defaults to ``1`` and, if it is set to ``0``, it
+employs heuristics to reduce the overhead, which is likely to reduce
+the coverage as well.
+
+A typical use case is that a job scheduler runs this command at a
+certain time interval to create new generations, and it ranks the
+servers it manages based on the sizes of their cold pages defined by
+this time interval.
+
+Proactive reclaim
+-----------------
+Proactive reclaim induces page reclaim when there is no memory
+pressure. It usually targets cold pages only. E.g., when a new job
+comes in, the job scheduler wants to proactively reclaim cold pages on
+the server it selected, to improve the chance of successfully landing
+this new job.
+
+Users can write the following command to ``lru_gen`` to evict
+generations less than or equal to ``min_gen_nr``.
+
+ ``- memcg_id node_id min_gen_nr [swappiness [nr_to_reclaim]]``
+
+``min_gen_nr`` should be less than ``max_gen_nr-1``, since
+``max_gen_nr`` and ``max_gen_nr-1`` are not fully aged (equivalent to
+the active list) and therefore cannot be evicted. ``swappiness``
+overrides the default value in ``/proc/sys/vm/swappiness`` and the valid
+range is [0-200, max], with max being exclusively used for the reclamation
+of anonymous memory. ``nr_to_reclaim`` limits the number of pages to evict.
+
+A typical use case is that a job scheduler runs this command before it
+tries to land a new job on a server. If it fails to materialize enough
+cold pages because of the overestimation, it retries on the next
+server according to the ranking result obtained from the working set
+estimation step. This less forceful approach limits the impacts on the
+existing jobs.
diff --git a/Documentation/nommu-mmap.txt b/Documentation/admin-guide/mm/nommu-mmap.rst
index 530fed08de2c..530fed08de2c 100644
--- a/Documentation/nommu-mmap.txt
+++ b/Documentation/admin-guide/mm/nommu-mmap.rst
diff --git a/Documentation/admin-guide/mm/numa_memory_policy.rst b/Documentation/admin-guide/mm/numa_memory_policy.rst
index d78c5b315f72..a70f20ce1ffb 100644
--- a/Documentation/admin-guide/mm/numa_memory_policy.rst
+++ b/Documentation/admin-guide/mm/numa_memory_policy.rst
@@ -1,5 +1,3 @@
-.. _numa_memory_policy:
-
==================
NUMA Memory Policy
==================
@@ -15,7 +13,7 @@ document attempts to describe the concepts and APIs of the 2.6 memory policy
support.
Memory policies should not be confused with cpusets
-(``Documentation/cgroup-v1/cpusets.txt``)
+(``Documentation/admin-guide/cgroup-v1/cpusets.rst``)
which is an administrative mechanism for restricting the nodes from which
memory may be allocated by a set of processes. Memory policies are a
programming interface that a NUMA-aware application can take advantage of. When
@@ -111,7 +109,7 @@ VMA Policy
* A task may install a new VMA policy on a sub-range of a
previously mmap()ed region. When this happens, Linux splits
the existing virtual memory area into 2 or 3 VMAs, each with
- it's own policy.
+ its own policy.
* By default, VMA policy applies only to pages allocated after
the policy is installed. Any pages already faulted into the
@@ -245,6 +243,22 @@ MPOL_INTERLEAVED
address range or file. During system boot up, the temporary
interleaved system default policy works in this mode.
+MPOL_PREFERRED_MANY
+ This mode specifies that the allocation should be preferably
+ satisfied from the nodemask specified in the policy. If there is
+ a memory pressure on all nodes in the nodemask, the allocation
+ can fall back to all existing numa nodes. This is effectively
+ MPOL_PREFERRED allowed for a mask rather than a single node.
+
+MPOL_WEIGHTED_INTERLEAVE
+ This mode operates the same as MPOL_INTERLEAVE, except that
+ interleaving behavior is executed based on weights set in
+ /sys/kernel/mm/mempolicy/weighted_interleave/
+
+ Weighted interleave allocates pages on nodes according to a
+ weight. For example if nodes [0,1] are weighted [5,2], 5 pages
+ will be allocated on node0 for every 2 pages allocated on node1.
+
NUMA memory policy supports the following optional mode flags:
MPOL_F_STATIC_NODES
@@ -253,10 +267,10 @@ MPOL_F_STATIC_NODES
nodes changes after the memory policy has been defined.
Without this flag, any time a mempolicy is rebound because of a
- change in the set of allowed nodes, the node (Preferred) or
- nodemask (Bind, Interleave) is remapped to the new set of
- allowed nodes. This may result in nodes being used that were
- previously undesired.
+ change in the set of allowed nodes, the preferred nodemask (Preferred
+ Many), preferred node (Preferred) or nodemask (Bind, Interleave) is
+ remapped to the new set of allowed nodes. This may result in nodes
+ being used that were previously undesired.
With this flag, if the user-specified nodes overlap with the
nodes allowed by the task's cpuset, then the memory policy is
@@ -353,7 +367,7 @@ and NUMA nodes. "Usage" here means one of the following:
2) examination of the policy to determine the policy mode and associated node
or node lists, if any, for page allocation. This is considered a "hot
path". Note that for MPOL_BIND, the "usage" extends across the entire
- allocation process, which may sleep during page reclaimation, because the
+ allocation process, which may sleep during page reclamation, because the
BIND policy nodemask is used, by reference, to filter ineligible nodes.
We can avoid taking an extra reference during the usages listed above as
@@ -364,19 +378,19 @@ follows:
2) for querying the policy, we do not need to take an extra reference on the
target task's task policy nor vma policies because we always acquire the
- task's mm's mmap_sem for read during the query. The set_mempolicy() and
- mbind() APIs [see below] always acquire the mmap_sem for write when
+ task's mm's mmap_lock for read during the query. The set_mempolicy() and
+ mbind() APIs [see below] always acquire the mmap_lock for write when
installing or replacing task or vma policies. Thus, there is no possibility
of a task or thread freeing a policy while another task or thread is
querying it.
3) Page allocation usage of task or vma policy occurs in the fault path where
- we hold them mmap_sem for read. Again, because replacing the task or vma
- policy requires that the mmap_sem be held for write, the policy can't be
+ we hold them mmap_lock for read. Again, because replacing the task or vma
+ policy requires that the mmap_lock be held for write, the policy can't be
freed out from under us while we're using it for page allocation.
4) Shared policies require special consideration. One task can replace a
- shared memory policy while another task, with a distinct mmap_sem, is
+ shared memory policy while another task, with a distinct mmap_lock, is
querying or allocating a page based on the policy. To resolve this
potential race, the shared policy infrastructure adds an extra reference
to the shared policy during lookup while holding a spin lock on the shared
@@ -401,7 +415,7 @@ follows:
Memory Policy APIs
==================
-Linux supports 3 system calls for controlling memory policy. These APIS
+Linux supports 4 system calls for controlling memory policy. These APIS
always affect only the calling task, the calling task's address space, or
some shared object mapped into the calling task's address space.
@@ -453,6 +467,20 @@ requested via the 'flags' argument.
See the mbind(2) man page for more details.
+Set home node for a Range of Task's Address Spacec::
+
+ long sys_set_mempolicy_home_node(unsigned long start, unsigned long len,
+ unsigned long home_node,
+ unsigned long flags);
+
+sys_set_mempolicy_home_node set the home node for a VMA policy present in the
+task's address range. The system call updates the home node only for the existing
+mempolicy range. Other address ranges are ignored. A home node is the NUMA node
+closest to which page allocation will come from. Specifying the home node override
+the default allocation policy to allocate memory close to the local node for an
+executing CPU.
+
+
Memory Policy Command Line Interface
====================================
diff --git a/Documentation/admin-guide/mm/numaperf.rst b/Documentation/admin-guide/mm/numaperf.rst
new file mode 100644
index 000000000000..90a12b6a8bfc
--- /dev/null
+++ b/Documentation/admin-guide/mm/numaperf.rst
@@ -0,0 +1,176 @@
+=======================
+NUMA Memory Performance
+=======================
+
+NUMA Locality
+=============
+
+Some platforms may have multiple types of memory attached to a compute
+node. These disparate memory ranges may share some characteristics, such
+as CPU cache coherence, but may have different performance. For example,
+different media types and buses affect bandwidth and latency.
+
+A system supports such heterogeneous memory by grouping each memory type
+under different domains, or "nodes", based on locality and performance
+characteristics. Some memory may share the same node as a CPU, and others
+are provided as memory only nodes. While memory only nodes do not provide
+CPUs, they may still be local to one or more compute nodes relative to
+other nodes. The following diagram shows one such example of two compute
+nodes with local memory and a memory only node for each of compute node::
+
+ +------------------+ +------------------+
+ | Compute Node 0 +-----+ Compute Node 1 |
+ | Local Node0 Mem | | Local Node1 Mem |
+ +--------+---------+ +--------+---------+
+ | |
+ +--------+---------+ +--------+---------+
+ | Slower Node2 Mem | | Slower Node3 Mem |
+ +------------------+ +--------+---------+
+
+A "memory initiator" is a node containing one or more devices such as
+CPUs or separate memory I/O devices that can initiate memory requests.
+A "memory target" is a node containing one or more physical address
+ranges accessible from one or more memory initiators.
+
+When multiple memory initiators exist, they may not all have the same
+performance when accessing a given memory target. Each initiator-target
+pair may be organized into different ranked access classes to represent
+this relationship. The highest performing initiator to a given target
+is considered to be one of that target's local initiators, and given
+the highest access class, 0. Any given target may have one or more
+local initiators, and any given initiator may have multiple local
+memory targets.
+
+To aid applications matching memory targets with their initiators, the
+kernel provides symlinks to each other. The following example lists the
+relationship for the access class "0" memory initiators and targets::
+
+ # symlinks -v /sys/devices/system/node/nodeX/access0/targets/
+ relative: /sys/devices/system/node/nodeX/access0/targets/nodeY -> ../../nodeY
+
+ # symlinks -v /sys/devices/system/node/nodeY/access0/initiators/
+ relative: /sys/devices/system/node/nodeY/access0/initiators/nodeX -> ../../nodeX
+
+A memory initiator may have multiple memory targets in the same access
+class. The target memory's initiators in a given class indicate the
+nodes' access characteristics share the same performance relative to other
+linked initiator nodes. Each target within an initiator's access class,
+though, do not necessarily perform the same as each other.
+
+The access class "1" is used to allow differentiation between initiators
+that are CPUs and hence suitable for generic task scheduling, and
+IO initiators such as GPUs and NICs. Unlike access class 0, only
+nodes containing CPUs are considered.
+
+NUMA Performance
+================
+
+Applications may wish to consider which node they want their memory to
+be allocated from based on the node's performance characteristics. If
+the system provides these attributes, the kernel exports them under the
+node sysfs hierarchy by appending the attributes directory under the
+memory node's access class 0 initiators as follows::
+
+ /sys/devices/system/node/nodeY/access0/initiators/
+
+These attributes apply only when accessed from nodes that have the
+are linked under the this access's initiators.
+
+The performance characteristics the kernel provides for the local initiators
+are exported are as follows::
+
+ # tree -P "read*|write*" /sys/devices/system/node/nodeY/access0/initiators/
+ /sys/devices/system/node/nodeY/access0/initiators/
+ |-- read_bandwidth
+ |-- read_latency
+ |-- write_bandwidth
+ `-- write_latency
+
+The bandwidth attributes are provided in MiB/second.
+
+The latency attributes are provided in nanoseconds.
+
+The values reported here correspond to the rated latency and bandwidth
+for the platform.
+
+Access class 1 takes the same form but only includes values for CPU to
+memory activity.
+
+NUMA Cache
+==========
+
+System memory may be constructed in a hierarchy of elements with various
+performance characteristics in order to provide large address space of
+slower performing memory cached by a smaller higher performing memory. The
+system physical addresses memory initiators are aware of are provided
+by the last memory level in the hierarchy. The system meanwhile uses
+higher performing memory to transparently cache access to progressively
+slower levels.
+
+The term "far memory" is used to denote the last level memory in the
+hierarchy. Each increasing cache level provides higher performing
+initiator access, and the term "near memory" represents the fastest
+cache provided by the system.
+
+This numbering is different than CPU caches where the cache level (ex:
+L1, L2, L3) uses the CPU-side view where each increased level is lower
+performing. In contrast, the memory cache level is centric to the last
+level memory, so the higher numbered cache level corresponds to memory
+nearer to the CPU, and further from far memory.
+
+The memory-side caches are not directly addressable by software. When
+software accesses a system address, the system will return it from the
+near memory cache if it is present. If it is not present, the system
+accesses the next level of memory until there is either a hit in that
+cache level, or it reaches far memory.
+
+An application does not need to know about caching attributes in order
+to use the system. Software may optionally query the memory cache
+attributes in order to maximize the performance out of such a setup.
+If the system provides a way for the kernel to discover this information,
+for example with ACPI HMAT (Heterogeneous Memory Attribute Table),
+the kernel will append these attributes to the NUMA node memory target.
+
+When the kernel first registers a memory cache with a node, the kernel
+will create the following directory::
+
+ /sys/devices/system/node/nodeX/memory_side_cache/
+
+If that directory is not present, the system either does not provide
+a memory-side cache, or that information is not accessible to the kernel.
+
+The attributes for each level of cache is provided under its cache
+level index::
+
+ /sys/devices/system/node/nodeX/memory_side_cache/indexA/
+ /sys/devices/system/node/nodeX/memory_side_cache/indexB/
+ /sys/devices/system/node/nodeX/memory_side_cache/indexC/
+
+Each cache level's directory provides its attributes. For example, the
+following shows a single cache level and the attributes available for
+software to query::
+
+ # tree /sys/devices/system/node/node0/memory_side_cache/
+ /sys/devices/system/node/node0/memory_side_cache/
+ |-- index1
+ | |-- indexing
+ | |-- line_size
+ | |-- size
+ | `-- write_policy
+
+The "indexing" will be 0 if it is a direct-mapped cache, and non-zero
+for any other indexed based, multi-way associativity.
+
+The "line_size" is the number of bytes accessed from the next cache
+level on a miss.
+
+The "size" is the number of bytes provided by this cache level.
+
+The "write_policy" will be 0 for write-back, and non-zero for
+write-through caching.
+
+See Also
+========
+
+[1] https://www.uefi.org/sites/default/files/resources/ACPI_6_2.pdf
+- Section 5.2.27
diff --git a/Documentation/admin-guide/mm/pagemap.rst b/Documentation/admin-guide/mm/pagemap.rst
index 3f7bade2c231..e60e9211fd9b 100644
--- a/Documentation/admin-guide/mm/pagemap.rst
+++ b/Documentation/admin-guide/mm/pagemap.rst
@@ -1,5 +1,3 @@
-.. _pagemap:
-
=============================
Examining Process Page Tables
=============================
@@ -19,9 +17,12 @@ There are four components to pagemap:
* Bits 0-4 swap type if swapped
* Bits 5-54 swap offset if swapped
* Bit 55 pte is soft-dirty (see
- :ref:`Documentation/admin-guide/mm/soft-dirty.rst <soft_dirty>`)
+ Documentation/admin-guide/mm/soft-dirty.rst)
* Bit 56 page exclusively mapped (since 4.2)
- * Bits 57-60 zero
+ * Bit 57 pte is uffd-wp write-protected (since 5.13) (see
+ Documentation/admin-guide/mm/userfaultfd.rst)
+ * Bit 58 pte is a guard region (since 6.15) (see madvise (2) man page)
+ * Bits 59-60 zero
* Bit 61 page is file-page or shared-anon (since 3.5)
* Bit 62 page swapped
* Bit 63 page present
@@ -37,14 +38,30 @@ There are four components to pagemap:
precisely which pages are mapped (or in swap) and comparing mapped
pages between processes.
+ Traditionally, bit 56 indicates that a page is mapped exactly once and bit
+ 56 is clear when a page is mapped multiple times, even when mapped in the
+ same process multiple times. In some kernel configurations, the semantics
+ for pages part of a larger allocation (e.g., THP) can differ: bit 56 is set
+ if all pages part of the corresponding large allocation are *certainly*
+ mapped in the same process, even if the page is mapped multiple times in that
+ process. Bit 56 is clear when any page page of the larger allocation
+ is *maybe* mapped in a different process. In some cases, a large allocation
+ might be treated as "maybe mapped by multiple processes" even though this
+ is no longer the case.
+
Efficient users of this interface will use ``/proc/pid/maps`` to
determine which areas of memory are actually mapped and llseek to
skip over unmapped regions.
* ``/proc/kpagecount``. This file contains a 64-bit count of the number of
- times each page is mapped, indexed by PFN.
+ times each page is mapped, indexed by PFN. Some kernel configurations do
+ not track the precise number of times a page part of a larger allocation
+ (e.g., THP) is mapped. In these configurations, the average number of
+ mappings per page in this larger allocation is returned instead. However,
+ if any page of the large allocation is mapped, the returned value will
+ be at least 1.
-The page-types tool in the tools/vm directory can be used to query the
+The page-types tool in the tools/mm directory can be used to query the
number of times a page is mapped.
* ``/proc/kpageflags``. This file contains a 64-bit set of flags for each
@@ -75,9 +92,10 @@ number of times a page is mapped.
20. NOPAGE
21. KSM
22. THP
- 23. BALLOON
+ 23. OFFLINE
24. ZERO_PAGE
25. IDLE
+ 26. PGTABLE
* ``/proc/kpagecgroup``. This file contains a 64-bit inode number of the
memory cgroup each page is charged to, indexed by PFN. Only available when
@@ -87,13 +105,14 @@ Short descriptions to the page flags
====================================
0 - LOCKED
- page is being locked for exclusive access, e.g. by undergoing read/write IO
+ The page is being locked for exclusive access, e.g. by undergoing read/write
+ IO.
7 - SLAB
- page is managed by the SLAB/SLOB/SLUB/SLQB kernel memory allocator
- When compound page is used, SLUB/SLQB will only set this flag on the head
- page; SLOB will not flag it at all.
+ The page is managed by the SLAB/SLUB kernel memory allocator.
+ When compound page is used, either will only set this flag on the head
+ page.
10 - BUDDY
- a free memory block managed by the buddy system allocator
+ A free memory block managed by the buddy system allocator.
The buddy system organizes free memory in blocks of various orders.
An order N block has 2^N physically contiguous pages, with the BUDDY flag
set for and _only_ for the first page.
@@ -101,95 +120,97 @@ Short descriptions to the page flags
A compound page with order N consists of 2^N physically contiguous pages.
A compound page with order 2 takes the form of "HTTT", where H donates its
head page and T donates its tail page(s). The major consumers of compound
- pages are hugeTLB pages
- (:ref:`Documentation/admin-guide/mm/hugetlbpage.rst <hugetlbpage>`),
+ pages are hugeTLB pages (Documentation/admin-guide/mm/hugetlbpage.rst),
the SLUB etc. memory allocators and various device drivers.
However in this interface, only huge/giga pages are made visible
to end users.
16 - COMPOUND_TAIL
A compound page tail (see description above).
17 - HUGE
- this is an integral part of a HugeTLB page
+ This is an integral part of a HugeTLB page.
19 - HWPOISON
- hardware detected memory corruption on this page: don't touch the data!
+ Hardware detected memory corruption on this page: don't touch the data!
20 - NOPAGE
- no page frame exists at the requested address
+ No page frame exists at the requested address.
21 - KSM
- identical memory pages dynamically shared between one or more processes
+ Identical memory pages dynamically shared between one or more processes.
22 - THP
- contiguous pages which construct transparent hugepages
-23 - BALLOON
- balloon compaction page
+ Contiguous pages which construct THP of any size and mapped by any granularity.
+23 - OFFLINE
+ The page is logically offline.
24 - ZERO_PAGE
- zero page for pfn_zero or huge_zero page
+ Zero page for pfn_zero or huge_zero page.
25 - IDLE
- page has not been accessed since it was marked idle (see
- :ref:`Documentation/admin-guide/mm/idle_page_tracking.rst <idle_page_tracking>`).
+ The page has not been accessed since it was marked idle (see
+ Documentation/admin-guide/mm/idle_page_tracking.rst).
Note that this flag may be stale in case the page was accessed via
a PTE. To make sure the flag is up-to-date one has to read
``/sys/kernel/mm/page_idle/bitmap`` first.
+26 - PGTABLE
+ The page is in use as a page table.
IO related page flags
---------------------
1 - ERROR
- IO error occurred
+ IO error occurred.
3 - UPTODATE
- page has up-to-date data
+ The page has up-to-date data.
ie. for file backed page: (in-memory data revision >= on-disk one)
4 - DIRTY
- page has been written to, hence contains new data
+ The page has been written to, hence contains new data.
i.e. for file backed page: (in-memory data revision > on-disk one)
8 - WRITEBACK
- page is being synced to disk
+ The page is being synced to disk.
LRU related page flags
----------------------
5 - LRU
- page is in one of the LRU lists
+ The page is in one of the LRU lists.
6 - ACTIVE
- page is in the active LRU list
+ The page is in the active LRU list.
18 - UNEVICTABLE
- page is in the unevictable (non-)LRU list It is somehow pinned and
+ The page is in the unevictable (non-)LRU list It is somehow pinned and
not a candidate for LRU page reclaims, e.g. ramfs pages,
- shmctl(SHM_LOCK) and mlock() memory segments
+ shmctl(SHM_LOCK) and mlock() memory segments.
2 - REFERENCED
- page has been referenced since last LRU list enqueue/requeue
+ The page has been referenced since last LRU list enqueue/requeue.
9 - RECLAIM
- page will be reclaimed soon after its pageout IO completed
+ The page will be reclaimed soon after its pageout IO completed.
11 - MMAP
- a memory mapped page
+ A memory mapped page.
12 - ANON
- a memory mapped page that is not part of a file
+ A memory mapped page that is not part of a file.
13 - SWAPCACHE
- page is mapped to swap space, i.e. has an associated swap entry
+ The page is mapped to swap space, i.e. has an associated swap entry.
14 - SWAPBACKED
- page is backed by swap/RAM
+ The page is backed by swap/RAM.
-The page-types tool in the tools/vm directory can be used to query the
+The page-types tool in the tools/mm directory can be used to query the
above flags.
-Using pagemap to do something useful
-====================================
+Exceptions for Shared Memory
+============================
+
+Page table entries for shared pages are cleared when the pages are zapped or
+swapped out. This makes swapped out pages indistinguishable from never-allocated
+ones.
+
+In kernel space, the swap location can still be retrieved from the page cache.
+However, values stored only on the normal PTE get lost irretrievably when the
+page is swapped out (i.e. SOFT_DIRTY).
-The general procedure for using pagemap to find out about a process' memory
-usage goes like this:
+In user space, whether the page is present, swapped or none can be deduced with
+the help of lseek and/or mincore system calls.
- 1. Read ``/proc/pid/maps`` to determine which parts of the memory space are
- mapped to what.
- 2. Select the maps you are interested in -- all of them, or a particular
- library, or the stack or the heap, etc.
- 3. Open ``/proc/pid/pagemap`` and seek to the pages you would like to examine.
- 4. Read a u64 for each page from pagemap.
- 5. Open ``/proc/kpagecount`` and/or ``/proc/kpageflags``. For each PFN you
- just read, seek to that entry in the file, and read the data you want.
+lseek() can differentiate between accessed pages (present or swapped out) and
+holes (none/non-allocated) by specifying the SEEK_DATA flag on the file where
+the pages are backed. For anonymous shared pages, the file can be found in
+``/proc/pid/map_files/``.
-For example, to find the "unique set size" (USS), which is the amount of
-memory that a process is using that is not shared with any other process,
-you can go through every map in the process, find the PFNs, look those up
-in kpagecount, and tally up the number of pages that are only referenced
-once.
+mincore() can differentiate between pages in memory (present, including swap
+cache) and out of memory (swapped out or none/non-allocated).
Other notes
===========
@@ -202,3 +223,94 @@ Before Linux 3.11 pagemap bits 55-60 were used for "page-shift" (which is
always 12 at most architectures). Since Linux 3.11 their meaning changes
after first clear of soft-dirty bits. Since Linux 4.2 they are used for
flags unconditionally.
+
+Pagemap Scan IOCTL
+==================
+
+The ``PAGEMAP_SCAN`` IOCTL on the pagemap file can be used to get or optionally
+clear the info about page table entries. The following operations are supported
+in this IOCTL:
+
+- Scan the address range and get the memory ranges matching the provided criteria.
+ This is performed when the output buffer is specified.
+- Write-protect the pages. The ``PM_SCAN_WP_MATCHING`` is used to write-protect
+ the pages of interest. The ``PM_SCAN_CHECK_WPASYNC`` aborts the operation if
+ non-Async Write Protected pages are found. The ``PM_SCAN_WP_MATCHING`` can be
+ used with or without ``PM_SCAN_CHECK_WPASYNC``.
+- Both of those operations can be combined into one atomic operation where we can
+ get and write protect the pages as well.
+
+Following flags about pages are currently supported:
+
+- ``PAGE_IS_WPALLOWED`` - Page has async-write-protection enabled
+- ``PAGE_IS_WRITTEN`` - Page has been written to from the time it was write protected
+- ``PAGE_IS_FILE`` - Page is file backed
+- ``PAGE_IS_PRESENT`` - Page is present in the memory
+- ``PAGE_IS_SWAPPED`` - Page is in swapped
+- ``PAGE_IS_PFNZERO`` - Page has zero PFN
+- ``PAGE_IS_HUGE`` - Page is PMD-mapped THP or Hugetlb backed
+- ``PAGE_IS_SOFT_DIRTY`` - Page is soft-dirty
+- ``PAGE_IS_GUARD`` - Page is a part of a guard region
+
+The ``struct pm_scan_arg`` is used as the argument of the IOCTL.
+
+ 1. The size of the ``struct pm_scan_arg`` must be specified in the ``size``
+ field. This field will be helpful in recognizing the structure if extensions
+ are done later.
+ 2. The flags can be specified in the ``flags`` field. The ``PM_SCAN_WP_MATCHING``
+ and ``PM_SCAN_CHECK_WPASYNC`` are the only added flags at this time. The get
+ operation is optionally performed depending upon if the output buffer is
+ provided or not.
+ 3. The range is specified through ``start`` and ``end``.
+ 4. The walk can abort before visiting the complete range such as the user buffer
+ can get full etc. The walk ending address is specified in``end_walk``.
+ 5. The output buffer of ``struct page_region`` array and size is specified in
+ ``vec`` and ``vec_len``.
+ 6. The optional maximum requested pages are specified in the ``max_pages``.
+ 7. The masks are specified in ``category_mask``, ``category_anyof_mask``,
+ ``category_inverted`` and ``return_mask``.
+
+Find pages which have been written and WP them as well::
+
+ struct pm_scan_arg arg = {
+ .size = sizeof(arg),
+ .flags = PM_SCAN_CHECK_WPASYNC | PM_SCAN_CHECK_WPASYNC,
+ ..
+ .category_mask = PAGE_IS_WRITTEN,
+ .return_mask = PAGE_IS_WRITTEN,
+ };
+
+Find pages which have been written, are file backed, not swapped and either
+present or huge::
+
+ struct pm_scan_arg arg = {
+ .size = sizeof(arg),
+ .flags = 0,
+ ..
+ .category_mask = PAGE_IS_WRITTEN | PAGE_IS_SWAPPED,
+ .category_inverted = PAGE_IS_SWAPPED,
+ .category_anyof_mask = PAGE_IS_PRESENT | PAGE_IS_HUGE,
+ .return_mask = PAGE_IS_WRITTEN | PAGE_IS_SWAPPED |
+ PAGE_IS_PRESENT | PAGE_IS_HUGE,
+ };
+
+The ``PAGE_IS_WRITTEN`` flag can be considered as a better-performing alternative
+of soft-dirty flag. It doesn't get affected by VMA merging of the kernel and hence
+the user can find the true soft-dirty pages in case of normal pages. (There may
+still be extra dirty pages reported for THP or Hugetlb pages.)
+
+"PAGE_IS_WRITTEN" category is used with uffd write protect-enabled ranges to
+implement memory dirty tracking in userspace:
+
+ 1. The userfaultfd file descriptor is created with ``userfaultfd`` syscall.
+ 2. The ``UFFD_FEATURE_WP_UNPOPULATED`` and ``UFFD_FEATURE_WP_ASYNC`` features
+ are set by ``UFFDIO_API`` IOCTL.
+ 3. The memory range is registered with ``UFFDIO_REGISTER_MODE_WP`` mode
+ through ``UFFDIO_REGISTER`` IOCTL.
+ 4. Then any part of the registered memory or the whole memory region must
+ be write protected using ``PAGEMAP_SCAN`` IOCTL with flag ``PM_SCAN_WP_MATCHING``
+ or the ``UFFDIO_WRITEPROTECT`` IOCTL can be used. Both of these perform the
+ same operation. The former is better in terms of performance.
+ 5. Now the ``PAGEMAP_SCAN`` IOCTL can be used to either just find pages which
+ have been written to since they were last marked and/or optionally write protect
+ the pages as well.
diff --git a/Documentation/admin-guide/mm/shrinker_debugfs.rst b/Documentation/admin-guide/mm/shrinker_debugfs.rst
new file mode 100644
index 000000000000..c582033bd113
--- /dev/null
+++ b/Documentation/admin-guide/mm/shrinker_debugfs.rst
@@ -0,0 +1,133 @@
+==========================
+Shrinker Debugfs Interface
+==========================
+
+Shrinker debugfs interface provides a visibility into the kernel memory
+shrinkers subsystem and allows to get information about individual shrinkers
+and interact with them.
+
+For each shrinker registered in the system a directory in **<debugfs>/shrinker/**
+is created. The directory's name is composed from the shrinker's name and an
+unique id: e.g. *kfree_rcu-0* or *sb-xfs:vda1-36*.
+
+Each shrinker directory contains **count** and **scan** files, which allow to
+trigger *count_objects()* and *scan_objects()* callbacks for each memcg and
+numa node (if applicable).
+
+Usage:
+------
+
+1. *List registered shrinkers*
+
+ ::
+
+ $ cd /sys/kernel/debug/shrinker/
+ $ ls
+ dquota-cache-16 sb-devpts-28 sb-proc-47 sb-tmpfs-42
+ mm-shadow-18 sb-devtmpfs-5 sb-proc-48 sb-tmpfs-43
+ mm-zspool:zram0-34 sb-hugetlbfs-17 sb-pstore-31 sb-tmpfs-44
+ rcu-kfree-0 sb-hugetlbfs-33 sb-rootfs-2 sb-tmpfs-49
+ sb-aio-20 sb-iomem-12 sb-securityfs-6 sb-tracefs-13
+ sb-anon_inodefs-15 sb-mqueue-21 sb-selinuxfs-22 sb-xfs:vda1-36
+ sb-bdev-3 sb-nsfs-4 sb-sockfs-8 sb-zsmalloc-19
+ sb-bpf-32 sb-pipefs-14 sb-sysfs-26 thp-deferred_split-10
+ sb-btrfs:vda2-24 sb-proc-25 sb-tmpfs-1 thp-zero-9
+ sb-cgroup2-30 sb-proc-39 sb-tmpfs-27 xfs-buf:vda1-37
+ sb-configfs-23 sb-proc-41 sb-tmpfs-29 xfs-inodegc:vda1-38
+ sb-dax-11 sb-proc-45 sb-tmpfs-35
+ sb-debugfs-7 sb-proc-46 sb-tmpfs-40
+
+2. *Get information about a specific shrinker*
+
+ ::
+
+ $ cd sb-btrfs\:vda2-24/
+ $ ls
+ count scan
+
+3. *Count objects*
+
+ Each line in the output has the following format::
+
+ <cgroup inode id> <nr of objects on node 0> <nr of objects on node 1> ...
+ <cgroup inode id> <nr of objects on node 0> <nr of objects on node 1> ...
+ ...
+
+ If there are no objects on all numa nodes, a line is omitted. If there
+ are no objects at all, the output might be empty.
+
+ If the shrinker is not memcg-aware or CONFIG_MEMCG is off, 0 is printed
+ as cgroup inode id. If the shrinker is not numa-aware, 0's are printed
+ for all nodes except the first one.
+ ::
+
+ $ cat count
+ 1 224 2
+ 21 98 0
+ 55 818 10
+ 2367 2 0
+ 2401 30 0
+ 225 13 0
+ 599 35 0
+ 939 124 0
+ 1041 3 0
+ 1075 1 0
+ 1109 1 0
+ 1279 60 0
+ 1313 7 0
+ 1347 39 0
+ 1381 3 0
+ 1449 14 0
+ 1483 63 0
+ 1517 53 0
+ 1551 6 0
+ 1585 1 0
+ 1619 6 0
+ 1653 40 0
+ 1687 11 0
+ 1721 8 0
+ 1755 4 0
+ 1789 52 0
+ 1823 888 0
+ 1857 1 0
+ 1925 2 0
+ 1959 32 0
+ 2027 22 0
+ 2061 9 0
+ 2469 799 0
+ 2537 861 0
+ 2639 1 0
+ 2707 70 0
+ 2775 4 0
+ 2877 84 0
+ 293 1 0
+ 735 8 0
+
+4. *Scan objects*
+
+ The expected input format::
+
+ <cgroup inode id> <numa id> <number of objects to scan>
+
+ For a non-memcg-aware shrinker or on a system with no memory
+ cgrups **0** should be passed as cgroup id.
+ ::
+
+ $ cd /sys/kernel/debug/shrinker/
+ $ cd sb-btrfs\:vda2-24/
+
+ $ cat count | head -n 5
+ 1 212 0
+ 21 97 0
+ 55 802 5
+ 2367 2 0
+ 225 13 0
+
+ $ echo "55 0 200" > scan
+
+ $ cat count | head -n 5
+ 1 212 0
+ 21 96 0
+ 55 752 5
+ 2367 2 0
+ 225 13 0
diff --git a/Documentation/admin-guide/mm/slab.rst b/Documentation/admin-guide/mm/slab.rst
new file mode 100644
index 000000000000..14429ab90611
--- /dev/null
+++ b/Documentation/admin-guide/mm/slab.rst
@@ -0,0 +1,469 @@
+========================================
+Short users guide for the slab allocator
+========================================
+
+The slab allocator includes full debugging support (when built with
+CONFIG_SLUB_DEBUG=y) but it is off by default (unless built with
+CONFIG_SLUB_DEBUG_ON=y). You can enable debugging only for selected
+slabs in order to avoid an impact on overall system performance which
+may make a bug more difficult to find.
+
+In order to switch debugging on one can add an option ``slab_debug``
+to the kernel command line. That will enable full debugging for
+all slabs.
+
+Typically one would then use the ``slabinfo`` command to get statistical
+data and perform operation on the slabs. By default ``slabinfo`` only lists
+slabs that have data in them. See "slabinfo -h" for more options when
+running the command. ``slabinfo`` can be compiled with
+::
+
+ gcc -o slabinfo tools/mm/slabinfo.c
+
+Some of the modes of operation of ``slabinfo`` require that slub debugging
+be enabled on the command line. F.e. no tracking information will be
+available without debugging on and validation can only partially
+be performed if debugging was not switched on.
+
+Some more sophisticated uses of slab_debug:
+-------------------------------------------
+
+Parameters may be given to ``slab_debug``. If none is specified then full
+debugging is enabled. Format:
+
+slab_debug=<Debug-Options>
+ Enable options for all slabs
+
+slab_debug=<Debug-Options>,<slab name1>,<slab name2>,...
+ Enable options only for select slabs (no spaces
+ after a comma)
+
+Multiple blocks of options for all slabs or selected slabs can be given, with
+blocks of options delimited by ';'. The last of "all slabs" blocks is applied
+to all slabs except those that match one of the "select slabs" block. Options
+of the first "select slabs" blocks that matches the slab's name are applied.
+
+Possible debug options are::
+
+ F Sanity checks on (enables SLAB_DEBUG_CONSISTENCY_CHECKS
+ Sorry SLAB legacy issues)
+ Z Red zoning
+ P Poisoning (object and padding)
+ U User tracking (free and alloc)
+ T Trace (please only use on single slabs)
+ A Enable failslab filter mark for the cache
+ O Switch debugging off for caches that would have
+ caused higher minimum slab orders
+ - Switch all debugging off (useful if the kernel is
+ configured with CONFIG_SLUB_DEBUG_ON)
+
+F.e. in order to boot just with sanity checks and red zoning one would specify::
+
+ slab_debug=FZ
+
+Trying to find an issue in the dentry cache? Try::
+
+ slab_debug=,dentry
+
+to only enable debugging on the dentry cache. You may use an asterisk at the
+end of the slab name, in order to cover all slabs with the same prefix. For
+example, here's how you can poison the dentry cache as well as all kmalloc
+slabs::
+
+ slab_debug=P,kmalloc-*,dentry
+
+Red zoning and tracking may realign the slab. We can just apply sanity checks
+to the dentry cache with::
+
+ slab_debug=F,dentry
+
+Debugging options may require the minimum possible slab order to increase as
+a result of storing the metadata (for example, caches with PAGE_SIZE object
+sizes). This has a higher likelihood of resulting in slab allocation errors
+in low memory situations or if there's high fragmentation of memory. To
+switch off debugging for such caches by default, use::
+
+ slab_debug=O
+
+You can apply different options to different list of slab names, using blocks
+of options. This will enable red zoning for dentry and user tracking for
+kmalloc. All other slabs will not get any debugging enabled::
+
+ slab_debug=Z,dentry;U,kmalloc-*
+
+You can also enable options (e.g. sanity checks and poisoning) for all caches
+except some that are deemed too performance critical and don't need to be
+debugged by specifying global debug options followed by a list of slab names
+with "-" as options::
+
+ slab_debug=FZ;-,zs_handle,zspage
+
+The state of each debug option for a slab can be found in the respective files
+under::
+
+ /sys/kernel/slab/<slab name>/
+
+If the file contains 1, the option is enabled, 0 means disabled. The debug
+options from the ``slab_debug`` parameter translate to the following files::
+
+ F sanity_checks
+ Z red_zone
+ P poison
+ U store_user
+ T trace
+ A failslab
+
+failslab file is writable, so writing 1 or 0 will enable or disable
+the option at runtime. Write returns -EINVAL if cache is an alias.
+Careful with tracing: It may spew out lots of information and never stop if
+used on the wrong slab.
+
+Slab merging
+============
+
+If no debug options are specified then SLUB may merge similar slabs together
+in order to reduce overhead and increase cache hotness of objects.
+``slabinfo -a`` displays which slabs were merged together.
+
+Slab validation
+===============
+
+SLUB can validate all object if the kernel was booted with slab_debug. In
+order to do so you must have the ``slabinfo`` tool. Then you can do
+::
+
+ slabinfo -v
+
+which will test all objects. Output will be generated to the syslog.
+
+This also works in a more limited way if boot was without slab debug.
+In that case ``slabinfo -v`` simply tests all reachable objects. Usually
+these are in the cpu slabs and the partial slabs. Full slabs are not
+tracked by SLUB in a non debug situation.
+
+Getting more performance
+========================
+
+To some degree SLUB's performance is limited by the need to take the
+list_lock once in a while to deal with partial slabs. That overhead is
+governed by the order of the allocation for each slab. The allocations
+can be influenced by kernel parameters:
+
+.. slab_min_objects=x (default: automatically scaled by number of cpus)
+.. slab_min_order=x (default 0)
+.. slab_max_order=x (default 3 (PAGE_ALLOC_COSTLY_ORDER))
+
+``slab_min_objects``
+ allows to specify how many objects must at least fit into one
+ slab in order for the allocation order to be acceptable. In
+ general slub will be able to perform this number of
+ allocations on a slab without consulting centralized resources
+ (list_lock) where contention may occur.
+
+``slab_min_order``
+ specifies a minimum order of slabs. A similar effect like
+ ``slab_min_objects``.
+
+``slab_max_order``
+ specified the order at which ``slab_min_objects`` should no
+ longer be checked. This is useful to avoid SLUB trying to
+ generate super large order pages to fit ``slab_min_objects``
+ of a slab cache with large object sizes into one high order
+ page. Setting command line parameter
+ ``debug_guardpage_minorder=N`` (N > 0), forces setting
+ ``slab_max_order`` to 0, what cause minimum possible order of
+ slabs allocation.
+
+``slab_strict_numa``
+ Enables the application of memory policies on each
+ allocation. This results in more accurate placement of
+ objects which may result in the reduction of accesses
+ to remote nodes. The default is to only apply memory
+ policies at the folio level when a new folio is acquired
+ or a folio is retrieved from the lists. Enabling this
+ option reduces the fastpath performance of the slab allocator.
+
+SLUB Debug output
+=================
+
+Here is a sample of slub debug output::
+
+ ====================================================================
+ BUG kmalloc-8: Right Redzone overwritten
+ --------------------------------------------------------------------
+
+ INFO: 0xc90f6d28-0xc90f6d2b. First byte 0x00 instead of 0xcc
+ INFO: Slab 0xc528c530 flags=0x400000c3 inuse=61 fp=0xc90f6d58
+ INFO: Object 0xc90f6d20 @offset=3360 fp=0xc90f6d58
+ INFO: Allocated in get_modalias+0x61/0xf5 age=53 cpu=1 pid=554
+
+ Bytes b4 (0xc90f6d10): 00 00 00 00 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a ........ZZZZZZZZ
+ Object (0xc90f6d20): 31 30 31 39 2e 30 30 35 1019.005
+ Redzone (0xc90f6d28): 00 cc cc cc .
+ Padding (0xc90f6d50): 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ
+
+ [<c010523d>] dump_trace+0x63/0x1eb
+ [<c01053df>] show_trace_log_lvl+0x1a/0x2f
+ [<c010601d>] show_trace+0x12/0x14
+ [<c0106035>] dump_stack+0x16/0x18
+ [<c017e0fa>] object_err+0x143/0x14b
+ [<c017e2cc>] check_object+0x66/0x234
+ [<c017eb43>] __slab_free+0x239/0x384
+ [<c017f446>] kfree+0xa6/0xc6
+ [<c02e2335>] get_modalias+0xb9/0xf5
+ [<c02e23b7>] dmi_dev_uevent+0x27/0x3c
+ [<c027866a>] dev_uevent+0x1ad/0x1da
+ [<c0205024>] kobject_uevent_env+0x20a/0x45b
+ [<c020527f>] kobject_uevent+0xa/0xf
+ [<c02779f1>] store_uevent+0x4f/0x58
+ [<c027758e>] dev_attr_store+0x29/0x2f
+ [<c01bec4f>] sysfs_write_file+0x16e/0x19c
+ [<c0183ba7>] vfs_write+0xd1/0x15a
+ [<c01841d7>] sys_write+0x3d/0x72
+ [<c0104112>] sysenter_past_esp+0x5f/0x99
+ [<b7f7b410>] 0xb7f7b410
+ =======================
+
+ FIX kmalloc-8: Restoring Redzone 0xc90f6d28-0xc90f6d2b=0xcc
+
+If SLUB encounters a corrupted object (full detection requires the kernel
+to be booted with slab_debug) then the following output will be dumped
+into the syslog:
+
+1. Description of the problem encountered
+
+ This will be a message in the system log starting with::
+
+ ===============================================
+ BUG <slab cache affected>: <What went wrong>
+ -----------------------------------------------
+
+ INFO: <corruption start>-<corruption_end> <more info>
+ INFO: Slab <address> <slab information>
+ INFO: Object <address> <object information>
+ INFO: Allocated in <kernel function> age=<jiffies since alloc> cpu=<allocated by
+ cpu> pid=<pid of the process>
+ INFO: Freed in <kernel function> age=<jiffies since free> cpu=<freed by cpu>
+ pid=<pid of the process>
+
+ (Object allocation / free information is only available if SLAB_STORE_USER is
+ set for the slab. slab_debug sets that option)
+
+2. The object contents if an object was involved.
+
+ Various types of lines can follow the BUG SLUB line:
+
+ Bytes b4 <address> : <bytes>
+ Shows a few bytes before the object where the problem was detected.
+ Can be useful if the corruption does not stop with the start of the
+ object.
+
+ Object <address> : <bytes>
+ The bytes of the object. If the object is inactive then the bytes
+ typically contain poison values. Any non-poison value shows a
+ corruption by a write after free.
+
+ Redzone <address> : <bytes>
+ The Redzone following the object. The Redzone is used to detect
+ writes after the object. All bytes should always have the same
+ value. If there is any deviation then it is due to a write after
+ the object boundary.
+
+ (Redzone information is only available if SLAB_RED_ZONE is set.
+ slab_debug sets that option)
+
+ Padding <address> : <bytes>
+ Unused data to fill up the space in order to get the next object
+ properly aligned. In the debug case we make sure that there are
+ at least 4 bytes of padding. This allows the detection of writes
+ before the object.
+
+3. A stackdump
+
+ The stackdump describes the location where the error was detected. The cause
+ of the corruption is may be more likely found by looking at the function that
+ allocated or freed the object.
+
+4. Report on how the problem was dealt with in order to ensure the continued
+ operation of the system.
+
+ These are messages in the system log beginning with::
+
+ FIX <slab cache affected>: <corrective action taken>
+
+ In the above sample SLUB found that the Redzone of an active object has
+ been overwritten. Here a string of 8 characters was written into a slab that
+ has the length of 8 characters. However, a 8 character string needs a
+ terminating 0. That zero has overwritten the first byte of the Redzone field.
+ After reporting the details of the issue encountered the FIX SLUB message
+ tells us that SLUB has restored the Redzone to its proper value and then
+ system operations continue.
+
+Emergency operations
+====================
+
+Minimal debugging (sanity checks alone) can be enabled by booting with::
+
+ slab_debug=F
+
+This will be generally be enough to enable the resiliency features of slub
+which will keep the system running even if a bad kernel component will
+keep corrupting objects. This may be important for production systems.
+Performance will be impacted by the sanity checks and there will be a
+continual stream of error messages to the syslog but no additional memory
+will be used (unlike full debugging).
+
+No guarantees. The kernel component still needs to be fixed. Performance
+may be optimized further by locating the slab that experiences corruption
+and enabling debugging only for that cache
+
+I.e.::
+
+ slab_debug=F,dentry
+
+If the corruption occurs by writing after the end of the object then it
+may be advisable to enable a Redzone to avoid corrupting the beginning
+of other objects::
+
+ slab_debug=FZ,dentry
+
+Extended slabinfo mode and plotting
+===================================
+
+The ``slabinfo`` tool has a special 'extended' ('-X') mode that includes:
+ - Slabcache Totals
+ - Slabs sorted by size (up to -N <num> slabs, default 1)
+ - Slabs sorted by loss (up to -N <num> slabs, default 1)
+
+Additionally, in this mode ``slabinfo`` does not dynamically scale
+sizes (G/M/K) and reports everything in bytes (this functionality is
+also available to other slabinfo modes via '-B' option) which makes
+reporting more precise and accurate. Moreover, in some sense the `-X'
+mode also simplifies the analysis of slabs' behaviour, because its
+output can be plotted using the ``slabinfo-gnuplot.sh`` script. So it
+pushes the analysis from looking through the numbers (tons of numbers)
+to something easier -- visual analysis.
+
+To generate plots:
+
+a) collect slabinfo extended records, for example::
+
+ while [ 1 ]; do slabinfo -X >> FOO_STATS; sleep 1; done
+
+b) pass stats file(-s) to ``slabinfo-gnuplot.sh`` script::
+
+ slabinfo-gnuplot.sh FOO_STATS [FOO_STATS2 .. FOO_STATSN]
+
+ The ``slabinfo-gnuplot.sh`` script will pre-processes the collected records
+ and generates 3 png files (and 3 pre-processing cache files) per STATS
+ file:
+ - Slabcache Totals: FOO_STATS-totals.png
+ - Slabs sorted by size: FOO_STATS-slabs-by-size.png
+ - Slabs sorted by loss: FOO_STATS-slabs-by-loss.png
+
+Another use case, when ``slabinfo-gnuplot.sh`` can be useful, is when you
+need to compare slabs' behaviour "prior to" and "after" some code
+modification. To help you out there, ``slabinfo-gnuplot.sh`` script
+can 'merge' the `Slabcache Totals` sections from different
+measurements. To visually compare N plots:
+
+a) Collect as many STATS1, STATS2, .. STATSN files as you need::
+
+ while [ 1 ]; do slabinfo -X >> STATS<X>; sleep 1; done
+
+b) Pre-process those STATS files::
+
+ slabinfo-gnuplot.sh STATS1 STATS2 .. STATSN
+
+c) Execute ``slabinfo-gnuplot.sh`` in '-t' mode, passing all of the
+ generated pre-processed \*-totals::
+
+ slabinfo-gnuplot.sh -t STATS1-totals STATS2-totals .. STATSN-totals
+
+ This will produce a single plot (png file).
+
+ Plots, expectedly, can be large so some fluctuations or small spikes
+ can go unnoticed. To deal with that, ``slabinfo-gnuplot.sh`` has two
+ options to 'zoom-in'/'zoom-out':
+
+ a) ``-s %d,%d`` -- overwrites the default image width and height
+ b) ``-r %d,%d`` -- specifies a range of samples to use (for example,
+ in ``slabinfo -X >> FOO_STATS; sleep 1;`` case, using a ``-r
+ 40,60`` range will plot only samples collected between 40th and
+ 60th seconds).
+
+
+DebugFS files for SLUB
+======================
+
+For more information about current state of SLUB caches with the user tracking
+debug option enabled, debugfs files are available, typically under
+/sys/kernel/debug/slab/<cache>/ (created only for caches with enabled user
+tracking). There are 2 types of these files with the following debug
+information:
+
+1. alloc_traces::
+
+ Prints information about unique allocation traces of the currently
+ allocated objects. The output is sorted by frequency of each trace.
+
+ Information in the output:
+ Number of objects, allocating function, possible memory wastage of
+ kmalloc objects(total/per-object), minimal/average/maximal jiffies
+ since alloc, pid range of the allocating processes, cpu mask of
+ allocating cpus, numa node mask of origins of memory, and stack trace.
+
+ Example:::
+
+ 338 pci_alloc_dev+0x2c/0xa0 waste=521872/1544 age=290837/291891/293509 pid=1 cpus=106 nodes=0-1
+ __kmem_cache_alloc_node+0x11f/0x4e0
+ kmalloc_trace+0x26/0xa0
+ pci_alloc_dev+0x2c/0xa0
+ pci_scan_single_device+0xd2/0x150
+ pci_scan_slot+0xf7/0x2d0
+ pci_scan_child_bus_extend+0x4e/0x360
+ acpi_pci_root_create+0x32e/0x3b0
+ pci_acpi_scan_root+0x2b9/0x2d0
+ acpi_pci_root_add.cold.11+0x110/0xb0a
+ acpi_bus_attach+0x262/0x3f0
+ device_for_each_child+0xb7/0x110
+ acpi_dev_for_each_child+0x77/0xa0
+ acpi_bus_attach+0x108/0x3f0
+ device_for_each_child+0xb7/0x110
+ acpi_dev_for_each_child+0x77/0xa0
+ acpi_bus_attach+0x108/0x3f0
+
+2. free_traces::
+
+ Prints information about unique freeing traces of the currently allocated
+ objects. The freeing traces thus come from the previous life-cycle of the
+ objects and are reported as not available for objects allocated for the first
+ time. The output is sorted by frequency of each trace.
+
+ Information in the output:
+ Number of objects, freeing function, minimal/average/maximal jiffies since free,
+ pid range of the freeing processes, cpu mask of freeing cpus, and stack trace.
+
+ Example:::
+
+ 1980 <not-available> age=4294912290 pid=0 cpus=0
+ 51 acpi_ut_update_ref_count+0x6a6/0x782 age=236886/237027/237772 pid=1 cpus=1
+ kfree+0x2db/0x420
+ acpi_ut_update_ref_count+0x6a6/0x782
+ acpi_ut_update_object_reference+0x1ad/0x234
+ acpi_ut_remove_reference+0x7d/0x84
+ acpi_rs_get_prt_method_data+0x97/0xd6
+ acpi_get_irq_routing_table+0x82/0xc4
+ acpi_pci_irq_find_prt_entry+0x8e/0x2e0
+ acpi_pci_irq_lookup+0x3a/0x1e0
+ acpi_pci_irq_enable+0x77/0x240
+ pcibios_enable_device+0x39/0x40
+ do_pci_enable_device.part.0+0x5d/0xe0
+ pci_enable_device_flags+0xfc/0x120
+ pci_enable_device+0x13/0x20
+ virtio_pci_probe+0x9e/0x170
+ local_pci_probe+0x48/0x80
+ pci_device_probe+0x105/0x1c0
+
+Christoph Lameter, May 30, 2007
+Sergey Senozhatsky, October 23, 2015
diff --git a/Documentation/admin-guide/mm/soft-dirty.rst b/Documentation/admin-guide/mm/soft-dirty.rst
index cb0cfd6672fa..aeea936caa44 100644
--- a/Documentation/admin-guide/mm/soft-dirty.rst
+++ b/Documentation/admin-guide/mm/soft-dirty.rst
@@ -1,5 +1,3 @@
-.. _soft_dirty:
-
===============
Soft-Dirty PTEs
===============
diff --git a/Documentation/vm/swap_numa.rst b/Documentation/admin-guide/mm/swap_numa.rst
index e0466f2db8fa..2e630627bcee 100644
--- a/Documentation/vm/swap_numa.rst
+++ b/Documentation/admin-guide/mm/swap_numa.rst
@@ -1,5 +1,3 @@
-.. _swap_numa:
-
===========================================
Automatically bind swap device to numa node
===========================================
diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
index 7ab93a8404b9..1654211cc6cf 100644
--- a/Documentation/admin-guide/mm/transhuge.rst
+++ b/Documentation/admin-guide/mm/transhuge.rst
@@ -1,5 +1,3 @@
-.. _admin_guide_transhuge:
-
============================
Transparent Hugepage Support
============================
@@ -47,13 +45,28 @@ components:
the two is using hugepages just because of the fact the TLB miss is
going to run faster.
+Modern kernels support "multi-size THP" (mTHP), which introduces the
+ability to allocate memory in blocks that are bigger than a base page
+but smaller than traditional PMD-size (as described above), in
+increments of a power-of-2 number of pages. mTHP can back anonymous
+memory (for example 16K, 32K, 64K, etc). These THPs continue to be
+PTE-mapped, but in many cases can still provide similar benefits to
+those outlined above: Page faults are significantly reduced (by a
+factor of e.g. 4, 8, 16, etc), but latency spikes are much less
+prominent because the size of each page isn't as huge as the PMD-sized
+variant and there is less memory to clear in each page fault. Some
+architectures also employ TLB compression mechanisms to squeeze more
+entries in when a set of PTEs are virtually and physically contiguous
+and approporiately aligned. In this case, TLB misses will occur less
+often.
+
THP can be enabled system wide or restricted to certain tasks or even
memory ranges inside task's address space. Unless THP is completely
disabled, there is ``khugepaged`` daemon that scans memory and
-collapses sequences of basic pages into huge pages.
+collapses sequences of basic pages into PMD-sized huge pages.
The THP behaviour is controlled via :ref:`sysfs <thp_sysfs>`
-interface and using madivse(2) and prctl(2) system calls.
+interface and using madvise(2) and prctl(2) system calls.
Transparent Hugepage Support maximizes the usefulness of free memory
if compared to the reservation approach of hugetlbfs by allowing all
@@ -94,15 +107,48 @@ sysfs
Global THP controls
-------------------
-Transparent Hugepage Support for anonymous memory can be entirely disabled
+Transparent Hugepage Support for anonymous memory can be disabled
(mostly for debugging purposes) or only enabled inside MADV_HUGEPAGE
regions (to avoid the risk of consuming more memory resources) or enabled
-system wide. This can be achieved with one of::
+system wide. This can be achieved per-supported-THP-size with one of::
+
+ echo always >/sys/kernel/mm/transparent_hugepage/hugepages-<size>kB/enabled
+ echo madvise >/sys/kernel/mm/transparent_hugepage/hugepages-<size>kB/enabled
+ echo never >/sys/kernel/mm/transparent_hugepage/hugepages-<size>kB/enabled
+
+where <size> is the hugepage size being addressed, the available sizes
+for which vary by system.
+
+.. note:: Setting "never" in all sysfs THP controls does **not** disable
+ Transparent Huge Pages globally. This is because ``madvise(...,
+ MADV_COLLAPSE)`` ignores these settings and collapses ranges to
+ PMD-sized huge pages unconditionally.
+
+For example::
+
+ echo always >/sys/kernel/mm/transparent_hugepage/hugepages-2048kB/enabled
+
+Alternatively it is possible to specify that a given hugepage size
+will inherit the top-level "enabled" value::
+
+ echo inherit >/sys/kernel/mm/transparent_hugepage/hugepages-<size>kB/enabled
+
+For example::
+
+ echo inherit >/sys/kernel/mm/transparent_hugepage/hugepages-2048kB/enabled
+
+The top-level setting (for use with "inherit") can be set by issuing
+one of the following commands::
echo always >/sys/kernel/mm/transparent_hugepage/enabled
echo madvise >/sys/kernel/mm/transparent_hugepage/enabled
echo never >/sys/kernel/mm/transparent_hugepage/enabled
+By default, PMD-sized hugepages have enabled="inherit" and all other
+hugepage sizes have enabled="never". If enabling multiple hugepage
+sizes, the kernel will select the most appropriate enabled size for a
+given allocation.
+
It's also possible to limit defrag efforts in the VM to generate
anonymous hugepages in case they're not immediately free to madvise
regions or to never try to defrag memory and simply fallback to regular
@@ -146,27 +192,83 @@ madvise
behaviour.
never
- should be self-explanatory.
+ should be self-explanatory. Note that ``madvise(...,
+ MADV_COLLAPSE)`` can still cause transparent huge pages to be
+ obtained even if this mode is specified everywhere.
-By default kernel tries to use huge zero page on read page fault to
-anonymous mapping. It's possible to disable huge zero page by writing 0
-or enable it back by writing 1::
+By default kernel tries to use huge, PMD-mappable zero page on read
+page fault to anonymous mapping. It's possible to disable huge zero
+page by writing 0 or enable it back by writing 1::
echo 0 >/sys/kernel/mm/transparent_hugepage/use_zero_page
echo 1 >/sys/kernel/mm/transparent_hugepage/use_zero_page
-Some userspace (such as a test program, or an optimized memory allocation
-library) may want to know the size (in bytes) of a transparent hugepage::
+Some userspace (such as a test program, or an optimized memory
+allocation library) may want to know the size (in bytes) of a
+PMD-mappable transparent hugepage::
cat /sys/kernel/mm/transparent_hugepage/hpage_pmd_size
-khugepaged will be automatically started when
-transparent_hugepage/enabled is set to "always" or "madvise, and it'll
-be automatically shutdown if it's set to "never".
+All THPs at fault and collapse time will be added to _deferred_list,
+and will therefore be split under memory presure if they are considered
+"underused". A THP is underused if the number of zero-filled pages in
+the THP is above max_ptes_none (see below). It is possible to disable
+this behaviour by writing 0 to shrink_underused, and enable it by writing
+1 to it::
+
+ echo 0 > /sys/kernel/mm/transparent_hugepage/shrink_underused
+ echo 1 > /sys/kernel/mm/transparent_hugepage/shrink_underused
+
+khugepaged will be automatically started when PMD-sized THP is enabled
+(either of the per-size anon control or the top-level control are set
+to "always" or "madvise"), and it'll be automatically shutdown when
+PMD-sized THP is disabled (when both the per-size anon control and the
+top-level control are "never")
+
+process THP controls
+--------------------
+
+A process can control its own THP behaviour using the ``PR_SET_THP_DISABLE``
+and ``PR_GET_THP_DISABLE`` pair of prctl(2) calls. The THP behaviour set using
+``PR_SET_THP_DISABLE`` is inherited across fork(2) and execve(2). These calls
+support the following arguments::
+
+ prctl(PR_SET_THP_DISABLE, 1, 0, 0, 0):
+ This will disable THPs completely for the process, irrespective
+ of global THP controls or madvise(..., MADV_COLLAPSE) being used.
+
+ prctl(PR_SET_THP_DISABLE, 1, PR_THP_DISABLE_EXCEPT_ADVISED, 0, 0):
+ This will disable THPs for the process except when the usage of THPs is
+ advised. Consequently, THPs will only be used when:
+ - Global THP controls are set to "always" or "madvise" and
+ madvise(..., MADV_HUGEPAGE) or madvise(..., MADV_COLLAPSE) is used.
+ - Global THP controls are set to "never" and madvise(..., MADV_COLLAPSE)
+ is used. This is the same behavior as if THPs would not be disabled on
+ a process level.
+ Note that MADV_COLLAPSE is currently always rejected if
+ madvise(..., MADV_NOHUGEPAGE) is set on an area.
+
+ prctl(PR_SET_THP_DISABLE, 0, 0, 0, 0):
+ This will re-enable THPs for the process, as if they were never disabled.
+ Whether THPs will actually be used depends on global THP controls and
+ madvise() calls.
+
+ prctl(PR_GET_THP_DISABLE, 0, 0, 0, 0):
+ This returns a value whose bits indicate how THP-disable is configured:
+ Bits
+ 1 0 Value Description
+ |0|0| 0 No THP-disable behaviour specified.
+ |0|1| 1 THP is entirely disabled for this process.
+ |1|1| 3 THP-except-advised mode is set for this process.
Khugepaged controls
-------------------
+.. note::
+ khugepaged currently only searches for opportunities to collapse to
+ PMD-sized THP and no attempt is made to collapse to other THP
+ sizes.
+
khugepaged runs usually at low frequency so while one may not want to
invoke defrag algorithms synchronously during the page faults, it
should be worth invoking defrag at least in khugepaged. However it's
@@ -191,7 +293,14 @@ allocation failure to throttle the next allocation attempt::
/sys/kernel/mm/transparent_hugepage/khugepaged/alloc_sleep_millisecs
-The khugepaged progress can be seen in the number of pages collapsed::
+The khugepaged progress can be seen in the number of pages collapsed (note
+that this counter may not be an exact count of the number of pages
+collapsed, since "collapsed" could mean multiple things: (1) A PTE mapping
+being replaced by a PMD mapping, or (2) All 4K physical pages replaced by
+one 2M hugepage. Each may happen independently, or together, depending on
+the type of memory and the failures that occur. As such, this value should
+be interpreted roughly as a sign of progress, and counters in /proc/vmstat
+consulted for more accurate accounting)::
/sys/kernel/mm/transparent_hugepage/khugepaged/pages_collapsed
@@ -220,46 +329,126 @@ memory. A lower value can prevent THPs from being
collapsed, resulting fewer pages being collapsed into
THPs, and lower memory access performance.
-Boot parameter
-==============
-
-You can change the sysfs boot time defaults of Transparent Hugepage
-Support by passing the parameter ``transparent_hugepage=always`` or
-``transparent_hugepage=madvise`` or ``transparent_hugepage=never``
-to the kernel command line.
+``max_ptes_shared`` specifies how many pages can be shared across multiple
+processes. khugepaged might treat pages of THPs as shared if any page of
+that THP is shared. Exceeding the number would block the collapse::
+
+ /sys/kernel/mm/transparent_hugepage/khugepaged/max_ptes_shared
+
+A higher value may increase memory footprint for some workloads.
+
+Boot parameters
+===============
+
+You can change the sysfs boot time default for the top-level "enabled"
+control by passing the parameter ``transparent_hugepage=always`` or
+``transparent_hugepage=madvise`` or ``transparent_hugepage=never`` to the
+kernel command line.
+
+Alternatively, each supported anonymous THP size can be controlled by
+passing ``thp_anon=<size>[KMG],<size>[KMG]:<state>;<size>[KMG]-<size>[KMG]:<state>``,
+where ``<size>`` is the THP size (must be a power of 2 of PAGE_SIZE and
+supported anonymous THP) and ``<state>`` is one of ``always``, ``madvise``,
+``never`` or ``inherit``.
+
+For example, the following will set 16K, 32K, 64K THP to ``always``,
+set 128K, 512K to ``inherit``, set 256K to ``madvise`` and 1M, 2M
+to ``never``::
+
+ thp_anon=16K-64K:always;128K,512K:inherit;256K:madvise;1M-2M:never
+
+``thp_anon=`` may be specified multiple times to configure all THP sizes as
+required. If ``thp_anon=`` is specified at least once, any anon THP sizes
+not explicitly configured on the command line are implicitly set to
+``never``.
+
+``transparent_hugepage`` setting only affects the global toggle. If
+``thp_anon`` is not specified, PMD_ORDER THP will default to ``inherit``.
+However, if a valid ``thp_anon`` setting is provided by the user, the
+PMD_ORDER THP policy will be overridden. If the policy for PMD_ORDER
+is not defined within a valid ``thp_anon``, its policy will default to
+``never``.
+
+Similarly to ``transparent_hugepage``, you can control the hugepage
+allocation policy for the internal shmem mount by using the kernel parameter
+``transparent_hugepage_shmem=<policy>``, where ``<policy>`` is one of the
+seven valid policies for shmem (``always``, ``within_size``, ``advise``,
+``never``, ``deny``, and ``force``).
+
+Similarly to ``transparent_hugepage_shmem``, you can control the default
+hugepage allocation policy for the tmpfs mount by using the kernel parameter
+``transparent_hugepage_tmpfs=<policy>``, where ``<policy>`` is one of the
+four valid policies for tmpfs (``always``, ``within_size``, ``advise``,
+``never``). The tmpfs mount default policy is ``never``.
+
+In the same manner as ``thp_anon`` controls each supported anonymous THP
+size, ``thp_shmem`` controls each supported shmem THP size. ``thp_shmem``
+has the same format as ``thp_anon``, but also supports the policy
+``within_size``.
+
+``thp_shmem=`` may be specified multiple times to configure all THP sizes
+as required. If ``thp_shmem=`` is specified at least once, any shmem THP
+sizes not explicitly configured on the command line are implicitly set to
+``never``.
+
+``transparent_hugepage_shmem`` setting only affects the global toggle. If
+``thp_shmem`` is not specified, PMD_ORDER hugepage will default to
+``inherit``. However, if a valid ``thp_shmem`` setting is provided by the
+user, the PMD_ORDER hugepage policy will be overridden. If the policy for
+PMD_ORDER is not defined within a valid ``thp_shmem``, its policy will
+default to ``never``.
Hugepages in tmpfs/shmem
========================
-You can control hugepage allocation policy in tmpfs with mount option
-``huge=``. It can have following values:
+Traditionally, tmpfs only supported a single huge page size ("PMD"). Today,
+it also supports smaller sizes just like anonymous memory, often referred
+to as "multi-size THP" (mTHP). Huge pages of any size are commonly
+represented in the kernel as "large folios".
+
+While there is fine control over the huge page sizes to use for the internal
+shmem mount (see below), ordinary tmpfs mounts will make use of all available
+huge page sizes without any control over the exact sizes, behaving more like
+other file systems.
+
+tmpfs mounts
+------------
+
+The THP allocation policy for tmpfs mounts can be adjusted using the mount
+option: ``huge=``. It can have following values:
always
Attempt to allocate huge pages every time we need a new page;
+ Always try PMD-sized huge pages first, and fall back to smaller-sized
+ huge pages if the PMD-sized huge page allocation fails;
never
- Do not allocate huge pages;
+ Do not allocate huge pages. Note that ``madvise(..., MADV_COLLAPSE)``
+ can still cause transparent huge pages to be obtained even if this mode
+ is specified everywhere;
within_size
- Only allocate huge page if it will be fully within i_size.
- Also respect fadvise()/madvise() hints;
+ Only allocate huge page if it will be fully within i_size;
+ Always try PMD-sized huge pages first, and fall back to smaller-sized
+ huge pages if the PMD-sized huge page allocation fails;
+ Also respect madvise() hints;
advise
- Only allocate huge pages if requested with fadvise()/madvise();
+ Only allocate huge pages if requested with madvise();
+
+Remember, that the kernel may use huge pages of all available sizes, and
+that no fine control as for the internal tmpfs mount is available.
-The default policy is ``never``.
+The default policy in the past was ``never``, but it can now be adjusted
+using the kernel parameter ``transparent_hugepage_tmpfs=<policy>``.
``mount -o remount,huge= /mountpoint`` works fine after mount: remounting
``huge=never`` will not attempt to break up huge pages at all, just stop more
from being allocated.
-There's also sysfs knob to control hugepage allocation policy for internal
-shmem mount: /sys/kernel/mm/transparent_hugepage/shmem_enabled. The mount
-is used for SysV SHM, memfds, shared anonymous mmaps (of /dev/zero or
-MAP_ANONYMOUS), GPU drivers' DRM objects, Ashmem.
-
-In addition to policies listed above, shmem_enabled allows two further
-values:
+In addition to policies listed above, the sysfs knob
+/sys/kernel/mm/transparent_hugepage/shmem_enabled will affect the
+allocation policy of tmpfs mounts, when set to the following values:
deny
For use in emergencies, to force the huge option off from
@@ -267,27 +456,68 @@ deny
force
Force the huge option on for all - very useful for testing;
+shmem / internal tmpfs
+----------------------
+The mount internal tmpfs mount is used for SysV SHM, memfds, shared anonymous
+mmaps (of /dev/zero or MAP_ANONYMOUS), GPU drivers' DRM objects, Ashmem.
+
+To control the THP allocation policy for this internal tmpfs mount, the
+sysfs knob /sys/kernel/mm/transparent_hugepage/shmem_enabled and the knobs
+per THP size in
+'/sys/kernel/mm/transparent_hugepage/hugepages-<size>kB/shmem_enabled'
+can be used.
+
+The global knob has the same semantics as the ``huge=`` mount options
+for tmpfs mounts, except that the different huge page sizes can be controlled
+individually, and will only use the setting of the global knob when the
+per-size knob is set to 'inherit'.
+
+The options 'force' and 'deny' are dropped for the individual sizes, which
+are rather testing artifacts from the old ages.
+
+always
+ Attempt to allocate <size> huge pages every time we need a new page;
+
+inherit
+ Inherit the top-level "shmem_enabled" value. By default, PMD-sized hugepages
+ have enabled="inherit" and all other hugepage sizes have enabled="never";
+
+never
+ Do not allocate <size> huge pages. Note that ``madvise(...,
+ MADV_COLLAPSE)`` can still cause transparent huge pages to be obtained
+ even if this mode is specified everywhere;
+
+within_size
+ Only allocate <size> huge page if it will be fully within i_size.
+ Also respect madvise() hints;
+
+advise
+ Only allocate <size> huge pages if requested with madvise();
+
Need of application restart
===========================
-The transparent_hugepage/enabled values and tmpfs mount option only affect
-future behavior. So to make them effective you need to restart any
-application that could have been using hugepages. This also applies to the
-regions registered in khugepaged.
+The transparent_hugepage/enabled and
+transparent_hugepage/hugepages-<size>kB/enabled values and tmpfs mount
+option only affect future behavior. So to make them effective you need
+to restart any application that could have been using hugepages. This
+also applies to the regions registered in khugepaged.
Monitoring usage
================
-The number of anonymous transparent huge pages currently used by the
+The number of PMD-sized anonymous transparent huge pages currently used by the
system is available by reading the AnonHugePages field in ``/proc/meminfo``.
-To identify what applications are using anonymous transparent huge pages,
-it is necessary to read ``/proc/PID/smaps`` and count the AnonHugePages fields
-for each mapping.
+To identify what applications are using PMD-sized anonymous transparent huge
+pages, it is necessary to read ``/proc/PID/smaps`` and count the AnonHugePages
+fields for each mapping. (Note that AnonHugePages only applies to traditional
+PMD-sized THP for historical reasons and should have been called
+AnonHugePmdMapped).
The number of file transparent huge pages mapped to userspace is available
by reading ShmemPmdMapped and ShmemHugePages fields in ``/proc/meminfo``.
To identify what applications are mapping file transparent huge pages, it
-is necessary to read ``/proc/PID/smaps`` and count the FileHugeMapped fields
+is necessary to read ``/proc/PID/smaps`` and count the FilePmdMapped fields
for each mapping.
Note that reading the smaps file is expensive and reading it
@@ -298,8 +528,7 @@ monitor how successfully the system is providing huge pages for use.
thp_fault_alloc
is incremented every time a huge page is successfully
- allocated to handle a page fault. This applies to both the
- first time a page is faulted and for COW faults.
+ allocated and charged to handle a page fault.
thp_collapse_alloc
is incremented by khugepaged when it has found
@@ -307,20 +536,37 @@ thp_collapse_alloc
successfully allocated a new huge page to store the data.
thp_fault_fallback
- is incremented if a page fault fails to allocate
+ is incremented if a page fault fails to allocate or charge
a huge page and instead falls back to using small pages.
+thp_fault_fallback_charge
+ is incremented if a page fault fails to charge a huge page and
+ instead falls back to using small pages even though the
+ allocation was successful.
+
thp_collapse_alloc_failed
is incremented if khugepaged found a range
of pages that should be collapsed into one huge page but failed
the allocation.
thp_file_alloc
- is incremented every time a file huge page is successfully
- allocated.
+ is incremented every time a shmem huge page is successfully
+ allocated (Note that despite being named after "file", the counter
+ measures only shmem).
+
+thp_file_fallback
+ is incremented if a shmem huge page is attempted to be allocated
+ but fails and instead falls back to using small pages. (Note that
+ despite being named after "file", the counter measures only shmem).
+
+thp_file_fallback_charge
+ is incremented if a shmem huge page cannot be charged and instead
+ falls back to using small pages even though the allocation was
+ successful. (Note that despite being named after "file", the
+ counter measures only shmem).
thp_file_mapped
- is incremented every time a file huge page is mapped into
+ is incremented every time a file or shmem huge page is mapped into
user address space.
thp_split_page
@@ -339,6 +585,12 @@ thp_deferred_split_page
splitting it would free up some memory. Pages on split queue are
going to be split under memory pressure.
+thp_underused_split_page
+ is incremented when a huge page on the split queue was split
+ because it was underused. A THP is underused if the number of
+ zero pages in the THP is above a certain threshold
+ (/sys/kernel/mm/transparent_hugepage/khugepaged/max_ptes_none).
+
thp_split_pmd
is incremented every time a PMD split into table of PTEs.
This can happen, for instance, when application calls mprotect() or
@@ -346,10 +598,9 @@ thp_split_pmd
page table entry.
thp_zero_page_alloc
- is incremented every time a huge zero page is
- successfully allocated. It includes allocations which where
- dropped due race with other allocation. Note, it doesn't count
- every map of the huge zero page, only its allocation.
+ is incremented every time a huge zero page used for thp is
+ successfully allocated. Note, it doesn't count every map of
+ the huge zero page, only its allocation.
thp_zero_page_alloc_failed
is incremented if kernel fails to allocate
@@ -364,6 +615,92 @@ thp_swpout_fallback
Usually because failed to allocate some continuous swap space
for the huge page.
+In /sys/kernel/mm/transparent_hugepage/hugepages-<size>kB/stats, There are
+also individual counters for each huge page size, which can be utilized to
+monitor the system's effectiveness in providing huge pages for usage. Each
+counter has its own corresponding file.
+
+anon_fault_alloc
+ is incremented every time a huge page is successfully
+ allocated and charged to handle a page fault.
+
+anon_fault_fallback
+ is incremented if a page fault fails to allocate or charge
+ a huge page and instead falls back to using huge pages with
+ lower orders or small pages.
+
+anon_fault_fallback_charge
+ is incremented if a page fault fails to charge a huge page and
+ instead falls back to using huge pages with lower orders or
+ small pages even though the allocation was successful.
+
+zswpout
+ is incremented every time a huge page is swapped out to zswap in one
+ piece without splitting.
+
+swpin
+ is incremented every time a huge page is swapped in from a non-zswap
+ swap device in one piece.
+
+swpin_fallback
+ is incremented if swapin fails to allocate or charge a huge page
+ and instead falls back to using huge pages with lower orders or
+ small pages.
+
+swpin_fallback_charge
+ is incremented if swapin fails to charge a huge page and instead
+ falls back to using huge pages with lower orders or small pages
+ even though the allocation was successful.
+
+swpout
+ is incremented every time a huge page is swapped out to a non-zswap
+ swap device in one piece without splitting.
+
+swpout_fallback
+ is incremented if a huge page has to be split before swapout.
+ Usually because failed to allocate some continuous swap space
+ for the huge page.
+
+shmem_alloc
+ is incremented every time a shmem huge page is successfully
+ allocated.
+
+shmem_fallback
+ is incremented if a shmem huge page is attempted to be allocated
+ but fails and instead falls back to using small pages.
+
+shmem_fallback_charge
+ is incremented if a shmem huge page cannot be charged and instead
+ falls back to using small pages even though the allocation was
+ successful.
+
+split
+ is incremented every time a huge page is successfully split into
+ smaller orders. This can happen for a variety of reasons but a
+ common reason is that a huge page is old and is being reclaimed.
+
+split_failed
+ is incremented if kernel fails to split huge
+ page. This can happen if the page was pinned by somebody.
+
+split_deferred
+ is incremented when a huge page is put onto split queue.
+ This happens when a huge page is partially unmapped and splitting
+ it would free up some memory. Pages on split queue are going to
+ be split under memory pressure, if splitting is possible.
+
+nr_anon
+ the number of anonymous THP we have in the whole system. These THPs
+ might be currently entirely mapped or have partially unmapped/unused
+ subpages.
+
+nr_anon_partially_mapped
+ the number of anonymous THP which are likely partially mapped, possibly
+ wasting memory, and have been queued for deferred memory reclamation.
+ Note that in corner some cases (e.g., failed migration), we might detect
+ an anonymous THP as "partially mapped" and count it here, even though it
+ is not actually partially mapped anymore.
+
As the system ages, allocating huge pages may be expensive as the
system uses memory compaction to copy data around memory to free a
huge page for use. There are some counters in ``/proc/vmstat`` to help
@@ -381,30 +718,15 @@ compact_fail
is incremented if the system tries to compact memory
but failed.
-compact_pages_moved
- is incremented each time a page is moved. If
- this value is increasing rapidly, it implies that the system
- is copying a lot of data to satisfy the huge page allocation.
- It is possible that the cost of copying exceeds any savings
- from reduced TLB misses.
-
-compact_pagemigrate_failed
- is incremented when the underlying mechanism
- for moving a page failed.
-
-compact_blocks_moved
- is incremented each time memory compaction examines
- a huge page aligned range of pages.
-
It is possible to establish how long the stalls were using the function
-tracer to record how long was spent in __alloc_pages_nodemask and
+tracer to record how long was spent in __alloc_pages() and
using the mm_page_alloc tracepoint to identify which allocations were
for huge pages.
Optimizing the applications
===========================
-To be guaranteed that the kernel will map a 2M page immediately in any
+To be guaranteed that the kernel will map a THP immediately in any
memory region, the mmap region has to be hugepage naturally
aligned. posix_memalign() can provide that guarantee.
diff --git a/Documentation/admin-guide/mm/userfaultfd.rst b/Documentation/admin-guide/mm/userfaultfd.rst
index 5048cf661a8a..e5cc8848dcb3 100644
--- a/Documentation/admin-guide/mm/userfaultfd.rst
+++ b/Documentation/admin-guide/mm/userfaultfd.rst
@@ -1,5 +1,3 @@
-.. _userfaultfd:
-
===========
Userfaultfd
===========
@@ -12,114 +10,305 @@ and more generally they allow userland to take control of various
memory page faults, something otherwise only the kernel code could do.
For example userfaults allows a proper and more optimal implementation
-of the PROT_NONE+SIGSEGV trick.
+of the ``PROT_NONE+SIGSEGV`` trick.
Design
======
-Userfaults are delivered and resolved through the userfaultfd syscall.
+Userspace creates a new userfaultfd, initializes it, and registers one or more
+regions of virtual memory with it. Then, any page faults which occur within the
+region(s) result in a message being delivered to the userfaultfd, notifying
+userspace of the fault.
-The userfaultfd (aside from registering and unregistering virtual
+The ``userfaultfd`` (aside from registering and unregistering virtual
memory ranges) provides two primary functionalities:
-1) read/POLLIN protocol to notify a userland thread of the faults
+1) ``read/POLLIN`` protocol to notify a userland thread of the faults
happening
-2) various UFFDIO_* ioctls that can manage the virtual memory regions
- registered in the userfaultfd that allows userland to efficiently
+2) various ``UFFDIO_*`` ioctls that can manage the virtual memory regions
+ registered in the ``userfaultfd`` that allows userland to efficiently
resolve the userfaults it receives via 1) or to manage the virtual
memory in the background
The real advantage of userfaults if compared to regular virtual memory
management of mremap/mprotect is that the userfaults in all their
operations never involve heavyweight structures like vmas (in fact the
-userfaultfd runtime load never takes the mmap_sem for writing).
-
+``userfaultfd`` runtime load never takes the mmap_lock for writing).
Vmas are not suitable for page- (or hugepage) granular fault tracking
when dealing with virtual address spaces that could span
Terabytes. Too many vmas would be needed for that.
-The userfaultfd once opened by invoking the syscall, can also be
+The ``userfaultfd``, once created, can also be
passed using unix domain sockets to a manager process, so the same
manager process could handle the userfaults of a multitude of
different processes without them being aware about what is going on
-(well of course unless they later try to use the userfaultfd
+(well of course unless they later try to use the ``userfaultfd``
themselves on the same region the manager is already tracking, which
-is a corner case that would currently return -EBUSY).
+is a corner case that would currently return ``-EBUSY``).
API
===
-When first opened the userfaultfd must be enabled invoking the
-UFFDIO_API ioctl specifying a uffdio_api.api value set to UFFD_API (or
-a later API version) which will specify the read/POLLIN protocol
-userland intends to speak on the UFFD and the uffdio_api.features
-userland requires. The UFFDIO_API ioctl if successful (i.e. if the
-requested uffdio_api.api is spoken also by the running kernel and the
+Creating a userfaultfd
+----------------------
+
+There are two ways to create a new userfaultfd, each of which provide ways to
+restrict access to this functionality (since historically userfaultfds which
+handle kernel page faults have been a useful tool for exploiting the kernel).
+
+The first way, supported since userfaultfd was introduced, is the
+userfaultfd(2) syscall. Access to this is controlled in several ways:
+
+- Any user can always create a userfaultfd which traps userspace page faults
+ only. Such a userfaultfd can be created using the userfaultfd(2) syscall
+ with the flag UFFD_USER_MODE_ONLY.
+
+- In order to also trap kernel page faults for the address space, either the
+ process needs the CAP_SYS_PTRACE capability, or the system must have
+ vm.unprivileged_userfaultfd set to 1. By default, vm.unprivileged_userfaultfd
+ is set to 0.
+
+The second way, added to the kernel more recently, is by opening
+/dev/userfaultfd and issuing a USERFAULTFD_IOC_NEW ioctl to it. This method
+yields equivalent userfaultfds to the userfaultfd(2) syscall.
+
+Unlike userfaultfd(2), access to /dev/userfaultfd is controlled via normal
+filesystem permissions (user/group/mode), which gives fine grained access to
+userfaultfd specifically, without also granting other unrelated privileges at
+the same time (as e.g. granting CAP_SYS_PTRACE would do). Users who have access
+to /dev/userfaultfd can always create userfaultfds that trap kernel page faults;
+vm.unprivileged_userfaultfd is not considered.
+
+Initializing a userfaultfd
+--------------------------
+
+When first opened the ``userfaultfd`` must be enabled invoking the
+``UFFDIO_API`` ioctl specifying a ``uffdio_api.api`` value set to ``UFFD_API`` (or
+a later API version) which will specify the ``read/POLLIN`` protocol
+userland intends to speak on the ``UFFD`` and the ``uffdio_api.features``
+userland requires. The ``UFFDIO_API`` ioctl if successful (i.e. if the
+requested ``uffdio_api.api`` is spoken also by the running kernel and the
requested features are going to be enabled) will return into
-uffdio_api.features and uffdio_api.ioctls two 64bit bitmasks of
+``uffdio_api.features`` and ``uffdio_api.ioctls`` two 64bit bitmasks of
respectively all the available features of the read(2) protocol and
the generic ioctl available.
-The uffdio_api.features bitmask returned by the UFFDIO_API ioctl
-defines what memory types are supported by the userfaultfd and what
-events, except page fault notifications, may be generated.
-
-If the kernel supports registering userfaultfd ranges on hugetlbfs
-virtual memory areas, UFFD_FEATURE_MISSING_HUGETLBFS will be set in
-uffdio_api.features. Similarly, UFFD_FEATURE_MISSING_SHMEM will be
-set if the kernel supports registering userfaultfd ranges on shared
-memory (covering all shmem APIs, i.e. tmpfs, IPCSHM, /dev/zero
-MAP_SHARED, memfd_create, etc).
-
-The userland application that wants to use userfaultfd with hugetlbfs
-or shared memory need to set the corresponding flag in
-uffdio_api.features to enable those features.
-
-If the userland desires to receive notifications for events other than
-page faults, it has to verify that uffdio_api.features has appropriate
-UFFD_FEATURE_EVENT_* bits set. These events are described in more
-detail below in "Non-cooperative userfaultfd" section.
-
-Once the userfaultfd has been enabled the UFFDIO_REGISTER ioctl should
-be invoked (if present in the returned uffdio_api.ioctls bitmask) to
-register a memory range in the userfaultfd by setting the
-uffdio_register structure accordingly. The uffdio_register.mode
+The ``uffdio_api.features`` bitmask returned by the ``UFFDIO_API`` ioctl
+defines what memory types are supported by the ``userfaultfd`` and what
+events, except page fault notifications, may be generated:
+
+- The ``UFFD_FEATURE_EVENT_*`` flags indicate that various other events
+ other than page faults are supported. These events are described in more
+ detail below in the `Non-cooperative userfaultfd`_ section.
+
+- ``UFFD_FEATURE_MISSING_HUGETLBFS`` and ``UFFD_FEATURE_MISSING_SHMEM``
+ indicate that the kernel supports ``UFFDIO_REGISTER_MODE_MISSING``
+ registrations for hugetlbfs and shared memory (covering all shmem APIs,
+ i.e. tmpfs, ``IPCSHM``, ``/dev/zero``, ``MAP_SHARED``, ``memfd_create``,
+ etc) virtual memory areas, respectively.
+
+- ``UFFD_FEATURE_MINOR_HUGETLBFS`` indicates that the kernel supports
+ ``UFFDIO_REGISTER_MODE_MINOR`` registration for hugetlbfs virtual memory
+ areas. ``UFFD_FEATURE_MINOR_SHMEM`` is the analogous feature indicating
+ support for shmem virtual memory areas.
+
+- ``UFFD_FEATURE_MOVE`` indicates that the kernel supports moving an
+ existing page contents from userspace.
+
+The userland application should set the feature flags it intends to use
+when invoking the ``UFFDIO_API`` ioctl, to request that those features be
+enabled if supported.
+
+Once the ``userfaultfd`` API has been enabled the ``UFFDIO_REGISTER``
+ioctl should be invoked (if present in the returned ``uffdio_api.ioctls``
+bitmask) to register a memory range in the ``userfaultfd`` by setting the
+uffdio_register structure accordingly. The ``uffdio_register.mode``
bitmask will specify to the kernel which kind of faults to track for
-the range (UFFDIO_REGISTER_MODE_MISSING would track missing
-pages). The UFFDIO_REGISTER ioctl will return the
-uffdio_register.ioctls bitmask of ioctls that are suitable to resolve
+the range. The ``UFFDIO_REGISTER`` ioctl will return the
+``uffdio_register.ioctls`` bitmask of ioctls that are suitable to resolve
userfaults on the range registered. Not all ioctls will necessarily be
-supported for all memory types depending on the underlying virtual
-memory backend (anonymous memory vs tmpfs vs real filebacked
-mappings).
+supported for all memory types (e.g. anonymous memory vs. shmem vs.
+hugetlbfs), or all types of intercepted faults.
-Userland can use the uffdio_register.ioctls to manage the virtual
+Userland can use the ``uffdio_register.ioctls`` to manage the virtual
address space in the background (to add or potentially also remove
-memory from the userfaultfd registered range). This means a userfault
+memory from the ``userfaultfd`` registered range). This means a userfault
could be triggering just before userland maps in the background the
user-faulted page.
-The primary ioctl to resolve userfaults is UFFDIO_COPY. That
-atomically copies a page into the userfault registered range and wakes
-up the blocked userfaults (unless uffdio_copy.mode &
-UFFDIO_COPY_MODE_DONTWAKE is set). Other ioctl works similarly to
-UFFDIO_COPY. They're atomic as in guaranteeing that nothing can see an
-half copied page since it'll keep userfaulting until the copy has
-finished.
+Resolving Userfaults
+--------------------
+
+There are three basic ways to resolve userfaults:
+
+- ``UFFDIO_COPY`` atomically copies some existing page contents from
+ userspace.
+
+- ``UFFDIO_ZEROPAGE`` atomically zeros the new page.
+
+- ``UFFDIO_CONTINUE`` maps an existing, previously-populated page.
+
+These operations are atomic in the sense that they guarantee nothing can
+see a half-populated page, since readers will keep userfaulting until the
+operation has finished.
+
+By default, these wake up userfaults blocked on the range in question.
+They support a ``UFFDIO_*_MODE_DONTWAKE`` ``mode`` flag, which indicates
+that waking will be done separately at some later time.
+
+Which ioctl to choose depends on the kind of page fault, and what we'd
+like to do to resolve it:
+
+- For ``UFFDIO_REGISTER_MODE_MISSING`` faults, the fault needs to be
+ resolved by either providing a new page (``UFFDIO_COPY``), or mapping
+ the zero page (``UFFDIO_ZEROPAGE``). By default, the kernel would map
+ the zero page for a missing fault. With userfaultfd, userspace can
+ decide what content to provide before the faulting thread continues.
+
+- For ``UFFDIO_REGISTER_MODE_MINOR`` faults, there is an existing page (in
+ the page cache). Userspace has the option of modifying the page's
+ contents before resolving the fault. Once the contents are correct
+ (modified or not), userspace asks the kernel to map the page and let the
+ faulting thread continue with ``UFFDIO_CONTINUE``.
+
+Notes:
+
+- You can tell which kind of fault occurred by examining
+ ``pagefault.flags`` within the ``uffd_msg``, checking for the
+ ``UFFD_PAGEFAULT_FLAG_*`` flags.
+
+- None of the page-delivering ioctls default to the range that you
+ registered with. You must fill in all fields for the appropriate
+ ioctl struct including the range.
+
+- You get the address of the access that triggered the missing page
+ event out of a struct uffd_msg that you read in the thread from the
+ uffd. You can supply as many pages as you want with these IOCTLs.
+ Keep in mind that unless you used DONTWAKE then the first of any of
+ those IOCTLs wakes up the faulting thread.
+
+- Be sure to test for all errors including
+ (``pollfd[0].revents & POLLERR``). This can happen, e.g. when ranges
+ supplied were incorrect.
+
+Write Protect Notifications
+---------------------------
+
+This is equivalent to (but faster than) using mprotect and a SIGSEGV
+signal handler.
+
+Firstly you need to register a range with ``UFFDIO_REGISTER_MODE_WP``.
+Instead of using mprotect(2) you use
+``ioctl(uffd, UFFDIO_WRITEPROTECT, struct *uffdio_writeprotect)``
+while ``mode = UFFDIO_WRITEPROTECT_MODE_WP``
+in the struct passed in. The range does not default to and does not
+have to be identical to the range you registered with. You can write
+protect as many ranges as you like (inside the registered range).
+Then, in the thread reading from uffd the struct will have
+``msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP`` set. Now you send
+``ioctl(uffd, UFFDIO_WRITEPROTECT, struct *uffdio_writeprotect)``
+again while ``pagefault.mode`` does not have ``UFFDIO_WRITEPROTECT_MODE_WP``
+set. This wakes up the thread which will continue to run with writes. This
+allows you to do the bookkeeping about the write in the uffd reading
+thread before the ioctl.
+
+If you registered with both ``UFFDIO_REGISTER_MODE_MISSING`` and
+``UFFDIO_REGISTER_MODE_WP`` then you need to think about the sequence in
+which you supply a page and undo write protect. Note that there is a
+difference between writes into a WP area and into a !WP area. The
+former will have ``UFFD_PAGEFAULT_FLAG_WP`` set, the latter
+``UFFD_PAGEFAULT_FLAG_WRITE``. The latter did not fail on protection but
+you still need to supply a page when ``UFFDIO_REGISTER_MODE_MISSING`` was
+used.
+
+Userfaultfd write-protect mode currently behave differently on none ptes
+(when e.g. page is missing) over different types of memories.
+
+For anonymous memory, ``ioctl(UFFDIO_WRITEPROTECT)`` will ignore none ptes
+(e.g. when pages are missing and not populated). For file-backed memories
+like shmem and hugetlbfs, none ptes will be write protected just like a
+present pte. In other words, there will be a userfaultfd write fault
+message generated when writing to a missing page on file typed memories,
+as long as the page range was write-protected before. Such a message will
+not be generated on anonymous memories by default.
+
+If the application wants to be able to write protect none ptes on anonymous
+memory, one can pre-populate the memory with e.g. MADV_POPULATE_READ. On
+newer kernels, one can also detect the feature UFFD_FEATURE_WP_UNPOPULATED
+and set the feature bit in advance to make sure none ptes will also be
+write protected even upon anonymous memory.
+
+When using ``UFFDIO_REGISTER_MODE_WP`` in combination with either
+``UFFDIO_REGISTER_MODE_MISSING`` or ``UFFDIO_REGISTER_MODE_MINOR``, when
+resolving missing / minor faults with ``UFFDIO_COPY`` or ``UFFDIO_CONTINUE``
+respectively, it may be desirable for the new page / mapping to be
+write-protected (so future writes will also result in a WP fault). These ioctls
+support a mode flag (``UFFDIO_COPY_MODE_WP`` or ``UFFDIO_CONTINUE_MODE_WP``
+respectively) to configure the mapping this way.
+
+If the userfaultfd context has ``UFFD_FEATURE_WP_ASYNC`` feature bit set,
+any vma registered with write-protection will work in async mode rather
+than the default sync mode.
+
+In async mode, there will be no message generated when a write operation
+happens, meanwhile the write-protection will be resolved automatically by
+the kernel. It can be seen as a more accurate version of soft-dirty
+tracking and it can be different in a few ways:
+
+ - The dirty result will not be affected by vma changes (e.g. vma
+ merging) because the dirty is only tracked by the pte.
+
+ - It supports range operations by default, so one can enable tracking on
+ any range of memory as long as page aligned.
+
+ - Dirty information will not get lost if the pte was zapped due to
+ various reasons (e.g. during split of a shmem transparent huge page).
+
+ - Due to a reverted meaning of soft-dirty (page clean when uffd-wp bit
+ set; dirty when uffd-wp bit cleared), it has different semantics on
+ some of the memory operations. For example: ``MADV_DONTNEED`` on
+ anonymous (or ``MADV_REMOVE`` on a file mapping) will be treated as
+ dirtying of memory by dropping uffd-wp bit during the procedure.
+
+The user app can collect the "written/dirty" status by looking up the
+uffd-wp bit for the pages being interested in /proc/pagemap.
+
+The page will not be under track of uffd-wp async mode until the page is
+explicitly write-protected by ``ioctl(UFFDIO_WRITEPROTECT)`` with the mode
+flag ``UFFDIO_WRITEPROTECT_MODE_WP`` set. Trying to resolve a page fault
+that was tracked by async mode userfaultfd-wp is invalid.
+
+When userfaultfd-wp async mode is used alone, it can be applied to all
+kinds of memory.
+
+Memory Poisioning Emulation
+---------------------------
+
+In response to a fault (either missing or minor), an action userspace can
+take to "resolve" it is to issue a ``UFFDIO_POISON``. This will cause any
+future faulters to either get a SIGBUS, or in KVM's case the guest will
+receive an MCE as if there were hardware memory poisoning.
+
+This is used to emulate hardware memory poisoning. Imagine a VM running on a
+machine which experiences a real hardware memory error. Later, we live migrate
+the VM to another physical machine. Since we want the migration to be
+transparent to the guest, we want that same address range to act as if it was
+still poisoned, even though it's on a new physical host which ostensibly
+doesn't have a memory error in the exact same spot.
QEMU/KVM
========
-QEMU/KVM is using the userfaultfd syscall to implement postcopy live
+QEMU/KVM is using the ``userfaultfd`` syscall to implement postcopy live
migration. Postcopy live migration is one form of memory
externalization consisting of a virtual machine running with part or
all of its memory residing on a different node in the cloud. The
-userfaultfd abstraction is generic enough that not a single line of
+``userfaultfd`` abstraction is generic enough that not a single line of
KVM kernel code had to be modified in order to add postcopy live
migration to QEMU.
-Guest async page faults, FOLL_NOWAIT and all other GUP features work
+Guest async page faults, ``FOLL_NOWAIT`` and all other ``GUP*`` features work
just fine in combination with userfaults. Userfaults trigger async
page faults in the guest scheduler so those guest processes that
aren't waiting for userfaults (i.e. network bound) can keep running in
@@ -132,19 +321,19 @@ generating userfaults for readonly guest regions.
The implementation of postcopy live migration currently uses one
single bidirectional socket but in the future two different sockets
will be used (to reduce the latency of the userfaults to the minimum
-possible without having to decrease /proc/sys/net/ipv4/tcp_wmem).
+possible without having to decrease ``/proc/sys/net/ipv4/tcp_wmem``).
The QEMU in the source node writes all pages that it knows are missing
in the destination node, into the socket, and the migration thread of
-the QEMU running in the destination node runs UFFDIO_COPY|ZEROPAGE
-ioctls on the userfaultfd in order to map the received pages into the
-guest (UFFDIO_ZEROCOPY is used if the source page was a zero page).
+the QEMU running in the destination node runs ``UFFDIO_COPY|ZEROPAGE``
+ioctls on the ``userfaultfd`` in order to map the received pages into the
+guest (``UFFDIO_ZEROCOPY`` is used if the source page was a zero page).
A different postcopy thread in the destination node listens with
-poll() to the userfaultfd in parallel. When a POLLIN event is
+poll() to the ``userfaultfd`` in parallel. When a ``POLLIN`` event is
generated after a userfault triggers, the postcopy thread read() from
-the userfaultfd and receives the fault address (or -EAGAIN in case the
-userfault was already resolved and waken by a UFFDIO_COPY|ZEROPAGE run
+the ``userfaultfd`` and receives the fault address (or ``-EAGAIN`` in case the
+userfault was already resolved and waken by a ``UFFDIO_COPY|ZEROPAGE`` run
by the parallel QEMU migration thread).
After the QEMU postcopy thread (running in the destination node) gets
@@ -155,7 +344,7 @@ remaining missing pages from that new page offset. Soon after that
(just the time to flush the tcp_wmem queue through the network) the
migration thread in the QEMU running in the destination node will
receive the page that triggered the userfault and it'll map it as
-usual with the UFFDIO_COPY|ZEROPAGE (without actually knowing if it
+usual with the ``UFFDIO_COPY|ZEROPAGE`` (without actually knowing if it
was spontaneously sent by the source or if it was an urgent page
requested through a userfault).
@@ -168,74 +357,74 @@ checked to find which missing pages to send in round robin and we seek
over it when receiving incoming userfaults. After sending each page of
course the bitmap is updated accordingly. It's also useful to avoid
sending the same page twice (in case the userfault is read by the
-postcopy thread just before UFFDIO_COPY|ZEROPAGE runs in the migration
+postcopy thread just before ``UFFDIO_COPY|ZEROPAGE`` runs in the migration
thread).
Non-cooperative userfaultfd
===========================
-When the userfaultfd is monitored by an external manager, the manager
+When the ``userfaultfd`` is monitored by an external manager, the manager
must be able to track changes in the process virtual memory
layout. Userfaultfd can notify the manager about such changes using
the same read(2) protocol as for the page fault notifications. The
manager has to explicitly enable these events by setting appropriate
-bits in uffdio_api.features passed to UFFDIO_API ioctl:
+bits in ``uffdio_api.features`` passed to ``UFFDIO_API`` ioctl:
-UFFD_FEATURE_EVENT_FORK
- enable userfaultfd hooks for fork(). When this feature is
- enabled, the userfaultfd context of the parent process is
+``UFFD_FEATURE_EVENT_FORK``
+ enable ``userfaultfd`` hooks for fork(). When this feature is
+ enabled, the ``userfaultfd`` context of the parent process is
duplicated into the newly created process. The manager
- receives UFFD_EVENT_FORK with file descriptor of the new
- userfaultfd context in the uffd_msg.fork.
+ receives ``UFFD_EVENT_FORK`` with file descriptor of the new
+ ``userfaultfd`` context in the ``uffd_msg.fork``.
-UFFD_FEATURE_EVENT_REMAP
+``UFFD_FEATURE_EVENT_REMAP``
enable notifications about mremap() calls. When the
non-cooperative process moves a virtual memory area to a
different location, the manager will receive
- UFFD_EVENT_REMAP. The uffd_msg.remap will contain the old and
+ ``UFFD_EVENT_REMAP``. The ``uffd_msg.remap`` will contain the old and
new addresses of the area and its original length.
-UFFD_FEATURE_EVENT_REMOVE
+``UFFD_FEATURE_EVENT_REMOVE``
enable notifications about madvise(MADV_REMOVE) and
- madvise(MADV_DONTNEED) calls. The event UFFD_EVENT_REMOVE will
- be generated upon these calls to madvise. The uffd_msg.remove
+ madvise(MADV_DONTNEED) calls. The event ``UFFD_EVENT_REMOVE`` will
+ be generated upon these calls to madvise(). The ``uffd_msg.remove``
will contain start and end addresses of the removed area.
-UFFD_FEATURE_EVENT_UNMAP
+``UFFD_FEATURE_EVENT_UNMAP``
enable notifications about memory unmapping. The manager will
- get UFFD_EVENT_UNMAP with uffd_msg.remove containing start and
+ get ``UFFD_EVENT_UNMAP`` with ``uffd_msg.remove`` containing start and
end addresses of the unmapped area.
-Although the UFFD_FEATURE_EVENT_REMOVE and UFFD_FEATURE_EVENT_UNMAP
+Although the ``UFFD_FEATURE_EVENT_REMOVE`` and ``UFFD_FEATURE_EVENT_UNMAP``
are pretty similar, they quite differ in the action expected from the
-userfaultfd manager. In the former case, the virtual memory is
+``userfaultfd`` manager. In the former case, the virtual memory is
removed, but the area is not, the area remains monitored by the
-userfaultfd, and if a page fault occurs in that area it will be
+``userfaultfd``, and if a page fault occurs in that area it will be
delivered to the manager. The proper resolution for such page fault is
to zeromap the faulting address. However, in the latter case, when an
area is unmapped, either explicitly (with munmap() system call), or
implicitly (e.g. during mremap()), the area is removed and in turn the
-userfaultfd context for such area disappears too and the manager will
+``userfaultfd`` context for such area disappears too and the manager will
not get further userland page faults from the removed area. Still, the
notification is required in order to prevent manager from using
-UFFDIO_COPY on the unmapped area.
+``UFFDIO_COPY`` on the unmapped area.
Unlike userland page faults which have to be synchronous and require
explicit or implicit wakeup, all the events are delivered
asynchronously and the non-cooperative process resumes execution as
-soon as manager executes read(). The userfaultfd manager should
-carefully synchronize calls to UFFDIO_COPY with the events
-processing. To aid the synchronization, the UFFDIO_COPY ioctl will
-return -ENOSPC when the monitored process exits at the time of
-UFFDIO_COPY, and -ENOENT, when the non-cooperative process has changed
-its virtual memory layout simultaneously with outstanding UFFDIO_COPY
+soon as manager executes read(). The ``userfaultfd`` manager should
+carefully synchronize calls to ``UFFDIO_COPY`` with the events
+processing. To aid the synchronization, the ``UFFDIO_COPY`` ioctl will
+return ``-ENOSPC`` when the monitored process exits at the time of
+``UFFDIO_COPY``, and ``-ENOENT``, when the non-cooperative process has changed
+its virtual memory layout simultaneously with outstanding ``UFFDIO_COPY``
operation.
The current asynchronous model of the event delivery is optimal for
-single threaded non-cooperative userfaultfd manager implementations. A
+single threaded non-cooperative ``userfaultfd`` manager implementations. A
synchronous event delivery model can be added later as a new
-userfaultfd feature to facilitate multithreading enhancements of the
-non cooperative manager, for example to allow UFFDIO_COPY ioctls to
+``userfaultfd`` feature to facilitate multithreading enhancements of the
+non cooperative manager, for example to allow ``UFFDIO_COPY`` ioctls to
run in parallel to the event reception. Single threaded
implementations should continue to use the current async event
delivery model instead.
diff --git a/Documentation/admin-guide/mm/zswap.rst b/Documentation/admin-guide/mm/zswap.rst
new file mode 100644
index 000000000000..283d77217c6f
--- /dev/null
+++ b/Documentation/admin-guide/mm/zswap.rst
@@ -0,0 +1,138 @@
+=====
+zswap
+=====
+
+Overview
+========
+
+Zswap is a lightweight compressed cache for swap pages. It takes pages that are
+in the process of being swapped out and attempts to compress them into a
+dynamically allocated RAM-based memory pool. zswap basically trades CPU cycles
+for potentially reduced swap I/O. This trade-off can also result in a
+significant performance improvement if reads from the compressed cache are
+faster than reads from a swap device.
+
+Some potential benefits:
+
+* Desktop/laptop users with limited RAM capacities can mitigate the
+ performance impact of swapping.
+* Overcommitted guests that share a common I/O resource can
+ dramatically reduce their swap I/O pressure, avoiding heavy handed I/O
+ throttling by the hypervisor. This allows more work to get done with less
+ impact to the guest workload and guests sharing the I/O subsystem
+* Users with SSDs as swap devices can extend the life of the device by
+ drastically reducing life-shortening writes.
+
+Zswap evicts pages from compressed cache on an LRU basis to the backing swap
+device when the compressed pool reaches its size limit. This requirement had
+been identified in prior community discussions.
+
+Whether Zswap is enabled at the boot time depends on whether
+the ``CONFIG_ZSWAP_DEFAULT_ON`` Kconfig option is enabled or not.
+This setting can then be overridden by providing the kernel command line
+``zswap.enabled=`` option, for example ``zswap.enabled=0``.
+Zswap can also be enabled and disabled at runtime using the sysfs interface.
+An example command to enable zswap at runtime, assuming sysfs is mounted
+at ``/sys``, is::
+
+ echo 1 > /sys/module/zswap/parameters/enabled
+
+When zswap is disabled at runtime it will stop storing pages that are
+being swapped out. However, it will _not_ immediately write out or fault
+back into memory all of the pages stored in the compressed pool. The
+pages stored in zswap will remain in the compressed pool until they are
+either invalidated or faulted back into memory. In order to force all
+pages out of the compressed pool, a swapoff on the swap device(s) will
+fault back into memory all swapped out pages, including those in the
+compressed pool.
+
+Design
+======
+
+Zswap receives pages for compression from the swap subsystem and is able to
+evict pages from its own compressed pool on an LRU basis and write them back to
+the backing swap device in the case that the compressed pool is full.
+
+Zswap makes use of zsmalloc for the managing the compressed memory pool. Each
+allocation in zsmalloc is not directly accessible by address. Rather, a handle is
+returned by the allocation routine and that handle must be mapped before being
+accessed. The compressed memory pool grows on demand and shrinks as compressed
+pages are freed. The pool is not preallocated.
+
+When a swap page is passed from swapout to zswap, zswap maintains a mapping
+of the swap entry, a combination of the swap type and swap offset, to the
+zsmalloc handle that references that compressed swap page. This mapping is
+achieved with a red-black tree per swap type. The swap offset is the search
+key for the tree nodes.
+
+During a page fault on a PTE that is a swap entry, the swapin code calls the
+zswap load function to decompress the page into the page allocated by the page
+fault handler.
+
+Once there are no PTEs referencing a swap page stored in zswap (i.e. the count
+in the swap_map goes to 0) the swap code calls the zswap invalidate function
+to free the compressed entry.
+
+Zswap seeks to be simple in its policies. Sysfs attributes allow for one user
+controlled policy:
+
+* max_pool_percent - The maximum percentage of memory that the compressed
+ pool can occupy.
+
+The default compressor is selected in ``CONFIG_ZSWAP_COMPRESSOR_DEFAULT``
+Kconfig option, but it can be overridden at boot time by setting the
+``compressor`` attribute, e.g. ``zswap.compressor=lzo``.
+It can also be changed at runtime using the sysfs "compressor"
+attribute, e.g.::
+
+ echo lzo > /sys/module/zswap/parameters/compressor
+
+When the compressor parameter is changed at runtime, any existing compressed
+pages are not modified; they are left in their own pool. When a request is
+made for a page in an old pool, it is uncompressed using its original
+compressor. Once all pages are removed from an old pool, the pool and its
+compressor are freed.
+
+Some of the pages in zswap are same-value filled pages (i.e. contents of the
+page have same value or repetitive pattern). These pages include zero-filled
+pages and they are handled differently. During store operation, a page is
+checked if it is a same-value filled page before compressing it. If true, the
+compressed length of the page is set to zero and the pattern or same-filled
+value is stored.
+
+To prevent zswap from shrinking pool when zswap is full and there's a high
+pressure on swap (this will result in flipping pages in and out zswap pool
+without any real benefit but with a performance drop for the system), a
+special parameter has been introduced to implement a sort of hysteresis to
+refuse taking pages into zswap pool until it has sufficient space if the limit
+has been hit. To set the threshold at which zswap would start accepting pages
+again after it became full, use the sysfs ``accept_threshold_percent``
+attribute, e. g.::
+
+ echo 80 > /sys/module/zswap/parameters/accept_threshold_percent
+
+Setting this parameter to 100 will disable the hysteresis.
+
+Some users cannot tolerate the swapping that comes with zswap store failures
+and zswap writebacks. Swapping can be disabled entirely (without disabling
+zswap itself) on a cgroup-basis as follows::
+
+ echo 0 > /sys/fs/cgroup/<cgroup-name>/memory.zswap.writeback
+
+Note that if the store failures are recurring (for e.g if the pages are
+incompressible), users can observe reclaim inefficiency after disabling
+writeback (because the same pages might be rejected again and again).
+
+When there is a sizable amount of cold memory residing in the zswap pool, it
+can be advantageous to proactively write these cold pages to swap and reclaim
+the memory for other use cases. By default, the zswap shrinker is disabled.
+User can enable it as follows::
+
+ echo Y > /sys/module/zswap/parameters/shrinker_enabled
+
+This can be enabled at the boot time if ``CONFIG_ZSWAP_SHRINKER_DEFAULT_ON`` is
+selected.
+
+A debugfs interface is provided for various statistic about pool size, number
+of pages stored, same-value filled pages and various counters for the reasons
+pages are rejected.
diff --git a/Documentation/admin-guide/module-signing.rst b/Documentation/admin-guide/module-signing.rst
index f8b584179cff..a8667a777490 100644
--- a/Documentation/admin-guide/module-signing.rst
+++ b/Documentation/admin-guide/module-signing.rst
@@ -28,10 +28,10 @@ trusted userspace bits.
This facility uses X.509 ITU-T standard certificates to encode the public keys
involved. The signatures are not themselves encoded in any industrial standard
-type. The facility currently only supports the RSA public key encryption
-standard (though it is pluggable and permits others to be used). The possible
-hash algorithms that can be used are SHA-1, SHA-224, SHA-256, SHA-384, and
-SHA-512 (the algorithm is selected by data in the signature).
+type. The built-in facility currently only supports the RSA & NIST P-384 ECDSA
+public key signing standard (though it is pluggable and permits others to be
+used). The possible hash algorithms that can be used are SHA-2 and SHA-3 of
+sizes 256, 384, and 512 (the algorithm is selected by data in the signature).
==========================
@@ -81,11 +81,12 @@ This has a number of options available:
sign the modules with:
=============================== ==========================================
- ``CONFIG_MODULE_SIG_SHA1`` :menuselection:`Sign modules with SHA-1`
- ``CONFIG_MODULE_SIG_SHA224`` :menuselection:`Sign modules with SHA-224`
``CONFIG_MODULE_SIG_SHA256`` :menuselection:`Sign modules with SHA-256`
``CONFIG_MODULE_SIG_SHA384`` :menuselection:`Sign modules with SHA-384`
``CONFIG_MODULE_SIG_SHA512`` :menuselection:`Sign modules with SHA-512`
+ ``CONFIG_MODULE_SIG_SHA3_256`` :menuselection:`Sign modules with SHA3-256`
+ ``CONFIG_MODULE_SIG_SHA3_384`` :menuselection:`Sign modules with SHA3-384`
+ ``CONFIG_MODULE_SIG_SHA3_512`` :menuselection:`Sign modules with SHA3-512`
=============================== ==========================================
The algorithm selected here will also be built into the kernel (rather
@@ -106,7 +107,7 @@ This has a number of options available:
certificate and a private key.
If the PEM file containing the private key is encrypted, or if the
- PKCS#11 token requries a PIN, this can be provided at build time by
+ PKCS#11 token requires a PIN, this can be provided at build time by
means of the ``KBUILD_SIGN_PIN`` variable.
@@ -145,6 +146,10 @@ into vmlinux) using parameters in the::
file (which is also generated if it does not already exist).
+One can select between RSA (``MODULE_SIG_KEY_TYPE_RSA``) and ECDSA
+(``MODULE_SIG_KEY_TYPE_ECDSA``) to generate either RSA 4k or NIST
+P-384 keypair.
+
It is strongly recommended that you provide your own x509.genkey file.
Most notably, in the x509.genkey file, the req_distinguished_name section
@@ -266,7 +271,7 @@ for which it has a public key. Otherwise, it will also load modules that are
unsigned. Any module for which the kernel has a key, but which proves to have
a signature mismatch will not be permitted to load.
-Any module that has an unparseable signature will be rejected.
+Any module that has an unparsable signature will be rejected.
=========================================
diff --git a/Documentation/admin-guide/mono.rst b/Documentation/admin-guide/mono.rst
index 59e6d59f0ed9..c6dab5680065 100644
--- a/Documentation/admin-guide/mono.rst
+++ b/Documentation/admin-guide/mono.rst
@@ -12,11 +12,11 @@ other program after you have done the following:
a binary package, a source tarball or by installing from Git. Binary
packages for several distributions can be found at:
- http://www.mono-project.com/download/
+ https://www.mono-project.com/download/
Instructions for compiling Mono can be found at:
- http://www.mono-project.com/docs/compiling-mono/linux/
+ https://www.mono-project.com/docs/compiling-mono/linux/
Once the Mono CLR support has been installed, just check that
``/usr/bin/mono`` (which could be located elsewhere, for example
diff --git a/Documentation/admin-guide/namespaces/compatibility-list.rst b/Documentation/admin-guide/namespaces/compatibility-list.rst
new file mode 100644
index 000000000000..318800b2a943
--- /dev/null
+++ b/Documentation/admin-guide/namespaces/compatibility-list.rst
@@ -0,0 +1,43 @@
+=============================
+Namespaces compatibility list
+=============================
+
+This document contains the information about the problems user
+may have when creating tasks living in different namespaces.
+
+Here's the summary. This matrix shows the known problems, that
+occur when tasks share some namespace (the columns) while living
+in different other namespaces (the rows):
+
+==== === === === === ==== ===
+- UTS IPC VFS PID User Net
+==== === === === === ==== ===
+UTS X
+IPC X 1
+VFS X
+PID 1 1 X
+User 2 2 X
+Net X
+==== === === === === ==== ===
+
+1. Both the IPC and the PID namespaces provide IDs to address
+ object inside the kernel. E.g. semaphore with IPCID or
+ process group with pid.
+
+ In both cases, tasks shouldn't try exposing this ID to some
+ other task living in a different namespace via a shared filesystem
+ or IPC shmem/message. The fact is that this ID is only valid
+ within the namespace it was obtained in and may refer to some
+ other object in another namespace.
+
+2. Intentionally, two equal user IDs in different user namespaces
+ should not be equal from the VFS point of view. In other
+ words, user 10 in one user namespace shouldn't have the same
+ access permissions to files, belonging to user 10 in another
+ namespace.
+
+ The same is true for the IPC namespaces being shared - two users
+ from different user namespaces should not access the same IPC objects
+ even having equal UIDs.
+
+ But currently this is not so.
diff --git a/Documentation/admin-guide/namespaces/index.rst b/Documentation/admin-guide/namespaces/index.rst
new file mode 100644
index 000000000000..384f2e0f33d2
--- /dev/null
+++ b/Documentation/admin-guide/namespaces/index.rst
@@ -0,0 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========
+Namespaces
+==========
+
+.. toctree::
+ :maxdepth: 1
+
+ compatibility-list
+ resource-control
diff --git a/Documentation/admin-guide/namespaces/resource-control.rst b/Documentation/admin-guide/namespaces/resource-control.rst
new file mode 100644
index 000000000000..553a44803231
--- /dev/null
+++ b/Documentation/admin-guide/namespaces/resource-control.rst
@@ -0,0 +1,18 @@
+====================================
+User namespaces and resource control
+====================================
+
+The kernel contains many kinds of objects that either don't have
+individual limits or that have limits which are ineffective when
+a set of processes is allowed to switch their UID. On a system
+where the admins don't trust their users or their users' programs,
+user namespaces expose the system to potential misuse of resources.
+
+In order to mitigate this, we recommend that admins enable memory
+control groups on any system that enables user namespaces.
+Furthermore, we recommend that admins configure the memory control
+groups to limit the maximum memory usable by any untrusted user.
+
+Memory control groups can be configured by installing the libcgroup
+package present on most distros editing /etc/cgrules.conf,
+/etc/cgconfig.conf and setting up libpam-cgroup.
diff --git a/Documentation/admin-guide/nfs/index.rst b/Documentation/admin-guide/nfs/index.rst
new file mode 100644
index 000000000000..3601a708f333
--- /dev/null
+++ b/Documentation/admin-guide/nfs/index.rst
@@ -0,0 +1,14 @@
+=============
+NFS
+=============
+
+.. toctree::
+ :maxdepth: 1
+
+ nfs-client
+ nfsroot
+ nfs-rdma
+ nfsd-admin-interfaces
+ nfs-idmapper
+ pnfs-block-server
+ pnfs-scsi-server
diff --git a/Documentation/admin-guide/nfs/nfs-client.rst b/Documentation/admin-guide/nfs/nfs-client.rst
new file mode 100644
index 000000000000..36760685dd34
--- /dev/null
+++ b/Documentation/admin-guide/nfs/nfs-client.rst
@@ -0,0 +1,144 @@
+==========
+NFS Client
+==========
+
+The NFS client
+==============
+
+The NFS version 2 protocol was first documented in RFC1094 (March 1989).
+Since then two more major releases of NFS have been published, with NFSv3
+being documented in RFC1813 (June 1995), and NFSv4 in RFC3530 (April
+2003).
+
+The Linux NFS client currently supports all the above published versions,
+and work is in progress on adding support for minor version 1 of the NFSv4
+protocol.
+
+The purpose of this document is to provide information on some of the
+special features of the NFS client that can be configured by system
+administrators.
+
+
+The nfs4_unique_id parameter
+============================
+
+NFSv4 requires clients to identify themselves to servers with a unique
+string. File open and lock state shared between one client and one server
+is associated with this identity. To support robust NFSv4 state recovery
+and transparent state migration, this identity string must not change
+across client reboots.
+
+Without any other intervention, the Linux client uses a string that contains
+the local system's node name. System administrators, however, often do not
+take care to ensure that node names are fully qualified and do not change
+over the lifetime of a client system. Node names can have other
+administrative requirements that require particular behavior that does not
+work well as part of an nfs_client_id4 string.
+
+The nfs.nfs4_unique_id boot parameter specifies a unique string that can be
+used together with a system's node name when an NFS client identifies itself to
+a server. Thus, if the system's node name is not unique, its
+nfs.nfs4_unique_id can help prevent collisions with other clients.
+
+The nfs.nfs4_unique_id string is typically a UUID, though it can contain
+anything that is believed to be unique across all NFS clients. An
+nfs4_unique_id string should be chosen when a client system is installed,
+just as a system's root file system gets a fresh UUID in its label at
+install time.
+
+The string should remain fixed for the lifetime of the client. It can be
+changed safely if care is taken that the client shuts down cleanly and all
+outstanding NFSv4 state has expired, to prevent loss of NFSv4 state.
+
+This string can be stored in an NFS client's grub.conf, or it can be provided
+via a net boot facility such as PXE. It may also be specified as an nfs.ko
+module parameter.
+
+This uniquifier string will be the same for all NFS clients running in
+containers unless it is overridden by a value written to
+/sys/fs/nfs/net/nfs_client/identifier which will be local to the network
+namespace of the process which writes.
+
+
+The DNS resolver
+================
+
+NFSv4 allows for one server to refer the NFS client to data that has been
+migrated onto another server by means of the special "fs_locations"
+attribute. See `RFC3530 Section 6: Filesystem Migration and Replication`_ and
+`Implementation Guide for Referrals in NFSv4`_.
+
+.. _RFC3530 Section 6\: Filesystem Migration and Replication: https://tools.ietf.org/html/rfc3530#section-6
+.. _Implementation Guide for Referrals in NFSv4: https://tools.ietf.org/html/draft-ietf-nfsv4-referrals-00
+
+The fs_locations information can take the form of either an ip address and
+a path, or a DNS hostname and a path. The latter requires the NFS client to
+do a DNS lookup in order to mount the new volume, and hence the need for an
+upcall to allow userland to provide this service.
+
+Assuming that the user has the 'rpc_pipefs' filesystem mounted in the usual
+/var/lib/nfs/rpc_pipefs, the upcall consists of the following steps:
+
+ (1) The process checks the dns_resolve cache to see if it contains a
+ valid entry. If so, it returns that entry and exits.
+
+ (2) If no valid entry exists, the helper script '/sbin/nfs_cache_getent'
+ (may be changed using the 'nfs.cache_getent' kernel boot parameter)
+ is run, with two arguments:
+ - the cache name, "dns_resolve"
+ - the hostname to resolve
+
+ (3) After looking up the corresponding ip address, the helper script
+ writes the result into the rpc_pipefs pseudo-file
+ '/var/lib/nfs/rpc_pipefs/cache/dns_resolve/channel'
+ in the following (text) format:
+
+ "<ip address> <hostname> <ttl>\n"
+
+ Where <ip address> is in the usual IPv4 (123.456.78.90) or IPv6
+ (ffee:ddcc:bbaa:9988:7766:5544:3322:1100, ffee::1100, ...) format.
+ <hostname> is identical to the second argument of the helper
+ script, and <ttl> is the 'time to live' of this cache entry (in
+ units of seconds).
+
+ .. note::
+ If <ip address> is invalid, say the string "0", then a negative
+ entry is created, which will cause the kernel to treat the hostname
+ as having no valid DNS translation.
+
+
+
+
+A basic sample /sbin/nfs_cache_getent
+=====================================
+.. code-block:: sh
+
+ #!/bin/bash
+ #
+ ttl=600
+ #
+ cut=/usr/bin/cut
+ getent=/usr/bin/getent
+ rpc_pipefs=/var/lib/nfs/rpc_pipefs
+ #
+ die()
+ {
+ echo "Usage: $0 cache_name entry_name"
+ exit 1
+ }
+
+ [ $# -lt 2 ] && die
+ cachename="$1"
+ cache_path=${rpc_pipefs}/cache/${cachename}/channel
+
+ case "${cachename}" in
+ dns_resolve)
+ name="$2"
+ result="$(${getent} hosts ${name} | ${cut} -f1 -d\ )"
+ [ -z "${result}" ] && result="0"
+ ;;
+ *)
+ die
+ ;;
+ esac
+ echo "${result} ${name} ${ttl}" >${cache_path}
diff --git a/Documentation/admin-guide/nfs/nfs-idmapper.rst b/Documentation/admin-guide/nfs/nfs-idmapper.rst
new file mode 100644
index 000000000000..58b8e63412d5
--- /dev/null
+++ b/Documentation/admin-guide/nfs/nfs-idmapper.rst
@@ -0,0 +1,78 @@
+=============
+NFS ID Mapper
+=============
+
+Id mapper is used by NFS to translate user and group ids into names, and to
+translate user and group names into ids. Part of this translation involves
+performing an upcall to userspace to request the information. There are two
+ways NFS could obtain this information: placing a call to /sbin/request-key
+or by placing a call to the rpc.idmap daemon.
+
+NFS will attempt to call /sbin/request-key first. If this succeeds, the
+result will be cached using the generic request-key cache. This call should
+only fail if /etc/request-key.conf is not configured for the id_resolver key
+type, see the "Configuring" section below if you wish to use the request-key
+method.
+
+If the call to /sbin/request-key fails (if /etc/request-key.conf is not
+configured with the id_resolver key type), then the idmapper will ask the
+legacy rpc.idmap daemon for the id mapping. This result will be stored
+in a custom NFS idmap cache.
+
+
+Configuring
+===========
+
+The file /etc/request-key.conf will need to be modified so /sbin/request-key can
+direct the upcall. The following line should be added:
+
+``#OP TYPE DESCRIPTION CALLOUT INFO PROGRAM ARG1 ARG2 ARG3 ...``
+``#====== ======= =============== =============== ===============================``
+``create id_resolver * * /usr/sbin/nfs.idmap %k %d 600``
+
+
+This will direct all id_resolver requests to the program /usr/sbin/nfs.idmap.
+The last parameter, 600, defines how many seconds into the future the key will
+expire. This parameter is optional for /usr/sbin/nfs.idmap. When the timeout
+is not specified, nfs.idmap will default to 600 seconds.
+
+id mapper uses for key descriptions::
+
+ uid: Find the UID for the given user
+ gid: Find the GID for the given group
+ user: Find the user name for the given UID
+ group: Find the group name for the given GID
+
+You can handle any of these individually, rather than using the generic upcall
+program. If you would like to use your own program for a uid lookup then you
+would edit your request-key.conf so it look similar to this:
+
+``#OP TYPE DESCRIPTION CALLOUT INFO PROGRAM ARG1 ARG2 ARG3 ...``
+``#====== ======= =============== =============== ===============================``
+``create id_resolver uid:* * /some/other/program %k %d 600``
+``create id_resolver * * /usr/sbin/nfs.idmap %k %d 600``
+
+
+Notice that the new line was added above the line for the generic program.
+request-key will find the first matching line and corresponding program. In
+this case, /some/other/program will handle all uid lookups and
+/usr/sbin/nfs.idmap will handle gid, user, and group lookups.
+
+See Documentation/security/keys/request-key.rst for more information
+about the request-key function.
+
+
+nfs.idmap
+=========
+
+nfs.idmap is designed to be called by request-key, and should not be run "by
+hand". This program takes two arguments, a serialized key and a key
+description. The serialized key is first converted into a key_serial_t, and
+then passed as an argument to keyctl_instantiate (both are part of keyutils.h).
+
+The actual lookups are performed by functions found in nfsidmap.h. nfs.idmap
+determines the correct function to call by looking at the first part of the
+description string. For example, a uid lookup description will appear as
+"uid:user@domain".
+
+nfs.idmap will return 0 if the key was instantiated, and non-zero otherwise.
diff --git a/Documentation/admin-guide/nfs/nfs-rdma.rst b/Documentation/admin-guide/nfs/nfs-rdma.rst
new file mode 100644
index 000000000000..f137485f8bde
--- /dev/null
+++ b/Documentation/admin-guide/nfs/nfs-rdma.rst
@@ -0,0 +1,292 @@
+===================
+Setting up NFS/RDMA
+===================
+
+:Author:
+ NetApp and Open Grid Computing (May 29, 2008)
+
+.. warning::
+ This document is probably obsolete.
+
+Overview
+========
+
+This document describes how to install and setup the Linux NFS/RDMA client
+and server software.
+
+The NFS/RDMA client was first included in Linux 2.6.24. The NFS/RDMA server
+was first included in the following release, Linux 2.6.25.
+
+In our testing, we have obtained excellent performance results (full 10Gbit
+wire bandwidth at minimal client CPU) under many workloads. The code passes
+the full Connectathon test suite and operates over both Infiniband and iWARP
+RDMA adapters.
+
+Getting Help
+============
+
+If you get stuck, you can ask questions on the
+nfs-rdma-devel@lists.sourceforge.net mailing list.
+
+Installation
+============
+
+These instructions are a step by step guide to building a machine for
+use with NFS/RDMA.
+
+- Install an RDMA device
+
+ Any device supported by the drivers in drivers/infiniband/hw is acceptable.
+
+ Testing has been performed using several Mellanox-based IB cards, the
+ Ammasso AMS1100 iWARP adapter, and the Chelsio cxgb3 iWARP adapter.
+
+- Install a Linux distribution and tools
+
+ The first kernel release to contain both the NFS/RDMA client and server was
+ Linux 2.6.25 Therefore, a distribution compatible with this and subsequent
+ Linux kernel release should be installed.
+
+ The procedures described in this document have been tested with
+ distributions from Red Hat's Fedora Project (http://fedora.redhat.com/).
+
+- Install nfs-utils-1.1.2 or greater on the client
+
+ An NFS/RDMA mount point can be obtained by using the mount.nfs command in
+ nfs-utils-1.1.2 or greater (nfs-utils-1.1.1 was the first nfs-utils
+ version with support for NFS/RDMA mounts, but for various reasons we
+ recommend using nfs-utils-1.1.2 or greater). To see which version of
+ mount.nfs you are using, type:
+
+ .. code-block:: sh
+
+ $ /sbin/mount.nfs -V
+
+ If the version is less than 1.1.2 or the command does not exist,
+ you should install the latest version of nfs-utils.
+
+ Download the latest package from: https://www.kernel.org/pub/linux/utils/nfs
+
+ Uncompress the package and follow the installation instructions.
+
+ If you will not need the idmapper and gssd executables (you do not need
+ these to create an NFS/RDMA enabled mount command), the installation
+ process can be simplified by disabling these features when running
+ configure:
+
+ .. code-block:: sh
+
+ $ ./configure --disable-gss --disable-nfsv4
+
+ To build nfs-utils you will need the tcp_wrappers package installed. For
+ more information on this see the package's README and INSTALL files.
+
+ After building the nfs-utils package, there will be a mount.nfs binary in
+ the utils/mount directory. This binary can be used to initiate NFS v2, v3,
+ or v4 mounts. To initiate a v4 mount, the binary must be called
+ mount.nfs4. The standard technique is to create a symlink called
+ mount.nfs4 to mount.nfs.
+
+ This mount.nfs binary should be installed at /sbin/mount.nfs as follows:
+
+ .. code-block:: sh
+
+ $ sudo cp utils/mount/mount.nfs /sbin/mount.nfs
+
+ In this location, mount.nfs will be invoked automatically for NFS mounts
+ by the system mount command.
+
+ .. note::
+ mount.nfs and therefore nfs-utils-1.1.2 or greater is only needed
+ on the NFS client machine. You do not need this specific version of
+ nfs-utils on the server. Furthermore, only the mount.nfs command from
+ nfs-utils-1.1.2 is needed on the client.
+
+- Install a Linux kernel with NFS/RDMA
+
+ The NFS/RDMA client and server are both included in the mainline Linux
+ kernel version 2.6.25 and later. This and other versions of the Linux
+ kernel can be found at: https://www.kernel.org/pub/linux/kernel/
+
+ Download the sources and place them in an appropriate location.
+
+- Configure the RDMA stack
+
+ Make sure your kernel configuration has RDMA support enabled. Under
+ Device Drivers -> InfiniBand support, update the kernel configuration
+ to enable InfiniBand support [NOTE: the option name is misleading. Enabling
+ InfiniBand support is required for all RDMA devices (IB, iWARP, etc.)].
+
+ Enable the appropriate IB HCA support (mlx4, mthca, ehca, ipath, etc.) or
+ iWARP adapter support (amso, cxgb3, etc.).
+
+ If you are using InfiniBand, be sure to enable IP-over-InfiniBand support.
+
+- Configure the NFS client and server
+
+ Your kernel configuration must also have NFS file system support and/or
+ NFS server support enabled. These and other NFS related configuration
+ options can be found under File Systems -> Network File Systems.
+
+- Build, install, reboot
+
+ The NFS/RDMA code will be enabled automatically if NFS and RDMA
+ are turned on. The NFS/RDMA client and server are configured via the hidden
+ SUNRPC_XPRT_RDMA config option that depends on SUNRPC and INFINIBAND. The
+ value of SUNRPC_XPRT_RDMA will be:
+
+ #. N if either SUNRPC or INFINIBAND are N, in this case the NFS/RDMA client
+ and server will not be built
+
+ #. M if both SUNRPC and INFINIBAND are on (M or Y) and at least one is M,
+ in this case the NFS/RDMA client and server will be built as modules
+
+ #. Y if both SUNRPC and INFINIBAND are Y, in this case the NFS/RDMA client
+ and server will be built into the kernel
+
+ Therefore, if you have followed the steps above and turned no NFS and RDMA,
+ the NFS/RDMA client and server will be built.
+
+ Build a new kernel, install it, boot it.
+
+Check RDMA and NFS Setup
+========================
+
+Before configuring the NFS/RDMA software, it is a good idea to test
+your new kernel to ensure that the kernel is working correctly.
+In particular, it is a good idea to verify that the RDMA stack
+is functioning as expected and standard NFS over TCP/IP and/or UDP/IP
+is working properly.
+
+- Check RDMA Setup
+
+ If you built the RDMA components as modules, load them at
+ this time. For example, if you are using a Mellanox Tavor/Sinai/Arbel
+ card:
+
+ .. code-block:: sh
+
+ $ modprobe ib_mthca
+ $ modprobe ib_ipoib
+
+ If you are using InfiniBand, make sure there is a Subnet Manager (SM)
+ running on the network. If your IB switch has an embedded SM, you can
+ use it. Otherwise, you will need to run an SM, such as OpenSM, on one
+ of your end nodes.
+
+ If an SM is running on your network, you should see the following:
+
+ .. code-block:: sh
+
+ $ cat /sys/class/infiniband/driverX/ports/1/state
+ 4: ACTIVE
+
+ where driverX is mthca0, ipath5, ehca3, etc.
+
+ To further test the InfiniBand software stack, use IPoIB (this
+ assumes you have two IB hosts named host1 and host2):
+
+ .. code-block:: sh
+
+ host1$ ip link set dev ib0 up
+ host1$ ip address add dev ib0 a.b.c.x
+ host2$ ip link set dev ib0 up
+ host2$ ip address add dev ib0 a.b.c.y
+ host1$ ping a.b.c.y
+ host2$ ping a.b.c.x
+
+ For other device types, follow the appropriate procedures.
+
+- Check NFS Setup
+
+ For the NFS components enabled above (client and/or server),
+ test their functionality over standard Ethernet using TCP/IP or UDP/IP.
+
+NFS/RDMA Setup
+==============
+
+We recommend that you use two machines, one to act as the client and
+one to act as the server.
+
+One time configuration:
+-----------------------
+
+- On the server system, configure the /etc/exports file and start the NFS/RDMA server.
+
+ Exports entries with the following formats have been tested::
+
+ /vol0 192.168.0.47(fsid=0,rw,async,insecure,no_root_squash)
+ /vol0 192.168.0.0/255.255.255.0(fsid=0,rw,async,insecure,no_root_squash)
+
+ The IP address(es) is(are) the client's IPoIB address for an InfiniBand
+ HCA or the client's iWARP address(es) for an RNIC.
+
+ .. note::
+ The "insecure" option must be used because the NFS/RDMA client does
+ not use a reserved port.
+
+Each time a machine boots:
+--------------------------
+
+- Load and configure the RDMA drivers
+
+ For InfiniBand using a Mellanox adapter:
+
+ .. code-block:: sh
+
+ $ modprobe ib_mthca
+ $ modprobe ib_ipoib
+ $ ip li set dev ib0 up
+ $ ip addr add dev ib0 a.b.c.d
+
+ .. note::
+ Please use unique addresses for the client and server!
+
+- Start the NFS server
+
+ If the NFS/RDMA server was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in
+ kernel config), load the RDMA transport module:
+
+ .. code-block:: sh
+
+ $ modprobe svcrdma
+
+ Regardless of how the server was built (module or built-in), start the
+ server:
+
+ .. code-block:: sh
+
+ $ /etc/init.d/nfs start
+
+ or
+
+ .. code-block:: sh
+
+ $ service nfs start
+
+ Instruct the server to listen on the RDMA transport:
+
+ .. code-block:: sh
+
+ $ echo rdma 20049 > /proc/fs/nfsd/portlist
+
+- On the client system
+
+ If the NFS/RDMA client was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in
+ kernel config), load the RDMA client module:
+
+ .. code-block:: sh
+
+ $ modprobe xprtrdma.ko
+
+ Regardless of how the client was built (module or built-in), use this
+ command to mount the NFS/RDMA server:
+
+ .. code-block:: sh
+
+ $ mount -o rdma,port=20049 <IPoIB-server-name-or-address>:/<export> /mnt
+
+ To verify that the mount is using RDMA, run "cat /proc/mounts" and check
+ the "proto" field for the given mount.
+
+ Congratulations! You're using NFS/RDMA!
diff --git a/Documentation/admin-guide/nfs/nfsd-admin-interfaces.rst b/Documentation/admin-guide/nfs/nfsd-admin-interfaces.rst
new file mode 100644
index 000000000000..c05926f79054
--- /dev/null
+++ b/Documentation/admin-guide/nfs/nfsd-admin-interfaces.rst
@@ -0,0 +1,40 @@
+==================================
+Administrative interfaces for nfsd
+==================================
+
+Note that normally these interfaces are used only by the utilities in
+nfs-utils.
+
+nfsd is controlled mainly by pseudofiles under the "nfsd" filesystem,
+which is normally mounted at /proc/fs/nfsd/.
+
+The server is always started by the first write of a nonzero value to
+nfsd/threads.
+
+Before doing that, NFSD can be told which sockets to listen on by
+writing to nfsd/portlist; that write may be:
+
+ - an ascii-encoded file descriptor, which should refer to a
+ bound (and listening, for tcp) socket, or
+ - "transportname port", where transportname is currently either
+ "udp", "tcp", or "rdma".
+
+If nfsd is started without doing any of these, then it will create one
+udp and one tcp listener at port 2049 (see nfsd_init_socks).
+
+On startup, nfsd and lockd grace periods start. nfsd is shut down by a write of
+0 to nfsd/threads. All locks and state are thrown away at that point.
+
+Between startup and shutdown, the number of threads may be adjusted up
+or down by additional writes to nfsd/threads or by writes to
+nfsd/pool_threads.
+
+For more detail about files under nfsd/ and what they control, see
+fs/nfsd/nfsctl.c; most of them have detailed comments.
+
+Implementation notes
+====================
+
+Note that the rpc server requires the caller to serialize addition and
+removal of listening sockets, and startup and shutdown of the server.
+For nfsd this is done using nfsd_mutex.
diff --git a/Documentation/admin-guide/nfs/nfsroot.rst b/Documentation/admin-guide/nfs/nfsroot.rst
new file mode 100644
index 000000000000..06990309c6ff
--- /dev/null
+++ b/Documentation/admin-guide/nfs/nfsroot.rst
@@ -0,0 +1,364 @@
+===============================================
+Mounting the root filesystem via NFS (nfsroot)
+===============================================
+
+:Authors:
+ Written 1996 by Gero Kuhlmann <gero@gkminix.han.de>
+
+ Updated 1997 by Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+
+ Updated 2006 by Nico Schottelius <nico-kernel-nfsroot@schottelius.org>
+
+ Updated 2006 by Horms <horms@verge.net.au>
+
+ Updated 2018 by Chris Novakovic <chris@chrisn.me.uk>
+
+
+
+In order to use a diskless system, such as an X-terminal or printer server for
+example, it is necessary for the root filesystem to be present on a non-disk
+device. This may be an initramfs (see
+Documentation/filesystems/ramfs-rootfs-initramfs.rst), a ramdisk (see
+Documentation/admin-guide/initrd.rst) or a filesystem mounted via NFS. The
+following text describes on how to use NFS for the root filesystem. For the rest
+of this text 'client' means the diskless system, and 'server' means the NFS
+server.
+
+
+
+
+Enabling nfsroot capabilities
+=============================
+
+In order to use nfsroot, NFS client support needs to be selected as
+built-in during configuration. Once this has been selected, the nfsroot
+option will become available, which should also be selected.
+
+In the networking options, kernel level autoconfiguration can be selected,
+along with the types of autoconfiguration to support. Selecting all of
+DHCP, BOOTP and RARP is safe.
+
+
+
+
+Kernel command line
+===================
+
+When the kernel has been loaded by a boot loader (see below) it needs to be
+told what root fs device to use. And in the case of nfsroot, where to find
+both the server and the name of the directory on the server to mount as root.
+This can be established using the following kernel command line parameters:
+
+
+root=/dev/nfs
+ This is necessary to enable the pseudo-NFS-device. Note that it's not a
+ real device but just a synonym to tell the kernel to use NFS instead of
+ a real device.
+
+
+nfsroot=[<server-ip>:]<root-dir>[,<nfs-options>]
+ If the `nfsroot' parameter is NOT given on the command line,
+ the default ``"/tftpboot/%s"`` will be used.
+
+ <server-ip> Specifies the IP address of the NFS server.
+ The default address is determined by the ip parameter
+ (see below). This parameter allows the use of different
+ servers for IP autoconfiguration and NFS.
+
+ <root-dir> Name of the directory on the server to mount as root.
+ If there is a "%s" token in the string, it will be
+ replaced by the ASCII-representation of the client's
+ IP address.
+
+ <nfs-options> Standard NFS options. All options are separated by commas.
+ The following defaults are used::
+
+ port = as given by server portmap daemon
+ rsize = 4096
+ wsize = 4096
+ timeo = 7
+ retrans = 3
+ acregmin = 3
+ acregmax = 60
+ acdirmin = 30
+ acdirmax = 60
+ flags = hard, nointr, noposix, cto, ac
+
+
+ip=<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf>:<dns0-ip>:<dns1-ip>:<ntp0-ip>
+ This parameter tells the kernel how to configure IP addresses of devices
+ and also how to set up the IP routing table. It was originally called
+ nfsaddrs, but now the boot-time IP configuration works independently of
+ NFS, so it was renamed to ip and the old name remained as an alias for
+ compatibility reasons.
+
+ If this parameter is missing from the kernel command line, all fields are
+ assumed to be empty, and the defaults mentioned below apply. In general
+ this means that the kernel tries to configure everything using
+ autoconfiguration.
+
+ The <autoconf> parameter can appear alone as the value to the ip
+ parameter (without all the ':' characters before). If the value is
+ "ip=off" or "ip=none", no autoconfiguration will take place, otherwise
+ autoconfiguration will take place. The most common way to use this
+ is "ip=dhcp".
+
+ <client-ip> IP address of the client.
+ Default: Determined using autoconfiguration.
+
+ <server-ip> IP address of the NFS server.
+ If RARP is used to determine
+ the client address and this parameter is NOT empty only
+ replies from the specified server are accepted.
+
+ Only required for NFS root. That is autoconfiguration
+ will not be triggered if it is missing and NFS root is not
+ in operation.
+
+ Value is exported to /proc/net/pnp with the prefix "bootserver "
+ (see below).
+
+ Default: Determined using autoconfiguration.
+ The address of the autoconfiguration server is used.
+
+ <gw-ip> IP address of a gateway if the server is on a different subnet.
+ Default: Determined using autoconfiguration.
+
+ <netmask> Netmask for local network interface.
+ If unspecified the netmask is derived from the client IP address
+ assuming classful addressing.
+
+ Default: Determined using autoconfiguration.
+
+ <hostname> Name of the client.
+ If a '.' character is present, anything
+ before the first '.' is used as the client's hostname, and anything
+ after it is used as its NIS domain name. May be supplied by
+ autoconfiguration, but its absence will not trigger autoconfiguration.
+ If specified and DHCP is used, the user-provided hostname (and NIS
+ domain name, if present) will be carried in the DHCP request; this
+ may cause a DNS record to be created or updated for the client.
+
+ Default: Client IP address is used in ASCII notation.
+
+ <device> Name of network device to use.
+ Default: If the host only has one device, it is used.
+ Otherwise the device is determined using
+ autoconfiguration. This is done by sending
+ autoconfiguration requests out of all devices,
+ and using the device that received the first reply.
+
+ <autoconf> Method to use for autoconfiguration.
+ In the case of options
+ which specify multiple autoconfiguration protocols,
+ requests are sent using all protocols, and the first one
+ to reply is used.
+
+ Only autoconfiguration protocols that have been compiled
+ into the kernel will be used, regardless of the value of
+ this option::
+
+ off or none: don't use autoconfiguration
+ (do static IP assignment instead)
+ on or any: use any protocol available in the kernel
+ (default)
+ dhcp: use DHCP
+ bootp: use BOOTP
+ rarp: use RARP
+ both: use both BOOTP and RARP but not DHCP
+ (old option kept for backwards compatibility)
+
+ if dhcp is used, the client identifier can be used by following
+ format "ip=dhcp,client-id-type,client-id-value"
+
+ Default: any
+
+ <dns0-ip> IP address of primary nameserver.
+ Value is exported to /proc/net/pnp with the prefix "nameserver "
+ (see below).
+
+ Default: None if not using autoconfiguration; determined
+ automatically if using autoconfiguration.
+
+ <dns1-ip> IP address of secondary nameserver.
+ See <dns0-ip>.
+
+ <ntp0-ip> IP address of a Network Time Protocol (NTP) server.
+ Value is exported to /proc/net/ipconfig/ntp_servers, but is
+ otherwise unused (see below).
+
+ Default: None if not using autoconfiguration; determined
+ automatically if using autoconfiguration.
+
+ After configuration (whether manual or automatic) is complete, two files
+ are created in the following format; lines are omitted if their respective
+ value is empty following configuration:
+
+ - /proc/net/pnp:
+
+ #PROTO: <DHCP|BOOTP|RARP|MANUAL> (depending on configuration method)
+ domain <dns-domain> (if autoconfigured, the DNS domain)
+ nameserver <dns0-ip> (primary name server IP)
+ nameserver <dns1-ip> (secondary name server IP)
+ nameserver <dns2-ip> (tertiary name server IP)
+ bootserver <server-ip> (NFS server IP)
+
+ - /proc/net/ipconfig/ntp_servers:
+
+ <ntp0-ip> (NTP server IP)
+ <ntp1-ip> (NTP server IP)
+ <ntp2-ip> (NTP server IP)
+
+ <dns-domain> and <dns2-ip> (in /proc/net/pnp) and <ntp1-ip> and <ntp2-ip>
+ (in /proc/net/ipconfig/ntp_servers) are requested during autoconfiguration;
+ they cannot be specified as part of the "ip=" kernel command line parameter.
+
+ Because the "domain" and "nameserver" options are recognised by DNS
+ resolvers, /etc/resolv.conf is often linked to /proc/net/pnp on systems
+ that use an NFS root filesystem.
+
+ Note that the kernel will not synchronise the system time with any NTP
+ servers it discovers; this is the responsibility of a user space process
+ (e.g. an initrd/initramfs script that passes the IP addresses listed in
+ /proc/net/ipconfig/ntp_servers to an NTP client before mounting the real
+ root filesystem if it is on NFS).
+
+
+nfsrootdebug
+ This parameter enables debugging messages to appear in the kernel
+ log at boot time so that administrators can verify that the correct
+ NFS mount options, server address, and root path are passed to the
+ NFS client.
+
+
+rdinit=<executable file>
+ To specify which file contains the program that starts system
+ initialization, administrators can use this command line parameter.
+ The default value of this parameter is "/init". If the specified
+ file exists and the kernel can execute it, root filesystem related
+ kernel command line parameters, including 'nfsroot=', are ignored.
+
+ A description of the process of mounting the root file system can be
+ found in Documentation/driver-api/early-userspace/early_userspace_support.rst
+
+
+Boot Loader
+===========
+
+To get the kernel into memory different approaches can be used.
+They depend on various facilities being available:
+
+
+- Booting from a floppy using syslinux
+
+ When building kernels, an easy way to create a boot floppy that uses
+ syslinux is to use the zdisk or bzdisk make targets which use zimage
+ and bzimage images respectively. Both targets accept the
+ FDARGS parameter which can be used to set the kernel command line.
+
+ e.g::
+
+ make bzdisk FDARGS="root=/dev/nfs"
+
+ Note that the user running this command will need to have
+ access to the floppy drive device, /dev/fd0
+
+ For more information on syslinux, including how to create bootdisks
+ for prebuilt kernels, see https://syslinux.zytor.com/
+
+ .. note::
+ Previously it was possible to write a kernel directly to
+ a floppy using dd, configure the boot device using rdev, and
+ boot using the resulting floppy. Linux no longer supports this
+ method of booting.
+
+- Booting from a cdrom using isolinux
+
+ When building kernels, an easy way to create a bootable cdrom that
+ uses isolinux is to use the isoimage target which uses a bzimage
+ image. Like zdisk and bzdisk, this target accepts the FDARGS
+ parameter which can be used to set the kernel command line.
+
+ e.g::
+
+ make isoimage FDARGS="root=/dev/nfs"
+
+ The resulting iso image will be arch/<ARCH>/boot/image.iso
+ This can be written to a cdrom using a variety of tools including
+ cdrecord.
+
+ e.g::
+
+ cdrecord dev=ATAPI:1,0,0 arch/x86/boot/image.iso
+
+ For more information on isolinux, including how to create bootdisks
+ for prebuilt kernels, see https://syslinux.zytor.com/
+
+- Using LILO
+
+ When using LILO all the necessary command line parameters may be
+ specified using the 'append=' directive in the LILO configuration
+ file.
+
+ However, to use the 'root=' directive you also need to create
+ a dummy root device, which may be removed after LILO is run.
+
+ e.g::
+
+ mknod /dev/boot255 c 0 255
+
+ For information on configuring LILO, please refer to its documentation.
+
+- Using GRUB
+
+ When using GRUB, kernel parameter are simply appended after the kernel
+ specification: kernel <kernel> <parameters>
+
+- Using loadlin
+
+ loadlin may be used to boot Linux from a DOS command prompt without
+ requiring a local hard disk to mount as root. This has not been
+ thoroughly tested by the authors of this document, but in general
+ it should be possible configure the kernel command line similarly
+ to the configuration of LILO.
+
+ Please refer to the loadlin documentation for further information.
+
+- Using a boot ROM
+
+ This is probably the most elegant way of booting a diskless client.
+ With a boot ROM the kernel is loaded using the TFTP protocol. The
+ authors of this document are not aware of any no commercial boot
+ ROMs that support booting Linux over the network. However, there
+ are two free implementations of a boot ROM, netboot-nfs and
+ etherboot, both of which are available on sunsite.unc.edu, and both
+ of which contain everything you need to boot a diskless Linux client.
+
+- Using pxelinux
+
+ Pxelinux may be used to boot linux using the PXE boot loader
+ which is present on many modern network cards.
+
+ When using pxelinux, the kernel image is specified using
+ "kernel <relative-path-below /tftpboot>". The nfsroot parameters
+ are passed to the kernel by adding them to the "append" line.
+ It is common to use serial console in conjunction with pxelinux,
+ see Documentation/admin-guide/serial-console.rst for more information.
+
+ For more information on isolinux, including how to create bootdisks
+ for prebuilt kernels, see https://syslinux.zytor.com/
+
+
+
+
+Credits
+=======
+
+ The nfsroot code in the kernel and the RARP support have been written
+ by Gero Kuhlmann <gero@gkminix.han.de>.
+
+ The rest of the IP layer autoconfiguration code has been written
+ by Martin Mares <mj@atrey.karlin.mff.cuni.cz>.
+
+ In order to write the initial version of nfsroot I would like to thank
+ Jens-Uwe Mager <jum@anubis.han.de> for his help.
diff --git a/Documentation/admin-guide/nfs/pnfs-block-server.rst b/Documentation/admin-guide/nfs/pnfs-block-server.rst
new file mode 100644
index 000000000000..20fe9f5117fe
--- /dev/null
+++ b/Documentation/admin-guide/nfs/pnfs-block-server.rst
@@ -0,0 +1,42 @@
+===================================
+pNFS block layout server user guide
+===================================
+
+The Linux NFS server now supports the pNFS block layout extension. In this
+case the NFS server acts as Metadata Server (MDS) for pNFS, which in addition
+to handling all the metadata access to the NFS export also hands out layouts
+to the clients to directly access the underlying block devices that are
+shared with the client.
+
+To use pNFS block layouts with the Linux NFS server the exported file
+system needs to support the pNFS block layouts (currently just XFS), and the
+file system must sit on shared storage (typically iSCSI) that is accessible
+to the clients in addition to the MDS. As of now the file system needs to
+sit directly on the exported volume, striping or concatenation of
+volumes on the MDS and clients is not supported yet.
+
+On the server, pNFS block volume support is automatically if the file system
+support it. On the client make sure the kernel has the CONFIG_PNFS_BLOCK
+option enabled, the blkmapd daemon from nfs-utils is running, and the
+file system is mounted using the NFSv4.1 protocol version (mount -o vers=4.1).
+
+If the nfsd server needs to fence a non-responding client it calls
+/sbin/nfsd-recall-failed with the first argument set to the IP address of
+the client, and the second argument set to the device node without the /dev
+prefix for the file system to be fenced. Below is an example file that shows
+how to translate the device into a serial number from SCSI EVPD 0x80::
+
+ cat > /sbin/nfsd-recall-failed << EOF
+
+.. code-block:: sh
+
+ #!/bin/sh
+
+ CLIENT="$1"
+ DEV="/dev/$2"
+ EVPD=`sg_inq --page=0x80 ${DEV} | \
+ grep "Unit serial number:" | \
+ awk -F ': ' '{print $2}'`
+
+ echo "fencing client ${CLIENT} serial ${EVPD}" >> /var/log/pnfsd-fence.log
+ EOF
diff --git a/Documentation/admin-guide/nfs/pnfs-scsi-server.rst b/Documentation/admin-guide/nfs/pnfs-scsi-server.rst
new file mode 100644
index 000000000000..b2eec2288329
--- /dev/null
+++ b/Documentation/admin-guide/nfs/pnfs-scsi-server.rst
@@ -0,0 +1,24 @@
+
+==================================
+pNFS SCSI layout server user guide
+==================================
+
+This document describes support for pNFS SCSI layouts in the Linux NFS server.
+With pNFS SCSI layouts, the NFS server acts as Metadata Server (MDS) for pNFS,
+which in addition to handling all the metadata access to the NFS export,
+also hands out layouts to the clients so that they can directly access the
+underlying SCSI LUNs that are shared with the client.
+
+To use pNFS SCSI layouts with the Linux NFS server, the exported file
+system needs to support the pNFS SCSI layouts (currently just XFS), and the
+file system must sit on a SCSI LUN that is accessible to the clients in
+addition to the MDS. As of now the file system needs to sit directly on the
+exported LUN, striping or concatenation of LUNs on the MDS and clients
+is not supported yet.
+
+On a server built with CONFIG_NFSD_SCSI, the pNFS SCSI volume support is
+automatically enabled if the file system is exported using the "pnfs"
+option and the underlying SCSI device support persistent reservations.
+On the client make sure the kernel has the CONFIG_PNFS_BLOCK option
+enabled, and the file system is mounted using the NFSv4.1 protocol
+version (mount -o vers=4.1).
diff --git a/Documentation/admin-guide/numastat.rst b/Documentation/admin-guide/numastat.rst
new file mode 100644
index 000000000000..08ec2c2bdce3
--- /dev/null
+++ b/Documentation/admin-guide/numastat.rst
@@ -0,0 +1,55 @@
+===============================
+Numa policy hit/miss statistics
+===============================
+
+/sys/devices/system/node/node*/numastat
+
+All units are pages. Hugepages have separate counters.
+
+The numa_hit, numa_miss and numa_foreign counters reflect how well processes
+are able to allocate memory from nodes they prefer. If they succeed, numa_hit
+is incremented on the preferred node, otherwise numa_foreign is incremented on
+the preferred node and numa_miss on the node where allocation succeeded.
+
+Usually preferred node is the one local to the CPU where the process executes,
+but restrictions such as mempolicies can change that, so there are also two
+counters based on CPU local node. local_node is similar to numa_hit and is
+incremented on allocation from a node by CPU on the same node. other_node is
+similar to numa_miss and is incremented on the node where allocation succeeds
+from a CPU from a different node. Note there is no counter analogical to
+numa_foreign.
+
+In more detail:
+
+=============== ============================================================
+numa_hit A process wanted to allocate memory from this node,
+ and succeeded.
+
+numa_miss A process wanted to allocate memory from another node,
+ but ended up with memory from this node.
+
+numa_foreign A process wanted to allocate on this node,
+ but ended up with memory from another node.
+
+local_node A process ran on this node's CPU,
+ and got memory from this node.
+
+other_node A process ran on a different node's CPU
+ and got memory from this node.
+
+interleave_hit Interleaving wanted to allocate from this node
+ and succeeded.
+=============== ============================================================
+
+For easier reading you can use the numastat utility from the numactl package
+(http://oss.sgi.com/projects/libnuma/). Note that it only works
+well right now on machines with a small number of CPUs.
+
+Note that on systems with memoryless nodes (where a node has CPUs but no
+memory) the numa_hit, numa_miss and numa_foreign statistics can be skewed
+heavily. In the current kernel implementation, if a process prefers a
+memoryless node (i.e. because it is running on one of its local CPU), the
+implementation actually treats one of the nearest nodes with memory as the
+preferred node. As a result, such allocation will not increase the numa_foreign
+counter on the memoryless node, and will skew the numa_hit, numa_miss and
+numa_foreign statistics of the nearest node.
diff --git a/Documentation/admin-guide/nvme-multipath.rst b/Documentation/admin-guide/nvme-multipath.rst
new file mode 100644
index 000000000000..97ca1ccef459
--- /dev/null
+++ b/Documentation/admin-guide/nvme-multipath.rst
@@ -0,0 +1,72 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================
+Linux NVMe multipath
+====================
+
+This document describes NVMe multipath and its path selection policies supported
+by the Linux NVMe host driver.
+
+
+Introduction
+============
+
+The NVMe multipath feature in Linux integrates namespaces with the same
+identifier into a single block device. Using multipath enhances the reliability
+and stability of I/O access while improving bandwidth performance. When a user
+sends I/O to this merged block device, the multipath mechanism selects one of
+the underlying block devices (paths) according to the configured policy.
+Different policies result in different path selections.
+
+
+Policies
+========
+
+All policies follow the ANA (Asymmetric Namespace Access) mechanism, meaning
+that when an optimized path is available, it will be chosen over a non-optimized
+one. Current the NVMe multipath policies include numa(default), round-robin and
+queue-depth.
+
+To set the desired policy (e.g., round-robin), use one of the following methods:
+ 1. echo -n "round-robin" > /sys/module/nvme_core/parameters/iopolicy
+ 2. or add the "nvme_core.iopolicy=round-robin" to cmdline.
+
+
+NUMA
+----
+
+The NUMA policy selects the path closest to the NUMA node of the current CPU for
+I/O distribution. This policy maintains the nearest paths to each NUMA node
+based on network interface connections.
+
+When to use the NUMA policy:
+ 1. Multi-core Systems: Optimizes memory access in multi-core and
+ multi-processor systems, especially under NUMA architecture.
+ 2. High Affinity Workloads: Binds I/O processing to the CPU to reduce
+ communication and data transfer delays across nodes.
+
+
+Round-Robin
+-----------
+
+The round-robin policy distributes I/O requests evenly across all paths to
+enhance throughput and resource utilization. Each I/O operation is sent to the
+next path in sequence.
+
+When to use the round-robin policy:
+ 1. Balanced Workloads: Effective for balanced and predictable workloads with
+ similar I/O size and type.
+ 2. Homogeneous Path Performance: Utilizes all paths efficiently when
+ performance characteristics (e.g., latency, bandwidth) are similar.
+
+
+Queue-Depth
+-----------
+
+The queue-depth policy manages I/O requests based on the current queue depth
+of each path, selecting the path with the least number of in-flight I/Os.
+
+When to use the queue-depth policy:
+ 1. High load with small I/Os: Effectively balances load across paths when
+ the load is high, and I/O operations consist of small, relatively
+ fixed-sized requests.
diff --git a/Documentation/admin-guide/perf-security.rst b/Documentation/admin-guide/perf-security.rst
new file mode 100644
index 000000000000..34aa334320ca
--- /dev/null
+++ b/Documentation/admin-guide/perf-security.rst
@@ -0,0 +1,325 @@
+.. _perf_security:
+
+Perf events and tool security
+=============================
+
+Overview
+--------
+
+Usage of Performance Counters for Linux (perf_events) [1]_ , [2]_ , [3]_
+can impose a considerable risk of leaking sensitive data accessed by
+monitored processes. The data leakage is possible both in scenarios of
+direct usage of perf_events system call API [2]_ and over data files
+generated by Perf tool user mode utility (Perf) [3]_ , [4]_ . The risk
+depends on the nature of data that perf_events performance monitoring
+units (PMU) [2]_ and Perf collect and expose for performance analysis.
+Collected system and performance data may be split into several
+categories:
+
+1. System hardware and software configuration data, for example: a CPU
+ model and its cache configuration, an amount of available memory and
+ its topology, used kernel and Perf versions, performance monitoring
+ setup including experiment time, events configuration, Perf command
+ line parameters, etc.
+
+2. User and kernel module paths and their load addresses with sizes,
+ process and thread names with their PIDs and TIDs, timestamps for
+ captured hardware and software events.
+
+3. Content of kernel software counters (e.g., for context switches, page
+ faults, CPU migrations), architectural hardware performance counters
+ (PMC) [8]_ and machine specific registers (MSR) [9]_ that provide
+ execution metrics for various monitored parts of the system (e.g.,
+ memory controller (IMC), interconnect (QPI/UPI) or peripheral (PCIe)
+ uncore counters) without direct attribution to any execution context
+ state.
+
+4. Content of architectural execution context registers (e.g., RIP, RSP,
+ RBP on x86_64), process user and kernel space memory addresses and
+ data, content of various architectural MSRs that capture data from
+ this category.
+
+Data that belong to the fourth category can potentially contain
+sensitive process data. If PMUs in some monitoring modes capture values
+of execution context registers or data from process memory then access
+to such monitoring modes requires to be ordered and secured properly.
+So, perf_events performance monitoring and observability operations are
+the subject for security access control management [5]_ .
+
+perf_events access control
+-------------------------------
+
+To perform security checks, the Linux implementation splits processes
+into two categories [6]_ : a) privileged processes (whose effective user
+ID is 0, referred to as superuser or root), and b) unprivileged
+processes (whose effective UID is nonzero). Privileged processes bypass
+all kernel security permission checks so perf_events performance
+monitoring is fully available to privileged processes without access,
+scope and resource restrictions.
+
+Unprivileged processes are subject to a full security permission check
+based on the process's credentials [5]_ (usually: effective UID,
+effective GID, and supplementary group list).
+
+Linux divides the privileges traditionally associated with superuser
+into distinct units, known as capabilities [6]_ , which can be
+independently enabled and disabled on per-thread basis for processes and
+files of unprivileged users.
+
+Unprivileged processes with enabled CAP_PERFMON capability are treated
+as privileged processes with respect to perf_events performance
+monitoring and observability operations, thus, bypass *scope* permissions
+checks in the kernel. CAP_PERFMON implements the principle of least
+privilege [13]_ (POSIX 1003.1e: 2.2.2.39) for performance monitoring and
+observability operations in the kernel and provides a secure approach to
+performance monitoring and observability in the system.
+
+For backward compatibility reasons the access to perf_events monitoring and
+observability operations is also open for CAP_SYS_ADMIN privileged
+processes but CAP_SYS_ADMIN usage for secure monitoring and observability
+use cases is discouraged with respect to the CAP_PERFMON capability.
+If system audit records [14]_ for a process using perf_events system call
+API contain denial records of acquiring both CAP_PERFMON and CAP_SYS_ADMIN
+capabilities then providing the process with CAP_PERFMON capability singly
+is recommended as the preferred secure approach to resolve double access
+denial logging related to usage of performance monitoring and observability.
+
+Prior Linux v5.9 unprivileged processes using perf_events system call
+are also subject for PTRACE_MODE_READ_REALCREDS ptrace access mode check
+[7]_ , whose outcome determines whether monitoring is permitted.
+So unprivileged processes provided with CAP_SYS_PTRACE capability are
+effectively permitted to pass the check. Starting from Linux v5.9
+CAP_SYS_PTRACE capability is not required and CAP_PERFMON is enough to
+be provided for processes to make performance monitoring and observability
+operations.
+
+Other capabilities being granted to unprivileged processes can
+effectively enable capturing of additional data required for later
+performance analysis of monitored processes or a system. For example,
+CAP_SYSLOG capability permits reading kernel space memory addresses from
+/proc/kallsyms file.
+
+Privileged Perf users groups
+---------------------------------
+
+Mechanisms of capabilities, privileged capability-dumb files [6]_,
+file system ACLs [10]_ and sudo [15]_ utility can be used to create
+dedicated groups of privileged Perf users who are permitted to execute
+performance monitoring and observability without limits. The following
+steps can be taken to create such groups of privileged Perf users.
+
+1. Create perf_users group of privileged Perf users, assign perf_users
+ group to Perf tool executable and limit access to the executable for
+ other users in the system who are not in the perf_users group:
+
+::
+
+ # groupadd perf_users
+ # ls -alhF
+ -rwxr-xr-x 2 root root 11M Oct 19 15:12 perf
+ # chgrp perf_users perf
+ # ls -alhF
+ -rwxr-xr-x 2 root perf_users 11M Oct 19 15:12 perf
+ # chmod o-rwx perf
+ # ls -alhF
+ -rwxr-x--- 2 root perf_users 11M Oct 19 15:12 perf
+
+2. Assign the required capabilities to the Perf tool executable file and
+ enable members of perf_users group with monitoring and observability
+ privileges [6]_ :
+
+::
+
+ # setcap "cap_perfmon,cap_sys_ptrace,cap_syslog=ep" perf
+ # setcap -v "cap_perfmon,cap_sys_ptrace,cap_syslog=ep" perf
+ perf: OK
+ # getcap perf
+ perf = cap_sys_ptrace,cap_syslog,cap_perfmon+ep
+
+If the libcap [16]_ installed doesn't yet support "cap_perfmon", use "38" instead,
+i.e.:
+
+::
+
+ # setcap "38,cap_ipc_lock,cap_sys_ptrace,cap_syslog=ep" perf
+
+Note that you may need to have 'cap_ipc_lock' in the mix for tools such as
+'perf top', alternatively use 'perf top -m N', to reduce the memory that
+it uses for the perf ring buffer, see the memory allocation section below.
+
+Using a libcap without support for CAP_PERFMON will make cap_get_flag(caps, 38,
+CAP_EFFECTIVE, &val) fail, which will lead the default event to be 'cycles:u',
+so as a workaround explicitly ask for the 'cycles' event, i.e.:
+
+::
+
+ # perf top -e cycles
+
+To get kernel and user samples with a perf binary with just CAP_PERFMON.
+
+As a result, members of perf_users group are capable of conducting
+performance monitoring and observability by using functionality of the
+configured Perf tool executable that, when executes, passes perf_events
+subsystem scope checks.
+
+In case Perf tool executable can't be assigned required capabilities (e.g.
+file system is mounted with nosuid option or extended attributes are
+not supported by the file system) then creation of the capabilities
+privileged environment, naturally shell, is possible. The shell provides
+inherent processes with CAP_PERFMON and other required capabilities so that
+performance monitoring and observability operations are available in the
+environment without limits. Access to the environment can be open via sudo
+utility for members of perf_users group only. In order to create such
+environment:
+
+1. Create shell script that uses capsh utility [16]_ to assign CAP_PERFMON
+ and other required capabilities into ambient capability set of the shell
+ process, lock the process security bits after enabling SECBIT_NO_SETUID_FIXUP,
+ SECBIT_NOROOT and SECBIT_NO_CAP_AMBIENT_RAISE bits and then change
+ the process identity to sudo caller of the script who should essentially
+ be a member of perf_users group:
+
+::
+
+ # ls -alh /usr/local/bin/perf.shell
+ -rwxr-xr-x. 1 root root 83 Oct 13 23:57 /usr/local/bin/perf.shell
+ # cat /usr/local/bin/perf.shell
+ exec /usr/sbin/capsh --iab=^cap_perfmon --secbits=239 --user=$SUDO_USER -- -l
+
+2. Extend sudo policy at /etc/sudoers file with a rule for perf_users group:
+
+::
+
+ # grep perf_users /etc/sudoers
+ %perf_users ALL=/usr/local/bin/perf.shell
+
+3. Check that members of perf_users group have access to the privileged
+ shell and have CAP_PERFMON and other required capabilities enabled
+ in permitted, effective and ambient capability sets of an inherent process:
+
+::
+
+ $ id
+ uid=1003(capsh_test) gid=1004(capsh_test) groups=1004(capsh_test),1000(perf_users) context=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023
+ $ sudo perf.shell
+ [sudo] password for capsh_test:
+ $ grep Cap /proc/self/status
+ CapInh: 0000004000000000
+ CapPrm: 0000004000000000
+ CapEff: 0000004000000000
+ CapBnd: 000000ffffffffff
+ CapAmb: 0000004000000000
+ $ capsh --decode=0000004000000000
+ 0x0000004000000000=cap_perfmon
+
+As a result, members of perf_users group have access to the privileged
+environment where they can use tools employing performance monitoring APIs
+governed by CAP_PERFMON Linux capability.
+
+This specific access control management is only available to superuser
+or root running processes with CAP_SETPCAP, CAP_SETFCAP [6]_
+capabilities.
+
+Unprivileged users
+-----------------------------------
+
+perf_events *scope* and *access* control for unprivileged processes
+is governed by perf_event_paranoid [2]_ setting:
+
+-1:
+ Impose no *scope* and *access* restrictions on using perf_events
+ performance monitoring. Per-user per-cpu perf_event_mlock_kb [2]_
+ locking limit is ignored when allocating memory buffers for storing
+ performance data. This is the least secure mode since allowed
+ monitored *scope* is maximized and no perf_events specific limits
+ are imposed on *resources* allocated for performance monitoring.
+
+>=0:
+ *scope* includes per-process and system wide performance monitoring
+ but excludes raw tracepoints and ftrace function tracepoints
+ monitoring. CPU and system events happened when executing either in
+ user or in kernel space can be monitored and captured for later
+ analysis. Per-user per-cpu perf_event_mlock_kb locking limit is
+ imposed but ignored for unprivileged processes with CAP_IPC_LOCK
+ [6]_ capability.
+
+>=1:
+ *scope* includes per-process performance monitoring only and
+ excludes system wide performance monitoring. CPU and system events
+ happened when executing either in user or in kernel space can be
+ monitored and captured for later analysis. Per-user per-cpu
+ perf_event_mlock_kb locking limit is imposed but ignored for
+ unprivileged processes with CAP_IPC_LOCK capability.
+
+>=2:
+ *scope* includes per-process performance monitoring only. CPU and
+ system events happened when executing in user space only can be
+ monitored and captured for later analysis. Per-user per-cpu
+ perf_event_mlock_kb locking limit is imposed but ignored for
+ unprivileged processes with CAP_IPC_LOCK capability.
+
+Resource control
+---------------------------------
+
+Open file descriptors
++++++++++++++++++++++
+
+The perf_events system call API [2]_ allocates file descriptors for
+every configured PMU event. Open file descriptors are a per-process
+accountable resource governed by the RLIMIT_NOFILE [11]_ limit
+(ulimit -n), which is usually derived from the login shell process. When
+configuring Perf collection for a long list of events on a large server
+system, this limit can be easily hit preventing required monitoring
+configuration. RLIMIT_NOFILE limit can be increased on per-user basis
+modifying content of the limits.conf file [12]_ . Ordinarily, a Perf
+sampling session (perf record) requires an amount of open perf_event
+file descriptors that is not less than the number of monitored events
+multiplied by the number of monitored CPUs.
+
+Memory allocation
++++++++++++++++++
+
+The amount of memory available to user processes for capturing
+performance monitoring data is governed by the perf_event_mlock_kb [2]_
+setting. This perf_event specific resource setting defines overall
+per-cpu limits of memory allowed for mapping by the user processes to
+execute performance monitoring. The setting essentially extends the
+RLIMIT_MEMLOCK [11]_ limit, but only for memory regions mapped
+specifically for capturing monitored performance events and related data.
+
+For example, if a machine has eight cores and perf_event_mlock_kb limit
+is set to 516 KiB, then a user process is provided with 516 KiB * 8 =
+4128 KiB of memory above the RLIMIT_MEMLOCK limit (ulimit -l) for
+perf_event mmap buffers. In particular, this means that, if the user
+wants to start two or more performance monitoring processes, the user is
+required to manually distribute the available 4128 KiB between the
+monitoring processes, for example, using the --mmap-pages Perf record
+mode option. Otherwise, the first started performance monitoring process
+allocates all available 4128 KiB and the other processes will fail to
+proceed due to the lack of memory.
+
+RLIMIT_MEMLOCK and perf_event_mlock_kb resource constraints are ignored
+for processes with the CAP_IPC_LOCK capability. Thus, perf_events/Perf
+privileged users can be provided with memory above the constraints for
+perf_events/Perf performance monitoring purpose by providing the Perf
+executable with CAP_IPC_LOCK capability.
+
+Bibliography
+------------
+
+.. [1] `<https://lwn.net/Articles/337493/>`_
+.. [2] `<http://man7.org/linux/man-pages/man2/perf_event_open.2.html>`_
+.. [3] `<http://web.eece.maine.edu/~vweaver/projects/perf_events/>`_
+.. [4] `<https://perf.wiki.kernel.org/index.php/Main_Page>`_
+.. [5] `<https://www.kernel.org/doc/html/latest/security/credentials.html>`_
+.. [6] `<http://man7.org/linux/man-pages/man7/capabilities.7.html>`_
+.. [7] `<http://man7.org/linux/man-pages/man2/ptrace.2.html>`_
+.. [8] `<https://en.wikipedia.org/wiki/Hardware_performance_counter>`_
+.. [9] `<https://en.wikipedia.org/wiki/Model-specific_register>`_
+.. [10] `<http://man7.org/linux/man-pages/man5/acl.5.html>`_
+.. [11] `<http://man7.org/linux/man-pages/man2/getrlimit.2.html>`_
+.. [12] `<http://man7.org/linux/man-pages/man5/limits.conf.5.html>`_
+.. [13] `<https://sites.google.com/site/fullycapable>`_
+.. [14] `<http://man7.org/linux/man-pages/man8/auditd.8.html>`_
+.. [15] `<https://man7.org/linux/man-pages/man8/sudo.8.html>`_
+.. [16] `<https://git.kernel.org/pub/scm/libs/libcap/libcap.git/>`_
diff --git a/Documentation/admin-guide/perf/alibaba_pmu.rst b/Documentation/admin-guide/perf/alibaba_pmu.rst
new file mode 100644
index 000000000000..7d840023903f
--- /dev/null
+++ b/Documentation/admin-guide/perf/alibaba_pmu.rst
@@ -0,0 +1,105 @@
+=============================================================
+Alibaba's T-Head SoC Uncore Performance Monitoring Unit (PMU)
+=============================================================
+
+The Yitian 710, custom-built by Alibaba Group's chip development business,
+T-Head, implements uncore PMU for performance and functional debugging to
+facilitate system maintenance.
+
+DDR Sub-System Driveway (DRW) PMU Driver
+=========================================
+
+Yitian 710 employs eight DDR5/4 channels, four on each die. Each DDR5 channel
+is independent of others to service system memory requests. And one DDR5
+channel is split into two independent sub-channels. The DDR Sub-System Driveway
+implements separate PMUs for each sub-channel to monitor various performance
+metrics.
+
+The Driveway PMU devices are named as ali_drw_<sys_base_addr> with perf.
+For example, ali_drw_21000 and ali_drw_21080 are two PMU devices for two
+sub-channels of the same channel in die 0. And the PMU device of die 1 is
+prefixed with ali_drw_400XXXXX, e.g. ali_drw_40021000.
+
+Each sub-channel has 36 PMU counters in total, which is classified into
+four groups:
+
+- Group 0: PMU Cycle Counter. This group has one pair of counters
+ pmu_cycle_cnt_low and pmu_cycle_cnt_high, that is used as the cycle count
+ based on DDRC core clock.
+
+- Group 1: PMU Bandwidth Counters. This group has 8 counters that are used
+ to count the total access number of either the eight bank groups in a
+ selected rank, or four ranks separately in the first 4 counters. The base
+ transfer unit is 64B.
+
+- Group 2: PMU Retry Counters. This group has 10 counters, that intend to
+ count the total retry number of each type of uncorrectable error.
+
+- Group 3: PMU Common Counters. This group has 16 counters, that are used
+ to count the common events.
+
+For now, the Driveway PMU driver only uses counters in group 0 and group 3.
+
+The DDR Controller (DDRCTL) and DDR PHY combine to create a complete solution
+for connecting an SoC application bus to DDR memory devices. The DDRCTL
+receives transactions Host Interface (HIF) which is custom-defined by Synopsys.
+These transactions are queued internally and scheduled for access while
+satisfying the SDRAM protocol timing requirements, transaction priorities, and
+dependencies between the transactions. The DDRCTL in turn issues commands on
+the DDR PHY Interface (DFI) to the PHY module, which launches and captures data
+to and from the SDRAM. The driveway PMUs have hardware logic to gather
+statistics and performance logging signals on HIF, DFI, etc.
+
+By counting the READ, WRITE and RMW commands sent to the DDRC through the HIF
+interface, we could calculate the bandwidth. Example usage of counting memory
+data bandwidth::
+
+ perf stat \
+ -e ali_drw_21000/hif_wr/ \
+ -e ali_drw_21000/hif_rd/ \
+ -e ali_drw_21000/hif_rmw/ \
+ -e ali_drw_21000/cycle/ \
+ -e ali_drw_21080/hif_wr/ \
+ -e ali_drw_21080/hif_rd/ \
+ -e ali_drw_21080/hif_rmw/ \
+ -e ali_drw_21080/cycle/ \
+ -e ali_drw_23000/hif_wr/ \
+ -e ali_drw_23000/hif_rd/ \
+ -e ali_drw_23000/hif_rmw/ \
+ -e ali_drw_23000/cycle/ \
+ -e ali_drw_23080/hif_wr/ \
+ -e ali_drw_23080/hif_rd/ \
+ -e ali_drw_23080/hif_rmw/ \
+ -e ali_drw_23080/cycle/ \
+ -e ali_drw_25000/hif_wr/ \
+ -e ali_drw_25000/hif_rd/ \
+ -e ali_drw_25000/hif_rmw/ \
+ -e ali_drw_25000/cycle/ \
+ -e ali_drw_25080/hif_wr/ \
+ -e ali_drw_25080/hif_rd/ \
+ -e ali_drw_25080/hif_rmw/ \
+ -e ali_drw_25080/cycle/ \
+ -e ali_drw_27000/hif_wr/ \
+ -e ali_drw_27000/hif_rd/ \
+ -e ali_drw_27000/hif_rmw/ \
+ -e ali_drw_27000/cycle/ \
+ -e ali_drw_27080/hif_wr/ \
+ -e ali_drw_27080/hif_rd/ \
+ -e ali_drw_27080/hif_rmw/ \
+ -e ali_drw_27080/cycle/ -- sleep 10
+
+Example usage of counting all memory read/write bandwidth by metric::
+
+ perf stat -M ddr_read_bandwidth.all -- sleep 10
+ perf stat -M ddr_write_bandwidth.all -- sleep 10
+
+The average DRAM bandwidth can be calculated as follows:
+
+- Read Bandwidth = perf_hif_rd * DDRC_WIDTH * DDRC_Freq / DDRC_Cycle
+- Write Bandwidth = (perf_hif_wr + perf_hif_rmw) * DDRC_WIDTH * DDRC_Freq / DDRC_Cycle
+
+Here, DDRC_WIDTH = 64 bytes.
+
+The current driver does not support sampling. So "perf record" is
+unsupported. Also attach to a task is unsupported as the events are all
+uncore.
diff --git a/Documentation/admin-guide/perf/ampere_cspmu.rst b/Documentation/admin-guide/perf/ampere_cspmu.rst
new file mode 100644
index 000000000000..94f93f5aee6c
--- /dev/null
+++ b/Documentation/admin-guide/perf/ampere_cspmu.rst
@@ -0,0 +1,29 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============================================
+Ampere SoC Performance Monitoring Unit (PMU)
+============================================
+
+Ampere SoC PMU is a generic PMU IP that follows Arm CoreSight PMU architecture.
+Therefore, the driver is implemented as a submodule of arm_cspmu driver. At the
+first phase it's used for counting MCU events on AmpereOne.
+
+
+MCU PMU events
+--------------
+
+The PMU driver supports setting filters for "rank", "bank", and "threshold".
+Note, that the filters are per PMU instance rather than per event.
+
+
+Example for perf tool use::
+
+ / # perf list ampere
+
+ ampere_mcu_pmu_0/act_sent/ [Kernel PMU event]
+ <...>
+ ampere_mcu_pmu_1/rd_sent/ [Kernel PMU event]
+ <...>
+
+ / # perf stat -a -e ampere_mcu_pmu_0/act_sent,bank=5,rank=3,threshold=2/,ampere_mcu_pmu_1/rd_sent/ \
+ sleep 1
diff --git a/Documentation/admin-guide/perf/arm-ccn.rst b/Documentation/admin-guide/perf/arm-ccn.rst
new file mode 100644
index 000000000000..f62f7fe50eba
--- /dev/null
+++ b/Documentation/admin-guide/perf/arm-ccn.rst
@@ -0,0 +1,61 @@
+==========================
+ARM Cache Coherent Network
+==========================
+
+CCN-504 is a ring-bus interconnect consisting of 11 crosspoints
+(XPs), with each crosspoint supporting up to two device ports,
+so nodes (devices) 0 and 1 are connected to crosspoint 0,
+nodes 2 and 3 to crosspoint 1 etc.
+
+PMU (perf) driver
+-----------------
+
+The CCN driver registers a perf PMU driver, which provides
+description of available events and configuration options
+in sysfs, see /sys/bus/event_source/devices/ccn*.
+
+The "format" directory describes format of the config, config1
+and config2 fields of the perf_event_attr structure. The "events"
+directory provides configuration templates for all documented
+events, that can be used with perf tool. For example "xp_valid_flit"
+is an equivalent of "type=0x8,event=0x4". Other parameters must be
+explicitly specified.
+
+For events originating from device, "node" defines its index.
+
+Crosspoint PMU events require "xp" (index), "bus" (bus number)
+and "vc" (virtual channel ID).
+
+Crosspoint watchpoint-based events (special "event" value 0xfe)
+require "xp" and "vc" as above plus "port" (device port index),
+"dir" (transmit/receive direction), comparator values ("cmp_l"
+and "cmp_h") and "mask", being index of the comparator mask.
+
+Masks are defined separately from the event description
+(due to limited number of the config values) in the "cmp_mask"
+directory, with first 8 configurable by user and additional
+4 hardcoded for the most frequent use cases.
+
+Cycle counter is described by a "type" value 0xff and does
+not require any other settings.
+
+The driver also provides a "cpumask" sysfs attribute, which contains
+a single CPU ID, of the processor which will be used to handle all
+the CCN PMU events. It is recommended that the user space tools
+request the events on this processor (if not, the perf_event->cpu value
+will be overwritten anyway). In case of this processor being offlined,
+the events are migrated to another one and the attribute is updated.
+
+Example of perf tool use::
+
+ / # perf list | grep ccn
+ ccn/cycles/ [Kernel PMU event]
+ <...>
+ ccn/xp_valid_flit,xp=?,port=?,vc=?,dir=?/ [Kernel PMU event]
+ <...>
+
+ / # perf stat -a -e ccn/cycles/,ccn/xp_valid_flit,xp=1,port=0,vc=1,dir=1/ \
+ sleep 1
+
+The driver does not support sampling, therefore "perf record" will
+not work. Per-task (without "-a") perf sessions are not supported.
diff --git a/Documentation/admin-guide/perf/arm-cmn.rst b/Documentation/admin-guide/perf/arm-cmn.rst
new file mode 100644
index 000000000000..796e25b7027b
--- /dev/null
+++ b/Documentation/admin-guide/perf/arm-cmn.rst
@@ -0,0 +1,65 @@
+=============================
+Arm Coherent Mesh Network PMU
+=============================
+
+CMN-600 is a configurable mesh interconnect consisting of a rectangular
+grid of crosspoints (XPs), with each crosspoint supporting up to two
+device ports to which various AMBA CHI agents are attached.
+
+CMN implements a distributed PMU design as part of its debug and trace
+functionality. This consists of a local monitor (DTM) at every XP, which
+counts up to 4 event signals from the connected device nodes and/or the
+XP itself. Overflow from these local counters is accumulated in up to 8
+global counters implemented by the main controller (DTC), which provides
+overall PMU control and interrupts for global counter overflow.
+
+PMU events
+----------
+
+The PMU driver registers a single PMU device for the whole interconnect,
+see /sys/bus/event_source/devices/arm_cmn_0. Multi-chip systems may link
+more than one CMN together via external CCIX links - in this situation,
+each mesh counts its own events entirely independently, and additional
+PMU devices will be named arm_cmn_{1..n}.
+
+Most events are specified in a format based directly on the TRM
+definitions - "type" selects the respective node type, and "eventid" the
+event number. Some events require an additional occupancy ID, which is
+specified by "occupid".
+
+* Since RN-D nodes do not have any distinct events from RN-I nodes, they
+ are treated as the same type (0xa), and the common event templates are
+ named "rnid_*".
+
+* The cycle counter is treated as a synthetic event belonging to the DTC
+ node ("type" == 0x3, "eventid" is ignored).
+
+* XP events also encode the port and channel in the "eventid" field, to
+ match the underlying pmu_event0_id encoding for the pmu_event_sel
+ register. The event templates are named with prefixes to cover all
+ permutations.
+
+By default each event provides an aggregate count over all nodes of the
+given type. To target a specific node, "bynodeid" must be set to 1 and
+"nodeid" to the appropriate value derived from the CMN configuration
+(as defined in the "Node ID Mapping" section of the TRM).
+
+Watchpoints
+-----------
+
+The PMU can also count watchpoint events to monitor specific flit
+traffic. Watchpoints are treated as a synthetic event type, and like PMU
+events can be global or targeted with a particular XP's "nodeid" value.
+Since the watchpoint direction is otherwise implicit in the underlying
+register selection, separate events are provided for flit uploads and
+downloads.
+
+The flit match value and mask are passed in config1 and config2 ("val"
+and "mask" respectively). "wp_dev_sel", "wp_chn_sel", "wp_grp" and
+"wp_exclusive" are specified per the TRM definitions for dtm_wp_config0.
+Where a watchpoint needs to match fields from both match groups on the
+REQ or SNP channel, it can be specified as two events - one for each
+group - with the same nonzero "combine" value. The count for such a
+pair of combined events will be attributed to the primary match.
+Watchpoint events with a "combine" value of 0 are considered independent
+and will count individually.
diff --git a/Documentation/admin-guide/perf/arm-ni.rst b/Documentation/admin-guide/perf/arm-ni.rst
new file mode 100644
index 000000000000..d26a8f697c36
--- /dev/null
+++ b/Documentation/admin-guide/perf/arm-ni.rst
@@ -0,0 +1,17 @@
+====================================
+Arm Network-on Chip Interconnect PMU
+====================================
+
+NI-700 and friends implement a distinct PMU for each clock domain within the
+interconnect. Correspondingly, the driver exposes multiple PMU devices named
+arm_ni_<x>_cd_<y>, where <x> is an (arbitrary) instance identifier and <y> is
+the clock domain ID within that particular instance. If multiple NI instances
+exist within a system, the PMU devices can be correlated with the underlying
+hardware instance via sysfs parentage.
+
+Each PMU exposes base event aliases for the interface types present in its clock
+domain. These require qualifying with the "eventid" and "nodeid" parameters
+to specify the event code to count and the interface at which to count it
+(per the configured hardware ID as reflected in the xxNI_NODE_INFO register).
+The exception is the "cycles" alias for the PMU cycle counter, which is encoded
+with the PMU node type and needs no further qualification.
diff --git a/Documentation/admin-guide/perf/arm_dsu_pmu.rst b/Documentation/admin-guide/perf/arm_dsu_pmu.rst
new file mode 100644
index 000000000000..7fd34db75d13
--- /dev/null
+++ b/Documentation/admin-guide/perf/arm_dsu_pmu.rst
@@ -0,0 +1,29 @@
+==================================
+ARM DynamIQ Shared Unit (DSU) PMU
+==================================
+
+ARM DynamIQ Shared Unit integrates one or more cores with an L3 memory system,
+control logic and external interfaces to form a multicore cluster. The PMU
+allows counting the various events related to the L3 cache, Snoop Control Unit
+etc, using 32bit independent counters. It also provides a 64bit cycle counter.
+
+The PMU can only be accessed via CPU system registers and are common to the
+cores connected to the same DSU. Like most of the other uncore PMUs, DSU
+PMU doesn't support process specific events and cannot be used in sampling mode.
+
+The DSU provides a bitmap for a subset of implemented events via hardware
+registers. There is no way for the driver to determine if the other events
+are available or not. Hence the driver exposes only those events advertised
+by the DSU, in "events" directory under::
+
+ /sys/bus/event_sources/devices/arm_dsu_<N>/
+
+The user should refer to the TRM of the product to figure out the supported events
+and use the raw event code for the unlisted events.
+
+The driver also exposes the CPUs connected to the DSU instance in "associated_cpus".
+
+
+e.g usage::
+
+ perf stat -a -e arm_dsu_0/cycles/
diff --git a/Documentation/admin-guide/perf/cxl.rst b/Documentation/admin-guide/perf/cxl.rst
new file mode 100644
index 000000000000..9233ea0d0b10
--- /dev/null
+++ b/Documentation/admin-guide/perf/cxl.rst
@@ -0,0 +1,68 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================================
+CXL Performance Monitoring Unit (CPMU)
+======================================
+
+The CXL rev 3.0 specification provides a definition of CXL Performance
+Monitoring Unit in section 13.2: Performance Monitoring.
+
+CXL components (e.g. Root Port, Switch Upstream Port, End Point) may have
+any number of CPMU instances. CPMU capabilities are fully discoverable from
+the devices. The specification provides event definitions for all CXL protocol
+message types and a set of additional events for things commonly counted on
+CXL devices (e.g. DRAM events).
+
+CPMU driver
+===========
+
+The CPMU driver registers a perf PMU with the name pmu_mem<X>.<Y> on the CXL bus
+representing the Yth CPMU for memX.
+
+ /sys/bus/cxl/device/pmu_mem<X>.<Y>
+
+The associated PMU is registered as
+
+ /sys/bus/event_sources/devices/cxl_pmu_mem<X>.<Y>
+
+In common with other CXL bus devices, the id has no specific meaning and the
+relationship to specific CXL device should be established via the device parent
+of the device on the CXL bus.
+
+PMU driver provides description of available events and filter options in sysfs.
+
+The "format" directory describes all formats of the config (event vendor id,
+group id and mask) config1 (threshold, filter enables) and config2 (filter
+parameters) fields of the perf_event_attr structure. The "events" directory
+describes all documented events show in perf list.
+
+The events shown in perf list are the most fine grained events with a single
+bit of the event mask set. More general events may be enable by setting
+multiple mask bits in config. For example, all Device to Host Read Requests
+may be captured on a single counter by setting the bits for all of
+
+* d2h_req_rdcurr
+* d2h_req_rdown
+* d2h_req_rdshared
+* d2h_req_rdany
+* d2h_req_rdownnodata
+
+Example of usage::
+
+ $#perf list
+ cxl_pmu_mem0.0/clock_ticks/ [Kernel PMU event]
+ cxl_pmu_mem0.0/d2h_req_rdshared/ [Kernel PMU event]
+ cxl_pmu_mem0.0/h2d_req_snpcur/ [Kernel PMU event]
+ cxl_pmu_mem0.0/h2d_req_snpdata/ [Kernel PMU event]
+ cxl_pmu_mem0.0/h2d_req_snpinv/ [Kernel PMU event]
+ -----------------------------------------------------------
+
+ $# perf stat -a -e cxl_pmu_mem0.0/clock_ticks/ -e cxl_pmu_mem0.0/d2h_req_rdshared/
+
+Vendor specific events may also be available and if so can be used via
+
+ $# perf stat -a -e cxl_pmu_mem0.0/vid=VID,gid=GID,mask=MASK/
+
+The driver does not support sampling so "perf record" is unsupported.
+It only supports system-wide counting so attaching to a task is
+unsupported.
diff --git a/Documentation/admin-guide/perf/dwc_pcie_pmu.rst b/Documentation/admin-guide/perf/dwc_pcie_pmu.rst
new file mode 100644
index 000000000000..167f9281fbf5
--- /dev/null
+++ b/Documentation/admin-guide/perf/dwc_pcie_pmu.rst
@@ -0,0 +1,94 @@
+======================================================================
+Synopsys DesignWare Cores (DWC) PCIe Performance Monitoring Unit (PMU)
+======================================================================
+
+DesignWare Cores (DWC) PCIe PMU
+===============================
+
+The PMU is a PCIe configuration space register block provided by each PCIe Root
+Port in a Vendor-Specific Extended Capability named RAS D.E.S (Debug, Error
+injection, and Statistics).
+
+As the name indicates, the RAS DES capability supports system level
+debugging, AER error injection, and collection of statistics. To facilitate
+collection of statistics, Synopsys DesignWare Cores PCIe controller
+provides the following two features:
+
+- one 64-bit counter for Time Based Analysis (RX/TX data throughput and
+ time spent in each low-power LTSSM state) and
+- one 32-bit counter per event for Event Counting (error and non-error
+ events for a specified lane)
+
+Note: There is no interrupt for counter overflow.
+
+Time Based Analysis
+-------------------
+
+Using this feature you can obtain information regarding RX/TX data
+throughput and time spent in each low-power LTSSM state by the controller.
+The PMU measures data in two categories:
+
+- Group#0: Percentage of time the controller stays in LTSSM states.
+- Group#1: Amount of data processed (Units of 16 bytes).
+
+Lane Event counters
+-------------------
+
+Using this feature you can obtain Error and Non-Error information in
+specific lane by the controller. The PMU event is selected by all of:
+
+- Group i
+- Event j within the Group i
+- Lane k
+
+Some of the events only exist for specific configurations.
+
+DesignWare Cores (DWC) PCIe PMU Driver
+=======================================
+
+This driver adds PMU devices for each PCIe Root Port named based on the SBDF of
+the Root Port. For example,
+
+ 0001:30:03.0 PCI bridge: Device 1ded:8000 (rev 01)
+
+the PMU device name for this Root Port is dwc_rootport_13018.
+
+The DWC PCIe PMU driver registers a perf PMU driver, which provides
+description of available events and configuration options in sysfs, see
+/sys/bus/event_source/devices/dwc_rootport_{sbdf}.
+
+The "format" directory describes format of the config fields of the
+perf_event_attr structure. The "events" directory provides configuration
+templates for all documented events. For example,
+"rx_pcie_tlp_data_payload" is an equivalent of "eventid=0x21,type=0x0".
+
+The "perf list" command shall list the available events from sysfs, e.g.::
+
+ $# perf list | grep dwc_rootport
+ <...>
+ dwc_rootport_13018/Rx_PCIe_TLP_Data_Payload/ [Kernel PMU event]
+ <...>
+ dwc_rootport_13018/rx_memory_read,lane=?/ [Kernel PMU event]
+
+Time Based Analysis Event Usage
+-------------------------------
+
+Example usage of counting PCIe RX TLP data payload (Units of bytes)::
+
+ $# perf stat -a -e dwc_rootport_13018/Rx_PCIe_TLP_Data_Payload/
+
+The average RX/TX bandwidth can be calculated using the following formula:
+
+ PCIe RX Bandwidth = rx_pcie_tlp_data_payload / Measure_Time_Window
+ PCIe TX Bandwidth = tx_pcie_tlp_data_payload / Measure_Time_Window
+
+Lane Event Usage
+-------------------------------
+
+Each lane has the same event set and to avoid generating a list of hundreds
+of events, the user need to specify the lane ID explicitly, e.g.::
+
+ $# perf stat -a -e dwc_rootport_13018/rx_memory_read,lane=4/
+
+The driver does not support sampling, therefore "perf record" will not
+work. Per-task (without "-a") perf sessions are not supported.
diff --git a/Documentation/admin-guide/perf/fujitsu_uncore_pmu.rst b/Documentation/admin-guide/perf/fujitsu_uncore_pmu.rst
new file mode 100644
index 000000000000..2ec0249e37b6
--- /dev/null
+++ b/Documentation/admin-guide/perf/fujitsu_uncore_pmu.rst
@@ -0,0 +1,115 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+
+================================================
+Fujitsu Uncore Performance Monitoring Unit (PMU)
+================================================
+
+This driver supports the Uncore MAC PMUs and the Uncore PCI PMUs found
+in Fujitsu chips.
+Each MAC PMU on these chips is exposed as a uncore perf PMU with device name
+mac_iod<iod>_mac<mac>_ch<ch>.
+And each PCI PMU on these chips is exposed as a uncore perf PMU with device name
+pci_iod<iod>_pci<pci>.
+
+The driver provides a description of its available events and configuration
+options in sysfs, see /sys/bus/event_sources/devices/mac_iod<iod>_mac<mac>_ch<ch>/
+and /sys/bus/event_sources/devices/pci_iod<iod>_pci<pci>/.
+This driver exports:
+
+- formats, used by perf user space and other tools to configure events
+- events, used by perf user space and other tools to create events
+ symbolically, e.g.::
+
+ perf stat -a -e mac_iod0_mac0_ch0/event=0x21/ ls
+ perf stat -a -e pci_iod0_pci0/event=0x24/ ls
+
+- cpumask, used by perf user space and other tools to know on which CPUs
+ to open the events
+
+This driver supports the following events for MAC:
+
+- cycles
+ This event counts MAC cycles at MAC frequency.
+- read-count
+ This event counts the number of read requests to MAC.
+- read-count-request
+ This event counts the number of read requests including retry to MAC.
+- read-count-return
+ This event counts the number of responses to read requests to MAC.
+- read-count-request-pftgt
+ This event counts the number of read requests including retry with PFTGT
+ flag.
+- read-count-request-normal
+ This event counts the number of read requests including retry without PFTGT
+ flag.
+- read-count-return-pftgt-hit
+ This event counts the number of responses to read requests which hit the
+ PFTGT buffer.
+- read-count-return-pftgt-miss
+ This event counts the number of responses to read requests which miss the
+ PFTGT buffer.
+- read-wait
+ This event counts outstanding read requests issued by DDR memory controller
+ per cycle.
+- write-count
+ This event counts the number of write requests to MAC (including zero write,
+ full write, partial write, write cancel).
+- write-count-write
+ This event counts the number of full write requests to MAC (not including
+ zero write).
+- write-count-pwrite
+ This event counts the number of partial write requests to MAC.
+- memory-read-count
+ This event counts the number of read requests from MAC to memory.
+- memory-write-count
+ This event counts the number of full write requests from MAC to memory.
+- memory-pwrite-count
+ This event counts the number of partial write requests from MAC to memory.
+- ea-mac
+ This event counts energy consumption of MAC.
+- ea-memory
+ This event counts energy consumption of memory.
+- ea-memory-mac-write
+ This event counts the number of write requests from MAC to memory.
+- ea-ha
+ This event counts energy consumption of HA.
+
+ 'ea' is the abbreviation for 'Energy Analyzer'.
+
+Examples for use with perf::
+
+ perf stat -e mac_iod0_mac0_ch0/ea-mac/ ls
+
+And, this driver supports the following events for PCI:
+
+- pci-port0-cycles
+ This event counts PCI cycles at PCI frequency in port0.
+- pci-port0-read-count
+ This event counts read transactions for data transfer in port0.
+- pci-port0-read-count-bus
+ This event counts read transactions for bus usage in port0.
+- pci-port0-write-count
+ This event counts write transactions for data transfer in port0.
+- pci-port0-write-count-bus
+ This event counts write transactions for bus usage in port0.
+- pci-port1-cycles
+ This event counts PCI cycles at PCI frequency in port1.
+- pci-port1-read-count
+ This event counts read transactions for data transfer in port1.
+- pci-port1-read-count-bus
+ This event counts read transactions for bus usage in port1.
+- pci-port1-write-count
+ This event counts write transactions for data transfer in port1.
+- pci-port1-write-count-bus
+ This event counts write transactions for bus usage in port1.
+- ea-pci
+ This event counts energy consumption of PCI.
+
+ 'ea' is the abbreviation for 'Energy Analyzer'.
+
+Examples for use with perf::
+
+ perf stat -e pci_iod0_pci0/ea-pci/ ls
+
+Given that these are uncore PMUs the driver does not support sampling, therefore
+"perf record" will not work. Per-task perf sessions are not supported.
diff --git a/Documentation/admin-guide/perf/hisi-pcie-pmu.rst b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst
new file mode 100644
index 000000000000..083ca50de896
--- /dev/null
+++ b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst
@@ -0,0 +1,148 @@
+================================================
+HiSilicon PCIe Performance Monitoring Unit (PMU)
+================================================
+
+On Hip09, HiSilicon PCIe Performance Monitoring Unit (PMU) could monitor
+bandwidth, latency, bus utilization and buffer occupancy data of PCIe.
+
+Each PCIe Core has a PMU to monitor multi Root Ports of this PCIe Core and
+all Endpoints downstream these Root Ports.
+
+
+HiSilicon PCIe PMU driver
+=========================
+
+The PCIe PMU driver registers a perf PMU with the name of its sicl-id and PCIe
+Core id.::
+
+ /sys/bus/event_source/hisi_pcie<sicl>_core<core>
+
+PMU driver provides description of available events and filter options in sysfs,
+see /sys/bus/event_source/devices/hisi_pcie<sicl>_core<core>.
+
+The "format" directory describes all formats of the config (events) and config1
+(filter options) fields of the perf_event_attr structure. The "events" directory
+describes all documented events shown in perf list.
+
+The "identifier" sysfs file allows users to identify the version of the
+PMU hardware device.
+
+The "bus" sysfs file allows users to get the bus number of Root Ports
+monitored by PMU. Furthermore users can get the Root Ports range in
+[bdf_min, bdf_max] from "bdf_min" and "bdf_max" sysfs attributes
+respectively.
+
+Example usage of perf::
+
+ $# perf list
+ hisi_pcie0_core0/rx_mwr_latency/ [kernel PMU event]
+ hisi_pcie0_core0/rx_mwr_cnt/ [kernel PMU event]
+ ------------------------------------------
+
+ $# perf stat -e hisi_pcie0_core0/rx_mwr_latency,port=0xffff/
+ $# perf stat -e hisi_pcie0_core0/rx_mwr_cnt,port=0xffff/
+
+The related events usually used to calculate the bandwidth, latency or others.
+They need to start and end counting at the same time, therefore related events
+are best used in the same event group to get the expected value. There are two
+ways to know if they are related events:
+
+a) By event name, such as the latency events "xxx_latency, xxx_cnt" or
+ bandwidth events "xxx_flux, xxx_time".
+b) By event type, such as "event=0xXXXX, event=0x1XXXX".
+
+Example usage of perf group::
+
+ $# perf stat -e "{hisi_pcie0_core0/rx_mwr_latency,port=0xffff/,hisi_pcie0_core0/rx_mwr_cnt,port=0xffff/}"
+
+The current driver does not support sampling. So "perf record" is unsupported.
+Also attach to a task is unsupported for PCIe PMU.
+
+Filter options
+--------------
+
+1. Target filter
+
+ PMU could only monitor the performance of traffic downstream target Root
+ Ports or downstream target Endpoint. PCIe PMU driver support "port" and
+ "bdf" interfaces for users.
+ Please notice that, one of these two interfaces must be set, and these two
+ interfaces aren't supported at the same time. If they are both set, only
+ "port" filter is valid.
+ If "port" filter not being set or is set explicitly to zero (default), the
+ "bdf" filter will be in effect, because "bdf=0" meaning 0000:000:00.0.
+
+ - port
+
+ "port" filter can be used in all PCIe PMU events, target Root Port can be
+ selected by configuring the 16-bits-bitmap "port". Multi ports can be
+ selected for AP-layer-events, and only one port can be selected for
+ TL/DL-layer-events.
+
+ For example, if target Root Port is 0000:00:00.0 (x8 lanes), bit0 of
+ bitmap should be set, port=0x1; if target Root Port is 0000:00:04.0 (x4
+ lanes), bit8 is set, port=0x100; if these two Root Ports are both
+ monitored, port=0x101.
+
+ Example usage of perf::
+
+ $# perf stat -e hisi_pcie0_core0/rx_mwr_latency,port=0x1/ sleep 5
+
+ - bdf
+
+ "bdf" filter can only be used in bandwidth events, target Endpoint is
+ selected by configuring BDF to "bdf". Counter only counts the bandwidth of
+ message requested by target Endpoint.
+
+ For example, "bdf=0x3900" means BDF of target Endpoint is 0000:39:00.0.
+
+ Example usage of perf::
+
+ $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,bdf=0x3900/ sleep 5
+
+2. Trigger filter
+
+ Event statistics start when the first time TLP length is greater/smaller
+ than trigger condition. You can set the trigger condition by writing
+ "trig_len", and set the trigger mode by writing "trig_mode". This filter can
+ only be used in bandwidth events.
+
+ For example, "trig_len=4" means trigger condition is 2^4 DW, "trig_mode=0"
+ means statistics start when TLP length > trigger condition, "trig_mode=1"
+ means start when TLP length < condition.
+
+ Example usage of perf::
+
+ $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,port=0xffff,trig_len=0x4,trig_mode=1/ sleep 5
+
+3. Threshold filter
+
+ Counter counts when TLP length within the specified range. You can set the
+ threshold by writing "thr_len", and set the threshold mode by writing
+ "thr_mode". This filter can only be used in bandwidth events.
+
+ For example, "thr_len=4" means threshold is 2^4 DW, "thr_mode=0" means
+ counter counts when TLP length >= threshold, and "thr_mode=1" means counts
+ when TLP length < threshold.
+
+ Example usage of perf::
+
+ $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,port=0xffff,thr_len=0x4,thr_mode=1/ sleep 5
+
+4. TLP Length filter
+
+ When counting bandwidth, the data can be composed of certain parts of TLP
+ packets. You can specify it through "len_mode":
+
+ - 2'b00: Reserved (Do not use this since the behaviour is undefined)
+ - 2'b01: Bandwidth of TLP payloads
+ - 2'b10: Bandwidth of TLP headers
+ - 2'b11: Bandwidth of both TLP payloads and headers
+
+ For example, "len_mode=2" means only counting the bandwidth of TLP headers
+ and "len_mode=3" means the final bandwidth data is composed of both TLP
+ headers and payloads. Default value if not specified is 2'b11.
+
+ Example usage of perf::
+
+ $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,port=0xffff,len_mode=0x1/ sleep 5
diff --git a/Documentation/admin-guide/perf/hisi-pmu.rst b/Documentation/admin-guide/perf/hisi-pmu.rst
new file mode 100644
index 000000000000..d56b2d690709
--- /dev/null
+++ b/Documentation/admin-guide/perf/hisi-pmu.rst
@@ -0,0 +1,177 @@
+======================================================
+HiSilicon SoC uncore Performance Monitoring Unit (PMU)
+======================================================
+
+The HiSilicon SoC chip includes various independent system device PMUs
+such as L3 cache (L3C), Hydra Home Agent (HHA) and DDRC. These PMUs are
+independent and have hardware logic to gather statistics and performance
+information.
+
+The HiSilicon SoC encapsulates multiple CPU and IO dies. Each CPU cluster
+(CCL) is made up of 4 cpu cores sharing one L3 cache; each CPU die is
+called Super CPU cluster (SCCL) and is made up of 6 CCLs. Each SCCL has
+two HHAs (0 - 1) and four DDRCs (0 - 3), respectively.
+
+HiSilicon SoC uncore PMU driver
+-------------------------------
+
+Each device PMU has separate registers for event counting, control and
+interrupt, and the PMU driver shall register perf PMU drivers like L3C,
+HHA and DDRC etc. The available events and configuration options shall
+be described in the sysfs, see::
+
+/sys/bus/event_source/devices/hisi_sccl{X}_<l3c{Y}/hha{Y}/ddrc{Y}>
+
+The "perf list" command shall list the available events from sysfs.
+
+Each L3C, HHA and DDRC is registered as a separate PMU with perf. The PMU
+name will appear in event listing as hisi_sccl<sccl-id>_module<index-id>.
+where "sccl-id" is the identifier of the SCCL and "index-id" is the index of
+module.
+
+e.g. hisi_sccl3_l3c0/rd_hit_cpipe is READ_HIT_CPIPE event of L3C index #0 in
+SCCL ID #3.
+
+e.g. hisi_sccl1_hha0/rx_operations is RX_OPERATIONS event of HHA index #0 in
+SCCL ID #1.
+
+The driver also provides a "cpumask" sysfs attribute, which shows the CPU core
+ID used to count the uncore PMU event. An "associated_cpus" sysfs attribute is
+also provided to show the CPUs associated with this PMU. The "cpumask" indicates
+the CPUs to open the events, usually as a hint for userspaces tools like perf.
+It only contains one associated CPU from the "associated_cpus".
+
+Example usage of perf::
+
+ $# perf list
+ hisi_sccl3_l3c0/rd_hit_cpipe/ [kernel PMU event]
+ ------------------------------------------
+ hisi_sccl3_l3c0/wr_hit_cpipe/ [kernel PMU event]
+ ------------------------------------------
+ hisi_sccl1_l3c0/rd_hit_cpipe/ [kernel PMU event]
+ ------------------------------------------
+ hisi_sccl1_l3c0/wr_hit_cpipe/ [kernel PMU event]
+ ------------------------------------------
+
+ $# perf stat -a -e hisi_sccl3_l3c0/rd_hit_cpipe/ sleep 5
+ $# perf stat -a -e hisi_sccl3_l3c0/config=0x02/ sleep 5
+
+For HiSilicon uncore PMU v2 whose identifier is 0x30, the topology is the same
+as PMU v1, but some new functions are added to the hardware.
+
+1. L3C PMU supports filtering by core/thread within the cluster which can be
+specified as a bitmap::
+
+ $# perf stat -a -e hisi_sccl3_l3c0/config=0x02,tt_core=0x3/ sleep 5
+
+This will only count the operations from core/thread 0 and 1 in this cluster.
+
+User should not use tt_core_deprecated to specify the core/thread filtering.
+This option is provided for backward compatiblility and only support 8bit
+which may not cover all the core/thread sharing L3C.
+
+2. Tracetag allow the user to chose to count only read, write or atomic
+operations via the tt_req parameeter in perf. The default value counts all
+operations. tt_req is 3bits, 3'b100 represents read operations, 3'b101
+represents write operations, 3'b110 represents atomic store operations and
+3'b111 represents atomic non-store operations, other values are reserved::
+
+ $# perf stat -a -e hisi_sccl3_l3c0/config=0x02,tt_req=0x4/ sleep 5
+
+This will only count the read operations in this cluster.
+
+3. Datasrc allows the user to check where the data comes from. It is 5 bits.
+Some important codes are as follows:
+
+- 5'b00001: comes from L3C in this die;
+- 5'b01000: comes from L3C in the cross-die;
+- 5'b01001: comes from L3C which is in another socket;
+- 5'b01110: comes from the local DDR;
+- 5'b01111: comes from the cross-die DDR;
+- 5'b10000: comes from cross-socket DDR;
+
+etc, it is mainly helpful to find that the data source is nearest from the CPU
+cores. If datasrc_cfg is used in the multi-chips, the datasrc_skt shall be
+configured in perf command::
+
+ $# perf stat -a -e hisi_sccl3_l3c0/config=0xb9,datasrc_cfg=0xE/,
+ hisi_sccl3_l3c0/config=0xb9,datasrc_cfg=0xF/ sleep 5
+
+4. Some HiSilicon SoCs encapsulate multiple CPU and IO dies. Each CPU die
+contains several Compute Clusters (CCLs). The I/O dies are called Super I/O
+clusters (SICL) containing multiple I/O clusters (ICLs). Each CCL/ICL in the
+SoC has a unique ID. Each ID is 11bits, include a 6-bit SCCL-ID and 5-bit
+CCL/ICL-ID. For I/O die, the ICL-ID is followed by:
+
+- 5'b00000: I/O_MGMT_ICL;
+- 5'b00001: Network_ICL;
+- 5'b00011: HAC_ICL;
+- 5'b10000: PCIe_ICL;
+
+5. uring_channel: UC PMU events 0x47~0x59 supports filtering by tx request
+uring channel. It is 2 bits. Some important codes are as follows:
+
+- 2'b11: count the events which sent to the uring_ext (MATA) channel;
+- 2'b01: is the same as 2'b11;
+- 2'b10: count the events which sent to the uring (non-MATA) channel;
+- 2'b00: default value, count the events which sent to both uring and
+ uring_ext channels;
+
+6. ch: NoC PMU supports filtering the event counts of certain transaction
+channel with this option. The current supported channels are as follows:
+
+- 3'b010: Request channel
+- 3'b100: Snoop channel
+- 3'b110: Response channel
+- 3'b111: Data channel
+
+7. tt_en: NoC PMU supports counting only transactions that have tracetag set
+if this option is set. See the 2nd list for more information about tracetag.
+
+For HiSilicon uncore PMU v3 whose identifier is 0x40, some uncore PMUs are
+further divided into parts for finer granularity of tracing, each part has its
+own dedicated PMU, and all such PMUs together cover the monitoring job of events
+on particular uncore device. Such PMUs are described in sysfs with name format
+slightly changed::
+
+/sys/bus/event_source/devices/hisi_sccl{X}_<l3c{Y}_{Z}/ddrc{Y}_{Z}/noc{Y}_{Z}>
+
+Z is the sub-id, indicating different PMUs for part of hardware device.
+
+Usage of most PMUs with different sub-ids are identical. Specially, L3C PMU
+provides ``ext`` option to allow exploration of even finer granual statistics
+of L3C PMU. L3C PMU driver uses that as hint of termination when delivering
+perf command to hardware:
+
+- ext=0: Default, could be used with event names.
+- ext=1 and ext=2: Must be used with event codes, event names are not supported.
+
+An example of perf command could be::
+
+ $# perf stat -a -e hisi_sccl0_l3c1_0/rd_spipe/ sleep 5
+
+or::
+
+ $# perf stat -a -e hisi_sccl0_l3c1_0/event=0x1,ext=1/ sleep 5
+
+As above, ``hisi_sccl0_l3c1_0`` locates PMU of Super CPU CLuster 0, L3 cache 1
+pipe0.
+
+First command locates the first part of L3C since ``ext=0`` is implied by
+default. Second command issues the counting on another part of L3C with the
+event ``0x1``.
+
+Users could configure IDs to count data come from specific CCL/ICL, by setting
+srcid_cmd & srcid_msk, and data desitined for specific CCL/ICL by setting
+tgtid_cmd & tgtid_msk. A set bit in srcid_msk/tgtid_msk means the PMU will not
+check the bit when matching against the srcid_cmd/tgtid_cmd.
+
+If all of these options are disabled, it can works by the default value that
+doesn't distinguish the filter condition and ID information and will return
+the total counter values in the PMU counters.
+
+The current driver does not support sampling. So "perf record" is unsupported.
+Also attach to a task is unsupported as the events are all uncore.
+
+Note: Please contact the maintainer for a complete list of events supported for
+the PMU devices in the SoC and its information if needed.
diff --git a/Documentation/admin-guide/perf/hns3-pmu.rst b/Documentation/admin-guide/perf/hns3-pmu.rst
new file mode 100644
index 000000000000..1195e570f2d6
--- /dev/null
+++ b/Documentation/admin-guide/perf/hns3-pmu.rst
@@ -0,0 +1,136 @@
+======================================
+HNS3 Performance Monitoring Unit (PMU)
+======================================
+
+HNS3(HiSilicon network system 3) Performance Monitoring Unit (PMU) is an
+End Point device to collect performance statistics of HiSilicon SoC NIC.
+On Hip09, each SICL(Super I/O cluster) has one PMU device.
+
+HNS3 PMU supports collection of performance statistics such as bandwidth,
+latency, packet rate and interrupt rate.
+
+Each HNS3 PMU supports 8 hardware events.
+
+HNS3 PMU driver
+===============
+
+The HNS3 PMU driver registers a perf PMU with the name of its sicl id.::
+
+ /sys/bus/event_source/devices/hns3_pmu_sicl_<sicl_id>
+
+PMU driver provides description of available events, filter modes, format,
+identifier and cpumask in sysfs.
+
+The "events" directory describes the event code of all supported events
+shown in perf list.
+
+The "filtermode" directory describes the supported filter modes of each
+event.
+
+The "format" directory describes all formats of the config (events) and
+config1 (filter options) fields of the perf_event_attr structure.
+
+The "identifier" file shows version of PMU hardware device.
+
+The "bdf_min" and "bdf_max" files show the supported bdf range of each
+pmu device.
+
+The "hw_clk_freq" file shows the hardware clock frequency of each pmu
+device.
+
+Example usage of checking event code and subevent code::
+
+ $# cat /sys/bus/event_source/devices/hns3_pmu_sicl_0/events/dly_tx_normal_to_mac_time
+ config=0x00204
+ $# cat /sys/bus/event_source/devices/hns3_pmu_sicl_0/events/dly_tx_normal_to_mac_packet_num
+ config=0x10204
+
+Each performance statistic has a pair of events to get two values to
+calculate real performance data in userspace.
+
+The bits 0~15 of config (here 0x0204) are the true hardware event code. If
+two events have same value of bits 0~15 of config, that means they are
+event pair. And the bit 16 of config indicates getting counter 0 or
+counter 1 of hardware event.
+
+After getting two values of event pair in userspace, the formula of
+computation to calculate real performance data is:::
+
+ counter 0 / counter 1
+
+Example usage of checking supported filter mode::
+
+ $# cat /sys/bus/event_source/devices/hns3_pmu_sicl_0/filtermode/bw_ssu_rpu_byte_num
+ filter mode supported: global/port/port-tc/func/func-queue/
+
+Example usage of perf::
+
+ $# perf list
+ hns3_pmu_sicl_0/bw_ssu_rpu_byte_num/ [kernel PMU event]
+ hns3_pmu_sicl_0/bw_ssu_rpu_time/ [kernel PMU event]
+ ------------------------------------------
+
+ $# perf stat -g -e hns3_pmu_sicl_0/bw_ssu_rpu_byte_num,global=1/ -e hns3_pmu_sicl_0/bw_ssu_rpu_time,global=1/ -I 1000
+ or
+ $# perf stat -g -e hns3_pmu_sicl_0/config=0x00002,global=1/ -e hns3_pmu_sicl_0/config=0x10002,global=1/ -I 1000
+
+
+Filter modes
+--------------
+
+1. global mode
+PMU collect performance statistics for all HNS3 PCIe functions of IO DIE.
+Set the "global" filter option to 1 will enable this mode.
+Example usage of perf::
+
+ $# perf stat -a -e hns3_pmu_sicl_0/config=0x1020F,global=1/ -I 1000
+
+2. port mode
+PMU collect performance statistic of one whole physical port. The port id
+is same as mac id. The "tc" filter option must be set to 0xF in this mode,
+here tc stands for traffic class.
+
+Example usage of perf::
+
+ $# perf stat -a -e hns3_pmu_sicl_0/config=0x1020F,port=0,tc=0xF/ -I 1000
+
+3. port-tc mode
+PMU collect performance statistic of one tc of physical port. The port id
+is same as mac id. The "tc" filter option must be set to 0 ~ 7 in this
+mode.
+Example usage of perf::
+
+ $# perf stat -a -e hns3_pmu_sicl_0/config=0x1020F,port=0,tc=0/ -I 1000
+
+4. func mode
+PMU collect performance statistic of one PF/VF. The function id is BDF of
+PF/VF, its conversion formula::
+
+ func = (bus << 8) + (device << 3) + (function)
+
+for example:
+ BDF func
+ 35:00.0 0x3500
+ 35:00.1 0x3501
+ 35:01.0 0x3508
+
+In this mode, the "queue" filter option must be set to 0xFFFF.
+Example usage of perf::
+
+ $# perf stat -a -e hns3_pmu_sicl_0/config=0x1020F,bdf=0x3500,queue=0xFFFF/ -I 1000
+
+5. func-queue mode
+PMU collect performance statistic of one queue of PF/VF. The function id
+is BDF of PF/VF, the "queue" filter option must be set to the exact queue
+id of function.
+Example usage of perf::
+
+ $# perf stat -a -e hns3_pmu_sicl_0/config=0x1020F,bdf=0x3500,queue=0/ -I 1000
+
+6. func-intr mode
+PMU collect performance statistic of one interrupt of PF/VF. The function
+id is BDF of PF/VF, the "intr" filter option must be set to the exact
+interrupt id of function.
+Example usage of perf::
+
+ $# perf stat -a -e hns3_pmu_sicl_0/config=0x00301,bdf=0x3500,intr=0/ -I 1000
diff --git a/Documentation/admin-guide/perf/imx-ddr.rst b/Documentation/admin-guide/perf/imx-ddr.rst
new file mode 100644
index 000000000000..77418ae5a290
--- /dev/null
+++ b/Documentation/admin-guide/perf/imx-ddr.rst
@@ -0,0 +1,100 @@
+=====================================================
+Freescale i.MX8 DDR Performance Monitoring Unit (PMU)
+=====================================================
+
+There are no performance counters inside the DRAM controller, so performance
+signals are brought out to the edge of the controller where a set of 4 x 32 bit
+counters is implemented. This is controlled by the CSV modes programmed in counter
+control register which causes a large number of PERF signals to be generated.
+
+Selection of the value for each counter is done via the config registers. There
+is one register for each counter. Counter 0 is special in that it always counts
+“time” and when expired causes a lock on itself and the other counters and an
+interrupt is raised. If any other counter overflows, it continues counting, and
+no interrupt is raised.
+
+The "format" directory describes format of the config (event ID) and config1/2
+(AXI filter setting) fields of the perf_event_attr structure, see /sys/bus/event_source/
+devices/imx8_ddr0/format/. The "events" directory describes the events types
+hardware supported that can be used with perf tool, see /sys/bus/event_source/
+devices/imx8_ddr0/events/. The "caps" directory describes filter features implemented
+in DDR PMU, see /sys/bus/events_source/devices/imx8_ddr0/caps/.
+
+ .. code-block:: bash
+
+ perf stat -a -e imx8_ddr0/cycles/ cmd
+ perf stat -a -e imx8_ddr0/read/,imx8_ddr0/write/ cmd
+
+AXI filtering is only used by CSV modes 0x41 (axid-read) and 0x42 (axid-write)
+to count reading or writing matches filter setting. Filter setting is various
+from different DRAM controller implementations, which is distinguished by quirks
+in the driver. You also can dump info from userspace, "caps" directory show the
+type of AXI filter (filter, enhanced_filter and super_filter). Value 0 for
+un-supported, and value 1 for supported.
+
+* With DDR_CAP_AXI_ID_FILTER quirk(filter: 1, enhanced_filter: 0, super_filter: 0).
+ Filter is defined with two configuration parts:
+ --AXI_ID defines AxID matching value.
+ --AXI_MASKING defines which bits of AxID are meaningful for the matching.
+
+ - 0: corresponding bit is masked.
+ - 1: corresponding bit is not masked, i.e. used to do the matching.
+
+ AXI_ID and AXI_MASKING are mapped on DPCR1 register in performance counter.
+ When non-masked bits are matching corresponding AXI_ID bits then counter is
+ incremented. Perf counter is incremented if::
+
+ AxID && AXI_MASKING == AXI_ID && AXI_MASKING
+
+ This filter doesn't support filter different AXI ID for axid-read and axid-write
+ event at the same time as this filter is shared between counters.
+
+ .. code-block:: bash
+
+ perf stat -a -e imx8_ddr0/axid-read,axi_mask=0xMMMM,axi_id=0xDDDD/ cmd
+ perf stat -a -e imx8_ddr0/axid-write,axi_mask=0xMMMM,axi_id=0xDDDD/ cmd
+
+ .. note::
+
+ axi_mask is inverted in userspace(i.e. set bits are bits to mask), and
+ it will be reverted in driver automatically. so that the user can just specify
+ axi_id to monitor a specific id, rather than having to specify axi_mask.
+
+ .. code-block:: bash
+
+ perf stat -a -e imx8_ddr0/axid-read,axi_id=0x12/ cmd, which will monitor ARID=0x12
+
+* With DDR_CAP_AXI_ID_FILTER_ENHANCED quirk(filter: 1, enhanced_filter: 1, super_filter: 0).
+ This is an extension to the DDR_CAP_AXI_ID_FILTER quirk which permits
+ counting the number of bytes (as opposed to the number of bursts) from DDR
+ read and write transactions concurrently with another set of data counters.
+
+* With DDR_CAP_AXI_ID_PORT_CHANNEL_FILTER quirk(filter: 0, enhanced_filter: 0, super_filter: 1).
+ There is a limitation in previous AXI filter, it cannot filter different IDs
+ at the same time as the filter is shared between counters. This quirk is the
+ extension of AXI ID filter. One improvement is that counter 1-3 has their own
+ filter, means that it supports concurrently filter various IDs. Another
+ improvement is that counter 1-3 supports AXI PORT and CHANNEL selection. Support
+ selecting address channel or data channel.
+
+ Filter is defined with 2 configuration registers per counter 1-3.
+ --Counter N MASK COMP register - including AXI_ID and AXI_MASKING.
+ --Counter N MUX CNTL register - including AXI CHANNEL and AXI PORT.
+
+ - 0: address channel
+ - 1: data channel
+
+ PMU in DDR subsystem, only one single port0 exists, so axi_port is reserved
+ which should be 0.
+
+ .. code-block:: bash
+
+ perf stat -a -e imx8_ddr0/axid-read,axi_mask=0xMMMM,axi_id=0xDDDD,axi_channel=0xH/ cmd
+ perf stat -a -e imx8_ddr0/axid-write,axi_mask=0xMMMM,axi_id=0xDDDD,axi_channel=0xH/ cmd
+
+ .. note::
+
+ axi_channel is inverted in userspace, and it will be reverted in driver
+ automatically. So that users do not need specify axi_channel if want to
+ monitor data channel from DDR transactions, since data channel is more
+ meaningful.
diff --git a/Documentation/admin-guide/perf/index.rst b/Documentation/admin-guide/perf/index.rst
new file mode 100644
index 000000000000..47d9a3df6329
--- /dev/null
+++ b/Documentation/admin-guide/perf/index.rst
@@ -0,0 +1,32 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================
+Performance monitor support
+===========================
+
+.. toctree::
+ :maxdepth: 1
+
+ hisi-pmu
+ hisi-pcie-pmu
+ hns3-pmu
+ imx-ddr
+ qcom_l2_pmu
+ qcom_l3_pmu
+ starfive_starlink_pmu
+ mrvl-odyssey-ddr-pmu
+ mrvl-odyssey-tad-pmu
+ arm-ccn
+ arm-cmn
+ arm-ni
+ xgene-pmu
+ arm_dsu_pmu
+ thunderx2-pmu
+ alibaba_pmu
+ dwc_pcie_pmu
+ nvidia-pmu
+ meson-ddr-pmu
+ cxl
+ ampere_cspmu
+ mrvl-pem-pmu
+ fujitsu_uncore_pmu
diff --git a/Documentation/admin-guide/perf/meson-ddr-pmu.rst b/Documentation/admin-guide/perf/meson-ddr-pmu.rst
new file mode 100644
index 000000000000..8e71be1d6346
--- /dev/null
+++ b/Documentation/admin-guide/perf/meson-ddr-pmu.rst
@@ -0,0 +1,70 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================================================
+Amlogic SoC DDR Bandwidth Performance Monitoring Unit (PMU)
+===========================================================
+
+The Amlogic Meson G12 SoC contains a bandwidth monitor inside DRAM controller.
+The monitor includes 4 channels. Each channel can count the request accessing
+DRAM. The channel can count up to 3 AXI port simultaneously. It can be helpful
+to show if the performance bottleneck is on DDR bandwidth.
+
+Currently, this driver supports the following 5 perf events:
+
++ meson_ddr_bw/total_rw_bytes/
++ meson_ddr_bw/chan_1_rw_bytes/
++ meson_ddr_bw/chan_2_rw_bytes/
++ meson_ddr_bw/chan_3_rw_bytes/
++ meson_ddr_bw/chan_4_rw_bytes/
+
+meson_ddr_bw/chan_{1,2,3,4}_rw_bytes/ events are channel-specific events.
+Each channel support filtering, which can let the channel to monitor
+individual IP module in SoC.
+
+Below are DDR access request event filter keywords:
+
++ arm - from CPU
++ vpu_read1 - from OSD + VPP read
++ gpu - from 3D GPU
++ pcie - from PCIe controller
++ hdcp - from HDCP controller
++ hevc_front - from HEVC codec front end
++ usb3_0 - from USB3.0 controller
++ hevc_back - from HEVC codec back end
++ h265enc - from HEVC encoder
++ vpu_read2 - from DI read
++ vpu_write1 - from VDIN write
++ vpu_write2 - from di write
++ vdec - from legacy codec video decoder
++ hcodec - from H264 encoder
++ ge2d - from ge2d
++ spicc1 - from SPI controller 1
++ usb0 - from USB2.0 controller 0
++ dma - from system DMA controller 1
++ arb0 - from arb0
++ sd_emmc_b - from SD eMMC b controller
++ usb1 - from USB2.0 controller 1
++ audio - from Audio module
++ sd_emmc_c - from SD eMMC c controller
++ spicc2 - from SPI controller 2
++ ethernet - from Ethernet controller
+
+
+Examples:
+
+ + Show the total DDR bandwidth per seconds:
+
+ .. code-block:: bash
+
+ perf stat -a -e meson_ddr_bw/total_rw_bytes/ -I 1000 sleep 10
+
+
+ + Show individual DDR bandwidth from CPU and GPU respectively, as well as
+ sum of them:
+
+ .. code-block:: bash
+
+ perf stat -a -e meson_ddr_bw/chan_1_rw_bytes,arm=1/ -I 1000 sleep 10
+ perf stat -a -e meson_ddr_bw/chan_2_rw_bytes,gpu=1/ -I 1000 sleep 10
+ perf stat -a -e meson_ddr_bw/chan_3_rw_bytes,arm=1,gpu=1/ -I 1000 sleep 10
+
diff --git a/Documentation/admin-guide/perf/mrvl-odyssey-ddr-pmu.rst b/Documentation/admin-guide/perf/mrvl-odyssey-ddr-pmu.rst
new file mode 100644
index 000000000000..2e817593a4d9
--- /dev/null
+++ b/Documentation/admin-guide/perf/mrvl-odyssey-ddr-pmu.rst
@@ -0,0 +1,80 @@
+===================================================================
+Marvell Odyssey DDR PMU Performance Monitoring Unit (PMU UNCORE)
+===================================================================
+
+Odyssey DRAM Subsystem supports eight counters for monitoring performance
+and software can program those counters to monitor any of the defined
+performance events. Supported performance events include those counted
+at the interface between the DDR controller and the PHY, interface between
+the DDR Controller and the CHI interconnect, or within the DDR Controller.
+
+Additionally DSS also supports two fixed performance event counters, one
+for ddr reads and the other for ddr writes.
+
+The counter will be operating in either manual or auto mode.
+
+The PMU driver exposes the available events and format options under sysfs::
+
+ /sys/bus/event_source/devices/mrvl_ddr_pmu_<>/events/
+ /sys/bus/event_source/devices/mrvl_ddr_pmu_<>/format/
+
+Examples::
+
+ $ perf list | grep ddr
+ mrvl_ddr_pmu_<>/ddr_act_bypass_access/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_bsm_alloc/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_bsm_starvation/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_cam_active_access/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_cam_mwr/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_cam_rd_active_access/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_cam_rd_or_wr_access/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_cam_read/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_cam_wr_access/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_cam_write/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_capar_error/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_crit_ref/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_ddr_reads/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_ddr_writes/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_dfi_cmd_is_retry/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_dfi_cycles/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_dfi_parity_poison/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_dfi_rd_data_access/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_dfi_wr_data_access/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_dqsosc_mpc/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_dqsosc_mrr/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_enter_mpsm/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_enter_powerdown/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_enter_selfref/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_hif_pri_rdaccess/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_hif_rd_access/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_hif_rd_or_wr_access/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_hif_rmw_access/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_hif_wr_access/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_hpri_sched_rd_crit_access/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_load_mode/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_lpri_sched_rd_crit_access/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_precharge/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_precharge_for_other/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_precharge_for_rdwr/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_raw_hazard/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_rd_bypass_access/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_rd_crc_error/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_rd_uc_ecc_error/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_rdwr_transitions/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_refresh/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_retry_fifo_full/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_spec_ref/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_tcr_mrr/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_war_hazard/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_waw_hazard/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_win_limit_reached_rd/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_win_limit_reached_wr/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_wr_crc_error/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_wr_trxn_crit_access/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_write_combine/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_zqcl/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_zqlatch/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_zqstart/ [Kernel PMU event]
+
+ $ perf stat -e ddr_cam_read,ddr_cam_write,ddr_cam_active_access,ddr_cam
+ rd_or_wr_access,ddr_cam_rd_active_access,ddr_cam_mwr <workload>
diff --git a/Documentation/admin-guide/perf/mrvl-odyssey-tad-pmu.rst b/Documentation/admin-guide/perf/mrvl-odyssey-tad-pmu.rst
new file mode 100644
index 000000000000..ad1975b14087
--- /dev/null
+++ b/Documentation/admin-guide/perf/mrvl-odyssey-tad-pmu.rst
@@ -0,0 +1,37 @@
+====================================================================
+Marvell Odyssey LLC-TAD Performance Monitoring Unit (PMU UNCORE)
+====================================================================
+
+Each TAD provides eight 64-bit counters for monitoring
+cache behavior.The driver always configures the same counter for
+all the TADs. The user would end up effectively reserving one of
+eight counters in every TAD to look across all TADs.
+The occurrences of events are aggregated and presented to the user
+at the end of running the workload. The driver does not provide a
+way for the user to partition TADs so that different TADs are used for
+different applications.
+
+The performance events reflect various internal or interface activities.
+By combining the values from multiple performance counters, cache
+performance can be measured in terms such as: cache miss rate, cache
+allocations, interface retry rate, internal resource occupancy, etc.
+
+The PMU driver exposes the available events and format options under sysfs::
+
+ /sys/bus/event_source/devices/tad/events/
+ /sys/bus/event_source/devices/tad/format/
+
+Examples::
+
+ $ perf list | grep tad
+ tad/tad_alloc_any/ [Kernel PMU event]
+ tad/tad_alloc_dtg/ [Kernel PMU event]
+ tad/tad_alloc_ltg/ [Kernel PMU event]
+ tad/tad_hit_any/ [Kernel PMU event]
+ tad/tad_hit_dtg/ [Kernel PMU event]
+ tad/tad_hit_ltg/ [Kernel PMU event]
+ tad/tad_req_msh_in_exlmn/ [Kernel PMU event]
+ tad/tad_tag_rd/ [Kernel PMU event]
+ tad/tad_tot_cycle/ [Kernel PMU event]
+
+ $ perf stat -e tad_alloc_dtg,tad_alloc_ltg,tad_alloc_any,tad_hit_dtg,tad_hit_ltg,tad_hit_any,tad_tag_rd <workload>
diff --git a/Documentation/admin-guide/perf/mrvl-pem-pmu.rst b/Documentation/admin-guide/perf/mrvl-pem-pmu.rst
new file mode 100644
index 000000000000..c39007149b97
--- /dev/null
+++ b/Documentation/admin-guide/perf/mrvl-pem-pmu.rst
@@ -0,0 +1,56 @@
+=================================================================
+Marvell Odyssey PEM Performance Monitoring Unit (PMU UNCORE)
+=================================================================
+
+The PCI Express Interface Units(PEM) are associated with a corresponding
+monitoring unit. This includes performance counters to track various
+characteristics of the data that is transmitted over the PCIe link.
+
+The counters track inbound and outbound transactions which
+includes separate counters for posted/non-posted/completion TLPs.
+Also, inbound and outbound memory read requests along with their
+latencies can also be monitored. Address Translation Services(ATS)events
+such as ATS Translation, ATS Page Request, ATS Invalidation along with
+their corresponding latencies are also tracked.
+
+There are separate 64 bit counters to measure posted/non-posted/completion
+tlps in inbound and outbound transactions. ATS events are measured by
+different counters.
+
+The PMU driver exposes the available events and format options under sysfs,
+/sys/bus/event_source/devices/mrvl_pcie_rc_pmu_<>/events/
+/sys/bus/event_source/devices/mrvl_pcie_rc_pmu_<>/format/
+
+Examples::
+
+ # perf list | grep mrvl_pcie_rc_pmu
+ mrvl_pcie_rc_pmu_<>/ats_inv/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ats_inv_latency/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ats_pri/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ats_pri_latency/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ats_trans/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ats_trans_latency/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ib_inflight/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ib_reads/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ib_req_no_ro_ebus/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ib_req_no_ro_ncb/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ib_tlp_cpl_partid/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ib_tlp_dwords_cpl_partid/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ib_tlp_dwords_npr/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ib_tlp_dwords_pr/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ib_tlp_npr/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ib_tlp_pr/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ob_inflight_partid/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ob_merges_cpl_partid/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ob_merges_npr_partid/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ob_merges_pr_partid/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ob_reads_partid/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ob_tlp_cpl_partid/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ob_tlp_dwords_cpl_partid/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ob_tlp_dwords_npr_partid/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ob_tlp_dwords_pr_partid/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ob_tlp_npr_partid/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ob_tlp_pr_partid/ [Kernel PMU event]
+
+
+ # perf stat -e ib_inflight,ib_reads,ib_req_no_ro_ebus,ib_req_no_ro_ncb <workload>
diff --git a/Documentation/admin-guide/perf/nvidia-pmu.rst b/Documentation/admin-guide/perf/nvidia-pmu.rst
new file mode 100644
index 000000000000..f538ef67e0e8
--- /dev/null
+++ b/Documentation/admin-guide/perf/nvidia-pmu.rst
@@ -0,0 +1,333 @@
+=========================================================
+NVIDIA Tegra SoC Uncore Performance Monitoring Unit (PMU)
+=========================================================
+
+The NVIDIA Tegra SoC includes various system PMUs to measure key performance
+metrics like memory bandwidth, latency, and utilization:
+
+* Scalable Coherency Fabric (SCF)
+* NVLink-C2C0
+* NVLink-C2C1
+* CNVLink
+* PCIE
+
+PMU Driver
+----------
+
+The PMUs in this document are based on ARM CoreSight PMU Architecture as
+described in document: ARM IHI 0091. Since this is a standard architecture, the
+PMUs are managed by a common driver "arm-cs-arch-pmu". This driver describes
+the available events and configuration of each PMU in sysfs. Please see the
+sections below to get the sysfs path of each PMU. Like other uncore PMU drivers,
+the driver provides "cpumask" sysfs attribute to show the CPU id used to handle
+the PMU event. There is also "associated_cpus" sysfs attribute, which contains a
+list of CPUs associated with the PMU instance.
+
+.. _SCF_PMU_Section:
+
+SCF PMU
+-------
+
+The SCF PMU monitors system level cache events, CPU traffic, and
+strongly-ordered (SO) PCIE write traffic to local/remote memory. Please see
+:ref:`NVIDIA_Uncore_PMU_Traffic_Coverage_Section` for more info about the PMU
+traffic coverage.
+
+The events and configuration options of this PMU device are described in sysfs,
+see /sys/bus/event_source/devices/nvidia_scf_pmu_<socket-id>.
+
+Example usage:
+
+* Count event id 0x0 in socket 0::
+
+ perf stat -a -e nvidia_scf_pmu_0/event=0x0/
+
+* Count event id 0x0 in socket 1::
+
+ perf stat -a -e nvidia_scf_pmu_1/event=0x0/
+
+NVLink-C2C0 PMU
+--------------------
+
+The NVLink-C2C0 PMU monitors incoming traffic from a GPU/CPU connected with
+NVLink-C2C (Chip-2-Chip) interconnect. The type of traffic captured by this PMU
+varies dependent on the chip configuration:
+
+* NVIDIA Grace Hopper Superchip: Hopper GPU is connected with Grace SoC.
+
+ In this config, the PMU captures GPU ATS translated or EGM traffic from the GPU.
+
+* NVIDIA Grace CPU Superchip: two Grace CPU SoCs are connected.
+
+ In this config, the PMU captures read and relaxed ordered (RO) writes from
+ PCIE device of the remote SoC.
+
+Please see :ref:`NVIDIA_Uncore_PMU_Traffic_Coverage_Section` for more info about
+the PMU traffic coverage.
+
+The events and configuration options of this PMU device are described in sysfs,
+see /sys/bus/event_source/devices/nvidia_nvlink_c2c0_pmu_<socket-id>.
+
+Example usage:
+
+* Count event id 0x0 from the GPU/CPU connected with socket 0::
+
+ perf stat -a -e nvidia_nvlink_c2c0_pmu_0/event=0x0/
+
+* Count event id 0x0 from the GPU/CPU connected with socket 1::
+
+ perf stat -a -e nvidia_nvlink_c2c0_pmu_1/event=0x0/
+
+* Count event id 0x0 from the GPU/CPU connected with socket 2::
+
+ perf stat -a -e nvidia_nvlink_c2c0_pmu_2/event=0x0/
+
+* Count event id 0x0 from the GPU/CPU connected with socket 3::
+
+ perf stat -a -e nvidia_nvlink_c2c0_pmu_3/event=0x0/
+
+The NVLink-C2C has two ports that can be connected to one GPU (occupying both
+ports) or to two GPUs (one GPU per port). The user can use "port" bitmap
+parameter to select the port(s) to monitor. Each bit represents the port number,
+e.g. "port=0x1" corresponds to port 0 and "port=0x3" is for port 0 and 1. The
+PMU will monitor both ports by default if not specified.
+
+Example for port filtering:
+
+* Count event id 0x0 from the GPU connected with socket 0 on port 0::
+
+ perf stat -a -e nvidia_nvlink_c2c0_pmu_0/event=0x0,port=0x1/
+
+* Count event id 0x0 from the GPUs connected with socket 0 on port 0 and port 1::
+
+ perf stat -a -e nvidia_nvlink_c2c0_pmu_0/event=0x0,port=0x3/
+
+NVLink-C2C1 PMU
+-------------------
+
+The NVLink-C2C1 PMU monitors incoming traffic from a GPU connected with
+NVLink-C2C (Chip-2-Chip) interconnect. This PMU captures untranslated GPU
+traffic, in contrast with NvLink-C2C0 PMU that captures ATS translated traffic.
+Please see :ref:`NVIDIA_Uncore_PMU_Traffic_Coverage_Section` for more info about
+the PMU traffic coverage.
+
+The events and configuration options of this PMU device are described in sysfs,
+see /sys/bus/event_source/devices/nvidia_nvlink_c2c1_pmu_<socket-id>.
+
+Example usage:
+
+* Count event id 0x0 from the GPU connected with socket 0::
+
+ perf stat -a -e nvidia_nvlink_c2c1_pmu_0/event=0x0/
+
+* Count event id 0x0 from the GPU connected with socket 1::
+
+ perf stat -a -e nvidia_nvlink_c2c1_pmu_1/event=0x0/
+
+* Count event id 0x0 from the GPU connected with socket 2::
+
+ perf stat -a -e nvidia_nvlink_c2c1_pmu_2/event=0x0/
+
+* Count event id 0x0 from the GPU connected with socket 3::
+
+ perf stat -a -e nvidia_nvlink_c2c1_pmu_3/event=0x0/
+
+The NVLink-C2C has two ports that can be connected to one GPU (occupying both
+ports) or to two GPUs (one GPU per port). The user can use "port" bitmap
+parameter to select the port(s) to monitor. Each bit represents the port number,
+e.g. "port=0x1" corresponds to port 0 and "port=0x3" is for port 0 and 1. The
+PMU will monitor both ports by default if not specified.
+
+Example for port filtering:
+
+* Count event id 0x0 from the GPU connected with socket 0 on port 0::
+
+ perf stat -a -e nvidia_nvlink_c2c1_pmu_0/event=0x0,port=0x1/
+
+* Count event id 0x0 from the GPUs connected with socket 0 on port 0 and port 1::
+
+ perf stat -a -e nvidia_nvlink_c2c1_pmu_0/event=0x0,port=0x3/
+
+CNVLink PMU
+---------------
+
+The CNVLink PMU monitors traffic from GPU and PCIE device on remote sockets
+to local memory. For PCIE traffic, this PMU captures read and relaxed ordered
+(RO) write traffic. Please see :ref:`NVIDIA_Uncore_PMU_Traffic_Coverage_Section`
+for more info about the PMU traffic coverage.
+
+The events and configuration options of this PMU device are described in sysfs,
+see /sys/bus/event_source/devices/nvidia_cnvlink_pmu_<socket-id>.
+
+Each SoC socket can be connected to one or more sockets via CNVLink. The user can
+use "rem_socket" bitmap parameter to select the remote socket(s) to monitor.
+Each bit represents the socket number, e.g. "rem_socket=0xE" corresponds to
+socket 1 to 3. The PMU will monitor all remote sockets by default if not
+specified.
+/sys/bus/event_source/devices/nvidia_cnvlink_pmu_<socket-id>/format/rem_socket
+shows the valid bits that can be set in the "rem_socket" parameter.
+
+The PMU can not distinguish the remote traffic initiator, therefore it does not
+provide filter to select the traffic source to monitor. It reports combined
+traffic from remote GPU and PCIE devices.
+
+Example usage:
+
+* Count event id 0x0 for the traffic from remote socket 1, 2, and 3 to socket 0::
+
+ perf stat -a -e nvidia_cnvlink_pmu_0/event=0x0,rem_socket=0xE/
+
+* Count event id 0x0 for the traffic from remote socket 0, 2, and 3 to socket 1::
+
+ perf stat -a -e nvidia_cnvlink_pmu_1/event=0x0,rem_socket=0xD/
+
+* Count event id 0x0 for the traffic from remote socket 0, 1, and 3 to socket 2::
+
+ perf stat -a -e nvidia_cnvlink_pmu_2/event=0x0,rem_socket=0xB/
+
+* Count event id 0x0 for the traffic from remote socket 0, 1, and 2 to socket 3::
+
+ perf stat -a -e nvidia_cnvlink_pmu_3/event=0x0,rem_socket=0x7/
+
+
+PCIE PMU
+------------
+
+The PCIE PMU monitors all read/write traffic from PCIE root ports to
+local/remote memory. Please see :ref:`NVIDIA_Uncore_PMU_Traffic_Coverage_Section`
+for more info about the PMU traffic coverage.
+
+The events and configuration options of this PMU device are described in sysfs,
+see /sys/bus/event_source/devices/nvidia_pcie_pmu_<socket-id>.
+
+Each SoC socket can support multiple root ports. The user can use
+"root_port" bitmap parameter to select the port(s) to monitor, i.e.
+"root_port=0xF" corresponds to root port 0 to 3. The PMU will monitor all root
+ports by default if not specified.
+/sys/bus/event_source/devices/nvidia_pcie_pmu_<socket-id>/format/root_port
+shows the valid bits that can be set in the "root_port" parameter.
+
+Example usage:
+
+* Count event id 0x0 from root port 0 and 1 of socket 0::
+
+ perf stat -a -e nvidia_pcie_pmu_0/event=0x0,root_port=0x3/
+
+* Count event id 0x0 from root port 0 and 1 of socket 1::
+
+ perf stat -a -e nvidia_pcie_pmu_1/event=0x0,root_port=0x3/
+
+.. _NVIDIA_Uncore_PMU_Traffic_Coverage_Section:
+
+Traffic Coverage
+----------------
+
+The PMU traffic coverage may vary dependent on the chip configuration:
+
+* **NVIDIA Grace Hopper Superchip**: Hopper GPU is connected with Grace SoC.
+
+ Example configuration with two Grace SoCs::
+
+ ********************************* *********************************
+ * SOCKET-A * * SOCKET-B *
+ * * * *
+ * :::::::: * * :::::::: *
+ * : PCIE : * * : PCIE : *
+ * :::::::: * * :::::::: *
+ * | * * | *
+ * | * * | *
+ * ::::::: ::::::::: * * ::::::::: ::::::: *
+ * : : : : * * : : : : *
+ * : GPU :<--NVLink-->: Grace :<---CNVLink--->: Grace :<--NVLink-->: GPU : *
+ * : : C2C : SoC : * * : SoC : C2C : : *
+ * ::::::: ::::::::: * * ::::::::: ::::::: *
+ * | | * * | | *
+ * | | * * | | *
+ * &&&&&&&& &&&&&&&& * * &&&&&&&& &&&&&&&& *
+ * & GMEM & & CMEM & * * & CMEM & & GMEM & *
+ * &&&&&&&& &&&&&&&& * * &&&&&&&& &&&&&&&& *
+ * * * *
+ ********************************* *********************************
+
+ GMEM = GPU Memory (e.g. HBM)
+ CMEM = CPU Memory (e.g. LPDDR5X)
+
+ |
+ | Following table contains traffic coverage of Grace SoC PMU in socket-A:
+
+ ::
+
+ +--------------+-------+-----------+-----------+-----+----------+----------+
+ | | Source |
+ + +-------+-----------+-----------+-----+----------+----------+
+ | Destination | |GPU ATS |GPU Not-ATS| | Socket-B | Socket-B |
+ | |PCI R/W|Translated,|Translated | CPU | CPU/PCIE1| GPU/PCIE2|
+ | | |EGM | | | | |
+ +==============+=======+===========+===========+=====+==========+==========+
+ | Local | PCIE |NVLink-C2C0|NVLink-C2C1| SCF | SCF PMU | CNVLink |
+ | SYSRAM/CMEM | PMU |PMU |PMU | PMU | | PMU |
+ +--------------+-------+-----------+-----------+-----+----------+----------+
+ | Local GMEM | PCIE | N/A |NVLink-C2C1| SCF | SCF PMU | CNVLink |
+ | | PMU | |PMU | PMU | | PMU |
+ +--------------+-------+-----------+-----------+-----+----------+----------+
+ | Remote | PCIE |NVLink-C2C0|NVLink-C2C1| SCF | | |
+ | SYSRAM/CMEM | PMU |PMU |PMU | PMU | N/A | N/A |
+ | over CNVLink | | | | | | |
+ +--------------+-------+-----------+-----------+-----+----------+----------+
+ | Remote GMEM | PCIE |NVLink-C2C0|NVLink-C2C1| SCF | | |
+ | over CNVLink | PMU |PMU |PMU | PMU | N/A | N/A |
+ +--------------+-------+-----------+-----------+-----+----------+----------+
+
+ PCIE1 traffic represents strongly ordered (SO) writes.
+ PCIE2 traffic represents reads and relaxed ordered (RO) writes.
+
+* **NVIDIA Grace CPU Superchip**: two Grace CPU SoCs are connected.
+
+ Example configuration with two Grace SoCs::
+
+ ******************* *******************
+ * SOCKET-A * * SOCKET-B *
+ * * * *
+ * :::::::: * * :::::::: *
+ * : PCIE : * * : PCIE : *
+ * :::::::: * * :::::::: *
+ * | * * | *
+ * | * * | *
+ * ::::::::: * * ::::::::: *
+ * : : * * : : *
+ * : Grace :<--------NVLink------->: Grace : *
+ * : SoC : * C2C * : SoC : *
+ * ::::::::: * * ::::::::: *
+ * | * * | *
+ * | * * | *
+ * &&&&&&&& * * &&&&&&&& *
+ * & CMEM & * * & CMEM & *
+ * &&&&&&&& * * &&&&&&&& *
+ * * * *
+ ******************* *******************
+
+ GMEM = GPU Memory (e.g. HBM)
+ CMEM = CPU Memory (e.g. LPDDR5X)
+
+ |
+ | Following table contains traffic coverage of Grace SoC PMU in socket-A:
+
+ ::
+
+ +-----------------+-----------+---------+----------+-------------+
+ | | Source |
+ + +-----------+---------+----------+-------------+
+ | Destination | | | Socket-B | Socket-B |
+ | | PCI R/W | CPU | CPU/PCIE1| PCIE2 |
+ | | | | | |
+ +=================+===========+=========+==========+=============+
+ | Local | PCIE PMU | SCF PMU | SCF PMU | NVLink-C2C0 |
+ | SYSRAM/CMEM | | | | PMU |
+ +-----------------+-----------+---------+----------+-------------+
+ | Remote | | | | |
+ | SYSRAM/CMEM | PCIE PMU | SCF PMU | N/A | N/A |
+ | over NVLink-C2C | | | | |
+ +-----------------+-----------+---------+----------+-------------+
+
+ PCIE1 traffic represents strongly ordered (SO) writes.
+ PCIE2 traffic represents reads and relaxed ordered (RO) writes.
diff --git a/Documentation/admin-guide/perf/qcom_l2_pmu.rst b/Documentation/admin-guide/perf/qcom_l2_pmu.rst
new file mode 100644
index 000000000000..c37c6be9b8d8
--- /dev/null
+++ b/Documentation/admin-guide/perf/qcom_l2_pmu.rst
@@ -0,0 +1,39 @@
+=====================================================================
+Qualcomm Technologies Level-2 Cache Performance Monitoring Unit (PMU)
+=====================================================================
+
+This driver supports the L2 cache clusters found in Qualcomm Technologies
+Centriq SoCs. There are multiple physical L2 cache clusters, each with their
+own PMU. Each cluster has one or more CPUs associated with it.
+
+There is one logical L2 PMU exposed, which aggregates the results from
+the physical PMUs.
+
+The driver provides a description of its available events and configuration
+options in sysfs, see /sys/bus/event_source/devices/l2cache_0.
+
+The "format" directory describes the format of the events.
+
+Events can be envisioned as a 2-dimensional array. Each column represents
+a group of events. There are 8 groups. Only one entry from each
+group can be in use at a time. If multiple events from the same group
+are specified, the conflicting events cannot be counted at the same time.
+
+Events are specified as 0xCCG, where CC is 2 hex digits specifying
+the code (array row) and G specifies the group (column) 0-7.
+
+In addition there is a cycle counter event specified by the value 0xFE
+which is outside the above scheme.
+
+The driver provides a "cpumask" sysfs attribute which contains a mask
+consisting of one CPU per cluster which will be used to handle all the PMU
+events on that cluster.
+
+Examples for use with perf::
+
+ perf stat -e l2cache_0/config=0x001/,l2cache_0/config=0x042/ -a sleep 1
+
+ perf stat -e l2cache_0/config=0xfe/ -C 2 sleep 1
+
+The driver does not support sampling, therefore "perf record" will
+not work. Per-task perf sessions are not supported.
diff --git a/Documentation/admin-guide/perf/qcom_l3_pmu.rst b/Documentation/admin-guide/perf/qcom_l3_pmu.rst
new file mode 100644
index 000000000000..a66556b7e985
--- /dev/null
+++ b/Documentation/admin-guide/perf/qcom_l3_pmu.rst
@@ -0,0 +1,26 @@
+===========================================================================
+Qualcomm Datacenter Technologies L3 Cache Performance Monitoring Unit (PMU)
+===========================================================================
+
+This driver supports the L3 cache PMUs found in Qualcomm Datacenter Technologies
+Centriq SoCs. The L3 cache on these SOCs is composed of multiple slices, shared
+by all cores within a socket. Each slice is exposed as a separate uncore perf
+PMU with device name l3cache_<socket>_<instance>. User space is responsible
+for aggregating across slices.
+
+The driver provides a description of its available events and configuration
+options in sysfs, see /sys/bus/event_source/devices/l3cache*. Given that these are uncore PMUs
+the driver also exposes a "cpumask" sysfs attribute which contains a mask
+consisting of one CPU per socket which will be used to handle all the PMU
+events on that socket.
+
+The hardware implements 32bit event counters and has a flat 8bit event space
+exposed via the "event" format attribute. In addition to the 32bit physical
+counters the driver supports virtual 64bit hardware counters by using hardware
+counter chaining. This feature is exposed via the "lc" (long counter) format
+flag. E.g.::
+
+ perf stat -e l3cache_0_0/read-miss,lc/
+
+Given that these are uncore PMUs the driver does not support sampling, therefore
+"perf record" will not work. Per-task perf sessions are not supported.
diff --git a/Documentation/admin-guide/perf/starfive_starlink_pmu.rst b/Documentation/admin-guide/perf/starfive_starlink_pmu.rst
new file mode 100644
index 000000000000..2932ddb4eb76
--- /dev/null
+++ b/Documentation/admin-guide/perf/starfive_starlink_pmu.rst
@@ -0,0 +1,46 @@
+================================================
+StarFive StarLink Performance Monitor Unit (PMU)
+================================================
+
+StarFive StarLink Performance Monitor Unit (PMU) exists within the
+StarLink Coherent Network on Chip (CNoC) that connects multiple CPU
+clusters with an L3 memory system.
+
+The uncore PMU supports overflow interrupt, up to 16 programmable 64bit
+event counters, and an independent 64bit cycle counter.
+The PMU can only be accessed via Memory Mapped I/O and are common to the
+cores connected to the same PMU.
+
+Driver exposes supported PMU events in sysfs "events" directory under::
+
+ /sys/bus/event_source/devices/starfive_starlink_pmu/events/
+
+Driver exposes cpu used to handle PMU events in sysfs "cpumask" directory
+under::
+
+ /sys/bus/event_source/devices/starfive_starlink_pmu/cpumask/
+
+Driver describes the format of config (event ID) in sysfs "format" directory
+under::
+
+ /sys/bus/event_source/devices/starfive_starlink_pmu/format/
+
+Example of perf usage::
+
+ $ perf list
+
+ starfive_starlink_pmu/cycles/ [Kernel PMU event]
+ starfive_starlink_pmu/read_hit/ [Kernel PMU event]
+ starfive_starlink_pmu/read_miss/ [Kernel PMU event]
+ starfive_starlink_pmu/read_request/ [Kernel PMU event]
+ starfive_starlink_pmu/release_request/ [Kernel PMU event]
+ starfive_starlink_pmu/write_hit/ [Kernel PMU event]
+ starfive_starlink_pmu/write_miss/ [Kernel PMU event]
+ starfive_starlink_pmu/write_request/ [Kernel PMU event]
+ starfive_starlink_pmu/writeback/ [Kernel PMU event]
+
+
+ $ perf stat -a -e /starfive_starlink_pmu/cycles/ sleep 1
+
+Sampling is not supported. As a result, "perf record" is not supported.
+Attaching to a task is not supported, only system-wide counting is supported.
diff --git a/Documentation/admin-guide/perf/thunderx2-pmu.rst b/Documentation/admin-guide/perf/thunderx2-pmu.rst
new file mode 100644
index 000000000000..9255f7bf9452
--- /dev/null
+++ b/Documentation/admin-guide/perf/thunderx2-pmu.rst
@@ -0,0 +1,44 @@
+=============================================================
+Cavium ThunderX2 SoC Performance Monitoring Unit (PMU UNCORE)
+=============================================================
+
+The ThunderX2 SoC PMU consists of independent, system-wide, per-socket
+PMUs such as the Level 3 Cache (L3C), DDR4 Memory Controller (DMC) and
+Cavium Coherent Processor Interconnect (CCPI2).
+
+The DMC has 8 interleaved channels and the L3C has 16 interleaved tiles.
+Events are counted for the default channel (i.e. channel 0) and prorated
+to the total number of channels/tiles.
+
+The DMC and L3C support up to 4 counters, while the CCPI2 supports up to 8
+counters. Counters are independently programmable to different events and
+can be started and stopped individually. None of the counters support an
+overflow interrupt. DMC and L3C counters are 32-bit and read every 2 seconds.
+The CCPI2 counters are 64-bit and assumed not to overflow in normal operation.
+
+PMU UNCORE (perf) driver:
+
+The thunderx2_pmu driver registers per-socket perf PMUs for the DMC and
+L3C devices. Each PMU can be used to count up to 4 (DMC/L3C) or up to 8
+(CCPI2) events simultaneously. The PMUs provide a description of their
+available events and configuration options under sysfs, see
+/sys/bus/event_source/devices/uncore_<l3c_S/dmc_S/ccpi2_S/>; S is the socket id.
+
+The driver does not support sampling, therefore "perf record" will not
+work. Per-task perf sessions are also not supported.
+
+Examples::
+
+ # perf stat -a -e uncore_dmc_0/cnt_cycles/ sleep 1
+
+ # perf stat -a -e \
+ uncore_dmc_0/cnt_cycles/,\
+ uncore_dmc_0/data_transfers/,\
+ uncore_dmc_0/read_txns/,\
+ uncore_dmc_0/write_txns/ sleep 1
+
+ # perf stat -a -e \
+ uncore_l3c_0/read_request/,\
+ uncore_l3c_0/read_hit/,\
+ uncore_l3c_0/inv_request/,\
+ uncore_l3c_0/inv_hit/ sleep 1
diff --git a/Documentation/admin-guide/perf/xgene-pmu.rst b/Documentation/admin-guide/perf/xgene-pmu.rst
new file mode 100644
index 000000000000..98ccb8e777c4
--- /dev/null
+++ b/Documentation/admin-guide/perf/xgene-pmu.rst
@@ -0,0 +1,49 @@
+================================================
+APM X-Gene SoC Performance Monitoring Unit (PMU)
+================================================
+
+X-Gene SoC PMU consists of various independent system device PMUs such as
+L3 cache(s), I/O bridge(s), memory controller bridge(s) and memory
+controller(s). These PMU devices are loosely architected to follow the
+same model as the PMU for ARM cores. The PMUs share the same top level
+interrupt and status CSR region.
+
+PMU (perf) driver
+-----------------
+
+The xgene-pmu driver registers several perf PMU drivers. Each of the perf
+driver provides description of its available events and configuration options
+in sysfs, see /sys/bus/event_source/devices/<l3cX/iobX/mcbX/mcX>/.
+
+The "format" directory describes format of the config (event ID),
+config1 (agent ID) fields of the perf_event_attr structure. The "events"
+directory provides configuration templates for all supported event types that
+can be used with perf tool. For example, "l3c0/bank-fifo-full/" is an
+equivalent of "l3c0/config=0x0b/".
+
+Most of the SoC PMU has a specific list of agent ID used for monitoring
+performance of a specific datapath. For example, agents of a L3 cache can be
+a specific CPU or an I/O bridge. Each PMU has a set of 2 registers capable of
+masking the agents from which the request come from. If the bit with
+the bit number corresponding to the agent is set, the event is counted only if
+it is caused by a request from that agent. Each agent ID bit is inversely mapped
+to a corresponding bit in "config1" field. By default, the event will be
+counted for all agent requests (config1 = 0x0). For all the supported agents of
+each PMU, please refer to APM X-Gene User Manual.
+
+Each perf driver also provides a "cpumask" sysfs attribute, which contains a
+single CPU ID of the processor which will be used to handle all the PMU events.
+
+Example for perf tool use::
+
+ / # perf list | grep -e l3c -e iob -e mcb -e mc
+ l3c0/ackq-full/ [Kernel PMU event]
+ <...>
+ mcb1/mcb-csw-stall/ [Kernel PMU event]
+
+ / # perf stat -a -e l3c0/read-miss/,mcb1/csw-write-request/ sleep 1
+
+ / # perf stat -a -e l3c0/read-miss,config1=0xfffffffffffffffe/ sleep 1
+
+The driver does not support sampling, therefore "perf record" will
+not work. Per-task (without "-a") perf sessions are not supported.
diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst
new file mode 100644
index 000000000000..e1771f2225d5
--- /dev/null
+++ b/Documentation/admin-guide/pm/amd-pstate.rst
@@ -0,0 +1,802 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+===============================================
+``amd-pstate`` CPU Performance Scaling Driver
+===============================================
+
+:Copyright: |copy| 2021 Advanced Micro Devices, Inc.
+
+:Author: Huang Rui <ray.huang@amd.com>
+
+
+Introduction
+===================
+
+``amd-pstate`` is the AMD CPU performance scaling driver that introduces a
+new CPU frequency control mechanism on modern AMD APU and CPU series in
+Linux kernel. The new mechanism is based on Collaborative Processor
+Performance Control (CPPC) which provides finer grain frequency management
+than legacy ACPI hardware P-States. Current AMD CPU/APU platforms are using
+the ACPI P-states driver to manage CPU frequency and clocks with switching
+only in 3 P-states. CPPC replaces the ACPI P-states controls and allows a
+flexible, low-latency interface for the Linux kernel to directly
+communicate the performance hints to hardware.
+
+``amd-pstate`` leverages the Linux kernel governors such as ``schedutil``,
+``ondemand``, etc. to manage the performance hints which are provided by
+CPPC hardware functionality that internally follows the hardware
+specification (for details refer to AMD64 Architecture Programmer's Manual
+Volume 2: System Programming [1]_). Currently, ``amd-pstate`` supports basic
+frequency control function according to kernel governors on some of the
+Zen2 and Zen3 processors, and we will implement more AMD specific functions
+in future after we verify them on the hardware and SBIOS.
+
+
+AMD CPPC Overview
+=======================
+
+Collaborative Processor Performance Control (CPPC) interface enumerates a
+continuous, abstract, and unit-less performance value in a scale that is
+not tied to a specific performance state / frequency. This is an ACPI
+standard [2]_ which software can specify application performance goals and
+hints as a relative target to the infrastructure limits. AMD processors
+provide the low latency register model (MSR) instead of an AML code
+interpreter for performance adjustments. ``amd-pstate`` will initialize a
+``struct cpufreq_driver`` instance, ``amd_pstate_driver``, with the callbacks
+to manage each performance update behavior. ::
+
+ Highest Perf ------>+-----------------------+ +-----------------------+
+ | | | |
+ | | | |
+ | | Max Perf ---->| |
+ | | | |
+ | | | |
+ Nominal Perf ------>+-----------------------+ +-----------------------+
+ | | | |
+ | | | |
+ | | | |
+ | | | |
+ | | | |
+ | | | |
+ | | Desired Perf ---->| |
+ | | | |
+ | | | |
+ | | | |
+ | | | |
+ | | | |
+ | | | |
+ | | | |
+ | | | |
+ | | | |
+ Lowest non- | | | |
+ linear perf ------>+-----------------------+ +-----------------------+
+ | | | |
+ | | Min perf ---->| |
+ | | | |
+ Lowest perf ------>+-----------------------+ +-----------------------+
+ | | | |
+ | | | |
+ | | | |
+ 0 ------>+-----------------------+ +-----------------------+
+
+ AMD P-States Performance Scale
+
+
+.. _perf_cap:
+
+AMD CPPC Performance Capability
+--------------------------------
+
+Highest Performance (RO)
+.........................
+
+This is the absolute maximum performance an individual processor may reach,
+assuming ideal conditions. This performance level may not be sustainable
+for long durations and may only be achievable if other platform components
+are in a specific state; for example, it may require other processors to be in
+an idle state. This would be equivalent to the highest frequencies
+supported by the processor.
+
+Nominal (Guaranteed) Performance (RO)
+......................................
+
+This is the maximum sustained performance level of the processor, assuming
+ideal operating conditions. In the absence of an external constraint (power,
+thermal, etc.), this is the performance level the processor is expected to
+be able to maintain continuously. All cores/processors are expected to be
+able to sustain their nominal performance state simultaneously.
+
+Lowest non-linear Performance (RO)
+...................................
+
+This is the lowest performance level at which nonlinear power savings are
+achieved, for example, due to the combined effects of voltage and frequency
+scaling. Above this threshold, lower performance levels should be generally
+more energy efficient than higher performance levels. This register
+effectively conveys the most efficient performance level to ``amd-pstate``.
+
+Lowest Performance (RO)
+........................
+
+This is the absolute lowest performance level of the processor. Selecting a
+performance level lower than the lowest nonlinear performance level may
+cause an efficiency penalty but should reduce the instantaneous power
+consumption of the processor.
+
+AMD CPPC Performance Control
+------------------------------
+
+``amd-pstate`` passes performance goals through these registers. The
+register drives the behavior of the desired performance target.
+
+Minimum requested performance (RW)
+...................................
+
+``amd-pstate`` specifies the minimum allowed performance level.
+
+Maximum requested performance (RW)
+...................................
+
+``amd-pstate`` specifies a limit the maximum performance that is expected
+to be supplied by the hardware.
+
+Desired performance target (RW)
+...................................
+
+``amd-pstate`` specifies a desired target in the CPPC performance scale as
+a relative number. This can be expressed as percentage of nominal
+performance (infrastructure max). Below the nominal sustained performance
+level, desired performance expresses the average performance level of the
+processor subject to hardware. Above the nominal performance level,
+the processor must provide at least nominal performance requested and go higher
+if current operating conditions allow.
+
+Energy Performance Preference (EPP) (RW)
+.........................................
+
+This attribute provides a hint to the hardware if software wants to bias
+toward performance (0x0) or energy efficiency (0xff).
+
+
+Key Governors Support
+=======================
+
+``amd-pstate`` can be used with all the (generic) scaling governors listed
+by the ``scaling_available_governors`` policy attribute in ``sysfs``. Then,
+it is responsible for the configuration of policy objects corresponding to
+CPUs and provides the ``CPUFreq`` core (and the scaling governors attached
+to the policy objects) with accurate information on the maximum and minimum
+operating frequencies supported by the hardware. Users can check the
+``scaling_cur_freq`` information comes from the ``CPUFreq`` core.
+
+``amd-pstate`` mainly supports ``schedutil`` and ``ondemand`` for dynamic
+frequency control. It is to fine tune the processor configuration on
+``amd-pstate`` to the ``schedutil`` with CPU CFS scheduler. ``amd-pstate``
+registers the adjust_perf callback to implement performance update behavior
+similar to CPPC. It is initialized by ``sugov_start`` and then populates the
+CPU's update_util_data pointer to assign ``sugov_update_single_perf`` as the
+utilization update callback function in the CPU scheduler. The CPU scheduler
+will call ``cpufreq_update_util`` and assigns the target performance according
+to the ``struct sugov_cpu`` that the utilization update belongs to.
+Then, ``amd-pstate`` updates the desired performance according to the CPU
+scheduler assigned.
+
+.. _processor_support:
+
+Processor Support
+=======================
+
+The ``amd-pstate`` initialization will fail if the ``_CPC`` entry in the ACPI
+SBIOS does not exist in the detected processor. It uses ``acpi_cpc_valid``
+to check the existence of ``_CPC``. All Zen based processors support the legacy
+ACPI hardware P-States function, so when ``amd-pstate`` fails initialization,
+the kernel will fall back to initialize the ``acpi-cpufreq`` driver.
+
+There are two types of hardware implementations for ``amd-pstate``: one is
+`Full MSR Support <perf_cap_>`_ and another is `Shared Memory Support
+<perf_cap_>`_. It can use the :c:macro:`X86_FEATURE_CPPC` feature flag to
+indicate the different types. (For details, refer to the Processor Programming
+Reference (PPR) for AMD Family 19h Model 51h, Revision A1 Processors [3]_.)
+``amd-pstate`` is to register different ``static_call`` instances for different
+hardware implementations.
+
+Currently, some of the Zen2 and Zen3 processors support ``amd-pstate``. In the
+future, it will be supported on more and more AMD processors.
+
+Full MSR Support
+-----------------
+
+Some new Zen3 processors such as Cezanne provide the MSR registers directly
+while the :c:macro:`X86_FEATURE_CPPC` CPU feature flag is set.
+``amd-pstate`` can handle the MSR register to implement the fast switch
+function in ``CPUFreq`` that can reduce the latency of frequency control in
+interrupt context. The functions with a ``pstate_xxx`` prefix represent the
+operations on MSR registers.
+
+Shared Memory Support
+----------------------
+
+If the :c:macro:`X86_FEATURE_CPPC` CPU feature flag is not set, the
+processor supports the shared memory solution. In this case, ``amd-pstate``
+uses the ``cppc_acpi`` helper methods to implement the callback functions
+that are defined on ``static_call``. The functions with the ``cppc_xxx`` prefix
+represent the operations of ACPI CPPC helpers for the shared memory solution.
+
+
+AMD P-States and ACPI hardware P-States always can be supported in one
+processor. But AMD P-States has the higher priority and if it is enabled
+with :c:macro:`MSR_AMD_CPPC_ENABLE` or ``cppc_set_enable``, it will respond
+to the request from AMD P-States.
+
+
+User Space Interface in ``sysfs`` - Per-policy control
+======================================================
+
+``amd-pstate`` exposes several global attributes (files) in ``sysfs`` to
+control its functionality at the system level. They are located in the
+``/sys/devices/system/cpu/cpufreq/policyX/`` directory and affect all CPUs. ::
+
+ root@hr-test1:/home/ray# ls /sys/devices/system/cpu/cpufreq/policy0/*amd*
+ /sys/devices/system/cpu/cpufreq/policy0/amd_pstate_highest_perf
+ /sys/devices/system/cpu/cpufreq/policy0/amd_pstate_lowest_nonlinear_freq
+ /sys/devices/system/cpu/cpufreq/policy0/amd_pstate_max_freq
+
+
+``amd_pstate_highest_perf / amd_pstate_max_freq``
+
+Maximum CPPC performance and CPU frequency that the driver is allowed to
+set, in percent of the maximum supported CPPC performance level (the highest
+performance supported in `AMD CPPC Performance Capability <perf_cap_>`_).
+In some ASICs, the highest CPPC performance is not the one in the ``_CPC``
+table, so we need to expose it to sysfs. If boost is not active, but
+still supported, this maximum frequency will be larger than the one in
+``cpuinfo``.
+This attribute is read-only.
+
+``amd_pstate_lowest_nonlinear_freq``
+
+The lowest non-linear CPPC CPU frequency that the driver is allowed to set,
+in percent of the maximum supported CPPC performance level. (Please see the
+lowest non-linear performance in `AMD CPPC Performance Capability
+<perf_cap_>`_.)
+This attribute is read-only.
+
+``amd_pstate_hw_prefcore``
+
+Whether the platform supports the preferred core feature and it has been
+enabled. This attribute is read-only.
+
+``amd_pstate_prefcore_ranking``
+
+The performance ranking of the core. This number doesn't have any unit, but
+larger numbers are preferred at the time of reading. This can change at
+runtime based on platform conditions. This attribute is read-only.
+
+``energy_performance_available_preferences``
+
+A list of all the supported EPP preferences that could be used for
+``energy_performance_preference`` on this system.
+These profiles represent different hints that are provided
+to the low-level firmware about the user's desired energy vs efficiency
+tradeoff. ``default`` represents the epp value is set by platform
+firmware. This attribute is read-only.
+
+``energy_performance_preference``
+
+The current energy performance preference can be read from this attribute.
+and user can change current preference according to energy or performance needs
+Please get all support profiles list from
+``energy_performance_available_preferences`` attribute, all the profiles are
+integer values defined between 0 to 255 when EPP feature is enabled by platform
+firmware, if EPP feature is disabled, driver will ignore the written value
+This attribute is read-write.
+
+``boost``
+The `boost` sysfs attribute provides control over the CPU core
+performance boost, allowing users to manage the maximum frequency limitation
+of the CPU. This attribute can be used to enable or disable the boost feature
+on individual CPUs.
+
+When the boost feature is enabled, the CPU can dynamically increase its frequency
+beyond the base frequency, providing enhanced performance for demanding workloads.
+On the other hand, disabling the boost feature restricts the CPU to operate at the
+base frequency, which may be desirable in certain scenarios to prioritize power
+efficiency or manage temperature.
+
+To manipulate the `boost` attribute, users can write a value of `0` to disable the
+boost or `1` to enable it, for the respective CPU using the sysfs path
+`/sys/devices/system/cpu/cpuX/cpufreq/boost`, where `X` represents the CPU number.
+
+Other performance and frequency values can be read back from
+``/sys/devices/system/cpu/cpuX/acpi_cppc/``, see :ref:`cppc_sysfs`.
+
+
+``amd-pstate`` vs ``acpi-cpufreq``
+======================================
+
+On the majority of AMD platforms supported by ``acpi-cpufreq``, the ACPI tables
+provided by the platform firmware are used for CPU performance scaling, but
+only provide 3 P-states on AMD processors.
+However, on modern AMD APU and CPU series, hardware provides the Collaborative
+Processor Performance Control according to the ACPI protocol and customizes this
+for AMD platforms. That is, fine-grained and continuous frequency ranges
+instead of the legacy hardware P-states. ``amd-pstate`` is the kernel
+module which supports the new AMD P-States mechanism on most of the future AMD
+platforms. The AMD P-States mechanism is the more performance and energy
+efficiency frequency management method on AMD processors.
+
+
+``amd-pstate`` Driver Operation Modes
+======================================
+
+``amd_pstate`` CPPC has 3 operation modes: autonomous (active) mode,
+non-autonomous (passive) mode and guided autonomous (guided) mode.
+Active/passive/guided mode can be chosen by different kernel parameters.
+
+- In autonomous mode, platform ignores the desired performance level request
+ and takes into account only the values set to the minimum, maximum and energy
+ performance preference registers.
+- In non-autonomous mode, platform gets desired performance level
+ from OS directly through Desired Performance Register.
+- In guided-autonomous mode, platform sets operating performance level
+ autonomously according to the current workload and within the limits set by
+ OS through min and max performance registers.
+
+Active Mode
+------------
+
+``amd_pstate=active``
+
+This is the low-level firmware control mode which is implemented by ``amd_pstate_epp``
+driver with ``amd_pstate=active`` passed to the kernel in the command line.
+In this mode, ``amd_pstate_epp`` driver provides a hint to the hardware if software
+wants to bias toward performance (0x0) or energy efficiency (0xff) to the CPPC firmware.
+then CPPC power algorithm will calculate the runtime workload and adjust the realtime
+cores frequency according to the power supply and thermal, core voltage and some other
+hardware conditions.
+
+Passive Mode
+------------
+
+``amd_pstate=passive``
+
+It will be enabled if the ``amd_pstate=passive`` is passed to the kernel in the command line.
+In this mode, ``amd_pstate`` driver software specifies a desired QoS target in the CPPC
+performance scale as a relative number. This can be expressed as percentage of nominal
+performance (infrastructure max). Below the nominal sustained performance level,
+desired performance expresses the average performance level of the processor subject
+to the Performance Reduction Tolerance register. Above the nominal performance level,
+processor must provide at least nominal performance requested and go higher if current
+operating conditions allow.
+
+Guided Mode
+-----------
+
+``amd_pstate=guided``
+
+If ``amd_pstate=guided`` is passed to kernel command line option then this mode
+is activated. In this mode, driver requests minimum and maximum performance
+level and the platform autonomously selects a performance level in this range
+and appropriate to the current workload.
+
+``amd-pstate`` Preferred Core
+=================================
+
+The core frequency is subjected to the process variation in semiconductors.
+Not all cores are able to reach the maximum frequency respecting the
+infrastructure limits. Consequently, AMD has redefined the concept of
+maximum frequency of a part. This means that a fraction of cores can reach
+maximum frequency. To find the best process scheduling policy for a given
+scenario, OS needs to know the core ordering informed by the platform through
+highest performance capability register of the CPPC interface.
+
+``amd-pstate`` preferred core enables the scheduler to prefer scheduling on
+cores that can achieve a higher frequency with lower voltage. The preferred
+core rankings can dynamically change based on the workload, platform conditions,
+thermals and ageing.
+
+The priority metric will be initialized by the ``amd-pstate`` driver. The ``amd-pstate``
+driver will also determine whether or not ``amd-pstate`` preferred core is
+supported by the platform.
+
+``amd-pstate`` driver will provide an initial core ordering when the system boots.
+The platform uses the CPPC interfaces to communicate the core ranking to the
+operating system and scheduler to make sure that OS is choosing the cores
+with highest performance firstly for scheduling the process. When ``amd-pstate``
+driver receives a message with the highest performance change, it will
+update the core ranking and set the cpu's priority.
+
+``amd-pstate`` Preferred Core Switch
+=====================================
+Kernel Parameters
+-----------------
+
+``amd-pstate`` peferred core`` has two states: enable and disable.
+Enable/disable states can be chosen by different kernel parameters.
+Default enable ``amd-pstate`` preferred core.
+
+``amd_prefcore=disable``
+
+For systems that support ``amd-pstate`` preferred core, the core rankings will
+always be advertised by the platform. But OS can choose to ignore that via the
+kernel parameter ``amd_prefcore=disable``.
+
+User Space Interface in ``sysfs`` - General
+===========================================
+
+Global Attributes
+-----------------
+
+``amd-pstate`` exposes several global attributes (files) in ``sysfs`` to
+control its functionality at the system level. They are located in the
+``/sys/devices/system/cpu/amd_pstate/`` directory and affect all CPUs.
+
+``status``
+ Operation mode of the driver: "active", "passive", "guided" or "disable".
+
+ "active"
+ The driver is functional and in the ``active mode``
+
+ "passive"
+ The driver is functional and in the ``passive mode``
+
+ "guided"
+ The driver is functional and in the ``guided mode``
+
+ "disable"
+ The driver is unregistered and not functional now.
+
+ This attribute can be written to in order to change the driver's
+ operation mode or to unregister it. The string written to it must be
+ one of the possible values of it and, if successful, writing one of
+ these values to the sysfs file will cause the driver to switch over
+ to the operation mode represented by that string - or to be
+ unregistered in the "disable" case.
+
+``prefcore``
+ Preferred core state of the driver: "enabled" or "disabled".
+
+ "enabled"
+ Enable the ``amd-pstate`` preferred core.
+
+ "disabled"
+ Disable the ``amd-pstate`` preferred core
+
+
+ This attribute is read-only to check the state of preferred core set
+ by the kernel parameter.
+
+``cpupower`` tool support for ``amd-pstate``
+===============================================
+
+``amd-pstate`` is supported by the ``cpupower`` tool, which can be used to dump
+frequency information. Development is in progress to support more and more
+operations for the new ``amd-pstate`` module with this tool. ::
+
+ root@hr-test1:/home/ray# cpupower frequency-info
+ analyzing CPU 0:
+ driver: amd-pstate
+ CPUs which run at the same hardware frequency: 0
+ CPUs which need to have their frequency coordinated by software: 0
+ maximum transition latency: 131 us
+ hardware limits: 400 MHz - 4.68 GHz
+ available cpufreq governors: ondemand conservative powersave userspace performance schedutil
+ current policy: frequency should be within 400 MHz and 4.68 GHz.
+ The governor "schedutil" may decide which speed to use
+ within this range.
+ current CPU frequency: Unable to call hardware
+ current CPU frequency: 4.02 GHz (asserted by call to kernel)
+ boost state support:
+ Supported: yes
+ Active: yes
+ AMD PSTATE Highest Performance: 166. Maximum Frequency: 4.68 GHz.
+ AMD PSTATE Nominal Performance: 117. Nominal Frequency: 3.30 GHz.
+ AMD PSTATE Lowest Non-linear Performance: 39. Lowest Non-linear Frequency: 1.10 GHz.
+ AMD PSTATE Lowest Performance: 15. Lowest Frequency: 400 MHz.
+
+
+Diagnostics and Tuning
+=======================
+
+Trace Events
+--------------
+
+There are two static trace events that can be used for ``amd-pstate``
+diagnostics. One of them is the ``cpu_frequency`` trace event generally used
+by ``CPUFreq``, and the other one is the ``amd_pstate_perf`` trace event
+specific to ``amd-pstate``. The following sequence of shell commands can
+be used to enable them and see their output (if the kernel is
+configured to support event tracing). ::
+
+ root@hr-test1:/home/ray# cd /sys/kernel/tracing/
+ root@hr-test1:/sys/kernel/tracing# echo 1 > events/amd_cpu/enable
+ root@hr-test1:/sys/kernel/tracing# cat trace
+ # tracer: nop
+ #
+ # entries-in-buffer/entries-written: 47827/42233061 #P:2
+ #
+ # _-----=> irqs-off
+ # / _----=> need-resched
+ # | / _---=> hardirq/softirq
+ # || / _--=> preempt-depth
+ # ||| / delay
+ # TASK-PID CPU# |||| TIMESTAMP FUNCTION
+ # | | | |||| | |
+ <idle>-0 [015] dN... 4995.979886: amd_pstate_perf: amd_min_perf=85 amd_des_perf=85 amd_max_perf=166 cpu_id=15 changed=false fast_switch=true
+ <idle>-0 [007] d.h.. 4995.979893: amd_pstate_perf: amd_min_perf=85 amd_des_perf=85 amd_max_perf=166 cpu_id=7 changed=false fast_switch=true
+ cat-2161 [000] d.... 4995.980841: amd_pstate_perf: amd_min_perf=85 amd_des_perf=85 amd_max_perf=166 cpu_id=0 changed=false fast_switch=true
+ sshd-2125 [004] d.s.. 4995.980968: amd_pstate_perf: amd_min_perf=85 amd_des_perf=85 amd_max_perf=166 cpu_id=4 changed=false fast_switch=true
+ <idle>-0 [007] d.s.. 4995.980968: amd_pstate_perf: amd_min_perf=85 amd_des_perf=85 amd_max_perf=166 cpu_id=7 changed=false fast_switch=true
+ <idle>-0 [003] d.s.. 4995.980971: amd_pstate_perf: amd_min_perf=85 amd_des_perf=85 amd_max_perf=166 cpu_id=3 changed=false fast_switch=true
+ <idle>-0 [011] d.s.. 4995.980996: amd_pstate_perf: amd_min_perf=85 amd_des_perf=85 amd_max_perf=166 cpu_id=11 changed=false fast_switch=true
+
+The ``cpu_frequency`` trace event will be triggered either by the ``schedutil`` scaling
+governor (for the policies it is attached to), or by the ``CPUFreq`` core (for the
+policies with other scaling governors).
+
+
+Tracer Tool
+-------------
+
+``amd_pstate_tracer.py`` can record and parse ``amd-pstate`` trace log, then
+generate performance plots. This utility can be used to debug and tune the
+performance of ``amd-pstate`` driver. The tracer tool needs to import intel
+pstate tracer.
+
+Tracer tool located in ``linux/tools/power/x86/amd_pstate_tracer``. It can be
+used in two ways. If trace file is available, then directly parse the file
+with command ::
+
+ ./amd_pstate_trace.py [-c cpus] -t <trace_file> -n <test_name>
+
+Or generate trace file with root privilege, then parse and plot with command ::
+
+ sudo ./amd_pstate_trace.py [-c cpus] -n <test_name> -i <interval> [-m kbytes]
+
+The test result can be found in ``results/test_name``. Following is the example
+about part of the output. ::
+
+ common_cpu common_secs common_usecs min_perf des_perf max_perf freq mperf apef tsc load duration_ms sample_num elapsed_time common_comm
+ CPU_005 712 116384 39 49 166 0.7565 9645075 2214891 38431470 25.1 11.646 469 2.496 kworker/5:0-40
+ CPU_006 712 116408 39 49 166 0.6769 8950227 1839034 37192089 24.06 11.272 470 2.496 kworker/6:0-1264
+
+Unit Tests for amd-pstate
+-------------------------
+
+``amd-pstate-ut`` is a test module for testing the ``amd-pstate`` driver.
+
+ * It can help all users to verify their processor support (SBIOS/Firmware or Hardware).
+
+ * Kernel can have a basic function test to avoid the kernel regression during the update.
+
+ * We can introduce more functional or performance tests to align the result together, it will benefit power and performance scale optimization.
+
+1. Test case descriptions
+
+ 1). Basic tests
+
+ Test prerequisite and basic functions for the ``amd-pstate`` driver.
+
+ +---------+--------------------------------+------------------------------------------------------------------------------------+
+ | Index | Functions | Description |
+ +=========+================================+====================================================================================+
+ | 1 | amd_pstate_ut_acpi_cpc_valid || Check whether the _CPC object is present in SBIOS. |
+ | | || |
+ | | || The detail refer to `Processor Support <processor_support_>`_. |
+ +---------+--------------------------------+------------------------------------------------------------------------------------+
+ | 2 | amd_pstate_ut_check_enabled || Check whether AMD P-State is enabled. |
+ | | || |
+ | | || AMD P-States and ACPI hardware P-States always can be supported in one processor. |
+ | | | But AMD P-States has the higher priority and if it is enabled with |
+ | | | :c:macro:`MSR_AMD_CPPC_ENABLE` or ``cppc_set_enable``, it will respond to the |
+ | | | request from AMD P-States. |
+ +---------+--------------------------------+------------------------------------------------------------------------------------+
+ | 3 | amd_pstate_ut_check_perf || Check if the each performance values are reasonable. |
+ | | || highest_perf >= nominal_perf > lowest_nonlinear_perf > lowest_perf > 0. |
+ +---------+--------------------------------+------------------------------------------------------------------------------------+
+ | 4 | amd_pstate_ut_check_freq || Check if the each frequency values and max freq when set support boost mode |
+ | | | are reasonable. |
+ | | || max_freq >= nominal_freq > lowest_nonlinear_freq > min_freq > 0 |
+ | | || If boost is not active but supported, this maximum frequency will be larger than |
+ | | | the one in ``cpuinfo``. |
+ +---------+--------------------------------+------------------------------------------------------------------------------------+
+
+ 2). Tbench test
+
+ Test and monitor the cpu changes when running tbench benchmark under the specified governor.
+ These changes include desire performance, frequency, load, performance, energy etc.
+ The specified governor is ondemand or schedutil.
+ Tbench can also be tested on the ``acpi-cpufreq`` kernel driver for comparison.
+
+ 3). Gitsource test
+
+ Test and monitor the cpu changes when running gitsource benchmark under the specified governor.
+ These changes include desire performance, frequency, load, time, energy etc.
+ The specified governor is ondemand or schedutil.
+ Gitsource can also be tested on the ``acpi-cpufreq`` kernel driver for comparison.
+
+#. How to execute the tests
+
+ We use test module in the kselftest frameworks to implement it.
+ We create ``amd-pstate-ut`` module and tie it into kselftest.(for
+ details refer to Linux Kernel Selftests [4]_).
+
+ 1). Build
+
+ + open the :c:macro:`CONFIG_X86_AMD_PSTATE` configuration option.
+ + set the :c:macro:`CONFIG_X86_AMD_PSTATE_UT` configuration option to M.
+ + make project
+ + make selftest ::
+
+ $ cd linux
+ $ make -C tools/testing/selftests
+
+ + make perf ::
+
+ $ cd tools/perf/
+ $ make
+
+
+ 2). Installation & Steps ::
+
+ $ make -C tools/testing/selftests install INSTALL_PATH=~/kselftest
+ $ cp tools/perf/perf /usr/bin/perf
+ $ sudo ./kselftest/run_kselftest.sh -c amd-pstate
+
+ 3). Specified test case ::
+
+ $ cd ~/kselftest/amd-pstate
+ $ sudo ./run.sh -t basic
+ $ sudo ./run.sh -t tbench
+ $ sudo ./run.sh -t tbench -m acpi-cpufreq
+ $ sudo ./run.sh -t gitsource
+ $ sudo ./run.sh -t gitsource -m acpi-cpufreq
+ $ ./run.sh --help
+ ./run.sh: illegal option -- -
+ Usage: ./run.sh [OPTION...]
+ [-h <help>]
+ [-o <output-file-for-dump>]
+ [-c <all: All testing,
+ basic: Basic testing,
+ tbench: Tbench testing,
+ gitsource: Gitsource testing.>]
+ [-t <tbench time limit>]
+ [-p <tbench process number>]
+ [-l <loop times for tbench>]
+ [-i <amd tracer interval>]
+ [-m <comparative test: acpi-cpufreq>]
+
+
+ 4). Results
+
+ + basic
+
+ When you finish test, you will get the following log info ::
+
+ $ dmesg | grep "amd_pstate_ut" | tee log.txt
+ [12977.570663] amd_pstate_ut: 1 amd_pstate_ut_acpi_cpc_valid success!
+ [12977.570673] amd_pstate_ut: 2 amd_pstate_ut_check_enabled success!
+ [12977.571207] amd_pstate_ut: 3 amd_pstate_ut_check_perf success!
+ [12977.571212] amd_pstate_ut: 4 amd_pstate_ut_check_freq success!
+
+ + tbench
+
+ When you finish test, you will get selftest.tbench.csv and png images.
+ The selftest.tbench.csv file contains the raw data and the drop of the comparative test.
+ The png images shows the performance, energy and performan per watt of each test.
+ Open selftest.tbench.csv :
+
+ +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+ + Governor | Round | Des-perf | Freq | Load | Performance | Energy | Performance Per Watt |
+ +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+ + Unit | | | GHz | | MB/s | J | MB/J |
+ +=================================================+==============+==========+=========+==========+=============+=========+======================+
+ + amd-pstate-ondemand | 1 | | | | 2504.05 | 1563.67 | 158.5378 |
+ +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+ + amd-pstate-ondemand | 2 | | | | 2243.64 | 1430.32 | 155.2941 |
+ +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+ + amd-pstate-ondemand | 3 | | | | 2183.88 | 1401.32 | 154.2860 |
+ +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+ + amd-pstate-ondemand | Average | | | | 2310.52 | 1465.1 | 156.1268 |
+ +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+ + amd-pstate-schedutil | 1 | 165.329 | 1.62257 | 99.798 | 2136.54 | 1395.26 | 151.5971 |
+ +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+ + amd-pstate-schedutil | 2 | 166 | 1.49761 | 99.9993 | 2100.56 | 1380.5 | 150.6377 |
+ +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+ + amd-pstate-schedutil | 3 | 166 | 1.47806 | 99.9993 | 2084.12 | 1375.76 | 149.9737 |
+ +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+ + amd-pstate-schedutil | Average | 165.776 | 1.53275 | 99.9322 | 2107.07 | 1383.84 | 150.7399 |
+ +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+ + acpi-cpufreq-ondemand | 1 | | | | 2529.9 | 1564.4 | 160.0997 |
+ +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+ + acpi-cpufreq-ondemand | 2 | | | | 2249.76 | 1432.97 | 155.4297 |
+ +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+ + acpi-cpufreq-ondemand | 3 | | | | 2181.46 | 1406.88 | 153.5060 |
+ +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+ + acpi-cpufreq-ondemand | Average | | | | 2320.37 | 1468.08 | 156.4741 |
+ +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+ + acpi-cpufreq-schedutil | 1 | | | | 2137.64 | 1385.24 | 152.7723 |
+ +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+ + acpi-cpufreq-schedutil | 2 | | | | 2107.05 | 1372.23 | 152.0138 |
+ +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+ + acpi-cpufreq-schedutil | 3 | | | | 2085.86 | 1365.35 | 151.2433 |
+ +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+ + acpi-cpufreq-schedutil | Average | | | | 2110.18 | 1374.27 | 152.0136 |
+ +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+ + acpi-cpufreq-ondemand VS acpi-cpufreq-schedutil | Comprison(%) | | | | -9.0584 | -6.3899 | -2.8506 |
+ +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+ + amd-pstate-ondemand VS amd-pstate-schedutil | Comprison(%) | | | | 8.8053 | -5.5463 | -3.4503 |
+ +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+ + acpi-cpufreq-ondemand VS amd-pstate-ondemand | Comprison(%) | | | | -0.4245 | -0.2029 | -0.2219 |
+ +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+ + acpi-cpufreq-schedutil VS amd-pstate-schedutil | Comprison(%) | | | | -0.1473 | 0.6963 | -0.8378 |
+ +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+
+ + gitsource
+
+ When you finish test, you will get selftest.gitsource.csv and png images.
+ The selftest.gitsource.csv file contains the raw data and the drop of the comparative test.
+ The png images shows the performance, energy and performan per watt of each test.
+ Open selftest.gitsource.csv :
+
+ +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+ + Governor | Round | Des-perf | Freq | Load | Time | Energy | Performance Per Watt |
+ +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+ + Unit | | | GHz | | s | J | 1/J |
+ +=================================================+==============+==========+==========+==========+=============+=========+======================+
+ + amd-pstate-ondemand | 1 | 50.119 | 2.10509 | 23.3076 | 475.69 | 865.78 | 0.001155027 |
+ +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+ + amd-pstate-ondemand | 2 | 94.8006 | 1.98771 | 56.6533 | 467.1 | 839.67 | 0.001190944 |
+ +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+ + amd-pstate-ondemand | 3 | 76.6091 | 2.53251 | 43.7791 | 467.69 | 855.85 | 0.001168429 |
+ +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+ + amd-pstate-ondemand | Average | 73.8429 | 2.20844 | 41.2467 | 470.16 | 853.767 | 0.001171279 |
+ +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+ + amd-pstate-schedutil | 1 | 165.919 | 1.62319 | 98.3868 | 464.17 | 866.8 | 0.001153668 |
+ +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+ + amd-pstate-schedutil | 2 | 165.97 | 1.31309 | 99.5712 | 480.15 | 880.4 | 0.001135847 |
+ +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+ + amd-pstate-schedutil | 3 | 165.973 | 1.28448 | 99.9252 | 481.79 | 867.02 | 0.001153375 |
+ +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+ + amd-pstate-schedutil | Average | 165.954 | 1.40692 | 99.2944 | 475.37 | 871.407 | 0.001147569 |
+ +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+ + acpi-cpufreq-ondemand | 1 | | | | 2379.62 | 742.96 | 0.001345967 |
+ +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+ + acpi-cpufreq-ondemand | 2 | | | | 441.74 | 817.49 | 0.001223256 |
+ +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+ + acpi-cpufreq-ondemand | 3 | | | | 455.48 | 820.01 | 0.001219497 |
+ +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+ + acpi-cpufreq-ondemand | Average | | | | 425.613 | 793.487 | 0.001260260 |
+ +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+ + acpi-cpufreq-schedutil | 1 | | | | 459.69 | 838.54 | 0.001192548 |
+ +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+ + acpi-cpufreq-schedutil | 2 | | | | 466.55 | 830.89 | 0.001203528 |
+ +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+ + acpi-cpufreq-schedutil | 3 | | | | 470.38 | 837.32 | 0.001194286 |
+ +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+ + acpi-cpufreq-schedutil | Average | | | | 465.54 | 835.583 | 0.001196769 |
+ +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+ + acpi-cpufreq-ondemand VS acpi-cpufreq-schedutil | Comprison(%) | | | | 9.3810 | 5.3051 | -5.0379 |
+ +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+ + amd-pstate-ondemand VS amd-pstate-schedutil | Comprison(%) | 124.7392 | -36.2934 | 140.7329 | 1.1081 | 2.0661 | -2.0242 |
+ +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+ + acpi-cpufreq-ondemand VS amd-pstate-ondemand | Comprison(%) | | | | 10.4665 | 7.5968 | -7.0605 |
+ +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+ + acpi-cpufreq-schedutil VS amd-pstate-schedutil | Comprison(%) | | | | 2.1115 | 4.2873 | -4.1110 |
+ +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+
+Reference
+===========
+
+.. [1] AMD64 Architecture Programmer's Manual Volume 2: System Programming,
+ https://www.amd.com/system/files/TechDocs/24593.pdf
+
+.. [2] Advanced Configuration and Power Interface Specification,
+ https://uefi.org/sites/default/files/resources/ACPI_Spec_6_4_Jan22.pdf
+
+.. [3] Processor Programming Reference (PPR) for AMD Family 19h Model 51h, Revision A1 Processors
+ https://www.amd.com/system/files/TechDocs/56569-A1-PUB.zip
+
+.. [4] Linux Kernel Selftests,
+ https://www.kernel.org/doc/html/latest/dev-tools/kselftest.html
diff --git a/Documentation/admin-guide/pm/cpufreq.rst b/Documentation/admin-guide/pm/cpufreq.rst
index 7eca9026a9ed..738d7b4dc33a 100644
--- a/Documentation/admin-guide/pm/cpufreq.rst
+++ b/Documentation/admin-guide/pm/cpufreq.rst
@@ -1,13 +1,16 @@
-.. |struct cpufreq_policy| replace:: :c:type:`struct cpufreq_policy <cpufreq_policy>`
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
.. |intel_pstate| replace:: :doc:`intel_pstate <intel_pstate>`
=======================
CPU Performance Scaling
=======================
-::
+:Copyright: |copy| 2017 Intel Corporation
+
+:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Copyright (c) 2017 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
The Concept of CPU Performance Scaling
======================================
@@ -88,16 +91,16 @@ control the P-state of multiple CPUs at the same time and writing to it affects
all of those CPUs simultaneously.
Sets of CPUs sharing hardware P-state control interfaces are represented by
-``CPUFreq`` as |struct cpufreq_policy| objects. For consistency,
-|struct cpufreq_policy| is also used when there is only one CPU in the given
+``CPUFreq`` as struct cpufreq_policy objects. For consistency,
+struct cpufreq_policy is also used when there is only one CPU in the given
set.
-The ``CPUFreq`` core maintains a pointer to a |struct cpufreq_policy| object for
+The ``CPUFreq`` core maintains a pointer to a struct cpufreq_policy object for
every CPU in the system, including CPUs that are currently offline. If multiple
CPUs share the same hardware P-state control interface, all of the pointers
-corresponding to them point to the same |struct cpufreq_policy| object.
+corresponding to them point to the same struct cpufreq_policy object.
-``CPUFreq`` uses |struct cpufreq_policy| as its basic data type and the design
+``CPUFreq`` uses struct cpufreq_policy as its basic data type and the design
of its user space interface is based on the policy concept.
@@ -143,9 +146,9 @@ CPUs in it.
The next major initialization step for a new policy object is to attach a
scaling governor to it (to begin with, that is the default scaling governor
-determined by the kernel configuration, but it may be changed later
-via ``sysfs``). First, a pointer to the new policy object is passed to the
-governor's ``->init()`` callback which is expected to initialize all of the
+determined by the kernel command line or configuration, but it may be changed
+later via ``sysfs``). First, a pointer to the new policy object is passed to
+the governor's ``->init()`` callback which is expected to initialize all of the
data structures necessary to handle the given policy and, possibly, to add
a governor ``sysfs`` interface to it. Next, the governor is started by
invoking its ``->start()`` callback.
@@ -228,7 +231,7 @@ are the following:
present).
The existence of the limit may be a result of some (often unintentional)
- BIOS settings, restrictions coming from a service processor or another
+ BIOS settings, restrictions coming from a service processor or other
BIOS/HW-based mechanisms.
This does not cover ACPI thermal limitations which can be discovered
@@ -245,6 +248,20 @@ are the following:
If that frequency cannot be determined, this attribute should not
be present.
+``cpuinfo_avg_freq``
+ An average frequency (in KHz) of all CPUs belonging to a given policy,
+ derived from a hardware provided feedback and reported on a time frame
+ spanning at most few milliseconds.
+
+ This is expected to be based on the frequency the hardware actually runs
+ at and, as such, might require specialised hardware support (such as AMU
+ extension on ARM). If one cannot be determined, this attribute should
+ not be present.
+
+ Note that failed attempt to retrieve current frequency for a given
+ CPU(s) will result in an appropriate error, i.e.: EAGAIN for CPU that
+ remains idle (raised on ARM).
+
``cpuinfo_max_freq``
Maximum possible operating frequency the CPUs belonging to this policy
can run at (in kHz).
@@ -257,13 +274,13 @@ are the following:
The time it takes to switch the CPUs belonging to this policy from one
P-state to another, in nanoseconds.
- If unknown or if known to be so high that the scaling driver does not
- work with the `ondemand`_ governor, -1 (:c:macro:`CPUFREQ_ETERNAL`)
- will be returned by reads from this attribute.
-
``related_cpus``
List of all (online and offline) CPUs belonging to this policy.
+``scaling_available_frequencies``
+ List of available frequencies of the CPUs belonging to this policy
+ (in kHz).
+
``scaling_available_governors``
List of ``CPUFreq`` scaling governors present in the kernel that can
be attached to this policy or (if the |intel_pstate| scaling driver is
@@ -286,7 +303,8 @@ are the following:
Some architectures (e.g. ``x86``) may attempt to provide information
more precisely reflecting the current CPU frequency through this
attribute, but that still may not be the exact current CPU frequency as
- seen by the hardware at the moment.
+ seen by the hardware at the moment. This behavior though, is only
+ available via c:macro:``CPUFREQ_ARCH_CUR_FREQ`` option.
``scaling_driver``
The scaling driver currently in use.
@@ -376,7 +394,9 @@ policy limits change after that.
This governor does not do anything by itself. Instead, it allows user space
to set the CPU frequency for the policy it is attached to by writing to the
-``scaling_setspeed`` attribute of that policy.
+``scaling_setspeed`` attribute of that policy. Though the intention may be to
+set an exact frequency for the policy, the actual frequency may vary depending
+on hardware coordination, thermal and power limits, and other factors.
``schedutil``
-------------
@@ -396,8 +416,8 @@ RT or deadline scheduling classes, the governor will increase the frequency to
the allowed maximum (that is, the ``scaling_max_freq`` policy limit). In turn,
if it is invoked by the CFS scheduling class, the governor will use the
Per-Entity Load Tracking (PELT) metric for the root control group of the
-given CPU as the CPU utilization estimate (see the `Per-entity load tracking`_
-LWN.net article for a description of the PELT mechanism). Then, the new
+given CPU as the CPU utilization estimate (see the *Per-entity load tracking*
+LWN.net article [1]_ for a description of the PELT mechanism). Then, the new
CPU frequency to apply is computed in accordance with the formula
f = 1.25 * ``f_0`` * ``util`` / ``max``
@@ -418,8 +438,8 @@ This governor exposes only one tunable:
``rate_limit_us``
Minimum time (in microseconds) that has to pass between two consecutive
- runs of governor computations (default: 1000 times the scaling driver's
- transition latency).
+ runs of governor computations (default: 1.5 times the scaling driver's
+ transition latency or the maximum 2ms).
The purpose of this tunable is to reduce the scheduler context overhead
of the governor which might be excessive without it.
@@ -467,17 +487,17 @@ This governor exposes the following tunables:
This is how often the governor's worker routine should run, in
microseconds.
- Typically, it is set to values of the order of 10000 (10 ms). Its
- default value is equal to the value of ``cpuinfo_transition_latency``
- for each policy this governor is attached to (but since the unit here
- is greater by 1000, this means that the time represented by
- ``sampling_rate`` is 1000 times greater than the transition latency by
- default).
+ Typically, it is set to values of the order of 2000 (2 ms). Its
+ default value is to add a 50% breathing room
+ to ``cpuinfo_transition_latency`` on each policy this governor is
+ attached to. The minimum is typically the length of two scheduler
+ ticks.
If this tunable is per-policy, the following shell command sets the time
- represented by it to be 750 times as high as the transition latency::
+ represented by it to be 1.5 times as high as the transition latency
+ (the default)::
- # echo `$(($(cat cpuinfo_transition_latency) * 750 / 1000)) > ondemand/sampling_rate
+ # echo `$(($(cat cpuinfo_transition_latency) * 3 / 2))` > ondemand/sampling_rate
``up_threshold``
If the estimated CPU load is above this value (in percent), the governor
@@ -698,4 +718,8 @@ hardware feature (e.g. all Intel ones), even if the
:c:macro:`CONFIG_X86_ACPI_CPUFREQ_CPB` configuration option is set.
-.. _Per-entity load tracking: https://lwn.net/Articles/531853/
+References
+==========
+
+.. [1] Jonathan Corbet, *Per-entity load tracking*,
+ https://lwn.net/Articles/531853/
diff --git a/Documentation/admin-guide/pm/cpufreq_drivers.rst b/Documentation/admin-guide/pm/cpufreq_drivers.rst
new file mode 100644
index 000000000000..9a134ae65803
--- /dev/null
+++ b/Documentation/admin-guide/pm/cpufreq_drivers.rst
@@ -0,0 +1,274 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======================================================
+Legacy Documentation of CPU Performance Scaling Drivers
+=======================================================
+
+Included below are historic documents describing assorted
+:doc:`CPU performance scaling <cpufreq>` drivers. They are reproduced verbatim,
+with the original white space formatting and indentation preserved, except for
+the added leading space character in every line of text.
+
+
+AMD PowerNow! Drivers
+=====================
+
+::
+
+ PowerNow! and Cool'n'Quiet are AMD names for frequency
+ management capabilities in AMD processors. As the hardware
+ implementation changes in new generations of the processors,
+ there is a different cpu-freq driver for each generation.
+
+ Note that the driver's will not load on the "wrong" hardware,
+ so it is safe to try each driver in turn when in doubt as to
+ which is the correct driver.
+
+ Note that the functionality to change frequency (and voltage)
+ is not available in all processors. The drivers will refuse
+ to load on processors without this capability. The capability
+ is detected with the cpuid instruction.
+
+ The drivers use BIOS supplied tables to obtain frequency and
+ voltage information appropriate for a particular platform.
+ Frequency transitions will be unavailable if the BIOS does
+ not supply these tables.
+
+ 6th Generation: powernow-k6
+
+ 7th Generation: powernow-k7: Athlon, Duron, Geode.
+
+ 8th Generation: powernow-k8: Athlon, Athlon 64, Opteron, Sempron.
+ Documentation on this functionality in 8th generation processors
+ is available in the "BIOS and Kernel Developer's Guide", publication
+ 26094, in chapter 9, available for download from www.amd.com.
+
+ BIOS supplied data, for powernow-k7 and for powernow-k8, may be
+ from either the PSB table or from ACPI objects. The ACPI support
+ is only available if the kernel config sets CONFIG_ACPI_PROCESSOR.
+ The powernow-k8 driver will attempt to use ACPI if so configured,
+ and fall back to PST if that fails.
+ The powernow-k7 driver will try to use the PSB support first, and
+ fall back to ACPI if the PSB support fails. A module parameter,
+ acpi_force, is provided to force ACPI support to be used instead
+ of PSB support.
+
+
+``cpufreq-nforce2``
+===================
+
+::
+
+ The cpufreq-nforce2 driver changes the FSB on nVidia nForce2 platforms.
+
+ This works better than on other platforms, because the FSB of the CPU
+ can be controlled independently from the PCI/AGP clock.
+
+ The module has two options:
+
+ fid: multiplier * 10 (for example 8.5 = 85)
+ min_fsb: minimum FSB
+
+ If not set, fid is calculated from the current CPU speed and the FSB.
+ min_fsb defaults to FSB at boot time - 50 MHz.
+
+ IMPORTANT: The available range is limited downwards!
+ Also the minimum available FSB can differ, for systems
+ booting with 200 MHz, 150 should always work.
+
+
+``pcc-cpufreq``
+===============
+
+::
+
+ /*
+ * pcc-cpufreq.txt - PCC interface documentation
+ *
+ * Copyright (C) 2009 Red Hat, Matthew Garrett <mjg@redhat.com>
+ * Copyright (C) 2009 Hewlett-Packard Development Company, L.P.
+ * Nagananda Chumbalkar <nagananda.chumbalkar@hp.com>
+ */
+
+
+ Processor Clocking Control Driver
+ ---------------------------------
+
+ Contents:
+ ---------
+ 1. Introduction
+ 1.1 PCC interface
+ 1.1.1 Get Average Frequency
+ 1.1.2 Set Desired Frequency
+ 1.2 Platforms affected
+ 2. Driver and /sys details
+ 2.1 scaling_available_frequencies
+ 2.2 cpuinfo_transition_latency
+ 2.3 cpuinfo_cur_freq
+ 2.4 related_cpus
+ 3. Caveats
+
+ 1. Introduction:
+ ----------------
+ Processor Clocking Control (PCC) is an interface between the platform
+ firmware and OSPM. It is a mechanism for coordinating processor
+ performance (ie: frequency) between the platform firmware and the OS.
+
+ The PCC driver (pcc-cpufreq) allows OSPM to take advantage of the PCC
+ interface.
+
+ OS utilizes the PCC interface to inform platform firmware what frequency the
+ OS wants for a logical processor. The platform firmware attempts to achieve
+ the requested frequency. If the request for the target frequency could not be
+ satisfied by platform firmware, then it usually means that power budget
+ conditions are in place, and "power capping" is taking place.
+
+ 1.1 PCC interface:
+ ------------------
+ The complete PCC specification is available here:
+ https://acpica.org/sites/acpica/files/Processor-Clocking-Control-v1p0.pdf
+
+ PCC relies on a shared memory region that provides a channel for communication
+ between the OS and platform firmware. PCC also implements a "doorbell" that
+ is used by the OS to inform the platform firmware that a command has been
+ sent.
+
+ The ACPI PCCH() method is used to discover the location of the PCC shared
+ memory region. The shared memory region header contains the "command" and
+ "status" interface. PCCH() also contains details on how to access the platform
+ doorbell.
+
+ The following commands are supported by the PCC interface:
+ * Get Average Frequency
+ * Set Desired Frequency
+
+ The ACPI PCCP() method is implemented for each logical processor and is
+ used to discover the offsets for the input and output buffers in the shared
+ memory region.
+
+ When PCC mode is enabled, the platform will not expose processor performance
+ or throttle states (_PSS, _TSS and related ACPI objects) to OSPM. Therefore,
+ the native P-state driver (such as acpi-cpufreq for Intel, powernow-k8 for
+ AMD) will not load.
+
+ However, OSPM remains in control of policy. The governor (eg: "ondemand")
+ computes the required performance for each processor based on server workload.
+ The PCC driver fills in the command interface, and the input buffer and
+ communicates the request to the platform firmware. The platform firmware is
+ responsible for delivering the requested performance.
+
+ Each PCC command is "global" in scope and can affect all the logical CPUs in
+ the system. Therefore, PCC is capable of performing "group" updates. With PCC
+ the OS is capable of getting/setting the frequency of all the logical CPUs in
+ the system with a single call to the BIOS.
+
+ 1.1.1 Get Average Frequency:
+ ----------------------------
+ This command is used by the OSPM to query the running frequency of the
+ processor since the last time this command was completed. The output buffer
+ indicates the average unhalted frequency of the logical processor expressed as
+ a percentage of the nominal (ie: maximum) CPU frequency. The output buffer
+ also signifies if the CPU frequency is limited by a power budget condition.
+
+ 1.1.2 Set Desired Frequency:
+ ----------------------------
+ This command is used by the OSPM to communicate to the platform firmware the
+ desired frequency for a logical processor. The output buffer is currently
+ ignored by OSPM. The next invocation of "Get Average Frequency" will inform
+ OSPM if the desired frequency was achieved or not.
+
+ 1.2 Platforms affected:
+ -----------------------
+ The PCC driver will load on any system where the platform firmware:
+ * supports the PCC interface, and the associated PCCH() and PCCP() methods
+ * assumes responsibility for managing the hardware clocking controls in order
+ to deliver the requested processor performance
+
+ Currently, certain HP ProLiant platforms implement the PCC interface. On those
+ platforms PCC is the "default" choice.
+
+ However, it is possible to disable this interface via a BIOS setting. In
+ such an instance, as is also the case on platforms where the PCC interface
+ is not implemented, the PCC driver will fail to load silently.
+
+ 2. Driver and /sys details:
+ ---------------------------
+ When the driver loads, it merely prints the lowest and the highest CPU
+ frequencies supported by the platform firmware.
+
+ The PCC driver loads with a message such as:
+ pcc-cpufreq: (v1.00.00) driver loaded with frequency limits: 1600 MHz, 2933
+ MHz
+
+ This means that the OPSM can request the CPU to run at any frequency in
+ between the limits (1600 MHz, and 2933 MHz) specified in the message.
+
+ Internally, there is no need for the driver to convert the "target" frequency
+ to a corresponding P-state.
+
+ The VERSION number for the driver will be of the format v.xy.ab.
+ eg: 1.00.02
+ ----- --
+ | |
+ | -- this will increase with bug fixes/enhancements to the driver
+ |-- this is the version of the PCC specification the driver adheres to
+
+
+ The following is a brief discussion on some of the fields exported via the
+ /sys filesystem and how their values are affected by the PCC driver:
+
+ 2.1 scaling_available_frequencies:
+ ----------------------------------
+ scaling_available_frequencies is not created in /sys. No intermediate
+ frequencies need to be listed because the BIOS will try to achieve any
+ frequency, within limits, requested by the governor. A frequency does not have
+ to be strictly associated with a P-state.
+
+ 2.2 cpuinfo_transition_latency:
+ -------------------------------
+ The cpuinfo_transition_latency field is 0. The PCC specification does
+ not include a field to expose this value currently.
+
+ 2.3 cpuinfo_cur_freq:
+ ---------------------
+ A) Often cpuinfo_cur_freq will show a value different than what is declared
+ in the scaling_available_frequencies or scaling_cur_freq, or scaling_max_freq.
+ This is due to "turbo boost" available on recent Intel processors. If certain
+ conditions are met the BIOS can achieve a slightly higher speed than requested
+ by OSPM. An example:
+
+ scaling_cur_freq : 2933000
+ cpuinfo_cur_freq : 3196000
+
+ B) There is a round-off error associated with the cpuinfo_cur_freq value.
+ Since the driver obtains the current frequency as a "percentage" (%) of the
+ nominal frequency from the BIOS, sometimes, the values displayed by
+ scaling_cur_freq and cpuinfo_cur_freq may not match. An example:
+
+ scaling_cur_freq : 1600000
+ cpuinfo_cur_freq : 1583000
+
+ In this example, the nominal frequency is 2933 MHz. The driver obtains the
+ current frequency, cpuinfo_cur_freq, as 54% of the nominal frequency:
+
+ 54% of 2933 MHz = 1583 MHz
+
+ Nominal frequency is the maximum frequency of the processor, and it usually
+ corresponds to the frequency of the P0 P-state.
+
+ 2.4 related_cpus:
+ -----------------
+ The related_cpus field is identical to affected_cpus.
+
+ affected_cpus : 4
+ related_cpus : 4
+
+ Currently, the PCC driver does not evaluate _PSD. The platforms that support
+ PCC do not implement SW_ALL. So OSPM doesn't need to perform any coordination
+ to ensure that the same frequency is requested of all dependent CPUs.
+
+ 3. Caveats:
+ -----------
+ The "cpufreq_stats" module in its present form cannot be loaded and
+ expected to work with the PCC driver. Since the "cpufreq_stats" module
+ provides information wrt each P-state, it is not applicable to the PCC driver.
diff --git a/Documentation/admin-guide/pm/cpuidle.rst b/Documentation/admin-guide/pm/cpuidle.rst
new file mode 100644
index 000000000000..0c090b076224
--- /dev/null
+++ b/Documentation/admin-guide/pm/cpuidle.rst
@@ -0,0 +1,657 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+.. |struct cpuidle_state| replace:: :c:type:`struct cpuidle_state <cpuidle_state>`
+.. |cpufreq| replace:: :doc:`CPU Performance Scaling <cpufreq>`
+
+========================
+CPU Idle Time Management
+========================
+
+:Copyright: |copy| 2018 Intel Corporation
+
+:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+
+Concepts
+========
+
+Modern processors are generally able to enter states in which the execution of
+a program is suspended and instructions belonging to it are not fetched from
+memory or executed. Those states are the *idle* states of the processor.
+
+Since part of the processor hardware is not used in idle states, entering them
+generally allows power drawn by the processor to be reduced and, in consequence,
+it is an opportunity to save energy.
+
+CPU idle time management is an energy-efficiency feature concerned about using
+the idle states of processors for this purpose.
+
+Logical CPUs
+------------
+
+CPU idle time management operates on CPUs as seen by the *CPU scheduler* (that
+is the part of the kernel responsible for the distribution of computational
+work in the system). In its view, CPUs are *logical* units. That is, they need
+not be separate physical entities and may just be interfaces appearing to
+software as individual single-core processors. In other words, a CPU is an
+entity which appears to be fetching instructions that belong to one sequence
+(program) from memory and executing them, but it need not work this way
+physically. Generally, three different cases can be consider here.
+
+First, if the whole processor can only follow one sequence of instructions (one
+program) at a time, it is a CPU. In that case, if the hardware is asked to
+enter an idle state, that applies to the processor as a whole.
+
+Second, if the processor is multi-core, each core in it is able to follow at
+least one program at a time. The cores need not be entirely independent of each
+other (for example, they may share caches), but still most of the time they
+work physically in parallel with each other, so if each of them executes only
+one program, those programs run mostly independently of each other at the same
+time. The entire cores are CPUs in that case and if the hardware is asked to
+enter an idle state, that applies to the core that asked for it in the first
+place, but it also may apply to a larger unit (say a "package" or a "cluster")
+that the core belongs to (in fact, it may apply to an entire hierarchy of larger
+units containing the core). Namely, if all of the cores in the larger unit
+except for one have been put into idle states at the "core level" and the
+remaining core asks the processor to enter an idle state, that may trigger it
+to put the whole larger unit into an idle state which also will affect the
+other cores in that unit.
+
+Finally, each core in a multi-core processor may be able to follow more than one
+program in the same time frame (that is, each core may be able to fetch
+instructions from multiple locations in memory and execute them in the same time
+frame, but not necessarily entirely in parallel with each other). In that case
+the cores present themselves to software as "bundles" each consisting of
+multiple individual single-core "processors", referred to as *hardware threads*
+(or hyper-threads specifically on Intel hardware), that each can follow one
+sequence of instructions. Then, the hardware threads are CPUs from the CPU idle
+time management perspective and if the processor is asked to enter an idle state
+by one of them, the hardware thread (or CPU) that asked for it is stopped, but
+nothing more happens, unless all of the other hardware threads within the same
+core also have asked the processor to enter an idle state. In that situation,
+the core may be put into an idle state individually or a larger unit containing
+it may be put into an idle state as a whole (if the other cores within the
+larger unit are in idle states already).
+
+Idle CPUs
+---------
+
+Logical CPUs, simply referred to as "CPUs" in what follows, are regarded as
+*idle* by the Linux kernel when there are no tasks to run on them except for the
+special "idle" task.
+
+Tasks are the CPU scheduler's representation of work. Each task consists of a
+sequence of instructions to execute, or code, data to be manipulated while
+running that code, and some context information that needs to be loaded into the
+processor every time the task's code is run by a CPU. The CPU scheduler
+distributes work by assigning tasks to run to the CPUs present in the system.
+
+Tasks can be in various states. In particular, they are *runnable* if there are
+no specific conditions preventing their code from being run by a CPU as long as
+there is a CPU available for that (for example, they are not waiting for any
+events to occur or similar). When a task becomes runnable, the CPU scheduler
+assigns it to one of the available CPUs to run and if there are no more runnable
+tasks assigned to it, the CPU will load the given task's context and run its
+code (from the instruction following the last one executed so far, possibly by
+another CPU). [If there are multiple runnable tasks assigned to one CPU
+simultaneously, they will be subject to prioritization and time sharing in order
+to allow them to make some progress over time.]
+
+The special "idle" task becomes runnable if there are no other runnable tasks
+assigned to the given CPU and the CPU is then regarded as idle. In other words,
+in Linux idle CPUs run the code of the "idle" task called *the idle loop*. That
+code may cause the processor to be put into one of its idle states, if they are
+supported, in order to save energy, but if the processor does not support any
+idle states, or there is not enough time to spend in an idle state before the
+next wakeup event, or there are strict latency constraints preventing any of the
+available idle states from being used, the CPU will simply execute more or less
+useless instructions in a loop until it is assigned a new task to run.
+
+
+.. _idle-loop:
+
+The Idle Loop
+=============
+
+The idle loop code takes two major steps in every iteration of it. First, it
+calls into a code module referred to as the *governor* that belongs to the CPU
+idle time management subsystem called ``CPUIdle`` to select an idle state for
+the CPU to ask the hardware to enter. Second, it invokes another code module
+from the ``CPUIdle`` subsystem, called the *driver*, to actually ask the
+processor hardware to enter the idle state selected by the governor.
+
+The role of the governor is to find an idle state most suitable for the
+conditions at hand. For this purpose, idle states that the hardware can be
+asked to enter by logical CPUs are represented in an abstract way independent of
+the platform or the processor architecture and organized in a one-dimensional
+(linear) array. That array has to be prepared and supplied by the ``CPUIdle``
+driver matching the platform the kernel is running on at the initialization
+time. This allows ``CPUIdle`` governors to be independent of the underlying
+hardware and to work with any platforms that the Linux kernel can run on.
+
+Each idle state present in that array is characterized by two parameters to be
+taken into account by the governor, the *target residency* and the (worst-case)
+*exit latency*. The target residency is the minimum time the hardware must
+spend in the given state, including the time needed to enter it (which may be
+substantial), in order to save more energy than it would save by entering one of
+the shallower idle states instead. [The "depth" of an idle state roughly
+corresponds to the power drawn by the processor in that state.] The exit
+latency, in turn, is the maximum time it will take a CPU asking the processor
+hardware to enter an idle state to start executing the first instruction after a
+wakeup from that state. Note that in general the exit latency also must cover
+the time needed to enter the given state in case the wakeup occurs when the
+hardware is entering it and it must be entered completely to be exited in an
+ordered manner.
+
+There are two types of information that can influence the governor's decisions.
+First of all, the governor knows the time until the closest timer event. That
+time is known exactly, because the kernel programs timers and it knows exactly
+when they will trigger, and it is the maximum time the hardware that the given
+CPU depends on can spend in an idle state, including the time necessary to enter
+and exit it. However, the CPU may be woken up by a non-timer event at any time
+(in particular, before the closest timer triggers) and it generally is not known
+when that may happen. The governor can only see how much time the CPU actually
+was idle after it has been woken up (that time will be referred to as the *idle
+duration* from now on) and it can use that information somehow along with the
+time until the closest timer to estimate the idle duration in future. How the
+governor uses that information depends on what algorithm is implemented by it
+and that is the primary reason for having more than one governor in the
+``CPUIdle`` subsystem.
+
+There are four ``CPUIdle`` governors available, ``menu``, `TEO <teo-gov_>`_,
+``ladder`` and ``haltpoll``. Which of them is used by default depends on the
+configuration of the kernel and in particular on whether or not the scheduler
+tick can be `stopped by the idle loop <idle-cpus-and-tick_>`_. Available
+governors can be read from the :file:`available_governors`, and the governor
+can be changed at runtime. The name of the ``CPUIdle`` governor currently
+used by the kernel can be read from the :file:`current_governor_ro` or
+:file:`current_governor` file under :file:`/sys/devices/system/cpu/cpuidle/`
+in ``sysfs``.
+
+Which ``CPUIdle`` driver is used, on the other hand, usually depends on the
+platform the kernel is running on, but there are platforms with more than one
+matching driver. For example, there are two drivers that can work with the
+majority of Intel platforms, ``intel_idle`` and ``acpi_idle``, one with
+hardcoded idle states information and the other able to read that information
+from the system's ACPI tables, respectively. Still, even in those cases, the
+driver chosen at the system initialization time cannot be replaced later, so the
+decision on which one of them to use has to be made early (on Intel platforms
+the ``acpi_idle`` driver will be used if ``intel_idle`` is disabled for some
+reason or if it does not recognize the processor). The name of the ``CPUIdle``
+driver currently used by the kernel can be read from the :file:`current_driver`
+file under :file:`/sys/devices/system/cpu/cpuidle/` in ``sysfs``.
+
+
+.. _idle-cpus-and-tick:
+
+Idle CPUs and The Scheduler Tick
+================================
+
+The scheduler tick is a timer that triggers periodically in order to implement
+the time sharing strategy of the CPU scheduler. Of course, if there are
+multiple runnable tasks assigned to one CPU at the same time, the only way to
+allow them to make reasonable progress in a given time frame is to make them
+share the available CPU time. Namely, in rough approximation, each task is
+given a slice of the CPU time to run its code, subject to the scheduling class,
+prioritization and so on and when that time slice is used up, the CPU should be
+switched over to running (the code of) another task. The currently running task
+may not want to give the CPU away voluntarily, however, and the scheduler tick
+is there to make the switch happen regardless. That is not the only role of the
+tick, but it is the primary reason for using it.
+
+The scheduler tick is problematic from the CPU idle time management perspective,
+because it triggers periodically and relatively often (depending on the kernel
+configuration, the length of the tick period is between 1 ms and 10 ms).
+Thus, if the tick is allowed to trigger on idle CPUs, it will not make sense
+for them to ask the hardware to enter idle states with target residencies above
+the tick period length. Moreover, in that case the idle duration of any CPU
+will never exceed the tick period length and the energy used for entering and
+exiting idle states due to the tick wakeups on idle CPUs will be wasted.
+
+Fortunately, it is not really necessary to allow the tick to trigger on idle
+CPUs, because (by definition) they have no tasks to run except for the special
+"idle" one. In other words, from the CPU scheduler perspective, the only user
+of the CPU time on them is the idle loop. Since the time of an idle CPU need
+not be shared between multiple runnable tasks, the primary reason for using the
+tick goes away if the given CPU is idle. Consequently, it is possible to stop
+the scheduler tick entirely on idle CPUs in principle, even though that may not
+always be worth the effort.
+
+Whether or not it makes sense to stop the scheduler tick in the idle loop
+depends on what is expected by the governor. First, if there is another
+(non-tick) timer due to trigger within the tick range, stopping the tick clearly
+would be a waste of time, even though the timer hardware may not need to be
+reprogrammed in that case. Second, if the governor is expecting a non-timer
+wakeup within the tick range, stopping the tick is not necessary and it may even
+be harmful. Namely, in that case the governor will select an idle state with
+the target residency within the time until the expected wakeup, so that state is
+going to be relatively shallow. The governor really cannot select a deep idle
+state then, as that would contradict its own expectation of a wakeup in short
+order. Now, if the wakeup really occurs shortly, stopping the tick would be a
+waste of time and in this case the timer hardware would need to be reprogrammed,
+which is expensive. On the other hand, if the tick is stopped and the wakeup
+does not occur any time soon, the hardware may spend indefinite amount of time
+in the shallow idle state selected by the governor, which will be a waste of
+energy. Hence, if the governor is expecting a wakeup of any kind within the
+tick range, it is better to allow the tick trigger. Otherwise, however, the
+governor will select a relatively deep idle state, so the tick should be stopped
+so that it does not wake up the CPU too early.
+
+In any case, the governor knows what it is expecting and the decision on whether
+or not to stop the scheduler tick belongs to it. Still, if the tick has been
+stopped already (in one of the previous iterations of the loop), it is better
+to leave it as is and the governor needs to take that into account.
+
+The kernel can be configured to disable stopping the scheduler tick in the idle
+loop altogether. That can be done through the build-time configuration of it
+(by unsetting the ``CONFIG_NO_HZ_IDLE`` configuration option) or by passing
+``nohz=off`` to it in the command line. In both cases, as the stopping of the
+scheduler tick is disabled, the governor's decisions regarding it are simply
+ignored by the idle loop code and the tick is never stopped.
+
+The systems that run kernels configured to allow the scheduler tick to be
+stopped on idle CPUs are referred to as *tickless* systems and they are
+generally regarded as more energy-efficient than the systems running kernels in
+which the tick cannot be stopped. If the given system is tickless, it will use
+the ``menu`` governor by default and if it is not tickless, the default
+``CPUIdle`` governor on it will be ``ladder``.
+
+
+.. _menu-gov:
+
+The ``menu`` Governor
+=====================
+
+The ``menu`` governor is the default ``CPUIdle`` governor for tickless systems.
+It is quite complex, but the basic principle of its design is straightforward.
+Namely, when invoked to select an idle state for a CPU (i.e. an idle state that
+the CPU will ask the processor hardware to enter), it attempts to predict the
+idle duration and uses the predicted value for idle state selection.
+
+It first uses a simple pattern recognition algorithm to obtain a preliminary
+idle duration prediction. Namely, it saves the last 8 observed idle duration
+values and, when predicting the idle duration next time, it computes the average
+and variance of them. If the variance is small (smaller than 400 square
+milliseconds) or it is small relative to the average (the average is greater
+that 6 times the standard deviation), the average is regarded as the "typical
+interval" value. Otherwise, either the longest or the shortest (depending on
+which one is farther from the average) of the saved observed idle duration
+values is discarded and the computation is repeated for the remaining ones.
+
+Again, if the variance of them is small (in the above sense), the average is
+taken as the "typical interval" value and so on, until either the "typical
+interval" is determined or too many data points are disregarded. In the latter
+case, if the size of the set of data points still under consideration is
+sufficiently large, the next idle duration is not likely to be above the largest
+idle duration value still in that set, so that value is taken as the predicted
+next idle duration. Finally, if the set of data points still under
+consideration is too small, no prediction is made.
+
+If the preliminary prediction of the next idle duration computed this way is
+long enough, the governor obtains the time until the closest timer event with
+the assumption that the scheduler tick will be stopped. That time, referred to
+as the *sleep length* in what follows, is the upper bound on the time before the
+next CPU wakeup. It is used to determine the sleep length range, which in turn
+is needed to get the sleep length correction factor.
+
+The ``menu`` governor maintains an array containing several correction factor
+values that correspond to different sleep length ranges organized so that each
+range represented in the array is approximately 10 times wider than the previous
+one.
+
+The correction factor for the given sleep length range (determined before
+selecting the idle state for the CPU) is updated after the CPU has been woken
+up and the closer the sleep length is to the observed idle duration, the closer
+to 1 the correction factor becomes (it must fall between 0 and 1 inclusive).
+The sleep length is multiplied by the correction factor for the range that it
+falls into to obtain an approximation of the predicted idle duration that is
+compared to the "typical interval" determined previously and the minimum of
+the two is taken as the final idle duration prediction.
+
+If the "typical interval" value is small, which means that the CPU is likely
+to be woken up soon enough, the sleep length computation is skipped as it may
+be costly and the idle duration is simply predicted to equal the "typical
+interval" value.
+
+Now, the governor is ready to walk the list of idle states and choose one of
+them. For this purpose, it compares the target residency of each state with
+the predicted idle duration and the exit latency of it with the with the latency
+limit coming from the power management quality of service, or `PM QoS <cpu-pm-qos_>`_,
+framework. It selects the state with the target residency closest to the predicted
+idle duration, but still below it, and exit latency that does not exceed the
+limit.
+
+In the final step the governor may still need to refine the idle state selection
+if it has not decided to `stop the scheduler tick <idle-cpus-and-tick_>`_. That
+happens if the idle duration predicted by it is less than the tick period and
+the tick has not been stopped already (in a previous iteration of the idle
+loop). Then, the sleep length used in the previous computations may not reflect
+the real time until the closest timer event and if it really is greater than
+that time, the governor may need to select a shallower state with a suitable
+target residency.
+
+
+.. _teo-gov:
+
+The Timer Events Oriented (TEO) Governor
+========================================
+
+The timer events oriented (TEO) governor is an alternative ``CPUIdle`` governor
+for tickless systems. It follows the same basic strategy as the ``menu`` `one
+<menu-gov_>`_: it always tries to find the deepest idle state suitable for the
+given conditions. However, it applies a different approach to that problem.
+
+.. kernel-doc:: drivers/cpuidle/governors/teo.c
+ :doc: teo-description
+
+.. _idle-states-representation:
+
+Representation of Idle States
+=============================
+
+For the CPU idle time management purposes all of the physical idle states
+supported by the processor have to be represented as a one-dimensional array of
+|struct cpuidle_state| objects each allowing an individual (logical) CPU to ask
+the processor hardware to enter an idle state of certain properties. If there
+is a hierarchy of units in the processor, one |struct cpuidle_state| object can
+cover a combination of idle states supported by the units at different levels of
+the hierarchy. In that case, the `target residency and exit latency parameters
+of it <idle-loop_>`_, must reflect the properties of the idle state at the
+deepest level (i.e. the idle state of the unit containing all of the other
+units).
+
+For example, take a processor with two cores in a larger unit referred to as
+a "module" and suppose that asking the hardware to enter a specific idle state
+(say "X") at the "core" level by one core will trigger the module to try to
+enter a specific idle state of its own (say "MX") if the other core is in idle
+state "X" already. In other words, asking for idle state "X" at the "core"
+level gives the hardware a license to go as deep as to idle state "MX" at the
+"module" level, but there is no guarantee that this is going to happen (the core
+asking for idle state "X" may just end up in that state by itself instead).
+Then, the target residency of the |struct cpuidle_state| object representing
+idle state "X" must reflect the minimum time to spend in idle state "MX" of
+the module (including the time needed to enter it), because that is the minimum
+time the CPU needs to be idle to save any energy in case the hardware enters
+that state. Analogously, the exit latency parameter of that object must cover
+the exit time of idle state "MX" of the module (and usually its entry time too),
+because that is the maximum delay between a wakeup signal and the time the CPU
+will start to execute the first new instruction (assuming that both cores in the
+module will always be ready to execute instructions as soon as the module
+becomes operational as a whole).
+
+There are processors without direct coordination between different levels of the
+hierarchy of units inside them, however. In those cases asking for an idle
+state at the "core" level does not automatically affect the "module" level, for
+example, in any way and the ``CPUIdle`` driver is responsible for the entire
+handling of the hierarchy. Then, the definition of the idle state objects is
+entirely up to the driver, but still the physical properties of the idle state
+that the processor hardware finally goes into must always follow the parameters
+used by the governor for idle state selection (for instance, the actual exit
+latency of that idle state must not exceed the exit latency parameter of the
+idle state object selected by the governor).
+
+In addition to the target residency and exit latency idle state parameters
+discussed above, the objects representing idle states each contain a few other
+parameters describing the idle state and a pointer to the function to run in
+order to ask the hardware to enter that state. Also, for each
+|struct cpuidle_state| object, there is a corresponding
+:c:type:`struct cpuidle_state_usage <cpuidle_state_usage>` one containing usage
+statistics of the given idle state. That information is exposed by the kernel
+via ``sysfs``.
+
+For each CPU in the system, there is a :file:`/sys/devices/system/cpu/cpu<N>/cpuidle/`
+directory in ``sysfs``, where the number ``<N>`` is assigned to the given
+CPU at the initialization time. That directory contains a set of subdirectories
+called :file:`state0`, :file:`state1` and so on, up to the number of idle state
+objects defined for the given CPU minus one. Each of these directories
+corresponds to one idle state object and the larger the number in its name, the
+deeper the (effective) idle state represented by it. Each of them contains
+a number of files (attributes) representing the properties of the idle state
+object corresponding to it, as follows:
+
+``above``
+ Total number of times this idle state had been asked for, but the
+ observed idle duration was certainly too short to match its target
+ residency.
+
+``below``
+ Total number of times this idle state had been asked for, but certainly
+ a deeper idle state would have been a better match for the observed idle
+ duration.
+
+``desc``
+ Description of the idle state.
+
+``disable``
+ Whether or not this idle state is disabled.
+
+``default_status``
+ The default status of this state, "enabled" or "disabled".
+
+``latency``
+ Exit latency of the idle state in microseconds.
+
+``name``
+ Name of the idle state.
+
+``power``
+ Power drawn by hardware in this idle state in milliwatts (if specified,
+ 0 otherwise).
+
+``residency``
+ Target residency of the idle state in microseconds.
+
+``time``
+ Total time spent in this idle state by the given CPU (as measured by the
+ kernel) in microseconds.
+
+``usage``
+ Total number of times the hardware has been asked by the given CPU to
+ enter this idle state.
+
+``rejected``
+ Total number of times a request to enter this idle state on the given
+ CPU was rejected.
+
+The :file:`desc` and :file:`name` files both contain strings. The difference
+between them is that the name is expected to be more concise, while the
+description may be longer and it may contain white space or special characters.
+The other files listed above contain integer numbers.
+
+The :file:`disable` attribute is the only writeable one. If it contains 1, the
+given idle state is disabled for this particular CPU, which means that the
+governor will never select it for this particular CPU and the ``CPUIdle``
+driver will never ask the hardware to enter it for that CPU as a result.
+However, disabling an idle state for one CPU does not prevent it from being
+asked for by the other CPUs, so it must be disabled for all of them in order to
+never be asked for by any of them. [Note that, due to the way the ``ladder``
+governor is implemented, disabling an idle state prevents that governor from
+selecting any idle states deeper than the disabled one too.]
+
+If the :file:`disable` attribute contains 0, the given idle state is enabled for
+this particular CPU, but it still may be disabled for some or all of the other
+CPUs in the system at the same time. Writing 1 to it causes the idle state to
+be disabled for this particular CPU and writing 0 to it allows the governor to
+take it into consideration for the given CPU and the driver to ask for it,
+unless that state was disabled globally in the driver (in which case it cannot
+be used at all).
+
+The :file:`power` attribute is not defined very well, especially for idle state
+objects representing combinations of idle states at different levels of the
+hierarchy of units in the processor, and it generally is hard to obtain idle
+state power numbers for complex hardware, so :file:`power` often contains 0 (not
+available) and if it contains a nonzero number, that number may not be very
+accurate and it should not be relied on for anything meaningful.
+
+The number in the :file:`time` file generally may be greater than the total time
+really spent by the given CPU in the given idle state, because it is measured by
+the kernel and it may not cover the cases in which the hardware refused to enter
+this idle state and entered a shallower one instead of it (or even it did not
+enter any idle state at all). The kernel can only measure the time span between
+asking the hardware to enter an idle state and the subsequent wakeup of the CPU
+and it cannot say what really happened in the meantime at the hardware level.
+Moreover, if the idle state object in question represents a combination of idle
+states at different levels of the hierarchy of units in the processor,
+the kernel can never say how deep the hardware went down the hierarchy in any
+particular case. For these reasons, the only reliable way to find out how
+much time has been spent by the hardware in different idle states supported by
+it is to use idle state residency counters in the hardware, if available.
+
+Generally, an interrupt received when trying to enter an idle state causes the
+idle state entry request to be rejected, in which case the ``CPUIdle`` driver
+may return an error code to indicate that this was the case. The :file:`usage`
+and :file:`rejected` files report the number of times the given idle state
+was entered successfully or rejected, respectively.
+
+.. _cpu-pm-qos:
+
+Power Management Quality of Service for CPUs
+============================================
+
+The power management quality of service (PM QoS) framework in the Linux kernel
+allows kernel code and user space processes to set constraints on various
+energy-efficiency features of the kernel to prevent performance from dropping
+below a required level.
+
+CPU idle time management can be affected by PM QoS in two ways, through the
+global CPU latency limit and through the resume latency constraints for
+individual CPUs. Kernel code (e.g. device drivers) can set both of them with
+the help of special internal interfaces provided by the PM QoS framework. User
+space can modify the former by opening the :file:`cpu_dma_latency` special
+device file under :file:`/dev/` and writing a binary value (interpreted as a
+signed 32-bit integer) to it. In turn, the resume latency constraint for a CPU
+can be modified from user space by writing a string (representing a signed
+32-bit integer) to the :file:`power/pm_qos_resume_latency_us` file under
+:file:`/sys/devices/system/cpu/cpu<N>/` in ``sysfs``, where the CPU number
+``<N>`` is allocated at the system initialization time. Negative values
+will be rejected in both cases and, also in both cases, the written integer
+number will be interpreted as a requested PM QoS constraint in microseconds.
+
+The requested value is not automatically applied as a new constraint, however,
+as it may be less restrictive (greater in this particular case) than another
+constraint previously requested by someone else. For this reason, the PM QoS
+framework maintains a list of requests that have been made so far for the
+global CPU latency limit and for each individual CPU, aggregates them and
+applies the effective (minimum in this particular case) value as the new
+constraint.
+
+In fact, opening the :file:`cpu_dma_latency` special device file causes a new
+PM QoS request to be created and added to a global priority list of CPU latency
+limit requests and the file descriptor coming from the "open" operation
+represents that request. If that file descriptor is then used for writing, the
+number written to it will be associated with the PM QoS request represented by
+it as a new requested limit value. Next, the priority list mechanism will be
+used to determine the new effective value of the entire list of requests and
+that effective value will be set as a new CPU latency limit. Thus requesting a
+new limit value will only change the real limit if the effective "list" value is
+affected by it, which is the case if it is the minimum of the requested values
+in the list.
+
+The process holding a file descriptor obtained by opening the
+:file:`cpu_dma_latency` special device file controls the PM QoS request
+associated with that file descriptor, but it controls this particular PM QoS
+request only.
+
+Closing the :file:`cpu_dma_latency` special device file or, more precisely, the
+file descriptor obtained while opening it, causes the PM QoS request associated
+with that file descriptor to be removed from the global priority list of CPU
+latency limit requests and destroyed. If that happens, the priority list
+mechanism will be used again, to determine the new effective value for the whole
+list and that value will become the new limit.
+
+In turn, for each CPU there is one resume latency PM QoS request associated with
+the :file:`power/pm_qos_resume_latency_us` file under
+:file:`/sys/devices/system/cpu/cpu<N>/` in ``sysfs`` and writing to it causes
+this single PM QoS request to be updated regardless of which user space
+process does that. In other words, this PM QoS request is shared by the entire
+user space, so access to the file associated with it needs to be arbitrated
+to avoid confusion. [Arguably, the only legitimate use of this mechanism in
+practice is to pin a process to the CPU in question and let it use the
+``sysfs`` interface to control the resume latency constraint for it.] It is
+still only a request, however. It is an entry in a priority list used to
+determine the effective value to be set as the resume latency constraint for the
+CPU in question every time the list of requests is updated this way or another
+(there may be other requests coming from kernel code in that list).
+
+CPU idle time governors are expected to regard the minimum of the global
+(effective) CPU latency limit and the effective resume latency constraint for
+the given CPU as the upper limit for the exit latency of the idle states that
+they are allowed to select for that CPU. They should never select any idle
+states with exit latency beyond that limit.
+
+
+Idle States Control Via Kernel Command Line
+===========================================
+
+In addition to the ``sysfs`` interface allowing individual idle states to be
+`disabled for individual CPUs <idle-states-representation_>`_, there are kernel
+command line parameters affecting CPU idle time management.
+
+The ``cpuidle.off=1`` kernel command line option can be used to disable the
+CPU idle time management entirely. It does not prevent the idle loop from
+running on idle CPUs, but it prevents the CPU idle time governors and drivers
+from being invoked. If it is added to the kernel command line, the idle loop
+will ask the hardware to enter idle states on idle CPUs via the CPU architecture
+support code that is expected to provide a default mechanism for this purpose.
+That default mechanism usually is the least common denominator for all of the
+processors implementing the architecture (i.e. CPU instruction set) in question,
+however, so it is rather crude and not very energy-efficient. For this reason,
+it is not recommended for production use.
+
+The ``cpuidle.governor=`` kernel command line switch allows the ``CPUIdle``
+governor to use to be specified. It has to be appended with a string matching
+the name of an available governor (e.g. ``cpuidle.governor=menu``) and that
+governor will be used instead of the default one. It is possible to force
+the ``menu`` governor to be used on the systems that use the ``ladder`` governor
+by default this way, for example.
+
+The other kernel command line parameters controlling CPU idle time management
+described below are only relevant for the *x86* architecture and references
+to ``intel_idle`` affect Intel processors only.
+
+The *x86* architecture support code recognizes three kernel command line
+options related to CPU idle time management: ``idle=poll``, ``idle=halt``,
+and ``idle=nomwait``. The first two of them disable the ``acpi_idle`` and
+``intel_idle`` drivers altogether, which effectively causes the entire
+``CPUIdle`` subsystem to be disabled and makes the idle loop invoke the
+architecture support code to deal with idle CPUs. How it does that depends on
+which of the two parameters is added to the kernel command line. In the
+``idle=halt`` case, the architecture support code will use the ``HLT``
+instruction of the CPUs (which, as a rule, suspends the execution of the program
+and causes the hardware to attempt to enter the shallowest available idle state)
+for this purpose, and if ``idle=poll`` is used, idle CPUs will execute a
+more or less "lightweight" sequence of instructions in a tight loop. [Note
+that using ``idle=poll`` is somewhat drastic in many cases, as preventing idle
+CPUs from saving almost any energy at all may not be the only effect of it.
+For example, on Intel hardware it effectively prevents CPUs from using
+P-states (see |cpufreq|) that require any number of CPUs in a package to be
+idle, so it very well may hurt single-thread computations performance as well as
+energy-efficiency. Thus using it for performance reasons may not be a good idea
+at all.]
+
+The ``idle=nomwait`` option prevents the use of ``MWAIT`` instruction of
+the CPU to enter idle states. When this option is used, the ``acpi_idle``
+driver will use the ``HLT`` instruction instead of ``MWAIT``. On systems
+running Intel processors, this option disables the ``intel_idle`` driver
+and forces the use of the ``acpi_idle`` driver instead. Note that in either
+case, ``acpi_idle`` driver will function only if all the information needed
+by it is in the system's ACPI tables.
+
+In addition to the architecture-level kernel command line options affecting CPU
+idle time management, there are parameters affecting individual ``CPUIdle``
+drivers that can be passed to them via the kernel command line. Specifically,
+the ``intel_idle.max_cstate=<n>`` and ``processor.max_cstate=<n>`` parameters,
+where ``<n>`` is an idle state index also used in the name of the given
+state's directory in ``sysfs`` (see
+`Representation of Idle States <idle-states-representation_>`_), causes the
+``intel_idle`` and ``acpi_idle`` drivers, respectively, to discard all of the
+idle states deeper than idle state ``<n>``. In that case, they will never ask
+for any of those idle states or expose them to the governor. [The behavior of
+the two drivers is different for ``<n>`` equal to ``0``. Adding
+``intel_idle.max_cstate=0`` to the kernel command line disables the
+``intel_idle`` driver and allows ``acpi_idle`` to be used, whereas
+``processor.max_cstate=0`` is equivalent to ``processor.max_cstate=1``.
+Also, the ``acpi_idle`` driver is part of the ``processor`` kernel module that
+can be loaded separately and ``max_cstate=<n>`` can be passed to it as a module
+parameter when it is loaded.]
diff --git a/Documentation/admin-guide/pm/index.rst b/Documentation/admin-guide/pm/index.rst
index 49237ac73442..39f8f9f81e7a 100644
--- a/Documentation/admin-guide/pm/index.rst
+++ b/Documentation/admin-guide/pm/index.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
================
Power Management
================
diff --git a/Documentation/admin-guide/pm/intel-speed-select.rst b/Documentation/admin-guide/pm/intel-speed-select.rst
new file mode 100644
index 000000000000..a2bfb971654f
--- /dev/null
+++ b/Documentation/admin-guide/pm/intel-speed-select.rst
@@ -0,0 +1,939 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============================================================
+Intel(R) Speed Select Technology User Guide
+============================================================
+
+The Intel(R) Speed Select Technology (Intel(R) SST) provides a powerful new
+collection of features that give more granular control over CPU performance.
+With Intel(R) SST, one server can be configured for power and performance for a
+variety of diverse workload requirements.
+
+Refer to the links below for an overview of the technology:
+
+- https://www.intel.com/content/www/us/en/architecture-and-technology/speed-select-technology-article.html
+- https://builders.intel.com/docs/networkbuilders/intel-speed-select-technology-base-frequency-enhancing-performance.pdf
+
+These capabilities are further enhanced in some of the newer generations of
+server platforms where these features can be enumerated and controlled
+dynamically without pre-configuring via BIOS setup options. This dynamic
+configuration is done via mailbox commands to the hardware. One way to enumerate
+and configure these features is by using the Intel Speed Select utility.
+
+This document explains how to use the Intel Speed Select tool to enumerate and
+control Intel(R) SST features. This document gives example commands and explains
+how these commands change the power and performance profile of the system under
+test. Using this tool as an example, customers can replicate the messaging
+implemented in the tool in their production software.
+
+intel-speed-select configuration tool
+======================================
+
+Most Linux distribution packages may include the "intel-speed-select" tool. If not,
+it can be built by downloading the Linux kernel tree from kernel.org. Once
+downloaded, the tool can be built without building the full kernel.
+
+From the kernel tree, run the following commands::
+
+# cd tools/power/x86/intel-speed-select/
+# make
+# make install
+
+Getting Help
+------------
+
+To get help with the tool, execute the command below::
+
+# intel-speed-select --help
+
+The top-level help describes arguments and features. Notice that there is a
+multi-level help structure in the tool. For example, to get help for the feature "perf-profile"::
+
+# intel-speed-select perf-profile --help
+
+To get help on a command, another level of help is provided. For example for the command info "info"::
+
+# intel-speed-select perf-profile info --help
+
+Summary of platform capability
+------------------------------
+To check the current platform and driver capabilities, execute::
+
+#intel-speed-select --info
+
+For example on a test system::
+
+ # intel-speed-select --info
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ Platform: API version : 1
+ Platform: Driver version : 1
+ Platform: mbox supported : 1
+ Platform: mmio supported : 1
+ Intel(R) SST-PP (feature perf-profile) is supported
+ TDP level change control is unlocked, max level: 4
+ Intel(R) SST-TF (feature turbo-freq) is supported
+ Intel(R) SST-BF (feature base-freq) is not supported
+ Intel(R) SST-CP (feature core-power) is supported
+
+Intel(R) Speed Select Technology - Performance Profile (Intel(R) SST-PP)
+------------------------------------------------------------------------
+
+This feature allows configuration of a server dynamically based on workload
+performance requirements. This helps users during deployment as they do not have
+to choose a specific server configuration statically. This Intel(R) Speed Select
+Technology - Performance Profile (Intel(R) SST-PP) feature introduces a mechanism
+that allows multiple optimized performance profiles per system. Each profile
+defines a set of CPUs that need to be online and rest offline to sustain a
+guaranteed base frequency. Once the user issues a command to use a specific
+performance profile and meet CPU online/offline requirement, the user can expect
+a change in the base frequency dynamically. This feature is called
+"perf-profile" when using the Intel Speed Select tool.
+
+Number or performance levels
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+There can be multiple performance profiles on a system. To get the number of
+profiles, execute the command below::
+
+ # intel-speed-select perf-profile get-config-levels
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ package-0
+ die-0
+ cpu-0
+ get-config-levels:4
+ package-1
+ die-0
+ cpu-14
+ get-config-levels:4
+
+On this system under test, there are 4 performance profiles in addition to the
+base performance profile (which is performance level 0).
+
+Lock/Unlock status
+~~~~~~~~~~~~~~~~~~
+
+Even if there are multiple performance profiles, it is possible that they
+are locked. If they are locked, users cannot issue a command to change the
+performance state. It is possible that there is a BIOS setup to unlock or check
+with your system vendor.
+
+To check if the system is locked, execute the following command::
+
+ # intel-speed-select perf-profile get-lock-status
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ package-0
+ die-0
+ cpu-0
+ get-lock-status:0
+ package-1
+ die-0
+ cpu-14
+ get-lock-status:0
+
+In this case, lock status is 0, which means that the system is unlocked.
+
+Properties of a performance level
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To get properties of a specific performance level (For example for the level 0, below), execute the command below::
+
+ # intel-speed-select perf-profile info -l 0
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ package-0
+ die-0
+ cpu-0
+ perf-profile-level-0
+ cpu-count:28
+ enable-cpu-mask:000003ff,f0003fff
+ enable-cpu-list:0,1,2,3,4,5,6,7,8,9,10,11,12,13,28,29,30,31,32,33,34,35,36,37,38,39,40,41
+ thermal-design-power-ratio:26
+ base-frequency(MHz):2600
+ speed-select-turbo-freq:disabled
+ speed-select-base-freq:disabled
+ ...
+ ...
+
+Here -l option is used to specify a performance level.
+
+If the option -l is omitted, then this command will print information about all
+the performance levels. The above command is printing properties of the
+performance level 0.
+
+For this performance profile, the list of CPUs displayed by the
+"enable-cpu-mask/enable-cpu-list" at the max can be "online." When that
+condition is met, then base frequency of 2600 MHz can be maintained. To
+understand more, execute "intel-speed-select perf-profile info" for performance
+level 4::
+
+ # intel-speed-select perf-profile info -l 4
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ package-0
+ die-0
+ cpu-0
+ perf-profile-level-4
+ cpu-count:28
+ enable-cpu-mask:000000fa,f0000faf
+ enable-cpu-list:0,1,2,3,5,7,8,9,10,11,28,29,30,31,33,35,36,37,38,39
+ thermal-design-power-ratio:28
+ base-frequency(MHz):2800
+ speed-select-turbo-freq:disabled
+ speed-select-base-freq:unsupported
+ ...
+ ...
+
+There are fewer CPUs in the "enable-cpu-mask/enable-cpu-list". Consequently, if
+the user only keeps these CPUs online and the rest "offline," then the base
+frequency is increased to 2.8 GHz compared to 2.6 GHz at performance level 0.
+
+Get current performance level
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To get the current performance level, execute::
+
+ # intel-speed-select perf-profile get-config-current-level
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ package-0
+ die-0
+ cpu-0
+ get-config-current_level:0
+
+First verify that the base_frequency displayed by the cpufreq sysfs is correct::
+
+ # cat /sys/devices/system/cpu/cpu0/cpufreq/base_frequency
+ 2600000
+
+This matches the base-frequency (MHz) field value displayed from the
+"perf-profile info" command for performance level 0(cpufreq frequency is in
+KHz).
+
+To check if the average frequency is equal to the base frequency for a 100% busy
+workload, disable turbo::
+
+# echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo
+
+Then runs a busy workload on all CPUs, for example::
+
+#stress -c 64
+
+To verify the base frequency, run turbostat::
+
+ #turbostat -c 0-13 --show Package,Core,CPU,Bzy_MHz -i 1
+
+ Package Core CPU Bzy_MHz
+ - - 2600
+ 0 0 0 2600
+ 0 1 1 2600
+ 0 2 2 2600
+ 0 3 3 2600
+ 0 4 4 2600
+ . . . .
+
+
+Changing performance level
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To the change the performance level to 4, execute::
+
+ # intel-speed-select -d perf-profile set-config-level -l 4 -o
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ package-0
+ die-0
+ cpu-0
+ perf-profile
+ set_tdp_level:success
+
+In the command above, "-o" is optional. If it is specified, then it will also
+offline CPUs which are not present in the enable_cpu_mask for this performance
+level.
+
+Now if the base_frequency is checked::
+
+ #cat /sys/devices/system/cpu/cpu0/cpufreq/base_frequency
+ 2800000
+
+Which shows that the base frequency now increased from 2600 MHz at performance
+level 0 to 2800 MHz at performance level 4. As a result, any workload, which can
+use fewer CPUs, can see a boost of 200 MHz compared to performance level 0.
+
+Changing performance level via BMC Interface
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+It is possible to change SST-PP level using out of band (OOB) agent (Via some
+remote management console, through BMC "Baseboard Management Controller"
+interface). This mode is supported from the Sapphire Rapids processor
+generation. The kernel and tool change to support this mode is added to Linux
+kernel version 5.18. To enable this feature, kernel config
+"CONFIG_INTEL_HFI_THERMAL" is required. The minimum version of the tool
+is "v1.12" to support this feature, which is part of Linux kernel version 5.18.
+
+To support such configuration, this tool can be used as a daemon. Add
+a command line option --oob::
+
+ # intel-speed-select --oob
+ Intel(R) Speed Select Technology
+ Executing on CPU model:143[0x8f]
+ OOB mode is enabled and will run as daemon
+
+In this mode the tool will online/offline CPUs based on the new performance
+level.
+
+Check presence of other Intel(R) SST features
+---------------------------------------------
+
+Each of the performance profiles also specifies weather there is support of
+other two Intel(R) SST features (Intel(R) Speed Select Technology - Base Frequency
+(Intel(R) SST-BF) and Intel(R) Speed Select Technology - Turbo Frequency (Intel
+SST-TF)).
+
+For example, from the output of "perf-profile info" above, for level 0 and level
+4:
+
+For level 0::
+ speed-select-turbo-freq:disabled
+ speed-select-base-freq:disabled
+
+For level 4::
+ speed-select-turbo-freq:disabled
+ speed-select-base-freq:unsupported
+
+Given these results, the "speed-select-base-freq" (Intel(R) SST-BF) in level 4
+changed from "disabled" to "unsupported" compared to performance level 0.
+
+This means that at performance level 4, the "speed-select-base-freq" feature is
+not supported. However, at performance level 0, this feature is "supported", but
+currently "disabled", meaning the user has not activated this feature. Whereas
+"speed-select-turbo-freq" (Intel(R) SST-TF) is supported at both performance
+levels, but currently not activated by the user.
+
+The Intel(R) SST-BF and the Intel(R) SST-TF features are built on a foundation
+technology called Intel(R) Speed Select Technology - Core Power (Intel(R) SST-CP).
+The platform firmware enables this feature when Intel(R) SST-BF or Intel(R) SST-TF
+is supported on a platform.
+
+Intel(R) Speed Select Technology Core Power (Intel(R) SST-CP)
+---------------------------------------------------------------
+
+Intel(R) Speed Select Technology Core Power (Intel(R) SST-CP) is an interface that
+allows users to define per core priority. This defines a mechanism to distribute
+power among cores when there is a power constrained scenario. This defines a
+class of service (CLOS) configuration.
+
+The user can configure up to 4 class of service configurations. Each CLOS group
+configuration allows definitions of parameters, which affects how the frequency
+can be limited and power is distributed. Each CPU core can be tied to a class of
+service and hence an associated priority. The granularity is at core level not
+at per CPU level.
+
+Enable CLOS based prioritization
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To use CLOS based prioritization feature, firmware must be informed to enable
+and use a priority type. There is a default per platform priority type, which
+can be changed with optional command line parameter.
+
+To enable and check the options, execute::
+
+ # intel-speed-select core-power enable --help
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ Enable core-power for a package/die
+ Clos Enable: Specify priority type with [--priority|-p]
+ 0: Proportional, 1: Ordered
+
+There are two types of priority types:
+
+- Ordered
+
+Priority for ordered throttling is defined based on the index of the assigned
+CLOS group. Where CLOS0 gets highest priority (throttled last).
+
+Priority order is:
+CLOS0 > CLOS1 > CLOS2 > CLOS3.
+
+- Proportional
+
+When proportional priority is used, there is an additional parameter called
+frequency_weight, which can be specified per CLOS group. The goal of
+proportional priority is to provide each core with the requested min., then
+distribute all remaining (excess/deficit) budgets in proportion to a defined
+weight. This proportional priority can be configured using "core-power config"
+command.
+
+To enable with the platform default priority type, execute::
+
+ # intel-speed-select core-power enable
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ package-0
+ die-0
+ cpu-0
+ core-power
+ enable:success
+ package-1
+ die-0
+ cpu-6
+ core-power
+ enable:success
+
+The scope of this enable is per package or die scoped when a package contains
+multiple dies. To check if CLOS is enabled and get priority type, "core-power
+info" command can be used. For example to check the status of core-power feature
+on CPU 0, execute::
+
+ # intel-speed-select -c 0 core-power info
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ package-0
+ die-0
+ cpu-0
+ core-power
+ support-status:supported
+ enable-status:enabled
+ clos-enable-status:enabled
+ priority-type:proportional
+ package-1
+ die-0
+ cpu-24
+ core-power
+ support-status:supported
+ enable-status:enabled
+ clos-enable-status:enabled
+ priority-type:proportional
+
+Configuring CLOS groups
+~~~~~~~~~~~~~~~~~~~~~~~
+
+Each CLOS group has its own attributes including min, max, freq_weight and
+desired. These parameters can be configured with "core-power config" command.
+Defaults will be used if user skips setting a parameter except clos id, which is
+mandatory. To check core-power config options, execute::
+
+ # intel-speed-select core-power config --help
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ Set core-power configuration for one of the four clos ids
+ Specify targeted clos id with [--clos|-c]
+ Specify clos Proportional Priority [--weight|-w]
+ Specify clos min in MHz with [--min|-n]
+ Specify clos max in MHz with [--max|-m]
+
+For example::
+
+ # intel-speed-select core-power config -c 0
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ clos epp is not specified, default: 0
+ clos frequency weight is not specified, default: 0
+ clos min is not specified, default: 0 MHz
+ clos max is not specified, default: 25500 MHz
+ clos desired is not specified, default: 0
+ package-0
+ die-0
+ cpu-0
+ core-power
+ config:success
+ package-1
+ die-0
+ cpu-6
+ core-power
+ config:success
+
+The user has the option to change defaults. For example, the user can change the
+"min" and set the base frequency to always get guaranteed base frequency.
+
+Get the current CLOS configuration
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To check the current configuration, "core-power get-config" can be used. For
+example, to get the configuration of CLOS 0::
+
+ # intel-speed-select core-power get-config -c 0
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ package-0
+ die-0
+ cpu-0
+ core-power
+ clos:0
+ epp:0
+ clos-proportional-priority:0
+ clos-min:0 MHz
+ clos-max:Max Turbo frequency
+ clos-desired:0 MHz
+ package-1
+ die-0
+ cpu-24
+ core-power
+ clos:0
+ epp:0
+ clos-proportional-priority:0
+ clos-min:0 MHz
+ clos-max:Max Turbo frequency
+ clos-desired:0 MHz
+
+Associating a CPU with a CLOS group
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To associate a CPU to a CLOS group "core-power assoc" command can be used::
+
+ # intel-speed-select core-power assoc --help
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ Associate a clos id to a CPU
+ Specify targeted clos id with [--clos|-c]
+
+
+For example to associate CPU 10 to CLOS group 3, execute::
+
+ # intel-speed-select -c 10 core-power assoc -c 3
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ package-0
+ die-0
+ cpu-10
+ core-power
+ assoc:success
+
+Once a CPU is associated, its sibling CPUs are also associated to a CLOS group.
+Once associated, avoid changing Linux "cpufreq" subsystem scaling frequency
+limits.
+
+To check the existing association for a CPU, "core-power get-assoc" command can
+be used. For example, to get association of CPU 10, execute::
+
+ # intel-speed-select -c 10 core-power get-assoc
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ package-1
+ die-0
+ cpu-10
+ get-assoc
+ clos:3
+
+This shows that CPU 10 is part of a CLOS group 3.
+
+
+Disable CLOS based prioritization
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To disable, execute::
+
+# intel-speed-select core-power disable
+
+Some features like Intel(R) SST-TF can only be enabled when CLOS based prioritization
+is enabled. For this reason, disabling while Intel(R) SST-TF is enabled can cause
+Intel(R) SST-TF to fail. This will cause the "disable" command to display an error
+if Intel(R) SST-TF is already enabled. In turn, to disable, the Intel(R) SST-TF
+feature must be disabled first.
+
+Intel(R) Speed Select Technology - Base Frequency (Intel(R) SST-BF)
+-------------------------------------------------------------------
+
+The Intel(R) Speed Select Technology - Base Frequency (Intel(R) SST-BF) feature lets
+the user control base frequency. If some critical workload threads demand
+constant high guaranteed performance, then this feature can be used to execute
+the thread at higher base frequency on specific sets of CPUs (high priority
+CPUs) at the cost of lower base frequency (low priority CPUs) on other CPUs.
+This feature does not require offline of the low priority CPUs.
+
+The support of Intel(R) SST-BF depends on the Intel(R) Speed Select Technology -
+Performance Profile (Intel(R) SST-PP) performance level configuration. It is
+possible that only certain performance levels support Intel(R) SST-BF. It is also
+possible that only base performance level (level = 0) has support of Intel
+SST-BF. Consequently, first select the desired performance level to enable this
+feature.
+
+In the system under test here, Intel(R) SST-BF is supported at the base
+performance level 0, but currently disabled. For example for the level 0::
+
+ # intel-speed-select -c 0 perf-profile info -l 0
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ package-0
+ die-0
+ cpu-0
+ perf-profile-level-0
+ ...
+
+ speed-select-base-freq:disabled
+ ...
+
+Before enabling Intel(R) SST-BF and measuring its impact on a workload
+performance, execute some workload and measure performance and get a baseline
+performance to compare against.
+
+Here the user wants more guaranteed performance. For this reason, it is likely
+that turbo is disabled. To disable turbo, execute::
+
+#echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo
+
+Based on the output of the "intel-speed-select perf-profile info -l 0" base
+frequency of guaranteed frequency 2600 MHz.
+
+
+Measure baseline performance for comparison
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To compare, pick a multi-threaded workload where each thread can be scheduled on
+separate CPUs. "Hackbench pipe" test is a good example on how to improve
+performance using Intel(R) SST-BF.
+
+Below, the workload is measuring average scheduler wakeup latency, so a lower
+number means better performance::
+
+ # taskset -c 3,4 perf bench -r 100 sched pipe
+ # Running 'sched/pipe' benchmark:
+ # Executed 1000000 pipe operations between two processes
+ Total time: 6.102 [sec]
+ 6.102445 usecs/op
+ 163868 ops/sec
+
+While running the above test, if we take turbostat output, it will show us that
+2 of the CPUs are busy and reaching max. frequency (which would be the base
+frequency as the turbo is disabled). The turbostat output::
+
+ #turbostat -c 0-13 --show Package,Core,CPU,Bzy_MHz -i 1
+ Package Core CPU Bzy_MHz
+ 0 0 0 1000
+ 0 1 1 1005
+ 0 2 2 1000
+ 0 3 3 2600
+ 0 4 4 2600
+ 0 5 5 1000
+ 0 6 6 1000
+ 0 7 7 1005
+ 0 8 8 1005
+ 0 9 9 1000
+ 0 10 10 1000
+ 0 11 11 995
+ 0 12 12 1000
+ 0 13 13 1000
+
+From the above turbostat output, both CPU 3 and 4 are very busy and reaching
+full guaranteed frequency of 2600 MHz.
+
+Intel(R) SST-BF Capabilities
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To get capabilities of Intel(R) SST-BF for the current performance level 0,
+execute::
+
+ # intel-speed-select base-freq info -l 0
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ package-0
+ die-0
+ cpu-0
+ speed-select-base-freq
+ high-priority-base-frequency(MHz):3000
+ high-priority-cpu-mask:00000216,00002160
+ high-priority-cpu-list:5,6,8,13,33,34,36,41
+ low-priority-base-frequency(MHz):2400
+ tjunction-temperature(C):125
+ thermal-design-power(W):205
+
+The above capabilities show that there are some CPUs on this system that can
+offer base frequency of 3000 MHz compared to the standard base frequency at this
+performance levels. Nevertheless, these CPUs are fixed, and they are presented
+via high-priority-cpu-list/high-priority-cpu-mask. But if this Intel(R) SST-BF
+feature is selected, the low priorities CPUs (which are not in
+high-priority-cpu-list) can only offer up to 2400 MHz. As a result, if this
+clipping of low priority CPUs is acceptable, then the user can enable Intel
+SST-BF feature particularly for the above "sched pipe" workload since only two
+CPUs are used, they can be scheduled on high priority CPUs and can get boost of
+400 MHz.
+
+Enable Intel(R) SST-BF
+~~~~~~~~~~~~~~~~~~~~~~
+
+To enable Intel(R) SST-BF feature, execute::
+
+ # intel-speed-select base-freq enable -a
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ package-0
+ die-0
+ cpu-0
+ base-freq
+ enable:success
+ package-1
+ die-0
+ cpu-14
+ base-freq
+ enable:success
+
+In this case, -a option is optional. This not only enables Intel(R) SST-BF, but it
+also adjusts the priority of cores using Intel(R) Speed Select Technology Core
+Power (Intel(R) SST-CP) features. This option sets the minimum performance of each
+Intel(R) Speed Select Technology - Performance Profile (Intel(R) SST-PP) class to
+maximum performance so that the hardware will give maximum performance possible
+for each CPU.
+
+If -a option is not used, then the following steps are required before enabling
+Intel(R) SST-BF:
+
+- Discover Intel(R) SST-BF and note low and high priority base frequency
+- Note the high priority CPU list
+- Enable CLOS using core-power feature set
+- Configure CLOS parameters. Use CLOS.min to set to minimum performance
+- Subscribe desired CPUs to CLOS groups
+
+With this configuration, if the same workload is executed by pinning the
+workload to high priority CPUs (CPU 5 and 6 in this case)::
+
+ #taskset -c 5,6 perf bench -r 100 sched pipe
+ # Running 'sched/pipe' benchmark:
+ # Executed 1000000 pipe operations between two processes
+ Total time: 5.627 [sec]
+ 5.627922 usecs/op
+ 177685 ops/sec
+
+This way, by enabling Intel(R) SST-BF, the performance of this benchmark is
+improved (latency reduced) by 7.79%. From the turbostat output, it can be
+observed that the high priority CPUs reached 3000 MHz compared to 2600 MHz.
+The turbostat output::
+
+ #turbostat -c 0-13 --show Package,Core,CPU,Bzy_MHz -i 1
+ Package Core CPU Bzy_MHz
+ 0 0 0 2151
+ 0 1 1 2166
+ 0 2 2 2175
+ 0 3 3 2175
+ 0 4 4 2175
+ 0 5 5 3000
+ 0 6 6 3000
+ 0 7 7 2180
+ 0 8 8 2662
+ 0 9 9 2176
+ 0 10 10 2175
+ 0 11 11 2176
+ 0 12 12 2176
+ 0 13 13 2661
+
+Disable Intel(R) SST-BF
+~~~~~~~~~~~~~~~~~~~~~~~
+
+To disable the Intel(R) SST-BF feature, execute::
+
+# intel-speed-select base-freq disable -a
+
+
+Intel(R) Speed Select Technology - Turbo Frequency (Intel(R) SST-TF)
+--------------------------------------------------------------------
+
+This feature enables the ability to set different "All core turbo ratio limits"
+to cores based on the priority. By using this feature, some cores can be
+configured to get higher turbo frequency by designating them as high priority at
+the cost of lower or no turbo frequency on the low priority cores.
+
+For this reason, this feature is only useful when system is busy utilizing all
+CPUs, but the user wants some configurable option to get high performance on
+some CPUs.
+
+The support of Intel(R) Speed Select Technology - Turbo Frequency (Intel(R) SST-TF)
+depends on the Intel(R) Speed Select Technology - Performance Profile (Intel
+SST-PP) performance level configuration. It is possible that only a certain
+performance level supports Intel(R) SST-TF. It is also possible that only the base
+performance level (level = 0) has the support of Intel(R) SST-TF. Hence, first
+select the desired performance level to enable this feature.
+
+In the system under test here, Intel(R) SST-TF is supported at the base
+performance level 0, but currently disabled::
+
+ # intel-speed-select -c 0 perf-profile info -l 0
+ Intel(R) Speed Select Technology
+ package-0
+ die-0
+ cpu-0
+ perf-profile-level-0
+ ...
+ ...
+ speed-select-turbo-freq:disabled
+ ...
+ ...
+
+
+To check if performance can be improved using Intel(R) SST-TF feature, get the turbo
+frequency properties with Intel(R) SST-TF enabled and compare to the base turbo
+capability of this system.
+
+Get Base turbo capability
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To get the base turbo capability of performance level 0, execute::
+
+ # intel-speed-select perf-profile info -l 0
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ package-0
+ die-0
+ cpu-0
+ perf-profile-level-0
+ ...
+ ...
+ turbo-ratio-limits-sse
+ bucket-0
+ core-count:2
+ max-turbo-frequency(MHz):3200
+ bucket-1
+ core-count:4
+ max-turbo-frequency(MHz):3100
+ bucket-2
+ core-count:6
+ max-turbo-frequency(MHz):3100
+ bucket-3
+ core-count:8
+ max-turbo-frequency(MHz):3100
+ bucket-4
+ core-count:10
+ max-turbo-frequency(MHz):3100
+ bucket-5
+ core-count:12
+ max-turbo-frequency(MHz):3100
+ bucket-6
+ core-count:14
+ max-turbo-frequency(MHz):3100
+ bucket-7
+ core-count:16
+ max-turbo-frequency(MHz):3100
+
+Based on the data above, when all the CPUS are busy, the max. frequency of 3100
+MHz can be achieved. If there is some busy workload on cpu 0 - 11 (e.g. stress)
+and on CPU 12 and 13, execute "hackbench pipe" workload::
+
+ # taskset -c 12,13 perf bench -r 100 sched pipe
+ # Running 'sched/pipe' benchmark:
+ # Executed 1000000 pipe operations between two processes
+ Total time: 5.705 [sec]
+ 5.705488 usecs/op
+ 175269 ops/sec
+
+The turbostat output::
+
+ #turbostat -c 0-13 --show Package,Core,CPU,Bzy_MHz -i 1
+ Package Core CPU Bzy_MHz
+ 0 0 0 3000
+ 0 1 1 3000
+ 0 2 2 3000
+ 0 3 3 3000
+ 0 4 4 3000
+ 0 5 5 3100
+ 0 6 6 3100
+ 0 7 7 3000
+ 0 8 8 3100
+ 0 9 9 3000
+ 0 10 10 3000
+ 0 11 11 3000
+ 0 12 12 3100
+ 0 13 13 3100
+
+Based on turbostat output, the performance is limited by frequency cap of 3100
+MHz. To check if the hackbench performance can be improved for CPU 12 and CPU
+13, first check the capability of the Intel(R) SST-TF feature for this performance
+level.
+
+Get Intel(R) SST-TF Capability
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To get the capability, the "turbo-freq info" command can be used::
+
+ # intel-speed-select turbo-freq info -l 0
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ package-0
+ die-0
+ cpu-0
+ speed-select-turbo-freq
+ bucket-0
+ high-priority-cores-count:2
+ high-priority-max-frequency(MHz):3200
+ high-priority-max-avx2-frequency(MHz):3200
+ high-priority-max-avx512-frequency(MHz):3100
+ bucket-1
+ high-priority-cores-count:4
+ high-priority-max-frequency(MHz):3100
+ high-priority-max-avx2-frequency(MHz):3000
+ high-priority-max-avx512-frequency(MHz):2900
+ bucket-2
+ high-priority-cores-count:6
+ high-priority-max-frequency(MHz):3100
+ high-priority-max-avx2-frequency(MHz):3000
+ high-priority-max-avx512-frequency(MHz):2900
+ speed-select-turbo-freq-clip-frequencies
+ low-priority-max-frequency(MHz):2600
+ low-priority-max-avx2-frequency(MHz):2400
+ low-priority-max-avx512-frequency(MHz):2100
+
+Based on the output above, there is an Intel(R) SST-TF bucket for which there are
+two high priority cores. If only two high priority cores are set, then max.
+turbo frequency on those cores can be increased to 3200 MHz. This is 100 MHz
+more than the base turbo capability for all cores.
+
+In turn, for the hackbench workload, two CPUs can be set as high priority and
+rest as low priority. One side effect is that once enabled, the low priority
+cores will be clipped to a lower frequency of 2600 MHz.
+
+Enable Intel(R) SST-TF
+~~~~~~~~~~~~~~~~~~~~~~
+
+To enable Intel(R) SST-TF, execute::
+
+ # intel-speed-select -c 12,13 turbo-freq enable -a
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ package-0
+ die-0
+ cpu-12
+ turbo-freq
+ enable:success
+ package-0
+ die-0
+ cpu-13
+ turbo-freq
+ enable:success
+ package--1
+ die-0
+ cpu-63
+ turbo-freq --auto
+ enable:success
+
+In this case, the option "-a" is optional. If set, it enables Intel(R) SST-TF
+feature and also sets the CPUs to high and low priority using Intel Speed
+Select Technology Core Power (Intel(R) SST-CP) features. The CPU numbers passed
+with "-c" arguments are marked as high priority, including its siblings.
+
+If -a option is not used, then the following steps are required before enabling
+Intel(R) SST-TF:
+
+- Discover Intel(R) SST-TF and note buckets of high priority cores and maximum frequency
+
+- Enable CLOS using core-power feature set - Configure CLOS parameters
+
+- Subscribe desired CPUs to CLOS groups making sure that high priority cores are set to the maximum frequency
+
+If the same hackbench workload is executed, schedule hackbench threads on high
+priority CPUs::
+
+ #taskset -c 12,13 perf bench -r 100 sched pipe
+ # Running 'sched/pipe' benchmark:
+ # Executed 1000000 pipe operations between two processes
+ Total time: 5.510 [sec]
+ 5.510165 usecs/op
+ 180826 ops/sec
+
+This improved performance by around 3.3% improvement on a busy system. Here the
+turbostat output will show that the CPU 12 and CPU 13 are getting 100 MHz boost.
+The turbostat output::
+
+ #turbostat -c 0-13 --show Package,Core,CPU,Bzy_MHz -i 1
+ Package Core CPU Bzy_MHz
+ ...
+ 0 12 12 3200
+ 0 13 13 3200
diff --git a/Documentation/admin-guide/pm/intel_epb.rst b/Documentation/admin-guide/pm/intel_epb.rst
new file mode 100644
index 000000000000..005121167af7
--- /dev/null
+++ b/Documentation/admin-guide/pm/intel_epb.rst
@@ -0,0 +1,41 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+======================================
+Intel Performance and Energy Bias Hint
+======================================
+
+:Copyright: |copy| 2019 Intel Corporation
+
+:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+
+.. kernel-doc:: arch/x86/kernel/cpu/intel_epb.c
+ :doc: overview
+
+Intel Performance and Energy Bias Attribute in ``sysfs``
+========================================================
+
+The Intel Performance and Energy Bias Hint (EPB) value for a given (logical) CPU
+can be checked or updated through a ``sysfs`` attribute (file) under
+:file:`/sys/devices/system/cpu/cpu<N>/power/`, where the CPU number ``<N>``
+is allocated at the system initialization time:
+
+``energy_perf_bias``
+ Shows the current EPB value for the CPU in a sliding scale 0 - 15, where
+ a value of 0 corresponds to a hint preference for highest performance
+ and a value of 15 corresponds to the maximum energy savings.
+
+ In order to update the EPB value for the CPU, this attribute can be
+ written to, either with a number in the 0 - 15 sliding scale above, or
+ with one of the strings: "performance", "balance-performance", "normal",
+ "balance-power", "power" that represent values reflected by their
+ meaning.
+
+ This attribute is present for all online CPUs supporting the EPB
+ feature.
+
+Note that while the EPB interface to the processor is defined at the logical CPU
+level, the physical register backing it may be shared by multiple CPUs (for
+example, SMT siblings or cores in one package). For this reason, updating the
+EPB value for one CPU may cause the EPB values for other CPUs to change.
diff --git a/Documentation/admin-guide/pm/intel_idle.rst b/Documentation/admin-guide/pm/intel_idle.rst
new file mode 100644
index 000000000000..ed6f055d4b14
--- /dev/null
+++ b/Documentation/admin-guide/pm/intel_idle.rst
@@ -0,0 +1,316 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+==============================================
+``intel_idle`` CPU Idle Time Management Driver
+==============================================
+
+:Copyright: |copy| 2020 Intel Corporation
+
+:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+
+General Information
+===================
+
+``intel_idle`` is a part of the
+:doc:`CPU idle time management subsystem <cpuidle>` in the Linux kernel
+(``CPUIdle``). It is the default CPU idle time management driver for the
+Nehalem and later generations of Intel processors, but the level of support for
+a particular processor model in it depends on whether or not it recognizes that
+processor model and may also depend on information coming from the platform
+firmware. [To understand ``intel_idle`` it is necessary to know how ``CPUIdle``
+works in general, so this is the time to get familiar with
+Documentation/admin-guide/pm/cpuidle.rst if you have not done that yet.]
+
+``intel_idle`` uses the ``MWAIT`` instruction to inform the processor that the
+logical CPU executing it is idle and so it may be possible to put some of the
+processor's functional blocks into low-power states. That instruction takes two
+arguments (passed in the ``EAX`` and ``ECX`` registers of the target CPU), the
+first of which, referred to as a *hint*, can be used by the processor to
+determine what can be done (for details refer to Intel Software Developer’s
+Manual [1]_). Accordingly, ``intel_idle`` refuses to work with processors in
+which the support for the ``MWAIT`` instruction has been disabled (for example,
+via the platform firmware configuration menu) or which do not support that
+instruction at all.
+
+``intel_idle`` is not modular, so it cannot be unloaded, which means that the
+only way to pass early-configuration-time parameters to it is via the kernel
+command line.
+
+Sysfs Interface
+===============
+
+The ``intel_idle`` driver exposes the following ``sysfs`` attributes in
+``/sys/devices/system/cpu/cpuidle/``:
+
+``intel_c1_demotion``
+ Enable or disable C1 demotion for all CPUs in the system. This file is
+ only exposed on platforms that support the C1 demotion feature and where
+ it was tested. Value 0 means that C1 demotion is disabled, value 1 means
+ that it is enabled. Write 0 or 1 to disable or enable C1 demotion for
+ all CPUs.
+
+ The C1 demotion feature involves the platform firmware demoting deep
+ C-state requests from the OS (e.g., C6 requests) to C1. The idea is that
+ firmware monitors CPU wake-up rate, and if it is higher than a
+ platform-specific threshold, the firmware demotes deep C-state requests
+ to C1. For example, Linux requests C6, but firmware noticed too many
+ wake-ups per second, and it keeps the CPU in C1. When the CPU stays in
+ C1 long enough, the platform promotes it back to C6. This may improve
+ some workloads' performance, but it may also increase power consumption.
+
+.. _intel-idle-enumeration-of-states:
+
+Enumeration of Idle States
+==========================
+
+Each ``MWAIT`` hint value is interpreted by the processor as a license to
+reconfigure itself in a certain way in order to save energy. The processor
+configurations (with reduced power draw) resulting from that are referred to
+as C-states (in the ACPI terminology) or idle states. The list of meaningful
+``MWAIT`` hint values and idle states (i.e. low-power configurations of the
+processor) corresponding to them depends on the processor model and it may also
+depend on the configuration of the platform.
+
+In order to create a list of available idle states required by the ``CPUIdle``
+subsystem (see :ref:`idle-states-representation` in
+Documentation/admin-guide/pm/cpuidle.rst),
+``intel_idle`` can use two sources of information: static tables of idle states
+for different processor models included in the driver itself and the ACPI tables
+of the system. The former are always used if the processor model at hand is
+recognized by ``intel_idle`` and the latter are used if that is required for
+the given processor model (which is the case for all server processor models
+recognized by ``intel_idle``) or if the processor model is not recognized.
+[There is a module parameter that can be used to make the driver use the ACPI
+tables with any processor model recognized by it; see
+`below <intel-idle-parameters_>`_.]
+
+If the ACPI tables are going to be used for building the list of available idle
+states, ``intel_idle`` first looks for a ``_CST`` object under one of the ACPI
+objects corresponding to the CPUs in the system (refer to the ACPI specification
+[2]_ for the description of ``_CST`` and its output package). Because the
+``CPUIdle`` subsystem expects that the list of idle states supplied by the
+driver will be suitable for all of the CPUs handled by it and ``intel_idle`` is
+registered as the ``CPUIdle`` driver for all of the CPUs in the system, the
+driver looks for the first ``_CST`` object returning at least one valid idle
+state description and such that all of the idle states included in its return
+package are of the FFH (Functional Fixed Hardware) type, which means that the
+``MWAIT`` instruction is expected to be used to tell the processor that it can
+enter one of them. The return package of that ``_CST`` is then assumed to be
+applicable to all of the other CPUs in the system and the idle state
+descriptions extracted from it are stored in a preliminary list of idle states
+coming from the ACPI tables. [This step is skipped if ``intel_idle`` is
+configured to ignore the ACPI tables; see `below <intel-idle-parameters_>`_.]
+
+Next, the first (index 0) entry in the list of available idle states is
+initialized to represent a "polling idle state" (a pseudo-idle state in which
+the target CPU continuously fetches and executes instructions), and the
+subsequent (real) idle state entries are populated as follows.
+
+If the processor model at hand is recognized by ``intel_idle``, there is a
+(static) table of idle state descriptions for it in the driver. In that case,
+the "internal" table is the primary source of information on idle states and the
+information from it is copied to the final list of available idle states. If
+using the ACPI tables for the enumeration of idle states is not required
+(depending on the processor model), all of the listed idle state are enabled by
+default (so all of them will be taken into consideration by ``CPUIdle``
+governors during CPU idle state selection). Otherwise, some of the listed idle
+states may not be enabled by default if there are no matching entries in the
+preliminary list of idle states coming from the ACPI tables. In that case user
+space still can enable them later (on a per-CPU basis) with the help of
+the ``disable`` idle state attribute in ``sysfs`` (see
+:ref:`idle-states-representation` in
+Documentation/admin-guide/pm/cpuidle.rst). This basically means that
+the idle states "known" to the driver may not be enabled by default if they have
+not been exposed by the platform firmware (through the ACPI tables).
+
+If the given processor model is not recognized by ``intel_idle``, but it
+supports ``MWAIT``, the preliminary list of idle states coming from the ACPI
+tables is used for building the final list that will be supplied to the
+``CPUIdle`` core during driver registration. For each idle state in that list,
+the description, ``MWAIT`` hint and exit latency are copied to the corresponding
+entry in the final list of idle states. The name of the idle state represented
+by it (to be returned by the ``name`` idle state attribute in ``sysfs``) is
+"CX_ACPI", where X is the index of that idle state in the final list (note that
+the minimum value of X is 1, because 0 is reserved for the "polling" state), and
+its target residency is based on the exit latency value. Specifically, for
+C1-type idle states the exit latency value is also used as the target residency
+(for compatibility with the majority of the "internal" tables of idle states for
+various processor models recognized by ``intel_idle``) and for the other idle
+state types (C2 and C3) the target residency value is 3 times the exit latency
+(again, that is because it reflects the target residency to exit latency ratio
+in the majority of cases for the processor models recognized by ``intel_idle``).
+All of the idle states in the final list are enabled by default in this case.
+
+
+.. _intel-idle-initialization:
+
+Initialization
+==============
+
+The initialization of ``intel_idle`` starts with checking if the kernel command
+line options forbid the use of the ``MWAIT`` instruction. If that is the case,
+an error code is returned right away.
+
+The next step is to check whether or not the processor model is known to the
+driver, which determines the idle states enumeration method (see
+`above <intel-idle-enumeration-of-states_>`_), and whether or not the processor
+supports ``MWAIT`` (the initialization fails if that is not the case). Then,
+the ``MWAIT`` support in the processor is enumerated through ``CPUID`` and the
+driver initialization fails if the level of support is not as expected (for
+example, if the total number of ``MWAIT`` substates returned is 0).
+
+Next, if the driver is not configured to ignore the ACPI tables (see
+`below <intel-idle-parameters_>`_), the idle states information provided by the
+platform firmware is extracted from them.
+
+Then, ``CPUIdle`` device objects are allocated for all CPUs and the list of
+available idle states is created as explained
+`above <intel-idle-enumeration-of-states_>`_.
+
+Finally, ``intel_idle`` is registered with the help of cpuidle_register_driver()
+as the ``CPUIdle`` driver for all CPUs in the system and a CPU online callback
+for configuring individual CPUs is registered via cpuhp_setup_state(), which
+(among other things) causes the callback routine to be invoked for all of the
+CPUs present in the system at that time (each CPU executes its own instance of
+the callback routine). That routine registers a ``CPUIdle`` device for the CPU
+running it (which enables the ``CPUIdle`` subsystem to operate that CPU) and
+optionally performs some CPU-specific initialization actions that may be
+required for the given processor model.
+
+
+.. _intel-idle-parameters:
+
+Kernel Command Line Options and Module Parameters
+=================================================
+
+The *x86* architecture support code recognizes three kernel command line
+options related to CPU idle time management: ``idle=poll``, ``idle=halt``,
+and ``idle=nomwait``. If any of them is present in the kernel command line, the
+``MWAIT`` instruction is not allowed to be used, so the initialization of
+``intel_idle`` will fail.
+
+Apart from that there are five module parameters recognized by ``intel_idle``
+itself that can be set via the kernel command line (they cannot be updated via
+sysfs, so that is the only way to change their values).
+
+The ``max_cstate`` parameter value is the maximum idle state index in the list
+of idle states supplied to the ``CPUIdle`` core during the registration of the
+driver. It is also the maximum number of regular (non-polling) idle states that
+can be used by ``intel_idle``, so the enumeration of idle states is terminated
+after finding that number of usable idle states (the other idle states that
+potentially might have been used if ``max_cstate`` had been greater are not
+taken into consideration at all). Setting ``max_cstate`` can prevent
+``intel_idle`` from exposing idle states that are regarded as "too deep" for
+some reason to the ``CPUIdle`` core, but it does so by making them effectively
+invisible until the system is shut down and started again which may not always
+be desirable. In practice, it is only really necessary to do that if the idle
+states in question cannot be enabled during system startup, because in the
+working state of the system the CPU power management quality of service (PM
+QoS) feature can be used to prevent ``CPUIdle`` from touching those idle states
+even if they have been enumerated (see :ref:`cpu-pm-qos` in
+Documentation/admin-guide/pm/cpuidle.rst).
+Setting ``max_cstate`` to 0 causes the ``intel_idle`` initialization to fail.
+
+The ``no_acpi``, ``use_acpi`` and ``no_native`` module parameters are
+recognized by ``intel_idle`` if the kernel has been configured with ACPI
+support. In the case that ACPI is not configured these flags have no impact
+on functionality.
+
+``no_acpi`` - Do not use ACPI at all. Only native mode is available, no
+ACPI mode.
+
+``use_acpi`` - No-op in ACPI mode, the driver will consult ACPI tables for
+C-states on/off status in native mode.
+
+``no_native`` - Work only in ACPI mode, no native mode available (ignore
+all custom tables).
+
+The value of the ``states_off`` module parameter (0 by default) represents a
+list of idle states to be disabled by default in the form of a bitmask.
+
+Namely, the positions of the bits that are set in the ``states_off`` value are
+the indices of idle states to be disabled by default (as reflected by the names
+of the corresponding idle state directories in ``sysfs``, :file:`state0`,
+:file:`state1` ... :file:`state<i>` ..., where ``<i>`` is the index of the given
+idle state; see :ref:`idle-states-representation` in
+Documentation/admin-guide/pm/cpuidle.rst).
+
+For example, if ``states_off`` is equal to 3, the driver will disable idle
+states 0 and 1 by default, and if it is equal to 8, idle state 3 will be
+disabled by default and so on (bit positions beyond the maximum idle state index
+are ignored).
+
+The idle states disabled this way can be enabled (on a per-CPU basis) from user
+space via ``sysfs``.
+
+The ``ibrs_off`` module parameter is a boolean flag (defaults to
+false). If set, it is used to control if IBRS (Indirect Branch Restricted
+Speculation) should be turned off when the CPU enters an idle state.
+This flag does not affect CPUs that use Enhanced IBRS which can remain
+on with little performance impact.
+
+For some CPUs, IBRS will be selected as mitigation for Spectre v2 and Retbleed
+security vulnerabilities by default. Leaving the IBRS mode on while idling may
+have a performance impact on its sibling CPU. The IBRS mode will be turned off
+by default when the CPU enters into a deep idle state, but not in some
+shallower ones. Setting the ``ibrs_off`` module parameter will force the IBRS
+mode to off when the CPU is in any one of the available idle states. This may
+help performance of a sibling CPU at the expense of a slightly higher wakeup
+latency for the idle CPU.
+
+
+.. _intel-idle-core-and-package-idle-states:
+
+Core and Package Levels of Idle States
+======================================
+
+Typically, in a processor supporting the ``MWAIT`` instruction there are (at
+least) two levels of idle states (or C-states). One level, referred to as
+"core C-states", covers individual cores in the processor, whereas the other
+level, referred to as "package C-states", covers the entire processor package
+and it may also involve other components of the system (GPUs, memory
+controllers, I/O hubs etc.).
+
+Some of the ``MWAIT`` hint values allow the processor to use core C-states only
+(most importantly, that is the case for the ``MWAIT`` hint value corresponding
+to the ``C1`` idle state), but the majority of them give it a license to put
+the target core (i.e. the core containing the logical CPU executing ``MWAIT``
+with the given hint value) into a specific core C-state and then (if possible)
+to enter a specific package C-state at the deeper level. For example, the
+``MWAIT`` hint value representing the ``C3`` idle state allows the processor to
+put the target core into the low-power state referred to as "core ``C3``" (or
+``CC3``), which happens if all of the logical CPUs (SMT siblings) in that core
+have executed ``MWAIT`` with the ``C3`` hint value (or with a hint value
+representing a deeper idle state), and in addition to that (in the majority of
+cases) it gives the processor a license to put the entire package (possibly
+including some non-CPU components such as a GPU or a memory controller) into the
+low-power state referred to as "package ``C3``" (or ``PC3``), which happens if
+all of the cores have gone into the ``CC3`` state and (possibly) some additional
+conditions are satisfied (for instance, if the GPU is covered by ``PC3``, it may
+be required to be in a certain GPU-specific low-power state for ``PC3`` to be
+reachable).
+
+As a rule, there is no simple way to make the processor use core C-states only
+if the conditions for entering the corresponding package C-states are met, so
+the logical CPU executing ``MWAIT`` with a hint value that is not core-level
+only (like for ``C1``) must always assume that this may cause the processor to
+enter a package C-state. [That is why the exit latency and target residency
+values corresponding to the majority of ``MWAIT`` hint values in the "internal"
+tables of idle states in ``intel_idle`` reflect the properties of package
+C-states.] If using package C-states is not desirable at all, either
+:ref:`PM QoS <cpu-pm-qos>` or the ``max_cstate`` module parameter of
+``intel_idle`` described `above <intel-idle-parameters_>`_ must be used to
+restrict the range of permissible idle states to the ones with core-level only
+``MWAIT`` hint values (like ``C1``).
+
+
+References
+==========
+
+.. [1] *Intel® 64 and IA-32 Architectures Software Developer’s Manual Volume 2B*,
+ https://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-vol-2b-manual.html
+
+.. [2] *Advanced Configuration and Power Interface (ACPI) Specification*,
+ https://uefi.org/specifications
diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst
index ac6f5c597a56..26e702c7016e 100644
--- a/Documentation/admin-guide/pm/intel_pstate.rst
+++ b/Documentation/admin-guide/pm/intel_pstate.rst
@@ -1,10 +1,13 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
===============================================
``intel_pstate`` CPU Performance Scaling Driver
===============================================
-::
+:Copyright: |copy| 2017 Intel Corporation
- Copyright (c) 2017 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
General Information
@@ -15,16 +18,15 @@ General Information
(``CPUFreq``). It is a scaling driver for the Sandy Bridge and later
generations of Intel processors. Note, however, that some of those processors
may not be supported. [To understand ``intel_pstate`` it is necessary to know
-how ``CPUFreq`` works in general, so this is the time to read :doc:`cpufreq` if
-you have not done that yet.]
+how ``CPUFreq`` works in general, so this is the time to read
+Documentation/admin-guide/pm/cpufreq.rst if you have not done that yet.]
For the processors supported by ``intel_pstate``, the P-state concept is broader
than just an operating frequency or an operating performance point (see the
-`LinuxCon Europe 2015 presentation by Kristen Accardi <LCEU2015_>`_ for more
+LinuxCon Europe 2015 presentation by Kristen Accardi [1]_ for more
information about that). For this reason, the representation of P-states used
by ``intel_pstate`` internally follows the hardware specification (for details
-refer to `Intel® 64 and IA-32 Architectures Software Developer’s Manual
-Volume 3: System Programming Guide <SDM_>`_). However, the ``CPUFreq`` core
+refer to Intel Software Developer’s Manual [2]_). However, the ``CPUFreq`` core
uses frequencies for identifying operating performance points of CPUs and
frequencies are involved in the user space interface exposed by it, so
``intel_pstate`` maps its internal representation of P-states to frequencies too
@@ -52,17 +54,21 @@ registered (see `below <status_attr_>`_).
Operation Modes
===============
-``intel_pstate`` can operate in three different modes: in the active mode with
-or without hardware-managed P-states support and in the passive mode. Which of
-them will be in effect depends on what kernel command line options are used and
-on the capabilities of the processor.
+``intel_pstate`` can operate in two different modes, active or passive. In the
+active mode, it uses its own internal performance scaling governor algorithm or
+allows the hardware to do performance scaling by itself, while in the passive
+mode it responds to requests made by a generic ``CPUFreq`` governor implementing
+a certain performance scaling algorithm. Which of them will be in effect
+depends on what kernel command line options are used and on the capabilities of
+the processor.
Active Mode
-----------
-This is the default operation mode of ``intel_pstate``. If it works in this
-mode, the ``scaling_driver`` policy attribute in ``sysfs`` for all ``CPUFreq``
-policies contains the string "intel_pstate".
+This is the default operation mode of ``intel_pstate`` for processors with
+hardware-managed P-states (HWP) support. If it works in this mode, the
+``scaling_driver`` policy attribute in ``sysfs`` for all ``CPUFreq`` policies
+contains the string "intel_pstate".
In this mode the driver bypasses the scaling governors layer of ``CPUFreq`` and
provides its own scaling algorithms for P-state selection. Those algorithms
@@ -117,7 +123,9 @@ Energy-Performance Bias (EPB) knob (otherwise), which means that the processor's
internal P-state selection logic is expected to focus entirely on performance.
This will override the EPP/EPB setting coming from the ``sysfs`` interface
-(see `Energy vs Performance Hints`_ below).
+(see `Energy vs Performance Hints`_ below). Moreover, any attempts to change
+the EPP/EPB to a value different from 0 ("performance") via ``sysfs`` in this
+configuration will be rejected.
Also, in this configuration the range of P-states available to the processor's
internal P-state selection logic is always restricted to the upper boundary
@@ -136,12 +144,13 @@ internal P-state selection logic to be less performance-focused.
Active Mode Without HWP
~~~~~~~~~~~~~~~~~~~~~~~
-This is the default operation mode for processors that do not support the HWP
-feature. It also is used by default with the ``intel_pstate=no_hwp`` argument
-in the kernel command line. However, in this mode ``intel_pstate`` may refuse
-to work with the given processor if it does not recognize it. [Note that
-``intel_pstate`` will never refuse to work with any processor with the HWP
-feature enabled.]
+This operation mode is optional for processors that do not support the HWP
+feature or when the ``intel_pstate=no_hwp`` argument is passed to the kernel in
+the command line. The active mode is used in those cases if the
+``intel_pstate=active`` argument is passed to the kernel in the command line.
+In this mode ``intel_pstate`` may refuse to work with processors that are not
+recognized by it. [Note that ``intel_pstate`` will never refuse to work with
+any processor with the HWP feature enabled.]
In this mode ``intel_pstate`` registers utilization update callbacks with the
CPU scheduler in order to run a P-state selection algorithm, either
@@ -186,10 +195,15 @@ is not set.
Passive Mode
------------
-This mode is used if the ``intel_pstate=passive`` argument is passed to the
-kernel in the command line (it implies the ``intel_pstate=no_hwp`` setting too).
-Like in the active mode without HWP support, in this mode ``intel_pstate`` may
-refuse to work with the given processor if it does not recognize it.
+This is the default operation mode of ``intel_pstate`` for processors without
+hardware-managed P-states (HWP) support. It is always used if the
+``intel_pstate=passive`` argument is passed to the kernel in the command line
+regardless of whether or not the given processor supports HWP. [Note that the
+``intel_pstate=no_hwp`` setting causes the driver to start in the passive mode
+if it is not combined with ``intel_pstate=active``.] Like in the active mode
+without HWP support, in this mode ``intel_pstate`` may refuse to work with
+processors that are not recognized by it if HWP is prevented from being enabled
+through the kernel command line.
If the driver works in this mode, the ``scaling_driver`` policy attribute in
``sysfs`` for all ``CPUFreq`` policies contains the string "intel_cpufreq".
@@ -310,10 +324,109 @@ manuals need to be consulted to get to it too.
For this reason, there is a list of supported processors in ``intel_pstate`` and
the driver initialization will fail if the detected processor is not in that
-list, unless it supports the `HWP feature <Active Mode_>`_. [The interface to
-obtain all of the information listed above is the same for all of the processors
-supporting the HWP feature, which is why they all are supported by
-``intel_pstate``.]
+list, unless it supports the HWP feature. [The interface to obtain all of the
+information listed above is the same for all of the processors supporting the
+HWP feature, which is why ``intel_pstate`` works with all of them.]
+
+
+Support for Hybrid Processors
+=============================
+
+Some processors supported by ``intel_pstate`` contain two or more types of CPU
+cores differing by the maximum turbo P-state, performance vs power characteristics,
+cache sizes, and possibly other properties. They are commonly referred to as
+hybrid processors. To support them, ``intel_pstate`` requires HWP to be enabled
+and it assumes the HWP performance units to be the same for all CPUs in the
+system, so a given HWP performance level always represents approximately the
+same physical performance regardless of the core (CPU) type.
+
+Hybrid Processors with SMT
+--------------------------
+
+On systems where SMT (Simultaneous Multithreading), also referred to as
+HyperThreading (HT) in the context of Intel processors, is enabled on at least
+one core, ``intel_pstate`` assigns performance-based priorities to CPUs. Namely,
+the priority of a given CPU reflects its highest HWP performance level which
+causes the CPU scheduler to generally prefer more performant CPUs, so the less
+performant CPUs are used when the other ones are fully loaded. However, SMT
+siblings (that is, logical CPUs sharing one physical core) are treated in a
+special way such that if one of them is in use, the effective priority of the
+other ones is lowered below the priorities of the CPUs located in the other
+physical cores.
+
+This approach maximizes performance in the majority of cases, but unfortunately
+it also leads to excessive energy usage in some important scenarios, like video
+playback, which is not generally desirable. While there is no other viable
+choice with SMT enabled because the effective capacity and utilization of SMT
+siblings are hard to determine, hybrid processors without SMT can be handled in
+more energy-efficient ways.
+
+.. _CAS:
+
+Capacity-Aware Scheduling Support
+---------------------------------
+
+The capacity-aware scheduling (CAS) support in the CPU scheduler is enabled by
+``intel_pstate`` by default on hybrid processors without SMT. CAS generally
+causes the scheduler to put tasks on a CPU so long as there is a sufficient
+amount of spare capacity on it, and if the utilization of a given task is too
+high for it, the task will need to go somewhere else.
+
+Since CAS takes CPU capacities into account, it does not require CPU
+prioritization and it allows tasks to be distributed more symmetrically among
+the more performant and less performant CPUs. Once placed on a CPU with enough
+capacity to accommodate it, a task may just continue to run there regardless of
+whether or not the other CPUs are fully loaded, so on average CAS reduces the
+utilization of the more performant CPUs which causes the energy usage to be more
+balanced because the more performant CPUs are generally less energy-efficient
+than the less performant ones.
+
+In order to use CAS, the scheduler needs to know the capacity of each CPU in
+the system and it needs to be able to compute scale-invariant utilization of
+CPUs, so ``intel_pstate`` provides it with the requisite information.
+
+First of all, the capacity of each CPU is represented by the ratio of its highest
+HWP performance level, multiplied by 1024, to the highest HWP performance level
+of the most performant CPU in the system, which works because the HWP performance
+units are the same for all CPUs. Second, the frequency-invariance computations,
+carried out by the scheduler to always express CPU utilization in the same units
+regardless of the frequency it is currently running at, are adjusted to take the
+CPU capacity into account. All of this happens when ``intel_pstate`` has
+registered itself with the ``CPUFreq`` core and it has figured out that it is
+running on a hybrid processor without SMT.
+
+Energy-Aware Scheduling Support
+-------------------------------
+
+If ``CONFIG_ENERGY_MODEL`` has been set during kernel configuration and
+``intel_pstate`` runs on a hybrid processor without SMT, in addition to enabling
+`CAS <CAS_>`_ it registers an Energy Model for the processor. This allows the
+Energy-Aware Scheduling (EAS) support to be enabled in the CPU scheduler if
+``schedutil`` is used as the ``CPUFreq`` governor which requires ``intel_pstate``
+to operate in the `passive mode <Passive Mode_>`_.
+
+The Energy Model registered by ``intel_pstate`` is artificial (that is, it is
+based on abstract cost values and it does not include any real power numbers)
+and it is relatively simple to avoid unnecessary computations in the scheduler.
+There is a performance domain in it for every CPU in the system and the cost
+values for these performance domains have been chosen so that running a task on
+a less performant (small) CPU appears to be always cheaper than running that
+task on a more performant (big) CPU. However, for two CPUs of the same type,
+the cost difference depends on their current utilization, and the CPU whose
+current utilization is higher generally appears to be a more expensive
+destination for a given task. This helps to balance the load among CPUs of the
+same type.
+
+Since EAS works on top of CAS, high-utilization tasks are always migrated to
+CPUs with enough capacity to accommodate them, but thanks to EAS, low-utilization
+tasks tend to be placed on the CPUs that look less expensive to the scheduler.
+Effectively, this causes the less performant and less loaded CPUs to be
+preferred as long as they have enough spare capacity to run the given task
+which generally leads to reduced energy usage.
+
+The Energy Model created by ``intel_pstate`` can be inspected by looking at
+the ``energy_model`` directory in ``debugfs`` (typlically mounted on
+``/sys/kernel/debug/``).
User Space Interface in ``sysfs``
@@ -352,6 +465,9 @@ argument is passed to the kernel in the command line.
inclusive) including both turbo and non-turbo P-states (see
`Turbo P-states Support`_).
+ This attribute is present only if the value exposed by it is the same
+ for all of the CPUs in the system.
+
The value of this attribute is not affected by the ``no_turbo``
setting described `below <no_turbo_attr_>`_.
@@ -361,19 +477,22 @@ argument is passed to the kernel in the command line.
Ratio of the `turbo range <turbo_>`_ size to the size of the entire
range of supported P-states, in percent.
+ This attribute is present only if the value exposed by it is the same
+ for all of the CPUs in the system.
+
This attribute is read-only.
.. _no_turbo_attr:
``no_turbo``
If set (equal to 1), the driver is not allowed to set any turbo P-states
- (see `Turbo P-states Support`_). If unset (equalt to 0, which is the
+ (see `Turbo P-states Support`_). If unset (equal to 0, which is the
default), turbo P-states can be set by the driver.
[Note that ``intel_pstate`` does not support the general ``boost``
attribute (supported by some other scaling drivers) which is replaced
by this one.]
- This attrubute does not affect the maximum supported frequency value
+ This attribute does not affect the maximum supported frequency value
supplied to the ``CPUFreq`` core and exposed via the policy interface,
but it affects the maximum possible value of per-policy P-state limits
(see `Interpretation of Policy Attributes`_ below for details).
@@ -417,18 +536,24 @@ argument is passed to the kernel in the command line.
as well as the per-policy ones) are then reset to their default
values, possibly depending on the target operation mode.]
- That only is supported in some configurations, though (for example, if
- the `HWP feature is enabled in the processor <Active Mode With HWP_>`_,
- the operation mode of the driver cannot be changed), and if it is not
- supported in the current configuration, writes to this attribute will
- fail with an appropriate error.
+``energy_efficiency``
+ This attribute is only present on platforms with CPUs matching the Kaby
+ Lake or Coffee Lake desktop CPU model. By default, energy-efficiency
+ optimizations are disabled on these CPU models if HWP is enabled.
+ Enabling energy-efficiency optimizations may limit maximum operating
+ frequency with or without the HWP feature. With HWP enabled, the
+ optimizations are done only in the turbo frequency range. Without it,
+ they are done in the entire available frequency range. Setting this
+ attribute to "1" enables the energy-efficiency optimizations and setting
+ to "0" disables them.
Interpretation of Policy Attributes
-----------------------------------
The interpretation of some ``CPUFreq`` policy attributes described in
-:doc:`cpufreq` is special with ``intel_pstate`` as the current scaling driver
-and it generally depends on the driver's `operation mode <Operation Modes_>`_.
+Documentation/admin-guide/pm/cpufreq.rst is special with ``intel_pstate``
+as the current scaling driver and it generally depends on the driver's
+`operation mode <Operation Modes_>`_.
First of all, the values of the ``cpuinfo_max_freq``, ``cpuinfo_min_freq`` and
``scaling_cur_freq`` attributes are produced by applying a processor-specific
@@ -465,8 +590,8 @@ Next, the following policy attributes have special meaning if
policy for the time interval between the last two invocations of the
driver's utilization update callback by the CPU scheduler for that CPU.
-One more policy attribute is present if the `HWP feature is enabled in the
-processor <Active Mode With HWP_>`_:
+One more policy attribute is present if the HWP feature is enabled in the
+processor:
``base_frequency``
Shows the base frequency of the CPU. Any frequency above this will be
@@ -495,15 +620,23 @@ on the following rules, regardless of the current operation mode of the driver:
2. Each individual CPU is affected by its own per-policy limits (that is, it
cannot be requested to run faster than its own per-policy maximum and it
- cannot be requested to run slower than its own per-policy minimum).
+ cannot be requested to run slower than its own per-policy minimum). The
+ effective performance depends on whether the platform supports per core
+ P-states, hyper-threading is enabled and on current performance requests
+ from other CPUs. When platform doesn't support per core P-states, the
+ effective performance can be more than the policy limits set on a CPU, if
+ other CPUs are requesting higher performance at that moment. Even with per
+ core P-states support, when hyper-threading is enabled, if the sibling CPU
+ is requesting higher performance, the other siblings will get higher
+ performance than their policy limits.
3. The global and per-policy limits can be set independently.
-If the `HWP feature is enabled in the processor <Active Mode With HWP_>`_, the
-resulting effective values are written into its registers whenever the limits
-change in order to request its internal P-state selection logic to always set
-P-states within these limits. Otherwise, the limits are taken into account by
-scaling governors (in the `passive mode <Passive Mode_>`_) and by the driver
+In the `active mode with the HWP feature enabled <Active Mode With HWP_>`_, the
+resulting effective values are written into hardware registers whenever the
+limits change in order to request its internal P-state selection logic to always
+set P-states within these limits. Otherwise, the limits are taken into account
+by scaling governors (in the `passive mode <Passive Mode_>`_) and by the driver
every time before setting a new P-state for a CPU.
Additionally, if the ``intel_pstate=per_cpu_perf_limits`` command line argument
@@ -514,12 +647,11 @@ at all and the only way to set the limits is by using the policy attributes.
Energy vs Performance Hints
---------------------------
-If ``intel_pstate`` works in the `active mode with the HWP feature enabled
-<Active Mode With HWP_>`_ in the processor, additional attributes are present
-in every ``CPUFreq`` policy directory in ``sysfs``. They are intended to allow
-user space to help ``intel_pstate`` to adjust the processor's internal P-state
-selection logic by focusing it on performance or on energy-efficiency, or
-somewhere between the two extremes:
+If the hardware-managed P-states (HWP) is enabled in the processor, additional
+attributes, intended to allow user space to help ``intel_pstate`` to adjust the
+processor's internal P-state selection logic by focusing it on performance or on
+energy-efficiency, or somewhere between the two extremes, are present in every
+``CPUFreq`` policy directory in ``sysfs``. They are :
``energy_performance_preference``
Current value of the energy vs performance hint for the given policy
@@ -538,7 +670,11 @@ somewhere between the two extremes:
Strings written to the ``energy_performance_preference`` attribute are
internally translated to integer values written to the processor's
Energy-Performance Preference (EPP) knob (if supported) or its
-Energy-Performance Bias (EPB) knob.
+Energy-Performance Bias (EPB) knob. It is also possible to write a positive
+integer value between 0 to 255, if the EPP feature is present. If the EPP
+feature is not present, writing integer value to this attribute is not
+supported. In this case, user can use the
+"/sys/devices/system/cpu/cpu*/power/energy_perf_bias" interface.
[Note that tasks may by migrated from one CPU to another by the scheduler's
load-balancing algorithm and if different energy vs performance hints are
@@ -553,9 +689,9 @@ or to pin every task potentially sensitive to them to a specific CPU.]
On the majority of systems supported by ``intel_pstate``, the ACPI tables
provided by the platform firmware contain ``_PSS`` objects returning information
-that can be used for CPU performance scaling (refer to the `ACPI specification`_
-for details on the ``_PSS`` objects and the format of the information returned
-by them).
+that can be used for CPU performance scaling (refer to the ACPI specification
+[3]_ for details on the ``_PSS`` objects and the format of the information
+returned by them).
The information returned by the ACPI ``_PSS`` objects is used by the
``acpi-cpufreq`` scaling driver. On systems supported by ``intel_pstate``
@@ -619,12 +755,14 @@ of them have to be prepended with the ``intel_pstate=`` prefix.
Do not register ``intel_pstate`` as the scaling driver even if the
processor is supported by it.
+``active``
+ Register ``intel_pstate`` in the `active mode <Active Mode_>`_ to start
+ with.
+
``passive``
Register ``intel_pstate`` in the `passive mode <Passive Mode_>`_ to
start with.
- This option implies the ``no_hwp`` one described below.
-
``force``
Register ``intel_pstate`` as the scaling driver instead of
``acpi-cpufreq`` even if the latter is preferred on the given system.
@@ -639,13 +777,12 @@ of them have to be prepended with the ``intel_pstate=`` prefix.
driver is used instead of ``acpi-cpufreq``.
``no_hwp``
- Do not enable the `hardware-managed P-states (HWP) feature
- <Active Mode With HWP_>`_ even if it is supported by the processor.
+ Do not enable the hardware-managed P-states (HWP) feature even if it is
+ supported by the processor.
``hwp_only``
Register ``intel_pstate`` as the scaling driver only if the
- `hardware-managed P-states (HWP) feature <Active Mode With HWP_>`_ is
- supported by the processor.
+ hardware-managed P-states (HWP) feature is supported by the processor.
``support_acpi_ppc``
Take ACPI ``_PPC`` performance limits into account.
@@ -659,6 +796,9 @@ of them have to be prepended with the ``intel_pstate=`` prefix.
Use per-logical-CPU P-State limits (see `Coordination of P-state
Limits`_ for details).
+``no_cas``
+ Do not enable `capacity-aware scheduling <CAS_>`_ which is enabled by
+ default on hybrid systems without SMT.
Diagnostics and Tuning
======================
@@ -675,7 +815,7 @@ it works in the `active mode <Active Mode_>`_.
The following sequence of shell commands can be used to enable them and see
their output (if the kernel is generally configured to support event tracing)::
- # cd /sys/kernel/debug/tracing/
+ # cd /sys/kernel/tracing/
# echo 1 > events/power/pstate_sample/enable
# echo 1 > events/power/cpu_frequency/enable
# cat trace
@@ -692,10 +832,10 @@ core (for the policies with other scaling governors).
The ``ftrace`` interface can be used for low-level diagnostics of
``intel_pstate``. For example, to check how often the function to set a
-P-state is called, the ``ftrace`` filter can be set to to
+P-state is called, the ``ftrace`` filter can be set to
:c:func:`intel_pstate_set_pstate`::
- # cd /sys/kernel/debug/tracing/
+ # cd /sys/kernel/tracing/
# cat available_filter_functions | grep -i pstate
intel_pstate_set_pstate
intel_pstate_cpu_init
@@ -720,6 +860,14 @@ P-state is called, the ``ftrace`` filter can be set to to
<idle>-0 [000] ..s. 2537.654843: intel_pstate_set_pstate <-intel_pstate_timer_func
-.. _LCEU2015: http://events.linuxfoundation.org/sites/events/files/slides/LinuxConEurope_2015.pdf
-.. _SDM: http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-system-programming-manual-325384.html
-.. _ACPI specification: http://www.uefi.org/sites/default/files/resources/ACPI_6_1.pdf
+References
+==========
+
+.. [1] Kristen Accardi, *Balancing Power and Performance in the Linux Kernel*,
+ https://events.static.linuxfound.org/sites/events/files/slides/LinuxConEurope_2015.pdf
+
+.. [2] *Intel® 64 and IA-32 Architectures Software Developer’s Manual Volume 3: System Programming Guide*,
+ https://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-system-programming-manual-325384.html
+
+.. [3] *Advanced Configuration and Power Interface Specification*,
+ https://uefi.org/sites/default/files/resources/ACPI_6_3_final_Jan30.pdf
diff --git a/Documentation/admin-guide/pm/intel_uncore_frequency_scaling.rst b/Documentation/admin-guide/pm/intel_uncore_frequency_scaling.rst
new file mode 100644
index 000000000000..d367ba4d744a
--- /dev/null
+++ b/Documentation/admin-guide/pm/intel_uncore_frequency_scaling.rst
@@ -0,0 +1,184 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+==============================
+Intel Uncore Frequency Scaling
+==============================
+
+:Copyright: |copy| 2022-2023 Intel Corporation
+
+:Author: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+
+Introduction
+------------
+
+The uncore can consume significant amount of power in Intel's Xeon servers based
+on the workload characteristics. To optimize the total power and improve overall
+performance, SoCs have internal algorithms for scaling uncore frequency. These
+algorithms monitor workload usage of uncore and set a desirable frequency.
+
+It is possible that users have different expectations of uncore performance and
+want to have control over it. The objective is similar to allowing users to set
+the scaling min/max frequencies via cpufreq sysfs to improve CPU performance.
+Users may have some latency sensitive workloads where they do not want any
+change to uncore frequency. Also, users may have workloads which require
+different core and uncore performance at distinct phases and they may want to
+use both cpufreq and the uncore scaling interface to distribute power and
+improve overall performance.
+
+Sysfs Interface
+---------------
+
+To control uncore frequency, a sysfs interface is provided in the directory:
+`/sys/devices/system/cpu/intel_uncore_frequency/`.
+
+There is one directory for each package and die combination as the scope of
+uncore scaling control is per die in multiple die/package SoCs or per
+package for single die per package SoCs. The name represents the
+scope of control. For example: 'package_00_die_00' is for package id 0 and
+die 0.
+
+Each package_*_die_* contains the following attributes:
+
+``initial_max_freq_khz``
+ Out of reset, this attribute represent the maximum possible frequency.
+ This is a read-only attribute. If users adjust max_freq_khz,
+ they can always go back to maximum using the value from this attribute.
+
+``initial_min_freq_khz``
+ Out of reset, this attribute represent the minimum possible frequency.
+ This is a read-only attribute. If users adjust min_freq_khz,
+ they can always go back to minimum using the value from this attribute.
+
+``max_freq_khz``
+ This attribute is used to set the maximum uncore frequency.
+
+``min_freq_khz``
+ This attribute is used to set the minimum uncore frequency.
+
+``current_freq_khz``
+ This attribute is used to get the current uncore frequency.
+
+SoCs with TPMI (Topology Aware Register and PM Capsule Interface)
+-----------------------------------------------------------------
+
+An SoC can contain multiple power domains with individual or collection
+of mesh partitions. This partition is called fabric cluster.
+
+Certain type of meshes will need to run at the same frequency, they will
+be placed in the same fabric cluster. Benefit of fabric cluster is that it
+offers a scalable mechanism to deal with partitioned fabrics in a SoC.
+
+The current sysfs interface supports controls at package and die level.
+This interface is not enough to support more granular control at
+fabric cluster level.
+
+SoCs with the support of TPMI (Topology Aware Register and PM Capsule
+Interface), can have multiple power domains. Each power domain can
+contain one or more fabric clusters.
+
+To represent controls at fabric cluster level in addition to the
+controls at package and die level (like systems without TPMI
+support), sysfs is enhanced. This granular interface is presented in the
+sysfs with directories names prefixed with "uncore". For example:
+uncore00, uncore01 etc.
+
+The scope of control is specified by attributes "package_id", "domain_id"
+and "fabric_cluster_id" in the directory.
+
+Attributes in each directory:
+
+``domain_id``
+ This attribute is used to get the power domain id of this instance.
+
+``die_id``
+ This attribute is used to get the Linux die id of this instance.
+ This attribute is only present for domains with core agents and
+ when the CPUID leaf 0x1f presents die ID.
+
+``fabric_cluster_id``
+ This attribute is used to get the fabric cluster id of this instance.
+
+``package_id``
+ This attribute is used to get the package id of this instance.
+
+``agent_types``
+ This attribute displays all the hardware agents present within the
+ domain. Each agent has the capability to control one or more hardware
+ subsystems, which include: core, cache, memory, and I/O.
+
+The other attributes are same as presented at package_*_die_* level.
+
+In most of current use cases, the "max_freq_khz" and "min_freq_khz"
+is updated at "package_*_die_*" level. This model will be still supported
+with the following approach:
+
+When user uses controls at "package_*_die_*" level, then every fabric
+cluster is affected in that package and die. For example: user changes
+"max_freq_khz" in the package_00_die_00, then "max_freq_khz" for uncore*
+directory with the same package id will be updated. In this case user can
+still update "max_freq_khz" at each uncore* level, which is more restrictive.
+Similarly, user can update "min_freq_khz" at "package_*_die_*" level
+to apply at each uncore* level.
+
+Support for "current_freq_khz" is available only at each fabric cluster
+level (i.e., in uncore* directory).
+
+Efficiency vs. Latency Tradeoff
+-------------------------------
+
+The Efficiency Latency Control (ELC) feature improves performance
+per watt. With this feature hardware power management algorithms
+optimize trade-off between latency and power consumption. For some
+latency sensitive workloads further tuning can be done by SW to
+get desired performance.
+
+The hardware monitors the average CPU utilization across all cores
+in a power domain at regular intervals and decides an uncore frequency.
+While this may result in the best performance per watt, workload may be
+expecting higher performance at the expense of power. Consider an
+application that intermittently wakes up to perform memory reads on an
+otherwise idle system. In such cases, if hardware lowers uncore
+frequency, then there may be delay in ramp up of frequency to meet
+target performance.
+
+The ELC control defines some parameters which can be changed from SW.
+If the average CPU utilization is below a user-defined threshold
+(elc_low_threshold_percent attribute below), the user-defined uncore
+floor frequency will be used (elc_floor_freq_khz attribute below)
+instead of hardware calculated minimum.
+
+Similarly in high load scenario where the CPU utilization goes above
+the high threshold value (elc_high_threshold_percent attribute below)
+instead of jumping to maximum uncore frequency, frequency is increased
+in 100MHz steps. This avoids consuming unnecessarily high power
+immediately with CPU utilization spikes.
+
+Attributes for efficiency latency control:
+
+``elc_floor_freq_khz``
+ This attribute is used to get/set the efficiency latency floor frequency.
+ If this variable is lower than the 'min_freq_khz', it is ignored by
+ the firmware.
+
+``elc_low_threshold_percent``
+ This attribute is used to get/set the efficiency latency control low
+ threshold. This attribute is in percentages of CPU utilization.
+
+``elc_high_threshold_percent``
+ This attribute is used to get/set the efficiency latency control high
+ threshold. This attribute is in percentages of CPU utilization.
+
+``elc_high_threshold_enable``
+ This attribute is used to enable/disable the efficiency latency control
+ high threshold. Write '1' to enable, '0' to disable.
+
+Example system configuration below, which does following:
+ * when CPU utilization is less than 10%: sets uncore frequency to 800MHz
+ * when CPU utilization is higher than 95%: increases uncore frequency in
+ 100MHz steps, until power limit is reached
+
+ elc_floor_freq_khz:800000
+ elc_high_threshold_percent:95
+ elc_high_threshold_enable:1
+ elc_low_threshold_percent:10
diff --git a/Documentation/admin-guide/pm/sleep-states.rst b/Documentation/admin-guide/pm/sleep-states.rst
index dbf5acd49f35..ee55a460c639 100644
--- a/Documentation/admin-guide/pm/sleep-states.rst
+++ b/Documentation/admin-guide/pm/sleep-states.rst
@@ -1,10 +1,14 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
===================
System Sleep States
===================
-::
+:Copyright: |copy| 2017 Intel Corporation
+
+:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Copyright (c) 2017 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Sleep states are global low-power states of the entire system in which user
space code cannot be executed and the overall system activity is significantly
@@ -149,8 +153,11 @@ for the given CPU architecture includes the low-level code for system resume.
Basic ``sysfs`` Interfaces for System Suspend and Hibernation
=============================================================
-The following files located in the :file:`/sys/power/` directory can be used by
-user space for sleep states control.
+The power management subsystem provides userspace with a unified ``sysfs``
+interface for system sleep regardless of the underlying system architecture or
+platform. That interface is located in the :file:`/sys/power/` directory
+(assuming that ``sysfs`` is mounted at :file:`/sys`) and it consists of the
+following attributes (files):
``state``
This file contains a list of strings representing sleep states supported
@@ -158,9 +165,9 @@ user space for sleep states control.
to start a transition of the system into the sleep state represented by
that string.
- In particular, the strings "disk", "freeze" and "standby" represent the
+ In particular, the "disk", "freeze" and "standby" strings represent the
:ref:`hibernation <hibernation>`, :ref:`suspend-to-idle <s2idle>` and
- :ref:`standby <standby>` sleep states, respectively. The string "mem"
+ :ref:`standby <standby>` sleep states, respectively. The "mem" string
is interpreted in accordance with the contents of the ``mem_sleep`` file
described below.
@@ -173,7 +180,7 @@ user space for sleep states control.
associated with the "mem" string in the ``state`` file described above.
The strings that may be present in this file are "s2idle", "shallow"
- and "deep". The string "s2idle" always represents :ref:`suspend-to-idle
+ and "deep". The "s2idle" string always represents :ref:`suspend-to-idle
<s2idle>` and, by convention, "shallow" and "deep" represent
:ref:`standby <standby>` and :ref:`suspend-to-RAM <s2ram>`,
respectively.
@@ -181,15 +188,17 @@ user space for sleep states control.
Writing one of the listed strings into this file causes the system
suspend variant represented by it to be associated with the "mem" string
in the ``state`` file. The string representing the suspend variant
- currently associated with the "mem" string in the ``state`` file
- is listed in square brackets.
+ currently associated with the "mem" string in the ``state`` file is
+ shown in square brackets.
If the kernel does not support system suspend, this file is not present.
``disk``
- This file contains a list of strings representing different operations
- that can be carried out after the hibernation image has been saved. The
- possible options are as follows:
+ This file controls the operating mode of hibernation (Suspend-to-Disk).
+ Specifically, it tells the kernel what to do after creating a
+ hibernation image.
+
+ Reading from it returns a list of supported options encoded as:
``platform``
Put the system into a special low-power state (e.g. ACPI S4) to
@@ -197,6 +206,11 @@ user space for sleep states control.
platform firmware to take a simplified initialization path after
wakeup.
+ It is only available if the platform provides a special
+ mechanism to put the system to sleep after creating a
+ hibernation image (platforms with ACPI do that as a rule, for
+ example).
+
``shutdown``
Power off the system.
@@ -210,22 +224,53 @@ user space for sleep states control.
the hibernation image and continue. Otherwise, use the image
to restore the previous state of the system.
+ It is available if system suspend is supported.
+
``test_resume``
Diagnostic operation. Load the image as though the system had
just woken up from hibernation and the currently running kernel
instance was a restore kernel and follow up with full system
resume.
- Writing one of the listed strings into this file causes the option
+ Writing one of the strings listed above into this file causes the option
represented by it to be selected.
- The currently selected option is shown in square brackets which means
+ The currently selected option is shown in square brackets, which means
that the operation represented by it will be carried out after creating
- and saving the image next time hibernation is triggered by writing
- ``disk`` to :file:`/sys/power/state`.
+ and saving the image when hibernation is triggered by writing ``disk``
+ to :file:`/sys/power/state`.
If the kernel does not support hibernation, this file is not present.
+``image_size``
+ This file controls the size of hibernation images.
+
+ It can be written a string representing a non-negative integer that will
+ be used as a best-effort upper limit of the image size, in bytes. The
+ hibernation core will do its best to ensure that the image size will not
+ exceed that number, but if that turns out to be impossible to achieve, a
+ hibernation image will still be created and its size will be as small as
+ possible. In particular, writing '0' to this file causes the size of
+ hibernation images to be minimum.
+
+ Reading from it returns the current image size limit, which is set to
+ around 2/5 of the available RAM size by default.
+
+``pm_trace``
+ This file controls the "PM trace" mechanism saving the last suspend
+ or resume event point in the RTC memory across reboots. It helps to
+ debug hard lockups or reboots due to device driver failures that occur
+ during system suspend or resume (which is more common) more effectively.
+
+ If it contains "1", the fingerprint of each suspend/resume event point
+ in turn will be stored in the RTC memory (overwriting the actual RTC
+ information), so it will survive a system crash if one occurs right
+ after storing it and it can be used later to identify the driver that
+ caused the crash to happen.
+
+ It contains "0" by default, which may be changed to "1" by writing a
+ string representing a nonzero integer into it.
+
According to the above, there are two ways to make the system go into the
:ref:`suspend-to-idle <s2idle>` state. The first one is to write "freeze"
directly to :file:`/sys/power/state`. The second one is to write "s2idle" to
@@ -240,6 +285,7 @@ system go into the :ref:`suspend-to-RAM <s2ram>` state (write "deep" into
The default suspend variant (ie. the one to be used without writing anything
into :file:`/sys/power/mem_sleep`) is either "deep" (on the majority of systems
supporting :ref:`suspend-to-RAM <s2ram>`) or "s2idle", but it can be overridden
-by the value of the "mem_sleep_default" parameter in the kernel command line.
-On some ACPI-based systems, depending on the information in the ACPI tables, the
-default may be "s2idle" even if :ref:`suspend-to-RAM <s2ram>` is supported.
+by the value of the ``mem_sleep_default`` parameter in the kernel command line.
+On some systems with ACPI, depending on the information in the ACPI tables, the
+default may be "s2idle" even if :ref:`suspend-to-RAM <s2ram>` is supported in
+principle.
diff --git a/Documentation/admin-guide/pm/strategies.rst b/Documentation/admin-guide/pm/strategies.rst
index afe4d3f831fe..dd0362e32fa5 100644
--- a/Documentation/admin-guide/pm/strategies.rst
+++ b/Documentation/admin-guide/pm/strategies.rst
@@ -1,10 +1,14 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
===========================
Power Management Strategies
===========================
-::
+:Copyright: |copy| 2017 Intel Corporation
+
+:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Copyright (c) 2017 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
The Linux kernel supports two major high-level power management strategies.
diff --git a/Documentation/admin-guide/pm/suspend-flows.rst b/Documentation/admin-guide/pm/suspend-flows.rst
new file mode 100644
index 000000000000..c479d7462647
--- /dev/null
+++ b/Documentation/admin-guide/pm/suspend-flows.rst
@@ -0,0 +1,270 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+=========================
+System Suspend Code Flows
+=========================
+
+:Copyright: |copy| 2020 Intel Corporation
+
+:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+At least one global system-wide transition needs to be carried out for the
+system to get from the working state into one of the supported
+:doc:`sleep states <sleep-states>`. Hibernation requires more than one
+transition to occur for this purpose, but the other sleep states, commonly
+referred to as *system-wide suspend* (or simply *system suspend*) states, need
+only one.
+
+For those sleep states, the transition from the working state of the system into
+the target sleep state is referred to as *system suspend* too (in the majority
+of cases, whether this means a transition or a sleep state of the system should
+be clear from the context) and the transition back from the sleep state into the
+working state is referred to as *system resume*.
+
+The kernel code flows associated with the suspend and resume transitions for
+different sleep states of the system are quite similar, but there are some
+significant differences between the :ref:`suspend-to-idle <s2idle>` code flows
+and the code flows related to the :ref:`suspend-to-RAM <s2ram>` and
+:ref:`standby <standby>` sleep states.
+
+The :ref:`suspend-to-RAM <s2ram>` and :ref:`standby <standby>` sleep states
+cannot be implemented without platform support and the difference between them
+boils down to the platform-specific actions carried out by the suspend and
+resume hooks that need to be provided by the platform driver to make them
+available. Apart from that, the suspend and resume code flows for these sleep
+states are mostly identical, so they both together will be referred to as
+*platform-dependent suspend* states in what follows.
+
+
+.. _s2idle_suspend:
+
+Suspend-to-idle Suspend Code Flow
+=================================
+
+The following steps are taken in order to transition the system from the working
+state to the :ref:`suspend-to-idle <s2idle>` sleep state:
+
+ 1. Invoking system-wide suspend notifiers.
+
+ Kernel subsystems can register callbacks to be invoked when the suspend
+ transition is about to occur and when the resume transition has finished.
+
+ That allows them to prepare for the change of the system state and to clean
+ up after getting back to the working state.
+
+ 2. Freezing tasks.
+
+ Tasks are frozen primarily in order to avoid unchecked hardware accesses
+ from user space through MMIO regions or I/O registers exposed directly to
+ it and to prevent user space from entering the kernel while the next step
+ of the transition is in progress (which might have been problematic for
+ various reasons).
+
+ All user space tasks are intercepted as though they were sent a signal and
+ put into uninterruptible sleep until the end of the subsequent system resume
+ transition.
+
+ The kernel threads that choose to be frozen during system suspend for
+ specific reasons are frozen subsequently, but they are not intercepted.
+ Instead, they are expected to periodically check whether or not they need
+ to be frozen and to put themselves into uninterruptible sleep if so. [Note,
+ however, that kernel threads can use locking and other concurrency controls
+ available in kernel space to synchronize themselves with system suspend and
+ resume, which can be much more precise than the freezing, so the latter is
+ not a recommended option for kernel threads.]
+
+ 3. Suspending devices and reconfiguring IRQs.
+
+ Devices are suspended in four phases called *prepare*, *suspend*,
+ *late suspend* and *noirq suspend* (see :ref:`driverapi_pm_devices` for more
+ information on what exactly happens in each phase).
+
+ Every device is visited in each phase, but typically it is not physically
+ accessed in more than two of them.
+
+ The runtime PM API is disabled for every device during the *late* suspend
+ phase and high-level ("action") interrupt handlers are prevented from being
+ invoked before the *noirq* suspend phase.
+
+ Interrupts are still handled after that, but they are only acknowledged to
+ interrupt controllers without performing any device-specific actions that
+ would be triggered in the working state of the system (those actions are
+ deferred till the subsequent system resume transition as described
+ `below <s2idle_resume_>`_).
+
+ IRQs associated with system wakeup devices are "armed" so that the resume
+ transition of the system is started when one of them signals an event.
+
+ 4. Freezing the scheduler tick and suspending timekeeping.
+
+ When all devices have been suspended, CPUs enter the idle loop and are put
+ into the deepest available idle state. While doing that, each of them
+ "freezes" its own scheduler tick so that the timer events associated with
+ the tick do not occur until the CPU is woken up by another interrupt source.
+
+ The last CPU to enter the idle state also stops the timekeeping which
+ (among other things) prevents high resolution timers from triggering going
+ forward until the first CPU that is woken up restarts the timekeeping.
+ That allows the CPUs to stay in the deep idle state relatively long in one
+ go.
+
+ From this point on, the CPUs can only be woken up by non-timer hardware
+ interrupts. If that happens, they go back to the idle state unless the
+ interrupt that woke up one of them comes from an IRQ that has been armed for
+ system wakeup, in which case the system resume transition is started.
+
+
+.. _s2idle_resume:
+
+Suspend-to-idle Resume Code Flow
+================================
+
+The following steps are taken in order to transition the system from the
+:ref:`suspend-to-idle <s2idle>` sleep state into the working state:
+
+ 1. Resuming timekeeping and unfreezing the scheduler tick.
+
+ When one of the CPUs is woken up (by a non-timer hardware interrupt), it
+ leaves the idle state entered in the last step of the preceding suspend
+ transition, restarts the timekeeping (unless it has been restarted already
+ by another CPU that woke up earlier) and the scheduler tick on that CPU is
+ unfrozen.
+
+ If the interrupt that has woken up the CPU was armed for system wakeup,
+ the system resume transition begins.
+
+ 2. Resuming devices and restoring the working-state configuration of IRQs.
+
+ Devices are resumed in four phases called *noirq resume*, *early resume*,
+ *resume* and *complete* (see :ref:`driverapi_pm_devices` for more
+ information on what exactly happens in each phase).
+
+ Every device is visited in each phase, but typically it is not physically
+ accessed in more than two of them.
+
+ The working-state configuration of IRQs is restored after the *noirq* resume
+ phase and the runtime PM API is re-enabled for every device whose driver
+ supports it during the *early* resume phase.
+
+ 3. Thawing tasks.
+
+ Tasks frozen in step 2 of the preceding `suspend <s2idle_suspend_>`_
+ transition are "thawed", which means that they are woken up from the
+ uninterruptible sleep that they went into at that time and user space tasks
+ are allowed to exit the kernel.
+
+ 4. Invoking system-wide resume notifiers.
+
+ This is analogous to step 1 of the `suspend <s2idle_suspend_>`_ transition
+ and the same set of callbacks is invoked at this point, but a different
+ "notification type" parameter value is passed to them.
+
+
+Platform-dependent Suspend Code Flow
+====================================
+
+The following steps are taken in order to transition the system from the working
+state to platform-dependent suspend state:
+
+ 1. Invoking system-wide suspend notifiers.
+
+ This step is the same as step 1 of the suspend-to-idle suspend transition
+ described `above <s2idle_suspend_>`_.
+
+ 2. Freezing tasks.
+
+ This step is the same as step 2 of the suspend-to-idle suspend transition
+ described `above <s2idle_suspend_>`_.
+
+ 3. Suspending devices and reconfiguring IRQs.
+
+ This step is analogous to step 3 of the suspend-to-idle suspend transition
+ described `above <s2idle_suspend_>`_, but the arming of IRQs for system
+ wakeup generally does not have any effect on the platform.
+
+ There are platforms that can go into a very deep low-power state internally
+ when all CPUs in them are in sufficiently deep idle states and all I/O
+ devices have been put into low-power states. On those platforms,
+ suspend-to-idle can reduce system power very effectively.
+
+ On the other platforms, however, low-level components (like interrupt
+ controllers) need to be turned off in a platform-specific way (implemented
+ in the hooks provided by the platform driver) to achieve comparable power
+ reduction.
+
+ That usually prevents in-band hardware interrupts from waking up the system,
+ which must be done in a special platform-dependent way. Then, the
+ configuration of system wakeup sources usually starts when system wakeup
+ devices are suspended and is finalized by the platform suspend hooks later
+ on.
+
+ 4. Disabling non-boot CPUs.
+
+ On some platforms the suspend hooks mentioned above must run in a one-CPU
+ configuration of the system (in particular, the hardware cannot be accessed
+ by any code running in parallel with the platform suspend hooks that may,
+ and often do, trap into the platform firmware in order to finalize the
+ suspend transition).
+
+ For this reason, the CPU offline/online (CPU hotplug) framework is used
+ to take all of the CPUs in the system, except for one (the boot CPU),
+ offline (typically, the CPUs that have been taken offline go into deep idle
+ states).
+
+ This means that all tasks are migrated away from those CPUs and all IRQs are
+ rerouted to the only CPU that remains online.
+
+ 5. Suspending core system components.
+
+ This prepares the core system components for (possibly) losing power going
+ forward and suspends the timekeeping.
+
+ 6. Platform-specific power removal.
+
+ This is expected to remove power from all of the system components except
+ for the memory controller and RAM (in order to preserve the contents of the
+ latter) and some devices designated for system wakeup.
+
+ In many cases control is passed to the platform firmware which is expected
+ to finalize the suspend transition as needed.
+
+
+Platform-dependent Resume Code Flow
+===================================
+
+The following steps are taken in order to transition the system from a
+platform-dependent suspend state into the working state:
+
+ 1. Platform-specific system wakeup.
+
+ The platform is woken up by a signal from one of the designated system
+ wakeup devices (which need not be an in-band hardware interrupt) and
+ control is passed back to the kernel (the working configuration of the
+ platform may need to be restored by the platform firmware before the
+ kernel gets control again).
+
+ 2. Resuming core system components.
+
+ The suspend-time configuration of the core system components is restored and
+ the timekeeping is resumed.
+
+ 3. Re-enabling non-boot CPUs.
+
+ The CPUs disabled in step 4 of the preceding suspend transition are taken
+ back online and their suspend-time configuration is restored.
+
+ 4. Resuming devices and restoring the working-state configuration of IRQs.
+
+ This step is the same as step 2 of the suspend-to-idle suspend transition
+ described `above <s2idle_resume_>`_.
+
+ 5. Thawing tasks.
+
+ This step is the same as step 3 of the suspend-to-idle suspend transition
+ described `above <s2idle_resume_>`_.
+
+ 6. Invoking system-wide resume notifiers.
+
+ This step is the same as step 4 of the suspend-to-idle suspend transition
+ described `above <s2idle_resume_>`_.
diff --git a/Documentation/admin-guide/pm/system-wide.rst b/Documentation/admin-guide/pm/system-wide.rst
index 0c81e4c5de39..1a1924d71006 100644
--- a/Documentation/admin-guide/pm/system-wide.rst
+++ b/Documentation/admin-guide/pm/system-wide.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
============================
System-Wide Power Management
============================
@@ -6,3 +8,4 @@ System-Wide Power Management
:maxdepth: 2
sleep-states
+ suspend-flows
diff --git a/Documentation/admin-guide/pm/working-state.rst b/Documentation/admin-guide/pm/working-state.rst
index fa01bf083dfe..ee45887811ff 100644
--- a/Documentation/admin-guide/pm/working-state.rst
+++ b/Documentation/admin-guide/pm/working-state.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
==============================
Working-State Power Management
==============================
@@ -5,5 +7,12 @@ Working-State Power Management
.. toctree::
:maxdepth: 2
+ cpuidle
+ intel_idle
cpufreq
intel_pstate
+ amd-pstate
+ cpufreq_drivers
+ intel_epb
+ intel-speed-select
+ intel_uncore_frequency_scaling
diff --git a/Documentation/admin-guide/pnp.rst b/Documentation/admin-guide/pnp.rst
new file mode 100644
index 000000000000..24d80e3eb309
--- /dev/null
+++ b/Documentation/admin-guide/pnp.rst
@@ -0,0 +1,285 @@
+=================================
+Linux Plug and Play Documentation
+=================================
+
+:Author: Adam Belay <ambx1@neo.rr.com>
+:Last updated: Oct. 16, 2002
+
+
+Overview
+--------
+
+Plug and Play provides a means of detecting and setting resources for legacy or
+otherwise unconfigurable devices. The Linux Plug and Play Layer provides these
+services to compatible drivers.
+
+
+The User Interface
+------------------
+
+The Linux Plug and Play user interface provides a means to activate PnP devices
+for legacy and user level drivers that do not support Linux Plug and Play. The
+user interface is integrated into sysfs.
+
+In addition to the standard sysfs file the following are created in each
+device's directory:
+- id - displays a list of support EISA IDs
+- options - displays possible resource configurations
+- resources - displays currently allocated resources and allows resource changes
+
+activating a device
+^^^^^^^^^^^^^^^^^^^
+
+::
+
+ # echo "auto" > resources
+
+this will invoke the automatic resource config system to activate the device
+
+manually activating a device
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+::
+
+ # echo "manual <depnum> <mode>" > resources
+
+ <depnum> - the configuration number
+ <mode> - static or dynamic
+ static = for next boot
+ dynamic = now
+
+disabling a device
+^^^^^^^^^^^^^^^^^^
+
+::
+
+ # echo "disable" > resources
+
+
+EXAMPLE:
+
+Suppose you need to activate the floppy disk controller.
+
+1. change to the proper directory, in my case it is
+ /driver/bus/pnp/devices/00:0f::
+
+ # cd /driver/bus/pnp/devices/00:0f
+ # cat name
+ PC standard floppy disk controller
+
+2. check if the device is already active::
+
+ # cat resources
+ DISABLED
+
+ - Notice the string "DISABLED". This means the device is not active.
+
+3. check the device's possible configurations (optional)::
+
+ # cat options
+ Dependent: 01 - Priority acceptable
+ port 0x3f0-0x3f0, align 0x7, size 0x6, 16-bit address decoding
+ port 0x3f7-0x3f7, align 0x0, size 0x1, 16-bit address decoding
+ irq 6
+ dma 2 8-bit compatible
+ Dependent: 02 - Priority acceptable
+ port 0x370-0x370, align 0x7, size 0x6, 16-bit address decoding
+ port 0x377-0x377, align 0x0, size 0x1, 16-bit address decoding
+ irq 6
+ dma 2 8-bit compatible
+
+4. now activate the device::
+
+ # echo "auto" > resources
+
+5. finally check if the device is active::
+
+ # cat resources
+ io 0x3f0-0x3f5
+ io 0x3f7-0x3f7
+ irq 6
+ dma 2
+
+also there are a series of kernel parameters::
+
+ pnp_reserve_irq=irq1[,irq2] ....
+ pnp_reserve_dma=dma1[,dma2] ....
+ pnp_reserve_io=io1,size1[,io2,size2] ....
+ pnp_reserve_mem=mem1,size1[,mem2,size2] ....
+
+
+
+The Unified Plug and Play Layer
+-------------------------------
+
+All Plug and Play drivers, protocols, and services meet at a central location
+called the Plug and Play Layer. This layer is responsible for the exchange of
+information between PnP drivers and PnP protocols. Thus it automatically
+forwards commands to the proper protocol. This makes writing PnP drivers
+significantly easier.
+
+The following functions are available from the Plug and Play Layer:
+
+pnp_get_protocol
+ increments the number of uses by one
+
+pnp_put_protocol
+ deincrements the number of uses by one
+
+pnp_register_protocol
+ use this to register a new PnP protocol
+
+pnp_register_driver
+ adds a PnP driver to the Plug and Play Layer
+
+ this includes driver model integration
+ returns zero for success or a negative error number for failure; count
+ calls to the .add() method if you need to know how many devices bind to
+ the driver
+
+pnp_unregister_driver
+ removes a PnP driver from the Plug and Play Layer
+
+
+
+Plug and Play Protocols
+-----------------------
+
+This section contains information for PnP protocol developers.
+
+The following Protocols are currently available in the computing world:
+
+- PNPBIOS:
+ used for system devices such as serial and parallel ports.
+- ISAPNP:
+ provides PnP support for the ISA bus
+- ACPI:
+ among its many uses, ACPI provides information about system level
+ devices.
+
+It is meant to replace the PNPBIOS. It is not currently supported by Linux
+Plug and Play but it is planned to be in the near future.
+
+
+Requirements for a Linux PnP protocol:
+1. the protocol must use EISA IDs
+2. the protocol must inform the PnP Layer of a device's current configuration
+
+- the ability to set resources is optional but preferred.
+
+The following are PnP protocol related functions:
+
+pnp_add_device
+ use this function to add a PnP device to the PnP layer
+
+ only call this function when all wanted values are set in the pnp_dev
+ structure
+
+pnp_init_device
+ call this to initialize the PnP structure
+
+pnp_remove_device
+ call this to remove a device from the Plug and Play Layer.
+ it will fail if the device is still in use.
+ automatically will free mem used by the device and related structures
+
+pnp_add_id
+ adds an EISA ID to the list of supported IDs for the specified device
+
+For more information consult the source of a protocol such as
+/drivers/pnp/pnpbios/core.c.
+
+
+
+Linux Plug and Play Drivers
+---------------------------
+
+This section contains information for Linux PnP driver developers.
+
+The New Way
+^^^^^^^^^^^
+
+1. first make a list of supported EISA IDS
+
+ ex::
+
+ static const struct pnp_id pnp_dev_table[] = {
+ /* Standard LPT Printer Port */
+ {.id = "PNP0400", .driver_data = 0},
+ /* ECP Printer Port */
+ {.id = "PNP0401", .driver_data = 0},
+ {.id = ""}
+ };
+
+ Please note that the character 'X' can be used as a wild card in the function
+ portion (last four characters).
+
+ ex::
+
+ /* Unknown PnP modems */
+ { "PNPCXXX", UNKNOWN_DEV },
+
+ Supported PnP card IDs can optionally be defined.
+ ex::
+
+ static const struct pnp_id pnp_card_table[] = {
+ { "ANYDEVS", 0 },
+ { "", 0 }
+ };
+
+2. Optionally define probe and remove functions. It may make sense not to
+ define these functions if the driver already has a reliable method of detecting
+ the resources, such as the parport_pc driver.
+
+ ex::
+
+ static int
+ serial_pnp_probe(struct pnp_dev * dev, const struct pnp_id *card_id, const
+ struct pnp_id *dev_id)
+ {
+ . . .
+
+ ex::
+
+ static void serial_pnp_remove(struct pnp_dev * dev)
+ {
+ . . .
+
+ consult /drivers/serial/8250_pnp.c for more information.
+
+3. create a driver structure
+
+ ex::
+
+ static struct pnp_driver serial_pnp_driver = {
+ .name = "serial",
+ .card_id_table = pnp_card_table,
+ .id_table = pnp_dev_table,
+ .probe = serial_pnp_probe,
+ .remove = serial_pnp_remove,
+ };
+
+ * name and id_table cannot be NULL.
+
+4. register the driver
+
+ ex::
+
+ static int __init serial8250_pnp_init(void)
+ {
+ return pnp_register_driver(&serial_pnp_driver);
+ }
+
+The Old Way
+^^^^^^^^^^^
+
+A series of compatibility functions have been created to make it easy to convert
+ISAPNP drivers. They should serve as a temporary solution only.
+
+They are as follows::
+
+ struct pnp_dev *pnp_find_dev(struct pnp_card *card,
+ unsigned short vendor,
+ unsigned short function,
+ struct pnp_dev *from)
+
diff --git a/Documentation/admin-guide/pstore-blk.rst b/Documentation/admin-guide/pstore-blk.rst
new file mode 100644
index 000000000000..1bb2a1c292aa
--- /dev/null
+++ b/Documentation/admin-guide/pstore-blk.rst
@@ -0,0 +1,234 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+pstore block oops/panic logger
+==============================
+
+Introduction
+------------
+
+pstore block (pstore/blk) is an oops/panic logger that writes its logs to a
+block device and non-block device before the system crashes. You can get
+these log files by mounting pstore filesystem like::
+
+ mount -t pstore pstore /sys/fs/pstore
+
+
+pstore block concepts
+---------------------
+
+pstore/blk provides efficient configuration method for pstore/blk, which
+divides all configurations into two parts, configurations for user and
+configurations for driver.
+
+Configurations for user determine how pstore/blk works, such as pmsg_size,
+kmsg_size and so on. All of them support both Kconfig and module parameters,
+but module parameters have priority over Kconfig.
+
+Configurations for driver are all about block device and non-block device,
+such as total_size of block device and read/write operations.
+
+Configurations for user
+-----------------------
+
+All of these configurations support both Kconfig and module parameters, but
+module parameters have priority over Kconfig.
+
+Here is an example for module parameters::
+
+ pstore_blk.blkdev=/dev/mmcblk0p7 pstore_blk.kmsg_size=64 best_effort=y
+
+The detail of each configurations may be of interest to you.
+
+blkdev
+~~~~~~
+
+The block device to use. Most of the time, it is a partition of block device.
+It's required for pstore/blk. It is also used for MTD device.
+
+When pstore/blk is built as a module, "blkdev" accepts the following variants:
+
+1. /dev/<disk_name> represents the device number of disk
+#. /dev/<disk_name><decimal> represents the device number of partition - device
+ number of disk plus the partition number
+#. /dev/<disk_name>p<decimal> - same as the above; this form is used when disk
+ name of partitioned disk ends with a digit.
+
+When pstore/blk is built into the kernel, "blkdev" accepts the following variants:
+
+#. <hex_major><hex_minor> device number in hexadecimal representation,
+ with no leading 0x, for example b302.
+#. PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF represents the unique id of
+ a partition if the partition table provides it. The UUID may be either an
+ EFI/GPT UUID, or refer to an MSDOS partition using the format SSSSSSSS-PP,
+ where SSSSSSSS is a zero-filled hex representation of the 32-bit
+ "NT disk signature", and PP is a zero-filled hex representation of the
+ 1-based partition number.
+#. PARTUUID=<UUID>/PARTNROFF=<int> to select a partition in relation to a
+ partition with a known unique id.
+#. <major>:<minor> major and minor number of the device separated by a colon.
+
+It accepts the following variants for MTD device:
+
+1. <device name> MTD device name. "pstore" is recommended.
+#. <device number> MTD device number.
+
+kmsg_size
+~~~~~~~~~
+
+The chunk size in KB for oops/panic front-end. It **MUST** be a multiple of 4.
+It's optional if you do not care about the oops/panic log.
+
+There are multiple chunks for oops/panic front-end depending on the remaining
+space except other pstore front-ends.
+
+pstore/blk will log to oops/panic chunks one by one, and always overwrite the
+oldest chunk if there is no more free chunk.
+
+pmsg_size
+~~~~~~~~~
+
+The chunk size in KB for pmsg front-end. It **MUST** be a multiple of 4.
+It's optional if you do not care about the pmsg log.
+
+Unlike oops/panic front-end, there is only one chunk for pmsg front-end.
+
+Pmsg is a user space accessible pstore object. Writes to */dev/pmsg0* are
+appended to the chunk. On reboot the contents are available in
+*/sys/fs/pstore/pmsg-pstore-blk-0*.
+
+console_size
+~~~~~~~~~~~~
+
+The chunk size in KB for console front-end. It **MUST** be a multiple of 4.
+It's optional if you do not care about the console log.
+
+Similar to pmsg front-end, there is only one chunk for console front-end.
+
+All log of console will be appended to the chunk. On reboot the contents are
+available in */sys/fs/pstore/console-pstore-blk-0*.
+
+ftrace_size
+~~~~~~~~~~~
+
+The chunk size in KB for ftrace front-end. It **MUST** be a multiple of 4.
+It's optional if you do not care about the ftrace log.
+
+Similar to oops front-end, there are multiple chunks for ftrace front-end
+depending on the count of cpu processors. Each chunk size is equal to
+ftrace_size / processors_count.
+
+All log of ftrace will be appended to the chunk. On reboot the contents are
+combined and available in */sys/fs/pstore/ftrace-pstore-blk-0*.
+
+Persistent function tracing might be useful for debugging software or hardware
+related hangs. Here is an example of usage::
+
+ # mount -t pstore pstore /sys/fs/pstore
+ # mount -t debugfs debugfs /sys/kernel/debug/
+ # echo 1 > /sys/kernel/debug/pstore/record_ftrace
+ # reboot -f
+ [...]
+ # mount -t pstore pstore /sys/fs/pstore
+ # tail /sys/fs/pstore/ftrace-pstore-blk-0
+ CPU:0 ts:5914676 c0063828 c0063b94 call_cpuidle <- cpu_startup_entry+0x1b8/0x1e0
+ CPU:0 ts:5914678 c039ecdc c006385c cpuidle_enter_state <- call_cpuidle+0x44/0x48
+ CPU:0 ts:5914680 c039e9a0 c039ecf0 cpuidle_enter_freeze <- cpuidle_enter_state+0x304/0x314
+ CPU:0 ts:5914681 c0063870 c039ea30 sched_idle_set_state <- cpuidle_enter_state+0x44/0x314
+ CPU:1 ts:5916720 c0160f59 c015ee04 kernfs_unmap_bin_file <- __kernfs_remove+0x140/0x204
+ CPU:1 ts:5916721 c05ca625 c015ee0c __mutex_lock_slowpath <- __kernfs_remove+0x148/0x204
+ CPU:1 ts:5916723 c05c813d c05ca630 yield_to <- __mutex_lock_slowpath+0x314/0x358
+ CPU:1 ts:5916724 c05ca2d1 c05ca638 __ww_mutex_lock <- __mutex_lock_slowpath+0x31c/0x358
+
+max_reason
+~~~~~~~~~~
+
+Limiting which kinds of kmsg dumps are stored can be controlled via
+the ``max_reason`` value, as defined in include/linux/kmsg_dump.h's
+``enum kmsg_dump_reason``. For example, to store both Oopses and Panics,
+``max_reason`` should be set to 2 (KMSG_DUMP_OOPS), to store only Panics
+``max_reason`` should be set to 1 (KMSG_DUMP_PANIC). Setting this to 0
+(KMSG_DUMP_UNDEF), means the reason filtering will be controlled by the
+``printk.always_kmsg_dump`` boot param: if unset, it'll be KMSG_DUMP_OOPS,
+otherwise KMSG_DUMP_MAX.
+
+Configurations for driver
+-------------------------
+
+A device driver uses ``register_pstore_device`` with
+``struct pstore_device_info`` to register to pstore/blk.
+
+.. kernel-doc:: fs/pstore/blk.c
+ :export:
+
+Compression and header
+----------------------
+
+Block device is large enough for uncompressed oops data. Actually we do not
+recommend data compression because pstore/blk will insert some information into
+the first line of oops/panic data. For example::
+
+ Panic: Total 16 times
+
+It means that it's OOPS|Panic for the 16th time since the first booting.
+Sometimes the number of occurrences of oops|panic since the first booting is
+important to judge whether the system is stable.
+
+The following line is inserted by pstore filesystem. For example::
+
+ Oops#2 Part1
+
+It means that it's OOPS for the 2nd time on the last boot.
+
+Reading the data
+----------------
+
+The dump data can be read from the pstore filesystem. The format for these
+files is ``dmesg-pstore-blk-[N]`` for oops/panic front-end,
+``pmsg-pstore-blk-0`` for pmsg front-end and so on. The timestamp of the
+dump file records the trigger time. To delete a stored record from block
+device, simply unlink the respective pstore file.
+
+Attentions in panic read/write APIs
+-----------------------------------
+
+If on panic, the kernel is not going to run for much longer, the tasks will not
+be scheduled and most kernel resources will be out of service. It
+looks like a single-threaded program running on a single-core computer.
+
+The following points require special attention for panic read/write APIs:
+
+1. Can **NOT** allocate any memory.
+ If you need memory, just allocate while the block driver is initializing
+ rather than waiting until the panic.
+#. Must be polled, **NOT** interrupt driven.
+ No task schedule any more. The block driver should delay to ensure the write
+ succeeds, but NOT sleep.
+#. Can **NOT** take any lock.
+ There is no other task, nor any shared resource; you are safe to break all
+ locks.
+#. Just use CPU to transfer.
+ Do not use DMA to transfer unless you are sure that DMA will not keep lock.
+#. Control registers directly.
+ Please control registers directly rather than use Linux kernel resources.
+ Do I/O map while initializing rather than wait until a panic occurs.
+#. Reset your block device and controller if necessary.
+ If you are not sure of the state of your block device and controller when
+ a panic occurs, you are safe to stop and reset them.
+
+pstore/blk supports psblk_blkdev_info(), which is defined in
+*linux/pstore_blk.h*, to get information of using block device, such as the
+device number, sector count and start sector of the whole disk.
+
+pstore block internals
+----------------------
+
+For developer reference, here are all the important structures and APIs:
+
+.. kernel-doc:: fs/pstore/zone.c
+ :internal:
+
+.. kernel-doc:: include/linux/pstore_zone.h
+ :internal:
+
+.. kernel-doc:: include/linux/pstore_blk.h
+ :internal:
diff --git a/Documentation/admin-guide/quickly-build-trimmed-linux.rst b/Documentation/admin-guide/quickly-build-trimmed-linux.rst
new file mode 100644
index 000000000000..cb4b78468a93
--- /dev/null
+++ b/Documentation/admin-guide/quickly-build-trimmed-linux.rst
@@ -0,0 +1,1097 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR CC-BY-4.0)
+.. [see the bottom of this file for redistribution information]
+
+===========================================
+How to quickly build a trimmed Linux kernel
+===========================================
+
+This guide explains how to swiftly build Linux kernels that are ideal for
+testing purposes, but perfectly fine for day-to-day use, too.
+
+The essence of the process (aka 'TL;DR')
+========================================
+
+*[If you are new to compiling Linux, ignore this TLDR and head over to the next
+section below: it contains a step-by-step guide, which is more detailed, but
+still brief and easy to follow; that guide and its accompanying reference
+section also mention alternatives, pitfalls, and additional aspects, all of
+which might be relevant for you.]*
+
+If your system uses techniques like Secure Boot, prepare it to permit starting
+self-compiled Linux kernels; install compilers and everything else needed for
+building Linux; make sure to have 12 Gigabyte free space in your home directory.
+Now run the following commands to download fresh Linux mainline sources, which
+you then use to configure, build and install your own kernel::
+
+ git clone --depth 1 -b master \
+ https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git ~/linux/
+ cd ~/linux/
+ # Hint: if you want to apply patches, do it at this point. See below for details.
+ # Hint: it's recommended to tag your build at this point. See below for details.
+ yes "" | make localmodconfig
+ # Hint: at this point you might want to adjust the build configuration; you'll
+ # have to, if you are running Debian. See below for details.
+ make -j $(nproc --all)
+ # Note: on many commodity distributions the next command suffices, but on Arch
+ # Linux, its derivatives, and some others it does not. See below for details.
+ command -v installkernel && sudo make modules_install install
+ reboot
+
+If you later want to build a newer mainline snapshot, use these commands::
+
+ cd ~/linux/
+ git fetch --depth 1 origin
+ # Note: the next command will discard any changes you did to the code:
+ git checkout --force --detach origin/master
+ # Reminder: if you want to (re)apply patches, do it at this point.
+ # Reminder: you might want to add or modify a build tag at this point.
+ make olddefconfig
+ make -j $(nproc --all)
+ # Reminder: the next command on some distributions does not suffice.
+ command -v installkernel && sudo make modules_install install
+ reboot
+
+Step-by-step guide
+==================
+
+Compiling your own Linux kernel is easy in principle. There are various ways to
+do it. Which of them actually work and is the best depends on the circumstances.
+
+This guide describes a way perfectly suited for those who want to quickly
+install Linux from sources without being bothered by complicated details; the
+goal is to cover everything typically needed on mainstream Linux distributions
+running on commodity PC or server hardware.
+
+The described approach is great for testing purposes, for example to try a
+proposed fix or to check if a problem was already fixed in the latest codebase.
+Nonetheless, kernels built this way are also totally fine for day-to-day use
+while at the same time being easy to keep up to date.
+
+The following steps describe the important aspects of the process; a
+comprehensive reference section later explains each of them in more detail. It
+sometimes also describes alternative approaches, pitfalls, as well as errors
+that might occur at a particular point -- and how to then get things rolling
+again.
+
+..
+ Note: if you see this note, you are reading the text's source file. You
+ might want to switch to a rendered version, as it makes it a lot easier to
+ quickly look something up in the reference section and afterwards jump back
+ to where you left off. Find a the latest rendered version here:
+ https://docs.kernel.org/admin-guide/quickly-build-trimmed-linux.html
+
+.. _backup_sbs:
+
+ * Create a fresh backup and put system repair and restore tools at hand, just
+ to be prepared for the unlikely case of something going sideways.
+
+ [:ref:`details<backup>`]
+
+.. _secureboot_sbs:
+
+ * On platforms with 'Secure Boot' or similar techniques, prepare everything to
+ ensure the system will permit your self-compiled kernel to boot later. The
+ quickest and easiest way to achieve this on commodity x86 systems is to
+ disable such techniques in the BIOS setup utility; alternatively, remove
+ their restrictions through a process initiated by
+ ``mokutil --disable-validation``.
+
+ [:ref:`details<secureboot>`]
+
+.. _buildrequires_sbs:
+
+ * Install all software required to build a Linux kernel. Often you will need:
+ 'bc', 'binutils' ('ld' et al.), 'bison', 'flex', 'gcc', 'git', 'openssl',
+ 'pahole', 'perl', and the development headers for 'libelf' and 'openssl'. The
+ reference section shows how to quickly install those on various popular Linux
+ distributions.
+
+ [:ref:`details<buildrequires>`]
+
+.. _diskspace_sbs:
+
+ * Ensure to have enough free space for building and installing Linux. For the
+ latter 150 Megabyte in /lib/ and 100 in /boot/ are a safe bet. For storing
+ sources and build artifacts 12 Gigabyte in your home directory should
+ typically suffice. If you have less available, be sure to check the reference
+ section for the step that explains adjusting your kernels build
+ configuration: it mentions a trick that reduce the amount of required space
+ in /home/ to around 4 Gigabyte.
+
+ [:ref:`details<diskspace>`]
+
+.. _sources_sbs:
+
+ * Retrieve the sources of the Linux version you intend to build; then change
+ into the directory holding them, as all further commands in this guide are
+ meant to be executed from there.
+
+ *[Note: the following paragraphs describe how to retrieve the sources by
+ partially cloning the Linux stable git repository. This is called a shallow
+ clone. The reference section explains two alternatives:* :ref:`packaged
+ archives<sources_archive>` *and* :ref:`a full git clone<sources_full>` *;
+ prefer the latter, if downloading a lot of data does not bother you, as that
+ will avoid some* :ref:`peculiar characteristics of shallow clones the
+ reference section explains<sources_shallow>` *.]*
+
+ First, execute the following command to retrieve a fresh mainline codebase::
+
+ git clone --no-checkout --depth 1 -b master \
+ https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git ~/linux/
+ cd ~/linux/
+
+ If you want to access recent mainline releases and pre-releases, deepen you
+ clone's history to the oldest mainline version you are interested in::
+
+ git fetch --shallow-exclude=v6.0 origin
+
+ In case you want to access a stable/longterm release (say v6.1.5), simply add
+ the branch holding that series; afterwards fetch the history at least up to
+ the mainline version that started the series (v6.1)::
+
+ git remote set-branches --add origin linux-6.1.y
+ git fetch --shallow-exclude=v6.0 origin
+
+ Now checkout the code you are interested in. If you just performed the
+ initial clone, you will be able to check out a fresh mainline codebase, which
+ is ideal for checking whether developers already fixed an issue::
+
+ git checkout --detach origin/master
+
+ If you deepened your clone, you instead of ``origin/master`` can specify the
+ version you deepened to (``v6.0`` above); later releases like ``v6.1`` and
+ pre-release like ``v6.2-rc1`` will work, too. Stable or longterm versions
+ like ``v6.1.5`` work just the same, if you added the appropriate
+ stable/longterm branch as described.
+
+ [:ref:`details<sources>`]
+
+.. _patching_sbs:
+
+ * In case you want to apply a kernel patch, do so now. Often a command like
+ this will do the trick::
+
+ patch -p1 < ../proposed-fix.patch
+
+ If the ``-p1`` is actually needed, depends on how the patch was created; in
+ case it does not apply thus try without it.
+
+ If you cloned the sources with git and anything goes sideways, run ``git
+ reset --hard`` to undo any changes to the sources.
+
+ [:ref:`details<patching>`]
+
+.. _tagging_sbs:
+
+ * If you patched your kernel or have one of the same version installed already,
+ better add a unique tag to the one you are about to build::
+
+ echo "-proposed_fix" > localversion
+
+ Running ``uname -r`` under your kernel later will then print something like
+ '6.1-rc4-proposed_fix'.
+
+ [:ref:`details<tagging>`]
+
+ .. _configuration_sbs:
+
+ * Create the build configuration for your kernel based on an existing
+ configuration.
+
+ If you already prepared such a '.config' file yourself, copy it to
+ ~/linux/ and run ``make olddefconfig``.
+
+ Use the same command, if your distribution or somebody else already tailored
+ your running kernel to your or your hardware's needs: the make target
+ 'olddefconfig' will then try to use that kernel's .config as base.
+
+ Using this make target is fine for everybody else, too -- but you often can
+ save a lot of time by using this command instead::
+
+ yes "" | make localmodconfig
+
+ This will try to pick your distribution's kernel as base, but then disable
+ modules for any features apparently superfluous for your setup. This will
+ reduce the compile time enormously, especially if you are running an
+ universal kernel from a commodity Linux distribution.
+
+ There is a catch: 'localmodconfig' is likely to disable kernel features you
+ did not use since you booted your Linux -- like drivers for currently
+ disconnected peripherals or a virtualization software not haven't used yet.
+ You can reduce or nearly eliminate that risk with tricks the reference
+ section outlines; but when building a kernel just for quick testing purposes
+ it is often negligible if such features are missing. But you should keep that
+ aspect in mind when using a kernel built with this make target, as it might
+ be the reason why something you only use occasionally stopped working.
+
+ [:ref:`details<configuration>`]
+
+.. _configmods_sbs:
+
+ * Check if you might want to or have to adjust some kernel configuration
+ options:
+
+ * Evaluate how you want to handle debug symbols. Enable them, if you later
+ might need to decode a stack trace found for example in a 'panic', 'Oops',
+ 'warning', or 'BUG'; on the other hand disable them, if you are short on
+ storage space or prefer a smaller kernel binary. See the reference section
+ for details on how to do either. If neither applies, it will likely be fine
+ to simply not bother with this. [:ref:`details<configmods_debugsymbols>`]
+
+ * Are you running Debian? Then to avoid known problems by performing
+ additional adjustments explained in the reference section.
+ [:ref:`details<configmods_distros>`].
+
+ * If you want to influence the other aspects of the configuration, do so now
+ by using make targets like 'menuconfig' or 'xconfig'.
+ [:ref:`details<configmods_individual>`].
+
+.. _build_sbs:
+
+ * Build the image and the modules of your kernel::
+
+ make -j $(nproc --all)
+
+ If you want your kernel packaged up as deb, rpm, or tar file, see the
+ reference section for alternatives.
+
+ [:ref:`details<build>`]
+
+.. _install_sbs:
+
+ * Now install your kernel::
+
+ command -v installkernel && sudo make modules_install install
+
+ Often all left for you to do afterwards is a ``reboot``, as many commodity
+ Linux distributions will then create an initramfs (also known as initrd) and
+ an entry for your kernel in your bootloader's configuration; but on some
+ distributions you have to take care of these two steps manually for reasons
+ the reference section explains.
+
+ On a few distributions like Arch Linux and its derivatives the above command
+ does nothing at all; in that case you have to manually install your kernel,
+ as outlined in the reference section.
+
+ If you are running an immutable Linux distribution, check its documentation
+ and the web to find out how to install your own kernel there.
+
+ [:ref:`details<install>`]
+
+.. _another_sbs:
+
+ * To later build another kernel you need similar steps, but sometimes slightly
+ different commands.
+
+ First, switch back into the sources tree::
+
+ cd ~/linux/
+
+ In case you want to build a version from a stable/longterm series you have
+ not used yet (say 6.2.y), tell git to track it::
+
+ git remote set-branches --add origin linux-6.2.y
+
+ Now fetch the latest upstream changes; you again need to specify the earliest
+ version you care about, as git otherwise might retrieve the entire commit
+ history::
+
+ git fetch --shallow-exclude=v6.0 origin
+
+ Now switch to the version you are interested in -- but be aware the command
+ used here will discard any modifications you performed, as they would
+ conflict with the sources you want to checkout::
+
+ git checkout --force --detach origin/master
+
+ At this point you might want to patch the sources again or set/modify a build
+ tag, as explained earlier. Afterwards adjust the build configuration to the
+ new codebase using olddefconfig, which will now adjust the configuration file
+ you prepared earlier using localmodconfig (~/linux/.config) for your next
+ kernel::
+
+ # reminder: if you want to apply patches, do it at this point
+ # reminder: you might want to update your build tag at this point
+ make olddefconfig
+
+ Now build your kernel::
+
+ make -j $(nproc --all)
+
+ Afterwards install the kernel as outlined above::
+
+ command -v installkernel && sudo make modules_install install
+
+ [:ref:`details<another>`]
+
+.. _uninstall_sbs:
+
+ * Your kernel is easy to remove later, as its parts are only stored in two
+ places and clearly identifiable by the kernel's release name. Just ensure to
+ not delete the kernel you are running, as that might render your system
+ unbootable.
+
+ Start by deleting the directory holding your kernel's modules, which is named
+ after its release name -- '6.0.1-foobar' in the following example::
+
+ sudo rm -rf /lib/modules/6.0.1-foobar
+
+ Now try the following command, which on some distributions will delete all
+ other kernel files installed while also removing the kernel's entry from the
+ bootloader configuration::
+
+ command -v kernel-install && sudo kernel-install -v remove 6.0.1-foobar
+
+ If that command does not output anything or fails, see the reference section;
+ do the same if any files named '*6.0.1-foobar*' remain in /boot/.
+
+ [:ref:`details<uninstall>`]
+
+.. _submit_improvements_qbtl:
+
+Did you run into trouble following any of the above steps that is not cleared up
+by the reference section below? Or do you have ideas how to improve the text?
+Then please take a moment of your time and let the maintainer of this document
+know by email (Thorsten Leemhuis <linux@leemhuis.info>), ideally while CCing the
+Linux docs mailing list (linux-doc@vger.kernel.org). Such feedback is vital to
+improve this document further, which is in everybody's interest, as it will
+enable more people to master the task described here.
+
+Reference section for the step-by-step guide
+============================================
+
+This section holds additional information for each of the steps in the above
+guide.
+
+.. _backup:
+
+Prepare for emergencies
+-----------------------
+
+ *Create a fresh backup and put system repair and restore tools at hand*
+ [:ref:`... <backup_sbs>`]
+
+Remember, you are dealing with computers, which sometimes do unexpected things
+-- especially if you fiddle with crucial parts like the kernel of an operating
+system. That's what you are about to do in this process. Hence, better prepare
+for something going sideways, even if that should not happen.
+
+[:ref:`back to step-by-step guide <backup_sbs>`]
+
+.. _secureboot:
+
+Dealing with techniques like Secure Boot
+----------------------------------------
+
+ *On platforms with 'Secure Boot' or similar techniques, prepare everything to
+ ensure the system will permit your self-compiled kernel to boot later.*
+ [:ref:`... <secureboot_sbs>`]
+
+Many modern systems allow only certain operating systems to start; they thus by
+default will reject booting self-compiled kernels.
+
+You ideally deal with this by making your platform trust your self-built kernels
+with the help of a certificate and signing. How to do that is not described
+here, as it requires various steps that would take the text too far away from
+its purpose; 'Documentation/admin-guide/module-signing.rst' and various web
+sides already explain this in more detail.
+
+Temporarily disabling solutions like Secure Boot is another way to make your own
+Linux boot. On commodity x86 systems it is possible to do this in the BIOS Setup
+utility; the steps to do so are not described here, as they greatly vary between
+machines.
+
+On mainstream x86 Linux distributions there is a third and universal option:
+disable all Secure Boot restrictions for your Linux environment. You can
+initiate this process by running ``mokutil --disable-validation``; this will
+tell you to create a one-time password, which is safe to write down. Now
+restart; right after your BIOS performed all self-tests the bootloader Shim will
+show a blue box with a message 'Press any key to perform MOK management'. Hit
+some key before the countdown exposes. This will open a menu and choose 'Change
+Secure Boot state' there. Shim's 'MokManager' will now ask you to enter three
+randomly chosen characters from the one-time password specified earlier. Once
+you provided them, confirm that you really want to disable the validation.
+Afterwards, permit MokManager to reboot the machine.
+
+[:ref:`back to step-by-step guide <secureboot_sbs>`]
+
+.. _buildrequires:
+
+Install build requirements
+--------------------------
+
+ *Install all software required to build a Linux kernel.*
+ [:ref:`...<buildrequires_sbs>`]
+
+The kernel is pretty stand-alone, but besides tools like the compiler you will
+sometimes need a few libraries to build one. How to install everything needed
+depends on your Linux distribution and the configuration of the kernel you are
+about to build.
+
+Here are a few examples what you typically need on some mainstream
+distributions:
+
+ * Debian, Ubuntu, and derivatives::
+
+ sudo apt install bc binutils bison dwarves flex gcc git make openssl \
+ pahole perl-base libssl-dev libelf-dev
+
+ * Fedora and derivatives::
+
+ sudo dnf install binutils /usr/include/{libelf.h,openssl/pkcs7.h} \
+ /usr/bin/{bc,bison,flex,gcc,git,openssl,make,perl,pahole}
+
+ * openSUSE and derivatives::
+
+ sudo zypper install bc binutils bison dwarves flex gcc git make perl-base \
+ openssl openssl-devel libelf-dev
+
+In case you wonder why these lists include openssl and its development headers:
+they are needed for the Secure Boot support, which many distributions enable in
+their kernel configuration for x86 machines.
+
+Sometimes you will need tools for compression formats like bzip2, gzip, lz4,
+lzma, lzo, xz, or zstd as well.
+
+You might need additional libraries and their development headers in case you
+perform tasks not covered in this guide. For example, zlib will be needed when
+building kernel tools from the tools/ directory; adjusting the build
+configuration with make targets like 'menuconfig' or 'xconfig' will require
+development headers for ncurses or Qt5.
+
+[:ref:`back to step-by-step guide <buildrequires_sbs>`]
+
+.. _diskspace:
+
+Space requirements
+------------------
+
+ *Ensure to have enough free space for building and installing Linux.*
+ [:ref:`... <diskspace_sbs>`]
+
+The numbers mentioned are rough estimates with a big extra charge to be on the
+safe side, so often you will need less.
+
+If you have space constraints, remember to read the reference section when you
+reach the :ref:`section about configuration adjustments' <configmods>`, as
+ensuring debug symbols are disabled will reduce the consumed disk space by quite
+a few gigabytes.
+
+[:ref:`back to step-by-step guide <diskspace_sbs>`]
+
+
+.. _sources:
+
+Download the sources
+--------------------
+
+ *Retrieve the sources of the Linux version you intend to build.*
+ [:ref:`...<sources_sbs>`]
+
+The step-by-step guide outlines how to retrieve Linux' sources using a shallow
+git clone. There is :ref:`more to tell about this method<sources_shallow>` and
+two alternate ways worth describing: :ref:`packaged archives<sources_archive>`
+and :ref:`a full git clone<sources_full>`. And the aspects ':ref:`wouldn't it
+be wiser to use a proper pre-release than the latest mainline code
+<sources_snapshot>`' and ':ref:`how to get an even fresher mainline codebase
+<sources_fresher>`' need elaboration, too.
+
+Note, to keep things simple the commands used in this guide store the build
+artifacts in the source tree. If you prefer to separate them, simply add
+something like ``O=~/linux-builddir/`` to all make calls; also adjust the path
+in all commands that add files or modify any generated (like your '.config').
+
+[:ref:`back to step-by-step guide <sources_sbs>`]
+
+.. _sources_shallow:
+
+Noteworthy characteristics of shallow clones
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The step-by-step guide uses a shallow clone, as it is the best solution for most
+of this document's target audience. There are a few aspects of this approach
+worth mentioning:
+
+ * This document in most places uses ``git fetch`` with ``--shallow-exclude=``
+ to specify the earliest version you care about (or to be precise: its git
+ tag). You alternatively can use the parameter ``--shallow-since=`` to specify
+ an absolute (say ``'2023-07-15'``) or relative (``'12 months'``) date to
+ define the depth of the history you want to download. As a second
+ alternative, you can also specify a certain depth explicitly with a parameter
+ like ``--depth=1``, unless you add branches for stable/longterm kernels.
+
+ * When running ``git fetch``, remember to always specify the oldest version,
+ the time you care about, or an explicit depth as shown in the step-by-step
+ guide. Otherwise you will risk downloading nearly the entire git history,
+ which will consume quite a bit of time and bandwidth while also stressing the
+ servers.
+
+ Note, you do not have to use the same version or date all the time. But when
+ you change it over time, git will deepen or flatten the history to the
+ specified point. That allows you to retrieve versions you initially thought
+ you did not need -- or it will discard the sources of older versions, for
+ example in case you want to free up some disk space. The latter will happen
+ automatically when using ``--shallow-since=`` or
+ ``--depth=``.
+
+ * Be warned, when deepening your clone you might encounter an error like
+ 'fatal: error in object: unshallow cafecaca0c0dacafecaca0c0dacafecaca0c0da'.
+ In that case run ``git repack -d`` and try again``
+
+ * In case you want to revert changes from a certain version (say Linux 6.3) or
+ perform a bisection (v6.2..v6.3), better tell ``git fetch`` to retrieve
+ objects up to three versions earlier (e.g. 6.0): ``git describe`` will then
+ be able to describe most commits just like it would in a full git clone.
+
+[:ref:`back to step-by-step guide <sources_sbs>`] [:ref:`back to section intro <sources>`]
+
+.. _sources_archive:
+
+Downloading the sources using a packages archive
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+People new to compiling Linux often assume downloading an archive via the
+front-page of https://kernel.org is the best approach to retrieve Linux'
+sources. It actually can be, if you are certain to build just one particular
+kernel version without changing any code. Thing is: you might be sure this will
+be the case, but in practice it often will turn out to be a wrong assumption.
+
+That's because when reporting or debugging an issue developers will often ask to
+give another version a try. They also might suggest temporarily undoing a commit
+with ``git revert`` or might provide various patches to try. Sometimes reporters
+will also be asked to use ``git bisect`` to find the change causing a problem.
+These things rely on git or are a lot easier and quicker to handle with it.
+
+A shallow clone also does not add any significant overhead. For example, when
+you use ``git clone --depth=1`` to create a shallow clone of the latest mainline
+codebase git will only retrieve a little more data than downloading the latest
+mainline pre-release (aka 'rc') via the front-page of kernel.org would.
+
+A shallow clone therefore is often the better choice. If you nevertheless want
+to use a packaged source archive, download one via kernel.org; afterwards
+extract its content to some directory and change to the subdirectory created
+during extraction. The rest of the step-by-step guide will work just fine, apart
+from things that rely on git -- but this mainly concerns the section on
+successive builds of other versions.
+
+[:ref:`back to step-by-step guide <sources_sbs>`] [:ref:`back to section intro <sources>`]
+
+.. _sources_full:
+
+Downloading the sources using a full git clone
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+If downloading and storing a lot of data (~4,4 Gigabyte as of early 2023) is
+nothing that bothers you, instead of a shallow clone perform a full git clone
+instead. You then will avoid the specialties mentioned above and will have all
+versions and individual commits at hand at any time::
+
+ curl -L \
+ https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/clone.bundle \
+ -o linux-stable.git.bundle
+ git clone linux-stable.git.bundle ~/linux/
+ rm linux-stable.git.bundle
+ cd ~/linux/
+ git remote set-url origin \
+ https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
+ git fetch origin
+ git checkout --detach origin/master
+
+[:ref:`back to step-by-step guide <sources_sbs>`] [:ref:`back to section intro <sources>`]
+
+.. _sources_snapshot:
+
+Proper pre-releases (RCs) vs. latest mainline
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When cloning the sources using git and checking out origin/master, you often
+will retrieve a codebase that is somewhere between the latest and the next
+release or pre-release. This almost always is the code you want when giving
+mainline a shot: pre-releases like v6.1-rc5 are in no way special, as they do
+not get any significant extra testing before being published.
+
+There is one exception: you might want to stick to the latest mainline release
+(say v6.1) before its successor's first pre-release (v6.2-rc1) is out. That is
+because compiler errors and other problems are more likely to occur during this
+time, as mainline then is in its 'merge window': a usually two week long phase,
+in which the bulk of the changes for the next release is merged.
+
+[:ref:`back to step-by-step guide <sources_sbs>`] [:ref:`back to section intro <sources>`]
+
+.. _sources_fresher:
+
+Avoiding the mainline lag
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The explanations for both the shallow clone and the full clone both retrieve the
+code from the Linux stable git repository. That makes things simpler for this
+document's audience, as it allows easy access to both mainline and
+stable/longterm releases. This approach has just one downside:
+
+Changes merged into the mainline repository are only synced to the master branch
+of the Linux stable repository every few hours. This lag most of the time is
+not something to worry about; but in case you really need the latest code, just
+add the mainline repo as additional remote and checkout the code from there::
+
+ git remote add mainline \
+ https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
+ git fetch mainline
+ git checkout --detach mainline/master
+
+When doing this with a shallow clone, remember to call ``git fetch`` with one
+of the parameters described earlier to limit the depth.
+
+[:ref:`back to step-by-step guide <sources_sbs>`] [:ref:`back to section intro <sources>`]
+
+.. _patching:
+
+Patch the sources (optional)
+----------------------------
+
+ *In case you want to apply a kernel patch, do so now.*
+ [:ref:`...<patching_sbs>`]
+
+This is the point where you might want to patch your kernel -- for example when
+a developer proposed a fix and asked you to check if it helps. The step-by-step
+guide already explains everything crucial here.
+
+[:ref:`back to step-by-step guide <patching_sbs>`]
+
+.. _tagging:
+
+Tagging this kernel build (optional, often wise)
+------------------------------------------------
+
+ *If you patched your kernel or already have that kernel version installed,
+ better tag your kernel by extending its release name:*
+ [:ref:`...<tagging_sbs>`]
+
+Tagging your kernel will help avoid confusion later, especially when you patched
+your kernel. Adding an individual tag will also ensure the kernel's image and
+its modules are installed in parallel to any existing kernels.
+
+There are various ways to add such a tag. The step-by-step guide realizes one by
+creating a 'localversion' file in your build directory from which the kernel
+build scripts will automatically pick up the tag. You can later change that file
+to use a different tag in subsequent builds or simply remove that file to dump
+the tag.
+
+[:ref:`back to step-by-step guide <tagging_sbs>`]
+
+.. _configuration:
+
+Define the build configuration for your kernel
+----------------------------------------------
+
+ *Create the build configuration for your kernel based on an existing
+ configuration.* [:ref:`... <configuration_sbs>`]
+
+There are various aspects for this steps that require a more careful
+explanation:
+
+Pitfalls when using another configuration file as base
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Make targets like localmodconfig and olddefconfig share a few common snares you
+want to be aware of:
+
+ * These targets will reuse a kernel build configuration in your build directory
+ (e.g. '~/linux/.config'), if one exists. In case you want to start from
+ scratch you thus need to delete it.
+
+ * The make targets try to find the configuration for your running kernel
+ automatically, but might choose poorly. A line like '# using defaults found
+ in /boot/config-6.0.7-250.fc36.x86_64' or 'using config:
+ '/boot/config-6.0.7-250.fc36.x86_64' tells you which file they picked. If
+ that is not the intended one, simply store it as '~/linux/.config'
+ before using these make targets.
+
+ * Unexpected things might happen if you try to use a config file prepared for
+ one kernel (say v6.0) on an older generation (say v5.15). In that case you
+ might want to use a configuration as base which your distribution utilized
+ when they used that or an slightly older kernel version.
+
+Influencing the configuration
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The make target olddefconfig and the ``yes "" |`` used when utilizing
+localmodconfig will set any undefined build options to their default value. This
+among others will disable many kernel features that were introduced after your
+base kernel was released.
+
+If you want to set these configurations options manually, use ``oldconfig``
+instead of ``olddefconfig`` or omit the ``yes "" |`` when utilizing
+localmodconfig. Then for each undefined configuration option you will be asked
+how to proceed. In case you are unsure what to answer, simply hit 'enter' to
+apply the default value.
+
+Big pitfall when using localmodconfig
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+As explained briefly in the step-by-step guide already: with localmodconfig it
+can easily happen that your self-built kernel will lack modules for tasks you
+did not perform before utilizing this make target. That's because those tasks
+require kernel modules that are normally autoloaded when you perform that task
+for the first time; if you didn't perform that task at least once before using
+localmodconfig, the latter will thus assume these modules are superfluous and
+disable them.
+
+You can try to avoid this by performing typical tasks that often will autoload
+additional kernel modules: start a VM, establish VPN connections, loop-mount a
+CD/DVD ISO, mount network shares (CIFS, NFS, ...), and connect all external
+devices (2FA keys, headsets, webcams, ...) as well as storage devices with file
+systems you otherwise do not utilize (btrfs, ext4, FAT, NTFS, XFS, ...). But it
+is hard to think of everything that might be needed -- even kernel developers
+often forget one thing or another at this point.
+
+Do not let that risk bother you, especially when compiling a kernel only for
+testing purposes: everything typically crucial will be there. And if you forget
+something important you can turn on a missing feature later and quickly run the
+commands to compile and install a better kernel.
+
+But if you plan to build and use self-built kernels regularly, you might want to
+reduce the risk by recording which modules your system loads over the course of
+a few weeks. You can automate this with `modprobed-db
+<https://github.com/graysky2/modprobed-db>`_. Afterwards use ``LSMOD=<path>`` to
+point localmodconfig to the list of modules modprobed-db noticed being used::
+
+ yes "" | make LSMOD="${HOME}"/.config/modprobed.db localmodconfig
+
+Remote building with localmodconfig
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+If you want to use localmodconfig to build a kernel for another machine, run
+``lsmod > lsmod_foo-machine`` on it and transfer that file to your build host.
+Now point the build scripts to the file like this: ``yes "" | make
+LSMOD=~/lsmod_foo-machine localmodconfig``. Note, in this case
+you likely want to copy a base kernel configuration from the other machine over
+as well and place it as .config in your build directory.
+
+[:ref:`back to step-by-step guide <configuration_sbs>`]
+
+.. _configmods:
+
+Adjust build configuration
+--------------------------
+
+ *Check if you might want to or have to adjust some kernel configuration
+ options:*
+
+Depending on your needs you at this point might want or have to adjust some
+kernel configuration options.
+
+.. _configmods_debugsymbols:
+
+Debug symbols
+~~~~~~~~~~~~~
+
+ *Evaluate how you want to handle debug symbols.*
+ [:ref:`...<configmods_sbs>`]
+
+Most users do not need to care about this, it's often fine to leave everything
+as it is; but you should take a closer look at this, if you might need to decode
+a stack trace or want to reduce space consumption.
+
+Having debug symbols available can be important when your kernel throws a
+'panic', 'Oops', 'warning', or 'BUG' later when running, as then you will be
+able to find the exact place where the problem occurred in the code. But
+collecting and embedding the needed debug information takes time and consumes
+quite a bit of space: in late 2022 the build artifacts for a typical x86 kernel
+configured with localmodconfig consumed around 5 Gigabyte of space with debug
+symbols, but less than 1 when they were disabled. The resulting kernel image and
+the modules are bigger as well, which increases load times.
+
+Hence, if you want a small kernel and are unlikely to decode a stack trace
+later, you might want to disable debug symbols to avoid above downsides::
+
+ ./scripts/config --file .config -d DEBUG_INFO \
+ -d DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT -d DEBUG_INFO_DWARF4 \
+ -d DEBUG_INFO_DWARF5 -e CONFIG_DEBUG_INFO_NONE
+ make olddefconfig
+
+You on the other hand definitely want to enable them, if there is a decent
+chance that you need to decode a stack trace later (as explained by 'Decode
+failure messages' in Documentation/admin-guide/tainted-kernels.rst in more
+detail)::
+
+ ./scripts/config --file .config -d DEBUG_INFO_NONE -e DEBUG_KERNEL
+ -e DEBUG_INFO -e DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT -e KALLSYMS -e KALLSYMS_ALL
+ make olddefconfig
+
+Note, many mainstream distributions enable debug symbols in their kernel
+configurations -- make targets like localmodconfig and olddefconfig thus will
+often pick that setting up.
+
+[:ref:`back to step-by-step guide <configmods_sbs>`]
+
+.. _configmods_distros:
+
+Distro specific adjustments
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Are you running* [:ref:`... <configmods_sbs>`]
+
+The following sections help you to avoid build problems that are known to occur
+when following this guide on a few commodity distributions.
+
+**Debian:**
+
+ * Remove a stale reference to a certificate file that would cause your build to
+ fail::
+
+ ./scripts/config --file .config --set-str SYSTEM_TRUSTED_KEYS ''
+
+ Alternatively, download the needed certificate and make that configuration
+ option point to it, as `the Debian handbook explains in more detail
+ <https://debian-handbook.info/browse/stable/sect.kernel-compilation.html>`_
+ -- or generate your own, as explained in
+ Documentation/admin-guide/module-signing.rst.
+
+[:ref:`back to step-by-step guide <configmods_sbs>`]
+
+.. _configmods_individual:
+
+Individual adjustments
+~~~~~~~~~~~~~~~~~~~~~~
+
+ *If you want to influence the other aspects of the configuration, do so
+ now* [:ref:`... <configmods_sbs>`]
+
+You at this point can use a command like ``make menuconfig`` to enable or
+disable certain features using a text-based user interface; to use a graphical
+configuration utilize, use the make target ``xconfig`` or ``gconfig`` instead.
+All of them require development libraries from toolkits they are based on
+(ncurses, Qt5, Gtk2); an error message will tell you if something required is
+missing.
+
+[:ref:`back to step-by-step guide <configmods_sbs>`]
+
+.. _build:
+
+Build your kernel
+-----------------
+
+ *Build the image and the modules of your kernel* [:ref:`... <build_sbs>`]
+
+A lot can go wrong at this stage, but the instructions below will help you help
+yourself. Another subsection explains how to directly package your kernel up as
+deb, rpm or tar file.
+
+Dealing with build errors
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When a build error occurs, it might be caused by some aspect of your machine's
+setup that often can be fixed quickly; other times though the problem lies in
+the code and can only be fixed by a developer. A close examination of the
+failure messages coupled with some research on the internet will often tell you
+which of the two it is. To perform such an investigation, restart the build
+process like this::
+
+ make V=1
+
+The ``V=1`` activates verbose output, which might be needed to see the actual
+error. To make it easier to spot, this command also omits the ``-j $(nproc
+--all)`` used earlier to utilize every CPU core in the system for the job -- but
+this parallelism also results in some clutter when failures occur.
+
+After a few seconds the build process should run into the error again. Now try
+to find the most crucial line describing the problem. Then search the internet
+for the most important and non-generic section of that line (say 4 to 8 words);
+avoid or remove anything that looks remotely system-specific, like your username
+or local path names like ``/home/username/linux/``. First try your regular
+internet search engine with that string, afterwards search Linux kernel mailing
+lists via `lore.kernel.org/all/ <https://lore.kernel.org/all/>`_.
+
+This most of the time will find something that will explain what is wrong; quite
+often one of the hits will provide a solution for your problem, too. If you
+do not find anything that matches your problem, try again from a different angle
+by modifying your search terms or using another line from the error messages.
+
+In the end, most trouble you are to run into has likely been encountered and
+reported by others already. That includes issues where the cause is not your
+system, but lies the code. If you run into one of those, you might thus find a
+solution (e.g. a patch) or workaround for your problem, too.
+
+Package your kernel up
+~~~~~~~~~~~~~~~~~~~~~~
+
+The step-by-step guide uses the default make targets (e.g. 'bzImage' and
+'modules' on x86) to build the image and the modules of your kernel, which later
+steps of the guide then install. You instead can also directly build everything
+and directly package it up by using one of the following targets:
+
+ * ``make -j $(nproc --all) bindeb-pkg`` to generate a deb package
+
+ * ``make -j $(nproc --all) binrpm-pkg`` to generate a rpm package
+
+ * ``make -j $(nproc --all) tarbz2-pkg`` to generate a bz2 compressed tarball
+
+This is just a selection of available make targets for this purpose, see
+``make help`` for others. You can also use these targets after running
+``make -j $(nproc --all)``, as they will pick up everything already built.
+
+If you employ the targets to generate deb or rpm packages, ignore the
+step-by-step guide's instructions on installing and removing your kernel;
+instead install and remove the packages using the package utility for the format
+(e.g. dpkg and rpm) or a package management utility build on top of them (apt,
+aptitude, dnf/yum, zypper, ...). Be aware that the packages generated using
+these two make targets are designed to work on various distributions utilizing
+those formats, they thus will sometimes behave differently than your
+distribution's kernel packages.
+
+[:ref:`back to step-by-step guide <build_sbs>`]
+
+.. _install:
+
+Install your kernel
+-------------------
+
+ *Now install your kernel* [:ref:`... <install_sbs>`]
+
+What you need to do after executing the command in the step-by-step guide
+depends on the existence and the implementation of an ``installkernel``
+executable. Many commodity Linux distributions ship such a kernel installer in
+``/sbin/`` that does everything needed, hence there is nothing left for you
+except rebooting. But some distributions contain an installkernel that does
+only part of the job -- and a few lack it completely and leave all the work to
+you.
+
+If ``installkernel`` is found, the kernel's build system will delegate the
+actual installation of your kernel's image and related files to this executable.
+On almost all Linux distributions it will store the image as '/boot/vmlinuz-
+<your kernel's release name>' and put a 'System.map-<your kernel's release
+name>' alongside it. Your kernel will thus be installed in parallel to any
+existing ones, unless you already have one with exactly the same release name.
+
+Installkernel on many distributions will afterwards generate an 'initramfs'
+(often also called 'initrd'), which commodity distributions rely on for booting;
+hence be sure to keep the order of the two make targets used in the step-by-step
+guide, as things will go sideways if you install your kernel's image before its
+modules. Often installkernel will then add your kernel to the bootloader
+configuration, too. You have to take care of one or both of these tasks
+yourself, if your distributions installkernel doesn't handle them.
+
+A few distributions like Arch Linux and its derivatives totally lack an
+installkernel executable. On those just install the modules using the kernel's
+build system and then install the image and the System.map file manually::
+
+ sudo make modules_install
+ sudo install -m 0600 $(make -s image_name) /boot/vmlinuz-$(make -s kernelrelease)
+ sudo install -m 0600 System.map /boot/System.map-$(make -s kernelrelease)
+
+If your distribution boots with the help of an initramfs, now generate one for
+your kernel using the tools your distribution provides for this process.
+Afterwards add your kernel to your bootloader configuration and reboot.
+
+[:ref:`back to step-by-step guide <install_sbs>`]
+
+.. _another:
+
+Another round later
+-------------------
+
+ *To later build another kernel you need similar, but sometimes slightly
+ different commands* [:ref:`... <another_sbs>`]
+
+The process to build later kernels is similar, but at some points slightly
+different. You for example do not want to use 'localmodconfig' for succeeding
+kernel builds, as you already created a trimmed down configuration you want to
+use from now on. Hence instead just use ``oldconfig`` or ``olddefconfig`` to
+adjust your build configurations to the needs of the kernel version you are
+about to build.
+
+If you created a shallow-clone with git, remember what the :ref:`section that
+explained the setup described in more detail <sources>`: you need to use a
+slightly different ``git fetch`` command and when switching to another series
+need to add an additional remote branch.
+
+[:ref:`back to step-by-step guide <another_sbs>`]
+
+.. _uninstall:
+
+Uninstall the kernel later
+--------------------------
+
+ *All parts of your installed kernel are identifiable by its release name and
+ thus easy to remove later.* [:ref:`... <uninstall_sbs>`]
+
+Do not worry installing your kernel manually and thus bypassing your
+distribution's packaging system will totally mess up your machine: all parts of
+your kernel are easy to remove later, as files are stored in two places only and
+normally identifiable by the kernel's release name.
+
+One of the two places is a directory in /lib/modules/, which holds the modules
+for each installed kernel. This directory is named after the kernel's release
+name; hence, to remove all modules for one of your kernels, simply remove its
+modules directory in /lib/modules/.
+
+The other place is /boot/, where typically one to five files will be placed
+during installation of a kernel. All of them usually contain the release name in
+their file name, but how many files and their name depends somewhat on your
+distribution's installkernel executable (:ref:`see above <install>`) and its
+initramfs generator. On some distributions the ``kernel-install`` command
+mentioned in the step-by-step guide will remove all of these files for you --
+and the entry for your kernel in the bootloader configuration at the same time,
+too. On others you have to take care of these steps yourself. The following
+command should interactively remove the two main files of a kernel with the
+release name '6.0.1-foobar'::
+
+ rm -i /boot/{System.map,vmlinuz}-6.0.1-foobar
+
+Now remove the belonging initramfs, which often will be called something like
+``/boot/initramfs-6.0.1-foobar.img`` or ``/boot/initrd.img-6.0.1-foobar``.
+Afterwards check for other files in /boot/ that have '6.0.1-foobar' in their
+name and delete them as well. Now remove the kernel from your bootloader's
+configuration.
+
+Note, be very careful with wildcards like '*' when deleting files or directories
+for kernels manually: you might accidentally remove files of a 6.0.11 kernel
+when all you want is to remove 6.0 or 6.0.1.
+
+[:ref:`back to step-by-step guide <uninstall_sbs>`]
+
+.. _faq:
+
+FAQ
+===
+
+Why does this 'how-to' not work on my system?
+---------------------------------------------
+
+As initially stated, this guide is 'designed to cover everything typically
+needed [to build a kernel] on mainstream Linux distributions running on
+commodity PC or server hardware'. The outlined approach despite this should work
+on many other setups as well. But trying to cover every possible use-case in one
+guide would defeat its purpose, as without such a focus you would need dozens or
+hundreds of constructs along the lines of 'in case you are having <insert
+machine or distro>, you at this point have to do <this and that>
+<instead|additionally>'. Each of which would make the text longer, more
+complicated, and harder to follow.
+
+That being said: this of course is a balancing act. Hence, if you think an
+additional use-case is worth describing, suggest it to the maintainers of this
+document, as :ref:`described above <submit_improvements_qbtl>`.
+
+
+..
+ end-of-content
+..
+ This document is maintained by Thorsten Leemhuis <linux@leemhuis.info>. If
+ you spot a typo or small mistake, feel free to let him know directly and
+ he'll fix it. You are free to do the same in a mostly informal way if you
+ want to contribute changes to the text -- but for copyright reasons please CC
+ linux-doc@vger.kernel.org and 'sign-off' your contribution as
+ Documentation/process/submitting-patches.rst explains in the section 'Sign
+ your work - the Developer's Certificate of Origin'.
+..
+ This text is available under GPL-2.0+ or CC-BY-4.0, as stated at the top
+ of the file. If you want to distribute this text under CC-BY-4.0 only,
+ please use 'The Linux kernel development community' for author attribution
+ and link this as source:
+ https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/admin-guide/quickly-build-trimmed-linux.rst
+..
+ Note: Only the content of this RST file as found in the Linux kernel sources
+ is available under CC-BY-4.0, as versions of this text that were processed
+ (for example by the kernel's build system) might contain content taken from
+ files which use a more restrictive license.
+
diff --git a/Documentation/admin-guide/ramoops.rst b/Documentation/admin-guide/ramoops.rst
index 6dbcc5481000..2eabef31220d 100644
--- a/Documentation/admin-guide/ramoops.rst
+++ b/Documentation/admin-guide/ramoops.rst
@@ -3,7 +3,7 @@ Ramoops oops/panic logger
Sergiu Iordache <sergiu@chromium.org>
-Updated: 17 November 2011
+Updated: 10 Feb 2021
Introduction
------------
@@ -22,7 +22,9 @@ and type of the memory area are set using three variables:
* ``mem_address`` for the start
* ``mem_size`` for the size. The memory size will be rounded down to a
power of two.
- * ``mem_type`` to specifiy if the memory type (default is pgprot_writecombine).
+ * ``mem_type`` to specify if the memory type (default is pgprot_writecombine).
+ * ``mem_name`` to specify a memory region defined by ``reserve_mem`` command
+ line parameter.
Typically the default value of ``mem_type=0`` should be used as that sets the pstore
mapping to pgprot_writecombine. Setting ``mem_type=1`` attempts to use
@@ -30,13 +32,21 @@ mapping to pgprot_writecombine. Setting ``mem_type=1`` attempts to use
depends on atomic operations. At least on ARM, pgprot_noncached causes the
memory to be mapped strongly ordered, and atomic operations on strongly ordered
memory are implementation defined, and won't work on many ARMs such as omaps.
+Setting ``mem_type=2`` attempts to treat the memory region as normal memory,
+which enables full cache on it. This can improve the performance.
The memory area is divided into ``record_size`` chunks (also rounded down to
-power of two) and each oops/panic writes a ``record_size`` chunk of
+power of two) and each kmesg dump writes a ``record_size`` chunk of
information.
-Dumping both oopses and panics can be done by setting 1 in the ``dump_oops``
-variable while setting 0 in that variable dumps only the panics.
+Limiting which kinds of kmsg dumps are stored can be controlled via
+the ``max_reason`` value, as defined in include/linux/kmsg_dump.h's
+``enum kmsg_dump_reason``. For example, to store both Oopses and Panics,
+``max_reason`` should be set to 2 (KMSG_DUMP_OOPS), to store only Panics
+``max_reason`` should be set to 1 (KMSG_DUMP_PANIC). Setting this to 0
+(KMSG_DUMP_UNDEF), means the reason filtering will be controlled by the
+``printk.always_kmsg_dump`` boot param: if unset, it'll be KMSG_DUMP_OOPS,
+otherwise KMSG_DUMP_MAX.
The module uses a counter to record multiple dumps but the counter gets reset
on restart (i.e. new dumps after the restart will overwrite old ones).
@@ -61,7 +71,7 @@ Setting the ramoops parameters can be done in several different manners:
mem=128M ramoops.mem_address=0x8000000 ramoops.ecc=1
B. Use Device Tree bindings, as described in
- ``Documentation/devicetree/bindings/reserved-memory/ramoops.txt``.
+ ``Documentation/devicetree/bindings/reserved-memory/ramoops.yaml``.
For example::
reserved-memory {
@@ -90,7 +100,7 @@ Setting the ramoops parameters can be done in several different manners:
.mem_address = <...>,
.mem_type = <...>,
.record_size = <...>,
- .dump_oops = <...>,
+ .max_reason = <...>,
.ecc = <...>,
};
@@ -110,6 +120,17 @@ Setting the ramoops parameters can be done in several different manners:
return ret;
}
+ D. Using a region of memory reserved via ``reserve_mem`` command line
+ parameter. The address and size will be defined by the ``reserve_mem``
+ parameter. Note, that ``reserve_mem`` may not always allocate memory
+ in the same location, and cannot be relied upon. Testing will need
+ to be done, and it may not work on every machine, nor every kernel.
+ Consider this a "best effort" approach. The ``reserve_mem`` option
+ takes a size, alignment and name as arguments. The name is used
+ to map the memory to a label that can be retrieved by ramoops.
+
+ reserve_mem=2M:4096:oops ramoops.mem_name=oops
+
You can specify either RAM memory or peripheral devices' memory. However, when
specifying RAM, be sure to reserve the memory by issuing memblock_reserve()
very early in the architecture code, e.g.::
diff --git a/Documentation/driver-api/rapidio.rst b/Documentation/admin-guide/rapidio.rst
index 71ff658ab78e..71ff658ab78e 100644
--- a/Documentation/driver-api/rapidio.rst
+++ b/Documentation/admin-guide/rapidio.rst
diff --git a/Documentation/admin-guide/ras.rst b/Documentation/admin-guide/ras.rst
deleted file mode 100644
index 197896718f81..000000000000
--- a/Documentation/admin-guide/ras.rst
+++ /dev/null
@@ -1,1210 +0,0 @@
-.. include:: <isonum.txt>
-
-============================================
-Reliability, Availability and Serviceability
-============================================
-
-RAS concepts
-************
-
-Reliability, Availability and Serviceability (RAS) is a concept used on
-servers meant to measure their robustness.
-
-Reliability
- is the probability that a system will produce correct outputs.
-
- * Generally measured as Mean Time Between Failures (MTBF)
- * Enhanced by features that help to avoid, detect and repair hardware faults
-
-Availability
- is the probability that a system is operational at a given time
-
- * Generally measured as a percentage of downtime per a period of time
- * Often uses mechanisms to detect and correct hardware faults in
- runtime;
-
-Serviceability (or maintainability)
- is the simplicity and speed with which a system can be repaired or
- maintained
-
- * Generally measured on Mean Time Between Repair (MTBR)
-
-Improving RAS
--------------
-
-In order to reduce systems downtime, a system should be capable of detecting
-hardware errors, and, when possible correcting them in runtime. It should
-also provide mechanisms to detect hardware degradation, in order to warn
-the system administrator to take the action of replacing a component before
-it causes data loss or system downtime.
-
-Among the monitoring measures, the most usual ones include:
-
-* CPU – detect errors at instruction execution and at L1/L2/L3 caches;
-* Memory – add error correction logic (ECC) to detect and correct errors;
-* I/O – add CRC checksums for transferred data;
-* Storage – RAID, journal file systems, checksums,
- Self-Monitoring, Analysis and Reporting Technology (SMART).
-
-By monitoring the number of occurrences of error detections, it is possible
-to identify if the probability of hardware errors is increasing, and, on such
-case, do a preventive maintenance to replace a degraded component while
-those errors are correctable.
-
-Types of errors
----------------
-
-Most mechanisms used on modern systems use use technologies like Hamming
-Codes that allow error correction when the number of errors on a bit packet
-is below a threshold. If the number of errors is above, those mechanisms
-can indicate with a high degree of confidence that an error happened, but
-they can't correct.
-
-Also, sometimes an error occur on a component that it is not used. For
-example, a part of the memory that it is not currently allocated.
-
-That defines some categories of errors:
-
-* **Correctable Error (CE)** - the error detection mechanism detected and
- corrected the error. Such errors are usually not fatal, although some
- Kernel mechanisms allow the system administrator to consider them as fatal.
-
-* **Uncorrected Error (UE)** - the amount of errors happened above the error
- correction threshold, and the system was unable to auto-correct.
-
-* **Fatal Error** - when an UE error happens on a critical component of the
- system (for example, a piece of the Kernel got corrupted by an UE), the
- only reliable way to avoid data corruption is to hang or reboot the machine.
-
-* **Non-fatal Error** - when an UE error happens on an unused component,
- like a CPU in power down state or an unused memory bank, the system may
- still run, eventually replacing the affected hardware by a hot spare,
- if available.
-
- Also, when an error happens on a userspace process, it is also possible to
- kill such process and let userspace restart it.
-
-The mechanism for handling non-fatal errors is usually complex and may
-require the help of some userspace application, in order to apply the
-policy desired by the system administrator.
-
-Identifying a bad hardware component
-------------------------------------
-
-Just detecting a hardware flaw is usually not enough, as the system needs
-to pinpoint to the minimal replaceable unit (MRU) that should be exchanged
-to make the hardware reliable again.
-
-So, it requires not only error logging facilities, but also mechanisms that
-will translate the error message to the silkscreen or component label for
-the MRU.
-
-Typically, it is very complex for memory, as modern CPUs interlace memory
-from different memory modules, in order to provide a better performance. The
-DMI BIOS usually have a list of memory module labels, with can be obtained
-using the ``dmidecode`` tool. For example, on a desktop machine, it shows::
-
- Memory Device
- Total Width: 64 bits
- Data Width: 64 bits
- Size: 16384 MB
- Form Factor: SODIMM
- Set: None
- Locator: ChannelA-DIMM0
- Bank Locator: BANK 0
- Type: DDR4
- Type Detail: Synchronous
- Speed: 2133 MHz
- Rank: 2
- Configured Clock Speed: 2133 MHz
-
-On the above example, a DDR4 SO-DIMM memory module is located at the
-system's memory labeled as "BANK 0", as given by the *bank locator* field.
-Please notice that, on such system, the *total width* is equal to the
-*data width*. It means that such memory module doesn't have error
-detection/correction mechanisms.
-
-Unfortunately, not all systems use the same field to specify the memory
-bank. On this example, from an older server, ``dmidecode`` shows::
-
- Memory Device
- Array Handle: 0x1000
- Error Information Handle: Not Provided
- Total Width: 72 bits
- Data Width: 64 bits
- Size: 8192 MB
- Form Factor: DIMM
- Set: 1
- Locator: DIMM_A1
- Bank Locator: Not Specified
- Type: DDR3
- Type Detail: Synchronous Registered (Buffered)
- Speed: 1600 MHz
- Rank: 2
- Configured Clock Speed: 1600 MHz
-
-There, the DDR3 RDIMM memory module is located at the system's memory labeled
-as "DIMM_A1", as given by the *locator* field. Please notice that this
-memory module has 64 bits of *data width* and 72 bits of *total width*. So,
-it has 8 extra bits to be used by error detection and correction mechanisms.
-Such kind of memory is called Error-correcting code memory (ECC memory).
-
-To make things even worse, it is not uncommon that systems with different
-labels on their system's board to use exactly the same BIOS, meaning that
-the labels provided by the BIOS won't match the real ones.
-
-ECC memory
-----------
-
-As mentioned on the previous section, ECC memory has extra bits to be
-used for error correction. So, on 64 bit systems, a memory module
-has 64 bits of *data width*, and 74 bits of *total width*. So, there are
-8 bits extra bits to be used for the error detection and correction
-mechanisms. Those extra bits are called *syndrome*\ [#f1]_\ [#f2]_.
-
-So, when the cpu requests the memory controller to write a word with
-*data width*, the memory controller calculates the *syndrome* in real time,
-using Hamming code, or some other error correction code, like SECDED+,
-producing a code with *total width* size. Such code is then written
-on the memory modules.
-
-At read, the *total width* bits code is converted back, using the same
-ECC code used on write, producing a word with *data width* and a *syndrome*.
-The word with *data width* is sent to the CPU, even when errors happen.
-
-The memory controller also looks at the *syndrome* in order to check if
-there was an error, and if the ECC code was able to fix such error.
-If the error was corrected, a Corrected Error (CE) happened. If not, an
-Uncorrected Error (UE) happened.
-
-The information about the CE/UE errors is stored on some special registers
-at the memory controller and can be accessed by reading such registers,
-either by BIOS, by some special CPUs or by Linux EDAC driver. On x86 64
-bit CPUs, such errors can also be retrieved via the Machine Check
-Architecture (MCA)\ [#f3]_.
-
-.. [#f1] Please notice that several memory controllers allow operation on a
- mode called "Lock-Step", where it groups two memory modules together,
- doing 128-bit reads/writes. That gives 16 bits for error correction, with
- significantly improves the error correction mechanism, at the expense
- that, when an error happens, there's no way to know what memory module is
- to blame. So, it has to blame both memory modules.
-
-.. [#f2] Some memory controllers also allow using memory in mirror mode.
- On such mode, the same data is written to two memory modules. At read,
- the system checks both memory modules, in order to check if both provide
- identical data. On such configuration, when an error happens, there's no
- way to know what memory module is to blame. So, it has to blame both
- memory modules (or 4 memory modules, if the system is also on Lock-step
- mode).
-
-.. [#f3] For more details about the Machine Check Architecture (MCA),
- please read Documentation/x86/x86_64/machinecheck at the Kernel tree.
-
-EDAC - Error Detection And Correction
-*************************************
-
-.. note::
-
- "bluesmoke" was the name for this device driver subsystem when it
- was "out-of-tree" and maintained at http://bluesmoke.sourceforge.net.
- That site is mostly archaic now and can be used only for historical
- purposes.
-
- When the subsystem was pushed upstream for the first time, on
- Kernel 2.6.16, for the first time, it was renamed to ``EDAC``.
-
-Purpose
--------
-
-The ``edac`` kernel module's goal is to detect and report hardware errors
-that occur within the computer system running under linux.
-
-Memory
-------
-
-Memory Correctable Errors (CE) and Uncorrectable Errors (UE) are the
-primary errors being harvested. These types of errors are harvested by
-the ``edac_mc`` device.
-
-Detecting CE events, then harvesting those events and reporting them,
-**can** but must not necessarily be a predictor of future UE events. With
-CE events only, the system can and will continue to operate as no data
-has been damaged yet.
-
-However, preventive maintenance and proactive part replacement of memory
-modules exhibiting CEs can reduce the likelihood of the dreaded UE events
-and system panics.
-
-Other hardware elements
------------------------
-
-A new feature for EDAC, the ``edac_device`` class of device, was added in
-the 2.6.23 version of the kernel.
-
-This new device type allows for non-memory type of ECC hardware detectors
-to have their states harvested and presented to userspace via the sysfs
-interface.
-
-Some architectures have ECC detectors for L1, L2 and L3 caches,
-along with DMA engines, fabric switches, main data path switches,
-interconnections, and various other hardware data paths. If the hardware
-reports it, then a edac_device device probably can be constructed to
-harvest and present that to userspace.
-
-
-PCI bus scanning
-----------------
-
-In addition, PCI devices are scanned for PCI Bus Parity and SERR Errors
-in order to determine if errors are occurring during data transfers.
-
-The presence of PCI Parity errors must be examined with a grain of salt.
-There are several add-in adapters that do **not** follow the PCI specification
-with regards to Parity generation and reporting. The specification says
-the vendor should tie the parity status bits to 0 if they do not intend
-to generate parity. Some vendors do not do this, and thus the parity bit
-can "float" giving false positives.
-
-There is a PCI device attribute located in sysfs that is checked by
-the EDAC PCI scanning code. If that attribute is set, PCI parity/error
-scanning is skipped for that device. The attribute is::
-
- broken_parity_status
-
-and is located in ``/sys/devices/pci<XXX>/0000:XX:YY.Z`` directories for
-PCI devices.
-
-
-Versioning
-----------
-
-EDAC is composed of a "core" module (``edac_core.ko``) and several Memory
-Controller (MC) driver modules. On a given system, the CORE is loaded
-and one MC driver will be loaded. Both the CORE and the MC driver (or
-``edac_device`` driver) have individual versions that reflect current
-release level of their respective modules.
-
-Thus, to "report" on what version a system is running, one must report
-both the CORE's and the MC driver's versions.
-
-
-Loading
--------
-
-If ``edac`` was statically linked with the kernel then no loading
-is necessary. If ``edac`` was built as modules then simply modprobe
-the ``edac`` pieces that you need. You should be able to modprobe
-hardware-specific modules and have the dependencies load the necessary
-core modules.
-
-Example::
-
- $ modprobe amd76x_edac
-
-loads both the ``amd76x_edac.ko`` memory controller module and the
-``edac_mc.ko`` core module.
-
-
-Sysfs interface
----------------
-
-EDAC presents a ``sysfs`` interface for control and reporting purposes. It
-lives in the /sys/devices/system/edac directory.
-
-Within this directory there currently reside 2 components:
-
- ======= ==============================
- mc memory controller(s) system
- pci PCI control and status system
- ======= ==============================
-
-
-
-Memory Controller (mc) Model
-----------------------------
-
-Each ``mc`` device controls a set of memory modules [#f4]_. These modules
-are laid out in a Chip-Select Row (``csrowX``) and Channel table (``chX``).
-There can be multiple csrows and multiple channels.
-
-.. [#f4] Nowadays, the term DIMM (Dual In-line Memory Module) is widely
- used to refer to a memory module, although there are other memory
- packaging alternatives, like SO-DIMM, SIMM, etc. Along this document,
- and inside the EDAC system, the term "dimm" is used for all memory
- modules, even when they use a different kind of packaging.
-
-Memory controllers allow for several csrows, with 8 csrows being a
-typical value. Yet, the actual number of csrows depends on the layout of
-a given motherboard, memory controller and memory module characteristics.
-
-Dual channels allow for dual data length (e. g. 128 bits, on 64 bit systems)
-data transfers to/from the CPU from/to memory. Some newer chipsets allow
-for more than 2 channels, like Fully Buffered DIMMs (FB-DIMMs) memory
-controllers. The following example will assume 2 channels:
-
- +------------+-----------------------+
- | CS Rows | Channels |
- +------------+-----------+-----------+
- | | ``ch0`` | ``ch1`` |
- +============+===========+===========+
- | ``csrow0`` | DIMM_A0 | DIMM_B0 |
- +------------+ | |
- | ``csrow1`` | | |
- +------------+-----------+-----------+
- | ``csrow2`` | DIMM_A1 | DIMM_B1 |
- +------------+ | |
- | ``csrow3`` | | |
- +------------+-----------+-----------+
-
-In the above example, there are 4 physical slots on the motherboard
-for memory DIMMs:
-
- +---------+---------+
- | DIMM_A0 | DIMM_B0 |
- +---------+---------+
- | DIMM_A1 | DIMM_B1 |
- +---------+---------+
-
-Labels for these slots are usually silk-screened on the motherboard.
-Slots labeled ``A`` are channel 0 in this example. Slots labeled ``B`` are
-channel 1. Notice that there are two csrows possible on a physical DIMM.
-These csrows are allocated their csrow assignment based on the slot into
-which the memory DIMM is placed. Thus, when 1 DIMM is placed in each
-Channel, the csrows cross both DIMMs.
-
-Memory DIMMs come single or dual "ranked". A rank is a populated csrow.
-Thus, 2 single ranked DIMMs, placed in slots DIMM_A0 and DIMM_B0 above
-will have just one csrow (csrow0). csrow1 will be empty. On the other
-hand, when 2 dual ranked DIMMs are similarly placed, then both csrow0
-and csrow1 will be populated. The pattern repeats itself for csrow2 and
-csrow3.
-
-The representation of the above is reflected in the directory
-tree in EDAC's sysfs interface. Starting in directory
-``/sys/devices/system/edac/mc``, each memory controller will be
-represented by its own ``mcX`` directory, where ``X`` is the
-index of the MC::
-
- ..../edac/mc/
- |
- |->mc0
- |->mc1
- |->mc2
- ....
-
-Under each ``mcX`` directory each ``csrowX`` is again represented by a
-``csrowX``, where ``X`` is the csrow index::
-
- .../mc/mc0/
- |
- |->csrow0
- |->csrow2
- |->csrow3
- ....
-
-Notice that there is no csrow1, which indicates that csrow0 is composed
-of a single ranked DIMMs. This should also apply in both Channels, in
-order to have dual-channel mode be operational. Since both csrow2 and
-csrow3 are populated, this indicates a dual ranked set of DIMMs for
-channels 0 and 1.
-
-Within each of the ``mcX`` and ``csrowX`` directories are several EDAC
-control and attribute files.
-
-``mcX`` directories
--------------------
-
-In ``mcX`` directories are EDAC control and attribute files for
-this ``X`` instance of the memory controllers.
-
-For a description of the sysfs API, please see:
-
- Documentation/ABI/testing/sysfs-devices-edac
-
-
-``dimmX`` or ``rankX`` directories
-----------------------------------
-
-The recommended way to use the EDAC subsystem is to look at the information
-provided by the ``dimmX`` or ``rankX`` directories [#f5]_.
-
-A typical EDAC system has the following structure under
-``/sys/devices/system/edac/``\ [#f6]_::
-
- /sys/devices/system/edac/
- ├── mc
- │   ├── mc0
- │   │   ├── ce_count
- │   │   ├── ce_noinfo_count
- │   │   ├── dimm0
- │   │   │   ├── dimm_ce_count
- │   │   │   ├── dimm_dev_type
- │   │   │   ├── dimm_edac_mode
- │   │   │   ├── dimm_label
- │   │   │   ├── dimm_location
- │   │   │   ├── dimm_mem_type
- │   │   │   ├── dimm_ue_count
- │   │   │   ├── size
- │   │   │   └── uevent
- │   │   ├── max_location
- │   │   ├── mc_name
- │   │   ├── reset_counters
- │   │   ├── seconds_since_reset
- │   │   ├── size_mb
- │   │   ├── ue_count
- │   │   ├── ue_noinfo_count
- │   │   └── uevent
- │   ├── mc1
- │   │   ├── ce_count
- │   │   ├── ce_noinfo_count
- │   │   ├── dimm0
- │   │   │   ├── dimm_ce_count
- │   │   │   ├── dimm_dev_type
- │   │   │   ├── dimm_edac_mode
- │   │   │   ├── dimm_label
- │   │   │   ├── dimm_location
- │   │   │   ├── dimm_mem_type
- │   │   │   ├── dimm_ue_count
- │   │   │   ├── size
- │   │   │   └── uevent
- │   │   ├── max_location
- │   │   ├── mc_name
- │   │   ├── reset_counters
- │   │   ├── seconds_since_reset
- │   │   ├── size_mb
- │   │   ├── ue_count
- │   │   ├── ue_noinfo_count
- │   │   └── uevent
- │   └── uevent
- └── uevent
-
-In the ``dimmX`` directories are EDAC control and attribute files for
-this ``X`` memory module:
-
-- ``size`` - Total memory managed by this csrow attribute file
-
- This attribute file displays, in count of megabytes, the memory
- that this csrow contains.
-
-- ``dimm_ue_count`` - Uncorrectable Errors count attribute file
-
- This attribute file displays the total count of uncorrectable
- errors that have occurred on this DIMM. If panic_on_ue is set
- this counter will not have a chance to increment, since EDAC
- will panic the system.
-
-- ``dimm_ce_count`` - Correctable Errors count attribute file
-
- This attribute file displays the total count of correctable
- errors that have occurred on this DIMM. This count is very
- important to examine. CEs provide early indications that a
- DIMM is beginning to fail. This count field should be
- monitored for non-zero values and report such information
- to the system administrator.
-
-- ``dimm_dev_type`` - Device type attribute file
-
- This attribute file will display what type of DRAM device is
- being utilized on this DIMM.
- Examples:
-
- - x1
- - x2
- - x4
- - x8
-
-- ``dimm_edac_mode`` - EDAC Mode of operation attribute file
-
- This attribute file will display what type of Error detection
- and correction is being utilized.
-
-- ``dimm_label`` - memory module label control file
-
- This control file allows this DIMM to have a label assigned
- to it. With this label in the module, when errors occur
- the output can provide the DIMM label in the system log.
- This becomes vital for panic events to isolate the
- cause of the UE event.
-
- DIMM Labels must be assigned after booting, with information
- that correctly identifies the physical slot with its
- silk screen label. This information is currently very
- motherboard specific and determination of this information
- must occur in userland at this time.
-
-- ``dimm_location`` - location of the memory module
-
- The location can have up to 3 levels, and describe how the
- memory controller identifies the location of a memory module.
- Depending on the type of memory and memory controller, it
- can be:
-
- - *csrow* and *channel* - used when the memory controller
- doesn't identify a single DIMM - e. g. in ``rankX`` dir;
- - *branch*, *channel*, *slot* - typically used on FB-DIMM memory
- controllers;
- - *channel*, *slot* - used on Nehalem and newer Intel drivers.
-
-- ``dimm_mem_type`` - Memory Type attribute file
-
- This attribute file will display what type of memory is currently
- on this csrow. Normally, either buffered or unbuffered memory.
- Examples:
-
- - Registered-DDR
- - Unbuffered-DDR
-
-.. [#f5] On some systems, the memory controller doesn't have any logic
- to identify the memory module. On such systems, the directory is called ``rankX`` and works on a similar way as the ``csrowX`` directories.
- On modern Intel memory controllers, the memory controller identifies the
- memory modules directly. On such systems, the directory is called ``dimmX``.
-
-.. [#f6] There are also some ``power`` directories and ``subsystem``
- symlinks inside the sysfs mapping that are automatically created by
- the sysfs subsystem. Currently, they serve no purpose.
-
-``csrowX`` directories
-----------------------
-
-When CONFIG_EDAC_LEGACY_SYSFS is enabled, sysfs will contain the ``csrowX``
-directories. As this API doesn't work properly for Rambus, FB-DIMMs and
-modern Intel Memory Controllers, this is being deprecated in favor of
-``dimmX`` directories.
-
-In the ``csrowX`` directories are EDAC control and attribute files for
-this ``X`` instance of csrow:
-
-
-- ``ue_count`` - Total Uncorrectable Errors count attribute file
-
- This attribute file displays the total count of uncorrectable
- errors that have occurred on this csrow. If panic_on_ue is set
- this counter will not have a chance to increment, since EDAC
- will panic the system.
-
-
-- ``ce_count`` - Total Correctable Errors count attribute file
-
- This attribute file displays the total count of correctable
- errors that have occurred on this csrow. This count is very
- important to examine. CEs provide early indications that a
- DIMM is beginning to fail. This count field should be
- monitored for non-zero values and report such information
- to the system administrator.
-
-
-- ``size_mb`` - Total memory managed by this csrow attribute file
-
- This attribute file displays, in count of megabytes, the memory
- that this csrow contains.
-
-
-- ``mem_type`` - Memory Type attribute file
-
- This attribute file will display what type of memory is currently
- on this csrow. Normally, either buffered or unbuffered memory.
- Examples:
-
- - Registered-DDR
- - Unbuffered-DDR
-
-
-- ``edac_mode`` - EDAC Mode of operation attribute file
-
- This attribute file will display what type of Error detection
- and correction is being utilized.
-
-
-- ``dev_type`` - Device type attribute file
-
- This attribute file will display what type of DRAM device is
- being utilized on this DIMM.
- Examples:
-
- - x1
- - x2
- - x4
- - x8
-
-
-- ``ch0_ce_count`` - Channel 0 CE Count attribute file
-
- This attribute file will display the count of CEs on this
- DIMM located in channel 0.
-
-
-- ``ch0_ue_count`` - Channel 0 UE Count attribute file
-
- This attribute file will display the count of UEs on this
- DIMM located in channel 0.
-
-
-- ``ch0_dimm_label`` - Channel 0 DIMM Label control file
-
-
- This control file allows this DIMM to have a label assigned
- to it. With this label in the module, when errors occur
- the output can provide the DIMM label in the system log.
- This becomes vital for panic events to isolate the
- cause of the UE event.
-
- DIMM Labels must be assigned after booting, with information
- that correctly identifies the physical slot with its
- silk screen label. This information is currently very
- motherboard specific and determination of this information
- must occur in userland at this time.
-
-
-- ``ch1_ce_count`` - Channel 1 CE Count attribute file
-
-
- This attribute file will display the count of CEs on this
- DIMM located in channel 1.
-
-
-- ``ch1_ue_count`` - Channel 1 UE Count attribute file
-
-
- This attribute file will display the count of UEs on this
- DIMM located in channel 0.
-
-
-- ``ch1_dimm_label`` - Channel 1 DIMM Label control file
-
- This control file allows this DIMM to have a label assigned
- to it. With this label in the module, when errors occur
- the output can provide the DIMM label in the system log.
- This becomes vital for panic events to isolate the
- cause of the UE event.
-
- DIMM Labels must be assigned after booting, with information
- that correctly identifies the physical slot with its
- silk screen label. This information is currently very
- motherboard specific and determination of this information
- must occur in userland at this time.
-
-
-System Logging
---------------
-
-If logging for UEs and CEs is enabled, then system logs will contain
-information indicating that errors have been detected::
-
- EDAC MC0: CE page 0x283, offset 0xce0, grain 8, syndrome 0x6ec3, row 0, channel 1 "DIMM_B1": amd76x_edac
- EDAC MC0: CE page 0x1e5, offset 0xfb0, grain 8, syndrome 0xb741, row 0, channel 1 "DIMM_B1": amd76x_edac
-
-
-The structure of the message is:
-
- +---------------------------------------+-------------+
- | Content | Example |
- +=======================================+=============+
- | The memory controller | MC0 |
- +---------------------------------------+-------------+
- | Error type | CE |
- +---------------------------------------+-------------+
- | Memory page | 0x283 |
- +---------------------------------------+-------------+
- | Offset in the page | 0xce0 |
- +---------------------------------------+-------------+
- | The byte granularity | grain 8 |
- | or resolution of the error | |
- +---------------------------------------+-------------+
- | The error syndrome | 0xb741 |
- +---------------------------------------+-------------+
- | Memory row | row 0 |
- +---------------------------------------+-------------+
- | Memory channel | channel 1 |
- +---------------------------------------+-------------+
- | DIMM label, if set prior | DIMM B1 |
- +---------------------------------------+-------------+
- | And then an optional, driver-specific | |
- | message that may have additional | |
- | information. | |
- +---------------------------------------+-------------+
-
-Both UEs and CEs with no info will lack all but memory controller, error
-type, a notice of "no info" and then an optional, driver-specific error
-message.
-
-
-PCI Bus Parity Detection
-------------------------
-
-On Header Type 00 devices, the primary status is looked at for any
-parity error regardless of whether parity is enabled on the device or
-not. (The spec indicates parity is generated in some cases). On Header
-Type 01 bridges, the secondary status register is also looked at to see
-if parity occurred on the bus on the other side of the bridge.
-
-
-Sysfs configuration
--------------------
-
-Under ``/sys/devices/system/edac/pci`` are control and attribute files as
-follows:
-
-
-- ``check_pci_parity`` - Enable/Disable PCI Parity checking control file
-
- This control file enables or disables the PCI Bus Parity scanning
- operation. Writing a 1 to this file enables the scanning. Writing
- a 0 to this file disables the scanning.
-
- Enable::
-
- echo "1" >/sys/devices/system/edac/pci/check_pci_parity
-
- Disable::
-
- echo "0" >/sys/devices/system/edac/pci/check_pci_parity
-
-
-- ``pci_parity_count`` - Parity Count
-
- This attribute file will display the number of parity errors that
- have been detected.
-
-
-Module parameters
------------------
-
-- ``edac_mc_panic_on_ue`` - Panic on UE control file
-
- An uncorrectable error will cause a machine panic. This is usually
- desirable. It is a bad idea to continue when an uncorrectable error
- occurs - it is indeterminate what was uncorrected and the operating
- system context might be so mangled that continuing will lead to further
- corruption. If the kernel has MCE configured, then EDAC will never
- notice the UE.
-
- LOAD TIME::
-
- module/kernel parameter: edac_mc_panic_on_ue=[0|1]
-
- RUN TIME::
-
- echo "1" > /sys/module/edac_core/parameters/edac_mc_panic_on_ue
-
-
-- ``edac_mc_log_ue`` - Log UE control file
-
-
- Generate kernel messages describing uncorrectable errors. These errors
- are reported through the system message log system. UE statistics
- will be accumulated even when UE logging is disabled.
-
- LOAD TIME::
-
- module/kernel parameter: edac_mc_log_ue=[0|1]
-
- RUN TIME::
-
- echo "1" > /sys/module/edac_core/parameters/edac_mc_log_ue
-
-
-- ``edac_mc_log_ce`` - Log CE control file
-
-
- Generate kernel messages describing correctable errors. These
- errors are reported through the system message log system.
- CE statistics will be accumulated even when CE logging is disabled.
-
- LOAD TIME::
-
- module/kernel parameter: edac_mc_log_ce=[0|1]
-
- RUN TIME::
-
- echo "1" > /sys/module/edac_core/parameters/edac_mc_log_ce
-
-
-- ``edac_mc_poll_msec`` - Polling period control file
-
-
- The time period, in milliseconds, for polling for error information.
- Too small a value wastes resources. Too large a value might delay
- necessary handling of errors and might loose valuable information for
- locating the error. 1000 milliseconds (once each second) is the current
- default. Systems which require all the bandwidth they can get, may
- increase this.
-
- LOAD TIME::
-
- module/kernel parameter: edac_mc_poll_msec=[0|1]
-
- RUN TIME::
-
- echo "1000" > /sys/module/edac_core/parameters/edac_mc_poll_msec
-
-
-- ``panic_on_pci_parity`` - Panic on PCI PARITY Error
-
-
- This control file enables or disables panicking when a parity
- error has been detected.
-
-
- module/kernel parameter::
-
- edac_panic_on_pci_pe=[0|1]
-
- Enable::
-
- echo "1" > /sys/module/edac_core/parameters/edac_panic_on_pci_pe
-
- Disable::
-
- echo "0" > /sys/module/edac_core/parameters/edac_panic_on_pci_pe
-
-
-
-EDAC device type
-----------------
-
-In the header file, edac_pci.h, there is a series of edac_device structures
-and APIs for the EDAC_DEVICE.
-
-User space access to an edac_device is through the sysfs interface.
-
-At the location ``/sys/devices/system/edac`` (sysfs) new edac_device devices
-will appear.
-
-There is a three level tree beneath the above ``edac`` directory. For example,
-the ``test_device_edac`` device (found at the http://bluesmoke.sourceforget.net
-website) installs itself as::
-
- /sys/devices/system/edac/test-instance
-
-in this directory are various controls, a symlink and one or more ``instance``
-directories.
-
-The standard default controls are:
-
- ============== =======================================================
- log_ce boolean to log CE events
- log_ue boolean to log UE events
- panic_on_ue boolean to ``panic`` the system if an UE is encountered
- (default off, can be set true via startup script)
- poll_msec time period between POLL cycles for events
- ============== =======================================================
-
-The test_device_edac device adds at least one of its own custom control:
-
- ============== ==================================================
- test_bits which in the current test driver does nothing but
- show how it is installed. A ported driver can
- add one or more such controls and/or attributes
- for specific uses.
- One out-of-tree driver uses controls here to allow
- for ERROR INJECTION operations to hardware
- injection registers
- ============== ==================================================
-
-The symlink points to the 'struct dev' that is registered for this edac_device.
-
-Instances
----------
-
-One or more instance directories are present. For the ``test_device_edac``
-case:
-
- +----------------+
- | test-instance0 |
- +----------------+
-
-
-In this directory there are two default counter attributes, which are totals of
-counter in deeper subdirectories.
-
- ============== ====================================
- ce_count total of CE events of subdirectories
- ue_count total of UE events of subdirectories
- ============== ====================================
-
-Blocks
-------
-
-At the lowest directory level is the ``block`` directory. There can be 0, 1
-or more blocks specified in each instance:
-
- +-------------+
- | test-block0 |
- +-------------+
-
-In this directory the default attributes are:
-
- ============== ================================================
- ce_count which is counter of CE events for this ``block``
- of hardware being monitored
- ue_count which is counter of UE events for this ``block``
- of hardware being monitored
- ============== ================================================
-
-
-The ``test_device_edac`` device adds 4 attributes and 1 control:
-
- ================== ====================================================
- test-block-bits-0 for every POLL cycle this counter
- is incremented
- test-block-bits-1 every 10 cycles, this counter is bumped once,
- and test-block-bits-0 is set to 0
- test-block-bits-2 every 100 cycles, this counter is bumped once,
- and test-block-bits-1 is set to 0
- test-block-bits-3 every 1000 cycles, this counter is bumped once,
- and test-block-bits-2 is set to 0
- ================== ====================================================
-
-
- ================== ====================================================
- reset-counters writing ANY thing to this control will
- reset all the above counters.
- ================== ====================================================
-
-
-Use of the ``test_device_edac`` driver should enable any others to create their own
-unique drivers for their hardware systems.
-
-The ``test_device_edac`` sample driver is located at the
-http://bluesmoke.sourceforge.net project site for EDAC.
-
-
-Usage of EDAC APIs on Nehalem and newer Intel CPUs
---------------------------------------------------
-
-On older Intel architectures, the memory controller was part of the North
-Bridge chipset. Nehalem, Sandy Bridge, Ivy Bridge, Haswell, Sky Lake and
-newer Intel architectures integrated an enhanced version of the memory
-controller (MC) inside the CPUs.
-
-This chapter will cover the differences of the enhanced memory controllers
-found on newer Intel CPUs, such as ``i7core_edac``, ``sb_edac`` and
-``sbx_edac`` drivers.
-
-.. note::
-
- The Xeon E7 processor families use a separate chip for the memory
- controller, called Intel Scalable Memory Buffer. This section doesn't
- apply for such families.
-
-1) There is one Memory Controller per Quick Patch Interconnect
- (QPI). At the driver, the term "socket" means one QPI. This is
- associated with a physical CPU socket.
-
- Each MC have 3 physical read channels, 3 physical write channels and
- 3 logic channels. The driver currently sees it as just 3 channels.
- Each channel can have up to 3 DIMMs.
-
- The minimum known unity is DIMMs. There are no information about csrows.
- As EDAC API maps the minimum unity is csrows, the driver sequentially
- maps channel/DIMM into different csrows.
-
- For example, supposing the following layout::
-
- Ch0 phy rd0, wr0 (0x063f4031): 2 ranks, UDIMMs
- dimm 0 1024 Mb offset: 0, bank: 8, rank: 1, row: 0x4000, col: 0x400
- dimm 1 1024 Mb offset: 4, bank: 8, rank: 1, row: 0x4000, col: 0x400
- Ch1 phy rd1, wr1 (0x063f4031): 2 ranks, UDIMMs
- dimm 0 1024 Mb offset: 0, bank: 8, rank: 1, row: 0x4000, col: 0x400
- Ch2 phy rd3, wr3 (0x063f4031): 2 ranks, UDIMMs
- dimm 0 1024 Mb offset: 0, bank: 8, rank: 1, row: 0x4000, col: 0x400
-
- The driver will map it as::
-
- csrow0: channel 0, dimm0
- csrow1: channel 0, dimm1
- csrow2: channel 1, dimm0
- csrow3: channel 2, dimm0
-
- exports one DIMM per csrow.
-
- Each QPI is exported as a different memory controller.
-
-2) The MC has the ability to inject errors to test drivers. The drivers
- implement this functionality via some error injection nodes:
-
- For injecting a memory error, there are some sysfs nodes, under
- ``/sys/devices/system/edac/mc/mc?/``:
-
- - ``inject_addrmatch/*``:
- Controls the error injection mask register. It is possible to specify
- several characteristics of the address to match an error code::
-
- dimm = the affected dimm. Numbers are relative to a channel;
- rank = the memory rank;
- channel = the channel that will generate an error;
- bank = the affected bank;
- page = the page address;
- column (or col) = the address column.
-
- each of the above values can be set to "any" to match any valid value.
-
- At driver init, all values are set to any.
-
- For example, to generate an error at rank 1 of dimm 2, for any channel,
- any bank, any page, any column::
-
- echo 2 >/sys/devices/system/edac/mc/mc0/inject_addrmatch/dimm
- echo 1 >/sys/devices/system/edac/mc/mc0/inject_addrmatch/rank
-
- To return to the default behaviour of matching any, you can do::
-
- echo any >/sys/devices/system/edac/mc/mc0/inject_addrmatch/dimm
- echo any >/sys/devices/system/edac/mc/mc0/inject_addrmatch/rank
-
- - ``inject_eccmask``:
- specifies what bits will have troubles,
-
- - ``inject_section``:
- specifies what ECC cache section will get the error::
-
- 3 for both
- 2 for the highest
- 1 for the lowest
-
- - ``inject_type``:
- specifies the type of error, being a combination of the following bits::
-
- bit 0 - repeat
- bit 1 - ecc
- bit 2 - parity
-
- - ``inject_enable``:
- starts the error generation when something different than 0 is written.
-
- All inject vars can be read. root permission is needed for write.
-
- Datasheet states that the error will only be generated after a write on an
- address that matches inject_addrmatch. It seems, however, that reading will
- also produce an error.
-
- For example, the following code will generate an error for any write access
- at socket 0, on any DIMM/address on channel 2::
-
- echo 2 >/sys/devices/system/edac/mc/mc0/inject_addrmatch/channel
- echo 2 >/sys/devices/system/edac/mc/mc0/inject_type
- echo 64 >/sys/devices/system/edac/mc/mc0/inject_eccmask
- echo 3 >/sys/devices/system/edac/mc/mc0/inject_section
- echo 1 >/sys/devices/system/edac/mc/mc0/inject_enable
- dd if=/dev/mem of=/dev/null seek=16k bs=4k count=1 >& /dev/null
-
- For socket 1, it is needed to replace "mc0" by "mc1" at the above
- commands.
-
- The generated error message will look like::
-
- EDAC MC0: UE row 0, channel-a= 0 channel-b= 0 labels "-": NON_FATAL (addr = 0x0075b980, socket=0, Dimm=0, Channel=2, syndrome=0x00000040, count=1, Err=8c0000400001009f:4000080482 (read error: read ECC error))
-
-3) Corrected Error memory register counters
-
- Those newer MCs have some registers to count memory errors. The driver
- uses those registers to report Corrected Errors on devices with Registered
- DIMMs.
-
- However, those counters don't work with Unregistered DIMM. As the chipset
- offers some counters that also work with UDIMMs (but with a worse level of
- granularity than the default ones), the driver exposes those registers for
- UDIMM memories.
-
- They can be read by looking at the contents of ``all_channel_counts/``::
-
- $ for i in /sys/devices/system/edac/mc/mc0/all_channel_counts/*; do echo $i; cat $i; done
- /sys/devices/system/edac/mc/mc0/all_channel_counts/udimm0
- 0
- /sys/devices/system/edac/mc/mc0/all_channel_counts/udimm1
- 0
- /sys/devices/system/edac/mc/mc0/all_channel_counts/udimm2
- 0
-
- What happens here is that errors on different csrows, but at the same
- dimm number will increment the same counter.
- So, in this memory mapping::
-
- csrow0: channel 0, dimm0
- csrow1: channel 0, dimm1
- csrow2: channel 1, dimm0
- csrow3: channel 2, dimm0
-
- The hardware will increment udimm0 for an error at the first dimm at either
- csrow0, csrow2 or csrow3;
-
- The hardware will increment udimm1 for an error at the second dimm at either
- csrow0, csrow2 or csrow3;
-
- The hardware will increment udimm2 for an error at the third dimm at either
- csrow0, csrow2 or csrow3;
-
-4) Standard error counters
-
- The standard error counters are generated when an mcelog error is received
- by the driver. Since, with UDIMM, this is counted by software, it is
- possible that some errors could be lost. With RDIMM's, they display the
- contents of the registers
-
-Reference documents used on ``amd64_edac``
-------------------------------------------
-
-``amd64_edac`` module is based on the following documents
-(available from http://support.amd.com/en-us/search/tech-docs):
-
-1. :Title: BIOS and Kernel Developer's Guide for AMD Athlon 64 and AMD
- Opteron Processors
- :AMD publication #: 26094
- :Revision: 3.26
- :Link: http://support.amd.com/TechDocs/26094.PDF
-
-2. :Title: BIOS and Kernel Developer's Guide for AMD NPT Family 0Fh
- Processors
- :AMD publication #: 32559
- :Revision: 3.00
- :Issue Date: May 2006
- :Link: http://support.amd.com/TechDocs/32559.pdf
-
-3. :Title: BIOS and Kernel Developer's Guide (BKDG) For AMD Family 10h
- Processors
- :AMD publication #: 31116
- :Revision: 3.00
- :Issue Date: September 07, 2007
- :Link: http://support.amd.com/TechDocs/31116.pdf
-
-4. :Title: BIOS and Kernel Developer's Guide (BKDG) for AMD Family 15h
- Models 30h-3Fh Processors
- :AMD publication #: 49125
- :Revision: 3.06
- :Issue Date: 2/12/2015 (latest release)
- :Link: http://support.amd.com/TechDocs/49125_15h_Models_30h-3Fh_BKDG.pdf
-
-5. :Title: BIOS and Kernel Developer's Guide (BKDG) for AMD Family 15h
- Models 60h-6Fh Processors
- :AMD publication #: 50742
- :Revision: 3.01
- :Issue Date: 7/23/2015 (latest release)
- :Link: http://support.amd.com/TechDocs/50742_15h_Models_60h-6Fh_BKDG.pdf
-
-6. :Title: BIOS and Kernel Developer's Guide (BKDG) for AMD Family 16h
- Models 00h-0Fh Processors
- :AMD publication #: 48751
- :Revision: 3.03
- :Issue Date: 2/23/2015 (latest release)
- :Link: http://support.amd.com/TechDocs/48751_16h_bkdg.pdf
-
-Credits
-=======
-
-* Written by Doug Thompson <dougthompson@xmission.com>
-
- - 7 Dec 2005
- - 17 Jul 2007 Updated
-
-* |copy| Mauro Carvalho Chehab
-
- - 05 Aug 2009 Nehalem interface
- - 26 Oct 2016 Converted to ReST and cleanups at the Nehalem section
-
-* EDAC authors/maintainers:
-
- - Doug Thompson, Dave Jiang, Dave Peterson et al,
- - Mauro Carvalho Chehab
- - Borislav Petkov
- - original author: Thayne Harbaugh
diff --git a/Documentation/admin-guide/reporting-bugs.rst b/Documentation/admin-guide/reporting-bugs.rst
deleted file mode 100644
index 4650edb8840a..000000000000
--- a/Documentation/admin-guide/reporting-bugs.rst
+++ /dev/null
@@ -1,182 +0,0 @@
-.. _reportingbugs:
-
-Reporting bugs
-++++++++++++++
-
-Background
-==========
-
-The upstream Linux kernel maintainers only fix bugs for specific kernel
-versions. Those versions include the current "release candidate" (or -rc)
-kernel, any "stable" kernel versions, and any "long term" kernels.
-
-Please see https://www.kernel.org/ for a list of supported kernels. Any
-kernel marked with [EOL] is "end of life" and will not have any fixes
-backported to it.
-
-If you've found a bug on a kernel version that isn't listed on kernel.org,
-contact your Linux distribution or embedded vendor for support.
-Alternatively, you can attempt to run one of the supported stable or -rc
-kernels, and see if you can reproduce the bug on that. It's preferable
-to reproduce the bug on the latest -rc kernel.
-
-
-How to report Linux kernel bugs
-===============================
-
-
-Identify the problematic subsystem
-----------------------------------
-
-Identifying which part of the Linux kernel might be causing your issue
-increases your chances of getting your bug fixed. Simply posting to the
-generic linux-kernel mailing list (LKML) may cause your bug report to be
-lost in the noise of a mailing list that gets 1000+ emails a day.
-
-Instead, try to figure out which kernel subsystem is causing the issue,
-and email that subsystem's maintainer and mailing list. If the subsystem
-maintainer doesn't answer, then expand your scope to mailing lists like
-LKML.
-
-
-Identify who to notify
-----------------------
-
-Once you know the subsystem that is causing the issue, you should send a
-bug report. Some maintainers prefer bugs to be reported via bugzilla
-(https://bugzilla.kernel.org), while others prefer that bugs be reported
-via the subsystem mailing list.
-
-To find out where to send an emailed bug report, find your subsystem or
-device driver in the MAINTAINERS file. Search in the file for relevant
-entries, and send your bug report to the person(s) listed in the "M:"
-lines, making sure to Cc the mailing list(s) in the "L:" lines. When the
-maintainer replies to you, make sure to 'Reply-all' in order to keep the
-public mailing list(s) in the email thread.
-
-If you know which driver is causing issues, you can pass one of the driver
-files to the get_maintainer.pl script::
-
- perl scripts/get_maintainer.pl -f <filename>
-
-If it is a security bug, please copy the Security Contact listed in the
-MAINTAINERS file. They can help coordinate bugfix and disclosure. See
-:ref:`Documentation/admin-guide/security-bugs.rst <securitybugs>` for more information.
-
-If you can't figure out which subsystem caused the issue, you should file
-a bug in kernel.org bugzilla and send email to
-linux-kernel@vger.kernel.org, referencing the bugzilla URL. (For more
-information on the linux-kernel mailing list see
-http://www.tux.org/lkml/).
-
-
-Tips for reporting bugs
------------------------
-
-If you haven't reported a bug before, please read:
-
- http://www.chiark.greenend.org.uk/~sgtatham/bugs.html
-
- http://www.catb.org/esr/faqs/smart-questions.html
-
-It's REALLY important to report bugs that seem unrelated as separate email
-threads or separate bugzilla entries. If you report several unrelated
-bugs at once, it's difficult for maintainers to tease apart the relevant
-data.
-
-
-Gather information
-------------------
-
-The most important information in a bug report is how to reproduce the
-bug. This includes system information, and (most importantly)
-step-by-step instructions for how a user can trigger the bug.
-
-If the failure includes an "OOPS:", take a picture of the screen, capture
-a netconsole trace, or type the message from your screen into the bug
-report. Please read "Documentation/admin-guide/bug-hunting.rst" before posting your
-bug report. This explains what you should do with the "Oops" information
-to make it useful to the recipient.
-
-This is a suggested format for a bug report sent via email or bugzilla.
-Having a standardized bug report form makes it easier for you not to
-overlook things, and easier for the developers to find the pieces of
-information they're really interested in. If some information is not
-relevant to your bug, feel free to exclude it.
-
-First run the ver_linux script included as scripts/ver_linux, which
-reports the version of some important subsystems. Run this script with
-the command ``awk -f scripts/ver_linux``.
-
-Use that information to fill in all fields of the bug report form, and
-post it to the mailing list with a subject of "PROBLEM: <one line
-summary from [1.]>" for easy identification by the developers::
-
- [1.] One line summary of the problem:
- [2.] Full description of the problem/report:
- [3.] Keywords (i.e., modules, networking, kernel):
- [4.] Kernel information
- [4.1.] Kernel version (from /proc/version):
- [4.2.] Kernel .config file:
- [5.] Most recent kernel version which did not have the bug:
- [6.] Output of Oops.. message (if applicable) with symbolic information
- resolved (see Documentation/admin-guide/bug-hunting.rst)
- [7.] A small shell script or example program which triggers the
- problem (if possible)
- [8.] Environment
- [8.1.] Software (add the output of the ver_linux script here)
- [8.2.] Processor information (from /proc/cpuinfo):
- [8.3.] Module information (from /proc/modules):
- [8.4.] Loaded driver and hardware information (/proc/ioports, /proc/iomem)
- [8.5.] PCI information ('lspci -vvv' as root)
- [8.6.] SCSI information (from /proc/scsi/scsi)
- [8.7.] Other information that might be relevant to the problem
- (please look in /proc and include all information that you
- think to be relevant):
- [X.] Other notes, patches, fixes, workarounds:
-
-
-Follow up
-=========
-
-Expectations for bug reporters
-------------------------------
-
-Linux kernel maintainers expect bug reporters to be able to follow up on
-bug reports. That may include running new tests, applying patches,
-recompiling your kernel, and/or re-triggering your bug. The most
-frustrating thing for maintainers is for someone to report a bug, and then
-never follow up on a request to try out a fix.
-
-That said, it's still useful for a kernel maintainer to know a bug exists
-on a supported kernel, even if you can't follow up with retests. Follow
-up reports, such as replying to the email thread with "I tried the latest
-kernel and I can't reproduce my bug anymore" are also helpful, because
-maintainers have to assume silence means things are still broken.
-
-Expectations for kernel maintainers
------------------------------------
-
-Linux kernel maintainers are busy, overworked human beings. Some times
-they may not be able to address your bug in a day, a week, or two weeks.
-If they don't answer your email, they may be on vacation, or at a Linux
-conference. Check the conference schedule at https://LWN.net for more info:
-
- https://lwn.net/Calendar/
-
-In general, kernel maintainers take 1 to 5 business days to respond to
-bugs. The majority of kernel maintainers are employed to work on the
-kernel, and they may not work on the weekends. Maintainers are scattered
-around the world, and they may not work in your time zone. Unless you
-have a high priority bug, please wait at least a week after the first bug
-report before sending the maintainer a reminder email.
-
-The exceptions to this rule are regressions, kernel crashes, security holes,
-or userspace breakage caused by new kernel behavior. Those bugs should be
-addressed by the maintainers ASAP. If you suspect a maintainer is not
-responding to these types of bugs in a timely manner (especially during a
-merge window), escalate the bug to LKML and Linus Torvalds.
-
-Thank you!
-
-[Some of this is taken from Frohwalt Egerer's original linux-kernel FAQ]
diff --git a/Documentation/admin-guide/reporting-issues.rst b/Documentation/admin-guide/reporting-issues.rst
new file mode 100644
index 000000000000..a68e6d909274
--- /dev/null
+++ b/Documentation/admin-guide/reporting-issues.rst
@@ -0,0 +1,1764 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR CC-BY-4.0)
+.. See the bottom of this file for additional redistribution information.
+
+Reporting issues
+++++++++++++++++
+
+
+The short guide (aka TL;DR)
+===========================
+
+Are you facing a regression with vanilla kernels from the same stable or
+longterm series? One still supported? Then search the `LKML
+<https://lore.kernel.org/lkml/>`_ and the `Linux stable mailing list
+<https://lore.kernel.org/stable/>`_ archives for matching reports to join. If
+you don't find any, install `the latest release from that series
+<https://kernel.org/>`_. If it still shows the issue, report it to the stable
+mailing list (stable@vger.kernel.org) and CC the regressions list
+(regressions@lists.linux.dev); ideally also CC the maintainer and the mailing
+list for the subsystem in question.
+
+In all other cases try your best guess which kernel part might be causing the
+issue. Check the :ref:`MAINTAINERS <maintainers>` file for how its developers
+expect to be told about problems, which most of the time will be by email with a
+mailing list in CC. Check the destination's archives for matching reports;
+search the `LKML <https://lore.kernel.org/lkml/>`_ and the web, too. If you
+don't find any to join, install `the latest mainline kernel
+<https://kernel.org/>`_. If the issue is present there, send a report.
+
+The issue was fixed there, but you would like to see it resolved in a still
+supported stable or longterm series as well? Then install its latest release.
+If it shows the problem, search for the change that fixed it in mainline and
+check if backporting is in the works or was discarded; if it's neither, ask
+those who handled the change for it.
+
+**General remarks**: When installing and testing a kernel as outlined above,
+ensure it's vanilla (IOW: not patched and not using add-on modules). Also make
+sure it's built and running in a healthy environment and not already tainted
+before the issue occurs.
+
+If you are facing multiple issues with the Linux kernel at once, report each
+separately. While writing your report, include all information relevant to the
+issue, like the kernel and the distro used. In case of a regression, CC the
+regressions mailing list (regressions@lists.linux.dev) to your report. Also try
+to pinpoint the culprit with a bisection; if you succeed, include its
+commit-id and CC everyone in the sign-off-by chain.
+
+Once the report is out, answer any questions that come up and help where you
+can. That includes keeping the ball rolling by occasionally retesting with newer
+releases and sending a status update afterwards.
+
+Step-by-step guide how to report issues to the kernel maintainers
+=================================================================
+
+The above TL;DR outlines roughly how to report issues to the Linux kernel
+developers. It might be all that's needed for people already familiar with
+reporting issues to Free/Libre & Open Source Software (FLOSS) projects. For
+everyone else there is this section. It is more detailed and uses a
+step-by-step approach. It still tries to be brief for readability and leaves
+out a lot of details; those are described below the step-by-step guide in a
+reference section, which explains each of the steps in more detail.
+
+Note: this section covers a few more aspects than the TL;DR and does things in
+a slightly different order. That's in your interest, to make sure you notice
+early if an issue that looks like a Linux kernel problem is actually caused by
+something else. These steps thus help to ensure the time you invest in this
+process won't feel wasted in the end:
+
+ * Are you facing an issue with a Linux kernel a hardware or software vendor
+ provided? Then in almost all cases you are better off to stop reading this
+ document and reporting the issue to your vendor instead, unless you are
+ willing to install the latest Linux version yourself. Be aware the latter
+ will often be needed anyway to hunt down and fix issues.
+
+ * Perform a rough search for existing reports with your favorite internet
+ search engine; additionally, check the archives of the `Linux Kernel Mailing
+ List (LKML) <https://lore.kernel.org/lkml/>`_. If you find matching reports,
+ join the discussion instead of sending a new one.
+
+ * See if the issue you are dealing with qualifies as regression, security
+ issue, or a really severe problem: those are 'issues of high priority' that
+ need special handling in some steps that are about to follow.
+
+ * Make sure it's not the kernel's surroundings that are causing the issue
+ you face.
+
+ * Create a fresh backup and put system repair and restore tools at hand.
+
+ * Ensure your system does not enhance its kernels by building additional
+ kernel modules on-the-fly, which solutions like DKMS might be doing locally
+ without your knowledge.
+
+ * Check if your kernel was 'tainted' when the issue occurred, as the event
+ that made the kernel set this flag might be causing the issue you face.
+
+ * Write down coarsely how to reproduce the issue. If you deal with multiple
+ issues at once, create separate notes for each of them and make sure they
+ work independently on a freshly booted system. That's needed, as each issue
+ needs to get reported to the kernel developers separately, unless they are
+ strongly entangled.
+
+ * If you are facing a regression within a stable or longterm version line
+ (say something broke when updating from 5.10.4 to 5.10.5), scroll down to
+ 'Dealing with regressions within a stable and longterm kernel line'.
+
+ * Locate the driver or kernel subsystem that seems to be causing the issue.
+ Find out how and where its developers expect reports. Note: most of the
+ time this won't be bugzilla.kernel.org, as issues typically need to be sent
+ by mail to a maintainer and a public mailing list.
+
+ * Search the archives of the bug tracker or mailing list in question
+ thoroughly for reports that might match your issue. If you find anything,
+ join the discussion instead of sending a new report.
+
+After these preparations you'll now enter the main part:
+
+ * Unless you are already running the latest 'mainline' Linux kernel, better
+ go and install it for the reporting process. Testing and reporting with
+ the latest 'stable' Linux can be an acceptable alternative in some
+ situations; during the merge window that actually might be even the best
+ approach, but in that development phase it can be an even better idea to
+ suspend your efforts for a few days anyway. Whatever version you choose,
+ ideally use a 'vanilla' build. Ignoring these advices will dramatically
+ increase the risk your report will be rejected or ignored.
+
+ * Ensure the kernel you just installed does not 'taint' itself when
+ running.
+
+ * Reproduce the issue with the kernel you just installed. If it doesn't show
+ up there, scroll down to the instructions for issues only happening with
+ stable and longterm kernels.
+
+ * Optimize your notes: try to find and write the most straightforward way to
+ reproduce your issue. Make sure the end result has all the important
+ details, and at the same time is easy to read and understand for others
+ that hear about it for the first time. And if you learned something in this
+ process, consider searching again for existing reports about the issue.
+
+ * If your failure involves a 'panic', 'Oops', 'warning', or 'BUG', consider
+ decoding the kernel log to find the line of code that triggered the error.
+
+ * If your problem is a regression, try to narrow down when the issue was
+ introduced as much as possible.
+
+ * Start to compile the report by writing a detailed description about the
+ issue. Always mention a few things: the latest kernel version you installed
+ for reproducing, the Linux Distribution used, and your notes on how to
+ reproduce the issue. Ideally, make the kernel's build configuration
+ (.config) and the output from ``dmesg`` available somewhere on the net and
+ link to it. Include or upload all other information that might be relevant,
+ like the output/screenshot of an Oops or the output from ``lspci``. Once
+ you wrote this main part, insert a normal length paragraph on top of it
+ outlining the issue and the impact quickly. On top of this add one sentence
+ that briefly describes the problem and gets people to read on. Now give the
+ thing a descriptive title or subject that yet again is shorter. Then you're
+ ready to send or file the report like the MAINTAINERS file told you, unless
+ you are dealing with one of those 'issues of high priority': they need
+ special care which is explained in 'Special handling for high priority
+ issues' below.
+
+ * Wait for reactions and keep the thing rolling until you can accept the
+ outcome in one way or the other. Thus react publicly and in a timely manner
+ to any inquiries. Test proposed fixes. Do proactive testing: retest with at
+ least every first release candidate (RC) of a new mainline version and
+ report your results. Send friendly reminders if things stall. And try to
+ help yourself, if you don't get any help or if it's unsatisfying.
+
+
+Reporting regressions within a stable and longterm kernel line
+--------------------------------------------------------------
+
+This subsection is for you, if you followed above process and got sent here at
+the point about regression within a stable or longterm kernel version line. You
+face one of those if something breaks when updating from 5.10.4 to 5.10.5 (a
+switch from 5.9.15 to 5.10.5 does not qualify). The developers want to fix such
+regressions as quickly as possible, hence there is a streamlined process to
+report them:
+
+ * Check if the kernel developers still maintain the Linux kernel version
+ line you care about: go to the `front page of kernel.org
+ <https://kernel.org/>`_ and make sure it mentions
+ the latest release of the particular version line without an '[EOL]' tag.
+
+ * Check the archives of the `Linux stable mailing list
+ <https://lore.kernel.org/stable/>`_ for existing reports.
+
+ * Install the latest release from the particular version line as a vanilla
+ kernel. Ensure this kernel is not tainted and still shows the problem, as
+ the issue might have already been fixed there. If you first noticed the
+ problem with a vendor kernel, check a vanilla build of the last version
+ known to work performs fine as well.
+
+ * Send a short problem report to the Linux stable mailing list
+ (stable@vger.kernel.org) and CC the Linux regressions mailing list
+ (regressions@lists.linux.dev); if you suspect the cause in a particular
+ subsystem, CC its maintainer and its mailing list. Roughly describe the
+ issue and ideally explain how to reproduce it. Mention the first version
+ that shows the problem and the last version that's working fine. Then
+ wait for further instructions.
+
+The reference section below explains each of these steps in more detail.
+
+
+Reporting issues only occurring in older kernel version lines
+-------------------------------------------------------------
+
+This subsection is for you, if you tried the latest mainline kernel as outlined
+above, but failed to reproduce your issue there; at the same time you want to
+see the issue fixed in a still supported stable or longterm series or vendor
+kernels regularly rebased on those. If that is the case, follow these steps:
+
+ * Prepare yourself for the possibility that going through the next few steps
+ might not get the issue solved in older releases: the fix might be too big
+ or risky to get backported there.
+
+ * Perform the first three steps in the section "Dealing with regressions
+ within a stable and longterm kernel line" above.
+
+ * Search the Linux kernel version control system for the change that fixed
+ the issue in mainline, as its commit message might tell you if the fix is
+ scheduled for backporting already. If you don't find anything that way,
+ search the appropriate mailing lists for posts that discuss such an issue
+ or peer-review possible fixes; then check the discussions if the fix was
+ deemed unsuitable for backporting. If backporting was not considered at
+ all, join the newest discussion, asking if it's in the cards.
+
+ * One of the former steps should lead to a solution. If that doesn't work
+ out, ask the maintainers for the subsystem that seems to be causing the
+ issue for advice; CC the mailing list for the particular subsystem as well
+ as the stable mailing list.
+
+The reference section below explains each of these steps in more detail.
+
+
+Reference section: Reporting issues to the kernel maintainers
+=============================================================
+
+The detailed guides above outline all the major steps in brief fashion, which
+should be enough for most people. But sometimes there are situations where even
+experienced users might wonder how to actually do one of those steps. That's
+what this section is for, as it will provide a lot more details on each of the
+above steps. Consider this as reference documentation: it's possible to read it
+from top to bottom. But it's mainly meant to skim over and a place to look up
+details how to actually perform those steps.
+
+A few words of general advice before digging into the details:
+
+ * The Linux kernel developers are well aware this process is complicated and
+ demands more than other FLOSS projects. We'd love to make it simpler. But
+ that would require work in various places as well as some infrastructure,
+ which would need constant maintenance; nobody has stepped up to do that
+ work, so that's just how things are for now.
+
+ * A warranty or support contract with some vendor doesn't entitle you to
+ request fixes from developers in the upstream Linux kernel community: such
+ contracts are completely outside the scope of the Linux kernel, its
+ development community, and this document. That's why you can't demand
+ anything such a contract guarantees in this context, not even if the
+ developer handling the issue works for the vendor in question. If you want
+ to claim your rights, use the vendor's support channel instead. When doing
+ so, you might want to mention you'd like to see the issue fixed in the
+ upstream Linux kernel; motivate them by saying it's the only way to ensure
+ the fix in the end will get incorporated in all Linux distributions.
+
+ * If you never reported an issue to a FLOSS project before you should consider
+ reading `How to Report Bugs Effectively
+ <https://www.chiark.greenend.org.uk/~sgtatham/bugs.html>`_, `How To Ask
+ Questions The Smart Way
+ <http://www.catb.org/esr/faqs/smart-questions.html>`_, and `How to ask good
+ questions <https://jvns.ca/blog/good-questions/>`_.
+
+With that off the table, find below the details on how to properly report
+issues to the Linux kernel developers.
+
+
+Make sure you're using the upstream Linux kernel
+------------------------------------------------
+
+ *Are you facing an issue with a Linux kernel a hardware or software vendor
+ provided? Then in almost all cases you are better off to stop reading this
+ document and reporting the issue to your vendor instead, unless you are
+ willing to install the latest Linux version yourself. Be aware the latter
+ will often be needed anyway to hunt down and fix issues.*
+
+Like most programmers, Linux kernel developers don't like to spend time dealing
+with reports for issues that don't even happen with their current code. It's
+just a waste everybody's time, especially yours. Unfortunately such situations
+easily happen when it comes to the kernel and often leads to frustration on both
+sides. That's because almost all Linux-based kernels pre-installed on devices
+(Computers, Laptops, Smartphones, Routers, …) and most shipped by Linux
+distributors are quite distant from the official Linux kernel as distributed by
+kernel.org: these kernels from these vendors are often ancient from the point of
+Linux development or heavily modified, often both.
+
+Most of these vendor kernels are quite unsuitable for reporting issues to the
+Linux kernel developers: an issue you face with one of them might have been
+fixed by the Linux kernel developers months or years ago already; additionally,
+the modifications and enhancements by the vendor might be causing the issue you
+face, even if they look small or totally unrelated. That's why you should report
+issues with these kernels to the vendor. Its developers should look into the
+report and, in case it turns out to be an upstream issue, fix it directly
+upstream or forward the report there. In practice that often does not work out
+or might not what you want. You thus might want to consider circumventing the
+vendor by installing the very latest Linux kernel core yourself. If that's an
+option for you move ahead in this process, as a later step in this guide will
+explain how to do that once it rules out other potential causes for your issue.
+
+Note, the previous paragraph is starting with the word 'most', as sometimes
+developers in fact are willing to handle reports about issues occurring with
+vendor kernels. If they do in the end highly depends on the developers and the
+issue in question. Your chances are quite good if the distributor applied only
+small modifications to a kernel based on a recent Linux version; that for
+example often holds true for the mainline kernels shipped by Debian GNU/Linux
+Sid or Fedora Rawhide. Some developers will also accept reports about issues
+with kernels from distributions shipping the latest stable kernel, as long as
+it's only slightly modified; that for example is often the case for Arch Linux,
+regular Fedora releases, and openSUSE Tumbleweed. But keep in mind, you better
+want to use a mainline Linux and avoid using a stable kernel for this
+process, as outlined in the section 'Install a fresh kernel for testing' in more
+detail.
+
+Obviously you are free to ignore all this advice and report problems with an old
+or heavily modified vendor kernel to the upstream Linux developers. But note,
+those often get rejected or ignored, so consider yourself warned. But it's still
+better than not reporting the issue at all: sometimes such reports directly or
+indirectly will help to get the issue fixed over time.
+
+
+Search for existing reports, first run
+--------------------------------------
+
+ *Perform a rough search for existing reports with your favorite internet
+ search engine; additionally, check the archives of the Linux Kernel Mailing
+ List (LKML). If you find matching reports, join the discussion instead of
+ sending a new one.*
+
+Reporting an issue that someone else already brought forward is often a waste of
+time for everyone involved, especially you as the reporter. So it's in your own
+interest to thoroughly check if somebody reported the issue already. At this
+step of the process it's okay to just perform a rough search: a later step will
+tell you to perform a more detailed search once you know where your issue needs
+to be reported to. Nevertheless, do not hurry with this step of the reporting
+process, it can save you time and trouble.
+
+Simply search the internet with your favorite search engine first. Afterwards,
+search the `Linux Kernel Mailing List (LKML) archives
+<https://lore.kernel.org/lkml/>`_.
+
+If you get flooded with results consider telling your search engine to limit
+search timeframe to the past month or year. And wherever you search, make sure
+to use good search terms; vary them a few times, too. While doing so try to
+look at the issue from the perspective of someone else: that will help you to
+come up with other words to use as search terms. Also make sure not to use too
+many search terms at once. Remember to search with and without information like
+the name of the kernel driver or the name of the affected hardware component.
+But its exact brand name (say 'ASUS Red Devil Radeon RX 5700 XT Gaming OC')
+often is not much helpful, as it is too specific. Instead try search terms like
+the model line (Radeon 5700 or Radeon 5000) and the code name of the main chip
+('Navi' or 'Navi10') with and without its manufacturer ('AMD').
+
+In case you find an existing report about your issue, join the discussion, as
+you might be able to provide valuable additional information. That can be
+important even when a fix is prepared or in its final stages already, as
+developers might look for people that can provide additional information or
+test a proposed fix. Jump to the section 'Duties after the report went out' for
+details on how to get properly involved.
+
+Note, searching `bugzilla.kernel.org <https://bugzilla.kernel.org/>`_ might also
+be a good idea, as that might provide valuable insights or turn up matching
+reports. If you find the latter, just keep in mind: most subsystems expect
+reports in different places, as described below in the section "Check where you
+need to report your issue". The developers that should take care of the issue
+thus might not even be aware of the bugzilla ticket. Hence, check the ticket if
+the issue already got reported as outlined in this document and if not consider
+doing so.
+
+
+Issue of high priority?
+-----------------------
+
+ *See if the issue you are dealing with qualifies as regression, security
+ issue, or a really severe problem: those are 'issues of high priority' that
+ need special handling in some steps that are about to follow.*
+
+Linus Torvalds and the leading Linux kernel developers want to see some issues
+fixed as soon as possible, hence there are 'issues of high priority' that get
+handled slightly differently in the reporting process. Three type of cases
+qualify: regressions, security issues, and really severe problems.
+
+You deal with a regression if some application or practical use case running
+fine with one Linux kernel works worse or not at all with a newer version
+compiled using a similar configuration. The document
+Documentation/admin-guide/reporting-regressions.rst explains this in more
+detail. It also provides a good deal of other information about regressions you
+might want to be aware of; it for example explains how to add your issue to the
+list of tracked regressions, to ensure it won't fall through the cracks.
+
+What qualifies as security issue is left to your judgment. Consider reading
+Documentation/process/security-bugs.rst before proceeding, as it
+provides additional details how to best handle security issues.
+
+An issue is a 'really severe problem' when something totally unacceptably bad
+happens. That's for example the case when a Linux kernel corrupts the data it's
+handling or damages hardware it's running on. You're also dealing with a severe
+issue when the kernel suddenly stops working with an error message ('kernel
+panic') or without any farewell note at all. Note: do not confuse a 'panic' (a
+fatal error where the kernel stop itself) with a 'Oops' (a recoverable error),
+as the kernel remains running after the latter.
+
+
+Ensure a healthy environment
+----------------------------
+
+ *Make sure it's not the kernel's surroundings that are causing the issue
+ you face.*
+
+Problems that look a lot like a kernel issue are sometimes caused by build or
+runtime environment. It's hard to rule out that problem completely, but you
+should minimize it:
+
+ * Use proven tools when building your kernel, as bugs in the compiler or the
+ binutils can cause the resulting kernel to misbehave.
+
+ * Ensure your computer components run within their design specifications;
+ that's especially important for the main processor, the main memory, and the
+ motherboard. Therefore, stop undervolting or overclocking when facing a
+ potential kernel issue.
+
+ * Try to make sure it's not faulty hardware that is causing your issue. Bad
+ main memory for example can result in a multitude of issues that will
+ manifest itself in problems looking like kernel issues.
+
+ * If you're dealing with a filesystem issue, you might want to check the file
+ system in question with ``fsck``, as it might be damaged in a way that leads
+ to unexpected kernel behavior.
+
+ * When dealing with a regression, make sure it's not something else that
+ changed in parallel to updating the kernel. The problem for example might be
+ caused by other software that was updated at the same time. It can also
+ happen that a hardware component coincidentally just broke when you rebooted
+ into a new kernel for the first time. Updating the systems BIOS or changing
+ something in the BIOS Setup can also lead to problems that on look a lot
+ like a kernel regression.
+
+
+Prepare for emergencies
+-----------------------
+
+ *Create a fresh backup and put system repair and restore tools at hand.*
+
+Reminder, you are dealing with computers, which sometimes do unexpected things,
+especially if you fiddle with crucial parts like the kernel of its operating
+system. That's what you are about to do in this process. Thus, make sure to
+create a fresh backup; also ensure you have all tools at hand to repair or
+reinstall the operating system as well as everything you need to restore the
+backup.
+
+
+Make sure your kernel doesn't get enhanced
+------------------------------------------
+
+ *Ensure your system does not enhance its kernels by building additional
+ kernel modules on-the-fly, which solutions like DKMS might be doing locally
+ without your knowledge.*
+
+The risk your issue report gets ignored or rejected dramatically increases if
+your kernel gets enhanced in any way. That's why you should remove or disable
+mechanisms like akmods and DKMS: those build add-on kernel modules
+automatically, for example when you install a new Linux kernel or boot it for
+the first time. Also remove any modules they might have installed. Then reboot
+before proceeding.
+
+Note, you might not be aware that your system is using one of these solutions:
+they often get set up silently when you install Nvidia's proprietary graphics
+driver, VirtualBox, or other software that requires a some support from a
+module not part of the Linux kernel. That why your might need to uninstall the
+packages with such software to get rid of any 3rd party kernel module.
+
+
+Check 'taint' flag
+------------------
+
+ *Check if your kernel was 'tainted' when the issue occurred, as the event
+ that made the kernel set this flag might be causing the issue you face.*
+
+The kernel marks itself with a 'taint' flag when something happens that might
+lead to follow-up errors that look totally unrelated. The issue you face might
+be such an error if your kernel is tainted. That's why it's in your interest to
+rule this out early before investing more time into this process. This is the
+only reason why this step is here, as this process later will tell you to
+install the latest mainline kernel; you will need to check the taint flag again
+then, as that's when it matters because it's the kernel the report will focus
+on.
+
+On a running system is easy to check if the kernel tainted itself: if ``cat
+/proc/sys/kernel/tainted`` returns '0' then the kernel is not tainted and
+everything is fine. Checking that file is impossible in some situations; that's
+why the kernel also mentions the taint status when it reports an internal
+problem (a 'kernel bug'), a recoverable error (a 'kernel Oops') or a
+non-recoverable error before halting operation (a 'kernel panic'). Look near
+the top of the error messages printed when one of these occurs and search for a
+line starting with 'CPU:'. It should end with 'Not tainted' if the kernel was
+not tainted when it noticed the problem; it was tainted if you see 'Tainted:'
+followed by a few spaces and some letters.
+
+If your kernel is tainted, study Documentation/admin-guide/tainted-kernels.rst
+to find out why. Try to eliminate the reason. Often it's caused by one these
+three things:
+
+ 1. A recoverable error (a 'kernel Oops') occurred and the kernel tainted
+ itself, as the kernel knows it might misbehave in strange ways after that
+ point. In that case check your kernel or system log and look for a section
+ that starts with this::
+
+ Oops: 0000 [#1] SMP
+
+ That's the first Oops since boot-up, as the '#1' between the brackets shows.
+ Every Oops and any other problem that happens after that point might be a
+ follow-up problem to that first Oops, even if both look totally unrelated.
+ Rule this out by getting rid of the cause for the first Oops and reproducing
+ the issue afterwards. Sometimes simply restarting will be enough, sometimes
+ a change to the configuration followed by a reboot can eliminate the Oops.
+ But don't invest too much time into this at this point of the process, as
+ the cause for the Oops might already be fixed in the newer Linux kernel
+ version you are going to install later in this process.
+
+ 2. Your system uses a software that installs its own kernel modules, for
+ example Nvidia's proprietary graphics driver or VirtualBox. The kernel
+ taints itself when it loads such module from external sources (even if
+ they are Open Source): they sometimes cause errors in unrelated kernel
+ areas and thus might be causing the issue you face. You therefore have to
+ prevent those modules from loading when you want to report an issue to the
+ Linux kernel developers. Most of the time the easiest way to do that is:
+ temporarily uninstall such software including any modules they might have
+ installed. Afterwards reboot.
+
+ 3. The kernel also taints itself when it's loading a module that resides in
+ the staging tree of the Linux kernel source. That's a special area for
+ code (mostly drivers) that does not yet fulfill the normal Linux kernel
+ quality standards. When you report an issue with such a module it's
+ obviously okay if the kernel is tainted; just make sure the module in
+ question is the only reason for the taint. If the issue happens in an
+ unrelated area reboot and temporarily block the module from being loaded
+ by specifying ``foo.blacklist=1`` as kernel parameter (replace 'foo' with
+ the name of the module in question).
+
+
+Document how to reproduce issue
+-------------------------------
+
+ *Write down coarsely how to reproduce the issue. If you deal with multiple
+ issues at once, create separate notes for each of them and make sure they
+ work independently on a freshly booted system. That's needed, as each issue
+ needs to get reported to the kernel developers separately, unless they are
+ strongly entangled.*
+
+If you deal with multiple issues at once, you'll have to report each of them
+separately, as they might be handled by different developers. Describing
+various issues in one report also makes it quite difficult for others to tear
+it apart. Hence, only combine issues in one report if they are very strongly
+entangled.
+
+Additionally, during the reporting process you will have to test if the issue
+happens with other kernel versions. Therefore, it will make your work easier if
+you know exactly how to reproduce an issue quickly on a freshly booted system.
+
+Note: it's often fruitless to report issues that only happened once, as they
+might be caused by a bit flip due to cosmic radiation. That's why you should
+try to rule that out by reproducing the issue before going further. Feel free
+to ignore this advice if you are experienced enough to tell a one-time error
+due to faulty hardware apart from a kernel issue that rarely happens and thus
+is hard to reproduce.
+
+
+Regression in stable or longterm kernel?
+----------------------------------------
+
+ *If you are facing a regression within a stable or longterm version line
+ (say something broke when updating from 5.10.4 to 5.10.5), scroll down to
+ 'Dealing with regressions within a stable and longterm kernel line'.*
+
+Regression within a stable and longterm kernel version line are something the
+Linux developers want to fix badly, as such issues are even more unwanted than
+regression in the main development branch, as they can quickly affect a lot of
+people. The developers thus want to learn about such issues as quickly as
+possible, hence there is a streamlined process to report them. Note,
+regressions with newer kernel version line (say something broke when switching
+from 5.9.15 to 5.10.5) do not qualify.
+
+
+Check where you need to report your issue
+-----------------------------------------
+
+ *Locate the driver or kernel subsystem that seems to be causing the issue.
+ Find out how and where its developers expect reports. Note: most of the
+ time this won't be bugzilla.kernel.org, as issues typically need to be sent
+ by mail to a maintainer and a public mailing list.*
+
+It's crucial to send your report to the right people, as the Linux kernel is a
+big project and most of its developers are only familiar with a small subset of
+it. Quite a few programmers for example only care for just one driver, for
+example one for a WiFi chip; its developer likely will only have small or no
+knowledge about the internals of remote or unrelated "subsystems", like the TCP
+stack, the PCIe/PCI subsystem, memory management or file systems.
+
+Problem is: the Linux kernel lacks a central bug tracker where you can simply
+file your issue and make it reach the developers that need to know about it.
+That's why you have to find the right place and way to report issues yourself.
+You can do that with the help of a script (see below), but it mainly targets
+kernel developers and experts. For everybody else the MAINTAINERS file is the
+better place.
+
+How to read the MAINTAINERS file
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+To illustrate how to use the :ref:`MAINTAINERS <maintainers>` file, let's assume
+the WiFi in your Laptop suddenly misbehaves after updating the kernel. In that
+case it's likely an issue in the WiFi driver. Obviously it could also be some
+code it builds upon, but unless you suspect something like that stick to the
+driver. If it's really something else, the driver's developers will get the
+right people involved.
+
+Sadly, there is no way to check which code is driving a particular hardware
+component that is both universal and easy.
+
+In case of a problem with the WiFi driver you for example might want to look at
+the output of ``lspci -k``, as it lists devices on the PCI/PCIe bus and the
+kernel module driving it::
+
+ [user@something ~]$ lspci -k
+ [...]
+ 3a:00.0 Network controller: Qualcomm Atheros QCA6174 802.11ac Wireless Network Adapter (rev 32)
+ Subsystem: Bigfoot Networks, Inc. Device 1535
+ Kernel driver in use: ath10k_pci
+ Kernel modules: ath10k_pci
+ [...]
+
+But this approach won't work if your WiFi chip is connected over USB or some
+other internal bus. In those cases you might want to check your WiFi manager or
+the output of ``ip link``. Look for the name of the problematic network
+interface, which might be something like 'wlp58s0'. This name can be used like
+this to find the module driving it::
+
+ [user@something ~]$ realpath --relative-to=/sys/module/ /sys/class/net/wlp58s0/device/driver/module
+ ath10k_pci
+
+In case tricks like these don't bring you any further, try to search the
+internet on how to narrow down the driver or subsystem in question. And if you
+are unsure which it is: just try your best guess, somebody will help you if you
+guessed poorly.
+
+Once you know the driver or subsystem, you want to search for it in the
+MAINTAINERS file. In the case of 'ath10k_pci' you won't find anything, as the
+name is too specific. Sometimes you will need to search on the net for help;
+but before doing so, try a somewhat shorted or modified name when searching the
+MAINTAINERS file, as then you might find something like this::
+
+ QUALCOMM ATHEROS ATH10K WIRELESS DRIVER
+ Mail: A. Some Human <shuman@example.com>
+ Mailing list: ath10k@lists.infradead.org
+ Status: Supported
+ Web-page: https://wireless.wiki.kernel.org/en/users/Drivers/ath10k
+ SCM: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git
+ Files: drivers/net/wireless/ath/ath10k/
+
+Note: the line description will be abbreviations, if you read the plain
+MAINTAINERS file found in the root of the Linux source tree. 'Mail:' for
+example will be 'M:', 'Mailing list:' will be 'L', and 'Status:' will be 'S:'.
+A section near the top of the file explains these and other abbreviations.
+
+First look at the line 'Status'. Ideally it should be 'Supported' or
+'Maintained'. If it states 'Obsolete' then you are using some outdated approach
+that was replaced by a newer solution you need to switch to. Sometimes the code
+only has someone who provides 'Odd Fixes' when feeling motivated. And with
+'Orphan' you are totally out of luck, as nobody takes care of the code anymore.
+That only leaves these options: arrange yourself to live with the issue, fix it
+yourself, or find a programmer somewhere willing to fix it.
+
+After checking the status, look for a line starting with 'bugs:': it will tell
+you where to find a subsystem specific bug tracker to file your issue. The
+example above does not have such a line. That is the case for most sections, as
+Linux kernel development is completely driven by mail. Very few subsystems use
+a bug tracker, and only some of those rely on bugzilla.kernel.org.
+
+In this and many other cases you thus have to look for lines starting with
+'Mail:' instead. Those mention the name and the email addresses for the
+maintainers of the particular code. Also look for a line starting with 'Mailing
+list:', which tells you the public mailing list where the code is developed.
+Your report later needs to go by mail to those addresses. Additionally, for all
+issue reports sent by email, make sure to add the Linux Kernel Mailing List
+(LKML) <linux-kernel@vger.kernel.org> to CC. Don't omit either of the mailing
+lists when sending your issue report by mail later! Maintainers are busy people
+and might leave some work for other developers on the subsystem specific list;
+and LKML is important to have one place where all issue reports can be found.
+
+
+Finding the maintainers with the help of a script
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+For people that have the Linux sources at hand there is a second option to find
+the proper place to report: the script 'scripts/get_maintainer.pl' which tries
+to find all people to contact. It queries the MAINTAINERS file and needs to be
+called with a path to the source code in question. For drivers compiled as
+module if often can be found with a command like this::
+
+ $ modinfo ath10k_pci | grep filename | sed 's!/lib/modules/.*/kernel/!!; s!filename:!!; s!\.ko\(\|\.xz\)!!'
+ drivers/net/wireless/ath/ath10k/ath10k_pci.ko
+
+Pass parts of this to the script::
+
+ $ ./scripts/get_maintainer.pl -f drivers/net/wireless/ath/ath10k*
+ Some Human <shuman@example.com> (supporter:QUALCOMM ATHEROS ATH10K WIRELESS DRIVER)
+ Another S. Human <asomehuman@example.com> (maintainer:NETWORKING DRIVERS)
+ ath10k@lists.infradead.org (open list:QUALCOMM ATHEROS ATH10K WIRELESS DRIVER)
+ linux-wireless@vger.kernel.org (open list:NETWORKING DRIVERS (WIRELESS))
+ netdev@vger.kernel.org (open list:NETWORKING DRIVERS)
+ linux-kernel@vger.kernel.org (open list)
+
+Don't sent your report to all of them. Send it to the maintainers, which the
+script calls "supporter:"; additionally CC the most specific mailing list for
+the code as well as the Linux Kernel Mailing List (LKML). In this case you thus
+would need to send the report to 'Some Human <shuman@example.com>' with
+'ath10k@lists.infradead.org' and 'linux-kernel@vger.kernel.org' in CC.
+
+Note: in case you cloned the Linux sources with git you might want to call
+``get_maintainer.pl`` a second time with ``--git``. The script then will look
+at the commit history to find which people recently worked on the code in
+question, as they might be able to help. But use these results with care, as it
+can easily send you in a wrong direction. That for example happens quickly in
+areas rarely changed (like old or unmaintained drivers): sometimes such code is
+modified during tree-wide cleanups by developers that do not care about the
+particular driver at all.
+
+
+Search for existing reports, second run
+---------------------------------------
+
+ *Search the archives of the bug tracker or mailing list in question
+ thoroughly for reports that might match your issue. If you find anything,
+ join the discussion instead of sending a new report.*
+
+As mentioned earlier already: reporting an issue that someone else already
+brought forward is often a waste of time for everyone involved, especially you
+as the reporter. That's why you should search for existing report again, now
+that you know where they need to be reported to. If it's mailing list, you will
+often find its archives on `lore.kernel.org <https://lore.kernel.org/>`_.
+
+But some list are hosted in different places. That for example is the case for
+the ath10k WiFi driver used as example in the previous step. But you'll often
+find the archives for these lists easily on the net. Searching for 'archive
+ath10k@lists.infradead.org' for example will lead you to the `Info page for the
+ath10k mailing list <https://lists.infradead.org/mailman/listinfo/ath10k>`_,
+which at the top links to its
+`list archives <https://lists.infradead.org/pipermail/ath10k/>`_. Sadly this and
+quite a few other lists miss a way to search the archives. In those cases use a
+regular internet search engine and add something like
+'site:lists.infradead.org/pipermail/ath10k/' to your search terms, which limits
+the results to the archives at that URL.
+
+It's also wise to check the internet, LKML and maybe bugzilla.kernel.org again
+at this point. If your report needs to be filed in a bug tracker, you may want
+to check the mailing list archives for the subsystem as well, as someone might
+have reported it only there.
+
+For details how to search and what to do if you find matching reports see
+"Search for existing reports, first run" above.
+
+Do not hurry with this step of the reporting process: spending 30 to 60 minutes
+or even more time can save you and others quite a lot of time and trouble.
+
+
+Install a fresh kernel for testing
+----------------------------------
+
+ *Unless you are already running the latest 'mainline' Linux kernel, better
+ go and install it for the reporting process. Testing and reporting with
+ the latest 'stable' Linux can be an acceptable alternative in some
+ situations; during the merge window that actually might be even the best
+ approach, but in that development phase it can be an even better idea to
+ suspend your efforts for a few days anyway. Whatever version you choose,
+ ideally use a 'vanilla' built. Ignoring these advices will dramatically
+ increase the risk your report will be rejected or ignored.*
+
+As mentioned in the detailed explanation for the first step already: Like most
+programmers, Linux kernel developers don't like to spend time dealing with
+reports for issues that don't even happen with the current code. It's just a
+waste everybody's time, especially yours. That's why it's in everybody's
+interest that you confirm the issue still exists with the latest upstream code
+before reporting it. You are free to ignore this advice, but as outlined
+earlier: doing so dramatically increases the risk that your issue report might
+get rejected or simply ignored.
+
+In the scope of the kernel "latest upstream" normally means:
+
+ * Install a mainline kernel; the latest stable kernel can be an option, but
+ most of the time is better avoided. Longterm kernels (sometimes called 'LTS
+ kernels') are unsuitable at this point of the process. The next subsection
+ explains all of this in more detail.
+
+ * The over next subsection describes way to obtain and install such a kernel.
+ It also outlines that using a pre-compiled kernel are fine, but better are
+ vanilla, which means: it was built using Linux sources taken straight `from
+ kernel.org <https://kernel.org/>`_ and not modified or enhanced in any way.
+
+Choosing the right version for testing
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Head over to `kernel.org <https://kernel.org/>`_ to find out which version you
+want to use for testing. Ignore the big yellow button that says 'Latest release'
+and look a little lower at the table. At its top you'll see a line starting with
+mainline, which most of the time will point to a pre-release with a version
+number like '5.8-rc2'. If that's the case, you'll want to use this mainline
+kernel for testing, as that where all fixes have to be applied first. Do not let
+that 'rc' scare you, these 'development kernels' are pretty reliable — and you
+made a backup, as you were instructed above, didn't you?
+
+In about two out of every nine to ten weeks, mainline might point you to a
+proper release with a version number like '5.7'. If that happens, consider
+suspending the reporting process until the first pre-release of the next
+version (5.8-rc1) shows up on kernel.org. That's because the Linux development
+cycle then is in its two-week long 'merge window'. The bulk of the changes and
+all intrusive ones get merged for the next release during this time. It's a bit
+more risky to use mainline during this period. Kernel developers are also often
+quite busy then and might have no spare time to deal with issue reports. It's
+also quite possible that one of the many changes applied during the merge
+window fixes the issue you face; that's why you soon would have to retest with
+a newer kernel version anyway, as outlined below in the section 'Duties after
+the report went out'.
+
+That's why it might make sense to wait till the merge window is over. But don't
+to that if you're dealing with something that shouldn't wait. In that case
+consider obtaining the latest mainline kernel via git (see below) or use the
+latest stable version offered on kernel.org. Using that is also acceptable in
+case mainline for some reason does currently not work for you. An in general:
+using it for reproducing the issue is also better than not reporting it issue
+at all.
+
+Better avoid using the latest stable kernel outside merge windows, as all fixes
+must be applied to mainline first. That's why checking the latest mainline
+kernel is so important: any issue you want to see fixed in older version lines
+needs to be fixed in mainline first before it can get backported, which can
+take a few days or weeks. Another reason: the fix you hope for might be too
+hard or risky for backporting; reporting the issue again hence is unlikely to
+change anything.
+
+These aspects are also why longterm kernels (sometimes called "LTS kernels")
+are unsuitable for this part of the reporting process: they are to distant from
+the current code. Hence go and test mainline first and follow the process
+further: if the issue doesn't occur with mainline it will guide you how to get
+it fixed in older version lines, if that's in the cards for the fix in question.
+
+How to obtain a fresh Linux kernel
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+**Using a pre-compiled kernel**: This is often the quickest, easiest, and safest
+way for testing — especially is you are unfamiliar with the Linux kernel. The
+problem: most of those shipped by distributors or add-on repositories are build
+from modified Linux sources. They are thus not vanilla and therefore often
+unsuitable for testing and issue reporting: the changes might cause the issue
+you face or influence it somehow.
+
+But you are in luck if you are using a popular Linux distribution: for quite a
+few of them you'll find repositories on the net that contain packages with the
+latest mainline or stable Linux built as vanilla kernel. It's totally okay to
+use these, just make sure from the repository's description they are vanilla or
+at least close to it. Additionally ensure the packages contain the latest
+versions as offered on kernel.org. The packages are likely unsuitable if they
+are older than a week, as new mainline and stable kernels typically get released
+at least once a week.
+
+Please note that you might need to build your own kernel manually later: that's
+sometimes needed for debugging or testing fixes, as described later in this
+document. Also be aware that pre-compiled kernels might lack debug symbols that
+are needed to decode messages the kernel prints when a panic, Oops, warning, or
+BUG occurs; if you plan to decode those, you might be better off compiling a
+kernel yourself (see the end of this subsection and the section titled 'Decode
+failure messages' for details).
+
+**Using git**: Developers and experienced Linux users familiar with git are
+often best served by obtaining the latest Linux kernel sources straight from the
+`official development repository on kernel.org
+<https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/>`_.
+Those are likely a bit ahead of the latest mainline pre-release. Don't worry
+about it: they are as reliable as a proper pre-release, unless the kernel's
+development cycle is currently in the middle of a merge window. But even then
+they are quite reliable.
+
+**Conventional**: People unfamiliar with git are often best served by
+downloading the sources as tarball from `kernel.org <https://kernel.org/>`_.
+
+How to actually build a kernel is not described here, as many websites explain
+the necessary steps already. If you are new to it, consider following one of
+those how-to's that suggest to use ``make localmodconfig``, as that tries to
+pick up the configuration of your current kernel and then tries to adjust it
+somewhat for your system. That does not make the resulting kernel any better,
+but quicker to compile.
+
+Note: If you are dealing with a panic, Oops, warning, or BUG from the kernel,
+please try to enable CONFIG_KALLSYMS when configuring your kernel.
+Additionally, enable CONFIG_DEBUG_KERNEL and CONFIG_DEBUG_INFO, too; the
+latter is the relevant one of those two, but can only be reached if you enable
+the former. Be aware CONFIG_DEBUG_INFO increases the storage space required to
+build a kernel by quite a bit. But that's worth it, as these options will allow
+you later to pinpoint the exact line of code that triggers your issue. The
+section 'Decode failure messages' below explains this in more detail.
+
+But keep in mind: Always keep a record of the issue encountered in case it is
+hard to reproduce. Sending an undecoded report is better than not reporting
+the issue at all.
+
+
+Check 'taint' flag
+------------------
+
+ *Ensure the kernel you just installed does not 'taint' itself when
+ running.*
+
+As outlined above in more detail already: the kernel sets a 'taint' flag when
+something happens that can lead to follow-up errors that look totally
+unrelated. That's why you need to check if the kernel you just installed does
+not set this flag. And if it does, you in almost all the cases needs to
+eliminate the reason for it before you reporting issues that occur with it. See
+the section above for details how to do that.
+
+
+Reproduce issue with the fresh kernel
+-------------------------------------
+
+ *Reproduce the issue with the kernel you just installed. If it doesn't show
+ up there, scroll down to the instructions for issues only happening with
+ stable and longterm kernels.*
+
+Check if the issue occurs with the fresh Linux kernel version you just
+installed. If it was fixed there already, consider sticking with this version
+line and abandoning your plan to report the issue. But keep in mind that other
+users might still be plagued by it, as long as it's not fixed in either stable
+and longterm version from kernel.org (and thus vendor kernels derived from
+those). If you prefer to use one of those or just want to help their users,
+head over to the section "Details about reporting issues only occurring in
+older kernel version lines" below.
+
+
+Optimize description to reproduce issue
+---------------------------------------
+
+ *Optimize your notes: try to find and write the most straightforward way to
+ reproduce your issue. Make sure the end result has all the important
+ details, and at the same time is easy to read and understand for others
+ that hear about it for the first time. And if you learned something in this
+ process, consider searching again for existing reports about the issue.*
+
+An unnecessarily complex report will make it hard for others to understand your
+report. Thus try to find a reproducer that's straight forward to describe and
+thus easy to understand in written form. Include all important details, but at
+the same time try to keep it as short as possible.
+
+In this in the previous steps you likely have learned a thing or two about the
+issue you face. Use this knowledge and search again for existing reports
+instead you can join.
+
+
+Decode failure messages
+-----------------------
+
+ *If your failure involves a 'panic', 'Oops', 'warning', or 'BUG', consider
+ decoding the kernel log to find the line of code that triggered the error.*
+
+When the kernel detects an internal problem, it will log some information about
+the executed code. This makes it possible to pinpoint the exact line in the
+source code that triggered the issue and shows how it was called. But that only
+works if you enabled CONFIG_DEBUG_INFO and CONFIG_KALLSYMS when configuring
+your kernel. If you did so, consider to decode the information from the
+kernel's log. That will make it a lot easier to understand what lead to the
+'panic', 'Oops', 'warning', or 'BUG', which increases the chances that someone
+can provide a fix.
+
+Decoding can be done with a script you find in the Linux source tree. If you
+are running a kernel you compiled yourself earlier, call it like this::
+
+ [user@something ~]$ sudo dmesg | ./linux-5.10.5/scripts/decode_stacktrace.sh ./linux-5.10.5/vmlinux
+
+If you are running a packaged vanilla kernel, you will likely have to install
+the corresponding packages with debug symbols. Then call the script (which you
+might need to get from the Linux sources if your distro does not package it)
+like this::
+
+ [user@something ~]$ sudo dmesg | ./linux-5.10.5/scripts/decode_stacktrace.sh \
+ /usr/lib/debug/lib/modules/5.10.10-4.1.x86_64/vmlinux /usr/src/kernels/5.10.10-4.1.x86_64/
+
+The script will work on log lines like the following, which show the address of
+the code the kernel was executing when the error occurred::
+
+ [ 68.387301] RIP: 0010:test_module_init+0x5/0xffa [test_module]
+
+Once decoded, these lines will look like this::
+
+ [ 68.387301] RIP: 0010:test_module_init (/home/username/linux-5.10.5/test-module/test-module.c:16) test_module
+
+In this case the executed code was built from the file
+'~/linux-5.10.5/test-module/test-module.c' and the error occurred by the
+instructions found in line '16'.
+
+The script will similarly decode the addresses mentioned in the section
+starting with 'Call trace', which show the path to the function where the
+problem occurred. Additionally, the script will show the assembler output for
+the code section the kernel was executing.
+
+Note, if you can't get this to work, simply skip this step and mention the
+reason for it in the report. If you're lucky, it might not be needed. And if it
+is, someone might help you to get things going. Also be aware this is just one
+of several ways to decode kernel stack traces. Sometimes different steps will
+be required to retrieve the relevant details. Don't worry about that, if that's
+needed in your case, developers will tell you what to do.
+
+
+Special care for regressions
+----------------------------
+
+ *If your problem is a regression, try to narrow down when the issue was
+ introduced as much as possible.*
+
+Linux lead developer Linus Torvalds insists that the Linux kernel never
+worsens, that's why he deems regressions as unacceptable and wants to see them
+fixed quickly. That's why changes that introduced a regression are often
+promptly reverted if the issue they cause can't get solved quickly any other
+way. Reporting a regression is thus a bit like playing a kind of trump card to
+get something quickly fixed. But for that to happen the change that's causing
+the regression needs to be known. Normally it's up to the reporter to track
+down the culprit, as maintainers often won't have the time or setup at hand to
+reproduce it themselves.
+
+To find the change there is a process called 'bisection' which the document
+Documentation/admin-guide/bug-bisect.rst describes in detail. That process
+will often require you to build about ten to twenty kernel images, trying to
+reproduce the issue with each of them before building the next. Yes, that takes
+some time, but don't worry, it works a lot quicker than most people assume.
+Thanks to a 'binary search' this will lead you to the one commit in the source
+code management system that's causing the regression. Once you find it, search
+the net for the subject of the change, its commit id and the shortened commit id
+(the first 12 characters of the commit id). This will lead you to existing
+reports about it, if there are any.
+
+Note, a bisection needs a bit of know-how, which not everyone has, and quite a
+bit of effort, which not everyone is willing to invest. Nevertheless, it's
+highly recommended performing a bisection yourself. If you really can't or
+don't want to go down that route at least find out which mainline kernel
+introduced the regression. If something for example breaks when switching from
+5.5.15 to 5.8.4, then try at least all the mainline releases in that area (5.6,
+5.7 and 5.8) to check when it first showed up. Unless you're trying to find a
+regression in a stable or longterm kernel, avoid testing versions which number
+has three sections (5.6.12, 5.7.8), as that makes the outcome hard to
+interpret, which might render your testing useless. Once you found the major
+version which introduced the regression, feel free to move on in the reporting
+process. But keep in mind: it depends on the issue at hand if the developers
+will be able to help without knowing the culprit. Sometimes they might
+recognize from the report want went wrong and can fix it; other times they will
+be unable to help unless you perform a bisection.
+
+When dealing with regressions make sure the issue you face is really caused by
+the kernel and not by something else, as outlined above already.
+
+In the whole process keep in mind: an issue only qualifies as regression if the
+older and the newer kernel got built with a similar configuration. This can be
+achieved by using ``make olddefconfig``, as explained in more detail by
+Documentation/admin-guide/reporting-regressions.rst; that document also
+provides a good deal of other information about regressions you might want to be
+aware of.
+
+
+Write and send the report
+-------------------------
+
+ *Start to compile the report by writing a detailed description about the
+ issue. Always mention a few things: the latest kernel version you installed
+ for reproducing, the Linux Distribution used, and your notes on how to
+ reproduce the issue. Ideally, make the kernel's build configuration
+ (.config) and the output from ``dmesg`` available somewhere on the net and
+ link to it. Include or upload all other information that might be relevant,
+ like the output/screenshot of an Oops or the output from ``lspci``. Once
+ you wrote this main part, insert a normal length paragraph on top of it
+ outlining the issue and the impact quickly. On top of this add one sentence
+ that briefly describes the problem and gets people to read on. Now give the
+ thing a descriptive title or subject that yet again is shorter. Then you're
+ ready to send or file the report like the MAINTAINERS file told you, unless
+ you are dealing with one of those 'issues of high priority': they need
+ special care which is explained in 'Special handling for high priority
+ issues' below.*
+
+Now that you have prepared everything it's time to write your report. How to do
+that is partly explained by the three documents linked to in the preface above.
+That's why this text will only mention a few of the essentials as well as
+things specific to the Linux kernel.
+
+There is one thing that fits both categories: the most crucial parts of your
+report are the title/subject, the first sentence, and the first paragraph.
+Developers often get quite a lot of mail. They thus often just take a few
+seconds to skim a mail before deciding to move on or look closer. Thus: the
+better the top section of your report, the higher are the chances that someone
+will look into it and help you. And that is why you should ignore them for now
+and write the detailed report first. ;-)
+
+Things each report should mention
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Describe in detail how your issue happens with the fresh vanilla kernel you
+installed. Try to include the step-by-step instructions you wrote and optimized
+earlier that outline how you and ideally others can reproduce the issue; in
+those rare cases where that's impossible try to describe what you did to
+trigger it.
+
+Also include all the relevant information others might need to understand the
+issue and its environment. What's actually needed depends a lot on the issue,
+but there are some things you should include always:
+
+ * the output from ``cat /proc/version``, which contains the Linux kernel
+ version number and the compiler it was built with.
+
+ * the Linux distribution the machine is running (``hostnamectl | grep
+ "Operating System"``)
+
+ * the architecture of the CPU and the operating system (``uname -mi``)
+
+ * if you are dealing with a regression and performed a bisection, mention the
+ subject and the commit-id of the change that is causing it.
+
+In a lot of cases it's also wise to make two more things available to those
+that read your report:
+
+ * the configuration used for building your Linux kernel (the '.config' file)
+
+ * the kernel's messages that you get from ``dmesg`` written to a file. Make
+ sure that it starts with a line like 'Linux version 5.8-1
+ (foobar@example.com) (gcc (GCC) 10.2.1, GNU ld version 2.34) #1 SMP Mon Aug
+ 3 14:54:37 UTC 2020' If it's missing, then important messages from the first
+ boot phase already got discarded. In this case instead consider using
+ ``journalctl -b 0 -k``; alternatively you can also reboot, reproduce the
+ issue and call ``dmesg`` right afterwards.
+
+These two files are big, that's why it's a bad idea to put them directly into
+your report. If you are filing the issue in a bug tracker then attach them to
+the ticket. If you report the issue by mail do not attach them, as that makes
+the mail too large; instead do one of these things:
+
+ * Upload the files somewhere public (your website, a public file paste
+ service, a ticket created just for this purpose on `bugzilla.kernel.org
+ <https://bugzilla.kernel.org/>`_, ...) and include a link to them in your
+ report. Ideally use something where the files stay available for years, as
+ they could be useful to someone many years from now; this for example can
+ happen if five or ten years from now a developer works on some code that was
+ changed just to fix your issue.
+
+ * Put the files aside and mention you will send them later in individual
+ replies to your own mail. Just remember to actually do that once the report
+ went out. ;-)
+
+Things that might be wise to provide
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Depending on the issue you might need to add more background data. Here are a
+few suggestions what often is good to provide:
+
+ * If you are dealing with a 'warning', an 'OOPS' or a 'panic' from the kernel,
+ include it. If you can't copy'n'paste it, try to capture a netconsole trace
+ or at least take a picture of the screen.
+
+ * If the issue might be related to your computer hardware, mention what kind
+ of system you use. If you for example have problems with your graphics card,
+ mention its manufacturer, the card's model, and what chip is uses. If it's a
+ laptop mention its name, but try to make sure it's meaningful. 'Dell XPS 13'
+ for example is not, because it might be the one from 2012; that one looks
+ not that different from the one sold today, but apart from that the two have
+ nothing in common. Hence, in such cases add the exact model number, which
+ for example are '9380' or '7390' for XPS 13 models introduced during 2019.
+ Names like 'Lenovo Thinkpad T590' are also somewhat ambiguous: there are
+ variants of this laptop with and without a dedicated graphics chip, so try
+ to find the exact model name or specify the main components.
+
+ * Mention the relevant software in use. If you have problems with loading
+ modules, you want to mention the versions of kmod, systemd, and udev in use.
+ If one of the DRM drivers misbehaves, you want to state the versions of
+ libdrm and Mesa; also specify your Wayland compositor or the X-Server and
+ its driver. If you have a filesystem issue, mention the version of
+ corresponding filesystem utilities (e2fsprogs, btrfs-progs, xfsprogs, ...).
+
+ * Gather additional information from the kernel that might be of interest. The
+ output from ``lspci -nn`` will for example help others to identify what
+ hardware you use. If you have a problem with hardware you even might want to
+ make the output from ``sudo lspci -vvv`` available, as that provides
+ insights how the components were configured. For some issues it might be
+ good to include the contents of files like ``/proc/cpuinfo``,
+ ``/proc/ioports``, ``/proc/iomem``, ``/proc/modules``, or
+ ``/proc/scsi/scsi``. Some subsystem also offer tools to collect relevant
+ information. One such tool is ``alsa-info.sh`` `which the audio/sound
+ subsystem developers provide <https://www.alsa-project.org/wiki/AlsaInfo>`_.
+
+Those examples should give your some ideas of what data might be wise to
+attach, but you have to think yourself what will be helpful for others to know.
+Don't worry too much about forgetting something, as developers will ask for
+additional details they need. But making everything important available from
+the start increases the chance someone will take a closer look.
+
+
+The important part: the head of your report
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Now that you have the detailed part of the report prepared let's get to the
+most important section: the first few sentences. Thus go to the top, add
+something like 'The detailed description:' before the part you just wrote and
+insert two newlines at the top. Now write one normal length paragraph that
+describes the issue roughly. Leave out all boring details and focus on the
+crucial parts readers need to know to understand what this is all about; if you
+think this bug affects a lot of users, mention this to get people interested.
+
+Once you did that insert two more lines at the top and write a one sentence
+summary that explains quickly what the report is about. After that you have to
+get even more abstract and write an even shorter subject/title for the report.
+
+Now that you have written this part take some time to optimize it, as it is the
+most important parts of your report: a lot of people will only read this before
+they decide if reading the rest is time well spent.
+
+Now send or file the report like the :ref:`MAINTAINERS <maintainers>` file told
+you, unless it's one of those 'issues of high priority' outlined earlier: in
+that case please read the next subsection first before sending the report on
+its way.
+
+Special handling for high priority issues
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Reports for high priority issues need special handling.
+
+**Severe issues**: make sure the subject or ticket title as well as the first
+paragraph makes the severeness obvious.
+
+**Regressions**: make the report's subject start with '[REGRESSION]'.
+
+In case you performed a successful bisection, use the title of the change that
+introduced the regression as the second part of your subject. Make the report
+also mention the commit id of the culprit. In case of an unsuccessful bisection,
+make your report mention the latest tested version that's working fine (say 5.7)
+and the oldest where the issue occurs (say 5.8-rc1).
+
+When sending the report by mail, CC the Linux regressions mailing list
+(regressions@lists.linux.dev). In case the report needs to be filed to some web
+tracker, proceed to do so. Once filed, forward the report by mail to the
+regressions list; CC the maintainer and the mailing list for the subsystem in
+question. Make sure to inline the forwarded report, hence do not attach it.
+Also add a short note at the top where you mention the URL to the ticket.
+
+When mailing or forwarding the report, in case of a successful bisection add the
+author of the culprit to the recipients; also CC everyone in the signed-off-by
+chain, which you find at the end of its commit message.
+
+**Security issues**: for these issues your will have to evaluate if a
+short-term risk to other users would arise if details were publicly disclosed.
+If that's not the case simply proceed with reporting the issue as described.
+For issues that bear such a risk you will need to adjust the reporting process
+slightly:
+
+ * If the MAINTAINERS file instructed you to report the issue by mail, do not
+ CC any public mailing lists.
+
+ * If you were supposed to file the issue in a bug tracker make sure to mark
+ the ticket as 'private' or 'security issue'. If the bug tracker does not
+ offer a way to keep reports private, forget about it and send your report as
+ a private mail to the maintainers instead.
+
+In both cases make sure to also mail your report to the addresses the
+MAINTAINERS file lists in the section 'security contact'. Ideally directly CC
+them when sending the report by mail. If you filed it in a bug tracker, forward
+the report's text to these addresses; but on top of it put a small note where
+you mention that you filed it with a link to the ticket.
+
+See Documentation/process/security-bugs.rst for more information.
+
+
+Duties after the report went out
+--------------------------------
+
+ *Wait for reactions and keep the thing rolling until you can accept the
+ outcome in one way or the other. Thus react publicly and in a timely manner
+ to any inquiries. Test proposed fixes. Do proactive testing: retest with at
+ least every first release candidate (RC) of a new mainline version and
+ report your results. Send friendly reminders if things stall. And try to
+ help yourself, if you don't get any help or if it's unsatisfying.*
+
+If your report was good and you are really lucky then one of the developers
+might immediately spot what's causing the issue; they then might write a patch
+to fix it, test it, and send it straight for integration in mainline while
+tagging it for later backport to stable and longterm kernels that need it. Then
+all you need to do is reply with a 'Thank you very much' and switch to a version
+with the fix once it gets released.
+
+But this ideal scenario rarely happens. That's why the job is only starting
+once you got the report out. What you'll have to do depends on the situations,
+but often it will be the things listed below. But before digging into the
+details, here are a few important things you need to keep in mind for this part
+of the process.
+
+
+General advice for further interactions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+**Always reply in public**: When you filed the issue in a bug tracker, always
+reply there and do not contact any of the developers privately about it. For
+mailed reports always use the 'Reply-all' function when replying to any mails
+you receive. That includes mails with any additional data you might want to add
+to your report: go to your mail applications 'Sent' folder and use 'reply-all'
+on your mail with the report. This approach will make sure the public mailing
+list(s) and everyone else that gets involved over time stays in the loop; it
+also keeps the mail thread intact, which among others is really important for
+mailing lists to group all related mails together.
+
+There are just two situations where a comment in a bug tracker or a 'Reply-all'
+is unsuitable:
+
+ * Someone tells you to send something privately.
+
+ * You were told to send something, but noticed it contains sensitive
+ information that needs to be kept private. In that case it's okay to send it
+ in private to the developer that asked for it. But note in the ticket or a
+ mail that you did that, so everyone else knows you honored the request.
+
+**Do research before asking for clarifications or help**: In this part of the
+process someone might tell you to do something that requires a skill you might
+not have mastered yet. For example, you might be asked to use some test tools
+you never have heard of yet; or you might be asked to apply a patch to the
+Linux kernel sources to test if it helps. In some cases it will be fine sending
+a reply asking for instructions how to do that. But before going that route try
+to find the answer own your own by searching the internet; alternatively
+consider asking in other places for advice. For example ask a friend or post
+about it to a chatroom or forum you normally hang out.
+
+**Be patient**: If you are really lucky you might get a reply to your report
+within a few hours. But most of the time it will take longer, as maintainers
+are scattered around the globe and thus might be in a different time zone – one
+where they already enjoy their night away from keyboard.
+
+In general, kernel developers will take one to five business days to respond to
+reports. Sometimes it will take longer, as they might be busy with the merge
+windows, other work, visiting developer conferences, or simply enjoying a long
+summer holiday.
+
+The 'issues of high priority' (see above for an explanation) are an exception
+here: maintainers should address them as soon as possible; that's why you
+should wait a week at maximum (or just two days if it's something urgent)
+before sending a friendly reminder.
+
+Sometimes the maintainer might not be responding in a timely manner; other
+times there might be disagreements, for example if an issue qualifies as
+regression or not. In such cases raise your concerns on the mailing list and
+ask others for public or private replies how to move on. If that fails, it
+might be appropriate to get a higher authority involved. In case of a WiFi
+driver that would be the wireless maintainers; if there are no higher level
+maintainers or all else fails, it might be one of those rare situations where
+it's okay to get Linus Torvalds involved.
+
+**Proactive testing**: Every time the first pre-release (the 'rc1') of a new
+mainline kernel version gets released, go and check if the issue is fixed there
+or if anything of importance changed. Mention the outcome in the ticket or in a
+mail you sent as reply to your report (make sure it has all those in the CC
+that up to that point participated in the discussion). This will show your
+commitment and that you are willing to help. It also tells developers if the
+issue persists and makes sure they do not forget about it. A few other
+occasional retests (for example with rc3, rc5 and the final) are also a good
+idea, but only report your results if something relevant changed or if you are
+writing something anyway.
+
+With all these general things off the table let's get into the details of how
+to help to get issues resolved once they were reported.
+
+Inquires and testing request
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Here are your duties in case you got replies to your report:
+
+**Check who you deal with**: Most of the time it will be the maintainer or a
+developer of the particular code area that will respond to your report. But as
+issues are normally reported in public it could be anyone that's replying —
+including people that want to help, but in the end might guide you totally off
+track with their questions or requests. That rarely happens, but it's one of
+many reasons why it's wise to quickly run an internet search to see who you're
+interacting with. By doing this you also get aware if your report was heard by
+the right people, as a reminder to the maintainer (see below) might be in order
+later if discussion fades out without leading to a satisfying solution for the
+issue.
+
+**Inquiries for data**: Often you will be asked to test something or provide
+additional details. Try to provide the requested information soon, as you have
+the attention of someone that might help and risk losing it the longer you
+wait; that outcome is even likely if you do not provide the information within
+a few business days.
+
+**Requests for testing**: When you are asked to test a diagnostic patch or a
+possible fix, try to test it in timely manner, too. But do it properly and make
+sure to not rush it: mixing things up can happen easily and can lead to a lot
+of confusion for everyone involved. A common mistake for example is thinking a
+proposed patch with a fix was applied, but in fact wasn't. Things like that
+happen even to experienced testers occasionally, but they most of the time will
+notice when the kernel with the fix behaves just as one without it.
+
+What to do when nothing of substance happens
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Some reports will not get any reaction from the responsible Linux kernel
+developers; or a discussion around the issue evolved, but faded out with
+nothing of substance coming out of it.
+
+In these cases wait two (better: three) weeks before sending a friendly
+reminder: maybe the maintainer was just away from keyboard for a while when
+your report arrived or had something more important to take care of. When
+writing the reminder, kindly ask if anything else from your side is needed to
+get the ball running somehow. If the report got out by mail, do that in the
+first lines of a mail that is a reply to your initial mail (see above) which
+includes a full quote of the original report below: that's on of those few
+situations where such a 'TOFU' (Text Over, Fullquote Under) is the right
+approach, as then all the recipients will have the details at hand immediately
+in the proper order.
+
+After the reminder wait three more weeks for replies. If you still don't get a
+proper reaction, you first should reconsider your approach. Did you maybe try
+to reach out to the wrong people? Was the report maybe offensive or so
+confusing that people decided to completely stay away from it? The best way to
+rule out such factors: show the report to one or two people familiar with FLOSS
+issue reporting and ask for their opinion. Also ask them for their advice how
+to move forward. That might mean: prepare a better report and make those people
+review it before you send it out. Such an approach is totally fine; just
+mention that this is the second and improved report on the issue and include a
+link to the first report.
+
+If the report was proper you can send a second reminder; in it ask for advice
+why the report did not get any replies. A good moment for this second reminder
+mail is shortly after the first pre-release (the 'rc1') of a new Linux kernel
+version got published, as you should retest and provide a status update at that
+point anyway (see above).
+
+If the second reminder again results in no reaction within a week, try to
+contact a higher-level maintainer asking for advice: even busy maintainers by
+then should at least have sent some kind of acknowledgment.
+
+Remember to prepare yourself for a disappointment: maintainers ideally should
+react somehow to every issue report, but they are only obliged to fix those
+'issues of high priority' outlined earlier. So don't be too devastating if you
+get a reply along the lines of 'thanks for the report, I have more important
+issues to deal with currently and won't have time to look into this for the
+foreseeable future'.
+
+It's also possible that after some discussion in the bug tracker or on a list
+nothing happens anymore and reminders don't help to motivate anyone to work out
+a fix. Such situations can be devastating, but is within the cards when it
+comes to Linux kernel development. This and several other reasons for not
+getting help are explained in 'Why some issues won't get any reaction or remain
+unfixed after being reported' near the end of this document.
+
+Don't get devastated if you don't find any help or if the issue in the end does
+not get solved: the Linux kernel is FLOSS and thus you can still help yourself.
+You for example could try to find others that are affected and team up with
+them to get the issue resolved. Such a team could prepare a fresh report
+together that mentions how many you are and why this is something that in your
+option should get fixed. Maybe together you can also narrow down the root cause
+or the change that introduced a regression, which often makes developing a fix
+easier. And with a bit of luck there might be someone in the team that knows a
+bit about programming and might be able to write a fix.
+
+
+Reference for "Reporting regressions within a stable and longterm kernel line"
+------------------------------------------------------------------------------
+
+This subsection provides details for the steps you need to perform if you face
+a regression within a stable and longterm kernel line.
+
+Make sure the particular version line still gets support
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Check if the kernel developers still maintain the Linux kernel version
+ line you care about: go to the front page of kernel.org and make sure it
+ mentions the latest release of the particular version line without an
+ '[EOL]' tag.*
+
+Most kernel version lines only get supported for about three months, as
+maintaining them longer is quite a lot of work. Hence, only one per year is
+chosen and gets supported for at least two years (often six). That's why you
+need to check if the kernel developers still support the version line you care
+for.
+
+Note, if kernel.org lists two stable version lines on the front page, you
+should consider switching to the newer one and forget about the older one:
+support for it is likely to be abandoned soon. Then it will get a "end-of-life"
+(EOL) stamp. Version lines that reached that point still get mentioned on the
+kernel.org front page for a week or two, but are unsuitable for testing and
+reporting.
+
+Search stable mailing list
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Check the archives of the Linux stable mailing list for existing reports.*
+
+Maybe the issue you face is already known and was fixed or is about to. Hence,
+`search the archives of the Linux stable mailing list
+<https://lore.kernel.org/stable/>`_ for reports about an issue like yours. If
+you find any matches, consider joining the discussion, unless the fix is
+already finished and scheduled to get applied soon.
+
+Reproduce issue with the newest release
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Install the latest release from the particular version line as a vanilla
+ kernel. Ensure this kernel is not tainted and still shows the problem, as
+ the issue might have already been fixed there. If you first noticed the
+ problem with a vendor kernel, check a vanilla build of the last version
+ known to work performs fine as well.*
+
+Before investing any more time in this process you want to check if the issue
+was already fixed in the latest release of version line you're interested in.
+This kernel needs to be vanilla and shouldn't be tainted before the issue
+happens, as detailed outlined already above in the section "Install a fresh
+kernel for testing".
+
+Did you first notice the regression with a vendor kernel? Then changes the
+vendor applied might be interfering. You need to rule that out by performing
+a recheck. Say something broke when you updated from 5.10.4-vendor.42 to
+5.10.5-vendor.43. Then after testing the latest 5.10 release as outlined in
+the previous paragraph check if a vanilla build of Linux 5.10.4 works fine as
+well. If things are broken there, the issue does not qualify as upstream
+regression and you need switch back to the main step-by-step guide to report
+the issue.
+
+Report the regression
+~~~~~~~~~~~~~~~~~~~~~
+
+ *Send a short problem report to the Linux stable mailing list
+ (stable@vger.kernel.org) and CC the Linux regressions mailing list
+ (regressions@lists.linux.dev); if you suspect the cause in a particular
+ subsystem, CC its maintainer and its mailing list. Roughly describe the
+ issue and ideally explain how to reproduce it. Mention the first version
+ that shows the problem and the last version that's working fine. Then
+ wait for further instructions.*
+
+When reporting a regression that happens within a stable or longterm kernel
+line (say when updating from 5.10.4 to 5.10.5) a brief report is enough for
+the start to get the issue reported quickly. Hence a rough description to the
+stable and regressions mailing list is all it takes; but in case you suspect
+the cause in a particular subsystem, CC its maintainers and its mailing list
+as well, because that will speed things up.
+
+And note, it helps developers a great deal if you can specify the exact version
+that introduced the problem. Hence if possible within a reasonable time frame,
+try to find that version using vanilla kernels. Let's assume something broke when
+your distributor released a update from Linux kernel 5.10.5 to 5.10.8. Then as
+instructed above go and check the latest kernel from that version line, say
+5.10.9. If it shows the problem, try a vanilla 5.10.5 to ensure that no patches
+the distributor applied interfere. If the issue doesn't manifest itself there,
+try 5.10.7 and then (depending on the outcome) 5.10.8 or 5.10.6 to find the
+first version where things broke. Mention it in the report and state that 5.10.9
+is still broken.
+
+What the previous paragraph outlines is basically a rough manual 'bisection'.
+Once your report is out your might get asked to do a proper one, as it allows to
+pinpoint the exact change that causes the issue (which then can easily get
+reverted to fix the issue quickly). Hence consider to do a proper bisection
+right away if time permits. See the section 'Special care for regressions' and
+the document Documentation/admin-guide/bug-bisect.rst for details how to
+perform one. In case of a successful bisection add the author of the culprit to
+the recipients; also CC everyone in the signed-off-by chain, which you find at
+the end of its commit message.
+
+
+Reference for "Reporting issues only occurring in older kernel version lines"
+-----------------------------------------------------------------------------
+
+This section provides details for the steps you need to take if you could not
+reproduce your issue with a mainline kernel, but want to see it fixed in older
+version lines (aka stable and longterm kernels).
+
+Some fixes are too complex
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Prepare yourself for the possibility that going through the next few steps
+ might not get the issue solved in older releases: the fix might be too big
+ or risky to get backported there.*
+
+Even small and seemingly obvious code-changes sometimes introduce new and
+totally unexpected problems. The maintainers of the stable and longterm kernels
+are very aware of that and thus only apply changes to these kernels that are
+within rules outlined in Documentation/process/stable-kernel-rules.rst.
+
+Complex or risky changes for example do not qualify and thus only get applied
+to mainline. Other fixes are easy to get backported to the newest stable and
+longterm kernels, but too risky to integrate into older ones. So be aware the
+fix you are hoping for might be one of those that won't be backported to the
+version line your care about. In that case you'll have no other choice then to
+live with the issue or switch to a newer Linux version, unless you want to
+patch the fix into your kernels yourself.
+
+Common preparations
+~~~~~~~~~~~~~~~~~~~
+
+ *Perform the first three steps in the section "Reporting issues only
+ occurring in older kernel version lines" above.*
+
+You need to carry out a few steps already described in another section of this
+guide. Those steps will let you:
+
+ * Check if the kernel developers still maintain the Linux kernel version line
+ you care about.
+
+ * Search the Linux stable mailing list for exiting reports.
+
+ * Check with the latest release.
+
+
+Check code history and search for existing discussions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Search the Linux kernel version control system for the change that fixed
+ the issue in mainline, as its commit message might tell you if the fix is
+ scheduled for backporting already. If you don't find anything that way,
+ search the appropriate mailing lists for posts that discuss such an issue
+ or peer-review possible fixes; then check the discussions if the fix was
+ deemed unsuitable for backporting. If backporting was not considered at
+ all, join the newest discussion, asking if it's in the cards.*
+
+In a lot of cases the issue you deal with will have happened with mainline, but
+got fixed there. The commit that fixed it would need to get backported as well
+to get the issue solved. That's why you want to search for it or any
+discussions abound it.
+
+ * First try to find the fix in the Git repository that holds the Linux kernel
+ sources. You can do this with the web interfaces `on kernel.org
+ <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/>`_
+ or its mirror `on GitHub <https://github.com/torvalds/linux>`_; if you have
+ a local clone you alternatively can search on the command line with ``git
+ log --grep=<pattern>``.
+
+ If you find the fix, look if the commit message near the end contains a
+ 'stable tag' that looks like this:
+
+ Cc: <stable@vger.kernel.org> # 5.4+
+
+ If that's case the developer marked the fix safe for backporting to version
+ line 5.4 and later. Most of the time it's getting applied there within two
+ weeks, but sometimes it takes a bit longer.
+
+ * If the commit doesn't tell you anything or if you can't find the fix, look
+ again for discussions about the issue. Search the net with your favorite
+ internet search engine as well as the archives for the `Linux kernel
+ developers mailing list <https://lore.kernel.org/lkml/>`_. Also read the
+ section `Locate kernel area that causes the issue` above and follow the
+ instructions to find the subsystem in question: its bug tracker or mailing
+ list archive might have the answer you are looking for.
+
+ * If you see a proposed fix, search for it in the version control system as
+ outlined above, as the commit might tell you if a backport can be expected.
+
+ * Check the discussions for any indicators the fix might be too risky to get
+ backported to the version line you care about. If that's the case you have
+ to live with the issue or switch to the kernel version line where the fix
+ got applied.
+
+ * If the fix doesn't contain a stable tag and backporting was not discussed,
+ join the discussion: mention the version where you face the issue and that
+ you would like to see it fixed, if suitable.
+
+
+Ask for advice
+~~~~~~~~~~~~~~
+
+ *One of the former steps should lead to a solution. If that doesn't work
+ out, ask the maintainers for the subsystem that seems to be causing the
+ issue for advice; CC the mailing list for the particular subsystem as well
+ as the stable mailing list.*
+
+If the previous three steps didn't get you closer to a solution there is only
+one option left: ask for advice. Do that in a mail you sent to the maintainers
+for the subsystem where the issue seems to have its roots; CC the mailing list
+for the subsystem as well as the stable mailing list (stable@vger.kernel.org).
+
+
+Why some issues won't get any reaction or remain unfixed after being reported
+=============================================================================
+
+When reporting a problem to the Linux developers, be aware only 'issues of high
+priority' (regressions, security issues, severe problems) are definitely going
+to get resolved. The maintainers or if all else fails Linus Torvalds himself
+will make sure of that. They and the other kernel developers will fix a lot of
+other issues as well. But be aware that sometimes they can't or won't help; and
+sometimes there isn't even anyone to send a report to.
+
+This is best explained with kernel developers that contribute to the Linux
+kernel in their spare time. Quite a few of the drivers in the kernel were
+written by such programmers, often because they simply wanted to make their
+hardware usable on their favorite operating system.
+
+These programmers most of the time will happily fix problems other people
+report. But nobody can force them to do, as they are contributing voluntarily.
+
+Then there are situations where such developers really want to fix an issue,
+but can't: sometimes they lack hardware programming documentation to do so.
+This often happens when the publicly available docs are superficial or the
+driver was written with the help of reverse engineering.
+
+Sooner or later spare time developers will also stop caring for the driver.
+Maybe their test hardware broke, got replaced by something more fancy, or is so
+old that it's something you don't find much outside of computer museums
+anymore. Sometimes developer stops caring for their code and Linux at all, as
+something different in their life became way more important. In some cases
+nobody is willing to take over the job as maintainer – and nobody can be forced
+to, as contributing to the Linux kernel is done on a voluntary basis. Abandoned
+drivers nevertheless remain in the kernel: they are still useful for people and
+removing would be a regression.
+
+The situation is not that different with developers that are paid for their
+work on the Linux kernel. Those contribute most changes these days. But their
+employers sooner or later also stop caring for their code or make its
+programmer focus on other things. Hardware vendors for example earn their money
+mainly by selling new hardware; quite a few of them hence are not investing
+much time and energy in maintaining a Linux kernel driver for something they
+stopped selling years ago. Enterprise Linux distributors often care for a
+longer time period, but in new versions often leave support for old and rare
+hardware aside to limit the scope. Often spare time contributors take over once
+a company orphans some code, but as mentioned above: sooner or later they will
+leave the code behind, too.
+
+Priorities are another reason why some issues are not fixed, as maintainers
+quite often are forced to set those, as time to work on Linux is limited.
+That's true for spare time or the time employers grant their developers to
+spend on maintenance work on the upstream kernel. Sometimes maintainers also
+get overwhelmed with reports, even if a driver is working nearly perfectly. To
+not get completely stuck, the programmer thus might have no other choice than
+to prioritize issue reports and reject some of them.
+
+But don't worry too much about all of this, a lot of drivers have active
+maintainers who are quite interested in fixing as many issues as possible.
+
+
+Closing words
+=============
+
+Compared with other Free/Libre & Open Source Software it's hard to report
+issues to the Linux kernel developers: the length and complexity of this
+document and the implications between the lines illustrate that. But that's how
+it is for now. The main author of this text hopes documenting the state of the
+art will lay some groundwork to improve the situation over time.
+
+
+..
+ end-of-content
+..
+ This document is maintained by Thorsten Leemhuis <linux@leemhuis.info>. If
+ you spot a typo or small mistake, feel free to let him know directly and
+ he'll fix it. You are free to do the same in a mostly informal way if you
+ want to contribute changes to the text, but for copyright reasons please CC
+ linux-doc@vger.kernel.org and "sign-off" your contribution as
+ Documentation/process/submitting-patches.rst outlines in the section "Sign
+ your work - the Developer's Certificate of Origin".
+..
+ This text is available under GPL-2.0+ or CC-BY-4.0, as stated at the top
+ of the file. If you want to distribute this text under CC-BY-4.0 only,
+ please use "The Linux kernel developers" for author attribution and link
+ this as source:
+ https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/admin-guide/reporting-issues.rst
+..
+ Note: Only the content of this RST file as found in the Linux kernel sources
+ is available under CC-BY-4.0, as versions of this text that were processed
+ (for example by the kernel's build system) might contain content taken from
+ files which use a more restrictive license.
diff --git a/Documentation/admin-guide/reporting-regressions.rst b/Documentation/admin-guide/reporting-regressions.rst
new file mode 100644
index 000000000000..946518355a2c
--- /dev/null
+++ b/Documentation/admin-guide/reporting-regressions.rst
@@ -0,0 +1,451 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR CC-BY-4.0)
+.. [see the bottom of this file for redistribution information]
+
+Reporting regressions
++++++++++++++++++++++
+
+"*We don't cause regressions*" is the first rule of Linux kernel development;
+Linux founder and lead developer Linus Torvalds established it himself and
+ensures it's obeyed.
+
+This document describes what the rule means for users and how the Linux kernel's
+development model ensures to address all reported regressions; aspects relevant
+for kernel developers are left to Documentation/process/handling-regressions.rst.
+
+
+The important bits (aka "TL;DR")
+================================
+
+#. It's a regression if something running fine with one Linux kernel works worse
+ or not at all with a newer version. Note, the newer kernel has to be compiled
+ using a similar configuration; the detailed explanations below describes this
+ and other fine print in more detail.
+
+#. Report your issue as outlined in Documentation/admin-guide/reporting-issues.rst,
+ it already covers all aspects important for regressions and repeated
+ below for convenience. Two of them are important: start your report's subject
+ with "[REGRESSION]" and CC or forward it to `the regression mailing list
+ <https://lore.kernel.org/regressions/>`_ (regressions@lists.linux.dev).
+
+#. Optional, but recommended: when sending or forwarding your report, make the
+ Linux kernel regression tracking bot "regzbot" track the issue by specifying
+ when the regression started like this::
+
+ #regzbot introduced: v5.13..v5.14-rc1
+
+
+All the details on Linux kernel regressions relevant for users
+==============================================================
+
+
+The important basics
+--------------------
+
+
+What is a "regression" and what is the "no regressions" rule?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+It's a regression if some application or practical use case running fine with
+one Linux kernel works worse or not at all with a newer version compiled using a
+similar configuration. The "no regressions" rule forbids this to take place; if
+it happens by accident, developers that caused it are expected to quickly fix
+the issue.
+
+It thus is a regression when a WiFi driver from Linux 5.13 works fine, but with
+5.14 doesn't work at all, works significantly slower, or misbehaves somehow.
+It's also a regression if a perfectly working application suddenly shows erratic
+behavior with a newer kernel version; such issues can be caused by changes in
+procfs, sysfs, or one of the many other interfaces Linux provides to userland
+software. But keep in mind, as mentioned earlier: 5.14 in this example needs to
+be built from a configuration similar to the one from 5.13. This can be achieved
+using ``make olddefconfig``, as explained in more detail below.
+
+Note the "practical use case" in the first sentence of this section: developers
+despite the "no regressions" rule are free to change any aspect of the kernel
+and even APIs or ABIs to userland, as long as no existing application or use
+case breaks.
+
+Also be aware the "no regressions" rule covers only interfaces the kernel
+provides to the userland. It thus does not apply to kernel-internal interfaces
+like the module API, which some externally developed drivers use to hook into
+the kernel.
+
+How do I report a regression?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Just report the issue as outlined in
+Documentation/admin-guide/reporting-issues.rst, it already describes the
+important points. The following aspects outlined there are especially relevant
+for regressions:
+
+ * When checking for existing reports to join, also search the `archives of the
+ Linux regressions mailing list <https://lore.kernel.org/regressions/>`_ and
+ `regzbot's web-interface <https://linux-regtracking.leemhuis.info/regzbot/>`_.
+
+ * Start your report's subject with "[REGRESSION]".
+
+ * In your report, clearly mention the last kernel version that worked fine and
+ the first broken one. Ideally try to find the exact change causing the
+ regression using a bisection, as explained below in more detail.
+
+ * Remember to let the Linux regressions mailing list
+ (regressions@lists.linux.dev) know about your report:
+
+ * If you report the regression by mail, CC the regressions list.
+
+ * If you report your regression to some bug tracker, forward the submitted
+ report by mail to the regressions list while CCing the maintainer and the
+ mailing list for the subsystem in question.
+
+ If it's a regression within a stable or longterm series (e.g.
+ v5.15.3..v5.15.5), remember to CC the `Linux stable mailing list
+ <https://lore.kernel.org/stable/>`_ (stable@vger.kernel.org).
+
+ In case you performed a successful bisection, add everyone to the CC the
+ culprit's commit message mentions in lines starting with "Signed-off-by:".
+
+When CCing for forwarding your report to the list, consider directly telling the
+aforementioned Linux kernel regression tracking bot about your report. To do
+that, include a paragraph like this in your mail::
+
+ #regzbot introduced: v5.13..v5.14-rc1
+
+Regzbot will then consider your mail a report for a regression introduced in the
+specified version range. In above case Linux v5.13 still worked fine and Linux
+v5.14-rc1 was the first version where you encountered the issue. If you
+performed a bisection to find the commit that caused the regression, specify the
+culprit's commit-id instead::
+
+ #regzbot introduced: 1f2e3d4c5d
+
+Placing such a "regzbot command" is in your interest, as it will ensure the
+report won't fall through the cracks unnoticed. If you omit this, the Linux
+kernel's regressions tracker will take care of telling regzbot about your
+regression, as long as you send a copy to the regressions mailing lists. But the
+regression tracker is just one human which sometimes has to rest or occasionally
+might even enjoy some time away from computers (as crazy as that might sound).
+Relying on this person thus will result in an unnecessary delay before the
+regressions becomes mentioned `on the list of tracked and unresolved Linux
+kernel regressions <https://linux-regtracking.leemhuis.info/regzbot/>`_ and the
+weekly regression reports sent by regzbot. Such delays can result in Linus
+Torvalds being unaware of important regressions when deciding between "continue
+development or call this finished and release the final?".
+
+Are really all regressions fixed?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Nearly all of them are, as long as the change causing the regression (the
+"culprit commit") is reliably identified. Some regressions can be fixed without
+this, but often it's required.
+
+Who needs to find the root cause of a regression?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Developers of the affected code area should try to locate the culprit on their
+own. But for them that's often impossible to do with reasonable effort, as quite
+a lot of issues only occur in a particular environment outside the developer's
+reach -- for example, a specific hardware platform, firmware, Linux distro,
+system's configuration, or application. That's why in the end it's often up to
+the reporter to locate the culprit commit; sometimes users might even need to
+run additional tests afterwards to pinpoint the exact root cause. Developers
+should offer advice and reasonably help where they can, to make this process
+relatively easy and achievable for typical users.
+
+How can I find the culprit?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Perform a bisection, as roughly outlined in
+Documentation/admin-guide/reporting-issues.rst and described in more detail by
+Documentation/admin-guide/bug-bisect.rst. It might sound like a lot of work, but
+in many cases finds the culprit relatively quickly. If it's hard or
+time-consuming to reliably reproduce the issue, consider teaming up with other
+affected users to narrow down the search range together.
+
+Who can I ask for advice when it comes to regressions?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Send a mail to the regressions mailing list (regressions@lists.linux.dev) while
+CCing the Linux kernel's regression tracker (regressions@leemhuis.info); if the
+issue might better be dealt with in private, feel free to omit the list.
+
+
+Additional details about regressions
+------------------------------------
+
+
+What is the goal of the "no regressions" rule?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Users should feel safe when updating kernel versions and not have to worry
+something might break. This is in the interest of the kernel developers to make
+updating attractive: they don't want users to stay on stable or longterm Linux
+series that are either abandoned or more than one and a half years old. That's
+in everybody's interest, as `those series might have known bugs, security
+issues, or other problematic aspects already fixed in later versions
+<http://www.kroah.com/log/blog/2018/08/24/what-stable-kernel-should-i-use/>`_.
+Additionally, the kernel developers want to make it simple and appealing for
+users to test the latest pre-release or regular release. That's also in
+everybody's interest, as it's a lot easier to track down and fix problems, if
+they are reported shortly after being introduced.
+
+Is the "no regressions" rule really adhered in practice?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+It's taken really seriously, as can be seen by many mailing list posts from
+Linux creator and lead developer Linus Torvalds, some of which are quoted in
+Documentation/process/handling-regressions.rst.
+
+Exceptions to this rule are extremely rare; in the past developers almost always
+turned out to be wrong when they assumed a particular situation was warranting
+an exception.
+
+Who ensures the "no regressions" rule is actually followed?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The subsystem maintainers should take care of that, which are watched and
+supported by the tree maintainers -- e.g. Linus Torvalds for mainline and
+Greg Kroah-Hartman et al. for various stable/longterm series.
+
+All of them are helped by people trying to ensure no regression report falls
+through the cracks. One of them is Thorsten Leemhuis, who's currently acting as
+the Linux kernel's "regressions tracker"; to facilitate this work he relies on
+regzbot, the Linux kernel regression tracking bot. That's why you want to bring
+your report on the radar of these people by CCing or forwarding each report to
+the regressions mailing list, ideally with a "regzbot command" in your mail to
+get it tracked immediately.
+
+How quickly are regressions normally fixed?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Developers should fix any reported regression as quickly as possible, to provide
+affected users with a solution in a timely manner and prevent more users from
+running into the issue; nevertheless developers need to take enough time and
+care to ensure regression fixes do not cause additional damage.
+
+The answer thus depends on various factors like the impact of a regression, its
+age, or the Linux series in which it occurs. In the end though, most regressions
+should be fixed within two weeks.
+
+Is it a regression, if the issue can be avoided by updating some software?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Almost always: yes. If a developer tells you otherwise, ask the regression
+tracker for advice as outlined above.
+
+Is it a regression, if a newer kernel works slower or consumes more energy?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Yes, but the difference has to be significant. A five percent slow-down in a
+micro-benchmark thus is unlikely to qualify as regression, unless it also
+influences the results of a broad benchmark by more than one percent. If in
+doubt, ask for advice.
+
+Is it a regression, if an external kernel module breaks when updating Linux?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+No, as the "no regression" rule is about interfaces and services the Linux
+kernel provides to the userland. It thus does not cover building or running
+externally developed kernel modules, as they run in kernel-space and hook into
+the kernel using internal interfaces occasionally changed.
+
+How are regressions handled that are caused by security fixes?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In extremely rare situations security issues can't be fixed without causing
+regressions; those fixes are given way, as they are the lesser evil in the end.
+Luckily this middling almost always can be avoided, as key developers for the
+affected area and often Linus Torvalds himself try very hard to fix security
+issues without causing regressions.
+
+If you nevertheless face such a case, check the mailing list archives if people
+tried their best to avoid the regression. If not, report it; if in doubt, ask
+for advice as outlined above.
+
+What happens if fixing a regression is impossible without causing another?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Sadly these things happen, but luckily not very often; if they occur, expert
+developers of the affected code area should look into the issue to find a fix
+that avoids regressions or at least their impact. If you run into such a
+situation, do what was outlined already for regressions caused by security
+fixes: check earlier discussions if people already tried their best and ask for
+advice if in doubt.
+
+A quick note while at it: these situations could be avoided, if people would
+regularly give mainline pre-releases (say v5.15-rc1 or -rc3) from each
+development cycle a test run. This is best explained by imagining a change
+integrated between Linux v5.14 and v5.15-rc1 which causes a regression, but at
+the same time is a hard requirement for some other improvement applied for
+5.15-rc1. All these changes often can simply be reverted and the regression thus
+solved, if someone finds and reports it before 5.15 is released. A few days or
+weeks later this solution can become impossible, as some software might have
+started to rely on aspects introduced by one of the follow-up changes: reverting
+all changes would then cause a regression for users of said software and thus is
+out of the question.
+
+Is it a regression, if some feature I relied on was removed months ago?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+It is, but often it's hard to fix such regressions due to the aspects outlined
+in the previous section. It hence needs to be dealt with on a case-by-case
+basis. This is another reason why it's in everybody's interest to regularly test
+mainline pre-releases.
+
+Does the "no regression" rule apply if I seem to be the only affected person?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+It does, but only for practical usage: the Linux developers want to be free to
+remove support for hardware only to be found in attics and museums anymore.
+
+Note, sometimes regressions can't be avoided to make progress -- and the latter
+is needed to prevent Linux from stagnation. Hence, if only very few users seem
+to be affected by a regression, it for the greater good might be in their and
+everyone else's interest to lettings things pass. Especially if there is an
+easy way to circumvent the regression somehow, for example by updating some
+software or using a kernel parameter created just for this purpose.
+
+Does the regression rule apply for code in the staging tree as well?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Not according to the `help text for the configuration option covering all
+staging code <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/staging/Kconfig>`_,
+which since its early days states::
+
+ Please note that these drivers are under heavy development, may or
+ may not work, and may contain userspace interfaces that most likely
+ will be changed in the near future.
+
+The staging developers nevertheless often adhere to the "no regressions" rule,
+but sometimes bend it to make progress. That's for example why some users had to
+deal with (often negligible) regressions when a WiFi driver from the staging
+tree was replaced by a totally different one written from scratch.
+
+Why do later versions have to be "compiled with a similar configuration"?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Because the Linux kernel developers sometimes integrate changes known to cause
+regressions, but make them optional and disable them in the kernel's default
+configuration. This trick allows progress, as the "no regressions" rule
+otherwise would lead to stagnation.
+
+Consider for example a new security feature blocking access to some kernel
+interfaces often abused by malware, which at the same time are required to run a
+few rarely used applications. The outlined approach makes both camps happy:
+people using these applications can leave the new security feature off, while
+everyone else can enable it without running into trouble.
+
+How to create a configuration similar to the one of an older kernel?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Start your machine with a known-good kernel and configure the newer Linux
+version with ``make olddefconfig``. This makes the kernel's build scripts pick
+up the configuration file (the ".config" file) from the running kernel as base
+for the new one you are about to compile; afterwards they set all new
+configuration options to their default value, which should disable new features
+that might cause regressions.
+
+Can I report a regression I found with pre-compiled vanilla kernels?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+You need to ensure the newer kernel was compiled with a similar configuration
+file as the older one (see above), as those that built them might have enabled
+some known-to-be incompatible feature for the newer kernel. If in doubt, report
+the matter to the kernel's provider and ask for advice.
+
+
+More about regression tracking with "regzbot"
+---------------------------------------------
+
+What is regression tracking and why should I care about it?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Rules like "no regressions" need someone to ensure they are followed, otherwise
+they are broken either accidentally or on purpose. History has shown this to be
+true for Linux kernel development as well. That's why Thorsten Leemhuis, the
+Linux Kernel's regression tracker, and some people try to ensure all regression
+are fixed by keeping an eye on them until they are resolved. Neither of them are
+paid for this, that's why the work is done on a best effort basis.
+
+Why and how are Linux kernel regressions tracked using a bot?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Tracking regressions completely manually has proven to be quite hard due to the
+distributed and loosely structured nature of Linux kernel development process.
+That's why the Linux kernel's regression tracker developed regzbot to facilitate
+the work, with the long term goal to automate regression tracking as much as
+possible for everyone involved.
+
+Regzbot works by watching for replies to reports of tracked regressions.
+Additionally, it's looking out for posted or committed patches referencing such
+reports with "Link:" tags; replies to such patch postings are tracked as well.
+Combined this data provides good insights into the current state of the fixing
+process.
+
+How to see which regressions regzbot tracks currently?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Check out `regzbot's web-interface <https://linux-regtracking.leemhuis.info/regzbot/>`_.
+
+What kind of issues are supposed to be tracked by regzbot?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The bot is meant to track regressions, hence please don't involve regzbot for
+regular issues. But it's okay for the Linux kernel's regression tracker if you
+involve regzbot to track severe issues, like reports about hangs, corrupted
+data, or internal errors (Panic, Oops, BUG(), warning, ...).
+
+How to change aspects of a tracked regression?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+By using a 'regzbot command' in a direct or indirect reply to the mail with the
+report. The easiest way to do that: find the report in your "Sent" folder or the
+mailing list archive and reply to it using your mailer's "Reply-all" function.
+In that mail, use one of the following commands in a stand-alone paragraph (IOW:
+use blank lines to separate one or multiple of these commands from the rest of
+the mail's text).
+
+ * Update when the regression started to happen, for example after performing a
+ bisection::
+
+ #regzbot introduced: 1f2e3d4c5d
+
+ * Set or update the title::
+
+ #regzbot title: foo
+
+ * Monitor a discussion or bugzilla.kernel.org ticket where additions aspects of
+ the issue or a fix are discussed:::
+
+ #regzbot monitor: https://lore.kernel.org/r/30th.anniversary.repost@klaava.Helsinki.FI/
+ #regzbot monitor: https://bugzilla.kernel.org/show_bug.cgi?id=123456789
+
+ * Point to a place with further details of interest, like a mailing list post
+ or a ticket in a bug tracker that are slightly related, but about a different
+ topic::
+
+ #regzbot link: https://bugzilla.kernel.org/show_bug.cgi?id=123456789
+
+ * Mark a regression as invalid::
+
+ #regzbot invalid: wasn't a regression, problem has always existed
+
+Regzbot supports a few other commands primarily used by developers or people
+tracking regressions. They and more details about the aforementioned regzbot
+commands can be found in the `getting started guide
+<https://gitlab.com/knurd42/regzbot/-/blob/main/docs/getting_started.md>`_ and
+the `reference documentation <https://gitlab.com/knurd42/regzbot/-/blob/main/docs/reference.md>`_
+for regzbot.
+
+..
+ end-of-content
+..
+ This text is available under GPL-2.0+ or CC-BY-4.0, as stated at the top
+ of the file. If you want to distribute this text under CC-BY-4.0 only,
+ please use "The Linux kernel developers" for author attribution and link
+ this as source:
+ https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/admin-guide/reporting-regressions.rst
+..
+ Note: Only the content of this RST file as found in the Linux kernel sources
+ is available under CC-BY-4.0, as versions of this text that were processed
+ (for example by the kernel's build system) might contain content taken from
+ files which use a more restrictive license.
diff --git a/Documentation/admin-guide/rtc.rst b/Documentation/admin-guide/rtc.rst
new file mode 100644
index 000000000000..688c95b11919
--- /dev/null
+++ b/Documentation/admin-guide/rtc.rst
@@ -0,0 +1,140 @@
+=======================================
+Real Time Clock (RTC) Drivers for Linux
+=======================================
+
+When Linux developers talk about a "Real Time Clock", they usually mean
+something that tracks wall clock time and is battery backed so that it
+works even with system power off. Such clocks will normally not track
+the local time zone or daylight savings time -- unless they dual boot
+with MS-Windows -- but will instead be set to Coordinated Universal Time
+(UTC, formerly "Greenwich Mean Time").
+
+The newest non-PC hardware tends to just count seconds, like the time(2)
+system call reports, but RTCs also very commonly represent time using
+the Gregorian calendar and 24 hour time, as reported by gmtime(3).
+
+Linux has two largely-compatible userspace RTC API families you may
+need to know about:
+
+ * /dev/rtc ... is the RTC provided by PC compatible systems,
+ so it's not very portable to non-x86 systems.
+
+ * /dev/rtc0, /dev/rtc1 ... are part of a framework that's
+ supported by a wide variety of RTC chips on all systems.
+
+Programmers need to understand that the PC/AT functionality is not
+always available, and some systems can do much more. That is, the
+RTCs use the same API to make requests in both RTC frameworks (using
+different filenames of course), but the hardware may not offer the
+same functionality. For example, not every RTC is hooked up to an
+IRQ, so they can't all issue alarms; and where standard PC RTCs can
+only issue an alarm up to 24 hours in the future, other hardware may
+be able to schedule one any time in the upcoming century.
+
+
+Old PC/AT-Compatible driver: /dev/rtc
+--------------------------------------
+
+All PCs (even Alpha machines) have a Real Time Clock built into them.
+Usually they are built into the chipset of the computer, but some may
+actually have a Motorola MC146818 (or clone) on the board. This is the
+clock that keeps the date and time while your computer is turned off.
+
+ACPI has standardized that MC146818 functionality, and extended it in
+a few ways (enabling longer alarm periods, and wake-from-hibernate).
+That functionality is NOT exposed in the old driver.
+
+However it can also be used to generate signals from a slow 2Hz to a
+relatively fast 8192Hz, in increments of powers of two. These signals
+are reported by interrupt number 8. (Oh! So *that* is what IRQ 8 is
+for...) It can also function as a 24hr alarm, raising IRQ 8 when the
+alarm goes off. The alarm can also be programmed to only check any
+subset of the three programmable values, meaning that it could be set to
+ring on the 30th second of the 30th minute of every hour, for example.
+The clock can also be set to generate an interrupt upon every clock
+update, thus generating a 1Hz signal.
+
+The interrupts are reported via /dev/rtc (major 10, minor 135, read only
+character device) in the form of an unsigned long. The low byte contains
+the type of interrupt (update-done, alarm-rang, or periodic) that was
+raised, and the remaining bytes contain the number of interrupts since
+the last read. Status information is reported through the pseudo-file
+/proc/driver/rtc if the /proc filesystem was enabled. The driver has
+built in locking so that only one process is allowed to have the /dev/rtc
+interface open at a time.
+
+A user process can monitor these interrupts by doing a read(2) or a
+select(2) on /dev/rtc -- either will block/stop the user process until
+the next interrupt is received. This is useful for things like
+reasonably high frequency data acquisition where one doesn't want to
+burn up 100% CPU by polling gettimeofday etc. etc.
+
+At high frequencies, or under high loads, the user process should check
+the number of interrupts received since the last read to determine if
+there has been any interrupt "pileup" so to speak. Just for reference, a
+typical 486-33 running a tight read loop on /dev/rtc will start to suffer
+occasional interrupt pileup (i.e. > 1 IRQ event since last read) for
+frequencies above 1024Hz. So you really should check the high bytes
+of the value you read, especially at frequencies above that of the
+normal timer interrupt, which is 100Hz.
+
+Programming and/or enabling interrupt frequencies greater than 64Hz is
+only allowed by root. This is perhaps a bit conservative, but we don't want
+an evil user generating lots of IRQs on a slow 386sx-16, where it might have
+a negative impact on performance. This 64Hz limit can be changed by writing
+a different value to /proc/sys/dev/rtc/max-user-freq. Note that the
+interrupt handler is only a few lines of code to minimize any possibility
+of this effect.
+
+Also, if the kernel time is synchronized with an external source, the
+kernel will write the time back to the CMOS clock every 11 minutes. In
+the process of doing this, the kernel briefly turns off RTC periodic
+interrupts, so be aware of this if you are doing serious work. If you
+don't synchronize the kernel time with an external source (via ntp or
+whatever) then the kernel will keep its hands off the RTC, allowing you
+exclusive access to the device for your applications.
+
+The alarm and/or interrupt frequency are programmed into the RTC via
+various ioctl(2) calls as listed in ./include/linux/rtc.h
+Rather than write 50 pages describing the ioctl() and so on, it is
+perhaps more useful to include a small test program that demonstrates
+how to use them, and demonstrates the features of the driver. This is
+probably a lot more useful to people interested in writing applications
+that will be using this driver. See the code at the end of this document.
+
+(The original /dev/rtc driver was written by Paul Gortmaker.)
+
+
+New portable "RTC Class" drivers: /dev/rtcN
+--------------------------------------------
+
+Because Linux supports many non-ACPI and non-PC platforms, some of which
+have more than one RTC style clock, it needed a more portable solution
+than expecting a single battery-backed MC146818 clone on every system.
+Accordingly, a new "RTC Class" framework has been defined. It offers
+three different userspace interfaces:
+
+ * /dev/rtcN ... much the same as the older /dev/rtc interface
+
+ * /sys/class/rtc/rtcN ... sysfs attributes support readonly
+ access to some RTC attributes.
+
+ * /proc/driver/rtc ... the system clock RTC may expose itself
+ using a procfs interface. If there is no RTC for the system clock,
+ rtc0 is used by default. More information is (currently) shown
+ here than through sysfs.
+
+The RTC Class framework supports a wide variety of RTCs, ranging from those
+integrated into embeddable system-on-chip (SOC) processors to discrete chips
+using I2C, SPI, or some other bus to communicate with the host CPU. There's
+even support for PC-style RTCs ... including the features exposed on newer PCs
+through ACPI.
+
+The new framework also removes the "one RTC per system" restriction. For
+example, maybe the low-power battery-backed RTC is a discrete I2C chip, but
+a high functionality RTC is integrated into the SOC. That system might read
+the system clock from the discrete RTC, but use the integrated one for all
+other tasks, because of its greater functionality.
+
+Check out tools/testing/selftests/rtc/rtctest.c for an example usage of the
+ioctl interface.
diff --git a/Documentation/admin-guide/security-bugs.rst b/Documentation/admin-guide/security-bugs.rst
deleted file mode 100644
index 30187d49dc2c..000000000000
--- a/Documentation/admin-guide/security-bugs.rst
+++ /dev/null
@@ -1,89 +0,0 @@
-.. _securitybugs:
-
-Security bugs
-=============
-
-Linux kernel developers take security very seriously. As such, we'd
-like to know when a security bug is found so that it can be fixed and
-disclosed as quickly as possible. Please report security bugs to the
-Linux kernel security team.
-
-Contact
--------
-
-The Linux kernel security team can be contacted by email at
-<security@kernel.org>. This is a private list of security officers
-who will help verify the bug report and develop and release a fix.
-If you already have a fix, please include it with your report, as
-that can speed up the process considerably. It is possible that the
-security team will bring in extra help from area maintainers to
-understand and fix the security vulnerability.
-
-As it is with any bug, the more information provided the easier it
-will be to diagnose and fix. Please review the procedure outlined in
-admin-guide/reporting-bugs.rst if you are unclear about what
-information is helpful. Any exploit code is very helpful and will not
-be released without consent from the reporter unless it has already been
-made public.
-
-Disclosure and embargoed information
-------------------------------------
-
-The security list is not a disclosure channel. For that, see Coordination
-below.
-
-Once a robust fix has been developed, the release process starts. Fixes
-for publicly known bugs are released immediately.
-
-Although our preference is to release fixes for publicly undisclosed bugs
-as soon as they become available, this may be postponed at the request of
-the reporter or an affected party for up to 7 calendar days from the start
-of the release process, with an exceptional extension to 14 calendar days
-if it is agreed that the criticality of the bug requires more time. The
-only valid reason for deferring the publication of a fix is to accommodate
-the logistics of QA and large scale rollouts which require release
-coordination.
-
-Whilst embargoed information may be shared with trusted individuals in
-order to develop a fix, such information will not be published alongside
-the fix or on any other disclosure channel without the permission of the
-reporter. This includes but is not limited to the original bug report
-and followup discussions (if any), exploits, CVE information or the
-identity of the reporter.
-
-In other words our only interest is in getting bugs fixed. All other
-information submitted to the security list and any followup discussions
-of the report are treated confidentially even after the embargo has been
-lifted, in perpetuity.
-
-Coordination
-------------
-
-Fixes for sensitive bugs, such as those that might lead to privilege
-escalations, may need to be coordinated with the private
-<linux-distros@vs.openwall.org> mailing list so that distribution vendors
-are well prepared to issue a fixed kernel upon public disclosure of the
-upstream fix. Distros will need some time to test the proposed patch and
-will generally request at least a few days of embargo, and vendor update
-publication prefers to happen Tuesday through Thursday. When appropriate,
-the security team can assist with this coordination, or the reporter can
-include linux-distros from the start. In this case, remember to prefix
-the email Subject line with "[vs]" as described in the linux-distros wiki:
-<http://oss-security.openwall.org/wiki/mailing-lists/distros#how-to-use-the-lists>
-
-CVE assignment
---------------
-
-The security team does not normally assign CVEs, nor do we require them
-for reports or fixes, as this can needlessly complicate the process and
-may delay the bug handling. If a reporter wishes to have a CVE identifier
-assigned ahead of public disclosure, they will need to contact the private
-linux-distros list, described above. When such a CVE identifier is known
-before a patch is provided, it is desirable to mention it in the commit
-message if the reporter agrees.
-
-Non-disclosure agreements
--------------------------
-
-The Linux kernel security team is not a formal body and therefore unable
-to enter any non-disclosure agreements.
diff --git a/Documentation/admin-guide/serial-console.rst b/Documentation/admin-guide/serial-console.rst
index a8d1e36b627a..1609e7479249 100644
--- a/Documentation/admin-guide/serial-console.rst
+++ b/Documentation/admin-guide/serial-console.rst
@@ -33,8 +33,11 @@ The format of this option is::
9600n8. The maximum baudrate is 115200.
You can specify multiple console= options on the kernel command line.
-Output will appear on all of them. The last device will be used when
-you open ``/dev/console``. So, for example::
+
+The behavior is well defined when each device type is mentioned only once.
+In this case, the output will appear on all requested consoles. And
+the last device will be used when you open ``/dev/console``.
+So, for example::
console=ttyS1,9600 console=tty0
@@ -42,19 +45,48 @@ defines that opening ``/dev/console`` will get you the current foreground
virtual console, and kernel messages will appear on both the VGA
console and the 2nd serial port (ttyS1 or COM2) at 9600 baud.
-Note that you can only define one console per device type (serial, video).
+The behavior is more complicated when the same device type is defined more
+times. In this case, there are the following two rules:
+
+1. The output will appear only on the first device of each defined type.
+
+2. ``/dev/console`` will be associated with the first registered device.
+ Where the registration order depends on how kernel initializes various
+ subsystems.
+
+ This rule is used also when the last console= parameter is not used
+ for other reasons. For example, because of a typo or because
+ the hardware is not available.
+
+The result might be surprising. For example, the following two command
+lines have the same result::
+
+ console=ttyS1,9600 console=tty0 console=tty1
+ console=tty0 console=ttyS1,9600 console=tty1
+
+The kernel messages are printed only on ``tty0`` and ``ttyS1``. And
+``/dev/console`` gets associated with ``tty0``. It is because kernel
+tries to register graphical consoles before serial ones. It does it
+because of the default behavior when no console device is specified,
+see below.
+
+Note that the last ``console=tty1`` parameter still makes a difference.
+The kernel command line is used also by systemd. It would use the last
+defined ``tty1`` as the login console.
If no console device is specified, the first device found capable of
acting as a system console will be used. At this time, the system
first looks for a VGA card and then for a serial port. So if you don't
have a VGA card in your system the first serial port will automatically
-become the console.
+become the console, unless the kernel is configured with the
+CONFIG_NULL_TTY_DEFAULT_CONSOLE option, then it will default to using the
+ttynull device.
You will need to create a new device to use ``/dev/console``. The official
``/dev/console`` is now character device 5,1.
(You can also use a network device as a console. See
-``Documentation/networking/netconsole.txt`` for information on that.)
+``Documentation/networking/netconsole.rst`` for information on that.)
Here's an example that will use ``/dev/ttyS1`` (COM2) as the console.
Replace the sample values as needed.
diff --git a/Documentation/admin-guide/spkguide.txt b/Documentation/admin-guide/spkguide.txt
new file mode 100644
index 000000000000..0d5965138f8f
--- /dev/null
+++ b/Documentation/admin-guide/spkguide.txt
@@ -0,0 +1,1625 @@
+
+The Speakup User's Guide
+For Speakup 3.1.2 and Later
+By Gene Collins
+Updated by others
+Last modified on Mon Sep 27 14:26:31 2010
+Document version 1.3
+
+Copyright (c) 2005 Gene Collins
+Copyright (c) 2008, 2023 Samuel Thibault
+Copyright (c) 2009, 2010 the Speakup Team
+
+Permission is granted to copy, distribute and/or modify this document
+under the terms of the GNU Free Documentation License, Version 1.2 or
+any later version published by the Free Software Foundation; with no
+Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts. A
+copy of the license is included in the section entitled "GNU Free
+Documentation License".
+
+Preface
+
+The purpose of this document is to familiarize users with the user
+interface to Speakup, a Linux Screen Reader. If you need instructions
+for installing or obtaining Speakup, visit the web site at
+http://linux-speakup.org/. Speakup is a set of patches to the standard
+Linux kernel source tree. It can be built as a series of modules, or as
+a part of a monolithic kernel. These details are beyond the scope of
+this manual, but the user may need to be aware of the module
+capabilities, depending on how your system administrator has installed
+Speakup. If Speakup is built as a part of a monolithic kernel, and the
+user is using a hardware synthesizer, then Speakup will be able to
+provide speech access from the time the kernel is loaded, until the time
+the system is shutdown. This means that if you have obtained Linux
+installation media for a distribution which includes Speakup as a part
+of its kernel, you will be able, as a blind person, to install Linux
+with speech access unaided by a sighted person. Again, these details
+are beyond the scope of this manual, but the user should be aware of
+them. See the web site mentioned above for further details.
+
+1. Starting Speakup
+
+If your system administrator has installed Speakup to work with your
+specific synthesizer by default, then all you need to do to use Speakup
+is to boot your system, and Speakup should come up talking. This
+assumes of course that your synthesizer is a supported hardware
+synthesizer, and that it is either installed in or connected to your
+system, and is if necessary powered on.
+
+It is possible, however, that Speakup may have been compiled into the
+kernel with no default synthesizer. It is even possible that your
+kernel has been compiled with support for some of the supported
+synthesizers and not others. If you find that this is the case, and
+your synthesizer is supported but not available, complain to the person
+who compiled and installed your kernel. Or better yet, go to the web
+site, and learn how to patch Speakup into your own kernel source, and
+build and install your own kernel.
+
+If your kernel has been compiled with Speakup, and has no default
+synthesizer set, or you would like to use a different synthesizer than
+the default one, then you may issue the following command at the boot
+prompt of your boot loader.
+
+linux speakup.synth=ltlk
+
+This command would tell Speakup to look for and use a LiteTalk or
+DoubleTalk LT at boot up. You may replace the ltlk synthesizer keyword
+with the keyword for whatever synthesizer you wish to use. The
+speakup.synth parameter will accept the following keywords, provided
+that support for the related synthesizers has been built into the
+kernel.
+
+acntsa -- Accent SA
+acntpc -- Accent PC
+apollo -- Apollo
+audptr -- Audapter
+bns -- Braille 'n Speak
+dectlk -- DecTalk Express (old and new, db9 serial only)
+decext -- DecTalk (old) External
+dtlk -- DoubleTalk PC
+keypc -- Keynote Gold PC
+ltlk -- DoubleTalk LT, LiteTalk, or external Tripletalk (db9 serial only)
+spkout -- Speak Out
+txprt -- Transport
+dummy -- Plain text terminal
+
+Note: Speakup does * NOT * support the internal Tripletalk!
+
+Speakup does support two other synthesizers, but because they work in
+conjunction with other software, they must be loaded as modules after
+their related software is loaded, and so are not available at boot up.
+These are as follows:
+
+decpc -- DecTalk PC (not available at boot up)
+soft -- One of several software synthesizers (not available at boot up)
+
+By default speakup looks for the synthesizer on the ttyS0 serial port. This can
+be changed with the device parameter of the modules, for instance for
+DoubleTalk LT:
+
+speakup_ltlk.dev=ttyUSB0
+
+See the sections on loading modules and software synthesizers later in
+this manual for further details. It should be noted here that the
+speakup.synth boot parameter will have no effect if Speakup has been
+compiled as modules. In order for Speakup modules to be loaded during
+the boot process, such action must be configured by your system
+administrator. This will mean that you will hear some, but not all, of
+the bootup messages.
+
+2. Basic operation
+
+Once you have booted the system, and if necessary, have supplied the
+proper bootup parameter for your synthesizer, Speakup will begin
+talking as soon as the kernel is loaded. In fact, it will talk a lot!
+It will speak all the boot up messages that the kernel prints on the
+screen during the boot process. This is because Speakup is not a
+separate screen reader, but is actually built into the operating
+system. Since almost all console applications must print text on the
+screen using the kernel, and must get their keyboard input through the
+kernel, they are automatically handled properly by Speakup. There are a
+few exceptions, but we'll come to those later.
+
+Note: In this guide I will refer to the numeric keypad as the keypad.
+This is done because the speakupmap.map file referred to later in this
+manual uses the term keypad instead of numeric keypad. Also I'm lazy
+and would rather only type one word. So keypad it is. Got it? Good.
+
+Most of the Speakup review keys are located on the keypad at the far
+right of the keyboard. The numlock key should be off, in order for these
+to work. If you toggle the numlock on, the keypad will produce numbers,
+which is exactly what you want for spreadsheets and such. For the
+purposes of this guide, you should have the numlock turned off, which is
+its default state at bootup.
+
+You probably won't want to listen to all the bootup messages every time
+you start your system, though it's a good idea to listen to them at
+least once, just so you'll know what kind of information is available to
+you during the boot process. You can always review these messages after
+bootup with the command:
+
+dmesg | more
+
+In order to speed the boot process, and to silence the speaking of the
+bootup messages, just press the keypad enter key. This key is located
+in the bottom right corner of the keypad. Speakup will shut up and stay
+that way, until you press another key.
+
+You can check to see if the boot process has completed by pressing the 8
+key on the keypad, which reads the current line. This also has the
+effect of starting Speakup talking again, so you can press keypad enter
+to silence it again if the boot process has not completed.
+
+When the boot process is complete, you will arrive at a "login" prompt.
+At this point, you'll need to type in your user id and password, as
+provided by your system administrator. You will hear Speakup speak the
+letters of your user id as you type it, but not the password. This is
+because the password is not displayed on the screen for security
+reasons. This has nothing to do with Speakup, it's a Linux security
+feature.
+
+Once you've logged in, you can run any Linux command or program which is
+allowed by your user id. Normal users will not be able to run programs
+which require root privileges.
+
+When you are running a program or command, Speakup will automatically
+speak new text as it arrives on the screen. You can at any time silence
+the speech with keypad enter, or use any of the Speakup review keys.
+
+Here are some basic Speakup review keys, and a short description of what
+they do.
+
+keypad 1 -- read previous character
+keypad 2 -- read current character (pressing keypad 2 twice rapidly will speak
+ the current character phonetically)
+keypad 3 -- read next character
+keypad 4 -- read previous word
+keypad 5 -- read current word (press twice rapidly to spell the current word)
+keypad 6 -- read next word
+keypad 7 -- read previous line
+keypad 8 -- read current line (press twice rapidly to hear how much the
+ text on the current line is indented)
+keypad 9 -- read next line
+keypad period -- speak current cursor position and announce current
+ virtual console
+
+It's also worth noting that the insert key on the keypad is mapped
+as the speakup key. Instead of pressing and releasing this key, as you
+do under DOS or Windows, you hold it like a shift key, and press other
+keys in combination with it. For example, repeatedly holding keypad
+insert, from now on called speakup, and keypad enter will toggle the
+speaking of new text on the screen on and off. This is not the same as
+just pressing keypad enter by itself, which just silences the speech
+until you hit another key. When you hit speakup plus keypad enter,
+Speakup will say, "You turned me off.", or "Hey, that's better." When
+Speakup is turned off, no new text on the screen will be spoken. You
+can still use the reading controls to review the screen however.
+
+3. Using the Speakup Help System
+
+In order to enter the Speakup help system, press and hold the speakup
+key (remember that this is the keypad insert key), and press the f1 key.
+You will hear the message:
+
+"Press space to leave help, cursor up or down to scroll, or a letter to
+go to commands in list."
+
+When you press the spacebar to leave the help system, you will hear:
+
+"Leaving help."
+
+While you are in the Speakup help system, you can scroll up or down
+through the list of available commands using the cursor keys. The list
+of commands is arranged in alphabetical order. If you wish to jump to
+commands in a specific part of the alphabet, you may press the letter of
+the alphabet you wish to jump to.
+
+You can also just explore by typing keyboard keys. Pressing keys will
+cause Speakup to speak the command associated with that key. For
+example, if you press the keypad 8 key, you will hear:
+
+"Keypad 8 is line, say current."
+
+You'll notice that some commands do not have keys assigned to them.
+This is because they are very infrequently used commands, and are also
+accessible through the sys system. We'll discuss the sys system later
+in this manual.
+
+You'll also notice that some commands have two keys assigned to them.
+This is because Speakup has a built in set of alternative key bindings
+for laptop users. The alternate speakup key is the caps lock key. You
+can press and hold the caps lock key, while pressing an alternate
+speakup command key to activate the command. On most laptops, the
+numeric keypad is defined as the keys in the j k l area of the keyboard.
+
+There is usually a function key which turns this keypad function on and
+off, and some other key which controls the numlock state. Toggling the
+keypad functionality on and off can become a royal pain. So, Speakup
+gives you a simple way to get at an alternative set of key mappings for
+your laptop. These are also available by default on desktop systems,
+because Speakup does not know whether it is running on a desktop or
+laptop. So you may choose which set of Speakup keys to use. Some
+system administrators may have chosen to compile Speakup for a desktop
+system without this set of alternate key bindings, but these details are
+beyond the scope of this manual. To use the caps lock for its normal
+purpose, hold the shift key while toggling the caps lock on and off. We
+should note here, that holding the caps lock key and pressing the z key
+will toggle the alternate j k l keypad on and off.
+
+4. Keys and Their Assigned Commands
+
+In this section, we'll go through a list of all the speakup keys and
+commands. You can also get a list of commands and assigned keys from
+the help system.
+
+The following list was taken from the speakupmap.map file. Key
+assignments are on the left of the equal sign, and the associated
+Speakup commands are on the right. The designation "spk" means to press
+and hold the speakup key, a.k.a. keypad insert, a.k.a. caps lock, while
+pressing the other specified key.
+
+spk key_f9 = punc_level_dec
+spk key_f10 = punc_level_inc
+spk key_f11 = reading_punc_dec
+spk key_f12 = reading_punc_inc
+spk key_1 = vol_dec
+spk key_2 = vol_inc
+spk key_3 = pitch_dec
+spk key_4 = pitch_inc
+spk key_5 = rate_dec
+spk key_6 = rate_inc
+key_kpasterisk = toggle_cursoring
+spk key_kpasterisk = speakup_goto
+spk key_f1 = speakup_help
+spk key_f2 = set_win
+spk key_f3 = clear_win
+spk key_f4 = enable_win
+spk key_f5 = edit_some
+spk key_f6 = edit_most
+spk key_f7 = edit_delim
+spk key_f8 = edit_repeat
+shift spk key_f9 = edit_exnum
+ key_kp7 = say_prev_line
+spk key_kp7 = left_edge
+ key_kp8 = say_line
+double key_kp8 = say_line_indent
+spk key_kp8 = say_from_top
+ key_kp9 = say_next_line
+spk key_kp9 = top_edge
+ key_kpminus = speakup_parked
+spk key_kpminus = say_char_num
+ key_kp4 = say_prev_word
+spk key_kp4 = say_from_left
+ key_kp5 = say_word
+double key_kp5 = spell_word
+spk key_kp5 = spell_phonetic
+ key_kp6 = say_next_word
+spk key_kp6 = say_to_right
+ key_kpplus = say_screen
+spk key_kpplus = say_win
+ key_kp1 = say_prev_char
+spk key_kp1 = right_edge
+ key_kp2 = say_char
+spk key_kp2 = say_to_bottom
+double key_kp2 = say_phonetic_char
+ key_kp3 = say_next_char
+spk key_kp3 = bottom_edge
+ key_kp0 = spk_key
+ key_kpdot = say_position
+spk key_kpdot = say_attributes
+key_kpenter = speakup_quiet
+spk key_kpenter = speakup_off
+key_sysrq = speech_kill
+ key_kpslash = speakup_cut
+spk key_kpslash = speakup_paste
+spk key_pageup = say_first_char
+spk key_pagedown = say_last_char
+key_capslock = spk_key
+ spk key_z = spk_lock
+key_leftmeta = spk_key
+ctrl spk key_0 = speakup_goto
+spk key_u = say_prev_line
+spk key_i = say_line
+double spk key_i = say_line_indent
+spk key_o = say_next_line
+spk key_minus = speakup_parked
+shift spk key_minus = say_char_num
+spk key_j = say_prev_word
+spk key_k = say_word
+double spk key_k = spell_word
+spk key_l = say_next_word
+spk key_m = say_prev_char
+spk key_comma = say_char
+double spk key_comma = say_phonetic_char
+spk key_dot = say_next_char
+spk key_n = say_position
+ ctrl spk key_m = left_edge
+ ctrl spk key_y = top_edge
+ ctrl spk key_dot = right_edge
+ctrl spk key_p = bottom_edge
+spk key_apostrophe = say_screen
+spk key_h = say_from_left
+spk key_y = say_from_top
+spk key_semicolon = say_to_right
+spk key_p = say_to_bottom
+spk key_slash = say_attributes
+ spk key_enter = speakup_quiet
+ ctrl spk key_enter = speakup_off
+ spk key_9 = speakup_cut
+spk key_8 = speakup_paste
+shift spk key_m = say_first_char
+ ctrl spk key_semicolon = say_last_char
+spk key_r = read_all_doc
+
+5. The Speakup Sys System
+
+The Speakup screen reader also creates a speakup subdirectory as a part
+of the sys system.
+
+As a convenience, run as root
+
+ln -s /sys/accessibility/speakup /speakup
+
+to directly access speakup parameters from /speakup.
+You can see these entries by typing the command:
+
+ls -1 /speakup/*
+
+If you issue the above ls command, you will get back something like
+this:
+
+/speakup/attrib_bleep
+/speakup/bell_pos
+/speakup/bleep_time
+/speakup/bleeps
+/speakup/cursor_time
+/speakup/delimiters
+/speakup/ex_num
+/speakup/key_echo
+/speakup/keymap
+/speakup/no_interrupt
+/speakup/punc_all
+/speakup/punc_level
+/speakup/punc_most
+/speakup/punc_some
+/speakup/reading_punc
+/speakup/repeats
+/speakup/say_control
+/speakup/say_word_ctl
+/speakup/silent
+/speakup/spell_delay
+/speakup/synth
+/speakup/synth_direct
+/speakup/version
+
+/speakup/i18n:
+announcements
+characters
+chartab
+colors
+ctl_keys
+formatted
+function_names
+key_names
+states
+
+/speakup/soft:
+caps_start
+caps_stop
+delay_time
+direct
+freq
+full_time
+jiffy_delta
+pitch
+inflection
+punct
+rate
+tone
+trigger_time
+voice
+vol
+
+Notice the two subdirectories of /speakup: /speakup/i18n and
+/speakup/soft.
+The i18n subdirectory is described in a later section.
+The files under /speakup/soft represent settings that are specific to the
+driver for the software synthesizer. If you use the LiteTalk, your
+synthesizer-specific settings would be found in /speakup/ltlk. In other words,
+a subdirectory named /speakup/KWD is created to hold parameters specific
+to the device whose keyword is KWD.
+These parameters include volume, rate, pitch, and others.
+
+In addition to using the Speakup hot keys to change such things as
+volume, pitch, and rate, you can also echo values to the appropriate
+entry in the /speakup directory. This is very useful, since it
+lets you control Speakup parameters from within a script. How you
+would write such scripts is somewhat beyond the scope of this manual,
+but I will include a couple of simple examples here to give you a
+general idea of what such scripts can do.
+
+Suppose for example, that you wanted to control both the punctuation
+level and the reading punctuation level at the same time. For
+simplicity, we'll call them punc0, punc1, punc2, and punc3. The scripts
+might look something like this:
+
+#!/bin/bash
+# punc0
+# set punc and reading punc levels to 0
+echo 0 >/speakup/punc_level
+echo 0 >/speakup/reading_punc
+echo Punctuation level set to 0.
+
+#!/bin/bash
+# punc1
+# set punc and reading punc levels to 1
+echo 1 >/speakup/punc_level
+echo 1 >/speakup/reading_punc
+echo Punctuation level set to 1.
+
+#!/bin/bash
+# punc2
+# set punc and reading punc levels to 2
+echo 2 >/speakup/punc_level
+echo 2 >/speakup/reading_punc
+echo Punctuation level set to 2.
+
+#!/bin/bash
+# punc3
+# set punc and reading punc levels to 3
+echo 3 >/speakup/punc_level
+echo 3 >/speakup/reading_punc
+echo Punctuation level set to 3.
+
+If you were to store these four small scripts in a directory in your
+path, perhaps /usr/local/bin, and set the permissions to 755 with the
+chmod command, then you could change the default reading punc and
+punctuation levels at the same time by issuing just one command. For
+example, if you were to execute the punc3 command at your shell prompt,
+then the reading punc and punc level would both get set to 3.
+
+I should note that the above scripts were written to work with bash, but
+regardless of which shell you use, you should be able to do something
+similar.
+
+The Speakup sys system also has another interesting use. You can echo
+Speakup parameters into the sys system in a script during system
+startup, and speakup will return to your preferred parameters every time
+the system is rebooted.
+
+Most of the Speakup sys parameters can be manipulated by a regular user
+on the system. However, there are a few parameters that are dangerous
+enough that they should only be manipulated by the root user on your
+system. There are even some parameters that are read only, and cannot
+be written to at all. For example, the version entry in the Speakup
+sys system is read only. This is because there is no reason for a user
+to tamper with the version number which is reported by Speakup. Doing
+an ls -l on /speakup/version will return this:
+
+-r--r--r-- 1 root root 0 Mar 21 13:46 /speakup/version
+
+As you can see, the version entry in the Speakup sys system is read
+only, is owned by root, and belongs to the root group. Doing a cat of
+/speakup/version will display the Speakup version number, like
+this:
+
+cat /speakup/version
+Speakup v-2.00 CVS: Thu Oct 21 10:38:21 EDT 2004
+synth dtlk version 1.1
+
+The display shows the Speakup version number, along with the version
+number of the driver for the current synthesizer.
+
+Looking at entries in the Speakup sys system can be useful in many
+ways. For example, you might wish to know what level your volume is set
+at. You could type:
+
+cat /speakup/KWD/vol
+# Replace KWD with the keyword for your synthesizer, E.G., ltlk for LiteTalk.
+5
+
+The number five which comes back is the level at which the synthesizer
+volume is set at.
+
+All the entries in the Speakup sys system are readable, some are
+writable by root only, and some are writable by everyone. Unless you
+know what you are doing, you should probably leave the ones that are
+writable by root only alone. Most of the names are self explanatory.
+Vol for controlling volume, pitch for pitch, inflection for pitch range, rate
+for controlling speaking rate, etc. If you find one you aren't sure about, you
+can post a query on the Speakup list.
+
+6. Changing Synthesizers
+
+It is possible to change to a different synthesizer while speakup is
+running. In other words, it is not necessary to reboot the system
+in order to use a different synthesizer. You can simply echo the
+synthesizer keyword to the /speakup/synth sys entry.
+Depending on your situation, you may wish to echo none to the synth
+sys entry, to disable speech while one synthesizer is disconnected and
+a second one is connected in its place. Then echo the keyword for the
+new synthesizer into the synth sys entry in order to start speech
+with the newly connected synthesizer. See the list of synthesizer
+keywords in section 1 to find the keyword which matches your synth.
+
+7. Loading modules
+
+As mentioned earlier, Speakup can either be completely compiled into the
+kernel, with the exception of the help module, or it can be compiled as
+a series of modules. When compiled as modules, Speakup will only be
+able to speak some of the bootup messages if your system administrator
+has configured the system to load the modules at boot time. The modules
+can be loaded after the file systems have been checked and mounted, or
+from an initrd. There is a third possibility. Speakup can be compiled
+with some components built into the kernel, and others as modules. As
+we'll see in the next section, this is particularly useful when you are
+working with software synthesizers.
+
+If Speakup is completely compiled as modules, then you must use the
+modprobe command to load Speakup. You do this by loading the module for
+the synthesizer driver you wish to use. The driver modules are all
+named speakup_<keyword>, where <keyword> is the keyword for the
+synthesizer you want. So, in order to load the driver for the DecTalk
+Express, you would type the following command:
+
+modprobe speakup_dectlk
+
+Issuing this command would load the DecTalk Express driver and all other
+related Speakup modules necessary to get Speakup up and running.
+
+To completely unload Speakup, again presuming that it is entirely built
+as modules, you would give the command:
+
+modprobe -r speakup_dectlk
+
+The above command assumes you were running a DecTalk Express. If you
+were using a different synth, then you would substitute its keyword in
+place of dectlk.
+
+If you have multiple drivers loaded, you need to unload all of them, in
+order to completely unload Speakup.
+For example, if you have loaded both the dectlk and ltlk drivers, use the
+command:
+modprobe -r speakup_dectlk speakup_ltlk
+
+You cannot unload the driver for software synthesizers when a user-space
+daemon is using /dev/softsynth. First, kill the daemon. Next, remove
+the driver with the command:
+modprobe -r speakup_soft
+
+Now, suppose we have a situation where the main Speakup component
+is built into the kernel, and some or all of the drivers are built as
+modules. Since the main part of Speakup is compiled into the kernel, a
+partial Speakup sys system has been created which we can take advantage
+of by simply echoing the synthesizer keyword into the
+/speakup/synth sys entry. This will cause the kernel to
+automatically load the appropriate driver module, and start Speakup
+talking. To switch to another synth, just echo a new keyword to the
+synth sys entry. For example, to load the DoubleTalk LT driver,
+you would type:
+
+echo ltlk >/speakup/synth
+
+You can use the modprobe -r command to unload driver modules, regardless
+of whether the main part of Speakup has been built into the kernel or
+not.
+
+8. Using Software Synthesizers
+
+Using a software synthesizer requires that some other software be
+installed and running on your system. For this reason, software
+synthesizers are not available for use at bootup, or during a system
+installation process.
+There are two freely-available solutions for software speech: Espeakup and
+Speech Dispatcher.
+These are described in subsections 8.1 and 8.2, respectively.
+
+During the rest of this section, we assume that speakup_soft is either
+built in to your kernel, or loaded as a module.
+
+If your system does not have udev installed , before you can use a
+software synthesizer, you must have created the /dev/softsynth device.
+If you have not already done so, issue the following commands as root:
+
+cd /dev
+mknod softsynth c 10 26
+
+While we are at it, we might just as well create the /dev/synth device,
+which can be used to let user space programs send information to your
+synthesizer. To create /dev/synth, change to the /dev directory, and
+issue the following command as root:
+
+mknod synth c 10 25
+
+of both.
+
+8.1. Espeakup
+
+Espeakup is a connector between Speakup and the eSpeak software synthesizer.
+Espeakup may already be available as a package for your distribution
+of Linux. If it is not packaged, you need to install it manually.
+You can find it in the contrib/ subdirectory of the Speakup sources.
+The filename is espeakup-$VERSION.tar.bz2, where $VERSION
+depends on the current release of Espeakup. The Speakup 3.1.2 source
+ships with version 0.71 of Espeakup.
+The README file included with the Espeakup sources describes the process
+of manual installation.
+
+Assuming that Espeakup is installed, either by the user or by the distributor,
+follow these steps to use it.
+
+Tell Speakup to use the "soft driver:
+echo soft > /speakup/synth
+
+Finally, start the espeakup program. There are two ways to do it.
+Both require root privileges.
+
+If Espeakup was installed as a package for your Linux distribution,
+you probably have a distribution-specific script that controls the operation
+of the daemon. Look for a file named espeakup under /etc/init.d or
+/etc/rc.d. Execute the following command with root privileges:
+/etc/init.d/espeakup start
+Replace init.d with rc.d, if your distribution uses scripts located under
+/etc/rc.d.
+Your distribution will also have a procedure for starting daemons at
+boot-time, so it is possible to have software speech as soon as user-space
+daemons are started by the bootup scripts.
+These procedures are not described in this document.
+
+If you built Espeakup manually, the "make install" step placed the binary
+under /usr/bin.
+Run the following command as root:
+/usr/bin/espeakup
+Espeakup should start speaking.
+
+8.2. Speech Dispatcher
+
+For this option, you must have a package called
+Speech Dispatcher running on your system, and it must be configured to
+work with one of its supported software synthesizers.
+
+Two open source synthesizers you might use are Flite and Festival. You
+might also choose to purchase the Software DecTalk from Fonix Sales Inc.
+If you run a google search for Fonix, you'll find their web site.
+
+You can obtain a copy of Speech Dispatcher from free(b)soft at
+http://www.freebsoft.org/. Follow the installation instructions that
+come with Speech Dispatcher in order to install and configure Speech
+Dispatcher. You can check out the web site for your Linux distribution
+in order to get a copy of either Flite or Festival. Your Linux
+distribution may also have a precompiled Speech Dispatcher package.
+
+Once you've installed, configured, and tested Speech Dispatcher with your
+chosen software synthesizer, you still need one more piece of software
+in order to make things work. You need a package called speechd-up.
+You get it from the free(b)soft web site mentioned above. After you've
+compiled and installed speechd-up, you are almost ready to begin using
+your software synthesizer.
+
+Now you can begin using your software synthesizer. In order to do so,
+echo the soft keyword to the synth sys entry like this:
+
+echo soft >/speakup/synth
+
+Next run the speechd_up command like this:
+
+speechd_up &
+
+Your synth should now start talking, and you should be able to adjust
+the pitch, rate, etc.
+
+9. Using The DecTalk PC Card
+
+The DecTalk PC card is an ISA card that is inserted into one of the ISA
+slots in your computer. It requires that the DecTalk PC software be
+installed on your computer, and that the software be loaded onto the
+Dectalk PC card before it can be used.
+
+You can get the dec_pc.tgz file from the linux-speakup.org site. The
+dec_pc.tgz file is in the ~ftp/pub/linux/speakup directory.
+
+After you have downloaded the dec_pc.tgz file, untar it in your home
+directory, and read the Readme file in the newly created dec_pc
+directory.
+
+The easiest way to get the software working is to copy the entire dec_pc
+directory into /user/local/lib. To do this, su to root in your home
+directory, and issue the command:
+
+cp dec_pc /usr/local/lib
+
+You will need to copy the dtload command from the dec_pc directory to a
+directory in your path. Either /usr/bin or /usr/local/bin is a good
+choice.
+
+You can now run the dtload command in order to load the DecTalk PC
+software onto the card. After you have done this, echo the decpc
+keyword to the synth entry in the sys system like this:
+
+echo decpc >/speakup/synth
+
+Your DecTalk PC should start talking, and then you can adjust the pitch,
+rate, volume, voice, etc. The voice entry in the Speakup sys system
+will accept a number from 0 through 7 for the DecTalk PC synthesizer,
+which will give you access to some of the DecTalk voices.
+
+10. Using Cursor Tracking
+
+In Speakup version 2.0 and later, cursor tracking is turned on by
+default. This means that when you are using an editor, Speakup will
+automatically speak characters as you move left and right with the
+cursor keys, and lines as you move up and down with the cursor keys.
+This is the traditional sort of cursor tracking.
+Recent versions of Speakup provide two additional ways to control the
+text that is spoken when the cursor is moved:
+"highlight tracking" and "read window."
+They are described later in this section.
+Sometimes, these modes get in your way, so you can disable cursor tracking
+altogether.
+
+You may select among the various forms of cursor tracking using the keypad
+asterisk key.
+Each time you press this key, a new mode is selected, and Speakup speaks
+the name of the new mode. The names for the four possible states of cursor
+tracking are: "cursoring on", "highlight tracking", "read window",
+and "cursoring off." The keypad asterisk key moves through the list of
+modes in a circular fashion.
+
+If highlight tracking is enabled, Speakup tracks highlighted text,
+rather than the cursor itself. When you move the cursor with the arrow keys,
+Speakup speaks the currently highlighted information.
+This is useful when moving through various menus and dialog boxes.
+If cursor tracking isn't helping you while navigating a menu,
+try highlight tracking.
+
+With the "read window" variety of cursor tracking, you can limit the text
+that Speakup speaks by specifying a window of interest on the screen.
+See section 15 for a description of the process of defining windows.
+When you move the cursor via the arrow keys, Speakup only speaks
+the contents of the window. This is especially helpful when you are hearing
+superfluous speech. Consider the following example.
+
+Suppose that you are at a shell prompt. You use bash, and you want to
+explore your command history using the up and down arrow keys. If you
+have enabled cursor tracking, you will hear two pieces of information.
+Speakup speaks both your shell prompt and the current entry from the
+command history. You may not want to hear the prompt repeated
+each time you move, so you can silence it by specifying a window. Find
+the last line of text on the screen. Clear the current window by pressing
+the key combination speakup f3. Use the review cursor to find the first
+character that follows your shell prompt. Press speakup + f2 twice, to
+define a one-line window. The boundaries of the window are the
+character following the shell prompt and the end of the line. Now, cycle
+through the cursor tracking modes using keypad asterisk, until Speakup
+says "read window." Move through your history using your arrow keys.
+You will notice that Speakup no longer speaks the redundant prompt.
+
+Some folks like to turn cursor tracking off while they are using the
+lynx web browser. You definitely want to turn cursor tracking off when
+you are using the alsamixer application. Otherwise, you won't be able
+to hear your mixer settings while you are using the arrow keys.
+
+11. Cut and Paste
+
+One of Speakup's more useful features is the ability to cut and paste
+text on the screen. This means that you can capture information from a
+program, and paste that captured text into a different place in the
+program, or into an entirely different program, which may even be
+running on a different console.
+
+For example, in this manual, we have made references to several web
+sites. It would be nice if you could cut and paste these urls into your
+web browser. Speakup does this quite nicely. Suppose you wanted to
+past the following url into your browser:
+
+http://linux-speakup.org/
+
+Use the speakup review keys to position the reading cursor on the first
+character of the above url. When the reading cursor is in position,
+press the keypad slash key once. Speakup will say, "mark". Next,
+position the reading cursor on the rightmost character of the above
+url. Press the keypad slash key once again to actually cut the text
+from the screen. Speakup will say, "cut". Although we call this
+cutting, Speakup does not actually delete the cut text from the screen.
+It makes a copy of the text in a special buffer for later pasting.
+
+Now that you have the url cut from the screen, you can paste it into
+your browser, or even paste the url on a command line as an argument to
+your browser.
+
+Suppose you want to start lynx and go to the Speakup site.
+
+You can switch to a different console with the alt left and right
+arrows, or you can switch to a specific console by typing alt and a
+function key. These are not Speakup commands, just standard Linux
+console capabilities.
+
+Once you've changed to an appropriate console, and are at a shell prompt,
+type the word lynx, followed by a space. Now press and hold the speakup
+key, while you type the keypad slash character. The url will be pasted
+onto the command line, just as though you had typed it in. Press the
+enter key to execute the command.
+
+The paste buffer will continue to hold the cut information, until a new
+mark and cut operation is carried out. This means you can paste the cut
+information as many times as you like before doing another cut
+operation.
+
+You are not limited to cutting and pasting only one line on the screen.
+You can also cut and paste rectangular regions of the screen. Just
+position the reading cursor at the top left corner of the text to be
+cut, mark it with the keypad slash key, then position the reading cursor
+at the bottom right corner of the region to be cut, and cut it with the
+keypad slash key.
+
+12. Changing the Pronunciation of Characters
+
+Through the /speakup/i18n/characters sys entry, Speakup gives you the
+ability to change how Speakup pronounces a given character. You could,
+for example, change how some punctuation characters are spoken. You can
+even change how Speakup will pronounce certain letters.
+
+You may, for example, wish to change how Speakup pronounces the z
+character. The author of Speakup, Kirk Reiser, is Canadian, and thus
+believes that the z should be pronounced zed. If you are an American,
+you might wish to use the zee pronunciation instead of zed. You can
+change the pronunciation of both the upper and lower case z with the
+following two commands:
+
+echo 90 zee >/speakup/characters
+echo 122 zee >/speakup/characters
+
+Let's examine the parts of the two previous commands. They are issued
+at the shell prompt, and could be placed in a startup script.
+
+The word echo tells the shell that you want to have it display the
+string of characters that follow the word echo. If you were to just
+type:
+
+echo hello.
+
+You would get the word hello printed on your screen as soon as you
+pressed the enter key. In this case, we are echoing strings that we
+want to be redirected into the sys system.
+
+The numbers 90 and 122 in the above echo commands are the ascii numeric
+values for the upper and lower case z, the characters we wish to change.
+
+The string zee is the pronunciation that we want Speakup to use for the
+upper and lower case z.
+
+The > symbol redirects the output of the echo command to a file, just
+like in DOS, or at the Windows command prompt.
+
+And finally, /speakup/i18n/characters is the file entry in the sys system
+where we want the output to be directed. Speakup looks at the numeric
+value of the character we want to change, and inserts the pronunciation
+string into an internal table.
+
+You can look at the whole table with the following command:
+
+cat /speakup/i18n/characters
+
+Speakup will then print out the entire character pronunciation table. I
+won't display it here, but leave you to look at it at your convenience.
+
+13. Mapping Keys
+
+Speakup has the capability of allowing you to assign or "map" keys to
+internal Speakup commands. This section necessarily assumes you have a
+Linux kernel source tree installed, and that it has been patched and
+configured with Speakup. How you do this is beyond the scope of this
+manual. For this information, visit the Speakup web site at
+http://linux-speakup.org/. The reason you'll need the kernel source
+tree patched with Speakup is that the genmap utility you'll need for
+processing keymaps is in the
+/usr/src/linux-<version_number>/drivers/char/speakup directory. The
+<version_number> in the above directory path is the version number of
+the Linux source tree you are working with.
+
+So ok, you've gone off and gotten your kernel source tree, and patched
+and configured it. Now you can start manipulating keymaps.
+
+You can either use the
+/usr/src/linux-<version_number>/drivers/char/speakup/speakupmap.map file
+included with the Speakup source, or you can cut and paste the copy in
+section 4 into a separate file. If you use the one in the Speakup
+source tree, make sure you make a backup of it before you start making
+changes. You have been warned!
+
+Suppose that you want to swap the key assignments for the Speakup
+say_last_char and the Speakup say_first_char commands. The
+speakupmap.map lists the key mappings for these two commands as follows:
+
+spk key_pageup = say_first_char
+spk key_pagedown = say_last_char
+
+You can edit your copy of the speakupmap.map file and swap the command
+names on the right side of the = (equals) sign. You did make a backup,
+right? The new keymap lines would look like this:
+
+spk key_pageup = say_last_char
+spk key_pagedown = say_first_char
+
+After you edit your copy of the speakupmap.map file, save it under a new
+file name, perhaps newmap.map. Then exit your editor and return to the
+shell prompt.
+
+You are now ready to load your keymap with your swapped key assignments.
+ Assuming that you saved your new keymap as the file newmap.map, you
+would load your keymap into the sys system like this:
+
+/usr/src/linux-<version_number>/drivers/char/speakup/genmap newmap.map
+>/speakup/keymap
+
+Remember to substitute your kernel version number for the
+<version_number> in the above command. Also note that although the
+above command wrapped onto two lines in this document, you should type
+it all on one line.
+
+Your say first and say last characters should now be swapped. Pressing
+speakup pagedown should read you the first non-whitespace character on
+the line your reading cursor is in, and pressing speakup pageup should
+read you the last character on the line your reading cursor is in.
+
+You should note that these new mappings will only stay in effect until
+you reboot, or until you load another keymap.
+
+One final warning. If you try to load a partial map, you will quickly
+find that all the mappings you didn't include in your file got deleted
+from the working map. Be extremely careful, and always make a backup!
+You have been warned!
+
+14. Internationalizing Speakup
+
+Speakup indicates various conditions to the user by speaking messages.
+For instance, when you move to the left edge of the screen with the
+review keys, Speakup says, "left."
+Prior to version 3.1.0 of Speakup, all of these messages were in English,
+and they could not be changed. If you used a non-English synthesizer,
+you still heard English messages, such as "left" and "cursoring on."
+In version 3.1.0 or higher, one may load translations for the various
+messages via the /sys filesystem.
+
+The directory /speakup/i18n contains several collections of messages.
+Each group of messages is stored in its own file.
+The following section lists all of these files, along with a brief description
+of each.
+
+14.1. Files Under the i18n Subdirectory
+
+* announcements:
+This file contains various general announcements, most of which cannot
+be categorized. You will find messages such as "You killed Speakup",
+"I'm alive", "leaving help", "parked", "unparked", and others.
+You will also find the names of the screen edges and cursor tracking modes
+here.
+
+* characters:
+See section 12 for a description of this file.
+
+* chartab:
+See section 12. Unlike the rest of the files in the i18n subdirectory,
+this one does not contain messages to be spoken.
+
+* colors:
+When you use the "say attributes" function, Speakup says the name of the
+foreground and background colors. These names come from the i18n/colors
+file.
+
+* ctl_keys:
+Here, you will find names of control keys. These are used with Speakup's
+say_control feature.
+
+* formatted:
+This group of messages contains embedded formatting codes, to specify
+the type and width of displayed data. If you change these, you must
+preserve all of the formatting codes, and they must appear in the order
+used by the default messages.
+
+* function_names:
+Here, you will find a list of names for Speakup functions. These are used
+by the help system. For example, suppose that you have activated help mode,
+and you pressed keypad 3. Speakup says:
+"keypad 3 is character, say next."
+The message "character, say next" names a Speakup function, and it
+comes from this function_names file.
+
+* key_names:
+Again, key_names is used by Speakup's help system. In the previous
+example, Speakup said that you pressed "keypad 3."
+This name came from the key_names file.
+
+* states:
+This file contains names for key states.
+Again, these are part of the help system. For instance, if you had pressed
+speakup + keypad 3, you would hear:
+"speakup keypad 3 is go to bottom edge."
+The speakup key is depressed, so the name of the key state is speakup.
+This part of the message comes from the states collection.
+
+14.2. Changing language
+
+14.2.1. Loading Your Own Messages
+
+The files under the i18n subdirectory all follow the same format.
+They consist of lines, with one message per line.
+Each message is represented by a number, followed by the text of the message.
+The number is the position of the message in the given collection.
+For example, if you view the file /speakup/i18n/colors, you will see the
+following list:
+
+0 black
+1 blue
+2 green
+3 cyan
+4 red
+5 magenta
+6 yellow
+7 white
+8 grey
+
+You can change one message, or you can change a whole group.
+To load a whole collection of messages from a new source, simply use
+the cp command:
+cp ~/my_colors /speakup/i18n/colors
+You can change an individual message with the echo command,
+as shown in the following example.
+
+The Spanish name for the color blue is azul.
+Looking at the colors file, we see that the name "blue" is at position 1
+within the colors group. Let's change blue to azul:
+echo '1 azul' > /speakup/i18n/colors
+The next time that Speakup says message 1 from the colors group, it will
+say "azul", rather than "blue."
+
+14.2.2. Choose a language
+
+In the future, translations into various languages will be made available,
+and most users will just load the files necessary for their language. So far,
+only French language is available beyond native Canadian English language.
+
+French is only available after you are logged in.
+
+Canadian English is the default language. To toggle another language,
+download the source of Speakup and untar it in your home directory. The
+following command should let you do this:
+
+tar xvjf speakup-<version>.tar.bz2
+
+where <version> is the version number of the application.
+
+Next, change to the newly created directory, then into the tools/ directory, and
+run the script speakup_setlocale. You are asked the language that you want to
+use. Type the number associated to your language (e.g. fr for French) then press
+Enter. Needed files are copied in the i18n directory.
+
+Note: the speakupconf must be installed on your system so that settings are saved.
+Otherwise, you will have an error: your language will be loaded but you will
+have to run the script again every time Speakup restarts.
+See section 16.1. for information about speakupconf.
+
+You will have to repeat these steps for any change of locale, i.e. if you wish
+change the speakup's language or charset (iso-8859-15 ou UTF-8).
+
+If you wish store the settings, note that at your next login, you will need to
+do:
+
+speakup load
+
+Alternatively, you can add the above line to your file
+~/.bashrc or ~/.bash_profile.
+
+If your system administrator himself ran the script, all the users will be able
+to change from English to the language chosen by root and do directly
+speakupconf load (or add this to the ~/.bashrc or
+~/.bash_profile file). If there are several languages to handle, the
+administrator (or every user) will have to run the first steps until speakupconf
+save, choosing the appropriate language, in every user's home directory. Every
+user will then be able to do speakupconf load, Speakup will load his own settings.
+
+14.3. No Support for Non-Western-European Languages
+
+As of the current release, Speakup only supports Western European languages.
+Support for the extended characters used by languages outside of the Western
+European family of languages is a work in progress.
+
+15. Using Speakup's Windowing Capability
+
+Speakup has the capability of defining and manipulating windows on the
+screen. Speakup uses the term "Window", to mean a user defined area of
+the screen. The key strokes for defining and manipulating Speakup
+windows are as follows:
+
+speakup + f2 -- Set the bounds of the window.
+Speakup + f3 -- clear the current window definition.
+speakup + f4 -- Toggle window silence on and off.
+speakup + keypad plus -- Say the currently defined window.
+
+These capabilities are useful for tracking a certain part of the screen
+without rereading the whole screen, or for silencing a part of the
+screen that is constantly changing, such as a clock or status line.
+
+There is no way to save these window settings, and you can only have one
+window defined for each virtual console. There is also no way to have
+windows automatically defined for specific applications.
+
+In order to define a window, use the review keys to move your reading
+cursor to the beginning of the area you want to define. Then press
+speakup + f2. Speakup will tell you that the window starts at the
+indicated row and column position. Then move the reading cursor to the
+end of the area to be defined as a window, and press speakup + f2 again.
+ If there is more than one line in the window, Speakup will tell you
+that the window ends at the indicated row and column position. If there
+is only one line in the window, then Speakup will tell you that the
+window is the specified line on the screen. If you are only defining a
+one line window, you can just press speakup + f2 twice after placing the
+reading cursor on the line you want to define as a window. It is not
+necessary to position the reading cursor at the end of the line in order
+to define the whole line as a window.
+
+16. Tools for Controlling Speakup
+
+The speakup distribution includes extra tools (in the tools directory)
+which were written to make speakup easier to use. This section will
+briefly describe the use of these tools.
+
+16.1. Speakupconf
+
+speakupconf began life as a contribution from Steve Holmes, a member of
+the speakup community. We would like to thank him for his work on the
+early versions of this project.
+
+This script may be installed as part of your linux distribution, but if
+it isn't, the recommended places to put it are /usr/local/bin or
+/usr/bin. This script can be run by any user, so it does not require
+root privileges.
+
+Speakupconf allows you to save and load your Speakup settings. It works
+by reading and writing the /sys files described above.
+
+The directory that speakupconf uses to store your settings depends on
+whether it is run from the root account. If you execute speakupconf as
+root, it uses the directory /etc/speakup. Otherwise, it uses the directory
+~/.speakup, where ~ is your home directory.
+Anyone who needs to use Speakup from your console can load his own custom
+settings with this script.
+
+speakupconf takes one required argument: load or save.
+Use the command
+speakupconf save
+to save your Speakup settings, and
+speakupconf load
+to load them into Speakup.
+A second argument may be specified to use an alternate directory to
+load or save the speakup parameters.
+
+16.2. Talkwith
+
+Charles Hallenbeck, another member of the speakup community, wrote the
+initial versions of this script, and we would also like to thank him for
+his work on it.
+
+This script needs root privileges to run, so if it is not installed as
+part of your linux distribution, the recommended places to install it
+are /usr/local/sbin or /usr/sbin.
+
+Talkwith allows you to switch synthesizers on the fly. It takes a synthesizer
+name as an argument. For instance,
+talkwith dectlk
+causes Speakup to use the DecTalk Express. If you wish to switch to a
+software synthesizer, you must also indicate which daemon you wish to
+use. There are two possible choices:
+spd and espeakup. spd is an abbreviation for speechd-up.
+If you wish to use espeakup for software synthesis, give the command
+talkwith soft espeakup
+To use speechd-up, type:
+talkwith soft spd
+Any arguments that follow the name of the daemon are passed to the daemon
+when it is invoked. For instance:
+talkwith espeakup --default-voice=fr
+causes espeakup to use the French voice.
+Note that talkwith must always be executed with root privileges.
+
+Talkwith does not attempt to load your settings after the new
+synthesizer is activated. You can use speakupconf to load your settings
+if desired.
+
+ GNU Free Documentation License
+ Version 1.2, November 2002
+
+
+ Copyright (C) 2000,2001,2002 Free Software Foundation, Inc.
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+
+0. PREAMBLE
+
+The purpose of this License is to make a manual, textbook, or other
+functional and useful document "free" in the sense of freedom: to
+assure everyone the effective freedom to copy and redistribute it,
+with or without modifying it, either commercially or noncommercially.
+Secondarily, this License preserves for the author and publisher a way
+to get credit for their work, while not being considered responsible
+for modifications made by others.
+
+This License is a kind of "copyleft", which means that derivative
+works of the document must themselves be free in the same sense. It
+complements the GNU General Public License, which is a copyleft
+license designed for free software.
+
+We have designed this License in order to use it for manuals for free
+software, because free software needs free documentation: a free
+program should come with manuals providing the same freedoms that the
+software does. But this License is not limited to software manuals;
+it can be used for any textual work, regardless of subject matter or
+whether it is published as a printed book. We recommend this License
+principally for works whose purpose is instruction or reference.
+
+
+1. APPLICABILITY AND DEFINITIONS
+
+This License applies to any manual or other work, in any medium, that
+contains a notice placed by the copyright holder saying it can be
+distributed under the terms of this License. Such a notice grants a
+world-wide, royalty-free license, unlimited in duration, to use that
+work under the conditions stated herein. The "Document", below,
+refers to any such manual or work. Any member of the public is a
+licensee, and is addressed as "you". You accept the license if you
+copy, modify or distribute the work in a way requiring permission
+under copyright law.
+
+A "Modified Version" of the Document means any work containing the
+Document or a portion of it, either copied verbatim, or with
+modifications and/or translated into another language.
+
+A "Secondary Section" is a named appendix or a front-matter section of
+the Document that deals exclusively with the relationship of the
+publishers or authors of the Document to the Document's overall subject
+(or to related matters) and contains nothing that could fall directly
+within that overall subject. (Thus, if the Document is in part a
+textbook of mathematics, a Secondary Section may not explain any
+mathematics.) The relationship could be a matter of historical
+connection with the subject or with related matters, or of legal,
+commercial, philosophical, ethical or political position regarding
+them.
+
+The "Invariant Sections" are certain Secondary Sections whose titles
+are designated, as being those of Invariant Sections, in the notice
+that says that the Document is released under this License. If a
+section does not fit the above definition of Secondary then it is not
+allowed to be designated as Invariant. The Document may contain zero
+Invariant Sections. If the Document does not identify any Invariant
+Sections then there are none.
+
+The "Cover Texts" are certain short passages of text that are listed,
+as Front-Cover Texts or Back-Cover Texts, in the notice that says that
+the Document is released under this License. A Front-Cover Text may
+be at most 5 words, and a Back-Cover Text may be at most 25 words.
+
+A "Transparent" copy of the Document means a machine-readable copy,
+represented in a format whose specification is available to the
+general public, that is suitable for revising the document
+straightforwardly with generic text editors or (for images composed of
+pixels) generic paint programs or (for drawings) some widely available
+drawing editor, and that is suitable for input to text formatters or
+for automatic translation to a variety of formats suitable for input
+to text formatters. A copy made in an otherwise Transparent file
+format whose markup, or absence of markup, has been arranged to thwart
+or discourage subsequent modification by readers is not Transparent.
+An image format is not Transparent if used for any substantial amount
+of text. A copy that is not "Transparent" is called "Opaque".
+
+Examples of suitable formats for Transparent copies include plain
+ASCII without markup, Texinfo input format, LaTeX input format, SGML
+or XML using a publicly available DTD, and standard-conforming simple
+HTML, PostScript or PDF designed for human modification. Examples of
+transparent image formats include PNG, XCF and JPG. Opaque formats
+include proprietary formats that can be read and edited only by
+proprietary word processors, SGML or XML for which the DTD and/or
+processing tools are not generally available, and the
+machine-generated HTML, PostScript or PDF produced by some word
+processors for output purposes only.
+
+The "Title Page" means, for a printed book, the title page itself,
+plus such following pages as are needed to hold, legibly, the material
+this License requires to appear in the title page. For works in
+formats which do not have any title page as such, "Title Page" means
+the text near the most prominent appearance of the work's title,
+preceding the beginning of the body of the text.
+
+A section "Entitled XYZ" means a named subunit of the Document whose
+title either is precisely XYZ or contains XYZ in parentheses following
+text that translates XYZ in another language. (Here XYZ stands for a
+specific section name mentioned below, such as "Acknowledgements",
+"Dedications", "Endorsements", or "History".) To "Preserve the Title"
+of such a section when you modify the Document means that it remains a
+section "Entitled XYZ" according to this definition.
+
+The Document may include Warranty Disclaimers next to the notice which
+states that this License applies to the Document. These Warranty
+Disclaimers are considered to be included by reference in this
+License, but only as regards disclaiming warranties: any other
+implication that these Warranty Disclaimers may have is void and has
+no effect on the meaning of this License.
+
+
+2. VERBATIM COPYING
+
+You may copy and distribute the Document in any medium, either
+commercially or noncommercially, provided that this License, the
+copyright notices, and the license notice saying this License applies
+to the Document are reproduced in all copies, and that you add no other
+conditions whatsoever to those of this License. You may not use
+technical measures to obstruct or control the reading or further
+copying of the copies you make or distribute. However, you may accept
+compensation in exchange for copies. If you distribute a large enough
+number of copies you must also follow the conditions in section 3.
+
+You may also lend copies, under the same conditions stated above, and
+you may publicly display copies.
+
+
+3. COPYING IN QUANTITY
+
+If you publish printed copies (or copies in media that commonly have
+printed covers) of the Document, numbering more than 100, and the
+Document's license notice requires Cover Texts, you must enclose the
+copies in covers that carry, clearly and legibly, all these Cover
+Texts: Front-Cover Texts on the front cover, and Back-Cover Texts on
+the back cover. Both covers must also clearly and legibly identify
+you as the publisher of these copies. The front cover must present
+the full title with all words of the title equally prominent and
+visible. You may add other material on the covers in addition.
+Copying with changes limited to the covers, as long as they preserve
+the title of the Document and satisfy these conditions, can be treated
+as verbatim copying in other respects.
+
+If the required texts for either cover are too voluminous to fit
+legibly, you should put the first ones listed (as many as fit
+reasonably) on the actual cover, and continue the rest onto adjacent
+pages.
+
+If you publish or distribute Opaque copies of the Document numbering
+more than 100, you must either include a machine-readable Transparent
+copy along with each Opaque copy, or state in or with each Opaque copy
+a computer-network location from which the general network-using
+public has access to download using public-standard network protocols
+a complete Transparent copy of the Document, free of added material.
+If you use the latter option, you must take reasonably prudent steps,
+when you begin distribution of Opaque copies in quantity, to ensure
+that this Transparent copy will remain thus accessible at the stated
+location until at least one year after the last time you distribute an
+Opaque copy (directly or through your agents or retailers) of that
+edition to the public.
+
+It is requested, but not required, that you contact the authors of the
+Document well before redistributing any large number of copies, to give
+them a chance to provide you with an updated version of the Document.
+
+
+4. MODIFICATIONS
+
+You may copy and distribute a Modified Version of the Document under
+the conditions of sections 2 and 3 above, provided that you release
+the Modified Version under precisely this License, with the Modified
+Version filling the role of the Document, thus licensing distribution
+and modification of the Modified Version to whoever possesses a copy
+of it. In addition, you must do these things in the Modified Version:
+
+A. Use in the Title Page (and on the covers, if any) a title distinct
+ from that of the Document, and from those of previous versions
+ (which should, if there were any, be listed in the History section
+ of the Document). You may use the same title as a previous version
+ if the original publisher of that version gives permission.
+B. List on the Title Page, as authors, one or more persons or entities
+ responsible for authorship of the modifications in the Modified
+ Version, together with at least five of the principal authors of the
+ Document (all of its principal authors, if it has fewer than five),
+ unless they release you from this requirement.
+C. State on the Title page the name of the publisher of the
+ Modified Version, as the publisher.
+D. Preserve all the copyright notices of the Document.
+E. Add an appropriate copyright notice for your modifications
+ adjacent to the other copyright notices.
+F. Include, immediately after the copyright notices, a license notice
+ giving the public permission to use the Modified Version under the
+ terms of this License, in the form shown in the Addendum below.
+G. Preserve in that license notice the full lists of Invariant Sections
+ and required Cover Texts given in the Document's license notice.
+H. Include an unaltered copy of this License.
+I. Preserve the section Entitled "History", Preserve its Title, and add
+ to it an item stating at least the title, year, new authors, and
+ publisher of the Modified Version as given on the Title Page. If
+ there is no section Entitled "History" in the Document, create one
+ stating the title, year, authors, and publisher of the Document as
+ given on its Title Page, then add an item describing the Modified
+ Version as stated in the previous sentence.
+J. Preserve the network location, if any, given in the Document for
+ public access to a Transparent copy of the Document, and likewise
+ the network locations given in the Document for previous versions
+ it was based on. These may be placed in the "History" section.
+ You may omit a network location for a work that was published at
+ least four years before the Document itself, or if the original
+ publisher of the version it refers to gives permission.
+K. For any section Entitled "Acknowledgements" or "Dedications",
+ Preserve the Title of the section, and preserve in the section all
+ the substance and tone of each of the contributor acknowledgements
+ and/or dedications given therein.
+L. Preserve all the Invariant Sections of the Document,
+ unaltered in their text and in their titles. Section numbers
+ or the equivalent are not considered part of the section titles.
+M. Delete any section Entitled "Endorsements". Such a section
+ may not be included in the Modified Version.
+N. Do not retitle any existing section to be Entitled "Endorsements"
+ or to conflict in title with any Invariant Section.
+O. Preserve any Warranty Disclaimers.
+
+If the Modified Version includes new front-matter sections or
+appendices that qualify as Secondary Sections and contain no material
+copied from the Document, you may at your option designate some or all
+of these sections as invariant. To do this, add their titles to the
+list of Invariant Sections in the Modified Version's license notice.
+These titles must be distinct from any other section titles.
+
+You may add a section Entitled "Endorsements", provided it contains
+nothing but endorsements of your Modified Version by various
+parties--for example, statements of peer review or that the text has
+been approved by an organization as the authoritative definition of a
+standard.
+
+You may add a passage of up to five words as a Front-Cover Text, and a
+passage of up to 25 words as a Back-Cover Text, to the end of the list
+of Cover Texts in the Modified Version. Only one passage of
+Front-Cover Text and one of Back-Cover Text may be added by (or
+through arrangements made by) any one entity. If the Document already
+includes a cover text for the same cover, previously added by you or
+by arrangement made by the same entity you are acting on behalf of,
+you may not add another; but you may replace the old one, on explicit
+permission from the previous publisher that added the old one.
+
+The author(s) and publisher(s) of the Document do not by this License
+give permission to use their names for publicity for or to assert or
+imply endorsement of any Modified Version.
+
+
+5. COMBINING DOCUMENTS
+
+You may combine the Document with other documents released under this
+License, under the terms defined in section 4 above for modified
+versions, provided that you include in the combination all of the
+Invariant Sections of all of the original documents, unmodified, and
+list them all as Invariant Sections of your combined work in its
+license notice, and that you preserve all their Warranty Disclaimers.
+
+The combined work need only contain one copy of this License, and
+multiple identical Invariant Sections may be replaced with a single
+copy. If there are multiple Invariant Sections with the same name but
+different contents, make the title of each such section unique by
+adding at the end of it, in parentheses, the name of the original
+author or publisher of that section if known, or else a unique number.
+Make the same adjustment to the section titles in the list of
+Invariant Sections in the license notice of the combined work.
+
+In the combination, you must combine any sections Entitled "History"
+in the various original documents, forming one section Entitled
+"History"; likewise combine any sections Entitled "Acknowledgements",
+and any sections Entitled "Dedications". You must delete all sections
+Entitled "Endorsements".
+
+
+6. COLLECTIONS OF DOCUMENTS
+
+You may make a collection consisting of the Document and other documents
+released under this License, and replace the individual copies of this
+License in the various documents with a single copy that is included in
+the collection, provided that you follow the rules of this License for
+verbatim copying of each of the documents in all other respects.
+
+You may extract a single document from such a collection, and distribute
+it individually under this License, provided you insert a copy of this
+License into the extracted document, and follow this License in all
+other respects regarding verbatim copying of that document.
+
+
+7. AGGREGATION WITH INDEPENDENT WORKS
+
+A compilation of the Document or its derivatives with other separate
+and independent documents or works, in or on a volume of a storage or
+distribution medium, is called an "aggregate" if the copyright
+resulting from the compilation is not used to limit the legal rights
+of the compilation's users beyond what the individual works permit.
+When the Document is included in an aggregate, this License does not
+apply to the other works in the aggregate which are not themselves
+derivative works of the Document.
+
+If the Cover Text requirement of section 3 is applicable to these
+copies of the Document, then if the Document is less than one half of
+the entire aggregate, the Document's Cover Texts may be placed on
+covers that bracket the Document within the aggregate, or the
+electronic equivalent of covers if the Document is in electronic form.
+Otherwise they must appear on printed covers that bracket the whole
+aggregate.
+
+
+8. TRANSLATION
+
+Translation is considered a kind of modification, so you may
+distribute translations of the Document under the terms of section 4.
+Replacing Invariant Sections with translations requires special
+permission from their copyright holders, but you may include
+translations of some or all Invariant Sections in addition to the
+original versions of these Invariant Sections. You may include a
+translation of this License, and all the license notices in the
+Document, and any Warranty Disclaimers, provided that you also include
+the original English version of this License and the original versions
+of those notices and disclaimers. In case of a disagreement between
+the translation and the original version of this License or a notice
+or disclaimer, the original version will prevail.
+
+If a section in the Document is Entitled "Acknowledgements",
+"Dedications", or "History", the requirement (section 4) to Preserve
+its Title (section 1) will typically require changing the actual
+title.
+
+
+9. TERMINATION
+
+You may not copy, modify, sublicense, or distribute the Document except
+as expressly provided for under this License. Any other attempt to
+copy, modify, sublicense or distribute the Document is void, and will
+automatically terminate your rights under this License. However,
+parties who have received copies, or rights, from you under this
+License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+
+10. FUTURE REVISIONS OF THIS LICENSE
+
+The Free Software Foundation may publish new, revised versions
+of the GNU Free Documentation License from time to time. Such new
+versions will be similar in spirit to the present version, but may
+differ in detail to address new problems or concerns. See
+https://www.gnu.org/copyleft/.
+
+Each version of the License is given a distinguishing version number.
+If the Document specifies that a particular numbered version of this
+License "or any later version" applies to it, you have the option of
+following the terms and conditions either of that specified version or
+of any later version that has been published (not as a draft) by the
+Free Software Foundation. If the Document does not specify a version
+number of this License, you may choose any version ever published (not
+as a draft) by the Free Software Foundation.
+
+
+ADDENDUM: How to use this License for your documents
+
+To use this License in a document you have written, include a copy of
+the License in the document and put the following copyright and
+license notices just after the title page:
+
+ Copyright (c) YEAR YOUR NAME.
+ Permission is granted to copy, distribute and/or modify this document
+ under the terms of the GNU Free Documentation License, Version 1.2
+ or any later version published by the Free Software Foundation;
+ with no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts.
+ A copy of the license is included in the section entitled "GNU
+ Free Documentation License".
+
+If you have Invariant Sections, Front-Cover Texts and Back-Cover Texts,
+replace the "with...Texts." line with this:
+
+ with the Invariant Sections being LIST THEIR TITLES, with the
+ Front-Cover Texts being LIST, and with the Back-Cover Texts being LIST.
+
+If you have Invariant Sections without Cover Texts, or some other
+combination of the three, merge those two alternatives to suit the
+situation.
+
+If your document contains nontrivial examples of program code, we
+recommend releasing these examples in parallel under your choice of
+free software license, such as the GNU General Public License,
+to permit their use in free software.
+
+The End.
diff --git a/Documentation/admin-guide/svga.rst b/Documentation/admin-guide/svga.rst
new file mode 100644
index 000000000000..9eb1e0738e84
--- /dev/null
+++ b/Documentation/admin-guide/svga.rst
@@ -0,0 +1,250 @@
+.. include:: <isonum.txt>
+
+=================================
+Video Mode Selection Support 2.13
+=================================
+
+:Copyright: |copy| 1995--1999 Martin Mares, <mj@ucw.cz>
+
+Intro
+~~~~~
+
+This small document describes the "Video Mode Selection" feature which
+allows the use of various special video modes supported by the video BIOS. Due
+to usage of the BIOS, the selection is limited to boot time (before the
+kernel decompression starts) and works only on 80X86 machines that are
+booted through BIOS firmware (as opposed to through UEFI, kexec, etc.).
+
+.. note::
+
+ Short intro for the impatient: Just use vga=ask for the first time,
+ enter ``scan`` on the video mode prompt, pick the mode you want to use,
+ remember its mode ID (the four-digit hexadecimal number) and then
+ set the vga parameter to this number (converted to decimal first).
+
+The video mode to be used is selected by a kernel parameter which can be
+specified in the kernel Makefile (the SVGA_MODE=... line) or by the "vga=..."
+option of LILO (or some other boot loader you use) or by the "xrandr" utility
+(present in standard Linux utility packages). You can use the following values
+of this parameter::
+
+ NORMAL_VGA - Standard 80x25 mode available on all display adapters.
+
+ EXTENDED_VGA - Standard 8-pixel font mode: 80x43 on EGA, 80x50 on VGA.
+
+ ASK_VGA - Display a video mode menu upon startup (see below).
+
+ 0..35 - Menu item number (when you have used the menu to view the list of
+ modes available on your adapter, you can specify the menu item you want
+ to use). 0..9 correspond to "0".."9", 10..35 to "a".."z". Warning: the
+ mode list displayed may vary as the kernel version changes, because the
+ modes are listed in a "first detected -- first displayed" manner. It's
+ better to use absolute mode numbers instead.
+
+ 0x.... - Hexadecimal video mode ID (also displayed on the menu, see below
+ for exact meaning of the ID). Warning: LILO doesn't support
+ hexadecimal numbers -- you have to convert it to decimal manually.
+
+Menu
+~~~~
+
+The ASK_VGA mode causes the kernel to offer a video mode menu upon
+bootup. It displays a "Press <RETURN> to see video modes available, <SPACE>
+to continue or wait 30 secs" message. If you press <RETURN>, you enter the
+menu, if you press <SPACE> or wait 30 seconds, the kernel will boot up in
+the standard 80x25 mode.
+
+The menu looks like::
+
+ Video adapter: <name-of-detected-video-adapter>
+ Mode: COLSxROWS:
+ 0 0F00 80x25
+ 1 0F01 80x50
+ 2 0F02 80x43
+ 3 0F03 80x26
+ ....
+ Enter mode number or ``scan``: <flashing-cursor-here>
+
+<name-of-detected-video-adapter> tells what video adapter did Linux detect
+-- it's either a generic adapter name (MDA, CGA, HGC, EGA, VGA, VESA VGA [a VGA
+with VESA-compliant BIOS]) or a chipset name (e.g., Trident). Direct detection
+of chipsets is turned off by default as it's inherently unreliable due to
+absolutely insane PC design.
+
+"0 0F00 80x25" means that the first menu item (the menu items are numbered
+from "0" to "9" and from "a" to "z") is a 80x25 mode with ID=0x0f00 (see the
+next section for a description of mode IDs).
+
+<flashing-cursor-here> encourages you to enter the item number or mode ID
+you wish to set and press <RETURN>. If the computer complains something about
+"Unknown mode ID", it is trying to tell you that it isn't possible to set such
+a mode. It's also possible to press only <RETURN> which leaves the current mode.
+
+The mode list usually contains a few basic modes and some VESA modes. In
+case your chipset has been detected, some chipset-specific modes are shown as
+well (some of these might be missing or unusable on your machine as different
+BIOSes are often shipped with the same card and the mode numbers depend purely
+on the VGA BIOS).
+
+The modes displayed on the menu are partially sorted: The list starts with
+the standard modes (80x25 and 80x50) followed by "special" modes (80x28 and
+80x43), local modes (if the local modes feature is enabled), VESA modes and
+finally SVGA modes for the auto-detected adapter.
+
+If you are not happy with the mode list offered (e.g., if you think your card
+is able to do more), you can enter "scan" instead of item number / mode ID. The
+program will try to ask the BIOS for all possible video mode numbers and test
+what happens then. The screen will be probably flashing wildly for some time and
+strange noises will be heard from inside the monitor and so on and then, really
+all consistent video modes supported by your BIOS will appear (plus maybe some
+``ghost modes``). If you are afraid this could damage your monitor, don't use
+this function.
+
+After scanning, the mode ordering is a bit different: the auto-detected SVGA
+modes are not listed at all and the modes revealed by ``scan`` are shown before
+all VESA modes.
+
+Mode IDs
+~~~~~~~~
+
+Because of the complexity of all the video stuff, the video mode IDs
+used here are also a bit complex. A video mode ID is a 16-bit number usually
+expressed in a hexadecimal notation (starting with "0x"). You can set a mode
+by entering its mode directly if you know it even if it isn't shown on the menu.
+
+The ID numbers can be divided to those regions::
+
+ 0x0000 to 0x00ff - menu item references. 0x0000 is the first item. Don't use
+ outside the menu as this can change from boot to boot (especially if you
+ have used the ``scan`` feature).
+
+ 0x0100 to 0x017f - standard BIOS modes. The ID is a BIOS video mode number
+ (as presented to INT 10, function 00) increased by 0x0100.
+
+ 0x0200 to 0x08ff - VESA BIOS modes. The ID is a VESA mode ID increased by
+ 0x0100. All VESA modes should be autodetected and shown on the menu.
+
+ 0x0900 to 0x09ff - Video7 special modes. Set by calling INT 0x10, AX=0x6f05.
+ (Usually 940=80x43, 941=132x25, 942=132x44, 943=80x60, 944=100x60,
+ 945=132x28 for the standard Video7 BIOS)
+
+ 0x0f00 to 0x0fff - special modes (they are set by various tricks -- usually
+ by modifying one of the standard modes). Currently available:
+ 0x0f00 standard 80x25, don't reset mode if already set (=FFFF)
+ 0x0f01 standard with 8-point font: 80x43 on EGA, 80x50 on VGA
+ 0x0f02 VGA 80x43 (VGA switched to 350 scanlines with a 8-point font)
+ 0x0f03 VGA 80x28 (standard VGA scans, but 14-point font)
+ 0x0f04 leave current video mode
+ 0x0f05 VGA 80x30 (480 scans, 16-point font)
+ 0x0f06 VGA 80x34 (480 scans, 14-point font)
+ 0x0f07 VGA 80x60 (480 scans, 8-point font)
+ 0x0f08 Graphics hack (see the VIDEO_GFX_HACK paragraph below)
+
+ 0x1000 to 0x7fff - modes specified by resolution. The code has a "0xRRCC"
+ form where RR is a number of rows and CC is a number of columns.
+ E.g., 0x1950 corresponds to a 80x25 mode, 0x2b84 to 132x43 etc.
+ This is the only fully portable way to refer to a non-standard mode,
+ but it relies on the mode being found and displayed on the menu
+ (remember that mode scanning is not done automatically).
+
+ 0xff00 to 0xffff - aliases for backward compatibility:
+ 0xffff equivalent to 0x0f00 (standard 80x25)
+ 0xfffe equivalent to 0x0f01 (EGA 80x43 or VGA 80x50)
+
+If you add 0x8000 to the mode ID, the program will try to recalculate
+vertical display timing according to mode parameters, which can be used to
+eliminate some annoying bugs of certain VGA BIOSes (usually those used for
+cards with S3 chipsets and old Cirrus Logic BIOSes) -- mainly extra lines at the
+end of the display.
+
+Options
+~~~~~~~
+
+Build options for arch/x86/boot/* are selected by the kernel kconfig
+utility and the kernel .config file.
+
+VIDEO_GFX_HACK - includes special hack for setting of graphics modes
+to be used later by special drivers.
+Allows to set _any_ BIOS mode including graphic ones and forcing specific
+text screen resolution instead of peeking it from BIOS variables. Don't use
+unless you think you know what you're doing. To activate this setup, use
+mode number 0x0f08 (see the Mode IDs section above).
+
+Still doesn't work?
+~~~~~~~~~~~~~~~~~~~
+
+When the mode detection doesn't work (e.g., the mode list is incorrect or
+the machine hangs instead of displaying the menu), try to switch off some of
+the configuration options listed under "Options". If it fails, you can still use
+your kernel with the video mode set directly via the kernel parameter.
+
+In either case, please send me a bug report containing what _exactly_
+happens and how do the configuration switches affect the behaviour of the bug.
+
+If you start Linux from M$-DOS, you might also use some DOS tools for
+video mode setting. In this case, you must specify the 0x0f04 mode ("leave
+current settings") to Linux, because if you don't and you use any non-standard
+mode, Linux will switch to 80x25 automatically.
+
+If you set some extended mode and there's one or more extra lines on the
+bottom of the display containing already scrolled-out text, your VGA BIOS
+contains the most common video BIOS bug called "incorrect vertical display
+end setting". Adding 0x8000 to the mode ID might fix the problem. Unfortunately,
+this must be done manually -- no autodetection mechanisms are available.
+
+History
+~~~~~~~
+
+=============== ================================================================
+1.0 (??-Nov-95) First version supporting all adapters supported by the old
+ setup.S + Cirrus Logic 54XX. Present in some 1.3.4? kernels
+ and then removed due to instability on some machines.
+2.0 (28-Jan-96) Rewritten from scratch. Cirrus Logic 64XX support added, almost
+ everything is configurable, the VESA support should be much more
+ stable, explicit mode numbering allowed, "scan" implemented etc.
+2.1 (30-Jan-96) VESA modes moved to 0x200-0x3ff. Mode selection by resolution
+ supported. Few bugs fixed. VESA modes are listed prior to
+ modes supplied by SVGA autodetection as they are more reliable.
+ CLGD autodetect works better. Doesn't depend on 80x25 being
+ active when started. Scanning fixed. 80x43 (any VGA) added.
+ Code cleaned up.
+2.2 (01-Feb-96) EGA 80x43 fixed. VESA extended to 0x200-0x4ff (non-standard 02XX
+ VESA modes work now). Display end bug workaround supported.
+ Special modes renumbered to allow adding of the "recalculate"
+ flag, 0xffff and 0xfffe became aliases instead of real IDs.
+ Screen contents retained during mode changes.
+2.3 (15-Mar-96) Changed to work with 1.3.74 kernel.
+2.4 (18-Mar-96) Added patches by Hans Lermen fixing a memory overwrite problem
+ with some boot loaders. Memory management rewritten to reflect
+ these changes. Unfortunately, screen contents retaining works
+ only with some loaders now.
+ Added a Tseng 132x60 mode.
+2.5 (19-Mar-96) Fixed a VESA mode scanning bug introduced in 2.4.
+2.6 (25-Mar-96) Some VESA BIOS errors not reported -- it fixes error reports on
+ several cards with broken VESA code (e.g., ATI VGA).
+2.7 (09-Apr-96) - Accepted all VESA modes in range 0x100 to 0x7ff, because some
+ cards use very strange mode numbers.
+ - Added Realtek VGA modes (thanks to Gonzalo Tornaria).
+ - Hardware testing order slightly changed, tests based on ROM
+ contents done as first.
+ - Added support for special Video7 mode switching functions
+ (thanks to Tom Vander Aa).
+ - Added 480-scanline modes (especially useful for notebooks,
+ original version written by hhanemaa@cs.ruu.nl, patched by
+ Jeff Chua, rewritten by me).
+ - Screen store/restore fixed.
+2.8 (14-Apr-96) - Previous release was not compilable without CONFIG_VIDEO_SVGA.
+ - Better recognition of text modes during mode scan.
+2.9 (12-May-96) - Ignored VESA modes 0x80 - 0xff (more VESA BIOS bugs!)
+2.10(11-Nov-96) - The whole thing made optional.
+ - Added the CONFIG_VIDEO_400_HACK switch.
+ - Added the CONFIG_VIDEO_GFX_HACK switch.
+ - Code cleanup.
+2.11(03-May-97) - Yet another cleanup, now including also the documentation.
+ - Direct testing of SVGA adapters turned off by default, ``scan``
+ offered explicitly on the prompt line.
+ - Removed the doc section describing adding of new probing
+ functions as I try to get rid of _all_ hardware probing here.
+2.12(25-May-98) Added support for VESA frame buffer graphics.
+2.13(14-May-99) Minor documentation fixes.
+=============== ================================================================
diff --git a/Documentation/admin-guide/syscall-user-dispatch.rst b/Documentation/admin-guide/syscall-user-dispatch.rst
new file mode 100644
index 000000000000..c1768d9e80fa
--- /dev/null
+++ b/Documentation/admin-guide/syscall-user-dispatch.rst
@@ -0,0 +1,99 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================
+Syscall User Dispatch
+=====================
+
+Background
+----------
+
+Compatibility layers like Wine need a way to efficiently emulate system
+calls of only a part of their process - the part that has the
+incompatible code - while being able to execute native syscalls without
+a high performance penalty on the native part of the process. Seccomp
+falls short on this task, since it has limited support to efficiently
+filter syscalls based on memory regions, and it doesn't support removing
+filters. Therefore a new mechanism is necessary.
+
+Syscall User Dispatch brings the filtering of the syscall dispatcher
+address back to userspace. The application is in control of a flip
+switch, indicating the current personality of the process. A
+multiple-personality application can then flip the switch without
+invoking the kernel, when crossing the compatibility layer API
+boundaries, to enable/disable the syscall redirection and execute
+syscalls directly (disabled) or send them to be emulated in userspace
+through a SIGSYS.
+
+The goal of this design is to provide very quick compatibility layer
+boundary crosses, which is achieved by not executing a syscall to change
+personality every time the compatibility layer executes. Instead, a
+userspace memory region exposed to the kernel indicates the current
+personality, and the application simply modifies that variable to
+configure the mechanism.
+
+There is a relatively high cost associated with handling signals on most
+architectures, like x86, but at least for Wine, syscalls issued by
+native Windows code are currently not known to be a performance problem,
+since they are quite rare, at least for modern gaming applications.
+
+Since this mechanism is designed to capture syscalls issued by
+non-native applications, it must function on syscalls whose invocation
+ABI is completely unexpected to Linux. Syscall User Dispatch, therefore
+doesn't rely on any of the syscall ABI to make the filtering. It uses
+only the syscall dispatcher address and the userspace key.
+
+As the ABI of these intercepted syscalls is unknown to Linux, these
+syscalls are not instrumentable via ptrace or the syscall tracepoints.
+
+Interface
+---------
+
+A thread can setup this mechanism on supported kernels by executing the
+following prctl:
+
+ prctl(PR_SET_SYSCALL_USER_DISPATCH, <op>, <offset>, <length>, [selector])
+
+<op> is either PR_SYS_DISPATCH_EXCLUSIVE_ON/PR_SYS_DISPATCH_INCLUSIVE_ON
+or PR_SYS_DISPATCH_OFF, to enable and disable the mechanism globally for
+that thread. When PR_SYS_DISPATCH_OFF is used, the other fields must be zero.
+
+For PR_SYS_DISPATCH_EXCLUSIVE_ON [<offset>, <offset>+<length>) delimit
+a memory region interval from which syscalls are always executed directly,
+regardless of the userspace selector. This provides a fast path for the
+C library, which includes the most common syscall dispatchers in the native
+code applications, and also provides a way for the signal handler to return
+without triggering a nested SIGSYS on (rt\_)sigreturn. Users of this
+interface should make sure that at least the signal trampoline code is
+included in this region. In addition, for syscalls that implement the
+trampoline code on the vDSO, that trampoline is never intercepted.
+
+For PR_SYS_DISPATCH_INCLUSIVE_ON [<offset>, <offset>+<length>) delimit
+a memory region interval from which syscalls are dispatched based on
+the userspace selector. Syscalls from outside of the range are always
+executed directly.
+
+[selector] is a pointer to a char-sized region in the process memory
+region, that provides a quick way to enable disable syscall redirection
+thread-wide, without the need to invoke the kernel directly. selector
+can be set to SYSCALL_DISPATCH_FILTER_ALLOW or SYSCALL_DISPATCH_FILTER_BLOCK.
+Any other value should terminate the program with a SIGSYS.
+
+Additionally, a tasks syscall user dispatch configuration can be peeked
+and poked via the PTRACE_(GET|SET)_SYSCALL_USER_DISPATCH_CONFIG ptrace
+requests. This is useful for checkpoint/restart software.
+
+Security Notes
+--------------
+
+Syscall User Dispatch provides functionality for compatibility layers to
+quickly capture system calls issued by a non-native part of the
+application, while not impacting the Linux native regions of the
+process. It is not a mechanism for sandboxing system calls, and it
+should not be seen as a security mechanism, since it is trivial for a
+malicious application to subvert the mechanism by jumping to an allowed
+dispatcher region prior to executing the syscall, or to discover the
+address and modify the selector value. If the use case requires any
+kind of security sandboxing, Seccomp should be used instead.
+
+Any fork or exec of the existing process resets the mechanism to
+PR_SYS_DISPATCH_OFF.
diff --git a/Documentation/admin-guide/sysctl/abi.rst b/Documentation/admin-guide/sysctl/abi.rst
new file mode 100644
index 000000000000..4e6db0a2a4c0
--- /dev/null
+++ b/Documentation/admin-guide/sysctl/abi.rst
@@ -0,0 +1,34 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+================================
+Documentation for /proc/sys/abi/
+================================
+
+.. See scripts/check-sysctl-docs to keep this up to date:
+.. scripts/check-sysctl-docs -vtable="abi" \
+.. Documentation/admin-guide/sysctl/abi.rst \
+.. $(git grep -l register_sysctl_)
+
+Copyright (c) 2020, Stephen Kitt
+
+For general info, see Documentation/admin-guide/sysctl/index.rst.
+
+------------------------------------------------------------------------------
+
+The files in ``/proc/sys/abi`` can be used to see and modify
+ABI-related settings.
+
+Currently, these files might (depending on your configuration)
+show up in ``/proc/sys/kernel``:
+
+.. contents:: :local:
+
+vsyscall32 (x86)
+================
+
+Determines whether the kernels maps a vDSO page into 32-bit processes;
+can be set to 1 to enable, or 0 to disable. Defaults to enabled if
+``CONFIG_COMPAT_VDSO`` is set, disabled otherwise.
+
+This controls the same setting as the ``vdso32`` kernel boot
+parameter.
diff --git a/Documentation/admin-guide/sysctl/fs.rst b/Documentation/admin-guide/sysctl/fs.rst
new file mode 100644
index 000000000000..9b7f65c3efd8
--- /dev/null
+++ b/Documentation/admin-guide/sysctl/fs.rst
@@ -0,0 +1,374 @@
+===============================
+Documentation for /proc/sys/fs/
+===============================
+
+Copyright (c) 1998, 1999, Rik van Riel <riel@nl.linux.org>
+
+Copyright (c) 2009, Shen Feng<shen@cn.fujitsu.com>
+
+For general info and legal blurb, please look in intro.rst.
+
+------------------------------------------------------------------------------
+
+This file contains documentation for the sysctl files and directories
+in ``/proc/sys/fs/``.
+
+The files in this directory can be used to tune and monitor
+miscellaneous and general things in the operation of the Linux
+kernel. Since some of the files *can* be used to screw up your
+system, it is advisable to read both documentation and source
+before actually making adjustments.
+
+1. /proc/sys/fs
+===============
+
+Currently, these files might (depending on your configuration)
+show up in ``/proc/sys/fs``:
+
+.. contents:: :local:
+
+
+aio-nr & aio-max-nr
+-------------------
+
+``aio-nr`` shows the current system-wide number of asynchronous io
+requests. ``aio-max-nr`` allows you to change the maximum value
+``aio-nr`` can grow to. If ``aio-nr`` reaches ``aio-nr-max`` then
+``io_setup`` will fail with ``EAGAIN``. Note that raising
+``aio-max-nr`` does not result in the
+pre-allocation or re-sizing of any kernel data structures.
+
+dentry-negative
+----------------------------
+
+Policy for negative dentries. Set to 1 to always delete the dentry when a
+file is removed, and 0 to disable it. By default, this behavior is disabled.
+
+dentry-state
+------------
+
+This file shows the values in ``struct dentry_stat_t``, as defined in
+``fs/dcache.c``::
+
+ struct dentry_stat_t dentry_stat {
+ long nr_dentry;
+ long nr_unused;
+ long age_limit; /* age in seconds */
+ long want_pages; /* pages requested by system */
+ long nr_negative; /* # of unused negative dentries */
+ long dummy; /* Reserved for future use */
+ };
+
+Dentries are dynamically allocated and deallocated.
+
+``nr_dentry`` shows the total number of dentries allocated (active
++ unused). ``nr_unused shows`` the number of dentries that are not
+actively used, but are saved in the LRU list for future reuse.
+
+``age_limit`` is the age in seconds after which dcache entries
+can be reclaimed when memory is short and ``want_pages`` is
+nonzero when ``shrink_dcache_pages()`` has been called and the
+dcache isn't pruned yet.
+
+``nr_negative`` shows the number of unused dentries that are also
+negative dentries which do not map to any files. Instead,
+they help speeding up rejection of non-existing files provided
+by the users.
+
+
+file-max & file-nr
+------------------
+
+The value in ``file-max`` denotes the maximum number of file-
+handles that the Linux kernel will allocate. When you get lots
+of error messages about running out of file handles, you might
+want to increase this limit.
+
+Historically,the kernel was able to allocate file handles
+dynamically, but not to free them again. The three values in
+``file-nr`` denote the number of allocated file handles, the number
+of allocated but unused file handles, and the maximum number of
+file handles. Linux 2.6 and later always reports 0 as the number of free
+file handles -- this is not an error, it just means that the
+number of allocated file handles exactly matches the number of
+used file handles.
+
+Attempts to allocate more file descriptors than ``file-max`` are
+reported with ``printk``, look for::
+
+ VFS: file-max limit <number> reached
+
+in the kernel logs.
+
+
+inode-nr & inode-state
+----------------------
+
+As with file handles, the kernel allocates the inode structures
+dynamically, but can't free them yet.
+
+The file ``inode-nr`` contains the first two items from
+``inode-state``, so we'll skip to that file...
+
+``inode-state`` contains three actual numbers and four dummies.
+The actual numbers are, in order of appearance, ``nr_inodes``,
+``nr_free_inodes`` and ``preshrink``.
+
+``nr_inodes`` stands for the number of inodes the system has
+allocated.
+
+``nr_free_inodes`` represents the number of free inodes (?) and
+preshrink is nonzero when the
+system needs to prune the inode list instead of allocating
+more.
+
+
+mount-max
+---------
+
+This denotes the maximum number of mounts that may exist
+in a mount namespace.
+
+
+nr_open
+-------
+
+This denotes the maximum number of file-handles a process can
+allocate. Default value is 1024*1024 (1048576) which should be
+enough for most machines. Actual limit depends on ``RLIMIT_NOFILE``
+resource limit.
+
+
+overflowgid & overflowuid
+-------------------------
+
+Some filesystems only support 16-bit UIDs and GIDs, although in Linux
+UIDs and GIDs are 32 bits. When one of these filesystems is mounted
+with writes enabled, any UID or GID that would exceed 65535 is translated
+to a fixed value before being written to disk.
+
+These sysctls allow you to change the value of the fixed UID and GID.
+The default is 65534.
+
+
+pipe-user-pages-hard
+--------------------
+
+Maximum total number of pages a non-privileged user may allocate for pipes.
+Once this limit is reached, no new pipes may be allocated until usage goes
+below the limit again. When set to 0, no limit is applied, which is the default
+setting.
+
+
+pipe-user-pages-soft
+--------------------
+
+Maximum total number of pages a non-privileged user may allocate for pipes
+before the pipe size gets limited to two pages. Once this limit is reached,
+new pipes will be limited to two pages in size for this user in order to
+limit total memory usage, and trying to increase them using ``fcntl()`` will be
+denied until usage goes below the limit again. The default value allows to
+allocate up to 1024 pipes at their default size. When set to 0, no limit is
+applied.
+
+
+protected_fifos
+---------------
+
+The intent of this protection is to avoid unintentional writes to
+an attacker-controlled FIFO, where a program expected to create a regular
+file.
+
+When set to "0", writing to FIFOs is unrestricted.
+
+When set to "1" don't allow ``O_CREAT`` open on FIFOs that we don't own
+in world writable sticky directories, unless they are owned by the
+owner of the directory.
+
+When set to "2" it also applies to group writable sticky directories.
+
+This protection is based on the restrictions in Openwall.
+
+
+protected_hardlinks
+--------------------
+
+A long-standing class of security issues is the hardlink-based
+time-of-check-time-of-use race, most commonly seen in world-writable
+directories like ``/tmp``. The common method of exploitation of this flaw
+is to cross privilege boundaries when following a given hardlink (i.e. a
+root process follows a hardlink created by another user). Additionally,
+on systems without separated partitions, this stops unauthorized users
+from "pinning" vulnerable setuid/setgid files against being upgraded by
+the administrator, or linking to special files.
+
+When set to "0", hardlink creation behavior is unrestricted.
+
+When set to "1" hardlinks cannot be created by users if they do not
+already own the source file, or do not have read/write access to it.
+
+This protection is based on the restrictions in Openwall and grsecurity.
+
+
+protected_regular
+-----------------
+
+This protection is similar to `protected_fifos`_, but it
+avoids writes to an attacker-controlled regular file, where a program
+expected to create one.
+
+When set to "0", writing to regular files is unrestricted.
+
+When set to "1" don't allow ``O_CREAT`` open on regular files that we
+don't own in world writable sticky directories, unless they are
+owned by the owner of the directory.
+
+When set to "2" it also applies to group writable sticky directories.
+
+
+protected_symlinks
+------------------
+
+A long-standing class of security issues is the symlink-based
+time-of-check-time-of-use race, most commonly seen in world-writable
+directories like ``/tmp``. The common method of exploitation of this flaw
+is to cross privilege boundaries when following a given symlink (i.e. a
+root process follows a symlink belonging to another user). For a likely
+incomplete list of hundreds of examples across the years, please see:
+https://cve.mitre.org/cgi-bin/cvekey.cgi?keyword=/tmp
+
+When set to "0", symlink following behavior is unrestricted.
+
+When set to "1" symlinks are permitted to be followed only when outside
+a sticky world-writable directory, or when the uid of the symlink and
+follower match, or when the directory owner matches the symlink's owner.
+
+This protection is based on the restrictions in Openwall and grsecurity.
+
+
+suid_dumpable
+-------------
+
+This value can be used to query and set the core dump mode for setuid
+or otherwise protected/tainted binaries. The modes are
+
+= ========== ===============================================================
+0 (default) Traditional behaviour. Any process which has changed
+ privilege levels or is execute only will not be dumped.
+1 (debug) All processes dump core when possible. The core dump is
+ owned by the current user and no security is applied. This is
+ intended for system debugging situations only.
+ Ptrace is unchecked.
+ This is insecure as it allows regular users to examine the
+ memory contents of privileged processes.
+2 (suidsafe) Any binary which normally would not be dumped is dumped
+ anyway, but only if the ``core_pattern`` kernel sysctl (see
+ :ref:`Documentation/admin-guide/sysctl/kernel.rst <core_pattern>`)
+ is set to
+ either a pipe handler or a fully qualified path. (For more
+ details on this limitation, see CVE-2006-2451.) This mode is
+ appropriate when administrators are attempting to debug
+ problems in a normal environment, and either have a core dump
+ pipe handler that knows to treat privileged core dumps with
+ care, or specific directory defined for catching core dumps.
+ If a core dump happens without a pipe handler or fully
+ qualified path, a message will be emitted to syslog warning
+ about the lack of a correct setting.
+= ========== ===============================================================
+
+
+
+2. /proc/sys/fs/binfmt_misc
+===========================
+
+Documentation for the files in ``/proc/sys/fs/binfmt_misc`` is
+in Documentation/admin-guide/binfmt-misc.rst.
+
+
+3. /proc/sys/fs/mqueue - POSIX message queues filesystem
+========================================================
+
+
+The "mqueue" filesystem provides the necessary kernel features to enable the
+creation of a user space library that implements the POSIX message queues
+API (as noted by the MSG tag in the POSIX 1003.1-2001 version of the System
+Interfaces specification.)
+
+The "mqueue" filesystem contains values for determining/setting the
+amount of resources used by the file system.
+
+``/proc/sys/fs/mqueue/queues_max`` is a read/write file for
+setting/getting the maximum number of message queues allowed on the
+system.
+
+``/proc/sys/fs/mqueue/msg_max`` is a read/write file for
+setting/getting the maximum number of messages in a queue value. In
+fact it is the limiting value for another (user) limit which is set in
+``mq_open`` invocation. This attribute of a queue must be less than
+or equal to ``msg_max``.
+
+``/proc/sys/fs/mqueue/msgsize_max`` is a read/write file for
+setting/getting the maximum message size value (it is an attribute of
+every message queue, set during its creation).
+
+``/proc/sys/fs/mqueue/msg_default`` is a read/write file for
+setting/getting the default number of messages in a queue value if the
+``attr`` parameter of ``mq_open(2)`` is ``NULL``. If it exceeds
+``msg_max``, the default value is initialized to ``msg_max``.
+
+``/proc/sys/fs/mqueue/msgsize_default`` is a read/write file for
+setting/getting the default message size value if the ``attr``
+parameter of ``mq_open(2)`` is ``NULL``. If it exceeds
+``msgsize_max``, the default value is initialized to ``msgsize_max``.
+
+4. /proc/sys/fs/epoll - Configuration options for the epoll interface
+=====================================================================
+
+This directory contains configuration options for the epoll(7) interface.
+
+max_user_watches
+----------------
+
+Every epoll file descriptor can store a number of files to be monitored
+for event readiness. Each one of these monitored files constitutes a "watch".
+This configuration option sets the maximum number of "watches" that are
+allowed for each user.
+Each "watch" costs roughly 90 bytes on a 32-bit kernel, and roughly 160 bytes
+on a 64-bit one.
+The current default value for ``max_user_watches`` is 4% of the
+available low memory, divided by the "watch" cost in bytes.
+
+5. /proc/sys/fs/fuse - Configuration options for FUSE filesystems
+=====================================================================
+
+This directory contains the following configuration options for FUSE
+filesystems:
+
+``/proc/sys/fs/fuse/max_pages_limit`` is a read/write file for
+setting/getting the maximum number of pages that can be used for servicing
+requests in FUSE.
+
+``/proc/sys/fs/fuse/default_request_timeout`` is a read/write file for
+setting/getting the default timeout (in seconds) for a fuse server to
+reply to a kernel-issued request in the event where the server did not
+specify a timeout at mount. If the server set a timeout,
+then default_request_timeout will be ignored. The default
+"default_request_timeout" is set to 0. 0 indicates no default timeout.
+The maximum value that can be set is 65535.
+
+``/proc/sys/fs/fuse/max_request_timeout`` is a read/write file for
+setting/getting the maximum timeout (in seconds) for a fuse server to
+reply to a kernel-issued request. A value greater than 0 automatically opts
+the server into a timeout that will be set to at most "max_request_timeout",
+even if the server did not specify a timeout and default_request_timeout is
+set to 0. If max_request_timeout is greater than 0 and the server set a timeout
+greater than max_request_timeout or default_request_timeout is set to a value
+greater than max_request_timeout, the system will use max_request_timeout as the
+timeout. 0 indicates no max request timeout. The maximum value that can be set
+is 65535.
+
+For timeouts, if the server does not respond to the request by the time
+the set timeout elapses, then the connection to the fuse server will be aborted.
+Please note that the timeouts are not 100% precise (eg you may set 60 seconds but
+the timeout may kick in after 70 seconds). The upper margin of error for the
+timeout is roughly FUSE_TIMEOUT_TIMER_FREQ seconds.
diff --git a/Documentation/admin-guide/sysctl/index.rst b/Documentation/admin-guide/sysctl/index.rst
new file mode 100644
index 000000000000..4dd2c9b5d752
--- /dev/null
+++ b/Documentation/admin-guide/sysctl/index.rst
@@ -0,0 +1,104 @@
+===========================
+Documentation for /proc/sys
+===========================
+
+Copyright (c) 1998, 1999, Rik van Riel <riel@nl.linux.org>
+
+------------------------------------------------------------------------------
+
+'Why', I hear you ask, 'would anyone even _want_ documentation
+for them sysctl files? If anybody really needs it, it's all in
+the source...'
+
+Well, this documentation is written because some people either
+don't know they need to tweak something, or because they don't
+have the time or knowledge to read the source code.
+
+Furthermore, the programmers who built sysctl have built it to
+be actually used, not just for the fun of programming it :-)
+
+------------------------------------------------------------------------------
+
+Legal blurb:
+
+As usual, there are two main things to consider:
+
+1. you get what you pay for
+2. it's free
+
+The consequences are that I won't guarantee the correctness of
+this document, and if you come to me complaining about how you
+screwed up your system because of wrong documentation, I won't
+feel sorry for you. I might even laugh at you...
+
+But of course, if you _do_ manage to screw up your system using
+only the sysctl options used in this file, I'd like to hear of
+it. Not only to have a great laugh, but also to make sure that
+you're the last RTFMing person to screw up.
+
+In short, e-mail your suggestions, corrections and / or horror
+stories to: <riel@nl.linux.org>
+
+Rik van Riel.
+
+--------------------------------------------------------------
+
+Introduction
+============
+
+Sysctl is a means of configuring certain aspects of the kernel
+at run-time, and the /proc/sys/ directory is there so that you
+don't even need special tools to do it!
+In fact, there are only four things needed to use these config
+facilities:
+
+- a running Linux system
+- root access
+- common sense (this is especially hard to come by these days)
+- knowledge of what all those values mean
+
+As a quick 'ls /proc/sys' will show, the directory consists of
+several (arch-dependent?) subdirs. Each subdir is mainly about
+one part of the kernel, so you can do configuration on a piece
+by piece basis, or just some 'thematic frobbing'.
+
+This documentation is about:
+
+=============== ===============================================================
+abi/ execution domains & personalities
+<$ARCH> tuning controls for various CPU architecture (e.g. csky, s390)
+crypto/ <undocumented>
+debug/ <undocumented>
+dev/ device specific information (e.g. dev/cdrom/info)
+fs/ specific filesystems
+ filehandle, inode, dentry and quota tuning
+ binfmt_misc <Documentation/admin-guide/binfmt-misc.rst>
+kernel/ global kernel info / tuning
+ miscellaneous stuff
+ some architecture-specific controls
+ security (LSM) stuff
+net/ networking stuff, for documentation look in:
+ <Documentation/networking/>
+proc/ <empty>
+sunrpc/ SUN Remote Procedure Call (NFS)
+user/ Per user namespace limits
+vm/ memory management tuning
+ buffer and cache management
+xen/ <undocumented>
+=============== ===============================================================
+
+These are the subdirs I have on my system or have been discovered by
+searching through the source code. There might be more or other subdirs
+in another setup. If you see another dir, I'd really like to hear about
+it :-)
+
+.. toctree::
+ :maxdepth: 1
+
+ abi
+ fs
+ kernel
+ net
+ sunrpc
+ user
+ vm
diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
new file mode 100644
index 000000000000..f3ee807b5d8b
--- /dev/null
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -0,0 +1,1718 @@
+===================================
+Documentation for /proc/sys/kernel/
+===================================
+
+.. See scripts/check-sysctl-docs to keep this up to date
+
+
+Copyright (c) 1998, 1999, Rik van Riel <riel@nl.linux.org>
+
+Copyright (c) 2009, Shen Feng<shen@cn.fujitsu.com>
+
+For general info and legal blurb, please look in
+Documentation/admin-guide/sysctl/index.rst.
+
+------------------------------------------------------------------------------
+
+This file contains documentation for the sysctl files in
+``/proc/sys/kernel/``.
+
+The files in this directory can be used to tune and monitor
+miscellaneous and general things in the operation of the Linux
+kernel. Since some of the files *can* be used to screw up your
+system, it is advisable to read both documentation and source
+before actually making adjustments.
+
+Currently, these files might (depending on your configuration)
+show up in ``/proc/sys/kernel``:
+
+.. contents:: :local:
+
+
+acct
+====
+
+::
+
+ highwater lowwater frequency
+
+If BSD-style process accounting is enabled these values control
+its behaviour. If free space on filesystem where the log lives
+goes below ``lowwater``\ % accounting suspends. If free space gets
+above ``highwater``\ % accounting resumes. ``frequency`` determines
+how often do we check the amount of free space (value is in
+seconds). Default:
+
+::
+
+ 4 2 30
+
+That is, suspend accounting if free space drops below 2%; resume it
+if it increases to at least 4%; consider information about amount of
+free space valid for 30 seconds.
+
+
+acpi_video_flags
+================
+
+See Documentation/power/video.rst. This allows the video resume mode to be set,
+in a similar fashion to the ``acpi_sleep`` kernel parameter, by
+combining the following values:
+
+= =======
+1 s3_bios
+2 s3_mode
+4 s3_beep
+= =======
+
+arch
+====
+
+The machine hardware name, the same output as ``uname -m``
+(e.g. ``x86_64`` or ``aarch64``).
+
+auto_msgmni
+===========
+
+This variable has no effect and may be removed in future kernel
+releases. Reading it always returns 0.
+Up to Linux 3.17, it enabled/disabled automatic recomputing of
+`msgmni`_
+upon memory add/remove or upon IPC namespace creation/removal.
+Echoing "1" into this file enabled msgmni automatic recomputing.
+Echoing "0" turned it off. The default value was 1.
+
+
+bootloader_type (x86 only)
+==========================
+
+This gives the bootloader type number as indicated by the bootloader,
+shifted left by 4, and OR'd with the low four bits of the bootloader
+version. The reason for this encoding is that this used to match the
+``type_of_loader`` field in the kernel header; the encoding is kept for
+backwards compatibility. That is, if the full bootloader type number
+is 0x15 and the full version number is 0x234, this file will contain
+the value 340 = 0x154.
+
+See the ``type_of_loader`` and ``ext_loader_type`` fields in
+Documentation/arch/x86/boot.rst for additional information.
+
+
+bootloader_version (x86 only)
+=============================
+
+The complete bootloader version number. In the example above, this
+file will contain the value 564 = 0x234.
+
+See the ``type_of_loader`` and ``ext_loader_ver`` fields in
+Documentation/arch/x86/boot.rst for additional information.
+
+
+bpf_stats_enabled
+=================
+
+Controls whether the kernel should collect statistics on BPF programs
+(total time spent running, number of times run...). Enabling
+statistics causes a slight reduction in performance on each program
+run. The statistics can be seen using ``bpftool``.
+
+= ===================================
+0 Don't collect statistics (default).
+1 Collect statistics.
+= ===================================
+
+
+cad_pid
+=======
+
+This is the pid which will be signalled on reboot (notably, by
+Ctrl-Alt-Delete). Writing a value to this file which doesn't
+correspond to a running process will result in ``-ESRCH``.
+
+See also `ctrl-alt-del`_.
+
+
+cap_last_cap
+============
+
+Highest valid capability of the running kernel. Exports
+``CAP_LAST_CAP`` from the kernel.
+
+
+.. _core_pattern:
+
+core_pattern
+============
+
+``core_pattern`` is used to specify a core dumpfile pattern name.
+
+* max length 127 characters; default value is "core"
+* ``core_pattern`` is used as a pattern template for the output
+ filename; certain string patterns (beginning with '%') are
+ substituted with their actual values.
+* backward compatibility with ``core_uses_pid``:
+
+ If ``core_pattern`` does not include "%p" (default does not)
+ and ``core_uses_pid`` is set, then .PID will be appended to
+ the filename.
+
+* corename format specifiers
+
+ ======== ==========================================
+ %<NUL> '%' is dropped
+ %% output one '%'
+ %p pid
+ %P global pid (init PID namespace)
+ %i tid
+ %I global tid (init PID namespace)
+ %u uid (in initial user namespace)
+ %g gid (in initial user namespace)
+ %d dump mode, matches ``PR_SET_DUMPABLE`` and
+ ``/proc/sys/fs/suid_dumpable``
+ %s signal number
+ %t UNIX time of dump
+ %h hostname
+ %e executable filename (may be shortened, could be changed by prctl etc)
+ %f executable filename
+ %E executable path
+ %c maximum size of core file by resource limit RLIMIT_CORE
+ %C CPU the task ran on
+ %F pidfd number
+ %<OTHER> both are dropped
+ ======== ==========================================
+
+* If the first character of the pattern is a '|', the kernel will treat
+ the rest of the pattern as a command to run. The core dump will be
+ written to the standard input of that program instead of to a file.
+
+
+core_pipe_limit
+===============
+
+This sysctl is only applicable when `core_pattern`_ is configured to
+pipe core files to a user space helper (when the first character of
+``core_pattern`` is a '|', see above).
+When collecting cores via a pipe to an application, it is occasionally
+useful for the collecting application to gather data about the
+crashing process from its ``/proc/pid`` directory.
+In order to do this safely, the kernel must wait for the collecting
+process to exit, so as not to remove the crashing processes proc files
+prematurely.
+This in turn creates the possibility that a misbehaving userspace
+collecting process can block the reaping of a crashed process simply
+by never exiting.
+This sysctl defends against that.
+It defines how many concurrent crashing processes may be piped to user
+space applications in parallel.
+If this value is exceeded, then those crashing processes above that
+value are noted via the kernel log and their cores are skipped.
+0 is a special value, indicating that unlimited processes may be
+captured in parallel, but that no waiting will take place (i.e. the
+collecting process is not guaranteed access to ``/proc/<crashing
+pid>/``).
+This value defaults to 0.
+
+
+core_sort_vma
+=============
+
+The default coredump writes VMAs in address order. By setting
+``core_sort_vma`` to 1, VMAs will be written from smallest size
+to largest size. This is known to break at least elfutils, but
+can be handy when dealing with very large (and truncated)
+coredumps where the more useful debugging details are included
+in the smaller VMAs.
+
+
+core_uses_pid
+=============
+
+The default coredump filename is "core". By setting
+``core_uses_pid`` to 1, the coredump filename becomes core.PID.
+If `core_pattern`_ does not include "%p" (default does not)
+and ``core_uses_pid`` is set, then .PID will be appended to
+the filename.
+
+
+ctrl-alt-del
+============
+
+When the value in this file is 0, ctrl-alt-del is trapped and
+sent to the ``init(1)`` program to handle a graceful restart.
+When, however, the value is > 0, Linux's reaction to a Vulcan
+Nerve Pinch (tm) will be an immediate reboot, without even
+syncing its dirty buffers.
+
+Note:
+ when a program (like dosemu) has the keyboard in 'raw'
+ mode, the ctrl-alt-del is intercepted by the program before it
+ ever reaches the kernel tty layer, and it's up to the program
+ to decide what to do with it.
+
+
+dmesg_restrict
+==============
+
+This toggle indicates whether unprivileged users are prevented
+from using ``dmesg(8)`` to view messages from the kernel's log
+buffer.
+When ``dmesg_restrict`` is set to 0 there are no restrictions.
+When ``dmesg_restrict`` is set to 1, users must have
+``CAP_SYSLOG`` to use ``dmesg(8)``.
+
+The kernel config option ``CONFIG_SECURITY_DMESG_RESTRICT`` sets the
+default value of ``dmesg_restrict``.
+
+
+domainname & hostname
+=====================
+
+These files can be used to set the NIS/YP domainname and the
+hostname of your box in exactly the same way as the commands
+domainname and hostname, i.e.::
+
+ # echo "darkstar" > /proc/sys/kernel/hostname
+ # echo "mydomain" > /proc/sys/kernel/domainname
+
+has the same effect as::
+
+ # hostname "darkstar"
+ # domainname "mydomain"
+
+Note, however, that the classic darkstar.frop.org has the
+hostname "darkstar" and DNS (Internet Domain Name Server)
+domainname "frop.org", not to be confused with the NIS (Network
+Information Service) or YP (Yellow Pages) domainname. These two
+domain names are in general different. For a detailed discussion
+see the ``hostname(1)`` man page.
+
+
+firmware_config
+===============
+
+See Documentation/driver-api/firmware/fallback-mechanisms.rst.
+
+The entries in this directory allow the firmware loader helper
+fallback to be controlled:
+
+* ``force_sysfs_fallback``, when set to 1, forces the use of the
+ fallback;
+* ``ignore_sysfs_fallback``, when set to 1, ignores any fallback.
+
+
+ftrace_dump_on_oops
+===================
+
+Determines whether ``ftrace_dump()`` should be called on an oops (or
+kernel panic). This will output the contents of the ftrace buffers to
+the console. This is very useful for capturing traces that lead to
+crashes and outputting them to a serial console.
+
+======================= ===========================================
+0 Disabled (default).
+1 Dump buffers of all CPUs.
+2(orig_cpu) Dump the buffer of the CPU that triggered the
+ oops.
+<instance> Dump the specific instance buffer on all CPUs.
+<instance>=2(orig_cpu) Dump the specific instance buffer on the CPU
+ that triggered the oops.
+======================= ===========================================
+
+Multiple instance dump is also supported, and instances are separated
+by commas. If global buffer also needs to be dumped, please specify
+the dump mode (1/2/orig_cpu) first for global buffer.
+
+So for example to dump "foo" and "bar" instance buffer on all CPUs,
+user can::
+
+ echo "foo,bar" > /proc/sys/kernel/ftrace_dump_on_oops
+
+To dump global buffer and "foo" instance buffer on all
+CPUs along with the "bar" instance buffer on CPU that triggered the
+oops, user can::
+
+ echo "1,foo,bar=2" > /proc/sys/kernel/ftrace_dump_on_oops
+
+ftrace_enabled, stack_tracer_enabled
+====================================
+
+See Documentation/trace/ftrace.rst.
+
+
+hardlockup_all_cpu_backtrace
+============================
+
+This value controls the hard lockup detector behavior when a hard
+lockup condition is detected as to whether or not to gather further
+debug information. If enabled, arch-specific all-CPU stack dumping
+will be initiated.
+
+= ============================================
+0 Do nothing. This is the default behavior.
+1 On detection capture more debug information.
+= ============================================
+
+
+hardlockup_panic
+================
+
+This parameter can be used to control whether the kernel panics
+when a hard lockup is detected.
+
+= ===========================
+0 Don't panic on hard lockup.
+1 Panic on hard lockup.
+= ===========================
+
+See Documentation/admin-guide/lockup-watchdogs.rst for more information.
+This can also be set using the nmi_watchdog kernel parameter.
+
+
+hotplug
+=======
+
+Path for the hotplug policy agent.
+Default value is ``CONFIG_UEVENT_HELPER_PATH``, which in turn defaults
+to the empty string.
+
+This file only exists when ``CONFIG_UEVENT_HELPER`` is enabled. Most
+modern systems rely exclusively on the netlink-based uevent source and
+don't need this.
+
+
+hung_task_all_cpu_backtrace
+===========================
+
+If this option is set, the kernel will send an NMI to all CPUs to dump
+their backtraces when a hung task is detected. This file shows up if
+CONFIG_DETECT_HUNG_TASK and CONFIG_SMP are enabled.
+
+0: Won't show all CPUs backtraces when a hung task is detected.
+This is the default behavior.
+
+1: Will non-maskably interrupt all CPUs and dump their backtraces when
+a hung task is detected.
+
+
+hung_task_panic
+===============
+
+Controls the kernel's behavior when a hung task is detected.
+This file shows up if ``CONFIG_DETECT_HUNG_TASK`` is enabled.
+
+= =================================================
+0 Continue operation. This is the default behavior.
+1 Panic immediately.
+= =================================================
+
+
+hung_task_check_count
+=====================
+
+The upper bound on the number of tasks that are checked.
+This file shows up if ``CONFIG_DETECT_HUNG_TASK`` is enabled.
+
+
+hung_task_detect_count
+======================
+
+Indicates the total number of tasks that have been detected as hung since
+the system boot.
+
+This file shows up if ``CONFIG_DETECT_HUNG_TASK`` is enabled.
+
+
+hung_task_timeout_secs
+======================
+
+When a task in D state did not get scheduled
+for more than this value report a warning.
+This file shows up if ``CONFIG_DETECT_HUNG_TASK`` is enabled.
+
+0 means infinite timeout, no checking is done.
+
+Possible values to set are in range {0:``LONG_MAX``/``HZ``}.
+
+
+hung_task_check_interval_secs
+=============================
+
+Hung task check interval. If hung task checking is enabled
+(see `hung_task_timeout_secs`_), the check is done every
+``hung_task_check_interval_secs`` seconds.
+This file shows up if ``CONFIG_DETECT_HUNG_TASK`` is enabled.
+
+0 (default) means use ``hung_task_timeout_secs`` as checking
+interval.
+
+Possible values to set are in range {0:``LONG_MAX``/``HZ``}.
+
+
+hung_task_warnings
+==================
+
+The maximum number of warnings to report. During a check interval
+if a hung task is detected, this value is decreased by 1.
+When this value reaches 0, no more warnings will be reported.
+This file shows up if ``CONFIG_DETECT_HUNG_TASK`` is enabled.
+
+-1: report an infinite number of warnings.
+
+
+hyperv_record_panic_msg
+=======================
+
+Controls whether the panic kmsg data should be reported to Hyper-V.
+
+= =========================================================
+0 Do not report panic kmsg data.
+1 Report the panic kmsg data. This is the default behavior.
+= =========================================================
+
+
+ignore-unaligned-usertrap
+=========================
+
+On architectures where unaligned accesses cause traps, and where this
+feature is supported (``CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN``;
+currently, ``arc``, ``parisc`` and ``loongarch``), controls whether all
+unaligned traps are logged.
+
+= =============================================================
+0 Log all unaligned accesses.
+1 Only warn the first time a process traps. This is the default
+ setting.
+= =============================================================
+
+See also `unaligned-trap`_.
+
+io_uring_disabled
+=================
+
+Prevents all processes from creating new io_uring instances. Enabling this
+shrinks the kernel's attack surface.
+
+= ======================================================================
+0 All processes can create io_uring instances as normal. This is the
+ default setting.
+1 io_uring creation is disabled (io_uring_setup() will fail with
+ -EPERM) for unprivileged processes not in the io_uring_group group.
+ Existing io_uring instances can still be used. See the
+ documentation for io_uring_group for more information.
+2 io_uring creation is disabled for all processes. io_uring_setup()
+ always fails with -EPERM. Existing io_uring instances can still be
+ used.
+= ======================================================================
+
+
+io_uring_group
+==============
+
+When io_uring_disabled is set to 1, a process must either be
+privileged (CAP_SYS_ADMIN) or be in the io_uring_group group in order
+to create an io_uring instance. If io_uring_group is set to -1 (the
+default), only processes with the CAP_SYS_ADMIN capability may create
+io_uring instances.
+
+
+kexec_load_disabled
+===================
+
+A toggle indicating if the syscalls ``kexec_load`` and
+``kexec_file_load`` have been disabled.
+This value defaults to 0 (false: ``kexec_*load`` enabled), but can be
+set to 1 (true: ``kexec_*load`` disabled).
+Once true, kexec can no longer be used, and the toggle cannot be set
+back to false.
+This allows a kexec image to be loaded before disabling the syscall,
+allowing a system to set up (and later use) an image without it being
+altered.
+Generally used together with the `modules_disabled`_ sysctl.
+
+kexec_load_limit_panic
+======================
+
+This parameter specifies a limit to the number of times the syscalls
+``kexec_load`` and ``kexec_file_load`` can be called with a crash
+image. It can only be set with a more restrictive value than the
+current one.
+
+== ======================================================
+-1 Unlimited calls to kexec. This is the default setting.
+N Number of calls left.
+== ======================================================
+
+kexec_load_limit_reboot
+=======================
+
+Similar functionality as ``kexec_load_limit_panic``, but for a normal
+image.
+
+kptr_restrict
+=============
+
+This toggle indicates whether restrictions are placed on
+exposing kernel addresses via ``/proc`` and other interfaces.
+
+When ``kptr_restrict`` is set to 0 (the default) the address is hashed
+before printing.
+(This is the equivalent to %p.)
+
+When ``kptr_restrict`` is set to 1, kernel pointers printed using the
+%pK format specifier will be replaced with 0s unless the user has
+``CAP_SYSLOG`` and effective user and group ids are equal to the real
+ids.
+This is because %pK checks are done at read() time rather than open()
+time, so if permissions are elevated between the open() and the read()
+(e.g via a setuid binary) then %pK will not leak kernel pointers to
+unprivileged users.
+Note, this is a temporary solution only.
+The correct long-term solution is to do the permission checks at
+open() time.
+Consider removing world read permissions from files that use %pK, and
+using `dmesg_restrict`_ to protect against uses of %pK in ``dmesg(8)``
+if leaking kernel pointer values to unprivileged users is a concern.
+
+When ``kptr_restrict`` is set to 2, kernel pointers printed using
+%pK will be replaced with 0s regardless of privileges.
+
+
+modprobe
+========
+
+The full path to the usermode helper for autoloading kernel modules,
+by default ``CONFIG_MODPROBE_PATH``, which in turn defaults to
+"/sbin/modprobe". This binary is executed when the kernel requests a
+module. For example, if userspace passes an unknown filesystem type
+to mount(), then the kernel will automatically request the
+corresponding filesystem module by executing this usermode helper.
+This usermode helper should insert the needed module into the kernel.
+
+This sysctl only affects module autoloading. It has no effect on the
+ability to explicitly insert modules.
+
+This sysctl can be used to debug module loading requests::
+
+ echo '#! /bin/sh' > /tmp/modprobe
+ echo 'echo "$@" >> /tmp/modprobe.log' >> /tmp/modprobe
+ echo 'exec /sbin/modprobe "$@"' >> /tmp/modprobe
+ chmod a+x /tmp/modprobe
+ echo /tmp/modprobe > /proc/sys/kernel/modprobe
+
+Alternatively, if this sysctl is set to the empty string, then module
+autoloading is completely disabled. The kernel will not try to
+execute a usermode helper at all, nor will it call the
+kernel_module_request LSM hook.
+
+If CONFIG_STATIC_USERMODEHELPER=y is set in the kernel configuration,
+then the configured static usermode helper overrides this sysctl,
+except that the empty string is still accepted to completely disable
+module autoloading as described above.
+
+modules_disabled
+================
+
+A toggle value indicating if modules are allowed to be loaded
+in an otherwise modular kernel. This toggle defaults to off
+(0), but can be set true (1). Once true, modules can be
+neither loaded nor unloaded, and the toggle cannot be set back
+to false. Generally used with the `kexec_load_disabled`_ toggle.
+
+
+.. _msgmni:
+
+msgmax, msgmnb, and msgmni
+==========================
+
+``msgmax`` is the maximum size of an IPC message, in bytes. 8192 by
+default (``MSGMAX``).
+
+``msgmnb`` is the maximum size of an IPC queue, in bytes. 16384 by
+default (``MSGMNB``).
+
+``msgmni`` is the maximum number of IPC queues. 32000 by default
+(``MSGMNI``).
+
+All of these parameters are set per ipc namespace. The maximum number of bytes
+in POSIX message queues is limited by ``RLIMIT_MSGQUEUE``. This limit is
+respected hierarchically in the each user namespace.
+
+msg_next_id, sem_next_id, and shm_next_id (System V IPC)
+========================================================
+
+These three toggles allows to specify desired id for next allocated IPC
+object: message, semaphore or shared memory respectively.
+
+By default they are equal to -1, which means generic allocation logic.
+Possible values to set are in range {0:``INT_MAX``}.
+
+Notes:
+ 1) kernel doesn't guarantee, that new object will have desired id. So,
+ it's up to userspace, how to handle an object with "wrong" id.
+ 2) Toggle with non-default value will be set back to -1 by kernel after
+ successful IPC object allocation. If an IPC object allocation syscall
+ fails, it is undefined if the value remains unmodified or is reset to -1.
+
+
+ngroups_max
+===========
+
+Maximum number of supplementary groups, _i.e._ the maximum size which
+``setgroups`` will accept. Exports ``NGROUPS_MAX`` from the kernel.
+
+
+
+nmi_watchdog
+============
+
+This parameter can be used to control the NMI watchdog
+(i.e. the hard lockup detector) on x86 systems.
+
+= =================================
+0 Disable the hard lockup detector.
+1 Enable the hard lockup detector.
+= =================================
+
+The hard lockup detector monitors each CPU for its ability to respond to
+timer interrupts. The mechanism utilizes CPU performance counter registers
+that are programmed to generate Non-Maskable Interrupts (NMIs) periodically
+while a CPU is busy. Hence, the alternative name 'NMI watchdog'.
+
+The NMI watchdog is disabled by default if the kernel is running as a guest
+in a KVM virtual machine. This default can be overridden by adding::
+
+ nmi_watchdog=1
+
+to the guest kernel command line (see
+Documentation/admin-guide/kernel-parameters.rst).
+
+
+nmi_wd_lpm_factor (PPC only)
+============================
+
+Factor to apply to the NMI watchdog timeout (only when ``nmi_watchdog`` is
+set to 1). This factor represents the percentage added to
+``watchdog_thresh`` when calculating the NMI watchdog timeout during an
+LPM. The soft lockup timeout is not impacted.
+
+A value of 0 means no change. The default value is 200 meaning the NMI
+watchdog is set to 30s (based on ``watchdog_thresh`` equal to 10).
+
+
+numa_balancing
+==============
+
+Enables/disables and configures automatic page fault based NUMA memory
+balancing. Memory is moved automatically to nodes that access it often.
+The value to set can be the result of ORing the following:
+
+= =================================
+0 NUMA_BALANCING_DISABLED
+1 NUMA_BALANCING_NORMAL
+2 NUMA_BALANCING_MEMORY_TIERING
+= =================================
+
+Or NUMA_BALANCING_NORMAL to optimize page placement among different
+NUMA nodes to reduce remote accessing. On NUMA machines, there is a
+performance penalty if remote memory is accessed by a CPU. When this
+feature is enabled the kernel samples what task thread is accessing
+memory by periodically unmapping pages and later trapping a page
+fault. At the time of the page fault, it is determined if the data
+being accessed should be migrated to a local memory node.
+
+The unmapping of pages and trapping faults incur additional overhead that
+ideally is offset by improved memory locality but there is no universal
+guarantee. If the target workload is already bound to NUMA nodes then this
+feature should be disabled.
+
+Or NUMA_BALANCING_MEMORY_TIERING to optimize page placement among
+different types of memory (represented as different NUMA nodes) to
+place the hot pages in the fast memory. This is implemented based on
+unmapping and page fault too.
+
+numa_balancing_promote_rate_limit_MBps
+======================================
+
+Too high promotion/demotion throughput between different memory types
+may hurt application latency. This can be used to rate limit the
+promotion throughput. The per-node max promotion throughput in MB/s
+will be limited to be no more than the set value.
+
+A rule of thumb is to set this to less than 1/10 of the PMEM node
+write bandwidth.
+
+oops_all_cpu_backtrace
+======================
+
+If this option is set, the kernel will send an NMI to all CPUs to dump
+their backtraces when an oops event occurs. It should be used as a last
+resort in case a panic cannot be triggered (to protect VMs running, for
+example) or kdump can't be collected. This file shows up if CONFIG_SMP
+is enabled.
+
+0: Won't show all CPUs backtraces when an oops is detected.
+This is the default behavior.
+
+1: Will non-maskably interrupt all CPUs and dump their backtraces when
+an oops event is detected.
+
+
+oops_limit
+==========
+
+Number of kernel oopses after which the kernel should panic when
+``panic_on_oops`` is not set. Setting this to 0 disables checking
+the count. Setting this to 1 has the same effect as setting
+``panic_on_oops=1``. The default value is 10000.
+
+
+osrelease, ostype & version
+===========================
+
+::
+
+ # cat osrelease
+ 2.1.88
+ # cat ostype
+ Linux
+ # cat version
+ #5 Wed Feb 25 21:49:24 MET 1998
+
+The files ``osrelease`` and ``ostype`` should be clear enough.
+``version``
+needs a little more clarification however. The '#5' means that
+this is the fifth kernel built from this source base and the
+date behind it indicates the time the kernel was built.
+The only way to tune these values is to rebuild the kernel :-)
+
+
+overflowgid & overflowuid
+=========================
+
+if your architecture did not always support 32-bit UIDs (i.e. arm,
+i386, m68k, sh, and sparc32), a fixed UID and GID will be returned to
+applications that use the old 16-bit UID/GID system calls, if the
+actual UID or GID would exceed 65535.
+
+These sysctls allow you to change the value of the fixed UID and GID.
+The default is 65534.
+
+
+panic
+=====
+
+The value in this file determines the behaviour of the kernel on a
+panic:
+
+* if zero, the kernel will loop forever;
+* if negative, the kernel will reboot immediately;
+* if positive, the kernel will reboot after the corresponding number
+ of seconds.
+
+When you use the software watchdog, the recommended setting is 60.
+
+
+panic_on_io_nmi
+===============
+
+Controls the kernel's behavior when a CPU receives an NMI caused by
+an IO error.
+
+= ==================================================================
+0 Try to continue operation (default).
+1 Panic immediately. The IO error triggered an NMI. This indicates a
+ serious system condition which could result in IO data corruption.
+ Rather than continuing, panicking might be a better choice. Some
+ servers issue this sort of NMI when the dump button is pushed,
+ and you can use this option to take a crash dump.
+= ==================================================================
+
+
+panic_on_oops
+=============
+
+Controls the kernel's behaviour when an oops or BUG is encountered.
+
+= ===================================================================
+0 Try to continue operation.
+1 Panic immediately. If the `panic` sysctl is also non-zero then the
+ machine will be rebooted.
+= ===================================================================
+
+
+panic_on_stackoverflow
+======================
+
+Controls the kernel's behavior when detecting the overflows of
+kernel, IRQ and exception stacks except a user stack.
+This file shows up if ``CONFIG_DEBUG_STACKOVERFLOW`` is enabled.
+
+= ==========================
+0 Try to continue operation.
+1 Panic immediately.
+= ==========================
+
+
+panic_on_unrecovered_nmi
+========================
+
+The default Linux behaviour on an NMI of either memory or unknown is
+to continue operation. For many environments such as scientific
+computing it is preferable that the box is taken out and the error
+dealt with than an uncorrected parity/ECC error get propagated.
+
+A small number of systems do generate NMIs for bizarre random reasons
+such as power management so the default is off. That sysctl works like
+the existing panic controls already in that directory.
+
+
+panic_on_warn
+=============
+
+Calls panic() in the WARN() path when set to 1. This is useful to avoid
+a kernel rebuild when attempting to kdump at the location of a WARN().
+
+= ================================================
+0 Only WARN(), default behaviour.
+1 Call panic() after printing out WARN() location.
+= ================================================
+
+
+panic_print
+===========
+
+Bitmask for printing system info when panic happens. User can chose
+combination of the following bits:
+
+===== ============================================
+bit 0 print all tasks info
+bit 1 print system memory info
+bit 2 print timer info
+bit 3 print locks info if ``CONFIG_LOCKDEP`` is on
+bit 4 print ftrace buffer
+bit 5 replay all kernel messages on consoles at the end of panic
+bit 6 print all CPUs backtrace (if available in the arch)
+bit 7 print only tasks in uninterruptible (blocked) state
+===== ============================================
+
+So for example to print tasks and memory info on panic, user can::
+
+ echo 3 > /proc/sys/kernel/panic_print
+
+
+panic_sys_info
+==============
+
+A comma separated list of extra information to be dumped on panic,
+for example, "tasks,mem,timers,...". It is a human readable alternative
+to 'panic_print'. Possible values are:
+
+============= ===================================================
+tasks print all tasks info
+mem print system memory info
+timer print timers info
+lock print locks info if CONFIG_LOCKDEP is on
+ftrace print ftrace buffer
+all_bt print all CPUs backtrace (if available in the arch)
+blocked_tasks print only tasks in uninterruptible (blocked) state
+============= ===================================================
+
+
+panic_on_rcu_stall
+==================
+
+When set to 1, calls panic() after RCU stall detection messages. This
+is useful to define the root cause of RCU stalls using a vmcore.
+
+= ============================================================
+0 Do not panic() when RCU stall takes place, default behavior.
+1 panic() after printing RCU stall messages.
+= ============================================================
+
+max_rcu_stall_to_panic
+======================
+
+When ``panic_on_rcu_stall`` is set to 1, this value determines the
+number of times that RCU can stall before panic() is called.
+
+When ``panic_on_rcu_stall`` is set to 0, this value is has no effect.
+
+perf_cpu_time_max_percent
+=========================
+
+Hints to the kernel how much CPU time it should be allowed to
+use to handle perf sampling events. If the perf subsystem
+is informed that its samples are exceeding this limit, it
+will drop its sampling frequency to attempt to reduce its CPU
+usage.
+
+Some perf sampling happens in NMIs. If these samples
+unexpectedly take too long to execute, the NMIs can become
+stacked up next to each other so much that nothing else is
+allowed to execute.
+
+===== ========================================================
+0 Disable the mechanism. Do not monitor or correct perf's
+ sampling rate no matter how CPU time it takes.
+
+1-100 Attempt to throttle perf's sample rate to this
+ percentage of CPU. Note: the kernel calculates an
+ "expected" length of each sample event. 100 here means
+ 100% of that expected length. Even if this is set to
+ 100, you may still see sample throttling if this
+ length is exceeded. Set to 0 if you truly do not care
+ how much CPU is consumed.
+===== ========================================================
+
+
+perf_event_paranoid
+===================
+
+Controls use of the performance events system by unprivileged
+users (without CAP_PERFMON). The default value is 2.
+
+For backward compatibility reasons access to system performance
+monitoring and observability remains open for CAP_SYS_ADMIN
+privileged processes but CAP_SYS_ADMIN usage for secure system
+performance monitoring and observability operations is discouraged
+with respect to CAP_PERFMON use cases.
+
+=== ==================================================================
+ -1 Allow use of (almost) all events by all users.
+
+ Ignore mlock limit after perf_event_mlock_kb without
+ ``CAP_IPC_LOCK``.
+
+>=0 Disallow ftrace function tracepoint by users without
+ ``CAP_PERFMON``.
+
+ Disallow raw tracepoint access by users without ``CAP_PERFMON``.
+
+>=1 Disallow CPU event access by users without ``CAP_PERFMON``.
+
+>=2 Disallow kernel profiling by users without ``CAP_PERFMON``.
+=== ==================================================================
+
+
+perf_event_max_stack
+====================
+
+Controls maximum number of stack frames to copy for (``attr.sample_type &
+PERF_SAMPLE_CALLCHAIN``) configured events, for instance, when using
+'``perf record -g``' or '``perf trace --call-graph fp``'.
+
+This can only be done when no events are in use that have callchains
+enabled, otherwise writing to this file will return ``-EBUSY``.
+
+The default value is 127.
+
+
+perf_event_mlock_kb
+===================
+
+Control size of per-cpu ring buffer not counted against mlock limit.
+
+The default value is 512 + 1 page
+
+
+perf_event_max_contexts_per_stack
+=================================
+
+Controls maximum number of stack frame context entries for
+(``attr.sample_type & PERF_SAMPLE_CALLCHAIN``) configured events, for
+instance, when using '``perf record -g``' or '``perf trace --call-graph fp``'.
+
+This can only be done when no events are in use that have callchains
+enabled, otherwise writing to this file will return ``-EBUSY``.
+
+The default value is 8.
+
+
+perf_user_access (arm64 and riscv only)
+=======================================
+
+Controls user space access for reading perf event counters.
+
+* for arm64
+ The default value is 0 (access disabled).
+
+ When set to 1, user space can read performance monitor counter registers
+ directly.
+
+ See Documentation/arch/arm64/perf.rst for more information.
+
+* for riscv
+ When set to 0, user space access is disabled.
+
+ The default value is 1, user space can read performance monitor counter
+ registers through perf, any direct access without perf intervention will trigger
+ an illegal instruction.
+
+ When set to 2, which enables legacy mode (user space has direct access to cycle
+ and insret CSRs only). Note that this legacy value is deprecated and will be
+ removed once all user space applications are fixed.
+
+ Note that the time CSR is always directly accessible to all modes.
+
+pid_max
+=======
+
+PID allocation wrap value. When the kernel's next PID value
+reaches this value, it wraps back to a minimum PID value.
+PIDs of value ``pid_max`` or larger are not allocated.
+
+
+ns_last_pid
+===========
+
+The last pid allocated in the current (the one task using this sysctl
+lives in) pid namespace. When selecting a pid for a next task on fork
+kernel tries to allocate a number starting from this one.
+
+
+powersave-nap (PPC only)
+========================
+
+If set, Linux-PPC will use the 'nap' mode of powersaving,
+otherwise the 'doze' mode will be used.
+
+
+==============================================================
+
+printk
+======
+
+The four values in printk denote: ``console_loglevel``,
+``default_message_loglevel``, ``minimum_console_loglevel`` and
+``default_console_loglevel`` respectively.
+
+These values influence printk() behavior when printing or
+logging error messages. See '``man 2 syslog``' for more info on
+the different loglevels.
+
+======================== =====================================
+console_loglevel messages with a higher priority than
+ this will be printed to the console
+default_message_loglevel messages without an explicit priority
+ will be printed with this priority
+minimum_console_loglevel minimum (highest) value to which
+ console_loglevel can be set
+default_console_loglevel default value for console_loglevel
+======================== =====================================
+
+
+printk_delay
+============
+
+Delay each printk message in ``printk_delay`` milliseconds
+
+Value from 0 - 10000 is allowed.
+
+
+printk_ratelimit
+================
+
+Some warning messages are rate limited. ``printk_ratelimit`` specifies
+the minimum length of time between these messages (in seconds).
+The default value is 5 seconds.
+
+A value of 0 will disable rate limiting.
+
+
+printk_ratelimit_burst
+======================
+
+While long term we enforce one message per `printk_ratelimit`_
+seconds, we do allow a burst of messages to pass through.
+``printk_ratelimit_burst`` specifies the number of messages we can
+send before ratelimiting kicks in. After `printk_ratelimit`_ seconds
+have elapsed, another burst of messages may be sent.
+
+The default value is 10 messages.
+
+
+printk_devkmsg
+==============
+
+Control the logging to ``/dev/kmsg`` from userspace:
+
+========= =============================================
+ratelimit default, ratelimited
+on unlimited logging to /dev/kmsg from userspace
+off logging to /dev/kmsg disabled
+========= =============================================
+
+The kernel command line parameter ``printk.devkmsg=`` overrides this and is
+a one-time setting until next reboot: once set, it cannot be changed by
+this sysctl interface anymore.
+
+==============================================================
+
+
+pty
+===
+
+See Documentation/filesystems/devpts.rst.
+
+
+random
+======
+
+This is a directory, with the following entries:
+
+* ``boot_id``: a UUID generated the first time this is retrieved, and
+ unvarying after that;
+
+* ``uuid``: a UUID generated every time this is retrieved (this can
+ thus be used to generate UUIDs at will);
+
+* ``entropy_avail``: the pool's entropy count, in bits;
+
+* ``poolsize``: the entropy pool size, in bits;
+
+* ``urandom_min_reseed_secs``: obsolete (used to determine the minimum
+ number of seconds between urandom pool reseeding). This file is
+ writable for compatibility purposes, but writing to it has no effect
+ on any RNG behavior;
+
+* ``write_wakeup_threshold``: when the entropy count drops below this
+ (as a number of bits), processes waiting to write to ``/dev/random``
+ are woken up. This file is writable for compatibility purposes, but
+ writing to it has no effect on any RNG behavior.
+
+
+randomize_va_space
+==================
+
+This option can be used to select the type of process address
+space randomization that is used in the system, for architectures
+that support this feature.
+
+== ===========================================================================
+0 Turn the process address space randomization off. This is the
+ default for architectures that do not support this feature anyways,
+ and kernels that are booted with the "norandmaps" parameter.
+
+1 Make the addresses of mmap base, stack and VDSO page randomized.
+ This, among other things, implies that shared libraries will be
+ loaded to random addresses. Also for PIE-linked binaries, the
+ location of code start is randomized. This is the default if the
+ ``CONFIG_COMPAT_BRK`` option is enabled.
+
+2 Additionally enable heap randomization. This is the default if
+ ``CONFIG_COMPAT_BRK`` is disabled.
+
+ There are a few legacy applications out there (such as some ancient
+ versions of libc.so.5 from 1996) that assume that brk area starts
+ just after the end of the code+bss. These applications break when
+ start of the brk area is randomized. There are however no known
+ non-legacy applications that would be broken this way, so for most
+ systems it is safe to choose full randomization.
+
+ Systems with ancient and/or broken binaries should be configured
+ with ``CONFIG_COMPAT_BRK`` enabled, which excludes the heap from process
+ address space randomization.
+== ===========================================================================
+
+
+real-root-dev
+=============
+
+See Documentation/admin-guide/initrd.rst.
+
+
+reboot-cmd (SPARC only)
+=======================
+
+??? This seems to be a way to give an argument to the Sparc
+ROM/Flash boot loader. Maybe to tell it what to do after
+rebooting. ???
+
+
+sched_energy_aware
+==================
+
+Enables/disables Energy Aware Scheduling (EAS). EAS starts
+automatically on platforms where it can run (that is,
+platforms with asymmetric CPU topologies and having an Energy
+Model available). If your platform happens to meet the
+requirements for EAS but you do not want to use it, change
+this value to 0. On Non-EAS platforms, write operation fails and
+read doesn't return anything.
+
+task_delayacct
+===============
+
+Enables/disables task delay accounting (see
+Documentation/accounting/delay-accounting.rst. Enabling this feature incurs
+a small amount of overhead in the scheduler but is useful for debugging
+and performance tuning. It is required by some tools such as iotop.
+
+sched_schedstats
+================
+
+Enables/disables scheduler statistics. Enabling this feature
+incurs a small amount of overhead in the scheduler but is
+useful for debugging and performance tuning.
+
+sched_util_clamp_min
+====================
+
+Max allowed *minimum* utilization.
+
+Default value is 1024, which is the maximum possible value.
+
+It means that any requested uclamp.min value cannot be greater than
+sched_util_clamp_min, i.e., it is restricted to the range
+[0:sched_util_clamp_min].
+
+sched_util_clamp_max
+====================
+
+Max allowed *maximum* utilization.
+
+Default value is 1024, which is the maximum possible value.
+
+It means that any requested uclamp.max value cannot be greater than
+sched_util_clamp_max, i.e., it is restricted to the range
+[0:sched_util_clamp_max].
+
+sched_util_clamp_min_rt_default
+===============================
+
+By default Linux is tuned for performance. Which means that RT tasks always run
+at the highest frequency and most capable (highest capacity) CPU (in
+heterogeneous systems).
+
+Uclamp achieves this by setting the requested uclamp.min of all RT tasks to
+1024 by default, which effectively boosts the tasks to run at the highest
+frequency and biases them to run on the biggest CPU.
+
+This knob allows admins to change the default behavior when uclamp is being
+used. In battery powered devices particularly, running at the maximum
+capacity and frequency will increase energy consumption and shorten the battery
+life.
+
+This knob is only effective for RT tasks which the user hasn't modified their
+requested uclamp.min value via sched_setattr() syscall.
+
+This knob will not escape the range constraint imposed by sched_util_clamp_min
+defined above.
+
+For example if
+
+ sched_util_clamp_min_rt_default = 800
+ sched_util_clamp_min = 600
+
+Then the boost will be clamped to 600 because 800 is outside of the permissible
+range of [0:600]. This could happen for instance if a powersave mode will
+restrict all boosts temporarily by modifying sched_util_clamp_min. As soon as
+this restriction is lifted, the requested sched_util_clamp_min_rt_default
+will take effect.
+
+seccomp
+=======
+
+See Documentation/userspace-api/seccomp_filter.rst.
+
+
+sg-big-buff
+===========
+
+This file shows the size of the generic SCSI (sg) buffer.
+You can't tune it just yet, but you could change it on
+compile time by editing ``include/scsi/sg.h`` and changing
+the value of ``SG_BIG_BUFF``.
+
+There shouldn't be any reason to change this value. If
+you can come up with one, you probably know what you
+are doing anyway :)
+
+
+shmall
+======
+
+This parameter sets the total amount of shared memory pages that can be used
+inside ipc namespace. The shared memory pages counting occurs for each ipc
+namespace separately and is not inherited. Hence, ``shmall`` should always be at
+least ``ceil(shmmax/PAGE_SIZE)``.
+
+If you are not sure what the default ``PAGE_SIZE`` is on your Linux
+system, you can run the following command::
+
+ # getconf PAGE_SIZE
+
+To reduce or disable the ability to allocate shared memory, you must create a
+new ipc namespace, set this parameter to the required value and prohibit the
+creation of a new ipc namespace in the current user namespace or cgroups can
+be used.
+
+shmmax
+======
+
+This value can be used to query and set the run time limit
+on the maximum shared memory segment size that can be created.
+Shared memory segments up to 1Gb are now supported in the
+kernel. This value defaults to ``SHMMAX``.
+
+
+shmmni
+======
+
+This value determines the maximum number of shared memory segments.
+4096 by default (``SHMMNI``).
+
+
+shm_rmid_forced
+===============
+
+Linux lets you set resource limits, including how much memory one
+process can consume, via ``setrlimit(2)``. Unfortunately, shared memory
+segments are allowed to exist without association with any process, and
+thus might not be counted against any resource limits. If enabled,
+shared memory segments are automatically destroyed when their attach
+count becomes zero after a detach or a process termination. It will
+also destroy segments that were created, but never attached to, on exit
+from the process. The only use left for ``IPC_RMID`` is to immediately
+destroy an unattached segment. Of course, this breaks the way things are
+defined, so some applications might stop working. Note that this
+feature will do you no good unless you also configure your resource
+limits (in particular, ``RLIMIT_AS`` and ``RLIMIT_NPROC``). Most systems don't
+need this.
+
+Note that if you change this from 0 to 1, already created segments
+without users and with a dead originative process will be destroyed.
+
+
+sysctl_writes_strict
+====================
+
+Control how file position affects the behavior of updating sysctl values
+via the ``/proc/sys`` interface:
+
+ == ======================================================================
+ -1 Legacy per-write sysctl value handling, with no printk warnings.
+ Each write syscall must fully contain the sysctl value to be
+ written, and multiple writes on the same sysctl file descriptor
+ will rewrite the sysctl value, regardless of file position.
+ 0 Same behavior as above, but warn about processes that perform writes
+ to a sysctl file descriptor when the file position is not 0.
+ 1 (default) Respect file position when writing sysctl strings. Multiple
+ writes will append to the sysctl value buffer. Anything past the max
+ length of the sysctl value buffer will be ignored. Writes to numeric
+ sysctl entries must always be at file position 0 and the value must
+ be fully contained in the buffer sent in the write syscall.
+ == ======================================================================
+
+
+softlockup_all_cpu_backtrace
+============================
+
+This value controls the soft lockup detector thread's behavior
+when a soft lockup condition is detected as to whether or not
+to gather further debug information. If enabled, each cpu will
+be issued an NMI and instructed to capture stack trace.
+
+This feature is only applicable for architectures which support
+NMI.
+
+= ============================================
+0 Do nothing. This is the default behavior.
+1 On detection capture more debug information.
+= ============================================
+
+
+softlockup_panic
+=================
+
+This parameter can be used to control whether the kernel panics
+when a soft lockup is detected.
+
+= ============================================
+0 Don't panic on soft lockup.
+1 Panic on soft lockup.
+= ============================================
+
+This can also be set using the softlockup_panic kernel parameter.
+
+
+soft_watchdog
+=============
+
+This parameter can be used to control the soft lockup detector.
+
+= =================================
+0 Disable the soft lockup detector.
+1 Enable the soft lockup detector.
+= =================================
+
+The soft lockup detector monitors CPUs for threads that are hogging the CPUs
+without rescheduling voluntarily, and thus prevent the 'migration/N' threads
+from running, causing the watchdog work fail to execute. The mechanism depends
+on the CPUs ability to respond to timer interrupts which are needed for the
+watchdog work to be queued by the watchdog timer function, otherwise the NMI
+watchdog — if enabled — can detect a hard lockup condition.
+
+
+split_lock_mitigate (x86 only)
+==============================
+
+On x86, each "split lock" imposes a system-wide performance penalty. On larger
+systems, large numbers of split locks from unprivileged users can result in
+denials of service to well-behaved and potentially more important users.
+
+The kernel mitigates these bad users by detecting split locks and imposing
+penalties: forcing them to wait and only allowing one core to execute split
+locks at a time.
+
+These mitigations can make those bad applications unbearably slow. Setting
+split_lock_mitigate=0 may restore some application performance, but will also
+increase system exposure to denial of service attacks from split lock users.
+
+= ===================================================================
+0 Disable the mitigation mode - just warns the split lock on kernel log
+ and exposes the system to denials of service from the split lockers.
+1 Enable the mitigation mode (this is the default) - penalizes the split
+ lockers with intentional performance degradation.
+= ===================================================================
+
+
+stack_erasing
+=============
+
+This parameter can be used to control kernel stack erasing at the end
+of syscalls for kernels built with ``CONFIG_KSTACK_ERASE``.
+
+That erasing reduces the information which kernel stack leak bugs
+can reveal and blocks some uninitialized stack variable attacks.
+The tradeoff is the performance impact: on a single CPU system kernel
+compilation sees a 1% slowdown, other systems and workloads may vary.
+
+= ====================================================================
+0 Kernel stack erasing is disabled, KSTACK_ERASE_METRICS are not updated.
+1 Kernel stack erasing is enabled (default), it is performed before
+ returning to the userspace at the end of syscalls.
+= ====================================================================
+
+
+stop-a (SPARC only)
+===================
+
+Controls Stop-A:
+
+= ====================================
+0 Stop-A has no effect.
+1 Stop-A breaks to the PROM (default).
+= ====================================
+
+Stop-A is always enabled on a panic, so that the user can return to
+the boot PROM.
+
+
+sysrq
+=====
+
+See Documentation/admin-guide/sysrq.rst.
+
+
+tainted
+=======
+
+Non-zero if the kernel has been tainted. Numeric values, which can be
+ORed together. The letters are seen in "Tainted" line of Oops reports.
+
+====== ===== ==============================================================
+ 1 `(P)` proprietary module was loaded
+ 2 `(F)` module was force loaded
+ 4 `(S)` kernel running on an out of specification system
+ 8 `(R)` module was force unloaded
+ 16 `(M)` processor reported a Machine Check Exception (MCE)
+ 32 `(B)` bad page referenced or some unexpected page flags
+ 64 `(U)` taint requested by userspace application
+ 128 `(D)` kernel died recently, i.e. there was an OOPS or BUG
+ 256 `(A)` an ACPI table was overridden by user
+ 512 `(W)` kernel issued warning
+ 1024 `(C)` staging driver was loaded
+ 2048 `(I)` workaround for bug in platform firmware applied
+ 4096 `(O)` externally-built ("out-of-tree") module was loaded
+ 8192 `(E)` unsigned module was loaded
+ 16384 `(L)` soft lockup occurred
+ 32768 `(K)` kernel has been live patched
+ 65536 `(X)` Auxiliary taint, defined and used by for distros
+131072 `(T)` The kernel was built with the struct randomization plugin
+====== ===== ==============================================================
+
+See Documentation/admin-guide/tainted-kernels.rst for more information.
+
+Note:
+ writes to this sysctl interface will fail with ``EINVAL`` if the kernel is
+ booted with the command line option ``panic_on_taint=<bitmask>,nousertaint``
+ and any of the ORed together values being written to ``tainted`` match with
+ the bitmask declared on panic_on_taint.
+ See Documentation/admin-guide/kernel-parameters.rst for more details on
+ that particular kernel command line option and its optional
+ ``nousertaint`` switch.
+
+threads-max
+===========
+
+This value controls the maximum number of threads that can be created
+using ``fork()``.
+
+During initialization the kernel sets this value such that even if the
+maximum number of threads is created, the thread structures occupy only
+a part (1/8th) of the available RAM pages.
+
+The minimum value that can be written to ``threads-max`` is 1.
+
+The maximum value that can be written to ``threads-max`` is given by the
+constant ``FUTEX_TID_MASK`` (0x3fffffff).
+
+If a value outside of this range is written to ``threads-max`` an
+``EINVAL`` error occurs.
+
+timer_migration
+===============
+
+When set to a non-zero value, attempt to migrate timers away from idle cpus to
+allow them to remain in low power states longer.
+
+Default is set (1).
+
+traceoff_on_warning
+===================
+
+When set, disables tracing (see Documentation/trace/ftrace.rst) when a
+``WARN()`` is hit.
+
+
+tracepoint_printk
+=================
+
+When tracepoints are sent to printk() (enabled by the ``tp_printk``
+boot parameter), this entry provides runtime control::
+
+ echo 0 > /proc/sys/kernel/tracepoint_printk
+
+will stop tracepoints from being sent to printk(), and::
+
+ echo 1 > /proc/sys/kernel/tracepoint_printk
+
+will send them to printk() again.
+
+This only works if the kernel was booted with ``tp_printk`` enabled.
+
+See Documentation/admin-guide/kernel-parameters.rst and
+Documentation/trace/boottime-trace.rst.
+
+
+unaligned-trap
+==============
+
+On architectures where unaligned accesses cause traps, and where this
+feature is supported (``CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW``; currently,
+``arc``, ``parisc`` and ``loongarch``), controls whether unaligned traps
+are caught and emulated (instead of failing).
+
+= ========================================================
+0 Do not emulate unaligned accesses.
+1 Emulate unaligned accesses. This is the default setting.
+= ========================================================
+
+See also `ignore-unaligned-usertrap`_.
+
+
+unknown_nmi_panic
+=================
+
+The value in this file affects behavior of handling NMI. When the
+value is non-zero, unknown NMI is trapped and then panic occurs. At
+that time, kernel debugging information is displayed on console.
+
+NMI switch that most IA32 servers have fires unknown NMI up, for
+example. If a system hangs up, try pressing the NMI switch.
+
+
+unprivileged_bpf_disabled
+=========================
+
+Writing 1 to this entry will disable unprivileged calls to ``bpf()``;
+once disabled, calling ``bpf()`` without ``CAP_SYS_ADMIN`` or ``CAP_BPF``
+will return ``-EPERM``. Once set to 1, this can't be cleared from the
+running kernel anymore.
+
+Writing 2 to this entry will also disable unprivileged calls to ``bpf()``,
+however, an admin can still change this setting later on, if needed, by
+writing 0 or 1 to this entry.
+
+If ``BPF_UNPRIV_DEFAULT_OFF`` is enabled in the kernel config, then this
+entry will default to 2 instead of 0.
+
+= =============================================================
+0 Unprivileged calls to ``bpf()`` are enabled
+1 Unprivileged calls to ``bpf()`` are disabled without recovery
+2 Unprivileged calls to ``bpf()`` are disabled
+= =============================================================
+
+
+warn_limit
+==========
+
+Number of kernel warnings after which the kernel should panic when
+``panic_on_warn`` is not set. Setting this to 0 disables checking
+the warning count. Setting this to 1 has the same effect as setting
+``panic_on_warn=1``. The default value is 0.
+
+
+watchdog
+========
+
+This parameter can be used to disable or enable the soft lockup detector
+*and* the NMI watchdog (i.e. the hard lockup detector) at the same time.
+
+= ==============================
+0 Disable both lockup detectors.
+1 Enable both lockup detectors.
+= ==============================
+
+The soft lockup detector and the NMI watchdog can also be disabled or
+enabled individually, using the ``soft_watchdog`` and ``nmi_watchdog``
+parameters.
+If the ``watchdog`` parameter is read, for example by executing::
+
+ cat /proc/sys/kernel/watchdog
+
+the output of this command (0 or 1) shows the logical OR of
+``soft_watchdog`` and ``nmi_watchdog``.
+
+
+watchdog_cpumask
+================
+
+This value can be used to control on which cpus the watchdog may run.
+The default cpumask is all possible cores, but if ``NO_HZ_FULL`` is
+enabled in the kernel config, and cores are specified with the
+``nohz_full=`` boot argument, those cores are excluded by default.
+Offline cores can be included in this mask, and if the core is later
+brought online, the watchdog will be started based on the mask value.
+
+Typically this value would only be touched in the ``nohz_full`` case
+to re-enable cores that by default were not running the watchdog,
+if a kernel lockup was suspected on those cores.
+
+The argument value is the standard cpulist format for cpumasks,
+so for example to enable the watchdog on cores 0, 2, 3, and 4 you
+might say::
+
+ echo 0,2-4 > /proc/sys/kernel/watchdog_cpumask
+
+
+watchdog_thresh
+===============
+
+This value can be used to control the frequency of hrtimer and NMI
+events and the soft and hard lockup thresholds. The default threshold
+is 10 seconds.
+
+The softlockup threshold is (``2 * watchdog_thresh``). Setting this
+tunable to zero will disable lockup detection altogether.
diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst
new file mode 100644
index 000000000000..2ef50828aff1
--- /dev/null
+++ b/Documentation/admin-guide/sysctl/net.rst
@@ -0,0 +1,519 @@
+================================
+Documentation for /proc/sys/net/
+================================
+
+Copyright
+
+Copyright (c) 1999
+
+ - Terrehon Bowden <terrehon@pacbell.net>
+ - Bodo Bauer <bb@ricochet.net>
+
+Copyright (c) 2000
+
+ - Jorge Nerin <comandante@zaralinux.com>
+
+Copyright (c) 2009
+
+ - Shen Feng <shen@cn.fujitsu.com>
+
+For general info and legal blurb, please look in index.rst.
+
+------------------------------------------------------------------------------
+
+This file contains the documentation for the sysctl files in
+/proc/sys/net
+
+The interface to the networking parts of the kernel is located in
+/proc/sys/net. The following table shows all possible subdirectories. You may
+see only some of them, depending on your kernel's configuration.
+
+
+Table : Subdirectories in /proc/sys/net
+
+ ========= =================== = ========== ===================
+ Directory Content Directory Content
+ ========= =================== = ========== ===================
+ 802 E802 protocol mptcp Multipath TCP
+ appletalk Appletalk protocol netfilter Network Filter
+ ax25 AX25 netrom NET/ROM
+ bridge Bridging rose X.25 PLP layer
+ core General parameter tipc TIPC
+ ethernet Ethernet protocol unix Unix domain sockets
+ ipv4 IP version 4 x25 X.25 protocol
+ ipv6 IP version 6
+ ========= =================== = ========== ===================
+
+1. /proc/sys/net/core - Network core options
+============================================
+
+bpf_jit_enable
+--------------
+
+This enables the BPF Just in Time (JIT) compiler. BPF is a flexible
+and efficient infrastructure allowing to execute bytecode at various
+hook points. It is used in a number of Linux kernel subsystems such
+as networking (e.g. XDP, tc), tracing (e.g. kprobes, uprobes, tracepoints)
+and security (e.g. seccomp). LLVM has a BPF back end that can compile
+restricted C into a sequence of BPF instructions. After program load
+through bpf(2) and passing a verifier in the kernel, a JIT will then
+translate these BPF proglets into native CPU instructions. There are
+two flavors of JITs, the newer eBPF JIT currently supported on:
+
+ - x86_64
+ - x86_32
+ - arm64
+ - arm32
+ - ppc64
+ - ppc32
+ - sparc64
+ - mips64
+ - s390x
+ - riscv64
+ - riscv32
+ - loongarch64
+ - arc
+
+And the older cBPF JIT supported on the following archs:
+
+ - mips
+ - sparc
+
+eBPF JITs are a superset of cBPF JITs, meaning the kernel will
+migrate cBPF instructions into eBPF instructions and then JIT
+compile them transparently. Older cBPF JITs can only translate
+tcpdump filters, seccomp rules, etc, but not mentioned eBPF
+programs loaded through bpf(2).
+
+Values:
+
+ - 0 - disable the JIT (default value)
+ - 1 - enable the JIT
+ - 2 - enable the JIT and ask the compiler to emit traces on kernel log.
+
+bpf_jit_harden
+--------------
+
+This enables hardening for the BPF JIT compiler. Supported are eBPF
+JIT backends. Enabling hardening trades off performance, but can
+mitigate JIT spraying.
+
+Values:
+
+ - 0 - disable JIT hardening (default value)
+ - 1 - enable JIT hardening for unprivileged users only
+ - 2 - enable JIT hardening for all users
+
+where "privileged user" in this context means a process having
+CAP_BPF or CAP_SYS_ADMIN in the root user name space.
+
+bpf_jit_kallsyms
+----------------
+
+When BPF JIT compiler is enabled, then compiled images are unknown
+addresses to the kernel, meaning they neither show up in traces nor
+in /proc/kallsyms. This enables export of these addresses, which can
+be used for debugging/tracing. If bpf_jit_harden is enabled, this
+feature is disabled.
+
+Values :
+
+ - 0 - disable JIT kallsyms export (default value)
+ - 1 - enable JIT kallsyms export for privileged users only
+
+bpf_jit_limit
+-------------
+
+This enforces a global limit for memory allocations to the BPF JIT
+compiler in order to reject unprivileged JIT requests once it has
+been surpassed. bpf_jit_limit contains the value of the global limit
+in bytes.
+
+dev_weight
+----------
+
+The maximum number of packets that kernel can handle on a NAPI interrupt,
+it's a Per-CPU variable. For drivers that support LRO or GRO_HW, a hardware
+aggregated packet is counted as one packet in this context.
+
+Default: 64
+
+dev_weight_rx_bias
+------------------
+
+RPS (e.g. RFS, aRFS) processing is competing with the registered NAPI poll function
+of the driver for the per softirq cycle netdev_budget. This parameter influences
+the proportion of the configured netdev_budget that is spent on RPS based packet
+processing during RX softirq cycles. It is further meant for making current
+dev_weight adaptable for asymmetric CPU needs on RX/TX side of the network stack.
+(see dev_weight_tx_bias) It is effective on a per CPU basis. Determination is based
+on dev_weight and is calculated multiplicative (dev_weight * dev_weight_rx_bias).
+
+Default: 1
+
+dev_weight_tx_bias
+------------------
+
+Scales the maximum number of packets that can be processed during a TX softirq cycle.
+Effective on a per CPU basis. Allows scaling of current dev_weight for asymmetric
+net stack processing needs. Be careful to avoid making TX softirq processing a CPU hog.
+
+Calculation is based on dev_weight (dev_weight * dev_weight_tx_bias).
+
+Default: 1
+
+default_qdisc
+-------------
+
+The default queuing discipline to use for network devices. This allows
+overriding the default of pfifo_fast with an alternative. Since the default
+queuing discipline is created without additional parameters so is best suited
+to queuing disciplines that work well without configuration like stochastic
+fair queue (sfq), CoDel (codel) or fair queue CoDel (fq_codel). Don't use
+queuing disciplines like Hierarchical Token Bucket or Deficit Round Robin
+which require setting up classes and bandwidths. Note that physical multiqueue
+interfaces still use mq as root qdisc, which in turn uses this default for its
+leaves. Virtual devices (like e.g. lo or veth) ignore this setting and instead
+default to noqueue.
+
+Default: pfifo_fast
+
+busy_read
+---------
+
+Low latency busy poll timeout for socket reads. (needs CONFIG_NET_RX_BUSY_POLL)
+Approximate time in us to busy loop waiting for packets on the device queue.
+This sets the default value of the SO_BUSY_POLL socket option.
+Can be set or overridden per socket by setting socket option SO_BUSY_POLL,
+which is the preferred method of enabling. If you need to enable the feature
+globally via sysctl, a value of 50 is recommended.
+
+Will increase power usage.
+
+Default: 0 (off)
+
+busy_poll
+----------------
+Low latency busy poll timeout for poll and select. (needs CONFIG_NET_RX_BUSY_POLL)
+Approximate time in us to busy loop waiting for events.
+Recommended value depends on the number of sockets you poll on.
+For several sockets 50, for several hundreds 100.
+For more than that you probably want to use epoll.
+Note that only sockets with SO_BUSY_POLL set will be busy polled,
+so you want to either selectively set SO_BUSY_POLL on those sockets or set
+sysctl.net.busy_read globally.
+
+Will increase power usage.
+
+Default: 0 (off)
+
+mem_pcpu_rsv
+------------
+
+Per-cpu reserved forward alloc cache size in page units. Default 1MB per CPU.
+
+rmem_default
+------------
+
+The default setting of the socket receive buffer in bytes.
+
+rmem_max
+--------
+
+The maximum receive socket buffer size in bytes.
+
+Default: 4194304
+
+rps_default_mask
+----------------
+
+The default RPS CPU mask used on newly created network devices. An empty
+mask means RPS disabled by default.
+
+tstamp_allow_data
+-----------------
+Allow processes to receive tx timestamps looped together with the original
+packet contents. If disabled, transmit timestamp requests from unprivileged
+processes are dropped unless socket option SOF_TIMESTAMPING_OPT_TSONLY is set.
+
+Default: 1 (on)
+
+
+wmem_default
+------------
+
+The default setting (in bytes) of the socket send buffer.
+
+wmem_max
+--------
+
+The maximum send socket buffer size in bytes.
+
+Default: 4194304
+
+message_burst and message_cost
+------------------------------
+
+These parameters are used to limit the warning messages written to the kernel
+log from the networking code. They enforce a rate limit to make a
+denial-of-service attack impossible. A higher message_cost factor, results in
+fewer messages that will be written. Message_burst controls when messages will
+be dropped. The default settings limit warning messages to one every five
+seconds.
+
+warnings
+--------
+
+This sysctl is now unused.
+
+This was used to control console messages from the networking stack that
+occur because of problems on the network like duplicate address or bad
+checksums.
+
+These messages are now emitted at KERN_DEBUG and can generally be enabled
+and controlled by the dynamic_debug facility.
+
+netdev_budget
+-------------
+
+Maximum number of packets taken from all interfaces in one polling cycle (NAPI
+poll). In one polling cycle interfaces which are registered to polling are
+probed in a round-robin manner. Also, a polling cycle may not exceed
+netdev_budget_usecs microseconds, even if netdev_budget has not been
+exhausted.
+
+netdev_budget_usecs
+---------------------
+
+Maximum number of microseconds in one NAPI polling cycle. Polling
+will exit when either netdev_budget_usecs have elapsed during the
+poll cycle or the number of packets processed reaches netdev_budget.
+
+netdev_max_backlog
+------------------
+
+Maximum number of packets, queued on the INPUT side, when the interface
+receives packets faster than kernel can process them.
+
+netdev_rss_key
+--------------
+
+RSS (Receive Side Scaling) enabled drivers use a 40 bytes host key that is
+randomly generated.
+Some user space might need to gather its content even if drivers do not
+provide ethtool -x support yet.
+
+::
+
+ myhost:~# cat /proc/sys/net/core/netdev_rss_key
+ 84:50:f4:00:a8:15:d1:a7:e9:7f:1d:60:35:c7:47:25:42:97:74:ca:56:bb:b6:a1:d8: ... (52 bytes total)
+
+File contains nul bytes if no driver ever called netdev_rss_key_fill() function.
+
+Note:
+ /proc/sys/net/core/netdev_rss_key contains 52 bytes of key,
+ but most drivers only use 40 bytes of it.
+
+::
+
+ myhost:~# ethtool -x eth0
+ RX flow hash indirection table for eth0 with 8 RX ring(s):
+ 0: 0 1 2 3 4 5 6 7
+ RSS hash key:
+ 84:50:f4:00:a8:15:d1:a7:e9:7f:1d:60:35:c7:47:25:42:97:74:ca:56:bb:b6:a1:d8:43:e3:c9:0c:fd:17:55:c2:3a:4d:69:ed:f1:42:89
+
+netdev_tstamp_prequeue
+----------------------
+
+If set to 0, RX packet timestamps can be sampled after RPS processing, when
+the target CPU processes packets. It might give some delay on timestamps, but
+permit to distribute the load on several cpus.
+
+If set to 1 (default), timestamps are sampled as soon as possible, before
+queueing.
+
+netdev_unregister_timeout_secs
+------------------------------
+
+Unregister network device timeout in seconds.
+This option controls the timeout (in seconds) used to issue a warning while
+waiting for a network device refcount to drop to 0 during device
+unregistration. A lower value may be useful during bisection to detect
+a leaked reference faster. A larger value may be useful to prevent false
+warnings on slow/loaded systems.
+Default value is 10, minimum 1, maximum 3600.
+
+skb_defer_max
+-------------
+
+Max size (in skbs) of the per-cpu list of skbs being freed
+by the cpu which allocated them. Used by TCP stack so far.
+
+Default: 64
+
+optmem_max
+----------
+
+Maximum ancillary buffer size allowed per socket. Ancillary data is a sequence
+of struct cmsghdr structures with appended data. TCP tx zerocopy also uses
+optmem_max as a limit for its internal structures.
+
+Default : 128 KB
+
+fb_tunnels_only_for_init_net
+----------------------------
+
+Controls if fallback tunnels (like tunl0, gre0, gretap0, erspan0,
+sit0, ip6tnl0, ip6gre0) are automatically created. There are 3 possibilities
+(a) value = 0; respective fallback tunnels are created when module is
+loaded in every net namespaces (backward compatible behavior).
+(b) value = 1; [kcmd value: initns] respective fallback tunnels are
+created only in init net namespace and every other net namespace will
+not have them.
+(c) value = 2; [kcmd value: none] fallback tunnels are not created
+when a module is loaded in any of the net namespace. Setting value to
+"2" is pointless after boot if these modules are built-in, so there is
+a kernel command-line option that can change this default. Please refer to
+Documentation/admin-guide/kernel-parameters.txt for additional details.
+
+Not creating fallback tunnels gives control to userspace to create
+whatever is needed only and avoid creating devices which are redundant.
+
+Default : 0 (for compatibility reasons)
+
+devconf_inherit_init_net
+------------------------
+
+Controls if a new network namespace should inherit all current
+settings under /proc/sys/net/{ipv4,ipv6}/conf/{all,default}/. By
+default, we keep the current behavior: for IPv4 we inherit all current
+settings from init_net and for IPv6 we reset all settings to default.
+
+If set to 1, both IPv4 and IPv6 settings are forced to inherit from
+current ones in init_net. If set to 2, both IPv4 and IPv6 settings are
+forced to reset to their default values. If set to 3, both IPv4 and IPv6
+settings are forced to inherit from current ones in the netns where this
+new netns has been created.
+
+Default : 0 (for compatibility reasons)
+
+txrehash
+--------
+
+Controls default hash rethink behaviour on socket when SO_TXREHASH option is set
+to SOCK_TXREHASH_DEFAULT (i. e. not overridden by setsockopt).
+
+If set to 1 (default), hash rethink is performed on listening socket.
+If set to 0, hash rethink is not performed.
+
+gro_normal_batch
+----------------
+
+Maximum number of the segments to batch up on output of GRO. When a packet
+exits GRO, either as a coalesced superframe or as an original packet which
+GRO has decided not to coalesce, it is placed on a per-NAPI list. This
+list is then passed to the stack when the number of segments reaches the
+gro_normal_batch limit.
+
+high_order_alloc_disable
+------------------------
+
+By default the allocator for page frags tries to use high order pages (order-3
+on x86). While the default behavior gives good results in most cases, some users
+might have hit a contention in page allocations/freeing. This was especially
+true on older kernels (< 5.14) when high-order pages were not stored on per-cpu
+lists. This allows to opt-in for order-0 allocation instead but is now mostly of
+historical importance.
+
+Default: 0
+
+2. /proc/sys/net/unix - Parameters for Unix domain sockets
+----------------------------------------------------------
+
+There is only one file in this directory.
+unix_dgram_qlen limits the max number of datagrams queued in Unix domain
+socket's buffer. It will not take effect unless PF_UNIX flag is specified.
+
+
+3. /proc/sys/net/ipv4 - IPV4 settings
+-------------------------------------
+Please see: Documentation/networking/ip-sysctl.rst and
+Documentation/admin-guide/sysctl/net.rst for descriptions of these entries.
+
+
+4. Appletalk
+------------
+
+The /proc/sys/net/appletalk directory holds the Appletalk configuration data
+when Appletalk is loaded. The configurable parameters are:
+
+aarp-expiry-time
+----------------
+
+The amount of time we keep an ARP entry before expiring it. Used to age out
+old hosts.
+
+aarp-resolve-time
+-----------------
+
+The amount of time we will spend trying to resolve an Appletalk address.
+
+aarp-retransmit-limit
+---------------------
+
+The number of times we will retransmit a query before giving up.
+
+aarp-tick-time
+--------------
+
+Controls the rate at which expires are checked.
+
+The directory /proc/net/appletalk holds the list of active Appletalk sockets
+on a machine.
+
+The fields indicate the DDP type, the local address (in network:node format)
+the remote address, the size of the transmit pending queue, the size of the
+received queue (bytes waiting for applications to read) the state and the uid
+owning the socket.
+
+/proc/net/atalk_iface lists all the interfaces configured for appletalk.It
+shows the name of the interface, its Appletalk address, the network range on
+that address (or network number for phase 1 networks), and the status of the
+interface.
+
+/proc/net/atalk_route lists each known network route. It lists the target
+(network) that the route leads to, the router (may be directly connected), the
+route flags, and the device the route is using.
+
+5. TIPC
+-------
+
+tipc_rmem
+---------
+
+The TIPC protocol now has a tunable for the receive memory, similar to the
+tcp_rmem - i.e. a vector of 3 INTEGERs: (min, default, max)
+
+::
+
+ # cat /proc/sys/net/tipc/tipc_rmem
+ 4252725 34021800 68043600
+ #
+
+The max value is set to CONN_OVERLOAD_LIMIT, and the default and min values
+are scaled (shifted) versions of that same value. Note that the min value
+is not at this point in time used in any meaningful way, but the triplet is
+preserved in order to be consistent with things like tcp_rmem.
+
+named_timeout
+-------------
+
+TIPC name table updates are distributed asynchronously in a cluster, without
+any form of transaction handling. This means that different race scenarios are
+possible. One such is that a name withdrawal sent out by one node and received
+by another node may arrive after a second, overlapping name publication already
+has been accepted from a third node, although the conflicting updates
+originally may have been issued in the correct sequential order.
+If named_timeout is nonzero, failed topology updates will be placed on a defer
+queue until another event arrives that clears the error, or until the timeout
+expires. Value is in milliseconds.
diff --git a/Documentation/admin-guide/sysctl/sunrpc.rst b/Documentation/admin-guide/sysctl/sunrpc.rst
new file mode 100644
index 000000000000..09780a682afd
--- /dev/null
+++ b/Documentation/admin-guide/sysctl/sunrpc.rst
@@ -0,0 +1,25 @@
+===================================
+Documentation for /proc/sys/sunrpc/
+===================================
+
+kernel version 2.2.10
+
+Copyright (c) 1998, 1999, Rik van Riel <riel@nl.linux.org>
+
+For general info and legal blurb, please look in index.rst.
+
+------------------------------------------------------------------------------
+
+This file contains the documentation for the sysctl files in
+/proc/sys/sunrpc and is valid for Linux kernel version 2.2.
+
+The files in this directory can be used to (re)set the debug
+flags of the SUN Remote Procedure Call (RPC) subsystem in
+the Linux kernel. This stuff is used for NFS, KNFSD and
+maybe a few other things as well.
+
+The files in there are used to control the debugging flags:
+rpc_debug, nfs_debug, nfsd_debug and nlm_debug.
+
+These flags are for kernel hackers only. You should read the
+source code in net/sunrpc/ for more information.
diff --git a/Documentation/admin-guide/sysctl/user.rst b/Documentation/admin-guide/sysctl/user.rst
new file mode 100644
index 000000000000..c45824589339
--- /dev/null
+++ b/Documentation/admin-guide/sysctl/user.rst
@@ -0,0 +1,84 @@
+=================================
+Documentation for /proc/sys/user/
+=================================
+
+kernel version 4.9.0
+
+Copyright (c) 2016 Eric Biederman <ebiederm@xmission.com>
+
+------------------------------------------------------------------------------
+
+This file contains the documentation for the sysctl files in
+/proc/sys/user.
+
+The files in this directory can be used to override the default
+limits on the number of namespaces and other objects that have
+per user per user namespace limits.
+
+The primary purpose of these limits is to stop programs that
+malfunction and attempt to create a ridiculous number of objects,
+before the malfunction becomes a system wide problem. It is the
+intention that the defaults of these limits are set high enough that
+no program in normal operation should run into these limits.
+
+The creation of per user per user namespace objects are charged to
+the user in the user namespace who created the object and
+verified to be below the per user limit in that user namespace.
+
+The creation of objects is also charged to all of the users
+who created user namespaces the creation of the object happens
+in (user namespaces can be nested) and verified to be below the per user
+limits in the user namespaces of those users.
+
+This recursive counting of created objects ensures that creating a
+user namespace does not allow a user to escape their current limits.
+
+Currently, these files are in /proc/sys/user:
+
+max_cgroup_namespaces
+=====================
+
+ The maximum number of cgroup namespaces that any user in the current
+ user namespace may create.
+
+max_ipc_namespaces
+==================
+
+ The maximum number of ipc namespaces that any user in the current
+ user namespace may create.
+
+max_mnt_namespaces
+==================
+
+ The maximum number of mount namespaces that any user in the current
+ user namespace may create.
+
+max_net_namespaces
+==================
+
+ The maximum number of network namespaces that any user in the
+ current user namespace may create.
+
+max_pid_namespaces
+==================
+
+ The maximum number of pid namespaces that any user in the current
+ user namespace may create.
+
+max_time_namespaces
+===================
+
+ The maximum number of time namespaces that any user in the current
+ user namespace may create.
+
+max_user_namespaces
+===================
+
+ The maximum number of user namespaces that any user in the current
+ user namespace may create.
+
+max_uts_namespaces
+==================
+
+ The maximum number of user namespaces that any user in the current
+ user namespace may create.
diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst
new file mode 100644
index 000000000000..4d71211fdad8
--- /dev/null
+++ b/Documentation/admin-guide/sysctl/vm.rst
@@ -0,0 +1,1125 @@
+===============================
+Documentation for /proc/sys/vm/
+===============================
+
+kernel version 2.6.29
+
+Copyright (c) 1998, 1999, Rik van Riel <riel@nl.linux.org>
+
+Copyright (c) 2008 Peter W. Morreale <pmorreale@novell.com>
+
+For general info and legal blurb, please look in index.rst.
+
+------------------------------------------------------------------------------
+
+This file contains the documentation for the sysctl files in
+/proc/sys/vm and is valid for Linux kernel version 2.6.29.
+
+The files in this directory can be used to tune the operation
+of the virtual memory (VM) subsystem of the Linux kernel and
+the writeout of dirty data to disk.
+
+Default values and initialization routines for most of these
+files can be found in mm/swap.c.
+
+Currently, these files are in /proc/sys/vm:
+
+- admin_reserve_kbytes
+- compact_memory
+- compaction_proactiveness
+- compact_unevictable_allowed
+- defrag_mode
+- dirty_background_bytes
+- dirty_background_ratio
+- dirty_bytes
+- dirty_expire_centisecs
+- dirty_ratio
+- dirtytime_expire_seconds
+- dirty_writeback_centisecs
+- drop_caches
+- enable_soft_offline
+- extfrag_threshold
+- highmem_is_dirtyable
+- hugetlb_shm_group
+- laptop_mode
+- legacy_va_layout
+- lowmem_reserve_ratio
+- max_map_count
+- mem_profiling (only if CONFIG_MEM_ALLOC_PROFILING=y)
+- memory_failure_early_kill
+- memory_failure_recovery
+- min_free_kbytes
+- min_slab_ratio
+- min_unmapped_ratio
+- mmap_min_addr
+- mmap_rnd_bits
+- mmap_rnd_compat_bits
+- nr_hugepages
+- nr_hugepages_mempolicy
+- nr_overcommit_hugepages
+- nr_trim_pages (only if CONFIG_MMU=n)
+- numa_zonelist_order
+- oom_dump_tasks
+- oom_kill_allocating_task
+- overcommit_kbytes
+- overcommit_memory
+- overcommit_ratio
+- page-cluster
+- page_lock_unfairness
+- panic_on_oom
+- percpu_pagelist_high_fraction
+- stat_interval
+- stat_refresh
+- numa_stat
+- swappiness
+- unprivileged_userfaultfd
+- user_reserve_kbytes
+- vfs_cache_pressure
+- vfs_cache_pressure_denom
+- watermark_boost_factor
+- watermark_scale_factor
+- zone_reclaim_mode
+
+
+admin_reserve_kbytes
+====================
+
+The amount of free memory in the system that should be reserved for users
+with the capability cap_sys_admin.
+
+admin_reserve_kbytes defaults to min(3% of free pages, 8MB)
+
+That should provide enough for the admin to log in and kill a process,
+if necessary, under the default overcommit 'guess' mode.
+
+Systems running under overcommit 'never' should increase this to account
+for the full Virtual Memory Size of programs used to recover. Otherwise,
+root may not be able to log in to recover the system.
+
+How do you calculate a minimum useful reserve?
+
+sshd or login + bash (or some other shell) + top (or ps, kill, etc.)
+
+For overcommit 'guess', we can sum resident set sizes (RSS).
+On x86_64 this is about 8MB.
+
+For overcommit 'never', we can take the max of their virtual sizes (VSZ)
+and add the sum of their RSS.
+On x86_64 this is about 128MB.
+
+Changing this takes effect whenever an application requests memory.
+
+
+compact_memory
+==============
+
+Available only when CONFIG_COMPACTION is set. When 1 is written to the file,
+all zones are compacted such that free memory is available in contiguous
+blocks where possible. This can be important for example in the allocation of
+huge pages although processes will also directly compact memory as required.
+
+compaction_proactiveness
+========================
+
+This tunable takes a value in the range [0, 100] with a default value of
+20. This tunable determines how aggressively compaction is done in the
+background. Write of a non zero value to this tunable will immediately
+trigger the proactive compaction. Setting it to 0 disables proactive compaction.
+
+Note that compaction has a non-trivial system-wide impact as pages
+belonging to different processes are moved around, which could also lead
+to latency spikes in unsuspecting applications. The kernel employs
+various heuristics to avoid wasting CPU cycles if it detects that
+proactive compaction is not being effective.
+
+Setting the value above 80 will, in addition to lowering the acceptable level
+of fragmentation, make the compaction code more sensitive to increases in
+fragmentation, i.e. compaction will trigger more often, but reduce
+fragmentation by a smaller amount.
+This makes the fragmentation level more stable over time.
+
+Be careful when setting it to extreme values like 100, as that may
+cause excessive background compaction activity.
+
+compact_unevictable_allowed
+===========================
+
+Available only when CONFIG_COMPACTION is set. When set to 1, compaction is
+allowed to examine the unevictable lru (mlocked pages) for pages to compact.
+This should be used on systems where stalls for minor page faults are an
+acceptable trade for large contiguous free memory. Set to 0 to prevent
+compaction from moving pages that are unevictable. Default value is 1.
+On CONFIG_PREEMPT_RT the default value is 0 in order to avoid a page fault, due
+to compaction, which would block the task from becoming active until the fault
+is resolved.
+
+defrag_mode
+===========
+
+When set to 1, the page allocator tries harder to avoid fragmentation
+and maintain the ability to produce huge pages / higher-order pages.
+
+It is recommended to enable this right after boot, as fragmentation,
+once it occurred, can be long-lasting or even permanent.
+
+dirty_background_bytes
+======================
+
+Contains the amount of dirty memory at which the background kernel
+flusher threads will start writeback.
+
+Note:
+ dirty_background_bytes is the counterpart of dirty_background_ratio. Only
+ one of them may be specified at a time. When one sysctl is written it is
+ immediately taken into account to evaluate the dirty memory limits and the
+ other appears as 0 when read.
+
+
+dirty_background_ratio
+======================
+
+Contains, as a percentage of total available memory that contains free pages
+and reclaimable pages, the number of pages at which the background kernel
+flusher threads will start writing out dirty data.
+
+The total available memory is not equal to total system memory.
+
+
+dirty_bytes
+===========
+
+Contains the amount of dirty memory at which a process generating disk writes
+will itself start writeback.
+
+Note: dirty_bytes is the counterpart of dirty_ratio. Only one of them may be
+specified at a time. When one sysctl is written it is immediately taken into
+account to evaluate the dirty memory limits and the other appears as 0 when
+read.
+
+Note: the minimum value allowed for dirty_bytes is two pages (in bytes); any
+value lower than this limit will be ignored and the old configuration will be
+retained.
+
+
+dirty_expire_centisecs
+======================
+
+This tunable is used to define when dirty data is old enough to be eligible
+for writeout by the kernel flusher threads. It is expressed in 100'ths
+of a second. Data which has been dirty in-memory for longer than this
+interval will be written out next time a flusher thread wakes up.
+
+
+dirty_ratio
+===========
+
+Contains, as a percentage of total available memory that contains free pages
+and reclaimable pages, the number of pages at which a process which is
+generating disk writes will itself start writing out dirty data.
+
+The total available memory is not equal to total system memory.
+
+
+dirtytime_expire_seconds
+========================
+
+When a lazytime inode is constantly having its pages dirtied, the inode with
+an updated timestamp will never get chance to be written out. And, if the
+only thing that has happened on the file system is a dirtytime inode caused
+by an atime update, a worker will be scheduled to make sure that inode
+eventually gets pushed out to disk. This tunable is used to define when dirty
+inode is old enough to be eligible for writeback by the kernel flusher threads.
+And, it is also used as the interval to wakeup dirtytime_writeback thread.
+
+
+dirty_writeback_centisecs
+=========================
+
+The kernel flusher threads will periodically wake up and write `old` data
+out to disk. This tunable expresses the interval between those wakeups, in
+100'ths of a second.
+
+Setting this to zero disables periodic writeback altogether.
+
+
+drop_caches
+===========
+
+Writing to this will cause the kernel to drop clean caches, as well as
+reclaimable slab objects like dentries and inodes. Once dropped, their
+memory becomes free.
+
+To free pagecache::
+
+ echo 1 > /proc/sys/vm/drop_caches
+
+To free reclaimable slab objects (includes dentries and inodes)::
+
+ echo 2 > /proc/sys/vm/drop_caches
+
+To free slab objects and pagecache::
+
+ echo 3 > /proc/sys/vm/drop_caches
+
+This is a non-destructive operation and will not free any dirty objects.
+To increase the number of objects freed by this operation, the user may run
+`sync` prior to writing to /proc/sys/vm/drop_caches. This will minimize the
+number of dirty objects on the system and create more candidates to be
+dropped.
+
+This file is not a means to control the growth of the various kernel caches
+(inodes, dentries, pagecache, etc...) These objects are automatically
+reclaimed by the kernel when memory is needed elsewhere on the system.
+
+Use of this file can cause performance problems. Since it discards cached
+objects, it may cost a significant amount of I/O and CPU to recreate the
+dropped objects, especially if they were under heavy use. Because of this,
+use outside of a testing or debugging environment is not recommended.
+
+You may see informational messages in your kernel log when this file is
+used::
+
+ cat (1234): drop_caches: 3
+
+These are informational only. They do not mean that anything is wrong
+with your system. To disable them, echo 4 (bit 2) into drop_caches.
+
+enable_soft_offline
+===================
+Correctable memory errors are very common on servers. Soft-offline is kernel's
+solution for memory pages having (excessive) corrected memory errors.
+
+For different types of page, soft-offline has different behaviors / costs.
+
+- For a raw error page, soft-offline migrates the in-use page's content to
+ a new raw page.
+
+- For a page that is part of a transparent hugepage, soft-offline splits the
+ transparent hugepage into raw pages, then migrates only the raw error page.
+ As a result, user is transparently backed by 1 less hugepage, impacting
+ memory access performance.
+
+- For a page that is part of a HugeTLB hugepage, soft-offline first migrates
+ the entire HugeTLB hugepage, during which a free hugepage will be consumed
+ as migration target. Then the original hugepage is dissolved into raw
+ pages without compensation, reducing the capacity of the HugeTLB pool by 1.
+
+It is user's call to choose between reliability (staying away from fragile
+physical memory) vs performance / capacity implications in transparent and
+HugeTLB cases.
+
+For all architectures, enable_soft_offline controls whether to soft offline
+memory pages. When set to 1, kernel attempts to soft offline the pages
+whenever it thinks needed. When set to 0, kernel returns EOPNOTSUPP to
+the request to soft offline the pages. Its default value is 1.
+
+It is worth mentioning that after setting enable_soft_offline to 0, the
+following requests to soft offline pages will not be performed:
+
+- Request to soft offline pages from RAS Correctable Errors Collector.
+
+- On ARM, the request to soft offline pages from GHES driver.
+
+- On PARISC, the request to soft offline pages from Page Deallocation Table.
+
+extfrag_threshold
+=================
+
+This parameter affects whether the kernel will compact memory or direct
+reclaim to satisfy a high-order allocation. The extfrag/extfrag_index file in
+debugfs shows what the fragmentation index for each order is in each zone in
+the system. Values tending towards 0 imply allocations would fail due to lack
+of memory, values towards 1000 imply failures are due to fragmentation and -1
+implies that the allocation will succeed as long as watermarks are met.
+
+The kernel will not compact memory in a zone if the
+fragmentation index is <= extfrag_threshold. The default value is 500.
+
+
+highmem_is_dirtyable
+====================
+
+Available only for systems with CONFIG_HIGHMEM enabled (32b systems).
+
+This parameter controls whether the high memory is considered for dirty
+writers throttling. This is not the case by default which means that
+only the amount of memory directly visible/usable by the kernel can
+be dirtied. As a result, on systems with a large amount of memory and
+lowmem basically depleted writers might be throttled too early and
+streaming writes can get very slow.
+
+Changing the value to non zero would allow more memory to be dirtied
+and thus allow writers to write more data which can be flushed to the
+storage more effectively. Note this also comes with a risk of pre-mature
+OOM killer because some writers (e.g. direct block device writes) can
+only use the low memory and they can fill it up with dirty data without
+any throttling.
+
+
+hugetlb_shm_group
+=================
+
+hugetlb_shm_group contains group id that is allowed to create SysV
+shared memory segment using hugetlb page.
+
+
+laptop_mode
+===========
+
+laptop_mode is a knob that controls "laptop mode". All the things that are
+controlled by this knob are discussed in Documentation/admin-guide/laptops/laptop-mode.rst.
+
+
+legacy_va_layout
+================
+
+If non-zero, this sysctl disables the new 32-bit mmap layout - the kernel
+will use the legacy (2.4) layout for all processes.
+
+
+lowmem_reserve_ratio
+====================
+
+For some specialised workloads on highmem machines it is dangerous for
+the kernel to allow process memory to be allocated from the "lowmem"
+zone. This is because that memory could then be pinned via the mlock()
+system call, or by unavailability of swapspace.
+
+And on large highmem machines this lack of reclaimable lowmem memory
+can be fatal.
+
+So the Linux page allocator has a mechanism which prevents allocations
+which *could* use highmem from using too much lowmem. This means that
+a certain amount of lowmem is defended from the possibility of being
+captured into pinned user memory.
+
+(The same argument applies to the old 16 megabyte ISA DMA region. This
+mechanism will also defend that region from allocations which could use
+highmem or lowmem).
+
+The `lowmem_reserve_ratio` tunable determines how aggressive the kernel is
+in defending these lower zones.
+
+If you have a machine which uses highmem or ISA DMA and your
+applications are using mlock(), or if you are running with no swap then
+you probably should change the lowmem_reserve_ratio setting.
+
+The lowmem_reserve_ratio is an array. You can see them by reading this file::
+
+ % cat /proc/sys/vm/lowmem_reserve_ratio
+ 256 256 32
+
+But, these values are not used directly. The kernel calculates # of protection
+pages for each zones from them. These are shown as array of protection pages
+in /proc/zoneinfo like the following. (This is an example of x86-64 box).
+Each zone has an array of protection pages like this::
+
+ Node 0, zone DMA
+ pages free 1355
+ min 3
+ low 3
+ high 4
+ :
+ :
+ numa_other 0
+ protection: (0, 2004, 2004, 2004)
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ pagesets
+ cpu: 0 pcp: 0
+ :
+
+These protections are added to score to judge whether this zone should be used
+for page allocation or should be reclaimed.
+
+In this example, if normal pages (index=2) are required to this DMA zone and
+watermark[WMARK_HIGH] is used for watermark, the kernel judges this zone should
+not be used because pages_free(1355) is smaller than watermark + protection[2]
+(4 + 2004 = 2008). If this protection value is 0, this zone would be used for
+normal page requirement. If requirement is DMA zone(index=0), protection[0]
+(=0) is used.
+
+zone[i]'s protection[j] is calculated by following expression::
+
+ (i < j):
+ zone[i]->protection[j]
+ = (total sums of managed_pages from zone[i+1] to zone[j] on the node)
+ / lowmem_reserve_ratio[i];
+ (i = j):
+ (should not be protected. = 0;
+ (i > j):
+ (not necessary, but looks 0)
+
+The default values of lowmem_reserve_ratio[i] are
+
+ === ====================================
+ 256 (if zone[i] means DMA or DMA32 zone)
+ 32 (others)
+ === ====================================
+
+As above expression, they are reciprocal number of ratio.
+256 means 1/256. # of protection pages becomes about "0.39%" of total managed
+pages of higher zones on the node.
+
+If you would like to protect more pages, smaller values are effective.
+The minimum value is 1 (1/1 -> 100%). The value less than 1 completely
+disables protection of the pages.
+
+
+max_map_count
+=============
+
+This file contains the maximum number of memory map areas a process
+may have. Memory map areas are used as a side-effect of calling
+malloc, directly by mmap, mprotect, and madvise, and also when loading
+shared libraries.
+
+While most applications need less than a thousand maps, certain
+programs, particularly malloc debuggers, may consume lots of them,
+e.g., up to one or two maps per allocation.
+
+The default value is 65530.
+
+
+mem_profiling
+==============
+
+Enable memory profiling (when CONFIG_MEM_ALLOC_PROFILING=y)
+
+1: Enable memory profiling.
+
+0: Disable memory profiling.
+
+Enabling memory profiling introduces a small performance overhead for all
+memory allocations.
+
+The default value depends on CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT.
+
+
+memory_failure_early_kill
+=========================
+
+Control how to kill processes when uncorrected memory error (typically
+a 2bit error in a memory module) is detected in the background by hardware
+that cannot be handled by the kernel. In some cases (like the page
+still having a valid copy on disk) the kernel will handle the failure
+transparently without affecting any applications. But if there is
+no other up-to-date copy of the data it will kill to prevent any data
+corruptions from propagating.
+
+1: Kill all processes that have the corrupted and not reloadable page mapped
+as soon as the corruption is detected. Note this is not supported
+for a few types of pages, like kernel internally allocated data or
+the swap cache, but works for the majority of user pages.
+
+0: Only unmap the corrupted page from all processes and only kill a process
+who tries to access it.
+
+The kill is done using a catchable SIGBUS with BUS_MCEERR_AO, so processes can
+handle this if they want to.
+
+This is only active on architectures/platforms with advanced machine
+check handling and depends on the hardware capabilities.
+
+Applications can override this setting individually with the PR_MCE_KILL prctl
+
+
+memory_failure_recovery
+=======================
+
+Enable memory failure recovery (when supported by the platform)
+
+1: Attempt recovery.
+
+0: Always panic on a memory failure.
+
+
+min_free_kbytes
+===============
+
+This is used to force the Linux VM to keep a minimum number
+of kilobytes free. The VM uses this number to compute a
+watermark[WMARK_MIN] value for each lowmem zone in the system.
+Each lowmem zone gets a number of reserved free pages based
+proportionally on its size.
+
+Some minimal amount of memory is needed to satisfy PF_MEMALLOC
+allocations; if you set this to lower than 1024KB, your system will
+become subtly broken, and prone to deadlock under high loads.
+
+Setting this too high will OOM your machine instantly.
+
+
+min_slab_ratio
+==============
+
+This is available only on NUMA kernels.
+
+A percentage of the total pages in each zone. On Zone reclaim
+(fallback from the local zone occurs) slabs will be reclaimed if more
+than this percentage of pages in a zone are reclaimable slab pages.
+This insures that the slab growth stays under control even in NUMA
+systems that rarely perform global reclaim.
+
+The default is 5 percent.
+
+Note that slab reclaim is triggered in a per zone / node fashion.
+The process of reclaiming slab memory is currently not node specific
+and may not be fast.
+
+
+min_unmapped_ratio
+==================
+
+This is available only on NUMA kernels.
+
+This is a percentage of the total pages in each zone. Zone reclaim will
+only occur if more than this percentage of pages are in a state that
+zone_reclaim_mode allows to be reclaimed.
+
+If zone_reclaim_mode has the value 4 OR'd, then the percentage is compared
+against all file-backed unmapped pages including swapcache pages and tmpfs
+files. Otherwise, only unmapped pages backed by normal files but not tmpfs
+files and similar are considered.
+
+The default is 1 percent.
+
+
+mmap_min_addr
+=============
+
+This file indicates the amount of address space which a user process will
+be restricted from mmapping. Since kernel null dereference bugs could
+accidentally operate based on the information in the first couple of pages
+of memory userspace processes should not be allowed to write to them. By
+default this value is set to 0 and no protections will be enforced by the
+security module. Setting this value to something like 64k will allow the
+vast majority of applications to work correctly and provide defense in depth
+against future potential kernel bugs.
+
+
+mmap_rnd_bits
+=============
+
+This value can be used to select the number of bits to use to
+determine the random offset to the base address of vma regions
+resulting from mmap allocations on architectures which support
+tuning address space randomization. This value will be bounded
+by the architecture's minimum and maximum supported values.
+
+This value can be changed after boot using the
+/proc/sys/vm/mmap_rnd_bits tunable
+
+
+mmap_rnd_compat_bits
+====================
+
+This value can be used to select the number of bits to use to
+determine the random offset to the base address of vma regions
+resulting from mmap allocations for applications run in
+compatibility mode on architectures which support tuning address
+space randomization. This value will be bounded by the
+architecture's minimum and maximum supported values.
+
+This value can be changed after boot using the
+/proc/sys/vm/mmap_rnd_compat_bits tunable
+
+
+nr_hugepages
+============
+
+Change the minimum size of the hugepage pool.
+
+See Documentation/admin-guide/mm/hugetlbpage.rst
+
+
+hugetlb_optimize_vmemmap
+========================
+
+This knob is not available when the size of 'struct page' (a structure defined
+in include/linux/mm_types.h) is not power of two (an unusual system config could
+result in this).
+
+Enable (set to 1) or disable (set to 0) HugeTLB Vmemmap Optimization (HVO).
+
+Once enabled, the vmemmap pages of subsequent allocation of HugeTLB pages from
+buddy allocator will be optimized (7 pages per 2MB HugeTLB page and 4095 pages
+per 1GB HugeTLB page), whereas already allocated HugeTLB pages will not be
+optimized. When those optimized HugeTLB pages are freed from the HugeTLB pool
+to the buddy allocator, the vmemmap pages representing that range needs to be
+remapped again and the vmemmap pages discarded earlier need to be rellocated
+again. If your use case is that HugeTLB pages are allocated 'on the fly' (e.g.
+never explicitly allocating HugeTLB pages with 'nr_hugepages' but only set
+'nr_overcommit_hugepages', those overcommitted HugeTLB pages are allocated 'on
+the fly') instead of being pulled from the HugeTLB pool, you should weigh the
+benefits of memory savings against the more overhead (~2x slower than before)
+of allocation or freeing HugeTLB pages between the HugeTLB pool and the buddy
+allocator. Another behavior to note is that if the system is under heavy memory
+pressure, it could prevent the user from freeing HugeTLB pages from the HugeTLB
+pool to the buddy allocator since the allocation of vmemmap pages could be
+failed, you have to retry later if your system encounter this situation.
+
+Once disabled, the vmemmap pages of subsequent allocation of HugeTLB pages from
+buddy allocator will not be optimized meaning the extra overhead at allocation
+time from buddy allocator disappears, whereas already optimized HugeTLB pages
+will not be affected. If you want to make sure there are no optimized HugeTLB
+pages, you can set "nr_hugepages" to 0 first and then disable this. Note that
+writing 0 to nr_hugepages will make any "in use" HugeTLB pages become surplus
+pages. So, those surplus pages are still optimized until they are no longer
+in use. You would need to wait for those surplus pages to be released before
+there are no optimized pages in the system.
+
+
+nr_hugepages_mempolicy
+======================
+
+Change the size of the hugepage pool at run-time on a specific
+set of NUMA nodes.
+
+See Documentation/admin-guide/mm/hugetlbpage.rst
+
+
+nr_overcommit_hugepages
+=======================
+
+Change the maximum size of the hugepage pool. The maximum is
+nr_hugepages + nr_overcommit_hugepages.
+
+See Documentation/admin-guide/mm/hugetlbpage.rst
+
+
+nr_trim_pages
+=============
+
+This is available only on NOMMU kernels.
+
+This value adjusts the excess page trimming behaviour of power-of-2 aligned
+NOMMU mmap allocations.
+
+A value of 0 disables trimming of allocations entirely, while a value of 1
+trims excess pages aggressively. Any value >= 1 acts as the watermark where
+trimming of allocations is initiated.
+
+The default value is 1.
+
+See Documentation/admin-guide/mm/nommu-mmap.rst for more information.
+
+
+numa_zonelist_order
+===================
+
+This sysctl is only for NUMA and it is deprecated. Anything but
+Node order will fail!
+
+'where the memory is allocated from' is controlled by zonelists.
+
+(This documentation ignores ZONE_HIGHMEM/ZONE_DMA32 for simple explanation.
+you may be able to read ZONE_DMA as ZONE_DMA32...)
+
+In non-NUMA case, a zonelist for GFP_KERNEL is ordered as following.
+ZONE_NORMAL -> ZONE_DMA
+This means that a memory allocation request for GFP_KERNEL will
+get memory from ZONE_DMA only when ZONE_NORMAL is not available.
+
+In NUMA case, you can think of following 2 types of order.
+Assume 2 node NUMA and below is zonelist of Node(0)'s GFP_KERNEL::
+
+ (A) Node(0) ZONE_NORMAL -> Node(0) ZONE_DMA -> Node(1) ZONE_NORMAL
+ (B) Node(0) ZONE_NORMAL -> Node(1) ZONE_NORMAL -> Node(0) ZONE_DMA.
+
+Type(A) offers the best locality for processes on Node(0), but ZONE_DMA
+will be used before ZONE_NORMAL exhaustion. This increases possibility of
+out-of-memory(OOM) of ZONE_DMA because ZONE_DMA is tend to be small.
+
+Type(B) cannot offer the best locality but is more robust against OOM of
+the DMA zone.
+
+Type(A) is called as "Node" order. Type (B) is "Zone" order.
+
+"Node order" orders the zonelists by node, then by zone within each node.
+Specify "[Nn]ode" for node order
+
+"Zone Order" orders the zonelists by zone type, then by node within each
+zone. Specify "[Zz]one" for zone order.
+
+Specify "[Dd]efault" to request automatic configuration.
+
+On 32-bit, the Normal zone needs to be preserved for allocations accessible
+by the kernel, so "zone" order will be selected.
+
+On 64-bit, devices that require DMA32/DMA are relatively rare, so "node"
+order will be selected.
+
+Default order is recommended unless this is causing problems for your
+system/application.
+
+
+oom_dump_tasks
+==============
+
+Enables a system-wide task dump (excluding kernel threads) to be produced
+when the kernel performs an OOM-killing and includes such information as
+pid, uid, tgid, vm size, rss, pgtables_bytes, swapents, oom_score_adj
+score, and name. This is helpful to determine why the OOM killer was
+invoked, to identify the rogue task that caused it, and to determine why
+the OOM killer chose the task it did to kill.
+
+If this is set to zero, this information is suppressed. On very
+large systems with thousands of tasks it may not be feasible to dump
+the memory state information for each one. Such systems should not
+be forced to incur a performance penalty in OOM conditions when the
+information may not be desired.
+
+If this is set to non-zero, this information is shown whenever the
+OOM killer actually kills a memory-hogging task.
+
+The default value is 1 (enabled).
+
+
+oom_kill_allocating_task
+========================
+
+This enables or disables killing the OOM-triggering task in
+out-of-memory situations.
+
+If this is set to zero, the OOM killer will scan through the entire
+tasklist and select a task based on heuristics to kill. This normally
+selects a rogue memory-hogging task that frees up a large amount of
+memory when killed.
+
+If this is set to non-zero, the OOM killer simply kills the task that
+triggered the out-of-memory condition. This avoids the expensive
+tasklist scan.
+
+If panic_on_oom is selected, it takes precedence over whatever value
+is used in oom_kill_allocating_task.
+
+The default value is 0.
+
+
+overcommit_kbytes
+=================
+
+When overcommit_memory is set to 2, the committed address space is not
+permitted to exceed swap plus this amount of physical RAM. See below.
+
+Note: overcommit_kbytes is the counterpart of overcommit_ratio. Only one
+of them may be specified at a time. Setting one disables the other (which
+then appears as 0 when read).
+
+
+overcommit_memory
+=================
+
+This value contains a flag that enables memory overcommitment.
+
+When this flag is 0, the kernel compares the userspace memory request
+size against total memory plus swap and rejects obvious overcommits.
+
+When this flag is 1, the kernel pretends there is always enough
+memory until it actually runs out.
+
+When this flag is 2, the kernel uses a "never overcommit"
+policy that attempts to prevent any overcommit of memory.
+Note that user_reserve_kbytes affects this policy.
+
+This feature can be very useful because there are a lot of
+programs that malloc() huge amounts of memory "just-in-case"
+and don't use much of it.
+
+The default value is 0.
+
+See Documentation/mm/overcommit-accounting.rst and
+mm/util.c::__vm_enough_memory() for more information.
+
+
+overcommit_ratio
+================
+
+When overcommit_memory is set to 2, the committed address
+space is not permitted to exceed swap plus this percentage
+of physical RAM. See above.
+
+
+page-cluster
+============
+
+page-cluster controls the number of pages up to which consecutive pages
+are read in from swap in a single attempt. This is the swap counterpart
+to page cache readahead.
+The mentioned consecutivity is not in terms of virtual/physical addresses,
+but consecutive on swap space - that means they were swapped out together.
+
+It is a logarithmic value - setting it to zero means "1 page", setting
+it to 1 means "2 pages", setting it to 2 means "4 pages", etc.
+Zero disables swap readahead completely.
+
+The default value is three (eight pages at a time). There may be some
+small benefits in tuning this to a different value if your workload is
+swap-intensive.
+
+Lower values mean lower latencies for initial faults, but at the same time
+extra faults and I/O delays for following faults if they would have been part of
+that consecutive pages readahead would have brought in.
+
+
+page_lock_unfairness
+====================
+
+This value determines the number of times that the page lock can be
+stolen from under a waiter. After the lock is stolen the number of times
+specified in this file (default is 5), the "fair lock handoff" semantics
+will apply, and the waiter will only be awakened if the lock can be taken.
+
+panic_on_oom
+============
+
+This enables or disables panic on out-of-memory feature.
+
+If this is set to 0, the kernel will kill some rogue process,
+called oom_killer. Usually, oom_killer can kill rogue processes and
+system will survive.
+
+If this is set to 1, the kernel panics when out-of-memory happens.
+However, if a process limits using nodes by mempolicy/cpusets,
+and those nodes become memory exhaustion status, one process
+may be killed by oom-killer. No panic occurs in this case.
+Because other nodes' memory may be free. This means system total status
+may be not fatal yet.
+
+If this is set to 2, the kernel panics compulsorily even on the
+above-mentioned. Even oom happens under memory cgroup, the whole
+system panics.
+
+The default value is 0.
+
+1 and 2 are for failover of clustering. Please select either
+according to your policy of failover.
+
+panic_on_oom=2+kdump gives you very strong tool to investigate
+why oom happens. You can get snapshot.
+
+
+percpu_pagelist_high_fraction
+=============================
+
+This is the fraction of pages in each zone that are can be stored to
+per-cpu page lists. It is an upper boundary that is divided depending
+on the number of online CPUs. The min value for this is 8 which means
+that we do not allow more than 1/8th of pages in each zone to be stored
+on per-cpu page lists. This entry only changes the value of hot per-cpu
+page lists. A user can specify a number like 100 to allocate 1/100th of
+each zone between per-cpu lists.
+
+The batch value of each per-cpu page list remains the same regardless of
+the value of the high fraction so allocation latencies are unaffected.
+
+The initial value is zero. Kernel uses this value to set the high pcp->high
+mark based on the low watermark for the zone and the number of local
+online CPUs. If the user writes '0' to this sysctl, it will revert to
+this default behavior.
+
+
+stat_interval
+=============
+
+The time interval between which vm statistics are updated. The default
+is 1 second.
+
+
+stat_refresh
+============
+
+Any read or write (by root only) flushes all the per-cpu vm statistics
+into their global totals, for more accurate reports when testing
+e.g. cat /proc/sys/vm/stat_refresh /proc/meminfo
+
+As a side-effect, it also checks for negative totals (elsewhere reported
+as 0) and "fails" with EINVAL if any are found, with a warning in dmesg.
+(At time of writing, a few stats are known sometimes to be found negative,
+with no ill effects: errors and warnings on these stats are suppressed.)
+
+
+numa_stat
+=========
+
+This interface allows runtime configuration of numa statistics.
+
+When page allocation performance becomes a bottleneck and you can tolerate
+some possible tool breakage and decreased numa counter precision, you can
+do::
+
+ echo 0 > /proc/sys/vm/numa_stat
+
+When page allocation performance is not a bottleneck and you want all
+tooling to work, you can do::
+
+ echo 1 > /proc/sys/vm/numa_stat
+
+
+swappiness
+==========
+
+This control is used to define the rough relative IO cost of swapping
+and filesystem paging, as a value between 0 and 200. At 100, the VM
+assumes equal IO cost and will thus apply memory pressure to the page
+cache and swap-backed pages equally; lower values signify more
+expensive swap IO, higher values indicates cheaper.
+
+Keep in mind that filesystem IO patterns under memory pressure tend to
+be more efficient than swap's random IO. An optimal value will require
+experimentation and will also be workload-dependent.
+
+The default value is 60.
+
+For in-memory swap, like zram or zswap, as well as hybrid setups that
+have swap on faster devices than the filesystem, values beyond 100 can
+be considered. For example, if the random IO against the swap device
+is on average 2x faster than IO from the filesystem, swappiness should
+be 133 (x + 2x = 200, 2x = 133.33).
+
+At 0, the kernel will not initiate swap until the amount of free and
+file-backed pages is less than the high watermark in a zone.
+
+
+unprivileged_userfaultfd
+========================
+
+This flag controls the mode in which unprivileged users can use the
+userfaultfd system calls. Set this to 0 to restrict unprivileged users
+to handle page faults in user mode only. In this case, users without
+SYS_CAP_PTRACE must pass UFFD_USER_MODE_ONLY in order for userfaultfd to
+succeed. Prohibiting use of userfaultfd for handling faults from kernel
+mode may make certain vulnerabilities more difficult to exploit.
+
+Set this to 1 to allow unprivileged users to use the userfaultfd system
+calls without any restrictions.
+
+The default value is 0.
+
+Another way to control permissions for userfaultfd is to use
+/dev/userfaultfd instead of userfaultfd(2). See
+Documentation/admin-guide/mm/userfaultfd.rst.
+
+user_reserve_kbytes
+===================
+
+When overcommit_memory is set to 2, "never overcommit" mode, reserve
+min(3% of current process size, user_reserve_kbytes) of free memory.
+This is intended to prevent a user from starting a single memory hogging
+process, such that they cannot recover (kill the hog).
+
+user_reserve_kbytes defaults to min(3% of the current process size, 128MB).
+
+If this is reduced to zero, then the user will be allowed to allocate
+all free memory with a single process, minus admin_reserve_kbytes.
+Any subsequent attempts to execute a command will result in
+"fork: Cannot allocate memory".
+
+Changing this takes effect whenever an application requests memory.
+
+
+vfs_cache_pressure
+==================
+
+This percentage value controls the tendency of the kernel to reclaim
+the memory which is used for caching of directory and inode objects.
+
+At the default value of vfs_cache_pressure=vfs_cache_pressure_denom the kernel
+will attempt to reclaim dentries and inodes at a "fair" rate with respect to
+pagecache and swapcache reclaim. Decreasing vfs_cache_pressure causes the
+kernel to prefer to retain dentry and inode caches. When vfs_cache_pressure=0,
+the kernel will never reclaim dentries and inodes due to memory pressure and
+this can easily lead to out-of-memory conditions. Increasing vfs_cache_pressure
+beyond vfs_cache_pressure_denom causes the kernel to prefer to reclaim dentries
+and inodes.
+
+Increasing vfs_cache_pressure significantly beyond vfs_cache_pressure_denom may
+have negative performance impact. Reclaim code needs to take various locks to
+find freeable directory and inode objects. When vfs_cache_pressure equals
+(10 * vfs_cache_pressure_denom), it will look for ten times more freeable
+objects than there are.
+
+Note: This setting should always be used together with vfs_cache_pressure_denom.
+
+vfs_cache_pressure_denom
+========================
+
+Defaults to 100 (minimum allowed value). Requires corresponding
+vfs_cache_pressure setting to take effect.
+
+watermark_boost_factor
+======================
+
+This factor controls the level of reclaim when memory is being fragmented.
+It defines the percentage of the high watermark of a zone that will be
+reclaimed if pages of different mobility are being mixed within pageblocks.
+The intent is that compaction has less work to do in the future and to
+increase the success rate of future high-order allocations such as SLUB
+allocations, THP and hugetlbfs pages.
+
+To make it sensible with respect to the watermark_scale_factor
+parameter, the unit is in fractions of 10,000. The default value of
+15,000 means that up to 150% of the high watermark will be reclaimed in the
+event of a pageblock being mixed due to fragmentation. The level of reclaim
+is determined by the number of fragmentation events that occurred in the
+recent past. If this value is smaller than a pageblock then a pageblocks
+worth of pages will be reclaimed (e.g. 2MB on 64-bit x86). A boost factor
+of 0 will disable the feature.
+
+
+watermark_scale_factor
+======================
+
+This factor controls the aggressiveness of kswapd. It defines the
+amount of memory left in a node/system before kswapd is woken up and
+how much memory needs to be free before kswapd goes back to sleep.
+
+The unit is in fractions of 10,000. The default value of 10 means the
+distances between watermarks are 0.1% of the available memory in the
+node/system. The maximum value is 3000, or 30% of memory.
+
+A high rate of threads entering direct reclaim (allocstall) or kswapd
+going to sleep prematurely (kswapd_low_wmark_hit_quickly) can indicate
+that the number of free pages kswapd maintains for latency reasons is
+too small for the allocation bursts occurring in the system. This knob
+can then be used to tune kswapd aggressiveness accordingly.
+
+
+zone_reclaim_mode
+=================
+
+Zone_reclaim_mode allows someone to set more or less aggressive approaches to
+reclaim memory when a zone runs out of memory. If it is set to zero then no
+zone reclaim occurs. Allocations will be satisfied from other zones / nodes
+in the system.
+
+This is value OR'ed together of
+
+= ===================================
+1 Zone reclaim on
+2 Zone reclaim writes dirty pages out
+4 Zone reclaim swaps pages
+= ===================================
+
+zone_reclaim_mode is disabled by default. For file servers or workloads
+that benefit from having their data cached, zone_reclaim_mode should be
+left disabled as the caching effect is likely to be more important than
+data locality.
+
+Consider enabling one or more zone_reclaim mode bits if it's known that the
+workload is partitioned such that each partition fits within a NUMA node
+and that accessing remote memory would cause a measurable performance
+reduction. The page allocator will take additional actions before
+allocating off node pages.
+
+Allowing zone reclaim to write out pages stops processes that are
+writing large amounts of data from dirtying pages on other nodes. Zone
+reclaim will write out dirty pages if a zone fills up and so effectively
+throttle the process. This may decrease the performance of a single process
+since it cannot use all of system memory to buffer the outgoing writes
+anymore but it preserve the memory on other nodes so that the performance
+of other processes running on other nodes will not be affected.
+
+Allowing regular swap effectively restricts allocations to the local
+node unless explicitly overridden by memory policies or cpuset
+configurations.
diff --git a/Documentation/admin-guide/sysrq.rst b/Documentation/admin-guide/sysrq.rst
index 7b9035c01a2e..9c7aa817adc7 100644
--- a/Documentation/admin-guide/sysrq.rst
+++ b/Documentation/admin-guide/sysrq.rst
@@ -48,35 +48,48 @@ always allowed (by a user with admin privileges).
How do I use the magic SysRq key?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-On x86 - You press the key combo :kbd:`ALT-SysRq-<command key>`.
+On x86
+ You press the key combo `ALT-SysRq-<command key>`.
-.. note::
+ .. note::
Some
keyboards may not have a key labeled 'SysRq'. The 'SysRq' key is
also known as the 'Print Screen' key. Also some keyboards cannot
handle so many keys being pressed at the same time, so you might
- have better luck with press :kbd:`Alt`, press :kbd:`SysRq`,
- release :kbd:`SysRq`, press :kbd:`<command key>`, release everything.
+ have better luck with press `Alt`, press `SysRq`,
+ release `SysRq`, press `<command key>`, release everything.
-On SPARC - You press :kbd:`ALT-STOP-<command key>`, I believe.
+On SPARC
+ You press `ALT-STOP-<command key>`, I believe.
On the serial console (PC style standard serial ports only)
You send a ``BREAK``, then within 5 seconds a command key. Sending
``BREAK`` twice is interpreted as a normal BREAK.
On PowerPC
- Press :kbd:`ALT - Print Screen` (or :kbd:`F13`) - :kbd:`<command key>`,
- :kbd:`Print Screen` (or :kbd:`F13`) - :kbd:`<command key>` may suffice.
+ Press `ALT - Print Screen` (or `F13`) - `<command key>`.
+ `Print Screen` (or `F13`) - `<command key>` may suffice.
On other
If you know of the key combos for other architectures, please
- let me know so I can add them to this section.
+ submit a patch to be included in this section.
On all
- write a character to /proc/sysrq-trigger. e.g.::
+ Write a single character to /proc/sysrq-trigger.
+ Only the first character is processed, the rest of the string is
+ ignored. However, it is not recommended to write any extra characters
+ as the behavior is undefined and might change in the future versions.
+ E.g.::
echo t > /proc/sysrq-trigger
+ Alternatively, write multiple characters prepended by underscore.
+ This way, all characters will be processed. E.g.::
+
+ echo _reisub > /proc/sysrq-trigger
+
+The `<command key>` is case sensitive.
+
What are the 'command' keys?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -86,8 +99,8 @@ Command Function
``b`` Will immediately reboot the system without syncing or unmounting
your disks.
-``c`` Will perform a system crash by a NULL pointer dereference.
- A crashdump will be taken if configured.
+``c`` Will perform a system crash and a crashdump will be taken
+ if configured.
``d`` Shows all locks that are held.
@@ -134,7 +147,7 @@ Command Function
``v`` Forcefully restores framebuffer console
``v`` Causes ETM buffer dump [ARM-specific]
-``w`` Dumps tasks that are in uninterruptable (blocked) state.
+``w`` Dumps tasks that are in uninterruptible (blocked) state.
``x`` Used by xmon interface on ppc/powerpc platforms.
Show global PMU Registers on sparc64.
@@ -148,6 +161,8 @@ Command Function
will be printed to your console. (``0``, for example would make
it so that only emergency messages like PANICs or OOPSes would
make it to your console.)
+
+``R`` Replay the kernel log messages on consoles.
=========== ===================================================================
Okay, so what can I use them for?
@@ -171,22 +186,20 @@ It seems others find it useful as (System Attention Key) which is
useful when you want to exit a program that will not let you switch consoles.
(For example, X or a svgalib program.)
-``reboot(b)`` is good when you're unable to shut down. But you should also
-``sync(s)`` and ``umount(u)`` first.
+``reboot(b)`` is good when you're unable to shut down, it is an equivalent
+of pressing the "reset" button.
``crash(c)`` can be used to manually trigger a crashdump when the system is hung.
Note that this just triggers a crash if there is no dump mechanism available.
-``sync(s)`` is great when your system is locked up, it allows you to sync your
-disks and will certainly lessen the chance of data loss and fscking. Note
-that the sync hasn't taken place until you see the "OK" and "Done" appear
-on the screen. (If the kernel is really in strife, you may not ever get the
-OK or Done message...)
+``sync(s)`` is handy before yanking removable medium or after using a rescue
+shell that provides no graceful shutdown -- it will ensure your data is
+safely written to the disk. Note that the sync hasn't taken place until you see
+the "OK" and "Done" appear on the screen.
-``umount(u)`` is basically useful in the same ways as ``sync(s)``. I generally
-``sync(s)``, ``umount(u)``, then ``reboot(b)`` when my system locks. It's saved
-me many a fsck. Again, the unmount (remount read-only) hasn't taken place until
-you see the "OK" and "Done" message appear on the screen.
+``umount(u)`` can be used to mark filesystems as properly unmounted. From the
+running system's point of view, they will be remounted read-only. The remount
+isn't complete until you see the "OK" and "Done" message appear on the screen.
The loglevels ``0``-``9`` are useful when your console is being flooded with
kernel messages you do not want to see. Selecting ``0`` will prevent all but
@@ -200,13 +213,22 @@ processes.
"just thaw ``it(j)``" is useful if your system becomes unresponsive due to a
frozen (probably root) filesystem via the FIFREEZE ioctl.
+``Replay logs(R)`` is useful to view the kernel log messages when system is hung
+or you are not able to use dmesg command to view the messages in printk buffer.
+User may have to press the key combination multiple times if console system is
+busy. If it is completely locked up, then messages won't be printed. Output
+messages depend on current console loglevel, which can be modified using
+sysrq[0-9] (see above).
+
Sometimes SysRq seems to get 'stuck' after using it, what can I do?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-That happens to me, also. I've found that tapping shift, alt, and control
-on both sides of the keyboard, and hitting an invalid sysrq sequence again
-will fix the problem. (i.e., something like :kbd:`alt-sysrq-z`). Switching to
-another virtual console (:kbd:`ALT+Fn`) and then back again should also help.
+When this happens, try tapping shift, alt and control on both sides of the
+keyboard, and hitting an invalid sysrq sequence again. (i.e., something like
+`alt-sysrq-z`).
+
+Switching to another virtual console (`ALT+Fn`) and then back again
+should also help.
I hit SysRq, but nothing seems to happen, what's wrong?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -231,13 +253,13 @@ prints help, and C) an action_msg string, that will print right before your
handler is called. Your handler must conform to the prototype in 'sysrq.h'.
After the ``sysrq_key_op`` is created, you can call the kernel function
-``register_sysrq_key(int key, struct sysrq_key_op *op_p);`` this will
+``register_sysrq_key(int key, const struct sysrq_key_op *op_p);`` this will
register the operation pointed to by ``op_p`` at table key 'key',
if that slot in the table is blank. At module unload time, you must call
-the function ``unregister_sysrq_key(int key, struct sysrq_key_op *op_p)``, which
-will remove the key op pointed to by 'op_p' from the key 'key', if and only if
-it is currently registered in that slot. This is in case the slot has been
-overwritten since you registered it.
+the function ``unregister_sysrq_key(int key, const struct sysrq_key_op *op_p)``,
+which will remove the key op pointed to by 'op_p' from the key 'key', if and
+only if it is currently registered in that slot. This is in case the slot has
+been overwritten since you registered it.
The Magic SysRQ system works by registering key operations against a key op
lookup table, which is defined in 'drivers/tty/sysrq.c'. This key table has
@@ -268,7 +290,7 @@ exception the header line from the sysrq command is passed to all console
consumers as if the current loglevel was maximum. If only the header
is emitted it is almost certain that the kernel loglevel is too low.
Should you require the output on the console channel then you will need
-to temporarily up the console loglevel using :kbd:`alt-sysrq-8` or::
+to temporarily up the console loglevel using `alt-sysrq-8` or::
echo 8 > /proc/sysrq-trigger
@@ -284,7 +306,7 @@ Just ask them on the linux-kernel mailing list:
Credits
~~~~~~~
-Written by Mydraal <vulpyne@vulpyne.net>
-Updated by Adam Sulmicki <adam@cfar.umd.edu>
-Updated by Jeremy M. Dolan <jmd@turbogeek.org> 2001/01/28 10:15:59
-Added to by Crutcher Dunnavant <crutcher+kernel@datastacks.com>
+- Written by Mydraal <vulpyne@vulpyne.net>
+- Updated by Adam Sulmicki <adam@cfar.umd.edu>
+- Updated by Jeremy M. Dolan <jmd@turbogeek.org> 2001/01/28 10:15:59
+- Added to by Crutcher Dunnavant <crutcher+kernel@datastacks.com>
diff --git a/Documentation/admin-guide/tainted-kernels.rst b/Documentation/admin-guide/tainted-kernels.rst
index 28a869c509a0..a0cc017e4424 100644
--- a/Documentation/admin-guide/tainted-kernels.rst
+++ b/Documentation/admin-guide/tainted-kernels.rst
@@ -1,59 +1,191 @@
Tainted kernels
---------------
-Some oops reports contain the string **'Tainted: '** after the program
-counter. This indicates that the kernel has been tainted by some
-mechanism. The string is followed by a series of position-sensitive
-characters, each representing a particular tainted value.
-
- 1) ``G`` if all modules loaded have a GPL or compatible license, ``P`` if
+The kernel will mark itself as 'tainted' when something occurs that might be
+relevant later when investigating problems. Don't worry too much about this,
+most of the time it's not a problem to run a tainted kernel; the information is
+mainly of interest once someone wants to investigate some problem, as its real
+cause might be the event that got the kernel tainted. That's why bug reports
+from tainted kernels will often be ignored by developers, hence try to reproduce
+problems with an untainted kernel.
+
+Note the kernel will remain tainted even after you undo what caused the taint
+(i.e. unload a proprietary kernel module), to indicate the kernel remains not
+trustworthy. That's also why the kernel will print the tainted state when it
+notices an internal problem (a 'kernel bug'), a recoverable error
+('kernel oops') or a non-recoverable error ('kernel panic') and writes debug
+information about this to the logs ``dmesg`` outputs. It's also possible to
+check the tainted state at runtime through a file in ``/proc/``.
+
+
+Tainted flag in bugs, oops or panics messages
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+You find the tainted state near the top in a line starting with 'CPU:'; if or
+why the kernel was tainted is shown after the Process ID ('PID:') and a shortened
+name of the command ('Comm:') that triggered the event::
+
+ BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
+ Oops: 0002 [#1] SMP PTI
+ CPU: 0 PID: 4424 Comm: insmod Tainted: P W O 4.20.0-0.rc6.fc30 #1
+ Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
+ RIP: 0010:my_oops_init+0x13/0x1000 [kpanic]
+ [...]
+
+You'll find a 'Not tainted: ' there if the kernel was not tainted at the
+time of the event; if it was, then it will print 'Tainted: ' and characters
+either letters or blanks. In the example above it looks like this::
+
+ Tainted: P W O
+
+The meaning of those characters is explained in the table below. In this case
+the kernel got tainted earlier because a proprietary Module (``P``) was loaded,
+a warning occurred (``W``), and an externally-built module was loaded (``O``).
+To decode other letters use the table below.
+
+
+Decoding tainted state at runtime
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+At runtime, you can query the tainted state by reading
+``cat /proc/sys/kernel/tainted``. If that returns ``0``, the kernel is not
+tainted; any other number indicates the reasons why it is. The easiest way to
+decode that number is the script ``tools/debugging/kernel-chktaint``, which your
+distribution might ship as part of a package called ``linux-tools`` or
+``kernel-tools``; if it doesn't, you can download the script from
+`git.kernel.org <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/tools/debugging/kernel-chktaint>`_
+and execute it with ``sh kernel-chktaint``, which would print something like
+this on the machine that had the statements in the logs that were quoted earlier::
+
+ Kernel is Tainted for following reasons:
+ * Proprietary module was loaded (#0)
+ * Kernel issued warning (#9)
+ * Externally-built ('out-of-tree') module was loaded (#12)
+ See Documentation/admin-guide/tainted-kernels.rst in the Linux kernel or
+ https://www.kernel.org/doc/html/latest/admin-guide/tainted-kernels.html for
+ a more details explanation of the various taint flags.
+ Raw taint value as int/string: 4609/'P W O '
+
+You can try to decode the number yourself. That's easy if there was only one
+reason that got your kernel tainted, as in this case you can find the number
+with the table below. If there were multiple reasons you need to decode the
+number, as it is a bitfield, where each bit indicates the absence or presence of
+a particular type of taint. It's best to leave that to the aforementioned
+script, but if you need something quick you can use this shell command to check
+which bits are set::
+
+ $ for i in $(seq 18); do echo $(($i-1)) $(($(cat /proc/sys/kernel/tainted)>>($i-1)&1));done
+
+Table for decoding tainted state
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+=== === ====== ========================================================
+Bit Log Number Reason that got the kernel tainted
+=== === ====== ========================================================
+ 0 G/P 1 proprietary module was loaded
+ 1 _/F 2 module was force loaded
+ 2 _/S 4 kernel running on an out of specification system
+ 3 _/R 8 module was force unloaded
+ 4 _/M 16 processor reported a Machine Check Exception (MCE)
+ 5 _/B 32 bad page referenced or some unexpected page flags
+ 6 _/U 64 taint requested by userspace application
+ 7 _/D 128 kernel died recently, i.e. there was an OOPS or BUG
+ 8 _/A 256 ACPI table overridden by user
+ 9 _/W 512 kernel issued warning
+ 10 _/C 1024 staging driver was loaded
+ 11 _/I 2048 workaround for bug in platform firmware applied
+ 12 _/O 4096 externally-built ("out-of-tree") module was loaded
+ 13 _/E 8192 unsigned module was loaded
+ 14 _/L 16384 soft lockup occurred
+ 15 _/K 32768 kernel has been live patched
+ 16 _/X 65536 auxiliary taint, defined for and used by distros
+ 17 _/T 131072 kernel was built with the struct randomization plugin
+ 18 _/N 262144 an in-kernel test has been run
+ 19 _/J 524288 userspace used a mutating debug operation in fwctl
+=== === ====== ========================================================
+
+Note: The character ``_`` is representing a blank in this table to make reading
+easier.
+
+More detailed explanation for tainting
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ 0) ``G`` if all modules loaded have a GPL or compatible license, ``P`` if
any proprietary module has been loaded. Modules without a
MODULE_LICENSE or with a MODULE_LICENSE that is not recognised by
insmod as GPL compatible are assumed to be proprietary.
- 2) ``F`` if any module was force loaded by ``insmod -f``, ``' '`` if all
+ 1) ``F`` if any module was force loaded by ``insmod -f``, ``' '`` if all
modules were loaded normally.
- 3) ``S`` if the oops occurred on an SMP kernel running on hardware that
- hasn't been certified as safe to run multiprocessor.
- Currently this occurs only on various Athlons that are not
- SMP capable.
-
- 4) ``R`` if a module was force unloaded by ``rmmod -f``, ``' '`` if all
+ 2) ``S`` if the kernel is running on a processor or system that is out of
+ specification: hardware has been put into an unsupported configuration,
+ therefore proper execution cannot be guaranteed.
+ Kernel will be tainted if, for example:
+
+ - on x86: PAE is forced through forcepae on intel CPUs (such as Pentium M)
+ which do not report PAE but may have a functional implementation, an SMP
+ kernel is running on non officially capable SMP Athlon CPUs, MSRs are
+ being poked at from userspace.
+ - on arm: kernel running on certain CPUs (such as Keystone 2) without
+ having certain kernel features enabled.
+ - on arm64: there are mismatched hardware features between CPUs, the
+ bootloader has booted CPUs in different modes.
+ - certain drivers are being used on non supported architectures (such as
+ scsi/snic on something else than x86_64, scsi/ips on non
+ x86/x86_64/itanium, have broken firmware settings for the
+ irqchip/irq-gic on arm64 ...).
+ - x86/x86_64: Microcode late loading is dangerous and will result in
+ tainting the kernel. It requires that all CPUs rendezvous to make sure
+ the update happens when the system is as quiescent as possible. However,
+ a higher priority MCE/SMI/NMI can move control flow away from that
+ rendezvous and interrupt the update, which can be detrimental to the
+ machine.
+
+ 3) ``R`` if a module was force unloaded by ``rmmod -f``, ``' '`` if all
modules were unloaded normally.
- 5) ``M`` if any processor has reported a Machine Check Exception,
+ 4) ``M`` if any processor has reported a Machine Check Exception,
``' '`` if no Machine Check Exceptions have occurred.
- 6) ``B`` if a page-release function has found a bad page reference or
- some unexpected page flags.
+ 5) ``B`` If a page-release function has found a bad page reference or some
+ unexpected page flags. This indicates a hardware problem or a kernel bug;
+ there should be other information in the log indicating why this tainting
+ occurred.
- 7) ``U`` if a user or user application specifically requested that the
+ 6) ``U`` if a user or user application specifically requested that the
Tainted flag be set, ``' '`` otherwise.
- 8) ``D`` if the kernel has died recently, i.e. there was an OOPS or BUG.
+ 7) ``D`` if the kernel has died recently, i.e. there was an OOPS or BUG.
- 9) ``A`` if the ACPI table has been overridden.
+ 8) ``A`` if an ACPI table has been overridden.
- 10) ``W`` if a warning has previously been issued by the kernel.
+ 9) ``W`` if a warning has previously been issued by the kernel.
(Though some warnings may set more specific taint flags.)
- 11) ``C`` if a staging driver has been loaded.
+ 10) ``C`` if a staging driver has been loaded.
- 12) ``I`` if the kernel is working around a severe bug in the platform
+ 11) ``I`` if the kernel is working around a severe bug in the platform
firmware (BIOS or similar).
- 13) ``O`` if an externally-built ("out-of-tree") module has been loaded.
+ 12) ``O`` if an externally-built ("out-of-tree") module has been loaded.
- 14) ``E`` if an unsigned module has been loaded in a kernel supporting
+ 13) ``E`` if an unsigned module has been loaded in a kernel supporting
module signature.
- 15) ``L`` if a soft lockup has previously occurred on the system.
+ 14) ``L`` if a soft lockup has previously occurred on the system.
+
+ 15) ``K`` if the kernel has been live patched.
+
+ 16) ``X`` Auxiliary taint, defined for and used by Linux distributors.
+
+ 17) ``T`` Kernel was build with the randstruct plugin, which can intentionally
+ produce extremely unusual kernel structure layouts (even performance
+ pathological ones), which is important to know when debugging. Set at
+ build time.
- 16) ``K`` if the kernel has been live patched.
+ 18) ``N`` if an in-kernel test, such as a KUnit test, has been run.
-The primary reason for the **'Tainted: '** string is to tell kernel
-debuggers if this is a clean kernel or if anything unusual has
-occurred. Tainting is permanent: even if an offending module is
-unloaded, the tainted value remains to indicate that the kernel is not
-trustworthy.
+ 19) ``J`` if userpace opened /dev/fwctl/* and performed a FWTCL_RPC_DEBUG_WRITE
+ to use the devices debugging features. Device debugging features could
+ cause the device to malfunction in undefined ways.
diff --git a/Documentation/admin-guide/thermal/index.rst b/Documentation/admin-guide/thermal/index.rst
new file mode 100644
index 000000000000..193b7b01a87d
--- /dev/null
+++ b/Documentation/admin-guide/thermal/index.rst
@@ -0,0 +1,8 @@
+=================
+Thermal Subsystem
+=================
+
+.. toctree::
+ :maxdepth: 1
+
+ intel_powerclamp
diff --git a/Documentation/admin-guide/thermal/intel_powerclamp.rst b/Documentation/admin-guide/thermal/intel_powerclamp.rst
new file mode 100644
index 000000000000..08509b978af4
--- /dev/null
+++ b/Documentation/admin-guide/thermal/intel_powerclamp.rst
@@ -0,0 +1,345 @@
+=======================
+Intel Powerclamp Driver
+=======================
+
+By:
+ - Arjan van de Ven <arjan@linux.intel.com>
+ - Jacob Pan <jacob.jun.pan@linux.intel.com>
+
+.. Contents:
+
+ (*) Introduction
+ - Goals and Objectives
+
+ (*) Theory of Operation
+ - Idle Injection
+ - Calibration
+
+ (*) Performance Analysis
+ - Effectiveness and Limitations
+ - Power vs Performance
+ - Scalability
+ - Calibration
+ - Comparison with Alternative Techniques
+
+ (*) Usage and Interfaces
+ - Generic Thermal Layer (sysfs)
+ - Kernel APIs (TBD)
+
+ (*) Module Parameters
+
+INTRODUCTION
+============
+
+Consider the situation where a system’s power consumption must be
+reduced at runtime, due to power budget, thermal constraint, or noise
+level, and where active cooling is not preferred. Software managed
+passive power reduction must be performed to prevent the hardware
+actions that are designed for catastrophic scenarios.
+
+Currently, P-states, T-states (clock modulation), and CPU offlining
+are used for CPU throttling.
+
+On Intel CPUs, C-states provide effective power reduction, but so far
+they’re only used opportunistically, based on workload. With the
+development of intel_powerclamp driver, the method of synchronizing
+idle injection across all online CPU threads was introduced. The goal
+is to achieve forced and controllable C-state residency.
+
+Test/Analysis has been made in the areas of power, performance,
+scalability, and user experience. In many cases, clear advantage is
+shown over taking the CPU offline or modulating the CPU clock.
+
+
+THEORY OF OPERATION
+===================
+
+Idle Injection
+--------------
+
+On modern Intel processors (Nehalem or later), package level C-state
+residency is available in MSRs, thus also available to the kernel.
+
+These MSRs are::
+
+ #define MSR_PKG_C2_RESIDENCY 0x60D
+ #define MSR_PKG_C3_RESIDENCY 0x3F8
+ #define MSR_PKG_C6_RESIDENCY 0x3F9
+ #define MSR_PKG_C7_RESIDENCY 0x3FA
+
+If the kernel can also inject idle time to the system, then a
+closed-loop control system can be established that manages package
+level C-state. The intel_powerclamp driver is conceived as such a
+control system, where the target set point is a user-selected idle
+ratio (based on power reduction), and the error is the difference
+between the actual package level C-state residency ratio and the target idle
+ratio.
+
+Injection is controlled by high priority kernel threads, spawned for
+each online CPU.
+
+These kernel threads, with SCHED_FIFO class, are created to perform
+clamping actions of controlled duty ratio and duration. Each per-CPU
+thread synchronizes its idle time and duration, based on the rounding
+of jiffies, so accumulated errors can be prevented to avoid a jittery
+effect. Threads are also bound to the CPU such that they cannot be
+migrated, unless the CPU is taken offline. In this case, threads
+belong to the offlined CPUs will be terminated immediately.
+
+Running as SCHED_FIFO and relatively high priority, also allows such
+scheme to work for both preemptible and non-preemptible kernels.
+Alignment of idle time around jiffies ensures scalability for HZ
+values. This effect can be better visualized using a Perf timechart.
+The following diagram shows the behavior of kernel thread
+kidle_inject/cpu. During idle injection, it runs monitor/mwait idle
+for a given "duration", then relinquishes the CPU to other tasks,
+until the next time interval.
+
+The NOHZ schedule tick is disabled during idle time, but interrupts
+are not masked. Tests show that the extra wakeups from scheduler tick
+have a dramatic impact on the effectiveness of the powerclamp driver
+on large scale systems (Westmere system with 80 processors).
+
+::
+
+ CPU0
+ ____________ ____________
+ kidle_inject/0 | sleep | mwait | sleep |
+ _________| |________| |_______
+ duration
+ CPU1
+ ____________ ____________
+ kidle_inject/1 | sleep | mwait | sleep |
+ _________| |________| |_______
+ ^
+ |
+ |
+ roundup(jiffies, interval)
+
+Only one CPU is allowed to collect statistics and update global
+control parameters. This CPU is referred to as the controlling CPU in
+this document. The controlling CPU is elected at runtime, with a
+policy that favors BSP, taking into account the possibility of a CPU
+hot-plug.
+
+In terms of dynamics of the idle control system, package level idle
+time is considered largely as a non-causal system where its behavior
+cannot be based on the past or current input. Therefore, the
+intel_powerclamp driver attempts to enforce the desired idle time
+instantly as given input (target idle ratio). After injection,
+powerclamp monitors the actual idle for a given time window and adjust
+the next injection accordingly to avoid over/under correction.
+
+When used in a causal control system, such as a temperature control,
+it is up to the user of this driver to implement algorithms where
+past samples and outputs are included in the feedback. For example, a
+PID-based thermal controller can use the powerclamp driver to
+maintain a desired target temperature, based on integral and
+derivative gains of the past samples.
+
+
+
+Calibration
+-----------
+During scalability testing, it is observed that synchronized actions
+among CPUs become challenging as the number of cores grows. This is
+also true for the ability of a system to enter package level C-states.
+
+To make sure the intel_powerclamp driver scales well, online
+calibration is implemented. The goals for doing such a calibration
+are:
+
+a) determine the effective range of idle injection ratio
+b) determine the amount of compensation needed at each target ratio
+
+Compensation to each target ratio consists of two parts:
+
+ a) steady state error compensation
+
+ This is to offset the error occurring when the system can
+ enter idle without extra wakeups (such as external interrupts).
+
+ b) dynamic error compensation
+
+ When an excessive amount of wakeups occurs during idle, an
+ additional idle ratio can be added to quiet interrupts, by
+ slowing down CPU activities.
+
+A debugfs file is provided for the user to examine compensation
+progress and results, such as on a Westmere system::
+
+ [jacob@nex01 ~]$ cat
+ /sys/kernel/debug/intel_powerclamp/powerclamp_calib
+ controlling cpu: 0
+ pct confidence steady dynamic (compensation)
+ 0 0 0 0
+ 1 1 0 0
+ 2 1 1 0
+ 3 3 1 0
+ 4 3 1 0
+ 5 3 1 0
+ 6 3 1 0
+ 7 3 1 0
+ 8 3 1 0
+ ...
+ 30 3 2 0
+ 31 3 2 0
+ 32 3 1 0
+ 33 3 2 0
+ 34 3 1 0
+ 35 3 2 0
+ 36 3 1 0
+ 37 3 2 0
+ 38 3 1 0
+ 39 3 2 0
+ 40 3 3 0
+ 41 3 1 0
+ 42 3 2 0
+ 43 3 1 0
+ 44 3 1 0
+ 45 3 2 0
+ 46 3 3 0
+ 47 3 0 0
+ 48 3 2 0
+ 49 3 3 0
+
+Calibration occurs during runtime. No offline method is available.
+Steady state compensation is used only when confidence levels of all
+adjacent ratios have reached satisfactory level. A confidence level
+is accumulated based on clean data collected at runtime. Data
+collected during a period without extra interrupts is considered
+clean.
+
+To compensate for excessive amounts of wakeup during idle, additional
+idle time is injected when such a condition is detected. Currently,
+we have a simple algorithm to double the injection ratio. A possible
+enhancement might be to throttle the offending IRQ, such as delaying
+EOI for level triggered interrupts. But it is a challenge to be
+non-intrusive to the scheduler or the IRQ core code.
+
+
+CPU Online/Offline
+------------------
+Per-CPU kernel threads are started/stopped upon receiving
+notifications of CPU hotplug activities. The intel_powerclamp driver
+keeps track of clamping kernel threads, even after they are migrated
+to other CPUs, after a CPU offline event.
+
+
+Performance Analysis
+====================
+This section describes the general performance data collected on
+multiple systems, including Westmere (80P) and Ivy Bridge (4P, 8P).
+
+Effectiveness and Limitations
+-----------------------------
+The maximum range that idle injection is allowed is capped at 50
+percent. As mentioned earlier, since interrupts are allowed during
+forced idle time, excessive interrupts could result in less
+effectiveness. The extreme case would be doing a ping -f to generated
+flooded network interrupts without much CPU acknowledgement. In this
+case, little can be done from the idle injection threads. In most
+normal cases, such as scp a large file, applications can be throttled
+by the powerclamp driver, since slowing down the CPU also slows down
+network protocol processing, which in turn reduces interrupts.
+
+When control parameters change at runtime by the controlling CPU, it
+may take an additional period for the rest of the CPUs to catch up
+with the changes. During this time, idle injection is out of sync,
+thus not able to enter package C- states at the expected ratio. But
+this effect is minor, in that in most cases change to the target
+ratio is updated much less frequently than the idle injection
+frequency.
+
+Scalability
+-----------
+Tests also show a minor, but measurable, difference between the 4P/8P
+Ivy Bridge system and the 80P Westmere server under 50% idle ratio.
+More compensation is needed on Westmere for the same amount of
+target idle ratio. The compensation also increases as the idle ratio
+gets larger. The above reason constitutes the need for the
+calibration code.
+
+On the IVB 8P system, compared to an offline CPU, powerclamp can
+achieve up to 40% better performance per watt. (measured by a spin
+counter summed over per CPU counting threads spawned for all running
+CPUs).
+
+Usage and Interfaces
+====================
+The powerclamp driver is registered to the generic thermal layer as a
+cooling device. Currently, it’s not bound to any thermal zones::
+
+ jacob@chromoly:/sys/class/thermal/cooling_device14$ grep . *
+ cur_state:0
+ max_state:50
+ type:intel_powerclamp
+
+cur_state allows user to set the desired idle percentage. Writing 0 to
+cur_state will stop idle injection. Writing a value between 1 and
+max_state will start the idle injection. Reading cur_state returns the
+actual and current idle percentage. This may not be the same value
+set by the user in that current idle percentage depends on workload
+and includes natural idle. When idle injection is disabled, reading
+cur_state returns value -1 instead of 0 which is to avoid confusing
+100% busy state with the disabled state.
+
+Example usage:
+
+- To inject 25% idle time::
+
+ $ sudo sh -c "echo 25 > /sys/class/thermal/cooling_device80/cur_state
+
+If the system is not busy and has more than 25% idle time already,
+then the powerclamp driver will not start idle injection. Using Top
+will not show idle injection kernel threads.
+
+If the system is busy (spin test below) and has less than 25% natural
+idle time, powerclamp kernel threads will do idle injection. Forced
+idle time is accounted as normal idle in that common code path is
+taken as the idle task.
+
+In this example, 24.1% idle is shown. This helps the system admin or
+user determine the cause of slowdown, when a powerclamp driver is in action::
+
+
+ Tasks: 197 total, 1 running, 196 sleeping, 0 stopped, 0 zombie
+ Cpu(s): 71.2%us, 4.7%sy, 0.0%ni, 24.1%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st
+ Mem: 3943228k total, 1689632k used, 2253596k free, 74960k buffers
+ Swap: 4087804k total, 0k used, 4087804k free, 945336k cached
+
+ PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
+ 3352 jacob 20 0 262m 644 428 S 286 0.0 0:17.16 spin
+ 3341 root -51 0 0 0 0 D 25 0.0 0:01.62 kidle_inject/0
+ 3344 root -51 0 0 0 0 D 25 0.0 0:01.60 kidle_inject/3
+ 3342 root -51 0 0 0 0 D 25 0.0 0:01.61 kidle_inject/1
+ 3343 root -51 0 0 0 0 D 25 0.0 0:01.60 kidle_inject/2
+ 2935 jacob 20 0 696m 125m 35m S 5 3.3 0:31.11 firefox
+ 1546 root 20 0 158m 20m 6640 S 3 0.5 0:26.97 Xorg
+ 2100 jacob 20 0 1223m 88m 30m S 3 2.3 0:23.68 compiz
+
+Tests have shown that by using the powerclamp driver as a cooling
+device, a PID based userspace thermal controller can manage to
+control CPU temperature effectively, when no other thermal influence
+is added. For example, a UltraBook user can compile the kernel under
+certain temperature (below most active trip points).
+
+Module Parameters
+=================
+
+``cpumask`` (RW)
+ A bit mask of CPUs to inject idle. The format of the bitmask is same as
+ used in other subsystems like in /proc/irq/\*/smp_affinity. The mask is
+ comma separated 32 bit groups. Each CPU is one bit. For example for a 256
+ CPU system the full mask is:
+ ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff
+
+ The rightmost mask is for CPU 0-32.
+
+``max_idle`` (RW)
+ Maximum injected idle time to the total CPU time ratio in percent range
+ from 1 to 100. Even if the cooling device max_state is always 100 (100%),
+ this parameter allows to add a max idle percent limit. The default is 50,
+ to match the current implementation of powerclamp driver. Also doesn't
+ allow value more than 75, if the cpumask includes every CPU present in
+ the system.
diff --git a/Documentation/admin-guide/thunderbolt.rst b/Documentation/admin-guide/thunderbolt.rst
index 35fccba6a9a6..102c693c8f81 100644
--- a/Documentation/admin-guide/thunderbolt.rst
+++ b/Documentation/admin-guide/thunderbolt.rst
@@ -1,12 +1,34 @@
-=============
- Thunderbolt
-=============
+.. SPDX-License-Identifier: GPL-2.0
+
+======================
+ USB4 and Thunderbolt
+======================
+USB4 is the public specification based on Thunderbolt 3 protocol with
+some differences at the register level among other things. Connection
+manager is an entity running on the host router (host controller)
+responsible for enumerating routers and establishing tunnels. A
+connection manager can be implemented either in firmware or software.
+Typically PCs come with a firmware connection manager for Thunderbolt 3
+and early USB4 capable systems. Apple systems on the other hand use
+software connection manager and the later USB4 compliant devices follow
+the suit.
+
+The Linux Thunderbolt driver supports both and can detect at runtime which
+connection manager implementation is to be used. To be on the safe side the
+software connection manager in Linux also advertises security level
+``user`` which means PCIe tunneling is disabled by default. The
+documentation below applies to both implementations with the exception that
+the software connection manager only supports ``user`` security level and
+is expected to be accompanied with an IOMMU based DMA protection.
+
+Security levels and how to use them
+-----------------------------------
The interface presented here is not meant for end users. Instead there
should be a userspace tool that handles all the low-level details, keeps
a database of the authorized devices and prompts users for new connections.
More details about the sysfs interface for Thunderbolt devices can be
-found in ``Documentation/ABI/testing/sysfs-bus-thunderbolt``.
+found in Documentation/ABI/testing/sysfs-bus-thunderbolt.
Those users who just want to connect any device without any sort of
manual work can add following line to
@@ -18,8 +40,6 @@ This will authorize all devices automatically when they appear. However,
keep in mind that this bypasses the security levels and makes the system
vulnerable to DMA attacks.
-Security levels and how to use them
------------------------------------
Starting with Intel Falcon Ridge Thunderbolt controller there are 4
security levels available. Intel Titan Ridge added one more security level
(usbonly). The reason for these is the fact that the connected devices can
@@ -27,6 +47,9 @@ be DMA masters and thus read contents of the host memory without CPU and OS
knowing about it. There are ways to prevent this by setting up an IOMMU but
it is not always available for various reasons.
+Some USB4 systems have a BIOS setting to disable PCIe tunneling. This is
+treated as another security level (nopcie).
+
The security levels are as follows:
none
@@ -57,6 +80,10 @@ The security levels are as follows:
Display Port in a dock. All PCIe links downstream of the dock are
removed.
+ nopcie
+ PCIe tunneling is disabled/forbidden from the BIOS. Available in some
+ USB4 systems.
+
The current security level can be read from
``/sys/bus/thunderbolt/devices/domainX/security`` where ``domainX`` is
the Thunderbolt domain the host controller manages. There is typically
@@ -133,8 +160,44 @@ If the user still wants to connect the device they can either approve
the device without a key or write a new key and write 1 to the
``authorized`` file to get the new key stored on the device NVM.
-Upgrading NVM on Thunderbolt device or host
--------------------------------------------
+De-authorizing devices
+----------------------
+It is possible to de-authorize devices by writing ``0`` to their
+``authorized`` attribute. This requires support from the connection
+manager implementation and can be checked by reading domain
+``deauthorization`` attribute. If it reads ``1`` then the feature is
+supported.
+
+When a device is de-authorized the PCIe tunnel from the parent device
+PCIe downstream (or root) port to the device PCIe upstream port is torn
+down. This is essentially the same thing as PCIe hot-remove and the PCIe
+toplogy in question will not be accessible anymore until the device is
+authorized again. If there is storage such as NVMe or similar involved,
+there is a risk for data loss if the filesystem on that storage is not
+properly shut down. You have been warned!
+
+DMA protection utilizing IOMMU
+------------------------------
+Recent systems from 2018 and forward with Thunderbolt ports may natively
+support IOMMU. This means that Thunderbolt security is handled by an IOMMU
+so connected devices cannot access memory regions outside of what is
+allocated for them by drivers. When Linux is running on such system it
+automatically enables IOMMU if not enabled by the user already. These
+systems can be identified by reading ``1`` from
+``/sys/bus/thunderbolt/devices/domainX/iommu_dma_protection`` attribute.
+
+The driver does not do anything special in this case but because DMA
+protection is handled by the IOMMU, security levels (if set) are
+redundant. For this reason some systems ship with security level set to
+``none``. Other systems have security level set to ``user`` in order to
+support downgrade to older OS, so users who want to automatically
+authorize devices when IOMMU DMA protection is enabled can use the
+following ``udev`` rule::
+
+ ACTION=="add", SUBSYSTEM=="thunderbolt", ATTRS{iommu_dma_protection}=="1", ATTR{authorized}=="0", ATTR{authorized}="1"
+
+Upgrading NVM on Thunderbolt device, host or retimer
+----------------------------------------------------
Since most of the functionality is handled in firmware running on a
host controller or a device, it is important that the firmware can be
upgraded to the latest where possible bugs in it have been fixed.
@@ -145,9 +208,10 @@ for some machines:
`Thunderbolt Updates <https://thunderbolttechnology.net/updates>`_
-Before you upgrade firmware on a device or host, please make sure it is a
-suitable upgrade. Failing to do that may render the device (or host) in a
-state where it cannot be used properly anymore without special tools!
+Before you upgrade firmware on a device, host or retimer, please make
+sure it is a suitable upgrade. Failing to do that may render the device
+in a state where it cannot be used properly anymore without special
+tools!
Host NVM upgrade on Apple Macs is not supported.
@@ -192,6 +256,35 @@ Note names of the NVMem devices ``nvm_activeN`` and ``nvm_non_activeN``
depend on the order they are registered in the NVMem subsystem. N in
the name is the identifier added by the NVMem subsystem.
+Upgrading on-board retimer NVM when there is no cable connected
+---------------------------------------------------------------
+If the platform supports, it may be possible to upgrade the retimer NVM
+firmware even when there is nothing connected to the USB4
+ports. When this is the case the ``usb4_portX`` devices have two special
+attributes: ``offline`` and ``rescan``. The way to upgrade the firmware
+is to first put the USB4 port into offline mode::
+
+ # echo 1 > /sys/bus/thunderbolt/devices/0-0/usb4_port1/offline
+
+This step makes sure the port does not respond to any hotplug events,
+and also ensures the retimers are powered on. The next step is to scan
+for the retimers::
+
+ # echo 1 > /sys/bus/thunderbolt/devices/0-0/usb4_port1/rescan
+
+This enumerates and adds the on-board retimers. Now retimer NVM can be
+upgraded in the same way than with cable connected (see previous
+section). However, the retimer is not disconnected as we are offline
+mode) so after writing ``1`` to ``nvm_authenticate`` one should wait for
+5 or more seconds before running rescan again::
+
+ # echo 1 > /sys/bus/thunderbolt/devices/0-0/usb4_port1/rescan
+
+This point if everything went fine, the port can be put back to
+functional state again::
+
+ # echo 0 > /sys/bus/thunderbolt/devices/0-0/usb4_port1/offline
+
Upgrading NVM when host controller is in safe mode
--------------------------------------------------
If the existing NVM is not properly authenticated (or is missing) the
@@ -203,6 +296,39 @@ information is missing.
To recover from this mode, one needs to flash a valid NVM image to the
host controller in the same way it is done in the previous chapter.
+Tunneling events
+----------------
+The driver sends ``KOBJ_CHANGE`` events to userspace when there is a
+tunneling change in the ``thunderbolt_domain``. The notification carries
+following environment variables::
+
+ TUNNEL_EVENT=<EVENT>
+ TUNNEL_DETAILS=0:12 <-> 1:20 (USB3)
+
+Possible values for ``<EVENT>`` are:
+
+ activated
+ The tunnel was activated (created).
+
+ changed
+ There is a change in this tunnel. For example bandwidth allocation was
+ changed.
+
+ deactivated
+ The tunnel was torn down.
+
+ low bandwidth
+ The tunnel is not getting optimal bandwidth.
+
+ insufficient bandwidth
+ There is not enough bandwidth for the current tunnel requirements.
+
+The ``TUNNEL_DETAILS`` is only provided if the tunnel is known. For
+example, in case of Firmware Connection Manager this is missing or does
+not provide full tunnel information. In case of Software Connection Manager
+this includes full tunnel details. The format currently matches what the
+driver uses when logging. This may change over time.
+
Networking over Thunderbolt cable
---------------------------------
Thunderbolt technology allows software communication between two hosts
@@ -232,12 +358,7 @@ Forcing power
Many OEMs include a method that can be used to force the power of a
Thunderbolt controller to an "On" state even if nothing is connected.
If supported by your machine this will be exposed by the WMI bus with
-a sysfs attribute called "force_power".
-
-For example the intel-wmi-thunderbolt driver exposes this attribute in:
- /sys/bus/wmi/devices/86CCFD48-205E-4A77-9C48-2021CBEDE341/force_power
-
- To force the power to on, write 1 to this attribute file.
- To disable force power, write 0 to this attribute file.
+a sysfs attribute called "force_power", see
+Documentation/ABI/testing/sysfs-platform-intel-wmi-thunderbolt for details.
Note: it's currently not possible to query the force power state of a platform.
diff --git a/Documentation/admin-guide/ufs.rst b/Documentation/admin-guide/ufs.rst
new file mode 100644
index 000000000000..55d15297f8d7
--- /dev/null
+++ b/Documentation/admin-guide/ufs.rst
@@ -0,0 +1,68 @@
+=========
+Using UFS
+=========
+
+mount -t ufs -o ufstype=type_of_ufs device dir
+
+
+UFS Options
+===========
+
+ufstype=type_of_ufs
+ UFS is a file system widely used in different operating systems.
+ The problem are differences among implementations. Features of
+ some implementations are undocumented, so its hard to recognize
+ type of ufs automatically. That's why user must specify type of
+ ufs manually by mount option ufstype. Possible values are:
+
+ old
+ old format of ufs
+ default value, supported as read-only
+
+ 44bsd
+ used in FreeBSD, NetBSD, OpenBSD
+ supported as read-write
+
+ ufs2
+ used in FreeBSD 5.x
+ supported as read-write
+
+ 5xbsd
+ synonym for ufs2
+
+ sun
+ used in SunOS (Solaris)
+ supported as read-write
+
+ sunx86
+ used in SunOS for Intel (Solarisx86)
+ supported as read-write
+
+ hp
+ used in HP-UX
+ supported as read-only
+
+ nextstep
+ used in NextStep
+ supported as read-only
+
+ nextstep-cd
+ used for NextStep CDROMs (block_size == 2048)
+ supported as read-only
+
+ openstep
+ used in OpenStep
+ supported as read-only
+
+
+Possible Problems
+-----------------
+
+See next section, if you have any.
+
+
+Bug Reports
+-----------
+
+Any ufs bug report you can send to daniel.pirkl@email.cz or
+to dushistov@mail.ru (do not send partition tables bug reports).
diff --git a/Documentation/admin-guide/unicode.rst b/Documentation/admin-guide/unicode.rst
index 7425a3351321..cba7e5017d36 100644
--- a/Documentation/admin-guide/unicode.rst
+++ b/Documentation/admin-guide/unicode.rst
@@ -3,11 +3,10 @@ Unicode support
Last update: 2005-01-17, version 1.4
-This file is maintained by H. Peter Anvin <unicode@lanana.org> as part
-of the Linux Assigned Names And Numbers Authority (LANANA) project.
-The current version can be found at:
-
- http://www.lanana.org/docs/unicode/admin-guide/unicode.rst
+Note: The original version of this document, which was maintained at
+lanana.org as part of the Linux Assigned Names And Numbers Authority
+(LANANA) project, is no longer existent. So, this version in the
+mainline Linux kernel is now the maintained main document.
Introduction
------------
@@ -114,7 +113,7 @@ Unicode practice.
This range is now officially managed by the ConScript Unicode
Registry. The normative reference is at:
- http://www.evertype.com/standards/csur/klingon.html
+ https://www.evertype.com/standards/csur/klingon.html
Klingon has an alphabet of 26 characters, a positional numeric writing
system with 10 digits, and is written left-to-right, top-to-bottom.
@@ -178,7 +177,7 @@ fictional and artificial scripts has been established by John Cowan
<jcowan@reutershealth.com> and Michael Everson <everson@evertype.com>.
The ConScript Unicode Registry is accessible at:
- http://www.evertype.com/standards/csur/
+ https://www.evertype.com/standards/csur/
The ranges used fall at the low end of the End User Zone and can hence
not be normatively assigned, but it is recommended that people who
diff --git a/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst b/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst
new file mode 100644
index 000000000000..d83601f2a459
--- /dev/null
+++ b/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst
@@ -0,0 +1,2222 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR CC-BY-4.0)
+.. [see the bottom of this file for redistribution information]
+
+=========================================
+How to verify bugs and bisect regressions
+=========================================
+
+This document describes how to check if some Linux kernel problem occurs in code
+currently supported by developers -- to then explain how to locate the change
+causing the issue, if it is a regression (e.g. did not happen with earlier
+versions).
+
+The text aims at people running kernels from mainstream Linux distributions on
+commodity hardware who want to report a kernel bug to the upstream Linux
+developers. Despite this intent, the instructions work just as well for users
+who are already familiar with building their own kernels: they help avoid
+mistakes occasionally made even by experienced developers.
+
+..
+ Note: if you see this note, you are reading the text's source file. You
+ might want to switch to a rendered version: it makes it a lot easier to
+ read and navigate this document -- especially when you want to look something
+ up in the reference section, then jump back to where you left off.
+..
+ Find the latest rendered version of this text here:
+ https://docs.kernel.org/admin-guide/verify-bugs-and-bisect-regressions.html
+
+The essence of the process (aka 'TL;DR')
+========================================
+
+*[If you are new to building or bisecting Linux, ignore this section and head
+over to the* ':ref:`step-by-step guide <introguide_bissbs>`' *below. It utilizes
+the same commands as this section while describing them in brief fashion. The
+steps are nevertheless easy to follow and together with accompanying entries
+in a reference section mention many alternatives, pitfalls, and additional
+aspects, all of which might be essential in your present case.]*
+
+**In case you want to check if a bug is present in code currently supported by
+developers**, execute just the *preparations* and *segment 1*; while doing so,
+consider the newest Linux kernel you regularly use to be the 'working' kernel.
+In the following example that's assumed to be 6.0, which is why its sources
+will be used to prepare the .config file.
+
+**In case you face a regression**, follow the steps at least till the end of
+*segment 2*. Then you can submit a preliminary report -- or continue with
+*segment 3*, which describes how to perform a bisection needed for a
+full-fledged regression report. In the following example 6.0.13 is assumed to be
+the 'working' kernel and 6.1.5 to be the first 'broken', which is why 6.0
+will be considered the 'good' release and used to prepare the .config file.
+
+* **Preparations**: set up everything to build your own kernels::
+
+ # * Remove any software that depends on externally maintained kernel modules
+ # or builds any automatically during bootup.
+ # * Ensure Secure Boot permits booting self-compiled Linux kernels.
+ # * If you are not already running the 'working' kernel, reboot into it.
+ # * Install compilers and everything else needed for building Linux.
+ # * Ensure to have 15 Gigabyte free space in your home directory.
+ git clone -o mainline --no-checkout \
+ https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git ~/linux/
+ cd ~/linux/
+ git remote add -t master stable \
+ https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
+ git switch --detach v6.0
+ # * Hint: if you used an existing clone, ensure no stale .config is around.
+ make olddefconfig
+ # * Ensure the former command picked the .config of the 'working' kernel.
+ # * Connect external hardware (USB keys, tokens, ...), start a VM, bring up
+ # VPNs, mount network shares, and briefly try the feature that is broken.
+ yes '' | make localmodconfig
+ ./scripts/config --set-str CONFIG_LOCALVERSION '-local'
+ ./scripts/config -e CONFIG_LOCALVERSION_AUTO
+ # * Note, when short on storage space, check the guide for an alternative:
+ ./scripts/config -d DEBUG_INFO_NONE -e KALLSYMS_ALL -e DEBUG_KERNEL \
+ -e DEBUG_INFO -e DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT -e KALLSYMS
+ # * Hint: at this point you might want to adjust the build configuration;
+ # you'll have to, if you are running Debian.
+ make olddefconfig
+ cp .config ~/kernel-config-working
+
+* **Segment 1**: build a kernel from the latest mainline codebase.
+
+ This among others checks if the problem was fixed already and which developers
+ later need to be told about the problem; in case of a regression, this rules
+ out a .config change as root of the problem.
+
+ a) Checking out latest mainline code::
+
+ cd ~/linux/
+ git switch --discard-changes --detach mainline/master
+
+ b) Build, install, and boot a kernel::
+
+ cp ~/kernel-config-working .config
+ make olddefconfig
+ make -j $(nproc --all)
+ # * Make sure there is enough disk space to hold another kernel:
+ df -h /boot/ /lib/modules/
+ # * Note: on Arch Linux, its derivatives and a few other distributions
+ # the following commands will do nothing at all or only part of the
+ # job. See the step-by-step guide for further details.
+ sudo make modules_install
+ command -v installkernel && sudo make install
+ # * Check how much space your self-built kernel actually needs, which
+ # enables you to make better estimates later:
+ du -ch /boot/*$(make -s kernelrelease)* | tail -n 1
+ du -sh /lib/modules/$(make -s kernelrelease)/
+ # * Hint: the output of the following command will help you pick the
+ # right kernel from the boot menu:
+ make -s kernelrelease | tee -a ~/kernels-built
+ reboot
+ # * Once booted, ensure you are running the kernel you just built by
+ # checking if the output of the next two commands matches:
+ tail -n 1 ~/kernels-built
+ uname -r
+ cat /proc/sys/kernel/tainted
+
+ c) Check if the problem occurs with this kernel as well.
+
+* **Segment 2**: ensure the 'good' kernel is also a 'working' kernel.
+
+ This among others verifies the trimmed .config file actually works well, as
+ bisecting with it otherwise would be a waste of time:
+
+ a) Start by checking out the sources of the 'good' version::
+
+ cd ~/linux/
+ git switch --discard-changes --detach v6.0
+
+ b) Build, install, and boot a kernel as described earlier in *segment 1,
+ section b* -- just feel free to skip the 'du' commands, as you have a rough
+ estimate already.
+
+ c) Ensure the feature that regressed with the 'broken' kernel actually works
+ with this one.
+
+* **Segment 3**: perform and validate the bisection.
+
+ a) Retrieve the sources for your 'bad' version::
+
+ git remote set-branches --add stable linux-6.1.y
+ git fetch stable
+
+ b) Initialize the bisection::
+
+ cd ~/linux/
+ git bisect start
+ git bisect good v6.0
+ git bisect bad v6.1.5
+
+ c) Build, install, and boot a kernel as described earlier in *segment 1,
+ section b*.
+
+ In case building or booting the kernel fails for unrelated reasons, run
+ ``git bisect skip``. In all other outcomes, check if the regressed feature
+ works with the newly built kernel. If it does, tell Git by executing
+ ``git bisect good``; if it does not, run ``git bisect bad`` instead.
+
+ All three commands will make Git check out another commit; then re-execute
+ this step (e.g. build, install, boot, and test a kernel to then tell Git
+ the outcome). Do so again and again until Git shows which commit broke
+ things. If you run short of disk space during this process, check the
+ section 'Complementary tasks: cleanup during and after the process'
+ below.
+
+ d) Once your finished the bisection, put a few things away::
+
+ cd ~/linux/
+ git bisect log > ~/bisect-log
+ cp .config ~/bisection-config-culprit
+ git bisect reset
+
+ e) Try to verify the bisection result::
+
+ git switch --discard-changes --detach mainline/master
+ git revert --no-edit cafec0cacaca0
+ cp ~/kernel-config-working .config
+ ./scripts/config --set-str CONFIG_LOCALVERSION '-local-cafec0cacaca0-reverted'
+
+ This is optional, as some commits are impossible to revert. But if the
+ second command worked flawlessly, build, install, and boot one more kernel
+ kernel; just this time skip the first command copying the base .config file
+ over, as that already has been taken care off.
+
+* **Complementary tasks**: cleanup during and after the process.
+
+ a) To avoid running out of disk space during a bisection, you might need to
+ remove some kernels you built earlier. You most likely want to keep those
+ you built during segment 1 and 2 around for a while, but you will most
+ likely no longer need kernels tested during the actual bisection
+ (Segment 3 c). You can list them in build order using::
+
+ ls -ltr /lib/modules/*-local*
+
+ To then for example erase a kernel that identifies itself as
+ '6.0-rc1-local-gcafec0cacaca0', use this::
+
+ sudo rm -rf /lib/modules/6.0-rc1-local-gcafec0cacaca0
+ sudo kernel-install -v remove 6.0-rc1-local-gcafec0cacaca0
+ # * Note, on some distributions kernel-install is missing
+ # or does only part of the job.
+
+ b) If you performed a bisection and successfully validated the result, feel
+ free to remove all kernels built during the actual bisection (Segment 3 c);
+ the kernels you built earlier and later you might want to keep around for
+ a week or two.
+
+* **Optional task**: test a debug patch or a proposed fix later::
+
+ git fetch mainline
+ git switch --discard-changes --detach mainline/master
+ git apply /tmp/foobars-proposed-fix-v1.patch
+ cp ~/kernel-config-working .config
+ ./scripts/config --set-str CONFIG_LOCALVERSION '-local-foobars-fix-v1'
+
+ Build, install, and boot a kernel as described in *segment 1, section b* --
+ but this time omit the first command copying the build configuration over,
+ as that has been taken care of already.
+
+.. _introguide_bissbs:
+
+Step-by-step guide on how to verify bugs and bisect regressions
+===============================================================
+
+This guide describes how to set up your own Linux kernels for investigating bugs
+or regressions you intend to report. How far you want to follow the instructions
+depends on your issue:
+
+Execute all steps till the end of *segment 1* to **verify if your kernel problem
+is present in code supported by Linux kernel developers**. If it is, you are all
+set to report the bug -- unless it did not happen with earlier kernel versions,
+as then your want to at least continue with *segment 2* to **check if the issue
+qualifies as regression** which receive priority treatment. Depending on the
+outcome you then are ready to report a bug or submit a preliminary regression
+report; instead of the latter your could also head straight on and follow
+*segment 3* to **perform a bisection** for a full-fledged regression report
+developers are obliged to act upon.
+
+ :ref:`Preparations: set up everything to build your own kernels <introprep_bissbs>`.
+
+ :ref:`Segment 1: try to reproduce the problem with the latest codebase <introlatestcheck_bissbs>`.
+
+ :ref:`Segment 2: check if the kernels you build work fine <introworkingcheck_bissbs>`.
+
+ :ref:`Segment 3: perform a bisection and validate the result <introbisect_bissbs>`.
+
+ :ref:`Complementary tasks: cleanup during and after following this guide <introclosure_bissbs>`.
+
+ :ref:`Optional tasks: test reverts, patches, or later versions <introoptional_bissbs>`.
+
+The steps in each segment illustrate the important aspects of the process, while
+a comprehensive reference section holds additional details for almost all of the
+steps. The reference section sometimes also outlines alternative approaches,
+pitfalls, as well as problems that might occur at the particular step -- and how
+to get things rolling again.
+
+For further details on how to report Linux kernel issues or regressions check
+out Documentation/admin-guide/reporting-issues.rst, which works in conjunction
+with this document. It among others explains why you need to verify bugs with
+the latest 'mainline' kernel (e.g. versions like 6.0, 6.1-rc1, or 6.1-rc6),
+even if you face a problem with a kernel from a 'stable/longterm' series
+(say 6.0.13).
+
+For users facing a regression that document also explains why sending a
+preliminary report after segment 2 might be wise, as the regression and its
+culprit might be known already. For further details on what actually qualifies
+as a regression check out Documentation/admin-guide/reporting-regressions.rst.
+
+If you run into any problems while following this guide or have ideas how to
+improve it, :ref:`please let the kernel developers know <submit_improvements_vbbr>`.
+
+.. _introprep_bissbs:
+
+Preparations: set up everything to build your own kernels
+---------------------------------------------------------
+
+The following steps lay the groundwork for all further tasks.
+
+Note: the instructions assume you are building and testing on the same
+machine; if you want to compile the kernel on another system, check
+:ref:`Build kernels on a different machine <buildhost_bis>` below.
+
+.. _backup_bissbs:
+
+* Create a fresh backup and put system repair and restore tools at hand, just
+ to be prepared for the unlikely case of something going sideways.
+
+ [:ref:`details <backup_bisref>`]
+
+.. _vanilla_bissbs:
+
+* Remove all software that depends on externally developed kernel drivers or
+ builds them automatically. That includes but is not limited to DKMS, openZFS,
+ VirtualBox, and Nvidia's graphics drivers (including the GPLed kernel module).
+
+ [:ref:`details <vanilla_bisref>`]
+
+.. _secureboot_bissbs:
+
+* On platforms with 'Secure Boot' or similar solutions, prepare everything to
+ ensure the system will permit your self-compiled kernel to boot. The
+ quickest and easiest way to achieve this on commodity x86 systems is to
+ disable such techniques in the BIOS setup utility; alternatively, remove
+ their restrictions through a process initiated by
+ ``mokutil --disable-validation``.
+
+ [:ref:`details <secureboot_bisref>`]
+
+.. _rangecheck_bissbs:
+
+* Determine the kernel versions considered 'good' and 'bad' throughout this
+ guide:
+
+ * Do you follow this guide to verify if a bug is present in the code the
+ primary developers care for? Then consider the version of the newest kernel
+ you regularly use currently as 'good' (e.g. 6.0, 6.0.13, or 6.1-rc2).
+
+ * Do you face a regression, e.g. something broke or works worse after
+ switching to a newer kernel version? In that case it depends on the version
+ range during which the problem appeared:
+
+ * Something regressed when updating from a stable/longterm release
+ (say 6.0.13) to a newer mainline series (like 6.1-rc7 or 6.1) or a
+ stable/longterm version based on one (say 6.1.5)? Then consider the
+ mainline release your working kernel is based on to be the 'good'
+ version (e.g. 6.0) and the first version to be broken as the 'bad' one
+ (e.g. 6.1-rc7, 6.1, or 6.1.5). Note, at this point it is merely assumed
+ that 6.0 is fine; this hypothesis will be checked in segment 2.
+
+ * Something regressed when switching from one mainline version (say 6.0) to
+ a later one (like 6.1-rc1) or a stable/longterm release based on it
+ (say 6.1.5)? Then regard the last working version (e.g. 6.0) as 'good' and
+ the first broken (e.g. 6.1-rc1 or 6.1.5) as 'bad'.
+
+ * Something regressed when updating within a stable/longterm series (say
+ from 6.0.13 to 6.0.15)? Then consider those versions as 'good' and 'bad'
+ (e.g. 6.0.13 and 6.0.15), as you need to bisect within that series.
+
+ *Note, do not confuse 'good' version with 'working' kernel; the latter term
+ throughout this guide will refer to the last kernel that has been working
+ fine.*
+
+ [:ref:`details <rangecheck_bisref>`]
+
+.. _bootworking_bissbs:
+
+* Boot into the 'working' kernel and briefly use the apparently broken feature.
+
+ [:ref:`details <bootworking_bisref>`]
+
+.. _diskspace_bissbs:
+
+* Ensure to have enough free space for building Linux. 15 Gigabyte in your home
+ directory should typically suffice. If you have less available, be sure to pay
+ attention to later steps about retrieving the Linux sources and handling of
+ debug symbols: both explain approaches reducing the amount of space, which
+ should allow you to master these tasks with about 4 Gigabytes free space.
+
+ [:ref:`details <diskspace_bisref>`]
+
+.. _buildrequires_bissbs:
+
+* Install all software required to build a Linux kernel. Often you will need:
+ 'bc', 'binutils' ('ld' et al.), 'bison', 'flex', 'gcc', 'git', 'openssl',
+ 'pahole', 'perl', and the development headers for 'libelf' and 'openssl'. The
+ reference section shows how to quickly install those on various popular Linux
+ distributions.
+
+ [:ref:`details <buildrequires_bisref>`]
+
+.. _sources_bissbs:
+
+* Retrieve the mainline Linux sources; then change into the directory holding
+ them, as all further commands in this guide are meant to be executed from
+ there.
+
+ *Note, the following describe how to retrieve the sources using a full
+ mainline clone, which downloads about 2,75 GByte as of early 2024. The*
+ :ref:`reference section describes two alternatives <sources_bisref>` *:
+ one downloads less than 500 MByte, the other works better with unreliable
+ internet connections.*
+
+ Execute the following command to retrieve a fresh mainline codebase while
+ preparing things to add branches for stable/longterm series later::
+
+ git clone -o mainline --no-checkout \
+ https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git ~/linux/
+ cd ~/linux/
+ git remote add -t master stable \
+ https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
+
+ [:ref:`details <sources_bisref>`]
+
+.. _stablesources_bissbs:
+
+* Is one of the versions you earlier established as 'good' or 'bad' a stable or
+ longterm release (say 6.1.5)? Then download the code for the series it belongs
+ to ('linux-6.1.y' in this example)::
+
+ git remote set-branches --add stable linux-6.1.y
+ git fetch stable
+
+.. _oldconfig_bissbs:
+
+* Start preparing a kernel build configuration (the '.config' file).
+
+ Before doing so, ensure you are still running the 'working' kernel an earlier
+ step told you to boot; if you are unsure, check the current kernelrelease
+ identifier using ``uname -r``.
+
+ Afterwards check out the source code for the version earlier established as
+ 'good'. In the following example command this is assumed to be 6.0; note that
+ the version number in this and all later Git commands needs to be prefixed
+ with a 'v'::
+
+ git switch --discard-changes --detach v6.0
+
+ Now create a build configuration file::
+
+ make olddefconfig
+
+ The kernel build scripts then will try to locate the build configuration file
+ for the running kernel and then adjust it for the needs of the kernel sources
+ you checked out. While doing so, it will print a few lines you need to check.
+
+ Look out for a line starting with '# using defaults found in'. It should be
+ followed by a path to a file in '/boot/' that contains the release identifier
+ of your currently working kernel. If the line instead continues with something
+ like 'arch/x86/configs/x86_64_defconfig', then the build infra failed to find
+ the .config file for your running kernel -- in which case you have to put one
+ there manually, as explained in the reference section.
+
+ In case you can not find such a line, look for one containing '# configuration
+ written to .config'. If that's the case you have a stale build configuration
+ lying around. Unless you intend to use it, delete it; afterwards run
+ 'make olddefconfig' again and check if it now picked up the right config file
+ as base.
+
+ [:ref:`details <oldconfig_bisref>`]
+
+.. _localmodconfig_bissbs:
+
+* Disable any kernel modules apparently superfluous for your setup. This is
+ optional, but especially wise for bisections, as it speeds up the build
+ process enormously -- at least unless the .config file picked up in the
+ previous step was already tailored to your and your hardware needs, in which
+ case you should skip this step.
+
+ To prepare the trimming, connect external hardware you occasionally use (USB
+ keys, tokens, ...), quickly start a VM, and bring up VPNs. And if you rebooted
+ since you started that guide, ensure that you tried using the feature causing
+ trouble since you started the system. Only then trim your .config::
+
+ yes '' | make localmodconfig
+
+ There is a catch to this, as the 'apparently' in initial sentence of this step
+ and the preparation instructions already hinted at:
+
+ The 'localmodconfig' target easily disables kernel modules for features only
+ used occasionally -- like modules for external peripherals not yet connected
+ since booting, virtualization software not yet utilized, VPN tunnels, and a
+ few other things. That's because some tasks rely on kernel modules Linux only
+ loads when you execute tasks like the aforementioned ones for the first time.
+
+ This drawback of localmodconfig is nothing you should lose sleep over, but
+ something to keep in mind: if something is misbehaving with the kernels built
+ during this guide, this is most likely the reason. You can reduce or nearly
+ eliminate the risk with tricks outlined in the reference section; but when
+ building a kernel just for quick testing purposes this is usually not worth
+ spending much effort on, as long as it boots and allows to properly test the
+ feature that causes trouble.
+
+ [:ref:`details <localmodconfig_bisref>`]
+
+.. _tagging_bissbs:
+
+* Ensure all the kernels you will build are clearly identifiable using a special
+ tag and a unique version number::
+
+ ./scripts/config --set-str CONFIG_LOCALVERSION '-local'
+ ./scripts/config -e CONFIG_LOCALVERSION_AUTO
+
+ [:ref:`details <tagging_bisref>`]
+
+.. _debugsymbols_bissbs:
+
+* Decide how to handle debug symbols.
+
+ In the context of this document it is often wise to enable them, as there is a
+ decent chance you will need to decode a stack trace from a 'panic', 'Oops',
+ 'warning', or 'BUG'::
+
+ ./scripts/config -d DEBUG_INFO_NONE -e KALLSYMS_ALL -e DEBUG_KERNEL \
+ -e DEBUG_INFO -e DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT -e KALLSYMS
+
+ But if you are extremely short on storage space, you might want to disable
+ debug symbols instead::
+
+ ./scripts/config -d DEBUG_INFO -d DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT \
+ -d DEBUG_INFO_DWARF4 -d DEBUG_INFO_DWARF5 -e CONFIG_DEBUG_INFO_NONE
+
+ [:ref:`details <debugsymbols_bisref>`]
+
+.. _configmods_bissbs:
+
+* Check if you may want or need to adjust some other kernel configuration
+ options:
+
+ * Are you running Debian? Then you want to avoid known problems by performing
+ additional adjustments explained in the reference section.
+
+ [:ref:`details <configmods_distros_bisref>`].
+
+ * If you want to influence other aspects of the configuration, do so now using
+ your preferred tool. Note, to use make targets like 'menuconfig' or
+ 'nconfig', you will need to install the development files of ncurses; for
+ 'xconfig' you likewise need the Qt5 or Qt6 headers.
+
+ [:ref:`details <configmods_individual_bisref>`].
+
+.. _saveconfig_bissbs:
+
+* Reprocess the .config after the latest adjustments and store it in a safe
+ place::
+
+ make olddefconfig
+ cp .config ~/kernel-config-working
+
+ [:ref:`details <saveconfig_bisref>`]
+
+.. _introlatestcheck_bissbs:
+
+Segment 1: try to reproduce the problem with the latest codebase
+----------------------------------------------------------------
+
+The following steps verify if the problem occurs with the code currently
+supported by developers. In case you face a regression, it also checks that the
+problem is not caused by some .config change, as reporting the issue then would
+be a waste of time. [:ref:`details <introlatestcheck_bisref>`]
+
+.. _checkoutmaster_bissbs:
+
+* Check out the latest Linux codebase.
+
+ * Are your 'good' and 'bad' versions from the same stable or longterm series?
+ Then check the `front page of kernel.org <https://kernel.org/>`_: if it
+ lists a release from that series without an '[EOL]' tag, checkout the series
+ latest version ('linux-6.1.y' in the following example)::
+
+ cd ~/linux/
+ git switch --discard-changes --detach stable/linux-6.1.y
+
+ Your series is unsupported, if is not listed or carrying a 'end of life'
+ tag. In that case you might want to check if a successor series (say
+ linux-6.2.y) or mainline (see next point) fix the bug.
+
+ * In all other cases, run::
+
+ cd ~/linux/
+ git switch --discard-changes --detach mainline/master
+
+ [:ref:`details <checkoutmaster_bisref>`]
+
+.. _build_bissbs:
+
+* Build the image and the modules of your first kernel using the config file you
+ prepared::
+
+ cp ~/kernel-config-working .config
+ make olddefconfig
+ make -j $(nproc --all)
+
+ If you want your kernel packaged up as deb, rpm, or tar file, see the
+ reference section for alternatives, which obviously will require other
+ steps to install as well.
+
+ [:ref:`details <build_bisref>`]
+
+.. _install_bissbs:
+
+* Install your newly built kernel.
+
+ Before doing so, consider checking if there is still enough space for it::
+
+ df -h /boot/ /lib/modules/
+
+ For now assume 150 MByte in /boot/ and 200 in /lib/modules/ will suffice; how
+ much your kernels actually require will be determined later during this guide.
+
+ Now install the kernel's modules and its image, which will be stored in
+ parallel to the your Linux distribution's kernels::
+
+ sudo make modules_install
+ command -v installkernel && sudo make install
+
+ The second command ideally will take care of three steps required at this
+ point: copying the kernel's image to /boot/, generating an initramfs, and
+ adding an entry for both to the boot loader's configuration.
+
+ Sadly some distributions (among them Arch Linux, its derivatives, and many
+ immutable Linux distributions) will perform none or only some of those tasks.
+ You therefore want to check if all of them were taken care of and manually
+ perform those that were not. The reference section provides further details on
+ that; your distribution's documentation might help, too.
+
+ Once you figured out the steps needed at this point, consider writing them
+ down: if you will build more kernels as described in segment 2 and 3, you will
+ have to perform those again after executing ``command -v installkernel [...]``.
+
+ [:ref:`details <install_bisref>`]
+
+.. _storagespace_bissbs:
+
+* In case you plan to follow this guide further, check how much storage space
+ the kernel, its modules, and other related files like the initramfs consume::
+
+ du -ch /boot/*$(make -s kernelrelease)* | tail -n 1
+ du -sh /lib/modules/$(make -s kernelrelease)/
+
+ Write down or remember those two values for later: they enable you to prevent
+ running out of disk space accidentally during a bisection.
+
+ [:ref:`details <storagespace_bisref>`]
+
+.. _kernelrelease_bissbs:
+
+* Show and store the kernelrelease identifier of the kernel you just built::
+
+ make -s kernelrelease | tee -a ~/kernels-built
+
+ Remember the identifier momentarily, as it will help you pick the right kernel
+ from the boot menu upon restarting.
+
+* Reboot into your newly built kernel. To ensure your actually started the one
+ you just built, you might want to verify if the output of these commands
+ matches::
+
+ tail -n 1 ~/kernels-built
+ uname -r
+
+.. _tainted_bissbs:
+
+* Check if the kernel marked itself as 'tainted'::
+
+ cat /proc/sys/kernel/tainted
+
+ If that command does not return '0', check the reference section, as the cause
+ for this might interfere with your testing.
+
+ [:ref:`details <tainted_bisref>`]
+
+.. _recheckbroken_bissbs:
+
+* Verify if your bug occurs with the newly built kernel. If it does not, check
+ out the instructions in the reference section to ensure nothing went sideways
+ during your tests.
+
+ [:ref:`details <recheckbroken_bisref>`]
+
+.. _recheckstablebroken_bissbs:
+
+* Did you just built a stable or longterm kernel? And were you able to reproduce
+ the regression with it? Then you should test the latest mainline codebase as
+ well, because the result determines which developers the bug must be submitted
+ to.
+
+ To prepare that test, check out current mainline::
+
+ cd ~/linux/
+ git switch --discard-changes --detach mainline/master
+
+ Now use the checked out code to build and install another kernel using the
+ commands the earlier steps already described in more detail::
+
+ cp ~/kernel-config-working .config
+ make olddefconfig
+ make -j $(nproc --all)
+ # * Check if the free space suffices holding another kernel:
+ df -h /boot/ /lib/modules/
+ sudo make modules_install
+ command -v installkernel && sudo make install
+ make -s kernelrelease | tee -a ~/kernels-built
+ reboot
+
+ Confirm you booted the kernel you intended to start and check its tainted
+ status::
+
+ tail -n 1 ~/kernels-built
+ uname -r
+ cat /proc/sys/kernel/tainted
+
+ Now verify if this kernel is showing the problem. If it does, then you need
+ to report the bug to the primary developers; if it does not, report it to the
+ stable team. See Documentation/admin-guide/reporting-issues.rst for details.
+
+ [:ref:`details <recheckstablebroken_bisref>`]
+
+Do you follow this guide to verify if a problem is present in the code
+currently supported by Linux kernel developers? Then you are done at this
+point. If you later want to remove the kernel you just built, check out
+:ref:`Complementary tasks: cleanup during and after following this guide <introclosure_bissbs>`.
+
+In case you face a regression, move on and execute at least the next segment
+as well.
+
+.. _introworkingcheck_bissbs:
+
+Segment 2: check if the kernels you build work fine
+---------------------------------------------------
+
+In case of a regression, you now want to ensure the trimmed configuration file
+you created earlier works as expected; a bisection with the .config file
+otherwise would be a waste of time. [:ref:`details <introworkingcheck_bisref>`]
+
+.. _recheckworking_bissbs:
+
+* Build your own variant of the 'working' kernel and check if the feature that
+ regressed works as expected with it.
+
+ Start by checking out the sources for the version earlier established as
+ 'good' (once again assumed to be 6.0 here)::
+
+ cd ~/linux/
+ git switch --discard-changes --detach v6.0
+
+ Now use the checked out code to configure, build, and install another kernel
+ using the commands the previous subsection explained in more detail::
+
+ cp ~/kernel-config-working .config
+ make olddefconfig
+ make -j $(nproc --all)
+ # * Check if the free space suffices holding another kernel:
+ df -h /boot/ /lib/modules/
+ sudo make modules_install
+ command -v installkernel && sudo make install
+ make -s kernelrelease | tee -a ~/kernels-built
+ reboot
+
+ When the system booted, you may want to verify once again that the
+ kernel you started is the one you just built::
+
+ tail -n 1 ~/kernels-built
+ uname -r
+
+ Now check if this kernel works as expected; if not, consult the reference
+ section for further instructions.
+
+ [:ref:`details <recheckworking_bisref>`]
+
+.. _introbisect_bissbs:
+
+Segment 3: perform the bisection and validate the result
+--------------------------------------------------------
+
+With all the preparations and precaution builds taken care of, you are now ready
+to begin the bisection. This will make you build quite a few kernels -- usually
+about 15 in case you encountered a regression when updating to a newer series
+(say from 6.0.13 to 6.1.5). But do not worry, due to the trimmed build
+configuration created earlier this works a lot faster than many people assume:
+overall on average it will often just take about 10 to 15 minutes to compile
+each kernel on commodity x86 machines.
+
+.. _bisectstart_bissbs:
+
+* Start the bisection and tell Git about the versions earlier established as
+ 'good' (6.0 in the following example command) and 'bad' (6.1.5)::
+
+ cd ~/linux/
+ git bisect start
+ git bisect good v6.0
+ git bisect bad v6.1.5
+
+ [:ref:`details <bisectstart_bisref>`]
+
+.. _bisectbuild_bissbs:
+
+* Now use the code Git checked out to build, install, and boot a kernel using
+ the commands introduced earlier::
+
+ cp ~/kernel-config-working .config
+ make olddefconfig
+ make -j $(nproc --all)
+ # * Check if the free space suffices holding another kernel:
+ df -h /boot/ /lib/modules/
+ sudo make modules_install
+ command -v installkernel && sudo make install
+ make -s kernelrelease | tee -a ~/kernels-built
+ reboot
+
+ If compilation fails for some reason, run ``git bisect skip`` and restart
+ executing the stack of commands from the beginning.
+
+ In case you skipped the 'test latest codebase' step in the guide, check its
+ description as for why the 'df [...]' and 'make -s kernelrelease [...]'
+ commands are here.
+
+ Important note: the latter command from this point on will print release
+ identifiers that might look odd or wrong to you -- which they are not, as it's
+ totally normal to see release identifiers like '6.0-rc1-local-gcafec0cacaca0'
+ if you bisect between versions 6.1 and 6.2 for example.
+
+ [:ref:`details <bisectbuild_bisref>`]
+
+.. _bisecttest_bissbs:
+
+* Now check if the feature that regressed works in the kernel you just built.
+
+ You again might want to start by making sure the kernel you booted is the one
+ you just built::
+
+ cd ~/linux/
+ tail -n 1 ~/kernels-built
+ uname -r
+
+ Now verify if the feature that regressed works at this kernel bisection point.
+ If it does, run this::
+
+ git bisect good
+
+ If it does not, run this::
+
+ git bisect bad
+
+ Be sure about what you tell Git, as getting this wrong just once will send the
+ rest of the bisection totally off course.
+
+ While the bisection is ongoing, Git will use the information you provided to
+ find and check out another bisection point for you to test. While doing so, it
+ will print something like 'Bisecting: 675 revisions left to test after this
+ (roughly 10 steps)' to indicate how many further changes it expects to be
+ tested. Now build and install another kernel using the instructions from the
+ previous step; afterwards follow the instructions in this step again.
+
+ Repeat this again and again until you finish the bisection -- that's the case
+ when Git after tagging a change as 'good' or 'bad' prints something like
+ 'cafecaca0c0dacafecaca0c0dacafecaca0c0da is the first bad commit'; right
+ afterwards it will show some details about the culprit including the patch
+ description of the change. The latter might fill your terminal screen, so you
+ might need to scroll up to see the message mentioning the culprit;
+ alternatively, run ``git bisect log > ~/bisection-log``.
+
+ [:ref:`details <bisecttest_bisref>`]
+
+.. _bisectlog_bissbs:
+
+* Store Git's bisection log and the current .config file in a safe place before
+ telling Git to reset the sources to the state before the bisection::
+
+ cd ~/linux/
+ git bisect log > ~/bisection-log
+ cp .config ~/bisection-config-culprit
+ git bisect reset
+
+ [:ref:`details <bisectlog_bisref>`]
+
+.. _revert_bissbs:
+
+* Try reverting the culprit on top of latest mainline to see if this fixes your
+ regression.
+
+ This is optional, as it might be impossible or hard to realize. The former is
+ the case, if the bisection determined a merge commit as the culprit; the
+ latter happens if other changes depend on the culprit. But if the revert
+ succeeds, it is worth building another kernel, as it validates the result of
+ a bisection, which can easily deroute; it furthermore will let kernel
+ developers know, if they can resolve the regression with a quick revert.
+
+ Begin by checking out the latest codebase depending on the range you bisected:
+
+ * Did you face a regression within a stable/longterm series (say between
+ 6.0.13 and 6.0.15) that does not happen in mainline? Then check out the
+ latest codebase for the affected series like this::
+
+ git fetch stable
+ git switch --discard-changes --detach linux-6.0.y
+
+ * In all other cases check out latest mainline::
+
+ git fetch mainline
+ git switch --discard-changes --detach mainline/master
+
+ If you bisected a regression within a stable/longterm series that also
+ happens in mainline, there is one more thing to do: look up the mainline
+ commit-id. To do so, use a command like ``git show abcdcafecabcd`` to
+ view the patch description of the culprit. There will be a line near
+ the top which looks like 'commit cafec0cacaca0 upstream.' or
+ 'Upstream commit cafec0cacaca0'; use that commit-id in the next command
+ and not the one the bisection blamed.
+
+ Now try reverting the culprit by specifying its commit id::
+
+ git revert --no-edit cafec0cacaca0
+
+ If that fails, give up trying and move on to the next step; if it works,
+ adjust the tag to facilitate the identification and prevent accidentally
+ overwriting another kernel::
+
+ cp ~/kernel-config-working .config
+ ./scripts/config --set-str CONFIG_LOCALVERSION '-local-cafec0cacaca0-reverted'
+
+ Build a kernel using the familiar command sequence, just without copying the
+ the base .config over::
+
+ make olddefconfig &&
+ make -j $(nproc --all)
+ # * Check if the free space suffices holding another kernel:
+ df -h /boot/ /lib/modules/
+ sudo make modules_install
+ command -v installkernel && sudo make install
+ make -s kernelrelease | tee -a ~/kernels-built
+ reboot
+
+ Now check one last time if the feature that made you perform a bisection works
+ with that kernel: if everything went well, it should not show the regression.
+
+ [:ref:`details <revert_bisref>`]
+
+.. _introclosure_bissbs:
+
+Complementary tasks: cleanup during and after the bisection
+-----------------------------------------------------------
+
+During and after following this guide you might want or need to remove some of
+the kernels you installed: the boot menu otherwise will become confusing or
+space might run out.
+
+.. _makeroom_bissbs:
+
+* To remove one of the kernels you installed, look up its 'kernelrelease'
+ identifier. This guide stores them in '~/kernels-built', but the following
+ command will print them as well::
+
+ ls -ltr /lib/modules/*-local*
+
+ You in most situations want to remove the oldest kernels built during the
+ actual bisection (e.g. segment 3 of this guide). The two ones you created
+ beforehand (e.g. to test the latest codebase and the version considered
+ 'good') might become handy to verify something later -- thus better keep them
+ around, unless you are really short on storage space.
+
+ To remove the modules of a kernel with the kernelrelease identifier
+ '*6.0-rc1-local-gcafec0cacaca0*', start by removing the directory holding its
+ modules::
+
+ sudo rm -rf /lib/modules/6.0-rc1-local-gcafec0cacaca0
+
+ Afterwards try the following command::
+
+ sudo kernel-install -v remove 6.0-rc1-local-gcafec0cacaca0
+
+ On quite a few distributions this will delete all other kernel files installed
+ while also removing the kernel's entry from the boot menu. But on some
+ distributions kernel-install does not exist or leaves boot-loader entries or
+ kernel image and related files behind; in that case remove them as described
+ in the reference section.
+
+ [:ref:`details <makeroom_bisref>`]
+
+.. _finishingtouch_bissbs:
+
+* Once you have finished the bisection, do not immediately remove anything you
+ set up, as you might need a few things again. What is safe to remove depends
+ on the outcome of the bisection:
+
+ * Could you initially reproduce the regression with the latest codebase and
+ after the bisection were able to fix the problem by reverting the culprit on
+ top of the latest codebase? Then you want to keep those two kernels around
+ for a while, but safely remove all others with a '-local' in the release
+ identifier.
+
+ * Did the bisection end on a merge-commit or seems questionable for other
+ reasons? Then you want to keep as many kernels as possible around for a few
+ days: it's pretty likely that you will be asked to recheck something.
+
+ * In other cases it likely is a good idea to keep the following kernels around
+ for some time: the one built from the latest codebase, the one created from
+ the version considered 'good', and the last three or four you compiled
+ during the actual bisection process.
+
+ [:ref:`details <finishingtouch_bisref>`]
+
+.. _introoptional_bissbs:
+
+Optional: test reverts, patches, or later versions
+--------------------------------------------------
+
+While or after reporting a bug, you might want or potentially will be asked to
+test reverts, debug patches, proposed fixes, or other versions. In that case
+follow these instructions.
+
+* Update your Git clone and check out the latest code.
+
+ * In case you want to test mainline, fetch its latest changes before checking
+ its code out::
+
+ git fetch mainline
+ git switch --discard-changes --detach mainline/master
+
+ * In case you want to test a stable or longterm kernel, first add the branch
+ holding the series you are interested in (6.2 in the example), unless you
+ already did so earlier::
+
+ git remote set-branches --add stable linux-6.2.y
+
+ Then fetch the latest changes and check out the latest version from the
+ series::
+
+ git fetch stable
+ git switch --discard-changes --detach stable/linux-6.2.y
+
+* Copy your kernel build configuration over::
+
+ cp ~/kernel-config-working .config
+
+* Your next step depends on what you want to do:
+
+ * In case you just want to test the latest codebase, head to the next step,
+ you are already all set.
+
+ * In case you want to test if a revert fixes an issue, revert one or multiple
+ changes by specifying their commit ids::
+
+ git revert --no-edit cafec0cacaca0
+
+ Now give that kernel a special tag to facilitates its identification and
+ prevent accidentally overwriting another kernel::
+
+ ./scripts/config --set-str CONFIG_LOCALVERSION '-local-cafec0cacaca0-reverted'
+
+ * In case you want to test a patch, store the patch in a file like
+ '/tmp/foobars-proposed-fix-v1.patch' and apply it like this::
+
+ git apply /tmp/foobars-proposed-fix-v1.patch
+
+ In case of multiple patches, repeat this step with the others.
+
+ Now give that kernel a special tag to facilitates its identification and
+ prevent accidentally overwriting another kernel::
+
+ ./scripts/config --set-str CONFIG_LOCALVERSION '-local-foobars-fix-v1'
+
+* Build a kernel using the familiar commands, just without copying the kernel
+ build configuration over, as that has been taken care of already::
+
+ make olddefconfig &&
+ make -j $(nproc --all)
+ # * Check if the free space suffices holding another kernel:
+ df -h /boot/ /lib/modules/
+ sudo make modules_install
+ command -v installkernel && sudo make install
+ make -s kernelrelease | tee -a ~/kernels-built
+ reboot
+
+* Now verify you booted the newly built kernel and check it.
+
+[:ref:`details <introoptional_bisref>`]
+
+.. _submit_improvements_vbbr:
+
+Conclusion
+----------
+
+You have reached the end of the step-by-step guide.
+
+Did you run into trouble following any of the above steps not cleared up by the
+reference section below? Did you spot errors? Or do you have ideas how to
+improve the guide?
+
+If any of that applies, please take a moment and let the maintainer of this
+document know by email (Thorsten Leemhuis <linux@leemhuis.info>), ideally while
+CCing the Linux docs mailing list (linux-doc@vger.kernel.org). Such feedback is
+vital to improve this text further, which is in everybody's interest, as it
+will enable more people to master the task described here -- and hopefully also
+improve similar guides inspired by this one.
+
+
+Reference section for the step-by-step guide
+============================================
+
+This section holds additional information for almost all the items in the above
+step-by-step guide.
+
+Preparations for building your own kernels
+------------------------------------------
+
+ *The steps in this section lay the groundwork for all further tests.*
+ [:ref:`... <introprep_bissbs>`]
+
+The steps in all later sections of this guide depend on those described here.
+
+[:ref:`back to step-by-step guide <introprep_bissbs>`].
+
+.. _backup_bisref:
+
+Prepare for emergencies
+~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Create a fresh backup and put system repair and restore tools at hand.*
+ [:ref:`... <backup_bissbs>`]
+
+Remember, you are dealing with computers, which sometimes do unexpected things
+-- especially if you fiddle with crucial parts like the kernel of an operating
+system. That's what you are about to do in this process. Hence, better prepare
+for something going sideways, even if that should not happen.
+
+[:ref:`back to step-by-step guide <backup_bissbs>`]
+
+.. _vanilla_bisref:
+
+Remove anything related to externally maintained kernel modules
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Remove all software that depends on externally developed kernel drivers or
+ builds them automatically.* [:ref:`...<vanilla_bissbs>`]
+
+Externally developed kernel modules can easily cause trouble during a bisection.
+
+But there is a more important reason why this guide contains this step: most
+kernel developers will not care about reports about regressions occurring with
+kernels that utilize such modules. That's because such kernels are not
+considered 'vanilla' anymore, as Documentation/admin-guide/reporting-issues.rst
+explains in more detail.
+
+[:ref:`back to step-by-step guide <vanilla_bissbs>`]
+
+.. _secureboot_bisref:
+
+Deal with techniques like Secure Boot
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *On platforms with 'Secure Boot' or similar techniques, prepare everything to
+ ensure the system will permit your self-compiled kernel to boot later.*
+ [:ref:`... <secureboot_bissbs>`]
+
+Many modern systems allow only certain operating systems to start; that's why
+they reject booting self-compiled kernels by default.
+
+You ideally deal with this by making your platform trust your self-built kernels
+with the help of a certificate. How to do that is not described
+here, as it requires various steps that would take the text too far away from
+its purpose; 'Documentation/admin-guide/module-signing.rst' and various web
+sides already explain everything needed in more detail.
+
+Temporarily disabling solutions like Secure Boot is another way to make your own
+Linux boot. On commodity x86 systems it is possible to do this in the BIOS Setup
+utility; the required steps vary a lot between machines and therefore cannot be
+described here.
+
+On mainstream x86 Linux distributions there is a third and universal option:
+disable all Secure Boot restrictions for your Linux environment. You can
+initiate this process by running ``mokutil --disable-validation``; this will
+tell you to create a one-time password, which is safe to write down. Now
+restart; right after your BIOS performed all self-tests the bootloader Shim will
+show a blue box with a message 'Press any key to perform MOK management'. Hit
+some key before the countdown exposes, which will open a menu. Choose 'Change
+Secure Boot state'. Shim's 'MokManager' will now ask you to enter three
+randomly chosen characters from the one-time password specified earlier. Once
+you provided them, confirm you really want to disable the validation.
+Afterwards, permit MokManager to reboot the machine.
+
+[:ref:`back to step-by-step guide <secureboot_bissbs>`]
+
+.. _bootworking_bisref:
+
+Boot the last kernel that was working
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Boot into the last working kernel and briefly recheck if the feature that
+ regressed really works.* [:ref:`...<bootworking_bissbs>`]
+
+This will make later steps that cover creating and trimming the configuration do
+the right thing.
+
+[:ref:`back to step-by-step guide <bootworking_bissbs>`]
+
+.. _diskspace_bisref:
+
+Space requirements
+~~~~~~~~~~~~~~~~~~
+
+ *Ensure to have enough free space for building Linux.*
+ [:ref:`... <diskspace_bissbs>`]
+
+The numbers mentioned are rough estimates with a big extra charge to be on the
+safe side, so often you will need less.
+
+If you have space constraints, be sure to hay attention to the :ref:`step about
+debug symbols' <debugsymbols_bissbs>` and its :ref:`accompanying reference
+section' <debugsymbols_bisref>`, as disabling then will reduce the consumed disk
+space by quite a few gigabytes.
+
+[:ref:`back to step-by-step guide <diskspace_bissbs>`]
+
+.. _rangecheck_bisref:
+
+Bisection range
+~~~~~~~~~~~~~~~
+
+ *Determine the kernel versions considered 'good' and 'bad' throughout this
+ guide.* [:ref:`...<rangecheck_bissbs>`]
+
+Establishing the range of commits to be checked is mostly straightforward,
+except when a regression occurred when switching from a release of one stable
+series to a release of a later series (e.g. from 6.0.13 to 6.1.5). In that case
+Git will need some hand holding, as there is no straight line of descent.
+
+That's because with the release of 6.0 mainline carried on to 6.1 while the
+stable series 6.0.y branched to the side. It's therefore theoretically possible
+that the issue you face with 6.1.5 only worked in 6.0.13, as it was fixed by a
+commit that went into one of the 6.0.y releases, but never hit mainline or the
+6.1.y series. Thankfully that normally should not happen due to the way the
+stable/longterm maintainers maintain the code. It's thus pretty safe to assume
+6.0 as a 'good' kernel. That assumption will be tested anyway, as that kernel
+will be built and tested in the segment '2' of this guide; Git would force you
+to do this as well, if you tried bisecting between 6.0.13 and 6.1.15.
+
+[:ref:`back to step-by-step guide <rangecheck_bissbs>`]
+
+.. _buildrequires_bisref:
+
+Install build requirements
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Install all software required to build a Linux kernel.*
+ [:ref:`...<buildrequires_bissbs>`]
+
+The kernel is pretty stand-alone, but besides tools like the compiler you will
+sometimes need a few libraries to build one. How to install everything needed
+depends on your Linux distribution and the configuration of the kernel you are
+about to build.
+
+Here are a few examples what you typically need on some mainstream
+distributions:
+
+* Arch Linux and derivatives::
+
+ sudo pacman --needed -S bc binutils bison flex gcc git kmod libelf openssl \
+ pahole perl zlib ncurses qt6-base
+
+* Debian, Ubuntu, and derivatives::
+
+ sudo apt install bc binutils bison dwarves flex gcc git kmod libelf-dev \
+ libssl-dev make openssl pahole perl-base pkg-config zlib1g-dev \
+ libncurses-dev qt6-base-dev g++
+
+* Fedora and derivatives::
+
+ sudo dnf install binutils \
+ /usr/bin/{bc,bison,flex,gcc,git,openssl,make,perl,pahole,rpmbuild} \
+ /usr/include/{libelf.h,openssl/pkcs7.h,zlib.h,ncurses.h,qt6/QtGui/QAction}
+
+* openSUSE and derivatives::
+
+ sudo zypper install bc binutils bison dwarves flex gcc git \
+ kernel-install-tools libelf-devel make modutils openssl openssl-devel \
+ perl-base zlib-devel rpm-build ncurses-devel qt6-base-devel
+
+These commands install a few packages that are often, but not always needed. You
+for example might want to skip installing the development headers for ncurses,
+which you will only need in case you later might want to adjust the kernel build
+configuration using make the targets 'menuconfig' or 'nconfig'; likewise omit
+the headers of Qt6 if you do not plan to adjust the .config using 'xconfig'.
+
+You furthermore might need additional libraries and their development headers
+for tasks not covered in this guide -- for example when building utilities from
+the kernel's tools/ directory.
+
+[:ref:`back to step-by-step guide <buildrequires_bissbs>`]
+
+.. _sources_bisref:
+
+Download the sources using Git
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Retrieve the Linux mainline sources.*
+ [:ref:`...<sources_bissbs>`]
+
+The step-by-step guide outlines how to download the Linux sources using a full
+Git clone of Linus' mainline repository. There is nothing more to say about
+that -- but there are two alternatives ways to retrieve the sources that might
+work better for you:
+
+* If you have an unreliable internet connection, consider
+ :ref:`using a 'Git bundle'<sources_bundle_bisref>`.
+
+* If downloading the complete repository would take too long or requires too
+ much storage space, consider :ref:`using a 'shallow
+ clone'<sources_shallow_bisref>`.
+
+.. _sources_bundle_bisref:
+
+Downloading Linux mainline sources using a bundle
+"""""""""""""""""""""""""""""""""""""""""""""""""
+
+Use the following commands to retrieve the Linux mainline sources using a
+bundle::
+
+ wget -c \
+ https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/clone.bundle
+ git clone --no-checkout clone.bundle ~/linux/
+ cd ~/linux/
+ git remote remove origin
+ git remote add mainline \
+ https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
+ git fetch mainline
+ git remote add -t master stable \
+ https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
+
+In case the 'wget' command fails, just re-execute it, it will pick up where
+it left off.
+
+[:ref:`back to step-by-step guide <sources_bissbs>`]
+[:ref:`back to section intro <sources_bisref>`]
+
+.. _sources_shallow_bisref:
+
+Downloading Linux mainline sources using a shallow clone
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+First, execute the following command to retrieve the latest mainline codebase::
+
+ git clone -o mainline --no-checkout --depth 1 -b master \
+ https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git ~/linux/
+ cd ~/linux/
+ git remote add -t master stable \
+ https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
+
+Now deepen your clone's history to the second predecessor of the mainline
+release of your 'good' version. In case the latter are 6.0 or 6.0.13, 5.19 would
+be the first predecessor and 5.18 the second -- hence deepen the history up to
+that version::
+
+ git fetch --shallow-exclude=v5.18 mainline
+
+Afterwards add the stable Git repository as remote and all required stable
+branches as explained in the step-by-step guide.
+
+Note, shallow clones have a few peculiar characteristics:
+
+* For bisections the history needs to be deepened a few mainline versions
+ farther than it seems necessary, as explained above already. That's because
+ Git otherwise will be unable to revert or describe most of the commits within
+ a range (say 6.1..6.2), as they are internally based on earlier kernels
+ releases (like 6.0-rc2 or 5.19-rc3).
+
+* This document in most places uses ``git fetch`` with ``--shallow-exclude=``
+ to specify the earliest version you care about (or to be precise: its git
+ tag). You alternatively can use the parameter ``--shallow-since=`` to specify
+ an absolute (say ``'2023-07-15'``) or relative (``'12 months'``) date to
+ define the depth of the history you want to download. When using them while
+ bisecting mainline, ensure to deepen the history to at least 7 months before
+ the release of the mainline release your 'good' kernel is based on.
+
+* Be warned, when deepening your clone you might encounter an error like
+ 'fatal: error in object: unshallow cafecaca0c0dacafecaca0c0dacafecaca0c0da'.
+ In that case run ``git repack -d`` and try again.
+
+[:ref:`back to step-by-step guide <sources_bissbs>`]
+[:ref:`back to section intro <sources_bisref>`]
+
+.. _oldconfig_bisref:
+
+Start defining the build configuration for your kernel
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Start preparing a kernel build configuration (the '.config' file).*
+ [:ref:`... <oldconfig_bissbs>`]
+
+*Note, this is the first of multiple steps in this guide that create or modify
+build artifacts. The commands used in this guide store them right in the source
+tree to keep things simple. In case you prefer storing the build artifacts
+separately, create a directory like '~/linux-builddir/' and add the parameter
+``O=~/linux-builddir/`` to all make calls used throughout this guide. You will
+have to point other commands there as well -- among them the ``./scripts/config
+[...]`` commands, which will require ``--file ~/linux-builddir/.config`` to
+locate the right build configuration.*
+
+Two things can easily go wrong when creating a .config file as advised:
+
+* The oldconfig target will use a .config file from your build directory, if
+ one is already present there (e.g. '~/linux/.config'). That's totally fine if
+ that's what you intend (see next step), but in all other cases you want to
+ delete it. This for example is important in case you followed this guide
+ further, but due to problems come back here to redo the configuration from
+ scratch.
+
+* Sometimes olddefconfig is unable to locate the .config file for your running
+ kernel and will use defaults, as briefly outlined in the guide. In that case
+ check if your distribution ships the configuration somewhere and manually put
+ it in the right place (e.g. '~/linux/.config') if it does. On distributions
+ where /proc/config.gz exists this can be achieved using this command::
+
+ zcat /proc/config.gz > .config
+
+ Once you put it there, run ``make olddefconfig`` again to adjust it to the
+ needs of the kernel about to be built.
+
+Note, the olddefconfig target will set any undefined build options to their
+default value. If you prefer to set such configuration options manually, use
+``make oldconfig`` instead. Then for each undefined configuration option you
+will be asked how to proceed; in case you are unsure what to answer, simply hit
+'enter' to apply the default value. Note though that for bisections you normally
+want to go with the defaults, as you otherwise might enable a new feature that
+causes a problem looking like regressions (for example due to security
+restrictions).
+
+Occasionally odd things happen when trying to use a config file prepared for one
+kernel (say 6.1) on an older mainline release -- especially if it is much older
+(say 5.15). That's one of the reasons why the previous step in the guide told
+you to boot the kernel where everything works. If you manually add a .config
+file you thus want to ensure it's from the working kernel and not from a one
+that shows the regression.
+
+In case you want to build kernels for another machine, locate its kernel build
+configuration; usually ``ls /boot/config-$(uname -r)`` will print its name. Copy
+that file to the build machine and store it as ~/linux/.config; afterwards run
+``make olddefconfig`` to adjust it.
+
+[:ref:`back to step-by-step guide <oldconfig_bissbs>`]
+
+.. _localmodconfig_bisref:
+
+Trim the build configuration for your kernel
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Disable any kernel modules apparently superfluous for your setup.*
+ [:ref:`... <localmodconfig_bissbs>`]
+
+As explained briefly in the step-by-step guide already: with localmodconfig it
+can easily happen that your self-built kernels will lack modules for tasks you
+did not perform at least once before utilizing this make target. That happens
+when a task requires kernel modules which are only autoloaded when you execute
+it for the first time. So when you never performed that task since starting your
+kernel the modules will not have been loaded -- and from localmodconfig's point
+of view look superfluous, which thus disables them to reduce the amount of code
+to be compiled.
+
+You can try to avoid this by performing typical tasks that often will autoload
+additional kernel modules: start a VM, establish VPN connections, loop-mount a
+CD/DVD ISO, mount network shares (CIFS, NFS, ...), and connect all external
+devices (2FA keys, headsets, webcams, ...) as well as storage devices with file
+systems you otherwise do not utilize (btrfs, ext4, FAT, NTFS, XFS, ...). But it
+is hard to think of everything that might be needed -- even kernel developers
+often forget one thing or another at this point.
+
+Do not let that risk bother you, especially when compiling a kernel only for
+testing purposes: everything typically crucial will be there. And if you forget
+something important you can turn on a missing feature manually later and quickly
+run the commands again to compile and install a kernel that has everything you
+need.
+
+But if you plan to build and use self-built kernels regularly, you might want to
+reduce the risk by recording which modules your system loads over the course of
+a few weeks. You can automate this with `modprobed-db
+<https://github.com/graysky2/modprobed-db>`_. Afterwards use ``LSMOD=<path>`` to
+point localmodconfig to the list of modules modprobed-db noticed being used::
+
+ yes '' | make LSMOD='${HOME}'/.config/modprobed.db localmodconfig
+
+That parameter also allows you to build trimmed kernels for another machine in
+case you copied a suitable .config over to use as base (see previous step). Just
+run ``lsmod > lsmod_foo-machine`` on that system and copy the generated file to
+your build's host home directory. Then run these commands instead of the one the
+step-by-step guide mentions::
+
+ yes '' | make LSMOD=~/lsmod_foo-machine localmodconfig
+
+[:ref:`back to step-by-step guide <localmodconfig_bissbs>`]
+
+.. _tagging_bisref:
+
+Tag the kernels about to be build
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Ensure all the kernels you will build are clearly identifiable using a
+ special tag and a unique version identifier.* [:ref:`... <tagging_bissbs>`]
+
+This allows you to differentiate your distribution's kernels from those created
+during this process, as the file or directories for the latter will contain
+'-local' in the name; it also helps picking the right entry in the boot menu and
+not lose track of you kernels, as their version numbers will look slightly
+confusing during the bisection.
+
+[:ref:`back to step-by-step guide <tagging_bissbs>`]
+
+.. _debugsymbols_bisref:
+
+Decide to enable or disable debug symbols
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Decide how to handle debug symbols.* [:ref:`... <debugsymbols_bissbs>`]
+
+Having debug symbols available can be important when your kernel throws a
+'panic', 'Oops', 'warning', or 'BUG' later when running, as then you will be
+able to find the exact place where the problem occurred in the code. But
+collecting and embedding the needed debug information takes time and consumes
+quite a bit of space: in late 2022 the build artifacts for a typical x86 kernel
+trimmed with localmodconfig consumed around 5 Gigabyte of space with debug
+symbols, but less than 1 when they were disabled. The resulting kernel image and
+modules are bigger as well, which increases storage requirements for /boot/ and
+load times.
+
+In case you want a small kernel and are unlikely to decode a stack trace later,
+you thus might want to disable debug symbols to avoid those downsides. If it
+later turns out that you need them, just enable them as shown and rebuild the
+kernel.
+
+You on the other hand definitely want to enable them for this process, if there
+is a decent chance that you need to decode a stack trace later. The section
+'Decode failure messages' in Documentation/admin-guide/reporting-issues.rst
+explains this process in more detail.
+
+[:ref:`back to step-by-step guide <debugsymbols_bissbs>`]
+
+.. _configmods_bisref:
+
+Adjust build configuration
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Check if you may want or need to adjust some other kernel configuration
+ options:*
+
+Depending on your needs you at this point might want or have to adjust some
+kernel configuration options.
+
+.. _configmods_distros_bisref:
+
+Distro specific adjustments
+"""""""""""""""""""""""""""
+
+ *Are you running* [:ref:`... <configmods_bissbs>`]
+
+The following sections help you to avoid build problems that are known to occur
+when following this guide on a few commodity distributions.
+
+**Debian:**
+
+* Remove a stale reference to a certificate file that would cause your build to
+ fail::
+
+ ./scripts/config --set-str SYSTEM_TRUSTED_KEYS ''
+
+ Alternatively, download the needed certificate and make that configuration
+ option point to it, as `the Debian handbook explains in more detail
+ <https://debian-handbook.info/browse/stable/sect.kernel-compilation.html>`_
+ -- or generate your own, as explained in
+ Documentation/admin-guide/module-signing.rst.
+
+[:ref:`back to step-by-step guide <configmods_bissbs>`]
+
+.. _configmods_individual_bisref:
+
+Individual adjustments
+""""""""""""""""""""""
+
+ *If you want to influence the other aspects of the configuration, do so
+ now.* [:ref:`... <configmods_bissbs>`]
+
+At this point you can use a command like ``make menuconfig`` or ``make nconfig``
+to enable or disable certain features using a text-based user interface; to use
+a graphical configuration utility, run ``make xconfig`` instead. Both of them
+require development libraries from toolkits they are rely on (ncurses
+respectively Qt5 or Qt6); an error message will tell you if something required
+is missing.
+
+[:ref:`back to step-by-step guide <configmods_bissbs>`]
+
+.. _saveconfig_bisref:
+
+Put the .config file aside
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Reprocess the .config after the latest changes and store it in a safe place.*
+ [:ref:`... <saveconfig_bissbs>`]
+
+Put the .config you prepared aside, as you want to copy it back to the build
+directory every time during this guide before you start building another
+kernel. That's because going back and forth between different versions can alter
+.config files in odd ways; those occasionally cause side effects that could
+confuse testing or in some cases render the result of your bisection
+meaningless.
+
+[:ref:`back to step-by-step guide <saveconfig_bissbs>`]
+
+.. _introlatestcheck_bisref:
+
+Try to reproduce the problem with the latest codebase
+-----------------------------------------------------
+
+ *Verify the regression is not caused by some .config change and check if it
+ still occurs with the latest codebase.* [:ref:`... <introlatestcheck_bissbs>`]
+
+For some readers it might seem unnecessary to check the latest codebase at this
+point, especially if you did that already with a kernel prepared by your
+distributor or face a regression within a stable/longterm series. But it's
+highly recommended for these reasons:
+
+* You will run into any problems caused by your setup before you actually begin
+ a bisection. That will make it a lot easier to differentiate between 'this
+ most likely is some problem in my setup' and 'this change needs to be skipped
+ during the bisection, as the kernel sources at that stage contain an unrelated
+ problem that causes building or booting to fail'.
+
+* These steps will rule out if your problem is caused by some change in the
+ build configuration between the 'working' and the 'broken' kernel. This for
+ example can happen when your distributor enabled an additional security
+ feature in the newer kernel which was disabled or not yet supported by the
+ older kernel. That security feature might get into the way of something you
+ do -- in which case your problem from the perspective of the Linux kernel
+ upstream developers is not a regression, as
+ Documentation/admin-guide/reporting-regressions.rst explains in more detail.
+ You thus would waste your time if you'd try to bisect this.
+
+* If the cause for your regression was already fixed in the latest mainline
+ codebase, you'd perform the bisection for nothing. This holds true for a
+ regression you encountered with a stable/longterm release as well, as they are
+ often caused by problems in mainline changes that were backported -- in which
+ case the problem will have to be fixed in mainline first. Maybe it already was
+ fixed there and the fix is already in the process of being backported.
+
+* For regressions within a stable/longterm series it's furthermore crucial to
+ know if the issue is specific to that series or also happens in the mainline
+ kernel, as the report needs to be sent to different people:
+
+ * Regressions specific to a stable/longterm series are the stable team's
+ responsibility; mainline Linux developers might or might not care.
+
+ * Regressions also happening in mainline are something the regular Linux
+ developers and maintainers have to handle; the stable team does not care
+ and does not need to be involved in the report, they just should be told
+ to backport the fix once it's ready.
+
+ Your report might be ignored if you send it to the wrong party -- and even
+ when you get a reply there is a decent chance that developers tell you to
+ evaluate which of the two cases it is before they take a closer look.
+
+[:ref:`back to step-by-step guide <introlatestcheck_bissbs>`]
+
+.. _checkoutmaster_bisref:
+
+Check out the latest Linux codebase
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Check out the latest Linux codebase.*
+ [:ref:`... <checkoutmaster_bissbs>`]
+
+In case you later want to recheck if an ever newer codebase might fix the
+problem, remember to run that ``git fetch --shallow-exclude [...]`` command
+again mentioned earlier to update your local Git repository.
+
+[:ref:`back to step-by-step guide <checkoutmaster_bissbs>`]
+
+.. _build_bisref:
+
+Build your kernel
+~~~~~~~~~~~~~~~~~
+
+ *Build the image and the modules of your first kernel using the config file
+ you prepared.* [:ref:`... <build_bissbs>`]
+
+A lot can go wrong at this stage, but the instructions below will help you help
+yourself. Another subsection explains how to directly package your kernel up as
+deb, rpm or tar file.
+
+Dealing with build errors
+"""""""""""""""""""""""""
+
+When a build error occurs, it might be caused by some aspect of your machine's
+setup that often can be fixed quickly; other times though the problem lies in
+the code and can only be fixed by a developer. A close examination of the
+failure messages coupled with some research on the internet will often tell you
+which of the two it is. To perform such investigation, restart the build
+process like this::
+
+ make V=1
+
+The ``V=1`` activates verbose output, which might be needed to see the actual
+error. To make it easier to spot, this command also omits the ``-j $(nproc
+--all)`` used earlier to utilize every CPU core in the system for the job -- but
+this parallelism also results in some clutter when failures occur.
+
+After a few seconds the build process should run into the error again. Now try
+to find the most crucial line describing the problem. Then search the internet
+for the most important and non-generic section of that line (say 4 to 8 words);
+avoid or remove anything that looks remotely system-specific, like your username
+or local path names like ``/home/username/linux/``. First try your regular
+internet search engine with that string, afterwards search Linux kernel mailing
+lists via `lore.kernel.org/all/ <https://lore.kernel.org/all/>`_.
+
+This most of the time will find something that will explain what is wrong; quite
+often one of the hits will provide a solution for your problem, too. If you
+do not find anything that matches your problem, try again from a different angle
+by modifying your search terms or using another line from the error messages.
+
+In the end, most issues you run into have likely been encountered and
+reported by others already. That includes issues where the cause is not your
+system, but lies in the code. If you run into one of those, you might thus find
+a solution (e.g. a patch) or workaround for your issue, too.
+
+Package your kernel up
+""""""""""""""""""""""
+
+The step-by-step guide uses the default make targets (e.g. 'bzImage' and
+'modules' on x86) to build the image and the modules of your kernel, which later
+steps of the guide then install. You instead can also directly build everything
+and directly package it up by using one of the following targets:
+
+* ``make -j $(nproc --all) bindeb-pkg`` to generate a deb package
+
+* ``make -j $(nproc --all) binrpm-pkg`` to generate a rpm package
+
+* ``make -j $(nproc --all) tarbz2-pkg`` to generate a bz2 compressed tarball
+
+This is just a selection of available make targets for this purpose, see
+``make help`` for others. You can also use these targets after running
+``make -j $(nproc --all)``, as they will pick up everything already built.
+
+If you employ the targets to generate deb or rpm packages, ignore the
+step-by-step guide's instructions on installing and removing your kernel;
+instead install and remove the packages using the package utility for the format
+(e.g. dpkg and rpm) or a package management utility build on top of them (apt,
+aptitude, dnf/yum, zypper, ...). Be aware that the packages generated using
+these two make targets are designed to work on various distributions utilizing
+those formats, they thus will sometimes behave differently than your
+distribution's kernel packages.
+
+[:ref:`back to step-by-step guide <build_bissbs>`]
+
+.. _install_bisref:
+
+Put the kernel in place
+~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Install the kernel you just built.* [:ref:`... <install_bissbs>`]
+
+What you need to do after executing the command in the step-by-step guide
+depends on the existence and the implementation of ``/sbin/installkernel``
+executable on your distribution.
+
+If installkernel is found, the kernel's build system will delegate the actual
+installation of your kernel image to this executable, which then performs some
+or all of these tasks:
+
+* On almost all Linux distributions installkernel will store your kernel's
+ image in /boot/, usually as '/boot/vmlinuz-<kernelrelease_id>'; often it will
+ put a 'System.map-<kernelrelease_id>' alongside it.
+
+* On most distributions installkernel will then generate an 'initramfs'
+ (sometimes also called 'initrd'), which usually are stored as
+ '/boot/initramfs-<kernelrelease_id>.img' or
+ '/boot/initrd-<kernelrelease_id>'. Commodity distributions rely on this file
+ for booting, hence ensure to execute the make target 'modules_install' first,
+ as your distribution's initramfs generator otherwise will be unable to find
+ the modules that go into the image.
+
+* On some distributions installkernel will then add an entry for your kernel
+ to your bootloader's configuration.
+
+You have to take care of some or all of the tasks yourself, if your
+distribution lacks an installkernel script or does only handle part of them.
+Consult the distribution's documentation for details. If in doubt, install the
+kernel manually::
+
+ sudo install -m 0600 $(make -s image_name) /boot/vmlinuz-$(make -s kernelrelease)
+ sudo install -m 0600 System.map /boot/System.map-$(make -s kernelrelease)
+
+Now generate your initramfs using the tools your distribution provides for this
+process. Afterwards add your kernel to your bootloader configuration and reboot.
+
+[:ref:`back to step-by-step guide <install_bissbs>`]
+
+.. _storagespace_bisref:
+
+Storage requirements per kernel
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Check how much storage space the kernel, its modules, and other related files
+ like the initramfs consume.* [:ref:`... <storagespace_bissbs>`]
+
+The kernels built during a bisection consume quite a bit of space in /boot/ and
+/lib/modules/, especially if you enabled debug symbols. That makes it easy to
+fill up volumes during a bisection -- and due to that even kernels which used to
+work earlier might fail to boot. To prevent that you will need to know how much
+space each installed kernel typically requires.
+
+Note, most of the time the pattern '/boot/*$(make -s kernelrelease)*' used in
+the guide will match all files needed to boot your kernel -- but neither the
+path nor the naming scheme are mandatory. On some distributions you thus will
+need to look in different places.
+
+[:ref:`back to step-by-step guide <storagespace_bissbs>`]
+
+.. _tainted_bisref:
+
+Check if your newly built kernel considers itself 'tainted'
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Check if the kernel marked itself as 'tainted'.*
+ [:ref:`... <tainted_bissbs>`]
+
+Linux marks itself as tainted when something happens that potentially leads to
+follow-up errors that look totally unrelated. That is why developers might
+ignore or react scantly to reports from tainted kernels -- unless of course the
+kernel set the flag right when the reported bug occurred.
+
+That's why you want check why a kernel is tainted as explained in
+Documentation/admin-guide/tainted-kernels.rst; doing so is also in your own
+interest, as your testing might be flawed otherwise.
+
+[:ref:`back to step-by-step guide <tainted_bissbs>`]
+
+.. _recheckbroken_bisref:
+
+Check the kernel built from a recent mainline codebase
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Verify if your bug occurs with the newly built kernel.*
+ [:ref:`... <recheckbroken_bissbs>`]
+
+There are a couple of reasons why your bug or regression might not show up with
+the kernel you built from the latest codebase. These are the most frequent:
+
+* The bug was fixed meanwhile.
+
+* What you suspected to be a regression was caused by a change in the build
+ configuration the provider of your kernel carried out.
+
+* Your problem might be a race condition that does not show up with your kernel;
+ the trimmed build configuration, a different setting for debug symbols, the
+ compiler used, and various other things can cause this.
+
+* In case you encountered the regression with a stable/longterm kernel it might
+ be a problem that is specific to that series; the next step in this guide will
+ check this.
+
+[:ref:`back to step-by-step guide <recheckbroken_bissbs>`]
+
+.. _recheckstablebroken_bisref:
+
+Check the kernel built from the latest stable/longterm codebase
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Are you facing a regression within a stable/longterm release, but failed to
+ reproduce it with the kernel you just built using the latest mainline sources?
+ Then check if the latest codebase for the particular series might already fix
+ the problem.* [:ref:`... <recheckstablebroken_bissbs>`]
+
+If this kernel does not show the regression either, there most likely is no need
+for a bisection.
+
+[:ref:`back to step-by-step guide <recheckstablebroken_bissbs>`]
+
+.. _introworkingcheck_bisref:
+
+Ensure the 'good' version is really working well
+------------------------------------------------
+
+ *Check if the kernels you build work fine.*
+ [:ref:`... <introworkingcheck_bissbs>`]
+
+This section will reestablish a known working base. Skipping it might be
+appealing, but is usually a bad idea, as it does something important:
+
+It will ensure the .config file you prepared earlier actually works as expected.
+That is in your own interest, as trimming the configuration is not foolproof --
+and you might be building and testing ten or more kernels for nothing before
+starting to suspect something might be wrong with the build configuration.
+
+That alone is reason enough to spend the time on this, but not the only reason.
+
+Many readers of this guide normally run kernels that are patched, use add-on
+modules, or both. Those kernels thus are not considered 'vanilla' -- therefore
+it's possible that the thing that regressed might never have worked in vanilla
+builds of the 'good' version in the first place.
+
+There is a third reason for those that noticed a regression between
+stable/longterm kernels of different series (e.g. 6.0.13..6.1.5): it will
+ensure the kernel version you assumed to be 'good' earlier in the process (e.g.
+6.0) actually is working.
+
+[:ref:`back to step-by-step guide <introworkingcheck_bissbs>`]
+
+.. _recheckworking_bisref:
+
+Build your own version of the 'good' kernel
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Build your own variant of the working kernel and check if the feature that
+ regressed works as expected with it.* [:ref:`... <recheckworking_bissbs>`]
+
+In case the feature that broke with newer kernels does not work with your first
+self-built kernel, find and resolve the cause before moving on. There are a
+multitude of reasons why this might happen. Some ideas where to look:
+
+* Check the taint status and the output of ``dmesg``, maybe something unrelated
+ went wrong.
+
+* Maybe localmodconfig did something odd and disabled the module required to
+ test the feature? Then you might want to recreate a .config file based on the
+ one from the last working kernel and skip trimming it down; manually disabling
+ some features in the .config might work as well to reduce the build time.
+
+* Maybe it's not a kernel regression and something that is caused by some fluke,
+ a broken initramfs (also known as initrd), new firmware files, or an updated
+ userland software?
+
+* Maybe it was a feature added to your distributor's kernel which vanilla Linux
+ at that point never supported?
+
+Note, if you found and fixed problems with the .config file, you want to use it
+to build another kernel from the latest codebase, as your earlier tests with
+mainline and the latest version from an affected stable/longterm series were
+most likely flawed.
+
+[:ref:`back to step-by-step guide <recheckworking_bissbs>`]
+
+Perform a bisection and validate the result
+-------------------------------------------
+
+ *With all the preparations and precaution builds taken care of, you are now
+ ready to begin the bisection.* [:ref:`... <introbisect_bissbs>`]
+
+The steps in this segment perform and validate the bisection.
+
+[:ref:`back to step-by-step guide <introbisect_bissbs>`].
+
+.. _bisectstart_bisref:
+
+Start the bisection
+~~~~~~~~~~~~~~~~~~~
+
+ *Start the bisection and tell Git about the versions earlier established as
+ 'good' and 'bad'.* [:ref:`... <bisectstart_bissbs>`]
+
+This will start the bisection process; the last of the commands will make Git
+check out a commit round about half-way between the 'good' and the 'bad' changes
+for you to test.
+
+[:ref:`back to step-by-step guide <bisectstart_bissbs>`]
+
+.. _bisectbuild_bisref:
+
+Build a kernel from the bisection point
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Build, install, and boot a kernel from the code Git checked out using the
+ same commands you used earlier.* [:ref:`... <bisectbuild_bissbs>`]
+
+There are two things worth of note here:
+
+* Occasionally building the kernel will fail or it might not boot due some
+ problem in the code at the bisection point. In that case run this command::
+
+ git bisect skip
+
+ Git will then check out another commit nearby which with a bit of luck should
+ work better. Afterwards restart executing this step.
+
+* Those slightly odd looking version identifiers can happen during bisections,
+ because the Linux kernel subsystems prepare their changes for a new mainline
+ release (say 6.2) before its predecessor (e.g. 6.1) is finished. They thus
+ base them on a somewhat earlier point like 6.1-rc1 or even 6.0 -- and then
+ get merged for 6.2 without rebasing nor squashing them once 6.1 is out. This
+ leads to those slightly odd looking version identifiers coming up during
+ bisections.
+
+[:ref:`back to step-by-step guide <bisectbuild_bissbs>`]
+
+.. _bisecttest_bisref:
+
+Bisection checkpoint
+~~~~~~~~~~~~~~~~~~~~
+
+ *Check if the feature that regressed works in the kernel you just built.*
+ [:ref:`... <bisecttest_bissbs>`]
+
+Ensure what you tell Git is accurate: getting it wrong just one time will bring
+the rest of the bisection totally off course, hence all testing after that point
+will be for nothing.
+
+[:ref:`back to step-by-step guide <bisecttest_bissbs>`]
+
+.. _bisectlog_bisref:
+
+Put the bisection log away
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Store Git's bisection log and the current .config file in a safe place.*
+ [:ref:`... <bisectlog_bissbs>`]
+
+As indicated above: declaring just one kernel wrongly as 'good' or 'bad' will
+render the end result of a bisection useless. In that case you'd normally have
+to restart the bisection from scratch. The log can prevent that, as it might
+allow someone to point out where a bisection likely went sideways -- and then
+instead of testing ten or more kernels you might only have to build a few to
+resolve things.
+
+The .config file is put aside, as there is a decent chance that developers might
+ask for it after you report the regression.
+
+[:ref:`back to step-by-step guide <bisectlog_bissbs>`]
+
+.. _revert_bisref:
+
+Try reverting the culprit
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Try reverting the culprit on top of the latest codebase to see if this fixes
+ your regression.* [:ref:`... <revert_bissbs>`]
+
+This is an optional step, but whenever possible one you should try: there is a
+decent chance that developers will ask you to perform this step when you bring
+the bisection result up. So give it a try, you are in the flow already, building
+one more kernel shouldn't be a big deal at this point.
+
+The step-by-step guide covers everything relevant already except one slightly
+rare thing: did you bisected a regression that also happened with mainline using
+a stable/longterm series, but Git failed to revert the commit in mainline? Then
+try to revert the culprit in the affected stable/longterm series -- and if that
+succeeds, test that kernel version instead.
+
+[:ref:`back to step-by-step guide <revert_bissbs>`]
+
+Cleanup steps during and after following this guide
+---------------------------------------------------
+
+ *During and after following this guide you might want or need to remove some
+ of the kernels you installed.* [:ref:`... <introclosure_bissbs>`]
+
+The steps in this section describe clean-up procedures.
+
+[:ref:`back to step-by-step guide <introclosure_bissbs>`].
+
+.. _makeroom_bisref:
+
+Cleaning up during the bisection
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *To remove one of the kernels you installed, look up its 'kernelrelease'
+ identifier.* [:ref:`... <makeroom_bissbs>`]
+
+The kernels you install during this process are easy to remove later, as its
+parts are only stored in two places and clearly identifiable. You thus do not
+need to worry to mess up your machine when you install a kernel manually (and
+thus bypass your distribution's packaging system): all parts of your kernels are
+relatively easy to remove later.
+
+One of the two places is a directory in /lib/modules/, which holds the modules
+for each installed kernel. This directory is named after the kernel's release
+identifier; hence, to remove all modules for one of the kernels you built,
+simply remove its modules directory in /lib/modules/.
+
+The other place is /boot/, where typically two up to five files will be placed
+during installation of a kernel. All of them usually contain the release name in
+their file name, but how many files and their exact names depend somewhat on
+your distribution's installkernel executable and its initramfs generator. On
+some distributions the ``kernel-install remove...`` command mentioned in the
+step-by-step guide will delete all of these files for you while also removing
+the menu entry for the kernel from your bootloader configuration. On others you
+have to take care of these two tasks yourself. The following command should
+interactively remove the three main files of a kernel with the release name
+'6.0-rc1-local-gcafec0cacaca0'::
+
+ rm -i /boot/{System.map,vmlinuz,initr}-6.0-rc1-local-gcafec0cacaca0
+
+Afterwards check for other files in /boot/ that have
+'6.0-rc1-local-gcafec0cacaca0' in their name and consider deleting them as well.
+Now remove the boot entry for the kernel from your bootloader's configuration;
+the steps to do that vary quite a bit between Linux distributions.
+
+Note, be careful with wildcards like '*' when deleting files or directories
+for kernels manually: you might accidentally remove files of a 6.0.13 kernel
+when all you want is to remove 6.0 or 6.0.1.
+
+[:ref:`back to step-by-step guide <makeroom_bissbs>`]
+
+Cleaning up after the bisection
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. _finishingtouch_bisref:
+
+ *Once you have finished the bisection, do not immediately remove anything
+ you set up, as you might need a few things again.*
+ [:ref:`... <finishingtouch_bissbs>`]
+
+When you are really short of storage space removing the kernels as described in
+the step-by-step guide might not free as much space as you would like. In that
+case consider running ``rm -rf ~/linux/*`` as well now. This will remove the
+build artifacts and the Linux sources, but will leave the Git repository
+(~/linux/.git/) behind -- a simple ``git reset --hard`` thus will bring the
+sources back.
+
+Removing the repository as well would likely be unwise at this point: there
+is a decent chance developers will ask you to build another kernel to
+perform additional tests -- like testing a debug patch or a proposed fix.
+Details on how to perform those can be found in the section :ref:`Optional
+tasks: test reverts, patches, or later versions <introoptional_bissbs>`.
+
+Additional tests are also the reason why you want to keep the
+~/kernel-config-working file around for a few weeks.
+
+[:ref:`back to step-by-step guide <finishingtouch_bissbs>`]
+
+.. _introoptional_bisref:
+
+Test reverts, patches, or later versions
+----------------------------------------
+
+ *While or after reporting a bug, you might want or potentially will be asked
+ to test reverts, patches, proposed fixes, or other versions.*
+ [:ref:`... <introoptional_bissbs>`]
+
+All the commands used in this section should be pretty straight forward, so
+there is not much to add except one thing: when setting a kernel tag as
+instructed, ensure it is not much longer than the one used in the example, as
+problems will arise if the kernelrelease identifier exceeds 63 characters.
+
+[:ref:`back to step-by-step guide <introoptional_bissbs>`].
+
+
+Additional information
+======================
+
+.. _buildhost_bis:
+
+Build kernels on a different machine
+------------------------------------
+
+To compile kernels on another system, slightly alter the step-by-step guide's
+instructions:
+
+* Start following the guide on the machine where you want to install and test
+ the kernels later.
+
+* After executing ':ref:`Boot into the working kernel and briefly use the
+ apparently broken feature <bootworking_bissbs>`', save the list of loaded
+ modules to a file using ``lsmod > ~/test-machine-lsmod``. Then locate the
+ build configuration for the running kernel (see ':ref:`Start defining the
+ build configuration for your kernel <oldconfig_bisref>`' for hints on where
+ to find it) and store it as '~/test-machine-config-working'. Transfer both
+ files to the home directory of your build host.
+
+* Continue the guide on the build host (e.g. with ':ref:`Ensure to have enough
+ free space for building [...] <diskspace_bissbs>`').
+
+* When you reach ':ref:`Start preparing a kernel build configuration[...]
+ <oldconfig_bissbs>`': before running ``make olddefconfig`` for the first time,
+ execute the following command to base your configuration on the one from the
+ test machine's 'working' kernel::
+
+ cp ~/test-machine-config-working ~/linux/.config
+
+* During the next step to ':ref:`disable any apparently superfluous kernel
+ modules <localmodconfig_bissbs>`' use the following command instead::
+
+ yes '' | make localmodconfig LSMOD=~/lsmod_foo-machine localmodconfig
+
+* Continue the guide, but ignore the instructions outlining how to compile,
+ install, and reboot into a kernel every time they come up. Instead build
+ like this::
+
+ cp ~/kernel-config-working .config
+ make olddefconfig &&
+ make -j $(nproc --all) targz-pkg
+
+ This will generate a gzipped tar file whose name is printed in the last
+ line shown; for example, a kernel with the kernelrelease identifier
+ '6.0.0-rc1-local-g928a87efa423' built for x86 machines usually will
+ be stored as '~/linux/linux-6.0.0-rc1-local-g928a87efa423-x86.tar.gz'.
+
+ Copy that file to your test machine's home directory.
+
+* Switch to the test machine to check if you have enough space to hold another
+ kernel. Then extract the file you transferred::
+
+ sudo tar -xvzf ~/linux-6.0.0-rc1-local-g928a87efa423-x86.tar.gz -C /
+
+ Afterwards :ref:`generate the initramfs and add the kernel to your boot
+ loader's configuration <install_bisref>`; on some distributions the following
+ command will take care of both these tasks::
+
+ sudo /sbin/installkernel 6.0.0-rc1-local-g928a87efa423 /boot/vmlinuz-6.0.0-rc1-local-g928a87efa423
+
+ Now reboot and ensure you started the intended kernel.
+
+This approach even works when building for another architecture: just install
+cross-compilers and add the appropriate parameters to every invocation of make
+(e.g. ``make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- [...]``).
+
+Additional reading material
+---------------------------
+
+* The `man page for 'git bisect' <https://git-scm.com/docs/git-bisect>`_ and
+ `fighting regressions with 'git bisect' <https://git-scm.com/docs/git-bisect-lk2009.html>`_
+ in the Git documentation.
+* `Working with git bisect <https://nathanchance.dev/posts/working-with-git-bisect/>`_
+ from kernel developer Nathan Chancellor.
+* `Using Git bisect to figure out when brokenness was introduced <http://webchick.net/node/99>`_.
+* `Fully automated bisecting with 'git bisect run' <https://lwn.net/Articles/317154>`_.
+
+..
+ end-of-content
+..
+ This document is maintained by Thorsten Leemhuis <linux@leemhuis.info>. If
+ you spot a typo or small mistake, feel free to let him know directly and
+ he'll fix it. You are free to do the same in a mostly informal way if you
+ want to contribute changes to the text -- but for copyright reasons please CC
+ linux-doc@vger.kernel.org and 'sign-off' your contribution as
+ Documentation/process/submitting-patches.rst explains in the section 'Sign
+ your work - the Developer's Certificate of Origin'.
+..
+ This text is available under GPL-2.0+ or CC-BY-4.0, as stated at the top
+ of the file. If you want to distribute this text under CC-BY-4.0 only,
+ please use 'The Linux kernel development community' for author attribution
+ and link this as source:
+ https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst
+
+..
+ Note: Only the content of this RST file as found in the Linux kernel sources
+ is available under CC-BY-4.0, as versions of this text that were processed
+ (for example by the kernel's build system) might contain content taken from
+ files which use a more restrictive license.
diff --git a/Documentation/admin-guide/video-output.rst b/Documentation/admin-guide/video-output.rst
new file mode 100644
index 000000000000..56d6fa2e2368
--- /dev/null
+++ b/Documentation/admin-guide/video-output.rst
@@ -0,0 +1,34 @@
+Video Output Switcher Control
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+2006 luming.yu@intel.com
+
+The output sysfs class driver provides an abstract video output layer that
+can be used to hook platform specific methods to enable/disable video output
+device through common sysfs interface. For example, on my IBM ThinkPad T42
+laptop, The ACPI video driver registered its output devices and read/write
+method for 'state' with output sysfs class. The user interface under sysfs is::
+
+ linux:/sys/class/video_output # tree .
+ .
+ |-- CRT0
+ | |-- device -> ../../../devices/pci0000:00/0000:00:01.0
+ | |-- state
+ | |-- subsystem -> ../../../class/video_output
+ | `-- uevent
+ |-- DVI0
+ | |-- device -> ../../../devices/pci0000:00/0000:00:01.0
+ | |-- state
+ | |-- subsystem -> ../../../class/video_output
+ | `-- uevent
+ |-- LCD0
+ | |-- device -> ../../../devices/pci0000:00/0000:00:01.0
+ | |-- state
+ | |-- subsystem -> ../../../class/video_output
+ | `-- uevent
+ `-- TV0
+ |-- device -> ../../../devices/pci0000:00/0000:00:01.0
+ |-- state
+ |-- subsystem -> ../../../class/video_output
+ `-- uevent
+
diff --git a/Documentation/admin-guide/workload-tracing.rst b/Documentation/admin-guide/workload-tracing.rst
new file mode 100644
index 000000000000..d6313890ee41
--- /dev/null
+++ b/Documentation/admin-guide/workload-tracing.rst
@@ -0,0 +1,606 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR CC-BY-4.0)
+
+======================================================
+Discovering Linux kernel subsystems used by a workload
+======================================================
+
+:Authors: - Shuah Khan <skhan@linuxfoundation.org>
+ - Shefali Sharma <sshefali021@gmail.com>
+:maintained-by: Shuah Khan <skhan@linuxfoundation.org>
+
+Key Points
+==========
+
+ * Understanding system resources necessary to build and run a workload
+ is important.
+ * Linux tracing and strace can be used to discover the system resources
+ in use by a workload. The completeness of the system usage information
+ depends on the completeness of coverage of a workload.
+ * Performance and security of the operating system can be analyzed with
+ the help of tools such as:
+ `perf <https://man7.org/linux/man-pages/man1/perf.1.html>`_,
+ `stress-ng <https://www.mankier.com/1/stress-ng>`_,
+ `paxtest <https://github.com/opntr/paxtest-freebsd>`_.
+ * Once we discover and understand the workload needs, we can focus on them
+ to avoid regressions and use it to evaluate safety considerations.
+
+Methodology
+===========
+
+`strace <https://man7.org/linux/man-pages/man1/strace.1.html>`_ is a
+diagnostic, instructional, and debugging tool and can be used to discover
+the system resources in use by a workload. Once we discover and understand
+the workload needs, we can focus on them to avoid regressions and use it
+to evaluate safety considerations. We use strace tool to trace workloads.
+
+This method of tracing using strace tells us the system calls invoked by
+the workload and doesn't include all the system calls that can be invoked
+by it. In addition, this tracing method tells us just the code paths within
+these system calls that are invoked. As an example, if a workload opens a
+file and reads from it successfully, then the success path is the one that
+is traced. Any error paths in that system call will not be traced. If there
+is a workload that provides full coverage of a workload then the method
+outlined here will trace and find all possible code paths. The completeness
+of the system usage information depends on the completeness of coverage of a
+workload.
+
+The goal is tracing a workload on a system running a default kernel without
+requiring custom kernel installs.
+
+How do we gather fine-grained system information?
+=================================================
+
+strace tool can be used to trace system calls made by a process and signals
+it receives. System calls are the fundamental interface between an
+application and the operating system kernel. They enable a program to
+request services from the kernel. For instance, the open() system call in
+Linux is used to provide access to a file in the file system. strace enables
+us to track all the system calls made by an application. It lists all the
+system calls made by a process and their resulting output.
+
+You can generate profiling data combining strace and perf record tools to
+record the events and information associated with a process. This provides
+insight into the process. "perf annotate" tool generates the statistics of
+each instruction of the program. This document goes over the details of how
+to gather fine-grained information on a workload's usage of system resources.
+
+We used strace to trace the perf, stress-ng, paxtest workloads to illustrate
+our methodology to discover resources used by a workload. This process can
+be applied to trace other workloads.
+
+Getting the system ready for tracing
+====================================
+
+Before we can get started we will show you how to get your system ready.
+We assume that you have a Linux distribution running on a physical system
+or a virtual machine. Most distributions will include strace command. Let’s
+install other tools that aren’t usually included to build Linux kernel.
+Please note that the following works on Debian based distributions. You
+might have to find equivalent packages on other Linux distributions.
+
+Install tools to build Linux kernel and tools in kernel repository.
+scripts/ver_linux is a good way to check if your system already has
+the necessary tools::
+
+ sudo apt-get install build-essential flex bison yacc
+ sudo apt install libelf-dev systemtap-sdt-dev libslang2-dev libperl-dev libdw-dev
+
+cscope is a good tool to browse kernel sources. Let's install it now::
+
+ sudo apt-get install cscope
+
+Install stress-ng and paxtest::
+
+ apt-get install stress-ng
+ apt-get install paxtest
+
+Workload overview
+=================
+
+As mentioned earlier, we used strace to trace perf bench, stress-ng and
+paxtest workloads to show how to analyze a workload and identify Linux
+subsystems used by these workloads. Let's start with an overview of these
+three workloads to get a better understanding of what they do and how to
+use them.
+
+perf bench (all) workload
+-------------------------
+
+The perf bench command contains multiple multi-threaded microkernel
+benchmarks for executing different subsystems in the Linux kernel and
+system calls. This allows us to easily measure the impact of changes,
+which can help mitigate performance regressions. It also acts as a common
+benchmarking framework, enabling developers to easily create test cases,
+integrate transparently, and use performance-rich tooling subsystems.
+
+Stress-ng netdev stressor workload
+----------------------------------
+
+stress-ng is used for performing stress testing on the kernel. It allows
+you to exercise various physical subsystems of the computer, as well as
+interfaces of the OS kernel, using "stressor-s". They are available for
+CPU, CPU cache, devices, I/O, interrupts, file system, memory, network,
+operating system, pipelines, schedulers, and virtual machines. Please refer
+to the `stress-ng man-page <https://www.mankier.com/1/stress-ng>`_ to
+find the description of all the available stressor-s. The netdev stressor
+starts specified number (N) of workers that exercise various netdevice
+ioctl commands across all the available network devices.
+
+paxtest kiddie workload
+-----------------------
+
+paxtest is a program that tests buffer overflows in the kernel. It tests
+kernel enforcements over memory usage. Generally, execution in some memory
+segments makes buffer overflows possible. It runs a set of programs that
+attempt to subvert memory usage. It is used as a regression test suite for
+PaX, but might be useful to test other memory protection patches for the
+kernel. We used paxtest kiddie mode which looks for simple vulnerabilities.
+
+What is strace and how do we use it?
+====================================
+
+As mentioned earlier, strace which is a useful diagnostic, instructional,
+and debugging tool and can be used to discover the system resources in use
+by a workload. It can be used:
+
+ * To see how a process interacts with the kernel.
+ * To see why a process is failing or hanging.
+ * For reverse engineering a process.
+ * To find the files on which a program depends.
+ * For analyzing the performance of an application.
+ * For troubleshooting various problems related to the operating system.
+
+In addition, strace can generate run-time statistics on times, calls, and
+errors for each system call and report a summary when program exits,
+suppressing the regular output. This attempts to show system time (CPU time
+spent running in the kernel) independent of wall clock time. We plan to use
+these features to get information on workload system usage.
+
+strace command supports basic, verbose, and stats modes. strace command when
+run in verbose mode gives more detailed information about the system calls
+invoked by a process.
+
+Running strace -c generates a report of the percentage of time spent in each
+system call, the total time in seconds, the microseconds per call, the total
+number of calls, the count of each system call that has failed with an error
+and the type of system call made.
+
+ * Usage: strace <command we want to trace>
+ * Verbose mode usage: strace -v <command>
+ * Gather statistics: strace -c <command>
+
+We used the “-c” option to gather fine-grained run-time statistics in use
+by three workloads we have chose for this analysis.
+
+ * perf
+ * stress-ng
+ * paxtest
+
+What is cscope and how do we use it?
+====================================
+
+Now let’s look at `cscope <https://cscope.sourceforge.net/>`_, a command
+line tool for browsing C, C++ or Java code-bases. We can use it to find
+all the references to a symbol, global definitions, functions called by a
+function, functions calling a function, text strings, regular expression
+patterns, files including a file.
+
+We can use cscope to find which system call belongs to which subsystem.
+This way we can find the kernel subsystems used by a process when it is
+executed.
+
+Let’s checkout the latest Linux repository and build cscope database::
+
+ git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git linux
+ cd linux
+ cscope -R -p10 # builds cscope.out database before starting browse session
+ cscope -d -p10 # starts browse session on cscope.out database
+
+Note: Run "cscope -R -p10" to build the database and c"scope -d -p10" to
+enter into the browsing session. cscope by default cscope.out database.
+To get out of this mode press ctrl+d. -p option is used to specify the
+number of file path components to display. -p10 is optimal for browsing
+kernel sources.
+
+What is perf and how do we use it?
+==================================
+
+Perf is an analysis tool based on Linux 2.6+ systems, which abstracts the
+CPU hardware difference in performance measurement in Linux, and provides
+a simple command line interface. Perf is based on the perf_events interface
+exported by the kernel. It is very useful for profiling the system and
+finding performance bottlenecks in an application.
+
+If you haven't already checked out the Linux mainline repository, you can do
+so and then build kernel and perf tool::
+
+ git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git linux
+ cd linux
+ make -j3 all
+ cd tools/perf
+ make
+
+Note: The perf command can be built without building the kernel in the
+repository and can be run on older kernels. However matching the kernel
+and perf revisions gives more accurate information on the subsystem usage.
+
+We used "perf stat" and "perf bench" options. For a detailed information on
+the perf tool, run "perf -h".
+
+perf stat
+---------
+The perf stat command generates a report of various hardware and software
+events. It does so with the help of hardware counter registers found in
+modern CPUs that keep the count of these activities. "perf stat cal" shows
+stats for cal command.
+
+Perf bench
+----------
+The perf bench command contains multiple multi-threaded microkernel
+benchmarks for executing different subsystems in the Linux kernel and
+system calls. This allows us to easily measure the impact of changes,
+which can help mitigate performance regressions. It also acts as a common
+benchmarking framework, enabling developers to easily create test cases,
+integrate transparently, and use performance-rich tooling.
+
+"perf bench all" command runs the following benchmarks:
+
+ * sched/messaging
+ * sched/pipe
+ * syscall/basic
+ * mem/memcpy
+ * mem/memset
+
+What is stress-ng and how do we use it?
+=======================================
+
+As mentioned earlier, stress-ng is used for performing stress testing on
+the kernel. It allows you to exercise various physical subsystems of the
+computer, as well as interfaces of the OS kernel, using stressor-s. They
+are available for CPU, CPU cache, devices, I/O, interrupts, file system,
+memory, network, operating system, pipelines, schedulers, and virtual
+machines.
+
+The netdev stressor starts N workers that exercise various netdevice ioctl
+commands across all the available network devices. The following ioctls are
+exercised:
+
+ * SIOCGIFCONF, SIOCGIFINDEX, SIOCGIFNAME, SIOCGIFFLAGS
+ * SIOCGIFADDR, SIOCGIFNETMASK, SIOCGIFMETRIC, SIOCGIFMTU
+ * SIOCGIFHWADDR, SIOCGIFMAP, SIOCGIFTXQLEN
+
+The following command runs the stressor::
+
+ stress-ng --netdev 1 -t 60 --metrics command.
+
+We can use the perf record command to record the events and information
+associated with a process. This command records the profiling data in the
+perf.data file in the same directory.
+
+Using the following commands you can record the events associated with the
+netdev stressor, view the generated report perf.data and annotate the to
+view the statistics of each instruction of the program::
+
+ perf record stress-ng --netdev 1 -t 60 --metrics command.
+ perf report
+ perf annotate
+
+What is paxtest and how do we use it?
+=====================================
+
+paxtest is a program that tests buffer overflows in the kernel. It tests
+kernel enforcements over memory usage. Generally, execution in some memory
+segments makes buffer overflows possible. It runs a set of programs that
+attempt to subvert memory usage. It is used as a regression test suite for
+PaX, and will be useful to test other memory protection patches for the
+kernel.
+
+paxtest provides kiddie and blackhat modes. The paxtest kiddie mode runs
+in normal mode, whereas the blackhat mode tries to get around the protection
+of the kernel testing for vulnerabilities. We focus on the kiddie mode here
+and combine "paxtest kiddie" run with "perf record" to collect CPU stack
+traces for the paxtest kiddie run to see which function is calling other
+functions in the performance profile. Then the "dwarf" (DWARF's Call Frame
+Information) mode can be used to unwind the stack.
+
+The following command can be used to view resulting report in call-graph
+format::
+
+ perf record --call-graph dwarf paxtest kiddie
+ perf report --stdio
+
+Tracing workloads
+=================
+
+Now that we understand the workloads, let's start tracing them.
+
+Tracing perf bench all workload
+-------------------------------
+
+Run the following command to trace perf bench all workload::
+
+ strace -c perf bench all
+
+**System Calls made by the workload**
+
+The below table shows the system calls invoked by the workload, number of
+times each system call is invoked, and the corresponding Linux subsystem.
+
++-------------------+-----------+-----------------+-------------------------+
+| System Call | # calls | Linux Subsystem | System Call (API) |
++===================+===========+=================+=========================+
+| getppid | 10000001 | Process Mgmt | sys_getpid() |
++-------------------+-----------+-----------------+-------------------------+
+| clone | 1077 | Process Mgmt. | sys_clone() |
++-------------------+-----------+-----------------+-------------------------+
+| prctl | 23 | Process Mgmt. | sys_prctl() |
++-------------------+-----------+-----------------+-------------------------+
+| prlimit64 | 7 | Process Mgmt. | sys_prlimit64() |
++-------------------+-----------+-----------------+-------------------------+
+| getpid | 10 | Process Mgmt. | sys_getpid() |
++-------------------+-----------+-----------------+-------------------------+
+| uname | 3 | Process Mgmt. | sys_uname() |
++-------------------+-----------+-----------------+-------------------------+
+| sysinfo | 1 | Process Mgmt. | sys_sysinfo() |
++-------------------+-----------+-----------------+-------------------------+
+| getuid | 1 | Process Mgmt. | sys_getuid() |
++-------------------+-----------+-----------------+-------------------------+
+| getgid | 1 | Process Mgmt. | sys_getgid() |
++-------------------+-----------+-----------------+-------------------------+
+| geteuid | 1 | Process Mgmt. | sys_geteuid() |
++-------------------+-----------+-----------------+-------------------------+
+| getegid | 1 | Process Mgmt. | sys_getegid |
++-------------------+-----------+-----------------+-------------------------+
+| close | 49951 | Filesystem | sys_close() |
++-------------------+-----------+-----------------+-------------------------+
+| pipe | 604 | Filesystem | sys_pipe() |
++-------------------+-----------+-----------------+-------------------------+
+| openat | 48560 | Filesystem | sys_opennat() |
++-------------------+-----------+-----------------+-------------------------+
+| fstat | 8338 | Filesystem | sys_fstat() |
++-------------------+-----------+-----------------+-------------------------+
+| stat | 1573 | Filesystem | sys_stat() |
++-------------------+-----------+-----------------+-------------------------+
+| pread64 | 9646 | Filesystem | sys_pread64() |
++-------------------+-----------+-----------------+-------------------------+
+| getdents64 | 1873 | Filesystem | sys_getdents64() |
++-------------------+-----------+-----------------+-------------------------+
+| access | 3 | Filesystem | sys_access() |
++-------------------+-----------+-----------------+-------------------------+
+| lstat | 1880 | Filesystem | sys_lstat() |
++-------------------+-----------+-----------------+-------------------------+
+| lseek | 6 | Filesystem | sys_lseek() |
++-------------------+-----------+-----------------+-------------------------+
+| ioctl | 3 | Filesystem | sys_ioctl() |
++-------------------+-----------+-----------------+-------------------------+
+| dup2 | 1 | Filesystem | sys_dup2() |
++-------------------+-----------+-----------------+-------------------------+
+| execve | 2 | Filesystem | sys_execve() |
++-------------------+-----------+-----------------+-------------------------+
+| fcntl | 8779 | Filesystem | sys_fcntl() |
++-------------------+-----------+-----------------+-------------------------+
+| statfs | 1 | Filesystem | sys_statfs() |
++-------------------+-----------+-----------------+-------------------------+
+| epoll_create | 2 | Filesystem | sys_epoll_create() |
++-------------------+-----------+-----------------+-------------------------+
+| epoll_ctl | 64 | Filesystem | sys_epoll_ctl() |
++-------------------+-----------+-----------------+-------------------------+
+| newfstatat | 8318 | Filesystem | sys_newfstatat() |
++-------------------+-----------+-----------------+-------------------------+
+| eventfd2 | 192 | Filesystem | sys_eventfd2() |
++-------------------+-----------+-----------------+-------------------------+
+| mmap | 243 | Memory Mgmt. | sys_mmap() |
++-------------------+-----------+-----------------+-------------------------+
+| mprotect | 32 | Memory Mgmt. | sys_mprotect() |
++-------------------+-----------+-----------------+-------------------------+
+| brk | 21 | Memory Mgmt. | sys_brk() |
++-------------------+-----------+-----------------+-------------------------+
+| munmap | 128 | Memory Mgmt. | sys_munmap() |
++-------------------+-----------+-----------------+-------------------------+
+| set_mempolicy | 156 | Memory Mgmt. | sys_set_mempolicy() |
++-------------------+-----------+-----------------+-------------------------+
+| set_tid_address | 1 | Process Mgmt. | sys_set_tid_address() |
++-------------------+-----------+-----------------+-------------------------+
+| set_robust_list | 1 | Futex | sys_set_robust_list() |
++-------------------+-----------+-----------------+-------------------------+
+| futex | 341 | Futex | sys_futex() |
++-------------------+-----------+-----------------+-------------------------+
+| sched_getaffinity | 79 | Scheduler | sys_sched_getaffinity() |
++-------------------+-----------+-----------------+-------------------------+
+| sched_setaffinity | 223 | Scheduler | sys_sched_setaffinity() |
++-------------------+-----------+-----------------+-------------------------+
+| socketpair | 202 | Network | sys_socketpair() |
++-------------------+-----------+-----------------+-------------------------+
+| rt_sigprocmask | 21 | Signal | sys_rt_sigprocmask() |
++-------------------+-----------+-----------------+-------------------------+
+| rt_sigaction | 36 | Signal | sys_rt_sigaction() |
++-------------------+-----------+-----------------+-------------------------+
+| rt_sigreturn | 2 | Signal | sys_rt_sigreturn() |
++-------------------+-----------+-----------------+-------------------------+
+| wait4 | 889 | Time | sys_wait4() |
++-------------------+-----------+-----------------+-------------------------+
+| clock_nanosleep | 37 | Time | sys_clock_nanosleep() |
++-------------------+-----------+-----------------+-------------------------+
+| capget | 4 | Capability | sys_capget() |
++-------------------+-----------+-----------------+-------------------------+
+
+Tracing stress-ng netdev stressor workload
+------------------------------------------
+
+Run the following command to trace stress-ng netdev stressor workload::
+
+ strace -c stress-ng --netdev 1 -t 60 --metrics
+
+**System Calls made by the workload**
+
+The below table shows the system calls invoked by the workload, number of
+times each system call is invoked, and the corresponding Linux subsystem.
+
++-------------------+-----------+-----------------+-------------------------+
+| System Call | # calls | Linux Subsystem | System Call (API) |
++===================+===========+=================+=========================+
+| openat | 74 | Filesystem | sys_openat() |
++-------------------+-----------+-----------------+-------------------------+
+| close | 75 | Filesystem | sys_close() |
++-------------------+-----------+-----------------+-------------------------+
+| read | 58 | Filesystem | sys_read() |
++-------------------+-----------+-----------------+-------------------------+
+| fstat | 20 | Filesystem | sys_fstat() |
++-------------------+-----------+-----------------+-------------------------+
+| flock | 10 | Filesystem | sys_flock() |
++-------------------+-----------+-----------------+-------------------------+
+| write | 7 | Filesystem | sys_write() |
++-------------------+-----------+-----------------+-------------------------+
+| getdents64 | 8 | Filesystem | sys_getdents64() |
++-------------------+-----------+-----------------+-------------------------+
+| pread64 | 8 | Filesystem | sys_pread64() |
++-------------------+-----------+-----------------+-------------------------+
+| lseek | 1 | Filesystem | sys_lseek() |
++-------------------+-----------+-----------------+-------------------------+
+| access | 2 | Filesystem | sys_access() |
++-------------------+-----------+-----------------+-------------------------+
+| getcwd | 1 | Filesystem | sys_getcwd() |
++-------------------+-----------+-----------------+-------------------------+
+| execve | 1 | Filesystem | sys_execve() |
++-------------------+-----------+-----------------+-------------------------+
+| mmap | 61 | Memory Mgmt. | sys_mmap() |
++-------------------+-----------+-----------------+-------------------------+
+| munmap | 3 | Memory Mgmt. | sys_munmap() |
++-------------------+-----------+-----------------+-------------------------+
+| mprotect | 20 | Memory Mgmt. | sys_mprotect() |
++-------------------+-----------+-----------------+-------------------------+
+| mlock | 2 | Memory Mgmt. | sys_mlock() |
++-------------------+-----------+-----------------+-------------------------+
+| brk | 3 | Memory Mgmt. | sys_brk() |
++-------------------+-----------+-----------------+-------------------------+
+| rt_sigaction | 21 | Signal | sys_rt_sigaction() |
++-------------------+-----------+-----------------+-------------------------+
+| rt_sigprocmask | 1 | Signal | sys_rt_sigprocmask() |
++-------------------+-----------+-----------------+-------------------------+
+| sigaltstack | 1 | Signal | sys_sigaltstack() |
++-------------------+-----------+-----------------+-------------------------+
+| rt_sigreturn | 1 | Signal | sys_rt_sigreturn() |
++-------------------+-----------+-----------------+-------------------------+
+| getpid | 8 | Process Mgmt. | sys_getpid() |
++-------------------+-----------+-----------------+-------------------------+
+| prlimit64 | 5 | Process Mgmt. | sys_prlimit64() |
++-------------------+-----------+-----------------+-------------------------+
+| arch_prctl | 2 | Process Mgmt. | sys_arch_prctl() |
++-------------------+-----------+-----------------+-------------------------+
+| sysinfo | 2 | Process Mgmt. | sys_sysinfo() |
++-------------------+-----------+-----------------+-------------------------+
+| getuid | 2 | Process Mgmt. | sys_getuid() |
++-------------------+-----------+-----------------+-------------------------+
+| uname | 1 | Process Mgmt. | sys_uname() |
++-------------------+-----------+-----------------+-------------------------+
+| setpgid | 1 | Process Mgmt. | sys_setpgid() |
++-------------------+-----------+-----------------+-------------------------+
+| getrusage | 1 | Process Mgmt. | sys_getrusage() |
++-------------------+-----------+-----------------+-------------------------+
+| geteuid | 1 | Process Mgmt. | sys_geteuid() |
++-------------------+-----------+-----------------+-------------------------+
+| getppid | 1 | Process Mgmt. | sys_getppid() |
++-------------------+-----------+-----------------+-------------------------+
+| sendto | 3 | Network | sys_sendto() |
++-------------------+-----------+-----------------+-------------------------+
+| connect | 1 | Network | sys_connect() |
++-------------------+-----------+-----------------+-------------------------+
+| socket | 1 | Network | sys_socket() |
++-------------------+-----------+-----------------+-------------------------+
+| clone | 1 | Process Mgmt. | sys_clone() |
++-------------------+-----------+-----------------+-------------------------+
+| set_tid_address | 1 | Process Mgmt. | sys_set_tid_address() |
++-------------------+-----------+-----------------+-------------------------+
+| wait4 | 2 | Time | sys_wait4() |
++-------------------+-----------+-----------------+-------------------------+
+| alarm | 1 | Time | sys_alarm() |
++-------------------+-----------+-----------------+-------------------------+
+| set_robust_list | 1 | Futex | sys_set_robust_list() |
++-------------------+-----------+-----------------+-------------------------+
+
+Tracing paxtest kiddie workload
+-------------------------------
+
+Run the following command to trace paxtest kiddie workload::
+
+ strace -c paxtest kiddie
+
+**System Calls made by the workload**
+
+The below table shows the system calls invoked by the workload, number of
+times each system call is invoked, and the corresponding Linux subsystem.
+
++-------------------+-----------+-----------------+----------------------+
+| System Call | # calls | Linux Subsystem | System Call (API) |
++===================+===========+=================+======================+
+| read | 3 | Filesystem | sys_read() |
++-------------------+-----------+-----------------+----------------------+
+| write | 11 | Filesystem | sys_write() |
++-------------------+-----------+-----------------+----------------------+
+| close | 41 | Filesystem | sys_close() |
++-------------------+-----------+-----------------+----------------------+
+| stat | 24 | Filesystem | sys_stat() |
++-------------------+-----------+-----------------+----------------------+
+| fstat | 2 | Filesystem | sys_fstat() |
++-------------------+-----------+-----------------+----------------------+
+| pread64 | 6 | Filesystem | sys_pread64() |
++-------------------+-----------+-----------------+----------------------+
+| access | 1 | Filesystem | sys_access() |
++-------------------+-----------+-----------------+----------------------+
+| pipe | 1 | Filesystem | sys_pipe() |
++-------------------+-----------+-----------------+----------------------+
+| dup2 | 24 | Filesystem | sys_dup2() |
++-------------------+-----------+-----------------+----------------------+
+| execve | 1 | Filesystem | sys_execve() |
++-------------------+-----------+-----------------+----------------------+
+| fcntl | 26 | Filesystem | sys_fcntl() |
++-------------------+-----------+-----------------+----------------------+
+| openat | 14 | Filesystem | sys_openat() |
++-------------------+-----------+-----------------+----------------------+
+| rt_sigaction | 7 | Signal | sys_rt_sigaction() |
++-------------------+-----------+-----------------+----------------------+
+| rt_sigreturn | 38 | Signal | sys_rt_sigreturn() |
++-------------------+-----------+-----------------+----------------------+
+| clone | 38 | Process Mgmt. | sys_clone() |
++-------------------+-----------+-----------------+----------------------+
+| wait4 | 44 | Time | sys_wait4() |
++-------------------+-----------+-----------------+----------------------+
+| mmap | 7 | Memory Mgmt. | sys_mmap() |
++-------------------+-----------+-----------------+----------------------+
+| mprotect | 3 | Memory Mgmt. | sys_mprotect() |
++-------------------+-----------+-----------------+----------------------+
+| munmap | 1 | Memory Mgmt. | sys_munmap() |
++-------------------+-----------+-----------------+----------------------+
+| brk | 3 | Memory Mgmt. | sys_brk() |
++-------------------+-----------+-----------------+----------------------+
+| getpid | 1 | Process Mgmt. | sys_getpid() |
++-------------------+-----------+-----------------+----------------------+
+| getuid | 1 | Process Mgmt. | sys_getuid() |
++-------------------+-----------+-----------------+----------------------+
+| getgid | 1 | Process Mgmt. | sys_getgid() |
++-------------------+-----------+-----------------+----------------------+
+| geteuid | 2 | Process Mgmt. | sys_geteuid() |
++-------------------+-----------+-----------------+----------------------+
+| getegid | 1 | Process Mgmt. | sys_getegid() |
++-------------------+-----------+-----------------+----------------------+
+| getppid | 1 | Process Mgmt. | sys_getppid() |
++-------------------+-----------+-----------------+----------------------+
+| arch_prctl | 2 | Process Mgmt. | sys_arch_prctl() |
++-------------------+-----------+-----------------+----------------------+
+
+Conclusion
+==========
+
+This document is intended to be used as a guide on how to gather fine-grained
+information on the resources in use by workloads using strace.
+
+References
+==========
+
+ * `Discovery Linux Kernel Subsystems used by OpenAPS <https://elisa.tech/blog/2022/02/02/discovery-linux-kernel-subsystems-used-by-openaps>`_
+ * `ELISA-White-Papers-Discovering Linux kernel subsystems used by a workload <https://github.com/elisa-tech/ELISA-White-Papers/blob/master/Processes/Discovering_Linux_kernel_subsystems_used_by_a_workload.md>`_
+ * `strace <https://man7.org/linux/man-pages/man1/strace.1.html>`_
+ * `perf <https://man7.org/linux/man-pages/man1/perf.1.html>`_
+ * `paxtest README <https://github.com/opntr/paxtest-freebsd/blob/hardenedbsd/0.9.14-hbsd/README>`_
+ * `stress-ng <https://www.mankier.com/1/stress-ng>`_
+ * `Monitoring and managing system status and performance <https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/8/html/monitoring_and_managing_system_status_and_performance/index>`_
diff --git a/Documentation/admin-guide/xfs.rst b/Documentation/admin-guide/xfs.rst
new file mode 100644
index 000000000000..c85cd327af28
--- /dev/null
+++ b/Documentation/admin-guide/xfs.rst
@@ -0,0 +1,551 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================
+The SGI XFS Filesystem
+======================
+
+XFS is a high performance journaling filesystem which originated
+on the SGI IRIX platform. It is completely multi-threaded, can
+support large files and large filesystems, extended attributes,
+variable block sizes, is extent based, and makes extensive use of
+Btrees (directories, extents, free space) to aid both performance
+and scalability.
+
+Refer to the documentation at https://xfs.wiki.kernel.org/
+for further details. This implementation is on-disk compatible
+with the IRIX version of XFS.
+
+
+Mount Options
+=============
+
+When mounting an XFS filesystem, the following options are accepted.
+
+ allocsize=size
+ Sets the buffered I/O end-of-file preallocation size when
+ doing delayed allocation writeout (default size is 64KiB).
+ Valid values for this option are page size (typically 4KiB)
+ through to 1GiB, inclusive, in power-of-2 increments.
+
+ The default behaviour is for dynamic end-of-file
+ preallocation size, which uses a set of heuristics to
+ optimise the preallocation size based on the current
+ allocation patterns within the file and the access patterns
+ to the file. Specifying a fixed ``allocsize`` value turns off
+ the dynamic behaviour.
+
+ discard or nodiscard (default)
+ Enable/disable the issuing of commands to let the block
+ device reclaim space freed by the filesystem. This is
+ useful for SSD devices, thinly provisioned LUNs and virtual
+ machine images, but may have a performance impact.
+
+ Note: It is currently recommended that you use the ``fstrim``
+ application to ``discard`` unused blocks rather than the ``discard``
+ mount option because the performance impact of this option
+ is quite severe.
+
+ grpid/bsdgroups or nogrpid/sysvgroups (default)
+ These options define what group ID a newly created file
+ gets. When ``grpid`` is set, it takes the group ID of the
+ directory in which it is created; otherwise it takes the
+ ``fsgid`` of the current process, unless the directory has the
+ ``setgid`` bit set, in which case it takes the ``gid`` from the
+ parent directory, and also gets the ``setgid`` bit set if it is
+ a directory itself.
+
+ filestreams
+ Make the data allocator use the filestreams allocation mode
+ across the entire filesystem rather than just on directories
+ configured to use it.
+
+ inode32 or inode64 (default)
+ When ``inode32`` is specified, it indicates that XFS limits
+ inode creation to locations which will not result in inode
+ numbers with more than 32 bits of significance.
+
+ When ``inode64`` is specified, it indicates that XFS is allowed
+ to create inodes at any location in the filesystem,
+ including those which will result in inode numbers occupying
+ more than 32 bits of significance.
+
+ ``inode32`` is provided for backwards compatibility with older
+ systems and applications, since 64 bits inode numbers might
+ cause problems for some applications that cannot handle
+ large inode numbers. If applications are in use which do
+ not handle inode numbers bigger than 32 bits, the ``inode32``
+ option should be specified.
+
+ largeio or nolargeio (default)
+ If ``nolargeio`` is specified, the optimal I/O reported in
+ ``st_blksize`` by **stat(2)** will be as small as possible to allow
+ user applications to avoid inefficient read/modify/write
+ I/O. This is typically the page size of the machine, as
+ this is the granularity of the page cache.
+
+ If ``largeio`` is specified, a filesystem that was created with a
+ ``swidth`` specified will return the ``swidth`` value (in bytes)
+ in ``st_blksize``. If the filesystem does not have a ``swidth``
+ specified but does specify an ``allocsize`` then ``allocsize``
+ (in bytes) will be returned instead. Otherwise the behaviour
+ is the same as if ``nolargeio`` was specified.
+
+ logbufs=value
+ Set the number of in-memory log buffers. Valid numbers
+ range from 2-8 inclusive.
+
+ The default value is 8 buffers.
+
+ If the memory cost of 8 log buffers is too high on small
+ systems, then it may be reduced at some cost to performance
+ on metadata intensive workloads. The ``logbsize`` option below
+ controls the size of each buffer and so is also relevant to
+ this case.
+
+ lifetime (default) or nolifetime
+ Enable data placement based on write life time hints provided
+ by the user. This turns on co-allocation of data of similar
+ life times when statistically favorable to reduce garbage
+ collection cost.
+
+ These options are only available for zoned rt file systems.
+
+ logbsize=value
+ Set the size of each in-memory log buffer. The size may be
+ specified in bytes, or in kilobytes with a "k" suffix.
+ Valid sizes for version 1 and version 2 logs are 16384 (16k)
+ and 32768 (32k). Valid sizes for version 2 logs also
+ include 65536 (64k), 131072 (128k) and 262144 (256k). The
+ logbsize must be an integer multiple of the log
+ stripe unit configured at **mkfs(8)** time.
+
+ The default value for version 1 logs is 32768, while the
+ default value for version 2 logs is MAX(32768, log_sunit).
+
+ logdev=device and rtdev=device
+ Use an external log (metadata journal) and/or real-time device.
+ An XFS filesystem has up to three parts: a data section, a log
+ section, and a real-time section. The real-time section is
+ optional, and the log section can be separate from the data
+ section or contained within it.
+
+ max_atomic_write=value
+ Set the maximum size of an atomic write. The size may be
+ specified in bytes, in kilobytes with a "k" suffix, in megabytes
+ with a "m" suffix, or in gigabytes with a "g" suffix. The size
+ cannot be larger than the maximum write size, larger than the
+ size of any allocation group, or larger than the size of a
+ remapping operation that the log can complete atomically.
+
+ The default value is to set the maximum I/O completion size
+ to allow each CPU to handle one at a time.
+
+ max_open_zones=value
+ Specify the max number of zones to keep open for writing on a
+ zoned rt device. Many open zones aids file data separation
+ but may impact performance on HDDs.
+
+ If ``max_open_zones`` is not specified, the value is determined
+ by the capabilities and the size of the zoned rt device.
+
+ noalign
+ Data allocations will not be aligned at stripe unit
+ boundaries. This is only relevant to filesystems created
+ with non-zero data alignment parameters (``sunit``, ``swidth``) by
+ **mkfs(8)**.
+
+ norecovery
+ The filesystem will be mounted without running log recovery.
+ If the filesystem was not cleanly unmounted, it is likely to
+ be inconsistent when mounted in ``norecovery`` mode.
+ Some files or directories may not be accessible because of this.
+ Filesystems mounted ``norecovery`` must be mounted read-only or
+ the mount will fail.
+
+ nouuid
+ Don't check for double mounted file systems using the file
+ system ``uuid``. This is useful to mount LVM snapshot volumes,
+ and often used in combination with ``norecovery`` for mounting
+ read-only snapshots.
+
+ noquota
+ Forcibly turns off all quota accounting and enforcement
+ within the filesystem.
+
+ uquota/usrquota/uqnoenforce/quota
+ User disk quota accounting enabled, and limits (optionally)
+ enforced. Refer to **xfs_quota(8)** for further details.
+
+ gquota/grpquota/gqnoenforce
+ Group disk quota accounting enabled and limits (optionally)
+ enforced. Refer to **xfs_quota(8)** for further details.
+
+ pquota/prjquota/pqnoenforce
+ Project disk quota accounting enabled and limits (optionally)
+ enforced. Refer to **xfs_quota(8)** for further details.
+
+ sunit=value and swidth=value
+ Used to specify the stripe unit and width for a RAID device
+ or a stripe volume. "value" must be specified in 512-byte
+ block units. These options are only relevant to filesystems
+ that were created with non-zero data alignment parameters.
+
+ The ``sunit`` and ``swidth`` parameters specified must be compatible
+ with the existing filesystem alignment characteristics. In
+ general, that means the only valid changes to ``sunit`` are
+ increasing it by a power-of-2 multiple. Valid ``swidth`` values
+ are any integer multiple of a valid ``sunit`` value.
+
+ Typically the only time these mount options are necessary if
+ after an underlying RAID device has had its geometry
+ modified, such as adding a new disk to a RAID5 lun and
+ reshaping it.
+
+ swalloc
+ Data allocations will be rounded up to stripe width boundaries
+ when the current end of file is being extended and the file
+ size is larger than the stripe width size.
+
+ wsync
+ When specified, all filesystem namespace operations are
+ executed synchronously. This ensures that when the namespace
+ operation (create, unlink, etc) completes, the change to the
+ namespace is on stable storage. This is useful in HA setups
+ where failover must not result in clients seeing
+ inconsistent namespace presentation during or after a
+ failover event.
+
+Deprecation of V4 Format
+========================
+
+The V4 filesystem format lacks certain features that are supported by
+the V5 format, such as metadata checksumming, strengthened metadata
+verification, and the ability to store timestamps past the year 2038.
+Because of this, the V4 format is deprecated. All users should upgrade
+by backing up their files, reformatting, and restoring from the backup.
+
+Administrators and users can detect a V4 filesystem by running xfs_info
+against a filesystem mountpoint and checking for a string containing
+"crc=". If no such string is found, please upgrade xfsprogs to the
+latest version and try again.
+
+The deprecation will take place in two parts. Support for mounting V4
+filesystems can now be disabled at kernel build time via Kconfig option.
+These options were changed to default to no in September 2025. In
+September 2030, support will be removed from the codebase entirely.
+
+Note: Distributors may choose to withdraw V4 format support earlier than
+the dates listed above.
+
+Deprecated Mount Options
+========================
+
+============================ ================
+ Name Removal Schedule
+============================ ================
+Mounting with V4 filesystem September 2030
+Mounting ascii-ci filesystem September 2030
+============================ ================
+
+
+Removed Mount Options
+=====================
+
+=========================== =======
+ Name Removed
+=========================== =======
+ delaylog/nodelaylog v4.0
+ ihashsize v4.0
+ irixsgid v4.0
+ osyncisdsync/osyncisosync v4.0
+ barrier v4.19
+ nobarrier v4.19
+ ikeep/noikeep v6.18
+ attr2/noattr2 v6.18
+=========================== =======
+
+sysctls
+=======
+
+The following sysctls are available for the XFS filesystem:
+
+ fs.xfs.stats_clear (Min: 0 Default: 0 Max: 1)
+ Setting this to "1" clears accumulated XFS statistics
+ in /proc/fs/xfs/stat. It then immediately resets to "0".
+
+ fs.xfs.xfssyncd_centisecs (Min: 100 Default: 3000 Max: 720000)
+ The interval at which the filesystem flushes metadata
+ out to disk and runs internal cache cleanup routines.
+
+ fs.xfs.filestream_centisecs (Min: 1 Default: 3000 Max: 360000)
+ The interval at which the filesystem ages filestreams cache
+ references and returns timed-out AGs back to the free stream
+ pool.
+
+ fs.xfs.speculative_prealloc_lifetime
+ (Units: seconds Min: 1 Default: 300 Max: 86400)
+ The interval at which the background scanning for inodes
+ with unused speculative preallocation runs. The scan
+ removes unused preallocation from clean inodes and releases
+ the unused space back to the free pool.
+
+ fs.xfs.error_level (Min: 0 Default: 3 Max: 11)
+ A volume knob for error reporting when internal errors occur.
+ This will generate detailed messages & backtraces for filesystem
+ shutdowns, for example. Current threshold values are:
+
+ XFS_ERRLEVEL_OFF: 0
+ XFS_ERRLEVEL_LOW: 1
+ XFS_ERRLEVEL_HIGH: 5
+
+ fs.xfs.panic_mask (Min: 0 Default: 0 Max: 511)
+ Causes certain error conditions to call BUG(). Value is a bitmask;
+ OR together the tags which represent errors which should cause panics:
+
+ XFS_NO_PTAG 0
+ XFS_PTAG_IFLUSH 0x00000001
+ XFS_PTAG_LOGRES 0x00000002
+ XFS_PTAG_AILDELETE 0x00000004
+ XFS_PTAG_ERROR_REPORT 0x00000008
+ XFS_PTAG_SHUTDOWN_CORRUPT 0x00000010
+ XFS_PTAG_SHUTDOWN_IOERROR 0x00000020
+ XFS_PTAG_SHUTDOWN_LOGERROR 0x00000040
+ XFS_PTAG_FSBLOCK_ZERO 0x00000080
+ XFS_PTAG_VERIFIER_ERROR 0x00000100
+
+ This option is intended for debugging only.
+
+ fs.xfs.inherit_sync (Min: 0 Default: 1 Max: 1)
+ Setting this to "1" will cause the "sync" flag set
+ by the **xfs_io(8)** chattr command on a directory to be
+ inherited by files in that directory.
+
+ fs.xfs.inherit_nodump (Min: 0 Default: 1 Max: 1)
+ Setting this to "1" will cause the "nodump" flag set
+ by the **xfs_io(8)** chattr command on a directory to be
+ inherited by files in that directory.
+
+ fs.xfs.inherit_noatime (Min: 0 Default: 1 Max: 1)
+ Setting this to "1" will cause the "noatime" flag set
+ by the **xfs_io(8)** chattr command on a directory to be
+ inherited by files in that directory.
+
+ fs.xfs.inherit_nosymlinks (Min: 0 Default: 1 Max: 1)
+ Setting this to "1" will cause the "nosymlinks" flag set
+ by the **xfs_io(8)** chattr command on a directory to be
+ inherited by files in that directory.
+
+ fs.xfs.inherit_nodefrag (Min: 0 Default: 1 Max: 1)
+ Setting this to "1" will cause the "nodefrag" flag set
+ by the **xfs_io(8)** chattr command on a directory to be
+ inherited by files in that directory.
+
+ fs.xfs.rotorstep (Min: 1 Default: 1 Max: 256)
+ In "inode32" allocation mode, this option determines how many
+ files the allocator attempts to allocate in the same allocation
+ group before moving to the next allocation group. The intent
+ is to control the rate at which the allocator moves between
+ allocation groups when allocating extents for new files.
+
+Deprecated Sysctls
+==================
+
+None currently.
+
+Removed Sysctls
+===============
+
+========================================== =======
+ Name Removed
+========================================== =======
+ fs.xfs.xfsbufd_centisec v4.0
+ fs.xfs.age_buffer_centisecs v4.0
+ fs.xfs.irix_symlink_mode v6.18
+ fs.xfs.irix_sgid_inherit v6.18
+ fs.xfs.speculative_cow_prealloc_lifetime v6.18
+========================================== =======
+
+Error handling
+==============
+
+XFS can act differently according to the type of error found during its
+operation. The implementation introduces the following concepts to the error
+handler:
+
+ -failure speed:
+ Defines how fast XFS should propagate an error upwards when a specific
+ error is found during the filesystem operation. It can propagate
+ immediately, after a defined number of retries, after a set time period,
+ or simply retry forever.
+
+ -error classes:
+ Specifies the subsystem the error configuration will apply to, such as
+ metadata IO or memory allocation. Different subsystems will have
+ different error handlers for which behaviour can be configured.
+
+ -error handlers:
+ Defines the behavior for a specific error.
+
+The filesystem behavior during an error can be set via ``sysfs`` files. Each
+error handler works independently - the first condition met by an error handler
+for a specific class will cause the error to be propagated rather than reset and
+retried.
+
+The action taken by the filesystem when the error is propagated is context
+dependent - it may cause a shut down in the case of an unrecoverable error,
+it may be reported back to userspace, or it may even be ignored because
+there's nothing useful we can with the error or anyone we can report it to (e.g.
+during unmount).
+
+The configuration files are organized into the following hierarchy for each
+mounted filesystem:
+
+ /sys/fs/xfs/<dev>/error/<class>/<error>/
+
+Where:
+ <dev>
+ The short device name of the mounted filesystem. This is the same device
+ name that shows up in XFS kernel error messages as "XFS(<dev>): ..."
+
+ <class>
+ The subsystem the error configuration belongs to. As of 4.9, the defined
+ classes are:
+
+ - "metadata": applies metadata buffer write IO
+
+ <error>
+ The individual error handler configurations.
+
+
+Each filesystem has "global" error configuration options defined in their top
+level directory:
+
+ /sys/fs/xfs/<dev>/error/
+
+ fail_at_unmount (Min: 0 Default: 1 Max: 1)
+ Defines the filesystem error behavior at unmount time.
+
+ If set to a value of 1, XFS will override all other error configurations
+ during unmount and replace them with "immediate fail" characteristics.
+ i.e. no retries, no retry timeout. This will always allow unmount to
+ succeed when there are persistent errors present.
+
+ If set to 0, the configured retry behaviour will continue until all
+ retries and/or timeouts have been exhausted. This will delay unmount
+ completion when there are persistent errors, and it may prevent the
+ filesystem from ever unmounting fully in the case of "retry forever"
+ handler configurations.
+
+ Note: there is no guarantee that fail_at_unmount can be set while an
+ unmount is in progress. It is possible that the ``sysfs`` entries are
+ removed by the unmounting filesystem before a "retry forever" error
+ handler configuration causes unmount to hang, and hence the filesystem
+ must be configured appropriately before unmount begins to prevent
+ unmount hangs.
+
+Each filesystem has specific error class handlers that define the error
+propagation behaviour for specific errors. There is also a "default" error
+handler defined, which defines the behaviour for all errors that don't have
+specific handlers defined. Where multiple retry constraints are configured for
+a single error, the first retry configuration that expires will cause the error
+to be propagated. The handler configurations are found in the directory:
+
+ /sys/fs/xfs/<dev>/error/<class>/<error>/
+
+ max_retries (Min: -1 Default: Varies Max: INTMAX)
+ Defines the allowed number of retries of a specific error before
+ the filesystem will propagate the error. The retry count for a given
+ error context (e.g. a specific metadata buffer) is reset every time
+ there is a successful completion of the operation.
+
+ Setting the value to "-1" will cause XFS to retry forever for this
+ specific error.
+
+ Setting the value to "0" will cause XFS to fail immediately when the
+ specific error is reported.
+
+ Setting the value to "N" (where 0 < N < Max) will make XFS retry the
+ operation "N" times before propagating the error.
+
+ retry_timeout_seconds (Min: -1 Default: Varies Max: 1 day)
+ Define the amount of time (in seconds) that the filesystem is
+ allowed to retry its operations when the specific error is
+ found.
+
+ Setting the value to "-1" will allow XFS to retry forever for this
+ specific error.
+
+ Setting the value to "0" will cause XFS to fail immediately when the
+ specific error is reported.
+
+ Setting the value to "N" (where 0 < N < Max) will allow XFS to retry the
+ operation for up to "N" seconds before propagating the error.
+
+**Note:** The default behaviour for a specific error handler is dependent on both
+the class and error context. For example, the default values for
+"metadata/ENODEV" are "0" rather than "-1" so that this error handler defaults
+to "fail immediately" behaviour. This is done because ENODEV is a fatal,
+unrecoverable error no matter how many times the metadata IO is retried.
+
+Workqueue Concurrency
+=====================
+
+XFS uses kernel workqueues to parallelize metadata update processes. This
+enables it to take advantage of storage hardware that can service many IO
+operations simultaneously. This interface exposes internal implementation
+details of XFS, and as such is explicitly not part of any userspace API/ABI
+guarantee the kernel may give userspace. These are undocumented features of
+the generic workqueue implementation XFS uses for concurrency, and they are
+provided here purely for diagnostic and tuning purposes and may change at any
+time in the future.
+
+The control knobs for a filesystem's workqueues are organized by task at hand
+and the short name of the data device. They all can be found in:
+
+ /sys/bus/workqueue/devices/${task}!${device}
+
+================ ===========
+ Task Description
+================ ===========
+ xfs_iwalk-$pid Inode scans of the entire filesystem. Currently limited to
+ mount time quotacheck.
+ xfs-gc Background garbage collection of disk space that have been
+ speculatively allocated beyond EOF or for staging copy on
+ write operations.
+================ ===========
+
+For example, the knobs for the quotacheck workqueue for /dev/nvme0n1 would be
+found in /sys/bus/workqueue/devices/xfs_iwalk-1111!nvme0n1/.
+
+The interesting knobs for XFS workqueues are as follows:
+
+============ ===========
+ Knob Description
+============ ===========
+ max_active Maximum number of background threads that can be started to
+ run the work.
+ cpumask CPUs upon which the threads are allowed to run.
+ nice Relative priority of scheduling the threads. These are the
+ same nice levels that can be applied to userspace processes.
+============ ===========
+
+Zoned Filesystems
+=================
+
+For zoned file systems, the following attributes are exposed in:
+
+ /sys/fs/xfs/<dev>/zoned/
+
+ max_open_zones (Min: 1 Default: Varies Max: UINTMAX)
+ This read-only attribute exposes the maximum number of open zones
+ available for data placement. The value is determined at mount time and
+ is limited by the capabilities of the backing zoned device, file system
+ size and the max_open_zones mount option.
+
+ zonegc_low_space (Min: 0 Default: 0 Max: 100)
+ Define a percentage for how much of the unused space that GC should keep
+ available for writing. A high value will reclaim more of the space
+ occupied by unused blocks, creating a larger buffer against write
+ bursts at the cost of increased write amplification. Regardless
+ of this value, garbage collection will always aim to free a minimum
+ amount of blocks to keep max_open_zones open for data placement purposes.
diff --git a/Documentation/aoe/aoe.txt b/Documentation/aoe/aoe.txt
deleted file mode 100644
index c71487d399d1..000000000000
--- a/Documentation/aoe/aoe.txt
+++ /dev/null
@@ -1,143 +0,0 @@
-ATA over Ethernet is a network protocol that provides simple access to
-block storage on the LAN.
-
- http://support.coraid.com/documents/AoEr11.txt
-
-The EtherDrive (R) HOWTO for 2.6 and 3.x kernels is found at ...
-
- http://support.coraid.com/support/linux/EtherDrive-2.6-HOWTO.html
-
-It has many tips and hints! Please see, especially, recommended
-tunings for virtual memory:
-
- http://support.coraid.com/support/linux/EtherDrive-2.6-HOWTO-5.html#ss5.19
-
-The aoetools are userland programs that are designed to work with this
-driver. The aoetools are on sourceforge.
-
- http://aoetools.sourceforge.net/
-
-The scripts in this Documentation/aoe directory are intended to
-document the use of the driver and are not necessary if you install
-the aoetools.
-
-
-CREATING DEVICE NODES
-
- Users of udev should find the block device nodes created
- automatically, but to create all the necessary device nodes, use the
- udev configuration rules provided in udev.txt (in this directory).
-
- There is a udev-install.sh script that shows how to install these
- rules on your system.
-
- There is also an autoload script that shows how to edit
- /etc/modprobe.d/aoe.conf to ensure that the aoe module is loaded when
- necessary. Preloading the aoe module is preferable to autoloading,
- however, because AoE discovery takes a few seconds. It can be
- confusing when an AoE device is not present the first time the a
- command is run but appears a second later.
-
-USING DEVICE NODES
-
- "cat /dev/etherd/err" blocks, waiting for error diagnostic output,
- like any retransmitted packets.
-
- "echo eth2 eth4 > /dev/etherd/interfaces" tells the aoe driver to
- limit ATA over Ethernet traffic to eth2 and eth4. AoE traffic from
- untrusted networks should be ignored as a matter of security. See
- also the aoe_iflist driver option described below.
-
- "echo > /dev/etherd/discover" tells the driver to find out what AoE
- devices are available.
-
- In the future these character devices may disappear and be replaced
- by sysfs counterparts. Using the commands in aoetools insulates
- users from these implementation details.
-
- The block devices are named like this:
-
- e{shelf}.{slot}
- e{shelf}.{slot}p{part}
-
- ... so that "e0.2" is the third blade from the left (slot 2) in the
- first shelf (shelf address zero). That's the whole disk. The first
- partition on that disk would be "e0.2p1".
-
-USING SYSFS
-
- Each aoe block device in /sys/block has the extra attributes of
- state, mac, and netif. The state attribute is "up" when the device
- is ready for I/O and "down" if detected but unusable. The
- "down,closewait" state shows that the device is still open and
- cannot come up again until it has been closed.
-
- The mac attribute is the ethernet address of the remote AoE device.
- The netif attribute is the network interface on the localhost
- through which we are communicating with the remote AoE device.
-
- There is a script in this directory that formats this information in
- a convenient way. Users with aoetools should use the aoe-stat
- command.
-
- root@makki root# sh Documentation/aoe/status.sh
- e10.0 eth3 up
- e10.1 eth3 up
- e10.2 eth3 up
- e10.3 eth3 up
- e10.4 eth3 up
- e10.5 eth3 up
- e10.6 eth3 up
- e10.7 eth3 up
- e10.8 eth3 up
- e10.9 eth3 up
- e4.0 eth1 up
- e4.1 eth1 up
- e4.2 eth1 up
- e4.3 eth1 up
- e4.4 eth1 up
- e4.5 eth1 up
- e4.6 eth1 up
- e4.7 eth1 up
- e4.8 eth1 up
- e4.9 eth1 up
-
- Use /sys/module/aoe/parameters/aoe_iflist (or better, the driver
- option discussed below) instead of /dev/etherd/interfaces to limit
- AoE traffic to the network interfaces in the given
- whitespace-separated list. Unlike the old character device, the
- sysfs entry can be read from as well as written to.
-
- It's helpful to trigger discovery after setting the list of allowed
- interfaces. The aoetools package provides an aoe-discover script
- for this purpose. You can also directly use the
- /dev/etherd/discover special file described above.
-
-DRIVER OPTIONS
-
- There is a boot option for the built-in aoe driver and a
- corresponding module parameter, aoe_iflist. Without this option,
- all network interfaces may be used for ATA over Ethernet. Here is a
- usage example for the module parameter.
-
- modprobe aoe_iflist="eth1 eth3"
-
- The aoe_deadsecs module parameter determines the maximum number of
- seconds that the driver will wait for an AoE device to provide a
- response to an AoE command. After aoe_deadsecs seconds have
- elapsed, the AoE device will be marked as "down". A value of zero
- is supported for testing purposes and makes the aoe driver keep
- trying AoE commands forever.
-
- The aoe_maxout module parameter has a default of 128. This is the
- maximum number of unresponded packets that will be sent to an AoE
- target at one time.
-
- The aoe_dyndevs module parameter defaults to 1, meaning that the
- driver will assign a block device minor number to a discovered AoE
- target based on the order of its discovery. With dynamic minor
- device numbers in use, a greater range of AoE shelf and slot
- addresses can be supported. Users with udev will never have to
- think about minor numbers. Using aoe_dyndevs=0 allows device nodes
- to be pre-created using a static minor-number scheme with the
- aoe-mkshelf script in the aoetools.
diff --git a/Documentation/aoe/todo.txt b/Documentation/aoe/todo.txt
deleted file mode 100644
index c09dfad4aed8..000000000000
--- a/Documentation/aoe/todo.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-There is a potential for deadlock when allocating a struct sk_buff for
-data that needs to be written out to aoe storage. If the data is
-being written from a dirty page in order to free that page, and if
-there are no other pages available, then deadlock may occur when a
-free page is needed for the sk_buff allocation. This situation has
-not been observed, but it would be nice to eliminate any potential for
-deadlock under memory pressure.
-
-Because ATA over Ethernet is not fragmented by the kernel's IP code,
-the destructor member of the struct sk_buff is available to the aoe
-driver. By using a mempool for allocating all but the first few
-sk_buffs, and by registering a destructor, we should be able to
-efficiently allocate sk_buffs without introducing any potential for
-deadlock.
diff --git a/Documentation/arch/arc/arc.rst b/Documentation/arch/arc/arc.rst
new file mode 100644
index 000000000000..6c4d978f3f4e
--- /dev/null
+++ b/Documentation/arch/arc/arc.rst
@@ -0,0 +1,85 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Linux kernel for ARC processors
+*******************************
+
+Other sources of information
+############################
+
+Below are some resources where more information can be found on
+ARC processors and relevant open source projects.
+
+- `<https://embarc.org>`_ - Community portal for open source on ARC.
+ Good place to start to find relevant FOSS projects, toolchain releases,
+ news items and more.
+
+- `<https://github.com/foss-for-synopsys-dwc-arc-processors>`_ -
+ Home for all development activities regarding open source projects for
+ ARC processors. Some of the projects are forks of various upstream projects,
+ where "work in progress" is hosted prior to submission to upstream projects.
+ Other projects are developed by Synopsys and made available to community
+ as open source for use on ARC Processors.
+
+- `Official Synopsys ARC Processors website
+ <https://www.synopsys.com/designware-ip/processor-solutions.html>`_ -
+ location, with access to some IP documentation (`Programmer's Reference
+ Manual, AKA PRM for ARC HS processors
+ <https://www.synopsys.com/dw/doc.php/ds/cc/programmers-reference-manual-ARC-HS.pdf>`_)
+ and free versions of some commercial tools (`Free nSIM
+ <https://www.synopsys.com/cgi-bin/dwarcnsim/req1.cgi>`_ and
+ `MetaWare Light Edition <https://www.synopsys.com/cgi-bin/arcmwtk_lite/reg1.cgi>`_).
+ Please note though, registration is required to access both the documentation and
+ the tools.
+
+Important note on ARC processors configurability
+################################################
+
+ARC processors are highly configurable and several configurable options
+are supported in Linux. Some options are transparent to software
+(i.e cache geometries, some can be detected at runtime and configured
+and used accordingly, while some need to be explicitly selected or configured
+in the kernel's configuration utility (AKA "make menuconfig").
+
+However not all configurable options are supported when an ARC processor
+is to run Linux. SoC design teams should refer to "Appendix E:
+Configuration for ARC Linux" in the ARC HS Databook for configurability
+guidelines.
+
+Following these guidelines and selecting valid configuration options
+up front is critical to help prevent any unwanted issues during
+SoC bringup and software development in general.
+
+Building the Linux kernel for ARC processors
+############################################
+
+The process of kernel building for ARC processors is the same as for any other
+architecture and could be done in 2 ways:
+
+- Cross-compilation: process of compiling for ARC targets on a development
+ host with a different processor architecture (generally x86_64/amd64).
+- Native compilation: process of compiling for ARC on a ARC platform
+ (hardware board or a simulator like QEMU) with complete development environment
+ (GNU toolchain, dtc, make etc) installed on the platform.
+
+In both cases, up-to-date GNU toolchain for ARC for the host is needed.
+Synopsys offers prebuilt toolchain releases which can be used for this purpose,
+available from:
+
+- Synopsys GNU toolchain releases:
+ `<https://github.com/foss-for-synopsys-dwc-arc-processors/toolchain/releases>`_
+
+- Linux kernel compilers collection:
+ `<https://mirrors.edge.kernel.org/pub/tools/crosstool>`_
+
+- Bootlin's toolchain collection: `<https://toolchains.bootlin.com>`_
+
+Once the toolchain is installed in the system, make sure its "bin" folder
+is added in your ``PATH`` environment variable. Then set ``ARCH=arc`` &
+``CROSS_COMPILE=arc-linux`` (or whatever matches installed ARC toolchain prefix)
+and then as usual ``make defconfig && make``.
+
+This will produce "vmlinux" file in the root of the kernel source tree
+usable for loading on the target system via JTAG.
+If you need to get an image usable with U-Boot bootloader,
+type ``make uImage`` and ``uImage`` will be produced in ``arch/arc/boot``
+folder.
diff --git a/Documentation/arch/arc/features.rst b/Documentation/arch/arc/features.rst
new file mode 100644
index 000000000000..49ff446ff744
--- /dev/null
+++ b/Documentation/arch/arc/features.rst
@@ -0,0 +1,3 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. kernel-feat:: features arc
diff --git a/Documentation/arch/arc/index.rst b/Documentation/arch/arc/index.rst
new file mode 100644
index 000000000000..7b098d4a5e3e
--- /dev/null
+++ b/Documentation/arch/arc/index.rst
@@ -0,0 +1,17 @@
+===================
+ARC architecture
+===================
+
+.. toctree::
+ :maxdepth: 1
+
+ arc
+
+ features
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
diff --git a/Documentation/arch/arm/arm.rst b/Documentation/arch/arm/arm.rst
new file mode 100644
index 000000000000..7b41b89dd9bd
--- /dev/null
+++ b/Documentation/arch/arm/arm.rst
@@ -0,0 +1,212 @@
+=======================
+ARM Linux 2.6 and upper
+=======================
+
+ Please check <ftp://ftp.arm.linux.org.uk/pub/armlinux> for
+ updates.
+
+Compilation of kernel
+---------------------
+
+ In order to compile ARM Linux, you will need a compiler capable of
+ generating ARM ELF code with GNU extensions. GCC 3.3 is known to be
+ a good compiler. Fortunately, you needn't guess. The kernel will report
+ an error if your compiler is a recognized offender.
+
+ To build ARM Linux natively, you shouldn't have to alter the ARCH = line
+ in the top level Makefile. However, if you don't have the ARM Linux ELF
+ tools installed as default, then you should change the CROSS_COMPILE
+ line as detailed below.
+
+ If you wish to cross-compile, then alter the following lines in the top
+ level make file::
+
+ ARCH = <whatever>
+
+ with::
+
+ ARCH = arm
+
+ and::
+
+ CROSS_COMPILE=
+
+ to::
+
+ CROSS_COMPILE=<your-path-to-your-compiler-without-gcc>
+
+ eg.::
+
+ CROSS_COMPILE=arm-linux-
+
+ Do a 'make config', followed by 'make Image' to build the kernel
+ (arch/arm/boot/Image). A compressed image can be built by doing a
+ 'make zImage' instead of 'make Image'.
+
+
+Bug reports etc
+---------------
+
+ Please send patches to the patch system. For more information, see
+ http://www.arm.linux.org.uk/developer/patches/info.php Always include some
+ explanation as to what the patch does and why it is needed.
+
+ Bug reports should be sent to linux-arm-kernel@lists.arm.linux.org.uk,
+ or submitted through the web form at
+ http://www.arm.linux.org.uk/developer/
+
+ When sending bug reports, please ensure that they contain all relevant
+ information, eg. the kernel messages that were printed before/during
+ the problem, what you were doing, etc.
+
+
+Include files
+-------------
+
+ Several new include directories have been created under include/asm-arm,
+ which are there to reduce the clutter in the top-level directory. These
+ directories, and their purpose is listed below:
+
+ ============= ==========================================================
+ `arch-*` machine/platform specific header files
+ `hardware` driver-internal ARM specific data structures/definitions
+ `mach` descriptions of generic ARM to specific machine interfaces
+ `proc-*` processor dependent header files (currently only two
+ categories)
+ ============= ==========================================================
+
+
+Machine/Platform support
+------------------------
+
+ The ARM tree contains support for a lot of different machine types. To
+ continue supporting these differences, it has become necessary to split
+ machine-specific parts by directory. For this, the machine category is
+ used to select which directories and files get included (we will use
+ $(MACHINE) to refer to the category)
+
+ To this end, we now have arch/arm/mach-$(MACHINE) directories which are
+ designed to house the non-driver files for a particular machine (eg, PCI,
+ memory management, architecture definitions etc). For all future
+ machines, there should be a corresponding arch/arm/mach-$(MACHINE)/include/mach
+ directory.
+
+
+Modules
+-------
+
+ Although modularisation is supported (and required for the FP emulator),
+ each module on an ARM2/ARM250/ARM3 machine when is loaded will take
+ memory up to the next 32k boundary due to the size of the pages.
+ Therefore, is modularisation on these machines really worth it?
+
+ However, ARM6 and up machines allow modules to take multiples of 4k, and
+ as such Acorn RiscPCs and other architectures using these processors can
+ make good use of modularisation.
+
+
+ADFS Image files
+----------------
+
+ You can access image files on your ADFS partitions by mounting the ADFS
+ partition, and then using the loopback device driver. You must have
+ losetup installed.
+
+ Please note that the PCEmulator DOS partitions have a partition table at
+ the start, and as such, you will have to give '-o offset' to losetup.
+
+
+Request to developers
+---------------------
+
+ When writing device drivers which include a separate assembler file, please
+ include it in with the C file, and not the arch/arm/lib directory. This
+ allows the driver to be compiled as a loadable module without requiring
+ half the code to be compiled into the kernel image.
+
+ In general, try to avoid using assembler unless it is really necessary. It
+ makes drivers far less easy to port to other hardware.
+
+
+ST506 hard drives
+-----------------
+
+ The ST506 hard drive controllers seem to be working fine (if a little
+ slowly). At the moment they will only work off the controllers on an
+ A4x0's motherboard, but for it to work off a Podule just requires
+ someone with a podule to add the addresses for the IRQ mask and the
+ HDC base to the source.
+
+ As of 31/3/96 it works with two drives (you should get the ADFS
+ `*configure` harddrive set to 2). I've got an internal 20MB and a great
+ big external 5.25" FH 64MB drive (who could ever want more :-) ).
+
+ I've just got 240K/s off it (a dd with bs=128k); that's about half of what
+ RiscOS gets; but it's a heck of a lot better than the 50K/s I was getting
+ last week :-)
+
+ Known bug: Drive data errors can cause a hang; including cases where
+ the controller has fixed the error using ECC. (Possibly ONLY
+ in that case...hmm).
+
+
+1772 Floppy
+-----------
+ This also seems to work OK, but hasn't been stressed much lately. It
+ hasn't got any code for disc change detection in there at the moment which
+ could be a bit of a problem! Suggestions on the correct way to do this
+ are welcome.
+
+
+`CONFIG_MACH_` and `CONFIG_ARCH_`
+---------------------------------
+ A change was made in 2003 to the macro names for new machines.
+ Historically, `CONFIG_ARCH_` was used for the bonafide architecture,
+ e.g. SA1100, as well as implementations of the architecture,
+ e.g. Assabet. It was decided to change the implementation macros
+ to read `CONFIG_MACH_` for clarity. Moreover, a retroactive fixup has
+ not been made because it would complicate patching.
+
+ Previous registrations may be found online.
+
+ <http://www.arm.linux.org.uk/developer/machines/>
+
+Kernel entry (head.S)
+---------------------
+ The initial entry into the kernel is via head.S, which uses machine
+ independent code. The machine is selected by the value of 'r1' on
+ entry, which must be kept unique.
+
+ Due to the large number of machines which the ARM port of Linux provides
+ for, we have a method to manage this which ensures that we don't end up
+ duplicating large amounts of code.
+
+ We group machine (or platform) support code into machine classes. A
+ class typically based around one or more system on a chip devices, and
+ acts as a natural container around the actual implementations. These
+ classes are given directories - arch/arm/mach-<class> - which contain
+ the source files and include/mach/ to support the machine class.
+
+ For example, the SA1100 class is based upon the SA1100 and SA1110 SoC
+ devices, and contains the code to support the way the on-board and off-
+ board devices are used, or the device is setup, and provides that
+ machine specific "personality."
+
+ For platforms that support device tree (DT), the machine selection is
+ controlled at runtime by passing the device tree blob to the kernel. At
+ compile-time, support for the machine type must be selected. This allows for
+ a single multiplatform kernel build to be used for several machine types.
+
+ For platforms that do not use device tree, this machine selection is
+ controlled by the machine type ID, which acts both as a run-time and a
+ compile-time code selection method. You can register a new machine via the
+ web site at:
+
+ <http://www.arm.linux.org.uk/developer/machines/>
+
+ Note: Please do not register a machine type for DT-only platforms. If your
+ platform is DT-only, you do not need a registered machine type.
+
+---
+
+Russell King (15/03/2004)
diff --git a/Documentation/arch/arm/booting.rst b/Documentation/arch/arm/booting.rst
new file mode 100644
index 000000000000..5974e37b3d20
--- /dev/null
+++ b/Documentation/arch/arm/booting.rst
@@ -0,0 +1,237 @@
+=================
+Booting ARM Linux
+=================
+
+Author: Russell King
+
+Date : 18 May 2002
+
+The following documentation is relevant to 2.4.18-rmk6 and beyond.
+
+In order to boot ARM Linux, you require a boot loader, which is a small
+program that runs before the main kernel. The boot loader is expected
+to initialise various devices, and eventually call the Linux kernel,
+passing information to the kernel.
+
+Essentially, the boot loader should provide (as a minimum) the
+following:
+
+1. Setup and initialise the RAM.
+2. Initialise one serial port.
+3. Detect the machine type.
+4. Setup the kernel tagged list.
+5. Load initramfs.
+6. Call the kernel image.
+
+
+1. Setup and initialise RAM
+---------------------------
+
+Existing boot loaders:
+ MANDATORY
+New boot loaders:
+ MANDATORY
+
+The boot loader is expected to find and initialise all RAM that the
+kernel will use for volatile data storage in the system. It performs
+this in a machine dependent manner. (It may use internal algorithms
+to automatically locate and size all RAM, or it may use knowledge of
+the RAM in the machine, or any other method the boot loader designer
+sees fit.)
+
+
+2. Initialise one serial port
+-----------------------------
+
+Existing boot loaders:
+ OPTIONAL, RECOMMENDED
+New boot loaders:
+ OPTIONAL, RECOMMENDED
+
+The boot loader should initialise and enable one serial port on the
+target. This allows the kernel serial driver to automatically detect
+which serial port it should use for the kernel console (generally
+used for debugging purposes, or communication with the target.)
+
+As an alternative, the boot loader can pass the relevant 'console='
+option to the kernel via the tagged lists specifying the port, and
+serial format options as described in
+
+ Documentation/admin-guide/kernel-parameters.rst.
+
+
+3. Detect the machine type
+--------------------------
+
+Existing boot loaders:
+ OPTIONAL
+New boot loaders:
+ MANDATORY except for DT-only platforms
+
+The boot loader should detect the machine type its running on by some
+method. Whether this is a hard coded value or some algorithm that
+looks at the connected hardware is beyond the scope of this document.
+The boot loader must ultimately be able to provide a MACH_TYPE_xxx
+value to the kernel. (see linux/arch/arm/tools/mach-types). This
+should be passed to the kernel in register r1.
+
+For DT-only platforms, the machine type will be determined by device
+tree. set the machine type to all ones (~0). This is not strictly
+necessary, but assures that it will not match any existing types.
+
+4. Setup boot data
+------------------
+
+Existing boot loaders:
+ OPTIONAL, HIGHLY RECOMMENDED
+New boot loaders:
+ MANDATORY
+
+The boot loader must provide either a tagged list or a dtb image for
+passing configuration data to the kernel. The physical address of the
+boot data is passed to the kernel in register r2.
+
+4a. Setup the kernel tagged list
+--------------------------------
+
+The boot loader must create and initialise the kernel tagged list.
+A valid tagged list starts with ATAG_CORE and ends with ATAG_NONE.
+The ATAG_CORE tag may or may not be empty. An empty ATAG_CORE tag
+has the size field set to '2' (0x00000002). The ATAG_NONE must set
+the size field to zero.
+
+Any number of tags can be placed in the list. It is undefined
+whether a repeated tag appends to the information carried by the
+previous tag, or whether it replaces the information in its
+entirety; some tags behave as the former, others the latter.
+
+The boot loader must pass at a minimum the size and location of
+the system memory, and root filesystem location. Therefore, the
+minimum tagged list should look::
+
+ +-----------+
+ base -> | ATAG_CORE | |
+ +-----------+ |
+ | ATAG_MEM | | increasing address
+ +-----------+ |
+ | ATAG_NONE | |
+ +-----------+ v
+
+The tagged list should be stored in system RAM.
+
+The tagged list must be placed in a region of memory where neither
+the kernel decompressor nor initrd 'bootp' program will overwrite
+it. The recommended placement is in the first 16KiB of RAM.
+
+4b. Setup the device tree
+-------------------------
+
+The boot loader must load a device tree image (dtb) into system ram
+at a 64bit aligned address and initialize it with the boot data. The
+dtb format is documented at https://www.devicetree.org/specifications/.
+The kernel will look for the dtb magic value of 0xd00dfeed at the dtb
+physical address to determine if a dtb has been passed instead of a
+tagged list.
+
+The boot loader must pass at a minimum the size and location of the
+system memory, and the root filesystem location. The dtb must be
+placed in a region of memory where the kernel decompressor will not
+overwrite it, while remaining within the region which will be covered
+by the kernel's low-memory mapping.
+
+A safe location is just above the 128MiB boundary from start of RAM.
+
+5. Load initramfs.
+------------------
+
+Existing boot loaders:
+ OPTIONAL
+New boot loaders:
+ OPTIONAL
+
+If an initramfs is in use then, as with the dtb, it must be placed in
+a region of memory where the kernel decompressor will not overwrite it
+while also with the region which will be covered by the kernel's
+low-memory mapping.
+
+A safe location is just above the device tree blob which itself will
+be loaded just above the 128MiB boundary from the start of RAM as
+recommended above.
+
+6. Calling the kernel image
+---------------------------
+
+Existing boot loaders:
+ MANDATORY
+New boot loaders:
+ MANDATORY
+
+There are two options for calling the kernel zImage. If the zImage
+is stored in flash, and is linked correctly to be run from flash,
+then it is legal for the boot loader to call the zImage in flash
+directly.
+
+The zImage may also be placed in system RAM and called there. The
+kernel should be placed in the first 128MiB of RAM. It is recommended
+that it is loaded above 32MiB in order to avoid the need to relocate
+prior to decompression, which will make the boot process slightly
+faster.
+
+When booting a raw (non-zImage) kernel the constraints are tighter.
+In this case the kernel must be loaded at an offset into system equal
+to TEXT_OFFSET - PAGE_OFFSET.
+
+In any case, the following conditions must be met:
+
+- Quiesce all DMA capable devices so that memory does not get
+ corrupted by bogus network packets or disk data. This will save
+ you many hours of debug.
+
+- CPU register settings
+
+ - r0 = 0,
+ - r1 = machine type number discovered in (3) above.
+ - r2 = physical address of tagged list in system RAM, or
+ physical address of device tree block (dtb) in system RAM
+
+- CPU mode
+
+ All forms of interrupts must be disabled (IRQs and FIQs)
+
+ For CPUs which do not include the ARM virtualization extensions, the
+ CPU must be in SVC mode. (A special exception exists for Angel)
+
+ CPUs which include support for the virtualization extensions can be
+ entered in HYP mode in order to enable the kernel to make full use of
+ these extensions. This is the recommended boot method for such CPUs,
+ unless the virtualisations are already in use by a pre-installed
+ hypervisor.
+
+ If the kernel is not entered in HYP mode for any reason, it must be
+ entered in SVC mode.
+
+- Caches, MMUs
+
+ The MMU must be off.
+
+ Instruction cache may be on or off.
+
+ Data cache must be off.
+
+ If the kernel is entered in HYP mode, the above requirements apply to
+ the HYP mode configuration in addition to the ordinary PL1 (privileged
+ kernel modes) configuration. In addition, all traps into the
+ hypervisor must be disabled, and PL1 access must be granted for all
+ peripherals and CPU resources for which this is architecturally
+ possible. Except for entering in HYP mode, the system configuration
+ should be such that a kernel which does not include support for the
+ virtualization extensions can boot correctly without extra help.
+
+- The boot loader is expected to call the kernel image by jumping
+ directly to the first instruction of the kernel image.
+
+ On CPUs supporting the ARM instruction set, the entry must be
+ made in ARM state, even for a Thumb-2 kernel.
+
+ On CPUs supporting only the Thumb instruction set such as
+ Cortex-M class CPUs, the entry must be made in Thumb state.
diff --git a/Documentation/arch/arm/cluster-pm-race-avoidance.rst b/Documentation/arch/arm/cluster-pm-race-avoidance.rst
new file mode 100644
index 000000000000..aa58603d3f28
--- /dev/null
+++ b/Documentation/arch/arm/cluster-pm-race-avoidance.rst
@@ -0,0 +1,533 @@
+=========================================================
+Cluster-wide Power-up/power-down race avoidance algorithm
+=========================================================
+
+This file documents the algorithm which is used to coordinate CPU and
+cluster setup and teardown operations and to manage hardware coherency
+controls safely.
+
+The section "Rationale" explains what the algorithm is for and why it is
+needed. "Basic model" explains general concepts using a simplified view
+of the system. The other sections explain the actual details of the
+algorithm in use.
+
+
+Rationale
+---------
+
+In a system containing multiple CPUs, it is desirable to have the
+ability to turn off individual CPUs when the system is idle, reducing
+power consumption and thermal dissipation.
+
+In a system containing multiple clusters of CPUs, it is also desirable
+to have the ability to turn off entire clusters.
+
+Turning entire clusters off and on is a risky business, because it
+involves performing potentially destructive operations affecting a group
+of independently running CPUs, while the OS continues to run. This
+means that we need some coordination in order to ensure that critical
+cluster-level operations are only performed when it is truly safe to do
+so.
+
+Simple locking may not be sufficient to solve this problem, because
+mechanisms like Linux spinlocks may rely on coherency mechanisms which
+are not immediately enabled when a cluster powers up. Since enabling or
+disabling those mechanisms may itself be a non-atomic operation (such as
+writing some hardware registers and invalidating large caches), other
+methods of coordination are required in order to guarantee safe
+power-down and power-up at the cluster level.
+
+The mechanism presented in this document describes a coherent memory
+based protocol for performing the needed coordination. It aims to be as
+lightweight as possible, while providing the required safety properties.
+
+
+Basic model
+-----------
+
+Each cluster and CPU is assigned a state, as follows:
+
+ - DOWN
+ - COMING_UP
+ - UP
+ - GOING_DOWN
+
+::
+
+ +---------> UP ----------+
+ | v
+
+ COMING_UP GOING_DOWN
+
+ ^ |
+ +--------- DOWN <--------+
+
+
+DOWN:
+ The CPU or cluster is not coherent, and is either powered off or
+ suspended, or is ready to be powered off or suspended.
+
+COMING_UP:
+ The CPU or cluster has committed to moving to the UP state.
+ It may be part way through the process of initialisation and
+ enabling coherency.
+
+UP:
+ The CPU or cluster is active and coherent at the hardware
+ level. A CPU in this state is not necessarily being used
+ actively by the kernel.
+
+GOING_DOWN:
+ The CPU or cluster has committed to moving to the DOWN
+ state. It may be part way through the process of teardown and
+ coherency exit.
+
+
+Each CPU has one of these states assigned to it at any point in time.
+The CPU states are described in the "CPU state" section, below.
+
+Each cluster is also assigned a state, but it is necessary to split the
+state value into two parts (the "cluster" state and "inbound" state) and
+to introduce additional states in order to avoid races between different
+CPUs in the cluster simultaneously modifying the state. The cluster-
+level states are described in the "Cluster state" section.
+
+To help distinguish the CPU states from cluster states in this
+discussion, the state names are given a `CPU_` prefix for the CPU states,
+and a `CLUSTER_` or `INBOUND_` prefix for the cluster states.
+
+
+CPU state
+---------
+
+In this algorithm, each individual core in a multi-core processor is
+referred to as a "CPU". CPUs are assumed to be single-threaded:
+therefore, a CPU can only be doing one thing at a single point in time.
+
+This means that CPUs fit the basic model closely.
+
+The algorithm defines the following states for each CPU in the system:
+
+ - CPU_DOWN
+ - CPU_COMING_UP
+ - CPU_UP
+ - CPU_GOING_DOWN
+
+::
+
+ cluster setup and
+ CPU setup complete policy decision
+ +-----------> CPU_UP ------------+
+ | v
+
+ CPU_COMING_UP CPU_GOING_DOWN
+
+ ^ |
+ +----------- CPU_DOWN <----------+
+ policy decision CPU teardown complete
+ or hardware event
+
+
+The definitions of the four states correspond closely to the states of
+the basic model.
+
+Transitions between states occur as follows.
+
+A trigger event (spontaneous) means that the CPU can transition to the
+next state as a result of making local progress only, with no
+requirement for any external event to happen.
+
+
+CPU_DOWN:
+ A CPU reaches the CPU_DOWN state when it is ready for
+ power-down. On reaching this state, the CPU will typically
+ power itself down or suspend itself, via a WFI instruction or a
+ firmware call.
+
+ Next state:
+ CPU_COMING_UP
+ Conditions:
+ none
+
+ Trigger events:
+ a) an explicit hardware power-up operation, resulting
+ from a policy decision on another CPU;
+
+ b) a hardware event, such as an interrupt.
+
+
+CPU_COMING_UP:
+ A CPU cannot start participating in hardware coherency until the
+ cluster is set up and coherent. If the cluster is not ready,
+ then the CPU will wait in the CPU_COMING_UP state until the
+ cluster has been set up.
+
+ Next state:
+ CPU_UP
+ Conditions:
+ The CPU's parent cluster must be in CLUSTER_UP.
+ Trigger events:
+ Transition of the parent cluster to CLUSTER_UP.
+
+ Refer to the "Cluster state" section for a description of the
+ CLUSTER_UP state.
+
+
+CPU_UP:
+ When a CPU reaches the CPU_UP state, it is safe for the CPU to
+ start participating in local coherency.
+
+ This is done by jumping to the kernel's CPU resume code.
+
+ Note that the definition of this state is slightly different
+ from the basic model definition: CPU_UP does not mean that the
+ CPU is coherent yet, but it does mean that it is safe to resume
+ the kernel. The kernel handles the rest of the resume
+ procedure, so the remaining steps are not visible as part of the
+ race avoidance algorithm.
+
+ The CPU remains in this state until an explicit policy decision
+ is made to shut down or suspend the CPU.
+
+ Next state:
+ CPU_GOING_DOWN
+ Conditions:
+ none
+ Trigger events:
+ explicit policy decision
+
+
+CPU_GOING_DOWN:
+ While in this state, the CPU exits coherency, including any
+ operations required to achieve this (such as cleaning data
+ caches).
+
+ Next state:
+ CPU_DOWN
+ Conditions:
+ local CPU teardown complete
+ Trigger events:
+ (spontaneous)
+
+
+Cluster state
+-------------
+
+A cluster is a group of connected CPUs with some common resources.
+Because a cluster contains multiple CPUs, it can be doing multiple
+things at the same time. This has some implications. In particular, a
+CPU can start up while another CPU is tearing the cluster down.
+
+In this discussion, the "outbound side" is the view of the cluster state
+as seen by a CPU tearing the cluster down. The "inbound side" is the
+view of the cluster state as seen by a CPU setting the CPU up.
+
+In order to enable safe coordination in such situations, it is important
+that a CPU which is setting up the cluster can advertise its state
+independently of the CPU which is tearing down the cluster. For this
+reason, the cluster state is split into two parts:
+
+ "cluster" state: The global state of the cluster; or the state
+ on the outbound side:
+
+ - CLUSTER_DOWN
+ - CLUSTER_UP
+ - CLUSTER_GOING_DOWN
+
+ "inbound" state: The state of the cluster on the inbound side.
+
+ - INBOUND_NOT_COMING_UP
+ - INBOUND_COMING_UP
+
+
+ The different pairings of these states results in six possible
+ states for the cluster as a whole::
+
+ CLUSTER_UP
+ +==========> INBOUND_NOT_COMING_UP -------------+
+ # |
+ |
+ CLUSTER_UP <----+ |
+ INBOUND_COMING_UP | v
+
+ ^ CLUSTER_GOING_DOWN CLUSTER_GOING_DOWN
+ # INBOUND_COMING_UP <=== INBOUND_NOT_COMING_UP
+
+ CLUSTER_DOWN | |
+ INBOUND_COMING_UP <----+ |
+ |
+ ^ |
+ +=========== CLUSTER_DOWN <------------+
+ INBOUND_NOT_COMING_UP
+
+ Transitions -----> can only be made by the outbound CPU, and
+ only involve changes to the "cluster" state.
+
+ Transitions ===##> can only be made by the inbound CPU, and only
+ involve changes to the "inbound" state, except where there is no
+ further transition possible on the outbound side (i.e., the
+ outbound CPU has put the cluster into the CLUSTER_DOWN state).
+
+ The race avoidance algorithm does not provide a way to determine
+ which exact CPUs within the cluster play these roles. This must
+ be decided in advance by some other means. Refer to the section
+ "Last man and first man selection" for more explanation.
+
+
+ CLUSTER_DOWN/INBOUND_NOT_COMING_UP is the only state where the
+ cluster can actually be powered down.
+
+ The parallelism of the inbound and outbound CPUs is observed by
+ the existence of two different paths from CLUSTER_GOING_DOWN/
+ INBOUND_NOT_COMING_UP (corresponding to GOING_DOWN in the basic
+ model) to CLUSTER_DOWN/INBOUND_COMING_UP (corresponding to
+ COMING_UP in the basic model). The second path avoids cluster
+ teardown completely.
+
+ CLUSTER_UP/INBOUND_COMING_UP is equivalent to UP in the basic
+ model. The final transition to CLUSTER_UP/INBOUND_NOT_COMING_UP
+ is trivial and merely resets the state machine ready for the
+ next cycle.
+
+ Details of the allowable transitions follow.
+
+ The next state in each case is notated
+
+ <cluster state>/<inbound state> (<transitioner>)
+
+ where the <transitioner> is the side on which the transition
+ can occur; either the inbound or the outbound side.
+
+
+CLUSTER_DOWN/INBOUND_NOT_COMING_UP:
+ Next state:
+ CLUSTER_DOWN/INBOUND_COMING_UP (inbound)
+ Conditions:
+ none
+
+ Trigger events:
+ a) an explicit hardware power-up operation, resulting
+ from a policy decision on another CPU;
+
+ b) a hardware event, such as an interrupt.
+
+
+CLUSTER_DOWN/INBOUND_COMING_UP:
+
+ In this state, an inbound CPU sets up the cluster, including
+ enabling of hardware coherency at the cluster level and any
+ other operations (such as cache invalidation) which are required
+ in order to achieve this.
+
+ The purpose of this state is to do sufficient cluster-level
+ setup to enable other CPUs in the cluster to enter coherency
+ safely.
+
+ Next state:
+ CLUSTER_UP/INBOUND_COMING_UP (inbound)
+ Conditions:
+ cluster-level setup and hardware coherency complete
+ Trigger events:
+ (spontaneous)
+
+
+CLUSTER_UP/INBOUND_COMING_UP:
+
+ Cluster-level setup is complete and hardware coherency is
+ enabled for the cluster. Other CPUs in the cluster can safely
+ enter coherency.
+
+ This is a transient state, leading immediately to
+ CLUSTER_UP/INBOUND_NOT_COMING_UP. All other CPUs on the cluster
+ should consider treat these two states as equivalent.
+
+ Next state:
+ CLUSTER_UP/INBOUND_NOT_COMING_UP (inbound)
+ Conditions:
+ none
+ Trigger events:
+ (spontaneous)
+
+
+CLUSTER_UP/INBOUND_NOT_COMING_UP:
+
+ Cluster-level setup is complete and hardware coherency is
+ enabled for the cluster. Other CPUs in the cluster can safely
+ enter coherency.
+
+ The cluster will remain in this state until a policy decision is
+ made to power the cluster down.
+
+ Next state:
+ CLUSTER_GOING_DOWN/INBOUND_NOT_COMING_UP (outbound)
+ Conditions:
+ none
+ Trigger events:
+ policy decision to power down the cluster
+
+
+CLUSTER_GOING_DOWN/INBOUND_NOT_COMING_UP:
+
+ An outbound CPU is tearing the cluster down. The selected CPU
+ must wait in this state until all CPUs in the cluster are in the
+ CPU_DOWN state.
+
+ When all CPUs are in the CPU_DOWN state, the cluster can be torn
+ down, for example by cleaning data caches and exiting
+ cluster-level coherency.
+
+ To avoid wasteful unnecessary teardown operations, the outbound
+ should check the inbound cluster state for asynchronous
+ transitions to INBOUND_COMING_UP. Alternatively, individual
+ CPUs can be checked for entry into CPU_COMING_UP or CPU_UP.
+
+
+ Next states:
+
+ CLUSTER_DOWN/INBOUND_NOT_COMING_UP (outbound)
+ Conditions:
+ cluster torn down and ready to power off
+ Trigger events:
+ (spontaneous)
+
+ CLUSTER_GOING_DOWN/INBOUND_COMING_UP (inbound)
+ Conditions:
+ none
+
+ Trigger events:
+ a) an explicit hardware power-up operation,
+ resulting from a policy decision on another
+ CPU;
+
+ b) a hardware event, such as an interrupt.
+
+
+CLUSTER_GOING_DOWN/INBOUND_COMING_UP:
+
+ The cluster is (or was) being torn down, but another CPU has
+ come online in the meantime and is trying to set up the cluster
+ again.
+
+ If the outbound CPU observes this state, it has two choices:
+
+ a) back out of teardown, restoring the cluster to the
+ CLUSTER_UP state;
+
+ b) finish tearing the cluster down and put the cluster
+ in the CLUSTER_DOWN state; the inbound CPU will
+ set up the cluster again from there.
+
+ Choice (a) permits the removal of some latency by avoiding
+ unnecessary teardown and setup operations in situations where
+ the cluster is not really going to be powered down.
+
+
+ Next states:
+
+ CLUSTER_UP/INBOUND_COMING_UP (outbound)
+ Conditions:
+ cluster-level setup and hardware
+ coherency complete
+
+ Trigger events:
+ (spontaneous)
+
+ CLUSTER_DOWN/INBOUND_COMING_UP (outbound)
+ Conditions:
+ cluster torn down and ready to power off
+
+ Trigger events:
+ (spontaneous)
+
+
+Last man and First man selection
+--------------------------------
+
+The CPU which performs cluster tear-down operations on the outbound side
+is commonly referred to as the "last man".
+
+The CPU which performs cluster setup on the inbound side is commonly
+referred to as the "first man".
+
+The race avoidance algorithm documented above does not provide a
+mechanism to choose which CPUs should play these roles.
+
+
+Last man:
+
+When shutting down the cluster, all the CPUs involved are initially
+executing Linux and hence coherent. Therefore, ordinary spinlocks can
+be used to select a last man safely, before the CPUs become
+non-coherent.
+
+
+First man:
+
+Because CPUs may power up asynchronously in response to external wake-up
+events, a dynamic mechanism is needed to make sure that only one CPU
+attempts to play the first man role and do the cluster-level
+initialisation: any other CPUs must wait for this to complete before
+proceeding.
+
+Cluster-level initialisation may involve actions such as configuring
+coherency controls in the bus fabric.
+
+The current implementation in mcpm_head.S uses a separate mutual exclusion
+mechanism to do this arbitration. This mechanism is documented in
+detail in vlocks.txt.
+
+
+Features and Limitations
+------------------------
+
+Implementation:
+
+ The current ARM-based implementation is split between
+ arch/arm/common/mcpm_head.S (low-level inbound CPU operations) and
+ arch/arm/common/mcpm_entry.c (everything else):
+
+ __mcpm_cpu_going_down() signals the transition of a CPU to the
+ CPU_GOING_DOWN state.
+
+ __mcpm_cpu_down() signals the transition of a CPU to the CPU_DOWN
+ state.
+
+ A CPU transitions to CPU_COMING_UP and then to CPU_UP via the
+ low-level power-up code in mcpm_head.S. This could
+ involve CPU-specific setup code, but in the current
+ implementation it does not.
+
+ __mcpm_outbound_enter_critical() and __mcpm_outbound_leave_critical()
+ handle transitions from CLUSTER_UP to CLUSTER_GOING_DOWN
+ and from there to CLUSTER_DOWN or back to CLUSTER_UP (in
+ the case of an aborted cluster power-down).
+
+ These functions are more complex than the __mcpm_cpu_*()
+ functions due to the extra inter-CPU coordination which
+ is needed for safe transitions at the cluster level.
+
+ A cluster transitions from CLUSTER_DOWN back to CLUSTER_UP via
+ the low-level power-up code in mcpm_head.S. This
+ typically involves platform-specific setup code,
+ provided by the platform-specific power_up_setup
+ function registered via mcpm_sync_init.
+
+Deep topologies:
+
+ As currently described and implemented, the algorithm does not
+ support CPU topologies involving more than two levels (i.e.,
+ clusters of clusters are not supported). The algorithm could be
+ extended by replicating the cluster-level states for the
+ additional topological levels, and modifying the transition
+ rules for the intermediate (non-outermost) cluster levels.
+
+
+Colophon
+--------
+
+Originally created and documented by Dave Martin for Linaro Limited, in
+collaboration with Nicolas Pitre and Achin Gupta.
+
+Copyright (C) 2012-2013 Linaro Limited
+Distributed under the terms of Version 2 of the GNU General Public
+License, as defined in linux/COPYING.
diff --git a/Documentation/arch/arm/features.rst b/Documentation/arch/arm/features.rst
new file mode 100644
index 000000000000..0e76aaf68eca
--- /dev/null
+++ b/Documentation/arch/arm/features.rst
@@ -0,0 +1,3 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. kernel-feat:: features arm
diff --git a/Documentation/arch/arm/firmware.rst b/Documentation/arch/arm/firmware.rst
new file mode 100644
index 000000000000..efd844baec1d
--- /dev/null
+++ b/Documentation/arch/arm/firmware.rst
@@ -0,0 +1,72 @@
+==========================================================================
+Interface for registering and calling firmware-specific operations for ARM
+==========================================================================
+
+Written by Tomasz Figa <t.figa@samsung.com>
+
+Some boards are running with secure firmware running in TrustZone secure
+world, which changes the way some things have to be initialized. This makes
+a need to provide an interface for such platforms to specify available firmware
+operations and call them when needed.
+
+Firmware operations can be specified by filling in a struct firmware_ops
+with appropriate callbacks and then registering it with register_firmware_ops()
+function::
+
+ void register_firmware_ops(const struct firmware_ops *ops)
+
+The ops pointer must be non-NULL. More information about struct firmware_ops
+and its members can be found in arch/arm/include/asm/firmware.h header.
+
+There is a default, empty set of operations provided, so there is no need to
+set anything if platform does not require firmware operations.
+
+To call a firmware operation, a helper macro is provided::
+
+ #define call_firmware_op(op, ...) \
+ ((firmware_ops->op) ? firmware_ops->op(__VA_ARGS__) : (-ENOSYS))
+
+the macro checks if the operation is provided and calls it or otherwise returns
+-ENOSYS to signal that given operation is not available (for example, to allow
+fallback to legacy operation).
+
+Example of registering firmware operations::
+
+ /* board file */
+
+ static int platformX_do_idle(void)
+ {
+ /* tell platformX firmware to enter idle */
+ return 0;
+ }
+
+ static int platformX_cpu_boot(int i)
+ {
+ /* tell platformX firmware to boot CPU i */
+ return 0;
+ }
+
+ static const struct firmware_ops platformX_firmware_ops = {
+ .do_idle = exynos_do_idle,
+ .cpu_boot = exynos_cpu_boot,
+ /* other operations not available on platformX */
+ };
+
+ /* init_early callback of machine descriptor */
+ static void __init board_init_early(void)
+ {
+ register_firmware_ops(&platformX_firmware_ops);
+ }
+
+Example of using a firmware operation::
+
+ /* some platform code, e.g. SMP initialization */
+
+ __raw_writel(__pa_symbol(exynos4_secondary_startup),
+ CPU1_BOOT_REG);
+
+ /* Call Exynos specific smc call */
+ if (call_firmware_op(cpu_boot, cpu) == -ENOSYS)
+ cpu_boot_legacy(...); /* Try legacy way */
+
+ gic_raise_softirq(cpumask_of(cpu), 1);
diff --git a/Documentation/arch/arm/google/chromebook-boot-flow.rst b/Documentation/arch/arm/google/chromebook-boot-flow.rst
new file mode 100644
index 000000000000..36da77684bba
--- /dev/null
+++ b/Documentation/arch/arm/google/chromebook-boot-flow.rst
@@ -0,0 +1,69 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================================
+Chromebook Boot Flow
+======================================
+
+Most recent Chromebooks that use device tree are using the opensource
+depthcharge_ bootloader. Depthcharge_ expects the OS to be packaged as a `FIT
+Image`_ which contains an OS image as well as a collection of device trees. It
+is up to depthcharge_ to pick the right device tree from the `FIT Image`_ and
+provide it to the OS.
+
+The scheme that depthcharge_ uses to pick the device tree takes into account
+three variables:
+
+- Board name, specified at depthcharge_ compile time. This is $(BOARD) below.
+- Board revision number, determined at runtime (perhaps by reading GPIO
+ strappings, perhaps via some other method). This is $(REV) below.
+- SKU number, read from GPIO strappings at boot time. This is $(SKU) below.
+
+For recent Chromebooks, depthcharge_ creates a match list that looks like this:
+
+- google,$(BOARD)-rev$(REV)-sku$(SKU)
+- google,$(BOARD)-rev$(REV)
+- google,$(BOARD)-sku$(SKU)
+- google,$(BOARD)
+
+Note that some older Chromebooks use a slightly different list that may
+not include SKU matching or may prioritize SKU/rev differently.
+
+Note that for some boards there may be extra board-specific logic to inject
+extra compatibles into the list, but this is uncommon.
+
+Depthcharge_ will look through all device trees in the `FIT Image`_ trying to
+find one that matches the most specific compatible. It will then look
+through all device trees in the `FIT Image`_ trying to find the one that
+matches the *second most* specific compatible, etc.
+
+When searching for a device tree, depthcharge_ doesn't care where the
+compatible string falls within a device tree's root compatible string array.
+As an example, if we're on board "lazor", rev 4, SKU 0 and we have two device
+trees:
+
+- "google,lazor-rev5-sku0", "google,lazor-rev4-sku0", "qcom,sc7180"
+- "google,lazor", "qcom,sc7180"
+
+Then depthcharge_ will pick the first device tree even though
+"google,lazor-rev4-sku0" was the second compatible listed in that device tree.
+This is because it is a more specific compatible than "google,lazor".
+
+It should be noted that depthcharge_ does not have any smarts to try to
+match board or SKU revisions that are "close by". That is to say that
+if depthcharge_ knows it's on "rev4" of a board but there is no "rev4"
+device tree then depthcharge_ *won't* look for a "rev3" device tree.
+
+In general when any significant changes are made to a board the board
+revision number is increased even if none of those changes need to
+be reflected in the device tree. Thus it's fairly common to see device
+trees with multiple revisions.
+
+It should be noted that, taking into account the above system that
+depthcharge_ has, the most flexibility is achieved if the device tree
+supporting the newest revision(s) of a board omits the "-rev{REV}"
+compatible strings. When this is done then if you get a new board
+revision and try to run old software on it then we'll at pick the
+newest device tree we know about.
+
+.. _depthcharge: https://source.chromium.org/chromiumos/chromiumos/codesearch/+/main:src/platform/depthcharge/
+.. _`FIT Image`: https://doc.coreboot.org/lib/payloads/fit.html
diff --git a/Documentation/arch/arm/index.rst b/Documentation/arch/arm/index.rst
new file mode 100644
index 000000000000..fd43502ae924
--- /dev/null
+++ b/Documentation/arch/arm/index.rst
@@ -0,0 +1,85 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+================
+ARM Architecture
+================
+
+.. toctree::
+ :maxdepth: 1
+
+ arm
+ booting
+ cluster-pm-race-avoidance
+ firmware
+ interrupts
+ kernel_mode_neon
+ kernel_user_helpers
+ memory
+ mem_alignment
+ tcm
+ setup
+ swp_emulation
+ uefi
+ vlocks
+ porting
+
+ features
+
+SoC-specific documents
+======================
+
+.. toctree::
+ :maxdepth: 1
+
+ google/chromebook-boot-flow
+
+ ixp4xx
+
+ marvell
+ microchip
+
+ netwinder
+ nwfpe/index
+
+ keystone/overview
+ keystone/knav-qmss
+
+ omap/index
+
+ pxa/mfp
+
+
+ sa1100/index
+
+ stm32/stm32f746-overview
+ stm32/overview
+ stm32/stm32h743-overview
+ stm32/stm32h750-overview
+ stm32/stm32f769-overview
+ stm32/stm32f429-overview
+ stm32/stm32mp13-overview
+ stm32/stm32mp151-overview
+ stm32/stm32mp157-overview
+ stm32/stm32-dma-mdma-chaining
+
+ sunxi
+
+ samsung/index
+
+ sunxi/clocks
+
+ spear/overview
+
+ sti/stih407-overview
+ sti/stih418-overview
+ sti/overview
+
+ vfp/release-notes
+
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
diff --git a/Documentation/arch/arm/interrupts.rst b/Documentation/arch/arm/interrupts.rst
new file mode 100644
index 000000000000..2ae70e0e9732
--- /dev/null
+++ b/Documentation/arch/arm/interrupts.rst
@@ -0,0 +1,169 @@
+==========
+Interrupts
+==========
+
+2.5.2-rmk5:
+ This is the first kernel that contains a major shake up of some of the
+ major architecture-specific subsystems.
+
+Firstly, it contains some pretty major changes to the way we handle the
+MMU TLB. Each MMU TLB variant is now handled completely separately -
+we have TLB v3, TLB v4 (without write buffer), TLB v4 (with write buffer),
+and finally TLB v4 (with write buffer, with I TLB invalidate entry).
+There is more assembly code inside each of these functions, mainly to
+allow more flexible TLB handling for the future.
+
+Secondly, the IRQ subsystem.
+
+The 2.5 kernels will be having major changes to the way IRQs are handled.
+Unfortunately, this means that machine types that touch the irq_desc[]
+array (basically all machine types) will break, and this means every
+machine type that we currently have.
+
+Lets take an example. On the Assabet with Neponset, we have::
+
+ GPIO25 IRR:2
+ SA1100 ------------> Neponset -----------> SA1111
+ IIR:1
+ -----------> USAR
+ IIR:0
+ -----------> SMC9196
+
+The way stuff currently works, all SA1111 interrupts are mutually
+exclusive of each other - if you're processing one interrupt from the
+SA1111 and another comes in, you have to wait for that interrupt to
+finish processing before you can service the new interrupt. Eg, an
+IDE PIO-based interrupt on the SA1111 excludes all other SA1111 and
+SMC9196 interrupts until it has finished transferring its multi-sector
+data, which can be a long time. Note also that since we loop in the
+SA1111 IRQ handler, SA1111 IRQs can hold off SMC9196 IRQs indefinitely.
+
+
+The new approach brings several new ideas...
+
+We introduce the concept of a "parent" and a "child". For example,
+to the Neponset handler, the "parent" is GPIO25, and the "children"d
+are SA1111, SMC9196 and USAR.
+
+We also bring the idea of an IRQ "chip" (mainly to reduce the size of
+the irqdesc array). This doesn't have to be a real "IC"; indeed the
+SA11x0 IRQs are handled by two separate "chip" structures, one for
+GPIO0-10, and another for all the rest. It is just a container for
+the various operations (maybe this'll change to a better name).
+This structure has the following operations::
+
+ struct irqchip {
+ /*
+ * Acknowledge the IRQ.
+ * If this is a level-based IRQ, then it is expected to mask the IRQ
+ * as well.
+ */
+ void (*ack)(unsigned int irq);
+ /*
+ * Mask the IRQ in hardware.
+ */
+ void (*mask)(unsigned int irq);
+ /*
+ * Unmask the IRQ in hardware.
+ */
+ void (*unmask)(unsigned int irq);
+ /*
+ * Re-run the IRQ
+ */
+ void (*rerun)(unsigned int irq);
+ /*
+ * Set the type of the IRQ.
+ */
+ int (*type)(unsigned int irq, unsigned int, type);
+ };
+
+ack
+ - required. May be the same function as mask for IRQs
+ handled by do_level_IRQ.
+mask
+ - required.
+unmask
+ - required.
+rerun
+ - optional. Not required if you're using do_level_IRQ for all
+ IRQs that use this 'irqchip'. Generally expected to re-trigger
+ the hardware IRQ if possible. If not, may call the handler
+ directly.
+type
+ - optional. If you don't support changing the type of an IRQ,
+ it should be null so people can detect if they are unable to
+ set the IRQ type.
+
+For each IRQ, we keep the following information:
+
+ - "disable" depth (number of disable_irq()s without enable_irq()s)
+ - flags indicating what we can do with this IRQ (valid, probe,
+ noautounmask) as before
+ - status of the IRQ (probing, enable, etc)
+ - chip
+ - per-IRQ handler
+ - irqaction structure list
+
+The handler can be one of the 3 standard handlers - "level", "edge" and
+"simple", or your own specific handler if you need to do something special.
+
+The "level" handler is what we currently have - its pretty simple.
+"edge" knows about the brokenness of such IRQ implementations - that you
+need to leave the hardware IRQ enabled while processing it, and queueing
+further IRQ events should the IRQ happen again while processing. The
+"simple" handler is very basic, and does not perform any hardware
+manipulation, nor state tracking. This is useful for things like the
+SMC9196 and USAR above.
+
+So, what's changed?
+===================
+
+1. Machine implementations must not write to the irqdesc array.
+
+2. New functions to manipulate the irqdesc array. The first 4 are expected
+ to be useful only to machine specific code. The last is recommended to
+ only be used by machine specific code, but may be used in drivers if
+ absolutely necessary.
+
+ set_irq_chip(irq,chip)
+ Set the mask/unmask methods for handling this IRQ
+
+ set_irq_handler(irq,handler)
+ Set the handler for this IRQ (level, edge, simple)
+
+ set_irq_chained_handler(irq,handler)
+ Set a "chained" handler for this IRQ - automatically
+ enables this IRQ (eg, Neponset and SA1111 handlers).
+
+ set_irq_flags(irq,flags)
+ Set the valid/probe/noautoenable flags.
+
+ set_irq_type(irq,type)
+ Set active the IRQ edge(s)/level. This replaces the
+ SA1111 INTPOL manipulation, and the set_GPIO_IRQ_edge()
+ function. Type should be one of IRQ_TYPE_xxx defined in
+ <linux/irq.h>
+
+3. set_GPIO_IRQ_edge() is obsolete, and should be replaced by set_irq_type.
+
+4. Direct access to SA1111 INTPOL is deprecated. Use set_irq_type instead.
+
+5. A handler is expected to perform any necessary acknowledgement of the
+ parent IRQ via the correct chip specific function. For instance, if
+ the SA1111 is directly connected to a SA1110 GPIO, then you should
+ acknowledge the SA1110 IRQ each time you re-read the SA1111 IRQ status.
+
+6. For any child which doesn't have its own IRQ enable/disable controls
+ (eg, SMC9196), the handler must mask or acknowledge the parent IRQ
+ while the child handler is called, and the child handler should be the
+ "simple" handler (not "edge" nor "level"). After the handler completes,
+ the parent IRQ should be unmasked, and the status of all children must
+ be re-checked for pending events. (see the Neponset IRQ handler for
+ details).
+
+7. fixup_irq() is gone, as is `arch/arm/mach-*/include/mach/irq.h`
+
+Please note that this will not solve all problems - some of them are
+hardware based. Mixing level-based and edge-based IRQs on the same
+parent signal (eg neponset) is one such area where a software based
+solution can't provide the full answer to low IRQ latency.
diff --git a/Documentation/arch/arm/ixp4xx.rst b/Documentation/arch/arm/ixp4xx.rst
new file mode 100644
index 000000000000..17aafc610908
--- /dev/null
+++ b/Documentation/arch/arm/ixp4xx.rst
@@ -0,0 +1,173 @@
+===========================================================
+Release Notes for Linux on Intel's IXP4xx Network Processor
+===========================================================
+
+Maintained by Deepak Saxena <dsaxena@plexity.net>
+-------------------------------------------------------------------------
+
+1. Overview
+
+Intel's IXP4xx network processor is a highly integrated SOC that
+is targeted for network applications, though it has become popular
+in industrial control and other areas due to low cost and power
+consumption. The IXP4xx family currently consists of several processors
+that support different network offload functions such as encryption,
+routing, firewalling, etc. The IXP46x family is an updated version which
+supports faster speeds, new memory and flash configurations, and more
+integration such as an on-chip I2C controller.
+
+For more information on the various versions of the CPU, see:
+
+ http://developer.intel.com/design/network/products/npfamily/ixp4xx.htm
+
+Intel also made the IXCP1100 CPU for sometime which is an IXP4xx
+stripped of much of the network intelligence.
+
+2. Linux Support
+
+Linux currently supports the following features on the IXP4xx chips:
+
+- Dual serial ports
+- PCI interface
+- Flash access (MTD/JFFS)
+- I2C through GPIO on IXP42x
+- GPIO for input/output/interrupts
+ See arch/arm/mach-ixp4xx/include/mach/platform.h for access functions.
+- Timers (watchdog, OS)
+
+The following components of the chips are not supported by Linux and
+require the use of Intel's proprietary CSR software:
+
+- USB device interface
+- Network interfaces (HSS, Utopia, NPEs, etc)
+- Network offload functionality
+
+If you need to use any of the above, you need to download Intel's
+software from:
+
+ http://developer.intel.com/design/network/products/npfamily/ixp425.htm
+
+DO NOT POST QUESTIONS TO THE LINUX MAILING LISTS REGARDING THE PROPRIETARY
+SOFTWARE.
+
+There are several websites that provide directions/pointers on using
+Intel's software:
+
+ - http://sourceforge.net/projects/ixp4xx-osdg/
+ Open Source Developer's Guide for using uClinux and the Intel libraries
+
+ - http://gatewaymaker.sourceforge.net/
+ Simple one page summary of building a gateway using an IXP425 and Linux
+
+ - http://ixp425.sourceforge.net/
+ ATM device driver for IXP425 that relies on Intel's libraries
+
+3. Known Issues/Limitations
+
+3a. Limited inbound PCI window
+
+The IXP4xx family allows for up to 256MB of memory but the PCI interface
+can only expose 64MB of that memory to the PCI bus. This means that if
+you are running with > 64MB, all PCI buffers outside of the accessible
+range will be bounced using the routines in arch/arm/common/dmabounce.c.
+
+3b. Limited outbound PCI window
+
+IXP4xx provides two methods of accessing PCI memory space:
+
+1) A direct mapped window from 0x48000000 to 0x4bffffff (64MB).
+ To access PCI via this space, we simply ioremap() the BAR
+ into the kernel and we can use the standard read[bwl]/write[bwl]
+ macros. This is the preferred method due to speed but it
+ limits the system to just 64MB of PCI memory. This can be
+ problematic if using video cards and other memory-heavy devices.
+
+2) If > 64MB of memory space is required, the IXP4xx can be
+ configured to use indirect registers to access PCI This allows
+ for up to 128MB (0x48000000 to 0x4fffffff) of memory on the bus.
+ The disadvantage of this is that every PCI access requires
+ three local register accesses plus a spinlock, but in some
+ cases the performance hit is acceptable. In addition, you cannot
+ mmap() PCI devices in this case due to the indirect nature
+ of the PCI window.
+
+By default, the direct method is used for performance reasons. If
+you need more PCI memory, enable the IXP4XX_INDIRECT_PCI config option.
+
+3c. GPIO as Interrupts
+
+Currently the code only handles level-sensitive GPIO interrupts
+
+4. Supported platforms
+
+ADI Engineering Coyote Gateway Reference Platform
+http://www.adiengineering.com/productsCoyote.html
+
+ The ADI Coyote platform is reference design for those building
+ small residential/office gateways. One NPE is connected to a 10/100
+ interface, one to 4-port 10/100 switch, and the third to and ADSL
+ interface. In addition, it also supports to POTs interfaces connected
+ via SLICs. Note that those are not supported by Linux ATM. Finally,
+ the platform has two mini-PCI slots used for 802.11[bga] cards.
+ Finally, there is an IDE port hanging off the expansion bus.
+
+Gateworks Avila Network Platform
+http://www.gateworks.com/support/overview.php
+
+ The Avila platform is basically and IXDP425 with the 4 PCI slots
+ replaced with mini-PCI slots and a CF IDE interface hanging off
+ the expansion bus.
+
+Intel IXDP425 Development Platform
+http://www.intel.com/design/network/products/npfamily/ixdpg425.htm
+
+ This is Intel's standard reference platform for the IXDP425 and is
+ also known as the Richfield board. It contains 4 PCI slots, 16MB
+ of flash, two 10/100 ports and one ADSL port.
+
+Intel IXDP465 Development Platform
+http://www.intel.com/design/network/products/npfamily/ixdp465.htm
+
+ This is basically an IXDP425 with an IXP465 and 32M of flash instead
+ of just 16.
+
+Intel IXDPG425 Development Platform
+
+ This is basically and ADI Coyote board with a NEC EHCI controller
+ added. One issue with this board is that the mini-PCI slots only
+ have the 3.3v line connected, so you can't use a PCI to mini-PCI
+ adapter with an E100 card. So to NFS root you need to use either
+ the CSR or a WiFi card and a ramdisk that BOOTPs and then does
+ a pivot_root to NFS.
+
+Motorola PrPMC1100 Processor Mezanine Card
+http://www.fountainsys.com
+
+ The PrPMC1100 is based on the IXCP1100 and is meant to plug into
+ and IXP2400/2800 system to act as the system controller. It simply
+ contains a CPU and 16MB of flash on the board and needs to be
+ plugged into a carrier board to function. Currently Linux only
+ supports the Motorola PrPMC carrier board for this platform.
+
+5. TODO LIST
+
+- Add support for Coyote IDE
+- Add support for edge-based GPIO interrupts
+- Add support for CF IDE on expansion bus
+
+6. Thanks
+
+The IXP4xx work has been funded by Intel Corp. and MontaVista Software, Inc.
+
+The following people have contributed patches/comments/etc:
+
+- Lennerty Buytenhek
+- Lutz Jaenicke
+- Justin Mayfield
+- Robert E. Ranslam
+
+[I know I've forgotten others, please email me to be added]
+
+-------------------------------------------------------------------------
+
+Last Update: 01/04/2005
diff --git a/Documentation/arch/arm/kernel_mode_neon.rst b/Documentation/arch/arm/kernel_mode_neon.rst
new file mode 100644
index 000000000000..9bfb71a2a9b9
--- /dev/null
+++ b/Documentation/arch/arm/kernel_mode_neon.rst
@@ -0,0 +1,124 @@
+================
+Kernel mode NEON
+================
+
+TL;DR summary
+-------------
+* Use only NEON instructions, or VFP instructions that don't rely on support
+ code
+* Isolate your NEON code in a separate compilation unit, and compile it with
+ '-march=armv7-a -mfpu=neon -mfloat-abi=softfp'
+* Put kernel_neon_begin() and kernel_neon_end() calls around the calls into your
+ NEON code
+* Don't sleep in your NEON code, and be aware that it will be executed with
+ preemption disabled
+
+
+Introduction
+------------
+It is possible to use NEON instructions (and in some cases, VFP instructions) in
+code that runs in kernel mode. However, for performance reasons, the NEON/VFP
+register file is not preserved and restored at every context switch or taken
+exception like the normal register file is, so some manual intervention is
+required. Furthermore, special care is required for code that may sleep [i.e.,
+may call schedule()], as NEON or VFP instructions will be executed in a
+non-preemptible section for reasons outlined below.
+
+
+Lazy preserve and restore
+-------------------------
+The NEON/VFP register file is managed using lazy preserve (on UP systems) and
+lazy restore (on both SMP and UP systems). This means that the register file is
+kept 'live', and is only preserved and restored when multiple tasks are
+contending for the NEON/VFP unit (or, in the SMP case, when a task migrates to
+another core). Lazy restore is implemented by disabling the NEON/VFP unit after
+every context switch, resulting in a trap when subsequently a NEON/VFP
+instruction is issued, allowing the kernel to step in and perform the restore if
+necessary.
+
+Any use of the NEON/VFP unit in kernel mode should not interfere with this, so
+it is required to do an 'eager' preserve of the NEON/VFP register file, and
+enable the NEON/VFP unit explicitly so no exceptions are generated on first
+subsequent use. This is handled by the function kernel_neon_begin(), which
+should be called before any kernel mode NEON or VFP instructions are issued.
+Likewise, the NEON/VFP unit should be disabled again after use to make sure user
+mode will hit the lazy restore trap upon next use. This is handled by the
+function kernel_neon_end().
+
+
+Interruptions in kernel mode
+----------------------------
+For reasons of performance and simplicity, it was decided that there shall be no
+preserve/restore mechanism for the kernel mode NEON/VFP register contents. This
+implies that interruptions of a kernel mode NEON section can only be allowed if
+they are guaranteed not to touch the NEON/VFP registers. For this reason, the
+following rules and restrictions apply in the kernel:
+* NEON/VFP code is not allowed in interrupt context;
+* NEON/VFP code is not allowed to sleep;
+* NEON/VFP code is executed with preemption disabled.
+
+If latency is a concern, it is possible to put back to back calls to
+kernel_neon_end() and kernel_neon_begin() in places in your code where none of
+the NEON registers are live. (Additional calls to kernel_neon_begin() should be
+reasonably cheap if no context switch occurred in the meantime)
+
+
+VFP and support code
+--------------------
+Earlier versions of VFP (prior to version 3) rely on software support for things
+like IEEE-754 compliant underflow handling etc. When the VFP unit needs such
+software assistance, it signals the kernel by raising an undefined instruction
+exception. The kernel responds by inspecting the VFP control registers and the
+current instruction and arguments, and emulates the instruction in software.
+
+Such software assistance is currently not implemented for VFP instructions
+executed in kernel mode. If such a condition is encountered, the kernel will
+fail and generate an OOPS.
+
+
+Separating NEON code from ordinary code
+---------------------------------------
+The compiler is not aware of the special significance of kernel_neon_begin() and
+kernel_neon_end(), i.e., that it is only allowed to issue NEON/VFP instructions
+between calls to these respective functions. Furthermore, GCC may generate NEON
+instructions of its own at -O3 level if -mfpu=neon is selected, and even if the
+kernel is currently compiled at -O2, future changes may result in NEON/VFP
+instructions appearing in unexpected places if no special care is taken.
+
+Therefore, the recommended and only supported way of using NEON/VFP in the
+kernel is by adhering to the following rules:
+
+* isolate the NEON code in a separate compilation unit and compile it with
+ '-march=armv7-a -mfpu=neon -mfloat-abi=softfp';
+* issue the calls to kernel_neon_begin(), kernel_neon_end() as well as the calls
+ into the unit containing the NEON code from a compilation unit which is *not*
+ built with the GCC flag '-mfpu=neon' set.
+
+As the kernel is compiled with '-msoft-float', the above will guarantee that
+both NEON and VFP instructions will only ever appear in designated compilation
+units at any optimization level.
+
+
+NEON assembler
+--------------
+NEON assembler is supported with no additional caveats as long as the rules
+above are followed.
+
+
+NEON code generated by GCC
+--------------------------
+The GCC option -ftree-vectorize (implied by -O3) tries to exploit implicit
+parallelism, and generates NEON code from ordinary C source code. This is fully
+supported as long as the rules above are followed.
+
+
+NEON intrinsics
+---------------
+NEON intrinsics are also supported. However, as code using NEON intrinsics
+relies on the GCC header <arm_neon.h>, (which #includes <stdint.h>), you should
+observe the following in addition to the rules above:
+
+* Compile the unit containing the NEON intrinsics with '-ffreestanding' so GCC
+ uses its builtin version of <stdint.h> (this is a C99 header which the kernel
+ does not supply);
+* Include <arm_neon.h> last, or at least after <linux/types.h>
diff --git a/Documentation/arch/arm/kernel_user_helpers.rst b/Documentation/arch/arm/kernel_user_helpers.rst
new file mode 100644
index 000000000000..eb6f3d916622
--- /dev/null
+++ b/Documentation/arch/arm/kernel_user_helpers.rst
@@ -0,0 +1,268 @@
+============================
+Kernel-provided User Helpers
+============================
+
+These are segment of kernel provided user code reachable from user space
+at a fixed address in kernel memory. This is used to provide user space
+with some operations which require kernel help because of unimplemented
+native feature and/or instructions in many ARM CPUs. The idea is for this
+code to be executed directly in user mode for best efficiency but which is
+too intimate with the kernel counter part to be left to user libraries.
+In fact this code might even differ from one CPU to another depending on
+the available instruction set, or whether it is a SMP systems. In other
+words, the kernel reserves the right to change this code as needed without
+warning. Only the entry points and their results as documented here are
+guaranteed to be stable.
+
+This is different from (but doesn't preclude) a full blown VDSO
+implementation, however a VDSO would prevent some assembly tricks with
+constants that allows for efficient branching to those code segments. And
+since those code segments only use a few cycles before returning to user
+code, the overhead of a VDSO indirect far call would add a measurable
+overhead to such minimalistic operations.
+
+User space is expected to bypass those helpers and implement those things
+inline (either in the code emitted directly by the compiler, or part of
+the implementation of a library call) when optimizing for a recent enough
+processor that has the necessary native support, but only if resulting
+binaries are already to be incompatible with earlier ARM processors due to
+usage of similar native instructions for other things. In other words
+don't make binaries unable to run on earlier processors just for the sake
+of not using these kernel helpers if your compiled code is not going to
+use new instructions for other purpose.
+
+New helpers may be added over time, so an older kernel may be missing some
+helpers present in a newer kernel. For this reason, programs must check
+the value of __kuser_helper_version (see below) before assuming that it is
+safe to call any particular helper. This check should ideally be
+performed only once at process startup time, and execution aborted early
+if the required helpers are not provided by the kernel version that
+process is running on.
+
+kuser_helper_version
+--------------------
+
+Location: 0xffff0ffc
+
+Reference declaration::
+
+ extern int32_t __kuser_helper_version;
+
+Definition:
+
+ This field contains the number of helpers being implemented by the
+ running kernel. User space may read this to determine the availability
+ of a particular helper.
+
+Usage example::
+
+ #define __kuser_helper_version (*(int32_t *)0xffff0ffc)
+
+ void check_kuser_version(void)
+ {
+ if (__kuser_helper_version < 2) {
+ fprintf(stderr, "can't do atomic operations, kernel too old\n");
+ abort();
+ }
+ }
+
+Notes:
+
+ User space may assume that the value of this field never changes
+ during the lifetime of any single process. This means that this
+ field can be read once during the initialisation of a library or
+ startup phase of a program.
+
+kuser_get_tls
+-------------
+
+Location: 0xffff0fe0
+
+Reference prototype::
+
+ void * __kuser_get_tls(void);
+
+Input:
+
+ lr = return address
+
+Output:
+
+ r0 = TLS value
+
+Clobbered registers:
+
+ none
+
+Definition:
+
+ Get the TLS value as previously set via the __ARM_NR_set_tls syscall.
+
+Usage example::
+
+ typedef void * (__kuser_get_tls_t)(void);
+ #define __kuser_get_tls (*(__kuser_get_tls_t *)0xffff0fe0)
+
+ void foo()
+ {
+ void *tls = __kuser_get_tls();
+ printf("TLS = %p\n", tls);
+ }
+
+Notes:
+
+ - Valid only if __kuser_helper_version >= 1 (from kernel version 2.6.12).
+
+kuser_cmpxchg
+-------------
+
+Location: 0xffff0fc0
+
+Reference prototype::
+
+ int __kuser_cmpxchg(int32_t oldval, int32_t newval, volatile int32_t *ptr);
+
+Input:
+
+ r0 = oldval
+ r1 = newval
+ r2 = ptr
+ lr = return address
+
+Output:
+
+ r0 = success code (zero or non-zero)
+ C flag = set if r0 == 0, clear if r0 != 0
+
+Clobbered registers:
+
+ r3, ip, flags
+
+Definition:
+
+ Atomically store newval in `*ptr` only if `*ptr` is equal to oldval.
+ Return zero if `*ptr` was changed or non-zero if no exchange happened.
+ The C flag is also set if `*ptr` was changed to allow for assembly
+ optimization in the calling code.
+
+Usage example::
+
+ typedef int (__kuser_cmpxchg_t)(int oldval, int newval, volatile int *ptr);
+ #define __kuser_cmpxchg (*(__kuser_cmpxchg_t *)0xffff0fc0)
+
+ int atomic_add(volatile int *ptr, int val)
+ {
+ int old, new;
+
+ do {
+ old = *ptr;
+ new = old + val;
+ } while(__kuser_cmpxchg(old, new, ptr));
+
+ return new;
+ }
+
+Notes:
+
+ - This routine already includes memory barriers as needed.
+
+ - Valid only if __kuser_helper_version >= 2 (from kernel version 2.6.12).
+
+kuser_memory_barrier
+--------------------
+
+Location: 0xffff0fa0
+
+Reference prototype::
+
+ void __kuser_memory_barrier(void);
+
+Input:
+
+ lr = return address
+
+Output:
+
+ none
+
+Clobbered registers:
+
+ none
+
+Definition:
+
+ Apply any needed memory barrier to preserve consistency with data modified
+ manually and __kuser_cmpxchg usage.
+
+Usage example::
+
+ typedef void (__kuser_dmb_t)(void);
+ #define __kuser_dmb (*(__kuser_dmb_t *)0xffff0fa0)
+
+Notes:
+
+ - Valid only if __kuser_helper_version >= 3 (from kernel version 2.6.15).
+
+kuser_cmpxchg64
+---------------
+
+Location: 0xffff0f60
+
+Reference prototype::
+
+ int __kuser_cmpxchg64(const int64_t *oldval,
+ const int64_t *newval,
+ volatile int64_t *ptr);
+
+Input:
+
+ r0 = pointer to oldval
+ r1 = pointer to newval
+ r2 = pointer to target value
+ lr = return address
+
+Output:
+
+ r0 = success code (zero or non-zero)
+ C flag = set if r0 == 0, clear if r0 != 0
+
+Clobbered registers:
+
+ r3, lr, flags
+
+Definition:
+
+ Atomically store the 64-bit value pointed by `*newval` in `*ptr` only if `*ptr`
+ is equal to the 64-bit value pointed by `*oldval`. Return zero if `*ptr` was
+ changed or non-zero if no exchange happened.
+
+ The C flag is also set if `*ptr` was changed to allow for assembly
+ optimization in the calling code.
+
+Usage example::
+
+ typedef int (__kuser_cmpxchg64_t)(const int64_t *oldval,
+ const int64_t *newval,
+ volatile int64_t *ptr);
+ #define __kuser_cmpxchg64 (*(__kuser_cmpxchg64_t *)0xffff0f60)
+
+ int64_t atomic_add64(volatile int64_t *ptr, int64_t val)
+ {
+ int64_t old, new;
+
+ do {
+ old = *ptr;
+ new = old + val;
+ } while(__kuser_cmpxchg64(&old, &new, ptr));
+
+ return new;
+ }
+
+Notes:
+
+ - This routine already includes memory barriers as needed.
+
+ - Due to the length of this sequence, this spans 2 conventional kuser
+ "slots", therefore 0xffff0f80 is not used as a valid entry point.
+
+ - Valid only if __kuser_helper_version >= 5 (from kernel version 3.1).
diff --git a/Documentation/arch/arm/keystone/knav-qmss.rst b/Documentation/arch/arm/keystone/knav-qmss.rst
new file mode 100644
index 000000000000..7f7638d80b42
--- /dev/null
+++ b/Documentation/arch/arm/keystone/knav-qmss.rst
@@ -0,0 +1,60 @@
+======================================================================
+Texas Instruments Keystone Navigator Queue Management SubSystem driver
+======================================================================
+
+Driver source code path
+ drivers/soc/ti/knav_qmss.c
+ drivers/soc/ti/knav_qmss_acc.c
+
+The QMSS (Queue Manager Sub System) found on Keystone SOCs is one of
+the main hardware sub system which forms the backbone of the Keystone
+multi-core Navigator. QMSS consist of queue managers, packed-data structure
+processors(PDSP), linking RAM, descriptor pools and infrastructure
+Packet DMA.
+The Queue Manager is a hardware module that is responsible for accelerating
+management of the packet queues. Packets are queued/de-queued by writing or
+reading descriptor address to a particular memory mapped location. The PDSPs
+perform QMSS related functions like accumulation, QoS, or event management.
+Linking RAM registers are used to link the descriptors which are stored in
+descriptor RAM. Descriptor RAM is configurable as internal or external memory.
+The QMSS driver manages the PDSP setups, linking RAM regions,
+queue pool management (allocation, push, pop and notify) and descriptor
+pool management.
+
+knav qmss driver provides a set of APIs to drivers to open/close qmss queues,
+allocate descriptor pools, map the descriptors, push/pop to queues etc. For
+details of the available APIs, please refers to include/linux/soc/ti/knav_qmss.h
+
+DT documentation is available at
+Documentation/devicetree/bindings/soc/ti/keystone-navigator-qmss.txt
+
+Accumulator QMSS queues using PDSP firmware
+============================================
+The QMSS PDSP firmware support accumulator channel that can monitor a single
+queue or multiple contiguous queues. drivers/soc/ti/knav_qmss_acc.c is the
+driver that interface with the accumulator PDSP. This configures
+accumulator channels defined in DTS (example in DT documentation) to monitor
+1 or 32 queues per channel. More description on the firmware is available in
+CPPI/QMSS Low Level Driver document (docs/CPPI_QMSS_LLD_SDS.pdf) at
+
+ git://git.ti.com/keystone-rtos/qmss-lld.git
+
+k2_qmss_pdsp_acc48_k2_le_1_0_0_9.bin firmware supports upto 48 accumulator
+channels. This firmware is available under ti-keystone folder of
+firmware.git at
+
+ git://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git
+
+To use copy the firmware image to lib/firmware folder of the initramfs or
+ubifs file system and provide a sym link to k2_qmss_pdsp_acc48_k2_le_1_0_0_9.bin
+in the file system and boot up the kernel. User would see
+
+ "firmware file ks2_qmss_pdsp_acc48.bin downloaded for PDSP"
+
+in the boot up log if loading of firmware to PDSP is successful.
+
+Use of accumulated queues requires the firmware image to be present in the
+file system. The driver doesn't acc queues to the supported queue range if
+PDSP is not running in the SoC. The API call fails if there is a queue open
+request to an acc queue and PDSP is not running. So make sure to copy firmware
+to file system before using these queue types.
diff --git a/Documentation/arch/arm/keystone/overview.rst b/Documentation/arch/arm/keystone/overview.rst
new file mode 100644
index 000000000000..cd90298c493c
--- /dev/null
+++ b/Documentation/arch/arm/keystone/overview.rst
@@ -0,0 +1,74 @@
+==========================
+TI Keystone Linux Overview
+==========================
+
+Introduction
+------------
+Keystone range of SoCs are based on ARM Cortex-A15 MPCore Processors
+and c66x DSP cores. This document describes essential information required
+for users to run Linux on Keystone based EVMs from Texas Instruments.
+
+Following SoCs & EVMs are currently supported:-
+
+K2HK SoC and EVM
+=================
+
+a.k.a Keystone 2 Hawking/Kepler SoC
+TCI6636K2H & TCI6636K2K: See documentation at
+
+ http://www.ti.com/product/tci6638k2k
+ http://www.ti.com/product/tci6638k2h
+
+EVM:
+ http://www.advantech.com/Support/TI-EVM/EVMK2HX_sd.aspx
+
+K2E SoC and EVM
+===============
+
+a.k.a Keystone 2 Edison SoC
+
+K2E - 66AK2E05:
+
+See documentation at
+
+ http://www.ti.com/product/66AK2E05/technicaldocuments
+
+EVM:
+ https://www.einfochips.com/index.php/partnerships/texas-instruments/k2e-evm.html
+
+K2L SoC and EVM
+===============
+
+a.k.a Keystone 2 Lamarr SoC
+
+K2L - TCI6630K2L:
+
+See documentation at
+ http://www.ti.com/product/TCI6630K2L/technicaldocuments
+
+EVM:
+ https://www.einfochips.com/index.php/partnerships/texas-instruments/k2l-evm.html
+
+Configuration
+-------------
+
+All of the K2 SoCs/EVMs share a common defconfig, keystone_defconfig and same
+image is used to boot on individual EVMs. The platform configuration is
+specified through DTS. Following are the DTS used:
+
+ K2HK EVM:
+ k2hk-evm.dts
+ K2E EVM:
+ k2e-evm.dts
+ K2L EVM:
+ k2l-evm.dts
+
+The device tree documentation for the keystone machines are located at
+
+ Documentation/devicetree/bindings/arm/keystone/keystone.txt
+
+Document Author
+---------------
+Murali Karicheri <m-karicheri2@ti.com>
+
+Copyright 2015 Texas Instruments
diff --git a/Documentation/arch/arm/marvell.rst b/Documentation/arch/arm/marvell.rst
new file mode 100644
index 000000000000..3d369a566038
--- /dev/null
+++ b/Documentation/arch/arm/marvell.rst
@@ -0,0 +1,527 @@
+================
+ARM Marvell SoCs
+================
+
+This document lists all the ARM Marvell SoCs that are currently
+supported in mainline by the Linux kernel. As the Marvell families of
+SoCs are large and complex, it is hard to understand where the support
+for a particular SoC is available in the Linux kernel. This document
+tries to help in understanding where those SoCs are supported, and to
+match them with their corresponding public datasheet, when available.
+
+Orion family
+------------
+
+ Flavors:
+ - 88F5082
+ - 88F5181 a.k.a Orion-1
+ - 88F5181L a.k.a Orion-VoIP
+ - 88F5182 a.k.a Orion-NAS
+
+ - Datasheet: https://web.archive.org/web/20210124231420/http://csclub.uwaterloo.ca/~board/ts7800/MV88F5182-datasheet.pdf
+ - Programmer's User Guide: https://web.archive.org/web/20210124231536/http://csclub.uwaterloo.ca/~board/ts7800/MV88F5182-opensource-manual.pdf
+ - User Manual: https://web.archive.org/web/20210124231631/http://csclub.uwaterloo.ca/~board/ts7800/MV88F5182-usermanual.pdf
+ - Functional Errata: https://web.archive.org/web/20210704165540/https://www.digriz.org.uk/ts78xx/88F5182_Functional_Errata.pdf
+ - 88F5281 a.k.a Orion-2
+
+ - Datasheet: https://web.archive.org/web/20131028144728/http://www.ocmodshop.com/images/reviews/networking/qnap_ts409u/marvel_88f5281_data_sheet.pdf
+ - 88F6183 a.k.a Orion-1-90
+ Homepage:
+ https://web.archive.org/web/20080607215437/http://www.marvell.com/products/media/index.jsp
+ Core:
+ Feroceon 88fr331 (88f51xx) or 88fr531-vd (88f52xx) ARMv5 compatible
+ Linux kernel mach directory:
+ arch/arm/mach-orion5x
+ Linux kernel plat directory:
+ arch/arm/plat-orion
+
+Kirkwood family
+---------------
+
+ Flavors:
+ - 88F6282 a.k.a Armada 300
+
+ - Product Brief : https://web.archive.org/web/20111027032509/http://www.marvell.com/embedded-processors/armada-300/assets/armada_310.pdf
+ - 88F6283 a.k.a Armada 310
+
+ - Product Brief : https://web.archive.org/web/20111027032509/http://www.marvell.com/embedded-processors/armada-300/assets/armada_310.pdf
+ - 88F6190
+
+ - Product Brief : https://web.archive.org/web/20130730072715/http://www.marvell.com/embedded-processors/kirkwood/assets/88F6190-003_WEB.pdf
+ - Hardware Spec : https://web.archive.org/web/20121021182835/http://www.marvell.com/embedded-processors/kirkwood/assets/HW_88F619x_OpenSource.pdf
+ - Functional Spec: https://web.archive.org/web/20130730091033/http://www.marvell.com/embedded-processors/kirkwood/assets/FS_88F6180_9x_6281_OpenSource.pdf
+ - 88F6192
+
+ - Product Brief : https://web.archive.org/web/20131113121446/http://www.marvell.com/embedded-processors/kirkwood/assets/88F6192-003_ver1.pdf
+ - Hardware Spec : https://web.archive.org/web/20121021182835/http://www.marvell.com/embedded-processors/kirkwood/assets/HW_88F619x_OpenSource.pdf
+ - Functional Spec: https://web.archive.org/web/20130730091033/http://www.marvell.com/embedded-processors/kirkwood/assets/FS_88F6180_9x_6281_OpenSource.pdf
+ - 88F6182
+ - 88F6180
+
+ - Product Brief : https://web.archive.org/web/20120616201621/http://www.marvell.com/embedded-processors/kirkwood/assets/88F6180-003_ver1.pdf
+ - Hardware Spec : https://web.archive.org/web/20130730091654/http://www.marvell.com/embedded-processors/kirkwood/assets/HW_88F6180_OpenSource.pdf
+ - Functional Spec: https://web.archive.org/web/20130730091033/http://www.marvell.com/embedded-processors/kirkwood/assets/FS_88F6180_9x_6281_OpenSource.pdf
+ - 88F6280
+
+ - Product Brief : https://web.archive.org/web/20130730091058/http://www.marvell.com/embedded-processors/kirkwood/assets/88F6280_SoC_PB-001.pdf
+ - 88F6281
+
+ - Product Brief : https://web.archive.org/web/20120131133709/http://www.marvell.com/embedded-processors/kirkwood/assets/88F6281-004_ver1.pdf
+ - Hardware Spec : https://web.archive.org/web/20120620073511/http://www.marvell.com/embedded-processors/kirkwood/assets/HW_88F6281_OpenSource.pdf
+ - Functional Spec: https://web.archive.org/web/20130730091033/http://www.marvell.com/embedded-processors/kirkwood/assets/FS_88F6180_9x_6281_OpenSource.pdf
+ - 88F6321
+ - 88F6322
+ - 88F6323
+
+ - Product Brief : https://web.archive.org/web/20120616201639/http://www.marvell.com/embedded-processors/kirkwood/assets/88f632x_pb.pdf
+ Homepage:
+ https://web.archive.org/web/20160513194943/http://www.marvell.com/embedded-processors/kirkwood/
+ Core:
+ Feroceon 88fr131 ARMv5 compatible
+ Linux kernel mach directory:
+ arch/arm/mach-mvebu
+ Linux kernel plat directory:
+ none
+
+Discovery family
+----------------
+
+ Flavors:
+ - MV78100
+
+ - Product Brief : https://web.archive.org/web/20120616194711/http://www.marvell.com/embedded-processors/discovery-innovation/assets/MV78100-003_WEB.pdf
+ - Hardware Spec : https://web.archive.org/web/20141005120451/http://www.marvell.com/embedded-processors/discovery-innovation/assets/HW_MV78100_OpenSource.pdf
+ - Functional Spec: https://web.archive.org/web/20111110081125/http://www.marvell.com/embedded-processors/discovery-innovation/assets/FS_MV76100_78100_78200_OpenSource.pdf
+ - MV78200
+
+ - Product Brief : https://web.archive.org/web/20140801121623/http://www.marvell.com/embedded-processors/discovery-innovation/assets/MV78200-002_WEB.pdf
+ - Hardware Spec : https://web.archive.org/web/20141005120458/http://www.marvell.com/embedded-processors/discovery-innovation/assets/HW_MV78200_OpenSource.pdf
+ - Functional Spec: https://web.archive.org/web/20111110081125/http://www.marvell.com/embedded-processors/discovery-innovation/assets/FS_MV76100_78100_78200_OpenSource.pdf
+
+ - MV76100
+
+ - Product Brief : https://web.archive.org/web/20140722064429/http://www.marvell.com/embedded-processors/discovery-innovation/assets/MV76100-002_WEB.pdf
+ - Hardware Spec : https://web.archive.org/web/20140722064425/http://www.marvell.com/embedded-processors/discovery-innovation/assets/HW_MV76100_OpenSource.pdf
+ - Functional Spec: https://web.archive.org/web/20111110081125/http://www.marvell.com/embedded-processors/discovery-innovation/assets/FS_MV76100_78100_78200_OpenSource.pdf
+
+ Not supported by the Linux kernel.
+
+ Homepage:
+ https://web.archive.org/web/20110924171043/http://www.marvell.com/embedded-processors/discovery-innovation/
+ Core:
+ Feroceon 88fr571-vd ARMv5 compatible
+
+ Linux kernel mach directory:
+ arch/arm/mach-mv78xx0
+ Linux kernel plat directory:
+ arch/arm/plat-orion
+
+EBU Armada family
+-----------------
+
+ Armada 370 Flavors:
+ - 88F6710
+ - 88F6707
+ - 88F6W11
+
+ - Product infos: https://web.archive.org/web/20141002083258/http://www.marvell.com/embedded-processors/armada-370/
+ - Product Brief: https://web.archive.org/web/20121115063038/http://www.marvell.com/embedded-processors/armada-300/assets/Marvell_ARMADA_370_SoC.pdf
+ - Hardware Spec: https://web.archive.org/web/20140617183747/http://www.marvell.com/embedded-processors/armada-300/assets/ARMADA370-datasheet.pdf
+ - Functional Spec: https://web.archive.org/web/20140617183701/http://www.marvell.com/embedded-processors/armada-300/assets/ARMADA370-FunctionalSpec-datasheet.pdf
+
+ Core:
+ Sheeva ARMv7 compatible PJ4B
+
+ Armada XP Flavors:
+ - MV78230
+ - MV78260
+ - MV78460
+
+ NOTE:
+ not to be confused with the non-SMP 78xx0 SoCs
+
+ - Product infos: https://web.archive.org/web/20150101215721/http://www.marvell.com/embedded-processors/armada-xp/
+ - Product Brief: https://web.archive.org/web/20121021173528/http://www.marvell.com/embedded-processors/armada-xp/assets/Marvell-ArmadaXP-SoC-product%20brief.pdf
+ - Functional Spec: https://web.archive.org/web/20180829171131/http://www.marvell.com/embedded-processors/armada-xp/assets/ARMADA-XP-Functional-SpecDatasheet.pdf
+ - Hardware Specs:
+ - https://web.archive.org/web/20141127013651/http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78230_OS.PDF
+ - https://web.archive.org/web/20141222000224/http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78260_OS.PDF
+ - https://web.archive.org/web/20141222000230/http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78460_OS.PDF
+
+ Core:
+ Sheeva ARMv7 compatible Dual-core or Quad-core PJ4B-MP
+
+ Armada 375 Flavors:
+ - 88F6720
+
+ - Product infos: https://web.archive.org/web/20140108032402/http://www.marvell.com/embedded-processors/armada-375/
+ - Product Brief: https://web.archive.org/web/20131216023516/http://www.marvell.com/embedded-processors/armada-300/assets/ARMADA_375_SoC-01_product_brief.pdf
+
+ Core:
+ ARM Cortex-A9
+
+ Armada 38x Flavors:
+ - 88F6810 Armada 380
+ - 88F6811 Armada 381
+ - 88F6821 Armada 382
+ - 88F6W21 Armada 383
+ - 88F6820 Armada 385
+ - 88F6825
+ - 88F6828 Armada 388
+
+ - Product infos: https://web.archive.org/web/20181006144616/http://www.marvell.com/embedded-processors/armada-38x/
+ - Functional Spec: https://web.archive.org/web/20200420191927/https://www.marvell.com/content/dam/marvell/en/public-collateral/embedded-processors/marvell-embedded-processors-armada-38x-functional-specifications-2015-11.pdf
+ - Hardware Spec: https://web.archive.org/web/20180713105318/https://www.marvell.com/docs/embedded-processors/assets/marvell-embedded-processors-armada-38x-hardware-specifications-2017-03.pdf
+ - Design guide: https://web.archive.org/web/20180712231737/https://www.marvell.com/docs/embedded-processors/assets/marvell-embedded-processors-armada-38x-hardware-design-guide-2017-08.pdf
+
+ Core:
+ ARM Cortex-A9
+
+ Armada 39x Flavors:
+ - 88F6920 Armada 390
+ - 88F6925 Armada 395
+ - 88F6928 Armada 398
+
+ - Product infos: https://web.archive.org/web/20181020222559/http://www.marvell.com/embedded-processors/armada-39x/
+
+ Core:
+ ARM Cortex-A9
+
+ Linux kernel mach directory:
+ arch/arm/mach-mvebu
+ Linux kernel plat directory:
+ none
+
+EBU Armada family ARMv8
+-----------------------
+
+ Armada 3710/3720 Flavors:
+ - 88F3710
+ - 88F3720
+
+ Core:
+ ARM Cortex A53 (ARMv8)
+
+ Homepage:
+ https://web.archive.org/web/20181103003602/http://www.marvell.com/embedded-processors/armada-3700/
+
+ Product Brief:
+ https://web.archive.org/web/20210121194810/https://www.marvell.com/content/dam/marvell/en/public-collateral/embedded-processors/marvell-embedded-processors-armada-37xx-product-brief-2016-01.pdf
+
+ Hardware Spec:
+ https://web.archive.org/web/20210202162011/http://www.marvell.com/content/dam/marvell/en/public-collateral/embedded-processors/marvell-embedded-processors-armada-37xx-hardware-specifications-2019-09.pdf
+
+ Device tree files:
+ arch/arm64/boot/dts/marvell/armada-37*
+
+ Armada 7K Flavors:
+ - 88F6040 (AP806 Quad 600 MHz + one CP110)
+ - 88F7020 (AP806 Dual + one CP110)
+ - 88F7040 (AP806 Quad + one CP110)
+
+ Core: ARM Cortex A72
+
+ Homepage:
+ https://web.archive.org/web/20181020222606/http://www.marvell.com/embedded-processors/armada-70xx/
+
+ Product Brief:
+ - https://web.archive.org/web/20161010105541/http://www.marvell.com/embedded-processors/assets/Armada7020PB-Jan2016.pdf
+ - https://web.archive.org/web/20160928154533/http://www.marvell.com/embedded-processors/assets/Armada7040PB-Jan2016.pdf
+
+ Device tree files:
+ arch/arm64/boot/dts/marvell/armada-70*
+
+ Armada 8K Flavors:
+ - 88F8020 (AP806 Dual + two CP110)
+ - 88F8040 (AP806 Quad + two CP110)
+ Core:
+ ARM Cortex A72
+
+ Homepage:
+ https://web.archive.org/web/20181022004830/http://www.marvell.com/embedded-processors/armada-80xx/
+
+ Product Brief:
+ - https://web.archive.org/web/20210124233728/https://www.marvell.com/content/dam/marvell/en/public-collateral/embedded-processors/marvell-embedded-processors-armada-8020-product-brief-2017-12.pdf
+ - https://web.archive.org/web/20161010105532/http://www.marvell.com/embedded-processors/assets/Armada8040PB-Jan2016.pdf
+
+ Device tree files:
+ arch/arm64/boot/dts/marvell/armada-80*
+
+ Octeon TX2 CN913x Flavors:
+ - CN9130 (AP807 Quad + one internal CP115)
+ - CN9131 (AP807 Quad + one internal CP115 + one external CP115 / 88F8215)
+ - CN9132 (AP807 Quad + one internal CP115 + two external CP115 / 88F8215)
+
+ Core:
+ ARM Cortex A72
+
+ Homepage:
+ https://web.archive.org/web/20200803150818/https://www.marvell.com/products/infrastructure-processors/multi-core-processors/octeon-tx2/octeon-tx2-cn9130.html
+
+ Product Brief:
+ https://web.archive.org/web/20200803150818/https://www.marvell.com/content/dam/marvell/en/public-collateral/embedded-processors/marvell-infrastructure-processors-octeon-tx2-cn913x-product-brief-2020-02.pdf
+
+ Device tree files:
+ arch/arm64/boot/dts/marvell/cn913*
+
+Avanta family
+-------------
+
+ Flavors:
+ - 88F6500
+ - 88F6510
+ - 88F6530P
+ - 88F6550
+ - 88F6560
+ - 88F6601
+
+ Homepage:
+ https://web.archive.org/web/20181005145041/http://www.marvell.com/broadband/
+
+ Product Brief:
+ https://web.archive.org/web/20180829171057/http://www.marvell.com/broadband/assets/Marvell_Avanta_88F6510_305_060-001_product_brief.pdf
+
+ No public datasheet available.
+
+ Core:
+ ARMv5 compatible
+
+ Linux kernel mach directory:
+ no code in mainline yet, planned for the future
+ Linux kernel plat directory:
+ no code in mainline yet, planned for the future
+
+Storage family
+--------------
+
+ Armada SP:
+ - 88RC1580
+
+ Product infos:
+ https://web.archive.org/web/20191129073953/http://www.marvell.com/storage/armada-sp/
+
+ Core:
+ Sheeva ARMv7 compatible Quad-core PJ4C
+
+ (not supported in upstream Linux kernel)
+
+Dove family (application processor)
+-----------------------------------
+
+ Flavors:
+ - 88AP510 a.k.a Armada 510
+
+ Product Brief:
+ https://web.archive.org/web/20111102020643/http://www.marvell.com/application-processors/armada-500/assets/Marvell_Armada510_SoC.pdf
+
+ Hardware Spec:
+ https://web.archive.org/web/20160428160231/http://www.marvell.com/application-processors/armada-500/assets/Armada-510-Hardware-Spec.pdf
+
+ Functional Spec:
+ https://web.archive.org/web/20120130172443/http://www.marvell.com/application-processors/armada-500/assets/Armada-510-Functional-Spec.pdf
+
+ Homepage:
+ https://web.archive.org/web/20160822232651/http://www.marvell.com/application-processors/armada-500/
+
+ Core:
+ ARMv7 compatible
+
+ Directory:
+ - arch/arm/mach-mvebu (DT enabled platforms)
+ - arch/arm/mach-dove (non-DT enabled platforms)
+
+PXA 2xx/3xx/93x/95x family
+--------------------------
+
+ Flavors:
+ - PXA21x, PXA25x, PXA26x
+ - Application processor only
+ - Core: ARMv5 XScale1 core
+ - PXA270, PXA271, PXA272
+ - Product Brief : https://web.archive.org/web/20150927135510/http://www.marvell.com/application-processors/pxa-family/assets/pxa_27x_pb.pdf
+ - Design guide : https://web.archive.org/web/20120111181937/http://www.marvell.com/application-processors/pxa-family/assets/pxa_27x_design_guide.pdf
+ - Developers manual : https://web.archive.org/web/20150927164805/http://www.marvell.com/application-processors/pxa-family/assets/pxa_27x_dev_man.pdf
+ - Specification : https://web.archive.org/web/20140211221535/http://www.marvell.com/application-processors/pxa-family/assets/pxa_27x_emts.pdf
+ - Specification update : https://web.archive.org/web/20120111104906/http://www.marvell.com/application-processors/pxa-family/assets/pxa_27x_spec_update.pdf
+ - Application processor only
+ - Core: ARMv5 XScale2 core
+ - PXA300, PXA310, PXA320
+ - PXA 300 Product Brief : https://web.archive.org/web/20120111121203/http://www.marvell.com/application-processors/pxa-family/assets/PXA300_PB_R4.pdf
+ - PXA 310 Product Brief : https://web.archive.org/web/20120111104515/http://www.marvell.com/application-processors/pxa-family/assets/PXA310_PB_R4.pdf
+ - PXA 320 Product Brief : https://web.archive.org/web/20121021182826/http://www.marvell.com/application-processors/pxa-family/assets/PXA320_PB_R4.pdf
+ - Design guide : https://web.archive.org/web/20130727144625/http://www.marvell.com/application-processors/pxa-family/assets/PXA3xx_Design_Guide.pdf
+ - Developers manual : https://web.archive.org/web/20130727144605/http://www.marvell.com/application-processors/pxa-family/assets/PXA3xx_Developers_Manual.zip
+ - Specifications : https://web.archive.org/web/20130727144559/http://www.marvell.com/application-processors/pxa-family/assets/PXA3xx_EMTS.pdf
+ - Specification Update : https://web.archive.org/web/20150927183411/http://www.marvell.com/application-processors/pxa-family/assets/PXA3xx_Spec_Update.zip
+ - Reference Manual : https://web.archive.org/web/20120111103844/http://www.marvell.com/application-processors/pxa-family/assets/PXA3xx_TavorP_BootROM_Ref_Manual.pdf
+ - Application processor only
+ - Core: ARMv5 XScale3 core
+ - PXA930, PXA935
+ - Application processor with Communication processor
+ - Core: ARMv5 XScale3 core
+ - PXA955
+ - Application processor with Communication processor
+ - Core: ARMv7 compatible Sheeva PJ4 core
+
+ Comments:
+
+ * This line of SoCs originates from the XScale family developed by
+ Intel and acquired by Marvell in ~2006. The PXA21x, PXA25x,
+ PXA26x, PXA27x, PXA3xx and PXA93x were developed by Intel, while
+ the later PXA95x were developed by Marvell.
+
+ * Due to their XScale origin, these SoCs have virtually nothing in
+ common with the other (Kirkwood, Dove, etc.) families of Marvell
+ SoCs, except with the MMP/MMP2 family of SoCs.
+
+ Linux kernel mach directory:
+ arch/arm/mach-pxa
+
+MMP/MMP2/MMP3 family (communication processor)
+----------------------------------------------
+
+ Flavors:
+ - PXA168, a.k.a Armada 168
+ - Homepage : https://web.archive.org/web/20110926014256/http://www.marvell.com/application-processors/armada-100/armada-168.jsp
+ - Product brief : https://web.archive.org/web/20111102030100/http://www.marvell.com/application-processors/armada-100/assets/pxa_168_pb.pdf
+ - Hardware manual : https://web.archive.org/web/20160428165359/http://www.marvell.com/application-processors/armada-100/assets/armada_16x_datasheet.pdf
+ - Software manual : https://web.archive.org/web/20160428154454/http://www.marvell.com/application-processors/armada-100/assets/armada_16x_software_manual.pdf
+ - Specification update : https://web.archive.org/web/20150927160338/http://www.marvell.com/application-processors/armada-100/assets/ARMADA16x_Spec_update.pdf
+ - Boot ROM manual : https://web.archive.org/web/20130727205559/http://www.marvell.com/application-processors/armada-100/assets/armada_16x_ref_manual.pdf
+ - App node package : https://web.archive.org/web/20141005090706/http://www.marvell.com/application-processors/armada-100/assets/armada_16x_app_note_package.pdf
+ - Application processor only
+ - Core: ARMv5 compatible Marvell PJ1 88sv331 (Mohawk)
+ - PXA910/PXA920
+ - Homepage : https://web.archive.org/web/20150928121236/http://www.marvell.com/communication-processors/pxa910/
+ - Product Brief : https://archive.org/download/marvell-pxa910-pb/Marvell_PXA910_Platform-001_PB.pdf
+ - Application processor with Communication processor
+ - Core: ARMv5 compatible Marvell PJ1 88sv331 (Mohawk)
+ - PXA688, a.k.a. MMP2, a.k.a Armada 610 (OLPC XO-1.75)
+ - Product Brief : https://web.archive.org/web/20111102023255/http://www.marvell.com/application-processors/armada-600/assets/armada610_pb.pdf
+ - Application processor only
+ - Core: ARMv7 compatible Sheeva PJ4 88sv581x core
+ - PXA2128, a.k.a. MMP3, a.k.a Armada 620 (OLPC XO-4)
+ - Product Brief : https://web.archive.org/web/20120824055155/http://www.marvell.com/application-processors/armada/pxa2128/assets/Marvell-ARMADA-PXA2128-SoC-PB.pdf
+ - Application processor only
+ - Core: Dual-core ARMv7 compatible Sheeva PJ4C core
+ - PXA960/PXA968/PXA978 (Linux support not upstream)
+ - Application processor with Communication Processor
+ - Core: ARMv7 compatible Sheeva PJ4 core
+ - PXA986/PXA988 (Linux support not upstream)
+ - Application processor with Communication Processor
+ - Core: Dual-core ARMv7 compatible Sheeva PJ4B-MP core
+ - PXA1088/PXA1920 (Linux support not upstream)
+ - Application processor with Communication Processor
+ - Core: quad-core ARMv7 Cortex-A7
+ - PXA1908/PXA1928/PXA1936
+ - Application processor with Communication Processor
+ - Core: multi-core ARMv8 Cortex-A53
+
+ Comments:
+
+ * This line of SoCs originates from the XScale family developed by
+ Intel and acquired by Marvell in ~2006. All the processors of
+ this MMP/MMP2 family were developed by Marvell.
+
+ * Due to their XScale origin, these SoCs have virtually nothing in
+ common with the other (Kirkwood, Dove, etc.) families of Marvell
+ SoCs, except with the PXA family of SoCs listed above.
+
+ Linux kernel mach directory:
+ arch/arm/mach-mmp
+
+Berlin family (Multimedia Solutions)
+-------------------------------------
+
+ - Flavors:
+ - 88DE3010, Armada 1000 (no Linux support)
+ - Core: Marvell PJ1 (ARMv5TE), Dual-core
+ - Product Brief: https://web.archive.org/web/20131103162620/http://www.marvell.com/digital-entertainment/assets/armada_1000_pb.pdf
+ - 88DE3005, Armada 1500 Mini
+ - Design name: BG2CD
+ - Core: ARM Cortex-A9, PL310 L2CC
+ - 88DE3006, Armada 1500 Mini Plus
+ - Design name: BG2CDP
+ - Core: Dual Core ARM Cortex-A7
+ - 88DE3100, Armada 1500
+ - Design name: BG2
+ - Core: Marvell PJ4B-MP (ARMv7), Tauros3 L2CC
+ - 88DE3114, Armada 1500 Pro
+ - Design name: BG2Q
+ - Core: Quad Core ARM Cortex-A9, PL310 L2CC
+ - 88DE3214, Armada 1500 Pro 4K
+ - Design name: BG3
+ - Core: ARM Cortex-A15, CA15 integrated L2CC
+ - 88DE3218, ARMADA 1500 Ultra
+ - Core: ARM Cortex-A53
+
+ Homepage: https://www.synaptics.com/products/multimedia-solutions
+ Directory: arch/arm/mach-berlin
+
+ Comments:
+
+ * This line of SoCs is based on Marvell Sheeva or ARM Cortex CPUs
+ with Synopsys DesignWare (IRQ, GPIO, Timers, ...) and PXA IP (SDHCI, USB, ETH, ...).
+
+ * The Berlin family was acquired by Synaptics from Marvell in 2017.
+
+CPU Cores
+---------
+
+The XScale cores were designed by Intel, and shipped by Marvell in the older
+PXA processors. Feroceon is a Marvell designed core that developed in-house,
+and that evolved into Sheeva. The XScale and Feroceon cores were phased out
+over time and replaced with Sheeva cores in later products, which subsequently
+got replaced with licensed ARM Cortex-A cores.
+
+ XScale 1
+ CPUID 0x69052xxx
+ ARMv5, iWMMXt
+ XScale 2
+ CPUID 0x69054xxx
+ ARMv5, iWMMXt
+ XScale 3
+ CPUID 0x69056xxx or 0x69056xxx
+ ARMv5, iWMMXt
+ Feroceon-1850 88fr331 "Mohawk"
+ CPUID 0x5615331x or 0x41xx926x
+ ARMv5TE, single issue
+ Feroceon-2850 88fr531-vd "Jolteon"
+ CPUID 0x5605531x or 0x41xx926x
+ ARMv5TE, VFP, dual-issue
+ Feroceon 88fr571-vd "Jolteon"
+ CPUID 0x5615571x
+ ARMv5TE, VFP, dual-issue
+ Feroceon 88fr131 "Mohawk-D"
+ CPUID 0x5625131x
+ ARMv5TE, single-issue in-order
+ Sheeva PJ1 88sv331 "Mohawk"
+ CPUID 0x561584xx
+ ARMv5, single-issue iWMMXt v2
+ Sheeva PJ4 88sv581x "Flareon"
+ CPUID 0x560f581x
+ ARMv7, idivt, optional iWMMXt v2
+ Sheeva PJ4B 88sv581x
+ CPUID 0x561f581x
+ ARMv7, idivt, optional iWMMXt v2
+ Sheeva PJ4B-MP / PJ4C
+ CPUID 0x562f584x
+ ARMv7, idivt/idiva, LPAE, optional iWMMXt v2 and/or NEON
+
+Long-term plans
+---------------
+
+ * Unify the mach-dove/, mach-mv78xx0/, mach-orion5x/ into the
+ mach-mvebu/ to support all SoCs from the Marvell EBU (Engineering
+ Business Unit) in a single mach-<foo> directory. The plat-orion/
+ would therefore disappear.
+
+Credits
+-------
+
+- Maen Suleiman <maen@marvell.com>
+- Lior Amsalem <alior@marvell.com>
+- Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+- Andrew Lunn <andrew@lunn.ch>
+- Nicolas Pitre <nico@fluxnic.net>
+- Eric Miao <eric.y.miao@gmail.com>
diff --git a/Documentation/arch/arm/mem_alignment.rst b/Documentation/arch/arm/mem_alignment.rst
new file mode 100644
index 000000000000..64bd77959300
--- /dev/null
+++ b/Documentation/arch/arm/mem_alignment.rst
@@ -0,0 +1,63 @@
+================
+Memory alignment
+================
+
+Too many problems popped up because of unnoticed misaligned memory access in
+kernel code lately. Therefore the alignment fixup is now unconditionally
+configured in for SA11x0 based targets. According to Alan Cox, this is a
+bad idea to configure it out, but Russell King has some good reasons for
+doing so on some f***ed up ARM architectures like the EBSA110. However
+this is not the case on many design I'm aware of, like all SA11x0 based
+ones.
+
+Of course this is a bad idea to rely on the alignment trap to perform
+unaligned memory access in general. If those access are predictable, you
+are better to use the macros provided by include/linux/unaligned.h. The
+alignment trap can fixup misaligned access for the exception cases, but at
+a high performance cost. It better be rare.
+
+Now for user space applications, it is possible to configure the alignment
+trap to SIGBUS any code performing unaligned access (good for debugging bad
+code), or even fixup the access by software like for kernel code. The later
+mode isn't recommended for performance reasons (just think about the
+floating point emulation that works about the same way). Fix your code
+instead!
+
+Please note that randomly changing the behaviour without good thought is
+real bad - it changes the behaviour of all unaligned instructions in user
+space, and might cause programs to fail unexpectedly.
+
+To change the alignment trap behavior, simply echo a number into
+/proc/cpu/alignment. The number is made up from various bits:
+
+=== ========================================================
+bit behavior when set
+=== ========================================================
+0 A user process performing an unaligned memory access
+ will cause the kernel to print a message indicating
+ process name, pid, pc, instruction, address, and the
+ fault code.
+
+1 The kernel will attempt to fix up the user process
+ performing the unaligned access. This is of course
+ slow (think about the floating point emulator) and
+ not recommended for production use.
+
+2 The kernel will send a SIGBUS signal to the user process
+ performing the unaligned access.
+=== ========================================================
+
+Note that not all combinations are supported - only values 0 through 5.
+(6 and 7 don't make sense).
+
+For example, the following will turn on the warnings, but without
+fixing up or sending SIGBUS signals::
+
+ echo 1 > /proc/cpu/alignment
+
+You can also read the content of the same file to get statistical
+information on unaligned access occurrences plus the current mode of
+operation for user space code.
+
+
+Nicolas Pitre, Mar 13, 2001. Modified Russell King, Nov 30, 2001.
diff --git a/Documentation/arch/arm/memory.rst b/Documentation/arch/arm/memory.rst
new file mode 100644
index 000000000000..0cb1e2938823
--- /dev/null
+++ b/Documentation/arch/arm/memory.rst
@@ -0,0 +1,103 @@
+=================================
+Kernel Memory Layout on ARM Linux
+=================================
+
+ Russell King <rmk@arm.linux.org.uk>
+
+ November 17, 2005 (2.6.15)
+
+This document describes the virtual memory layout which the Linux
+kernel uses for ARM processors. It indicates which regions are
+free for platforms to use, and which are used by generic code.
+
+The ARM CPU is capable of addressing a maximum of 4GB virtual memory
+space, and this must be shared between user space processes, the
+kernel, and hardware devices.
+
+As the ARM architecture matures, it becomes necessary to reserve
+certain regions of VM space for use for new facilities; therefore
+this document may reserve more VM space over time.
+
+=============== =============== ===============================================
+Start End Use
+=============== =============== ===============================================
+ffff8000 ffffffff copy_user_page / clear_user_page use.
+ For SA11xx and Xscale, this is used to
+ setup a minicache mapping.
+
+ffff4000 ffffffff cache aliasing on ARMv6 and later CPUs.
+
+ffff1000 ffff7fff Reserved.
+ Platforms must not use this address range.
+
+ffff0000 ffff0fff CPU vector page.
+ The CPU vectors are mapped here if the
+ CPU supports vector relocation (control
+ register V bit.)
+
+fffe0000 fffeffff XScale cache flush area. This is used
+ in proc-xscale.S to flush the whole data
+ cache. (XScale does not have TCM.)
+
+fffe8000 fffeffff DTCM mapping area for platforms with
+ DTCM mounted inside the CPU.
+
+fffe0000 fffe7fff ITCM mapping area for platforms with
+ ITCM mounted inside the CPU.
+
+ffc80000 ffefffff Fixmap mapping region. Addresses provided
+ by fix_to_virt() will be located here.
+
+ffc00000 ffc7ffff Guard region
+
+ff800000 ffbfffff Permanent, fixed read-only mapping of the
+ firmware provided DT blob
+
+fee00000 feffffff Mapping of PCI I/O space. This is a static
+ mapping within the vmalloc space.
+
+VMALLOC_START VMALLOC_END-1 vmalloc() / ioremap() space.
+ Memory returned by vmalloc/ioremap will
+ be dynamically placed in this region.
+ Machine specific static mappings are also
+ located here through iotable_init().
+ VMALLOC_START is based upon the value
+ of the high_memory variable, and VMALLOC_END
+ is equal to 0xff800000.
+
+PAGE_OFFSET high_memory-1 Kernel direct-mapped RAM region.
+ This maps the platforms RAM, and typically
+ maps all platform RAM in a 1:1 relationship.
+
+PKMAP_BASE PAGE_OFFSET-1 Permanent kernel mappings
+ One way of mapping HIGHMEM pages into kernel
+ space.
+
+MODULES_VADDR MODULES_END-1 Kernel module space
+ Kernel modules inserted via insmod are
+ placed here using dynamic mappings.
+
+TASK_SIZE MODULES_VADDR-1 KASAn shadow memory when KASan is in use.
+ The range from MODULES_VADDR to the top
+ of the memory is shadowed here with 1 bit
+ per byte of memory.
+
+00001000 TASK_SIZE-1 User space mappings
+ Per-thread mappings are placed here via
+ the mmap() system call.
+
+00000000 00000fff CPU vector page / null pointer trap
+ CPUs which do not support vector remapping
+ place their vector page here. NULL pointer
+ dereferences by both the kernel and user
+ space are also caught via this mapping.
+=============== =============== ===============================================
+
+Please note that mappings which collide with the above areas may result
+in a non-bootable kernel, or may cause the kernel to (eventually) panic
+at run time.
+
+Since future CPUs may impact the kernel mapping layout, user programs
+must not access any memory which is not mapped inside their 0x0001000
+to TASK_SIZE address range. If they wish to access these areas, they
+must set up their own mappings using open() and mmap().
diff --git a/Documentation/arch/arm/microchip.rst b/Documentation/arch/arm/microchip.rst
new file mode 100644
index 000000000000..e721d855f2c9
--- /dev/null
+++ b/Documentation/arch/arm/microchip.rst
@@ -0,0 +1,230 @@
+=============================
+ARM Microchip SoCs (aka AT91)
+=============================
+
+
+Introduction
+------------
+This document gives useful information about the ARM Microchip SoCs that are
+currently supported in Linux Mainline (you know, the one on kernel.org).
+
+It is important to note that the Microchip (previously Atmel) ARM-based MPU
+product line is historically named "AT91" or "at91" throughout the Linux kernel
+development process even if this product prefix has completely disappeared from
+the official Microchip product name. Anyway, files, directories, git trees,
+git branches/tags and email subject always contain this "at91" sub-string.
+
+
+AT91 SoCs
+---------
+Documentation and detailed datasheet for each product are available on
+the Microchip website: http://www.microchip.com.
+
+ Flavors:
+ * ARM 920 based SoC
+ - at91rm9200
+
+ * Datasheet
+
+ http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-1768-32-bit-ARM920T-Embedded-Microprocessor-AT91RM9200_Datasheet.pdf
+
+ * ARM 926 based SoCs
+ - at91sam9260
+
+ * Datasheet
+
+ http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-6221-32-bit-ARM926EJ-S-Embedded-Microprocessor-SAM9260_Datasheet.pdf
+
+ - at91sam9xe
+
+ * Datasheet
+
+ http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-6254-32-bit-ARM926EJ-S-Embedded-Microprocessor-SAM9XE_Datasheet.pdf
+
+ - at91sam9261
+
+ * Datasheet
+
+ http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-6062-ARM926EJ-S-Microprocessor-SAM9261_Datasheet.pdf
+
+ - at91sam9263
+
+ * Datasheet
+
+ http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-6249-32-bit-ARM926EJ-S-Embedded-Microprocessor-SAM9263_Datasheet.pdf
+
+ - at91sam9rl
+
+ * Datasheet
+
+ http://ww1.microchip.com/downloads/en/DeviceDoc/doc6289.pdf
+
+ - at91sam9g20
+
+ * Datasheet
+
+ http://ww1.microchip.com/downloads/en/DeviceDoc/DS60001516A.pdf
+
+ - at91sam9g45 family
+ - at91sam9g45
+ - at91sam9g46
+ - at91sam9m10
+ - at91sam9m11 (device superset)
+
+ * Datasheet
+
+ http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-6437-32-bit-ARM926-Embedded-Microprocessor-SAM9M11_Datasheet.pdf
+
+ - at91sam9x5 family (aka "The 5 series")
+ - at91sam9g15
+ - at91sam9g25
+ - at91sam9g35
+ - at91sam9x25
+ - at91sam9x35
+
+ * Datasheet (can be considered as covering the whole family)
+
+ http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-11055-32-bit-ARM926EJ-S-Microcontroller-SAM9X35_Datasheet.pdf
+
+ - at91sam9n12
+
+ * Datasheet
+
+ http://ww1.microchip.com/downloads/en/DeviceDoc/DS60001517A.pdf
+
+ - sam9x60
+
+ * Datasheet
+
+ http://ww1.microchip.com/downloads/en/DeviceDoc/SAM9X60-Data-Sheet-DS60001579A.pdf
+
+ * ARM Cortex-A5 based SoCs
+ - sama5d3 family
+
+ - sama5d31
+ - sama5d33
+ - sama5d34
+ - sama5d35
+ - sama5d36 (device superset)
+
+ * Datasheet
+
+ http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-11121-32-bit-Cortex-A5-Microcontroller-SAMA5D3_Datasheet_B.pdf
+
+ * ARM Cortex-A5 + NEON based SoCs
+ - sama5d4 family
+
+ - sama5d41
+ - sama5d42
+ - sama5d43
+ - sama5d44 (device superset)
+
+ * Datasheet
+
+ http://ww1.microchip.com/downloads/en/DeviceDoc/60001525A.pdf
+
+ - sama5d2 family
+
+ - sama5d21
+ - sama5d22
+ - sama5d23
+ - sama5d24
+ - sama5d26
+ - sama5d27 (device superset)
+ - sama5d28 (device superset + environmental monitors)
+
+ * Datasheet
+
+ http://ww1.microchip.com/downloads/en/DeviceDoc/DS60001476B.pdf
+
+ * ARM Cortex-A7 based SoCs
+ - sama7g5 family
+
+ - sama7g51
+ - sama7g52
+ - sama7g53
+ - sama7g54 (device superset)
+
+ * Datasheet
+
+ Coming soon
+
+ - lan966 family
+ - lan9662
+ - lan9668
+
+ * Datasheet
+
+ Coming soon
+
+ * ARM Cortex-M7 MCUs
+ - sams70 family
+
+ - sams70j19
+ - sams70j20
+ - sams70j21
+ - sams70n19
+ - sams70n20
+ - sams70n21
+ - sams70q19
+ - sams70q20
+ - sams70q21
+
+ - samv70 family
+
+ - samv70j19
+ - samv70j20
+ - samv70n19
+ - samv70n20
+ - samv70q19
+ - samv70q20
+
+ - samv71 family
+
+ - samv71j19
+ - samv71j20
+ - samv71j21
+ - samv71n19
+ - samv71n20
+ - samv71n21
+ - samv71q19
+ - samv71q20
+ - samv71q21
+
+ * Datasheet
+
+ http://ww1.microchip.com/downloads/en/DeviceDoc/SAM-E70-S70-V70-V71-Family-Data-Sheet-DS60001527D.pdf
+
+
+Linux kernel information
+------------------------
+Linux kernel mach directory: arch/arm/mach-at91
+MAINTAINERS entry is: "ARM/Microchip (AT91) SoC support"
+
+
+Device Tree for AT91 SoCs and boards
+------------------------------------
+All AT91 SoCs are converted to Device Tree. Since Linux 3.19, these products
+must use this method to boot the Linux kernel.
+
+Work In Progress statement:
+Device Tree files and Device Tree bindings that apply to AT91 SoCs and boards are
+considered as "Unstable". To be completely clear, any at91 binding can change at
+any time. So, be sure to use a Device Tree Binary and a Kernel Image generated from
+the same source tree.
+Please refer to the Documentation/devicetree/bindings/ABI.rst file for a
+definition of a "Stable" binding/ABI.
+This statement will be removed by AT91 MAINTAINERS when appropriate.
+
+Naming conventions and best practice:
+
+- SoCs Device Tree Source Include files are named after the official name of
+ the product (at91sam9g20.dtsi or sama5d33.dtsi for instance).
+- Device Tree Source Include files (.dtsi) are used to collect common nodes that can be
+ shared across SoCs or boards (sama5d3.dtsi or at91sam9x5cm.dtsi for instance).
+ When collecting nodes for a particular peripheral or topic, the identifier have to
+ be placed at the end of the file name, separated with a "_" (at91sam9x5_can.dtsi
+ or sama5d3_gmac.dtsi for example).
+- board Device Tree Source files (.dts) are prefixed by the string "at91-" so
+ that they can be identified easily. Note that some files are historical exceptions
+ to this rule (sama5d3[13456]ek.dts, usb_a9g20.dts or animeo_ip.dts for example).
diff --git a/Documentation/arch/arm/netwinder.rst b/Documentation/arch/arm/netwinder.rst
new file mode 100644
index 000000000000..8eab66caa2ac
--- /dev/null
+++ b/Documentation/arch/arm/netwinder.rst
@@ -0,0 +1,85 @@
+================================
+NetWinder specific documentation
+================================
+
+The NetWinder is a small low-power computer, primarily designed
+to run Linux. It is based around the StrongARM RISC processor,
+DC21285 PCI bridge, with PC-type hardware glued around it.
+
+Port usage
+==========
+
+======= ====== ===============================
+Min Max Description
+======= ====== ===============================
+0x0000 0x000f DMA1
+0x0020 0x0021 PIC1
+0x0060 0x006f Keyboard
+0x0070 0x007f RTC
+0x0080 0x0087 DMA1
+0x0088 0x008f DMA2
+0x00a0 0x00a3 PIC2
+0x00c0 0x00df DMA2
+0x0180 0x0187 IRDA
+0x01f0 0x01f6 ide0
+0x0201 Game port
+0x0203 RWA010 configuration read
+0x0220 ? SoundBlaster
+0x0250 ? WaveArtist
+0x0279 RWA010 configuration index
+0x02f8 0x02ff Serial ttyS1
+0x0300 0x031f Ether10
+0x0338 GPIO1
+0x033a GPIO2
+0x0370 0x0371 W83977F configuration registers
+0x0388 ? AdLib
+0x03c0 0x03df VGA
+0x03f6 ide0
+0x03f8 0x03ff Serial ttyS0
+0x0400 0x0408 DC21143
+0x0480 0x0487 DMA1
+0x0488 0x048f DMA2
+0x0a79 RWA010 configuration write
+0xe800 0xe80f ide0/ide1 BM DMA
+======= ====== ===============================
+
+
+Interrupt usage
+===============
+
+======= ======= ========================
+IRQ type Description
+======= ======= ========================
+ 0 ISA 100Hz timer
+ 1 ISA Keyboard
+ 2 ISA cascade
+ 3 ISA Serial ttyS1
+ 4 ISA Serial ttyS0
+ 5 ISA PS/2 mouse
+ 6 ISA IRDA
+ 7 ISA Printer
+ 8 ISA RTC alarm
+ 9 ISA
+10 ISA GP10 (Orange reset button)
+11 ISA
+12 ISA WaveArtist
+13 ISA
+14 ISA hda1
+15 ISA
+======= ======= ========================
+
+DMA usage
+=========
+
+======= ======= ===========
+DMA type Description
+======= ======= ===========
+ 0 ISA IRDA
+ 1 ISA
+ 2 ISA cascade
+ 3 ISA WaveArtist
+ 4 ISA
+ 5 ISA
+ 6 ISA
+ 7 ISA WaveArtist
+======= ======= ===========
diff --git a/Documentation/arch/arm/nwfpe/index.rst b/Documentation/arch/arm/nwfpe/index.rst
new file mode 100644
index 000000000000..3c4d2f9aa10e
--- /dev/null
+++ b/Documentation/arch/arm/nwfpe/index.rst
@@ -0,0 +1,13 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================================
+NetWinder's floating point emulator
+===================================
+
+.. toctree::
+ :maxdepth: 1
+
+ nwfpe
+ netwinder-fpe
+ notes
+ todo
diff --git a/Documentation/arch/arm/nwfpe/netwinder-fpe.rst b/Documentation/arch/arm/nwfpe/netwinder-fpe.rst
new file mode 100644
index 000000000000..cbb320960fc4
--- /dev/null
+++ b/Documentation/arch/arm/nwfpe/netwinder-fpe.rst
@@ -0,0 +1,162 @@
+=============
+Current State
+=============
+
+The following describes the current state of the NetWinder's floating point
+emulator.
+
+In the following nomenclature is used to describe the floating point
+instructions. It follows the conventions in the ARM manual.
+
+::
+
+ <S|D|E> = <single|double|extended>, no default
+ {P|M|Z} = {round to +infinity,round to -infinity,round to zero},
+ default = round to nearest
+
+Note: items enclosed in {} are optional.
+
+Floating Point Coprocessor Data Transfer Instructions (CPDT)
+------------------------------------------------------------
+
+LDF/STF - load and store floating
+
+<LDF|STF>{cond}<S|D|E> Fd, Rn
+<LDF|STF>{cond}<S|D|E> Fd, [Rn, #<expression>]{!}
+<LDF|STF>{cond}<S|D|E> Fd, [Rn], #<expression>
+
+These instructions are fully implemented.
+
+LFM/SFM - load and store multiple floating
+
+Form 1 syntax:
+<LFM|SFM>{cond}<S|D|E> Fd, <count>, [Rn]
+<LFM|SFM>{cond}<S|D|E> Fd, <count>, [Rn, #<expression>]{!}
+<LFM|SFM>{cond}<S|D|E> Fd, <count>, [Rn], #<expression>
+
+Form 2 syntax:
+<LFM|SFM>{cond}<FD,EA> Fd, <count>, [Rn]{!}
+
+These instructions are fully implemented. They store/load three words
+for each floating point register into the memory location given in the
+instruction. The format in memory is unlikely to be compatible with
+other implementations, in particular the actual hardware. Specific
+mention of this is made in the ARM manuals.
+
+Floating Point Coprocessor Register Transfer Instructions (CPRT)
+----------------------------------------------------------------
+
+Conversions, read/write status/control register instructions
+
+FLT{cond}<S,D,E>{P,M,Z} Fn, Rd Convert integer to floating point
+FIX{cond}{P,M,Z} Rd, Fn Convert floating point to integer
+WFS{cond} Rd Write floating point status register
+RFS{cond} Rd Read floating point status register
+WFC{cond} Rd Write floating point control register
+RFC{cond} Rd Read floating point control register
+
+FLT/FIX are fully implemented.
+
+RFS/WFS are fully implemented.
+
+RFC/WFC are fully implemented. RFC/WFC are supervisor only instructions, and
+presently check the CPU mode, and do an invalid instruction trap if not called
+from supervisor mode.
+
+Compare instructions
+
+CMF{cond} Fn, Fm Compare floating
+CMFE{cond} Fn, Fm Compare floating with exception
+CNF{cond} Fn, Fm Compare negated floating
+CNFE{cond} Fn, Fm Compare negated floating with exception
+
+These are fully implemented.
+
+Floating Point Coprocessor Data Instructions (CPDT)
+---------------------------------------------------
+
+Dyadic operations:
+
+ADF{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - add
+SUF{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - subtract
+RSF{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - reverse subtract
+MUF{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - multiply
+DVF{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - divide
+RDV{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - reverse divide
+
+These are fully implemented.
+
+FML{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - fast multiply
+FDV{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - fast divide
+FRD{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - fast reverse divide
+
+These are fully implemented as well. They use the same algorithm as the
+non-fast versions. Hence, in this implementation their performance is
+equivalent to the MUF/DVF/RDV instructions. This is acceptable according
+to the ARM manual. The manual notes these are defined only for single
+operands, on the actual FPA11 hardware they do not work for double or
+extended precision operands. The emulator currently does not check
+the requested permissions conditions, and performs the requested operation.
+
+RMF{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - IEEE remainder
+
+This is fully implemented.
+
+Monadic operations:
+
+MVF{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - move
+MNF{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - move negated
+
+These are fully implemented.
+
+ABS{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - absolute value
+SQT{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - square root
+RND{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - round
+
+These are fully implemented.
+
+URD{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - unnormalized round
+NRM{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - normalize
+
+These are implemented. URD is implemented using the same code as the RND
+instruction. Since URD cannot return a unnormalized number, NRM becomes
+a NOP.
+
+Library calls:
+
+POW{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - power
+RPW{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - reverse power
+POL{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - polar angle (arctan2)
+
+LOG{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - logarithm to base 10
+LGN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - logarithm to base e
+EXP{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - exponent
+SIN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - sine
+COS{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - cosine
+TAN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - tangent
+ASN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - arcsine
+ACS{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - arccosine
+ATN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - arctangent
+
+These are not implemented. They are not currently issued by the compiler,
+and are handled by routines in libc. These are not implemented by the FPA11
+hardware, but are handled by the floating point support code. They should
+be implemented in future versions.
+
+Signalling:
+
+Signals are implemented. However current ELF kernels produced by Rebel.com
+have a bug in them that prevents the module from generating a SIGFPE. This
+is caused by a failure to alias fp_current to the kernel variable
+current_set[0] correctly.
+
+The kernel provided with this distribution (vmlinux-nwfpe-0.93) contains
+a fix for this problem and also incorporates the current version of the
+emulator directly. It is possible to run with no floating point module
+loaded with this kernel. It is provided as a demonstration of the
+technology and for those who want to do floating point work that depends
+on signals. It is not strictly necessary to use the module.
+
+A module (either the one provided by Russell King, or the one in this
+distribution) can be loaded to replace the functionality of the emulator
+built into the kernel.
diff --git a/Documentation/arch/arm/nwfpe/notes.rst b/Documentation/arch/arm/nwfpe/notes.rst
new file mode 100644
index 000000000000..102e55af8439
--- /dev/null
+++ b/Documentation/arch/arm/nwfpe/notes.rst
@@ -0,0 +1,32 @@
+Notes
+=====
+
+There seems to be a problem with exp(double) and our emulator. I haven't
+been able to track it down yet. This does not occur with the emulator
+supplied by Russell King.
+
+I also found one oddity in the emulator. I don't think it is serious but
+will point it out. The ARM calling conventions require floating point
+registers f4-f7 to be preserved over a function call. The compiler quite
+often uses an stfe instruction to save f4 on the stack upon entry to a
+function, and an ldfe instruction to restore it before returning.
+
+I was looking at some code, that calculated a double result, stored it in f4
+then made a function call. Upon return from the function call the number in
+f4 had been converted to an extended value in the emulator.
+
+This is a side effect of the stfe instruction. The double in f4 had to be
+converted to extended, then stored. If an lfm/sfm combination had been used,
+then no conversion would occur. This has performance considerations. The
+result from the function call and f4 were used in a multiplication. If the
+emulator sees a multiply of a double and extended, it promotes the double to
+extended, then does the multiply in extended precision.
+
+This code will cause this problem:
+
+double x, y, z;
+z = log(x)/log(y);
+
+The result of log(x) (a double) will be calculated, returned in f0, then
+moved to f4 to preserve it over the log(y) call. The division will be done
+in extended precision, due to the stfe instruction used to save f4 in log(y).
diff --git a/Documentation/arch/arm/nwfpe/nwfpe.rst b/Documentation/arch/arm/nwfpe/nwfpe.rst
new file mode 100644
index 000000000000..35cd90dacbff
--- /dev/null
+++ b/Documentation/arch/arm/nwfpe/nwfpe.rst
@@ -0,0 +1,74 @@
+Introduction
+============
+
+This directory contains the version 0.92 test release of the NetWinder
+Floating Point Emulator.
+
+The majority of the code was written by me, Scott Bambrough It is
+written in C, with a small number of routines in inline assembler
+where required. It was written quickly, with a goal of implementing a
+working version of all the floating point instructions the compiler
+emits as the first target. I have attempted to be as optimal as
+possible, but there remains much room for improvement.
+
+I have attempted to make the emulator as portable as possible. One of
+the problems is with leading underscores on kernel symbols. Elf
+kernels have no leading underscores, a.out compiled kernels do. I
+have attempted to use the C_SYMBOL_NAME macro wherever this may be
+important.
+
+Another choice I made was in the file structure. I have attempted to
+contain all operating system specific code in one module (fpmodule.*).
+All the other files contain emulator specific code. This should allow
+others to port the emulator to NetBSD for instance relatively easily.
+
+The floating point operations are based on SoftFloat Release 2, by
+John Hauser. SoftFloat is a software implementation of floating-point
+that conforms to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic. As many as four formats are supported: single precision,
+double precision, extended double precision, and quadruple precision.
+All operations required by the standard are implemented, except for
+conversions to and from decimal. We use only the single precision,
+double precision and extended double precision formats. The port of
+SoftFloat to the ARM was done by Phil Blundell, based on an earlier
+port of SoftFloat version 1 by Neil Carson for NetBSD/arm32.
+
+The file README.FPE contains a description of what has been implemented
+so far in the emulator. The file TODO contains a information on what
+remains to be done, and other ideas for the emulator.
+
+Bug reports, comments, suggestions should be directed to me at
+<scottb@netwinder.org>. General reports of "this program doesn't
+work correctly when your emulator is installed" are useful for
+determining that bugs still exist; but are virtually useless when
+attempting to isolate the problem. Please report them, but don't
+expect quick action. Bugs still exist. The problem remains in isolating
+which instruction contains the bug. Small programs illustrating a specific
+problem are a godsend.
+
+Legal Notices
+-------------
+
+The NetWinder Floating Point Emulator is free software. Everything Rebel.com
+has written is provided under the GNU GPL. See the file COPYING for copying
+conditions. Excluded from the above is the SoftFloat code. John Hauser's
+legal notice for SoftFloat is included below.
+
+-------------------------------------------------------------------------------
+
+SoftFloat Legal Notice
+
+SoftFloat was written by John R. Hauser. This work was made possible in
+part by the International Computer Science Institute, located at Suite 600,
+1947 Center Street, Berkeley, California 94704. Funding was partially
+provided by the National Science Foundation under grant MIP-9311980. The
+original version of this code was written as part of a project to build
+a fixed-point vector processor in collaboration with the University of
+California at Berkeley, overseen by Profs. Nelson Morgan and John Wawrzynek.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
+has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
+TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
+PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
+AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
+-------------------------------------------------------------------------------
diff --git a/Documentation/arch/arm/nwfpe/todo.rst b/Documentation/arch/arm/nwfpe/todo.rst
new file mode 100644
index 000000000000..393f11b14540
--- /dev/null
+++ b/Documentation/arch/arm/nwfpe/todo.rst
@@ -0,0 +1,72 @@
+TODO LIST
+=========
+
+::
+
+ POW{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - power
+ RPW{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - reverse power
+ POL{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - polar angle (arctan2)
+
+ LOG{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - logarithm to base 10
+ LGN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - logarithm to base e
+ EXP{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - exponent
+ SIN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - sine
+ COS{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - cosine
+ TAN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - tangent
+ ASN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - arcsine
+ ACS{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - arccosine
+ ATN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - arctangent
+
+These are not implemented. They are not currently issued by the compiler,
+and are handled by routines in libc. These are not implemented by the FPA11
+hardware, but are handled by the floating point support code. They should
+be implemented in future versions.
+
+There are a couple of ways to approach the implementation of these. One
+method would be to use accurate table methods for these routines. I have
+a couple of papers by S. Gal from IBM's research labs in Haifa, Israel that
+seem to promise extreme accuracy (in the order of 99.8%) and reasonable speed.
+These methods are used in GLIBC for some of the transcendental functions.
+
+Another approach, which I know little about is CORDIC. This stands for
+Coordinate Rotation Digital Computer, and is a method of computing
+transcendental functions using mostly shifts and adds and a few
+multiplications and divisions. The ARM excels at shifts and adds,
+so such a method could be promising, but requires more research to
+determine if it is feasible.
+
+Rounding Methods
+----------------
+
+The IEEE standard defines 4 rounding modes. Round to nearest is the
+default, but rounding to + or - infinity or round to zero are also allowed.
+Many architectures allow the rounding mode to be specified by modifying bits
+in a control register. Not so with the ARM FPA11 architecture. To change
+the rounding mode one must specify it with each instruction.
+
+This has made porting some benchmarks difficult. It is possible to
+introduce such a capability into the emulator. The FPCR contains
+bits describing the rounding mode. The emulator could be altered to
+examine a flag, which if set forced it to ignore the rounding mode in
+the instruction, and use the mode specified in the bits in the FPCR.
+
+This would require a method of getting/setting the flag, and the bits
+in the FPCR. This requires a kernel call in ArmLinux, as WFC/RFC are
+supervisor only instructions. If anyone has any ideas or comments I
+would like to hear them.
+
+NOTE:
+ pulled out from some docs on ARM floating point, specifically
+ for the Acorn FPE, but not limited to it:
+
+ The floating point control register (FPCR) may only be present in some
+ implementations: it is there to control the hardware in an implementation-
+ specific manner, for example to disable the floating point system. The user
+ mode of the ARM is not permitted to use this register (since the right is
+ reserved to alter it between implementations) and the WFC and RFC
+ instructions will trap if tried in user mode.
+
+ Hence, the answer is yes, you could do this, but then you will run a high
+ risk of becoming isolated if and when hardware FP emulation comes out
+
+ -- Russell.
diff --git a/Documentation/arch/arm/omap/dss.rst b/Documentation/arch/arm/omap/dss.rst
new file mode 100644
index 000000000000..a40c4d9c717a
--- /dev/null
+++ b/Documentation/arch/arm/omap/dss.rst
@@ -0,0 +1,372 @@
+=========================
+OMAP2/3 Display Subsystem
+=========================
+
+This is an almost total rewrite of the OMAP FB driver in drivers/video/omap
+(let's call it DSS1). The main differences between DSS1 and DSS2 are DSI,
+TV-out and multiple display support, but there are lots of small improvements
+also.
+
+The DSS2 driver (omapdss module) is in arch/arm/plat-omap/dss/, and the FB,
+panel and controller drivers are in drivers/video/omap2/. DSS1 and DSS2 live
+currently side by side, you can choose which one to use.
+
+Features
+--------
+
+Working and tested features include:
+
+- MIPI DPI (parallel) output
+- MIPI DSI output in command mode
+- MIPI DBI (RFBI) output
+- SDI output
+- TV output
+- All pieces can be compiled as a module or inside kernel
+- Use DISPC to update any of the outputs
+- Use CPU to update RFBI or DSI output
+- OMAP DISPC planes
+- RGB16, RGB24 packed, RGB24 unpacked
+- YUV2, UYVY
+- Scaling
+- Adjusting DSS FCK to find a good pixel clock
+- Use DSI DPLL to create DSS FCK
+
+Tested boards include:
+- OMAP3 SDP board
+- Beagle board
+- N810
+
+omapdss driver
+--------------
+
+The DSS driver does not itself have any support for Linux framebuffer, V4L or
+such like the current ones, but it has an internal kernel API that upper level
+drivers can use.
+
+The DSS driver models OMAP's overlays, overlay managers and displays in a
+flexible way to enable non-common multi-display configuration. In addition to
+modelling the hardware overlays, omapdss supports virtual overlays and overlay
+managers. These can be used when updating a display with CPU or system DMA.
+
+omapdss driver support for audio
+--------------------------------
+There exist several display technologies and standards that support audio as
+well. Hence, it is relevant to update the DSS device driver to provide an audio
+interface that may be used by an audio driver or any other driver interested in
+the functionality.
+
+The audio_enable function is intended to prepare the relevant
+IP for playback (e.g., enabling an audio FIFO, taking in/out of reset
+some IP, enabling companion chips, etc). It is intended to be called before
+audio_start. The audio_disable function performs the reverse operation and is
+intended to be called after audio_stop.
+
+While a given DSS device driver may support audio, it is possible that for
+certain configurations audio is not supported (e.g., an HDMI display using a
+VESA video timing). The audio_supported function is intended to query whether
+the current configuration of the display supports audio.
+
+The audio_config function is intended to configure all the relevant audio
+parameters of the display. In order to make the function independent of any
+specific DSS device driver, a struct omap_dss_audio is defined. Its purpose
+is to contain all the required parameters for audio configuration. At the
+moment, such structure contains pointers to IEC-60958 channel status word
+and CEA-861 audio infoframe structures. This should be enough to support
+HDMI and DisplayPort, as both are based on CEA-861 and IEC-60958.
+
+The audio_enable/disable, audio_config and audio_supported functions could be
+implemented as functions that may sleep. Hence, they should not be called
+while holding a spinlock or a readlock.
+
+The audio_start/audio_stop function is intended to effectively start/stop audio
+playback after the configuration has taken place. These functions are designed
+to be used in an atomic context. Hence, audio_start should return quickly and be
+called only after all the needed resources for audio playback (audio FIFOs,
+DMA channels, companion chips, etc) have been enabled to begin data transfers.
+audio_stop is designed to only stop the audio transfers. The resources used
+for playback are released using audio_disable.
+
+The enum omap_dss_audio_state may be used to help the implementations of
+the interface to keep track of the audio state. The initial state is _DISABLED;
+then, the state transitions to _CONFIGURED, and then, when it is ready to
+play audio, to _ENABLED. The state _PLAYING is used when the audio is being
+rendered.
+
+
+Panel and controller drivers
+----------------------------
+
+The drivers implement panel or controller specific functionality and are not
+usually visible to users except through omapfb driver. They register
+themselves to the DSS driver.
+
+omapfb driver
+-------------
+
+The omapfb driver implements arbitrary number of standard linux framebuffers.
+These framebuffers can be routed flexibly to any overlays, thus allowing very
+dynamic display architecture.
+
+The driver exports some omapfb specific ioctls, which are compatible with the
+ioctls in the old driver.
+
+The rest of the non standard features are exported via sysfs. Whether the final
+implementation will use sysfs, or ioctls, is still open.
+
+V4L2 drivers
+------------
+
+V4L2 is being implemented in TI.
+
+From omapdss point of view the V4L2 drivers should be similar to framebuffer
+driver.
+
+Architecture
+--------------------
+
+Some clarification what the different components do:
+
+ - Framebuffer is a memory area inside OMAP's SRAM/SDRAM that contains the
+ pixel data for the image. Framebuffer has width and height and color
+ depth.
+ - Overlay defines where the pixels are read from and where they go on the
+ screen. The overlay may be smaller than framebuffer, thus displaying only
+ part of the framebuffer. The position of the overlay may be changed if
+ the overlay is smaller than the display.
+ - Overlay manager combines the overlays in to one image and feeds them to
+ display.
+ - Display is the actual physical display device.
+
+A framebuffer can be connected to multiple overlays to show the same pixel data
+on all of the overlays. Note that in this case the overlay input sizes must be
+the same, but, in case of video overlays, the output size can be different. Any
+framebuffer can be connected to any overlay.
+
+An overlay can be connected to one overlay manager. Also DISPC overlays can be
+connected only to DISPC overlay managers, and virtual overlays can be only
+connected to virtual overlays.
+
+An overlay manager can be connected to one display. There are certain
+restrictions which kinds of displays an overlay manager can be connected:
+
+ - DISPC TV overlay manager can be only connected to TV display.
+ - Virtual overlay managers can only be connected to DBI or DSI displays.
+ - DISPC LCD overlay manager can be connected to all displays, except TV
+ display.
+
+Sysfs
+-----
+The sysfs interface is mainly used for testing. I don't think sysfs
+interface is the best for this in the final version, but I don't quite know
+what would be the best interfaces for these things.
+
+The sysfs interface is divided to two parts: DSS and FB.
+
+/sys/class/graphics/fb? directory:
+mirror 0=off, 1=on
+rotate Rotation 0-3 for 0, 90, 180, 270 degrees
+rotate_type 0 = DMA rotation, 1 = VRFB rotation
+overlays List of overlay numbers to which framebuffer pixels go
+phys_addr Physical address of the framebuffer
+virt_addr Virtual address of the framebuffer
+size Size of the framebuffer
+
+/sys/devices/platform/omapdss/overlay? directory:
+enabled 0=off, 1=on
+input_size width,height (ie. the framebuffer size)
+manager Destination overlay manager name
+name
+output_size width,height
+position x,y
+screen_width width
+global_alpha global alpha 0-255 0=transparent 255=opaque
+
+/sys/devices/platform/omapdss/manager? directory:
+display Destination display
+name
+alpha_blending_enabled 0=off, 1=on
+trans_key_enabled 0=off, 1=on
+trans_key_type gfx-destination, video-source
+trans_key_value transparency color key (RGB24)
+default_color default background color (RGB24)
+
+/sys/devices/platform/omapdss/display? directory:
+
+=============== =============================================================
+ctrl_name Controller name
+mirror 0=off, 1=on
+update_mode 0=off, 1=auto, 2=manual
+enabled 0=off, 1=on
+name
+rotate Rotation 0-3 for 0, 90, 180, 270 degrees
+timings Display timings (pixclock,xres/hfp/hbp/hsw,yres/vfp/vbp/vsw)
+ When writing, two special timings are accepted for tv-out:
+ "pal" and "ntsc"
+panel_name
+tear_elim Tearing elimination 0=off, 1=on
+output_type Output type (video encoder only): "composite" or "svideo"
+=============== =============================================================
+
+There are also some debugfs files at <debugfs>/omapdss/ which show information
+about clocks and registers.
+
+Examples
+--------
+
+The following definitions have been made for the examples below::
+
+ ovl0=/sys/devices/platform/omapdss/overlay0
+ ovl1=/sys/devices/platform/omapdss/overlay1
+ ovl2=/sys/devices/platform/omapdss/overlay2
+
+ mgr0=/sys/devices/platform/omapdss/manager0
+ mgr1=/sys/devices/platform/omapdss/manager1
+
+ lcd=/sys/devices/platform/omapdss/display0
+ dvi=/sys/devices/platform/omapdss/display1
+ tv=/sys/devices/platform/omapdss/display2
+
+ fb0=/sys/class/graphics/fb0
+ fb1=/sys/class/graphics/fb1
+ fb2=/sys/class/graphics/fb2
+
+Default setup on OMAP3 SDP
+--------------------------
+
+Here's the default setup on OMAP3 SDP board. All planes go to LCD. DVI
+and TV-out are not in use. The columns from left to right are:
+framebuffers, overlays, overlay managers, displays. Framebuffers are
+handled by omapfb, and the rest by the DSS::
+
+ FB0 --- GFX -\ DVI
+ FB1 --- VID1 --+- LCD ---- LCD
+ FB2 --- VID2 -/ TV ----- TV
+
+Example: Switch from LCD to DVI
+-------------------------------
+
+::
+
+ w=`cat $dvi/timings | cut -d "," -f 2 | cut -d "/" -f 1`
+ h=`cat $dvi/timings | cut -d "," -f 3 | cut -d "/" -f 1`
+
+ echo "0" > $lcd/enabled
+ echo "" > $mgr0/display
+ fbset -fb /dev/fb0 -xres $w -yres $h -vxres $w -vyres $h
+ # at this point you have to switch the dvi/lcd dip-switch from the omap board
+ echo "dvi" > $mgr0/display
+ echo "1" > $dvi/enabled
+
+After this the configuration looks like:::
+
+ FB0 --- GFX -\ -- DVI
+ FB1 --- VID1 --+- LCD -/ LCD
+ FB2 --- VID2 -/ TV ----- TV
+
+Example: Clone GFX overlay to LCD and TV
+----------------------------------------
+
+::
+
+ w=`cat $tv/timings | cut -d "," -f 2 | cut -d "/" -f 1`
+ h=`cat $tv/timings | cut -d "," -f 3 | cut -d "/" -f 1`
+
+ echo "0" > $ovl0/enabled
+ echo "0" > $ovl1/enabled
+
+ echo "" > $fb1/overlays
+ echo "0,1" > $fb0/overlays
+
+ echo "$w,$h" > $ovl1/output_size
+ echo "tv" > $ovl1/manager
+
+ echo "1" > $ovl0/enabled
+ echo "1" > $ovl1/enabled
+
+ echo "1" > $tv/enabled
+
+After this the configuration looks like (only relevant parts shown)::
+
+ FB0 +-- GFX ---- LCD ---- LCD
+ \- VID1 ---- TV ---- TV
+
+Misc notes
+----------
+
+OMAP FB allocates the framebuffer memory using the standard dma allocator. You
+can enable Contiguous Memory Allocator (CONFIG_CMA) to improve the dma
+allocator, and if CMA is enabled, you use "cma=" kernel parameter to increase
+the global memory area for CMA.
+
+Using DSI DPLL to generate pixel clock it is possible produce the pixel clock
+of 86.5MHz (max possible), and with that you get 1280x1024@57 output from DVI.
+
+Rotation and mirroring currently only supports RGB565 and RGB8888 modes. VRFB
+does not support mirroring.
+
+VRFB rotation requires much more memory than non-rotated framebuffer, so you
+probably need to increase your vram setting before using VRFB rotation. Also,
+many applications may not work with VRFB if they do not pay attention to all
+framebuffer parameters.
+
+Kernel boot arguments
+---------------------
+
+omapfb.mode=<display>:<mode>[,...]
+ - Default video mode for specified displays. For example,
+ "dvi:800x400MR-24@60". See drivers/video/modedb.c.
+ There are also two special modes: "pal" and "ntsc" that
+ can be used to tv out.
+
+omapfb.vram=<fbnum>:<size>[@<physaddr>][,...]
+ - VRAM allocated for a framebuffer. Normally omapfb allocates vram
+ depending on the display size. With this you can manually allocate
+ more or define the physical address of each framebuffer. For example,
+ "1:4M" to allocate 4M for fb1.
+
+omapfb.debug=<y|n>
+ - Enable debug printing. You have to have OMAPFB debug support enabled
+ in kernel config.
+
+omapfb.test=<y|n>
+ - Draw test pattern to framebuffer whenever framebuffer settings change.
+ You need to have OMAPFB debug support enabled in kernel config.
+
+omapfb.vrfb=<y|n>
+ - Use VRFB rotation for all framebuffers.
+
+omapfb.rotate=<angle>
+ - Default rotation applied to all framebuffers.
+ 0 - 0 degree rotation
+ 1 - 90 degree rotation
+ 2 - 180 degree rotation
+ 3 - 270 degree rotation
+
+omapfb.mirror=<y|n>
+ - Default mirror for all framebuffers. Only works with DMA rotation.
+
+omapdss.def_disp=<display>
+ - Name of default display, to which all overlays will be connected.
+ Common examples are "lcd" or "tv".
+
+omapdss.debug=<y|n>
+ - Enable debug printing. You have to have DSS debug support enabled in
+ kernel config.
+
+TODO
+----
+
+DSS locking
+
+Error checking
+
+- Lots of checks are missing or implemented just as BUG()
+
+System DMA update for DSI
+
+- Can be used for RGB16 and RGB24P modes. Probably not for RGB24U (how
+ to skip the empty byte?)
+
+OMAP1 support
+
+- Not sure if needed
diff --git a/Documentation/arch/arm/omap/index.rst b/Documentation/arch/arm/omap/index.rst
new file mode 100644
index 000000000000..8b365b212e49
--- /dev/null
+++ b/Documentation/arch/arm/omap/index.rst
@@ -0,0 +1,12 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======
+TI OMAP
+=======
+
+.. toctree::
+ :maxdepth: 1
+
+ omap
+ omap_pm
+ dss
diff --git a/Documentation/arch/arm/omap/omap.rst b/Documentation/arch/arm/omap/omap.rst
new file mode 100644
index 000000000000..f440c0f4613f
--- /dev/null
+++ b/Documentation/arch/arm/omap/omap.rst
@@ -0,0 +1,18 @@
+============
+OMAP history
+============
+
+This file contains documentation for running mainline
+kernel on omaps.
+
+====== ======================================================
+KERNEL NEW DEPENDENCIES
+====== ======================================================
+v4.3+ Update is needed for custom .config files to make sure
+ CONFIG_REGULATOR_PBIAS is enabled for MMC1 to work
+ properly.
+
+v4.18+ Update is needed for custom .config files to make sure
+ CONFIG_MMC_SDHCI_OMAP is enabled for all MMC instances
+ to work in DRA7 and K2G based boards.
+====== ======================================================
diff --git a/Documentation/arch/arm/omap/omap_pm.rst b/Documentation/arch/arm/omap/omap_pm.rst
new file mode 100644
index 000000000000..a335e4c8ce2c
--- /dev/null
+++ b/Documentation/arch/arm/omap/omap_pm.rst
@@ -0,0 +1,165 @@
+=====================
+The OMAP PM interface
+=====================
+
+This document describes the temporary OMAP PM interface. Driver
+authors use these functions to communicate minimum latency or
+throughput constraints to the kernel power management code.
+Over time, the intention is to merge features from the OMAP PM
+interface into the Linux PM QoS code.
+
+Drivers need to express PM parameters which:
+
+- support the range of power management parameters present in the TI SRF;
+
+- separate the drivers from the underlying PM parameter
+ implementation, whether it is the TI SRF or Linux PM QoS or Linux
+ latency framework or something else;
+
+- specify PM parameters in terms of fundamental units, such as
+ latency and throughput, rather than units which are specific to OMAP
+ or to particular OMAP variants;
+
+- allow drivers which are shared with other architectures (e.g.,
+ DaVinci) to add these constraints in a way which won't affect non-OMAP
+ systems,
+
+- can be implemented immediately with minimal disruption of other
+ architectures.
+
+
+This document proposes the OMAP PM interface, including the following
+five power management functions for driver code:
+
+1. Set the maximum MPU wakeup latency::
+
+ (*pdata->set_max_mpu_wakeup_lat)(struct device *dev, unsigned long t)
+
+2. Set the maximum device wakeup latency::
+
+ (*pdata->set_max_dev_wakeup_lat)(struct device *dev, unsigned long t)
+
+3. Set the maximum system DMA transfer start latency (CORE pwrdm)::
+
+ (*pdata->set_max_sdma_lat)(struct device *dev, long t)
+
+4. Set the minimum bus throughput needed by a device::
+
+ (*pdata->set_min_bus_tput)(struct device *dev, u8 agent_id, unsigned long r)
+
+5. Return the number of times the device has lost context::
+
+ (*pdata->get_dev_context_loss_count)(struct device *dev)
+
+
+Further documentation for all OMAP PM interface functions can be
+found in arch/arm/plat-omap/include/mach/omap-pm.h.
+
+
+The OMAP PM layer is intended to be temporary
+---------------------------------------------
+
+The intention is that eventually the Linux PM QoS layer should support
+the range of power management features present in OMAP3. As this
+happens, existing drivers using the OMAP PM interface can be modified
+to use the Linux PM QoS code; and the OMAP PM interface can disappear.
+
+
+Driver usage of the OMAP PM functions
+-------------------------------------
+
+As the 'pdata' in the above examples indicates, these functions are
+exposed to drivers through function pointers in driver .platform_data
+structures. The function pointers are initialized by the `board-*.c`
+files to point to the corresponding OMAP PM functions:
+
+- set_max_dev_wakeup_lat will point to
+ omap_pm_set_max_dev_wakeup_lat(), etc. Other architectures which do
+ not support these functions should leave these function pointers set
+ to NULL. Drivers should use the following idiom::
+
+ if (pdata->set_max_dev_wakeup_lat)
+ (*pdata->set_max_dev_wakeup_lat)(dev, t);
+
+The most common usage of these functions will probably be to specify
+the maximum time from when an interrupt occurs, to when the device
+becomes accessible. To accomplish this, driver writers should use the
+set_max_mpu_wakeup_lat() function to constrain the MPU wakeup
+latency, and the set_max_dev_wakeup_lat() function to constrain the
+device wakeup latency (from clk_enable() to accessibility). For
+example::
+
+ /* Limit MPU wakeup latency */
+ if (pdata->set_max_mpu_wakeup_lat)
+ (*pdata->set_max_mpu_wakeup_lat)(dev, tc);
+
+ /* Limit device powerdomain wakeup latency */
+ if (pdata->set_max_dev_wakeup_lat)
+ (*pdata->set_max_dev_wakeup_lat)(dev, td);
+
+ /* total wakeup latency in this example: (tc + td) */
+
+The PM parameters can be overwritten by calling the function again
+with the new value. The settings can be removed by calling the
+function with a t argument of -1 (except in the case of
+set_max_bus_tput(), which should be called with an r argument of 0).
+
+The fifth function above, omap_pm_get_dev_context_loss_count(),
+is intended as an optimization to allow drivers to determine whether the
+device has lost its internal context. If context has been lost, the
+driver must restore its internal context before proceeding.
+
+
+Other specialized interface functions
+-------------------------------------
+
+The five functions listed above are intended to be usable by any
+device driver. DSPBridge and CPUFreq have a few special requirements.
+DSPBridge expresses target DSP performance levels in terms of OPP IDs.
+CPUFreq expresses target MPU performance levels in terms of MPU
+frequency. The OMAP PM interface contains functions for these
+specialized cases to convert that input information (OPPs/MPU
+frequency) into the form that the underlying power management
+implementation needs:
+
+6. `(*pdata->dsp_get_opp_table)(void)`
+
+7. `(*pdata->dsp_set_min_opp)(u8 opp_id)`
+
+8. `(*pdata->dsp_get_opp)(void)`
+
+9. `(*pdata->cpu_get_freq_table)(void)`
+
+10. `(*pdata->cpu_set_freq)(unsigned long f)`
+
+11. `(*pdata->cpu_get_freq)(void)`
+
+Customizing OPP for platform
+============================
+Defining CONFIG_PM should enable OPP layer for the silicon
+and the registration of OPP table should take place automatically.
+However, in special cases, the default OPP table may need to be
+tweaked, for e.g.:
+
+ * enable default OPPs which are disabled by default, but which
+ could be enabled on a platform
+ * Disable an unsupported OPP on the platform
+ * Define and add a custom opp table entry
+ in these cases, the board file needs to do additional steps as follows:
+
+arch/arm/mach-omapx/board-xyz.c::
+
+ #include "pm.h"
+ ....
+ static void __init omap_xyz_init_irq(void)
+ {
+ ....
+ /* Initialize the default table */
+ omapx_opp_init();
+ /* Do customization to the defaults */
+ ....
+ }
+
+NOTE:
+ omapx_opp_init will be omap3_opp_init or as required
+ based on the omap family.
diff --git a/Documentation/arch/arm/porting.rst b/Documentation/arch/arm/porting.rst
new file mode 100644
index 000000000000..bd21958bdb2d
--- /dev/null
+++ b/Documentation/arch/arm/porting.rst
@@ -0,0 +1,137 @@
+=======
+Porting
+=======
+
+Taken from list archive at http://lists.arm.linux.org.uk/pipermail/linux-arm-kernel/2001-July/004064.html
+
+Initial definitions
+-------------------
+
+The following symbol definitions rely on you knowing the translation that
+__virt_to_phys() does for your machine. This macro converts the passed
+virtual address to a physical address. Normally, it is simply:
+
+ phys = virt - PAGE_OFFSET + PHYS_OFFSET
+
+
+Decompressor Symbols
+--------------------
+
+ZTEXTADDR
+ Start address of decompressor. There's no point in talking about
+ virtual or physical addresses here, since the MMU will be off at
+ the time when you call the decompressor code. You normally call
+ the kernel at this address to start it booting. This doesn't have
+ to be located in RAM, it can be in flash or other read-only or
+ read-write addressable medium.
+
+ZBSSADDR
+ Start address of zero-initialised work area for the decompressor.
+ This must be pointing at RAM. The decompressor will zero initialise
+ this for you. Again, the MMU will be off.
+
+ZRELADDR
+ This is the address where the decompressed kernel will be written,
+ and eventually executed. The following constraint must be valid:
+
+ __virt_to_phys(TEXTADDR) == ZRELADDR
+
+ The initial part of the kernel is carefully coded to be position
+ independent.
+
+INITRD_PHYS
+ Physical address to place the initial RAM disk. Only relevant if
+ you are using the bootpImage stuff (which only works on the old
+ struct param_struct).
+
+INITRD_VIRT
+ Virtual address of the initial RAM disk. The following constraint
+ must be valid:
+
+ __virt_to_phys(INITRD_VIRT) == INITRD_PHYS
+
+PARAMS_PHYS
+ Physical address of the struct param_struct or tag list, giving the
+ kernel various parameters about its execution environment.
+
+
+Kernel Symbols
+--------------
+
+PHYS_OFFSET
+ Physical start address of the first bank of RAM.
+
+PAGE_OFFSET
+ Virtual start address of the first bank of RAM. During the kernel
+ boot phase, virtual address PAGE_OFFSET will be mapped to physical
+ address PHYS_OFFSET, along with any other mappings you supply.
+ This should be the same value as TASK_SIZE.
+
+TASK_SIZE
+ The maximum size of a user process in bytes. Since user space
+ always starts at zero, this is the maximum address that a user
+ process can access+1. The user space stack grows down from this
+ address.
+
+ Any virtual address below TASK_SIZE is deemed to be user process
+ area, and therefore managed dynamically on a process by process
+ basis by the kernel. I'll call this the user segment.
+
+ Anything above TASK_SIZE is common to all processes. I'll call
+ this the kernel segment.
+
+ (In other words, you can't put IO mappings below TASK_SIZE, and
+ hence PAGE_OFFSET).
+
+TEXTADDR
+ Virtual start address of kernel, normally PAGE_OFFSET + 0x8000.
+ This is where the kernel image ends up. With the latest kernels,
+ it must be located at 32768 bytes into a 128MB region. Previous
+ kernels placed a restriction of 256MB here.
+
+DATAADDR
+ Virtual address for the kernel data segment. Must not be defined
+ when using the decompressor.
+
+VMALLOC_START / VMALLOC_END
+ Virtual addresses bounding the vmalloc() area. There must not be
+ any static mappings in this area; vmalloc will overwrite them.
+ The addresses must also be in the kernel segment (see above).
+ Normally, the vmalloc() area starts VMALLOC_OFFSET bytes above the
+ last virtual RAM address (found using variable high_memory).
+
+VMALLOC_OFFSET
+ Offset normally incorporated into VMALLOC_START to provide a hole
+ between virtual RAM and the vmalloc area. We do this to allow
+ out of bounds memory accesses (eg, something writing off the end
+ of the mapped memory map) to be caught. Normally set to 8MB.
+
+Architecture Specific Macros
+----------------------------
+
+BOOT_MEM(pram,pio,vio)
+ `pram` specifies the physical start address of RAM. Must always
+ be present, and should be the same as PHYS_OFFSET.
+
+ `pio` is the physical address of an 8MB region containing IO for
+ use with the debugging macros in arch/arm/kernel/debug-armv.S.
+
+ `vio` is the virtual address of the 8MB debugging region.
+
+ It is expected that the debugging region will be re-initialised
+ by the architecture specific code later in the code (via the
+ MAPIO function).
+
+BOOT_PARAMS
+ Same as, and see PARAMS_PHYS.
+
+FIXUP(func)
+ Machine specific fixups, run before memory subsystems have been
+ initialised.
+
+MAPIO(func)
+ Machine specific function to map IO areas (including the debug
+ region above).
+
+INITIRQ(func)
+ Machine specific function to initialise interrupts.
diff --git a/Documentation/arch/arm/pxa/mfp.rst b/Documentation/arch/arm/pxa/mfp.rst
new file mode 100644
index 000000000000..ac34e5d7ee44
--- /dev/null
+++ b/Documentation/arch/arm/pxa/mfp.rst
@@ -0,0 +1,288 @@
+==============================================
+MFP Configuration for PXA2xx/PXA3xx Processors
+==============================================
+
+ Eric Miao <eric.miao@marvell.com>
+
+MFP stands for Multi-Function Pin, which is the pin-mux logic on PXA3xx and
+later PXA series processors. This document describes the existing MFP API,
+and how board/platform driver authors could make use of it.
+
+Basic Concept
+=============
+
+Unlike the GPIO alternate function settings on PXA25x and PXA27x, a new MFP
+mechanism is introduced from PXA3xx to completely move the pin-mux functions
+out of the GPIO controller. In addition to pin-mux configurations, the MFP
+also controls the low power state, driving strength, pull-up/down and event
+detection of each pin. Below is a diagram of internal connections between
+the MFP logic and the remaining SoC peripherals::
+
+ +--------+
+ | |--(GPIO19)--+
+ | GPIO | |
+ | |--(GPIO...) |
+ +--------+ |
+ | +---------+
+ +--------+ +------>| |
+ | PWM2 |--(PWM_OUT)-------->| MFP |
+ +--------+ +------>| |-------> to external PAD
+ | +---->| |
+ +--------+ | | +-->| |
+ | SSP2 |---(TXD)----+ | | +---------+
+ +--------+ | |
+ | |
+ +--------+ | |
+ | Keypad |--(MKOUT4)----+ |
+ +--------+ |
+ |
+ +--------+ |
+ | UART2 |---(TXD)--------+
+ +--------+
+
+NOTE: the external pad is named as MFP_PIN_GPIO19, it doesn't necessarily
+mean it's dedicated for GPIO19, only as a hint that internally this pin
+can be routed from GPIO19 of the GPIO controller.
+
+To better understand the change from PXA25x/PXA27x GPIO alternate function
+to this new MFP mechanism, here are several key points:
+
+ 1. GPIO controller on PXA3xx is now a dedicated controller, same as other
+ internal controllers like PWM, SSP and UART, with 128 internal signals
+ which can be routed to external through one or more MFPs (e.g. GPIO<0>
+ can be routed through either MFP_PIN_GPIO0 as well as MFP_PIN_GPIO0_2,
+ see arch/arm/mach-pxa/mfp-pxa300.h)
+
+ 2. Alternate function configuration is removed from this GPIO controller,
+ the remaining functions are pure GPIO-specific, i.e.
+
+ - GPIO signal level control
+ - GPIO direction control
+ - GPIO level change detection
+
+ 3. Low power state for each pin is now controlled by MFP, this means the
+ PGSRx registers on PXA2xx are now useless on PXA3xx
+
+ 4. Wakeup detection is now controlled by MFP, PWER does not control the
+ wakeup from GPIO(s) any more, depending on the sleeping state, ADxER
+ (as defined in pxa3xx-regs.h) controls the wakeup from MFP
+
+NOTE: with such a clear separation of MFP and GPIO, by GPIO<xx> we normally
+mean it is a GPIO signal, and by MFP<xxx> or pin xxx, we mean a physical
+pad (or ball).
+
+MFP API Usage
+=============
+
+For board code writers, here are some guidelines:
+
+1. include ONE of the following header files in your <board>.c:
+
+ - #include "mfp-pxa25x.h"
+ - #include "mfp-pxa27x.h"
+ - #include "mfp-pxa300.h"
+ - #include "mfp-pxa320.h"
+ - #include "mfp-pxa930.h"
+
+ NOTE: only one file in your <board>.c, depending on the processors used,
+ because pin configuration definitions may conflict in these file (i.e.
+ same name, different meaning and settings on different processors). E.g.
+ for zylonite platform, which support both PXA300/PXA310 and PXA320, two
+ separate files are introduced: zylonite_pxa300.c and zylonite_pxa320.c
+ (in addition to handle MFP configuration differences, they also handle
+ the other differences between the two combinations).
+
+ NOTE: PXA300 and PXA310 are almost identical in pin configurations (with
+ PXA310 supporting some additional ones), thus the difference is actually
+ covered in a single mfp-pxa300.h.
+
+2. prepare an array for the initial pin configurations, e.g.::
+
+ static unsigned long mainstone_pin_config[] __initdata = {
+ /* Chip Select */
+ GPIO15_nCS_1,
+
+ /* LCD - 16bpp Active TFT */
+ GPIOxx_TFT_LCD_16BPP,
+ GPIO16_PWM0_OUT, /* Backlight */
+
+ /* MMC */
+ GPIO32_MMC_CLK,
+ GPIO112_MMC_CMD,
+ GPIO92_MMC_DAT_0,
+ GPIO109_MMC_DAT_1,
+ GPIO110_MMC_DAT_2,
+ GPIO111_MMC_DAT_3,
+
+ ...
+
+ /* GPIO */
+ GPIO1_GPIO | WAKEUP_ON_EDGE_BOTH,
+ };
+
+ a) once the pin configurations are passed to pxa{2xx,3xx}_mfp_config(),
+ and written to the actual registers, they are useless and may discard,
+ adding '__initdata' will help save some additional bytes here.
+
+ b) when there is only one possible pin configurations for a component,
+ some simplified definitions can be used, e.g. GPIOxx_TFT_LCD_16BPP on
+ PXA25x and PXA27x processors
+
+ c) if by board design, a pin can be configured to wake up the system
+ from low power state, it can be 'OR'ed with any of:
+
+ WAKEUP_ON_EDGE_BOTH
+ WAKEUP_ON_EDGE_RISE
+ WAKEUP_ON_EDGE_FALL
+ WAKEUP_ON_LEVEL_HIGH - specifically for enabling of keypad GPIOs,
+
+ to indicate that this pin has the capability of wake-up the system,
+ and on which edge(s). This, however, doesn't necessarily mean the
+ pin _will_ wakeup the system, it will only when set_irq_wake() is
+ invoked with the corresponding GPIO IRQ (GPIO_IRQ(xx) or gpio_to_irq())
+ and eventually calls gpio_set_wake() for the actual register setting.
+
+ d) although PXA3xx MFP supports edge detection on each pin, the
+ internal logic will only wakeup the system when those specific bits
+ in ADxER registers are set, which can be well mapped to the
+ corresponding peripheral, thus set_irq_wake() can be called with
+ the peripheral IRQ to enable the wakeup.
+
+
+MFP on PXA3xx
+=============
+
+Every external I/O pad on PXA3xx (excluding those for special purpose) has
+one MFP logic associated, and is controlled by one MFP register (MFPR).
+
+The MFPR has the following bit definitions (for PXA300/PXA310/PXA320)::
+
+ 31 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
+ +-------------------------+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+ | RESERVED |PS|PU|PD| DRIVE |SS|SD|SO|EC|EF|ER|--| AF_SEL |
+ +-------------------------+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+
+ Bit 3: RESERVED
+ Bit 4: EDGE_RISE_EN - enable detection of rising edge on this pin
+ Bit 5: EDGE_FALL_EN - enable detection of falling edge on this pin
+ Bit 6: EDGE_CLEAR - disable edge detection on this pin
+ Bit 7: SLEEP_OE_N - enable outputs during low power modes
+ Bit 8: SLEEP_DATA - output data on the pin during low power modes
+ Bit 9: SLEEP_SEL - selection control for low power modes signals
+ Bit 13: PULLDOWN_EN - enable the internal pull-down resistor on this pin
+ Bit 14: PULLUP_EN - enable the internal pull-up resistor on this pin
+ Bit 15: PULL_SEL - pull state controlled by selected alternate function
+ (0) or by PULL{UP,DOWN}_EN bits (1)
+
+ Bit 0 - 2: AF_SEL - alternate function selection, 8 possibilities, from 0-7
+ Bit 10-12: DRIVE - drive strength and slew rate
+ 0b000 - fast 1mA
+ 0b001 - fast 2mA
+ 0b002 - fast 3mA
+ 0b003 - fast 4mA
+ 0b004 - slow 6mA
+ 0b005 - fast 6mA
+ 0b006 - slow 10mA
+ 0b007 - fast 10mA
+
+MFP Design for PXA2xx/PXA3xx
+============================
+
+Due to the difference of pin-mux handling between PXA2xx and PXA3xx, a unified
+MFP API is introduced to cover both series of processors.
+
+The basic idea of this design is to introduce definitions for all possible pin
+configurations, these definitions are processor and platform independent, and
+the actual API invoked to convert these definitions into register settings and
+make them effective there-after.
+
+Files Involved
+--------------
+
+ - arch/arm/mach-pxa/include/mach/mfp.h
+
+ for
+ 1. Unified pin definitions - enum constants for all configurable pins
+ 2. processor-neutral bit definitions for a possible MFP configuration
+
+ - arch/arm/mach-pxa/mfp-pxa3xx.h
+
+ for PXA3xx specific MFPR register bit definitions and PXA3xx common pin
+ configurations
+
+ - arch/arm/mach-pxa/mfp-pxa2xx.h
+
+ for PXA2xx specific definitions and PXA25x/PXA27x common pin configurations
+
+ - arch/arm/mach-pxa/mfp-pxa25x.h
+ arch/arm/mach-pxa/mfp-pxa27x.h
+ arch/arm/mach-pxa/mfp-pxa300.h
+ arch/arm/mach-pxa/mfp-pxa320.h
+ arch/arm/mach-pxa/mfp-pxa930.h
+
+ for processor specific definitions
+
+ - arch/arm/mach-pxa/mfp-pxa3xx.c
+ - arch/arm/mach-pxa/mfp-pxa2xx.c
+
+ for implementation of the pin configuration to take effect for the actual
+ processor.
+
+Pin Configuration
+-----------------
+
+ The following comments are copied from mfp.h (see the actual source code
+ for most updated info)::
+
+ /*
+ * a possible MFP configuration is represented by a 32-bit integer
+ *
+ * bit 0.. 9 - MFP Pin Number (1024 Pins Maximum)
+ * bit 10..12 - Alternate Function Selection
+ * bit 13..15 - Drive Strength
+ * bit 16..18 - Low Power Mode State
+ * bit 19..20 - Low Power Mode Edge Detection
+ * bit 21..22 - Run Mode Pull State
+ *
+ * to facilitate the definition, the following macros are provided
+ *
+ * MFP_CFG_DEFAULT - default MFP configuration value, with
+ * alternate function = 0,
+ * drive strength = fast 3mA (MFP_DS03X)
+ * low power mode = default
+ * edge detection = none
+ *
+ * MFP_CFG - default MFPR value with alternate function
+ * MFP_CFG_DRV - default MFPR value with alternate function and
+ * pin drive strength
+ * MFP_CFG_LPM - default MFPR value with alternate function and
+ * low power mode
+ * MFP_CFG_X - default MFPR value with alternate function,
+ * pin drive strength and low power mode
+ */
+
+ Examples of pin configurations are::
+
+ #define GPIO94_SSP3_RXD MFP_CFG_X(GPIO94, AF1, DS08X, FLOAT)
+
+ which reads GPIO94 can be configured as SSP3_RXD, with alternate function
+ selection of 1, driving strength of 0b101, and a float state in low power
+ modes.
+
+ NOTE: this is the default setting of this pin being configured as SSP3_RXD
+ which can be modified a bit in board code, though it is not recommended to
+ do so, simply because this default setting is usually carefully encoded,
+ and is supposed to work in most cases.
+
+Register Settings
+-----------------
+
+ Register settings on PXA3xx for a pin configuration is actually very
+ straight-forward, most bits can be converted directly into MFPR value
+ in a easier way. Two sets of MFPR values are calculated: the run-time
+ ones and the low power mode ones, to allow different settings.
+
+ The conversion from a generic pin configuration to the actual register
+ settings on PXA2xx is a bit complicated: many registers are involved,
+ including GAFRx, GPDRx, PGSRx, PWER, PKWR, PFER and PRER. Please see
+ mfp-pxa2xx.c for how the conversion is made.
diff --git a/Documentation/arch/arm/sa1100/assabet.rst b/Documentation/arch/arm/sa1100/assabet.rst
new file mode 100644
index 000000000000..a761e128fb08
--- /dev/null
+++ b/Documentation/arch/arm/sa1100/assabet.rst
@@ -0,0 +1,301 @@
+============================================
+The Intel Assabet (SA-1110 evaluation) board
+============================================
+
+Please see:
+http://developer.intel.com
+
+Also some notes from John G Dorsey <jd5q@andrew.cmu.edu>:
+http://www.cs.cmu.edu/~wearable/software/assabet.html
+
+
+Building the kernel
+-------------------
+
+To build the kernel with current defaults::
+
+ make assabet_defconfig
+ make oldconfig
+ make zImage
+
+The resulting kernel image should be available in linux/arch/arm/boot/zImage.
+
+
+Installing a bootloader
+-----------------------
+
+A couple of bootloaders able to boot Linux on Assabet are available:
+
+BLOB (http://www.lartmaker.nl/lartware/blob/)
+
+ BLOB is a bootloader used within the LART project. Some contributed
+ patches were merged into BLOB to add support for Assabet.
+
+Compaq's Bootldr + John Dorsey's patch for Assabet support
+(http://www.handhelds.org/Compaq/bootldr.html)
+(http://www.wearablegroup.org/software/bootldr/)
+
+ Bootldr is the bootloader developed by Compaq for the iPAQ Pocket PC.
+ John Dorsey has produced add-on patches to add support for Assabet and
+ the JFFS filesystem.
+
+RedBoot (http://sources.redhat.com/redboot/)
+
+ RedBoot is a bootloader developed by Red Hat based on the eCos RTOS
+ hardware abstraction layer. It supports Assabet amongst many other
+ hardware platforms.
+
+RedBoot is currently the recommended choice since it's the only one to have
+networking support, and is the most actively maintained.
+
+Brief examples on how to boot Linux with RedBoot are shown below. But first
+you need to have RedBoot installed in your flash memory. A known to work
+precompiled RedBoot binary is available from the following location:
+
+- ftp://ftp.netwinder.org/users/n/nico/
+- ftp://ftp.arm.linux.org.uk/pub/linux/arm/people/nico/
+- ftp://ftp.handhelds.org/pub/linux/arm/sa-1100-patches/
+
+Look for redboot-assabet*.tgz. Some installation infos are provided in
+redboot-assabet*.txt.
+
+
+Initial RedBoot configuration
+-----------------------------
+
+The commands used here are explained in The RedBoot User's Guide available
+on-line at http://sources.redhat.com/ecos/docs.html.
+Please refer to it for explanations.
+
+If you have a CF network card (my Assabet kit contained a CF+ LP-E from
+Socket Communications Inc.), you should strongly consider using it for TFTP
+file transfers. You must insert it before RedBoot runs since it can't detect
+it dynamically.
+
+To initialize the flash directory::
+
+ fis init -f
+
+To initialize the non-volatile settings, like whether you want to use BOOTP or
+a static IP address, etc, use this command::
+
+ fconfig -i
+
+
+Writing a kernel image into flash
+---------------------------------
+
+First, the kernel image must be loaded into RAM. If you have the zImage file
+available on a TFTP server::
+
+ load zImage -r -b 0x100000
+
+If you rather want to use Y-Modem upload over the serial port::
+
+ load -m ymodem -r -b 0x100000
+
+To write it to flash::
+
+ fis create "Linux kernel" -b 0x100000 -l 0xc0000
+
+
+Booting the kernel
+------------------
+
+The kernel still requires a filesystem to boot. A ramdisk image can be loaded
+as follows::
+
+ load ramdisk_image.gz -r -b 0x800000
+
+Again, Y-Modem upload can be used instead of TFTP by replacing the file name
+by '-y ymodem'.
+
+Now the kernel can be retrieved from flash like this::
+
+ fis load "Linux kernel"
+
+or loaded as described previously. To boot the kernel::
+
+ exec -b 0x100000 -l 0xc0000
+
+The ramdisk image could be stored into flash as well, but there are better
+solutions for on-flash filesystems as mentioned below.
+
+
+Using JFFS2
+-----------
+
+Using JFFS2 (the Second Journalling Flash File System) is probably the most
+convenient way to store a writable filesystem into flash. JFFS2 is used in
+conjunction with the MTD layer which is responsible for low-level flash
+management. More information on the Linux MTD can be found on-line at:
+http://www.linux-mtd.infradead.org/. A JFFS howto with some infos about
+creating JFFS/JFFS2 images is available from the same site.
+
+For instance, a sample JFFS2 image can be retrieved from the same FTP sites
+mentioned below for the precompiled RedBoot image.
+
+To load this file::
+
+ load sample_img.jffs2 -r -b 0x100000
+
+The result should look like::
+
+ RedBoot> load sample_img.jffs2 -r -b 0x100000
+ Raw file loaded 0x00100000-0x00377424
+
+Now we must know the size of the unallocated flash::
+
+ fis free
+
+Result::
+
+ RedBoot> fis free
+ 0x500E0000 .. 0x503C0000
+
+The values above may be different depending on the size of the filesystem and
+the type of flash. See their usage below as an example and take care of
+substituting yours appropriately.
+
+We must determine some values::
+
+ size of unallocated flash: 0x503c0000 - 0x500e0000 = 0x2e0000
+ size of the filesystem image: 0x00377424 - 0x00100000 = 0x277424
+
+We want to fit the filesystem image of course, but we also want to give it all
+the remaining flash space as well. To write it::
+
+ fis unlock -f 0x500E0000 -l 0x2e0000
+ fis erase -f 0x500E0000 -l 0x2e0000
+ fis write -b 0x100000 -l 0x277424 -f 0x500E0000
+ fis create "JFFS2" -n -f 0x500E0000 -l 0x2e0000
+
+Now the filesystem is associated to a MTD "partition" once Linux has discovered
+what they are in the boot process. From Redboot, the 'fis list' command
+displays them::
+
+ RedBoot> fis list
+ Name FLASH addr Mem addr Length Entry point
+ RedBoot 0x50000000 0x50000000 0x00020000 0x00000000
+ RedBoot config 0x503C0000 0x503C0000 0x00020000 0x00000000
+ FIS directory 0x503E0000 0x503E0000 0x00020000 0x00000000
+ Linux kernel 0x50020000 0x00100000 0x000C0000 0x00000000
+ JFFS2 0x500E0000 0x500E0000 0x002E0000 0x00000000
+
+However Linux should display something like::
+
+ SA1100 flash: probing 32-bit flash bus
+ SA1100 flash: Found 2 x16 devices at 0x0 in 32-bit mode
+ Using RedBoot partition definition
+ Creating 5 MTD partitions on "SA1100 flash":
+ 0x00000000-0x00020000 : "RedBoot"
+ 0x00020000-0x000e0000 : "Linux kernel"
+ 0x000e0000-0x003c0000 : "JFFS2"
+ 0x003c0000-0x003e0000 : "RedBoot config"
+ 0x003e0000-0x00400000 : "FIS directory"
+
+What's important here is the position of the partition we are interested in,
+which is the third one. Within Linux, this correspond to /dev/mtdblock2.
+Therefore to boot Linux with the kernel and its root filesystem in flash, we
+need this RedBoot command::
+
+ fis load "Linux kernel"
+ exec -b 0x100000 -l 0xc0000 -c "root=/dev/mtdblock2"
+
+Of course other filesystems than JFFS might be used, like cramfs for example.
+You might want to boot with a root filesystem over NFS, etc. It is also
+possible, and sometimes more convenient, to flash a filesystem directly from
+within Linux while booted from a ramdisk or NFS. The Linux MTD repository has
+many tools to deal with flash memory as well, to erase it for example. JFFS2
+can then be mounted directly on a freshly erased partition and files can be
+copied over directly. Etc...
+
+
+RedBoot scripting
+-----------------
+
+All the commands above aren't so useful if they have to be typed in every
+time the Assabet is rebooted. Therefore it's possible to automate the boot
+process using RedBoot's scripting capability.
+
+For example, I use this to boot Linux with both the kernel and the ramdisk
+images retrieved from a TFTP server on the network::
+
+ RedBoot> fconfig
+ Run script at boot: false true
+ Boot script:
+ Enter script, terminate with empty line
+ >> load zImage -r -b 0x100000
+ >> load ramdisk_ks.gz -r -b 0x800000
+ >> exec -b 0x100000 -l 0xc0000
+ >>
+ Boot script timeout (1000ms resolution): 3
+ Use BOOTP for network configuration: true
+ GDB connection port: 9000
+ Network debug at boot time: false
+ Update RedBoot non-volatile configuration - are you sure (y/n)? y
+
+Then, rebooting the Assabet is just a matter of waiting for the login prompt.
+
+
+
+Nicolas Pitre
+nico@fluxnic.net
+
+June 12, 2001
+
+
+Status of peripherals in -rmk tree (updated 14/10/2001)
+-------------------------------------------------------
+
+Assabet:
+ Serial ports:
+ Radio: TX, RX, CTS, DSR, DCD, RI
+ - PM: Not tested.
+ - COM: TX, RX, CTS, DSR, DCD, RTS, DTR, PM
+ - PM: Not tested.
+ - I2C: Implemented, not fully tested.
+ - L3: Fully tested, pass.
+ - PM: Not tested.
+
+ Video:
+ - LCD: Fully tested. PM
+
+ (LCD doesn't like being blanked with neponset connected)
+
+ - Video out: Not fully
+
+ Audio:
+ UDA1341:
+ - Playback: Fully tested, pass.
+ - Record: Implemented, not tested.
+ - PM: Not tested.
+
+ UCB1200:
+ - Audio play: Implemented, not heavily tested.
+ - Audio rec: Implemented, not heavily tested.
+ - Telco audio play: Implemented, not heavily tested.
+ - Telco audio rec: Implemented, not heavily tested.
+ - POTS control: No
+ - Touchscreen: Yes
+ - PM: Not tested.
+
+ Other:
+ - PCMCIA:
+ - LPE: Fully tested, pass.
+ - USB: No
+ - IRDA:
+ - SIR: Fully tested, pass.
+ - FIR: Fully tested, pass.
+ - PM: Not tested.
+
+Neponset:
+ Serial ports:
+ - COM1,2: TX, RX, CTS, DSR, DCD, RTS, DTR
+ - PM: Not tested.
+ - USB: Implemented, not heavily tested.
+ - PCMCIA: Implemented, not heavily tested.
+ - CF: Implemented, not heavily tested.
+ - PM: Not tested.
+
+More stuff can be found in the -np (Nicolas Pitre's) tree.
diff --git a/Documentation/arch/arm/sa1100/cerf.rst b/Documentation/arch/arm/sa1100/cerf.rst
new file mode 100644
index 000000000000..7fa71b609bf9
--- /dev/null
+++ b/Documentation/arch/arm/sa1100/cerf.rst
@@ -0,0 +1,35 @@
+==============
+CerfBoard/Cube
+==============
+
+*** The StrongARM version of the CerfBoard/Cube has been discontinued ***
+
+The Intrinsyc CerfBoard is a StrongARM 1110-based computer on a board
+that measures approximately 2" square. It includes an Ethernet
+controller, an RS232-compatible serial port, a USB function port, and
+one CompactFlash+ slot on the back. Pictures can be found at the
+Intrinsyc website, http://www.intrinsyc.com.
+
+This document describes the support in the Linux kernel for the
+Intrinsyc CerfBoard.
+
+Supported in this version
+=========================
+
+ - CompactFlash+ slot (select PCMCIA in General Setup and any options
+ that may be required)
+ - Onboard Crystal CS8900 Ethernet controller (Cerf CS8900A support in
+ Network Devices)
+ - Serial ports with a serial console (hardcoded to 38400 8N1)
+
+In order to get this kernel onto your Cerf, you need a server that runs
+both BOOTP and TFTP. Detailed instructions should have come with your
+evaluation kit on how to use the bootloader. This series of commands
+will suffice::
+
+ make ARCH=arm CROSS_COMPILE=arm-linux- cerfcube_defconfig
+ make ARCH=arm CROSS_COMPILE=arm-linux- zImage
+ make ARCH=arm CROSS_COMPILE=arm-linux- modules
+ cp arch/arm/boot/zImage <TFTP directory>
+
+support@intrinsyc.com
diff --git a/Documentation/arch/arm/sa1100/index.rst b/Documentation/arch/arm/sa1100/index.rst
new file mode 100644
index 000000000000..c9aed43280ff
--- /dev/null
+++ b/Documentation/arch/arm/sa1100/index.rst
@@ -0,0 +1,13 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================
+Intel StrongARM 1100
+====================
+
+.. toctree::
+ :maxdepth: 1
+
+ assabet
+ cerf
+ lart
+ serial_uart
diff --git a/Documentation/arch/arm/sa1100/lart.rst b/Documentation/arch/arm/sa1100/lart.rst
new file mode 100644
index 000000000000..94c0568d1095
--- /dev/null
+++ b/Documentation/arch/arm/sa1100/lart.rst
@@ -0,0 +1,15 @@
+====================================
+Linux Advanced Radio Terminal (LART)
+====================================
+
+The LART is a small (7.5 x 10cm) SA-1100 board, designed for embedded
+applications. It has 32 MB DRAM, 4MB Flash ROM, double RS232 and all
+other StrongARM-gadgets. Almost all SA signals are directly accessible
+through a number of connectors. The powersupply accepts voltages
+between 3.5V and 16V and is overdimensioned to support a range of
+daughterboards. A quad Ethernet / IDE / PS2 / sound daughterboard
+is under development, with plenty of others in different stages of
+planning.
+
+The hardware designs for this board have been released under an open license;
+see the LART page at http://www.lartmaker.nl/ for more information.
diff --git a/Documentation/arch/arm/sa1100/serial_uart.rst b/Documentation/arch/arm/sa1100/serial_uart.rst
new file mode 100644
index 000000000000..ea983642b9be
--- /dev/null
+++ b/Documentation/arch/arm/sa1100/serial_uart.rst
@@ -0,0 +1,51 @@
+==================
+SA1100 serial port
+==================
+
+The SA1100 serial port had its major/minor numbers officially assigned::
+
+ > Date: Sun, 24 Sep 2000 21:40:27 -0700
+ > From: H. Peter Anvin <hpa@transmeta.com>
+ > To: Nicolas Pitre <nico@CAM.ORG>
+ > Cc: Device List Maintainer <device@lanana.org>
+ > Subject: Re: device
+ >
+ > Okay. Note that device numbers 204 and 205 are used for "low density
+ > serial devices", so you will have a range of minors on those majors (the
+ > tty device layer handles this just fine, so you don't have to worry about
+ > doing anything special.)
+ >
+ > So your assignments are:
+ >
+ > 204 char Low-density serial ports
+ > 5 = /dev/ttySA0 SA1100 builtin serial port 0
+ > 6 = /dev/ttySA1 SA1100 builtin serial port 1
+ > 7 = /dev/ttySA2 SA1100 builtin serial port 2
+ >
+ > 205 char Low-density serial ports (alternate device)
+ > 5 = /dev/cusa0 Callout device for ttySA0
+ > 6 = /dev/cusa1 Callout device for ttySA1
+ > 7 = /dev/cusa2 Callout device for ttySA2
+ >
+
+You must create those inodes in /dev on the root filesystem used
+by your SA1100-based device::
+
+ mknod ttySA0 c 204 5
+ mknod ttySA1 c 204 6
+ mknod ttySA2 c 204 7
+ mknod cusa0 c 205 5
+ mknod cusa1 c 205 6
+ mknod cusa2 c 205 7
+
+In addition to the creation of the appropriate device nodes above, you
+must ensure your user space applications make use of the correct device
+name. The classic example is the content of the /etc/inittab file where
+you might have a getty process started on ttyS0.
+
+In this case:
+
+- replace occurrences of ttyS0 with ttySA0, ttyS1 with ttySA1, etc.
+
+- don't forget to add 'ttySA0', 'console', or the appropriate tty name
+ in /etc/securetty for root to be allowed to login as well.
diff --git a/Documentation/arch/arm/samsung/bootloader-interface.rst b/Documentation/arch/arm/samsung/bootloader-interface.rst
new file mode 100644
index 000000000000..a56f325dae78
--- /dev/null
+++ b/Documentation/arch/arm/samsung/bootloader-interface.rst
@@ -0,0 +1,81 @@
+==========================================================
+Interface between kernel and boot loaders on Exynos boards
+==========================================================
+
+Author: Krzysztof Kozlowski
+
+Date : 6 June 2015
+
+The document tries to describe currently used interface between Linux kernel
+and boot loaders on Samsung Exynos based boards. This is not a definition
+of interface but rather a description of existing state, a reference
+for information purpose only.
+
+In the document "boot loader" means any of following: U-boot, proprietary
+SBOOT or any other firmware for ARMv7 and ARMv8 initializing the board before
+executing kernel.
+
+
+1. Non-Secure mode
+
+Address: sysram_ns_base_addr
+
+============= ============================================ ==================
+Offset Value Purpose
+============= ============================================ ==================
+0x08 exynos_cpu_resume_ns, mcpm_entry_point System suspend
+0x0c 0x00000bad (Magic cookie) System suspend
+0x1c exynos4_secondary_startup Secondary CPU boot
+0x1c + 4*cpu exynos4_secondary_startup (Exynos4412) Secondary CPU boot
+0x20 0xfcba0d10 (Magic cookie) AFTR
+0x24 exynos_cpu_resume_ns AFTR
+0x28 + 4*cpu 0x8 (Magic cookie, Exynos3250) AFTR
+0x28 0x0 or last value during resume (Exynos542x) System suspend
+============= ============================================ ==================
+
+
+2. Secure mode
+
+Address: sysram_base_addr
+
+============= ============================================ ==================
+Offset Value Purpose
+============= ============================================ ==================
+0x00 exynos4_secondary_startup Secondary CPU boot
+0x04 exynos4_secondary_startup (Exynos542x) Secondary CPU boot
+4*cpu exynos4_secondary_startup (Exynos4412) Secondary CPU boot
+0x20 exynos_cpu_resume (Exynos4210 r1.0) AFTR
+0x24 0xfcba0d10 (Magic cookie, Exynos4210 r1.0) AFTR
+============= ============================================ ==================
+
+Address: pmu_base_addr
+
+============= ============================================ ==================
+Offset Value Purpose
+============= ============================================ ==================
+0x0800 exynos_cpu_resume AFTR, suspend
+0x0800 mcpm_entry_point (Exynos542x with MCPM) AFTR, suspend
+0x0804 0xfcba0d10 (Magic cookie) AFTR
+0x0804 0x00000bad (Magic cookie) System suspend
+0x0814 exynos4_secondary_startup (Exynos4210 r1.1) Secondary CPU boot
+0x0818 0xfcba0d10 (Magic cookie, Exynos4210 r1.1) AFTR
+0x081C exynos_cpu_resume (Exynos4210 r1.1) AFTR
+============= ============================================ ==================
+
+3. Other (regardless of secure/non-secure mode)
+
+Address: pmu_base_addr
+
+============= =============================== ===============================
+Offset Value Purpose
+============= =============================== ===============================
+0x0908 Non-zero Secondary CPU boot up indicator
+ on Exynos3250 and Exynos542x
+============= =============================== ===============================
+
+
+4. Glossary
+
+AFTR - ARM Off Top Running, a low power mode, Cortex cores and many other
+modules are power gated, except the TOP modules
+MCPM - Multi-Cluster Power Management
diff --git a/Documentation/arm/Samsung/clksrc-change-registers.awk b/Documentation/arch/arm/samsung/clksrc-change-registers.awk
index 7be1b8aa7cd9..7be1b8aa7cd9 100755
--- a/Documentation/arm/Samsung/clksrc-change-registers.awk
+++ b/Documentation/arch/arm/samsung/clksrc-change-registers.awk
diff --git a/Documentation/arch/arm/samsung/gpio.rst b/Documentation/arch/arm/samsung/gpio.rst
new file mode 100644
index 000000000000..27fae0d50361
--- /dev/null
+++ b/Documentation/arch/arm/samsung/gpio.rst
@@ -0,0 +1,32 @@
+===========================
+Samsung GPIO implementation
+===========================
+
+Introduction
+------------
+
+This outlines the Samsung GPIO implementation and the architecture
+specific calls provided alongside the drivers/gpio core.
+
+
+GPIOLIB integration
+-------------------
+
+The gpio implementation uses gpiolib as much as possible, only providing
+specific calls for the items that require Samsung specific handling, such
+as pin special-function or pull resistor control.
+
+GPIO numbering is synchronised between the Samsung and gpiolib system.
+
+
+PIN configuration
+-----------------
+
+Pin configuration is specific to the Samsung architecture, with each SoC
+registering the necessary information for the core gpio configuration
+implementation to configure pins as necessary.
+
+The s3c_gpio_cfgpin() and s3c_gpio_setpull() provide the means for a
+driver or machine to change gpio configuration.
+
+See arch/arm/mach-s3c/gpio-cfg.h for more information on these functions.
diff --git a/Documentation/arch/arm/samsung/index.rst b/Documentation/arch/arm/samsung/index.rst
new file mode 100644
index 000000000000..8142cce3d23e
--- /dev/null
+++ b/Documentation/arch/arm/samsung/index.rst
@@ -0,0 +1,12 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========
+Samsung SoC
+===========
+
+.. toctree::
+ :maxdepth: 1
+
+ gpio
+ bootloader-interface
+ overview
diff --git a/Documentation/arch/arm/samsung/overview.rst b/Documentation/arch/arm/samsung/overview.rst
new file mode 100644
index 000000000000..8b15a190169b
--- /dev/null
+++ b/Documentation/arch/arm/samsung/overview.rst
@@ -0,0 +1,76 @@
+==========================
+Samsung ARM Linux Overview
+==========================
+
+Introduction
+------------
+
+ The Samsung range of ARM SoCs spans many similar devices, from the initial
+ ARM9 through to the newest ARM cores. This document shows an overview of
+ the current kernel support, how to use it and where to find the code
+ that supports this.
+
+ The currently supported SoCs are:
+
+ - S3C64XX: S3C6400 and S3C6410
+ - S5PC110 / S5PV210
+
+
+Configuration
+-------------
+
+ A number of configurations are supplied, as there is no current way of
+ unifying all the SoCs into one kernel.
+
+ s5pc110_defconfig
+ - S5PC110 specific default configuration
+ s5pv210_defconfig
+ - S5PV210 specific default configuration
+
+
+Layout
+------
+
+ The directory layout is currently being restructured, and consists of
+ several platform directories and then the machine specific directories
+ of the CPUs being built for.
+
+ plat-samsung provides the base for all the implementations, and is the
+ last in the line of include directories that are processed for the build
+ specific information. It contains the base clock, GPIO and device definitions
+ to get the system running.
+
+ plat-s5p is for s5p specific builds, and contains common support for the
+ S5P specific systems. Not all S5Ps use all the features in this directory
+ due to differences in the hardware.
+
+
+Layout changes
+--------------
+
+ The old plat-s3c and plat-s5pc1xx directories have been removed, with
+ support moved to either plat-samsung or plat-s5p as necessary. These moves
+ where to simplify the include and dependency issues involved with having
+ so many different platform directories.
+
+
+Port Contributors
+-----------------
+
+ Ben Dooks (BJD)
+ Vincent Sanders
+ Herbert Potzl
+ Arnaud Patard (RTP)
+ Roc Wu
+ Klaus Fetscher
+ Dimitry Andric
+ Shannon Holland
+ Guillaume Gourat (NexVision)
+ Christer Weinigel (wingel) (Acer N30)
+ Lucas Correia Villa Real (S3C2400 port)
+
+
+Document Author
+---------------
+
+Copyright 2009-2010 Ben Dooks <ben-linux@fluff.org>
diff --git a/Documentation/arch/arm/setup.rst b/Documentation/arch/arm/setup.rst
new file mode 100644
index 000000000000..8e12ef3fb9a7
--- /dev/null
+++ b/Documentation/arch/arm/setup.rst
@@ -0,0 +1,108 @@
+=============================================
+Kernel initialisation parameters on ARM Linux
+=============================================
+
+The following document describes the kernel initialisation parameter
+structure, otherwise known as 'struct param_struct' which is used
+for most ARM Linux architectures.
+
+This structure is used to pass initialisation parameters from the
+kernel loader to the Linux kernel proper, and may be short lived
+through the kernel initialisation process. As a general rule, it
+should not be referenced outside of arch/arm/kernel/setup.c:setup_arch().
+
+There are a lot of parameters listed in there, and they are described
+below:
+
+ page_size
+ This parameter must be set to the page size of the machine, and
+ will be checked by the kernel.
+
+ nr_pages
+ This is the total number of pages of memory in the system. If
+ the memory is banked, then this should contain the total number
+ of pages in the system.
+
+ If the system contains separate VRAM, this value should not
+ include this information.
+
+ ramdisk_size
+ This is now obsolete, and should not be used.
+
+ flags
+ Various kernel flags, including:
+
+ ===== ========================
+ bit 0 1 = mount root read only
+ bit 1 unused
+ bit 2 0 = load ramdisk
+ bit 3 0 = prompt for ramdisk
+ ===== ========================
+
+ rootdev
+ major/minor number pair of device to mount as the root filesystem.
+
+ video_num_cols / video_num_rows
+ These two together describe the character size of the dummy console,
+ or VGA console character size. They should not be used for any other
+ purpose.
+
+ It's generally a good idea to set these to be either standard VGA, or
+ the equivalent character size of your fbcon display. This then allows
+ all the bootup messages to be displayed correctly.
+
+ video_x / video_y
+ This describes the character position of cursor on VGA console, and
+ is otherwise unused. (should not be used for other console types, and
+ should not be used for other purposes).
+
+ memc_control_reg
+ MEMC chip control register for Acorn Archimedes and Acorn A5000
+ based machines. May be used differently by different architectures.
+
+ sounddefault
+ Default sound setting on Acorn machines. May be used differently by
+ different architectures.
+
+ adfsdrives
+ Number of ADFS/MFM disks. May be used differently by different
+ architectures.
+
+ bytes_per_char_h / bytes_per_char_v
+ These are now obsolete, and should not be used.
+
+ pages_in_bank[4]
+ Number of pages in each bank of the systems memory (used for RiscPC).
+ This is intended to be used on systems where the physical memory
+ is non-contiguous from the processors point of view.
+
+ pages_in_vram
+ Number of pages in VRAM (used on Acorn RiscPC). This value may also
+ be used by loaders if the size of the video RAM can't be obtained
+ from the hardware.
+
+ initrd_start / initrd_size
+ This describes the kernel virtual start address and size of the
+ initial ramdisk.
+
+ rd_start
+ Start address in sectors of the ramdisk image on a floppy disk.
+
+ system_rev
+ system revision number.
+
+ system_serial_low / system_serial_high
+ system 64-bit serial number
+
+ mem_fclk_21285
+ The speed of the external oscillator to the 21285 (footbridge),
+ which control's the speed of the memory bus, timer & serial port.
+ Depending upon the speed of the cpu its value can be between
+ 0-66 MHz. If no params are passed or a value of zero is passed,
+ then a value of 50 Mhz is the default on 21285 architectures.
+
+ paths[8][128]
+ These are now obsolete, and should not be used.
+
+ commandline
+ Kernel command line parameters. Details can be found elsewhere.
diff --git a/Documentation/arch/arm/spear/overview.rst b/Documentation/arch/arm/spear/overview.rst
new file mode 100644
index 000000000000..1a77f6b213b6
--- /dev/null
+++ b/Documentation/arch/arm/spear/overview.rst
@@ -0,0 +1,66 @@
+========================
+SPEAr ARM Linux Overview
+========================
+
+Introduction
+------------
+
+ SPEAr (Structured Processor Enhanced Architecture).
+ weblink : http://www.st.com/spear
+
+ The ST Microelectronics SPEAr range of ARM9/CortexA9 System-on-Chip CPUs are
+ supported by the 'spear' platform of ARM Linux. Currently SPEAr1310,
+ SPEAr1340, SPEAr300, SPEAr310, SPEAr320 and SPEAr600 SOCs are supported.
+
+ Hierarchy in SPEAr is as follows:
+
+ SPEAr (Platform)
+
+ - SPEAr3XX (3XX SOC series, based on ARM9)
+ - SPEAr300 (SOC)
+ - SPEAr300 Evaluation Board
+ - SPEAr310 (SOC)
+ - SPEAr310 Evaluation Board
+ - SPEAr320 (SOC)
+ - SPEAr320 Evaluation Board
+ - SPEAr6XX (6XX SOC series, based on ARM9)
+ - SPEAr600 (SOC)
+ - SPEAr600 Evaluation Board
+ - SPEAr13XX (13XX SOC series, based on ARM CORTEXA9)
+ - SPEAr1310 (SOC)
+ - SPEAr1310 Evaluation Board
+ - SPEAr1340 (SOC)
+ - SPEAr1340 Evaluation Board
+
+Configuration
+-------------
+
+ A generic configuration is provided for each machine, and can be used as the
+ default by::
+
+ make spear13xx_defconfig
+ make spear3xx_defconfig
+ make spear6xx_defconfig
+
+Layout
+------
+
+ The common files for multiple machine families (SPEAr3xx, SPEAr6xx and
+ SPEAr13xx) are located in the platform code contained in arch/arm/plat-spear
+ with headers in plat/.
+
+ Each machine series have a directory with name arch/arm/mach-spear followed by
+ series name. Like mach-spear3xx, mach-spear6xx and mach-spear13xx.
+
+ Common file for machines of spear3xx family is mach-spear3xx/spear3xx.c, for
+ spear6xx is mach-spear6xx/spear6xx.c and for spear13xx family is
+ mach-spear13xx/spear13xx.c. mach-spear* also contain soc/machine specific
+ files, like spear1310.c, spear1340.c spear300.c, spear310.c, spear320.c and
+ spear600.c. mach-spear* doesn't contains board specific files as they fully
+ support Flattened Device Tree.
+
+
+Document Author
+---------------
+
+ Viresh Kumar <vireshk@kernel.org>, (c) 2010-2012 ST Microelectronics
diff --git a/Documentation/arch/arm/sti/overview.rst b/Documentation/arch/arm/sti/overview.rst
new file mode 100644
index 000000000000..ae16aced800f
--- /dev/null
+++ b/Documentation/arch/arm/sti/overview.rst
@@ -0,0 +1,32 @@
+======================
+STi ARM Linux Overview
+======================
+
+Introduction
+------------
+
+ The ST Microelectronics Multimedia and Application Processors range of
+ CortexA9 System-on-Chip are supported by the 'STi' platform of
+ ARM Linux. Currently STiH407, STiH410 and STiH418 are supported.
+
+
+configuration
+-------------
+
+ The configuration for the STi platform is supported via the multi_v7_defconfig.
+
+Layout
+------
+
+ All the files for multiple machine families (STiH407, STiH410, and STiH418)
+ are located in the platform code contained in arch/arm/mach-sti
+
+ There is a generic board board-dt.c in the mach folder which support
+ Flattened Device Tree, which means, It works with any compatible board with
+ Device Trees.
+
+
+Document Author
+---------------
+
+ Srinivas Kandagatla <srinivas.kandagatla@st.com>, (c) 2013 ST Microelectronics
diff --git a/Documentation/arch/arm/sti/stih407-overview.rst b/Documentation/arch/arm/sti/stih407-overview.rst
new file mode 100644
index 000000000000..027e75bc7b7c
--- /dev/null
+++ b/Documentation/arch/arm/sti/stih407-overview.rst
@@ -0,0 +1,19 @@
+================
+STiH407 Overview
+================
+
+Introduction
+------------
+
+ The STiH407 is the new generation of SoC for Multi-HD, AVC set-top boxes
+ and server/connected client application for satellite, cable, terrestrial
+ and IP-STB markets.
+
+ Features
+ - ARM Cortex-A9 1.5 GHz dual core CPU (28nm)
+ - SATA2, USB 3.0, PCIe, Gbit Ethernet
+
+Document Author
+---------------
+
+ Maxime Coquelin <maxime.coquelin@st.com>, (c) 2014 ST Microelectronics
diff --git a/Documentation/arch/arm/sti/stih418-overview.rst b/Documentation/arch/arm/sti/stih418-overview.rst
new file mode 100644
index 000000000000..b563c1f4fe5a
--- /dev/null
+++ b/Documentation/arch/arm/sti/stih418-overview.rst
@@ -0,0 +1,21 @@
+================
+STiH418 Overview
+================
+
+Introduction
+------------
+
+ The STiH418 is the new generation of SoC for UHDp60 set-top boxes
+ and server/connected client application for satellite, cable, terrestrial
+ and IP-STB markets.
+
+ Features
+ - ARM Cortex-A9 1.5 GHz quad core CPU (28nm)
+ - SATA2, USB 3.0, PCIe, Gbit Ethernet
+ - HEVC L5.1 Main 10
+ - VP9
+
+Document Author
+---------------
+
+ Maxime Coquelin <maxime.coquelin@st.com>, (c) 2015 ST Microelectronics
diff --git a/Documentation/arm/stm32/overview.rst b/Documentation/arch/arm/stm32/overview.rst
index 85cfc8410798..85cfc8410798 100644
--- a/Documentation/arm/stm32/overview.rst
+++ b/Documentation/arch/arm/stm32/overview.rst
diff --git a/Documentation/arch/arm/stm32/stm32-dma-mdma-chaining.rst b/Documentation/arch/arm/stm32/stm32-dma-mdma-chaining.rst
new file mode 100644
index 000000000000..301aa30890ae
--- /dev/null
+++ b/Documentation/arch/arm/stm32/stm32-dma-mdma-chaining.rst
@@ -0,0 +1,415 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======================
+STM32 DMA-MDMA chaining
+=======================
+
+
+Introduction
+------------
+
+ This document describes the STM32 DMA-MDMA chaining feature. But before going
+ further, let's introduce the peripherals involved.
+
+ To offload data transfers from the CPU, STM32 microprocessors (MPUs) embed
+ direct memory access controllers (DMA).
+
+ STM32MP1 SoCs embed both STM32 DMA and STM32 MDMA controllers. STM32 DMA
+ request routing capabilities are enhanced by a DMA request multiplexer
+ (STM32 DMAMUX).
+
+ **STM32 DMAMUX**
+
+ STM32 DMAMUX routes any DMA request from a given peripheral to any STM32 DMA
+ controller (STM32MP1 counts two STM32 DMA controllers) channels.
+
+ **STM32 DMA**
+
+ STM32 DMA is mainly used to implement central data buffer storage (usually in
+ the system SRAM) for different peripheral. It can access external RAMs but
+ without the ability to generate convenient burst transfer ensuring the best
+ load of the AXI.
+
+ **STM32 MDMA**
+
+ STM32 MDMA (Master DMA) is mainly used to manage direct data transfers between
+ RAM data buffers without CPU intervention. It can also be used in a
+ hierarchical structure that uses STM32 DMA as first level data buffer
+ interfaces for AHB peripherals, while the STM32 MDMA acts as a second level
+ DMA with better performance. As a AXI/AHB master, STM32 MDMA can take control
+ of the AXI/AHB bus.
+
+
+Principles
+----------
+
+ STM32 DMA-MDMA chaining feature relies on the strengths of STM32 DMA and
+ STM32 MDMA controllers.
+
+ STM32 DMA has a circular Double Buffer Mode (DBM). At each end of transaction
+ (when DMA data counter - DMA_SxNDTR - reaches 0), the memory pointers
+ (configured with DMA_SxSM0AR and DMA_SxM1AR) are swapped and the DMA data
+ counter is automatically reloaded. This allows the SW or the STM32 MDMA to
+ process one memory area while the second memory area is being filled/used by
+ the STM32 DMA transfer.
+
+ With STM32 MDMA linked-list mode, a single request initiates the data array
+ (collection of nodes) to be transferred until the linked-list pointer for the
+ channel is null. The channel transfer complete of the last node is the end of
+ transfer, unless first and last nodes are linked to each other, in such a
+ case, the linked-list loops on to create a circular MDMA transfer.
+
+ STM32 MDMA has direct connections with STM32 DMA. This enables autonomous
+ communication and synchronization between peripherals, thus saving CPU
+ resources and bus congestion. Transfer Complete signal of STM32 DMA channel
+ can triggers STM32 MDMA transfer. STM32 MDMA can clear the request generated
+ by the STM32 DMA by writing to its Interrupt Clear register (whose address is
+ stored in MDMA_CxMAR, and bit mask in MDMA_CxMDR).
+
+ .. table:: STM32 MDMA interconnect table with STM32 DMA
+
+ +--------------+----------------+-----------+------------+
+ | STM32 DMAMUX | STM32 DMA | STM32 DMA | STM32 MDMA |
+ | channels | channels | Transfer | request |
+ | | | complete | |
+ | | | signal | |
+ +==============+================+===========+============+
+ | Channel *0* | DMA1 channel 0 | dma1_tcf0 | *0x00* |
+ +--------------+----------------+-----------+------------+
+ | Channel *1* | DMA1 channel 1 | dma1_tcf1 | *0x01* |
+ +--------------+----------------+-----------+------------+
+ | Channel *2* | DMA1 channel 2 | dma1_tcf2 | *0x02* |
+ +--------------+----------------+-----------+------------+
+ | Channel *3* | DMA1 channel 3 | dma1_tcf3 | *0x03* |
+ +--------------+----------------+-----------+------------+
+ | Channel *4* | DMA1 channel 4 | dma1_tcf4 | *0x04* |
+ +--------------+----------------+-----------+------------+
+ | Channel *5* | DMA1 channel 5 | dma1_tcf5 | *0x05* |
+ +--------------+----------------+-----------+------------+
+ | Channel *6* | DMA1 channel 6 | dma1_tcf6 | *0x06* |
+ +--------------+----------------+-----------+------------+
+ | Channel *7* | DMA1 channel 7 | dma1_tcf7 | *0x07* |
+ +--------------+----------------+-----------+------------+
+ | Channel *8* | DMA2 channel 0 | dma2_tcf0 | *0x08* |
+ +--------------+----------------+-----------+------------+
+ | Channel *9* | DMA2 channel 1 | dma2_tcf1 | *0x09* |
+ +--------------+----------------+-----------+------------+
+ | Channel *10* | DMA2 channel 2 | dma2_tcf2 | *0x0A* |
+ +--------------+----------------+-----------+------------+
+ | Channel *11* | DMA2 channel 3 | dma2_tcf3 | *0x0B* |
+ +--------------+----------------+-----------+------------+
+ | Channel *12* | DMA2 channel 4 | dma2_tcf4 | *0x0C* |
+ +--------------+----------------+-----------+------------+
+ | Channel *13* | DMA2 channel 5 | dma2_tcf5 | *0x0D* |
+ +--------------+----------------+-----------+------------+
+ | Channel *14* | DMA2 channel 6 | dma2_tcf6 | *0x0E* |
+ +--------------+----------------+-----------+------------+
+ | Channel *15* | DMA2 channel 7 | dma2_tcf7 | *0x0F* |
+ +--------------+----------------+-----------+------------+
+
+ STM32 DMA-MDMA chaining feature then uses a SRAM buffer. STM32MP1 SoCs embed
+ three fast access static internal RAMs of various size, used for data storage.
+ Due to STM32 DMA legacy (within microcontrollers), STM32 DMA performances are
+ bad with DDR, while they are optimal with SRAM. Hence the SRAM buffer used
+ between STM32 DMA and STM32 MDMA. This buffer is split in two equal periods
+ and STM32 DMA uses one period while STM32 MDMA uses the other period
+ simultaneously.
+ ::
+
+ dma[1:2]-tcf[0:7]
+ .----------------.
+ ____________ ' _________ V____________
+ | STM32 DMA | / __|>_ \ | STM32 MDMA |
+ |------------| | / \ | |------------|
+ | DMA_SxM0AR |<=>| | SRAM | |<=>| []-[]...[] |
+ | DMA_SxM1AR | | \_____/ | | |
+ |____________| \___<|____/ |____________|
+
+ STM32 DMA-MDMA chaining uses (struct dma_slave_config).peripheral_config to
+ exchange the parameters needed to configure MDMA. These parameters are
+ gathered into a u32 array with three values:
+
+ * the STM32 MDMA request (which is actually the DMAMUX channel ID),
+ * the address of the STM32 DMA register to clear the Transfer Complete
+ interrupt flag,
+ * the mask of the Transfer Complete interrupt flag of the STM32 DMA channel.
+
+Device Tree updates for STM32 DMA-MDMA chaining support
+-------------------------------------------------------
+
+ **1. Allocate a SRAM buffer**
+
+ SRAM device tree node is defined in SoC device tree. You can refer to it in
+ your board device tree to define your SRAM pool.
+ ::
+
+ &sram {
+ my_foo_device_dma_pool: dma-sram@0 {
+ reg = <0x0 0x1000>;
+ };
+ };
+
+ Be careful of the start index, in case there are other SRAM consumers.
+ Define your pool size strategically: to optimise chaining, the idea is that
+ STM32 DMA and STM32 MDMA can work simultaneously, on each buffer of the
+ SRAM.
+ If the SRAM period is greater than the expected DMA transfer, then STM32 DMA
+ and STM32 MDMA will work sequentially instead of simultaneously. It is not a
+ functional issue but it is not optimal.
+
+ Don't forget to refer to your SRAM pool in your device node. You need to
+ define a new property.
+ ::
+
+ &my_foo_device {
+ ...
+ my_dma_pool = &my_foo_device_dma_pool;
+ };
+
+ Then get this SRAM pool in your foo driver and allocate your SRAM buffer.
+
+ **2. Allocate a STM32 DMA channel and a STM32 MDMA channel**
+
+ You need to define an extra channel in your device tree node, in addition to
+ the one you should already have for "classic" DMA operation.
+
+ This new channel must be taken from STM32 MDMA channels, so, the phandle of
+ the DMA controller to use is the MDMA controller's one.
+ ::
+
+ &my_foo_device {
+ [...]
+ my_dma_pool = &my_foo_device_dma_pool;
+ dmas = <&dmamux1 ...>, // STM32 DMA channel
+ <&mdma1 0 0x3 0x1200000a 0 0>; // + STM32 MDMA channel
+ };
+
+ Concerning STM32 MDMA bindings:
+
+ 1. The request line number : whatever the value here, it will be overwritten
+ by MDMA driver with the STM32 DMAMUX channel ID passed through
+ (struct dma_slave_config).peripheral_config
+
+ 2. The priority level : choose Very High (0x3) so that your channel will
+ take priority other the other during request arbitration
+
+ 3. A 32bit mask specifying the DMA channel configuration : source and
+ destination address increment, block transfer with 128 bytes per single
+ transfer
+
+ 4. The 32bit value specifying the register to be used to acknowledge the
+ request: it will be overwritten by MDMA driver, with the DMA channel
+ interrupt flag clear register address passed through
+ (struct dma_slave_config).peripheral_config
+
+ 5. The 32bit mask specifying the value to be written to acknowledge the
+ request: it will be overwritten by MDMA driver, with the DMA channel
+ Transfer Complete flag passed through
+ (struct dma_slave_config).peripheral_config
+
+Driver updates for STM32 DMA-MDMA chaining support in foo driver
+----------------------------------------------------------------
+
+ **0. (optional) Refactor the original sg_table if dmaengine_prep_slave_sg()**
+
+ In case of dmaengine_prep_slave_sg(), the original sg_table can't be used as
+ is. Two new sg_tables must be created from the original one. One for
+ STM32 DMA transfer (where memory address targets now the SRAM buffer instead
+ of DDR buffer) and one for STM32 MDMA transfer (where memory address targets
+ the DDR buffer).
+
+ The new sg_list items must fit SRAM period length. Here is an example for
+ DMA_DEV_TO_MEM:
+ ::
+
+ /*
+ * Assuming sgl and nents, respectively the initial scatterlist and its
+ * length.
+ * Assuming sram_dma_buf and sram_period, respectively the memory
+ * allocated from the pool for DMA usage, and the length of the period,
+ * which is half of the sram_buf size.
+ */
+ struct sg_table new_dma_sgt, new_mdma_sgt;
+ struct scatterlist *s, *_sgl;
+ dma_addr_t ddr_dma_buf;
+ u32 new_nents = 0, len;
+ int i;
+
+ /* Count the number of entries needed */
+ for_each_sg(sgl, s, nents, i)
+ if (sg_dma_len(s) > sram_period)
+ new_nents += DIV_ROUND_UP(sg_dma_len(s), sram_period);
+ else
+ new_nents++;
+
+ /* Create sg table for STM32 DMA channel */
+ ret = sg_alloc_table(&new_dma_sgt, new_nents, GFP_ATOMIC);
+ if (ret)
+ dev_err(dev, "DMA sg table alloc failed\n");
+
+ for_each_sg(new_dma_sgt.sgl, s, new_dma_sgt.nents, i) {
+ _sgl = sgl;
+ sg_dma_len(s) = min(sg_dma_len(_sgl), sram_period);
+ /* Targets the beginning = first half of the sram_buf */
+ s->dma_address = sram_buf;
+ /*
+ * Targets the second half of the sram_buf
+ * for odd indexes of the item of the sg_list
+ */
+ if (i & 1)
+ s->dma_address += sram_period;
+ }
+
+ /* Create sg table for STM32 MDMA channel */
+ ret = sg_alloc_table(&new_mdma_sgt, new_nents, GFP_ATOMIC);
+ if (ret)
+ dev_err(dev, "MDMA sg_table alloc failed\n");
+
+ _sgl = sgl;
+ len = sg_dma_len(sgl);
+ ddr_dma_buf = sg_dma_address(sgl);
+ for_each_sg(mdma_sgt.sgl, s, mdma_sgt.nents, i) {
+ size_t bytes = min_t(size_t, len, sram_period);
+
+ sg_dma_len(s) = bytes;
+ sg_dma_address(s) = ddr_dma_buf;
+ len -= bytes;
+
+ if (!len && sg_next(_sgl)) {
+ _sgl = sg_next(_sgl);
+ len = sg_dma_len(_sgl);
+ ddr_dma_buf = sg_dma_address(_sgl);
+ } else {
+ ddr_dma_buf += bytes;
+ }
+ }
+
+ Don't forget to release these new sg_tables after getting the descriptors
+ with dmaengine_prep_slave_sg().
+
+ **1. Set controller specific parameters**
+
+ First, use dmaengine_slave_config() with a struct dma_slave_config to
+ configure STM32 DMA channel. You just have to take care of DMA addresses,
+ the memory address (depending on the transfer direction) must point on your
+ SRAM buffer, and set (struct dma_slave_config).peripheral_size != 0.
+
+ STM32 DMA driver will check (struct dma_slave_config).peripheral_size to
+ determine if chaining is being used or not. If it is used, then STM32 DMA
+ driver fills (struct dma_slave_config).peripheral_config with an array of
+ three u32 : the first one containing STM32 DMAMUX channel ID, the second one
+ the channel interrupt flag clear register address, and the third one the
+ channel Transfer Complete flag mask.
+
+ Then, use dmaengine_slave_config with another struct dma_slave_config to
+ configure STM32 MDMA channel. Take care of DMA addresses, the device address
+ (depending on the transfer direction) must point on your SRAM buffer, and
+ the memory address must point to the buffer originally used for "classic"
+ DMA operation. Use the previous (struct dma_slave_config).peripheral_size
+ and .peripheral_config that have been updated by STM32 DMA driver, to set
+ (struct dma_slave_config).peripheral_size and .peripheral_config of the
+ struct dma_slave_config to configure STM32 MDMA channel.
+ ::
+
+ struct dma_slave_config dma_conf;
+ struct dma_slave_config mdma_conf;
+
+ memset(&dma_conf, 0, sizeof(dma_conf));
+ [...]
+ config.direction = DMA_DEV_TO_MEM;
+ config.dst_addr = sram_dma_buf; // SRAM buffer
+ config.peripheral_size = 1; // peripheral_size != 0 => chaining
+
+ dmaengine_slave_config(dma_chan, &dma_config);
+
+ memset(&mdma_conf, 0, sizeof(mdma_conf));
+ config.direction = DMA_DEV_TO_MEM;
+ mdma_conf.src_addr = sram_dma_buf; // SRAM buffer
+ mdma_conf.dst_addr = rx_dma_buf; // original memory buffer
+ mdma_conf.peripheral_size = dma_conf.peripheral_size; // <- dma_conf
+ mdma_conf.peripheral_config = dma_config.peripheral_config; // <- dma_conf
+
+ dmaengine_slave_config(mdma_chan, &mdma_conf);
+
+ **2. Get a descriptor for STM32 DMA channel transaction**
+
+ In the same way you get your descriptor for your "classic" DMA operation,
+ you just have to replace the original sg_list (in case of
+ dmaengine_prep_slave_sg()) with the new sg_list using SRAM buffer, or to
+ replace the original buffer address, length and period (in case of
+ dmaengine_prep_dma_cyclic()) with the new SRAM buffer.
+
+ **3. Get a descriptor for STM32 MDMA channel transaction**
+
+ If you previously get descriptor (for STM32 DMA) with
+
+ * dmaengine_prep_slave_sg(), then use dmaengine_prep_slave_sg() for
+ STM32 MDMA;
+ * dmaengine_prep_dma_cyclic(), then use dmaengine_prep_dma_cyclic() for
+ STM32 MDMA.
+
+ Use the new sg_list using SRAM buffer (in case of dmaengine_prep_slave_sg())
+ or, depending on the transfer direction, either the original DDR buffer (in
+ case of DMA_DEV_TO_MEM) or the SRAM buffer (in case of DMA_MEM_TO_DEV), the
+ source address being previously set with dmaengine_slave_config().
+
+ **4. Submit both transactions**
+
+ Before submitting your transactions, you may need to define on which
+ descriptor you want a callback to be called at the end of the transfer
+ (dmaengine_prep_slave_sg()) or the period (dmaengine_prep_dma_cyclic()).
+ Depending on the direction, set the callback on the descriptor that finishes
+ the overall transfer:
+
+ * DMA_DEV_TO_MEM: set the callback on the "MDMA" descriptor
+ * DMA_MEM_TO_DEV: set the callback on the "DMA" descriptor
+
+ Then, submit the descriptors whatever the order, with dmaengine_tx_submit().
+
+ **5. Issue pending requests (and wait for callback notification)**
+
+ As STM32 MDMA channel transfer is triggered by STM32 DMA, you must issue
+ STM32 MDMA channel before STM32 DMA channel.
+
+ If any, your callback will be called to warn you about the end of the overall
+ transfer or the period completion.
+
+ Don't forget to terminate both channels. STM32 DMA channel is configured in
+ cyclic Double-Buffer mode so it won't be disabled by HW, you need to terminate
+ it. STM32 MDMA channel will be stopped by HW in case of sg transfer, but not
+ in case of cyclic transfer. You can terminate it whatever the kind of transfer.
+
+ **STM32 DMA-MDMA chaining DMA_MEM_TO_DEV special case**
+
+ STM32 DMA-MDMA chaining in DMA_MEM_TO_DEV is a special case. Indeed, the
+ STM32 MDMA feeds the SRAM buffer with the DDR data, and the STM32 DMA reads
+ data from SRAM buffer. So some data (the first period) have to be copied in
+ SRAM buffer when the STM32 DMA starts to read.
+
+ A trick could be pausing the STM32 DMA channel (that will raise a Transfer
+ Complete signal, triggering the STM32 MDMA channel), but the first data read
+ by the STM32 DMA could be "wrong". The proper way is to prepare the first SRAM
+ period with dmaengine_prep_dma_memcpy(). Then this first period should be
+ "removed" from the sg or the cyclic transfer.
+
+ Due to this complexity, rather use the STM32 DMA-MDMA chaining for
+ DMA_DEV_TO_MEM and keep the "classic" DMA usage for DMA_MEM_TO_DEV, unless
+ you're not afraid.
+
+Resources
+---------
+
+ Application note, datasheet and reference manual are available on ST website
+ (STM32MP1_).
+
+ Dedicated focus on three application notes (AN5224_, AN4031_ & AN5001_)
+ dealing with STM32 DMAMUX, STM32 DMA and STM32 MDMA.
+
+.. _STM32MP1: https://www.st.com/en/microcontrollers-microprocessors/stm32mp1-series.html
+.. _AN5224: https://www.st.com/resource/en/application_note/an5224-stm32-dmamux-the-dma-request-router-stmicroelectronics.pdf
+.. _AN4031: https://www.st.com/resource/en/application_note/dm00046011-using-the-stm32f2-stm32f4-and-stm32f7-series-dma-controller-stmicroelectronics.pdf
+.. _AN5001: https://www.st.com/resource/en/application_note/an5001-stm32cube-expansion-package-for-stm32h7-series-mdma-stmicroelectronics.pdf
+
+:Authors:
+
+- Amelie Delaunay <amelie.delaunay@foss.st.com> \ No newline at end of file
diff --git a/Documentation/arm/stm32/stm32f429-overview.rst b/Documentation/arch/arm/stm32/stm32f429-overview.rst
index 18feda97f483..a7ebe8ea6697 100644
--- a/Documentation/arm/stm32/stm32f429-overview.rst
+++ b/Documentation/arch/arm/stm32/stm32f429-overview.rst
@@ -1,3 +1,4 @@
+==================
STM32F429 Overview
==================
@@ -21,6 +22,4 @@ Datasheet and reference manual are publicly available on ST website (STM32F429_)
.. _STM32F429: http://www.st.com/web/en/catalog/mmc/FM141/SC1169/SS1577/LN1806?ecmp=stm32f429-439_pron_pr-ces2014_nov2013
-:Authors:
-
-Maxime Coquelin <mcoquelin.stm32@gmail.com>
+:Authors: Maxime Coquelin <mcoquelin.stm32@gmail.com>
diff --git a/Documentation/arm/stm32/stm32f746-overview.rst b/Documentation/arch/arm/stm32/stm32f746-overview.rst
index b5f4b6ce7656..335f0855a858 100644
--- a/Documentation/arm/stm32/stm32f746-overview.rst
+++ b/Documentation/arch/arm/stm32/stm32f746-overview.rst
@@ -1,3 +1,4 @@
+==================
STM32F746 Overview
==================
@@ -14,7 +15,7 @@ It features:
- SD/MMC/SDIO support
- Ethernet controller
- USB OTFG FS & HS controllers
-- I2C, SPI, CAN busses support
+- I2C, SPI, CAN buses support
- Several 16 & 32 bits general purpose timers
- Serial Audio interface
- LCD controller
@@ -28,6 +29,4 @@ Datasheet and reference manual are publicly available on ST website (STM32F746_)
.. _STM32F746: http://www.st.com/content/st_com/en/products/microcontrollers/stm32-32-bit-arm-cortex-mcus/stm32f7-series/stm32f7x6/stm32f746ng.html
-:Authors:
-
-Alexandre Torgue <alexandre.torgue@st.com>
+:Authors: Alexandre Torgue <alexandre.torgue@st.com>
diff --git a/Documentation/arm/stm32/stm32f769-overview.rst b/Documentation/arch/arm/stm32/stm32f769-overview.rst
index 228656ced2fe..ef31aadee68f 100644
--- a/Documentation/arm/stm32/stm32f769-overview.rst
+++ b/Documentation/arch/arm/stm32/stm32f769-overview.rst
@@ -1,3 +1,4 @@
+==================
STM32F769 Overview
==================
@@ -14,7 +15,7 @@ It features:
- SD/MMC/SDIO support*2
- Ethernet controller
- USB OTFG FS & HS controllers
-- I2C*4, SPI*6, CAN*3 busses support
+- I2C*4, SPI*6, CAN*3 buses support
- Several 16 & 32 bits general purpose timers
- Serial Audio interface*2
- LCD controller
@@ -30,6 +31,4 @@ Datasheet and reference manual are publicly available on ST website (STM32F769_)
.. _STM32F769: http://www.st.com/content/st_com/en/products/microcontrollers/stm32-32-bit-arm-cortex-mcus/stm32-high-performance-mcus/stm32f7-series/stm32f7x9/stm32f769ni.html
-:Authors:
-
-Alexandre Torgue <alexandre.torgue@st.com>
+:Authors: Alexandre Torgue <alexandre.torgue@st.com>
diff --git a/Documentation/arm/stm32/stm32h743-overview.rst b/Documentation/arch/arm/stm32/stm32h743-overview.rst
index 3458dc00095d..7659df24d362 100644
--- a/Documentation/arm/stm32/stm32h743-overview.rst
+++ b/Documentation/arch/arm/stm32/stm32h743-overview.rst
@@ -1,3 +1,4 @@
+==================
STM32H743 Overview
==================
@@ -14,7 +15,7 @@ It features:
- SD/MMC/SDIO support
- Ethernet controller
- USB OTFG FS & HS controllers
-- I2C, SPI, CAN busses support
+- I2C, SPI, CAN buses support
- Several 16 & 32 bits general purpose timers
- Serial Audio interface
- LCD controller
@@ -29,6 +30,4 @@ Datasheet and reference manual are publicly available on ST website (STM32H743_)
.. _STM32H743: http://www.st.com/en/microcontrollers/stm32h7x3.html?querycriteria=productId=LN2033
-:Authors:
-
-Alexandre Torgue <alexandre.torgue@st.com>
+:Authors: Alexandre Torgue <alexandre.torgue@st.com>
diff --git a/Documentation/arch/arm/stm32/stm32h750-overview.rst b/Documentation/arch/arm/stm32/stm32h750-overview.rst
new file mode 100644
index 000000000000..be032b77d1f1
--- /dev/null
+++ b/Documentation/arch/arm/stm32/stm32h750-overview.rst
@@ -0,0 +1,34 @@
+==================
+STM32H750 Overview
+==================
+
+Introduction
+------------
+
+The STM32H750 is a Cortex-M7 MCU aimed at various applications.
+It features:
+
+- Cortex-M7 core running up to @480MHz
+- 128K internal flash, 1MBytes internal RAM
+- FMC controller to connect SDRAM, NOR and NAND memories
+- Dual mode QSPI
+- SD/MMC/SDIO support
+- Ethernet controller
+- USB OTFG FS & HS controllers
+- I2C, SPI, CAN buses support
+- Several 16 & 32 bits general purpose timers
+- Serial Audio interface
+- LCD controller
+- HDMI-CEC
+- SPDIFRX
+- DFSDM
+
+Resources
+---------
+
+Datasheet and reference manual are publicly available on ST website (STM32H750_).
+
+.. _STM32H750: https://www.st.com/en/microcontrollers-microprocessors/stm32h750-value-line.html
+
+:Authors: Dillon Min <dillon.minfei@gmail.com>
+
diff --git a/Documentation/arch/arm/stm32/stm32mp13-overview.rst b/Documentation/arch/arm/stm32/stm32mp13-overview.rst
new file mode 100644
index 000000000000..b5e9589fb06f
--- /dev/null
+++ b/Documentation/arch/arm/stm32/stm32mp13-overview.rst
@@ -0,0 +1,37 @@
+===================
+STM32MP13 Overview
+===================
+
+Introduction
+------------
+
+The STM32MP131/STM32MP133/STM32MP135 are Cortex-A MPU aimed at various applications.
+They feature:
+
+- One Cortex-A7 application core
+- Standard memories interface support
+- Standard connectivity, widely inherited from the STM32 MCU family
+- Comprehensive security support
+
+More details:
+
+- Cortex-A7 core running up to @900MHz
+- FMC controller to connect SDRAM, NOR and NAND memories
+- QSPI
+- SD/MMC/SDIO support
+- 2*Ethernet controller
+- CAN
+- ADC/DAC
+- USB EHCI/OHCI controllers
+- USB OTG
+- I2C, SPI, CAN buses support
+- Several general purpose timers
+- Serial Audio interface
+- LCD controller
+- DCMIPP
+- SPDIFRX
+- DFSDM
+
+:Authors:
+
+- Alexandre Torgue <alexandre.torgue@foss.st.com>
diff --git a/Documentation/arch/arm/stm32/stm32mp151-overview.rst b/Documentation/arch/arm/stm32/stm32mp151-overview.rst
new file mode 100644
index 000000000000..b58c256ede9a
--- /dev/null
+++ b/Documentation/arch/arm/stm32/stm32mp151-overview.rst
@@ -0,0 +1,36 @@
+===================
+STM32MP151 Overview
+===================
+
+Introduction
+------------
+
+The STM32MP151 is a Cortex-A MPU aimed at various applications.
+It features:
+
+- Single Cortex-A7 application core
+- Standard memories interface support
+- Standard connectivity, widely inherited from the STM32 MCU family
+- Comprehensive security support
+
+More details:
+
+- Cortex-A7 core running up to @800MHz
+- FMC controller to connect SDRAM, NOR and NAND memories
+- QSPI
+- SD/MMC/SDIO support
+- Ethernet controller
+- ADC/DAC
+- USB EHCI/OHCI controllers
+- USB OTG
+- I2C, SPI buses support
+- Several general purpose timers
+- Serial Audio interface
+- LCD-TFT controller
+- DCMIPP
+- SPDIFRX
+- DFSDM
+
+:Authors:
+
+- Roan van Dijk <roan@protonic.nl>
diff --git a/Documentation/arm/stm32/stm32mp157-overview.rst b/Documentation/arch/arm/stm32/stm32mp157-overview.rst
index 62e176d47ca7..f62fdc8e7d8d 100644
--- a/Documentation/arm/stm32/stm32mp157-overview.rst
+++ b/Documentation/arch/arm/stm32/stm32mp157-overview.rst
@@ -1,3 +1,4 @@
+===================
STM32MP157 Overview
===================
diff --git a/Documentation/arch/arm/sunxi.rst b/Documentation/arch/arm/sunxi.rst
new file mode 100644
index 000000000000..b85d1e2f2d47
--- /dev/null
+++ b/Documentation/arch/arm/sunxi.rst
@@ -0,0 +1,170 @@
+==================
+ARM Allwinner SoCs
+==================
+
+This document lists all the ARM Allwinner SoCs that are currently
+supported in mainline by the Linux kernel. This document will also
+provide links to documentation and/or datasheet for these SoCs.
+
+SunXi family
+------------
+ Linux kernel mach directory: arch/arm/mach-sunxi
+
+ Flavors:
+
+ * ARM926 based SoCs
+ - Allwinner F20 (sun3i)
+
+ * Not Supported
+
+ * ARM Cortex-A8 based SoCs
+ - Allwinner A10 (sun4i)
+
+ * Datasheet
+
+ http://dl.linux-sunxi.org/A10/A10%20Datasheet%20-%20v1.21%20%282012-04-06%29.pdf
+ * User Manual
+
+ http://dl.linux-sunxi.org/A10/A10%20User%20Manual%20-%20v1.20%20%282012-04-09%2c%20DECRYPTED%29.pdf
+
+ - Allwinner A10s (sun5i)
+
+ * Datasheet
+
+ http://dl.linux-sunxi.org/A10s/A10s%20Datasheet%20-%20v1.20%20%282012-03-27%29.pdf
+
+ - Allwinner A13 / R8 (sun5i)
+
+ * Datasheet
+
+ http://dl.linux-sunxi.org/A13/A13%20Datasheet%20-%20v1.12%20%282012-03-29%29.pdf
+ * User Manual
+
+ http://dl.linux-sunxi.org/A13/A13%20User%20Manual%20-%20v1.2%20%282013-01-08%29.pdf
+
+ - Next Thing Co GR8 (sun5i)
+
+ * Single ARM Cortex-A7 based SoCs
+ - Allwinner V3s (sun8i)
+
+ * Datasheet
+
+ http://linux-sunxi.org/File:Allwinner_V3s_Datasheet_V1.0.pdf
+
+ * Dual ARM Cortex-A7 based SoCs
+ - Allwinner A20 (sun7i)
+
+ * User Manual
+
+ http://dl.linux-sunxi.org/A20/A20%20User%20Manual%202013-03-22.pdf
+
+ - Allwinner A23 (sun8i)
+
+ * Datasheet
+
+ http://dl.linux-sunxi.org/A23/A23%20Datasheet%20V1.0%2020130830.pdf
+
+ * User Manual
+
+ http://dl.linux-sunxi.org/A23/A23%20User%20Manual%20V1.0%2020130830.pdf
+
+ * Quad ARM Cortex-A7 based SoCs
+ - Allwinner A31 (sun6i)
+
+ * Datasheet
+
+ http://dl.linux-sunxi.org/A31/A3x_release_document/A31/IC/A31%20datasheet%20V1.3%2020131106.pdf
+
+ * User Manual
+
+ http://dl.linux-sunxi.org/A31/A3x_release_document/A31/IC/A31%20user%20manual%20V1.1%2020130630.pdf
+
+ - Allwinner A31s (sun6i)
+
+ * Datasheet
+
+ http://dl.linux-sunxi.org/A31/A3x_release_document/A31s/IC/A31s%20datasheet%20V1.3%2020131106.pdf
+
+ * User Manual
+
+ http://dl.linux-sunxi.org/A31/A3x_release_document/A31s/IC/A31s%20User%20Manual%20%20V1.0%2020130322.pdf
+
+ - Allwinner A33 (sun8i)
+
+ * Datasheet
+
+ http://dl.linux-sunxi.org/A33/A33%20Datasheet%20release%201.1.pdf
+
+ * User Manual
+
+ http://dl.linux-sunxi.org/A33/A33%20user%20manual%20release%201.1.pdf
+
+ - Allwinner H2+ (sun8i)
+
+ * No document available now, but is known to be working properly with
+ H3 drivers and memory map.
+
+ - Allwinner H3 (sun8i)
+
+ * Datasheet
+
+ https://linux-sunxi.org/images/4/4b/Allwinner_H3_Datasheet_V1.2.pdf
+
+ - Allwinner R40 (sun8i)
+
+ * Datasheet
+
+ https://github.com/tinalinux/docs/raw/r40-v1.y/R40_Datasheet_V1.0.pdf
+
+ * User Manual
+
+ https://github.com/tinalinux/docs/raw/r40-v1.y/Allwinner_R40_User_Manual_V1.0.pdf
+
+ * Quad ARM Cortex-A15, Quad ARM Cortex-A7 based SoCs
+ - Allwinner A80
+
+ * Datasheet
+
+ http://dl.linux-sunxi.org/A80/A80_Datasheet_Revision_1.0_0404.pdf
+
+ * Octa ARM Cortex-A7 based SoCs
+ - Allwinner A83T
+
+ * Datasheet
+
+ https://github.com/allwinner-zh/documents/raw/master/A83T/A83T_Datasheet_v1.3_20150510.pdf
+
+ * User Manual
+
+ https://github.com/allwinner-zh/documents/raw/master/A83T/A83T_User_Manual_v1.5.1_20150513.pdf
+
+ * Quad ARM Cortex-A53 based SoCs
+ - Allwinner A64
+
+ * Datasheet
+
+ http://dl.linux-sunxi.org/A64/A64_Datasheet_V1.1.pdf
+
+ * User Manual
+
+ http://dl.linux-sunxi.org/A64/Allwinner%20A64%20User%20Manual%20v1.0.pdf
+
+ - Allwinner H6
+
+ * Datasheet
+
+ https://linux-sunxi.org/images/5/5c/Allwinner_H6_V200_Datasheet_V1.1.pdf
+
+ * User Manual
+
+ https://linux-sunxi.org/images/4/46/Allwinner_H6_V200_User_Manual_V1.1.pdf
+
+ - Allwinner H616
+
+ * Datasheet
+
+ https://linux-sunxi.org/images/b/b9/H616_Datasheet_V1.0_cleaned.pdf
+
+ * User Manual
+
+ https://linux-sunxi.org/images/2/24/H616_User_Manual_V1.0_cleaned.pdf
diff --git a/Documentation/arch/arm/sunxi/clocks.rst b/Documentation/arch/arm/sunxi/clocks.rst
new file mode 100644
index 000000000000..dfe6d4887210
--- /dev/null
+++ b/Documentation/arch/arm/sunxi/clocks.rst
@@ -0,0 +1,57 @@
+=======================================================
+Frequently asked questions about the sunxi clock system
+=======================================================
+
+This document contains useful bits of information that people tend to ask
+about the sunxi clock system, as well as accompanying ASCII art when adequate.
+
+Q: Why is the main 24MHz oscillator gateable? Wouldn't that break the
+ system?
+
+A: The 24MHz oscillator allows gating to save power. Indeed, if gated
+ carelessly the system would stop functioning, but with the right
+ steps, one can gate it and keep the system running. Consider this
+ simplified suspend example:
+
+ While the system is operational, you would see something like::
+
+ 24MHz 32kHz
+ |
+ PLL1
+ \
+ \_ CPU Mux
+ |
+ [CPU]
+
+ When you are about to suspend, you switch the CPU Mux to the 32kHz
+ oscillator::
+
+ 24Mhz 32kHz
+ | |
+ PLL1 |
+ /
+ CPU Mux _/
+ |
+ [CPU]
+
+ Finally you can gate the main oscillator::
+
+ 32kHz
+ |
+ |
+ /
+ CPU Mux _/
+ |
+ [CPU]
+
+Q: Were can I learn more about the sunxi clocks?
+
+A: The linux-sunxi wiki contains a page documenting the clock registers,
+ you can find it at
+
+ http://linux-sunxi.org/A10/CCM
+
+ The authoritative source for information at this time is the ccmu driver
+ released by Allwinner, you can find it at
+
+ https://github.com/linux-sunxi/linux-sunxi/tree/sunxi-3.0/arch/arm/mach-sun4i/clock/ccmu
diff --git a/Documentation/arch/arm/swp_emulation.rst b/Documentation/arch/arm/swp_emulation.rst
new file mode 100644
index 000000000000..bf205e3de36e
--- /dev/null
+++ b/Documentation/arch/arm/swp_emulation.rst
@@ -0,0 +1,27 @@
+Software emulation of deprecated SWP instruction (CONFIG_SWP_EMULATE)
+---------------------------------------------------------------------
+
+ARMv6 architecture deprecates use of the SWP/SWPB instructions, and recommends
+moving to the load-locked/store-conditional instructions LDREX and STREX.
+
+ARMv7 multiprocessing extensions introduce the ability to disable these
+instructions, triggering an undefined instruction exception when executed.
+Trapped instructions are emulated using an LDREX/STREX or LDREXB/STREXB
+sequence. If a memory access fault (an abort) occurs, a segmentation fault is
+signalled to the triggering process.
+
+/proc/cpu/swp_emulation holds some statistics/information, including the PID of
+the last process to trigger the emulation to be invocated. For example::
+
+ Emulated SWP: 12
+ Emulated SWPB: 0
+ Aborted SWP{B}: 1
+ Last process: 314
+
+
+NOTE:
+ when accessing uncached shared regions, LDREX/STREX rely on an external
+ transaction monitoring block called a global monitor to maintain update
+ atomicity. If your system does not implement a global monitor, this option can
+ cause programs that perform SWP operations to uncached memory to deadlock, as
+ the STREX operation will always fail.
diff --git a/Documentation/arch/arm/tcm.rst b/Documentation/arch/arm/tcm.rst
new file mode 100644
index 000000000000..7ce17a248af9
--- /dev/null
+++ b/Documentation/arch/arm/tcm.rst
@@ -0,0 +1,161 @@
+==================================================
+ARM TCM (Tightly-Coupled Memory) handling in Linux
+==================================================
+
+Written by Linus Walleij <linus.walleij@stericsson.com>
+
+Some ARM SoCs have a so-called TCM (Tightly-Coupled Memory).
+This is usually just a few (4-64) KiB of RAM inside the ARM
+processor.
+
+Due to being embedded inside the CPU, the TCM has a
+Harvard-architecture, so there is an ITCM (instruction TCM)
+and a DTCM (data TCM). The DTCM can not contain any
+instructions, but the ITCM can actually contain data.
+The size of DTCM or ITCM is minimum 4KiB so the typical
+minimum configuration is 4KiB ITCM and 4KiB DTCM.
+
+ARM CPUs have special registers to read out status, physical
+location and size of TCM memories. arch/arm/include/asm/cputype.h
+defines a CPUID_TCM register that you can read out from the
+system control coprocessor. Documentation from ARM can be found
+at http://infocenter.arm.com, search for "TCM Status Register"
+to see documents for all CPUs. Reading this register you can
+determine if ITCM (bits 1-0) and/or DTCM (bit 17-16) is present
+in the machine.
+
+There is further a TCM region register (search for "TCM Region
+Registers" at the ARM site) that can report and modify the location
+size of TCM memories at runtime. This is used to read out and modify
+TCM location and size. Notice that this is not a MMU table: you
+actually move the physical location of the TCM around. At the
+place you put it, it will mask any underlying RAM from the
+CPU so it is usually wise not to overlap any physical RAM with
+the TCM.
+
+The TCM memory can then be remapped to another address again using
+the MMU, but notice that the TCM is often used in situations where
+the MMU is turned off. To avoid confusion the current Linux
+implementation will map the TCM 1 to 1 from physical to virtual
+memory in the location specified by the kernel. Currently Linux
+will map ITCM to 0xfffe0000 and on, and DTCM to 0xfffe8000 and
+on, supporting a maximum of 32KiB of ITCM and 32KiB of DTCM.
+
+Newer versions of the region registers also support dividing these
+TCMs in two separate banks, so for example an 8KiB ITCM is divided
+into two 4KiB banks with its own control registers. The idea is to
+be able to lock and hide one of the banks for use by the secure
+world (TrustZone).
+
+TCM is used for a few things:
+
+- FIQ and other interrupt handlers that need deterministic
+ timing and cannot wait for cache misses.
+
+- Idle loops where all external RAM is set to self-refresh
+ retention mode, so only on-chip RAM is accessible by
+ the CPU and then we hang inside ITCM waiting for an
+ interrupt.
+
+- Other operations which implies shutting off or reconfiguring
+ the external RAM controller.
+
+There is an interface for using TCM on the ARM architecture
+in <asm/tcm.h>. Using this interface it is possible to:
+
+- Define the physical address and size of ITCM and DTCM.
+
+- Tag functions to be compiled into ITCM.
+
+- Tag data and constants to be allocated to DTCM and ITCM.
+
+- Have the remaining TCM RAM added to a special
+ allocation pool with gen_pool_create() and gen_pool_add()
+ and provide tcm_alloc() and tcm_free() for this
+ memory. Such a heap is great for things like saving
+ device state when shutting off device power domains.
+
+A machine that has TCM memory shall select HAVE_TCM from
+arch/arm/Kconfig for itself. Code that needs to use TCM shall
+#include <asm/tcm.h>
+
+Functions to go into itcm can be tagged like this:
+int __tcmfunc foo(int bar);
+
+Since these are marked to become long_calls and you may want
+to have functions called locally inside the TCM without
+wasting space, there is also the __tcmlocalfunc prefix that
+will make the call relative.
+
+Variables to go into dtcm can be tagged like this::
+
+ int __tcmdata foo;
+
+Constants can be tagged like this::
+
+ int __tcmconst foo;
+
+To put assembler into TCM just use::
+
+ .section ".tcm.text" or .section ".tcm.data"
+
+respectively.
+
+Example code::
+
+ #include <asm/tcm.h>
+
+ /* Uninitialized data */
+ static u32 __tcmdata tcmvar;
+ /* Initialized data */
+ static u32 __tcmdata tcmassigned = 0x2BADBABEU;
+ /* Constant */
+ static const u32 __tcmconst tcmconst = 0xCAFEBABEU;
+
+ static void __tcmlocalfunc tcm_to_tcm(void)
+ {
+ int i;
+ for (i = 0; i < 100; i++)
+ tcmvar ++;
+ }
+
+ static void __tcmfunc hello_tcm(void)
+ {
+ /* Some abstract code that runs in ITCM */
+ int i;
+ for (i = 0; i < 100; i++) {
+ tcmvar ++;
+ }
+ tcm_to_tcm();
+ }
+
+ static void __init test_tcm(void)
+ {
+ u32 *tcmem;
+ int i;
+
+ hello_tcm();
+ printk("Hello TCM executed from ITCM RAM\n");
+
+ printk("TCM variable from testrun: %u @ %p\n", tcmvar, &tcmvar);
+ tcmvar = 0xDEADBEEFU;
+ printk("TCM variable: 0x%x @ %p\n", tcmvar, &tcmvar);
+
+ printk("TCM assigned variable: 0x%x @ %p\n", tcmassigned, &tcmassigned);
+
+ printk("TCM constant: 0x%x @ %p\n", tcmconst, &tcmconst);
+
+ /* Allocate some TCM memory from the pool */
+ tcmem = tcm_alloc(20);
+ if (tcmem) {
+ printk("TCM Allocated 20 bytes of TCM @ %p\n", tcmem);
+ tcmem[0] = 0xDEADBEEFU;
+ tcmem[1] = 0x2BADBABEU;
+ tcmem[2] = 0xCAFEBABEU;
+ tcmem[3] = 0xDEADBEEFU;
+ tcmem[4] = 0x2BADBABEU;
+ for (i = 0; i < 5; i++)
+ printk("TCM tcmem[%d] = %08x\n", i, tcmem[i]);
+ tcm_free(tcmem, 20);
+ }
+ }
diff --git a/Documentation/arch/arm/uefi.rst b/Documentation/arch/arm/uefi.rst
new file mode 100644
index 000000000000..2b7ad9bd7cd2
--- /dev/null
+++ b/Documentation/arch/arm/uefi.rst
@@ -0,0 +1,72 @@
+================================================
+The Unified Extensible Firmware Interface (UEFI)
+================================================
+
+UEFI, the Unified Extensible Firmware Interface, is a specification
+governing the behaviours of compatible firmware interfaces. It is
+maintained by the UEFI Forum - http://www.uefi.org/.
+
+UEFI is an evolution of its predecessor 'EFI', so the terms EFI and
+UEFI are used somewhat interchangeably in this document and associated
+source code. As a rule, anything new uses 'UEFI', whereas 'EFI' refers
+to legacy code or specifications.
+
+UEFI support in Linux
+=====================
+Booting on a platform with firmware compliant with the UEFI specification
+makes it possible for the kernel to support additional features:
+
+- UEFI Runtime Services
+- Retrieving various configuration information through the standardised
+ interface of UEFI configuration tables. (ACPI, SMBIOS, ...)
+
+For actually enabling [U]EFI support, enable:
+
+- CONFIG_EFI=y
+- CONFIG_EFIVAR_FS=y or m
+
+The implementation depends on receiving information about the UEFI environment
+in a Flattened Device Tree (FDT) - so is only available with CONFIG_OF.
+
+UEFI stub
+=========
+The "stub" is a feature that extends the Image/zImage into a valid UEFI
+PE/COFF executable, including a loader application that makes it possible to
+load the kernel directly from the UEFI shell, boot menu, or one of the
+lightweight bootloaders like Gummiboot or rEFInd.
+
+The kernel image built with stub support remains a valid kernel image for
+booting in non-UEFI environments.
+
+UEFI kernel support on ARM
+==========================
+UEFI kernel support on the ARM architectures (arm and arm64) is only available
+when boot is performed through the stub.
+
+When booting in UEFI mode, the stub deletes any memory nodes from a provided DT.
+Instead, the kernel reads the UEFI memory map.
+
+The stub populates the FDT /chosen node with (and the kernel scans for) the
+following parameters:
+
+========================== ====== ===========================================
+Name Type Description
+========================== ====== ===========================================
+linux,uefi-system-table 64-bit Physical address of the UEFI System Table.
+
+linux,uefi-mmap-start 64-bit Physical address of the UEFI memory map,
+ populated by the UEFI GetMemoryMap() call.
+
+linux,uefi-mmap-size 32-bit Size in bytes of the UEFI memory map
+ pointed to in previous entry.
+
+linux,uefi-mmap-desc-size 32-bit Size in bytes of each entry in the UEFI
+ memory map.
+
+linux,uefi-mmap-desc-ver 32-bit Version of the mmap descriptor format.
+
+kaslr-seed 64-bit Entropy used to randomize the kernel image
+ base address location.
+
+bootargs String Kernel command line
+========================== ====== ===========================================
diff --git a/Documentation/arch/arm/vfp/release-notes.rst b/Documentation/arch/arm/vfp/release-notes.rst
new file mode 100644
index 000000000000..c6b04937cee3
--- /dev/null
+++ b/Documentation/arch/arm/vfp/release-notes.rst
@@ -0,0 +1,57 @@
+===============================================
+Release notes for Linux Kernel VFP support code
+===============================================
+
+Date: 20 May 2004
+
+Author: Russell King
+
+This is the first release of the Linux Kernel VFP support code. It
+provides support for the exceptions bounced from VFP hardware found
+on ARM926EJ-S.
+
+This release has been validated against the SoftFloat-2b library by
+John R. Hauser using the TestFloat-2a test suite. Details of this
+library and test suite can be found at:
+
+ http://www.jhauser.us/arithmetic/SoftFloat.html
+
+The operations which have been tested with this package are:
+
+ - fdiv
+ - fsub
+ - fadd
+ - fmul
+ - fcmp
+ - fcmpe
+ - fcvtd
+ - fcvts
+ - fsito
+ - ftosi
+ - fsqrt
+
+All the above pass softfloat tests with the following exceptions:
+
+- fadd/fsub shows some differences in the handling of +0 / -0 results
+ when input operands differ in signs.
+- the handling of underflow exceptions is slightly different. If a
+ result underflows before rounding, but becomes a normalised number
+ after rounding, we do not signal an underflow exception.
+
+Other operations which have been tested by basic assembly-only tests
+are:
+
+ - fcpy
+ - fabs
+ - fneg
+ - ftoui
+ - ftosiz
+ - ftouiz
+
+The combination operations have not been tested:
+
+ - fmac
+ - fnmac
+ - fmsc
+ - fnmsc
+ - fnmul
diff --git a/Documentation/arch/arm/vlocks.rst b/Documentation/arch/arm/vlocks.rst
new file mode 100644
index 000000000000..737aa8661a21
--- /dev/null
+++ b/Documentation/arch/arm/vlocks.rst
@@ -0,0 +1,212 @@
+======================================
+vlocks for Bare-Metal Mutual Exclusion
+======================================
+
+Voting Locks, or "vlocks" provide a simple low-level mutual exclusion
+mechanism, with reasonable but minimal requirements on the memory
+system.
+
+These are intended to be used to coordinate critical activity among CPUs
+which are otherwise non-coherent, in situations where the hardware
+provides no other mechanism to support this and ordinary spinlocks
+cannot be used.
+
+
+vlocks make use of the atomicity provided by the memory system for
+writes to a single memory location. To arbitrate, every CPU "votes for
+itself", by storing a unique number to a common memory location. The
+final value seen in that memory location when all the votes have been
+cast identifies the winner.
+
+In order to make sure that the election produces an unambiguous result
+in finite time, a CPU will only enter the election in the first place if
+no winner has been chosen and the election does not appear to have
+started yet.
+
+
+Algorithm
+---------
+
+The easiest way to explain the vlocks algorithm is with some pseudo-code::
+
+
+ int currently_voting[NR_CPUS] = { 0, };
+ int last_vote = -1; /* no votes yet */
+
+ bool vlock_trylock(int this_cpu)
+ {
+ /* signal our desire to vote */
+ currently_voting[this_cpu] = 1;
+ if (last_vote != -1) {
+ /* someone already volunteered himself */
+ currently_voting[this_cpu] = 0;
+ return false; /* not ourself */
+ }
+
+ /* let's suggest ourself */
+ last_vote = this_cpu;
+ currently_voting[this_cpu] = 0;
+
+ /* then wait until everyone else is done voting */
+ for_each_cpu(i) {
+ while (currently_voting[i] != 0)
+ /* wait */;
+ }
+
+ /* result */
+ if (last_vote == this_cpu)
+ return true; /* we won */
+ return false;
+ }
+
+ bool vlock_unlock(void)
+ {
+ last_vote = -1;
+ }
+
+
+The currently_voting[] array provides a way for the CPUs to determine
+whether an election is in progress, and plays a role analogous to the
+"entering" array in Lamport's bakery algorithm [1].
+
+However, once the election has started, the underlying memory system
+atomicity is used to pick the winner. This avoids the need for a static
+priority rule to act as a tie-breaker, or any counters which could
+overflow.
+
+As long as the last_vote variable is globally visible to all CPUs, it
+will contain only one value that won't change once every CPU has cleared
+its currently_voting flag.
+
+
+Features and limitations
+------------------------
+
+ * vlocks are not intended to be fair. In the contended case, it is the
+ _last_ CPU which attempts to get the lock which will be most likely
+ to win.
+
+ vlocks are therefore best suited to situations where it is necessary
+ to pick a unique winner, but it does not matter which CPU actually
+ wins.
+
+ * Like other similar mechanisms, vlocks will not scale well to a large
+ number of CPUs.
+
+ vlocks can be cascaded in a voting hierarchy to permit better scaling
+ if necessary, as in the following hypothetical example for 4096 CPUs::
+
+ /* first level: local election */
+ my_town = towns[(this_cpu >> 4) & 0xf];
+ I_won = vlock_trylock(my_town, this_cpu & 0xf);
+ if (I_won) {
+ /* we won the town election, let's go for the state */
+ my_state = states[(this_cpu >> 8) & 0xf];
+ I_won = vlock_lock(my_state, this_cpu & 0xf));
+ if (I_won) {
+ /* and so on */
+ I_won = vlock_lock(the_whole_country, this_cpu & 0xf];
+ if (I_won) {
+ /* ... */
+ }
+ vlock_unlock(the_whole_country);
+ }
+ vlock_unlock(my_state);
+ }
+ vlock_unlock(my_town);
+
+
+ARM implementation
+------------------
+
+The current ARM implementation [2] contains some optimisations beyond
+the basic algorithm:
+
+ * By packing the members of the currently_voting array close together,
+ we can read the whole array in one transaction (providing the number
+ of CPUs potentially contending the lock is small enough). This
+ reduces the number of round-trips required to external memory.
+
+ In the ARM implementation, this means that we can use a single load
+ and comparison::
+
+ LDR Rt, [Rn]
+ CMP Rt, #0
+
+ ...in place of code equivalent to::
+
+ LDRB Rt, [Rn]
+ CMP Rt, #0
+ LDRBEQ Rt, [Rn, #1]
+ CMPEQ Rt, #0
+ LDRBEQ Rt, [Rn, #2]
+ CMPEQ Rt, #0
+ LDRBEQ Rt, [Rn, #3]
+ CMPEQ Rt, #0
+
+ This cuts down on the fast-path latency, as well as potentially
+ reducing bus contention in contended cases.
+
+ The optimisation relies on the fact that the ARM memory system
+ guarantees coherency between overlapping memory accesses of
+ different sizes, similarly to many other architectures. Note that
+ we do not care which element of currently_voting appears in which
+ bits of Rt, so there is no need to worry about endianness in this
+ optimisation.
+
+ If there are too many CPUs to read the currently_voting array in
+ one transaction then multiple transactions are still required. The
+ implementation uses a simple loop of word-sized loads for this
+ case. The number of transactions is still fewer than would be
+ required if bytes were loaded individually.
+
+
+ In principle, we could aggregate further by using LDRD or LDM, but
+ to keep the code simple this was not attempted in the initial
+ implementation.
+
+
+ * vlocks are currently only used to coordinate between CPUs which are
+ unable to enable their caches yet. This means that the
+ implementation removes many of the barriers which would be required
+ when executing the algorithm in cached memory.
+
+ packing of the currently_voting array does not work with cached
+ memory unless all CPUs contending the lock are cache-coherent, due
+ to cache writebacks from one CPU clobbering values written by other
+ CPUs. (Though if all the CPUs are cache-coherent, you should be
+ probably be using proper spinlocks instead anyway).
+
+
+ * The "no votes yet" value used for the last_vote variable is 0 (not
+ -1 as in the pseudocode). This allows statically-allocated vlocks
+ to be implicitly initialised to an unlocked state simply by putting
+ them in .bss.
+
+ An offset is added to each CPU's ID for the purpose of setting this
+ variable, so that no CPU uses the value 0 for its ID.
+
+
+Colophon
+--------
+
+Originally created and documented by Dave Martin for Linaro Limited, for
+use in ARM-based big.LITTLE platforms, with review and input gratefully
+received from Nicolas Pitre and Achin Gupta. Thanks to Nicolas for
+grabbing most of this text out of the relevant mail thread and writing
+up the pseudocode.
+
+Copyright (C) 2012-2013 Linaro Limited
+Distributed under the terms of Version 2 of the GNU General Public
+License, as defined in linux/COPYING.
+
+
+References
+----------
+
+[1] Lamport, L. "A New Solution of Dijkstra's Concurrent Programming
+ Problem", Communications of the ACM 17, 8 (August 1974), 453-455.
+
+ https://en.wikipedia.org/wiki/Lamport%27s_bakery_algorithm
+
+[2] linux/arch/arm/common/vlock.S, www.kernel.org.
diff --git a/Documentation/arch/arm64/acpi_object_usage.rst b/Documentation/arch/arm64/acpi_object_usage.rst
new file mode 100644
index 000000000000..06d8a87971ef
--- /dev/null
+++ b/Documentation/arch/arm64/acpi_object_usage.rst
@@ -0,0 +1,809 @@
+===========
+ACPI Tables
+===========
+
+The expectations of individual ACPI tables are discussed in the list that
+follows.
+
+If a section number is used, it refers to a section number in the ACPI
+specification where the object is defined. If "Signature Reserved" is used,
+the table signature (the first four bytes of the table) is the only portion
+of the table recognized by the specification, and the actual table is defined
+outside of the UEFI Forum (see Section 5.2.6 of the specification).
+
+For ACPI on arm64, tables also fall into the following categories:
+
+ - Required: DSDT, FADT, GTDT, MADT, MCFG, RSDP, SPCR, XSDT
+
+ - Recommended: BERT, EINJ, ERST, HEST, PCCT, SSDT
+
+ - Optional: AGDI, BGRT, CEDT, CPEP, CSRT, DBG2, DRTM, ECDT, FACS, FPDT,
+ HMAT, IBFT, IORT, MCHI, MPAM, MPST, MSCT, NFIT, PMTT, PPTT, RASF, SBST,
+ SDEI, SLIT, SPMI, SRAT, STAO, TCPA, TPM2, UEFI, XENV
+
+ - Not supported: AEST, APMT, BOOT, DBGP, DMAR, ETDT, HPET, IVRS, LPIT,
+ MSDM, OEMx, PDTT, PSDT, RAS2, RSDT, SLIC, WAET, WDAT, WDRT, WPBT
+
+====== ========================================================================
+Table Usage for ARMv8 Linux
+====== ========================================================================
+AEST Signature Reserved (signature == "AEST")
+
+ **Arm Error Source Table**
+
+ This table informs the OS of any error nodes in the system that are
+ compliant with the Arm RAS architecture.
+
+AGDI Signature Reserved (signature == "AGDI")
+
+ **Arm Generic diagnostic Dump and Reset Device Interface Table**
+
+ This table describes a non-maskable event, that is used by the platform
+ firmware, to request the OS to generate a diagnostic dump and reset the device.
+
+APMT Signature Reserved (signature == "APMT")
+
+ **Arm Performance Monitoring Table**
+
+ This table describes the properties of PMU support implemented by
+ components in the system.
+
+BERT Section 18.3 (signature == "BERT")
+
+ **Boot Error Record Table**
+
+ Must be supplied if RAS support is provided by the platform. It
+ is recommended this table be supplied.
+
+BOOT Signature Reserved (signature == "BOOT")
+
+ **simple BOOT flag table**
+
+ Microsoft only table, will not be supported.
+
+BGRT Section 5.2.22 (signature == "BGRT")
+
+ **Boot Graphics Resource Table**
+
+ Optional, not currently supported, with no real use-case for an
+ ARM server.
+
+CEDT Signature Reserved (signature == "CEDT")
+
+ **CXL Early Discovery Table**
+
+ This table allows the OS to discover any CXL Host Bridges and the Host
+ Bridge registers.
+
+CPEP Section 5.2.18 (signature == "CPEP")
+
+ **Corrected Platform Error Polling table**
+
+ Optional, not currently supported, and not recommended until such
+ time as ARM-compatible hardware is available, and the specification
+ suitably modified.
+
+CSRT Signature Reserved (signature == "CSRT")
+
+ **Core System Resources Table**
+
+ Optional, not currently supported.
+
+DBG2 Signature Reserved (signature == "DBG2")
+
+ **DeBuG port table 2**
+
+ License has changed and should be usable. Optional if used instead
+ of earlycon=<device> on the command line.
+
+DBGP Signature Reserved (signature == "DBGP")
+
+ **DeBuG Port table**
+
+ Microsoft only table, will not be supported.
+
+DSDT Section 5.2.11.1 (signature == "DSDT")
+
+ **Differentiated System Description Table**
+
+ A DSDT is required; see also SSDT.
+
+ ACPI tables contain only one DSDT but can contain one or more SSDTs,
+ which are optional. Each SSDT can only add to the ACPI namespace,
+ but cannot modify or replace anything in the DSDT.
+
+DMAR Signature Reserved (signature == "DMAR")
+
+ **DMA Remapping table**
+
+ x86 only table, will not be supported.
+
+DRTM Signature Reserved (signature == "DRTM")
+
+ **Dynamic Root of Trust for Measurement table**
+
+ Optional, not currently supported.
+
+ECDT Section 5.2.16 (signature == "ECDT")
+
+ **Embedded Controller Description Table**
+
+ Optional, not currently supported, but could be used on ARM if and
+ only if one uses the GPE_BIT field to represent an IRQ number, since
+ there are no GPE blocks defined in hardware reduced mode. This would
+ need to be modified in the ACPI specification.
+
+EINJ Section 18.6 (signature == "EINJ")
+
+ **Error Injection table**
+
+ This table is very useful for testing platform response to error
+ conditions; it allows one to inject an error into the system as
+ if it had actually occurred. However, this table should not be
+ shipped with a production system; it should be dynamically loaded
+ and executed with the ACPICA tools only during testing.
+
+ERST Section 18.5 (signature == "ERST")
+
+ **Error Record Serialization Table**
+
+ On a platform supports RAS, this table must be supplied if it is not
+ UEFI-based; if it is UEFI-based, this table may be supplied. When this
+ table is not present, UEFI run time service will be utilized to save
+ and retrieve hardware error information to and from a persistent store.
+
+ETDT Signature Reserved (signature == "ETDT")
+
+ **Event Timer Description Table**
+
+ Obsolete table, will not be supported.
+
+FACS Section 5.2.10 (signature == "FACS")
+
+ **Firmware ACPI Control Structure**
+
+ It is unlikely that this table will be terribly useful. If it is
+ provided, the Global Lock will NOT be used since it is not part of
+ the hardware reduced profile, and only 64-bit address fields will
+ be considered valid.
+
+FADT Section 5.2.9 (signature == "FACP")
+
+ **Fixed ACPI Description Table**
+ Required for arm64.
+
+
+ The HW_REDUCED_ACPI flag must be set. All of the fields that are
+ to be ignored when HW_REDUCED_ACPI is set are expected to be set to
+ zero.
+
+ If an FACS table is provided, the X_FIRMWARE_CTRL field is to be
+ used, not FIRMWARE_CTRL.
+
+ If PSCI is used (as is recommended), make sure that ARM_BOOT_ARCH is
+ filled in properly - that the PSCI_COMPLIANT flag is set and that
+ PSCI_USE_HVC is set or unset as needed (see table 5-37).
+
+ For the DSDT that is also required, the X_DSDT field is to be used,
+ not the DSDT field.
+
+FPDT Section 5.2.23 (signature == "FPDT")
+
+ **Firmware Performance Data Table**
+
+ Optional, useful for boot performance profiling.
+
+GTDT Section 5.2.24 (signature == "GTDT")
+
+ **Generic Timer Description Table**
+
+ Required for arm64.
+
+HEST Section 18.3.2 (signature == "HEST")
+
+ **Hardware Error Source Table**
+
+ ARM-specific error sources have been defined; please use those or the
+ PCI types such as type 6 (AER Root Port), 7 (AER Endpoint), or 8 (AER
+ Bridge), or use type 9 (Generic Hardware Error Source). Firmware first
+ error handling is possible if and only if Trusted Firmware is being
+ used on arm64.
+
+ Must be supplied if RAS support is provided by the platform. It
+ is recommended this table be supplied.
+
+HMAT Section 5.2.28 (signature == "HMAT")
+
+ **Heterogeneous Memory Attribute Table**
+
+ This table describes the memory attributes, such as memory side cache
+ attributes and bandwidth and latency details, related to Memory Proximity
+ Domains. The OS uses this information to optimize the system memory
+ configuration.
+
+HPET Signature Reserved (signature == "HPET")
+
+ **High Precision Event timer Table**
+
+ x86 only table, will not be supported.
+
+IBFT Signature Reserved (signature == "IBFT")
+
+ **iSCSI Boot Firmware Table**
+
+ Microsoft defined table, support TBD.
+
+IORT Signature Reserved (signature == "IORT")
+
+ **Input Output Remapping Table**
+
+ arm64 only table, required in order to describe IO topology, SMMUs,
+ and GIC ITSs, and how those various components are connected together,
+ such as identifying which components are behind which SMMUs/ITSs.
+ This table will only be required on certain SBSA platforms (e.g.,
+ when using GICv3-ITS and an SMMU); on SBSA Level 0 platforms, it
+ remains optional.
+
+IVRS Signature Reserved (signature == "IVRS")
+
+ **I/O Virtualization Reporting Structure**
+
+ x86_64 (AMD) only table, will not be supported.
+
+LPIT Signature Reserved (signature == "LPIT")
+
+ **Low Power Idle Table**
+
+ x86 only table as of ACPI 5.1; starting with ACPI 6.0, processor
+ descriptions and power states on ARM platforms should use the DSDT
+ and define processor container devices (_HID ACPI0010, Section 8.4,
+ and more specifically 8.4.3 and 8.4.4).
+
+MADT Section 5.2.12 (signature == "APIC")
+
+ **Multiple APIC Description Table**
+
+ Required for arm64. Only the GIC interrupt controller structures
+ should be used (types 0xA - 0xF).
+
+MCFG Signature Reserved (signature == "MCFG")
+
+ **Memory-mapped ConFiGuration space**
+
+ If the platform supports PCI/PCIe, an MCFG table is required.
+
+MCHI Signature Reserved (signature == "MCHI")
+
+ **Management Controller Host Interface table**
+
+ Optional, not currently supported.
+
+MPAM Signature Reserved (signature == "MPAM")
+
+ **Memory Partitioning And Monitoring table**
+
+ This table allows the OS to discover the MPAM controls implemented by
+ the subsystems.
+
+MPST Section 5.2.21 (signature == "MPST")
+
+ **Memory Power State Table**
+
+ Optional, not currently supported.
+
+MSCT Section 5.2.19 (signature == "MSCT")
+
+ **Maximum System Characteristic Table**
+
+ Optional, not currently supported.
+
+MSDM Signature Reserved (signature == "MSDM")
+
+ **Microsoft Data Management table**
+
+ Microsoft only table, will not be supported.
+
+NFIT Section 5.2.25 (signature == "NFIT")
+
+ **NVDIMM Firmware Interface Table**
+
+ Optional, not currently supported.
+
+OEMx Signature of "OEMx" only
+
+ **OEM Specific Tables**
+
+ All tables starting with a signature of "OEM" are reserved for OEM
+ use. Since these are not meant to be of general use but are limited
+ to very specific end users, they are not recommended for use and are
+ not supported by the kernel for arm64.
+
+PCCT Section 14.1 (signature == "PCCT)
+
+ **Platform Communications Channel Table**
+
+ Recommend for use on arm64; use of PCC is recommended when using CPPC
+ to control performance and power for platform processors.
+
+PDTT Section 5.2.29 (signature == "PDTT")
+
+ **Platform Debug Trigger Table**
+
+ This table describes PCC channels used to gather debug logs of
+ non-architectural features.
+
+
+PMTT Section 5.2.21.12 (signature == "PMTT")
+
+ **Platform Memory Topology Table**
+
+ Optional, not currently supported.
+
+PPTT Section 5.2.30 (signature == "PPTT")
+
+ **Processor Properties Topology Table**
+
+ This table provides the processor and cache topology.
+
+PSDT Section 5.2.11.3 (signature == "PSDT")
+
+ **Persistent System Description Table**
+
+ Obsolete table, will not be supported.
+
+RAS2 Section 5.2.21 (signature == "RAS2")
+
+ **RAS Features 2 table**
+
+ This table provides interfaces for the RAS capabilities implemented in
+ the platform.
+
+RASF Section 5.2.20 (signature == "RASF")
+
+ **RAS Feature table**
+
+ Optional, not currently supported.
+
+RSDP Section 5.2.5 (signature == "RSD PTR")
+
+ **Root System Description PoinTeR**
+
+ Required for arm64.
+
+RSDT Section 5.2.7 (signature == "RSDT")
+
+ **Root System Description Table**
+
+ Since this table can only provide 32-bit addresses, it is deprecated
+ on arm64, and will not be used. If provided, it will be ignored.
+
+SBST Section 5.2.14 (signature == "SBST")
+
+ **Smart Battery Subsystem Table**
+
+ Optional, not currently supported.
+
+SDEI Signature Reserved (signature == "SDEI")
+
+ **Software Delegated Exception Interface table**
+
+ This table advertises the presence of the SDEI interface.
+
+SLIC Signature Reserved (signature == "SLIC")
+
+ **Software LIcensing table**
+
+ Microsoft only table, will not be supported.
+
+SLIT Section 5.2.17 (signature == "SLIT")
+
+ **System Locality distance Information Table**
+
+ Optional in general, but required for NUMA systems.
+
+SPCR Signature Reserved (signature == "SPCR")
+
+ **Serial Port Console Redirection table**
+
+ Required for arm64.
+
+SPMI Signature Reserved (signature == "SPMI")
+
+ **Server Platform Management Interface table**
+
+ Optional, not currently supported.
+
+SRAT Section 5.2.16 (signature == "SRAT")
+
+ **System Resource Affinity Table**
+
+ Optional, but if used, only the GICC Affinity structures are read.
+ To support arm64 NUMA, this table is required.
+
+SSDT Section 5.2.11.2 (signature == "SSDT")
+
+ **Secondary System Description Table**
+
+ These tables are a continuation of the DSDT; these are recommended
+ for use with devices that can be added to a running system, but can
+ also serve the purpose of dividing up device descriptions into more
+ manageable pieces.
+
+ An SSDT can only ADD to the ACPI namespace. It cannot modify or
+ replace existing device descriptions already in the namespace.
+
+ These tables are optional, however. ACPI tables should contain only
+ one DSDT but can contain many SSDTs.
+
+STAO Signature Reserved (signature == "STAO")
+
+ **_STA Override table**
+
+ Optional, but only necessary in virtualized environments in order to
+ hide devices from guest OSs.
+
+TCPA Signature Reserved (signature == "TCPA")
+
+ **Trusted Computing Platform Alliance table**
+
+ Optional, not currently supported, and may need changes to fully
+ interoperate with arm64.
+
+TPM2 Signature Reserved (signature == "TPM2")
+
+ **Trusted Platform Module 2 table**
+
+ Optional, not currently supported, and may need changes to fully
+ interoperate with arm64.
+
+UEFI Signature Reserved (signature == "UEFI")
+
+ **UEFI ACPI data table**
+
+ Optional, not currently supported. No known use case for arm64,
+ at present.
+
+WAET Signature Reserved (signature == "WAET")
+
+ **Windows ACPI Emulated devices Table**
+
+ Microsoft only table, will not be supported.
+
+WDAT Signature Reserved (signature == "WDAT")
+
+ **Watch Dog Action Table**
+
+ Microsoft only table, will not be supported.
+
+WDRT Signature Reserved (signature == "WDRT")
+
+ **Watch Dog Resource Table**
+
+ Microsoft only table, will not be supported.
+
+WPBT Signature Reserved (signature == "WPBT")
+
+ **Windows Platform Binary Table**
+
+ Microsoft only table, will not be supported.
+
+XENV Signature Reserved (signature == "XENV")
+
+ **Xen project table**
+
+ Optional, used only by Xen at present.
+
+XSDT Section 5.2.8 (signature == "XSDT")
+
+ **eXtended System Description Table**
+
+ Required for arm64.
+====== ========================================================================
+
+ACPI Objects
+------------
+The expectations on individual ACPI objects that are likely to be used are
+shown in the list that follows; any object not explicitly mentioned below
+should be used as needed for a particular platform or particular subsystem,
+such as power management or PCI.
+
+===== ================ ========================================================
+Name Section Usage for ARMv8 Linux
+===== ================ ========================================================
+_CCA 6.2.17 This method must be defined for all bus masters
+ on arm64 - there are no assumptions made about
+ whether such devices are cache coherent or not.
+ The _CCA value is inherited by all descendants of
+ these devices so it does not need to be repeated.
+ Without _CCA on arm64, the kernel does not know what
+ to do about setting up DMA for the device.
+
+ NB: this method provides default cache coherency
+ attributes; the presence of an SMMU can be used to
+ modify that, however. For example, a master could
+ default to non-coherent, but be made coherent with
+ the appropriate SMMU configuration (see Table 17 of
+ the IORT specification, ARM Document DEN 0049B).
+
+_CID 6.1.2 Use as needed, see also _HID.
+
+_CLS 6.1.3 Use as needed, see also _HID.
+
+_CPC 8.4.7.1 Use as needed, power management specific. CPPC is
+ recommended on arm64.
+
+_CRS 6.2.2 Required on arm64.
+
+_CSD 8.4.2.2 Use as needed, used only in conjunction with _CST.
+
+_CST 8.4.2.1 Low power idle states (8.4.4) are recommended instead
+ of C-states.
+
+_DDN 6.1.4 This field can be used for a device name. However,
+ it is meant for DOS device names (e.g., COM1), so be
+ careful of its use across OSes.
+
+_DSD 6.2.5 To be used with caution. If this object is used, try
+ to use it within the constraints already defined by the
+ Device Properties UUID. Only in rare circumstances
+ should it be necessary to create a new _DSD UUID.
+
+ In either case, submit the _DSD definition along with
+ any driver patches for discussion, especially when
+ device properties are used. A driver will not be
+ considered complete without a corresponding _DSD
+ description. Once approved by kernel maintainers,
+ the UUID or device properties must then be registered
+ with the UEFI Forum; this may cause some iteration as
+ more than one OS will be registering entries.
+
+_DSM 9.1.1 Do not use this method. It is not standardized, the
+ return values are not well documented, and it is
+ currently a frequent source of error.
+
+\_GL 5.7.1 This object is not to be used in hardware reduced
+ mode, and therefore should not be used on arm64.
+
+_GLK 6.5.7 This object requires a global lock be defined; there
+ is no global lock on arm64 since it runs in hardware
+ reduced mode. Hence, do not use this object on arm64.
+
+\_GPE 5.3.1 This namespace is for x86 use only. Do not use it
+ on arm64.
+
+_HID 6.1.5 This is the primary object to use in device probing,
+ though _CID and _CLS may also be used.
+
+_INI 6.5.1 Not required, but can be useful in setting up devices
+ when UEFI leaves them in a state that may not be what
+ the driver expects before it starts probing.
+
+_LPI 8.4.4.3 Recommended for use with processor definitions (_HID
+ ACPI0010) on arm64. See also _RDI.
+
+_MLS 6.1.7 Highly recommended for use in internationalization.
+
+_OFF 7.2.2 It is recommended to define this method for any device
+ that can be turned on or off.
+
+_ON 7.2.3 It is recommended to define this method for any device
+ that can be turned on or off.
+
+\_OS 5.7.3 This method will return "Linux" by default (this is
+ the value of the macro ACPI_OS_NAME on Linux). The
+ command line parameter acpi_os=<string> can be used
+ to set it to some other value.
+
+_OSC 6.2.11 This method can be a global method in ACPI (i.e.,
+ \_SB._OSC), or it may be associated with a specific
+ device (e.g., \_SB.DEV0._OSC), or both. When used
+ as a global method, only capabilities published in
+ the ACPI specification are allowed. When used as
+ a device-specific method, the process described for
+ using _DSD MUST be used to create an _OSC definition;
+ out-of-process use of _OSC is not allowed. That is,
+ submit the device-specific _OSC usage description as
+ part of the kernel driver submission, get it approved
+ by the kernel community, then register it with the
+ UEFI Forum.
+
+\_OSI 5.7.2 Deprecated on ARM64. As far as ACPI firmware is
+ concerned, _OSI is not to be used to determine what
+ sort of system is being used or what functionality
+ is provided. The _OSC method is to be used instead.
+
+_PDC 8.4.1 Deprecated, do not use on arm64.
+
+\_PIC 5.8.1 The method should not be used. On arm64, the only
+ interrupt model available is GIC.
+
+\_PR 5.3.1 This namespace is for x86 use only on legacy systems.
+ Do not use it on arm64.
+
+_PRT 6.2.13 Required as part of the definition of all PCI root
+ devices.
+
+_PRx 7.3.8-11 Use as needed; power management specific. If _PR0 is
+ defined, _PR3 must also be defined.
+
+_PSx 7.3.2-5 Use as needed; power management specific. If _PS0 is
+ defined, _PS3 must also be defined. If clocks or
+ regulators need adjusting to be consistent with power
+ usage, change them in these methods.
+
+_RDI 8.4.4.4 Recommended for use with processor definitions (_HID
+ ACPI0010) on arm64. This should only be used in
+ conjunction with _LPI.
+
+\_REV 5.7.4 Always returns the latest version of ACPI supported.
+
+\_SB 5.3.1 Required on arm64; all devices must be defined in this
+ namespace.
+
+_SLI 6.2.15 Use is recommended when SLIT table is in use.
+
+_STA 6.3.7, It is recommended to define this method for any device
+ 7.2.4 that can be turned on or off. See also the STAO table
+ that provides overrides to hide devices in virtualized
+ environments.
+
+_SRS 6.2.16 Use as needed; see also _PRS.
+
+_STR 6.1.10 Recommended for conveying device names to end users;
+ this is preferred over using _DDN.
+
+_SUB 6.1.9 Use as needed; _HID or _CID are preferred.
+
+_SUN 6.1.11 Use as needed, but recommended.
+
+_SWS 7.4.3 Use as needed; power management specific; this may
+ require specification changes for use on arm64.
+
+_UID 6.1.12 Recommended for distinguishing devices of the same
+ class; define it if at all possible.
+===== ================ ========================================================
+
+
+
+
+ACPI Event Model
+----------------
+Do not use GPE block devices; these are not supported in the hardware reduced
+profile used by arm64. Since there are no GPE blocks defined for use on ARM
+platforms, ACPI events must be signaled differently.
+
+There are two options: GPIO-signaled interrupts (Section 5.6.5), and
+interrupt-signaled events (Section 5.6.9). Interrupt-signaled events are a
+new feature in the ACPI 6.1 specification. Either - or both - can be used
+on a given platform, and which to use may be dependent of limitations in any
+given SoC. If possible, interrupt-signaled events are recommended.
+
+
+ACPI Processor Control
+----------------------
+Section 8 of the ACPI specification changed significantly in version 6.0.
+Processors should now be defined as Device objects with _HID ACPI0007; do
+not use the deprecated Processor statement in ASL. All multiprocessor systems
+should also define a hierarchy of processors, done with Processor Container
+Devices (see Section 8.4.3.1, _HID ACPI0010); do not use processor aggregator
+devices (Section 8.5) to describe processor topology. Section 8.4 of the
+specification describes the semantics of these object definitions and how
+they interrelate.
+
+Most importantly, the processor hierarchy defined also defines the low power
+idle states that are available to the platform, along with the rules for
+determining which processors can be turned on or off and the circumstances
+that control that. Without this information, the processors will run in
+whatever power state they were left in by UEFI.
+
+Note too, that the processor Device objects defined and the entries in the
+MADT for GICs are expected to be in synchronization. The _UID of the Device
+object must correspond to processor IDs used in the MADT.
+
+It is recommended that CPPC (8.4.5) be used as the primary model for processor
+performance control on arm64. C-states and P-states may become available at
+some point in the future, but most current design work appears to favor CPPC.
+
+Further, it is essential that the ARMv8 SoC provide a fully functional
+implementation of PSCI; this will be the only mechanism supported by ACPI
+to control CPU power state. Booting of secondary CPUs using the ACPI
+parking protocol is possible, but discouraged, since only PSCI is supported
+for ARM servers.
+
+
+ACPI System Address Map Interfaces
+----------------------------------
+In Section 15 of the ACPI specification, several methods are mentioned as
+possible mechanisms for conveying memory resource information to the kernel.
+For arm64, we will only support UEFI for booting with ACPI, hence the UEFI
+GetMemoryMap() boot service is the only mechanism that will be used.
+
+
+ACPI Platform Error Interfaces (APEI)
+-------------------------------------
+The APEI tables supported are described above.
+
+APEI requires the equivalent of an SCI and an NMI on ARMv8. The SCI is used
+to notify the OSPM of errors that have occurred but can be corrected and the
+system can continue correct operation, even if possibly degraded. The NMI is
+used to indicate fatal errors that cannot be corrected, and require immediate
+attention.
+
+Since there is no direct equivalent of the x86 SCI or NMI, arm64 handles
+these slightly differently. The SCI is handled as a high priority interrupt;
+given that these are corrected (or correctable) errors being reported, this
+is sufficient. The NMI is emulated as the highest priority interrupt
+possible. This implies some caution must be used since there could be
+interrupts at higher privilege levels or even interrupts at the same priority
+as the emulated NMI. In Linux, this should not be the case but one should
+be aware it could happen.
+
+
+ACPI Objects Not Supported on ARM64
+-----------------------------------
+While this may change in the future, there are several classes of objects
+that can be defined, but are not currently of general interest to ARM servers.
+Some of these objects have x86 equivalents, and may actually make sense in ARM
+servers. However, there is either no hardware available at present, or there
+may not even be a non-ARM implementation yet. Hence, they are not currently
+supported.
+
+The following classes of objects are not supported:
+
+ - Section 9.2: ambient light sensor devices
+
+ - Section 9.3: battery devices
+
+ - Section 9.4: lids (e.g., laptop lids)
+
+ - Section 9.8.2: IDE controllers
+
+ - Section 9.9: floppy controllers
+
+ - Section 9.10: GPE block devices
+
+ - Section 9.15: PC/AT RTC/CMOS devices
+
+ - Section 9.16: user presence detection devices
+
+ - Section 9.17: I/O APIC devices; all GICs must be enumerable via MADT
+
+ - Section 9.18: time and alarm devices (see 9.15)
+
+ - Section 10: power source and power meter devices
+
+ - Section 11: thermal management
+
+ - Section 12: embedded controllers interface
+
+ - Section 13: SMBus interfaces
+
+
+This also means that there is no support for the following objects:
+
+==== =========================== ==== ==========
+Name Section Name Section
+==== =========================== ==== ==========
+_ALC 9.3.4 _FDM 9.10.3
+_ALI 9.3.2 _FIX 6.2.7
+_ALP 9.3.6 _GAI 10.4.5
+_ALR 9.3.5 _GHL 10.4.7
+_ALT 9.3.3 _GTM 9.9.2.1.1
+_BCT 10.2.2.10 _LID 9.5.1
+_BDN 6.5.3 _PAI 10.4.4
+_BIF 10.2.2.1 _PCL 10.3.2
+_BIX 10.2.2.1 _PIF 10.3.3
+_BLT 9.2.3 _PMC 10.4.1
+_BMA 10.2.2.4 _PMD 10.4.8
+_BMC 10.2.2.12 _PMM 10.4.3
+_BMD 10.2.2.11 _PRL 10.3.4
+_BMS 10.2.2.5 _PSR 10.3.1
+_BST 10.2.2.6 _PTP 10.4.2
+_BTH 10.2.2.7 _SBS 10.1.3
+_BTM 10.2.2.9 _SHL 10.4.6
+_BTP 10.2.2.8 _STM 9.9.2.1.1
+_DCK 6.5.2 _UPD 9.16.1
+_EC 12.12 _UPP 9.16.2
+_FDE 9.10.1 _WPC 10.5.2
+_FDI 9.10.2 _WPP 10.5.3
+==== =========================== ==== ==========
diff --git a/Documentation/arch/arm64/amu.rst b/Documentation/arch/arm64/amu.rst
new file mode 100644
index 000000000000..ac1b3f0e211d
--- /dev/null
+++ b/Documentation/arch/arm64/amu.rst
@@ -0,0 +1,119 @@
+.. _amu_index:
+
+=======================================================
+Activity Monitors Unit (AMU) extension in AArch64 Linux
+=======================================================
+
+Author: Ionela Voinescu <ionela.voinescu@arm.com>
+
+Date: 2019-09-10
+
+This document briefly describes the provision of Activity Monitors Unit
+support in AArch64 Linux.
+
+
+Architecture overview
+---------------------
+
+The activity monitors extension is an optional extension introduced by the
+ARMv8.4 CPU architecture.
+
+The activity monitors unit, implemented in each CPU, provides performance
+counters intended for system management use. The AMU extension provides a
+system register interface to the counter registers and also supports an
+optional external memory-mapped interface.
+
+Version 1 of the Activity Monitors architecture implements a counter group
+of four fixed and architecturally defined 64-bit event counters.
+
+ - CPU cycle counter: increments at the frequency of the CPU.
+ - Constant counter: increments at the fixed frequency of the system
+ clock.
+ - Instructions retired: increments with every architecturally executed
+ instruction.
+ - Memory stall cycles: counts instruction dispatch stall cycles caused by
+ misses in the last level cache within the clock domain.
+
+When in WFI or WFE these counters do not increment.
+
+The Activity Monitors architecture provides space for up to 16 architected
+event counters. Future versions of the architecture may use this space to
+implement additional architected event counters.
+
+Additionally, version 1 implements a counter group of up to 16 auxiliary
+64-bit event counters.
+
+On cold reset all counters reset to 0.
+
+
+Basic support
+-------------
+
+The kernel can safely run a mix of CPUs with and without support for the
+activity monitors extension. Therefore, when CONFIG_ARM64_AMU_EXTN is
+selected we unconditionally enable the capability to allow any late CPU
+(secondary or hotplugged) to detect and use the feature.
+
+When the feature is detected on a CPU, we flag the availability of the
+feature but this does not guarantee the correct functionality of the
+counters, only the presence of the extension.
+
+Firmware (code running at higher exception levels, e.g. arm-tf) support is
+needed to:
+
+ - Enable access for lower exception levels (EL2 and EL1) to the AMU
+ registers.
+ - Enable the counters. If not enabled these will read as 0.
+ - Save/restore the counters before/after the CPU is being put/brought up
+ from the 'off' power state.
+
+When using kernels that have this feature enabled but boot with broken
+firmware the user may experience panics or lockups when accessing the
+counter registers. Even if these symptoms are not observed, the values
+returned by the register reads might not correctly reflect reality. Most
+commonly, the counters will read as 0, indicating that they are not
+enabled.
+
+If proper support is not provided in firmware it's best to disable
+CONFIG_ARM64_AMU_EXTN. To be noted that for security reasons, this does not
+bypass the setting of AMUSERENR_EL0 to trap accesses from EL0 (userspace) to
+EL1 (kernel). Therefore, firmware should still ensure accesses to AMU registers
+are not trapped in EL2/EL3.
+
+The fixed counters of AMUv1 are accessible through the following system
+register definitions:
+
+ - SYS_AMEVCNTR0_CORE_EL0
+ - SYS_AMEVCNTR0_CONST_EL0
+ - SYS_AMEVCNTR0_INST_RET_EL0
+ - SYS_AMEVCNTR0_MEM_STALL_EL0
+
+Auxiliary platform specific counters can be accessed using
+SYS_AMEVCNTR1_EL0(n), where n is a value between 0 and 15.
+
+Details can be found in: arch/arm64/include/asm/sysreg.h.
+
+
+Userspace access
+----------------
+
+Currently, access from userspace to the AMU registers is disabled due to:
+
+ - Security reasons: they might expose information about code executed in
+ secure mode.
+ - Purpose: AMU counters are intended for system management use.
+
+Also, the presence of the feature is not visible to userspace.
+
+
+Virtualization
+--------------
+
+Currently, access from userspace (EL0) and kernelspace (EL1) on the KVM
+guest side is disabled due to:
+
+ - Security reasons: they might expose information about code executed
+ by other guests or the host.
+
+Any attempt to access the AMU registers will result in an UNDEFINED
+exception being injected into the guest.
diff --git a/Documentation/arch/arm64/arm-acpi.rst b/Documentation/arch/arm64/arm-acpi.rst
new file mode 100644
index 000000000000..e59e4505d0d9
--- /dev/null
+++ b/Documentation/arch/arm64/arm-acpi.rst
@@ -0,0 +1,575 @@
+===================
+ACPI on Arm systems
+===================
+
+ACPI can be used for Armv8 and Armv9 systems designed to follow
+the BSA (Arm Base System Architecture) [0] and BBR (Arm
+Base Boot Requirements) [1] specifications. Both BSA and BBR are publicly
+accessible documents.
+Arm Servers, in addition to being BSA compliant, comply with a set
+of rules defined in SBSA (Server Base System Architecture) [2].
+
+The Arm kernel implements the reduced hardware model of ACPI version
+5.1 or later. Links to the specification and all external documents
+it refers to are managed by the UEFI Forum. The specification is
+available at http://www.uefi.org/specifications and documents referenced
+by the specification can be found via http://www.uefi.org/acpi.
+
+If an Arm system does not meet the requirements of the BSA and BBR,
+or cannot be described using the mechanisms defined in the required ACPI
+specifications, then ACPI may not be a good fit for the hardware.
+
+While the documents mentioned above set out the requirements for building
+industry-standard Arm systems, they also apply to more than one operating
+system. The purpose of this document is to describe the interaction between
+ACPI and Linux only, on an Arm system -- that is, what Linux expects of
+ACPI and what ACPI can expect of Linux.
+
+
+Why ACPI on Arm?
+----------------
+Before examining the details of the interface between ACPI and Linux, it is
+useful to understand why ACPI is being used. Several technologies already
+exist in Linux for describing non-enumerable hardware, after all. In this
+section we summarize a blog post [3] from Grant Likely that outlines the
+reasoning behind ACPI on Arm systems. Actually, we snitch a good portion
+of the summary text almost directly, to be honest.
+
+The short form of the rationale for ACPI on Arm is:
+
+- ACPI’s byte code (AML) allows the platform to encode hardware behavior,
+ while DT explicitly does not support this. For hardware vendors, being
+ able to encode behavior is a key tool used in supporting operating
+ system releases on new hardware.
+
+- ACPI’s OSPM defines a power management model that constrains what the
+ platform is allowed to do into a specific model, while still providing
+ flexibility in hardware design.
+
+- In the enterprise server environment, ACPI has established bindings (such
+ as for RAS) which are currently used in production systems. DT does not.
+ Such bindings could be defined in DT at some point, but doing so means Arm
+ and x86 would end up using completely different code paths in both firmware
+ and the kernel.
+
+- Choosing a single interface to describe the abstraction between a platform
+ and an OS is important. Hardware vendors would not be required to implement
+ both DT and ACPI if they want to support multiple operating systems. And,
+ agreeing on a single interface instead of being fragmented into per OS
+ interfaces makes for better interoperability overall.
+
+- The new ACPI governance process works well and Linux is now at the same
+ table as hardware vendors and other OS vendors. In fact, there is no
+ longer any reason to feel that ACPI only belongs to Windows or that
+ Linux is in any way secondary to Microsoft in this arena. The move of
+ ACPI governance into the UEFI forum has significantly opened up the
+ specification development process, and currently, a large portion of the
+ changes being made to ACPI are being driven by Linux.
+
+Key to the use of ACPI is the support model. For servers in general, the
+responsibility for hardware behaviour cannot solely be the domain of the
+kernel, but rather must be split between the platform and the kernel, in
+order to allow for orderly change over time. ACPI frees the OS from needing
+to understand all the minute details of the hardware so that the OS doesn’t
+need to be ported to each and every device individually. It allows the
+hardware vendors to take responsibility for power management behaviour without
+depending on an OS release cycle which is not under their control.
+
+ACPI is also important because hardware and OS vendors have already worked
+out the mechanisms for supporting a general purpose computing ecosystem. The
+infrastructure is in place, the bindings are in place, and the processes are
+in place. DT does exactly what Linux needs it to when working with vertically
+integrated devices, but there are no good processes for supporting what the
+server vendors need. Linux could potentially get there with DT, but doing so
+really just duplicates something that already works. ACPI already does what
+the hardware vendors need, Microsoft won’t collaborate on DT, and hardware
+vendors would still end up providing two completely separate firmware
+interfaces -- one for Linux and one for Windows.
+
+
+Kernel Compatibility
+--------------------
+One of the primary motivations for ACPI is standardization, and using that
+to provide backward compatibility for Linux kernels. In the server market,
+software and hardware are often used for long periods. ACPI allows the
+kernel and firmware to agree on a consistent abstraction that can be
+maintained over time, even as hardware or software change. As long as the
+abstraction is supported, systems can be updated without necessarily having
+to replace the kernel.
+
+When a Linux driver or subsystem is first implemented using ACPI, it by
+definition ends up requiring a specific version of the ACPI specification
+-- its baseline. ACPI firmware must continue to work, even though it may
+not be optimal, with the earliest kernel version that first provides support
+for that baseline version of ACPI. There may be a need for additional drivers,
+but adding new functionality (e.g., CPU power management) should not break
+older kernel versions. Further, ACPI firmware must also work with the most
+recent version of the kernel.
+
+
+Relationship with Device Tree
+-----------------------------
+ACPI support in drivers and subsystems for Arm should never be mutually
+exclusive with DT support at compile time.
+
+At boot time the kernel will only use one description method depending on
+parameters passed from the boot loader (including kernel bootargs).
+
+Regardless of whether DT or ACPI is used, the kernel must always be capable
+of booting with either scheme (in kernels with both schemes enabled at compile
+time).
+
+
+Booting using ACPI tables
+-------------------------
+The only defined method for passing ACPI tables to the kernel on Arm
+is via the UEFI system configuration table. Just so it is explicit, this
+means that ACPI is only supported on platforms that boot via UEFI.
+
+When an Arm system boots, it can either have DT information, ACPI tables,
+or in some very unusual cases, both. If no command line parameters are used,
+the kernel will try to use DT for device enumeration; if there is no DT
+present, the kernel will try to use ACPI tables, but only if they are present.
+If neither is available, the kernel will not boot. If acpi=force is used
+on the command line, the kernel will attempt to use ACPI tables first, but
+fall back to DT if there are no ACPI tables present. The basic idea is that
+the kernel will not fail to boot unless it absolutely has no other choice.
+
+Processing of ACPI tables may be disabled by passing acpi=off on the kernel
+command line; this is the default behavior.
+
+In order for the kernel to load and use ACPI tables, the UEFI implementation
+MUST set the ACPI_20_TABLE_GUID to point to the RSDP table (the table with
+the ACPI signature "RSD PTR "). If this pointer is incorrect and acpi=force
+is used, the kernel will disable ACPI and try to use DT to boot instead; the
+kernel has, in effect, determined that ACPI tables are not present at that
+point.
+
+If the pointer to the RSDP table is correct, the table will be mapped into
+the kernel by the ACPI core, using the address provided by UEFI.
+
+The ACPI core will then locate and map in all other ACPI tables provided by
+using the addresses in the RSDP table to find the XSDT (eXtended System
+Description Table). The XSDT in turn provides the addresses to all other
+ACPI tables provided by the system firmware; the ACPI core will then traverse
+this table and map in the tables listed.
+
+The ACPI core will ignore any provided RSDT (Root System Description Table).
+RSDTs have been deprecated and are ignored on arm64 since they only allow
+for 32-bit addresses.
+
+Further, the ACPI core will only use the 64-bit address fields in the FADT
+(Fixed ACPI Description Table). Any 32-bit address fields in the FADT will
+be ignored on arm64.
+
+Hardware reduced mode (see Section 4.1 of the ACPI 6.1 specification) will
+be enforced by the ACPI core on arm64. Doing so allows the ACPI core to
+run less complex code since it no longer has to provide support for legacy
+hardware from other architectures. Any fields that are not to be used for
+hardware reduced mode must be set to zero.
+
+For the ACPI core to operate properly, and in turn provide the information
+the kernel needs to configure devices, it expects to find the following
+tables (all section numbers refer to the ACPI 6.5 specification):
+
+ - RSDP (Root System Description Pointer), section 5.2.5
+
+ - XSDT (eXtended System Description Table), section 5.2.8
+
+ - FADT (Fixed ACPI Description Table), section 5.2.9
+
+ - DSDT (Differentiated System Description Table), section
+ 5.2.11.1
+
+ - MADT (Multiple APIC Description Table), section 5.2.12
+
+ - GTDT (Generic Timer Description Table), section 5.2.24
+
+ - PPTT (Processor Properties Topology Table), section 5.2.30
+
+ - DBG2 (DeBuG port table 2), section 5.2.6, specifically Table 5-6.
+
+ - APMT (Arm Performance Monitoring unit Table), section 5.2.6, specifically Table 5-6.
+
+ - AGDI (Arm Generic diagnostic Dump and Reset Device Interface Table), section 5.2.6, specifically Table 5-6.
+
+ - If PCI is supported, the MCFG (Memory mapped ConFiGuration
+ Table), section 5.2.6, specifically Table 5-6.
+
+ - If booting without a console=<device> kernel parameter is
+ supported, the SPCR (Serial Port Console Redirection table),
+ section 5.2.6, specifically Table 5-6.
+
+ - If necessary to describe the I/O topology, SMMUs and GIC ITSs,
+ the IORT (Input Output Remapping Table, section 5.2.6, specifically
+ Table 5-6).
+
+ - If NUMA is supported, the following tables are required:
+
+ - SRAT (System Resource Affinity Table), section 5.2.16
+
+ - SLIT (System Locality distance Information Table), section 5.2.17
+
+ - If NUMA is supported, and the system contains heterogeneous memory,
+ the HMAT (Heterogeneous Memory Attribute Table), section 5.2.28.
+
+ - If the ACPI Platform Error Interfaces are required, the following
+ tables are conditionally required:
+
+ - BERT (Boot Error Record Table, section 18.3.1)
+
+ - EINJ (Error INJection table, section 18.6.1)
+
+ - ERST (Error Record Serialization Table, section 18.5)
+
+ - HEST (Hardware Error Source Table, section 18.3.2)
+
+ - SDEI (Software Delegated Exception Interface table, section 5.2.6,
+ specifically Table 5-6)
+
+ - AEST (Arm Error Source Table, section 5.2.6,
+ specifically Table 5-6)
+
+ - RAS2 (ACPI RAS2 feature table, section 5.2.21)
+
+ - If the system contains controllers using PCC channel, the
+ PCCT (Platform Communications Channel Table), section 14.1
+
+ - If the system contains a controller to capture board-level system state,
+ and communicates with the host via PCC, the PDTT (Platform Debug Trigger
+ Table), section 5.2.29.
+
+ - If NVDIMM is supported, the NFIT (NVDIMM Firmware Interface Table), section 5.2.26
+
+ - If video framebuffer is present, the BGRT (Boot Graphics Resource Table), section 5.2.23
+
+ - If IPMI is implemented, the SPMI (Server Platform Management Interface),
+ section 5.2.6, specifically Table 5-6.
+
+ - If the system contains a CXL Host Bridge, the CEDT (CXL Early Discovery
+ Table), section 5.2.6, specifically Table 5-6.
+
+ - If the system supports MPAM, the MPAM (Memory Partitioning And Monitoring table), section 5.2.6,
+ specifically Table 5-6.
+
+ - If the system lacks persistent storage, the IBFT (ISCSI Boot Firmware
+ Table), section 5.2.6, specifically Table 5-6.
+
+
+If the above tables are not all present, the kernel may or may not be
+able to boot properly since it may not be able to configure all of the
+devices available. This list of tables is not meant to be all inclusive;
+in some environments other tables may be needed (e.g., any of the APEI
+tables from section 18) to support specific functionality.
+
+
+ACPI Detection
+--------------
+Drivers should determine their probe() type by checking for a null
+value for ACPI_HANDLE, or checking .of_node, or other information in
+the device structure. This is detailed further in the "Driver
+Recommendations" section.
+
+In non-driver code, if the presence of ACPI needs to be detected at
+run time, then check the value of acpi_disabled. If CONFIG_ACPI is not
+set, acpi_disabled will always be 1.
+
+
+Device Enumeration
+------------------
+Device descriptions in ACPI should use standard recognized ACPI interfaces.
+These may contain less information than is typically provided via a Device
+Tree description for the same device. This is also one of the reasons that
+ACPI can be useful -- the driver takes into account that it may have less
+detailed information about the device and uses sensible defaults instead.
+If done properly in the driver, the hardware can change and improve over
+time without the driver having to change at all.
+
+Clocks provide an excellent example. In DT, clocks need to be specified
+and the drivers need to take them into account. In ACPI, the assumption
+is that UEFI will leave the device in a reasonable default state, including
+any clock settings. If for some reason the driver needs to change a clock
+value, this can be done in an ACPI method; all the driver needs to do is
+invoke the method and not concern itself with what the method needs to do
+to change the clock. Changing the hardware can then take place over time
+by changing what the ACPI method does, and not the driver.
+
+In DT, the parameters needed by the driver to set up clocks as in the example
+above are known as "bindings"; in ACPI, these are known as "Device Properties"
+and provided to a driver via the _DSD object.
+
+ACPI tables are described with a formal language called ASL, the ACPI
+Source Language (section 19 of the specification). This means that there
+are always multiple ways to describe the same thing -- including device
+properties. For example, device properties could use an ASL construct
+that looks like this: Name(KEY0, "value0"). An ACPI device driver would
+then retrieve the value of the property by evaluating the KEY0 object.
+However, using Name() this way has multiple problems: (1) ACPI limits
+names ("KEY0") to four characters unlike DT; (2) there is no industry
+wide registry that maintains a list of names, minimizing re-use; (3)
+there is also no registry for the definition of property values ("value0"),
+again making re-use difficult; and (4) how does one maintain backward
+compatibility as new hardware comes out? The _DSD method was created
+to solve precisely these sorts of problems; Linux drivers should ALWAYS
+use the _DSD method for device properties and nothing else.
+
+The _DSM object (ACPI Section 9.14.1) could also be used for conveying
+device properties to a driver. Linux drivers should only expect it to
+be used if _DSD cannot represent the data required, and there is no way
+to create a new UUID for the _DSD object. Note that there is even less
+regulation of the use of _DSM than there is of _DSD. Drivers that depend
+on the contents of _DSM objects will be more difficult to maintain over
+time because of this; as of this writing, the use of _DSM is the cause
+of quite a few firmware problems and is not recommended.
+
+Drivers should look for device properties in the _DSD object ONLY; the _DSD
+object is described in the ACPI specification section 6.2.5, but this only
+describes how to define the structure of an object returned via _DSD, and
+how specific data structures are defined by specific UUIDs. Linux should
+only use the _DSD Device Properties UUID [4]:
+
+ - UUID: daffd814-6eba-4d8c-8a91-bc9bbf4aa301
+
+Common device properties can be registered by creating a pull request to [4] so
+that they may be used across all operating systems supporting ACPI.
+Device properties that have not been registered with the UEFI Forum can be used
+but not as "uefi-" common properties.
+
+Before creating new device properties, check to be sure that they have not
+been defined before and either registered in the Linux kernel documentation
+as DT bindings, or the UEFI Forum as device properties. While we do not want
+to simply move all DT bindings into ACPI device properties, we can learn from
+what has been previously defined.
+
+If it is necessary to define a new device property, or if it makes sense to
+synthesize the definition of a binding so it can be used in any firmware,
+both DT bindings and ACPI device properties for device drivers have review
+processes. Use them both. When the driver itself is submitted for review
+to the Linux mailing lists, the device property definitions needed must be
+submitted at the same time. A driver that supports ACPI and uses device
+properties will not be considered complete without their definitions. Once
+the device property has been accepted by the Linux community, it must be
+registered with the UEFI Forum [4], which will review it again for consistency
+within the registry. This may require iteration. The UEFI Forum, though,
+will always be the canonical site for device property definitions.
+
+It may make sense to provide notice to the UEFI Forum that there is the
+intent to register a previously unused device property name as a means of
+reserving the name for later use. Other operating system vendors will
+also be submitting registration requests and this may help smooth the
+process.
+
+Once registration and review have been completed, the kernel provides an
+interface for looking up device properties in a manner independent of
+whether DT or ACPI is being used. This API should be used [5]; it can
+eliminate some duplication of code paths in driver probing functions and
+discourage divergence between DT bindings and ACPI device properties.
+
+
+Programmable Power Control Resources
+------------------------------------
+Programmable power control resources include such resources as voltage/current
+providers (regulators) and clock sources.
+
+With ACPI, the kernel clock and regulator framework is not expected to be used
+at all.
+
+The kernel assumes that power control of these resources is represented with
+Power Resource Objects (ACPI section 7.1). The ACPI core will then handle
+correctly enabling and disabling resources as they are needed. In order to
+get that to work, ACPI assumes each device has defined D-states and that these
+can be controlled through the optional ACPI methods _PS0, _PS1, _PS2, and _PS3;
+in ACPI, _PS0 is the method to invoke to turn a device full on, and _PS3 is for
+turning a device full off.
+
+There are two options for using those Power Resources. They can:
+
+ - be managed in a _PSx method which gets called on entry to power
+ state Dx.
+
+ - be declared separately as power resources with their own _ON and _OFF
+ methods. They are then tied back to D-states for a particular device
+ via _PRx which specifies which power resources a device needs to be on
+ while in Dx. Kernel then tracks number of devices using a power resource
+ and calls _ON/_OFF as needed.
+
+The kernel ACPI code will also assume that the _PSx methods follow the normal
+ACPI rules for such methods:
+
+ - If either _PS0 or _PS3 is implemented, then the other method must also
+ be implemented.
+
+ - If a device requires usage or setup of a power resource when on, the ASL
+ should organize that it is allocated/enabled using the _PS0 method.
+
+ - Resources allocated or enabled in the _PS0 method should be disabled
+ or de-allocated in the _PS3 method.
+
+ - Firmware will leave the resources in a reasonable state before handing
+ over control to the kernel.
+
+Such code in _PSx methods will of course be very platform specific. But,
+this allows the driver to abstract out the interface for operating the device
+and avoid having to read special non-standard values from ACPI tables. Further,
+abstracting the use of these resources allows the hardware to change over time
+without requiring updates to the driver.
+
+
+Clocks
+------
+ACPI makes the assumption that clocks are initialized by the firmware --
+UEFI, in this case -- to some working value before control is handed over
+to the kernel. This has implications for devices such as UARTs, or SoC-driven
+LCD displays, for example.
+
+When the kernel boots, the clocks are assumed to be set to reasonable
+working values. If for some reason the frequency needs to change -- e.g.,
+throttling for power management -- the device driver should expect that
+process to be abstracted out into some ACPI method that can be invoked
+(please see the ACPI specification for further recommendations on standard
+methods to be expected). The only exceptions to this are CPU clocks where
+CPPC provides a much richer interface than ACPI methods. If the clocks
+are not set, there is no direct way for Linux to control them.
+
+If an SoC vendor wants to provide fine-grained control of the system clocks,
+they could do so by providing ACPI methods that could be invoked by Linux
+drivers. However, this is NOT recommended and Linux drivers should NOT use
+such methods, even if they are provided. Such methods are not currently
+standardized in the ACPI specification, and using them could tie a kernel
+to a very specific SoC, or tie an SoC to a very specific version of the
+kernel, both of which we are trying to avoid.
+
+
+Driver Recommendations
+----------------------
+DO NOT remove any DT handling when adding ACPI support for a driver. The
+same device may be used on many different systems.
+
+DO try to structure the driver so that it is data-driven. That is, set up
+a struct containing internal per-device state based on defaults and whatever
+else must be discovered by the driver probe function. Then, have the rest
+of the driver operate off of the contents of that struct. Doing so should
+allow most divergence between ACPI and DT functionality to be kept local to
+the probe function instead of being scattered throughout the driver. For
+example::
+
+ static int device_probe_dt(struct platform_device *pdev)
+ {
+ /* DT specific functionality */
+ ...
+ }
+
+ static int device_probe_acpi(struct platform_device *pdev)
+ {
+ /* ACPI specific functionality */
+ ...
+ }
+
+ static int device_probe(struct platform_device *pdev)
+ {
+ ...
+ struct device_node node = pdev->dev.of_node;
+ ...
+
+ if (node)
+ ret = device_probe_dt(pdev);
+ else if (ACPI_HANDLE(&pdev->dev))
+ ret = device_probe_acpi(pdev);
+ else
+ /* other initialization */
+ ...
+ /* Continue with any generic probe operations */
+ ...
+ }
+
+DO keep the MODULE_DEVICE_TABLE entries together in the driver to make it
+clear the different names the driver is probed for, both from DT and from
+ACPI::
+
+ static struct of_device_id virtio_mmio_match[] = {
+ { .compatible = "virtio,mmio", },
+ { }
+ };
+ MODULE_DEVICE_TABLE(of, virtio_mmio_match);
+
+ static const struct acpi_device_id virtio_mmio_acpi_match[] = {
+ { "LNRO0005", },
+ { }
+ };
+ MODULE_DEVICE_TABLE(acpi, virtio_mmio_acpi_match);
+
+
+ASWG
+----
+The ACPI specification changes regularly. During the year 2014, for instance,
+version 5.1 was released and version 6.0 substantially completed, with most of
+the changes being driven by Arm-specific requirements. Proposed changes are
+presented and discussed in the ASWG (ACPI Specification Working Group) which
+is a part of the UEFI Forum. The current version of the ACPI specification
+is 6.5 release in August 2022.
+
+Participation in this group is open to all UEFI members. Please see
+http://www.uefi.org/workinggroup for details on group membership.
+
+It is the intent of the Arm ACPI kernel code to follow the ACPI specification
+as closely as possible, and to only implement functionality that complies with
+the released standards from UEFI ASWG. As a practical matter, there will be
+vendors that provide bad ACPI tables or violate the standards in some way.
+If this is because of errors, quirks and fix-ups may be necessary, but will
+be avoided if possible. If there are features missing from ACPI that preclude
+it from being used on a platform, ECRs (Engineering Change Requests) should be
+submitted to ASWG and go through the normal approval process; for those that
+are not UEFI members, many other members of the Linux community are and would
+likely be willing to assist in submitting ECRs.
+
+
+Linux Code
+----------
+Individual items specific to Linux on Arm, contained in the Linux
+source code, are in the list that follows:
+
+ACPI_OS_NAME
+ This macro defines the string to be returned when
+ an ACPI method invokes the _OS method. On Arm
+ systems, this macro will be "Linux" by default.
+ The command line parameter acpi_os=<string>
+ can be used to set it to some other value. The
+ default value for other architectures is "Microsoft
+ Windows NT", for example.
+
+ACPI Objects
+------------
+Detailed expectations for ACPI tables and object are listed in the file
+Documentation/arch/arm64/acpi_object_usage.rst.
+
+
+References
+----------
+[0] https://developer.arm.com/documentation/den0094/latest
+ document Arm-DEN-0094: "Arm Base System Architecture", version 1.0C, dated 6 Oct 2022
+
+[1] https://developer.arm.com/documentation/den0044/latest
+ Document Arm-DEN-0044: "Arm Base Boot Requirements", version 2.0G, dated 15 Apr 2022
+
+[2] https://developer.arm.com/documentation/den0029/latest
+ Document Arm-DEN-0029: "Arm Server Base System Architecture", version 7.1, dated 06 Oct 2022
+
+[3] http://www.secretlab.ca/archives/151,
+ 10 Jan 2015, Copyright (c) 2015,
+ Linaro Ltd., written by Grant Likely.
+
+[4] _DSD (Device Specific Data) Implementation Guide
+ https://github.com/UEFI/DSD-Guide/blob/main/dsd-guide.pdf
+
+[5] Kernel code for the unified device
+ property interface can be found in
+ include/linux/property.h and drivers/base/property.c.
+
+
+Authors
+-------
+- Al Stone <al.stone@linaro.org>
+- Graeme Gregory <graeme.gregory@linaro.org>
+- Hanjun Guo <hanjun.guo@linaro.org>
+
+- Grant Likely <grant.likely@linaro.org>, for the "Why ACPI on ARM?" section
diff --git a/Documentation/arch/arm64/arm-cca.rst b/Documentation/arch/arm64/arm-cca.rst
new file mode 100644
index 000000000000..c48b7d4ab6bd
--- /dev/null
+++ b/Documentation/arch/arm64/arm-cca.rst
@@ -0,0 +1,69 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================================
+Arm Confidential Compute Architecture
+=====================================
+
+Arm systems that support the Realm Management Extension (RME) contain
+hardware to allow a VM guest to be run in a way which protects the code
+and data of the guest from the hypervisor. It extends the older "two
+world" model (Normal and Secure World) into four worlds: Normal, Secure,
+Root and Realm. Linux can then also be run as a guest to a monitor
+running in the Realm world.
+
+The monitor running in the Realm world is known as the Realm Management
+Monitor (RMM) and implements the Realm Management Monitor
+specification[1]. The monitor acts a bit like a hypervisor (e.g. it runs
+in EL2 and manages the stage 2 page tables etc of the guests running in
+Realm world), however much of the control is handled by a hypervisor
+running in the Normal World. The Normal World hypervisor uses the Realm
+Management Interface (RMI) defined by the RMM specification to request
+the RMM to perform operations (e.g. mapping memory or executing a vCPU).
+
+The RMM defines an environment for guests where the address space (IPA)
+is split into two. The lower half is protected - any memory that is
+mapped in this half cannot be seen by the Normal World and the RMM
+restricts what operations the Normal World can perform on this memory
+(e.g. the Normal World cannot replace pages in this region without the
+guest's cooperation). The upper half is shared, the Normal World is free
+to make changes to the pages in this region, and is able to emulate MMIO
+devices in this region too.
+
+A guest running in a Realm may also communicate with the RMM using the
+Realm Services Interface (RSI) to request changes in its environment or
+to perform attestation about its environment. In particular it may
+request that areas of the protected address space are transitioned
+between 'RAM' and 'EMPTY' (in either direction). This allows a Realm
+guest to give up memory to be returned to the Normal World, or to
+request new memory from the Normal World. Without an explicit request
+from the Realm guest the RMM will otherwise prevent the Normal World
+from making these changes.
+
+Linux as a Realm Guest
+----------------------
+
+To run Linux as a guest within a Realm, the following must be provided
+either by the VMM or by a `boot loader` run in the Realm before Linux:
+
+ * All protected RAM described to Linux (by DT or ACPI) must be marked
+ RIPAS RAM before handing control over to Linux.
+
+ * MMIO devices must be either unprotected (e.g. emulated by the Normal
+ World) or marked RIPAS DEV.
+
+ * MMIO devices emulated by the Normal World and used very early in boot
+ (specifically earlycon) must be specified in the upper half of IPA.
+ For earlycon this can be done by specifying the address on the
+ command line, e.g. with an IPA size of 33 bits and the base address
+ of the emulated UART at 0x1000000: ``earlycon=uart,mmio,0x101000000``
+
+ * Linux will use bounce buffers for communicating with unprotected
+ devices. It will transition some protected memory to RIPAS EMPTY and
+ expect to be able to access unprotected pages at the same IPA address
+ but with the highest valid IPA bit set. The expectation is that the
+ VMM will remove the physical pages from the protected mapping and
+ provide those pages as unprotected pages.
+
+References
+----------
+[1] https://developer.arm.com/documentation/den0137/
diff --git a/Documentation/arch/arm64/asymmetric-32bit.rst b/Documentation/arch/arm64/asymmetric-32bit.rst
new file mode 100644
index 000000000000..57b8d7476f71
--- /dev/null
+++ b/Documentation/arch/arm64/asymmetric-32bit.rst
@@ -0,0 +1,163 @@
+======================
+Asymmetric 32-bit SoCs
+======================
+
+Author: Will Deacon <will@kernel.org>
+
+This document describes the impact of asymmetric 32-bit SoCs on the
+execution of 32-bit (``AArch32``) applications.
+
+Date: 2021-05-17
+
+Introduction
+============
+
+Some Armv9 SoCs suffer from a big.LITTLE misfeature where only a subset
+of the CPUs are capable of executing 32-bit user applications. On such
+a system, Linux by default treats the asymmetry as a "mismatch" and
+disables support for both the ``PER_LINUX32`` personality and
+``execve(2)`` of 32-bit ELF binaries, with the latter returning
+``-ENOEXEC``. If the mismatch is detected during late onlining of a
+64-bit-only CPU, then the onlining operation fails and the new CPU is
+unavailable for scheduling.
+
+Surprisingly, these SoCs have been produced with the intention of
+running legacy 32-bit binaries. Unsurprisingly, that doesn't work very
+well with the default behaviour of Linux.
+
+It seems inevitable that future SoCs will drop 32-bit support
+altogether, so if you're stuck in the unenviable position of needing to
+run 32-bit code on one of these transitionary platforms then you would
+be wise to consider alternatives such as recompilation, emulation or
+retirement. If neither of those options are practical, then read on.
+
+Enabling kernel support
+=======================
+
+Since the kernel support is not completely transparent to userspace,
+allowing 32-bit tasks to run on an asymmetric 32-bit system requires an
+explicit "opt-in" and can be enabled by passing the
+``allow_mismatched_32bit_el0`` parameter on the kernel command-line.
+
+For the remainder of this document we will refer to an *asymmetric
+system* to mean an asymmetric 32-bit SoC running Linux with this kernel
+command-line option enabled.
+
+Userspace impact
+================
+
+32-bit tasks running on an asymmetric system behave in mostly the same
+way as on a homogeneous system, with a few key differences relating to
+CPU affinity.
+
+sysfs
+-----
+
+The subset of CPUs capable of running 32-bit tasks is described in
+``/sys/devices/system/cpu/aarch32_el0`` and is documented further in
+Documentation/ABI/testing/sysfs-devices-system-cpu.
+
+**Note:** CPUs are advertised by this file as they are detected and so
+late-onlining of 32-bit-capable CPUs can result in the file contents
+being modified by the kernel at runtime. Once advertised, CPUs are never
+removed from the file.
+
+``execve(2)``
+-------------
+
+On a homogeneous system, the CPU affinity of a task is preserved across
+``execve(2)``. This is not always possible on an asymmetric system,
+specifically when the new program being executed is 32-bit yet the
+affinity mask contains 64-bit-only CPUs. In this situation, the kernel
+determines the new affinity mask as follows:
+
+ 1. If the 32-bit-capable subset of the affinity mask is not empty,
+ then the affinity is restricted to that subset and the old affinity
+ mask is saved. This saved mask is inherited over ``fork(2)`` and
+ preserved across ``execve(2)`` of 32-bit programs.
+
+ **Note:** This step does not apply to ``SCHED_DEADLINE`` tasks.
+ See `SCHED_DEADLINE`_.
+
+ 2. Otherwise, the cpuset hierarchy of the task is walked until an
+ ancestor is found containing at least one 32-bit-capable CPU. The
+ affinity of the task is then changed to match the 32-bit-capable
+ subset of the cpuset determined by the walk.
+
+ 3. On failure (i.e. out of memory), the affinity is changed to the set
+ of all 32-bit-capable CPUs of which the kernel is aware.
+
+A subsequent ``execve(2)`` of a 64-bit program by the 32-bit task will
+invalidate the affinity mask saved in (1) and attempt to restore the CPU
+affinity of the task using the saved mask if it was previously valid.
+This restoration may fail due to intervening changes to the deadline
+policy or cpuset hierarchy, in which case the ``execve(2)`` continues
+with the affinity unchanged.
+
+Calls to ``sched_setaffinity(2)`` for a 32-bit task will consider only
+the 32-bit-capable CPUs of the requested affinity mask. On success, the
+affinity for the task is updated and any saved mask from a prior
+``execve(2)`` is invalidated.
+
+``SCHED_DEADLINE``
+------------------
+
+Explicit admission of a 32-bit deadline task to the default root domain
+(e.g. by calling ``sched_setattr(2)``) is rejected on an asymmetric
+32-bit system unless admission control is disabled by writing -1 to
+``/proc/sys/kernel/sched_rt_runtime_us``.
+
+``execve(2)`` of a 32-bit program from a 64-bit deadline task will
+return ``-ENOEXEC`` if the root domain for the task contains any
+64-bit-only CPUs and admission control is enabled. Concurrent offlining
+of 32-bit-capable CPUs may still necessitate the procedure described in
+`execve(2)`_, in which case step (1) is skipped and a warning is
+emitted on the console.
+
+**Note:** It is recommended that a set of 32-bit-capable CPUs are placed
+into a separate root domain if ``SCHED_DEADLINE`` is to be used with
+32-bit tasks on an asymmetric system. Failure to do so is likely to
+result in missed deadlines.
+
+Cpusets
+-------
+
+The affinity of a 32-bit task on an asymmetric system may include CPUs
+that are not explicitly allowed by the cpuset to which it is attached.
+This can occur as a result of the following two situations:
+
+ - A 64-bit task attached to a cpuset which allows only 64-bit CPUs
+ executes a 32-bit program.
+
+ - All of the 32-bit-capable CPUs allowed by a cpuset containing a
+ 32-bit task are offlined.
+
+In both of these cases, the new affinity is calculated according to step
+(2) of the process described in `execve(2)`_ and the cpuset hierarchy is
+unchanged irrespective of the cgroup version.
+
+CPU hotplug
+-----------
+
+On an asymmetric system, the first detected 32-bit-capable CPU is
+prevented from being offlined by userspace and any such attempt will
+return ``-EPERM``. Note that suspend is still permitted even if the
+primary CPU (i.e. CPU 0) is 64-bit-only.
+
+KVM
+---
+
+Although KVM will not advertise 32-bit EL0 support to any vCPUs on an
+asymmetric system, a broken guest at EL1 could still attempt to execute
+32-bit code at EL0. In this case, an exit from a vCPU thread in 32-bit
+mode will return to host userspace with an ``exit_reason`` of
+``KVM_EXIT_FAIL_ENTRY`` and will remain non-runnable until successfully
+re-initialised by a subsequent ``KVM_ARM_VCPU_INIT`` operation.
+
+NOHZ FULL
+---------
+
+To avoid perturbing an adaptive-ticks CPU (specified using
+``nohz_full=``) when a 32-bit task is forcefully migrated, these CPUs
+are treated as 64-bit-only when support for asymmetric 32-bit systems
+is enabled.
diff --git a/Documentation/arch/arm64/booting.rst b/Documentation/arch/arm64/booting.rst
new file mode 100644
index 000000000000..e4f953839f71
--- /dev/null
+++ b/Documentation/arch/arm64/booting.rst
@@ -0,0 +1,608 @@
+=====================
+Booting AArch64 Linux
+=====================
+
+Author: Will Deacon <will.deacon@arm.com>
+
+Date : 07 September 2012
+
+This document is based on the ARM booting document by Russell King and
+is relevant to all public releases of the AArch64 Linux kernel.
+
+The AArch64 exception model is made up of a number of exception levels
+(EL0 - EL3), with EL0, EL1 and EL2 having a secure and a non-secure
+counterpart. EL2 is the hypervisor level, EL3 is the highest priority
+level and exists only in secure mode. Both are architecturally optional.
+
+For the purposes of this document, we will use the term `boot loader`
+simply to define all software that executes on the CPU(s) before control
+is passed to the Linux kernel. This may include secure monitor and
+hypervisor code, or it may just be a handful of instructions for
+preparing a minimal boot environment.
+
+Essentially, the boot loader should provide (as a minimum) the
+following:
+
+1. Setup and initialise the RAM
+2. Setup the device tree
+3. Decompress the kernel image
+4. Call the kernel image
+
+
+1. Setup and initialise RAM
+---------------------------
+
+Requirement: MANDATORY
+
+The boot loader is expected to find and initialise all RAM that the
+kernel will use for volatile data storage in the system. It performs
+this in a machine dependent manner. (It may use internal algorithms
+to automatically locate and size all RAM, or it may use knowledge of
+the RAM in the machine, or any other method the boot loader designer
+sees fit.)
+
+For Arm Confidential Compute Realms this includes ensuring that all
+protected RAM has a Realm IPA state (RIPAS) of "RAM".
+
+
+2. Setup the device tree
+-------------------------
+
+Requirement: MANDATORY
+
+The device tree blob (dtb) must be placed on an 8-byte boundary and must
+not exceed 2 megabytes in size. Since the dtb will be mapped cacheable
+using blocks of up to 2 megabytes in size, it must not be placed within
+any 2M region which must be mapped with any specific attributes.
+
+NOTE: versions prior to v4.2 also require that the DTB be placed within
+the 512 MB region starting at text_offset bytes below the kernel Image.
+
+3. Decompress the kernel image
+------------------------------
+
+Requirement: OPTIONAL
+
+The AArch64 kernel does not currently provide a decompressor and
+therefore requires decompression (gzip etc.) to be performed by the boot
+loader if a compressed Image target (e.g. Image.gz) is used. For
+bootloaders that do not implement this requirement, the uncompressed
+Image target is available instead.
+
+
+4. Call the kernel image
+------------------------
+
+Requirement: MANDATORY
+
+The decompressed kernel image contains a 64-byte header as follows::
+
+ u32 code0; /* Executable code */
+ u32 code1; /* Executable code */
+ u64 text_offset; /* Image load offset, little endian */
+ u64 image_size; /* Effective Image size, little endian */
+ u64 flags; /* kernel flags, little endian */
+ u64 res2 = 0; /* reserved */
+ u64 res3 = 0; /* reserved */
+ u64 res4 = 0; /* reserved */
+ u32 magic = 0x644d5241; /* Magic number, little endian, "ARM\x64" */
+ u32 res5; /* reserved (used for PE COFF offset) */
+
+
+Header notes:
+
+- As of v3.17, all fields are little endian unless stated otherwise.
+
+- code0/code1 are responsible for branching to stext.
+
+- when booting through EFI, code0/code1 are initially skipped.
+ res5 is an offset to the PE header and the PE header has the EFI
+ entry point (efi_stub_entry). When the stub has done its work, it
+ jumps to code0 to resume the normal boot process.
+
+- Prior to v3.17, the endianness of text_offset was not specified. In
+ these cases image_size is zero and text_offset is 0x80000 in the
+ endianness of the kernel. Where image_size is non-zero image_size is
+ little-endian and must be respected. Where image_size is zero,
+ text_offset can be assumed to be 0x80000.
+
+- The flags field (introduced in v3.17) is a little-endian 64-bit field
+ composed as follows:
+
+ ============= ===============================================================
+ Bit 0 Kernel endianness. 1 if BE, 0 if LE.
+ Bit 1-2 Kernel Page size.
+
+ * 0 - Unspecified.
+ * 1 - 4K
+ * 2 - 16K
+ * 3 - 64K
+ Bit 3 Kernel physical placement
+
+ 0
+ 2MB aligned base should be as close as possible
+ to the base of DRAM, since memory below it is not
+ accessible via the linear mapping
+ 1
+ 2MB aligned base such that all image_size bytes
+ counted from the start of the image are within
+ the 48-bit addressable range of physical memory
+ Bits 4-63 Reserved.
+ ============= ===============================================================
+
+- When image_size is zero, a bootloader should attempt to keep as much
+ memory as possible free for use by the kernel immediately after the
+ end of the kernel image. The amount of space required will vary
+ depending on selected features, and is effectively unbound.
+
+The Image must be placed text_offset bytes from a 2MB aligned base
+address anywhere in usable system RAM and called there. The region
+between the 2 MB aligned base address and the start of the image has no
+special significance to the kernel, and may be used for other purposes.
+At least image_size bytes from the start of the image must be free for
+use by the kernel.
+NOTE: versions prior to v4.6 cannot make use of memory below the
+physical offset of the Image so it is recommended that the Image be
+placed as close as possible to the start of system RAM.
+
+If an initrd/initramfs is passed to the kernel at boot, it must reside
+entirely within a 1 GB aligned physical memory window of up to 32 GB in
+size that fully covers the kernel Image as well.
+
+Any memory described to the kernel (even that below the start of the
+image) which is not marked as reserved from the kernel (e.g., with a
+memreserve region in the device tree) will be considered as available to
+the kernel.
+
+Before jumping into the kernel, the following conditions must be met:
+
+- Quiesce all DMA capable devices so that memory does not get
+ corrupted by bogus network packets or disk data. This will save
+ you many hours of debug.
+
+- Primary CPU general-purpose register settings:
+
+ - x0 = physical address of device tree blob (dtb) in system RAM.
+ - x1 = 0 (reserved for future use)
+ - x2 = 0 (reserved for future use)
+ - x3 = 0 (reserved for future use)
+
+- CPU mode
+
+ All forms of interrupts must be masked in PSTATE.DAIF (Debug, SError,
+ IRQ and FIQ).
+ The CPU must be in non-secure state, either in EL2 (RECOMMENDED in order
+ to have access to the virtualisation extensions), or in EL1.
+
+- Caches, MMUs
+
+ The MMU must be off.
+
+ The instruction cache may be on or off, and must not hold any stale
+ entries corresponding to the loaded kernel image.
+
+ The address range corresponding to the loaded kernel image must be
+ cleaned to the PoC. In the presence of a system cache or other
+ coherent masters with caches enabled, this will typically require
+ cache maintenance by VA rather than set/way operations.
+ System caches which respect the architected cache maintenance by VA
+ operations must be configured and may be enabled.
+ System caches which do not respect architected cache maintenance by VA
+ operations (not recommended) must be configured and disabled.
+
+- Architected timers
+
+ CNTFRQ must be programmed with the timer frequency and CNTVOFF must
+ be programmed with a consistent value on all CPUs. If entering the
+ kernel at EL1, CNTHCTL_EL2 must have EL1PCTEN (bit 0) set where
+ available.
+
+- Coherency
+
+ All CPUs to be booted by the kernel must be part of the same coherency
+ domain on entry to the kernel. This may require IMPLEMENTATION DEFINED
+ initialisation to enable the receiving of maintenance operations on
+ each CPU.
+
+- System registers
+
+ All writable architected system registers at or below the exception
+ level where the kernel image will be entered must be initialised by
+ software at a higher exception level to prevent execution in an UNKNOWN
+ state.
+
+ For all systems:
+ - If EL3 is present:
+
+ - SCR_EL3.FIQ must have the same value across all CPUs the kernel is
+ executing on.
+ - The value of SCR_EL3.FIQ must be the same as the one present at boot
+ time whenever the kernel is executing.
+
+ - If EL3 is present and the kernel is entered at EL2:
+
+ - SCR_EL3.HCE (bit 8) must be initialised to 0b1.
+
+ For systems with a GICv5 interrupt controller to be used in v5 mode:
+
+ - If the kernel is entered at EL1 and EL2 is present:
+
+ - ICH_HFGRTR_EL2.ICC_PPI_ACTIVERn_EL1 (bit 20) must be initialised to 0b1.
+ - ICH_HFGRTR_EL2.ICC_PPI_PRIORITYRn_EL1 (bit 19) must be initialised to 0b1.
+ - ICH_HFGRTR_EL2.ICC_PPI_PENDRn_EL1 (bit 18) must be initialised to 0b1.
+ - ICH_HFGRTR_EL2.ICC_PPI_ENABLERn_EL1 (bit 17) must be initialised to 0b1.
+ - ICH_HFGRTR_EL2.ICC_PPI_HMRn_EL1 (bit 16) must be initialised to 0b1.
+ - ICH_HFGRTR_EL2.ICC_IAFFIDR_EL1 (bit 7) must be initialised to 0b1.
+ - ICH_HFGRTR_EL2.ICC_ICSR_EL1 (bit 6) must be initialised to 0b1.
+ - ICH_HFGRTR_EL2.ICC_PCR_EL1 (bit 5) must be initialised to 0b1.
+ - ICH_HFGRTR_EL2.ICC_HPPIR_EL1 (bit 4) must be initialised to 0b1.
+ - ICH_HFGRTR_EL2.ICC_HAPR_EL1 (bit 3) must be initialised to 0b1.
+ - ICH_HFGRTR_EL2.ICC_CR0_EL1 (bit 2) must be initialised to 0b1.
+ - ICH_HFGRTR_EL2.ICC_IDRn_EL1 (bit 1) must be initialised to 0b1.
+ - ICH_HFGRTR_EL2.ICC_APR_EL1 (bit 0) must be initialised to 0b1.
+
+ - ICH_HFGWTR_EL2.ICC_PPI_ACTIVERn_EL1 (bit 20) must be initialised to 0b1.
+ - ICH_HFGWTR_EL2.ICC_PPI_PRIORITYRn_EL1 (bit 19) must be initialised to 0b1.
+ - ICH_HFGWTR_EL2.ICC_PPI_PENDRn_EL1 (bit 18) must be initialised to 0b1.
+ - ICH_HFGWTR_EL2.ICC_PPI_ENABLERn_EL1 (bit 17) must be initialised to 0b1.
+ - ICH_HFGWTR_EL2.ICC_ICSR_EL1 (bit 6) must be initialised to 0b1.
+ - ICH_HFGWTR_EL2.ICC_PCR_EL1 (bit 5) must be initialised to 0b1.
+ - ICH_HFGWTR_EL2.ICC_CR0_EL1 (bit 2) must be initialised to 0b1.
+ - ICH_HFGWTR_EL2.ICC_APR_EL1 (bit 0) must be initialised to 0b1.
+
+ - ICH_HFGITR_EL2.GICRCDNMIA (bit 10) must be initialised to 0b1.
+ - ICH_HFGITR_EL2.GICRCDIA (bit 9) must be initialised to 0b1.
+ - ICH_HFGITR_EL2.GICCDDI (bit 8) must be initialised to 0b1.
+ - ICH_HFGITR_EL2.GICCDEOI (bit 7) must be initialised to 0b1.
+ - ICH_HFGITR_EL2.GICCDHM (bit 6) must be initialised to 0b1.
+ - ICH_HFGITR_EL2.GICCDRCFG (bit 5) must be initialised to 0b1.
+ - ICH_HFGITR_EL2.GICCDPEND (bit 4) must be initialised to 0b1.
+ - ICH_HFGITR_EL2.GICCDAFF (bit 3) must be initialised to 0b1.
+ - ICH_HFGITR_EL2.GICCDPRI (bit 2) must be initialised to 0b1.
+ - ICH_HFGITR_EL2.GICCDDIS (bit 1) must be initialised to 0b1.
+ - ICH_HFGITR_EL2.GICCDEN (bit 0) must be initialised to 0b1.
+
+ - The DT or ACPI tables must describe a GICv5 interrupt controller.
+
+ For systems with a GICv3 interrupt controller to be used in v3 mode:
+ - If EL3 is present:
+
+ - ICC_SRE_EL3.Enable (bit 3) must be initialised to 0b1.
+ - ICC_SRE_EL3.SRE (bit 0) must be initialised to 0b1.
+ - ICC_CTLR_EL3.PMHE (bit 6) must be set to the same value across
+ all CPUs the kernel is executing on, and must stay constant
+ for the lifetime of the kernel.
+
+ - If the kernel is entered at EL1:
+
+ - ICC_SRE_EL2.Enable (bit 3) must be initialised to 0b1
+ - ICC_SRE_EL2.SRE (bit 0) must be initialised to 0b1.
+
+ - The DT or ACPI tables must describe a GICv3 interrupt controller.
+
+ For systems with a GICv3 interrupt controller to be used in
+ compatibility (v2) mode:
+
+ - If EL3 is present:
+
+ ICC_SRE_EL3.SRE (bit 0) must be initialised to 0b0.
+
+ - If the kernel is entered at EL1:
+
+ ICC_SRE_EL2.SRE (bit 0) must be initialised to 0b0.
+
+ - The DT or ACPI tables must describe a GICv2 interrupt controller.
+
+ For CPUs with pointer authentication functionality:
+
+ - If EL3 is present:
+
+ - SCR_EL3.APK (bit 16) must be initialised to 0b1
+ - SCR_EL3.API (bit 17) must be initialised to 0b1
+
+ - If the kernel is entered at EL1:
+
+ - HCR_EL2.APK (bit 40) must be initialised to 0b1
+ - HCR_EL2.API (bit 41) must be initialised to 0b1
+
+ For CPUs with Activity Monitors Unit v1 (AMUv1) extension present:
+
+ - If EL3 is present:
+
+ - CPTR_EL3.TAM (bit 30) must be initialised to 0b0
+ - CPTR_EL2.TAM (bit 30) must be initialised to 0b0
+ - AMCNTENSET0_EL0 must be initialised to 0b1111
+ - AMCNTENSET1_EL0 must be initialised to a platform specific value
+ having 0b1 set for the corresponding bit for each of the auxiliary
+ counters present.
+
+ - If the kernel is entered at EL1:
+
+ - AMCNTENSET0_EL0 must be initialised to 0b1111
+ - AMCNTENSET1_EL0 must be initialised to a platform specific value
+ having 0b1 set for the corresponding bit for each of the auxiliary
+ counters present.
+
+ For CPUs with the Fine Grained Traps (FEAT_FGT) extension present:
+
+ - If EL3 is present and the kernel is entered at EL2:
+
+ - SCR_EL3.FGTEn (bit 27) must be initialised to 0b1.
+
+ For CPUs with the Fine Grained Traps 2 (FEAT_FGT2) extension present:
+
+ - If EL3 is present and the kernel is entered at EL2:
+
+ - SCR_EL3.FGTEn2 (bit 59) must be initialised to 0b1.
+
+ For CPUs with support for HCRX_EL2 (FEAT_HCX) present:
+
+ - If EL3 is present and the kernel is entered at EL2:
+
+ - SCR_EL3.HXEn (bit 38) must be initialised to 0b1.
+
+ For CPUs with Advanced SIMD and floating point support:
+
+ - If EL3 is present:
+
+ - CPTR_EL3.TFP (bit 10) must be initialised to 0b0.
+
+ - If EL2 is present and the kernel is entered at EL1:
+
+ - CPTR_EL2.TFP (bit 10) must be initialised to 0b0.
+
+ For CPUs with the Scalable Vector Extension (FEAT_SVE) present:
+
+ - if EL3 is present:
+
+ - CPTR_EL3.EZ (bit 8) must be initialised to 0b1.
+
+ - ZCR_EL3.LEN must be initialised to the same value for all CPUs the
+ kernel is executed on.
+
+ - If the kernel is entered at EL1 and EL2 is present:
+
+ - CPTR_EL2.TZ (bit 8) must be initialised to 0b0.
+
+ - CPTR_EL2.ZEN (bits 17:16) must be initialised to 0b11.
+
+ - ZCR_EL2.LEN must be initialised to the same value for all CPUs the
+ kernel will execute on.
+
+ For CPUs with the Scalable Matrix Extension (FEAT_SME):
+
+ - If EL3 is present:
+
+ - CPTR_EL3.ESM (bit 12) must be initialised to 0b1.
+
+ - SCR_EL3.EnTP2 (bit 41) must be initialised to 0b1.
+
+ - SMCR_EL3.LEN must be initialised to the same value for all CPUs the
+ kernel will execute on.
+
+ - If the kernel is entered at EL1 and EL2 is present:
+
+ - CPTR_EL2.TSM (bit 12) must be initialised to 0b0.
+
+ - CPTR_EL2.SMEN (bits 25:24) must be initialised to 0b11.
+
+ - SCTLR_EL2.EnTP2 (bit 60) must be initialised to 0b1.
+
+ - SMCR_EL2.LEN must be initialised to the same value for all CPUs the
+ kernel will execute on.
+
+ - HWFGRTR_EL2.nTPIDR2_EL0 (bit 55) must be initialised to 0b01.
+
+ - HWFGWTR_EL2.nTPIDR2_EL0 (bit 55) must be initialised to 0b01.
+
+ - HWFGRTR_EL2.nSMPRI_EL1 (bit 54) must be initialised to 0b01.
+
+ - HWFGWTR_EL2.nSMPRI_EL1 (bit 54) must be initialised to 0b01.
+
+ For CPUs with the Scalable Matrix Extension FA64 feature (FEAT_SME_FA64):
+
+ - If EL3 is present:
+
+ - SMCR_EL3.FA64 (bit 31) must be initialised to 0b1.
+
+ - If the kernel is entered at EL1 and EL2 is present:
+
+ - SMCR_EL2.FA64 (bit 31) must be initialised to 0b1.
+
+ For CPUs with the Memory Tagging Extension feature (FEAT_MTE2):
+
+ - If EL3 is present:
+
+ - SCR_EL3.ATA (bit 26) must be initialised to 0b1.
+
+ - If the kernel is entered at EL1 and EL2 is present:
+
+ - HCR_EL2.ATA (bit 56) must be initialised to 0b1.
+
+ For CPUs with the Scalable Matrix Extension version 2 (FEAT_SME2):
+
+ - If EL3 is present:
+
+ - SMCR_EL3.EZT0 (bit 30) must be initialised to 0b1.
+
+ - If the kernel is entered at EL1 and EL2 is present:
+
+ - SMCR_EL2.EZT0 (bit 30) must be initialised to 0b1.
+
+ For CPUs with the Branch Record Buffer Extension (FEAT_BRBE):
+
+ - If EL3 is present:
+
+ - MDCR_EL3.SBRBE (bits 33:32) must be initialised to 0b01 or 0b11.
+
+ - If the kernel is entered at EL1 and EL2 is present:
+
+ - BRBCR_EL2.CC (bit 3) must be initialised to 0b1.
+ - BRBCR_EL2.MPRED (bit 4) must be initialised to 0b1.
+
+ - HDFGRTR_EL2.nBRBDATA (bit 61) must be initialised to 0b1.
+ - HDFGRTR_EL2.nBRBCTL (bit 60) must be initialised to 0b1.
+ - HDFGRTR_EL2.nBRBIDR (bit 59) must be initialised to 0b1.
+
+ - HDFGWTR_EL2.nBRBDATA (bit 61) must be initialised to 0b1.
+ - HDFGWTR_EL2.nBRBCTL (bit 60) must be initialised to 0b1.
+
+ - HFGITR_EL2.nBRBIALL (bit 56) must be initialised to 0b1.
+ - HFGITR_EL2.nBRBINJ (bit 55) must be initialised to 0b1.
+
+ For CPUs with the Performance Monitors Extension (FEAT_PMUv3p9):
+
+ - If EL3 is present:
+
+ - MDCR_EL3.EnPM2 (bit 7) must be initialised to 0b1.
+
+ - If the kernel is entered at EL1 and EL2 is present:
+
+ - HDFGRTR2_EL2.nPMICNTR_EL0 (bit 2) must be initialised to 0b1.
+ - HDFGRTR2_EL2.nPMICFILTR_EL0 (bit 3) must be initialised to 0b1.
+ - HDFGRTR2_EL2.nPMUACR_EL1 (bit 4) must be initialised to 0b1.
+
+ - HDFGWTR2_EL2.nPMICNTR_EL0 (bit 2) must be initialised to 0b1.
+ - HDFGWTR2_EL2.nPMICFILTR_EL0 (bit 3) must be initialised to 0b1.
+ - HDFGWTR2_EL2.nPMUACR_EL1 (bit 4) must be initialised to 0b1.
+
+ For CPUs with SPE data source filtering (FEAT_SPE_FDS):
+
+ - If EL3 is present:
+
+ - MDCR_EL3.EnPMS3 (bit 42) must be initialised to 0b1.
+
+ - If the kernel is entered at EL1 and EL2 is present:
+
+ - HDFGRTR2_EL2.nPMSDSFR_EL1 (bit 19) must be initialised to 0b1.
+ - HDFGWTR2_EL2.nPMSDSFR_EL1 (bit 19) must be initialised to 0b1.
+
+ For CPUs with Memory Copy and Memory Set instructions (FEAT_MOPS):
+
+ - If the kernel is entered at EL1 and EL2 is present:
+
+ - HCRX_EL2.MSCEn (bit 11) must be initialised to 0b1.
+
+ - HCRX_EL2.MCE2 (bit 10) must be initialised to 0b1 and the hypervisor
+ must handle MOPS exceptions as described in :ref:`arm64_mops_hyp`.
+
+ For CPUs with the Extended Translation Control Register feature (FEAT_TCR2):
+
+ - If EL3 is present:
+
+ - SCR_EL3.TCR2En (bit 43) must be initialised to 0b1.
+
+ - If the kernel is entered at EL1 and EL2 is present:
+
+ - HCRX_EL2.TCR2En (bit 14) must be initialised to 0b1.
+
+ For CPUs with the Stage 1 Permission Indirection Extension feature (FEAT_S1PIE):
+
+ - If EL3 is present:
+
+ - SCR_EL3.PIEn (bit 45) must be initialised to 0b1.
+
+ - If the kernel is entered at EL1 and EL2 is present:
+
+ - HFGRTR_EL2.nPIR_EL1 (bit 58) must be initialised to 0b1.
+
+ - HFGWTR_EL2.nPIR_EL1 (bit 58) must be initialised to 0b1.
+
+ - HFGRTR_EL2.nPIRE0_EL1 (bit 57) must be initialised to 0b1.
+
+ - HFGRWR_EL2.nPIRE0_EL1 (bit 57) must be initialised to 0b1.
+
+ - For CPUs with Guarded Control Stacks (FEAT_GCS):
+
+ - GCSCR_EL1 must be initialised to 0.
+
+ - GCSCRE0_EL1 must be initialised to 0.
+
+ - If EL3 is present:
+
+ - SCR_EL3.GCSEn (bit 39) must be initialised to 0b1.
+
+ - If EL2 is present:
+
+ - GCSCR_EL2 must be initialised to 0.
+
+ - If the kernel is entered at EL1 and EL2 is present:
+
+ - HCRX_EL2.GCSEn must be initialised to 0b1.
+
+ - HFGITR_EL2.nGCSEPP (bit 59) must be initialised to 0b1.
+
+ - HFGITR_EL2.nGCSSTR_EL1 (bit 58) must be initialised to 0b1.
+
+ - HFGITR_EL2.nGCSPUSHM_EL1 (bit 57) must be initialised to 0b1.
+
+ - HFGRTR_EL2.nGCS_EL1 (bit 53) must be initialised to 0b1.
+
+ - HFGRTR_EL2.nGCS_EL0 (bit 52) must be initialised to 0b1.
+
+ - HFGWTR_EL2.nGCS_EL1 (bit 53) must be initialised to 0b1.
+
+ - HFGWTR_EL2.nGCS_EL0 (bit 52) must be initialised to 0b1.
+
+ - For CPUs with debug architecture i.e FEAT_Debugv8pN (all versions):
+
+ - If EL3 is present:
+
+ - MDCR_EL3.TDA (bit 9) must be initialized to 0b0
+
+ - For CPUs with FEAT_PMUv3:
+
+ - If EL3 is present:
+
+ - MDCR_EL3.TPM (bit 6) must be initialized to 0b0
+
+The requirements described above for CPU mode, caches, MMUs, architected
+timers, coherency and system registers apply to all CPUs. All CPUs must
+enter the kernel in the same exception level. Where the values documented
+disable traps it is permissible for these traps to be enabled so long as
+those traps are handled transparently by higher exception levels as though
+the values documented were set.
+
+The boot loader is expected to enter the kernel on each CPU in the
+following manner:
+
+- The primary CPU must jump directly to the first instruction of the
+ kernel image. The device tree blob passed by this CPU must contain
+ an 'enable-method' property for each cpu node. The supported
+ enable-methods are described below.
+
+ It is expected that the bootloader will generate these device tree
+ properties and insert them into the blob prior to kernel entry.
+
+- CPUs with a "spin-table" enable-method must have a 'cpu-release-addr'
+ property in their cpu node. This property identifies a
+ naturally-aligned 64-bit zero-initalised memory location.
+
+ These CPUs should spin outside of the kernel in a reserved area of
+ memory (communicated to the kernel by a /memreserve/ region in the
+ device tree) polling their cpu-release-addr location, which must be
+ contained in the reserved region. A wfe instruction may be inserted
+ to reduce the overhead of the busy-loop and a sev will be issued by
+ the primary CPU. When a read of the location pointed to by the
+ cpu-release-addr returns a non-zero value, the CPU must jump to this
+ value. The value will be written as a single 64-bit little-endian
+ value, so CPUs must convert the read value to their native endianness
+ before jumping to it.
+
+- CPUs with a "psci" enable method should remain outside of
+ the kernel (i.e. outside of the regions of memory described to the
+ kernel in the memory node, or in a reserved area of memory described
+ to the kernel by a /memreserve/ region in the device tree). The
+ kernel will issue CPU_ON calls as described in ARM document number ARM
+ DEN 0022A ("Power State Coordination Interface System Software on ARM
+ processors") to bring CPUs into the kernel.
+
+ The device tree should contain a 'psci' node, as described in
+ Documentation/devicetree/bindings/arm/psci.yaml.
+
+- Secondary CPU general-purpose register settings
+
+ - x0 = 0 (reserved for future use)
+ - x1 = 0 (reserved for future use)
+ - x2 = 0 (reserved for future use)
+ - x3 = 0 (reserved for future use)
diff --git a/Documentation/arch/arm64/cpu-feature-registers.rst b/Documentation/arch/arm64/cpu-feature-registers.rst
new file mode 100644
index 000000000000..add66afc7b03
--- /dev/null
+++ b/Documentation/arch/arm64/cpu-feature-registers.rst
@@ -0,0 +1,419 @@
+===========================
+ARM64 CPU Feature Registers
+===========================
+
+Author: Suzuki K Poulose <suzuki.poulose@arm.com>
+
+
+This file describes the ABI for exporting the AArch64 CPU ID/feature
+registers to userspace. The availability of this ABI is advertised
+via the HWCAP_CPUID in HWCAPs.
+
+1. Motivation
+-------------
+
+The ARM architecture defines a set of feature registers, which describe
+the capabilities of the CPU/system. Access to these system registers is
+restricted from EL0 and there is no reliable way for an application to
+extract this information to make better decisions at runtime. There is
+limited information available to the application via HWCAPs, however
+there are some issues with their usage.
+
+ a) Any change to the HWCAPs requires an update to userspace (e.g libc)
+ to detect the new changes, which can take a long time to appear in
+ distributions. Exposing the registers allows applications to get the
+ information without requiring updates to the toolchains.
+
+ b) Access to HWCAPs is sometimes limited (e.g prior to libc, or
+ when ld is initialised at startup time).
+
+ c) HWCAPs cannot represent non-boolean information effectively. The
+ architecture defines a canonical format for representing features
+ in the ID registers; this is well defined and is capable of
+ representing all valid architecture variations.
+
+
+2. Requirements
+---------------
+
+ a) Safety:
+
+ Applications should be able to use the information provided by the
+ infrastructure to run safely across the system. This has greater
+ implications on a system with heterogeneous CPUs.
+ The infrastructure exports a value that is safe across all the
+ available CPU on the system.
+
+ e.g, If at least one CPU doesn't implement CRC32 instructions, while
+ others do, we should report that the CRC32 is not implemented.
+ Otherwise an application could crash when scheduled on the CPU
+ which doesn't support CRC32.
+
+ b) Security:
+
+ Applications should only be able to receive information that is
+ relevant to the normal operation in userspace. Hence, some of the
+ fields are masked out(i.e, made invisible) and their values are set to
+ indicate the feature is 'not supported'. See Section 4 for the list
+ of visible features. Also, the kernel may manipulate the fields
+ based on what it supports. e.g, If FP is not supported by the
+ kernel, the values could indicate that the FP is not available
+ (even when the CPU provides it).
+
+ c) Implementation Defined Features
+
+ The infrastructure doesn't expose any register which is
+ IMPLEMENTATION DEFINED as per ARMv8-A Architecture.
+
+ d) CPU Identification:
+
+ MIDR_EL1 is exposed to help identify the processor. On a
+ heterogeneous system, this could be racy (just like getcpu()). The
+ process could be migrated to another CPU by the time it uses the
+ register value, unless the CPU affinity is set. Hence, there is no
+ guarantee that the value reflects the processor that it is
+ currently executing on. REVIDR and AIDR are not exposed due to this
+ constraint, as these registers only make sense in conjunction with
+ the MIDR. Alternately, MIDR_EL1, REVIDR_EL1, and AIDR_EL1 are exposed
+ via sysfs at::
+
+ /sys/devices/system/cpu/cpu$ID/regs/identification/
+ \- midr_el1
+ \- revidr_el1
+ \- aidr_el1
+
+3. Implementation
+--------------------
+
+The infrastructure is built on the emulation of the 'MRS' instruction.
+Accessing a restricted system register from an application generates an
+exception and ends up in SIGILL being delivered to the process.
+The infrastructure hooks into the exception handler and emulates the
+operation if the source belongs to the supported system register space.
+
+The infrastructure emulates only the following system register space::
+
+ Op0=3, Op1=0, CRn=0, CRm=0,2,3,4,5,6,7
+
+(See Table C5-6 'System instruction encodings for non-Debug System
+register accesses' in ARMv8 ARM DDI 0487A.h, for the list of
+registers).
+
+The following rules are applied to the value returned by the
+infrastructure:
+
+ a) The value of an 'IMPLEMENTATION DEFINED' field is set to 0.
+ b) The value of a reserved field is populated with the reserved
+ value as defined by the architecture.
+ c) The value of a 'visible' field holds the system wide safe value
+ for the particular feature (except for MIDR_EL1, see section 4).
+ d) All other fields (i.e, invisible fields) are set to indicate
+ the feature is missing (as defined by the architecture).
+
+4. List of registers with visible features
+-------------------------------------------
+
+ 1) ID_AA64ISAR0_EL1 - Instruction Set Attribute Register 0
+
+ +------------------------------+---------+---------+
+ | Name | bits | visible |
+ +------------------------------+---------+---------+
+ | RNDR | [63-60] | y |
+ +------------------------------+---------+---------+
+ | TS | [55-52] | y |
+ +------------------------------+---------+---------+
+ | FHM | [51-48] | y |
+ +------------------------------+---------+---------+
+ | DP | [47-44] | y |
+ +------------------------------+---------+---------+
+ | SM4 | [43-40] | y |
+ +------------------------------+---------+---------+
+ | SM3 | [39-36] | y |
+ +------------------------------+---------+---------+
+ | SHA3 | [35-32] | y |
+ +------------------------------+---------+---------+
+ | RDM | [31-28] | y |
+ +------------------------------+---------+---------+
+ | ATOMICS | [23-20] | y |
+ +------------------------------+---------+---------+
+ | CRC32 | [19-16] | y |
+ +------------------------------+---------+---------+
+ | SHA2 | [15-12] | y |
+ +------------------------------+---------+---------+
+ | SHA1 | [11-8] | y |
+ +------------------------------+---------+---------+
+ | AES | [7-4] | y |
+ +------------------------------+---------+---------+
+
+
+ 2) ID_AA64PFR0_EL1 - Processor Feature Register 0
+
+ +------------------------------+---------+---------+
+ | Name | bits | visible |
+ +------------------------------+---------+---------+
+ | DIT | [51-48] | y |
+ +------------------------------+---------+---------+
+ | MPAM | [43-40] | n |
+ +------------------------------+---------+---------+
+ | SVE | [35-32] | y |
+ +------------------------------+---------+---------+
+ | GIC | [27-24] | n |
+ +------------------------------+---------+---------+
+ | AdvSIMD | [23-20] | y |
+ +------------------------------+---------+---------+
+ | FP | [19-16] | y |
+ +------------------------------+---------+---------+
+ | EL3 | [15-12] | n |
+ +------------------------------+---------+---------+
+ | EL2 | [11-8] | n |
+ +------------------------------+---------+---------+
+ | EL1 | [7-4] | n |
+ +------------------------------+---------+---------+
+ | EL0 | [3-0] | n |
+ +------------------------------+---------+---------+
+
+
+ 3) ID_AA64PFR1_EL1 - Processor Feature Register 1
+
+ +------------------------------+---------+---------+
+ | Name | bits | visible |
+ +------------------------------+---------+---------+
+ | SME | [27-24] | y |
+ +------------------------------+---------+---------+
+ | MTE | [11-8] | y |
+ +------------------------------+---------+---------+
+ | SSBS | [7-4] | y |
+ +------------------------------+---------+---------+
+ | BT | [3-0] | y |
+ +------------------------------+---------+---------+
+
+
+ 4) MIDR_EL1 - Main ID Register
+
+ +------------------------------+---------+---------+
+ | Name | bits | visible |
+ +------------------------------+---------+---------+
+ | Implementer | [31-24] | y |
+ +------------------------------+---------+---------+
+ | Variant | [23-20] | y |
+ +------------------------------+---------+---------+
+ | Architecture | [19-16] | y |
+ +------------------------------+---------+---------+
+ | PartNum | [15-4] | y |
+ +------------------------------+---------+---------+
+ | Revision | [3-0] | y |
+ +------------------------------+---------+---------+
+
+ NOTE: The 'visible' fields of MIDR_EL1 will contain the value
+ as available on the CPU where it is fetched and is not a system
+ wide safe value.
+
+ 5) ID_AA64ISAR1_EL1 - Instruction set attribute register 1
+
+ +------------------------------+---------+---------+
+ | Name | bits | visible |
+ +------------------------------+---------+---------+
+ | I8MM | [55-52] | y |
+ +------------------------------+---------+---------+
+ | DGH | [51-48] | y |
+ +------------------------------+---------+---------+
+ | BF16 | [47-44] | y |
+ +------------------------------+---------+---------+
+ | SB | [39-36] | y |
+ +------------------------------+---------+---------+
+ | FRINTTS | [35-32] | y |
+ +------------------------------+---------+---------+
+ | GPI | [31-28] | y |
+ +------------------------------+---------+---------+
+ | GPA | [27-24] | y |
+ +------------------------------+---------+---------+
+ | LRCPC | [23-20] | y |
+ +------------------------------+---------+---------+
+ | FCMA | [19-16] | y |
+ +------------------------------+---------+---------+
+ | JSCVT | [15-12] | y |
+ +------------------------------+---------+---------+
+ | API | [11-8] | y |
+ +------------------------------+---------+---------+
+ | APA | [7-4] | y |
+ +------------------------------+---------+---------+
+ | DPB | [3-0] | y |
+ +------------------------------+---------+---------+
+
+ 6) ID_AA64MMFR0_EL1 - Memory model feature register 0
+
+ +------------------------------+---------+---------+
+ | Name | bits | visible |
+ +------------------------------+---------+---------+
+ | ECV | [63-60] | y |
+ +------------------------------+---------+---------+
+
+ 7) ID_AA64MMFR2_EL1 - Memory model feature register 2
+
+ +------------------------------+---------+---------+
+ | Name | bits | visible |
+ +------------------------------+---------+---------+
+ | AT | [35-32] | y |
+ +------------------------------+---------+---------+
+
+ 8) ID_AA64ZFR0_EL1 - SVE feature ID register 0
+
+ +------------------------------+---------+---------+
+ | Name | bits | visible |
+ +------------------------------+---------+---------+
+ | F64MM | [59-56] | y |
+ +------------------------------+---------+---------+
+ | F32MM | [55-52] | y |
+ +------------------------------+---------+---------+
+ | I8MM | [47-44] | y |
+ +------------------------------+---------+---------+
+ | SM4 | [43-40] | y |
+ +------------------------------+---------+---------+
+ | SHA3 | [35-32] | y |
+ +------------------------------+---------+---------+
+ | B16B16 | [27-24] | y |
+ +------------------------------+---------+---------+
+ | BF16 | [23-20] | y |
+ +------------------------------+---------+---------+
+ | BitPerm | [19-16] | y |
+ +------------------------------+---------+---------+
+ | AES | [7-4] | y |
+ +------------------------------+---------+---------+
+ | SVEVer | [3-0] | y |
+ +------------------------------+---------+---------+
+
+ 8) ID_AA64MMFR1_EL1 - Memory model feature register 1
+
+ +------------------------------+---------+---------+
+ | Name | bits | visible |
+ +------------------------------+---------+---------+
+ | AFP | [47-44] | y |
+ +------------------------------+---------+---------+
+
+ 9) ID_AA64ISAR2_EL1 - Instruction set attribute register 2
+
+ +------------------------------+---------+---------+
+ | Name | bits | visible |
+ +------------------------------+---------+---------+
+ | CSSC | [55-52] | y |
+ +------------------------------+---------+---------+
+ | RPRFM | [51-48] | y |
+ +------------------------------+---------+---------+
+ | BC | [23-20] | y |
+ +------------------------------+---------+---------+
+ | MOPS | [19-16] | y |
+ +------------------------------+---------+---------+
+ | APA3 | [15-12] | y |
+ +------------------------------+---------+---------+
+ | GPA3 | [11-8] | y |
+ +------------------------------+---------+---------+
+ | RPRES | [7-4] | y |
+ +------------------------------+---------+---------+
+ | WFXT | [3-0] | y |
+ +------------------------------+---------+---------+
+
+ 10) MVFR0_EL1 - AArch32 Media and VFP Feature Register 0
+
+ +------------------------------+---------+---------+
+ | Name | bits | visible |
+ +------------------------------+---------+---------+
+ | FPDP | [11-8] | y |
+ +------------------------------+---------+---------+
+
+ 11) MVFR1_EL1 - AArch32 Media and VFP Feature Register 1
+
+ +------------------------------+---------+---------+
+ | Name | bits | visible |
+ +------------------------------+---------+---------+
+ | SIMDFMAC | [31-28] | y |
+ +------------------------------+---------+---------+
+ | SIMDSP | [19-16] | y |
+ +------------------------------+---------+---------+
+ | SIMDInt | [15-12] | y |
+ +------------------------------+---------+---------+
+ | SIMDLS | [11-8] | y |
+ +------------------------------+---------+---------+
+
+ 12) ID_ISAR5_EL1 - AArch32 Instruction Set Attribute Register 5
+
+ +------------------------------+---------+---------+
+ | Name | bits | visible |
+ +------------------------------+---------+---------+
+ | CRC32 | [19-16] | y |
+ +------------------------------+---------+---------+
+ | SHA2 | [15-12] | y |
+ +------------------------------+---------+---------+
+ | SHA1 | [11-8] | y |
+ +------------------------------+---------+---------+
+ | AES | [7-4] | y |
+ +------------------------------+---------+---------+
+
+
+Appendix I: Example
+-------------------
+
+::
+
+ /*
+ * Sample program to demonstrate the MRS emulation ABI.
+ *
+ * Copyright (C) 2015-2016, ARM Ltd
+ *
+ * Author: Suzuki K Poulose <suzuki.poulose@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+ #include <asm/hwcap.h>
+ #include <stdio.h>
+ #include <sys/auxv.h>
+
+ #define get_cpu_ftr(id) ({ \
+ unsigned long __val; \
+ asm("mrs %0, "#id : "=r" (__val)); \
+ printf("%-20s: 0x%016lx\n", #id, __val); \
+ })
+
+ int main(void)
+ {
+
+ if (!(getauxval(AT_HWCAP) & HWCAP_CPUID)) {
+ fputs("CPUID registers unavailable\n", stderr);
+ return 1;
+ }
+
+ get_cpu_ftr(ID_AA64ISAR0_EL1);
+ get_cpu_ftr(ID_AA64ISAR1_EL1);
+ get_cpu_ftr(ID_AA64MMFR0_EL1);
+ get_cpu_ftr(ID_AA64MMFR1_EL1);
+ get_cpu_ftr(ID_AA64PFR0_EL1);
+ get_cpu_ftr(ID_AA64PFR1_EL1);
+ get_cpu_ftr(ID_AA64DFR0_EL1);
+ get_cpu_ftr(ID_AA64DFR1_EL1);
+
+ get_cpu_ftr(MIDR_EL1);
+ get_cpu_ftr(MPIDR_EL1);
+ get_cpu_ftr(REVIDR_EL1);
+
+ #if 0
+ /* Unexposed register access causes SIGILL */
+ get_cpu_ftr(ID_MMFR0_EL1);
+ #endif
+
+ return 0;
+ }
diff --git a/Documentation/arch/arm64/cpu-hotplug.rst b/Documentation/arch/arm64/cpu-hotplug.rst
new file mode 100644
index 000000000000..8fb438bf7781
--- /dev/null
+++ b/Documentation/arch/arm64/cpu-hotplug.rst
@@ -0,0 +1,79 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. _cpuhp_index:
+
+====================
+CPU Hotplug and ACPI
+====================
+
+CPU hotplug in the arm64 world is commonly used to describe the kernel taking
+CPUs online/offline using PSCI. This document is about ACPI firmware allowing
+CPUs that were not available during boot to be added to the system later.
+
+``possible`` and ``present`` refer to the state of the CPU as seen by linux.
+
+
+CPU Hotplug on physical systems - CPUs not present at boot
+----------------------------------------------------------
+
+Physical systems need to mark a CPU that is ``possible`` but not ``present`` as
+being ``present``. An example would be a dual socket machine, where the package
+in one of the sockets can be replaced while the system is running.
+
+This is not supported.
+
+In the arm64 world CPUs are not a single device but a slice of the system.
+There are no systems that support the physical addition (or removal) of CPUs
+while the system is running, and ACPI is not able to sufficiently describe
+them.
+
+e.g. New CPUs come with new caches, but the platform's cache topology is
+described in a static table, the PPTT. How caches are shared between CPUs is
+not discoverable, and must be described by firmware.
+
+e.g. The GIC redistributor for each CPU must be accessed by the driver during
+boot to discover the system wide supported features. ACPI's MADT GICC
+structures can describe a redistributor associated with a disabled CPU, but
+can't describe whether the redistributor is accessible, only that it is not
+'always on'.
+
+arm64's ACPI tables assume that everything described is ``present``.
+
+
+CPU Hotplug on virtual systems - CPUs not enabled at boot
+---------------------------------------------------------
+
+Virtual systems have the advantage that all the properties the system will
+ever have can be described at boot. There are no power-domain considerations
+as such devices are emulated.
+
+CPU Hotplug on virtual systems is supported. It is distinct from physical
+CPU Hotplug as all resources are described as ``present``, but CPUs may be
+marked as disabled by firmware. Only the CPU's online/offline behaviour is
+influenced by firmware. An example is where a virtual machine boots with a
+single CPU, and additional CPUs are added once a cloud orchestrator deploys
+the workload.
+
+For a virtual machine, the VMM (e.g. Qemu) plays the part of firmware.
+
+Virtual hotplug is implemented as a firmware policy affecting which CPUs can be
+brought online. Firmware can enforce its policy via PSCI's return codes. e.g.
+``DENIED``.
+
+The ACPI tables must describe all the resources of the virtual machine. CPUs
+that firmware wishes to disable either from boot (or later) should not be
+``enabled`` in the MADT GICC structures, but should have the ``online capable``
+bit set, to indicate they can be enabled later. The boot CPU must be marked as
+``enabled``. The 'always on' GICR structure must be used to describe the
+redistributors.
+
+CPUs described as ``online capable`` but not ``enabled`` can be set to enabled
+by the DSDT's Processor object's _STA method. On virtual systems the _STA method
+must always report the CPU as ``present``. Changes to the firmware policy can
+be notified to the OS via device-check or eject-request.
+
+CPUs described as ``enabled`` in the static table, should not have their _STA
+modified dynamically by firmware. Soft-restart features such as kexec will
+re-read the static properties of the system from these static tables, and
+may malfunction if these no longer describe the running system. Linux will
+re-discover the dynamic properties of the system from the _STA method later
+during boot.
diff --git a/Documentation/arch/arm64/elf_hwcaps.rst b/Documentation/arch/arm64/elf_hwcaps.rst
new file mode 100644
index 000000000000..a15df4956849
--- /dev/null
+++ b/Documentation/arch/arm64/elf_hwcaps.rst
@@ -0,0 +1,452 @@
+.. _elf_hwcaps_index:
+
+================
+ARM64 ELF hwcaps
+================
+
+This document describes the usage and semantics of the arm64 ELF hwcaps.
+
+
+1. Introduction
+---------------
+
+Some hardware or software features are only available on some CPU
+implementations, and/or with certain kernel configurations, but have no
+architected discovery mechanism available to userspace code at EL0. The
+kernel exposes the presence of these features to userspace through a set
+of flags called hwcaps, exposed in the auxiliary vector.
+
+Userspace software can test for features by acquiring the AT_HWCAP,
+AT_HWCAP2 or AT_HWCAP3 entry of the auxiliary vector, and testing
+whether the relevant flags are set, e.g.::
+
+ bool floating_point_is_present(void)
+ {
+ unsigned long hwcaps = getauxval(AT_HWCAP);
+ if (hwcaps & HWCAP_FP)
+ return true;
+
+ return false;
+ }
+
+Where software relies on a feature described by a hwcap, it should check
+the relevant hwcap flag to verify that the feature is present before
+attempting to make use of the feature.
+
+Features cannot be probed reliably through other means. When a feature
+is not available, attempting to use it may result in unpredictable
+behaviour, and is not guaranteed to result in any reliable indication
+that the feature is unavailable, such as a SIGILL.
+
+
+2. Interpretation of hwcaps
+---------------------------
+
+The majority of hwcaps are intended to indicate the presence of features
+which are described by architected ID registers inaccessible to
+userspace code at EL0. These hwcaps are defined in terms of ID register
+fields, and should be interpreted with reference to the definition of
+these fields in the ARM Architecture Reference Manual (ARM ARM).
+
+Such hwcaps are described below in the form::
+
+ Functionality implied by idreg.field == val.
+
+Such hwcaps indicate the availability of functionality that the ARM ARM
+defines as being present when idreg.field has value val, but do not
+indicate that idreg.field is precisely equal to val, nor do they
+indicate the absence of functionality implied by other values of
+idreg.field.
+
+Other hwcaps may indicate the presence of features which cannot be
+described by ID registers alone. These may be described without
+reference to ID registers, and may refer to other documentation.
+
+
+3. The hwcaps exposed in AT_HWCAP
+---------------------------------
+
+HWCAP_FP
+ Functionality implied by ID_AA64PFR0_EL1.FP == 0b0000.
+
+HWCAP_ASIMD
+ Functionality implied by ID_AA64PFR0_EL1.AdvSIMD == 0b0000.
+
+HWCAP_EVTSTRM
+ The generic timer is configured to generate events at a frequency of
+ approximately 10KHz.
+
+HWCAP_AES
+ Functionality implied by ID_AA64ISAR0_EL1.AES == 0b0001.
+
+HWCAP_PMULL
+ Functionality implied by ID_AA64ISAR0_EL1.AES == 0b0010.
+
+HWCAP_SHA1
+ Functionality implied by ID_AA64ISAR0_EL1.SHA1 == 0b0001.
+
+HWCAP_SHA2
+ Functionality implied by ID_AA64ISAR0_EL1.SHA2 == 0b0001.
+
+HWCAP_CRC32
+ Functionality implied by ID_AA64ISAR0_EL1.CRC32 == 0b0001.
+
+HWCAP_ATOMICS
+ Functionality implied by ID_AA64ISAR0_EL1.Atomic == 0b0010.
+
+HWCAP_FPHP
+ Functionality implied by ID_AA64PFR0_EL1.FP == 0b0001.
+
+HWCAP_ASIMDHP
+ Functionality implied by ID_AA64PFR0_EL1.AdvSIMD == 0b0001.
+
+HWCAP_CPUID
+ EL0 access to certain ID registers is available, to the extent
+ described by Documentation/arch/arm64/cpu-feature-registers.rst.
+
+ These ID registers may imply the availability of features.
+
+HWCAP_ASIMDRDM
+ Functionality implied by ID_AA64ISAR0_EL1.RDM == 0b0001.
+
+HWCAP_JSCVT
+ Functionality implied by ID_AA64ISAR1_EL1.JSCVT == 0b0001.
+
+HWCAP_FCMA
+ Functionality implied by ID_AA64ISAR1_EL1.FCMA == 0b0001.
+
+HWCAP_LRCPC
+ Functionality implied by ID_AA64ISAR1_EL1.LRCPC == 0b0001.
+
+HWCAP_DCPOP
+ Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0001.
+
+HWCAP_SHA3
+ Functionality implied by ID_AA64ISAR0_EL1.SHA3 == 0b0001.
+
+HWCAP_SM3
+ Functionality implied by ID_AA64ISAR0_EL1.SM3 == 0b0001.
+
+HWCAP_SM4
+ Functionality implied by ID_AA64ISAR0_EL1.SM4 == 0b0001.
+
+HWCAP_ASIMDDP
+ Functionality implied by ID_AA64ISAR0_EL1.DP == 0b0001.
+
+HWCAP_SHA512
+ Functionality implied by ID_AA64ISAR0_EL1.SHA2 == 0b0010.
+
+HWCAP_SVE
+ Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001.
+
+HWCAP_ASIMDFHM
+ Functionality implied by ID_AA64ISAR0_EL1.FHM == 0b0001.
+
+HWCAP_DIT
+ Functionality implied by ID_AA64PFR0_EL1.DIT == 0b0001.
+
+HWCAP_USCAT
+ Functionality implied by ID_AA64MMFR2_EL1.AT == 0b0001.
+
+HWCAP_ILRCPC
+ Functionality implied by ID_AA64ISAR1_EL1.LRCPC == 0b0010.
+
+HWCAP_FLAGM
+ Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0001.
+
+HWCAP_SSBS
+ Functionality implied by ID_AA64PFR1_EL1.SSBS == 0b0010.
+
+HWCAP_SB
+ Functionality implied by ID_AA64ISAR1_EL1.SB == 0b0001.
+
+HWCAP_PACA
+ Functionality implied by ID_AA64ISAR1_EL1.APA == 0b0001 or
+ ID_AA64ISAR1_EL1.API == 0b0001, as described by
+ Documentation/arch/arm64/pointer-authentication.rst.
+
+HWCAP_PACG
+ Functionality implied by ID_AA64ISAR1_EL1.GPA == 0b0001 or
+ ID_AA64ISAR1_EL1.GPI == 0b0001, as described by
+ Documentation/arch/arm64/pointer-authentication.rst.
+
+HWCAP_GCS
+ Functionality implied by ID_AA64PFR1_EL1.GCS == 0b1, as
+ described by Documentation/arch/arm64/gcs.rst.
+
+HWCAP_CMPBR
+ Functionality implied by ID_AA64ISAR2_EL1.CSSC == 0b0010.
+
+HWCAP_FPRCVT
+ Functionality implied by ID_AA64ISAR3_EL1.FPRCVT == 0b0001.
+
+HWCAP_F8MM8
+ Functionality implied by ID_AA64FPFR0_EL1.F8MM8 == 0b0001.
+
+HWCAP_F8MM4
+ Functionality implied by ID_AA64FPFR0_EL1.F8MM4 == 0b0001.
+
+HWCAP_SVE_F16MM
+ Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+ ID_AA64ZFR0_EL1.F16MM == 0b0001.
+
+HWCAP_SVE_ELTPERM
+ Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+ ID_AA64ZFR0_EL1.ELTPERM == 0b0001.
+
+HWCAP_SVE_AES2
+ Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+ ID_AA64ZFR0_EL1.AES == 0b0011.
+
+HWCAP_SVE_BFSCALE
+ Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+ ID_AA64ZFR0_EL1.B16B16 == 0b0010.
+
+HWCAP_SVE2P2
+ Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+ ID_AA64ZFR0_EL1.SVEver == 0b0011.
+
+HWCAP_SME2P2
+ Functionality implied by ID_AA64SMFR0_EL1.SMEver == 0b0011.
+
+HWCAP_SME_SBITPERM
+ Functionality implied by ID_AA64SMFR0_EL1.SBitPerm == 0b1.
+
+HWCAP_SME_AES
+ Functionality implied by ID_AA64SMFR0_EL1.AES == 0b1.
+
+HWCAP_SME_SFEXPA
+ Functionality implied by ID_AA64SMFR0_EL1.SFEXPA == 0b1.
+
+HWCAP_SME_STMOP
+ Functionality implied by ID_AA64SMFR0_EL1.STMOP == 0b1.
+
+HWCAP_SME_SMOP4
+ Functionality implied by ID_AA64SMFR0_EL1.SMOP4 == 0b1.
+
+HWCAP2_DCPODP
+ Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0010.
+
+HWCAP2_SVE2
+ Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+ ID_AA64ZFR0_EL1.SVEver == 0b0001.
+
+HWCAP2_SVEAES
+ Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+ ID_AA64ZFR0_EL1.AES == 0b0001.
+
+HWCAP2_SVEPMULL
+ Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+ ID_AA64ZFR0_EL1.AES == 0b0010.
+
+HWCAP2_SVEBITPERM
+ Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+ ID_AA64ZFR0_EL1.BitPerm == 0b0001.
+
+HWCAP2_SVESHA3
+ Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+ ID_AA64ZFR0_EL1.SHA3 == 0b0001.
+
+HWCAP2_SVESM4
+ Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+ ID_AA64ZFR0_EL1.SM4 == 0b0001.
+
+HWCAP2_FLAGM2
+ Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0010.
+
+HWCAP2_FRINT
+ Functionality implied by ID_AA64ISAR1_EL1.FRINTTS == 0b0001.
+
+HWCAP2_SVEI8MM
+ Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+ ID_AA64ZFR0_EL1.I8MM == 0b0001.
+
+HWCAP2_SVEF32MM
+ Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+ ID_AA64ZFR0_EL1.F32MM == 0b0001.
+
+HWCAP2_SVEF64MM
+ Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+ ID_AA64ZFR0_EL1.F64MM == 0b0001.
+
+HWCAP2_SVEBF16
+ Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+ ID_AA64ZFR0_EL1.BF16 == 0b0001.
+
+HWCAP2_I8MM
+ Functionality implied by ID_AA64ISAR1_EL1.I8MM == 0b0001.
+
+HWCAP2_BF16
+ Functionality implied by ID_AA64ISAR1_EL1.BF16 == 0b0001.
+
+HWCAP2_DGH
+ Functionality implied by ID_AA64ISAR1_EL1.DGH == 0b0001.
+
+HWCAP2_RNG
+ Functionality implied by ID_AA64ISAR0_EL1.RNDR == 0b0001.
+
+HWCAP2_BTI
+ Functionality implied by ID_AA64PFR1_EL1.BT == 0b0001.
+
+HWCAP2_MTE
+ Functionality implied by ID_AA64PFR1_EL1.MTE == 0b0010, as described
+ by Documentation/arch/arm64/memory-tagging-extension.rst.
+
+HWCAP2_ECV
+ Functionality implied by ID_AA64MMFR0_EL1.ECV == 0b0001.
+
+HWCAP2_AFP
+ Functionality implied by ID_AA64MMFR1_EL1.AFP == 0b0001.
+
+HWCAP2_RPRES
+ Functionality implied by ID_AA64ISAR2_EL1.RPRES == 0b0001.
+
+HWCAP2_MTE3
+ Functionality implied by ID_AA64PFR1_EL1.MTE == 0b0011, as described
+ by Documentation/arch/arm64/memory-tagging-extension.rst.
+
+HWCAP2_SME
+ Functionality implied by ID_AA64PFR1_EL1.SME == 0b0001, as described
+ by Documentation/arch/arm64/sme.rst.
+
+HWCAP2_SME_I16I64
+ Functionality implied by ID_AA64SMFR0_EL1.I16I64 == 0b1111.
+
+HWCAP2_SME_F64F64
+ Functionality implied by ID_AA64SMFR0_EL1.F64F64 == 0b1.
+
+HWCAP2_SME_I8I32
+ Functionality implied by ID_AA64SMFR0_EL1.I8I32 == 0b1111.
+
+HWCAP2_SME_F16F32
+ Functionality implied by ID_AA64SMFR0_EL1.F16F32 == 0b1.
+
+HWCAP2_SME_B16F32
+ Functionality implied by ID_AA64SMFR0_EL1.B16F32 == 0b1.
+
+HWCAP2_SME_F32F32
+ Functionality implied by ID_AA64SMFR0_EL1.F32F32 == 0b1.
+
+HWCAP2_SME_FA64
+ Functionality implied by ID_AA64SMFR0_EL1.FA64 == 0b1.
+
+HWCAP2_WFXT
+ Functionality implied by ID_AA64ISAR2_EL1.WFXT == 0b0010.
+
+HWCAP2_EBF16
+ Functionality implied by ID_AA64ISAR1_EL1.BF16 == 0b0010.
+
+HWCAP2_SVE_EBF16
+ Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+ ID_AA64ZFR0_EL1.BF16 == 0b0010.
+
+HWCAP2_CSSC
+ Functionality implied by ID_AA64ISAR2_EL1.CSSC == 0b0001.
+
+HWCAP2_RPRFM
+ Functionality implied by ID_AA64ISAR2_EL1.RPRFM == 0b0001.
+
+HWCAP2_SVE2P1
+ Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+ ID_AA64ZFR0_EL1.SVEver == 0b0010.
+
+HWCAP2_SME2
+ Functionality implied by ID_AA64SMFR0_EL1.SMEver == 0b0001.
+
+HWCAP2_SME2P1
+ Functionality implied by ID_AA64SMFR0_EL1.SMEver == 0b0010.
+
+HWCAP2_SMEI16I32
+ Functionality implied by ID_AA64SMFR0_EL1.I16I32 == 0b0101
+
+HWCAP2_SMEBI32I32
+ Functionality implied by ID_AA64SMFR0_EL1.BI32I32 == 0b1
+
+HWCAP2_SMEB16B16
+ Functionality implied by ID_AA64SMFR0_EL1.B16B16 == 0b1
+
+HWCAP2_SMEF16F16
+ Functionality implied by ID_AA64SMFR0_EL1.F16F16 == 0b1
+
+HWCAP2_MOPS
+ Functionality implied by ID_AA64ISAR2_EL1.MOPS == 0b0001.
+
+HWCAP2_HBC
+ Functionality implied by ID_AA64ISAR2_EL1.BC == 0b0001.
+
+HWCAP2_SVE_B16B16
+ Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+ ID_AA64ZFR0_EL1.B16B16 == 0b0001.
+
+HWCAP2_LRCPC3
+ Functionality implied by ID_AA64ISAR1_EL1.LRCPC == 0b0011.
+
+HWCAP2_LSE128
+ Functionality implied by ID_AA64ISAR0_EL1.Atomic == 0b0011.
+
+HWCAP2_FPMR
+ Functionality implied by ID_AA64PFR2_EL1.FMR == 0b0001.
+
+HWCAP2_LUT
+ Functionality implied by ID_AA64ISAR2_EL1.LUT == 0b0001.
+
+HWCAP2_FAMINMAX
+ Functionality implied by ID_AA64ISAR3_EL1.FAMINMAX == 0b0001.
+
+HWCAP2_F8CVT
+ Functionality implied by ID_AA64FPFR0_EL1.F8CVT == 0b1.
+
+HWCAP2_F8FMA
+ Functionality implied by ID_AA64FPFR0_EL1.F8FMA == 0b1.
+
+HWCAP2_F8DP4
+ Functionality implied by ID_AA64FPFR0_EL1.F8DP4 == 0b1.
+
+HWCAP2_F8DP2
+ Functionality implied by ID_AA64FPFR0_EL1.F8DP2 == 0b1.
+
+HWCAP2_F8E4M3
+ Functionality implied by ID_AA64FPFR0_EL1.F8E4M3 == 0b1.
+
+HWCAP2_F8E5M2
+ Functionality implied by ID_AA64FPFR0_EL1.F8E5M2 == 0b1.
+
+HWCAP2_SME_LUTV2
+ Functionality implied by ID_AA64SMFR0_EL1.LUTv2 == 0b1.
+
+HWCAP2_SME_F8F16
+ Functionality implied by ID_AA64SMFR0_EL1.F8F16 == 0b1.
+
+HWCAP2_SME_F8F32
+ Functionality implied by ID_AA64SMFR0_EL1.F8F32 == 0b1.
+
+HWCAP2_SME_SF8FMA
+ Functionality implied by ID_AA64SMFR0_EL1.SF8FMA == 0b1.
+
+HWCAP2_SME_SF8DP4
+ Functionality implied by ID_AA64SMFR0_EL1.SF8DP4 == 0b1.
+
+HWCAP2_SME_SF8DP2
+ Functionality implied by ID_AA64SMFR0_EL1.SF8DP2 == 0b1.
+
+HWCAP2_SME_SF8DP4
+ Functionality implied by ID_AA64SMFR0_EL1.SF8DP4 == 0b1.
+
+HWCAP2_POE
+ Functionality implied by ID_AA64MMFR3_EL1.S1POE == 0b0001.
+
+HWCAP3_MTE_FAR
+ Functionality implied by ID_AA64PFR2_EL1.MTEFAR == 0b0001.
+
+HWCAP3_MTE_STORE_ONLY
+ Functionality implied by ID_AA64PFR2_EL1.MTESTOREONLY == 0b0001.
+
+HWCAP3_LSFE
+ Functionality implied by ID_AA64ISAR3_EL1.LSFE == 0b0001
+
+
+4. Unused AT_HWCAP bits
+-----------------------
+
+For interoperation with userspace, the kernel guarantees that bits 62
+and 63 of AT_HWCAP will always be returned as 0.
diff --git a/Documentation/arch/arm64/features.rst b/Documentation/arch/arm64/features.rst
new file mode 100644
index 000000000000..03321f4309d0
--- /dev/null
+++ b/Documentation/arch/arm64/features.rst
@@ -0,0 +1,3 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. kernel-feat:: features arm64
diff --git a/Documentation/arch/arm64/gcs.rst b/Documentation/arch/arm64/gcs.rst
new file mode 100644
index 000000000000..226c0b008456
--- /dev/null
+++ b/Documentation/arch/arm64/gcs.rst
@@ -0,0 +1,227 @@
+===============================================
+Guarded Control Stack support for AArch64 Linux
+===============================================
+
+This document outlines briefly the interface provided to userspace by Linux in
+order to support use of the ARM Guarded Control Stack (GCS) feature.
+
+This is an outline of the most important features and issues only and not
+intended to be exhaustive.
+
+
+
+1. General
+-----------
+
+* GCS is an architecture feature intended to provide greater protection
+ against return oriented programming (ROP) attacks and to simplify the
+ implementation of features that need to collect stack traces such as
+ profiling.
+
+* When GCS is enabled a separate guarded control stack is maintained by the
+ PE which is writeable only through specific GCS operations. This
+ stores the call stack only, when a procedure call instruction is
+ performed the current PC is pushed onto the GCS and on RET the
+ address in the LR is verified against that on the top of the GCS.
+
+* When active the current GCS pointer is stored in the system register
+ GCSPR_EL0. This is readable by userspace but can only be updated
+ via specific GCS instructions.
+
+* The architecture provides instructions for switching between guarded
+ control stacks with checks to ensure that the new stack is a valid
+ target for switching.
+
+* The functionality of GCS is similar to that provided by the x86 Shadow
+ Stack feature, due to sharing of userspace interfaces the ABI refers to
+ shadow stacks rather than GCS.
+
+* Support for GCS is reported to userspace via HWCAP_GCS in the aux vector
+ AT_HWCAP entry.
+
+* GCS is enabled per thread. While there is support for disabling GCS
+ at runtime this should be done with great care.
+
+* GCS memory access faults are reported as normal memory access faults.
+
+* GCS specific errors (those reported with EC 0x2d) will be reported as
+ SIGSEGV with a si_code of SEGV_CPERR (control protection error).
+
+* GCS is supported only for AArch64.
+
+* On systems where GCS is supported GCSPR_EL0 is always readable by EL0
+ regardless of the GCS configuration for the thread.
+
+* The architecture supports enabling GCS without verifying that return values
+ in LR match those in the GCS, the LR will be ignored. This is not supported
+ by Linux.
+
+
+
+2. Enabling and disabling Guarded Control Stacks
+-------------------------------------------------
+
+* GCS is enabled and disabled for a thread via the PR_SET_SHADOW_STACK_STATUS
+ prctl(), this takes a single flags argument specifying which GCS features
+ should be used.
+
+* When set PR_SHADOW_STACK_ENABLE flag allocates a Guarded Control Stack
+ and enables GCS for the thread, enabling the functionality controlled by
+ GCSCRE0_EL1.{nTR, RVCHKEN, PCRSEL}.
+
+* When set the PR_SHADOW_STACK_PUSH flag enables the functionality controlled
+ by GCSCRE0_EL1.PUSHMEn, allowing explicit GCS pushes.
+
+* When set the PR_SHADOW_STACK_WRITE flag enables the functionality controlled
+ by GCSCRE0_EL1.STREn, allowing explicit stores to the Guarded Control Stack.
+
+* Any unknown flags will cause PR_SET_SHADOW_STACK_STATUS to return -EINVAL.
+
+* PR_LOCK_SHADOW_STACK_STATUS is passed a bitmask of features with the same
+ values as used for PR_SET_SHADOW_STACK_STATUS. Any future changes to the
+ status of the specified GCS mode bits will be rejected.
+
+* PR_LOCK_SHADOW_STACK_STATUS allows any bit to be locked, this allows
+ userspace to prevent changes to any future features.
+
+* There is no support for a process to remove a lock that has been set for
+ it.
+
+* PR_SET_SHADOW_STACK_STATUS and PR_LOCK_SHADOW_STACK_STATUS affect only the
+ thread that called them, any other running threads will be unaffected.
+
+* New threads inherit the GCS configuration of the thread that created them.
+
+* GCS is disabled on exec().
+
+* The current GCS configuration for a thread may be read with the
+ PR_GET_SHADOW_STACK_STATUS prctl(), this returns the same flags that
+ are passed to PR_SET_SHADOW_STACK_STATUS.
+
+* If GCS is disabled for a thread after having previously been enabled then
+ the stack will remain allocated for the lifetime of the thread. At present
+ any attempt to reenable GCS for the thread will be rejected, this may be
+ revisited in future.
+
+* It should be noted that since enabling GCS will result in GCS becoming
+ active immediately it is not normally possible to return from the function
+ that invoked the prctl() that enabled GCS. It is expected that the normal
+ usage will be that GCS is enabled very early in execution of a program.
+
+
+
+3. Allocation of Guarded Control Stacks
+----------------------------------------
+
+* When GCS is enabled for a thread a new Guarded Control Stack will be
+ allocated for it of half the standard stack size or 2 gigabytes,
+ whichever is smaller.
+
+* When a new thread is created by a thread which has GCS enabled then a
+ new Guarded Control Stack will be allocated for the new thread with
+ half the size of the standard stack.
+
+* When a stack is allocated by enabling GCS or during thread creation then
+ the top 8 bytes of the stack will be initialised to 0 and GCSPR_EL0 will
+ be set to point to the address of this 0 value, this can be used to
+ detect the top of the stack.
+
+* Additional Guarded Control Stacks can be allocated using the
+ map_shadow_stack() system call.
+
+* Stacks allocated using map_shadow_stack() can optionally have an end of
+ stack marker and cap placed at the top of the stack. If the flag
+ SHADOW_STACK_SET_TOKEN is specified a cap will be placed on the stack,
+ if SHADOW_STACK_SET_MARKER is not specified the cap will be the top 8
+ bytes of the stack and if it is specified then the cap will be the next
+ 8 bytes. While specifying just SHADOW_STACK_SET_MARKER by itself is
+ valid since the marker is all bits 0 it has no observable effect.
+
+* Stacks allocated using map_shadow_stack() must have a size which is a
+ multiple of 8 bytes larger than 8 bytes and must be 8 bytes aligned.
+
+* An address can be specified to map_shadow_stack(), if one is provided then
+ it must be aligned to a page boundary.
+
+* When a thread is freed the Guarded Control Stack initially allocated for
+ that thread will be freed. Note carefully that if the stack has been
+ switched this may not be the stack currently in use by the thread.
+
+
+4. Signal handling
+--------------------
+
+* A new signal frame record gcs_context encodes the current GCS mode and
+ pointer for the interrupted context on signal delivery. This will always
+ be present on systems that support GCS.
+
+* The record contains a flag field which reports the current GCS configuration
+ for the interrupted context as PR_GET_SHADOW_STACK_STATUS would.
+
+* The signal handler is run with the same GCS configuration as the interrupted
+ context.
+
+* When GCS is enabled for the interrupted thread a signal handling specific
+ GCS cap token will be written to the GCS, this is an architectural GCS cap
+ with the token type (bits 0..11) all clear. The GCSPR_EL0 reported in the
+ signal frame will point to this cap token.
+
+* The signal handler will use the same GCS as the interrupted context.
+
+* When GCS is enabled on signal entry a frame with the address of the signal
+ return handler will be pushed onto the GCS, allowing return from the signal
+ handler via RET as normal. This will not be reported in the gcs_context in
+ the signal frame.
+
+
+5. Signal return
+-----------------
+
+When returning from a signal handler:
+
+* If there is a gcs_context record in the signal frame then the GCS flags
+ and GCSPR_EL0 will be restored from that context prior to further
+ validation.
+
+* If there is no gcs_context record in the signal frame then the GCS
+ configuration will be unchanged.
+
+* If GCS is enabled on return from a signal handler then GCSPR_EL0 must
+ point to a valid GCS signal cap record, this will be popped from the
+ GCS prior to signal return.
+
+* If the GCS configuration is locked when returning from a signal then any
+ attempt to change the GCS configuration will be treated as an error. This
+ is true even if GCS was not enabled prior to signal entry.
+
+* GCS may be disabled via signal return but any attempt to enable GCS via
+ signal return will be rejected.
+
+
+6. ptrace extensions
+---------------------
+
+* A new regset NT_ARM_GCS is defined for use with PTRACE_GETREGSET and
+ PTRACE_SETREGSET.
+
+* The GCS mode, including enable and disable, may be configured via ptrace.
+ If GCS is enabled via ptrace no new GCS will be allocated for the thread.
+
+* Configuration via ptrace ignores locking of GCS mode bits.
+
+
+7. ELF coredump extensions
+---------------------------
+
+* NT_ARM_GCS notes will be added to each coredump for each thread of the
+ dumped process. The contents will be equivalent to the data that would
+ have been read if a PTRACE_GETREGSET of the corresponding type were
+ executed for each thread when the coredump was generated.
+
+
+
+8. /proc extensions
+--------------------
+
+* Guarded Control Stack pages will include "ss" in their VmFlags in
+ /proc/<pid>/smaps.
diff --git a/Documentation/arch/arm64/hugetlbpage.rst b/Documentation/arch/arm64/hugetlbpage.rst
new file mode 100644
index 000000000000..a110124c11e3
--- /dev/null
+++ b/Documentation/arch/arm64/hugetlbpage.rst
@@ -0,0 +1,43 @@
+.. _hugetlbpage_index:
+
+====================
+HugeTLBpage on ARM64
+====================
+
+Hugepage relies on making efficient use of TLBs to improve performance of
+address translations. The benefit depends on both -
+
+ - the size of hugepages
+ - size of entries supported by the TLBs
+
+The ARM64 port supports two flavours of hugepages.
+
+1) Block mappings at the pud/pmd level
+--------------------------------------
+
+These are regular hugepages where a pmd or a pud page table entry points to a
+block of memory. Regardless of the supported size of entries in TLB, block
+mappings reduce the depth of page table walk needed to translate hugepage
+addresses.
+
+2) Using the Contiguous bit
+---------------------------
+
+The architecture provides a contiguous bit in the translation table entries
+(D4.5.3, ARM DDI 0487C.a) that hints to the MMU to indicate that it is one of a
+contiguous set of entries that can be cached in a single TLB entry.
+
+The contiguous bit is used in Linux to increase the mapping size at the pmd and
+pte (last) level. The number of supported contiguous entries varies by page size
+and level of the page table.
+
+
+The following hugepage sizes are supported -
+
+ ====== ======== ==== ======== ===
+ - CONT PTE PMD CONT PMD PUD
+ ====== ======== ==== ======== ===
+ 4K: 64K 2M 32M 1G
+ 16K: 2M 32M 1G
+ 64K: 2M 512M 16G
+ ====== ======== ==== ======== ===
diff --git a/Documentation/arch/arm64/index.rst b/Documentation/arch/arm64/index.rst
new file mode 100644
index 000000000000..6a012c98bdcd
--- /dev/null
+++ b/Documentation/arch/arm64/index.rst
@@ -0,0 +1,42 @@
+.. _arm64_index:
+
+==================
+ARM64 Architecture
+==================
+
+.. toctree::
+ :maxdepth: 1
+
+ acpi_object_usage
+ amu
+ arm-acpi
+ arm-cca
+ asymmetric-32bit
+ booting
+ cpu-feature-registers
+ cpu-hotplug
+ elf_hwcaps
+ gcs
+ hugetlbpage
+ kdump
+ legacy_instructions
+ memory
+ memory-tagging-extension
+ mops
+ perf
+ pointer-authentication
+ ptdump
+ silicon-errata
+ sme
+ sve
+ tagged-address-abi
+ tagged-pointers
+
+ features
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
diff --git a/Documentation/arch/arm64/kasan-offsets.sh b/Documentation/arch/arm64/kasan-offsets.sh
new file mode 100644
index 000000000000..2dc5f9e18039
--- /dev/null
+++ b/Documentation/arch/arm64/kasan-offsets.sh
@@ -0,0 +1,26 @@
+#!/bin/sh
+
+# Print out the KASAN_SHADOW_OFFSETS required to place the KASAN SHADOW
+# start address at the top of the linear region
+
+print_kasan_offset () {
+ printf "%02d\t" $1
+ printf "0x%08x00000000\n" $(( (0xffffffff & (-1 << ($1 - 1 - 32))) \
+ - (1 << (64 - 32 - $2)) ))
+}
+
+echo KASAN_SHADOW_SCALE_SHIFT = 3
+printf "VABITS\tKASAN_SHADOW_OFFSET\n"
+print_kasan_offset 48 3
+print_kasan_offset 47 3
+print_kasan_offset 42 3
+print_kasan_offset 39 3
+print_kasan_offset 36 3
+echo
+echo KASAN_SHADOW_SCALE_SHIFT = 4
+printf "VABITS\tKASAN_SHADOW_OFFSET\n"
+print_kasan_offset 48 4
+print_kasan_offset 47 4
+print_kasan_offset 42 4
+print_kasan_offset 39 4
+print_kasan_offset 36 4
diff --git a/Documentation/arch/arm64/kdump.rst b/Documentation/arch/arm64/kdump.rst
new file mode 100644
index 000000000000..56a89f45df28
--- /dev/null
+++ b/Documentation/arch/arm64/kdump.rst
@@ -0,0 +1,92 @@
+=======================================
+crashkernel memory reservation on arm64
+=======================================
+
+Author: Baoquan He <bhe@redhat.com>
+
+Kdump mechanism is used to capture a corrupted kernel vmcore so that
+it can be subsequently analyzed. In order to do this, a preliminarily
+reserved memory is needed to pre-load the kdump kernel and boot such
+kernel if corruption happens.
+
+That reserved memory for kdump is adapted to be able to minimally
+accommodate the kdump kernel and the user space programs needed for the
+vmcore collection.
+
+Kernel parameter
+================
+
+Through the kernel parameters below, memory can be reserved accordingly
+during the early stage of the first kernel booting so that a continuous
+large chunk of memomy can be found. The low memory reservation needs to
+be considered if the crashkernel is reserved from the high memory area.
+
+- crashkernel=size@offset
+- crashkernel=size
+- crashkernel=size,high crashkernel=size,low
+
+Low memory and high memory
+==========================
+
+For kdump reservations, low memory is the memory area under a specific
+limit, usually decided by the accessible address bits of the DMA-capable
+devices needed by the kdump kernel to run. Those devices not related to
+vmcore dumping can be ignored. On arm64, the low memory upper bound is
+not fixed: it is 1G on the RPi4 platform but 4G on most other systems.
+On special kernels built with CONFIG_ZONE_(DMA|DMA32) disabled, the
+whole system RAM is low memory. Outside of the low memory described
+above, the rest of system RAM is considered high memory.
+
+Implementation
+==============
+
+1) crashkernel=size@offset
+--------------------------
+
+The crashkernel memory must be reserved at the user-specified region or
+fail if already occupied.
+
+
+2) crashkernel=size
+-------------------
+
+The crashkernel memory region will be reserved in any available position
+according to the search order:
+
+Firstly, the kernel searches the low memory area for an available region
+with the specified size.
+
+If searching for low memory fails, the kernel falls back to searching
+the high memory area for an available region of the specified size. If
+the reservation in high memory succeeds, a default size reservation in
+the low memory will be done. Currently the default size is 128M,
+sufficient for the low memory needs of the kdump kernel.
+
+Note: crashkernel=size is the recommended option for crashkernel kernel
+reservations. The user would not need to know the system memory layout
+for a specific platform.
+
+3) crashkernel=size,high crashkernel=size,low
+---------------------------------------------
+
+crashkernel=size,(high|low) are an important supplement to
+crashkernel=size. They allows the user to specify how much memory needs
+to be allocated from the high memory and low memory respectively. On
+many systems the low memory is precious and crashkernel reservations
+from this area should be kept to a minimum.
+
+To reserve memory for crashkernel=size,high, searching is first
+attempted from the high memory region. If the reservation succeeds, the
+low memory reservation will be done subsequently.
+
+If reservation from the high memory failed, the kernel falls back to
+searching the low memory with the specified size in crashkernel=,high.
+If it succeeds, no further reservation for low memory is needed.
+
+Notes:
+
+- If crashkernel=,low is not specified, the default low memory
+ reservation will be done automatically.
+
+- if crashkernel=0,low is specified, it means that the low memory
+ reservation is omitted intentionally.
diff --git a/Documentation/arch/arm64/legacy_instructions.rst b/Documentation/arch/arm64/legacy_instructions.rst
new file mode 100644
index 000000000000..54401b22cb8f
--- /dev/null
+++ b/Documentation/arch/arm64/legacy_instructions.rst
@@ -0,0 +1,68 @@
+===================
+Legacy instructions
+===================
+
+The arm64 port of the Linux kernel provides infrastructure to support
+emulation of instructions which have been deprecated, or obsoleted in
+the architecture. The infrastructure code uses undefined instruction
+hooks to support emulation. Where available it also allows turning on
+the instruction execution in hardware.
+
+The emulation mode can be controlled by writing to sysctl nodes
+(/proc/sys/abi). The following explains the different execution
+behaviours and the corresponding values of the sysctl nodes -
+
+* Undef
+ Value: 0
+
+ Generates undefined instruction abort. Default for instructions that
+ have been obsoleted in the architecture, e.g., SWP
+
+* Emulate
+ Value: 1
+
+ Uses software emulation. To aid migration of software, in this mode
+ usage of emulated instruction is traced as well as rate limited
+ warnings are issued. This is the default for deprecated
+ instructions, .e.g., CP15 barriers
+
+* Hardware Execution
+ Value: 2
+
+ Although marked as deprecated, some implementations may support the
+ enabling/disabling of hardware support for the execution of these
+ instructions. Using hardware execution generally provides better
+ performance, but at the loss of ability to gather runtime statistics
+ about the use of the deprecated instructions.
+
+The default mode depends on the status of the instruction in the
+architecture. Deprecated instructions should default to emulation
+while obsolete instructions must be undefined by default.
+
+Note: Instruction emulation may not be possible in all cases. See
+individual instruction notes for further information.
+
+Supported legacy instructions
+-----------------------------
+* SWP{B}
+
+:Node: /proc/sys/abi/swp
+:Status: Obsolete
+:Default: Undef (0)
+
+* CP15 Barriers
+
+:Node: /proc/sys/abi/cp15_barrier
+:Status: Deprecated
+:Default: Emulate (1)
+
+* SETEND
+
+:Node: /proc/sys/abi/setend
+:Status: Deprecated
+:Default: Emulate (1)*
+
+ Note: All the cpus on the system must have mixed endian support at EL0
+ for this feature to be enabled. If a new CPU - which doesn't support mixed
+ endian - is hotplugged in after this feature has been enabled, there could
+ be unexpected results in the application.
diff --git a/Documentation/arch/arm64/memory-tagging-extension.rst b/Documentation/arch/arm64/memory-tagging-extension.rst
new file mode 100644
index 000000000000..679725030731
--- /dev/null
+++ b/Documentation/arch/arm64/memory-tagging-extension.rst
@@ -0,0 +1,375 @@
+===============================================
+Memory Tagging Extension (MTE) in AArch64 Linux
+===============================================
+
+Authors: Vincenzo Frascino <vincenzo.frascino@arm.com>
+ Catalin Marinas <catalin.marinas@arm.com>
+
+Date: 2020-02-25
+
+This document describes the provision of the Memory Tagging Extension
+functionality in AArch64 Linux.
+
+Introduction
+============
+
+ARMv8.5 based processors introduce the Memory Tagging Extension (MTE)
+feature. MTE is built on top of the ARMv8.0 virtual address tagging TBI
+(Top Byte Ignore) feature and allows software to access a 4-bit
+allocation tag for each 16-byte granule in the physical address space.
+Such memory range must be mapped with the Normal-Tagged memory
+attribute. A logical tag is derived from bits 59-56 of the virtual
+address used for the memory access. A CPU with MTE enabled will compare
+the logical tag against the allocation tag and potentially raise an
+exception on mismatch, subject to system registers configuration.
+
+Userspace Support
+=================
+
+When ``CONFIG_ARM64_MTE`` is selected and Memory Tagging Extension is
+supported by the hardware, the kernel advertises the feature to
+userspace via ``HWCAP2_MTE``.
+
+PROT_MTE
+--------
+
+To access the allocation tags, a user process must enable the Tagged
+memory attribute on an address range using a new ``prot`` flag for
+``mmap()`` and ``mprotect()``:
+
+``PROT_MTE`` - Pages allow access to the MTE allocation tags.
+
+The allocation tag is set to 0 when such pages are first mapped in the
+user address space and preserved on copy-on-write. ``MAP_SHARED`` is
+supported and the allocation tags can be shared between processes.
+
+**Note**: ``PROT_MTE`` is only supported on ``MAP_ANONYMOUS`` and
+RAM-based file mappings (``tmpfs``, ``memfd``). Passing it to other
+types of mapping will result in ``-EINVAL`` returned by these system
+calls.
+
+**Note**: The ``PROT_MTE`` flag (and corresponding memory type) cannot
+be cleared by ``mprotect()``.
+
+**Note**: ``madvise()`` memory ranges with ``MADV_DONTNEED`` and
+``MADV_FREE`` may have the allocation tags cleared (set to 0) at any
+point after the system call.
+
+Tag Check Faults
+----------------
+
+When ``PROT_MTE`` is enabled on an address range and a mismatch between
+the logical and allocation tags occurs on access, there are three
+configurable behaviours:
+
+- *Ignore* - This is the default mode. The CPU (and kernel) ignores the
+ tag check fault.
+
+- *Synchronous* - The kernel raises a ``SIGSEGV`` synchronously, with
+ ``.si_code = SEGV_MTESERR`` and ``.si_addr = <fault-address>``. The
+ memory access is not performed. If ``SIGSEGV`` is ignored or blocked
+ by the offending thread, the containing process is terminated with a
+ ``coredump``.
+
+- *Asynchronous* - The kernel raises a ``SIGSEGV``, in the offending
+ thread, asynchronously following one or multiple tag check faults,
+ with ``.si_code = SEGV_MTEAERR`` and ``.si_addr = 0`` (the faulting
+ address is unknown).
+
+- *Asymmetric* - Reads are handled as for synchronous mode while writes
+ are handled as for asynchronous mode.
+
+The user can select the above modes, per thread, using the
+``prctl(PR_SET_TAGGED_ADDR_CTRL, flags, 0, 0, 0)`` system call where ``flags``
+contains any number of the following values in the ``PR_MTE_TCF_MASK``
+bit-field:
+
+- ``PR_MTE_TCF_NONE``  - *Ignore* tag check faults
+ (ignored if combined with other options)
+- ``PR_MTE_TCF_SYNC`` - *Synchronous* tag check fault mode
+- ``PR_MTE_TCF_ASYNC`` - *Asynchronous* tag check fault mode
+
+If no modes are specified, tag check faults are ignored. If a single
+mode is specified, the program will run in that mode. If multiple
+modes are specified, the mode is selected as described in the "Per-CPU
+preferred tag checking modes" section below.
+
+The current tag check fault configuration can be read using the
+``prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0)`` system call. If
+multiple modes were requested then all will be reported.
+
+Tag checking can also be disabled for a user thread by setting the
+``PSTATE.TCO`` bit with ``MSR TCO, #1``.
+
+**Note**: Signal handlers are always invoked with ``PSTATE.TCO = 0``,
+irrespective of the interrupted context. ``PSTATE.TCO`` is restored on
+``sigreturn()``.
+
+**Note**: There are no *match-all* logical tags available for user
+applications.
+
+**Note**: Kernel accesses to the user address space (e.g. ``read()``
+system call) are not checked if the user thread tag checking mode is
+``PR_MTE_TCF_NONE`` or ``PR_MTE_TCF_ASYNC``. If the tag checking mode is
+``PR_MTE_TCF_SYNC``, the kernel makes a best effort to check its user
+address accesses, however it cannot always guarantee it. Kernel accesses
+to user addresses are always performed with an effective ``PSTATE.TCO``
+value of zero, regardless of the user configuration.
+
+Excluding Tags in the ``IRG``, ``ADDG`` and ``SUBG`` instructions
+-----------------------------------------------------------------
+
+The architecture allows excluding certain tags to be randomly generated
+via the ``GCR_EL1.Exclude`` register bit-field. By default, Linux
+excludes all tags other than 0. A user thread can enable specific tags
+in the randomly generated set using the ``prctl(PR_SET_TAGGED_ADDR_CTRL,
+flags, 0, 0, 0)`` system call where ``flags`` contains the tags bitmap
+in the ``PR_MTE_TAG_MASK`` bit-field.
+
+**Note**: The hardware uses an exclude mask but the ``prctl()``
+interface provides an include mask. An include mask of ``0`` (exclusion
+mask ``0xffff``) results in the CPU always generating tag ``0``.
+
+Per-CPU preferred tag checking mode
+-----------------------------------
+
+On some CPUs the performance of MTE in stricter tag checking modes
+is similar to that of less strict tag checking modes. This makes it
+worthwhile to enable stricter checks on those CPUs when a less strict
+checking mode is requested, in order to gain the error detection
+benefits of the stricter checks without the performance downsides. To
+support this scenario, a privileged user may configure a stricter
+tag checking mode as the CPU's preferred tag checking mode.
+
+The preferred tag checking mode for each CPU is controlled by
+``/sys/devices/system/cpu/cpu<N>/mte_tcf_preferred``, to which a
+privileged user may write the value ``async``, ``sync`` or ``asymm``. The
+default preferred mode for each CPU is ``async``.
+
+To allow a program to potentially run in the CPU's preferred tag
+checking mode, the user program may set multiple tag check fault mode
+bits in the ``flags`` argument to the ``prctl(PR_SET_TAGGED_ADDR_CTRL,
+flags, 0, 0, 0)`` system call. If both synchronous and asynchronous
+modes are requested then asymmetric mode may also be selected by the
+kernel. If the CPU's preferred tag checking mode is in the task's set
+of provided tag checking modes, that mode will be selected. Otherwise,
+one of the modes in the task's mode will be selected by the kernel
+from the task's mode set using the preference order:
+
+ 1. Asynchronous
+ 2. Asymmetric
+ 3. Synchronous
+
+Note that there is no way for userspace to request multiple modes and
+also disable asymmetric mode.
+
+Initial process state
+---------------------
+
+On ``execve()``, the new process has the following configuration:
+
+- ``PR_TAGGED_ADDR_ENABLE`` set to 0 (disabled)
+- No tag checking modes are selected (tag check faults ignored)
+- ``PR_MTE_TAG_MASK`` set to 0 (all tags excluded)
+- ``PSTATE.TCO`` set to 0
+- ``PROT_MTE`` not set on any of the initial memory maps
+
+On ``fork()``, the new process inherits the parent's configuration and
+memory map attributes with the exception of the ``madvise()`` ranges
+with ``MADV_WIPEONFORK`` which will have the data and tags cleared (set
+to 0).
+
+The ``ptrace()`` interface
+--------------------------
+
+``PTRACE_PEEKMTETAGS`` and ``PTRACE_POKEMTETAGS`` allow a tracer to read
+the tags from or set the tags to a tracee's address space. The
+``ptrace()`` system call is invoked as ``ptrace(request, pid, addr,
+data)`` where:
+
+- ``request`` - one of ``PTRACE_PEEKMTETAGS`` or ``PTRACE_POKEMTETAGS``.
+- ``pid`` - the tracee's PID.
+- ``addr`` - address in the tracee's address space.
+- ``data`` - pointer to a ``struct iovec`` where ``iov_base`` points to
+ a buffer of ``iov_len`` length in the tracer's address space.
+
+The tags in the tracer's ``iov_base`` buffer are represented as one
+4-bit tag per byte and correspond to a 16-byte MTE tag granule in the
+tracee's address space.
+
+**Note**: If ``addr`` is not aligned to a 16-byte granule, the kernel
+will use the corresponding aligned address.
+
+``ptrace()`` return value:
+
+- 0 - tags were copied, the tracer's ``iov_len`` was updated to the
+ number of tags transferred. This may be smaller than the requested
+ ``iov_len`` if the requested address range in the tracee's or the
+ tracer's space cannot be accessed or does not have valid tags.
+- ``-EPERM`` - the specified process cannot be traced.
+- ``-EIO`` - the tracee's address range cannot be accessed (e.g. invalid
+ address) and no tags copied. ``iov_len`` not updated.
+- ``-EFAULT`` - fault on accessing the tracer's memory (``struct iovec``
+ or ``iov_base`` buffer) and no tags copied. ``iov_len`` not updated.
+- ``-EOPNOTSUPP`` - the tracee's address does not have valid tags (never
+ mapped with the ``PROT_MTE`` flag). ``iov_len`` not updated.
+
+**Note**: There are no transient errors for the requests above, so user
+programs should not retry in case of a non-zero system call return.
+
+``PTRACE_GETREGSET`` and ``PTRACE_SETREGSET`` with ``addr ==
+``NT_ARM_TAGGED_ADDR_CTRL`` allow ``ptrace()`` access to the tagged
+address ABI control and MTE configuration of a process as per the
+``prctl()`` options described in
+Documentation/arch/arm64/tagged-address-abi.rst and above. The corresponding
+``regset`` is 1 element of 8 bytes (``sizeof(long))``).
+
+Core dump support
+-----------------
+
+The allocation tags for user memory mapped with ``PROT_MTE`` are dumped
+in the core file as additional ``PT_AARCH64_MEMTAG_MTE`` segments. The
+program header for such segment is defined as:
+
+:``p_type``: ``PT_AARCH64_MEMTAG_MTE``
+:``p_flags``: 0
+:``p_offset``: segment file offset
+:``p_vaddr``: segment virtual address, same as the corresponding
+ ``PT_LOAD`` segment
+:``p_paddr``: 0
+:``p_filesz``: segment size in file, calculated as ``p_mem_sz / 32``
+ (two 4-bit tags cover 32 bytes of memory)
+:``p_memsz``: segment size in memory, same as the corresponding
+ ``PT_LOAD`` segment
+:``p_align``: 0
+
+The tags are stored in the core file at ``p_offset`` as two 4-bit tags
+in a byte. With the tag granule of 16 bytes, a 4K page requires 128
+bytes in the core file.
+
+Example of correct usage
+========================
+
+*MTE Example code*
+
+.. code-block:: c
+
+ /*
+ * To be compiled with -march=armv8.5-a+memtag
+ */
+ #include <errno.h>
+ #include <stdint.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <unistd.h>
+ #include <sys/auxv.h>
+ #include <sys/mman.h>
+ #include <sys/prctl.h>
+
+ /*
+ * From arch/arm64/include/uapi/asm/hwcap.h
+ */
+ #define HWCAP2_MTE (1 << 18)
+
+ /*
+ * From arch/arm64/include/uapi/asm/mman.h
+ */
+ #define PROT_MTE 0x20
+
+ /*
+ * From include/uapi/linux/prctl.h
+ */
+ #define PR_SET_TAGGED_ADDR_CTRL 55
+ #define PR_GET_TAGGED_ADDR_CTRL 56
+ # define PR_TAGGED_ADDR_ENABLE (1UL << 0)
+ # define PR_MTE_TCF_SHIFT 1
+ # define PR_MTE_TCF_NONE (0UL << PR_MTE_TCF_SHIFT)
+ # define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT)
+ # define PR_MTE_TCF_ASYNC (2UL << PR_MTE_TCF_SHIFT)
+ # define PR_MTE_TCF_MASK (3UL << PR_MTE_TCF_SHIFT)
+ # define PR_MTE_TAG_SHIFT 3
+ # define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT)
+
+ /*
+ * Insert a random logical tag into the given pointer.
+ */
+ #define insert_random_tag(ptr) ({ \
+ uint64_t __val; \
+ asm("irg %0, %1" : "=r" (__val) : "r" (ptr)); \
+ __val; \
+ })
+
+ /*
+ * Set the allocation tag on the destination address.
+ */
+ #define set_tag(tagged_addr) do { \
+ asm volatile("stg %0, [%0]" : : "r" (tagged_addr) : "memory"); \
+ } while (0)
+
+ int main()
+ {
+ unsigned char *a;
+ unsigned long page_sz = sysconf(_SC_PAGESIZE);
+ unsigned long hwcap2 = getauxval(AT_HWCAP2);
+
+ /* check if MTE is present */
+ if (!(hwcap2 & HWCAP2_MTE))
+ return EXIT_FAILURE;
+
+ /*
+ * Enable the tagged address ABI, synchronous or asynchronous MTE
+ * tag check faults (based on per-CPU preference) and allow all
+ * non-zero tags in the randomly generated set.
+ */
+ if (prctl(PR_SET_TAGGED_ADDR_CTRL,
+ PR_TAGGED_ADDR_ENABLE | PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC |
+ (0xfffe << PR_MTE_TAG_SHIFT),
+ 0, 0, 0)) {
+ perror("prctl() failed");
+ return EXIT_FAILURE;
+ }
+
+ a = mmap(0, page_sz, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (a == MAP_FAILED) {
+ perror("mmap() failed");
+ return EXIT_FAILURE;
+ }
+
+ /*
+ * Enable MTE on the above anonymous mmap. The flag could be passed
+ * directly to mmap() and skip this step.
+ */
+ if (mprotect(a, page_sz, PROT_READ | PROT_WRITE | PROT_MTE)) {
+ perror("mprotect() failed");
+ return EXIT_FAILURE;
+ }
+
+ /* access with the default tag (0) */
+ a[0] = 1;
+ a[1] = 2;
+
+ printf("a[0] = %hhu a[1] = %hhu\n", a[0], a[1]);
+
+ /* set the logical and allocation tags */
+ a = (unsigned char *)insert_random_tag(a);
+ set_tag(a);
+
+ printf("%p\n", a);
+
+ /* non-zero tag access */
+ a[0] = 3;
+ printf("a[0] = %hhu a[1] = %hhu\n", a[0], a[1]);
+
+ /*
+ * If MTE is enabled correctly the next instruction will generate an
+ * exception.
+ */
+ printf("Expecting SIGSEGV...\n");
+ a[16] = 0xdd;
+
+ /* this should not be printed in the PR_MTE_TCF_SYNC mode */
+ printf("...haven't got one\n");
+
+ return EXIT_FAILURE;
+ }
diff --git a/Documentation/arch/arm64/memory.rst b/Documentation/arch/arm64/memory.rst
new file mode 100644
index 000000000000..678fbb418c3a
--- /dev/null
+++ b/Documentation/arch/arm64/memory.rst
@@ -0,0 +1,100 @@
+==============================
+Memory Layout on AArch64 Linux
+==============================
+
+Author: Catalin Marinas <catalin.marinas@arm.com>
+
+This document describes the virtual memory layout used by the AArch64
+Linux kernel. The architecture allows up to 4 levels of translation
+tables with a 4KB page size and up to 3 levels with a 64KB page size.
+
+AArch64 Linux uses either 3 levels or 4 levels of translation tables
+with the 4KB page configuration, allowing 39-bit (512GB) or 48-bit
+(256TB) virtual addresses, respectively, for both user and kernel. With
+64KB pages, only 2 levels of translation tables, allowing 42-bit (4TB)
+virtual address, are used but the memory layout is the same.
+
+ARMv8.2 adds optional support for Large Virtual Address space. This is
+only available when running with a 64KB page size and expands the
+number of descriptors in the first level of translation.
+
+TTBRx selection is given by bit 55 of the virtual address. The
+swapper_pg_dir contains only kernel (global) mappings while the user pgd
+contains only user (non-global) mappings. The swapper_pg_dir address is
+written to TTBR1 and never written to TTBR0.
+
+When using KVM without the Virtualization Host Extensions, the
+hypervisor maps kernel pages in EL2 at a fixed (and potentially
+random) offset from the linear mapping. See the kern_hyp_va macro and
+kvm_update_va_mask function for more details. MMIO devices such as
+GICv2 gets mapped next to the HYP idmap page, as do vectors when
+ARM64_SPECTRE_V3A is enabled for particular CPUs.
+
+When using KVM with the Virtualization Host Extensions, no additional
+mappings are created, since the host kernel runs directly in EL2.
+
+52-bit VA support in the kernel
+-------------------------------
+If the ARMv8.2-LVA optional feature is present, and we are running
+with a 64KB page size; then it is possible to use 52-bits of address
+space for both userspace and kernel addresses. However, any kernel
+binary that supports 52-bit must also be able to fall back to 48-bit
+at early boot time if the hardware feature is not present.
+
+This fallback mechanism necessitates the kernel .text to be in the
+higher addresses such that they are invariant to 48/52-bit VAs. Due
+to the kasan shadow being a fraction of the entire kernel VA space,
+the end of the kasan shadow must also be in the higher half of the
+kernel VA space for both 48/52-bit. (Switching from 48-bit to 52-bit,
+the end of the kasan shadow is invariant and dependent on ~0UL,
+whilst the start address will "grow" towards the lower addresses).
+
+In order to optimise phys_to_virt and virt_to_phys, the PAGE_OFFSET
+is kept constant at 0xFFF0000000000000 (corresponding to 52-bit),
+this obviates the need for an extra variable read. The physvirt
+offset and vmemmap offsets are computed at early boot to enable
+this logic.
+
+As a single binary will need to support both 48-bit and 52-bit VA
+spaces, the VMEMMAP must be sized large enough for 52-bit VAs and
+also must be sized large enough to accommodate a fixed PAGE_OFFSET.
+
+Most code in the kernel should not need to consider the VA_BITS, for
+code that does need to know the VA size the variables are
+defined as follows:
+
+VA_BITS constant the *maximum* VA space size
+
+VA_BITS_MIN constant the *minimum* VA space size
+
+vabits_actual variable the *actual* VA space size
+
+
+Maximum and minimum sizes can be useful to ensure that buffers are
+sized large enough or that addresses are positioned close enough for
+the "worst" case.
+
+52-bit userspace VAs
+--------------------
+To maintain compatibility with software that relies on the ARMv8.0
+VA space maximum size of 48-bits, the kernel will, by default,
+return virtual addresses to userspace from a 48-bit range.
+
+Software can "opt-in" to receiving VAs from a 52-bit space by
+specifying an mmap hint parameter that is larger than 48-bit.
+
+For example:
+
+.. code-block:: c
+
+ maybe_high_address = mmap(~0UL, size, prot, flags,...);
+
+It is also possible to build a debug kernel that returns addresses
+from a 52-bit space by enabling the following kernel config options:
+
+.. code-block:: sh
+
+ CONFIG_EXPERT=y && CONFIG_ARM64_FORCE_52BIT=y
+
+Note that this option is only intended for debugging applications
+and should not be used in production.
diff --git a/Documentation/arch/arm64/mops.rst b/Documentation/arch/arm64/mops.rst
new file mode 100644
index 000000000000..2ef5b147f8dc
--- /dev/null
+++ b/Documentation/arch/arm64/mops.rst
@@ -0,0 +1,44 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================================
+Memory copy/set instructions (MOPS)
+===================================
+
+A MOPS memory copy/set operation consists of three consecutive CPY* or SET*
+instructions: a prologue, main and epilogue (for example: CPYP, CPYM, CPYE).
+
+A main or epilogue instruction can take a MOPS exception for various reasons,
+for example when a task is migrated to a CPU with a different MOPS
+implementation, or when the instruction's alignment and size requirements are
+not met. The software exception handler is then expected to reset the registers
+and restart execution from the prologue instruction. Normally this is handled
+by the kernel.
+
+For more details refer to "D1.3.5.7 Memory Copy and Memory Set exceptions" in
+the Arm Architecture Reference Manual DDI 0487K.a (Arm ARM).
+
+.. _arm64_mops_hyp:
+
+Hypervisor requirements
+-----------------------
+
+A hypervisor running a Linux guest must handle all MOPS exceptions from the
+guest kernel, as Linux may not be able to handle the exception at all times.
+For example, a MOPS exception can be taken when the hypervisor migrates a vCPU
+to another physical CPU with a different MOPS implementation.
+
+To do this, the hypervisor must:
+
+ - Set HCRX_EL2.MCE2 to 1 so that the exception is taken to the hypervisor.
+
+ - Have an exception handler that implements the algorithm from the Arm ARM
+ rules CNTMJ and MWFQH.
+
+ - Set the guest's PSTATE.SS to 0 in the exception handler, to handle a
+ potential step of the current instruction.
+
+ Note: Clearing PSTATE.SS is needed so that a single step exception is taken
+ on the next instruction (the prologue instruction). Otherwise prologue
+ would get silently stepped over and the single step exception taken on the
+ main instruction. Note that if the guest instruction is not being stepped
+ then clearing PSTATE.SS has no effect.
diff --git a/Documentation/arch/arm64/perf.rst b/Documentation/arch/arm64/perf.rst
new file mode 100644
index 000000000000..997fd716b82f
--- /dev/null
+++ b/Documentation/arch/arm64/perf.rst
@@ -0,0 +1,238 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. _perf_index:
+
+====
+Perf
+====
+
+Perf Event Attributes
+=====================
+
+:Author: Andrew Murray <andrew.murray@arm.com>
+:Date: 2019-03-06
+
+exclude_user
+------------
+
+This attribute excludes userspace.
+
+Userspace always runs at EL0 and thus this attribute will exclude EL0.
+
+
+exclude_kernel
+--------------
+
+This attribute excludes the kernel.
+
+The kernel runs at EL2 with VHE and EL1 without. Guest kernels always run
+at EL1.
+
+For the host this attribute will exclude EL1 and additionally EL2 on a VHE
+system.
+
+For the guest this attribute will exclude EL1. Please note that EL2 is
+never counted within a guest.
+
+
+exclude_hv
+----------
+
+This attribute excludes the hypervisor.
+
+For a VHE host this attribute is ignored as we consider the host kernel to
+be the hypervisor.
+
+For a non-VHE host this attribute will exclude EL2 as we consider the
+hypervisor to be any code that runs at EL2 which is predominantly used for
+guest/host transitions.
+
+For the guest this attribute has no effect. Please note that EL2 is
+never counted within a guest.
+
+
+exclude_host / exclude_guest
+----------------------------
+
+These attributes exclude the KVM host and guest, respectively.
+
+The KVM host may run at EL0 (userspace), EL1 (non-VHE kernel) and EL2 (VHE
+kernel or non-VHE hypervisor).
+
+The KVM guest may run at EL0 (userspace) and EL1 (kernel).
+
+Due to the overlapping exception levels between host and guests we cannot
+exclusively rely on the PMU's hardware exception filtering - therefore we
+must enable/disable counting on the entry and exit to the guest. This is
+performed differently on VHE and non-VHE systems.
+
+For non-VHE systems we exclude EL2 for exclude_host - upon entering and
+exiting the guest we disable/enable the event as appropriate based on the
+exclude_host and exclude_guest attributes.
+
+For VHE systems we exclude EL1 for exclude_guest and exclude both EL0,EL2
+for exclude_host. Upon entering and exiting the guest we modify the event
+to include/exclude EL0 as appropriate based on the exclude_host and
+exclude_guest attributes.
+
+The statements above also apply when these attributes are used within a
+non-VHE guest however please note that EL2 is never counted within a guest.
+
+
+Accuracy
+--------
+
+On non-VHE hosts we enable/disable counters on the entry/exit of host/guest
+transition at EL2 - however there is a period of time between
+enabling/disabling the counters and entering/exiting the guest. We are
+able to eliminate counters counting host events on the boundaries of guest
+entry/exit when counting guest events by filtering out EL2 for
+exclude_host. However when using !exclude_hv there is a small blackout
+window at the guest entry/exit where host events are not captured.
+
+On VHE systems there are no blackout windows.
+
+Perf Userspace PMU Hardware Counter Access
+==========================================
+
+Overview
+--------
+The perf userspace tool relies on the PMU to monitor events. It offers an
+abstraction layer over the hardware counters since the underlying
+implementation is cpu-dependent.
+Arm64 allows userspace tools to have access to the registers storing the
+hardware counters' values directly.
+
+This targets specifically self-monitoring tasks in order to reduce the overhead
+by directly accessing the registers without having to go through the kernel.
+
+How-to
+------
+The focus is set on the armv8 PMUv3 which makes sure that the access to the pmu
+registers is enabled and that the userspace has access to the relevant
+information in order to use them.
+
+In order to have access to the hardware counters, the global sysctl
+kernel/perf_user_access must first be enabled:
+
+.. code-block:: sh
+
+ echo 1 > /proc/sys/kernel/perf_user_access
+
+It is necessary to open the event using the perf tool interface with config1:1
+attr bit set: the sys_perf_event_open syscall returns a fd which can
+subsequently be used with the mmap syscall in order to retrieve a page of memory
+containing information about the event. The PMU driver uses this page to expose
+to the user the hardware counter's index and other necessary data. Using this
+index enables the user to access the PMU registers using the `mrs` instruction.
+Access to the PMU registers is only valid while the sequence lock is unchanged.
+In particular, the PMSELR_EL0 register is zeroed each time the sequence lock is
+changed.
+
+The userspace access is supported in libperf using the perf_evsel__mmap()
+and perf_evsel__read() functions. See `tools/lib/perf/tests/test-evsel.c`_ for
+an example.
+
+About heterogeneous systems
+---------------------------
+On heterogeneous systems such as big.LITTLE, userspace PMU counter access can
+only be enabled when the tasks are pinned to a homogeneous subset of cores and
+the corresponding PMU instance is opened by specifying the 'type' attribute.
+The use of generic event types is not supported in this case.
+
+Have a look at `tools/perf/arch/arm64/tests/user-events.c`_ for an example. It
+can be run using the perf tool to check that the access to the registers works
+correctly from userspace:
+
+.. code-block:: sh
+
+ perf test -v user
+
+About chained events and counter sizes
+--------------------------------------
+The user can request either a 32-bit (config1:0 == 0) or 64-bit (config1:0 == 1)
+counter along with userspace access. The sys_perf_event_open syscall will fail
+if a 64-bit counter is requested and the hardware doesn't support 64-bit
+counters. Chained events are not supported in conjunction with userspace counter
+access. If a 32-bit counter is requested on hardware with 64-bit counters, then
+userspace must treat the upper 32-bits read from the counter as UNKNOWN. The
+'pmc_width' field in the user page will indicate the valid width of the counter
+and should be used to mask the upper bits as needed.
+
+.. Links
+.. _tools/perf/arch/arm64/tests/user-events.c:
+ https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/perf/arch/arm64/tests/user-events.c
+.. _tools/lib/perf/tests/test-evsel.c:
+ https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/lib/perf/tests/test-evsel.c
+
+Event Counting Threshold
+==========================================
+
+Overview
+--------
+
+FEAT_PMUv3_TH (Armv8.8) permits a PMU counter to increment only on
+events whose count meets a specified threshold condition. For example if
+threshold_compare is set to 2 ('Greater than or equal'), and the
+threshold is set to 2, then the PMU counter will now only increment by
+when an event would have previously incremented the PMU counter by 2 or
+more on a single processor cycle.
+
+To increment by 1 after passing the threshold condition instead of the
+number of events on that cycle, add the 'threshold_count' option to the
+commandline.
+
+How-to
+------
+
+These are the parameters for controlling the feature:
+
+.. list-table::
+ :header-rows: 1
+
+ * - Parameter
+ - Description
+ * - threshold
+ - Value to threshold the event by. A value of 0 means that
+ thresholding is disabled and the other parameters have no effect.
+ * - threshold_compare
+ - | Comparison function to use, with the following values supported:
+ |
+ | 0: Not-equal
+ | 1: Equals
+ | 2: Greater-than-or-equal
+ | 3: Less-than
+ * - threshold_count
+ - If this is set, count by 1 after passing the threshold condition
+ instead of the value of the event on this cycle.
+
+The threshold, threshold_compare and threshold_count values can be
+provided per event, for example:
+
+.. code-block:: sh
+
+ perf stat -e stall_slot/threshold=2,threshold_compare=2/ \
+ -e dtlb_walk/threshold=10,threshold_compare=3,threshold_count/
+
+In this example the stall_slot event will count by 2 or more on every
+cycle where 2 or more stalls happen. And dtlb_walk will count by 1 on
+every cycle where the number of dtlb walks were less than 10.
+
+The maximum supported threshold value can be read from the caps of each
+PMU, for example:
+
+.. code-block:: sh
+
+ cat /sys/bus/event_source/devices/armv8_pmuv3/caps/threshold_max
+
+ 0x000000ff
+
+If a value higher than this is given, then opening the event will result
+in an error. The highest possible maximum is 4095, as the config field
+for threshold is limited to 12 bits, and the Perf tool will refuse to
+parse higher values.
+
+If the PMU doesn't support FEAT_PMUv3_TH, then threshold_max will read
+0, and attempting to set a threshold value will also result in an error.
+threshold_max will also read as 0 on aarch32 guests, even if the host
+is running on hardware with the feature.
diff --git a/Documentation/arch/arm64/pointer-authentication.rst b/Documentation/arch/arm64/pointer-authentication.rst
new file mode 100644
index 000000000000..e5dad2e40aa8
--- /dev/null
+++ b/Documentation/arch/arm64/pointer-authentication.rst
@@ -0,0 +1,142 @@
+=======================================
+Pointer authentication in AArch64 Linux
+=======================================
+
+Author: Mark Rutland <mark.rutland@arm.com>
+
+Date: 2017-07-19
+
+This document briefly describes the provision of pointer authentication
+functionality in AArch64 Linux.
+
+
+Architecture overview
+---------------------
+
+The ARMv8.3 Pointer Authentication extension adds primitives that can be
+used to mitigate certain classes of attack where an attacker can corrupt
+the contents of some memory (e.g. the stack).
+
+The extension uses a Pointer Authentication Code (PAC) to determine
+whether pointers have been modified unexpectedly. A PAC is derived from
+a pointer, another value (such as the stack pointer), and a secret key
+held in system registers.
+
+The extension adds instructions to insert a valid PAC into a pointer,
+and to verify/remove the PAC from a pointer. The PAC occupies a number
+of high-order bits of the pointer, which varies dependent on the
+configured virtual address size and whether pointer tagging is in use.
+
+A subset of these instructions have been allocated from the HINT
+encoding space. In the absence of the extension (or when disabled),
+these instructions behave as NOPs. Applications and libraries using
+these instructions operate correctly regardless of the presence of the
+extension.
+
+The extension provides five separate keys to generate PACs - two for
+instruction addresses (APIAKey, APIBKey), two for data addresses
+(APDAKey, APDBKey), and one for generic authentication (APGAKey).
+
+
+Basic support
+-------------
+
+When CONFIG_ARM64_PTR_AUTH is selected, and relevant HW support is
+present, the kernel will assign random key values to each process at
+exec*() time. The keys are shared by all threads within the process, and
+are preserved across fork().
+
+Presence of address authentication functionality is advertised via
+HWCAP_PACA, and generic authentication functionality via HWCAP_PACG.
+
+The number of bits that the PAC occupies in a pointer is 55 minus the
+virtual address size configured by the kernel. For example, with a
+virtual address size of 48, the PAC is 7 bits wide.
+
+When ARM64_PTR_AUTH_KERNEL is selected, the kernel will be compiled
+with HINT space pointer authentication instructions protecting
+function returns. Kernels built with this option will work on hardware
+with or without pointer authentication support.
+
+In addition to exec(), keys can also be reinitialized to random values
+using the PR_PAC_RESET_KEYS prctl. A bitmask of PR_PAC_APIAKEY,
+PR_PAC_APIBKEY, PR_PAC_APDAKEY, PR_PAC_APDBKEY and PR_PAC_APGAKEY
+specifies which keys are to be reinitialized; specifying 0 means "all
+keys".
+
+
+Debugging
+---------
+
+When CONFIG_ARM64_PTR_AUTH is selected, and HW support for address
+authentication is present, the kernel will expose the position of TTBR0
+PAC bits in the NT_ARM_PAC_MASK regset (struct user_pac_mask), which
+userspace can acquire via PTRACE_GETREGSET.
+
+The regset is exposed only when HWCAP_PACA is set. Separate masks are
+exposed for data pointers and instruction pointers, as the set of PAC
+bits can vary between the two. Note that the masks apply to TTBR0
+addresses, and are not valid to apply to TTBR1 addresses (e.g. kernel
+pointers).
+
+Additionally, when CONFIG_CHECKPOINT_RESTORE is also set, the kernel
+will expose the NT_ARM_PACA_KEYS and NT_ARM_PACG_KEYS regsets (struct
+user_pac_address_keys and struct user_pac_generic_keys). These can be
+used to get and set the keys for a thread.
+
+
+Virtualization
+--------------
+
+Pointer authentication is enabled in KVM guest when each virtual cpu is
+initialised by passing flags KVM_ARM_VCPU_PTRAUTH_[ADDRESS/GENERIC] and
+requesting these two separate cpu features to be enabled. The current KVM
+guest implementation works by enabling both features together, so both
+these userspace flags are checked before enabling pointer authentication.
+The separate userspace flag will allow to have no userspace ABI changes
+if support is added in the future to allow these two features to be
+enabled independently of one another.
+
+As Arm Architecture specifies that Pointer Authentication feature is
+implemented along with the VHE feature so KVM arm64 ptrauth code relies
+on VHE mode to be present.
+
+Additionally, when these vcpu feature flags are not set then KVM will
+filter out the Pointer Authentication system key registers from
+KVM_GET/SET_REG_* ioctls and mask those features from cpufeature ID
+register. Any attempt to use the Pointer Authentication instructions will
+result in an UNDEFINED exception being injected into the guest.
+
+
+Enabling and disabling keys
+---------------------------
+
+The prctl PR_PAC_SET_ENABLED_KEYS allows the user program to control which
+PAC keys are enabled in a particular task. It takes two arguments, the
+first being a bitmask of PR_PAC_APIAKEY, PR_PAC_APIBKEY, PR_PAC_APDAKEY
+and PR_PAC_APDBKEY specifying which keys shall be affected by this prctl,
+and the second being a bitmask of the same bits specifying whether the key
+should be enabled or disabled. For example::
+
+ prctl(PR_PAC_SET_ENABLED_KEYS,
+ PR_PAC_APIAKEY | PR_PAC_APIBKEY | PR_PAC_APDAKEY | PR_PAC_APDBKEY,
+ PR_PAC_APIBKEY, 0, 0);
+
+disables all keys except the IB key.
+
+The main reason why this is useful is to enable a userspace ABI that uses PAC
+instructions to sign and authenticate function pointers and other pointers
+exposed outside of the function, while still allowing binaries conforming to
+the ABI to interoperate with legacy binaries that do not sign or authenticate
+pointers.
+
+The idea is that a dynamic loader or early startup code would issue this
+prctl very early after establishing that a process may load legacy binaries,
+but before executing any PAC instructions.
+
+For compatibility with previous kernel versions, processes start up with IA,
+IB, DA and DB enabled, and are reset to this state on exec(). Processes created
+via fork() and clone() inherit the key enabled state from the calling process.
+
+It is recommended to avoid disabling the IA key, as this has higher performance
+overhead than disabling any of the other keys.
diff --git a/Documentation/arch/arm64/ptdump.rst b/Documentation/arch/arm64/ptdump.rst
new file mode 100644
index 000000000000..51eb902ba41a
--- /dev/null
+++ b/Documentation/arch/arm64/ptdump.rst
@@ -0,0 +1,94 @@
+======================
+Kernel page table dump
+======================
+
+ptdump is a debugfs interface that provides a detailed dump of the
+kernel page tables. It offers a comprehensive overview of the kernel
+virtual memory layout as well as the attributes associated with the
+various regions in a human-readable format. It is useful to dump the
+kernel page tables to verify permissions and memory types. Examining the
+page table entries and permissions helps identify potential security
+vulnerabilities such as mappings with overly permissive access rights or
+improper memory protections.
+
+Memory hotplug allows dynamic expansion or contraction of available
+memory without requiring a system reboot. To maintain the consistency
+and integrity of the memory management data structures, arm64 makes use
+of the ``mem_hotplug_lock`` semaphore in write mode. Additionally, in
+read mode, ``mem_hotplug_lock`` supports an efficient implementation of
+``get_online_mems()`` and ``put_online_mems()``. These protect the
+offlining of memory being accessed by the ptdump code.
+
+In order to dump the kernel page tables, enable the following
+configurations and mount debugfs::
+
+ CONFIG_PTDUMP_DEBUGFS=y
+
+ mount -t debugfs nodev /sys/kernel/debug
+ cat /sys/kernel/debug/kernel_page_tables
+
+On analysing the output of ``cat /sys/kernel/debug/kernel_page_tables``
+one can derive information about the virtual address range of the entry,
+followed by size of the memory region covered by this entry, the
+hierarchical structure of the page tables and finally the attributes
+associated with each page. The page attributes provide information about
+access permissions, execution capability, type of mapping such as leaf
+level PTE or block level PGD, PMD and PUD, and access status of a page
+within the kernel memory. Assessing these attributes can assist in
+understanding the memory layout, access patterns and security
+characteristics of the kernel pages.
+
+Kernel virtual memory layout example::
+
+ start address end address size attributes
+ +---------------------------------------------------------------------------------------+
+ | ---[ Linear Mapping start ]---------------------------------------------------------- |
+ | .................. |
+ | 0xfff0000000000000-0xfff0000000210000 2112K PTE RW NX SHD AF UXN MEM/NORMAL-TAGGED |
+ | 0xfff0000000210000-0xfff0000001c00000 26560K PTE ro NX SHD AF UXN MEM/NORMAL |
+ | .................. |
+ | ---[ Linear Mapping end ]------------------------------------------------------------ |
+ +---------------------------------------------------------------------------------------+
+ | ---[ Modules start ]----------------------------------------------------------------- |
+ | .................. |
+ | 0xffff800000000000-0xffff800008000000 128M PTE |
+ | .................. |
+ | ---[ Modules end ]------------------------------------------------------------------- |
+ +---------------------------------------------------------------------------------------+
+ | ---[ vmalloc() area ]---------------------------------------------------------------- |
+ | .................. |
+ | 0xffff800008010000-0xffff800008200000 1984K PTE ro x SHD AF UXN MEM/NORMAL |
+ | 0xffff800008200000-0xffff800008e00000 12M PTE ro x SHD AF CON UXN MEM/NORMAL |
+ | .................. |
+ | ---[ vmalloc() end ]----------------------------------------------------------------- |
+ +---------------------------------------------------------------------------------------+
+ | ---[ Fixmap start ]------------------------------------------------------------------ |
+ | .................. |
+ | 0xfffffbfffdb80000-0xfffffbfffdb90000 64K PTE ro x SHD AF UXN MEM/NORMAL |
+ | 0xfffffbfffdb90000-0xfffffbfffdba0000 64K PTE ro NX SHD AF UXN MEM/NORMAL |
+ | .................. |
+ | ---[ Fixmap end ]-------------------------------------------------------------------- |
+ +---------------------------------------------------------------------------------------+
+ | ---[ PCI I/O start ]----------------------------------------------------------------- |
+ | .................. |
+ | 0xfffffbfffe800000-0xfffffbffff800000 16M PTE |
+ | .................. |
+ | ---[ PCI I/O end ]------------------------------------------------------------------- |
+ +---------------------------------------------------------------------------------------+
+ | ---[ vmemmap start ]----------------------------------------------------------------- |
+ | .................. |
+ | 0xfffffc0002000000-0xfffffc0002200000 2M PTE RW NX SHD AF UXN MEM/NORMAL |
+ | 0xfffffc0002200000-0xfffffc0020000000 478M PTE |
+ | .................. |
+ | ---[ vmemmap end ]------------------------------------------------------------------- |
+ +---------------------------------------------------------------------------------------+
+
+``cat /sys/kernel/debug/kernel_page_tables`` output::
+
+ 0xfff0000001c00000-0xfff0000080000000 2020M PTE RW NX SHD AF UXN MEM/NORMAL-TAGGED
+ 0xfff0000080000000-0xfff0000800000000 30G PMD
+ 0xfff0000800000000-0xfff0000800700000 7M PTE RW NX SHD AF UXN MEM/NORMAL-TAGGED
+ 0xfff0000800700000-0xfff0000800710000 64K PTE ro NX SHD AF UXN MEM/NORMAL-TAGGED
+ 0xfff0000800710000-0xfff0000880000000 2089920K PTE RW NX SHD AF UXN MEM/NORMAL-TAGGED
+ 0xfff0000880000000-0xfff0040000000000 4062G PMD
+ 0xfff0040000000000-0xffff800000000000 3964T PGD
diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst
new file mode 100644
index 000000000000..a7ec57060f64
--- /dev/null
+++ b/Documentation/arch/arm64/silicon-errata.rst
@@ -0,0 +1,307 @@
+=======================================
+Silicon Errata and Software Workarounds
+=======================================
+
+Author: Will Deacon <will.deacon@arm.com>
+
+Date : 27 November 2015
+
+It is an unfortunate fact of life that hardware is often produced with
+so-called "errata", which can cause it to deviate from the architecture
+under specific circumstances. For hardware produced by ARM, these
+errata are broadly classified into the following categories:
+
+ ========== ========================================================
+ Category A A critical error without a viable workaround.
+ Category B A significant or critical error with an acceptable
+ workaround.
+ Category C A minor error that is not expected to occur under normal
+ operation.
+ ========== ========================================================
+
+For more information, consult one of the "Software Developers Errata
+Notice" documents available on infocenter.arm.com (registration
+required).
+
+As far as Linux is concerned, Category B errata may require some special
+treatment in the operating system. For example, avoiding a particular
+sequence of code, or configuring the processor in a particular way. A
+less common situation may require similar actions in order to declassify
+a Category A erratum into a Category C erratum. These are collectively
+known as "software workarounds" and are only required in the minority of
+cases (e.g. those cases that both require a non-secure workaround *and*
+can be triggered by Linux).
+
+For software workarounds that may adversely impact systems unaffected by
+the erratum in question, a Kconfig entry is added under "Kernel
+Features" -> "ARM errata workarounds via the alternatives framework".
+With the exception of workarounds for errata deemed "rare" by Arm, these
+are enabled by default and patched in at runtime when an affected CPU is
+detected. For less-intrusive workarounds, a Kconfig option is not
+available and the code is structured (preferably with a comment) in such
+a way that the erratum will not be hit.
+
+This approach can make it slightly onerous to determine exactly which
+errata are worked around in an arbitrary kernel source tree, so this
+file acts as a registry of software workarounds in the Linux Kernel and
+will be updated when new workarounds are committed and backported to
+stable kernels.
+
++----------------+-----------------+-----------------+-----------------------------+
+| Implementor | Component | Erratum ID | Kconfig |
++================+=================+=================+=============================+
+| Allwinner | A64/R18 | UNKNOWN1 | SUN50I_ERRATUM_UNKNOWN1 |
++----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
+| Ampere | AmpereOne | AC03_CPU_38 | AMPERE_ERRATUM_AC03_CPU_38 |
++----------------+-----------------+-----------------+-----------------------------+
+| Ampere | AmpereOne AC04 | AC04_CPU_10 | AMPERE_ERRATUM_AC03_CPU_38 |
++----------------+-----------------+-----------------+-----------------------------+
+| Ampere | AmpereOne AC04 | AC04_CPU_23 | AMPERE_ERRATUM_AC04_CPU_23 |
++----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A510 | #2457168 | ARM64_ERRATUM_2457168 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A510 | #2064142 | ARM64_ERRATUM_2064142 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A510 | #2038923 | ARM64_ERRATUM_2038923 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A510 | #1902691 | ARM64_ERRATUM_1902691 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A510 | #2051678 | ARM64_ERRATUM_2051678 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A510 | #2077057 | ARM64_ERRATUM_2077057 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A510 | #2441009 | ARM64_ERRATUM_2441009 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A510 | #2658417 | ARM64_ERRATUM_2658417 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A510 | #3117295 | ARM64_ERRATUM_3117295 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A520 | #2966298 | ARM64_ERRATUM_2966298 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A53 | #826319 | ARM64_ERRATUM_826319 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A53 | #827319 | ARM64_ERRATUM_827319 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A53 | #824069 | ARM64_ERRATUM_824069 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A53 | #819472 | ARM64_ERRATUM_819472 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A53 | #845719 | ARM64_ERRATUM_845719 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A53 | #843419 | ARM64_ERRATUM_843419 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A55 | #1024718 | ARM64_ERRATUM_1024718 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A55 | #1530923 | ARM64_ERRATUM_1530923 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A55 | #2441007 | ARM64_ERRATUM_2441007 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A57 | #832075 | ARM64_ERRATUM_832075 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A57 | #852523 | N/A |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A57 | #834220 | ARM64_ERRATUM_834220 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A57 | #1319537 | ARM64_ERRATUM_1319367 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A57 | #1742098 | ARM64_ERRATUM_1742098 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A72 | #853709 | N/A |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A72 | #1319367 | ARM64_ERRATUM_1319367 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A72 | #1655431 | ARM64_ERRATUM_1742098 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A73 | #858921 | ARM64_ERRATUM_858921 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A76 | #1188873,1418040| ARM64_ERRATUM_1418040 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A76 | #1165522 | ARM64_ERRATUM_1165522 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A76 | #1286807 | ARM64_ERRATUM_1286807 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A76 | #1463225 | ARM64_ERRATUM_1463225 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A76 | #1490853 | N/A |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A76 | #3324349 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A77 | #1491015 | N/A |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A77 | #1508412 | ARM64_ERRATUM_1508412 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A77 | #3324348 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A78 | #3324344 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A78C | #3324346,3324347| ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A710 | #2119858 | ARM64_ERRATUM_2119858 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A710 | #2054223 | ARM64_ERRATUM_2054223 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A710 | #2224489 | ARM64_ERRATUM_2224489 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A710 | #3324338 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A715 | #2645198 | ARM64_ERRATUM_2645198 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A715 | #3456084 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A720 | #3456091 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A725 | #3456106 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-X1 | #1502854 | N/A |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-X1 | #3324344 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-X1C | #3324346 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-X2 | #2119858 | ARM64_ERRATUM_2119858 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-X2 | #2224489 | ARM64_ERRATUM_2224489 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-X2 | #3324338 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-X3 | #3324335 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-X4 | #3194386 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-X925 | #3324334 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Neoverse-N1 | #1188873,1418040| ARM64_ERRATUM_1418040 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Neoverse-N1 | #1349291 | N/A |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Neoverse-N1 | #1490853 | N/A |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Neoverse-N1 | #1542419 | ARM64_ERRATUM_1542419 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Neoverse-N1 | #3324349 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Neoverse-N2 | #2139208 | ARM64_ERRATUM_2139208 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Neoverse-N2 | #2067961 | ARM64_ERRATUM_2067961 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Neoverse-N2 | #2253138 | ARM64_ERRATUM_2253138 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Neoverse-N2 | #3324339 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Neoverse-N3 | #3456111 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Neoverse-V1 | #1619801 | N/A |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Neoverse-V1 | #3324341 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Neoverse-V2 | #3324336 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Neoverse-V3 | #3312417 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Neoverse-V3AE | #3312417 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | MMU-500 | #841119,826419 | ARM_SMMU_MMU_500_CPRE_ERRATA|
+| | | #562869,1047329 | |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | MMU-600 | #1076982,1209401| N/A |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | MMU-700 | #2268618,2812531| N/A |
++----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | GIC-700 | #2941627 | ARM64_ERRATUM_2941627 |
++----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
+| Broadcom | Brahma-B53 | N/A | ARM64_ERRATUM_845719 |
++----------------+-----------------+-----------------+-----------------------------+
+| Broadcom | Brahma-B53 | N/A | ARM64_ERRATUM_843419 |
++----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
+| Cavium | ThunderX ITS | #22375,24313 | CAVIUM_ERRATUM_22375 |
++----------------+-----------------+-----------------+-----------------------------+
+| Cavium | ThunderX ITS | #23144 | CAVIUM_ERRATUM_23144 |
++----------------+-----------------+-----------------+-----------------------------+
+| Cavium | ThunderX GICv3 | #23154,38545 | CAVIUM_ERRATUM_23154 |
++----------------+-----------------+-----------------+-----------------------------+
+| Cavium | ThunderX GICv3 | #38539 | N/A |
++----------------+-----------------+-----------------+-----------------------------+
+| Cavium | ThunderX Core | #27456 | CAVIUM_ERRATUM_27456 |
++----------------+-----------------+-----------------+-----------------------------+
+| Cavium | ThunderX Core | #30115 | CAVIUM_ERRATUM_30115 |
++----------------+-----------------+-----------------+-----------------------------+
+| Cavium | ThunderX SMMUv2 | #27704 | N/A |
++----------------+-----------------+-----------------+-----------------------------+
+| Cavium | ThunderX2 SMMUv3| #74 | N/A |
++----------------+-----------------+-----------------+-----------------------------+
+| Cavium | ThunderX2 SMMUv3| #126 | N/A |
++----------------+-----------------+-----------------+-----------------------------+
+| Cavium | ThunderX2 Core | #219 | CAVIUM_TX2_ERRATUM_219 |
++----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
+| Marvell | ARM-MMU-500 | #582743 | N/A |
++----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
+| NVIDIA | Carmel Core | N/A | NVIDIA_CARMEL_CNP_ERRATUM |
++----------------+-----------------+-----------------+-----------------------------+
+| NVIDIA | T241 GICv3/4.x | T241-FABRIC-4 | N/A |
++----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
+| Freescale/NXP | LS2080A/LS1043A | A-008585 | FSL_ERRATUM_A008585 |
++----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
+| Hisilicon | Hip0{5,6,7} | #161010101 | HISILICON_ERRATUM_161010101 |
++----------------+-----------------+-----------------+-----------------------------+
+| Hisilicon | Hip0{6,7} | #161010701 | N/A |
++----------------+-----------------+-----------------+-----------------------------+
+| Hisilicon | Hip0{6,7} | #161010803 | N/A |
++----------------+-----------------+-----------------+-----------------------------+
+| Hisilicon | Hip07 | #161600802 | HISILICON_ERRATUM_161600802 |
++----------------+-----------------+-----------------+-----------------------------+
+| Hisilicon | Hip08 SMMU PMCG | #162001800 | N/A |
++----------------+-----------------+-----------------+-----------------------------+
+| Hisilicon | Hip{08,09,09A,10| #162001900 | N/A |
+| | ,10C,11} | | |
+| | SMMU PMCG | | |
++----------------+-----------------+-----------------+-----------------------------+
+| Hisilicon | Hip09 | #162100801 | HISILICON_ERRATUM_162100801 |
++----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
+| Qualcomm Tech. | Kryo/Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 |
++----------------+-----------------+-----------------+-----------------------------+
+| Qualcomm Tech. | Kryo/Falkor v1 | E1009 | QCOM_FALKOR_ERRATUM_1009 |
++----------------+-----------------+-----------------+-----------------------------+
+| Qualcomm Tech. | QDF2400 ITS | E0065 | QCOM_QDF2400_ERRATUM_0065 |
++----------------+-----------------+-----------------+-----------------------------+
+| Qualcomm Tech. | Falkor v{1,2} | E1041 | QCOM_FALKOR_ERRATUM_1041 |
++----------------+-----------------+-----------------+-----------------------------+
+| Qualcomm Tech. | Kryo4xx Gold | N/A | ARM64_ERRATUM_1463225 |
++----------------+-----------------+-----------------+-----------------------------+
+| Qualcomm Tech. | Kryo4xx Gold | N/A | ARM64_ERRATUM_1418040 |
++----------------+-----------------+-----------------+-----------------------------+
+| Qualcomm Tech. | Kryo4xx Silver | N/A | ARM64_ERRATUM_1530923 |
++----------------+-----------------+-----------------+-----------------------------+
+| Qualcomm Tech. | Kryo4xx Silver | N/A | ARM64_ERRATUM_1024718 |
++----------------+-----------------+-----------------+-----------------------------+
+| Qualcomm Tech. | Kryo4xx Gold | N/A | ARM64_ERRATUM_1286807 |
++----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
+| Rockchip | RK3588 | #3588001 | ROCKCHIP_ERRATUM_3588001 |
++----------------+-----------------+-----------------+-----------------------------+
+| Rockchip | RK3568 | #3568002 | ROCKCHIP_ERRATUM_3568002 |
++----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
+| Fujitsu | A64FX | E#010001 | FUJITSU_ERRATUM_010001 |
++----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
+| ASR | ASR8601 | #8601001 | N/A |
++----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
+| Microsoft | Azure Cobalt 100| #2139208 | ARM64_ERRATUM_2139208 |
++----------------+-----------------+-----------------+-----------------------------+
+| Microsoft | Azure Cobalt 100| #2067961 | ARM64_ERRATUM_2067961 |
++----------------+-----------------+-----------------+-----------------------------+
+| Microsoft | Azure Cobalt 100| #2253138 | ARM64_ERRATUM_2253138 |
++----------------+-----------------+-----------------+-----------------------------+
+| Microsoft | Azure Cobalt 100| #3324339 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
diff --git a/Documentation/arch/arm64/sme.rst b/Documentation/arch/arm64/sme.rst
new file mode 100644
index 000000000000..583f2ee9cb97
--- /dev/null
+++ b/Documentation/arch/arm64/sme.rst
@@ -0,0 +1,461 @@
+===================================================
+Scalable Matrix Extension support for AArch64 Linux
+===================================================
+
+This document outlines briefly the interface provided to userspace by Linux in
+order to support use of the ARM Scalable Matrix Extension (SME).
+
+This is an outline of the most important features and issues only and not
+intended to be exhaustive. It should be read in conjunction with the SVE
+documentation in sve.rst which provides details on the Streaming SVE mode
+included in SME.
+
+This document does not aim to describe the SME architecture or programmer's
+model. To aid understanding, a minimal description of relevant programmer's
+model features for SME is included in Appendix A.
+
+
+1. General
+-----------
+
+* PSTATE.SM, PSTATE.ZA, the streaming mode vector length, the ZA and (when
+ present) ZTn register state and TPIDR2_EL0 are tracked per thread.
+
+* The presence of SME is reported to userspace via HWCAP2_SME in the aux vector
+ AT_HWCAP2 entry. Presence of this flag implies the presence of the SME
+ instructions and registers, and the Linux-specific system interfaces
+ described in this document. SME is reported in /proc/cpuinfo as "sme".
+
+* The presence of SME2 is reported to userspace via HWCAP2_SME2 in the
+ aux vector AT_HWCAP2 entry. Presence of this flag implies the presence of
+ the SME2 instructions and ZT0, and the Linux-specific system interfaces
+ described in this document. SME2 is reported in /proc/cpuinfo as "sme2".
+
+* Support for the execution of SME instructions in userspace can also be
+ detected by reading the CPU ID register ID_AA64PFR1_EL1 using an MRS
+ instruction, and checking that the value of the SME field is nonzero. [3]
+
+ It does not guarantee the presence of the system interfaces described in the
+ following sections: software that needs to verify that those interfaces are
+ present must check for HWCAP2_SME instead.
+
+* There are a number of optional SME features, presence of these is reported
+ through AT_HWCAP2 through:
+
+ HWCAP2_SME_I16I64
+ HWCAP2_SME_F64F64
+ HWCAP2_SME_I8I32
+ HWCAP2_SME_F16F32
+ HWCAP2_SME_B16F32
+ HWCAP2_SME_F32F32
+ HWCAP2_SME_FA64
+ HWCAP2_SME2
+
+ This list may be extended over time as the SME architecture evolves.
+
+ These extensions are also reported via the CPU ID register ID_AA64SMFR0_EL1,
+ which userspace can read using an MRS instruction. See elf_hwcaps.txt and
+ cpu-feature-registers.txt for details.
+
+* Debuggers should restrict themselves to interacting with the target via the
+ NT_ARM_SVE, NT_ARM_SSVE, NT_ARM_ZA and NT_ARM_ZT regsets. The recommended
+ way of detecting support for these regsets is to connect to a target process
+ first and then attempt a
+
+ ptrace(PTRACE_GETREGSET, pid, NT_ARM_<regset>, &iov).
+
+* Whenever ZA register values are exchanged in memory between userspace and
+ the kernel, the register value is encoded in memory as a series of horizontal
+ vectors from 0 to VL/8-1 stored in the same endianness invariant format as is
+ used for SVE vectors.
+
+* On thread creation PSTATE.ZA and TPIDR2_EL0 are preserved unless CLONE_VM
+ is specified, in which case PSTATE.ZA is set to 0 and TPIDR2_EL0 is set to 0.
+
+2. Vector lengths
+------------------
+
+SME defines a second vector length similar to the SVE vector length which
+controls the size of the streaming mode SVE vectors and the ZA matrix array.
+The ZA matrix is square with each side having as many bytes as a streaming
+mode SVE vector.
+
+
+3. System call behaviour
+-------------------------
+
+* On syscall PSTATE.ZA is preserved, if PSTATE.ZA==1 then the contents of the
+ ZA matrix and ZTn (if present) are preserved.
+
+* On syscall PSTATE.SM will be cleared and the SVE registers will be handled
+ as per the standard SVE ABI.
+
+* None of the SVE registers, ZA or ZTn are used to pass arguments to
+ or receive results from any syscall.
+
+* On process creation (eg, clone()) the newly created process will have
+ PSTATE.SM cleared.
+
+* All other SME state of a thread, including the currently configured vector
+ length, the state of the PR_SME_VL_INHERIT flag, and the deferred vector
+ length (if any), is preserved across all syscalls, subject to the specific
+ exceptions for execve() described in section 6.
+
+
+4. Signal handling
+-------------------
+
+* Signal handlers are invoked with PSTATE.SM=0, PSTATE.ZA=0, and TPIDR2_EL0=0.
+
+* A new signal frame record TPIDR2_MAGIC is added formatted as a struct
+ tpidr2_context to allow access to TPIDR2_EL0 from signal handlers.
+
+* A new signal frame record za_context encodes the ZA register contents on
+ signal delivery. [1]
+
+* The signal frame record for ZA always contains basic metadata, in particular
+ the thread's vector length (in za_context.vl).
+
+* The ZA matrix may or may not be included in the record, depending on
+ the value of PSTATE.ZA. The registers are present if and only if:
+ za_context.head.size >= ZA_SIG_CONTEXT_SIZE(sve_vq_from_vl(za_context.vl))
+ in which case PSTATE.ZA == 1.
+
+* If matrix data is present, the remainder of the record has a vl-dependent
+ size and layout. Macros ZA_SIG_* are defined [1] to facilitate access to
+ them.
+
+* The matrix is stored as a series of horizontal vectors in the same format as
+ is used for SVE vectors.
+
+* If the ZA context is too big to fit in sigcontext.__reserved[], then extra
+ space is allocated on the stack, an extra_context record is written in
+ __reserved[] referencing this space. za_context is then written in the
+ extra space. Refer to [1] for further details about this mechanism.
+
+* If ZTn is supported and PSTATE.ZA==1 then a signal frame record for ZTn will
+ be generated.
+
+* The signal record for ZTn has magic ZT_MAGIC (0x5a544e01) and consists of a
+ standard signal frame header followed by a struct zt_context specifying
+ the number of ZTn registers supported by the system, then zt_context.nregs
+ blocks of 64 bytes of data per register.
+
+
+5. Signal return
+-----------------
+
+When returning from a signal handler:
+
+* If there is no za_context record in the signal frame, or if the record is
+ present but contains no register data as described in the previous section,
+ then ZA is disabled.
+
+* If za_context is present in the signal frame and contains matrix data then
+ PSTATE.ZA is set to 1 and ZA is populated with the specified data.
+
+* The vector length cannot be changed via signal return. If za_context.vl in
+ the signal frame does not match the current vector length, the signal return
+ attempt is treated as illegal, resulting in a forced SIGSEGV.
+
+* If ZTn is not supported or PSTATE.ZA==0 then it is illegal to have a
+ signal frame record for ZTn, resulting in a forced SIGSEGV.
+
+
+6. prctl extensions
+--------------------
+
+Some new prctl() calls are added to allow programs to manage the SME vector
+length:
+
+prctl(PR_SME_SET_VL, unsigned long arg)
+
+ Sets the vector length of the calling thread and related flags, where
+ arg == vl | flags. Other threads of the calling process are unaffected.
+
+ vl is the desired vector length, where sve_vl_valid(vl) must be true.
+
+ flags:
+
+ PR_SME_VL_INHERIT
+
+ Inherit the current vector length across execve(). Otherwise, the
+ vector length is reset to the system default at execve(). (See
+ Section 9.)
+
+ PR_SME_SET_VL_ONEXEC
+
+ Defer the requested vector length change until the next execve()
+ performed by this thread.
+
+ The effect is equivalent to implicit execution of the following
+ call immediately after the next execve() (if any) by the thread:
+
+ prctl(PR_SME_SET_VL, arg & ~PR_SME_SET_VL_ONEXEC)
+
+ This allows launching of a new program with a different vector
+ length, while avoiding runtime side effects in the caller.
+
+ Without PR_SME_SET_VL_ONEXEC, the requested change takes effect
+ immediately.
+
+
+ Return value: a nonnegative on success, or a negative value on error:
+ EINVAL: SME not supported, invalid vector length requested, or
+ invalid flags.
+
+
+ On success:
+
+ * Either the calling thread's vector length or the deferred vector length
+ to be applied at the next execve() by the thread (dependent on whether
+ PR_SME_SET_VL_ONEXEC is present in arg), is set to the largest value
+ supported by the system that is less than or equal to vl. If vl ==
+ SVE_VL_MAX, the value set will be the largest value supported by the
+ system.
+
+ * Any previously outstanding deferred vector length change in the calling
+ thread is cancelled.
+
+ * The returned value describes the resulting configuration, encoded as for
+ PR_SME_GET_VL. The vector length reported in this value is the new
+ current vector length for this thread if PR_SME_SET_VL_ONEXEC was not
+ present in arg; otherwise, the reported vector length is the deferred
+ vector length that will be applied at the next execve() by the calling
+ thread.
+
+ * Changing the vector length causes all of ZA, ZTn, P0..P15, FFR and all
+ bits of Z0..Z31 except for Z0 bits [127:0] .. Z31 bits [127:0] to become
+ unspecified, including both streaming and non-streaming SVE state.
+ Calling PR_SME_SET_VL with vl equal to the thread's current vector
+ length, or calling PR_SME_SET_VL with the PR_SME_SET_VL_ONEXEC flag,
+ does not constitute a change to the vector length for this purpose.
+
+ * Changing the vector length causes PSTATE.ZA to be cleared.
+ Calling PR_SME_SET_VL with vl equal to the thread's current vector
+ length, or calling PR_SME_SET_VL with the PR_SME_SET_VL_ONEXEC flag,
+ does not constitute a change to the vector length for this purpose.
+
+
+prctl(PR_SME_GET_VL)
+
+ Gets the vector length of the calling thread.
+
+ The following flag may be OR-ed into the result:
+
+ PR_SME_VL_INHERIT
+
+ Vector length will be inherited across execve().
+
+ There is no way to determine whether there is an outstanding deferred
+ vector length change (which would only normally be the case between a
+ fork() or vfork() and the corresponding execve() in typical use).
+
+ To extract the vector length from the result, bitwise and it with
+ PR_SME_VL_LEN_MASK.
+
+ Return value: a nonnegative value on success, or a negative value on error:
+ EINVAL: SME not supported.
+
+
+7. ptrace extensions
+---------------------
+
+* A new regset NT_ARM_SSVE is defined for access to streaming mode SVE
+ state via PTRACE_GETREGSET and PTRACE_SETREGSET, this is documented in
+ sve.rst.
+
+* A new regset NT_ARM_ZA is defined for ZA state for access to ZA state via
+ PTRACE_GETREGSET and PTRACE_SETREGSET.
+
+ Refer to [2] for definitions.
+
+The regset data starts with struct user_za_header, containing:
+
+ size
+
+ Size of the complete regset, in bytes.
+ This depends on vl and possibly on other things in the future.
+
+ If a call to PTRACE_GETREGSET requests less data than the value of
+ size, the caller can allocate a larger buffer and retry in order to
+ read the complete regset.
+
+ max_size
+
+ Maximum size in bytes that the regset can grow to for the target
+ thread. The regset won't grow bigger than this even if the target
+ thread changes its vector length etc.
+
+ vl
+
+ Target thread's current streaming vector length, in bytes.
+
+ max_vl
+
+ Maximum possible streaming vector length for the target thread.
+
+ flags
+
+ Zero or more of the following flags, which have the same
+ meaning and behaviour as the corresponding PR_SET_VL_* flags:
+
+ SME_PT_VL_INHERIT
+
+ SME_PT_VL_ONEXEC (SETREGSET only).
+
+* The effects of changing the vector length and/or flags are equivalent to
+ those documented for PR_SME_SET_VL.
+
+ The caller must make a further GETREGSET call if it needs to know what VL is
+ actually set by SETREGSET, unless is it known in advance that the requested
+ VL is supported.
+
+* The size and layout of the payload depends on the header fields. The
+ ZA_PT_ZA*() macros are provided to facilitate access to the data.
+
+* In either case, for SETREGSET it is permissible to omit the payload, in which
+ case the vector length and flags are changed and PSTATE.ZA is set to 0
+ (along with any consequences of those changes). If a payload is provided
+ then PSTATE.ZA will be set to 1.
+
+* For SETREGSET, if the requested VL is not supported, the effect will be the
+ same as if the payload were omitted, except that an EIO error is reported.
+ No attempt is made to translate the payload data to the correct layout
+ for the vector length actually set. It is up to the caller to translate the
+ payload layout for the actual VL and retry.
+
+* The effect of writing a partial, incomplete payload is unspecified.
+
+* A new regset NT_ARM_ZT is defined for access to ZTn state via
+ PTRACE_GETREGSET and PTRACE_SETREGSET.
+
+* The NT_ARM_ZT regset consists of a single 512 bit register.
+
+* When PSTATE.ZA==0 reads of NT_ARM_ZT will report all bits of ZTn as 0.
+
+* Writes to NT_ARM_ZT will set PSTATE.ZA to 1.
+
+* If any register data is provided along with SME_PT_VL_ONEXEC then the
+ registers data will be interpreted with the current vector length, not
+ the vector length configured for use on exec.
+
+
+8. ELF coredump extensions
+---------------------------
+
+* NT_ARM_SSVE notes will be added to each coredump for
+ each thread of the dumped process. The contents will be equivalent to the
+ data that would have been read if a PTRACE_GETREGSET of the corresponding
+ type were executed for each thread when the coredump was generated.
+
+* A NT_ARM_ZA note will be added to each coredump for each thread of the
+ dumped process. The contents will be equivalent to the data that would have
+ been read if a PTRACE_GETREGSET of NT_ARM_ZA were executed for each thread
+ when the coredump was generated.
+
+* A NT_ARM_ZT note will be added to each coredump for each thread of the
+ dumped process. The contents will be equivalent to the data that would have
+ been read if a PTRACE_GETREGSET of NT_ARM_ZT were executed for each thread
+ when the coredump was generated.
+
+* The NT_ARM_TLS note will be extended to two registers, the second register
+ will contain TPIDR2_EL0 on systems that support SME and will be read as
+ zero with writes ignored otherwise.
+
+9. System runtime configuration
+--------------------------------
+
+* To mitigate the ABI impact of expansion of the signal frame, a policy
+ mechanism is provided for administrators, distro maintainers and developers
+ to set the default vector length for userspace processes:
+
+/proc/sys/abi/sme_default_vector_length
+
+ Writing the text representation of an integer to this file sets the system
+ default vector length to the specified value rounded to a supported value
+ using the same rules as for setting vector length via PR_SME_SET_VL.
+
+ The result can be determined by reopening the file and reading its
+ contents.
+
+ At boot, the default vector length is initially set to 32 or the maximum
+ supported vector length, whichever is smaller and supported. This
+ determines the initial vector length of the init process (PID 1).
+
+ Reading this file returns the current system default vector length.
+
+* At every execve() call, the new vector length of the new process is set to
+ the system default vector length, unless
+
+ * PR_SME_VL_INHERIT (or equivalently SME_PT_VL_INHERIT) is set for the
+ calling thread, or
+
+ * a deferred vector length change is pending, established via the
+ PR_SME_SET_VL_ONEXEC flag (or SME_PT_VL_ONEXEC).
+
+* Modifying the system default vector length does not affect the vector length
+ of any existing process or thread that does not make an execve() call.
+
+
+Appendix A. SME programmer's model (informative)
+=================================================
+
+This section provides a minimal description of the additions made by SME to the
+ARMv8-A programmer's model that are relevant to this document.
+
+Note: This section is for information only and not intended to be complete or
+to replace any architectural specification.
+
+A.1. Registers
+---------------
+
+In A64 state, SME adds the following:
+
+* A new mode, streaming mode, in which a subset of the normal FPSIMD and SVE
+ features are available. When supported EL0 software may enter and leave
+ streaming mode at any time.
+
+ For best system performance it is strongly encouraged for software to enable
+ streaming mode only when it is actively being used.
+
+* A new vector length controlling the size of ZA and the Z registers when in
+ streaming mode, separately to the vector length used for SVE when not in
+ streaming mode. There is no requirement that either the currently selected
+ vector length or the set of vector lengths supported for the two modes in
+ a given system have any relationship. The streaming mode vector length
+ is referred to as SVL.
+
+* A new ZA matrix register. This is a square matrix of SVLxSVL bits. Most
+ operations on ZA require that streaming mode be enabled but ZA can be
+ enabled without streaming mode in order to load, save and retain data.
+
+ For best system performance it is strongly encouraged for software to enable
+ ZA only when it is actively being used.
+
+* A new ZT0 register is introduced when SME2 is present. This is a 512 bit
+ register which is accessible when PSTATE.ZA is set, as ZA itself is.
+
+* Two new 1 bit fields in PSTATE which may be controlled via the SMSTART and
+ SMSTOP instructions or by access to the SVCR system register:
+
+ * PSTATE.ZA, if this is 1 then the ZA matrix is accessible and has valid
+ data while if it is 0 then ZA can not be accessed. When PSTATE.ZA is
+ changed from 0 to 1 all bits in ZA are cleared.
+
+ * PSTATE.SM, if this is 1 then the PE is in streaming mode. When the value
+ of PSTATE.SM is changed then it is implementation defined if the subset
+ of the floating point register bits valid in both modes may be retained.
+ Any other bits will be cleared.
+
+
+References
+==========
+
+[1] arch/arm64/include/uapi/asm/sigcontext.h
+ AArch64 Linux signal ABI definitions
+
+[2] arch/arm64/include/uapi/asm/ptrace.h
+ AArch64 Linux ptrace ABI definitions
+
+[3] Documentation/arch/arm64/cpu-feature-registers.rst
diff --git a/Documentation/arch/arm64/sve.rst b/Documentation/arch/arm64/sve.rst
new file mode 100644
index 000000000000..28152492c29c
--- /dev/null
+++ b/Documentation/arch/arm64/sve.rst
@@ -0,0 +1,614 @@
+===================================================
+Scalable Vector Extension support for AArch64 Linux
+===================================================
+
+Author: Dave Martin <Dave.Martin@arm.com>
+
+Date: 4 August 2017
+
+This document outlines briefly the interface provided to userspace by Linux in
+order to support use of the ARM Scalable Vector Extension (SVE), including
+interactions with Streaming SVE mode added by the Scalable Matrix Extension
+(SME).
+
+This is an outline of the most important features and issues only and not
+intended to be exhaustive.
+
+This document does not aim to describe the SVE architecture or programmer's
+model. To aid understanding, a minimal description of relevant programmer's
+model features for SVE is included in Appendix A.
+
+
+1. General
+-----------
+
+* SVE registers Z0..Z31, P0..P15 and FFR and the current vector length VL, are
+ tracked per-thread.
+
+* In streaming mode FFR is not accessible unless HWCAP2_SME_FA64 is present
+ in the system, when it is not supported and these interfaces are used to
+ access streaming mode FFR is read and written as zero.
+
+* The presence of SVE is reported to userspace via HWCAP_SVE in the aux vector
+ AT_HWCAP entry. Presence of this flag implies the presence of the SVE
+ instructions and registers, and the Linux-specific system interfaces
+ described in this document. SVE is reported in /proc/cpuinfo as "sve".
+
+* Support for the execution of SVE instructions in userspace can also be
+ detected by reading the CPU ID register ID_AA64PFR0_EL1 using an MRS
+ instruction, and checking that the value of the SVE field is nonzero. [3]
+
+ It does not guarantee the presence of the system interfaces described in the
+ following sections: software that needs to verify that those interfaces are
+ present must check for HWCAP_SVE instead.
+
+* On hardware that supports the SVE2 extensions, HWCAP2_SVE2 will also
+ be reported in the AT_HWCAP2 aux vector entry. In addition to this,
+ optional extensions to SVE2 may be reported by the presence of:
+
+ HWCAP2_SVE2
+ HWCAP2_SVEAES
+ HWCAP2_SVEPMULL
+ HWCAP2_SVEBITPERM
+ HWCAP2_SVESHA3
+ HWCAP2_SVESM4
+ HWCAP2_SVE2P1
+
+ This list may be extended over time as the SVE architecture evolves.
+
+ These extensions are also reported via the CPU ID register ID_AA64ZFR0_EL1,
+ which userspace can read using an MRS instruction. See elf_hwcaps.txt and
+ cpu-feature-registers.txt for details.
+
+* On hardware that supports the SME extensions, HWCAP2_SME will also be
+ reported in the AT_HWCAP2 aux vector entry. Among other things SME adds
+ streaming mode which provides a subset of the SVE feature set using a
+ separate SME vector length and the same Z/V registers. See sme.rst
+ for more details.
+
+* Debuggers should restrict themselves to interacting with the target via the
+ NT_ARM_SVE regset. The recommended way of detecting support for this regset
+ is to connect to a target process first and then attempt a
+ ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov). Note that when SME is
+ present and streaming SVE mode is in use the FPSIMD subset of registers
+ will be read via NT_ARM_SVE and NT_ARM_SVE writes will exit streaming mode
+ in the target.
+
+* Whenever SVE scalable register values (Zn, Pn, FFR) are exchanged in memory
+ between userspace and the kernel, the register value is encoded in memory in
+ an endianness-invariant layout, with bits [(8 * i + 7) : (8 * i)] encoded at
+ byte offset i from the start of the memory representation. This affects for
+ example the signal frame (struct sve_context) and ptrace interface
+ (struct user_sve_header) and associated data.
+
+ Beware that on big-endian systems this results in a different byte order than
+ for the FPSIMD V-registers, which are stored as single host-endian 128-bit
+ values, with bits [(127 - 8 * i) : (120 - 8 * i)] of the register encoded at
+ byte offset i. (struct fpsimd_context, struct user_fpsimd_state).
+
+
+2. Vector length terminology
+-----------------------------
+
+The size of an SVE vector (Z) register is referred to as the "vector length".
+
+To avoid confusion about the units used to express vector length, the kernel
+adopts the following conventions:
+
+* Vector length (VL) = size of a Z-register in bytes
+
+* Vector quadwords (VQ) = size of a Z-register in units of 128 bits
+
+(So, VL = 16 * VQ.)
+
+The VQ convention is used where the underlying granularity is important, such
+as in data structure definitions. In most other situations, the VL convention
+is used. This is consistent with the meaning of the "VL" pseudo-register in
+the SVE instruction set architecture.
+
+
+3. System call behaviour
+-------------------------
+
+* On syscall, V0..V31 are preserved (as without SVE). Thus, bits [127:0] of
+ Z0..Z31 are preserved. All other bits of Z0..Z31, and all of P0..P15 and FFR
+ become zero on return from a syscall.
+
+* The SVE registers are not used to pass arguments to or receive results from
+ any syscall.
+
+* All other SVE state of a thread, including the currently configured vector
+ length, the state of the PR_SVE_VL_INHERIT flag, and the deferred vector
+ length (if any), is preserved across all syscalls, subject to the specific
+ exceptions for execve() described in section 6.
+
+ In particular, on return from a fork() or clone(), the parent and new child
+ process or thread share identical SVE configuration, matching that of the
+ parent before the call.
+
+
+4. Signal handling
+-------------------
+
+* A new signal frame record sve_context encodes the SVE registers on signal
+ delivery. [1]
+
+* This record is supplementary to fpsimd_context. The FPSR and FPCR registers
+ are only present in fpsimd_context. For convenience, the content of V0..V31
+ is duplicated between sve_context and fpsimd_context.
+
+* The record contains a flag field which includes a flag SVE_SIG_FLAG_SM which
+ if set indicates that the thread is in streaming mode and the vector length
+ and register data (if present) describe the streaming SVE data and vector
+ length.
+
+* The signal frame record for SVE always contains basic metadata, in particular
+ the thread's vector length (in sve_context.vl).
+
+* The SVE registers may or may not be included in the record, depending on
+ whether the registers are live for the thread. The registers are present if
+ and only if:
+ sve_context.head.size >= SVE_SIG_CONTEXT_SIZE(sve_vq_from_vl(sve_context.vl)).
+
+* If the registers are present, the remainder of the record has a vl-dependent
+ size and layout. Macros SVE_SIG_* are defined [1] to facilitate access to
+ the members.
+
+* Each scalable register (Zn, Pn, FFR) is stored in an endianness-invariant
+ layout, with bits [(8 * i + 7) : (8 * i)] stored at byte offset i from the
+ start of the register's representation in memory.
+
+* If the SVE context is too big to fit in sigcontext.__reserved[], then extra
+ space is allocated on the stack, an extra_context record is written in
+ __reserved[] referencing this space. sve_context is then written in the
+ extra space. Refer to [1] for further details about this mechanism.
+
+
+5. Signal return
+-----------------
+
+When returning from a signal handler:
+
+* If there is no sve_context record in the signal frame, or if the record is
+ present but contains no register data as described in the previous section,
+ then the SVE registers/bits become non-live and take unspecified values.
+
+* If sve_context is present in the signal frame and contains full register
+ data, the SVE registers become live and are populated with the specified
+ data. However, for backward compatibility reasons, bits [127:0] of Z0..Z31
+ are always restored from the corresponding members of fpsimd_context.vregs[]
+ and not from sve_context. The remaining bits are restored from sve_context.
+
+* Inclusion of fpsimd_context in the signal frame remains mandatory,
+ irrespective of whether sve_context is present or not.
+
+* The vector length cannot be changed via signal return. If sve_context.vl in
+ the signal frame does not match the current vector length, the signal return
+ attempt is treated as illegal, resulting in a forced SIGSEGV.
+
+* It is permitted to enter or leave streaming mode by setting or clearing
+ the SVE_SIG_FLAG_SM flag but applications should take care to ensure that
+ when doing so sve_context.vl and any register data are appropriate for the
+ vector length in the new mode.
+
+
+6. prctl extensions
+--------------------
+
+Some new prctl() calls are added to allow programs to manage the SVE vector
+length:
+
+prctl(PR_SVE_SET_VL, unsigned long arg)
+
+ Sets the vector length of the calling thread and related flags, where
+ arg == vl | flags. Other threads of the calling process are unaffected.
+
+ vl is the desired vector length, where sve_vl_valid(vl) must be true.
+
+ flags:
+
+ PR_SVE_VL_INHERIT
+
+ Inherit the current vector length across execve(). Otherwise, the
+ vector length is reset to the system default at execve(). (See
+ Section 9.)
+
+ PR_SVE_SET_VL_ONEXEC
+
+ Defer the requested vector length change until the next execve()
+ performed by this thread.
+
+ The effect is equivalent to implicit execution of the following
+ call immediately after the next execve() (if any) by the thread:
+
+ prctl(PR_SVE_SET_VL, arg & ~PR_SVE_SET_VL_ONEXEC)
+
+ This allows launching of a new program with a different vector
+ length, while avoiding runtime side effects in the caller.
+
+
+ Without PR_SVE_SET_VL_ONEXEC, the requested change takes effect
+ immediately.
+
+
+ Return value: a nonnegative on success, or a negative value on error:
+ EINVAL: SVE not supported, invalid vector length requested, or
+ invalid flags.
+
+
+ On success:
+
+ * Either the calling thread's vector length or the deferred vector length
+ to be applied at the next execve() by the thread (dependent on whether
+ PR_SVE_SET_VL_ONEXEC is present in arg), is set to the largest value
+ supported by the system that is less than or equal to vl. If vl ==
+ SVE_VL_MAX, the value set will be the largest value supported by the
+ system.
+
+ * Any previously outstanding deferred vector length change in the calling
+ thread is cancelled.
+
+ * The returned value describes the resulting configuration, encoded as for
+ PR_SVE_GET_VL. The vector length reported in this value is the new
+ current vector length for this thread if PR_SVE_SET_VL_ONEXEC was not
+ present in arg; otherwise, the reported vector length is the deferred
+ vector length that will be applied at the next execve() by the calling
+ thread.
+
+ * Changing the vector length causes all of P0..P15, FFR and all bits of
+ Z0..Z31 except for Z0 bits [127:0] .. Z31 bits [127:0] to become
+ unspecified. Calling PR_SVE_SET_VL with vl equal to the thread's current
+ vector length, or calling PR_SVE_SET_VL with the PR_SVE_SET_VL_ONEXEC
+ flag, does not constitute a change to the vector length for this purpose.
+
+
+prctl(PR_SVE_GET_VL)
+
+ Gets the vector length of the calling thread.
+
+ The following flag may be OR-ed into the result:
+
+ PR_SVE_VL_INHERIT
+
+ Vector length will be inherited across execve().
+
+ There is no way to determine whether there is an outstanding deferred
+ vector length change (which would only normally be the case between a
+ fork() or vfork() and the corresponding execve() in typical use).
+
+ To extract the vector length from the result, bitwise and it with
+ PR_SVE_VL_LEN_MASK.
+
+ Return value: a nonnegative value on success, or a negative value on error:
+ EINVAL: SVE not supported.
+
+
+7. ptrace extensions
+---------------------
+
+* New regsets NT_ARM_SVE and NT_ARM_SSVE are defined for use with
+ PTRACE_GETREGSET and PTRACE_SETREGSET. NT_ARM_SSVE describes the
+ streaming mode SVE registers and NT_ARM_SVE describes the
+ non-streaming mode SVE registers.
+
+ In this description a register set is referred to as being "live" when
+ the target is in the appropriate streaming or non-streaming mode and is
+ using data beyond the subset shared with the FPSIMD Vn registers.
+
+ Refer to [2] for definitions.
+
+The regset data starts with struct user_sve_header, containing:
+
+ size
+
+ Size of the complete regset, in bytes.
+ This depends on vl and possibly on other things in the future.
+
+ If a call to PTRACE_GETREGSET requests less data than the value of
+ size, the caller can allocate a larger buffer and retry in order to
+ read the complete regset.
+
+ max_size
+
+ Maximum size in bytes that the regset can grow to for the target
+ thread. The regset won't grow bigger than this even if the target
+ thread changes its vector length etc.
+
+ vl
+
+ Target thread's current vector length, in bytes.
+
+ max_vl
+
+ Maximum possible vector length for the target thread.
+
+ flags
+
+ at most one of
+
+ SVE_PT_REGS_FPSIMD
+
+ SVE registers are not live (GETREGSET) or are to be made
+ non-live (SETREGSET).
+
+ The payload is of type struct user_fpsimd_state, with the same
+ meaning as for NT_PRFPREG, starting at offset
+ SVE_PT_FPSIMD_OFFSET from the start of user_sve_header.
+
+ Extra data might be appended in the future: the size of the
+ payload should be obtained using SVE_PT_FPSIMD_SIZE(vq, flags).
+
+ vq should be obtained using sve_vq_from_vl(vl).
+
+ or
+
+ SVE_PT_REGS_SVE
+
+ SVE registers are live (GETREGSET) or are to be made live
+ (SETREGSET).
+
+ The payload contains the SVE register data, starting at offset
+ SVE_PT_SVE_OFFSET from the start of user_sve_header, and with
+ size SVE_PT_SVE_SIZE(vq, flags);
+
+ ... OR-ed with zero or more of the following flags, which have the same
+ meaning and behaviour as the corresponding PR_SET_VL_* flags:
+
+ SVE_PT_VL_INHERIT
+
+ SVE_PT_VL_ONEXEC (SETREGSET only).
+
+ If neither FPSIMD nor SVE flags are provided then no register
+ payload is available, this is only possible when SME is implemented.
+
+
+* The effects of changing the vector length and/or flags are equivalent to
+ those documented for PR_SVE_SET_VL.
+
+ The caller must make a further GETREGSET call if it needs to know what VL is
+ actually set by SETREGSET, unless is it known in advance that the requested
+ VL is supported.
+
+* In the SVE_PT_REGS_SVE case, the size and layout of the payload depends on
+ the header fields. The SVE_PT_SVE_*() macros are provided to facilitate
+ access to the members.
+
+* In either case, for SETREGSET it is permissible to omit the payload, in which
+ case only the vector length and flags are changed (along with any
+ consequences of those changes).
+
+* In systems supporting SME when in streaming mode a GETREGSET for
+ NT_REG_SVE will return only the user_sve_header with no register data,
+ similarly a GETREGSET for NT_REG_SSVE will not return any register data
+ when not in streaming mode.
+
+* A GETREGSET for NT_ARM_SSVE will never return SVE_PT_REGS_FPSIMD.
+
+* For SETREGSET, if an SVE_PT_REGS_SVE payload is present and the
+ requested VL is not supported, the effect will be the same as if the
+ payload were omitted, except that an EIO error is reported. No
+ attempt is made to translate the payload data to the correct layout
+ for the vector length actually set. The thread's FPSIMD state is
+ preserved, but the remaining bits of the SVE registers become
+ unspecified. It is up to the caller to translate the payload layout
+ for the actual VL and retry.
+
+* Where SME is implemented it is not possible to GETREGSET the register
+ state for normal SVE when in streaming mode, nor the streaming mode
+ register state when in normal mode, regardless of the implementation defined
+ behaviour of the hardware for sharing data between the two modes.
+
+* Any SETREGSET of NT_ARM_SVE will exit streaming mode if the target was in
+ streaming mode and any SETREGSET of NT_ARM_SSVE will enter streaming mode
+ if the target was not in streaming mode.
+
+* If any register data is provided along with SVE_PT_VL_ONEXEC then the
+ registers data will be interpreted with the current vector length, not
+ the vector length configured for use on exec.
+
+* The effect of writing a partial, incomplete payload is unspecified.
+
+
+8. ELF coredump extensions
+---------------------------
+
+* NT_ARM_SVE and NT_ARM_SSVE notes will be added to each coredump for
+ each thread of the dumped process. The contents will be equivalent to the
+ data that would have been read if a PTRACE_GETREGSET of the corresponding
+ type were executed for each thread when the coredump was generated.
+
+9. System runtime configuration
+--------------------------------
+
+* To mitigate the ABI impact of expansion of the signal frame, a policy
+ mechanism is provided for administrators, distro maintainers and developers
+ to set the default vector length for userspace processes:
+
+/proc/sys/abi/sve_default_vector_length
+
+ Writing the text representation of an integer to this file sets the system
+ default vector length to the specified value rounded to a supported value
+ using the same rules as for setting vector length via PR_SVE_SET_VL.
+
+ The result can be determined by reopening the file and reading its
+ contents.
+
+ At boot, the default vector length is initially set to 64 or the maximum
+ supported vector length, whichever is smaller. This determines the initial
+ vector length of the init process (PID 1).
+
+ Reading this file returns the current system default vector length.
+
+* At every execve() call, the new vector length of the new process is set to
+ the system default vector length, unless
+
+ * PR_SVE_VL_INHERIT (or equivalently SVE_PT_VL_INHERIT) is set for the
+ calling thread, or
+
+ * a deferred vector length change is pending, established via the
+ PR_SVE_SET_VL_ONEXEC flag (or SVE_PT_VL_ONEXEC).
+
+* Modifying the system default vector length does not affect the vector length
+ of any existing process or thread that does not make an execve() call.
+
+10. Perf extensions
+--------------------------------
+
+* The arm64 specific DWARF standard [5] added the VG (Vector Granule) register
+ at index 46. This register is used for DWARF unwinding when variable length
+ SVE registers are pushed onto the stack.
+
+* Its value is equivalent to the current SVE vector length (VL) in bits divided
+ by 64.
+
+* The value is included in Perf samples in the regs[46] field if
+ PERF_SAMPLE_REGS_USER is set and the sample_regs_user mask has bit 46 set.
+
+* The value is the current value at the time the sample was taken, and it can
+ change over time.
+
+* If the system doesn't support SVE when perf_event_open is called with these
+ settings, the event will fail to open.
+
+Appendix A. SVE programmer's model (informative)
+=================================================
+
+This section provides a minimal description of the additions made by SVE to the
+ARMv8-A programmer's model that are relevant to this document.
+
+Note: This section is for information only and not intended to be complete or
+to replace any architectural specification.
+
+A.1. Registers
+---------------
+
+In A64 state, SVE adds the following:
+
+* 32 8VL-bit vector registers Z0..Z31
+ For each Zn, Zn bits [127:0] alias the ARMv8-A vector register Vn.
+
+ A register write using a Vn register name zeros all bits of the corresponding
+ Zn except for bits [127:0].
+
+* 16 VL-bit predicate registers P0..P15
+
+* 1 VL-bit special-purpose predicate register FFR (the "first-fault register")
+
+* a VL "pseudo-register" that determines the size of each vector register
+
+ The SVE instruction set architecture provides no way to write VL directly.
+ Instead, it can be modified only by EL1 and above, by writing appropriate
+ system registers.
+
+* The value of VL can be configured at runtime by EL1 and above:
+ 16 <= VL <= VLmax, where VL must be a multiple of 16.
+
+* The maximum vector length is determined by the hardware:
+ 16 <= VLmax <= 256.
+
+ (The SVE architecture specifies 256, but permits future architecture
+ revisions to raise this limit.)
+
+* FPSR and FPCR are retained from ARMv8-A, and interact with SVE floating-point
+ operations in a similar way to the way in which they interact with ARMv8
+ floating-point operations::
+
+ 8VL-1 128 0 bit index
+ +---- //// -----------------+
+ Z0 | : V0 |
+ : :
+ Z7 | : V7 |
+ Z8 | : * V8 |
+ : : :
+ Z15 | : *V15 |
+ Z16 | : V16 |
+ : :
+ Z31 | : V31 |
+ +---- //// -----------------+
+ 31 0
+ VL-1 0 +-------+
+ +---- //// --+ FPSR | |
+ P0 | | +-------+
+ : | | *FPCR | |
+ P15 | | +-------+
+ +---- //// --+
+ FFR | | +-----+
+ +---- //// --+ VL | |
+ +-----+
+
+(*) callee-save:
+ This only applies to bits [63:0] of Z-/V-registers.
+ FPCR contains callee-save and caller-save bits. See [4] for details.
+
+
+A.2. Procedure call standard
+-----------------------------
+
+The ARMv8-A base procedure call standard is extended as follows with respect to
+the additional SVE register state:
+
+* All SVE register bits that are not shared with FP/SIMD are caller-save.
+
+* Z8 bits [63:0] .. Z15 bits [63:0] are callee-save.
+
+ This follows from the way these bits are mapped to V8..V15, which are caller-
+ save in the base procedure call standard.
+
+
+Appendix B. ARMv8-A FP/SIMD programmer's model
+===============================================
+
+Note: This section is for information only and not intended to be complete or
+to replace any architectural specification.
+
+Refer to [4] for more information.
+
+ARMv8-A defines the following floating-point / SIMD register state:
+
+* 32 128-bit vector registers V0..V31
+* 2 32-bit status/control registers FPSR, FPCR
+
+::
+
+ 127 0 bit index
+ +---------------+
+ V0 | |
+ : : :
+ V7 | |
+ * V8 | |
+ : : : :
+ *V15 | |
+ V16 | |
+ : : :
+ V31 | |
+ +---------------+
+
+ 31 0
+ +-------+
+ FPSR | |
+ +-------+
+ *FPCR | |
+ +-------+
+
+(*) callee-save:
+ This only applies to bits [63:0] of V-registers.
+ FPCR contains a mixture of callee-save and caller-save bits.
+
+
+References
+==========
+
+[1] arch/arm64/include/uapi/asm/sigcontext.h
+ AArch64 Linux signal ABI definitions
+
+[2] arch/arm64/include/uapi/asm/ptrace.h
+ AArch64 Linux ptrace ABI definitions
+
+[3] Documentation/arch/arm64/cpu-feature-registers.rst
+
+[4] ARM IHI0055C
+ http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055c/IHI0055C_beta_aapcs64.pdf
+ http://infocenter.arm.com/help/topic/com.arm.doc.subset.swdev.abi/index.html
+ Procedure Call Standard for the ARM 64-bit Architecture (AArch64)
+
+[5] https://github.com/ARM-software/abi-aa/blob/main/aadwarf64/aadwarf64.rst
diff --git a/Documentation/arch/arm64/tagged-address-abi.rst b/Documentation/arch/arm64/tagged-address-abi.rst
new file mode 100644
index 000000000000..fe24a3f158c5
--- /dev/null
+++ b/Documentation/arch/arm64/tagged-address-abi.rst
@@ -0,0 +1,179 @@
+==========================
+AArch64 TAGGED ADDRESS ABI
+==========================
+
+Authors: Vincenzo Frascino <vincenzo.frascino@arm.com>
+ Catalin Marinas <catalin.marinas@arm.com>
+
+Date: 21 August 2019
+
+This document describes the usage and semantics of the Tagged Address
+ABI on AArch64 Linux.
+
+1. Introduction
+---------------
+
+On AArch64 the ``TCR_EL1.TBI0`` bit is set by default, allowing
+userspace (EL0) to perform memory accesses through 64-bit pointers with
+a non-zero top byte. This document describes the relaxation of the
+syscall ABI that allows userspace to pass certain tagged pointers to
+kernel syscalls.
+
+2. AArch64 Tagged Address ABI
+-----------------------------
+
+From the kernel syscall interface perspective and for the purposes of
+this document, a "valid tagged pointer" is a pointer with a potentially
+non-zero top-byte that references an address in the user process address
+space obtained in one of the following ways:
+
+- ``mmap()`` syscall where either:
+
+ - flags have the ``MAP_ANONYMOUS`` bit set or
+ - the file descriptor refers to a regular file (including those
+ returned by ``memfd_create()``) or ``/dev/zero``
+
+- ``brk()`` syscall (i.e. the heap area between the initial location of
+ the program break at process creation and its current location).
+
+- any memory mapped by the kernel in the address space of the process
+ during creation and with the same restrictions as for ``mmap()`` above
+ (e.g. data, bss, stack).
+
+The AArch64 Tagged Address ABI has two stages of relaxation depending on
+how the user addresses are used by the kernel:
+
+1. User addresses not accessed by the kernel but used for address space
+ management (e.g. ``mprotect()``, ``madvise()``). The use of valid
+ tagged pointers in this context is allowed with these exceptions:
+
+ - ``brk()``, ``mmap()`` and the ``new_address`` argument to
+ ``mremap()`` as these have the potential to alias with existing
+ user addresses.
+
+ NOTE: This behaviour changed in v5.6 and so some earlier kernels may
+ incorrectly accept valid tagged pointers for the ``brk()``,
+ ``mmap()`` and ``mremap()`` system calls.
+
+ - The ``range.start``, ``start`` and ``dst`` arguments to the
+ ``UFFDIO_*`` ``ioctl()``s used on a file descriptor obtained from
+ ``userfaultfd()``, as fault addresses subsequently obtained by reading
+ the file descriptor will be untagged, which may otherwise confuse
+ tag-unaware programs.
+
+ NOTE: This behaviour changed in v5.14 and so some earlier kernels may
+ incorrectly accept valid tagged pointers for this system call.
+
+2. User addresses accessed by the kernel (e.g. ``write()``). This ABI
+ relaxation is disabled by default and the application thread needs to
+ explicitly enable it via ``prctl()`` as follows:
+
+ - ``PR_SET_TAGGED_ADDR_CTRL``: enable or disable the AArch64 Tagged
+ Address ABI for the calling thread.
+
+ The ``(unsigned int) arg2`` argument is a bit mask describing the
+ control mode used:
+
+ - ``PR_TAGGED_ADDR_ENABLE``: enable AArch64 Tagged Address ABI.
+ Default status is disabled.
+
+ Arguments ``arg3``, ``arg4``, and ``arg5`` must be 0.
+
+ - ``PR_GET_TAGGED_ADDR_CTRL``: get the status of the AArch64 Tagged
+ Address ABI for the calling thread.
+
+ Arguments ``arg2``, ``arg3``, ``arg4``, and ``arg5`` must be 0.
+
+ The ABI properties described above are thread-scoped, inherited on
+ clone() and fork() and cleared on exec().
+
+ Calling ``prctl(PR_SET_TAGGED_ADDR_CTRL, PR_TAGGED_ADDR_ENABLE, 0, 0, 0)``
+ returns ``-EINVAL`` if the AArch64 Tagged Address ABI is globally
+ disabled by ``sysctl abi.tagged_addr_disabled=1``. The default
+ ``sysctl abi.tagged_addr_disabled`` configuration is 0.
+
+When the AArch64 Tagged Address ABI is enabled for a thread, the
+following behaviours are guaranteed:
+
+- All syscalls except the cases mentioned in section 3 can accept any
+ valid tagged pointer.
+
+- The syscall behaviour is undefined for invalid tagged pointers: it may
+ result in an error code being returned, a (fatal) signal being raised,
+ or other modes of failure.
+
+- The syscall behaviour for a valid tagged pointer is the same as for
+ the corresponding untagged pointer.
+
+
+A definition of the meaning of tagged pointers on AArch64 can be found
+in Documentation/arch/arm64/tagged-pointers.rst.
+
+3. AArch64 Tagged Address ABI Exceptions
+-----------------------------------------
+
+The following system call parameters must be untagged regardless of the
+ABI relaxation:
+
+- ``prctl()`` other than pointers to user data either passed directly or
+ indirectly as arguments to be accessed by the kernel.
+
+- ``ioctl()`` other than pointers to user data either passed directly or
+ indirectly as arguments to be accessed by the kernel.
+
+- ``shmat()`` and ``shmdt()``.
+
+- ``brk()`` (since kernel v5.6).
+
+- ``mmap()`` (since kernel v5.6).
+
+- ``mremap()``, the ``new_address`` argument (since kernel v5.6).
+
+Any attempt to use non-zero tagged pointers may result in an error code
+being returned, a (fatal) signal being raised, or other modes of
+failure.
+
+4. Example of correct usage
+---------------------------
+.. code-block:: c
+
+ #include <stdlib.h>
+ #include <string.h>
+ #include <unistd.h>
+ #include <sys/mman.h>
+ #include <sys/prctl.h>
+
+ #define PR_SET_TAGGED_ADDR_CTRL 55
+ #define PR_TAGGED_ADDR_ENABLE (1UL << 0)
+
+ #define TAG_SHIFT 56
+
+ int main(void)
+ {
+ int tbi_enabled = 0;
+ unsigned long tag = 0;
+ char *ptr;
+
+ /* check/enable the tagged address ABI */
+ if (!prctl(PR_SET_TAGGED_ADDR_CTRL, PR_TAGGED_ADDR_ENABLE, 0, 0, 0))
+ tbi_enabled = 1;
+
+ /* memory allocation */
+ ptr = mmap(NULL, sysconf(_SC_PAGE_SIZE), PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (ptr == MAP_FAILED)
+ return 1;
+
+ /* set a non-zero tag if the ABI is available */
+ if (tbi_enabled)
+ tag = rand() & 0xff;
+ ptr = (char *)((unsigned long)ptr | (tag << TAG_SHIFT));
+
+ /* memory access to a tagged address */
+ strcpy(ptr, "tagged pointer\n");
+
+ /* syscall with a tagged pointer */
+ write(1, ptr, strlen(ptr));
+
+ return 0;
+ }
diff --git a/Documentation/arch/arm64/tagged-pointers.rst b/Documentation/arch/arm64/tagged-pointers.rst
new file mode 100644
index 000000000000..f87a925ca9a5
--- /dev/null
+++ b/Documentation/arch/arm64/tagged-pointers.rst
@@ -0,0 +1,89 @@
+=========================================
+Tagged virtual addresses in AArch64 Linux
+=========================================
+
+Author: Will Deacon <will.deacon@arm.com>
+
+Date : 12 June 2013
+
+This document briefly describes the provision of tagged virtual
+addresses in the AArch64 translation system and their potential uses
+in AArch64 Linux.
+
+The kernel configures the translation tables so that translations made
+via TTBR0 (i.e. userspace mappings) have the top byte (bits 63:56) of
+the virtual address ignored by the translation hardware. This frees up
+this byte for application use.
+
+
+Passing tagged addresses to the kernel
+--------------------------------------
+
+All interpretation of userspace memory addresses by the kernel assumes
+an address tag of 0x00, unless the application enables the AArch64
+Tagged Address ABI explicitly
+(Documentation/arch/arm64/tagged-address-abi.rst).
+
+This includes, but is not limited to, addresses found in:
+
+ - pointer arguments to system calls, including pointers in structures
+ passed to system calls,
+
+ - the stack pointer (sp), e.g. when interpreting it to deliver a
+ signal,
+
+ - the frame pointer (x29) and frame records, e.g. when interpreting
+ them to generate a backtrace or call graph.
+
+Using non-zero address tags in any of these locations when the
+userspace application did not enable the AArch64 Tagged Address ABI may
+result in an error code being returned, a (fatal) signal being raised,
+or other modes of failure.
+
+For these reasons, when the AArch64 Tagged Address ABI is disabled,
+passing non-zero address tags to the kernel via system calls is
+forbidden, and using a non-zero address tag for sp is strongly
+discouraged.
+
+Programs maintaining a frame pointer and frame records that use non-zero
+address tags may suffer impaired or inaccurate debug and profiling
+visibility.
+
+
+Preserving tags
+---------------
+
+When delivering signals, non-zero tags are not preserved in
+siginfo.si_addr unless the flag SA_EXPOSE_TAGBITS was set in
+sigaction.sa_flags when the signal handler was installed. This means
+that signal handlers in applications making use of tags cannot rely
+on the tag information for user virtual addresses being maintained
+in these fields unless the flag was set.
+
+If FEAT_MTE_TAGGED_FAR (Armv8.9) is supported, bits 63:60 of the fault address
+are preserved in response to synchronous tag check faults (SEGV_MTESERR)
+otherwise not preserved even if SA_EXPOSE_TAGBITS was set.
+Applications should interpret the values of these bits based on
+the support for the HWCAP3_MTE_FAR. If the support is not present,
+the values of these bits should be considered as undefined otherwise valid.
+
+For signals raised in response to watchpoint debug exceptions, the
+tag information will be preserved regardless of the SA_EXPOSE_TAGBITS
+flag setting.
+
+Non-zero tags are never preserved in sigcontext.fault_address
+regardless of the SA_EXPOSE_TAGBITS flag setting.
+
+The architecture prevents the use of a tagged PC, so the upper byte will
+be set to a sign-extension of bit 55 on exception return.
+
+This behaviour is maintained when the AArch64 Tagged Address ABI is
+enabled.
+
+
+Other considerations
+--------------------
+
+Special care should be taken when using tagged pointers, since it is
+likely that C compilers will not hazard two virtual addresses differing
+only in the upper byte.
diff --git a/Documentation/arch/index.rst b/Documentation/arch/index.rst
new file mode 100644
index 000000000000..3f9962e45c09
--- /dev/null
+++ b/Documentation/arch/index.rst
@@ -0,0 +1,27 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+CPU Architectures
+=================
+
+These books provide programming details about architecture-specific
+implementation.
+
+.. toctree::
+ :maxdepth: 2
+
+ arc/index
+ arm/index
+ arm64/index
+ loongarch/index
+ m68k/index
+ mips/index
+ nios2/index
+ openrisc/index
+ parisc/index
+ powerpc/index
+ riscv/index
+ s390/index
+ sh/index
+ sparc/index
+ x86/index
+ xtensa/index
diff --git a/Documentation/arch/loongarch/booting.rst b/Documentation/arch/loongarch/booting.rst
new file mode 100644
index 000000000000..91eccd410478
--- /dev/null
+++ b/Documentation/arch/loongarch/booting.rst
@@ -0,0 +1,42 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======================
+Booting Linux/LoongArch
+=======================
+
+:Author: Yanteng Si <siyanteng@loongson.cn>
+:Date: 18 Nov 2022
+
+Information passed from BootLoader to kernel
+============================================
+
+LoongArch supports ACPI and FDT. The information that needs to be passed
+to the kernel includes the memmap, the initrd, the command line, optionally
+the ACPI/FDT tables, and so on.
+
+The kernel is passed the following arguments on `kernel_entry` :
+
+ - a0 = efi_boot: `efi_boot` is a flag indicating whether
+ this boot environment is fully UEFI-compliant.
+
+ - a1 = cmdline: `cmdline` is a pointer to the kernel command line.
+
+ - a2 = systemtable: `systemtable` points to the EFI system table.
+ All pointers involved at this stage are in physical addresses.
+
+Header of Linux/LoongArch kernel images
+=======================================
+
+Linux/LoongArch kernel images are EFI images. Being PE files, they have
+a 64-byte header structured like::
+
+ u32 MZ_MAGIC /* "MZ", MS-DOS header */
+ u32 res0 = 0 /* Reserved */
+ u64 kernel_entry /* Kernel entry point */
+ u64 _end - _text /* Kernel image effective size */
+ u64 load_offset /* Kernel image load offset from start of RAM */
+ u64 res1 = 0 /* Reserved */
+ u64 res2 = 0 /* Reserved */
+ u64 res3 = 0 /* Reserved */
+ u32 LINUX_PE_MAGIC /* Magic number */
+ u32 pe_header - _head /* Offset to the PE header */
diff --git a/Documentation/arch/loongarch/features.rst b/Documentation/arch/loongarch/features.rst
new file mode 100644
index 000000000000..009f44c7951f
--- /dev/null
+++ b/Documentation/arch/loongarch/features.rst
@@ -0,0 +1,3 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. kernel-feat:: features loongarch
diff --git a/Documentation/arch/loongarch/index.rst b/Documentation/arch/loongarch/index.rst
new file mode 100644
index 000000000000..c779bfa00c05
--- /dev/null
+++ b/Documentation/arch/loongarch/index.rst
@@ -0,0 +1,22 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================
+LoongArch Architecture
+======================
+
+.. toctree::
+ :maxdepth: 2
+ :numbered:
+
+ introduction
+ booting
+ irq-chip-model
+
+ features
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
diff --git a/Documentation/arch/loongarch/introduction.rst b/Documentation/arch/loongarch/introduction.rst
new file mode 100644
index 000000000000..5e6db78abeaf
--- /dev/null
+++ b/Documentation/arch/loongarch/introduction.rst
@@ -0,0 +1,390 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================
+Introduction to LoongArch
+=========================
+
+LoongArch is a new RISC ISA, which is a bit like MIPS or RISC-V. There are
+currently 3 variants: a reduced 32-bit version (LA32R), a standard 32-bit
+version (LA32S) and a 64-bit version (LA64). There are 4 privilege levels
+(PLVs) defined in LoongArch: PLV0~PLV3, from high to low. Kernel runs at PLV0
+while applications run at PLV3. This document introduces the registers, basic
+instruction set, virtual memory and some other topics of LoongArch.
+
+Registers
+=========
+
+LoongArch registers include general purpose registers (GPRs), floating point
+registers (FPRs), vector registers (VRs) and control status registers (CSRs)
+used in privileged mode (PLV0).
+
+GPRs
+----
+
+LoongArch has 32 GPRs ( ``$r0`` ~ ``$r31`` ); each one is 32-bit wide in LA32
+and 64-bit wide in LA64. ``$r0`` is hard-wired to zero, and the other registers
+are not architecturally special. (Except ``$r1``, which is hard-wired as the
+link register of the BL instruction.)
+
+The kernel uses a variant of the LoongArch register convention, as described in
+the LoongArch ELF psABI spec, in :ref:`References <loongarch-references>`:
+
+================= =============== =================== ============
+Name Alias Usage Preserved
+ across calls
+================= =============== =================== ============
+``$r0`` ``$zero`` Constant zero Unused
+``$r1`` ``$ra`` Return address No
+``$r2`` ``$tp`` TLS/Thread pointer Unused
+``$r3`` ``$sp`` Stack pointer Yes
+``$r4``-``$r11`` ``$a0``-``$a7`` Argument registers No
+``$r4``-``$r5`` ``$v0``-``$v1`` Return value No
+``$r12``-``$r20`` ``$t0``-``$t8`` Temp registers No
+``$r21`` ``$u0`` Percpu base address Unused
+``$r22`` ``$fp`` Frame pointer Yes
+``$r23``-``$r31`` ``$s0``-``$s8`` Static registers Yes
+================= =============== =================== ============
+
+.. Note::
+ The register ``$r21`` is reserved in the ELF psABI, but used by the Linux
+ kernel for storing the percpu base address. It normally has no ABI name,
+ but is called ``$u0`` in the kernel. You may also see ``$v0`` or ``$v1``
+ in some old code,however they are deprecated aliases of ``$a0`` and ``$a1``
+ respectively.
+
+FPRs
+----
+
+LoongArch has 32 FPRs ( ``$f0`` ~ ``$f31`` ) when FPU is present. Each one is
+64-bit wide on the LA64 cores.
+
+The floating-point register convention is the same as described in the
+LoongArch ELF psABI spec:
+
+================= ================== =================== ============
+Name Alias Usage Preserved
+ across calls
+================= ================== =================== ============
+``$f0``-``$f7`` ``$fa0``-``$fa7`` Argument registers No
+``$f0``-``$f1`` ``$fv0``-``$fv1`` Return value No
+``$f8``-``$f23`` ``$ft0``-``$ft15`` Temp registers No
+``$f24``-``$f31`` ``$fs0``-``$fs7`` Static registers Yes
+================= ================== =================== ============
+
+.. Note::
+ You may see ``$fv0`` or ``$fv1`` in some old code, however they are
+ deprecated aliases of ``$fa0`` and ``$fa1`` respectively.
+
+VRs
+----
+
+There are currently 2 vector extensions to LoongArch:
+
+- LSX (Loongson SIMD eXtension) with 128-bit vectors,
+- LASX (Loongson Advanced SIMD eXtension) with 256-bit vectors.
+
+LSX brings ``$v0`` ~ ``$v31`` while LASX brings ``$x0`` ~ ``$x31`` as the vector
+registers.
+
+The VRs overlap with FPRs: for example, on a core implementing LSX and LASX,
+the lower 128 bits of ``$x0`` is shared with ``$v0``, and the lower 64 bits of
+``$v0`` is shared with ``$f0``; same with all other VRs.
+
+CSRs
+----
+
+CSRs can only be accessed from privileged mode (PLV0):
+
+================= ===================================== ==============
+Address Full Name Abbrev Name
+================= ===================================== ==============
+0x0 Current Mode Information CRMD
+0x1 Pre-exception Mode Information PRMD
+0x2 Extension Unit Enable EUEN
+0x3 Miscellaneous Control MISC
+0x4 Exception Configuration ECFG
+0x5 Exception Status ESTAT
+0x6 Exception Return Address ERA
+0x7 Bad (Faulting) Virtual Address BADV
+0x8 Bad (Faulting) Instruction Word BADI
+0xC Exception Entrypoint Address EENTRY
+0x10 TLB Index TLBIDX
+0x11 TLB Entry High-order Bits TLBEHI
+0x12 TLB Entry Low-order Bits 0 TLBELO0
+0x13 TLB Entry Low-order Bits 1 TLBELO1
+0x18 Address Space Identifier ASID
+0x19 Page Global Directory Address for PGDL
+ Lower-half Address Space
+0x1A Page Global Directory Address for PGDH
+ Higher-half Address Space
+0x1B Page Global Directory Address PGD
+0x1C Page Walk Control for Lower- PWCL
+ half Address Space
+0x1D Page Walk Control for Higher- PWCH
+ half Address Space
+0x1E STLB Page Size STLBPS
+0x1F Reduced Virtual Address Configuration RVACFG
+0x20 CPU Identifier CPUID
+0x21 Privileged Resource Configuration 1 PRCFG1
+0x22 Privileged Resource Configuration 2 PRCFG2
+0x23 Privileged Resource Configuration 3 PRCFG3
+0x30+n (0≤n≤15) Saved Data register SAVEn
+0x40 Timer Identifier TID
+0x41 Timer Configuration TCFG
+0x42 Timer Value TVAL
+0x43 Compensation of Timer Count CNTC
+0x44 Timer Interrupt Clearing TICLR
+0x60 LLBit Control LLBCTL
+0x80 Implementation-specific Control 1 IMPCTL1
+0x81 Implementation-specific Control 2 IMPCTL2
+0x88 TLB Refill Exception Entrypoint TLBRENTRY
+ Address
+0x89 TLB Refill Exception BAD (Faulting) TLBRBADV
+ Virtual Address
+0x8A TLB Refill Exception Return Address TLBRERA
+0x8B TLB Refill Exception Saved Data TLBRSAVE
+ Register
+0x8C TLB Refill Exception Entry Low-order TLBRELO0
+ Bits 0
+0x8D TLB Refill Exception Entry Low-order TLBRELO1
+ Bits 1
+0x8E TLB Refill Exception Entry High-order TLBEHI
+ Bits
+0x8F TLB Refill Exception Pre-exception TLBRPRMD
+ Mode Information
+0x90 Machine Error Control MERRCTL
+0x91 Machine Error Information 1 MERRINFO1
+0x92 Machine Error Information 2 MERRINFO2
+0x93 Machine Error Exception Entrypoint MERRENTRY
+ Address
+0x94 Machine Error Exception Return MERRERA
+ Address
+0x95 Machine Error Exception Saved Data MERRSAVE
+ Register
+0x98 Cache TAGs CTAG
+0x180+n (0≤n≤3) Direct Mapping Configuration Window n DMWn
+0x200+2n (0≤n≤31) Performance Monitor Configuration n PMCFGn
+0x201+2n (0≤n≤31) Performance Monitor Overall Counter n PMCNTn
+0x300 Memory Load/Store WatchPoint MWPC
+ Overall Control
+0x301 Memory Load/Store WatchPoint MWPS
+ Overall Status
+0x310+8n (0≤n≤7) Memory Load/Store WatchPoint n MWPnCFG1
+ Configuration 1
+0x311+8n (0≤n≤7) Memory Load/Store WatchPoint n MWPnCFG2
+ Configuration 2
+0x312+8n (0≤n≤7) Memory Load/Store WatchPoint n MWPnCFG3
+ Configuration 3
+0x313+8n (0≤n≤7) Memory Load/Store WatchPoint n MWPnCFG4
+ Configuration 4
+0x380 Instruction Fetch WatchPoint FWPC
+ Overall Control
+0x381 Instruction Fetch WatchPoint FWPS
+ Overall Status
+0x390+8n (0≤n≤7) Instruction Fetch WatchPoint n FWPnCFG1
+ Configuration 1
+0x391+8n (0≤n≤7) Instruction Fetch WatchPoint n FWPnCFG2
+ Configuration 2
+0x392+8n (0≤n≤7) Instruction Fetch WatchPoint n FWPnCFG3
+ Configuration 3
+0x393+8n (0≤n≤7) Instruction Fetch WatchPoint n FWPnCFG4
+ Configuration 4
+0x500 Debug Register DBG
+0x501 Debug Exception Return Address DERA
+0x502 Debug Exception Saved Data Register DSAVE
+================= ===================================== ==============
+
+ERA, TLBRERA, MERRERA and DERA are sometimes also known as EPC, TLBREPC, MERREPC
+and DEPC respectively.
+
+Basic Instruction Set
+=====================
+
+Instruction formats
+-------------------
+
+LoongArch instructions are 32 bits wide, belonging to 9 basic instruction
+formats (and variants of them):
+
+=========== ==========================
+Format name Composition
+=========== ==========================
+2R Opcode + Rj + Rd
+3R Opcode + Rk + Rj + Rd
+4R Opcode + Ra + Rk + Rj + Rd
+2RI8 Opcode + I8 + Rj + Rd
+2RI12 Opcode + I12 + Rj + Rd
+2RI14 Opcode + I14 + Rj + Rd
+2RI16 Opcode + I16 + Rj + Rd
+1RI21 Opcode + I21L + Rj + I21H
+I26 Opcode + I26L + I26H
+=========== ==========================
+
+Rd is the destination register operand, while Rj, Rk and Ra ("a" stands for
+"additional") are the source register operands. I8/I12/I14/I16/I21/I26 are
+immediate operands of respective width. The longer I21 and I26 are stored
+in separate higher and lower parts in the instruction word, denoted by the "L"
+and "H" suffixes.
+
+List of Instructions
+--------------------
+
+For brevity, only instruction names (mnemonics) are listed here; please see the
+:ref:`References <loongarch-references>` for details.
+
+
+1. Arithmetic Instructions::
+
+ ADD.W SUB.W ADDI.W ADD.D SUB.D ADDI.D
+ SLT SLTU SLTI SLTUI
+ AND OR NOR XOR ANDN ORN ANDI ORI XORI
+ MUL.W MULH.W MULH.WU DIV.W DIV.WU MOD.W MOD.WU
+ MUL.D MULH.D MULH.DU DIV.D DIV.DU MOD.D MOD.DU
+ PCADDI PCADDU12I PCADDU18I
+ LU12I.W LU32I.D LU52I.D ADDU16I.D
+
+2. Bit-shift Instructions::
+
+ SLL.W SRL.W SRA.W ROTR.W SLLI.W SRLI.W SRAI.W ROTRI.W
+ SLL.D SRL.D SRA.D ROTR.D SLLI.D SRLI.D SRAI.D ROTRI.D
+
+3. Bit-manipulation Instructions::
+
+ EXT.W.B EXT.W.H CLO.W CLO.D SLZ.W CLZ.D CTO.W CTO.D CTZ.W CTZ.D
+ BYTEPICK.W BYTEPICK.D BSTRINS.W BSTRINS.D BSTRPICK.W BSTRPICK.D
+ REVB.2H REVB.4H REVB.2W REVB.D REVH.2W REVH.D BITREV.4B BITREV.8B BITREV.W BITREV.D
+ MASKEQZ MASKNEZ
+
+4. Branch Instructions::
+
+ BEQ BNE BLT BGE BLTU BGEU BEQZ BNEZ B BL JIRL
+
+5. Load/Store Instructions::
+
+ LD.B LD.BU LD.H LD.HU LD.W LD.WU LD.D ST.B ST.H ST.W ST.D
+ LDX.B LDX.BU LDX.H LDX.HU LDX.W LDX.WU LDX.D STX.B STX.H STX.W STX.D
+ LDPTR.W LDPTR.D STPTR.W STPTR.D
+ PRELD PRELDX
+
+6. Atomic Operation Instructions::
+
+ LL.W SC.W LL.D SC.D
+ AMSWAP.W AMSWAP.D AMADD.W AMADD.D AMAND.W AMAND.D AMOR.W AMOR.D AMXOR.W AMXOR.D
+ AMMAX.W AMMAX.D AMMIN.W AMMIN.D
+
+7. Barrier Instructions::
+
+ IBAR DBAR
+
+8. Special Instructions::
+
+ SYSCALL BREAK CPUCFG NOP IDLE ERTN(ERET) DBCL(DBGCALL) RDTIMEL.W RDTIMEH.W RDTIME.D
+ ASRTLE.D ASRTGT.D
+
+9. Privileged Instructions::
+
+ CSRRD CSRWR CSRXCHG
+ IOCSRRD.B IOCSRRD.H IOCSRRD.W IOCSRRD.D IOCSRWR.B IOCSRWR.H IOCSRWR.W IOCSRWR.D
+ CACOP TLBP(TLBSRCH) TLBRD TLBWR TLBFILL TLBCLR TLBFLUSH INVTLB LDDIR LDPTE
+
+Virtual Memory
+==============
+
+LoongArch supports direct-mapped virtual memory and page-mapped virtual memory.
+
+Direct-mapped virtual memory is configured by CSR.DMWn (n=0~3), it has a simple
+relationship between virtual address (VA) and physical address (PA)::
+
+ VA = PA + FixedOffset
+
+Page-mapped virtual memory has arbitrary relationship between VA and PA, which
+is recorded in TLB and page tables. LoongArch's TLB includes a fully-associative
+MTLB (Multiple Page Size TLB) and set-associative STLB (Single Page Size TLB).
+
+By default, the whole virtual address space of LA32 is configured like this:
+
+============ =========================== =============================
+Name Address Range Attributes
+============ =========================== =============================
+``UVRANGE`` ``0x00000000 - 0x7FFFFFFF`` Page-mapped, Cached, PLV0~3
+``KPRANGE0`` ``0x80000000 - 0x9FFFFFFF`` Direct-mapped, Uncached, PLV0
+``KPRANGE1`` ``0xA0000000 - 0xBFFFFFFF`` Direct-mapped, Cached, PLV0
+``KVRANGE`` ``0xC0000000 - 0xFFFFFFFF`` Page-mapped, Cached, PLV0
+============ =========================== =============================
+
+User mode (PLV3) can only access UVRANGE. For direct-mapped KPRANGE0 and
+KPRANGE1, PA is equal to VA with bit30~31 cleared. For example, the uncached
+direct-mapped VA of 0x00001000 is 0x80001000, and the cached direct-mapped
+VA of 0x00001000 is 0xA0001000.
+
+By default, the whole virtual address space of LA64 is configured like this:
+
+============ ====================== ======================================
+Name Address Range Attributes
+============ ====================== ======================================
+``XUVRANGE`` ``0x0000000000000000 - Page-mapped, Cached, PLV0~3
+ 0x3FFFFFFFFFFFFFFF``
+``XSPRANGE`` ``0x4000000000000000 - Direct-mapped, Cached / Uncached, PLV0
+ 0x7FFFFFFFFFFFFFFF``
+``XKPRANGE`` ``0x8000000000000000 - Direct-mapped, Cached / Uncached, PLV0
+ 0xBFFFFFFFFFFFFFFF``
+``XKVRANGE`` ``0xC000000000000000 - Page-mapped, Cached, PLV0
+ 0xFFFFFFFFFFFFFFFF``
+============ ====================== ======================================
+
+User mode (PLV3) can only access XUVRANGE. For direct-mapped XSPRANGE and
+XKPRANGE, PA is equal to VA with bits 60~63 cleared, and the cache attribute
+is configured by bits 60~61 in VA: 0 is for strongly-ordered uncached, 1 is
+for coherent cached, and 2 is for weakly-ordered uncached.
+
+Currently we only use XKPRANGE for direct mapping and XSPRANGE is reserved.
+
+To put this in action: the strongly-ordered uncached direct-mapped VA (in
+XKPRANGE) of 0x00000000_00001000 is 0x80000000_00001000, the coherent cached
+direct-mapped VA (in XKPRANGE) of 0x00000000_00001000 is 0x90000000_00001000,
+and the weakly-ordered uncached direct-mapped VA (in XKPRANGE) of 0x00000000
+_00001000 is 0xA0000000_00001000.
+
+Relationship of Loongson and LoongArch
+======================================
+
+LoongArch is a RISC ISA which is different from any other existing ones, while
+Loongson is a family of processors. Loongson includes 3 series: Loongson-1 is
+the 32-bit processor series, Loongson-2 is the low-end 64-bit processor series,
+and Loongson-3 is the high-end 64-bit processor series. Old Loongson is based on
+MIPS, while New Loongson is based on LoongArch. Take Loongson-3 as an example:
+Loongson-3A1000/3B1500/3A2000/3A3000/3A4000 are MIPS-compatible, while Loongson-
+3A5000 (and future revisions) are all based on LoongArch.
+
+.. _loongarch-references:
+
+References
+==========
+
+Official web site of Loongson Technology Corp. Ltd.:
+
+ http://www.loongson.cn/
+
+Developer web site of Loongson and LoongArch (Software and Documentation):
+
+ http://www.loongnix.cn/
+
+ https://github.com/loongson/
+
+ https://loongson.github.io/LoongArch-Documentation/
+
+Documentation of LoongArch ISA:
+
+ https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.10-CN.pdf (in Chinese)
+
+ https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.10-EN.pdf (in English)
+
+Documentation of LoongArch ELF psABI:
+
+ https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-ELF-ABI-v2.01-CN.pdf (in Chinese)
+
+ https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-ELF-ABI-v2.01-EN.pdf (in English)
+
+Linux kernel repository of Loongson and LoongArch:
+
+ https://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson.git
diff --git a/Documentation/arch/loongarch/irq-chip-model.rst b/Documentation/arch/loongarch/irq-chip-model.rst
new file mode 100644
index 000000000000..8f5c3345109e
--- /dev/null
+++ b/Documentation/arch/loongarch/irq-chip-model.rst
@@ -0,0 +1,256 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======================================
+IRQ chip model (hierarchy) of LoongArch
+=======================================
+
+Currently, LoongArch based processors (e.g. Loongson-3A5000) can only work together
+with LS7A chipsets. The irq chips in LoongArch computers include CPUINTC (CPU Core
+Interrupt Controller), LIOINTC (Legacy I/O Interrupt Controller), EIOINTC (Extended
+I/O Interrupt Controller), HTVECINTC (Hyper-Transport Vector Interrupt Controller),
+PCH-PIC (Main Interrupt Controller in LS7A chipset), PCH-LPC (LPC Interrupt Controller
+in LS7A chipset) and PCH-MSI (MSI Interrupt Controller).
+
+CPUINTC is a per-core controller (in CPU), LIOINTC/EIOINTC/HTVECINTC are per-package
+controllers (in CPU), while PCH-PIC/PCH-LPC/PCH-MSI are controllers out of CPU (i.e.,
+in chipsets). These controllers (in other words, irqchips) are linked in a hierarchy,
+and there are two models of hierarchy (legacy model and extended model).
+
+Legacy IRQ model
+================
+
+In this model, IPI (Inter-Processor Interrupt) and CPU Local Timer interrupt go
+to CPUINTC directly, CPU UARTS interrupts go to LIOINTC, while all other devices
+interrupts go to PCH-PIC/PCH-LPC/PCH-MSI and gathered by HTVECINTC, and then go
+to LIOINTC, and then CPUINTC::
+
+ +-----+ +---------+ +-------+
+ | IPI | --> | CPUINTC | <-- | Timer |
+ +-----+ +---------+ +-------+
+ ^
+ |
+ +---------+ +-------+
+ | LIOINTC | <-- | UARTs |
+ +---------+ +-------+
+ ^
+ |
+ +-----------+
+ | HTVECINTC |
+ +-----------+
+ ^ ^
+ | |
+ +---------+ +---------+
+ | PCH-PIC | | PCH-MSI |
+ +---------+ +---------+
+ ^ ^ ^
+ | | |
+ +---------+ +---------+ +---------+
+ | PCH-LPC | | Devices | | Devices |
+ +---------+ +---------+ +---------+
+ ^
+ |
+ +---------+
+ | Devices |
+ +---------+
+
+Extended IRQ model
+==================
+
+In this model, IPI (Inter-Processor Interrupt) and CPU Local Timer interrupt go
+to CPUINTC directly, CPU UARTS interrupts go to LIOINTC, while all other devices
+interrupts go to PCH-PIC/PCH-LPC/PCH-MSI and gathered by EIOINTC, and then go to
+to CPUINTC directly::
+
+ +-----+ +---------+ +-------+
+ | IPI | --> | CPUINTC | <-- | Timer |
+ +-----+ +---------+ +-------+
+ ^ ^
+ | |
+ +---------+ +---------+ +-------+
+ | EIOINTC | | LIOINTC | <-- | UARTs |
+ +---------+ +---------+ +-------+
+ ^ ^
+ | |
+ +---------+ +---------+
+ | PCH-PIC | | PCH-MSI |
+ +---------+ +---------+
+ ^ ^ ^
+ | | |
+ +---------+ +---------+ +---------+
+ | PCH-LPC | | Devices | | Devices |
+ +---------+ +---------+ +---------+
+ ^
+ |
+ +---------+
+ | Devices |
+ +---------+
+
+Virtual Extended IRQ model
+==========================
+
+In this model, IPI (Inter-Processor Interrupt) and CPU Local Timer interrupt
+go to CPUINTC directly, CPU UARTS interrupts go to PCH-PIC, while all other
+devices interrupts go to PCH-PIC/PCH-MSI and gathered by V-EIOINTC (Virtual
+Extended I/O Interrupt Controller), and then go to CPUINTC directly::
+
+ +-----+ +-------------------+ +-------+
+ | IPI |--> | CPUINTC(0-255vcpu)| <-- | Timer |
+ +-----+ +-------------------+ +-------+
+ ^
+ |
+ +-----------+
+ | V-EIOINTC |
+ +-----------+
+ ^ ^
+ | |
+ +---------+ +---------+
+ | PCH-PIC | | PCH-MSI |
+ +---------+ +---------+
+ ^ ^ ^
+ | | |
+ +--------+ +---------+ +---------+
+ | UARTs | | Devices | | Devices |
+ +--------+ +---------+ +---------+
+
+
+Description
+-----------
+V-EIOINTC (Virtual Extended I/O Interrupt Controller) is an extension of
+EIOINTC, it only works in VM mode which runs in KVM hypervisor. Interrupts can
+be routed to up to four vCPUs via standard EIOINTC, however with V-EIOINTC
+interrupts can be routed to up to 256 virtual cpus.
+
+With standard EIOINTC, interrupt routing setting includes two parts: eight
+bits for CPU selection and four bits for CPU IP (Interrupt Pin) selection.
+For CPU selection there is four bits for EIOINTC node selection, four bits
+for EIOINTC CPU selection. Bitmap method is used for CPU selection and
+CPU IP selection, so interrupt can only route to CPU0 - CPU3 and IP0-IP3 in
+one EIOINTC node.
+
+With V-EIOINTC it supports to route more CPUs and CPU IP (Interrupt Pin),
+there are two newly added registers with V-EIOINTC.
+
+EXTIOI_VIRT_FEATURES
+--------------------
+This register is read-only register, which indicates supported features with
+V-EIOINTC. Feature EXTIOI_HAS_INT_ENCODE and EXTIOI_HAS_CPU_ENCODE is added.
+
+Feature EXTIOI_HAS_INT_ENCODE is part of standard EIOINTC. If it is 1, it
+indicates that CPU Interrupt Pin selection can be normal method rather than
+bitmap method, so interrupt can be routed to IP0 - IP15.
+
+Feature EXTIOI_HAS_CPU_ENCODE is extension of V-EIOINTC. If it is 1, it
+indicates that CPU selection can be normal method rather than bitmap method,
+so interrupt can be routed to CPU0 - CPU255.
+
+EXTIOI_VIRT_CONFIG
+------------------
+This register is read-write register, for compatibility interrupt routed uses
+the default method which is the same with standard EIOINTC. If the bit is set
+with 1, it indicated HW to use normal method rather than bitmap method.
+
+Advanced Extended IRQ model
+===========================
+
+In this model, IPI (Inter-Processor Interrupt) and CPU Local Timer interrupt go
+to CPUINTC directly, CPU UARTS interrupts go to LIOINTC, PCH-MSI interrupts go
+to AVECINTC, and then go to CPUINTC directly, while all other devices interrupts
+go to PCH-PIC/PCH-LPC and gathered by EIOINTC, and then go to CPUINTC directly::
+
+ +-----+ +-----------------------+ +-------+
+ | IPI | --> | CPUINTC | <-- | Timer |
+ +-----+ +-----------------------+ +-------+
+ ^ ^ ^
+ | | |
+ +---------+ +----------+ +---------+ +-------+
+ | EIOINTC | | AVECINTC | | LIOINTC | <-- | UARTs |
+ +---------+ +----------+ +---------+ +-------+
+ ^ ^
+ | |
+ +---------+ +---------+
+ | PCH-PIC | | PCH-MSI |
+ +---------+ +---------+
+ ^ ^ ^
+ | | |
+ +---------+ +---------+ +---------+
+ | Devices | | PCH-LPC | | Devices |
+ +---------+ +---------+ +---------+
+ ^
+ |
+ +---------+
+ | Devices |
+ +---------+
+
+ACPI-related definitions
+========================
+
+CPUINTC::
+
+ ACPI_MADT_TYPE_CORE_PIC;
+ struct acpi_madt_core_pic;
+ enum acpi_madt_core_pic_version;
+
+LIOINTC::
+
+ ACPI_MADT_TYPE_LIO_PIC;
+ struct acpi_madt_lio_pic;
+ enum acpi_madt_lio_pic_version;
+
+EIOINTC::
+
+ ACPI_MADT_TYPE_EIO_PIC;
+ struct acpi_madt_eio_pic;
+ enum acpi_madt_eio_pic_version;
+
+HTVECINTC::
+
+ ACPI_MADT_TYPE_HT_PIC;
+ struct acpi_madt_ht_pic;
+ enum acpi_madt_ht_pic_version;
+
+PCH-PIC::
+
+ ACPI_MADT_TYPE_BIO_PIC;
+ struct acpi_madt_bio_pic;
+ enum acpi_madt_bio_pic_version;
+
+PCH-MSI::
+
+ ACPI_MADT_TYPE_MSI_PIC;
+ struct acpi_madt_msi_pic;
+ enum acpi_madt_msi_pic_version;
+
+PCH-LPC::
+
+ ACPI_MADT_TYPE_LPC_PIC;
+ struct acpi_madt_lpc_pic;
+ enum acpi_madt_lpc_pic_version;
+
+References
+==========
+
+Documentation of Loongson-3A5000:
+
+ https://github.com/loongson/LoongArch-Documentation/releases/latest/download/Loongson-3A5000-usermanual-1.02-CN.pdf (in Chinese)
+
+ https://github.com/loongson/LoongArch-Documentation/releases/latest/download/Loongson-3A5000-usermanual-1.02-EN.pdf (in English)
+
+Documentation of Loongson's LS7A chipset:
+
+ https://github.com/loongson/LoongArch-Documentation/releases/latest/download/Loongson-7A1000-usermanual-2.00-CN.pdf (in Chinese)
+
+ https://github.com/loongson/LoongArch-Documentation/releases/latest/download/Loongson-7A1000-usermanual-2.00-EN.pdf (in English)
+
+.. Note::
+ - CPUINTC is CSR.ECFG/CSR.ESTAT and its interrupt controller described
+ in Section 7.4 of "LoongArch Reference Manual, Vol 1";
+ - LIOINTC is "Legacy I/OInterrupts" described in Section 11.1 of
+ "Loongson 3A5000 Processor Reference Manual";
+ - EIOINTC is "Extended I/O Interrupts" described in Section 11.2 of
+ "Loongson 3A5000 Processor Reference Manual";
+ - HTVECINTC is "HyperTransport Interrupts" described in Section 14.3 of
+ "Loongson 3A5000 Processor Reference Manual";
+ - PCH-PIC/PCH-MSI is "Interrupt Controller" described in Section 5 of
+ "Loongson 7A1000 Bridge User Manual";
+ - PCH-LPC is "LPC Interrupts" described in Section 24.3 of
+ "Loongson 7A1000 Bridge User Manual".
diff --git a/Documentation/arch/m68k/buddha-driver.rst b/Documentation/arch/m68k/buddha-driver.rst
new file mode 100644
index 000000000000..5d1bc824978b
--- /dev/null
+++ b/Documentation/arch/m68k/buddha-driver.rst
@@ -0,0 +1,209 @@
+=====================================
+Amiga Buddha and Catweasel IDE Driver
+=====================================
+
+The Amiga Buddha and Catweasel IDE Driver (part of ide.c) was written by
+Geert Uytterhoeven based on the following specifications:
+
+------------------------------------------------------------------------
+
+Register map of the Buddha IDE controller and the
+Buddha-part of the Catweasel Zorro-II version
+
+The Autoconfiguration has been implemented just as Commodore
+described in their manuals, no tricks have been used (for
+example leaving some address lines out of the equations...).
+If you want to configure the board yourself (for example let
+a Linux kernel configure the card), look at the Commodore
+Docs. Reading the nibbles should give this information::
+
+ Vendor number: 4626 ($1212)
+ product number: 0 (42 for Catweasel Z-II)
+ Serial number: 0
+ Rom-vector: $1000
+
+The card should be a Z-II board, size 64K, not for freemem
+list, Rom-Vektor is valid, no second Autoconfig-board on the
+same card, no space preference, supports "Shutup_forever".
+
+Setting the base address should be done in two steps, just
+as the Amiga Kickstart does: The lower nibble of the 8-Bit
+address is written to $4a, then the whole Byte is written to
+$48, while it doesn't matter how often you're writing to $4a
+as long as $48 is not touched. After $48 has been written,
+the whole card disappears from $e8 and is mapped to the new
+address just written. Make sure $4a is written before $48,
+otherwise your chance is only 1:16 to find the board :-).
+
+The local memory-map is even active when mapped to $e8:
+
+============== ===========================================
+$0-$7e Autokonfig-space, see Z-II docs.
+
+$80-$7fd reserved
+
+$7fe Speed-select Register: Read & Write
+ (description see further down)
+
+$800-$8ff IDE-Select 0 (Port 0, Register set 0)
+
+$900-$9ff IDE-Select 1 (Port 0, Register set 1)
+
+$a00-$aff IDE-Select 2 (Port 1, Register set 0)
+
+$b00-$bff IDE-Select 3 (Port 1, Register set 1)
+
+$c00-$cff IDE-Select 4 (Port 2, Register set 0,
+ Catweasel only!)
+
+$d00-$dff IDE-Select 5 (Port 3, Register set 1,
+ Catweasel only!)
+
+$e00-$eff local expansion port, on Catweasel Z-II the
+ Catweasel registers are also mapped here.
+ Never touch, use multidisk.device!
+
+$f00 read only, Byte-access: Bit 7 shows the
+ level of the IRQ-line of IDE port 0.
+
+$f01-$f3f mirror of $f00
+
+$f40 read only, Byte-access: Bit 7 shows the
+ level of the IRQ-line of IDE port 1.
+
+$f41-$f7f mirror of $f40
+
+$f80 read only, Byte-access: Bit 7 shows the
+ level of the IRQ-line of IDE port 2.
+ (Catweasel only!)
+
+$f81-$fbf mirror of $f80
+
+$fc0 write-only: Writing any value to this
+ register enables IRQs to be passed from the
+ IDE ports to the Zorro bus. This mechanism
+ has been implemented to be compatible with
+ harddisks that are either defective or have
+ a buggy firmware and pull the IRQ line up
+ while starting up. If interrupts would
+ always be passed to the bus, the computer
+ might not start up. Once enabled, this flag
+ can not be disabled again. The level of the
+ flag can not be determined by software
+ (what for? Write to me if it's necessary!).
+
+$fc1-$fff mirror of $fc0
+
+$1000-$ffff Buddha-Rom with offset $1000 in the rom
+ chip. The addresses $0 to $fff of the rom
+ chip cannot be read. Rom is Byte-wide and
+ mapped to even addresses.
+============== ===========================================
+
+The IDE ports issue an INT2. You can read the level of the
+IRQ-lines of the IDE-ports by reading from the three (two
+for Buddha-only) registers $f00, $f40 and $f80. This way
+more than one I/O request can be handled and you can easily
+determine what driver has to serve the INT2. Buddha and
+Catweasel expansion boards can issue an INT6. A separate
+memory map is available for the I/O module and the sysop's
+I/O module.
+
+The IDE ports are fed by the address lines A2 to A4, just as
+the Amiga 1200 and Amiga 4000 IDE ports are. This way
+existing drivers can be easily ported to Buddha. A move.l
+polls two words out of the same address of IDE port since
+every word is mirrored once. movem is not possible, but
+it's not necessary either, because you can only speedup
+68000 systems with this technique. A 68020 system with
+fastmem is faster with move.l.
+
+If you're using the mirrored registers of the IDE-ports with
+A6=1, the Buddha doesn't care about the speed that you have
+selected in the speed register (see further down). With
+A6=1 (for example $840 for port 0, register set 0), a 780ns
+access is being made. These registers should be used for a
+command access to the harddisk/CD-Rom, since command
+accesses are Byte-wide and have to be made slower according
+to the ATA-X3T9 manual.
+
+Now for the speed-register: The register is byte-wide, and
+only the upper three bits are used (Bits 7 to 5). Bit 4
+must always be set to 1 to be compatible with later Buddha
+versions (if I'll ever update this one). I presume that
+I'll never use the lower four bits, but they have to be set
+to 1 by definition.
+
+The values in this table have to be shifted 5 bits to the
+left and or'd with $1f (this sets the lower 5 bits).
+
+All the timings have in common: Select and IOR/IOW rise at
+the same time. IOR and IOW have a propagation delay of
+about 30ns to the clocks on the Zorro bus, that's why the
+values are no multiple of 71. One clock-cycle is 71ns long
+(exactly 70,5 at 14,18 Mhz on PAL systems).
+
+value 0 (Default after reset)
+ 497ns Select (7 clock cycles) , IOR/IOW after 172ns (2 clock cycles)
+ (same timing as the Amiga 1200 does on it's IDE port without
+ accelerator card)
+
+value 1
+ 639ns Select (9 clock cycles), IOR/IOW after 243ns (3 clock cycles)
+
+value 2
+ 781ns Select (11 clock cycles), IOR/IOW after 314ns (4 clock cycles)
+
+value 3
+ 355ns Select (5 clock cycles), IOR/IOW after 101ns (1 clock cycle)
+
+value 4
+ 355ns Select (5 clock cycles), IOR/IOW after 172ns (2 clock cycles)
+
+value 5
+ 355ns Select (5 clock cycles), IOR/IOW after 243ns (3 clock cycles)
+
+value 6
+ 1065ns Select (15 clock cycles), IOR/IOW after 314ns (4 clock cycles)
+
+value 7
+ 355ns Select, (5 clock cycles), IOR/IOW after 101ns (1 clock cycle)
+
+When accessing IDE registers with A6=1 (for example $84x),
+the timing will always be mode 0 8-bit compatible, no matter
+what you have selected in the speed register:
+
+781ns select, IOR/IOW after 4 clock cycles (=314ns) active.
+
+All the timings with a very short select-signal (the 355ns
+fast accesses) depend on the accelerator card used in the
+system: Sometimes two more clock cycles are inserted by the
+bus interface, making the whole access 497ns long. This
+doesn't affect the reliability of the controller nor the
+performance of the card, since this doesn't happen very
+often.
+
+All the timings are calculated and only confirmed by
+measurements that allowed me to count the clock cycles. If
+the system is clocked by an oscillator other than 28,37516
+Mhz (for example the NTSC-frequency 28,63636 Mhz), each
+clock cycle is shortened to a bit less than 70ns (not worth
+mentioning). You could think of a small performance boost
+by overclocking the system, but you would either need a
+multisync monitor, or a graphics card, and your internal
+diskdrive would go crazy, that's why you shouldn't tune your
+Amiga this way.
+
+Giving you the possibility to write software that is
+compatible with both the Buddha and the Catweasel Z-II, The
+Buddha acts just like a Catweasel Z-II with no device
+connected to the third IDE-port. The IRQ-register $f80
+always shows a "no IRQ here" on the Buddha, and accesses to
+the third IDE port are going into data's Nirwana on the
+Buddha.
+
+Jens Schönfeld february 19th, 1997
+
+updated may 27th, 1997
+
+eMail: sysop@nostlgic.tng.oche.de
diff --git a/Documentation/arch/m68k/features.rst b/Documentation/arch/m68k/features.rst
new file mode 100644
index 000000000000..de7f0ccf7fc8
--- /dev/null
+++ b/Documentation/arch/m68k/features.rst
@@ -0,0 +1,3 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. kernel-feat:: features m68k
diff --git a/Documentation/arch/m68k/index.rst b/Documentation/arch/m68k/index.rst
new file mode 100644
index 000000000000..0f890dbb5fe2
--- /dev/null
+++ b/Documentation/arch/m68k/index.rst
@@ -0,0 +1,20 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=================
+m68k Architecture
+=================
+
+.. toctree::
+ :maxdepth: 2
+
+ kernel-options
+ buddha-driver
+
+ features
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
diff --git a/Documentation/arch/m68k/kernel-options.rst b/Documentation/arch/m68k/kernel-options.rst
new file mode 100644
index 000000000000..2008a20b4329
--- /dev/null
+++ b/Documentation/arch/m68k/kernel-options.rst
@@ -0,0 +1,911 @@
+===================================
+Command Line Options for Linux/m68k
+===================================
+
+Last Update: 2 May 1999
+
+Linux/m68k version: 2.2.6
+
+Author: Roman.Hodek@informatik.uni-erlangen.de (Roman Hodek)
+
+Update: jds@kom.auc.dk (Jes Sorensen) and faq@linux-m68k.org (Chris Lawrence)
+
+0) Introduction
+===============
+
+Often I've been asked which command line options the Linux/m68k
+kernel understands, or how the exact syntax for the ... option is, or
+... about the option ... . I hope, this document supplies all the
+answers...
+
+Note that some options might be outdated, their descriptions being
+incomplete or missing. Please update the information and send in the
+patches.
+
+
+1) Overview of the Kernel's Option Processing
+=============================================
+
+The kernel knows three kinds of options on its command line:
+
+ 1) kernel options
+ 2) environment settings
+ 3) arguments for init
+
+To which of these classes an argument belongs is determined as
+follows: If the option is known to the kernel itself, i.e. if the name
+(the part before the '=') or, in some cases, the whole argument string
+is known to the kernel, it belongs to class 1. Otherwise, if the
+argument contains an '=', it is of class 2, and the definition is put
+into init's environment. All other arguments are passed to init as
+command line options.
+
+This document describes the valid kernel options for Linux/m68k in
+the version mentioned at the start of this file. Later revisions may
+add new such options, and some may be missing in older versions.
+
+In general, the value (the part after the '=') of an option is a
+list of values separated by commas. The interpretation of these values
+is up to the driver that "owns" the option. This association of
+options with drivers is also the reason that some are further
+subdivided.
+
+
+2) General Kernel Options
+=========================
+
+2.1) root=
+----------
+
+:Syntax: root=/dev/<device>
+:or: root=<hex_number>
+
+This tells the kernel which device it should mount as the root
+filesystem. The device must be a block device with a valid filesystem
+on it.
+
+The first syntax gives the device by name. These names are converted
+into a major/minor number internally in the kernel in an unusual way.
+Normally, this "conversion" is done by the device files in /dev, but
+this isn't possible here, because the root filesystem (with /dev)
+isn't mounted yet... So the kernel parses the name itself, with some
+hardcoded name to number mappings. The name must always be a
+combination of two or three letters, followed by a decimal number.
+Valid names are::
+
+ /dev/ram: -> 0x0100 (initial ramdisk)
+ /dev/hda: -> 0x0300 (first IDE disk)
+ /dev/hdb: -> 0x0340 (second IDE disk)
+ /dev/sda: -> 0x0800 (first SCSI disk)
+ /dev/sdb: -> 0x0810 (second SCSI disk)
+ /dev/sdc: -> 0x0820 (third SCSI disk)
+ /dev/sdd: -> 0x0830 (forth SCSI disk)
+ /dev/sde: -> 0x0840 (fifth SCSI disk)
+ /dev/fd : -> 0x0200 (floppy disk)
+
+The name must be followed by a decimal number, that stands for the
+partition number. Internally, the value of the number is just
+added to the device number mentioned in the table above. The
+exceptions are /dev/ram and /dev/fd, where /dev/ram refers to an
+initial ramdisk loaded by your bootstrap program (please consult the
+instructions for your bootstrap program to find out how to load an
+initial ramdisk). As of kernel version 2.0.18 you must specify
+/dev/ram as the root device if you want to boot from an initial
+ramdisk. For the floppy devices, /dev/fd, the number stands for the
+floppy drive number (there are no partitions on floppy disks). I.e.,
+/dev/fd0 stands for the first drive, /dev/fd1 for the second, and so
+on. Since the number is just added, you can also force the disk format
+by adding a number greater than 3. If you look into your /dev
+directory, use can see the /dev/fd0D720 has major 2 and minor 16. You
+can specify this device for the root FS by writing "root=/dev/fd16" on
+the kernel command line.
+
+[Strange and maybe uninteresting stuff ON]
+
+This unusual translation of device names has some strange
+consequences: If, for example, you have a symbolic link from /dev/fd
+to /dev/fd0D720 as an abbreviation for floppy driver #0 in DD format,
+you cannot use this name for specifying the root device, because the
+kernel cannot see this symlink before mounting the root FS and it
+isn't in the table above. If you use it, the root device will not be
+set at all, without an error message. Another example: You cannot use a
+partition on e.g. the sixth SCSI disk as the root filesystem, if you
+want to specify it by name. This is, because only the devices up to
+/dev/sde are in the table above, but not /dev/sdf. Although, you can
+use the sixth SCSI disk for the root FS, but you have to specify the
+device by number... (see below). Or, even more strange, you can use the
+fact that there is no range checking of the partition number, and your
+knowledge that each disk uses 16 minors, and write "root=/dev/sde17"
+(for /dev/sdf1).
+
+[Strange and maybe uninteresting stuff OFF]
+
+If the device containing your root partition isn't in the table
+above, you can also specify it by major and minor numbers. These are
+written in hex, with no prefix and no separator between. E.g., if you
+have a CD with contents appropriate as a root filesystem in the first
+SCSI CD-ROM drive, you boot from it by "root=0b00". Here, hex "0b" =
+decimal 11 is the major of SCSI CD-ROMs, and the minor 0 stands for
+the first of these. You can find out all valid major numbers by
+looking into include/linux/major.h.
+
+In addition to major and minor numbers, if the device containing your
+root partition uses a partition table format with unique partition
+identifiers, then you may use them. For instance,
+"root=PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF". It is also
+possible to reference another partition on the same device using a
+known partition UUID as the starting point. For example,
+if partition 5 of the device has the UUID of
+00112233-4455-6677-8899-AABBCCDDEEFF then partition 3 may be found as
+follows:
+
+ PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF/PARTNROFF=-2
+
+Authoritative information can be found in
+"Documentation/admin-guide/kernel-parameters.rst".
+
+
+2.2) ro, rw
+-----------
+
+:Syntax: ro
+:or: rw
+
+These two options tell the kernel whether it should mount the root
+filesystem read-only or read-write. The default is read-only, except
+for ramdisks, which default to read-write.
+
+
+2.3) debug
+----------
+
+:Syntax: debug
+
+This raises the kernel log level to 10 (the default is 7). This is the
+same level as set by the "dmesg" command, just that the maximum level
+selectable by dmesg is 8.
+
+
+2.4) debug=
+-----------
+
+:Syntax: debug=<device>
+
+This option causes certain kernel messages be printed to the selected
+debugging device. This can aid debugging the kernel, since the
+messages can be captured and analyzed on some other machine. Which
+devices are possible depends on the machine type. There are no checks
+for the validity of the device name. If the device isn't implemented,
+nothing happens.
+
+Messages logged this way are in general stack dumps after kernel
+memory faults or bad kernel traps, and kernel panics. To be exact: all
+messages of level 0 (panic messages) and all messages printed while
+the log level is 8 or more (their level doesn't matter). Before stack
+dumps, the kernel sets the log level to 10 automatically. A level of
+at least 8 can also be set by the "debug" command line option (see
+2.3) and at run time with "dmesg -n 8".
+
+Devices possible for Amiga:
+
+ - "ser":
+ built-in serial port; parameters: 9600bps, 8N1
+ - "mem":
+ Save the messages to a reserved area in chip mem. After
+ rebooting, they can be read under AmigaOS with the tool
+ 'dmesg'.
+
+Devices possible for Atari:
+
+ - "ser1":
+ ST-MFP serial port ("Modem1"); parameters: 9600bps, 8N1
+ - "ser2":
+ SCC channel B serial port ("Modem2"); parameters: 9600bps, 8N1
+ - "ser" :
+ default serial port
+ This is "ser2" for a Falcon, and "ser1" for any other machine
+ - "midi":
+ The MIDI port; parameters: 31250bps, 8N1
+ - "par" :
+ parallel port
+
+ The printing routine for this implements a timeout for the
+ case there's no printer connected (else the kernel would
+ lock up). The timeout is not exact, but usually a few
+ seconds.
+
+
+2.6) ramdisk_size=
+------------------
+
+:Syntax: ramdisk_size=<size>
+
+This option instructs the kernel to set up a ramdisk of the given
+size in KBytes. Do not use this option if the ramdisk contents are
+passed by bootstrap! In this case, the size is selected automatically
+and should not be overwritten.
+
+The only application is for root filesystems on floppy disks, that
+should be loaded into memory. To do that, select the corresponding
+size of the disk as ramdisk size, and set the root device to the disk
+drive (with "root=").
+
+
+2.7) swap=
+
+ I can't find any sign of this option in 2.2.6.
+
+2.8) buff=
+-----------
+
+ I can't find any sign of this option in 2.2.6.
+
+
+3) General Device Options (Amiga and Atari)
+===========================================
+
+3.1) ether=
+-----------
+
+:Syntax: ether=[<irq>[,<base_addr>[,<mem_start>[,<mem_end>]]]],<dev-name>
+
+<dev-name> is the name of a net driver, as specified in
+drivers/net/Space.c in the Linux source. Most prominent are eth0, ...
+eth3, sl0, ... sl3, ppp0, ..., ppp3, dummy, and lo.
+
+The non-ethernet drivers (sl, ppp, dummy, lo) obviously ignore the
+settings by this options. Also, the existing ethernet drivers for
+Linux/m68k (ariadne, a2065, hydra) don't use them because Zorro boards
+are really Plug-'n-Play, so the "ether=" option is useless altogether
+for Linux/m68k.
+
+
+3.2) hd=
+--------
+
+:Syntax: hd=<cylinders>,<heads>,<sectors>
+
+This option sets the disk geometry of an IDE disk. The first hd=
+option is for the first IDE disk, the second for the second one.
+(I.e., you can give this option twice.) In most cases, you won't have
+to use this option, since the kernel can obtain the geometry data
+itself. It exists just for the case that this fails for one of your
+disks.
+
+
+3.3) max_scsi_luns=
+-------------------
+
+:Syntax: max_scsi_luns=<n>
+
+Sets the maximum number of LUNs (logical units) of SCSI devices to
+be scanned. Valid values for <n> are between 1 and 8. Default is 8 if
+"Probe all LUNs on each SCSI device" was selected during the kernel
+configuration, else 1.
+
+
+3.4) st=
+--------
+
+:Syntax: st=<buffer_size>,[<write_thres>,[<max_buffers>]]
+
+Sets several parameters of the SCSI tape driver. <buffer_size> is
+the number of 512-byte buffers reserved for tape operations for each
+device. <write_thres> sets the number of blocks which must be filled
+to start an actual write operation to the tape. Maximum value is the
+total number of buffers. <max_buffer> limits the total number of
+buffers allocated for all tape devices.
+
+
+3.5) dmasound=
+--------------
+
+:Syntax: dmasound=[<buffers>,<buffer-size>[,<catch-radius>]]
+
+This option controls some configurations of the Linux/m68k DMA sound
+driver (Amiga and Atari): <buffers> is the number of buffers you want
+to use (minimum 4, default 4), <buffer-size> is the size of each
+buffer in kilobytes (minimum 4, default 32) and <catch-radius> says
+how much percent of error will be tolerated when setting a frequency
+(maximum 10, default 0). For example with 3% you can play 8000Hz
+AU-Files on the Falcon with its hardware frequency of 8195Hz and thus
+don't need to expand the sound.
+
+
+
+4) Options for Atari Only
+=========================
+
+4.1) video=
+-----------
+
+:Syntax: video=<fbname>:<sub-options...>
+
+The <fbname> parameter specifies the name of the frame buffer,
+eg. most atari users will want to specify `atafb` here. The
+<sub-options> is a comma-separated list of the sub-options listed
+below.
+
+NB:
+ Please notice that this option was renamed from `atavideo` to
+ `video` during the development of the 1.3.x kernels, thus you
+ might need to update your boot-scripts if upgrading to 2.x from
+ an 1.2.x kernel.
+
+NBB:
+ The behavior of video= was changed in 2.1.57 so the recommended
+ option is to specify the name of the frame buffer.
+
+4.1.1) Video Mode
+-----------------
+
+This sub-option may be any of the predefined video modes, as listed
+in atari/atafb.c in the Linux/m68k source tree. The kernel will
+activate the given video mode at boot time and make it the default
+mode, if the hardware allows. Currently defined names are:
+
+ - stlow : 320x200x4
+ - stmid, default5 : 640x200x2
+ - sthigh, default4: 640x400x1
+ - ttlow : 320x480x8, TT only
+ - ttmid, default1 : 640x480x4, TT only
+ - tthigh, default2: 1280x960x1, TT only
+ - vga2 : 640x480x1, Falcon only
+ - vga4 : 640x480x2, Falcon only
+ - vga16, default3 : 640x480x4, Falcon only
+ - vga256 : 640x480x8, Falcon only
+ - falh2 : 896x608x1, Falcon only
+ - falh16 : 896x608x4, Falcon only
+
+If no video mode is given on the command line, the kernel tries the
+modes names "default<n>" in turn, until one is possible with the
+hardware in use.
+
+A video mode setting doesn't make sense, if the external driver is
+activated by a "external:" sub-option.
+
+4.1.2) inverse
+--------------
+
+Invert the display. This affects only text consoles.
+Usually, the background is chosen to be black. With this
+option, you can make the background white.
+
+4.1.3) font
+-----------
+
+:Syntax: font:<fontname>
+
+Specify the font to use in text modes. Currently you can choose only
+between `VGA8x8`, `VGA8x16` and `PEARL8x8`. `VGA8x8` is default, if the
+vertical size of the display is less than 400 pixel rows. Otherwise, the
+`VGA8x16` font is the default.
+
+4.1.4) `hwscroll_`
+------------------
+
+:Syntax: `hwscroll_<n>`
+
+The number of additional lines of video memory to reserve for
+speeding up the scrolling ("hardware scrolling"). Hardware scrolling
+is possible only if the kernel can set the video base address in steps
+fine enough. This is true for STE, MegaSTE, TT, and Falcon. It is not
+possible with plain STs and graphics cards (The former because the
+base address must be on a 256 byte boundary there, the latter because
+the kernel doesn't know how to set the base address at all.)
+
+By default, <n> is set to the number of visible text lines on the
+display. Thus, the amount of video memory is doubled, compared to no
+hardware scrolling. You can turn off the hardware scrolling altogether
+by setting <n> to 0.
+
+4.1.5) internal:
+----------------
+
+:Syntax: internal:<xres>;<yres>[;<xres_max>;<yres_max>;<offset>]
+
+This option specifies the capabilities of some extended internal video
+hardware, like e.g. OverScan. <xres> and <yres> give the (extended)
+dimensions of the screen.
+
+If your OverScan needs a black border, you have to write the last
+three arguments of the "internal:". <xres_max> is the maximum line
+length the hardware allows, <yres_max> the maximum number of lines.
+<offset> is the offset of the visible part of the screen memory to its
+physical start, in bytes.
+
+Often, extended interval video hardware has to be activated somehow.
+For this, see the "sw_*" options below.
+
+4.1.6) external:
+----------------
+
+:Syntax:
+ external:<xres>;<yres>;<depth>;<org>;<scrmem>[;<scrlen>[;<vgabase>
+ [;<colw>[;<coltype>[;<xres_virtual>]]]]]
+
+.. I had to break this line...
+
+This is probably the most complicated parameter... It specifies that
+you have some external video hardware (a graphics board), and how to
+use it under Linux/m68k. The kernel cannot know more about the hardware
+than you tell it here! The kernel also is unable to set or change any
+video modes, since it doesn't know about any board internal. So, you
+have to switch to that video mode before you start Linux, and cannot
+switch to another mode once Linux has started.
+
+The first 3 parameters of this sub-option should be obvious: <xres>,
+<yres> and <depth> give the dimensions of the screen and the number of
+planes (depth). The depth is the logarithm to base 2 of the number
+of colors possible. (Or, the other way round: The number of colors is
+2^depth).
+
+You have to tell the kernel furthermore how the video memory is
+organized. This is done by a letter as <org> parameter:
+
+ 'n':
+ "normal planes", i.e. one whole plane after another
+ 'i':
+ "interleaved planes", i.e. 16 bit of the first plane, than 16 bit
+ of the next, and so on... This mode is used only with the
+ built-in Atari video modes, I think there is no card that
+ supports this mode.
+ 'p':
+ "packed pixels", i.e. <depth> consecutive bits stand for all
+ planes of one pixel; this is the most common mode for 8 planes
+ (256 colors) on graphic cards
+ 't':
+ "true color" (more or less packed pixels, but without a color
+ lookup table); usually depth is 24
+
+For monochrome modes (i.e., <depth> is 1), the <org> letter has a
+different meaning:
+
+ 'n':
+ normal colors, i.e. 0=white, 1=black
+ 'i':
+ inverted colors, i.e. 0=black, 1=white
+
+The next important information about the video hardware is the base
+address of the video memory. That is given in the <scrmem> parameter,
+as a hexadecimal number with a "0x" prefix. You have to find out this
+address in the documentation of your hardware.
+
+The next parameter, <scrlen>, tells the kernel about the size of the
+video memory. If it's missing, the size is calculated from <xres>,
+<yres>, and <depth>. For now, it is not useful to write a value here.
+It would be used only for hardware scrolling (which isn't possible
+with the external driver, because the kernel cannot set the video base
+address), or for virtual resolutions under X (which the X server
+doesn't support yet). So, it's currently best to leave this field
+empty, either by ending the "external:" after the video address or by
+writing two consecutive semicolons, if you want to give a <vgabase>
+(it is allowed to leave this parameter empty).
+
+The <vgabase> parameter is optional. If it is not given, the kernel
+cannot read or write any color registers of the video hardware, and
+thus you have to set appropriate colors before you start Linux. But if
+your card is somehow VGA compatible, you can tell the kernel the base
+address of the VGA register set, so it can change the color lookup
+table. You have to look up this address in your board's documentation.
+To avoid misunderstandings: <vgabase> is the _base_ address, i.e. a 4k
+aligned address. For read/writing the color registers, the kernel
+uses the addresses vgabase+0x3c7...vgabase+0x3c9. The <vgabase>
+parameter is written in hexadecimal with a "0x" prefix, just as
+<scrmem>.
+
+<colw> is meaningful only if <vgabase> is specified. It tells the
+kernel how wide each of the color register is, i.e. the number of bits
+per single color (red/green/blue). Default is 6, another quite usual
+value is 8.
+
+Also <coltype> is used together with <vgabase>. It tells the kernel
+about the color register model of your gfx board. Currently, the types
+"vga" (which is also the default) and "mv300" (SANG MV300) are
+implemented.
+
+Parameter <xres_virtual> is required for ProMST or ET4000 cards where
+the physical linelength differs from the visible length. With ProMST,
+xres_virtual must be set to 2048. For ET4000, xres_virtual depends on the
+initialisation of the video-card.
+If you're missing a corresponding yres_virtual: the external part is legacy,
+therefore we don't support hardware-dependent functions like hardware-scroll,
+panning or blanking.
+
+4.1.7) eclock:
+--------------
+
+The external pixel clock attached to the Falcon VIDEL shifter. This
+currently works only with the ScreenWonder!
+
+4.1.8) monitorcap:
+-------------------
+
+:Syntax: monitorcap:<vmin>;<vmax>;<hmin>;<hmax>
+
+This describes the capabilities of a multisync monitor. Don't use it
+with a fixed-frequency monitor! For now, only the Falcon frame buffer
+uses the settings of "monitorcap:".
+
+<vmin> and <vmax> are the minimum and maximum, resp., vertical frequencies
+your monitor can work with, in Hz. <hmin> and <hmax> are the same for
+the horizontal frequency, in kHz.
+
+ The defaults are 58;62;31;32 (VGA compatible).
+
+ The defaults for TV/SC1224/SC1435 cover both PAL and NTSC standards.
+
+4.1.9) keep
+------------
+
+If this option is given, the framebuffer device doesn't do any video
+mode calculations and settings on its own. The only Atari fb device
+that does this currently is the Falcon.
+
+What you reach with this: Settings for unknown video extensions
+aren't overridden by the driver, so you can still use the mode found
+when booting, when the driver doesn't know to set this mode itself.
+But this also means, that you can't switch video modes anymore...
+
+An example where you may want to use "keep" is the ScreenBlaster for
+the Falcon.
+
+
+4.2) atamouse=
+--------------
+
+:Syntax: atamouse=<x-threshold>,[<y-threshold>]
+
+With this option, you can set the mouse movement reporting threshold.
+This is the number of pixels of mouse movement that have to accumulate
+before the IKBD sends a new mouse packet to the kernel. Higher values
+reduce the mouse interrupt load and thus reduce the chance of keyboard
+overruns. Lower values give a slightly faster mouse responses and
+slightly better mouse tracking.
+
+You can set the threshold in x and y separately, but usually this is
+of little practical use. If there's just one number in the option, it
+is used for both dimensions. The default value is 2 for both
+thresholds.
+
+
+4.3) ataflop=
+-------------
+
+:Syntax: ataflop=<drive type>[,<trackbuffering>[,<steprateA>[,<steprateB>]]]
+
+ The drive type may be 0, 1, or 2, for DD, HD, and ED, resp. This
+ setting affects how many buffers are reserved and which formats are
+ probed (see also below). The default is 1 (HD). Only one drive type
+ can be selected. If you have two disk drives, select the "better"
+ type.
+
+ The second parameter <trackbuffer> tells the kernel whether to use
+ track buffering (1) or not (0). The default is machine-dependent:
+ no for the Medusa and yes for all others.
+
+ With the two following parameters, you can change the default
+ steprate used for drive A and B, resp.
+
+
+4.4) atascsi=
+-------------
+
+:Syntax: atascsi=<can_queue>[,<cmd_per_lun>[,<scat-gat>[,<host-id>[,<tagged>]]]]
+
+This option sets some parameters for the Atari native SCSI driver.
+Generally, any number of arguments can be omitted from the end. And
+for each of the numbers, a negative value means "use default". The
+defaults depend on whether TT-style or Falcon-style SCSI is used.
+Below, defaults are noted as n/m, where the first value refers to
+TT-SCSI and the latter to Falcon-SCSI. If an illegal value is given
+for one parameter, an error message is printed and that one setting is
+ignored (others aren't affected).
+
+ <can_queue>:
+ This is the maximum number of SCSI commands queued internally to the
+ Atari SCSI driver. A value of 1 effectively turns off the driver
+ internal multitasking (if it causes problems). Legal values are >=
+ 1. <can_queue> can be as high as you like, but values greater than
+ <cmd_per_lun> times the number of SCSI targets (LUNs) you have
+ don't make sense. Default: 16/8.
+
+ <cmd_per_lun>:
+ Maximum number of SCSI commands issued to the driver for one
+ logical unit (LUN, usually one SCSI target). Legal values start
+ from 1. If tagged queuing (see below) is not used, values greater
+ than 2 don't make sense, but waste memory. Otherwise, the maximum
+ is the number of command tags available to the driver (currently
+ 32). Default: 8/1. (Note: Values > 1 seem to cause problems on a
+ Falcon, cause not yet known.)
+
+ The <cmd_per_lun> value at a great part determines the amount of
+ memory SCSI reserves for itself. The formula is rather
+ complicated, but I can give you some hints:
+
+ no scatter-gather:
+ cmd_per_lun * 232 bytes
+ full scatter-gather:
+ cmd_per_lun * approx. 17 Kbytes
+
+ <scat-gat>:
+ Size of the scatter-gather table, i.e. the number of requests
+ consecutive on the disk that can be merged into one SCSI command.
+ Legal values are between 0 and 255. Default: 255/0. Note: This
+ value is forced to 0 on a Falcon, since scatter-gather isn't
+ possible with the ST-DMA. Not using scatter-gather hurts
+ performance significantly.
+
+ <host-id>:
+ The SCSI ID to be used by the initiator (your Atari). This is
+ usually 7, the highest possible ID. Every ID on the SCSI bus must
+ be unique. Default: determined at run time: If the NV-RAM checksum
+ is valid, and bit 7 in byte 30 of the NV-RAM is set, the lower 3
+ bits of this byte are used as the host ID. (This method is defined
+ by Atari and also used by some TOS HD drivers.) If the above
+ isn't given, the default ID is 7. (both, TT and Falcon).
+
+ <tagged>:
+ 0 means turn off tagged queuing support, all other values > 0 mean
+ use tagged queuing for targets that support it. Default: currently
+ off, but this may change when tagged queuing handling has been
+ proved to be reliable.
+
+ Tagged queuing means that more than one command can be issued to
+ one LUN, and the SCSI device itself orders the requests so they
+ can be performed in optimal order. Not all SCSI devices support
+ tagged queuing (:-().
+
+4.5 switches=
+-------------
+
+:Syntax: switches=<list of switches>
+
+With this option you can switch some hardware lines that are often
+used to enable/disable certain hardware extensions. Examples are
+OverScan, overclocking, ...
+
+The <list of switches> is a comma-separated list of the following
+items:
+
+ ikbd:
+ set RTS of the keyboard ACIA high
+ midi:
+ set RTS of the MIDI ACIA high
+ snd6:
+ set bit 6 of the PSG port A
+ snd7:
+ set bit 6 of the PSG port A
+
+It doesn't make sense to mention a switch more than once (no
+difference to only once), but you can give as many switches as you
+want to enable different features. The switch lines are set as early
+as possible during kernel initialization (even before determining the
+present hardware.)
+
+All of the items can also be prefixed with `ov_`, i.e. `ov_ikbd`,
+`ov_midi`, ... These options are meant for switching on an OverScan
+video extension. The difference to the bare option is that the
+switch-on is done after video initialization, and somehow synchronized
+to the HBLANK. A speciality is that ov_ikbd and ov_midi are switched
+off before rebooting, so that OverScan is disabled and TOS boots
+correctly.
+
+If you give an option both, with and without the `ov_` prefix, the
+earlier initialization (`ov_`-less) takes precedence. But the
+switching-off on reset still happens in this case.
+
+5) Options for Amiga Only:
+==========================
+
+5.1) video=
+-----------
+
+:Syntax: video=<fbname>:<sub-options...>
+
+The <fbname> parameter specifies the name of the frame buffer, valid
+options are `amifb`, `cyber`, 'virge', `retz3` and `clgen`, provided
+that the respective frame buffer devices have been compiled into the
+kernel (or compiled as loadable modules). The behavior of the <fbname>
+option was changed in 2.1.57 so it is now recommended to specify this
+option.
+
+The <sub-options> is a comma-separated list of the sub-options listed
+below. This option is organized similar to the Atari version of the
+"video"-option (4.1), but knows fewer sub-options.
+
+5.1.1) video mode
+-----------------
+
+Again, similar to the video mode for the Atari (see 4.1.1). Predefined
+modes depend on the used frame buffer device.
+
+OCS, ECS and AGA machines all use the color frame buffer. The following
+predefined video modes are available:
+
+NTSC modes:
+ - ntsc : 640x200, 15 kHz, 60 Hz
+ - ntsc-lace : 640x400, 15 kHz, 60 Hz interlaced
+
+PAL modes:
+ - pal : 640x256, 15 kHz, 50 Hz
+ - pal-lace : 640x512, 15 kHz, 50 Hz interlaced
+
+ECS modes:
+ - multiscan : 640x480, 29 kHz, 57 Hz
+ - multiscan-lace : 640x960, 29 kHz, 57 Hz interlaced
+ - euro36 : 640x200, 15 kHz, 72 Hz
+ - euro36-lace : 640x400, 15 kHz, 72 Hz interlaced
+ - euro72 : 640x400, 29 kHz, 68 Hz
+ - euro72-lace : 640x800, 29 kHz, 68 Hz interlaced
+ - super72 : 800x300, 23 kHz, 70 Hz
+ - super72-lace : 800x600, 23 kHz, 70 Hz interlaced
+ - dblntsc-ff : 640x400, 27 kHz, 57 Hz
+ - dblntsc-lace : 640x800, 27 kHz, 57 Hz interlaced
+ - dblpal-ff : 640x512, 27 kHz, 47 Hz
+ - dblpal-lace : 640x1024, 27 kHz, 47 Hz interlaced
+ - dblntsc : 640x200, 27 kHz, 57 Hz doublescan
+ - dblpal : 640x256, 27 kHz, 47 Hz doublescan
+
+VGA modes:
+ - vga : 640x480, 31 kHz, 60 Hz
+ - vga70 : 640x400, 31 kHz, 70 Hz
+
+Please notice that the ECS and VGA modes require either an ECS or AGA
+chipset, and that these modes are limited to 2-bit color for the ECS
+chipset and 8-bit color for the AGA chipset.
+
+5.1.2) depth
+------------
+
+:Syntax: depth:<nr. of bit-planes>
+
+Specify the number of bit-planes for the selected video-mode.
+
+5.1.3) inverse
+--------------
+
+Use inverted display (black on white). Functionally the same as the
+"inverse" sub-option for the Atari.
+
+5.1.4) font
+-----------
+
+:Syntax: font:<fontname>
+
+Specify the font to use in text modes. Functionally the same as the
+"font" sub-option for the Atari, except that `PEARL8x8` is used instead
+of `VGA8x8` if the vertical size of the display is less than 400 pixel
+rows.
+
+5.1.5) monitorcap:
+-------------------
+
+:Syntax: monitorcap:<vmin>;<vmax>;<hmin>;<hmax>
+
+This describes the capabilities of a multisync monitor. For now, only
+the color frame buffer uses the settings of "monitorcap:".
+
+<vmin> and <vmax> are the minimum and maximum, resp., vertical frequencies
+your monitor can work with, in Hz. <hmin> and <hmax> are the same for
+the horizontal frequency, in kHz.
+
+The defaults are 50;90;15;38 (Generic Amiga multisync monitor).
+
+
+5.2) fd_def_df0=
+----------------
+
+:Syntax: fd_def_df0=<value>
+
+Sets the df0 value for "silent" floppy drives. The value should be in
+hexadecimal with "0x" prefix.
+
+
+5.3) wd33c93=
+-------------
+
+:Syntax: wd33c93=<sub-options...>
+
+These options affect the A590/A2091, A3000 and GVP Series II SCSI
+controllers.
+
+The <sub-options> is a comma-separated list of the sub-options listed
+below.
+
+5.3.1) nosync
+-------------
+
+:Syntax: nosync:bitmask
+
+bitmask is a byte where the 1st 7 bits correspond with the 7
+possible SCSI devices. Set a bit to prevent sync negotiation on that
+device. To maintain backwards compatibility, a command-line such as
+"wd33c93=255" will be automatically translated to
+"wd33c93=nosync:0xff". The default is to disable sync negotiation for
+all devices, eg. nosync:0xff.
+
+5.3.2) period
+-------------
+
+:Syntax: period:ns
+
+`ns` is the minimum # of nanoseconds in a SCSI data transfer
+period. Default is 500; acceptable values are 250 - 1000.
+
+5.3.3) disconnect
+-----------------
+
+:Syntax: disconnect:x
+
+Specify x = 0 to never allow disconnects, 2 to always allow them.
+x = 1 does 'adaptive' disconnects, which is the default and generally
+the best choice.
+
+5.3.4) debug
+------------
+
+:Syntax: debug:x
+
+If `DEBUGGING_ON` is defined, x is a bit mask that causes various
+types of debug output to printed - see the DB_xxx defines in
+wd33c93.h.
+
+5.3.5) clock
+------------
+
+:Syntax: clock:x
+
+x = clock input in MHz for WD33c93 chip. Normal values would be from
+8 through 20. The default value depends on your hostadapter(s),
+default for the A3000 internal controller is 14, for the A2091 it's 8
+and for the GVP hostadapters it's either 8 or 14, depending on the
+hostadapter and the SCSI-clock jumper present on some GVP
+hostadapters.
+
+5.3.6) next
+-----------
+
+No argument. Used to separate blocks of keywords when there's more
+than one wd33c93-based host adapter in the system.
+
+5.3.7) nodma
+------------
+
+:Syntax: nodma:x
+
+If x is 1 (or if the option is just written as "nodma"), the WD33c93
+controller will not use DMA (= direct memory access) to access the
+Amiga's memory. This is useful for some systems (like A3000's and
+A4000's with the A3640 accelerator, revision 3.0) that have problems
+using DMA to chip memory. The default is 0, i.e. to use DMA if
+possible.
+
+
+5.4) gvp11=
+-----------
+
+:Syntax: gvp11=<addr-mask>
+
+The earlier versions of the GVP driver did not handle DMA
+address-mask settings correctly which made it necessary for some
+people to use this option, in order to get their GVP controller
+running under Linux. These problems have hopefully been solved and the
+use of this option is now highly unrecommended!
+
+Incorrect use can lead to unpredictable behavior, so please only use
+this option if you *know* what you are doing and have a reason to do
+so. In any case if you experience problems and need to use this
+option, please inform us about it by mailing to the Linux/68k kernel
+mailing list.
+
+The address mask set by this option specifies which addresses are
+valid for DMA with the GVP Series II SCSI controller. An address is
+valid, if no bits are set except the bits that are set in the mask,
+too.
+
+Some versions of the GVP can only DMA into a 24 bit address range,
+some can address a 25 bit address range while others can use the whole
+32 bit address range for DMA. The correct setting depends on your
+controller and should be autodetected by the driver. An example is the
+24 bit region which is specified by a mask of 0x00fffffe.
diff --git a/Documentation/arch/mips/booting.rst b/Documentation/arch/mips/booting.rst
new file mode 100644
index 000000000000..7c18a4eab48b
--- /dev/null
+++ b/Documentation/arch/mips/booting.rst
@@ -0,0 +1,28 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+BMIPS DeviceTree Booting
+------------------------
+
+ Some bootloaders only support a single entry point, at the start of the
+ kernel image. Other bootloaders will jump to the ELF start address.
+ Both schemes are supported; CONFIG_BOOT_RAW=y and CONFIG_NO_EXCEPT_FILL=y,
+ so the first instruction immediately jumps to kernel_entry().
+
+ Similar to the arch/arm case (b), a DT-aware bootloader is expected to
+ set up the following registers:
+
+ a0 : 0
+
+ a1 : 0xffffffff
+
+ a2 : Physical pointer to the device tree block (defined in chapter
+ II) in RAM. The device tree can be located anywhere in the first
+ 512MB of the physical address space (0x00000000 - 0x1fffffff),
+ aligned on a 64 bit boundary.
+
+ Legacy bootloaders do not use this convention, and they do not pass in a
+ DT block. In this case, Linux will look for a builtin DTB, selected via
+ CONFIG_DT_*.
+
+ This convention is defined for 32-bit systems only, as there are not
+ currently any 64-bit BMIPS implementations.
diff --git a/Documentation/arch/mips/features.rst b/Documentation/arch/mips/features.rst
new file mode 100644
index 000000000000..6e0ffe3e7354
--- /dev/null
+++ b/Documentation/arch/mips/features.rst
@@ -0,0 +1,3 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. kernel-feat:: features mips
diff --git a/Documentation/arch/mips/index.rst b/Documentation/arch/mips/index.rst
new file mode 100644
index 000000000000..037f85a08fe3
--- /dev/null
+++ b/Documentation/arch/mips/index.rst
@@ -0,0 +1,21 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================
+MIPS-specific Documentation
+===========================
+
+.. toctree::
+ :maxdepth: 2
+ :numbered:
+
+ booting
+ ingenic-tcu
+
+ features
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
diff --git a/Documentation/arch/mips/ingenic-tcu.rst b/Documentation/arch/mips/ingenic-tcu.rst
new file mode 100644
index 000000000000..2ce4cb1314dc
--- /dev/null
+++ b/Documentation/arch/mips/ingenic-tcu.rst
@@ -0,0 +1,71 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============================================
+Ingenic JZ47xx SoCs Timer/Counter Unit hardware
+===============================================
+
+The Timer/Counter Unit (TCU) in Ingenic JZ47xx SoCs is a multi-function
+hardware block. It features up to eight channels, that can be used as
+counters, timers, or PWM.
+
+- JZ4725B, JZ4750, JZ4755 only have six TCU channels. The other SoCs all
+ have eight channels.
+
+- JZ4725B introduced a separate channel, called Operating System Timer
+ (OST). It is a 32-bit programmable timer. On JZ4760B and above, it is
+ 64-bit.
+
+- Each one of the TCU channels has its own clock, which can be reparented to three
+ different clocks (pclk, ext, rtc), gated, and reclocked, through their TCSR register.
+
+ - The watchdog and OST hardware blocks also feature a TCSR register with the same
+ format in their register space.
+ - The TCU registers used to gate/ungate can also gate/ungate the watchdog and
+ OST clocks.
+
+- Each TCU channel works in one of two modes:
+
+ - mode TCU1: channels cannot work in sleep mode, but are easier to
+ operate.
+ - mode TCU2: channels can work in sleep mode, but the operation is a bit
+ more complicated than with TCU1 channels.
+
+- The mode of each TCU channel depends on the SoC used:
+
+ - On the oldest SoCs (up to JZ4740), all of the eight channels operate in
+ TCU1 mode.
+ - On JZ4725B, channel 5 operates as TCU2, the others operate as TCU1.
+ - On newest SoCs (JZ4750 and above), channels 1-2 operate as TCU2, the
+ others operate as TCU1.
+
+- Each channel can generate an interrupt. Some channels share an interrupt
+ line, some don't, and this changes between SoC versions:
+
+ - on older SoCs (JZ4740 and below), channel 0 and channel 1 have their
+ own interrupt line; channels 2-7 share the last interrupt line.
+ - On JZ4725B, channel 0 has its own interrupt; channels 1-5 share one
+ interrupt line; the OST uses the last interrupt line.
+ - on newer SoCs (JZ4750 and above), channel 5 has its own interrupt;
+ channels 0-4 and (if eight channels) 6-7 all share one interrupt line;
+ the OST uses the last interrupt line.
+
+Implementation
+==============
+
+The functionalities of the TCU hardware are spread across multiple drivers:
+
+=========== =====
+clocks drivers/clk/ingenic/tcu.c
+interrupts drivers/irqchip/irq-ingenic-tcu.c
+timers drivers/clocksource/ingenic-timer.c
+OST drivers/clocksource/ingenic-ost.c
+PWM drivers/pwm/pwm-jz4740.c
+watchdog drivers/watchdog/jz4740_wdt.c
+=========== =====
+
+Because various functionalities of the TCU that belong to different drivers
+and frameworks can be controlled from the same registers, all of these
+drivers access their registers through the same regmap.
+
+For more information regarding the devicetree bindings of the TCU drivers,
+have a look at Documentation/devicetree/bindings/timer/ingenic,tcu.yaml.
diff --git a/Documentation/arch/nios2/features.rst b/Documentation/arch/nios2/features.rst
new file mode 100644
index 000000000000..89913810ccb5
--- /dev/null
+++ b/Documentation/arch/nios2/features.rst
@@ -0,0 +1,3 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. kernel-feat:: features nios2
diff --git a/Documentation/arch/nios2/index.rst b/Documentation/arch/nios2/index.rst
new file mode 100644
index 000000000000..4468fe1a1037
--- /dev/null
+++ b/Documentation/arch/nios2/index.rst
@@ -0,0 +1,12 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============================
+Nios II Specific Documentation
+==============================
+
+.. toctree::
+ :maxdepth: 2
+ :numbered:
+
+ nios2
+ features
diff --git a/Documentation/arch/nios2/nios2.rst b/Documentation/arch/nios2/nios2.rst
new file mode 100644
index 000000000000..43da3f7cee76
--- /dev/null
+++ b/Documentation/arch/nios2/nios2.rst
@@ -0,0 +1,24 @@
+=================================
+Linux on the Nios II architecture
+=================================
+
+This is a port of Linux to Nios II (nios2) processor.
+
+In order to compile for Nios II, you need a version of GCC with support for the generic
+system call ABI. Please see this link for more information on how compiling and booting
+software for the Nios II platform:
+http://www.rocketboards.org/foswiki/Documentation/NiosIILinuxUserManual
+
+For reference, please see the following link:
+http://www.altera.com/literature/lit-nio2.jsp
+
+What is Nios II?
+================
+Nios II is a 32-bit embedded-processor architecture designed specifically for the
+Altera family of FPGAs. In order to support Linux, Nios II needs to be configured
+with MMU and hardware multiplier enabled.
+
+Nios II ABI
+===========
+Please refer to chapter "Application Binary Interface" in Nios II Processor Reference
+Handbook.
diff --git a/Documentation/arch/openrisc/features.rst b/Documentation/arch/openrisc/features.rst
new file mode 100644
index 000000000000..bae2e25adfd6
--- /dev/null
+++ b/Documentation/arch/openrisc/features.rst
@@ -0,0 +1,3 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. kernel-feat:: features openrisc
diff --git a/Documentation/arch/openrisc/index.rst b/Documentation/arch/openrisc/index.rst
new file mode 100644
index 000000000000..6879f998b87a
--- /dev/null
+++ b/Documentation/arch/openrisc/index.rst
@@ -0,0 +1,20 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================
+OpenRISC Architecture
+=====================
+
+.. toctree::
+ :maxdepth: 2
+
+ openrisc_port
+ todo
+
+ features
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
diff --git a/Documentation/arch/openrisc/openrisc_port.rst b/Documentation/arch/openrisc/openrisc_port.rst
new file mode 100644
index 000000000000..60b0a9e51d70
--- /dev/null
+++ b/Documentation/arch/openrisc/openrisc_port.rst
@@ -0,0 +1,127 @@
+==============
+OpenRISC Linux
+==============
+
+This is a port of Linux to the OpenRISC class of microprocessors; the initial
+target architecture, specifically, is the 32-bit OpenRISC 1000 family (or1k).
+
+For information about OpenRISC processors and ongoing development:
+
+ ======= ==============================
+ website https://openrisc.io
+ email linux-openrisc@vger.kernel.org
+ ======= ==============================
+
+---------------------------------------------------------------------
+
+Build instructions for OpenRISC toolchain and Linux
+===================================================
+
+In order to build and run Linux for OpenRISC, you'll need at least a basic
+toolchain and, perhaps, the architectural simulator. Steps to get these bits
+in place are outlined here.
+
+1) Toolchain
+
+Toolchain binaries can be obtained from openrisc.io or our github releases page.
+Instructions for building the different toolchains can be found on openrisc.io
+or Stafford's toolchain build and release scripts.
+
+ ========== ==========================================================
+ binaries https://github.com/stffrdhrn/or1k-toolchain-build/releases
+ toolchains https://openrisc.io/software
+ building https://github.com/stffrdhrn/or1k-toolchain-build
+ ========== ==========================================================
+
+2) Building
+
+Build the Linux kernel as usual::
+
+ make ARCH=openrisc CROSS_COMPILE="or1k-linux-" defconfig
+ make ARCH=openrisc CROSS_COMPILE="or1k-linux-"
+
+If you want to embed initramfs in the kernel, also pass ``CONFIG_INITRAMFS_SOURCE``. For example::
+
+ make ARCH=openrisc CROSS_COMPILE="or1k-linux-" CONFIG_INITRAMFS_SOURCE="path/to/rootfs path/to/devnodes"
+
+For more information on this, please check Documentation/filesystems/ramfs-rootfs-initramfs.rst.
+
+3) Running on FPGA (optional)
+
+The OpenRISC community typically uses FuseSoC to manage building and programming
+an SoC into an FPGA. The below is an example of programming a De0 Nano
+development board with the OpenRISC SoC. During the build FPGA RTL is code
+downloaded from the FuseSoC IP cores repository and built using the FPGA vendor
+tools. Binaries are loaded onto the board with openocd.
+
+::
+
+ git clone https://github.com/olofk/fusesoc
+ cd fusesoc
+ sudo pip install -e .
+
+ fusesoc init
+ fusesoc build de0_nano
+ fusesoc pgm de0_nano
+
+ openocd -f interface/altera-usb-blaster.cfg \
+ -f board/or1k_generic.cfg
+
+ telnet localhost 4444
+ > init
+ > halt; load_image vmlinux ; reset
+
+4) Running on a Simulator (optional)
+
+QEMU is a processor emulator which we recommend for simulating the OpenRISC
+platform. Please follow the OpenRISC instructions on the QEMU website to get
+Linux running on QEMU. You can build QEMU yourself, but your Linux distribution
+likely provides binary packages to support OpenRISC.
+
+ ============= ======================================================
+ qemu openrisc https://wiki.qemu.org/Documentation/Platforms/OpenRISC
+ ============= ======================================================
+
+---------------------------------------------------------------------
+
+Terminology
+===========
+
+In the code, the following particles are used on symbols to limit the scope
+to more or less specific processor implementations:
+
+========= =======================================
+openrisc: the OpenRISC class of processors
+or1k: the OpenRISC 1000 family of processors
+or1200: the OpenRISC 1200 processor
+========= =======================================
+
+---------------------------------------------------------------------
+
+History
+========
+
+18-11-2003 Matjaz Breskvar (phoenix@bsemi.com)
+ initial port of linux to OpenRISC/or32 architecture.
+ all the core stuff is implemented and seams usable.
+
+08-12-2003 Matjaz Breskvar (phoenix@bsemi.com)
+ complete change of TLB miss handling.
+ rewrite of exceptions handling.
+ fully functional sash-3.6 in default initrd.
+ a much improved version with changes all around.
+
+10-04-2004 Matjaz Breskvar (phoenix@bsemi.com)
+ a lot of bugfixes all over.
+ ethernet support, functional http and telnet servers.
+ running many standard linux apps.
+
+26-06-2004 Matjaz Breskvar (phoenix@bsemi.com)
+ port to 2.6.x
+
+30-11-2004 Matjaz Breskvar (phoenix@bsemi.com)
+ lots of bugfixes and enhancements.
+ added opencores framebuffer driver.
+
+09-10-2010 Jonas Bonn (jonas@southpole.se)
+ major rewrite to bring up to par with upstream Linux 2.6.36
diff --git a/Documentation/arch/openrisc/todo.rst b/Documentation/arch/openrisc/todo.rst
new file mode 100644
index 000000000000..420b18b87eda
--- /dev/null
+++ b/Documentation/arch/openrisc/todo.rst
@@ -0,0 +1,15 @@
+====
+TODO
+====
+
+The OpenRISC Linux port is fully functional and has been tracking upstream
+since 2.6.35. There are, however, remaining items to be completed within
+the coming months. Here's a list of known-to-be-less-than-stellar items
+that are due for investigation shortly, i.e. our TODO list:
+
+- Implement the rest of the DMA API... dma_map_sg, etc.
+
+- Finish the renaming cleanup... there are references to or32 in the code
+ which was an older name for the architecture. The name we've settled on is
+ or1k and this change is slowly trickling through the stack. For the time
+ being, or32 is equivalent to or1k.
diff --git a/Documentation/arch/parisc/debugging.rst b/Documentation/arch/parisc/debugging.rst
new file mode 100644
index 000000000000..de1b60402c5b
--- /dev/null
+++ b/Documentation/arch/parisc/debugging.rst
@@ -0,0 +1,46 @@
+=================
+PA-RISC Debugging
+=================
+
+okay, here are some hints for debugging the lower-level parts of
+linux/parisc.
+
+
+1. Absolute addresses
+=====================
+
+A lot of the assembly code currently runs in real mode, which means
+absolute addresses are used instead of virtual addresses as in the
+rest of the kernel. To translate an absolute address to a virtual
+address you can lookup in System.map, add __PAGE_OFFSET (0x10000000
+currently).
+
+
+2. HPMCs
+========
+
+When real-mode code tries to access non-existent memory, you'll get
+an HPMC instead of a kernel oops. To debug an HPMC, try to find
+the System Responder/Requestor addresses. The System Requestor
+address should match (one of the) processor HPAs (high addresses in
+the I/O range); the System Responder address is the address real-mode
+code tried to access.
+
+Typical values for the System Responder address are addresses larger
+than __PAGE_OFFSET (0x10000000) which mean a virtual address didn't
+get translated to a physical address before real-mode code tried to
+access it.
+
+
+3. Q bit fun
+============
+
+Certain, very critical code has to clear the Q bit in the PSW. What
+happens when the Q bit is cleared is the CPU does not update the
+registers interruption handlers read to find out where the machine
+was interrupted - so if you get an interruption between the instruction
+that clears the Q bit and the RFI that sets it again you don't know
+where exactly it happened. If you're lucky the IAOQ will point to the
+instruction that cleared the Q bit, if you're not it points anywhere
+at all. Usually Q bit problems will show themselves in unexplainable
+system hangs or running off the end of physical memory.
diff --git a/Documentation/arch/parisc/features.rst b/Documentation/arch/parisc/features.rst
new file mode 100644
index 000000000000..b3aa4d243b93
--- /dev/null
+++ b/Documentation/arch/parisc/features.rst
@@ -0,0 +1,3 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. kernel-feat:: features parisc
diff --git a/Documentation/arch/parisc/index.rst b/Documentation/arch/parisc/index.rst
new file mode 100644
index 000000000000..240685751825
--- /dev/null
+++ b/Documentation/arch/parisc/index.rst
@@ -0,0 +1,20 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================
+PA-RISC Architecture
+====================
+
+.. toctree::
+ :maxdepth: 2
+
+ debugging
+ registers
+
+ features
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
diff --git a/Documentation/arch/parisc/registers.rst b/Documentation/arch/parisc/registers.rst
new file mode 100644
index 000000000000..59c8ecf3e856
--- /dev/null
+++ b/Documentation/arch/parisc/registers.rst
@@ -0,0 +1,154 @@
+================================
+Register Usage for Linux/PA-RISC
+================================
+
+[ an asterisk is used for planned usage which is currently unimplemented ]
+
+General Registers as specified by ABI
+=====================================
+
+Control Registers
+-----------------
+
+=============================== ===============================================
+CR 0 (Recovery Counter) used for ptrace
+CR 1-CR 7(undefined) unused
+CR 8 (Protection ID) per-process value*
+CR 9, 12, 13 (PIDS) unused
+CR10 (CCR) lazy FPU saving*
+CR11 as specified by ABI (SAR)
+CR14 (interruption vector) initialized to fault_vector
+CR15 (EIEM) initialized to all ones*
+CR16 (Interval Timer) read for cycle count/write starts Interval Tmr
+CR17-CR22 interruption parameters
+CR19 Interrupt Instruction Register
+CR20 Interrupt Space Register
+CR21 Interrupt Offset Register
+CR22 Interrupt PSW
+CR23 (EIRR) read for pending interrupts/write clears bits
+CR24 (TR 0) Kernel Space Page Directory Pointer
+CR25 (TR 1) User Space Page Directory Pointer
+CR26 (TR 2) not used
+CR27 (TR 3) Thread descriptor pointer
+CR28 (TR 4) not used
+CR29 (TR 5) not used
+CR30 (TR 6) current / 0
+CR31 (TR 7) Temporary register, used in various places
+=============================== ===============================================
+
+Space Registers (kernel mode)
+-----------------------------
+
+=============================== ===============================================
+SR0 temporary space register
+SR4-SR7 set to 0
+SR1 temporary space register
+SR2 kernel should not clobber this
+SR3 used for userspace accesses (current process)
+=============================== ===============================================
+
+Space Registers (user mode)
+---------------------------
+
+=============================== ===============================================
+SR0 temporary space register
+SR1 temporary space register
+SR2 holds space of linux gateway page
+SR3 holds user address space value while in kernel
+SR4-SR7 Defines short address space for user/kernel
+=============================== ===============================================
+
+
+Processor Status Word
+---------------------
+
+=============================== ===============================================
+W (64-bit addresses) 0
+E (Little-endian) 0
+S (Secure Interval Timer) 0
+T (Taken Branch Trap) 0
+H (Higher-privilege trap) 0
+L (Lower-privilege trap) 0
+N (Nullify next instruction) used by C code
+X (Data memory break disable) 0
+B (Taken Branch) used by C code
+C (code address translation) 1, 0 while executing real-mode code
+V (divide step correction) used by C code
+M (HPMC mask) 0, 1 while executing HPMC handler*
+C/B (carry/borrow bits) used by C code
+O (ordered references) 1*
+F (performance monitor) 0
+R (Recovery Counter trap) 0
+Q (collect interruption state) 1 (0 in code directly preceding an rfi)
+P (Protection Identifiers) 1*
+D (Data address translation) 1, 0 while executing real-mode code
+I (external interrupt mask) used by cli()/sti() macros
+=============================== ===============================================
+
+"Invisible" Registers
+---------------------
+
+=============================== ===============================================
+PSW default W value 0
+PSW default E value 0
+Shadow Registers used by interruption handler code
+TOC enable bit 1
+=============================== ===============================================
+
+-------------------------------------------------------------------------
+
+The PA-RISC architecture defines 7 registers as "shadow registers".
+Those are used in RETURN FROM INTERRUPTION AND RESTORE instruction to reduce
+the state save and restore time by eliminating the need for general register
+(GR) saves and restores in interruption handlers.
+Shadow registers are the GRs 1, 8, 9, 16, 17, 24, and 25.
+
+-------------------------------------------------------------------------
+
+Register usage notes, originally from John Marvin, with some additional
+notes from Randolph Chung.
+
+For the general registers:
+
+r1,r2,r19-r26,r28,r29 & r31 can be used without saving them first. And of
+course, you need to save them if you care about them, before calling
+another procedure. Some of the above registers do have special meanings
+that you should be aware of:
+
+ r1:
+ The addil instruction is hardwired to place its result in r1,
+ so if you use that instruction be aware of that.
+
+ r2:
+ This is the return pointer. In general you don't want to
+ use this, since you need the pointer to get back to your
+ caller. However, it is grouped with this set of registers
+ since the caller can't rely on the value being the same
+ when you return, i.e. you can copy r2 to another register
+ and return through that register after trashing r2, and
+ that should not cause a problem for the calling routine.
+
+ r19-r22:
+ these are generally regarded as temporary registers.
+ Note that in 64 bit they are arg7-arg4.
+
+ r23-r26:
+ these are arg3-arg0, i.e. you can use them if you
+ don't care about the values that were passed in anymore.
+
+ r28,r29:
+ are ret0 and ret1. They are what you pass return values
+ in. r28 is the primary return. When returning small structures
+ r29 may also be used to pass data back to the caller.
+
+ r30:
+ stack pointer
+
+ r31:
+ the ble instruction puts the return pointer in here.
+
+
+ r3-r18,r27,r30 need to be saved and restored. r3-r18 are just
+ general purpose registers. r27 is the data pointer, and is
+ used to make references to global variables easier. r30 is
+ the stack pointer.
diff --git a/Documentation/arch/powerpc/associativity.rst b/Documentation/arch/powerpc/associativity.rst
new file mode 100644
index 000000000000..4d01c7368561
--- /dev/null
+++ b/Documentation/arch/powerpc/associativity.rst
@@ -0,0 +1,105 @@
+============================
+NUMA resource associativity
+============================
+
+Associativity represents the groupings of the various platform resources into
+domains of substantially similar mean performance relative to resources outside
+of that domain. Resources subsets of a given domain that exhibit better
+performance relative to each other than relative to other resources subsets
+are represented as being members of a sub-grouping domain. This performance
+characteristic is presented in terms of NUMA node distance within the Linux kernel.
+From the platform view, these groups are also referred to as domains.
+
+PAPR interface currently supports different ways of communicating these resource
+grouping details to the OS. These are referred to as Form 0, Form 1 and Form2
+associativity grouping. Form 0 is the oldest format and is now considered deprecated.
+
+Hypervisor indicates the type/form of associativity used via "ibm,architecture-vec-5 property".
+Bit 0 of byte 5 in the "ibm,architecture-vec-5" property indicates usage of Form 0 or Form 1.
+A value of 1 indicates the usage of Form 1 associativity. For Form 2 associativity
+bit 2 of byte 5 in the "ibm,architecture-vec-5" property is used.
+
+Form 0
+------
+Form 0 associativity supports only two NUMA distances (LOCAL and REMOTE).
+
+Form 1
+------
+With Form 1 a combination of ibm,associativity-reference-points, and ibm,associativity
+device tree properties are used to determine the NUMA distance between resource groups/domains.
+
+The “ibm,associativity” property contains a list of one or more numbers (domainID)
+representing the resource’s platform grouping domains.
+
+The “ibm,associativity-reference-points” property contains a list of one or more numbers
+(domainID index) that represents the 1 based ordinal in the associativity lists.
+The list of domainID indexes represents an increasing hierarchy of resource grouping.
+
+ex:
+{ primary domainID index, secondary domainID index, tertiary domainID index.. }
+
+Linux kernel uses the domainID at the primary domainID index as the NUMA node id.
+Linux kernel computes NUMA distance between two domains by recursively comparing
+if they belong to the same higher-level domains. For mismatch at every higher
+level of the resource group, the kernel doubles the NUMA distance between the
+comparing domains.
+
+Form 2
+-------
+Form 2 associativity format adds separate device tree properties representing NUMA node distance
+thereby making the node distance computation flexible. Form 2 also allows flexible primary
+domain numbering. With numa distance computation now detached from the index value in
+"ibm,associativity-reference-points" property, Form 2 allows a large number of primary domain
+ids at the same domainID index representing resource groups of different performance/latency
+characteristics.
+
+Hypervisor indicates the usage of FORM2 associativity using bit 2 of byte 5 in the
+"ibm,architecture-vec-5" property.
+
+"ibm,numa-lookup-index-table" property contains a list of one or more numbers representing
+the domainIDs present in the system. The offset of the domainID in this property is
+used as an index while computing numa distance information via "ibm,numa-distance-table".
+
+prop-encoded-array: The number N of the domainIDs encoded as with encode-int, followed by
+N domainID encoded as with encode-int
+
+For ex:
+"ibm,numa-lookup-index-table" = {4, 0, 8, 250, 252}. The offset of domainID 8 (2) is used when
+computing the distance of domain 8 from other domains present in the system. For the rest of
+this document, this offset will be referred to as domain distance offset.
+
+"ibm,numa-distance-table" property contains a list of one or more numbers representing the NUMA
+distance between resource groups/domains present in the system.
+
+prop-encoded-array: The number N of the distance values encoded as with encode-int, followed by
+N distance values encoded as with encode-bytes. The max distance value we could encode is 255.
+The number N must be equal to the square of m where m is the number of domainIDs in the
+numa-lookup-index-table.
+
+For ex:
+ibm,numa-lookup-index-table = <3 0 8 40>;
+ibm,numa-distace-table = <9>, /bits/ 8 < 10 20 80 20 10 160 80 160 10>;
+
+::
+
+ | 0 8 40
+ --|------------
+ |
+ 0 | 10 20 80
+ |
+ 8 | 20 10 160
+ |
+ 40| 80 160 10
+
+A possible "ibm,associativity" property for resources in node 0, 8 and 40
+
+{ 3, 6, 7, 0 }
+{ 3, 6, 9, 8 }
+{ 3, 6, 7, 40}
+
+With "ibm,associativity-reference-points" { 0x3 }
+
+"ibm,lookup-index-table" helps in having a compact representation of distance matrix.
+Since domainID can be sparse, the matrix of distances can also be effectively sparse.
+With "ibm,lookup-index-table" we can achieve a compact representation of
+distance information.
diff --git a/Documentation/arch/powerpc/booting.rst b/Documentation/arch/powerpc/booting.rst
new file mode 100644
index 000000000000..472e97891aef
--- /dev/null
+++ b/Documentation/arch/powerpc/booting.rst
@@ -0,0 +1,110 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+DeviceTree Booting
+------------------
+
+During the development of the Linux/ppc64 kernel, and more specifically, the
+addition of new platform types outside of the old IBM pSeries/iSeries pair, it
+was decided to enforce some strict rules regarding the kernel entry and
+bootloader <-> kernel interfaces, in order to avoid the degeneration that had
+become the ppc32 kernel entry point and the way a new platform should be added
+to the kernel. The legacy iSeries platform breaks those rules as it predates
+this scheme, but no new board support will be accepted in the main tree that
+doesn't follow them properly. In addition, since the advent of the arch/powerpc
+merged architecture for ppc32 and ppc64, new 32-bit platforms and 32-bit
+platforms which move into arch/powerpc will be required to use these rules as
+well.
+
+The main requirement that will be defined in more detail below is the presence
+of a device-tree whose format is defined after Open Firmware specification.
+However, in order to make life easier to embedded board vendors, the kernel
+doesn't require the device-tree to represent every device in the system and only
+requires some nodes and properties to be present. For example, the kernel does
+not require you to create a node for every PCI device in the system. It is a
+requirement to have a node for PCI host bridges in order to provide interrupt
+routing information and memory/IO ranges, among others. It is also recommended
+to define nodes for on chip devices and other buses that don't specifically fit
+in an existing OF specification. This creates a great flexibility in the way the
+kernel can then probe those and match drivers to device, without having to hard
+code all sorts of tables. It also makes it more flexible for board vendors to do
+minor hardware upgrades without significantly impacting the kernel code or
+cluttering it with special cases.
+
+
+Entry point
+~~~~~~~~~~~
+
+There is one single entry point to the kernel, at the start
+of the kernel image. That entry point supports two calling
+conventions:
+
+ a) Boot from Open Firmware. If your firmware is compatible
+ with Open Firmware (IEEE 1275) or provides an OF compatible
+ client interface API (support for "interpret" callback of
+ forth words isn't required), you can enter the kernel with:
+
+ r5 : OF callback pointer as defined by IEEE 1275
+ bindings to powerpc. Only the 32-bit client interface
+ is currently supported
+
+ r3, r4 : address & length of an initrd if any or 0
+
+ The MMU is either on or off; the kernel will run the
+ trampoline located in arch/powerpc/kernel/prom_init.c to
+ extract the device-tree and other information from open
+ firmware and build a flattened device-tree as described
+ in b). prom_init() will then re-enter the kernel using
+ the second method. This trampoline code runs in the
+ context of the firmware, which is supposed to handle all
+ exceptions during that time.
+
+ b) Direct entry with a flattened device-tree block. This entry
+ point is called by a) after the OF trampoline and can also be
+ called directly by a bootloader that does not support the Open
+ Firmware client interface. It is also used by "kexec" to
+ implement "hot" booting of a new kernel from a previous
+ running one. This method is what I will describe in more
+ details in this document, as method a) is simply standard Open
+ Firmware, and thus should be implemented according to the
+ various standard documents defining it and its binding to the
+ PowerPC platform. The entry point definition then becomes:
+
+ r3 : physical pointer to the device-tree block
+ (defined in chapter II) in RAM
+
+ r4 : physical pointer to the kernel itself. This is
+ used by the assembly code to properly disable the MMU
+ in case you are entering the kernel with MMU enabled
+ and a non-1:1 mapping.
+
+ r5 : NULL (as to differentiate with method a)
+
+Note about SMP entry: Either your firmware puts your other
+CPUs in some sleep loop or spin loop in ROM where you can get
+them out via a soft reset or some other means, in which case
+you don't need to care, or you'll have to enter the kernel
+with all CPUs. The way to do that with method b) will be
+described in a later revision of this document.
+
+Board supports (platforms) are not exclusive config options. An
+arbitrary set of board supports can be built in a single kernel
+image. The kernel will "know" what set of functions to use for a
+given platform based on the content of the device-tree. Thus, you
+should:
+
+ a) add your platform support as a _boolean_ option in
+ arch/powerpc/Kconfig, following the example of PPC_PSERIES
+ and PPC_PMAC. The latter is probably a good
+ example of a board support to start from.
+
+ b) create your main platform file as
+ "arch/powerpc/platforms/myplatform/myboard_setup.c" and add it
+ to the Makefile under the condition of your ``CONFIG_``
+ option. This file will define a structure of type "ppc_md"
+ containing the various callbacks that the generic code will
+ use to get to your platform specific code
+
+A kernel image may support multiple platforms, but only if the
+platforms feature the same core architecture. A single kernel build
+cannot support both configurations with Book E and configurations
+with classic Powerpc architectures.
diff --git a/Documentation/arch/powerpc/bootwrapper.rst b/Documentation/arch/powerpc/bootwrapper.rst
new file mode 100644
index 000000000000..cdfa2bc8425f
--- /dev/null
+++ b/Documentation/arch/powerpc/bootwrapper.rst
@@ -0,0 +1,131 @@
+========================
+The PowerPC boot wrapper
+========================
+
+Copyright (C) Secret Lab Technologies Ltd.
+
+PowerPC image targets compresses and wraps the kernel image (vmlinux) with
+a boot wrapper to make it usable by the system firmware. There is no
+standard PowerPC firmware interface, so the boot wrapper is designed to
+be adaptable for each kind of image that needs to be built.
+
+The boot wrapper can be found in the arch/powerpc/boot/ directory. The
+Makefile in that directory has targets for all the available image types.
+The different image types are used to support all of the various firmware
+interfaces found on PowerPC platforms. OpenFirmware is the most commonly
+used firmware type on general purpose PowerPC systems from Apple, IBM and
+others. U-Boot is typically found on embedded PowerPC hardware, but there
+are a handful of other firmware implementations which are also popular. Each
+firmware interface requires a different image format.
+
+The boot wrapper is built from the makefile in arch/powerpc/boot/Makefile and
+it uses the wrapper script (arch/powerpc/boot/wrapper) to generate target
+image. The details of the build system is discussed in the next section.
+Currently, the following image format targets exist:
+
+ ==================== ========================================================
+ cuImage.%: Backwards compatible uImage for older version of
+ U-Boot (for versions that don't understand the device
+ tree). This image embeds a device tree blob inside
+ the image. The boot wrapper, kernel and device tree
+ are all embedded inside the U-Boot uImage file format
+ with boot wrapper code that extracts data from the old
+ bd_info structure and loads the data into the device
+ tree before jumping into the kernel.
+
+ Because of the series of #ifdefs found in the
+ bd_info structure used in the old U-Boot interfaces,
+ cuImages are platform specific. Each specific
+ U-Boot platform has a different platform init file
+ which populates the embedded device tree with data
+ from the platform specific bd_info file. The platform
+ specific cuImage platform init code can be found in
+ `arch/powerpc/boot/cuboot.*.c`. Selection of the correct
+ cuImage init code for a specific board can be found in
+ the wrapper structure.
+
+ dtbImage.%: Similar to zImage, except device tree blob is embedded
+ inside the image instead of provided by firmware. The
+ output image file can be either an elf file or a flat
+ binary depending on the platform.
+
+ dtbImages are used on systems which do not have an
+ interface for passing a device tree directly.
+ dtbImages are similar to simpleImages except that
+ dtbImages have platform specific code for extracting
+ data from the board firmware, but simpleImages do not
+ talk to the firmware at all.
+
+ PlayStation 3 support uses dtbImage. So do Embedded
+ Planet boards using the PlanetCore firmware. Board
+ specific initialization code is typically found in a
+ file named arch/powerpc/boot/<platform>.c; but this
+ can be overridden by the wrapper script.
+
+ simpleImage.%: Firmware independent compressed image that does not
+ depend on any particular firmware interface and embeds
+ a device tree blob. This image is a flat binary that
+ can be loaded to any location in RAM and jumped to.
+ Firmware cannot pass any configuration data to the
+ kernel with this image type and it depends entirely on
+ the embedded device tree for all information.
+
+ treeImage.%; Image format for used with OpenBIOS firmware found
+ on some ppc4xx hardware. This image embeds a device
+ tree blob inside the image.
+
+ uImage: Native image format used by U-Boot. The uImage target
+ does not add any boot code. It just wraps a compressed
+ vmlinux in the uImage data structure. This image
+ requires a version of U-Boot that is able to pass
+ a device tree to the kernel at boot. If using an older
+ version of U-Boot, then you need to use a cuImage
+ instead.
+
+ zImage.%: Image format which does not embed a device tree.
+ Used by OpenFirmware and other firmware interfaces
+ which are able to supply a device tree. This image
+ expects firmware to provide the device tree at boot.
+ Typically, if you have general purpose PowerPC
+ hardware then you want this image format.
+ ==================== ========================================================
+
+Image types which embed a device tree blob (simpleImage, dtbImage, treeImage,
+and cuImage) all generate the device tree blob from a file in the
+arch/powerpc/boot/dts/ directory. The Makefile selects the correct device
+tree source based on the name of the target. Therefore, if the kernel is
+built with 'make treeImage.walnut', then the build system will use
+arch/powerpc/boot/dts/walnut.dts to build treeImage.walnut.
+
+Two special targets called 'zImage' and 'zImage.initrd' also exist. These
+targets build all the default images as selected by the kernel configuration.
+Default images are selected by the boot wrapper Makefile
+(arch/powerpc/boot/Makefile) by adding targets to the $image-y variable. Look
+at the Makefile to see which default image targets are available.
+
+How it is built
+---------------
+arch/powerpc is designed to support multiplatform kernels, which means
+that a single vmlinux image can be booted on many different target boards.
+It also means that the boot wrapper must be able to wrap for many kinds of
+images on a single build. The design decision was made to not use any
+conditional compilation code (#ifdef, etc) in the boot wrapper source code.
+All of the boot wrapper pieces are buildable at any time regardless of the
+kernel configuration. Building all the wrapper bits on every kernel build
+also ensures that obscure parts of the wrapper are at the very least compile
+tested in a large variety of environments.
+
+The wrapper is adapted for different image types at link time by linking in
+just the wrapper bits that are appropriate for the image type. The 'wrapper
+script' (found in arch/powerpc/boot/wrapper) is called by the Makefile and
+is responsible for selecting the correct wrapper bits for the image type.
+The arguments are well documented in the script's comment block, so they
+are not repeated here. However, it is worth mentioning that the script
+uses the -p (platform) argument as the main method of deciding which wrapper
+bits to compile in. Look for the large 'case "$platform" in' block in the
+middle of the script. This is also the place where platform specific fixups
+can be selected by changing the link order.
+
+In particular, care should be taken when working with cuImages. cuImage
+wrapper bits are very board specific and care should be taken to make sure
+the target you are trying to build is supported by the wrapper bits.
diff --git a/Documentation/arch/powerpc/cpu_families.rst b/Documentation/arch/powerpc/cpu_families.rst
new file mode 100644
index 000000000000..f55433c6b8f3
--- /dev/null
+++ b/Documentation/arch/powerpc/cpu_families.rst
@@ -0,0 +1,219 @@
+============
+CPU Families
+============
+
+This document tries to summarise some of the different cpu families that exist
+and are supported by arch/powerpc.
+
+
+Book3S (aka sPAPR)
+------------------
+
+- Hash MMU (except 603 and e300)
+- Radix MMU (POWER9 and later)
+- Software loaded TLB (603 and e300)
+- Selectable Software loaded TLB in addition to hash MMU (755, 7450, e600)
+- Mix of 32 & 64 bit::
+
+ +--------------+ +----------------+
+ | Old POWER | --------------> | RS64 (threads) |
+ +--------------+ +----------------+
+ |
+ |
+ v
+ +--------------+ +----------------+ +------+
+ | 601 | --------------> | 603 | ---> | e300 |
+ +--------------+ +----------------+ +------+
+ | |
+ | |
+ v v
+ +--------------+ +-----+ +----------------+ +-------+
+ | 604 | | 755 | <--- | 750 (G3) | ---> | 750CX |
+ +--------------+ +-----+ +----------------+ +-------+
+ | | |
+ | | |
+ v v v
+ +--------------+ +----------------+ +-------+
+ | 620 (64 bit) | | 7400 | | 750CL |
+ +--------------+ +----------------+ +-------+
+ | | |
+ | | |
+ v v v
+ +--------------+ +----------------+ +-------+
+ | POWER3/630 | | 7410 | | 750FX |
+ +--------------+ +----------------+ +-------+
+ | |
+ | |
+ v v
+ +--------------+ +----------------+
+ | POWER3+ | | 7450 |
+ +--------------+ +----------------+
+ | |
+ | |
+ v v
+ +--------------+ +----------------+
+ | POWER4 | | 7455 |
+ +--------------+ +----------------+
+ | |
+ | |
+ v v
+ +--------------+ +-------+ +----------------+
+ | POWER4+ | --> | 970 | | 7447 |
+ +--------------+ +-------+ +----------------+
+ | | |
+ | | |
+ v v v
+ +--------------+ +-------+ +----------------+
+ | POWER5 | | 970FX | | 7448 |
+ +--------------+ +-------+ +----------------+
+ | | |
+ | | |
+ v v v
+ +--------------+ +-------+ +----------------+
+ | POWER5+ | | 970MP | | e600 |
+ +--------------+ +-------+ +----------------+
+ |
+ |
+ v
+ +--------------+
+ | POWER5++ |
+ +--------------+
+ |
+ |
+ v
+ +--------------+ +-------+
+ | POWER6 | <-?-> | Cell |
+ +--------------+ +-------+
+ |
+ |
+ v
+ +--------------+
+ | POWER7 |
+ +--------------+
+ |
+ |
+ v
+ +--------------+
+ | POWER7+ |
+ +--------------+
+ |
+ |
+ v
+ +--------------+
+ | POWER8 |
+ +--------------+
+ |
+ |
+ v
+ +--------------+
+ | POWER9 |
+ +--------------+
+ |
+ |
+ v
+ +--------------+
+ | POWER10 |
+ +--------------+
+
+
+ +---------------+
+ | PA6T (64 bit) |
+ +---------------+
+
+
+IBM BookE
+---------
+
+- Software loaded TLB.
+- All 32 bit::
+
+ +--------------+
+ | 440 |
+ +--------------+
+ |
+ |
+ v
+ +--------------+ +----------------+
+ | 450 | --> | BG/P |
+ +--------------+ +----------------+
+ |
+ |
+ v
+ +--------------+
+ | 460 |
+ +--------------+
+ |
+ |
+ v
+ +--------------+
+ | 476 |
+ +--------------+
+
+
+Motorola/Freescale 8xx
+----------------------
+
+- Software loaded with hardware assist.
+- All 32 bit::
+
+ +-------------+
+ | MPC8xx Core |
+ +-------------+
+
+
+Freescale BookE
+---------------
+
+- Software loaded TLB.
+- e6500 adds HW loaded indirect TLB entries.
+- Mix of 32 & 64 bit::
+
+ +--------------+
+ | e200 |
+ +--------------+
+
+
+ +--------------------------------+
+ | e500 |
+ +--------------------------------+
+ |
+ |
+ v
+ +--------------------------------+
+ | e500v2 |
+ +--------------------------------+
+ |
+ |
+ v
+ +--------------------------------+
+ | e500mc (Book3e) |
+ +--------------------------------+
+ |
+ |
+ v
+ +--------------------------------+
+ | e5500 (64 bit) |
+ +--------------------------------+
+ |
+ |
+ v
+ +--------------------------------+
+ | e6500 (HW TLB) (Multithreaded) |
+ +--------------------------------+
+
+
+IBM A2 core
+-----------
+
+- Book3E, software loaded TLB + HW loaded indirect TLB entries.
+- 64 bit::
+
+ +--------------+ +----------------+
+ | A2 core | --> | WSP |
+ +--------------+ +----------------+
+ |
+ |
+ v
+ +--------------+
+ | BG/Q |
+ +--------------+
diff --git a/Documentation/arch/powerpc/cpu_features.rst b/Documentation/arch/powerpc/cpu_features.rst
new file mode 100644
index 000000000000..b7bcdd2f41bb
--- /dev/null
+++ b/Documentation/arch/powerpc/cpu_features.rst
@@ -0,0 +1,60 @@
+============
+CPU Features
+============
+
+Hollis Blanchard <hollis@austin.ibm.com>
+5 Jun 2002
+
+This document describes the system (including self-modifying code) used in the
+PPC Linux kernel to support a variety of PowerPC CPUs without requiring
+compile-time selection.
+
+Early in the boot process the ppc32 kernel detects the current CPU type and
+chooses a set of features accordingly. Some examples include Altivec support,
+split instruction and data caches, and if the CPU supports the DOZE and NAP
+sleep modes.
+
+Detection of the feature set is simple. A list of processors can be found in
+arch/powerpc/kernel/cputable.c. The PVR register is masked and compared with
+each value in the list. If a match is found, the cpu_features of cur_cpu_spec
+is assigned to the feature bitmask for this processor and a __setup_cpu
+function is called.
+
+C code may test 'cur_cpu_spec[smp_processor_id()]->cpu_features' for a
+particular feature bit. This is done in quite a few places, for example
+in ppc_setup_l2cr().
+
+Implementing cpufeatures in assembly is a little more involved. There are
+several paths that are performance-critical and would suffer if an array
+index, structure dereference, and conditional branch were added. To avoid the
+performance penalty but still allow for runtime (rather than compile-time) CPU
+selection, unused code is replaced by 'nop' instructions. This nop'ing is
+based on CPU 0's capabilities, so a multi-processor system with non-identical
+processors will not work (but such a system would likely have other problems
+anyways).
+
+After detecting the processor type, the kernel patches out sections of code
+that shouldn't be used by writing nop's over it. Using cpufeatures requires
+just 2 macros (found in arch/powerpc/include/asm/cputable.h), as seen in head.S
+transfer_to_handler::
+
+ #ifdef CONFIG_ALTIVEC
+ BEGIN_FTR_SECTION
+ mfspr r22,SPRN_VRSAVE /* if G4, save vrsave register value */
+ stw r22,THREAD_VRSAVE(r23)
+ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+ #endif /* CONFIG_ALTIVEC */
+
+If CPU 0 supports Altivec, the code is left untouched. If it doesn't, both
+instructions are replaced with nop's.
+
+The END_FTR_SECTION macro has two simpler variations: END_FTR_SECTION_IFSET
+and END_FTR_SECTION_IFCLR. These simply test if a flag is set (in
+cur_cpu_spec[0]->cpu_features) or is cleared, respectively. These two macros
+should be used in the majority of cases.
+
+The END_FTR_SECTION macros are implemented by storing information about this
+code in the '__ftr_fixup' ELF section. When do_cpu_ftr_fixups
+(arch/powerpc/kernel/misc.S) is invoked, it will iterate over the records in
+__ftr_fixup, and if the required feature is not present it will loop writing
+nop's from each BEGIN_FTR_SECTION to END_FTR_SECTION.
diff --git a/Documentation/arch/powerpc/dawr-power9.rst b/Documentation/arch/powerpc/dawr-power9.rst
new file mode 100644
index 000000000000..310f2e0cea81
--- /dev/null
+++ b/Documentation/arch/powerpc/dawr-power9.rst
@@ -0,0 +1,101 @@
+=====================
+DAWR issues on POWER9
+=====================
+
+On older POWER9 processors, the Data Address Watchpoint Register (DAWR) can
+cause a checkstop if it points to cache inhibited (CI) memory. Currently Linux
+has no way to distinguish CI memory when configuring the DAWR, so on affected
+systems, the DAWR is disabled.
+
+Affected processor revisions
+============================
+
+This issue is only present on processors prior to v2.3. The revision can be
+found in /proc/cpuinfo::
+
+ processor : 0
+ cpu : POWER9, altivec supported
+ clock : 3800.000000MHz
+ revision : 2.3 (pvr 004e 1203)
+
+On a system with the issue, the DAWR is disabled as detailed below.
+
+Technical Details:
+==================
+
+DAWR has 6 different ways of being set.
+1) ptrace
+2) h_set_mode(DAWR)
+3) h_set_dabr()
+4) kvmppc_set_one_reg()
+5) xmon
+
+For ptrace, we now advertise zero breakpoints on POWER9 via the
+PPC_PTRACE_GETHWDBGINFO call. This results in GDB falling back to
+software emulation of the watchpoint (which is slow).
+
+h_set_mode(DAWR) and h_set_dabr() will now return an error to the
+guest on a POWER9 host. Current Linux guests ignore this error, so
+they will silently not get the DAWR.
+
+kvmppc_set_one_reg() will store the value in the vcpu but won't
+actually set it on POWER9 hardware. This is done so we don't break
+migration from POWER8 to POWER9, at the cost of silently losing the
+DAWR on the migration.
+
+For xmon, the 'bd' command will return an error on P9.
+
+Consequences for users
+======================
+
+For GDB watchpoints (ie 'watch' command) on POWER9 bare metal , GDB
+will accept the command. Unfortunately since there is no hardware
+support for the watchpoint, GDB will software emulate the watchpoint
+making it run very slowly.
+
+The same will also be true for any guests started on a POWER9
+host. The watchpoint will fail and GDB will fall back to software
+emulation.
+
+If a guest is started on a POWER8 host, GDB will accept the watchpoint
+and configure the hardware to use the DAWR. This will run at full
+speed since it can use the hardware emulation. Unfortunately if this
+guest is migrated to a POWER9 host, the watchpoint will be lost on the
+POWER9. Loads and stores to the watchpoint locations will not be
+trapped in GDB. The watchpoint is remembered, so if the guest is
+migrated back to the POWER8 host, it will start working again.
+
+Force enabling the DAWR
+=======================
+Kernels (since ~v5.2) have an option to force enable the DAWR via::
+
+ echo Y > /sys/kernel/debug/powerpc/dawr_enable_dangerous
+
+This enables the DAWR even on POWER9.
+
+This is a dangerous setting, USE AT YOUR OWN RISK.
+
+Some users may not care about a bad user crashing their box
+(ie. single user/desktop systems) and really want the DAWR. This
+allows them to force enable DAWR.
+
+This flag can also be used to disable DAWR access. Once this is
+cleared, all DAWR access should be cleared immediately and your
+machine once again safe from crashing.
+
+Userspace may get confused by toggling this. If DAWR is force
+enabled/disabled between getting the number of breakpoints (via
+PTRACE_GETHWDBGINFO) and setting the breakpoint, userspace will get an
+inconsistent view of what's available. Similarly for guests.
+
+For the DAWR to be enabled in a KVM guest, the DAWR needs to be force
+enabled in the host AND the guest. For this reason, this won't work on
+POWERVM as it doesn't allow the HCALL to work. Writes of 'Y' to the
+dawr_enable_dangerous file will fail if the hypervisor doesn't support
+writing the DAWR.
+
+To double check the DAWR is working, run this kernel selftest:
+
+ tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
+
+Any errors/failures/skips mean something is wrong.
diff --git a/Documentation/arch/powerpc/dexcr.rst b/Documentation/arch/powerpc/dexcr.rst
new file mode 100644
index 000000000000..ab0724212fcd
--- /dev/null
+++ b/Documentation/arch/powerpc/dexcr.rst
@@ -0,0 +1,195 @@
+.. SPDX-License-Identifier: GPL-2.0-or-later
+
+==========================================
+DEXCR (Dynamic Execution Control Register)
+==========================================
+
+Overview
+========
+
+The DEXCR is a privileged special purpose register (SPR) introduced in
+PowerPC ISA 3.1B (Power10) that allows per-cpu control over several dynamic
+execution behaviours. These behaviours include speculation (e.g., indirect
+branch target prediction) and enabling return-oriented programming (ROP)
+protection instructions.
+
+The execution control is exposed in hardware as up to 32 bits ('aspects') in
+the DEXCR. Each aspect controls a certain behaviour, and can be set or cleared
+to enable/disable the aspect. There are several variants of the DEXCR for
+different purposes:
+
+DEXCR
+ A privileged SPR that can control aspects for userspace and kernel space
+HDEXCR
+ A hypervisor-privileged SPR that can control aspects for the hypervisor and
+ enforce aspects for the kernel and userspace.
+UDEXCR
+ An optional ultravisor-privileged SPR that can control aspects for the ultravisor.
+
+Userspace can examine the current DEXCR state using a dedicated SPR that
+provides a non-privileged read-only view of the userspace DEXCR aspects.
+There is also an SPR that provides a read-only view of the hypervisor enforced
+aspects, which ORed with the userspace DEXCR view gives the effective DEXCR
+state for a process.
+
+
+Configuration
+=============
+
+prctl
+-----
+
+A process can control its own userspace DEXCR value using the
+``PR_PPC_GET_DEXCR`` and ``PR_PPC_SET_DEXCR`` pair of
+:manpage:`prctl(2)` commands. These calls have the form::
+
+ prctl(PR_PPC_GET_DEXCR, unsigned long which, 0, 0, 0);
+ prctl(PR_PPC_SET_DEXCR, unsigned long which, unsigned long ctrl, 0, 0);
+
+The possible 'which' and 'ctrl' values are as follows. Note there is no relation
+between the 'which' value and the DEXCR aspect's index.
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 2 7 1
+
+ * - ``prctl()`` which
+ - Aspect name
+ - Aspect index
+
+ * - ``PR_PPC_DEXCR_SBHE``
+ - Speculative Branch Hint Enable (SBHE)
+ - 0
+
+ * - ``PR_PPC_DEXCR_IBRTPD``
+ - Indirect Branch Recurrent Target Prediction Disable (IBRTPD)
+ - 3
+
+ * - ``PR_PPC_DEXCR_SRAPD``
+ - Subroutine Return Address Prediction Disable (SRAPD)
+ - 4
+
+ * - ``PR_PPC_DEXCR_NPHIE``
+ - Non-Privileged Hash Instruction Enable (NPHIE)
+ - 5
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 2 8
+
+ * - ``prctl()`` ctrl
+ - Meaning
+
+ * - ``PR_PPC_DEXCR_CTRL_EDITABLE``
+ - This aspect can be configured with PR_PPC_SET_DEXCR (get only)
+
+ * - ``PR_PPC_DEXCR_CTRL_SET``
+ - This aspect is set / set this aspect
+
+ * - ``PR_PPC_DEXCR_CTRL_CLEAR``
+ - This aspect is clear / clear this aspect
+
+ * - ``PR_PPC_DEXCR_CTRL_SET_ONEXEC``
+ - This aspect will be set after exec / set this aspect after exec
+
+ * - ``PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC``
+ - This aspect will be clear after exec / clear this aspect after exec
+
+Note that
+
+* which is a plain value, not a bitmask. Aspects must be worked with individually.
+
+* ctrl is a bitmask. ``PR_PPC_GET_DEXCR`` returns both the current and onexec
+ configuration. For example, ``PR_PPC_GET_DEXCR`` may return
+ ``PR_PPC_DEXCR_CTRL_EDITABLE | PR_PPC_DEXCR_CTRL_SET |
+ PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC``. This would indicate the aspect is currently
+ set, it will be cleared when you run exec, and you can change this with the
+ ``PR_PPC_SET_DEXCR`` prctl.
+
+* The set/clear terminology refers to setting/clearing the bit in the DEXCR.
+ For example::
+
+ prctl(PR_PPC_SET_DEXCR, PR_PPC_DEXCR_IBRTPD, PR_PPC_DEXCR_CTRL_SET, 0, 0);
+
+ will set the IBRTPD aspect bit in the DEXCR, causing indirect branch prediction
+ to be disabled.
+
+* The status returned by ``PR_PPC_GET_DEXCR`` represents what value the process
+ would like applied. It does not include any alternative overrides, such as if
+ the hypervisor is enforcing the aspect be set. To see the true DEXCR state
+ software should read the appropriate SPRs directly.
+
+* The aspect state when starting a process is copied from the parent's state on
+ :manpage:`fork(2)`. The state is reset to a fixed value on
+ :manpage:`execve(2)`. The PR_PPC_SET_DEXCR prctl() can control both of these
+ values.
+
+* The ``*_ONEXEC`` controls do not change the current process's DEXCR.
+
+Use ``PR_PPC_SET_DEXCR`` with one of ``PR_PPC_DEXCR_CTRL_SET`` or
+``PR_PPC_DEXCR_CTRL_CLEAR`` to edit a given aspect.
+
+Common error codes for both getting and setting the DEXCR are as follows:
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 2 8
+
+ * - Error
+ - Meaning
+
+ * - ``EINVAL``
+ - The DEXCR is not supported by the kernel.
+
+ * - ``ENODEV``
+ - The aspect is not recognised by the kernel or not supported by the
+ hardware.
+
+``PR_PPC_SET_DEXCR`` may also report the following error codes:
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 2 8
+
+ * - Error
+ - Meaning
+
+ * - ``EINVAL``
+ - The ctrl value contains unrecognised flags.
+
+ * - ``EINVAL``
+ - The ctrl value contains mutually conflicting flags (e.g.,
+ ``PR_PPC_DEXCR_CTRL_SET | PR_PPC_DEXCR_CTRL_CLEAR``)
+
+ * - ``EPERM``
+ - This aspect cannot be modified with prctl() (check for the
+ PR_PPC_DEXCR_CTRL_EDITABLE flag with PR_PPC_GET_DEXCR).
+
+ * - ``EPERM``
+ - The process does not have sufficient privilege to perform the operation.
+ For example, clearing NPHIE on exec is a privileged operation (a process
+ can still clear its own NPHIE aspect without privileges).
+
+This interface allows a process to control its own DEXCR aspects, and also set
+the initial DEXCR value for any children in its process tree (up to the next
+child to use an ``*_ONEXEC`` control). This allows fine-grained control over the
+default value of the DEXCR, for example allowing containers to run with different
+default values.
+
+
+coredump and ptrace
+===================
+
+The userspace values of the DEXCR and HDEXCR (in this order) are exposed under
+``NT_PPC_DEXCR``. These are each 64 bits and readonly, and are intended to
+assist with core dumps. The DEXCR may be made writable in future. The top 32
+bits of both registers (corresponding to the non-userspace bits) are masked off.
+
+If the kernel config ``CONFIG_CHECKPOINT_RESTORE`` is enabled, then
+``NT_PPC_HASHKEYR`` is available and exposes the HASHKEYR value of the process
+for reading and writing. This is a tradeoff between increased security and
+checkpoint/restore support: a process should normally have no need to know its
+secret key, but restoring a process requires setting its original key. The key
+therefore appears in core dumps, and an attacker may be able to retrieve it from
+a coredump and effectively bypass ROP protection on any threads that share this
+key (potentially all threads from the same parent that have not run ``exec()``).
diff --git a/Documentation/arch/powerpc/dscr.rst b/Documentation/arch/powerpc/dscr.rst
new file mode 100644
index 000000000000..f735ec5375d5
--- /dev/null
+++ b/Documentation/arch/powerpc/dscr.rst
@@ -0,0 +1,87 @@
+===================================
+DSCR (Data Stream Control Register)
+===================================
+
+DSCR register in powerpc allows user to have some control of prefetch of data
+stream in the processor. Please refer to the ISA documents or related manual
+for more detailed information regarding how to use this DSCR to attain this
+control of the prefetches . This document here provides an overview of kernel
+support for DSCR, related kernel objects, its functionalities and exported
+user interface.
+
+(A) Data Structures:
+
+ (1) thread_struct::
+
+ dscr /* Thread DSCR value */
+ dscr_inherit /* Thread has changed default DSCR */
+
+ (2) PACA::
+
+ dscr_default /* per-CPU DSCR default value */
+
+ (3) sysfs.c::
+
+ dscr_default /* System DSCR default value */
+
+(B) Scheduler Changes:
+
+ Scheduler will write the per-CPU DSCR default which is stored in the
+ CPU's PACA value into the register if the thread has dscr_inherit value
+ cleared which means that it has not changed the default DSCR till now.
+ If the dscr_inherit value is set which means that it has changed the
+ default DSCR value, scheduler will write the changed value which will
+ now be contained in thread struct's dscr into the register instead of
+ the per-CPU default PACA based DSCR value.
+
+ NOTE: Please note here that the system wide global DSCR value never
+ gets used directly in the scheduler process context switch at all.
+
+(C) SYSFS Interface:
+
+ - Global DSCR default: /sys/devices/system/cpu/dscr_default
+ - CPU specific DSCR default: /sys/devices/system/cpu/cpuN/dscr
+
+ Changing the global DSCR default in the sysfs will change all the CPU
+ specific DSCR defaults immediately in their PACA structures. Again if
+ the current process has the dscr_inherit clear, it also writes the new
+ value into every CPU's DSCR register right away and updates the current
+ thread's DSCR value as well.
+
+ Changing the CPU specific DSCR default value in the sysfs does exactly
+ the same thing as above but unlike the global one above, it just changes
+ stuff for that particular CPU instead for all the CPUs on the system.
+
+(D) User Space Instructions:
+
+ The DSCR register can be accessed in the user space using any of these
+ two SPR numbers available for that purpose.
+
+ (1) Problem state SPR: 0x03 (Un-privileged, POWER8 only)
+ (2) Privileged state SPR: 0x11 (Privileged)
+
+ Accessing DSCR through privileged SPR number (0x11) from user space
+ works, as it is emulated following an illegal instruction exception
+ inside the kernel. Both mfspr and mtspr instructions are emulated.
+
+ Accessing DSCR through user level SPR (0x03) from user space will first
+ create a facility unavailable exception. Inside this exception handler
+ all mfspr instruction based read attempts will get emulated and returned
+ where as the first mtspr instruction based write attempts will enable
+ the DSCR facility for the next time around (both for read and write) by
+ setting DSCR facility in the FSCR register.
+
+(E) Specifics about 'dscr_inherit':
+
+ The thread struct element 'dscr_inherit' represents whether the thread
+ in question has attempted and changed the DSCR itself using any of the
+ following methods. This element signifies whether the thread wants to
+ use the CPU default DSCR value or its own changed DSCR value in the
+ kernel.
+
+ (1) mtspr instruction (SPR number 0x03)
+ (2) mtspr instruction (SPR number 0x11)
+ (3) ptrace interface (Explicitly set user DSCR value)
+
+ Any child of the process created after this event in the process inherits
+ this same behaviour as well.
diff --git a/Documentation/arch/powerpc/eeh-pci-error-recovery.rst b/Documentation/arch/powerpc/eeh-pci-error-recovery.rst
new file mode 100644
index 000000000000..153d0af055b6
--- /dev/null
+++ b/Documentation/arch/powerpc/eeh-pci-error-recovery.rst
@@ -0,0 +1,335 @@
+==========================
+PCI Bus EEH Error Recovery
+==========================
+
+Linas Vepstas <linas@austin.ibm.com>
+
+12 January 2005
+
+
+Overview:
+---------
+The IBM POWER-based pSeries and iSeries computers include PCI bus
+controller chips that have extended capabilities for detecting and
+reporting a large variety of PCI bus error conditions. These features
+go under the name of "EEH", for "Enhanced Error Handling". The EEH
+hardware features allow PCI bus errors to be cleared and a PCI
+card to be "rebooted", without also having to reboot the operating
+system.
+
+This is in contrast to traditional PCI error handling, where the
+PCI chip is wired directly to the CPU, and an error would cause
+a CPU machine-check/check-stop condition, halting the CPU entirely.
+Another "traditional" technique is to ignore such errors, which
+can lead to data corruption, both of user data or of kernel data,
+hung/unresponsive adapters, or system crashes/lockups. Thus,
+the idea behind EEH is that the operating system can become more
+reliable and robust by protecting it from PCI errors, and giving
+the OS the ability to "reboot"/recover individual PCI devices.
+
+Future systems from other vendors, based on the PCI-E specification,
+may contain similar features.
+
+
+Causes of EEH Errors
+--------------------
+EEH was originally designed to guard against hardware failure, such
+as PCI cards dying from heat, humidity, dust, vibration and bad
+electrical connections. The vast majority of EEH errors seen in
+"real life" are due to either poorly seated PCI cards, or,
+unfortunately quite commonly, due to device driver bugs, device firmware
+bugs, and sometimes PCI card hardware bugs.
+
+The most common software bug, is one that causes the device to
+attempt to DMA to a location in system memory that has not been
+reserved for DMA access for that card. This is a powerful feature,
+as it prevents what; otherwise, would have been silent memory
+corruption caused by the bad DMA. A number of device driver
+bugs have been found and fixed in this way over the past few
+years. Other possible causes of EEH errors include data or
+address line parity errors (for example, due to poor electrical
+connectivity due to a poorly seated card), and PCI-X split-completion
+errors (due to software, device firmware, or device PCI hardware bugs).
+The vast majority of "true hardware failures" can be cured by
+physically removing and re-seating the PCI card.
+
+
+Detection and Recovery
+----------------------
+In the following discussion, a generic overview of how to detect
+and recover from EEH errors will be presented. This is followed
+by an overview of how the current implementation in the Linux
+kernel does it. The actual implementation is subject to change,
+and some of the finer points are still being debated. These
+may in turn be swayed if or when other architectures implement
+similar functionality.
+
+When a PCI Host Bridge (PHB, the bus controller connecting the
+PCI bus to the system CPU electronics complex) detects a PCI error
+condition, it will "isolate" the affected PCI card. Isolation
+will block all writes (either to the card from the system, or
+from the card to the system), and it will cause all reads to
+return all-ff's (0xff, 0xffff, 0xffffffff for 8/16/32-bit reads).
+This value was chosen because it is the same value you would
+get if the device was physically unplugged from the slot.
+This includes access to PCI memory, I/O space, and PCI config
+space. Interrupts; however, will continue to be delivered.
+
+Detection and recovery are performed with the aid of ppc64
+firmware. The programming interfaces in the Linux kernel
+into the firmware are referred to as RTAS (Run-Time Abstraction
+Services). The Linux kernel does not (should not) access
+the EEH function in the PCI chipsets directly, primarily because
+there are a number of different chipsets out there, each with
+different interfaces and quirks. The firmware provides a
+uniform abstraction layer that will work with all pSeries
+and iSeries hardware (and be forwards-compatible).
+
+If the OS or device driver suspects that a PCI slot has been
+EEH-isolated, there is a firmware call it can make to determine if
+this is the case. If so, then the device driver should put itself
+into a consistent state (given that it won't be able to complete any
+pending work) and start recovery of the card. Recovery normally
+would consist of resetting the PCI device (holding the PCI #RST
+line high for two seconds), followed by setting up the device
+config space (the base address registers (BAR's), latency timer,
+cache line size, interrupt line, and so on). This is followed by a
+reinitialization of the device driver. In a worst-case scenario,
+the power to the card can be toggled, at least on hot-plug-capable
+slots. In principle, layers far above the device driver probably
+do not need to know that the PCI card has been "rebooted" in this
+way; ideally, there should be at most a pause in Ethernet/disk/USB
+I/O while the card is being reset.
+
+If the card cannot be recovered after three or four resets, the
+kernel/device driver should assume the worst-case scenario, that the
+card has died completely, and report this error to the sysadmin.
+In addition, error messages are reported through RTAS and also through
+syslogd (/var/log/messages) to alert the sysadmin of PCI resets.
+The correct way to deal with failed adapters is to use the standard
+PCI hotplug tools to remove and replace the dead card.
+
+
+Current PPC64 Linux EEH Implementation
+--------------------------------------
+At this time, a generic EEH recovery mechanism has been implemented,
+so that individual device drivers do not need to be modified to support
+EEH recovery. This generic mechanism piggy-backs on the PCI hotplug
+infrastructure, and percolates events up through the userspace/udev
+infrastructure. Following is a detailed description of how this is
+accomplished.
+
+EEH must be enabled in the PHB's very early during the boot process,
+and if a PCI slot is hot-plugged. The former is performed by
+eeh_init() in arch/powerpc/platforms/pseries/eeh.c, and the later by
+drivers/pci/hotplug/pSeries_pci.c calling in to the eeh.c code.
+EEH must be enabled before a PCI scan of the device can proceed.
+Current Power5 hardware will not work unless EEH is enabled;
+although older Power4 can run with it disabled. Effectively,
+EEH can no longer be turned off. PCI devices *must* be
+registered with the EEH code; the EEH code needs to know about
+the I/O address ranges of the PCI device in order to detect an
+error. Given an arbitrary address, the routine
+pci_get_device_by_addr() will find the pci device associated
+with that address (if any).
+
+The default arch/powerpc/include/asm/io.h macros readb(), inb(), insb(),
+etc. include a check to see if the i/o read returned all-0xff's.
+If so, these make a call to eeh_dn_check_failure(), which in turn
+asks the firmware if the all-ff's value is the sign of a true EEH
+error. If it is not, processing continues as normal. The grand
+total number of these false alarms or "false positives" can be
+seen in /proc/ppc64/eeh (subject to change). Normally, almost
+all of these occur during boot, when the PCI bus is scanned, where
+a large number of 0xff reads are part of the bus scan procedure.
+
+If a frozen slot is detected, code in
+arch/powerpc/platforms/pseries/eeh.c will print a stack trace to
+syslog (/var/log/messages). This stack trace has proven to be very
+useful to device-driver authors for finding out at what point the EEH
+error was detected, as the error itself usually occurs slightly
+beforehand.
+
+Next, it uses the Linux kernel notifier chain/work queue mechanism to
+allow any interested parties to find out about the failure. Device
+drivers, or other parts of the kernel, can use
+`eeh_register_notifier(struct notifier_block *)` to find out about EEH
+events. The event will include a pointer to the pci device, the
+device node and some state info. Receivers of the event can "do as
+they wish"; the default handler will be described further in this
+section.
+
+To assist in the recovery of the device, eeh.c exports the
+following functions:
+
+rtas_set_slot_reset()
+ assert the PCI #RST line for 1/8th of a second
+rtas_configure_bridge()
+ ask firmware to configure any PCI bridges
+ located topologically under the pci slot.
+eeh_save_bars() and eeh_restore_bars():
+ save and restore the PCI
+ config-space info for a device and any devices under it.
+
+
+A handler for the EEH notifier_block events is implemented in
+drivers/pci/hotplug/pSeries_pci.c, called handle_eeh_events().
+It saves the device BAR's and then calls rpaphp_unconfig_pci_adapter().
+This last call causes the device driver for the card to be stopped,
+which causes uevents to go out to user space. This triggers
+user-space scripts that might issue commands such as "ifdown eth0"
+for ethernet cards, and so on. This handler then sleeps for 5 seconds,
+hoping to give the user-space scripts enough time to complete.
+It then resets the PCI card, reconfigures the device BAR's, and
+any bridges underneath. It then calls rpaphp_enable_pci_slot(),
+which restarts the device driver and triggers more user-space
+events (for example, calling "ifup eth0" for ethernet cards).
+
+
+Device Shutdown and User-Space Events
+-------------------------------------
+This section documents what happens when a pci slot is unconfigured,
+focusing on how the device driver gets shut down, and on how the
+events get delivered to user-space scripts.
+
+Following is an example sequence of events that cause a device driver
+close function to be called during the first phase of an EEH reset.
+The following sequence is an example of the pcnet32 device driver::
+
+ rpa_php_unconfig_pci_adapter (struct slot *) // in rpaphp_pci.c
+ {
+ calls
+ pci_remove_bus_device (struct pci_dev *) // in /drivers/pci/remove.c
+ {
+ calls
+ pci_destroy_dev (struct pci_dev *)
+ {
+ calls
+ device_unregister (&dev->dev) // in /drivers/base/core.c
+ {
+ calls
+ device_del (struct device *)
+ {
+ calls
+ bus_remove_device() // in /drivers/base/bus.c
+ {
+ calls
+ device_release_driver()
+ {
+ calls
+ struct device_driver->remove() which is just
+ pci_device_remove() // in /drivers/pci/pci_driver.c
+ {
+ calls
+ struct pci_driver->remove() which is just
+ pcnet32_remove_one() // in /drivers/net/pcnet32.c
+ {
+ calls
+ unregister_netdev() // in /net/core/dev.c
+ {
+ calls
+ dev_close() // in /net/core/dev.c
+ {
+ calls dev->stop();
+ which is just pcnet32_close() // in pcnet32.c
+ {
+ which does what you wanted
+ to stop the device
+ }
+ }
+ }
+ which
+ frees pcnet32 device driver memory
+ }
+ }}}}}}
+
+
+in drivers/pci/pci_driver.c,
+struct device_driver->remove() is just pci_device_remove()
+which calls struct pci_driver->remove() which is pcnet32_remove_one()
+which calls unregister_netdev() (in net/core/dev.c)
+which calls dev_close() (in net/core/dev.c)
+which calls dev->stop() which is pcnet32_close()
+which then does the appropriate shutdown.
+
+---
+
+Following is the analogous stack trace for events sent to user-space
+when the pci device is unconfigured::
+
+ rpa_php_unconfig_pci_adapter() { // in rpaphp_pci.c
+ calls
+ pci_remove_bus_device (struct pci_dev *) { // in /drivers/pci/remove.c
+ calls
+ pci_destroy_dev (struct pci_dev *) {
+ calls
+ device_unregister (&dev->dev) { // in /drivers/base/core.c
+ calls
+ device_del(struct device * dev) { // in /drivers/base/core.c
+ calls
+ kobject_del() { //in /libs/kobject.c
+ calls
+ kobject_uevent() { // in /libs/kobject.c
+ calls
+ kset_uevent() { // in /lib/kobject.c
+ calls
+ kset->uevent_ops->uevent() // which is really just
+ a call to
+ dev_uevent() { // in /drivers/base/core.c
+ calls
+ dev->bus->uevent() which is really just a call to
+ pci_uevent () { // in drivers/pci/hotplug.c
+ which prints device name, etc....
+ }
+ }
+ then kobject_uevent() sends a netlink uevent to userspace
+ --> userspace uevent
+ (during early boot, nobody listens to netlink events and
+ kobject_uevent() executes uevent_helper[], which runs the
+ event process /sbin/hotplug)
+ }
+ }
+ kobject_del() then calls sysfs_remove_dir(), which would
+ trigger any user-space daemon that was watching /sysfs,
+ and notice the delete event.
+
+
+Pro's and Con's of the Current Design
+-------------------------------------
+There are several issues with the current EEH software recovery design,
+which may be addressed in future revisions. But first, note that the
+big plus of the current design is that no changes need to be made to
+individual device drivers, so that the current design throws a wide net.
+The biggest negative of the design is that it potentially disturbs
+network daemons and file systems that didn't need to be disturbed.
+
+- A minor complaint is that resetting the network card causes
+ user-space back-to-back ifdown/ifup burps that potentially disturb
+ network daemons, that didn't need to even know that the pci
+ card was being rebooted.
+
+- A more serious concern is that the same reset, for SCSI devices,
+ causes havoc to mounted file systems. Scripts cannot post-facto
+ unmount a file system without flushing pending buffers, but this
+ is impossible, because I/O has already been stopped. Thus,
+ ideally, the reset should happen at or below the block layer,
+ so that the file systems are not disturbed.
+
+ Ext3fs seems to be tolerant, retrying reads/writes until it does
+ succeed. Both have been only lightly tested in this scenario.
+
+ The SCSI-generic subsystem already has built-in code for performing
+ SCSI device resets, SCSI bus resets, and SCSI host-bus-adapter
+ (HBA) resets. These are cascaded into a chain of attempted
+ resets if a SCSI command fails. These are completely hidden
+ from the block layer. It would be very natural to add an EEH
+ reset into this chain of events.
+
+- If a SCSI error occurs for the root device, all is lost unless
+ the sysadmin had the foresight to run /bin, /sbin, /etc, /var
+ and so on, out of ramdisk/tmpfs.
+
+
+Conclusions
+-----------
+There's forward progress ...
diff --git a/Documentation/arch/powerpc/elf_hwcaps.rst b/Documentation/arch/powerpc/elf_hwcaps.rst
new file mode 100644
index 000000000000..fce7489877b5
--- /dev/null
+++ b/Documentation/arch/powerpc/elf_hwcaps.rst
@@ -0,0 +1,232 @@
+.. _elf_hwcaps_powerpc:
+
+==================
+POWERPC ELF HWCAPs
+==================
+
+This document describes the usage and semantics of the powerpc ELF HWCAPs.
+
+
+1. Introduction
+---------------
+
+Some hardware or software features are only available on some CPU
+implementations, and/or with certain kernel configurations, but have no other
+discovery mechanism available to userspace code. The kernel exposes the
+presence of these features to userspace through a set of flags called HWCAPs,
+exposed in the auxiliary vector.
+
+Userspace software can test for features by acquiring the AT_HWCAP or
+AT_HWCAP2 entry of the auxiliary vector, and testing whether the relevant
+flags are set, e.g.::
+
+ bool floating_point_is_present(void)
+ {
+ unsigned long HWCAPs = getauxval(AT_HWCAP);
+ if (HWCAPs & PPC_FEATURE_HAS_FPU)
+ return true;
+
+ return false;
+ }
+
+Where software relies on a feature described by a HWCAP, it should check the
+relevant HWCAP flag to verify that the feature is present before attempting to
+make use of the feature.
+
+HWCAP is the preferred method to test for the presence of a feature rather
+than probing through other means, which may not be reliable or may cause
+unpredictable behaviour.
+
+Software that targets a particular platform does not necessarily have to
+test for required or implied features. For example if the program requires
+FPU, VMX, VSX, it is not necessary to test those HWCAPs, and it may be
+impossible to do so if the compiler generates code requiring those features.
+
+2. Facilities
+-------------
+
+The Power ISA uses the term "facility" to describe a class of instructions,
+registers, interrupts, etc. The presence or absence of a facility indicates
+whether this class is available to be used, but the specifics depend on the
+ISA version. For example, if the VSX facility is available, the VSX
+instructions that can be used differ between the v3.0B and v3.1B ISA
+versions.
+
+3. Categories
+-------------
+
+The Power ISA before v3.0 uses the term "category" to describe certain
+classes of instructions and operating modes which may be optional or
+mutually exclusive, the exact meaning of the HWCAP flag may depend on
+context, e.g., the presence of the BOOKE feature implies that the server
+category is not implemented.
+
+4. HWCAP allocation
+-------------------
+
+HWCAPs are allocated as described in Power Architecture 64-Bit ELF V2 ABI
+Specification (which will be reflected in the kernel's uapi headers).
+
+5. The HWCAPs exposed in AT_HWCAP
+---------------------------------
+
+PPC_FEATURE_32
+ 32-bit CPU
+
+PPC_FEATURE_64
+ 64-bit CPU (userspace may be running in 32-bit mode).
+
+PPC_FEATURE_601_INSTR
+ The processor is PowerPC 601.
+ Unused in the kernel since f0ed73f3fa2c ("powerpc: Remove PowerPC 601")
+
+PPC_FEATURE_HAS_ALTIVEC
+ Vector (aka Altivec, VMX) facility is available.
+
+PPC_FEATURE_HAS_FPU
+ Floating point facility is available.
+
+PPC_FEATURE_HAS_MMU
+ Memory management unit is present and enabled.
+
+PPC_FEATURE_HAS_4xxMAC
+ The processor is 40x or 44x family.
+ Unused in the kernel since 732b32daef80 ("powerpc: Remove core support for 40x")
+
+PPC_FEATURE_UNIFIED_CACHE
+ The processor has a unified L1 cache for instructions and data, as
+ found in NXP e200.
+ Unused in the kernel since 39c8bf2b3cc1 ("powerpc: Retire e200 core (mpc555x processor)")
+
+PPC_FEATURE_HAS_SPE
+ Signal Processing Engine facility is available.
+
+PPC_FEATURE_HAS_EFP_SINGLE
+ Embedded Floating Point single precision operations are available.
+
+PPC_FEATURE_HAS_EFP_DOUBLE
+ Embedded Floating Point double precision operations are available.
+
+PPC_FEATURE_NO_TB
+ The timebase facility (mftb instruction) is not available.
+ This is a 601 specific HWCAP, so if it is known that the processor
+ running is not a 601, via other HWCAPs or other means, it is not
+ required to test this bit before using the timebase.
+ Unused in the kernel since f0ed73f3fa2c ("powerpc: Remove PowerPC 601")
+
+PPC_FEATURE_POWER4
+ The processor is POWER4 or PPC970/FX/MP.
+ POWER4 support dropped from the kernel since 471d7ff8b51b ("powerpc/64s: Remove POWER4 support")
+
+PPC_FEATURE_POWER5
+ The processor is POWER5.
+
+PPC_FEATURE_POWER5_PLUS
+ The processor is POWER5+.
+
+PPC_FEATURE_CELL
+ The processor is Cell.
+
+PPC_FEATURE_BOOKE
+ The processor implements the embedded category ("BookE") architecture.
+
+PPC_FEATURE_SMT
+ The processor implements SMT.
+
+PPC_FEATURE_ICACHE_SNOOP
+ The processor icache is coherent with the dcache, and instruction storage
+ can be made consistent with data storage for the purpose of executing
+ instructions with the sequence (as described in, e.g., POWER9 Processor
+ User's Manual, 4.6.2.2 Instruction Cache Block Invalidate (icbi))::
+
+ sync
+ icbi (to any address)
+ isync
+
+PPC_FEATURE_ARCH_2_05
+ The processor supports the v2.05 userlevel architecture. Processors
+ supporting later architectures DO NOT set this feature.
+
+PPC_FEATURE_PA6T
+ The processor is PA6T.
+
+PPC_FEATURE_HAS_DFP
+ DFP facility is available.
+
+PPC_FEATURE_POWER6_EXT
+ The processor is POWER6.
+
+PPC_FEATURE_ARCH_2_06
+ The processor supports the v2.06 userlevel architecture. Processors
+ supporting later architectures also set this feature.
+
+PPC_FEATURE_HAS_VSX
+ VSX facility is available.
+
+PPC_FEATURE_PSERIES_PERFMON_COMPAT
+ The processor supports architected PMU events in the range 0xE0-0xFF.
+
+PPC_FEATURE_TRUE_LE
+ The processor supports true little-endian mode.
+
+PPC_FEATURE_PPC_LE
+ The processor supports "PowerPC Little-Endian", that uses address
+ munging to make storage access appear to be little-endian, but the
+ data is stored in a different format that is unsuitable to be
+ accessed by other agents not running in this mode.
+
+6. The HWCAPs exposed in AT_HWCAP2
+----------------------------------
+
+PPC_FEATURE2_ARCH_2_07
+ The processor supports the v2.07 userlevel architecture. Processors
+ supporting later architectures also set this feature.
+
+PPC_FEATURE2_HTM
+ Transactional Memory feature is available.
+
+PPC_FEATURE2_DSCR
+ DSCR facility is available.
+
+PPC_FEATURE2_EBB
+ EBB facility is available.
+
+PPC_FEATURE2_ISEL
+ isel instruction is available. This is superseded by ARCH_2_07 and
+ later.
+
+PPC_FEATURE2_TAR
+ TAR facility is available.
+
+PPC_FEATURE2_VEC_CRYPTO
+ v2.07 crypto instructions are available.
+
+PPC_FEATURE2_HTM_NOSC
+ System calls fail if called in a transactional state, see
+ Documentation/arch/powerpc/syscall64-abi.rst
+
+PPC_FEATURE2_ARCH_3_00
+ The processor supports the v3.0B / v3.0C userlevel architecture. Processors
+ supporting later architectures also set this feature.
+
+PPC_FEATURE2_HAS_IEEE128
+ IEEE 128-bit binary floating point is supported with VSX
+ quad-precision instructions and data types.
+
+PPC_FEATURE2_DARN
+ darn instruction is available.
+
+PPC_FEATURE2_SCV
+ The scv 0 instruction may be used for system calls, see
+ Documentation/arch/powerpc/syscall64-abi.rst.
+
+PPC_FEATURE2_HTM_NO_SUSPEND
+ A limited Transactional Memory facility that does not support suspend is
+ available, see Documentation/arch/powerpc/transactional_memory.rst.
+
+PPC_FEATURE2_ARCH_3_1
+ The processor supports the v3.1 userlevel architecture. Processors
+ supporting later architectures also set this feature.
+
+PPC_FEATURE2_MMA
+ MMA facility is available.
diff --git a/Documentation/arch/powerpc/elfnote.rst b/Documentation/arch/powerpc/elfnote.rst
new file mode 100644
index 000000000000..3ec8d61e9a33
--- /dev/null
+++ b/Documentation/arch/powerpc/elfnote.rst
@@ -0,0 +1,41 @@
+==========================
+ELF Note PowerPC Namespace
+==========================
+
+The PowerPC namespace in an ELF Note of the kernel binary is used to store
+capabilities and information which can be used by a bootloader or userland.
+
+Types and Descriptors
+---------------------
+
+The types to be used with the "PowerPC" namespace are defined in [#f1]_.
+
+ 1) PPC_ELFNOTE_CAPABILITIES
+
+Define the capabilities supported/required by the kernel. This type uses a
+bitmap as "descriptor" field. Each bit is described below:
+
+- Ultravisor-capable bit (PowerNV only).
+
+.. code-block:: c
+
+ #define PPCCAP_ULTRAVISOR_BIT (1 << 0)
+
+Indicate that the powerpc kernel binary knows how to run in an
+ultravisor-enabled system.
+
+In an ultravisor-enabled system, some machine resources are now controlled
+by the ultravisor. If the kernel is not ultravisor-capable, but it ends up
+being run on a machine with ultravisor, the kernel will probably crash
+trying to access ultravisor resources. For instance, it may crash in early
+boot trying to set the partition table entry 0.
+
+In an ultravisor-enabled system, a bootloader could warn the user or prevent
+the kernel from being run if the PowerPC ultravisor capability doesn't exist
+or the Ultravisor-capable bit is not set.
+
+References
+----------
+
+.. [#f1] arch/powerpc/include/asm/elfnote.h
+
diff --git a/Documentation/arch/powerpc/features.rst b/Documentation/arch/powerpc/features.rst
new file mode 100644
index 000000000000..ee4b95e04202
--- /dev/null
+++ b/Documentation/arch/powerpc/features.rst
@@ -0,0 +1,3 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. kernel-feat:: features powerpc
diff --git a/Documentation/arch/powerpc/firmware-assisted-dump.rst b/Documentation/arch/powerpc/firmware-assisted-dump.rst
new file mode 100644
index 000000000000..7e266e749cd5
--- /dev/null
+++ b/Documentation/arch/powerpc/firmware-assisted-dump.rst
@@ -0,0 +1,396 @@
+======================
+Firmware-Assisted Dump
+======================
+
+July 2011
+
+The goal of firmware-assisted dump is to enable the dump of
+a crashed system, and to do so from a fully-reset system, and
+to minimize the total elapsed time until the system is back
+in production use.
+
+- Firmware-Assisted Dump (FADump) infrastructure is intended to replace
+ the existing phyp assisted dump.
+- Fadump uses the same firmware interfaces and memory reservation model
+ as phyp assisted dump.
+- Unlike phyp dump, FADump exports the memory dump through /proc/vmcore
+ in the ELF format in the same way as kdump. This helps us reuse the
+ kdump infrastructure for dump capture and filtering.
+- Unlike phyp dump, userspace tool does not need to refer any sysfs
+ interface while reading /proc/vmcore.
+- Unlike phyp dump, FADump allows user to release all the memory reserved
+ for dump, with a single operation of echo 1 > /sys/kernel/fadump_release_mem.
+- Once enabled through kernel boot parameter, FADump can be
+ started/stopped through /sys/kernel/fadump_registered interface (see
+ sysfs files section below) and can be easily integrated with kdump
+ service start/stop init scripts.
+
+Comparing with kdump or other strategies, firmware-assisted
+dump offers several strong, practical advantages:
+
+- Unlike kdump, the system has been reset, and loaded
+ with a fresh copy of the kernel. In particular,
+ PCI and I/O devices have been reinitialized and are
+ in a clean, consistent state.
+- Once the dump is copied out, the memory that held the dump
+ is immediately available to the running kernel. And therefore,
+ unlike kdump, FADump doesn't need a 2nd reboot to get back
+ the system to the production configuration.
+
+The above can only be accomplished by coordination with,
+and assistance from the Power firmware. The procedure is
+as follows:
+
+- The first kernel registers the sections of memory with the
+ Power firmware for dump preservation during OS initialization.
+ These registered sections of memory are reserved by the first
+ kernel during early boot.
+
+- When system crashes, the Power firmware will copy the registered
+ low memory regions (boot memory) from source to destination area.
+ It will also save hardware PTE's.
+
+ NOTE:
+ The term 'boot memory' means size of the low memory chunk
+ that is required for a kernel to boot successfully when
+ booted with restricted memory. By default, the boot memory
+ size will be the larger of 5% of system RAM or 256MB.
+ Alternatively, user can also specify boot memory size
+ through boot parameter 'crashkernel=' which will override
+ the default calculated size. Use this option if default
+ boot memory size is not sufficient for second kernel to
+ boot successfully. For syntax of crashkernel= parameter,
+ refer to Documentation/admin-guide/kdump/kdump.rst. If any
+ offset is provided in crashkernel= parameter, it will be
+ ignored as FADump uses a predefined offset to reserve memory
+ for boot memory dump preservation in case of a crash.
+
+- After the low memory (boot memory) area has been saved, the
+ firmware will reset PCI and other hardware state. It will
+ *not* clear the RAM. It will then launch the bootloader, as
+ normal.
+
+- The freshly booted kernel will notice that there is a new node
+ (rtas/ibm,kernel-dump on pSeries or ibm,opal/dump/mpipl-boot
+ on OPAL platform) in the device tree, indicating that
+ there is crash data available from a previous boot. During
+ the early boot OS will reserve rest of the memory above
+ boot memory size effectively booting with restricted memory
+ size. This will make sure that this kernel (also, referred
+ to as second kernel or capture kernel) will not touch any
+ of the dump memory area.
+
+- User-space tools will read /proc/vmcore to obtain the contents
+ of memory, which holds the previous crashed kernel dump in ELF
+ format. The userspace tools may copy this info to disk, or
+ network, nas, san, iscsi, etc. as desired.
+
+- Once the userspace tool is done saving dump, it will echo
+ '1' to /sys/kernel/fadump_release_mem to release the reserved
+ memory back to general use, except the memory required for
+ next firmware-assisted dump registration.
+
+ e.g.::
+
+ # echo 1 > /sys/kernel/fadump_release_mem
+
+Please note that the firmware-assisted dump feature
+is only available on POWER6 and above systems on pSeries
+(PowerVM) platform and POWER9 and above systems with OP940
+or later firmware versions on PowerNV (OPAL) platform.
+Note that, OPAL firmware exports ibm,opal/dump node when
+FADump is supported on PowerNV platform.
+
+On OPAL based machines, system first boots into an intermittent
+kernel (referred to as petitboot kernel) before booting into the
+capture kernel. This kernel would have minimal kernel and/or
+userspace support to process crash data. Such kernel needs to
+preserve previously crash'ed kernel's memory for the subsequent
+capture kernel boot to process this crash data. Kernel config
+option CONFIG_PRESERVE_FA_DUMP has to be enabled on such kernel
+to ensure that crash data is preserved to process later.
+
+-- On OPAL based machines (PowerNV), if the kernel is build with
+ CONFIG_OPAL_CORE=y, OPAL memory at the time of crash is also
+ exported as /sys/firmware/opal/mpipl/core file. This procfs file is
+ helpful in debugging OPAL crashes with GDB. The kernel memory
+ used for exporting this procfs file can be released by echo'ing
+ '1' to /sys/firmware/opal/mpipl/release_core node.
+
+ e.g.
+ # echo 1 > /sys/firmware/opal/mpipl/release_core
+
+-- Support for Additional Kernel Arguments in Fadump
+ Fadump has a feature that allows passing additional kernel arguments
+ to the fadump kernel. This feature was primarily designed to disable
+ kernel functionalities that are not required for the fadump kernel
+ and to reduce its memory footprint while collecting the dump.
+
+ Command to Add Additional Kernel Parameters to Fadump:
+ e.g.
+ # echo "nr_cpus=16" > /sys/kernel/fadump/bootargs_append
+
+ The above command is sufficient to add additional arguments to fadump.
+ An explicit service restart is not required.
+
+ Command to Retrieve the Additional Fadump Arguments:
+ e.g.
+ # cat /sys/kernel/fadump/bootargs_append
+
+Note: Additional kernel arguments for fadump with HASH MMU is only
+ supported if the RMA size is greater than 768 MB. If the RMA
+ size is less than 768 MB, the kernel does not export the
+ /sys/kernel/fadump/bootargs_append sysfs node.
+
+Implementation details:
+-----------------------
+
+During boot, a check is made to see if firmware supports
+this feature on that particular machine. If it does, then
+we check to see if an active dump is waiting for us. If yes
+then everything but boot memory size of RAM is reserved during
+early boot (See Fig. 2). This area is released once we finish
+collecting the dump from user land scripts (e.g. kdump scripts)
+that are run. If there is dump data, then the
+/sys/kernel/fadump_release_mem file is created, and the reserved
+memory is held.
+
+If there is no waiting dump data, then only the memory required to
+hold CPU state, HPTE region, boot memory dump, and FADump header is
+usually reserved at an offset greater than boot memory size (see Fig. 1).
+This area is *not* released: this region will be kept permanently
+reserved, so that it can act as a receptacle for a copy of the boot
+memory content in addition to CPU state and HPTE region, in the case
+a crash does occur.
+
+Since this reserved memory area is used only after the system crash,
+there is no point in blocking this significant chunk of memory from
+production kernel. Hence, the implementation uses the Linux kernel's
+Contiguous Memory Allocator (CMA) for memory reservation if CMA is
+configured for kernel. With CMA reservation this memory will be
+available for applications to use it, while kernel is prevented from
+using it. With this FADump will still be able to capture all of the
+kernel memory and most of the user space memory except the user pages
+that were present in CMA region::
+
+ o Memory Reservation during first kernel
+
+ Low memory Top of memory
+ 0 boot memory size |<------ Reserved dump area ----->| |
+ | | | Permanent Reservation | |
+ V V | | V
+ +-----------+-----/ /---+---+----+-----------+-------+----+-----+
+ | | |///|////| DUMP | HDR |////| |
+ +-----------+-----/ /---+---+----+-----------+-------+----+-----+
+ | ^ ^ ^ ^ ^
+ | | | | | |
+ \ CPU HPTE / | |
+ -------------------------------- | |
+ Boot memory content gets transferred | |
+ to reserved area by firmware at the | |
+ time of crash. | |
+ FADump Header |
+ (meta area) |
+ |
+ |
+ Metadata: This area holds a metadata structure whose
+ address is registered with f/w and retrieved in the
+ second kernel after crash, on platforms that support
+ tags (OPAL). Having such structure with info needed
+ to process the crashdump eases dump capture process.
+
+ Fig. 1
+
+
+ o Memory Reservation during second kernel after crash
+
+ Low memory Top of memory
+ 0 boot memory size |
+ | |<------------ Crash preserved area ------------>|
+ V V |<--- Reserved dump area --->| |
+ +----+---+--+-----/ /---+---+----+-------+-----+-----+-------+
+ | |ELF| | |///|////| DUMP | HDR |/////| |
+ +----+---+--+-----/ /---+---+----+-------+-----+-----+-------+
+ | | | | | |
+ ----- ------------------------------ ---------------
+ \ | |
+ \ | |
+ \ | |
+ \ | ----------------------------
+ \ | /
+ \ | /
+ \ | /
+ /proc/vmcore
+
+
+ +---+
+ |///| -> Regions (CPU, HPTE & Metadata) marked like this in the above
+ +---+ figures are not always present. For example, OPAL platform
+ does not have CPU & HPTE regions while Metadata region is
+ not supported on pSeries currently.
+
+ +---+
+ |ELF| -> elfcorehdr, it is created in second kernel after crash.
+ +---+
+
+ Note: Memory from 0 to the boot memory size is used by second kernel
+
+ Fig. 2
+
+
+Currently the dump will be copied from /proc/vmcore to a new file upon
+user intervention. The dump data available through /proc/vmcore will be
+in ELF format. Hence the existing kdump infrastructure (kdump scripts)
+to save the dump works fine with minor modifications. KDump scripts on
+major Distro releases have already been modified to work seamlessly (no
+user intervention in saving the dump) when FADump is used, instead of
+KDump, as dump mechanism.
+
+The tools to examine the dump will be same as the ones
+used for kdump.
+
+How to enable firmware-assisted dump (FADump):
+----------------------------------------------
+
+1. Set config option CONFIG_FA_DUMP=y and build kernel.
+2. Boot into linux kernel with 'fadump=on' kernel cmdline option.
+ By default, FADump reserved memory will be initialized as CMA area.
+ Alternatively, user can boot linux kernel with 'fadump=nocma' to
+ prevent FADump to use CMA.
+3. Optionally, user can also set 'crashkernel=' kernel cmdline
+ to specify size of the memory to reserve for boot memory dump
+ preservation.
+
+NOTE:
+ 1. 'fadump_reserve_mem=' parameter has been deprecated. Instead
+ use 'crashkernel=' to specify size of the memory to reserve
+ for boot memory dump preservation.
+ 2. If firmware-assisted dump fails to reserve memory then it
+ will fallback to existing kdump mechanism if 'crashkernel='
+ option is set at kernel cmdline.
+ 3. if user wants to capture all of user space memory and ok with
+ reserved memory not available to production system, then
+ 'fadump=nocma' kernel parameter can be used to fallback to
+ old behaviour.
+
+Sysfs/debugfs files:
+--------------------
+
+Firmware-assisted dump feature uses sysfs file system to hold
+the control files and debugfs file to display memory reserved region.
+
+Here is the list of files under kernel sysfs:
+
+ /sys/kernel/fadump_enabled
+ This is used to display the FADump status.
+
+ - 0 = FADump is disabled
+ - 1 = FADump is enabled
+
+ This interface can be used by kdump init scripts to identify if
+ FADump is enabled in the kernel and act accordingly.
+
+ /sys/kernel/fadump_registered
+ This is used to display the FADump registration status as well
+ as to control (start/stop) the FADump registration.
+
+ - 0 = FADump is not registered.
+ - 1 = FADump is registered and ready to handle system crash.
+
+ To register FADump echo 1 > /sys/kernel/fadump_registered and
+ echo 0 > /sys/kernel/fadump_registered for un-register and stop the
+ FADump. Once the FADump is un-registered, the system crash will not
+ be handled and vmcore will not be captured. This interface can be
+ easily integrated with kdump service start/stop.
+
+ /sys/kernel/fadump/mem_reserved
+
+ This is used to display the memory reserved by FADump for saving the
+ crash dump.
+
+ /sys/kernel/fadump_release_mem
+ This file is available only when FADump is active during
+ second kernel. This is used to release the reserved memory
+ region that are held for saving crash dump. To release the
+ reserved memory echo 1 to it::
+
+ echo 1 > /sys/kernel/fadump_release_mem
+
+ After echo 1, the content of the /sys/kernel/debug/powerpc/fadump_region
+ file will change to reflect the new memory reservations.
+
+ The existing userspace tools (kdump infrastructure) can be easily
+ enhanced to use this interface to release the memory reserved for
+ dump and continue without 2nd reboot.
+
+Note: /sys/kernel/fadump_release_opalcore sysfs has moved to
+ /sys/firmware/opal/mpipl/release_core
+
+ /sys/firmware/opal/mpipl/release_core
+
+ This file is available only on OPAL based machines when FADump is
+ active during capture kernel. This is used to release the memory
+ used by the kernel to export /sys/firmware/opal/mpipl/core file. To
+ release this memory, echo '1' to it:
+
+ echo 1 > /sys/firmware/opal/mpipl/release_core
+
+Note: The following FADump sysfs files are deprecated.
+
++----------------------------------+--------------------------------+
+| Deprecated | Alternative |
++----------------------------------+--------------------------------+
+| /sys/kernel/fadump_enabled | /sys/kernel/fadump/enabled |
++----------------------------------+--------------------------------+
+| /sys/kernel/fadump_registered | /sys/kernel/fadump/registered |
++----------------------------------+--------------------------------+
+| /sys/kernel/fadump_release_mem | /sys/kernel/fadump/release_mem |
++----------------------------------+--------------------------------+
+
+Here is the list of files under powerpc debugfs:
+(Assuming debugfs is mounted on /sys/kernel/debug directory.)
+
+ /sys/kernel/debug/powerpc/fadump_region
+ This file shows the reserved memory regions if FADump is
+ enabled otherwise this file is empty. The output format
+ is::
+
+ <region>: [<start>-<end>] <reserved-size> bytes, Dumped: <dump-size>
+
+ and for kernel DUMP region is:
+
+ DUMP: Src: <src-addr>, Dest: <dest-addr>, Size: <size>, Dumped: # bytes
+
+ e.g.
+ Contents when FADump is registered during first kernel::
+
+ # cat /sys/kernel/debug/powerpc/fadump_region
+ CPU : [0x0000006ffb0000-0x0000006fff001f] 0x40020 bytes, Dumped: 0x0
+ HPTE: [0x0000006fff0020-0x0000006fff101f] 0x1000 bytes, Dumped: 0x0
+ DUMP: [0x0000006fff1020-0x0000007fff101f] 0x10000000 bytes, Dumped: 0x0
+
+ Contents when FADump is active during second kernel::
+
+ # cat /sys/kernel/debug/powerpc/fadump_region
+ CPU : [0x0000006ffb0000-0x0000006fff001f] 0x40020 bytes, Dumped: 0x40020
+ HPTE: [0x0000006fff0020-0x0000006fff101f] 0x1000 bytes, Dumped: 0x1000
+ DUMP: [0x0000006fff1020-0x0000007fff101f] 0x10000000 bytes, Dumped: 0x10000000
+ : [0x00000010000000-0x0000006ffaffff] 0x5ffb0000 bytes, Dumped: 0x5ffb0000
+
+
+NOTE:
+ Please refer to Documentation/filesystems/debugfs.rst on
+ how to mount the debugfs filesystem.
+
+
+TODO:
+-----
+ - Need to come up with the better approach to find out more
+ accurate boot memory size that is required for a kernel to
+ boot successfully when booted with restricted memory.
+
+Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+
+This document is based on the original documentation written for phyp
+
+assisted dump by Linas Vepstas and Manish Ahuja.
diff --git a/Documentation/arch/powerpc/htm.rst b/Documentation/arch/powerpc/htm.rst
new file mode 100644
index 000000000000..fcb4eb6306b1
--- /dev/null
+++ b/Documentation/arch/powerpc/htm.rst
@@ -0,0 +1,104 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. _htm:
+
+===================================
+HTM (Hardware Trace Macro)
+===================================
+
+Athira Rajeev, 2 Mar 2025
+
+.. contents::
+ :depth: 3
+
+
+Basic overview
+==============
+
+H_HTM is used as an interface for executing Hardware Trace Macro (HTM)
+functions, including setup, configuration, control and dumping of the HTM data.
+For using HTM, it is required to setup HTM buffers and HTM operations can
+be controlled using the H_HTM hcall. The hcall can be invoked for any core/chip
+of the system from within a partition itself. To use this feature, a debugfs
+folder called "htmdump" is present under /sys/kernel/debug/powerpc.
+
+
+HTM debugfs example usage
+=========================
+
+.. code-block:: sh
+
+ # ls /sys/kernel/debug/powerpc/htmdump/
+ coreindexonchip htmcaps htmconfigure htmflags htminfo htmsetup
+ htmstart htmstatus htmtype nodalchipindex nodeindex trace
+
+Details on each file:
+
+* nodeindex, nodalchipindex, coreindexonchip specifies which partition to configure the HTM for.
+* htmtype: specifies the type of HTM. Supported target is hardwareTarget.
+* trace: is to read the HTM data.
+* htmconfigure: Configure/Deconfigure the HTM. Writing 1 to the file will configure the trace, writing 0 to the file will do deconfigure.
+* htmstart: start/Stop the HTM. Writing 1 to the file will start the tracing, writing 0 to the file will stop the tracing.
+* htmstatus: get the status of HTM. This is needed to understand the HTM state after each operation.
+* htmsetup: set the HTM buffer size. Size of HTM buffer is in power of 2
+* htminfo: provides the system processor configuration details. This is needed to understand the appropriate values for nodeindex, nodalchipindex, coreindexonchip.
+* htmcaps : provides the HTM capabilities like minimum/maximum buffer size, what kind of tracing the HTM supports etc.
+* htmflags : allows to pass flags to hcall. Currently supports controlling the wrapping of HTM buffer.
+
+To see the system processor configuration details:
+
+.. code-block:: sh
+
+ # cat /sys/kernel/debug/powerpc/htmdump/htminfo > htminfo_file
+
+The result can be interpreted using hexdump.
+
+To collect HTM traces for a partition represented by nodeindex as
+zero, nodalchipindex as 1 and coreindexonchip as 12
+
+.. code-block:: sh
+
+ # cd /sys/kernel/debug/powerpc/htmdump/
+ # echo 2 > htmtype
+ # echo 33 > htmsetup ( sets 8GB memory for HTM buffer, number is size in power of 2 )
+
+This requires a CEC reboot to get the HTM buffers allocated.
+
+.. code-block:: sh
+
+ # cd /sys/kernel/debug/powerpc/htmdump/
+ # echo 2 > htmtype
+ # echo 0 > nodeindex
+ # echo 1 > nodalchipindex
+ # echo 12 > coreindexonchip
+ # echo 1 > htmflags # to set noWrap for HTM buffers
+ # echo 1 > htmconfigure # Configure the HTM
+ # echo 1 > htmstart # Start the HTM
+ # echo 0 > htmstart # Stop the HTM
+ # echo 0 > htmconfigure # Deconfigure the HTM
+ # cat htmstatus # Dump the status of HTM entries as data
+
+Above will set the htmtype and core details, followed by executing respective HTM operation.
+
+Read the HTM trace data
+========================
+
+After starting the trace collection, run the workload
+of interest. Stop the trace collection after required period
+of time, and read the trace file.
+
+.. code-block:: sh
+
+ # cat /sys/kernel/debug/powerpc/htmdump/trace > trace_file
+
+This trace file will contain the relevant instruction traces
+collected during the workload execution. And can be used as
+input file for trace decoders to understand data.
+
+Benefits of using HTM debugfs interface
+=======================================
+
+It is now possible to collect traces for a particular core/chip
+from within any partition of the system and decode it. Through
+this enablement, a small partition can be dedicated to collect the
+trace data and analyze to provide important information for Performance
+analysis, Software tuning, or Hardware debug.
diff --git a/Documentation/arch/powerpc/hvcs.rst b/Documentation/arch/powerpc/hvcs.rst
new file mode 100644
index 000000000000..6808acde672f
--- /dev/null
+++ b/Documentation/arch/powerpc/hvcs.rst
@@ -0,0 +1,581 @@
+===============================================================
+HVCS IBM "Hypervisor Virtual Console Server" Installation Guide
+===============================================================
+
+for Linux Kernel 2.6.4+
+
+Copyright (C) 2004 IBM Corporation
+
+.. ===========================================================================
+.. NOTE:Eight space tabs are the optimum editor setting for reading this file.
+.. ===========================================================================
+
+
+Author(s): Ryan S. Arnold <rsa@us.ibm.com>
+
+Date Created: March, 02, 2004
+Last Changed: August, 24, 2004
+
+.. Table of contents:
+
+ 1. Driver Introduction:
+ 2. System Requirements
+ 3. Build Options:
+ 3.1 Built-in:
+ 3.2 Module:
+ 4. Installation:
+ 5. Connection:
+ 6. Disconnection:
+ 7. Configuration:
+ 8. Questions & Answers:
+ 9. Reporting Bugs:
+
+1. Driver Introduction:
+=======================
+
+This is the device driver for the IBM Hypervisor Virtual Console Server,
+"hvcs". The IBM hvcs provides a tty driver interface to allow Linux user
+space applications access to the system consoles of logically partitioned
+operating systems (Linux and AIX) running on the same partitioned Power5
+ppc64 system. Physical hardware consoles per partition are not practical
+on this hardware so system consoles are accessed by this driver using
+firmware interfaces to virtual terminal devices.
+
+2. System Requirements:
+=======================
+
+This device driver was written using 2.6.4 Linux kernel APIs and will only
+build and run on kernels of this version or later.
+
+This driver was written to operate solely on IBM Power5 ppc64 hardware
+though some care was taken to abstract the architecture dependent firmware
+calls from the driver code.
+
+Sysfs must be mounted on the system so that the user can determine which
+major and minor numbers are associated with each vty-server. Directions
+for sysfs mounting are outside the scope of this document.
+
+3. Build Options:
+=================
+
+The hvcs driver registers itself as a tty driver. The tty layer
+dynamically allocates a block of major and minor numbers in a quantity
+requested by the registering driver. The hvcs driver asks the tty layer
+for 64 of these major/minor numbers by default to use for hvcs device node
+entries.
+
+If the default number of device entries is adequate then this driver can be
+built into the kernel. If not, the default can be over-ridden by inserting
+the driver as a module with insmod parameters.
+
+3.1 Built-in:
+-------------
+
+The following menuconfig example demonstrates selecting to build this
+driver into the kernel::
+
+ Device Drivers --->
+ Character devices --->
+ <*> IBM Hypervisor Virtual Console Server Support
+
+Begin the kernel make process.
+
+3.2 Module:
+-----------
+
+The following menuconfig example demonstrates selecting to build this
+driver as a kernel module::
+
+ Device Drivers --->
+ Character devices --->
+ <M> IBM Hypervisor Virtual Console Server Support
+
+The make process will build the following kernel modules:
+
+ - hvcs.ko
+ - hvcserver.ko
+
+To insert the module with the default allocation execute the following
+commands in the order they appear::
+
+ insmod hvcserver.ko
+ insmod hvcs.ko
+
+The hvcserver module contains architecture specific firmware calls and must
+be inserted first, otherwise the hvcs module will not find some of the
+symbols it expects.
+
+To override the default use an insmod parameter as follows (requesting 4
+tty devices as an example)::
+
+ insmod hvcs.ko hvcs_parm_num_devs=4
+
+There is a maximum number of dev entries that can be specified on insmod.
+We think that 1024 is currently a decent maximum number of server adapters
+to allow. This can always be changed by modifying the constant in the
+source file before building.
+
+NOTE: The length of time it takes to insmod the driver seems to be related
+to the number of tty interfaces the registering driver requests.
+
+In order to remove the driver module execute the following command::
+
+ rmmod hvcs.ko
+
+The recommended method for installing hvcs as a module is to use depmod to
+build a current modules.dep file in /lib/modules/`uname -r` and then
+execute::
+
+ modprobe hvcs hvcs_parm_num_devs=4
+
+The modules.dep file indicates that hvcserver.ko needs to be inserted
+before hvcs.ko and modprobe uses this file to smartly insert the modules in
+the proper order.
+
+The following modprobe command is used to remove hvcs and hvcserver in the
+proper order::
+
+ modprobe -r hvcs
+
+4. Installation:
+================
+
+The tty layer creates sysfs entries which contain the major and minor
+numbers allocated for the hvcs driver. The following snippet of "tree"
+output of the sysfs directory shows where these numbers are presented::
+
+ sys/
+ |-- *other sysfs base dirs*
+ |
+ |-- class
+ | |-- *other classes of devices*
+ | |
+ | `-- tty
+ | |-- *other tty devices*
+ | |
+ | |-- hvcs0
+ | | `-- dev
+ | |-- hvcs1
+ | | `-- dev
+ | |-- hvcs2
+ | | `-- dev
+ | |-- hvcs3
+ | | `-- dev
+ | |
+ | |-- *other tty devices*
+ |
+ |-- *other sysfs base dirs*
+
+For the above examples the following output is a result of cat'ing the
+"dev" entry in the hvcs directory::
+
+ Pow5:/sys/class/tty/hvcs0/ # cat dev
+ 254:0
+
+ Pow5:/sys/class/tty/hvcs1/ # cat dev
+ 254:1
+
+ Pow5:/sys/class/tty/hvcs2/ # cat dev
+ 254:2
+
+ Pow5:/sys/class/tty/hvcs3/ # cat dev
+ 254:3
+
+The output from reading the "dev" attribute is the char device major and
+minor numbers that the tty layer has allocated for this driver's use. Most
+systems running hvcs will already have the device entries created or udev
+will do it automatically.
+
+Given the example output above, to manually create a /dev/hvcs* node entry
+mknod can be used as follows::
+
+ mknod /dev/hvcs0 c 254 0
+ mknod /dev/hvcs1 c 254 1
+ mknod /dev/hvcs2 c 254 2
+ mknod /dev/hvcs3 c 254 3
+
+Using mknod to manually create the device entries makes these device nodes
+persistent. Once created they will exist prior to the driver insmod.
+
+Attempting to connect an application to /dev/hvcs* prior to insertion of
+the hvcs module will result in an error message similar to the following::
+
+ "/dev/hvcs*: No such device".
+
+NOTE: Just because there is a device node present doesn't mean that there
+is a vty-server device configured for that node.
+
+5. Connection
+=============
+
+Since this driver controls devices that provide a tty interface a user can
+interact with the device node entries using any standard tty-interactive
+method (e.g. "cat", "dd", "echo"). The intent of this driver however, is
+to provide real time console interaction with a Linux partition's console,
+which requires the use of applications that provide bi-directional,
+interactive I/O with a tty device.
+
+Applications (e.g. "minicom" and "screen") that act as terminal emulators
+or perform terminal type control sequence conversion on the data being
+passed through them are NOT acceptable for providing interactive console
+I/O. These programs often emulate antiquated terminal types (vt100 and
+ANSI) and expect inbound data to take the form of one of these supported
+terminal types but they either do not convert, or do not _adequately_
+convert, outbound data into the terminal type of the terminal which invoked
+them (though screen makes an attempt and can apparently be configured with
+much termcap wrestling.)
+
+For this reason kermit and cu are two of the recommended applications for
+interacting with a Linux console via an hvcs device. These programs simply
+act as a conduit for data transfer to and from the tty device. They do not
+require inbound data to take the form of a particular terminal type, nor do
+they cook outbound data to a particular terminal type.
+
+In order to ensure proper functioning of console applications one must make
+sure that once connected to a /dev/hvcs console that the console's $TERM
+env variable is set to the exact terminal type of the terminal emulator
+used to launch the interactive I/O application. If one is using xterm and
+kermit to connect to /dev/hvcs0 when the console prompt becomes available
+one should "export TERM=xterm" on the console. This tells ncurses
+applications that are invoked from the console that they should output
+control sequences that xterm can understand.
+
+As a precautionary measure an hvcs user should always "exit" from their
+session before disconnecting an application such as kermit from the device
+node. If this is not done, the next user to connect to the console will
+continue using the previous user's logged in session which includes
+using the $TERM variable that the previous user supplied.
+
+Hotplug add and remove of vty-server adapters affects which /dev/hvcs* node
+is used to connect to each vty-server adapter. In order to determine which
+vty-server adapter is associated with which /dev/hvcs* node a special sysfs
+attribute has been added to each vty-server sysfs entry. This entry is
+called "index" and showing it reveals an integer that refers to the
+/dev/hvcs* entry to use to connect to that device. For instance cating the
+index attribute of vty-server adapter 30000004 shows the following::
+
+ Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat index
+ 2
+
+This index of '2' means that in order to connect to vty-server adapter
+30000004 the user should interact with /dev/hvcs2.
+
+It should be noted that due to the system hotplug I/O capabilities of a
+system the /dev/hvcs* entry that interacts with a particular vty-server
+adapter is not guaranteed to remain the same across system reboots. Look
+in the Q & A section for more on this issue.
+
+6. Disconnection
+================
+
+As a security feature to prevent the delivery of stale data to an
+unintended target the Power5 system firmware disables the fetching of data
+and discards that data when a connection between a vty-server and a vty has
+been severed. As an example, when a vty-server is immediately disconnected
+from a vty following output of data to the vty the vty adapter may not have
+enough time between when it received the data interrupt and when the
+connection was severed to fetch the data from firmware before the fetch is
+disabled by firmware.
+
+When hvcs is being used to serve consoles this behavior is not a huge issue
+because the adapter stays connected for large amounts of time following
+almost all data writes. When hvcs is being used as a tty conduit to tunnel
+data between two partitions [see Q & A below] this is a huge problem
+because the standard Linux behavior when cat'ing or dd'ing data to a device
+is to open the tty, send the data, and then close the tty. If this driver
+manually terminated vty-server connections on tty close this would close
+the vty-server and vty connection before the target vty has had a chance to
+fetch the data.
+
+Additionally, disconnecting a vty-server and vty only on module removal or
+adapter removal is impractical because other vty-servers in other
+partitions may require the usage of the target vty at any time.
+
+Due to this behavioral restriction disconnection of vty-servers from the
+connected vty is a manual procedure using a write to a sysfs attribute
+outlined below, on the other hand the initial vty-server connection to a
+vty is established automatically by this driver. Manual vty-server
+connection is never required.
+
+In order to terminate the connection between a vty-server and vty the
+"vterm_state" sysfs attribute within each vty-server's sysfs entry is used.
+Reading this attribute reveals the current connection state of the
+vty-server adapter. A zero means that the vty-server is not connected to a
+vty. A one indicates that a connection is active.
+
+Writing a '0' (zero) to the vterm_state attribute will disconnect the VTERM
+connection between the vty-server and target vty ONLY if the vterm_state
+previously read '1'. The write directive is ignored if the vterm_state
+read '0' or if any value other than '0' was written to the vterm_state
+attribute. The following example will show the method used for verifying
+the vty-server connection status and disconnecting a vty-server connection::
+
+ Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat vterm_state
+ 1
+
+ Pow5:/sys/bus/vio/drivers/hvcs/30000004 # echo 0 > vterm_state
+
+ Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat vterm_state
+ 0
+
+All vty-server connections are automatically terminated when the device is
+hotplug removed and when the module is removed.
+
+7. Configuration
+================
+
+Each vty-server has a sysfs entry in the /sys/devices/vio directory, which
+is symlinked in several other sysfs tree directories, notably under the
+hvcs driver entry, which looks like the following example::
+
+ Pow5:/sys/bus/vio/drivers/hvcs # ls
+ . .. 30000003 30000004 rescan
+
+By design, firmware notifies the hvcs driver of vty-server lifetimes and
+partner vty removals but not the addition of partner vtys. Since an HMC
+Super Admin can add partner info dynamically we have provided the hvcs
+driver sysfs directory with the "rescan" update attribute which will query
+firmware and update the partner info for all the vty-servers that this
+driver manages. Writing a '1' to the attribute triggers the update. An
+explicit example follows:
+
+ Pow5:/sys/bus/vio/drivers/hvcs # echo 1 > rescan
+
+Reading the attribute will indicate a state of '1' or '0'. A one indicates
+that an update is in process. A zero indicates that an update has
+completed or was never executed.
+
+Vty-server entries in this directory are a 32 bit partition unique unit
+address that is created by firmware. An example vty-server sysfs entry
+looks like the following::
+
+ Pow5:/sys/bus/vio/drivers/hvcs/30000004 # ls
+ . current_vty devspec name partner_vtys
+ .. index partner_clcs vterm_state
+
+Each entry is provided, by default with a "name" attribute. Reading the
+"name" attribute will reveal the device type as shown in the following
+example::
+
+ Pow5:/sys/bus/vio/drivers/hvcs/30000003 # cat name
+ vty-server
+
+Each entry is also provided, by default, with a "devspec" attribute which
+reveals the full device specification when read, as shown in the following
+example::
+
+ Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat devspec
+ /vdevice/vty-server@30000004
+
+Each vty-server sysfs dir is provided with two read-only attributes that
+provide lists of easily parsed partner vty data: "partner_vtys" and
+"partner_clcs"::
+
+ Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat partner_vtys
+ 30000000
+ 30000001
+ 30000002
+ 30000000
+ 30000000
+
+ Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat partner_clcs
+ U5112.428.103048A-V3-C0
+ U5112.428.103048A-V3-C2
+ U5112.428.103048A-V3-C3
+ U5112.428.103048A-V4-C0
+ U5112.428.103048A-V5-C0
+
+Reading partner_vtys returns a list of partner vtys. Vty unit address
+numbering is only per-partition-unique so entries will frequently repeat.
+
+Reading partner_clcs returns a list of "converged location codes" which are
+composed of a system serial number followed by "-V*", where the '*' is the
+target partition number, and "-C*", where the '*' is the slot of the
+adapter. The first vty partner corresponds to the first clc item, the
+second vty partner to the second clc item, etc.
+
+A vty-server can only be connected to a single vty at a time. The entry,
+"current_vty" prints the clc of the currently selected partner vty when
+read.
+
+The current_vty can be changed by writing a valid partner clc to the entry
+as in the following example::
+
+ Pow5:/sys/bus/vio/drivers/hvcs/30000004 # echo U5112.428.10304
+ 8A-V4-C0 > current_vty
+
+Changing the current_vty when a vty-server is already connected to a vty
+does not affect the current connection. The change takes effect when the
+currently open connection is freed.
+
+Information on the "vterm_state" attribute was covered earlier on the
+chapter entitled "disconnection".
+
+8. Questions & Answers:
+=======================
+
+Q: What are the security concerns involving hvcs?
+
+A: There are three main security concerns:
+
+ 1. The creator of the /dev/hvcs* nodes has the ability to restrict
+ the access of the device entries to certain users or groups. It
+ may be best to create a special hvcs group privilege for providing
+ access to system consoles.
+
+ 2. To provide network security when grabbing the console it is
+ suggested that the user connect to the console hosting partition
+ using a secure method, such as SSH or sit at a hardware console.
+
+ 3. Make sure to exit the user session when done with a console or
+ the next vty-server connection (which may be from another
+ partition) will experience the previously logged in session.
+
+---------------------------------------------------------------------------
+
+Q: How do I multiplex a console that I grab through hvcs so that other
+people can see it:
+
+A: You can use "screen" to directly connect to the /dev/hvcs* device and
+setup a session on your machine with the console group privileges. As
+pointed out earlier by default screen doesn't provide the termcap settings
+for most terminal emulators to provide adequate character conversion from
+term type "screen" to others. This means that curses based programs may
+not display properly in screen sessions.
+
+---------------------------------------------------------------------------
+
+Q: Why are the colors all messed up?
+Q: Why are the control characters acting strange or not working?
+Q: Why is the console output all strange and unintelligible?
+
+A: Please see the preceding section on "Connection" for a discussion of how
+applications can affect the display of character control sequences.
+Additionally, just because you logged into the console using and xterm
+doesn't mean someone else didn't log into the console with the HMC console
+(vt320) before you and leave the session logged in. The best thing to do
+is to export TERM to the terminal type of your terminal emulator when you
+get the console. Additionally make sure to "exit" the console before you
+disconnect from the console. This will ensure that the next user gets
+their own TERM type set when they login.
+
+---------------------------------------------------------------------------
+
+Q: When I try to CONNECT kermit to an hvcs device I get:
+"Sorry, can't open connection: /dev/hvcs*"What is happening?
+
+A: Some other Power5 console mechanism has a connection to the vty and
+isn't giving it up. You can try to force disconnect the consoles from the
+HMC by right clicking on the partition and then selecting "close terminal".
+Otherwise you have to hunt down the people who have console authority. It
+is possible that you already have the console open using another kermit
+session and just forgot about it. Please review the console options for
+Power5 systems to determine the many ways a system console can be held.
+
+OR
+
+A: Another user may not have a connectivity method currently attached to a
+/dev/hvcs device but the vterm_state may reveal that they still have the
+vty-server connection established. They need to free this using the method
+outlined in the section on "Disconnection" in order for others to connect
+to the target vty.
+
+OR
+
+A: The user profile you are using to execute kermit probably doesn't have
+permissions to use the /dev/hvcs* device.
+
+OR
+
+A: You probably haven't inserted the hvcs.ko module yet but the /dev/hvcs*
+entry still exists (on systems without udev).
+
+OR
+
+A: There is not a corresponding vty-server device that maps to an existing
+/dev/hvcs* entry.
+
+---------------------------------------------------------------------------
+
+Q: When I try to CONNECT kermit to an hvcs device I get:
+"Sorry, write access to UUCP lockfile directory denied."
+
+A: The /dev/hvcs* entry you have specified doesn't exist where you said it
+does? Maybe you haven't inserted the module (on systems with udev).
+
+---------------------------------------------------------------------------
+
+Q: If I already have one Linux partition installed can I use hvcs on said
+partition to provide the console for the install of a second Linux
+partition?
+
+A: Yes granted that your are connected to the /dev/hvcs* device using
+kermit or cu or some other program that doesn't provide terminal emulation.
+
+---------------------------------------------------------------------------
+
+Q: Can I connect to more than one partition's console at a time using this
+driver?
+
+A: Yes. Of course this means that there must be more than one vty-server
+configured for this partition and each must point to a disconnected vty.
+
+---------------------------------------------------------------------------
+
+Q: Does the hvcs driver support dynamic (hotplug) addition of devices?
+
+A: Yes, if you have dlpar and hotplug enabled for your system and it has
+been built into the kernel the hvcs drivers is configured to dynamically
+handle additions of new devices and removals of unused devices.
+
+---------------------------------------------------------------------------
+
+Q: For some reason /dev/hvcs* doesn't map to the same vty-server adapter
+after a reboot. What happened?
+
+A: Assignment of vty-server adapters to /dev/hvcs* entries is always done
+in the order that the adapters are exposed. Due to hotplug capabilities of
+this driver assignment of hotplug added vty-servers may be in a different
+order than how they would be exposed on module load. Rebooting or
+reloading the module after dynamic addition may result in the /dev/hvcs*
+and vty-server coupling changing if a vty-server adapter was added in a
+slot between two other vty-server adapters. Refer to the section above
+on how to determine which vty-server goes with which /dev/hvcs* node.
+Hint; look at the sysfs "index" attribute for the vty-server.
+
+---------------------------------------------------------------------------
+
+Q: Can I use /dev/hvcs* as a conduit to another partition and use a tty
+device on that partition as the other end of the pipe?
+
+A: Yes, on Power5 platforms the hvc_console driver provides a tty interface
+for extra /dev/hvc* devices (where /dev/hvc0 is most likely the console).
+In order to get a tty conduit working between the two partitions the HMC
+Super Admin must create an additional "serial server" for the target
+partition with the HMC gui which will show up as /dev/hvc* when the target
+partition is rebooted.
+
+The HMC Super Admin then creates an additional "serial client" for the
+current partition and points this at the target partition's newly created
+"serial server" adapter (remember the slot). This shows up as an
+additional /dev/hvcs* device.
+
+Now a program on the target system can be configured to read or write to
+/dev/hvc* and another program on the current partition can be configured to
+read or write to /dev/hvcs*. Now you have a tty conduit between two
+partitions.
+
+---------------------------------------------------------------------------
+
+9. Reporting Bugs:
+==================
+
+The proper channel for reporting bugs is either through the Linux OS
+distribution company that provided your OS or by posting issues to the
+PowerPC development mailing list at:
+
+linuxppc-dev@lists.ozlabs.org
+
+This request is to provide a documented and searchable public exchange
+of the problems and solutions surrounding this driver for the benefit of
+all users.
diff --git a/Documentation/arch/powerpc/imc.rst b/Documentation/arch/powerpc/imc.rst
new file mode 100644
index 000000000000..633bcee7dc85
--- /dev/null
+++ b/Documentation/arch/powerpc/imc.rst
@@ -0,0 +1,199 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. _imc:
+
+===================================
+IMC (In-Memory Collection Counters)
+===================================
+
+Anju T Sudhakar, 10 May 2019
+
+.. contents::
+ :depth: 3
+
+
+Basic overview
+==============
+
+IMC (In-Memory collection counters) is a hardware monitoring facility that
+collects large numbers of hardware performance events at Nest level (these are
+on-chip but off-core), Core level and Thread level.
+
+The Nest PMU counters are handled by a Nest IMC microcode which runs in the OCC
+(On-Chip Controller) complex. The microcode collects the counter data and moves
+the nest IMC counter data to memory.
+
+The Core and Thread IMC PMU counters are handled in the core. Core level PMU
+counters give us the IMC counters' data per core and thread level PMU counters
+give us the IMC counters' data per CPU thread.
+
+OPAL obtains the IMC PMU and supported events information from the IMC Catalog
+and passes on to the kernel via the device tree. The event's information
+contains:
+
+- Event name
+- Event Offset
+- Event description
+
+and possibly also:
+
+- Event scale
+- Event unit
+
+Some PMUs may have a common scale and unit values for all their supported
+events. For those cases, the scale and unit properties for those events must be
+inherited from the PMU.
+
+The event offset in the memory is where the counter data gets accumulated.
+
+IMC catalog is available at:
+ https://github.com/open-power/ima-catalog
+
+The kernel discovers the IMC counters information in the device tree at the
+`imc-counters` device node which has a compatible field
+`ibm,opal-in-memory-counters`. From the device tree, the kernel parses the PMUs
+and their event's information and register the PMU and its attributes in the
+kernel.
+
+IMC example usage
+=================
+
+.. code-block:: sh
+
+ # perf list
+ [...]
+ nest_mcs01/PM_MCS01_64B_RD_DISP_PORT01/ [Kernel PMU event]
+ nest_mcs01/PM_MCS01_64B_RD_DISP_PORT23/ [Kernel PMU event]
+ [...]
+ core_imc/CPM_0THRD_NON_IDLE_PCYC/ [Kernel PMU event]
+ core_imc/CPM_1THRD_NON_IDLE_INST/ [Kernel PMU event]
+ [...]
+ thread_imc/CPM_0THRD_NON_IDLE_PCYC/ [Kernel PMU event]
+ thread_imc/CPM_1THRD_NON_IDLE_INST/ [Kernel PMU event]
+
+To see per chip data for nest_mcs0/PM_MCS_DOWN_128B_DATA_XFER_MC0/:
+
+.. code-block:: sh
+
+ # ./perf stat -e "nest_mcs01/PM_MCS01_64B_WR_DISP_PORT01/" -a --per-socket
+
+To see non-idle instructions for core 0:
+
+.. code-block:: sh
+
+ # ./perf stat -e "core_imc/CPM_NON_IDLE_INST/" -C 0 -I 1000
+
+To see non-idle instructions for a "make":
+
+.. code-block:: sh
+
+ # ./perf stat -e "thread_imc/CPM_NON_IDLE_PCYC/" make
+
+
+IMC Trace-mode
+===============
+
+POWER9 supports two modes for IMC which are the Accumulation mode and Trace
+mode. In Accumulation mode, event counts are accumulated in system Memory.
+Hypervisor then reads the posted counts periodically or when requested. In IMC
+Trace mode, the 64 bit trace SCOM value is initialized with the event
+information. The CPMCxSEL and CPMC_LOAD in the trace SCOM, specifies the event
+to be monitored and the sampling duration. On each overflow in the CPMCxSEL,
+hardware snapshots the program counter along with event counts and writes into
+memory pointed by LDBAR.
+
+LDBAR is a 64 bit special purpose per thread register, it has bits to indicate
+whether hardware is configured for accumulation or trace mode.
+
+LDBAR Register Layout
+---------------------
+
+ +-------+----------------------+
+ | 0 | Enable/Disable |
+ +-------+----------------------+
+ | 1 | 0: Accumulation Mode |
+ | +----------------------+
+ | | 1: Trace Mode |
+ +-------+----------------------+
+ | 2:3 | Reserved |
+ +-------+----------------------+
+ | 4-6 | PB scope |
+ +-------+----------------------+
+ | 7 | Reserved |
+ +-------+----------------------+
+ | 8:50 | Counter Address |
+ +-------+----------------------+
+ | 51:63 | Reserved |
+ +-------+----------------------+
+
+TRACE_IMC_SCOM bit representation
+---------------------------------
+
+ +-------+------------+
+ | 0:1 | SAMPSEL |
+ +-------+------------+
+ | 2:33 | CPMC_LOAD |
+ +-------+------------+
+ | 34:40 | CPMC1SEL |
+ +-------+------------+
+ | 41:47 | CPMC2SEL |
+ +-------+------------+
+ | 48:50 | BUFFERSIZE |
+ +-------+------------+
+ | 51:63 | RESERVED |
+ +-------+------------+
+
+CPMC_LOAD contains the sampling duration. SAMPSEL and CPMCxSEL determines the
+event to count. BUFFERSIZE indicates the memory range. On each overflow,
+hardware snapshots the program counter along with event counts and updates the
+memory and reloads the CMPC_LOAD value for the next sampling duration. IMC
+hardware does not support exceptions, so it quietly wraps around if memory
+buffer reaches the end.
+
+*Currently the event monitored for trace-mode is fixed as cycle.*
+
+Trace IMC example usage
+=======================
+
+.. code-block:: sh
+
+ # perf list
+ [....]
+ trace_imc/trace_cycles/ [Kernel PMU event]
+
+To record an application/process with trace-imc event:
+
+.. code-block:: sh
+
+ # perf record -e trace_imc/trace_cycles/ yes > /dev/null
+ [ perf record: Woken up 1 times to write data ]
+ [ perf record: Captured and wrote 0.012 MB perf.data (21 samples) ]
+
+The `perf.data` generated, can be read using perf report.
+
+Benefits of using IMC trace-mode
+================================
+
+PMI (Performance Monitoring Interrupts) interrupt handling is avoided, since IMC
+trace mode snapshots the program counter and updates to the memory. And this
+also provide a way for the operating system to do instruction sampling in real
+time without PMI processing overhead.
+
+Performance data using `perf top` with and without trace-imc event.
+
+PMI interrupts count when `perf top` command is executed without trace-imc event.
+
+.. code-block:: sh
+
+ # grep PMI /proc/interrupts
+ PMI: 0 0 0 0 Performance monitoring interrupts
+ # ./perf top
+ ...
+ # grep PMI /proc/interrupts
+ PMI: 39735 8710 17338 17801 Performance monitoring interrupts
+ # ./perf top -e trace_imc/trace_cycles/
+ ...
+ # grep PMI /proc/interrupts
+ PMI: 39735 8710 17338 17801 Performance monitoring interrupts
+
+
+That is, the PMI interrupt counts do not increment when using the `trace_imc` event.
diff --git a/Documentation/arch/powerpc/index.rst b/Documentation/arch/powerpc/index.rst
new file mode 100644
index 000000000000..1be2ee3f0361
--- /dev/null
+++ b/Documentation/arch/powerpc/index.rst
@@ -0,0 +1,49 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======
+powerpc
+=======
+
+.. toctree::
+ :maxdepth: 1
+
+ associativity
+ booting
+ bootwrapper
+ cpu_families
+ cpu_features
+ dawr-power9
+ dexcr
+ dscr
+ eeh-pci-error-recovery
+ elf_hwcaps
+ elfnote
+ firmware-assisted-dump
+ htm
+ hvcs
+ imc
+ isa-versions
+ kaslr-booke32
+ mpc52xx
+ kvm-nested
+ papr_hcalls
+ pci_iov_resource_on_powernv
+ pmu-ebb
+ ptrace
+ qe_firmware
+ syscall64-abi
+ transactional_memory
+ ultravisor
+ vas-api
+ vcpudispatch_stats
+ vmemmap_dedup
+ vpa-dtl
+
+ features
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
diff --git a/Documentation/arch/powerpc/isa-versions.rst b/Documentation/arch/powerpc/isa-versions.rst
new file mode 100644
index 000000000000..a8d6b6028b3e
--- /dev/null
+++ b/Documentation/arch/powerpc/isa-versions.rst
@@ -0,0 +1,101 @@
+==========================
+CPU to ISA Version Mapping
+==========================
+
+Mapping of some CPU versions to relevant ISA versions.
+
+Note Power4 and Power4+ are not supported.
+
+========= ====================================================================
+CPU Architecture version
+========= ====================================================================
+Power10 Power ISA v3.1
+Power9 Power ISA v3.0B
+Power8 Power ISA v2.07
+e6500 Power ISA v2.06 with some exceptions
+e5500 Power ISA v2.06 with some exceptions, no Altivec
+Power7 Power ISA v2.06
+Power6 Power ISA v2.05
+PA6T Power ISA v2.04
+Cell PPU - Power ISA v2.02 with some minor exceptions
+ - Plus Altivec/VMX ~= 2.03
+Power5++ Power ISA v2.04 (no VMX)
+Power5+ Power ISA v2.03
+Power5 - PowerPC User Instruction Set Architecture Book I v2.02
+ - PowerPC Virtual Environment Architecture Book II v2.02
+ - PowerPC Operating Environment Architecture Book III v2.02
+PPC970 - PowerPC User Instruction Set Architecture Book I v2.01
+ - PowerPC Virtual Environment Architecture Book II v2.01
+ - PowerPC Operating Environment Architecture Book III v2.01
+ - Plus Altivec/VMX ~= 2.03
+Power4+ - PowerPC User Instruction Set Architecture Book I v2.01
+ - PowerPC Virtual Environment Architecture Book II v2.01
+ - PowerPC Operating Environment Architecture Book III v2.01
+Power4 - PowerPC User Instruction Set Architecture Book I v2.00
+ - PowerPC Virtual Environment Architecture Book II v2.00
+ - PowerPC Operating Environment Architecture Book III v2.00
+========= ====================================================================
+
+
+Key Features
+------------
+
+========== ==================
+CPU VMX (aka. Altivec)
+========== ==================
+Power10 Yes
+Power9 Yes
+Power8 Yes
+e6500 Yes
+e5500 No
+Power7 Yes
+Power6 Yes
+PA6T Yes
+Cell PPU Yes
+Power5++ No
+Power5+ No
+Power5 No
+PPC970 Yes
+Power4+ No
+Power4 No
+========== ==================
+
+========== ====
+CPU VSX
+========== ====
+Power10 Yes
+Power9 Yes
+Power8 Yes
+e6500 No
+e5500 No
+Power7 Yes
+Power6 No
+PA6T No
+Cell PPU No
+Power5++ No
+Power5+ No
+Power5 No
+PPC970 No
+Power4+ No
+Power4 No
+========== ====
+
+========== ====================================
+CPU Transactional Memory
+========== ====================================
+Power10 No (* see Power ISA v3.1, "Appendix A. Notes on the Removal of Transactional Memory from the Architecture")
+Power9 Yes (* see transactional_memory.txt)
+Power8 Yes
+e6500 No
+e5500 No
+Power7 No
+Power6 No
+PA6T No
+Cell PPU No
+Power5++ No
+Power5+ No
+Power5 No
+PPC970 No
+Power4+ No
+Power4 No
+========== ====================================
diff --git a/Documentation/arch/powerpc/kasan.txt b/Documentation/arch/powerpc/kasan.txt
new file mode 100644
index 000000000000..a4f647e4fffa
--- /dev/null
+++ b/Documentation/arch/powerpc/kasan.txt
@@ -0,0 +1,58 @@
+KASAN is supported on powerpc on 32-bit and Radix 64-bit only.
+
+32 bit support
+==============
+
+KASAN is supported on both hash and nohash MMUs on 32-bit.
+
+The shadow area sits at the top of the kernel virtual memory space above the
+fixmap area and occupies one eighth of the total kernel virtual memory space.
+
+Instrumentation of the vmalloc area is optional, unless built with modules,
+in which case it is required.
+
+64 bit support
+==============
+
+Currently, only the radix MMU is supported. There have been versions for hash
+and Book3E processors floating around on the mailing list, but nothing has been
+merged.
+
+KASAN support on Book3S is a bit tricky to get right:
+
+ - It would be good to support inline instrumentation so as to be able to catch
+ stack issues that cannot be caught with outline mode.
+
+ - Inline instrumentation requires a fixed offset.
+
+ - Book3S runs code with translations off ("real mode") during boot, including a
+ lot of generic device-tree parsing code which is used to determine MMU
+ features.
+
+ - Some code - most notably a lot of KVM code - also runs with translations off
+ after boot.
+
+ - Therefore any offset has to point to memory that is valid with
+ translations on or off.
+
+One approach is just to give up on inline instrumentation. This way boot-time
+checks can be delayed until after the MMU is set is up, and we can just not
+instrument any code that runs with translations off after booting. This is the
+current approach.
+
+To avoid this limitation, the KASAN shadow would have to be placed inside the
+linear mapping, using the same high-bits trick we use for the rest of the linear
+mapping. This is tricky:
+
+ - We'd like to place it near the start of physical memory. In theory we can do
+ this at run-time based on how much physical memory we have, but this requires
+ being able to arbitrarily relocate the kernel, which is basically the tricky
+ part of KASLR. Not being game to implement both tricky things at once, this
+ is hopefully something we can revisit once we get KASLR for Book3S.
+
+ - Alternatively, we can place the shadow at the _end_ of memory, but this
+ requires knowing how much contiguous physical memory a system has _at compile
+ time_. This is a big hammer, and has some unfortunate consequences: inablity
+ to handle discontiguous physical memory, total failure to boot on machines
+ with less memory than specified, and that machines with more memory than
+ specified can't use it. This was deemed unacceptable.
diff --git a/Documentation/arch/powerpc/kaslr-booke32.rst b/Documentation/arch/powerpc/kaslr-booke32.rst
new file mode 100644
index 000000000000..5681c1d1b65b
--- /dev/null
+++ b/Documentation/arch/powerpc/kaslr-booke32.rst
@@ -0,0 +1,42 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================
+KASLR for Freescale BookE32
+===========================
+
+The word KASLR stands for Kernel Address Space Layout Randomization.
+
+This document tries to explain the implementation of the KASLR for
+Freescale BookE32. KASLR is a security feature that deters exploit
+attempts relying on knowledge of the location of kernel internals.
+
+Since CONFIG_RELOCATABLE has already supported, what we need to do is
+map or copy kernel to a proper place and relocate. Freescale Book-E
+parts expect lowmem to be mapped by fixed TLB entries(TLB1). The TLB1
+entries are not suitable to map the kernel directly in a randomized
+region, so we chose to copy the kernel to a proper place and restart to
+relocate.
+
+Entropy is derived from the banner and timer base, which will change every
+build and boot. This not so much safe so additionally the bootloader may
+pass entropy via the /chosen/kaslr-seed node in device tree.
+
+We will use the first 512M of the low memory to randomize the kernel
+image. The memory will be split in 64M zones. We will use the lower 8
+bit of the entropy to decide the index of the 64M zone. Then we chose a
+16K aligned offset inside the 64M zone to put the kernel in::
+
+ KERNELBASE
+
+ |--> 64M <--|
+ | |
+ +---------------+ +----------------+---------------+
+ | |....| |kernel| | |
+ +---------------+ +----------------+---------------+
+ | |
+ |-----> offset <-----|
+
+ kernstart_virt_addr
+
+To enable KASLR, set CONFIG_RANDOMIZE_BASE = y. If KASLR is enabled and you
+want to disable it at runtime, add "nokaslr" to the kernel cmdline.
diff --git a/Documentation/arch/powerpc/kvm-nested.rst b/Documentation/arch/powerpc/kvm-nested.rst
new file mode 100644
index 000000000000..574592505604
--- /dev/null
+++ b/Documentation/arch/powerpc/kvm-nested.rst
@@ -0,0 +1,656 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================================
+Nested KVM on POWER
+====================================
+
+Introduction
+============
+
+This document explains how a guest operating system can act as a
+hypervisor and run nested guests through the use of hypercalls, if the
+hypervisor has implemented them. The terms L0, L1, and L2 are used to
+refer to different software entities. L0 is the hypervisor mode entity
+that would normally be called the "host" or "hypervisor". L1 is a
+guest virtual machine that is directly run under L0 and is initiated
+and controlled by L0. L2 is a guest virtual machine that is initiated
+and controlled by L1 acting as a hypervisor.
+
+Existing API
+============
+
+Linux/KVM has had support for Nesting as an L0 or L1 since 2018
+
+The L0 code was added::
+
+ commit 8e3f5fc1045dc49fd175b978c5457f5f51e7a2ce
+ Author: Paul Mackerras <paulus@ozlabs.org>
+ Date: Mon Oct 8 16:31:03 2018 +1100
+ KVM: PPC: Book3S HV: Framework and hcall stubs for nested virtualization
+
+The L1 code was added::
+
+ commit 360cae313702cdd0b90f82c261a8302fecef030a
+ Author: Paul Mackerras <paulus@ozlabs.org>
+ Date: Mon Oct 8 16:31:04 2018 +1100
+ KVM: PPC: Book3S HV: Nested guest entry via hypercall
+
+This API works primarily using a single hcall h_enter_nested(). This
+call made by the L1 to tell the L0 to start an L2 vCPU with the given
+state. The L0 then starts this L2 and runs until an L2 exit condition
+is reached. Once the L2 exits, the state of the L2 is given back to
+the L1 by the L0. The full L2 vCPU state is always transferred from
+and to L1 when the L2 is run. The L0 doesn't keep any state on the L2
+vCPU (except in the short sequence in the L0 on L1 -> L2 entry and L2
+-> L1 exit).
+
+The only state kept by the L0 is the partition table. The L1 registers
+it's partition table using the h_set_partition_table() hcall. All
+other state held by the L0 about the L2s is cached state (such as
+shadow page tables).
+
+The L1 may run any L2 or vCPU without first informing the L0. It
+simply starts the vCPU using h_enter_nested(). The creation of L2s and
+vCPUs is done implicitly whenever h_enter_nested() is called.
+
+In this document, we call this existing API the v1 API.
+
+New PAPR API
+===============
+
+The new PAPR API changes from the v1 API such that the creating L2 and
+associated vCPUs is explicit. In this document, we call this the v2
+API.
+
+h_enter_nested() is replaced with H_GUEST_VCPU_RUN(). Before this can
+be called the L1 must explicitly create the L2 using h_guest_create()
+and any associated vCPUs() created with h_guest_create_vCPU(). Getting
+and setting vCPU state can also be performed using h_guest_{g|s}et
+hcall.
+
+The basic execution flow is for an L1 to create an L2, run it, and
+delete it is:
+
+- L1 and L0 negotiate capabilities with H_GUEST_{G,S}ET_CAPABILITIES()
+ (normally at L1 boot time).
+
+- L1 requests the L0 create an L2 with H_GUEST_CREATE() and receives a token
+
+- L1 requests the L0 create an L2 vCPU with H_GUEST_CREATE_VCPU()
+
+- L1 and L0 communicate the vCPU state using the H_GUEST_{G,S}ET() hcall
+
+- L1 requests the L0 runs the vCPU running H_GUEST_VCPU_RUN() hcall
+
+- L1 deletes L2 with H_GUEST_DELETE()
+
+More details of the individual hcalls follows:
+
+HCALL Details
+=============
+
+This documentation is provided to give an overall understating of the
+API. It doesn't aim to provide all the details required to implement
+an L1 or L0. Latest version of PAPR can be referred to for more details.
+
+All these HCALLs are made by the L1 to the L0.
+
+H_GUEST_GET_CAPABILITIES()
+--------------------------
+
+This is called to get the capabilities of the L0 nested
+hypervisor. This includes capabilities such the CPU versions (eg
+POWER9, POWER10) that are supported as L2s::
+
+ H_GUEST_GET_CAPABILITIES(uint64 flags)
+
+ Parameters:
+ Input:
+ flags: Reserved
+ Output:
+ R3: Return code
+ R4: Hypervisor Supported Capabilities bitmap 1
+
+H_GUEST_SET_CAPABILITIES()
+--------------------------
+
+This is called to inform the L0 of the capabilities of the L1
+hypervisor. The set of flags passed here are the same as
+H_GUEST_GET_CAPABILITIES()
+
+Typically, GET will be called first and then SET will be called with a
+subset of the flags returned from GET. This process allows the L0 and
+L1 to negotiate an agreed set of capabilities::
+
+ H_GUEST_SET_CAPABILITIES(uint64 flags,
+ uint64 capabilitiesBitmap1)
+ Parameters:
+ Input:
+ flags: Reserved
+ capabilitiesBitmap1: Only capabilities advertised through
+ H_GUEST_GET_CAPABILITIES
+ Output:
+ R3: Return code
+ R4: If R3 = H_P2: The number of invalid bitmaps
+ R5: If R3 = H_P2: The index of first invalid bitmap
+
+H_GUEST_CREATE()
+----------------
+
+This is called to create an L2. A unique ID of the L2 created
+(similar to an LPID) is returned, which can be used on subsequent HCALLs to
+identify the L2::
+
+ H_GUEST_CREATE(uint64 flags,
+ uint64 continueToken);
+ Parameters:
+ Input:
+ flags: Reserved
+ continueToken: Initial call set to -1. Subsequent calls,
+ after H_Busy or H_LongBusyOrder has been
+ returned, value that was returned in R4.
+ Output:
+ R3: Return code. Notable:
+ H_Not_Enough_Resources: Unable to create Guest VCPU due to not
+ enough Hypervisor memory. See H_GUEST_CREATE_GET_STATE(flags =
+ takeOwnershipOfVcpuState)
+ R4: If R3 = H_Busy or_H_LongBusyOrder -> continueToken
+
+H_GUEST_CREATE_VCPU()
+---------------------
+
+This is called to create a vCPU associated with an L2. The L2 id
+(returned from H_GUEST_CREATE()) should be passed it. Also passed in
+is a unique (for this L2) vCPUid. This vCPUid is allocated by the
+L1::
+
+ H_GUEST_CREATE_VCPU(uint64 flags,
+ uint64 guestId,
+ uint64 vcpuId);
+ Parameters:
+ Input:
+ flags: Reserved
+ guestId: ID obtained from H_GUEST_CREATE
+ vcpuId: ID of the vCPU to be created. This must be within the
+ range of 0 to 2047
+ Output:
+ R3: Return code. Notable:
+ H_Not_Enough_Resources: Unable to create Guest VCPU due to not
+ enough Hypervisor memory. See H_GUEST_CREATE_GET_STATE(flags =
+ takeOwnershipOfVcpuState)
+
+H_GUEST_GET_STATE()
+-------------------
+
+This is called to get state associated with an L2 (Guest-wide or vCPU specific).
+This info is passed via the Guest State Buffer (GSB), a standard format as
+explained later in this doc, necessary details below:
+
+This can get either L2 wide or vcpu specific information. Examples of
+L2 wide is the timebase offset or process scoped page table
+info. Examples of vCPU specific are GPRs or VSRs. A bit in the flags
+parameter specifies if this call is L2 wide or vCPU specific and the
+IDs in the GSB must match this.
+
+The L1 provides a pointer to the GSB as a parameter to this call. Also
+provided is the L2 and vCPU IDs associated with the state to set.
+
+The L1 writes only the IDs and sizes in the GSB. L0 writes the
+associated values for each ID in the GSB::
+
+ H_GUEST_GET_STATE(uint64 flags,
+ uint64 guestId,
+ uint64 vcpuId,
+ uint64 dataBuffer,
+ uint64 dataBufferSizeInBytes);
+ Parameters:
+ Input:
+ flags:
+ Bit 0: getGuestWideState: Request state of the Guest instead
+ of an individual VCPU.
+ Bit 1: getHostWideState: Request stats of the Host. This causes
+ the guestId and vcpuId parameters to be ignored and attempting
+ to get the VCPU/Guest state will cause an error.
+ Bits 2-63: Reserved
+ guestId: ID obtained from H_GUEST_CREATE
+ vcpuId: ID of the vCPU pass to H_GUEST_CREATE_VCPU
+ dataBuffer: A L1 real address of the GSB.
+ If takeOwnershipOfVcpuState, size must be at least the size
+ returned by ID=0x0001
+ dataBufferSizeInBytes: Size of dataBuffer
+ Output:
+ R3: Return code
+ R4: If R3 = H_Invalid_Element_Id: The array index of the bad
+ element ID.
+ If R3 = H_Invalid_Element_Size: The array index of the bad
+ element size.
+ If R3 = H_Invalid_Element_Value: The array index of the bad
+ element value.
+
+H_GUEST_SET_STATE()
+-------------------
+
+This is called to set L2 wide or vCPU specific L2 state. This info is
+passed via the Guest State Buffer (GSB), necessary details below:
+
+This can set either L2 wide or vcpu specific information. Examples of
+L2 wide is the timebase offset or process scoped page table
+info. Examples of vCPU specific are GPRs or VSRs. A bit in the flags
+parameter specifies if this call is L2 wide or vCPU specific and the
+IDs in the GSB must match this.
+
+The L1 provides a pointer to the GSB as a parameter to this call. Also
+provided is the L2 and vCPU IDs associated with the state to set.
+
+The L1 writes all values in the GSB and the L0 only reads the GSB for
+this call::
+
+ H_GUEST_SET_STATE(uint64 flags,
+ uint64 guestId,
+ uint64 vcpuId,
+ uint64 dataBuffer,
+ uint64 dataBufferSizeInBytes);
+ Parameters:
+ Input:
+ flags:
+ Bit 0: getGuestWideState: Request state of the Guest instead
+ of an individual VCPU.
+ Bit 1: returnOwnershipOfVcpuState Return Guest VCPU state. See
+ GET_STATE takeOwnershipOfVcpuState
+ Bits 2-63: Reserved
+ guestId: ID obtained from H_GUEST_CREATE
+ vcpuId: ID of the vCPU pass to H_GUEST_CREATE_VCPU
+ dataBuffer: A L1 real address of the GSB.
+ If takeOwnershipOfVcpuState, size must be at least the size
+ returned by ID=0x0001
+ dataBufferSizeInBytes: Size of dataBuffer
+ Output:
+ R3: Return code
+ R4: If R3 = H_Invalid_Element_Id: The array index of the bad
+ element ID.
+ If R3 = H_Invalid_Element_Size: The array index of the bad
+ element size.
+ If R3 = H_Invalid_Element_Value: The array index of the bad
+ element value.
+
+H_GUEST_RUN_VCPU()
+------------------
+
+This is called to run an L2 vCPU. The L2 and vCPU IDs are passed in as
+parameters. The vCPU runs with the state set previously using
+H_GUEST_SET_STATE(). When the L2 exits, the L1 will resume from this
+hcall.
+
+This hcall also has associated input and output GSBs. Unlike
+H_GUEST_{S,G}ET_STATE(), these GSB pointers are not passed in as
+parameters to the hcall (This was done in the interest of
+performance). The locations of these GSBs must be preregistered using
+the H_GUEST_SET_STATE() call with ID 0x0c00 and 0x0c01 (see table
+below).
+
+The input GSB may contain only VCPU specific elements to be set. This
+GSB may also contain zero elements (ie 0 in the first 4 bytes of the
+GSB) if nothing needs to be set.
+
+On exit from the hcall, the output buffer is filled with elements
+determined by the L0. The reason for the exit is contained in GPR4 (ie
+NIP is put in GPR4). The elements returned depend on the exit
+type. For example, if the exit reason is the L2 doing a hcall (GPR4 =
+0xc00), then GPR3-12 are provided in the output GSB as this is the
+state likely needed to service the hcall. If additional state is
+needed, H_GUEST_GET_STATE() may be called by the L1.
+
+To synthesize interrupts in the L2, when calling H_GUEST_RUN_VCPU()
+the L1 may set a flag (as a hcall parameter) and the L0 will
+synthesize the interrupt in the L2. Alternatively, the L1 may
+synthesize the interrupt itself using H_GUEST_SET_STATE() or the
+H_GUEST_RUN_VCPU() input GSB to set the state appropriately::
+
+ H_GUEST_RUN_VCPU(uint64 flags,
+ uint64 guestId,
+ uint64 vcpuId,
+ uint64 dataBuffer,
+ uint64 dataBufferSizeInBytes);
+ Parameters:
+ Input:
+ flags:
+ Bit 0: generateExternalInterrupt: Generate an external interrupt
+ Bit 1: generatePrivilegedDoorbell: Generate a Privileged Doorbell
+ Bit 2: sendToSystemReset”: Generate a System Reset Interrupt
+ Bits 3-63: Reserved
+ guestId: ID obtained from H_GUEST_CREATE
+ vcpuId: ID of the vCPU pass to H_GUEST_CREATE_VCPU
+ Output:
+ R3: Return code
+ R4: If R3 = H_Success: The reason L1 VCPU exited (ie. NIA)
+ 0x000: The VCPU stopped running for an unspecified reason. An
+ example of this is the Hypervisor stopping a VCPU running
+ due to an outstanding interrupt for the Host Partition.
+ 0x980: HDEC
+ 0xC00: HCALL
+ 0xE00: HDSI
+ 0xE20: HISI
+ 0xE40: HEA
+ 0xF80: HV Fac Unavail
+ If R3 = H_Invalid_Element_Id, H_Invalid_Element_Size, or
+ H_Invalid_Element_Value: R4 is offset of the invalid element
+ in the input buffer.
+
+H_GUEST_DELETE()
+----------------
+
+This is called to delete an L2. All associated vCPUs are also
+deleted. No specific vCPU delete call is provided.
+
+A flag may be provided to delete all guests. This is used to reset the
+L0 in the case of kdump/kexec::
+
+ H_GUEST_DELETE(uint64 flags,
+ uint64 guestId)
+ Parameters:
+ Input:
+ flags:
+ Bit 0: deleteAllGuests: deletes all guests
+ Bits 1-63: Reserved
+ guestId: ID obtained from H_GUEST_CREATE
+ Output:
+ R3: Return code
+
+Guest State Buffer
+==================
+
+The Guest State Buffer (GSB) is the main method of communicating state
+about the L2 between the L1 and L0 via H_GUEST_{G,S}ET() and
+H_GUEST_VCPU_RUN() calls.
+
+State may be associated with a whole L2 (eg timebase offset) or a
+specific L2 vCPU (eg. GPR state). Only L2 VCPU state maybe be set by
+H_GUEST_VCPU_RUN().
+
+All data in the GSB is big endian (as is standard in PAPR)
+
+The Guest state buffer has a header which gives the number of
+elements, followed by the GSB elements themselves.
+
+GSB header:
+
++----------+----------+-------------------------------------------+
+| Offset | Size | Purpose |
+| Bytes | Bytes | |
++==========+==========+===========================================+
+| 0 | 4 | Number of elements |
++----------+----------+-------------------------------------------+
+| 4 | | Guest state buffer elements |
++----------+----------+-------------------------------------------+
+
+GSB element:
+
++----------+----------+-------------------------------------------+
+| Offset | Size | Purpose |
+| Bytes | Bytes | |
++==========+==========+===========================================+
+| 0 | 2 | ID |
++----------+----------+-------------------------------------------+
+| 2 | 2 | Size of Value |
++----------+----------+-------------------------------------------+
+| 4 | As above | Value |
++----------+----------+-------------------------------------------+
+
+The ID in the GSB element specifies what is to be set. This includes
+archtected state like GPRs, VSRs, SPRs, plus also some meta data about
+the partition like the timebase offset and partition scoped page
+table information.
+
++--------+-------+----+--------+----------------------------------+
+| ID | Size | RW |(H)ost | Details |
+| | Bytes | |(G)uest | |
+| | | |(T)hread| |
+| | | |Scope | |
++========+=======+====+========+==================================+
+| 0x0000 | | RW | TG | NOP element |
++--------+-------+----+--------+----------------------------------+
+| 0x0001 | 0x08 | R | G | Size of L0 vCPU state. See: |
+| | | | | H_GUEST_GET_STATE: |
+| | | | | flags = takeOwnershipOfVcpuState |
++--------+-------+----+--------+----------------------------------+
+| 0x0002 | 0x08 | R | G | Size Run vCPU out buffer |
++--------+-------+----+--------+----------------------------------+
+| 0x0003 | 0x04 | RW | G | Logical PVR |
++--------+-------+----+--------+----------------------------------+
+| 0x0004 | 0x08 | RW | G | TB Offset (L1 relative) |
++--------+-------+----+--------+----------------------------------+
+| 0x0005 | 0x18 | RW | G |Partition scoped page tbl info: |
+| | | | | |
+| | | | |- 0x00 Addr part scope table |
+| | | | |- 0x08 Num addr bits |
+| | | | |- 0x10 Size root dir |
++--------+-------+----+--------+----------------------------------+
+| 0x0006 | 0x10 | RW | G |Process Table Information: |
+| | | | | |
+| | | | |- 0x0 Addr proc scope table |
+| | | | |- 0x8 Table size. |
++--------+-------+----+--------+----------------------------------+
+| 0x0007-| | | | Reserved |
+| 0x07FF | | | | |
++--------+-------+----+--------+----------------------------------+
+| 0x0800 | 0x08 | R | H | Current usage in bytes of the |
+| | | | | L0's Guest Management Space |
+| | | | | for an L1-Lpar. |
++--------+-------+----+--------+----------------------------------+
+| 0x0801 | 0x08 | R | H | Max bytes available in the |
+| | | | | L0's Guest Management Space for |
+| | | | | an L1-Lpar |
++--------+-------+----+--------+----------------------------------+
+| 0x0802 | 0x08 | R | H | Current usage in bytes of the |
+| | | | | L0's Guest Page Table Management |
+| | | | | Space for an L1-Lpar |
++--------+-------+----+--------+----------------------------------+
+| 0x0803 | 0x08 | R | H | Max bytes available in the L0's |
+| | | | | Guest Page Table Management |
+| | | | | Space for an L1-Lpar |
++--------+-------+----+--------+----------------------------------+
+| 0x0804 | 0x08 | R | H | Cumulative Reclaimed bytes from |
+| | | | | L0 Guest's Page Table Management |
+| | | | | Space due to overcommit |
++--------+-------+----+--------+----------------------------------+
+| 0x0805-| | | | Reserved |
+| 0x0BFF | | | | |
++--------+-------+----+--------+----------------------------------+
+| 0x0C00 | 0x10 | RW | T |Run vCPU Input Buffer: |
+| | | | | |
+| | | | |- 0x0 Addr of buffer |
+| | | | |- 0x8 Buffer Size. |
++--------+-------+----+--------+----------------------------------+
+| 0x0C01 | 0x10 | RW | T |Run vCPU Output Buffer: |
+| | | | | |
+| | | | |- 0x0 Addr of buffer |
+| | | | |- 0x8 Buffer Size. |
++--------+-------+----+--------+----------------------------------+
+| 0x0C02 | 0x08 | RW | T | vCPU VPA Address |
++--------+-------+----+--------+----------------------------------+
+| 0x0C03-| | | | Reserved |
+| 0x0FFF | | | | |
++--------+-------+----+--------+----------------------------------+
+| 0x1000-| 0x08 | RW | T | GPR 0-31 |
+| 0x101F | | | | |
++--------+-------+----+--------+----------------------------------+
+| 0x1020 | 0x08 | T | T | HDEC expiry TB |
++--------+-------+----+--------+----------------------------------+
+| 0x1021 | 0x08 | RW | T | NIA |
++--------+-------+----+--------+----------------------------------+
+| 0x1022 | 0x08 | RW | T | MSR |
++--------+-------+----+--------+----------------------------------+
+| 0x1023 | 0x08 | RW | T | LR |
++--------+-------+----+--------+----------------------------------+
+| 0x1024 | 0x08 | RW | T | XER |
++--------+-------+----+--------+----------------------------------+
+| 0x1025 | 0x08 | RW | T | CTR |
++--------+-------+----+--------+----------------------------------+
+| 0x1026 | 0x08 | RW | T | CFAR |
++--------+-------+----+--------+----------------------------------+
+| 0x1027 | 0x08 | RW | T | SRR0 |
++--------+-------+----+--------+----------------------------------+
+| 0x1028 | 0x08 | RW | T | SRR1 |
++--------+-------+----+--------+----------------------------------+
+| 0x1029 | 0x08 | RW | T | DAR |
++--------+-------+----+--------+----------------------------------+
+| 0x102A | 0x08 | RW | T | DEC expiry TB |
++--------+-------+----+--------+----------------------------------+
+| 0x102B | 0x08 | RW | T | VTB |
++--------+-------+----+--------+----------------------------------+
+| 0x102C | 0x08 | RW | T | LPCR |
++--------+-------+----+--------+----------------------------------+
+| 0x102D | 0x08 | RW | T | HFSCR |
++--------+-------+----+--------+----------------------------------+
+| 0x102E | 0x08 | RW | T | FSCR |
++--------+-------+----+--------+----------------------------------+
+| 0x102F | 0x08 | RW | T | FPSCR |
++--------+-------+----+--------+----------------------------------+
+| 0x1030 | 0x08 | RW | T | DAWR0 |
++--------+-------+----+--------+----------------------------------+
+| 0x1031 | 0x08 | RW | T | DAWR1 |
++--------+-------+----+--------+----------------------------------+
+| 0x1032 | 0x08 | RW | T | CIABR |
++--------+-------+----+--------+----------------------------------+
+| 0x1033 | 0x08 | RW | T | PURR |
++--------+-------+----+--------+----------------------------------+
+| 0x1034 | 0x08 | RW | T | SPURR |
++--------+-------+----+--------+----------------------------------+
+| 0x1035 | 0x08 | RW | T | IC |
++--------+-------+----+--------+----------------------------------+
+| 0x1036-| 0x08 | RW | T | SPRG 0-3 |
+| 0x1039 | | | | |
++--------+-------+----+--------+----------------------------------+
+| 0x103A | 0x08 | W | T | PPR |
++--------+-------+----+--------+----------------------------------+
+| 0x103B | 0x08 | RW | T | MMCR 0-3 |
+| 0x103E | | | | |
++--------+-------+----+--------+----------------------------------+
+| 0x103F | 0x08 | RW | T | MMCRA |
++--------+-------+----+--------+----------------------------------+
+| 0x1040 | 0x08 | RW | T | SIER |
++--------+-------+----+--------+----------------------------------+
+| 0x1041 | 0x08 | RW | T | SIER 2 |
++--------+-------+----+--------+----------------------------------+
+| 0x1042 | 0x08 | RW | T | SIER 3 |
++--------+-------+----+--------+----------------------------------+
+| 0x1043 | 0x08 | RW | T | BESCR |
++--------+-------+----+--------+----------------------------------+
+| 0x1044 | 0x08 | RW | T | EBBHR |
++--------+-------+----+--------+----------------------------------+
+| 0x1045 | 0x08 | RW | T | EBBRR |
++--------+-------+----+--------+----------------------------------+
+| 0x1046 | 0x08 | RW | T | AMR |
++--------+-------+----+--------+----------------------------------+
+| 0x1047 | 0x08 | RW | T | IAMR |
++--------+-------+----+--------+----------------------------------+
+| 0x1048 | 0x08 | RW | T | AMOR |
++--------+-------+----+--------+----------------------------------+
+| 0x1049 | 0x08 | RW | T | UAMOR |
++--------+-------+----+--------+----------------------------------+
+| 0x104A | 0x08 | RW | T | SDAR |
++--------+-------+----+--------+----------------------------------+
+| 0x104B | 0x08 | RW | T | SIAR |
++--------+-------+----+--------+----------------------------------+
+| 0x104C | 0x08 | RW | T | DSCR |
++--------+-------+----+--------+----------------------------------+
+| 0x104D | 0x08 | RW | T | TAR |
++--------+-------+----+--------+----------------------------------+
+| 0x104E | 0x08 | RW | T | DEXCR |
++--------+-------+----+--------+----------------------------------+
+| 0x104F | 0x08 | RW | T | HDEXCR |
++--------+-------+----+--------+----------------------------------+
+| 0x1050 | 0x08 | RW | T | HASHKEYR |
++--------+-------+----+--------+----------------------------------+
+| 0x1051 | 0x08 | RW | T | HASHPKEYR |
++--------+-------+----+--------+----------------------------------+
+| 0x1052 | 0x08 | RW | T | CTRL |
++--------+-------+----+--------+----------------------------------+
+| 0x1053 | 0x08 | RW | T | DPDES |
++--------+-------+----+--------+----------------------------------+
+| 0x1054-| | | | Reserved |
+| 0x1FFF | | | | |
++--------+-------+----+--------+----------------------------------+
+| 0x2000 | 0x04 | RW | T | CR |
++--------+-------+----+--------+----------------------------------+
+| 0x2001 | 0x04 | RW | T | PIDR |
++--------+-------+----+--------+----------------------------------+
+| 0x2002 | 0x04 | RW | T | DSISR |
++--------+-------+----+--------+----------------------------------+
+| 0x2003 | 0x04 | RW | T | VSCR |
++--------+-------+----+--------+----------------------------------+
+| 0x2004 | 0x04 | RW | T | VRSAVE |
++--------+-------+----+--------+----------------------------------+
+| 0x2005 | 0x04 | RW | T | DAWRX0 |
++--------+-------+----+--------+----------------------------------+
+| 0x2006 | 0x04 | RW | T | DAWRX1 |
++--------+-------+----+--------+----------------------------------+
+| 0x2007-| 0x04 | RW | T | PMC 1-6 |
+| 0x200c | | | | |
++--------+-------+----+--------+----------------------------------+
+| 0x200D | 0x04 | RW | T | WORT |
++--------+-------+----+--------+----------------------------------+
+| 0x200E | 0x04 | RW | T | PSPB |
++--------+-------+----+--------+----------------------------------+
+| 0x200F-| | | | Reserved |
+| 0x2FFF | | | | |
++--------+-------+----+--------+----------------------------------+
+| 0x3000-| 0x10 | RW | T | VSR 0-63 |
+| 0x303F | | | | |
++--------+-------+----+--------+----------------------------------+
+| 0x3040-| | | | Reserved |
+| 0xEFFF | | | | |
++--------+-------+----+--------+----------------------------------+
+| 0xF000 | 0x08 | R | T | HDAR |
++--------+-------+----+--------+----------------------------------+
+| 0xF001 | 0x04 | R | T | HDSISR |
++--------+-------+----+--------+----------------------------------+
+| 0xF002 | 0x04 | R | T | HEIR |
++--------+-------+----+--------+----------------------------------+
+| 0xF003 | 0x08 | R | T | ASDR |
++--------+-------+----+--------+----------------------------------+
+
+
+Miscellaneous info
+==================
+
+State not in ptregs/hvregs
+--------------------------
+
+In the v1 API, some state is not in the ptregs/hvstate. This includes
+the vector register and some SPRs. For the L1 to set this state for
+the L2, the L1 loads up these hardware registers before the
+h_enter_nested() call and the L0 ensures they end up as the L2 state
+(by not touching them).
+
+The v2 API removes this and explicitly sets this state via the GSB.
+
+L1 Implementation details: Caching state
+----------------------------------------
+
+In the v1 API, all state is sent from the L1 to the L0 and vice versa
+on every h_enter_nested() hcall. If the L0 is not currently running
+any L2s, the L0 has no state information about them. The only
+exception to this is the location of the partition table, registered
+via h_set_partition_table().
+
+The v2 API changes this so that the L0 retains the L2 state even when
+it's vCPUs are no longer running. This means that the L1 only needs to
+communicate with the L0 about L2 state when it needs to modify the L2
+state, or when it's value is out of date. This provides an opportunity
+for performance optimisation.
+
+When a vCPU exits from a H_GUEST_RUN_VCPU() call, the L1 internally
+marks all L2 state as invalid. This means that if the L1 wants to know
+the L2 state (say via a kvm_get_one_reg() call), it needs call
+H_GUEST_GET_STATE() to get that state. Once it's read, it's marked as
+valid in L1 until the L2 is run again.
+
+Also, when an L1 modifies L2 vcpu state, it doesn't need to write it
+to the L0 until that L2 vcpu runs again. Hence when the L1 updates
+state (say via a kvm_set_one_reg() call), it writes to an internal L1
+copy and only flushes this copy to the L0 when the L2 runs again via
+the H_GUEST_VCPU_RUN() input buffer.
+
+This lazy updating of state by the L1 avoids unnecessary
+H_GUEST_{G|S}ET_STATE() calls.
diff --git a/Documentation/arch/powerpc/mpc52xx.rst b/Documentation/arch/powerpc/mpc52xx.rst
new file mode 100644
index 000000000000..5243b1763fad
--- /dev/null
+++ b/Documentation/arch/powerpc/mpc52xx.rst
@@ -0,0 +1,43 @@
+=============================
+Linux 2.6.x on MPC52xx family
+=============================
+
+For the latest info, go to https://www.246tNt.com/mpc52xx/
+
+To compile/use :
+
+ - U-Boot::
+
+ # <edit Makefile to set ARCH=ppc & CROSS_COMPILE=... ( also EXTRAVERSION
+ if you wish to ).
+ # make lite5200_defconfig
+ # make uImage
+
+ then, on U-boot:
+ => tftpboot 200000 uImage
+ => tftpboot 400000 pRamdisk
+ => bootm 200000 400000
+
+ - DBug::
+
+ # <edit Makefile to set ARCH=ppc & CROSS_COMPILE=... ( also EXTRAVERSION
+ if you wish to ).
+ # make lite5200_defconfig
+ # cp your_initrd.gz arch/ppc/boot/images/ramdisk.image.gz
+ # make zImage.initrd
+ # make
+
+ then in DBug:
+ DBug> dn -i zImage.initrd.lite5200
+
+
+Some remarks:
+
+ - The port is named mpc52xxx, and config options are PPC_MPC52xx. The MGT5100
+ is not supported, and I'm not sure anyone is interested in working on it
+ so. I didn't took 5xxx because there's apparently a lot of 5xxx that have
+ nothing to do with the MPC5200. I also included the 'MPC' for the same
+ reason.
+ - Of course, I inspired myself from the 2.4 port. If you think I forgot to
+ mention you/your company in the copyright of some code, I'll correct it
+ ASAP.
diff --git a/Documentation/arch/powerpc/papr_hcalls.rst b/Documentation/arch/powerpc/papr_hcalls.rst
new file mode 100644
index 000000000000..805e1cb9bab9
--- /dev/null
+++ b/Documentation/arch/powerpc/papr_hcalls.rst
@@ -0,0 +1,313 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================
+Hypercall Op-codes (hcalls)
+===========================
+
+Overview
+=========
+
+Virtualization on 64-bit Power Book3S Platforms is based on the PAPR
+specification [1]_ which describes the run-time environment for a guest
+operating system and how it should interact with the hypervisor for
+privileged operations. Currently there are two PAPR compliant hypervisors:
+
+- **IBM PowerVM (PHYP)**: IBM's proprietary hypervisor that supports AIX,
+ IBM-i and Linux as supported guests (termed as Logical Partitions
+ or LPARS). It supports the full PAPR specification.
+
+- **Qemu/KVM**: Supports PPC64 linux guests running on a PPC64 linux host.
+ Though it only implements a subset of PAPR specification called LoPAPR [2]_.
+
+On PPC64 arch a guest kernel running on top of a PAPR hypervisor is called
+a *pSeries guest*. A pseries guest runs in a supervisor mode (HV=0) and must
+issue hypercalls to the hypervisor whenever it needs to perform an action
+that is hypervisor privileged [3]_ or for other services managed by the
+hypervisor.
+
+Hence a Hypercall (hcall) is essentially a request by the pseries guest
+asking hypervisor to perform a privileged operation on behalf of the guest. The
+guest issues a with necessary input operands. The hypervisor after performing
+the privilege operation returns a status code and output operands back to the
+guest.
+
+HCALL ABI
+=========
+The ABI specification for a hcall between a pseries guest and PAPR hypervisor
+is covered in section 14.5.3 of ref [2]_. Switch to the Hypervisor context is
+done via the instruction **HVCS** that expects the Opcode for hcall is set in *r3*
+and any in-arguments for the hcall are provided in registers *r4-r12*. If values
+have to be passed through a memory buffer, the data stored in that buffer should be
+in Big-endian byte order.
+
+Once control returns back to the guest after hypervisor has serviced the
+'HVCS' instruction the return value of the hcall is available in *r3* and any
+out values are returned in registers *r4-r12*. Again like in case of in-arguments,
+any out values stored in a memory buffer will be in Big-endian byte order.
+
+Powerpc arch code provides convenient wrappers named **plpar_hcall_xxx** defined
+in a arch specific header [4]_ to issue hcalls from the linux kernel
+running as pseries guest.
+
+Register Conventions
+====================
+
+Any hcall should follow same register convention as described in section 2.2.1.1
+of "64-Bit ELF V2 ABI Specification: Power Architecture"[5]_. Table below
+summarizes these conventions:
+
++----------+----------+-------------------------------------------+
+| Register |Volatile | Purpose |
+| Range |(Y/N) | |
++==========+==========+===========================================+
+| r0 | Y | Optional-usage |
++----------+----------+-------------------------------------------+
+| r1 | N | Stack Pointer |
++----------+----------+-------------------------------------------+
+| r2 | N | TOC |
++----------+----------+-------------------------------------------+
+| r3 | Y | hcall opcode/return value |
++----------+----------+-------------------------------------------+
+| r4-r10 | Y | in and out values |
++----------+----------+-------------------------------------------+
+| r11 | Y | Optional-usage/Environmental pointer |
++----------+----------+-------------------------------------------+
+| r12 | Y | Optional-usage/Function entry address at |
+| | | global entry point |
++----------+----------+-------------------------------------------+
+| r13 | N | Thread-Pointer |
++----------+----------+-------------------------------------------+
+| r14-r31 | N | Local Variables |
++----------+----------+-------------------------------------------+
+| LR | Y | Link Register |
++----------+----------+-------------------------------------------+
+| CTR | Y | Loop Counter |
++----------+----------+-------------------------------------------+
+| XER | Y | Fixed-point exception register. |
++----------+----------+-------------------------------------------+
+| CR0-1 | Y | Condition register fields. |
++----------+----------+-------------------------------------------+
+| CR2-4 | N | Condition register fields. |
++----------+----------+-------------------------------------------+
+| CR5-7 | Y | Condition register fields. |
++----------+----------+-------------------------------------------+
+| Others | N | |
++----------+----------+-------------------------------------------+
+
+DRC & DRC Indexes
+=================
+::
+
+ DR1 Guest
+ +--+ +------------+ +---------+
+ | | <----> | | | User |
+ +--+ DRC1 | | DRC | Space |
+ | PAPR | Index +---------+
+ DR2 | Hypervisor | | |
+ +--+ | | <-----> | Kernel |
+ | | <----> | | Hcall | |
+ +--+ DRC2 +------------+ +---------+
+
+PAPR hypervisor terms shared hardware resources like PCI devices, NVDIMMs etc
+available for use by LPARs as Dynamic Resource (DR). When a DR is allocated to
+an LPAR, PHYP creates a data-structure called Dynamic Resource Connector (DRC)
+to manage LPAR access. An LPAR refers to a DRC via an opaque 32-bit number
+called DRC-Index. The DRC-index value is provided to the LPAR via device-tree
+where its present as an attribute in the device tree node associated with the
+DR.
+
+HCALL Return-values
+===================
+
+After servicing the hcall, hypervisor sets the return-value in *r3* indicating
+success or failure of the hcall. In case of a failure an error code indicates
+the cause for error. These codes are defined and documented in arch specific
+header [4]_.
+
+In some cases a hcall can potentially take a long time and need to be issued
+multiple times in order to be completely serviced. These hcalls will usually
+accept an opaque value *continue-token* within there argument list and a
+return value of *H_CONTINUE* indicates that hypervisor hasn't still finished
+servicing the hcall yet.
+
+To make such hcalls the guest need to set *continue-token == 0* for the
+initial call and use the hypervisor returned value of *continue-token*
+for each subsequent hcall until hypervisor returns a non *H_CONTINUE*
+return value.
+
+HCALL Op-codes
+==============
+
+Below is a partial list of HCALLs that are supported by PHYP. For the
+corresponding opcode values please look into the arch specific header [4]_:
+
+**H_SCM_READ_METADATA**
+
+| Input: *drcIndex, offset, buffer-address, numBytesToRead*
+| Out: *numBytesRead*
+| Return Value: *H_Success, H_Parameter, H_P2, H_P3, H_Hardware*
+
+Given a DRC Index of an NVDIMM, read N-bytes from the metadata area
+associated with it, at a specified offset and copy it to provided buffer.
+The metadata area stores configuration information such as label information,
+bad-blocks etc. The metadata area is located out-of-band of NVDIMM storage
+area hence a separate access semantics is provided.
+
+**H_SCM_WRITE_METADATA**
+
+| Input: *drcIndex, offset, data, numBytesToWrite*
+| Out: *None*
+| Return Value: *H_Success, H_Parameter, H_P2, H_P4, H_Hardware*
+
+Given a DRC Index of an NVDIMM, write N-bytes to the metadata area
+associated with it, at the specified offset and from the provided buffer.
+
+**H_SCM_BIND_MEM**
+
+| Input: *drcIndex, startingScmBlockIndex, numScmBlocksToBind,*
+| *targetLogicalMemoryAddress, continue-token*
+| Out: *continue-token, targetLogicalMemoryAddress, numScmBlocksToBound*
+| Return Value: *H_Success, H_Parameter, H_P2, H_P3, H_P4, H_Overlap,*
+| *H_Too_Big, H_P5, H_Busy*
+
+Given a DRC-Index of an NVDIMM, map a continuous SCM blocks range
+*(startingScmBlockIndex, startingScmBlockIndex+numScmBlocksToBind)* to the guest
+at *targetLogicalMemoryAddress* within guest physical address space. In
+case *targetLogicalMemoryAddress == 0xFFFFFFFF_FFFFFFFF* then hypervisor
+assigns a target address to the guest. The HCALL can fail if the Guest has
+an active PTE entry to the SCM block being bound.
+
+**H_SCM_UNBIND_MEM**
+| Input: drcIndex, startingScmLogicalMemoryAddress, numScmBlocksToUnbind
+| Out: numScmBlocksUnbound
+| Return Value: *H_Success, H_Parameter, H_P2, H_P3, H_In_Use, H_Overlap,*
+| *H_Busy, H_LongBusyOrder1mSec, H_LongBusyOrder10mSec*
+
+Given a DRC-Index of an NVDimm, unmap *numScmBlocksToUnbind* SCM blocks starting
+at *startingScmLogicalMemoryAddress* from guest physical address space. The
+HCALL can fail if the Guest has an active PTE entry to the SCM block being
+unbound.
+
+**H_SCM_QUERY_BLOCK_MEM_BINDING**
+
+| Input: *drcIndex, scmBlockIndex*
+| Out: *Guest-Physical-Address*
+| Return Value: *H_Success, H_Parameter, H_P2, H_NotFound*
+
+Given a DRC-Index and an SCM Block index return the guest physical address to
+which the SCM block is mapped to.
+
+**H_SCM_QUERY_LOGICAL_MEM_BINDING**
+
+| Input: *Guest-Physical-Address*
+| Out: *drcIndex, scmBlockIndex*
+| Return Value: *H_Success, H_Parameter, H_P2, H_NotFound*
+
+Given a guest physical address return which DRC Index and SCM block is mapped
+to that address.
+
+**H_SCM_UNBIND_ALL**
+
+| Input: *scmTargetScope, drcIndex*
+| Out: *None*
+| Return Value: *H_Success, H_Parameter, H_P2, H_P3, H_In_Use, H_Busy,*
+| *H_LongBusyOrder1mSec, H_LongBusyOrder10mSec*
+
+Depending on the Target scope unmap all SCM blocks belonging to all NVDIMMs
+or all SCM blocks belonging to a single NVDIMM identified by its drcIndex
+from the LPAR memory.
+
+**H_SCM_HEALTH**
+
+| Input: drcIndex
+| Out: *health-bitmap (r4), health-bit-valid-bitmap (r5)*
+| Return Value: *H_Success, H_Parameter, H_Hardware*
+
+Given a DRC Index return the info on predictive failure and overall health of
+the PMEM device. The asserted bits in the health-bitmap indicate one or more states
+(described in table below) of the PMEM device and health-bit-valid-bitmap indicate
+which bits in health-bitmap are valid. The bits are reported in
+reverse bit ordering for example a value of 0xC400000000000000
+indicates bits 0, 1, and 5 are valid.
+
+Health Bitmap Flags:
+
++------+-----------------------------------------------------------------------+
+| Bit | Definition |
++======+=======================================================================+
+| 00 | PMEM device is unable to persist memory contents. |
+| | If the system is powered down, nothing will be saved. |
++------+-----------------------------------------------------------------------+
+| 01 | PMEM device failed to persist memory contents. Either contents were |
+| | not saved successfully on power down or were not restored properly on |
+| | power up. |
++------+-----------------------------------------------------------------------+
+| 02 | PMEM device contents are persisted from previous IPL. The data from |
+| | the last boot were successfully restored. |
++------+-----------------------------------------------------------------------+
+| 03 | PMEM device contents are not persisted from previous IPL. There was no|
+| | data to restore from the last boot. |
++------+-----------------------------------------------------------------------+
+| 04 | PMEM device memory life remaining is critically low |
++------+-----------------------------------------------------------------------+
+| 05 | PMEM device will be garded off next IPL due to failure |
++------+-----------------------------------------------------------------------+
+| 06 | PMEM device contents cannot persist due to current platform health |
+| | status. A hardware failure may prevent data from being saved or |
+| | restored. |
++------+-----------------------------------------------------------------------+
+| 07 | PMEM device is unable to persist memory contents in certain conditions|
++------+-----------------------------------------------------------------------+
+| 08 | PMEM device is encrypted |
++------+-----------------------------------------------------------------------+
+| 09 | PMEM device has successfully completed a requested erase or secure |
+| | erase procedure. |
++------+-----------------------------------------------------------------------+
+|10:63 | Reserved / Unused |
++------+-----------------------------------------------------------------------+
+
+**H_SCM_PERFORMANCE_STATS**
+
+| Input: drcIndex, resultBuffer Addr
+| Out: None
+| Return Value: *H_Success, H_Parameter, H_Unsupported, H_Hardware, H_Authority, H_Privilege*
+
+Given a DRC Index collect the performance statistics for NVDIMM and copy them
+to the resultBuffer.
+
+**H_SCM_FLUSH**
+
+| Input: *drcIndex, continue-token*
+| Out: *continue-token*
+| Return Value: *H_SUCCESS, H_Parameter, H_P2, H_BUSY*
+
+Given a DRC Index Flush the data to backend NVDIMM device.
+
+The hcall returns H_BUSY when the flush takes longer time and the hcall needs
+to be issued multiple times in order to be completely serviced. The
+*continue-token* from the output to be passed in the argument list of
+subsequent hcalls to the hypervisor until the hcall is completely serviced
+at which point H_SUCCESS or other error is returned by the hypervisor.
+
+**H_HTM**
+
+| Input: flags, target, operation (op), op-param1, op-param2, op-param3
+| Out: *dumphtmbufferdata*
+| Return Value: *H_Success,H_Busy,H_LongBusyOrder,H_Partial,H_Parameter,
+ H_P2,H_P3,H_P4,H_P5,H_P6,H_State,H_Not_Available,H_Authority*
+
+H_HTM supports setup, configuration, control and dumping of Hardware Trace
+Macro (HTM) function and its data. HTM buffer stores tracing data for functions
+like core instruction, core LLAT and nest.
+
+References
+==========
+.. [1] "Power Architecture Platform Reference"
+ https://en.wikipedia.org/wiki/Power_Architecture_Platform_Reference
+.. [2] "Linux on Power Architecture Platform Reference"
+ https://members.openpowerfoundation.org/document/dl/469
+.. [3] "Definitions and Notation" Book III-Section 14.5.3
+ https://openpowerfoundation.org/?resource_lib=power-isa-version-3-0
+.. [4] arch/powerpc/include/asm/hvcall.h
+.. [5] "64-Bit ELF V2 ABI Specification: Power Architecture"
+ https://openpowerfoundation.org/?resource_lib=64-bit-elf-v2-abi-specification-power-architecture
diff --git a/Documentation/arch/powerpc/pci_iov_resource_on_powernv.rst b/Documentation/arch/powerpc/pci_iov_resource_on_powernv.rst
new file mode 100644
index 000000000000..f5a5793e1613
--- /dev/null
+++ b/Documentation/arch/powerpc/pci_iov_resource_on_powernv.rst
@@ -0,0 +1,312 @@
+===================================================
+PCI Express I/O Virtualization Resource on Powerenv
+===================================================
+
+Wei Yang <weiyang@linux.vnet.ibm.com>
+
+Benjamin Herrenschmidt <benh@au1.ibm.com>
+
+Bjorn Helgaas <bhelgaas@google.com>
+
+26 Aug 2014
+
+This document describes the requirement from hardware for PCI MMIO resource
+sizing and assignment on PowerKVM and how generic PCI code handles this
+requirement. The first two sections describe the concepts of Partitionable
+Endpoints and the implementation on P8 (IODA2). The next two sections talks
+about considerations on enabling SRIOV on IODA2.
+
+1. Introduction to Partitionable Endpoints
+==========================================
+
+A Partitionable Endpoint (PE) is a way to group the various resources
+associated with a device or a set of devices to provide isolation between
+partitions (i.e., filtering of DMA, MSIs etc.) and to provide a mechanism
+to freeze a device that is causing errors in order to limit the possibility
+of propagation of bad data.
+
+There is thus, in HW, a table of PE states that contains a pair of "frozen"
+state bits (one for MMIO and one for DMA, they get set together but can be
+cleared independently) for each PE.
+
+When a PE is frozen, all stores in any direction are dropped and all loads
+return all 1's value. MSIs are also blocked. There's a bit more state that
+captures things like the details of the error that caused the freeze etc., but
+that's not critical.
+
+The interesting part is how the various PCIe transactions (MMIO, DMA, ...)
+are matched to their corresponding PEs.
+
+The following section provides a rough description of what we have on P8
+(IODA2). Keep in mind that this is all per PHB (PCI host bridge). Each PHB
+is a completely separate HW entity that replicates the entire logic, so has
+its own set of PEs, etc.
+
+2. Implementation of Partitionable Endpoints on P8 (IODA2)
+==========================================================
+
+P8 supports up to 256 Partitionable Endpoints per PHB.
+
+ * Inbound
+
+ For DMA, MSIs and inbound PCIe error messages, we have a table (in
+ memory but accessed in HW by the chip) that provides a direct
+ correspondence between a PCIe RID (bus/dev/fn) with a PE number.
+ We call this the RTT.
+
+ - For DMA we then provide an entire address space for each PE that can
+ contain two "windows", depending on the value of PCI address bit 59.
+ Each window can be configured to be remapped via a "TCE table" (IOMMU
+ translation table), which has various configurable characteristics
+ not described here.
+
+ - For MSIs, we have two windows in the address space (one at the top of
+ the 32-bit space and one much higher) which, via a combination of the
+ address and MSI value, will result in one of the 2048 interrupts per
+ bridge being triggered. There's a PE# in the interrupt controller
+ descriptor table as well which is compared with the PE# obtained from
+ the RTT to "authorize" the device to emit that specific interrupt.
+
+ - Error messages just use the RTT.
+
+ * Outbound. That's where the tricky part is.
+
+ Like other PCI host bridges, the Power8 IODA2 PHB supports "windows"
+ from the CPU address space to the PCI address space. There is one M32
+ window and sixteen M64 windows. They have different characteristics.
+ First what they have in common: they forward a configurable portion of
+ the CPU address space to the PCIe bus and must be naturally aligned
+ power of two in size. The rest is different:
+
+ - The M32 window:
+
+ * Is limited to 4GB in size.
+
+ * Drops the top bits of the address (above the size) and replaces
+ them with a configurable value. This is typically used to generate
+ 32-bit PCIe accesses. We configure that window at boot from FW and
+ don't touch it from Linux; it's usually set to forward a 2GB
+ portion of address space from the CPU to PCIe
+ 0x8000_0000..0xffff_ffff. (Note: The top 64KB are actually
+ reserved for MSIs but this is not a problem at this point; we just
+ need to ensure Linux doesn't assign anything there, the M32 logic
+ ignores that however and will forward in that space if we try).
+
+ * It is divided into 256 segments of equal size. A table in the chip
+ maps each segment to a PE#. That allows portions of the MMIO space
+ to be assigned to PEs on a segment granularity. For a 2GB window,
+ the segment granularity is 2GB/256 = 8MB.
+
+ Now, this is the "main" window we use in Linux today (excluding
+ SR-IOV). We basically use the trick of forcing the bridge MMIO windows
+ onto a segment alignment/granularity so that the space behind a bridge
+ can be assigned to a PE.
+
+ Ideally we would like to be able to have individual functions in PEs
+ but that would mean using a completely different address allocation
+ scheme where individual function BARs can be "grouped" to fit in one or
+ more segments.
+
+ - The M64 windows:
+
+ * Must be at least 256MB in size.
+
+ * Do not translate addresses (the address on PCIe is the same as the
+ address on the PowerBus). There is a way to also set the top 14
+ bits which are not conveyed by PowerBus but we don't use this.
+
+ * Can be configured to be segmented. When not segmented, we can
+ specify the PE# for the entire window. When segmented, a window
+ has 256 segments; however, there is no table for mapping a segment
+ to a PE#. The segment number *is* the PE#.
+
+ * Support overlaps. If an address is covered by multiple windows,
+ there's a defined ordering for which window applies.
+
+ We have code (fairly new compared to the M32 stuff) that exploits that
+ for large BARs in 64-bit space:
+
+ We configure an M64 window to cover the entire region of address space
+ that has been assigned by FW for the PHB (about 64GB, ignore the space
+ for the M32, it comes out of a different "reserve"). We configure it
+ as segmented.
+
+ Then we do the same thing as with M32, using the bridge alignment
+ trick, to match to those giant segments.
+
+ Since we cannot remap, we have two additional constraints:
+
+ - We do the PE# allocation *after* the 64-bit space has been assigned
+ because the addresses we use directly determine the PE#. We then
+ update the M32 PE# for the devices that use both 32-bit and 64-bit
+ spaces or assign the remaining PE# to 32-bit only devices.
+
+ - We cannot "group" segments in HW, so if a device ends up using more
+ than one segment, we end up with more than one PE#. There is a HW
+ mechanism to make the freeze state cascade to "companion" PEs but
+ that only works for PCIe error messages (typically used so that if
+ you freeze a switch, it freezes all its children). So we do it in
+ SW. We lose a bit of effectiveness of EEH in that case, but that's
+ the best we found. So when any of the PEs freezes, we freeze the
+ other ones for that "domain". We thus introduce the concept of
+ "master PE" which is the one used for DMA, MSIs, etc., and "secondary
+ PEs" that are used for the remaining M64 segments.
+
+ We would like to investigate using additional M64 windows in "single
+ PE" mode to overlay over specific BARs to work around some of that, for
+ example for devices with very large BARs, e.g., GPUs. It would make
+ sense, but we haven't done it yet.
+
+3. Considerations for SR-IOV on PowerKVM
+========================================
+
+ * SR-IOV Background
+
+ The PCIe SR-IOV feature allows a single Physical Function (PF) to
+ support several Virtual Functions (VFs). Registers in the PF's SR-IOV
+ Capability control the number of VFs and whether they are enabled.
+
+ When VFs are enabled, they appear in Configuration Space like normal
+ PCI devices, but the BARs in VF config space headers are unusual. For
+ a non-VF device, software uses BARs in the config space header to
+ discover the BAR sizes and assign addresses for them. For VF devices,
+ software uses VF BAR registers in the *PF* SR-IOV Capability to
+ discover sizes and assign addresses. The BARs in the VF's config space
+ header are read-only zeros.
+
+ When a VF BAR in the PF SR-IOV Capability is programmed, it sets the
+ base address for all the corresponding VF(n) BARs. For example, if the
+ PF SR-IOV Capability is programmed to enable eight VFs, and it has a
+ 1MB VF BAR0, the address in that VF BAR sets the base of an 8MB region.
+ This region is divided into eight contiguous 1MB regions, each of which
+ is a BAR0 for one of the VFs. Note that even though the VF BAR
+ describes an 8MB region, the alignment requirement is for a single VF,
+ i.e., 1MB in this example.
+
+ There are several strategies for isolating VFs in PEs:
+
+ - M32 window: There's one M32 window, and it is split into 256
+ equally-sized segments. The finest granularity possible is a 256MB
+ window with 1MB segments. VF BARs that are 1MB or larger could be
+ mapped to separate PEs in this window. Each segment can be
+ individually mapped to a PE via the lookup table, so this is quite
+ flexible, but it works best when all the VF BARs are the same size. If
+ they are different sizes, the entire window has to be small enough that
+ the segment size matches the smallest VF BAR, which means larger VF
+ BARs span several segments.
+
+ - Non-segmented M64 window: A non-segmented M64 window is mapped entirely
+ to a single PE, so it could only isolate one VF.
+
+ - Single segmented M64 windows: A segmented M64 window could be used just
+ like the M32 window, but the segments can't be individually mapped to
+ PEs (the segment number is the PE#), so there isn't as much
+ flexibility. A VF with multiple BARs would have to be in a "domain" of
+ multiple PEs, which is not as well isolated as a single PE.
+
+ - Multiple segmented M64 windows: As usual, each window is split into 256
+ equally-sized segments, and the segment number is the PE#. But if we
+ use several M64 windows, they can be set to different base addresses
+ and different segment sizes. If we have VFs that each have a 1MB BAR
+ and a 32MB BAR, we could use one M64 window to assign 1MB segments and
+ another M64 window to assign 32MB segments.
+
+ Finally, the plan to use M64 windows for SR-IOV, which will be described
+ more in the next two sections. For a given VF BAR, we need to
+ effectively reserve the entire 256 segments (256 * VF BAR size) and
+ position the VF BAR to start at the beginning of a free range of
+ segments/PEs inside that M64 window.
+
+ The goal is of course to be able to give a separate PE for each VF.
+
+ The IODA2 platform has 16 M64 windows, which are used to map MMIO
+ range to PE#. Each M64 window defines one MMIO range and this range is
+ divided into 256 segments, with each segment corresponding to one PE.
+
+ We decide to leverage this M64 window to map VFs to individual PEs, since
+ SR-IOV VF BARs are all the same size.
+
+ But doing so introduces another problem: total_VFs is usually smaller
+ than the number of M64 window segments, so if we map one VF BAR directly
+ to one M64 window, some part of the M64 window will map to another
+ device's MMIO range.
+
+ IODA supports 256 PEs, so segmented windows contain 256 segments, so if
+ total_VFs is less than 256, we have the situation in Figure 1.0, where
+ segments [total_VFs, 255] of the M64 window may map to some MMIO range on
+ other devices::
+
+ 0 1 total_VFs - 1
+ +------+------+- -+------+------+
+ | | | ... | | |
+ +------+------+- -+------+------+
+
+ VF(n) BAR space
+
+ 0 1 total_VFs - 1 255
+ +------+------+- -+------+------+- -+------+------+
+ | | | ... | | | ... | | |
+ +------+------+- -+------+------+- -+------+------+
+
+ M64 window
+
+ Figure 1.0 Direct map VF(n) BAR space
+
+ Our current solution is to allocate 256 segments even if the VF(n) BAR
+ space doesn't need that much, as shown in Figure 1.1::
+
+ 0 1 total_VFs - 1 255
+ +------+------+- -+------+------+- -+------+------+
+ | | | ... | | | ... | | |
+ +------+------+- -+------+------+- -+------+------+
+
+ VF(n) BAR space + extra
+
+ 0 1 total_VFs - 1 255
+ +------+------+- -+------+------+- -+------+------+
+ | | | ... | | | ... | | |
+ +------+------+- -+------+------+- -+------+------+
+
+ M64 window
+
+ Figure 1.1 Map VF(n) BAR space + extra
+
+ Allocating the extra space ensures that the entire M64 window will be
+ assigned to this one SR-IOV device and none of the space will be
+ available for other devices. Note that this only expands the space
+ reserved in software; there are still only total_VFs VFs, and they only
+ respond to segments [0, total_VFs - 1]. There's nothing in hardware that
+ responds to segments [total_VFs, 255].
+
+4. Implications for the Generic PCI Code
+========================================
+
+The PCIe SR-IOV spec requires that the base of the VF(n) BAR space be
+aligned to the size of an individual VF BAR.
+
+In IODA2, the MMIO address determines the PE#. If the address is in an M32
+window, we can set the PE# by updating the table that translates segments
+to PE#s. Similarly, if the address is in an unsegmented M64 window, we can
+set the PE# for the window. But if it's in a segmented M64 window, the
+segment number is the PE#.
+
+Therefore, the only way to control the PE# for a VF is to change the base
+of the VF(n) BAR space in the VF BAR. If the PCI core allocates the exact
+amount of space required for the VF(n) BAR space, the VF BAR value is fixed
+and cannot be changed.
+
+On the other hand, if the PCI core allocates additional space, the VF BAR
+value can be changed as long as the entire VF(n) BAR space remains inside
+the space allocated by the core.
+
+Ideally the segment size will be the same as an individual VF BAR size.
+Then each VF will be in its own PE. The VF BARs (and therefore the PE#s)
+are contiguous. If VF0 is in PE(x), then VF(n) is in PE(x+n). If we
+allocate 256 segments, there are (256 - numVFs) choices for the PE# of VF0.
+
+If the segment size is smaller than the VF BAR size, it will take several
+segments to cover a VF BAR, and a VF will be in several PEs. This is
+possible, but the isolation isn't as good, and it reduces the number of PE#
+choices because instead of consuming only numVFs segments, the VF(n) BAR
+space will consume (numVFs * n) segments. That means there aren't as many
+available segments for adjusting base of the VF(n) BAR space.
diff --git a/Documentation/arch/powerpc/pmu-ebb.rst b/Documentation/arch/powerpc/pmu-ebb.rst
new file mode 100644
index 000000000000..4f474758eb55
--- /dev/null
+++ b/Documentation/arch/powerpc/pmu-ebb.rst
@@ -0,0 +1,138 @@
+========================
+PMU Event Based Branches
+========================
+
+Event Based Branches (EBBs) are a feature which allows the hardware to
+branch directly to a specified user space address when certain events occur.
+
+The full specification is available in Power ISA v2.07:
+
+ https://www.power.org/documentation/power-isa-version-2-07/
+
+One type of event for which EBBs can be configured is PMU exceptions. This
+document describes the API for configuring the Power PMU to generate EBBs,
+using the Linux perf_events API.
+
+
+Terminology
+-----------
+
+Throughout this document we will refer to an "EBB event" or "EBB events". This
+just refers to a struct perf_event which has set the "EBB" flag in its
+attr.config. All events which can be configured on the hardware PMU are
+possible "EBB events".
+
+
+Background
+----------
+
+When a PMU EBB occurs it is delivered to the currently running process. As such
+EBBs can only sensibly be used by programs for self-monitoring.
+
+It is a feature of the perf_events API that events can be created on other
+processes, subject to standard permission checks. This is also true of EBB
+events, however unless the target process enables EBBs (via mtspr(BESCR)) no
+EBBs will ever be delivered.
+
+This makes it possible for a process to enable EBBs for itself, but not
+actually configure any events. At a later time another process can come along
+and attach an EBB event to the process, which will then cause EBBs to be
+delivered to the first process. It's not clear if this is actually useful.
+
+
+When the PMU is configured for EBBs, all PMU interrupts are delivered to the
+user process. This means once an EBB event is scheduled on the PMU, no non-EBB
+events can be configured. This means that EBB events can not be run
+concurrently with regular 'perf' commands, or any other perf events.
+
+It is however safe to run 'perf' commands on a process which is using EBBs. The
+kernel will in general schedule the EBB event, and perf will be notified that
+its events could not run.
+
+The exclusion between EBB events and regular events is implemented using the
+existing "pinned" and "exclusive" attributes of perf_events. This means EBB
+events will be given priority over other events, unless they are also pinned.
+If an EBB event and a regular event are both pinned, then whichever is enabled
+first will be scheduled and the other will be put in error state. See the
+section below titled "Enabling an EBB event" for more information.
+
+
+Creating an EBB event
+---------------------
+
+To request that an event is counted using EBB, the event code should have bit
+63 set.
+
+EBB events must be created with a particular, and restrictive, set of
+attributes - this is so that they interoperate correctly with the rest of the
+perf_events subsystem.
+
+An EBB event must be created with the "pinned" and "exclusive" attributes set.
+Note that if you are creating a group of EBB events, only the leader can have
+these attributes set.
+
+An EBB event must NOT set any of the "inherit", "sample_period", "freq" or
+"enable_on_exec" attributes.
+
+An EBB event must be attached to a task. This is specified to perf_event_open()
+by passing a pid value, typically 0 indicating the current task.
+
+All events in a group must agree on whether they want EBB. That is all events
+must request EBB, or none may request EBB.
+
+EBB events must specify the PMC they are to be counted on. This ensures
+userspace is able to reliably determine which PMC the event is scheduled on.
+
+
+Enabling an EBB event
+---------------------
+
+Once an EBB event has been successfully opened, it must be enabled with the
+perf_events API. This can be achieved either via the ioctl() interface, or the
+prctl() interface.
+
+However, due to the design of the perf_events API, enabling an event does not
+guarantee that it has been scheduled on the PMU. To ensure that the EBB event
+has been scheduled on the PMU, you must perform a read() on the event. If the
+read() returns EOF, then the event has not been scheduled and EBBs are not
+enabled.
+
+This behaviour occurs because the EBB event is pinned and exclusive. When the
+EBB event is enabled it will force all other non-pinned events off the PMU. In
+this case the enable will be successful. However if there is already an event
+pinned on the PMU then the enable will not be successful.
+
+
+Reading an EBB event
+--------------------
+
+It is possible to read() from an EBB event. However the results are
+meaningless. Because interrupts are being delivered to the user process the
+kernel is not able to count the event, and so will return a junk value.
+
+
+Closing an EBB event
+--------------------
+
+When an EBB event is finished with, you can close it using close() as for any
+regular event. If this is the last EBB event the PMU will be deconfigured and
+no further PMU EBBs will be delivered.
+
+
+EBB Handler
+-----------
+
+The EBB handler is just regular userspace code, however it must be written in
+the style of an interrupt handler. When the handler is entered all registers
+are live (possibly) and so must be saved somehow before the handler can invoke
+other code.
+
+It's up to the program how to handle this. For C programs a relatively simple
+option is to create an interrupt frame on the stack and save registers there.
+
+Fork
+----
+
+EBB events are not inherited across fork. If the child process wishes to use
+EBBs it should open a new event for itself. Similarly the EBB state in
+BESCR/EBBHR/EBBRR is cleared across fork().
diff --git a/Documentation/arch/powerpc/ptrace.rst b/Documentation/arch/powerpc/ptrace.rst
new file mode 100644
index 000000000000..5629edf4d56e
--- /dev/null
+++ b/Documentation/arch/powerpc/ptrace.rst
@@ -0,0 +1,157 @@
+======
+Ptrace
+======
+
+GDB intends to support the following hardware debug features of BookE
+processors:
+
+4 hardware breakpoints (IAC)
+2 hardware watchpoints (read, write and read-write) (DAC)
+2 value conditions for the hardware watchpoints (DVC)
+
+For that, we need to extend ptrace so that GDB can query and set these
+resources. Since we're extending, we're trying to create an interface
+that's extendable and that covers both BookE and server processors, so
+that GDB doesn't need to special-case each of them. We added the
+following 3 new ptrace requests.
+
+1. PPC_PTRACE_GETHWDBGINFO
+============================
+
+Query for GDB to discover the hardware debug features. The main info to
+be returned here is the minimum alignment for the hardware watchpoints.
+BookE processors don't have restrictions here, but server processors have
+an 8-byte alignment restriction for hardware watchpoints. We'd like to avoid
+adding special cases to GDB based on what it sees in AUXV.
+
+Since we're at it, we added other useful info that the kernel can return to
+GDB: this query will return the number of hardware breakpoints, hardware
+watchpoints and whether it supports a range of addresses and a condition.
+The query will fill the following structure provided by the requesting process::
+
+ struct ppc_debug_info {
+ unit32_t version;
+ unit32_t num_instruction_bps;
+ unit32_t num_data_bps;
+ unit32_t num_condition_regs;
+ unit32_t data_bp_alignment;
+ unit32_t sizeof_condition; /* size of the DVC register */
+ uint64_t features; /* bitmask of the individual flags */
+ };
+
+features will have bits indicating whether there is support for::
+
+ #define PPC_DEBUG_FEATURE_INSN_BP_RANGE 0x1
+ #define PPC_DEBUG_FEATURE_INSN_BP_MASK 0x2
+ #define PPC_DEBUG_FEATURE_DATA_BP_RANGE 0x4
+ #define PPC_DEBUG_FEATURE_DATA_BP_MASK 0x8
+ #define PPC_DEBUG_FEATURE_DATA_BP_DAWR 0x10
+ #define PPC_DEBUG_FEATURE_DATA_BP_ARCH_31 0x20
+
+2. PPC_PTRACE_SETHWDEBUG
+
+Sets a hardware breakpoint or watchpoint, according to the provided structure::
+
+ struct ppc_hw_breakpoint {
+ uint32_t version;
+ #define PPC_BREAKPOINT_TRIGGER_EXECUTE 0x1
+ #define PPC_BREAKPOINT_TRIGGER_READ 0x2
+ #define PPC_BREAKPOINT_TRIGGER_WRITE 0x4
+ uint32_t trigger_type; /* only some combinations allowed */
+ #define PPC_BREAKPOINT_MODE_EXACT 0x0
+ #define PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE 0x1
+ #define PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE 0x2
+ #define PPC_BREAKPOINT_MODE_MASK 0x3
+ uint32_t addr_mode; /* address match mode */
+
+ #define PPC_BREAKPOINT_CONDITION_MODE 0x3
+ #define PPC_BREAKPOINT_CONDITION_NONE 0x0
+ #define PPC_BREAKPOINT_CONDITION_AND 0x1
+ #define PPC_BREAKPOINT_CONDITION_EXACT 0x1 /* different name for the same thing as above */
+ #define PPC_BREAKPOINT_CONDITION_OR 0x2
+ #define PPC_BREAKPOINT_CONDITION_AND_OR 0x3
+ #define PPC_BREAKPOINT_CONDITION_BE_ALL 0x00ff0000 /* byte enable bits */
+ #define PPC_BREAKPOINT_CONDITION_BE(n) (1<<((n)+16))
+ uint32_t condition_mode; /* break/watchpoint condition flags */
+
+ uint64_t addr;
+ uint64_t addr2;
+ uint64_t condition_value;
+ };
+
+A request specifies one event, not necessarily just one register to be set.
+For instance, if the request is for a watchpoint with a condition, both the
+DAC and DVC registers will be set in the same request.
+
+With this GDB can ask for all kinds of hardware breakpoints and watchpoints
+that the BookE supports. COMEFROM breakpoints available in server processors
+are not contemplated, but that is out of the scope of this work.
+
+ptrace will return an integer (handle) uniquely identifying the breakpoint or
+watchpoint just created. This integer will be used in the PPC_PTRACE_DELHWDEBUG
+request to ask for its removal. Return -ENOSPC if the requested breakpoint
+can't be allocated on the registers.
+
+Some examples of using the structure to:
+
+- set a breakpoint in the first breakpoint register::
+
+ p.version = PPC_DEBUG_CURRENT_VERSION;
+ p.trigger_type = PPC_BREAKPOINT_TRIGGER_EXECUTE;
+ p.addr_mode = PPC_BREAKPOINT_MODE_EXACT;
+ p.condition_mode = PPC_BREAKPOINT_CONDITION_NONE;
+ p.addr = (uint64_t) address;
+ p.addr2 = 0;
+ p.condition_value = 0;
+
+- set a watchpoint which triggers on reads in the second watchpoint register::
+
+ p.version = PPC_DEBUG_CURRENT_VERSION;
+ p.trigger_type = PPC_BREAKPOINT_TRIGGER_READ;
+ p.addr_mode = PPC_BREAKPOINT_MODE_EXACT;
+ p.condition_mode = PPC_BREAKPOINT_CONDITION_NONE;
+ p.addr = (uint64_t) address;
+ p.addr2 = 0;
+ p.condition_value = 0;
+
+- set a watchpoint which triggers only with a specific value::
+
+ p.version = PPC_DEBUG_CURRENT_VERSION;
+ p.trigger_type = PPC_BREAKPOINT_TRIGGER_READ;
+ p.addr_mode = PPC_BREAKPOINT_MODE_EXACT;
+ p.condition_mode = PPC_BREAKPOINT_CONDITION_AND | PPC_BREAKPOINT_CONDITION_BE_ALL;
+ p.addr = (uint64_t) address;
+ p.addr2 = 0;
+ p.condition_value = (uint64_t) condition;
+
+- set a ranged hardware breakpoint::
+
+ p.version = PPC_DEBUG_CURRENT_VERSION;
+ p.trigger_type = PPC_BREAKPOINT_TRIGGER_EXECUTE;
+ p.addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE;
+ p.condition_mode = PPC_BREAKPOINT_CONDITION_NONE;
+ p.addr = (uint64_t) begin_range;
+ p.addr2 = (uint64_t) end_range;
+ p.condition_value = 0;
+
+- set a watchpoint in server processors (BookS)::
+
+ p.version = 1;
+ p.trigger_type = PPC_BREAKPOINT_TRIGGER_RW;
+ p.addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE;
+ or
+ p.addr_mode = PPC_BREAKPOINT_MODE_EXACT;
+
+ p.condition_mode = PPC_BREAKPOINT_CONDITION_NONE;
+ p.addr = (uint64_t) begin_range;
+ /* For PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE addr2 needs to be specified, where
+ * addr2 - addr <= 8 Bytes.
+ */
+ p.addr2 = (uint64_t) end_range;
+ p.condition_value = 0;
+
+3. PPC_PTRACE_DELHWDEBUG
+
+Takes an integer which identifies an existing breakpoint or watchpoint
+(i.e., the value returned from PTRACE_SETHWDEBUG), and deletes the
+corresponding breakpoint or watchpoint..
diff --git a/Documentation/arch/powerpc/qe_firmware.rst b/Documentation/arch/powerpc/qe_firmware.rst
new file mode 100644
index 000000000000..a358f152b7e7
--- /dev/null
+++ b/Documentation/arch/powerpc/qe_firmware.rst
@@ -0,0 +1,296 @@
+=========================================
+Freescale QUICC Engine Firmware Uploading
+=========================================
+
+(c) 2007 Timur Tabi <timur at freescale.com>,
+ Freescale Semiconductor
+
+.. Table of Contents
+
+ I - Software License for Firmware
+
+ II - Microcode Availability
+
+ III - Description and Terminology
+
+ IV - Microcode Programming Details
+
+ V - Firmware Structure Layout
+
+ VI - Sample Code for Creating Firmware Files
+
+Revision Information
+====================
+
+November 30, 2007: Rev 1.0 - Initial version
+
+I - Software License for Firmware
+=================================
+
+Each firmware file comes with its own software license. For information on
+the particular license, please see the license text that is distributed with
+the firmware.
+
+II - Microcode Availability
+===========================
+
+Firmware files are distributed through various channels. Some are available on
+http://opensource.freescale.com. For other firmware files, please contact
+your Freescale representative or your operating system vendor.
+
+III - Description and Terminology
+=================================
+
+In this document, the term 'microcode' refers to the sequence of 32-bit
+integers that compose the actual QE microcode.
+
+The term 'firmware' refers to a binary blob that contains the microcode as
+well as other data that
+
+ 1) describes the microcode's purpose
+ 2) describes how and where to upload the microcode
+ 3) specifies the values of various registers
+ 4) includes additional data for use by specific device drivers
+
+Firmware files are binary files that contain only a firmware.
+
+IV - Microcode Programming Details
+===================================
+
+The QE architecture allows for only one microcode present in I-RAM for each
+RISC processor. To replace any current microcode, a full QE reset (which
+disables the microcode) must be performed first.
+
+QE microcode is uploaded using the following procedure:
+
+1) The microcode is placed into I-RAM at a specific location, using the
+ IRAM.IADD and IRAM.IDATA registers.
+
+2) The CERCR.CIR bit is set to 0 or 1, depending on whether the firmware
+ needs split I-RAM. Split I-RAM is only meaningful for SOCs that have
+ QEs with multiple RISC processors, such as the 8360. Splitting the I-RAM
+ allows each processor to run a different microcode, effectively creating an
+ asymmetric multiprocessing (AMP) system.
+
+3) The TIBCR trap registers are loaded with the addresses of the trap handlers
+ in the microcode.
+
+4) The RSP.ECCR register is programmed with the value provided.
+
+5) If necessary, device drivers that need the virtual traps and extended mode
+ data will use them.
+
+Virtual Microcode Traps
+
+These virtual traps are conditional branches in the microcode. These are
+"soft" provisional introduced in the ROMcode in order to enable higher
+flexibility and save h/w traps If new features are activated or an issue is
+being fixed in the RAM package utilizing they should be activated. This data
+structure signals the microcode which of these virtual traps is active.
+
+This structure contains 6 words that the application should copy to some
+specific been defined. This table describes the structure::
+
+ ---------------------------------------------------------------
+ | Offset in | | Destination Offset | Size of |
+ | array | Protocol | within PRAM | Operand |
+ --------------------------------------------------------------|
+ | 0 | Ethernet | 0xF8 | 4 bytes |
+ | | interworking | | |
+ ---------------------------------------------------------------
+ | 4 | ATM | 0xF8 | 4 bytes |
+ | | interworking | | |
+ ---------------------------------------------------------------
+ | 8 | PPP | 0xF8 | 4 bytes |
+ | | interworking | | |
+ ---------------------------------------------------------------
+ | 12 | Ethernet RX | 0x22 | 1 byte |
+ | | Distributor Page | | |
+ ---------------------------------------------------------------
+ | 16 | ATM Globtal | 0x28 | 1 byte |
+ | | Params Table | | |
+ ---------------------------------------------------------------
+ | 20 | Insert Frame | 0xF8 | 4 bytes |
+ ---------------------------------------------------------------
+
+
+Extended Modes
+
+This is a double word bit array (64 bits) that defines special functionality
+which has an impact on the software drivers. Each bit has its own impact
+and has special instructions for the s/w associated with it. This structure is
+described in this table::
+
+ -----------------------------------------------------------------------
+ | Bit # | Name | Description |
+ -----------------------------------------------------------------------
+ | 0 | General | Indicates that prior to each host command |
+ | | push command | given by the application, the software must |
+ | | | assert a special host command (push command)|
+ | | | CECDR = 0x00800000. |
+ | | | CECR = 0x01c1000f. |
+ -----------------------------------------------------------------------
+ | 1 | UCC ATM | Indicates that after issuing ATM RX INIT |
+ | | RX INIT | command, the host must issue another special|
+ | | push command | command (push command) and immediately |
+ | | | following that re-issue the ATM RX INIT |
+ | | | command. (This makes the sequence of |
+ | | | initializing the ATM receiver a sequence of |
+ | | | three host commands) |
+ | | | CECDR = 0x00800000. |
+ | | | CECR = 0x01c1000f. |
+ -----------------------------------------------------------------------
+ | 2 | Add/remove | Indicates that following the specific host |
+ | | command | command: "Add/Remove entry in Hash Lookup |
+ | | validation | Table" used in Interworking setup, the user |
+ | | | must issue another command. |
+ | | | CECDR = 0xce000003. |
+ | | | CECR = 0x01c10f58. |
+ -----------------------------------------------------------------------
+ | 3 | General push | Indicates that the s/w has to initialize |
+ | | command | some pointers in the Ethernet thread pages |
+ | | | which are used when Header Compression is |
+ | | | activated. The full details of these |
+ | | | pointers is located in the software drivers.|
+ -----------------------------------------------------------------------
+ | 4 | General push | Indicates that after issuing Ethernet TX |
+ | | command | INIT command, user must issue this command |
+ | | | for each SNUM of Ethernet TX thread. |
+ | | | CECDR = 0x00800003. |
+ | | | CECR = 0x7'b{0}, 8'b{Enet TX thread SNUM}, |
+ | | | 1'b{1}, 12'b{0}, 4'b{1} |
+ -----------------------------------------------------------------------
+ | 5 - 31 | N/A | Reserved, set to zero. |
+ -----------------------------------------------------------------------
+
+V - Firmware Structure Layout
+==============================
+
+QE microcode from Freescale is typically provided as a header file. This
+header file contains macros that define the microcode binary itself as well as
+some other data used in uploading that microcode. The format of these files
+do not lend themselves to simple inclusion into other code. Hence,
+the need for a more portable format. This section defines that format.
+
+Instead of distributing a header file, the microcode and related data are
+embedded into a binary blob. This blob is passed to the qe_upload_firmware()
+function, which parses the blob and performs everything necessary to upload
+the microcode.
+
+All integers are big-endian. See the comments for function
+qe_upload_firmware() for up-to-date implementation information.
+
+This structure supports versioning, where the version of the structure is
+embedded into the structure itself. To ensure forward and backwards
+compatibility, all versions of the structure must use the same 'qe_header'
+structure at the beginning.
+
+'header' (type: struct qe_header):
+ The 'length' field is the size, in bytes, of the entire structure,
+ including all the microcode embedded in it, as well as the CRC (if
+ present).
+
+ The 'magic' field is an array of three bytes that contains the letters
+ 'Q', 'E', and 'F'. This is an identifier that indicates that this
+ structure is a QE Firmware structure.
+
+ The 'version' field is a single byte that indicates the version of this
+ structure. If the layout of the structure should ever need to be
+ changed to add support for additional types of microcode, then the
+ version number should also be changed.
+
+The 'id' field is a null-terminated string(suitable for printing) that
+identifies the firmware.
+
+The 'count' field indicates the number of 'microcode' structures. There
+must be one and only one 'microcode' structure for each RISC processor.
+Therefore, this field also represents the number of RISC processors for this
+SOC.
+
+The 'soc' structure contains the SOC numbers and revisions used to match
+the microcode to the SOC itself. Normally, the microcode loader should
+check the data in this structure with the SOC number and revisions, and
+only upload the microcode if there's a match. However, this check is not
+made on all platforms.
+
+Although it is not recommended, you can specify '0' in the soc.model
+field to skip matching SOCs altogether.
+
+The 'model' field is a 16-bit number that matches the actual SOC. The
+'major' and 'minor' fields are the major and minor revision numbers,
+respectively, of the SOC.
+
+For example, to match the 8323, revision 1.0::
+
+ soc.model = 8323
+ soc.major = 1
+ soc.minor = 0
+
+'padding' is necessary for structure alignment. This field ensures that the
+'extended_modes' field is aligned on a 64-bit boundary.
+
+'extended_modes' is a bitfield that defines special functionality which has an
+impact on the device drivers. Each bit has its own impact and has special
+instructions for the driver associated with it. This field is stored in
+the QE library and available to any driver that calls qe_get_firmware_info().
+
+'vtraps' is an array of 8 words that contain virtual trap values for each
+virtual traps. As with 'extended_modes', this field is stored in the QE
+library and available to any driver that calls qe_get_firmware_info().
+
+'microcode' (type: struct qe_microcode):
+ For each RISC processor there is one 'microcode' structure. The first
+ 'microcode' structure is for the first RISC, and so on.
+
+ The 'id' field is a null-terminated string suitable for printing that
+ identifies this particular microcode.
+
+ 'traps' is an array of 16 words that contain hardware trap values
+ for each of the 16 traps. If trap[i] is 0, then this particular
+ trap is to be ignored (i.e. not written to TIBCR[i]). The entire value
+ is written as-is to the TIBCR[i] register, so be sure to set the EN
+ and T_IBP bits if necessary.
+
+ 'eccr' is the value to program into the ECCR register.
+
+ 'iram_offset' is the offset into IRAM to start writing the
+ microcode.
+
+ 'count' is the number of 32-bit words in the microcode.
+
+ 'code_offset' is the offset, in bytes, from the beginning of this
+ structure where the microcode itself can be found. The first
+ microcode binary should be located immediately after the 'microcode'
+ array.
+
+ 'major', 'minor', and 'revision' are the major, minor, and revision
+ version numbers, respectively, of the microcode. If all values are 0,
+ then these fields are ignored.
+
+ 'reserved' is necessary for structure alignment. Since 'microcode'
+ is an array, the 64-bit 'extended_modes' field needs to be aligned
+ on a 64-bit boundary, and this can only happen if the size of
+ 'microcode' is a multiple of 8 bytes. To ensure that, we add
+ 'reserved'.
+
+After the last microcode is a 32-bit CRC. It can be calculated using
+this algorithm::
+
+ u32 crc32(const u8 *p, unsigned int len)
+ {
+ unsigned int i;
+ u32 crc = 0;
+
+ while (len--) {
+ crc ^= *p++;
+ for (i = 0; i < 8; i++)
+ crc = (crc >> 1) ^ ((crc & 1) ? 0xedb88320 : 0);
+ }
+ return crc;
+ }
+
+VI - Sample Code for Creating Firmware Files
+============================================
+
+A Python program that creates firmware binaries from the header files normally
+distributed by Freescale can be found on http://opensource.freescale.com.
diff --git a/Documentation/arch/powerpc/syscall64-abi.rst b/Documentation/arch/powerpc/syscall64-abi.rst
new file mode 100644
index 000000000000..56490c4c0c07
--- /dev/null
+++ b/Documentation/arch/powerpc/syscall64-abi.rst
@@ -0,0 +1,153 @@
+===============================================
+Power Architecture 64-bit Linux system call ABI
+===============================================
+
+syscall
+=======
+
+Invocation
+----------
+The syscall is made with the sc instruction, and returns with execution
+continuing at the instruction following the sc instruction.
+
+If PPC_FEATURE2_SCV appears in the AT_HWCAP2 ELF auxiliary vector, the
+scv 0 instruction is an alternative that may provide better performance,
+with some differences to calling sequence.
+
+syscall calling sequence\ [1]_ matches the Power Architecture 64-bit ELF ABI
+specification C function calling sequence, including register preservation
+rules, with the following differences.
+
+.. [1] Some syscalls (typically low-level management functions) may have
+ different calling sequences (e.g., rt_sigreturn).
+
+Parameters
+----------
+The system call number is specified in r0.
+
+There is a maximum of 6 integer parameters to a syscall, passed in r3-r8.
+
+Return value
+------------
+- For the sc instruction, both a value and an error condition are returned.
+ cr0.SO is the error condition, and r3 is the return value. When cr0.SO is
+ clear, the syscall succeeded and r3 is the return value. When cr0.SO is set,
+ the syscall failed and r3 is the error value (that normally corresponds to
+ errno).
+
+- For the scv 0 instruction, the return value indicates failure if it is
+ -4095..-1 (i.e., it is >= -MAX_ERRNO (-4095) as an unsigned comparison),
+ in which case the error value is the negated return value.
+
+Stack
+-----
+System calls do not modify the caller's stack frame. For example, the caller's
+stack frame LR and CR save fields are not used.
+
+Register preservation rules
+---------------------------
+Register preservation rules match the ELF ABI calling sequence with some
+differences.
+
+For the sc instruction, the differences from the ELF ABI are as follows:
+
++--------------+--------------------+-----------------------------------------+
+| Register | Preservation Rules | Purpose |
++==============+====================+=========================================+
+| r0 | Volatile | (System call number.) |
++--------------+--------------------+-----------------------------------------+
+| r3 | Volatile | (Parameter 1, and return value.) |
++--------------+--------------------+-----------------------------------------+
+| r4-r8 | Volatile | (Parameters 2-6.) |
++--------------+--------------------+-----------------------------------------+
+| cr0 | Volatile | (cr0.SO is the return error condition.) |
++--------------+--------------------+-----------------------------------------+
+| cr1, cr5-7 | Nonvolatile | |
++--------------+--------------------+-----------------------------------------+
+| lr | Nonvolatile | |
++--------------+--------------------+-----------------------------------------+
+
+For the scv 0 instruction, the differences from the ELF ABI are as follows:
+
++--------------+--------------------+-----------------------------------------+
+| Register | Preservation Rules | Purpose |
++==============+====================+=========================================+
+| r0 | Volatile | (System call number.) |
++--------------+--------------------+-----------------------------------------+
+| r3 | Volatile | (Parameter 1, and return value.) |
++--------------+--------------------+-----------------------------------------+
+| r4-r8 | Volatile | (Parameters 2-6.) |
++--------------+--------------------+-----------------------------------------+
+
+All floating point and vector data registers as well as control and status
+registers are nonvolatile.
+
+Transactional Memory
+--------------------
+Syscall behavior can change if the processor is in transactional or suspended
+transaction state, and the syscall can affect the behavior of the transaction.
+
+If the processor is in suspended state when a syscall is made, the syscall
+will be performed as normal, and will return as normal. The syscall will be
+performed in suspended state, so its side effects will be persistent according
+to the usual transactional memory semantics. A syscall may or may not result
+in the transaction being doomed by hardware.
+
+If the processor is in transactional state when a syscall is made, then the
+behavior depends on the presence of PPC_FEATURE2_HTM_NOSC in the AT_HWCAP2 ELF
+auxiliary vector.
+
+- If present, which is the case for newer kernels, then the syscall will not
+ be performed and the transaction will be doomed by the kernel with the
+ failure code TM_CAUSE_SYSCALL | TM_CAUSE_PERSISTENT in the TEXASR SPR.
+
+- If not present (older kernels), then the kernel will suspend the
+ transactional state and the syscall will proceed as in the case of a
+ suspended state syscall, and will resume the transactional state before
+ returning to the caller. This case is not well defined or supported, so this
+ behavior should not be relied upon.
+
+scv 0 syscalls will always behave as PPC_FEATURE2_HTM_NOSC.
+
+ptrace
+------
+When ptracing system calls (PTRACE_SYSCALL), the pt_regs.trap value contains
+the system call type that can be used to distinguish between sc and scv 0
+system calls, and the different register conventions can be accounted for.
+
+If the value of (pt_regs.trap & 0xfff0) is 0xc00 then the system call was
+performed with the sc instruction, if it is 0x3000 then the system call was
+performed with the scv 0 instruction.
+
+vsyscall
+========
+
+vsyscall calling sequence matches the syscall calling sequence, with the
+following differences. Some vsyscalls may have different calling sequences.
+
+Parameters and return value
+---------------------------
+r0 is not used as an input. The vsyscall is selected by its address.
+
+Stack
+-----
+The vsyscall may or may not use the caller's stack frame save areas.
+
+Register preservation rules
+---------------------------
+
+=========== ========
+r0 Volatile
+cr1, cr5-7 Volatile
+lr Volatile
+=========== ========
+
+Invocation
+----------
+The vsyscall is performed with a branch-with-link instruction to the vsyscall
+function address.
+
+Transactional Memory
+--------------------
+vsyscalls will run in the same transactional state as the caller. A vsyscall
+may or may not result in the transaction being doomed by hardware.
diff --git a/Documentation/arch/powerpc/transactional_memory.rst b/Documentation/arch/powerpc/transactional_memory.rst
new file mode 100644
index 000000000000..040a20675fd1
--- /dev/null
+++ b/Documentation/arch/powerpc/transactional_memory.rst
@@ -0,0 +1,274 @@
+============================
+Transactional Memory support
+============================
+
+POWER kernel support for this feature is currently limited to supporting
+its use by user programs. It is not currently used by the kernel itself.
+
+This file aims to sum up how it is supported by Linux and what behaviour you
+can expect from your user programs.
+
+
+Basic overview
+==============
+
+Hardware Transactional Memory is supported on POWER8 processors, and is a
+feature that enables a different form of atomic memory access. Several new
+instructions are presented to delimit transactions; transactions are
+guaranteed to either complete atomically or roll back and undo any partial
+changes.
+
+A simple transaction looks like this::
+
+ begin_move_money:
+ tbegin
+ beq abort_handler
+
+ ld r4, SAVINGS_ACCT(r3)
+ ld r5, CURRENT_ACCT(r3)
+ subi r5, r5, 1
+ addi r4, r4, 1
+ std r4, SAVINGS_ACCT(r3)
+ std r5, CURRENT_ACCT(r3)
+
+ tend
+
+ b continue
+
+ abort_handler:
+ ... test for odd failures ...
+
+ /* Retry the transaction if it failed because it conflicted with
+ * someone else: */
+ b begin_move_money
+
+
+The 'tbegin' instruction denotes the start point, and 'tend' the end point.
+Between these points the processor is in 'Transactional' state; any memory
+references will complete in one go if there are no conflicts with other
+transactional or non-transactional accesses within the system. In this
+example, the transaction completes as though it were normal straight-line code
+IF no other processor has touched SAVINGS_ACCT(r3) or CURRENT_ACCT(r3); an
+atomic move of money from the current account to the savings account has been
+performed. Even though the normal ld/std instructions are used (note no
+lwarx/stwcx), either *both* SAVINGS_ACCT(r3) and CURRENT_ACCT(r3) will be
+updated, or neither will be updated.
+
+If, in the meantime, there is a conflict with the locations accessed by the
+transaction, the transaction will be aborted by the CPU. Register and memory
+state will roll back to that at the 'tbegin', and control will continue from
+'tbegin+4'. The branch to abort_handler will be taken this second time; the
+abort handler can check the cause of the failure, and retry.
+
+Checkpointed registers include all GPRs, FPRs, VRs/VSRs, LR, CCR/CR, CTR, FPCSR
+and a few other status/flag regs; see the ISA for details.
+
+Causes of transaction aborts
+============================
+
+- Conflicts with cache lines used by other processors
+- Signals
+- Context switches
+- See the ISA for full documentation of everything that will abort transactions.
+
+
+Syscalls
+========
+
+Syscalls made from within an active transaction will not be performed and the
+transaction will be doomed by the kernel with the failure code TM_CAUSE_SYSCALL
+| TM_CAUSE_PERSISTENT.
+
+Syscalls made from within a suspended transaction are performed as normal and
+the transaction is not explicitly doomed by the kernel. However, what the
+kernel does to perform the syscall may result in the transaction being doomed
+by the hardware. The syscall is performed in suspended mode so any side
+effects will be persistent, independent of transaction success or failure. No
+guarantees are provided by the kernel about which syscalls will affect
+transaction success.
+
+Care must be taken when relying on syscalls to abort during active transactions
+if the calls are made via a library. Libraries may cache values (which may
+give the appearance of success) or perform operations that cause transaction
+failure before entering the kernel (which may produce different failure codes).
+Examples are glibc's getpid() and lazy symbol resolution.
+
+
+Signals
+=======
+
+Delivery of signals (both sync and async) during transactions provides a second
+thread state (ucontext/mcontext) to represent the second transactional register
+state. Signal delivery 'treclaim's to capture both register states, so signals
+abort transactions. The usual ucontext_t passed to the signal handler
+represents the checkpointed/original register state; the signal appears to have
+arisen at 'tbegin+4'.
+
+If the sighandler ucontext has uc_link set, a second ucontext has been
+delivered. For future compatibility the MSR.TS field should be checked to
+determine the transactional state -- if so, the second ucontext in uc->uc_link
+represents the active transactional registers at the point of the signal.
+
+For 64-bit processes, uc->uc_mcontext.regs->msr is a full 64-bit MSR and its TS
+field shows the transactional mode.
+
+For 32-bit processes, the mcontext's MSR register is only 32 bits; the top 32
+bits are stored in the MSR of the second ucontext, i.e. in
+uc->uc_link->uc_mcontext.regs->msr. The top word contains the transactional
+state TS.
+
+However, basic signal handlers don't need to be aware of transactions
+and simply returning from the handler will deal with things correctly:
+
+Transaction-aware signal handlers can read the transactional register state
+from the second ucontext. This will be necessary for crash handlers to
+determine, for example, the address of the instruction causing the SIGSEGV.
+
+Example signal handler::
+
+ void crash_handler(int sig, siginfo_t *si, void *uc)
+ {
+ ucontext_t *ucp = uc;
+ ucontext_t *transactional_ucp = ucp->uc_link;
+
+ if (ucp_link) {
+ u64 msr = ucp->uc_mcontext.regs->msr;
+ /* May have transactional ucontext! */
+ #ifndef __powerpc64__
+ msr |= ((u64)transactional_ucp->uc_mcontext.regs->msr) << 32;
+ #endif
+ if (MSR_TM_ACTIVE(msr)) {
+ /* Yes, we crashed during a transaction. Oops. */
+ fprintf(stderr, "Transaction to be restarted at 0x%llx, but "
+ "crashy instruction was at 0x%llx\n",
+ ucp->uc_mcontext.regs->nip,
+ transactional_ucp->uc_mcontext.regs->nip);
+ }
+ }
+
+ fix_the_problem(ucp->dar);
+ }
+
+When in an active transaction that takes a signal, we need to be careful with
+the stack. It's possible that the stack has moved back up after the tbegin.
+The obvious case here is when the tbegin is called inside a function that
+returns before a tend. In this case, the stack is part of the checkpointed
+transactional memory state. If we write over this non transactionally or in
+suspend, we are in trouble because if we get a tm abort, the program counter and
+stack pointer will be back at the tbegin but our in memory stack won't be valid
+anymore.
+
+To avoid this, when taking a signal in an active transaction, we need to use
+the stack pointer from the checkpointed state, rather than the speculated
+state. This ensures that the signal context (written tm suspended) will be
+written below the stack required for the rollback. The transaction is aborted
+because of the treclaim, so any memory written between the tbegin and the
+signal will be rolled back anyway.
+
+For signals taken in non-TM or suspended mode, we use the
+normal/non-checkpointed stack pointer.
+
+Any transaction initiated inside a sighandler and suspended on return
+from the sighandler to the kernel will get reclaimed and discarded.
+
+Failure cause codes used by kernel
+==================================
+
+These are defined in <asm/reg.h>, and distinguish different reasons why the
+kernel aborted a transaction:
+
+ ====================== ================================
+ TM_CAUSE_RESCHED Thread was rescheduled.
+ TM_CAUSE_TLBI Software TLB invalid.
+ TM_CAUSE_FAC_UNAV FP/VEC/VSX unavailable trap.
+ TM_CAUSE_SYSCALL Syscall from active transaction.
+ TM_CAUSE_SIGNAL Signal delivered.
+ TM_CAUSE_MISC Currently unused.
+ TM_CAUSE_ALIGNMENT Alignment fault.
+ TM_CAUSE_EMULATE Emulation that touched memory.
+ ====================== ================================
+
+These can be checked by the user program's abort handler as TEXASR[0:7]. If
+bit 7 is set, it indicates that the error is considered persistent. For example
+a TM_CAUSE_ALIGNMENT will be persistent while a TM_CAUSE_RESCHED will not.
+
+GDB
+===
+
+GDB and ptrace are not currently TM-aware. If one stops during a transaction,
+it looks like the transaction has just started (the checkpointed state is
+presented). The transaction cannot then be continued and will take the failure
+handler route. Furthermore, the transactional 2nd register state will be
+inaccessible. GDB can currently be used on programs using TM, but not sensibly
+in parts within transactions.
+
+POWER9
+======
+
+TM on POWER9 has issues with storing the complete register state. This
+is described in this commit::
+
+ commit 4bb3c7a0208fc13ca70598efd109901a7cd45ae7
+ Author: Paul Mackerras <paulus@ozlabs.org>
+ Date: Wed Mar 21 21:32:01 2018 +1100
+ KVM: PPC: Book3S HV: Work around transactional memory bugs in POWER9
+
+To account for this different POWER9 chips have TM enabled in
+different ways.
+
+On POWER9N DD2.01 and below, TM is disabled. ie
+HWCAP2[PPC_FEATURE2_HTM] is not set.
+
+On POWER9N DD2.1 TM is configured by firmware to always abort a
+transaction when tm suspend occurs. So tsuspend will cause a
+transaction to be aborted and rolled back. Kernel exceptions will also
+cause the transaction to be aborted and rolled back and the exception
+will not occur. If userspace constructs a sigcontext that enables TM
+suspend, the sigcontext will be rejected by the kernel. This mode is
+advertised to users with HWCAP2[PPC_FEATURE2_HTM_NO_SUSPEND] set.
+HWCAP2[PPC_FEATURE2_HTM] is not set in this mode.
+
+On POWER9N DD2.2 and above, KVM and POWERVM emulate TM for guests (as
+described in commit 4bb3c7a0208f), hence TM is enabled for guests
+ie. HWCAP2[PPC_FEATURE2_HTM] is set for guest userspace. Guests that
+makes heavy use of TM suspend (tsuspend or kernel suspend) will result
+in traps into the hypervisor and hence will suffer a performance
+degradation. Host userspace has TM disabled
+ie. HWCAP2[PPC_FEATURE2_HTM] is not set. (although we make enable it
+at some point in the future if we bring the emulation into host
+userspace context switching).
+
+POWER9C DD1.2 and above are only available with POWERVM and hence
+Linux only runs as a guest. On these systems TM is emulated like on
+POWER9N DD2.2.
+
+Guest migration from POWER8 to POWER9 will work with POWER9N DD2.2 and
+POWER9C DD1.2. Since earlier POWER9 processors don't support TM
+emulation, migration from POWER8 to POWER9 is not supported there.
+
+Kernel implementation
+=====================
+
+h/rfid mtmsrd quirk
+-------------------
+
+As defined in the ISA, rfid has a quirk which is useful in early
+exception handling. When in a userspace transaction and we enter the
+kernel via some exception, MSR will end up as TM=0 and TS=01 (ie. TM
+off but TM suspended). Regularly the kernel will want change bits in
+the MSR and will perform an rfid to do this. In this case rfid can
+have SRR0 TM = 0 and TS = 00 (ie. TM off and non transaction) and the
+resulting MSR will retain TM = 0 and TS=01 from before (ie. stay in
+suspend). This is a quirk in the architecture as this would normally
+be a transition from TS=01 to TS=00 (ie. suspend -> non transactional)
+which is an illegal transition.
+
+This quirk is described the architecture in the definition of rfid
+with these lines:
+
+ if (MSR 29:31 ¬ = 0b010 | SRR1 29:31 ¬ = 0b000) then
+ MSR 29:31 <- SRR1 29:31
+
+hrfid and mtmsrd have the same quirk.
+
+The Linux kernel uses this quirk in its early exception handling.
diff --git a/Documentation/arch/powerpc/ultravisor.rst b/Documentation/arch/powerpc/ultravisor.rst
new file mode 100644
index 000000000000..6d0407b2f5a1
--- /dev/null
+++ b/Documentation/arch/powerpc/ultravisor.rst
@@ -0,0 +1,1117 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. _ultravisor:
+
+============================
+Protected Execution Facility
+============================
+
+.. contents::
+ :depth: 3
+
+Introduction
+############
+
+ Protected Execution Facility (PEF) is an architectural change for
+ POWER 9 that enables Secure Virtual Machines (SVMs). DD2.3 chips
+ (PVR=0x004e1203) or greater will be PEF-capable. A new ISA release
+ will include the PEF RFC02487 changes.
+
+ When enabled, PEF adds a new higher privileged mode, called Ultravisor
+ mode, to POWER architecture. Along with the new mode there is new
+ firmware called the Protected Execution Ultravisor (or Ultravisor
+ for short). Ultravisor mode is the highest privileged mode in POWER
+ architecture.
+
+ +------------------+
+ | Privilege States |
+ +==================+
+ | Problem |
+ +------------------+
+ | Supervisor |
+ +------------------+
+ | Hypervisor |
+ +------------------+
+ | Ultravisor |
+ +------------------+
+
+ PEF protects SVMs from the hypervisor, privileged users, and other
+ VMs in the system. SVMs are protected while at rest and can only be
+ executed by an authorized machine. All virtual machines utilize
+ hypervisor services. The Ultravisor filters calls between the SVMs
+ and the hypervisor to assure that information does not accidentally
+ leak. All hypercalls except H_RANDOM are reflected to the hypervisor.
+ H_RANDOM is not reflected to prevent the hypervisor from influencing
+ random values in the SVM.
+
+ To support this there is a refactoring of the ownership of resources
+ in the CPU. Some of the resources which were previously hypervisor
+ privileged are now ultravisor privileged.
+
+Hardware
+========
+
+ The hardware changes include the following:
+
+ * There is a new bit in the MSR that determines whether the current
+ process is running in secure mode, MSR(S) bit 41. MSR(S)=1, process
+ is in secure mode, MSR(s)=0 process is in normal mode.
+
+ * The MSR(S) bit can only be set by the Ultravisor.
+
+ * HRFID cannot be used to set the MSR(S) bit. If the hypervisor needs
+ to return to a SVM it must use an ultracall. It can determine if
+ the VM it is returning to is secure.
+
+ * There is a new Ultravisor privileged register, SMFCTRL, which has an
+ enable/disable bit SMFCTRL(E).
+
+ * The privilege of a process is now determined by three MSR bits,
+ MSR(S, HV, PR). In each of the tables below the modes are listed
+ from least privilege to highest privilege. The higher privilege
+ modes can access all the resources of the lower privilege modes.
+
+ **Secure Mode MSR Settings**
+
+ +---+---+---+---------------+
+ | S | HV| PR|Privilege |
+ +===+===+===+===============+
+ | 1 | 0 | 1 | Problem |
+ +---+---+---+---------------+
+ | 1 | 0 | 0 | Privileged(OS)|
+ +---+---+---+---------------+
+ | 1 | 1 | 0 | Ultravisor |
+ +---+---+---+---------------+
+ | 1 | 1 | 1 | Reserved |
+ +---+---+---+---------------+
+
+ **Normal Mode MSR Settings**
+
+ +---+---+---+---------------+
+ | S | HV| PR|Privilege |
+ +===+===+===+===============+
+ | 0 | 0 | 1 | Problem |
+ +---+---+---+---------------+
+ | 0 | 0 | 0 | Privileged(OS)|
+ +---+---+---+---------------+
+ | 0 | 1 | 0 | Hypervisor |
+ +---+---+---+---------------+
+ | 0 | 1 | 1 | Problem (Host)|
+ +---+---+---+---------------+
+
+ * Memory is partitioned into secure and normal memory. Only processes
+ that are running in secure mode can access secure memory.
+
+ * The hardware does not allow anything that is not running secure to
+ access secure memory. This means that the Hypervisor cannot access
+ the memory of the SVM without using an ultracall (asking the
+ Ultravisor). The Ultravisor will only allow the hypervisor to see
+ the SVM memory encrypted.
+
+ * I/O systems are not allowed to directly address secure memory. This
+ limits the SVMs to virtual I/O only.
+
+ * The architecture allows the SVM to share pages of memory with the
+ hypervisor that are not protected with encryption. However, this
+ sharing must be initiated by the SVM.
+
+ * When a process is running in secure mode all hypercalls
+ (syscall lev=1) go to the Ultravisor.
+
+ * When a process is in secure mode all interrupts go to the
+ Ultravisor.
+
+ * The following resources have become Ultravisor privileged and
+ require an Ultravisor interface to manipulate:
+
+ * Processor configurations registers (SCOMs).
+
+ * Stop state information.
+
+ * The debug registers CIABR, DAWR, and DAWRX when SMFCTRL(D) is set.
+ If SMFCTRL(D) is not set they do not work in secure mode. When set,
+ reading and writing requires an Ultravisor call, otherwise that
+ will cause a Hypervisor Emulation Assistance interrupt.
+
+ * PTCR and partition table entries (partition table is in secure
+ memory). An attempt to write to PTCR will cause a Hypervisor
+ Emulation Assistance interrupt.
+
+ * LDBAR (LD Base Address Register) and IMC (In-Memory Collection)
+ non-architected registers. An attempt to write to them will cause a
+ Hypervisor Emulation Assistance interrupt.
+
+ * Paging for an SVM, sharing of memory with Hypervisor for an SVM.
+ (Including Virtual Processor Area (VPA) and virtual I/O).
+
+
+Software/Microcode
+==================
+
+ The software changes include:
+
+ * SVMs are created from normal VM using (open source) tooling supplied
+ by IBM.
+
+ * All SVMs start as normal VMs and utilize an ultracall, UV_ESM
+ (Enter Secure Mode), to make the transition.
+
+ * When the UV_ESM ultracall is made the Ultravisor copies the VM into
+ secure memory, decrypts the verification information, and checks the
+ integrity of the SVM. If the integrity check passes the Ultravisor
+ passes control in secure mode.
+
+ * The verification information includes the pass phrase for the
+ encrypted disk associated with the SVM. This pass phrase is given
+ to the SVM when requested.
+
+ * The Ultravisor is not involved in protecting the encrypted disk of
+ the SVM while at rest.
+
+ * For external interrupts the Ultravisor saves the state of the SVM,
+ and reflects the interrupt to the hypervisor for processing.
+ For hypercalls, the Ultravisor inserts neutral state into all
+ registers not needed for the hypercall then reflects the call to
+ the hypervisor for processing. The H_RANDOM hypercall is performed
+ by the Ultravisor and not reflected.
+
+ * For virtual I/O to work bounce buffering must be done.
+
+ * The Ultravisor uses AES (IAPM) for protection of SVM memory. IAPM
+ is a mode of AES that provides integrity and secrecy concurrently.
+
+ * The movement of data between normal and secure pages is coordinated
+ with the Ultravisor by a new HMM plug-in in the Hypervisor.
+
+ The Ultravisor offers new services to the hypervisor and SVMs. These
+ are accessed through ultracalls.
+
+Terminology
+===========
+
+ * Hypercalls: special system calls used to request services from
+ Hypervisor.
+
+ * Normal memory: Memory that is accessible to Hypervisor.
+
+ * Normal page: Page backed by normal memory and available to
+ Hypervisor.
+
+ * Shared page: A page backed by normal memory and available to both
+ the Hypervisor/QEMU and the SVM (i.e page has mappings in SVM and
+ Hypervisor/QEMU).
+
+ * Secure memory: Memory that is accessible only to Ultravisor and
+ SVMs.
+
+ * Secure page: Page backed by secure memory and only available to
+ Ultravisor and SVM.
+
+ * SVM: Secure Virtual Machine.
+
+ * Ultracalls: special system calls used to request services from
+ Ultravisor.
+
+
+Ultravisor calls API
+####################
+
+ This section describes Ultravisor calls (ultracalls) needed to
+ support Secure Virtual Machines (SVM)s and Paravirtualized KVM. The
+ ultracalls allow the SVMs and Hypervisor to request services from the
+ Ultravisor such as accessing a register or memory region that can only
+ be accessed when running in Ultravisor-privileged mode.
+
+ The specific service needed from an ultracall is specified in register
+ R3 (the first parameter to the ultracall). Other parameters to the
+ ultracall, if any, are specified in registers R4 through R12.
+
+ Return value of all ultracalls is in register R3. Other output values
+ from the ultracall, if any, are returned in registers R4 through R12.
+ The only exception to this register usage is the ``UV_RETURN``
+ ultracall described below.
+
+ Each ultracall returns specific error codes, applicable in the context
+ of the ultracall. However, like with the PowerPC Architecture Platform
+ Reference (PAPR), if no specific error code is defined for a
+ particular situation, then the ultracall will fallback to an erroneous
+ parameter-position based code. i.e U_PARAMETER, U_P2, U_P3 etc
+ depending on the ultracall parameter that may have caused the error.
+
+ Some ultracalls involve transferring a page of data between Ultravisor
+ and Hypervisor. Secure pages that are transferred from secure memory
+ to normal memory may be encrypted using dynamically generated keys.
+ When the secure pages are transferred back to secure memory, they may
+ be decrypted using the same dynamically generated keys. Generation and
+ management of these keys will be covered in a separate document.
+
+ For now this only covers ultracalls currently implemented and being
+ used by Hypervisor and SVMs but others can be added here when it
+ makes sense.
+
+ The full specification for all hypercalls/ultracalls will eventually
+ be made available in the public/OpenPower version of the PAPR
+ specification.
+
+ .. note::
+
+ If PEF is not enabled, the ultracalls will be redirected to the
+ Hypervisor which must handle/fail the calls.
+
+Ultracalls used by Hypervisor
+=============================
+
+ This section describes the virtual memory management ultracalls used
+ by the Hypervisor to manage SVMs.
+
+UV_PAGE_OUT
+-----------
+
+ Encrypt and move the contents of a page from secure memory to normal
+ memory.
+
+Syntax
+~~~~~~
+
+.. code-block:: c
+
+ uint64_t ultracall(const uint64_t UV_PAGE_OUT,
+ uint16_t lpid, /* LPAR ID */
+ uint64_t dest_ra, /* real address of destination page */
+ uint64_t src_gpa, /* source guest-physical-address */
+ uint8_t flags, /* flags */
+ uint64_t order) /* page size order */
+
+Return values
+~~~~~~~~~~~~~
+
+ One of the following values:
+
+ * U_SUCCESS on success.
+ * U_PARAMETER if ``lpid`` is invalid.
+ * U_P2 if ``dest_ra`` is invalid.
+ * U_P3 if the ``src_gpa`` address is invalid.
+ * U_P4 if any bit in the ``flags`` is unrecognized
+ * U_P5 if the ``order`` parameter is unsupported.
+ * U_FUNCTION if functionality is not supported.
+ * U_BUSY if page cannot be currently paged-out.
+
+Description
+~~~~~~~~~~~
+
+ Encrypt the contents of a secure-page and make it available to
+ Hypervisor in a normal page.
+
+ By default, the source page is unmapped from the SVM's partition-
+ scoped page table. But the Hypervisor can provide a hint to the
+ Ultravisor to retain the page mapping by setting the ``UV_SNAPSHOT``
+ flag in ``flags`` parameter.
+
+ If the source page is already a shared page the call returns
+ U_SUCCESS, without doing anything.
+
+Use cases
+~~~~~~~~~
+
+ #. QEMU attempts to access an address belonging to the SVM but the
+ page frame for that address is not mapped into QEMU's address
+ space. In this case, the Hypervisor will allocate a page frame,
+ map it into QEMU's address space and issue the ``UV_PAGE_OUT``
+ call to retrieve the encrypted contents of the page.
+
+ #. When Ultravisor runs low on secure memory and it needs to page-out
+ an LRU page. In this case, Ultravisor will issue the
+ ``H_SVM_PAGE_OUT`` hypercall to the Hypervisor. The Hypervisor will
+ then allocate a normal page and issue the ``UV_PAGE_OUT`` ultracall
+ and the Ultravisor will encrypt and move the contents of the secure
+ page into the normal page.
+
+ #. When Hypervisor accesses SVM data, the Hypervisor requests the
+ Ultravisor to transfer the corresponding page into a insecure page,
+ which the Hypervisor can access. The data in the normal page will
+ be encrypted though.
+
+UV_PAGE_IN
+----------
+
+ Move the contents of a page from normal memory to secure memory.
+
+Syntax
+~~~~~~
+
+.. code-block:: c
+
+ uint64_t ultracall(const uint64_t UV_PAGE_IN,
+ uint16_t lpid, /* the LPAR ID */
+ uint64_t src_ra, /* source real address of page */
+ uint64_t dest_gpa, /* destination guest physical address */
+ uint64_t flags, /* flags */
+ uint64_t order) /* page size order */
+
+Return values
+~~~~~~~~~~~~~
+
+ One of the following values:
+
+ * U_SUCCESS on success.
+ * U_BUSY if page cannot be currently paged-in.
+ * U_FUNCTION if functionality is not supported
+ * U_PARAMETER if ``lpid`` is invalid.
+ * U_P2 if ``src_ra`` is invalid.
+ * U_P3 if the ``dest_gpa`` address is invalid.
+ * U_P4 if any bit in the ``flags`` is unrecognized
+ * U_P5 if the ``order`` parameter is unsupported.
+
+Description
+~~~~~~~~~~~
+
+ Move the contents of the page identified by ``src_ra`` from normal
+ memory to secure memory and map it to the guest physical address
+ ``dest_gpa``.
+
+ If `dest_gpa` refers to a shared address, map the page into the
+ partition-scoped page-table of the SVM. If `dest_gpa` is not shared,
+ copy the contents of the page into the corresponding secure page.
+ Depending on the context, decrypt the page before being copied.
+
+ The caller provides the attributes of the page through the ``flags``
+ parameter. Valid values for ``flags`` are:
+
+ * CACHE_INHIBITED
+ * CACHE_ENABLED
+ * WRITE_PROTECTION
+
+ The Hypervisor must pin the page in memory before making
+ ``UV_PAGE_IN`` ultracall.
+
+Use cases
+~~~~~~~~~
+
+ #. When a normal VM switches to secure mode, all its pages residing
+ in normal memory, are moved into secure memory.
+
+ #. When an SVM requests to share a page with Hypervisor the Hypervisor
+ allocates a page and informs the Ultravisor.
+
+ #. When an SVM accesses a secure page that has been paged-out,
+ Ultravisor invokes the Hypervisor to locate the page. After
+ locating the page, the Hypervisor uses UV_PAGE_IN to make the
+ page available to Ultravisor.
+
+UV_PAGE_INVAL
+-------------
+
+ Invalidate the Ultravisor mapping of a page.
+
+Syntax
+~~~~~~
+
+.. code-block:: c
+
+ uint64_t ultracall(const uint64_t UV_PAGE_INVAL,
+ uint16_t lpid, /* the LPAR ID */
+ uint64_t guest_pa, /* destination guest-physical-address */
+ uint64_t order) /* page size order */
+
+Return values
+~~~~~~~~~~~~~
+
+ One of the following values:
+
+ * U_SUCCESS on success.
+ * U_PARAMETER if ``lpid`` is invalid.
+ * U_P2 if ``guest_pa`` is invalid (or corresponds to a secure
+ page mapping).
+ * U_P3 if the ``order`` is invalid.
+ * U_FUNCTION if functionality is not supported.
+ * U_BUSY if page cannot be currently invalidated.
+
+Description
+~~~~~~~~~~~
+
+ This ultracall informs Ultravisor that the page mapping in Hypervisor
+ corresponding to the given guest physical address has been invalidated
+ and that the Ultravisor should not access the page. If the specified
+ ``guest_pa`` corresponds to a secure page, Ultravisor will ignore the
+ attempt to invalidate the page and return U_P2.
+
+Use cases
+~~~~~~~~~
+
+ #. When a shared page is unmapped from the QEMU's page table, possibly
+ because it is paged-out to disk, Ultravisor needs to know that the
+ page should not be accessed from its side too.
+
+
+UV_WRITE_PATE
+-------------
+
+ Validate and write the partition table entry (PATE) for a given
+ partition.
+
+Syntax
+~~~~~~
+
+.. code-block:: c
+
+ uint64_t ultracall(const uint64_t UV_WRITE_PATE,
+ uint32_t lpid, /* the LPAR ID */
+ uint64_t dw0 /* the first double word to write */
+ uint64_t dw1) /* the second double word to write */
+
+Return values
+~~~~~~~~~~~~~
+
+ One of the following values:
+
+ * U_SUCCESS on success.
+ * U_BUSY if PATE cannot be currently written to.
+ * U_FUNCTION if functionality is not supported.
+ * U_PARAMETER if ``lpid`` is invalid.
+ * U_P2 if ``dw0`` is invalid.
+ * U_P3 if the ``dw1`` address is invalid.
+ * U_PERMISSION if the Hypervisor is attempting to change the PATE
+ of a secure virtual machine or if called from a
+ context other than Hypervisor.
+
+Description
+~~~~~~~~~~~
+
+ Validate and write a LPID and its partition-table-entry for the given
+ LPID. If the LPID is already allocated and initialized, this call
+ results in changing the partition table entry.
+
+Use cases
+~~~~~~~~~
+
+ #. The Partition table resides in Secure memory and its entries,
+ called PATE (Partition Table Entries), point to the partition-
+ scoped page tables for the Hypervisor as well as each of the
+ virtual machines (both secure and normal). The Hypervisor
+ operates in partition 0 and its partition-scoped page tables
+ reside in normal memory.
+
+ #. This ultracall allows the Hypervisor to register the partition-
+ scoped and process-scoped page table entries for the Hypervisor
+ and other partitions (virtual machines) with the Ultravisor.
+
+ #. If the value of the PATE for an existing partition (VM) changes,
+ the TLB cache for the partition is flushed.
+
+ #. The Hypervisor is responsible for allocating LPID. The LPID and
+ its PATE entry are registered together. The Hypervisor manages
+ the PATE entries for a normal VM and can change the PATE entry
+ anytime. Ultravisor manages the PATE entries for an SVM and
+ Hypervisor is not allowed to modify them.
+
+UV_RETURN
+---------
+
+ Return control from the Hypervisor back to the Ultravisor after
+ processing an hypercall or interrupt that was forwarded (aka
+ *reflected*) to the Hypervisor.
+
+Syntax
+~~~~~~
+
+.. code-block:: c
+
+ uint64_t ultracall(const uint64_t UV_RETURN)
+
+Return values
+~~~~~~~~~~~~~
+
+ This call never returns to Hypervisor on success. It returns
+ U_INVALID if ultracall is not made from a Hypervisor context.
+
+Description
+~~~~~~~~~~~
+
+ When an SVM makes an hypercall or incurs some other exception, the
+ Ultravisor usually forwards (aka *reflects*) the exceptions to the
+ Hypervisor. After processing the exception, Hypervisor uses the
+ ``UV_RETURN`` ultracall to return control back to the SVM.
+
+ The expected register state on entry to this ultracall is:
+
+ * Non-volatile registers are restored to their original values.
+ * If returning from an hypercall, register R0 contains the return
+ value (**unlike other ultracalls**) and, registers R4 through R12
+ contain any output values of the hypercall.
+ * R3 contains the ultracall number, i.e UV_RETURN.
+ * If returning with a synthesized interrupt, R2 contains the
+ synthesized interrupt number.
+
+Use cases
+~~~~~~~~~
+
+ #. Ultravisor relies on the Hypervisor to provide several services to
+ the SVM such as processing hypercall and other exceptions. After
+ processing the exception, Hypervisor uses UV_RETURN to return
+ control back to the Ultravisor.
+
+ #. Hypervisor has to use this ultracall to return control to the SVM.
+
+
+UV_REGISTER_MEM_SLOT
+--------------------
+
+ Register an SVM address-range with specified properties.
+
+Syntax
+~~~~~~
+
+.. code-block:: c
+
+ uint64_t ultracall(const uint64_t UV_REGISTER_MEM_SLOT,
+ uint64_t lpid, /* LPAR ID of the SVM */
+ uint64_t start_gpa, /* start guest physical address */
+ uint64_t size, /* size of address range in bytes */
+ uint64_t flags /* reserved for future expansion */
+ uint16_t slotid) /* slot identifier */
+
+Return values
+~~~~~~~~~~~~~
+
+ One of the following values:
+
+ * U_SUCCESS on success.
+ * U_PARAMETER if ``lpid`` is invalid.
+ * U_P2 if ``start_gpa`` is invalid.
+ * U_P3 if ``size`` is invalid.
+ * U_P4 if any bit in the ``flags`` is unrecognized.
+ * U_P5 if the ``slotid`` parameter is unsupported.
+ * U_PERMISSION if called from context other than Hypervisor.
+ * U_FUNCTION if functionality is not supported.
+
+
+Description
+~~~~~~~~~~~
+
+ Register a memory range for an SVM. The memory range starts at the
+ guest physical address ``start_gpa`` and is ``size`` bytes long.
+
+Use cases
+~~~~~~~~~
+
+
+ #. When a virtual machine goes secure, all the memory slots managed by
+ the Hypervisor move into secure memory. The Hypervisor iterates
+ through each of memory slots, and registers the slot with
+ Ultravisor. Hypervisor may discard some slots such as those used
+ for firmware (SLOF).
+
+ #. When new memory is hot-plugged, a new memory slot gets registered.
+
+
+UV_UNREGISTER_MEM_SLOT
+----------------------
+
+ Unregister an SVM address-range that was previously registered using
+ UV_REGISTER_MEM_SLOT.
+
+Syntax
+~~~~~~
+
+.. code-block:: c
+
+ uint64_t ultracall(const uint64_t UV_UNREGISTER_MEM_SLOT,
+ uint64_t lpid, /* LPAR ID of the SVM */
+ uint64_t slotid) /* reservation slotid */
+
+Return values
+~~~~~~~~~~~~~
+
+ One of the following values:
+
+ * U_SUCCESS on success.
+ * U_FUNCTION if functionality is not supported.
+ * U_PARAMETER if ``lpid`` is invalid.
+ * U_P2 if ``slotid`` is invalid.
+ * U_PERMISSION if called from context other than Hypervisor.
+
+Description
+~~~~~~~~~~~
+
+ Release the memory slot identified by ``slotid`` and free any
+ resources allocated towards the reservation.
+
+Use cases
+~~~~~~~~~
+
+ #. Memory hot-remove.
+
+
+UV_SVM_TERMINATE
+----------------
+
+ Terminate an SVM and release its resources.
+
+Syntax
+~~~~~~
+
+.. code-block:: c
+
+ uint64_t ultracall(const uint64_t UV_SVM_TERMINATE,
+ uint64_t lpid, /* LPAR ID of the SVM */)
+
+Return values
+~~~~~~~~~~~~~
+
+ One of the following values:
+
+ * U_SUCCESS on success.
+ * U_FUNCTION if functionality is not supported.
+ * U_PARAMETER if ``lpid`` is invalid.
+ * U_INVALID if VM is not secure.
+ * U_PERMISSION if not called from a Hypervisor context.
+
+Description
+~~~~~~~~~~~
+
+ Terminate an SVM and release all its resources.
+
+Use cases
+~~~~~~~~~
+
+ #. Called by Hypervisor when terminating an SVM.
+
+
+Ultracalls used by SVM
+======================
+
+UV_SHARE_PAGE
+-------------
+
+ Share a set of guest physical pages with the Hypervisor.
+
+Syntax
+~~~~~~
+
+.. code-block:: c
+
+ uint64_t ultracall(const uint64_t UV_SHARE_PAGE,
+ uint64_t gfn, /* guest page frame number */
+ uint64_t num) /* number of pages of size PAGE_SIZE */
+
+Return values
+~~~~~~~~~~~~~
+
+ One of the following values:
+
+ * U_SUCCESS on success.
+ * U_FUNCTION if functionality is not supported.
+ * U_INVALID if the VM is not secure.
+ * U_PARAMETER if ``gfn`` is invalid.
+ * U_P2 if ``num`` is invalid.
+
+Description
+~~~~~~~~~~~
+
+ Share the ``num`` pages starting at guest physical frame number ``gfn``
+ with the Hypervisor. Assume page size is PAGE_SIZE bytes. Zero the
+ pages before returning.
+
+ If the address is already backed by a secure page, unmap the page and
+ back it with an insecure page, with the help of the Hypervisor. If it
+ is not backed by any page yet, mark the PTE as insecure and back it
+ with an insecure page when the address is accessed. If it is already
+ backed by an insecure page, zero the page and return.
+
+Use cases
+~~~~~~~~~
+
+ #. The Hypervisor cannot access the SVM pages since they are backed by
+ secure pages. Hence an SVM must explicitly request Ultravisor for
+ pages it can share with Hypervisor.
+
+ #. Shared pages are needed to support virtio and Virtual Processor Area
+ (VPA) in SVMs.
+
+
+UV_UNSHARE_PAGE
+---------------
+
+ Restore a shared SVM page to its initial state.
+
+Syntax
+~~~~~~
+
+.. code-block:: c
+
+ uint64_t ultracall(const uint64_t UV_UNSHARE_PAGE,
+ uint64_t gfn, /* guest page frame number */
+ uint73 num) /* number of pages of size PAGE_SIZE*/
+
+Return values
+~~~~~~~~~~~~~
+
+ One of the following values:
+
+ * U_SUCCESS on success.
+ * U_FUNCTION if functionality is not supported.
+ * U_INVALID if VM is not secure.
+ * U_PARAMETER if ``gfn`` is invalid.
+ * U_P2 if ``num`` is invalid.
+
+Description
+~~~~~~~~~~~
+
+ Stop sharing ``num`` pages starting at ``gfn`` with the Hypervisor.
+ Assume that the page size is PAGE_SIZE. Zero the pages before
+ returning.
+
+ If the address is already backed by an insecure page, unmap the page
+ and back it with a secure page. Inform the Hypervisor to release
+ reference to its shared page. If the address is not backed by a page
+ yet, mark the PTE as secure and back it with a secure page when that
+ address is accessed. If it is already backed by an secure page zero
+ the page and return.
+
+Use cases
+~~~~~~~~~
+
+ #. The SVM may decide to unshare a page from the Hypervisor.
+
+
+UV_UNSHARE_ALL_PAGES
+--------------------
+
+ Unshare all pages the SVM has shared with Hypervisor.
+
+Syntax
+~~~~~~
+
+.. code-block:: c
+
+ uint64_t ultracall(const uint64_t UV_UNSHARE_ALL_PAGES)
+
+Return values
+~~~~~~~~~~~~~
+
+ One of the following values:
+
+ * U_SUCCESS on success.
+ * U_FUNCTION if functionality is not supported.
+ * U_INVAL if VM is not secure.
+
+Description
+~~~~~~~~~~~
+
+ Unshare all shared pages from the Hypervisor. All unshared pages are
+ zeroed on return. Only pages explicitly shared by the SVM with the
+ Hypervisor (using UV_SHARE_PAGE ultracall) are unshared. Ultravisor
+ may internally share some pages with the Hypervisor without explicit
+ request from the SVM. These pages will not be unshared by this
+ ultracall.
+
+Use cases
+~~~~~~~~~
+
+ #. This call is needed when ``kexec`` is used to boot a different
+ kernel. It may also be needed during SVM reset.
+
+UV_ESM
+------
+
+ Secure the virtual machine (*enter secure mode*).
+
+Syntax
+~~~~~~
+
+.. code-block:: c
+
+ uint64_t ultracall(const uint64_t UV_ESM,
+ uint64_t esm_blob_addr, /* location of the ESM blob */
+ unint64_t fdt) /* Flattened device tree */
+
+Return values
+~~~~~~~~~~~~~
+
+ One of the following values:
+
+ * U_SUCCESS on success (including if VM is already secure).
+ * U_FUNCTION if functionality is not supported.
+ * U_INVALID if VM is not secure.
+ * U_PARAMETER if ``esm_blob_addr`` is invalid.
+ * U_P2 if ``fdt`` is invalid.
+ * U_PERMISSION if any integrity checks fail.
+ * U_RETRY insufficient memory to create SVM.
+ * U_NO_KEY symmetric key unavailable.
+
+Description
+~~~~~~~~~~~
+
+ Secure the virtual machine. On successful completion, return
+ control to the virtual machine at the address specified in the
+ ESM blob.
+
+Use cases
+~~~~~~~~~
+
+ #. A normal virtual machine can choose to switch to a secure mode.
+
+Hypervisor Calls API
+####################
+
+ This document describes the Hypervisor calls (hypercalls) that are
+ needed to support the Ultravisor. Hypercalls are services provided by
+ the Hypervisor to virtual machines and Ultravisor.
+
+ Register usage for these hypercalls is identical to that of the other
+ hypercalls defined in the Power Architecture Platform Reference (PAPR)
+ document. i.e on input, register R3 identifies the specific service
+ that is being requested and registers R4 through R11 contain
+ additional parameters to the hypercall, if any. On output, register
+ R3 contains the return value and registers R4 through R9 contain any
+ other output values from the hypercall.
+
+ This document only covers hypercalls currently implemented/planned
+ for Ultravisor usage but others can be added here when it makes sense.
+
+ The full specification for all hypercalls/ultracalls will eventually
+ be made available in the public/OpenPower version of the PAPR
+ specification.
+
+Hypervisor calls to support Ultravisor
+======================================
+
+ Following are the set of hypercalls needed to support Ultravisor.
+
+H_SVM_INIT_START
+----------------
+
+ Begin the process of converting a normal virtual machine into an SVM.
+
+Syntax
+~~~~~~
+
+.. code-block:: c
+
+ uint64_t hypercall(const uint64_t H_SVM_INIT_START)
+
+Return values
+~~~~~~~~~~~~~
+
+ One of the following values:
+
+ * H_SUCCESS on success.
+ * H_STATE if the VM is not in a position to switch to secure.
+
+Description
+~~~~~~~~~~~
+
+ Initiate the process of securing a virtual machine. This involves
+ coordinating with the Ultravisor, using ultracalls, to allocate
+ resources in the Ultravisor for the new SVM, transferring the VM's
+ pages from normal to secure memory etc. When the process is
+ completed, Ultravisor issues the H_SVM_INIT_DONE hypercall.
+
+Use cases
+~~~~~~~~~
+
+ #. Ultravisor uses this hypercall to inform Hypervisor that a VM
+ has initiated the process of switching to secure mode.
+
+
+H_SVM_INIT_DONE
+---------------
+
+ Complete the process of securing an SVM.
+
+Syntax
+~~~~~~
+
+.. code-block:: c
+
+ uint64_t hypercall(const uint64_t H_SVM_INIT_DONE)
+
+Return values
+~~~~~~~~~~~~~
+
+ One of the following values:
+
+ * H_SUCCESS on success.
+ * H_UNSUPPORTED if called from the wrong context (e.g.
+ from an SVM or before an H_SVM_INIT_START
+ hypercall).
+ * H_STATE if the hypervisor could not successfully
+ transition the VM to Secure VM.
+
+Description
+~~~~~~~~~~~
+
+ Complete the process of securing a virtual machine. This call must
+ be made after a prior call to ``H_SVM_INIT_START`` hypercall.
+
+Use cases
+~~~~~~~~~
+
+ On successfully securing a virtual machine, the Ultravisor informs
+ Hypervisor about it. Hypervisor can use this call to finish setting
+ up its internal state for this virtual machine.
+
+
+H_SVM_INIT_ABORT
+----------------
+
+ Abort the process of securing an SVM.
+
+Syntax
+~~~~~~
+
+.. code-block:: c
+
+ uint64_t hypercall(const uint64_t H_SVM_INIT_ABORT)
+
+Return values
+~~~~~~~~~~~~~
+
+ One of the following values:
+
+ * H_PARAMETER on successfully cleaning up the state,
+ Hypervisor will return this value to the
+ **guest**, to indicate that the underlying
+ UV_ESM ultracall failed.
+
+ * H_STATE if called after a VM has gone secure (i.e
+ H_SVM_INIT_DONE hypercall was successful).
+
+ * H_UNSUPPORTED if called from a wrong context (e.g. from a
+ normal VM).
+
+Description
+~~~~~~~~~~~
+
+ Abort the process of securing a virtual machine. This call must
+ be made after a prior call to ``H_SVM_INIT_START`` hypercall and
+ before a call to ``H_SVM_INIT_DONE``.
+
+ On entry into this hypercall the non-volatile GPRs and FPRs are
+ expected to contain the values they had at the time the VM issued
+ the UV_ESM ultracall. Further ``SRR0`` is expected to contain the
+ address of the instruction after the ``UV_ESM`` ultracall and ``SRR1``
+ the MSR value with which to return to the VM.
+
+ This hypercall will cleanup any partial state that was established for
+ the VM since the prior ``H_SVM_INIT_START`` hypercall, including paging
+ out pages that were paged-into secure memory, and issue the
+ ``UV_SVM_TERMINATE`` ultracall to terminate the VM.
+
+ After the partial state is cleaned up, control returns to the VM
+ (**not Ultravisor**), at the address specified in ``SRR0`` with the
+ MSR values set to the value in ``SRR1``.
+
+Use cases
+~~~~~~~~~
+
+ If after a successful call to ``H_SVM_INIT_START``, the Ultravisor
+ encounters an error while securing a virtual machine, either due
+ to lack of resources or because the VM's security information could
+ not be validated, Ultravisor informs the Hypervisor about it.
+ Hypervisor should use this call to clean up any internal state for
+ this virtual machine and return to the VM.
+
+H_SVM_PAGE_IN
+-------------
+
+ Move the contents of a page from normal memory to secure memory.
+
+Syntax
+~~~~~~
+
+.. code-block:: c
+
+ uint64_t hypercall(const uint64_t H_SVM_PAGE_IN,
+ uint64_t guest_pa, /* guest-physical-address */
+ uint64_t flags, /* flags */
+ uint64_t order) /* page size order */
+
+Return values
+~~~~~~~~~~~~~
+
+ One of the following values:
+
+ * H_SUCCESS on success.
+ * H_PARAMETER if ``guest_pa`` is invalid.
+ * H_P2 if ``flags`` is invalid.
+ * H_P3 if ``order`` of page is invalid.
+
+Description
+~~~~~~~~~~~
+
+ Retrieve the content of the page, belonging to the VM at the specified
+ guest physical address.
+
+ Only valid value(s) in ``flags`` are:
+
+ * H_PAGE_IN_SHARED which indicates that the page is to be shared
+ with the Ultravisor.
+
+ * H_PAGE_IN_NONSHARED indicates that the UV is not anymore
+ interested in the page. Applicable if the page is a shared page.
+
+ The ``order`` parameter must correspond to the configured page size.
+
+Use cases
+~~~~~~~~~
+
+ #. When a normal VM becomes a secure VM (using the UV_ESM ultracall),
+ the Ultravisor uses this hypercall to move contents of each page of
+ the VM from normal memory to secure memory.
+
+ #. Ultravisor uses this hypercall to ask Hypervisor to provide a page
+ in normal memory that can be shared between the SVM and Hypervisor.
+
+ #. Ultravisor uses this hypercall to page-in a paged-out page. This
+ can happen when the SVM touches a paged-out page.
+
+ #. If SVM wants to disable sharing of pages with Hypervisor, it can
+ inform Ultravisor to do so. Ultravisor will then use this hypercall
+ and inform Hypervisor that it has released access to the normal
+ page.
+
+H_SVM_PAGE_OUT
+---------------
+
+ Move the contents of the page to normal memory.
+
+Syntax
+~~~~~~
+
+.. code-block:: c
+
+ uint64_t hypercall(const uint64_t H_SVM_PAGE_OUT,
+ uint64_t guest_pa, /* guest-physical-address */
+ uint64_t flags, /* flags (currently none) */
+ uint64_t order) /* page size order */
+
+Return values
+~~~~~~~~~~~~~
+
+ One of the following values:
+
+ * H_SUCCESS on success.
+ * H_PARAMETER if ``guest_pa`` is invalid.
+ * H_P2 if ``flags`` is invalid.
+ * H_P3 if ``order`` is invalid.
+
+Description
+~~~~~~~~~~~
+
+ Move the contents of the page identified by ``guest_pa`` to normal
+ memory.
+
+ Currently ``flags`` is unused and must be set to 0. The ``order``
+ parameter must correspond to the configured page size.
+
+Use cases
+~~~~~~~~~
+
+ #. If Ultravisor is running low on secure pages, it can move the
+ contents of some secure pages, into normal pages using this
+ hypercall. The content will be encrypted.
+
+References
+##########
+
+- `Supporting Protected Computing on IBM Power Architecture <https://developer.ibm.com/articles/l-support-protected-computing/>`_
diff --git a/Documentation/arch/powerpc/vas-api.rst b/Documentation/arch/powerpc/vas-api.rst
new file mode 100644
index 000000000000..a9625a2fa0c6
--- /dev/null
+++ b/Documentation/arch/powerpc/vas-api.rst
@@ -0,0 +1,305 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. _VAS-API:
+
+===================================================
+Virtual Accelerator Switchboard (VAS) userspace API
+===================================================
+
+Introduction
+============
+
+Power9 processor introduced Virtual Accelerator Switchboard (VAS) which
+allows both userspace and kernel communicate to co-processor
+(hardware accelerator) referred to as the Nest Accelerator (NX). The NX
+unit comprises of one or more hardware engines or co-processor types
+such as 842 compression, GZIP compression and encryption. On power9,
+userspace applications will have access to only GZIP Compression engine
+which supports ZLIB and GZIP compression algorithms in the hardware.
+
+To communicate with NX, kernel has to establish a channel or window and
+then requests can be submitted directly without kernel involvement.
+Requests to the GZIP engine must be formatted as a co-processor Request
+Block (CRB) and these CRBs must be submitted to the NX using COPY/PASTE
+instructions to paste the CRB to hardware address that is associated with
+the engine's request queue.
+
+The GZIP engine provides two priority levels of requests: Normal and
+High. Only Normal requests are supported from userspace right now.
+
+This document explains userspace API that is used to interact with
+kernel to setup channel / window which can be used to send compression
+requests directly to NX accelerator.
+
+
+Overview
+========
+
+Application access to the GZIP engine is provided through
+/dev/crypto/nx-gzip device node implemented by the VAS/NX device driver.
+An application must open the /dev/crypto/nx-gzip device to obtain a file
+descriptor (fd). Then should issue VAS_TX_WIN_OPEN ioctl with this fd to
+establish connection to the engine. It means send window is opened on GZIP
+engine for this process. Once a connection is established, the application
+should use the mmap() system call to map the hardware address of engine's
+request queue into the application's virtual address space.
+
+The application can then submit one or more requests to the engine by
+using copy/paste instructions and pasting the CRBs to the virtual address
+(aka paste_address) returned by mmap(). User space can close the
+established connection or send window by closing the file descriptor
+(close(fd)) or upon the process exit.
+
+Note that applications can send several requests with the same window or
+can establish multiple windows, but one window for each file descriptor.
+
+Following sections provide additional details and references about the
+individual steps.
+
+NX-GZIP Device Node
+===================
+
+There is one /dev/crypto/nx-gzip node in the system and it provides
+access to all GZIP engines in the system. The only valid operations on
+/dev/crypto/nx-gzip are:
+
+ * open() the device for read and write.
+ * issue VAS_TX_WIN_OPEN ioctl
+ * mmap() the engine's request queue into application's virtual
+ address space (i.e. get a paste_address for the co-processor
+ engine).
+ * close the device node.
+
+Other file operations on this device node are undefined.
+
+Note that the copy and paste operations go directly to the hardware and
+do not go through this device. Refer COPY/PASTE document for more
+details.
+
+Although a system may have several instances of the NX co-processor
+engines (typically, one per P9 chip) there is just one
+/dev/crypto/nx-gzip device node in the system. When the nx-gzip device
+node is opened, Kernel opens send window on a suitable instance of NX
+accelerator. It finds CPU on which the user process is executing and
+determine the NX instance for the corresponding chip on which this CPU
+belongs.
+
+Applications may chose a specific instance of the NX co-processor using
+the vas_id field in the VAS_TX_WIN_OPEN ioctl as detailed below.
+
+A userspace library libnxz is available here but still in development:
+
+ https://github.com/abalib/power-gzip
+
+Applications that use inflate / deflate calls can link with libnxz
+instead of libz and use NX GZIP compression without any modification.
+
+Open /dev/crypto/nx-gzip
+========================
+
+The nx-gzip device should be opened for read and write. No special
+privileges are needed to open the device. Each window corresponds to one
+file descriptor. So if the userspace process needs multiple windows,
+several open calls have to be issued.
+
+See open(2) system call man pages for other details such as return values,
+error codes and restrictions.
+
+VAS_TX_WIN_OPEN ioctl
+=====================
+
+Applications should use the VAS_TX_WIN_OPEN ioctl as follows to establish
+a connection with NX co-processor engine:
+
+ ::
+
+ struct vas_tx_win_open_attr {
+ __u32 version;
+ __s16 vas_id; /* specific instance of vas or -1
+ for default */
+ __u16 reserved1;
+ __u64 flags; /* For future use */
+ __u64 reserved2[6];
+ };
+
+ version:
+ The version field must be currently set to 1.
+ vas_id:
+ If '-1' is passed, kernel will make a best-effort attempt
+ to assign an optimal instance of NX for the process. To
+ select the specific VAS instance, refer
+ "Discovery of available VAS engines" section below.
+
+ flags, reserved1 and reserved2[6] fields are for future extension
+ and must be set to 0.
+
+ The attributes attr for the VAS_TX_WIN_OPEN ioctl are defined as
+ follows::
+
+ #define VAS_MAGIC 'v'
+ #define VAS_TX_WIN_OPEN _IOW(VAS_MAGIC, 1,
+ struct vas_tx_win_open_attr)
+
+ struct vas_tx_win_open_attr attr;
+ rc = ioctl(fd, VAS_TX_WIN_OPEN, &attr);
+
+ The VAS_TX_WIN_OPEN ioctl returns 0 on success. On errors, it
+ returns -1 and sets the errno variable to indicate the error.
+
+ Error conditions:
+
+ ====== ================================================
+ EINVAL fd does not refer to a valid VAS device.
+ EINVAL Invalid vas ID
+ EINVAL version is not set with proper value
+ EEXIST Window is already opened for the given fd
+ ENOMEM Memory is not available to allocate window
+ ENOSPC System has too many active windows (connections)
+ opened
+ EINVAL reserved fields are not set to 0.
+ ====== ================================================
+
+ See the ioctl(2) man page for more details, error codes and
+ restrictions.
+
+mmap() NX-GZIP device
+=====================
+
+The mmap() system call for a NX-GZIP device fd returns a paste_address
+that the application can use to copy/paste its CRB to the hardware engines.
+
+ ::
+
+ paste_addr = mmap(addr, size, prot, flags, fd, offset);
+
+ Only restrictions on mmap for a NX-GZIP device fd are:
+
+ * size should be PAGE_SIZE
+ * offset parameter should be 0ULL
+
+ Refer to mmap(2) man page for additional details/restrictions.
+ In addition to the error conditions listed on the mmap(2) man
+ page, can also fail with one of the following error codes:
+
+ ====== =============================================
+ EINVAL fd is not associated with an open window
+ (i.e mmap() does not follow a successful call
+ to the VAS_TX_WIN_OPEN ioctl).
+ EINVAL offset field is not 0ULL.
+ ====== =============================================
+
+Discovery of available VAS engines
+==================================
+
+Each available VAS instance in the system will have a device tree node
+like /proc/device-tree/vas@* or /proc/device-tree/xscom@*/vas@*.
+Determine the chip or VAS instance and use the corresponding ibm,vas-id
+property value in this node to select specific VAS instance.
+
+Copy/Paste operations
+=====================
+
+Applications should use the copy and paste instructions to send CRB to NX.
+Refer section 4.4 in PowerISA for Copy/Paste instructions:
+https://openpowerfoundation.org/?resource_lib=power-isa-version-3-0
+
+CRB Specification and use NX
+============================
+
+Applications should format requests to the co-processor using the
+co-processor Request Block (CRBs). Refer NX-GZIP user's manual for the format
+of CRB and use NX from userspace such as sending requests and checking
+request status.
+
+NX Fault handling
+=================
+
+Applications send requests to NX and wait for the status by polling on
+co-processor Status Block (CSB) flags. NX updates status in CSB after each
+request is processed. Refer NX-GZIP user's manual for the format of CSB and
+status flags.
+
+In case if NX encounters translation error (called NX page fault) on CSB
+address or any request buffer, raises an interrupt on the CPU to handle the
+fault. Page fault can happen if an application passes invalid addresses or
+request buffers are not in memory. The operating system handles the fault by
+updating CSB with the following data::
+
+ csb.flags = CSB_V;
+ csb.cc = CSB_CC_FAULT_ADDRESS;
+ csb.ce = CSB_CE_TERMINATION;
+ csb.address = fault_address;
+
+When an application receives translation error, it can touch or access
+the page that has a fault address so that this page will be in memory. Then
+the application can resend this request to NX.
+
+If the OS can not update CSB due to invalid CSB address, sends SEGV signal
+to the process who opened the send window on which the original request was
+issued. This signal returns with the following siginfo struct::
+
+ siginfo.si_signo = SIGSEGV;
+ siginfo.si_errno = EFAULT;
+ siginfo.si_code = SEGV_MAPERR;
+ siginfo.si_addr = CSB address;
+
+In the case of multi-thread applications, NX send windows can be shared
+across all threads. For example, a child thread can open a send window,
+but other threads can send requests to NX using this window. These
+requests will be successful even in the case of OS handling faults as long
+as CSB address is valid. If the NX request contains an invalid CSB address,
+the signal will be sent to the child thread that opened the window. But if
+the thread is exited without closing the window and the request is issued
+using this window. the signal will be issued to the thread group leader
+(tgid). It is up to the application whether to ignore or handle these
+signals.
+
+NX-GZIP User's Manual:
+https://github.com/libnxz/power-gzip/blob/master/doc/power_nx_gzip_um.pdf
+
+Simple example
+==============
+
+ ::
+
+ int use_nx_gzip()
+ {
+ int rc, fd;
+ void *addr;
+ struct vas_setup_attr txattr;
+
+ fd = open("/dev/crypto/nx-gzip", O_RDWR);
+ if (fd < 0) {
+ fprintf(stderr, "open nx-gzip failed\n");
+ return -1;
+ }
+ memset(&txattr, 0, sizeof(txattr));
+ txattr.version = 1;
+ txattr.vas_id = -1
+ rc = ioctl(fd, VAS_TX_WIN_OPEN,
+ (unsigned long)&txattr);
+ if (rc < 0) {
+ fprintf(stderr, "ioctl() n %d, error %d\n",
+ rc, errno);
+ return rc;
+ }
+ addr = mmap(NULL, 4096, PROT_READ|PROT_WRITE,
+ MAP_SHARED, fd, 0ULL);
+ if (addr == MAP_FAILED) {
+ fprintf(stderr, "mmap() failed, errno %d\n",
+ errno);
+ return -errno;
+ }
+ do {
+ //Format CRB request with compression or
+ //uncompression
+ // Refer tests for vas_copy/vas_paste
+ vas_copy((&crb, 0, 1);
+ vas_paste(addr, 0, 1);
+ // Poll on csb.flags with timeout
+ // csb address is listed in CRB
+ } while (true)
+ close(fd) or window can be closed upon process exit
+ }
+
+ Refer https://github.com/libnxz/power-gzip for tests or more
+ use cases.
diff --git a/Documentation/arch/powerpc/vcpudispatch_stats.rst b/Documentation/arch/powerpc/vcpudispatch_stats.rst
new file mode 100644
index 000000000000..5704657a5987
--- /dev/null
+++ b/Documentation/arch/powerpc/vcpudispatch_stats.rst
@@ -0,0 +1,75 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+========================
+VCPU Dispatch Statistics
+========================
+
+For Shared Processor LPARs, the POWER Hypervisor maintains a relatively
+static mapping of the LPAR processors (vcpus) to physical processor
+chips (representing the "home" node) and tries to always dispatch vcpus
+on their associated physical processor chip. However, under certain
+scenarios, vcpus may be dispatched on a different processor chip (away
+from its home node).
+
+/proc/powerpc/vcpudispatch_stats can be used to obtain statistics
+related to the vcpu dispatch behavior. Writing '1' to this file enables
+collecting the statistics, while writing '0' disables the statistics.
+By default, the DTLB log for each vcpu is processed 50 times a second so
+as not to miss any entries. This processing frequency can be changed
+through /proc/powerpc/vcpudispatch_stats_freq.
+
+The statistics themselves are available by reading the procfs file
+/proc/powerpc/vcpudispatch_stats. Each line in the output corresponds to
+a vcpu as represented by the first field, followed by 8 numbers.
+
+The first number corresponds to:
+
+1. total vcpu dispatches since the beginning of statistics collection
+
+The next 4 numbers represent vcpu dispatch dispersions:
+
+2. number of times this vcpu was dispatched on the same processor as last
+ time
+3. number of times this vcpu was dispatched on a different processor core
+ as last time, but within the same chip
+4. number of times this vcpu was dispatched on a different chip
+5. number of times this vcpu was dispatches on a different socket/drawer
+ (next numa boundary)
+
+The final 3 numbers represent statistics in relation to the home node of
+the vcpu:
+
+6. number of times this vcpu was dispatched in its home node (chip)
+7. number of times this vcpu was dispatched in a different node
+8. number of times this vcpu was dispatched in a node further away (numa
+ distance)
+
+An example output::
+
+ $ sudo cat /proc/powerpc/vcpudispatch_stats
+ cpu0 6839 4126 2683 30 0 6821 18 0
+ cpu1 2515 1274 1229 12 0 2509 6 0
+ cpu2 2317 1198 1109 10 0 2312 5 0
+ cpu3 2259 1165 1088 6 0 2256 3 0
+ cpu4 2205 1143 1056 6 0 2202 3 0
+ cpu5 2165 1121 1038 6 0 2162 3 0
+ cpu6 2183 1127 1050 6 0 2180 3 0
+ cpu7 2193 1133 1052 8 0 2187 6 0
+ cpu8 2165 1115 1032 18 0 2156 9 0
+ cpu9 2301 1252 1033 16 0 2293 8 0
+ cpu10 2197 1138 1041 18 0 2187 10 0
+ cpu11 2273 1185 1062 26 0 2260 13 0
+ cpu12 2186 1125 1043 18 0 2177 9 0
+ cpu13 2161 1115 1030 16 0 2153 8 0
+ cpu14 2206 1153 1033 20 0 2196 10 0
+ cpu15 2163 1115 1032 16 0 2155 8 0
+
+In the output above, for vcpu0, there have been 6839 dispatches since
+statistics were enabled. 4126 of those dispatches were on the same
+physical cpu as the last time. 2683 were on a different core, but within
+the same chip, while 30 dispatches were on a different chip compared to
+its last dispatch.
+
+Also, out of the total of 6839 dispatches, we see that there have been
+6821 dispatches on the vcpu's home node, while 18 dispatches were
+outside its home node, on a neighbouring chip.
diff --git a/Documentation/arch/powerpc/vmemmap_dedup.rst b/Documentation/arch/powerpc/vmemmap_dedup.rst
new file mode 100644
index 000000000000..dc4db59fdf87
--- /dev/null
+++ b/Documentation/arch/powerpc/vmemmap_dedup.rst
@@ -0,0 +1,101 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========
+Device DAX
+==========
+
+The device-dax interface uses the tail deduplication technique explained in
+Documentation/mm/vmemmap_dedup.rst
+
+On powerpc, vmemmap deduplication is only used with radix MMU translation. Also
+with a 64K page size, only the devdax namespace with 1G alignment uses vmemmap
+deduplication.
+
+With 2M PMD level mapping, we require 32 struct pages and a single 64K vmemmap
+page can contain 1024 struct pages (64K/sizeof(struct page)). Hence there is no
+vmemmap deduplication possible.
+
+With 1G PUD level mapping, we require 16384 struct pages and a single 64K
+vmemmap page can contain 1024 struct pages (64K/sizeof(struct page)). Hence we
+require 16 64K pages in vmemmap to map the struct page for 1G PUD level mapping.
+
+Here's how things look like on device-dax after the sections are populated::
+ +-----------+ ---virt_to_page---> +-----------+ mapping to +-----------+
+ | | | 0 | -------------> | 0 |
+ | | +-----------+ +-----------+
+ | | | 1 | -------------> | 1 |
+ | | +-----------+ +-----------+
+ | | | 2 | ----------------^ ^ ^ ^ ^ ^
+ | | +-----------+ | | | | |
+ | | | 3 | ------------------+ | | | |
+ | | +-----------+ | | | |
+ | | | 4 | --------------------+ | | |
+ | PUD | +-----------+ | | |
+ | level | | . | ----------------------+ | |
+ | mapping | +-----------+ | |
+ | | | . | ------------------------+ |
+ | | +-----------+ |
+ | | | 15 | --------------------------+
+ | | +-----------+
+ | |
+ | |
+ | |
+ +-----------+
+
+
+With 4K page size, 2M PMD level mapping requires 512 struct pages and a single
+4K vmemmap page contains 64 struct pages(4K/sizeof(struct page)). Hence we
+require 8 4K pages in vmemmap to map the struct page for 2M pmd level mapping.
+
+Here's how things look like on device-dax after the sections are populated::
+
+ +-----------+ ---virt_to_page---> +-----------+ mapping to +-----------+
+ | | | 0 | -------------> | 0 |
+ | | +-----------+ +-----------+
+ | | | 1 | -------------> | 1 |
+ | | +-----------+ +-----------+
+ | | | 2 | ----------------^ ^ ^ ^ ^ ^
+ | | +-----------+ | | | | |
+ | | | 3 | ------------------+ | | | |
+ | | +-----------+ | | | |
+ | | | 4 | --------------------+ | | |
+ | PMD | +-----------+ | | |
+ | level | | 5 | ----------------------+ | |
+ | mapping | +-----------+ | |
+ | | | 6 | ------------------------+ |
+ | | +-----------+ |
+ | | | 7 | --------------------------+
+ | | +-----------+
+ | |
+ | |
+ | |
+ +-----------+
+
+With 1G PUD level mapping, we require 262144 struct pages and a single 4K
+vmemmap page can contain 64 struct pages (4K/sizeof(struct page)). Hence we
+require 4096 4K pages in vmemmap to map the struct pages for 1G PUD level
+mapping.
+
+Here's how things look like on device-dax after the sections are populated::
+
+ +-----------+ ---virt_to_page---> +-----------+ mapping to +-----------+
+ | | | 0 | -------------> | 0 |
+ | | +-----------+ +-----------+
+ | | | 1 | -------------> | 1 |
+ | | +-----------+ +-----------+
+ | | | 2 | ----------------^ ^ ^ ^ ^ ^
+ | | +-----------+ | | | | |
+ | | | 3 | ------------------+ | | | |
+ | | +-----------+ | | | |
+ | | | 4 | --------------------+ | | |
+ | PUD | +-----------+ | | |
+ | level | | . | ----------------------+ | |
+ | mapping | +-----------+ | |
+ | | | . | ------------------------+ |
+ | | +-----------+ |
+ | | | 4095 | --------------------------+
+ | | +-----------+
+ | |
+ | |
+ | |
+ +-----------+
diff --git a/Documentation/arch/powerpc/vpa-dtl.rst b/Documentation/arch/powerpc/vpa-dtl.rst
new file mode 100644
index 000000000000..58d0022f993a
--- /dev/null
+++ b/Documentation/arch/powerpc/vpa-dtl.rst
@@ -0,0 +1,156 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. _vpa-dtl:
+
+===================================
+DTL (Dispatch Trace Log)
+===================================
+
+Athira Rajeev, 19 April 2025
+
+.. contents::
+ :depth: 3
+
+
+Basic overview
+==============
+
+The pseries Shared Processor Logical Partition(SPLPAR) machines can
+retrieve a log of dispatch and preempt events from the hypervisor
+using data from Disptach Trace Log(DTL) buffer. With this information,
+user can retrieve when and why each dispatch & preempt has occurred.
+The vpa-dtl PMU exposes the Virtual Processor Area(VPA) DTL counters
+via perf.
+
+Infrastructure used
+===================
+
+The VPA DTL PMU counters do not interrupt on overflow or generate any
+PMI interrupts. Therefore, hrtimer is used to poll the DTL data. The timer
+nterval can be provided by user via sample_period field in nano seconds.
+vpa dtl pmu has one hrtimer added per vpa-dtl pmu thread. DTL (Dispatch
+Trace Log) contains information about dispatch/preempt, enqueue time etc.
+We directly copy the DTL buffer data as part of auxiliary buffer and it
+will be processed later. This will avoid time taken to create samples
+in the kernel space. The PMU driver collecting Dispatch Trace Log (DTL)
+entries makes use of AUX support in perf infrastructure. On the tools side,
+this data is made available as PERF_RECORD_AUXTRACE records.
+
+To correlate each DTL entry with other events across CPU's, an auxtrace_queue
+is created for each CPU. Each auxtrace queue has a array/list of auxtrace buffers.
+All auxtrace queues is maintained in auxtrace heap. The queues are sorted
+based on timestamp. When the different PERF_RECORD_XX records are processed,
+compare the timestamp of perf record with timestamp of top element in the
+auxtrace heap so that DTL events can be co-related with other events
+Process the auxtrace queue if the timestamp of element from heap is
+lower than timestamp from entry in perf record. Sometimes it could happen that
+one buffer is only partially processed. if the timestamp of occurrence of
+another event is more than currently processed element in the queue, it will
+move on to next perf record. So keep track of position of buffer to continue
+processing next time. Update the timestamp of the auxtrace heap with the timestamp
+of last processed entry from the auxtrace buffer.
+
+This infrastructure ensures dispatch trace log entries can be correlated
+and presented along with other events like sched.
+
+vpa-dtl PMU example usage
+=========================
+
+.. code-block:: sh
+
+ # ls /sys/devices/vpa_dtl/
+ events format perf_event_mux_interval_ms power subsystem type uevent
+
+
+To capture the DTL data using perf record:
+.. code-block:: sh
+
+ # ./perf record -a -e sched:\*,vpa_dtl/dtl_all/ -c 1000000000 sleep 1
+
+The result can be interpreted using perf record. Snippet of perf report -D
+
+.. code-block:: sh
+
+ # ./perf report -D
+
+There are different PERF_RECORD_XX records. In that records corresponding to
+auxtrace buffers includes:
+
+1. PERF_RECORD_AUX
+ Conveys that new data is available in AUX area
+
+2. PERF_RECORD_AUXTRACE_INFO
+ Describes offset and size of auxtrace data in the buffers
+
+3. PERF_RECORD_AUXTRACE
+ This is the record that defines the auxtrace data which here in case of
+ vpa-dtl pmu is dispatch trace log data.
+
+Snippet from perf report -D showing the PERF_RECORD_AUXTRACE dump
+
+.. code-block:: sh
+
+0 0 0x39b10 [0x30]: PERF_RECORD_AUXTRACE size: 0x690 offset: 0 ref: 0 idx: 0 tid: -1 cpu: 0
+.
+. ... VPA DTL PMU data: size 1680 bytes, entries is 35
+. 00000000: boot_tb: 21349649546353231, tb_freq: 512000000
+. 00000030: dispatch_reason:decrementer interrupt, preempt_reason:H_CEDE, enqueue_to_dispatch_time:7064, ready_to_enqueue_time:187, waiting_to_ready_time:6611773
+. 00000060: dispatch_reason:priv doorbell, preempt_reason:H_CEDE, enqueue_to_dispatch_time:146, ready_to_enqueue_time:0, waiting_to_ready_time:15359437
+. 00000090: dispatch_reason:decrementer interrupt, preempt_reason:H_CEDE, enqueue_to_dispatch_time:4868, ready_to_enqueue_time:232, waiting_to_ready_time:5100709
+. 000000c0: dispatch_reason:priv doorbell, preempt_reason:H_CEDE, enqueue_to_dispatch_time:179, ready_to_enqueue_time:0, waiting_to_ready_time:30714243
+. 000000f0: dispatch_reason:priv doorbell, preempt_reason:H_CEDE, enqueue_to_dispatch_time:197, ready_to_enqueue_time:0, waiting_to_ready_time:15350648
+. 00000120: dispatch_reason:priv doorbell, preempt_reason:H_CEDE, enqueue_to_dispatch_time:213, ready_to_enqueue_time:0, waiting_to_ready_time:15353446
+. 00000150: dispatch_reason:priv doorbell, preempt_reason:H_CEDE, enqueue_to_dispatch_time:212, ready_to_enqueue_time:0, waiting_to_ready_time:15355126
+. 00000180: dispatch_reason:decrementer interrupt, preempt_reason:H_CEDE, enqueue_to_dispatch_time:6368, ready_to_enqueue_time:164, waiting_to_ready_time:5104665
+
+Above is representation of dtl entry of below format:
+
+struct dtl_entry {
+ u8 dispatch_reason;
+ u8 preempt_reason;
+ u16 processor_id;
+ u32 enqueue_to_dispatch_time;
+ u32 ready_to_enqueue_time;
+ u32 waiting_to_ready_time;
+ u64 timebase;
+ u64 fault_addr;
+ u64 srr0;
+ u64 srr1;
+
+};
+
+First two fields represent the dispatch reason and preempt reason. The post
+processing of PERF_RECORD_AUXTRACE records will translate to meaningful data
+for user to consume.
+
+Visualize the dispatch trace log entries with perf report
+=========================================================
+
+.. code-block:: sh
+
+ # ./perf record -a -e sched:*,vpa_dtl/dtl_all/ -c 1000000000 sleep 1
+ [ perf record: Woken up 1 times to write data ]
+ [ perf record: Captured and wrote 0.300 MB perf.data ]
+
+ # ./perf report
+ # Samples: 321 of event 'vpa-dtl'
+ # Event count (approx.): 321
+ #
+ # Children Self Command Shared Object Symbol
+ # ........ ........ ....... ................. ..............................
+ #
+ 100.00% 100.00% swapper [kernel.kallsyms] [k] plpar_hcall_norets_notrace
+
+Visualize the dispatch trace log entries with perf script
+=========================================================
+
+.. code-block:: sh
+
+ # ./perf script
+ migration/9 67 [009] 105373.359903: sched:sched_waking: comm=perf pid=13418 prio=120 target_cpu=009
+ migration/9 67 [009] 105373.359904: sched:sched_migrate_task: comm=perf pid=13418 prio=120 orig_cpu=9 dest_cpu=10
+ migration/9 67 [009] 105373.359907: sched:sched_stat_runtime: comm=migration/9 pid=67 runtime=4050 [ns]
+ migration/9 67 [009] 105373.359908: sched:sched_switch: prev_comm=migration/9 prev_pid=67 prev_prio=0 prev_state=S ==> next_comm=swapper/9 next_pid=0 next_prio=120
+ :256 256 [016] 105373.359913: vpa-dtl: timebase: 21403600706628832 dispatch_reason:decrementer interrupt, preempt_reason:H_CEDE, enqueue_to_dispatch_time:4854, ready_to_enqueue_time:139, waiting_to_ready_time:511842115 c0000000000fcd28 plpar_hcall_norets_notrace+0x18 ([kernel.kallsyms])
+ :256 256 [017] 105373.360012: vpa-dtl: timebase: 21403600706679454 dispatch_reason:priv doorbell, preempt_reason:H_CEDE, enqueue_to_dispatch_time:236, ready_to_enqueue_time:0, waiting_to_ready_time:133864583 c0000000000fcd28 plpar_hcall_norets_notrace+0x18 ([kernel.kallsyms])
+ perf 13418 [010] 105373.360048: sched:sched_stat_runtime: comm=perf pid=13418 runtime=139748 [ns]
+ perf 13418 [010] 105373.360052: sched:sched_waking: comm=migration/10 pid=72 prio=0 target_cpu=010
diff --git a/Documentation/arch/riscv/acpi.rst b/Documentation/arch/riscv/acpi.rst
new file mode 100644
index 000000000000..9870a282815b
--- /dev/null
+++ b/Documentation/arch/riscv/acpi.rst
@@ -0,0 +1,10 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============
+ACPI on RISC-V
+==============
+
+The ISA string parsing rules for ACPI are defined by `Version ASCIIDOC
+Conversion, 12/2022 of the RISC-V specifications, as defined by tag
+"riscv-isa-release-1239329-2023-05-23" (commit 1239329
+) <https://github.com/riscv/riscv-isa-manual/releases/tag/riscv-isa-release-1239329-2023-05-23>`_
diff --git a/Documentation/arch/riscv/boot-image-header.rst b/Documentation/arch/riscv/boot-image-header.rst
new file mode 100644
index 000000000000..df2ffc173e80
--- /dev/null
+++ b/Documentation/arch/riscv/boot-image-header.rst
@@ -0,0 +1,59 @@
+=================================
+Boot image header in RISC-V Linux
+=================================
+
+:Author: Atish Patra <atish.patra@wdc.com>
+:Date: 20 May 2019
+
+This document only describes the boot image header details for RISC-V Linux.
+
+The following 64-byte header is present in decompressed Linux kernel image::
+
+ u32 code0; /* Executable code */
+ u32 code1; /* Executable code */
+ u64 text_offset; /* Image load offset, little endian */
+ u64 image_size; /* Effective Image size, little endian */
+ u64 flags; /* kernel flags, little endian */
+ u32 version; /* Version of this header */
+ u32 res1 = 0; /* Reserved */
+ u64 res2 = 0; /* Reserved */
+ u64 magic = 0x5643534952; /* Magic number, little endian, "RISCV" */
+ u32 magic2 = 0x05435352; /* Magic number 2, little endian, "RSC\x05" */
+ u32 res3; /* Reserved for PE COFF offset */
+
+This header format is compliant with PE/COFF header and largely inspired from
+ARM64 header. Thus, both ARM64 & RISC-V header can be combined into one common
+header in future.
+
+Notes
+=====
+
+- This header is also reused to support EFI stub for RISC-V. EFI specification
+ needs PE/COFF image header in the beginning of the kernel image in order to
+ load it as an EFI application. In order to support EFI stub, code0 is replaced
+ with "MZ" magic string and res3(at offset 0x3c) points to the rest of the
+ PE/COFF header.
+
+- version field indicate header version number
+
+ ========== =============
+ Bits 0:15 Minor version
+ Bits 16:31 Major version
+ ========== =============
+
+ This preserves compatibility across newer and older version of the header.
+ The current version is defined as 0.2.
+
+- The "magic" field is deprecated as of version 0.2. In a future
+ release, it may be removed. This originally should have matched up
+ with the ARM64 header "magic" field, but unfortunately does not.
+ The "magic2" field replaces it, matching up with the ARM64 header.
+
+- In current header, the flags field has only one field.
+
+ ===== ====================================
+ Bit 0 Kernel endianness. 1 if BE, 0 if LE.
+ ===== ====================================
+
+- Image size is mandatory for boot loader to load kernel image. Booting will
+ fail otherwise.
diff --git a/Documentation/arch/riscv/boot.rst b/Documentation/arch/riscv/boot.rst
new file mode 100644
index 000000000000..6077b587a842
--- /dev/null
+++ b/Documentation/arch/riscv/boot.rst
@@ -0,0 +1,169 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============================================
+RISC-V Kernel Boot Requirements and Constraints
+===============================================
+
+:Author: Alexandre Ghiti <alexghiti@rivosinc.com>
+:Date: 23 May 2023
+
+This document describes what the RISC-V kernel expects from bootloaders and
+firmware, and also the constraints that any developer must have in mind when
+touching the early boot process. For the purposes of this document, the
+``early boot process`` refers to any code that runs before the final virtual
+mapping is set up.
+
+Pre-kernel Requirements and Constraints
+=======================================
+
+The RISC-V kernel expects the following of bootloaders and platform firmware:
+
+Register state
+--------------
+
+The RISC-V kernel expects:
+
+ * ``$a0`` to contain the hartid of the current core.
+ * ``$a1`` to contain the address of the devicetree in memory.
+
+CSR state
+---------
+
+The RISC-V kernel expects:
+
+ * ``$satp = 0``: the MMU, if present, must be disabled.
+
+Reserved memory for resident firmware
+-------------------------------------
+
+The RISC-V kernel must not map any resident memory, or memory protected with
+PMPs, in the direct mapping, so the firmware must correctly mark those regions
+as per the devicetree specification and/or the UEFI specification.
+
+Kernel location
+---------------
+
+The RISC-V kernel expects to be placed at a PMD boundary (2MB aligned for rv64
+and 4MB aligned for rv32). Note that the EFI stub will physically relocate the
+kernel if that's not the case.
+
+Hardware description
+--------------------
+
+The firmware can pass either a devicetree or ACPI tables to the RISC-V kernel.
+
+The devicetree is either passed directly to the kernel from the previous stage
+using the ``$a1`` register, or when booting with UEFI, it can be passed using the
+EFI configuration table.
+
+The ACPI tables are passed to the kernel using the EFI configuration table. In
+this case, a tiny devicetree is still created by the EFI stub. Please refer to
+"EFI stub and devicetree" section below for details about this devicetree.
+
+Kernel entry
+------------
+
+On SMP systems, there are 2 methods to enter the kernel:
+
+- ``RISCV_BOOT_SPINWAIT``: the firmware releases all harts in the kernel, one hart
+ wins a lottery and executes the early boot code while the other harts are
+ parked waiting for the initialization to finish. This method is mostly used to
+ support older firmwares without SBI HSM extension and M-mode RISC-V kernel.
+- ``Ordered booting``: the firmware releases only one hart that will execute the
+ initialization phase and then will start all other harts using the SBI HSM
+ extension. The ordered booting method is the preferred booting method for
+ booting the RISC-V kernel because it can support CPU hotplug and kexec.
+
+UEFI
+----
+
+UEFI memory map
+~~~~~~~~~~~~~~~
+
+When booting with UEFI, the RISC-V kernel will use only the EFI memory map to
+populate the system memory.
+
+The UEFI firmware must parse the subnodes of the ``/reserved-memory`` devicetree
+node and abide by the devicetree specification to convert the attributes of
+those subnodes (``no-map`` and ``reusable``) into their correct EFI equivalent
+(refer to section "3.5.4 /reserved-memory and UEFI" of the devicetree
+specification v0.4-rc1).
+
+RISCV_EFI_BOOT_PROTOCOL
+~~~~~~~~~~~~~~~~~~~~~~~
+
+When booting with UEFI, the EFI stub requires the boot hartid in order to pass
+it to the RISC-V kernel in ``$a1``. The EFI stub retrieves the boot hartid using
+one of the following methods:
+
+- ``RISCV_EFI_BOOT_PROTOCOL`` (**preferred**).
+- ``boot-hartid`` devicetree subnode (**deprecated**).
+
+Any new firmware must implement ``RISCV_EFI_BOOT_PROTOCOL`` as the devicetree
+based approach is deprecated now.
+
+Early Boot Requirements and Constraints
+=======================================
+
+The RISC-V kernel's early boot process operates under the following constraints:
+
+EFI stub and devicetree
+-----------------------
+
+When booting with UEFI, the devicetree is supplemented (or created) by the EFI
+stub with the same parameters as arm64 which are described at the paragraph
+"UEFI kernel support on ARM" in Documentation/arch/arm/uefi.rst.
+
+Virtual mapping installation
+----------------------------
+
+The installation of the virtual mapping is done in 2 steps in the RISC-V kernel:
+
+1. ``setup_vm()`` installs a temporary kernel mapping in ``early_pg_dir`` which
+ allows discovery of the system memory. Only the kernel text/data are mapped
+ at this point. When establishing this mapping, no allocation can be done
+ (since the system memory is not known yet), so ``early_pg_dir`` page table is
+ statically allocated (using only one table for each level).
+
+2. ``setup_vm_final()`` creates the final kernel mapping in ``swapper_pg_dir``
+ and takes advantage of the discovered system memory to create the linear
+ mapping. When establishing this mapping, the kernel can allocate memory but
+ cannot access it directly (since the direct mapping is not present yet), so
+ it uses temporary mappings in the fixmap region to be able to access the
+ newly allocated page table levels.
+
+For ``virt_to_phys()`` and ``phys_to_virt()`` to be able to correctly convert
+direct mapping addresses to physical addresses, they need to know the start of
+the DRAM. This happens after step 1, right before step 2 installs the direct
+mapping (see ``setup_bootmem()`` function in arch/riscv/mm/init.c). Any usage of
+those macros before the final virtual mapping is installed must be carefully
+examined.
+
+Devicetree mapping via fixmap
+-----------------------------
+
+As the ``reserved_mem`` array is initialized with virtual addresses established
+by ``setup_vm()``, and used with the mapping established by
+``setup_vm_final()``, the RISC-V kernel uses the fixmap region to map the
+devicetree. This ensures that the devicetree remains accessible by both virtual
+mappings.
+
+Pre-MMU execution
+-----------------
+
+A few pieces of code need to run before even the first virtual mapping is
+established. These are the installation of the first virtual mapping itself,
+patching of early alternatives and the early parsing of the kernel command line.
+That code must be very carefully compiled as:
+
+- ``-fno-pie``: This is needed for relocatable kernels which use ``-fPIE``,
+ since otherwise, any access to a global symbol would go through the GOT which
+ is only relocated virtually.
+- ``-mcmodel=medany``: Any access to a global symbol must be PC-relative to
+ avoid any relocations to happen before the MMU is setup.
+- *all* instrumentation must also be disabled (that includes KASAN, ftrace and
+ others).
+
+As using a symbol from a different compilation unit requires this unit to be
+compiled with those flags, we advise, as much as possible, not to use external
+symbols.
diff --git a/Documentation/arch/riscv/cmodx.rst b/Documentation/arch/riscv/cmodx.rst
new file mode 100644
index 000000000000..40ba53bed5df
--- /dev/null
+++ b/Documentation/arch/riscv/cmodx.rst
@@ -0,0 +1,130 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============================================================================
+Concurrent Modification and Execution of Instructions (CMODX) for RISC-V Linux
+==============================================================================
+
+CMODX is a programming technique where a program executes instructions that were
+modified by the program itself. Instruction storage and the instruction cache
+(icache) are not guaranteed to be synchronized on RISC-V hardware. Therefore, the
+program must enforce its own synchronization with the unprivileged fence.i
+instruction.
+
+CMODX in the Kernel Space
+-------------------------
+
+Dynamic ftrace
+---------------------
+
+Essentially, dynamic ftrace directs the control flow by inserting a function
+call at each patchable function entry, and patches it dynamically at runtime to
+enable or disable the redirection. In the case of RISC-V, 2 instructions,
+AUIPC + JALR, are required to compose a function call. However, it is impossible
+to patch 2 instructions and expect that a concurrent read-side executes them
+without a race condition. This series makes atmoic code patching possible in
+RISC-V ftrace. Kernel preemption makes things even worse as it allows the old
+state to persist across the patching process with stop_machine().
+
+In order to get rid of stop_machine() and run dynamic ftrace with full kernel
+preemption, we partially initialize each patchable function entry at boot-time,
+setting the first instruction to AUIPC, and the second to NOP. Now, atmoic
+patching is possible because the kernel only has to update one instruction.
+According to Ziccif, as long as an instruction is naturally aligned, the ISA
+guarantee an atomic update.
+
+By fixing down the first instruction, AUIPC, the range of the ftrace trampoline
+is limited to +-2K from the predetermined target, ftrace_caller, due to the lack
+of immediate encoding space in RISC-V. To address the issue, we introduce
+CALL_OPS, where an 8B naturally align metadata is added in front of each
+pacthable function. The metadata is resolved at the first trampoline, then the
+execution can be derect to another custom trampoline.
+
+CMODX in the User Space
+-----------------------
+
+Though fence.i is an unprivileged instruction, the default Linux ABI prohibits
+the use of fence.i in userspace applications. At any point the scheduler may
+migrate a task onto a new hart. If migration occurs after the userspace
+synchronized the icache and instruction storage with fence.i, the icache on the
+new hart will no longer be clean. This is due to the behavior of fence.i only
+affecting the hart that it is called on. Thus, the hart that the task has been
+migrated to may not have synchronized instruction storage and icache.
+
+There are two ways to solve this problem: use the riscv_flush_icache() syscall,
+or use the ``PR_RISCV_SET_ICACHE_FLUSH_CTX`` prctl() and emit fence.i in
+userspace. The syscall performs a one-off icache flushing operation. The prctl
+changes the Linux ABI to allow userspace to emit icache flushing operations.
+
+As an aside, "deferred" icache flushes can sometimes be triggered in the kernel.
+At the time of writing, this only occurs during the riscv_flush_icache() syscall
+and when the kernel uses copy_to_user_page(). These deferred flushes happen only
+when the memory map being used by a hart changes. If the prctl() context caused
+an icache flush, this deferred icache flush will be skipped as it is redundant.
+Therefore, there will be no additional flush when using the riscv_flush_icache()
+syscall inside of the prctl() context.
+
+prctl() Interface
+---------------------
+
+Call prctl() with ``PR_RISCV_SET_ICACHE_FLUSH_CTX`` as the first argument. The
+remaining arguments will be delegated to the riscv_set_icache_flush_ctx
+function detailed below.
+
+.. kernel-doc:: arch/riscv/mm/cacheflush.c
+ :identifiers: riscv_set_icache_flush_ctx
+
+Example usage:
+
+The following files are meant to be compiled and linked with each other. The
+modify_instruction() function replaces an add with 0 with an add with one,
+causing the instruction sequence in get_value() to change from returning a zero
+to returning a one.
+
+cmodx.c::
+
+ #include <stdio.h>
+ #include <sys/prctl.h>
+
+ extern int get_value();
+ extern void modify_instruction();
+
+ int main()
+ {
+ int value = get_value();
+ printf("Value before cmodx: %d\n", value);
+
+ // Call prctl before first fence.i is called inside modify_instruction
+ prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX, PR_RISCV_CTX_SW_FENCEI_ON, PR_RISCV_SCOPE_PER_PROCESS);
+ modify_instruction();
+ // Call prctl after final fence.i is called in process
+ prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX, PR_RISCV_CTX_SW_FENCEI_OFF, PR_RISCV_SCOPE_PER_PROCESS);
+
+ value = get_value();
+ printf("Value after cmodx: %d\n", value);
+ return 0;
+ }
+
+cmodx.S::
+
+ .option norvc
+
+ .text
+ .global modify_instruction
+ modify_instruction:
+ lw a0, new_insn
+ lui a5,%hi(old_insn)
+ sw a0,%lo(old_insn)(a5)
+ fence.i
+ ret
+
+ .section modifiable, "awx"
+ .global get_value
+ get_value:
+ li a0, 0
+ old_insn:
+ addi a0, a0, 0
+ ret
+
+ .data
+ new_insn:
+ addi a0, a0, 1
diff --git a/Documentation/arch/riscv/features.rst b/Documentation/arch/riscv/features.rst
new file mode 100644
index 000000000000..36e90144adab
--- /dev/null
+++ b/Documentation/arch/riscv/features.rst
@@ -0,0 +1,3 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. kernel-feat:: features riscv
diff --git a/Documentation/arch/riscv/hwprobe.rst b/Documentation/arch/riscv/hwprobe.rst
new file mode 100644
index 000000000000..2f449c9b15bd
--- /dev/null
+++ b/Documentation/arch/riscv/hwprobe.rst
@@ -0,0 +1,372 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+RISC-V Hardware Probing Interface
+---------------------------------
+
+The RISC-V hardware probing interface is based around a single syscall, which
+is defined in <asm/hwprobe.h>::
+
+ struct riscv_hwprobe {
+ __s64 key;
+ __u64 value;
+ };
+
+ long sys_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
+ size_t cpusetsize, cpu_set_t *cpus,
+ unsigned int flags);
+
+The arguments are split into three groups: an array of key-value pairs, a CPU
+set, and some flags. The key-value pairs are supplied with a count. Userspace
+must prepopulate the key field for each element, and the kernel will fill in the
+value if the key is recognized. If a key is unknown to the kernel, its key field
+will be cleared to -1, and its value set to 0. The CPU set is defined by
+CPU_SET(3) with size ``cpusetsize`` bytes. For value-like keys (eg. vendor,
+arch, impl), the returned value will only be valid if all CPUs in the given set
+have the same value. Otherwise -1 will be returned. For boolean-like keys, the
+value returned will be a logical AND of the values for the specified CPUs.
+Usermode can supply NULL for ``cpus`` and 0 for ``cpusetsize`` as a shortcut for
+all online CPUs. The currently supported flags are:
+
+* :c:macro:`RISCV_HWPROBE_WHICH_CPUS`: This flag basically reverses the behavior
+ of sys_riscv_hwprobe(). Instead of populating the values of keys for a given
+ set of CPUs, the values of each key are given and the set of CPUs is reduced
+ by sys_riscv_hwprobe() to only those which match each of the key-value pairs.
+ How matching is done depends on the key type. For value-like keys, matching
+ means to be the exact same as the value. For boolean-like keys, matching
+ means the result of a logical AND of the pair's value with the CPU's value is
+ exactly the same as the pair's value. Additionally, when ``cpus`` is an empty
+ set, then it is initialized to all online CPUs which fit within it, i.e. the
+ CPU set returned is the reduction of all the online CPUs which can be
+ represented with a CPU set of size ``cpusetsize``.
+
+All other flags are reserved for future compatibility and must be zero.
+
+On success 0 is returned, on failure a negative error code is returned.
+
+The following keys are defined:
+
+* :c:macro:`RISCV_HWPROBE_KEY_MVENDORID`: Contains the value of ``mvendorid``,
+ as defined by the RISC-V privileged architecture specification.
+
+* :c:macro:`RISCV_HWPROBE_KEY_MARCHID`: Contains the value of ``marchid``, as
+ defined by the RISC-V privileged architecture specification.
+
+* :c:macro:`RISCV_HWPROBE_KEY_MIMPID`: Contains the value of ``mimpid``, as
+ defined by the RISC-V privileged architecture specification.
+
+* :c:macro:`RISCV_HWPROBE_KEY_BASE_BEHAVIOR`: A bitmask containing the base
+ user-visible behavior that this kernel supports. The following base user ABIs
+ are defined:
+
+ * :c:macro:`RISCV_HWPROBE_BASE_BEHAVIOR_IMA`: Support for rv32ima or
+ rv64ima, as defined by version 2.2 of the user ISA and version 1.10 of the
+ privileged ISA, with the following known exceptions (more exceptions may be
+ added, but only if it can be demonstrated that the user ABI is not broken):
+
+ * The ``fence.i`` instruction cannot be directly executed by userspace
+ programs (it may still be executed in userspace via a
+ kernel-controlled mechanism such as the vDSO).
+
+* :c:macro:`RISCV_HWPROBE_KEY_IMA_EXT_0`: A bitmask containing the extensions
+ that are compatible with the :c:macro:`RISCV_HWPROBE_BASE_BEHAVIOR_IMA`:
+ base system behavior.
+
+ * :c:macro:`RISCV_HWPROBE_IMA_FD`: The F and D extensions are supported, as
+ defined by commit cd20cee ("FMIN/FMAX now implement
+ minimumNumber/maximumNumber, not minNum/maxNum") of the RISC-V ISA manual.
+
+ * :c:macro:`RISCV_HWPROBE_IMA_C`: The C extension is supported, as defined
+ by version 2.2 of the RISC-V ISA manual.
+
+ * :c:macro:`RISCV_HWPROBE_IMA_V`: The V extension is supported, as defined by
+ version 1.0 of the RISC-V Vector extension manual.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZBA`: The Zba address generation extension is
+ supported, as defined in version 1.0 of the Bit-Manipulation ISA
+ extensions.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZBB`: The Zbb extension is supported, as defined
+ in version 1.0 of the Bit-Manipulation ISA extensions.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZBS`: The Zbs extension is supported, as defined
+ in version 1.0 of the Bit-Manipulation ISA extensions.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZICBOZ`: The Zicboz extension is supported, as
+ ratified in commit 3dd606f ("Create cmobase-v1.0.pdf") of riscv-CMOs.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZBC` The Zbc extension is supported, as defined
+ in version 1.0 of the Bit-Manipulation ISA extensions.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZBKB` The Zbkb extension is supported, as
+ defined in version 1.0 of the Scalar Crypto ISA extensions.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZBKC` The Zbkc extension is supported, as
+ defined in version 1.0 of the Scalar Crypto ISA extensions.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZBKX` The Zbkx extension is supported, as
+ defined in version 1.0 of the Scalar Crypto ISA extensions.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZKND` The Zknd extension is supported, as
+ defined in version 1.0 of the Scalar Crypto ISA extensions.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZKNE` The Zkne extension is supported, as
+ defined in version 1.0 of the Scalar Crypto ISA extensions.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZKNH` The Zknh extension is supported, as
+ defined in version 1.0 of the Scalar Crypto ISA extensions.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZKSED` The Zksed extension is supported, as
+ defined in version 1.0 of the Scalar Crypto ISA extensions.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZKSH` The Zksh extension is supported, as
+ defined in version 1.0 of the Scalar Crypto ISA extensions.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZKT` The Zkt extension is supported, as defined
+ in version 1.0 of the Scalar Crypto ISA extensions.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZVBB`: The Zvbb extension is supported as
+ defined in version 1.0 of the RISC-V Cryptography Extensions Volume II.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZVBC`: The Zvbc extension is supported as
+ defined in version 1.0 of the RISC-V Cryptography Extensions Volume II.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZVKB`: The Zvkb extension is supported as
+ defined in version 1.0 of the RISC-V Cryptography Extensions Volume II.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZVKG`: The Zvkg extension is supported as
+ defined in version 1.0 of the RISC-V Cryptography Extensions Volume II.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZVKNED`: The Zvkned extension is supported as
+ defined in version 1.0 of the RISC-V Cryptography Extensions Volume II.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZVKNHA`: The Zvknha extension is supported as
+ defined in version 1.0 of the RISC-V Cryptography Extensions Volume II.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZVKNHB`: The Zvknhb extension is supported as
+ defined in version 1.0 of the RISC-V Cryptography Extensions Volume II.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZVKSED`: The Zvksed extension is supported as
+ defined in version 1.0 of the RISC-V Cryptography Extensions Volume II.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZVKSH`: The Zvksh extension is supported as
+ defined in version 1.0 of the RISC-V Cryptography Extensions Volume II.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZVKT`: The Zvkt extension is supported as
+ defined in version 1.0 of the RISC-V Cryptography Extensions Volume II.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZFH`: The Zfh extension version 1.0 is supported
+ as defined in the RISC-V ISA manual.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZFHMIN`: The Zfhmin extension version 1.0 is
+ supported as defined in the RISC-V ISA manual.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZIHINTNTL`: The Zihintntl extension version 1.0
+ is supported as defined in the RISC-V ISA manual.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZVFH`: The Zvfh extension is supported as
+ defined in the RISC-V Vector manual starting from commit e2ccd0548d6c
+ ("Remove draft warnings from Zvfh[min]").
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZVFHMIN`: The Zvfhmin extension is supported as
+ defined in the RISC-V Vector manual starting from commit e2ccd0548d6c
+ ("Remove draft warnings from Zvfh[min]").
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZFA`: The Zfa extension is supported as
+ defined in the RISC-V ISA manual starting from commit 056b6ff467c7
+ ("Zfa is ratified").
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZTSO`: The Ztso extension is supported as
+ defined in the RISC-V ISA manual starting from commit 5618fb5a216b
+ ("Ztso is now ratified.")
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZACAS`: The Zacas extension is supported as
+ defined in the Atomic Compare-and-Swap (CAS) instructions manual starting
+ from commit 5059e0ca641c ("update to ratified").
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZICNTR`: The Zicntr extension version 2.0
+ is supported as defined in the RISC-V ISA manual.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZICOND`: The Zicond extension is supported as
+ defined in the RISC-V Integer Conditional (Zicond) operations extension
+ manual starting from commit 95cf1f9 ("Add changes requested by Ved
+ during signoff")
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZIHINTPAUSE`: The Zihintpause extension is
+ supported as defined in the RISC-V ISA manual starting from commit
+ d8ab5c78c207 ("Zihintpause is ratified").
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZIHPM`: The Zihpm extension version 2.0
+ is supported as defined in the RISC-V ISA manual.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZVE32X`: The Vector sub-extension Zve32x is
+ supported, as defined by version 1.0 of the RISC-V Vector extension manual.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZVE32F`: The Vector sub-extension Zve32f is
+ supported, as defined by version 1.0 of the RISC-V Vector extension manual.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZVE64X`: The Vector sub-extension Zve64x is
+ supported, as defined by version 1.0 of the RISC-V Vector extension manual.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZVE64F`: The Vector sub-extension Zve64f is
+ supported, as defined by version 1.0 of the RISC-V Vector extension manual.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZVE64D`: The Vector sub-extension Zve64d is
+ supported, as defined by version 1.0 of the RISC-V Vector extension manual.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZIMOP`: The Zimop May-Be-Operations extension is
+ supported as defined in the RISC-V ISA manual starting from commit
+ 58220614a5f ("Zimop is ratified/1.0").
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZCA`: The Zca extension part of Zc* standard
+ extensions for code size reduction, as ratified in commit 8be3419c1c0
+ ("Zcf doesn't exist on RV64 as it contains no instructions") of
+ riscv-code-size-reduction.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZCB`: The Zcb extension part of Zc* standard
+ extensions for code size reduction, as ratified in commit 8be3419c1c0
+ ("Zcf doesn't exist on RV64 as it contains no instructions") of
+ riscv-code-size-reduction.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZCD`: The Zcd extension part of Zc* standard
+ extensions for code size reduction, as ratified in commit 8be3419c1c0
+ ("Zcf doesn't exist on RV64 as it contains no instructions") of
+ riscv-code-size-reduction.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZCF`: The Zcf extension part of Zc* standard
+ extensions for code size reduction, as ratified in commit 8be3419c1c0
+ ("Zcf doesn't exist on RV64 as it contains no instructions") of
+ riscv-code-size-reduction.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZCMOP`: The Zcmop May-Be-Operations extension is
+ supported as defined in the RISC-V ISA manual starting from commit
+ c732a4f39a4 ("Zcmop is ratified/1.0").
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZAWRS`: The Zawrs extension is supported as
+ ratified in commit 98918c844281 ("Merge pull request #1217 from
+ riscv/zawrs") of riscv-isa-manual.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZAAMO`: The Zaamo extension is supported as
+ defined in the in the RISC-V ISA manual starting from commit e87412e621f1
+ ("integrate Zaamo and Zalrsc text (#1304)").
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZALRSC`: The Zalrsc extension is supported as
+ defined in the in the RISC-V ISA manual starting from commit e87412e621f1
+ ("integrate Zaamo and Zalrsc text (#1304)").
+
+ * :c:macro:`RISCV_HWPROBE_EXT_SUPM`: The Supm extension is supported as
+ defined in version 1.0 of the RISC-V Pointer Masking extensions.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZFBFMIN`: The Zfbfmin extension is supported as
+ defined in the RISC-V ISA manual starting from commit 4dc23d6229de
+ ("Added Chapter title to BF16").
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZVFBFMIN`: The Zvfbfmin extension is supported as
+ defined in the RISC-V ISA manual starting from commit 4dc23d6229de
+ ("Added Chapter title to BF16").
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZVFBFWMA`: The Zvfbfwma extension is supported as
+ defined in the RISC-V ISA manual starting from commit 4dc23d6229de
+ ("Added Chapter title to BF16").
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZICBOM`: The Zicbom extension is supported, as
+ ratified in commit 3dd606f ("Create cmobase-v1.0.pdf") of riscv-CMOs.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZABHA`: The Zabha extension is supported as
+ ratified in commit 49f49c842ff9 ("Update to Rafified state") of
+ riscv-zabha.
+
+* :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: Deprecated. Returns similar values to
+ :c:macro:`RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF`, but the key was
+ mistakenly classified as a bitmask rather than a value.
+
+* :c:macro:`RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF`: An enum value describing
+ the performance of misaligned scalar native word accesses on the selected set
+ of processors.
+
+ * :c:macro:`RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN`: The performance of
+ misaligned scalar accesses is unknown.
+
+ * :c:macro:`RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED`: Misaligned scalar
+ accesses are emulated via software, either in or below the kernel. These
+ accesses are always extremely slow.
+
+ * :c:macro:`RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW`: Misaligned scalar native
+ word sized accesses are slower than the equivalent quantity of byte
+ accesses. Misaligned accesses may be supported directly in hardware, or
+ trapped and emulated by software.
+
+ * :c:macro:`RISCV_HWPROBE_MISALIGNED_SCALAR_FAST`: Misaligned scalar native
+ word sized accesses are faster than the equivalent quantity of byte
+ accesses.
+
+ * :c:macro:`RISCV_HWPROBE_MISALIGNED_SCALAR_UNSUPPORTED`: Misaligned scalar
+ accesses are not supported at all and will generate a misaligned address
+ fault.
+
+* :c:macro:`RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE`: An unsigned int which
+ represents the size of the Zicboz block in bytes.
+
+* :c:macro:`RISCV_HWPROBE_KEY_HIGHEST_VIRT_ADDRESS`: An unsigned long which
+ represent the highest userspace virtual address usable.
+
+* :c:macro:`RISCV_HWPROBE_KEY_TIME_CSR_FREQ`: Frequency (in Hz) of `time CSR`.
+
+* :c:macro:`RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF`: An enum value describing the
+ performance of misaligned vector accesses on the selected set of processors.
+
+ * :c:macro:`RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN`: The performance of misaligned
+ vector accesses is unknown.
+
+ * :c:macro:`RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW`: 32-bit misaligned accesses using vector
+ registers are slower than the equivalent quantity of byte accesses via vector registers.
+ Misaligned accesses may be supported directly in hardware, or trapped and emulated by software.
+
+ * :c:macro:`RISCV_HWPROBE_MISALIGNED_VECTOR_FAST`: 32-bit misaligned accesses using vector
+ registers are faster than the equivalent quantity of byte accesses via vector registers.
+
+ * :c:macro:`RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED`: Misaligned vector accesses are
+ not supported at all and will generate a misaligned address fault.
+
+* :c:macro:`RISCV_HWPROBE_KEY_VENDOR_EXT_MIPS_0`: A bitmask containing the
+ mips vendor extensions that are compatible with the
+ :c:macro:`RISCV_HWPROBE_BASE_BEHAVIOR_IMA`: base system behavior.
+
+ * MIPS
+
+ * :c:macro:`RISCV_HWPROBE_VENDOR_EXT_XMIPSEXECTL`: The xmipsexectl vendor
+ extension is supported in the MIPS ISA extensions spec.
+
+* :c:macro:`RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0`: A bitmask containing the
+ thead vendor extensions that are compatible with the
+ :c:macro:`RISCV_HWPROBE_BASE_BEHAVIOR_IMA`: base system behavior.
+
+ * T-HEAD
+
+ * :c:macro:`RISCV_HWPROBE_VENDOR_EXT_XTHEADVECTOR`: The xtheadvector vendor
+ extension is supported in the T-Head ISA extensions spec starting from
+ commit a18c801634 ("Add T-Head VECTOR vendor extension. ").
+
+* :c:macro:`RISCV_HWPROBE_KEY_ZICBOM_BLOCK_SIZE`: An unsigned int which
+ represents the size of the Zicbom block in bytes.
+
+* :c:macro:`RISCV_HWPROBE_KEY_VENDOR_EXT_SIFIVE_0`: A bitmask containing the
+ sifive vendor extensions that are compatible with the
+ :c:macro:`RISCV_HWPROBE_BASE_BEHAVIOR_IMA`: base system behavior.
+
+ * SIFIVE
+
+ * :c:macro:`RISCV_HWPROBE_VENDOR_EXT_XSFVQMACCDOD`: The Xsfqmaccdod vendor
+ extension is supported in version 1.1 of SiFive Int8 Matrix Multiplication
+ Extensions Specification.
+
+ * :c:macro:`RISCV_HWPROBE_VENDOR_EXT_XSFVQMACCQOQ`: The Xsfqmaccqoq vendor
+ extension is supported in version 1.1 of SiFive Int8 Matrix Multiplication
+ Instruction Extensions Specification.
+
+ * :c:macro:`RISCV_HWPROBE_VENDOR_EXT_XSFVFNRCLIPXFQF`: The Xsfvfnrclipxfqf
+ vendor extension is supported in version 1.0 of SiFive FP32-to-int8 Ranged
+ Clip Instructions Extensions Specification.
+
+ * :c:macro:`RISCV_HWPROBE_VENDOR_EXT_XSFVFWMACCQQQ`: The Xsfvfwmaccqqq
+ vendor extension is supported in version 1.0 of Matrix Multiply Accumulate
+ Instruction Extensions Specification. \ No newline at end of file
diff --git a/Documentation/arch/riscv/index.rst b/Documentation/arch/riscv/index.rst
new file mode 100644
index 000000000000..eecf347ce849
--- /dev/null
+++ b/Documentation/arch/riscv/index.rst
@@ -0,0 +1,25 @@
+===================
+RISC-V architecture
+===================
+
+.. toctree::
+ :maxdepth: 1
+
+ acpi
+ boot
+ boot-image-header
+ vm-layout
+ hwprobe
+ patch-acceptance
+ uabi
+ vector
+ cmodx
+
+ features
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
diff --git a/Documentation/arch/riscv/patch-acceptance.rst b/Documentation/arch/riscv/patch-acceptance.rst
new file mode 100644
index 000000000000..634aa222b410
--- /dev/null
+++ b/Documentation/arch/riscv/patch-acceptance.rst
@@ -0,0 +1,59 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+arch/riscv maintenance guidelines for developers
+================================================
+
+Overview
+--------
+The RISC-V instruction set architecture is developed in the open:
+in-progress drafts are available for all to review and to experiment
+with implementations. New module or extension drafts can change
+during the development process - sometimes in ways that are
+incompatible with previous drafts. This flexibility can present a
+challenge for RISC-V Linux maintenance. Linux maintainers disapprove
+of churn, and the Linux development process prefers well-reviewed and
+tested code over experimental code. We wish to extend these same
+principles to the RISC-V-related code that will be accepted for
+inclusion in the kernel.
+
+Patchwork
+---------
+
+RISC-V has a patchwork instance, where the status of patches can be checked:
+
+ https://patchwork.kernel.org/project/linux-riscv/list/
+
+If your patch does not appear in the default view, the RISC-V maintainers have
+likely either requested changes, or expect it to be applied to another tree.
+
+Automation runs against this patchwork instance, building/testing patches as
+they arrive. The automation applies patches against the current HEAD of the
+RISC-V `for-next` and `fixes` branches, depending on whether the patch has been
+detected as a fix. Failing those, it will use the RISC-V `master` branch.
+The exact commit to which a series has been applied will be noted on patchwork.
+Patches for which any of the checks fail are unlikely to be applied and in most
+cases will need to be resubmitted.
+
+Submit Checklist Addendum
+-------------------------
+We'll only accept patches for new modules or extensions if the
+specifications for those modules or extensions are listed as being
+unlikely to be incompatibly changed in the future. For
+specifications from the RISC-V foundation this means "Frozen" or
+"Ratified", for the UEFI forum specifications this means a published
+ECR. (Developers may, of course, maintain their own Linux kernel trees
+that contain code for any draft extensions that they wish.)
+
+Additionally, the RISC-V specification allows implementers to create
+their own custom extensions. These custom extensions aren't required
+to go through any review or ratification process by the RISC-V
+Foundation. To avoid the maintenance complexity and potential
+performance impact of adding kernel code for implementor-specific
+RISC-V extensions, we'll only consider patches for extensions that either:
+
+- Have been officially frozen or ratified by the RISC-V Foundation, or
+- Have been implemented in hardware that is widely available, per standard
+ Linux practice.
+
+(Implementers, may, of course, maintain their own Linux kernel trees containing
+code for any custom extensions that they wish.)
diff --git a/Documentation/arch/riscv/uabi.rst b/Documentation/arch/riscv/uabi.rst
new file mode 100644
index 000000000000..243e40062e34
--- /dev/null
+++ b/Documentation/arch/riscv/uabi.rst
@@ -0,0 +1,86 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+RISC-V Linux User ABI
+=====================
+
+ISA string ordering in /proc/cpuinfo
+------------------------------------
+
+The canonical order of ISA extension names in the ISA string is defined in
+chapter 27 of the unprivileged specification.
+The specification uses vague wording, such as should, when it comes to ordering,
+so for our purposes the following rules apply:
+
+#. Single-letter extensions come first, in canonical order.
+ The canonical order is "IMAFDQLCBKJTPVH".
+
+#. All multi-letter extensions will be separated from other extensions by an
+ underscore.
+
+#. Additional standard extensions (starting with 'Z') will be sorted after
+ single-letter extensions and before any higher-privileged extensions.
+
+#. For additional standard extensions, the first letter following the 'Z'
+ conventionally indicates the most closely related alphabetical
+ extension category. If multiple 'Z' extensions are named, they will be
+ ordered first by category, in canonical order, as listed above, then
+ alphabetically within a category.
+
+#. Standard supervisor-level extensions (starting with 'S') will be listed
+ after standard unprivileged extensions. If multiple supervisor-level
+ extensions are listed, they will be ordered alphabetically.
+
+#. Standard machine-level extensions (starting with 'Zxm') will be listed
+ after any lower-privileged, standard extensions. If multiple machine-level
+ extensions are listed, they will be ordered alphabetically.
+
+#. Non-standard extensions (starting with 'X') will be listed after all standard
+ extensions. If multiple non-standard extensions are listed, they will be
+ ordered alphabetically.
+
+An example string following the order is::
+
+ rv64imadc_zifoo_zigoo_zafoo_sbar_scar_zxmbaz_xqux_xrux
+
+"isa" and "hart isa" lines in /proc/cpuinfo
+-------------------------------------------
+
+The "isa" line in /proc/cpuinfo describes the lowest common denominator of
+RISC-V ISA extensions recognized by the kernel and implemented on all harts. The
+"hart isa" line, in contrast, describes the set of extensions recognized by the
+kernel on the particular hart being described, even if those extensions may not
+be present on all harts in the system.
+
+In both lines, the presence of an extension guarantees only that the hardware
+has the described capability. Additional kernel support or policy changes may be
+required before an extension's capability is fully usable by userspace programs.
+Similarly, for S-mode extensions, presence in one of these lines does not
+guarantee that the kernel is taking advantage of the extension, or that the
+feature will be visible in guest VMs managed by this kernel.
+
+Inversely, the absence of an extension in these lines does not necessarily mean
+the hardware does not support that feature. The running kernel may not recognize
+the extension, or may have deliberately removed it from the listing.
+
+Misaligned accesses
+-------------------
+
+Misaligned scalar accesses are supported in userspace, but they may perform
+poorly. Misaligned vector accesses are only supported if the Zicclsm extension
+is supported.
+
+Pointer masking
+---------------
+
+Support for pointer masking in userspace (the Supm extension) is provided via
+the ``PR_SET_TAGGED_ADDR_CTRL`` and ``PR_GET_TAGGED_ADDR_CTRL`` ``prctl()``
+operations. Pointer masking is disabled by default. To enable it, userspace
+must call ``PR_SET_TAGGED_ADDR_CTRL`` with the ``PR_PMLEN`` field set to the
+number of mask/tag bits needed by the application. ``PR_PMLEN`` is interpreted
+as a lower bound; if the kernel is unable to satisfy the request, the
+``PR_SET_TAGGED_ADDR_CTRL`` operation will fail. The actual number of tag bits
+is returned in ``PR_PMLEN`` by the ``PR_GET_TAGGED_ADDR_CTRL`` operation.
+
+Additionally, when pointer masking is enabled (``PR_PMLEN`` is greater than 0),
+a tagged address ABI is supported, with the same interface and behavior as
+documented for AArch64 (Documentation/arch/arm64/tagged-address-abi.rst).
diff --git a/Documentation/arch/riscv/vector.rst b/Documentation/arch/riscv/vector.rst
new file mode 100644
index 000000000000..3987f5f76a9d
--- /dev/null
+++ b/Documentation/arch/riscv/vector.rst
@@ -0,0 +1,140 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================================
+Vector Extension Support for RISC-V Linux
+=========================================
+
+This document briefly outlines the interface provided to userspace by Linux in
+order to support the use of the RISC-V Vector Extension.
+
+1. prctl() Interface
+---------------------
+
+Two new prctl() calls are added to allow programs to manage the enablement
+status for the use of Vector in userspace. The intended usage guideline for
+these interfaces is to give init systems a way to modify the availability of V
+for processes running under its domain. Calling these interfaces is not
+recommended in libraries routines because libraries should not override policies
+configured from the parent process. Also, users must note that these interfaces
+are not portable to non-Linux, nor non-RISC-V environments, so it is discourage
+to use in a portable code. To get the availability of V in an ELF program,
+please read :c:macro:`COMPAT_HWCAP_ISA_V` bit of :c:macro:`ELF_HWCAP` in the
+auxiliary vector.
+
+* prctl(PR_RISCV_V_SET_CONTROL, unsigned long arg)
+
+ Sets the Vector enablement status of the calling thread, where the control
+ argument consists of two 2-bit enablement statuses and a bit for inheritance
+ mode. Other threads of the calling process are unaffected.
+
+ Enablement status is a tri-state value each occupying 2-bit of space in
+ the control argument:
+
+ * :c:macro:`PR_RISCV_V_VSTATE_CTRL_DEFAULT`: Use the system-wide default
+ enablement status on execve(). The system-wide default setting can be
+ controlled via sysctl interface (see sysctl section below).
+
+ * :c:macro:`PR_RISCV_V_VSTATE_CTRL_ON`: Allow Vector to be run for the
+ thread.
+
+ * :c:macro:`PR_RISCV_V_VSTATE_CTRL_OFF`: Disallow Vector. Executing Vector
+ instructions under such condition will trap and casuse the termination of the thread.
+
+ arg: The control argument is a 5-bit value consisting of 3 parts, and
+ accessed by 3 masks respectively.
+
+ The 3 masks, PR_RISCV_V_VSTATE_CTRL_CUR_MASK,
+ PR_RISCV_V_VSTATE_CTRL_NEXT_MASK, and PR_RISCV_V_VSTATE_CTRL_INHERIT
+ represents bit[1:0], bit[3:2], and bit[4]. bit[1:0] accounts for the
+ enablement status of current thread, and the setting at bit[3:2] takes place
+ at next execve(). bit[4] defines the inheritance mode of the setting in
+ bit[3:2].
+
+ * :c:macro:`PR_RISCV_V_VSTATE_CTRL_CUR_MASK`: bit[1:0]: Account for the
+ Vector enablement status for the calling thread. The calling thread is
+ not able to turn off Vector once it has been enabled. The prctl() call
+ fails with EPERM if the value in this mask is PR_RISCV_V_VSTATE_CTRL_OFF
+ but the current enablement status is not off. Setting
+ PR_RISCV_V_VSTATE_CTRL_DEFAULT here takes no effect but to set back
+ the original enablement status.
+
+ * :c:macro:`PR_RISCV_V_VSTATE_CTRL_NEXT_MASK`: bit[3:2]: Account for the
+ Vector enablement setting for the calling thread at the next execve()
+ system call. If PR_RISCV_V_VSTATE_CTRL_DEFAULT is used in this mask,
+ then the enablement status will be decided by the system-wide
+ enablement status when execve() happen.
+
+ * :c:macro:`PR_RISCV_V_VSTATE_CTRL_INHERIT`: bit[4]: the inheritance
+ mode for the setting at PR_RISCV_V_VSTATE_CTRL_NEXT_MASK. If the bit
+ is set then the following execve() will not clear the setting in both
+ PR_RISCV_V_VSTATE_CTRL_NEXT_MASK and PR_RISCV_V_VSTATE_CTRL_INHERIT.
+ This setting persists across changes in the system-wide default value.
+
+ Return value:
+ * 0 on success;
+ * EINVAL: Vector not supported, invalid enablement status for current or
+ next mask;
+ * EPERM: Turning off Vector in PR_RISCV_V_VSTATE_CTRL_CUR_MASK if Vector
+ was enabled for the calling thread.
+
+ On success:
+ * A valid setting for PR_RISCV_V_VSTATE_CTRL_CUR_MASK takes place
+ immediately. The enablement status specified in
+ PR_RISCV_V_VSTATE_CTRL_NEXT_MASK happens at the next execve() call, or
+ all following execve() calls if PR_RISCV_V_VSTATE_CTRL_INHERIT bit is
+ set.
+ * Every successful call overwrites a previous setting for the calling
+ thread.
+
+* prctl(PR_RISCV_V_GET_CONTROL)
+
+ Gets the same Vector enablement status for the calling thread. Setting for
+ next execve() call and the inheritance bit are all OR-ed together.
+
+ Note that ELF programs are able to get the availability of V for itself by
+ reading :c:macro:`COMPAT_HWCAP_ISA_V` bit of :c:macro:`ELF_HWCAP` in the
+ auxiliary vector.
+
+ Return value:
+ * a nonnegative value on success;
+ * EINVAL: Vector not supported.
+
+2. System runtime configuration (sysctl)
+-----------------------------------------
+
+To mitigate the ABI impact of expansion of the signal stack, a
+policy mechanism is provided to the administrators, distro maintainers, and
+developers to control the default Vector enablement status for userspace
+processes in form of sysctl knob:
+
+* /proc/sys/abi/riscv_v_default_allow
+
+ Writing the text representation of 0 or 1 to this file sets the default
+ system enablement status for new starting userspace programs. Valid values
+ are:
+
+ * 0: Do not allow Vector code to be executed as the default for new processes.
+ * 1: Allow Vector code to be executed as the default for new processes.
+
+ Reading this file returns the current system default enablement status.
+
+ At every execve() call, a new enablement status of the new process is set to
+ the system default, unless:
+
+ * PR_RISCV_V_VSTATE_CTRL_INHERIT is set for the calling process, and the
+ setting in PR_RISCV_V_VSTATE_CTRL_NEXT_MASK is not
+ PR_RISCV_V_VSTATE_CTRL_DEFAULT. Or,
+
+ * The setting in PR_RISCV_V_VSTATE_CTRL_NEXT_MASK is not
+ PR_RISCV_V_VSTATE_CTRL_DEFAULT.
+
+ Modifying the system default enablement status does not affect the enablement
+ status of any existing process of thread that do not make an execve() call.
+
+3. Vector Register State Across System Calls
+---------------------------------------------
+
+As indicated by version 1.0 of the V extension [1], vector registers are
+clobbered by system calls.
+
+1: https://github.com/riscv/riscv-v-spec/blob/master/calling-convention.adoc
diff --git a/Documentation/arch/riscv/vm-layout.rst b/Documentation/arch/riscv/vm-layout.rst
new file mode 100644
index 000000000000..eabec99b5852
--- /dev/null
+++ b/Documentation/arch/riscv/vm-layout.rst
@@ -0,0 +1,136 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================================
+Virtual Memory Layout on RISC-V Linux
+=====================================
+
+:Author: Alexandre Ghiti <alex@ghiti.fr>
+:Date: 12 February 2021
+
+This document describes the virtual memory layout used by the RISC-V Linux
+Kernel.
+
+RISC-V Linux Kernel 32bit
+=========================
+
+RISC-V Linux Kernel SV32
+------------------------
+
+TODO
+
+RISC-V Linux Kernel 64bit
+=========================
+
+The RISC-V privileged architecture document states that the 64bit addresses
+"must have bits 63–48 all equal to bit 47, or else a page-fault exception will
+occur.": that splits the virtual address space into 2 halves separated by a very
+big hole, the lower half is where the userspace resides, the upper half is where
+the RISC-V Linux Kernel resides.
+
+RISC-V Linux Kernel SV39
+------------------------
+
+::
+
+ ========================================================================================================================
+ Start addr | Offset | End addr | Size | VM area description
+ ========================================================================================================================
+ | | | |
+ 0000000000000000 | 0 | 0000003fffffffff | 256 GB | user-space virtual memory, different per mm
+ __________________|____________|__________________|_________|___________________________________________________________
+ | | | |
+ 0000004000000000 | +256 GB | ffffffbfffffffff | ~16M TB | ... huge, almost 64 bits wide hole of non-canonical
+ | | | | virtual memory addresses up to the -256 GB
+ | | | | starting offset of kernel mappings.
+ __________________|____________|__________________|_________|___________________________________________________________
+ |
+ | Kernel-space virtual memory, shared between all processes:
+ ____________________________________________________________|___________________________________________________________
+ | | | |
+ ffffffc4fea00000 | -236 GB | ffffffc4feffffff | 6 MB | fixmap
+ ffffffc4ff000000 | -236 GB | ffffffc4ffffffff | 16 MB | PCI io
+ ffffffc500000000 | -236 GB | ffffffc5ffffffff | 4 GB | vmemmap
+ ffffffc600000000 | -232 GB | ffffffd5ffffffff | 64 GB | vmalloc/ioremap space
+ ffffffd600000000 | -168 GB | fffffff5ffffffff | 128 GB | direct mapping of all physical memory
+ | | | |
+ fffffff700000000 | -36 GB | fffffffeffffffff | 32 GB | kasan
+ __________________|____________|__________________|_________|____________________________________________________________
+ |
+ |
+ ____________________________________________________________|____________________________________________________________
+ | | | |
+ ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | modules, BPF
+ ffffffff80000000 | -2 GB | ffffffffffffffff | 2 GB | kernel
+ __________________|____________|__________________|_________|____________________________________________________________
+
+
+RISC-V Linux Kernel SV48
+------------------------
+
+::
+
+ ========================================================================================================================
+ Start addr | Offset | End addr | Size | VM area description
+ ========================================================================================================================
+ | | | |
+ 0000000000000000 | 0 | 00007fffffffffff | 128 TB | user-space virtual memory, different per mm
+ __________________|____________|__________________|_________|___________________________________________________________
+ | | | |
+ 0000800000000000 | +128 TB | ffff7fffffffffff | ~16M TB | ... huge, almost 64 bits wide hole of non-canonical
+ | | | | virtual memory addresses up to the -128 TB
+ | | | | starting offset of kernel mappings.
+ __________________|____________|__________________|_________|___________________________________________________________
+ |
+ | Kernel-space virtual memory, shared between all processes:
+ ____________________________________________________________|___________________________________________________________
+ | | | |
+ ffff8d7ffea00000 | -114.5 TB | ffff8d7ffeffffff | 6 MB | fixmap
+ ffff8d7fff000000 | -114.5 TB | ffff8d7fffffffff | 16 MB | PCI io
+ ffff8d8000000000 | -114.5 TB | ffff8f7fffffffff | 2 TB | vmemmap
+ ffff8f8000000000 | -112.5 TB | ffffaf7fffffffff | 32 TB | vmalloc/ioremap space
+ ffffaf8000000000 | -80.5 TB | ffffef7fffffffff | 64 TB | direct mapping of all physical memory
+ ffffef8000000000 | -16.5 TB | fffffffeffffffff | 16.5 TB | kasan
+ __________________|____________|__________________|_________|____________________________________________________________
+ |
+ | Identical layout to the 39-bit one from here on:
+ ____________________________________________________________|____________________________________________________________
+ | | | |
+ ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | modules, BPF
+ ffffffff80000000 | -2 GB | ffffffffffffffff | 2 GB | kernel
+ __________________|____________|__________________|_________|____________________________________________________________
+
+
+RISC-V Linux Kernel SV57
+------------------------
+
+::
+
+ ========================================================================================================================
+ Start addr | Offset | End addr | Size | VM area description
+ ========================================================================================================================
+ | | | |
+ 0000000000000000 | 0 | 00ffffffffffffff | 64 PB | user-space virtual memory, different per mm
+ __________________|____________|__________________|_________|___________________________________________________________
+ | | | |
+ 0100000000000000 | +64 PB | feffffffffffffff | ~16K PB | ... huge, almost 64 bits wide hole of non-canonical
+ | | | | virtual memory addresses up to the -64 PB
+ | | | | starting offset of kernel mappings.
+ __________________|____________|__________________|_________|___________________________________________________________
+ |
+ | Kernel-space virtual memory, shared between all processes:
+ ____________________________________________________________|___________________________________________________________
+ | | | |
+ ff1bfffffea00000 | -57 PB | ff1bfffffeffffff | 6 MB | fixmap
+ ff1bffffff000000 | -57 PB | ff1bffffffffffff | 16 MB | PCI io
+ ff1c000000000000 | -57 PB | ff1fffffffffffff | 1 PB | vmemmap
+ ff20000000000000 | -56 PB | ff5fffffffffffff | 16 PB | vmalloc/ioremap space
+ ff60000000000000 | -40 PB | ffdeffffffffffff | 32 PB | direct mapping of all physical memory
+ ffdf000000000000 | -8 PB | fffffffeffffffff | 8 PB | kasan
+ __________________|____________|__________________|_________|____________________________________________________________
+ |
+ | Identical layout to the 39-bit one from here on:
+ ____________________________________________________________|____________________________________________________________
+ | | | |
+ ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | modules, BPF
+ ffffffff80000000 | -2 GB | ffffffffffffffff | 2 GB | kernel
+ __________________|____________|__________________|_________|____________________________________________________________
diff --git a/Documentation/s390/3270.ChangeLog b/Documentation/arch/s390/3270.ChangeLog
index 031c36081946..ecaf60b6c381 100644
--- a/Documentation/s390/3270.ChangeLog
+++ b/Documentation/arch/s390/3270.ChangeLog
@@ -16,7 +16,7 @@ Sep 2002: Dynamically get 3270 input buffer
Sep 2002: Fix tubfs kmalloc()s
* Do read and write lengths correctly in fs3270_read()
- and fs3270_write(), whilst never asking kmalloc()
+ and fs3270_write(), while never asking kmalloc()
for more than 0x800 bytes. Affects tubfs.c and tubio.h.
Sep 2002: Recognize 3270 control unit type 3174
diff --git a/Documentation/arch/s390/3270.rst b/Documentation/arch/s390/3270.rst
new file mode 100644
index 000000000000..467eace91473
--- /dev/null
+++ b/Documentation/arch/s390/3270.rst
@@ -0,0 +1,298 @@
+===============================
+IBM 3270 Display System support
+===============================
+
+This file describes the driver that supports local channel attachment
+of IBM 3270 devices. It consists of three sections:
+
+ * Introduction
+ * Installation
+ * Operation
+
+
+Introduction
+============
+
+This paper describes installing and operating 3270 devices under
+Linux/390. A 3270 device is a block-mode rows-and-columns terminal of
+which I'm sure hundreds of millions were sold by IBM and clonemakers
+twenty and thirty years ago.
+
+You may have 3270s in-house and not know it. If you're using the
+VM-ESA operating system, define a 3270 to your virtual machine by using
+the command "DEF GRAF <hex-address>" This paper presumes you will be
+defining four 3270s with the CP/CMS commands:
+
+ - DEF GRAF 620
+ - DEF GRAF 621
+ - DEF GRAF 622
+ - DEF GRAF 623
+
+Your network connection from VM-ESA allows you to use x3270, tn3270, or
+another 3270 emulator, started from an xterm window on your PC or
+workstation. With the DEF GRAF command, an application such as xterm,
+and this Linux-390 3270 driver, you have another way of talking to your
+Linux box.
+
+This paper covers installation of the driver and operation of a
+dialed-in x3270.
+
+
+Installation
+============
+
+You install the driver by installing a patch, doing a kernel build, and
+running the configuration script (config3270.sh, in this directory).
+
+WARNING: If you are using 3270 console support, you must rerun the
+configuration script every time you change the console's address (perhaps
+by using the condev= parameter in silo's /boot/parmfile). More precisely,
+you should rerun the configuration script every time your set of 3270s,
+including the console 3270, changes subchannel identifier relative to
+one another. ReIPL as soon as possible after running the configuration
+script and the resulting /tmp/mkdev3270.
+
+If you have chosen to make tub3270 a module, you add a line to a
+configuration file under /etc/modprobe.d/. If you are working on a VM
+virtual machine, you can use DEF GRAF to define virtual 3270 devices.
+
+You may generate both 3270 and 3215 console support, or one or the
+other, or neither. If you generate both, the console type under VM is
+not changed. Use #CP Q TERM to see what the current console type is.
+Use #CP TERM CONMODE 3270 to change it to 3270. If you generate only
+3270 console support, then the driver automatically converts your console
+at boot time to a 3270 if it is a 3215.
+
+In brief, these are the steps:
+
+ 1. Install the tub3270 patch
+ 2. (If a module) add a line to a file in `/etc/modprobe.d/*.conf`
+ 3. (If VM) define devices with DEF GRAF
+ 4. Reboot
+ 5. Configure
+
+To test that everything works, assuming VM and x3270,
+
+ 1. Bring up an x3270 window.
+ 2. Use the DIAL command in that window.
+ 3. You should immediately see a Linux login screen.
+
+Here are the installation steps in detail:
+
+ 1. The 3270 driver is a part of the official Linux kernel
+ source. Build a tree with the kernel source and any necessary
+ patches. Then do::
+
+ make oldconfig
+ (If you wish to disable 3215 console support, edit
+ .config; change CONFIG_TN3215's value to "n";
+ and rerun "make oldconfig".)
+ make image
+ make modules
+ make modules_install
+
+ 2. (Perform this step only if you have configured tub3270 as a
+ module.) Add a line to a file `/etc/modprobe.d/*.conf` to automatically
+ load the driver when it's needed. With this line added, you will see
+ login prompts appear on your 3270s as soon as boot is complete (or
+ with emulated 3270s, as soon as you dial into your vm guest using the
+ command "DIAL <vmguestname>"). Since the line-mode major number is
+ 227, the line to add should be::
+
+ alias char-major-227 tub3270
+
+ 3. Define graphic devices to your vm guest machine, if you
+ haven't already. Define them before you reboot (reipl):
+
+ - DEFINE GRAF 620
+ - DEFINE GRAF 621
+ - DEFINE GRAF 622
+ - DEFINE GRAF 623
+
+ 4. Reboot. The reboot process scans hardware devices, including
+ 3270s, and this enables the tub3270 driver once loaded to respond
+ correctly to the configuration requests of the next step. If
+ you have chosen 3270 console support, your console now behaves
+ as a 3270, not a 3215.
+
+ 5. Run the 3270 configuration script config3270. It is
+ distributed in this same directory, Documentation/arch/s390, as
+ config3270.sh. Inspect the output script it produces,
+ /tmp/mkdev3270, and then run that script. This will create the
+ necessary character special device files and make the necessary
+ changes to /etc/inittab.
+
+ Then notify /sbin/init that /etc/inittab has changed, by issuing
+ the telinit command with the q operand::
+
+ cd Documentation/arch/s390
+ sh config3270.sh
+ sh /tmp/mkdev3270
+ telinit q
+
+ This should be sufficient for your first time. If your 3270
+ configuration has changed and you're reusing config3270, you
+ should follow these steps::
+
+ Change 3270 configuration
+ Reboot
+ Run config3270 and /tmp/mkdev3270
+ Reboot
+
+Here are the testing steps in detail:
+
+ 1. Bring up an x3270 window, or use an actual hardware 3278 or
+ 3279, or use the 3270 emulator of your choice. You would be
+ running the emulator on your PC or workstation. You would use
+ the command, for example::
+
+ x3270 vm-esa-domain-name &
+
+ if you wanted a 3278 Model 4 with 43 rows of 80 columns, the
+ default model number. The driver does not take advantage of
+ extended attributes.
+
+ The screen you should now see contains a VM logo with input
+ lines near the bottom. Use TAB to move to the bottom line,
+ probably labeled "COMMAND ===>".
+
+ 2. Use the DIAL command instead of the LOGIN command to connect
+ to one of the virtual 3270s you defined with the DEF GRAF
+ commands::
+
+ dial my-vm-guest-name
+
+ 3. You should immediately see a login prompt from your
+ Linux-390 operating system. If that does not happen, you would
+ see instead the line "DIALED TO my-vm-guest-name 0620".
+
+ To troubleshoot: do these things.
+
+ A. Is the driver loaded? Use the lsmod command (no operands)
+ to find out. Probably it isn't. Try loading it manually, with
+ the command "insmod tub3270". Does that command give error
+ messages? Ha! There's your problem.
+
+ B. Is the /etc/inittab file modified as in installation step 3
+ above? Use the grep command to find out; for instance, issue
+ "grep 3270 /etc/inittab". Nothing found? There's your
+ problem!
+
+ C. Are the device special files created, as in installation
+ step 2 above? Use the ls -l command to find out; for instance,
+ issue "ls -l /dev/3270/tty620". The output should start with the
+ letter "c" meaning character device and should contain "227, 1"
+ just to the left of the device name. No such file? no "c"?
+ Wrong major number? Wrong minor number? There's your
+ problem!
+
+ D. Do you get the message::
+
+ "HCPDIA047E my-vm-guest-name 0620 does not exist"?
+
+ If so, you must issue the command "DEF GRAF 620" from your VM
+ 3215 console and then reboot the system.
+
+
+
+OPERATION.
+==========
+
+The driver defines three areas on the 3270 screen: the log area, the
+input area, and the status area.
+
+The log area takes up all but the bottom two lines of the screen. The
+driver writes terminal output to it, starting at the top line and going
+down. When it fills, the status area changes from "Linux Running" to
+"Linux More...". After a scrolling timeout of (default) 5 sec, the
+screen clears and more output is written, from the top down.
+
+The input area extends from the beginning of the second-to-last screen
+line to the start of the status area. You type commands in this area
+and hit ENTER to execute them.
+
+The status area initializes to "Linux Running" to give you a warm
+fuzzy feeling. When the log area fills up and output awaits, it
+changes to "Linux More...". At this time you can do several things or
+nothing. If you do nothing, the screen will clear in (default) 5 sec
+and more output will appear. You may hit ENTER with nothing typed in
+the input area to toggle between "Linux More..." and "Linux Holding",
+which indicates no scrolling will occur. (If you hit ENTER with "Linux
+Running" and nothing typed, the application receives a newline.)
+
+You may change the scrolling timeout value. For example, the following
+command line::
+
+ echo scrolltime=60 > /proc/tty/driver/tty3270
+
+changes the scrolling timeout value to 60 sec. Set scrolltime to 0 if
+you wish to prevent scrolling entirely.
+
+Other things you may do when the log area fills up are: hit PA2 to
+clear the log area and write more output to it, or hit CLEAR to clear
+the log area and the input area and write more output to the log area.
+
+Some of the Program Function (PF) and Program Attention (PA) keys are
+preassigned special functions. The ones that are not yield an alarm
+when pressed.
+
+PA1 causes a SIGINT to the currently running application. You may do
+the same thing from the input area, by typing "^C" and hitting ENTER.
+
+PA2 causes the log area to be cleared. If output awaits, it is then
+written to the log area.
+
+PF3 causes an EOF to be received as input by the application. You may
+cause an EOF also by typing "^D" and hitting ENTER.
+
+No PF key is preassigned to cause a job suspension, but you may cause a
+job suspension by typing "^Z" and hitting ENTER. You may wish to
+assign this function to a PF key. To make PF7 cause job suspension,
+execute the command::
+
+ echo pf7=^z > /proc/tty/driver/tty3270
+
+If the input you type does not end with the two characters "^n", the
+driver appends a newline character and sends it to the tty driver;
+otherwise the driver strips the "^n" and does not append a newline.
+The IBM 3215 driver behaves similarly.
+
+Pf10 causes the most recent command to be retrieved from the tube's
+command stack (default depth 20) and displayed in the input area. You
+may hit PF10 again for the next-most-recent command, and so on. A
+command is entered into the stack only when the input area is not made
+invisible (such as for password entry) and it is not identical to the
+current top entry. PF10 rotates backward through the command stack;
+PF11 rotates forward. You may assign the backward function to any PF
+key (or PA key, for that matter), say, PA3, with the command::
+
+ echo -e pa3=\\033k > /proc/tty/driver/tty3270
+
+This assigns the string ESC-k to PA3. Similarly, the string ESC-j
+performs the forward function. (Rationale: In bash with vi-mode line
+editing, ESC-k and ESC-j retrieve backward and forward history.
+Suggestions welcome.)
+
+Is a stack size of twenty commands not to your liking? Change it on
+the fly. To change to saving the last 100 commands, execute the
+command::
+
+ echo recallsize=100 > /proc/tty/driver/tty3270
+
+Have a command you issue frequently? Assign it to a PF or PA key! Use
+the command::
+
+ echo pf24="mkdir foobar; cd foobar" > /proc/tty/driver/tty3270
+
+to execute the commands mkdir foobar and cd foobar immediately when you
+hit PF24. Want to see the command line first, before you execute it?
+Use the -n option of the echo command::
+
+ echo -n pf24="mkdir foo; cd foo" > /proc/tty/driver/tty3270
+
+
+
+Happy testing! I welcome any and all comments about this document, the
+driver, etc etc.
+
+Dick Hitt <rbh00@utsglobal.com>
diff --git a/Documentation/arch/s390/cds.rst b/Documentation/arch/s390/cds.rst
new file mode 100644
index 000000000000..bcad2a14244a
--- /dev/null
+++ b/Documentation/arch/s390/cds.rst
@@ -0,0 +1,530 @@
+===========================
+Linux for S/390 and zSeries
+===========================
+
+Common Device Support (CDS)
+Device Driver I/O Support Routines
+
+Authors:
+ - Ingo Adlung
+ - Cornelia Huck
+
+Copyright, IBM Corp. 1999-2002
+
+Introduction
+============
+
+This document describes the common device support routines for Linux/390.
+Different than other hardware architectures, ESA/390 has defined a unified
+I/O access method. This gives relief to the device drivers as they don't
+have to deal with different bus types, polling versus interrupt
+processing, shared versus non-shared interrupt processing, DMA versus port
+I/O (PIO), and other hardware features more. However, this implies that
+either every single device driver needs to implement the hardware I/O
+attachment functionality itself, or the operating system provides for a
+unified method to access the hardware, providing all the functionality that
+every single device driver would have to provide itself.
+
+The document does not intend to explain the ESA/390 hardware architecture in
+every detail.This information can be obtained from the ESA/390 Principles of
+Operation manual (IBM Form. No. SA22-7201).
+
+In order to build common device support for ESA/390 I/O interfaces, a
+functional layer was introduced that provides generic I/O access methods to
+the hardware.
+
+The common device support layer comprises the I/O support routines defined
+below. Some of them implement common Linux device driver interfaces, while
+some of them are ESA/390 platform specific.
+
+Note:
+ In order to write a driver for S/390, you also need to look into the interface
+ described in Documentation/arch/s390/driver-model.rst.
+
+Note for porting drivers from 2.4:
+
+The major changes are:
+
+* The functions use a ccw_device instead of an irq (subchannel).
+* All drivers must define a ccw_driver (see driver-model.txt) and the associated
+ functions.
+* request_irq() and free_irq() are no longer done by the driver.
+* The oper_handler is (kindof) replaced by the probe() and set_online() functions
+ of the ccw_driver.
+* The not_oper_handler is (kindof) replaced by the remove() and set_offline()
+ functions of the ccw_driver.
+* The channel device layer is gone.
+* The interrupt handlers must be adapted to use a ccw_device as argument.
+ Moreover, they don't return a devstat, but an irb.
+* Before initiating an io, the options must be set via ccw_device_set_options().
+* Instead of calling read_dev_chars()/read_conf_data(), the driver issues
+ the channel program and handles the interrupt itself.
+
+ccw_device_get_ciw()
+ get commands from extended sense data.
+
+ccw_device_start(), ccw_device_start_timeout(), ccw_device_start_key(), ccw_device_start_key_timeout()
+ initiate an I/O request.
+
+ccw_device_resume()
+ resume channel program execution.
+
+ccw_device_halt()
+ terminate the current I/O request processed on the device.
+
+do_IRQ()
+ generic interrupt routine. This function is called by the interrupt entry
+ routine whenever an I/O interrupt is presented to the system. The do_IRQ()
+ routine determines the interrupt status and calls the device specific
+ interrupt handler according to the rules (flags) defined during I/O request
+ initiation with do_IO().
+
+The next chapters describe the functions other than do_IRQ() in more details.
+The do_IRQ() interface is not described, as it is called from the Linux/390
+first level interrupt handler only and does not comprise a device driver
+callable interface. Instead, the functional description of do_IO() also
+describes the input to the device specific interrupt handler.
+
+Note:
+ All explanations apply also to the 64 bit architecture s390x.
+
+
+Common Device Support (CDS) for Linux/390 Device Drivers
+========================================================
+
+General Information
+-------------------
+
+The following chapters describe the I/O related interface routines the
+Linux/390 common device support (CDS) provides to allow for device specific
+driver implementations on the IBM ESA/390 hardware platform. Those interfaces
+intend to provide the functionality required by every device driver
+implementation to allow to drive a specific hardware device on the ESA/390
+platform. Some of the interface routines are specific to Linux/390 and some
+of them can be found on other Linux platforms implementations too.
+Miscellaneous function prototypes, data declarations, and macro definitions
+can be found in the architecture specific C header file
+linux/arch/s390/include/asm/irq.h.
+
+Overview of CDS interface concepts
+----------------------------------
+
+Different to other hardware platforms, the ESA/390 architecture doesn't define
+interrupt lines managed by a specific interrupt controller and bus systems
+that may or may not allow for shared interrupts, DMA processing, etc.. Instead,
+the ESA/390 architecture has implemented a so called channel subsystem, that
+provides a unified view of the devices physically attached to the systems.
+Though the ESA/390 hardware platform knows about a huge variety of different
+peripheral attachments like disk devices (aka. DASDs), tapes, communication
+controllers, etc. they can all be accessed by a well defined access method and
+they are presenting I/O completion a unified way : I/O interruptions. Every
+single device is uniquely identified to the system by a so called subchannel,
+where the ESA/390 architecture allows for 64k devices be attached.
+
+Linux, however, was first built on the Intel PC architecture, with its two
+cascaded 8259 programmable interrupt controllers (PICs), that allow for a
+maximum of 15 different interrupt lines. All devices attached to such a system
+share those 15 interrupt levels. Devices attached to the ISA bus system must
+not share interrupt levels (aka. IRQs), as the ISA bus bases on edge triggered
+interrupts. MCA, EISA, PCI and other bus systems base on level triggered
+interrupts, and therewith allow for shared IRQs. However, if multiple devices
+present their hardware status by the same (shared) IRQ, the operating system
+has to call every single device driver registered on this IRQ in order to
+determine the device driver owning the device that raised the interrupt.
+
+Up to kernel 2.4, Linux/390 used to provide interfaces via the IRQ (subchannel).
+For internal use of the common I/O layer, these are still there. However,
+device drivers should use the new calling interface via the ccw_device only.
+
+During its startup the Linux/390 system checks for peripheral devices. Each
+of those devices is uniquely defined by a so called subchannel by the ESA/390
+channel subsystem. While the subchannel numbers are system generated, each
+subchannel also takes a user defined attribute, the so called device number.
+Both subchannel number and device number cannot exceed 65535. During sysfs
+initialisation, the information about control unit type and device types that
+imply specific I/O commands (channel command words - CCWs) in order to operate
+the device are gathered. Device drivers can retrieve this set of hardware
+information during their initialization step to recognize the devices they
+support using the information saved in the struct ccw_device given to them.
+This methods implies that Linux/390 doesn't require to probe for free (not
+armed) interrupt request lines (IRQs) to drive its devices with. Where
+applicable, the device drivers can use issue the READ DEVICE CHARACTERISTICS
+ccw to retrieve device characteristics in its online routine.
+
+In order to allow for easy I/O initiation the CDS layer provides a
+ccw_device_start() interface that takes a device specific channel program (one
+or more CCWs) as input sets up the required architecture specific control blocks
+and initiates an I/O request on behalf of the device driver. The
+ccw_device_start() routine allows to specify whether it expects the CDS layer
+to notify the device driver for every interrupt it observes, or with final status
+only. See ccw_device_start() for more details. A device driver must never issue
+ESA/390 I/O commands itself, but must use the Linux/390 CDS interfaces instead.
+
+For long running I/O request to be canceled, the CDS layer provides the
+ccw_device_halt() function. Some devices require to initially issue a HALT
+SUBCHANNEL (HSCH) command without having pending I/O requests. This function is
+also covered by ccw_device_halt().
+
+
+get_ciw() - get command information word
+
+This call enables a device driver to get information about supported commands
+from the extended SenseID data.
+
+::
+
+ struct ciw *
+ ccw_device_get_ciw(struct ccw_device *cdev, __u32 cmd);
+
+==== ========================================================
+cdev The ccw_device for which the command is to be retrieved.
+cmd The command type to be retrieved.
+==== ========================================================
+
+ccw_device_get_ciw() returns:
+
+===== ================================================================
+ NULL No extended data available, invalid device or command not found.
+!NULL The command requested.
+===== ================================================================
+
+::
+
+ ccw_device_start() - Initiate I/O Request
+
+The ccw_device_start() routines is the I/O request front-end processor. All
+device driver I/O requests must be issued using this routine. A device driver
+must not issue ESA/390 I/O commands itself. Instead the ccw_device_start()
+routine provides all interfaces required to drive arbitrary devices.
+
+This description also covers the status information passed to the device
+driver's interrupt handler as this is related to the rules (flags) defined
+with the associated I/O request when calling ccw_device_start().
+
+::
+
+ int ccw_device_start(struct ccw_device *cdev,
+ struct ccw1 *cpa,
+ unsigned long intparm,
+ __u8 lpm,
+ unsigned long flags);
+ int ccw_device_start_timeout(struct ccw_device *cdev,
+ struct ccw1 *cpa,
+ unsigned long intparm,
+ __u8 lpm,
+ unsigned long flags,
+ int expires);
+ int ccw_device_start_key(struct ccw_device *cdev,
+ struct ccw1 *cpa,
+ unsigned long intparm,
+ __u8 lpm,
+ __u8 key,
+ unsigned long flags);
+ int ccw_device_start_key_timeout(struct ccw_device *cdev,
+ struct ccw1 *cpa,
+ unsigned long intparm,
+ __u8 lpm,
+ __u8 key,
+ unsigned long flags,
+ int expires);
+
+============= =============================================================
+cdev ccw_device the I/O is destined for
+cpa logical start address of channel program
+user_intparm user specific interrupt information; will be presented
+ back to the device driver's interrupt handler. Allows a
+ device driver to associate the interrupt with a
+ particular I/O request.
+lpm defines the channel path to be used for a specific I/O
+ request. A value of 0 will make cio use the opm.
+key the storage key to use for the I/O (useful for operating on a
+ storage with a storage key != default key)
+flag defines the action to be performed for I/O processing
+expires timeout value in jiffies. The common I/O layer will terminate
+ the running program after this and call the interrupt handler
+ with ERR_PTR(-ETIMEDOUT) as irb.
+============= =============================================================
+
+Possible flag values are:
+
+========================= =============================================
+DOIO_ALLOW_SUSPEND channel program may become suspended
+DOIO_DENY_PREFETCH don't allow for CCW prefetch; usually
+ this implies the channel program might
+ become modified
+DOIO_SUPPRESS_INTER don't call the handler on intermediate status
+========================= =============================================
+
+The cpa parameter points to the first format 1 CCW of a channel program::
+
+ struct ccw1 {
+ __u8 cmd_code;/* command code */
+ __u8 flags; /* flags, like IDA addressing, etc. */
+ __u16 count; /* byte count */
+ __u32 cda; /* data address */
+ } __attribute__ ((packed,aligned(8)));
+
+with the following CCW flags values defined:
+
+=================== =========================
+CCW_FLAG_DC data chaining
+CCW_FLAG_CC command chaining
+CCW_FLAG_SLI suppress incorrect length
+CCW_FLAG_SKIP skip
+CCW_FLAG_PCI PCI
+CCW_FLAG_IDA indirect addressing
+CCW_FLAG_SUSPEND suspend
+=================== =========================
+
+
+Via ccw_device_set_options(), the device driver may specify the following
+options for the device:
+
+========================= ======================================
+DOIO_EARLY_NOTIFICATION allow for early interrupt notification
+DOIO_REPORT_ALL report all interrupt conditions
+========================= ======================================
+
+
+The ccw_device_start() function returns:
+
+======== ======================================================================
+ 0 successful completion or request successfully initiated
+ -EBUSY The device is currently processing a previous I/O request, or there is
+ a status pending at the device.
+-ENODEV cdev is invalid, the device is not operational or the ccw_device is
+ not online.
+======== ======================================================================
+
+When the I/O request completes, the CDS first level interrupt handler will
+accumulate the status in a struct irb and then call the device interrupt handler.
+The intparm field will contain the value the device driver has associated with a
+particular I/O request. If a pending device status was recognized,
+intparm will be set to 0 (zero). This may happen during I/O initiation or delayed
+by an alert status notification. In any case this status is not related to the
+current (last) I/O request. In case of a delayed status notification no special
+interrupt will be presented to indicate I/O completion as the I/O request was
+never started, even though ccw_device_start() returned with successful completion.
+
+The irb may contain an error value, and the device driver should check for this
+first:
+
+========== =================================================================
+-ETIMEDOUT the common I/O layer terminated the request after the specified
+ timeout value
+-EIO the common I/O layer terminated the request due to an error state
+========== =================================================================
+
+If the concurrent sense flag in the extended status word (esw) in the irb is
+set, the field erw.scnt in the esw describes the number of device specific
+sense bytes available in the extended control word irb->scsw.ecw[]. No device
+sensing by the device driver itself is required.
+
+The device interrupt handler can use the following definitions to investigate
+the primary unit check source coded in sense byte 0 :
+
+======================= ====
+SNS0_CMD_REJECT 0x80
+SNS0_INTERVENTION_REQ 0x40
+SNS0_BUS_OUT_CHECK 0x20
+SNS0_EQUIPMENT_CHECK 0x10
+SNS0_DATA_CHECK 0x08
+SNS0_OVERRUN 0x04
+SNS0_INCOMPL_DOMAIN 0x01
+======================= ====
+
+Depending on the device status, multiple of those values may be set together.
+Please refer to the device specific documentation for details.
+
+The irb->scsw.cstat field provides the (accumulated) subchannel status :
+
+========================= ============================
+SCHN_STAT_PCI program controlled interrupt
+SCHN_STAT_INCORR_LEN incorrect length
+SCHN_STAT_PROG_CHECK program check
+SCHN_STAT_PROT_CHECK protection check
+SCHN_STAT_CHN_DATA_CHK channel data check
+SCHN_STAT_CHN_CTRL_CHK channel control check
+SCHN_STAT_INTF_CTRL_CHK interface control check
+SCHN_STAT_CHAIN_CHECK chaining check
+========================= ============================
+
+The irb->scsw.dstat field provides the (accumulated) device status :
+
+===================== =================
+DEV_STAT_ATTENTION attention
+DEV_STAT_STAT_MOD status modifier
+DEV_STAT_CU_END control unit end
+DEV_STAT_BUSY busy
+DEV_STAT_CHN_END channel end
+DEV_STAT_DEV_END device end
+DEV_STAT_UNIT_CHECK unit check
+DEV_STAT_UNIT_EXCEP unit exception
+===================== =================
+
+Please see the ESA/390 Principles of Operation manual for details on the
+individual flag meanings.
+
+Usage Notes:
+
+ccw_device_start() must be called disabled and with the ccw device lock held.
+
+The device driver is allowed to issue the next ccw_device_start() call from
+within its interrupt handler already. It is not required to schedule a
+bottom-half, unless a non deterministically long running error recovery procedure
+or similar needs to be scheduled. During I/O processing the Linux/390 generic
+I/O device driver support has already obtained the IRQ lock, i.e. the handler
+must not try to obtain it again when calling ccw_device_start() or we end in a
+deadlock situation!
+
+If a device driver relies on an I/O request to be completed prior to start the
+next it can reduce I/O processing overhead by chaining a NoOp I/O command
+CCW_CMD_NOOP to the end of the submitted CCW chain. This will force Channel-End
+and Device-End status to be presented together, with a single interrupt.
+However, this should be used with care as it implies the channel will remain
+busy, not being able to process I/O requests for other devices on the same
+channel. Therefore e.g. read commands should never use this technique, as the
+result will be presented by a single interrupt anyway.
+
+In order to minimize I/O overhead, a device driver should use the
+DOIO_REPORT_ALL only if the device can report intermediate interrupt
+information prior to device-end the device driver urgently relies on. In this
+case all I/O interruptions are presented to the device driver until final
+status is recognized.
+
+If a device is able to recover from asynchronously presented I/O errors, it can
+perform overlapping I/O using the DOIO_EARLY_NOTIFICATION flag. While some
+devices always report channel-end and device-end together, with a single
+interrupt, others present primary status (channel-end) when the channel is
+ready for the next I/O request and secondary status (device-end) when the data
+transmission has been completed at the device.
+
+Above flag allows to exploit this feature, e.g. for communication devices that
+can handle lost data on the network to allow for enhanced I/O processing.
+
+Unless the channel subsystem at any time presents a secondary status interrupt,
+exploiting this feature will cause only primary status interrupts to be
+presented to the device driver while overlapping I/O is performed. When a
+secondary status without error (alert status) is presented, this indicates
+successful completion for all overlapping ccw_device_start() requests that have
+been issued since the last secondary (final) status.
+
+Channel programs that intend to set the suspend flag on a channel command word
+(CCW) must start the I/O operation with the DOIO_ALLOW_SUSPEND option or the
+suspend flag will cause a channel program check. At the time the channel program
+becomes suspended an intermediate interrupt will be generated by the channel
+subsystem.
+
+ccw_device_resume() - Resume Channel Program Execution
+
+If a device driver chooses to suspend the current channel program execution by
+setting the CCW suspend flag on a particular CCW, the channel program execution
+is suspended. In order to resume channel program execution the CIO layer
+provides the ccw_device_resume() routine.
+
+::
+
+ int ccw_device_resume(struct ccw_device *cdev);
+
+==== ================================================
+cdev ccw_device the resume operation is requested for
+==== ================================================
+
+The ccw_device_resume() function returns:
+
+========= ==============================================
+ 0 suspended channel program is resumed
+ -EBUSY status pending
+ -ENODEV cdev invalid or not-operational subchannel
+ -EINVAL resume function not applicable
+-ENOTCONN there is no I/O request pending for completion
+========= ==============================================
+
+Usage Notes:
+
+Please have a look at the ccw_device_start() usage notes for more details on
+suspended channel programs.
+
+ccw_device_halt() - Halt I/O Request Processing
+
+Sometimes a device driver might need a possibility to stop the processing of
+a long-running channel program or the device might require to initially issue
+a halt subchannel (HSCH) I/O command. For those purposes the ccw_device_halt()
+command is provided.
+
+ccw_device_halt() must be called disabled and with the ccw device lock held.
+
+::
+
+ int ccw_device_halt(struct ccw_device *cdev,
+ unsigned long intparm);
+
+======= =====================================================
+cdev ccw_device the halt operation is requested for
+intparm interruption parameter; value is only used if no I/O
+ is outstanding, otherwise the intparm associated with
+ the I/O request is returned
+======= =====================================================
+
+The ccw_device_halt() function returns:
+
+======= ==============================================================
+ 0 request successfully initiated
+-EBUSY the device is currently busy, or status pending.
+-ENODEV cdev invalid.
+-EINVAL The device is not operational or the ccw device is not online.
+======= ==============================================================
+
+Usage Notes:
+
+A device driver may write a never-ending channel program by writing a channel
+program that at its end loops back to its beginning by means of a transfer in
+channel (TIC) command (CCW_CMD_TIC). Usually this is performed by network
+device drivers by setting the PCI CCW flag (CCW_FLAG_PCI). Once this CCW is
+executed a program controlled interrupt (PCI) is generated. The device driver
+can then perform an appropriate action. Prior to interrupt of an outstanding
+read to a network device (with or without PCI flag) a ccw_device_halt()
+is required to end the pending operation.
+
+::
+
+ ccw_device_clear() - Terminage I/O Request Processing
+
+In order to terminate all I/O processing at the subchannel, the clear subchannel
+(CSCH) command is used. It can be issued via ccw_device_clear().
+
+ccw_device_clear() must be called disabled and with the ccw device lock held.
+
+::
+
+ int ccw_device_clear(struct ccw_device *cdev, unsigned long intparm);
+
+======= ===============================================
+cdev ccw_device the clear operation is requested for
+intparm interruption parameter (see ccw_device_halt())
+======= ===============================================
+
+The ccw_device_clear() function returns:
+
+======= ==============================================================
+ 0 request successfully initiated
+-ENODEV cdev invalid
+-EINVAL The device is not operational or the ccw device is not online.
+======= ==============================================================
+
+Miscellaneous Support Routines
+------------------------------
+
+This chapter describes various routines to be used in a Linux/390 device
+driver programming environment.
+
+get_ccwdev_lock()
+
+Get the address of the device specific lock. This is then used in
+spin_lock() / spin_unlock() calls.
+
+::
+
+ __u8 ccw_device_get_path_mask(struct ccw_device *cdev);
+
+Get the mask of the path currently available for cdev.
diff --git a/Documentation/arch/s390/common_io.rst b/Documentation/arch/s390/common_io.rst
new file mode 100644
index 000000000000..6dcb40cb7145
--- /dev/null
+++ b/Documentation/arch/s390/common_io.rst
@@ -0,0 +1,140 @@
+======================
+S/390 common I/O-Layer
+======================
+
+command line parameters, procfs and debugfs entries
+===================================================
+
+Command line parameters
+-----------------------
+
+* ccw_timeout_log
+
+ Enable logging of debug information in case of ccw device timeouts.
+
+* cio_ignore = device[,device[,..]]
+
+ device := {all | [!]ipldev | [!]condev | [!]<devno> | [!]<devno>-<devno>}
+
+ The given devices will be ignored by the common I/O-layer; no detection
+ and device sensing will be done on any of those devices. The subchannel to
+ which the device in question is attached will be treated as if no device was
+ attached.
+
+ An ignored device can be un-ignored later; see the "/proc entries"-section for
+ details.
+
+ The devices must be given either as bus ids (0.x.abcd) or as hexadecimal
+ device numbers (0xabcd or abcd, for 2.4 backward compatibility). If you
+ give a device number 0xabcd, it will be interpreted as 0.0.abcd.
+
+ You can use the 'all' keyword to ignore all devices. The 'ipldev' and 'condev'
+ keywords can be used to refer to the CCW based boot device and CCW console
+ device respectively (these are probably useful only when combined with the '!'
+ operator). The '!' operator will cause the I/O-layer to _not_ ignore a device.
+ The command line
+ is parsed from left to right.
+
+ For example::
+
+ cio_ignore=0.0.0023-0.0.0042,0.0.4711
+
+ will ignore all devices ranging from 0.0.0023 to 0.0.0042 and the device
+ 0.0.4711, if detected.
+
+ As another example::
+
+ cio_ignore=all,!0.0.4711,!0.0.fd00-0.0.fd02
+
+ will ignore all devices but 0.0.4711, 0.0.fd00, 0.0.fd01, 0.0.fd02.
+
+ By default, no devices are ignored.
+
+
+/proc entries
+-------------
+
+* /proc/cio_ignore
+
+ Lists the ranges of devices (by bus id) which are ignored by common I/O.
+
+ You can un-ignore certain or all devices by piping to /proc/cio_ignore.
+ "free all" will un-ignore all ignored devices,
+ "free <device range>, <device range>, ..." will un-ignore the specified
+ devices.
+
+ For example, if devices 0.0.0023 to 0.0.0042 and 0.0.4711 are ignored,
+
+ - echo free 0.0.0030-0.0.0032 > /proc/cio_ignore
+ will un-ignore devices 0.0.0030 to 0.0.0032 and will leave devices 0.0.0023
+ to 0.0.002f, 0.0.0033 to 0.0.0042 and 0.0.4711 ignored;
+ - echo free 0.0.0041 > /proc/cio_ignore will furthermore un-ignore device
+ 0.0.0041;
+ - echo free all > /proc/cio_ignore will un-ignore all remaining ignored
+ devices.
+
+ When a device is un-ignored, device recognition and sensing is performed and
+ the device driver will be notified if possible, so the device will become
+ available to the system. Note that un-ignoring is performed asynchronously.
+
+ You can also add ranges of devices to be ignored by piping to
+ /proc/cio_ignore; "add <device range>, <device range>, ..." will ignore the
+ specified devices.
+
+ Note: While already known devices can be added to the list of devices to be
+ ignored, there will be no effect on then. However, if such a device
+ disappears and then reappears, it will then be ignored. To make
+ known devices go away, you need the "purge" command (see below).
+
+ For example::
+
+ "echo add 0.0.a000-0.0.accc, 0.0.af00-0.0.afff > /proc/cio_ignore"
+
+ will add 0.0.a000-0.0.accc and 0.0.af00-0.0.afff to the list of ignored
+ devices.
+
+ You can remove already known but now ignored devices via::
+
+ "echo purge > /proc/cio_ignore"
+
+ All devices ignored but still registered and not online (= not in use)
+ will be deregistered and thus removed from the system.
+
+ The devices can be specified either by bus id (0.x.abcd) or, for 2.4 backward
+ compatibility, by the device number in hexadecimal (0xabcd or abcd). Device
+ numbers given as 0xabcd will be interpreted as 0.0.abcd.
+
+* /proc/cio_settle
+
+ A write request to this file is blocked until all queued cio actions are
+ handled. This will allow userspace to wait for pending work affecting
+ device availability after changing cio_ignore or the hardware configuration.
+
+* For some of the information present in the /proc filesystem in 2.4 (namely,
+ /proc/subchannels and /proc/chpids), see driver-model.txt.
+ Information formerly in /proc/irq_count is now in /proc/interrupts.
+
+
+debugfs entries
+---------------
+
+* /sys/kernel/debug/s390dbf/cio_*/ (S/390 debug feature)
+
+ Some views generated by the debug feature to hold various debug outputs.
+
+ - /sys/kernel/debug/s390dbf/cio_crw/sprintf
+ Messages from the processing of pending channel report words (machine check
+ handling).
+
+ - /sys/kernel/debug/s390dbf/cio_msg/sprintf
+ Various debug messages from the common I/O-layer.
+
+ - /sys/kernel/debug/s390dbf/cio_trace/hex_ascii
+ Logs the calling of functions in the common I/O-layer and, if applicable,
+ which subchannel they were called for, as well as dumps of some data
+ structures (like irb in an error case).
+
+ The level of logging can be changed to be more or less verbose by piping to
+ /sys/kernel/debug/s390dbf/cio_*/level a number between 0 and 6; see the
+ documentation on the S/390 debug feature (Documentation/arch/s390/s390dbf.rst)
+ for details.
diff --git a/Documentation/s390/config3270.sh b/Documentation/arch/s390/config3270.sh
index 515e2f431487..515e2f431487 100644
--- a/Documentation/s390/config3270.sh
+++ b/Documentation/arch/s390/config3270.sh
diff --git a/Documentation/arch/s390/driver-model.rst b/Documentation/arch/s390/driver-model.rst
new file mode 100644
index 000000000000..e7488f02bb78
--- /dev/null
+++ b/Documentation/arch/s390/driver-model.rst
@@ -0,0 +1,307 @@
+=============================
+S/390 driver model interfaces
+=============================
+
+1. CCW devices
+--------------
+
+All devices which can be addressed by means of ccws are called 'CCW devices' -
+even if they aren't actually driven by ccws.
+
+All ccw devices are accessed via a subchannel, this is reflected in the
+structures under devices/::
+
+ devices/
+ - system/
+ - css0/
+ - 0.0.0000/0.0.0815/
+ - 0.0.0001/0.0.4711/
+ - 0.0.0002/
+ - 0.1.0000/0.1.1234/
+ ...
+ - defunct/
+
+In this example, device 0815 is accessed via subchannel 0 in subchannel set 0,
+device 4711 via subchannel 1 in subchannel set 0, and subchannel 2 is a non-I/O
+subchannel. Device 1234 is accessed via subchannel 0 in subchannel set 1.
+
+The subchannel named 'defunct' does not represent any real subchannel on the
+system; it is a pseudo subchannel where disconnected ccw devices are moved to
+if they are displaced by another ccw device becoming operational on their
+former subchannel. The ccw devices will be moved again to a proper subchannel
+if they become operational again on that subchannel.
+
+You should address a ccw device via its bus id (e.g. 0.0.4711); the device can
+be found under bus/ccw/devices/.
+
+All ccw devices export some data via sysfs.
+
+cutype:
+ The control unit type / model.
+
+devtype:
+ The device type / model, if applicable.
+
+availability:
+ Can be 'good' or 'boxed'; 'no path' or 'no device' for
+ disconnected devices.
+
+online:
+ An interface to set the device online and offline.
+ In the special case of the device being disconnected (see the
+ notify function under 1.2), piping 0 to online will forcibly delete
+ the device.
+
+The device drivers can add entries to export per-device data and interfaces.
+
+There is also some data exported on a per-subchannel basis (see under
+bus/css/devices/):
+
+chpids:
+ Via which chpids the device is connected.
+
+pimpampom:
+ The path installed, path available and path operational masks.
+
+There also might be additional data, for example for block devices.
+
+
+1.1 Bringing up a ccw device
+----------------------------
+
+This is done in several steps.
+
+a. Each driver can provide one or more parameter interfaces where parameters can
+ be specified. These interfaces are also in the driver's responsibility.
+b. After a. has been performed, if necessary, the device is finally brought up
+ via the 'online' interface.
+
+
+1.2 Writing a driver for ccw devices
+------------------------------------
+
+The basic struct ccw_device and struct ccw_driver data structures can be found
+under include/asm/ccwdev.h::
+
+ struct ccw_device {
+ spinlock_t *ccwlock;
+ struct ccw_device_private *private;
+ struct ccw_device_id id;
+
+ struct ccw_driver *drv;
+ struct device dev;
+ int online;
+
+ void (*handler) (struct ccw_device *dev, unsigned long intparm,
+ struct irb *irb);
+ };
+
+ struct ccw_driver {
+ struct module *owner;
+ struct ccw_device_id *ids;
+ int (*probe) (struct ccw_device *);
+ int (*remove) (struct ccw_device *);
+ int (*set_online) (struct ccw_device *);
+ int (*set_offline) (struct ccw_device *);
+ int (*notify) (struct ccw_device *, int);
+ struct device_driver driver;
+ char *name;
+ };
+
+The 'private' field contains data needed for internal i/o operation only, and
+is not available to the device driver.
+
+Each driver should declare in a MODULE_DEVICE_TABLE into which CU types/models
+and/or device types/models it is interested. This information can later be found
+in the struct ccw_device_id fields::
+
+ struct ccw_device_id {
+ __u16 match_flags;
+
+ __u16 cu_type;
+ __u16 dev_type;
+ __u8 cu_model;
+ __u8 dev_model;
+
+ unsigned long driver_info;
+ };
+
+The functions in ccw_driver should be used in the following way:
+
+probe:
+ This function is called by the device layer for each device the driver
+ is interested in. The driver should only allocate private structures
+ to put in dev->driver_data and create attributes (if needed). Also,
+ the interrupt handler (see below) should be set here.
+
+::
+
+ int (*probe) (struct ccw_device *cdev);
+
+Parameters:
+ cdev
+ - the device to be probed.
+
+
+remove:
+ This function is called by the device layer upon removal of the driver,
+ the device or the module. The driver should perform cleanups here.
+
+::
+
+ int (*remove) (struct ccw_device *cdev);
+
+Parameters:
+ cdev
+ - the device to be removed.
+
+
+set_online:
+ This function is called by the common I/O layer when the device is
+ activated via the 'online' attribute. The driver should finally
+ setup and activate the device here.
+
+::
+
+ int (*set_online) (struct ccw_device *);
+
+Parameters:
+ cdev
+ - the device to be activated. The common layer has
+ verified that the device is not already online.
+
+
+set_offline: This function is called by the common I/O layer when the device is
+ de-activated via the 'online' attribute. The driver should shut
+ down the device, but not de-allocate its private data.
+
+::
+
+ int (*set_offline) (struct ccw_device *);
+
+Parameters:
+ cdev
+ - the device to be deactivated. The common layer has
+ verified that the device is online.
+
+
+notify:
+ This function is called by the common I/O layer for some state changes
+ of the device.
+
+ Signalled to the driver are:
+
+ * In online state, device detached (CIO_GONE) or last path gone
+ (CIO_NO_PATH). The driver must return !0 to keep the device; for
+ return code 0, the device will be deleted as usual (also when no
+ notify function is registered). If the driver wants to keep the
+ device, it is moved into disconnected state.
+ * In disconnected state, device operational again (CIO_OPER). The
+ common I/O layer performs some sanity checks on device number and
+ Device / CU to be reasonably sure if it is still the same device.
+ If not, the old device is removed and a new one registered. By the
+ return code of the notify function the device driver signals if it
+ wants the device back: !0 for keeping, 0 to make the device being
+ removed and re-registered.
+
+::
+
+ int (*notify) (struct ccw_device *, int);
+
+Parameters:
+ cdev
+ - the device whose state changed.
+
+ event
+ - the event that happened. This can be one of CIO_GONE,
+ CIO_NO_PATH or CIO_OPER.
+
+The handler field of the struct ccw_device is meant to be set to the interrupt
+handler for the device. In order to accommodate drivers which use several
+distinct handlers (e.g. multi subchannel devices), this is a member of ccw_device
+instead of ccw_driver.
+The handler is registered with the common layer during set_online() processing
+before the driver is called, and is deregistered during set_offline() after the
+driver has been called. Also, after registering / before deregistering, path
+grouping resp. disbanding of the path group (if applicable) are performed.
+
+::
+
+ void (*handler) (struct ccw_device *dev, unsigned long intparm, struct irb *irb);
+
+Parameters: dev - the device the handler is called for
+ intparm - the intparm which allows the device driver to identify
+ the i/o the interrupt is associated with, or to recognize
+ the interrupt as unsolicited.
+ irb - interruption response block which contains the accumulated
+ status.
+
+The device driver is called from the common ccw_device layer and can retrieve
+information about the interrupt from the irb parameter.
+
+
+1.3 ccwgroup devices
+--------------------
+
+The ccwgroup mechanism is designed to handle devices consisting of multiple ccw
+devices, like qeth or ctc.
+
+The ccw driver provides a 'group' attribute. Piping bus ids of ccw devices to
+this attributes creates a ccwgroup device consisting of these ccw devices (if
+possible). This ccwgroup device can be set online or offline just like a normal
+ccw device.
+
+Each ccwgroup device also provides an 'ungroup' attribute to destroy the device
+again (only when offline). This is a generic ccwgroup mechanism (the driver does
+not need to implement anything beyond normal removal routines).
+
+A ccw device which is a member of a ccwgroup device carries a pointer to the
+ccwgroup device in the driver_data of its device struct. This field must not be
+touched by the driver - it should use the ccwgroup device's driver_data for its
+private data.
+
+To implement a ccwgroup driver, please refer to include/asm/ccwgroup.h. Keep in
+mind that most drivers will need to implement both a ccwgroup and a ccw
+driver.
+
+
+2. Channel paths
+-----------------
+
+Channel paths show up, like subchannels, under the channel subsystem root (css0)
+and are called 'chp0.<chpid>'. They have no driver and do not belong to any bus.
+Please note, that unlike /proc/chpids in 2.4, the channel path objects reflect
+only the logical state and not the physical state, since we cannot track the
+latter consistently due to lacking machine support (we don't need to be aware
+of it anyway).
+
+status
+ - Can be 'online' or 'offline'.
+ Piping 'on' or 'off' sets the chpid logically online/offline.
+ Piping 'on' to an online chpid triggers path reprobing for all devices
+ the chpid connects to. This can be used to force the kernel to re-use
+ a channel path the user knows to be online, but the machine hasn't
+ created a machine check for.
+
+type
+ - The physical type of the channel path.
+
+shared
+ - Whether the channel path is shared.
+
+cmg
+ - The channel measurement group.
+
+3. System devices
+-----------------
+
+3.1 xpram
+---------
+
+xpram shows up under devices/system/ as 'xpram'.
+
+3.2 cpus
+--------
+
+For each cpu, a directory is created under devices/system/cpu/. Each cpu has an
+attribute 'online' which can be 0 or 1.
diff --git a/Documentation/arch/s390/features.rst b/Documentation/arch/s390/features.rst
new file mode 100644
index 000000000000..2883dc950681
--- /dev/null
+++ b/Documentation/arch/s390/features.rst
@@ -0,0 +1,3 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. kernel-feat:: features s390
diff --git a/Documentation/arch/s390/index.rst b/Documentation/arch/s390/index.rst
new file mode 100644
index 000000000000..e75a6e5d2505
--- /dev/null
+++ b/Documentation/arch/s390/index.rst
@@ -0,0 +1,31 @@
+=================
+s390 Architecture
+=================
+
+.. toctree::
+ :maxdepth: 1
+
+ cds
+ 3270
+ driver-model
+ mm
+ monreader
+ qeth
+ s390dbf
+ vfio-ap
+ vfio-ap-locking
+ vfio-ccw
+ zfcpdump
+ common_io
+ pci
+
+ text_files
+
+ features
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
diff --git a/Documentation/arch/s390/mm.rst b/Documentation/arch/s390/mm.rst
new file mode 100644
index 000000000000..084adad5eef9
--- /dev/null
+++ b/Documentation/arch/s390/mm.rst
@@ -0,0 +1,111 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=================
+Memory Management
+=================
+
+Virtual memory layout
+=====================
+
+.. note::
+
+ - Some aspects of the virtual memory layout setup are not
+ clarified (number of page levels, alignment, DMA memory).
+
+ - Unused gaps in the virtual memory layout could be present
+ or not - depending on how partucular system is configured.
+ No page tables are created for the unused gaps.
+
+ - The virtual memory regions are tracked or untracked by KASAN
+ instrumentation, as well as the KASAN shadow memory itself is
+ created only when CONFIG_KASAN configuration option is enabled.
+
+::
+
+ =============================================================================
+ | Physical | Virtual | VM area description
+ =============================================================================
+ +- 0 --------------+- 0 --------------+
+ | | S390_lowcore | Low-address memory
+ | +- 8 KB -----------+
+ | | |
+ | | |
+ | | ... unused gap | KASAN untracked
+ | | |
+ +- AMODE31_START --+- AMODE31_START --+ .amode31 rand. phys/virt start
+ |.amode31 text/data|.amode31 text/data| KASAN untracked
+ +- AMODE31_END ----+- AMODE31_END ----+ .amode31 rand. phys/virt end (<2GB)
+ | | |
+ | | |
+ +- __kaslr_offset_phys | kernel rand. phys start
+ | | |
+ | kernel text/data | |
+ | | |
+ +------------------+ | kernel phys end
+ | | |
+ | | |
+ | | |
+ | | |
+ +- ident_map_size -+ |
+ | |
+ | ... unused gap | KASAN untracked
+ | |
+ +- __identity_base + identity mapping start (>= 2GB)
+ | |
+ | identity | phys == virt - __identity_base
+ | mapping | virt == phys + __identity_base
+ | |
+ | | KASAN tracked
+ | |
+ | |
+ | |
+ | |
+ | |
+ | |
+ | |
+ | |
+ | |
+ | |
+ | |
+ | |
+ | |
+ | |
+ | |
+ +---- vmemmap -----+ 'struct page' array start
+ | |
+ | virtually mapped |
+ | memory map | KASAN untracked
+ | |
+ +- __abs_lowcore --+
+ | |
+ | Absolute Lowcore | KASAN untracked
+ | |
+ +- __memcpy_real_area
+ | |
+ | Real Memory Copy| KASAN untracked
+ | |
+ +- VMALLOC_START --+ vmalloc area start
+ | | KASAN untracked or
+ | vmalloc area | KASAN shallowly populated in case
+ | | CONFIG_KASAN_VMALLOC=y
+ +- MODULES_VADDR --+ modules area start
+ | | KASAN allocated per module or
+ | modules area | KASAN shallowly populated in case
+ | | CONFIG_KASAN_VMALLOC=y
+ +- __kaslr_offset -+ kernel rand. virt start
+ | | KASAN tracked
+ | kernel text/data | phys == (kvirt - __kaslr_offset) +
+ | | __kaslr_offset_phys
+ +- kernel .bss end + kernel rand. virt end
+ | |
+ | ... unused gap | KASAN untracked
+ | |
+ +------------------+ UltraVisor Secure Storage limit
+ | |
+ | ... unused gap | KASAN untracked
+ | |
+ +KASAN_SHADOW_START+ KASAN shadow memory start
+ | |
+ | KASAN shadow | KASAN untracked
+ | |
+ +------------------+ ASCE limit
diff --git a/Documentation/arch/s390/monreader.rst b/Documentation/arch/s390/monreader.rst
new file mode 100644
index 000000000000..21cdfb699b49
--- /dev/null
+++ b/Documentation/arch/s390/monreader.rst
@@ -0,0 +1,212 @@
+=================================================
+Linux API for read access to z/VM Monitor Records
+=================================================
+
+Date : 2004-Nov-26
+
+Author: Gerald Schaefer (geraldsc@de.ibm.com)
+
+
+
+
+Description
+===========
+This item delivers a new Linux API in the form of a misc char device that is
+usable from user space and allows read access to the z/VM Monitor Records
+collected by the `*MONITOR` System Service of z/VM.
+
+
+User Requirements
+=================
+The z/VM guest on which you want to access this API needs to be configured in
+order to allow IUCV connections to the `*MONITOR` service, i.e. it needs the
+IUCV `*MONITOR` statement in its user entry. If the monitor DCSS to be used is
+restricted (likely), you also need the NAMESAVE <DCSS NAME> statement.
+This item will use the IUCV device driver to access the z/VM services, so you
+need a kernel with IUCV support. You also need z/VM version 4.4 or 5.1.
+
+There are two options for being able to load the monitor DCSS (examples assume
+that the monitor DCSS begins at 144 MB and ends at 152 MB). You can query the
+location of the monitor DCSS with the Class E privileged CP command Q NSS MAP
+(the values BEGPAG and ENDPAG are given in units of 4K pages).
+
+See also "CP Command and Utility Reference" (SC24-6081-00) for more information
+on the DEF STOR and Q NSS MAP commands, as well as "Saved Segments Planning
+and Administration" (SC24-6116-00) for more information on DCSSes.
+
+1st option:
+-----------
+You can use the CP command DEF STOR CONFIG to define a "memory hole" in your
+guest virtual storage around the address range of the DCSS.
+
+Example: DEF STOR CONFIG 0.140M 200M.200M
+
+This defines two blocks of storage, the first is 140MB in size an begins at
+address 0MB, the second is 200MB in size and begins at address 200MB,
+resulting in a total storage of 340MB. Note that the first block should
+always start at 0 and be at least 64MB in size.
+
+2nd option:
+-----------
+Your guest virtual storage has to end below the starting address of the DCSS
+and you have to specify the "mem=" kernel parameter in your parmfile with a
+value greater than the ending address of the DCSS.
+
+Example::
+
+ DEF STOR 140M
+
+This defines 140MB storage size for your guest, the parameter "mem=160M" is
+added to the parmfile.
+
+
+User Interface
+==============
+The char device is implemented as a kernel module named "monreader",
+which can be loaded via the modprobe command, or it can be compiled into the
+kernel instead. There is one optional module (or kernel) parameter, "mondcss",
+to specify the name of the monitor DCSS. If the module is compiled into the
+kernel, the kernel parameter "monreader.mondcss=<DCSS NAME>" can be specified
+in the parmfile.
+
+The default name for the DCSS is "MONDCSS" if none is specified. In case that
+there are other users already connected to the `*MONITOR` service (e.g.
+Performance Toolkit), the monitor DCSS is already defined and you have to use
+the same DCSS. The CP command Q MONITOR (Class E privileged) shows the name
+of the monitor DCSS, if already defined, and the users connected to the
+`*MONITOR` service.
+Refer to the "z/VM Performance" book (SC24-6109-00) on how to create a monitor
+DCSS if your z/VM doesn't have one already, you need Class E privileges to
+define and save a DCSS.
+
+Example:
+--------
+
+::
+
+ modprobe monreader mondcss=MYDCSS
+
+This loads the module and sets the DCSS name to "MYDCSS".
+
+NOTE:
+-----
+This API provides no interface to control the `*MONITOR` service, e.g. specify
+which data should be collected. This can be done by the CP command MONITOR
+(Class E privileged), see "CP Command and Utility Reference".
+
+Device nodes with udev:
+-----------------------
+After loading the module, a char device will be created along with the device
+node /<udev directory>/monreader.
+
+Device nodes without udev:
+--------------------------
+If your distribution does not support udev, a device node will not be created
+automatically and you have to create it manually after loading the module.
+Therefore you need to know the major and minor numbers of the device. These
+numbers can be found in /sys/class/misc/monreader/dev.
+
+Typing cat /sys/class/misc/monreader/dev will give an output of the form
+<major>:<minor>. The device node can be created via the mknod command, enter
+mknod <name> c <major> <minor>, where <name> is the name of the device node
+to be created.
+
+Example:
+--------
+
+::
+
+ # modprobe monreader
+ # cat /sys/class/misc/monreader/dev
+ 10:63
+ # mknod /dev/monreader c 10 63
+
+This loads the module with the default monitor DCSS (MONDCSS) and creates a
+device node.
+
+File operations:
+----------------
+The following file operations are supported: open, release, read, poll.
+There are two alternative methods for reading: either non-blocking read in
+conjunction with polling, or blocking read without polling. IOCTLs are not
+supported.
+
+Read:
+-----
+Reading from the device provides a 12 Byte monitor control element (MCE),
+followed by a set of one or more contiguous monitor records (similar to the
+output of the CMS utility MONWRITE without the 4K control blocks). The MCE
+contains information on the type of the following record set (sample/event
+data), the monitor domains contained within it and the start and end address
+of the record set in the monitor DCSS. The start and end address can be used
+to determine the size of the record set, the end address is the address of the
+last byte of data. The start address is needed to handle "end-of-frame" records
+correctly (domain 1, record 13), i.e. it can be used to determine the record
+start offset relative to a 4K page (frame) boundary.
+
+See "Appendix A: `*MONITOR`" in the "z/VM Performance" document for a description
+of the monitor control element layout. The layout of the monitor records can
+be found here (z/VM 5.1): https://www.vm.ibm.com/pubs/mon510/index.html
+
+The layout of the data stream provided by the monreader device is as follows::
+
+ ...
+ <0 byte read>
+ <first MCE> \
+ <first set of records> |
+ ... |- data set
+ <last MCE> |
+ <last set of records> /
+ <0 byte read>
+ ...
+
+There may be more than one combination of MCE and corresponding record set
+within one data set and the end of each data set is indicated by a successful
+read with a return value of 0 (0 byte read).
+Any received data must be considered invalid until a complete set was
+read successfully, including the closing 0 byte read. Therefore you should
+always read the complete set into a buffer before processing the data.
+
+The maximum size of a data set can be as large as the size of the
+monitor DCSS, so design the buffer adequately or use dynamic memory allocation.
+The size of the monitor DCSS will be printed into syslog after loading the
+module. You can also use the (Class E privileged) CP command Q NSS MAP to
+list all available segments and information about them.
+
+As with most char devices, error conditions are indicated by returning a
+negative value for the number of bytes read. In this case, the errno variable
+indicates the error condition:
+
+EIO:
+ reply failed, read data is invalid and the application
+ should discard the data read since the last successful read with 0 size.
+EFAULT:
+ copy_to_user failed, read data is invalid and the application should
+ discard the data read since the last successful read with 0 size.
+EAGAIN:
+ occurs on a non-blocking read if there is no data available at the
+ moment. There is no data missing or corrupted, just try again or rather
+ use polling for non-blocking reads.
+EOVERFLOW:
+ message limit reached, the data read since the last successful
+ read with 0 size is valid but subsequent records may be missing.
+
+In the last case (EOVERFLOW) there may be missing data, in the first two cases
+(EIO, EFAULT) there will be missing data. It's up to the application if it will
+continue reading subsequent data or rather exit.
+
+Open:
+-----
+Only one user is allowed to open the char device. If it is already in use, the
+open function will fail (return a negative value) and set errno to EBUSY.
+The open function may also fail if an IUCV connection to the `*MONITOR` service
+cannot be established. In this case errno will be set to EIO and an error
+message with an IPUSER SEVER code will be printed into syslog. The IPUSER SEVER
+codes are described in the "z/VM Performance" book, Appendix A.
+
+NOTE:
+-----
+As soon as the device is opened, incoming messages will be accepted and they
+will account for the message limit, i.e. opening the device without reading
+from it will provoke the "message limit reached" error (EOVERFLOW error code)
+eventually.
diff --git a/Documentation/arch/s390/pci.rst b/Documentation/arch/s390/pci.rst
new file mode 100644
index 000000000000..d5755484d8e7
--- /dev/null
+++ b/Documentation/arch/s390/pci.rst
@@ -0,0 +1,133 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========
+S/390 PCI
+=========
+
+Authors:
+ - Pierre Morel
+
+Copyright, IBM Corp. 2020
+
+
+Command line parameters and debugfs entries
+===========================================
+
+Command line parameters
+-----------------------
+
+* nomio
+
+ Do not use PCI Mapped I/O (MIO) instructions.
+
+* norid
+
+ Ignore the RID field and force use of one PCI domain per PCI function.
+
+debugfs entries
+---------------
+
+The S/390 debug feature (s390dbf) generates views to hold various debug results in sysfs directories of the form:
+
+ * /sys/kernel/debug/s390dbf/pci_*/
+
+For example:
+
+ - /sys/kernel/debug/s390dbf/pci_msg/sprintf
+ Holds messages from the processing of PCI events, like machine check handling
+ and setting of global functionality, like UID checking.
+
+ Change the level of logging to be more or less verbose by piping
+ a number between 0 and 6 to /sys/kernel/debug/s390dbf/pci_*/level. For
+ details, see the documentation on the S/390 debug feature at
+ Documentation/arch/s390/s390dbf.rst.
+
+Sysfs entries
+=============
+
+Entries specific to zPCI functions and entries that hold zPCI information.
+
+* /sys/bus/pci/slots/XXXXXXXX
+
+ The slot entries are set up using the function identifier (FID) of the
+ PCI function. The format depicted as XXXXXXXX above is 8 hexadecimal digits
+ with 0 padding and lower case hexadecimal digits.
+
+ - /sys/bus/pci/slots/XXXXXXXX/power
+
+ A physical function that currently supports a virtual function cannot be
+ powered off until all virtual functions are removed with:
+ echo 0 > /sys/bus/pci/devices/XXXX:XX:XX.X/sriov_numvf
+
+* /sys/bus/pci/devices/XXXX:XX:XX.X/
+
+ - function_id
+ A zPCI function identifier that uniquely identifies the function in the Z server.
+
+ - function_handle
+ Low-level identifier used for a configured PCI function.
+ It might be useful for debugging.
+
+ - pchid
+ Model-dependent location of the I/O adapter.
+
+ - pfgid
+ PCI function group ID, functions that share identical functionality
+ use a common identifier.
+ A PCI group defines interrupts, IOMMU, IOTLB, and DMA specifics.
+
+ - vfn
+ The virtual function number, from 1 to N for virtual functions,
+ 0 for physical functions.
+
+ - pft
+ The PCI function type
+
+ - port
+ The port corresponds to the physical port the function is attached to.
+ It also gives an indication of the physical function a virtual function
+ is attached to.
+
+ - uid
+ The user identifier (UID) may be defined as part of the machine
+ configuration or the z/VM or KVM guest configuration. If the accompanying
+ uid_is_unique attribute is 1 the platform guarantees that the UID is unique
+ within that instance and no devices with the same UID can be attached
+ during the lifetime of the system.
+
+ - uid_is_unique
+ Indicates whether the user identifier (UID) is guaranteed to be and remain
+ unique within this Linux instance.
+
+ - pfip/segmentX
+ The segments determine the isolation of a function.
+ They correspond to the physical path to the function.
+ The more the segments are different, the more the functions are isolated.
+
+Enumeration and hotplug
+=======================
+
+The PCI address consists of four parts: domain, bus, device and function,
+and is of this form: DDDD:BB:dd.f
+
+* When not using multi-functions (norid is set, or the firmware does not
+ support multi-functions):
+
+ - There is only one function per domain.
+
+ - The domain is set from the zPCI function's UID as defined during the
+ LPAR creation.
+
+* When using multi-functions (norid parameter is not set),
+ zPCI functions are addressed differently:
+
+ - There is still only one bus per domain.
+
+ - There can be up to 256 functions per bus.
+
+ - The domain part of the address of all functions for
+ a multi-Function device is set from the zPCI function's UID as defined
+ in the LPAR creation for the function zero.
+
+ - New functions will only be ready for use after the function zero
+ (the function with devfn 0) has been enumerated.
diff --git a/Documentation/arch/s390/qeth.rst b/Documentation/arch/s390/qeth.rst
new file mode 100644
index 000000000000..f02fdaa68de0
--- /dev/null
+++ b/Documentation/arch/s390/qeth.rst
@@ -0,0 +1,64 @@
+=============================
+IBM s390 QDIO Ethernet Driver
+=============================
+
+OSA and HiperSockets Bridge Port Support
+========================================
+
+Uevents
+-------
+
+To generate the events the device must be assigned a role of either
+a primary or a secondary Bridge Port. For more information, see
+"z/VM Connectivity, SC24-6174".
+
+When run on an OSA or HiperSockets Bridge Capable Port hardware, and the state
+of some configured Bridge Port device on the channel changes, a udev
+event with ACTION=CHANGE is emitted on behalf of the corresponding
+ccwgroup device. The event has the following attributes:
+
+BRIDGEPORT=statechange
+ indicates that the Bridge Port device changed
+ its state.
+
+ROLE={primary|secondary|none}
+ the role assigned to the port.
+
+STATE={active|standby|inactive}
+ the newly assumed state of the port.
+
+When run on HiperSockets Bridge Capable Port hardware with host address
+notifications enabled, a udev event with ACTION=CHANGE is emitted.
+It is emitted on behalf of the corresponding ccwgroup device when a host
+or a VLAN is registered or unregistered on the network served by the device.
+The event has the following attributes:
+
+BRIDGEDHOST={reset|register|deregister|abort}
+ host address
+ notifications are started afresh, a new host or VLAN is registered or
+ deregistered on the Bridge Port HiperSockets channel, or address
+ notifications are aborted.
+
+VLAN=numeric-vlan-id
+ VLAN ID on which the event occurred. Not included
+ if no VLAN is involved in the event.
+
+MAC=xx:xx:xx:xx:xx:xx
+ MAC address of the host that is being registered
+ or deregistered from the HiperSockets channel. Not reported if the
+ event reports the creation or destruction of a VLAN.
+
+NTOK_BUSID=x.y.zzzz
+ device bus ID (CSSID, SSID and device number).
+
+NTOK_IID=xx
+ device IID.
+
+NTOK_CHPID=xx
+ device CHPID.
+
+NTOK_CHID=xxxx
+ device channel ID.
+
+Note that the `NTOK_*` attributes refer to devices other than the one
+connected to the system on which the OS is running.
diff --git a/Documentation/arch/s390/s390dbf.rst b/Documentation/arch/s390/s390dbf.rst
new file mode 100644
index 000000000000..af8bdc3629e7
--- /dev/null
+++ b/Documentation/arch/s390/s390dbf.rst
@@ -0,0 +1,478 @@
+==================
+S390 Debug Feature
+==================
+
+files:
+ - arch/s390/kernel/debug.c
+ - arch/s390/include/asm/debug.h
+
+Description:
+------------
+The goal of this feature is to provide a kernel debug logging API
+where log records can be stored efficiently in memory, where each component
+(e.g. device drivers) can have one separate debug log.
+One purpose of this is to inspect the debug logs after a production system crash
+in order to analyze the reason for the crash.
+
+If the system still runs but only a subcomponent which uses dbf fails,
+it is possible to look at the debug logs on a live system via the Linux
+debugfs filesystem.
+
+The debug feature may also very useful for kernel and driver development.
+
+Design:
+-------
+Kernel components (e.g. device drivers) can register themselves at the debug
+feature with the function call :c:func:`debug_register()`.
+This function initializes a
+debug log for the caller. For each debug log exists a number of debug areas
+where exactly one is active at one time. Each debug area consists of contiguous
+pages in memory. In the debug areas there are stored debug entries (log records)
+which are written by event- and exception-calls.
+
+An event-call writes the specified debug entry to the active debug
+area and updates the log pointer for the active area. If the end
+of the active debug area is reached, a wrap around is done (ring buffer)
+and the next debug entry will be written at the beginning of the active
+debug area.
+
+An exception-call writes the specified debug entry to the log and
+switches to the next debug area. This is done in order to be sure
+that the records which describe the origin of the exception are not
+overwritten when a wrap around for the current area occurs.
+
+The debug areas themselves are also ordered in form of a ring buffer.
+When an exception is thrown in the last debug area, the following debug
+entries are then written again in the very first area.
+
+There are four versions for the event- and exception-calls: One for
+logging raw data, one for text, one for numbers (unsigned int and long),
+and one for sprintf-like formatted strings.
+
+Each debug entry contains the following data:
+
+- Timestamp
+- Cpu-Number of calling task
+- Level of debug entry (0...6)
+- Return Address to caller
+- Flag, if entry is an exception or not
+
+The debug logs can be inspected in a live system through entries in
+the debugfs-filesystem. Under the toplevel directory "``s390dbf``" there is
+a directory for each registered component, which is named like the
+corresponding component. The debugfs normally should be mounted to
+``/sys/kernel/debug`` therefore the debug feature can be accessed under
+``/sys/kernel/debug/s390dbf``.
+
+The content of the directories are files which represent different views
+to the debug log. Each component can decide which views should be
+used through registering them with the function :c:func:`debug_register_view()`.
+Predefined views for hex/ascii and sprintf data are provided.
+It is also possible to define other views. The content of
+a view can be inspected simply by reading the corresponding debugfs file.
+
+All debug logs have an actual debug level (range from 0 to 6).
+The default level is 3. Event and Exception functions have a :c:data:`level`
+parameter. Only debug entries with a level that is lower or equal
+than the actual level are written to the log. This means, when
+writing events, high priority log entries should have a low level
+value whereas low priority entries should have a high one.
+The actual debug level can be changed with the help of the debugfs-filesystem
+through writing a number string "x" to the ``level`` debugfs file which is
+provided for every debug log. Debugging can be switched off completely
+by using "-" on the ``level`` debugfs file.
+
+Example::
+
+ > echo "-" > /sys/kernel/debug/s390dbf/dasd/level
+
+It is also possible to deactivate the debug feature globally for every
+debug log. You can change the behavior using 2 sysctl parameters in
+``/proc/sys/s390dbf``:
+
+There are currently 2 possible triggers, which stop the debug feature
+globally. The first possibility is to use the ``debug_active`` sysctl. If
+set to 1 the debug feature is running. If ``debug_active`` is set to 0 the
+debug feature is turned off.
+
+The second trigger which stops the debug feature is a kernel oops.
+That prevents the debug feature from overwriting debug information that
+happened before the oops. After an oops you can reactivate the debug feature
+by piping 1 to ``/proc/sys/s390dbf/debug_active``. Nevertheless, it's not
+suggested to use an oopsed kernel in a production environment.
+
+If you want to disallow the deactivation of the debug feature, you can use
+the ``debug_stoppable`` sysctl. If you set ``debug_stoppable`` to 0 the debug
+feature cannot be stopped. If the debug feature is already stopped, it
+will stay deactivated.
+
+Kernel Interfaces:
+------------------
+
+.. kernel-doc:: arch/s390/kernel/debug.c
+.. kernel-doc:: arch/s390/include/asm/debug.h
+
+Predefined views:
+-----------------
+
+.. code-block:: c
+
+ extern struct debug_view debug_hex_ascii_view;
+
+ extern struct debug_view debug_sprintf_view;
+
+Examples
+--------
+
+.. code-block:: c
+
+ /*
+ * hex_ascii-view Example
+ */
+
+ #include <linux/init.h>
+ #include <asm/debug.h>
+
+ static debug_info_t *debug_info;
+
+ static int init(void)
+ {
+ /* register 4 debug areas with one page each and 4 byte data field */
+
+ debug_info = debug_register("test", 1, 4, 4 );
+ debug_register_view(debug_info, &debug_hex_ascii_view);
+
+ debug_text_event(debug_info, 4 , "one ");
+ debug_int_exception(debug_info, 4, 4711);
+ debug_event(debug_info, 3, &debug_info, 4);
+
+ return 0;
+ }
+
+ static void cleanup(void)
+ {
+ debug_unregister(debug_info);
+ }
+
+ module_init(init);
+ module_exit(cleanup);
+
+.. code-block:: c
+
+ /*
+ * sprintf-view Example
+ */
+
+ #include <linux/init.h>
+ #include <asm/debug.h>
+
+ static debug_info_t *debug_info;
+
+ static int init(void)
+ {
+ /* register 4 debug areas with one page each and data field for */
+ /* format string pointer + 2 varargs (= 3 * sizeof(long)) */
+
+ debug_info = debug_register("test", 1, 4, sizeof(long) * 3);
+ debug_register_view(debug_info, &debug_sprintf_view);
+
+ debug_sprintf_event(debug_info, 2 , "first event in %s:%i\n",__FILE__,__LINE__);
+ debug_sprintf_exception(debug_info, 1, "pointer to debug info: %p\n",&debug_info);
+
+ return 0;
+ }
+
+ static void cleanup(void)
+ {
+ debug_unregister(debug_info);
+ }
+
+ module_init(init);
+ module_exit(cleanup);
+
+Debugfs Interface
+-----------------
+Views to the debug logs can be investigated through reading the corresponding
+debugfs-files:
+
+Example::
+
+ > ls /sys/kernel/debug/s390dbf/dasd
+ flush hex_ascii level pages
+ > cat /sys/kernel/debug/s390dbf/dasd/hex_ascii | sort -k2,2 -s
+ 00 00974733272:680099 2 - 02 0006ad7e 07 ea 4a 90 | ....
+ 00 00974733272:682210 2 - 02 0006ade6 46 52 45 45 | FREE
+ 00 00974733272:682213 2 - 02 0006adf6 07 ea 4a 90 | ....
+ 00 00974733272:682281 1 * 02 0006ab08 41 4c 4c 43 | EXCP
+ 01 00974733272:682284 2 - 02 0006ab16 45 43 4b 44 | ECKD
+ 01 00974733272:682287 2 - 02 0006ab28 00 00 00 04 | ....
+ 01 00974733272:682289 2 - 02 0006ab3e 00 00 00 20 | ...
+ 01 00974733272:682297 2 - 02 0006ad7e 07 ea 4a 90 | ....
+ 01 00974733272:684384 2 - 00 0006ade6 46 52 45 45 | FREE
+ 01 00974733272:684388 2 - 00 0006adf6 07 ea 4a 90 | ....
+
+See section about predefined views for explanation of the above output!
+
+Changing the debug level
+------------------------
+
+Example::
+
+
+ > cat /sys/kernel/debug/s390dbf/dasd/level
+ 3
+ > echo "5" > /sys/kernel/debug/s390dbf/dasd/level
+ > cat /sys/kernel/debug/s390dbf/dasd/level
+ 5
+
+Flushing debug areas
+--------------------
+Debug areas can be flushed with piping the number of the desired
+area (0...n) to the debugfs file "flush". When using "-" all debug areas
+are flushed.
+
+Examples:
+
+1. Flush debug area 0::
+
+ > echo "0" > /sys/kernel/debug/s390dbf/dasd/flush
+
+2. Flush all debug areas::
+
+ > echo "-" > /sys/kernel/debug/s390dbf/dasd/flush
+
+Changing the size of debug areas
+------------------------------------
+It is possible the change the size of debug areas through piping
+the number of pages to the debugfs file "pages". The resize request will
+also flush the debug areas.
+
+Example:
+
+Define 4 pages for the debug areas of debug feature "dasd"::
+
+ > echo "4" > /sys/kernel/debug/s390dbf/dasd/pages
+
+Stopping the debug feature
+--------------------------
+Example:
+
+1. Check if stopping is allowed::
+
+ > cat /proc/sys/s390dbf/debug_stoppable
+
+2. Stop debug feature::
+
+ > echo 0 > /proc/sys/s390dbf/debug_active
+
+crash Interface
+----------------
+The ``crash`` tool since v5.1.0 has a built-in command
+``s390dbf`` to display all the debug logs or export them to the file system.
+With this tool it is possible
+to investigate the debug logs on a live system and with a memory dump after
+a system crash.
+
+Investigating raw memory
+------------------------
+One last possibility to investigate the debug logs at a live
+system and after a system crash is to look at the raw memory
+under VM or at the Service Element.
+It is possible to find the anchor of the debug-logs through
+the ``debug_area_first`` symbol in the System map. Then one has
+to follow the correct pointers of the data-structures defined
+in debug.h and find the debug-areas in memory.
+Normally modules which use the debug feature will also have
+a global variable with the pointer to the debug-logs. Following
+this pointer it will also be possible to find the debug logs in
+memory.
+
+For this method it is recommended to use '16 * x + 4' byte (x = 0..n)
+for the length of the data field in :c:func:`debug_register()` in
+order to see the debug entries well formatted.
+
+
+Predefined Views
+----------------
+
+There are two predefined views: hex_ascii and sprintf.
+The hex_ascii view shows the data field in hex and ascii representation
+(e.g. ``45 43 4b 44 | ECKD``).
+
+The sprintf view formats the debug entries in the same way as the sprintf
+function would do. The sprintf event/exception functions write to the
+debug entry a pointer to the format string (size = sizeof(long))
+and for each vararg a long value. So e.g. for a debug entry with a format
+string plus two varargs one would need to allocate a (3 * sizeof(long))
+byte data area in the debug_register() function.
+
+IMPORTANT:
+ Using "%s" in sprintf event functions is dangerous. You can only
+ use "%s" in the sprintf event functions, if the memory for the passed string
+ is available as long as the debug feature exists. The reason behind this is
+ that due to performance considerations only a pointer to the string is stored
+ in the debug feature. If you log a string that is freed afterwards, you will
+ get an OOPS when inspecting the debug feature, because then the debug feature
+ will access the already freed memory.
+
+NOTE:
+ If using the sprintf view do NOT use other event/exception functions
+ than the sprintf-event and -exception functions.
+
+The format of the hex_ascii and sprintf view is as follows:
+
+- Number of area
+- Timestamp (formatted as seconds and microseconds since 00:00:00 Coordinated
+ Universal Time (UTC), January 1, 1970)
+- level of debug entry
+- Exception flag (* = Exception)
+- Cpu-Number of calling task
+- Return Address to caller
+- data field
+
+A typical line of the hex_ascii view will look like the following (first line
+is only for explanation and will not be displayed when 'cating' the view)::
+
+ area time level exception cpu caller data (hex + ascii)
+ --------------------------------------------------------------------------
+ 00 00964419409:440690 1 - 00 88023fe
+
+
+Defining views
+--------------
+
+Views are specified with the 'debug_view' structure. There are defined
+callback functions which are used for reading and writing the debugfs files:
+
+.. code-block:: c
+
+ struct debug_view {
+ char name[DEBUG_MAX_PROCF_LEN];
+ debug_prolog_proc_t* prolog_proc;
+ debug_header_proc_t* header_proc;
+ debug_format_proc_t* format_proc;
+ debug_input_proc_t* input_proc;
+ void* private_data;
+ };
+
+where:
+
+.. code-block:: c
+
+ typedef int (debug_header_proc_t) (debug_info_t* id,
+ struct debug_view* view,
+ int area,
+ debug_entry_t* entry,
+ char* out_buf);
+
+ typedef int (debug_format_proc_t) (debug_info_t* id,
+ struct debug_view* view, char* out_buf,
+ const char* in_buf);
+ typedef int (debug_prolog_proc_t) (debug_info_t* id,
+ struct debug_view* view,
+ char* out_buf);
+ typedef int (debug_input_proc_t) (debug_info_t* id,
+ struct debug_view* view,
+ struct file* file, const char* user_buf,
+ size_t in_buf_size, loff_t* offset);
+
+
+The "private_data" member can be used as pointer to view specific data.
+It is not used by the debug feature itself.
+
+The output when reading a debugfs file is structured like this::
+
+ "prolog_proc output"
+
+ "header_proc output 1" "format_proc output 1"
+ "header_proc output 2" "format_proc output 2"
+ "header_proc output 3" "format_proc output 3"
+ ...
+
+When a view is read from the debugfs, the Debug Feature calls the
+'prolog_proc' once for writing the prolog.
+Then 'header_proc' and 'format_proc' are called for each
+existing debug entry.
+
+The input_proc can be used to implement functionality when it is written to
+the view (e.g. like with ``echo "0" > /sys/kernel/debug/s390dbf/dasd/level``).
+
+For header_proc there can be used the default function
+:c:func:`debug_dflt_header_fn()` which is defined in debug.h.
+and which produces the same header output as the predefined views.
+E.g::
+
+ 00 00964419409:440761 2 - 00 88023ec
+
+In order to see how to use the callback functions check the implementation
+of the default views!
+
+Example:
+
+.. code-block:: c
+
+ #include <asm/debug.h>
+
+ #define UNKNOWNSTR "data: %08x"
+
+ const char* messages[] =
+ {"This error...........\n",
+ "That error...........\n",
+ "Problem..............\n",
+ "Something went wrong.\n",
+ "Everything ok........\n",
+ NULL
+ };
+
+ static int debug_test_format_fn(
+ debug_info_t *id, struct debug_view *view,
+ char *out_buf, const char *in_buf
+ )
+ {
+ int i, rc = 0;
+
+ if (id->buf_size >= 4) {
+ int msg_nr = *((int*)in_buf);
+ if (msg_nr < sizeof(messages) / sizeof(char*) - 1)
+ rc += sprintf(out_buf, "%s", messages[msg_nr]);
+ else
+ rc += sprintf(out_buf, UNKNOWNSTR, msg_nr);
+ }
+ return rc;
+ }
+
+ struct debug_view debug_test_view = {
+ "myview", /* name of view */
+ NULL, /* no prolog */
+ &debug_dflt_header_fn, /* default header for each entry */
+ &debug_test_format_fn, /* our own format function */
+ NULL, /* no input function */
+ NULL /* no private data */
+ };
+
+test:
+=====
+
+.. code-block:: c
+
+ debug_info_t *debug_info;
+ int i;
+ ...
+ debug_info = debug_register("test", 0, 4, 4);
+ debug_register_view(debug_info, &debug_test_view);
+ for (i = 0; i < 10; i ++)
+ debug_int_event(debug_info, 1, i);
+
+::
+
+ > cat /sys/kernel/debug/s390dbf/test/myview
+ 00 00964419734:611402 1 - 00 88042ca This error...........
+ 00 00964419734:611405 1 - 00 88042ca That error...........
+ 00 00964419734:611408 1 - 00 88042ca Problem..............
+ 00 00964419734:611411 1 - 00 88042ca Something went wrong.
+ 00 00964419734:611414 1 - 00 88042ca Everything ok........
+ 00 00964419734:611417 1 - 00 88042ca data: 00000005
+ 00 00964419734:611419 1 - 00 88042ca data: 00000006
+ 00 00964419734:611422 1 - 00 88042ca data: 00000007
+ 00 00964419734:611425 1 - 00 88042ca data: 00000008
+ 00 00964419734:611428 1 - 00 88042ca data: 00000009
diff --git a/Documentation/arch/s390/text_files.rst b/Documentation/arch/s390/text_files.rst
new file mode 100644
index 000000000000..c94d05d4fa17
--- /dev/null
+++ b/Documentation/arch/s390/text_files.rst
@@ -0,0 +1,11 @@
+ibm 3270 changelog
+------------------
+
+.. include:: 3270.ChangeLog
+ :literal:
+
+ibm 3270 config3270.sh
+----------------------
+
+.. literalinclude:: config3270.sh
+ :language: shell
diff --git a/Documentation/arch/s390/vfio-ap-locking.rst b/Documentation/arch/s390/vfio-ap-locking.rst
new file mode 100644
index 000000000000..0dfcdb562e21
--- /dev/null
+++ b/Documentation/arch/s390/vfio-ap-locking.rst
@@ -0,0 +1,115 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================
+VFIO AP Locks Overview
+======================
+This document describes the locks that are pertinent to the secure operation
+of the vfio_ap device driver. Throughout this document, the following variables
+will be used to denote instances of the structures herein described:
+
+.. code-block:: c
+
+ struct ap_matrix_dev *matrix_dev;
+ struct ap_matrix_mdev *matrix_mdev;
+ struct kvm *kvm;
+
+The Matrix Devices Lock (drivers/s390/crypto/vfio_ap_private.h)
+---------------------------------------------------------------
+
+.. code-block:: c
+
+ struct ap_matrix_dev {
+ ...
+ struct list_head mdev_list;
+ struct mutex mdevs_lock;
+ ...
+ }
+
+The Matrix Devices Lock (matrix_dev->mdevs_lock) is implemented as a global
+mutex contained within the single object of struct ap_matrix_dev. This lock
+controls access to all fields contained within each matrix_mdev
+(matrix_dev->mdev_list). This lock must be held while reading from, writing to
+or using the data from a field contained within a matrix_mdev instance
+representing one of the vfio_ap device driver's mediated devices.
+
+The KVM Lock (include/linux/kvm_host.h)
+---------------------------------------
+
+.. code-block:: c
+
+ struct kvm {
+ ...
+ struct mutex lock;
+ ...
+ }
+
+The KVM Lock (kvm->lock) controls access to the state data for a KVM guest. This
+lock must be held by the vfio_ap device driver while one or more AP adapters,
+domains or control domains are being plugged into or unplugged from the guest.
+
+The KVM pointer is stored in the in the matrix_mdev instance
+(matrix_mdev->kvm = kvm) containing the state of the mediated device that has
+been attached to the KVM guest.
+
+The Guests Lock (drivers/s390/crypto/vfio_ap_private.h)
+-----------------------------------------------------------
+
+.. code-block:: c
+
+ struct ap_matrix_dev {
+ ...
+ struct list_head mdev_list;
+ struct mutex guests_lock;
+ ...
+ }
+
+The Guests Lock (matrix_dev->guests_lock) controls access to the
+matrix_mdev instances (matrix_dev->mdev_list) that represent mediated devices
+that hold the state for the mediated devices that have been attached to a
+KVM guest. This lock must be held:
+
+1. To control access to the KVM pointer (matrix_mdev->kvm) while the vfio_ap
+ device driver is using it to plug/unplug AP devices passed through to the KVM
+ guest.
+
+2. To add matrix_mdev instances to or remove them from matrix_dev->mdev_list.
+ This is necessary to ensure the proper locking order when the list is perused
+ to find an ap_matrix_mdev instance for the purpose of plugging/unplugging
+ AP devices passed through to a KVM guest.
+
+ For example, when a queue device is removed from the vfio_ap device driver,
+ if the adapter is passed through to a KVM guest, it will have to be
+ unplugged. In order to figure out whether the adapter is passed through,
+ the matrix_mdev object to which the queue is assigned will have to be
+ found. The KVM pointer (matrix_mdev->kvm) can then be used to determine if
+ the mediated device is passed through (matrix_mdev->kvm != NULL) and if so,
+ to unplug the adapter.
+
+It is not necessary to take the Guests Lock to access the KVM pointer if the
+pointer is not used to plug/unplug devices passed through to the KVM guest;
+however, in this case, the Matrix Devices Lock (matrix_dev->mdevs_lock) must be
+held in order to access the KVM pointer since it is set and cleared under the
+protection of the Matrix Devices Lock. A case in point is the function that
+handles interception of the PQAP(AQIC) instruction sub-function. This handler
+needs to access the KVM pointer only for the purposes of setting or clearing IRQ
+resources, so only the matrix_dev->mdevs_lock needs to be held.
+
+The PQAP Hook Lock (arch/s390/include/asm/kvm_host.h)
+-----------------------------------------------------
+
+.. code-block:: c
+
+ typedef int (*crypto_hook)(struct kvm_vcpu *vcpu);
+
+ struct kvm_s390_crypto {
+ ...
+ struct rw_semaphore pqap_hook_rwsem;
+ crypto_hook *pqap_hook;
+ ...
+ };
+
+The PQAP Hook Lock is a r/w semaphore that controls access to the function
+pointer of the handler ``(*kvm->arch.crypto.pqap_hook)`` to invoke when the
+PQAP(AQIC) instruction sub-function is intercepted by the host. The lock must be
+held in write mode when pqap_hook value is set, and in read mode when the
+pqap_hook function is called.
diff --git a/Documentation/arch/s390/vfio-ap.rst b/Documentation/arch/s390/vfio-ap.rst
new file mode 100644
index 000000000000..eba1991fbdba
--- /dev/null
+++ b/Documentation/arch/s390/vfio-ap.rst
@@ -0,0 +1,1129 @@
+===============================
+Adjunct Processor (AP) facility
+===============================
+
+
+Introduction
+============
+The Adjunct Processor (AP) facility is an IBM Z cryptographic facility comprised
+of three AP instructions and from 1 up to 256 PCIe cryptographic adapter cards.
+The AP devices provide cryptographic functions to all CPUs assigned to a
+linux system running in an IBM Z system LPAR.
+
+The AP adapter cards are exposed via the AP bus. The motivation for vfio-ap
+is to make AP cards available to KVM guests using the VFIO mediated device
+framework. This implementation relies considerably on the s390 virtualization
+facilities which do most of the hard work of providing direct access to AP
+devices.
+
+AP Architectural Overview
+=========================
+To facilitate the comprehension of the design, let's start with some
+definitions:
+
+* AP adapter
+
+ An AP adapter is an IBM Z adapter card that can perform cryptographic
+ functions. There can be from 0 to 256 adapters assigned to an LPAR. Adapters
+ assigned to the LPAR in which a linux host is running will be available to
+ the linux host. Each adapter is identified by a number from 0 to 255; however,
+ the maximum adapter number is determined by machine model and/or adapter type.
+ When installed, an AP adapter is accessed by AP instructions executed by any
+ CPU.
+
+ The AP adapter cards are assigned to a given LPAR via the system's Activation
+ Profile which can be edited via the HMC. When the linux host system is IPL'd
+ in the LPAR, the AP bus detects the AP adapter cards assigned to the LPAR and
+ creates a sysfs device for each assigned adapter. For example, if AP adapters
+ 4 and 10 (0x0a) are assigned to the LPAR, the AP bus will create the following
+ sysfs device entries::
+
+ /sys/devices/ap/card04
+ /sys/devices/ap/card0a
+
+ Symbolic links to these devices will also be created in the AP bus devices
+ sub-directory::
+
+ /sys/bus/ap/devices/[card04]
+ /sys/bus/ap/devices/[card04]
+
+* AP domain
+
+ An adapter is partitioned into domains. An adapter can hold up to 256 domains
+ depending upon the adapter type and hardware configuration. A domain is
+ identified by a number from 0 to 255; however, the maximum domain number is
+ determined by machine model and/or adapter type.. A domain can be thought of
+ as a set of hardware registers and memory used for processing AP commands. A
+ domain can be configured with a secure private key used for clear key
+ encryption. A domain is classified in one of two ways depending upon how it
+ may be accessed:
+
+ * Usage domains are domains that are targeted by an AP instruction to
+ process an AP command.
+
+ * Control domains are domains that are changed by an AP command sent to a
+ usage domain; for example, to set the secure private key for the control
+ domain.
+
+ The AP usage and control domains are assigned to a given LPAR via the system's
+ Activation Profile which can be edited via the HMC. When a linux host system
+ is IPL'd in the LPAR, the AP bus module detects the AP usage and control
+ domains assigned to the LPAR. The domain number of each usage domain and
+ adapter number of each AP adapter are combined to create AP queue devices
+ (see AP Queue section below). The domain number of each control domain will be
+ represented in a bitmask and stored in a sysfs file
+ /sys/bus/ap/ap_control_domain_mask. The bits in the mask, from most to least
+ significant bit, correspond to domains 0-255.
+
+* AP Queue
+
+ An AP queue is the means by which an AP command is sent to a usage domain
+ inside a specific adapter. An AP queue is identified by a tuple
+ comprised of an AP adapter ID (APID) and an AP queue index (APQI). The
+ APQI corresponds to a given usage domain number within the adapter. This tuple
+ forms an AP Queue Number (APQN) uniquely identifying an AP queue. AP
+ instructions include a field containing the APQN to identify the AP queue to
+ which the AP command is to be sent for processing.
+
+ The AP bus will create a sysfs device for each APQN that can be derived from
+ the cross product of the AP adapter and usage domain numbers detected when the
+ AP bus module is loaded. For example, if adapters 4 and 10 (0x0a) and usage
+ domains 6 and 71 (0x47) are assigned to the LPAR, the AP bus will create the
+ following sysfs entries::
+
+ /sys/devices/ap/card04/04.0006
+ /sys/devices/ap/card04/04.0047
+ /sys/devices/ap/card0a/0a.0006
+ /sys/devices/ap/card0a/0a.0047
+
+ The following symbolic links to these devices will be created in the AP bus
+ devices subdirectory::
+
+ /sys/bus/ap/devices/[04.0006]
+ /sys/bus/ap/devices/[04.0047]
+ /sys/bus/ap/devices/[0a.0006]
+ /sys/bus/ap/devices/[0a.0047]
+
+* AP Instructions:
+
+ There are three AP instructions:
+
+ * NQAP: to enqueue an AP command-request message to a queue
+ * DQAP: to dequeue an AP command-reply message from a queue
+ * PQAP: to administer the queues
+
+ AP instructions identify the domain that is targeted to process the AP
+ command; this must be one of the usage domains. An AP command may modify a
+ domain that is not one of the usage domains, but the modified domain
+ must be one of the control domains.
+
+AP and SIE
+==========
+Let's now take a look at how AP instructions executed on a guest are interpreted
+by the hardware.
+
+A satellite control block called the Crypto Control Block (CRYCB) is attached to
+our main hardware virtualization control block. The CRYCB contains an AP Control
+Block (APCB) that has three fields to identify the adapters, usage domains and
+control domains assigned to the KVM guest:
+
+* The AP Mask (APM) field is a bit mask that identifies the AP adapters assigned
+ to the KVM guest. Each bit in the mask, from left to right, corresponds to
+ an APID from 0-255. If a bit is set, the corresponding adapter is valid for
+ use by the KVM guest.
+
+* The AP Queue Mask (AQM) field is a bit mask identifying the AP usage domains
+ assigned to the KVM guest. Each bit in the mask, from left to right,
+ corresponds to an AP queue index (APQI) from 0-255. If a bit is set, the
+ corresponding queue is valid for use by the KVM guest.
+
+* The AP Domain Mask field is a bit mask that identifies the AP control domains
+ assigned to the KVM guest. The ADM bit mask controls which domains can be
+ changed by an AP command-request message sent to a usage domain from the
+ guest. Each bit in the mask, from left to right, corresponds to a domain from
+ 0-255. If a bit is set, the corresponding domain can be modified by an AP
+ command-request message sent to a usage domain.
+
+If you recall from the description of an AP Queue, AP instructions include
+an APQN to identify the AP queue to which an AP command-request message is to be
+sent (NQAP and PQAP instructions), or from which a command-reply message is to
+be received (DQAP instruction). The validity of an APQN is defined by the matrix
+calculated from the APM and AQM; it is the Cartesian product of all assigned
+adapter numbers (APM) with all assigned queue indexes (AQM). For example, if
+adapters 1 and 2 and usage domains 5 and 6 are assigned to a guest, the APQNs
+(1,5), (1,6), (2,5) and (2,6) will be valid for the guest.
+
+The APQNs can provide secure key functionality - i.e., a private key is stored
+on the adapter card for each of its domains - so each APQN must be assigned to
+at most one guest or to the linux host::
+
+ Example 1: Valid configuration:
+ ------------------------------
+ Guest1: adapters 1,2 domains 5,6
+ Guest2: adapter 1,2 domain 7
+
+ This is valid because both guests have a unique set of APQNs:
+ Guest1 has APQNs (1,5), (1,6), (2,5), (2,6);
+ Guest2 has APQNs (1,7), (2,7)
+
+ Example 2: Valid configuration:
+ ------------------------------
+ Guest1: adapters 1,2 domains 5,6
+ Guest2: adapters 3,4 domains 5,6
+
+ This is also valid because both guests have a unique set of APQNs:
+ Guest1 has APQNs (1,5), (1,6), (2,5), (2,6);
+ Guest2 has APQNs (3,5), (3,6), (4,5), (4,6)
+
+ Example 3: Invalid configuration:
+ --------------------------------
+ Guest1: adapters 1,2 domains 5,6
+ Guest2: adapter 1 domains 6,7
+
+ This is an invalid configuration because both guests have access to
+ APQN (1,6).
+
+The Design
+==========
+The design introduces three new objects:
+
+1. AP matrix device
+2. VFIO AP device driver (vfio_ap.ko)
+3. VFIO AP mediated pass-through device
+
+The VFIO AP device driver
+-------------------------
+The VFIO AP (vfio_ap) device driver serves the following purposes:
+
+1. Provides the interfaces to secure APQNs for exclusive use of KVM guests.
+
+2. Sets up the VFIO mediated device interfaces to manage a vfio_ap mediated
+ device and creates the sysfs interfaces for assigning adapters, usage
+ domains, and control domains comprising the matrix for a KVM guest.
+
+3. Configures the APM, AQM and ADM in the APCB contained in the CRYCB referenced
+ by a KVM guest's SIE state description to grant the guest access to a matrix
+ of AP devices
+
+Reserve APQNs for exclusive use of KVM guests
+---------------------------------------------
+The following block diagram illustrates the mechanism by which APQNs are
+reserved::
+
+ +------------------+
+ 7 remove | |
+ +--------------------> cex4queue driver |
+ | | |
+ | +------------------+
+ |
+ |
+ | +------------------+ +----------------+
+ | 5 register driver | | 3 create | |
+ | +----------------> Device core +----------> matrix device |
+ | | | | | |
+ | | +--------^---------+ +----------------+
+ | | |
+ | | +-------------------+
+ | | +-----------------------------------+ |
+ | | | 4 register AP driver | | 2 register device
+ | | | | |
+ +--------+---+-v---+ +--------+-------+-+
+ | | | |
+ | ap_bus +--------------------- > vfio_ap driver |
+ | | 8 probe | |
+ +--------^---------+ +--^--^------------+
+ 6 edit | | |
+ apmask | +-----------------------------+ | 11 mdev create
+ aqmask | | 1 modprobe |
+ +--------+-----+---+ +----------------+-+ +----------------+
+ | | | |10 create| mediated |
+ | admin | | VFIO device core |---------> matrix |
+ | + | | | device |
+ +------+-+---------+ +--------^---------+ +--------^-------+
+ | | | |
+ | | 9 create vfio_ap-passthrough | |
+ | +------------------------------+ |
+ +-------------------------------------------------------------+
+ 12 assign adapter/domain/control domain
+
+The process for reserving an AP queue for use by a KVM guest is:
+
+1. The administrator loads the vfio_ap device driver
+2. The vfio-ap driver during its initialization will register a single 'matrix'
+ device with the device core. This will serve as the parent device for
+ all vfio_ap mediated devices used to configure an AP matrix for a guest.
+3. The /sys/devices/vfio_ap/matrix device is created by the device core
+4. The vfio_ap device driver will register with the AP bus for AP queue devices
+ of type 10 and higher (CEX4 and newer). The driver will provide the vfio_ap
+ driver's probe and remove callback interfaces. Devices older than CEX4 queues
+ are not supported to simplify the implementation by not needlessly
+ complicating the design by supporting older devices that will go out of
+ service in the relatively near future, and for which there are few older
+ systems around on which to test.
+5. The AP bus registers the vfio_ap device driver with the device core
+6. The administrator edits the AP adapter and queue masks to reserve AP queues
+ for use by the vfio_ap device driver.
+7. The AP bus removes the AP queues reserved for the vfio_ap driver from the
+ default zcrypt cex4queue driver.
+8. The AP bus probes the vfio_ap device driver to bind the queues reserved for
+ it.
+9. The administrator creates a passthrough type vfio_ap mediated device to be
+ used by a guest
+10. The administrator assigns the adapters, usage domains and control domains
+ to be exclusively used by a guest.
+
+Set up the VFIO mediated device interfaces
+------------------------------------------
+The VFIO AP device driver utilizes the common interfaces of the VFIO mediated
+device core driver to:
+
+* Register an AP mediated bus driver to add a vfio_ap mediated device to and
+ remove it from a VFIO group.
+* Create and destroy a vfio_ap mediated device
+* Add a vfio_ap mediated device to and remove it from the AP mediated bus driver
+* Add a vfio_ap mediated device to and remove it from an IOMMU group
+
+The following high-level block diagram shows the main components and interfaces
+of the VFIO AP mediated device driver::
+
+ +-------------+
+ | |
+ | +---------+ | mdev_register_driver() +--------------+
+ | | Mdev | +<-----------------------+ |
+ | | bus | | | vfio_mdev.ko |
+ | | driver | +----------------------->+ |<-> VFIO user
+ | +---------+ | probe()/remove() +--------------+ APIs
+ | |
+ | MDEV CORE |
+ | MODULE |
+ | mdev.ko |
+ | +---------+ | mdev_register_parent() +--------------+
+ | |Physical | +<-----------------------+ |
+ | | device | | | vfio_ap.ko |<-> matrix
+ | |interface| +----------------------->+ | device
+ | +---------+ | callback +--------------+
+ +-------------+
+
+During initialization of the vfio_ap module, the matrix device is registered
+with an 'mdev_parent_ops' structure that provides the sysfs attribute
+structures, mdev functions and callback interfaces for managing the mediated
+matrix device.
+
+* sysfs attribute structures:
+
+ supported_type_groups
+ The VFIO mediated device framework supports creation of user-defined
+ mediated device types. These mediated device types are specified
+ via the 'supported_type_groups' structure when a device is registered
+ with the mediated device framework. The registration process creates the
+ sysfs structures for each mediated device type specified in the
+ 'mdev_supported_types' sub-directory of the device being registered. Along
+ with the device type, the sysfs attributes of the mediated device type are
+ provided.
+
+ The VFIO AP device driver will register one mediated device type for
+ passthrough devices:
+
+ /sys/devices/vfio_ap/matrix/mdev_supported_types/vfio_ap-passthrough
+
+ Only the read-only attributes required by the VFIO mdev framework will
+ be provided::
+
+ ... name
+ ... device_api
+ ... available_instances
+ ... device_api
+
+ Where:
+
+ * name:
+ specifies the name of the mediated device type
+ * device_api:
+ the mediated device type's API
+ * available_instances:
+ the number of vfio_ap mediated passthrough devices
+ that can be created
+ * device_api:
+ specifies the VFIO API
+ mdev_attr_groups
+ This attribute group identifies the user-defined sysfs attributes of the
+ mediated device. When a device is registered with the VFIO mediated device
+ framework, the sysfs attribute files identified in the 'mdev_attr_groups'
+ structure will be created in the vfio_ap mediated device's directory. The
+ sysfs attributes for a vfio_ap mediated device are:
+
+ assign_adapter / unassign_adapter:
+ Write-only attributes for assigning/unassigning an AP adapter to/from the
+ vfio_ap mediated device. To assign/unassign an adapter, the APID of the
+ adapter is echoed into the respective attribute file.
+ assign_domain / unassign_domain:
+ Write-only attributes for assigning/unassigning an AP usage domain to/from
+ the vfio_ap mediated device. To assign/unassign a domain, the domain
+ number of the usage domain is echoed into the respective attribute
+ file.
+ matrix:
+ A read-only file for displaying the APQNs derived from the Cartesian
+ product of the adapter and domain numbers assigned to the vfio_ap mediated
+ device.
+ guest_matrix:
+ A read-only file for displaying the APQNs derived from the Cartesian
+ product of the adapter and domain numbers assigned to the APM and AQM
+ fields respectively of the KVM guest's CRYCB. This may differ from the
+ the APQNs assigned to the vfio_ap mediated device if any APQN does not
+ reference a queue device bound to the vfio_ap device driver (i.e., the
+ queue is not in the host's AP configuration).
+ assign_control_domain / unassign_control_domain:
+ Write-only attributes for assigning/unassigning an AP control domain
+ to/from the vfio_ap mediated device. To assign/unassign a control domain,
+ the ID of the domain to be assigned/unassigned is echoed into the
+ respective attribute file.
+ control_domains:
+ A read-only file for displaying the control domain numbers assigned to the
+ vfio_ap mediated device.
+ ap_config:
+ A read/write file that, when written to, allows all three of the
+ vfio_ap mediated device's ap matrix masks to be replaced in one shot.
+ Three masks are given, one for adapters, one for domains, and one for
+ control domains. If the given state cannot be set then no changes are
+ made to the vfio-ap mediated device.
+
+ The format of the data written to ap_config is as follows:
+ {amask},{dmask},{cmask}\n
+
+ \n is a newline character.
+
+ amask, dmask, and cmask are masks identifying which adapters, domains,
+ and control domains should be assigned to the mediated device.
+
+ The format of a mask is as follows:
+ 0xNN..NN
+
+ Where NN..NN is 64 hexadecimal characters representing a 256-bit value.
+ The leftmost (highest order) bit represents adapter/domain 0.
+
+ For an example set of masks that represent your mdev's current
+ configuration, simply cat ap_config.
+
+ Setting an adapter or domain number greater than the maximum allowed for
+ the system will result in an error.
+
+ This attribute is intended to be used by automation. End users would be
+ better served using the respective assign/unassign attributes for
+ adapters, domains, and control domains.
+
+* functions:
+
+ create:
+ allocates the ap_matrix_mdev structure used by the vfio_ap driver to:
+
+ * Store the reference to the KVM structure for the guest using the mdev
+ * Store the AP matrix configuration for the adapters, domains, and control
+ domains assigned via the corresponding sysfs attributes files
+ * Store the AP matrix configuration for the adapters, domains and control
+ domains available to a guest. A guest may not be provided access to APQNs
+ referencing queue devices that do not exist, or are not bound to the
+ vfio_ap device driver.
+
+ remove:
+ deallocates the vfio_ap mediated device's ap_matrix_mdev structure.
+ This will be allowed only if a running guest is not using the mdev.
+
+* callback interfaces
+
+ open_device:
+ The vfio_ap driver uses this callback to register a
+ VFIO_GROUP_NOTIFY_SET_KVM notifier callback function for the matrix mdev
+ devices. The open_device callback is invoked by userspace to connect the
+ VFIO iommu group for the matrix mdev device to the MDEV bus. Access to the
+ KVM structure used to configure the KVM guest is provided via this callback.
+ The KVM structure, is used to configure the guest's access to the AP matrix
+ defined via the vfio_ap mediated device's sysfs attribute files.
+
+ close_device:
+ unregisters the VFIO_GROUP_NOTIFY_SET_KVM notifier callback function for the
+ matrix mdev device and deconfigures the guest's AP matrix.
+
+ ioctl:
+ this callback handles the VFIO_DEVICE_GET_INFO and VFIO_DEVICE_RESET ioctls
+ defined by the vfio framework.
+
+Configure the guest's AP resources
+----------------------------------
+Configuring the AP resources for a KVM guest will be performed when the
+VFIO_GROUP_NOTIFY_SET_KVM notifier callback is invoked. The notifier
+function is called when userspace connects to KVM. The guest's AP resources are
+configured via its APCB by:
+
+* Setting the bits in the APM corresponding to the APIDs assigned to the
+ vfio_ap mediated device via its 'assign_adapter' interface.
+* Setting the bits in the AQM corresponding to the domains assigned to the
+ vfio_ap mediated device via its 'assign_domain' interface.
+* Setting the bits in the ADM corresponding to the domain dIDs assigned to the
+ vfio_ap mediated device via its 'assign_control_domains' interface.
+
+The linux device model precludes passing a device through to a KVM guest that
+is not bound to the device driver facilitating its pass-through. Consequently,
+an APQN that does not reference a queue device bound to the vfio_ap device
+driver will not be assigned to a KVM guest's matrix. The AP architecture,
+however, does not provide a means to filter individual APQNs from the guest's
+matrix, so the adapters, domains and control domains assigned to vfio_ap
+mediated device via its sysfs 'assign_adapter', 'assign_domain' and
+'assign_control_domain' interfaces will be filtered before providing the AP
+configuration to a guest:
+
+* The APIDs of the adapters, the APQIs of the domains and the domain numbers of
+ the control domains assigned to the matrix mdev that are not also assigned to
+ the host's AP configuration will be filtered.
+
+* Each APQN derived from the Cartesian product of the APIDs and APQIs assigned
+ to the vfio_ap mdev is examined and if any one of them does not reference a
+ queue device bound to the vfio_ap device driver, the adapter will not be
+ plugged into the guest (i.e., the bit corresponding to its APID will not be
+ set in the APM of the guest's APCB).
+
+The CPU model features for AP
+-----------------------------
+The AP stack relies on the presence of the AP instructions as well as three
+facilities: The AP Facilities Test (APFT) facility; the AP Query
+Configuration Information (QCI) facility; and the AP Queue Interruption Control
+facility. These features/facilities are made available to a KVM guest via the
+following CPU model features:
+
+1. ap: Indicates whether the AP instructions are installed on the guest. This
+ feature will be enabled by KVM only if the AP instructions are installed
+ on the host.
+
+2. apft: Indicates the APFT facility is available on the guest. This facility
+ can be made available to the guest only if it is available on the host (i.e.,
+ facility bit 15 is set).
+
+3. apqci: Indicates the AP QCI facility is available on the guest. This facility
+ can be made available to the guest only if it is available on the host (i.e.,
+ facility bit 12 is set).
+
+4. apqi: Indicates AP Queue Interruption Control faclity is available on the
+ guest. This facility can be made available to the guest only if it is
+ available on the host (i.e., facility bit 65 is set).
+
+Note: If the user chooses to specify a CPU model different than the 'host'
+model to QEMU, the CPU model features and facilities need to be turned on
+explicitly; for example::
+
+ /usr/bin/qemu-system-s390x ... -cpu z13,ap=on,apqci=on,apft=on,apqi=on
+
+A guest can be precluded from using AP features/facilities by turning them off
+explicitly; for example::
+
+ /usr/bin/qemu-system-s390x ... -cpu host,ap=off,apqci=off,apft=off,apqi=off
+
+Note: If the APFT facility is turned off (apft=off) for the guest, the guest
+will not see any AP devices. The zcrypt device drivers on the guest that
+register for type 10 and newer AP devices - i.e., the cex4card and cex4queue
+device drivers - need the APFT facility to ascertain the facilities installed on
+a given AP device. If the APFT facility is not installed on the guest, then no
+adapter or domain devices will get created by the AP bus running on the
+guest because only type 10 and newer devices can be configured for guest use.
+
+Example
+=======
+Let's now provide an example to illustrate how KVM guests may be given
+access to AP facilities. For this example, we will show how to configure
+three guests such that executing the lszcrypt command on the guests would
+look like this:
+
+Guest1
+------
+=========== ===== ============
+CARD.DOMAIN TYPE MODE
+=========== ===== ============
+05 CEX5C CCA-Coproc
+05.0004 CEX5C CCA-Coproc
+05.00ab CEX5C CCA-Coproc
+06 CEX5A Accelerator
+06.0004 CEX5A Accelerator
+06.00ab CEX5A Accelerator
+=========== ===== ============
+
+Guest2
+------
+=========== ===== ============
+CARD.DOMAIN TYPE MODE
+=========== ===== ============
+05 CEX5C CCA-Coproc
+05.0047 CEX5C CCA-Coproc
+05.00ff CEX5C CCA-Coproc
+=========== ===== ============
+
+Guest3
+------
+=========== ===== ============
+CARD.DOMAIN TYPE MODE
+=========== ===== ============
+06 CEX5A Accelerator
+06.0047 CEX5A Accelerator
+06.00ff CEX5A Accelerator
+=========== ===== ============
+
+These are the steps:
+
+1. Install the vfio_ap module on the linux host. The dependency chain for the
+ vfio_ap module is:
+ * iommu
+ * s390
+ * zcrypt
+ * vfio
+ * vfio_mdev
+ * vfio_mdev_device
+ * KVM
+
+ To build the vfio_ap module, the kernel build must be configured with the
+ following Kconfig elements selected:
+ * IOMMU_SUPPORT
+ * S390
+ * AP
+ * VFIO
+ * KVM
+
+ If using make menuconfig select the following to build the vfio_ap module::
+
+ -> Device Drivers
+ -> IOMMU Hardware Support
+ select S390 AP IOMMU Support
+ -> VFIO Non-Privileged userspace driver framework
+ -> Mediated device driver frramework
+ -> VFIO driver for Mediated devices
+ -> I/O subsystem
+ -> VFIO support for AP devices
+
+2. Secure the AP queues to be used by the three guests so that the host can not
+ access them. To secure them, there are two sysfs files that specify
+ bitmasks marking a subset of the APQN range as usable only by the default AP
+ queue device drivers. All remaining APQNs are available for use by
+ any other device driver. The vfio_ap device driver is currently the only
+ non-default device driver. The location of the sysfs files containing the
+ masks are::
+
+ /sys/bus/ap/apmask
+ /sys/bus/ap/aqmask
+
+ The 'apmask' is a 256-bit mask that identifies a set of AP adapter IDs
+ (APID). Each bit in the mask, from left to right, corresponds to an APID from
+ 0-255. If a bit is set, the APID belongs to the subset of APQNs marked as
+ available only to the default AP queue device drivers.
+
+ The 'aqmask' is a 256-bit mask that identifies a set of AP queue indexes
+ (APQI). Each bit in the mask, from left to right, corresponds to an APQI from
+ 0-255. If a bit is set, the APQI belongs to the subset of APQNs marked as
+ available only to the default AP queue device drivers.
+
+ The Cartesian product of the APIDs corresponding to the bits set in the
+ apmask and the APQIs corresponding to the bits set in the aqmask comprise
+ the subset of APQNs that can be used only by the host default device drivers.
+ All other APQNs are available to the non-default device drivers such as the
+ vfio_ap driver.
+
+ Take, for example, the following masks::
+
+ apmask:
+ 0x7d00000000000000000000000000000000000000000000000000000000000000
+
+ aqmask:
+ 0x8000000000000000000000000000000000000000000000000000000000000000
+
+ The masks indicate:
+
+ * Adapters 1, 2, 3, 4, 5, and 7 are available for use by the host default
+ device drivers.
+
+ * Domain 0 is available for use by the host default device drivers
+
+ * The subset of APQNs available for use only by the default host device
+ drivers are:
+
+ (1,0), (2,0), (3,0), (4.0), (5,0) and (7,0)
+
+ * All other APQNs are available for use by the non-default device drivers.
+
+ The APQN of each AP queue device assigned to the linux host is checked by the
+ AP bus against the set of APQNs derived from the Cartesian product of APIDs
+ and APQIs marked as available to the default AP queue device drivers. If a
+ match is detected, only the default AP queue device drivers will be probed;
+ otherwise, the vfio_ap device driver will be probed.
+
+ By default, the two masks are set to reserve all APQNs for use by the default
+ AP queue device drivers. There are two ways the default masks can be changed:
+
+ 1. The sysfs mask files can be edited by echoing a string into the
+ respective sysfs mask file in one of two formats:
+
+ * An absolute hex string starting with 0x - like "0x12345678" - sets
+ the mask. If the given string is shorter than the mask, it is padded
+ with 0s on the right; for example, specifying a mask value of 0x41 is
+ the same as specifying::
+
+ 0x4100000000000000000000000000000000000000000000000000000000000000
+
+ Keep in mind that the mask reads from left to right, so the mask
+ above identifies device numbers 1 and 7 (01000001).
+
+ If the string is longer than the mask, the operation is terminated with
+ an error (EINVAL).
+
+ * Individual bits in the mask can be switched on and off by specifying
+ each bit number to be switched in a comma separated list. Each bit
+ number string must be prepended with a ('+') or minus ('-') to indicate
+ the corresponding bit is to be switched on ('+') or off ('-'). Some
+ valid values are:
+
+ - "+0" switches bit 0 on
+ - "-13" switches bit 13 off
+ - "+0x41" switches bit 65 on
+ - "-0xff" switches bit 255 off
+
+ The following example:
+
+ +0,-6,+0x47,-0xf0
+
+ Switches bits 0 and 71 (0x47) on
+
+ Switches bits 6 and 240 (0xf0) off
+
+ Note that the bits not specified in the list remain as they were before
+ the operation.
+
+ 2. The masks can also be changed at boot time via parameters on the kernel
+ command line like this:
+
+ ap.apmask=0xffff ap.aqmask=0x40
+
+ This would create the following masks::
+
+ apmask:
+ 0xffff000000000000000000000000000000000000000000000000000000000000
+
+ aqmask:
+ 0x4000000000000000000000000000000000000000000000000000000000000000
+
+ Resulting in these two pools::
+
+ default drivers pool: adapter 0-15, domain 1
+ alternate drivers pool: adapter 16-255, domains 0, 2-255
+
+ **Note:**
+ Changing a mask such that one or more APQNs will be taken from a vfio_ap
+ mediated device (see below) will fail with an error (EBUSY). A message
+ is logged to the kernel ring buffer which can be viewed with the 'dmesg'
+ command. The output identifies each APQN flagged as 'in use' and identifies
+ the vfio_ap mediated device to which it is assigned; for example:
+
+ Userspace may not re-assign queue 05.0054 already assigned to 62177883-f1bb-47f0-914d-32a22e3a8804
+ Userspace may not re-assign queue 04.0054 already assigned to cef03c3c-903d-4ecc-9a83-40694cb8aee4
+
+Securing the APQNs for our example
+----------------------------------
+ To secure the AP queues 05.0004, 05.0047, 05.00ab, 05.00ff, 06.0004, 06.0047,
+ 06.00ab, and 06.00ff for use by the vfio_ap device driver, the corresponding
+ APQNs can be removed from the default masks using either of the following
+ commands::
+
+ echo -5,-6 > /sys/bus/ap/apmask
+
+ echo -4,-0x47,-0xab,-0xff > /sys/bus/ap/aqmask
+
+ Or the masks can be set as follows::
+
+ echo 0xf9ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff \
+ > apmask
+
+ echo 0xf7fffffffffffffffeffffffffffffffffffffffffeffffffffffffffffffffe \
+ > aqmask
+
+ This will result in AP queues 05.0004, 05.0047, 05.00ab, 05.00ff, 06.0004,
+ 06.0047, 06.00ab, and 06.00ff getting bound to the vfio_ap device driver. The
+ sysfs directory for the vfio_ap device driver will now contain symbolic links
+ to the AP queue devices bound to it::
+
+ /sys/bus/ap
+ ... [drivers]
+ ...... [vfio_ap]
+ ......... [05.0004]
+ ......... [05.0047]
+ ......... [05.00ab]
+ ......... [05.00ff]
+ ......... [06.0004]
+ ......... [06.0047]
+ ......... [06.00ab]
+ ......... [06.00ff]
+
+ Keep in mind that only type 10 and newer adapters (i.e., CEX4 and later)
+ can be bound to the vfio_ap device driver. The reason for this is to
+ simplify the implementation by not needlessly complicating the design by
+ supporting older devices that will go out of service in the relatively near
+ future and for which there are few older systems on which to test.
+
+ The administrator, therefore, must take care to secure only AP queues that
+ can be bound to the vfio_ap device driver. The device type for a given AP
+ queue device can be read from the parent card's sysfs directory. For example,
+ to see the hardware type of the queue 05.0004:
+
+ cat /sys/bus/ap/devices/card05/hwtype
+
+ The hwtype must be 10 or higher (CEX4 or newer) in order to be bound to the
+ vfio_ap device driver.
+
+3. Create the mediated devices needed to configure the AP matrixes for the
+ three guests and to provide an interface to the vfio_ap driver for
+ use by the guests::
+
+ /sys/devices/vfio_ap/matrix/
+ --- [mdev_supported_types]
+ ------ [vfio_ap-passthrough] (passthrough vfio_ap mediated device type)
+ --------- create
+ --------- [devices]
+
+ To create the mediated devices for the three guests::
+
+ uuidgen > create
+ uuidgen > create
+ uuidgen > create
+
+ or
+
+ echo $uuid1 > create
+ echo $uuid2 > create
+ echo $uuid3 > create
+
+ This will create three mediated devices in the [devices] subdirectory named
+ after the UUID written to the create attribute file. We call them $uuid1,
+ $uuid2 and $uuid3 and this is the sysfs directory structure after creation::
+
+ /sys/devices/vfio_ap/matrix/
+ --- [mdev_supported_types]
+ ------ [vfio_ap-passthrough]
+ --------- [devices]
+ ------------ [$uuid1]
+ --------------- assign_adapter
+ --------------- assign_control_domain
+ --------------- assign_domain
+ --------------- matrix
+ --------------- unassign_adapter
+ --------------- unassign_control_domain
+ --------------- unassign_domain
+
+ ------------ [$uuid2]
+ --------------- assign_adapter
+ --------------- assign_control_domain
+ --------------- assign_domain
+ --------------- matrix
+ --------------- unassign_adapter
+ ----------------unassign_control_domain
+ ----------------unassign_domain
+
+ ------------ [$uuid3]
+ --------------- assign_adapter
+ --------------- assign_control_domain
+ --------------- assign_domain
+ --------------- matrix
+ --------------- unassign_adapter
+ ----------------unassign_control_domain
+ ----------------unassign_domain
+
+ Note *****: The vfio_ap mdevs do not persist across reboots unless the
+ mdevctl tool is used to create and persist them.
+
+4. The administrator now needs to configure the matrixes for the mediated
+ devices $uuid1 (for Guest1), $uuid2 (for Guest2) and $uuid3 (for Guest3).
+
+ This is how the matrix is configured for Guest1::
+
+ echo 5 > assign_adapter
+ echo 6 > assign_adapter
+ echo 4 > assign_domain
+ echo 0xab > assign_domain
+
+ Control domains can similarly be assigned using the assign_control_domain
+ sysfs file.
+
+ If a mistake is made configuring an adapter, domain or control domain,
+ you can use the unassign_xxx files to unassign the adapter, domain or
+ control domain.
+
+ To display the matrix configuration for Guest1::
+
+ cat matrix
+
+ To display the matrix that is or will be assigned to Guest1::
+
+ cat guest_matrix
+
+ This is how the matrix is configured for Guest2::
+
+ echo 5 > assign_adapter
+ echo 0x47 > assign_domain
+ echo 0xff > assign_domain
+
+ This is how the matrix is configured for Guest3::
+
+ echo 6 > assign_adapter
+ echo 0x47 > assign_domain
+ echo 0xff > assign_domain
+
+ In order to successfully assign an adapter:
+
+ * The adapter number specified must represent a value from 0 up to the
+ maximum adapter number configured for the system. If an adapter number
+ higher than the maximum is specified, the operation will terminate with
+ an error (ENODEV).
+
+ Note: The maximum adapter number can be obtained via the sysfs
+ /sys/bus/ap/ap_max_adapter_id attribute file.
+
+ * Each APQN derived from the Cartesian product of the APID of the adapter
+ being assigned and the APQIs of the domains previously assigned:
+
+ - Must only be available to the vfio_ap device driver as specified in the
+ sysfs /sys/bus/ap/apmask and /sys/bus/ap/aqmask attribute files. If even
+ one APQN is reserved for use by the host device driver, the operation
+ will terminate with an error (EADDRNOTAVAIL).
+
+ - Must NOT be assigned to another vfio_ap mediated device. If even one APQN
+ is assigned to another vfio_ap mediated device, the operation will
+ terminate with an error (EBUSY).
+
+ - Must NOT be assigned while the sysfs /sys/bus/ap/apmask and
+ sys/bus/ap/aqmask attribute files are being edited or the operation may
+ terminate with an error (EBUSY).
+
+ In order to successfully assign a domain:
+
+ * The domain number specified must represent a value from 0 up to the
+ maximum domain number configured for the system. If a domain number
+ higher than the maximum is specified, the operation will terminate with
+ an error (ENODEV).
+
+ Note: The maximum domain number can be obtained via the sysfs
+ /sys/bus/ap/ap_max_domain_id attribute file.
+
+ * Each APQN derived from the Cartesian product of the APQI of the domain
+ being assigned and the APIDs of the adapters previously assigned:
+
+ - Must only be available to the vfio_ap device driver as specified in the
+ sysfs /sys/bus/ap/apmask and /sys/bus/ap/aqmask attribute files. If even
+ one APQN is reserved for use by the host device driver, the operation
+ will terminate with an error (EADDRNOTAVAIL).
+
+ - Must NOT be assigned to another vfio_ap mediated device. If even one APQN
+ is assigned to another vfio_ap mediated device, the operation will
+ terminate with an error (EBUSY).
+
+ - Must NOT be assigned while the sysfs /sys/bus/ap/apmask and
+ sys/bus/ap/aqmask attribute files are being edited or the operation may
+ terminate with an error (EBUSY).
+
+ In order to successfully assign a control domain:
+
+ * The domain number specified must represent a value from 0 up to the maximum
+ domain number configured for the system. If a control domain number higher
+ than the maximum is specified, the operation will terminate with an
+ error (ENODEV).
+
+5. Start Guest1::
+
+ /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on,apqi=on \
+ -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid1 ...
+
+7. Start Guest2::
+
+ /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on,apqi=on \
+ -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid2 ...
+
+7. Start Guest3::
+
+ /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on,apqi=on \
+ -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid3 ...
+
+When the guest is shut down, the vfio_ap mediated devices may be removed.
+
+Using our example again, to remove the vfio_ap mediated device $uuid1::
+
+ /sys/devices/vfio_ap/matrix/
+ --- [mdev_supported_types]
+ ------ [vfio_ap-passthrough]
+ --------- [devices]
+ ------------ [$uuid1]
+ --------------- remove
+
+::
+
+ echo 1 > remove
+
+This will remove all of the matrix mdev device's sysfs structures including
+the mdev device itself. To recreate and reconfigure the matrix mdev device,
+all of the steps starting with step 3 will have to be performed again. Note
+that the remove will fail if a guest using the vfio_ap mdev is still running.
+
+It is not necessary to remove a vfio_ap mdev, but one may want to
+remove it if no guest will use it during the remaining lifetime of the linux
+host. If the vfio_ap mdev is removed, one may want to also reconfigure
+the pool of adapters and queues reserved for use by the default drivers.
+
+Hot plug/unplug support:
+========================
+An adapter, domain or control domain may be hot plugged into a running KVM
+guest by assigning it to the vfio_ap mediated device being used by the guest if
+the following conditions are met:
+
+* The adapter, domain or control domain must also be assigned to the host's
+ AP configuration.
+
+* Each APQN derived from the Cartesian product comprised of the APID of the
+ adapter being assigned and the APQIs of the domains assigned must reference a
+ queue device bound to the vfio_ap device driver.
+
+* To hot plug a domain, each APQN derived from the Cartesian product
+ comprised of the APQI of the domain being assigned and the APIDs of the
+ adapters assigned must reference a queue device bound to the vfio_ap device
+ driver.
+
+An adapter, domain or control domain may be hot unplugged from a running KVM
+guest by unassigning it from the vfio_ap mediated device being used by the
+guest.
+
+Over-provisioning of AP queues for a KVM guest:
+===============================================
+Over-provisioning is defined herein as the assignment of adapters or domains to
+a vfio_ap mediated device that do not reference AP devices in the host's AP
+configuration. The idea here is that when the adapter or domain becomes
+available, it will be automatically hot-plugged into the KVM guest using
+the vfio_ap mediated device to which it is assigned as long as each new APQN
+resulting from plugging it in references a queue device bound to the vfio_ap
+device driver.
+
+Driver Features
+===============
+The vfio_ap driver exposes a sysfs file containing supported features.
+This exists so third party tools (like Libvirt and mdevctl) can query the
+availability of specific features.
+
+The features list can be found here: /sys/bus/matrix/devices/matrix/features
+
+Entries are space delimited. Each entry consists of a combination of
+alphanumeric and underscore characters.
+
+Example:
+cat /sys/bus/matrix/devices/matrix/features
+guest_matrix dyn ap_config
+
+the following features are advertised:
+
+---------------+---------------------------------------------------------------+
+| Flag | Description |
++==============+===============================================================+
+| guest_matrix | guest_matrix attribute exists. It reports the matrix of |
+| | adapters and domains that are or will be passed through to a |
+| | guest when the mdev is attached to it. |
++--------------+---------------------------------------------------------------+
+| dyn | Indicates hot plug/unplug of AP adapters, domains and control |
+| | domains for a guest to which the mdev is attached. |
++------------+-----------------------------------------------------------------+
+| ap_config | ap_config interface for one-shot modifications to mdev config |
++--------------+---------------------------------------------------------------+
+
+Limitations
+===========
+Live guest migration is not supported for guests using AP devices without
+intervention by a system administrator. Before a KVM guest can be migrated,
+the vfio_ap mediated device must be removed. Unfortunately, it can not be
+removed manually (i.e., echo 1 > /sys/devices/vfio_ap/matrix/$UUID/remove) while
+the mdev is in use by a KVM guest. If the guest is being emulated by QEMU,
+its mdev can be hot unplugged from the guest in one of two ways:
+
+1. If the KVM guest was started with libvirt, you can hot unplug the mdev via
+ the following commands:
+
+ virsh detach-device <guestname> <path-to-device-xml>
+
+ For example, to hot unplug mdev 62177883-f1bb-47f0-914d-32a22e3a8804 from
+ the guest named 'my-guest':
+
+ virsh detach-device my-guest ~/config/my-guest-hostdev.xml
+
+ The contents of my-guest-hostdev.xml:
+
+.. code-block:: xml
+
+ <hostdev mode='subsystem' type='mdev' managed='no' model='vfio-ap'>
+ <source>
+ <address uuid='62177883-f1bb-47f0-914d-32a22e3a8804'/>
+ </source>
+ </hostdev>
+
+
+ virsh qemu-monitor-command <guest-name> --hmp "device-del <device-id>"
+
+ For example, to hot unplug the vfio_ap mediated device identified on the
+ qemu command line with 'id=hostdev0' from the guest named 'my-guest':
+
+.. code-block:: sh
+
+ virsh qemu-monitor-command my-guest --hmp "device_del hostdev0"
+
+2. A vfio_ap mediated device can be hot unplugged by attaching the qemu monitor
+ to the guest and using the following qemu monitor command:
+
+ (QEMU) device-del id=<device-id>
+
+ For example, to hot unplug the vfio_ap mediated device that was specified
+ on the qemu command line with 'id=hostdev0' when the guest was started:
+
+ (QEMU) device-del id=hostdev0
+
+After live migration of the KVM guest completes, an AP configuration can be
+restored to the KVM guest by hot plugging a vfio_ap mediated device on the target
+system into the guest in one of two ways:
+
+1. If the KVM guest was started with libvirt, you can hot plug a matrix mediated
+ device into the guest via the following virsh commands:
+
+ virsh attach-device <guestname> <path-to-device-xml>
+
+ For example, to hot plug mdev 62177883-f1bb-47f0-914d-32a22e3a8804 into
+ the guest named 'my-guest':
+
+ virsh attach-device my-guest ~/config/my-guest-hostdev.xml
+
+ The contents of my-guest-hostdev.xml:
+
+.. code-block:: xml
+
+ <hostdev mode='subsystem' type='mdev' managed='no' model='vfio-ap'>
+ <source>
+ <address uuid='62177883-f1bb-47f0-914d-32a22e3a8804'/>
+ </source>
+ </hostdev>
+
+
+ virsh qemu-monitor-command <guest-name> --hmp \
+ "device_add vfio-ap,sysfsdev=<path-to-mdev>,id=<device-id>"
+
+ For example, to hot plug the vfio_ap mediated device
+ 62177883-f1bb-47f0-914d-32a22e3a8804 into the guest named 'my-guest' with
+ device-id hostdev0:
+
+ virsh qemu-monitor-command my-guest --hmp \
+ "device_add vfio-ap,\
+ sysfsdev=/sys/devices/vfio_ap/matrix/62177883-f1bb-47f0-914d-32a22e3a8804,\
+ id=hostdev0"
+
+2. A vfio_ap mediated device can be hot plugged by attaching the qemu monitor
+ to the guest and using the following qemu monitor command:
+
+ (qemu) device_add "vfio-ap,sysfsdev=<path-to-mdev>,id=<device-id>"
+
+ For example, to plug the vfio_ap mediated device
+ 62177883-f1bb-47f0-914d-32a22e3a8804 into the guest with the device-id
+ hostdev0:
+
+ (QEMU) device-add "vfio-ap,\
+ sysfsdev=/sys/devices/vfio_ap/matrix/62177883-f1bb-47f0-914d-32a22e3a8804,\
+ id=hostdev0"
diff --git a/Documentation/arch/s390/vfio-ccw.rst b/Documentation/arch/s390/vfio-ccw.rst
new file mode 100644
index 000000000000..42960b7b0d70
--- /dev/null
+++ b/Documentation/arch/s390/vfio-ccw.rst
@@ -0,0 +1,445 @@
+==================================
+vfio-ccw: the basic infrastructure
+==================================
+
+Introduction
+------------
+
+Here we describe the vfio support for I/O subchannel devices for
+Linux/s390. Motivation for vfio-ccw is to passthrough subchannels to a
+virtual machine, while vfio is the means.
+
+Different than other hardware architectures, s390 has defined a unified
+I/O access method, which is so called Channel I/O. It has its own access
+patterns:
+
+- Channel programs run asynchronously on a separate (co)processor.
+- The channel subsystem will access any memory designated by the caller
+ in the channel program directly, i.e. there is no iommu involved.
+
+Thus when we introduce vfio support for these devices, we realize it
+with a mediated device (mdev) implementation. The vfio mdev will be
+added to an iommu group, so as to make itself able to be managed by the
+vfio framework. And we add read/write callbacks for special vfio I/O
+regions to pass the channel programs from the mdev to its parent device
+(the real I/O subchannel device) to do further address translation and
+to perform I/O instructions.
+
+This document does not intend to explain the s390 I/O architecture in
+every detail. More information/reference could be found here:
+
+- A good start to know Channel I/O in general:
+ https://en.wikipedia.org/wiki/Channel_I/O
+- s390 architecture:
+ s390 Principles of Operation manual (IBM Form. No. SA22-7832)
+- The existing QEMU code which implements a simple emulated channel
+ subsystem could also be a good reference. It makes it easier to follow
+ the flow.
+ qemu/hw/s390x/css.c
+
+For vfio mediated device framework:
+- Documentation/driver-api/vfio-mediated-device.rst
+
+Motivation of vfio-ccw
+----------------------
+
+Typically, a guest virtualized via QEMU/KVM on s390 only sees
+paravirtualized virtio devices via the "Virtio Over Channel I/O
+(virtio-ccw)" transport. This makes virtio devices discoverable via
+standard operating system algorithms for handling channel devices.
+
+However this is not enough. On s390 for the majority of devices, which
+use the standard Channel I/O based mechanism, we also need to provide
+the functionality of passing through them to a QEMU virtual machine.
+This includes devices that don't have a virtio counterpart (e.g. tape
+drives) or that have specific characteristics which guests want to
+exploit.
+
+For passing a device to a guest, we want to use the same interface as
+everybody else, namely vfio. We implement this vfio support for channel
+devices via the vfio mediated device framework and the subchannel device
+driver "vfio_ccw".
+
+Access patterns of CCW devices
+------------------------------
+
+s390 architecture has implemented a so called channel subsystem, that
+provides a unified view of the devices physically attached to the
+systems. Though the s390 hardware platform knows about a huge variety of
+different peripheral attachments like disk devices (aka. DASDs), tapes,
+communication controllers, etc. They can all be accessed by a well
+defined access method and they are presenting I/O completion a unified
+way: I/O interruptions.
+
+All I/O requires the use of channel command words (CCWs). A CCW is an
+instruction to a specialized I/O channel processor. A channel program is
+a sequence of CCWs which are executed by the I/O channel subsystem. To
+issue a channel program to the channel subsystem, it is required to
+build an operation request block (ORB), which can be used to point out
+the format of the CCW and other control information to the system. The
+operating system signals the I/O channel subsystem to begin executing
+the channel program with a SSCH (start sub-channel) instruction. The
+central processor is then free to proceed with non-I/O instructions
+until interrupted. The I/O completion result is received by the
+interrupt handler in the form of interrupt response block (IRB).
+
+Back to vfio-ccw, in short:
+
+- ORBs and channel programs are built in guest kernel (with guest
+ physical addresses).
+- ORBs and channel programs are passed to the host kernel.
+- Host kernel translates the guest physical addresses to real addresses
+ and starts the I/O with issuing a privileged Channel I/O instruction
+ (e.g SSCH).
+- channel programs run asynchronously on a separate processor.
+- I/O completion will be signaled to the host with I/O interruptions.
+ And it will be copied as IRB to user space to pass it back to the
+ guest.
+
+Physical vfio ccw device and its child mdev
+-------------------------------------------
+
+As mentioned above, we realize vfio-ccw with a mdev implementation.
+
+Channel I/O does not have IOMMU hardware support, so the physical
+vfio-ccw device does not have an IOMMU level translation or isolation.
+
+Subchannel I/O instructions are all privileged instructions. When
+handling the I/O instruction interception, vfio-ccw has the software
+policing and translation how the channel program is programmed before
+it gets sent to hardware.
+
+Within this implementation, we have two drivers for two types of
+devices:
+
+- The vfio_ccw driver for the physical subchannel device.
+ This is an I/O subchannel driver for the real subchannel device. It
+ realizes a group of callbacks and registers to the mdev framework as a
+ parent (physical) device. As a consequence, mdev provides vfio_ccw a
+ generic interface (sysfs) to create mdev devices. A vfio mdev could be
+ created by vfio_ccw then and added to the mediated bus. It is the vfio
+ device that added to an IOMMU group and a vfio group.
+ vfio_ccw also provides an I/O region to accept channel program
+ request from user space and store I/O interrupt result for user
+ space to retrieve. To notify user space an I/O completion, it offers
+ an interface to setup an eventfd fd for asynchronous signaling.
+
+- The vfio_mdev driver for the mediated vfio ccw device.
+ This is provided by the mdev framework. It is a vfio device driver for
+ the mdev that created by vfio_ccw.
+ It realizes a group of vfio device driver callbacks, adds itself to a
+ vfio group, and registers itself to the mdev framework as a mdev
+ driver.
+ It uses a vfio iommu backend that uses the existing map and unmap
+ ioctls, but rather than programming them into an IOMMU for a device,
+ it simply stores the translations for use by later requests. This
+ means that a device programmed in a VM with guest physical addresses
+ can have the vfio kernel convert that address to process virtual
+ address, pin the page and program the hardware with the host physical
+ address in one step.
+ For a mdev, the vfio iommu backend will not pin the pages during the
+ VFIO_IOMMU_MAP_DMA ioctl. Mdev framework will only maintain a database
+ of the iova<->vaddr mappings in this operation. And they export a
+ vfio_pin_pages and a vfio_unpin_pages interfaces from the vfio iommu
+ backend for the physical devices to pin and unpin pages by demand.
+
+Below is a high Level block diagram::
+
+ +-------------+
+ | |
+ | +---------+ | mdev_register_driver() +--------------+
+ | | Mdev | +<-----------------------+ |
+ | | bus | | | vfio_mdev.ko |
+ | | driver | +----------------------->+ |<-> VFIO user
+ | +---------+ | probe()/remove() +--------------+ APIs
+ | |
+ | MDEV CORE |
+ | MODULE |
+ | mdev.ko |
+ | +---------+ | mdev_register_parent() +--------------+
+ | |Physical | +<-----------------------+ |
+ | | device | | | vfio_ccw.ko |<-> subchannel
+ | |interface| +----------------------->+ | device
+ | +---------+ | callback +--------------+
+ +-------------+
+
+The process of how these work together.
+
+1. vfio_ccw.ko drives the physical I/O subchannel, and registers the
+ physical device (with callbacks) to mdev framework.
+ When vfio_ccw probing the subchannel device, it registers device
+ pointer and callbacks to the mdev framework. Mdev related file nodes
+ under the device node in sysfs would be created for the subchannel
+ device, namely 'mdev_create', 'mdev_destroy' and
+ 'mdev_supported_types'.
+2. Create a mediated vfio ccw device.
+ Use the 'mdev_create' sysfs file, we need to manually create one (and
+ only one for our case) mediated device.
+3. vfio_mdev.ko drives the mediated ccw device.
+ vfio_mdev is also the vfio device driver. It will probe the mdev and
+ add it to an iommu_group and a vfio_group. Then we could pass through
+ the mdev to a guest.
+
+
+VFIO-CCW Regions
+----------------
+
+The vfio-ccw driver exposes MMIO regions to accept requests from and return
+results to userspace.
+
+vfio-ccw I/O region
+-------------------
+
+An I/O region is used to accept channel program request from user
+space and store I/O interrupt result for user space to retrieve. The
+definition of the region is::
+
+ struct ccw_io_region {
+ #define ORB_AREA_SIZE 12
+ __u8 orb_area[ORB_AREA_SIZE];
+ #define SCSW_AREA_SIZE 12
+ __u8 scsw_area[SCSW_AREA_SIZE];
+ #define IRB_AREA_SIZE 96
+ __u8 irb_area[IRB_AREA_SIZE];
+ __u32 ret_code;
+ } __packed;
+
+This region is always available.
+
+While starting an I/O request, orb_area should be filled with the
+guest ORB, and scsw_area should be filled with the SCSW of the Virtual
+Subchannel.
+
+irb_area stores the I/O result.
+
+ret_code stores a return code for each access of the region. The following
+values may occur:
+
+``0``
+ The operation was successful.
+
+``-EOPNOTSUPP``
+ The ORB specified transport mode or the
+ SCSW specified a function other than the start function.
+
+``-EIO``
+ A request was issued while the device was not in a state ready to accept
+ requests, or an internal error occurred.
+
+``-EBUSY``
+ The subchannel was status pending or busy, or a request is already active.
+
+``-EAGAIN``
+ A request was being processed, and the caller should retry.
+
+``-EACCES``
+ The channel path(s) used for the I/O were found to be not operational.
+
+``-ENODEV``
+ The device was found to be not operational.
+
+``-EINVAL``
+ The orb specified a chain longer than 255 ccws, or an internal error
+ occurred.
+
+
+vfio-ccw cmd region
+-------------------
+
+The vfio-ccw cmd region is used to accept asynchronous instructions
+from userspace::
+
+ #define VFIO_CCW_ASYNC_CMD_HSCH (1 << 0)
+ #define VFIO_CCW_ASYNC_CMD_CSCH (1 << 1)
+ struct ccw_cmd_region {
+ __u32 command;
+ __u32 ret_code;
+ } __packed;
+
+This region is exposed via region type VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD.
+
+Currently, CLEAR SUBCHANNEL and HALT SUBCHANNEL use this region.
+
+command specifies the command to be issued; ret_code stores a return code
+for each access of the region. The following values may occur:
+
+``0``
+ The operation was successful.
+
+``-ENODEV``
+ The device was found to be not operational.
+
+``-EINVAL``
+ A command other than halt or clear was specified.
+
+``-EIO``
+ A request was issued while the device was not in a state ready to accept
+ requests.
+
+``-EAGAIN``
+ A request was being processed, and the caller should retry.
+
+``-EBUSY``
+ The subchannel was status pending or busy while processing a halt request.
+
+vfio-ccw schib region
+---------------------
+
+The vfio-ccw schib region is used to return Subchannel-Information
+Block (SCHIB) data to userspace::
+
+ struct ccw_schib_region {
+ #define SCHIB_AREA_SIZE 52
+ __u8 schib_area[SCHIB_AREA_SIZE];
+ } __packed;
+
+This region is exposed via region type VFIO_REGION_SUBTYPE_CCW_SCHIB.
+
+Reading this region triggers a STORE SUBCHANNEL to be issued to the
+associated hardware.
+
+vfio-ccw crw region
+---------------------
+
+The vfio-ccw crw region is used to return Channel Report Word (CRW)
+data to userspace::
+
+ struct ccw_crw_region {
+ __u32 crw;
+ __u32 pad;
+ } __packed;
+
+This region is exposed via region type VFIO_REGION_SUBTYPE_CCW_CRW.
+
+Reading this region returns a CRW if one that is relevant for this
+subchannel (e.g. one reporting changes in channel path state) is
+pending, or all zeroes if not. If multiple CRWs are pending (including
+possibly chained CRWs), reading this region again will return the next
+one, until no more CRWs are pending and zeroes are returned. This is
+similar to how STORE CHANNEL REPORT WORD works.
+
+vfio-ccw operation details
+--------------------------
+
+vfio-ccw follows what vfio-pci did on the s390 platform and uses
+vfio-iommu-type1 as the vfio iommu backend.
+
+* CCW translation APIs
+ A group of APIs (start with `cp_`) to do CCW translation. The CCWs
+ passed in by a user space program are organized with their guest
+ physical memory addresses. These APIs will copy the CCWs into kernel
+ space, and assemble a runnable kernel channel program by updating the
+ guest physical addresses with their corresponding host physical addresses.
+ Note that we have to use IDALs even for direct-access CCWs, as the
+ referenced memory can be located anywhere, including above 2G.
+
+* vfio_ccw device driver
+ This driver utilizes the CCW translation APIs and introduces
+ vfio_ccw, which is the driver for the I/O subchannel devices you want
+ to pass through.
+ vfio_ccw implements the following vfio ioctls::
+
+ VFIO_DEVICE_GET_INFO
+ VFIO_DEVICE_GET_IRQ_INFO
+ VFIO_DEVICE_GET_REGION_INFO
+ VFIO_DEVICE_RESET
+ VFIO_DEVICE_SET_IRQS
+
+ This provides an I/O region, so that the user space program can pass a
+ channel program to the kernel, to do further CCW translation before
+ issuing them to a real device.
+ This also provides the SET_IRQ ioctl to setup an event notifier to
+ notify the user space program the I/O completion in an asynchronous
+ way.
+
+The use of vfio-ccw is not limited to QEMU, while QEMU is definitely a
+good example to get understand how these patches work. Here is a little
+bit more detail how an I/O request triggered by the QEMU guest will be
+handled (without error handling).
+
+Explanation:
+
+- Q1-Q7: QEMU side process.
+- K1-K5: Kernel side process.
+
+Q1.
+ Get I/O region info during initialization.
+
+Q2.
+ Setup event notifier and handler to handle I/O completion.
+
+... ...
+
+Q3.
+ Intercept a ssch instruction.
+Q4.
+ Write the guest channel program and ORB to the I/O region.
+
+ K1.
+ Copy from guest to kernel.
+ K2.
+ Translate the guest channel program to a host kernel space
+ channel program, which becomes runnable for a real device.
+ K3.
+ With the necessary information contained in the orb passed in
+ by QEMU, issue the ccwchain to the device.
+ K4.
+ Return the ssch CC code.
+Q5.
+ Return the CC code to the guest.
+
+... ...
+
+ K5.
+ Interrupt handler gets the I/O result and write the result to
+ the I/O region.
+ K6.
+ Signal QEMU to retrieve the result.
+
+Q6.
+ Get the signal and event handler reads out the result from the I/O
+ region.
+Q7.
+ Update the irb for the guest.
+
+Limitations
+-----------
+
+The current vfio-ccw implementation focuses on supporting basic commands
+needed to implement block device functionality (read/write) of DASD/ECKD
+device only. Some commands may need special handling in the future, for
+example, anything related to path grouping.
+
+DASD is a kind of storage device. While ECKD is a data recording format.
+More information for DASD and ECKD could be found here:
+https://en.wikipedia.org/wiki/Direct-access_storage_device
+https://en.wikipedia.org/wiki/Count_key_data
+
+Together with the corresponding work in QEMU, we can bring the passed
+through DASD/ECKD device online in a guest now and use it as a block
+device.
+
+The current code allows the guest to start channel programs via
+START SUBCHANNEL, and to issue HALT SUBCHANNEL, CLEAR SUBCHANNEL,
+and STORE SUBCHANNEL.
+
+Currently all channel programs are prefetched, regardless of the
+p-bit setting in the ORB. As a result, self modifying channel
+programs are not supported. For this reason, IPL has to be handled as
+a special case by a userspace/guest program; this has been implemented
+in QEMU's s390-ccw bios as of QEMU 4.1.
+
+vfio-ccw supports classic (command mode) channel I/O only. Transport
+mode (HPF) is not supported.
+
+QDIO subchannels are currently not supported. Classic devices other than
+DASD/ECKD might work, but have not been tested.
+
+Reference
+---------
+1. ESA/s390 Principles of Operation manual (IBM Form. No. SA22-7832)
+2. ESA/390 Common I/O Device Commands manual (IBM Form. No. SA22-7204)
+3. https://en.wikipedia.org/wiki/Channel_I/O
+4. Documentation/arch/s390/cds.rst
+5. Documentation/driver-api/vfio.rst
+6. Documentation/driver-api/vfio-mediated-device.rst
diff --git a/Documentation/arch/s390/zfcpdump.rst b/Documentation/arch/s390/zfcpdump.rst
new file mode 100644
index 000000000000..a61de7aa8778
--- /dev/null
+++ b/Documentation/arch/s390/zfcpdump.rst
@@ -0,0 +1,50 @@
+==================================
+The s390 SCSI dump tool (zfcpdump)
+==================================
+
+System z machines (z900 or higher) provide hardware support for creating system
+dumps on SCSI disks. The dump process is initiated by booting a dump tool, which
+has to create a dump of the current (probably crashed) Linux image. In order to
+not overwrite memory of the crashed Linux with data of the dump tool, the
+hardware saves some memory plus the register sets of the boot CPU before the
+dump tool is loaded. There exists an SCLP hardware interface to obtain the saved
+memory afterwards. Currently 32 MB are saved.
+
+This zfcpdump implementation consists of a Linux dump kernel together with
+a user space dump tool, which are loaded together into the saved memory region
+below 32 MB. zfcpdump is installed on a SCSI disk using zipl (as contained in
+the s390-tools package) to make the device bootable. The operator of a Linux
+system can then trigger a SCSI dump by booting the SCSI disk, where zfcpdump
+resides on.
+
+The user space dump tool accesses the memory of the crashed system by means
+of the /proc/vmcore interface. This interface exports the crashed system's
+memory and registers in ELF core dump format. To access the memory which has
+been saved by the hardware SCLP requests will be created at the time the data
+is needed by /proc/vmcore. The tail part of the crashed systems memory which
+has not been stashed by hardware can just be copied from real memory.
+
+To build a dump enabled kernel the kernel config option CONFIG_CRASH_DUMP
+has to be set.
+
+To get a valid zfcpdump kernel configuration use "make zfcpdump_defconfig".
+
+The s390 zipl tool looks for the zfcpdump kernel and optional initrd/initramfs
+under the following locations:
+
+* kernel: <zfcpdump directory>/zfcpdump.image
+* ramdisk: <zfcpdump directory>/zfcpdump.rd
+
+The zfcpdump directory is defined in the s390-tools package.
+
+The user space application of zfcpdump can reside in an intitramfs or an
+initrd. It can also be included in a built-in kernel initramfs. The application
+reads from /proc/vmcore or zcore/mem and writes the system dump to a SCSI disk.
+
+The s390-tools package version 1.24.0 and above builds an external zfcpdump
+initramfs with a user space application that writes the dump to a SCSI
+partition.
+
+For more information on how to use zfcpdump refer to the s390 'Using the Dump
+Tools' book, which is available from IBM Knowledge Center:
+https://www.ibm.com/support/knowledgecenter/linuxonibm/liaaf/lnz_r_dt.html
diff --git a/Documentation/arch/sh/booting.rst b/Documentation/arch/sh/booting.rst
new file mode 100644
index 000000000000..d851c49a01bf
--- /dev/null
+++ b/Documentation/arch/sh/booting.rst
@@ -0,0 +1,12 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+DeviceTree Booting
+------------------
+
+ Device-tree compatible SH bootloaders are expected to provide the physical
+ address of the device tree blob in r4. Since legacy bootloaders did not
+ guarantee any particular initial register state, kernels built to
+ inter-operate with old bootloaders must either use a builtin DTB or
+ select a legacy board option (something other than CONFIG_SH_DEVICE_TREE)
+ that does not use device tree. Support for the latter is being phased out
+ in favor of device tree.
diff --git a/Documentation/arch/sh/features.rst b/Documentation/arch/sh/features.rst
new file mode 100644
index 000000000000..fae48fe81e9b
--- /dev/null
+++ b/Documentation/arch/sh/features.rst
@@ -0,0 +1,3 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. kernel-feat:: features sh
diff --git a/Documentation/arch/sh/index.rst b/Documentation/arch/sh/index.rst
new file mode 100644
index 000000000000..01fce7c131f1
--- /dev/null
+++ b/Documentation/arch/sh/index.rst
@@ -0,0 +1,50 @@
+=======================
+SuperH Interfaces Guide
+=======================
+
+:Author: Paul Mundt
+
+.. toctree::
+ :maxdepth: 1
+
+ booting
+ new-machine
+ register-banks
+
+ features
+
+Memory Management
+=================
+
+SH-4
+----
+
+Store Queue API
+~~~~~~~~~~~~~~~
+
+.. kernel-doc:: arch/sh/kernel/cpu/sh4/sq.c
+ :export:
+
+Machine Specific Interfaces
+===========================
+
+mach-dreamcast
+--------------
+
+.. kernel-doc:: arch/sh/boards/mach-dreamcast/rtc.c
+ :internal:
+
+mach-x3proto
+------------
+
+.. kernel-doc:: arch/sh/boards/mach-x3proto/ilsel.c
+ :export:
+
+Busses
+======
+
+Maple
+-----
+
+.. kernel-doc:: drivers/sh/maple/maple.c
+ :export:
diff --git a/Documentation/arch/sh/new-machine.rst b/Documentation/arch/sh/new-machine.rst
new file mode 100644
index 000000000000..e501c52b3b30
--- /dev/null
+++ b/Documentation/arch/sh/new-machine.rst
@@ -0,0 +1,277 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============================
+Adding a new board to LinuxSH
+=============================
+
+ Paul Mundt <lethal@linux-sh.org>
+
+This document attempts to outline what steps are necessary to add support
+for new boards to the LinuxSH port under the new 2.5 and 2.6 kernels. This
+also attempts to outline some of the noticeable changes between the 2.4
+and the 2.5/2.6 SH backend.
+
+1. New Directory Structure
+==========================
+
+The first thing to note is the new directory structure. Under 2.4, most
+of the board-specific code (with the exception of stboards) ended up
+in arch/sh/kernel/ directly, with board-specific headers ending up in
+include/asm-sh/. For the new kernel, things are broken out by board type,
+companion chip type, and CPU type. Looking at a tree view of this directory
+hierarchy looks like the following:
+
+Board-specific code::
+
+ .
+ |-- arch
+ | `-- sh
+ | `-- boards
+ | |-- adx
+ | | `-- board-specific files
+ | |-- bigsur
+ | | `-- board-specific files
+ | |
+ | ... more boards here ...
+ |
+ `-- include
+ `-- asm-sh
+ |-- adx
+ | `-- board-specific headers
+ |-- bigsur
+ | `-- board-specific headers
+ |
+ .. more boards here ...
+
+Next, for companion chips::
+
+ .
+ `-- arch
+ `-- sh
+ `-- cchips
+ `-- hd6446x
+ `-- hd64461
+ `-- cchip-specific files
+
+... and so on. Headers for the companion chips are treated the same way as
+board-specific headers. Thus, include/asm-sh/hd64461 is home to all of the
+hd64461-specific headers.
+
+Finally, CPU family support is also abstracted::
+
+ .
+ |-- arch
+ | `-- sh
+ | |-- kernel
+ | | `-- cpu
+ | | |-- sh2
+ | | | `-- SH-2 generic files
+ | | |-- sh3
+ | | | `-- SH-3 generic files
+ | | `-- sh4
+ | | `-- SH-4 generic files
+ | `-- mm
+ | `-- This is also broken out per CPU family, so each family can
+ | have their own set of cache/tlb functions.
+ |
+ `-- include
+ `-- asm-sh
+ |-- cpu-sh2
+ | `-- SH-2 specific headers
+ |-- cpu-sh3
+ | `-- SH-3 specific headers
+ `-- cpu-sh4
+ `-- SH-4 specific headers
+
+It should be noted that CPU subtypes are _not_ abstracted. Thus, these still
+need to be dealt with by the CPU family specific code.
+
+2. Adding a New Board
+=====================
+
+The first thing to determine is whether the board you are adding will be
+isolated, or whether it will be part of a family of boards that can mostly
+share the same board-specific code with minor differences.
+
+In the first case, this is just a matter of making a directory for your
+board in arch/sh/boards/ and adding rules to hook your board in with the
+build system (more on this in the next section). However, for board families
+it makes more sense to have a common top-level arch/sh/boards/ directory
+and then populate that with sub-directories for each member of the family.
+Both the Solution Engine and the hp6xx boards are an example of this.
+
+After you have setup your new arch/sh/boards/ directory, remember that you
+should also add a directory in include/asm-sh for headers localized to this
+board (if there are going to be more than one). In order to interoperate
+seamlessly with the build system, it's best to have this directory the same
+as the arch/sh/boards/ directory name, though if your board is again part of
+a family, the build system has ways of dealing with this (via incdir-y
+overloading), and you can feel free to name the directory after the family
+member itself.
+
+There are a few things that each board is required to have, both in the
+arch/sh/boards and the include/asm-sh/ hierarchy. In order to better
+explain this, we use some examples for adding an imaginary board. For
+setup code, we're required at the very least to provide definitions for
+get_system_type() and platform_setup(). For our imaginary board, this
+might look something like::
+
+ /*
+ * arch/sh/boards/vapor/setup.c - Setup code for imaginary board
+ */
+ #include <linux/init.h>
+
+ const char *get_system_type(void)
+ {
+ return "FooTech Vaporboard";
+ }
+
+ int __init platform_setup(void)
+ {
+ /*
+ * If our hardware actually existed, we would do real
+ * setup here. Though it's also sane to leave this empty
+ * if there's no real init work that has to be done for
+ * this board.
+ */
+
+ /* Start-up imaginary PCI ... */
+
+ /* And whatever else ... */
+
+ return 0;
+ }
+
+Our new imaginary board will also have to tie into the machvec in order for it
+to be of any use.
+
+machvec functions fall into a number of categories:
+
+ - I/O functions to IO memory (inb etc) and PCI/main memory (readb etc).
+ - I/O mapping functions (ioport_map, ioport_unmap, etc).
+ - a 'heartbeat' function.
+ - PCI and IRQ initialization routines.
+ - Consistent allocators (for boards that need special allocators,
+ particularly for allocating out of some board-specific SRAM for DMA
+ handles).
+
+There are machvec functions added and removed over time, so always be sure to
+consult include/asm-sh/machvec.h for the current state of the machvec.
+
+The kernel will automatically wrap in generic routines for undefined function
+pointers in the machvec at boot time, as machvec functions are referenced
+unconditionally throughout most of the tree. Some boards have incredibly
+sparse machvecs (such as the dreamcast and sh03), whereas others must define
+virtually everything (rts7751r2d).
+
+Adding a new machine is relatively trivial (using vapor as an example):
+
+If the board-specific definitions are quite minimalistic, as is the case for
+the vast majority of boards, simply having a single board-specific header is
+sufficient.
+
+ - add a new file include/asm-sh/vapor.h which contains prototypes for
+ any machine specific IO functions prefixed with the machine name, for
+ example vapor_inb. These will be needed when filling out the machine
+ vector.
+
+ Note that these prototypes are generated automatically by setting
+ __IO_PREFIX to something sensible. A typical example would be::
+
+ #define __IO_PREFIX vapor
+ #include <asm/io_generic.h>
+
+ somewhere in the board-specific header. Any boards being ported that still
+ have a legacy io.h should remove it entirely and switch to the new model.
+
+ - Add machine vector definitions to the board's setup.c. At a bare minimum,
+ this must be defined as something like::
+
+ struct sh_machine_vector mv_vapor __initmv = {
+ .mv_name = "vapor",
+ };
+ ALIAS_MV(vapor)
+
+ - finally add a file arch/sh/boards/vapor/io.c, which contains definitions of
+ the machine specific io functions (if there are enough to warrant it).
+
+3. Hooking into the Build System
+================================
+
+Now that we have the corresponding directories setup, and all of the
+board-specific code is in place, it's time to look at how to get the
+whole mess to fit into the build system.
+
+Large portions of the build system are now entirely dynamic, and merely
+require the proper entry here and there in order to get things done.
+
+The first thing to do is to add an entry to arch/sh/Kconfig, under the
+"System type" menu::
+
+ config SH_VAPOR
+ bool "Vapor"
+ help
+ select Vapor if configuring for a FooTech Vaporboard.
+
+next, this has to be added into arch/sh/Makefile. All boards require a
+machdir-y entry in order to be built. This entry needs to be the name of
+the board directory as it appears in arch/sh/boards, even if it is in a
+sub-directory (in which case, all parent directories below arch/sh/boards/
+need to be listed). For our new board, this entry can look like::
+
+ machdir-$(CONFIG_SH_VAPOR) += vapor
+
+provided that we've placed everything in the arch/sh/boards/vapor/ directory.
+
+Next, the build system assumes that your include/asm-sh directory will also
+be named the same. If this is not the case (as is the case with multiple
+boards belonging to a common family), then the directory name needs to be
+implicitly appended to incdir-y. The existing code manages this for the
+Solution Engine and hp6xx boards, so see these for an example.
+
+Once that is taken care of, it's time to add an entry for the mach type.
+This is done by adding an entry to the end of the arch/sh/tools/mach-types
+list. The method for doing this is self explanatory, and so we won't waste
+space restating it here. After this is done, you will be able to use
+implicit checks for your board if you need this somewhere throughout the
+common code, such as::
+
+ /* Make sure we're on the FooTech Vaporboard */
+ if (!mach_is_vapor())
+ return -ENODEV;
+
+also note that the mach_is_boardname() check will be implicitly forced to
+lowercase, regardless of the fact that the mach-types entries are all
+uppercase. You can read the script if you really care, but it's pretty ugly,
+so you probably don't want to do that.
+
+Now all that's left to do is providing a defconfig for your new board. This
+way, other people who end up with this board can simply use this config
+for reference instead of trying to guess what settings are supposed to be
+used on it.
+
+Also, as soon as you have copied over a sample .config for your new board
+(assume arch/sh/configs/vapor_defconfig), you can also use this directly as a
+build target, and it will be implicitly listed as such in the help text.
+
+Looking at the 'make help' output, you should now see something like:
+
+Architecture specific targets (sh):
+
+ ======================= =============================================
+ zImage Compressed kernel image (arch/sh/boot/zImage)
+ adx_defconfig Build for adx
+ cqreek_defconfig Build for cqreek
+ dreamcast_defconfig Build for dreamcast
+ ...
+ vapor_defconfig Build for vapor
+ ======================= =============================================
+
+which then allows you to do::
+
+ $ make ARCH=sh CROSS_COMPILE=sh4-linux- vapor_defconfig vmlinux
+
+which will in turn copy the defconfig for this board, run it through
+oldconfig (prompting you for any new options since the time of creation),
+and start you on your way to having a functional kernel for your new
+board.
diff --git a/Documentation/arch/sh/register-banks.rst b/Documentation/arch/sh/register-banks.rst
new file mode 100644
index 000000000000..2bef5c8fcbbc
--- /dev/null
+++ b/Documentation/arch/sh/register-banks.rst
@@ -0,0 +1,40 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========================================
+Notes on register bank usage in the kernel
+==========================================
+
+Introduction
+------------
+
+The SH-3 and SH-4 CPU families traditionally include a single partial register
+bank (selected by SR.RB, only r0 ... r7 are banked), whereas other families
+may have more full-featured banking or simply no such capabilities at all.
+
+SR.RB banking
+-------------
+
+In the case of this type of banking, banked registers are mapped directly to
+r0 ... r7 if SR.RB is set to the bank we are interested in, otherwise ldc/stc
+can still be used to reference the banked registers (as r0_bank ... r7_bank)
+when in the context of another bank. The developer must keep the SR.RB value
+in mind when writing code that utilizes these banked registers, for obvious
+reasons. Userspace is also not able to poke at the bank1 values, so these can
+be used rather effectively as scratch registers by the kernel.
+
+Presently the kernel uses several of these registers.
+
+ - r0_bank, r1_bank (referenced as k0 and k1, used for scratch
+ registers when doing exception handling).
+
+ - r2_bank (used to track the EXPEVT/INTEVT code)
+
+ - Used by do_IRQ() and friends for doing irq mapping based off
+ of the interrupt exception vector jump table offset
+
+ - r6_bank (global interrupt mask)
+
+ - The SR.IMASK interrupt handler makes use of this to set the
+ interrupt priority level (used by local_irq_enable())
+
+ - r7_bank (current)
diff --git a/Documentation/arch/sparc/adi.rst b/Documentation/arch/sparc/adi.rst
new file mode 100644
index 000000000000..dbcd8b6e7bc3
--- /dev/null
+++ b/Documentation/arch/sparc/adi.rst
@@ -0,0 +1,286 @@
+================================
+Application Data Integrity (ADI)
+================================
+
+SPARC M7 processor adds the Application Data Integrity (ADI) feature.
+ADI allows a task to set version tags on any subset of its address
+space. Once ADI is enabled and version tags are set for ranges of
+address space of a task, the processor will compare the tag in pointers
+to memory in these ranges to the version set by the application
+previously. Access to memory is granted only if the tag in given pointer
+matches the tag set by the application. In case of mismatch, processor
+raises an exception.
+
+Following steps must be taken by a task to enable ADI fully:
+
+1. Set the user mode PSTATE.mcde bit. This acts as master switch for
+ the task's entire address space to enable/disable ADI for the task.
+
+2. Set TTE.mcd bit on any TLB entries that correspond to the range of
+ addresses ADI is being enabled on. MMU checks the version tag only
+ on the pages that have TTE.mcd bit set.
+
+3. Set the version tag for virtual addresses using stxa instruction
+ and one of the MCD specific ASIs. Each stxa instruction sets the
+ given tag for one ADI block size number of bytes. This step must
+ be repeated for entire page to set tags for entire page.
+
+ADI block size for the platform is provided by the hypervisor to kernel
+in machine description tables. Hypervisor also provides the number of
+top bits in the virtual address that specify the version tag. Once
+version tag has been set for a memory location, the tag is stored in the
+physical memory and the same tag must be present in the ADI version tag
+bits of the virtual address being presented to the MMU. For example on
+SPARC M7 processor, MMU uses bits 63-60 for version tags and ADI block
+size is same as cacheline size which is 64 bytes. A task that sets ADI
+version to, say 10, on a range of memory, must access that memory using
+virtual addresses that contain 0xa in bits 63-60.
+
+ADI is enabled on a set of pages using mprotect() with PROT_ADI flag.
+When ADI is enabled on a set of pages by a task for the first time,
+kernel sets the PSTATE.mcde bit for the task. Version tags for memory
+addresses are set with an stxa instruction on the addresses using
+ASI_MCD_PRIMARY or ASI_MCD_ST_BLKINIT_PRIMARY. ADI block size is
+provided by the hypervisor to the kernel. Kernel returns the value of
+ADI block size to userspace using auxiliary vector along with other ADI
+info. Following auxiliary vectors are provided by the kernel:
+
+ ============ ===========================================
+ AT_ADI_BLKSZ ADI block size. This is the granularity and
+ alignment, in bytes, of ADI versioning.
+ AT_ADI_NBITS Number of ADI version bits in the VA
+ ============ ===========================================
+
+
+IMPORTANT NOTES
+===============
+
+- Version tag values of 0x0 and 0xf are reserved. These values match any
+ tag in virtual address and never generate a mismatch exception.
+
+- Version tags are set on virtual addresses from userspace even though
+ tags are stored in physical memory. Tags are set on a physical page
+ after it has been allocated to a task and a pte has been created for
+ it.
+
+- When a task frees a memory page it had set version tags on, the page
+ goes back to free page pool. When this page is re-allocated to a task,
+ kernel clears the page using block initialization ASI which clears the
+ version tags as well for the page. If a page allocated to a task is
+ freed and allocated back to the same task, old version tags set by the
+ task on that page will no longer be present.
+
+- ADI tag mismatches are not detected for non-faulting loads.
+
+- Kernel does not set any tags for user pages and it is entirely a
+ task's responsibility to set any version tags. Kernel does ensure the
+ version tags are preserved if a page is swapped out to the disk and
+ swapped back in. It also preserves that version tags if a page is
+ migrated.
+
+- ADI works for any size pages. A userspace task need not be aware of
+ page size when using ADI. It can simply select a virtual address
+ range, enable ADI on the range using mprotect() and set version tags
+ for the entire range. mprotect() ensures range is aligned to page size
+ and is a multiple of page size.
+
+- ADI tags can only be set on writable memory. For example, ADI tags can
+ not be set on read-only mappings.
+
+
+
+ADI related traps
+=================
+
+With ADI enabled, following new traps may occur:
+
+Disrupting memory corruption
+----------------------------
+
+ When a store accesses a memory location that has TTE.mcd=1,
+ the task is running with ADI enabled (PSTATE.mcde=1), and the ADI
+ tag in the address used (bits 63:60) does not match the tag set on
+ the corresponding cacheline, a memory corruption trap occurs. By
+ default, it is a disrupting trap and is sent to the hypervisor
+ first. Hypervisor creates a sun4v error report and sends a
+ resumable error (TT=0x7e) trap to the kernel. The kernel sends
+ a SIGSEGV to the task that resulted in this trap with the following
+ info::
+
+ siginfo.si_signo = SIGSEGV;
+ siginfo.errno = 0;
+ siginfo.si_code = SEGV_ADIDERR;
+ siginfo.si_addr = addr; /* PC where first mismatch occurred */
+ siginfo.si_trapno = 0;
+
+
+Precise memory corruption
+-------------------------
+
+ When a store accesses a memory location that has TTE.mcd=1,
+ the task is running with ADI enabled (PSTATE.mcde=1), and the ADI
+ tag in the address used (bits 63:60) does not match the tag set on
+ the corresponding cacheline, a memory corruption trap occurs. If
+ MCD precise exception is enabled (MCDPERR=1), a precise
+ exception is sent to the kernel with TT=0x1a. The kernel sends
+ a SIGSEGV to the task that resulted in this trap with the following
+ info::
+
+ siginfo.si_signo = SIGSEGV;
+ siginfo.errno = 0;
+ siginfo.si_code = SEGV_ADIPERR;
+ siginfo.si_addr = addr; /* address that caused trap */
+ siginfo.si_trapno = 0;
+
+ NOTE:
+ ADI tag mismatch on a load always results in precise trap.
+
+
+MCD disabled
+------------
+
+ When a task has not enabled ADI and attempts to set ADI version
+ on a memory address, processor sends an MCD disabled trap. This
+ trap is handled by hypervisor first and the hypervisor vectors this
+ trap through to the kernel as Data Access Exception trap with
+ fault type set to 0xa (invalid ASI). When this occurs, the kernel
+ sends the task SIGSEGV signal with following info::
+
+ siginfo.si_signo = SIGSEGV;
+ siginfo.errno = 0;
+ siginfo.si_code = SEGV_ACCADI;
+ siginfo.si_addr = addr; /* address that caused trap */
+ siginfo.si_trapno = 0;
+
+
+Sample program to use ADI
+-------------------------
+
+Following sample program is meant to illustrate how to use the ADI
+functionality::
+
+ #include <unistd.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <elf.h>
+ #include <sys/ipc.h>
+ #include <sys/shm.h>
+ #include <sys/mman.h>
+ #include <asm/asi.h>
+
+ #ifndef AT_ADI_BLKSZ
+ #define AT_ADI_BLKSZ 48
+ #endif
+ #ifndef AT_ADI_NBITS
+ #define AT_ADI_NBITS 49
+ #endif
+
+ #ifndef PROT_ADI
+ #define PROT_ADI 0x10
+ #endif
+
+ #define BUFFER_SIZE 32*1024*1024UL
+
+ main(int argc, char* argv[], char* envp[])
+ {
+ unsigned long i, mcde, adi_blksz, adi_nbits;
+ char *shmaddr, *tmp_addr, *end, *veraddr, *clraddr;
+ int shmid, version;
+ Elf64_auxv_t *auxv;
+
+ adi_blksz = 0;
+
+ while(*envp++ != NULL);
+ for (auxv = (Elf64_auxv_t *)envp; auxv->a_type != AT_NULL; auxv++) {
+ switch (auxv->a_type) {
+ case AT_ADI_BLKSZ:
+ adi_blksz = auxv->a_un.a_val;
+ break;
+ case AT_ADI_NBITS:
+ adi_nbits = auxv->a_un.a_val;
+ break;
+ }
+ }
+ if (adi_blksz == 0) {
+ fprintf(stderr, "Oops! ADI is not supported\n");
+ exit(1);
+ }
+
+ printf("ADI capabilities:\n");
+ printf("\tBlock size = %ld\n", adi_blksz);
+ printf("\tNumber of bits = %ld\n", adi_nbits);
+
+ if ((shmid = shmget(2, BUFFER_SIZE,
+ IPC_CREAT | SHM_R | SHM_W)) < 0) {
+ perror("shmget failed");
+ exit(1);
+ }
+
+ shmaddr = shmat(shmid, NULL, 0);
+ if (shmaddr == (char *)-1) {
+ perror("shm attach failed");
+ shmctl(shmid, IPC_RMID, NULL);
+ exit(1);
+ }
+
+ if (mprotect(shmaddr, BUFFER_SIZE, PROT_READ|PROT_WRITE|PROT_ADI)) {
+ perror("mprotect failed");
+ goto err_out;
+ }
+
+ /* Set the ADI version tag on the shm segment
+ */
+ version = 10;
+ tmp_addr = shmaddr;
+ end = shmaddr + BUFFER_SIZE;
+ while (tmp_addr < end) {
+ asm volatile(
+ "stxa %1, [%0]0x90\n\t"
+ :
+ : "r" (tmp_addr), "r" (version));
+ tmp_addr += adi_blksz;
+ }
+ asm volatile("membar #Sync\n\t");
+
+ /* Create a versioned address from the normal address by placing
+ * version tag in the upper adi_nbits bits
+ */
+ tmp_addr = (void *) ((unsigned long)shmaddr << adi_nbits);
+ tmp_addr = (void *) ((unsigned long)tmp_addr >> adi_nbits);
+ veraddr = (void *) (((unsigned long)version << (64-adi_nbits))
+ | (unsigned long)tmp_addr);
+
+ printf("Starting the writes:\n");
+ for (i = 0; i < BUFFER_SIZE; i++) {
+ veraddr[i] = (char)(i);
+ if (!(i % (1024 * 1024)))
+ printf(".");
+ }
+ printf("\n");
+
+ printf("Verifying data...");
+ fflush(stdout);
+ for (i = 0; i < BUFFER_SIZE; i++)
+ if (veraddr[i] != (char)i)
+ printf("\nIndex %lu mismatched\n", i);
+ printf("Done.\n");
+
+ /* Disable ADI and clean up
+ */
+ if (mprotect(shmaddr, BUFFER_SIZE, PROT_READ|PROT_WRITE)) {
+ perror("mprotect failed");
+ goto err_out;
+ }
+
+ if (shmdt((const void *)shmaddr) != 0)
+ perror("Detach failure");
+ shmctl(shmid, IPC_RMID, NULL);
+
+ exit(0);
+
+ err_out:
+ if (shmdt((const void *)shmaddr) != 0)
+ perror("Detach failure");
+ shmctl(shmid, IPC_RMID, NULL);
+ exit(1);
+ }
diff --git a/Documentation/arch/sparc/console.rst b/Documentation/arch/sparc/console.rst
new file mode 100644
index 000000000000..73132db83ece
--- /dev/null
+++ b/Documentation/arch/sparc/console.rst
@@ -0,0 +1,9 @@
+Steps for sending 'break' on sunhv console
+==========================================
+
+On Baremetal:
+ 1. press Esc + 'B'
+
+On LDOM:
+ 1. press Ctrl + ']'
+ 2. telnet> send break
diff --git a/Documentation/arch/sparc/features.rst b/Documentation/arch/sparc/features.rst
new file mode 100644
index 000000000000..96835b6d598a
--- /dev/null
+++ b/Documentation/arch/sparc/features.rst
@@ -0,0 +1,3 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. kernel-feat:: features sparc
diff --git a/Documentation/arch/sparc/index.rst b/Documentation/arch/sparc/index.rst
new file mode 100644
index 000000000000..ae884224eec2
--- /dev/null
+++ b/Documentation/arch/sparc/index.rst
@@ -0,0 +1,13 @@
+==================
+Sparc Architecture
+==================
+
+.. toctree::
+ :maxdepth: 1
+
+ console
+ adi
+
+ oradax/oracle-dax
+
+ features
diff --git a/Documentation/sparc/oradax/dax-hv-api.txt b/Documentation/arch/sparc/oradax/dax-hv-api.txt
index 73e8d506cf64..ef1a4c2bf08b 100644
--- a/Documentation/sparc/oradax/dax-hv-api.txt
+++ b/Documentation/arch/sparc/oradax/dax-hv-api.txt
@@ -22,7 +22,7 @@ Chapter 36. Coprocessor services
functionality offered may vary by virtual machine implementation.
The DAX is a virtual device to sun4v guests, with supported data operations indicated by the virtual device
- compatibilty property. Functionality is accessed through the submission of Command Control Blocks
+ compatibility property. Functionality is accessed through the submission of Command Control Blocks
(CCBs) via the ccb_submit API function. The operations are processed asynchronously, with the status
of the submitted operations reported through a Completion Area linked to each CCB. Each CCB has a
separate Completion Area and, unless execution order is specifically restricted through the use of serial-
@@ -41,7 +41,7 @@ Chapter 36. Coprocessor services
submissions until they succeed; waiting for an outstanding CCB to complete is not necessary, and would
not be a guarantee that a future submission would succeed.
- The availablility of DAX coprocessor command service is indicated by the presence of the DAX virtual
+ The availability of DAX coprocessor command service is indicated by the presence of the DAX virtual
device node in the guest MD (Section 8.24.17, “Database Analytics Accelerators (DAX) virtual-device
node”).
@@ -313,7 +313,7 @@ bits set, and terminate at a CCB that has the Conditional bit set, but not the P
Secondary Input Description
Format Code
- 0 Element is stored as value minus 1 (0 evalutes to 1, 1 evalutes
+ 0 Element is stored as value minus 1 (0 evaluates to 1, 1 evaluates
to 2, etc)
1 Element is stored as value
@@ -659,7 +659,7 @@ Offset Size Field Description
“Secondary Input Element Size”
[13:10] Output Format (see Section 36.2.1.1.6, “Output Format”)
[9:5] Operand size for first scan criteria value. In a scan value
- operation, this is one of two potential extact match values.
+ operation, this is one of two potential exact match values.
In a scan range operation, this is the size of the upper range
@@ -673,7 +673,7 @@ Offset Size Field Description
operand, minus 1. Values 0xF-0x1E are reserved. A value of
0x1F indicates this operand is not in use for this scan operation.
[4:0] Operand size for second scan criteria value. In a scan value
- operation, this is one of two potential extact match values.
+ operation, this is one of two potential exact match values.
In a scan range operation, this is the size of the lower range
boundary. The value of this field is the number of bytes in the
operand, minus 1. Values 0xF-0x1E are reserved. A value of
@@ -690,24 +690,24 @@ Offset Size Field Description
48 8 Output (same fields as Primary Input)
56 8 Symbol Table (if used by Primary Input). Same fields as Section 36.2.1.2,
“Extract command”
-64 4 Next 4 most significant bytes of first scan criteria operand occuring after the
+64 4 Next 4 most significant bytes of first scan criteria operand occurring after the
bytes specified at offset 40, if needed by the operand size. If first operand
is less than 8 bytes, the valid bytes are left-aligned to the lowest address.
-68 4 Next 4 most significant bytes of second scan criteria operand occuring after
+68 4 Next 4 most significant bytes of second scan criteria operand occurring after
the bytes specified at offset 44, if needed by the operand size. If second
operand is less than 8 bytes, the valid bytes are left-aligned to the lowest
address.
-72 4 Next 4 most significant bytes of first scan criteria operand occuring after the
+72 4 Next 4 most significant bytes of first scan criteria operand occurring after the
bytes specified at offset 64, if needed by the operand size. If first operand
is less than 12 bytes, the valid bytes are left-aligned to the lowest address.
-76 4 Next 4 most significant bytes of second scan criteria operand occuring after
+76 4 Next 4 most significant bytes of second scan criteria operand occurring after
the bytes specified at offset 68, if needed by the operand size. If second
operand is less than 12 bytes, the valid bytes are left-aligned to the lowest
address.
-80 4 Next 4 most significant bytes of first scan criteria operand occuring after the
+80 4 Next 4 most significant bytes of first scan criteria operand occurring after the
bytes specified at offset 72, if needed by the operand size. If first operand
is less than 16 bytes, the valid bytes are left-aligned to the lowest address.
-84 4 Next 4 most significant bytes of second scan criteria operand occuring after
+84 4 Next 4 most significant bytes of second scan criteria operand occurring after
the bytes specified at offset 76, if needed by the operand size. If second
operand is less than 16 bytes, the valid bytes are left-aligned to the lowest
address.
@@ -721,10 +721,10 @@ Offset Size Field Description
36.2.1.4. Translate commands
- The translate commands takes an input array of indicies, and a table of single bit values indexed by those
- indicies, and outputs a bit vector or index array created by reading the tables bit value at each index in
+ The translate commands takes an input array of indices, and a table of single bit values indexed by those
+ indices, and outputs a bit vector or index array created by reading the tables bit value at each index in
the input array. The output should therefore contain exactly one bit per index in the input data stream,
- when outputing as a bit vector. When outputing as an index array, the number of elements depends on the
+ when outputting as a bit vector. When outputting as an index array, the number of elements depends on the
values read in the bit table, but will always be less than, or equal to, the number of input elements. Only
a restricted subset of the possible input format types are supported. No variable width or Huffman/OZIP
encoded input streams are allowed. The primary input data element size must be 3 bytes or less.
@@ -742,7 +742,7 @@ Offset Size Field Description
code in the CCB header.
There are two supported formats for the output stream: the bit vector and index array formats (codes 0x8,
- 0xD, and 0xE). The index array format is an array of indicies of bits which would have been set if the
+ 0xD, and 0xE). The index array format is an array of indices of bits which would have been set if the
output format was a bit array.
The return value of the CCB completion area contains the number of bits set in the output bit vector,
@@ -1254,7 +1254,7 @@ EUNAVAILABLE The requested CCB operation could not be performed at this time.
submitted CCB, or may apply to a larger scope. The status should not be
interpreted as permanent, and the guest should attempt to submit CCBs in
the future which had previously been unable to be performed. The status
- data provides additional information about scope of the retricted availability
+ data provides additional information about scope of the restricted availability
as follows:
Value Description
0 Processing for the exact CCB instance submitted was unavailable,
@@ -1330,20 +1330,20 @@ EUNAVAILABLE The requested CCB operation could not be performed at this time.
of other CCBs ahead of the requested CCB, to provide a relative estimate of when the CCB may execute.
The dax return value is only valid when the state is ENQUEUED. The value returned is the DAX unit
- instance indentifier for the DAX unit processing the queue where the requested CCB is located. The value
+ instance identifier for the DAX unit processing the queue where the requested CCB is located. The value
matches the value that would have been, or was, returned by ccb_submit using the queue info flag.
The queue return value is only valid when the state is ENQUEUED. The value returned is the DAX
- queue instance indentifier for the DAX unit processing the queue where the requested CCB is located. The
+ queue instance identifier for the DAX unit processing the queue where the requested CCB is located. The
value matches the value that would have been, or was, returned by ccb_submit using the queue info flag.
36.3.2.1. Errors
- EOK The request was proccessed and the CCB state is valid.
+ EOK The request was processed and the CCB state is valid.
EBADALIGN address is not on a 64-byte aligned.
ENORADDR The real address provided for address is not valid.
EINVAL The CCB completion area contents are not valid.
- EWOULDBLOCK Internal resource contraints prevented the CCB state from being queried at this
+ EWOULDBLOCK Internal resource constraints prevented the CCB state from being queried at this
time. The guest should retry the request.
ENOACCESS The guest does not have permission to access the coprocessor virtual device
functionality.
@@ -1401,11 +1401,11 @@ EUNAVAILABLE The requested CCB operation could not be performed at this time.
36.3.3.2. Errors
- EOK The request was proccessed and the result is valid.
+ EOK The request was processed and the result is valid.
EBADALIGN address is not on a 64-byte aligned.
ENORADDR The real address provided for address is not valid.
EINVAL The CCB completion area contents are not valid.
- EWOULDBLOCK Internal resource contraints prevented the CCB from being killed at this time.
+ EWOULDBLOCK Internal resource constraints prevented the CCB from being killed at this time.
The guest should retry the request.
ENOACCESS The guest does not have permission to access the coprocessor virtual device
functionality.
@@ -1423,7 +1423,7 @@ EUNAVAILABLE The requested CCB operation could not be performed at this time.
36.3.4.1. Errors
- EOK The request was proccessed and the number of enabled/disabled DAX units
+ EOK The request was processed and the number of enabled/disabled DAX units
are valid.
diff --git a/Documentation/arch/sparc/oradax/oracle-dax.rst b/Documentation/arch/sparc/oradax/oracle-dax.rst
new file mode 100644
index 000000000000..d1e14d572918
--- /dev/null
+++ b/Documentation/arch/sparc/oradax/oracle-dax.rst
@@ -0,0 +1,445 @@
+=======================================
+Oracle Data Analytics Accelerator (DAX)
+=======================================
+
+DAX is a coprocessor which resides on the SPARC M7 (DAX1) and M8
+(DAX2) processor chips, and has direct access to the CPU's L3 caches
+as well as physical memory. It can perform several operations on data
+streams with various input and output formats. A driver provides a
+transport mechanism and has limited knowledge of the various opcodes
+and data formats. A user space library provides high level services
+and translates these into low level commands which are then passed
+into the driver and subsequently the Hypervisor and the coprocessor.
+The library is the recommended way for applications to use the
+coprocessor, and the driver interface is not intended for general use.
+This document describes the general flow of the driver, its
+structures, and its programmatic interface. It also provides example
+code sufficient to write user or kernel applications that use DAX
+functionality.
+
+The user library is open source and available at:
+
+ https://oss.oracle.com/git/gitweb.cgi?p=libdax.git
+
+The Hypervisor interface to the coprocessor is described in detail in
+the accompanying document, dax-hv-api.txt, which is a plain text
+excerpt of the (Oracle internal) "UltraSPARC Virtual Machine
+Specification" version 3.0.20+15, dated 2017-09-25.
+
+
+High Level Overview
+===================
+
+A coprocessor request is described by a Command Control Block
+(CCB). The CCB contains an opcode and various parameters. The opcode
+specifies what operation is to be done, and the parameters specify
+options, flags, sizes, and addresses. The CCB (or an array of CCBs)
+is passed to the Hypervisor, which handles queueing and scheduling of
+requests to the available coprocessor execution units. A status code
+returned indicates if the request was submitted successfully or if
+there was an error. One of the addresses given in each CCB is a
+pointer to a "completion area", which is a 128 byte memory block that
+is written by the coprocessor to provide execution status. No
+interrupt is generated upon completion; the completion area must be
+polled by software to find out when a transaction has finished, but
+the M7 and later processors provide a mechanism to pause the virtual
+processor until the completion status has been updated by the
+coprocessor. This is done using the monitored load and mwait
+instructions, which are described in more detail later. The DAX
+coprocessor was designed so that after a request is submitted, the
+kernel is no longer involved in the processing of it. The polling is
+done at the user level, which results in almost zero latency between
+completion of a request and resumption of execution of the requesting
+thread.
+
+
+Addressing Memory
+=================
+
+The kernel does not have access to physical memory in the Sun4v
+architecture, as there is an additional level of memory virtualization
+present. This intermediate level is called "real" memory, and the
+kernel treats this as if it were physical. The Hypervisor handles the
+translations between real memory and physical so that each logical
+domain (LDOM) can have a partition of physical memory that is isolated
+from that of other LDOMs. When the kernel sets up a virtual mapping,
+it specifies a virtual address and the real address to which it should
+be mapped.
+
+The DAX coprocessor can only operate on physical memory, so before a
+request can be fed to the coprocessor, all the addresses in a CCB must
+be converted into physical addresses. The kernel cannot do this since
+it has no visibility into physical addresses. So a CCB may contain
+either the virtual or real addresses of the buffers or a combination
+of them. An "address type" field is available for each address that
+may be given in the CCB. In all cases, the Hypervisor will translate
+all the addresses to physical before dispatching to hardware. Address
+translations are performed using the context of the process initiating
+the request.
+
+
+The Driver API
+==============
+
+An application makes requests to the driver via the write() system
+call, and gets results (if any) via read(). The completion areas are
+made accessible via mmap(), and are read-only for the application.
+
+The request may either be an immediate command or an array of CCBs to
+be submitted to the hardware.
+
+Each open instance of the device is exclusive to the thread that
+opened it, and must be used by that thread for all subsequent
+operations. The driver open function creates a new context for the
+thread and initializes it for use. This context contains pointers and
+values used internally by the driver to keep track of submitted
+requests. The completion area buffer is also allocated, and this is
+large enough to contain the completion areas for many concurrent
+requests. When the device is closed, any outstanding transactions are
+flushed and the context is cleaned up.
+
+On a DAX1 system (M7), the device will be called "oradax1", while on a
+DAX2 system (M8) it will be "oradax2". If an application requires one
+or the other, it should simply attempt to open the appropriate
+device. Only one of the devices will exist on any given system, so the
+name can be used to determine what the platform supports.
+
+The immediate commands are CCB_DEQUEUE, CCB_KILL, and CCB_INFO. For
+all of these, success is indicated by a return value from write()
+equal to the number of bytes given in the call. Otherwise -1 is
+returned and errno is set.
+
+CCB_DEQUEUE
+-----------
+
+Tells the driver to clean up resources associated with past
+requests. Since no interrupt is generated upon the completion of a
+request, the driver must be told when it may reclaim resources. No
+further status information is returned, so the user should not
+subsequently call read().
+
+CCB_KILL
+--------
+
+Kills a CCB during execution. The CCB is guaranteed to not continue
+executing once this call returns successfully. On success, read() must
+be called to retrieve the result of the action.
+
+CCB_INFO
+--------
+
+Retrieves information about a currently executing CCB. Note that some
+Hypervisors might return 'notfound' when the CCB is in 'inprogress'
+state. To ensure a CCB in the 'notfound' state will never be executed,
+CCB_KILL must be invoked on that CCB. Upon success, read() must be
+called to retrieve the details of the action.
+
+Submission of an array of CCBs for execution
+---------------------------------------------
+
+A write() whose length is a multiple of the CCB size is treated as a
+submit operation. The file offset is treated as the index of the
+completion area to use, and may be set via lseek() or using the
+pwrite() system call. If -1 is returned then errno is set to indicate
+the error. Otherwise, the return value is the length of the array that
+was actually accepted by the coprocessor. If the accepted length is
+equal to the requested length, then the submission was completely
+successful and there is no further status needed; hence, the user
+should not subsequently call read(). Partial acceptance of the CCB
+array is indicated by a return value less than the requested length,
+and read() must be called to retrieve further status information. The
+status will reflect the error caused by the first CCB that was not
+accepted, and status_data will provide additional data in some cases.
+
+MMAP
+----
+
+The mmap() function provides access to the completion area allocated
+in the driver. Note that the completion area is not writeable by the
+user process, and the mmap call must not specify PROT_WRITE.
+
+
+Completion of a Request
+=======================
+
+The first byte in each completion area is the command status which is
+updated by the coprocessor hardware. Software may take advantage of
+new M7/M8 processor capabilities to efficiently poll this status byte.
+First, a "monitored load" is achieved via a Load from Alternate Space
+(ldxa, lduba, etc.) with ASI 0x84 (ASI_MONITOR_PRIMARY). Second, a
+"monitored wait" is achieved via the mwait instruction (a write to
+%asr28). This instruction is like pause in that it suspends execution
+of the virtual processor for the given number of nanoseconds, but in
+addition will terminate early when one of several events occur. If the
+block of data containing the monitored location is modified, then the
+mwait terminates. This causes software to resume execution immediately
+(without a context switch or kernel to user transition) after a
+transaction completes. Thus the latency between transaction completion
+and resumption of execution may be just a few nanoseconds.
+
+
+Application Life Cycle of a DAX Submission
+==========================================
+
+ - open dax device
+ - call mmap() to get the completion area address
+ - allocate a CCB and fill in the opcode, flags, parameters, addresses, etc.
+ - submit CCB via write() or pwrite()
+ - go into a loop executing monitored load + monitored wait and
+ terminate when the command status indicates the request is complete
+ (CCB_KILL or CCB_INFO may be used any time as necessary)
+ - perform a CCB_DEQUEUE
+ - call munmap() for completion area
+ - close the dax device
+
+
+Memory Constraints
+==================
+
+The DAX hardware operates only on physical addresses. Therefore, it is
+not aware of virtual memory mappings and the discontiguities that may
+exist in the physical memory that a virtual buffer maps to. There is
+no I/O TLB or any scatter/gather mechanism. All buffers, whether input
+or output, must reside in a physically contiguous region of memory.
+
+The Hypervisor translates all addresses within a CCB to physical
+before handing off the CCB to DAX. The Hypervisor determines the
+virtual page size for each virtual address given, and uses this to
+program a size limit for each address. This prevents the coprocessor
+from reading or writing beyond the bound of the virtual page, even
+though it is accessing physical memory directly. A simpler way of
+saying this is that a DAX operation will never "cross" a virtual page
+boundary. If an 8k virtual page is used, then the data is strictly
+limited to 8k. If a user's buffer is larger than 8k, then a larger
+page size must be used, or the transaction size will be truncated to
+8k.
+
+Huge pages. A user may allocate huge pages using standard interfaces.
+Memory buffers residing on huge pages may be used to achieve much
+larger DAX transaction sizes, but the rules must still be followed,
+and no transaction will cross a page boundary, even a huge page. A
+major caveat is that Linux on Sparc presents 8Mb as one of the huge
+page sizes. Sparc does not actually provide a 8Mb hardware page size,
+and this size is synthesized by pasting together two 4Mb pages. The
+reasons for this are historical, and it creates an issue because only
+half of this 8Mb page can actually be used for any given buffer in a
+DAX request, and it must be either the first half or the second half;
+it cannot be a 4Mb chunk in the middle, since that crosses a
+(hardware) page boundary. Note that this entire issue may be hidden by
+higher level libraries.
+
+
+CCB Structure
+-------------
+A CCB is an array of 8 64-bit words. Several of these words provide
+command opcodes, parameters, flags, etc., and the rest are addresses
+for the completion area, output buffer, and various inputs::
+
+ struct ccb {
+ u64 control;
+ u64 completion;
+ u64 input0;
+ u64 access;
+ u64 input1;
+ u64 op_data;
+ u64 output;
+ u64 table;
+ };
+
+See libdax/common/sys/dax1/dax1_ccb.h for a detailed description of
+each of these fields, and see dax-hv-api.txt for a complete description
+of the Hypervisor API available to the guest OS (ie, Linux kernel).
+
+The first word (control) is examined by the driver for the following:
+ - CCB version, which must be consistent with hardware version
+ - Opcode, which must be one of the documented allowable commands
+ - Address types, which must be set to "virtual" for all the addresses
+ given by the user, thereby ensuring that the application can
+ only access memory that it owns
+
+
+Example Code
+============
+
+The DAX is accessible to both user and kernel code. The kernel code
+can make hypercalls directly while the user code must use wrappers
+provided by the driver. The setup of the CCB is nearly identical for
+both; the only difference is in preparation of the completion area. An
+example of user code is given now, with kernel code afterwards.
+
+In order to program using the driver API, the file
+arch/sparc/include/uapi/asm/oradax.h must be included.
+
+First, the proper device must be opened. For M7 it will be
+/dev/oradax1 and for M8 it will be /dev/oradax2. The simplest
+procedure is to attempt to open both, as only one will succeed::
+
+ fd = open("/dev/oradax1", O_RDWR);
+ if (fd < 0)
+ fd = open("/dev/oradax2", O_RDWR);
+ if (fd < 0)
+ /* No DAX found */
+
+Next, the completion area must be mapped::
+
+ completion_area = mmap(NULL, DAX_MMAP_LEN, PROT_READ, MAP_SHARED, fd, 0);
+
+All input and output buffers must be fully contained in one hardware
+page, since as explained above, the DAX is strictly constrained by
+virtual page boundaries. In addition, the output buffer must be
+64-byte aligned and its size must be a multiple of 64 bytes because
+the coprocessor writes in units of cache lines.
+
+This example demonstrates the DAX Scan command, which takes as input a
+vector and a match value, and produces a bitmap as the output. For
+each input element that matches the value, the corresponding bit is
+set in the output.
+
+In this example, the input vector consists of a series of single bits,
+and the match value is 0. So each 0 bit in the input will produce a 1
+in the output, and vice versa, which produces an output bitmap which
+is the input bitmap inverted.
+
+For details of all the parameters and bits used in this CCB, please
+refer to section 36.2.1.3 of the DAX Hypervisor API document, which
+describes the Scan command in detail::
+
+ ccb->control = /* Table 36.1, CCB Header Format */
+ (2L << 48) /* command = Scan Value */
+ | (3L << 40) /* output address type = primary virtual */
+ | (3L << 34) /* primary input address type = primary virtual */
+ /* Section 36.2.1, Query CCB Command Formats */
+ | (1 << 28) /* 36.2.1.1.1 primary input format = fixed width bit packed */
+ | (0 << 23) /* 36.2.1.1.2 primary input element size = 0 (1 bit) */
+ | (8 << 10) /* 36.2.1.1.6 output format = bit vector */
+ | (0 << 5) /* 36.2.1.3 First scan criteria size = 0 (1 byte) */
+ | (31 << 0); /* 36.2.1.3 Disable second scan criteria */
+
+ ccb->completion = 0; /* Completion area address, to be filled in by driver */
+
+ ccb->input0 = (unsigned long) input; /* primary input address */
+
+ ccb->access = /* Section 36.2.1.2, Data Access Control */
+ (2 << 24) /* Primary input length format = bits */
+ | (nbits - 1); /* number of bits in primary input stream, minus 1 */
+
+ ccb->input1 = 0; /* secondary input address, unused */
+
+ ccb->op_data = 0; /* scan criteria (value to be matched) */
+
+ ccb->output = (unsigned long) output; /* output address */
+
+ ccb->table = 0; /* table address, unused */
+
+The CCB submission is a write() or pwrite() system call to the
+driver. If the call fails, then a read() must be used to retrieve the
+status::
+
+ if (pwrite(fd, ccb, 64, 0) != 64) {
+ struct ccb_exec_result status;
+ read(fd, &status, sizeof(status));
+ /* bail out */
+ }
+
+After a successful submission of the CCB, the completion area may be
+polled to determine when the DAX is finished. Detailed information on
+the contents of the completion area can be found in section 36.2.2 of
+the DAX HV API document::
+
+ while (1) {
+ /* Monitored Load */
+ __asm__ __volatile__("lduba [%1] 0x84, %0\n"
+ : "=r" (status)
+ : "r" (completion_area));
+
+ if (status) /* 0 indicates command in progress */
+ break;
+
+ /* MWAIT */
+ __asm__ __volatile__("wr %%g0, 1000, %%asr28\n" ::); /* 1000 ns */
+ }
+
+A completion area status of 1 indicates successful completion of the
+CCB and validity of the output bitmap, which may be used immediately.
+All other non-zero values indicate error conditions which are
+described in section 36.2.2::
+
+ if (completion_area[0] != 1) { /* section 36.2.2, 1 = command ran and succeeded */
+ /* completion_area[0] contains the completion status */
+ /* completion_area[1] contains an error code, see 36.2.2 */
+ }
+
+After the completion area has been processed, the driver must be
+notified that it can release any resources associated with the
+request. This is done via the dequeue operation::
+
+ struct dax_command cmd;
+ cmd.command = CCB_DEQUEUE;
+ if (write(fd, &cmd, sizeof(cmd)) != sizeof(cmd)) {
+ /* bail out */
+ }
+
+Finally, normal program cleanup should be done, i.e., unmapping
+completion area, closing the dax device, freeing memory etc.
+
+Kernel example
+--------------
+
+The only difference in using the DAX in kernel code is the treatment
+of the completion area. Unlike user applications which mmap the
+completion area allocated by the driver, kernel code must allocate its
+own memory to use for the completion area, and this address and its
+type must be given in the CCB::
+
+ ccb->control |= /* Table 36.1, CCB Header Format */
+ (3L << 32); /* completion area address type = primary virtual */
+
+ ccb->completion = (unsigned long) completion_area; /* Completion area address */
+
+The dax submit hypercall is made directly. The flags used in the
+ccb_submit call are documented in the DAX HV API in section 36.3.1/
+
+::
+
+ #include <asm/hypervisor.h>
+
+ hv_rv = sun4v_ccb_submit((unsigned long)ccb, 64,
+ HV_CCB_QUERY_CMD |
+ HV_CCB_ARG0_PRIVILEGED | HV_CCB_ARG0_TYPE_PRIMARY |
+ HV_CCB_VA_PRIVILEGED,
+ 0, &bytes_accepted, &status_data);
+
+ if (hv_rv != HV_EOK) {
+ /* hv_rv is an error code, status_data contains */
+ /* potential additional status, see 36.3.1.1 */
+ }
+
+After the submission, the completion area polling code is identical to
+that in user land::
+
+ while (1) {
+ /* Monitored Load */
+ __asm__ __volatile__("lduba [%1] 0x84, %0\n"
+ : "=r" (status)
+ : "r" (completion_area));
+
+ if (status) /* 0 indicates command in progress */
+ break;
+
+ /* MWAIT */
+ __asm__ __volatile__("wr %%g0, 1000, %%asr28\n" ::); /* 1000 ns */
+ }
+
+ if (completion_area[0] != 1) { /* section 36.2.2, 1 = command ran and succeeded */
+ /* completion_area[0] contains the completion status */
+ /* completion_area[1] contains an error code, see 36.2.2 */
+ }
+
+The output bitmap is ready for consumption immediately after the
+completion status indicates success.
+
+Excer[t from UltraSPARC Virtual Machine Specification
+=====================================================
+
+ .. include:: dax-hv-api.txt
+ :literal:
diff --git a/Documentation/arch/x86/amd-debugging.rst b/Documentation/arch/x86/amd-debugging.rst
new file mode 100644
index 000000000000..d92bf59d62c7
--- /dev/null
+++ b/Documentation/arch/x86/amd-debugging.rst
@@ -0,0 +1,368 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Debugging AMD Zen systems
++++++++++++++++++++++++++
+
+Introduction
+============
+
+This document describes techniques that are useful for debugging issues with
+AMD Zen systems. It is intended for use by developers and technical users
+to help identify and resolve issues.
+
+S3 vs s2idle
+============
+
+On AMD systems, it's not possible to simultaneously support suspend-to-RAM (S3)
+and suspend-to-idle (s2idle). To confirm which mode your system supports you
+can look at ``cat /sys/power/mem_sleep``. If it shows ``s2idle [deep]`` then
+*S3* is supported. If it shows ``[s2idle]`` then *s2idle* is
+supported.
+
+On systems that support *S3*, the firmware will be utilized to put all hardware into
+the appropriate low power state.
+
+On systems that support *s2idle*, the kernel will be responsible for transitioning devices
+into the appropriate low power state. When all devices are in the appropriate low
+power state, the hardware will transition into a hardware sleep state.
+
+After a suspend cycle you can tell how much time was spent in a hardware sleep
+state by looking at ``cat /sys/power/suspend_stats/last_hw_sleep``.
+
+This flowchart explains how the AMD s2idle suspend flow works.
+
+.. kernel-figure:: suspend.svg
+
+This flowchart explains how the amd s2idle resume flow works.
+
+.. kernel-figure:: resume.svg
+
+s2idle debugging tool
+=====================
+
+As there are a lot of places that problems can occur, a debugging tool has been
+created at
+`amd-debug-tools <https://git.kernel.org/pub/scm/linux/kernel/git/superm1/amd-debug-tools.git/about/>`_
+that can help test for common problems and offer suggestions.
+
+If you have an s2idle issue, it's best to start with this and follow instructions
+from its findings. If you continue to have an issue, raise a bug with the
+report generated from this script to
+`drm/amd gitlab <https://gitlab.freedesktop.org/drm/amd/-/issues/new?issuable_template=s2idle_BUG_TEMPLATE>`_.
+
+Spurious s2idle wakeups from an IRQ
+===================================
+
+Spurious wakeups will generally have an IRQ set to ``/sys/power/pm_wakeup_irq``.
+This can be matched to ``/proc/interrupts`` to determine what device woke the system.
+
+If this isn't enough to debug the problem, then the following sysfs files
+can be set to add more verbosity to the wakeup process: ::
+
+ # echo 1 | sudo tee /sys/power/pm_debug_messages
+ # echo 1 | sudo tee /sys/power/pm_print_times
+
+After making those changes, the kernel will display messages that can
+be traced back to kernel s2idle loop code as well as display any active
+GPIO sources while waking up.
+
+If the wakeup is caused by the ACPI SCI, additional ACPI debugging may be
+needed. These commands can enable additional trace data: ::
+
+ # echo enable | sudo tee /sys/module/acpi/parameters/trace_state
+ # echo 1 | sudo tee /sys/module/acpi/parameters/aml_debug_output
+ # echo 0x0800000f | sudo tee /sys/module/acpi/parameters/debug_level
+ # echo 0xffff0000 | sudo tee /sys/module/acpi/parameters/debug_layer
+
+Spurious s2idle wakeups from a GPIO
+===================================
+
+If a GPIO is active when waking up the system ideally you would look at the
+schematic to determine what device it is associated with. If the schematic
+is not available, another tactic is to look at the ACPI _EVT() entry
+to determine what device is notified when that GPIO is active.
+
+For a hypothetical example, say that GPIO 59 woke up the system. You can
+look at the SSDT to determine what device is notified when GPIO 59 is active.
+
+First convert the GPIO number into hex. ::
+
+ $ python3 -c "print(hex(59))"
+ 0x3b
+
+Next determine which ACPI table has the ``_EVT`` entry. For example: ::
+
+ $ sudo grep EVT /sys/firmware/acpi/tables/SSDT*
+ grep: /sys/firmware/acpi/tables/SSDT27: binary file matches
+
+Decode this table::
+
+ $ sudo cp /sys/firmware/acpi/tables/SSDT27 .
+ $ sudo iasl -d SSDT27
+
+Then look at the table and find the matching entry for GPIO 0x3b. ::
+
+ Case (0x3B)
+ {
+ M000 (0x393B)
+ M460 (" Notify (\\_SB.PCI0.GP17.XHC1, 0x02)\n", Zero, Zero, Zero, Zero, Zero, Zero)
+ Notify (\_SB.PCI0.GP17.XHC1, 0x02) // Device Wake
+ }
+
+You can see in this case that the device ``\_SB.PCI0.GP17.XHC1`` is notified
+when GPIO 59 is active. It's obvious this is an XHCI controller, but to go a
+step further you can figure out which XHCI controller it is by matching it to
+ACPI.::
+
+ $ grep "PCI0.GP17.XHC1" /sys/bus/acpi/devices/*/path
+ /sys/bus/acpi/devices/device:2d/path:\_SB_.PCI0.GP17.XHC1
+ /sys/bus/acpi/devices/device:2e/path:\_SB_.PCI0.GP17.XHC1.RHUB
+ /sys/bus/acpi/devices/device:2f/path:\_SB_.PCI0.GP17.XHC1.RHUB.PRT1
+ /sys/bus/acpi/devices/device:30/path:\_SB_.PCI0.GP17.XHC1.RHUB.PRT1.CAM0
+ /sys/bus/acpi/devices/device:31/path:\_SB_.PCI0.GP17.XHC1.RHUB.PRT1.CAM1
+ /sys/bus/acpi/devices/device:32/path:\_SB_.PCI0.GP17.XHC1.RHUB.PRT2
+ /sys/bus/acpi/devices/LNXPOWER:0d/path:\_SB_.PCI0.GP17.XHC1.PWRS
+
+Here you can see it matches to ``device:2d``. Look at the ``physical_node``
+to determine what PCI device that actually is. ::
+
+ $ ls -l /sys/bus/acpi/devices/device:2d/physical_node
+ lrwxrwxrwx 1 root root 0 Feb 12 13:22 /sys/bus/acpi/devices/device:2d/physical_node -> ../../../../../pci0000:00/0000:00:08.1/0000:c2:00.4
+
+So there you have it: the PCI device associated with this GPIO wakeup was ``0000:c2:00.4``.
+
+The ``amd_s2idle.py`` script will capture most of these artifacts for you.
+
+s2idle PM debug messages
+========================
+
+During the s2idle flow on AMD systems, the ACPI LPS0 driver is responsible
+to check all uPEP constraints. Failing uPEP constraints does not prevent
+s0i3 entry. This means that if some constraints are not met, it is possible
+the kernel may attempt to enter s2idle even if there are some known issues.
+
+To activate PM debugging, either specify ``pm_debug_messagess`` kernel
+command-line option at boot or write to ``/sys/power/pm_debug_messages``.
+Unmet constraints will be displayed in the kernel log and can be
+viewed by logging tools that process kernel ring buffer like ``dmesg`` or
+``journalctl``."
+
+If the system freezes on entry/exit before these messages are flushed, a
+useful debugging tactic is to unbind the ``amd_pmc`` driver to prevent
+notification to the platform to start s0i3 entry. This will stop the
+system from freezing on entry or exit and let you view all the failed
+constraints. ::
+
+ cd /sys/bus/platform/drivers/amd_pmc
+ ls | grep AMD | sudo tee unbind
+
+After doing this, run the suspend cycle and look specifically for errors around: ::
+
+ ACPI: LPI: Constraint not met; min power state:%s current power state:%s
+
+Historical examples of s2idle issues
+====================================
+
+To help understand the types of issues that can occur and how to debug them,
+here are some historical examples of s2idle issues that have been resolved.
+
+Core offlining
+--------------
+An end user had reported that taking a core offline would prevent the system
+from properly entering s0i3. This was debugged using internal AMD tools
+to capture and display a stream of metrics from the hardware showing what changed
+when a core was offlined. It was determined that the hardware didn't get
+notification the offline cores were in the deepest state, and so it prevented
+CPU from going into the deepest state. The issue was debugged to a missing
+command to put cores into C3 upon offline.
+
+`commit d6b88ce2eb9d2 ("ACPI: processor idle: Allow playing dead in C3 state") <https://git.kernel.org/torvalds/c/d6b88ce2eb9d2>`_
+
+Corruption after resume
+-----------------------
+A big problem that occurred with Rembrandt was that there was graphical
+corruption after resume. This happened because of a misalignment of PSP
+and driver responsibility. The PSP will save and restore DMCUB, but the
+driver assumed it needed to reset DMCUB on resume.
+This actually was a misalignment for earlier silicon as well, but was not
+observed.
+
+`commit 79d6b9351f086 ("drm/amd/display: Don't reinitialize DMCUB on s0ix resume") <https://git.kernel.org/torvalds/c/79d6b9351f086>`_
+
+Back to Back suspends fail
+--------------------------
+When using a wakeup source that triggers the IRQ to wakeup, a bug in the
+pinctrl-amd driver may capture the wrong state of the IRQ and prevent the
+system going back to sleep properly.
+
+`commit b8c824a869f22 ("pinctrl: amd: Don't save/restore interrupt status and wake status bits") <https://git.kernel.org/torvalds/c/b8c824a869f22>`_
+
+Spurious timer based wakeup after 5 minutes
+-------------------------------------------
+The HPET was being used to program the wakeup source for the system, however
+this was causing a spurious wakeup after 5 minutes. The correct alarm to use
+was the ACPI alarm.
+
+`commit 3d762e21d5637 ("rtc: cmos: Use ACPI alarm for non-Intel x86 systems too") <https://git.kernel.org/torvalds/c/3d762e21d5637>`_
+
+Disk disappears after resume
+----------------------------
+After resuming from s2idle, the NVME disk would disappear. This was due to the
+BIOS not specifying the _DSD StorageD3Enable property. This caused the NVME
+driver not to put the disk into the expected state at suspend and to fail
+on resume.
+
+`commit e79a10652bbd3 ("ACPI: x86: Force StorageD3Enable on more products") <https://git.kernel.org/torvalds/c/e79a10652bbd3>`_
+
+Spurious IRQ1
+-------------
+A number of Renoir, Lucienne, Cezanne, & Barcelo platforms have a
+platform firmware bug where IRQ1 is triggered during s0i3 resume.
+
+This was fixed in the platform firmware, but a number of systems didn't
+receive any more platform firmware updates.
+
+`commit 8e60615e89321 ("platform/x86/amd: pmc: Disable IRQ1 wakeup for RN/CZN") <https://git.kernel.org/torvalds/c/8e60615e89321>`_
+
+Hardware timeout
+----------------
+The hardware performs many actions besides accepting the values from
+amd-pmc driver. As the communication path with the hardware is a mailbox,
+it's possible that it might not respond quickly enough.
+This issue manifested as a failure to suspend: ::
+
+ PM: dpm_run_callback(): acpi_subsys_suspend_noirq+0x0/0x50 returns -110
+ amd_pmc AMDI0005:00: PM: failed to suspend noirq: error -110
+
+The timing problem was identified by comparing the values of the idle mask.
+
+`commit 3c3c8e88c8712 ("platform/x86: amd-pmc: Increase the response register timeout") <https://git.kernel.org/torvalds/c/3c3c8e88c8712>`_
+
+Failed to reach hardware sleep state with panel on
+--------------------------------------------------
+On some Strix systems certain panels were observed to block the system from
+entering a hardware sleep state if the internal panel was on during the sequence.
+
+Even though the panel got turned off during suspend it exposed a timing problem
+where an interrupt caused the display hardware to wake up and block low power
+state entry.
+
+`commit 40b8c14936bd2 ("drm/amd/display: Disable unneeded hpd interrupts during dm_init") <https://git.kernel.org/torvalds/c/40b8c14936bd2>`_
+
+Runtime power consumption issues
+================================
+
+Runtime power consumption is influenced by many factors, including but not
+limited to the configuration of the PCIe Active State Power Management (ASPM),
+the display brightness, the EPP policy of the CPU, and the power management
+of the devices.
+
+ASPM
+----
+For the best runtime power consumption, ASPM should be programmed as intended
+by the BIOS from the hardware vendor. To accomplish this the Linux kernel
+should be compiled with ``CONFIG_PCIEASPM_DEFAULT`` set to ``y`` and the
+sysfs file ``/sys/module/pcie_aspm/parameters/policy`` should not be modified.
+
+Most notably, if L1.2 is not configured properly for any devices, the SoC
+will not be able to enter the deepest idle state.
+
+EPP Policy
+----------
+The ``energy_performance_preference`` sysfs file can be used to set a bias
+of efficiency or performance for a CPU. This has a direct relationship on
+the battery life when more heavily biased towards performance.
+
+
+BIOS debug messages
+===================
+
+Most OEM machines don't have a serial UART for outputting kernel or BIOS
+debug messages. However BIOS debug messages are useful for understanding
+both BIOS bugs and bugs with the Linux kernel drivers that call BIOS AML.
+
+As the BIOS on most OEM AMD systems are based off an AMD reference BIOS,
+the infrastructure used for exporting debugging messages is often the same
+as AMD reference BIOS.
+
+Manually Parsing
+----------------
+There is generally an ACPI method ``\M460`` that different paths of the AML
+will call to emit a message to the BIOS serial log. This method takes
+7 arguments, with the first being a string and the rest being optional
+integers::
+
+ Method (M460, 7, Serialized)
+
+Here is an example of a string that BIOS AML may call out using ``\M460``::
+
+ M460 (" OEM-ASL-PCIe Address (0x%X)._REG (%d %d) PCSA = %d\n", DADR, Arg0, Arg1, PCSA, Zero, Zero)
+
+Normally when executed, the ``\M460`` method would populate the additional
+arguments into the string. In order to get these messages from the Linux
+kernel a hook has been added into ACPICA that can capture the *arguments*
+sent to ``\M460`` and print them to the kernel ring buffer.
+For example the following message could be emitted into kernel ring buffer::
+
+ extrace-0174 ex_trace_args : " OEM-ASL-PCIe Address (0x%X)._REG (%d %d) PCSA = %d\n", ec106000, 2, 1, 1, 0, 0
+
+In order to get these messages, you need to compile with ``CONFIG_ACPI_DEBUG``
+and then turn on the following ACPICA tracing parameters.
+This can be done either on the kernel command line or at runtime:
+
+* ``acpi.trace_method_name=\M460``
+* ``acpi.trace_state=method``
+
+NOTE: These can be very noisy at bootup. If you turn these parameters on
+the kernel command, please also consider turning up ``CONFIG_LOG_BUF_SHIFT``
+to a larger size such as 17 to avoid losing early boot messages.
+
+Tool assisted Parsing
+---------------------
+As mentioned above, parsing by hand can be tedious, especially with a lot of
+messages. To help with this, a tool has been created at
+`amd-debug-tools <https://git.kernel.org/pub/scm/linux/kernel/git/superm1/amd-debug-tools.git/about/>`_
+to help parse the messages.
+
+Random reboot issues
+====================
+
+When a random reboot occurs, the high-level reason for the reboot is stored
+in a register that will persist onto the next boot.
+
+There are 6 classes of reasons for the reboot:
+ * Software induced
+ * Power state transition
+ * Pin induced
+ * Hardware induced
+ * Remote reset
+ * Internal CPU event
+
+.. csv-table::
+ :header: "Bit", "Type", "Reason"
+ :align: left
+
+ "0", "Pin", "thermal pin BP_THERMTRIP_L was tripped"
+ "1", "Pin", "power button was pressed for 4 seconds"
+ "2", "Pin", "shutdown pin was tripped"
+ "4", "Remote", "remote ASF power off command was received"
+ "9", "Internal", "internal CPU thermal limit was tripped"
+ "16", "Pin", "system reset pin BP_SYS_RST_L was tripped"
+ "17", "Software", "software issued PCI reset"
+ "18", "Software", "software wrote 0x4 to reset control register 0xCF9"
+ "19", "Software", "software wrote 0x6 to reset control register 0xCF9"
+ "20", "Software", "software wrote 0xE to reset control register 0xCF9"
+ "21", "ACPI-state", "ACPI power state transition occurred"
+ "22", "Pin", "keyboard reset pin KB_RST_L was tripped"
+ "23", "Internal", "internal CPU shutdown event occurred"
+ "24", "Hardware", "system failed to boot before failed boot timer expired"
+ "25", "Hardware", "hardware watchdog timer expired"
+ "26", "Remote", "remote ASF reset command was received"
+ "27", "Internal", "an uncorrected error caused a data fabric sync flood event"
+ "29", "Internal", "FCH and MP1 failed warm reset handshake"
+ "30", "Internal", "a parity error occurred"
+ "31", "Internal", "a software sync flood event occurred"
+
+This information is read by the kernel at bootup and printed into
+the syslog. When a random reboot occurs this message can be helpful
+to determine the next component to debug.
diff --git a/Documentation/arch/x86/amd-hfi.rst b/Documentation/arch/x86/amd-hfi.rst
new file mode 100644
index 000000000000..bf3d3a1985a2
--- /dev/null
+++ b/Documentation/arch/x86/amd-hfi.rst
@@ -0,0 +1,133 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================================================================
+Hardware Feedback Interface For Hetero Core Scheduling On AMD Platform
+======================================================================
+
+:Copyright: 2025 Advanced Micro Devices, Inc. All Rights Reserved.
+
+:Author: Perry Yuan <perry.yuan@amd.com>
+:Author: Mario Limonciello <mario.limonciello@amd.com>
+
+Overview
+--------
+
+AMD Heterogeneous Core implementations are comprised of more than one
+architectural class and CPUs are comprised of cores of various efficiency and
+power capabilities: performance-oriented *classic cores* and power-efficient
+*dense cores*. As such, power management strategies must be designed to
+accommodate the complexities introduced by incorporating different core types.
+Heterogeneous systems can also extend to more than two architectural classes
+as well. The purpose of the scheduling feedback mechanism is to provide
+information to the operating system scheduler in real time such that the
+scheduler can direct threads to the optimal core.
+
+The goal of AMD's heterogeneous architecture is to attain power benefit by
+sending background threads to the dense cores while sending high priority
+threads to the classic cores. From a performance perspective, sending
+background threads to dense cores can free up power headroom and allow the
+classic cores to optimally service demanding threads. Furthermore, the area
+optimized nature of the dense cores allows for an increasing number of
+physical cores. This improved core density will have positive multithreaded
+performance impact.
+
+AMD Heterogeneous Core Driver
+-----------------------------
+
+The ``amd_hfi`` driver delivers the operating system a performance and energy
+efficiency capability data for each CPU in the system. The scheduler can use
+the ranking data from the HFI driver to make task placement decisions.
+
+Thread Classification and Ranking Table Interaction
+----------------------------------------------------
+
+The thread classification is used to select into a ranking table that
+describes an efficiency and performance ranking for each classification.
+
+Threads are classified during runtime into enumerated classes. The classes
+represent thread performance/power characteristics that may benefit from
+special scheduling behaviors. The below table depicts an example of thread
+classification and a preference where a given thread should be scheduled
+based on its thread class. The real time thread classification is consumed
+by the operating system and is used to inform the scheduler of where the
+thread should be placed.
+
+Thread Classification Example Table
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
++----------+----------------+-------------------------------+---------------------+---------+
+| class ID | Classification | Preferred scheduling behavior | Preemption priority | Counter |
++----------+----------------+-------------------------------+---------------------+---------+
+| 0 | Default | Performant | Highest | |
++----------+----------------+-------------------------------+---------------------+---------+
+| 1 | Non-scalable | Efficient | Lowest | PMCx1A1 |
++----------+----------------+-------------------------------+---------------------+---------+
+| 2 | I/O bound | Efficient | Lowest | PMCx044 |
++----------+----------------+-------------------------------+---------------------+---------+
+
+Thread classification is performed by the hardware each time that the thread is switched out.
+Threads that don't meet any hardware specified criteria are classified as "default".
+
+AMD Hardware Feedback Interface
+--------------------------------
+
+The Hardware Feedback Interface provides to the operating system information
+about the performance and energy efficiency of each CPU in the system. Each
+capability is given as a unit-less quantity in the range [0-255]. A higher
+performance value indicates higher performance capability, and a higher
+efficiency value indicates more efficiency. Energy efficiency and performance
+are reported in separate capabilities in the shared memory based ranking table.
+
+These capabilities may change at runtime as a result of changes in the
+operating conditions of the system or the action of external factors.
+Power Management firmware is responsible for detecting events that require
+a reordering of the performance and efficiency ranking. Table updates happen
+relatively infrequently and occur on the time scale of seconds or more.
+
+The following events trigger a table update:
+ * Thermal Stress Events
+ * Silent Compute
+ * Extreme Low Battery Scenarios
+
+The kernel or a userspace policy daemon can use these capabilities to modify
+task placement decisions. For instance, if either the performance or energy
+capabilities of a given logical processor becomes zero, it is an indication
+that the hardware recommends to the operating system to not schedule any tasks
+on that processor for performance or energy efficiency reasons, respectively.
+
+Implementation details for Linux
+--------------------------------
+
+The implementation of threads scheduling consists of the following steps:
+
+1. A thread is spawned and scheduled to the ideal core using the default
+ heterogeneous scheduling policy.
+2. The processor profiles thread execution and assigns an enumerated
+ classification ID.
+ This classification is communicated to the OS via logical processor
+ scope MSR.
+3. During the thread context switch out the operating system consumes the
+ workload (WL) classification which resides in a logical processor scope MSR.
+4. The OS triggers the hardware to clear its history by writing to an MSR,
+ after consuming the WL classification and before switching in the new thread.
+5. If due to the classification, ranking table, and processor availability,
+ the thread is not on its ideal processor, the OS will then consider
+ scheduling the thread on its ideal processor (if available).
+
+Ranking Table
+-------------
+The ranking table is a shared memory region that is used to communicate the
+performance and energy efficiency capabilities of each CPU in the system.
+
+The ranking table design includes rankings for each APIC ID in the system and
+rankings both for performance and efficiency for each workload classification.
+
+.. kernel-doc:: drivers/platform/x86/amd/hfi/hfi.c
+ :doc: amd_shmem_info
+
+Ranking Table update
+---------------------------
+The power management firmware issues an platform interrupt after updating the
+ranking table and is ready for the operating system to consume it. CPUs receive
+such interrupt and read new ranking table from shared memory which PCCT table
+has provided, then ``amd_hfi`` driver parses the new table to provide new
+consume data for scheduling decisions.
diff --git a/Documentation/arch/x86/amd-memory-encryption.rst b/Documentation/arch/x86/amd-memory-encryption.rst
new file mode 100644
index 000000000000..bd840df708ea
--- /dev/null
+++ b/Documentation/arch/x86/amd-memory-encryption.rst
@@ -0,0 +1,278 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================
+AMD Memory Encryption
+=====================
+
+Secure Memory Encryption (SME) and Secure Encrypted Virtualization (SEV) are
+features found on AMD processors.
+
+SME provides the ability to mark individual pages of memory as encrypted using
+the standard x86 page tables. A page that is marked encrypted will be
+automatically decrypted when read from DRAM and encrypted when written to
+DRAM. SME can therefore be used to protect the contents of DRAM from physical
+attacks on the system.
+
+SEV enables running encrypted virtual machines (VMs) in which the code and data
+of the guest VM are secured so that a decrypted version is available only
+within the VM itself. SEV guest VMs have the concept of private and shared
+memory. Private memory is encrypted with the guest-specific key, while shared
+memory may be encrypted with hypervisor key. When SME is enabled, the hypervisor
+key is the same key which is used in SME.
+
+A page is encrypted when a page table entry has the encryption bit set (see
+below on how to determine its position). The encryption bit can also be
+specified in the cr3 register, allowing the PGD table to be encrypted. Each
+successive level of page tables can also be encrypted by setting the encryption
+bit in the page table entry that points to the next table. This allows the full
+page table hierarchy to be encrypted. Note, this means that just because the
+encryption bit is set in cr3, doesn't imply the full hierarchy is encrypted.
+Each page table entry in the hierarchy needs to have the encryption bit set to
+achieve that. So, theoretically, you could have the encryption bit set in cr3
+so that the PGD is encrypted, but not set the encryption bit in the PGD entry
+for a PUD which results in the PUD pointed to by that entry to not be
+encrypted.
+
+When SEV is enabled, instruction pages and guest page tables are always treated
+as private. All the DMA operations inside the guest must be performed on shared
+memory. Since the memory encryption bit is controlled by the guest OS when it
+is operating in 64-bit or 32-bit PAE mode, in all other modes the SEV hardware
+forces the memory encryption bit to 1.
+
+Support for SME and SEV can be determined through the CPUID instruction. The
+CPUID function 0x8000001f reports information related to SME::
+
+ 0x8000001f[eax]:
+ Bit[0] indicates support for SME
+ Bit[1] indicates support for SEV
+ 0x8000001f[ebx]:
+ Bits[5:0] pagetable bit number used to activate memory
+ encryption
+ Bits[11:6] reduction in physical address space, in bits, when
+ memory encryption is enabled (this only affects
+ system physical addresses, not guest physical
+ addresses)
+
+If support for SME is present, MSR 0xc00100010 (MSR_AMD64_SYSCFG) can be used to
+determine if SME is enabled and/or to enable memory encryption::
+
+ 0xc0010010:
+ Bit[23] 0 = memory encryption features are disabled
+ 1 = memory encryption features are enabled
+
+If SEV is supported, MSR 0xc0010131 (MSR_AMD64_SEV) can be used to determine if
+SEV is active::
+
+ 0xc0010131:
+ Bit[0] 0 = memory encryption is not active
+ 1 = memory encryption is active
+
+Linux relies on BIOS to set this bit if BIOS has determined that the reduction
+in the physical address space as a result of enabling memory encryption (see
+CPUID information above) will not conflict with the address space resource
+requirements for the system. If this bit is not set upon Linux startup then
+Linux itself will not set it and memory encryption will not be possible.
+
+The state of SME in the Linux kernel can be documented as follows:
+
+ - Supported:
+ The CPU supports SME (determined through CPUID instruction).
+
+ - Enabled:
+ Supported and bit 23 of MSR_AMD64_SYSCFG is set.
+
+ - Active:
+ Supported, Enabled and the Linux kernel is actively applying
+ the encryption bit to page table entries (the SME mask in the
+ kernel is non-zero).
+
+SME can also be enabled and activated in the BIOS. If SME is enabled and
+activated in the BIOS, then all memory accesses will be encrypted and it
+will not be necessary to activate the Linux memory encryption support.
+
+If the BIOS merely enables SME (sets bit 23 of the MSR_AMD64_SYSCFG),
+then memory encryption can be enabled by supplying mem_encrypt=on on the
+kernel command line. However, if BIOS does not enable SME, then Linux
+will not be able to activate memory encryption, even if configured to do
+so by default or the mem_encrypt=on command line parameter is specified.
+
+Secure Nested Paging (SNP)
+==========================
+
+SEV-SNP introduces new features (SEV_FEATURES[1:63]) which can be enabled
+by the hypervisor for security enhancements. Some of these features need
+guest side implementation to function correctly. The below table lists the
+expected guest behavior with various possible scenarios of guest/hypervisor
+SNP feature support.
+
++-----------------+---------------+---------------+------------------+
+| Feature Enabled | Guest needs | Guest has | Guest boot |
+| by the HV | implementation| implementation| behaviour |
++=================+===============+===============+==================+
+| No | No | No | Boot |
+| | | | |
++-----------------+---------------+---------------+------------------+
+| No | Yes | No | Boot |
+| | | | |
++-----------------+---------------+---------------+------------------+
+| No | Yes | Yes | Boot |
+| | | | |
++-----------------+---------------+---------------+------------------+
+| Yes | No | No | Boot with |
+| | | | feature enabled |
++-----------------+---------------+---------------+------------------+
+| Yes | Yes | No | Graceful boot |
+| | | | failure |
++-----------------+---------------+---------------+------------------+
+| Yes | Yes | Yes | Boot with |
+| | | | feature enabled |
++-----------------+---------------+---------------+------------------+
+
+More details in AMD64 APM[1] Vol 2: 15.34.10 SEV_STATUS MSR
+
+Reverse Map Table (RMP)
+=======================
+
+The RMP is a structure in system memory that is used to ensure a one-to-one
+mapping between system physical addresses and guest physical addresses. Each
+page of memory that is potentially assignable to guests has one entry within
+the RMP.
+
+The RMP table can be either contiguous in memory or a collection of segments
+in memory.
+
+Contiguous RMP
+--------------
+
+Support for this form of the RMP is present when support for SEV-SNP is
+present, which can be determined using the CPUID instruction::
+
+ 0x8000001f[eax]:
+ Bit[4] indicates support for SEV-SNP
+
+The location of the RMP is identified to the hardware through two MSRs::
+
+ 0xc0010132 (RMP_BASE):
+ System physical address of the first byte of the RMP
+
+ 0xc0010133 (RMP_END):
+ System physical address of the last byte of the RMP
+
+Hardware requires that RMP_BASE and (RPM_END + 1) be 8KB aligned, but SEV
+firmware increases the alignment requirement to require a 1MB alignment.
+
+The RMP consists of a 16KB region used for processor bookkeeping followed
+by the RMP entries, which are 16 bytes in size. The size of the RMP
+determines the range of physical memory that the hypervisor can assign to
+SEV-SNP guests. The RMP covers the system physical address from::
+
+ 0 to ((RMP_END + 1 - RMP_BASE - 16KB) / 16B) x 4KB.
+
+The current Linux support relies on BIOS to allocate/reserve the memory for
+the RMP and to set RMP_BASE and RMP_END appropriately. Linux uses the MSR
+values to locate the RMP and determine the size of the RMP. The RMP must
+cover all of system memory in order for Linux to enable SEV-SNP.
+
+Segmented RMP
+-------------
+
+Segmented RMP support is a new way of representing the layout of an RMP.
+Initial RMP support required the RMP table to be contiguous in memory.
+RMP accesses from a NUMA node on which the RMP doesn't reside
+can take longer than accesses from a NUMA node on which the RMP resides.
+Segmented RMP support allows the RMP entries to be located on the same
+node as the memory the RMP is covering, potentially reducing latency
+associated with accessing an RMP entry associated with the memory. Each
+RMP segment covers a specific range of system physical addresses.
+
+Support for this form of the RMP can be determined using the CPUID
+instruction::
+
+ 0x8000001f[eax]:
+ Bit[23] indicates support for segmented RMP
+
+If supported, segmented RMP attributes can be found using the CPUID
+instruction::
+
+ 0x80000025[eax]:
+ Bits[5:0] minimum supported RMP segment size
+ Bits[11:6] maximum supported RMP segment size
+
+ 0x80000025[ebx]:
+ Bits[9:0] number of cacheable RMP segment definitions
+ Bit[10] indicates if the number of cacheable RMP segments
+ is a hard limit
+
+To enable a segmented RMP, a new MSR is available::
+
+ 0xc0010136 (RMP_CFG):
+ Bit[0] indicates if segmented RMP is enabled
+ Bits[13:8] contains the size of memory covered by an RMP
+ segment (expressed as a power of 2)
+
+The RMP segment size defined in the RMP_CFG MSR applies to all segments
+of the RMP. Therefore each RMP segment covers a specific range of system
+physical addresses. For example, if the RMP_CFG MSR value is 0x2401, then
+the RMP segment coverage value is 0x24 => 36, meaning the size of memory
+covered by an RMP segment is 64GB (1 << 36). So the first RMP segment
+covers physical addresses from 0 to 0xF_FFFF_FFFF, the second RMP segment
+covers physical addresses from 0x10_0000_0000 to 0x1F_FFFF_FFFF, etc.
+
+When a segmented RMP is enabled, RMP_BASE points to the RMP bookkeeping
+area as it does today (16K in size). However, instead of RMP entries
+beginning immediately after the bookkeeping area, there is a 4K RMP
+segment table (RST). Each entry in the RST is 8-bytes in size and represents
+an RMP segment::
+
+ Bits[19:0] mapped size (in GB)
+ The mapped size can be less than the defined segment size.
+ A value of zero, indicates that no RMP exists for the range
+ of system physical addresses associated with this segment.
+ Bits[51:20] segment physical address
+ This address is left shift 20-bits (or just masked when
+ read) to form the physical address of the segment (1MB
+ alignment).
+
+The RST can hold 512 segment entries but can be limited in size to the number
+of cacheable RMP segments (CPUID 0x80000025_EBX[9:0]) if the number of cacheable
+RMP segments is a hard limit (CPUID 0x80000025_EBX[10]).
+
+The current Linux support relies on BIOS to allocate/reserve the memory for
+the segmented RMP (the bookkeeping area, RST, and all segments), build the RST
+and to set RMP_BASE, RMP_END, and RMP_CFG appropriately. Linux uses the MSR
+values to locate the RMP and determine the size and location of the RMP
+segments. The RMP must cover all of system memory in order for Linux to enable
+SEV-SNP.
+
+More details in the AMD64 APM Vol 2, section "15.36.3 Reverse Map Table",
+docID: 24593.
+
+Secure VM Service Module (SVSM)
+===============================
+
+SNP provides a feature called Virtual Machine Privilege Levels (VMPL) which
+defines four privilege levels at which guest software can run. The most
+privileged level is 0 and numerically higher numbers have lesser privileges.
+More details in the AMD64 APM Vol 2, section "15.35.7 Virtual Machine
+Privilege Levels", docID: 24593.
+
+When using that feature, different services can run at different protection
+levels, apart from the guest OS but still within the secure SNP environment.
+They can provide services to the guest, like a vTPM, for example.
+
+When a guest is not running at VMPL0, it needs to communicate with the software
+running at VMPL0 to perform privileged operations or to interact with secure
+services. An example fur such a privileged operation is PVALIDATE which is
+*required* to be executed at VMPL0.
+
+In this scenario, the software running at VMPL0 is usually called a Secure VM
+Service Module (SVSM). Discovery of an SVSM and the API used to communicate
+with it is documented in "Secure VM Service Module for SEV-SNP Guests", docID:
+58019.
+
+(Latest versions of the above-mentioned documents can be found by using
+a search engine like duckduckgo.com and typing in:
+
+ site:amd.com "Secure VM Service Module for SEV-SNP Guests", docID: 58019
+
+for example.)
diff --git a/Documentation/arch/x86/amd_hsmp.rst b/Documentation/arch/x86/amd_hsmp.rst
new file mode 100644
index 000000000000..a094f55c10b0
--- /dev/null
+++ b/Documentation/arch/x86/amd_hsmp.rst
@@ -0,0 +1,192 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============================================
+AMD HSMP interface
+============================================
+
+Newer Fam19h(model 0x00-0x1f, 0x30-0x3f, 0x90-0x9f, 0xa0-0xaf),
+Fam1Ah(model 0x00-0x1f) EPYC server line of processors from AMD support
+system management functionality via HSMP (Host System Management Port).
+
+The Host System Management Port (HSMP) is an interface to provide
+OS-level software with access to system management functions via a
+set of mailbox registers.
+
+More details on the interface can be found in chapter
+"7 Host System Management Port (HSMP)" of the family/model PPR
+Eg: https://www.amd.com/content/dam/amd/en/documents/epyc-technical-docs/programmer-references/55898_B1_pub_0_50.zip
+
+
+HSMP interface is supported on EPYC line of server CPUs and MI300A (APU).
+
+
+HSMP device
+============================================
+
+amd_hsmp driver under drivers/platforms/x86/amd/hsmp/ has separate driver files
+for ACPI object based probing, platform device based probing and for the common
+code for these two drivers.
+
+Kconfig option CONFIG_AMD_HSMP_PLAT compiles plat.c and creates amd_hsmp.ko.
+Kconfig option CONFIG_AMD_HSMP_ACPI compiles acpi.c and creates hsmp_acpi.ko.
+Selecting any of these two configs automatically selects CONFIG_AMD_HSMP. This
+compiles common code hsmp.c and creates hsmp_common.ko module.
+
+Both the ACPI and plat drivers create the miscdevice /dev/hsmp to let
+user space programs run hsmp mailbox commands.
+
+The ACPI object format supported by the driver is defined below.
+
+$ ls -al /dev/hsmp
+crw-r--r-- 1 root root 10, 123 Jan 21 21:41 /dev/hsmp
+
+Characteristics of the dev node:
+ * Write mode is used for running set/configure commands
+ * Read mode is used for running get/status monitor commands
+
+Access restrictions:
+ * Only root user is allowed to open the file in write mode.
+ * The file can be opened in read mode by all the users.
+
+In-kernel integration:
+ * Other subsystems in the kernel can use the exported transport
+ function hsmp_send_message().
+ * Locking across callers is taken care by the driver.
+
+
+HSMP sysfs interface
+====================
+
+1. Metrics table binary sysfs
+
+AMD MI300A MCM provides GET_METRICS_TABLE message to retrieve
+most of the system management information from SMU in one go.
+
+The metrics table is made available as hexadecimal sysfs binary file
+under per socket sysfs directory created at
+/sys/devices/platform/amd_hsmp/socket%d/metrics_bin
+
+Note: lseek() is not supported as entire metrics table is read.
+
+Metrics table definitions will be documented as part of Public PPR.
+The same is defined in the amd_hsmp.h header.
+
+2. HSMP telemetry sysfs files
+
+Following sysfs files are available at /sys/devices/platform/AMDI0097:0X/.
+
+* c0_residency_input: Percentage of cores in C0 state.
+* prochot_status: Reports 1 if the processor is at thermal threshold value,
+ 0 otherwise.
+* smu_fw_version: SMU firmware version.
+* protocol_version: HSMP interface version.
+* ddr_max_bw: Theoretical maximum DDR bandwidth in GB/s.
+* ddr_utilised_bw_input: Current utilized DDR bandwidth in GB/s.
+* ddr_utilised_bw_perc_input(%): Percentage of current utilized DDR bandwidth.
+* mclk_input: Memory clock in MHz.
+* fclk_input: Fabric clock in MHz.
+* clk_fmax: Maximum frequency of socket in MHz.
+* clk_fmin: Minimum frequency of socket in MHz.
+* cclk_freq_limit_input: Core clock frequency limit per socket in MHz.
+* pwr_current_active_freq_limit: Current active frequency limit of socket
+ in MHz.
+* pwr_current_active_freq_limit_source: Source of current active frequency
+ limit.
+
+ACPI device object format
+=========================
+The ACPI object format expected from the amd_hsmp driver
+for socket with ID00 is given below::
+
+ Device(HSMP)
+ {
+ Name(_HID, "AMDI0097")
+ Name(_UID, "ID00")
+ Name(HSE0, 0x00000001)
+ Name(RBF0, ResourceTemplate()
+ {
+ Memory32Fixed(ReadWrite, 0xxxxxxx, 0x00100000)
+ })
+ Method(_CRS, 0, NotSerialized)
+ {
+ Return(RBF0)
+ }
+ Method(_STA, 0, NotSerialized)
+ {
+ If(LEqual(HSE0, One))
+ {
+ Return(0x0F)
+ }
+ Else
+ {
+ Return(Zero)
+ }
+ }
+ Name(_DSD, Package(2)
+ {
+ Buffer(0x10)
+ {
+ 0x9D, 0x61, 0x4D, 0xB7, 0x07, 0x57, 0xBD, 0x48,
+ 0xA6, 0x9F, 0x4E, 0xA2, 0x87, 0x1F, 0xC2, 0xF6
+ },
+ Package(3)
+ {
+ Package(2) {"MsgIdOffset", 0x00010934},
+ Package(2) {"MsgRspOffset", 0x00010980},
+ Package(2) {"MsgArgOffset", 0x000109E0}
+ }
+ })
+ }
+
+HSMP HWMON interface
+====================
+HSMP power sensors are registered with the hwmon interface. A separate hwmon
+directory is created for each socket and the following files are generated
+within the hwmon directory.
+- power1_input (read only)
+- power1_cap_max (read only)
+- power1_cap (read, write)
+
+An example
+==========
+
+To access hsmp device from a C program.
+First, you need to include the headers::
+
+ #include <linux/amd_hsmp.h>
+
+Which defines the supported messages/message IDs.
+
+Next thing, open the device file, as follows::
+
+ int file;
+
+ file = open("/dev/hsmp", O_RDWR);
+ if (file < 0) {
+ /* ERROR HANDLING; you can check errno to see what went wrong */
+ exit(1);
+ }
+
+The following IOCTL is defined:
+
+``ioctl(file, HSMP_IOCTL_CMD, struct hsmp_message *msg)``
+ The argument is a pointer to a::
+
+ struct hsmp_message {
+ __u32 msg_id; /* Message ID */
+ __u16 num_args; /* Number of input argument words in message */
+ __u16 response_sz; /* Number of expected output/response words */
+ __u32 args[HSMP_MAX_MSG_LEN]; /* argument/response buffer */
+ __u16 sock_ind; /* socket number */
+ };
+
+The ioctl would return a non-zero on failure; you can read errno to see
+what happened. The transaction returns 0 on success.
+
+More details on the interface and message definitions can be found in chapter
+"7 Host System Management Port (HSMP)" of the respective family/model PPR
+eg: https://www.amd.com/content/dam/amd/en/documents/epyc-technical-docs/programmer-references/55898_B1_pub_0_50.zip
+
+User space C-APIs are made available by linking against the esmi library,
+which is provided by the E-SMS project https://www.amd.com/en/developer/e-sms.html.
+See: https://github.com/amd/esmi_ib_library
diff --git a/Documentation/arch/x86/boot.rst b/Documentation/arch/x86/boot.rst
new file mode 100644
index 000000000000..77e6163288db
--- /dev/null
+++ b/Documentation/arch/x86/boot.rst
@@ -0,0 +1,1442 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================
+The Linux/x86 Boot Protocol
+===========================
+
+On the x86 platform, the Linux kernel uses a rather complicated boot
+convention. This has evolved partially due to historical aspects, as
+well as the desire in the early days to have the kernel itself be a
+bootable image, the complicated PC memory model and due to changed
+expectations in the PC industry caused by the effective demise of
+real-mode DOS as a mainstream operating system.
+
+Currently, the following versions of the Linux/x86 boot protocol exist.
+
+============= ============================================================
+Old kernels zImage/Image support only. Some very early kernels
+ may not even support a command line.
+
+Protocol 2.00 (Kernel 1.3.73) Added bzImage and initrd support, as
+ well as a formalized way to communicate between the
+ boot loader and the kernel. setup.S made relocatable,
+ although the traditional setup area still assumed
+ writable.
+
+Protocol 2.01 (Kernel 1.3.76) Added a heap overrun warning.
+
+Protocol 2.02 (Kernel 2.4.0-test3-pre3) New command line protocol.
+ Lower the conventional memory ceiling. No overwrite
+ of the traditional setup area, thus making booting
+ safe for systems which use the EBDA from SMM or 32-bit
+ BIOS entry points. zImage deprecated but still
+ supported.
+
+Protocol 2.03 (Kernel 2.4.18-pre1) Explicitly makes the highest possible
+ initrd address available to the bootloader.
+
+Protocol 2.04 (Kernel 2.6.14) Extend the syssize field to four bytes.
+
+Protocol 2.05 (Kernel 2.6.20) Make protected mode kernel relocatable.
+ Introduce relocatable_kernel and kernel_alignment fields.
+
+Protocol 2.06 (Kernel 2.6.22) Added a field that contains the size of
+ the boot command line.
+
+Protocol 2.07 (Kernel 2.6.24) Added paravirtualised boot protocol.
+ Introduced hardware_subarch and hardware_subarch_data
+ and KEEP_SEGMENTS flag in load_flags.
+
+Protocol 2.08 (Kernel 2.6.26) Added crc32 checksum and ELF format
+ payload. Introduced payload_offset and payload_length
+ fields to aid in locating the payload.
+
+Protocol 2.09 (Kernel 2.6.26) Added a field of 64-bit physical
+ pointer to single linked list of struct setup_data.
+
+Protocol 2.10 (Kernel 2.6.31) Added a protocol for relaxed alignment
+ beyond the kernel_alignment added, new init_size and
+ pref_address fields. Added extended boot loader IDs.
+
+Protocol 2.11 (Kernel 3.6) Added a field for offset of EFI handover
+ protocol entry point.
+
+Protocol 2.12 (Kernel 3.8) Added the xloadflags field and extension fields
+ to struct boot_params for loading bzImage and ramdisk
+ above 4G in 64bit.
+
+Protocol 2.13 (Kernel 3.14) Support 32- and 64-bit flags being set in
+ xloadflags to support booting a 64-bit kernel from 32-bit
+ EFI
+
+Protocol 2.14 BURNT BY INCORRECT COMMIT
+ ae7e1238e68f2a472a125673ab506d49158c1889
+ ("x86/boot: Add ACPI RSDP address to setup_header")
+ DO NOT USE!!! ASSUME SAME AS 2.13.
+
+Protocol 2.15 (Kernel 5.5) Added the kernel_info and kernel_info.setup_type_max.
+============= ============================================================
+
+.. note::
+ The protocol version number should be changed only if the setup header
+ is changed. There is no need to update the version number if boot_params
+ or kernel_info are changed. Additionally, it is recommended to use
+ xloadflags (in this case the protocol version number should not be
+ updated either) or kernel_info to communicate supported Linux kernel
+ features to the boot loader. Due to very limited space available in
+ the original setup header every update to it should be considered
+ with great care. Starting from the protocol 2.15 the primary way to
+ communicate things to the boot loader is the kernel_info.
+
+
+Memory Layout
+=============
+
+The traditional memory map for the kernel loader, used for Image or
+zImage kernels, typically looks like::
+
+ | |
+ 0A0000 +------------------------+
+ | Reserved for BIOS | Do not use. Reserved for BIOS EBDA.
+ 09A000 +------------------------+
+ | Command line |
+ | Stack/heap | For use by the kernel real-mode code.
+ 098000 +------------------------+
+ | Kernel setup | The kernel real-mode code.
+ 090200 +------------------------+
+ | Kernel boot sector | The kernel legacy boot sector.
+ 090000 +------------------------+
+ | Protected-mode kernel | The bulk of the kernel image.
+ 010000 +------------------------+
+ | Boot loader | <- Boot sector entry point 0000:7C00
+ 001000 +------------------------+
+ | Reserved for MBR/BIOS |
+ 000800 +------------------------+
+ | Typically used by MBR |
+ 000600 +------------------------+
+ | BIOS use only |
+ 000000 +------------------------+
+
+When using bzImage, the protected-mode kernel was relocated to
+0x100000 ("high memory"), and the kernel real-mode block (boot sector,
+setup, and stack/heap) was made relocatable to any address between
+0x10000 and end of low memory. Unfortunately, in protocols 2.00 and
+2.01 the 0x90000+ memory range is still used internally by the kernel;
+the 2.02 protocol resolves that problem.
+
+It is desirable to keep the "memory ceiling" -- the highest point in
+low memory touched by the boot loader -- as low as possible, since
+some newer BIOSes have begun to allocate some rather large amounts of
+memory, called the Extended BIOS Data Area, near the top of low
+memory. The boot loader should use the "INT 12h" BIOS call to verify
+how much low memory is available.
+
+Unfortunately, if INT 12h reports that the amount of memory is too
+low, there is usually nothing the boot loader can do but to report an
+error to the user. The boot loader should therefore be designed to
+take up as little space in low memory as it reasonably can. For
+zImage or old bzImage kernels, which need data written into the
+0x90000 segment, the boot loader should make sure not to use memory
+above the 0x9A000 point; too many BIOSes will break above that point.
+
+For a modern bzImage kernel with boot protocol version >= 2.02, a
+memory layout like the following is suggested::
+
+ ~ ~
+ | Protected-mode kernel |
+ 100000 +------------------------+
+ | I/O memory hole |
+ 0A0000 +------------------------+
+ | Reserved for BIOS | Leave as much as possible unused
+ ~ ~
+ | Command line | (Can also be below the X+10000 mark)
+ X+10000 +------------------------+
+ | Stack/heap | For use by the kernel real-mode code.
+ X+08000 +------------------------+
+ | Kernel setup | The kernel real-mode code.
+ | Kernel boot sector | The kernel legacy boot sector.
+ X +------------------------+
+ | Boot loader | <- Boot sector entry point 0000:7C00
+ 001000 +------------------------+
+ | Reserved for MBR/BIOS |
+ 000800 +------------------------+
+ | Typically used by MBR |
+ 000600 +------------------------+
+ | BIOS use only |
+ 000000 +------------------------+
+
+ ... where the address X is as low as the design of the boot loader permits.
+
+
+The Real-Mode Kernel Header
+===========================
+
+In the following text, and anywhere in the kernel boot sequence, "a
+sector" refers to 512 bytes. It is independent of the actual sector
+size of the underlying medium.
+
+The first step in loading a Linux kernel should be to load the
+real-mode code (boot sector and setup code) and then examine the
+following header at offset 0x01f1. The real-mode code can total up to
+32K, although the boot loader may choose to load only the first two
+sectors (1K) and then examine the bootup sector size.
+
+The header looks like:
+
+=========== ======== ===================== ============================================
+Offset/Size Proto Name Meaning
+=========== ======== ===================== ============================================
+01F1/1 ALL(1) setup_sects The size of the setup in sectors
+01F2/2 ALL root_flags If set, the root is mounted readonly
+01F4/4 2.04+(2) syssize The size of the 32-bit code in 16-byte paras
+01F8/2 ALL ram_size DO NOT USE - for bootsect.S use only
+01FA/2 ALL vid_mode Video mode control
+01FC/2 ALL root_dev Default root device number
+01FE/2 ALL boot_flag 0xAA55 magic number
+0200/2 2.00+ jump Jump instruction
+0202/4 2.00+ header Magic signature "HdrS"
+0206/2 2.00+ version Boot protocol version supported
+0208/4 2.00+ realmode_swtch Boot loader hook (see below)
+020C/2 2.00+ start_sys_seg The load-low segment (0x1000) (obsolete)
+020E/2 2.00+ kernel_version Pointer to kernel version string
+0210/1 2.00+ type_of_loader Boot loader identifier
+0211/1 2.00+ loadflags Boot protocol option flags
+0212/2 2.00+ setup_move_size Move to high memory size (used with hooks)
+0214/4 2.00+ code32_start Boot loader hook (see below)
+0218/4 2.00+ ramdisk_image initrd load address (set by boot loader)
+021C/4 2.00+ ramdisk_size initrd size (set by boot loader)
+0220/4 2.00+ bootsect_kludge DO NOT USE - for bootsect.S use only
+0224/2 2.01+ heap_end_ptr Free memory after setup end
+0226/1 2.02+(3) ext_loader_ver Extended boot loader version
+0227/1 2.02+(3) ext_loader_type Extended boot loader ID
+0228/4 2.02+ cmd_line_ptr 32-bit pointer to the kernel command line
+022C/4 2.03+ initrd_addr_max Highest legal initrd address
+0230/4 2.05+ kernel_alignment Physical addr alignment required for kernel
+0234/1 2.05+ relocatable_kernel Whether kernel is relocatable or not
+0235/1 2.10+ min_alignment Minimum alignment, as a power of two
+0236/2 2.12+ xloadflags Boot protocol option flags
+0238/4 2.06+ cmdline_size Maximum size of the kernel command line
+023C/4 2.07+ hardware_subarch Hardware subarchitecture
+0240/8 2.07+ hardware_subarch_data Subarchitecture-specific data
+0248/4 2.08+ payload_offset Offset of kernel payload
+024C/4 2.08+ payload_length Length of kernel payload
+0250/8 2.09+ setup_data 64-bit physical pointer to linked list
+ of struct setup_data
+0258/8 2.10+ pref_address Preferred loading address
+0260/4 2.10+ init_size Linear memory required during initialization
+0264/4 2.11+ handover_offset Offset of handover entry point
+0268/4 2.15+ kernel_info_offset Offset of the kernel_info
+=========== ======== ===================== ============================================
+
+.. note::
+ (1) For backwards compatibility, if the setup_sects field contains 0,
+ the real value is 4.
+
+ (2) For boot protocol prior to 2.04, the upper two bytes of the syssize
+ field are unusable, which means the size of a bzImage kernel
+ cannot be determined.
+
+ (3) Ignored, but safe to set, for boot protocols 2.02-2.09.
+
+If the "HdrS" (0x53726448) magic number is not found at offset 0x202,
+the boot protocol version is "old". Loading an old kernel, the
+following parameters should be assumed::
+
+ Image type = zImage
+ initrd not supported
+ Real-mode kernel must be located at 0x90000.
+
+Otherwise, the "version" field contains the protocol version,
+e.g. protocol version 2.01 will contain 0x0201 in this field. When
+setting fields in the header, you must make sure only to set fields
+supported by the protocol version in use.
+
+
+Details of Header Fields
+========================
+
+For each field, some are information from the kernel to the bootloader
+("read"), some are expected to be filled out by the bootloader
+("write"), and some are expected to be read and modified by the
+bootloader ("modify").
+
+All general purpose boot loaders should write the fields marked
+(obligatory). Boot loaders who want to load the kernel at a
+nonstandard address should fill in the fields marked (reloc); other
+boot loaders can ignore those fields.
+
+The byte order of all fields is little endian (this is x86, after all.)
+
+============ ===========
+Field name: setup_sects
+Type: read
+Offset/size: 0x1f1/1
+Protocol: ALL
+============ ===========
+
+ The size of the setup code in 512-byte sectors. If this field is
+ 0, the real value is 4. The real-mode code consists of the boot
+ sector (always one 512-byte sector) plus the setup code.
+
+============ =================
+Field name: root_flags
+Type: modify (optional)
+Offset/size: 0x1f2/2
+Protocol: ALL
+============ =================
+
+ If this field is nonzero, the root defaults to readonly. The use of
+ this field is deprecated; use the "ro" or "rw" options on the
+ command line instead.
+
+============ ===============================================
+Field name: syssize
+Type: read
+Offset/size: 0x1f4/4 (protocol 2.04+) 0x1f4/2 (protocol ALL)
+Protocol: 2.04+
+============ ===============================================
+
+ The size of the protected-mode code in units of 16-byte paragraphs.
+ For protocol versions older than 2.04 this field is only two bytes
+ wide, and therefore cannot be trusted for the size of a kernel if
+ the LOAD_HIGH flag is set.
+
+============ ===============
+Field name: ram_size
+Type: kernel internal
+Offset/size: 0x1f8/2
+Protocol: ALL
+============ ===============
+
+ This field is obsolete.
+
+============ ===================
+Field name: vid_mode
+Type: modify (obligatory)
+Offset/size: 0x1fa/2
+============ ===================
+
+ Please see the section on SPECIAL COMMAND LINE OPTIONS.
+
+============ =================
+Field name: root_dev
+Type: modify (optional)
+Offset/size: 0x1fc/2
+Protocol: ALL
+============ =================
+
+ The default root device device number. The use of this field is
+ deprecated, use the "root=" option on the command line instead.
+
+============ =========
+Field name: boot_flag
+Type: read
+Offset/size: 0x1fe/2
+Protocol: ALL
+============ =========
+
+ Contains 0xAA55. This is the closest thing old Linux kernels have
+ to a magic number.
+
+============ =======
+Field name: jump
+Type: read
+Offset/size: 0x200/2
+Protocol: 2.00+
+============ =======
+
+ Contains an x86 jump instruction, 0xEB followed by a signed offset
+ relative to byte 0x202. This can be used to determine the size of
+ the header.
+
+============ =======
+Field name: header
+Type: read
+Offset/size: 0x202/4
+Protocol: 2.00+
+============ =======
+
+ Contains the magic number "HdrS" (0x53726448).
+
+============ =======
+Field name: version
+Type: read
+Offset/size: 0x206/2
+Protocol: 2.00+
+============ =======
+
+ Contains the boot protocol version, in (major << 8) + minor format,
+ e.g. 0x0204 for version 2.04, and 0x0a11 for a hypothetical version
+ 10.17.
+
+============ =================
+Field name: realmode_swtch
+Type: modify (optional)
+Offset/size: 0x208/4
+Protocol: 2.00+
+============ =================
+
+ Boot loader hook (see ADVANCED BOOT LOADER HOOKS below.)
+
+============ =============
+Field name: start_sys_seg
+Type: read
+Offset/size: 0x20c/2
+Protocol: 2.00+
+============ =============
+
+ The load low segment (0x1000). Obsolete.
+
+============ ==============
+Field name: kernel_version
+Type: read
+Offset/size: 0x20e/2
+Protocol: 2.00+
+============ ==============
+
+ If set to a nonzero value, contains a pointer to a NUL-terminated
+ human-readable kernel version number string, less 0x200. This can
+ be used to display the kernel version to the user. This value
+ should be less than (0x200 * setup_sects).
+
+ For example, if this value is set to 0x1c00, the kernel version
+ number string can be found at offset 0x1e00 in the kernel file.
+ This is a valid value if and only if the "setup_sects" field
+ contains the value 15 or higher, as::
+
+ 0x1c00 < 15 * 0x200 (= 0x1e00) but
+ 0x1c00 >= 14 * 0x200 (= 0x1c00)
+
+ 0x1c00 >> 9 = 14, So the minimum value for setup_secs is 15.
+
+============ ==================
+Field name: type_of_loader
+Type: write (obligatory)
+Offset/size: 0x210/1
+Protocol: 2.00+
+============ ==================
+
+ If your boot loader has an assigned id (see table below), enter
+ 0xTV here, where T is an identifier for the boot loader and V is
+ a version number. Otherwise, enter 0xFF here.
+
+ For boot loader IDs above T = 0xD, write T = 0xE to this field and
+ write the extended ID minus 0x10 to the ext_loader_type field.
+ Similarly, the ext_loader_ver field can be used to provide more than
+ four bits for the bootloader version.
+
+ For example, for T = 0x15, V = 0x234, write::
+
+ type_of_loader <- 0xE4
+ ext_loader_type <- 0x05
+ ext_loader_ver <- 0x23
+
+ Assigned boot loader ids (hexadecimal):
+
+ == =======================================
+ 0 LILO
+ (0x00 reserved for pre-2.00 bootloader)
+ 1 Loadlin
+ 2 bootsect-loader
+ (0x20, all other values reserved)
+ 3 Syslinux
+ 4 Etherboot/gPXE/iPXE
+ 5 ELILO
+ 7 GRUB
+ 8 U-Boot
+ 9 Xen
+ A Gujin
+ B Qemu
+ C Arcturus Networks uCbootloader
+ D kexec-tools
+ E Extended (see ext_loader_type)
+ F Special (0xFF = undefined)
+ 10 Reserved
+ 11 Minimal Linux Bootloader
+ <http://sebastian-plotz.blogspot.de>
+ 12 OVMF UEFI virtualization stack
+ 13 barebox
+ == =======================================
+
+ Please contact <hpa@zytor.com> if you need a bootloader ID value assigned.
+
+============ ===================
+Field name: loadflags
+Type: modify (obligatory)
+Offset/size: 0x211/1
+Protocol: 2.00+
+============ ===================
+
+ This field is a bitmask.
+
+ Bit 0 (read): LOADED_HIGH
+
+ - If 0, the protected-mode code is loaded at 0x10000.
+ - If 1, the protected-mode code is loaded at 0x100000.
+
+ Bit 1 (kernel internal): KASLR_FLAG
+
+ - Used internally by the compressed kernel to communicate
+ KASLR status to kernel proper.
+
+ - If 1, KASLR enabled.
+ - If 0, KASLR disabled.
+
+ Bit 5 (write): QUIET_FLAG
+
+ - If 0, print early messages.
+ - If 1, suppress early messages.
+
+ This requests to the kernel (decompressor and early
+ kernel) to not write early messages that require
+ accessing the display hardware directly.
+
+ Bit 6 (obsolete): KEEP_SEGMENTS
+
+ Protocol: 2.07+
+
+ - This flag is obsolete.
+
+ Bit 7 (write): CAN_USE_HEAP
+
+ Set this bit to 1 to indicate that the value entered in the
+ heap_end_ptr is valid. If this field is clear, some setup code
+ functionality will be disabled.
+
+
+============ ===================
+Field name: setup_move_size
+Type: modify (obligatory)
+Offset/size: 0x212/2
+Protocol: 2.00-2.01
+============ ===================
+
+ When using protocol 2.00 or 2.01, if the real mode kernel is not
+ loaded at 0x90000, it gets moved there later in the loading
+ sequence. Fill in this field if you want additional data (such as
+ the kernel command line) moved in addition to the real-mode kernel
+ itself.
+
+ The unit is bytes starting with the beginning of the boot sector.
+
+ This field is can be ignored when the protocol is 2.02 or higher, or
+ if the real-mode code is loaded at 0x90000.
+
+============ ========================
+Field name: code32_start
+Type: modify (optional, reloc)
+Offset/size: 0x214/4
+Protocol: 2.00+
+============ ========================
+
+ The address to jump to in protected mode. This defaults to the load
+ address of the kernel, and can be used by the boot loader to
+ determine the proper load address.
+
+ This field can be modified for two purposes:
+
+ 1. as a boot loader hook (see Advanced Boot Loader Hooks below.)
+
+ 2. if a bootloader which does not install a hook loads a
+ relocatable kernel at a nonstandard address it will have to modify
+ this field to point to the load address.
+
+============ ==================
+Field name: ramdisk_image
+Type: write (obligatory)
+Offset/size: 0x218/4
+Protocol: 2.00+
+============ ==================
+
+ The 32-bit linear address of the initial ramdisk or ramfs. Leave at
+ zero if there is no initial ramdisk/ramfs.
+
+============ ==================
+Field name: ramdisk_size
+Type: write (obligatory)
+Offset/size: 0x21c/4
+Protocol: 2.00+
+============ ==================
+
+ Size of the initial ramdisk or ramfs. Leave at zero if there is no
+ initial ramdisk/ramfs.
+
+============ ===============
+Field name: bootsect_kludge
+Type: kernel internal
+Offset/size: 0x220/4
+Protocol: 2.00+
+============ ===============
+
+ This field is obsolete.
+
+============ ==================
+Field name: heap_end_ptr
+Type: write (obligatory)
+Offset/size: 0x224/2
+Protocol: 2.01+
+============ ==================
+
+ Set this field to the offset (from the beginning of the real-mode
+ code) of the end of the setup stack/heap, minus 0x0200.
+
+============ ================
+Field name: ext_loader_ver
+Type: write (optional)
+Offset/size: 0x226/1
+Protocol: 2.02+
+============ ================
+
+ This field is used as an extension of the version number in the
+ type_of_loader field. The total version number is considered to be
+ (type_of_loader & 0x0f) + (ext_loader_ver << 4).
+
+ The use of this field is boot loader specific. If not written, it
+ is zero.
+
+ Kernels prior to 2.6.31 did not recognize this field, but it is safe
+ to write for protocol version 2.02 or higher.
+
+============ =====================================================
+Field name: ext_loader_type
+Type: write (obligatory if (type_of_loader & 0xf0) == 0xe0)
+Offset/size: 0x227/1
+Protocol: 2.02+
+============ =====================================================
+
+ This field is used as an extension of the type number in
+ type_of_loader field. If the type in type_of_loader is 0xE, then
+ the actual type is (ext_loader_type + 0x10).
+
+ This field is ignored if the type in type_of_loader is not 0xE.
+
+ Kernels prior to 2.6.31 did not recognize this field, but it is safe
+ to write for protocol version 2.02 or higher.
+
+============ ==================
+Field name: cmd_line_ptr
+Type: write (obligatory)
+Offset/size: 0x228/4
+Protocol: 2.02+
+============ ==================
+
+ Set this field to the linear address of the kernel command line.
+ The kernel command line can be located anywhere between the end of
+ the setup heap and 0xA0000; it does not have to be located in the
+ same 64K segment as the real-mode code itself.
+
+ Fill in this field even if your boot loader does not support a
+ command line, in which case you can point this to an empty string
+ (or better yet, to the string "auto".) If this field is left at
+ zero, the kernel will assume that your boot loader does not support
+ the 2.02+ protocol.
+
+============ ===============
+Field name: initrd_addr_max
+Type: read
+Offset/size: 0x22c/4
+Protocol: 2.03+
+============ ===============
+
+ The maximum address that may be occupied by the initial
+ ramdisk/ramfs contents. For boot protocols 2.02 or earlier, this
+ field is not present, and the maximum address is 0x37FFFFFF. (This
+ address is defined as the address of the highest safe byte, so if
+ your ramdisk is exactly 131072 bytes long and this field is
+ 0x37FFFFFF, you can start your ramdisk at 0x37FE0000.)
+
+============ ============================
+Field name: kernel_alignment
+Type: read/modify (reloc)
+Offset/size: 0x230/4
+Protocol: 2.05+ (read), 2.10+ (modify)
+============ ============================
+
+ Alignment unit required by the kernel (if relocatable_kernel is
+ true.) A relocatable kernel that is loaded at an alignment
+ incompatible with the value in this field will be realigned during
+ kernel initialization.
+
+ Starting with protocol version 2.10, this reflects the kernel
+ alignment preferred for optimal performance; it is possible for the
+ loader to modify this field to permit a lesser alignment. See the
+ min_alignment and pref_address field below.
+
+============ ==================
+Field name: relocatable_kernel
+Type: read (reloc)
+Offset/size: 0x234/1
+Protocol: 2.05+
+============ ==================
+
+ If this field is nonzero, the protected-mode part of the kernel can
+ be loaded at any address that satisfies the kernel_alignment field.
+ After loading, the boot loader must set the code32_start field to
+ point to the loaded code, or to a boot loader hook.
+
+============ =============
+Field name: min_alignment
+Type: read (reloc)
+Offset/size: 0x235/1
+Protocol: 2.10+
+============ =============
+
+ This field, if nonzero, indicates as a power of two the minimum
+ alignment required, as opposed to preferred, by the kernel to boot.
+ If a boot loader makes use of this field, it should update the
+ kernel_alignment field with the alignment unit desired; typically::
+
+ kernel_alignment = 1 << min_alignment;
+
+ There may be a considerable performance cost with an excessively
+ misaligned kernel. Therefore, a loader should typically try each
+ power-of-two alignment from kernel_alignment down to this alignment.
+
+============ ==========
+Field name: xloadflags
+Type: read
+Offset/size: 0x236/2
+Protocol: 2.12+
+============ ==========
+
+ This field is a bitmask.
+
+ Bit 0 (read): XLF_KERNEL_64
+
+ - If 1, this kernel has the legacy 64-bit entry point at 0x200.
+
+ Bit 1 (read): XLF_CAN_BE_LOADED_ABOVE_4G
+
+ - If 1, kernel/boot_params/cmdline/ramdisk can be above 4G.
+
+ Bit 2 (read): XLF_EFI_HANDOVER_32
+
+ - If 1, the kernel supports the 32-bit EFI handoff entry point
+ given at handover_offset.
+
+ Bit 3 (read): XLF_EFI_HANDOVER_64
+
+ - If 1, the kernel supports the 64-bit EFI handoff entry point
+ given at handover_offset + 0x200.
+
+ Bit 4 (read): XLF_EFI_KEXEC
+
+ - If 1, the kernel supports kexec EFI boot with EFI runtime support.
+
+
+============ ============
+Field name: cmdline_size
+Type: read
+Offset/size: 0x238/4
+Protocol: 2.06+
+============ ============
+
+ The maximum size of the command line without the terminating
+ zero. This means that the command line can contain at most
+ cmdline_size characters. With protocol version 2.05 and earlier, the
+ maximum size was 255.
+
+============ ====================================
+Field name: hardware_subarch
+Type: write (optional, defaults to x86/PC)
+Offset/size: 0x23c/4
+Protocol: 2.07+
+============ ====================================
+
+ In a paravirtualized environment the hardware low level architectural
+ pieces such as interrupt handling, page table handling, and
+ accessing process control registers needs to be done differently.
+
+ This field allows the bootloader to inform the kernel we are in one
+ one of those environments.
+
+ ========== ==============================
+ 0x00000000 The default x86/PC environment
+ 0x00000001 lguest
+ 0x00000002 Xen
+ 0x00000003 Intel MID (Moorestown, CloverTrail, Merrifield, Moorefield)
+ 0x00000004 CE4100 TV Platform
+ ========== ==============================
+
+============ =========================
+Field name: hardware_subarch_data
+Type: write (subarch-dependent)
+Offset/size: 0x240/8
+Protocol: 2.07+
+============ =========================
+
+ A pointer to data that is specific to hardware subarch
+ This field is currently unused for the default x86/PC environment,
+ do not modify.
+
+============ ==============
+Field name: payload_offset
+Type: read
+Offset/size: 0x248/4
+Protocol: 2.08+
+============ ==============
+
+ If non-zero then this field contains the offset from the beginning
+ of the protected-mode code to the payload.
+
+ The payload may be compressed. The format of both the compressed and
+ uncompressed data should be determined using the standard magic
+ numbers. The currently supported compression formats are gzip
+ (magic numbers 1F 8B or 1F 9E), bzip2 (magic number 42 5A), LZMA
+ (magic number 5D 00), XZ (magic number FD 37), LZ4 (magic number
+ 02 21) and ZSTD (magic number 28 B5). The uncompressed payload is
+ currently always ELF (magic number 7F 45 4C 46).
+
+============ ==============
+Field name: payload_length
+Type: read
+Offset/size: 0x24c/4
+Protocol: 2.08+
+============ ==============
+
+ The length of the payload.
+
+============ ===============
+Field name: setup_data
+Type: write (special)
+Offset/size: 0x250/8
+Protocol: 2.09+
+============ ===============
+
+ The 64-bit physical pointer to NULL terminated single linked list of
+ struct setup_data. This is used to define a more extensible boot
+ parameters passing mechanism. The definition of struct setup_data is
+ as follow::
+
+ struct setup_data {
+ __u64 next;
+ __u32 type;
+ __u32 len;
+ __u8 data[];
+ }
+
+ Where, the next is a 64-bit physical pointer to the next node of
+ linked list, the next field of the last node is 0; the type is used
+ to identify the contents of data; the len is the length of data
+ field; the data holds the real payload.
+
+ This list may be modified at a number of points during the bootup
+ process. Therefore, when modifying this list one should always make
+ sure to consider the case where the linked list already contains
+ entries.
+
+ The setup_data is a bit awkward to use for extremely large data objects,
+ both because the setup_data header has to be adjacent to the data object
+ and because it has a 32-bit length field. However, it is important that
+ intermediate stages of the boot process have a way to identify which
+ chunks of memory are occupied by kernel data.
+
+ Thus setup_indirect struct and SETUP_INDIRECT type were introduced in
+ protocol 2.15::
+
+ struct setup_indirect {
+ __u32 type;
+ __u32 reserved; /* Reserved, must be set to zero. */
+ __u64 len;
+ __u64 addr;
+ };
+
+ The type member is a SETUP_INDIRECT | SETUP_* type. However, it cannot be
+ SETUP_INDIRECT itself since making the setup_indirect a tree structure
+ could require a lot of stack space in something that needs to parse it
+ and stack space can be limited in boot contexts.
+
+ Let's give an example how to point to SETUP_E820_EXT data using setup_indirect.
+ In this case setup_data and setup_indirect will look like this::
+
+ struct setup_data {
+ .next = 0, /* or <addr_of_next_setup_data_struct> */
+ .type = SETUP_INDIRECT,
+ .len = sizeof(setup_indirect),
+ .data[sizeof(setup_indirect)] = (struct setup_indirect) {
+ .type = SETUP_INDIRECT | SETUP_E820_EXT,
+ .reserved = 0,
+ .len = <len_of_SETUP_E820_EXT_data>,
+ .addr = <addr_of_SETUP_E820_EXT_data>,
+ },
+ }
+
+.. note::
+ SETUP_INDIRECT | SETUP_NONE objects cannot be properly distinguished
+ from SETUP_INDIRECT itself. So, this kind of objects cannot be provided
+ by the bootloaders.
+
+============ ============
+Field name: pref_address
+Type: read (reloc)
+Offset/size: 0x258/8
+Protocol: 2.10+
+============ ============
+
+ This field, if nonzero, represents a preferred load address for the
+ kernel. A relocating bootloader should attempt to load at this
+ address if possible.
+
+ A non-relocatable kernel will unconditionally move itself and to run
+ at this address. A relocatable kernel will move itself to this address if it
+ loaded below this address.
+
+============ =======
+Field name: init_size
+Type: read
+Offset/size: 0x260/4
+============ =======
+
+ This field indicates the amount of linear contiguous memory starting
+ at the kernel runtime start address that the kernel needs before it
+ is capable of examining its memory map. This is not the same thing
+ as the total amount of memory the kernel needs to boot, but it can
+ be used by a relocating boot loader to help select a safe load
+ address for the kernel.
+
+ The kernel runtime start address is determined by the following algorithm::
+
+ if (relocatable_kernel) {
+ if (load_address < pref_address)
+ load_address = pref_address;
+ runtime_start = align_up(load_address, kernel_alignment);
+ } else {
+ runtime_start = pref_address;
+ }
+
+Hence the necessary memory window location and size can be estimated by
+a boot loader as::
+
+ memory_window_start = runtime_start;
+ memory_window_size = init_size;
+
+============ ===============
+Field name: handover_offset
+Type: read
+Offset/size: 0x264/4
+============ ===============
+
+ This field is the offset from the beginning of the kernel image to
+ the EFI handover protocol entry point. Boot loaders using the EFI
+ handover protocol to boot the kernel should jump to this offset.
+
+ See EFI HANDOVER PROTOCOL below for more details.
+
+============ ==================
+Field name: kernel_info_offset
+Type: read
+Offset/size: 0x268/4
+Protocol: 2.15+
+============ ==================
+
+ This field is the offset from the beginning of the kernel image to the
+ kernel_info. The kernel_info structure is embedded in the Linux image
+ in the uncompressed protected mode region.
+
+
+The kernel_info
+===============
+
+The relationships between the headers are analogous to the various data
+sections::
+
+ setup_header = .data
+ boot_params/setup_data = .bss
+
+What is missing from the above list? That's right::
+
+ kernel_info = .rodata
+
+We have been (ab)using .data for things that could go into .rodata or .bss for
+a long time, for lack of alternatives and -- especially early on -- inertia.
+Also, the BIOS stub is responsible for creating boot_params, so it isn't
+available to a BIOS-based loader (setup_data is, though).
+
+setup_header is permanently limited to 144 bytes due to the reach of the
+2-byte jump field, which doubles as a length field for the structure, combined
+with the size of the "hole" in struct boot_params that a protected-mode loader
+or the BIOS stub has to copy it into. It is currently 119 bytes long, which
+leaves us with 25 very precious bytes. This isn't something that can be fixed
+without revising the boot protocol entirely, breaking backwards compatibility.
+
+boot_params proper is limited to 4096 bytes, but can be arbitrarily extended
+by adding setup_data entries. It cannot be used to communicate properties of
+the kernel image, because it is .bss and has no image-provided content.
+
+kernel_info solves this by providing an extensible place for information about
+the kernel image. It is readonly, because the kernel cannot rely on a
+bootloader copying its contents anywhere, but that is OK; if it becomes
+necessary it can still contain data items that an enabled bootloader would be
+expected to copy into a setup_data chunk.
+
+All kernel_info data should be part of this structure. Fixed size data have to
+be put before kernel_info_var_len_data label. Variable size data have to be put
+after kernel_info_var_len_data label. Each chunk of variable size data has to
+be prefixed with header/magic and its size, e.g.::
+
+ kernel_info:
+ .ascii "LToP" /* Header, Linux top (structure). */
+ .long kernel_info_var_len_data - kernel_info
+ .long kernel_info_end - kernel_info
+ .long 0x01234567 /* Some fixed size data for the bootloaders. */
+ kernel_info_var_len_data:
+ example_struct: /* Some variable size data for the bootloaders. */
+ .ascii "0123" /* Header/Magic. */
+ .long example_struct_end - example_struct
+ .ascii "Struct"
+ .long 0x89012345
+ example_struct_end:
+ example_strings: /* Some variable size data for the bootloaders. */
+ .ascii "ABCD" /* Header/Magic. */
+ .long example_strings_end - example_strings
+ .asciz "String_0"
+ .asciz "String_1"
+ example_strings_end:
+ kernel_info_end:
+
+This way the kernel_info is self-contained blob.
+
+.. note::
+ Each variable size data header/magic can be any 4-character string,
+ without \0 at the end of the string, which does not collide with
+ existing variable length data headers/magics.
+
+
+Details of the kernel_info Fields
+=================================
+
+============ ========
+Field name: header
+Offset/size: 0x0000/4
+============ ========
+
+ Contains the magic number "LToP" (0x506f544c).
+
+============ ========
+Field name: size
+Offset/size: 0x0004/4
+============ ========
+
+ This field contains the size of the kernel_info including kernel_info.header.
+ It does not count kernel_info.kernel_info_var_len_data size. This field should be
+ used by the bootloaders to detect supported fixed size fields in the kernel_info
+ and beginning of kernel_info.kernel_info_var_len_data.
+
+============ ========
+Field name: size_total
+Offset/size: 0x0008/4
+============ ========
+
+ This field contains the size of the kernel_info including kernel_info.header
+ and kernel_info.kernel_info_var_len_data.
+
+============ ==============
+Field name: setup_type_max
+Offset/size: 0x000c/4
+============ ==============
+
+ This field contains maximal allowed type for setup_data and setup_indirect structs.
+
+
+The Kernel Command Line
+=======================
+
+The kernel command line has become an important way for the boot
+loader to communicate with the kernel. Some of its options are also
+relevant to the boot loader itself, see "special command line options"
+below.
+
+The kernel command line is a null-terminated string. The maximum
+length can be retrieved from the field cmdline_size. Before protocol
+version 2.06, the maximum was 255 characters. A string that is too
+long will be automatically truncated by the kernel.
+
+If the boot protocol version is 2.02 or later, the address of the
+kernel command line is given by the header field cmd_line_ptr (see
+above.) This address can be anywhere between the end of the setup
+heap and 0xA0000.
+
+If the protocol version is *not* 2.02 or higher, the kernel
+command line is entered using the following protocol:
+
+ - At offset 0x0020 (word), "cmd_line_magic", enter the magic
+ number 0xA33F.
+
+ - At offset 0x0022 (word), "cmd_line_offset", enter the offset
+ of the kernel command line (relative to the start of the
+ real-mode kernel).
+
+ - The kernel command line *must* be within the memory region
+ covered by setup_move_size, so you may need to adjust this
+ field.
+
+
+Memory Layout of The Real-Mode Code
+===================================
+
+The real-mode code requires a stack/heap to be set up, as well as
+memory allocated for the kernel command line. This needs to be done
+in the real-mode accessible memory in bottom megabyte.
+
+It should be noted that modern machines often have a sizable Extended
+BIOS Data Area (EBDA). As a result, it is advisable to use as little
+of the low megabyte as possible.
+
+Unfortunately, under the following circumstances the 0x90000 memory
+segment has to be used:
+
+ - When loading a zImage kernel ((loadflags & 0x01) == 0).
+ - When loading a 2.01 or earlier boot protocol kernel.
+
+.. note::
+ For the 2.00 and 2.01 boot protocols, the real-mode code
+ can be loaded at another address, but it is internally
+ relocated to 0x90000. For the "old" protocol, the
+ real-mode code must be loaded at 0x90000.
+
+When loading at 0x90000, avoid using memory above 0x9a000.
+
+For boot protocol 2.02 or higher, the command line does not have to be
+located in the same 64K segment as the real-mode setup code; it is
+thus permitted to give the stack/heap the full 64K segment and locate
+the command line above it.
+
+The kernel command line should not be located below the real-mode
+code, nor should it be located in high memory.
+
+
+Sample Boot Configuration
+=========================
+
+As a sample configuration, assume the following layout of the real
+mode segment.
+
+ When loading below 0x90000, use the entire segment:
+
+ ============= ===================
+ 0x0000-0x7fff Real mode kernel
+ 0x8000-0xdfff Stack and heap
+ 0xe000-0xffff Kernel command line
+ ============= ===================
+
+ When loading at 0x90000 OR the protocol version is 2.01 or earlier:
+
+ ============= ===================
+ 0x0000-0x7fff Real mode kernel
+ 0x8000-0x97ff Stack and heap
+ 0x9800-0x9fff Kernel command line
+ ============= ===================
+
+Such a boot loader should enter the following fields in the header::
+
+ unsigned long base_ptr; /* base address for real-mode segment */
+
+ if (setup_sects == 0)
+ setup_sects = 4;
+
+ if (protocol >= 0x0200) {
+ type_of_loader = <type code>;
+ if (loading_initrd) {
+ ramdisk_image = <initrd_address>;
+ ramdisk_size = <initrd_size>;
+ }
+
+ if (protocol >= 0x0202 && loadflags & 0x01)
+ heap_end = 0xe000;
+ else
+ heap_end = 0x9800;
+
+ if (protocol >= 0x0201) {
+ heap_end_ptr = heap_end - 0x200;
+ loadflags |= 0x80; /* CAN_USE_HEAP */
+ }
+
+ if (protocol >= 0x0202) {
+ cmd_line_ptr = base_ptr + heap_end;
+ strcpy(cmd_line_ptr, cmdline);
+ } else {
+ cmd_line_magic = 0xA33F;
+ cmd_line_offset = heap_end;
+ setup_move_size = heap_end + strlen(cmdline) + 1;
+ strcpy(base_ptr + cmd_line_offset, cmdline);
+ }
+ } else {
+ /* Very old kernel */
+
+ heap_end = 0x9800;
+
+ cmd_line_magic = 0xA33F;
+ cmd_line_offset = heap_end;
+
+ /* A very old kernel MUST have its real-mode code loaded at 0x90000 */
+ if (base_ptr != 0x90000) {
+ /* Copy the real-mode kernel */
+ memcpy(0x90000, base_ptr, (setup_sects + 1) * 512);
+ base_ptr = 0x90000; /* Relocated */
+ }
+
+ strcpy(0x90000 + cmd_line_offset, cmdline);
+
+ /* It is recommended to clear memory up to the 32K mark */
+ memset(0x90000 + (setup_sects + 1) * 512, 0, (64 - (setup_sects + 1)) * 512);
+ }
+
+
+Loading The Rest of The Kernel
+==============================
+
+The 32-bit (non-real-mode) kernel starts at offset (setup_sects + 1) * 512
+in the kernel file (again, if setup_sects == 0 the real value is 4.)
+It should be loaded at address 0x10000 for Image/zImage kernels and
+0x100000 for bzImage kernels.
+
+The kernel is a bzImage kernel if the protocol >= 2.00 and the 0x01
+bit (LOAD_HIGH) in the loadflags field is set::
+
+ is_bzImage = (protocol >= 0x0200) && (loadflags & 0x01);
+ load_address = is_bzImage ? 0x100000 : 0x10000;
+
+.. note::
+ Image/zImage kernels can be up to 512K in size, and thus use the entire
+ 0x10000-0x90000 range of memory. This means it is pretty much a
+ requirement for these kernels to load the real-mode part at 0x90000.
+ bzImage kernels allow much more flexibility.
+
+Special Command Line Options
+============================
+
+If the command line provided by the boot loader is entered by the
+user, the user may expect the following command line options to work.
+They should normally not be deleted from the kernel command line even
+though not all of them are actually meaningful to the kernel. Boot
+loader authors who need additional command line options for the boot
+loader itself should get them registered in
+Documentation/admin-guide/kernel-parameters.rst to make sure they will not
+conflict with actual kernel options now or in the future.
+
+ vga=<mode>
+ <mode> here is either an integer (in C notation, either
+ decimal, octal, or hexadecimal) or one of the strings
+ "normal" (meaning 0xFFFF), "ext" (meaning 0xFFFE) or "ask"
+ (meaning 0xFFFD). This value should be entered into the
+ vid_mode field, as it is used by the kernel before the command
+ line is parsed.
+
+ mem=<size>
+ <size> is an integer in C notation optionally followed by
+ (case insensitive) K, M, G, T, P or E (meaning << 10, << 20,
+ << 30, << 40, << 50 or << 60). This specifies the end of
+ memory to the kernel. This affects the possible placement of
+ an initrd, since an initrd should be placed near end of
+ memory. Note that this is an option to *both* the kernel and
+ the bootloader!
+
+ initrd=<file>
+ An initrd should be loaded. The meaning of <file> is
+ obviously bootloader-dependent, and some boot loaders
+ (e.g. LILO) do not have such a command.
+
+In addition, some boot loaders add the following options to the
+user-specified command line:
+
+ BOOT_IMAGE=<file>
+ The boot image which was loaded. Again, the meaning of <file>
+ is obviously bootloader-dependent.
+
+ auto
+ The kernel was booted without explicit user intervention.
+
+If these options are added by the boot loader, it is highly
+recommended that they are located *first*, before the user-specified
+or configuration-specified command line. Otherwise, "init=/bin/sh"
+gets confused by the "auto" option.
+
+
+Running the Kernel
+==================
+
+The kernel is started by jumping to the kernel entry point, which is
+located at *segment* offset 0x20 from the start of the real mode
+kernel. This means that if you loaded your real-mode kernel code at
+0x90000, the kernel entry point is 9020:0000.
+
+At entry, ds = es = ss should point to the start of the real-mode
+kernel code (0x9000 if the code is loaded at 0x90000), sp should be
+set up properly, normally pointing to the top of the heap, and
+interrupts should be disabled. Furthermore, to guard against bugs in
+the kernel, it is recommended that the boot loader sets fs = gs = ds =
+es = ss.
+
+In our example from above, we would do::
+
+ /*
+ * Note: in the case of the "old" kernel protocol, base_ptr must
+ * be == 0x90000 at this point; see the previous sample code.
+ */
+ seg = base_ptr >> 4;
+
+ cli(); /* Enter with interrupts disabled! */
+
+ /* Set up the real-mode kernel stack */
+ _SS = seg;
+ _SP = heap_end;
+
+ _DS = _ES = _FS = _GS = seg;
+ jmp_far(seg + 0x20, 0); /* Run the kernel */
+
+If your boot sector accesses a floppy drive, it is recommended to
+switch off the floppy motor before running the kernel, since the
+kernel boot leaves interrupts off and thus the motor will not be
+switched off, especially if the loaded kernel has the floppy driver as
+a demand-loaded module!
+
+
+Advanced Boot Loader Hooks
+==========================
+
+If the boot loader runs in a particularly hostile environment (such as
+LOADLIN, which runs under DOS) it may be impossible to follow the
+standard memory location requirements. Such a boot loader may use the
+following hooks that, if set, are invoked by the kernel at the
+appropriate time. The use of these hooks should probably be
+considered an absolutely last resort!
+
+IMPORTANT: All the hooks are required to preserve %esp, %ebp, %esi and
+%edi across invocation.
+
+ realmode_swtch:
+ A 16-bit real mode far subroutine invoked immediately before
+ entering protected mode. The default routine disables NMI, so
+ your routine should probably do so, too.
+
+ code32_start:
+ A 32-bit flat-mode routine *jumped* to immediately after the
+ transition to protected mode, but before the kernel is
+ uncompressed. No segments, except CS, are guaranteed to be
+ set up (current kernels do, but older ones do not); you should
+ set them up to BOOT_DS (0x18) yourself.
+
+ After completing your hook, you should jump to the address
+ that was in this field before your boot loader overwrote it
+ (relocated, if appropriate.)
+
+
+32-bit Boot Protocol
+====================
+
+For machine with some new BIOS other than legacy BIOS, such as EFI,
+LinuxBIOS, etc, and kexec, the 16-bit real mode setup code in kernel
+based on legacy BIOS can not be used, so a 32-bit boot protocol needs
+to be defined.
+
+In 32-bit boot protocol, the first step in loading a Linux kernel
+should be to setup the boot parameters (struct boot_params,
+traditionally known as "zero page"). The memory for struct boot_params
+should be allocated and initialized to all zero. Then the setup header
+from offset 0x01f1 of kernel image on should be loaded into struct
+boot_params and examined. The end of setup header can be calculated as
+follow::
+
+ 0x0202 + byte value at offset 0x0201
+
+In addition to read/modify/write the setup header of the struct
+boot_params as that of 16-bit boot protocol, the boot loader should
+also fill the additional fields of the struct boot_params as
+described in chapter Documentation/arch/x86/zero-page.rst.
+
+After setting up the struct boot_params, the boot loader can load the
+32/64-bit kernel in the same way as that of 16-bit boot protocol.
+
+In 32-bit boot protocol, the kernel is started by jumping to the
+32-bit kernel entry point, which is the start address of loaded
+32/64-bit kernel.
+
+At entry, the CPU must be in 32-bit protected mode with paging
+disabled; a GDT must be loaded with the descriptors for selectors
+__BOOT_CS(0x10) and __BOOT_DS(0x18); both descriptors must be 4G flat
+segment; __BOOT_CS must have execute/read permission, and __BOOT_DS
+must have read/write permission; CS must be __BOOT_CS and DS, ES, SS
+must be __BOOT_DS; interrupt must be disabled; %esi must hold the base
+address of the struct boot_params; %ebp, %edi and %ebx must be zero.
+
+64-bit Boot Protocol
+====================
+
+For machine with 64bit cpus and 64bit kernel, we could use 64bit bootloader
+and we need a 64-bit boot protocol.
+
+In 64-bit boot protocol, the first step in loading a Linux kernel
+should be to setup the boot parameters (struct boot_params,
+traditionally known as "zero page"). The memory for struct boot_params
+could be allocated anywhere (even above 4G) and initialized to all zero.
+Then, the setup header at offset 0x01f1 of kernel image on should be
+loaded into struct boot_params and examined. The end of setup header
+can be calculated as follows::
+
+ 0x0202 + byte value at offset 0x0201
+
+In addition to read/modify/write the setup header of the struct
+boot_params as that of 16-bit boot protocol, the boot loader should
+also fill the additional fields of the struct boot_params as described
+in chapter Documentation/arch/x86/zero-page.rst.
+
+After setting up the struct boot_params, the boot loader can load
+64-bit kernel in the same way as that of 16-bit boot protocol, but
+kernel could be loaded above 4G.
+
+In 64-bit boot protocol, the kernel is started by jumping to the
+64-bit kernel entry point, which is the start address of loaded
+64-bit kernel plus 0x200.
+
+At entry, the CPU must be in 64-bit mode with paging enabled.
+The range with setup_header.init_size from start address of loaded
+kernel and zero page and command line buffer get ident mapping;
+a GDT must be loaded with the descriptors for selectors
+__BOOT_CS(0x10) and __BOOT_DS(0x18); both descriptors must be 4G flat
+segment; __BOOT_CS must have execute/read permission, and __BOOT_DS
+must have read/write permission; CS must be __BOOT_CS and DS, ES, SS
+must be __BOOT_DS; interrupt must be disabled; %rsi must hold the base
+address of the struct boot_params.
+
+EFI Handover Protocol (deprecated)
+==================================
+
+This protocol allows boot loaders to defer initialisation to the EFI
+boot stub. The boot loader is required to load the kernel/initrd(s)
+from the boot media and jump to the EFI handover protocol entry point
+which is hdr->handover_offset bytes from the beginning of
+startup_{32,64}.
+
+The boot loader MUST respect the kernel's PE/COFF metadata when it comes
+to section alignment, the memory footprint of the executable image beyond
+the size of the file itself, and any other aspect of the PE/COFF header
+that may affect correct operation of the image as a PE/COFF binary in the
+execution context provided by the EFI firmware.
+
+The function prototype for the handover entry point looks like this::
+
+ void efi_stub_entry(void *handle, efi_system_table_t *table, struct boot_params *bp);
+
+'handle' is the EFI image handle passed to the boot loader by the EFI
+firmware, 'table' is the EFI system table - these are the first two
+arguments of the "handoff state" as described in section 2.3 of the
+UEFI specification. 'bp' is the boot loader-allocated boot params.
+
+The boot loader *must* fill out the following fields in bp::
+
+ - hdr.cmd_line_ptr
+ - hdr.ramdisk_image (if applicable)
+ - hdr.ramdisk_size (if applicable)
+
+All other fields should be zero.
+
+.. note::
+ The EFI Handover Protocol is deprecated in favour of the ordinary PE/COFF
+ entry point, combined with the LINUX_EFI_INITRD_MEDIA_GUID based initrd
+ loading protocol (refer to [0] for an example of the bootloader side of
+ this), which removes the need for any knowledge on the part of the EFI
+ bootloader regarding the internal representation of boot_params or any
+ requirements/limitations regarding the placement of the command line
+ and ramdisk in memory, or the placement of the kernel image itself.
+
+[0] https://github.com/u-boot/u-boot/commit/ec80b4735a593961fe701cc3a5d717d4739b0fd0
diff --git a/Documentation/arch/x86/booting-dt.rst b/Documentation/arch/x86/booting-dt.rst
new file mode 100644
index 000000000000..b089ffd56e6e
--- /dev/null
+++ b/Documentation/arch/x86/booting-dt.rst
@@ -0,0 +1,21 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+DeviceTree Booting
+------------------
+
+ There is one single 32bit entry point to the kernel at code32_start,
+ the decompressor (the real mode entry point goes to the same 32bit
+ entry point once it switched into protected mode). That entry point
+ supports one calling convention which is documented in
+ Documentation/arch/x86/boot.rst
+ The physical pointer to the device-tree block is passed via setup_data
+ which requires at least boot protocol 2.09.
+ The type filed is defined as
+
+ #define SETUP_DTB 2
+
+ This device-tree is used as an extension to the "boot page". As such it
+ does not parse / consider data which is already covered by the boot
+ page. This includes memory size, reserved ranges, command line arguments
+ or initrd address. It simply holds information which can not be retrieved
+ otherwise like interrupt routing or a list of devices behind an I2C bus.
diff --git a/Documentation/arch/x86/buslock.rst b/Documentation/arch/x86/buslock.rst
new file mode 100644
index 000000000000..31f1bfdff16f
--- /dev/null
+++ b/Documentation/arch/x86/buslock.rst
@@ -0,0 +1,133 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: <isonum.txt>
+
+===============================
+Bus lock detection and handling
+===============================
+
+:Copyright: |copy| 2021 Intel Corporation
+:Authors: - Fenghua Yu <fenghua.yu@intel.com>
+ - Tony Luck <tony.luck@intel.com>
+
+Problem
+=======
+
+A split lock is any atomic operation whose operand crosses two cache lines.
+Since the operand spans two cache lines and the operation must be atomic,
+the system locks the bus while the CPU accesses the two cache lines.
+
+A bus lock is acquired through either split locked access to writeback (WB)
+memory or any locked access to non-WB memory. This is typically thousands of
+cycles slower than an atomic operation within a cache line. It also disrupts
+performance on other cores and brings the whole system to its knees.
+
+Detection
+=========
+
+Intel processors may support either or both of the following hardware
+mechanisms to detect split locks and bus locks. Some AMD processors also
+support bus lock detect.
+
+#AC exception for split lock detection
+--------------------------------------
+
+Beginning with the Tremont Atom CPU split lock operations may raise an
+Alignment Check (#AC) exception when a split lock operation is attempted.
+
+#DB exception for bus lock detection
+------------------------------------
+
+Some CPUs have the ability to notify the kernel by an #DB trap after a user
+instruction acquires a bus lock and is executed. This allows the kernel to
+terminate the application or to enforce throttling.
+
+Software handling
+=================
+
+The kernel #AC and #DB handlers handle bus lock based on the kernel
+parameter "split_lock_detect". Here is a summary of different options:
+
++------------------+----------------------------+-----------------------+
+|split_lock_detect=|#AC for split lock |#DB for bus lock |
++------------------+----------------------------+-----------------------+
+|off |Do nothing |Do nothing |
++------------------+----------------------------+-----------------------+
+|warn |Kernel OOPs |Warn once per task and |
+|(default) |Warn once per task, add a |and continues to run. |
+| |delay, add synchronization | |
+| |to prevent more than one | |
+| |core from executing a | |
+| |split lock in parallel. | |
+| |sysctl split_lock_mitigate | |
+| |can be used to avoid the | |
+| |delay and synchronization | |
+| |When both features are | |
+| |supported, warn in #AC | |
++------------------+----------------------------+-----------------------+
+|fatal |Kernel OOPs |Send SIGBUS to user. |
+| |Send SIGBUS to user | |
+| |When both features are | |
+| |supported, fatal in #AC | |
++------------------+----------------------------+-----------------------+
+|ratelimit:N |Do nothing |Limit bus lock rate to |
+|(0 < N <= 1000) | |N bus locks per second |
+| | |system wide and warn on|
+| | |bus locks. |
++------------------+----------------------------+-----------------------+
+
+Usages
+======
+
+Detecting and handling bus lock may find usages in various areas:
+
+It is critical for real time system designers who build consolidated real
+time systems. These systems run hard real time code on some cores and run
+"untrusted" user processes on other cores. The hard real time cannot afford
+to have any bus lock from the untrusted processes to hurt real time
+performance. To date the designers have been unable to deploy these
+solutions as they have no way to prevent the "untrusted" user code from
+generating split lock and bus lock to block the hard real time code to
+access memory during bus locking.
+
+It's also useful for general computing to prevent guests or user
+applications from slowing down the overall system by executing instructions
+with bus lock.
+
+
+Guidance
+========
+off
+---
+
+Disable checking for split lock and bus lock. This option can be useful if
+there are legacy applications that trigger these events at a low rate so
+that mitigation is not needed.
+
+warn
+----
+
+A warning is emitted when a bus lock is detected which allows to identify
+the offending application. This is the default behavior.
+
+fatal
+-----
+
+In this case, the bus lock is not tolerated and the process is killed.
+
+ratelimit
+---------
+
+A system wide bus lock rate limit N is specified where 0 < N <= 1000. This
+allows a bus lock rate up to N bus locks per second. When the bus lock rate
+is exceeded then any task which is caught via the buslock #DB exception is
+throttled by enforced sleeps until the rate goes under the limit again.
+
+This is an effective mitigation in cases where a minimal impact can be
+tolerated, but an eventual Denial of Service attack has to be prevented. It
+allows to identify the offending processes and analyze whether they are
+malicious or just badly written.
+
+Selecting a rate limit of 1000 allows the bus to be locked for up to about
+seven million cycles each second (assuming 7000 cycles for each bus
+lock). On a 2 GHz processor that would be about 0.35% system slowdown.
diff --git a/Documentation/arch/x86/cpuinfo.rst b/Documentation/arch/x86/cpuinfo.rst
new file mode 100644
index 000000000000..9f2e47c4b1c8
--- /dev/null
+++ b/Documentation/arch/x86/cpuinfo.rst
@@ -0,0 +1,202 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=================
+x86 Feature Flags
+=================
+
+Introduction
+============
+
+The list of feature flags in /proc/cpuinfo is not complete and
+represents an ill-fated attempt from long time ago to put feature flags
+in an easy to find place for userspace.
+
+However, the number of feature flags is growing with each CPU generation,
+leading to unparseable and unwieldy /proc/cpuinfo.
+
+What is more, those feature flags do not even need to be in that file
+because userspace doesn't care about them - glibc et al already use
+CPUID to find out what the target machine supports and what not.
+
+And even if it doesn't show a particular feature flag - although the CPU
+still does have support for the respective hardware functionality and
+said CPU supports CPUID faulting - userspace can simply probe for the
+feature and figure out if it is supported or not, regardless of whether
+it is being advertised somewhere.
+
+Furthermore, those flag strings become an ABI the moment they appear
+there and maintaining them forever when nothing even uses them is a lot
+of wasted effort.
+
+So, the current use of /proc/cpuinfo is to show features which the
+kernel has *enabled* and *supports*. As in: the CPUID feature flag is
+there, there's an additional setup which the kernel has done while
+booting and the functionality is ready to use. A perfect example for
+that is "user_shstk" where additional code enablement is present in the
+kernel to support shadow stack for user programs.
+
+So, if users want to know if a feature is available on a given system,
+they try to find the flag in /proc/cpuinfo. If a given flag is present,
+it means that
+
+* the kernel knows about the feature enough to have an X86_FEATURE bit
+
+* the kernel supports it and is currently making it available either to
+ userspace or some other part of the kernel
+
+* if the flag represents a hardware feature the hardware supports it.
+
+The absence of a flag in /proc/cpuinfo by itself means almost nothing to
+an end user.
+
+On the one hand, a feature like "vaes" might be fully available to user
+applications on a kernel that has not defined X86_FEATURE_VAES and thus
+there is no "vaes" in /proc/cpuinfo.
+
+On the other hand, a new kernel running on non-VAES hardware would also
+have no "vaes" in /proc/cpuinfo. There's no way for an application or
+user to tell the difference.
+
+The end result is that the flags field in /proc/cpuinfo is marginally
+useful for kernel debugging, but not really for anything else.
+Applications should instead use things like the glibc facilities for
+querying CPU support. Users should rely on tools like
+tools/arch/x86/kcpuid and cpuid(1).
+
+Regarding implementation, flags appearing in /proc/cpuinfo have an
+X86_FEATURE definition in arch/x86/include/asm/cpufeatures.h. These flags
+represent hardware features as well as software features.
+
+If the kernel cares about a feature or KVM want to expose the feature to
+a KVM guest, it should only then expose it to the guest when the guest
+needs to parse /proc/cpuinfo. Which, as mentioned above, is highly
+unlikely. KVM can synthesize the CPUID bit and the KVM guest can simply
+query CPUID and figure out what the hypervisor supports and what not. As
+already stated, /proc/cpuinfo is not a dumping ground for useless
+feature flags.
+
+
+How are feature flags created?
+==============================
+
+Feature flags can be derived from the contents of CPUID leaves
+--------------------------------------------------------------
+
+These feature definitions are organized mirroring the layout of CPUID
+leaves and grouped in words with offsets as mapped in enum cpuid_leafs
+in cpufeatures.h (see arch/x86/include/asm/cpufeatures.h for details).
+If a feature is defined with a X86_FEATURE_<name> definition in
+cpufeatures.h, and if it is detected at run time, the flags will be
+displayed accordingly in /proc/cpuinfo. For example, the flag "avx2"
+comes from X86_FEATURE_AVX2 in cpufeatures.h.
+
+Flags can be from scattered CPUID-based features
+------------------------------------------------
+
+Hardware features enumerated in sparsely populated CPUID leaves get
+software-defined values. Still, CPUID needs to be queried to determine
+if a given feature is present. This is done in init_scattered_cpuid_features().
+For instance, X86_FEATURE_CQM_LLC is defined as 11*32 + 0 and its presence is
+checked at runtime in the respective CPUID leaf [EAX=f, ECX=0] bit EDX[1].
+
+The intent of scattering CPUID leaves is to not bloat struct
+cpuinfo_x86.x86_capability[] unnecessarily. For instance, the CPUID leaf
+[EAX=7, ECX=0] has 30 features and is dense, but the CPUID leaf [EAX=7, EAX=1]
+has only one feature and would waste 31 bits of space in the x86_capability[]
+array. Since there is a struct cpuinfo_x86 for each possible CPU, the wasted
+memory is not trivial.
+
+Flags can be created synthetically under certain conditions for hardware features
+---------------------------------------------------------------------------------
+
+Examples of conditions include whether certain features are present in
+MSR_IA32_CORE_CAPS or specific CPU models are identified. If the needed
+conditions are met, the features are enabled by the set_cpu_cap or
+setup_force_cpu_cap macros. For example, if bit 5 is set in MSR_IA32_CORE_CAPS,
+the feature X86_FEATURE_SPLIT_LOCK_DETECT will be enabled and
+"split_lock_detect" will be displayed. The flag "ring3mwait" will be
+displayed only when running on INTEL_XEON_PHI_[KNL|KNM] processors.
+
+Flags can represent purely software features
+--------------------------------------------
+These flags do not represent hardware features. Instead, they represent a
+software feature implemented in the kernel. For example, Kernel Page Table
+Isolation is purely software feature and its feature flag X86_FEATURE_PTI is
+also defined in cpufeatures.h.
+
+Naming of Flags
+===============
+
+The script arch/x86/kernel/cpu/mkcapflags.sh processes the
+#define X86_FEATURE_<name> from cpufeatures.h and generates the
+x86_cap/bug_flags[] arrays in kernel/cpu/capflags.c. The names in the
+resulting x86_cap/bug_flags[] are used to populate /proc/cpuinfo. The naming
+of flags in the x86_cap/bug_flags[] are as follows:
+
+Flags do not appear by default in /proc/cpuinfo
+-----------------------------------------------
+
+Feature flags are omitted by default from /proc/cpuinfo as it does not make
+sense for the feature to be exposed to userspace in most cases. For example,
+X86_FEATURE_ALWAYS is defined in cpufeatures.h but that flag is an internal
+kernel feature used in the alternative runtime patching functionality. So the
+flag does not appear in /proc/cpuinfo.
+
+Specify a flag name if absolutely needed
+----------------------------------------
+
+If the comment on the line for the #define X86_FEATURE_* starts with a
+double-quote character (""), the string inside the double-quote characters
+will be the name of the flags. For example, the flag "sse4_1" comes from
+the comment "sse4_1" following the X86_FEATURE_XMM4_1 definition.
+
+There are situations in which overriding the displayed name of the flag is
+needed. For instance, /proc/cpuinfo is a userspace interface and must remain
+constant. If, for some reason, the naming of X86_FEATURE_<name> changes, one
+shall override the new naming with the name already used in /proc/cpuinfo.
+
+Flags are missing when one or more of these happen
+==================================================
+
+The hardware does not enumerate support for it
+----------------------------------------------
+
+For example, when a new kernel is running on old hardware or the feature is
+not enabled by boot firmware. Even if the hardware is new, there might be a
+problem enabling the feature at run time, the flag will not be displayed.
+
+The kernel does not know about the flag
+---------------------------------------
+
+For example, when an old kernel is running on new hardware.
+
+The kernel disabled support for it at compile-time
+--------------------------------------------------
+
+For example, if Linear Address Masking (LAM) is not enabled when building (i.e.,
+CONFIG_ADDRESS_MASKING is not selected) the flag "lam" will not show up.
+Even though the feature will still be detected via CPUID, the kernel disables
+it by clearing via setup_clear_cpu_cap(X86_FEATURE_LAM).
+
+The feature is disabled at boot-time
+------------------------------------
+A feature can be disabled either using a command-line parameter or because
+it failed to be enabled. The command-line parameter clearcpuid= can be used
+to disable features using the feature number as defined in
+/arch/x86/include/asm/cpufeatures.h. For instance, User Mode Instruction
+Protection can be disabled using clearcpuid=514. The number 514 is calculated
+from #define X86_FEATURE_UMIP (16*32 + 2).
+
+In addition, there exists a variety of custom command-line parameters that
+disable specific features. The list of parameters includes, but is not limited
+to, nofsgsbase, nosgx, noxsave, etc. 5-level paging can also be disabled using
+"no5lvl".
+
+The feature was known to be non-functional
+------------------------------------------
+
+The feature was known to be non-functional because a dependency was
+missing at runtime. For example, AVX flags will not show up if XSAVE feature
+is disabled since they depend on XSAVE feature. Another example would be broken
+CPUs and them missing microcode patches. Due to that, the kernel decides not to
+enable a feature.
diff --git a/Documentation/arch/x86/earlyprintk.rst b/Documentation/arch/x86/earlyprintk.rst
new file mode 100644
index 000000000000..51ef11e8f725
--- /dev/null
+++ b/Documentation/arch/x86/earlyprintk.rst
@@ -0,0 +1,151 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============
+Early Printk
+============
+
+Mini-HOWTO for using the earlyprintk=dbgp boot option with a
+USB2 Debug port key and a debug cable, on x86 systems.
+
+You need two computers, the 'USB debug key' special gadget and
+two USB cables, connected like this::
+
+ [host/target] <-------> [USB debug key] <-------> [client/console]
+
+Hardware requirements
+=====================
+
+ a) Host/target system needs to have USB debug port capability.
+
+ You can check this capability by looking at a 'Debug port' bit in
+ the lspci -vvv output::
+
+ # lspci -vvv
+ ...
+ 00:1d.7 USB Controller: Intel Corporation 82801H (ICH8 Family) USB2 EHCI Controller #1 (rev 03) (prog-if 20 [EHCI])
+ Subsystem: Lenovo ThinkPad T61
+ Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR+ FastB2B- DisINTx-
+ Status: Cap+ 66MHz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
+ Latency: 0
+ Interrupt: pin D routed to IRQ 19
+ Region 0: Memory at fe227000 (32-bit, non-prefetchable) [size=1K]
+ Capabilities: [50] Power Management version 2
+ Flags: PMEClk- DSI- D1- D2- AuxCurrent=375mA PME(D0+,D1-,D2-,D3hot+,D3cold+)
+ Status: D0 PME-Enable- DSel=0 DScale=0 PME+
+ Capabilities: [58] Debug port: BAR=1 offset=00a0
+ ^^^^^^^^^^^ <==================== [ HERE ]
+ Kernel driver in use: ehci_hcd
+ Kernel modules: ehci-hcd
+ ...
+
+ .. note::
+ If your system does not list a debug port capability then you probably
+ won't be able to use the USB debug key.
+
+ b) You also need a NetChip USB debug cable/key:
+
+ http://www.plxtech.com/products/NET2000/NET20DC/default.asp
+
+ This is a small blue plastic connector with two USB connections;
+ it draws power from its USB connections.
+
+ c) You need a second client/console system with a high speed USB 2.0 port.
+
+ d) The NetChip device must be plugged directly into the physical
+ debug port on the "host/target" system. You cannot use a USB hub in
+ between the physical debug port and the "host/target" system.
+
+ The EHCI debug controller is bound to a specific physical USB
+ port and the NetChip device will only work as an early printk
+ device in this port. The EHCI host controllers are electrically
+ wired such that the EHCI debug controller is hooked up to the
+ first physical port and there is no way to change this via software.
+ You can find the physical port through experimentation by trying
+ each physical port on the system and rebooting. Or you can try
+ and use lsusb or look at the kernel info messages emitted by the
+ usb stack when you plug a usb device into various ports on the
+ "host/target" system.
+
+ Some hardware vendors do not expose the usb debug port with a
+ physical connector and if you find such a device send a complaint
+ to the hardware vendor, because there is no reason not to wire
+ this port into one of the physically accessible ports.
+
+ e) It is also important to note, that many versions of the NetChip
+ device require the "client/console" system to be plugged into the
+ right hand side of the device (with the product logo facing up and
+ readable left to right). The reason being is that the 5 volt
+ power supply is taken from only one side of the device and it
+ must be the side that does not get rebooted.
+
+Software requirements
+=====================
+
+ a) On the host/target system:
+
+ You need to enable the following kernel config option::
+
+ CONFIG_EARLY_PRINTK_DBGP=y
+
+ And you need to add the boot command line: "earlyprintk=dbgp".
+
+ .. note::
+ If you are using Grub, append it to the 'kernel' line in
+ /etc/grub.conf. If you are using Grub2 on a BIOS firmware system,
+ append it to the 'linux' line in /boot/grub2/grub.cfg. If you are
+ using Grub2 on an EFI firmware system, append it to the 'linux'
+ or 'linuxefi' line in /boot/grub2/grub.cfg or
+ /boot/efi/EFI/<distro>/grub.cfg.
+
+ On systems with more than one EHCI debug controller you must
+ specify the correct EHCI debug controller number. The ordering
+ comes from the PCI bus enumeration of the EHCI controllers. The
+ default with no number argument is "0" or the first EHCI debug
+ controller. To use the second EHCI debug controller, you would
+ use the command line: "earlyprintk=dbgp1"
+
+ .. note::
+ normally earlyprintk console gets turned off once the
+ regular console is alive - use "earlyprintk=dbgp,keep" to keep
+ this channel open beyond early bootup. This can be useful for
+ debugging crashes under Xorg, etc.
+
+ b) On the client/console system:
+
+ You should enable the following kernel config option::
+
+ CONFIG_USB_SERIAL_DEBUG=y
+
+ On the next bootup with the modified kernel you should
+ get a /dev/ttyUSBx device(s).
+
+ Now this channel of kernel messages is ready to be used: start
+ your favorite terminal emulator (minicom, etc.) and set
+ it up to use /dev/ttyUSB0 - or use a raw 'cat /dev/ttyUSBx' to
+ see the raw output.
+
+ c) On Nvidia Southbridge based systems: the kernel will try to probe
+ and find out which port has a debug device connected.
+
+Testing
+=======
+
+You can test the output by using earlyprintk=dbgp,keep and provoking
+kernel messages on the host/target system. You can provoke a harmless
+kernel message by for example doing::
+
+ echo h > /proc/sysrq-trigger
+
+On the host/target system you should see this help line in "dmesg" output::
+
+ SysRq : HELP : loglevel(0-9) reBoot Crashdump terminate-all-tasks(E) memory-full-oom-kill(F) kill-all-tasks(I) saK show-backtrace-all-active-cpus(L) show-memory-usage(M) nice-all-RT-tasks(N) powerOff show-registers(P) show-all-timers(Q) unRaw Sync show-task-states(T) Unmount show-blocked-tasks(W) dump-ftrace-buffer(Z)
+
+On the client/console system do::
+
+ cat /dev/ttyUSB0
+
+And you should see the help line above displayed shortly after you've
+provoked it on the host system.
+
+If it does not work then please ask about it on the linux-kernel@vger.kernel.org
+mailing list or contact the x86 maintainers.
diff --git a/Documentation/arch/x86/elf_auxvec.rst b/Documentation/arch/x86/elf_auxvec.rst
new file mode 100644
index 000000000000..18e4744717f9
--- /dev/null
+++ b/Documentation/arch/x86/elf_auxvec.rst
@@ -0,0 +1,53 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==================================
+x86-specific ELF Auxiliary Vectors
+==================================
+
+This document describes the semantics of the x86 auxiliary vectors.
+
+Introduction
+============
+
+ELF Auxiliary vectors enable the kernel to efficiently provide
+configuration-specific parameters to userspace. In this example, a program
+allocates an alternate stack based on the kernel-provided size::
+
+ #include <sys/auxv.h>
+ #include <elf.h>
+ #include <signal.h>
+ #include <stdlib.h>
+ #include <assert.h>
+ #include <err.h>
+
+ #ifndef AT_MINSIGSTKSZ
+ #define AT_MINSIGSTKSZ 51
+ #endif
+
+ ....
+ stack_t ss;
+
+ ss.ss_sp = malloc(ss.ss_size);
+ assert(ss.ss_sp);
+
+ ss.ss_size = getauxval(AT_MINSIGSTKSZ) + SIGSTKSZ;
+ ss.ss_flags = 0;
+
+ if (sigaltstack(&ss, NULL))
+ err(1, "sigaltstack");
+
+
+The exposed auxiliary vectors
+=============================
+
+AT_SYSINFO is used for locating the vsyscall entry point. It is not
+exported on 64-bit mode.
+
+AT_SYSINFO_EHDR is the start address of the page containing the vDSO.
+
+AT_MINSIGSTKSZ denotes the minimum stack size required by the kernel to
+deliver a signal to user-space. AT_MINSIGSTKSZ comprehends the space
+consumed by the kernel to accommodate the user context for the current
+hardware configuration. It does not comprehend subsequent user-space stack
+consumption, which must be added by the user. (e.g. Above, user-space adds
+SIGSTKSZ to AT_MINSIGSTKSZ.)
diff --git a/Documentation/arch/x86/entry_64.rst b/Documentation/arch/x86/entry_64.rst
new file mode 100644
index 000000000000..0afdce3c06f4
--- /dev/null
+++ b/Documentation/arch/x86/entry_64.rst
@@ -0,0 +1,110 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============
+Kernel Entries
+==============
+
+This file documents some of the kernel entries in
+arch/x86/entry/entry_64.S. A lot of this explanation is adapted from
+an email from Ingo Molnar:
+
+https://lore.kernel.org/r/20110529191055.GC9835%40elte.hu
+
+The x86 architecture has quite a few different ways to jump into
+kernel code. Most of these entry points are registered in
+arch/x86/kernel/traps.c and implemented in arch/x86/entry/entry_64.S
+for 64-bit, arch/x86/entry/entry_32.S for 32-bit and finally
+arch/x86/entry/entry_64_compat.S which implements the 32-bit compatibility
+syscall entry points and thus provides for 32-bit processes the
+ability to execute syscalls when running on 64-bit kernels.
+
+The IDT vector assignments are listed in arch/x86/include/asm/irq_vectors.h.
+
+Some of these entries are:
+
+ - system_call: syscall instruction from 64-bit code.
+
+ - entry_INT80_compat: int 0x80 from 32-bit or 64-bit code; compat syscall
+ either way.
+
+ - entry_INT80_compat, ia32_sysenter: syscall and sysenter from 32-bit
+ code
+
+ - interrupt: An array of entries. Every IDT vector that doesn't
+ explicitly point somewhere else gets set to the corresponding
+ value in interrupts. These point to a whole array of
+ magically-generated functions that make their way to common_interrupt()
+ with the interrupt number as a parameter.
+
+ - APIC interrupts: Various special-purpose interrupts for things
+ like TLB shootdown.
+
+ - Architecturally-defined exceptions like divide_error.
+
+There are a few complexities here. The different x86-64 entries
+have different calling conventions. The syscall and sysenter
+instructions have their own peculiar calling conventions. Some of
+the IDT entries push an error code onto the stack; others don't.
+IDT entries using the IST alternative stack mechanism need their own
+magic to get the stack frames right. (You can find some
+documentation in the AMD APM, Volume 2, Chapter 8 and the Intel SDM,
+Volume 3, Chapter 6.)
+
+Dealing with the swapgs instruction is especially tricky. Swapgs
+toggles whether gs is the kernel gs or the user gs. The swapgs
+instruction is rather fragile: it must nest perfectly and only in
+single depth, it should only be used if entering from user mode to
+kernel mode and then when returning to user-space, and precisely
+so. If we mess that up even slightly, we crash.
+
+So when we have a secondary entry, already in kernel mode, we *must
+not* use SWAPGS blindly - nor must we forget doing a SWAPGS when it's
+not switched/swapped yet.
+
+Now, there's a secondary complication: there's a cheap way to test
+which mode the CPU is in and an expensive way.
+
+The cheap way is to pick this info off the entry frame on the kernel
+stack, from the CS of the ptregs area of the kernel stack::
+
+ xorl %ebx,%ebx
+ testl $3,CS+8(%rsp)
+ je error_kernelspace
+ SWAPGS
+
+The expensive (paranoid) way is to read back the MSR_GS_BASE value
+(which is what SWAPGS modifies)::
+
+ movl $1,%ebx
+ movl $MSR_GS_BASE,%ecx
+ rdmsr
+ testl %edx,%edx
+ js 1f /* negative -> in kernel */
+ SWAPGS
+ xorl %ebx,%ebx
+ 1: ret
+
+If we are at an interrupt or user-trap/gate-alike boundary then we can
+use the faster check: the stack will be a reliable indicator of
+whether SWAPGS was already done: if we see that we are a secondary
+entry interrupting kernel mode execution, then we know that the GS
+base has already been switched. If it says that we interrupted
+user-space execution then we must do the SWAPGS.
+
+But if we are in an NMI/MCE/DEBUG/whatever super-atomic entry context,
+which might have triggered right after a normal entry wrote CS to the
+stack but before we executed SWAPGS, then the only safe way to check
+for GS is the slower method: the RDMSR.
+
+Therefore, super-atomic entries (except NMI, which is handled separately)
+must use idtentry with paranoid=1 to handle gsbase correctly. This
+triggers three main behavior changes:
+
+ - Interrupt entry will use the slower gsbase check.
+ - Interrupt entry from user mode will switch off the IST stack.
+ - Interrupt exit to kernel mode will not attempt to reschedule.
+
+We try to only use IST entries and the paranoid entry code for vectors
+that absolutely need the more expensive check for the GS base - and we
+generate all 'normal' entry points with the regular (faster) paranoid=0
+variant.
diff --git a/Documentation/arch/x86/exception-tables.rst b/Documentation/arch/x86/exception-tables.rst
new file mode 100644
index 000000000000..6e7177363f8f
--- /dev/null
+++ b/Documentation/arch/x86/exception-tables.rst
@@ -0,0 +1,357 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============================
+Kernel level exception handling
+===============================
+
+Commentary by Joerg Pommnitz <joerg@raleigh.ibm.com>
+
+When a process runs in kernel mode, it often has to access user
+mode memory whose address has been passed by an untrusted program.
+To protect itself the kernel has to verify this address.
+
+In older versions of Linux this was done with the
+int verify_area(int type, const void * addr, unsigned long size)
+function (which has since been replaced by access_ok()).
+
+This function verified that the memory area starting at address
+'addr' and of size 'size' was accessible for the operation specified
+in type (read or write). To do this, verify_read had to look up the
+virtual memory area (vma) that contained the address addr. In the
+normal case (correctly working program), this test was successful.
+It only failed for a few buggy programs. In some kernel profiling
+tests, this normally unneeded verification used up a considerable
+amount of time.
+
+To overcome this situation, Linus decided to let the virtual memory
+hardware present in every Linux-capable CPU handle this test.
+
+How does this work?
+
+Whenever the kernel tries to access an address that is currently not
+accessible, the CPU generates a page fault exception and calls the
+page fault handler::
+
+ void exc_page_fault(struct pt_regs *regs, unsigned long error_code)
+
+in arch/x86/mm/fault.c. The parameters on the stack are set up by
+the low level assembly glue in arch/x86/entry/entry_32.S. The parameter
+regs is a pointer to the saved registers on the stack, error_code
+contains a reason code for the exception.
+
+exc_page_fault() first obtains the inaccessible address from the CPU
+control register CR2. If the address is within the virtual address
+space of the process, the fault probably occurred, because the page
+was not swapped in, write protected or something similar. However,
+we are interested in the other case: the address is not valid, there
+is no vma that contains this address. In this case, the kernel jumps
+to the bad_area label.
+
+There it uses the address of the instruction that caused the exception
+(i.e. regs->eip) to find an address where the execution can continue
+(fixup). If this search is successful, the fault handler modifies the
+return address (again regs->eip) and returns. The execution will
+continue at the address in fixup.
+
+Where does fixup point to?
+
+Since we jump to the contents of fixup, fixup obviously points
+to executable code. This code is hidden inside the user access macros.
+I have picked the get_user() macro defined in arch/x86/include/asm/uaccess.h
+as an example. The definition is somewhat hard to follow, so let's peek at
+the code generated by the preprocessor and the compiler. I selected
+the get_user() call in drivers/char/sysrq.c for a detailed examination.
+
+The original code in sysrq.c line 587::
+
+ get_user(c, buf);
+
+The preprocessor output (edited to become somewhat readable)::
+
+ (
+ {
+ long __gu_err = - 14 , __gu_val = 0;
+ const __typeof__(*( ( buf ) )) *__gu_addr = ((buf));
+ if (((((0 + current_set[0])->tss.segment) == 0x18 ) ||
+ (((sizeof(*(buf))) <= 0xC0000000UL) &&
+ ((unsigned long)(__gu_addr ) <= 0xC0000000UL - (sizeof(*(buf)))))))
+ do {
+ __gu_err = 0;
+ switch ((sizeof(*(buf)))) {
+ case 1:
+ __asm__ __volatile__(
+ "1: mov" "b" " %2,%" "b" "1\n"
+ "2:\n"
+ ".section .fixup,\"ax\"\n"
+ "3: movl %3,%0\n"
+ " xor" "b" " %" "b" "1,%" "b" "1\n"
+ " jmp 2b\n"
+ ".section __ex_table,\"a\"\n"
+ " .align 4\n"
+ " .long 1b,3b\n"
+ ".text" : "=r"(__gu_err), "=q" (__gu_val): "m"((*(struct __large_struct *)
+ ( __gu_addr )) ), "i"(- 14 ), "0"( __gu_err )) ;
+ break;
+ case 2:
+ __asm__ __volatile__(
+ "1: mov" "w" " %2,%" "w" "1\n"
+ "2:\n"
+ ".section .fixup,\"ax\"\n"
+ "3: movl %3,%0\n"
+ " xor" "w" " %" "w" "1,%" "w" "1\n"
+ " jmp 2b\n"
+ ".section __ex_table,\"a\"\n"
+ " .align 4\n"
+ " .long 1b,3b\n"
+ ".text" : "=r"(__gu_err), "=r" (__gu_val) : "m"((*(struct __large_struct *)
+ ( __gu_addr )) ), "i"(- 14 ), "0"( __gu_err ));
+ break;
+ case 4:
+ __asm__ __volatile__(
+ "1: mov" "l" " %2,%" "" "1\n"
+ "2:\n"
+ ".section .fixup,\"ax\"\n"
+ "3: movl %3,%0\n"
+ " xor" "l" " %" "" "1,%" "" "1\n"
+ " jmp 2b\n"
+ ".section __ex_table,\"a\"\n"
+ " .align 4\n" " .long 1b,3b\n"
+ ".text" : "=r"(__gu_err), "=r" (__gu_val) : "m"((*(struct __large_struct *)
+ ( __gu_addr )) ), "i"(- 14 ), "0"(__gu_err));
+ break;
+ default:
+ (__gu_val) = __get_user_bad();
+ }
+ } while (0) ;
+ ((c)) = (__typeof__(*((buf))))__gu_val;
+ __gu_err;
+ }
+ );
+
+WOW! Black GCC/assembly magic. This is impossible to follow, so let's
+see what code gcc generates::
+
+ > xorl %edx,%edx
+ > movl current_set,%eax
+ > cmpl $24,788(%eax)
+ > je .L1424
+ > cmpl $-1073741825,64(%esp)
+ > ja .L1423
+ > .L1424:
+ > movl %edx,%eax
+ > movl 64(%esp),%ebx
+ > #APP
+ > 1: movb (%ebx),%dl /* this is the actual user access */
+ > 2:
+ > .section .fixup,"ax"
+ > 3: movl $-14,%eax
+ > xorb %dl,%dl
+ > jmp 2b
+ > .section __ex_table,"a"
+ > .align 4
+ > .long 1b,3b
+ > .text
+ > #NO_APP
+ > .L1423:
+ > movzbl %dl,%esi
+
+The optimizer does a good job and gives us something we can actually
+understand. Can we? The actual user access is quite obvious. Thanks
+to the unified address space we can just access the address in user
+memory. But what does the .section stuff do?????
+
+To understand this we have to look at the final kernel::
+
+ > objdump --section-headers vmlinux
+ >
+ > vmlinux: file format elf32-i386
+ >
+ > Sections:
+ > Idx Name Size VMA LMA File off Algn
+ > 0 .text 00098f40 c0100000 c0100000 00001000 2**4
+ > CONTENTS, ALLOC, LOAD, READONLY, CODE
+ > 1 .fixup 000016bc c0198f40 c0198f40 00099f40 2**0
+ > CONTENTS, ALLOC, LOAD, READONLY, CODE
+ > 2 .rodata 0000f127 c019a5fc c019a5fc 0009b5fc 2**2
+ > CONTENTS, ALLOC, LOAD, READONLY, DATA
+ > 3 __ex_table 000015c0 c01a9724 c01a9724 000aa724 2**2
+ > CONTENTS, ALLOC, LOAD, READONLY, DATA
+ > 4 .data 0000ea58 c01abcf0 c01abcf0 000abcf0 2**4
+ > CONTENTS, ALLOC, LOAD, DATA
+ > 5 .bss 00018e21 c01ba748 c01ba748 000ba748 2**2
+ > ALLOC
+ > 6 .comment 00000ec4 00000000 00000000 000ba748 2**0
+ > CONTENTS, READONLY
+ > 7 .note 00001068 00000ec4 00000ec4 000bb60c 2**0
+ > CONTENTS, READONLY
+
+There are obviously 2 non standard ELF sections in the generated object
+file. But first we want to find out what happened to our code in the
+final kernel executable::
+
+ > objdump --disassemble --section=.text vmlinux
+ >
+ > c017e785 <do_con_write+c1> xorl %edx,%edx
+ > c017e787 <do_con_write+c3> movl 0xc01c7bec,%eax
+ > c017e78c <do_con_write+c8> cmpl $0x18,0x314(%eax)
+ > c017e793 <do_con_write+cf> je c017e79f <do_con_write+db>
+ > c017e795 <do_con_write+d1> cmpl $0xbfffffff,0x40(%esp,1)
+ > c017e79d <do_con_write+d9> ja c017e7a7 <do_con_write+e3>
+ > c017e79f <do_con_write+db> movl %edx,%eax
+ > c017e7a1 <do_con_write+dd> movl 0x40(%esp,1),%ebx
+ > c017e7a5 <do_con_write+e1> movb (%ebx),%dl
+ > c017e7a7 <do_con_write+e3> movzbl %dl,%esi
+
+The whole user memory access is reduced to 10 x86 machine instructions.
+The instructions bracketed in the .section directives are no longer
+in the normal execution path. They are located in a different section
+of the executable file::
+
+ > objdump --disassemble --section=.fixup vmlinux
+ >
+ > c0199ff5 <.fixup+10b5> movl $0xfffffff2,%eax
+ > c0199ffa <.fixup+10ba> xorb %dl,%dl
+ > c0199ffc <.fixup+10bc> jmp c017e7a7 <do_con_write+e3>
+
+And finally::
+
+ > objdump --full-contents --section=__ex_table vmlinux
+ >
+ > c01aa7c4 93c017c0 e09f19c0 97c017c0 99c017c0 ................
+ > c01aa7d4 f6c217c0 e99f19c0 a5e717c0 f59f19c0 ................
+ > c01aa7e4 080a18c0 01a019c0 0a0a18c0 04a019c0 ................
+
+or in human readable byte order::
+
+ > c01aa7c4 c017c093 c0199fe0 c017c097 c017c099 ................
+ > c01aa7d4 c017c2f6 c0199fe9 c017e7a5 c0199ff5 ................
+ ^^^^^^^^^^^^^^^^^
+ this is the interesting part!
+ > c01aa7e4 c0180a08 c019a001 c0180a0a c019a004 ................
+
+What happened? The assembly directives::
+
+ .section .fixup,"ax"
+ .section __ex_table,"a"
+
+told the assembler to move the following code to the specified
+sections in the ELF object file. So the instructions::
+
+ 3: movl $-14,%eax
+ xorb %dl,%dl
+ jmp 2b
+
+ended up in the .fixup section of the object file and the addresses::
+
+ .long 1b,3b
+
+ended up in the __ex_table section of the object file. 1b and 3b
+are local labels. The local label 1b (1b stands for next label 1
+backward) is the address of the instruction that might fault, i.e.
+in our case the address of the label 1 is c017e7a5:
+the original assembly code: > 1: movb (%ebx),%dl
+and linked in vmlinux : > c017e7a5 <do_con_write+e1> movb (%ebx),%dl
+
+The local label 3 (backwards again) is the address of the code to handle
+the fault, in our case the actual value is c0199ff5:
+the original assembly code: > 3: movl $-14,%eax
+and linked in vmlinux : > c0199ff5 <.fixup+10b5> movl $0xfffffff2,%eax
+
+If the fixup was able to handle the exception, control flow may be returned
+to the instruction after the one that triggered the fault, ie. local label 2b.
+
+The assembly code::
+
+ > .section __ex_table,"a"
+ > .align 4
+ > .long 1b,3b
+
+becomes the value pair::
+
+ > c01aa7d4 c017c2f6 c0199fe9 c017e7a5 c0199ff5 ................
+ ^this is ^this is
+ 1b 3b
+
+c017e7a5,c0199ff5 in the exception table of the kernel.
+
+So, what actually happens if a fault from kernel mode with no suitable
+vma occurs?
+
+#. access to invalid address::
+
+ > c017e7a5 <do_con_write+e1> movb (%ebx),%dl
+#. MMU generates exception
+#. CPU calls exc_page_fault()
+#. exc_page_fault() calls do_user_addr_fault()
+#. do_user_addr_fault() calls kernelmode_fixup_or_oops()
+#. kernelmode_fixup_or_oops() calls fixup_exception() (regs->eip == c017e7a5);
+#. fixup_exception() calls search_exception_tables()
+#. search_exception_tables() looks up the address c017e7a5 in the
+ exception table (i.e. the contents of the ELF section __ex_table)
+ and returns the address of the associated fault handle code c0199ff5.
+#. fixup_exception() modifies its own return address to point to the fault
+ handle code and returns.
+#. execution continues in the fault handling code.
+#. a) EAX becomes -EFAULT (== -14)
+ b) DL becomes zero (the value we "read" from user space)
+ c) execution continues at local label 2 (address of the
+ instruction immediately after the faulting user access).
+
+ The steps a to c above in a certain way emulate the faulting instruction.
+
+That's it, mostly. If you look at our example, you might ask why
+we set EAX to -EFAULT in the exception handler code. Well, the
+get_user() macro actually returns a value: 0, if the user access was
+successful, -EFAULT on failure. Our original code did not test this
+return value, however the inline assembly code in get_user() tries to
+return -EFAULT. GCC selected EAX to return this value.
+
+NOTE:
+Due to the way that the exception table is built and needs to be ordered,
+only use exceptions for code in the .text section. Any other section
+will cause the exception table to not be sorted correctly, and the
+exceptions will fail.
+
+Things changed when 64-bit support was added to x86 Linux. Rather than
+double the size of the exception table by expanding the two entries
+from 32-bits to 64 bits, a clever trick was used to store addresses
+as relative offsets from the table itself. The assembly code changed
+from::
+
+ .long 1b,3b
+ to:
+ .long (from) - .
+ .long (to) - .
+
+and the C-code that uses these values converts back to absolute addresses
+like this::
+
+ ex_insn_addr(const struct exception_table_entry *x)
+ {
+ return (unsigned long)&x->insn + x->insn;
+ }
+
+In v4.6 the exception table entry was expanded with a new field "handler".
+This is also 32-bits wide and contains a third relative function
+pointer which points to one of:
+
+1) ``int ex_handler_default(const struct exception_table_entry *fixup)``
+ This is legacy case that just jumps to the fixup code
+
+2) ``int ex_handler_fault(const struct exception_table_entry *fixup)``
+ This case provides the fault number of the trap that occurred at
+ entry->insn. It is used to distinguish page faults from machine
+ check.
+
+More functions can easily be added.
+
+CONFIG_BUILDTIME_TABLE_SORT allows the __ex_table section to be sorted post
+link of the kernel image, via a host utility scripts/sorttable. It will set the
+symbol main_extable_sort_needed to 0, avoiding sorting the __ex_table section
+at boot time. With the exception table sorted, at runtime when an exception
+occurs we can quickly lookup the __ex_table entry via binary search.
+
+This is not just a boot time optimization, some architectures require this
+table to be sorted in order to handle exceptions relatively early in the boot
+process. For example, i386 makes use of this form of exception handling before
+paging support is even enabled!
diff --git a/Documentation/arch/x86/features.rst b/Documentation/arch/x86/features.rst
new file mode 100644
index 000000000000..a33616346a38
--- /dev/null
+++ b/Documentation/arch/x86/features.rst
@@ -0,0 +1,3 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. kernel-feat:: features x86
diff --git a/Documentation/arch/x86/i386/IO-APIC.rst b/Documentation/arch/x86/i386/IO-APIC.rst
new file mode 100644
index 000000000000..ce4d8df15e7c
--- /dev/null
+++ b/Documentation/arch/x86/i386/IO-APIC.rst
@@ -0,0 +1,123 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======
+IO-APIC
+=======
+
+:Author: Ingo Molnar <mingo@kernel.org>
+
+Most (all) Intel-MP compliant SMP boards have the so-called 'IO-APIC',
+which is an enhanced interrupt controller. It enables us to route
+hardware interrupts to multiple CPUs, or to CPU groups. Without an
+IO-APIC, interrupts from hardware will be delivered only to the
+CPU which boots the operating system (usually CPU#0).
+
+Linux supports all variants of compliant SMP boards, including ones with
+multiple IO-APICs. Multiple IO-APICs are used in high-end servers to
+distribute IRQ load further.
+
+There are (a few) known breakages in certain older boards, such bugs are
+usually worked around by the kernel. If your MP-compliant SMP board does
+not boot Linux, then consult the linux-smp mailing list archives first.
+
+If your box boots fine with enabled IO-APIC IRQs, then your
+/proc/interrupts will look like this one::
+
+ hell:~> cat /proc/interrupts
+ CPU0
+ 0: 1360293 IO-APIC-edge timer
+ 1: 4 IO-APIC-edge keyboard
+ 2: 0 XT-PIC cascade
+ 13: 1 XT-PIC fpu
+ 14: 1448 IO-APIC-edge ide0
+ 16: 28232 IO-APIC-level Intel EtherExpress Pro 10/100 Ethernet
+ 17: 51304 IO-APIC-level eth0
+ NMI: 0
+ ERR: 0
+ hell:~>
+
+Some interrupts are still listed as 'XT PIC', but this is not a problem;
+none of those IRQ sources is performance-critical.
+
+
+In the unlikely case that your board does not create a working mp-table,
+you can use the pirq= boot parameter to 'hand-construct' IRQ entries. This
+is non-trivial though and cannot be automated. One sample /etc/lilo.conf
+entry::
+
+ append="pirq=15,11,10"
+
+The actual numbers depend on your system, on your PCI cards and on their
+PCI slot position. Usually PCI slots are 'daisy chained' before they are
+connected to the PCI chipset IRQ routing facility (the incoming PIRQ1-4
+lines)::
+
+ ,-. ,-. ,-. ,-. ,-.
+ PIRQ4 ----| |-. ,-| |-. ,-| |-. ,-| |--------| |
+ |S| \ / |S| \ / |S| \ / |S| |S|
+ PIRQ3 ----|l|-. `/---|l|-. `/---|l|-. `/---|l|--------|l|
+ |o| \/ |o| \/ |o| \/ |o| |o|
+ PIRQ2 ----|t|-./`----|t|-./`----|t|-./`----|t|--------|t|
+ |1| /\ |2| /\ |3| /\ |4| |5|
+ PIRQ1 ----| |- `----| |- `----| |- `----| |--------| |
+ `-' `-' `-' `-' `-'
+
+Every PCI card emits a PCI IRQ, which can be INTA, INTB, INTC or INTD::
+
+ ,-.
+ INTD--| |
+ |S|
+ INTC--|l|
+ |o|
+ INTB--|t|
+ |x|
+ INTA--| |
+ `-'
+
+These INTA-D PCI IRQs are always 'local to the card', their real meaning
+depends on which slot they are in. If you look at the daisy chaining diagram,
+a card in slot4, issuing INTA IRQ, it will end up as a signal on PIRQ4 of
+the PCI chipset. Most cards issue INTA, this creates optimal distribution
+between the PIRQ lines. (distributing IRQ sources properly is not a
+necessity, PCI IRQs can be shared at will, but it's a good for performance
+to have non shared interrupts). Slot5 should be used for videocards, they
+do not use interrupts normally, thus they are not daisy chained either.
+
+so if you have your SCSI card (IRQ11) in Slot1, Tulip card (IRQ9) in
+Slot2, then you'll have to specify this pirq= line::
+
+ append="pirq=11,9"
+
+the following script tries to figure out such a default pirq= line from
+your PCI configuration::
+
+ echo -n pirq=; echo `scanpci | grep T_L | cut -c56-` | sed 's/ /,/g'
+
+note that this script won't work if you have skipped a few slots or if your
+board does not do default daisy-chaining. (or the IO-APIC has the PIRQ pins
+connected in some strange way). E.g. if in the above case you have your SCSI
+card (IRQ11) in Slot3, and have Slot1 empty::
+
+ append="pirq=0,9,11"
+
+[value '0' is a generic 'placeholder', reserved for empty (or non-IRQ emitting)
+slots.]
+
+Generally, it's always possible to find out the correct pirq= settings, just
+permute all IRQ numbers properly ... it will take some time though. An
+'incorrect' pirq line will cause the booting process to hang, or a device
+won't function properly (e.g. if it's inserted as a module).
+
+If you have 2 PCI buses, then you can use up to 8 pirq values, although such
+boards tend to have a good configuration.
+
+Be prepared that it might happen that you need some strange pirq line::
+
+ append="pirq=0,0,0,0,0,0,9,11"
+
+Use smart trial-and-error techniques to find out the correct pirq line ...
+
+Good luck and mail to linux-smp@vger.kernel.org or
+linux-kernel@vger.kernel.org if you have any problems that are not covered
+by this document.
+
diff --git a/Documentation/arch/x86/i386/index.rst b/Documentation/arch/x86/i386/index.rst
new file mode 100644
index 000000000000..8747cf5bbd49
--- /dev/null
+++ b/Documentation/arch/x86/i386/index.rst
@@ -0,0 +1,10 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============
+i386 Support
+============
+
+.. toctree::
+ :maxdepth: 2
+
+ IO-APIC
diff --git a/Documentation/arch/x86/ifs.rst b/Documentation/arch/x86/ifs.rst
new file mode 100644
index 000000000000..97abb696a680
--- /dev/null
+++ b/Documentation/arch/x86/ifs.rst
@@ -0,0 +1,2 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. kernel-doc:: drivers/platform/x86/intel/ifs/ifs.h
diff --git a/Documentation/arch/x86/index.rst b/Documentation/arch/x86/index.rst
new file mode 100644
index 000000000000..f88bcfceb7f2
--- /dev/null
+++ b/Documentation/arch/x86/index.rst
@@ -0,0 +1,46 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========================
+x86-specific Documentation
+==========================
+
+.. toctree::
+ :maxdepth: 2
+ :numbered:
+
+ boot
+ booting-dt
+ cpuinfo
+ topology
+ exception-tables
+ kernel-stacks
+ entry_64
+ earlyprintk
+ orc-unwinder
+ zero-page
+ tlb
+ mtrr
+ pat
+ intel-hfi
+ shstk
+ iommu
+ intel_txt
+ amd-debugging
+ amd-memory-encryption
+ amd_hsmp
+ amd-hfi
+ tdx
+ pti
+ mds
+ microcode
+ tsx_async_abort
+ buslock
+ usb-legacy-support
+ i386/index
+ x86_64/index
+ ifs
+ sva
+ sgx
+ features
+ elf_auxvec
+ xstate
diff --git a/Documentation/arch/x86/intel-hfi.rst b/Documentation/arch/x86/intel-hfi.rst
new file mode 100644
index 000000000000..49dea58ea4fb
--- /dev/null
+++ b/Documentation/arch/x86/intel-hfi.rst
@@ -0,0 +1,72 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============================================================
+Hardware-Feedback Interface for scheduling on Intel Hardware
+============================================================
+
+Overview
+--------
+
+Intel has described the Hardware Feedback Interface (HFI) in the Intel 64 and
+IA-32 Architectures Software Developer's Manual (Intel SDM) Volume 3 Section
+14.6 [1]_.
+
+The HFI gives the operating system a performance and energy efficiency
+capability data for each CPU in the system. Linux can use the information from
+the HFI to influence task placement decisions.
+
+The Hardware Feedback Interface
+-------------------------------
+
+The Hardware Feedback Interface provides to the operating system information
+about the performance and energy efficiency of each CPU in the system. Each
+capability is given as a unit-less quantity in the range [0-255]. Higher values
+indicate higher capability. Energy efficiency and performance are reported in
+separate capabilities. Even though on some systems these two metrics may be
+related, they are specified as independent capabilities in the Intel SDM.
+
+These capabilities may change at runtime as a result of changes in the
+operating conditions of the system or the action of external factors. The rate
+at which these capabilities are updated is specific to each processor model. On
+some models, capabilities are set at boot time and never change. On others,
+capabilities may change every tens of milliseconds. For instance, a remote
+mechanism may be used to lower Thermal Design Power. Such change can be
+reflected in the HFI. Likewise, if the system needs to be throttled due to
+excessive heat, the HFI may reflect reduced performance on specific CPUs.
+
+The kernel or a userspace policy daemon can use these capabilities to modify
+task placement decisions. For instance, if either the performance or energy
+capabilities of a given logical processor becomes zero, it is an indication that
+the hardware recommends to the operating system to not schedule any tasks on
+that processor for performance or energy efficiency reasons, respectively.
+
+Implementation details for Linux
+--------------------------------
+
+The infrastructure to handle thermal event interrupts has two parts. In the
+Local Vector Table of a CPU's local APIC, there exists a register for the
+Thermal Monitor Register. This register controls how interrupts are delivered
+to a CPU when the thermal monitor generates and interrupt. Further details
+can be found in the Intel SDM Vol. 3 Section 10.5 [1]_.
+
+The thermal monitor may generate interrupts per CPU or per package. The HFI
+generates package-level interrupts. This monitor is configured and initialized
+via a set of machine-specific registers. Specifically, the HFI interrupt and
+status are controlled via designated bits in the IA32_PACKAGE_THERM_INTERRUPT
+and IA32_PACKAGE_THERM_STATUS registers, respectively. There exists one HFI
+table per package. Further details can be found in the Intel SDM Vol. 3
+Section 14.9 [1]_.
+
+The hardware issues an HFI interrupt after updating the HFI table and is ready
+for the operating system to consume it. CPUs receive such interrupt via the
+thermal entry in the Local APIC's Local Vector Table.
+
+When servicing such interrupt, the HFI driver parses the updated table and
+relays the update to userspace using the thermal notification framework. Given
+that there may be many HFI updates every second, the updates relayed to
+userspace are throttled at a rate of CONFIG_HZ jiffies.
+
+References
+----------
+
+.. [1] https://www.intel.com/sdm
diff --git a/Documentation/intel_txt.txt b/Documentation/arch/x86/intel_txt.rst
index d83c1a2122c9..d83c1a2122c9 100644
--- a/Documentation/intel_txt.txt
+++ b/Documentation/arch/x86/intel_txt.rst
diff --git a/Documentation/arch/x86/iommu.rst b/Documentation/arch/x86/iommu.rst
new file mode 100644
index 000000000000..41fbadfe2221
--- /dev/null
+++ b/Documentation/arch/x86/iommu.rst
@@ -0,0 +1,151 @@
+=================
+x86 IOMMU Support
+=================
+
+The architecture specs can be obtained from the below locations.
+
+- Intel: http://www.intel.com/content/dam/www/public/us/en/documents/product-specifications/vt-directed-io-spec.pdf
+- AMD: https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/specifications/48882_3_07_PUB.pdf
+
+This guide gives a quick cheat sheet for some basic understanding.
+
+Basic stuff
+-----------
+
+ACPI enumerates and lists the different IOMMUs on the platform, and
+device scope relationships between devices and which IOMMU controls
+them.
+
+Some ACPI Keywords:
+
+- DMAR - Intel DMA Remapping table
+- DRHD - Intel DMA Remapping Hardware Unit Definition
+- RMRR - Intel Reserved Memory Region Reporting Structure
+- IVRS - AMD I/O Virtualization Reporting Structure
+- IVDB - AMD I/O Virtualization Definition Block
+- IVHD - AMD I/O Virtualization Hardware Definition
+
+What is Intel RMRR?
+^^^^^^^^^^^^^^^^^^^
+
+There are some devices the BIOS controls, for e.g USB devices to perform
+PS2 emulation. The regions of memory used for these devices are marked
+reserved in the e820 map. When we turn on DMA translation, DMA to those
+regions will fail. Hence BIOS uses RMRR to specify these regions along with
+devices that need to access these regions. OS is expected to setup
+unity mappings for these regions for these devices to access these regions.
+
+What is AMD IVRS?
+^^^^^^^^^^^^^^^^^
+
+The architecture defines an ACPI-compatible data structure called an I/O
+Virtualization Reporting Structure (IVRS) that is used to convey information
+related to I/O virtualization to system software. The IVRS describes the
+configuration and capabilities of the IOMMUs contained in the platform as
+well as information about the devices that each IOMMU virtualizes.
+
+The IVRS provides information about the following:
+
+- IOMMUs present in the platform including their capabilities and proper configuration
+- System I/O topology relevant to each IOMMU
+- Peripheral devices that cannot be otherwise enumerated
+- Memory regions used by SMI/SMM, platform firmware, and platform hardware. These are generally exclusion ranges to be configured by system software.
+
+How is an I/O Virtual Address (IOVA) generated?
+-----------------------------------------------
+
+Well behaved drivers call dma_map_*() calls before sending command to device
+that needs to perform DMA. Once DMA is completed and mapping is no longer
+required, driver performs dma_unmap_*() calls to unmap the region.
+
+Intel Specific Notes
+--------------------
+
+Graphics Problems?
+^^^^^^^^^^^^^^^^^^
+
+If you encounter issues with graphics devices, you can try adding
+option intel_iommu=igfx_off to turn off the integrated graphics engine.
+If this fixes anything, please ensure you file a bug reporting the problem.
+
+Some exceptions to IOVA
+^^^^^^^^^^^^^^^^^^^^^^^
+
+Interrupt ranges are not address translated, (0xfee00000 - 0xfeefffff).
+The same is true for peer to peer transactions. Hence we reserve the
+address from PCI MMIO ranges so they are not allocated for IOVA addresses.
+
+AMD Specific Notes
+------------------
+
+Graphics Problems?
+^^^^^^^^^^^^^^^^^^
+
+If you encounter issues with integrated graphics devices, you can try adding
+option iommu=pt to the kernel command line use a 1:1 mapping for the IOMMU. If
+this fixes anything, please ensure you file a bug reporting the problem.
+
+Fault reporting
+---------------
+When errors are reported, the IOMMU signals via an interrupt. The fault
+reason and device that caused it is printed on the console.
+
+
+Kernel Log Samples
+------------------
+
+Intel Boot Messages
+^^^^^^^^^^^^^^^^^^^
+
+Something like this gets printed indicating presence of DMAR tables
+in ACPI:
+
+::
+
+ ACPI: DMAR (v001 A M I OEMDMAR 0x00000001 MSFT 0x00000097) @ 0x000000007f5b5ef0
+
+When DMAR is being processed and initialized by ACPI, prints DMAR locations
+and any RMRR's processed:
+
+::
+
+ ACPI DMAR:Host address width 36
+ ACPI DMAR:DRHD (flags: 0x00000000)base: 0x00000000fed90000
+ ACPI DMAR:DRHD (flags: 0x00000000)base: 0x00000000fed91000
+ ACPI DMAR:DRHD (flags: 0x00000001)base: 0x00000000fed93000
+ ACPI DMAR:RMRR base: 0x00000000000ed000 end: 0x00000000000effff
+ ACPI DMAR:RMRR base: 0x000000007f600000 end: 0x000000007fffffff
+
+When DMAR is enabled for use, you will notice:
+
+::
+
+ PCI-DMA: Using DMAR IOMMU
+
+Intel Fault reporting
+^^^^^^^^^^^^^^^^^^^^^
+
+::
+
+ DMAR:[DMA Write] Request device [00:02.0] fault addr 6df084000
+ DMAR:[fault reason 05] PTE Write access is not set
+ DMAR:[DMA Write] Request device [00:02.0] fault addr 6df084000
+ DMAR:[fault reason 05] PTE Write access is not set
+
+AMD Boot Messages
+^^^^^^^^^^^^^^^^^
+
+Something like this gets printed indicating presence of the IOMMU:
+
+::
+
+ iommu: Default domain type: Translated
+ iommu: DMA domain TLB invalidation policy: lazy mode
+
+AMD Fault reporting
+^^^^^^^^^^^^^^^^^^^
+
+::
+
+ AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0007 address=0xffffc02000 flags=0x0000]
+ AMD-Vi: Event logged [IO_PAGE_FAULT device=07:00.0 domain=0x0007 address=0xffffc02000 flags=0x0000]
diff --git a/Documentation/arch/x86/kernel-stacks.rst b/Documentation/arch/x86/kernel-stacks.rst
new file mode 100644
index 000000000000..738671a4070b
--- /dev/null
+++ b/Documentation/arch/x86/kernel-stacks.rst
@@ -0,0 +1,152 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============
+Kernel Stacks
+=============
+
+Kernel stacks on x86-64 bit
+===========================
+
+Most of the text from Keith Owens, hacked by AK
+
+x86_64 page size (PAGE_SIZE) is 4K.
+
+Like all other architectures, x86_64 has a kernel stack for every
+active thread. These thread stacks are THREAD_SIZE (4*PAGE_SIZE) big.
+These stacks contain useful data as long as a thread is alive or a
+zombie. While the thread is in user space the kernel stack is empty
+except for the thread_info structure at the bottom.
+
+In addition to the per thread stacks, there are specialized stacks
+associated with each CPU. These stacks are only used while the kernel
+is in control on that CPU; when a CPU returns to user space the
+specialized stacks contain no useful data. The main CPU stacks are:
+
+* Interrupt stack. IRQ_STACK_SIZE
+
+ Used for external hardware interrupts. If this is the first external
+ hardware interrupt (i.e. not a nested hardware interrupt) then the
+ kernel switches from the current task to the interrupt stack. Like
+ the split thread and interrupt stacks on i386, this gives more room
+ for kernel interrupt processing without having to increase the size
+ of every per thread stack.
+
+ The interrupt stack is also used when processing a softirq.
+
+Switching to the kernel interrupt stack is done by software based on a
+per CPU interrupt nest counter. This is needed because x86-64 "IST"
+hardware stacks cannot nest without races.
+
+x86_64 also has a feature which is not available on i386, the ability
+to automatically switch to a new stack for designated events such as
+double fault or NMI, which makes it easier to handle these unusual
+events on x86_64. This feature is called the Interrupt Stack Table
+(IST). There can be up to 7 IST entries per CPU. The IST code is an
+index into the Task State Segment (TSS). The IST entries in the TSS
+point to dedicated stacks; each stack can be a different size.
+
+An IST is selected by a non-zero value in the IST field of an
+interrupt-gate descriptor. When an interrupt occurs and the hardware
+loads such a descriptor, the hardware automatically sets the new stack
+pointer based on the IST value, then invokes the interrupt handler. If
+the interrupt came from user mode, then the interrupt handler prologue
+will switch back to the per-thread stack. If software wants to allow
+nested IST interrupts then the handler must adjust the IST values on
+entry to and exit from the interrupt handler. (This is occasionally
+done, e.g. for debug exceptions.)
+
+Events with different IST codes (i.e. with different stacks) can be
+nested. For example, a debug interrupt can safely be interrupted by an
+NMI. arch/x86_64/kernel/entry.S::paranoidentry adjusts the stack
+pointers on entry to and exit from all IST events, in theory allowing
+IST events with the same code to be nested. However in most cases, the
+stack size allocated to an IST assumes no nesting for the same code.
+If that assumption is ever broken then the stacks will become corrupt.
+
+The currently assigned IST stacks are:
+
+* ESTACK_DF. EXCEPTION_STKSZ (PAGE_SIZE).
+
+ Used for interrupt 8 - Double Fault Exception (#DF).
+
+ Invoked when handling one exception causes another exception. Happens
+ when the kernel is very confused (e.g. kernel stack pointer corrupt).
+ Using a separate stack allows the kernel to recover from it well enough
+ in many cases to still output an oops.
+
+* ESTACK_NMI. EXCEPTION_STKSZ (PAGE_SIZE).
+
+ Used for non-maskable interrupts (NMI).
+
+ NMI can be delivered at any time, including when the kernel is in the
+ middle of switching stacks. Using IST for NMI events avoids making
+ assumptions about the previous state of the kernel stack.
+
+* ESTACK_DB. EXCEPTION_STKSZ (PAGE_SIZE).
+
+ Used for hardware debug interrupts (interrupt 1) and for software
+ debug interrupts (INT3).
+
+ When debugging a kernel, debug interrupts (both hardware and
+ software) can occur at any time. Using IST for these interrupts
+ avoids making assumptions about the previous state of the kernel
+ stack.
+
+ To handle nested #DB correctly there exist two instances of DB stacks. On
+ #DB entry the IST stackpointer for #DB is switched to the second instance
+ so a nested #DB starts from a clean stack. The nested #DB switches
+ the IST stackpointer to a guard hole to catch triple nesting.
+
+* ESTACK_MCE. EXCEPTION_STKSZ (PAGE_SIZE).
+
+ Used for interrupt 18 - Machine Check Exception (#MC).
+
+ MCE can be delivered at any time, including when the kernel is in the
+ middle of switching stacks. Using IST for MCE events avoids making
+ assumptions about the previous state of the kernel stack.
+
+For more details see the Intel IA32 or AMD AMD64 architecture manuals.
+
+
+Printing backtraces on x86
+==========================
+
+The question about the '?' preceding function names in an x86 stacktrace
+keeps popping up, here's an indepth explanation. It helps if the reader
+stares at print_context_stack() and the whole machinery in and around
+arch/x86/kernel/dumpstack.c.
+
+Adapted from Ingo's mail, Message-ID: <20150521101614.GA10889@gmail.com>:
+
+We always scan the full kernel stack for return addresses stored on
+the kernel stack(s) [1]_, from stack top to stack bottom, and print out
+anything that 'looks like' a kernel text address.
+
+If it fits into the frame pointer chain, we print it without a question
+mark, knowing that it's part of the real backtrace.
+
+If the address does not fit into our expected frame pointer chain we
+still print it, but we print a '?'. It can mean two things:
+
+ - either the address is not part of the call chain: it's just stale
+ values on the kernel stack, from earlier function calls. This is
+ the common case.
+
+ - or it is part of the call chain, but the frame pointer was not set
+ up properly within the function, so we don't recognize it.
+
+This way we will always print out the real call chain (plus a few more
+entries), regardless of whether the frame pointer was set up correctly
+or not - but in most cases we'll get the call chain right as well. The
+entries printed are strictly in stack order, so you can deduce more
+information from that as well.
+
+The most important property of this method is that we _never_ lose
+information: we always strive to print _all_ addresses on the stack(s)
+that look like kernel text addresses, so if debug information is wrong,
+we still print out the real call chain as well - just with more question
+marks than ideal.
+
+.. [1] For things like IRQ and IST stacks, we also scan those stacks, in
+ the right order, and try to cross from one stack into another
+ reconstructing the call chain. This works most of the time.
diff --git a/Documentation/arch/x86/mds.rst b/Documentation/arch/x86/mds.rst
new file mode 100644
index 000000000000..3518671e1a85
--- /dev/null
+++ b/Documentation/arch/x86/mds.rst
@@ -0,0 +1,209 @@
+Microarchitectural Data Sampling (MDS) mitigation
+=================================================
+
+.. _mds:
+
+Overview
+--------
+
+Microarchitectural Data Sampling (MDS) is a family of side channel attacks
+on internal buffers in Intel CPUs. The variants are:
+
+ - Microarchitectural Store Buffer Data Sampling (MSBDS) (CVE-2018-12126)
+ - Microarchitectural Fill Buffer Data Sampling (MFBDS) (CVE-2018-12130)
+ - Microarchitectural Load Port Data Sampling (MLPDS) (CVE-2018-12127)
+ - Microarchitectural Data Sampling Uncacheable Memory (MDSUM) (CVE-2019-11091)
+
+MSBDS leaks Store Buffer Entries which can be speculatively forwarded to a
+dependent load (store-to-load forwarding) as an optimization. The forward
+can also happen to a faulting or assisting load operation for a different
+memory address, which can be exploited under certain conditions. Store
+buffers are partitioned between Hyper-Threads so cross thread forwarding is
+not possible. But if a thread enters or exits a sleep state the store
+buffer is repartitioned which can expose data from one thread to the other.
+
+MFBDS leaks Fill Buffer Entries. Fill buffers are used internally to manage
+L1 miss situations and to hold data which is returned or sent in response
+to a memory or I/O operation. Fill buffers can forward data to a load
+operation and also write data to the cache. When the fill buffer is
+deallocated it can retain the stale data of the preceding operations which
+can then be forwarded to a faulting or assisting load operation, which can
+be exploited under certain conditions. Fill buffers are shared between
+Hyper-Threads so cross thread leakage is possible.
+
+MLPDS leaks Load Port Data. Load ports are used to perform load operations
+from memory or I/O. The received data is then forwarded to the register
+file or a subsequent operation. In some implementations the Load Port can
+contain stale data from a previous operation which can be forwarded to
+faulting or assisting loads under certain conditions, which again can be
+exploited eventually. Load ports are shared between Hyper-Threads so cross
+thread leakage is possible.
+
+MDSUM is a special case of MSBDS, MFBDS and MLPDS. An uncacheable load from
+memory that takes a fault or assist can leave data in a microarchitectural
+structure that may later be observed using one of the same methods used by
+MSBDS, MFBDS or MLPDS.
+
+Exposure assumptions
+--------------------
+
+It is assumed that attack code resides in user space or in a guest with one
+exception. The rationale behind this assumption is that the code construct
+needed for exploiting MDS requires:
+
+ - to control the load to trigger a fault or assist
+
+ - to have a disclosure gadget which exposes the speculatively accessed
+ data for consumption through a side channel.
+
+ - to control the pointer through which the disclosure gadget exposes the
+ data
+
+The existence of such a construct in the kernel cannot be excluded with
+100% certainty, but the complexity involved makes it extremely unlikely.
+
+There is one exception, which is untrusted BPF. The functionality of
+untrusted BPF is limited, but it needs to be thoroughly investigated
+whether it can be used to create such a construct.
+
+
+Mitigation strategy
+-------------------
+
+All variants have the same mitigation strategy at least for the single CPU
+thread case (SMT off): Force the CPU to clear the affected buffers.
+
+This is achieved by using the otherwise unused and obsolete VERW
+instruction in combination with a microcode update. The microcode clears
+the affected CPU buffers when the VERW instruction is executed.
+
+For virtualization there are two ways to achieve CPU buffer
+clearing. Either the modified VERW instruction or via the L1D Flush
+command. The latter is issued when L1TF mitigation is enabled so the extra
+VERW can be avoided. If the CPU is not affected by L1TF then VERW needs to
+be issued.
+
+If the VERW instruction with the supplied segment selector argument is
+executed on a CPU without the microcode update there is no side effect
+other than a small number of pointlessly wasted CPU cycles.
+
+This does not protect against cross Hyper-Thread attacks except for MSBDS
+which is only exploitable cross Hyper-thread when one of the Hyper-Threads
+enters a C-state.
+
+The kernel provides a function to invoke the buffer clearing:
+
+ x86_clear_cpu_buffers()
+
+Also macro CLEAR_CPU_BUFFERS can be used in ASM late in exit-to-user path.
+Other than CFLAGS.ZF, this macro doesn't clobber any registers.
+
+The mitigation is invoked on kernel/userspace, hypervisor/guest and C-state
+(idle) transitions.
+
+As a special quirk to address virtualization scenarios where the host has
+the microcode updated, but the hypervisor does not (yet) expose the
+MD_CLEAR CPUID bit to guests, the kernel issues the VERW instruction in the
+hope that it might actually clear the buffers. The state is reflected
+accordingly.
+
+According to current knowledge additional mitigations inside the kernel
+itself are not required because the necessary gadgets to expose the leaked
+data cannot be controlled in a way which allows exploitation from malicious
+user space or VM guests.
+
+Kernel internal mitigation modes
+--------------------------------
+
+ ======= ============================================================
+ off Mitigation is disabled. Either the CPU is not affected or
+ mds=off is supplied on the kernel command line
+
+ full Mitigation is enabled. CPU is affected and MD_CLEAR is
+ advertised in CPUID.
+
+ vmwerv Mitigation is enabled. CPU is affected and MD_CLEAR is not
+ advertised in CPUID. That is mainly for virtualization
+ scenarios where the host has the updated microcode but the
+ hypervisor does not expose MD_CLEAR in CPUID. It's a best
+ effort approach without guarantee.
+ ======= ============================================================
+
+If the CPU is affected and mds=off is not supplied on the kernel command
+line then the kernel selects the appropriate mitigation mode depending on
+the availability of the MD_CLEAR CPUID bit.
+
+Mitigation points
+-----------------
+
+1. Return to user space
+^^^^^^^^^^^^^^^^^^^^^^^
+
+ When transitioning from kernel to user space the CPU buffers are flushed
+ on affected CPUs when the mitigation is not disabled on the kernel
+ command line. The mitigation is enabled through the feature flag
+ X86_FEATURE_CLEAR_CPU_BUF.
+
+ The mitigation is invoked just before transitioning to userspace after
+ user registers are restored. This is done to minimize the window in
+ which kernel data could be accessed after VERW e.g. via an NMI after
+ VERW.
+
+ **Corner case not handled**
+ Interrupts returning to kernel don't clear CPUs buffers since the
+ exit-to-user path is expected to do that anyways. But, there could be
+ a case when an NMI is generated in kernel after the exit-to-user path
+ has cleared the buffers. This case is not handled and NMI returning to
+ kernel don't clear CPU buffers because:
+
+ 1. It is rare to get an NMI after VERW, but before returning to userspace.
+ 2. For an unprivileged user, there is no known way to make that NMI
+ less rare or target it.
+ 3. It would take a large number of these precisely-timed NMIs to mount
+ an actual attack. There's presumably not enough bandwidth.
+ 4. The NMI in question occurs after a VERW, i.e. when user state is
+ restored and most interesting data is already scrubbed. What's left
+ is only the data that NMI touches, and that may or may not be of
+ any interest.
+
+
+2. C-State transition
+^^^^^^^^^^^^^^^^^^^^^
+
+ When a CPU goes idle and enters a C-State the CPU buffers need to be
+ cleared on affected CPUs when SMT is active. This addresses the
+ repartitioning of the store buffer when one of the Hyper-Threads enters
+ a C-State.
+
+ When SMT is inactive, i.e. either the CPU does not support it or all
+ sibling threads are offline CPU buffer clearing is not required.
+
+ The idle clearing is enabled on CPUs which are only affected by MSBDS
+ and not by any other MDS variant. The other MDS variants cannot be
+ protected against cross Hyper-Thread attacks because the Fill Buffer and
+ the Load Ports are shared. So on CPUs affected by other variants, the
+ idle clearing would be a window dressing exercise and is therefore not
+ activated.
+
+ The invocation is controlled by the static key cpu_buf_idle_clear which is
+ switched depending on the chosen mitigation mode and the SMT state of the
+ system.
+
+ The buffer clear is only invoked before entering the C-State to prevent
+ that stale data from the idling CPU from spilling to the Hyper-Thread
+ sibling after the store buffer got repartitioned and all entries are
+ available to the non idle sibling.
+
+ When coming out of idle the store buffer is partitioned again so each
+ sibling has half of it available. The back from idle CPU could be then
+ speculatively exposed to contents of the sibling. The buffers are
+ flushed either on exit to user space or on VMENTER so malicious code
+ in user space or the guest cannot speculatively access them.
+
+ The mitigation is hooked into all variants of halt()/mwait(), but does
+ not cover the legacy ACPI IO-Port mechanism because the ACPI idle driver
+ has been superseded by the intel_idle driver around 2010 and is
+ preferred on all affected CPUs which are expected to gain the MD_CLEAR
+ functionality in microcode. Aside of that the IO-Port mechanism is a
+ legacy interface which is only used on older systems which are either
+ not affected or do not receive microcode updates anymore.
diff --git a/Documentation/arch/x86/microcode.rst b/Documentation/arch/x86/microcode.rst
new file mode 100644
index 000000000000..b627c6f36bcf
--- /dev/null
+++ b/Documentation/arch/x86/microcode.rst
@@ -0,0 +1,240 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========================
+The Linux Microcode Loader
+==========================
+
+:Authors: - Fenghua Yu <fenghua.yu@intel.com>
+ - Borislav Petkov <bp@suse.de>
+ - Ashok Raj <ashok.raj@intel.com>
+
+The kernel has a x86 microcode loading facility which is supposed to
+provide microcode loading methods in the OS. Potential use cases are
+updating the microcode on platforms beyond the OEM End-Of-Life support,
+and updating the microcode on long-running systems without rebooting.
+
+The loader supports three loading methods:
+
+Early load microcode
+====================
+
+The kernel can update microcode very early during boot. Loading
+microcode early can fix CPU issues before they are observed during
+kernel boot time.
+
+The microcode is stored in an initrd file. During boot, it is read from
+it and loaded into the CPU cores.
+
+The format of the combined initrd image is microcode in (uncompressed)
+cpio format followed by the (possibly compressed) initrd image. The
+loader parses the combined initrd image during boot.
+
+The microcode files in cpio name space are:
+
+on Intel:
+ kernel/x86/microcode/GenuineIntel.bin
+on AMD :
+ kernel/x86/microcode/AuthenticAMD.bin
+
+During BSP (BootStrapping Processor) boot (pre-SMP), the kernel
+scans the microcode file in the initrd. If microcode matching the
+CPU is found, it will be applied in the BSP and later on in all APs
+(Application Processors).
+
+The loader also saves the matching microcode for the CPU in memory.
+Thus, the cached microcode patch is applied when CPUs resume from a
+sleep state.
+
+Here's a crude example how to prepare an initrd with microcode (this is
+normally done automatically by the distribution, when recreating the
+initrd, so you don't really have to do it yourself. It is documented
+here for future reference only).
+::
+
+ #!/bin/bash
+
+ if [ -z "$1" ]; then
+ echo "You need to supply an initrd file"
+ exit 1
+ fi
+
+ INITRD="$1"
+
+ DSTDIR=kernel/x86/microcode
+ TMPDIR=/tmp/initrd
+
+ rm -rf $TMPDIR
+
+ mkdir $TMPDIR
+ cd $TMPDIR
+ mkdir -p $DSTDIR
+
+ if [ -d /lib/firmware/amd-ucode ]; then
+ cat /lib/firmware/amd-ucode/microcode_amd*.bin > $DSTDIR/AuthenticAMD.bin
+ fi
+
+ if [ -d /lib/firmware/intel-ucode ]; then
+ cat /lib/firmware/intel-ucode/* > $DSTDIR/GenuineIntel.bin
+ fi
+
+ find . | cpio -o -H newc >../ucode.cpio
+ cd ..
+ mv $INITRD $INITRD.orig
+ cat ucode.cpio $INITRD.orig > $INITRD
+
+ rm -rf $TMPDIR
+
+
+The system needs to have the microcode packages installed into
+/lib/firmware or you need to fixup the paths above if yours are
+somewhere else and/or you've downloaded them directly from the processor
+vendor's site.
+
+Late loading
+============
+
+You simply install the microcode packages your distro supplies and
+run::
+
+ # echo 1 > /sys/devices/system/cpu/microcode/reload
+
+as root.
+
+The loading mechanism looks for microcode blobs in
+/lib/firmware/{intel-ucode,amd-ucode}. The default distro installation
+packages already put them there.
+
+Since kernel 5.19, late loading is not enabled by default.
+
+The /dev/cpu/microcode method has been removed in 5.19.
+
+Why is late loading dangerous?
+==============================
+
+Synchronizing all CPUs
+----------------------
+
+The microcode engine which receives the microcode update is shared
+between the two logical threads in a SMT system. Therefore, when
+the update is executed on one SMT thread of the core, the sibling
+"automatically" gets the update.
+
+Since the microcode can "simulate" MSRs too, while the microcode update
+is in progress, those simulated MSRs transiently cease to exist. This
+can result in unpredictable results if the SMT sibling thread happens to
+be in the middle of an access to such an MSR. The usual observation is
+that such MSR accesses cause #GPs to be raised to signal that former are
+not present.
+
+The disappearing MSRs are just one common issue which is being observed.
+Any other instruction that's being patched and gets concurrently
+executed by the other SMT sibling, can also result in similar,
+unpredictable behavior.
+
+To eliminate this case, a stop_machine()-based CPU synchronization was
+introduced as a way to guarantee that all logical CPUs will not execute
+any code but just wait in a spin loop, polling an atomic variable.
+
+While this took care of device or external interrupts, IPIs including
+LVT ones, such as CMCI etc, it cannot address other special interrupts
+that can't be shut off. Those are Machine Check (#MC), System Management
+(#SMI) and Non-Maskable interrupts (#NMI).
+
+Machine Checks
+--------------
+
+Machine Checks (#MC) are non-maskable. There are two kinds of MCEs.
+Fatal un-recoverable MCEs and recoverable MCEs. While un-recoverable
+errors are fatal, recoverable errors can also happen in kernel context
+are also treated as fatal by the kernel.
+
+On certain Intel machines, MCEs are also broadcast to all threads in a
+system. If one thread is in the middle of executing WRMSR, a MCE will be
+taken at the end of the flow. Either way, they will wait for the thread
+performing the wrmsr(0x79) to rendezvous in the MCE handler and shutdown
+eventually if any of the threads in the system fail to check in to the
+MCE rendezvous.
+
+To be paranoid and get predictable behavior, the OS can choose to set
+MCG_STATUS.MCIP. Since MCEs can be at most one in a system, if an
+MCE was signaled, the above condition will promote to a system reset
+automatically. OS can turn off MCIP at the end of the update for that
+core.
+
+System Management Interrupt
+---------------------------
+
+SMIs are also broadcast to all CPUs in the platform. Microcode update
+requests exclusive access to the core before writing to MSR 0x79. So if
+it does happen such that, one thread is in WRMSR flow, and the 2nd got
+an SMI, that thread will be stopped in the first instruction in the SMI
+handler.
+
+Since the secondary thread is stopped in the first instruction in SMI,
+there is very little chance that it would be in the middle of executing
+an instruction being patched. Plus OS has no way to stop SMIs from
+happening.
+
+Non-Maskable Interrupts
+-----------------------
+
+When thread0 of a core is doing the microcode update, if thread1 is
+pulled into NMI, that can cause unpredictable behavior due to the
+reasons above.
+
+OS can choose a variety of methods to avoid running into this situation.
+
+
+Is the microcode suitable for late loading?
+-------------------------------------------
+
+Late loading is done when the system is fully operational and running
+real workloads. Late loading behavior depends on what the base patch on
+the CPU is before upgrading to the new patch.
+
+This is true for Intel CPUs.
+
+Consider, for example, a CPU has patch level 1 and the update is to
+patch level 3.
+
+Between patch1 and patch3, patch2 might have deprecated a software-visible
+feature.
+
+This is unacceptable if software is even potentially using that feature.
+For instance, say MSR_X is no longer available after an update,
+accessing that MSR will cause a #GP fault.
+
+Basically there is no way to declare a new microcode update suitable
+for late-loading. This is another one of the problems that caused late
+loading to be not enabled by default.
+
+Builtin microcode
+=================
+
+The loader supports also loading of a builtin microcode supplied through
+the regular builtin firmware method CONFIG_EXTRA_FIRMWARE. Only 64-bit is
+currently supported.
+
+Here's an example::
+
+ CONFIG_EXTRA_FIRMWARE="intel-ucode/06-3a-09 amd-ucode/microcode_amd_fam15h.bin"
+ CONFIG_EXTRA_FIRMWARE_DIR="/lib/firmware"
+
+This basically means, you have the following tree structure locally::
+
+ /lib/firmware/
+ |-- amd-ucode
+ ...
+ | |-- microcode_amd_fam15h.bin
+ ...
+ |-- intel-ucode
+ ...
+ | |-- 06-3a-09
+ ...
+
+so that the build system can find those files and integrate them into
+the final kernel image. The early loader finds them and applies them.
+
+Needless to say, this method is not the most flexible one because it
+requires rebuilding the kernel each time updated microcode from the CPU
+vendor is available.
diff --git a/Documentation/arch/x86/mtrr.rst b/Documentation/arch/x86/mtrr.rst
new file mode 100644
index 000000000000..f65ef034da7a
--- /dev/null
+++ b/Documentation/arch/x86/mtrr.rst
@@ -0,0 +1,354 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================================
+MTRR (Memory Type Range Register) control
+=========================================
+
+:Authors: - Richard Gooch <rgooch@atnf.csiro.au> - 3 Jun 1999
+ - Luis R. Rodriguez <mcgrof@do-not-panic.com> - April 9, 2015
+
+
+Phasing out MTRR use
+====================
+
+MTRR use is replaced on modern x86 hardware with PAT. Direct MTRR use by
+drivers on Linux is now completely phased out, device drivers should use
+arch_phys_wc_add() in combination with ioremap_wc() to make MTRR effective on
+non-PAT systems while a no-op but equally effective on PAT enabled systems.
+
+Even if Linux does not use MTRRs directly, some x86 platform firmware may still
+set up MTRRs early before booting the OS. They do this as some platform
+firmware may still have implemented access to MTRRs which would be controlled
+and handled by the platform firmware directly. An example of platform use of
+MTRRs is through the use of SMI handlers, one case could be for fan control,
+the platform code would need uncachable access to some of its fan control
+registers. Such platform access does not need any Operating System MTRR code in
+place other than mtrr_type_lookup() to ensure any OS specific mapping requests
+are aligned with platform MTRR setup. If MTRRs are only set up by the platform
+firmware code though and the OS does not make any specific MTRR mapping
+requests mtrr_type_lookup() should always return MTRR_TYPE_INVALID.
+
+For details refer to Documentation/arch/x86/pat.rst.
+
+.. tip::
+ On Intel P6 family processors (Pentium Pro, Pentium II and later)
+ the Memory Type Range Registers (MTRRs) may be used to control
+ processor access to memory ranges. This is most useful when you have
+ a video (VGA) card on a PCI or AGP bus. Enabling write-combining
+ allows bus write transfers to be combined into a larger transfer
+ before bursting over the PCI/AGP bus. This can increase performance
+ of image write operations 2.5 times or more.
+
+ The Cyrix 6x86, 6x86MX and M II processors have Address Range
+ Registers (ARRs) which provide a similar functionality to MTRRs. For
+ these, the ARRs are used to emulate the MTRRs.
+
+ The AMD K6-2 (stepping 8 and above) and K6-3 processors have two
+ MTRRs. These are supported. The AMD Athlon family provide 8 Intel
+ style MTRRs.
+
+ The Centaur C6 (WinChip) has 8 MCRs, allowing write-combining. These
+ are supported.
+
+ The VIA Cyrix III and VIA C3 CPUs offer 8 Intel style MTRRs.
+
+ The CONFIG_MTRR option creates a /proc/mtrr file which may be used
+ to manipulate your MTRRs. Typically the X server should use
+ this. This should have a reasonably generic interface so that
+ similar control registers on other processors can be easily
+ supported.
+
+There are two interfaces to /proc/mtrr: one is an ASCII interface
+which allows you to read and write. The other is an ioctl()
+interface. The ASCII interface is meant for administration. The
+ioctl() interface is meant for C programs (i.e. the X server). The
+interfaces are described below, with sample commands and C code.
+
+
+Reading MTRRs from the shell
+============================
+::
+
+ % cat /proc/mtrr
+ reg00: base=0x00000000 ( 0MB), size= 128MB: write-back, count=1
+ reg01: base=0x08000000 ( 128MB), size= 64MB: write-back, count=1
+
+Creating MTRRs from the C-shell::
+
+ # echo "base=0xf8000000 size=0x400000 type=write-combining" >! /proc/mtrr
+
+or if you use bash::
+
+ # echo "base=0xf8000000 size=0x400000 type=write-combining" >| /proc/mtrr
+
+And the result thereof::
+
+ % cat /proc/mtrr
+ reg00: base=0x00000000 ( 0MB), size= 128MB: write-back, count=1
+ reg01: base=0x08000000 ( 128MB), size= 64MB: write-back, count=1
+ reg02: base=0xf8000000 (3968MB), size= 4MB: write-combining, count=1
+
+This is for video RAM at base address 0xf8000000 and size 4 megabytes. To
+find out your base address, you need to look at the output of your X
+server, which tells you where the linear framebuffer address is. A
+typical line that you may get is::
+
+ (--) S3: PCI: 968 rev 0, Linear FB @ 0xf8000000
+
+Note that you should only use the value from the X server, as it may
+move the framebuffer base address, so the only value you can trust is
+that reported by the X server.
+
+To find out the size of your framebuffer (what, you don't actually
+know?), the following line will tell you::
+
+ (--) S3: videoram: 4096k
+
+That's 4 megabytes, which is 0x400000 bytes (in hexadecimal).
+A patch is being written for XFree86 which will make this automatic:
+in other words the X server will manipulate /proc/mtrr using the
+ioctl() interface, so users won't have to do anything. If you use a
+commercial X server, lobby your vendor to add support for MTRRs.
+
+
+Creating overlapping MTRRs
+==========================
+::
+
+ %echo "base=0xfb000000 size=0x1000000 type=write-combining" >/proc/mtrr
+ %echo "base=0xfb000000 size=0x1000 type=uncachable" >/proc/mtrr
+
+And the results::
+
+ % cat /proc/mtrr
+ reg00: base=0x00000000 ( 0MB), size= 64MB: write-back, count=1
+ reg01: base=0xfb000000 (4016MB), size= 16MB: write-combining, count=1
+ reg02: base=0xfb000000 (4016MB), size= 4kB: uncachable, count=1
+
+Some cards (especially Voodoo Graphics boards) need this 4 kB area
+excluded from the beginning of the region because it is used for
+registers.
+
+NOTE: You can only create type=uncachable region, if the first
+region that you created is type=write-combining.
+
+
+Removing MTRRs from the C-shel
+==============================
+::
+
+ % echo "disable=2" >! /proc/mtrr
+
+or using bash::
+
+ % echo "disable=2" >| /proc/mtrr
+
+
+Reading MTRRs from a C program using ioctl()'s
+==============================================
+::
+
+ /* mtrr-show.c
+
+ Source file for mtrr-show (example program to show MTRRs using ioctl()'s)
+
+ Copyright (C) 1997-1998 Richard Gooch
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ Richard Gooch may be reached by email at rgooch@atnf.csiro.au
+ The postal address is:
+ Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
+ */
+
+ /*
+ This program will use an ioctl() on /proc/mtrr to show the current MTRR
+ settings. This is an alternative to reading /proc/mtrr.
+
+
+ Written by Richard Gooch 17-DEC-1997
+
+ Last updated by Richard Gooch 2-MAY-1998
+
+
+ */
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <string.h>
+ #include <sys/types.h>
+ #include <sys/stat.h>
+ #include <fcntl.h>
+ #include <sys/ioctl.h>
+ #include <errno.h>
+ #include <asm/mtrr.h>
+
+ #define TRUE 1
+ #define FALSE 0
+ #define ERRSTRING strerror (errno)
+
+ static char *mtrr_strings[MTRR_NUM_TYPES] =
+ {
+ "uncachable", /* 0 */
+ "write-combining", /* 1 */
+ "?", /* 2 */
+ "?", /* 3 */
+ "write-through", /* 4 */
+ "write-protect", /* 5 */
+ "write-back", /* 6 */
+ };
+
+ int main ()
+ {
+ int fd;
+ struct mtrr_gentry gentry;
+
+ if ( ( fd = open ("/proc/mtrr", O_RDONLY, 0) ) == -1 )
+ {
+ if (errno == ENOENT)
+ {
+ fputs ("/proc/mtrr not found: not supported or you don't have a PPro?\n",
+ stderr);
+ exit (1);
+ }
+ fprintf (stderr, "Error opening /proc/mtrr\t%s\n", ERRSTRING);
+ exit (2);
+ }
+ for (gentry.regnum = 0; ioctl (fd, MTRRIOC_GET_ENTRY, &gentry) == 0;
+ ++gentry.regnum)
+ {
+ if (gentry.size < 1)
+ {
+ fprintf (stderr, "Register: %u disabled\n", gentry.regnum);
+ continue;
+ }
+ fprintf (stderr, "Register: %u base: 0x%lx size: 0x%lx type: %s\n",
+ gentry.regnum, gentry.base, gentry.size,
+ mtrr_strings[gentry.type]);
+ }
+ if (errno == EINVAL) exit (0);
+ fprintf (stderr, "Error doing ioctl(2) on /dev/mtrr\t%s\n", ERRSTRING);
+ exit (3);
+ } /* End Function main */
+
+
+Creating MTRRs from a C programme using ioctl()'s
+=================================================
+::
+
+ /* mtrr-add.c
+
+ Source file for mtrr-add (example programme to add an MTRRs using ioctl())
+
+ Copyright (C) 1997-1998 Richard Gooch
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ Richard Gooch may be reached by email at rgooch@atnf.csiro.au
+ The postal address is:
+ Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
+ */
+
+ /*
+ This programme will use an ioctl() on /proc/mtrr to add an entry. The first
+ available mtrr is used. This is an alternative to writing /proc/mtrr.
+
+
+ Written by Richard Gooch 17-DEC-1997
+
+ Last updated by Richard Gooch 2-MAY-1998
+
+
+ */
+ #include <stdio.h>
+ #include <string.h>
+ #include <stdlib.h>
+ #include <unistd.h>
+ #include <sys/types.h>
+ #include <sys/stat.h>
+ #include <fcntl.h>
+ #include <sys/ioctl.h>
+ #include <errno.h>
+ #include <asm/mtrr.h>
+
+ #define TRUE 1
+ #define FALSE 0
+ #define ERRSTRING strerror (errno)
+
+ static char *mtrr_strings[MTRR_NUM_TYPES] =
+ {
+ "uncachable", /* 0 */
+ "write-combining", /* 1 */
+ "?", /* 2 */
+ "?", /* 3 */
+ "write-through", /* 4 */
+ "write-protect", /* 5 */
+ "write-back", /* 6 */
+ };
+
+ int main (int argc, char **argv)
+ {
+ int fd;
+ struct mtrr_sentry sentry;
+
+ if (argc != 4)
+ {
+ fprintf (stderr, "Usage:\tmtrr-add base size type\n");
+ exit (1);
+ }
+ sentry.base = strtoul (argv[1], NULL, 0);
+ sentry.size = strtoul (argv[2], NULL, 0);
+ for (sentry.type = 0; sentry.type < MTRR_NUM_TYPES; ++sentry.type)
+ {
+ if (strcmp (argv[3], mtrr_strings[sentry.type]) == 0) break;
+ }
+ if (sentry.type >= MTRR_NUM_TYPES)
+ {
+ fprintf (stderr, "Illegal type: \"%s\"\n", argv[3]);
+ exit (2);
+ }
+ if ( ( fd = open ("/proc/mtrr", O_WRONLY, 0) ) == -1 )
+ {
+ if (errno == ENOENT)
+ {
+ fputs ("/proc/mtrr not found: not supported or you don't have a PPro?\n",
+ stderr);
+ exit (3);
+ }
+ fprintf (stderr, "Error opening /proc/mtrr\t%s\n", ERRSTRING);
+ exit (4);
+ }
+ if (ioctl (fd, MTRRIOC_ADD_ENTRY, &sentry) == -1)
+ {
+ fprintf (stderr, "Error doing ioctl(2) on /dev/mtrr\t%s\n", ERRSTRING);
+ exit (5);
+ }
+ fprintf (stderr, "Sleeping for 5 seconds so you can see the new entry\n");
+ sleep (5);
+ close (fd);
+ fputs ("I've just closed /proc/mtrr so now the new entry should be gone\n",
+ stderr);
+ } /* End Function main */
diff --git a/Documentation/arch/x86/orc-unwinder.rst b/Documentation/arch/x86/orc-unwinder.rst
new file mode 100644
index 000000000000..cdb257015bd9
--- /dev/null
+++ b/Documentation/arch/x86/orc-unwinder.rst
@@ -0,0 +1,182 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============
+ORC unwinder
+============
+
+Overview
+========
+
+The kernel CONFIG_UNWINDER_ORC option enables the ORC unwinder, which is
+similar in concept to a DWARF unwinder. The difference is that the
+format of the ORC data is much simpler than DWARF, which in turn allows
+the ORC unwinder to be much simpler and faster.
+
+The ORC data consists of unwind tables which are generated by objtool.
+They contain out-of-band data which is used by the in-kernel ORC
+unwinder. Objtool generates the ORC data by first doing compile-time
+stack metadata validation (CONFIG_STACK_VALIDATION). After analyzing
+all the code paths of a .o file, it determines information about the
+stack state at each instruction address in the file and outputs that
+information to the .orc_unwind and .orc_unwind_ip sections.
+
+The per-object ORC sections are combined at link time and are sorted and
+post-processed at boot time. The unwinder uses the resulting data to
+correlate instruction addresses with their stack states at run time.
+
+
+ORC vs frame pointers
+=====================
+
+With frame pointers enabled, GCC adds instrumentation code to every
+function in the kernel. The kernel's .text size increases by about
+3.2%, resulting in a broad kernel-wide slowdown. Measurements by Mel
+Gorman [1]_ have shown a slowdown of 5-10% for some workloads.
+
+In contrast, the ORC unwinder has no effect on text size or runtime
+performance, because the debuginfo is out of band. So if you disable
+frame pointers and enable the ORC unwinder, you get a nice performance
+improvement across the board, and still have reliable stack traces.
+
+Ingo Molnar says:
+
+ "Note that it's not just a performance improvement, but also an
+ instruction cache locality improvement: 3.2% .text savings almost
+ directly transform into a similarly sized reduction in cache
+ footprint. That can transform to even higher speedups for workloads
+ whose cache locality is borderline."
+
+Another benefit of ORC compared to frame pointers is that it can
+reliably unwind across interrupts and exceptions. Frame pointer based
+unwinds can sometimes skip the caller of the interrupted function, if it
+was a leaf function or if the interrupt hit before the frame pointer was
+saved.
+
+The main disadvantage of the ORC unwinder compared to frame pointers is
+that it needs more memory to store the ORC unwind tables: roughly 2-4MB
+depending on the kernel config.
+
+
+ORC vs DWARF
+============
+
+ORC debuginfo's advantage over DWARF itself is that it's much simpler.
+It gets rid of the complex DWARF CFI state machine and also gets rid of
+the tracking of unnecessary registers. This allows the unwinder to be
+much simpler, meaning fewer bugs, which is especially important for
+mission critical oops code.
+
+The simpler debuginfo format also enables the unwinder to be much faster
+than DWARF, which is important for perf and lockdep. In a basic
+performance test by Jiri Slaby [2]_, the ORC unwinder was about 20x
+faster than an out-of-tree DWARF unwinder. (Note: That measurement was
+taken before some performance tweaks were added, which doubled
+performance, so the speedup over DWARF may be closer to 40x.)
+
+The ORC data format does have a few downsides compared to DWARF. ORC
+unwind tables take up ~50% more RAM (+1.3MB on an x86 defconfig kernel)
+than DWARF-based eh_frame tables.
+
+Another potential downside is that, as GCC evolves, it's conceivable
+that the ORC data may end up being *too* simple to describe the state of
+the stack for certain optimizations. But IMO this is unlikely because
+GCC saves the frame pointer for any unusual stack adjustments it does,
+so I suspect we'll really only ever need to keep track of the stack
+pointer and the frame pointer between call frames. But even if we do
+end up having to track all the registers DWARF tracks, at least we will
+still be able to control the format, e.g. no complex state machines.
+
+
+ORC unwind table generation
+===========================
+
+The ORC data is generated by objtool. With the existing compile-time
+stack metadata validation feature, objtool already follows all code
+paths, and so it already has all the information it needs to be able to
+generate ORC data from scratch. So it's an easy step to go from stack
+validation to ORC data generation.
+
+It should be possible to instead generate the ORC data with a simple
+tool which converts DWARF to ORC data. However, such a solution would
+be incomplete due to the kernel's extensive use of asm, inline asm, and
+special sections like exception tables.
+
+That could be rectified by manually annotating those special code paths
+using GNU assembler .cfi annotations in .S files, and homegrown
+annotations for inline asm in .c files. But asm annotations were tried
+in the past and were found to be unmaintainable. They were often
+incorrect/incomplete and made the code harder to read and keep updated.
+And based on looking at glibc code, annotating inline asm in .c files
+might be even worse.
+
+Objtool still needs a few annotations, but only in code which does
+unusual things to the stack like entry code. And even then, far fewer
+annotations are needed than what DWARF would need, so they're much more
+maintainable than DWARF CFI annotations.
+
+So the advantages of using objtool to generate ORC data are that it
+gives more accurate debuginfo, with very few annotations. It also
+insulates the kernel from toolchain bugs which can be very painful to
+deal with in the kernel since we often have to workaround issues in
+older versions of the toolchain for years.
+
+The downside is that the unwinder now becomes dependent on objtool's
+ability to reverse engineer GCC code flow. If GCC optimizations become
+too complicated for objtool to follow, the ORC data generation might
+stop working or become incomplete. (It's worth noting that livepatch
+already has such a dependency on objtool's ability to follow GCC code
+flow.)
+
+If newer versions of GCC come up with some optimizations which break
+objtool, we may need to revisit the current implementation. Some
+possible solutions would be asking GCC to make the optimizations more
+palatable, or having objtool use DWARF as an additional input, or
+creating a GCC plugin to assist objtool with its analysis. But for now,
+objtool follows GCC code quite well.
+
+
+Unwinder implementation details
+===============================
+
+Objtool generates the ORC data by integrating with the compile-time
+stack metadata validation feature, which is described in detail in
+tools/objtool/Documentation/objtool.txt. After analyzing all
+the code paths of a .o file, it creates an array of orc_entry structs,
+and a parallel array of instruction addresses associated with those
+structs, and writes them to the .orc_unwind and .orc_unwind_ip sections
+respectively.
+
+The ORC data is split into the two arrays for performance reasons, to
+make the searchable part of the data (.orc_unwind_ip) more compact. The
+arrays are sorted in parallel at boot time.
+
+Performance is further improved by the use of a fast lookup table which
+is created at runtime. The fast lookup table associates a given address
+with a range of indices for the .orc_unwind table, so that only a small
+subset of the table needs to be searched.
+
+
+Etymology
+=========
+
+Orcs, fearsome creatures of medieval folklore, are the Dwarves' natural
+enemies. Similarly, the ORC unwinder was created in opposition to the
+complexity and slowness of DWARF.
+
+"Although Orcs rarely consider multiple solutions to a problem, they do
+excel at getting things done because they are creatures of action, not
+thought." [3]_ Similarly, unlike the esoteric DWARF unwinder, the
+veracious ORC unwinder wastes no time or siloconic effort decoding
+variable-length zero-extended unsigned-integer byte-coded
+state-machine-based debug information entries.
+
+Similar to how Orcs frequently unravel the well-intentioned plans of
+their adversaries, the ORC unwinder frequently unravels stacks with
+brutal, unyielding efficiency.
+
+ORC stands for Oops Rewind Capability.
+
+
+.. [1] https://lore.kernel.org/r/20170602104048.jkkzssljsompjdwy@suse.de
+.. [2] https://lore.kernel.org/r/d2ca5435-6386-29b8-db87-7f227c2b713a@suse.cz
+.. [3] http://dustin.wikidot.com/half-orcs-and-orcs
diff --git a/Documentation/arch/x86/pat.rst b/Documentation/arch/x86/pat.rst
new file mode 100644
index 000000000000..5d901771016d
--- /dev/null
+++ b/Documentation/arch/x86/pat.rst
@@ -0,0 +1,240 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========================
+PAT (Page Attribute Table)
+==========================
+
+x86 Page Attribute Table (PAT) allows for setting the memory attribute at the
+page level granularity. PAT is complementary to the MTRR settings which allows
+for setting of memory types over physical address ranges. However, PAT is
+more flexible than MTRR due to its capability to set attributes at page level
+and also due to the fact that there are no hardware limitations on number of
+such attribute settings allowed. Added flexibility comes with guidelines for
+not having memory type aliasing for the same physical memory with multiple
+virtual addresses.
+
+PAT allows for different types of memory attributes. The most commonly used
+ones that will be supported at this time are:
+
+=== ==============
+WB Write-back
+UC Uncached
+WC Write-combined
+WT Write-through
+UC- Uncached Minus
+=== ==============
+
+
+PAT APIs
+========
+
+There are many different APIs in the kernel that allows setting of memory
+attributes at the page level. In order to avoid aliasing, these interfaces
+should be used thoughtfully. Below is a table of interfaces available,
+their intended usage and their memory attribute relationships. Internally,
+these APIs use a reserve_memtype()/free_memtype() interface on the physical
+address range to avoid any aliasing.
+
++------------------------+----------+--------------+------------------+
+| API | RAM | ACPI,... | Reserved/Holes |
++------------------------+----------+--------------+------------------+
+| ioremap | -- | UC- | UC- |
++------------------------+----------+--------------+------------------+
+| ioremap_cache | -- | WB | WB |
++------------------------+----------+--------------+------------------+
+| ioremap_uc | -- | UC | UC |
++------------------------+----------+--------------+------------------+
+| ioremap_wc | -- | -- | WC |
++------------------------+----------+--------------+------------------+
+| ioremap_wt | -- | -- | WT |
++------------------------+----------+--------------+------------------+
+| set_memory_uc, | UC- | -- | -- |
+| set_memory_wb | | | |
++------------------------+----------+--------------+------------------+
+| set_memory_wc, | WC | -- | -- |
+| set_memory_wb | | | |
++------------------------+----------+--------------+------------------+
+| set_memory_wt, | WT | -- | -- |
+| set_memory_wb | | | |
++------------------------+----------+--------------+------------------+
+| pci sysfs resource | -- | -- | UC- |
++------------------------+----------+--------------+------------------+
+| pci sysfs resource_wc | -- | -- | WC |
+| is IORESOURCE_PREFETCH | | | |
++------------------------+----------+--------------+------------------+
+| pci proc | -- | -- | UC- |
+| !PCIIOC_WRITE_COMBINE | | | |
++------------------------+----------+--------------+------------------+
+| pci proc | -- | -- | WC |
+| PCIIOC_WRITE_COMBINE | | | |
++------------------------+----------+--------------+------------------+
+| /dev/mem | -- | WB/WC/UC- | WB/WC/UC- |
+| read-write | | | |
++------------------------+----------+--------------+------------------+
+| /dev/mem | -- | UC- | UC- |
+| mmap SYNC flag | | | |
++------------------------+----------+--------------+------------------+
+| /dev/mem | -- | WB/WC/UC- | WB/WC/UC- |
+| mmap !SYNC flag | | | |
+| and | |(from existing| (from existing |
+| any alias to this area | |alias) | alias) |
++------------------------+----------+--------------+------------------+
+| /dev/mem | -- | WB | WB |
+| mmap !SYNC flag | | | |
+| no alias to this area | | | |
+| and | | | |
+| MTRR says WB | | | |
++------------------------+----------+--------------+------------------+
+| /dev/mem | -- | -- | UC- |
+| mmap !SYNC flag | | | |
+| no alias to this area | | | |
+| and | | | |
+| MTRR says !WB | | | |
++------------------------+----------+--------------+------------------+
+
+
+Advanced APIs for drivers
+=========================
+
+A. Exporting pages to users with remap_pfn_range, io_remap_pfn_range,
+vmf_insert_pfn.
+
+Drivers wanting to export some pages to userspace do it by using mmap
+interface and a combination of:
+
+ 1) pgprot_noncached()
+ 2) io_remap_pfn_range() or remap_pfn_range() or vmf_insert_pfn()
+
+With PAT support, a new API pgprot_writecombine is being added. So, drivers can
+continue to use the above sequence, with either pgprot_noncached() or
+pgprot_writecombine() in step 1, followed by step 2.
+
+In addition, step 2 internally tracks the region as UC or WC in memtype
+list in order to ensure no conflicting mapping.
+
+Note that this set of APIs only works with IO (non RAM) regions. If driver
+wants to export a RAM region, it has to do set_memory_uc() or set_memory_wc()
+as step 0 above and also track the usage of those pages and use set_memory_wb()
+before the page is freed to free pool.
+
+MTRR effects on PAT / non-PAT systems
+=====================================
+
+The following table provides the effects of using write-combining MTRRs when
+using ioremap*() calls on x86 for both non-PAT and PAT systems. Ideally
+mtrr_add() usage will be phased out in favor of arch_phys_wc_add() which will
+be a no-op on PAT enabled systems. The region over which a arch_phys_wc_add()
+is made, should already have been ioremapped with WC attributes or PAT entries,
+this can be done by using ioremap_wc() / set_memory_wc(). Devices which
+combine areas of IO memory desired to remain uncacheable with areas where
+write-combining is desirable should consider use of ioremap_uc() followed by
+set_memory_wc() to white-list effective write-combined areas. Such use is
+nevertheless discouraged as the effective memory type is considered
+implementation defined, yet this strategy can be used as last resort on devices
+with size-constrained regions where otherwise MTRR write-combining would
+otherwise not be effective.
+::
+
+ ==== ======= === ========================= =====================
+ MTRR Non-PAT PAT Linux ioremap value Effective memory type
+ ==== ======= === ========================= =====================
+ PAT Non-PAT | PAT
+ |PCD |
+ ||PWT |
+ ||| |
+ WC 000 WB _PAGE_CACHE_MODE_WB WC | WC
+ WC 001 WC _PAGE_CACHE_MODE_WC WC* | WC
+ WC 010 UC- _PAGE_CACHE_MODE_UC_MINUS WC* | UC
+ WC 011 UC _PAGE_CACHE_MODE_UC UC | UC
+ ==== ======= === ========================= =====================
+
+ (*) denotes implementation defined and is discouraged
+
+.. note:: -- in the above table mean "Not suggested usage for the API". Some
+ of the --'s are strictly enforced by the kernel. Some others are not really
+ enforced today, but may be enforced in future.
+
+For ioremap and pci access through /sys or /proc - The actual type returned
+can be more restrictive, in case of any existing aliasing for that address.
+For example: If there is an existing uncached mapping, a new ioremap_wc can
+return uncached mapping in place of write-combine requested.
+
+set_memory_[uc|wc|wt] and set_memory_wb should be used in pairs, where driver
+will first make a region uc, wc or wt and switch it back to wb after use.
+
+Over time writes to /proc/mtrr will be deprecated in favor of using PAT based
+interfaces. Users writing to /proc/mtrr are suggested to use above interfaces.
+
+Drivers should use ioremap_[uc|wc] to access PCI BARs with [uc|wc] access
+types.
+
+Drivers should use set_memory_[uc|wc|wt] to set access type for RAM ranges.
+
+
+PAT debugging
+=============
+
+With CONFIG_DEBUG_FS enabled, PAT memtype list can be examined by::
+
+ # mount -t debugfs debugfs /sys/kernel/debug
+ # cat /sys/kernel/debug/x86/pat_memtype_list
+ PAT memtype list:
+ uncached-minus @ 0x7fadf000-0x7fae0000
+ uncached-minus @ 0x7fb19000-0x7fb1a000
+ uncached-minus @ 0x7fb1a000-0x7fb1b000
+ uncached-minus @ 0x7fb1b000-0x7fb1c000
+ uncached-minus @ 0x7fb1c000-0x7fb1d000
+ uncached-minus @ 0x7fb1d000-0x7fb1e000
+ uncached-minus @ 0x7fb1e000-0x7fb25000
+ uncached-minus @ 0x7fb25000-0x7fb26000
+ uncached-minus @ 0x7fb26000-0x7fb27000
+ uncached-minus @ 0x7fb27000-0x7fb28000
+ uncached-minus @ 0x7fb28000-0x7fb2e000
+ uncached-minus @ 0x7fb2e000-0x7fb2f000
+ uncached-minus @ 0x7fb2f000-0x7fb30000
+ uncached-minus @ 0x7fb31000-0x7fb32000
+ uncached-minus @ 0x80000000-0x90000000
+
+This list shows physical address ranges and various PAT settings used to
+access those physical address ranges.
+
+Another, more verbose way of getting PAT related debug messages is with
+"debugpat" boot parameter. With this parameter, various debug messages are
+printed to dmesg log.
+
+PAT Initialization
+==================
+
+The following table describes how PAT is initialized under various
+configurations. The PAT MSR must be updated by Linux in order to support WC
+and WT attributes. Otherwise, the PAT MSR has the value programmed in it
+by the firmware. Note, Xen enables WC attribute in the PAT MSR for guests.
+
+ ==== ===== ========================== ========= =======
+ MTRR PAT Call Sequence PAT State PAT MSR
+ ==== ===== ========================== ========= =======
+ E E MTRR -> PAT init Enabled OS
+ E D MTRR -> PAT init Disabled -
+ D E MTRR -> PAT disable Disabled BIOS
+ D D MTRR -> PAT disable Disabled -
+ - np/E PAT -> PAT disable Disabled BIOS
+ - np/D PAT -> PAT disable Disabled -
+ E !P/E MTRR -> PAT init Disabled BIOS
+ D !P/E MTRR -> PAT disable Disabled BIOS
+ !M !P/E MTRR stub -> PAT disable Disabled BIOS
+ ==== ===== ========================== ========= =======
+
+ Legend
+
+ ========= =======================================
+ E Feature enabled in CPU
+ D Feature disabled/unsupported in CPU
+ np "nopat" boot option specified
+ !P CONFIG_X86_PAT option unset
+ !M CONFIG_MTRR option unset
+ Enabled PAT state set to enabled
+ Disabled PAT state set to disabled
+ OS PAT initializes PAT MSR with OS setting
+ BIOS PAT keeps PAT MSR with BIOS setting
+ ========= =======================================
+
diff --git a/Documentation/arch/x86/pti.rst b/Documentation/arch/x86/pti.rst
new file mode 100644
index 000000000000..57e8392f61d3
--- /dev/null
+++ b/Documentation/arch/x86/pti.rst
@@ -0,0 +1,193 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========================
+Page Table Isolation (PTI)
+==========================
+
+Overview
+========
+
+Page Table Isolation (pti, previously known as KAISER [1]_) is a
+countermeasure against attacks on the shared user/kernel address
+space such as the "Meltdown" approach [2]_.
+
+To mitigate this class of attacks, we create an independent set of
+page tables for use only when running userspace applications. When
+the kernel is entered via syscalls, interrupts or exceptions, the
+page tables are switched to the full "kernel" copy. When the system
+switches back to user mode, the user copy is used again.
+
+The userspace page tables contain only a minimal amount of kernel
+data: only what is needed to enter/exit the kernel such as the
+entry/exit functions themselves and the interrupt descriptor table
+(IDT). There are a few strictly unnecessary things that get mapped
+such as the first C function when entering an interrupt (see
+comments in pti.c).
+
+This approach helps to ensure that side-channel attacks leveraging
+the paging structures do not function when PTI is enabled. It can be
+enabled by setting CONFIG_MITIGATION_PAGE_TABLE_ISOLATION=y at compile
+time. Once enabled at compile-time, it can be disabled at boot with
+the 'nopti' or 'pti=' kernel parameters (see kernel-parameters.txt).
+
+Page Table Management
+=====================
+
+When PTI is enabled, the kernel manages two sets of page tables.
+The first set is very similar to the single set which is present in
+kernels without PTI. This includes a complete mapping of userspace
+that the kernel can use for things like copy_to_user().
+
+Although _complete_, the user portion of the kernel page tables is
+crippled by setting the NX bit in the top level. This ensures
+that any missed kernel->user CR3 switch will immediately crash
+userspace upon executing its first instruction.
+
+The userspace page tables map only the kernel data needed to enter
+and exit the kernel. This data is entirely contained in the 'struct
+cpu_entry_area' structure which is placed in the fixmap which gives
+each CPU's copy of the area a compile-time-fixed virtual address.
+
+For new userspace mappings, the kernel makes the entries in its
+page tables like normal. The only difference is when the kernel
+makes entries in the top (PGD) level. In addition to setting the
+entry in the main kernel PGD, a copy of the entry is made in the
+userspace page tables' PGD.
+
+This sharing at the PGD level also inherently shares all the lower
+layers of the page tables. This leaves a single, shared set of
+userspace page tables to manage. One PTE to lock, one set of
+accessed bits, dirty bits, etc...
+
+Overhead
+========
+
+Protection against side-channel attacks is important. But,
+this protection comes at a cost:
+
+1. Increased Memory Use
+
+ a. Each process now needs an order-1 PGD instead of order-0.
+ (Consumes an additional 4k per process).
+ b. The 'cpu_entry_area' structure must be 2MB in size and 2MB
+ aligned so that it can be mapped by setting a single PMD
+ entry. This consumes nearly 2MB of RAM once the kernel
+ is decompressed, but no space in the kernel image itself.
+
+2. Runtime Cost
+
+ a. CR3 manipulation to switch between the page table copies
+ must be done at interrupt, syscall, and exception entry
+ and exit (it can be skipped when the kernel is interrupted,
+ though.) Moves to CR3 are on the order of a hundred
+ cycles, and are required at every entry and exit.
+ b. Percpu TSS is mapped into the user page tables to allow SYSCALL64 path
+ to work under PTI. This doesn't have a direct runtime cost but it can
+ be argued it opens certain timing attack scenarios.
+ c. Global pages are disabled for all kernel structures not
+ mapped into both kernel and userspace page tables. This
+ feature of the MMU allows different processes to share TLB
+ entries mapping the kernel. Losing the feature means more
+ TLB misses after a context switch. The actual loss of
+ performance is very small, however, never exceeding 1%.
+ d. Process Context IDentifiers (PCID) is a CPU feature that
+ allows us to skip flushing the entire TLB when switching page
+ tables by setting a special bit in CR3 when the page tables
+ are changed. This makes switching the page tables (at context
+ switch, or kernel entry/exit) cheaper. But, on systems with
+ PCID support, the context switch code must flush both the user
+ and kernel entries out of the TLB. The user PCID TLB flush is
+ deferred until the exit to userspace, minimizing the cost.
+ See intel.com/sdm for the gory PCID/INVPCID details.
+ e. The userspace page tables must be populated for each new
+ process. Even without PTI, the shared kernel mappings
+ are created by copying top-level (PGD) entries into each
+ new process. But, with PTI, there are now *two* kernel
+ mappings: one in the kernel page tables that maps everything
+ and one for the entry/exit structures. At fork(), we need to
+ copy both.
+ f. In addition to the fork()-time copying, there must also
+ be an update to the userspace PGD any time a set_pgd() is done
+ on a PGD used to map userspace. This ensures that the kernel
+ and userspace copies always map the same userspace
+ memory.
+ g. On systems without PCID support, each CR3 write flushes
+ the entire TLB. That means that each syscall, interrupt
+ or exception flushes the TLB.
+ h. INVPCID is a TLB-flushing instruction which allows flushing
+ of TLB entries for non-current PCIDs. Some systems support
+ PCIDs, but do not support INVPCID. On these systems, addresses
+ can only be flushed from the TLB for the current PCID. When
+ flushing a kernel address, we need to flush all PCIDs, so a
+ single kernel address flush will require a TLB-flushing CR3
+ write upon the next use of every PCID.
+
+Possible Future Work
+====================
+1. We can be more careful about not actually writing to CR3
+ unless its value is actually changed.
+2. Allow PTI to be enabled/disabled at runtime in addition to the
+ boot-time switching.
+
+Testing
+========
+
+To test stability of PTI, the following test procedure is recommended,
+ideally doing all of these in parallel:
+
+1. Set CONFIG_DEBUG_ENTRY=y
+2. Run several copies of all of the tools/testing/selftests/x86/ tests
+ (excluding MPX and protection_keys) in a loop on multiple CPUs for
+ several minutes. These tests frequently uncover corner cases in the
+ kernel entry code. In general, old kernels might cause these tests
+ themselves to crash, but they should never crash the kernel.
+3. Run the 'perf' tool in a mode (top or record) that generates many
+ frequent performance monitoring non-maskable interrupts (see "NMI"
+ in /proc/interrupts). This exercises the NMI entry/exit code which
+ is known to trigger bugs in code paths that did not expect to be
+ interrupted, including nested NMIs. Using "-c" boosts the rate of
+ NMIs, and using two -c with separate counters encourages nested NMIs
+ and less deterministic behavior.
+ ::
+
+ while true; do perf record -c 10000 -e instructions,cycles -a sleep 10; done
+
+4. Launch a KVM virtual machine.
+5. Run 32-bit binaries on systems supporting the SYSCALL instruction.
+ This has been a lightly-tested code path and needs extra scrutiny.
+
+Debugging
+=========
+
+Bugs in PTI cause a few different signatures of crashes
+that are worth noting here.
+
+ * Failures of the selftests/x86 code. Usually a bug in one of the
+ more obscure corners of entry_64.S
+ * Crashes in early boot, especially around CPU bringup. Bugs
+ in the mappings cause these.
+ * Crashes at the first interrupt. Caused by bugs in entry_64.S,
+ like screwing up a page table switch. Also caused by
+ incorrectly mapping the IRQ handler entry code.
+ * Crashes at the first NMI. The NMI code is separate from main
+ interrupt handlers and can have bugs that do not affect
+ normal interrupts. Also caused by incorrectly mapping NMI
+ code. NMIs that interrupt the entry code must be very
+ careful and can be the cause of crashes that show up when
+ running perf.
+ * Kernel crashes at the first exit to userspace. entry_64.S
+ bugs, or failing to map some of the exit code.
+ * Crashes at first interrupt that interrupts userspace. The paths
+ in entry_64.S that return to userspace are sometimes separate
+ from the ones that return to the kernel.
+ * Double faults: overflowing the kernel stack because of page
+ faults upon page faults. Caused by touching non-pti-mapped
+ data in the entry code, or forgetting to switch to kernel
+ CR3 before calling into C functions which are not pti-mapped.
+ * Userspace segfaults early in boot, sometimes manifesting
+ as mount(8) failing to mount the rootfs. These have
+ tended to be TLB invalidation issues. Usually invalidating
+ the wrong PCID, or otherwise missing an invalidation.
+
+.. [1] https://gruss.cc/files/kaiser.pdf
+.. [2] https://meltdownattack.com/meltdown.pdf
diff --git a/Documentation/arch/x86/resume.svg b/Documentation/arch/x86/resume.svg
new file mode 100644
index 000000000000..ad8839f762bf
--- /dev/null
+++ b/Documentation/arch/x86/resume.svg
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Do not edit this file with editors other than draw.io -->
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" width="582px" height="1152px" viewBox="-0.5 -0.5 582 1152" content="&lt;mxfile host=&quot;confluence.amd.com&quot; agent=&quot;Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36 Edg/134.0.0.0&quot; version=&quot;24.7.10&quot; scale=&quot;1&quot; border=&quot;0&quot;&gt;&#10; &lt;diagram id=&quot;lFF5s3GfZ4Py3fAf5dUH&quot; name=&quot;Page-1&quot;&gt;&#10; &lt;mxGraphModel dx=&quot;2364&quot; dy=&quot;1473&quot; grid=&quot;1&quot; gridSize=&quot;10&quot; guides=&quot;1&quot; tooltips=&quot;1&quot; connect=&quot;1&quot; arrows=&quot;1&quot; fold=&quot;1&quot; page=&quot;0&quot; pageScale=&quot;1&quot; pageWidth=&quot;850&quot; pageHeight=&quot;1100&quot; math=&quot;0&quot; shadow=&quot;0&quot;&gt;&#10; &lt;root&gt;&#10; &lt;mxCell id=&quot;0&quot; /&gt;&#10; &lt;mxCell id=&quot;1&quot; parent=&quot;0&quot; /&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-10&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-1&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-9&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-1&quot; value=&quot;Wakeup event occurs&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.start_2;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#0CF232;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;-240&quot; y=&quot;-190&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-8&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-3&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-4&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-56&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-3&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-13&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-3&quot; value=&quot;MP1 hands off control to OS&quot; style=&quot;rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;80&quot; y=&quot;-190&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-4&quot; value=&quot;&amp;lt;span style=&amp;quot;background-color: rgb(232, 205, 151);&amp;quot;&amp;gt;&amp;lt;span style=&amp;quot;color: rgb(0, 0, 0); font-family: Helvetica; font-size: 12px; font-style: normal; font-variant-ligatures: normal; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; orphans: 2; text-align: center; text-indent: 0px; text-transform: none; widows: 2; word-spacing: 0px; -webkit-text-stroke-width: 0px; white-space: normal; text-decoration-thickness: initial; text-decoration-style: initial; text-decoration-color: initial; float: none; display: inline !important;&amp;quot;&amp;gt;OS Moves one core out of ACPI C3&amp;lt;/span&amp;gt;&amp;lt;br&amp;gt;&amp;lt;/span&amp;gt;&quot; style=&quot;verticalLabelPosition=middle;verticalAlign=middle;html=1;shape=process;whiteSpace=wrap;rounded=1;size=0.14;arcSize=6;labelPosition=center;align=center;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;240&quot; y=&quot;-170&quot; width=&quot;100&quot; height=&quot;60&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-11&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-9&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-3&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-9&quot; value=&quot;MP0/MP1 boot process&quot; style=&quot;rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;-80&quot; y=&quot;-190&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-27&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-13&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-16&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-13&quot; value=&quot;OS checks all wake sources&quot; style=&quot;rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;80&quot; y=&quot;-40&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-29&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-15&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-17&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-44&quot; value=&quot;no&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];fontStyle=1;labelBackgroundColor=none;fontColor=#393C56;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;jeVlbFHk8Qahp5zcIn_D-29&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.28&quot; y=&quot;2&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;8&quot; y=&quot;-8&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-34&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=1;entryDx=0;entryDy=0;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-15&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-25&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-41&quot; value=&quot;yes&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];fontStyle=1;labelBackgroundColor=none;fontColor=#393C56;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;jeVlbFHk8Qahp5zcIn_D-34&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.1165&quot; y=&quot;-1&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;153&quot; y=&quot;11&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-15&quot; value=&quot;ACPI fixed &amp;lt;br&amp;gt;event active&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.decision;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;80&quot; y=&quot;260&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-28&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-16&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-15&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-43&quot; value=&quot;no&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];fontStyle=1;labelBackgroundColor=none;fontColor=#393C56;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;jeVlbFHk8Qahp5zcIn_D-28&quot;&gt;&#10; &lt;mxGeometry y=&quot;2&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;8&quot; y=&quot;-15&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-57&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=1;entryY=0.5;entryDx=0;entryDy=0;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-16&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-25&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-58&quot; value=&quot;yes&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];fontStyle=1;labelBackgroundColor=none;fontColor=#393C56;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;jeVlbFHk8Qahp5zcIn_D-57&quot;&gt;&#10; &lt;mxGeometry x=&quot;0.0145&quot; y=&quot;1&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;102&quot; y=&quot;9&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-16&quot; value=&quot;IRQ other &amp;lt;br&amp;gt;than ACPI SCI active&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.decision;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;80&quot; y=&quot;110&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-30&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-17&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;130&quot; y=&quot;560&quot; as=&quot;targetPoint&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-65&quot; value=&quot;no&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];fontStyle=1;labelBackgroundColor=none;fontColor=#393C56;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;jeVlbFHk8Qahp5zcIn_D-30&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.4694&quot; y=&quot;1&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;9&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-36&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-17&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-35&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-45&quot; value=&quot;yes&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];fontStyle=1;labelBackgroundColor=none;fontColor=#393C56;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;jeVlbFHk8Qahp5zcIn_D-36&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.2867&quot; y=&quot;2&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;5&quot; y=&quot;8&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-17&quot; value=&quot;GPIO&amp;lt;br&amp;gt;IRQ shared&amp;lt;br&amp;gt;with SCI&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.decision;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;80&quot; y=&quot;410&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-31&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;130&quot; y=&quot;660&quot; as=&quot;sourcePoint&quot; /&gt;&#10; &lt;mxPoint x=&quot;130&quot; y=&quot;710&quot; as=&quot;targetPoint&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-32&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-23&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;130&quot; y=&quot;810&quot; as=&quot;sourcePoint&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-48&quot; value=&quot;no&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];fontStyle=1;labelBackgroundColor=none;fontColor=#393C56;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;jeVlbFHk8Qahp5zcIn_D-32&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.1714&quot; y=&quot;-4&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;14&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-52&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=1;entryDx=0;entryDy=0;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-23&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-25&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-53&quot; value=&quot;yes&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];fontStyle=1;labelBackgroundColor=none;fontColor=#393C56;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;jeVlbFHk8Qahp5zcIn_D-52&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.5259&quot; y=&quot;-5&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;220&quot; y=&quot;15&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-62&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=1;entryDx=0;entryDy=0;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-23&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-54&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-64&quot; value=&quot;no&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];fontStyle=1;labelBackgroundColor=none;fontColor=#393C56;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;jeVlbFHk8Qahp5zcIn_D-62&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.7472&quot; y=&quot;3&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;-92&quot; y=&quot;13&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-23&quot; value=&quot;Any PM&amp;lt;br&amp;gt;wakeup event&amp;lt;br&amp;gt;pending&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.decision;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;80&quot; y=&quot;860&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-24&quot; value=&quot;Kernel resumes system&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.terminator;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#0CF232;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;-240&quot; y=&quot;-20&quot; width=&quot;100&quot; height=&quot;60&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-26&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-25&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-24&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-25&quot; value=&quot;uPEP driver removes OS_HINT&quot; style=&quot;rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;-240&quot; y=&quot;110&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-37&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=1;entryDx=0;entryDy=0;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-35&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-25&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-40&quot; value=&quot;yes&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];fontStyle=1;labelBackgroundColor=none;fontColor=#393C56;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;jeVlbFHk8Qahp5zcIn_D-37&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.56&quot; y=&quot;-2&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;67&quot; y=&quot;12&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-60&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=-0.008;entryY=0.422;entryDx=0;entryDy=0;exitX=0.5;exitY=1;exitDx=0;exitDy=0;exitPerimeter=0;entryPerimeter=0;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-35&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-38&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;-50&quot; y=&quot;535&quot; as=&quot;sourcePoint&quot; /&gt;&#10; &lt;mxPoint x=&quot;20&quot; y=&quot;685&quot; as=&quot;targetPoint&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-61&quot; value=&quot;no&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];fontStyle=1;labelBackgroundColor=none;fontColor=#393C56;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;jeVlbFHk8Qahp5zcIn_D-60&quot;&gt;&#10; &lt;mxGeometry x=&quot;0.1126&quot; y=&quot;-3&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;-16&quot; y=&quot;-85&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-35&quot; value=&quot;Any GPIO&amp;lt;br&amp;gt;w/ WAKESTS&amp;lt;br&amp;gt;active&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.decision;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;-90&quot; y=&quot;410&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-38&quot; value=&quot;Check for ACPI Notify() events&quot; style=&quot;rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;80&quot; y=&quot;560&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-49&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-39&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-23&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-50&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=1;entryDx=0;entryDy=0;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-39&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-25&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-51&quot; value=&quot;yes&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];fontStyle=1;labelBackgroundColor=none;fontColor=#393C56;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;jeVlbFHk8Qahp5zcIn_D-50&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.4506&quot; y=&quot;-2&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;215&quot; y=&quot;12&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-39&quot; value=&quot;Any GPE &amp;lt;br&amp;gt;pending&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.decision;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;80&quot; y=&quot;710&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-63&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-54&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-55&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-54&quot; value=&quot;OS moves active &amp;lt;br&amp;gt;core back to&amp;lt;br&amp;gt;ACPI C3&quot; style=&quot;rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;240&quot; y=&quot;110&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-55&quot; value=&quot;MP1 puts system back to sleep&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.terminator;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#F27979;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;240&quot; y=&quot;-20&quot; width=&quot;100&quot; height=&quot;60&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;/root&gt;&#10; &lt;/mxGraphModel&gt;&#10; &lt;/diagram&gt;&#10;&lt;/mxfile&gt;&#10;"><defs/><g><g data-cell-id="0"><g data-cell-id="1"><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-10"><g><path d="M 101 51 L 154.63 51" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 159.88 51 L 152.88 54.5 L 154.63 51 L 152.88 47.5 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-1"><g><ellipse cx="51" cy="51" rx="50" ry="50" fill="#0cf232" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 51px; margin-left: 2px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">Wakeup event occurs</div></div></div></foreignObject><image x="2" y="37" width="98" height="32" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-8"><g><path d="M 421 51 L 474.63 51" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 479.88 51 L 472.88 54.5 L 474.63 51 L 472.88 47.5 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-56"><g><path d="M 371 101 L 371 144.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 371 149.88 L 367.5 142.88 L 371 144.63 L 374.5 142.88 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-3"><g><rect x="321" y="1" width="100" height="100" rx="7" ry="7" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 51px; margin-left: 322px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">MP1 hands off control to OS</div></div></div></foreignObject><image x="322" y="37" width="98" height="32" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-4"><g><rect x="481" y="21" width="100" height="60" rx="3.6" ry="3.6" fill="#f2cc8f" stroke="#e07a5f" pointer-events="all"/><path d="M 495 21 L 495 81 M 567 21 L 567 81" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 70px; height: 1px; padding-top: 51px; margin-left: 496px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;"><span style="background-color: rgb(232, 205, 151);"><span style="color: rgb(0, 0, 0); font-family: Helvetica; font-size: 12px; font-style: normal; font-variant-ligatures: normal; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; orphans: 2; text-align: center; text-indent: 0px; text-transform: none; widows: 2; word-spacing: 0px; -webkit-text-stroke-width: 0px; white-space: normal; text-decoration-thickness: initial; text-decoration-style: initial; text-decoration-color: initial; float: none; display: inline !important;">OS Moves one core out of ACPI C3</span><br /></span></div></div></div></foreignObject><image x="496" y="30" width="70" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-11"><g><path d="M 261 51 L 314.63 51" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 319.88 51 L 312.88 54.5 L 314.63 51 L 312.88 47.5 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-9"><g><rect x="161" y="1" width="100" height="100" rx="7" ry="7" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 51px; margin-left: 162px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">MP0/MP1 boot process</div></div></div></foreignObject><image x="162" y="37" width="98" height="32" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-27"><g><path d="M 371 251 L 371 294.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 371 299.88 L 367.5 292.88 L 371 294.63 L 374.5 292.88 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-13"><g><rect x="321" y="151" width="100" height="100" rx="7" ry="7" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 201px; margin-left: 322px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">OS checks all wake sources</div></div></div></foreignObject><image x="322" y="187" width="98" height="32" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-29"><g><path d="M 371 551 L 371 594.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 371 599.88 L 367.5 592.88 L 371 594.63 L 374.5 592.88 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-44"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 562px; margin-left: 382px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">no</div></div></div></foreignObject><image x="375.5" y="556" width="13" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-34"><g><path d="M 321 501 L 51 501 L 51 407.37" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 51 402.12 L 54.5 409.12 L 51 407.37 L 47.5 409.12 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-41"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 512px; margin-left: 312px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">yes</div></div></div></foreignObject><image x="303" y="506" width="18" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-15"><g><path d="M 371 451 L 421 501 L 371 551 L 321 501 Z" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 501px; margin-left: 322px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">ACPI fixed <br />event active</div></div></div></foreignObject><image x="322" y="487" width="98" height="32" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-28"><g><path d="M 371 401 L 371 444.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 371 449.88 L 367.5 442.88 L 371 444.63 L 374.5 442.88 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-43"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 412px; margin-left: 382px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">no</div></div></div></foreignObject><image x="375.5" y="406" width="13" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-57"><g><path d="M 321 351 L 107.37 351" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 102.12 351 L 109.12 347.5 L 107.37 351 L 109.12 354.5 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-58"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 362px; margin-left: 312px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">yes</div></div></div></foreignObject><image x="303" y="356" width="18" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-16"><g><path d="M 371 301 L 421 351 L 371 401 L 321 351 Z" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 351px; margin-left: 322px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">IRQ other <br />than ACPI SCI active</div></div></div></foreignObject><image x="322" y="330" width="98" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-30"><g><path d="M 371 701 L 371 726 L 371 744.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 371 749.88 L 367.5 742.88 L 371 744.63 L 374.5 742.88 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-65"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 715px; margin-left: 382px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">no</div></div></div></foreignObject><image x="375.5" y="709" width="13" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-36"><g><path d="M 321 651 L 257.37 651" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 252.12 651 L 259.12 647.5 L 257.37 651 L 259.12 654.5 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-45"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 662px; margin-left: 302px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">yes</div></div></div></foreignObject><image x="293" y="656" width="18" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-17"><g><path d="M 371 601 L 421 651 L 371 701 L 321 651 Z" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 651px; margin-left: 322px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">GPIO<br />IRQ shared<br />with SCI</div></div></div></foreignObject><image x="322" y="630" width="98" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-31"><g><path d="M 371 851 L 371 894.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 371 899.88 L 367.5 892.88 L 371 894.63 L 374.5 892.88 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-32"><g><path d="M 371 1001 L 371.5 1026.5 L 371.13 1044.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 371.02 1049.88 L 367.67 1042.81 L 371.13 1044.63 L 374.66 1042.96 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-48"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 1023px; margin-left: 382px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">no</div></div></div></foreignObject><image x="375.5" y="1017" width="13" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-52"><g><path d="M 321 1101 L 51 1101 L 51 407.37" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 51 402.12 L 54.5 409.12 L 51 407.37 L 47.5 409.12 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-53"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 1112px; margin-left: 312px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">yes</div></div></div></foreignObject><image x="303" y="1106" width="18" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-62"><g><path d="M 421 1101 L 531 1101 L 531 407.37" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 531 402.12 L 534.5 409.12 L 531 407.37 L 527.5 409.12 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-64"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 1112px; margin-left: 432px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">no</div></div></div></foreignObject><image x="425.5" y="1106" width="13" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-23"><g><path d="M 371 1051 L 421 1101 L 371 1151 L 321 1101 Z" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 1101px; margin-left: 322px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">Any PM<br />wakeup event<br />pending</div></div></div></foreignObject><image x="322" y="1080" width="98" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-24"><g><path d="M 31.61 171 L 70.39 171 C 87.29 171 101 184.43 101 201 C 101 217.57 87.29 231 70.39 231 L 31.61 231 C 14.71 231 1 217.57 1 201 C 1 184.43 14.71 171 31.61 171 Z" fill="#0cf232" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 201px; margin-left: 2px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">Kernel resumes system</div></div></div></foreignObject><image x="2" y="187" width="98" height="32" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-26"><g><path d="M 51 301 L 51 237.37" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 51 232.12 L 54.5 239.12 L 51 237.37 L 47.5 239.12 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-25"><g><rect x="1" y="301" width="100" height="100" rx="7" ry="7" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 351px; margin-left: 2px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">uPEP driver removes OS_HINT</div></div></div></foreignObject><image x="2" y="330" width="98" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-37"><g><path d="M 151 651 L 51 651 L 51 407.37" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 51 402.12 L 54.5 409.12 L 51 407.37 L 47.5 409.12 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-40"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 662px; margin-left: 142px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">yes</div></div></div></foreignObject><image x="133" y="656" width="18" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-60"><g><path d="M 201 701 L 201 793.2 L 313.83 793.2" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 319.08 793.2 L 312.08 796.7 L 313.83 793.2 L 312.08 789.7 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-61"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 712px; margin-left: 211px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">no</div></div></div></foreignObject><image x="204.5" y="706" width="13" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-35"><g><path d="M 201 601 L 251 651 L 201 701 L 151 651 Z" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 651px; margin-left: 152px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">Any GPIO<br />w/ WAKESTS<br />active</div></div></div></foreignObject><image x="152" y="630" width="98" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-38"><g><rect x="321" y="751" width="100" height="100" rx="7" ry="7" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 801px; margin-left: 322px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">Check for ACPI Notify() events</div></div></div></foreignObject><image x="322" y="787" width="98" height="32" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-49"><g><path d="M 371 1001 L 371 1044.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 371 1049.88 L 367.5 1042.88 L 371 1044.63 L 374.5 1042.88 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-50"><g><path d="M 321 951 L 51 951 L 51 407.37" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 51 402.12 L 54.5 409.12 L 51 407.37 L 47.5 409.12 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-51"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 962px; margin-left: 312px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">yes</div></div></div></foreignObject><image x="303" y="956" width="18" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-39"><g><path d="M 371 901 L 421 951 L 371 1001 L 321 951 Z" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 951px; margin-left: 322px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">Any GPE <br />pending</div></div></div></foreignObject><image x="322" y="937" width="98" height="32" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-63"><g><path d="M 531 301 L 531 237.37" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 531 232.12 L 534.5 239.12 L 531 237.37 L 527.5 239.12 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-54"><g><rect x="481" y="301" width="100" height="100" rx="7" ry="7" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 351px; margin-left: 482px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">OS moves active <br />core back to<br />ACPI C3</div></div></div></foreignObject><image x="482" y="330" width="98" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-55"><g><path d="M 511.61 171 L 550.39 171 C 567.29 171 581 184.43 581 201 C 581 217.57 567.29 231 550.39 231 L 511.61 231 C 494.71 231 481 217.57 481 201 C 481 184.43 494.71 171 511.61 171 Z" fill="#f27979" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 201px; margin-left: 482px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">MP1 puts system back to sleep</div></div></div></foreignObject><image x="482" y="187" width="98" height="32" xlink:href=""/></switch></g></g></g></g></g></g></svg> \ No newline at end of file
diff --git a/Documentation/arch/x86/sgx.rst b/Documentation/arch/x86/sgx.rst
new file mode 100644
index 000000000000..d90796adc2ec
--- /dev/null
+++ b/Documentation/arch/x86/sgx.rst
@@ -0,0 +1,302 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============================
+Software Guard eXtensions (SGX)
+===============================
+
+Overview
+========
+
+Software Guard eXtensions (SGX) hardware enables for user space applications
+to set aside private memory regions of code and data:
+
+* Privileged (ring-0) ENCLS functions orchestrate the construction of the
+ regions.
+* Unprivileged (ring-3) ENCLU functions allow an application to enter and
+ execute inside the regions.
+
+These memory regions are called enclaves. An enclave can be only entered at a
+fixed set of entry points. Each entry point can hold a single hardware thread
+at a time. While the enclave is loaded from a regular binary file by using
+ENCLS functions, only the threads inside the enclave can access its memory. The
+region is denied from outside access by the CPU, and encrypted before it leaves
+from LLC.
+
+The support can be determined by
+
+ ``grep sgx /proc/cpuinfo``
+
+SGX must both be supported in the processor and enabled by the BIOS. If SGX
+appears to be unsupported on a system which has hardware support, ensure
+support is enabled in the BIOS. If a BIOS presents a choice between "Enabled"
+and "Software Enabled" modes for SGX, choose "Enabled".
+
+Enclave Page Cache
+==================
+
+SGX utilizes an *Enclave Page Cache (EPC)* to store pages that are associated
+with an enclave. It is contained in a BIOS-reserved region of physical memory.
+Unlike pages used for regular memory, pages can only be accessed from outside of
+the enclave during enclave construction with special, limited SGX instructions.
+
+Only a CPU executing inside an enclave can directly access enclave memory.
+However, a CPU executing inside an enclave may access normal memory outside the
+enclave.
+
+The kernel manages enclave memory similar to how it treats device memory.
+
+Enclave Page Types
+------------------
+
+**SGX Enclave Control Structure (SECS)**
+ Enclave's address range, attributes and other global data are defined
+ by this structure.
+
+**Regular (REG)**
+ Regular EPC pages contain the code and data of an enclave.
+
+**Thread Control Structure (TCS)**
+ Thread Control Structure pages define the entry points to an enclave and
+ track the execution state of an enclave thread.
+
+**Version Array (VA)**
+ Version Array pages contain 512 slots, each of which can contain a version
+ number for a page evicted from the EPC.
+
+Enclave Page Cache Map
+----------------------
+
+The processor tracks EPC pages in a hardware metadata structure called the
+*Enclave Page Cache Map (EPCM)*. The EPCM contains an entry for each EPC page
+which describes the owning enclave, access rights and page type among the other
+things.
+
+EPCM permissions are separate from the normal page tables. This prevents the
+kernel from, for instance, allowing writes to data which an enclave wishes to
+remain read-only. EPCM permissions may only impose additional restrictions on
+top of normal x86 page permissions.
+
+For all intents and purposes, the SGX architecture allows the processor to
+invalidate all EPCM entries at will. This requires that software be prepared to
+handle an EPCM fault at any time. In practice, this can happen on events like
+power transitions when the ephemeral key that encrypts enclave memory is lost.
+
+Application interface
+=====================
+
+Enclave build functions
+-----------------------
+
+In addition to the traditional compiler and linker build process, SGX has a
+separate enclave “build” process. Enclaves must be built before they can be
+executed (entered). The first step in building an enclave is opening the
+**/dev/sgx_enclave** device. Since enclave memory is protected from direct
+access, special privileged instructions are then used to copy data into enclave
+pages and establish enclave page permissions.
+
+.. kernel-doc:: arch/x86/kernel/cpu/sgx/ioctl.c
+ :functions: sgx_ioc_enclave_create
+ sgx_ioc_enclave_add_pages
+ sgx_ioc_enclave_init
+ sgx_ioc_enclave_provision
+
+Enclave runtime management
+--------------------------
+
+Systems supporting SGX2 additionally support changes to initialized
+enclaves: modifying enclave page permissions and type, and dynamically
+adding and removing of enclave pages. When an enclave accesses an address
+within its address range that does not have a backing page then a new
+regular page will be dynamically added to the enclave. The enclave is
+still required to run EACCEPT on the new page before it can be used.
+
+.. kernel-doc:: arch/x86/kernel/cpu/sgx/ioctl.c
+ :functions: sgx_ioc_enclave_restrict_permissions
+ sgx_ioc_enclave_modify_types
+ sgx_ioc_enclave_remove_pages
+
+Enclave vDSO
+------------
+
+Entering an enclave can only be done through SGX-specific EENTER and ERESUME
+functions, and is a non-trivial process. Because of the complexity of
+transitioning to and from an enclave, enclaves typically utilize a library to
+handle the actual transitions. This is roughly analogous to how glibc
+implementations are used by most applications to wrap system calls.
+
+Another crucial characteristic of enclaves is that they can generate exceptions
+as part of their normal operation that need to be handled in the enclave or are
+unique to SGX.
+
+Instead of the traditional signal mechanism to handle these exceptions, SGX
+can leverage special exception fixup provided by the vDSO. The kernel-provided
+vDSO function wraps low-level transitions to/from the enclave like EENTER and
+ERESUME. The vDSO function intercepts exceptions that would otherwise generate
+a signal and return the fault information directly to its caller. This avoids
+the need to juggle signal handlers.
+
+.. kernel-doc:: arch/x86/include/uapi/asm/sgx.h
+ :functions: vdso_sgx_enter_enclave_t
+
+ksgxd
+=====
+
+SGX support includes a kernel thread called *ksgxd*.
+
+EPC sanitization
+----------------
+
+ksgxd is started when SGX initializes. Enclave memory is typically ready
+for use when the processor powers on or resets. However, if SGX has been in
+use since the reset, enclave pages may be in an inconsistent state. This might
+occur after a crash and kexec() cycle, for instance. At boot, ksgxd
+reinitializes all enclave pages so that they can be allocated and re-used.
+
+The sanitization is done by going through EPC address space and applying the
+EREMOVE function to each physical page. Some enclave pages like SECS pages have
+hardware dependencies on other pages which prevents EREMOVE from functioning.
+Executing two EREMOVE passes removes the dependencies.
+
+Page reclaimer
+--------------
+
+Similar to the core kswapd, ksgxd, is responsible for managing the
+overcommitment of enclave memory. If the system runs out of enclave memory,
+*ksgxd* “swaps” enclave memory to normal memory.
+
+Launch Control
+==============
+
+SGX provides a launch control mechanism. After all enclave pages have been
+copied, kernel executes EINIT function, which initializes the enclave. Only after
+this the CPU can execute inside the enclave.
+
+EINIT function takes an RSA-3072 signature of the enclave measurement. The function
+checks that the measurement is correct and signature is signed with the key
+hashed to the four **IA32_SGXLEPUBKEYHASH{0, 1, 2, 3}** MSRs representing the
+SHA256 of a public key.
+
+Those MSRs can be configured by the BIOS to be either readable or writable.
+Linux supports only writable configuration in order to give full control to the
+kernel on launch control policy. Before calling EINIT function, the driver sets
+the MSRs to match the enclave's signing key.
+
+Encryption engines
+==================
+
+In order to conceal the enclave data while it is out of the CPU package, the
+memory controller has an encryption engine to transparently encrypt and decrypt
+enclave memory.
+
+In CPUs prior to Ice Lake, the Memory Encryption Engine (MEE) is used to
+encrypt pages leaving the CPU caches. MEE uses a n-ary Merkle tree with root in
+SRAM to maintain integrity of the encrypted data. This provides integrity and
+anti-replay protection but does not scale to large memory sizes because the time
+required to update the Merkle tree grows logarithmically in relation to the
+memory size.
+
+CPUs starting from Icelake use Total Memory Encryption (TME) in the place of
+MEE. TME-based SGX implementations do not have an integrity Merkle tree, which
+means integrity and replay-attacks are not mitigated. B, it includes
+additional changes to prevent cipher text from being returned and SW memory
+aliases from being created.
+
+DMA to enclave memory is blocked by range registers on both MEE and TME systems
+(SDM section 41.10).
+
+Usage Models
+============
+
+Shared Library
+--------------
+
+Sensitive data and the code that acts on it is partitioned from the application
+into a separate library. The library is then linked as a DSO which can be loaded
+into an enclave. The application can then make individual function calls into
+the enclave through special SGX instructions. A run-time within the enclave is
+configured to marshal function parameters into and out of the enclave and to
+call the correct library function.
+
+Application Container
+---------------------
+
+An application may be loaded into a container enclave which is specially
+configured with a library OS and run-time which permits the application to run.
+The enclave run-time and library OS work together to execute the application
+when a thread enters the enclave.
+
+Impact of Potential Kernel SGX Bugs
+===================================
+
+EPC leaks
+---------
+
+When EPC page leaks happen, a WARNING like this is shown in dmesg:
+
+"EREMOVE returned ... and an EPC page was leaked. SGX may become unusable..."
+
+This is effectively a kernel use-after-free of an EPC page, and due
+to the way SGX works, the bug is detected at freeing. Rather than
+adding the page back to the pool of available EPC pages, the kernel
+intentionally leaks the page to avoid additional errors in the future.
+
+When this happens, the kernel will likely soon leak more EPC pages, and
+SGX will likely become unusable because the memory available to SGX is
+limited. However, while this may be fatal to SGX, the rest of the kernel
+is unlikely to be impacted and should continue to work.
+
+As a result, when this happens, user should stop running any new
+SGX workloads, (or just any new workloads), and migrate all valuable
+workloads. Although a machine reboot can recover all EPC memory, the bug
+should be reported to Linux developers.
+
+
+Virtual EPC
+===========
+
+The implementation has also a virtual EPC driver to support SGX enclaves
+in guests. Unlike the SGX driver, an EPC page allocated by the virtual
+EPC driver doesn't have a specific enclave associated with it. This is
+because KVM doesn't track how a guest uses EPC pages.
+
+As a result, the SGX core page reclaimer doesn't support reclaiming EPC
+pages allocated to KVM guests through the virtual EPC driver. If the
+user wants to deploy SGX applications both on the host and in guests
+on the same machine, the user should reserve enough EPC (by taking out
+total virtual EPC size of all SGX VMs from the physical EPC size) for
+host SGX applications so they can run with acceptable performance.
+
+Architectural behavior is to restore all EPC pages to an uninitialized
+state also after a guest reboot. Because this state can be reached only
+through the privileged ``ENCLS[EREMOVE]`` instruction, ``/dev/sgx_vepc``
+provides the ``SGX_IOC_VEPC_REMOVE_ALL`` ioctl to execute the instruction
+on all pages in the virtual EPC.
+
+``EREMOVE`` can fail for three reasons. Userspace must pay attention
+to expected failures and handle them as follows:
+
+1. Page removal will always fail when any thread is running in the
+ enclave to which the page belongs. In this case the ioctl will
+ return ``EBUSY`` independent of whether it has successfully removed
+ some pages; userspace can avoid these failures by preventing execution
+ of any vcpu which maps the virtual EPC.
+
+2. Page removal will cause a general protection fault if two calls to
+ ``EREMOVE`` happen concurrently for pages that refer to the same
+ "SECS" metadata pages. This can happen if there are concurrent
+ invocations to ``SGX_IOC_VEPC_REMOVE_ALL``, or if a ``/dev/sgx_vepc``
+ file descriptor in the guest is closed at the same time as
+ ``SGX_IOC_VEPC_REMOVE_ALL``; it will also be reported as ``EBUSY``.
+ This can be avoided in userspace by serializing calls to the ioctl()
+ and to close(), but in general it should not be a problem.
+
+3. Finally, page removal will fail for SECS metadata pages which still
+ have child pages. Child pages can be removed by executing
+ ``SGX_IOC_VEPC_REMOVE_ALL`` on all ``/dev/sgx_vepc`` file descriptors
+ mapped into the guest. This means that the ioctl() must be called
+ twice: an initial set of calls to remove child pages and a subsequent
+ set of calls to remove SECS pages. The second set of calls is only
+ required for those mappings that returned a nonzero value from the
+ first call. It indicates a bug in the kernel or the userspace client
+ if any of the second round of ``SGX_IOC_VEPC_REMOVE_ALL`` calls has
+ a return code other than 0.
diff --git a/Documentation/arch/x86/shstk.rst b/Documentation/arch/x86/shstk.rst
new file mode 100644
index 000000000000..60260e809baf
--- /dev/null
+++ b/Documentation/arch/x86/shstk.rst
@@ -0,0 +1,179 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================================================
+Control-flow Enforcement Technology (CET) Shadow Stack
+======================================================
+
+CET Background
+==============
+
+Control-flow Enforcement Technology (CET) covers several related x86 processor
+features that provide protection against control flow hijacking attacks. CET
+can protect both applications and the kernel.
+
+CET introduces shadow stack and indirect branch tracking (IBT). A shadow stack
+is a secondary stack allocated from memory which cannot be directly modified by
+applications. When executing a CALL instruction, the processor pushes the
+return address to both the normal stack and the shadow stack. Upon
+function return, the processor pops the shadow stack copy and compares it
+to the normal stack copy. If the two differ, the processor raises a
+control-protection fault. IBT verifies indirect CALL/JMP targets are intended
+as marked by the compiler with 'ENDBR' opcodes. Not all CPU's have both Shadow
+Stack and Indirect Branch Tracking. Today in the 64-bit kernel, only userspace
+shadow stack and kernel IBT are supported.
+
+Requirements to use Shadow Stack
+================================
+
+To use userspace shadow stack you need HW that supports it, a kernel
+configured with it and userspace libraries compiled with it.
+
+The kernel Kconfig option is X86_USER_SHADOW_STACK. When compiled in, shadow
+stacks can be disabled at runtime with the kernel parameter: nousershstk.
+
+To build a user shadow stack enabled kernel, Binutils v2.29 or LLVM v6 or later
+are required.
+
+At run time, /proc/cpuinfo shows CET features if the processor supports
+CET. "user_shstk" means that userspace shadow stack is supported on the current
+kernel and HW.
+
+Application Enabling
+====================
+
+An application's CET capability is marked in its ELF note and can be verified
+from readelf/llvm-readelf output::
+
+ readelf -n <application> | grep -a SHSTK
+ properties: x86 feature: SHSTK
+
+The kernel does not process these applications markers directly. Applications
+or loaders must enable CET features using the interface described in section 4.
+Typically this would be done in dynamic loader or static runtime objects, as is
+the case in GLIBC.
+
+Enabling arch_prctl()'s
+=======================
+
+Elf features should be enabled by the loader using the below arch_prctl's. They
+are only supported in 64 bit user applications. These operate on the features
+on a per-thread basis. The enablement status is inherited on clone, so if the
+feature is enabled on the first thread, it will propagate to all the thread's
+in an app.
+
+arch_prctl(ARCH_SHSTK_ENABLE, unsigned long feature)
+ Enable a single feature specified in 'feature'. Can only operate on
+ one feature at a time.
+
+arch_prctl(ARCH_SHSTK_DISABLE, unsigned long feature)
+ Disable a single feature specified in 'feature'. Can only operate on
+ one feature at a time.
+
+arch_prctl(ARCH_SHSTK_LOCK, unsigned long features)
+ Lock in features at their current enabled or disabled status. 'features'
+ is a mask of all features to lock. All bits set are processed, unset bits
+ are ignored. The mask is ORed with the existing value. So any feature bits
+ set here cannot be enabled or disabled afterwards.
+
+arch_prctl(ARCH_SHSTK_UNLOCK, unsigned long features)
+ Unlock features. 'features' is a mask of all features to unlock. All
+ bits set are processed, unset bits are ignored. Only works via ptrace.
+
+arch_prctl(ARCH_SHSTK_STATUS, unsigned long addr)
+ Copy the currently enabled features to the address passed in addr. The
+ features are described using the bits passed into the others in
+ 'features'.
+
+The return values are as follows. On success, return 0. On error, errno can
+be::
+
+ -EPERM if any of the passed feature are locked.
+ -ENOTSUPP if the feature is not supported by the hardware or
+ kernel.
+ -EINVAL arguments (non existing feature, etc)
+ -EFAULT if could not copy information back to userspace
+
+The feature's bits supported are::
+
+ ARCH_SHSTK_SHSTK - Shadow stack
+ ARCH_SHSTK_WRSS - WRSS
+
+Currently shadow stack and WRSS are supported via this interface. WRSS
+can only be enabled with shadow stack, and is automatically disabled
+if shadow stack is disabled.
+
+Proc Status
+===========
+To check if an application is actually running with shadow stack, the
+user can read the /proc/$PID/status. It will report "wrss" or "shstk"
+depending on what is enabled. The lines look like this::
+
+ x86_Thread_features: shstk wrss
+ x86_Thread_features_locked: shstk wrss
+
+Implementation of the Shadow Stack
+==================================
+
+Shadow Stack Size
+-----------------
+
+A task's shadow stack is allocated from memory to a fixed size of
+MIN(RLIMIT_STACK, 4 GB). In other words, the shadow stack is allocated to
+the maximum size of the normal stack, but capped to 4 GB. In the case
+of the clone3 syscall, there is a stack size passed in and shadow stack
+uses this instead of the rlimit.
+
+Signal
+------
+
+The main program and its signal handlers use the same shadow stack. Because
+the shadow stack stores only return addresses, a large shadow stack covers
+the condition that both the program stack and the signal alternate stack run
+out.
+
+When a signal happens, the old pre-signal state is pushed on the stack. When
+shadow stack is enabled, the shadow stack specific state is pushed onto the
+shadow stack. Today this is only the old SSP (shadow stack pointer), pushed
+in a special format with bit 63 set. On sigreturn this old SSP token is
+verified and restored by the kernel. The kernel will also push the normal
+restorer address to the shadow stack to help userspace avoid a shadow stack
+violation on the sigreturn path that goes through the restorer.
+
+So the shadow stack signal frame format is as follows::
+
+ |1...old SSP| - Pointer to old pre-signal ssp in sigframe token format
+ (bit 63 set to 1)
+ | ...| - Other state may be added in the future
+
+
+32 bit ABI signals are not supported in shadow stack processes. Linux prevents
+32 bit execution while shadow stack is enabled by the allocating shadow stacks
+outside of the 32 bit address space. When execution enters 32 bit mode, either
+via far call or returning to userspace, a #GP is generated by the hardware
+which, will be delivered to the process as a segfault. When transitioning to
+userspace the register's state will be as if the userspace ip being returned to
+caused the segfault.
+
+Fork
+----
+
+The shadow stack's vma has VM_SHADOW_STACK flag set; its PTEs are required
+to be read-only and dirty. When a shadow stack PTE is not RO and dirty, a
+shadow access triggers a page fault with the shadow stack access bit set
+in the page fault error code.
+
+When a task forks a child, its shadow stack PTEs are copied and both the
+parent's and the child's shadow stack PTEs are cleared of the dirty bit.
+Upon the next shadow stack access, the resulting shadow stack page fault
+is handled by page copy/re-use.
+
+When a pthread child is created, the kernel allocates a new shadow stack
+for the new thread. New shadow stack creation behaves like mmap() with respect
+to ASLR behavior. Similarly, on thread exit the thread's shadow stack is
+disabled.
+
+Exec
+----
+
+On exec, shadow stack features are disabled by the kernel. At which point,
+userspace can choose to re-enable, or lock them.
diff --git a/Documentation/arch/x86/suspend.svg b/Documentation/arch/x86/suspend.svg
new file mode 100644
index 000000000000..a69073c018d5
--- /dev/null
+++ b/Documentation/arch/x86/suspend.svg
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Do not edit this file with editors other than draw.io -->
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" width="407px" height="1132px" viewBox="-0.5 -0.5 407 1132" content="&lt;mxfile host=&quot;confluence.amd.com&quot; agent=&quot;Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36 Edg/134.0.0.0&quot; version=&quot;24.7.10&quot; scale=&quot;1&quot; border=&quot;0&quot;&gt;&#10; &lt;diagram id=&quot;46NsKM0iVOHTgNer6hpB&quot; name=&quot;Page-1&quot;&gt;&#10; &lt;mxGraphModel dx=&quot;1964&quot; dy=&quot;1073&quot; grid=&quot;1&quot; gridSize=&quot;10&quot; guides=&quot;1&quot; tooltips=&quot;1&quot; connect=&quot;1&quot; arrows=&quot;1&quot; fold=&quot;1&quot; page=&quot;0&quot; pageScale=&quot;1&quot; pageWidth=&quot;850&quot; pageHeight=&quot;1100&quot; math=&quot;0&quot; shadow=&quot;0&quot;&gt;&#10; &lt;root&gt;&#10; &lt;mxCell id=&quot;0&quot; /&gt;&#10; &lt;mxCell id=&quot;1&quot; parent=&quot;0&quot; /&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-21&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-4&quot; target=&quot;8N6JJebqrzA787TgpwUj-12&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-4&quot; value=&quot;SFH driver notifies MP2 to stop all sensor collection&quot; style=&quot;rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;120&quot; y=&quot;420&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-6&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=0;entryDx=0;entryDy=0;exitX=0.5;exitY=1;exitDx=0;exitDy=0;exitPerimeter=0;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-51&quot; target=&quot;8N6JJebqrzA787TgpwUj-4&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;330&quot; y=&quot;400&quot; as=&quot;sourcePoint&quot; /&gt;&#10; &lt;mxPoint x=&quot;170&quot; y=&quot;450&quot; as=&quot;targetPoint&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-37&quot; value=&quot;no&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];labelBackgroundColor=none;fontColor=#393C56;fontStyle=1;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;8N6JJebqrzA787TgpwUj-6&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.2133&quot; y=&quot;-1&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;-22&quot; y=&quot;16&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-9&quot; value=&quot;Abort suspend; details logged in dmesg&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.terminator;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#F27979;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;425&quot; y=&quot;140&quot; width=&quot;100&quot; height=&quot;60&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-12&quot; value=&quot;Failures?&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.decision;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;280&quot; y=&quot;420&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-18&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=1;entryDx=0;entryDy=0;entryPerimeter=0;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; target=&quot;8N6JJebqrzA787TgpwUj-9&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;380&quot; y=&quot;320&quot; as=&quot;sourcePoint&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-19&quot; value=&quot;yes&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];labelBackgroundColor=none;fontColor=#393C56;fontStyle=1;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;8N6JJebqrzA787TgpwUj-18&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.3265&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;-27&quot; y=&quot;10&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-24&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=0;entryDx=0;entryDy=0;exitX=0.5;exitY=1;exitDx=0;exitDy=0;exitPerimeter=0;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-12&quot; target=&quot;8N6JJebqrzA787TgpwUj-28&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;340&quot; y=&quot;570&quot; as=&quot;sourcePoint&quot; /&gt;&#10; &lt;mxPoint x=&quot;180&quot; y=&quot;620&quot; as=&quot;targetPoint&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-38&quot; value=&quot;no&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];labelBackgroundColor=none;fontColor=#393C56;fontStyle=1;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;8N6JJebqrzA787TgpwUj-24&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.0038&quot; y=&quot;2&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint y=&quot;13&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-26&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=1;entryDx=0;entryDy=0;entryPerimeter=0;exitX=1;exitY=0.5;exitDx=0;exitDy=0;exitPerimeter=0;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-12&quot; target=&quot;8N6JJebqrzA787TgpwUj-9&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;410&quot; y=&quot;530&quot; as=&quot;sourcePoint&quot; /&gt;&#10; &lt;mxPoint x=&quot;555&quot; y=&quot;230&quot; as=&quot;targetPoint&quot; /&gt;&#10; &lt;Array as=&quot;points&quot;&gt;&#10; &lt;mxPoint x=&quot;475&quot; y=&quot;470&quot; /&gt;&#10; &lt;/Array&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-35&quot; value=&quot;yes&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];labelBackgroundColor=none;fontColor=#393C56;fontStyle=1;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;8N6JJebqrzA787TgpwUj-26&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.7458&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;-1&quot; y=&quot;10&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-30&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-28&quot; target=&quot;8N6JJebqrzA787TgpwUj-29&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-28&quot; value=&quot;All devices go into deepest D-state or F-state&quot; style=&quot;rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;120&quot; y=&quot;570&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-29&quot; value=&quot;Failures?&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.decision;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;280&quot; y=&quot;570&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-31&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=1;exitDx=0;exitDy=0;exitPerimeter=0;entryX=0.5;entryY=0;entryDx=0;entryDy=0;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-29&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;330&quot; y=&quot;760&quot; as=&quot;sourcePoint&quot; /&gt;&#10; &lt;mxPoint x=&quot;170&quot; y=&quot;720&quot; as=&quot;targetPoint&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-64&quot; value=&quot;no&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];strokeColor=#E07A5F;fontColor=#393C56;fillColor=#F2CC8F;fontStyle=1;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;8N6JJebqrzA787TgpwUj-31&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.0683&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint y=&quot;15&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-34&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=1;entryDx=0;entryDy=0;entryPerimeter=0;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-29&quot; target=&quot;8N6JJebqrzA787TgpwUj-9&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-36&quot; value=&quot;yes&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];labelBackgroundColor=none;fontColor=#393C56;fontStyle=1;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;8N6JJebqrzA787TgpwUj-34&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.8315&quot; y=&quot;-1&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;2&quot; y=&quot;9&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-46&quot; value=&quot;GPIO driver suspends non-wake GPIOs&quot; style=&quot;rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;120&quot; y=&quot;720&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-47&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-48&quot; target=&quot;8N6JJebqrzA787TgpwUj-50&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-48&quot; value=&quot;Suspend initiated from userspace&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.start_2;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#0CF232;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;120&quot; y=&quot;120&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-49&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-50&quot; target=&quot;8N6JJebqrzA787TgpwUj-51&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-50&quot; value=&quot;GPU driver shuts down clocks and sends SMU messages&quot; style=&quot;rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;120&quot; y=&quot;270&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-51&quot; value=&quot;Failures?&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.decision;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;280&quot; y=&quot;270&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-53&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-54&quot; target=&quot;8N6JJebqrzA787TgpwUj-56&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-54&quot; value=&quot;ACPI s2idle driver notifies EC using _DSM&quot; style=&quot;rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;120&quot; y=&quot;870&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-55&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-56&quot; target=&quot;8N6JJebqrzA787TgpwUj-58&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-56&quot; value=&quot;uPEP driver (amd-pmc) sends OS_HINT&quot; style=&quot;rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;120&quot; y=&quot;1010&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-57&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-58&quot; target=&quot;8N6JJebqrzA787TgpwUj-59&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-58&quot; value=&quot;Put all x86 CPU cores into ACPI C3&quot; style=&quot;rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;120&quot; y=&quot;1150&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-59&quot; value=&quot;s2idle loop waiting for IRQ &amp;lt;br&amp;gt;to wake&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.terminator;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#0CF232;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;280&quot; y=&quot;1170&quot; width=&quot;100&quot; height=&quot;60&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-65&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=1;exitDx=0;exitDy=0;exitPerimeter=0;entryX=0.5;entryY=0;entryDx=0;entryDy=0;strokeColor=#E07A5F;fontColor=#393C56;fillColor=#F2CC8F;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-60&quot; target=&quot;8N6JJebqrzA787TgpwUj-54&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-66&quot; value=&quot;no&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];strokeColor=#E07A5F;fontColor=#393C56;fillColor=#F2CC8F;fontStyle=1;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;8N6JJebqrzA787TgpwUj-65&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.144&quot; y=&quot;-4&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;-4&quot; y=&quot;14&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-60&quot; value=&quot;Failures?&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.decision;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;280&quot; y=&quot;720&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-61&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=1;entryDx=0;entryDy=0;entryPerimeter=0;strokeColor=#E07A5F;fontColor=#393C56;fillColor=#F2CC8F;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-60&quot; target=&quot;8N6JJebqrzA787TgpwUj-9&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-62&quot; value=&quot;yes&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];labelBackgroundColor=none;fontColor=#393C56;fontStyle=1;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;440&quot; y=&quot;620&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;-14&quot; y=&quot;160&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;/root&gt;&#10; &lt;/mxGraphModel&gt;&#10; &lt;/diagram&gt;&#10;&lt;/mxfile&gt;&#10;"><defs/><g><g data-cell-id="0"><g data-cell-id="1"><g data-cell-id="8N6JJebqrzA787TgpwUj-21"><g><path d="M 101 351 L 154.63 351" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 159.88 351 L 152.88 354.5 L 154.63 351 L 152.88 347.5 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-4"><g><rect x="1" y="301" width="100" height="100" rx="7" ry="7" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 351px; margin-left: 2px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">SFH driver notifies MP2 to stop all sensor collection</div></div></div></foreignObject><image x="2" y="322.5" width="98" height="61" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-6"><g><path d="M 211 251 L 211 276 L 51 276 L 51 294.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 51 299.88 L 47.5 292.88 L 51 294.63 L 54.5 292.88 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="8N6JJebqrzA787TgpwUj-37"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 292px; margin-left: 132px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">no</div></div></div></foreignObject><image x="125.5" y="286" width="13" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-9"><g><path d="M 336.61 21 L 375.39 21 C 392.29 21 406 34.43 406 51 C 406 67.57 392.29 81 375.39 81 L 336.61 81 C 319.71 81 306 67.57 306 51 C 306 34.43 319.71 21 336.61 21 Z" fill="#f27979" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 51px; margin-left: 307px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">Abort suspend; details logged in dmesg</div></div></div></foreignObject><image x="307" y="30" width="98" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-12"><g><path d="M 211 301 L 261 351 L 211 401 L 161 351 Z" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 351px; margin-left: 162px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">Failures?</div></div></div></foreignObject><image x="162" y="344.5" width="98" height="17" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-18"><g><path d="M 261 201 L 356 201.5 L 356 87.37" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 356 82.12 L 359.5 89.12 L 356 87.37 L 352.5 89.12 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="8N6JJebqrzA787TgpwUj-19"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 212px; margin-left: 307px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">yes</div></div></div></foreignObject><image x="298" y="206" width="18" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-24"><g><path d="M 211 401 L 211 426 L 51 426 L 51 444.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 51 449.88 L 47.5 442.88 L 51 444.63 L 54.5 442.88 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="8N6JJebqrzA787TgpwUj-38"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 442px; margin-left: 132px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">no</div></div></div></foreignObject><image x="125.5" y="436" width="13" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-26"><g><path d="M 261 351 L 356 351 L 356 87.37" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 356 82.12 L 359.5 89.12 L 356 87.37 L 352.5 89.12 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="8N6JJebqrzA787TgpwUj-35"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 362px; margin-left: 307px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">yes</div></div></div></foreignObject><image x="298" y="356" width="18" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-30"><g><path d="M 101 501 L 154.63 501" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 159.88 501 L 152.88 504.5 L 154.63 501 L 152.88 497.5 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-28"><g><rect x="1" y="451" width="100" height="100" rx="7" ry="7" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 501px; margin-left: 2px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">All devices go into deepest D-state or F-state</div></div></div></foreignObject><image x="2" y="480" width="98" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-29"><g><path d="M 211 451 L 261 501 L 211 551 L 161 501 Z" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 501px; margin-left: 162px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">Failures?</div></div></div></foreignObject><image x="162" y="494.5" width="98" height="17" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-31"><g><path d="M 211 551 L 211 576 L 51.5 576 L 51.13 594.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 51.02 599.88 L 47.66 592.81 L 51.13 594.63 L 54.66 592.95 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="8N6JJebqrzA787TgpwUj-64"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 592px; margin-left: 139px;"><div data-drawio-colors="color: #393C56; background-color: rgb(255, 255, 255); " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; background-color: rgb(255, 255, 255); white-space: nowrap;">no</div></div></div></foreignObject><image x="132.5" y="586" width="13" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-34"><g><path d="M 261 501 L 356 501 L 356 87.37" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 356 82.12 L 359.5 89.12 L 356 87.37 L 352.5 89.12 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="8N6JJebqrzA787TgpwUj-36"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 512px; margin-left: 307px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">yes</div></div></div></foreignObject><image x="298" y="506" width="18" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-46"><g><rect x="1" y="601" width="100" height="100" rx="7" ry="7" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 651px; margin-left: 2px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">GPIO driver suspends non-wake GPIOs</div></div></div></foreignObject><image x="2" y="630" width="98" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-47"><g><path d="M 51 101 L 51 144.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 51 149.88 L 47.5 142.88 L 51 144.63 L 54.5 142.88 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-48"><g><ellipse cx="51" cy="51" rx="50" ry="50" fill="#0cf232" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 51px; margin-left: 2px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">Suspend initiated from userspace</div></div></div></foreignObject><image x="2" y="37" width="98" height="32" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-49"><g><path d="M 101 201 L 154.63 201" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 159.88 201 L 152.88 204.5 L 154.63 201 L 152.88 197.5 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-50"><g><rect x="1" y="151" width="100" height="100" rx="7" ry="7" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 201px; margin-left: 2px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">GPU driver shuts down clocks and sends SMU messages</div></div></div></foreignObject><image x="2" y="172.5" width="98" height="61" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-51"><g><path d="M 211 151 L 261 201 L 211 251 L 161 201 Z" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 201px; margin-left: 162px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">Failures?</div></div></div></foreignObject><image x="162" y="194.5" width="98" height="17" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-53"><g><path d="M 51 851 L 51 884.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 51 889.88 L 47.5 882.88 L 51 884.63 L 54.5 882.88 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-54"><g><rect x="1" y="751" width="100" height="100" rx="7" ry="7" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 801px; margin-left: 2px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">ACPI s2idle driver notifies EC using _DSM</div></div></div></foreignObject><image x="2" y="780" width="98" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-55"><g><path d="M 51 991 L 51 1024.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 51 1029.88 L 47.5 1022.88 L 51 1024.63 L 54.5 1022.88 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-56"><g><rect x="1" y="891" width="100" height="100" rx="7" ry="7" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 941px; margin-left: 2px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">uPEP driver (amd-pmc) sends OS_HINT</div></div></div></foreignObject><image x="2" y="920" width="98" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-57"><g><path d="M 101 1081 L 154.63 1081" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 159.88 1081 L 152.88 1084.5 L 154.63 1081 L 152.88 1077.5 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-58"><g><rect x="1" y="1031" width="100" height="100" rx="7" ry="7" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 1081px; margin-left: 2px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">Put all x86 CPU cores into ACPI C3</div></div></div></foreignObject><image x="2" y="1060" width="98" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-59"><g><path d="M 191.61 1051 L 230.39 1051 C 247.29 1051 261 1064.43 261 1081 C 261 1097.57 247.29 1111 230.39 1111 L 191.61 1111 C 174.71 1111 161 1097.57 161 1081 C 161 1064.43 174.71 1051 191.61 1051 Z" fill="#0cf232" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 1081px; margin-left: 162px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">s2idle loop waiting for IRQ <br />to wake</div></div></div></foreignObject><image x="162" y="1060" width="98" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-65"><g><path d="M 211 701 L 211 726 L 51 726 L 51 744.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 51 749.88 L 47.5 742.88 L 51 744.63 L 54.5 742.88 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="8N6JJebqrzA787TgpwUj-66"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 737px; margin-left: 143px;"><div data-drawio-colors="color: #393C56; background-color: rgb(255, 255, 255); " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; background-color: rgb(255, 255, 255); white-space: nowrap;">no</div></div></div></foreignObject><image x="136.5" y="731" width="13" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-60"><g><path d="M 211 601 L 261 651 L 211 701 L 161 651 Z" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 651px; margin-left: 162px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">Failures?</div></div></div></foreignObject><image x="162" y="644.5" width="98" height="17" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-61"><g><path d="M 261 651 L 356 651 L 356 87.37" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 356 82.12 L 359.5 89.12 L 356 87.37 L 352.5 89.12 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-62"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 662px; margin-left: 308px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">yes</div></div></div></foreignObject><image x="299" y="656" width="18" height="15.75" xlink:href=""/></switch></g></g></g></g></g></g></svg> \ No newline at end of file
diff --git a/Documentation/arch/x86/sva.rst b/Documentation/arch/x86/sva.rst
new file mode 100644
index 000000000000..6a759984d471
--- /dev/null
+++ b/Documentation/arch/x86/sva.rst
@@ -0,0 +1,286 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================================
+Shared Virtual Addressing (SVA) with ENQCMD
+===========================================
+
+Background
+==========
+
+Shared Virtual Addressing (SVA) allows the processor and device to use the
+same virtual addresses avoiding the need for software to translate virtual
+addresses to physical addresses. SVA is what PCIe calls Shared Virtual
+Memory (SVM).
+
+In addition to the convenience of using application virtual addresses
+by the device, it also doesn't require pinning pages for DMA.
+PCIe Address Translation Services (ATS) along with Page Request Interface
+(PRI) allow devices to function much the same way as the CPU handling
+application page-faults. For more information please refer to the PCIe
+specification Chapter 10: ATS Specification.
+
+Use of SVA requires IOMMU support in the platform. IOMMU is also
+required to support the PCIe features ATS and PRI. ATS allows devices
+to cache translations for virtual addresses. The IOMMU driver uses the
+mmu_notifier() support to keep the device TLB cache and the CPU cache in
+sync. When an ATS lookup fails for a virtual address, the device should
+use the PRI in order to request the virtual address to be paged into the
+CPU page tables. The device must use ATS again in order to fetch the
+translation before use.
+
+Shared Hardware Workqueues
+==========================
+
+Unlike Single Root I/O Virtualization (SR-IOV), Scalable IOV (SIOV) permits
+the use of Shared Work Queues (SWQ) by both applications and Virtual
+Machines (VM's). This allows better hardware utilization vs. hard
+partitioning resources that could result in under utilization. In order to
+allow the hardware to distinguish the context for which work is being
+executed in the hardware by SWQ interface, SIOV uses Process Address Space
+ID (PASID), which is a 20-bit number defined by the PCIe SIG.
+
+PASID value is encoded in all transactions from the device. This allows the
+IOMMU to track I/O on a per-PASID granularity in addition to using the PCIe
+Resource Identifier (RID) which is the Bus/Device/Function.
+
+
+ENQCMD
+======
+
+ENQCMD is a new instruction on Intel platforms that atomically submits a
+work descriptor to a device. The descriptor includes the operation to be
+performed, virtual addresses of all parameters, virtual address of a completion
+record, and the PASID (process address space ID) of the current process.
+
+ENQCMD works with non-posted semantics and carries a status back if the
+command was accepted by hardware. This allows the submitter to know if the
+submission needs to be retried or other device specific mechanisms to
+implement fairness or ensure forward progress should be provided.
+
+ENQCMD is the glue that ensures applications can directly submit commands
+to the hardware and also permits hardware to be aware of application context
+to perform I/O operations via use of PASID.
+
+Process Address Space Tagging
+=============================
+
+A new thread-scoped MSR (IA32_PASID) provides the connection between
+user processes and the rest of the hardware. When an application first
+accesses an SVA-capable device, this MSR is initialized with a newly
+allocated PASID. The driver for the device calls an IOMMU-specific API
+that sets up the routing for DMA and page-requests.
+
+For example, the Intel Data Streaming Accelerator (DSA) uses
+iommu_sva_bind_device(), which will do the following:
+
+- Allocate the PASID, and program the process page-table (%cr3 register) in the
+ PASID context entries.
+- Register for mmu_notifier() to track any page-table invalidations to keep
+ the device TLB in sync. For example, when a page-table entry is invalidated,
+ the IOMMU propagates the invalidation to the device TLB. This will force any
+ future access by the device to this virtual address to participate in
+ ATS. If the IOMMU responds with proper response that a page is not
+ present, the device would request the page to be paged in via the PCIe PRI
+ protocol before performing I/O.
+
+This MSR is managed with the XSAVE feature set as "supervisor state" to
+ensure the MSR is updated during context switch.
+
+PASID Management
+================
+
+The kernel must allocate a PASID on behalf of each process which will use
+ENQCMD and program it into the new MSR to communicate the process identity to
+platform hardware. ENQCMD uses the PASID stored in this MSR to tag requests
+from this process. When a user submits a work descriptor to a device using the
+ENQCMD instruction, the PASID field in the descriptor is auto-filled with the
+value from MSR_IA32_PASID. Requests for DMA from the device are also tagged
+with the same PASID. The platform IOMMU uses the PASID in the transaction to
+perform address translation. The IOMMU APIs setup the corresponding PASID
+entry in IOMMU with the process address used by the CPU (e.g. %cr3 register in
+x86).
+
+The MSR must be configured on each logical CPU before any application
+thread can interact with a device. Threads that belong to the same
+process share the same page tables, thus the same MSR value.
+
+PASID Life Cycle Management
+===========================
+
+PASID is initialized as IOMMU_PASID_INVALID (-1) when a process is created.
+
+Only processes that access SVA-capable devices need to have a PASID
+allocated. This allocation happens when a process opens/binds an SVA-capable
+device but finds no PASID for this process. Subsequent binds of the same, or
+other devices will share the same PASID.
+
+Although the PASID is allocated to the process by opening a device,
+it is not active in any of the threads of that process. It's loaded to the
+IA32_PASID MSR lazily when a thread tries to submit a work descriptor
+to a device using the ENQCMD.
+
+That first access will trigger a #GP fault because the IA32_PASID MSR
+has not been initialized with the PASID value assigned to the process
+when the device was opened. The Linux #GP handler notes that a PASID has
+been allocated for the process, and so initializes the IA32_PASID MSR
+and returns so that the ENQCMD instruction is re-executed.
+
+On fork(2) or exec(2) the PASID is removed from the process as it no
+longer has the same address space that it had when the device was opened.
+
+On clone(2) the new task shares the same address space, so will be
+able to use the PASID allocated to the process. The IA32_PASID is not
+preemptively initialized as the PASID value might not be allocated yet or
+the kernel does not know whether this thread is going to access the device
+and the cleared IA32_PASID MSR reduces context switch overhead by xstate
+init optimization. Since #GP faults have to be handled on any threads that
+were created before the PASID was assigned to the mm of the process, newly
+created threads might as well be treated in a consistent way.
+
+Due to complexity of freeing the PASID and clearing all IA32_PASID MSRs in
+all threads in unbind, free the PASID lazily only on mm exit.
+
+If a process does a close(2) of the device file descriptor and munmap(2)
+of the device MMIO portal, then the driver will unbind the device. The
+PASID is still marked VALID in the PASID_MSR for any threads in the
+process that accessed the device. But this is harmless as without the
+MMIO portal they cannot submit new work to the device.
+
+Relationships
+=============
+
+ * Each process has many threads, but only one PASID.
+ * Devices have a limited number (~10's to 1000's) of hardware workqueues.
+ The device driver manages allocating hardware workqueues.
+ * A single mmap() maps a single hardware workqueue as a "portal" and
+ each portal maps down to a single workqueue.
+ * For each device with which a process interacts, there must be
+ one or more mmap()'d portals.
+ * Many threads within a process can share a single portal to access
+ a single device.
+ * Multiple processes can separately mmap() the same portal, in
+ which case they still share one device hardware workqueue.
+ * The single process-wide PASID is used by all threads to interact
+ with all devices. There is not, for instance, a PASID for each
+ thread or each thread<->device pair.
+
+FAQ
+===
+
+* What is SVA/SVM?
+
+Shared Virtual Addressing (SVA) permits I/O hardware and the processor to
+work in the same address space, i.e., to share it. Some call it Shared
+Virtual Memory (SVM), but Linux community wanted to avoid confusing it with
+POSIX Shared Memory and Secure Virtual Machines which were terms already in
+circulation.
+
+* What is a PASID?
+
+A Process Address Space ID (PASID) is a PCIe-defined Transaction Layer Packet
+(TLP) prefix. A PASID is a 20-bit number allocated and managed by the OS.
+PASID is included in all transactions between the platform and the device.
+
+* How are shared workqueues different?
+
+Traditionally, in order for userspace applications to interact with hardware,
+there is a separate hardware instance required per process. For example,
+consider doorbells as a mechanism of informing hardware about work to process.
+Each doorbell is required to be spaced 4k (or page-size) apart for process
+isolation. This requires hardware to provision that space and reserve it in
+MMIO. This doesn't scale as the number of threads becomes quite large. The
+hardware also manages the queue depth for Shared Work Queues (SWQ), and
+consumers don't need to track queue depth. If there is no space to accept
+a command, the device will return an error indicating retry.
+
+A user should check Deferrable Memory Write (DMWr) capability on the device
+and only submits ENQCMD when the device supports it. In the new DMWr PCIe
+terminology, devices need to support DMWr completer capability. In addition,
+it requires all switch ports to support DMWr routing and must be enabled by
+the PCIe subsystem, much like how PCIe atomic operations are managed for
+instance.
+
+SWQ allows hardware to provision just a single address in the device. When
+used with ENQCMD to submit work, the device can distinguish the process
+submitting the work since it will include the PASID assigned to that
+process. This helps the device scale to a large number of processes.
+
+* Is this the same as a user space device driver?
+
+Communicating with the device via the shared workqueue is much simpler
+than a full blown user space driver. The kernel driver does all the
+initialization of the hardware. User space only needs to worry about
+submitting work and processing completions.
+
+* Is this the same as SR-IOV?
+
+Single Root I/O Virtualization (SR-IOV) focuses on providing independent
+hardware interfaces for virtualizing hardware. Hence, it's required to be
+an almost fully functional interface to software supporting the traditional
+BARs, space for interrupts via MSI-X, its own register layout.
+Virtual Functions (VFs) are assisted by the Physical Function (PF)
+driver.
+
+Scalable I/O Virtualization builds on the PASID concept to create device
+instances for virtualization. SIOV requires host software to assist in
+creating virtual devices; each virtual device is represented by a PASID
+along with the bus/device/function of the device. This allows device
+hardware to optimize device resource creation and can grow dynamically on
+demand. SR-IOV creation and management is very static in nature. Consult
+references below for more details.
+
+* Why not just create a virtual function for each app?
+
+Creating PCIe SR-IOV type Virtual Functions (VF) is expensive. VFs require
+duplicated hardware for PCI config space and interrupts such as MSI-X.
+Resources such as interrupts have to be hard partitioned between VFs at
+creation time, and cannot scale dynamically on demand. The VFs are not
+completely independent from the Physical Function (PF). Most VFs require
+some communication and assistance from the PF driver. SIOV, in contrast,
+creates a software-defined device where all the configuration and control
+aspects are mediated via the slow path. The work submission and completion
+happen without any mediation.
+
+* Does this support virtualization?
+
+ENQCMD can be used from within a guest VM. In these cases, the VMM helps
+with setting up a translation table to translate from Guest PASID to Host
+PASID. Please consult the ENQCMD instruction set reference for more
+details.
+
+* Does memory need to be pinned?
+
+When devices support SVA along with platform hardware such as IOMMU
+supporting such devices, there is no need to pin memory for DMA purposes.
+Devices that support SVA also support other PCIe features that remove the
+pinning requirement for memory.
+
+Device TLB support - Device requests the IOMMU to lookup an address before
+use via Address Translation Service (ATS) requests. If the mapping exists
+but there is no page allocated by the OS, IOMMU hardware returns that no
+mapping exists.
+
+Device requests the virtual address to be mapped via Page Request
+Interface (PRI). Once the OS has successfully completed the mapping, it
+returns the response back to the device. The device requests again for
+a translation and continues.
+
+IOMMU works with the OS in managing consistency of page-tables with the
+device. When removing pages, it interacts with the device to remove any
+device TLB entry that might have been cached before removing the mappings from
+the OS.
+
+References
+==========
+
+VT-D:
+https://01.org/blogs/ashokraj/2018/recent-enhancements-intel-virtualization-technology-directed-i/o-intel-vt-d
+
+SIOV:
+https://01.org/blogs/2019/assignable-interfaces-intel-scalable-i/o-virtualization-linux
+
+ENQCMD in ISE:
+https://software.intel.com/sites/default/files/managed/c5/15/architecture-instruction-set-extensions-programming-reference.pdf
+
+DSA spec:
+https://software.intel.com/sites/default/files/341204-intel-data-streaming-accelerator-spec.pdf
diff --git a/Documentation/arch/x86/tdx.rst b/Documentation/arch/x86/tdx.rst
new file mode 100644
index 000000000000..61670e7df2f7
--- /dev/null
+++ b/Documentation/arch/x86/tdx.rst
@@ -0,0 +1,446 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================================
+Intel Trust Domain Extensions (TDX)
+=====================================
+
+Intel's Trust Domain Extensions (TDX) protect confidential guest VMs from
+the host and physical attacks by isolating the guest register state and by
+encrypting the guest memory. In TDX, a special module running in a special
+mode sits between the host and the guest and manages the guest/host
+separation.
+
+TDX Host Kernel Support
+=======================
+
+TDX introduces a new CPU mode called Secure Arbitration Mode (SEAM) and
+a new isolated range pointed by the SEAM Ranger Register (SEAMRR). A
+CPU-attested software module called 'the TDX module' runs inside the new
+isolated range to provide the functionalities to manage and run protected
+VMs.
+
+TDX also leverages Intel Multi-Key Total Memory Encryption (MKTME) to
+provide crypto-protection to the VMs. TDX reserves part of MKTME KeyIDs
+as TDX private KeyIDs, which are only accessible within the SEAM mode.
+BIOS is responsible for partitioning legacy MKTME KeyIDs and TDX KeyIDs.
+
+Before the TDX module can be used to create and run protected VMs, it
+must be loaded into the isolated range and properly initialized. The TDX
+architecture doesn't require the BIOS to load the TDX module, but the
+kernel assumes it is loaded by the BIOS.
+
+TDX boot-time detection
+-----------------------
+
+The kernel detects TDX by detecting TDX private KeyIDs during kernel
+boot. Below dmesg shows when TDX is enabled by BIOS::
+
+ [..] virt/tdx: BIOS enabled: private KeyID range: [16, 64)
+
+TDX module initialization
+---------------------------------------
+
+The kernel talks to the TDX module via the new SEAMCALL instruction. The
+TDX module implements SEAMCALL leaf functions to allow the kernel to
+initialize it.
+
+If the TDX module isn't loaded, the SEAMCALL instruction fails with a
+special error. In this case the kernel fails the module initialization
+and reports the module isn't loaded::
+
+ [..] virt/tdx: module not loaded
+
+Initializing the TDX module consumes roughly ~1/256th system RAM size to
+use it as 'metadata' for the TDX memory. It also takes additional CPU
+time to initialize those metadata along with the TDX module itself. Both
+are not trivial. The kernel initializes the TDX module at runtime on
+demand.
+
+Besides initializing the TDX module, a per-cpu initialization SEAMCALL
+must be done on one cpu before any other SEAMCALLs can be made on that
+cpu.
+
+The kernel provides two functions, tdx_enable() and tdx_cpu_enable() to
+allow the user of TDX to enable the TDX module and enable TDX on local
+cpu respectively.
+
+Making SEAMCALL requires VMXON has been done on that CPU. Currently only
+KVM implements VMXON. For now both tdx_enable() and tdx_cpu_enable()
+don't do VMXON internally (not trivial), but depends on the caller to
+guarantee that.
+
+To enable TDX, the caller of TDX should: 1) temporarily disable CPU
+hotplug; 2) do VMXON and tdx_enable_cpu() on all online cpus; 3) call
+tdx_enable(). For example::
+
+ cpus_read_lock();
+ on_each_cpu(vmxon_and_tdx_cpu_enable());
+ ret = tdx_enable();
+ cpus_read_unlock();
+ if (ret)
+ goto no_tdx;
+ // TDX is ready to use
+
+And the caller of TDX must guarantee the tdx_cpu_enable() has been
+successfully done on any cpu before it wants to run any other SEAMCALL.
+A typical usage is do both VMXON and tdx_cpu_enable() in CPU hotplug
+online callback, and refuse to online if tdx_cpu_enable() fails.
+
+User can consult dmesg to see whether the TDX module has been initialized.
+
+If the TDX module is initialized successfully, dmesg shows something
+like below::
+
+ [..] virt/tdx: 262668 KBs allocated for PAMT
+ [..] virt/tdx: module initialized
+
+If the TDX module failed to initialize, dmesg also shows it failed to
+initialize::
+
+ [..] virt/tdx: module initialization failed ...
+
+TDX Interaction to Other Kernel Components
+------------------------------------------
+
+TDX Memory Policy
+~~~~~~~~~~~~~~~~~
+
+TDX reports a list of "Convertible Memory Region" (CMR) to tell the
+kernel which memory is TDX compatible. The kernel needs to build a list
+of memory regions (out of CMRs) as "TDX-usable" memory and pass those
+regions to the TDX module. Once this is done, those "TDX-usable" memory
+regions are fixed during module's lifetime.
+
+To keep things simple, currently the kernel simply guarantees all pages
+in the page allocator are TDX memory. Specifically, the kernel uses all
+system memory in the core-mm "at the time of TDX module initialization"
+as TDX memory, and in the meantime, refuses to online any non-TDX-memory
+in the memory hotplug.
+
+Physical Memory Hotplug
+~~~~~~~~~~~~~~~~~~~~~~~
+
+Note TDX assumes convertible memory is always physically present during
+machine's runtime. A non-buggy BIOS should never support hot-removal of
+any convertible memory. This implementation doesn't handle ACPI memory
+removal but depends on the BIOS to behave correctly.
+
+CPU Hotplug
+~~~~~~~~~~~
+
+TDX module requires the per-cpu initialization SEAMCALL must be done on
+one cpu before any other SEAMCALLs can be made on that cpu. The kernel
+provides tdx_cpu_enable() to let the user of TDX to do it when the user
+wants to use a new cpu for TDX task.
+
+TDX doesn't support physical (ACPI) CPU hotplug. During machine boot,
+TDX verifies all boot-time present logical CPUs are TDX compatible before
+enabling TDX. A non-buggy BIOS should never support hot-add/removal of
+physical CPU. Currently the kernel doesn't handle physical CPU hotplug,
+but depends on the BIOS to behave correctly.
+
+Note TDX works with CPU logical online/offline, thus the kernel still
+allows to offline logical CPU and online it again.
+
+Erratum
+~~~~~~~
+
+The first few generations of TDX hardware have an erratum. A partial
+write to a TDX private memory cacheline will silently "poison" the
+line. Subsequent reads will consume the poison and generate a machine
+check.
+
+A partial write is a memory write where a write transaction of less than
+cacheline lands at the memory controller. The CPU does these via
+non-temporal write instructions (like MOVNTI), or through UC/WC memory
+mappings. Devices can also do partial writes via DMA.
+
+Theoretically, a kernel bug could do partial write to TDX private memory
+and trigger unexpected machine check. What's more, the machine check
+code will present these as "Hardware error" when they were, in fact, a
+software-triggered issue. But in the end, this issue is hard to trigger.
+
+If the platform has such erratum, the kernel prints additional message in
+machine check handler to tell user the machine check may be caused by
+kernel bug on TDX private memory.
+
+Kexec
+~~~~~~~
+
+Currently kexec doesn't work on the TDX platforms with the aforementioned
+erratum. It fails when loading the kexec kernel image. Otherwise it
+works normally.
+
+Interaction vs S3 and deeper states
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+TDX cannot survive from S3 and deeper states. The hardware resets and
+disables TDX completely when platform goes to S3 and deeper. Both TDX
+guests and the TDX module get destroyed permanently.
+
+The kernel uses S3 for suspend-to-ram, and use S4 and deeper states for
+hibernation. Currently, for simplicity, the kernel chooses to make TDX
+mutually exclusive with S3 and hibernation.
+
+The kernel disables TDX during early boot when hibernation support is
+available::
+
+ [..] virt/tdx: initialization failed: Hibernation support is enabled
+
+Add 'nohibernate' kernel command line to disable hibernation in order to
+use TDX.
+
+ACPI S3 is disabled during kernel early boot if TDX is enabled. The user
+needs to turn off TDX in the BIOS in order to use S3.
+
+TDX Guest Support
+=================
+Since the host cannot directly access guest registers or memory, much
+normal functionality of a hypervisor must be moved into the guest. This is
+implemented using a Virtualization Exception (#VE) that is handled by the
+guest kernel. A #VE is handled entirely inside the guest kernel, but some
+require the hypervisor to be consulted.
+
+TDX includes new hypercall-like mechanisms for communicating from the
+guest to the hypervisor or the TDX module.
+
+New TDX Exceptions
+------------------
+
+TDX guests behave differently from bare-metal and traditional VMX guests.
+In TDX guests, otherwise normal instructions or memory accesses can cause
+#VE or #GP exceptions.
+
+Instructions marked with an '*' conditionally cause exceptions. The
+details for these instructions are discussed below.
+
+Instruction-based #VE
+~~~~~~~~~~~~~~~~~~~~~
+
+- Port I/O (INS, OUTS, IN, OUT)
+- HLT
+- MONITOR, MWAIT
+- WBINVD, INVD
+- VMCALL
+- RDMSR*,WRMSR*
+- CPUID*
+
+Instruction-based #GP
+~~~~~~~~~~~~~~~~~~~~~
+
+- All VMX instructions: INVEPT, INVVPID, VMCLEAR, VMFUNC, VMLAUNCH,
+ VMPTRLD, VMPTRST, VMREAD, VMRESUME, VMWRITE, VMXOFF, VMXON
+- ENCLS, ENCLU
+- GETSEC
+- RSM
+- ENQCMD
+- RDMSR*,WRMSR*
+
+RDMSR/WRMSR Behavior
+~~~~~~~~~~~~~~~~~~~~
+
+MSR access behavior falls into three categories:
+
+- #GP generated
+- #VE generated
+- "Just works"
+
+In general, the #GP MSRs should not be used in guests. Their use likely
+indicates a bug in the guest. The guest may try to handle the #GP with a
+hypercall but it is unlikely to succeed.
+
+The #VE MSRs are typically able to be handled by the hypervisor. Guests
+can make a hypercall to the hypervisor to handle the #VE.
+
+The "just works" MSRs do not need any special guest handling. They might
+be implemented by directly passing through the MSR to the hardware or by
+trapping and handling in the TDX module. Other than possibly being slow,
+these MSRs appear to function just as they would on bare metal.
+
+CPUID Behavior
+~~~~~~~~~~~~~~
+
+For some CPUID leaves and sub-leaves, the virtualized bit fields of CPUID
+return values (in guest EAX/EBX/ECX/EDX) are configurable by the
+hypervisor. For such cases, the Intel TDX module architecture defines two
+virtualization types:
+
+- Bit fields for which the hypervisor controls the value seen by the guest
+ TD.
+
+- Bit fields for which the hypervisor configures the value such that the
+ guest TD either sees their native value or a value of 0. For these bit
+ fields, the hypervisor can mask off the native values, but it can not
+ turn *on* values.
+
+A #VE is generated for CPUID leaves and sub-leaves that the TDX module does
+not know how to handle. The guest kernel may ask the hypervisor for the
+value with a hypercall.
+
+#VE on Memory Accesses
+----------------------
+
+There are essentially two classes of TDX memory: private and shared.
+Private memory receives full TDX protections. Its content is protected
+against access from the hypervisor. Shared memory is expected to be
+shared between guest and hypervisor and does not receive full TDX
+protections.
+
+A TD guest is in control of whether its memory accesses are treated as
+private or shared. It selects the behavior with a bit in its page table
+entries. This helps ensure that a guest does not place sensitive
+information in shared memory, exposing it to the untrusted hypervisor.
+
+#VE on Shared Memory
+~~~~~~~~~~~~~~~~~~~~
+
+Access to shared mappings can cause a #VE. The hypervisor ultimately
+controls whether a shared memory access causes a #VE, so the guest must be
+careful to only reference shared pages it can safely handle a #VE. For
+instance, the guest should be careful not to access shared memory in the
+#VE handler before it reads the #VE info structure (TDG.VP.VEINFO.GET).
+
+Shared mapping content is entirely controlled by the hypervisor. The guest
+should only use shared mappings for communicating with the hypervisor.
+Shared mappings must never be used for sensitive memory content like kernel
+stacks. A good rule of thumb is that hypervisor-shared memory should be
+treated the same as memory mapped to userspace. Both the hypervisor and
+userspace are completely untrusted.
+
+MMIO for virtual devices is implemented as shared memory. The guest must
+be careful not to access device MMIO regions unless it is also prepared to
+handle a #VE.
+
+#VE on Private Pages
+~~~~~~~~~~~~~~~~~~~~
+
+An access to private mappings can also cause a #VE. Since all kernel
+memory is also private memory, the kernel might theoretically need to
+handle a #VE on arbitrary kernel memory accesses. This is not feasible, so
+TDX guests ensure that all guest memory has been "accepted" before memory
+is used by the kernel.
+
+A modest amount of memory (typically 512M) is pre-accepted by the firmware
+before the kernel runs to ensure that the kernel can start up without
+being subjected to a #VE.
+
+The hypervisor is permitted to unilaterally move accepted pages to a
+"blocked" state. However, if it does this, page access will not generate a
+#VE. It will, instead, cause a "TD Exit" where the hypervisor is required
+to handle the exception.
+
+Linux #VE handler
+-----------------
+
+Just like page faults or #GP's, #VE exceptions can be either handled or be
+fatal. Typically, an unhandled userspace #VE results in a SIGSEGV.
+An unhandled kernel #VE results in an oops.
+
+Handling nested exceptions on x86 is typically nasty business. A #VE
+could be interrupted by an NMI which triggers another #VE and hilarity
+ensues. The TDX #VE architecture anticipated this scenario and includes a
+feature to make it slightly less nasty.
+
+During #VE handling, the TDX module ensures that all interrupts (including
+NMIs) are blocked. The block remains in place until the guest makes a
+TDG.VP.VEINFO.GET TDCALL. This allows the guest to control when interrupts
+or a new #VE can be delivered.
+
+However, the guest kernel must still be careful to avoid potential
+#VE-triggering actions (discussed above) while this block is in place.
+While the block is in place, any #VE is elevated to a double fault (#DF)
+which is not recoverable.
+
+MMIO handling
+-------------
+
+In non-TDX VMs, MMIO is usually implemented by giving a guest access to a
+mapping which will cause a VMEXIT on access, and then the hypervisor
+emulates the access. That is not possible in TDX guests because VMEXIT
+will expose the register state to the host. TDX guests don't trust the host
+and can't have their state exposed to the host.
+
+In TDX, MMIO regions typically trigger a #VE exception in the guest. The
+guest #VE handler then emulates the MMIO instruction inside the guest and
+converts it into a controlled TDCALL to the host, rather than exposing
+guest state to the host.
+
+MMIO addresses on x86 are just special physical addresses. They can
+theoretically be accessed with any instruction that accesses memory.
+However, the kernel instruction decoding method is limited. It is only
+designed to decode instructions like those generated by io.h macros.
+
+MMIO access via other means (like structure overlays) may result in an
+oops.
+
+Shared Memory Conversions
+-------------------------
+
+All TDX guest memory starts out as private at boot. This memory can not
+be accessed by the hypervisor. However, some kernel users like device
+drivers might have a need to share data with the hypervisor. To do this,
+memory must be converted between shared and private. This can be
+accomplished using some existing memory encryption helpers:
+
+ * set_memory_decrypted() converts a range of pages to shared.
+ * set_memory_encrypted() converts memory back to private.
+
+Device drivers are the primary user of shared memory, but there's no need
+to touch every driver. DMA buffers and ioremap() do the conversions
+automatically.
+
+TDX uses SWIOTLB for most DMA allocations. The SWIOTLB buffer is
+converted to shared on boot.
+
+For coherent DMA allocation, the DMA buffer gets converted on the
+allocation. Check force_dma_unencrypted() for details.
+
+Attestation
+===========
+
+Attestation is used to verify the TDX guest trustworthiness to other
+entities before provisioning secrets to the guest. For example, a key
+server may want to use attestation to verify that the guest is the
+desired one before releasing the encryption keys to mount the encrypted
+rootfs or a secondary drive.
+
+The TDX module records the state of the TDX guest in various stages of
+the guest boot process using the build time measurement register (MRTD)
+and runtime measurement registers (RTMR). Measurements related to the
+guest initial configuration and firmware image are recorded in the MRTD
+register. Measurements related to initial state, kernel image, firmware
+image, command line options, initrd, ACPI tables, etc are recorded in
+RTMR registers. For more details, as an example, please refer to TDX
+Virtual Firmware design specification, section titled "TD Measurement".
+At TDX guest runtime, the attestation process is used to attest to these
+measurements.
+
+The attestation process consists of two steps: TDREPORT generation and
+Quote generation.
+
+TDX guest uses TDCALL[TDG.MR.REPORT] to get the TDREPORT (TDREPORT_STRUCT)
+from the TDX module. TDREPORT is a fixed-size data structure generated by
+the TDX module which contains guest-specific information (such as build
+and boot measurements), platform security version, and the MAC to protect
+the integrity of the TDREPORT. A user-provided 64-Byte REPORTDATA is used
+as input and included in the TDREPORT. Typically it can be some nonce
+provided by attestation service so the TDREPORT can be verified uniquely.
+More details about the TDREPORT can be found in Intel TDX Module
+specification, section titled "TDG.MR.REPORT Leaf".
+
+After getting the TDREPORT, the second step of the attestation process
+is to send it to the Quoting Enclave (QE) to generate the Quote. TDREPORT
+by design can only be verified on the local platform as the MAC key is
+bound to the platform. To support remote verification of the TDREPORT,
+TDX leverages Intel SGX Quoting Enclave to verify the TDREPORT locally
+and convert it to a remotely verifiable Quote. Method of sending TDREPORT
+to QE is implementation specific. Attestation software can choose
+whatever communication channel available (i.e. vsock or TCP/IP) to
+send the TDREPORT to QE and receive the Quote.
+
+References
+==========
+
+TDX reference material is collected here:
+
+https://www.intel.com/content/www/us/en/developer/articles/technical/intel-trust-domain-extensions.html
diff --git a/Documentation/arch/x86/tlb.rst b/Documentation/arch/x86/tlb.rst
new file mode 100644
index 000000000000..82ec58ae63a8
--- /dev/null
+++ b/Documentation/arch/x86/tlb.rst
@@ -0,0 +1,83 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======
+The TLB
+=======
+
+When the kernel unmaps or modified the attributes of a range of
+memory, it has two choices:
+
+ 1. Flush the entire TLB with a two-instruction sequence. This is
+ a quick operation, but it causes collateral damage: TLB entries
+ from areas other than the one we are trying to flush will be
+ destroyed and must be refilled later, at some cost.
+ 2. Use the invlpg instruction to invalidate a single page at a
+ time. This could potentially cost many more instructions, but
+ it is a much more precise operation, causing no collateral
+ damage to other TLB entries.
+
+Which method to do depends on a few things:
+
+ 1. The size of the flush being performed. A flush of the entire
+ address space is obviously better performed by flushing the
+ entire TLB than doing 2^48/PAGE_SIZE individual flushes.
+ 2. The contents of the TLB. If the TLB is empty, then there will
+ be no collateral damage caused by doing the global flush, and
+ all of the individual flush will have ended up being wasted
+ work.
+ 3. The size of the TLB. The larger the TLB, the more collateral
+ damage we do with a full flush. So, the larger the TLB, the
+ more attractive an individual flush looks. Data and
+ instructions have separate TLBs, as do different page sizes.
+ 4. The microarchitecture. The TLB has become a multi-level
+ cache on modern CPUs, and the global flushes have become more
+ expensive relative to single-page flushes.
+
+There is obviously no way the kernel can know all these things,
+especially the contents of the TLB during a given flush. The
+sizes of the flush will vary greatly depending on the workload as
+well. There is essentially no "right" point to choose.
+
+You may be doing too many individual invalidations if you see the
+invlpg instruction (or instructions _near_ it) show up high in
+profiles. If you believe that individual invalidations being
+called too often, you can lower the tunable::
+
+ /sys/kernel/debug/x86/tlb_single_page_flush_ceiling
+
+This will cause us to do the global flush for more cases.
+Lowering it to 0 will disable the use of the individual flushes.
+Setting it to 1 is a very conservative setting and it should
+never need to be 0 under normal circumstances.
+
+Despite the fact that a single individual flush on x86 is
+guaranteed to flush a full 2MB [1]_, hugetlbfs always uses the full
+flushes. THP is treated exactly the same as normal memory.
+
+You might see invlpg inside of flush_tlb_mm_range() show up in
+profiles, or you can use the trace_tlb_flush() tracepoints. to
+determine how long the flush operations are taking.
+
+Essentially, you are balancing the cycles you spend doing invlpg
+with the cycles that you spend refilling the TLB later.
+
+You can measure how expensive TLB refills are by using
+performance counters and 'perf stat', like this::
+
+ perf stat -e
+ cpu/event=0x8,umask=0x84,name=dtlb_load_misses_walk_duration/,
+ cpu/event=0x8,umask=0x82,name=dtlb_load_misses_walk_completed/,
+ cpu/event=0x49,umask=0x4,name=dtlb_store_misses_walk_duration/,
+ cpu/event=0x49,umask=0x2,name=dtlb_store_misses_walk_completed/,
+ cpu/event=0x85,umask=0x4,name=itlb_misses_walk_duration/,
+ cpu/event=0x85,umask=0x2,name=itlb_misses_walk_completed/
+
+That works on an IvyBridge-era CPU (i5-3320M). Different CPUs
+may have differently-named counters, but they should at least
+be there in some form. You can use pmu-tools 'ocperf list'
+(https://github.com/andikleen/pmu-tools) to find the right
+counters for a given CPU.
+
+.. [1] A footnote in Intel's SDM "4.10.4.2 Recommended Invalidation"
+ says: "One execution of INVLPG is sufficient even for a page
+ with size greater than 4 KBytes."
diff --git a/Documentation/arch/x86/topology.rst b/Documentation/arch/x86/topology.rst
new file mode 100644
index 000000000000..86bec8ac2c4d
--- /dev/null
+++ b/Documentation/arch/x86/topology.rst
@@ -0,0 +1,421 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============
+x86 Topology
+============
+
+This documents and clarifies the main aspects of x86 topology modelling and
+representation in the kernel. Update/change when doing changes to the
+respective code.
+
+The architecture-agnostic topology definitions are in
+Documentation/admin-guide/cputopology.rst. This file holds x86-specific
+differences/specialities which must not necessarily apply to the generic
+definitions. Thus, the way to read up on Linux topology on x86 is to start
+with the generic one and look at this one in parallel for the x86 specifics.
+
+Needless to say, code should use the generic functions - this file is *only*
+here to *document* the inner workings of x86 topology.
+
+Started by Thomas Gleixner <tglx@linutronix.de> and Borislav Petkov <bp@alien8.de>.
+
+The main aim of the topology facilities is to present adequate interfaces to
+code which needs to know/query/use the structure of the running system wrt
+threads, cores, packages, etc.
+
+The kernel does not care about the concept of physical sockets because a
+socket has no relevance to software. It's an electromechanical component. In
+the past a socket always contained a single package (see below), but with the
+advent of Multi Chip Modules (MCM) a socket can hold more than one package. So
+there might be still references to sockets in the code, but they are of
+historical nature and should be cleaned up.
+
+The topology of a system is described in the units of:
+
+ - packages
+ - cores
+ - threads
+
+Package
+=======
+Packages contain a number of cores plus shared resources, e.g. DRAM
+controller, shared caches etc.
+
+Modern systems may also use the term 'Die' for package.
+
+AMD nomenclature for package is 'Node'.
+
+Package-related topology information in the kernel:
+
+ - topology_num_threads_per_package()
+
+ The number of threads in a package.
+
+ - topology_num_cores_per_package()
+
+ The number of cores in a package.
+
+ - topology_max_dies_per_package()
+
+ The maximum number of dies in a package.
+
+ - cpuinfo_x86.topo.die_id:
+
+ The physical ID of the die.
+
+ - cpuinfo_x86.topo.pkg_id:
+
+ The physical ID of the package. This information is retrieved via CPUID
+ and deduced from the APIC IDs of the cores in the package.
+
+ Modern systems use this value for the socket. There may be multiple
+ packages within a socket. This value may differ from topo.die_id.
+
+ - cpuinfo_x86.topo.logical_pkg_id:
+
+ The logical ID of the package. As we do not trust BIOSes to enumerate the
+ packages in a consistent way, we introduced the concept of logical package
+ ID so we can sanely calculate the number of maximum possible packages in
+ the system and have the packages enumerated linearly.
+
+ - topology_max_packages():
+
+ The maximum possible number of packages in the system. Helpful for per
+ package facilities to preallocate per package information.
+
+ - cpuinfo_x86.topo.llc_id:
+
+ - On Intel, the first APIC ID of the list of CPUs sharing the Last Level
+ Cache
+
+ - On AMD, the Node ID or Core Complex ID containing the Last Level
+ Cache. In general, it is a number identifying an LLC uniquely on the
+ system.
+
+Cores
+=====
+A core consists of 1 or more threads. It does not matter whether the threads
+are SMT- or CMT-type threads.
+
+AMDs nomenclature for a CMT core is "Compute Unit". The kernel always uses
+"core".
+
+Threads
+=======
+A thread is a single scheduling unit. It's the equivalent to a logical Linux
+CPU.
+
+AMDs nomenclature for CMT threads is "Compute Unit Core". The kernel always
+uses "thread".
+
+Thread-related topology information in the kernel:
+
+ - topology_core_cpumask():
+
+ The cpumask contains all online threads in the package to which a thread
+ belongs.
+
+ The number of online threads is also printed in /proc/cpuinfo "siblings."
+
+ - topology_sibling_cpumask():
+
+ The cpumask contains all online threads in the core to which a thread
+ belongs.
+
+ - topology_logical_package_id():
+
+ The logical package ID to which a thread belongs.
+
+ - topology_physical_package_id():
+
+ The physical package ID to which a thread belongs.
+
+ - topology_core_id();
+
+ The ID of the core to which a thread belongs. It is also printed in /proc/cpuinfo
+ "core_id."
+
+ - topology_logical_core_id();
+
+ The logical core ID to which a thread belongs.
+
+
+
+System topology enumeration
+===========================
+
+The topology on x86 systems can be discovered using a combination of vendor
+specific CPUID leaves which enumerate the processor topology and the cache
+hierarchy.
+
+The CPUID leaves in their preferred order of parsing for each x86 vendor is as
+follows:
+
+1) AMD
+
+ 1) CPUID leaf 0x80000026 [Extended CPU Topology] (Core::X86::Cpuid::ExCpuTopology)
+
+ The extended CPUID leaf 0x80000026 is the extension of the CPUID leaf 0xB
+ and provides the topology information of Core, Complex, CCD (Die), and
+ Socket in each level.
+
+ Support for the leaf is discovered by checking if the maximum extended
+ CPUID level is >= 0x80000026 and then checking if `LogProcAtThisLevel`
+ in `EBX[15:0]` at a particular level (starting from 0) is non-zero.
+
+ The `LevelType` in `ECX[15:8]` at the level provides the topology domain
+ the level describes - Core, Complex, CCD(Die), or the Socket.
+
+ The kernel uses the `CoreMaskWidth` from `EAX[4:0]` to discover the
+ number of bits that need to be right-shifted from `ExtendedLocalApicId`
+ in `EDX[31:0]` in order to get a unique Topology ID for the topology
+ level. CPUs with the same Topology ID share the resources at that level.
+
+ CPUID leaf 0x80000026 also provides more information regarding the power
+ and efficiency rankings, and about the core type on AMD processors with
+ heterogeneous characteristics.
+
+ If CPUID leaf 0x80000026 is supported, further parsing is not required.
+
+ 2) CPUID leaf 0x0000000B [Extended Topology Enumeration] (Core::X86::Cpuid::ExtTopEnum)
+
+ The extended CPUID leaf 0x0000000B is the predecessor on the extended
+ CPUID leaf 0x80000026 and only describes the core, and the socket domains
+ of the processor topology.
+
+ The support for the leaf is discovered by checking if the maximum supported
+ CPUID level is >= 0xB and then if `EBX[31:0]` at a particular level
+ (starting from 0) is non-zero.
+
+ The `LevelType` in `ECX[15:8]` at the level provides the topology domain
+ that the level describes - Thread, or Processor (Socket).
+
+ The kernel uses the `CoreMaskWidth` from `EAX[4:0]` to discover the
+ number of bits that need to be right-shifted from the `ExtendedLocalApicId`
+ in `EDX[31:0]` to get a unique Topology ID for that topology level. CPUs
+ sharing the Topology ID share the resources at that level.
+
+ If CPUID leaf 0xB is supported, further parsing is not required.
+
+
+ 3) CPUID leaf 0x80000008 ECX [Size Identifiers] (Core::X86::Cpuid::SizeId)
+
+ If neither the CPUID leaf 0x80000026 nor 0xB is supported, the number of
+ CPUs on the package is detected using the Size Identifier leaf
+ 0x80000008 ECX.
+
+ The support for the leaf is discovered by checking if the supported
+ extended CPUID level is >= 0x80000008.
+
+ The shifts from the APIC ID for the Socket ID is calculated from the
+ `ApicIdSize` field in `ECX[15:12]` if it is non-zero.
+
+ If `ApicIdSize` is reported to be zero, the shift is calculated as the
+ order of the `number of threads` calculated from `NC` field in
+ `ECX[7:0]` which describes the `number of threads - 1` on the package.
+
+ Unless Extended APIC ID is supported, the APIC ID used to find the
+ Socket ID is from the `LocalApicId` field of CPUID leaf 0x00000001
+ `EBX[31:24]`.
+
+ The topology parsing continues to detect if Extended APIC ID is
+ supported or not.
+
+
+ 4) CPUID leaf 0x8000001E [Extended APIC ID, Core Identifiers, Node Identifiers]
+ (Core::X86::Cpuid::{ExtApicId,CoreId,NodeId})
+
+ The support for Extended APIC ID can be detected by checking for the
+ presence of `TopologyExtensions` in `ECX[22]` of CPUID leaf 0x80000001
+ [Feature Identifiers] (Core::X86::Cpuid::FeatureExtIdEcx).
+
+ If Topology Extensions is supported, the APIC ID from `ExtendedApicId`
+ from CPUID leaf 0x8000001E `EAX[31:0]` should be preferred over that from
+ `LocalApicId` field of CPUID leaf 0x00000001 `EBX[31:24]` for topology
+ enumeration.
+
+ On processors of Family 0x17 and above that do not support CPUID leaf
+ 0x80000026 or CPUID leaf 0xB, the shifts from the APIC ID for the Core
+ ID is calculated using the order of `number of threads per core`
+ calculated using the `ThreadsPerCore` field in `EBX[15:8]` which
+ describes `number of threads per core - 1`.
+
+ On Processors of Family 0x15, the Core ID from `EBX[7:0]` is used as the
+ `cu_id` (Compute Unit ID) to detect CPUs that share the compute units.
+
+
+ All AMD processors that support the `TopologyExtensions` feature store the
+ `NodeId` from the `ECX[7:0]` of CPUID leaf 0x8000001E
+ (Core::X86::Cpuid::NodeId) as the per-CPU `node_id`. On older processors,
+ the `node_id` was discovered using MSR_FAM10H_NODE_ID MSR (MSR
+ 0x0xc001_100c). The presence of the NODE_ID MSR was detected by checking
+ `ECX[19]` of CPUID leaf 0x80000001 [Feature Identifiers]
+ (Core::X86::Cpuid::FeatureExtIdEcx).
+
+
+2) Intel
+
+ On Intel platforms, the CPUID leaves that enumerate the processor
+ topology are as follows:
+
+ 1) CPUID leaf 0x1F (V2 Extended Topology Enumeration Leaf)
+
+ The CPUID leaf 0x1F is the extension of the CPUID leaf 0xB and provides
+ the topology information of Core, Module, Tile, Die, DieGrp, and Socket
+ in each level.
+
+ The support for the leaf is discovered by checking if the supported
+ CPUID level is >= 0x1F and then `EBX[31:0]` at a particular level
+ (starting from 0) is non-zero.
+
+ The `Domain Type` in `ECX[15:8]` of the sub-leaf provides the topology
+ domain that the level describes - Core, Module, Tile, Die, DieGrp, and
+ Socket.
+
+ The kernel uses the value from `EAX[4:0]` to discover the number of
+ bits that need to be right shifted from the `x2APIC ID` in `EDX[31:0]`
+ to get a unique Topology ID for the topology level. CPUs with the same
+ Topology ID share the resources at that level.
+
+ If CPUID leaf 0x1F is supported, further parsing is not required.
+
+
+ 2) CPUID leaf 0x0000000B (Extended Topology Enumeration Leaf)
+
+ The extended CPUID leaf 0x0000000B is the predecessor of the V2 Extended
+ Topology Enumeration Leaf 0x1F and only describes the core, and the
+ socket domains of the processor topology.
+
+ The support for the leaf is iscovered by checking if the supported CPUID
+ level is >= 0xB and then checking if `EBX[31:0]` at a particular level
+ (starting from 0) is non-zero.
+
+ CPUID leaf 0x0000000B shares the same layout as CPUID leaf 0x1F and
+ should be enumerated in a similar manner.
+
+ If CPUID leaf 0xB is supported, further parsing is not required.
+
+
+ 3) CPUID leaf 0x00000004 (Deterministic Cache Parameters Leaf)
+
+ On Intel processors that support neither CPUID leaf 0x1F, nor CPUID leaf
+ 0xB, the shifts for the SMT domains is calculated using the number of
+ CPUs sharing the L1 cache.
+
+ Processors that feature Hyper-Threading is detected using `EDX[28]` of
+ CPUID leaf 0x1 (Basic CPUID Information).
+
+ The order of `Maximum number of addressable IDs for logical processors
+ sharing this cache` from `EAX[25:14]` of level-0 of CPUID 0x4 provides
+ the shifts from the APIC ID required to compute the Core ID.
+
+ The APIC ID and Package information is computed using the data from
+ CPUID leaf 0x1.
+
+
+ 4) CPUID leaf 0x00000001 (Basic CPUID Information)
+
+ The mask and shifts to derive the Physical Package (socket) ID is
+ computed using the `Maximum number of addressable IDs for logical
+ processors in this physical package` from `EBX[23:16]` of CPUID leaf
+ 0x1.
+
+ The APIC ID on the legacy platforms is derived from the `Initial APIC
+ ID` field from `EBX[31:24]` of CPUID leaf 0x1.
+
+
+3) Centaur and Zhaoxin
+
+ Similar to Intel, Centaur and Zhaoxin use a combination of CPUID leaf
+ 0x00000004 (Deterministic Cache Parameters Leaf) and CPUID leaf 0x00000001
+ (Basic CPUID Information) to derive the topology information.
+
+
+
+System topology examples
+========================
+
+.. note::
+ The alternative Linux CPU enumeration depends on how the BIOS enumerates the
+ threads. Many BIOSes enumerate all threads 0 first and then all threads 1.
+ That has the "advantage" that the logical Linux CPU numbers of threads 0 stay
+ the same whether threads are enabled or not. That's merely an implementation
+ detail and has no practical impact.
+
+1) Single Package, Single Core::
+
+ [package 0] -> [core 0] -> [thread 0] -> Linux CPU 0
+
+2) Single Package, Dual Core
+
+ a) One thread per core::
+
+ [package 0] -> [core 0] -> [thread 0] -> Linux CPU 0
+ -> [core 1] -> [thread 0] -> Linux CPU 1
+
+ b) Two threads per core::
+
+ [package 0] -> [core 0] -> [thread 0] -> Linux CPU 0
+ -> [thread 1] -> Linux CPU 1
+ -> [core 1] -> [thread 0] -> Linux CPU 2
+ -> [thread 1] -> Linux CPU 3
+
+ Alternative enumeration::
+
+ [package 0] -> [core 0] -> [thread 0] -> Linux CPU 0
+ -> [thread 1] -> Linux CPU 2
+ -> [core 1] -> [thread 0] -> Linux CPU 1
+ -> [thread 1] -> Linux CPU 3
+
+ AMD nomenclature for CMT systems::
+
+ [node 0] -> [Compute Unit 0] -> [Compute Unit Core 0] -> Linux CPU 0
+ -> [Compute Unit Core 1] -> Linux CPU 1
+ -> [Compute Unit 1] -> [Compute Unit Core 0] -> Linux CPU 2
+ -> [Compute Unit Core 1] -> Linux CPU 3
+
+4) Dual Package, Dual Core
+
+ a) One thread per core::
+
+ [package 0] -> [core 0] -> [thread 0] -> Linux CPU 0
+ -> [core 1] -> [thread 0] -> Linux CPU 1
+
+ [package 1] -> [core 0] -> [thread 0] -> Linux CPU 2
+ -> [core 1] -> [thread 0] -> Linux CPU 3
+
+ b) Two threads per core::
+
+ [package 0] -> [core 0] -> [thread 0] -> Linux CPU 0
+ -> [thread 1] -> Linux CPU 1
+ -> [core 1] -> [thread 0] -> Linux CPU 2
+ -> [thread 1] -> Linux CPU 3
+
+ [package 1] -> [core 0] -> [thread 0] -> Linux CPU 4
+ -> [thread 1] -> Linux CPU 5
+ -> [core 1] -> [thread 0] -> Linux CPU 6
+ -> [thread 1] -> Linux CPU 7
+
+ Alternative enumeration::
+
+ [package 0] -> [core 0] -> [thread 0] -> Linux CPU 0
+ -> [thread 1] -> Linux CPU 4
+ -> [core 1] -> [thread 0] -> Linux CPU 1
+ -> [thread 1] -> Linux CPU 5
+
+ [package 1] -> [core 0] -> [thread 0] -> Linux CPU 2
+ -> [thread 1] -> Linux CPU 6
+ -> [core 1] -> [thread 0] -> Linux CPU 3
+ -> [thread 1] -> Linux CPU 7
+
+ AMD nomenclature for CMT systems::
+
+ [node 0] -> [Compute Unit 0] -> [Compute Unit Core 0] -> Linux CPU 0
+ -> [Compute Unit Core 1] -> Linux CPU 1
+ -> [Compute Unit 1] -> [Compute Unit Core 0] -> Linux CPU 2
+ -> [Compute Unit Core 1] -> Linux CPU 3
+
+ [node 1] -> [Compute Unit 0] -> [Compute Unit Core 0] -> Linux CPU 4
+ -> [Compute Unit Core 1] -> Linux CPU 5
+ -> [Compute Unit 1] -> [Compute Unit Core 0] -> Linux CPU 6
+ -> [Compute Unit Core 1] -> Linux CPU 7
diff --git a/Documentation/arch/x86/tsx_async_abort.rst b/Documentation/arch/x86/tsx_async_abort.rst
new file mode 100644
index 000000000000..583ddc185ba2
--- /dev/null
+++ b/Documentation/arch/x86/tsx_async_abort.rst
@@ -0,0 +1,117 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+TSX Async Abort (TAA) mitigation
+================================
+
+.. _tsx_async_abort:
+
+Overview
+--------
+
+TSX Async Abort (TAA) is a side channel attack on internal buffers in some
+Intel processors similar to Microachitectural Data Sampling (MDS). In this
+case certain loads may speculatively pass invalid data to dependent operations
+when an asynchronous abort condition is pending in a Transactional
+Synchronization Extensions (TSX) transaction. This includes loads with no
+fault or assist condition. Such loads may speculatively expose stale data from
+the same uarch data structures as in MDS, with same scope of exposure i.e.
+same-thread and cross-thread. This issue affects all current processors that
+support TSX.
+
+Mitigation strategy
+-------------------
+
+a) TSX disable - one of the mitigations is to disable TSX. A new MSR
+IA32_TSX_CTRL will be available in future and current processors after
+microcode update which can be used to disable TSX. In addition, it
+controls the enumeration of the TSX feature bits (RTM and HLE) in CPUID.
+
+b) Clear CPU buffers - similar to MDS, clearing the CPU buffers mitigates this
+vulnerability. More details on this approach can be found in
+:ref:`Documentation/admin-guide/hw-vuln/mds.rst <mds>`.
+
+Kernel internal mitigation modes
+--------------------------------
+
+ ============= ============================================================
+ off Mitigation is disabled. Either the CPU is not affected or
+ tsx_async_abort=off is supplied on the kernel command line.
+
+ tsx disabled Mitigation is enabled. TSX feature is disabled by default at
+ bootup on processors that support TSX control.
+
+ verw Mitigation is enabled. CPU is affected and MD_CLEAR is
+ advertised in CPUID.
+
+ ucode needed Mitigation is enabled. CPU is affected and MD_CLEAR is not
+ advertised in CPUID. That is mainly for virtualization
+ scenarios where the host has the updated microcode but the
+ hypervisor does not expose MD_CLEAR in CPUID. It's a best
+ effort approach without guarantee.
+ ============= ============================================================
+
+If the CPU is affected and the "tsx_async_abort" kernel command line parameter is
+not provided then the kernel selects an appropriate mitigation depending on the
+status of RTM and MD_CLEAR CPUID bits.
+
+Below tables indicate the impact of tsx=on|off|auto cmdline options on state of
+TAA mitigation, VERW behavior and TSX feature for various combinations of
+MSR_IA32_ARCH_CAPABILITIES bits.
+
+1. "tsx=off"
+
+========= ========= ============ ============ ============== =================== ======================
+MSR_IA32_ARCH_CAPABILITIES bits Result with cmdline tsx=off
+---------------------------------- -------------------------------------------------------------------------
+TAA_NO MDS_NO TSX_CTRL_MSR TSX state VERW can clear TAA mitigation TAA mitigation
+ after bootup CPU buffers tsx_async_abort=off tsx_async_abort=full
+========= ========= ============ ============ ============== =================== ======================
+ 0 0 0 HW default Yes Same as MDS Same as MDS
+ 0 0 1 Invalid case Invalid case Invalid case Invalid case
+ 0 1 0 HW default No Need ucode update Need ucode update
+ 0 1 1 Disabled Yes TSX disabled TSX disabled
+ 1 X 1 Disabled X None needed None needed
+========= ========= ============ ============ ============== =================== ======================
+
+2. "tsx=on"
+
+========= ========= ============ ============ ============== =================== ======================
+MSR_IA32_ARCH_CAPABILITIES bits Result with cmdline tsx=on
+---------------------------------- -------------------------------------------------------------------------
+TAA_NO MDS_NO TSX_CTRL_MSR TSX state VERW can clear TAA mitigation TAA mitigation
+ after bootup CPU buffers tsx_async_abort=off tsx_async_abort=full
+========= ========= ============ ============ ============== =================== ======================
+ 0 0 0 HW default Yes Same as MDS Same as MDS
+ 0 0 1 Invalid case Invalid case Invalid case Invalid case
+ 0 1 0 HW default No Need ucode update Need ucode update
+ 0 1 1 Enabled Yes None Same as MDS
+ 1 X 1 Enabled X None needed None needed
+========= ========= ============ ============ ============== =================== ======================
+
+3. "tsx=auto"
+
+========= ========= ============ ============ ============== =================== ======================
+MSR_IA32_ARCH_CAPABILITIES bits Result with cmdline tsx=auto
+---------------------------------- -------------------------------------------------------------------------
+TAA_NO MDS_NO TSX_CTRL_MSR TSX state VERW can clear TAA mitigation TAA mitigation
+ after bootup CPU buffers tsx_async_abort=off tsx_async_abort=full
+========= ========= ============ ============ ============== =================== ======================
+ 0 0 0 HW default Yes Same as MDS Same as MDS
+ 0 0 1 Invalid case Invalid case Invalid case Invalid case
+ 0 1 0 HW default No Need ucode update Need ucode update
+ 0 1 1 Disabled Yes TSX disabled TSX disabled
+ 1 X 1 Enabled X None needed None needed
+========= ========= ============ ============ ============== =================== ======================
+
+In the tables, TSX_CTRL_MSR is a new bit in MSR_IA32_ARCH_CAPABILITIES that
+indicates whether MSR_IA32_TSX_CTRL is supported.
+
+There are two control bits in IA32_TSX_CTRL MSR:
+
+ Bit 0: When set it disables the Restricted Transactional Memory (RTM)
+ sub-feature of TSX (will force all transactions to abort on the
+ XBEGIN instruction).
+
+ Bit 1: When set it disables the enumeration of the RTM and HLE feature
+ (i.e. it will make CPUID(EAX=7).EBX{bit4} and
+ CPUID(EAX=7).EBX{bit11} read as 0).
diff --git a/Documentation/arch/x86/usb-legacy-support.rst b/Documentation/arch/x86/usb-legacy-support.rst
new file mode 100644
index 000000000000..b17bf122270a
--- /dev/null
+++ b/Documentation/arch/x86/usb-legacy-support.rst
@@ -0,0 +1,41 @@
+
+.. SPDX-License-Identifier: GPL-2.0
+
+==================
+USB Legacy support
+==================
+
+:Author: Vojtech Pavlik <vojtech@suse.cz>, January 2004
+
+
+Also known as "USB Keyboard" or "USB Mouse support" in the BIOS Setup is a
+feature that allows one to use the USB mouse and keyboard as if they were
+their classic PS/2 counterparts. This means one can use an USB keyboard to
+type in LILO for example.
+
+It has several drawbacks, though:
+
+1) On some machines, the emulated PS/2 mouse takes over even when no USB
+ mouse is present and a real PS/2 mouse is present. In that case the extra
+ features (wheel, extra buttons, touchpad mode) of the real PS/2 mouse may
+ not be available.
+
+2) If AMD64 64-bit mode is enabled, again system crashes often happen,
+ because the SMM BIOS isn't expecting the CPU to be in 64-bit mode. The
+ BIOS manufacturers only test with Windows, and Windows doesn't do 64-bit
+ yet.
+
+Solutions:
+
+Problem 1)
+ can be solved by loading the USB drivers prior to loading the
+ PS/2 mouse driver. Since the PS/2 mouse driver is in 2.6 compiled into
+ the kernel unconditionally, this means the USB drivers need to be
+ compiled-in, too.
+
+Problem 2)
+ is usually fixed by a BIOS update. Check the board
+ manufacturers web site. If an update is not available, disable USB
+ Legacy support in the BIOS. If this alone doesn't help, try also adding
+ idle=poll on the kernel command line. The BIOS may be entering the SMM
+ on the HLT instruction as well.
diff --git a/Documentation/arch/x86/x86_64/5level-paging.rst b/Documentation/arch/x86/x86_64/5level-paging.rst
new file mode 100644
index 000000000000..ad7ddc13f79d
--- /dev/null
+++ b/Documentation/arch/x86/x86_64/5level-paging.rst
@@ -0,0 +1,58 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============
+5-level paging
+==============
+
+Overview
+========
+Original x86-64 was limited by 4-level paging to 256 TiB of virtual address
+space and 64 TiB of physical address space. We are already bumping into
+this limit: some vendors offer servers with 64 TiB of memory today.
+
+To overcome the limitation upcoming hardware will introduce support for
+5-level paging. It is a straight-forward extension of the current page
+table structure adding one more layer of translation.
+
+It bumps the limits to 128 PiB of virtual address space and 4 PiB of
+physical address space. This "ought to be enough for anybody" ©.
+
+QEMU 2.9 and later support 5-level paging.
+
+Virtual memory layout for 5-level paging is described in
+Documentation/arch/x86/x86_64/mm.rst
+
+User-space and large virtual address space
+==========================================
+On x86, 5-level paging enables 56-bit userspace virtual address space.
+Not all user space is ready to handle wide addresses. It's known that
+at least some JIT compilers use higher bits in pointers to encode their
+information. It collides with valid pointers with 5-level paging and
+leads to crashes.
+
+To mitigate this, we are not going to allocate virtual address space
+above 47-bit by default.
+
+But userspace can ask for allocation from full address space by
+specifying hint address (with or without MAP_FIXED) above 47-bits.
+
+If hint address set above 47-bit, but MAP_FIXED is not specified, we try
+to look for unmapped area by specified address. If it's already
+occupied, we look for unmapped area in *full* address space, rather than
+from 47-bit window.
+
+A high hint address would only affect the allocation in question, but not
+any future mmap()s.
+
+Specifying high hint address on older kernel or on machine without 5-level
+paging support is safe. The hint will be ignored and kernel will fall back
+to allocation from 47-bit address space.
+
+This approach helps to easily make application's memory allocator aware
+about large address space without manually tracking allocated virtual
+address space.
+
+One important case we need to handle here is interaction with MPX.
+MPX (without MAWA extension) cannot handle addresses above 47-bit, so we
+need to make sure that MPX cannot be enabled we already have VMA above
+the boundary and forbid creating such VMAs once MPX is enabled.
diff --git a/Documentation/arch/x86/x86_64/cpu-hotplug-spec.rst b/Documentation/arch/x86/x86_64/cpu-hotplug-spec.rst
new file mode 100644
index 000000000000..8d1c91f0c880
--- /dev/null
+++ b/Documentation/arch/x86/x86_64/cpu-hotplug-spec.rst
@@ -0,0 +1,24 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================================================
+Firmware support for CPU hotplug under Linux/x86-64
+===================================================
+
+Linux/x86-64 supports CPU hotplug now. For various reasons Linux wants to
+know in advance of boot time the maximum number of CPUs that could be plugged
+into the system. ACPI 3.0 currently has no official way to supply
+this information from the firmware to the operating system.
+
+In ACPI each CPU needs an LAPIC object in the MADT table (5.2.11.5 in the
+ACPI 3.0 specification). ACPI already has the concept of disabled LAPIC
+objects by setting the Enabled bit in the LAPIC object to zero.
+
+For CPU hotplug Linux/x86-64 expects now that any possible future hotpluggable
+CPU is already available in the MADT. If the CPU is not available yet
+it should have its LAPIC Enabled bit set to 0. Linux will use the number
+of disabled LAPICs to compute the maximum number of future CPUs.
+
+In the worst case the user can overwrite this choice using a command line
+option (additional_cpus=...), but it is recommended to supply the correct
+number (or a reasonable approximation of it, with erring towards more not less)
+in the MADT to avoid manual configuration.
diff --git a/Documentation/arch/x86/x86_64/fake-numa-for-cpusets.rst b/Documentation/arch/x86/x86_64/fake-numa-for-cpusets.rst
new file mode 100644
index 000000000000..970ee94eb551
--- /dev/null
+++ b/Documentation/arch/x86/x86_64/fake-numa-for-cpusets.rst
@@ -0,0 +1,78 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================
+Fake NUMA For CPUSets
+=====================
+
+:Author: David Rientjes <rientjes@cs.washington.edu>
+
+Using numa=fake and CPUSets for Resource Management
+
+This document describes how the numa=fake x86_64 command-line option can be used
+in conjunction with cpusets for coarse memory management. Using this feature,
+you can create fake NUMA nodes that represent contiguous chunks of memory and
+assign them to cpusets and their attached tasks. This is a way of limiting the
+amount of system memory that are available to a certain class of tasks.
+
+For more information on the features of cpusets, see
+Documentation/admin-guide/cgroup-v1/cpusets.rst.
+There are a number of different configurations you can use for your needs. For
+more information on the numa=fake command line option and its various ways of
+configuring fake nodes, see Documentation/admin-guide/kernel-parameters.txt
+
+For the purposes of this introduction, we'll assume a very primitive NUMA
+emulation setup of "numa=fake=4*512,". This will split our system memory into
+four equal chunks of 512M each that we can now use to assign to cpusets. As
+you become more familiar with using this combination for resource control,
+you'll determine a better setup to minimize the number of nodes you have to deal
+with.
+
+A machine may be split as follows with "numa=fake=4*512," as reported by dmesg::
+
+ Faking node 0 at 0000000000000000-0000000020000000 (512MB)
+ Faking node 1 at 0000000020000000-0000000040000000 (512MB)
+ Faking node 2 at 0000000040000000-0000000060000000 (512MB)
+ Faking node 3 at 0000000060000000-0000000080000000 (512MB)
+ ...
+ On node 0 totalpages: 130975
+ On node 1 totalpages: 131072
+ On node 2 totalpages: 131072
+ On node 3 totalpages: 131072
+
+Now following the instructions for mounting the cpusets filesystem from
+Documentation/admin-guide/cgroup-v1/cpusets.rst, you can assign fake nodes (i.e. contiguous memory
+address spaces) to individual cpusets::
+
+ [root@xroads /]# mkdir exampleset
+ [root@xroads /]# mount -t cpuset none exampleset
+ [root@xroads /]# mkdir exampleset/ddset
+ [root@xroads /]# cd exampleset/ddset
+ [root@xroads /exampleset/ddset]# echo 0-1 > cpus
+ [root@xroads /exampleset/ddset]# echo 0-1 > mems
+
+Now this cpuset, 'ddset', will only allowed access to fake nodes 0 and 1 for
+memory allocations (1G).
+
+You can now assign tasks to these cpusets to limit the memory resources
+available to them according to the fake nodes assigned as mems::
+
+ [root@xroads /exampleset/ddset]# echo $$ > tasks
+ [root@xroads /exampleset/ddset]# dd if=/dev/zero of=tmp bs=1024 count=1G
+ [1] 13425
+
+Notice the difference between the system memory usage as reported by
+/proc/meminfo between the restricted cpuset case above and the unrestricted
+case (i.e. running the same 'dd' command without assigning it to a fake NUMA
+cpuset):
+
+ ======== ============ ==========
+ Name Unrestricted Restricted
+ ======== ============ ==========
+ MemTotal 3091900 kB 3091900 kB
+ MemFree 42113 kB 1513236 kB
+ ======== ============ ==========
+
+This allows for coarse memory management for the tasks you assign to particular
+cpusets. Since cpusets can form a hierarchy, you can create some pretty
+interesting combinations of use-cases for various classes of tasks for your
+memory management needs.
diff --git a/Documentation/arch/x86/x86_64/fred.rst b/Documentation/arch/x86/x86_64/fred.rst
new file mode 100644
index 000000000000..9f57e7b91f7e
--- /dev/null
+++ b/Documentation/arch/x86/x86_64/fred.rst
@@ -0,0 +1,96 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================================
+Flexible Return and Event Delivery (FRED)
+=========================================
+
+Overview
+========
+
+The FRED architecture defines simple new transitions that change
+privilege level (ring transitions). The FRED architecture was
+designed with the following goals:
+
+1) Improve overall performance and response time by replacing event
+ delivery through the interrupt descriptor table (IDT event
+ delivery) and event return by the IRET instruction with lower
+ latency transitions.
+
+2) Improve software robustness by ensuring that event delivery
+ establishes the full supervisor context and that event return
+ establishes the full user context.
+
+The new transitions defined by the FRED architecture are FRED event
+delivery and, for returning from events, two FRED return instructions.
+FRED event delivery can effect a transition from ring 3 to ring 0, but
+it is used also to deliver events incident to ring 0. One FRED
+instruction (ERETU) effects a return from ring 0 to ring 3, while the
+other (ERETS) returns while remaining in ring 0. Collectively, FRED
+event delivery and the FRED return instructions are FRED transitions.
+
+In addition to these transitions, the FRED architecture defines a new
+instruction (LKGS) for managing the state of the GS segment register.
+The LKGS instruction can be used by 64-bit operating systems that do
+not use the new FRED transitions.
+
+Furthermore, the FRED architecture is easy to extend for future CPU
+architectures.
+
+Software based event dispatching
+================================
+
+FRED operates differently from IDT in terms of event handling. Instead
+of directly dispatching an event to its handler based on the event
+vector, FRED requires the software to dispatch an event to its handler
+based on both the event's type and vector. Therefore, an event dispatch
+framework must be implemented to facilitate the event-to-handler
+dispatch process. The FRED event dispatch framework takes control
+once an event is delivered, and employs a two-level dispatch.
+
+The first level dispatching is event type based, and the second level
+dispatching is event vector based.
+
+Full supervisor/user context
+============================
+
+FRED event delivery atomically save and restore full supervisor/user
+context upon event delivery and return. Thus it avoids the problem of
+transient states due to %cr2 and/or %dr6, and it is no longer needed
+to handle all the ugly corner cases caused by half baked entry states.
+
+FRED allows explicit unblock of NMI with new event return instructions
+ERETS/ERETU, avoiding the mess caused by IRET which unconditionally
+unblocks NMI, e.g., when an exception happens during NMI handling.
+
+FRED always restores the full value of %rsp, thus ESPFIX is no longer
+needed when FRED is enabled.
+
+LKGS
+====
+
+LKGS behaves like the MOV to GS instruction except that it loads the
+base address into the IA32_KERNEL_GS_BASE MSR instead of the GS
+segment’s descriptor cache. With LKGS, it ends up with avoiding
+mucking with kernel GS, i.e., an operating system can always operate
+with its own GS base address.
+
+Because FRED event delivery from ring 3 and ERETU both swap the value
+of the GS base address and that of the IA32_KERNEL_GS_BASE MSR, plus
+the introduction of LKGS instruction, the SWAPGS instruction is no
+longer needed when FRED is enabled, thus is disallowed (#UD).
+
+Stack levels
+============
+
+4 stack levels 0~3 are introduced to replace the nonreentrant IST for
+event handling, and each stack level should be configured to use a
+dedicated stack.
+
+The current stack level could be unchanged or go higher upon FRED
+event delivery. If unchanged, the CPU keeps using the current event
+stack. If higher, the CPU switches to a new event stack specified by
+the MSR of the new stack level, i.e., MSR_IA32_FRED_RSP[123].
+
+Only execution of a FRED return instruction ERET[US], could lower the
+current stack level, causing the CPU to switch back to the stack it was
+on before a previous event delivery that promoted the stack level.
diff --git a/Documentation/arch/x86/x86_64/fsgs.rst b/Documentation/arch/x86/x86_64/fsgs.rst
new file mode 100644
index 000000000000..6bda4d16d3f7
--- /dev/null
+++ b/Documentation/arch/x86/x86_64/fsgs.rst
@@ -0,0 +1,199 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Using FS and GS segments in user space applications
+===================================================
+
+The x86 architecture supports segmentation. Instructions which access
+memory can use segment register based addressing mode. The following
+notation is used to address a byte within a segment:
+
+ Segment-register:Byte-address
+
+The segment base address is added to the Byte-address to compute the
+resulting virtual address which is accessed. This allows to access multiple
+instances of data with the identical Byte-address, i.e. the same code. The
+selection of a particular instance is purely based on the base-address in
+the segment register.
+
+In 32-bit mode the CPU provides 6 segments, which also support segment
+limits. The limits can be used to enforce address space protections.
+
+In 64-bit mode the CS/SS/DS/ES segments are ignored and the base address is
+always 0 to provide a full 64bit address space. The FS and GS segments are
+still functional in 64-bit mode.
+
+Common FS and GS usage
+------------------------------
+
+The FS segment is commonly used to address Thread Local Storage (TLS). FS
+is usually managed by runtime code or a threading library. Variables
+declared with the '__thread' storage class specifier are instantiated per
+thread and the compiler emits the FS: address prefix for accesses to these
+variables. Each thread has its own FS base address so common code can be
+used without complex address offset calculations to access the per thread
+instances. Applications should not use FS for other purposes when they use
+runtimes or threading libraries which manage the per thread FS.
+
+The GS segment has no common use and can be used freely by
+applications. GCC and Clang support GS based addressing via address space
+identifiers.
+
+Reading and writing the FS/GS base address
+------------------------------------------
+
+There exist two mechanisms to read and write the FS/GS base address:
+
+ - the arch_prctl() system call
+
+ - the FSGSBASE instruction family
+
+Accessing FS/GS base with arch_prctl()
+--------------------------------------
+
+ The arch_prctl(2) based mechanism is available on all 64-bit CPUs and all
+ kernel versions.
+
+ Reading the base:
+
+ arch_prctl(ARCH_GET_FS, &fsbase);
+ arch_prctl(ARCH_GET_GS, &gsbase);
+
+ Writing the base:
+
+ arch_prctl(ARCH_SET_FS, fsbase);
+ arch_prctl(ARCH_SET_GS, gsbase);
+
+ The ARCH_SET_GS prctl may be disabled depending on kernel configuration
+ and security settings.
+
+Accessing FS/GS base with the FSGSBASE instructions
+---------------------------------------------------
+
+ With the Ivy Bridge CPU generation Intel introduced a new set of
+ instructions to access the FS and GS base registers directly from user
+ space. These instructions are also supported on AMD Family 17H CPUs. The
+ following instructions are available:
+
+ =============== ===========================
+ RDFSBASE %reg Read the FS base register
+ RDGSBASE %reg Read the GS base register
+ WRFSBASE %reg Write the FS base register
+ WRGSBASE %reg Write the GS base register
+ =============== ===========================
+
+ The instructions avoid the overhead of the arch_prctl() syscall and allow
+ more flexible usage of the FS/GS addressing modes in user space
+ applications. This does not prevent conflicts between threading libraries
+ and runtimes which utilize FS and applications which want to use it for
+ their own purpose.
+
+FSGSBASE instructions enablement
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ The instructions are enumerated in CPUID leaf 7, bit 0 of EBX. If
+ available /proc/cpuinfo shows 'fsgsbase' in the flag entry of the CPUs.
+
+ The availability of the instructions does not enable them
+ automatically. The kernel has to enable them explicitly in CR4. The
+ reason for this is that older kernels make assumptions about the values in
+ the GS register and enforce them when GS base is set via
+ arch_prctl(). Allowing user space to write arbitrary values to GS base
+ would violate these assumptions and cause malfunction.
+
+ On kernels which do not enable FSGSBASE the execution of the FSGSBASE
+ instructions will fault with a #UD exception.
+
+ The kernel provides reliable information about the enabled state in the
+ ELF AUX vector. If the HWCAP2_FSGSBASE bit is set in the AUX vector, the
+ kernel has FSGSBASE instructions enabled and applications can use them.
+ The following code example shows how this detection works::
+
+ #include <sys/auxv.h>
+ #include <elf.h>
+
+ /* Will be eventually in asm/hwcap.h */
+ #ifndef HWCAP2_FSGSBASE
+ #define HWCAP2_FSGSBASE (1 << 1)
+ #endif
+
+ ....
+
+ unsigned val = getauxval(AT_HWCAP2);
+
+ if (val & HWCAP2_FSGSBASE)
+ printf("FSGSBASE enabled\n");
+
+FSGSBASE instructions compiler support
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+GCC version 4.6.4 and newer provide intrinsics for the FSGSBASE
+instructions. Clang 5 supports them as well.
+
+ =================== ===========================
+ _readfsbase_u64() Read the FS base register
+ _readgsbase_u64() Read the GS base register
+ _writefsbase_u64() Write the FS base register
+ _writegsbase_u64() Write the GS base register
+ =================== ===========================
+
+To utilize these intrinsics <immintrin.h> must be included in the source
+code and the compiler option -mfsgsbase has to be added.
+
+Compiler support for FS/GS based addressing
+-------------------------------------------
+
+GCC version 6 and newer provide support for FS/GS based addressing via
+Named Address Spaces. GCC implements the following address space
+identifiers for x86:
+
+ ========= ====================================
+ __seg_fs Variable is addressed relative to FS
+ __seg_gs Variable is addressed relative to GS
+ ========= ====================================
+
+The preprocessor symbols __SEG_FS and __SEG_GS are defined when these
+address spaces are supported. Code which implements fallback modes should
+check whether these symbols are defined. Usage example::
+
+ #ifdef __SEG_GS
+
+ long data0 = 0;
+ long data1 = 1;
+
+ long __seg_gs *ptr;
+
+ /* Check whether FSGSBASE is enabled by the kernel (HWCAP2_FSGSBASE) */
+ ....
+
+ /* Set GS base to point to data0 */
+ _writegsbase_u64(&data0);
+
+ /* Access offset 0 of GS */
+ ptr = 0;
+ printf("data0 = %ld\n", *ptr);
+
+ /* Set GS base to point to data1 */
+ _writegsbase_u64(&data1);
+ /* ptr still addresses offset 0! */
+ printf("data1 = %ld\n", *ptr);
+
+
+Clang does not provide the GCC address space identifiers, but it provides
+address spaces via an attribute based mechanism in Clang 2.6 and newer
+versions:
+
+ ==================================== =====================================
+ __attribute__((address_space(256)) Variable is addressed relative to GS
+ __attribute__((address_space(257)) Variable is addressed relative to FS
+ ==================================== =====================================
+
+FS/GS based addressing with inline assembly
+-------------------------------------------
+
+In case the compiler does not support address spaces, inline assembly can
+be used for FS/GS based addressing mode::
+
+ mov %fs:offset, %reg
+ mov %gs:offset, %reg
+
+ mov %reg, %fs:offset
+ mov %reg, %gs:offset
diff --git a/Documentation/arch/x86/x86_64/index.rst b/Documentation/arch/x86/x86_64/index.rst
new file mode 100644
index 000000000000..a0261957a08a
--- /dev/null
+++ b/Documentation/arch/x86/x86_64/index.rst
@@ -0,0 +1,17 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============
+x86_64 Support
+==============
+
+.. toctree::
+ :maxdepth: 2
+
+ uefi
+ mm
+ 5level-paging
+ fake-numa-for-cpusets
+ cpu-hotplug-spec
+ machinecheck
+ fsgs
+ fred
diff --git a/Documentation/arch/x86/x86_64/machinecheck.rst b/Documentation/arch/x86/x86_64/machinecheck.rst
new file mode 100644
index 000000000000..cea12ee97200
--- /dev/null
+++ b/Documentation/arch/x86/x86_64/machinecheck.rst
@@ -0,0 +1,33 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============================================================
+Configurable sysfs parameters for the x86-64 machine check code
+===============================================================
+
+Machine checks report internal hardware error conditions detected
+by the CPU. Uncorrected errors typically cause a machine check
+(often with panic), corrected ones cause a machine check log entry.
+
+Machine checks are organized in banks (normally associated with
+a hardware subsystem) and subevents in a bank. The exact meaning
+of the banks and subevent is CPU specific.
+
+mcelog knows how to decode them.
+
+When you see the "Machine check errors logged" message in the system
+log then mcelog should run to collect and decode machine check entries
+from /dev/mcelog. Normally mcelog should be run regularly from a cronjob.
+
+Each CPU has a directory in /sys/devices/system/machinecheck/machinecheckN
+(N = CPU number).
+
+The directory contains some configurable entries. See
+Documentation/ABI/testing/sysfs-mce for more details.
+
+TBD document entries for AMD threshold interrupt configuration
+
+For more details about the x86 machine check architecture
+see the Intel and AMD architecture manuals from their developer websites.
+
+For more details about the architecture
+see http://one.firstfloor.org/~andi/mce.pdf
diff --git a/Documentation/arch/x86/x86_64/mm.rst b/Documentation/arch/x86/x86_64/mm.rst
new file mode 100644
index 000000000000..a6cf05d51bd8
--- /dev/null
+++ b/Documentation/arch/x86/x86_64/mm.rst
@@ -0,0 +1,180 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=================
+Memory Management
+=================
+
+Complete virtual memory map with 4-level page tables
+====================================================
+
+.. note::
+
+ - Negative addresses such as "-23 TB" are absolute addresses in bytes, counted down
+ from the top of the 64-bit address space. It's easier to understand the layout
+ when seen both in absolute addresses and in distance-from-top notation.
+
+ For example 0xffffe90000000000 == -23 TB, it's 23 TB lower than the top of the
+ 64-bit address space (ffffffffffffffff).
+
+ Note that as we get closer to the top of the address space, the notation changes
+ from TB to GB and then MB/KB.
+
+ - "16M TB" might look weird at first sight, but it's an easier way to visualize size
+ notation than "16 EB", which few will recognize at first sight as 16 exabytes.
+ It also shows it nicely how incredibly large 64-bit address space is.
+
+::
+
+ ========================================================================================================================
+ Start addr | Offset | End addr | Size | VM area description
+ ========================================================================================================================
+ | | | |
+ 0000000000000000 | 0 | 00007fffffffefff | ~128 TB | user-space virtual memory, different per mm
+ 00007ffffffff000 | ~128 TB | 00007fffffffffff | 4 kB | ... guard hole
+ __________________|____________|__________________|_________|___________________________________________________________
+ | | | |
+ 0000800000000000 | +128 TB | 7fffffffffffffff | ~8 EB | ... huge, almost 63 bits wide hole of non-canonical
+ | | | | virtual memory addresses up to the -8 EB
+ | | | | starting offset of kernel mappings.
+ | | | |
+ | | | | LAM relaxes canonicallity check allowing to create aliases
+ | | | | for userspace memory here.
+ __________________|____________|__________________|_________|___________________________________________________________
+ |
+ | Kernel-space virtual memory, shared between all processes:
+ __________________|____________|__________________|_________|___________________________________________________________
+ | | | |
+ 8000000000000000 | -8 EB | ffff7fffffffffff | ~8 EB | ... huge, almost 63 bits wide hole of non-canonical
+ | | | | virtual memory addresses up to the -128 TB
+ | | | | starting offset of kernel mappings.
+ | | | |
+ | | | | LAM_SUP relaxes canonicallity check allowing to create
+ | | | | aliases for kernel memory here.
+ ____________________________________________________________|___________________________________________________________
+ | | | |
+ ffff800000000000 | -128 TB | ffff87ffffffffff | 8 TB | ... guard hole, also reserved for hypervisor
+ ffff880000000000 | -120 TB | ffff887fffffffff | 0.5 TB | LDT remap for PTI
+ ffff888000000000 | -119.5 TB | ffffc87fffffffff | 64 TB | direct mapping of all physical memory (page_offset_base)
+ ffffc88000000000 | -55.5 TB | ffffc8ffffffffff | 0.5 TB | ... unused hole
+ ffffc90000000000 | -55 TB | ffffe8ffffffffff | 32 TB | vmalloc/ioremap space (vmalloc_base)
+ ffffe90000000000 | -23 TB | ffffe9ffffffffff | 1 TB | ... unused hole
+ ffffea0000000000 | -22 TB | ffffeaffffffffff | 1 TB | virtual memory map (vmemmap_base)
+ ffffeb0000000000 | -21 TB | ffffebffffffffff | 1 TB | ... unused hole
+ ffffec0000000000 | -20 TB | fffffbffffffffff | 16 TB | KASAN shadow memory
+ __________________|____________|__________________|_________|____________________________________________________________
+ |
+ | Identical layout to the 56-bit one from here on:
+ ____________________________________________________________|____________________________________________________________
+ | | | |
+ fffffc0000000000 | -4 TB | fffffdffffffffff | 2 TB | ... unused hole
+ | | | | vaddr_end for KASLR
+ fffffe0000000000 | -2 TB | fffffe7fffffffff | 0.5 TB | cpu_entry_area mapping
+ fffffe8000000000 | -1.5 TB | fffffeffffffffff | 0.5 TB | ... unused hole
+ ffffff0000000000 | -1 TB | ffffff7fffffffff | 0.5 TB | %esp fixup stacks
+ ffffff8000000000 | -512 GB | ffffffeeffffffff | 444 GB | ... unused hole
+ ffffffef00000000 | -68 GB | fffffffeffffffff | 64 GB | EFI region mapping space
+ ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | ... unused hole
+ ffffffff80000000 | -2 GB | ffffffff9fffffff | 512 MB | kernel text mapping, mapped to physical address 0
+ ffffffff80000000 |-2048 MB | | |
+ ffffffffa0000000 |-1536 MB | fffffffffeffffff | 1520 MB | module mapping space
+ ffffffffff000000 | -16 MB | | |
+ FIXADDR_START | ~-11 MB | ffffffffff5fffff | ~0.5 MB | kernel-internal fixmap range, variable size and offset
+ ffffffffff600000 | -10 MB | ffffffffff600fff | 4 kB | legacy vsyscall ABI
+ ffffffffffe00000 | -2 MB | ffffffffffffffff | 2 MB | ... unused hole
+ __________________|____________|__________________|_________|___________________________________________________________
+
+
+Complete virtual memory map with 5-level page tables
+====================================================
+
+.. note::
+
+ - With 56-bit addresses, user-space memory gets expanded by a factor of 512x,
+ from 0.125 PB to 64 PB. All kernel mappings shift down to the -64 PB starting
+ offset and many of the regions expand to support the much larger physical
+ memory supported.
+
+::
+
+ ========================================================================================================================
+ Start addr | Offset | End addr | Size | VM area description
+ ========================================================================================================================
+ | | | |
+ 0000000000000000 | 0 | 00fffffffffff000 | ~64 PB | user-space virtual memory, different per mm
+ 00fffffffffff000 | ~64 PB | 00ffffffffffffff | 4 kB | ... guard hole
+ __________________|____________|__________________|_________|___________________________________________________________
+ | | | |
+ 0100000000000000 | +64 PB | 7fffffffffffffff | ~8 EB | ... huge, almost 63 bits wide hole of non-canonical
+ | | | | virtual memory addresses up to the -8EB TB
+ | | | | starting offset of kernel mappings.
+ | | | |
+ | | | | LAM relaxes canonicallity check allowing to create aliases
+ | | | | for userspace memory here.
+ __________________|____________|__________________|_________|___________________________________________________________
+ |
+ | Kernel-space virtual memory, shared between all processes:
+ ____________________________________________________________|___________________________________________________________
+ 8000000000000000 | -8 EB | feffffffffffffff | ~8 EB | ... huge, almost 63 bits wide hole of non-canonical
+ | | | | virtual memory addresses up to the -64 PB
+ | | | | starting offset of kernel mappings.
+ | | | |
+ | | | | LAM_SUP relaxes canonicallity check allowing to create
+ | | | | aliases for kernel memory here.
+ ____________________________________________________________|___________________________________________________________
+ | | | |
+ ff00000000000000 | -64 PB | ff0fffffffffffff | 4 PB | ... guard hole, also reserved for hypervisor
+ ff10000000000000 | -60 PB | ff10ffffffffffff | 0.25 PB | LDT remap for PTI
+ ff11000000000000 | -59.75 PB | ff90ffffffffffff | 32 PB | direct mapping of all physical memory (page_offset_base)
+ ff91000000000000 | -27.75 PB | ff9fffffffffffff | 3.75 PB | ... unused hole
+ ffa0000000000000 | -24 PB | ffd1ffffffffffff | 12.5 PB | vmalloc/ioremap space (vmalloc_base)
+ ffd2000000000000 | -11.5 PB | ffd3ffffffffffff | 0.5 PB | ... unused hole
+ ffd4000000000000 | -11 PB | ffd5ffffffffffff | 0.5 PB | virtual memory map (vmemmap_base)
+ ffd6000000000000 | -10.5 PB | ffdeffffffffffff | 2.25 PB | ... unused hole
+ ffdf000000000000 | -8.25 PB | fffffbffffffffff | ~8 PB | KASAN shadow memory
+ __________________|____________|__________________|_________|____________________________________________________________
+ |
+ | Identical layout to the 47-bit one from here on:
+ ____________________________________________________________|____________________________________________________________
+ | | | |
+ fffffc0000000000 | -4 TB | fffffdffffffffff | 2 TB | ... unused hole
+ | | | | vaddr_end for KASLR
+ fffffe0000000000 | -2 TB | fffffe7fffffffff | 0.5 TB | cpu_entry_area mapping
+ fffffe8000000000 | -1.5 TB | fffffeffffffffff | 0.5 TB | ... unused hole
+ ffffff0000000000 | -1 TB | ffffff7fffffffff | 0.5 TB | %esp fixup stacks
+ ffffff8000000000 | -512 GB | ffffffeeffffffff | 444 GB | ... unused hole
+ ffffffef00000000 | -68 GB | fffffffeffffffff | 64 GB | EFI region mapping space
+ ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | ... unused hole
+ ffffffff80000000 | -2 GB | ffffffff9fffffff | 512 MB | kernel text mapping, mapped to physical address 0
+ ffffffff80000000 |-2048 MB | | |
+ ffffffffa0000000 |-1536 MB | fffffffffeffffff | 1520 MB | module mapping space
+ ffffffffff000000 | -16 MB | | |
+ FIXADDR_START | ~-11 MB | ffffffffff5fffff | ~0.5 MB | kernel-internal fixmap range, variable size and offset
+ ffffffffff600000 | -10 MB | ffffffffff600fff | 4 kB | legacy vsyscall ABI
+ ffffffffffe00000 | -2 MB | ffffffffffffffff | 2 MB | ... unused hole
+ __________________|____________|__________________|_________|___________________________________________________________
+
+Architecture defines a 64-bit virtual address. Implementations can support
+less. Currently supported are 48- and 57-bit virtual addresses. Bits 63
+through to the most-significant implemented bit are sign extended.
+This causes hole between user space and kernel addresses if you interpret them
+as unsigned.
+
+The direct mapping covers all memory in the system up to the highest
+memory address (this means in some cases it can also include PCI memory
+holes).
+
+We map EFI runtime services in the 'efi_pgd' PGD in a 64GB large virtual
+memory window (this size is arbitrary, it can be raised later if needed).
+The mappings are not part of any other kernel PGD and are only available
+during EFI runtime calls.
+
+Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all
+physical memory, vmalloc/ioremap space and virtual memory map are randomized.
+Their order is preserved but their base will be offset early at boot time.
+
+Be very careful vs. KASLR when changing anything here. The KASLR address
+range must not overlap with anything except the KASAN shadow area, which is
+correct as KASAN disables KASLR.
+
+For both 4- and 5-level layouts, the KSTACK_ERASE_POISON value in the last 2MB
+hole: ffffffffffff4111
diff --git a/Documentation/arch/x86/x86_64/uefi.rst b/Documentation/arch/x86/x86_64/uefi.rst
new file mode 100644
index 000000000000..e84592dbd6c1
--- /dev/null
+++ b/Documentation/arch/x86/x86_64/uefi.rst
@@ -0,0 +1,75 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================================
+General note on [U]EFI x86_64 support
+=====================================
+
+The nomenclature EFI and UEFI are used interchangeably in this document.
+
+Although the tools below are _not_ needed for building the kernel,
+the needed bootloader support and associated tools for x86_64 platforms
+with EFI firmware and specifications are listed below.
+
+1. UEFI specification: http://www.uefi.org
+
+2. Booting Linux kernel on UEFI x86_64 platform can either be
+ done using the <Documentation/admin-guide/efi-stub.rst> or using a
+ separate bootloader.
+
+3. x86_64 platform with EFI/UEFI firmware.
+
+Mechanics
+---------
+
+Refer to <Documentation/admin-guide/efi-stub.rst> to learn how to use the EFI stub.
+
+Below are general EFI setup guidelines on the x86_64 platform,
+regardless of whether you use the EFI stub or a separate bootloader.
+
+- Build the kernel with the following configuration::
+
+ CONFIG_FB_EFI=y
+ CONFIG_FRAMEBUFFER_CONSOLE=y
+
+ If EFI runtime services are expected, the following configuration should
+ be selected::
+
+ CONFIG_EFI=y
+ CONFIG_EFIVAR_FS=y or m # optional
+
+- Create a VFAT partition on the disk with the EFI System flag
+ You can do this with fdisk with the following commands:
+
+ 1. g - initialize a GPT partition table
+ 2. n - create a new partition
+ 3. t - change the partition type to "EFI System" (number 1)
+ 4. w - write and save the changes
+
+ Afterwards, initialize the VFAT filesystem by running mkfs::
+
+ mkfs.fat /dev/<your-partition>
+
+- Copy the boot files to the VFAT partition:
+ If you use the EFI stub method, the kernel acts also as an EFI executable.
+
+ You can just copy the bzImage to the EFI/boot/bootx64.efi path on the partition
+ so that it will automatically get booted, see the <Documentation/admin-guide/efi-stub.rst> page
+ for additional instructions regarding passage of kernel parameters and initramfs.
+
+ If you use a custom bootloader, refer to the relevant documentation for help on this part.
+
+- If some or all EFI runtime services don't work, you can try following
+ kernel command line parameters to turn off some or all EFI runtime
+ services.
+
+ noefi
+ turn off all EFI runtime services
+ reboot_type=k
+ turn off EFI reboot runtime service
+
+- If the EFI memory map has additional entries not in the E820 map,
+ you can include those entries in the kernels memory map of available
+ physical RAM by using the following kernel command line parameter.
+
+ add_efi_memmap
+ include EFI memory map of available physical RAM
diff --git a/Documentation/arch/x86/xstate.rst b/Documentation/arch/x86/xstate.rst
new file mode 100644
index 000000000000..cec05ac464c1
--- /dev/null
+++ b/Documentation/arch/x86/xstate.rst
@@ -0,0 +1,174 @@
+Using XSTATE features in user space applications
+================================================
+
+The x86 architecture supports floating-point extensions which are
+enumerated via CPUID. Applications consult CPUID and use XGETBV to
+evaluate which features have been enabled by the kernel XCR0.
+
+Up to AVX-512 and PKRU states, these features are automatically enabled by
+the kernel if available. Features like AMX TILE_DATA (XSTATE component 18)
+are enabled by XCR0 as well, but the first use of related instruction is
+trapped by the kernel because by default the required large XSTATE buffers
+are not allocated automatically.
+
+The purpose for dynamic features
+--------------------------------
+
+Legacy userspace libraries often have hard-coded, static sizes for
+alternate signal stacks, often using MINSIGSTKSZ which is typically 2KB.
+That stack must be able to store at *least* the signal frame that the
+kernel sets up before jumping into the signal handler. That signal frame
+must include an XSAVE buffer defined by the CPU.
+
+However, that means that the size of signal stacks is dynamic, not static,
+because different CPUs have differently-sized XSAVE buffers. A compiled-in
+size of 2KB with existing applications is too small for new CPU features
+like AMX. Instead of universally requiring larger stack, with the dynamic
+enabling, the kernel can enforce userspace applications to have
+properly-sized altstacks.
+
+Using dynamically enabled XSTATE features in user space applications
+--------------------------------------------------------------------
+
+The kernel provides an arch_prctl(2) based mechanism for applications to
+request the usage of such features. The arch_prctl(2) options related to
+this are:
+
+-ARCH_GET_XCOMP_SUPP
+
+ arch_prctl(ARCH_GET_XCOMP_SUPP, &features);
+
+ ARCH_GET_XCOMP_SUPP stores the supported features in userspace storage of
+ type uint64_t. The second argument is a pointer to that storage.
+
+-ARCH_GET_XCOMP_PERM
+
+ arch_prctl(ARCH_GET_XCOMP_PERM, &features);
+
+ ARCH_GET_XCOMP_PERM stores the features for which the userspace process
+ has permission in userspace storage of type uint64_t. The second argument
+ is a pointer to that storage.
+
+-ARCH_REQ_XCOMP_PERM
+
+ arch_prctl(ARCH_REQ_XCOMP_PERM, feature_nr);
+
+ ARCH_REQ_XCOMP_PERM allows to request permission for a dynamically enabled
+ feature or a feature set. A feature set can be mapped to a facility, e.g.
+ AMX, and can require one or more XSTATE components to be enabled.
+
+ The feature argument is the number of the highest XSTATE component which
+ is required for a facility to work.
+
+When requesting permission for a feature, the kernel checks the
+availability. The kernel ensures that sigaltstacks in the process's tasks
+are large enough to accommodate the resulting large signal frame. It
+enforces this both during ARCH_REQ_XCOMP_SUPP and during any subsequent
+sigaltstack(2) calls. If an installed sigaltstack is smaller than the
+resulting sigframe size, ARCH_REQ_XCOMP_SUPP results in -ENOSUPP. Also,
+sigaltstack(2) results in -ENOMEM if the requested altstack is too small
+for the permitted features.
+
+Permission, when granted, is valid per process. Permissions are inherited
+on fork(2) and cleared on exec(3).
+
+The first use of an instruction related to a dynamically enabled feature is
+trapped by the kernel. The trap handler checks whether the process has
+permission to use the feature. If the process has no permission then the
+kernel sends SIGILL to the application. If the process has permission then
+the handler allocates a larger xstate buffer for the task so the large
+state can be context switched. In the unlikely cases that the allocation
+fails, the kernel sends SIGSEGV.
+
+AMX TILE_DATA enabling example
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Below is the example of how userspace applications enable
+TILE_DATA dynamically:
+
+ 1. The application first needs to query the kernel for AMX
+ support::
+
+ #include <asm/prctl.h>
+ #include <sys/syscall.h>
+ #include <stdio.h>
+ #include <unistd.h>
+
+ #ifndef ARCH_GET_XCOMP_SUPP
+ #define ARCH_GET_XCOMP_SUPP 0x1021
+ #endif
+
+ #ifndef ARCH_XCOMP_TILECFG
+ #define ARCH_XCOMP_TILECFG 17
+ #endif
+
+ #ifndef ARCH_XCOMP_TILEDATA
+ #define ARCH_XCOMP_TILEDATA 18
+ #endif
+
+ #define MASK_XCOMP_TILE ((1 << ARCH_XCOMP_TILECFG) | \
+ (1 << ARCH_XCOMP_TILEDATA))
+
+ unsigned long features;
+ long rc;
+
+ ...
+
+ rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_SUPP, &features);
+
+ if (!rc && (features & MASK_XCOMP_TILE) == MASK_XCOMP_TILE)
+ printf("AMX is available.\n");
+
+ 2. After that, determining support for AMX, an application must
+ explicitly ask permission to use it::
+
+ #ifndef ARCH_REQ_XCOMP_PERM
+ #define ARCH_REQ_XCOMP_PERM 0x1023
+ #endif
+
+ ...
+
+ rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, ARCH_XCOMP_TILEDATA);
+
+ if (!rc)
+ printf("AMX is ready for use.\n");
+
+Note this example does not include the sigaltstack preparation.
+
+Dynamic features in signal frames
+---------------------------------
+
+Dynamically enabled features are not written to the signal frame upon signal
+entry if the feature is in its initial configuration. This differs from
+non-dynamic features which are always written regardless of their
+configuration. Signal handlers can examine the XSAVE buffer's XSTATE_BV
+field to determine if a features was written.
+
+Dynamic features for virtual machines
+-------------------------------------
+
+The permission for the guest state component needs to be managed separately
+from the host, as they are exclusive to each other. A coupled of options
+are extended to control the guest permission:
+
+-ARCH_GET_XCOMP_GUEST_PERM
+
+ arch_prctl(ARCH_GET_XCOMP_GUEST_PERM, &features);
+
+ ARCH_GET_XCOMP_GUEST_PERM is a variant of ARCH_GET_XCOMP_PERM. So it
+ provides the same semantics and functionality but for the guest
+ components.
+
+-ARCH_REQ_XCOMP_GUEST_PERM
+
+ arch_prctl(ARCH_REQ_XCOMP_GUEST_PERM, feature_nr);
+
+ ARCH_REQ_XCOMP_GUEST_PERM is a variant of ARCH_REQ_XCOMP_PERM. It has the
+ same semantics for the guest permission. While providing a similar
+ functionality, this comes with a constraint. Permission is frozen when the
+ first VCPU is created. Any attempt to change permission after that point
+ is going to be rejected. So, the permission has to be requested before the
+ first VCPU creation.
+
+Note that some VMMs may have already established a set of supported state
+components. These options are not presumed to support any particular VMM.
diff --git a/Documentation/arch/x86/zero-page.rst b/Documentation/arch/x86/zero-page.rst
new file mode 100644
index 000000000000..45aa9cceb4f1
--- /dev/null
+++ b/Documentation/arch/x86/zero-page.rst
@@ -0,0 +1,47 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========
+Zero Page
+=========
+The additional fields in struct boot_params as a part of 32-bit boot
+protocol of kernel. These should be filled by bootloader or 16-bit
+real-mode setup code of the kernel. References/settings to it mainly
+are in::
+
+ arch/x86/include/uapi/asm/bootparam.h
+
+=========== ===== ======================= =================================================
+Offset/Size Proto Name Meaning
+
+000/040 ALL screen_info Text mode or frame buffer information
+ (struct screen_info)
+040/014 ALL apm_bios_info APM BIOS information (struct apm_bios_info)
+058/008 ALL tboot_addr Physical address of tboot shared page
+060/010 ALL ist_info Intel SpeedStep (IST) BIOS support information
+ (struct ist_info)
+070/008 ALL acpi_rsdp_addr Physical address of ACPI RSDP table
+080/010 ALL hd0_info hd0 disk parameter, OBSOLETE!!
+090/010 ALL hd1_info hd1 disk parameter, OBSOLETE!!
+0A0/010 ALL sys_desc_table System description table (struct sys_desc_table),
+ OBSOLETE!!
+0B0/010 ALL olpc_ofw_header OLPC's OpenFirmware CIF and friends
+0C0/004 ALL ext_ramdisk_image ramdisk_image high 32bits
+0C4/004 ALL ext_ramdisk_size ramdisk_size high 32bits
+0C8/004 ALL ext_cmd_line_ptr cmd_line_ptr high 32bits
+13C/004 ALL cc_blob_address Physical address of Confidential Computing blob
+140/080 ALL edid_info Video mode setup (struct edid_info)
+1C0/020 ALL efi_info EFI 32 information (struct efi_info)
+1E0/004 ALL alt_mem_k Alternative mem check, in KB
+1E4/004 ALL scratch Scratch field for the kernel setup code
+1E8/001 ALL e820_entries Number of entries in e820_table (below)
+1E9/001 ALL eddbuf_entries Number of entries in eddbuf (below)
+1EA/001 ALL edd_mbr_sig_buf_entries Number of entries in edd_mbr_sig_buffer
+ (below)
+1EB/001 ALL kbd_status Numlock is enabled
+1EC/001 ALL secure_boot Secure boot is enabled in the firmware
+1EF/001 ALL sentinel Used to detect broken bootloaders
+290/040 ALL edd_mbr_sig_buffer EDD MBR signatures
+2D0/A00 ALL e820_table E820 memory map table
+ (array of struct e820_entry)
+D00/1EC ALL eddbuf EDD data (array of struct edd_info)
+=========== ===== ======================= =================================================
diff --git a/Documentation/arch/xtensa/atomctl.rst b/Documentation/arch/xtensa/atomctl.rst
new file mode 100644
index 000000000000..75d174169430
--- /dev/null
+++ b/Documentation/arch/xtensa/atomctl.rst
@@ -0,0 +1,51 @@
+===========================================
+Atomic Operation Control (ATOMCTL) Register
+===========================================
+
+We Have Atomic Operation Control (ATOMCTL) Register.
+This register determines the effect of using a S32C1I instruction
+with various combinations of:
+
+ 1. With and without an Coherent Cache Controller which
+ can do Atomic Transactions to the memory internally.
+
+ 2. With and without An Intelligent Memory Controller which
+ can do Atomic Transactions itself.
+
+The Core comes up with a default value of for the three types of cache ops::
+
+ 0x28: (WB: Internal, WT: Internal, BY:Exception)
+
+On the FPGA Cards we typically simulate an Intelligent Memory controller
+which can implement RCW transactions. For FPGA cards with an External
+Memory controller we let it to the atomic operations internally while
+doing a Cached (WB) transaction and use the Memory RCW for un-cached
+operations.
+
+For systems without an coherent cache controller, non-MX, we always
+use the memory controllers RCW, though non-MX controllers likely
+support the Internal Operation.
+
+CUSTOMER-WARNING:
+ Virtually all customers buy their memory controllers from vendors that
+ don't support atomic RCW memory transactions and will likely want to
+ configure this register to not use RCW.
+
+Developers might find using RCW in Bypass mode convenient when testing
+with the cache being bypassed; for example studying cache alias problems.
+
+See Section 4.3.12.4 of ISA; Bits::
+
+ WB WT BY
+ 5 4 | 3 2 | 1 0
+
+========= ================== ================== ===============
+ 2 Bit
+ Field
+ Values WB - Write Back WT - Write Thru BY - Bypass
+========= ================== ================== ===============
+ 0 Exception Exception Exception
+ 1 RCW Transaction RCW Transaction RCW Transaction
+ 2 Internal Operation Internal Operation Reserved
+ 3 Reserved Reserved Reserved
+========= ================== ================== ===============
diff --git a/Documentation/arch/xtensa/booting.rst b/Documentation/arch/xtensa/booting.rst
new file mode 100644
index 000000000000..e1b83707e5b6
--- /dev/null
+++ b/Documentation/arch/xtensa/booting.rst
@@ -0,0 +1,22 @@
+=====================================
+Passing boot parameters to the kernel
+=====================================
+
+Boot parameters are represented as a TLV list in the memory. Please see
+arch/xtensa/include/asm/bootparam.h for definition of the bp_tag structure and
+tag value constants. First entry in the list must have type BP_TAG_FIRST, last
+entry must have type BP_TAG_LAST. The address of the first list entry is
+passed to the kernel in the register a2. The address type depends on MMU type:
+
+- For configurations without MMU, with region protection or with MPU the
+ address must be the physical address.
+- For configurations with region translarion MMU or with MMUv3 and CONFIG_MMU=n
+ the address must be a valid address in the current mapping. The kernel will
+ not change the mapping on its own.
+- For configurations with MMUv2 the address must be a virtual address in the
+ default virtual mapping (0xd0000000..0xffffffff).
+- For configurations with MMUv3 and CONFIG_MMU=y the address may be either a
+ virtual or physical address. In either case it must be within the default
+ virtual mapping. It is considered physical if it is within the range of
+ physical addresses covered by the default KSEG mapping (XCHAL_KSEG_PADDR..
+ XCHAL_KSEG_PADDR + XCHAL_KSEG_SIZE), otherwise it is considered virtual.
diff --git a/Documentation/arch/xtensa/features.rst b/Documentation/arch/xtensa/features.rst
new file mode 100644
index 000000000000..28dcce1759be
--- /dev/null
+++ b/Documentation/arch/xtensa/features.rst
@@ -0,0 +1,3 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. kernel-feat:: features xtensa
diff --git a/Documentation/arch/xtensa/index.rst b/Documentation/arch/xtensa/index.rst
new file mode 100644
index 000000000000..69952446a9be
--- /dev/null
+++ b/Documentation/arch/xtensa/index.rst
@@ -0,0 +1,14 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================
+Xtensa Architecture
+===================
+
+.. toctree::
+ :maxdepth: 1
+
+ atomctl
+ booting
+ mmu
+
+ features
diff --git a/Documentation/arch/xtensa/mmu.rst b/Documentation/arch/xtensa/mmu.rst
new file mode 100644
index 000000000000..450573afa31a
--- /dev/null
+++ b/Documentation/arch/xtensa/mmu.rst
@@ -0,0 +1,198 @@
+=============================
+MMUv3 initialization sequence
+=============================
+
+The code in the initialize_mmu macro sets up MMUv3 memory mapping
+identically to MMUv2 fixed memory mapping. Depending on
+CONFIG_INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX symbol this code is
+located in addresses it was linked for (symbol undefined), or not
+(symbol defined), so it needs to be position-independent.
+
+The code has the following assumptions:
+
+ - This code fragment is run only on an MMU v3.
+ - TLBs are in their reset state.
+ - ITLBCFG and DTLBCFG are zero (reset state).
+ - RASID is 0x04030201 (reset state).
+ - PS.RING is zero (reset state).
+ - LITBASE is zero (reset state, PC-relative literals); required to be PIC.
+
+TLB setup proceeds along the following steps.
+
+ Legend:
+
+ - VA = virtual address (two upper nibbles of it);
+ - PA = physical address (two upper nibbles of it);
+ - pc = physical range that contains this code;
+
+After step 2, we jump to virtual address in the range 0x40000000..0x5fffffff
+or 0x00000000..0x1fffffff, depending on whether the kernel was loaded below
+0x40000000 or above. That address corresponds to next instruction to execute
+in this code. After step 4, we jump to intended (linked) address of this code.
+The scheme below assumes that the kernel is loaded below 0x40000000.
+
+ ====== ===== ===== ===== ===== ====== ===== =====
+ - Step0 Step1 Step2 Step3 Step4 Step5
+
+ VA PA PA PA PA VA PA PA
+ ====== ===== ===== ===== ===== ====== ===== =====
+ E0..FF -> E0 -> E0 -> E0 F0..FF -> F0 -> F0
+ C0..DF -> C0 -> C0 -> C0 E0..EF -> F0 -> F0
+ A0..BF -> A0 -> A0 -> A0 D8..DF -> 00 -> 00
+ 80..9F -> 80 -> 80 -> 80 D0..D7 -> 00 -> 00
+ 60..7F -> 60 -> 60 -> 60
+ 40..5F -> 40 -> pc -> pc 40..5F -> pc
+ 20..3F -> 20 -> 20 -> 20
+ 00..1F -> 00 -> 00 -> 00
+ ====== ===== ===== ===== ===== ====== ===== =====
+
+The default location of IO peripherals is above 0xf0000000. This may be changed
+using a "ranges" property in a device tree simple-bus node. See the Devicetree
+Specification, section 4.5 for details on the syntax and semantics of
+simple-bus nodes. The following limitations apply:
+
+1. Only top level simple-bus nodes are considered
+
+2. Only one (first) simple-bus node is considered
+
+3. Empty "ranges" properties are not supported
+
+4. Only the first triplet in the "ranges" property is considered
+
+5. The parent-bus-address value is rounded down to the nearest 256MB boundary
+
+6. The IO area covers the entire 256MB segment of parent-bus-address; the
+ "ranges" triplet length field is ignored
+
+
+MMUv3 address space layouts.
+============================
+
+Default MMUv2-compatible layout::
+
+ Symbol VADDR Size
+ +------------------+
+ | Userspace | 0x00000000 TASK_SIZE
+ +------------------+ 0x40000000
+ +------------------+
+ | Page table | XCHAL_PAGE_TABLE_VADDR 0x80000000 XCHAL_PAGE_TABLE_SIZE
+ +------------------+
+ | KASAN shadow map | KASAN_SHADOW_START 0x80400000 KASAN_SHADOW_SIZE
+ +------------------+ 0x8e400000
+ +------------------+
+ | VMALLOC area | VMALLOC_START 0xc0000000 128MB - 64KB
+ +------------------+ VMALLOC_END
+ +------------------+
+ | Cache aliasing | TLBTEMP_BASE_1 0xc8000000 DCACHE_WAY_SIZE
+ | remap area 1 |
+ +------------------+
+ | Cache aliasing | TLBTEMP_BASE_2 DCACHE_WAY_SIZE
+ | remap area 2 |
+ +------------------+
+ +------------------+
+ | KMAP area | PKMAP_BASE PTRS_PER_PTE *
+ | | DCACHE_N_COLORS *
+ | | PAGE_SIZE
+ | | (4MB * DCACHE_N_COLORS)
+ +------------------+
+ | Atomic KMAP area | FIXADDR_START KM_TYPE_NR *
+ | | NR_CPUS *
+ | | DCACHE_N_COLORS *
+ | | PAGE_SIZE
+ +------------------+ FIXADDR_TOP 0xcffff000
+ +------------------+
+ | Cached KSEG | XCHAL_KSEG_CACHED_VADDR 0xd0000000 128MB
+ +------------------+
+ | Uncached KSEG | XCHAL_KSEG_BYPASS_VADDR 0xd8000000 128MB
+ +------------------+
+ | Cached KIO | XCHAL_KIO_CACHED_VADDR 0xe0000000 256MB
+ +------------------+
+ | Uncached KIO | XCHAL_KIO_BYPASS_VADDR 0xf0000000 256MB
+ +------------------+
+
+
+256MB cached + 256MB uncached layout::
+
+ Symbol VADDR Size
+ +------------------+
+ | Userspace | 0x00000000 TASK_SIZE
+ +------------------+ 0x40000000
+ +------------------+
+ | Page table | XCHAL_PAGE_TABLE_VADDR 0x80000000 XCHAL_PAGE_TABLE_SIZE
+ +------------------+
+ | KASAN shadow map | KASAN_SHADOW_START 0x80400000 KASAN_SHADOW_SIZE
+ +------------------+ 0x8e400000
+ +------------------+
+ | VMALLOC area | VMALLOC_START 0xa0000000 128MB - 64KB
+ +------------------+ VMALLOC_END
+ +------------------+
+ | Cache aliasing | TLBTEMP_BASE_1 0xa8000000 DCACHE_WAY_SIZE
+ | remap area 1 |
+ +------------------+
+ | Cache aliasing | TLBTEMP_BASE_2 DCACHE_WAY_SIZE
+ | remap area 2 |
+ +------------------+
+ +------------------+
+ | KMAP area | PKMAP_BASE PTRS_PER_PTE *
+ | | DCACHE_N_COLORS *
+ | | PAGE_SIZE
+ | | (4MB * DCACHE_N_COLORS)
+ +------------------+
+ | Atomic KMAP area | FIXADDR_START KM_TYPE_NR *
+ | | NR_CPUS *
+ | | DCACHE_N_COLORS *
+ | | PAGE_SIZE
+ +------------------+ FIXADDR_TOP 0xaffff000
+ +------------------+
+ | Cached KSEG | XCHAL_KSEG_CACHED_VADDR 0xb0000000 256MB
+ +------------------+
+ | Uncached KSEG | XCHAL_KSEG_BYPASS_VADDR 0xc0000000 256MB
+ +------------------+
+ +------------------+
+ | Cached KIO | XCHAL_KIO_CACHED_VADDR 0xe0000000 256MB
+ +------------------+
+ | Uncached KIO | XCHAL_KIO_BYPASS_VADDR 0xf0000000 256MB
+ +------------------+
+
+
+512MB cached + 512MB uncached layout::
+
+ Symbol VADDR Size
+ +------------------+
+ | Userspace | 0x00000000 TASK_SIZE
+ +------------------+ 0x40000000
+ +------------------+
+ | Page table | XCHAL_PAGE_TABLE_VADDR 0x80000000 XCHAL_PAGE_TABLE_SIZE
+ +------------------+
+ | KASAN shadow map | KASAN_SHADOW_START 0x80400000 KASAN_SHADOW_SIZE
+ +------------------+ 0x8e400000
+ +------------------+
+ | VMALLOC area | VMALLOC_START 0x90000000 128MB - 64KB
+ +------------------+ VMALLOC_END
+ +------------------+
+ | Cache aliasing | TLBTEMP_BASE_1 0x98000000 DCACHE_WAY_SIZE
+ | remap area 1 |
+ +------------------+
+ | Cache aliasing | TLBTEMP_BASE_2 DCACHE_WAY_SIZE
+ | remap area 2 |
+ +------------------+
+ +------------------+
+ | KMAP area | PKMAP_BASE PTRS_PER_PTE *
+ | | DCACHE_N_COLORS *
+ | | PAGE_SIZE
+ | | (4MB * DCACHE_N_COLORS)
+ +------------------+
+ | Atomic KMAP area | FIXADDR_START KM_TYPE_NR *
+ | | NR_CPUS *
+ | | DCACHE_N_COLORS *
+ | | PAGE_SIZE
+ +------------------+ FIXADDR_TOP 0x9ffff000
+ +------------------+
+ | Cached KSEG | XCHAL_KSEG_CACHED_VADDR 0xa0000000 512MB
+ +------------------+
+ | Uncached KSEG | XCHAL_KSEG_BYPASS_VADDR 0xc0000000 512MB
+ +------------------+
+ | Cached KIO | XCHAL_KIO_CACHED_VADDR 0xe0000000 256MB
+ +------------------+
+ | Uncached KIO | XCHAL_KIO_BYPASS_VADDR 0xf0000000 256MB
+ +------------------+
diff --git a/Documentation/arm/Booting b/Documentation/arm/Booting
deleted file mode 100644
index 259f00af3ab3..000000000000
--- a/Documentation/arm/Booting
+++ /dev/null
@@ -1,218 +0,0 @@
- Booting ARM Linux
- =================
-
-Author: Russell King
-Date : 18 May 2002
-
-The following documentation is relevant to 2.4.18-rmk6 and beyond.
-
-In order to boot ARM Linux, you require a boot loader, which is a small
-program that runs before the main kernel. The boot loader is expected
-to initialise various devices, and eventually call the Linux kernel,
-passing information to the kernel.
-
-Essentially, the boot loader should provide (as a minimum) the
-following:
-
-1. Setup and initialise the RAM.
-2. Initialise one serial port.
-3. Detect the machine type.
-4. Setup the kernel tagged list.
-5. Load initramfs.
-6. Call the kernel image.
-
-
-1. Setup and initialise RAM
----------------------------
-
-Existing boot loaders: MANDATORY
-New boot loaders: MANDATORY
-
-The boot loader is expected to find and initialise all RAM that the
-kernel will use for volatile data storage in the system. It performs
-this in a machine dependent manner. (It may use internal algorithms
-to automatically locate and size all RAM, or it may use knowledge of
-the RAM in the machine, or any other method the boot loader designer
-sees fit.)
-
-
-2. Initialise one serial port
------------------------------
-
-Existing boot loaders: OPTIONAL, RECOMMENDED
-New boot loaders: OPTIONAL, RECOMMENDED
-
-The boot loader should initialise and enable one serial port on the
-target. This allows the kernel serial driver to automatically detect
-which serial port it should use for the kernel console (generally
-used for debugging purposes, or communication with the target.)
-
-As an alternative, the boot loader can pass the relevant 'console='
-option to the kernel via the tagged lists specifying the port, and
-serial format options as described in
-
- Documentation/admin-guide/kernel-parameters.rst.
-
-
-3. Detect the machine type
---------------------------
-
-Existing boot loaders: OPTIONAL
-New boot loaders: MANDATORY except for DT-only platforms
-
-The boot loader should detect the machine type its running on by some
-method. Whether this is a hard coded value or some algorithm that
-looks at the connected hardware is beyond the scope of this document.
-The boot loader must ultimately be able to provide a MACH_TYPE_xxx
-value to the kernel. (see linux/arch/arm/tools/mach-types). This
-should be passed to the kernel in register r1.
-
-For DT-only platforms, the machine type will be determined by device
-tree. set the machine type to all ones (~0). This is not strictly
-necessary, but assures that it will not match any existing types.
-
-4. Setup boot data
-------------------
-
-Existing boot loaders: OPTIONAL, HIGHLY RECOMMENDED
-New boot loaders: MANDATORY
-
-The boot loader must provide either a tagged list or a dtb image for
-passing configuration data to the kernel. The physical address of the
-boot data is passed to the kernel in register r2.
-
-4a. Setup the kernel tagged list
---------------------------------
-
-The boot loader must create and initialise the kernel tagged list.
-A valid tagged list starts with ATAG_CORE and ends with ATAG_NONE.
-The ATAG_CORE tag may or may not be empty. An empty ATAG_CORE tag
-has the size field set to '2' (0x00000002). The ATAG_NONE must set
-the size field to zero.
-
-Any number of tags can be placed in the list. It is undefined
-whether a repeated tag appends to the information carried by the
-previous tag, or whether it replaces the information in its
-entirety; some tags behave as the former, others the latter.
-
-The boot loader must pass at a minimum the size and location of
-the system memory, and root filesystem location. Therefore, the
-minimum tagged list should look:
-
- +-----------+
-base -> | ATAG_CORE | |
- +-----------+ |
- | ATAG_MEM | | increasing address
- +-----------+ |
- | ATAG_NONE | |
- +-----------+ v
-
-The tagged list should be stored in system RAM.
-
-The tagged list must be placed in a region of memory where neither
-the kernel decompressor nor initrd 'bootp' program will overwrite
-it. The recommended placement is in the first 16KiB of RAM.
-
-4b. Setup the device tree
--------------------------
-
-The boot loader must load a device tree image (dtb) into system ram
-at a 64bit aligned address and initialize it with the boot data. The
-dtb format is documented in Documentation/devicetree/booting-without-of.txt.
-The kernel will look for the dtb magic value of 0xd00dfeed at the dtb
-physical address to determine if a dtb has been passed instead of a
-tagged list.
-
-The boot loader must pass at a minimum the size and location of the
-system memory, and the root filesystem location. The dtb must be
-placed in a region of memory where the kernel decompressor will not
-overwrite it, whilst remaining within the region which will be covered
-by the kernel's low-memory mapping.
-
-A safe location is just above the 128MiB boundary from start of RAM.
-
-5. Load initramfs.
-------------------
-
-Existing boot loaders: OPTIONAL
-New boot loaders: OPTIONAL
-
-If an initramfs is in use then, as with the dtb, it must be placed in
-a region of memory where the kernel decompressor will not overwrite it
-while also with the region which will be covered by the kernel's
-low-memory mapping.
-
-A safe location is just above the device tree blob which itself will
-be loaded just above the 128MiB boundary from the start of RAM as
-recommended above.
-
-6. Calling the kernel image
----------------------------
-
-Existing boot loaders: MANDATORY
-New boot loaders: MANDATORY
-
-There are two options for calling the kernel zImage. If the zImage
-is stored in flash, and is linked correctly to be run from flash,
-then it is legal for the boot loader to call the zImage in flash
-directly.
-
-The zImage may also be placed in system RAM and called there. The
-kernel should be placed in the first 128MiB of RAM. It is recommended
-that it is loaded above 32MiB in order to avoid the need to relocate
-prior to decompression, which will make the boot process slightly
-faster.
-
-When booting a raw (non-zImage) kernel the constraints are tighter.
-In this case the kernel must be loaded at an offset into system equal
-to TEXT_OFFSET - PAGE_OFFSET.
-
-In any case, the following conditions must be met:
-
-- Quiesce all DMA capable devices so that memory does not get
- corrupted by bogus network packets or disk data. This will save
- you many hours of debug.
-
-- CPU register settings
- r0 = 0,
- r1 = machine type number discovered in (3) above.
- r2 = physical address of tagged list in system RAM, or
- physical address of device tree block (dtb) in system RAM
-
-- CPU mode
- All forms of interrupts must be disabled (IRQs and FIQs)
-
- For CPUs which do not include the ARM virtualization extensions, the
- CPU must be in SVC mode. (A special exception exists for Angel)
-
- CPUs which include support for the virtualization extensions can be
- entered in HYP mode in order to enable the kernel to make full use of
- these extensions. This is the recommended boot method for such CPUs,
- unless the virtualisations are already in use by a pre-installed
- hypervisor.
-
- If the kernel is not entered in HYP mode for any reason, it must be
- entered in SVC mode.
-
-- Caches, MMUs
- The MMU must be off.
- Instruction cache may be on or off.
- Data cache must be off.
-
- If the kernel is entered in HYP mode, the above requirements apply to
- the HYP mode configuration in addition to the ordinary PL1 (privileged
- kernel modes) configuration. In addition, all traps into the
- hypervisor must be disabled, and PL1 access must be granted for all
- peripherals and CPU resources for which this is architecturally
- possible. Except for entering in HYP mode, the system configuration
- should be such that a kernel which does not include support for the
- virtualization extensions can boot correctly without extra help.
-
-- The boot loader is expected to call the kernel image by jumping
- directly to the first instruction of the kernel image.
-
- On CPUs supporting the ARM instruction set, the entry must be
- made in ARM state, even for a Thumb-2 kernel.
-
- On CPUs supporting only the Thumb instruction set such as
- Cortex-M class CPUs, the entry must be made in Thumb state.
diff --git a/Documentation/arm/IXP4xx b/Documentation/arm/IXP4xx
deleted file mode 100644
index e48b74de6ac0..000000000000
--- a/Documentation/arm/IXP4xx
+++ /dev/null
@@ -1,172 +0,0 @@
-
--------------------------------------------------------------------------
-Release Notes for Linux on Intel's IXP4xx Network Processor
-
-Maintained by Deepak Saxena <dsaxena@plexity.net>
--------------------------------------------------------------------------
-
-1. Overview
-
-Intel's IXP4xx network processor is a highly integrated SOC that
-is targeted for network applications, though it has become popular
-in industrial control and other areas due to low cost and power
-consumption. The IXP4xx family currently consists of several processors
-that support different network offload functions such as encryption,
-routing, firewalling, etc. The IXP46x family is an updated version which
-supports faster speeds, new memory and flash configurations, and more
-integration such as an on-chip I2C controller.
-
-For more information on the various versions of the CPU, see:
-
- http://developer.intel.com/design/network/products/npfamily/ixp4xx.htm
-
-Intel also made the IXCP1100 CPU for sometime which is an IXP4xx
-stripped of much of the network intelligence.
-
-2. Linux Support
-
-Linux currently supports the following features on the IXP4xx chips:
-
-- Dual serial ports
-- PCI interface
-- Flash access (MTD/JFFS)
-- I2C through GPIO on IXP42x
-- GPIO for input/output/interrupts
- See arch/arm/mach-ixp4xx/include/mach/platform.h for access functions.
-- Timers (watchdog, OS)
-
-The following components of the chips are not supported by Linux and
-require the use of Intel's proprietary CSR software:
-
-- USB device interface
-- Network interfaces (HSS, Utopia, NPEs, etc)
-- Network offload functionality
-
-If you need to use any of the above, you need to download Intel's
-software from:
-
- http://developer.intel.com/design/network/products/npfamily/ixp425.htm
-
-DO NOT POST QUESTIONS TO THE LINUX MAILING LISTS REGARDING THE PROPRIETARY
-SOFTWARE.
-
-There are several websites that provide directions/pointers on using
-Intel's software:
-
- http://sourceforge.net/projects/ixp4xx-osdg/
- Open Source Developer's Guide for using uClinux and the Intel libraries
-
-http://gatewaymaker.sourceforge.net/
- Simple one page summary of building a gateway using an IXP425 and Linux
-
-http://ixp425.sourceforge.net/
- ATM device driver for IXP425 that relies on Intel's libraries
-
-3. Known Issues/Limitations
-
-3a. Limited inbound PCI window
-
-The IXP4xx family allows for up to 256MB of memory but the PCI interface
-can only expose 64MB of that memory to the PCI bus. This means that if
-you are running with > 64MB, all PCI buffers outside of the accessible
-range will be bounced using the routines in arch/arm/common/dmabounce.c.
-
-3b. Limited outbound PCI window
-
-IXP4xx provides two methods of accessing PCI memory space:
-
-1) A direct mapped window from 0x48000000 to 0x4bffffff (64MB).
- To access PCI via this space, we simply ioremap() the BAR
- into the kernel and we can use the standard read[bwl]/write[bwl]
- macros. This is the preffered method due to speed but it
- limits the system to just 64MB of PCI memory. This can be
- problamatic if using video cards and other memory-heavy devices.
-
-2) If > 64MB of memory space is required, the IXP4xx can be
- configured to use indirect registers to access PCI This allows
- for up to 128MB (0x48000000 to 0x4fffffff) of memory on the bus.
- The disadvantage of this is that every PCI access requires
- three local register accesses plus a spinlock, but in some
- cases the performance hit is acceptable. In addition, you cannot
- mmap() PCI devices in this case due to the indirect nature
- of the PCI window.
-
-By default, the direct method is used for performance reasons. If
-you need more PCI memory, enable the IXP4XX_INDIRECT_PCI config option.
-
-3c. GPIO as Interrupts
-
-Currently the code only handles level-sensitive GPIO interrupts
-
-4. Supported platforms
-
-ADI Engineering Coyote Gateway Reference Platform
-http://www.adiengineering.com/productsCoyote.html
-
- The ADI Coyote platform is reference design for those building
- small residential/office gateways. One NPE is connected to a 10/100
- interface, one to 4-port 10/100 switch, and the third to and ADSL
- interface. In addition, it also supports to POTs interfaces connected
- via SLICs. Note that those are not supported by Linux ATM. Finally,
- the platform has two mini-PCI slots used for 802.11[bga] cards.
- Finally, there is an IDE port hanging off the expansion bus.
-
-Gateworks Avila Network Platform
-http://www.gateworks.com/support/overview.php
-
- The Avila platform is basically and IXDP425 with the 4 PCI slots
- replaced with mini-PCI slots and a CF IDE interface hanging off
- the expansion bus.
-
-Intel IXDP425 Development Platform
-http://www.intel.com/design/network/products/npfamily/ixdpg425.htm
-
- This is Intel's standard reference platform for the IXDP425 and is
- also known as the Richfield board. It contains 4 PCI slots, 16MB
- of flash, two 10/100 ports and one ADSL port.
-
-Intel IXDP465 Development Platform
-http://www.intel.com/design/network/products/npfamily/ixdp465.htm
-
- This is basically an IXDP425 with an IXP465 and 32M of flash instead
- of just 16.
-
-Intel IXDPG425 Development Platform
-
- This is basically and ADI Coyote board with a NEC EHCI controller
- added. One issue with this board is that the mini-PCI slots only
- have the 3.3v line connected, so you can't use a PCI to mini-PCI
- adapter with an E100 card. So to NFS root you need to use either
- the CSR or a WiFi card and a ramdisk that BOOTPs and then does
- a pivot_root to NFS.
-
-Motorola PrPMC1100 Processor Mezanine Card
-http://www.fountainsys.com
-
- The PrPMC1100 is based on the IXCP1100 and is meant to plug into
- and IXP2400/2800 system to act as the system controller. It simply
- contains a CPU and 16MB of flash on the board and needs to be
- plugged into a carrier board to function. Currently Linux only
- supports the Motorola PrPMC carrier board for this platform.
-
-5. TODO LIST
-
-- Add support for Coyote IDE
-- Add support for edge-based GPIO interrupts
-- Add support for CF IDE on expansion bus
-
-6. Thanks
-
-The IXP4xx work has been funded by Intel Corp. and MontaVista Software, Inc.
-
-The following people have contributed patches/comments/etc:
-
-Lennerty Buytenhek
-Lutz Jaenicke
-Justin Mayfield
-Robert E. Ranslam
-[I know I've forgotten others, please email me to be added]
-
--------------------------------------------------------------------------
-
-Last Update: 01/04/2005
diff --git a/Documentation/arm/Interrupts b/Documentation/arm/Interrupts
deleted file mode 100644
index f09ab1b90ef1..000000000000
--- a/Documentation/arm/Interrupts
+++ /dev/null
@@ -1,167 +0,0 @@
-2.5.2-rmk5
-----------
-
-This is the first kernel that contains a major shake up of some of the
-major architecture-specific subsystems.
-
-Firstly, it contains some pretty major changes to the way we handle the
-MMU TLB. Each MMU TLB variant is now handled completely separately -
-we have TLB v3, TLB v4 (without write buffer), TLB v4 (with write buffer),
-and finally TLB v4 (with write buffer, with I TLB invalidate entry).
-There is more assembly code inside each of these functions, mainly to
-allow more flexible TLB handling for the future.
-
-Secondly, the IRQ subsystem.
-
-The 2.5 kernels will be having major changes to the way IRQs are handled.
-Unfortunately, this means that machine types that touch the irq_desc[]
-array (basically all machine types) will break, and this means every
-machine type that we currently have.
-
-Lets take an example. On the Assabet with Neponset, we have:
-
- GPIO25 IRR:2
- SA1100 ------------> Neponset -----------> SA1111
- IIR:1
- -----------> USAR
- IIR:0
- -----------> SMC9196
-
-The way stuff currently works, all SA1111 interrupts are mutually
-exclusive of each other - if you're processing one interrupt from the
-SA1111 and another comes in, you have to wait for that interrupt to
-finish processing before you can service the new interrupt. Eg, an
-IDE PIO-based interrupt on the SA1111 excludes all other SA1111 and
-SMC9196 interrupts until it has finished transferring its multi-sector
-data, which can be a long time. Note also that since we loop in the
-SA1111 IRQ handler, SA1111 IRQs can hold off SMC9196 IRQs indefinitely.
-
-
-The new approach brings several new ideas...
-
-We introduce the concept of a "parent" and a "child". For example,
-to the Neponset handler, the "parent" is GPIO25, and the "children"d
-are SA1111, SMC9196 and USAR.
-
-We also bring the idea of an IRQ "chip" (mainly to reduce the size of
-the irqdesc array). This doesn't have to be a real "IC"; indeed the
-SA11x0 IRQs are handled by two separate "chip" structures, one for
-GPIO0-10, and another for all the rest. It is just a container for
-the various operations (maybe this'll change to a better name).
-This structure has the following operations:
-
-struct irqchip {
- /*
- * Acknowledge the IRQ.
- * If this is a level-based IRQ, then it is expected to mask the IRQ
- * as well.
- */
- void (*ack)(unsigned int irq);
- /*
- * Mask the IRQ in hardware.
- */
- void (*mask)(unsigned int irq);
- /*
- * Unmask the IRQ in hardware.
- */
- void (*unmask)(unsigned int irq);
- /*
- * Re-run the IRQ
- */
- void (*rerun)(unsigned int irq);
- /*
- * Set the type of the IRQ.
- */
- int (*type)(unsigned int irq, unsigned int, type);
-};
-
-ack - required. May be the same function as mask for IRQs
- handled by do_level_IRQ.
-mask - required.
-unmask - required.
-rerun - optional. Not required if you're using do_level_IRQ for all
- IRQs that use this 'irqchip'. Generally expected to re-trigger
- the hardware IRQ if possible. If not, may call the handler
- directly.
-type - optional. If you don't support changing the type of an IRQ,
- it should be null so people can detect if they are unable to
- set the IRQ type.
-
-For each IRQ, we keep the following information:
-
- - "disable" depth (number of disable_irq()s without enable_irq()s)
- - flags indicating what we can do with this IRQ (valid, probe,
- noautounmask) as before
- - status of the IRQ (probing, enable, etc)
- - chip
- - per-IRQ handler
- - irqaction structure list
-
-The handler can be one of the 3 standard handlers - "level", "edge" and
-"simple", or your own specific handler if you need to do something special.
-
-The "level" handler is what we currently have - its pretty simple.
-"edge" knows about the brokenness of such IRQ implementations - that you
-need to leave the hardware IRQ enabled while processing it, and queueing
-further IRQ events should the IRQ happen again while processing. The
-"simple" handler is very basic, and does not perform any hardware
-manipulation, nor state tracking. This is useful for things like the
-SMC9196 and USAR above.
-
-So, what's changed?
-
-1. Machine implementations must not write to the irqdesc array.
-
-2. New functions to manipulate the irqdesc array. The first 4 are expected
- to be useful only to machine specific code. The last is recommended to
- only be used by machine specific code, but may be used in drivers if
- absolutely necessary.
-
- set_irq_chip(irq,chip)
-
- Set the mask/unmask methods for handling this IRQ
-
- set_irq_handler(irq,handler)
-
- Set the handler for this IRQ (level, edge, simple)
-
- set_irq_chained_handler(irq,handler)
-
- Set a "chained" handler for this IRQ - automatically
- enables this IRQ (eg, Neponset and SA1111 handlers).
-
- set_irq_flags(irq,flags)
-
- Set the valid/probe/noautoenable flags.
-
- set_irq_type(irq,type)
-
- Set active the IRQ edge(s)/level. This replaces the
- SA1111 INTPOL manipulation, and the set_GPIO_IRQ_edge()
- function. Type should be one of IRQ_TYPE_xxx defined in
- <linux/irq.h>
-
-3. set_GPIO_IRQ_edge() is obsolete, and should be replaced by set_irq_type.
-
-4. Direct access to SA1111 INTPOL is deprecated. Use set_irq_type instead.
-
-5. A handler is expected to perform any necessary acknowledgement of the
- parent IRQ via the correct chip specific function. For instance, if
- the SA1111 is directly connected to a SA1110 GPIO, then you should
- acknowledge the SA1110 IRQ each time you re-read the SA1111 IRQ status.
-
-6. For any child which doesn't have its own IRQ enable/disable controls
- (eg, SMC9196), the handler must mask or acknowledge the parent IRQ
- while the child handler is called, and the child handler should be the
- "simple" handler (not "edge" nor "level"). After the handler completes,
- the parent IRQ should be unmasked, and the status of all children must
- be re-checked for pending events. (see the Neponset IRQ handler for
- details).
-
-7. fixup_irq() is gone, as is arch/arm/mach-*/include/mach/irq.h
-
-Please note that this will not solve all problems - some of them are
-hardware based. Mixing level-based and edge-based IRQs on the same
-parent signal (eg neponset) is one such area where a software based
-solution can't provide the full answer to low IRQ latency.
-
diff --git a/Documentation/arm/Marvell/README b/Documentation/arm/Marvell/README
deleted file mode 100644
index 56ada27c53be..000000000000
--- a/Documentation/arm/Marvell/README
+++ /dev/null
@@ -1,395 +0,0 @@
-ARM Marvell SoCs
-================
-
-This document lists all the ARM Marvell SoCs that are currently
-supported in mainline by the Linux kernel. As the Marvell families of
-SoCs are large and complex, it is hard to understand where the support
-for a particular SoC is available in the Linux kernel. This document
-tries to help in understanding where those SoCs are supported, and to
-match them with their corresponding public datasheet, when available.
-
-Orion family
-------------
-
- Flavors:
- 88F5082
- 88F5181
- 88F5181L
- 88F5182
- Datasheet : http://www.embeddedarm.com/documentation/third-party/MV88F5182-datasheet.pdf
- Programmer's User Guide : http://www.embeddedarm.com/documentation/third-party/MV88F5182-opensource-manual.pdf
- User Manual : http://www.embeddedarm.com/documentation/third-party/MV88F5182-usermanual.pdf
- 88F5281
- Datasheet : http://www.ocmodshop.com/images/reviews/networking/qnap_ts409u/marvel_88f5281_data_sheet.pdf
- 88F6183
- Core: Feroceon 88fr331 (88f51xx) or 88fr531-vd (88f52xx) ARMv5 compatible
- Linux kernel mach directory: arch/arm/mach-orion5x
- Linux kernel plat directory: arch/arm/plat-orion
-
-Kirkwood family
----------------
-
- Flavors:
- 88F6282 a.k.a Armada 300
- Product Brief : http://www.marvell.com/embedded-processors/armada-300/assets/armada_310.pdf
- 88F6283 a.k.a Armada 310
- Product Brief : http://www.marvell.com/embedded-processors/armada-300/assets/armada_310.pdf
- 88F6190
- Product Brief : http://www.marvell.com/embedded-processors/kirkwood/assets/88F6190-003_WEB.pdf
- Hardware Spec : http://www.marvell.com/embedded-processors/kirkwood/assets/HW_88F619x_OpenSource.pdf
- Functional Spec: http://www.marvell.com/embedded-processors/kirkwood/assets/FS_88F6180_9x_6281_OpenSource.pdf
- 88F6192
- Product Brief : http://www.marvell.com/embedded-processors/kirkwood/assets/88F6192-003_ver1.pdf
- Hardware Spec : http://www.marvell.com/embedded-processors/kirkwood/assets/HW_88F619x_OpenSource.pdf
- Functional Spec: http://www.marvell.com/embedded-processors/kirkwood/assets/FS_88F6180_9x_6281_OpenSource.pdf
- 88F6182
- 88F6180
- Product Brief : http://www.marvell.com/embedded-processors/kirkwood/assets/88F6180-003_ver1.pdf
- Hardware Spec : http://www.marvell.com/embedded-processors/kirkwood/assets/HW_88F6180_OpenSource.pdf
- Functional Spec: http://www.marvell.com/embedded-processors/kirkwood/assets/FS_88F6180_9x_6281_OpenSource.pdf
- 88F6281
- Product Brief : http://www.marvell.com/embedded-processors/kirkwood/assets/88F6281-004_ver1.pdf
- Hardware Spec : http://www.marvell.com/embedded-processors/kirkwood/assets/HW_88F6281_OpenSource.pdf
- Functional Spec: http://www.marvell.com/embedded-processors/kirkwood/assets/FS_88F6180_9x_6281_OpenSource.pdf
- Homepage: http://www.marvell.com/embedded-processors/kirkwood/
- Core: Feroceon 88fr131 ARMv5 compatible
- Linux kernel mach directory: arch/arm/mach-mvebu
- Linux kernel plat directory: none
-
-Discovery family
-----------------
-
- Flavors:
- MV78100
- Product Brief : http://www.marvell.com/embedded-processors/discovery-innovation/assets/MV78100-003_WEB.pdf
- Hardware Spec : http://www.marvell.com/embedded-processors/discovery-innovation/assets/HW_MV78100_OpenSource.pdf
- Functional Spec: http://www.marvell.com/embedded-processors/discovery-innovation/assets/FS_MV76100_78100_78200_OpenSource.pdf
- MV78200
- Product Brief : http://www.marvell.com/embedded-processors/discovery-innovation/assets/MV78200-002_WEB.pdf
- Hardware Spec : http://www.marvell.com/embedded-processors/discovery-innovation/assets/HW_MV78200_OpenSource.pdf
- Functional Spec: http://www.marvell.com/embedded-processors/discovery-innovation/assets/FS_MV76100_78100_78200_OpenSource.pdf
- MV76100
- Not supported by the Linux kernel.
-
- Core: Feroceon 88fr571-vd ARMv5 compatible
-
- Linux kernel mach directory: arch/arm/mach-mv78xx0
- Linux kernel plat directory: arch/arm/plat-orion
-
-EBU Armada family
------------------
-
- Armada 370 Flavors:
- 88F6710
- 88F6707
- 88F6W11
- Product Brief: http://www.marvell.com/embedded-processors/armada-300/assets/Marvell_ARMADA_370_SoC.pdf
- Hardware Spec: http://www.marvell.com/embedded-processors/armada-300/assets/ARMADA370-datasheet.pdf
- Functional Spec: http://www.marvell.com/embedded-processors/armada-300/assets/ARMADA370-FunctionalSpec-datasheet.pdf
- Core: Sheeva ARMv7 compatible PJ4B
-
- Armada 375 Flavors:
- 88F6720
- Product Brief: http://www.marvell.com/embedded-processors/armada-300/assets/ARMADA_375_SoC-01_product_brief.pdf
- Core: ARM Cortex-A9
-
- Armada 38x Flavors:
- 88F6810 Armada 380
- 88F6820 Armada 385
- 88F6828 Armada 388
- Product infos: http://www.marvell.com/embedded-processors/armada-38x/
- Functional Spec: https://marvellcorp.wufoo.com/forms/marvell-armada-38x-functional-specifications/
- Core: ARM Cortex-A9
-
- Armada 39x Flavors:
- 88F6920 Armada 390
- 88F6928 Armada 398
- Product infos: http://www.marvell.com/embedded-processors/armada-39x/
- Core: ARM Cortex-A9
-
- Armada XP Flavors:
- MV78230
- MV78260
- MV78460
- NOTE: not to be confused with the non-SMP 78xx0 SoCs
- Product Brief: http://www.marvell.com/embedded-processors/armada-xp/assets/Marvell-ArmadaXP-SoC-product%20brief.pdf
- Functional Spec: http://www.marvell.com/embedded-processors/armada-xp/assets/ARMADA-XP-Functional-SpecDatasheet.pdf
- Hardware Specs:
- http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78230_OS.PDF
- http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78260_OS.PDF
- http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78460_OS.PDF
- Core: Sheeva ARMv7 compatible Dual-core or Quad-core PJ4B-MP
-
- Linux kernel mach directory: arch/arm/mach-mvebu
- Linux kernel plat directory: none
-
-EBU Armada family ARMv8
------------------------
-
- Armada 3710/3720 Flavors:
- 88F3710
- 88F3720
- Core: ARM Cortex A53 (ARMv8)
-
- Homepage: http://www.marvell.com/embedded-processors/armada-3700/
- Product Brief: http://www.marvell.com/embedded-processors/assets/PB-88F3700-FNL.pdf
- Device tree files: arch/arm64/boot/dts/marvell/armada-37*
-
- Armada 7K Flavors:
- 88F7020 (AP806 Dual + one CP110)
- 88F7040 (AP806 Quad + one CP110)
- Core: ARM Cortex A72
-
- Homepage: http://www.marvell.com/embedded-processors/armada-70xx/
- Product Brief: http://www.marvell.com/embedded-processors/assets/Armada7020PB-Jan2016.pdf
- http://www.marvell.com/embedded-processors/assets/Armada7040PB-Jan2016.pdf
- Device tree files: arch/arm64/boot/dts/marvell/armada-70*
-
- Armada 8K Flavors:
- 88F8020 (AP806 Dual + two CP110)
- 88F8040 (AP806 Quad + two CP110)
- Core: ARM Cortex A72
-
- Homepage: http://www.marvell.com/embedded-processors/armada-80xx/
- Product Brief: http://www.marvell.com/embedded-processors/assets/Armada8020PB-Jan2016.pdf
- http://www.marvell.com/embedded-processors/assets/Armada8040PB-Jan2016.pdf
- Device tree files: arch/arm64/boot/dts/marvell/armada-80*
-
-Avanta family
--------------
-
- Flavors:
- 88F6510
- 88F6530P
- 88F6550
- 88F6560
- Homepage : http://www.marvell.com/broadband/
- Product Brief: http://www.marvell.com/broadband/assets/Marvell_Avanta_88F6510_305_060-001_product_brief.pdf
- No public datasheet available.
-
- Core: ARMv5 compatible
-
- Linux kernel mach directory: no code in mainline yet, planned for the future
- Linux kernel plat directory: no code in mainline yet, planned for the future
-
-Storage family
---------------
-
- Armada SP:
- 88RC1580
- Product infos: http://www.marvell.com/storage/armada-sp/
- Core: Sheeva ARMv7 comatible Quad-core PJ4C
- (not supported in upstream Linux kernel)
-
-Dove family (application processor)
------------------------------------
-
- Flavors:
- 88AP510 a.k.a Armada 510
- Product Brief : http://www.marvell.com/application-processors/armada-500/assets/Marvell_Armada510_SoC.pdf
- Hardware Spec : http://www.marvell.com/application-processors/armada-500/assets/Armada-510-Hardware-Spec.pdf
- Functional Spec : http://www.marvell.com/application-processors/armada-500/assets/Armada-510-Functional-Spec.pdf
- Homepage: http://www.marvell.com/application-processors/armada-500/
- Core: ARMv7 compatible
-
- Directory: arch/arm/mach-mvebu (DT enabled platforms)
- arch/arm/mach-dove (non-DT enabled platforms)
-
-PXA 2xx/3xx/93x/95x family
---------------------------
-
- Flavors:
- PXA21x, PXA25x, PXA26x
- Application processor only
- Core: ARMv5 XScale1 core
- PXA270, PXA271, PXA272
- Product Brief : http://www.marvell.com/application-processors/pxa-family/assets/pxa_27x_pb.pdf
- Design guide : http://www.marvell.com/application-processors/pxa-family/assets/pxa_27x_design_guide.pdf
- Developers manual : http://www.marvell.com/application-processors/pxa-family/assets/pxa_27x_dev_man.pdf
- Specification : http://www.marvell.com/application-processors/pxa-family/assets/pxa_27x_emts.pdf
- Specification update : http://www.marvell.com/application-processors/pxa-family/assets/pxa_27x_spec_update.pdf
- Application processor only
- Core: ARMv5 XScale2 core
- PXA300, PXA310, PXA320
- PXA 300 Product Brief : http://www.marvell.com/application-processors/pxa-family/assets/PXA300_PB_R4.pdf
- PXA 310 Product Brief : http://www.marvell.com/application-processors/pxa-family/assets/PXA310_PB_R4.pdf
- PXA 320 Product Brief : http://www.marvell.com/application-processors/pxa-family/assets/PXA320_PB_R4.pdf
- Design guide : http://www.marvell.com/application-processors/pxa-family/assets/PXA3xx_Design_Guide.pdf
- Developers manual : http://www.marvell.com/application-processors/pxa-family/assets/PXA3xx_Developers_Manual.zip
- Specifications : http://www.marvell.com/application-processors/pxa-family/assets/PXA3xx_EMTS.pdf
- Specification Update : http://www.marvell.com/application-processors/pxa-family/assets/PXA3xx_Spec_Update.zip
- Reference Manual : http://www.marvell.com/application-processors/pxa-family/assets/PXA3xx_TavorP_BootROM_Ref_Manual.pdf
- Application processor only
- Core: ARMv5 XScale3 core
- PXA930, PXA935
- Application processor with Communication processor
- Core: ARMv5 XScale3 core
- PXA955
- Application processor with Communication processor
- Core: ARMv7 compatible Sheeva PJ4 core
-
- Comments:
-
- * This line of SoCs originates from the XScale family developed by
- Intel and acquired by Marvell in ~2006. The PXA21x, PXA25x,
- PXA26x, PXA27x, PXA3xx and PXA93x were developed by Intel, while
- the later PXA95x were developed by Marvell.
-
- * Due to their XScale origin, these SoCs have virtually nothing in
- common with the other (Kirkwood, Dove, etc.) families of Marvell
- SoCs, except with the MMP/MMP2 family of SoCs.
-
- Linux kernel mach directory: arch/arm/mach-pxa
- Linux kernel plat directory: arch/arm/plat-pxa
-
-MMP/MMP2/MMP3 family (communication processor)
------------------------------------------
-
- Flavors:
- PXA168, a.k.a Armada 168
- Homepage : http://www.marvell.com/application-processors/armada-100/armada-168.jsp
- Product brief : http://www.marvell.com/application-processors/armada-100/assets/pxa_168_pb.pdf
- Hardware manual : http://www.marvell.com/application-processors/armada-100/assets/armada_16x_datasheet.pdf
- Software manual : http://www.marvell.com/application-processors/armada-100/assets/armada_16x_software_manual.pdf
- Specification update : http://www.marvell.com/application-processors/armada-100/assets/ARMADA16x_Spec_update.pdf
- Boot ROM manual : http://www.marvell.com/application-processors/armada-100/assets/armada_16x_ref_manual.pdf
- App node package : http://www.marvell.com/application-processors/armada-100/assets/armada_16x_app_note_package.pdf
- Application processor only
- Core: ARMv5 compatible Marvell PJ1 88sv331 (Mohawk)
- PXA910/PXA920
- Homepage : http://www.marvell.com/communication-processors/pxa910/
- Product Brief : http://www.marvell.com/communication-processors/pxa910/assets/Marvell_PXA910_Platform-001_PB_final.pdf
- Application processor with Communication processor
- Core: ARMv5 compatible Marvell PJ1 88sv331 (Mohawk)
- PXA688, a.k.a. MMP2, a.k.a Armada 610
- Product Brief : http://www.marvell.com/application-processors/armada-600/assets/armada610_pb.pdf
- Application processor only
- Core: ARMv7 compatible Sheeva PJ4 88sv581x core
- PXA2128, a.k.a. MMP3 (OLPC XO4, Linux support not upstream)
- Product Brief : http://www.marvell.com/application-processors/armada/pxa2128/assets/Marvell-ARMADA-PXA2128-SoC-PB.pdf
- Application processor only
- Core: Dual-core ARMv7 compatible Sheeva PJ4C core
- PXA960/PXA968/PXA978 (Linux support not upstream)
- Application processor with Communication Processor
- Core: ARMv7 compatible Sheeva PJ4 core
- PXA986/PXA988 (Linux support not upstream)
- Application processor with Communication Processor
- Core: Dual-core ARMv7 compatible Sheeva PJ4B-MP core
- PXA1088/PXA1920 (Linux support not upstream)
- Application processor with Communication Processor
- Core: quad-core ARMv7 Cortex-A7
- PXA1908/PXA1928/PXA1936
- Application processor with Communication Processor
- Core: multi-core ARMv8 Cortex-A53
-
- Comments:
-
- * This line of SoCs originates from the XScale family developed by
- Intel and acquired by Marvell in ~2006. All the processors of
- this MMP/MMP2 family were developed by Marvell.
-
- * Due to their XScale origin, these SoCs have virtually nothing in
- common with the other (Kirkwood, Dove, etc.) families of Marvell
- SoCs, except with the PXA family of SoCs listed above.
-
- Linux kernel mach directory: arch/arm/mach-mmp
- Linux kernel plat directory: arch/arm/plat-pxa
-
-Berlin family (Multimedia Solutions)
--------------------------------------
-
- Flavors:
- 88DE3010, Armada 1000 (no Linux support)
- Core: Marvell PJ1 (ARMv5TE), Dual-core
- Product Brief: http://www.marvell.com.cn/digital-entertainment/assets/armada_1000_pb.pdf
- 88DE3005, Armada 1500 Mini
- Design name: BG2CD
- Core: ARM Cortex-A9, PL310 L2CC
- 88DE3006, Armada 1500 Mini Plus
- Design name: BG2CDP
- Core: Dual Core ARM Cortex-A7
- 88DE3100, Armada 1500
- Design name: BG2
- Core: Marvell PJ4B-MP (ARMv7), Tauros3 L2CC
- 88DE3114, Armada 1500 Pro
- Design name: BG2Q
- Core: Quad Core ARM Cortex-A9, PL310 L2CC
- 88DE3214, Armada 1500 Pro 4K
- Design name: BG3
- Core: ARM Cortex-A15, CA15 integrated L2CC
- 88DE3218, ARMADA 1500 Ultra
- Core: ARM Cortex-A53
-
- Homepage: https://www.synaptics.com/products/multimedia-solutions
- Directory: arch/arm/mach-berlin
-
- Comments:
-
- * This line of SoCs is based on Marvell Sheeva or ARM Cortex CPUs
- with Synopsys DesignWare (IRQ, GPIO, Timers, ...) and PXA IP (SDHCI, USB, ETH, ...).
-
- * The Berlin family was acquired by Synaptics from Marvell in 2017.
-
-CPU Cores
----------
-
-The XScale cores were designed by Intel, and shipped by Marvell in the older
-PXA processors. Feroceon is a Marvell designed core that developed in-house,
-and that evolved into Sheeva. The XScale and Feroceon cores were phased out
-over time and replaced with Sheeva cores in later products, which subsequently
-got replaced with licensed ARM Cortex-A cores.
-
- XScale 1
- CPUID 0x69052xxx
- ARMv5, iWMMXt
- XScale 2
- CPUID 0x69054xxx
- ARMv5, iWMMXt
- XScale 3
- CPUID 0x69056xxx or 0x69056xxx
- ARMv5, iWMMXt
- Feroceon-1850 88fr331 "Mohawk"
- CPUID 0x5615331x or 0x41xx926x
- ARMv5TE, single issue
- Feroceon-2850 88fr531-vd "Jolteon"
- CPUID 0x5605531x or 0x41xx926x
- ARMv5TE, VFP, dual-issue
- Feroceon 88fr571-vd "Jolteon"
- CPUID 0x5615571x
- ARMv5TE, VFP, dual-issue
- Feroceon 88fr131 "Mohawk-D"
- CPUID 0x5625131x
- ARMv5TE, single-issue in-order
- Sheeva PJ1 88sv331 "Mohawk"
- CPUID 0x561584xx
- ARMv5, single-issue iWMMXt v2
- Sheeva PJ4 88sv581x "Flareon"
- CPUID 0x560f581x
- ARMv7, idivt, optional iWMMXt v2
- Sheeva PJ4B 88sv581x
- CPUID 0x561f581x
- ARMv7, idivt, optional iWMMXt v2
- Sheeva PJ4B-MP / PJ4C
- CPUID 0x562f584x
- ARMv7, idivt/idiva, LPAE, optional iWMMXt v2 and/or NEON
-
-Long-term plans
----------------
-
- * Unify the mach-dove/, mach-mv78xx0/, mach-orion5x/ into the
- mach-mvebu/ to support all SoCs from the Marvell EBU (Engineering
- Business Unit) in a single mach-<foo> directory. The plat-orion/
- would therefore disappear.
-
- * Unify the mach-mmp/ and mach-pxa/ into the same mach-pxa
- directory. The plat-pxa/ would therefore disappear.
-
-Credits
--------
-
- Maen Suleiman <maen@marvell.com>
- Lior Amsalem <alior@marvell.com>
- Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
- Andrew Lunn <andrew@lunn.ch>
- Nicolas Pitre <nico@fluxnic.net>
- Eric Miao <eric.y.miao@gmail.com>
diff --git a/Documentation/arm/Microchip/README b/Documentation/arm/Microchip/README
deleted file mode 100644
index a366f37d38f1..000000000000
--- a/Documentation/arm/Microchip/README
+++ /dev/null
@@ -1,169 +0,0 @@
-ARM Microchip SoCs (aka AT91)
-=============================
-
-
-Introduction
-------------
-This document gives useful information about the ARM Microchip SoCs that are
-currently supported in Linux Mainline (you know, the one on kernel.org).
-
-It is important to note that the Microchip (previously Atmel) ARM-based MPU
-product line is historically named "AT91" or "at91" throughout the Linux kernel
-development process even if this product prefix has completely disappeared from
-the official Microchip product name. Anyway, files, directories, git trees,
-git branches/tags and email subject always contain this "at91" sub-string.
-
-
-AT91 SoCs
----------
-Documentation and detailed datasheet for each product are available on
-the Microchip website: http://www.microchip.com.
-
- Flavors:
- * ARM 920 based SoC
- - at91rm9200
- + Datasheet
- http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-1768-32-bit-ARM920T-Embedded-Microprocessor-AT91RM9200_Datasheet.pdf
-
- * ARM 926 based SoCs
- - at91sam9260
- + Datasheet
- http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-6221-32-bit-ARM926EJ-S-Embedded-Microprocessor-SAM9260_Datasheet.pdf
-
- - at91sam9xe
- + Datasheet
- http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-6254-32-bit-ARM926EJ-S-Embedded-Microprocessor-SAM9XE_Datasheet.pdf
-
- - at91sam9261
- + Datasheet
- http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-6062-ARM926EJ-S-Microprocessor-SAM9261_Datasheet.pdf
-
- - at91sam9263
- + Datasheet
- http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-6249-32-bit-ARM926EJ-S-Embedded-Microprocessor-SAM9263_Datasheet.pdf
-
- - at91sam9rl
- + Datasheet
- http://ww1.microchip.com/downloads/en/DeviceDoc/doc6289.pdf
-
- - at91sam9g20
- + Datasheet
- http://ww1.microchip.com/downloads/en/DeviceDoc/DS60001516A.pdf
-
- - at91sam9g45 family
- - at91sam9g45
- - at91sam9g46
- - at91sam9m10
- - at91sam9m11 (device superset)
- + Datasheet
- http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-6437-32-bit-ARM926-Embedded-Microprocessor-SAM9M11_Datasheet.pdf
-
- - at91sam9x5 family (aka "The 5 series")
- - at91sam9g15
- - at91sam9g25
- - at91sam9g35
- - at91sam9x25
- - at91sam9x35
- + Datasheet (can be considered as covering the whole family)
- http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-11055-32-bit-ARM926EJ-S-Microcontroller-SAM9X35_Datasheet.pdf
-
- - at91sam9n12
- + Datasheet
- http://ww1.microchip.com/downloads/en/DeviceDoc/DS60001517A.pdf
-
- * ARM Cortex-A5 based SoCs
- - sama5d3 family
- - sama5d31
- - sama5d33
- - sama5d34
- - sama5d35
- - sama5d36 (device superset)
- + Datasheet
- http://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-11121-32-bit-Cortex-A5-Microcontroller-SAMA5D3_Datasheet.pdf
-
- * ARM Cortex-A5 + NEON based SoCs
- - sama5d4 family
- - sama5d41
- - sama5d42
- - sama5d43
- - sama5d44 (device superset)
- + Datasheet
- http://ww1.microchip.com/downloads/en/DeviceDoc/60001525A.pdf
-
- - sama5d2 family
- - sama5d21
- - sama5d22
- - sama5d23
- - sama5d24
- - sama5d26
- - sama5d27 (device superset)
- - sama5d28 (device superset + environmental monitors)
- + Datasheet
- http://ww1.microchip.com/downloads/en/DeviceDoc/DS60001476B.pdf
-
- * ARM Cortex-M7 MCUs
- - sams70 family
- - sams70j19
- - sams70j20
- - sams70j21
- - sams70n19
- - sams70n20
- - sams70n21
- - sams70q19
- - sams70q20
- - sams70q21
-
- - samv70 family
- - samv70j19
- - samv70j20
- - samv70n19
- - samv70n20
- - samv70q19
- - samv70q20
-
- - samv71 family
- - samv71j19
- - samv71j20
- - samv71j21
- - samv71n19
- - samv71n20
- - samv71n21
- - samv71q19
- - samv71q20
- - samv71q21
-
- + Datasheet
- http://ww1.microchip.com/downloads/en/DeviceDoc/60001527A.pdf
-
-
-Linux kernel information
-------------------------
-Linux kernel mach directory: arch/arm/mach-at91
-MAINTAINERS entry is: "ARM/Microchip (AT91) SoC support"
-
-
-Device Tree for AT91 SoCs and boards
-------------------------------------
-All AT91 SoCs are converted to Device Tree. Since Linux 3.19, these products
-must use this method to boot the Linux kernel.
-
-Work In Progress statement:
-Device Tree files and Device Tree bindings that apply to AT91 SoCs and boards are
-considered as "Unstable". To be completely clear, any at91 binding can change at
-any time. So, be sure to use a Device Tree Binary and a Kernel Image generated from
-the same source tree.
-Please refer to the Documentation/devicetree/bindings/ABI.txt file for a
-definition of a "Stable" binding/ABI.
-This statement will be removed by AT91 MAINTAINERS when appropriate.
-
-Naming conventions and best practice:
-- SoCs Device Tree Source Include files are named after the official name of
- the product (at91sam9g20.dtsi or sama5d33.dtsi for instance).
-- Device Tree Source Include files (.dtsi) are used to collect common nodes that can be
- shared across SoCs or boards (sama5d3.dtsi or at91sam9x5cm.dtsi for instance).
- When collecting nodes for a particular peripheral or topic, the identifier have to
- be placed at the end of the file name, separated with a "_" (at91sam9x5_can.dtsi
- or sama5d3_gmac.dtsi for example).
-- board Device Tree Source files (.dts) are prefixed by the string "at91-" so
- that they can be identified easily. Note that some files are historical exceptions
- to this rule (sama5d3[13456]ek.dts, usb_a9g20.dts or animeo_ip.dts for example).
diff --git a/Documentation/arm/Netwinder b/Documentation/arm/Netwinder
deleted file mode 100644
index f1b457fbd3de..000000000000
--- a/Documentation/arm/Netwinder
+++ /dev/null
@@ -1,78 +0,0 @@
-NetWinder specific documentation
-================================
-
-The NetWinder is a small low-power computer, primarily designed
-to run Linux. It is based around the StrongARM RISC processor,
-DC21285 PCI bridge, with PC-type hardware glued around it.
-
-Port usage
-==========
-
-Min - Max Description
----------------------------
-0x0000 - 0x000f DMA1
-0x0020 - 0x0021 PIC1
-0x0060 - 0x006f Keyboard
-0x0070 - 0x007f RTC
-0x0080 - 0x0087 DMA1
-0x0088 - 0x008f DMA2
-0x00a0 - 0x00a3 PIC2
-0x00c0 - 0x00df DMA2
-0x0180 - 0x0187 IRDA
-0x01f0 - 0x01f6 ide0
-0x0201 Game port
-0x0203 RWA010 configuration read
-0x0220 - ? SoundBlaster
-0x0250 - ? WaveArtist
-0x0279 RWA010 configuration index
-0x02f8 - 0x02ff Serial ttyS1
-0x0300 - 0x031f Ether10
-0x0338 GPIO1
-0x033a GPIO2
-0x0370 - 0x0371 W83977F configuration registers
-0x0388 - ? AdLib
-0x03c0 - 0x03df VGA
-0x03f6 ide0
-0x03f8 - 0x03ff Serial ttyS0
-0x0400 - 0x0408 DC21143
-0x0480 - 0x0487 DMA1
-0x0488 - 0x048f DMA2
-0x0a79 RWA010 configuration write
-0xe800 - 0xe80f ide0/ide1 BM DMA
-
-
-Interrupt usage
-===============
-
-IRQ type Description
----------------------------
- 0 ISA 100Hz timer
- 1 ISA Keyboard
- 2 ISA cascade
- 3 ISA Serial ttyS1
- 4 ISA Serial ttyS0
- 5 ISA PS/2 mouse
- 6 ISA IRDA
- 7 ISA Printer
- 8 ISA RTC alarm
- 9 ISA
-10 ISA GP10 (Orange reset button)
-11 ISA
-12 ISA WaveArtist
-13 ISA
-14 ISA hda1
-15 ISA
-
-DMA usage
-=========
-
-DMA type Description
----------------------------
- 0 ISA IRDA
- 1 ISA
- 2 ISA cascade
- 3 ISA WaveArtist
- 4 ISA
- 5 ISA
- 6 ISA
- 7 ISA WaveArtist
diff --git a/Documentation/arm/OMAP/DSS b/Documentation/arm/OMAP/DSS
deleted file mode 100644
index 4484e021290e..000000000000
--- a/Documentation/arm/OMAP/DSS
+++ /dev/null
@@ -1,362 +0,0 @@
-OMAP2/3 Display Subsystem
--------------------------
-
-This is an almost total rewrite of the OMAP FB driver in drivers/video/omap
-(let's call it DSS1). The main differences between DSS1 and DSS2 are DSI,
-TV-out and multiple display support, but there are lots of small improvements
-also.
-
-The DSS2 driver (omapdss module) is in arch/arm/plat-omap/dss/, and the FB,
-panel and controller drivers are in drivers/video/omap2/. DSS1 and DSS2 live
-currently side by side, you can choose which one to use.
-
-Features
---------
-
-Working and tested features include:
-
-- MIPI DPI (parallel) output
-- MIPI DSI output in command mode
-- MIPI DBI (RFBI) output
-- SDI output
-- TV output
-- All pieces can be compiled as a module or inside kernel
-- Use DISPC to update any of the outputs
-- Use CPU to update RFBI or DSI output
-- OMAP DISPC planes
-- RGB16, RGB24 packed, RGB24 unpacked
-- YUV2, UYVY
-- Scaling
-- Adjusting DSS FCK to find a good pixel clock
-- Use DSI DPLL to create DSS FCK
-
-Tested boards include:
-- OMAP3 SDP board
-- Beagle board
-- N810
-
-omapdss driver
---------------
-
-The DSS driver does not itself have any support for Linux framebuffer, V4L or
-such like the current ones, but it has an internal kernel API that upper level
-drivers can use.
-
-The DSS driver models OMAP's overlays, overlay managers and displays in a
-flexible way to enable non-common multi-display configuration. In addition to
-modelling the hardware overlays, omapdss supports virtual overlays and overlay
-managers. These can be used when updating a display with CPU or system DMA.
-
-omapdss driver support for audio
---------------------------------
-There exist several display technologies and standards that support audio as
-well. Hence, it is relevant to update the DSS device driver to provide an audio
-interface that may be used by an audio driver or any other driver interested in
-the functionality.
-
-The audio_enable function is intended to prepare the relevant
-IP for playback (e.g., enabling an audio FIFO, taking in/out of reset
-some IP, enabling companion chips, etc). It is intended to be called before
-audio_start. The audio_disable function performs the reverse operation and is
-intended to be called after audio_stop.
-
-While a given DSS device driver may support audio, it is possible that for
-certain configurations audio is not supported (e.g., an HDMI display using a
-VESA video timing). The audio_supported function is intended to query whether
-the current configuration of the display supports audio.
-
-The audio_config function is intended to configure all the relevant audio
-parameters of the display. In order to make the function independent of any
-specific DSS device driver, a struct omap_dss_audio is defined. Its purpose
-is to contain all the required parameters for audio configuration. At the
-moment, such structure contains pointers to IEC-60958 channel status word
-and CEA-861 audio infoframe structures. This should be enough to support
-HDMI and DisplayPort, as both are based on CEA-861 and IEC-60958.
-
-The audio_enable/disable, audio_config and audio_supported functions could be
-implemented as functions that may sleep. Hence, they should not be called
-while holding a spinlock or a readlock.
-
-The audio_start/audio_stop function is intended to effectively start/stop audio
-playback after the configuration has taken place. These functions are designed
-to be used in an atomic context. Hence, audio_start should return quickly and be
-called only after all the needed resources for audio playback (audio FIFOs,
-DMA channels, companion chips, etc) have been enabled to begin data transfers.
-audio_stop is designed to only stop the audio transfers. The resources used
-for playback are released using audio_disable.
-
-The enum omap_dss_audio_state may be used to help the implementations of
-the interface to keep track of the audio state. The initial state is _DISABLED;
-then, the state transitions to _CONFIGURED, and then, when it is ready to
-play audio, to _ENABLED. The state _PLAYING is used when the audio is being
-rendered.
-
-
-Panel and controller drivers
-----------------------------
-
-The drivers implement panel or controller specific functionality and are not
-usually visible to users except through omapfb driver. They register
-themselves to the DSS driver.
-
-omapfb driver
--------------
-
-The omapfb driver implements arbitrary number of standard linux framebuffers.
-These framebuffers can be routed flexibly to any overlays, thus allowing very
-dynamic display architecture.
-
-The driver exports some omapfb specific ioctls, which are compatible with the
-ioctls in the old driver.
-
-The rest of the non standard features are exported via sysfs. Whether the final
-implementation will use sysfs, or ioctls, is still open.
-
-V4L2 drivers
-------------
-
-V4L2 is being implemented in TI.
-
-From omapdss point of view the V4L2 drivers should be similar to framebuffer
-driver.
-
-Architecture
---------------------
-
-Some clarification what the different components do:
-
- - Framebuffer is a memory area inside OMAP's SRAM/SDRAM that contains the
- pixel data for the image. Framebuffer has width and height and color
- depth.
- - Overlay defines where the pixels are read from and where they go on the
- screen. The overlay may be smaller than framebuffer, thus displaying only
- part of the framebuffer. The position of the overlay may be changed if
- the overlay is smaller than the display.
- - Overlay manager combines the overlays in to one image and feeds them to
- display.
- - Display is the actual physical display device.
-
-A framebuffer can be connected to multiple overlays to show the same pixel data
-on all of the overlays. Note that in this case the overlay input sizes must be
-the same, but, in case of video overlays, the output size can be different. Any
-framebuffer can be connected to any overlay.
-
-An overlay can be connected to one overlay manager. Also DISPC overlays can be
-connected only to DISPC overlay managers, and virtual overlays can be only
-connected to virtual overlays.
-
-An overlay manager can be connected to one display. There are certain
-restrictions which kinds of displays an overlay manager can be connected:
-
- - DISPC TV overlay manager can be only connected to TV display.
- - Virtual overlay managers can only be connected to DBI or DSI displays.
- - DISPC LCD overlay manager can be connected to all displays, except TV
- display.
-
-Sysfs
------
-The sysfs interface is mainly used for testing. I don't think sysfs
-interface is the best for this in the final version, but I don't quite know
-what would be the best interfaces for these things.
-
-The sysfs interface is divided to two parts: DSS and FB.
-
-/sys/class/graphics/fb? directory:
-mirror 0=off, 1=on
-rotate Rotation 0-3 for 0, 90, 180, 270 degrees
-rotate_type 0 = DMA rotation, 1 = VRFB rotation
-overlays List of overlay numbers to which framebuffer pixels go
-phys_addr Physical address of the framebuffer
-virt_addr Virtual address of the framebuffer
-size Size of the framebuffer
-
-/sys/devices/platform/omapdss/overlay? directory:
-enabled 0=off, 1=on
-input_size width,height (ie. the framebuffer size)
-manager Destination overlay manager name
-name
-output_size width,height
-position x,y
-screen_width width
-global_alpha global alpha 0-255 0=transparent 255=opaque
-
-/sys/devices/platform/omapdss/manager? directory:
-display Destination display
-name
-alpha_blending_enabled 0=off, 1=on
-trans_key_enabled 0=off, 1=on
-trans_key_type gfx-destination, video-source
-trans_key_value transparency color key (RGB24)
-default_color default background color (RGB24)
-
-/sys/devices/platform/omapdss/display? directory:
-ctrl_name Controller name
-mirror 0=off, 1=on
-update_mode 0=off, 1=auto, 2=manual
-enabled 0=off, 1=on
-name
-rotate Rotation 0-3 for 0, 90, 180, 270 degrees
-timings Display timings (pixclock,xres/hfp/hbp/hsw,yres/vfp/vbp/vsw)
- When writing, two special timings are accepted for tv-out:
- "pal" and "ntsc"
-panel_name
-tear_elim Tearing elimination 0=off, 1=on
-output_type Output type (video encoder only): "composite" or "svideo"
-
-There are also some debugfs files at <debugfs>/omapdss/ which show information
-about clocks and registers.
-
-Examples
---------
-
-The following definitions have been made for the examples below:
-
-ovl0=/sys/devices/platform/omapdss/overlay0
-ovl1=/sys/devices/platform/omapdss/overlay1
-ovl2=/sys/devices/platform/omapdss/overlay2
-
-mgr0=/sys/devices/platform/omapdss/manager0
-mgr1=/sys/devices/platform/omapdss/manager1
-
-lcd=/sys/devices/platform/omapdss/display0
-dvi=/sys/devices/platform/omapdss/display1
-tv=/sys/devices/platform/omapdss/display2
-
-fb0=/sys/class/graphics/fb0
-fb1=/sys/class/graphics/fb1
-fb2=/sys/class/graphics/fb2
-
-Default setup on OMAP3 SDP
---------------------------
-
-Here's the default setup on OMAP3 SDP board. All planes go to LCD. DVI
-and TV-out are not in use. The columns from left to right are:
-framebuffers, overlays, overlay managers, displays. Framebuffers are
-handled by omapfb, and the rest by the DSS.
-
-FB0 --- GFX -\ DVI
-FB1 --- VID1 --+- LCD ---- LCD
-FB2 --- VID2 -/ TV ----- TV
-
-Example: Switch from LCD to DVI
-----------------------
-
-w=`cat $dvi/timings | cut -d "," -f 2 | cut -d "/" -f 1`
-h=`cat $dvi/timings | cut -d "," -f 3 | cut -d "/" -f 1`
-
-echo "0" > $lcd/enabled
-echo "" > $mgr0/display
-fbset -fb /dev/fb0 -xres $w -yres $h -vxres $w -vyres $h
-# at this point you have to switch the dvi/lcd dip-switch from the omap board
-echo "dvi" > $mgr0/display
-echo "1" > $dvi/enabled
-
-After this the configuration looks like:
-
-FB0 --- GFX -\ -- DVI
-FB1 --- VID1 --+- LCD -/ LCD
-FB2 --- VID2 -/ TV ----- TV
-
-Example: Clone GFX overlay to LCD and TV
--------------------------------
-
-w=`cat $tv/timings | cut -d "," -f 2 | cut -d "/" -f 1`
-h=`cat $tv/timings | cut -d "," -f 3 | cut -d "/" -f 1`
-
-echo "0" > $ovl0/enabled
-echo "0" > $ovl1/enabled
-
-echo "" > $fb1/overlays
-echo "0,1" > $fb0/overlays
-
-echo "$w,$h" > $ovl1/output_size
-echo "tv" > $ovl1/manager
-
-echo "1" > $ovl0/enabled
-echo "1" > $ovl1/enabled
-
-echo "1" > $tv/enabled
-
-After this the configuration looks like (only relevant parts shown):
-
-FB0 +-- GFX ---- LCD ---- LCD
- \- VID1 ---- TV ---- TV
-
-Misc notes
-----------
-
-OMAP FB allocates the framebuffer memory using the standard dma allocator. You
-can enable Contiguous Memory Allocator (CONFIG_CMA) to improve the dma
-allocator, and if CMA is enabled, you use "cma=" kernel parameter to increase
-the global memory area for CMA.
-
-Using DSI DPLL to generate pixel clock it is possible produce the pixel clock
-of 86.5MHz (max possible), and with that you get 1280x1024@57 output from DVI.
-
-Rotation and mirroring currently only supports RGB565 and RGB8888 modes. VRFB
-does not support mirroring.
-
-VRFB rotation requires much more memory than non-rotated framebuffer, so you
-probably need to increase your vram setting before using VRFB rotation. Also,
-many applications may not work with VRFB if they do not pay attention to all
-framebuffer parameters.
-
-Kernel boot arguments
----------------------
-
-omapfb.mode=<display>:<mode>[,...]
- - Default video mode for specified displays. For example,
- "dvi:800x400MR-24@60". See drivers/video/modedb.c.
- There are also two special modes: "pal" and "ntsc" that
- can be used to tv out.
-
-omapfb.vram=<fbnum>:<size>[@<physaddr>][,...]
- - VRAM allocated for a framebuffer. Normally omapfb allocates vram
- depending on the display size. With this you can manually allocate
- more or define the physical address of each framebuffer. For example,
- "1:4M" to allocate 4M for fb1.
-
-omapfb.debug=<y|n>
- - Enable debug printing. You have to have OMAPFB debug support enabled
- in kernel config.
-
-omapfb.test=<y|n>
- - Draw test pattern to framebuffer whenever framebuffer settings change.
- You need to have OMAPFB debug support enabled in kernel config.
-
-omapfb.vrfb=<y|n>
- - Use VRFB rotation for all framebuffers.
-
-omapfb.rotate=<angle>
- - Default rotation applied to all framebuffers.
- 0 - 0 degree rotation
- 1 - 90 degree rotation
- 2 - 180 degree rotation
- 3 - 270 degree rotation
-
-omapfb.mirror=<y|n>
- - Default mirror for all framebuffers. Only works with DMA rotation.
-
-omapdss.def_disp=<display>
- - Name of default display, to which all overlays will be connected.
- Common examples are "lcd" or "tv".
-
-omapdss.debug=<y|n>
- - Enable debug printing. You have to have DSS debug support enabled in
- kernel config.
-
-TODO
-----
-
-DSS locking
-
-Error checking
-- Lots of checks are missing or implemented just as BUG()
-
-System DMA update for DSI
-- Can be used for RGB16 and RGB24P modes. Probably not for RGB24U (how
- to skip the empty byte?)
-
-OMAP1 support
-- Not sure if needed
-
diff --git a/Documentation/arm/OMAP/README b/Documentation/arm/OMAP/README
deleted file mode 100644
index 90c6c57d61e8..000000000000
--- a/Documentation/arm/OMAP/README
+++ /dev/null
@@ -1,11 +0,0 @@
-This file contains documentation for running mainline
-kernel on omaps.
-
-KERNEL NEW DEPENDENCIES
-v4.3+ Update is needed for custom .config files to make sure
- CONFIG_REGULATOR_PBIAS is enabled for MMC1 to work
- properly.
-
-v4.18+ Update is needed for custom .config files to make sure
- CONFIG_MMC_SDHCI_OMAP is enabled for all MMC instances
- to work in DRA7 and K2G based boards.
diff --git a/Documentation/arm/OMAP/omap_pm b/Documentation/arm/OMAP/omap_pm
deleted file mode 100644
index 4ae915a9f899..000000000000
--- a/Documentation/arm/OMAP/omap_pm
+++ /dev/null
@@ -1,154 +0,0 @@
-
-The OMAP PM interface
-=====================
-
-This document describes the temporary OMAP PM interface. Driver
-authors use these functions to communicate minimum latency or
-throughput constraints to the kernel power management code.
-Over time, the intention is to merge features from the OMAP PM
-interface into the Linux PM QoS code.
-
-Drivers need to express PM parameters which:
-
-- support the range of power management parameters present in the TI SRF;
-
-- separate the drivers from the underlying PM parameter
- implementation, whether it is the TI SRF or Linux PM QoS or Linux
- latency framework or something else;
-
-- specify PM parameters in terms of fundamental units, such as
- latency and throughput, rather than units which are specific to OMAP
- or to particular OMAP variants;
-
-- allow drivers which are shared with other architectures (e.g.,
- DaVinci) to add these constraints in a way which won't affect non-OMAP
- systems,
-
-- can be implemented immediately with minimal disruption of other
- architectures.
-
-
-This document proposes the OMAP PM interface, including the following
-five power management functions for driver code:
-
-1. Set the maximum MPU wakeup latency:
- (*pdata->set_max_mpu_wakeup_lat)(struct device *dev, unsigned long t)
-
-2. Set the maximum device wakeup latency:
- (*pdata->set_max_dev_wakeup_lat)(struct device *dev, unsigned long t)
-
-3. Set the maximum system DMA transfer start latency (CORE pwrdm):
- (*pdata->set_max_sdma_lat)(struct device *dev, long t)
-
-4. Set the minimum bus throughput needed by a device:
- (*pdata->set_min_bus_tput)(struct device *dev, u8 agent_id, unsigned long r)
-
-5. Return the number of times the device has lost context
- (*pdata->get_dev_context_loss_count)(struct device *dev)
-
-
-Further documentation for all OMAP PM interface functions can be
-found in arch/arm/plat-omap/include/mach/omap-pm.h.
-
-
-The OMAP PM layer is intended to be temporary
----------------------------------------------
-
-The intention is that eventually the Linux PM QoS layer should support
-the range of power management features present in OMAP3. As this
-happens, existing drivers using the OMAP PM interface can be modified
-to use the Linux PM QoS code; and the OMAP PM interface can disappear.
-
-
-Driver usage of the OMAP PM functions
--------------------------------------
-
-As the 'pdata' in the above examples indicates, these functions are
-exposed to drivers through function pointers in driver .platform_data
-structures. The function pointers are initialized by the board-*.c
-files to point to the corresponding OMAP PM functions:
-.set_max_dev_wakeup_lat will point to
-omap_pm_set_max_dev_wakeup_lat(), etc. Other architectures which do
-not support these functions should leave these function pointers set
-to NULL. Drivers should use the following idiom:
-
- if (pdata->set_max_dev_wakeup_lat)
- (*pdata->set_max_dev_wakeup_lat)(dev, t);
-
-The most common usage of these functions will probably be to specify
-the maximum time from when an interrupt occurs, to when the device
-becomes accessible. To accomplish this, driver writers should use the
-set_max_mpu_wakeup_lat() function to constrain the MPU wakeup
-latency, and the set_max_dev_wakeup_lat() function to constrain the
-device wakeup latency (from clk_enable() to accessibility). For
-example,
-
- /* Limit MPU wakeup latency */
- if (pdata->set_max_mpu_wakeup_lat)
- (*pdata->set_max_mpu_wakeup_lat)(dev, tc);
-
- /* Limit device powerdomain wakeup latency */
- if (pdata->set_max_dev_wakeup_lat)
- (*pdata->set_max_dev_wakeup_lat)(dev, td);
-
- /* total wakeup latency in this example: (tc + td) */
-
-The PM parameters can be overwritten by calling the function again
-with the new value. The settings can be removed by calling the
-function with a t argument of -1 (except in the case of
-set_max_bus_tput(), which should be called with an r argument of 0).
-
-The fifth function above, omap_pm_get_dev_context_loss_count(),
-is intended as an optimization to allow drivers to determine whether the
-device has lost its internal context. If context has been lost, the
-driver must restore its internal context before proceeding.
-
-
-Other specialized interface functions
--------------------------------------
-
-The five functions listed above are intended to be usable by any
-device driver. DSPBridge and CPUFreq have a few special requirements.
-DSPBridge expresses target DSP performance levels in terms of OPP IDs.
-CPUFreq expresses target MPU performance levels in terms of MPU
-frequency. The OMAP PM interface contains functions for these
-specialized cases to convert that input information (OPPs/MPU
-frequency) into the form that the underlying power management
-implementation needs:
-
-6. (*pdata->dsp_get_opp_table)(void)
-
-7. (*pdata->dsp_set_min_opp)(u8 opp_id)
-
-8. (*pdata->dsp_get_opp)(void)
-
-9. (*pdata->cpu_get_freq_table)(void)
-
-10. (*pdata->cpu_set_freq)(unsigned long f)
-
-11. (*pdata->cpu_get_freq)(void)
-
-Customizing OPP for platform
-============================
-Defining CONFIG_PM should enable OPP layer for the silicon
-and the registration of OPP table should take place automatically.
-However, in special cases, the default OPP table may need to be
-tweaked, for e.g.:
- * enable default OPPs which are disabled by default, but which
- could be enabled on a platform
- * Disable an unsupported OPP on the platform
- * Define and add a custom opp table entry
-in these cases, the board file needs to do additional steps as follows:
-arch/arm/mach-omapx/board-xyz.c
- #include "pm.h"
- ....
- static void __init omap_xyz_init_irq(void)
- {
- ....
- /* Initialize the default table */
- omapx_opp_init();
- /* Do customization to the defaults */
- ....
- }
-NOTE: omapx_opp_init will be omap3_opp_init or as required
-based on the omap family.
diff --git a/Documentation/arm/Porting b/Documentation/arm/Porting
deleted file mode 100644
index a492233931b9..000000000000
--- a/Documentation/arm/Porting
+++ /dev/null
@@ -1,135 +0,0 @@
-Taken from list archive at http://lists.arm.linux.org.uk/pipermail/linux-arm-kernel/2001-July/004064.html
-
-Initial definitions
--------------------
-
-The following symbol definitions rely on you knowing the translation that
-__virt_to_phys() does for your machine. This macro converts the passed
-virtual address to a physical address. Normally, it is simply:
-
- phys = virt - PAGE_OFFSET + PHYS_OFFSET
-
-
-Decompressor Symbols
---------------------
-
-ZTEXTADDR
- Start address of decompressor. There's no point in talking about
- virtual or physical addresses here, since the MMU will be off at
- the time when you call the decompressor code. You normally call
- the kernel at this address to start it booting. This doesn't have
- to be located in RAM, it can be in flash or other read-only or
- read-write addressable medium.
-
-ZBSSADDR
- Start address of zero-initialised work area for the decompressor.
- This must be pointing at RAM. The decompressor will zero initialise
- this for you. Again, the MMU will be off.
-
-ZRELADDR
- This is the address where the decompressed kernel will be written,
- and eventually executed. The following constraint must be valid:
-
- __virt_to_phys(TEXTADDR) == ZRELADDR
-
- The initial part of the kernel is carefully coded to be position
- independent.
-
-INITRD_PHYS
- Physical address to place the initial RAM disk. Only relevant if
- you are using the bootpImage stuff (which only works on the old
- struct param_struct).
-
-INITRD_VIRT
- Virtual address of the initial RAM disk. The following constraint
- must be valid:
-
- __virt_to_phys(INITRD_VIRT) == INITRD_PHYS
-
-PARAMS_PHYS
- Physical address of the struct param_struct or tag list, giving the
- kernel various parameters about its execution environment.
-
-
-Kernel Symbols
---------------
-
-PHYS_OFFSET
- Physical start address of the first bank of RAM.
-
-PAGE_OFFSET
- Virtual start address of the first bank of RAM. During the kernel
- boot phase, virtual address PAGE_OFFSET will be mapped to physical
- address PHYS_OFFSET, along with any other mappings you supply.
- This should be the same value as TASK_SIZE.
-
-TASK_SIZE
- The maximum size of a user process in bytes. Since user space
- always starts at zero, this is the maximum address that a user
- process can access+1. The user space stack grows down from this
- address.
-
- Any virtual address below TASK_SIZE is deemed to be user process
- area, and therefore managed dynamically on a process by process
- basis by the kernel. I'll call this the user segment.
-
- Anything above TASK_SIZE is common to all processes. I'll call
- this the kernel segment.
-
- (In other words, you can't put IO mappings below TASK_SIZE, and
- hence PAGE_OFFSET).
-
-TEXTADDR
- Virtual start address of kernel, normally PAGE_OFFSET + 0x8000.
- This is where the kernel image ends up. With the latest kernels,
- it must be located at 32768 bytes into a 128MB region. Previous
- kernels placed a restriction of 256MB here.
-
-DATAADDR
- Virtual address for the kernel data segment. Must not be defined
- when using the decompressor.
-
-VMALLOC_START
-VMALLOC_END
- Virtual addresses bounding the vmalloc() area. There must not be
- any static mappings in this area; vmalloc will overwrite them.
- The addresses must also be in the kernel segment (see above).
- Normally, the vmalloc() area starts VMALLOC_OFFSET bytes above the
- last virtual RAM address (found using variable high_memory).
-
-VMALLOC_OFFSET
- Offset normally incorporated into VMALLOC_START to provide a hole
- between virtual RAM and the vmalloc area. We do this to allow
- out of bounds memory accesses (eg, something writing off the end
- of the mapped memory map) to be caught. Normally set to 8MB.
-
-Architecture Specific Macros
-----------------------------
-
-BOOT_MEM(pram,pio,vio)
- `pram' specifies the physical start address of RAM. Must always
- be present, and should be the same as PHYS_OFFSET.
-
- `pio' is the physical address of an 8MB region containing IO for
- use with the debugging macros in arch/arm/kernel/debug-armv.S.
-
- `vio' is the virtual address of the 8MB debugging region.
-
- It is expected that the debugging region will be re-initialised
- by the architecture specific code later in the code (via the
- MAPIO function).
-
-BOOT_PARAMS
- Same as, and see PARAMS_PHYS.
-
-FIXUP(func)
- Machine specific fixups, run before memory subsystems have been
- initialised.
-
-MAPIO(func)
- Machine specific function to map IO areas (including the debug
- region above).
-
-INITIRQ(func)
- Machine specific function to initialise interrupts.
-
diff --git a/Documentation/arm/README b/Documentation/arm/README
deleted file mode 100644
index 9d1e5b2c92e6..000000000000
--- a/Documentation/arm/README
+++ /dev/null
@@ -1,204 +0,0 @@
- ARM Linux 2.6
- =============
-
- Please check <ftp://ftp.arm.linux.org.uk/pub/armlinux> for
- updates.
-
-Compilation of kernel
----------------------
-
- In order to compile ARM Linux, you will need a compiler capable of
- generating ARM ELF code with GNU extensions. GCC 3.3 is known to be
- a good compiler. Fortunately, you needn't guess. The kernel will report
- an error if your compiler is a recognized offender.
-
- To build ARM Linux natively, you shouldn't have to alter the ARCH = line
- in the top level Makefile. However, if you don't have the ARM Linux ELF
- tools installed as default, then you should change the CROSS_COMPILE
- line as detailed below.
-
- If you wish to cross-compile, then alter the following lines in the top
- level make file:
-
- ARCH = <whatever>
- with
- ARCH = arm
-
- and
-
- CROSS_COMPILE=
- to
- CROSS_COMPILE=<your-path-to-your-compiler-without-gcc>
- eg.
- CROSS_COMPILE=arm-linux-
-
- Do a 'make config', followed by 'make Image' to build the kernel
- (arch/arm/boot/Image). A compressed image can be built by doing a
- 'make zImage' instead of 'make Image'.
-
-
-Bug reports etc
----------------
-
- Please send patches to the patch system. For more information, see
- http://www.arm.linux.org.uk/developer/patches/info.php Always include some
- explanation as to what the patch does and why it is needed.
-
- Bug reports should be sent to linux-arm-kernel@lists.arm.linux.org.uk,
- or submitted through the web form at
- http://www.arm.linux.org.uk/developer/
-
- When sending bug reports, please ensure that they contain all relevant
- information, eg. the kernel messages that were printed before/during
- the problem, what you were doing, etc.
-
-
-Include files
--------------
-
- Several new include directories have been created under include/asm-arm,
- which are there to reduce the clutter in the top-level directory. These
- directories, and their purpose is listed below:
-
- arch-* machine/platform specific header files
- hardware driver-internal ARM specific data structures/definitions
- mach descriptions of generic ARM to specific machine interfaces
- proc-* processor dependent header files (currently only two
- categories)
-
-
-Machine/Platform support
-------------------------
-
- The ARM tree contains support for a lot of different machine types. To
- continue supporting these differences, it has become necessary to split
- machine-specific parts by directory. For this, the machine category is
- used to select which directories and files get included (we will use
- $(MACHINE) to refer to the category)
-
- To this end, we now have arch/arm/mach-$(MACHINE) directories which are
- designed to house the non-driver files for a particular machine (eg, PCI,
- memory management, architecture definitions etc). For all future
- machines, there should be a corresponding arch/arm/mach-$(MACHINE)/include/mach
- directory.
-
-
-Modules
--------
-
- Although modularisation is supported (and required for the FP emulator),
- each module on an ARM2/ARM250/ARM3 machine when is loaded will take
- memory up to the next 32k boundary due to the size of the pages.
- Therefore, is modularisation on these machines really worth it?
-
- However, ARM6 and up machines allow modules to take multiples of 4k, and
- as such Acorn RiscPCs and other architectures using these processors can
- make good use of modularisation.
-
-
-ADFS Image files
-----------------
-
- You can access image files on your ADFS partitions by mounting the ADFS
- partition, and then using the loopback device driver. You must have
- losetup installed.
-
- Please note that the PCEmulator DOS partitions have a partition table at
- the start, and as such, you will have to give '-o offset' to losetup.
-
-
-Request to developers
----------------------
-
- When writing device drivers which include a separate assembler file, please
- include it in with the C file, and not the arch/arm/lib directory. This
- allows the driver to be compiled as a loadable module without requiring
- half the code to be compiled into the kernel image.
-
- In general, try to avoid using assembler unless it is really necessary. It
- makes drivers far less easy to port to other hardware.
-
-
-ST506 hard drives
------------------
-
- The ST506 hard drive controllers seem to be working fine (if a little
- slowly). At the moment they will only work off the controllers on an
- A4x0's motherboard, but for it to work off a Podule just requires
- someone with a podule to add the addresses for the IRQ mask and the
- HDC base to the source.
-
- As of 31/3/96 it works with two drives (you should get the ADFS
- *configure harddrive set to 2). I've got an internal 20MB and a great
- big external 5.25" FH 64MB drive (who could ever want more :-) ).
-
- I've just got 240K/s off it (a dd with bs=128k); thats about half of what
- RiscOS gets; but it's a heck of a lot better than the 50K/s I was getting
- last week :-)
-
- Known bug: Drive data errors can cause a hang; including cases where
- the controller has fixed the error using ECC. (Possibly ONLY
- in that case...hmm).
-
-
-1772 Floppy
------------
- This also seems to work OK, but hasn't been stressed much lately. It
- hasn't got any code for disc change detection in there at the moment which
- could be a bit of a problem! Suggestions on the correct way to do this
- are welcome.
-
-
-CONFIG_MACH_ and CONFIG_ARCH_
------------------------------
- A change was made in 2003 to the macro names for new machines.
- Historically, CONFIG_ARCH_ was used for the bonafide architecture,
- e.g. SA1100, as well as implementations of the architecture,
- e.g. Assabet. It was decided to change the implementation macros
- to read CONFIG_MACH_ for clarity. Moreover, a retroactive fixup has
- not been made because it would complicate patching.
-
- Previous registrations may be found online.
-
- <http://www.arm.linux.org.uk/developer/machines/>
-
-Kernel entry (head.S)
---------------------------
- The initial entry into the kernel is via head.S, which uses machine
- independent code. The machine is selected by the value of 'r1' on
- entry, which must be kept unique.
-
- Due to the large number of machines which the ARM port of Linux provides
- for, we have a method to manage this which ensures that we don't end up
- duplicating large amounts of code.
-
- We group machine (or platform) support code into machine classes. A
- class typically based around one or more system on a chip devices, and
- acts as a natural container around the actual implementations. These
- classes are given directories - arch/arm/mach-<class> and
- arch/arm/mach-<class> - which contain the source files to/include/mach
- support the machine class. This directories also contain any machine
- specific supporting code.
-
- For example, the SA1100 class is based upon the SA1100 and SA1110 SoC
- devices, and contains the code to support the way the on-board and off-
- board devices are used, or the device is setup, and provides that
- machine specific "personality."
-
- For platforms that support device tree (DT), the machine selection is
- controlled at runtime by passing the device tree blob to the kernel. At
- compile-time, support for the machine type must be selected. This allows for
- a single multiplatform kernel build to be used for several machine types.
-
- For platforms that do not use device tree, this machine selection is
- controlled by the machine type ID, which acts both as a run-time and a
- compile-time code selection method. You can register a new machine via the
- web site at:
-
- <http://www.arm.linux.org.uk/developer/machines/>
-
- Note: Please do not register a machine type for DT-only platforms. If your
- platform is DT-only, you do not need a registered machine type.
-
----
-Russell King (15/03/2004)
diff --git a/Documentation/arm/SA1100/ADSBitsy b/Documentation/arm/SA1100/ADSBitsy
deleted file mode 100644
index f9f62e8c0719..000000000000
--- a/Documentation/arm/SA1100/ADSBitsy
+++ /dev/null
@@ -1,43 +0,0 @@
-ADS Bitsy Single Board Computer
-(It is different from Bitsy(iPAQ) of Compaq)
-
-For more details, contact Applied Data Systems or see
-http://www.applieddata.net/products.html
-
-The Linux support for this product has been provided by
-Woojung Huh <whuh@applieddata.net>
-
-Use 'make adsbitsy_config' before any 'make config'.
-This will set up defaults for ADS Bitsy support.
-
-The kernel zImage is linked to be loaded and executed at 0xc0400000.
-
-Linux can be used with the ADS BootLoader that ships with the
-newer rev boards. See their documentation on how to load Linux.
-
-Supported peripherals:
-- SA1100 LCD frame buffer (8/16bpp...sort of)
-- SA1111 USB Master
-- SA1100 serial port
-- pcmcia, compact flash
-- touchscreen(ucb1200)
-- console on LCD screen
-- serial ports (ttyS[0-2])
- - ttyS0 is default for serial console
-
-To do:
-- everything else! :-)
-
-Notes:
-
-- The flash on board is divided into 3 partitions.
- You should be careful to use flash on board.
- Its partition is different from GraphicsClient Plus and GraphicsMaster
-
-- 16bpp mode requires a different cable than what ships with the board.
- Contact ADS or look through the manual to wire your own. Currently,
- if you compile with 16bit mode support and switch into a lower bpp
- mode, the timing is off so the image is corrupted. This will be
- fixed soon.
-
-Any contribution can be sent to nico@fluxnic.net and will be greatly welcome!
diff --git a/Documentation/arm/SA1100/Assabet b/Documentation/arm/SA1100/Assabet
deleted file mode 100644
index e08a6739e72c..000000000000
--- a/Documentation/arm/SA1100/Assabet
+++ /dev/null
@@ -1,300 +0,0 @@
-The Intel Assabet (SA-1110 evaluation) board
-============================================
-
-Please see:
-http://developer.intel.com
-
-Also some notes from John G Dorsey <jd5q@andrew.cmu.edu>:
-http://www.cs.cmu.edu/~wearable/software/assabet.html
-
-
-Building the kernel
--------------------
-
-To build the kernel with current defaults:
-
- make assabet_config
- make oldconfig
- make zImage
-
-The resulting kernel image should be available in linux/arch/arm/boot/zImage.
-
-
-Installing a bootloader
------------------------
-
-A couple of bootloaders able to boot Linux on Assabet are available:
-
-BLOB (http://www.lartmaker.nl/lartware/blob/)
-
- BLOB is a bootloader used within the LART project. Some contributed
- patches were merged into BLOB to add support for Assabet.
-
-Compaq's Bootldr + John Dorsey's patch for Assabet support
-(http://www.handhelds.org/Compaq/bootldr.html)
-(http://www.wearablegroup.org/software/bootldr/)
-
- Bootldr is the bootloader developed by Compaq for the iPAQ Pocket PC.
- John Dorsey has produced add-on patches to add support for Assabet and
- the JFFS filesystem.
-
-RedBoot (http://sources.redhat.com/redboot/)
-
- RedBoot is a bootloader developed by Red Hat based on the eCos RTOS
- hardware abstraction layer. It supports Assabet amongst many other
- hardware platforms.
-
-RedBoot is currently the recommended choice since it's the only one to have
-networking support, and is the most actively maintained.
-
-Brief examples on how to boot Linux with RedBoot are shown below. But first
-you need to have RedBoot installed in your flash memory. A known to work
-precompiled RedBoot binary is available from the following location:
-
-ftp://ftp.netwinder.org/users/n/nico/
-ftp://ftp.arm.linux.org.uk/pub/linux/arm/people/nico/
-ftp://ftp.handhelds.org/pub/linux/arm/sa-1100-patches/
-
-Look for redboot-assabet*.tgz. Some installation infos are provided in
-redboot-assabet*.txt.
-
-
-Initial RedBoot configuration
------------------------------
-
-The commands used here are explained in The RedBoot User's Guide available
-on-line at http://sources.redhat.com/ecos/docs.html.
-Please refer to it for explanations.
-
-If you have a CF network card (my Assabet kit contained a CF+ LP-E from
-Socket Communications Inc.), you should strongly consider using it for TFTP
-file transfers. You must insert it before RedBoot runs since it can't detect
-it dynamically.
-
-To initialize the flash directory:
-
- fis init -f
-
-To initialize the non-volatile settings, like whether you want to use BOOTP or
-a static IP address, etc, use this command:
-
- fconfig -i
-
-
-Writing a kernel image into flash
----------------------------------
-
-First, the kernel image must be loaded into RAM. If you have the zImage file
-available on a TFTP server:
-
- load zImage -r -b 0x100000
-
-If you rather want to use Y-Modem upload over the serial port:
-
- load -m ymodem -r -b 0x100000
-
-To write it to flash:
-
- fis create "Linux kernel" -b 0x100000 -l 0xc0000
-
-
-Booting the kernel
-------------------
-
-The kernel still requires a filesystem to boot. A ramdisk image can be loaded
-as follows:
-
- load ramdisk_image.gz -r -b 0x800000
-
-Again, Y-Modem upload can be used instead of TFTP by replacing the file name
-by '-y ymodem'.
-
-Now the kernel can be retrieved from flash like this:
-
- fis load "Linux kernel"
-
-or loaded as described previously. To boot the kernel:
-
- exec -b 0x100000 -l 0xc0000
-
-The ramdisk image could be stored into flash as well, but there are better
-solutions for on-flash filesystems as mentioned below.
-
-
-Using JFFS2
------------
-
-Using JFFS2 (the Second Journalling Flash File System) is probably the most
-convenient way to store a writable filesystem into flash. JFFS2 is used in
-conjunction with the MTD layer which is responsible for low-level flash
-management. More information on the Linux MTD can be found on-line at:
-http://www.linux-mtd.infradead.org/. A JFFS howto with some infos about
-creating JFFS/JFFS2 images is available from the same site.
-
-For instance, a sample JFFS2 image can be retrieved from the same FTP sites
-mentioned below for the precompiled RedBoot image.
-
-To load this file:
-
- load sample_img.jffs2 -r -b 0x100000
-
-The result should look like:
-
-RedBoot> load sample_img.jffs2 -r -b 0x100000
-Raw file loaded 0x00100000-0x00377424
-
-Now we must know the size of the unallocated flash:
-
- fis free
-
-Result:
-
-RedBoot> fis free
- 0x500E0000 .. 0x503C0000
-
-The values above may be different depending on the size of the filesystem and
-the type of flash. See their usage below as an example and take care of
-substituting yours appropriately.
-
-We must determine some values:
-
-size of unallocated flash: 0x503c0000 - 0x500e0000 = 0x2e0000
-size of the filesystem image: 0x00377424 - 0x00100000 = 0x277424
-
-We want to fit the filesystem image of course, but we also want to give it all
-the remaining flash space as well. To write it:
-
- fis unlock -f 0x500E0000 -l 0x2e0000
- fis erase -f 0x500E0000 -l 0x2e0000
- fis write -b 0x100000 -l 0x277424 -f 0x500E0000
- fis create "JFFS2" -n -f 0x500E0000 -l 0x2e0000
-
-Now the filesystem is associated to a MTD "partition" once Linux has discovered
-what they are in the boot process. From Redboot, the 'fis list' command
-displays them:
-
-RedBoot> fis list
-Name FLASH addr Mem addr Length Entry point
-RedBoot 0x50000000 0x50000000 0x00020000 0x00000000
-RedBoot config 0x503C0000 0x503C0000 0x00020000 0x00000000
-FIS directory 0x503E0000 0x503E0000 0x00020000 0x00000000
-Linux kernel 0x50020000 0x00100000 0x000C0000 0x00000000
-JFFS2 0x500E0000 0x500E0000 0x002E0000 0x00000000
-
-However Linux should display something like:
-
-SA1100 flash: probing 32-bit flash bus
-SA1100 flash: Found 2 x16 devices at 0x0 in 32-bit mode
-Using RedBoot partition definition
-Creating 5 MTD partitions on "SA1100 flash":
-0x00000000-0x00020000 : "RedBoot"
-0x00020000-0x000e0000 : "Linux kernel"
-0x000e0000-0x003c0000 : "JFFS2"
-0x003c0000-0x003e0000 : "RedBoot config"
-0x003e0000-0x00400000 : "FIS directory"
-
-What's important here is the position of the partition we are interested in,
-which is the third one. Within Linux, this correspond to /dev/mtdblock2.
-Therefore to boot Linux with the kernel and its root filesystem in flash, we
-need this RedBoot command:
-
- fis load "Linux kernel"
- exec -b 0x100000 -l 0xc0000 -c "root=/dev/mtdblock2"
-
-Of course other filesystems than JFFS might be used, like cramfs for example.
-You might want to boot with a root filesystem over NFS, etc. It is also
-possible, and sometimes more convenient, to flash a filesystem directly from
-within Linux while booted from a ramdisk or NFS. The Linux MTD repository has
-many tools to deal with flash memory as well, to erase it for example. JFFS2
-can then be mounted directly on a freshly erased partition and files can be
-copied over directly. Etc...
-
-
-RedBoot scripting
------------------
-
-All the commands above aren't so useful if they have to be typed in every
-time the Assabet is rebooted. Therefore it's possible to automate the boot
-process using RedBoot's scripting capability.
-
-For example, I use this to boot Linux with both the kernel and the ramdisk
-images retrieved from a TFTP server on the network:
-
-RedBoot> fconfig
-Run script at boot: false true
-Boot script:
-Enter script, terminate with empty line
->> load zImage -r -b 0x100000
->> load ramdisk_ks.gz -r -b 0x800000
->> exec -b 0x100000 -l 0xc0000
->>
-Boot script timeout (1000ms resolution): 3
-Use BOOTP for network configuration: true
-GDB connection port: 9000
-Network debug at boot time: false
-Update RedBoot non-volatile configuration - are you sure (y/n)? y
-
-Then, rebooting the Assabet is just a matter of waiting for the login prompt.
-
-
-
-Nicolas Pitre
-nico@fluxnic.net
-June 12, 2001
-
-
-Status of peripherals in -rmk tree (updated 14/10/2001)
--------------------------------------------------------
-
-Assabet:
- Serial ports:
- Radio: TX, RX, CTS, DSR, DCD, RI
- PM: Not tested.
- COM: TX, RX, CTS, DSR, DCD, RTS, DTR, PM
- PM: Not tested.
- I2C: Implemented, not fully tested.
- L3: Fully tested, pass.
- PM: Not tested.
-
- Video:
- LCD: Fully tested. PM
- (LCD doesn't like being blanked with
- neponset connected)
- Video out: Not fully
-
- Audio:
- UDA1341:
- Playback: Fully tested, pass.
- Record: Implemented, not tested.
- PM: Not tested.
-
- UCB1200:
- Audio play: Implemented, not heavily tested.
- Audio rec: Implemented, not heavily tested.
- Telco audio play: Implemented, not heavily tested.
- Telco audio rec: Implemented, not heavily tested.
- POTS control: No
- Touchscreen: Yes
- PM: Not tested.
-
- Other:
- PCMCIA:
- LPE: Fully tested, pass.
- USB: No
- IRDA:
- SIR: Fully tested, pass.
- FIR: Fully tested, pass.
- PM: Not tested.
-
-Neponset:
- Serial ports:
- COM1,2: TX, RX, CTS, DSR, DCD, RTS, DTR
- PM: Not tested.
- USB: Implemented, not heavily tested.
- PCMCIA: Implemented, not heavily tested.
- PM: Not tested.
- CF: Implemented, not heavily tested.
- PM: Not tested.
-
-More stuff can be found in the -np (Nicolas Pitre's) tree.
-
diff --git a/Documentation/arm/SA1100/Brutus b/Documentation/arm/SA1100/Brutus
deleted file mode 100644
index 6a3aa95e9bfd..000000000000
--- a/Documentation/arm/SA1100/Brutus
+++ /dev/null
@@ -1,66 +0,0 @@
-Brutus is an evaluation platform for the SA1100 manufactured by Intel.
-For more details, see:
-
-http://developer.intel.com
-
-To compile for Brutus, you must issue the following commands:
-
- make brutus_config
- make config
- [accept all the defaults]
- make zImage
-
-The resulting kernel will end up in linux/arch/arm/boot/zImage. This file
-must be loaded at 0xc0008000 in Brutus's memory and execution started at
-0xc0008000 as well with the value of registers r0 = 0 and r1 = 16 upon
-entry.
-
-But prior to execute the kernel, a ramdisk image must also be loaded in
-memory. Use memory address 0xd8000000 for this. Note that the file
-containing the (compressed) ramdisk image must not exceed 4 MB.
-
-Typically, you'll need angelboot to load the kernel.
-The following angelboot.opt file should be used:
-
------ begin angelboot.opt -----
-base 0xc0008000
-entry 0xc0008000
-r0 0x00000000
-r1 0x00000010
-device /dev/ttyS0
-options "9600 8N1"
-baud 115200
-otherfile ramdisk_img.gz
-otherbase 0xd8000000
------ end angelboot.opt -----
-
-Then load the kernel and ramdisk with:
-
- angelboot -f angelboot.opt zImage
-
-The first Brutus serial port (assumed to be linked to /dev/ttyS0 on your
-host PC) is used by angel to load the kernel and ramdisk image. The serial
-console is provided through the second Brutus serial port. To access it,
-you may use minicom configured with /dev/ttyS1, 9600 baud, 8N1, no flow
-control.
-
-Currently supported:
- - RS232 serial ports
- - audio output
- - LCD screen
- - keyboard
-
-The actual Brutus support may not be complete without extra patches.
-If such patches exist, they should be found from
-ftp.netwinder.org/users/n/nico.
-
-A full PCMCIA support is still missing, although it's possible to hack
-some drivers in order to drive already inserted cards at boot time with
-little modifications.
-
-Any contribution is welcome.
-
-Please send patches to nico@fluxnic.net
-
-Have Fun !
-
diff --git a/Documentation/arm/SA1100/CERF b/Documentation/arm/SA1100/CERF
deleted file mode 100644
index b3d845301ef1..000000000000
--- a/Documentation/arm/SA1100/CERF
+++ /dev/null
@@ -1,29 +0,0 @@
-*** The StrongARM version of the CerfBoard/Cube has been discontinued ***
-
-The Intrinsyc CerfBoard is a StrongARM 1110-based computer on a board
-that measures approximately 2" square. It includes an Ethernet
-controller, an RS232-compatible serial port, a USB function port, and
-one CompactFlash+ slot on the back. Pictures can be found at the
-Intrinsyc website, http://www.intrinsyc.com.
-
-This document describes the support in the Linux kernel for the
-Intrinsyc CerfBoard.
-
-Supported in this version:
- - CompactFlash+ slot (select PCMCIA in General Setup and any options
- that may be required)
- - Onboard Crystal CS8900 Ethernet controller (Cerf CS8900A support in
- Network Devices)
- - Serial ports with a serial console (hardcoded to 38400 8N1)
-
-In order to get this kernel onto your Cerf, you need a server that runs
-both BOOTP and TFTP. Detailed instructions should have come with your
-evaluation kit on how to use the bootloader. This series of commands
-will suffice:
-
- make ARCH=arm CROSS_COMPILE=arm-linux- cerfcube_defconfig
- make ARCH=arm CROSS_COMPILE=arm-linux- zImage
- make ARCH=arm CROSS_COMPILE=arm-linux- modules
- cp arch/arm/boot/zImage <TFTP directory>
-
-support@intrinsyc.com
diff --git a/Documentation/arm/SA1100/FreeBird b/Documentation/arm/SA1100/FreeBird
deleted file mode 100644
index ab9193663b2b..000000000000
--- a/Documentation/arm/SA1100/FreeBird
+++ /dev/null
@@ -1,21 +0,0 @@
-Freebird-1.1 is produced by Legend(C), Inc.
-http://web.archive.org/web/*/http://www.legend.com.cn
-and software/linux maintained by Coventive(C), Inc.
-(http://www.coventive.com)
-
-Based on the Nicolas's strongarm kernel tree.
-
-===============================================================
-Maintainer:
-
-Chester Kuo <chester@coventive.com>
- <chester@linux.org.tw>
-
-Author :
-Tim wu <timwu@coventive.com>
-CIH <cih@coventive.com>
-Eric Peng <ericpeng@coventive.com>
-Jeff Lee <jeff_lee@coventive.com>
-Allen Cheng
-Tony Liu <tonyliu@coventive.com>
-
diff --git a/Documentation/arm/SA1100/GraphicsClient b/Documentation/arm/SA1100/GraphicsClient
deleted file mode 100644
index 867bb35943af..000000000000
--- a/Documentation/arm/SA1100/GraphicsClient
+++ /dev/null
@@ -1,98 +0,0 @@
-ADS GraphicsClient Plus Single Board Computer
-
-For more details, contact Applied Data Systems or see
-http://www.applieddata.net/products.html
-
-The original Linux support for this product has been provided by
-Nicolas Pitre <nico@fluxnic.net>. Continued development work by
-Woojung Huh <whuh@applieddata.net>
-
-It's currently possible to mount a root filesystem via NFS providing a
-complete Linux environment. Otherwise a ramdisk image may be used. The
-board supports MTD/JFFS, so you could also mount something on there.
-
-Use 'make graphicsclient_config' before any 'make config'. This will set up
-defaults for GraphicsClient Plus support.
-
-The kernel zImage is linked to be loaded and executed at 0xc0200000.
-Also the following registers should have the specified values upon entry:
-
- r0 = 0
- r1 = 29 (this is the GraphicsClient architecture number)
-
-Linux can be used with the ADS BootLoader that ships with the
-newer rev boards. See their documentation on how to load Linux.
-Angel is not available for the GraphicsClient Plus AFAIK.
-
-There is a board known as just the GraphicsClient that ADS used to
-produce but has end of lifed. This code will not work on the older
-board with the ADS bootloader, but should still work with Angel,
-as outlined below. In any case, if you're planning on deploying
-something en masse, you should probably get the newer board.
-
-If using Angel on the older boards, here is a typical angel.opt option file
-if the kernel is loaded through the Angel Debug Monitor:
-
------ begin angelboot.opt -----
-base 0xc0200000
-entry 0xc0200000
-r0 0x00000000
-r1 0x0000001d
-device /dev/ttyS1
-options "38400 8N1"
-baud 115200
-#otherfile ramdisk.gz
-#otherbase 0xc0800000
-exec minicom
------ end angelboot.opt -----
-
-Then the kernel (and ramdisk if otherfile/otherbase lines above are
-uncommented) would be loaded with:
-
- angelboot -f angelboot.opt zImage
-
-Here it is assumed that the board is connected to ttyS1 on your PC
-and that minicom is preconfigured with /dev/ttyS1, 38400 baud, 8N1, no flow
-control by default.
-
-If any other bootloader is used, ensure it accomplish the same, especially
-for r0/r1 register values before jumping into the kernel.
-
-
-Supported peripherals:
-- SA1100 LCD frame buffer (8/16bpp...sort of)
-- on-board SMC 92C96 ethernet NIC
-- SA1100 serial port
-- flash memory access (MTD/JFFS)
-- pcmcia
-- touchscreen(ucb1200)
-- ps/2 keyboard
-- console on LCD screen
-- serial ports (ttyS[0-2])
- - ttyS0 is default for serial console
-- Smart I/O (ADC, keypad, digital inputs, etc)
- See http://www.eurotech-inc.com/linux-sbc.asp for IOCTL documentation
- and example user space code. ps/2 keybd is multiplexed through this driver
-
-To do:
-- UCB1200 audio with new ucb_generic layer
-- everything else! :-)
-
-Notes:
-
-- The flash on board is divided into 3 partitions. mtd0 is where
- the ADS boot ROM and zImage is stored. It's been marked as
- read-only to keep you from blasting over the bootloader. :) mtd1 is
- for the ramdisk.gz image. mtd2 is user flash space and can be
- utilized for either JFFS or if you're feeling crazy, running ext2
- on top of it. If you're not using the ADS bootloader, you're
- welcome to blast over the mtd1 partition also.
-
-- 16bpp mode requires a different cable than what ships with the board.
- Contact ADS or look through the manual to wire your own. Currently,
- if you compile with 16bit mode support and switch into a lower bpp
- mode, the timing is off so the image is corrupted. This will be
- fixed soon.
-
-Any contribution can be sent to nico@fluxnic.net and will be greatly welcome!
-
diff --git a/Documentation/arm/SA1100/GraphicsMaster b/Documentation/arm/SA1100/GraphicsMaster
deleted file mode 100644
index 9145088a0ba2..000000000000
--- a/Documentation/arm/SA1100/GraphicsMaster
+++ /dev/null
@@ -1,53 +0,0 @@
-ADS GraphicsMaster Single Board Computer
-
-For more details, contact Applied Data Systems or see
-http://www.applieddata.net/products.html
-
-The original Linux support for this product has been provided by
-Nicolas Pitre <nico@fluxnic.net>. Continued development work by
-Woojung Huh <whuh@applieddata.net>
-
-Use 'make graphicsmaster_config' before any 'make config'.
-This will set up defaults for GraphicsMaster support.
-
-The kernel zImage is linked to be loaded and executed at 0xc0400000.
-
-Linux can be used with the ADS BootLoader that ships with the
-newer rev boards. See their documentation on how to load Linux.
-
-Supported peripherals:
-- SA1100 LCD frame buffer (8/16bpp...sort of)
-- SA1111 USB Master
-- on-board SMC 92C96 ethernet NIC
-- SA1100 serial port
-- flash memory access (MTD/JFFS)
-- pcmcia, compact flash
-- touchscreen(ucb1200)
-- ps/2 keyboard
-- console on LCD screen
-- serial ports (ttyS[0-2])
- - ttyS0 is default for serial console
-- Smart I/O (ADC, keypad, digital inputs, etc)
- See http://www.eurotech-inc.com/linux-sbc.asp for IOCTL documentation
- and example user space code. ps/2 keybd is multiplexed through this driver
-
-To do:
-- everything else! :-)
-
-Notes:
-
-- The flash on board is divided into 3 partitions. mtd0 is where
- the zImage is stored. It's been marked as read-only to keep you
- from blasting over the bootloader. :) mtd1 is
- for the ramdisk.gz image. mtd2 is user flash space and can be
- utilized for either JFFS or if you're feeling crazy, running ext2
- on top of it. If you're not using the ADS bootloader, you're
- welcome to blast over the mtd1 partition also.
-
-- 16bpp mode requires a different cable than what ships with the board.
- Contact ADS or look through the manual to wire your own. Currently,
- if you compile with 16bit mode support and switch into a lower bpp
- mode, the timing is off so the image is corrupted. This will be
- fixed soon.
-
-Any contribution can be sent to nico@fluxnic.net and will be greatly welcome!
diff --git a/Documentation/arm/SA1100/HUW_WEBPANEL b/Documentation/arm/SA1100/HUW_WEBPANEL
deleted file mode 100644
index fd56b48d4833..000000000000
--- a/Documentation/arm/SA1100/HUW_WEBPANEL
+++ /dev/null
@@ -1,17 +0,0 @@
-The HUW_WEBPANEL is a product of the german company Hoeft & Wessel AG
-
-If you want more information, please visit
-http://www.hoeft-wessel.de
-
-To build the kernel:
- make huw_webpanel_config
- make oldconfig
- [accept all defaults]
- make zImage
-
-Mostly of the work is done by:
-Roman Jordan jor@hoeft-wessel.de
-Christoph Schulz schu@hoeft-wessel.de
-
-2000/12/18/
-
diff --git a/Documentation/arm/SA1100/Itsy b/Documentation/arm/SA1100/Itsy
deleted file mode 100644
index 44b94997fa0d..000000000000
--- a/Documentation/arm/SA1100/Itsy
+++ /dev/null
@@ -1,39 +0,0 @@
-Itsy is a research project done by the Western Research Lab, and Systems
-Research Center in Palo Alto, CA. The Itsy project is one of several
-research projects at Compaq that are related to pocket computing.
-
-For more information, see:
-
- http://www.hpl.hp.com/downloads/crl/itsy/
-
-Notes on initial 2.4 Itsy support (8/27/2000) :
-The port was done on an Itsy version 1.5 machine with a daughtercard with
-64 Meg of DRAM and 32 Meg of Flash. The initial work includes support for
-serial console (to see what you're doing). No other devices have been
-enabled.
-
-To build, do a "make menuconfig" (or xmenuconfig) and select Itsy support.
-Disable Flash and LCD support. and then do a make zImage.
-Finally, you will need to cd to arch/arm/boot/tools and execute a make there
-to build the params-itsy program used to boot the kernel.
-
-In order to install the port of 2.4 to the itsy, You will need to set the
-configuration parameters in the monitor as follows:
-Arg 1:0x08340000, Arg2: 0xC0000000, Arg3:18 (0x12), Arg4:0
-Make sure the start-routine address is set to 0x00060000.
-
-Next, flash the params-itsy program to 0x00060000 ("p 1 0x00060000" in the
-flash menu) Flash the kernel in arch/arm/boot/zImage into 0x08340000
-("p 1 0x00340000"). Finally flash an initial ramdisk into 0xC8000000
-("p 2 0x0") We used ramdisk-2-30.gz from the 0.11 version directory on
-handhelds.org.
-
-The serial connection we established was at:
- 8-bit data, no parity, 1 stop bit(s), 115200.00 b/s. in the monitor, in the
-params-itsy program, and in the kernel itself. This can be changed, but
-not easily. The monitor parameters are easily changed, the params program
-setup is assembly outl's, and the kernel is a configuration item specific to
-the itsy. (i.e. grep for CONFIG_SA1100_ITSY and you'll find where it is.)
-
-
-This should get you a properly booting 2.4 kernel on the itsy.
diff --git a/Documentation/arm/SA1100/LART b/Documentation/arm/SA1100/LART
deleted file mode 100644
index 6d412b685598..000000000000
--- a/Documentation/arm/SA1100/LART
+++ /dev/null
@@ -1,14 +0,0 @@
-Linux Advanced Radio Terminal (LART)
-------------------------------------
-
-The LART is a small (7.5 x 10cm) SA-1100 board, designed for embedded
-applications. It has 32 MB DRAM, 4MB Flash ROM, double RS232 and all
-other StrongARM-gadgets. Almost all SA signals are directly accessible
-through a number of connectors. The powersupply accepts voltages
-between 3.5V and 16V and is overdimensioned to support a range of
-daughterboards. A quad Ethernet / IDE / PS2 / sound daughterboard
-is under development, with plenty of others in different stages of
-planning.
-
-The hardware designs for this board have been released under an open license;
-see the LART page at http://www.lartmaker.nl/ for more information.
diff --git a/Documentation/arm/SA1100/PLEB b/Documentation/arm/SA1100/PLEB
deleted file mode 100644
index b9c8a631a351..000000000000
--- a/Documentation/arm/SA1100/PLEB
+++ /dev/null
@@ -1,11 +0,0 @@
-The PLEB project was started as a student initiative at the School of
-Computer Science and Engineering, University of New South Wales to make a
-pocket computer capable of running the Linux Kernel.
-
-PLEB support has yet to be fully integrated.
-
-For more information, see:
-
- http://www.cse.unsw.edu.au
-
-
diff --git a/Documentation/arm/SA1100/Pangolin b/Documentation/arm/SA1100/Pangolin
deleted file mode 100644
index 077a6120e129..000000000000
--- a/Documentation/arm/SA1100/Pangolin
+++ /dev/null
@@ -1,23 +0,0 @@
-Pangolin is a StrongARM 1110-based evaluation platform produced
-by Dialogue Technology (http://www.dialogue.com.tw/).
-It has EISA slots for ease of configuration with SDRAM/Flash
-memory card, USB/Serial/Audio card, Compact Flash card,
-PCMCIA/IDE card and TFT-LCD card.
-
-To compile for Pangolin, you must issue the following commands:
-
- make pangolin_config
- make oldconfig
- make zImage
-
-Supported peripherals:
-- SA1110 serial port (UART1/UART2/UART3)
-- flash memory access
-- compact flash driver
-- UDA1341 sound driver
-- SA1100 LCD controller for 800x600 16bpp TFT-LCD
-- MQ-200 driver for 800x600 16bpp TFT-LCD
-- Penmount(touch panel) driver
-- PCMCIA driver
-- SMC91C94 LAN driver
-- IDE driver (experimental)
diff --git a/Documentation/arm/SA1100/Tifon b/Documentation/arm/SA1100/Tifon
deleted file mode 100644
index dd1934d9c851..000000000000
--- a/Documentation/arm/SA1100/Tifon
+++ /dev/null
@@ -1,7 +0,0 @@
-Tifon
------
-
-More info has to come...
-
-Contact: Peter Danielsson <peter.danielsson@era-t.ericsson.se>
-
diff --git a/Documentation/arm/SA1100/Yopy b/Documentation/arm/SA1100/Yopy
deleted file mode 100644
index e14f16d836ac..000000000000
--- a/Documentation/arm/SA1100/Yopy
+++ /dev/null
@@ -1,2 +0,0 @@
-See http://www.yopydeveloper.org for more.
-
diff --git a/Documentation/arm/SA1100/empeg b/Documentation/arm/SA1100/empeg
deleted file mode 100644
index 4ece4849a42c..000000000000
--- a/Documentation/arm/SA1100/empeg
+++ /dev/null
@@ -1,2 +0,0 @@
-See ../empeg/README
-
diff --git a/Documentation/arm/SA1100/nanoEngine b/Documentation/arm/SA1100/nanoEngine
deleted file mode 100644
index 48a7934f95f6..000000000000
--- a/Documentation/arm/SA1100/nanoEngine
+++ /dev/null
@@ -1,11 +0,0 @@
-nanoEngine
-----------
-
-"nanoEngine" is a SA1110 based single board computer from
-Bright Star Engineering Inc. See www.brightstareng.com/arm
-for more info.
-(Ref: Stuart Adams <sja@brightstareng.com>)
-
-Also visit Larry Doolittle's "Linux for the nanoEngine" site:
-http://www.brightstareng.com/arm/nanoeng.htm
-
diff --git a/Documentation/arm/SA1100/serial_UART b/Documentation/arm/SA1100/serial_UART
deleted file mode 100644
index a63966f1d083..000000000000
--- a/Documentation/arm/SA1100/serial_UART
+++ /dev/null
@@ -1,47 +0,0 @@
-The SA1100 serial port had its major/minor numbers officially assigned:
-
-> Date: Sun, 24 Sep 2000 21:40:27 -0700
-> From: H. Peter Anvin <hpa@transmeta.com>
-> To: Nicolas Pitre <nico@CAM.ORG>
-> Cc: Device List Maintainer <device@lanana.org>
-> Subject: Re: device
->
-> Okay. Note that device numbers 204 and 205 are used for "low density
-> serial devices", so you will have a range of minors on those majors (the
-> tty device layer handles this just fine, so you don't have to worry about
-> doing anything special.)
->
-> So your assignments are:
->
-> 204 char Low-density serial ports
-> 5 = /dev/ttySA0 SA1100 builtin serial port 0
-> 6 = /dev/ttySA1 SA1100 builtin serial port 1
-> 7 = /dev/ttySA2 SA1100 builtin serial port 2
->
-> 205 char Low-density serial ports (alternate device)
-> 5 = /dev/cusa0 Callout device for ttySA0
-> 6 = /dev/cusa1 Callout device for ttySA1
-> 7 = /dev/cusa2 Callout device for ttySA2
->
-
-You must create those inodes in /dev on the root filesystem used
-by your SA1100-based device:
-
- mknod ttySA0 c 204 5
- mknod ttySA1 c 204 6
- mknod ttySA2 c 204 7
- mknod cusa0 c 205 5
- mknod cusa1 c 205 6
- mknod cusa2 c 205 7
-
-In addition to the creation of the appropriate device nodes above, you
-must ensure your user space applications make use of the correct device
-name. The classic example is the content of the /etc/inittab file where
-you might have a getty process started on ttyS0. In this case:
-
-- replace occurrences of ttyS0 with ttySA0, ttyS1 with ttySA1, etc.
-
-- don't forget to add 'ttySA0', 'console', or the appropriate tty name
- in /etc/securetty for root to be allowed to login as well.
-
-
diff --git a/Documentation/arm/SH-Mobile/.gitignore b/Documentation/arm/SH-Mobile/.gitignore
deleted file mode 100644
index c928dbf3cc88..000000000000
--- a/Documentation/arm/SH-Mobile/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-vrl4
diff --git a/Documentation/arm/SPEAr/overview.txt b/Documentation/arm/SPEAr/overview.txt
deleted file mode 100644
index 1b049be6c84f..000000000000
--- a/Documentation/arm/SPEAr/overview.txt
+++ /dev/null
@@ -1,63 +0,0 @@
- SPEAr ARM Linux Overview
- ==========================
-
-Introduction
-------------
-
- SPEAr (Structured Processor Enhanced Architecture).
- weblink : http://www.st.com/spear
-
- The ST Microelectronics SPEAr range of ARM9/CortexA9 System-on-Chip CPUs are
- supported by the 'spear' platform of ARM Linux. Currently SPEAr1310,
- SPEAr1340, SPEAr300, SPEAr310, SPEAr320 and SPEAr600 SOCs are supported.
-
- Hierarchy in SPEAr is as follows:
-
- SPEAr (Platform)
- - SPEAr3XX (3XX SOC series, based on ARM9)
- - SPEAr300 (SOC)
- - SPEAr300 Evaluation Board
- - SPEAr310 (SOC)
- - SPEAr310 Evaluation Board
- - SPEAr320 (SOC)
- - SPEAr320 Evaluation Board
- - SPEAr6XX (6XX SOC series, based on ARM9)
- - SPEAr600 (SOC)
- - SPEAr600 Evaluation Board
- - SPEAr13XX (13XX SOC series, based on ARM CORTEXA9)
- - SPEAr1310 (SOC)
- - SPEAr1310 Evaluation Board
- - SPEAr1340 (SOC)
- - SPEAr1340 Evaluation Board
-
- Configuration
- -------------
-
- A generic configuration is provided for each machine, and can be used as the
- default by
- make spear13xx_defconfig
- make spear3xx_defconfig
- make spear6xx_defconfig
-
- Layout
- ------
-
- The common files for multiple machine families (SPEAr3xx, SPEAr6xx and
- SPEAr13xx) are located in the platform code contained in arch/arm/plat-spear
- with headers in plat/.
-
- Each machine series have a directory with name arch/arm/mach-spear followed by
- series name. Like mach-spear3xx, mach-spear6xx and mach-spear13xx.
-
- Common file for machines of spear3xx family is mach-spear3xx/spear3xx.c, for
- spear6xx is mach-spear6xx/spear6xx.c and for spear13xx family is
- mach-spear13xx/spear13xx.c. mach-spear* also contain soc/machine specific
- files, like spear1310.c, spear1340.c spear300.c, spear310.c, spear320.c and
- spear600.c. mach-spear* doesn't contains board specific files as they fully
- support Flattened Device Tree.
-
-
- Document Author
- ---------------
-
- Viresh Kumar <vireshk@kernel.org>, (c) 2010-2012 ST Microelectronics
diff --git a/Documentation/arm/Samsung-S3C24XX/CPUfreq.txt b/Documentation/arm/Samsung-S3C24XX/CPUfreq.txt
deleted file mode 100644
index fa968aa99d67..000000000000
--- a/Documentation/arm/Samsung-S3C24XX/CPUfreq.txt
+++ /dev/null
@@ -1,75 +0,0 @@
- S3C24XX CPUfreq support
- =======================
-
-Introduction
-------------
-
- The S3C24XX series support a number of power saving systems, such as
- the ability to change the core, memory and peripheral operating
- frequencies. The core control is exported via the CPUFreq driver
- which has a number of different manual or automatic controls over the
- rate the core is running at.
-
- There are two forms of the driver depending on the specific CPU and
- how the clocks are arranged. The first implementation used as single
- PLL to feed the ARM, memory and peripherals via a series of dividers
- and muxes and this is the implementation that is documented here. A
- newer version where there is a separate PLL and clock divider for the
- ARM core is available as a separate driver.
-
-
-Layout
-------
-
- The code core manages the CPU specific drivers, any data that they
- need to register and the interface to the generic drivers/cpufreq
- system. Each CPU registers a driver to control the PLL, clock dividers
- and anything else associated with it. Any board that wants to use this
- framework needs to supply at least basic details of what is required.
-
- The core registers with drivers/cpufreq at init time if all the data
- necessary has been supplied.
-
-
-CPU support
------------
-
- The support for each CPU depends on the facilities provided by the
- SoC and the driver as each device has different PLL and clock chains
- associated with it.
-
-
-Slow Mode
----------
-
- The SLOW mode where the PLL is turned off altogether and the
- system is fed by the external crystal input is currently not
- supported.
-
-
-sysfs
------
-
- The core code exports extra information via sysfs in the directory
- devices/system/cpu/cpu0/arch-freq.
-
-
-Board Support
--------------
-
- Each board that wants to use the cpufreq code must register some basic
- information with the core driver to provide information about what the
- board requires and any restrictions being placed on it.
-
- The board needs to supply information about whether it needs the IO bank
- timings changing, any maximum frequency limits and information about the
- SDRAM refresh rate.
-
-
-
-
-Document Author
----------------
-
-Ben Dooks, Copyright 2009 Simtec Electronics
-Licensed under GPLv2
diff --git a/Documentation/arm/Samsung-S3C24XX/EB2410ITX.txt b/Documentation/arm/Samsung-S3C24XX/EB2410ITX.txt
deleted file mode 100644
index b87292e05f2f..000000000000
--- a/Documentation/arm/Samsung-S3C24XX/EB2410ITX.txt
+++ /dev/null
@@ -1,58 +0,0 @@
- Simtec Electronics EB2410ITX (BAST)
- ===================================
-
- http://www.simtec.co.uk/products/EB2410ITX/
-
-Introduction
-------------
-
- The EB2410ITX is a S3C2410 based development board with a variety of
- peripherals and expansion connectors. This board is also known by
- the shortened name of Bast.
-
-
-Configuration
--------------
-
- To set the default configuration, use `make bast_defconfig` which
- supports the commonly used features of this board.
-
-
-Support
--------
-
- Official support information can be found on the Simtec Electronics
- website, at the product page http://www.simtec.co.uk/products/EB2410ITX/
-
- Useful links:
-
- - Resources Page http://www.simtec.co.uk/products/EB2410ITX/resources.html
-
- - Board FAQ at http://www.simtec.co.uk/products/EB2410ITX/faq.html
-
- - Bootloader info http://www.simtec.co.uk/products/SWABLE/resources.html
- and FAQ http://www.simtec.co.uk/products/SWABLE/faq.html
-
-
-MTD
----
-
- The NAND and NOR support has been merged from the linux-mtd project.
- Any problems, see http://www.linux-mtd.infradead.org/ for more
- information or up-to-date versions of linux-mtd.
-
-
-IDE
----
-
- Both onboard IDE ports are supported, however there is no support for
- changing speed of devices, PIO Mode 4 capable drives should be used.
-
-
-Maintainers
------------
-
- This board is maintained by Simtec Electronics.
-
-
-Copyright 2004 Ben Dooks, Simtec Electronics
diff --git a/Documentation/arm/Samsung-S3C24XX/GPIO.txt b/Documentation/arm/Samsung-S3C24XX/GPIO.txt
deleted file mode 100644
index 0ebd7e2244d0..000000000000
--- a/Documentation/arm/Samsung-S3C24XX/GPIO.txt
+++ /dev/null
@@ -1,171 +0,0 @@
- S3C24XX GPIO Control
- ====================
-
-Introduction
-------------
-
- The s3c2410 kernel provides an interface to configure and
- manipulate the state of the GPIO pins, and find out other
- information about them.
-
- There are a number of conditions attached to the configuration
- of the s3c2410 GPIO system, please read the Samsung provided
- data-sheet/users manual to find out the complete list.
-
- See Documentation/arm/Samsung/GPIO.txt for the core implementation.
-
-
-GPIOLIB
--------
-
- With the event of the GPIOLIB in drivers/gpio, support for some
- of the GPIO functions such as reading and writing a pin will
- be removed in favour of this common access method.
-
- Once all the extant drivers have been converted, the functions
- listed below will be removed (they may be marked as __deprecated
- in the near future).
-
- The following functions now either have a s3c_ specific variant
- or are merged into gpiolib. See the definitions in
- arch/arm/plat-samsung/include/plat/gpio-cfg.h:
-
- s3c2410_gpio_setpin() gpio_set_value() or gpio_direction_output()
- s3c2410_gpio_getpin() gpio_get_value() or gpio_direction_input()
- s3c2410_gpio_getirq() gpio_to_irq()
- s3c2410_gpio_cfgpin() s3c_gpio_cfgpin()
- s3c2410_gpio_getcfg() s3c_gpio_getcfg()
- s3c2410_gpio_pullup() s3c_gpio_setpull()
-
-
-GPIOLIB conversion
-------------------
-
-If you need to convert your board or driver to use gpiolib from the phased
-out s3c2410 API, then here are some notes on the process.
-
-1) If your board is exclusively using an GPIO, say to control peripheral
- power, then it will require to claim the gpio with gpio_request() before
- it can use it.
-
- It is recommended to check the return value, with at least WARN_ON()
- during initialisation.
-
-2) The s3c2410_gpio_cfgpin() can be directly replaced with s3c_gpio_cfgpin()
- as they have the same arguments, and can either take the pin specific
- values, or the more generic special-function-number arguments.
-
-3) s3c2410_gpio_pullup() changes have the problem that whilst the
- s3c2410_gpio_pullup(x, 1) can be easily translated to the
- s3c_gpio_setpull(x, S3C_GPIO_PULL_NONE), the s3c2410_gpio_pullup(x, 0)
- are not so easy.
-
- The s3c2410_gpio_pullup(x, 0) case enables the pull-up (or in the case
- of some of the devices, a pull-down) and as such the new API distinguishes
- between the UP and DOWN case. There is currently no 'just turn on' setting
- which may be required if this becomes a problem.
-
-4) s3c2410_gpio_setpin() can be replaced by gpio_set_value(), the old call
- does not implicitly configure the relevant gpio to output. The gpio
- direction should be changed before using gpio_set_value().
-
-5) s3c2410_gpio_getpin() is replaceable by gpio_get_value() if the pin
- has been set to input. It is currently unknown what the behaviour is
- when using gpio_get_value() on an output pin (s3c2410_gpio_getpin
- would return the value the pin is supposed to be outputting).
-
-6) s3c2410_gpio_getirq() should be directly replaceable with the
- gpio_to_irq() call.
-
-The s3c2410_gpio and gpio_ calls have always operated on the same gpio
-numberspace, so there is no problem with converting the gpio numbering
-between the calls.
-
-
-Headers
--------
-
- See arch/arm/mach-s3c24xx/include/mach/regs-gpio.h for the list
- of GPIO pins, and the configuration values for them. This
- is included by using #include <mach/regs-gpio.h>
-
-
-PIN Numbers
------------
-
- Each pin has an unique number associated with it in regs-gpio.h,
- e.g. S3C2410_GPA(0) or S3C2410_GPF(1). These defines are used to tell
- the GPIO functions which pin is to be used.
-
- With the conversion to gpiolib, there is no longer a direct conversion
- from gpio pin number to register base address as in earlier kernels. This
- is due to the number space required for newer SoCs where the later
- GPIOs are not contiguous.
-
-
-Configuring a pin
------------------
-
- The following function allows the configuration of a given pin to
- be changed.
-
- void s3c_gpio_cfgpin(unsigned int pin, unsigned int function);
-
- e.g.:
-
- s3c_gpio_cfgpin(S3C2410_GPA(0), S3C_GPIO_SFN(1));
- s3c_gpio_cfgpin(S3C2410_GPE(8), S3C_GPIO_SFN(2));
-
- which would turn GPA(0) into the lowest Address line A0, and set
- GPE(8) to be connected to the SDIO/MMC controller's SDDAT1 line.
-
-
-Reading the current configuration
----------------------------------
-
- The current configuration of a pin can be read by using standard
- gpiolib function:
-
- s3c_gpio_getcfg(unsigned int pin);
-
- The return value will be from the same set of values which can be
- passed to s3c_gpio_cfgpin().
-
-
-Configuring a pull-up resistor
-------------------------------
-
- A large proportion of the GPIO pins on the S3C2410 can have weak
- pull-up resistors enabled. This can be configured by the following
- function:
-
- void s3c_gpio_setpull(unsigned int pin, unsigned int to);
-
- Where the to value is S3C_GPIO_PULL_NONE to set the pull-up off,
- and S3C_GPIO_PULL_UP to enable the specified pull-up. Any other
- values are currently undefined.
-
-
-Getting and setting the state of a PIN
---------------------------------------
-
- These calls are now implemented by the relevant gpiolib calls, convert
- your board or driver to use gpiolib.
-
-
-Getting the IRQ number associated with a PIN
---------------------------------------------
-
- A standard gpiolib function can map the given pin number to an IRQ
- number to pass to the IRQ system.
-
- int gpio_to_irq(unsigned int pin);
-
- Note, not all pins have an IRQ.
-
-
-Author
--------
-
-Ben Dooks, 03 October 2004
-Copyright 2004 Ben Dooks, Simtec Electronics
diff --git a/Documentation/arm/Samsung-S3C24XX/H1940.txt b/Documentation/arm/Samsung-S3C24XX/H1940.txt
deleted file mode 100644
index b738859b1fc0..000000000000
--- a/Documentation/arm/Samsung-S3C24XX/H1940.txt
+++ /dev/null
@@ -1,40 +0,0 @@
- HP IPAQ H1940
- =============
-
-http://www.handhelds.org/projects/h1940.html
-
-Introduction
-------------
-
- The HP H1940 is a S3C2410 based handheld device, with
- bluetooth connectivity.
-
-
-Support
--------
-
- A variety of information is available
-
- handhelds.org project page:
-
- http://www.handhelds.org/projects/h1940.html
-
- handhelds.org wiki page:
-
- http://handhelds.org/moin/moin.cgi/HpIpaqH1940
-
- Herbert Pötzl pages:
-
- http://vserver.13thfloor.at/H1940/
-
-
-Maintainers
------------
-
- This project is being maintained and developed by a variety
- of people, including Ben Dooks, Arnaud Patard, and Herbert Pötzl.
-
- Thanks to the many others who have also provided support.
-
-
-(c) 2005 Ben Dooks
diff --git a/Documentation/arm/Samsung-S3C24XX/NAND.txt b/Documentation/arm/Samsung-S3C24XX/NAND.txt
deleted file mode 100644
index bc478a3409b8..000000000000
--- a/Documentation/arm/Samsung-S3C24XX/NAND.txt
+++ /dev/null
@@ -1,30 +0,0 @@
- S3C24XX NAND Support
- ====================
-
-Introduction
-------------
-
-Small Page NAND
----------------
-
-The driver uses a 512 byte (1 page) ECC code for this setup. The
-ECC code is not directly compatible with the default kernel ECC
-code, so the driver enforces its own OOB layout and ECC parameters
-
-Large Page NAND
----------------
-
-The driver is capable of handling NAND flash with a 2KiB page
-size, with support for hardware ECC generation and correction.
-
-Unlike the 512byte page mode, the driver generates ECC data for
-each 256 byte block in an 2KiB page. This means that more than
-one error in a page can be rectified. It also means that the
-OOB layout remains the default kernel layout for these flashes.
-
-
-Document Author
----------------
-
-Ben Dooks, Copyright 2007 Simtec Electronics
-
diff --git a/Documentation/arm/Samsung-S3C24XX/Overview.txt b/Documentation/arm/Samsung-S3C24XX/Overview.txt
deleted file mode 100644
index 359587b2367b..000000000000
--- a/Documentation/arm/Samsung-S3C24XX/Overview.txt
+++ /dev/null
@@ -1,318 +0,0 @@
- S3C24XX ARM Linux Overview
- ==========================
-
-
-
-Introduction
-------------
-
- The Samsung S3C24XX range of ARM9 System-on-Chip CPUs are supported
- by the 's3c2410' architecture of ARM Linux. Currently the S3C2410,
- S3C2412, S3C2413, S3C2416, S3C2440, S3C2442, S3C2443 and S3C2450 devices
- are supported.
-
- Support for the S3C2400 and S3C24A0 series was never completed and the
- corresponding code has been removed after a while. If someone wishes to
- revive this effort, partial support can be retrieved from earlier Linux
- versions.
-
- The S3C2416 and S3C2450 devices are very similar and S3C2450 support is
- included under the arch/arm/mach-s3c2416 directory. Note, whilst core
- support for these SoCs is in, work on some of the extra peripherals
- and extra interrupts is still ongoing.
-
-
-Configuration
--------------
-
- A generic S3C2410 configuration is provided, and can be used as the
- default by `make s3c2410_defconfig`. This configuration has support
- for all the machines, and the commonly used features on them.
-
- Certain machines may have their own default configurations as well,
- please check the machine specific documentation.
-
-
-Layout
-------
-
- The core support files are located in the platform code contained in
- arch/arm/plat-s3c24xx with headers in include/asm-arm/plat-s3c24xx.
- This directory should be kept to items shared between the platform
- code (arch/arm/plat-s3c24xx) and the arch/arm/mach-s3c24* code.
-
- Each cpu has a directory with the support files for it, and the
- machines that carry the device. For example S3C2410 is contained
- in arch/arm/mach-s3c2410 and S3C2440 in arch/arm/mach-s3c2440
-
- Register, kernel and platform data definitions are held in the
- arch/arm/mach-s3c2410 directory./include/mach
-
-arch/arm/plat-s3c24xx:
-
- Files in here are either common to all the s3c24xx family,
- or are common to only some of them with names to indicate this
- status. The files that are not common to all are generally named
- with the initial cpu they support in the series to ensure a short
- name without any possibility of confusion with newer devices.
-
- As an example, initially s3c244x would cover s3c2440 and s3c2442, but
- with the s3c2443 which does not share many of the same drivers in
- this directory, the name becomes invalid. We stick to s3c2440-<x>
- to indicate a driver that is s3c2440 and s3c2442 compatible.
-
- This does mean that to find the status of any given SoC, a number
- of directories may need to be searched.
-
-
-Machines
---------
-
- The currently supported machines are as follows:
-
- Simtec Electronics EB2410ITX (BAST)
-
- A general purpose development board, see EB2410ITX.txt for further
- details
-
- Simtec Electronics IM2440D20 (Osiris)
-
- CPU Module from Simtec Electronics, with a S3C2440A CPU, nand flash
- and a PCMCIA controller.
-
- Samsung SMDK2410
-
- Samsung's own development board, geared for PDA work.
-
- Samsung/Aiji SMDK2412
-
- The S3C2412 version of the SMDK2440.
-
- Samsung/Aiji SMDK2413
-
- The S3C2412 version of the SMDK2440.
-
- Samsung/Meritech SMDK2440
-
- The S3C2440 compatible version of the SMDK2440, which has the
- option of an S3C2440 or S3C2442 CPU module.
-
- Thorcom VR1000
-
- Custom embedded board
-
- HP IPAQ 1940
-
- Handheld (IPAQ), available in several varieties
-
- HP iPAQ rx3715
-
- S3C2440 based IPAQ, with a number of variations depending on
- features shipped.
-
- Acer N30
-
- A S3C2410 based PDA from Acer. There is a Wiki page at
- http://handhelds.org/moin/moin.cgi/AcerN30Documentation .
-
- AML M5900
-
- American Microsystems' M5900
-
- Nex Vision Nexcoder
- Nex Vision Otom
-
- Two machines by Nex Vision
-
-
-Adding New Machines
--------------------
-
- The architecture has been designed to support as many machines as can
- be configured for it in one kernel build, and any future additions
- should keep this in mind before altering items outside of their own
- machine files.
-
- Machine definitions should be kept in linux/arch/arm/mach-s3c2410,
- and there are a number of examples that can be looked at.
-
- Read the kernel patch submission policies as well as the
- Documentation/arm directory before submitting patches. The
- ARM kernel series is managed by Russell King, and has a patch system
- located at http://www.arm.linux.org.uk/developer/patches/
- as well as mailing lists that can be found from the same site.
-
- As a courtesy, please notify <ben-linux@fluff.org> of any new
- machines or other modifications.
-
- Any large scale modifications, or new drivers should be discussed
- on the ARM kernel mailing list (linux-arm-kernel) before being
- attempted. See http://www.arm.linux.org.uk/mailinglists/ for the
- mailing list information.
-
-
-I2C
----
-
- The hardware I2C core in the CPU is supported in single master
- mode, and can be configured via platform data.
-
-
-RTC
----
-
- Support for the onboard RTC unit, including alarm function.
-
- This has recently been upgraded to use the new RTC core,
- and the module has been renamed to rtc-s3c to fit in with
- the new rtc naming scheme.
-
-
-Watchdog
---------
-
- The onchip watchdog is available via the standard watchdog
- interface.
-
-
-NAND
-----
-
- The current kernels now have support for the s3c2410 NAND
- controller. If there are any problems the latest linux-mtd
- code can be found from http://www.linux-mtd.infradead.org/
-
- For more information see Documentation/arm/Samsung-S3C24XX/NAND.txt
-
-
-SD/MMC
-------
-
- The SD/MMC hardware pre S3C2443 is supported in the current
- kernel, the driver is drivers/mmc/host/s3cmci.c and supports
- 1 and 4 bit SD or MMC cards.
-
- The SDIO behaviour of this driver has not been fully tested. There is no
- current support for hardware SDIO interrupts.
-
-
-Serial
-------
-
- The s3c2410 serial driver provides support for the internal
- serial ports. These devices appear as /dev/ttySAC0 through 3.
-
- To create device nodes for these, use the following commands
-
- mknod ttySAC0 c 204 64
- mknod ttySAC1 c 204 65
- mknod ttySAC2 c 204 66
-
-
-GPIO
-----
-
- The core contains support for manipulating the GPIO, see the
- documentation in GPIO.txt in the same directory as this file.
-
- Newer kernels carry GPIOLIB, and support is being moved towards
- this with some of the older support in line to be removed.
-
- As of v2.6.34, the move towards using gpiolib support is almost
- complete, and very little of the old calls are left.
-
- See Documentation/arm/Samsung-S3C24XX/GPIO.txt for the S3C24XX specific
- support and Documentation/arm/Samsung/GPIO.txt for the core Samsung
- implementation.
-
-
-Clock Management
-----------------
-
- The core provides the interface defined in the header file
- include/asm-arm/hardware/clock.h, to allow control over the
- various clock units
-
-
-Suspend to RAM
---------------
-
- For boards that provide support for suspend to RAM, the
- system can be placed into low power suspend.
-
- See Suspend.txt for more information.
-
-
-SPI
----
-
- SPI drivers are available for both the in-built hardware
- (although there is no DMA support yet) and a generic
- GPIO based solution.
-
-
-LEDs
-----
-
- There is support for GPIO based LEDs via a platform driver
- in the LED subsystem.
-
-
-Platform Data
--------------
-
- Whenever a device has platform specific data that is specified
- on a per-machine basis, care should be taken to ensure the
- following:
-
- 1) that default data is not left in the device to confuse the
- driver if a machine does not set it at startup
-
- 2) the data should (if possible) be marked as __initdata,
- to ensure that the data is thrown away if the machine is
- not the one currently in use.
-
- The best way of doing this is to make a function that
- kmalloc()s an area of memory, and copies the __initdata
- and then sets the relevant device's platform data. Making
- the function `__init` takes care of ensuring it is discarded
- with the rest of the initialisation code
-
- static __init void s3c24xx_xxx_set_platdata(struct xxx_data *pd)
- {
- struct s3c2410_xxx_mach_info *npd;
-
- npd = kmalloc(sizeof(struct s3c2410_xxx_mach_info), GFP_KERNEL);
- if (npd) {
- memcpy(npd, pd, sizeof(struct s3c2410_xxx_mach_info));
- s3c_device_xxx.dev.platform_data = npd;
- } else {
- printk(KERN_ERR "no memory for xxx platform data\n");
- }
- }
-
- Note, since the code is marked as __init, it should not be
- exported outside arch/arm/mach-s3c2410/, or exported to
- modules via EXPORT_SYMBOL() and related functions.
-
-
-Port Contributors
------------------
-
- Ben Dooks (BJD)
- Vincent Sanders
- Herbert Potzl
- Arnaud Patard (RTP)
- Roc Wu
- Klaus Fetscher
- Dimitry Andric
- Shannon Holland
- Guillaume Gourat (NexVision)
- Christer Weinigel (wingel) (Acer N30)
- Lucas Correia Villa Real (S3C2400 port)
-
-
-Document Author
----------------
-
-Ben Dooks, Copyright 2004-2006 Simtec Electronics
diff --git a/Documentation/arm/Samsung-S3C24XX/S3C2412.txt b/Documentation/arm/Samsung-S3C24XX/S3C2412.txt
deleted file mode 100644
index dc1fd362d3c1..000000000000
--- a/Documentation/arm/Samsung-S3C24XX/S3C2412.txt
+++ /dev/null
@@ -1,120 +0,0 @@
- S3C2412 ARM Linux Overview
- ==========================
-
-Introduction
-------------
-
- The S3C2412 is part of the S3C24XX range of ARM9 System-on-Chip CPUs
- from Samsung. This part has an ARM926-EJS core, capable of running up
- to 266MHz (see data-sheet for more information)
-
-
-Clock
------
-
- The core clock code provides a set of clocks to the drivers, and allows
- for source selection and a number of other features.
-
-
-Power
------
-
- No support for suspend/resume to RAM in the current system.
-
-
-DMA
----
-
- No current support for DMA.
-
-
-GPIO
-----
-
- There is support for setting the GPIO to input/output/special function
- and reading or writing to them.
-
-
-UART
-----
-
- The UART hardware is similar to the S3C2440, and is supported by the
- s3c2410 driver in the drivers/serial directory.
-
-
-NAND
-----
-
- The NAND hardware is similar to the S3C2440, and is supported by the
- s3c2410 driver in the drivers/mtd/nand/raw directory.
-
-
-USB Host
---------
-
- The USB hardware is similar to the S3C2410, with extended clock source
- control. The OHCI portion is supported by the ohci-s3c2410 driver, and
- the clock control selection is supported by the core clock code.
-
-
-USB Device
-----------
-
- No current support in the kernel
-
-
-IRQs
-----
-
- All the standard, and external interrupt sources are supported. The
- extra sub-sources are not yet supported.
-
-
-RTC
----
-
- The RTC hardware is similar to the S3C2410, and is supported by the
- s3c2410-rtc driver.
-
-
-Watchdog
---------
-
- The watchdog hardware is the same as the S3C2410, and is supported by
- the s3c2410_wdt driver.
-
-
-MMC/SD/SDIO
------------
-
- No current support for the MMC/SD/SDIO block.
-
-IIC
----
-
- The IIC hardware is the same as the S3C2410, and is supported by the
- i2c-s3c24xx driver.
-
-
-IIS
----
-
- No current support for the IIS interface.
-
-
-SPI
----
-
- No current support for the SPI interfaces.
-
-
-ATA
----
-
- No current support for the on-board ATA block.
-
-
-Document Author
----------------
-
-Ben Dooks, Copyright 2006 Simtec Electronics
diff --git a/Documentation/arm/Samsung-S3C24XX/S3C2413.txt b/Documentation/arm/Samsung-S3C24XX/S3C2413.txt
deleted file mode 100644
index 909bdc7dd7b5..000000000000
--- a/Documentation/arm/Samsung-S3C24XX/S3C2413.txt
+++ /dev/null
@@ -1,21 +0,0 @@
- S3C2413 ARM Linux Overview
- ==========================
-
-Introduction
-------------
-
- The S3C2413 is an extended version of the S3C2412, with an camera
- interface and mobile DDR memory support. See the S3C2412 support
- documentation for more information.
-
-
-Camera Interface
----------------
-
- This block is currently not supported.
-
-
-Document Author
----------------
-
-Ben Dooks, Copyright 2006 Simtec Electronics
diff --git a/Documentation/arm/Samsung-S3C24XX/SMDK2440.txt b/Documentation/arm/Samsung-S3C24XX/SMDK2440.txt
deleted file mode 100644
index 429390bd4684..000000000000
--- a/Documentation/arm/Samsung-S3C24XX/SMDK2440.txt
+++ /dev/null
@@ -1,56 +0,0 @@
- Samsung/Meritech SMDK2440
- =========================
-
-Introduction
-------------
-
- The SMDK2440 is a two part evaluation board for the Samsung S3C2440
- processor. It includes support for LCD, SmartMedia, Audio, SD and
- 10MBit Ethernet, and expansion headers for various signals, including
- the camera and unused GPIO.
-
-
-Configuration
--------------
-
- To set the default configuration, use `make smdk2440_defconfig` which
- will configure the common features of this board, or use
- `make s3c2410_config` to include support for all s3c2410/s3c2440 machines
-
-
-Support
--------
-
- Ben Dooks' SMDK2440 site at http://www.fluff.org/ben/smdk2440/ which
- includes linux based USB download tools.
-
- Some of the h1940 patches that can be found from the H1940 project
- site at http://www.handhelds.org/projects/h1940.html can also be
- applied to this board.
-
-
-Peripherals
------------
-
- There is no current support for any of the extra peripherals on the
- base-board itself.
-
-
-MTD
----
-
- The NAND flash should be supported by the in kernel MTD NAND support,
- NOR flash will be added later.
-
-
-Maintainers
------------
-
- This board is being maintained by Ben Dooks, for more info, see
- http://www.fluff.org/ben/smdk2440/
-
- Many thanks to Dimitry Andric of TomTom for the loan of the SMDK2440,
- and to Simtec Electronics for allowing me time to work on this.
-
-
-(c) 2004 Ben Dooks
diff --git a/Documentation/arm/Samsung-S3C24XX/Suspend.txt b/Documentation/arm/Samsung-S3C24XX/Suspend.txt
deleted file mode 100644
index 1ca63b3e5635..000000000000
--- a/Documentation/arm/Samsung-S3C24XX/Suspend.txt
+++ /dev/null
@@ -1,137 +0,0 @@
- S3C24XX Suspend Support
- =======================
-
-
-Introduction
-------------
-
- The S3C24XX supports a low-power suspend mode, where the SDRAM is kept
- in Self-Refresh mode, and all but the essential peripheral blocks are
- powered down. For more information on how this works, please look
- at the relevant CPU datasheet from Samsung.
-
-
-Requirements
-------------
-
- 1) A bootloader that can support the necessary resume operation
-
- 2) Support for at least 1 source for resume
-
- 3) CONFIG_PM enabled in the kernel
-
- 4) Any peripherals that are going to be powered down at the same
- time require suspend/resume support.
-
-
-Resuming
---------
-
- The S3C2410 user manual defines the process of sending the CPU to
- sleep and how it resumes. The default behaviour of the Linux code
- is to set the GSTATUS3 register to the physical address of the
- code to resume Linux operation.
-
- GSTATUS4 is currently left alone by the sleep code, and is free to
- use for any other purposes (for example, the EB2410ITX uses this to
- save memory configuration in).
-
-
-Machine Support
----------------
-
- The machine specific functions must call the s3c_pm_init() function
- to say that its bootloader is capable of resuming. This can be as
- simple as adding the following to the machine's definition:
-
- INITMACHINE(s3c_pm_init)
-
- A board can do its own setup before calling s3c_pm_init, if it
- needs to setup anything else for power management support.
-
- There is currently no support for over-riding the default method of
- saving the resume address, if your board requires it, then contact
- the maintainer and discuss what is required.
-
- Note, the original method of adding an late_initcall() is wrong,
- and will end up initialising all compiled machines' pm init!
-
- The following is an example of code used for testing wakeup from
- an falling edge on IRQ_EINT0:
-
-
-static irqreturn_t button_irq(int irq, void *pw)
-{
- return IRQ_HANDLED;
-}
-
-statuc void __init machine_init(void)
-{
- ...
-
- request_irq(IRQ_EINT0, button_irq, IRQF_TRIGGER_FALLING,
- "button-irq-eint0", NULL);
-
- enable_irq_wake(IRQ_EINT0);
-
- s3c_pm_init();
-}
-
-
-Debugging
----------
-
- There are several important things to remember when using PM suspend:
-
- 1) The uart drivers will disable the clocks to the UART blocks when
- suspending, which means that use of printascii() or similar direct
- access to the UARTs will cause the debug to stop.
-
- 2) Whilst the pm code itself will attempt to re-enable the UART clocks,
- care should be taken that any external clock sources that the UARTs
- rely on are still enabled at that point.
-
- 3) If any debugging is placed in the resume path, then it must have the
- relevant clocks and peripherals setup before use (ie, bootloader).
-
- For example, if you transmit a character from the UART, the baud
- rate and uart controls must be setup beforehand.
-
-
-Configuration
--------------
-
- The S3C2410 specific configuration in `System Type` defines various
- aspects of how the S3C2410 suspend and resume support is configured
-
- `S3C2410 PM Suspend debug`
-
- This option prints messages to the serial console before and after
- the actual suspend, giving detailed information on what is
- happening
-
-
- `S3C2410 PM Suspend Memory CRC`
-
- Allows the entire memory to be checksummed before and after the
- suspend to see if there has been any corruption of the contents.
-
- Note, the time to calculate the CRC is dependent on the CPU speed
- and the size of memory. For an 64Mbyte RAM area on an 200MHz
- S3C2410, this can take approximately 4 seconds to complete.
-
- This support requires the CRC32 function to be enabled.
-
-
- `S3C2410 PM Suspend CRC Chunksize (KiB)`
-
- Defines the size of memory each CRC chunk covers. A smaller value
- will mean that the CRC data block will take more memory, but will
- identify any faults with better precision
-
-
-Document Author
----------------
-
-Ben Dooks, Copyright 2004 Simtec Electronics
-
diff --git a/Documentation/arm/Samsung-S3C24XX/USB-Host.txt b/Documentation/arm/Samsung-S3C24XX/USB-Host.txt
deleted file mode 100644
index f82b1faefad5..000000000000
--- a/Documentation/arm/Samsung-S3C24XX/USB-Host.txt
+++ /dev/null
@@ -1,93 +0,0 @@
- S3C24XX USB Host support
- ========================
-
-
-
-Introduction
-------------
-
- This document details the S3C2410/S3C2440 in-built OHCI USB host support.
-
-Configuration
--------------
-
- Enable at least the following kernel options:
-
- menuconfig:
-
- Device Drivers --->
- USB support --->
- <*> Support for Host-side USB
- <*> OHCI HCD support
-
-
- .config:
- CONFIG_USB
- CONFIG_USB_OHCI_HCD
-
-
- Once these options are configured, the standard set of USB device
- drivers can be configured and used.
-
-
-Board Support
--------------
-
- The driver attaches to a platform device, which will need to be
- added by the board specific support file in linux/arch/arm/mach-s3c2410,
- such as mach-bast.c or mach-smdk2410.c
-
- The platform device's platform_data field is only needed if the
- board implements extra power control or over-current monitoring.
-
- The OHCI driver does not ensure the state of the S3C2410's MISCCTRL
- register, so if both ports are to be used for the host, then it is
- the board support file's responsibility to ensure that the second
- port is configured to be connected to the OHCI core.
-
-
-Platform Data
--------------
-
- See arch/arm/mach-s3c2410/include/mach/usb-control.h for the
- descriptions of the platform device data. An implementation
- can be found in linux/arch/arm/mach-s3c2410/usb-simtec.c .
-
- The `struct s3c2410_hcd_info` contains a pair of functions
- that get called to enable over-current detection, and to
- control the port power status.
-
- The ports are numbered 0 and 1.
-
- power_control:
-
- Called to enable or disable the power on the port.
-
- enable_oc:
-
- Called to enable or disable the over-current monitoring.
- This should claim or release the resources being used to
- check the power condition on the port, such as an IRQ.
-
- report_oc:
-
- The OHCI driver fills this field in for the over-current code
- to call when there is a change to the over-current state on
- an port. The ports argument is a bitmask of 1 bit per port,
- with bit X being 1 for an over-current on port X.
-
- The function s3c2410_usb_report_oc() has been provided to
- ensure this is called correctly.
-
- port[x]:
-
- This is struct describes each port, 0 or 1. The platform driver
- should set the flags field of each port to S3C_HCDFLG_USED if
- the port is enabled.
-
-
-
-Document Author
----------------
-
-Ben Dooks, Copyright 2005 Simtec Electronics
diff --git a/Documentation/arm/Samsung/Bootloader-interface.txt b/Documentation/arm/Samsung/Bootloader-interface.txt
deleted file mode 100644
index d17ed518a7ea..000000000000
--- a/Documentation/arm/Samsung/Bootloader-interface.txt
+++ /dev/null
@@ -1,68 +0,0 @@
- Interface between kernel and boot loaders on Exynos boards
- ==========================================================
-
-Author: Krzysztof Kozlowski
-Date : 6 June 2015
-
-The document tries to describe currently used interface between Linux kernel
-and boot loaders on Samsung Exynos based boards. This is not a definition
-of interface but rather a description of existing state, a reference
-for information purpose only.
-
-In the document "boot loader" means any of following: U-boot, proprietary
-SBOOT or any other firmware for ARMv7 and ARMv8 initializing the board before
-executing kernel.
-
-
-1. Non-Secure mode
-
-Address: sysram_ns_base_addr
-Offset Value Purpose
-=============================================================================
-0x08 exynos_cpu_resume_ns, mcpm_entry_point System suspend
-0x0c 0x00000bad (Magic cookie) System suspend
-0x1c exynos4_secondary_startup Secondary CPU boot
-0x1c + 4*cpu exynos4_secondary_startup (Exynos4412) Secondary CPU boot
-0x20 0xfcba0d10 (Magic cookie) AFTR
-0x24 exynos_cpu_resume_ns AFTR
-0x28 + 4*cpu 0x8 (Magic cookie, Exynos3250) AFTR
-0x28 0x0 or last value during resume (Exynos542x) System suspend
-
-
-2. Secure mode
-
-Address: sysram_base_addr
-Offset Value Purpose
-=============================================================================
-0x00 exynos4_secondary_startup Secondary CPU boot
-0x04 exynos4_secondary_startup (Exynos542x) Secondary CPU boot
-4*cpu exynos4_secondary_startup (Exynos4412) Secondary CPU boot
-0x20 exynos_cpu_resume (Exynos4210 r1.0) AFTR
-0x24 0xfcba0d10 (Magic cookie, Exynos4210 r1.0) AFTR
-
-Address: pmu_base_addr
-Offset Value Purpose
-=============================================================================
-0x0800 exynos_cpu_resume AFTR, suspend
-0x0800 mcpm_entry_point (Exynos542x with MCPM) AFTR, suspend
-0x0804 0xfcba0d10 (Magic cookie) AFTR
-0x0804 0x00000bad (Magic cookie) System suspend
-0x0814 exynos4_secondary_startup (Exynos4210 r1.1) Secondary CPU boot
-0x0818 0xfcba0d10 (Magic cookie, Exynos4210 r1.1) AFTR
-0x081C exynos_cpu_resume (Exynos4210 r1.1) AFTR
-
-
-3. Other (regardless of secure/non-secure mode)
-
-Address: pmu_base_addr
-Offset Value Purpose
-=============================================================================
-0x0908 Non-zero Secondary CPU boot up indicator
- on Exynos3250 and Exynos542x
-
-
-4. Glossary
-
-AFTR - ARM Off Top Running, a low power mode, Cortex cores and many other
-modules are power gated, except the TOP modules
-MCPM - Multi-Cluster Power Management
diff --git a/Documentation/arm/Samsung/GPIO.txt b/Documentation/arm/Samsung/GPIO.txt
deleted file mode 100644
index 795adfd88081..000000000000
--- a/Documentation/arm/Samsung/GPIO.txt
+++ /dev/null
@@ -1,40 +0,0 @@
- Samsung GPIO implementation
- ===========================
-
-Introduction
-------------
-
-This outlines the Samsung GPIO implementation and the architecture
-specific calls provided alongside the drivers/gpio core.
-
-
-S3C24XX (Legacy)
-----------------
-
-See Documentation/arm/Samsung-S3C24XX/GPIO.txt for more information
-about these devices. Their implementation has been brought into line
-with the core samsung implementation described in this document.
-
-
-GPIOLIB integration
--------------------
-
-The gpio implementation uses gpiolib as much as possible, only providing
-specific calls for the items that require Samsung specific handling, such
-as pin special-function or pull resistor control.
-
-GPIO numbering is synchronised between the Samsung and gpiolib system.
-
-
-PIN configuration
------------------
-
-Pin configuration is specific to the Samsung architecture, with each SoC
-registering the necessary information for the core gpio configuration
-implementation to configure pins as necessary.
-
-The s3c_gpio_cfgpin() and s3c_gpio_setpull() provide the means for a
-driver or machine to change gpio configuration.
-
-See arch/arm/plat-samsung/include/plat/gpio-cfg.h for more information
-on these functions.
diff --git a/Documentation/arm/Samsung/Overview.txt b/Documentation/arm/Samsung/Overview.txt
deleted file mode 100644
index 8f7309bad460..000000000000
--- a/Documentation/arm/Samsung/Overview.txt
+++ /dev/null
@@ -1,86 +0,0 @@
- Samsung ARM Linux Overview
- ==========================
-
-Introduction
-------------
-
- The Samsung range of ARM SoCs spans many similar devices, from the initial
- ARM9 through to the newest ARM cores. This document shows an overview of
- the current kernel support, how to use it and where to find the code
- that supports this.
-
- The currently supported SoCs are:
-
- - S3C24XX: See Documentation/arm/Samsung-S3C24XX/Overview.txt for full list
- - S3C64XX: S3C6400 and S3C6410
- - S5PC110 / S5PV210
-
-
-S3C24XX Systems
----------------
-
- There is still documentation in Documnetation/arm/Samsung-S3C24XX/ which
- deals with the architecture and drivers specific to these devices.
-
- See Documentation/arm/Samsung-S3C24XX/Overview.txt for more information
- on the implementation details and specific support.
-
-
-Configuration
--------------
-
- A number of configurations are supplied, as there is no current way of
- unifying all the SoCs into one kernel.
-
- s5pc110_defconfig - S5PC110 specific default configuration
- s5pv210_defconfig - S5PV210 specific default configuration
-
-
-Layout
-------
-
- The directory layout is currently being restructured, and consists of
- several platform directories and then the machine specific directories
- of the CPUs being built for.
-
- plat-samsung provides the base for all the implementations, and is the
- last in the line of include directories that are processed for the build
- specific information. It contains the base clock, GPIO and device definitions
- to get the system running.
-
- plat-s3c24xx is for s3c24xx specific builds, see the S3C24XX docs.
-
- plat-s5p is for s5p specific builds, and contains common support for the
- S5P specific systems. Not all S5Ps use all the features in this directory
- due to differences in the hardware.
-
-
-Layout changes
---------------
-
- The old plat-s3c and plat-s5pc1xx directories have been removed, with
- support moved to either plat-samsung or plat-s5p as necessary. These moves
- where to simplify the include and dependency issues involved with having
- so many different platform directories.
-
-
-Port Contributors
------------------
-
- Ben Dooks (BJD)
- Vincent Sanders
- Herbert Potzl
- Arnaud Patard (RTP)
- Roc Wu
- Klaus Fetscher
- Dimitry Andric
- Shannon Holland
- Guillaume Gourat (NexVision)
- Christer Weinigel (wingel) (Acer N30)
- Lucas Correia Villa Real (S3C2400 port)
-
-
-Document Author
----------------
-
-Copyright 2009-2010 Ben Dooks <ben-linux@fluff.org>
diff --git a/Documentation/arm/Setup b/Documentation/arm/Setup
deleted file mode 100644
index 0cb1e64bde80..000000000000
--- a/Documentation/arm/Setup
+++ /dev/null
@@ -1,129 +0,0 @@
-Kernel initialisation parameters on ARM Linux
----------------------------------------------
-
-The following document describes the kernel initialisation parameter
-structure, otherwise known as 'struct param_struct' which is used
-for most ARM Linux architectures.
-
-This structure is used to pass initialisation parameters from the
-kernel loader to the Linux kernel proper, and may be short lived
-through the kernel initialisation process. As a general rule, it
-should not be referenced outside of arch/arm/kernel/setup.c:setup_arch().
-
-There are a lot of parameters listed in there, and they are described
-below:
-
- page_size
-
- This parameter must be set to the page size of the machine, and
- will be checked by the kernel.
-
- nr_pages
-
- This is the total number of pages of memory in the system. If
- the memory is banked, then this should contain the total number
- of pages in the system.
-
- If the system contains separate VRAM, this value should not
- include this information.
-
- ramdisk_size
-
- This is now obsolete, and should not be used.
-
- flags
-
- Various kernel flags, including:
- bit 0 - 1 = mount root read only
- bit 1 - unused
- bit 2 - 0 = load ramdisk
- bit 3 - 0 = prompt for ramdisk
-
- rootdev
-
- major/minor number pair of device to mount as the root filesystem.
-
- video_num_cols
- video_num_rows
-
- These two together describe the character size of the dummy console,
- or VGA console character size. They should not be used for any other
- purpose.
-
- It's generally a good idea to set these to be either standard VGA, or
- the equivalent character size of your fbcon display. This then allows
- all the bootup messages to be displayed correctly.
-
- video_x
- video_y
-
- This describes the character position of cursor on VGA console, and
- is otherwise unused. (should not be used for other console types, and
- should not be used for other purposes).
-
- memc_control_reg
-
- MEMC chip control register for Acorn Archimedes and Acorn A5000
- based machines. May be used differently by different architectures.
-
- sounddefault
-
- Default sound setting on Acorn machines. May be used differently by
- different architectures.
-
- adfsdrives
-
- Number of ADFS/MFM disks. May be used differently by different
- architectures.
-
- bytes_per_char_h
- bytes_per_char_v
-
- These are now obsolete, and should not be used.
-
- pages_in_bank[4]
-
- Number of pages in each bank of the systems memory (used for RiscPC).
- This is intended to be used on systems where the physical memory
- is non-contiguous from the processors point of view.
-
- pages_in_vram
-
- Number of pages in VRAM (used on Acorn RiscPC). This value may also
- be used by loaders if the size of the video RAM can't be obtained
- from the hardware.
-
- initrd_start
- initrd_size
-
- This describes the kernel virtual start address and size of the
- initial ramdisk.
-
- rd_start
-
- Start address in sectors of the ramdisk image on a floppy disk.
-
- system_rev
-
- system revision number.
-
- system_serial_low
- system_serial_high
-
- system 64-bit serial number
-
- mem_fclk_21285
-
- The speed of the external oscillator to the 21285 (footbridge),
- which control's the speed of the memory bus, timer & serial port.
- Depending upon the speed of the cpu its value can be between
- 0-66 MHz. If no params are passed or a value of zero is passed,
- then a value of 50 Mhz is the default on 21285 architectures.
-
- paths[8][128]
-
- These are now obsolete, and should not be used.
-
- commandline
-
- Kernel command line parameters. Details can be found elsewhere.
diff --git a/Documentation/arm/VFP/release-notes.txt b/Documentation/arm/VFP/release-notes.txt
deleted file mode 100644
index 28a2795705ca..000000000000
--- a/Documentation/arm/VFP/release-notes.txt
+++ /dev/null
@@ -1,55 +0,0 @@
-Release notes for Linux Kernel VFP support code
------------------------------------------------
-
-Date: 20 May 2004
-Author: Russell King
-
-This is the first release of the Linux Kernel VFP support code. It
-provides support for the exceptions bounced from VFP hardware found
-on ARM926EJ-S.
-
-This release has been validated against the SoftFloat-2b library by
-John R. Hauser using the TestFloat-2a test suite. Details of this
-library and test suite can be found at:
-
- http://www.jhauser.us/arithmetic/SoftFloat.html
-
-The operations which have been tested with this package are:
-
- - fdiv
- - fsub
- - fadd
- - fmul
- - fcmp
- - fcmpe
- - fcvtd
- - fcvts
- - fsito
- - ftosi
- - fsqrt
-
-All the above pass softfloat tests with the following exceptions:
-
-- fadd/fsub shows some differences in the handling of +0 / -0 results
- when input operands differ in signs.
-- the handling of underflow exceptions is slightly different. If a
- result underflows before rounding, but becomes a normalised number
- after rounding, we do not signal an underflow exception.
-
-Other operations which have been tested by basic assembly-only tests
-are:
-
- - fcpy
- - fabs
- - fneg
- - ftoui
- - ftosiz
- - ftouiz
-
-The combination operations have not been tested:
-
- - fmac
- - fnmac
- - fmsc
- - fnmsc
- - fnmul
diff --git a/Documentation/arm/cluster-pm-race-avoidance.txt b/Documentation/arm/cluster-pm-race-avoidance.txt
deleted file mode 100644
index 750b6fc24af9..000000000000
--- a/Documentation/arm/cluster-pm-race-avoidance.txt
+++ /dev/null
@@ -1,498 +0,0 @@
-Cluster-wide Power-up/power-down race avoidance algorithm
-=========================================================
-
-This file documents the algorithm which is used to coordinate CPU and
-cluster setup and teardown operations and to manage hardware coherency
-controls safely.
-
-The section "Rationale" explains what the algorithm is for and why it is
-needed. "Basic model" explains general concepts using a simplified view
-of the system. The other sections explain the actual details of the
-algorithm in use.
-
-
-Rationale
----------
-
-In a system containing multiple CPUs, it is desirable to have the
-ability to turn off individual CPUs when the system is idle, reducing
-power consumption and thermal dissipation.
-
-In a system containing multiple clusters of CPUs, it is also desirable
-to have the ability to turn off entire clusters.
-
-Turning entire clusters off and on is a risky business, because it
-involves performing potentially destructive operations affecting a group
-of independently running CPUs, while the OS continues to run. This
-means that we need some coordination in order to ensure that critical
-cluster-level operations are only performed when it is truly safe to do
-so.
-
-Simple locking may not be sufficient to solve this problem, because
-mechanisms like Linux spinlocks may rely on coherency mechanisms which
-are not immediately enabled when a cluster powers up. Since enabling or
-disabling those mechanisms may itself be a non-atomic operation (such as
-writing some hardware registers and invalidating large caches), other
-methods of coordination are required in order to guarantee safe
-power-down and power-up at the cluster level.
-
-The mechanism presented in this document describes a coherent memory
-based protocol for performing the needed coordination. It aims to be as
-lightweight as possible, while providing the required safety properties.
-
-
-Basic model
------------
-
-Each cluster and CPU is assigned a state, as follows:
-
- DOWN
- COMING_UP
- UP
- GOING_DOWN
-
- +---------> UP ----------+
- | v
-
- COMING_UP GOING_DOWN
-
- ^ |
- +--------- DOWN <--------+
-
-
-DOWN: The CPU or cluster is not coherent, and is either powered off or
- suspended, or is ready to be powered off or suspended.
-
-COMING_UP: The CPU or cluster has committed to moving to the UP state.
- It may be part way through the process of initialisation and
- enabling coherency.
-
-UP: The CPU or cluster is active and coherent at the hardware
- level. A CPU in this state is not necessarily being used
- actively by the kernel.
-
-GOING_DOWN: The CPU or cluster has committed to moving to the DOWN
- state. It may be part way through the process of teardown and
- coherency exit.
-
-
-Each CPU has one of these states assigned to it at any point in time.
-The CPU states are described in the "CPU state" section, below.
-
-Each cluster is also assigned a state, but it is necessary to split the
-state value into two parts (the "cluster" state and "inbound" state) and
-to introduce additional states in order to avoid races between different
-CPUs in the cluster simultaneously modifying the state. The cluster-
-level states are described in the "Cluster state" section.
-
-To help distinguish the CPU states from cluster states in this
-discussion, the state names are given a CPU_ prefix for the CPU states,
-and a CLUSTER_ or INBOUND_ prefix for the cluster states.
-
-
-CPU state
----------
-
-In this algorithm, each individual core in a multi-core processor is
-referred to as a "CPU". CPUs are assumed to be single-threaded:
-therefore, a CPU can only be doing one thing at a single point in time.
-
-This means that CPUs fit the basic model closely.
-
-The algorithm defines the following states for each CPU in the system:
-
- CPU_DOWN
- CPU_COMING_UP
- CPU_UP
- CPU_GOING_DOWN
-
- cluster setup and
- CPU setup complete policy decision
- +-----------> CPU_UP ------------+
- | v
-
- CPU_COMING_UP CPU_GOING_DOWN
-
- ^ |
- +----------- CPU_DOWN <----------+
- policy decision CPU teardown complete
- or hardware event
-
-
-The definitions of the four states correspond closely to the states of
-the basic model.
-
-Transitions between states occur as follows.
-
-A trigger event (spontaneous) means that the CPU can transition to the
-next state as a result of making local progress only, with no
-requirement for any external event to happen.
-
-
-CPU_DOWN:
-
- A CPU reaches the CPU_DOWN state when it is ready for
- power-down. On reaching this state, the CPU will typically
- power itself down or suspend itself, via a WFI instruction or a
- firmware call.
-
- Next state: CPU_COMING_UP
- Conditions: none
-
- Trigger events:
-
- a) an explicit hardware power-up operation, resulting
- from a policy decision on another CPU;
-
- b) a hardware event, such as an interrupt.
-
-
-CPU_COMING_UP:
-
- A CPU cannot start participating in hardware coherency until the
- cluster is set up and coherent. If the cluster is not ready,
- then the CPU will wait in the CPU_COMING_UP state until the
- cluster has been set up.
-
- Next state: CPU_UP
- Conditions: The CPU's parent cluster must be in CLUSTER_UP.
- Trigger events: Transition of the parent cluster to CLUSTER_UP.
-
- Refer to the "Cluster state" section for a description of the
- CLUSTER_UP state.
-
-
-CPU_UP:
- When a CPU reaches the CPU_UP state, it is safe for the CPU to
- start participating in local coherency.
-
- This is done by jumping to the kernel's CPU resume code.
-
- Note that the definition of this state is slightly different
- from the basic model definition: CPU_UP does not mean that the
- CPU is coherent yet, but it does mean that it is safe to resume
- the kernel. The kernel handles the rest of the resume
- procedure, so the remaining steps are not visible as part of the
- race avoidance algorithm.
-
- The CPU remains in this state until an explicit policy decision
- is made to shut down or suspend the CPU.
-
- Next state: CPU_GOING_DOWN
- Conditions: none
- Trigger events: explicit policy decision
-
-
-CPU_GOING_DOWN:
-
- While in this state, the CPU exits coherency, including any
- operations required to achieve this (such as cleaning data
- caches).
-
- Next state: CPU_DOWN
- Conditions: local CPU teardown complete
- Trigger events: (spontaneous)
-
-
-Cluster state
--------------
-
-A cluster is a group of connected CPUs with some common resources.
-Because a cluster contains multiple CPUs, it can be doing multiple
-things at the same time. This has some implications. In particular, a
-CPU can start up while another CPU is tearing the cluster down.
-
-In this discussion, the "outbound side" is the view of the cluster state
-as seen by a CPU tearing the cluster down. The "inbound side" is the
-view of the cluster state as seen by a CPU setting the CPU up.
-
-In order to enable safe coordination in such situations, it is important
-that a CPU which is setting up the cluster can advertise its state
-independently of the CPU which is tearing down the cluster. For this
-reason, the cluster state is split into two parts:
-
- "cluster" state: The global state of the cluster; or the state
- on the outbound side:
-
- CLUSTER_DOWN
- CLUSTER_UP
- CLUSTER_GOING_DOWN
-
- "inbound" state: The state of the cluster on the inbound side.
-
- INBOUND_NOT_COMING_UP
- INBOUND_COMING_UP
-
-
- The different pairings of these states results in six possible
- states for the cluster as a whole:
-
- CLUSTER_UP
- +==========> INBOUND_NOT_COMING_UP -------------+
- # |
- |
- CLUSTER_UP <----+ |
- INBOUND_COMING_UP | v
-
- ^ CLUSTER_GOING_DOWN CLUSTER_GOING_DOWN
- # INBOUND_COMING_UP <=== INBOUND_NOT_COMING_UP
-
- CLUSTER_DOWN | |
- INBOUND_COMING_UP <----+ |
- |
- ^ |
- +=========== CLUSTER_DOWN <------------+
- INBOUND_NOT_COMING_UP
-
- Transitions -----> can only be made by the outbound CPU, and
- only involve changes to the "cluster" state.
-
- Transitions ===##> can only be made by the inbound CPU, and only
- involve changes to the "inbound" state, except where there is no
- further transition possible on the outbound side (i.e., the
- outbound CPU has put the cluster into the CLUSTER_DOWN state).
-
- The race avoidance algorithm does not provide a way to determine
- which exact CPUs within the cluster play these roles. This must
- be decided in advance by some other means. Refer to the section
- "Last man and first man selection" for more explanation.
-
-
- CLUSTER_DOWN/INBOUND_NOT_COMING_UP is the only state where the
- cluster can actually be powered down.
-
- The parallelism of the inbound and outbound CPUs is observed by
- the existence of two different paths from CLUSTER_GOING_DOWN/
- INBOUND_NOT_COMING_UP (corresponding to GOING_DOWN in the basic
- model) to CLUSTER_DOWN/INBOUND_COMING_UP (corresponding to
- COMING_UP in the basic model). The second path avoids cluster
- teardown completely.
-
- CLUSTER_UP/INBOUND_COMING_UP is equivalent to UP in the basic
- model. The final transition to CLUSTER_UP/INBOUND_NOT_COMING_UP
- is trivial and merely resets the state machine ready for the
- next cycle.
-
- Details of the allowable transitions follow.
-
- The next state in each case is notated
-
- <cluster state>/<inbound state> (<transitioner>)
-
- where the <transitioner> is the side on which the transition
- can occur; either the inbound or the outbound side.
-
-
-CLUSTER_DOWN/INBOUND_NOT_COMING_UP:
-
- Next state: CLUSTER_DOWN/INBOUND_COMING_UP (inbound)
- Conditions: none
- Trigger events:
-
- a) an explicit hardware power-up operation, resulting
- from a policy decision on another CPU;
-
- b) a hardware event, such as an interrupt.
-
-
-CLUSTER_DOWN/INBOUND_COMING_UP:
-
- In this state, an inbound CPU sets up the cluster, including
- enabling of hardware coherency at the cluster level and any
- other operations (such as cache invalidation) which are required
- in order to achieve this.
-
- The purpose of this state is to do sufficient cluster-level
- setup to enable other CPUs in the cluster to enter coherency
- safely.
-
- Next state: CLUSTER_UP/INBOUND_COMING_UP (inbound)
- Conditions: cluster-level setup and hardware coherency complete
- Trigger events: (spontaneous)
-
-
-CLUSTER_UP/INBOUND_COMING_UP:
-
- Cluster-level setup is complete and hardware coherency is
- enabled for the cluster. Other CPUs in the cluster can safely
- enter coherency.
-
- This is a transient state, leading immediately to
- CLUSTER_UP/INBOUND_NOT_COMING_UP. All other CPUs on the cluster
- should consider treat these two states as equivalent.
-
- Next state: CLUSTER_UP/INBOUND_NOT_COMING_UP (inbound)
- Conditions: none
- Trigger events: (spontaneous)
-
-
-CLUSTER_UP/INBOUND_NOT_COMING_UP:
-
- Cluster-level setup is complete and hardware coherency is
- enabled for the cluster. Other CPUs in the cluster can safely
- enter coherency.
-
- The cluster will remain in this state until a policy decision is
- made to power the cluster down.
-
- Next state: CLUSTER_GOING_DOWN/INBOUND_NOT_COMING_UP (outbound)
- Conditions: none
- Trigger events: policy decision to power down the cluster
-
-
-CLUSTER_GOING_DOWN/INBOUND_NOT_COMING_UP:
-
- An outbound CPU is tearing the cluster down. The selected CPU
- must wait in this state until all CPUs in the cluster are in the
- CPU_DOWN state.
-
- When all CPUs are in the CPU_DOWN state, the cluster can be torn
- down, for example by cleaning data caches and exiting
- cluster-level coherency.
-
- To avoid wasteful unnecessary teardown operations, the outbound
- should check the inbound cluster state for asynchronous
- transitions to INBOUND_COMING_UP. Alternatively, individual
- CPUs can be checked for entry into CPU_COMING_UP or CPU_UP.
-
-
- Next states:
-
- CLUSTER_DOWN/INBOUND_NOT_COMING_UP (outbound)
- Conditions: cluster torn down and ready to power off
- Trigger events: (spontaneous)
-
- CLUSTER_GOING_DOWN/INBOUND_COMING_UP (inbound)
- Conditions: none
- Trigger events:
-
- a) an explicit hardware power-up operation,
- resulting from a policy decision on another
- CPU;
-
- b) a hardware event, such as an interrupt.
-
-
-CLUSTER_GOING_DOWN/INBOUND_COMING_UP:
-
- The cluster is (or was) being torn down, but another CPU has
- come online in the meantime and is trying to set up the cluster
- again.
-
- If the outbound CPU observes this state, it has two choices:
-
- a) back out of teardown, restoring the cluster to the
- CLUSTER_UP state;
-
- b) finish tearing the cluster down and put the cluster
- in the CLUSTER_DOWN state; the inbound CPU will
- set up the cluster again from there.
-
- Choice (a) permits the removal of some latency by avoiding
- unnecessary teardown and setup operations in situations where
- the cluster is not really going to be powered down.
-
-
- Next states:
-
- CLUSTER_UP/INBOUND_COMING_UP (outbound)
- Conditions: cluster-level setup and hardware
- coherency complete
- Trigger events: (spontaneous)
-
- CLUSTER_DOWN/INBOUND_COMING_UP (outbound)
- Conditions: cluster torn down and ready to power off
- Trigger events: (spontaneous)
-
-
-Last man and First man selection
---------------------------------
-
-The CPU which performs cluster tear-down operations on the outbound side
-is commonly referred to as the "last man".
-
-The CPU which performs cluster setup on the inbound side is commonly
-referred to as the "first man".
-
-The race avoidance algorithm documented above does not provide a
-mechanism to choose which CPUs should play these roles.
-
-
-Last man:
-
-When shutting down the cluster, all the CPUs involved are initially
-executing Linux and hence coherent. Therefore, ordinary spinlocks can
-be used to select a last man safely, before the CPUs become
-non-coherent.
-
-
-First man:
-
-Because CPUs may power up asynchronously in response to external wake-up
-events, a dynamic mechanism is needed to make sure that only one CPU
-attempts to play the first man role and do the cluster-level
-initialisation: any other CPUs must wait for this to complete before
-proceeding.
-
-Cluster-level initialisation may involve actions such as configuring
-coherency controls in the bus fabric.
-
-The current implementation in mcpm_head.S uses a separate mutual exclusion
-mechanism to do this arbitration. This mechanism is documented in
-detail in vlocks.txt.
-
-
-Features and Limitations
-------------------------
-
-Implementation:
-
- The current ARM-based implementation is split between
- arch/arm/common/mcpm_head.S (low-level inbound CPU operations) and
- arch/arm/common/mcpm_entry.c (everything else):
-
- __mcpm_cpu_going_down() signals the transition of a CPU to the
- CPU_GOING_DOWN state.
-
- __mcpm_cpu_down() signals the transition of a CPU to the CPU_DOWN
- state.
-
- A CPU transitions to CPU_COMING_UP and then to CPU_UP via the
- low-level power-up code in mcpm_head.S. This could
- involve CPU-specific setup code, but in the current
- implementation it does not.
-
- __mcpm_outbound_enter_critical() and __mcpm_outbound_leave_critical()
- handle transitions from CLUSTER_UP to CLUSTER_GOING_DOWN
- and from there to CLUSTER_DOWN or back to CLUSTER_UP (in
- the case of an aborted cluster power-down).
-
- These functions are more complex than the __mcpm_cpu_*()
- functions due to the extra inter-CPU coordination which
- is needed for safe transitions at the cluster level.
-
- A cluster transitions from CLUSTER_DOWN back to CLUSTER_UP via
- the low-level power-up code in mcpm_head.S. This
- typically involves platform-specific setup code,
- provided by the platform-specific power_up_setup
- function registered via mcpm_sync_init.
-
-Deep topologies:
-
- As currently described and implemented, the algorithm does not
- support CPU topologies involving more than two levels (i.e.,
- clusters of clusters are not supported). The algorithm could be
- extended by replicating the cluster-level states for the
- additional topological levels, and modifying the transition
- rules for the intermediate (non-outermost) cluster levels.
-
-
-Colophon
---------
-
-Originally created and documented by Dave Martin for Linaro Limited, in
-collaboration with Nicolas Pitre and Achin Gupta.
-
-Copyright (C) 2012-2013 Linaro Limited
-Distributed under the terms of Version 2 of the GNU General Public
-License, as defined in linux/COPYING.
diff --git a/Documentation/arm/firmware.txt b/Documentation/arm/firmware.txt
deleted file mode 100644
index 7f175dbb427e..000000000000
--- a/Documentation/arm/firmware.txt
+++ /dev/null
@@ -1,70 +0,0 @@
-Interface for registering and calling firmware-specific operations for ARM.
-----
-Written by Tomasz Figa <t.figa@samsung.com>
-
-Some boards are running with secure firmware running in TrustZone secure
-world, which changes the way some things have to be initialized. This makes
-a need to provide an interface for such platforms to specify available firmware
-operations and call them when needed.
-
-Firmware operations can be specified by filling in a struct firmware_ops
-with appropriate callbacks and then registering it with register_firmware_ops()
-function.
-
- void register_firmware_ops(const struct firmware_ops *ops)
-
-The ops pointer must be non-NULL. More information about struct firmware_ops
-and its members can be found in arch/arm/include/asm/firmware.h header.
-
-There is a default, empty set of operations provided, so there is no need to
-set anything if platform does not require firmware operations.
-
-To call a firmware operation, a helper macro is provided
-
- #define call_firmware_op(op, ...) \
- ((firmware_ops->op) ? firmware_ops->op(__VA_ARGS__) : (-ENOSYS))
-
-the macro checks if the operation is provided and calls it or otherwise returns
--ENOSYS to signal that given operation is not available (for example, to allow
-fallback to legacy operation).
-
-Example of registering firmware operations:
-
- /* board file */
-
- static int platformX_do_idle(void)
- {
- /* tell platformX firmware to enter idle */
- return 0;
- }
-
- static int platformX_cpu_boot(int i)
- {
- /* tell platformX firmware to boot CPU i */
- return 0;
- }
-
- static const struct firmware_ops platformX_firmware_ops = {
- .do_idle = exynos_do_idle,
- .cpu_boot = exynos_cpu_boot,
- /* other operations not available on platformX */
- };
-
- /* init_early callback of machine descriptor */
- static void __init board_init_early(void)
- {
- register_firmware_ops(&platformX_firmware_ops);
- }
-
-Example of using a firmware operation:
-
- /* some platform code, e.g. SMP initialization */
-
- __raw_writel(__pa_symbol(exynos4_secondary_startup),
- CPU1_BOOT_REG);
-
- /* Call Exynos specific smc call */
- if (call_firmware_op(cpu_boot, cpu) == -ENOSYS)
- cpu_boot_legacy(...); /* Try legacy way */
-
- gic_raise_softirq(cpumask_of(cpu), 1);
diff --git a/Documentation/arm/kernel_mode_neon.txt b/Documentation/arm/kernel_mode_neon.txt
deleted file mode 100644
index 525452726d31..000000000000
--- a/Documentation/arm/kernel_mode_neon.txt
+++ /dev/null
@@ -1,121 +0,0 @@
-Kernel mode NEON
-================
-
-TL;DR summary
--------------
-* Use only NEON instructions, or VFP instructions that don't rely on support
- code
-* Isolate your NEON code in a separate compilation unit, and compile it with
- '-mfpu=neon -mfloat-abi=softfp'
-* Put kernel_neon_begin() and kernel_neon_end() calls around the calls into your
- NEON code
-* Don't sleep in your NEON code, and be aware that it will be executed with
- preemption disabled
-
-
-Introduction
-------------
-It is possible to use NEON instructions (and in some cases, VFP instructions) in
-code that runs in kernel mode. However, for performance reasons, the NEON/VFP
-register file is not preserved and restored at every context switch or taken
-exception like the normal register file is, so some manual intervention is
-required. Furthermore, special care is required for code that may sleep [i.e.,
-may call schedule()], as NEON or VFP instructions will be executed in a
-non-preemptible section for reasons outlined below.
-
-
-Lazy preserve and restore
--------------------------
-The NEON/VFP register file is managed using lazy preserve (on UP systems) and
-lazy restore (on both SMP and UP systems). This means that the register file is
-kept 'live', and is only preserved and restored when multiple tasks are
-contending for the NEON/VFP unit (or, in the SMP case, when a task migrates to
-another core). Lazy restore is implemented by disabling the NEON/VFP unit after
-every context switch, resulting in a trap when subsequently a NEON/VFP
-instruction is issued, allowing the kernel to step in and perform the restore if
-necessary.
-
-Any use of the NEON/VFP unit in kernel mode should not interfere with this, so
-it is required to do an 'eager' preserve of the NEON/VFP register file, and
-enable the NEON/VFP unit explicitly so no exceptions are generated on first
-subsequent use. This is handled by the function kernel_neon_begin(), which
-should be called before any kernel mode NEON or VFP instructions are issued.
-Likewise, the NEON/VFP unit should be disabled again after use to make sure user
-mode will hit the lazy restore trap upon next use. This is handled by the
-function kernel_neon_end().
-
-
-Interruptions in kernel mode
-----------------------------
-For reasons of performance and simplicity, it was decided that there shall be no
-preserve/restore mechanism for the kernel mode NEON/VFP register contents. This
-implies that interruptions of a kernel mode NEON section can only be allowed if
-they are guaranteed not to touch the NEON/VFP registers. For this reason, the
-following rules and restrictions apply in the kernel:
-* NEON/VFP code is not allowed in interrupt context;
-* NEON/VFP code is not allowed to sleep;
-* NEON/VFP code is executed with preemption disabled.
-
-If latency is a concern, it is possible to put back to back calls to
-kernel_neon_end() and kernel_neon_begin() in places in your code where none of
-the NEON registers are live. (Additional calls to kernel_neon_begin() should be
-reasonably cheap if no context switch occurred in the meantime)
-
-
-VFP and support code
---------------------
-Earlier versions of VFP (prior to version 3) rely on software support for things
-like IEEE-754 compliant underflow handling etc. When the VFP unit needs such
-software assistance, it signals the kernel by raising an undefined instruction
-exception. The kernel responds by inspecting the VFP control registers and the
-current instruction and arguments, and emulates the instruction in software.
-
-Such software assistance is currently not implemented for VFP instructions
-executed in kernel mode. If such a condition is encountered, the kernel will
-fail and generate an OOPS.
-
-
-Separating NEON code from ordinary code
----------------------------------------
-The compiler is not aware of the special significance of kernel_neon_begin() and
-kernel_neon_end(), i.e., that it is only allowed to issue NEON/VFP instructions
-between calls to these respective functions. Furthermore, GCC may generate NEON
-instructions of its own at -O3 level if -mfpu=neon is selected, and even if the
-kernel is currently compiled at -O2, future changes may result in NEON/VFP
-instructions appearing in unexpected places if no special care is taken.
-
-Therefore, the recommended and only supported way of using NEON/VFP in the
-kernel is by adhering to the following rules:
-* isolate the NEON code in a separate compilation unit and compile it with
- '-mfpu=neon -mfloat-abi=softfp';
-* issue the calls to kernel_neon_begin(), kernel_neon_end() as well as the calls
- into the unit containing the NEON code from a compilation unit which is *not*
- built with the GCC flag '-mfpu=neon' set.
-
-As the kernel is compiled with '-msoft-float', the above will guarantee that
-both NEON and VFP instructions will only ever appear in designated compilation
-units at any optimization level.
-
-
-NEON assembler
---------------
-NEON assembler is supported with no additional caveats as long as the rules
-above are followed.
-
-
-NEON code generated by GCC
---------------------------
-The GCC option -ftree-vectorize (implied by -O3) tries to exploit implicit
-parallelism, and generates NEON code from ordinary C source code. This is fully
-supported as long as the rules above are followed.
-
-
-NEON intrinsics
----------------
-NEON intrinsics are also supported. However, as code using NEON intrinsics
-relies on the GCC header <arm_neon.h>, (which #includes <stdint.h>), you should
-observe the following in addition to the rules above:
-* Compile the unit containing the NEON intrinsics with '-ffreestanding' so GCC
- uses its builtin version of <stdint.h> (this is a C99 header which the kernel
- does not supply);
-* Include <arm_neon.h> last, or at least after <linux/types.h>
diff --git a/Documentation/arm/kernel_user_helpers.txt b/Documentation/arm/kernel_user_helpers.txt
deleted file mode 100644
index 5673594717cf..000000000000
--- a/Documentation/arm/kernel_user_helpers.txt
+++ /dev/null
@@ -1,267 +0,0 @@
-Kernel-provided User Helpers
-============================
-
-These are segment of kernel provided user code reachable from user space
-at a fixed address in kernel memory. This is used to provide user space
-with some operations which require kernel help because of unimplemented
-native feature and/or instructions in many ARM CPUs. The idea is for this
-code to be executed directly in user mode for best efficiency but which is
-too intimate with the kernel counter part to be left to user libraries.
-In fact this code might even differ from one CPU to another depending on
-the available instruction set, or whether it is a SMP systems. In other
-words, the kernel reserves the right to change this code as needed without
-warning. Only the entry points and their results as documented here are
-guaranteed to be stable.
-
-This is different from (but doesn't preclude) a full blown VDSO
-implementation, however a VDSO would prevent some assembly tricks with
-constants that allows for efficient branching to those code segments. And
-since those code segments only use a few cycles before returning to user
-code, the overhead of a VDSO indirect far call would add a measurable
-overhead to such minimalistic operations.
-
-User space is expected to bypass those helpers and implement those things
-inline (either in the code emitted directly by the compiler, or part of
-the implementation of a library call) when optimizing for a recent enough
-processor that has the necessary native support, but only if resulting
-binaries are already to be incompatible with earlier ARM processors due to
-usage of similar native instructions for other things. In other words
-don't make binaries unable to run on earlier processors just for the sake
-of not using these kernel helpers if your compiled code is not going to
-use new instructions for other purpose.
-
-New helpers may be added over time, so an older kernel may be missing some
-helpers present in a newer kernel. For this reason, programs must check
-the value of __kuser_helper_version (see below) before assuming that it is
-safe to call any particular helper. This check should ideally be
-performed only once at process startup time, and execution aborted early
-if the required helpers are not provided by the kernel version that
-process is running on.
-
-kuser_helper_version
---------------------
-
-Location: 0xffff0ffc
-
-Reference declaration:
-
- extern int32_t __kuser_helper_version;
-
-Definition:
-
- This field contains the number of helpers being implemented by the
- running kernel. User space may read this to determine the availability
- of a particular helper.
-
-Usage example:
-
-#define __kuser_helper_version (*(int32_t *)0xffff0ffc)
-
-void check_kuser_version(void)
-{
- if (__kuser_helper_version < 2) {
- fprintf(stderr, "can't do atomic operations, kernel too old\n");
- abort();
- }
-}
-
-Notes:
-
- User space may assume that the value of this field never changes
- during the lifetime of any single process. This means that this
- field can be read once during the initialisation of a library or
- startup phase of a program.
-
-kuser_get_tls
--------------
-
-Location: 0xffff0fe0
-
-Reference prototype:
-
- void * __kuser_get_tls(void);
-
-Input:
-
- lr = return address
-
-Output:
-
- r0 = TLS value
-
-Clobbered registers:
-
- none
-
-Definition:
-
- Get the TLS value as previously set via the __ARM_NR_set_tls syscall.
-
-Usage example:
-
-typedef void * (__kuser_get_tls_t)(void);
-#define __kuser_get_tls (*(__kuser_get_tls_t *)0xffff0fe0)
-
-void foo()
-{
- void *tls = __kuser_get_tls();
- printf("TLS = %p\n", tls);
-}
-
-Notes:
-
- - Valid only if __kuser_helper_version >= 1 (from kernel version 2.6.12).
-
-kuser_cmpxchg
--------------
-
-Location: 0xffff0fc0
-
-Reference prototype:
-
- int __kuser_cmpxchg(int32_t oldval, int32_t newval, volatile int32_t *ptr);
-
-Input:
-
- r0 = oldval
- r1 = newval
- r2 = ptr
- lr = return address
-
-Output:
-
- r0 = success code (zero or non-zero)
- C flag = set if r0 == 0, clear if r0 != 0
-
-Clobbered registers:
-
- r3, ip, flags
-
-Definition:
-
- Atomically store newval in *ptr only if *ptr is equal to oldval.
- Return zero if *ptr was changed or non-zero if no exchange happened.
- The C flag is also set if *ptr was changed to allow for assembly
- optimization in the calling code.
-
-Usage example:
-
-typedef int (__kuser_cmpxchg_t)(int oldval, int newval, volatile int *ptr);
-#define __kuser_cmpxchg (*(__kuser_cmpxchg_t *)0xffff0fc0)
-
-int atomic_add(volatile int *ptr, int val)
-{
- int old, new;
-
- do {
- old = *ptr;
- new = old + val;
- } while(__kuser_cmpxchg(old, new, ptr));
-
- return new;
-}
-
-Notes:
-
- - This routine already includes memory barriers as needed.
-
- - Valid only if __kuser_helper_version >= 2 (from kernel version 2.6.12).
-
-kuser_memory_barrier
---------------------
-
-Location: 0xffff0fa0
-
-Reference prototype:
-
- void __kuser_memory_barrier(void);
-
-Input:
-
- lr = return address
-
-Output:
-
- none
-
-Clobbered registers:
-
- none
-
-Definition:
-
- Apply any needed memory barrier to preserve consistency with data modified
- manually and __kuser_cmpxchg usage.
-
-Usage example:
-
-typedef void (__kuser_dmb_t)(void);
-#define __kuser_dmb (*(__kuser_dmb_t *)0xffff0fa0)
-
-Notes:
-
- - Valid only if __kuser_helper_version >= 3 (from kernel version 2.6.15).
-
-kuser_cmpxchg64
----------------
-
-Location: 0xffff0f60
-
-Reference prototype:
-
- int __kuser_cmpxchg64(const int64_t *oldval,
- const int64_t *newval,
- volatile int64_t *ptr);
-
-Input:
-
- r0 = pointer to oldval
- r1 = pointer to newval
- r2 = pointer to target value
- lr = return address
-
-Output:
-
- r0 = success code (zero or non-zero)
- C flag = set if r0 == 0, clear if r0 != 0
-
-Clobbered registers:
-
- r3, lr, flags
-
-Definition:
-
- Atomically store the 64-bit value pointed by *newval in *ptr only if *ptr
- is equal to the 64-bit value pointed by *oldval. Return zero if *ptr was
- changed or non-zero if no exchange happened.
-
- The C flag is also set if *ptr was changed to allow for assembly
- optimization in the calling code.
-
-Usage example:
-
-typedef int (__kuser_cmpxchg64_t)(const int64_t *oldval,
- const int64_t *newval,
- volatile int64_t *ptr);
-#define __kuser_cmpxchg64 (*(__kuser_cmpxchg64_t *)0xffff0f60)
-
-int64_t atomic_add64(volatile int64_t *ptr, int64_t val)
-{
- int64_t old, new;
-
- do {
- old = *ptr;
- new = old + val;
- } while(__kuser_cmpxchg64(&old, &new, ptr));
-
- return new;
-}
-
-Notes:
-
- - This routine already includes memory barriers as needed.
-
- - Due to the length of this sequence, this spans 2 conventional kuser
- "slots", therefore 0xffff0f80 is not used as a valid entry point.
-
- - Valid only if __kuser_helper_version >= 5 (from kernel version 3.1).
diff --git a/Documentation/arm/keystone/Overview.txt b/Documentation/arm/keystone/Overview.txt
deleted file mode 100644
index 400c0c270d2e..000000000000
--- a/Documentation/arm/keystone/Overview.txt
+++ /dev/null
@@ -1,55 +0,0 @@
- TI Keystone Linux Overview
- --------------------------
-
-Introduction
-------------
-Keystone range of SoCs are based on ARM Cortex-A15 MPCore Processors
-and c66x DSP cores. This document describes essential information required
-for users to run Linux on Keystone based EVMs from Texas Instruments.
-
-Following SoCs & EVMs are currently supported:-
-
------------- K2HK SoC and EVM --------------------------------------------------
-
-a.k.a Keystone 2 Hawking/Kepler SoC
-TCI6636K2H & TCI6636K2K: See documentation at
- http://www.ti.com/product/tci6638k2k
- http://www.ti.com/product/tci6638k2h
-
-EVM:
-http://www.advantech.com/Support/TI-EVM/EVMK2HX_sd.aspx
-
------------- K2E SoC and EVM ---------------------------------------------------
-
-a.k.a Keystone 2 Edison SoC
-K2E - 66AK2E05: See documentation at
- http://www.ti.com/product/66AK2E05/technicaldocuments
-
-EVM:
-https://www.einfochips.com/index.php/partnerships/texas-instruments/k2e-evm.html
-
------------- K2L SoC and EVM ---------------------------------------------------
-
-a.k.a Keystone 2 Lamarr SoC
-K2L - TCI6630K2L: See documentation at
- http://www.ti.com/product/TCI6630K2L/technicaldocuments
-EVM:
-https://www.einfochips.com/index.php/partnerships/texas-instruments/k2l-evm.html
-
-Configuration
--------------
-
-All of the K2 SoCs/EVMs share a common defconfig, keystone_defconfig and same
-image is used to boot on individual EVMs. The platform configuration is
-specified through DTS. Following are the DTS used:-
- K2HK EVM : k2hk-evm.dts
- K2E EVM : k2e-evm.dts
- K2L EVM : k2l-evm.dts
-
-The device tree documentation for the keystone machines are located at
- Documentation/devicetree/bindings/arm/keystone/keystone.txt
-
-Document Author
----------------
-Murali Karicheri <m-karicheri2@ti.com>
-Copyright 2015 Texas Instruments
diff --git a/Documentation/arm/keystone/knav-qmss.txt b/Documentation/arm/keystone/knav-qmss.txt
deleted file mode 100644
index fcdb9fd5f53a..000000000000
--- a/Documentation/arm/keystone/knav-qmss.txt
+++ /dev/null
@@ -1,56 +0,0 @@
-* Texas Instruments Keystone Navigator Queue Management SubSystem driver
-
-Driver source code path
- drivers/soc/ti/knav_qmss.c
- drivers/soc/ti/knav_qmss_acc.c
-
-The QMSS (Queue Manager Sub System) found on Keystone SOCs is one of
-the main hardware sub system which forms the backbone of the Keystone
-multi-core Navigator. QMSS consist of queue managers, packed-data structure
-processors(PDSP), linking RAM, descriptor pools and infrastructure
-Packet DMA.
-The Queue Manager is a hardware module that is responsible for accelerating
-management of the packet queues. Packets are queued/de-queued by writing or
-reading descriptor address to a particular memory mapped location. The PDSPs
-perform QMSS related functions like accumulation, QoS, or event management.
-Linking RAM registers are used to link the descriptors which are stored in
-descriptor RAM. Descriptor RAM is configurable as internal or external memory.
-The QMSS driver manages the PDSP setups, linking RAM regions,
-queue pool management (allocation, push, pop and notify) and descriptor
-pool management.
-
-knav qmss driver provides a set of APIs to drivers to open/close qmss queues,
-allocate descriptor pools, map the descriptors, push/pop to queues etc. For
-details of the available APIs, please refers to include/linux/soc/ti/knav_qmss.h
-
-DT documentation is available at
-Documentation/devicetree/bindings/soc/ti/keystone-navigator-qmss.txt
-
-Accumulator QMSS queues using PDSP firmware
-============================================
-The QMSS PDSP firmware support accumulator channel that can monitor a single
-queue or multiple contiguous queues. drivers/soc/ti/knav_qmss_acc.c is the
-driver that interface with the accumulator PDSP. This configures
-accumulator channels defined in DTS (example in DT documentation) to monitor
-1 or 32 queues per channel. More description on the firmware is available in
-CPPI/QMSS Low Level Driver document (docs/CPPI_QMSS_LLD_SDS.pdf) at
- git://git.ti.com/keystone-rtos/qmss-lld.git
-
-k2_qmss_pdsp_acc48_k2_le_1_0_0_9.bin firmware supports upto 48 accumulator
-channels. This firmware is available under ti-keystone folder of
-firmware.git at
- git://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git
-
-To use copy the firmware image to lib/firmware folder of the initramfs or
-ubifs file system and provide a sym link to k2_qmss_pdsp_acc48_k2_le_1_0_0_9.bin
-in the file system and boot up the kernel. User would see
-
- "firmware file ks2_qmss_pdsp_acc48.bin downloaded for PDSP"
-
-in the boot up log if loading of firmware to PDSP is successful.
-
-Use of accumulated queues requires the firmware image to be present in the
-file system. The driver doesn't acc queues to the supported queue range if
-PDSP is not running in the SoC. The API call fails if there is a queue open
-request to an acc queue and PDSP is not running. So make sure to copy firmware
-to file system before using these queue types.
diff --git a/Documentation/arm/mem_alignment b/Documentation/arm/mem_alignment
deleted file mode 100644
index 6335fcacbba9..000000000000
--- a/Documentation/arm/mem_alignment
+++ /dev/null
@@ -1,58 +0,0 @@
-Too many problems poped up because of unnoticed misaligned memory access in
-kernel code lately. Therefore the alignment fixup is now unconditionally
-configured in for SA11x0 based targets. According to Alan Cox, this is a
-bad idea to configure it out, but Russell King has some good reasons for
-doing so on some f***ed up ARM architectures like the EBSA110. However
-this is not the case on many design I'm aware of, like all SA11x0 based
-ones.
-
-Of course this is a bad idea to rely on the alignment trap to perform
-unaligned memory access in general. If those access are predictable, you
-are better to use the macros provided by include/asm/unaligned.h. The
-alignment trap can fixup misaligned access for the exception cases, but at
-a high performance cost. It better be rare.
-
-Now for user space applications, it is possible to configure the alignment
-trap to SIGBUS any code performing unaligned access (good for debugging bad
-code), or even fixup the access by software like for kernel code. The later
-mode isn't recommended for performance reasons (just think about the
-floating point emulation that works about the same way). Fix your code
-instead!
-
-Please note that randomly changing the behaviour without good thought is
-real bad - it changes the behaviour of all unaligned instructions in user
-space, and might cause programs to fail unexpectedly.
-
-To change the alignment trap behavior, simply echo a number into
-/proc/cpu/alignment. The number is made up from various bits:
-
-bit behavior when set
---- -----------------
-
-0 A user process performing an unaligned memory access
- will cause the kernel to print a message indicating
- process name, pid, pc, instruction, address, and the
- fault code.
-
-1 The kernel will attempt to fix up the user process
- performing the unaligned access. This is of course
- slow (think about the floating point emulator) and
- not recommended for production use.
-
-2 The kernel will send a SIGBUS signal to the user process
- performing the unaligned access.
-
-Note that not all combinations are supported - only values 0 through 5.
-(6 and 7 don't make sense).
-
-For example, the following will turn on the warnings, but without
-fixing up or sending SIGBUS signals:
-
- echo 1 > /proc/cpu/alignment
-
-You can also read the content of the same file to get statistical
-information on unaligned access occurrences plus the current mode of
-operation for user space code.
-
-
-Nicolas Pitre, Mar 13, 2001. Modified Russell King, Nov 30, 2001.
diff --git a/Documentation/arm/memory.txt b/Documentation/arm/memory.txt
deleted file mode 100644
index 546a39048eb0..000000000000
--- a/Documentation/arm/memory.txt
+++ /dev/null
@@ -1,88 +0,0 @@
- Kernel Memory Layout on ARM Linux
-
- Russell King <rmk@arm.linux.org.uk>
- November 17, 2005 (2.6.15)
-
-This document describes the virtual memory layout which the Linux
-kernel uses for ARM processors. It indicates which regions are
-free for platforms to use, and which are used by generic code.
-
-The ARM CPU is capable of addressing a maximum of 4GB virtual memory
-space, and this must be shared between user space processes, the
-kernel, and hardware devices.
-
-As the ARM architecture matures, it becomes necessary to reserve
-certain regions of VM space for use for new facilities; therefore
-this document may reserve more VM space over time.
-
-Start End Use
---------------------------------------------------------------------------
-ffff8000 ffffffff copy_user_page / clear_user_page use.
- For SA11xx and Xscale, this is used to
- setup a minicache mapping.
-
-ffff4000 ffffffff cache aliasing on ARMv6 and later CPUs.
-
-ffff1000 ffff7fff Reserved.
- Platforms must not use this address range.
-
-ffff0000 ffff0fff CPU vector page.
- The CPU vectors are mapped here if the
- CPU supports vector relocation (control
- register V bit.)
-
-fffe0000 fffeffff XScale cache flush area. This is used
- in proc-xscale.S to flush the whole data
- cache. (XScale does not have TCM.)
-
-fffe8000 fffeffff DTCM mapping area for platforms with
- DTCM mounted inside the CPU.
-
-fffe0000 fffe7fff ITCM mapping area for platforms with
- ITCM mounted inside the CPU.
-
-ffc00000 ffefffff Fixmap mapping region. Addresses provided
- by fix_to_virt() will be located here.
-
-fee00000 feffffff Mapping of PCI I/O space. This is a static
- mapping within the vmalloc space.
-
-VMALLOC_START VMALLOC_END-1 vmalloc() / ioremap() space.
- Memory returned by vmalloc/ioremap will
- be dynamically placed in this region.
- Machine specific static mappings are also
- located here through iotable_init().
- VMALLOC_START is based upon the value
- of the high_memory variable, and VMALLOC_END
- is equal to 0xff800000.
-
-PAGE_OFFSET high_memory-1 Kernel direct-mapped RAM region.
- This maps the platforms RAM, and typically
- maps all platform RAM in a 1:1 relationship.
-
-PKMAP_BASE PAGE_OFFSET-1 Permanent kernel mappings
- One way of mapping HIGHMEM pages into kernel
- space.
-
-MODULES_VADDR MODULES_END-1 Kernel module space
- Kernel modules inserted via insmod are
- placed here using dynamic mappings.
-
-00001000 TASK_SIZE-1 User space mappings
- Per-thread mappings are placed here via
- the mmap() system call.
-
-00000000 00000fff CPU vector page / null pointer trap
- CPUs which do not support vector remapping
- place their vector page here. NULL pointer
- dereferences by both the kernel and user
- space are also caught via this mapping.
-
-Please note that mappings which collide with the above areas may result
-in a non-bootable kernel, or may cause the kernel to (eventually) panic
-at run time.
-
-Since future CPUs may impact the kernel mapping layout, user programs
-must not access any memory which is not mapped inside their 0x0001000
-to TASK_SIZE address range. If they wish to access these areas, they
-must set up their own mappings using open() and mmap().
diff --git a/Documentation/arm/nwfpe/NOTES b/Documentation/arm/nwfpe/NOTES
deleted file mode 100644
index 40577b5a49d3..000000000000
--- a/Documentation/arm/nwfpe/NOTES
+++ /dev/null
@@ -1,29 +0,0 @@
-There seems to be a problem with exp(double) and our emulator. I haven't
-been able to track it down yet. This does not occur with the emulator
-supplied by Russell King.
-
-I also found one oddity in the emulator. I don't think it is serious but
-will point it out. The ARM calling conventions require floating point
-registers f4-f7 to be preserved over a function call. The compiler quite
-often uses an stfe instruction to save f4 on the stack upon entry to a
-function, and an ldfe instruction to restore it before returning.
-
-I was looking at some code, that calculated a double result, stored it in f4
-then made a function call. Upon return from the function call the number in
-f4 had been converted to an extended value in the emulator.
-
-This is a side effect of the stfe instruction. The double in f4 had to be
-converted to extended, then stored. If an lfm/sfm combination had been used,
-then no conversion would occur. This has performance considerations. The
-result from the function call and f4 were used in a multiplication. If the
-emulator sees a multiply of a double and extended, it promotes the double to
-extended, then does the multiply in extended precision.
-
-This code will cause this problem:
-
-double x, y, z;
-z = log(x)/log(y);
-
-The result of log(x) (a double) will be calculated, returned in f0, then
-moved to f4 to preserve it over the log(y) call. The division will be done
-in extended precision, due to the stfe instruction used to save f4 in log(y).
diff --git a/Documentation/arm/nwfpe/README b/Documentation/arm/nwfpe/README
deleted file mode 100644
index 771871de0c8b..000000000000
--- a/Documentation/arm/nwfpe/README
+++ /dev/null
@@ -1,70 +0,0 @@
-This directory contains the version 0.92 test release of the NetWinder
-Floating Point Emulator.
-
-The majority of the code was written by me, Scott Bambrough It is
-written in C, with a small number of routines in inline assembler
-where required. It was written quickly, with a goal of implementing a
-working version of all the floating point instructions the compiler
-emits as the first target. I have attempted to be as optimal as
-possible, but there remains much room for improvement.
-
-I have attempted to make the emulator as portable as possible. One of
-the problems is with leading underscores on kernel symbols. Elf
-kernels have no leading underscores, a.out compiled kernels do. I
-have attempted to use the C_SYMBOL_NAME macro wherever this may be
-important.
-
-Another choice I made was in the file structure. I have attempted to
-contain all operating system specific code in one module (fpmodule.*).
-All the other files contain emulator specific code. This should allow
-others to port the emulator to NetBSD for instance relatively easily.
-
-The floating point operations are based on SoftFloat Release 2, by
-John Hauser. SoftFloat is a software implementation of floating-point
-that conforms to the IEC/IEEE Standard for Binary Floating-point
-Arithmetic. As many as four formats are supported: single precision,
-double precision, extended double precision, and quadruple precision.
-All operations required by the standard are implemented, except for
-conversions to and from decimal. We use only the single precision,
-double precision and extended double precision formats. The port of
-SoftFloat to the ARM was done by Phil Blundell, based on an earlier
-port of SoftFloat version 1 by Neil Carson for NetBSD/arm32.
-
-The file README.FPE contains a description of what has been implemented
-so far in the emulator. The file TODO contains a information on what
-remains to be done, and other ideas for the emulator.
-
-Bug reports, comments, suggestions should be directed to me at
-<scottb@netwinder.org>. General reports of "this program doesn't
-work correctly when your emulator is installed" are useful for
-determining that bugs still exist; but are virtually useless when
-attempting to isolate the problem. Please report them, but don't
-expect quick action. Bugs still exist. The problem remains in isolating
-which instruction contains the bug. Small programs illustrating a specific
-problem are a godsend.
-
-Legal Notices
--------------
-
-The NetWinder Floating Point Emulator is free software. Everything Rebel.com
-has written is provided under the GNU GPL. See the file COPYING for copying
-conditions. Excluded from the above is the SoftFloat code. John Hauser's
-legal notice for SoftFloat is included below.
-
--------------------------------------------------------------------------------
-SoftFloat Legal Notice
-
-SoftFloat was written by John R. Hauser. This work was made possible in
-part by the International Computer Science Institute, located at Suite 600,
-1947 Center Street, Berkeley, California 94704. Funding was partially
-provided by the National Science Foundation under grant MIP-9311980. The
-original version of this code was written as part of a project to build
-a fixed-point vector processor in collaboration with the University of
-California at Berkeley, overseen by Profs. Nelson Morgan and John Wawrzynek.
-
-THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
-has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
-TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
-PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
-AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
--------------------------------------------------------------------------------
diff --git a/Documentation/arm/nwfpe/README.FPE b/Documentation/arm/nwfpe/README.FPE
deleted file mode 100644
index 26f5d7bb9a41..000000000000
--- a/Documentation/arm/nwfpe/README.FPE
+++ /dev/null
@@ -1,156 +0,0 @@
-The following describes the current state of the NetWinder's floating point
-emulator.
-
-In the following nomenclature is used to describe the floating point
-instructions. It follows the conventions in the ARM manual.
-
-<S|D|E> = <single|double|extended>, no default
-{P|M|Z} = {round to +infinity,round to -infinity,round to zero},
- default = round to nearest
-
-Note: items enclosed in {} are optional.
-
-Floating Point Coprocessor Data Transfer Instructions (CPDT)
-------------------------------------------------------------
-
-LDF/STF - load and store floating
-
-<LDF|STF>{cond}<S|D|E> Fd, Rn
-<LDF|STF>{cond}<S|D|E> Fd, [Rn, #<expression>]{!}
-<LDF|STF>{cond}<S|D|E> Fd, [Rn], #<expression>
-
-These instructions are fully implemented.
-
-LFM/SFM - load and store multiple floating
-
-Form 1 syntax:
-<LFM|SFM>{cond}<S|D|E> Fd, <count>, [Rn]
-<LFM|SFM>{cond}<S|D|E> Fd, <count>, [Rn, #<expression>]{!}
-<LFM|SFM>{cond}<S|D|E> Fd, <count>, [Rn], #<expression>
-
-Form 2 syntax:
-<LFM|SFM>{cond}<FD,EA> Fd, <count>, [Rn]{!}
-
-These instructions are fully implemented. They store/load three words
-for each floating point register into the memory location given in the
-instruction. The format in memory is unlikely to be compatible with
-other implementations, in particular the actual hardware. Specific
-mention of this is made in the ARM manuals.
-
-Floating Point Coprocessor Register Transfer Instructions (CPRT)
-----------------------------------------------------------------
-
-Conversions, read/write status/control register instructions
-
-FLT{cond}<S,D,E>{P,M,Z} Fn, Rd Convert integer to floating point
-FIX{cond}{P,M,Z} Rd, Fn Convert floating point to integer
-WFS{cond} Rd Write floating point status register
-RFS{cond} Rd Read floating point status register
-WFC{cond} Rd Write floating point control register
-RFC{cond} Rd Read floating point control register
-
-FLT/FIX are fully implemented.
-
-RFS/WFS are fully implemented.
-
-RFC/WFC are fully implemented. RFC/WFC are supervisor only instructions, and
-presently check the CPU mode, and do an invalid instruction trap if not called
-from supervisor mode.
-
-Compare instructions
-
-CMF{cond} Fn, Fm Compare floating
-CMFE{cond} Fn, Fm Compare floating with exception
-CNF{cond} Fn, Fm Compare negated floating
-CNFE{cond} Fn, Fm Compare negated floating with exception
-
-These are fully implemented.
-
-Floating Point Coprocessor Data Instructions (CPDT)
----------------------------------------------------
-
-Dyadic operations:
-
-ADF{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - add
-SUF{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - subtract
-RSF{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - reverse subtract
-MUF{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - multiply
-DVF{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - divide
-RDV{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - reverse divide
-
-These are fully implemented.
-
-FML{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - fast multiply
-FDV{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - fast divide
-FRD{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - fast reverse divide
-
-These are fully implemented as well. They use the same algorithm as the
-non-fast versions. Hence, in this implementation their performance is
-equivalent to the MUF/DVF/RDV instructions. This is acceptable according
-to the ARM manual. The manual notes these are defined only for single
-operands, on the actual FPA11 hardware they do not work for double or
-extended precision operands. The emulator currently does not check
-the requested permissions conditions, and performs the requested operation.
-
-RMF{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - IEEE remainder
-
-This is fully implemented.
-
-Monadic operations:
-
-MVF{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - move
-MNF{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - move negated
-
-These are fully implemented.
-
-ABS{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - absolute value
-SQT{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - square root
-RND{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - round
-
-These are fully implemented.
-
-URD{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - unnormalized round
-NRM{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - normalize
-
-These are implemented. URD is implemented using the same code as the RND
-instruction. Since URD cannot return a unnormalized number, NRM becomes
-a NOP.
-
-Library calls:
-
-POW{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - power
-RPW{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - reverse power
-POL{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - polar angle (arctan2)
-
-LOG{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - logarithm to base 10
-LGN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - logarithm to base e
-EXP{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - exponent
-SIN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - sine
-COS{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - cosine
-TAN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - tangent
-ASN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - arcsine
-ACS{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - arccosine
-ATN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - arctangent
-
-These are not implemented. They are not currently issued by the compiler,
-and are handled by routines in libc. These are not implemented by the FPA11
-hardware, but are handled by the floating point support code. They should
-be implemented in future versions.
-
-Signalling:
-
-Signals are implemented. However current ELF kernels produced by Rebel.com
-have a bug in them that prevents the module from generating a SIGFPE. This
-is caused by a failure to alias fp_current to the kernel variable
-current_set[0] correctly.
-
-The kernel provided with this distribution (vmlinux-nwfpe-0.93) contains
-a fix for this problem and also incorporates the current version of the
-emulator directly. It is possible to run with no floating point module
-loaded with this kernel. It is provided as a demonstration of the
-technology and for those who want to do floating point work that depends
-on signals. It is not strictly necessary to use the module.
-
-A module (either the one provided by Russell King, or the one in this
-distribution) can be loaded to replace the functionality of the emulator
-built into the kernel.
diff --git a/Documentation/arm/nwfpe/TODO b/Documentation/arm/nwfpe/TODO
deleted file mode 100644
index 8027061b60eb..000000000000
--- a/Documentation/arm/nwfpe/TODO
+++ /dev/null
@@ -1,67 +0,0 @@
-TODO LIST
----------
-
-POW{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - power
-RPW{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - reverse power
-POL{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - polar angle (arctan2)
-
-LOG{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - logarithm to base 10
-LGN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - logarithm to base e
-EXP{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - exponent
-SIN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - sine
-COS{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - cosine
-TAN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - tangent
-ASN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - arcsine
-ACS{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - arccosine
-ATN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - arctangent
-
-These are not implemented. They are not currently issued by the compiler,
-and are handled by routines in libc. These are not implemented by the FPA11
-hardware, but are handled by the floating point support code. They should
-be implemented in future versions.
-
-There are a couple of ways to approach the implementation of these. One
-method would be to use accurate table methods for these routines. I have
-a couple of papers by S. Gal from IBM's research labs in Haifa, Israel that
-seem to promise extreme accuracy (in the order of 99.8%) and reasonable speed.
-These methods are used in GLIBC for some of the transcendental functions.
-
-Another approach, which I know little about is CORDIC. This stands for
-Coordinate Rotation Digital Computer, and is a method of computing
-transcendental functions using mostly shifts and adds and a few
-multiplications and divisions. The ARM excels at shifts and adds,
-so such a method could be promising, but requires more research to
-determine if it is feasible.
-
-Rounding Methods
-
-The IEEE standard defines 4 rounding modes. Round to nearest is the
-default, but rounding to + or - infinity or round to zero are also allowed.
-Many architectures allow the rounding mode to be specified by modifying bits
-in a control register. Not so with the ARM FPA11 architecture. To change
-the rounding mode one must specify it with each instruction.
-
-This has made porting some benchmarks difficult. It is possible to
-introduce such a capability into the emulator. The FPCR contains
-bits describing the rounding mode. The emulator could be altered to
-examine a flag, which if set forced it to ignore the rounding mode in
-the instruction, and use the mode specified in the bits in the FPCR.
-
-This would require a method of getting/setting the flag, and the bits
-in the FPCR. This requires a kernel call in ArmLinux, as WFC/RFC are
-supervisor only instructions. If anyone has any ideas or comments I
-would like to hear them.
-
-[NOTE: pulled out from some docs on ARM floating point, specifically
- for the Acorn FPE, but not limited to it:
-
- The floating point control register (FPCR) may only be present in some
- implementations: it is there to control the hardware in an implementation-
- specific manner, for example to disable the floating point system. The user
- mode of the ARM is not permitted to use this register (since the right is
- reserved to alter it between implementations) and the WFC and RFC
- instructions will trap if tried in user mode.
-
- Hence, the answer is yes, you could do this, but then you will run a high
- risk of becoming isolated if and when hardware FP emulation comes out
- -- Russell].
diff --git a/Documentation/arm/pxa/mfp.txt b/Documentation/arm/pxa/mfp.txt
deleted file mode 100644
index 0b7cab978c02..000000000000
--- a/Documentation/arm/pxa/mfp.txt
+++ /dev/null
@@ -1,286 +0,0 @@
- MFP Configuration for PXA2xx/PXA3xx Processors
-
- Eric Miao <eric.miao@marvell.com>
-
-MFP stands for Multi-Function Pin, which is the pin-mux logic on PXA3xx and
-later PXA series processors. This document describes the existing MFP API,
-and how board/platform driver authors could make use of it.
-
- Basic Concept
-===============
-
-Unlike the GPIO alternate function settings on PXA25x and PXA27x, a new MFP
-mechanism is introduced from PXA3xx to completely move the pin-mux functions
-out of the GPIO controller. In addition to pin-mux configurations, the MFP
-also controls the low power state, driving strength, pull-up/down and event
-detection of each pin. Below is a diagram of internal connections between
-the MFP logic and the remaining SoC peripherals:
-
- +--------+
- | |--(GPIO19)--+
- | GPIO | |
- | |--(GPIO...) |
- +--------+ |
- | +---------+
- +--------+ +------>| |
- | PWM2 |--(PWM_OUT)-------->| MFP |
- +--------+ +------>| |-------> to external PAD
- | +---->| |
- +--------+ | | +-->| |
- | SSP2 |---(TXD)----+ | | +---------+
- +--------+ | |
- | |
- +--------+ | |
- | Keypad |--(MKOUT4)----+ |
- +--------+ |
- |
- +--------+ |
- | UART2 |---(TXD)--------+
- +--------+
-
-NOTE: the external pad is named as MFP_PIN_GPIO19, it doesn't necessarily
-mean it's dedicated for GPIO19, only as a hint that internally this pin
-can be routed from GPIO19 of the GPIO controller.
-
-To better understand the change from PXA25x/PXA27x GPIO alternate function
-to this new MFP mechanism, here are several key points:
-
- 1. GPIO controller on PXA3xx is now a dedicated controller, same as other
- internal controllers like PWM, SSP and UART, with 128 internal signals
- which can be routed to external through one or more MFPs (e.g. GPIO<0>
- can be routed through either MFP_PIN_GPIO0 as well as MFP_PIN_GPIO0_2,
- see arch/arm/mach-pxa/mfp-pxa300.h)
-
- 2. Alternate function configuration is removed from this GPIO controller,
- the remaining functions are pure GPIO-specific, i.e.
-
- - GPIO signal level control
- - GPIO direction control
- - GPIO level change detection
-
- 3. Low power state for each pin is now controlled by MFP, this means the
- PGSRx registers on PXA2xx are now useless on PXA3xx
-
- 4. Wakeup detection is now controlled by MFP, PWER does not control the
- wakeup from GPIO(s) any more, depending on the sleeping state, ADxER
- (as defined in pxa3xx-regs.h) controls the wakeup from MFP
-
-NOTE: with such a clear separation of MFP and GPIO, by GPIO<xx> we normally
-mean it is a GPIO signal, and by MFP<xxx> or pin xxx, we mean a physical
-pad (or ball).
-
- MFP API Usage
-===============
-
-For board code writers, here are some guidelines:
-
-1. include ONE of the following header files in your <board>.c:
-
- - #include "mfp-pxa25x.h"
- - #include "mfp-pxa27x.h"
- - #include "mfp-pxa300.h"
- - #include "mfp-pxa320.h"
- - #include "mfp-pxa930.h"
-
- NOTE: only one file in your <board>.c, depending on the processors used,
- because pin configuration definitions may conflict in these file (i.e.
- same name, different meaning and settings on different processors). E.g.
- for zylonite platform, which support both PXA300/PXA310 and PXA320, two
- separate files are introduced: zylonite_pxa300.c and zylonite_pxa320.c
- (in addition to handle MFP configuration differences, they also handle
- the other differences between the two combinations).
-
- NOTE: PXA300 and PXA310 are almost identical in pin configurations (with
- PXA310 supporting some additional ones), thus the difference is actually
- covered in a single mfp-pxa300.h.
-
-2. prepare an array for the initial pin configurations, e.g.:
-
- static unsigned long mainstone_pin_config[] __initdata = {
- /* Chip Select */
- GPIO15_nCS_1,
-
- /* LCD - 16bpp Active TFT */
- GPIOxx_TFT_LCD_16BPP,
- GPIO16_PWM0_OUT, /* Backlight */
-
- /* MMC */
- GPIO32_MMC_CLK,
- GPIO112_MMC_CMD,
- GPIO92_MMC_DAT_0,
- GPIO109_MMC_DAT_1,
- GPIO110_MMC_DAT_2,
- GPIO111_MMC_DAT_3,
-
- ...
-
- /* GPIO */
- GPIO1_GPIO | WAKEUP_ON_EDGE_BOTH,
- };
-
- a) once the pin configurations are passed to pxa{2xx,3xx}_mfp_config(),
- and written to the actual registers, they are useless and may discard,
- adding '__initdata' will help save some additional bytes here.
-
- b) when there is only one possible pin configurations for a component,
- some simplified definitions can be used, e.g. GPIOxx_TFT_LCD_16BPP on
- PXA25x and PXA27x processors
-
- c) if by board design, a pin can be configured to wake up the system
- from low power state, it can be 'OR'ed with any of:
-
- WAKEUP_ON_EDGE_BOTH
- WAKEUP_ON_EDGE_RISE
- WAKEUP_ON_EDGE_FALL
- WAKEUP_ON_LEVEL_HIGH - specifically for enabling of keypad GPIOs,
-
- to indicate that this pin has the capability of wake-up the system,
- and on which edge(s). This, however, doesn't necessarily mean the
- pin _will_ wakeup the system, it will only when set_irq_wake() is
- invoked with the corresponding GPIO IRQ (GPIO_IRQ(xx) or gpio_to_irq())
- and eventually calls gpio_set_wake() for the actual register setting.
-
- d) although PXA3xx MFP supports edge detection on each pin, the
- internal logic will only wakeup the system when those specific bits
- in ADxER registers are set, which can be well mapped to the
- corresponding peripheral, thus set_irq_wake() can be called with
- the peripheral IRQ to enable the wakeup.
-
-
- MFP on PXA3xx
-===============
-
-Every external I/O pad on PXA3xx (excluding those for special purpose) has
-one MFP logic associated, and is controlled by one MFP register (MFPR).
-
-The MFPR has the following bit definitions (for PXA300/PXA310/PXA320):
-
- 31 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
- +-------------------------+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
- | RESERVED |PS|PU|PD| DRIVE |SS|SD|SO|EC|EF|ER|--| AF_SEL |
- +-------------------------+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
-
- Bit 3: RESERVED
- Bit 4: EDGE_RISE_EN - enable detection of rising edge on this pin
- Bit 5: EDGE_FALL_EN - enable detection of falling edge on this pin
- Bit 6: EDGE_CLEAR - disable edge detection on this pin
- Bit 7: SLEEP_OE_N - enable outputs during low power modes
- Bit 8: SLEEP_DATA - output data on the pin during low power modes
- Bit 9: SLEEP_SEL - selection control for low power modes signals
- Bit 13: PULLDOWN_EN - enable the internal pull-down resistor on this pin
- Bit 14: PULLUP_EN - enable the internal pull-up resistor on this pin
- Bit 15: PULL_SEL - pull state controlled by selected alternate function
- (0) or by PULL{UP,DOWN}_EN bits (1)
-
- Bit 0 - 2: AF_SEL - alternate function selection, 8 possibilities, from 0-7
- Bit 10-12: DRIVE - drive strength and slew rate
- 0b000 - fast 1mA
- 0b001 - fast 2mA
- 0b002 - fast 3mA
- 0b003 - fast 4mA
- 0b004 - slow 6mA
- 0b005 - fast 6mA
- 0b006 - slow 10mA
- 0b007 - fast 10mA
-
- MFP Design for PXA2xx/PXA3xx
-==============================
-
-Due to the difference of pin-mux handling between PXA2xx and PXA3xx, a unified
-MFP API is introduced to cover both series of processors.
-
-The basic idea of this design is to introduce definitions for all possible pin
-configurations, these definitions are processor and platform independent, and
-the actual API invoked to convert these definitions into register settings and
-make them effective there-after.
-
- Files Involved
- --------------
-
- - arch/arm/mach-pxa/include/mach/mfp.h
-
- for
- 1. Unified pin definitions - enum constants for all configurable pins
- 2. processor-neutral bit definitions for a possible MFP configuration
-
- - arch/arm/mach-pxa/mfp-pxa3xx.h
-
- for PXA3xx specific MFPR register bit definitions and PXA3xx common pin
- configurations
-
- - arch/arm/mach-pxa/mfp-pxa2xx.h
-
- for PXA2xx specific definitions and PXA25x/PXA27x common pin configurations
-
- - arch/arm/mach-pxa/mfp-pxa25x.h
- arch/arm/mach-pxa/mfp-pxa27x.h
- arch/arm/mach-pxa/mfp-pxa300.h
- arch/arm/mach-pxa/mfp-pxa320.h
- arch/arm/mach-pxa/mfp-pxa930.h
-
- for processor specific definitions
-
- - arch/arm/mach-pxa/mfp-pxa3xx.c
- - arch/arm/mach-pxa/mfp-pxa2xx.c
-
- for implementation of the pin configuration to take effect for the actual
- processor.
-
- Pin Configuration
- -----------------
-
- The following comments are copied from mfp.h (see the actual source code
- for most updated info)
-
- /*
- * a possible MFP configuration is represented by a 32-bit integer
- *
- * bit 0.. 9 - MFP Pin Number (1024 Pins Maximum)
- * bit 10..12 - Alternate Function Selection
- * bit 13..15 - Drive Strength
- * bit 16..18 - Low Power Mode State
- * bit 19..20 - Low Power Mode Edge Detection
- * bit 21..22 - Run Mode Pull State
- *
- * to facilitate the definition, the following macros are provided
- *
- * MFP_CFG_DEFAULT - default MFP configuration value, with
- * alternate function = 0,
- * drive strength = fast 3mA (MFP_DS03X)
- * low power mode = default
- * edge detection = none
- *
- * MFP_CFG - default MFPR value with alternate function
- * MFP_CFG_DRV - default MFPR value with alternate function and
- * pin drive strength
- * MFP_CFG_LPM - default MFPR value with alternate function and
- * low power mode
- * MFP_CFG_X - default MFPR value with alternate function,
- * pin drive strength and low power mode
- */
-
- Examples of pin configurations are:
-
- #define GPIO94_SSP3_RXD MFP_CFG_X(GPIO94, AF1, DS08X, FLOAT)
-
- which reads GPIO94 can be configured as SSP3_RXD, with alternate function
- selection of 1, driving strength of 0b101, and a float state in low power
- modes.
-
- NOTE: this is the default setting of this pin being configured as SSP3_RXD
- which can be modified a bit in board code, though it is not recommended to
- do so, simply because this default setting is usually carefully encoded,
- and is supposed to work in most cases.
-
- Register Settings
- -----------------
-
- Register settings on PXA3xx for a pin configuration is actually very
- straight-forward, most bits can be converted directly into MFPR value
- in a easier way. Two sets of MFPR values are calculated: the run-time
- ones and the low power mode ones, to allow different settings.
-
- The conversion from a generic pin configuration to the actual register
- settings on PXA2xx is a bit complicated: many registers are involved,
- including GAFRx, GPDRx, PGSRx, PWER, PKWR, PFER and PRER. Please see
- mfp-pxa2xx.c for how the conversion is made.
diff --git a/Documentation/arm/sti/overview.txt b/Documentation/arm/sti/overview.txt
deleted file mode 100644
index 1a4e93d6027f..000000000000
--- a/Documentation/arm/sti/overview.txt
+++ /dev/null
@@ -1,33 +0,0 @@
- STi ARM Linux Overview
- ==========================
-
-Introduction
-------------
-
- The ST Microelectronics Multimedia and Application Processors range of
- CortexA9 System-on-Chip are supported by the 'STi' platform of
- ARM Linux. Currently STiH415, STiH416 SOCs are supported with both
- B2000 and B2020 Reference boards.
-
-
- configuration
- -------------
-
- A generic configuration is provided for both STiH415/416, and can be used as the
- default by
- make stih41x_defconfig
-
- Layout
- ------
- All the files for multiple machine families (STiH415, STiH416, and STiG125)
- are located in the platform code contained in arch/arm/mach-sti
-
- There is a generic board board-dt.c in the mach folder which support
- Flattened Device Tree, which means, It works with any compatible board with
- Device Trees.
-
-
- Document Author
- ---------------
-
- Srinivas Kandagatla <srinivas.kandagatla@st.com>, (c) 2013 ST Microelectronics
diff --git a/Documentation/arm/sti/stih407-overview.txt b/Documentation/arm/sti/stih407-overview.txt
deleted file mode 100644
index 3343f32f58bc..000000000000
--- a/Documentation/arm/sti/stih407-overview.txt
+++ /dev/null
@@ -1,18 +0,0 @@
- STiH407 Overview
- ================
-
-Introduction
-------------
-
- The STiH407 is the new generation of SoC for Multi-HD, AVC set-top boxes
- and server/connected client application for satellite, cable, terrestrial
- and IP-STB markets.
-
- Features
- - ARM Cortex-A9 1.5 GHz dual core CPU (28nm)
- - SATA2, USB 3.0, PCIe, Gbit Ethernet
-
- Document Author
- ---------------
-
- Maxime Coquelin <maxime.coquelin@st.com>, (c) 2014 ST Microelectronics
diff --git a/Documentation/arm/sti/stih415-overview.txt b/Documentation/arm/sti/stih415-overview.txt
deleted file mode 100644
index 1383e33f265d..000000000000
--- a/Documentation/arm/sti/stih415-overview.txt
+++ /dev/null
@@ -1,12 +0,0 @@
- STiH415 Overview
- ================
-
-Introduction
-------------
-
- The STiH415 is the next generation of HD, AVC set-top box processors
- for satellite, cable, terrestrial and IP-STB markets.
-
- Features
- - ARM Cortex-A9 1.0 GHz, dual-core CPU
- - SATA2x2,USB 2.0x3, PCIe, Gbit Ethernet MACx2
diff --git a/Documentation/arm/sti/stih416-overview.txt b/Documentation/arm/sti/stih416-overview.txt
deleted file mode 100644
index 558444c201c6..000000000000
--- a/Documentation/arm/sti/stih416-overview.txt
+++ /dev/null
@@ -1,12 +0,0 @@
- STiH416 Overview
- ================
-
-Introduction
-------------
-
- The STiH416 is the next generation of HD, AVC set-top box processors
- for satellite, cable, terrestrial and IP-STB markets.
-
- Features
- - ARM Cortex-A9 1.2 GHz dual core CPU
- - SATA2x2,USB 2.0x3, PCIe, Gbit Ethernet MACx2
diff --git a/Documentation/arm/sti/stih418-overview.txt b/Documentation/arm/sti/stih418-overview.txt
deleted file mode 100644
index 1cd8fc80646d..000000000000
--- a/Documentation/arm/sti/stih418-overview.txt
+++ /dev/null
@@ -1,20 +0,0 @@
- STiH418 Overview
- ================
-
-Introduction
-------------
-
- The STiH418 is the new generation of SoC for UHDp60 set-top boxes
- and server/connected client application for satellite, cable, terrestrial
- and IP-STB markets.
-
- Features
- - ARM Cortex-A9 1.5 GHz quad core CPU (28nm)
- - SATA2, USB 3.0, PCIe, Gbit Ethernet
- - HEVC L5.1 Main 10
- - VP9
-
- Document Author
- ---------------
-
- Maxime Coquelin <maxime.coquelin@st.com>, (c) 2015 ST Microelectronics
diff --git a/Documentation/arm/sunxi/README b/Documentation/arm/sunxi/README
deleted file mode 100644
index f8efc21998bf..000000000000
--- a/Documentation/arm/sunxi/README
+++ /dev/null
@@ -1,102 +0,0 @@
-ARM Allwinner SoCs
-==================
-
-This document lists all the ARM Allwinner SoCs that are currently
-supported in mainline by the Linux kernel. This document will also
-provide links to documentation and/or datasheet for these SoCs.
-
-SunXi family
-------------
- Linux kernel mach directory: arch/arm/mach-sunxi
-
- Flavors:
- * ARM926 based SoCs
- - Allwinner F20 (sun3i)
- + Not Supported
-
- * ARM Cortex-A8 based SoCs
- - Allwinner A10 (sun4i)
- + Datasheet
- http://dl.linux-sunxi.org/A10/A10%20Datasheet%20-%20v1.21%20%282012-04-06%29.pdf
- + User Manual
- http://dl.linux-sunxi.org/A10/A10%20User%20Manual%20-%20v1.20%20%282012-04-09%2c%20DECRYPTED%29.pdf
-
- - Allwinner A10s (sun5i)
- + Datasheet
- http://dl.linux-sunxi.org/A10s/A10s%20Datasheet%20-%20v1.20%20%282012-03-27%29.pdf
-
- - Allwinner A13 / R8 (sun5i)
- + Datasheet
- http://dl.linux-sunxi.org/A13/A13%20Datasheet%20-%20v1.12%20%282012-03-29%29.pdf
- + User Manual
- http://dl.linux-sunxi.org/A13/A13%20User%20Manual%20-%20v1.2%20%282013-01-08%29.pdf
-
- - Next Thing Co GR8 (sun5i)
-
- * Single ARM Cortex-A7 based SoCs
- - Allwinner V3s (sun8i)
- + Datasheet
- http://linux-sunxi.org/File:Allwinner_V3s_Datasheet_V1.0.pdf
-
- * Dual ARM Cortex-A7 based SoCs
- - Allwinner A20 (sun7i)
- + User Manual
- http://dl.linux-sunxi.org/A20/A20%20User%20Manual%202013-03-22.pdf
-
- - Allwinner A23 (sun8i)
- + Datasheet
- http://dl.linux-sunxi.org/A23/A23%20Datasheet%20V1.0%2020130830.pdf
- + User Manual
- http://dl.linux-sunxi.org/A23/A23%20User%20Manual%20V1.0%2020130830.pdf
-
- * Quad ARM Cortex-A7 based SoCs
- - Allwinner A31 (sun6i)
- + Datasheet
- http://dl.linux-sunxi.org/A31/A3x_release_document/A31/IC/A31%20datasheet%20V1.3%2020131106.pdf
- + User Manual
- http://dl.linux-sunxi.org/A31/A3x_release_document/A31/IC/A31%20user%20manual%20V1.1%2020130630.pdf
-
- - Allwinner A31s (sun6i)
- + Datasheet
- http://dl.linux-sunxi.org/A31/A3x_release_document/A31s/IC/A31s%20datasheet%20V1.3%2020131106.pdf
- + User Manual
- http://dl.linux-sunxi.org/A31/A3x_release_document/A31s/IC/A31s%20User%20Manual%20%20V1.0%2020130322.pdf
-
- - Allwinner A33 (sun8i)
- + Datasheet
- http://dl.linux-sunxi.org/A33/A33%20Datasheet%20release%201.1.pdf
- + User Manual
- http://dl.linux-sunxi.org/A33/A33%20user%20manual%20release%201.1.pdf
-
- - Allwinner H2+ (sun8i)
- + No document available now, but is known to be working properly with
- H3 drivers and memory map.
-
- - Allwinner H3 (sun8i)
- + Datasheet
- http://dl.linux-sunxi.org/H3/Allwinner_H3_Datasheet_V1.0.pdf
-
- - Allwinner R40 (sun8i)
- + Datasheet
- https://github.com/tinalinux/docs/raw/r40-v1.y/R40_Datasheet_V1.0.pdf
- + User Manual
- https://github.com/tinalinux/docs/raw/r40-v1.y/Allwinner_R40_User_Manual_V1.0.pdf
-
- * Quad ARM Cortex-A15, Quad ARM Cortex-A7 based SoCs
- - Allwinner A80
- + Datasheet
- http://dl.linux-sunxi.org/A80/A80_Datasheet_Revision_1.0_0404.pdf
-
- * Octa ARM Cortex-A7 based SoCs
- - Allwinner A83T
- + Datasheet
- https://github.com/allwinner-zh/documents/raw/master/A83T/A83T_Datasheet_v1.3_20150510.pdf
- + User Manual
- https://github.com/allwinner-zh/documents/raw/master/A83T/A83T_User_Manual_v1.5.1_20150513.pdf
-
- * Quad ARM Cortex-A53 based SoCs
- - Allwinner A64
- + Datasheet
- http://dl.linux-sunxi.org/A64/A64_Datasheet_V1.1.pdf
- + User Manual
- http://dl.linux-sunxi.org/A64/Allwinner%20A64%20User%20Manual%20v1.0.pdf
diff --git a/Documentation/arm/sunxi/clocks.txt b/Documentation/arm/sunxi/clocks.txt
deleted file mode 100644
index e09a88aa3136..000000000000
--- a/Documentation/arm/sunxi/clocks.txt
+++ /dev/null
@@ -1,56 +0,0 @@
-Frequently asked questions about the sunxi clock system
-=======================================================
-
-This document contains useful bits of information that people tend to ask
-about the sunxi clock system, as well as accompanying ASCII art when adequate.
-
-Q: Why is the main 24MHz oscillator gatable? Wouldn't that break the
- system?
-
-A: The 24MHz oscillator allows gating to save power. Indeed, if gated
- carelessly the system would stop functioning, but with the right
- steps, one can gate it and keep the system running. Consider this
- simplified suspend example:
-
- While the system is operational, you would see something like
-
- 24MHz 32kHz
- |
- PLL1
- \
- \_ CPU Mux
- |
- [CPU]
-
- When you are about to suspend, you switch the CPU Mux to the 32kHz
- oscillator:
-
- 24Mhz 32kHz
- | |
- PLL1 |
- /
- CPU Mux _/
- |
- [CPU]
-
- Finally you can gate the main oscillator
-
- 32kHz
- |
- |
- /
- CPU Mux _/
- |
- [CPU]
-
-Q: Were can I learn more about the sunxi clocks?
-
-A: The linux-sunxi wiki contains a page documenting the clock registers,
- you can find it at
-
- http://linux-sunxi.org/A10/CCM
-
- The authoritative source for information at this time is the ccmu driver
- released by Allwinner, you can find it at
-
- https://github.com/linux-sunxi/linux-sunxi/tree/sunxi-3.0/arch/arm/mach-sun4i/clock/ccmu
diff --git a/Documentation/arm/swp_emulation b/Documentation/arm/swp_emulation
deleted file mode 100644
index af903d22fd93..000000000000
--- a/Documentation/arm/swp_emulation
+++ /dev/null
@@ -1,27 +0,0 @@
-Software emulation of deprecated SWP instruction (CONFIG_SWP_EMULATE)
----------------------------------------------------------------------
-
-ARMv6 architecture deprecates use of the SWP/SWPB instructions, and recommeds
-moving to the load-locked/store-conditional instructions LDREX and STREX.
-
-ARMv7 multiprocessing extensions introduce the ability to disable these
-instructions, triggering an undefined instruction exception when executed.
-Trapped instructions are emulated using an LDREX/STREX or LDREXB/STREXB
-sequence. If a memory access fault (an abort) occurs, a segmentation fault is
-signalled to the triggering process.
-
-/proc/cpu/swp_emulation holds some statistics/information, including the PID of
-the last process to trigger the emulation to be invocated. For example:
----
-Emulated SWP: 12
-Emulated SWPB: 0
-Aborted SWP{B}: 1
-Last process: 314
----
-
-NOTE: when accessing uncached shared regions, LDREX/STREX rely on an external
-transaction monitoring block called a global monitor to maintain update
-atomicity. If your system does not implement a global monitor, this option can
-cause programs that perform SWP operations to uncached memory to deadlock, as
-the STREX operation will always fail.
-
diff --git a/Documentation/arm/tcm.txt b/Documentation/arm/tcm.txt
deleted file mode 100644
index 7c15871c1885..000000000000
--- a/Documentation/arm/tcm.txt
+++ /dev/null
@@ -1,155 +0,0 @@
-ARM TCM (Tightly-Coupled Memory) handling in Linux
-----
-Written by Linus Walleij <linus.walleij@stericsson.com>
-
-Some ARM SoC:s have a so-called TCM (Tightly-Coupled Memory).
-This is usually just a few (4-64) KiB of RAM inside the ARM
-processor.
-
-Due to being embedded inside the CPU The TCM has a
-Harvard-architecture, so there is an ITCM (instruction TCM)
-and a DTCM (data TCM). The DTCM can not contain any
-instructions, but the ITCM can actually contain data.
-The size of DTCM or ITCM is minimum 4KiB so the typical
-minimum configuration is 4KiB ITCM and 4KiB DTCM.
-
-ARM CPU:s have special registers to read out status, physical
-location and size of TCM memories. arch/arm/include/asm/cputype.h
-defines a CPUID_TCM register that you can read out from the
-system control coprocessor. Documentation from ARM can be found
-at http://infocenter.arm.com, search for "TCM Status Register"
-to see documents for all CPUs. Reading this register you can
-determine if ITCM (bits 1-0) and/or DTCM (bit 17-16) is present
-in the machine.
-
-There is further a TCM region register (search for "TCM Region
-Registers" at the ARM site) that can report and modify the location
-size of TCM memories at runtime. This is used to read out and modify
-TCM location and size. Notice that this is not a MMU table: you
-actually move the physical location of the TCM around. At the
-place you put it, it will mask any underlying RAM from the
-CPU so it is usually wise not to overlap any physical RAM with
-the TCM.
-
-The TCM memory can then be remapped to another address again using
-the MMU, but notice that the TCM if often used in situations where
-the MMU is turned off. To avoid confusion the current Linux
-implementation will map the TCM 1 to 1 from physical to virtual
-memory in the location specified by the kernel. Currently Linux
-will map ITCM to 0xfffe0000 and on, and DTCM to 0xfffe8000 and
-on, supporting a maximum of 32KiB of ITCM and 32KiB of DTCM.
-
-Newer versions of the region registers also support dividing these
-TCMs in two separate banks, so for example an 8KiB ITCM is divided
-into two 4KiB banks with its own control registers. The idea is to
-be able to lock and hide one of the banks for use by the secure
-world (TrustZone).
-
-TCM is used for a few things:
-
-- FIQ and other interrupt handlers that need deterministic
- timing and cannot wait for cache misses.
-
-- Idle loops where all external RAM is set to self-refresh
- retention mode, so only on-chip RAM is accessible by
- the CPU and then we hang inside ITCM waiting for an
- interrupt.
-
-- Other operations which implies shutting off or reconfiguring
- the external RAM controller.
-
-There is an interface for using TCM on the ARM architecture
-in <asm/tcm.h>. Using this interface it is possible to:
-
-- Define the physical address and size of ITCM and DTCM.
-
-- Tag functions to be compiled into ITCM.
-
-- Tag data and constants to be allocated to DTCM and ITCM.
-
-- Have the remaining TCM RAM added to a special
- allocation pool with gen_pool_create() and gen_pool_add()
- and provice tcm_alloc() and tcm_free() for this
- memory. Such a heap is great for things like saving
- device state when shutting off device power domains.
-
-A machine that has TCM memory shall select HAVE_TCM from
-arch/arm/Kconfig for itself. Code that needs to use TCM shall
-#include <asm/tcm.h>
-
-Functions to go into itcm can be tagged like this:
-int __tcmfunc foo(int bar);
-
-Since these are marked to become long_calls and you may want
-to have functions called locally inside the TCM without
-wasting space, there is also the __tcmlocalfunc prefix that
-will make the call relative.
-
-Variables to go into dtcm can be tagged like this:
-int __tcmdata foo;
-
-Constants can be tagged like this:
-int __tcmconst foo;
-
-To put assembler into TCM just use
-.section ".tcm.text" or .section ".tcm.data"
-respectively.
-
-Example code:
-
-#include <asm/tcm.h>
-
-/* Uninitialized data */
-static u32 __tcmdata tcmvar;
-/* Initialized data */
-static u32 __tcmdata tcmassigned = 0x2BADBABEU;
-/* Constant */
-static const u32 __tcmconst tcmconst = 0xCAFEBABEU;
-
-static void __tcmlocalfunc tcm_to_tcm(void)
-{
- int i;
- for (i = 0; i < 100; i++)
- tcmvar ++;
-}
-
-static void __tcmfunc hello_tcm(void)
-{
- /* Some abstract code that runs in ITCM */
- int i;
- for (i = 0; i < 100; i++) {
- tcmvar ++;
- }
- tcm_to_tcm();
-}
-
-static void __init test_tcm(void)
-{
- u32 *tcmem;
- int i;
-
- hello_tcm();
- printk("Hello TCM executed from ITCM RAM\n");
-
- printk("TCM variable from testrun: %u @ %p\n", tcmvar, &tcmvar);
- tcmvar = 0xDEADBEEFU;
- printk("TCM variable: 0x%x @ %p\n", tcmvar, &tcmvar);
-
- printk("TCM assigned variable: 0x%x @ %p\n", tcmassigned, &tcmassigned);
-
- printk("TCM constant: 0x%x @ %p\n", tcmconst, &tcmconst);
-
- /* Allocate some TCM memory from the pool */
- tcmem = tcm_alloc(20);
- if (tcmem) {
- printk("TCM Allocated 20 bytes of TCM @ %p\n", tcmem);
- tcmem[0] = 0xDEADBEEFU;
- tcmem[1] = 0x2BADBABEU;
- tcmem[2] = 0xCAFEBABEU;
- tcmem[3] = 0xDEADBEEFU;
- tcmem[4] = 0x2BADBABEU;
- for (i = 0; i < 5; i++)
- printk("TCM tcmem[%d] = %08x\n", i, tcmem[i]);
- tcm_free(tcmem, 20);
- }
-}
diff --git a/Documentation/arm/uefi.txt b/Documentation/arm/uefi.txt
deleted file mode 100644
index 6543a0adea8a..000000000000
--- a/Documentation/arm/uefi.txt
+++ /dev/null
@@ -1,60 +0,0 @@
-UEFI, the Unified Extensible Firmware Interface, is a specification
-governing the behaviours of compatible firmware interfaces. It is
-maintained by the UEFI Forum - http://www.uefi.org/.
-
-UEFI is an evolution of its predecessor 'EFI', so the terms EFI and
-UEFI are used somewhat interchangeably in this document and associated
-source code. As a rule, anything new uses 'UEFI', whereas 'EFI' refers
-to legacy code or specifications.
-
-UEFI support in Linux
-=====================
-Booting on a platform with firmware compliant with the UEFI specification
-makes it possible for the kernel to support additional features:
-- UEFI Runtime Services
-- Retrieving various configuration information through the standardised
- interface of UEFI configuration tables. (ACPI, SMBIOS, ...)
-
-For actually enabling [U]EFI support, enable:
-- CONFIG_EFI=y
-- CONFIG_EFI_VARS=y or m
-
-The implementation depends on receiving information about the UEFI environment
-in a Flattened Device Tree (FDT) - so is only available with CONFIG_OF.
-
-UEFI stub
-=========
-The "stub" is a feature that extends the Image/zImage into a valid UEFI
-PE/COFF executable, including a loader application that makes it possible to
-load the kernel directly from the UEFI shell, boot menu, or one of the
-lightweight bootloaders like Gummiboot or rEFInd.
-
-The kernel image built with stub support remains a valid kernel image for
-booting in non-UEFI environments.
-
-UEFI kernel support on ARM
-==========================
-UEFI kernel support on the ARM architectures (arm and arm64) is only available
-when boot is performed through the stub.
-
-When booting in UEFI mode, the stub deletes any memory nodes from a provided DT.
-Instead, the kernel reads the UEFI memory map.
-
-The stub populates the FDT /chosen node with (and the kernel scans for) the
-following parameters:
-________________________________________________________________________________
-Name | Size | Description
-================================================================================
-linux,uefi-system-table | 64-bit | Physical address of the UEFI System Table.
---------------------------------------------------------------------------------
-linux,uefi-mmap-start | 64-bit | Physical address of the UEFI memory map,
- | | populated by the UEFI GetMemoryMap() call.
---------------------------------------------------------------------------------
-linux,uefi-mmap-size | 32-bit | Size in bytes of the UEFI memory map
- | | pointed to in previous entry.
---------------------------------------------------------------------------------
-linux,uefi-mmap-desc-size | 32-bit | Size in bytes of each entry in the UEFI
- | | memory map.
---------------------------------------------------------------------------------
-linux,uefi-mmap-desc-ver | 32-bit | Version of the mmap descriptor format.
---------------------------------------------------------------------------------
diff --git a/Documentation/arm/vlocks.txt b/Documentation/arm/vlocks.txt
deleted file mode 100644
index 45731672c564..000000000000
--- a/Documentation/arm/vlocks.txt
+++ /dev/null
@@ -1,211 +0,0 @@
-vlocks for Bare-Metal Mutual Exclusion
-======================================
-
-Voting Locks, or "vlocks" provide a simple low-level mutual exclusion
-mechanism, with reasonable but minimal requirements on the memory
-system.
-
-These are intended to be used to coordinate critical activity among CPUs
-which are otherwise non-coherent, in situations where the hardware
-provides no other mechanism to support this and ordinary spinlocks
-cannot be used.
-
-
-vlocks make use of the atomicity provided by the memory system for
-writes to a single memory location. To arbitrate, every CPU "votes for
-itself", by storing a unique number to a common memory location. The
-final value seen in that memory location when all the votes have been
-cast identifies the winner.
-
-In order to make sure that the election produces an unambiguous result
-in finite time, a CPU will only enter the election in the first place if
-no winner has been chosen and the election does not appear to have
-started yet.
-
-
-Algorithm
----------
-
-The easiest way to explain the vlocks algorithm is with some pseudo-code:
-
-
- int currently_voting[NR_CPUS] = { 0, };
- int last_vote = -1; /* no votes yet */
-
- bool vlock_trylock(int this_cpu)
- {
- /* signal our desire to vote */
- currently_voting[this_cpu] = 1;
- if (last_vote != -1) {
- /* someone already volunteered himself */
- currently_voting[this_cpu] = 0;
- return false; /* not ourself */
- }
-
- /* let's suggest ourself */
- last_vote = this_cpu;
- currently_voting[this_cpu] = 0;
-
- /* then wait until everyone else is done voting */
- for_each_cpu(i) {
- while (currently_voting[i] != 0)
- /* wait */;
- }
-
- /* result */
- if (last_vote == this_cpu)
- return true; /* we won */
- return false;
- }
-
- bool vlock_unlock(void)
- {
- last_vote = -1;
- }
-
-
-The currently_voting[] array provides a way for the CPUs to determine
-whether an election is in progress, and plays a role analogous to the
-"entering" array in Lamport's bakery algorithm [1].
-
-However, once the election has started, the underlying memory system
-atomicity is used to pick the winner. This avoids the need for a static
-priority rule to act as a tie-breaker, or any counters which could
-overflow.
-
-As long as the last_vote variable is globally visible to all CPUs, it
-will contain only one value that won't change once every CPU has cleared
-its currently_voting flag.
-
-
-Features and limitations
-------------------------
-
- * vlocks are not intended to be fair. In the contended case, it is the
- _last_ CPU which attempts to get the lock which will be most likely
- to win.
-
- vlocks are therefore best suited to situations where it is necessary
- to pick a unique winner, but it does not matter which CPU actually
- wins.
-
- * Like other similar mechanisms, vlocks will not scale well to a large
- number of CPUs.
-
- vlocks can be cascaded in a voting hierarchy to permit better scaling
- if necessary, as in the following hypothetical example for 4096 CPUs:
-
- /* first level: local election */
- my_town = towns[(this_cpu >> 4) & 0xf];
- I_won = vlock_trylock(my_town, this_cpu & 0xf);
- if (I_won) {
- /* we won the town election, let's go for the state */
- my_state = states[(this_cpu >> 8) & 0xf];
- I_won = vlock_lock(my_state, this_cpu & 0xf));
- if (I_won) {
- /* and so on */
- I_won = vlock_lock(the_whole_country, this_cpu & 0xf];
- if (I_won) {
- /* ... */
- }
- vlock_unlock(the_whole_country);
- }
- vlock_unlock(my_state);
- }
- vlock_unlock(my_town);
-
-
-ARM implementation
-------------------
-
-The current ARM implementation [2] contains some optimisations beyond
-the basic algorithm:
-
- * By packing the members of the currently_voting array close together,
- we can read the whole array in one transaction (providing the number
- of CPUs potentially contending the lock is small enough). This
- reduces the number of round-trips required to external memory.
-
- In the ARM implementation, this means that we can use a single load
- and comparison:
-
- LDR Rt, [Rn]
- CMP Rt, #0
-
- ...in place of code equivalent to:
-
- LDRB Rt, [Rn]
- CMP Rt, #0
- LDRBEQ Rt, [Rn, #1]
- CMPEQ Rt, #0
- LDRBEQ Rt, [Rn, #2]
- CMPEQ Rt, #0
- LDRBEQ Rt, [Rn, #3]
- CMPEQ Rt, #0
-
- This cuts down on the fast-path latency, as well as potentially
- reducing bus contention in contended cases.
-
- The optimisation relies on the fact that the ARM memory system
- guarantees coherency between overlapping memory accesses of
- different sizes, similarly to many other architectures. Note that
- we do not care which element of currently_voting appears in which
- bits of Rt, so there is no need to worry about endianness in this
- optimisation.
-
- If there are too many CPUs to read the currently_voting array in
- one transaction then multiple transations are still required. The
- implementation uses a simple loop of word-sized loads for this
- case. The number of transactions is still fewer than would be
- required if bytes were loaded individually.
-
-
- In principle, we could aggregate further by using LDRD or LDM, but
- to keep the code simple this was not attempted in the initial
- implementation.
-
-
- * vlocks are currently only used to coordinate between CPUs which are
- unable to enable their caches yet. This means that the
- implementation removes many of the barriers which would be required
- when executing the algorithm in cached memory.
-
- packing of the currently_voting array does not work with cached
- memory unless all CPUs contending the lock are cache-coherent, due
- to cache writebacks from one CPU clobbering values written by other
- CPUs. (Though if all the CPUs are cache-coherent, you should be
- probably be using proper spinlocks instead anyway).
-
-
- * The "no votes yet" value used for the last_vote variable is 0 (not
- -1 as in the pseudocode). This allows statically-allocated vlocks
- to be implicitly initialised to an unlocked state simply by putting
- them in .bss.
-
- An offset is added to each CPU's ID for the purpose of setting this
- variable, so that no CPU uses the value 0 for its ID.
-
-
-Colophon
---------
-
-Originally created and documented by Dave Martin for Linaro Limited, for
-use in ARM-based big.LITTLE platforms, with review and input gratefully
-received from Nicolas Pitre and Achin Gupta. Thanks to Nicolas for
-grabbing most of this text out of the relevant mail thread and writing
-up the pseudocode.
-
-Copyright (C) 2012-2013 Linaro Limited
-Distributed under the terms of Version 2 of the GNU General Public
-License, as defined in linux/COPYING.
-
-
-References
-----------
-
-[1] Lamport, L. "A New Solution of Dijkstra's Concurrent Programming
- Problem", Communications of the ACM 17, 8 (August 1974), 453-455.
-
- https://en.wikipedia.org/wiki/Lamport%27s_bakery_algorithm
-
-[2] linux/arch/arm/common/vlock.S, www.kernel.org.
diff --git a/Documentation/arm64/acpi_object_usage.txt b/Documentation/arm64/acpi_object_usage.txt
deleted file mode 100644
index c77010c5c1f0..000000000000
--- a/Documentation/arm64/acpi_object_usage.txt
+++ /dev/null
@@ -1,622 +0,0 @@
-ACPI Tables
------------
-The expectations of individual ACPI tables are discussed in the list that
-follows.
-
-If a section number is used, it refers to a section number in the ACPI
-specification where the object is defined. If "Signature Reserved" is used,
-the table signature (the first four bytes of the table) is the only portion
-of the table recognized by the specification, and the actual table is defined
-outside of the UEFI Forum (see Section 5.2.6 of the specification).
-
-For ACPI on arm64, tables also fall into the following categories:
-
- -- Required: DSDT, FADT, GTDT, MADT, MCFG, RSDP, SPCR, XSDT
-
- -- Recommended: BERT, EINJ, ERST, HEST, PCCT, SSDT
-
- -- Optional: BGRT, CPEP, CSRT, DBG2, DRTM, ECDT, FACS, FPDT, IORT,
- MCHI, MPST, MSCT, NFIT, PMTT, RASF, SBST, SLIT, SPMI, SRAT, STAO,
- TCPA, TPM2, UEFI, XENV
-
- -- Not supported: BOOT, DBGP, DMAR, ETDT, HPET, IBFT, IVRS, LPIT,
- MSDM, OEMx, PSDT, RSDT, SLIC, WAET, WDAT, WDRT, WPBT
-
-Table Usage for ARMv8 Linux
------ ----------------------------------------------------------------
-BERT Section 18.3 (signature == "BERT")
- == Boot Error Record Table ==
- Must be supplied if RAS support is provided by the platform. It
- is recommended this table be supplied.
-
-BOOT Signature Reserved (signature == "BOOT")
- == simple BOOT flag table ==
- Microsoft only table, will not be supported.
-
-BGRT Section 5.2.22 (signature == "BGRT")
- == Boot Graphics Resource Table ==
- Optional, not currently supported, with no real use-case for an
- ARM server.
-
-CPEP Section 5.2.18 (signature == "CPEP")
- == Corrected Platform Error Polling table ==
- Optional, not currently supported, and not recommended until such
- time as ARM-compatible hardware is available, and the specification
- suitably modified.
-
-CSRT Signature Reserved (signature == "CSRT")
- == Core System Resources Table ==
- Optional, not currently supported.
-
-DBG2 Signature Reserved (signature == "DBG2")
- == DeBuG port table 2 ==
- License has changed and should be usable. Optional if used instead
- of earlycon=<device> on the command line.
-
-DBGP Signature Reserved (signature == "DBGP")
- == DeBuG Port table ==
- Microsoft only table, will not be supported.
-
-DSDT Section 5.2.11.1 (signature == "DSDT")
- == Differentiated System Description Table ==
- A DSDT is required; see also SSDT.
-
- ACPI tables contain only one DSDT but can contain one or more SSDTs,
- which are optional. Each SSDT can only add to the ACPI namespace,
- but cannot modify or replace anything in the DSDT.
-
-DMAR Signature Reserved (signature == "DMAR")
- == DMA Remapping table ==
- x86 only table, will not be supported.
-
-DRTM Signature Reserved (signature == "DRTM")
- == Dynamic Root of Trust for Measurement table ==
- Optional, not currently supported.
-
-ECDT Section 5.2.16 (signature == "ECDT")
- == Embedded Controller Description Table ==
- Optional, not currently supported, but could be used on ARM if and
- only if one uses the GPE_BIT field to represent an IRQ number, since
- there are no GPE blocks defined in hardware reduced mode. This would
- need to be modified in the ACPI specification.
-
-EINJ Section 18.6 (signature == "EINJ")
- == Error Injection table ==
- This table is very useful for testing platform response to error
- conditions; it allows one to inject an error into the system as
- if it had actually occurred. However, this table should not be
- shipped with a production system; it should be dynamically loaded
- and executed with the ACPICA tools only during testing.
-
-ERST Section 18.5 (signature == "ERST")
- == Error Record Serialization Table ==
- On a platform supports RAS, this table must be supplied if it is not
- UEFI-based; if it is UEFI-based, this table may be supplied. When this
- table is not present, UEFI run time service will be utilized to save
- and retrieve hardware error information to and from a persistent store.
-
-ETDT Signature Reserved (signature == "ETDT")
- == Event Timer Description Table ==
- Obsolete table, will not be supported.
-
-FACS Section 5.2.10 (signature == "FACS")
- == Firmware ACPI Control Structure ==
- It is unlikely that this table will be terribly useful. If it is
- provided, the Global Lock will NOT be used since it is not part of
- the hardware reduced profile, and only 64-bit address fields will
- be considered valid.
-
-FADT Section 5.2.9 (signature == "FACP")
- == Fixed ACPI Description Table ==
- Required for arm64.
-
- The HW_REDUCED_ACPI flag must be set. All of the fields that are
- to be ignored when HW_REDUCED_ACPI is set are expected to be set to
- zero.
-
- If an FACS table is provided, the X_FIRMWARE_CTRL field is to be
- used, not FIRMWARE_CTRL.
-
- If PSCI is used (as is recommended), make sure that ARM_BOOT_ARCH is
- filled in properly -- that the PSCI_COMPLIANT flag is set and that
- PSCI_USE_HVC is set or unset as needed (see table 5-37).
-
- For the DSDT that is also required, the X_DSDT field is to be used,
- not the DSDT field.
-
-FPDT Section 5.2.23 (signature == "FPDT")
- == Firmware Performance Data Table ==
- Optional, not currently supported.
-
-GTDT Section 5.2.24 (signature == "GTDT")
- == Generic Timer Description Table ==
- Required for arm64.
-
-HEST Section 18.3.2 (signature == "HEST")
- == Hardware Error Source Table ==
- ARM-specific error sources have been defined; please use those or the
- PCI types such as type 6 (AER Root Port), 7 (AER Endpoint), or 8 (AER
- Bridge), or use type 9 (Generic Hardware Error Source). Firmware first
- error handling is possible if and only if Trusted Firmware is being
- used on arm64.
-
- Must be supplied if RAS support is provided by the platform. It
- is recommended this table be supplied.
-
-HPET Signature Reserved (signature == "HPET")
- == High Precision Event timer Table ==
- x86 only table, will not be supported.
-
-IBFT Signature Reserved (signature == "IBFT")
- == iSCSI Boot Firmware Table ==
- Microsoft defined table, support TBD.
-
-IORT Signature Reserved (signature == "IORT")
- == Input Output Remapping Table ==
- arm64 only table, required in order to describe IO topology, SMMUs,
- and GIC ITSs, and how those various components are connected together,
- such as identifying which components are behind which SMMUs/ITSs.
- This table will only be required on certain SBSA platforms (e.g.,
- when using GICv3-ITS and an SMMU); on SBSA Level 0 platforms, it
- remains optional.
-
-IVRS Signature Reserved (signature == "IVRS")
- == I/O Virtualization Reporting Structure ==
- x86_64 (AMD) only table, will not be supported.
-
-LPIT Signature Reserved (signature == "LPIT")
- == Low Power Idle Table ==
- x86 only table as of ACPI 5.1; starting with ACPI 6.0, processor
- descriptions and power states on ARM platforms should use the DSDT
- and define processor container devices (_HID ACPI0010, Section 8.4,
- and more specifically 8.4.3 and and 8.4.4).
-
-MADT Section 5.2.12 (signature == "APIC")
- == Multiple APIC Description Table ==
- Required for arm64. Only the GIC interrupt controller structures
- should be used (types 0xA - 0xF).
-
-MCFG Signature Reserved (signature == "MCFG")
- == Memory-mapped ConFiGuration space ==
- If the platform supports PCI/PCIe, an MCFG table is required.
-
-MCHI Signature Reserved (signature == "MCHI")
- == Management Controller Host Interface table ==
- Optional, not currently supported.
-
-MPST Section 5.2.21 (signature == "MPST")
- == Memory Power State Table ==
- Optional, not currently supported.
-
-MSCT Section 5.2.19 (signature == "MSCT")
- == Maximum System Characteristic Table ==
- Optional, not currently supported.
-
-MSDM Signature Reserved (signature == "MSDM")
- == Microsoft Data Management table ==
- Microsoft only table, will not be supported.
-
-NFIT Section 5.2.25 (signature == "NFIT")
- == NVDIMM Firmware Interface Table ==
- Optional, not currently supported.
-
-OEMx Signature of "OEMx" only
- == OEM Specific Tables ==
- All tables starting with a signature of "OEM" are reserved for OEM
- use. Since these are not meant to be of general use but are limited
- to very specific end users, they are not recommended for use and are
- not supported by the kernel for arm64.
-
-PCCT Section 14.1 (signature == "PCCT)
- == Platform Communications Channel Table ==
- Recommend for use on arm64; use of PCC is recommended when using CPPC
- to control performance and power for platform processors.
-
-PMTT Section 5.2.21.12 (signature == "PMTT")
- == Platform Memory Topology Table ==
- Optional, not currently supported.
-
-PSDT Section 5.2.11.3 (signature == "PSDT")
- == Persistent System Description Table ==
- Obsolete table, will not be supported.
-
-RASF Section 5.2.20 (signature == "RASF")
- == RAS Feature table ==
- Optional, not currently supported.
-
-RSDP Section 5.2.5 (signature == "RSD PTR")
- == Root System Description PoinTeR ==
- Required for arm64.
-
-RSDT Section 5.2.7 (signature == "RSDT")
- == Root System Description Table ==
- Since this table can only provide 32-bit addresses, it is deprecated
- on arm64, and will not be used. If provided, it will be ignored.
-
-SBST Section 5.2.14 (signature == "SBST")
- == Smart Battery Subsystem Table ==
- Optional, not currently supported.
-
-SLIC Signature Reserved (signature == "SLIC")
- == Software LIcensing table ==
- Microsoft only table, will not be supported.
-
-SLIT Section 5.2.17 (signature == "SLIT")
- == System Locality distance Information Table ==
- Optional in general, but required for NUMA systems.
-
-SPCR Signature Reserved (signature == "SPCR")
- == Serial Port Console Redirection table ==
- Required for arm64.
-
-SPMI Signature Reserved (signature == "SPMI")
- == Server Platform Management Interface table ==
- Optional, not currently supported.
-
-SRAT Section 5.2.16 (signature == "SRAT")
- == System Resource Affinity Table ==
- Optional, but if used, only the GICC Affinity structures are read.
- To support arm64 NUMA, this table is required.
-
-SSDT Section 5.2.11.2 (signature == "SSDT")
- == Secondary System Description Table ==
- These tables are a continuation of the DSDT; these are recommended
- for use with devices that can be added to a running system, but can
- also serve the purpose of dividing up device descriptions into more
- manageable pieces.
-
- An SSDT can only ADD to the ACPI namespace. It cannot modify or
- replace existing device descriptions already in the namespace.
-
- These tables are optional, however. ACPI tables should contain only
- one DSDT but can contain many SSDTs.
-
-STAO Signature Reserved (signature == "STAO")
- == _STA Override table ==
- Optional, but only necessary in virtualized environments in order to
- hide devices from guest OSs.
-
-TCPA Signature Reserved (signature == "TCPA")
- == Trusted Computing Platform Alliance table ==
- Optional, not currently supported, and may need changes to fully
- interoperate with arm64.
-
-TPM2 Signature Reserved (signature == "TPM2")
- == Trusted Platform Module 2 table ==
- Optional, not currently supported, and may need changes to fully
- interoperate with arm64.
-
-UEFI Signature Reserved (signature == "UEFI")
- == UEFI ACPI data table ==
- Optional, not currently supported. No known use case for arm64,
- at present.
-
-WAET Signature Reserved (signature == "WAET")
- == Windows ACPI Emulated devices Table ==
- Microsoft only table, will not be supported.
-
-WDAT Signature Reserved (signature == "WDAT")
- == Watch Dog Action Table ==
- Microsoft only table, will not be supported.
-
-WDRT Signature Reserved (signature == "WDRT")
- == Watch Dog Resource Table ==
- Microsoft only table, will not be supported.
-
-WPBT Signature Reserved (signature == "WPBT")
- == Windows Platform Binary Table ==
- Microsoft only table, will not be supported.
-
-XENV Signature Reserved (signature == "XENV")
- == Xen project table ==
- Optional, used only by Xen at present.
-
-XSDT Section 5.2.8 (signature == "XSDT")
- == eXtended System Description Table ==
- Required for arm64.
-
-
-ACPI Objects
-------------
-The expectations on individual ACPI objects that are likely to be used are
-shown in the list that follows; any object not explicitly mentioned below
-should be used as needed for a particular platform or particular subsystem,
-such as power management or PCI.
-
-Name Section Usage for ARMv8 Linux
----- ------------ -------------------------------------------------
-_CCA 6.2.17 This method must be defined for all bus masters
- on arm64 -- there are no assumptions made about
- whether such devices are cache coherent or not.
- The _CCA value is inherited by all descendants of
- these devices so it does not need to be repeated.
- Without _CCA on arm64, the kernel does not know what
- to do about setting up DMA for the device.
-
- NB: this method provides default cache coherency
- attributes; the presence of an SMMU can be used to
- modify that, however. For example, a master could
- default to non-coherent, but be made coherent with
- the appropriate SMMU configuration (see Table 17 of
- the IORT specification, ARM Document DEN 0049B).
-
-_CID 6.1.2 Use as needed, see also _HID.
-
-_CLS 6.1.3 Use as needed, see also _HID.
-
-_CPC 8.4.7.1 Use as needed, power management specific. CPPC is
- recommended on arm64.
-
-_CRS 6.2.2 Required on arm64.
-
-_CSD 8.4.2.2 Use as needed, used only in conjunction with _CST.
-
-_CST 8.4.2.1 Low power idle states (8.4.4) are recommended instead
- of C-states.
-
-_DDN 6.1.4 This field can be used for a device name. However,
- it is meant for DOS device names (e.g., COM1), so be
- careful of its use across OSes.
-
-_DSD 6.2.5 To be used with caution. If this object is used, try
- to use it within the constraints already defined by the
- Device Properties UUID. Only in rare circumstances
- should it be necessary to create a new _DSD UUID.
-
- In either case, submit the _DSD definition along with
- any driver patches for discussion, especially when
- device properties are used. A driver will not be
- considered complete without a corresponding _DSD
- description. Once approved by kernel maintainers,
- the UUID or device properties must then be registered
- with the UEFI Forum; this may cause some iteration as
- more than one OS will be registering entries.
-
-_DSM 9.1.1 Do not use this method. It is not standardized, the
- return values are not well documented, and it is
- currently a frequent source of error.
-
-\_GL 5.7.1 This object is not to be used in hardware reduced
- mode, and therefore should not be used on arm64.
-
-_GLK 6.5.7 This object requires a global lock be defined; there
- is no global lock on arm64 since it runs in hardware
- reduced mode. Hence, do not use this object on arm64.
-
-\_GPE 5.3.1 This namespace is for x86 use only. Do not use it
- on arm64.
-
-_HID 6.1.5 This is the primary object to use in device probing,
- though _CID and _CLS may also be used.
-
-_INI 6.5.1 Not required, but can be useful in setting up devices
- when UEFI leaves them in a state that may not be what
- the driver expects before it starts probing.
-
-_LPI 8.4.4.3 Recommended for use with processor definitions (_HID
- ACPI0010) on arm64. See also _RDI.
-
-_MLS 6.1.7 Highly recommended for use in internationalization.
-
-_OFF 7.2.2 It is recommended to define this method for any device
- that can be turned on or off.
-
-_ON 7.2.3 It is recommended to define this method for any device
- that can be turned on or off.
-
-\_OS 5.7.3 This method will return "Linux" by default (this is
- the value of the macro ACPI_OS_NAME on Linux). The
- command line parameter acpi_os=<string> can be used
- to set it to some other value.
-
-_OSC 6.2.11 This method can be a global method in ACPI (i.e.,
- \_SB._OSC), or it may be associated with a specific
- device (e.g., \_SB.DEV0._OSC), or both. When used
- as a global method, only capabilities published in
- the ACPI specification are allowed. When used as
- a device-specific method, the process described for
- using _DSD MUST be used to create an _OSC definition;
- out-of-process use of _OSC is not allowed. That is,
- submit the device-specific _OSC usage description as
- part of the kernel driver submission, get it approved
- by the kernel community, then register it with the
- UEFI Forum.
-
-\_OSI 5.7.2 Deprecated on ARM64. As far as ACPI firmware is
- concerned, _OSI is not to be used to determine what
- sort of system is being used or what functionality
- is provided. The _OSC method is to be used instead.
-
-_PDC 8.4.1 Deprecated, do not use on arm64.
-
-\_PIC 5.8.1 The method should not be used. On arm64, the only
- interrupt model available is GIC.
-
-\_PR 5.3.1 This namespace is for x86 use only on legacy systems.
- Do not use it on arm64.
-
-_PRT 6.2.13 Required as part of the definition of all PCI root
- devices.
-
-_PRx 7.3.8-11 Use as needed; power management specific. If _PR0 is
- defined, _PR3 must also be defined.
-
-_PSx 7.3.2-5 Use as needed; power management specific. If _PS0 is
- defined, _PS3 must also be defined. If clocks or
- regulators need adjusting to be consistent with power
- usage, change them in these methods.
-
-_RDI 8.4.4.4 Recommended for use with processor definitions (_HID
- ACPI0010) on arm64. This should only be used in
- conjunction with _LPI.
-
-\_REV 5.7.4 Always returns the latest version of ACPI supported.
-
-\_SB 5.3.1 Required on arm64; all devices must be defined in this
- namespace.
-
-_SLI 6.2.15 Use is recommended when SLIT table is in use.
-
-_STA 6.3.7, It is recommended to define this method for any device
- 7.2.4 that can be turned on or off. See also the STAO table
- that provides overrides to hide devices in virtualized
- environments.
-
-_SRS 6.2.16 Use as needed; see also _PRS.
-
-_STR 6.1.10 Recommended for conveying device names to end users;
- this is preferred over using _DDN.
-
-_SUB 6.1.9 Use as needed; _HID or _CID are preferred.
-
-_SUN 6.1.11 Use as needed, but recommended.
-
-_SWS 7.4.3 Use as needed; power management specific; this may
- require specification changes for use on arm64.
-
-_UID 6.1.12 Recommended for distinguishing devices of the same
- class; define it if at all possible.
-
-
-
-
-ACPI Event Model
-----------------
-Do not use GPE block devices; these are not supported in the hardware reduced
-profile used by arm64. Since there are no GPE blocks defined for use on ARM
-platforms, ACPI events must be signaled differently.
-
-There are two options: GPIO-signaled interrupts (Section 5.6.5), and
-interrupt-signaled events (Section 5.6.9). Interrupt-signaled events are a
-new feature in the ACPI 6.1 specification. Either -- or both -- can be used
-on a given platform, and which to use may be dependent of limitations in any
-given SoC. If possible, interrupt-signaled events are recommended.
-
-
-ACPI Processor Control
-----------------------
-Section 8 of the ACPI specification changed significantly in version 6.0.
-Processors should now be defined as Device objects with _HID ACPI0007; do
-not use the deprecated Processor statement in ASL. All multiprocessor systems
-should also define a hierarchy of processors, done with Processor Container
-Devices (see Section 8.4.3.1, _HID ACPI0010); do not use processor aggregator
-devices (Section 8.5) to describe processor topology. Section 8.4 of the
-specification describes the semantics of these object definitions and how
-they interrelate.
-
-Most importantly, the processor hierarchy defined also defines the low power
-idle states that are available to the platform, along with the rules for
-determining which processors can be turned on or off and the circumstances
-that control that. Without this information, the processors will run in
-whatever power state they were left in by UEFI.
-
-Note too, that the processor Device objects defined and the entries in the
-MADT for GICs are expected to be in synchronization. The _UID of the Device
-object must correspond to processor IDs used in the MADT.
-
-It is recommended that CPPC (8.4.5) be used as the primary model for processor
-performance control on arm64. C-states and P-states may become available at
-some point in the future, but most current design work appears to favor CPPC.
-
-Further, it is essential that the ARMv8 SoC provide a fully functional
-implementation of PSCI; this will be the only mechanism supported by ACPI
-to control CPU power state. Booting of secondary CPUs using the ACPI
-parking protocol is possible, but discouraged, since only PSCI is supported
-for ARM servers.
-
-
-ACPI System Address Map Interfaces
-----------------------------------
-In Section 15 of the ACPI specification, several methods are mentioned as
-possible mechanisms for conveying memory resource information to the kernel.
-For arm64, we will only support UEFI for booting with ACPI, hence the UEFI
-GetMemoryMap() boot service is the only mechanism that will be used.
-
-
-ACPI Platform Error Interfaces (APEI)
--------------------------------------
-The APEI tables supported are described above.
-
-APEI requires the equivalent of an SCI and an NMI on ARMv8. The SCI is used
-to notify the OSPM of errors that have occurred but can be corrected and the
-system can continue correct operation, even if possibly degraded. The NMI is
-used to indicate fatal errors that cannot be corrected, and require immediate
-attention.
-
-Since there is no direct equivalent of the x86 SCI or NMI, arm64 handles
-these slightly differently. The SCI is handled as a high priority interrupt;
-given that these are corrected (or correctable) errors being reported, this
-is sufficient. The NMI is emulated as the highest priority interrupt
-possible. This implies some caution must be used since there could be
-interrupts at higher privilege levels or even interrupts at the same priority
-as the emulated NMI. In Linux, this should not be the case but one should
-be aware it could happen.
-
-
-ACPI Objects Not Supported on ARM64
------------------------------------
-While this may change in the future, there are several classes of objects
-that can be defined, but are not currently of general interest to ARM servers.
-Some of these objects have x86 equivalents, and may actually make sense in ARM
-servers. However, there is either no hardware available at present, or there
-may not even be a non-ARM implementation yet. Hence, they are not currently
-supported.
-
-The following classes of objects are not supported:
-
- -- Section 9.2: ambient light sensor devices
-
- -- Section 9.3: battery devices
-
- -- Section 9.4: lids (e.g., laptop lids)
-
- -- Section 9.8.2: IDE controllers
-
- -- Section 9.9: floppy controllers
-
- -- Section 9.10: GPE block devices
-
- -- Section 9.15: PC/AT RTC/CMOS devices
-
- -- Section 9.16: user presence detection devices
-
- -- Section 9.17: I/O APIC devices; all GICs must be enumerable via MADT
-
- -- Section 9.18: time and alarm devices (see 9.15)
-
- -- Section 10: power source and power meter devices
-
- -- Section 11: thermal management
-
- -- Section 12: embedded controllers interface
-
- -- Section 13: SMBus interfaces
-
-
-This also means that there is no support for the following objects:
-
-Name Section Name Section
----- ------------ ---- ------------
-_ALC 9.3.4 _FDM 9.10.3
-_ALI 9.3.2 _FIX 6.2.7
-_ALP 9.3.6 _GAI 10.4.5
-_ALR 9.3.5 _GHL 10.4.7
-_ALT 9.3.3 _GTM 9.9.2.1.1
-_BCT 10.2.2.10 _LID 9.5.1
-_BDN 6.5.3 _PAI 10.4.4
-_BIF 10.2.2.1 _PCL 10.3.2
-_BIX 10.2.2.1 _PIF 10.3.3
-_BLT 9.2.3 _PMC 10.4.1
-_BMA 10.2.2.4 _PMD 10.4.8
-_BMC 10.2.2.12 _PMM 10.4.3
-_BMD 10.2.2.11 _PRL 10.3.4
-_BMS 10.2.2.5 _PSR 10.3.1
-_BST 10.2.2.6 _PTP 10.4.2
-_BTH 10.2.2.7 _SBS 10.1.3
-_BTM 10.2.2.9 _SHL 10.4.6
-_BTP 10.2.2.8 _STM 9.9.2.1.1
-_DCK 6.5.2 _UPD 9.16.1
-_EC 12.12 _UPP 9.16.2
-_FDE 9.10.1 _WPC 10.5.2
-_FDI 9.10.2 _WPP 10.5.3
-
diff --git a/Documentation/arm64/arm-acpi.txt b/Documentation/arm64/arm-acpi.txt
deleted file mode 100644
index 1a74a041a443..000000000000
--- a/Documentation/arm64/arm-acpi.txt
+++ /dev/null
@@ -1,519 +0,0 @@
-ACPI on ARMv8 Servers
----------------------
-ACPI can be used for ARMv8 general purpose servers designed to follow
-the ARM SBSA (Server Base System Architecture) [0] and SBBR (Server
-Base Boot Requirements) [1] specifications. Please note that the SBBR
-can be retrieved simply by visiting [1], but the SBSA is currently only
-available to those with an ARM login due to ARM IP licensing concerns.
-
-The ARMv8 kernel implements the reduced hardware model of ACPI version
-5.1 or later. Links to the specification and all external documents
-it refers to are managed by the UEFI Forum. The specification is
-available at http://www.uefi.org/specifications and documents referenced
-by the specification can be found via http://www.uefi.org/acpi.
-
-If an ARMv8 system does not meet the requirements of the SBSA and SBBR,
-or cannot be described using the mechanisms defined in the required ACPI
-specifications, then ACPI may not be a good fit for the hardware.
-
-While the documents mentioned above set out the requirements for building
-industry-standard ARMv8 servers, they also apply to more than one operating
-system. The purpose of this document is to describe the interaction between
-ACPI and Linux only, on an ARMv8 system -- that is, what Linux expects of
-ACPI and what ACPI can expect of Linux.
-
-
-Why ACPI on ARM?
-----------------
-Before examining the details of the interface between ACPI and Linux, it is
-useful to understand why ACPI is being used. Several technologies already
-exist in Linux for describing non-enumerable hardware, after all. In this
-section we summarize a blog post [2] from Grant Likely that outlines the
-reasoning behind ACPI on ARMv8 servers. Actually, we snitch a good portion
-of the summary text almost directly, to be honest.
-
-The short form of the rationale for ACPI on ARM is:
-
--- ACPI’s byte code (AML) allows the platform to encode hardware behavior,
- while DT explicitly does not support this. For hardware vendors, being
- able to encode behavior is a key tool used in supporting operating
- system releases on new hardware.
-
--- ACPI’s OSPM defines a power management model that constrains what the
- platform is allowed to do into a specific model, while still providing
- flexibility in hardware design.
-
--- In the enterprise server environment, ACPI has established bindings (such
- as for RAS) which are currently used in production systems. DT does not.
- Such bindings could be defined in DT at some point, but doing so means ARM
- and x86 would end up using completely different code paths in both firmware
- and the kernel.
-
--- Choosing a single interface to describe the abstraction between a platform
- and an OS is important. Hardware vendors would not be required to implement
- both DT and ACPI if they want to support multiple operating systems. And,
- agreeing on a single interface instead of being fragmented into per OS
- interfaces makes for better interoperability overall.
-
--- The new ACPI governance process works well and Linux is now at the same
- table as hardware vendors and other OS vendors. In fact, there is no
- longer any reason to feel that ACPI only belongs to Windows or that
- Linux is in any way secondary to Microsoft in this arena. The move of
- ACPI governance into the UEFI forum has significantly opened up the
- specification development process, and currently, a large portion of the
- changes being made to ACPI are being driven by Linux.
-
-Key to the use of ACPI is the support model. For servers in general, the
-responsibility for hardware behaviour cannot solely be the domain of the
-kernel, but rather must be split between the platform and the kernel, in
-order to allow for orderly change over time. ACPI frees the OS from needing
-to understand all the minute details of the hardware so that the OS doesn’t
-need to be ported to each and every device individually. It allows the
-hardware vendors to take responsibility for power management behaviour without
-depending on an OS release cycle which is not under their control.
-
-ACPI is also important because hardware and OS vendors have already worked
-out the mechanisms for supporting a general purpose computing ecosystem. The
-infrastructure is in place, the bindings are in place, and the processes are
-in place. DT does exactly what Linux needs it to when working with vertically
-integrated devices, but there are no good processes for supporting what the
-server vendors need. Linux could potentially get there with DT, but doing so
-really just duplicates something that already works. ACPI already does what
-the hardware vendors need, Microsoft won’t collaborate on DT, and hardware
-vendors would still end up providing two completely separate firmware
-interfaces -- one for Linux and one for Windows.
-
-
-Kernel Compatibility
---------------------
-One of the primary motivations for ACPI is standardization, and using that
-to provide backward compatibility for Linux kernels. In the server market,
-software and hardware are often used for long periods. ACPI allows the
-kernel and firmware to agree on a consistent abstraction that can be
-maintained over time, even as hardware or software change. As long as the
-abstraction is supported, systems can be updated without necessarily having
-to replace the kernel.
-
-When a Linux driver or subsystem is first implemented using ACPI, it by
-definition ends up requiring a specific version of the ACPI specification
--- it's baseline. ACPI firmware must continue to work, even though it may
-not be optimal, with the earliest kernel version that first provides support
-for that baseline version of ACPI. There may be a need for additional drivers,
-but adding new functionality (e.g., CPU power management) should not break
-older kernel versions. Further, ACPI firmware must also work with the most
-recent version of the kernel.
-
-
-Relationship with Device Tree
------------------------------
-ACPI support in drivers and subsystems for ARMv8 should never be mutually
-exclusive with DT support at compile time.
-
-At boot time the kernel will only use one description method depending on
-parameters passed from the boot loader (including kernel bootargs).
-
-Regardless of whether DT or ACPI is used, the kernel must always be capable
-of booting with either scheme (in kernels with both schemes enabled at compile
-time).
-
-
-Booting using ACPI tables
--------------------------
-The only defined method for passing ACPI tables to the kernel on ARMv8
-is via the UEFI system configuration table. Just so it is explicit, this
-means that ACPI is only supported on platforms that boot via UEFI.
-
-When an ARMv8 system boots, it can either have DT information, ACPI tables,
-or in some very unusual cases, both. If no command line parameters are used,
-the kernel will try to use DT for device enumeration; if there is no DT
-present, the kernel will try to use ACPI tables, but only if they are present.
-In neither is available, the kernel will not boot. If acpi=force is used
-on the command line, the kernel will attempt to use ACPI tables first, but
-fall back to DT if there are no ACPI tables present. The basic idea is that
-the kernel will not fail to boot unless it absolutely has no other choice.
-
-Processing of ACPI tables may be disabled by passing acpi=off on the kernel
-command line; this is the default behavior.
-
-In order for the kernel to load and use ACPI tables, the UEFI implementation
-MUST set the ACPI_20_TABLE_GUID to point to the RSDP table (the table with
-the ACPI signature "RSD PTR "). If this pointer is incorrect and acpi=force
-is used, the kernel will disable ACPI and try to use DT to boot instead; the
-kernel has, in effect, determined that ACPI tables are not present at that
-point.
-
-If the pointer to the RSDP table is correct, the table will be mapped into
-the kernel by the ACPI core, using the address provided by UEFI.
-
-The ACPI core will then locate and map in all other ACPI tables provided by
-using the addresses in the RSDP table to find the XSDT (eXtended System
-Description Table). The XSDT in turn provides the addresses to all other
-ACPI tables provided by the system firmware; the ACPI core will then traverse
-this table and map in the tables listed.
-
-The ACPI core will ignore any provided RSDT (Root System Description Table).
-RSDTs have been deprecated and are ignored on arm64 since they only allow
-for 32-bit addresses.
-
-Further, the ACPI core will only use the 64-bit address fields in the FADT
-(Fixed ACPI Description Table). Any 32-bit address fields in the FADT will
-be ignored on arm64.
-
-Hardware reduced mode (see Section 4.1 of the ACPI 6.1 specification) will
-be enforced by the ACPI core on arm64. Doing so allows the ACPI core to
-run less complex code since it no longer has to provide support for legacy
-hardware from other architectures. Any fields that are not to be used for
-hardware reduced mode must be set to zero.
-
-For the ACPI core to operate properly, and in turn provide the information
-the kernel needs to configure devices, it expects to find the following
-tables (all section numbers refer to the ACPI 6.1 specification):
-
- -- RSDP (Root System Description Pointer), section 5.2.5
-
- -- XSDT (eXtended System Description Table), section 5.2.8
-
- -- FADT (Fixed ACPI Description Table), section 5.2.9
-
- -- DSDT (Differentiated System Description Table), section
- 5.2.11.1
-
- -- MADT (Multiple APIC Description Table), section 5.2.12
-
- -- GTDT (Generic Timer Description Table), section 5.2.24
-
- -- If PCI is supported, the MCFG (Memory mapped ConFiGuration
- Table), section 5.2.6, specifically Table 5-31.
-
- -- If booting without a console=<device> kernel parameter is
- supported, the SPCR (Serial Port Console Redirection table),
- section 5.2.6, specifically Table 5-31.
-
- -- If necessary to describe the I/O topology, SMMUs and GIC ITSs,
- the IORT (Input Output Remapping Table, section 5.2.6, specifically
- Table 5-31).
-
- -- If NUMA is supported, the SRAT (System Resource Affinity Table)
- and SLIT (System Locality distance Information Table), sections
- 5.2.16 and 5.2.17, respectively.
-
-If the above tables are not all present, the kernel may or may not be
-able to boot properly since it may not be able to configure all of the
-devices available. This list of tables is not meant to be all inclusive;
-in some environments other tables may be needed (e.g., any of the APEI
-tables from section 18) to support specific functionality.
-
-
-ACPI Detection
---------------
-Drivers should determine their probe() type by checking for a null
-value for ACPI_HANDLE, or checking .of_node, or other information in
-the device structure. This is detailed further in the "Driver
-Recommendations" section.
-
-In non-driver code, if the presence of ACPI needs to be detected at
-run time, then check the value of acpi_disabled. If CONFIG_ACPI is not
-set, acpi_disabled will always be 1.
-
-
-Device Enumeration
-------------------
-Device descriptions in ACPI should use standard recognized ACPI interfaces.
-These may contain less information than is typically provided via a Device
-Tree description for the same device. This is also one of the reasons that
-ACPI can be useful -- the driver takes into account that it may have less
-detailed information about the device and uses sensible defaults instead.
-If done properly in the driver, the hardware can change and improve over
-time without the driver having to change at all.
-
-Clocks provide an excellent example. In DT, clocks need to be specified
-and the drivers need to take them into account. In ACPI, the assumption
-is that UEFI will leave the device in a reasonable default state, including
-any clock settings. If for some reason the driver needs to change a clock
-value, this can be done in an ACPI method; all the driver needs to do is
-invoke the method and not concern itself with what the method needs to do
-to change the clock. Changing the hardware can then take place over time
-by changing what the ACPI method does, and not the driver.
-
-In DT, the parameters needed by the driver to set up clocks as in the example
-above are known as "bindings"; in ACPI, these are known as "Device Properties"
-and provided to a driver via the _DSD object.
-
-ACPI tables are described with a formal language called ASL, the ACPI
-Source Language (section 19 of the specification). This means that there
-are always multiple ways to describe the same thing -- including device
-properties. For example, device properties could use an ASL construct
-that looks like this: Name(KEY0, "value0"). An ACPI device driver would
-then retrieve the value of the property by evaluating the KEY0 object.
-However, using Name() this way has multiple problems: (1) ACPI limits
-names ("KEY0") to four characters unlike DT; (2) there is no industry
-wide registry that maintains a list of names, minimizing re-use; (3)
-there is also no registry for the definition of property values ("value0"),
-again making re-use difficult; and (4) how does one maintain backward
-compatibility as new hardware comes out? The _DSD method was created
-to solve precisely these sorts of problems; Linux drivers should ALWAYS
-use the _DSD method for device properties and nothing else.
-
-The _DSM object (ACPI Section 9.14.1) could also be used for conveying
-device properties to a driver. Linux drivers should only expect it to
-be used if _DSD cannot represent the data required, and there is no way
-to create a new UUID for the _DSD object. Note that there is even less
-regulation of the use of _DSM than there is of _DSD. Drivers that depend
-on the contents of _DSM objects will be more difficult to maintain over
-time because of this; as of this writing, the use of _DSM is the cause
-of quite a few firmware problems and is not recommended.
-
-Drivers should look for device properties in the _DSD object ONLY; the _DSD
-object is described in the ACPI specification section 6.2.5, but this only
-describes how to define the structure of an object returned via _DSD, and
-how specific data structures are defined by specific UUIDs. Linux should
-only use the _DSD Device Properties UUID [5]:
-
- -- UUID: daffd814-6eba-4d8c-8a91-bc9bbf4aa301
-
- -- http://www.uefi.org/sites/default/files/resources/_DSD-device-properties-UUID.pdf
-
-The UEFI Forum provides a mechanism for registering device properties [4]
-so that they may be used across all operating systems supporting ACPI.
-Device properties that have not been registered with the UEFI Forum should
-not be used.
-
-Before creating new device properties, check to be sure that they have not
-been defined before and either registered in the Linux kernel documentation
-as DT bindings, or the UEFI Forum as device properties. While we do not want
-to simply move all DT bindings into ACPI device properties, we can learn from
-what has been previously defined.
-
-If it is necessary to define a new device property, or if it makes sense to
-synthesize the definition of a binding so it can be used in any firmware,
-both DT bindings and ACPI device properties for device drivers have review
-processes. Use them both. When the driver itself is submitted for review
-to the Linux mailing lists, the device property definitions needed must be
-submitted at the same time. A driver that supports ACPI and uses device
-properties will not be considered complete without their definitions. Once
-the device property has been accepted by the Linux community, it must be
-registered with the UEFI Forum [4], which will review it again for consistency
-within the registry. This may require iteration. The UEFI Forum, though,
-will always be the canonical site for device property definitions.
-
-It may make sense to provide notice to the UEFI Forum that there is the
-intent to register a previously unused device property name as a means of
-reserving the name for later use. Other operating system vendors will
-also be submitting registration requests and this may help smooth the
-process.
-
-Once registration and review have been completed, the kernel provides an
-interface for looking up device properties in a manner independent of
-whether DT or ACPI is being used. This API should be used [6]; it can
-eliminate some duplication of code paths in driver probing functions and
-discourage divergence between DT bindings and ACPI device properties.
-
-
-Programmable Power Control Resources
-------------------------------------
-Programmable power control resources include such resources as voltage/current
-providers (regulators) and clock sources.
-
-With ACPI, the kernel clock and regulator framework is not expected to be used
-at all.
-
-The kernel assumes that power control of these resources is represented with
-Power Resource Objects (ACPI section 7.1). The ACPI core will then handle
-correctly enabling and disabling resources as they are needed. In order to
-get that to work, ACPI assumes each device has defined D-states and that these
-can be controlled through the optional ACPI methods _PS0, _PS1, _PS2, and _PS3;
-in ACPI, _PS0 is the method to invoke to turn a device full on, and _PS3 is for
-turning a device full off.
-
-There are two options for using those Power Resources. They can:
-
- -- be managed in a _PSx method which gets called on entry to power
- state Dx.
-
- -- be declared separately as power resources with their own _ON and _OFF
- methods. They are then tied back to D-states for a particular device
- via _PRx which specifies which power resources a device needs to be on
- while in Dx. Kernel then tracks number of devices using a power resource
- and calls _ON/_OFF as needed.
-
-The kernel ACPI code will also assume that the _PSx methods follow the normal
-ACPI rules for such methods:
-
- -- If either _PS0 or _PS3 is implemented, then the other method must also
- be implemented.
-
- -- If a device requires usage or setup of a power resource when on, the ASL
- should organize that it is allocated/enabled using the _PS0 method.
-
- -- Resources allocated or enabled in the _PS0 method should be disabled
- or de-allocated in the _PS3 method.
-
- -- Firmware will leave the resources in a reasonable state before handing
- over control to the kernel.
-
-Such code in _PSx methods will of course be very platform specific. But,
-this allows the driver to abstract out the interface for operating the device
-and avoid having to read special non-standard values from ACPI tables. Further,
-abstracting the use of these resources allows the hardware to change over time
-without requiring updates to the driver.
-
-
-Clocks
-------
-ACPI makes the assumption that clocks are initialized by the firmware --
-UEFI, in this case -- to some working value before control is handed over
-to the kernel. This has implications for devices such as UARTs, or SoC-driven
-LCD displays, for example.
-
-When the kernel boots, the clocks are assumed to be set to reasonable
-working values. If for some reason the frequency needs to change -- e.g.,
-throttling for power management -- the device driver should expect that
-process to be abstracted out into some ACPI method that can be invoked
-(please see the ACPI specification for further recommendations on standard
-methods to be expected). The only exceptions to this are CPU clocks where
-CPPC provides a much richer interface than ACPI methods. If the clocks
-are not set, there is no direct way for Linux to control them.
-
-If an SoC vendor wants to provide fine-grained control of the system clocks,
-they could do so by providing ACPI methods that could be invoked by Linux
-drivers. However, this is NOT recommended and Linux drivers should NOT use
-such methods, even if they are provided. Such methods are not currently
-standardized in the ACPI specification, and using them could tie a kernel
-to a very specific SoC, or tie an SoC to a very specific version of the
-kernel, both of which we are trying to avoid.
-
-
-Driver Recommendations
-----------------------
-DO NOT remove any DT handling when adding ACPI support for a driver. The
-same device may be used on many different systems.
-
-DO try to structure the driver so that it is data-driven. That is, set up
-a struct containing internal per-device state based on defaults and whatever
-else must be discovered by the driver probe function. Then, have the rest
-of the driver operate off of the contents of that struct. Doing so should
-allow most divergence between ACPI and DT functionality to be kept local to
-the probe function instead of being scattered throughout the driver. For
-example:
-
-static int device_probe_dt(struct platform_device *pdev)
-{
- /* DT specific functionality */
- ...
-}
-
-static int device_probe_acpi(struct platform_device *pdev)
-{
- /* ACPI specific functionality */
- ...
-}
-
-static int device_probe(struct platform_device *pdev)
-{
- ...
- struct device_node node = pdev->dev.of_node;
- ...
-
- if (node)
- ret = device_probe_dt(pdev);
- else if (ACPI_HANDLE(&pdev->dev))
- ret = device_probe_acpi(pdev);
- else
- /* other initialization */
- ...
- /* Continue with any generic probe operations */
- ...
-}
-
-DO keep the MODULE_DEVICE_TABLE entries together in the driver to make it
-clear the different names the driver is probed for, both from DT and from
-ACPI:
-
-static struct of_device_id virtio_mmio_match[] = {
- { .compatible = "virtio,mmio", },
- { }
-};
-MODULE_DEVICE_TABLE(of, virtio_mmio_match);
-
-static const struct acpi_device_id virtio_mmio_acpi_match[] = {
- { "LNRO0005", },
- { }
-};
-MODULE_DEVICE_TABLE(acpi, virtio_mmio_acpi_match);
-
-
-ASWG
-----
-The ACPI specification changes regularly. During the year 2014, for instance,
-version 5.1 was released and version 6.0 substantially completed, with most of
-the changes being driven by ARM-specific requirements. Proposed changes are
-presented and discussed in the ASWG (ACPI Specification Working Group) which
-is a part of the UEFI Forum. The current version of the ACPI specification
-is 6.1 release in January 2016.
-
-Participation in this group is open to all UEFI members. Please see
-http://www.uefi.org/workinggroup for details on group membership.
-
-It is the intent of the ARMv8 ACPI kernel code to follow the ACPI specification
-as closely as possible, and to only implement functionality that complies with
-the released standards from UEFI ASWG. As a practical matter, there will be
-vendors that provide bad ACPI tables or violate the standards in some way.
-If this is because of errors, quirks and fix-ups may be necessary, but will
-be avoided if possible. If there are features missing from ACPI that preclude
-it from being used on a platform, ECRs (Engineering Change Requests) should be
-submitted to ASWG and go through the normal approval process; for those that
-are not UEFI members, many other members of the Linux community are and would
-likely be willing to assist in submitting ECRs.
-
-
-Linux Code
-----------
-Individual items specific to Linux on ARM, contained in the the Linux
-source code, are in the list that follows:
-
-ACPI_OS_NAME This macro defines the string to be returned when
- an ACPI method invokes the _OS method. On ARM64
- systems, this macro will be "Linux" by default.
- The command line parameter acpi_os=<string>
- can be used to set it to some other value. The
- default value for other architectures is "Microsoft
- Windows NT", for example.
-
-ACPI Objects
-------------
-Detailed expectations for ACPI tables and object are listed in the file
-Documentation/arm64/acpi_object_usage.txt.
-
-
-References
-----------
-[0] http://silver.arm.com -- document ARM-DEN-0029, or newer
- "Server Base System Architecture", version 2.3, dated 27 Mar 2014
-
-[1] http://infocenter.arm.com/help/topic/com.arm.doc.den0044a/Server_Base_Boot_Requirements.pdf
- Document ARM-DEN-0044A, or newer: "Server Base Boot Requirements, System
- Software on ARM Platforms", dated 16 Aug 2014
-
-[2] http://www.secretlab.ca/archives/151, 10 Jan 2015, Copyright (c) 2015,
- Linaro Ltd., written by Grant Likely.
-
-[3] AMD ACPI for Seattle platform documentation:
- http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2012/10/Seattle_ACPI_Guide.pdf
-
-[4] http://www.uefi.org/acpi -- please see the link for the "ACPI _DSD Device
- Property Registry Instructions"
-
-[5] http://www.uefi.org/acpi -- please see the link for the "_DSD (Device
- Specific Data) Implementation Guide"
-
-[6] Kernel code for the unified device property interface can be found in
- include/linux/property.h and drivers/base/property.c.
-
-
-Authors
--------
-Al Stone <al.stone@linaro.org>
-Graeme Gregory <graeme.gregory@linaro.org>
-Hanjun Guo <hanjun.guo@linaro.org>
-
-Grant Likely <grant.likely@linaro.org>, for the "Why ACPI on ARM?" section
diff --git a/Documentation/arm64/booting.txt b/Documentation/arm64/booting.txt
deleted file mode 100644
index 8d0df62c3fe0..000000000000
--- a/Documentation/arm64/booting.txt
+++ /dev/null
@@ -1,253 +0,0 @@
- Booting AArch64 Linux
- =====================
-
-Author: Will Deacon <will.deacon@arm.com>
-Date : 07 September 2012
-
-This document is based on the ARM booting document by Russell King and
-is relevant to all public releases of the AArch64 Linux kernel.
-
-The AArch64 exception model is made up of a number of exception levels
-(EL0 - EL3), with EL0 and EL1 having a secure and a non-secure
-counterpart. EL2 is the hypervisor level and exists only in non-secure
-mode. EL3 is the highest priority level and exists only in secure mode.
-
-For the purposes of this document, we will use the term `boot loader'
-simply to define all software that executes on the CPU(s) before control
-is passed to the Linux kernel. This may include secure monitor and
-hypervisor code, or it may just be a handful of instructions for
-preparing a minimal boot environment.
-
-Essentially, the boot loader should provide (as a minimum) the
-following:
-
-1. Setup and initialise the RAM
-2. Setup the device tree
-3. Decompress the kernel image
-4. Call the kernel image
-
-
-1. Setup and initialise RAM
----------------------------
-
-Requirement: MANDATORY
-
-The boot loader is expected to find and initialise all RAM that the
-kernel will use for volatile data storage in the system. It performs
-this in a machine dependent manner. (It may use internal algorithms
-to automatically locate and size all RAM, or it may use knowledge of
-the RAM in the machine, or any other method the boot loader designer
-sees fit.)
-
-
-2. Setup the device tree
--------------------------
-
-Requirement: MANDATORY
-
-The device tree blob (dtb) must be placed on an 8-byte boundary and must
-not exceed 2 megabytes in size. Since the dtb will be mapped cacheable
-using blocks of up to 2 megabytes in size, it must not be placed within
-any 2M region which must be mapped with any specific attributes.
-
-NOTE: versions prior to v4.2 also require that the DTB be placed within
-the 512 MB region starting at text_offset bytes below the kernel Image.
-
-3. Decompress the kernel image
-------------------------------
-
-Requirement: OPTIONAL
-
-The AArch64 kernel does not currently provide a decompressor and
-therefore requires decompression (gzip etc.) to be performed by the boot
-loader if a compressed Image target (e.g. Image.gz) is used. For
-bootloaders that do not implement this requirement, the uncompressed
-Image target is available instead.
-
-
-4. Call the kernel image
-------------------------
-
-Requirement: MANDATORY
-
-The decompressed kernel image contains a 64-byte header as follows:
-
- u32 code0; /* Executable code */
- u32 code1; /* Executable code */
- u64 text_offset; /* Image load offset, little endian */
- u64 image_size; /* Effective Image size, little endian */
- u64 flags; /* kernel flags, little endian */
- u64 res2 = 0; /* reserved */
- u64 res3 = 0; /* reserved */
- u64 res4 = 0; /* reserved */
- u32 magic = 0x644d5241; /* Magic number, little endian, "ARM\x64" */
- u32 res5; /* reserved (used for PE COFF offset) */
-
-
-Header notes:
-
-- As of v3.17, all fields are little endian unless stated otherwise.
-
-- code0/code1 are responsible for branching to stext.
-
-- when booting through EFI, code0/code1 are initially skipped.
- res5 is an offset to the PE header and the PE header has the EFI
- entry point (efi_stub_entry). When the stub has done its work, it
- jumps to code0 to resume the normal boot process.
-
-- Prior to v3.17, the endianness of text_offset was not specified. In
- these cases image_size is zero and text_offset is 0x80000 in the
- endianness of the kernel. Where image_size is non-zero image_size is
- little-endian and must be respected. Where image_size is zero,
- text_offset can be assumed to be 0x80000.
-
-- The flags field (introduced in v3.17) is a little-endian 64-bit field
- composed as follows:
- Bit 0: Kernel endianness. 1 if BE, 0 if LE.
- Bit 1-2: Kernel Page size.
- 0 - Unspecified.
- 1 - 4K
- 2 - 16K
- 3 - 64K
- Bit 3: Kernel physical placement
- 0 - 2MB aligned base should be as close as possible
- to the base of DRAM, since memory below it is not
- accessible via the linear mapping
- 1 - 2MB aligned base may be anywhere in physical
- memory
- Bits 4-63: Reserved.
-
-- When image_size is zero, a bootloader should attempt to keep as much
- memory as possible free for use by the kernel immediately after the
- end of the kernel image. The amount of space required will vary
- depending on selected features, and is effectively unbound.
-
-The Image must be placed text_offset bytes from a 2MB aligned base
-address anywhere in usable system RAM and called there. The region
-between the 2 MB aligned base address and the start of the image has no
-special significance to the kernel, and may be used for other purposes.
-At least image_size bytes from the start of the image must be free for
-use by the kernel.
-NOTE: versions prior to v4.6 cannot make use of memory below the
-physical offset of the Image so it is recommended that the Image be
-placed as close as possible to the start of system RAM.
-
-If an initrd/initramfs is passed to the kernel at boot, it must reside
-entirely within a 1 GB aligned physical memory window of up to 32 GB in
-size that fully covers the kernel Image as well.
-
-Any memory described to the kernel (even that below the start of the
-image) which is not marked as reserved from the kernel (e.g., with a
-memreserve region in the device tree) will be considered as available to
-the kernel.
-
-Before jumping into the kernel, the following conditions must be met:
-
-- Quiesce all DMA capable devices so that memory does not get
- corrupted by bogus network packets or disk data. This will save
- you many hours of debug.
-
-- Primary CPU general-purpose register settings
- x0 = physical address of device tree blob (dtb) in system RAM.
- x1 = 0 (reserved for future use)
- x2 = 0 (reserved for future use)
- x3 = 0 (reserved for future use)
-
-- CPU mode
- All forms of interrupts must be masked in PSTATE.DAIF (Debug, SError,
- IRQ and FIQ).
- The CPU must be in either EL2 (RECOMMENDED in order to have access to
- the virtualisation extensions) or non-secure EL1.
-
-- Caches, MMUs
- The MMU must be off.
- Instruction cache may be on or off.
- The address range corresponding to the loaded kernel image must be
- cleaned to the PoC. In the presence of a system cache or other
- coherent masters with caches enabled, this will typically require
- cache maintenance by VA rather than set/way operations.
- System caches which respect the architected cache maintenance by VA
- operations must be configured and may be enabled.
- System caches which do not respect architected cache maintenance by VA
- operations (not recommended) must be configured and disabled.
-
-- Architected timers
- CNTFRQ must be programmed with the timer frequency and CNTVOFF must
- be programmed with a consistent value on all CPUs. If entering the
- kernel at EL1, CNTHCTL_EL2 must have EL1PCTEN (bit 0) set where
- available.
-
-- Coherency
- All CPUs to be booted by the kernel must be part of the same coherency
- domain on entry to the kernel. This may require IMPLEMENTATION DEFINED
- initialisation to enable the receiving of maintenance operations on
- each CPU.
-
-- System registers
- All writable architected system registers at the exception level where
- the kernel image will be entered must be initialised by software at a
- higher exception level to prevent execution in an UNKNOWN state.
-
- For systems with a GICv3 interrupt controller to be used in v3 mode:
- - If EL3 is present:
- ICC_SRE_EL3.Enable (bit 3) must be initialiased to 0b1.
- ICC_SRE_EL3.SRE (bit 0) must be initialised to 0b1.
- - If the kernel is entered at EL1:
- ICC.SRE_EL2.Enable (bit 3) must be initialised to 0b1
- ICC_SRE_EL2.SRE (bit 0) must be initialised to 0b1.
- - The DT or ACPI tables must describe a GICv3 interrupt controller.
-
- For systems with a GICv3 interrupt controller to be used in
- compatibility (v2) mode:
- - If EL3 is present:
- ICC_SRE_EL3.SRE (bit 0) must be initialised to 0b0.
- - If the kernel is entered at EL1:
- ICC_SRE_EL2.SRE (bit 0) must be initialised to 0b0.
- - The DT or ACPI tables must describe a GICv2 interrupt controller.
-
-The requirements described above for CPU mode, caches, MMUs, architected
-timers, coherency and system registers apply to all CPUs. All CPUs must
-enter the kernel in the same exception level.
-
-The boot loader is expected to enter the kernel on each CPU in the
-following manner:
-
-- The primary CPU must jump directly to the first instruction of the
- kernel image. The device tree blob passed by this CPU must contain
- an 'enable-method' property for each cpu node. The supported
- enable-methods are described below.
-
- It is expected that the bootloader will generate these device tree
- properties and insert them into the blob prior to kernel entry.
-
-- CPUs with a "spin-table" enable-method must have a 'cpu-release-addr'
- property in their cpu node. This property identifies a
- naturally-aligned 64-bit zero-initalised memory location.
-
- These CPUs should spin outside of the kernel in a reserved area of
- memory (communicated to the kernel by a /memreserve/ region in the
- device tree) polling their cpu-release-addr location, which must be
- contained in the reserved region. A wfe instruction may be inserted
- to reduce the overhead of the busy-loop and a sev will be issued by
- the primary CPU. When a read of the location pointed to by the
- cpu-release-addr returns a non-zero value, the CPU must jump to this
- value. The value will be written as a single 64-bit little-endian
- value, so CPUs must convert the read value to their native endianness
- before jumping to it.
-
-- CPUs with a "psci" enable method should remain outside of
- the kernel (i.e. outside of the regions of memory described to the
- kernel in the memory node, or in a reserved area of memory described
- to the kernel by a /memreserve/ region in the device tree). The
- kernel will issue CPU_ON calls as described in ARM document number ARM
- DEN 0022A ("Power State Coordination Interface System Software on ARM
- processors") to bring CPUs into the kernel.
-
- The device tree should contain a 'psci' node, as described in
- Documentation/devicetree/bindings/arm/psci.txt.
-
-- Secondary CPU general-purpose register settings
- x0 = 0 (reserved for future use)
- x1 = 0 (reserved for future use)
- x2 = 0 (reserved for future use)
- x3 = 0 (reserved for future use)
diff --git a/Documentation/arm64/cpu-feature-registers.txt b/Documentation/arm64/cpu-feature-registers.txt
deleted file mode 100644
index 7964f03846b1..000000000000
--- a/Documentation/arm64/cpu-feature-registers.txt
+++ /dev/null
@@ -1,272 +0,0 @@
- ARM64 CPU Feature Registers
- ===========================
-
-Author: Suzuki K Poulose <suzuki.poulose@arm.com>
-
-
-This file describes the ABI for exporting the AArch64 CPU ID/feature
-registers to userspace. The availability of this ABI is advertised
-via the HWCAP_CPUID in HWCAPs.
-
-1. Motivation
----------------
-
-The ARM architecture defines a set of feature registers, which describe
-the capabilities of the CPU/system. Access to these system registers is
-restricted from EL0 and there is no reliable way for an application to
-extract this information to make better decisions at runtime. There is
-limited information available to the application via HWCAPs, however
-there are some issues with their usage.
-
- a) Any change to the HWCAPs requires an update to userspace (e.g libc)
- to detect the new changes, which can take a long time to appear in
- distributions. Exposing the registers allows applications to get the
- information without requiring updates to the toolchains.
-
- b) Access to HWCAPs is sometimes limited (e.g prior to libc, or
- when ld is initialised at startup time).
-
- c) HWCAPs cannot represent non-boolean information effectively. The
- architecture defines a canonical format for representing features
- in the ID registers; this is well defined and is capable of
- representing all valid architecture variations.
-
-
-2. Requirements
------------------
-
- a) Safety :
- Applications should be able to use the information provided by the
- infrastructure to run safely across the system. This has greater
- implications on a system with heterogeneous CPUs.
- The infrastructure exports a value that is safe across all the
- available CPU on the system.
-
- e.g, If at least one CPU doesn't implement CRC32 instructions, while
- others do, we should report that the CRC32 is not implemented.
- Otherwise an application could crash when scheduled on the CPU
- which doesn't support CRC32.
-
- b) Security :
- Applications should only be able to receive information that is
- relevant to the normal operation in userspace. Hence, some of the
- fields are masked out(i.e, made invisible) and their values are set to
- indicate the feature is 'not supported'. See Section 4 for the list
- of visible features. Also, the kernel may manipulate the fields
- based on what it supports. e.g, If FP is not supported by the
- kernel, the values could indicate that the FP is not available
- (even when the CPU provides it).
-
- c) Implementation Defined Features
- The infrastructure doesn't expose any register which is
- IMPLEMENTATION DEFINED as per ARMv8-A Architecture.
-
- d) CPU Identification :
- MIDR_EL1 is exposed to help identify the processor. On a
- heterogeneous system, this could be racy (just like getcpu()). The
- process could be migrated to another CPU by the time it uses the
- register value, unless the CPU affinity is set. Hence, there is no
- guarantee that the value reflects the processor that it is
- currently executing on. The REVIDR is not exposed due to this
- constraint, as REVIDR makes sense only in conjunction with the
- MIDR. Alternately, MIDR_EL1 and REVIDR_EL1 are exposed via sysfs
- at:
-
- /sys/devices/system/cpu/cpu$ID/regs/identification/
- \- midr
- \- revidr
-
-3. Implementation
---------------------
-
-The infrastructure is built on the emulation of the 'MRS' instruction.
-Accessing a restricted system register from an application generates an
-exception and ends up in SIGILL being delivered to the process.
-The infrastructure hooks into the exception handler and emulates the
-operation if the source belongs to the supported system register space.
-
-The infrastructure emulates only the following system register space:
- Op0=3, Op1=0, CRn=0, CRm=0,4,5,6,7
-
-(See Table C5-6 'System instruction encodings for non-Debug System
-register accesses' in ARMv8 ARM DDI 0487A.h, for the list of
-registers).
-
-The following rules are applied to the value returned by the
-infrastructure:
-
- a) The value of an 'IMPLEMENTATION DEFINED' field is set to 0.
- b) The value of a reserved field is populated with the reserved
- value as defined by the architecture.
- c) The value of a 'visible' field holds the system wide safe value
- for the particular feature (except for MIDR_EL1, see section 4).
- d) All other fields (i.e, invisible fields) are set to indicate
- the feature is missing (as defined by the architecture).
-
-4. List of registers with visible features
--------------------------------------------
-
- 1) ID_AA64ISAR0_EL1 - Instruction Set Attribute Register 0
- x--------------------------------------------------x
- | Name | bits | visible |
- |--------------------------------------------------|
- | TS | [55-52] | y |
- |--------------------------------------------------|
- | FHM | [51-48] | y |
- |--------------------------------------------------|
- | DP | [47-44] | y |
- |--------------------------------------------------|
- | SM4 | [43-40] | y |
- |--------------------------------------------------|
- | SM3 | [39-36] | y |
- |--------------------------------------------------|
- | SHA3 | [35-32] | y |
- |--------------------------------------------------|
- | RDM | [31-28] | y |
- |--------------------------------------------------|
- | ATOMICS | [23-20] | y |
- |--------------------------------------------------|
- | CRC32 | [19-16] | y |
- |--------------------------------------------------|
- | SHA2 | [15-12] | y |
- |--------------------------------------------------|
- | SHA1 | [11-8] | y |
- |--------------------------------------------------|
- | AES | [7-4] | y |
- x--------------------------------------------------x
-
-
- 2) ID_AA64PFR0_EL1 - Processor Feature Register 0
- x--------------------------------------------------x
- | Name | bits | visible |
- |--------------------------------------------------|
- | DIT | [51-48] | y |
- |--------------------------------------------------|
- | SVE | [35-32] | y |
- |--------------------------------------------------|
- | GIC | [27-24] | n |
- |--------------------------------------------------|
- | AdvSIMD | [23-20] | y |
- |--------------------------------------------------|
- | FP | [19-16] | y |
- |--------------------------------------------------|
- | EL3 | [15-12] | n |
- |--------------------------------------------------|
- | EL2 | [11-8] | n |
- |--------------------------------------------------|
- | EL1 | [7-4] | n |
- |--------------------------------------------------|
- | EL0 | [3-0] | n |
- x--------------------------------------------------x
-
-
- 3) MIDR_EL1 - Main ID Register
- x--------------------------------------------------x
- | Name | bits | visible |
- |--------------------------------------------------|
- | Implementer | [31-24] | y |
- |--------------------------------------------------|
- | Variant | [23-20] | y |
- |--------------------------------------------------|
- | Architecture | [19-16] | y |
- |--------------------------------------------------|
- | PartNum | [15-4] | y |
- |--------------------------------------------------|
- | Revision | [3-0] | y |
- x--------------------------------------------------x
-
- NOTE: The 'visible' fields of MIDR_EL1 will contain the value
- as available on the CPU where it is fetched and is not a system
- wide safe value.
-
- 4) ID_AA64ISAR1_EL1 - Instruction set attribute register 1
-
- x--------------------------------------------------x
- | Name | bits | visible |
- |--------------------------------------------------|
- | LRCPC | [23-20] | y |
- |--------------------------------------------------|
- | FCMA | [19-16] | y |
- |--------------------------------------------------|
- | JSCVT | [15-12] | y |
- |--------------------------------------------------|
- | DPB | [3-0] | y |
- x--------------------------------------------------x
-
- 5) ID_AA64MMFR2_EL1 - Memory model feature register 2
-
- x--------------------------------------------------x
- | Name | bits | visible |
- |--------------------------------------------------|
- | AT | [35-32] | y |
- x--------------------------------------------------x
-
-Appendix I: Example
----------------------------
-
-/*
- * Sample program to demonstrate the MRS emulation ABI.
- *
- * Copyright (C) 2015-2016, ARM Ltd
- *
- * Author: Suzuki K Poulose <suzuki.poulose@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- */
-
-#include <asm/hwcap.h>
-#include <stdio.h>
-#include <sys/auxv.h>
-
-#define get_cpu_ftr(id) ({ \
- unsigned long __val; \
- asm("mrs %0, "#id : "=r" (__val)); \
- printf("%-20s: 0x%016lx\n", #id, __val); \
- })
-
-int main(void)
-{
-
- if (!(getauxval(AT_HWCAP) & HWCAP_CPUID)) {
- fputs("CPUID registers unavailable\n", stderr);
- return 1;
- }
-
- get_cpu_ftr(ID_AA64ISAR0_EL1);
- get_cpu_ftr(ID_AA64ISAR1_EL1);
- get_cpu_ftr(ID_AA64MMFR0_EL1);
- get_cpu_ftr(ID_AA64MMFR1_EL1);
- get_cpu_ftr(ID_AA64PFR0_EL1);
- get_cpu_ftr(ID_AA64PFR1_EL1);
- get_cpu_ftr(ID_AA64DFR0_EL1);
- get_cpu_ftr(ID_AA64DFR1_EL1);
-
- get_cpu_ftr(MIDR_EL1);
- get_cpu_ftr(MPIDR_EL1);
- get_cpu_ftr(REVIDR_EL1);
-
-#if 0
- /* Unexposed register access causes SIGILL */
- get_cpu_ftr(ID_MMFR0_EL1);
-#endif
-
- return 0;
-}
-
-
-
diff --git a/Documentation/arm64/elf_hwcaps.txt b/Documentation/arm64/elf_hwcaps.txt
deleted file mode 100644
index ea819ae024dd..000000000000
--- a/Documentation/arm64/elf_hwcaps.txt
+++ /dev/null
@@ -1,184 +0,0 @@
-ARM64 ELF hwcaps
-================
-
-This document describes the usage and semantics of the arm64 ELF hwcaps.
-
-
-1. Introduction
----------------
-
-Some hardware or software features are only available on some CPU
-implementations, and/or with certain kernel configurations, but have no
-architected discovery mechanism available to userspace code at EL0. The
-kernel exposes the presence of these features to userspace through a set
-of flags called hwcaps, exposed in the auxilliary vector.
-
-Userspace software can test for features by acquiring the AT_HWCAP entry
-of the auxilliary vector, and testing whether the relevant flags are
-set, e.g.
-
-bool floating_point_is_present(void)
-{
- unsigned long hwcaps = getauxval(AT_HWCAP);
- if (hwcaps & HWCAP_FP)
- return true;
-
- return false;
-}
-
-Where software relies on a feature described by a hwcap, it should check
-the relevant hwcap flag to verify that the feature is present before
-attempting to make use of the feature.
-
-Features cannot be probed reliably through other means. When a feature
-is not available, attempting to use it may result in unpredictable
-behaviour, and is not guaranteed to result in any reliable indication
-that the feature is unavailable, such as a SIGILL.
-
-
-2. Interpretation of hwcaps
----------------------------
-
-The majority of hwcaps are intended to indicate the presence of features
-which are described by architected ID registers inaccessible to
-userspace code at EL0. These hwcaps are defined in terms of ID register
-fields, and should be interpreted with reference to the definition of
-these fields in the ARM Architecture Reference Manual (ARM ARM).
-
-Such hwcaps are described below in the form:
-
- Functionality implied by idreg.field == val.
-
-Such hwcaps indicate the availability of functionality that the ARM ARM
-defines as being present when idreg.field has value val, but do not
-indicate that idreg.field is precisely equal to val, nor do they
-indicate the absence of functionality implied by other values of
-idreg.field.
-
-Other hwcaps may indicate the presence of features which cannot be
-described by ID registers alone. These may be described without
-reference to ID registers, and may refer to other documentation.
-
-
-3. The hwcaps exposed in AT_HWCAP
----------------------------------
-
-HWCAP_FP
-
- Functionality implied by ID_AA64PFR0_EL1.FP == 0b0000.
-
-HWCAP_ASIMD
-
- Functionality implied by ID_AA64PFR0_EL1.AdvSIMD == 0b0000.
-
-HWCAP_EVTSTRM
-
- The generic timer is configured to generate events at a frequency of
- approximately 100KHz.
-
-HWCAP_AES
-
- Functionality implied by ID_AA64ISAR0_EL1.AES == 0b0001.
-
-HWCAP_PMULL
-
- Functionality implied by ID_AA64ISAR0_EL1.AES == 0b0010.
-
-HWCAP_SHA1
-
- Functionality implied by ID_AA64ISAR0_EL1.SHA1 == 0b0001.
-
-HWCAP_SHA2
-
- Functionality implied by ID_AA64ISAR0_EL1.SHA2 == 0b0001.
-
-HWCAP_CRC32
-
- Functionality implied by ID_AA64ISAR0_EL1.CRC32 == 0b0001.
-
-HWCAP_ATOMICS
-
- Functionality implied by ID_AA64ISAR0_EL1.Atomic == 0b0010.
-
-HWCAP_FPHP
-
- Functionality implied by ID_AA64PFR0_EL1.FP == 0b0001.
-
-HWCAP_ASIMDHP
-
- Functionality implied by ID_AA64PFR0_EL1.AdvSIMD == 0b0001.
-
-HWCAP_CPUID
-
- EL0 access to certain ID registers is available, to the extent
- described by Documentation/arm64/cpu-feature-registers.txt.
-
- These ID registers may imply the availability of features.
-
-HWCAP_ASIMDRDM
-
- Functionality implied by ID_AA64ISAR0_EL1.RDM == 0b0001.
-
-HWCAP_JSCVT
-
- Functionality implied by ID_AA64ISAR1_EL1.JSCVT == 0b0001.
-
-HWCAP_FCMA
-
- Functionality implied by ID_AA64ISAR1_EL1.FCMA == 0b0001.
-
-HWCAP_LRCPC
-
- Functionality implied by ID_AA64ISAR1_EL1.LRCPC == 0b0001.
-
-HWCAP_DCPOP
-
- Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0001.
-
-HWCAP_SHA3
-
- Functionality implied by ID_AA64ISAR0_EL1.SHA3 == 0b0001.
-
-HWCAP_SM3
-
- Functionality implied by ID_AA64ISAR0_EL1.SM3 == 0b0001.
-
-HWCAP_SM4
-
- Functionality implied by ID_AA64ISAR0_EL1.SM4 == 0b0001.
-
-HWCAP_ASIMDDP
-
- Functionality implied by ID_AA64ISAR0_EL1.DP == 0b0001.
-
-HWCAP_SHA512
-
- Functionality implied by ID_AA64ISAR0_EL1.SHA2 == 0b0010.
-
-HWCAP_SVE
-
- Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001.
-
-HWCAP_ASIMDFHM
-
- Functionality implied by ID_AA64ISAR0_EL1.FHM == 0b0001.
-
-HWCAP_DIT
-
- Functionality implied by ID_AA64PFR0_EL1.DIT == 0b0001.
-
-HWCAP_USCAT
-
- Functionality implied by ID_AA64MMFR2_EL1.AT == 0b0001.
-
-HWCAP_ILRCPC
-
- Functionality implied by ID_AA64ISAR1_EL1.LRCPC == 0b0010.
-
-HWCAP_FLAGM
-
- Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0001.
-
-HWCAP_SSBS
-
- Functionality implied by ID_AA64PFR1_EL1.SSBS == 0b0010.
diff --git a/Documentation/arm64/hugetlbpage.txt b/Documentation/arm64/hugetlbpage.txt
deleted file mode 100644
index cfae87dc653b..000000000000
--- a/Documentation/arm64/hugetlbpage.txt
+++ /dev/null
@@ -1,38 +0,0 @@
-HugeTLBpage on ARM64
-====================
-
-Hugepage relies on making efficient use of TLBs to improve performance of
-address translations. The benefit depends on both -
-
- - the size of hugepages
- - size of entries supported by the TLBs
-
-The ARM64 port supports two flavours of hugepages.
-
-1) Block mappings at the pud/pmd level
---------------------------------------
-
-These are regular hugepages where a pmd or a pud page table entry points to a
-block of memory. Regardless of the supported size of entries in TLB, block
-mappings reduce the depth of page table walk needed to translate hugepage
-addresses.
-
-2) Using the Contiguous bit
----------------------------
-
-The architecture provides a contiguous bit in the translation table entries
-(D4.5.3, ARM DDI 0487C.a) that hints to the MMU to indicate that it is one of a
-contiguous set of entries that can be cached in a single TLB entry.
-
-The contiguous bit is used in Linux to increase the mapping size at the pmd and
-pte (last) level. The number of supported contiguous entries varies by page size
-and level of the page table.
-
-
-The following hugepage sizes are supported -
-
- CONT PTE PMD CONT PMD PUD
- -------- --- -------- ---
- 4K: 64K 2M 32M 1G
- 16K: 2M 32M 1G
- 64K: 2M 512M 16G
diff --git a/Documentation/arm64/legacy_instructions.txt b/Documentation/arm64/legacy_instructions.txt
deleted file mode 100644
index 01bf3d9fac85..000000000000
--- a/Documentation/arm64/legacy_instructions.txt
+++ /dev/null
@@ -1,57 +0,0 @@
-The arm64 port of the Linux kernel provides infrastructure to support
-emulation of instructions which have been deprecated, or obsoleted in
-the architecture. The infrastructure code uses undefined instruction
-hooks to support emulation. Where available it also allows turning on
-the instruction execution in hardware.
-
-The emulation mode can be controlled by writing to sysctl nodes
-(/proc/sys/abi). The following explains the different execution
-behaviours and the corresponding values of the sysctl nodes -
-
-* Undef
- Value: 0
- Generates undefined instruction abort. Default for instructions that
- have been obsoleted in the architecture, e.g., SWP
-
-* Emulate
- Value: 1
- Uses software emulation. To aid migration of software, in this mode
- usage of emulated instruction is traced as well as rate limited
- warnings are issued. This is the default for deprecated
- instructions, .e.g., CP15 barriers
-
-* Hardware Execution
- Value: 2
- Although marked as deprecated, some implementations may support the
- enabling/disabling of hardware support for the execution of these
- instructions. Using hardware execution generally provides better
- performance, but at the loss of ability to gather runtime statistics
- about the use of the deprecated instructions.
-
-The default mode depends on the status of the instruction in the
-architecture. Deprecated instructions should default to emulation
-while obsolete instructions must be undefined by default.
-
-Note: Instruction emulation may not be possible in all cases. See
-individual instruction notes for further information.
-
-Supported legacy instructions
------------------------------
-* SWP{B}
-Node: /proc/sys/abi/swp
-Status: Obsolete
-Default: Undef (0)
-
-* CP15 Barriers
-Node: /proc/sys/abi/cp15_barrier
-Status: Deprecated
-Default: Emulate (1)
-
-* SETEND
-Node: /proc/sys/abi/setend
-Status: Deprecated
-Default: Emulate (1)*
-Note: All the cpus on the system must have mixed endian support at EL0
-for this feature to be enabled. If a new CPU - which doesn't support mixed
-endian - is hotplugged in after this feature has been enabled, there could
-be unexpected results in the application.
diff --git a/Documentation/arm64/memory.txt b/Documentation/arm64/memory.txt
deleted file mode 100644
index c5dab30d3389..000000000000
--- a/Documentation/arm64/memory.txt
+++ /dev/null
@@ -1,97 +0,0 @@
- Memory Layout on AArch64 Linux
- ==============================
-
-Author: Catalin Marinas <catalin.marinas@arm.com>
-
-This document describes the virtual memory layout used by the AArch64
-Linux kernel. The architecture allows up to 4 levels of translation
-tables with a 4KB page size and up to 3 levels with a 64KB page size.
-
-AArch64 Linux uses either 3 levels or 4 levels of translation tables
-with the 4KB page configuration, allowing 39-bit (512GB) or 48-bit
-(256TB) virtual addresses, respectively, for both user and kernel. With
-64KB pages, only 2 levels of translation tables, allowing 42-bit (4TB)
-virtual address, are used but the memory layout is the same.
-
-User addresses have bits 63:48 set to 0 while the kernel addresses have
-the same bits set to 1. TTBRx selection is given by bit 63 of the
-virtual address. The swapper_pg_dir contains only kernel (global)
-mappings while the user pgd contains only user (non-global) mappings.
-The swapper_pg_dir address is written to TTBR1 and never written to
-TTBR0.
-
-
-AArch64 Linux memory layout with 4KB pages + 3 levels:
-
-Start End Size Use
------------------------------------------------------------------------
-0000000000000000 0000007fffffffff 512GB user
-ffffff8000000000 ffffffffffffffff 512GB kernel
-
-
-AArch64 Linux memory layout with 4KB pages + 4 levels:
-
-Start End Size Use
------------------------------------------------------------------------
-0000000000000000 0000ffffffffffff 256TB user
-ffff000000000000 ffffffffffffffff 256TB kernel
-
-
-AArch64 Linux memory layout with 64KB pages + 2 levels:
-
-Start End Size Use
------------------------------------------------------------------------
-0000000000000000 000003ffffffffff 4TB user
-fffffc0000000000 ffffffffffffffff 4TB kernel
-
-
-AArch64 Linux memory layout with 64KB pages + 3 levels:
-
-Start End Size Use
------------------------------------------------------------------------
-0000000000000000 0000ffffffffffff 256TB user
-ffff000000000000 ffffffffffffffff 256TB kernel
-
-
-For details of the virtual kernel memory layout please see the kernel
-booting log.
-
-
-Translation table lookup with 4KB pages:
-
-+--------+--------+--------+--------+--------+--------+--------+--------+
-|63 56|55 48|47 40|39 32|31 24|23 16|15 8|7 0|
-+--------+--------+--------+--------+--------+--------+--------+--------+
- | | | | | |
- | | | | | v
- | | | | | [11:0] in-page offset
- | | | | +-> [20:12] L3 index
- | | | +-----------> [29:21] L2 index
- | | +---------------------> [38:30] L1 index
- | +-------------------------------> [47:39] L0 index
- +-------------------------------------------------> [63] TTBR0/1
-
-
-Translation table lookup with 64KB pages:
-
-+--------+--------+--------+--------+--------+--------+--------+--------+
-|63 56|55 48|47 40|39 32|31 24|23 16|15 8|7 0|
-+--------+--------+--------+--------+--------+--------+--------+--------+
- | | | | |
- | | | | v
- | | | | [15:0] in-page offset
- | | | +----------> [28:16] L3 index
- | | +--------------------------> [41:29] L2 index
- | +-------------------------------> [47:42] L1 index
- +-------------------------------------------------> [63] TTBR0/1
-
-
-When using KVM without the Virtualization Host Extensions, the
-hypervisor maps kernel pages in EL2 at a fixed (and potentially
-random) offset from the linear mapping. See the kern_hyp_va macro and
-kvm_update_va_mask function for more details. MMIO devices such as
-GICv2 gets mapped next to the HYP idmap page, as do vectors when
-ARM64_HARDEN_EL2_VECTORS is selected for particular CPUs.
-
-When using KVM with the Virtualization Host Extensions, no additional
-mappings are created, since the host kernel runs directly in EL2.
diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt
deleted file mode 100644
index 76ccded8b74c..000000000000
--- a/Documentation/arm64/silicon-errata.txt
+++ /dev/null
@@ -1,80 +0,0 @@
- Silicon Errata and Software Workarounds
- =======================================
-
-Author: Will Deacon <will.deacon@arm.com>
-Date : 27 November 2015
-
-It is an unfortunate fact of life that hardware is often produced with
-so-called "errata", which can cause it to deviate from the architecture
-under specific circumstances. For hardware produced by ARM, these
-errata are broadly classified into the following categories:
-
- Category A: A critical error without a viable workaround.
- Category B: A significant or critical error with an acceptable
- workaround.
- Category C: A minor error that is not expected to occur under normal
- operation.
-
-For more information, consult one of the "Software Developers Errata
-Notice" documents available on infocenter.arm.com (registration
-required).
-
-As far as Linux is concerned, Category B errata may require some special
-treatment in the operating system. For example, avoiding a particular
-sequence of code, or configuring the processor in a particular way. A
-less common situation may require similar actions in order to declassify
-a Category A erratum into a Category C erratum. These are collectively
-known as "software workarounds" and are only required in the minority of
-cases (e.g. those cases that both require a non-secure workaround *and*
-can be triggered by Linux).
-
-For software workarounds that may adversely impact systems unaffected by
-the erratum in question, a Kconfig entry is added under "Kernel
-Features" -> "ARM errata workarounds via the alternatives framework".
-These are enabled by default and patched in at runtime when an affected
-CPU is detected. For less-intrusive workarounds, a Kconfig option is not
-available and the code is structured (preferably with a comment) in such
-a way that the erratum will not be hit.
-
-This approach can make it slightly onerous to determine exactly which
-errata are worked around in an arbitrary kernel source tree, so this
-file acts as a registry of software workarounds in the Linux Kernel and
-will be updated when new workarounds are committed and backported to
-stable kernels.
-
-| Implementor | Component | Erratum ID | Kconfig |
-+----------------+-----------------+-----------------+-----------------------------+
-| ARM | Cortex-A53 | #826319 | ARM64_ERRATUM_826319 |
-| ARM | Cortex-A53 | #827319 | ARM64_ERRATUM_827319 |
-| ARM | Cortex-A53 | #824069 | ARM64_ERRATUM_824069 |
-| ARM | Cortex-A53 | #819472 | ARM64_ERRATUM_819472 |
-| ARM | Cortex-A53 | #845719 | ARM64_ERRATUM_845719 |
-| ARM | Cortex-A53 | #843419 | ARM64_ERRATUM_843419 |
-| ARM | Cortex-A57 | #832075 | ARM64_ERRATUM_832075 |
-| ARM | Cortex-A57 | #852523 | N/A |
-| ARM | Cortex-A57 | #834220 | ARM64_ERRATUM_834220 |
-| ARM | Cortex-A72 | #853709 | N/A |
-| ARM | Cortex-A73 | #858921 | ARM64_ERRATUM_858921 |
-| ARM | Cortex-A55 | #1024718 | ARM64_ERRATUM_1024718 |
-| ARM | Cortex-A76 | #1188873 | ARM64_ERRATUM_1188873 |
-| ARM | MMU-500 | #841119,#826419 | N/A |
-| | | | |
-| Cavium | ThunderX ITS | #22375, #24313 | CAVIUM_ERRATUM_22375 |
-| Cavium | ThunderX ITS | #23144 | CAVIUM_ERRATUM_23144 |
-| Cavium | ThunderX GICv3 | #23154 | CAVIUM_ERRATUM_23154 |
-| Cavium | ThunderX Core | #27456 | CAVIUM_ERRATUM_27456 |
-| Cavium | ThunderX Core | #30115 | CAVIUM_ERRATUM_30115 |
-| Cavium | ThunderX SMMUv2 | #27704 | N/A |
-| Cavium | ThunderX2 SMMUv3| #74 | N/A |
-| Cavium | ThunderX2 SMMUv3| #126 | N/A |
-| | | | |
-| Freescale/NXP | LS2080A/LS1043A | A-008585 | FSL_ERRATUM_A008585 |
-| | | | |
-| Hisilicon | Hip0{5,6,7} | #161010101 | HISILICON_ERRATUM_161010101 |
-| Hisilicon | Hip0{6,7} | #161010701 | N/A |
-| Hisilicon | Hip07 | #161600802 | HISILICON_ERRATUM_161600802 |
-| | | | |
-| Qualcomm Tech. | Kryo/Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 |
-| Qualcomm Tech. | Falkor v1 | E1009 | QCOM_FALKOR_ERRATUM_1009 |
-| Qualcomm Tech. | QDF2400 ITS | E0065 | QCOM_QDF2400_ERRATUM_0065 |
-| Qualcomm Tech. | Falkor v{1,2} | E1041 | QCOM_FALKOR_ERRATUM_1041 |
diff --git a/Documentation/arm64/sve.txt b/Documentation/arm64/sve.txt
deleted file mode 100644
index 7169a0ec41d8..000000000000
--- a/Documentation/arm64/sve.txt
+++ /dev/null
@@ -1,508 +0,0 @@
- Scalable Vector Extension support for AArch64 Linux
- ===================================================
-
-Author: Dave Martin <Dave.Martin@arm.com>
-Date: 4 August 2017
-
-This document outlines briefly the interface provided to userspace by Linux in
-order to support use of the ARM Scalable Vector Extension (SVE).
-
-This is an outline of the most important features and issues only and not
-intended to be exhaustive.
-
-This document does not aim to describe the SVE architecture or programmer's
-model. To aid understanding, a minimal description of relevant programmer's
-model features for SVE is included in Appendix A.
-
-
-1. General
------------
-
-* SVE registers Z0..Z31, P0..P15 and FFR and the current vector length VL, are
- tracked per-thread.
-
-* The presence of SVE is reported to userspace via HWCAP_SVE in the aux vector
- AT_HWCAP entry. Presence of this flag implies the presence of the SVE
- instructions and registers, and the Linux-specific system interfaces
- described in this document. SVE is reported in /proc/cpuinfo as "sve".
-
-* Support for the execution of SVE instructions in userspace can also be
- detected by reading the CPU ID register ID_AA64PFR0_EL1 using an MRS
- instruction, and checking that the value of the SVE field is nonzero. [3]
-
- It does not guarantee the presence of the system interfaces described in the
- following sections: software that needs to verify that those interfaces are
- present must check for HWCAP_SVE instead.
-
-* Debuggers should restrict themselves to interacting with the target via the
- NT_ARM_SVE regset. The recommended way of detecting support for this regset
- is to connect to a target process first and then attempt a
- ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov).
-
-
-2. Vector length terminology
------------------------------
-
-The size of an SVE vector (Z) register is referred to as the "vector length".
-
-To avoid confusion about the units used to express vector length, the kernel
-adopts the following conventions:
-
-* Vector length (VL) = size of a Z-register in bytes
-
-* Vector quadwords (VQ) = size of a Z-register in units of 128 bits
-
-(So, VL = 16 * VQ.)
-
-The VQ convention is used where the underlying granularity is important, such
-as in data structure definitions. In most other situations, the VL convention
-is used. This is consistent with the meaning of the "VL" pseudo-register in
-the SVE instruction set architecture.
-
-
-3. System call behaviour
--------------------------
-
-* On syscall, V0..V31 are preserved (as without SVE). Thus, bits [127:0] of
- Z0..Z31 are preserved. All other bits of Z0..Z31, and all of P0..P15 and FFR
- become unspecified on return from a syscall.
-
-* The SVE registers are not used to pass arguments to or receive results from
- any syscall.
-
-* In practice the affected registers/bits will be preserved or will be replaced
- with zeros on return from a syscall, but userspace should not make
- assumptions about this. The kernel behaviour may vary on a case-by-case
- basis.
-
-* All other SVE state of a thread, including the currently configured vector
- length, the state of the PR_SVE_VL_INHERIT flag, and the deferred vector
- length (if any), is preserved across all syscalls, subject to the specific
- exceptions for execve() described in section 6.
-
- In particular, on return from a fork() or clone(), the parent and new child
- process or thread share identical SVE configuration, matching that of the
- parent before the call.
-
-
-4. Signal handling
--------------------
-
-* A new signal frame record sve_context encodes the SVE registers on signal
- delivery. [1]
-
-* This record is supplementary to fpsimd_context. The FPSR and FPCR registers
- are only present in fpsimd_context. For convenience, the content of V0..V31
- is duplicated between sve_context and fpsimd_context.
-
-* The signal frame record for SVE always contains basic metadata, in particular
- the thread's vector length (in sve_context.vl).
-
-* The SVE registers may or may not be included in the record, depending on
- whether the registers are live for the thread. The registers are present if
- and only if:
- sve_context.head.size >= SVE_SIG_CONTEXT_SIZE(sve_vq_from_vl(sve_context.vl)).
-
-* If the registers are present, the remainder of the record has a vl-dependent
- size and layout. Macros SVE_SIG_* are defined [1] to facilitate access to
- the members.
-
-* If the SVE context is too big to fit in sigcontext.__reserved[], then extra
- space is allocated on the stack, an extra_context record is written in
- __reserved[] referencing this space. sve_context is then written in the
- extra space. Refer to [1] for further details about this mechanism.
-
-
-5. Signal return
------------------
-
-When returning from a signal handler:
-
-* If there is no sve_context record in the signal frame, or if the record is
- present but contains no register data as desribed in the previous section,
- then the SVE registers/bits become non-live and take unspecified values.
-
-* If sve_context is present in the signal frame and contains full register
- data, the SVE registers become live and are populated with the specified
- data. However, for backward compatibility reasons, bits [127:0] of Z0..Z31
- are always restored from the corresponding members of fpsimd_context.vregs[]
- and not from sve_context. The remaining bits are restored from sve_context.
-
-* Inclusion of fpsimd_context in the signal frame remains mandatory,
- irrespective of whether sve_context is present or not.
-
-* The vector length cannot be changed via signal return. If sve_context.vl in
- the signal frame does not match the current vector length, the signal return
- attempt is treated as illegal, resulting in a forced SIGSEGV.
-
-
-6. prctl extensions
---------------------
-
-Some new prctl() calls are added to allow programs to manage the SVE vector
-length:
-
-prctl(PR_SVE_SET_VL, unsigned long arg)
-
- Sets the vector length of the calling thread and related flags, where
- arg == vl | flags. Other threads of the calling process are unaffected.
-
- vl is the desired vector length, where sve_vl_valid(vl) must be true.
-
- flags:
-
- PR_SVE_SET_VL_INHERIT
-
- Inherit the current vector length across execve(). Otherwise, the
- vector length is reset to the system default at execve(). (See
- Section 9.)
-
- PR_SVE_SET_VL_ONEXEC
-
- Defer the requested vector length change until the next execve()
- performed by this thread.
-
- The effect is equivalent to implicit exceution of the following
- call immediately after the next execve() (if any) by the thread:
-
- prctl(PR_SVE_SET_VL, arg & ~PR_SVE_SET_VL_ONEXEC)
-
- This allows launching of a new program with a different vector
- length, while avoiding runtime side effects in the caller.
-
-
- Without PR_SVE_SET_VL_ONEXEC, the requested change takes effect
- immediately.
-
-
- Return value: a nonnegative on success, or a negative value on error:
- EINVAL: SVE not supported, invalid vector length requested, or
- invalid flags.
-
-
- On success:
-
- * Either the calling thread's vector length or the deferred vector length
- to be applied at the next execve() by the thread (dependent on whether
- PR_SVE_SET_VL_ONEXEC is present in arg), is set to the largest value
- supported by the system that is less than or equal to vl. If vl ==
- SVE_VL_MAX, the value set will be the largest value supported by the
- system.
-
- * Any previously outstanding deferred vector length change in the calling
- thread is cancelled.
-
- * The returned value describes the resulting configuration, encoded as for
- PR_SVE_GET_VL. The vector length reported in this value is the new
- current vector length for this thread if PR_SVE_SET_VL_ONEXEC was not
- present in arg; otherwise, the reported vector length is the deferred
- vector length that will be applied at the next execve() by the calling
- thread.
-
- * Changing the vector length causes all of P0..P15, FFR and all bits of
- Z0..Z31 except for Z0 bits [127:0] .. Z31 bits [127:0] to become
- unspecified. Calling PR_SVE_SET_VL with vl equal to the thread's current
- vector length, or calling PR_SVE_SET_VL with the PR_SVE_SET_VL_ONEXEC
- flag, does not constitute a change to the vector length for this purpose.
-
-
-prctl(PR_SVE_GET_VL)
-
- Gets the vector length of the calling thread.
-
- The following flag may be OR-ed into the result:
-
- PR_SVE_SET_VL_INHERIT
-
- Vector length will be inherited across execve().
-
- There is no way to determine whether there is an outstanding deferred
- vector length change (which would only normally be the case between a
- fork() or vfork() and the corresponding execve() in typical use).
-
- To extract the vector length from the result, and it with
- PR_SVE_VL_LEN_MASK.
-
- Return value: a nonnegative value on success, or a negative value on error:
- EINVAL: SVE not supported.
-
-
-7. ptrace extensions
----------------------
-
-* A new regset NT_ARM_SVE is defined for use with PTRACE_GETREGSET and
- PTRACE_SETREGSET.
-
- Refer to [2] for definitions.
-
-The regset data starts with struct user_sve_header, containing:
-
- size
-
- Size of the complete regset, in bytes.
- This depends on vl and possibly on other things in the future.
-
- If a call to PTRACE_GETREGSET requests less data than the value of
- size, the caller can allocate a larger buffer and retry in order to
- read the complete regset.
-
- max_size
-
- Maximum size in bytes that the regset can grow to for the target
- thread. The regset won't grow bigger than this even if the target
- thread changes its vector length etc.
-
- vl
-
- Target thread's current vector length, in bytes.
-
- max_vl
-
- Maximum possible vector length for the target thread.
-
- flags
-
- either
-
- SVE_PT_REGS_FPSIMD
-
- SVE registers are not live (GETREGSET) or are to be made
- non-live (SETREGSET).
-
- The payload is of type struct user_fpsimd_state, with the same
- meaning as for NT_PRFPREG, starting at offset
- SVE_PT_FPSIMD_OFFSET from the start of user_sve_header.
-
- Extra data might be appended in the future: the size of the
- payload should be obtained using SVE_PT_FPSIMD_SIZE(vq, flags).
-
- vq should be obtained using sve_vq_from_vl(vl).
-
- or
-
- SVE_PT_REGS_SVE
-
- SVE registers are live (GETREGSET) or are to be made live
- (SETREGSET).
-
- The payload contains the SVE register data, starting at offset
- SVE_PT_SVE_OFFSET from the start of user_sve_header, and with
- size SVE_PT_SVE_SIZE(vq, flags);
-
- ... OR-ed with zero or more of the following flags, which have the same
- meaning and behaviour as the corresponding PR_SET_VL_* flags:
-
- SVE_PT_VL_INHERIT
-
- SVE_PT_VL_ONEXEC (SETREGSET only).
-
-* The effects of changing the vector length and/or flags are equivalent to
- those documented for PR_SVE_SET_VL.
-
- The caller must make a further GETREGSET call if it needs to know what VL is
- actually set by SETREGSET, unless is it known in advance that the requested
- VL is supported.
-
-* In the SVE_PT_REGS_SVE case, the size and layout of the payload depends on
- the header fields. The SVE_PT_SVE_*() macros are provided to facilitate
- access to the members.
-
-* In either case, for SETREGSET it is permissible to omit the payload, in which
- case only the vector length and flags are changed (along with any
- consequences of those changes).
-
-* For SETREGSET, if an SVE_PT_REGS_SVE payload is present and the
- requested VL is not supported, the effect will be the same as if the
- payload were omitted, except that an EIO error is reported. No
- attempt is made to translate the payload data to the correct layout
- for the vector length actually set. The thread's FPSIMD state is
- preserved, but the remaining bits of the SVE registers become
- unspecified. It is up to the caller to translate the payload layout
- for the actual VL and retry.
-
-* The effect of writing a partial, incomplete payload is unspecified.
-
-
-8. ELF coredump extensions
----------------------------
-
-* A NT_ARM_SVE note will be added to each coredump for each thread of the
- dumped process. The contents will be equivalent to the data that would have
- been read if a PTRACE_GETREGSET of NT_ARM_SVE were executed for each thread
- when the coredump was generated.
-
-
-9. System runtime configuration
---------------------------------
-
-* To mitigate the ABI impact of expansion of the signal frame, a policy
- mechanism is provided for administrators, distro maintainers and developers
- to set the default vector length for userspace processes:
-
-/proc/sys/abi/sve_default_vector_length
-
- Writing the text representation of an integer to this file sets the system
- default vector length to the specified value, unless the value is greater
- than the maximum vector length supported by the system in which case the
- default vector length is set to that maximum.
-
- The result can be determined by reopening the file and reading its
- contents.
-
- At boot, the default vector length is initially set to 64 or the maximum
- supported vector length, whichever is smaller. This determines the initial
- vector length of the init process (PID 1).
-
- Reading this file returns the current system default vector length.
-
-* At every execve() call, the new vector length of the new process is set to
- the system default vector length, unless
-
- * PR_SVE_SET_VL_INHERIT (or equivalently SVE_PT_VL_INHERIT) is set for the
- calling thread, or
-
- * a deferred vector length change is pending, established via the
- PR_SVE_SET_VL_ONEXEC flag (or SVE_PT_VL_ONEXEC).
-
-* Modifying the system default vector length does not affect the vector length
- of any existing process or thread that does not make an execve() call.
-
-
-Appendix A. SVE programmer's model (informative)
-=================================================
-
-This section provides a minimal description of the additions made by SVE to the
-ARMv8-A programmer's model that are relevant to this document.
-
-Note: This section is for information only and not intended to be complete or
-to replace any architectural specification.
-
-A.1. Registers
----------------
-
-In A64 state, SVE adds the following:
-
-* 32 8VL-bit vector registers Z0..Z31
- For each Zn, Zn bits [127:0] alias the ARMv8-A vector register Vn.
-
- A register write using a Vn register name zeros all bits of the corresponding
- Zn except for bits [127:0].
-
-* 16 VL-bit predicate registers P0..P15
-
-* 1 VL-bit special-purpose predicate register FFR (the "first-fault register")
-
-* a VL "pseudo-register" that determines the size of each vector register
-
- The SVE instruction set architecture provides no way to write VL directly.
- Instead, it can be modified only by EL1 and above, by writing appropriate
- system registers.
-
-* The value of VL can be configured at runtime by EL1 and above:
- 16 <= VL <= VLmax, where VL must be a multiple of 16.
-
-* The maximum vector length is determined by the hardware:
- 16 <= VLmax <= 256.
-
- (The SVE architecture specifies 256, but permits future architecture
- revisions to raise this limit.)
-
-* FPSR and FPCR are retained from ARMv8-A, and interact with SVE floating-point
- operations in a similar way to the way in which they interact with ARMv8
- floating-point operations.
-
- 8VL-1 128 0 bit index
- +---- //// -----------------+
- Z0 | : V0 |
- : :
- Z7 | : V7 |
- Z8 | : * V8 |
- : : :
- Z15 | : *V15 |
- Z16 | : V16 |
- : :
- Z31 | : V31 |
- +---- //// -----------------+
- 31 0
- VL-1 0 +-------+
- +---- //// --+ FPSR | |
- P0 | | +-------+
- : | | *FPCR | |
- P15 | | +-------+
- +---- //// --+
- FFR | | +-----+
- +---- //// --+ VL | |
- +-----+
-
-(*) callee-save:
- This only applies to bits [63:0] of Z-/V-registers.
- FPCR contains callee-save and caller-save bits. See [4] for details.
-
-
-A.2. Procedure call standard
------------------------------
-
-The ARMv8-A base procedure call standard is extended as follows with respect to
-the additional SVE register state:
-
-* All SVE register bits that are not shared with FP/SIMD are caller-save.
-
-* Z8 bits [63:0] .. Z15 bits [63:0] are callee-save.
-
- This follows from the way these bits are mapped to V8..V15, which are caller-
- save in the base procedure call standard.
-
-
-Appendix B. ARMv8-A FP/SIMD programmer's model
-===============================================
-
-Note: This section is for information only and not intended to be complete or
-to replace any architectural specification.
-
-Refer to [4] for for more information.
-
-ARMv8-A defines the following floating-point / SIMD register state:
-
-* 32 128-bit vector registers V0..V31
-* 2 32-bit status/control registers FPSR, FPCR
-
- 127 0 bit index
- +---------------+
- V0 | |
- : : :
- V7 | |
- * V8 | |
- : : : :
- *V15 | |
- V16 | |
- : : :
- V31 | |
- +---------------+
-
- 31 0
- +-------+
- FPSR | |
- +-------+
- *FPCR | |
- +-------+
-
-(*) callee-save:
- This only applies to bits [63:0] of V-registers.
- FPCR contains a mixture of callee-save and caller-save bits.
-
-
-References
-==========
-
-[1] arch/arm64/include/uapi/asm/sigcontext.h
- AArch64 Linux signal ABI definitions
-
-[2] arch/arm64/include/uapi/asm/ptrace.h
- AArch64 Linux ptrace ABI definitions
-
-[3] Documentation/arm64/cpu-feature-registers.txt
-
-[4] ARM IHI0055C
- http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055c/IHI0055C_beta_aapcs64.pdf
- http://infocenter.arm.com/help/topic/com.arm.doc.subset.swdev.abi/index.html
- Procedure Call Standard for the ARM 64-bit Architecture (AArch64)
diff --git a/Documentation/arm64/tagged-pointers.txt b/Documentation/arm64/tagged-pointers.txt
deleted file mode 100644
index a25a99e82bb1..000000000000
--- a/Documentation/arm64/tagged-pointers.txt
+++ /dev/null
@@ -1,66 +0,0 @@
- Tagged virtual addresses in AArch64 Linux
- =========================================
-
-Author: Will Deacon <will.deacon@arm.com>
-Date : 12 June 2013
-
-This document briefly describes the provision of tagged virtual
-addresses in the AArch64 translation system and their potential uses
-in AArch64 Linux.
-
-The kernel configures the translation tables so that translations made
-via TTBR0 (i.e. userspace mappings) have the top byte (bits 63:56) of
-the virtual address ignored by the translation hardware. This frees up
-this byte for application use.
-
-
-Passing tagged addresses to the kernel
---------------------------------------
-
-All interpretation of userspace memory addresses by the kernel assumes
-an address tag of 0x00.
-
-This includes, but is not limited to, addresses found in:
-
- - pointer arguments to system calls, including pointers in structures
- passed to system calls,
-
- - the stack pointer (sp), e.g. when interpreting it to deliver a
- signal,
-
- - the frame pointer (x29) and frame records, e.g. when interpreting
- them to generate a backtrace or call graph.
-
-Using non-zero address tags in any of these locations may result in an
-error code being returned, a (fatal) signal being raised, or other modes
-of failure.
-
-For these reasons, passing non-zero address tags to the kernel via
-system calls is forbidden, and using a non-zero address tag for sp is
-strongly discouraged.
-
-Programs maintaining a frame pointer and frame records that use non-zero
-address tags may suffer impaired or inaccurate debug and profiling
-visibility.
-
-
-Preserving tags
----------------
-
-Non-zero tags are not preserved when delivering signals. This means that
-signal handlers in applications making use of tags cannot rely on the
-tag information for user virtual addresses being maintained for fields
-inside siginfo_t. One exception to this rule is for signals raised in
-response to watchpoint debug exceptions, where the tag information will
-be preserved.
-
-The architecture prevents the use of a tagged PC, so the upper byte will
-be set to a sign-extension of bit 55 on exception return.
-
-
-Other considerations
---------------------
-
-Special care should be taken when using tagged pointers, since it is
-likely that C compilers will not hazard two virtual addresses differing
-only in the upper byte.
diff --git a/Documentation/atomic_bitops.txt b/Documentation/atomic_bitops.txt
index be70b32c95d9..edea4656c5c0 100644
--- a/Documentation/atomic_bitops.txt
+++ b/Documentation/atomic_bitops.txt
@@ -1,6 +1,6 @@
-
-On atomic bitops.
-
+=============
+Atomic bitops
+=============
While our bitmap_{}() functions are non-atomic, we have a number of operations
operating on single bits in a bitmap that are atomic.
@@ -58,13 +58,11 @@ Like with atomic_t, the rule of thumb is:
- RMW operations that have a return value are fully ordered.
- - RMW operations that are conditional are unordered on FAILURE,
- otherwise the above rules apply. In the case of test_and_{}_bit() operations,
- if the bit in memory is unchanged by the operation then it is deemed to have
- failed.
+ - RMW operations that are conditional are fully ordered.
-Except for a successful test_and_set_bit_lock() which has ACQUIRE semantics and
-clear_bit_unlock() which has RELEASE semantics.
+Except for a successful test_and_set_bit_lock() which has ACQUIRE semantics,
+clear_bit_unlock() which has RELEASE semantics and test_bit_acquire which has
+ACQUIRE semantics.
Since a platform only has a single means of achieving atomic operations
the same barriers as for atomic_t are used, see atomic_t.txt.
diff --git a/Documentation/atomic_t.txt b/Documentation/atomic_t.txt
index 913396ac5824..bee3b1bca9a7 100644
--- a/Documentation/atomic_t.txt
+++ b/Documentation/atomic_t.txt
@@ -56,6 +56,23 @@ Barriers:
smp_mb__{before,after}_atomic()
+TYPES (signed vs unsigned)
+-----
+
+While atomic_t, atomic_long_t and atomic64_t use int, long and s64
+respectively (for hysterical raisins), the kernel uses -fno-strict-overflow
+(which implies -fwrapv) and defines signed overflow to behave like
+2s-complement.
+
+Therefore, an explicitly unsigned variant of the atomic ops is strictly
+unnecessary and we can simply cast, there is no UB.
+
+There was a bug in UBSAN prior to GCC-8 that would generate UB warnings for
+signed types.
+
+With this we also conform to the C/C++ _Atomic behaviour and things like
+P1236R1.
+
SEMANTICS
---------
@@ -64,23 +81,25 @@ Non-RMW ops:
The non-RMW ops are (typically) regular LOADs and STOREs and are canonically
implemented using READ_ONCE(), WRITE_ONCE(), smp_load_acquire() and
-smp_store_release() respectively.
+smp_store_release() respectively. Therefore, if you find yourself only using
+the Non-RMW operations of atomic_t, you do not in fact need atomic_t at all
+and are doing it wrong.
-The one detail to this is that atomic_set{}() should be observable to the RMW
-ops. That is:
+A note for the implementation of atomic_set{}() is that it must not break the
+atomicity of the RMW ops. That is:
- C atomic-set
+ C Atomic-RMW-ops-are-atomic-WRT-atomic_set
{
- atomic_set(v, 1);
+ atomic_t v = ATOMIC_INIT(1);
}
- P1(atomic_t *v)
+ P0(atomic_t *v)
{
- atomic_add_unless(v, 1, 0);
+ (void)atomic_add_unless(v, 1, 0);
}
- P2(atomic_t *v)
+ P1(atomic_t *v)
{
atomic_set(v, 0);
}
@@ -152,14 +171,14 @@ The rule of thumb:
- RMW operations that are conditional are unordered on FAILURE,
otherwise the above rules apply.
-Except of course when an operation has an explicit ordering like:
+Except of course when a successful operation has an explicit ordering like:
{}_relaxed: unordered
{}_acquire: the R of the RMW (or atomic_read) is an ACQUIRE
{}_release: the W of the RMW (or atomic_set) is a RELEASE
Where 'unordered' is against other memory locations. Address dependencies are
-not defeated.
+not defeated. Conditional operations are still unordered on FAILURE.
Fully ordered primitives are ordered against everything prior and everything
subsequent. Therefore a fully ordered primitive is like having an smp_mb()
@@ -170,13 +189,22 @@ The barriers:
smp_mb__{before,after}_atomic()
-only apply to the RMW ops and can be used to augment/upgrade the ordering
-inherent to the used atomic op. These barriers provide a full smp_mb().
+only apply to the RMW atomic ops and can be used to augment/upgrade the
+ordering inherent to the op. These barriers act almost like a full smp_mb():
+smp_mb__before_atomic() orders all earlier accesses against the RMW op
+itself and all accesses following it, and smp_mb__after_atomic() orders all
+later accesses against the RMW op and all accesses preceding it. However,
+accesses between the smp_mb__{before,after}_atomic() and the RMW op are not
+ordered, so it is advisable to place the barrier right next to the RMW atomic
+op whenever possible.
These helper barriers exist because architectures have varying implicit
ordering on their SMP atomic primitives. For example our TSO architectures
provide full ordered atomics and these barriers are no-ops.
+NOTE: when the atomic RmW ops are fully ordered, they should also imply a
+compiler barrier.
+
Thus:
atomic_fetch_add();
@@ -195,7 +223,9 @@ Further, while something like:
atomic_dec(&X);
is a 'typical' RELEASE pattern, the barrier is strictly stronger than
-a RELEASE. Similarly for something like:
+a RELEASE because it orders preceding instructions against both the read
+and write parts of the atomic_dec(), and against all following instructions
+as well. Similarly, something like:
atomic_inc(&X);
smp_mb__after_atomic();
@@ -203,19 +233,19 @@ a RELEASE. Similarly for something like:
is an ACQUIRE pattern (though very much not typical), but again the barrier is
strictly stronger than ACQUIRE. As illustrated:
- C strong-acquire
+ C Atomic-RMW+mb__after_atomic-is-stronger-than-acquire
{
}
- P1(int *x, atomic_t *y)
+ P0(int *x, atomic_t *y)
{
r0 = READ_ONCE(*x);
smp_rmb();
r1 = atomic_read(y);
}
- P2(int *x, atomic_t *y)
+ P1(int *x, atomic_t *y)
{
atomic_inc(y);
smp_mb__after_atomic();
@@ -223,13 +253,14 @@ strictly stronger than ACQUIRE. As illustrated:
}
exists
- (r0=1 /\ r1=0)
+ (0:r0=1 /\ 0:r1=0)
This should not happen; but a hypothetical atomic_inc_acquire() --
(void)atomic_fetch_inc_acquire() for instance -- would allow the outcome,
-since then:
+because it would not order the W part of the RMW against the following
+WRITE_ONCE. Thus:
- P1 P2
+ P0 P1
t = LL.acq *y (0)
t++;
@@ -240,3 +271,97 @@ since then:
SC *y, t;
is allowed.
+
+
+CMPXCHG vs TRY_CMPXCHG
+----------------------
+
+ int atomic_cmpxchg(atomic_t *ptr, int old, int new);
+ bool atomic_try_cmpxchg(atomic_t *ptr, int *oldp, int new);
+
+Both provide the same functionality, but try_cmpxchg() can lead to more
+compact code. The functions relate like:
+
+ bool atomic_try_cmpxchg(atomic_t *ptr, int *oldp, int new)
+ {
+ int ret, old = *oldp;
+ ret = atomic_cmpxchg(ptr, old, new);
+ if (ret != old)
+ *oldp = ret;
+ return ret == old;
+ }
+
+and:
+
+ int atomic_cmpxchg(atomic_t *ptr, int old, int new)
+ {
+ (void)atomic_try_cmpxchg(ptr, &old, new);
+ return old;
+ }
+
+Usage:
+
+ old = atomic_read(&v); old = atomic_read(&v);
+ for (;;) { do {
+ new = func(old); new = func(old);
+ tmp = atomic_cmpxchg(&v, old, new); } while (!atomic_try_cmpxchg(&v, &old, new));
+ if (tmp == old)
+ break;
+ old = tmp;
+ }
+
+NB. try_cmpxchg() also generates better code on some platforms (notably x86)
+where the function more closely matches the hardware instruction.
+
+
+FORWARD PROGRESS
+----------------
+
+In general strong forward progress is expected of all unconditional atomic
+operations -- those in the Arithmetic and Bitwise classes and xchg(). However
+a fair amount of code also requires forward progress from the conditional
+atomic operations.
+
+Specifically 'simple' cmpxchg() loops are expected to not starve one another
+indefinitely. However, this is not evident on LL/SC architectures, because
+while an LL/SC architecture 'can/should/must' provide forward progress
+guarantees between competing LL/SC sections, such a guarantee does not
+transfer to cmpxchg() implemented using LL/SC. Consider:
+
+ old = atomic_read(&v);
+ do {
+ new = func(old);
+ } while (!atomic_try_cmpxchg(&v, &old, new));
+
+which on LL/SC becomes something like:
+
+ old = atomic_read(&v);
+ do {
+ new = func(old);
+ } while (!({
+ volatile asm ("1: LL %[oldval], %[v]\n"
+ " CMP %[oldval], %[old]\n"
+ " BNE 2f\n"
+ " SC %[new], %[v]\n"
+ " BNE 1b\n"
+ "2:\n"
+ : [oldval] "=&r" (oldval), [v] "m" (v)
+ : [old] "r" (old), [new] "r" (new)
+ : "memory");
+ success = (oldval == old);
+ if (!success)
+ old = oldval;
+ success; }));
+
+However, even the forward branch from the failed compare can cause the LL/SC
+to fail on some architectures, let alone whatever the compiler makes of the C
+loop body. As a result there is no guarantee what so ever the cacheline
+containing @v will stay on the local CPU and progress is made.
+
+Even native CAS architectures can fail to provide forward progress for their
+primitive (See Sparc64 for an example).
+
+Such implementations are strongly encouraged to add exponential backoff loops
+to a failed CAS in order to ensure some progress. Affected architectures are
+also strongly encouraged to inspect/audit the atomic fallbacks, refcount_t and
+their locking primitives.
diff --git a/Documentation/auxdisplay/cfag12864b b/Documentation/auxdisplay/cfag12864b
deleted file mode 100644
index 12fd51b8de75..000000000000
--- a/Documentation/auxdisplay/cfag12864b
+++ /dev/null
@@ -1,105 +0,0 @@
- ===================================
- cfag12864b LCD Driver Documentation
- ===================================
-
-License: GPLv2
-Author & Maintainer: Miguel Ojeda Sandonis
-Date: 2006-10-27
-
-
-
---------
-0. INDEX
---------
-
- 1. DRIVER INFORMATION
- 2. DEVICE INFORMATION
- 3. WIRING
- 4. USERSPACE PROGRAMMING
-
-
----------------------
-1. DRIVER INFORMATION
----------------------
-
-This driver supports a cfag12864b LCD.
-
-
----------------------
-2. DEVICE INFORMATION
----------------------
-
-Manufacturer: Crystalfontz
-Device Name: Crystalfontz 12864b LCD Series
-Device Code: cfag12864b
-Webpage: http://www.crystalfontz.com
-Device Webpage: http://www.crystalfontz.com/products/12864b/
-Type: LCD (Liquid Crystal Display)
-Width: 128
-Height: 64
-Colors: 2 (B/N)
-Controller: ks0108
-Controllers: 2
-Pages: 8 each controller
-Addresses: 64 each page
-Data size: 1 byte each address
-Memory size: 2 * 8 * 64 * 1 = 1024 bytes = 1 Kbyte
-
-
----------
-3. WIRING
----------
-
-The cfag12864b LCD Series don't have official wiring.
-
-The common wiring is done to the parallel port as shown:
-
-Parallel Port cfag12864b
-
- Name Pin# Pin# Name
-
-Strobe ( 1)------------------------------(17) Enable
-Data 0 ( 2)------------------------------( 4) Data 0
-Data 1 ( 3)------------------------------( 5) Data 1
-Data 2 ( 4)------------------------------( 6) Data 2
-Data 3 ( 5)------------------------------( 7) Data 3
-Data 4 ( 6)------------------------------( 8) Data 4
-Data 5 ( 7)------------------------------( 9) Data 5
-Data 6 ( 8)------------------------------(10) Data 6
-Data 7 ( 9)------------------------------(11) Data 7
- (10) [+5v]---( 1) Vdd
- (11) [GND]---( 2) Ground
- (12) [+5v]---(14) Reset
- (13) [GND]---(15) Read / Write
- Line (14)------------------------------(13) Controller Select 1
- (15)
- Init (16)------------------------------(12) Controller Select 2
-Select (17)------------------------------(16) Data / Instruction
-Ground (18)---[GND] [+5v]---(19) LED +
-Ground (19)---[GND]
-Ground (20)---[GND] E A Values:
-Ground (21)---[GND] [GND]---[P1]---(18) Vee - R = Resistor = 22 ohm
-Ground (22)---[GND] | - P1 = Preset = 10 Kohm
-Ground (23)---[GND] ---- S ------( 3) V0 - P2 = Preset = 1 Kohm
-Ground (24)---[GND] | |
-Ground (25)---[GND] [GND]---[P2]---[R]---(20) LED -
-
-
-------------------------
-4. USERSPACE PROGRAMMING
-------------------------
-
-The cfag12864bfb describes a framebuffer device (/dev/fbX).
-
-It has a size of 1024 bytes = 1 Kbyte.
-Each bit represents one pixel. If the bit is high, the pixel will
-turn on. If the pixel is low, the pixel will turn off.
-
-You can use the framebuffer as a file: fopen, fwrite, fclose...
-Although the LCD won't get updated until the next refresh time arrives.
-
-Also, you can mmap the framebuffer: open & mmap, munmap & close...
-which is the best option for most uses.
-
-Check samples/auxdisplay/cfag12864b-example.c
-for a real working userspace complete program with usage examples.
diff --git a/Documentation/auxdisplay/ks0108 b/Documentation/auxdisplay/ks0108
deleted file mode 100644
index 8ddda0c8ceef..000000000000
--- a/Documentation/auxdisplay/ks0108
+++ /dev/null
@@ -1,55 +0,0 @@
- ==========================================
- ks0108 LCD Controller Driver Documentation
- ==========================================
-
-License: GPLv2
-Author & Maintainer: Miguel Ojeda Sandonis
-Date: 2006-10-27
-
-
-
---------
-0. INDEX
---------
-
- 1. DRIVER INFORMATION
- 2. DEVICE INFORMATION
- 3. WIRING
-
-
----------------------
-1. DRIVER INFORMATION
----------------------
-
-This driver supports the ks0108 LCD controller.
-
-
----------------------
-2. DEVICE INFORMATION
----------------------
-
-Manufacturer: Samsung
-Device Name: KS0108 LCD Controller
-Device Code: ks0108
-Webpage: -
-Device Webpage: -
-Type: LCD Controller (Liquid Crystal Display Controller)
-Width: 64
-Height: 64
-Colors: 2 (B/N)
-Pages: 8
-Addresses: 64 each page
-Data size: 1 byte each address
-Memory size: 8 * 64 * 1 = 512 bytes
-
-
----------
-3. WIRING
----------
-
-The driver supports data parallel port wiring.
-
-If you aren't building LCD related hardware, you should check
-your LCD specific wiring information in the same folder.
-
-For example, check Documentation/auxdisplay/cfag12864b.
diff --git a/Documentation/auxdisplay/lcd-panel-cgram.txt b/Documentation/auxdisplay/lcd-panel-cgram.txt
deleted file mode 100644
index 7f82c905763d..000000000000
--- a/Documentation/auxdisplay/lcd-panel-cgram.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-Some LCDs allow you to define up to 8 characters, mapped to ASCII
-characters 0 to 7. The escape code to define a new character is
-'\e[LG' followed by one digit from 0 to 7, representing the character
-number, and up to 8 couples of hex digits terminated by a semi-colon
-(';'). Each couple of digits represents a line, with 1-bits for each
-illuminated pixel with LSB on the right. Lines are numbered from the
-top of the character to the bottom. On a 5x7 matrix, only the 5 lower
-bits of the 7 first bytes are used for each character. If the string
-is incomplete, only complete lines will be redefined. Here are some
-examples :
-
- printf "\e[LG0010101050D1F0C04;" => 0 = [enter]
- printf "\e[LG1040E1F0000000000;" => 1 = [up]
- printf "\e[LG2000000001F0E0400;" => 2 = [down]
- printf "\e[LG3040E1F001F0E0400;" => 3 = [up-down]
- printf "\e[LG40002060E1E0E0602;" => 4 = [left]
- printf "\e[LG500080C0E0F0E0C08;" => 5 = [right]
- printf "\e[LG60016051516141400;" => 6 = "IP"
-
- printf "\e[LG00103071F1F070301;" => big speaker
- printf "\e[LG00002061E1E060200;" => small speaker
-
-Willy
-
diff --git a/Documentation/backlight/lp855x-driver.txt b/Documentation/backlight/lp855x-driver.txt
deleted file mode 100644
index 01bce243d3d7..000000000000
--- a/Documentation/backlight/lp855x-driver.txt
+++ /dev/null
@@ -1,66 +0,0 @@
-Kernel driver lp855x
-====================
-
-Backlight driver for LP855x ICs
-
-Supported chips:
- Texas Instruments LP8550, LP8551, LP8552, LP8553, LP8555, LP8556 and
- LP8557
-
-Author: Milo(Woogyom) Kim <milo.kim@ti.com>
-
-Description
------------
-
-* Brightness control
-
-Brightness can be controlled by the pwm input or the i2c command.
-The lp855x driver supports both cases.
-
-* Device attributes
-
-1) bl_ctl_mode
-Backlight control mode.
-Value : pwm based or register based
-
-2) chip_id
-The lp855x chip id.
-Value : lp8550/lp8551/lp8552/lp8553/lp8555/lp8556/lp8557
-
-Platform data for lp855x
-------------------------
-
-For supporting platform specific data, the lp855x platform data can be used.
-
-* name : Backlight driver name. If it is not defined, default name is set.
-* device_control : Value of DEVICE CONTROL register.
-* initial_brightness : Initial value of backlight brightness.
-* period_ns : Platform specific PWM period value. unit is nano.
- Only valid when brightness is pwm input mode.
-* size_program : Total size of lp855x_rom_data.
-* rom_data : List of new eeprom/eprom registers.
-
-example 1) lp8552 platform data : i2c register mode with new eeprom data
-
-#define EEPROM_A5_ADDR 0xA5
-#define EEPROM_A5_VAL 0x4f /* EN_VSYNC=0 */
-
-static struct lp855x_rom_data lp8552_eeprom_arr[] = {
- {EEPROM_A5_ADDR, EEPROM_A5_VAL},
-};
-
-static struct lp855x_platform_data lp8552_pdata = {
- .name = "lcd-bl",
- .device_control = I2C_CONFIG(LP8552),
- .initial_brightness = INITIAL_BRT,
- .size_program = ARRAY_SIZE(lp8552_eeprom_arr),
- .rom_data = lp8552_eeprom_arr,
-};
-
-example 2) lp8556 platform data : pwm input mode with default rom data
-
-static struct lp855x_platform_data lp8556_pdata = {
- .device_control = PWM_CONFIG(LP8556),
- .initial_brightness = INITIAL_BRT,
- .period_ns = 1000000,
-};
diff --git a/Documentation/block/bfq-iosched.rst b/Documentation/block/bfq-iosched.rst
new file mode 100644
index 000000000000..a0ff0eb11e7f
--- /dev/null
+++ b/Documentation/block/bfq-iosched.rst
@@ -0,0 +1,606 @@
+==========================
+BFQ (Budget Fair Queueing)
+==========================
+
+BFQ is a proportional-share I/O scheduler, with some extra
+low-latency capabilities. In addition to cgroups support (blkio or io
+controllers), BFQ's main features are:
+
+- BFQ guarantees a high system and application responsiveness, and a
+ low latency for time-sensitive applications, such as audio or video
+ players;
+- BFQ distributes bandwidth, not just time, among processes or
+ groups (switching back to time distribution when needed to keep
+ throughput high).
+
+In its default configuration, BFQ privileges latency over
+throughput. So, when needed for achieving a lower latency, BFQ builds
+schedules that may lead to a lower throughput. If your main or only
+goal, for a given device, is to achieve the maximum-possible
+throughput at all times, then do switch off all low-latency heuristics
+for that device, by setting low_latency to 0. See Section 3 for
+details on how to configure BFQ for the desired tradeoff between
+latency and throughput, or on how to maximize throughput.
+
+As every I/O scheduler, BFQ adds some overhead to per-I/O-request
+processing. To give an idea of this overhead, the total,
+single-lock-protected, per-request processing time of BFQ---i.e., the
+sum of the execution times of the request insertion, dispatch and
+completion hooks---is, e.g., 1.9 us on an Intel Core i7-2760QM@2.40GHz
+(dated CPU for notebooks; time measured with simple code
+instrumentation, and using the throughput-sync.sh script of the S
+suite [1], in performance-profiling mode). To put this result into
+context, the total, single-lock-protected, per-request execution time
+of the lightest I/O scheduler available in blk-mq, mq-deadline, is 0.7
+us (mq-deadline is ~800 LOC, against ~10500 LOC for BFQ).
+
+Scheduling overhead further limits the maximum IOPS that a CPU can
+process (already limited by the execution of the rest of the I/O
+stack). To give an idea of the limits with BFQ, on slow or average
+CPUs, here are, first, the limits of BFQ for three different CPUs, on,
+respectively, an average laptop, an old desktop, and a cheap embedded
+system, in case full hierarchical support is enabled (i.e.,
+CONFIG_BFQ_GROUP_IOSCHED is set), but CONFIG_BFQ_CGROUP_DEBUG is not
+set (Section 4-2):
+- Intel i7-4850HQ: 400 KIOPS
+- AMD A8-3850: 250 KIOPS
+- ARM CortexTM-A53 Octa-core: 80 KIOPS
+
+If CONFIG_BFQ_CGROUP_DEBUG is set (and of course full hierarchical
+support is enabled), then the sustainable throughput with BFQ
+decreases, because all blkio.bfq* statistics are created and updated
+(Section 4-2). For BFQ, this leads to the following maximum
+sustainable throughputs, on the same systems as above:
+- Intel i7-4850HQ: 310 KIOPS
+- AMD A8-3850: 200 KIOPS
+- ARM CortexTM-A53 Octa-core: 56 KIOPS
+
+BFQ works for multi-queue devices too.
+
+.. The table of contents follow. Impatients can just jump to Section 3.
+
+.. CONTENTS
+
+ 1. When may BFQ be useful?
+ 1-1 Personal systems
+ 1-2 Server systems
+ 2. How does BFQ work?
+ 3. What are BFQ's tunables and how to properly configure BFQ?
+ 4. BFQ group scheduling
+ 4-1 Service guarantees provided
+ 4-2 Interface
+
+1. When may BFQ be useful?
+==========================
+
+BFQ provides the following benefits on personal and server systems.
+
+1-1 Personal systems
+--------------------
+
+Low latency for interactive applications
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Regardless of the actual background workload, BFQ guarantees that, for
+interactive tasks, the storage device is virtually as responsive as if
+it was idle. For example, even if one or more of the following
+background workloads are being executed:
+
+- one or more large files are being read, written or copied,
+- a tree of source files is being compiled,
+- one or more virtual machines are performing I/O,
+- a software update is in progress,
+- indexing daemons are scanning filesystems and updating their
+ databases,
+
+starting an application or loading a file from within an application
+takes about the same time as if the storage device was idle. As a
+comparison, with CFQ, NOOP or DEADLINE, and in the same conditions,
+applications experience high latencies, or even become unresponsive
+until the background workload terminates (also on SSDs).
+
+Low latency for soft real-time applications
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Also soft real-time applications, such as audio and video
+players/streamers, enjoy a low latency and a low drop rate, regardless
+of the background I/O workload. As a consequence, these applications
+do not suffer from almost any glitch due to the background workload.
+
+Higher speed for code-development tasks
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If some additional workload happens to be executed in parallel, then
+BFQ executes the I/O-related components of typical code-development
+tasks (compilation, checkout, merge, etc.) much more quickly than CFQ,
+NOOP or DEADLINE.
+
+High throughput
+^^^^^^^^^^^^^^^
+
+On hard disks, BFQ achieves up to 30% higher throughput than CFQ, and
+up to 150% higher throughput than DEADLINE and NOOP, with all the
+sequential workloads considered in our tests. With random workloads,
+and with all the workloads on flash-based devices, BFQ achieves,
+instead, about the same throughput as the other schedulers.
+
+Strong fairness, bandwidth and delay guarantees
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+BFQ distributes the device throughput, and not just the device time,
+among I/O-bound applications in proportion to their weights, with any
+workload and regardless of the device parameters. From these bandwidth
+guarantees, it is possible to compute a tight per-I/O-request delay
+guarantees by a simple formula. If not configured for strict service
+guarantees, BFQ switches to time-based resource sharing (only) for
+applications that would otherwise cause a throughput loss.
+
+1-2 Server systems
+------------------
+
+Most benefits for server systems follow from the same service
+properties as above. In particular, regardless of whether additional,
+possibly heavy workloads are being served, BFQ guarantees:
+
+* audio and video-streaming with zero or very low jitter and drop
+ rate;
+
+* fast retrieval of WEB pages and embedded objects;
+
+* real-time recording of data in live-dumping applications (e.g.,
+ packet logging);
+
+* responsiveness in local and remote access to a server.
+
+
+2. How does BFQ work?
+=====================
+
+BFQ is a proportional-share I/O scheduler, whose general structure,
+plus a lot of code, are borrowed from CFQ.
+
+- Each process doing I/O on a device is associated with a weight and a
+ `(bfq_)queue`.
+
+- BFQ grants exclusive access to the device, for a while, to one queue
+ (process) at a time, and implements this service model by
+ associating every queue with a budget, measured in number of
+ sectors.
+
+ - After a queue is granted access to the device, the budget of the
+ queue is decremented, on each request dispatch, by the size of the
+ request.
+
+ - The in-service queue is expired, i.e., its service is suspended,
+ only if one of the following events occurs: 1) the queue finishes
+ its budget, 2) the queue empties, 3) a "budget timeout" fires.
+
+ - The budget timeout prevents processes doing random I/O from
+ holding the device for too long and dramatically reducing
+ throughput.
+
+ - Actually, as in CFQ, a queue associated with a process issuing
+ sync requests may not be expired immediately when it empties. In
+ contrast, BFQ may idle the device for a short time interval,
+ giving the process the chance to go on being served if it issues
+ a new request in time. Device idling typically boosts the
+ throughput on rotational devices and on non-queueing flash-based
+ devices, if processes do synchronous and sequential I/O. In
+ addition, under BFQ, device idling is also instrumental in
+ guaranteeing the desired throughput fraction to processes
+ issuing sync requests (see the description of the slice_idle
+ tunable in this document, or [1, 2], for more details).
+
+ - With respect to idling for service guarantees, if several
+ processes are competing for the device at the same time, but
+ all processes and groups have the same weight, then BFQ
+ guarantees the expected throughput distribution without ever
+ idling the device. Throughput is thus as high as possible in
+ this common scenario.
+
+ - On flash-based storage with internal queueing of commands
+ (typically NCQ), device idling happens to be always detrimental
+ to throughput. So, with these devices, BFQ performs idling
+ only when strictly needed for service guarantees, i.e., for
+ guaranteeing low latency or fairness. In these cases, overall
+ throughput may be sub-optimal. No solution currently exists to
+ provide both strong service guarantees and optimal throughput
+ on devices with internal queueing.
+
+ - If low-latency mode is enabled (default configuration), BFQ
+ executes some special heuristics to detect interactive and soft
+ real-time applications (e.g., video or audio players/streamers),
+ and to reduce their latency. The most important action taken to
+ achieve this goal is to give to the queues associated with these
+ applications more than their fair share of the device
+ throughput. For brevity, we call it just "weight-raising" the whole
+ sets of actions taken by BFQ to privilege these queues. In
+ particular, BFQ provides a milder form of weight-raising for
+ interactive applications, and a stronger form for soft real-time
+ applications.
+
+ - BFQ automatically deactivates idling for queues born in a burst of
+ queue creations. In fact, these queues are usually associated with
+ the processes of applications and services that benefit mostly
+ from a high throughput. Examples are systemd during boot, or git
+ grep.
+
+ - As CFQ, BFQ merges queues performing interleaved I/O, i.e.,
+ performing random I/O that becomes mostly sequential if
+ merged. Differently from CFQ, BFQ achieves this goal with a more
+ reactive mechanism, called Early Queue Merge (EQM). EQM is so
+ responsive in detecting interleaved I/O (cooperating processes),
+ that it enables BFQ to achieve a high throughput, by queue
+ merging, even for queues for which CFQ needs a different
+ mechanism, preemption, to get a high throughput. As such, EQM is a
+ unified mechanism to achieve a high throughput with interleaved
+ I/O.
+
+ - Queues are scheduled according to a variant of WF2Q+, named
+ B-WF2Q+, and implemented using an augmented rb-tree to preserve an
+ O(log N) overall complexity. See [2] for more details. B-WF2Q+ is
+ also ready for hierarchical scheduling, details in Section 4.
+
+ - B-WF2Q+ guarantees a tight deviation with respect to an ideal,
+ perfectly fair, and smooth service. In particular, B-WF2Q+
+ guarantees that each queue receives a fraction of the device
+ throughput proportional to its weight, even if the throughput
+ fluctuates, and regardless of: the device parameters, the current
+ workload and the budgets assigned to the queue.
+
+ - The last, budget-independence, property (although probably
+ counterintuitive in the first place) is definitely beneficial, for
+ the following reasons:
+
+ - First, with any proportional-share scheduler, the maximum
+ deviation with respect to an ideal service is proportional to
+ the maximum budget (slice) assigned to queues. As a consequence,
+ BFQ can keep this deviation tight, not only because of the
+ accurate service of B-WF2Q+, but also because BFQ *does not*
+ need to assign a larger budget to a queue to let the queue
+ receive a higher fraction of the device throughput.
+
+ - Second, BFQ is free to choose, for every process (queue), the
+ budget that best fits the needs of the process, or best
+ leverages the I/O pattern of the process. In particular, BFQ
+ updates queue budgets with a simple feedback-loop algorithm that
+ allows a high throughput to be achieved, while still providing
+ tight latency guarantees to time-sensitive applications. When
+ the in-service queue expires, this algorithm computes the next
+ budget of the queue so as to:
+
+ - Let large budgets be eventually assigned to the queues
+ associated with I/O-bound applications performing sequential
+ I/O: in fact, the longer these applications are served once
+ got access to the device, the higher the throughput is.
+
+ - Let small budgets be eventually assigned to the queues
+ associated with time-sensitive applications (which typically
+ perform sporadic and short I/O), because, the smaller the
+ budget assigned to a queue waiting for service is, the sooner
+ B-WF2Q+ will serve that queue (Subsec 3.3 in [2]).
+
+- If several processes are competing for the device at the same time,
+ but all processes and groups have the same weight, then BFQ
+ guarantees the expected throughput distribution without ever idling
+ the device. It uses preemption instead. Throughput is then much
+ higher in this common scenario.
+
+- ioprio classes are served in strict priority order, i.e.,
+ lower-priority queues are not served as long as there are
+ higher-priority queues. Among queues in the same class, the
+ bandwidth is distributed in proportion to the weight of each
+ queue. A very thin extra bandwidth is however guaranteed to
+ the Idle class, to prevent it from starving.
+
+
+3. What are BFQ's tunables and how to properly configure BFQ?
+=============================================================
+
+Most BFQ tunables affect service guarantees (basically latency and
+fairness) and throughput. For full details on how to choose the
+desired tradeoff between service guarantees and throughput, see the
+parameters slice_idle, strict_guarantees and low_latency. For details
+on how to maximise throughput, see slice_idle, timeout_sync and
+max_budget. The other performance-related parameters have been
+inherited from, and have been preserved mostly for compatibility with
+CFQ. So far, no performance improvement has been reported after
+changing the latter parameters in BFQ.
+
+In particular, the tunables back_seek-max, back_seek_penalty,
+fifo_expire_async and fifo_expire_sync below are the same as in
+CFQ. Their description is just copied from that for CFQ. Some
+considerations in the description of slice_idle are copied from CFQ
+too.
+
+per-process ioprio and weight
+-----------------------------
+
+Unless the cgroups interface is used (see "4. BFQ group scheduling"),
+weights can be assigned to processes only indirectly, through I/O
+priorities, and according to the relation:
+weight = (IOPRIO_BE_NR - ioprio) * 10.
+
+Beware that, if low-latency is set, then BFQ automatically raises the
+weight of the queues associated with interactive and soft real-time
+applications. Unset this tunable if you need/want to control weights.
+
+slice_idle
+----------
+
+This parameter specifies how long BFQ should idle for the next I/O
+request, when certain sync BFQ queues become empty. By default
+slice_idle is a non-zero value. Idling has a double purpose: boosting
+throughput and making sure that the desired throughput distribution is
+respected (see the description of how BFQ works, and, if needed, the
+papers referred there).
+
+As for throughput, idling can be very helpful on highly seeky media
+like single spindle SATA/SAS disks where we can cut down on overall
+number of seeks and see improved throughput.
+
+Setting slice_idle to 0 will remove all the idling on queues and one
+should see an overall improved throughput on faster storage devices
+like multiple SATA/SAS disks in hardware RAID configuration, as well
+as flash-based storage with internal command queueing (and
+parallelism).
+
+So depending on storage and workload, it might be useful to set
+slice_idle=0. In general for SATA/SAS disks and software RAID of
+SATA/SAS disks keeping slice_idle enabled should be useful. For any
+configurations where there are multiple spindles behind single LUN
+(Host based hardware RAID controller or for storage arrays), or with
+flash-based fast storage, setting slice_idle=0 might end up in better
+throughput and acceptable latencies.
+
+Idling is however necessary to have service guarantees enforced in
+case of differentiated weights or differentiated I/O-request lengths.
+To see why, suppose that a given BFQ queue A must get several I/O
+requests served for each request served for another queue B. Idling
+ensures that, if A makes a new I/O request slightly after becoming
+empty, then no request of B is dispatched in the middle, and thus A
+does not lose the possibility to get more than one request dispatched
+before the next request of B is dispatched. Note that idling
+guarantees the desired differentiated treatment of queues only in
+terms of I/O-request dispatches. To guarantee that the actual service
+order then corresponds to the dispatch order, the strict_guarantees
+tunable must be set too.
+
+There is an important flip side to idling: apart from the above cases
+where it is beneficial also for throughput, idling can severely impact
+throughput. One important case is random workload. Because of this
+issue, BFQ tends to avoid idling as much as possible, when it is not
+beneficial also for throughput (as detailed in Section 2). As a
+consequence of this behavior, and of further issues described for the
+strict_guarantees tunable, short-term service guarantees may be
+occasionally violated. And, in some cases, these guarantees may be
+more important than guaranteeing maximum throughput. For example, in
+video playing/streaming, a very low drop rate may be more important
+than maximum throughput. In these cases, consider setting the
+strict_guarantees parameter.
+
+slice_idle_us
+-------------
+
+Controls the same tuning parameter as slice_idle, but in microseconds.
+Either tunable can be used to set idling behavior. Afterwards, the
+other tunable will reflect the newly set value in sysfs.
+
+strict_guarantees
+-----------------
+
+If this parameter is set (default: unset), then BFQ
+
+- always performs idling when the in-service queue becomes empty;
+
+- forces the device to serve one I/O request at a time, by dispatching a
+ new request only if there is no outstanding request.
+
+In the presence of differentiated weights or I/O-request sizes, both
+the above conditions are needed to guarantee that every BFQ queue
+receives its allotted share of the bandwidth. The first condition is
+needed for the reasons explained in the description of the slice_idle
+tunable. The second condition is needed because all modern storage
+devices reorder internally-queued requests, which may trivially break
+the service guarantees enforced by the I/O scheduler.
+
+Setting strict_guarantees may evidently affect throughput.
+
+back_seek_max
+-------------
+
+This specifies, given in Kbytes, the maximum "distance" for backward seeking.
+The distance is the amount of space from the current head location to the
+sectors that are backward in terms of distance.
+
+This parameter allows the scheduler to anticipate requests in the "backward"
+direction and consider them as being the "next" if they are within this
+distance from the current head location.
+
+back_seek_penalty
+-----------------
+
+This parameter is used to compute the cost of backward seeking. If the
+backward distance of request is just 1/back_seek_penalty from a "front"
+request, then the seeking cost of two requests is considered equivalent.
+
+So scheduler will not bias toward one or the other request (otherwise scheduler
+will bias toward front request). Default value of back_seek_penalty is 2.
+
+fifo_expire_async
+-----------------
+
+This parameter is used to set the timeout of asynchronous requests. Default
+value of this is 250ms.
+
+fifo_expire_sync
+----------------
+
+This parameter is used to set the timeout of synchronous requests. Default
+value of this is 125ms. In case to favor synchronous requests over asynchronous
+one, this value should be decreased relative to fifo_expire_async.
+
+low_latency
+-----------
+
+This parameter is used to enable/disable BFQ's low latency mode. By
+default, low latency mode is enabled. If enabled, interactive and soft
+real-time applications are privileged and experience a lower latency,
+as explained in more detail in the description of how BFQ works.
+
+DISABLE this mode if you need full control on bandwidth
+distribution. In fact, if it is enabled, then BFQ automatically
+increases the bandwidth share of privileged applications, as the main
+means to guarantee a lower latency to them.
+
+In addition, as already highlighted at the beginning of this document,
+DISABLE this mode if your only goal is to achieve a high throughput.
+In fact, privileging the I/O of some application over the rest may
+entail a lower throughput. To achieve the highest-possible throughput
+on a non-rotational device, setting slice_idle to 0 may be needed too
+(at the cost of giving up any strong guarantee on fairness and low
+latency).
+
+timeout_sync
+------------
+
+Maximum amount of device time that can be given to a task (queue) once
+it has been selected for service. On devices with costly seeks,
+increasing this time usually increases maximum throughput. On the
+opposite end, increasing this time coarsens the granularity of the
+short-term bandwidth and latency guarantees, especially if the
+following parameter is set to zero.
+
+max_budget
+----------
+
+Maximum amount of service, measured in sectors, that can be provided
+to a BFQ queue once it is set in service (of course within the limits
+of the above timeout). According to what was said in the description of
+the algorithm, larger values increase the throughput in proportion to
+the percentage of sequential I/O requests issued. The price of larger
+values is that they coarsen the granularity of short-term bandwidth
+and latency guarantees.
+
+The default value is 0, which enables auto-tuning: BFQ sets max_budget
+to the maximum number of sectors that can be served during
+timeout_sync, according to the estimated peak rate.
+
+For specific devices, some users have occasionally reported to have
+reached a higher throughput by setting max_budget explicitly, i.e., by
+setting max_budget to a higher value than 0. In particular, they have
+set max_budget to higher values than those to which BFQ would have set
+it with auto-tuning. An alternative way to achieve this goal is to
+just increase the value of timeout_sync, leaving max_budget equal to 0.
+
+4. Group scheduling with BFQ
+============================
+
+BFQ supports both cgroups-v1 and cgroups-v2 io controllers, namely
+blkio and io. In particular, BFQ supports weight-based proportional
+share. To activate cgroups support, set BFQ_GROUP_IOSCHED.
+
+4-1 Service guarantees provided
+-------------------------------
+
+With BFQ, proportional share means true proportional share of the
+device bandwidth, according to group weights. For example, a group
+with weight 200 gets twice the bandwidth, and not just twice the time,
+of a group with weight 100.
+
+BFQ supports hierarchies (group trees) of any depth. Bandwidth is
+distributed among groups and processes in the expected way: for each
+group, the children of the group share the whole bandwidth of the
+group in proportion to their weights. In particular, this implies
+that, for each leaf group, every process of the group receives the
+same share of the whole group bandwidth, unless the ioprio of the
+process is modified.
+
+The resource-sharing guarantee for a group may partially or totally
+switch from bandwidth to time, if providing bandwidth guarantees to
+the group lowers the throughput too much. This switch occurs on a
+per-process basis: if a process of a leaf group causes throughput loss
+if served in such a way to receive its share of the bandwidth, then
+BFQ switches back to just time-based proportional share for that
+process.
+
+4-2 Interface
+-------------
+
+To get proportional sharing of bandwidth with BFQ for a given device,
+BFQ must of course be the active scheduler for that device.
+
+Within each group directory, the names of the files associated with
+BFQ-specific cgroup parameters and stats begin with the "bfq."
+prefix. So, with cgroups-v1 or cgroups-v2, the full prefix for
+BFQ-specific files is "blkio.bfq." or "io.bfq." For example, the group
+parameter to set the weight of a group with BFQ is blkio.bfq.weight
+or io.bfq.weight.
+
+As for cgroups-v1 (blkio controller), the exact set of stat files
+created, and kept up-to-date by bfq, depends on whether
+CONFIG_BFQ_CGROUP_DEBUG is set. If it is set, then bfq creates all
+the stat files documented in
+Documentation/admin-guide/cgroup-v1/blkio-controller.rst. If, instead,
+CONFIG_BFQ_CGROUP_DEBUG is not set, then bfq creates only the files::
+
+ blkio.bfq.io_service_bytes
+ blkio.bfq.io_service_bytes_recursive
+ blkio.bfq.io_serviced
+ blkio.bfq.io_serviced_recursive
+
+The value of CONFIG_BFQ_CGROUP_DEBUG greatly influences the maximum
+throughput sustainable with bfq, because updating the blkio.bfq.*
+stats is rather costly, especially for some of the stats enabled by
+CONFIG_BFQ_CGROUP_DEBUG.
+
+Parameters
+----------
+
+For each group, the following parameters can be set:
+
+ weight
+ This specifies the default weight for the cgroup inside its parent.
+ Available values: 1..1000 (default: 100).
+
+ For cgroup v1, it is set by writing the value to `blkio.bfq.weight`.
+
+ For cgroup v2, it is set by writing the value to `io.bfq.weight`.
+ (with an optional prefix of `default` and a space).
+
+ The linear mapping between ioprio and weights, described at the beginning
+ of the tunable section, is still valid, but all weights higher than
+ IOPRIO_BE_NR*10 are mapped to ioprio 0.
+
+ Recall that, if low-latency is set, then BFQ automatically raises the
+ weight of the queues associated with interactive and soft real-time
+ applications. Unset this tunable if you need/want to control weights.
+
+ weight_device
+ This specifies a per-device weight for the cgroup. The syntax is
+ `minor:major weight`. A weight of `0` may be used to reset to the default
+ weight.
+
+ For cgroup v1, it is set by writing the value to `blkio.bfq.weight_device`.
+
+ For cgroup v2, the file name is `io.bfq.weight`.
+
+
+[1]
+ P. Valente, A. Avanzini, "Evolution of the BFQ Storage I/O
+ Scheduler", Proceedings of the First Workshop on Mobile System
+ Technologies (MST-2015), May 2015.
+
+ http://algogroup.unimore.it/people/paolo/disk_sched/mst-2015.pdf
+
+[2]
+ P. Valente and M. Andreolini, "Improving Application
+ Responsiveness with the BFQ Disk I/O Scheduler", Proceedings of
+ the 5th Annual International Systems and Storage Conference
+ (SYSTOR '12), June 2012.
+
+ Slightly extended version:
+
+ http://algogroup.unimore.it/people/paolo/disk_sched/bfq-v1-suite-results.pdf
+
+[3]
+ https://github.com/Algodev-github/S
diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt
deleted file mode 100644
index 8d8d8f06cab2..000000000000
--- a/Documentation/block/bfq-iosched.txt
+++ /dev/null
@@ -1,561 +0,0 @@
-BFQ (Budget Fair Queueing)
-==========================
-
-BFQ is a proportional-share I/O scheduler, with some extra
-low-latency capabilities. In addition to cgroups support (blkio or io
-controllers), BFQ's main features are:
-- BFQ guarantees a high system and application responsiveness, and a
- low latency for time-sensitive applications, such as audio or video
- players;
-- BFQ distributes bandwidth, and not just time, among processes or
- groups (switching back to time distribution when needed to keep
- throughput high).
-
-In its default configuration, BFQ privileges latency over
-throughput. So, when needed for achieving a lower latency, BFQ builds
-schedules that may lead to a lower throughput. If your main or only
-goal, for a given device, is to achieve the maximum-possible
-throughput at all times, then do switch off all low-latency heuristics
-for that device, by setting low_latency to 0. See Section 3 for
-details on how to configure BFQ for the desired tradeoff between
-latency and throughput, or on how to maximize throughput.
-
-BFQ has a non-null overhead, which limits the maximum IOPS that a CPU
-can process for a device scheduled with BFQ. To give an idea of the
-limits on slow or average CPUs, here are, first, the limits of BFQ for
-three different CPUs, on, respectively, an average laptop, an old
-desktop, and a cheap embedded system, in case full hierarchical
-support is enabled (i.e., CONFIG_BFQ_GROUP_IOSCHED is set), but
-CONFIG_DEBUG_BLK_CGROUP is not set (Section 4-2):
-- Intel i7-4850HQ: 400 KIOPS
-- AMD A8-3850: 250 KIOPS
-- ARM CortexTM-A53 Octa-core: 80 KIOPS
-
-If CONFIG_DEBUG_BLK_CGROUP is set (and of course full hierarchical
-support is enabled), then the sustainable throughput with BFQ
-decreases, because all blkio.bfq* statistics are created and updated
-(Section 4-2). For BFQ, this leads to the following maximum
-sustainable throughputs, on the same systems as above:
-- Intel i7-4850HQ: 310 KIOPS
-- AMD A8-3850: 200 KIOPS
-- ARM CortexTM-A53 Octa-core: 56 KIOPS
-
-BFQ works for multi-queue devices too.
-
-The table of contents follow. Impatients can just jump to Section 3.
-
-CONTENTS
-
-1. When may BFQ be useful?
- 1-1 Personal systems
- 1-2 Server systems
-2. How does BFQ work?
-3. What are BFQ's tunables and how to properly configure BFQ?
-4. BFQ group scheduling
- 4-1 Service guarantees provided
- 4-2 Interface
-
-1. When may BFQ be useful?
-==========================
-
-BFQ provides the following benefits on personal and server systems.
-
-1-1 Personal systems
---------------------
-
-Low latency for interactive applications
-
-Regardless of the actual background workload, BFQ guarantees that, for
-interactive tasks, the storage device is virtually as responsive as if
-it was idle. For example, even if one or more of the following
-background workloads are being executed:
-- one or more large files are being read, written or copied,
-- a tree of source files is being compiled,
-- one or more virtual machines are performing I/O,
-- a software update is in progress,
-- indexing daemons are scanning filesystems and updating their
- databases,
-starting an application or loading a file from within an application
-takes about the same time as if the storage device was idle. As a
-comparison, with CFQ, NOOP or DEADLINE, and in the same conditions,
-applications experience high latencies, or even become unresponsive
-until the background workload terminates (also on SSDs).
-
-Low latency for soft real-time applications
-
-Also soft real-time applications, such as audio and video
-players/streamers, enjoy a low latency and a low drop rate, regardless
-of the background I/O workload. As a consequence, these applications
-do not suffer from almost any glitch due to the background workload.
-
-Higher speed for code-development tasks
-
-If some additional workload happens to be executed in parallel, then
-BFQ executes the I/O-related components of typical code-development
-tasks (compilation, checkout, merge, ...) much more quickly than CFQ,
-NOOP or DEADLINE.
-
-High throughput
-
-On hard disks, BFQ achieves up to 30% higher throughput than CFQ, and
-up to 150% higher throughput than DEADLINE and NOOP, with all the
-sequential workloads considered in our tests. With random workloads,
-and with all the workloads on flash-based devices, BFQ achieves,
-instead, about the same throughput as the other schedulers.
-
-Strong fairness, bandwidth and delay guarantees
-
-BFQ distributes the device throughput, and not just the device time,
-among I/O-bound applications in proportion their weights, with any
-workload and regardless of the device parameters. From these bandwidth
-guarantees, it is possible to compute tight per-I/O-request delay
-guarantees by a simple formula. If not configured for strict service
-guarantees, BFQ switches to time-based resource sharing (only) for
-applications that would otherwise cause a throughput loss.
-
-1-2 Server systems
-------------------
-
-Most benefits for server systems follow from the same service
-properties as above. In particular, regardless of whether additional,
-possibly heavy workloads are being served, BFQ guarantees:
-
-. audio and video-streaming with zero or very low jitter and drop
- rate;
-
-. fast retrieval of WEB pages and embedded objects;
-
-. real-time recording of data in live-dumping applications (e.g.,
- packet logging);
-
-. responsiveness in local and remote access to a server.
-
-
-2. How does BFQ work?
-=====================
-
-BFQ is a proportional-share I/O scheduler, whose general structure,
-plus a lot of code, are borrowed from CFQ.
-
-- Each process doing I/O on a device is associated with a weight and a
- (bfq_)queue.
-
-- BFQ grants exclusive access to the device, for a while, to one queue
- (process) at a time, and implements this service model by
- associating every queue with a budget, measured in number of
- sectors.
-
- - After a queue is granted access to the device, the budget of the
- queue is decremented, on each request dispatch, by the size of the
- request.
-
- - The in-service queue is expired, i.e., its service is suspended,
- only if one of the following events occurs: 1) the queue finishes
- its budget, 2) the queue empties, 3) a "budget timeout" fires.
-
- - The budget timeout prevents processes doing random I/O from
- holding the device for too long and dramatically reducing
- throughput.
-
- - Actually, as in CFQ, a queue associated with a process issuing
- sync requests may not be expired immediately when it empties. In
- contrast, BFQ may idle the device for a short time interval,
- giving the process the chance to go on being served if it issues
- a new request in time. Device idling typically boosts the
- throughput on rotational devices and on non-queueing flash-based
- devices, if processes do synchronous and sequential I/O. In
- addition, under BFQ, device idling is also instrumental in
- guaranteeing the desired throughput fraction to processes
- issuing sync requests (see the description of the slice_idle
- tunable in this document, or [1, 2], for more details).
-
- - With respect to idling for service guarantees, if several
- processes are competing for the device at the same time, but
- all processes and groups have the same weight, then BFQ
- guarantees the expected throughput distribution without ever
- idling the device. Throughput is thus as high as possible in
- this common scenario.
-
- - On flash-based storage with internal queueing of commands
- (typically NCQ), device idling happens to be always detrimental
- for throughput. So, with these devices, BFQ performs idling
- only when strictly needed for service guarantees, i.e., for
- guaranteeing low latency or fairness. In these cases, overall
- throughput may be sub-optimal. No solution currently exists to
- provide both strong service guarantees and optimal throughput
- on devices with internal queueing.
-
- - If low-latency mode is enabled (default configuration), BFQ
- executes some special heuristics to detect interactive and soft
- real-time applications (e.g., video or audio players/streamers),
- and to reduce their latency. The most important action taken to
- achieve this goal is to give to the queues associated with these
- applications more than their fair share of the device
- throughput. For brevity, we call just "weight-raising" the whole
- sets of actions taken by BFQ to privilege these queues. In
- particular, BFQ provides a milder form of weight-raising for
- interactive applications, and a stronger form for soft real-time
- applications.
-
- - BFQ automatically deactivates idling for queues born in a burst of
- queue creations. In fact, these queues are usually associated with
- the processes of applications and services that benefit mostly
- from a high throughput. Examples are systemd during boot, or git
- grep.
-
- - As CFQ, BFQ merges queues performing interleaved I/O, i.e.,
- performing random I/O that becomes mostly sequential if
- merged. Differently from CFQ, BFQ achieves this goal with a more
- reactive mechanism, called Early Queue Merge (EQM). EQM is so
- responsive in detecting interleaved I/O (cooperating processes),
- that it enables BFQ to achieve a high throughput, by queue
- merging, even for queues for which CFQ needs a different
- mechanism, preemption, to get a high throughput. As such EQM is a
- unified mechanism to achieve a high throughput with interleaved
- I/O.
-
- - Queues are scheduled according to a variant of WF2Q+, named
- B-WF2Q+, and implemented using an augmented rb-tree to preserve an
- O(log N) overall complexity. See [2] for more details. B-WF2Q+ is
- also ready for hierarchical scheduling, details in Section 4.
-
- - B-WF2Q+ guarantees a tight deviation with respect to an ideal,
- perfectly fair, and smooth service. In particular, B-WF2Q+
- guarantees that each queue receives a fraction of the device
- throughput proportional to its weight, even if the throughput
- fluctuates, and regardless of: the device parameters, the current
- workload and the budgets assigned to the queue.
-
- - The last, budget-independence, property (although probably
- counterintuitive in the first place) is definitely beneficial, for
- the following reasons:
-
- - First, with any proportional-share scheduler, the maximum
- deviation with respect to an ideal service is proportional to
- the maximum budget (slice) assigned to queues. As a consequence,
- BFQ can keep this deviation tight not only because of the
- accurate service of B-WF2Q+, but also because BFQ *does not*
- need to assign a larger budget to a queue to let the queue
- receive a higher fraction of the device throughput.
-
- - Second, BFQ is free to choose, for every process (queue), the
- budget that best fits the needs of the process, or best
- leverages the I/O pattern of the process. In particular, BFQ
- updates queue budgets with a simple feedback-loop algorithm that
- allows a high throughput to be achieved, while still providing
- tight latency guarantees to time-sensitive applications. When
- the in-service queue expires, this algorithm computes the next
- budget of the queue so as to:
-
- - Let large budgets be eventually assigned to the queues
- associated with I/O-bound applications performing sequential
- I/O: in fact, the longer these applications are served once
- got access to the device, the higher the throughput is.
-
- - Let small budgets be eventually assigned to the queues
- associated with time-sensitive applications (which typically
- perform sporadic and short I/O), because, the smaller the
- budget assigned to a queue waiting for service is, the sooner
- B-WF2Q+ will serve that queue (Subsec 3.3 in [2]).
-
-- If several processes are competing for the device at the same time,
- but all processes and groups have the same weight, then BFQ
- guarantees the expected throughput distribution without ever idling
- the device. It uses preemption instead. Throughput is then much
- higher in this common scenario.
-
-- ioprio classes are served in strict priority order, i.e.,
- lower-priority queues are not served as long as there are
- higher-priority queues. Among queues in the same class, the
- bandwidth is distributed in proportion to the weight of each
- queue. A very thin extra bandwidth is however guaranteed to
- the Idle class, to prevent it from starving.
-
-
-3. What are BFQ's tunables and how to properly configure BFQ?
-=============================================================
-
-Most BFQ tunables affect service guarantees (basically latency and
-fairness) and throughput. For full details on how to choose the
-desired tradeoff between service guarantees and throughput, see the
-parameters slice_idle, strict_guarantees and low_latency. For details
-on how to maximise throughput, see slice_idle, timeout_sync and
-max_budget. The other performance-related parameters have been
-inherited from, and have been preserved mostly for compatibility with
-CFQ. So far, no performance improvement has been reported after
-changing the latter parameters in BFQ.
-
-In particular, the tunables back_seek-max, back_seek_penalty,
-fifo_expire_async and fifo_expire_sync below are the same as in
-CFQ. Their description is just copied from that for CFQ. Some
-considerations in the description of slice_idle are copied from CFQ
-too.
-
-per-process ioprio and weight
------------------------------
-
-Unless the cgroups interface is used (see "4. BFQ group scheduling"),
-weights can be assigned to processes only indirectly, through I/O
-priorities, and according to the relation:
-weight = (IOPRIO_BE_NR - ioprio) * 10.
-
-Beware that, if low-latency is set, then BFQ automatically raises the
-weight of the queues associated with interactive and soft real-time
-applications. Unset this tunable if you need/want to control weights.
-
-slice_idle
-----------
-
-This parameter specifies how long BFQ should idle for next I/O
-request, when certain sync BFQ queues become empty. By default
-slice_idle is a non-zero value. Idling has a double purpose: boosting
-throughput and making sure that the desired throughput distribution is
-respected (see the description of how BFQ works, and, if needed, the
-papers referred there).
-
-As for throughput, idling can be very helpful on highly seeky media
-like single spindle SATA/SAS disks where we can cut down on overall
-number of seeks and see improved throughput.
-
-Setting slice_idle to 0 will remove all the idling on queues and one
-should see an overall improved throughput on faster storage devices
-like multiple SATA/SAS disks in hardware RAID configuration, as well
-as flash-based storage with internal command queueing (and
-parallelism).
-
-So depending on storage and workload, it might be useful to set
-slice_idle=0. In general for SATA/SAS disks and software RAID of
-SATA/SAS disks keeping slice_idle enabled should be useful. For any
-configurations where there are multiple spindles behind single LUN
-(Host based hardware RAID controller or for storage arrays), or with
-flash-based fast storage, setting slice_idle=0 might end up in better
-throughput and acceptable latencies.
-
-Idling is however necessary to have service guarantees enforced in
-case of differentiated weights or differentiated I/O-request lengths.
-To see why, suppose that a given BFQ queue A must get several I/O
-requests served for each request served for another queue B. Idling
-ensures that, if A makes a new I/O request slightly after becoming
-empty, then no request of B is dispatched in the middle, and thus A
-does not lose the possibility to get more than one request dispatched
-before the next request of B is dispatched. Note that idling
-guarantees the desired differentiated treatment of queues only in
-terms of I/O-request dispatches. To guarantee that the actual service
-order then corresponds to the dispatch order, the strict_guarantees
-tunable must be set too.
-
-There is an important flipside for idling: apart from the above cases
-where it is beneficial also for throughput, idling can severely impact
-throughput. One important case is random workload. Because of this
-issue, BFQ tends to avoid idling as much as possible, when it is not
-beneficial also for throughput (as detailed in Section 2). As a
-consequence of this behavior, and of further issues described for the
-strict_guarantees tunable, short-term service guarantees may be
-occasionally violated. And, in some cases, these guarantees may be
-more important than guaranteeing maximum throughput. For example, in
-video playing/streaming, a very low drop rate may be more important
-than maximum throughput. In these cases, consider setting the
-strict_guarantees parameter.
-
-strict_guarantees
------------------
-
-If this parameter is set (default: unset), then BFQ
-
-- always performs idling when the in-service queue becomes empty;
-
-- forces the device to serve one I/O request at a time, by dispatching a
- new request only if there is no outstanding request.
-
-In the presence of differentiated weights or I/O-request sizes, both
-the above conditions are needed to guarantee that every BFQ queue
-receives its allotted share of the bandwidth. The first condition is
-needed for the reasons explained in the description of the slice_idle
-tunable. The second condition is needed because all modern storage
-devices reorder internally-queued requests, which may trivially break
-the service guarantees enforced by the I/O scheduler.
-
-Setting strict_guarantees may evidently affect throughput.
-
-back_seek_max
--------------
-
-This specifies, given in Kbytes, the maximum "distance" for backward seeking.
-The distance is the amount of space from the current head location to the
-sectors that are backward in terms of distance.
-
-This parameter allows the scheduler to anticipate requests in the "backward"
-direction and consider them as being the "next" if they are within this
-distance from the current head location.
-
-back_seek_penalty
------------------
-
-This parameter is used to compute the cost of backward seeking. If the
-backward distance of request is just 1/back_seek_penalty from a "front"
-request, then the seeking cost of two requests is considered equivalent.
-
-So scheduler will not bias toward one or the other request (otherwise scheduler
-will bias toward front request). Default value of back_seek_penalty is 2.
-
-fifo_expire_async
------------------
-
-This parameter is used to set the timeout of asynchronous requests. Default
-value of this is 248ms.
-
-fifo_expire_sync
-----------------
-
-This parameter is used to set the timeout of synchronous requests. Default
-value of this is 124ms. In case to favor synchronous requests over asynchronous
-one, this value should be decreased relative to fifo_expire_async.
-
-low_latency
------------
-
-This parameter is used to enable/disable BFQ's low latency mode. By
-default, low latency mode is enabled. If enabled, interactive and soft
-real-time applications are privileged and experience a lower latency,
-as explained in more detail in the description of how BFQ works.
-
-DISABLE this mode if you need full control on bandwidth
-distribution. In fact, if it is enabled, then BFQ automatically
-increases the bandwidth share of privileged applications, as the main
-means to guarantee a lower latency to them.
-
-In addition, as already highlighted at the beginning of this document,
-DISABLE this mode if your only goal is to achieve a high throughput.
-In fact, privileging the I/O of some application over the rest may
-entail a lower throughput. To achieve the highest-possible throughput
-on a non-rotational device, setting slice_idle to 0 may be needed too
-(at the cost of giving up any strong guarantee on fairness and low
-latency).
-
-timeout_sync
-------------
-
-Maximum amount of device time that can be given to a task (queue) once
-it has been selected for service. On devices with costly seeks,
-increasing this time usually increases maximum throughput. On the
-opposite end, increasing this time coarsens the granularity of the
-short-term bandwidth and latency guarantees, especially if the
-following parameter is set to zero.
-
-max_budget
-----------
-
-Maximum amount of service, measured in sectors, that can be provided
-to a BFQ queue once it is set in service (of course within the limits
-of the above timeout). According to what said in the description of
-the algorithm, larger values increase the throughput in proportion to
-the percentage of sequential I/O requests issued. The price of larger
-values is that they coarsen the granularity of short-term bandwidth
-and latency guarantees.
-
-The default value is 0, which enables auto-tuning: BFQ sets max_budget
-to the maximum number of sectors that can be served during
-timeout_sync, according to the estimated peak rate.
-
-For specific devices, some users have occasionally reported to have
-reached a higher throughput by setting max_budget explicitly, i.e., by
-setting max_budget to a higher value than 0. In particular, they have
-set max_budget to higher values than those to which BFQ would have set
-it with auto-tuning. An alternative way to achieve this goal is to
-just increase the value of timeout_sync, leaving max_budget equal to 0.
-
-weights
--------
-
-Read-only parameter, used to show the weights of the currently active
-BFQ queues.
-
-
-4. Group scheduling with BFQ
-============================
-
-BFQ supports both cgroups-v1 and cgroups-v2 io controllers, namely
-blkio and io. In particular, BFQ supports weight-based proportional
-share. To activate cgroups support, set BFQ_GROUP_IOSCHED.
-
-4-1 Service guarantees provided
--------------------------------
-
-With BFQ, proportional share means true proportional share of the
-device bandwidth, according to group weights. For example, a group
-with weight 200 gets twice the bandwidth, and not just twice the time,
-of a group with weight 100.
-
-BFQ supports hierarchies (group trees) of any depth. Bandwidth is
-distributed among groups and processes in the expected way: for each
-group, the children of the group share the whole bandwidth of the
-group in proportion to their weights. In particular, this implies
-that, for each leaf group, every process of the group receives the
-same share of the whole group bandwidth, unless the ioprio of the
-process is modified.
-
-The resource-sharing guarantee for a group may partially or totally
-switch from bandwidth to time, if providing bandwidth guarantees to
-the group lowers the throughput too much. This switch occurs on a
-per-process basis: if a process of a leaf group causes throughput loss
-if served in such a way to receive its share of the bandwidth, then
-BFQ switches back to just time-based proportional share for that
-process.
-
-4-2 Interface
--------------
-
-To get proportional sharing of bandwidth with BFQ for a given device,
-BFQ must of course be the active scheduler for that device.
-
-Within each group directory, the names of the files associated with
-BFQ-specific cgroup parameters and stats begin with the "bfq."
-prefix. So, with cgroups-v1 or cgroups-v2, the full prefix for
-BFQ-specific files is "blkio.bfq." or "io.bfq." For example, the group
-parameter to set the weight of a group with BFQ is blkio.bfq.weight
-or io.bfq.weight.
-
-As for cgroups-v1 (blkio controller), the exact set of stat files
-created, and kept up-to-date by bfq, depends on whether
-CONFIG_DEBUG_BLK_CGROUP is set. If it is set, then bfq creates all
-the stat files documented in
-Documentation/cgroup-v1/blkio-controller.txt. If, instead,
-CONFIG_DEBUG_BLK_CGROUP is not set, then bfq creates only the files
-blkio.bfq.io_service_bytes
-blkio.bfq.io_service_bytes_recursive
-blkio.bfq.io_serviced
-blkio.bfq.io_serviced_recursive
-
-The value of CONFIG_DEBUG_BLK_CGROUP greatly influences the maximum
-throughput sustainable with bfq, because updating the blkio.bfq.*
-stats is rather costly, especially for some of the stats enabled by
-CONFIG_DEBUG_BLK_CGROUP.
-
-Parameters to set
------------------
-
-For each group, there is only the following parameter to set.
-
-weight (namely blkio.bfq.weight or io.bfq-weight): the weight of the
-group inside its parent. Available values: 1..10000 (default 100). The
-linear mapping between ioprio and weights, described at the beginning
-of the tunable section, is still valid, but all weights higher than
-IOPRIO_BE_NR*10 are mapped to ioprio 0.
-
-Recall that, if low-latency is set, then BFQ automatically raises the
-weight of the queues associated with interactive and soft real-time
-applications. Unset this tunable if you need/want to control weights.
-
-
-[1] P. Valente, A. Avanzini, "Evolution of the BFQ Storage I/O
- Scheduler", Proceedings of the First Workshop on Mobile System
- Technologies (MST-2015), May 2015.
- http://algogroup.unimore.it/people/paolo/disk_sched/mst-2015.pdf
-
-[2] P. Valente and M. Andreolini, "Improving Application
- Responsiveness with the BFQ Disk I/O Scheduler", Proceedings of
- the 5th Annual International Systems and Storage Conference
- (SYSTOR '12), June 2012.
- Slightly extended version:
- http://algogroup.unimore.it/people/paolo/disk_sched/bfq-v1-suite-
- results.pdf
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
deleted file mode 100644
index 207eca58efaa..000000000000
--- a/Documentation/block/biodoc.txt
+++ /dev/null
@@ -1,1165 +0,0 @@
- Notes on the Generic Block Layer Rewrite in Linux 2.5
- =====================================================
-
-Notes Written on Jan 15, 2002:
- Jens Axboe <jens.axboe@oracle.com>
- Suparna Bhattacharya <suparna@in.ibm.com>
-
-Last Updated May 2, 2002
-September 2003: Updated I/O Scheduler portions
- Nick Piggin <npiggin@kernel.dk>
-
-Introduction:
-
-These are some notes describing some aspects of the 2.5 block layer in the
-context of the bio rewrite. The idea is to bring out some of the key
-changes and a glimpse of the rationale behind those changes.
-
-Please mail corrections & suggestions to suparna@in.ibm.com.
-
-Credits:
----------
-
-2.5 bio rewrite:
- Jens Axboe <jens.axboe@oracle.com>
-
-Many aspects of the generic block layer redesign were driven by and evolved
-over discussions, prior patches and the collective experience of several
-people. See sections 8 and 9 for a list of some related references.
-
-The following people helped with review comments and inputs for this
-document:
- Christoph Hellwig <hch@infradead.org>
- Arjan van de Ven <arjanv@redhat.com>
- Randy Dunlap <rdunlap@xenotime.net>
- Andre Hedrick <andre@linux-ide.org>
-
-The following people helped with fixes/contributions to the bio patches
-while it was still work-in-progress:
- David S. Miller <davem@redhat.com>
-
-
-Description of Contents:
-------------------------
-
-1. Scope for tuning of logic to various needs
- 1.1 Tuning based on device or low level driver capabilities
- - Per-queue parameters
- - Highmem I/O support
- - I/O scheduler modularization
- 1.2 Tuning based on high level requirements/capabilities
- 1.2.1 Request Priority/Latency
- 1.3 Direct access/bypass to lower layers for diagnostics and special
- device operations
- 1.3.1 Pre-built commands
-2. New flexible and generic but minimalist i/o structure or descriptor
- (instead of using buffer heads at the i/o layer)
- 2.1 Requirements/Goals addressed
- 2.2 The bio struct in detail (multi-page io unit)
- 2.3 Changes in the request structure
-3. Using bios
- 3.1 Setup/teardown (allocation, splitting)
- 3.2 Generic bio helper routines
- 3.2.1 Traversing segments and completion units in a request
- 3.2.2 Setting up DMA scatterlists
- 3.2.3 I/O completion
- 3.2.4 Implications for drivers that do not interpret bios (don't handle
- multiple segments)
- 3.2.5 Request command tagging
- 3.3 I/O submission
-4. The I/O scheduler
-5. Scalability related changes
- 5.1 Granular locking: Removal of io_request_lock
- 5.2 Prepare for transition to 64 bit sector_t
-6. Other Changes/Implications
- 6.1 Partition re-mapping handled by the generic block layer
-7. A few tips on migration of older drivers
-8. A list of prior/related/impacted patches/ideas
-9. Other References/Discussion Threads
-
----------------------------------------------------------------------------
-
-Bio Notes
---------
-
-Let us discuss the changes in the context of how some overall goals for the
-block layer are addressed.
-
-1. Scope for tuning the generic logic to satisfy various requirements
-
-The block layer design supports adaptable abstractions to handle common
-processing with the ability to tune the logic to an appropriate extent
-depending on the nature of the device and the requirements of the caller.
-One of the objectives of the rewrite was to increase the degree of tunability
-and to enable higher level code to utilize underlying device/driver
-capabilities to the maximum extent for better i/o performance. This is
-important especially in the light of ever improving hardware capabilities
-and application/middleware software designed to take advantage of these
-capabilities.
-
-1.1 Tuning based on low level device / driver capabilities
-
-Sophisticated devices with large built-in caches, intelligent i/o scheduling
-optimizations, high memory DMA support, etc may find some of the
-generic processing an overhead, while for less capable devices the
-generic functionality is essential for performance or correctness reasons.
-Knowledge of some of the capabilities or parameters of the device should be
-used at the generic block layer to take the right decisions on
-behalf of the driver.
-
-How is this achieved ?
-
-Tuning at a per-queue level:
-
-i. Per-queue limits/values exported to the generic layer by the driver
-
-Various parameters that the generic i/o scheduler logic uses are set at
-a per-queue level (e.g maximum request size, maximum number of segments in
-a scatter-gather list, logical block size)
-
-Some parameters that were earlier available as global arrays indexed by
-major/minor are now directly associated with the queue. Some of these may
-move into the block device structure in the future. Some characteristics
-have been incorporated into a queue flags field rather than separate fields
-in themselves. There are blk_queue_xxx functions to set the parameters,
-rather than update the fields directly
-
-Some new queue property settings:
-
- blk_queue_bounce_limit(q, u64 dma_address)
- Enable I/O to highmem pages, dma_address being the
- limit. No highmem default.
-
- blk_queue_max_sectors(q, max_sectors)
- Sets two variables that limit the size of the request.
-
- - The request queue's max_sectors, which is a soft size in
- units of 512 byte sectors, and could be dynamically varied
- by the core kernel.
-
- - The request queue's max_hw_sectors, which is a hard limit
- and reflects the maximum size request a driver can handle
- in units of 512 byte sectors.
-
- The default for both max_sectors and max_hw_sectors is
- 255. The upper limit of max_sectors is 1024.
-
- blk_queue_max_phys_segments(q, max_segments)
- Maximum physical segments you can handle in a request. 128
- default (driver limit). (See 3.2.2)
-
- blk_queue_max_hw_segments(q, max_segments)
- Maximum dma segments the hardware can handle in a request. 128
- default (host adapter limit, after dma remapping).
- (See 3.2.2)
-
- blk_queue_max_segment_size(q, max_seg_size)
- Maximum size of a clustered segment, 64kB default.
-
- blk_queue_logical_block_size(q, logical_block_size)
- Lowest possible sector size that the hardware can operate
- on, 512 bytes default.
-
-New queue flags:
-
- QUEUE_FLAG_CLUSTER (see 3.2.2)
- QUEUE_FLAG_QUEUED (see 3.2.4)
-
-
-ii. High-mem i/o capabilities are now considered the default
-
-The generic bounce buffer logic, present in 2.4, where the block layer would
-by default copyin/out i/o requests on high-memory buffers to low-memory buffers
-assuming that the driver wouldn't be able to handle it directly, has been
-changed in 2.5. The bounce logic is now applied only for memory ranges
-for which the device cannot handle i/o. A driver can specify this by
-setting the queue bounce limit for the request queue for the device
-(blk_queue_bounce_limit()). This avoids the inefficiencies of the copyin/out
-where a device is capable of handling high memory i/o.
-
-In order to enable high-memory i/o where the device is capable of supporting
-it, the pci dma mapping routines and associated data structures have now been
-modified to accomplish a direct page -> bus translation, without requiring
-a virtual address mapping (unlike the earlier scheme of virtual address
--> bus translation). So this works uniformly for high-memory pages (which
-do not have a corresponding kernel virtual address space mapping) and
-low-memory pages.
-
-Note: Please refer to Documentation/DMA-API-HOWTO.txt for a discussion
-on PCI high mem DMA aspects and mapping of scatter gather lists, and support
-for 64 bit PCI.
-
-Special handling is required only for cases where i/o needs to happen on
-pages at physical memory addresses beyond what the device can support. In these
-cases, a bounce bio representing a buffer from the supported memory range
-is used for performing the i/o with copyin/copyout as needed depending on
-the type of the operation. For example, in case of a read operation, the
-data read has to be copied to the original buffer on i/o completion, so a
-callback routine is set up to do this, while for write, the data is copied
-from the original buffer to the bounce buffer prior to issuing the
-operation. Since an original buffer may be in a high memory area that's not
-mapped in kernel virtual addr, a kmap operation may be required for
-performing the copy, and special care may be needed in the completion path
-as it may not be in irq context. Special care is also required (by way of
-GFP flags) when allocating bounce buffers, to avoid certain highmem
-deadlock possibilities.
-
-It is also possible that a bounce buffer may be allocated from high-memory
-area that's not mapped in kernel virtual addr, but within the range that the
-device can use directly; so the bounce page may need to be kmapped during
-copy operations. [Note: This does not hold in the current implementation,
-though]
-
-There are some situations when pages from high memory may need to
-be kmapped, even if bounce buffers are not necessary. For example a device
-may need to abort DMA operations and revert to PIO for the transfer, in
-which case a virtual mapping of the page is required. For SCSI it is also
-done in some scenarios where the low level driver cannot be trusted to
-handle a single sg entry correctly. The driver is expected to perform the
-kmaps as needed on such occasions as appropriate. A driver could also use
-the blk_queue_bounce() routine on its own to bounce highmem i/o to low
-memory for specific requests if so desired.
-
-iii. The i/o scheduler algorithm itself can be replaced/set as appropriate
-
-As in 2.4, it is possible to plugin a brand new i/o scheduler for a particular
-queue or pick from (copy) existing generic schedulers and replace/override
-certain portions of it. The 2.5 rewrite provides improved modularization
-of the i/o scheduler. There are more pluggable callbacks, e.g for init,
-add request, extract request, which makes it possible to abstract specific
-i/o scheduling algorithm aspects and details outside of the generic loop.
-It also makes it possible to completely hide the implementation details of
-the i/o scheduler from block drivers.
-
-I/O scheduler wrappers are to be used instead of accessing the queue directly.
-See section 4. The I/O scheduler for details.
-
-1.2 Tuning Based on High level code capabilities
-
-i. Application capabilities for raw i/o
-
-This comes from some of the high-performance database/middleware
-requirements where an application prefers to make its own i/o scheduling
-decisions based on an understanding of the access patterns and i/o
-characteristics
-
-ii. High performance filesystems or other higher level kernel code's
-capabilities
-
-Kernel components like filesystems could also take their own i/o scheduling
-decisions for optimizing performance. Journalling filesystems may need
-some control over i/o ordering.
-
-What kind of support exists at the generic block layer for this ?
-
-The flags and rw fields in the bio structure can be used for some tuning
-from above e.g indicating that an i/o is just a readahead request, or priority
-settings (currently unused). As far as user applications are concerned they
-would need an additional mechanism either via open flags or ioctls, or some
-other upper level mechanism to communicate such settings to block.
-
-1.2.1 Request Priority/Latency
-
-Todo/Under discussion:
-Arjan's proposed request priority scheme allows higher levels some broad
- control (high/med/low) over the priority of an i/o request vs other pending
- requests in the queue. For example it allows reads for bringing in an
- executable page on demand to be given a higher priority over pending write
- requests which haven't aged too much on the queue. Potentially this priority
- could even be exposed to applications in some manner, providing higher level
- tunability. Time based aging avoids starvation of lower priority
- requests. Some bits in the bi_opf flags field in the bio structure are
- intended to be used for this priority information.
-
-
-1.3 Direct Access to Low level Device/Driver Capabilities (Bypass mode)
- (e.g Diagnostics, Systems Management)
-
-There are situations where high-level code needs to have direct access to
-the low level device capabilities or requires the ability to issue commands
-to the device bypassing some of the intermediate i/o layers.
-These could, for example, be special control commands issued through ioctl
-interfaces, or could be raw read/write commands that stress the drive's
-capabilities for certain kinds of fitness tests. Having direct interfaces at
-multiple levels without having to pass through upper layers makes
-it possible to perform bottom up validation of the i/o path, layer by
-layer, starting from the media.
-
-The normal i/o submission interfaces, e.g submit_bio, could be bypassed
-for specially crafted requests which such ioctl or diagnostics
-interfaces would typically use, and the elevator add_request routine
-can instead be used to directly insert such requests in the queue or preferably
-the blk_do_rq routine can be used to place the request on the queue and
-wait for completion. Alternatively, sometimes the caller might just
-invoke a lower level driver specific interface with the request as a
-parameter.
-
-If the request is a means for passing on special information associated with
-the command, then such information is associated with the request->special
-field (rather than misuse the request->buffer field which is meant for the
-request data buffer's virtual mapping).
-
-For passing request data, the caller must build up a bio descriptor
-representing the concerned memory buffer if the underlying driver interprets
-bio segments or uses the block layer end*request* functions for i/o
-completion. Alternatively one could directly use the request->buffer field to
-specify the virtual address of the buffer, if the driver expects buffer
-addresses passed in this way and ignores bio entries for the request type
-involved. In the latter case, the driver would modify and manage the
-request->buffer, request->sector and request->nr_sectors or
-request->current_nr_sectors fields itself rather than using the block layer
-end_request or end_that_request_first completion interfaces.
-(See 2.3 or Documentation/block/request.txt for a brief explanation of
-the request structure fields)
-
-[TBD: end_that_request_last should be usable even in this case;
-Perhaps an end_that_direct_request_first routine could be implemented to make
-handling direct requests easier for such drivers; Also for drivers that
-expect bios, a helper function could be provided for setting up a bio
-corresponding to a data buffer]
-
-<JENS: I dont understand the above, why is end_that_request_first() not
-usable? Or _last for that matter. I must be missing something>
-<SUP: What I meant here was that if the request doesn't have a bio, then
- end_that_request_first doesn't modify nr_sectors or current_nr_sectors,
- and hence can't be used for advancing request state settings on the
- completion of partial transfers. The driver has to modify these fields
- directly by hand.
- This is because end_that_request_first only iterates over the bio list,
- and always returns 0 if there are none associated with the request.
- _last works OK in this case, and is not a problem, as I mentioned earlier
->
-
-1.3.1 Pre-built Commands
-
-A request can be created with a pre-built custom command to be sent directly
-to the device. The cmd block in the request structure has room for filling
-in the command bytes. (i.e rq->cmd is now 16 bytes in size, and meant for
-command pre-building, and the type of the request is now indicated
-through rq->flags instead of via rq->cmd)
-
-The request structure flags can be set up to indicate the type of request
-in such cases (REQ_PC: direct packet command passed to driver, REQ_BLOCK_PC:
-packet command issued via blk_do_rq, REQ_SPECIAL: special request).
-
-It can help to pre-build device commands for requests in advance.
-Drivers can now specify a request prepare function (q->prep_rq_fn) that the
-block layer would invoke to pre-build device commands for a given request,
-or perform other preparatory processing for the request. This is routine is
-called by elv_next_request(), i.e. typically just before servicing a request.
-(The prepare function would not be called for requests that have RQF_DONTPREP
-enabled)
-
-Aside:
- Pre-building could possibly even be done early, i.e before placing the
- request on the queue, rather than construct the command on the fly in the
- driver while servicing the request queue when it may affect latencies in
- interrupt context or responsiveness in general. One way to add early
- pre-building would be to do it whenever we fail to merge on a request.
- Now REQ_NOMERGE is set in the request flags to skip this one in the future,
- which means that it will not change before we feed it to the device. So
- the pre-builder hook can be invoked there.
-
-
-2. Flexible and generic but minimalist i/o structure/descriptor.
-
-2.1 Reason for a new structure and requirements addressed
-
-Prior to 2.5, buffer heads were used as the unit of i/o at the generic block
-layer, and the low level request structure was associated with a chain of
-buffer heads for a contiguous i/o request. This led to certain inefficiencies
-when it came to large i/o requests and readv/writev style operations, as it
-forced such requests to be broken up into small chunks before being passed
-on to the generic block layer, only to be merged by the i/o scheduler
-when the underlying device was capable of handling the i/o in one shot.
-Also, using the buffer head as an i/o structure for i/os that didn't originate
-from the buffer cache unnecessarily added to the weight of the descriptors
-which were generated for each such chunk.
-
-The following were some of the goals and expectations considered in the
-redesign of the block i/o data structure in 2.5.
-
-i. Should be appropriate as a descriptor for both raw and buffered i/o -
- avoid cache related fields which are irrelevant in the direct/page i/o path,
- or filesystem block size alignment restrictions which may not be relevant
- for raw i/o.
-ii. Ability to represent high-memory buffers (which do not have a virtual
- address mapping in kernel address space).
-iii.Ability to represent large i/os w/o unnecessarily breaking them up (i.e
- greater than PAGE_SIZE chunks in one shot)
-iv. At the same time, ability to retain independent identity of i/os from
- different sources or i/o units requiring individual completion (e.g. for
- latency reasons)
-v. Ability to represent an i/o involving multiple physical memory segments
- (including non-page aligned page fragments, as specified via readv/writev)
- without unnecessarily breaking it up, if the underlying device is capable of
- handling it.
-vi. Preferably should be based on a memory descriptor structure that can be
- passed around different types of subsystems or layers, maybe even
- networking, without duplication or extra copies of data/descriptor fields
- themselves in the process
-vii.Ability to handle the possibility of splits/merges as the structure passes
- through layered drivers (lvm, md, evms), with minimal overhead.
-
-The solution was to define a new structure (bio) for the block layer,
-instead of using the buffer head structure (bh) directly, the idea being
-avoidance of some associated baggage and limitations. The bio structure
-is uniformly used for all i/o at the block layer ; it forms a part of the
-bh structure for buffered i/o, and in the case of raw/direct i/o kiobufs are
-mapped to bio structures.
-
-2.2 The bio struct
-
-The bio structure uses a vector representation pointing to an array of tuples
-of <page, offset, len> to describe the i/o buffer, and has various other
-fields describing i/o parameters and state that needs to be maintained for
-performing the i/o.
-
-Notice that this representation means that a bio has no virtual address
-mapping at all (unlike buffer heads).
-
-struct bio_vec {
- struct page *bv_page;
- unsigned short bv_len;
- unsigned short bv_offset;
-};
-
-/*
- * main unit of I/O for the block layer and lower layers (ie drivers)
- */
-struct bio {
- struct bio *bi_next; /* request queue link */
- struct block_device *bi_bdev; /* target device */
- unsigned long bi_flags; /* status, command, etc */
- unsigned long bi_opf; /* low bits: r/w, high: priority */
-
- unsigned int bi_vcnt; /* how may bio_vec's */
- struct bvec_iter bi_iter; /* current index into bio_vec array */
-
- unsigned int bi_size; /* total size in bytes */
- unsigned short bi_phys_segments; /* segments after physaddr coalesce*/
- unsigned short bi_hw_segments; /* segments after DMA remapping */
- unsigned int bi_max; /* max bio_vecs we can hold
- used as index into pool */
- struct bio_vec *bi_io_vec; /* the actual vec list */
- bio_end_io_t *bi_end_io; /* bi_end_io (bio) */
- atomic_t bi_cnt; /* pin count: free when it hits zero */
- void *bi_private;
-};
-
-With this multipage bio design:
-
-- Large i/os can be sent down in one go using a bio_vec list consisting
- of an array of <page, offset, len> fragments (similar to the way fragments
- are represented in the zero-copy network code)
-- Splitting of an i/o request across multiple devices (as in the case of
- lvm or raid) is achieved by cloning the bio (where the clone points to
- the same bi_io_vec array, but with the index and size accordingly modified)
-- A linked list of bios is used as before for unrelated merges (*) - this
- avoids reallocs and makes independent completions easier to handle.
-- Code that traverses the req list can find all the segments of a bio
- by using rq_for_each_segment. This handles the fact that a request
- has multiple bios, each of which can have multiple segments.
-- Drivers which can't process a large bio in one shot can use the bi_iter
- field to keep track of the next bio_vec entry to process.
- (e.g a 1MB bio_vec needs to be handled in max 128kB chunks for IDE)
- [TBD: Should preferably also have a bi_voffset and bi_vlen to avoid modifying
- bi_offset an len fields]
-
-(*) unrelated merges -- a request ends up containing two or more bios that
- didn't originate from the same place.
-
-bi_end_io() i/o callback gets called on i/o completion of the entire bio.
-
-At a lower level, drivers build a scatter gather list from the merged bios.
-The scatter gather list is in the form of an array of <page, offset, len>
-entries with their corresponding dma address mappings filled in at the
-appropriate time. As an optimization, contiguous physical pages can be
-covered by a single entry where <page> refers to the first page and <len>
-covers the range of pages (up to 16 contiguous pages could be covered this
-way). There is a helper routine (blk_rq_map_sg) which drivers can use to build
-the sg list.
-
-Note: Right now the only user of bios with more than one page is ll_rw_kio,
-which in turn means that only raw I/O uses it (direct i/o may not work
-right now). The intent however is to enable clustering of pages etc to
-become possible. The pagebuf abstraction layer from SGI also uses multi-page
-bios, but that is currently not included in the stock development kernels.
-The same is true of Andrew Morton's work-in-progress multipage bio writeout
-and readahead patches.
-
-2.3 Changes in the Request Structure
-
-The request structure is the structure that gets passed down to low level
-drivers. The block layer make_request function builds up a request structure,
-places it on the queue and invokes the drivers request_fn. The driver makes
-use of block layer helper routine elv_next_request to pull the next request
-off the queue. Control or diagnostic functions might bypass block and directly
-invoke underlying driver entry points passing in a specially constructed
-request structure.
-
-Only some relevant fields (mainly those which changed or may be referred
-to in some of the discussion here) are listed below, not necessarily in
-the order in which they occur in the structure (see include/linux/blkdev.h)
-Refer to Documentation/block/request.txt for details about all the request
-structure fields and a quick reference about the layers which are
-supposed to use or modify those fields.
-
-struct request {
- struct list_head queuelist; /* Not meant to be directly accessed by
- the driver.
- Used by q->elv_next_request_fn
- rq->queue is gone
- */
- .
- .
- unsigned char cmd[16]; /* prebuilt command data block */
- unsigned long flags; /* also includes earlier rq->cmd settings */
- .
- .
- sector_t sector; /* this field is now of type sector_t instead of int
- preparation for 64 bit sectors */
- .
- .
-
- /* Number of scatter-gather DMA addr+len pairs after
- * physical address coalescing is performed.
- */
- unsigned short nr_phys_segments;
-
- /* Number of scatter-gather addr+len pairs after
- * physical and DMA remapping hardware coalescing is performed.
- * This is the number of scatter-gather entries the driver
- * will actually have to deal with after DMA mapping is done.
- */
- unsigned short nr_hw_segments;
-
- /* Various sector counts */
- unsigned long nr_sectors; /* no. of sectors left: driver modifiable */
- unsigned long hard_nr_sectors; /* block internal copy of above */
- unsigned int current_nr_sectors; /* no. of sectors left in the
- current segment:driver modifiable */
- unsigned long hard_cur_sectors; /* block internal copy of the above */
- .
- .
- int tag; /* command tag associated with request */
- void *special; /* same as before */
- char *buffer; /* valid only for low memory buffers up to
- current_nr_sectors */
- .
- .
- struct bio *bio, *biotail; /* bio list instead of bh */
- struct request_list *rl;
-}
-
-See the req_ops and req_flag_bits definitions for an explanation of the various
-flags available. Some bits are used by the block layer or i/o scheduler.
-
-The behaviour of the various sector counts are almost the same as before,
-except that since we have multi-segment bios, current_nr_sectors refers
-to the numbers of sectors in the current segment being processed which could
-be one of the many segments in the current bio (i.e i/o completion unit).
-The nr_sectors value refers to the total number of sectors in the whole
-request that remain to be transferred (no change). The purpose of the
-hard_xxx values is for block to remember these counts every time it hands
-over the request to the driver. These values are updated by block on
-end_that_request_first, i.e. every time the driver completes a part of the
-transfer and invokes block end*request helpers to mark this. The
-driver should not modify these values. The block layer sets up the
-nr_sectors and current_nr_sectors fields (based on the corresponding
-hard_xxx values and the number of bytes transferred) and updates it on
-every transfer that invokes end_that_request_first. It does the same for the
-buffer, bio, bio->bi_iter fields too.
-
-The buffer field is just a virtual address mapping of the current segment
-of the i/o buffer in cases where the buffer resides in low-memory. For high
-memory i/o, this field is not valid and must not be used by drivers.
-
-Code that sets up its own request structures and passes them down to
-a driver needs to be careful about interoperation with the block layer helper
-functions which the driver uses. (Section 1.3)
-
-3. Using bios
-
-3.1 Setup/Teardown
-
-There are routines for managing the allocation, and reference counting, and
-freeing of bios (bio_alloc, bio_get, bio_put).
-
-This makes use of Ingo Molnar's mempool implementation, which enables
-subsystems like bio to maintain their own reserve memory pools for guaranteed
-deadlock-free allocations during extreme VM load. For example, the VM
-subsystem makes use of the block layer to writeout dirty pages in order to be
-able to free up memory space, a case which needs careful handling. The
-allocation logic draws from the preallocated emergency reserve in situations
-where it cannot allocate through normal means. If the pool is empty and it
-can wait, then it would trigger action that would help free up memory or
-replenish the pool (without deadlocking) and wait for availability in the pool.
-If it is in IRQ context, and hence not in a position to do this, allocation
-could fail if the pool is empty. In general mempool always first tries to
-perform allocation without having to wait, even if it means digging into the
-pool as long it is not less that 50% full.
-
-On a free, memory is released to the pool or directly freed depending on
-the current availability in the pool. The mempool interface lets the
-subsystem specify the routines to be used for normal alloc and free. In the
-case of bio, these routines make use of the standard slab allocator.
-
-The caller of bio_alloc is expected to taken certain steps to avoid
-deadlocks, e.g. avoid trying to allocate more memory from the pool while
-already holding memory obtained from the pool.
-[TBD: This is a potential issue, though a rare possibility
- in the bounce bio allocation that happens in the current code, since
- it ends up allocating a second bio from the same pool while
- holding the original bio ]
-
-Memory allocated from the pool should be released back within a limited
-amount of time (in the case of bio, that would be after the i/o is completed).
-This ensures that if part of the pool has been used up, some work (in this
-case i/o) must already be in progress and memory would be available when it
-is over. If allocating from multiple pools in the same code path, the order
-or hierarchy of allocation needs to be consistent, just the way one deals
-with multiple locks.
-
-The bio_alloc routine also needs to allocate the bio_vec_list (bvec_alloc())
-for a non-clone bio. There are the 6 pools setup for different size biovecs,
-so bio_alloc(gfp_mask, nr_iovecs) will allocate a vec_list of the
-given size from these slabs.
-
-The bio_get() routine may be used to hold an extra reference on a bio prior
-to i/o submission, if the bio fields are likely to be accessed after the
-i/o is issued (since the bio may otherwise get freed in case i/o completion
-happens in the meantime).
-
-The bio_clone_fast() routine may be used to duplicate a bio, where the clone
-shares the bio_vec_list with the original bio (i.e. both point to the
-same bio_vec_list). This would typically be used for splitting i/o requests
-in lvm or md.
-
-3.2 Generic bio helper Routines
-
-3.2.1 Traversing segments and completion units in a request
-
-The macro rq_for_each_segment() should be used for traversing the bios
-in the request list (drivers should avoid directly trying to do it
-themselves). Using these helpers should also make it easier to cope
-with block changes in the future.
-
- struct req_iterator iter;
- rq_for_each_segment(bio_vec, rq, iter)
- /* bio_vec is now current segment */
-
-I/O completion callbacks are per-bio rather than per-segment, so drivers
-that traverse bio chains on completion need to keep that in mind. Drivers
-which don't make a distinction between segments and completion units would
-need to be reorganized to support multi-segment bios.
-
-3.2.2 Setting up DMA scatterlists
-
-The blk_rq_map_sg() helper routine would be used for setting up scatter
-gather lists from a request, so a driver need not do it on its own.
-
- nr_segments = blk_rq_map_sg(q, rq, scatterlist);
-
-The helper routine provides a level of abstraction which makes it easier
-to modify the internals of request to scatterlist conversion down the line
-without breaking drivers. The blk_rq_map_sg routine takes care of several
-things like collapsing physically contiguous segments (if QUEUE_FLAG_CLUSTER
-is set) and correct segment accounting to avoid exceeding the limits which
-the i/o hardware can handle, based on various queue properties.
-
-- Prevents a clustered segment from crossing a 4GB mem boundary
-- Avoids building segments that would exceed the number of physical
- memory segments that the driver can handle (phys_segments) and the
- number that the underlying hardware can handle at once, accounting for
- DMA remapping (hw_segments) (i.e. IOMMU aware limits).
-
-Routines which the low level driver can use to set up the segment limits:
-
-blk_queue_max_hw_segments() : Sets an upper limit of the maximum number of
-hw data segments in a request (i.e. the maximum number of address/length
-pairs the host adapter can actually hand to the device at once)
-
-blk_queue_max_phys_segments() : Sets an upper limit on the maximum number
-of physical data segments in a request (i.e. the largest sized scatter list
-a driver could handle)
-
-3.2.3 I/O completion
-
-The existing generic block layer helper routines end_request,
-end_that_request_first and end_that_request_last can be used for i/o
-completion (and setting things up so the rest of the i/o or the next
-request can be kicked of) as before. With the introduction of multi-page
-bio support, end_that_request_first requires an additional argument indicating
-the number of sectors completed.
-
-3.2.4 Implications for drivers that do not interpret bios (don't handle
- multiple segments)
-
-Drivers that do not interpret bios e.g those which do not handle multiple
-segments and do not support i/o into high memory addresses (require bounce
-buffers) and expect only virtually mapped buffers, can access the rq->buffer
-field. As before the driver should use current_nr_sectors to determine the
-size of remaining data in the current segment (that is the maximum it can
-transfer in one go unless it interprets segments), and rely on the block layer
-end_request, or end_that_request_first/last to take care of all accounting
-and transparent mapping of the next bio segment when a segment boundary
-is crossed on completion of a transfer. (The end*request* functions should
-be used if only if the request has come down from block/bio path, not for
-direct access requests which only specify rq->buffer without a valid rq->bio)
-
-3.2.5 Generic request command tagging
-
-3.2.5.1 Tag helpers
-
-Block now offers some simple generic functionality to help support command
-queueing (typically known as tagged command queueing), ie manage more than
-one outstanding command on a queue at any given time.
-
- blk_queue_init_tags(struct request_queue *q, int depth)
-
- Initialize internal command tagging structures for a maximum
- depth of 'depth'.
-
- blk_queue_free_tags((struct request_queue *q)
-
- Teardown tag info associated with the queue. This will be done
- automatically by block if blk_queue_cleanup() is called on a queue
- that is using tagging.
-
-The above are initialization and exit management, the main helpers during
-normal operations are:
-
- blk_queue_start_tag(struct request_queue *q, struct request *rq)
-
- Start tagged operation for this request. A free tag number between
- 0 and 'depth' is assigned to the request (rq->tag holds this number),
- and 'rq' is added to the internal tag management. If the maximum depth
- for this queue is already achieved (or if the tag wasn't started for
- some other reason), 1 is returned. Otherwise 0 is returned.
-
- blk_queue_end_tag(struct request_queue *q, struct request *rq)
-
- End tagged operation on this request. 'rq' is removed from the internal
- book keeping structures.
-
-To minimize struct request and queue overhead, the tag helpers utilize some
-of the same request members that are used for normal request queue management.
-This means that a request cannot both be an active tag and be on the queue
-list at the same time. blk_queue_start_tag() will remove the request, but
-the driver must remember to call blk_queue_end_tag() before signalling
-completion of the request to the block layer. This means ending tag
-operations before calling end_that_request_last()! For an example of a user
-of these helpers, see the IDE tagged command queueing support.
-
-3.2.5.2 Tag info
-
-Some block functions exist to query current tag status or to go from a
-tag number to the associated request. These are, in no particular order:
-
- blk_queue_tagged(q)
-
- Returns 1 if the queue 'q' is using tagging, 0 if not.
-
- blk_queue_tag_request(q, tag)
-
- Returns a pointer to the request associated with tag 'tag'.
-
- blk_queue_tag_depth(q)
-
- Return current queue depth.
-
- blk_queue_tag_queue(q)
-
- Returns 1 if the queue can accept a new queued command, 0 if we are
- at the maximum depth already.
-
- blk_queue_rq_tagged(rq)
-
- Returns 1 if the request 'rq' is tagged.
-
-3.2.5.2 Internal structure
-
-Internally, block manages tags in the blk_queue_tag structure:
-
- struct blk_queue_tag {
- struct request **tag_index; /* array or pointers to rq */
- unsigned long *tag_map; /* bitmap of free tags */
- struct list_head busy_list; /* fifo list of busy tags */
- int busy; /* queue depth */
- int max_depth; /* max queue depth */
- };
-
-Most of the above is simple and straight forward, however busy_list may need
-a bit of explaining. Normally we don't care too much about request ordering,
-but in the event of any barrier requests in the tag queue we need to ensure
-that requests are restarted in the order they were queue.
-
-3.3 I/O Submission
-
-The routine submit_bio() is used to submit a single io. Higher level i/o
-routines make use of this:
-
-(a) Buffered i/o:
-The routine submit_bh() invokes submit_bio() on a bio corresponding to the
-bh, allocating the bio if required. ll_rw_block() uses submit_bh() as before.
-
-(b) Kiobuf i/o (for raw/direct i/o):
-The ll_rw_kio() routine breaks up the kiobuf into page sized chunks and
-maps the array to one or more multi-page bios, issuing submit_bio() to
-perform the i/o on each of these.
-
-The embedded bh array in the kiobuf structure has been removed and no
-preallocation of bios is done for kiobufs. [The intent is to remove the
-blocks array as well, but it's currently in there to kludge around direct i/o.]
-Thus kiobuf allocation has switched back to using kmalloc rather than vmalloc.
-
-Todo/Observation:
-
- A single kiobuf structure is assumed to correspond to a contiguous range
- of data, so brw_kiovec() invokes ll_rw_kio for each kiobuf in a kiovec.
- So right now it wouldn't work for direct i/o on non-contiguous blocks.
- This is to be resolved. The eventual direction is to replace kiobuf
- by kvec's.
-
- Badari Pulavarty has a patch to implement direct i/o correctly using
- bio and kvec.
-
-
-(c) Page i/o:
-Todo/Under discussion:
-
- Andrew Morton's multi-page bio patches attempt to issue multi-page
- writeouts (and reads) from the page cache, by directly building up
- large bios for submission completely bypassing the usage of buffer
- heads. This work is still in progress.
-
- Christoph Hellwig had some code that uses bios for page-io (rather than
- bh). This isn't included in bio as yet. Christoph was also working on a
- design for representing virtual/real extents as an entity and modifying
- some of the address space ops interfaces to utilize this abstraction rather
- than buffer_heads. (This is somewhat along the lines of the SGI XFS pagebuf
- abstraction, but intended to be as lightweight as possible).
-
-(d) Direct access i/o:
-Direct access requests that do not contain bios would be submitted differently
-as discussed earlier in section 1.3.
-
-Aside:
-
- Kvec i/o:
-
- Ben LaHaise's aio code uses a slightly different structure instead
- of kiobufs, called a kvec_cb. This contains an array of <page, offset, len>
- tuples (very much like the networking code), together with a callback function
- and data pointer. This is embedded into a brw_cb structure when passed
- to brw_kvec_async().
-
- Now it should be possible to directly map these kvecs to a bio. Just as while
- cloning, in this case rather than PRE_BUILT bio_vecs, we set the bi_io_vec
- array pointer to point to the veclet array in kvecs.
-
- TBD: In order for this to work, some changes are needed in the way multi-page
- bios are handled today. The values of the tuples in such a vector passed in
- from higher level code should not be modified by the block layer in the course
- of its request processing, since that would make it hard for the higher layer
- to continue to use the vector descriptor (kvec) after i/o completes. Instead,
- all such transient state should either be maintained in the request structure,
- and passed on in some way to the endio completion routine.
-
-
-4. The I/O scheduler
-I/O scheduler, a.k.a. elevator, is implemented in two layers. Generic dispatch
-queue and specific I/O schedulers. Unless stated otherwise, elevator is used
-to refer to both parts and I/O scheduler to specific I/O schedulers.
-
-Block layer implements generic dispatch queue in block/*.c.
-The generic dispatch queue is responsible for requeueing, handling non-fs
-requests and all other subtleties.
-
-Specific I/O schedulers are responsible for ordering normal filesystem
-requests. They can also choose to delay certain requests to improve
-throughput or whatever purpose. As the plural form indicates, there are
-multiple I/O schedulers. They can be built as modules but at least one should
-be built inside the kernel. Each queue can choose different one and can also
-change to another one dynamically.
-
-A block layer call to the i/o scheduler follows the convention elv_xxx(). This
-calls elevator_xxx_fn in the elevator switch (block/elevator.c). Oh, xxx
-and xxx might not match exactly, but use your imagination. If an elevator
-doesn't implement a function, the switch does nothing or some minimal house
-keeping work.
-
-4.1. I/O scheduler API
-
-The functions an elevator may implement are: (* are mandatory)
-elevator_merge_fn called to query requests for merge with a bio
-
-elevator_merge_req_fn called when two requests get merged. the one
- which gets merged into the other one will be
- never seen by I/O scheduler again. IOW, after
- being merged, the request is gone.
-
-elevator_merged_fn called when a request in the scheduler has been
- involved in a merge. It is used in the deadline
- scheduler for example, to reposition the request
- if its sorting order has changed.
-
-elevator_allow_merge_fn called whenever the block layer determines
- that a bio can be merged into an existing
- request safely. The io scheduler may still
- want to stop a merge at this point if it
- results in some sort of conflict internally,
- this hook allows it to do that. Note however
- that two *requests* can still be merged at later
- time. Currently the io scheduler has no way to
- prevent that. It can only learn about the fact
- from elevator_merge_req_fn callback.
-
-elevator_dispatch_fn* fills the dispatch queue with ready requests.
- I/O schedulers are free to postpone requests by
- not filling the dispatch queue unless @force
- is non-zero. Once dispatched, I/O schedulers
- are not allowed to manipulate the requests -
- they belong to generic dispatch queue.
-
-elevator_add_req_fn* called to add a new request into the scheduler
-
-elevator_former_req_fn
-elevator_latter_req_fn These return the request before or after the
- one specified in disk sort order. Used by the
- block layer to find merge possibilities.
-
-elevator_completed_req_fn called when a request is completed.
-
-elevator_may_queue_fn returns true if the scheduler wants to allow the
- current context to queue a new request even if
- it is over the queue limit. This must be used
- very carefully!!
-
-elevator_set_req_fn
-elevator_put_req_fn Must be used to allocate and free any elevator
- specific storage for a request.
-
-elevator_activate_req_fn Called when device driver first sees a request.
- I/O schedulers can use this callback to
- determine when actual execution of a request
- starts.
-elevator_deactivate_req_fn Called when device driver decides to delay
- a request by requeueing it.
-
-elevator_init_fn*
-elevator_exit_fn Allocate and free any elevator specific storage
- for a queue.
-
-4.2 Request flows seen by I/O schedulers
-All requests seen by I/O schedulers strictly follow one of the following three
-flows.
-
- set_req_fn ->
-
- i. add_req_fn -> (merged_fn ->)* -> dispatch_fn -> activate_req_fn ->
- (deactivate_req_fn -> activate_req_fn ->)* -> completed_req_fn
- ii. add_req_fn -> (merged_fn ->)* -> merge_req_fn
- iii. [none]
-
- -> put_req_fn
-
-4.3 I/O scheduler implementation
-The generic i/o scheduler algorithm attempts to sort/merge/batch requests for
-optimal disk scan and request servicing performance (based on generic
-principles and device capabilities), optimized for:
-i. improved throughput
-ii. improved latency
-iii. better utilization of h/w & CPU time
-
-Characteristics:
-
-i. Binary tree
-AS and deadline i/o schedulers use red black binary trees for disk position
-sorting and searching, and a fifo linked list for time-based searching. This
-gives good scalability and good availability of information. Requests are
-almost always dispatched in disk sort order, so a cache is kept of the next
-request in sort order to prevent binary tree lookups.
-
-This arrangement is not a generic block layer characteristic however, so
-elevators may implement queues as they please.
-
-ii. Merge hash
-AS and deadline use a hash table indexed by the last sector of a request. This
-enables merging code to quickly look up "back merge" candidates, even when
-multiple I/O streams are being performed at once on one disk.
-
-"Front merges", a new request being merged at the front of an existing request,
-are far less common than "back merges" due to the nature of most I/O patterns.
-Front merges are handled by the binary trees in AS and deadline schedulers.
-
-iii. Plugging the queue to batch requests in anticipation of opportunities for
- merge/sort optimizations
-
-Plugging is an approach that the current i/o scheduling algorithm resorts to so
-that it collects up enough requests in the queue to be able to take
-advantage of the sorting/merging logic in the elevator. If the
-queue is empty when a request comes in, then it plugs the request queue
-(sort of like plugging the bath tub of a vessel to get fluid to build up)
-till it fills up with a few more requests, before starting to service
-the requests. This provides an opportunity to merge/sort the requests before
-passing them down to the device. There are various conditions when the queue is
-unplugged (to open up the flow again), either through a scheduled task or
-could be on demand. For example wait_on_buffer sets the unplugging going
-through sync_buffer() running blk_run_address_space(mapping). Or the caller
-can do it explicity through blk_unplug(bdev). So in the read case,
-the queue gets explicitly unplugged as part of waiting for completion on that
-buffer.
-
-Aside:
- This is kind of controversial territory, as it's not clear if plugging is
- always the right thing to do. Devices typically have their own queues,
- and allowing a big queue to build up in software, while letting the device be
- idle for a while may not always make sense. The trick is to handle the fine
- balance between when to plug and when to open up. Also now that we have
- multi-page bios being queued in one shot, we may not need to wait to merge
- a big request from the broken up pieces coming by.
-
-4.4 I/O contexts
-I/O contexts provide a dynamically allocated per process data area. They may
-be used in I/O schedulers, and in the block layer (could be used for IO statis,
-priorities for example). See *io_context in block/ll_rw_blk.c, and as-iosched.c
-for an example of usage in an i/o scheduler.
-
-
-5. Scalability related changes
-
-5.1 Granular Locking: io_request_lock replaced by a per-queue lock
-
-The global io_request_lock has been removed as of 2.5, to avoid
-the scalability bottleneck it was causing, and has been replaced by more
-granular locking. The request queue structure has a pointer to the
-lock to be used for that queue. As a result, locking can now be
-per-queue, with a provision for sharing a lock across queues if
-necessary (e.g the scsi layer sets the queue lock pointers to the
-corresponding adapter lock, which results in a per host locking
-granularity). The locking semantics are the same, i.e. locking is
-still imposed by the block layer, grabbing the lock before
-request_fn execution which it means that lots of older drivers
-should still be SMP safe. Drivers are free to drop the queue
-lock themselves, if required. Drivers that explicitly used the
-io_request_lock for serialization need to be modified accordingly.
-Usually it's as easy as adding a global lock:
-
- static DEFINE_SPINLOCK(my_driver_lock);
-
-and passing the address to that lock to blk_init_queue().
-
-5.2 64 bit sector numbers (sector_t prepares for 64 bit support)
-
-The sector number used in the bio structure has been changed to sector_t,
-which could be defined as 64 bit in preparation for 64 bit sector support.
-
-6. Other Changes/Implications
-
-6.1 Partition re-mapping handled by the generic block layer
-
-In 2.5 some of the gendisk/partition related code has been reorganized.
-Now the generic block layer performs partition-remapping early and thus
-provides drivers with a sector number relative to whole device, rather than
-having to take partition number into account in order to arrive at the true
-sector number. The routine blk_partition_remap() is invoked by
-generic_make_request even before invoking the queue specific make_request_fn,
-so the i/o scheduler also gets to operate on whole disk sector numbers. This
-should typically not require changes to block drivers, it just never gets
-to invoke its own partition sector offset calculations since all bios
-sent are offset from the beginning of the device.
-
-
-7. A Few Tips on Migration of older drivers
-
-Old-style drivers that just use CURRENT and ignores clustered requests,
-may not need much change. The generic layer will automatically handle
-clustered requests, multi-page bios, etc for the driver.
-
-For a low performance driver or hardware that is PIO driven or just doesn't
-support scatter-gather changes should be minimal too.
-
-The following are some points to keep in mind when converting old drivers
-to bio.
-
-Drivers should use elv_next_request to pick up requests and are no longer
-supposed to handle looping directly over the request list.
-(struct request->queue has been removed)
-
-Now end_that_request_first takes an additional number_of_sectors argument.
-It used to handle always just the first buffer_head in a request, now
-it will loop and handle as many sectors (on a bio-segment granularity)
-as specified.
-
-Now bh->b_end_io is replaced by bio->bi_end_io, but most of the time the
-right thing to use is bio_endio(bio) instead.
-
-If the driver is dropping the io_request_lock from its request_fn strategy,
-then it just needs to replace that with q->queue_lock instead.
-
-As described in Sec 1.1, drivers can set max sector size, max segment size
-etc per queue now. Drivers that used to define their own merge functions i
-to handle things like this can now just use the blk_queue_* functions at
-blk_init_queue time.
-
-Drivers no longer have to map a {partition, sector offset} into the
-correct absolute location anymore, this is done by the block layer, so
-where a driver received a request ala this before:
-
- rq->rq_dev = mk_kdev(3, 5); /* /dev/hda5 */
- rq->sector = 0; /* first sector on hda5 */
-
- it will now see
-
- rq->rq_dev = mk_kdev(3, 0); /* /dev/hda */
- rq->sector = 123128; /* offset from start of disk */
-
-As mentioned, there is no virtual mapping of a bio. For DMA, this is
-not a problem as the driver probably never will need a virtual mapping.
-Instead it needs a bus mapping (dma_map_page for a single segment or
-use dma_map_sg for scatter gather) to be able to ship it to the driver. For
-PIO drivers (or drivers that need to revert to PIO transfer once in a
-while (IDE for example)), where the CPU is doing the actual data
-transfer a virtual mapping is needed. If the driver supports highmem I/O,
-(Sec 1.1, (ii) ) it needs to use kmap_atomic or similar to temporarily map
-a bio into the virtual address space.
-
-
-8. Prior/Related/Impacted patches
-
-8.1. Earlier kiobuf patches (sct/axboe/chait/hch/mkp)
-- orig kiobuf & raw i/o patches (now in 2.4 tree)
-- direct kiobuf based i/o to devices (no intermediate bh's)
-- page i/o using kiobuf
-- kiobuf splitting for lvm (mkp)
-- elevator support for kiobuf request merging (axboe)
-8.2. Zero-copy networking (Dave Miller)
-8.3. SGI XFS - pagebuf patches - use of kiobufs
-8.4. Multi-page pioent patch for bio (Christoph Hellwig)
-8.5. Direct i/o implementation (Andrea Arcangeli) since 2.4.10-pre11
-8.6. Async i/o implementation patch (Ben LaHaise)
-8.7. EVMS layering design (IBM EVMS team)
-8.8. Larger page cache size patch (Ben LaHaise) and
- Large page size (Daniel Phillips)
- => larger contiguous physical memory buffers
-8.9. VM reservations patch (Ben LaHaise)
-8.10. Write clustering patches ? (Marcelo/Quintela/Riel ?)
-8.11. Block device in page cache patch (Andrea Archangeli) - now in 2.4.10+
-8.12. Multiple block-size transfers for faster raw i/o (Shailabh Nagar,
- Badari)
-8.13 Priority based i/o scheduler - prepatches (Arjan van de Ven)
-8.14 IDE Taskfile i/o patch (Andre Hedrick)
-8.15 Multi-page writeout and readahead patches (Andrew Morton)
-8.16 Direct i/o patches for 2.5 using kvec and bio (Badari Pulavarthy)
-
-9. Other References:
-
-9.1 The Splice I/O Model - Larry McVoy (and subsequent discussions on lkml,
-and Linus' comments - Jan 2001)
-9.2 Discussions about kiobuf and bh design on lkml between sct, linus, alan
-et al - Feb-March 2001 (many of the initial thoughts that led to bio were
-brought up in this discussion thread)
-9.3 Discussions on mempool on lkml - Dec 2001.
-
diff --git a/Documentation/block/biovecs.rst b/Documentation/block/biovecs.rst
new file mode 100644
index 000000000000..b9dc0c9dbee4
--- /dev/null
+++ b/Documentation/block/biovecs.rst
@@ -0,0 +1,151 @@
+======================================
+Immutable biovecs and biovec iterators
+======================================
+
+Kent Overstreet <kmo@daterainc.com>
+
+As of 3.13, biovecs should never be modified after a bio has been submitted.
+Instead, we have a new struct bvec_iter which represents a range of a biovec -
+the iterator will be modified as the bio is completed, not the biovec.
+
+More specifically, old code that needed to partially complete a bio would
+update bi_sector and bi_size, and advance bi_idx to the next biovec. If it
+ended up partway through a biovec, it would increment bv_offset and decrement
+bv_len by the number of bytes completed in that biovec.
+
+In the new scheme of things, everything that must be mutated in order to
+partially complete a bio is segregated into struct bvec_iter: bi_sector,
+bi_size and bi_idx have been moved there; and instead of modifying bv_offset
+and bv_len, struct bvec_iter has bi_bvec_done, which represents the number of
+bytes completed in the current bvec.
+
+There are a bunch of new helper macros for hiding the gory details - in
+particular, presenting the illusion of partially completed biovecs so that
+normal code doesn't have to deal with bi_bvec_done.
+
+ * Driver code should no longer refer to biovecs directly; we now have
+ bio_iovec() and bio_iter_iovec() macros that return literal struct biovecs,
+ constructed from the raw biovecs but taking into account bi_bvec_done and
+ bi_size.
+
+ bio_for_each_segment() has been updated to take a bvec_iter argument
+ instead of an integer (that corresponded to bi_idx); for a lot of code the
+ conversion just required changing the types of the arguments to
+ bio_for_each_segment().
+
+ * Advancing a bvec_iter is done with bio_advance_iter(); bio_advance() is a
+ wrapper around bio_advance_iter() that operates on bio->bi_iter, and also
+ advances the bio integrity's iter if present.
+
+ There is a lower level advance function - bvec_iter_advance() - which takes
+ a pointer to a biovec, not a bio; this is used by the bio integrity code.
+
+As of 5.12 bvec segments with zero bv_len are not supported.
+
+What's all this get us?
+=======================
+
+Having a real iterator, and making biovecs immutable, has a number of
+advantages:
+
+ * Before, iterating over bios was very awkward when you weren't processing
+ exactly one bvec at a time - for example, bio_copy_data() in block/bio.c,
+ which copies the contents of one bio into another. Because the biovecs
+ wouldn't necessarily be the same size, the old code was tricky convoluted -
+ it had to walk two different bios at the same time, keeping both bi_idx and
+ and offset into the current biovec for each.
+
+ The new code is much more straightforward - have a look. This sort of
+ pattern comes up in a lot of places; a lot of drivers were essentially open
+ coding bvec iterators before, and having common implementation considerably
+ simplifies a lot of code.
+
+ * Before, any code that might need to use the biovec after the bio had been
+ completed (perhaps to copy the data somewhere else, or perhaps to resubmit
+ it somewhere else if there was an error) had to save the entire bvec array
+ - again, this was being done in a fair number of places.
+
+ * Biovecs can be shared between multiple bios - a bvec iter can represent an
+ arbitrary range of an existing biovec, both starting and ending midway
+ through biovecs. This is what enables efficient splitting of arbitrary
+ bios. Note that this means we _only_ use bi_size to determine when we've
+ reached the end of a bio, not bi_vcnt - and the bio_iovec() macro takes
+ bi_size into account when constructing biovecs.
+
+ * Splitting bios is now much simpler. The old bio_split() didn't even work on
+ bios with more than a single bvec! Now, we can efficiently split arbitrary
+ size bios - because the new bio can share the old bio's biovec.
+
+ Care must be taken to ensure the biovec isn't freed while the split bio is
+ still using it, in case the original bio completes first, though. Using
+ bio_chain() when splitting bios helps with this.
+
+ * Submitting partially completed bios is now perfectly fine - this comes up
+ occasionally in stacking block drivers and various code (e.g. md and
+ bcache) had some ugly workarounds for this.
+
+ It used to be the case that submitting a partially completed bio would work
+ fine to _most_ devices, but since accessing the raw bvec array was the
+ norm, not all drivers would respect bi_idx and those would break. Now,
+ since all drivers _must_ go through the bvec iterator - and have been
+ audited to make sure they are - submitting partially completed bios is
+ perfectly fine.
+
+Other implications:
+===================
+
+ * Almost all usage of bi_idx is now incorrect and has been removed; instead,
+ where previously you would have used bi_idx you'd now use a bvec_iter,
+ probably passing it to one of the helper macros.
+
+ I.e. instead of using bio_iovec_idx() (or bio->bi_iovec[bio->bi_idx]), you
+ now use bio_iter_iovec(), which takes a bvec_iter and returns a
+ literal struct bio_vec - constructed on the fly from the raw biovec but
+ taking into account bi_bvec_done (and bi_size).
+
+ * bi_vcnt can't be trusted or relied upon by driver code - i.e. anything that
+ doesn't actually own the bio. The reason is twofold: firstly, it's not
+ actually needed for iterating over the bio anymore - we only use bi_size.
+ Secondly, when cloning a bio and reusing (a portion of) the original bio's
+ biovec, in order to calculate bi_vcnt for the new bio we'd have to iterate
+ over all the biovecs in the new bio - which is silly as it's not needed.
+
+ So, don't use bi_vcnt anymore.
+
+ * The current interface allows the block layer to split bios as needed, so we
+ could eliminate a lot of complexity particularly in stacked drivers. Code
+ that creates bios can then create whatever size bios are convenient, and
+ more importantly stacked drivers don't have to deal with both their own bio
+ size limitations and the limitations of the underlying devices. Thus
+ there's no need to define ->merge_bvec_fn() callbacks for individual block
+ drivers.
+
+Usage of helpers:
+=================
+
+* The following helpers whose names have the suffix of `_all` can only be used
+ on non-BIO_CLONED bio. They are usually used by filesystem code. Drivers
+ shouldn't use them because the bio may have been split before it reached the
+ driver.
+
+::
+
+ bio_for_each_segment_all()
+ bio_for_each_bvec_all()
+ bio_first_bvec_all()
+ bio_first_page_all()
+ bio_first_folio_all()
+ bio_last_bvec_all()
+
+* The following helpers iterate over single-page segment. The passed 'struct
+ bio_vec' will contain a single-page IO vector during the iteration::
+
+ bio_for_each_segment()
+ bio_for_each_segment_all()
+
+* The following helpers iterate over multi-page bvec. The passed 'struct
+ bio_vec' will contain a multi-page IO vector during the iteration::
+
+ bio_for_each_bvec()
+ bio_for_each_bvec_all()
+ rq_for_each_bvec()
diff --git a/Documentation/block/biovecs.txt b/Documentation/block/biovecs.txt
deleted file mode 100644
index 25689584e6e0..000000000000
--- a/Documentation/block/biovecs.txt
+++ /dev/null
@@ -1,119 +0,0 @@
-
-Immutable biovecs and biovec iterators:
-=======================================
-
-Kent Overstreet <kmo@daterainc.com>
-
-As of 3.13, biovecs should never be modified after a bio has been submitted.
-Instead, we have a new struct bvec_iter which represents a range of a biovec -
-the iterator will be modified as the bio is completed, not the biovec.
-
-More specifically, old code that needed to partially complete a bio would
-update bi_sector and bi_size, and advance bi_idx to the next biovec. If it
-ended up partway through a biovec, it would increment bv_offset and decrement
-bv_len by the number of bytes completed in that biovec.
-
-In the new scheme of things, everything that must be mutated in order to
-partially complete a bio is segregated into struct bvec_iter: bi_sector,
-bi_size and bi_idx have been moved there; and instead of modifying bv_offset
-and bv_len, struct bvec_iter has bi_bvec_done, which represents the number of
-bytes completed in the current bvec.
-
-There are a bunch of new helper macros for hiding the gory details - in
-particular, presenting the illusion of partially completed biovecs so that
-normal code doesn't have to deal with bi_bvec_done.
-
- * Driver code should no longer refer to biovecs directly; we now have
- bio_iovec() and bio_iter_iovec() macros that return literal struct biovecs,
- constructed from the raw biovecs but taking into account bi_bvec_done and
- bi_size.
-
- bio_for_each_segment() has been updated to take a bvec_iter argument
- instead of an integer (that corresponded to bi_idx); for a lot of code the
- conversion just required changing the types of the arguments to
- bio_for_each_segment().
-
- * Advancing a bvec_iter is done with bio_advance_iter(); bio_advance() is a
- wrapper around bio_advance_iter() that operates on bio->bi_iter, and also
- advances the bio integrity's iter if present.
-
- There is a lower level advance function - bvec_iter_advance() - which takes
- a pointer to a biovec, not a bio; this is used by the bio integrity code.
-
-What's all this get us?
-=======================
-
-Having a real iterator, and making biovecs immutable, has a number of
-advantages:
-
- * Before, iterating over bios was very awkward when you weren't processing
- exactly one bvec at a time - for example, bio_copy_data() in fs/bio.c,
- which copies the contents of one bio into another. Because the biovecs
- wouldn't necessarily be the same size, the old code was tricky convoluted -
- it had to walk two different bios at the same time, keeping both bi_idx and
- and offset into the current biovec for each.
-
- The new code is much more straightforward - have a look. This sort of
- pattern comes up in a lot of places; a lot of drivers were essentially open
- coding bvec iterators before, and having common implementation considerably
- simplifies a lot of code.
-
- * Before, any code that might need to use the biovec after the bio had been
- completed (perhaps to copy the data somewhere else, or perhaps to resubmit
- it somewhere else if there was an error) had to save the entire bvec array
- - again, this was being done in a fair number of places.
-
- * Biovecs can be shared between multiple bios - a bvec iter can represent an
- arbitrary range of an existing biovec, both starting and ending midway
- through biovecs. This is what enables efficient splitting of arbitrary
- bios. Note that this means we _only_ use bi_size to determine when we've
- reached the end of a bio, not bi_vcnt - and the bio_iovec() macro takes
- bi_size into account when constructing biovecs.
-
- * Splitting bios is now much simpler. The old bio_split() didn't even work on
- bios with more than a single bvec! Now, we can efficiently split arbitrary
- size bios - because the new bio can share the old bio's biovec.
-
- Care must be taken to ensure the biovec isn't freed while the split bio is
- still using it, in case the original bio completes first, though. Using
- bio_chain() when splitting bios helps with this.
-
- * Submitting partially completed bios is now perfectly fine - this comes up
- occasionally in stacking block drivers and various code (e.g. md and
- bcache) had some ugly workarounds for this.
-
- It used to be the case that submitting a partially completed bio would work
- fine to _most_ devices, but since accessing the raw bvec array was the
- norm, not all drivers would respect bi_idx and those would break. Now,
- since all drivers _must_ go through the bvec iterator - and have been
- audited to make sure they are - submitting partially completed bios is
- perfectly fine.
-
-Other implications:
-===================
-
- * Almost all usage of bi_idx is now incorrect and has been removed; instead,
- where previously you would have used bi_idx you'd now use a bvec_iter,
- probably passing it to one of the helper macros.
-
- I.e. instead of using bio_iovec_idx() (or bio->bi_iovec[bio->bi_idx]), you
- now use bio_iter_iovec(), which takes a bvec_iter and returns a
- literal struct bio_vec - constructed on the fly from the raw biovec but
- taking into account bi_bvec_done (and bi_size).
-
- * bi_vcnt can't be trusted or relied upon by driver code - i.e. anything that
- doesn't actually own the bio. The reason is twofold: firstly, it's not
- actually needed for iterating over the bio anymore - we only use bi_size.
- Secondly, when cloning a bio and reusing (a portion of) the original bio's
- biovec, in order to calculate bi_vcnt for the new bio we'd have to iterate
- over all the biovecs in the new bio - which is silly as it's not needed.
-
- So, don't use bi_vcnt anymore.
-
- * The current interface allows the block layer to split bios as needed, so we
- could eliminate a lot of complexity particularly in stacked drivers. Code
- that creates bios can then create whatever size bios are convenient, and
- more importantly stacked drivers don't have to deal with both their own bio
- size limitations and the limitations of the underlying devices. Thus
- there's no need to define ->merge_bvec_fn() callbacks for individual block
- drivers.
diff --git a/Documentation/block/blk-mq.rst b/Documentation/block/blk-mq.rst
new file mode 100644
index 000000000000..fc06761b6ea9
--- /dev/null
+++ b/Documentation/block/blk-mq.rst
@@ -0,0 +1,153 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+================================================
+Multi-Queue Block IO Queueing Mechanism (blk-mq)
+================================================
+
+The Multi-Queue Block IO Queueing Mechanism is an API to enable fast storage
+devices to achieve a huge number of input/output operations per second (IOPS)
+through queueing and submitting IO requests to block devices simultaneously,
+benefiting from the parallelism offered by modern storage devices.
+
+Introduction
+============
+
+Background
+----------
+
+Magnetic hard disks have been the de facto standard from the beginning of the
+development of the kernel. The Block IO subsystem aimed to achieve the best
+performance possible for those devices with a high penalty when doing random
+access, and the bottleneck was the mechanical moving parts, a lot slower than
+any layer on the storage stack. One example of such optimization technique
+involves ordering read/write requests according to the current position of the
+hard disk head.
+
+However, with the development of Solid State Drives and Non-Volatile Memories
+without mechanical parts nor random access penalty and capable of performing
+high parallel access, the bottleneck of the stack had moved from the storage
+device to the operating system. In order to take advantage of the parallelism
+in those devices' design, the multi-queue mechanism was introduced.
+
+The former design had a single queue to store block IO requests with a single
+lock. That did not scale well in SMP systems due to dirty data in cache and the
+bottleneck of having a single lock for multiple processors. This setup also
+suffered with congestion when different processes (or the same process, moving
+to different CPUs) wanted to perform block IO. Instead of this, the blk-mq API
+spawns multiple queues with individual entry points local to the CPU, removing
+the need for a lock. A deeper explanation on how this works is covered in the
+following section (`Operation`_).
+
+Operation
+---------
+
+When the userspace performs IO to a block device (reading or writing a file,
+for instance), blk-mq takes action: it will store and manage IO requests to
+the block device, acting as middleware between the userspace (and a file
+system, if present) and the block device driver.
+
+blk-mq has two group of queues: software staging queues and hardware dispatch
+queues. When the request arrives at the block layer, it will try the shortest
+path possible: send it directly to the hardware queue. However, there are two
+cases that it might not do that: if there's an IO scheduler attached at the
+layer or if we want to try to merge requests. In both cases, requests will be
+sent to the software queue.
+
+Then, after the requests are processed by software queues, they will be placed
+at the hardware queue, a second stage queue where the hardware has direct access
+to process those requests. However, if the hardware does not have enough
+resources to accept more requests, blk-mq will place requests on a temporary
+queue, to be sent in the future, when the hardware is able.
+
+Software staging queues
+~~~~~~~~~~~~~~~~~~~~~~~
+
+The block IO subsystem adds requests in the software staging queues
+(represented by struct blk_mq_ctx) in case that they weren't sent
+directly to the driver. A request is one or more BIOs. They arrived at the
+block layer through the data structure struct bio. The block layer
+will then build a new structure from it, the struct request that will
+be used to communicate with the device driver. Each queue has its own lock and
+the number of queues is defined by a per-CPU or per-node basis.
+
+The staging queue can be used to merge requests for adjacent sectors. For
+instance, requests for sector 3-6, 6-7, 7-9 can become one request for 3-9.
+Even if random access to SSDs and NVMs have the same time of response compared
+to sequential access, grouped requests for sequential access decreases the
+number of individual requests. This technique of merging requests is called
+plugging.
+
+Along with that, the requests can be reordered to ensure fairness of system
+resources (e.g. to ensure that no application suffers from starvation) and/or to
+improve IO performance, by an IO scheduler.
+
+IO Schedulers
+^^^^^^^^^^^^^
+
+There are several schedulers implemented by the block layer, each one following
+a heuristic to improve the IO performance. They are "pluggable" (as in plug
+and play), in the sense of they can be selected at run time using sysfs. You
+can read more about Linux's IO schedulers `here
+<https://www.kernel.org/doc/html/latest/block/index.html>`_. The scheduling
+happens only between requests in the same queue, so it is not possible to merge
+requests from different queues, otherwise there would be cache trashing and a
+need to have a lock for each queue. After the scheduling, the requests are
+eligible to be sent to the hardware. One of the possible schedulers to be
+selected is the NONE scheduler, the most straightforward one. It will just
+place requests on whatever software queue the process is running on, without
+any reordering. When the device starts processing requests in the hardware
+queue (a.k.a. run the hardware queue), the software queues mapped to that
+hardware queue will be drained in sequence according to their mapping.
+
+Hardware dispatch queues
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+The hardware queue (represented by struct blk_mq_hw_ctx) is a struct
+used by device drivers to map the device submission queues (or device DMA ring
+buffer), and are the last step of the block layer submission code before the
+low level device driver taking ownership of the request. To run this queue, the
+block layer removes requests from the associated software queues and tries to
+dispatch to the hardware.
+
+If it's not possible to send the requests directly to hardware, they will be
+added to a linked list (``hctx->dispatch``) of requests. Then,
+next time the block layer runs a queue, it will send the requests laying at the
+``dispatch`` list first, to ensure a fairness dispatch with those
+requests that were ready to be sent first. The number of hardware queues
+depends on the number of hardware contexts supported by the hardware and its
+device driver, but it will not be more than the number of cores of the system.
+There is no reordering at this stage, and each software queue has a set of
+hardware queues to send requests for.
+
+.. note::
+
+ Neither the block layer nor the device protocols guarantee
+ the order of completion of requests. This must be handled by
+ higher layers, like the filesystem.
+
+Tag-based completion
+~~~~~~~~~~~~~~~~~~~~
+
+In order to indicate which request has been completed, every request is
+identified by an integer, ranging from 0 to the dispatch queue size. This tag
+is generated by the block layer and later reused by the device driver, removing
+the need to create a redundant identifier. When a request is completed in the
+driver, the tag is sent back to the block layer to notify it of the finalization.
+This removes the need to do a linear search to find out which IO has been
+completed.
+
+Further reading
+---------------
+
+- `Linux Block IO: Introducing Multi-queue SSD Access on Multi-core Systems <http://kernel.dk/blk-mq.pdf>`_
+
+- `NOOP scheduler <https://en.wikipedia.org/wiki/Noop_scheduler>`_
+
+- `Null block device driver <https://www.kernel.org/doc/html/latest/block/null_blk.html>`_
+
+Source code documentation
+=========================
+
+.. kernel-doc:: include/linux/blk-mq.h
+
+.. kernel-doc:: block/blk-mq.c
diff --git a/Documentation/block/capability.txt b/Documentation/block/capability.txt
deleted file mode 100644
index 2f1729424ef4..000000000000
--- a/Documentation/block/capability.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-Generic Block Device Capability
-===============================================================================
-This file documents the sysfs file block/<disk>/capability
-
-capability is a hex word indicating which capabilities a specific disk
-supports. For more information on bits not listed here, see
-include/linux/genhd.h
-
-Capability Value
--------------------------------------------------------------------------------
-GENHD_FL_MEDIA_CHANGE_NOTIFY 4
- When this bit is set, the disk supports Asynchronous Notification
- of media change events. These events will be broadcast to user
- space via kernel uevent.
-
diff --git a/Documentation/block/cfq-iosched.txt b/Documentation/block/cfq-iosched.txt
deleted file mode 100644
index 895bd3813115..000000000000
--- a/Documentation/block/cfq-iosched.txt
+++ /dev/null
@@ -1,291 +0,0 @@
-CFQ (Complete Fairness Queueing)
-===============================
-
-The main aim of CFQ scheduler is to provide a fair allocation of the disk
-I/O bandwidth for all the processes which requests an I/O operation.
-
-CFQ maintains the per process queue for the processes which request I/O
-operation(synchronous requests). In case of asynchronous requests, all the
-requests from all the processes are batched together according to their
-process's I/O priority.
-
-CFQ ioscheduler tunables
-========================
-
-slice_idle
-----------
-This specifies how long CFQ should idle for next request on certain cfq queues
-(for sequential workloads) and service trees (for random workloads) before
-queue is expired and CFQ selects next queue to dispatch from.
-
-By default slice_idle is a non-zero value. That means by default we idle on
-queues/service trees. This can be very helpful on highly seeky media like
-single spindle SATA/SAS disks where we can cut down on overall number of
-seeks and see improved throughput.
-
-Setting slice_idle to 0 will remove all the idling on queues/service tree
-level and one should see an overall improved throughput on faster storage
-devices like multiple SATA/SAS disks in hardware RAID configuration. The down
-side is that isolation provided from WRITES also goes down and notion of
-IO priority becomes weaker.
-
-So depending on storage and workload, it might be useful to set slice_idle=0.
-In general I think for SATA/SAS disks and software RAID of SATA/SAS disks
-keeping slice_idle enabled should be useful. For any configurations where
-there are multiple spindles behind single LUN (Host based hardware RAID
-controller or for storage arrays), setting slice_idle=0 might end up in better
-throughput and acceptable latencies.
-
-back_seek_max
--------------
-This specifies, given in Kbytes, the maximum "distance" for backward seeking.
-The distance is the amount of space from the current head location to the
-sectors that are backward in terms of distance.
-
-This parameter allows the scheduler to anticipate requests in the "backward"
-direction and consider them as being the "next" if they are within this
-distance from the current head location.
-
-back_seek_penalty
------------------
-This parameter is used to compute the cost of backward seeking. If the
-backward distance of request is just 1/back_seek_penalty from a "front"
-request, then the seeking cost of two requests is considered equivalent.
-
-So scheduler will not bias toward one or the other request (otherwise scheduler
-will bias toward front request). Default value of back_seek_penalty is 2.
-
-fifo_expire_async
------------------
-This parameter is used to set the timeout of asynchronous requests. Default
-value of this is 248ms.
-
-fifo_expire_sync
-----------------
-This parameter is used to set the timeout of synchronous requests. Default
-value of this is 124ms. In case to favor synchronous requests over asynchronous
-one, this value should be decreased relative to fifo_expire_async.
-
-group_idle
------------
-This parameter forces idling at the CFQ group level instead of CFQ
-queue level. This was introduced after a bottleneck was observed
-in higher end storage due to idle on sequential queue and allow dispatch
-from a single queue. The idea with this parameter is that it can be run with
-slice_idle=0 and group_idle=8, so that idling does not happen on individual
-queues in the group but happens overall on the group and thus still keeps the
-IO controller working.
-Not idling on individual queues in the group will dispatch requests from
-multiple queues in the group at the same time and achieve higher throughput
-on higher end storage.
-
-Default value for this parameter is 8ms.
-
-low_latency
------------
-This parameter is used to enable/disable the low latency mode of the CFQ
-scheduler. If enabled, CFQ tries to recompute the slice time for each process
-based on the target_latency set for the system. This favors fairness over
-throughput. Disabling low latency (setting it to 0) ignores target latency,
-allowing each process in the system to get a full time slice.
-
-By default low latency mode is enabled.
-
-target_latency
---------------
-This parameter is used to calculate the time slice for a process if cfq's
-latency mode is enabled. It will ensure that sync requests have an estimated
-latency. But if sequential workload is higher(e.g. sequential read),
-then to meet the latency constraints, throughput may decrease because of less
-time for each process to issue I/O request before the cfq queue is switched.
-
-Though this can be overcome by disabling the latency_mode, it may increase
-the read latency for some applications. This parameter allows for changing
-target_latency through the sysfs interface which can provide the balanced
-throughput and read latency.
-
-Default value for target_latency is 300ms.
-
-slice_async
------------
-This parameter is same as of slice_sync but for asynchronous queue. The
-default value is 40ms.
-
-slice_async_rq
---------------
-This parameter is used to limit the dispatching of asynchronous request to
-device request queue in queue's slice time. The maximum number of request that
-are allowed to be dispatched also depends upon the io priority. Default value
-for this is 2.
-
-slice_sync
-----------
-When a queue is selected for execution, the queues IO requests are only
-executed for a certain amount of time(time_slice) before switching to another
-queue. This parameter is used to calculate the time slice of synchronous
-queue.
-
-time_slice is computed using the below equation:-
-time_slice = slice_sync + (slice_sync/5 * (4 - prio)). To increase the
-time_slice of synchronous queue, increase the value of slice_sync. Default
-value is 100ms.
-
-quantum
--------
-This specifies the number of request dispatched to the device queue. In a
-queue's time slice, a request will not be dispatched if the number of request
-in the device exceeds this parameter. This parameter is used for synchronous
-request.
-
-In case of storage with several disk, this setting can limit the parallel
-processing of request. Therefore, increasing the value can improve the
-performance although this can cause the latency of some I/O to increase due
-to more number of requests.
-
-CFQ Group scheduling
-====================
-
-CFQ supports blkio cgroup and has "blkio." prefixed files in each
-blkio cgroup directory. It is weight-based and there are four knobs
-for configuration - weight[_device] and leaf_weight[_device].
-Internal cgroup nodes (the ones with children) can also have tasks in
-them, so the former two configure how much proportion the cgroup as a
-whole is entitled to at its parent's level while the latter two
-configure how much proportion the tasks in the cgroup have compared to
-its direct children.
-
-Another way to think about it is assuming that each internal node has
-an implicit leaf child node which hosts all the tasks whose weight is
-configured by leaf_weight[_device]. Let's assume a blkio hierarchy
-composed of five cgroups - root, A, B, AA and AB - with the following
-weights where the names represent the hierarchy.
-
- weight leaf_weight
- root : 125 125
- A : 500 750
- B : 250 500
- AA : 500 500
- AB : 1000 500
-
-root never has a parent making its weight is meaningless. For backward
-compatibility, weight is always kept in sync with leaf_weight. B, AA
-and AB have no child and thus its tasks have no children cgroup to
-compete with. They always get 100% of what the cgroup won at the
-parent level. Considering only the weights which matter, the hierarchy
-looks like the following.
-
- root
- / | \
- A B leaf
- 500 250 125
- / | \
- AA AB leaf
- 500 1000 750
-
-If all cgroups have active IOs and competing with each other, disk
-time will be distributed like the following.
-
-Distribution below root. The total active weight at this level is
-A:500 + B:250 + C:125 = 875.
-
- root-leaf : 125 / 875 =~ 14%
- A : 500 / 875 =~ 57%
- B(-leaf) : 250 / 875 =~ 28%
-
-A has children and further distributes its 57% among the children and
-the implicit leaf node. The total active weight at this level is
-AA:500 + AB:1000 + A-leaf:750 = 2250.
-
- A-leaf : ( 750 / 2250) * A =~ 19%
- AA(-leaf) : ( 500 / 2250) * A =~ 12%
- AB(-leaf) : (1000 / 2250) * A =~ 25%
-
-CFQ IOPS Mode for group scheduling
-===================================
-Basic CFQ design is to provide priority based time slices. Higher priority
-process gets bigger time slice and lower priority process gets smaller time
-slice. Measuring time becomes harder if storage is fast and supports NCQ and
-it would be better to dispatch multiple requests from multiple cfq queues in
-request queue at a time. In such scenario, it is not possible to measure time
-consumed by single queue accurately.
-
-What is possible though is to measure number of requests dispatched from a
-single queue and also allow dispatch from multiple cfq queue at the same time.
-This effectively becomes the fairness in terms of IOPS (IO operations per
-second).
-
-If one sets slice_idle=0 and if storage supports NCQ, CFQ internally switches
-to IOPS mode and starts providing fairness in terms of number of requests
-dispatched. Note that this mode switching takes effect only for group
-scheduling. For non-cgroup users nothing should change.
-
-CFQ IO scheduler Idling Theory
-===============================
-Idling on a queue is primarily about waiting for the next request to come
-on same queue after completion of a request. In this process CFQ will not
-dispatch requests from other cfq queues even if requests are pending there.
-
-The rationale behind idling is that it can cut down on number of seeks
-on rotational media. For example, if a process is doing dependent
-sequential reads (next read will come on only after completion of previous
-one), then not dispatching request from other queue should help as we
-did not move the disk head and kept on dispatching sequential IO from
-one queue.
-
-CFQ has following service trees and various queues are put on these trees.
-
- sync-idle sync-noidle async
-
-All cfq queues doing synchronous sequential IO go on to sync-idle tree.
-On this tree we idle on each queue individually.
-
-All synchronous non-sequential queues go on sync-noidle tree. Also any
-synchronous write request which is not marked with REQ_IDLE goes on this
-service tree. On this tree we do not idle on individual queues instead idle
-on the whole group of queues or the tree. So if there are 4 queues waiting
-for IO to dispatch we will idle only once last queue has dispatched the IO
-and there is no more IO on this service tree.
-
-All async writes go on async service tree. There is no idling on async
-queues.
-
-CFQ has some optimizations for SSDs and if it detects a non-rotational
-media which can support higher queue depth (multiple requests at in
-flight at a time), then it cuts down on idling of individual queues and
-all the queues move to sync-noidle tree and only tree idle remains. This
-tree idling provides isolation with buffered write queues on async tree.
-
-FAQ
-===
-Q1. Why to idle at all on queues not marked with REQ_IDLE.
-
-A1. We only do tree idle (all queues on sync-noidle tree) on queues not marked
- with REQ_IDLE. This helps in providing isolation with all the sync-idle
- queues. Otherwise in presence of many sequential readers, other
- synchronous IO might not get fair share of disk.
-
- For example, if there are 10 sequential readers doing IO and they get
- 100ms each. If a !REQ_IDLE request comes in, it will be scheduled
- roughly after 1 second. If after completion of !REQ_IDLE request we
- do not idle, and after a couple of milli seconds a another !REQ_IDLE
- request comes in, again it will be scheduled after 1second. Repeat it
- and notice how a workload can lose its disk share and suffer due to
- multiple sequential readers.
-
- fsync can generate dependent IO where bunch of data is written in the
- context of fsync, and later some journaling data is written. Journaling
- data comes in only after fsync has finished its IO (atleast for ext4
- that seemed to be the case). Now if one decides not to idle on fsync
- thread due to !REQ_IDLE, then next journaling write will not get
- scheduled for another second. A process doing small fsync, will suffer
- badly in presence of multiple sequential readers.
-
- Hence doing tree idling on threads using !REQ_IDLE flag on requests
- provides isolation from multiple sequential readers and at the same
- time we do not idle on individual threads.
-
-Q2. When to specify REQ_IDLE
-A2. I would think whenever one is doing synchronous write and expecting
- more writes to be dispatched from same context soon, should be able
- to specify REQ_IDLE on writes and that probably should work well for
- most of the cases.
diff --git a/Documentation/block/cmdline-partition.rst b/Documentation/block/cmdline-partition.rst
new file mode 100644
index 000000000000..526ba201dddc
--- /dev/null
+++ b/Documentation/block/cmdline-partition.rst
@@ -0,0 +1,56 @@
+==============================================
+Embedded device command line partition parsing
+==============================================
+
+The "blkdevparts" command line option adds support for reading the
+block device partition table from the kernel command line.
+
+It is typically used for fixed block (eMMC) embedded devices.
+It has no MBR, so saves storage space. Bootloader can be easily accessed
+by absolute address of data on the block device.
+Users can easily change the partition.
+
+The format for the command line is just like mtdparts:
+
+blkdevparts=<blkdev-def>[;<blkdev-def>]
+ <blkdev-def> := <blkdev-id>:<partdef>[,<partdef>]
+ <partdef> := <size>[@<offset>](part-name)
+
+<blkdev-id>
+ block device disk name. Embedded device uses fixed block device.
+ Its disk name is also fixed, such as: mmcblk0, mmcblk1, mmcblk0boot0.
+
+<size>
+ partition size, in bytes, such as: 512, 1m, 1G.
+ size may contain an optional suffix of (upper or lower case):
+
+ K, M, G, T, P, E.
+
+ "-" is used to denote all remaining space.
+
+<offset>
+ partition start address, in bytes.
+ offset may contain an optional suffix of (upper or lower case):
+
+ K, M, G, T, P, E.
+
+(part-name)
+ partition name. Kernel sends uevent with "PARTNAME". Application can
+ create a link to block device partition with the name "PARTNAME".
+ User space application can access partition by partition name.
+
+ro
+ read-only. Flag the partition as read-only.
+
+Example:
+
+ eMMC disk names are "mmcblk0" and "mmcblk0boot0".
+
+ bootargs::
+
+ 'blkdevparts=mmcblk0:1G(data0),1G(data1),-;mmcblk0boot0:1m(boot)ro,-(kernel)'
+
+ dmesg::
+
+ mmcblk0: p1(data0) p2(data1) p3()
+ mmcblk0boot0: p1(boot) p2(kernel)
diff --git a/Documentation/block/cmdline-partition.txt b/Documentation/block/cmdline-partition.txt
deleted file mode 100644
index 760a3f7c3ed4..000000000000
--- a/Documentation/block/cmdline-partition.txt
+++ /dev/null
@@ -1,46 +0,0 @@
-Embedded device command line partition parsing
-=====================================================================
-
-The "blkdevparts" command line option adds support for reading the
-block device partition table from the kernel command line.
-
-It is typically used for fixed block (eMMC) embedded devices.
-It has no MBR, so saves storage space. Bootloader can be easily accessed
-by absolute address of data on the block device.
-Users can easily change the partition.
-
-The format for the command line is just like mtdparts:
-
-blkdevparts=<blkdev-def>[;<blkdev-def>]
- <blkdev-def> := <blkdev-id>:<partdef>[,<partdef>]
- <partdef> := <size>[@<offset>](part-name)
-
-<blkdev-id>
- block device disk name. Embedded device uses fixed block device.
- Its disk name is also fixed, such as: mmcblk0, mmcblk1, mmcblk0boot0.
-
-<size>
- partition size, in bytes, such as: 512, 1m, 1G.
- size may contain an optional suffix of (upper or lower case):
- K, M, G, T, P, E.
- "-" is used to denote all remaining space.
-
-<offset>
- partition start address, in bytes.
- offset may contain an optional suffix of (upper or lower case):
- K, M, G, T, P, E.
-
-(part-name)
- partition name. Kernel sends uevent with "PARTNAME". Application can
- create a link to block device partition with the name "PARTNAME".
- User space application can access partition by partition name.
-
-Example:
- eMMC disk names are "mmcblk0" and "mmcblk0boot0".
-
- bootargs:
- 'blkdevparts=mmcblk0:1G(data0),1G(data1),-;mmcblk0boot0:1m(boot),-(kernel)'
-
- dmesg:
- mmcblk0: p1(data0) p2(data1) p3()
- mmcblk0boot0: p1(boot) p2(kernel)
diff --git a/Documentation/block/data-integrity.rst b/Documentation/block/data-integrity.rst
new file mode 100644
index 000000000000..99905e880a0e
--- /dev/null
+++ b/Documentation/block/data-integrity.rst
@@ -0,0 +1,248 @@
+==============
+Data Integrity
+==============
+
+1. Introduction
+===============
+
+Modern filesystems feature checksumming of data and metadata to
+protect against data corruption. However, the detection of the
+corruption is done at read time which could potentially be months
+after the data was written. At that point the original data that the
+application tried to write is most likely lost.
+
+The solution is to ensure that the disk is actually storing what the
+application meant it to. Recent additions to both the SCSI family
+protocols (SBC Data Integrity Field, SCC protection proposal) as well
+as SATA/T13 (External Path Protection) try to remedy this by adding
+support for appending integrity metadata to an I/O. The integrity
+metadata (or protection information in SCSI terminology) includes a
+checksum for each sector as well as an incrementing counter that
+ensures the individual sectors are written in the right order. And
+for some protection schemes also that the I/O is written to the right
+place on disk.
+
+Current storage controllers and devices implement various protective
+measures, for instance checksumming and scrubbing. But these
+technologies are working in their own isolated domains or at best
+between adjacent nodes in the I/O path. The interesting thing about
+DIF and the other integrity extensions is that the protection format
+is well defined and every node in the I/O path can verify the
+integrity of the I/O and reject it if corruption is detected. This
+allows not only corruption prevention but also isolation of the point
+of failure.
+
+2. The Data Integrity Extensions
+================================
+
+As written, the protocol extensions only protect the path between
+controller and storage device. However, many controllers actually
+allow the operating system to interact with the integrity metadata
+(IMD). We have been working with several FC/SAS HBA vendors to enable
+the protection information to be transferred to and from their
+controllers.
+
+The SCSI Data Integrity Field works by appending 8 bytes of protection
+information to each sector. The data + integrity metadata is stored
+in 520 byte sectors on disk. Data + IMD are interleaved when
+transferred between the controller and target. The T13 proposal is
+similar.
+
+Because it is highly inconvenient for operating systems to deal with
+520 (and 4104) byte sectors, we approached several HBA vendors and
+encouraged them to allow separation of the data and integrity metadata
+scatter-gather lists.
+
+The controller will interleave the buffers on write and split them on
+read. This means that Linux can DMA the data buffers to and from
+host memory without changes to the page cache.
+
+Also, the 16-bit CRC checksum mandated by both the SCSI and SATA specs
+is somewhat heavy to compute in software. Benchmarks found that
+calculating this checksum had a significant impact on system
+performance for a number of workloads. Some controllers allow a
+lighter-weight checksum to be used when interfacing with the operating
+system. Emulex, for instance, supports the TCP/IP checksum instead.
+The IP checksum received from the OS is converted to the 16-bit CRC
+when writing and vice versa. This allows the integrity metadata to be
+generated by Linux or the application at very low cost (comparable to
+software RAID5).
+
+The IP checksum is weaker than the CRC in terms of detecting bit
+errors. However, the strength is really in the separation of the data
+buffers and the integrity metadata. These two distinct buffers must
+match up for an I/O to complete.
+
+The separation of the data and integrity metadata buffers as well as
+the choice in checksums is referred to as the Data Integrity
+Extensions. As these extensions are outside the scope of the protocol
+bodies (T10, T13), Oracle and its partners are trying to standardize
+them within the Storage Networking Industry Association.
+
+3. Kernel Changes
+=================
+
+The data integrity framework in Linux enables protection information
+to be pinned to I/Os and sent to/received from controllers that
+support it.
+
+The advantage to the integrity extensions in SCSI and SATA is that
+they enable us to protect the entire path from application to storage
+device. However, at the same time this is also the biggest
+disadvantage. It means that the protection information must be in a
+format that can be understood by the disk.
+
+Generally Linux/POSIX applications are agnostic to the intricacies of
+the storage devices they are accessing. The virtual filesystem switch
+and the block layer make things like hardware sector size and
+transport protocols completely transparent to the application.
+
+However, this level of detail is required when preparing the
+protection information to send to a disk. Consequently, the very
+concept of an end-to-end protection scheme is a layering violation.
+It is completely unreasonable for an application to be aware whether
+it is accessing a SCSI or SATA disk.
+
+The data integrity support implemented in Linux attempts to hide this
+from the application. As far as the application (and to some extent
+the kernel) is concerned, the integrity metadata is opaque information
+that's attached to the I/O.
+
+The current implementation allows the block layer to automatically
+generate the protection information for any I/O. Eventually the
+intent is to move the integrity metadata calculation to userspace for
+user data. Metadata and other I/O that originates within the kernel
+will still use the automatic generation interface.
+
+Some storage devices allow each hardware sector to be tagged with a
+16-bit value. The owner of this tag space is the owner of the block
+device. I.e. the filesystem in most cases. The filesystem can use
+this extra space to tag sectors as they see fit. Because the tag
+space is limited, the block interface allows tagging bigger chunks by
+way of interleaving. This way, 8*16 bits of information can be
+attached to a typical 4KB filesystem block.
+
+This also means that applications such as fsck and mkfs will need
+access to manipulate the tags from user space. A passthrough
+interface for this is being worked on.
+
+
+4. Block Layer Implementation Details
+=====================================
+
+4.1 Bio
+-------
+
+The data integrity patches add a new field to struct bio when
+CONFIG_BLK_DEV_INTEGRITY is enabled. bio_integrity(bio) returns a
+pointer to a struct bip which contains the bio integrity payload.
+Essentially a bip is a trimmed down struct bio which holds a bio_vec
+containing the integrity metadata and the required housekeeping
+information (bvec pool, vector count, etc.)
+
+A kernel subsystem can enable data integrity protection on a bio by
+calling bio_integrity_alloc(bio). This will allocate and attach the
+bip to the bio.
+
+Individual pages containing integrity metadata can subsequently be
+attached using bio_integrity_add_page().
+
+bio_free() will automatically free the bip.
+
+
+4.2 Block Device
+----------------
+
+Block devices can set up the integrity information in the integrity
+sub-struture of the queue_limits structure.
+
+Layered block devices will need to pick a profile that's appropriate
+for all subdevices. queue_limits_stack_integrity() can help with that. DM
+and MD linear, RAID0 and RAID1 are currently supported. RAID4/5/6
+will require extra work due to the application tag.
+
+
+5.0 Block Layer Integrity API
+=============================
+
+5.1 Normal Filesystem
+---------------------
+
+ The normal filesystem is unaware that the underlying block device
+ is capable of sending/receiving integrity metadata. The IMD will
+ be automatically generated by the block layer at submit_bio() time
+ in case of a WRITE. A READ request will cause the I/O integrity
+ to be verified upon completion.
+
+ IMD generation and verification can be toggled using the::
+
+ /sys/block/<bdev>/integrity/write_generate
+
+ and::
+
+ /sys/block/<bdev>/integrity/read_verify
+
+ flags.
+
+
+5.2 Integrity-Aware Filesystem
+------------------------------
+
+ A filesystem that is integrity-aware can prepare I/Os with IMD
+ attached. It can also use the application tag space if this is
+ supported by the block device.
+
+
+ `bool bio_integrity_prep(bio);`
+
+ To generate IMD for WRITE and to set up buffers for READ, the
+ filesystem must call bio_integrity_prep(bio).
+
+ Prior to calling this function, the bio data direction and start
+ sector must be set, and the bio should have all data pages
+ added. It is up to the caller to ensure that the bio does not
+ change while I/O is in progress.
+ Complete bio with error if prepare failed for some reason.
+
+
+5.3 Passing Existing Integrity Metadata
+---------------------------------------
+
+ Filesystems that either generate their own integrity metadata or
+ are capable of transferring IMD from user space can use the
+ following calls:
+
+
+ `struct bip * bio_integrity_alloc(bio, gfp_mask, nr_pages);`
+
+ Allocates the bio integrity payload and hangs it off of the bio.
+ nr_pages indicate how many pages of protection data need to be
+ stored in the integrity bio_vec list (similar to bio_alloc()).
+
+ The integrity payload will be freed at bio_free() time.
+
+
+ `int bio_integrity_add_page(bio, page, len, offset);`
+
+ Attaches a page containing integrity metadata to an existing
+ bio. The bio must have an existing bip,
+ i.e. bio_integrity_alloc() must have been called. For a WRITE,
+ the integrity metadata in the pages must be in a format
+ understood by the target device with the notable exception that
+ the sector numbers will be remapped as the request traverses the
+ I/O stack. This implies that the pages added using this call
+ will be modified during I/O! The first reference tag in the
+ integrity metadata must have a value of bip->bip_sector.
+
+ Pages can be added using bio_integrity_add_page() as long as
+ there is room in the bip bio_vec array (nr_pages).
+
+ Upon completion of a READ operation, the attached pages will
+ contain the integrity metadata received from the storage device.
+ It is up to the receiver to process them and verify data
+ integrity upon completion.
+
+
+----------------------------------------------------------------------
+
+2007-12-24 Martin K. Petersen <martin.petersen@oracle.com>
diff --git a/Documentation/block/data-integrity.txt b/Documentation/block/data-integrity.txt
deleted file mode 100644
index 934c44ea0c57..000000000000
--- a/Documentation/block/data-integrity.txt
+++ /dev/null
@@ -1,281 +0,0 @@
-----------------------------------------------------------------------
-1. INTRODUCTION
-
-Modern filesystems feature checksumming of data and metadata to
-protect against data corruption. However, the detection of the
-corruption is done at read time which could potentially be months
-after the data was written. At that point the original data that the
-application tried to write is most likely lost.
-
-The solution is to ensure that the disk is actually storing what the
-application meant it to. Recent additions to both the SCSI family
-protocols (SBC Data Integrity Field, SCC protection proposal) as well
-as SATA/T13 (External Path Protection) try to remedy this by adding
-support for appending integrity metadata to an I/O. The integrity
-metadata (or protection information in SCSI terminology) includes a
-checksum for each sector as well as an incrementing counter that
-ensures the individual sectors are written in the right order. And
-for some protection schemes also that the I/O is written to the right
-place on disk.
-
-Current storage controllers and devices implement various protective
-measures, for instance checksumming and scrubbing. But these
-technologies are working in their own isolated domains or at best
-between adjacent nodes in the I/O path. The interesting thing about
-DIF and the other integrity extensions is that the protection format
-is well defined and every node in the I/O path can verify the
-integrity of the I/O and reject it if corruption is detected. This
-allows not only corruption prevention but also isolation of the point
-of failure.
-
-----------------------------------------------------------------------
-2. THE DATA INTEGRITY EXTENSIONS
-
-As written, the protocol extensions only protect the path between
-controller and storage device. However, many controllers actually
-allow the operating system to interact with the integrity metadata
-(IMD). We have been working with several FC/SAS HBA vendors to enable
-the protection information to be transferred to and from their
-controllers.
-
-The SCSI Data Integrity Field works by appending 8 bytes of protection
-information to each sector. The data + integrity metadata is stored
-in 520 byte sectors on disk. Data + IMD are interleaved when
-transferred between the controller and target. The T13 proposal is
-similar.
-
-Because it is highly inconvenient for operating systems to deal with
-520 (and 4104) byte sectors, we approached several HBA vendors and
-encouraged them to allow separation of the data and integrity metadata
-scatter-gather lists.
-
-The controller will interleave the buffers on write and split them on
-read. This means that Linux can DMA the data buffers to and from
-host memory without changes to the page cache.
-
-Also, the 16-bit CRC checksum mandated by both the SCSI and SATA specs
-is somewhat heavy to compute in software. Benchmarks found that
-calculating this checksum had a significant impact on system
-performance for a number of workloads. Some controllers allow a
-lighter-weight checksum to be used when interfacing with the operating
-system. Emulex, for instance, supports the TCP/IP checksum instead.
-The IP checksum received from the OS is converted to the 16-bit CRC
-when writing and vice versa. This allows the integrity metadata to be
-generated by Linux or the application at very low cost (comparable to
-software RAID5).
-
-The IP checksum is weaker than the CRC in terms of detecting bit
-errors. However, the strength is really in the separation of the data
-buffers and the integrity metadata. These two distinct buffers must
-match up for an I/O to complete.
-
-The separation of the data and integrity metadata buffers as well as
-the choice in checksums is referred to as the Data Integrity
-Extensions. As these extensions are outside the scope of the protocol
-bodies (T10, T13), Oracle and its partners are trying to standardize
-them within the Storage Networking Industry Association.
-
-----------------------------------------------------------------------
-3. KERNEL CHANGES
-
-The data integrity framework in Linux enables protection information
-to be pinned to I/Os and sent to/received from controllers that
-support it.
-
-The advantage to the integrity extensions in SCSI and SATA is that
-they enable us to protect the entire path from application to storage
-device. However, at the same time this is also the biggest
-disadvantage. It means that the protection information must be in a
-format that can be understood by the disk.
-
-Generally Linux/POSIX applications are agnostic to the intricacies of
-the storage devices they are accessing. The virtual filesystem switch
-and the block layer make things like hardware sector size and
-transport protocols completely transparent to the application.
-
-However, this level of detail is required when preparing the
-protection information to send to a disk. Consequently, the very
-concept of an end-to-end protection scheme is a layering violation.
-It is completely unreasonable for an application to be aware whether
-it is accessing a SCSI or SATA disk.
-
-The data integrity support implemented in Linux attempts to hide this
-from the application. As far as the application (and to some extent
-the kernel) is concerned, the integrity metadata is opaque information
-that's attached to the I/O.
-
-The current implementation allows the block layer to automatically
-generate the protection information for any I/O. Eventually the
-intent is to move the integrity metadata calculation to userspace for
-user data. Metadata and other I/O that originates within the kernel
-will still use the automatic generation interface.
-
-Some storage devices allow each hardware sector to be tagged with a
-16-bit value. The owner of this tag space is the owner of the block
-device. I.e. the filesystem in most cases. The filesystem can use
-this extra space to tag sectors as they see fit. Because the tag
-space is limited, the block interface allows tagging bigger chunks by
-way of interleaving. This way, 8*16 bits of information can be
-attached to a typical 4KB filesystem block.
-
-This also means that applications such as fsck and mkfs will need
-access to manipulate the tags from user space. A passthrough
-interface for this is being worked on.
-
-
-----------------------------------------------------------------------
-4. BLOCK LAYER IMPLEMENTATION DETAILS
-
-4.1 BIO
-
-The data integrity patches add a new field to struct bio when
-CONFIG_BLK_DEV_INTEGRITY is enabled. bio_integrity(bio) returns a
-pointer to a struct bip which contains the bio integrity payload.
-Essentially a bip is a trimmed down struct bio which holds a bio_vec
-containing the integrity metadata and the required housekeeping
-information (bvec pool, vector count, etc.)
-
-A kernel subsystem can enable data integrity protection on a bio by
-calling bio_integrity_alloc(bio). This will allocate and attach the
-bip to the bio.
-
-Individual pages containing integrity metadata can subsequently be
-attached using bio_integrity_add_page().
-
-bio_free() will automatically free the bip.
-
-
-4.2 BLOCK DEVICE
-
-Because the format of the protection data is tied to the physical
-disk, each block device has been extended with a block integrity
-profile (struct blk_integrity). This optional profile is registered
-with the block layer using blk_integrity_register().
-
-The profile contains callback functions for generating and verifying
-the protection data, as well as getting and setting application tags.
-The profile also contains a few constants to aid in completing,
-merging and splitting the integrity metadata.
-
-Layered block devices will need to pick a profile that's appropriate
-for all subdevices. blk_integrity_compare() can help with that. DM
-and MD linear, RAID0 and RAID1 are currently supported. RAID4/5/6
-will require extra work due to the application tag.
-
-
-----------------------------------------------------------------------
-5.0 BLOCK LAYER INTEGRITY API
-
-5.1 NORMAL FILESYSTEM
-
- The normal filesystem is unaware that the underlying block device
- is capable of sending/receiving integrity metadata. The IMD will
- be automatically generated by the block layer at submit_bio() time
- in case of a WRITE. A READ request will cause the I/O integrity
- to be verified upon completion.
-
- IMD generation and verification can be toggled using the
-
- /sys/block/<bdev>/integrity/write_generate
-
- and
-
- /sys/block/<bdev>/integrity/read_verify
-
- flags.
-
-
-5.2 INTEGRITY-AWARE FILESYSTEM
-
- A filesystem that is integrity-aware can prepare I/Os with IMD
- attached. It can also use the application tag space if this is
- supported by the block device.
-
-
- bool bio_integrity_prep(bio);
-
- To generate IMD for WRITE and to set up buffers for READ, the
- filesystem must call bio_integrity_prep(bio).
-
- Prior to calling this function, the bio data direction and start
- sector must be set, and the bio should have all data pages
- added. It is up to the caller to ensure that the bio does not
- change while I/O is in progress.
- Complete bio with error if prepare failed for some reson.
-
-
-5.3 PASSING EXISTING INTEGRITY METADATA
-
- Filesystems that either generate their own integrity metadata or
- are capable of transferring IMD from user space can use the
- following calls:
-
-
- struct bip * bio_integrity_alloc(bio, gfp_mask, nr_pages);
-
- Allocates the bio integrity payload and hangs it off of the bio.
- nr_pages indicate how many pages of protection data need to be
- stored in the integrity bio_vec list (similar to bio_alloc()).
-
- The integrity payload will be freed at bio_free() time.
-
-
- int bio_integrity_add_page(bio, page, len, offset);
-
- Attaches a page containing integrity metadata to an existing
- bio. The bio must have an existing bip,
- i.e. bio_integrity_alloc() must have been called. For a WRITE,
- the integrity metadata in the pages must be in a format
- understood by the target device with the notable exception that
- the sector numbers will be remapped as the request traverses the
- I/O stack. This implies that the pages added using this call
- will be modified during I/O! The first reference tag in the
- integrity metadata must have a value of bip->bip_sector.
-
- Pages can be added using bio_integrity_add_page() as long as
- there is room in the bip bio_vec array (nr_pages).
-
- Upon completion of a READ operation, the attached pages will
- contain the integrity metadata received from the storage device.
- It is up to the receiver to process them and verify data
- integrity upon completion.
-
-
-5.4 REGISTERING A BLOCK DEVICE AS CAPABLE OF EXCHANGING INTEGRITY
- METADATA
-
- To enable integrity exchange on a block device the gendisk must be
- registered as capable:
-
- int blk_integrity_register(gendisk, blk_integrity);
-
- The blk_integrity struct is a template and should contain the
- following:
-
- static struct blk_integrity my_profile = {
- .name = "STANDARDSBODY-TYPE-VARIANT-CSUM",
- .generate_fn = my_generate_fn,
- .verify_fn = my_verify_fn,
- .tuple_size = sizeof(struct my_tuple_size),
- .tag_size = <tag bytes per hw sector>,
- };
-
- 'name' is a text string which will be visible in sysfs. This is
- part of the userland API so chose it carefully and never change
- it. The format is standards body-type-variant.
- E.g. T10-DIF-TYPE1-IP or T13-EPP-0-CRC.
-
- 'generate_fn' generates appropriate integrity metadata (for WRITE).
-
- 'verify_fn' verifies that the data buffer matches the integrity
- metadata.
-
- 'tuple_size' must be set to match the size of the integrity
- metadata per sector. I.e. 8 for DIF and EPP.
-
- 'tag_size' must be set to identify how many bytes of tag space
- are available per hardware sector. For DIF this is either 2 or
- 0 depending on the value of the Control Mode Page ATO bit.
-
-----------------------------------------------------------------------
-2007-12-24 Martin K. Petersen <martin.petersen@oracle.com>
diff --git a/Documentation/block/deadline-iosched.rst b/Documentation/block/deadline-iosched.rst
new file mode 100644
index 000000000000..9f5c5a4c370e
--- /dev/null
+++ b/Documentation/block/deadline-iosched.rst
@@ -0,0 +1,72 @@
+==============================
+Deadline IO scheduler tunables
+==============================
+
+This little file attempts to document how the deadline io scheduler works.
+In particular, it will clarify the meaning of the exposed tunables that may be
+of interest to power users.
+
+Selecting IO schedulers
+-----------------------
+Refer to Documentation/block/switching-sched.rst for information on
+selecting an io scheduler on a per-device basis.
+
+------------------------------------------------------------------------------
+
+read_expire (in ms)
+-----------------------
+
+The goal of the deadline io scheduler is to attempt to guarantee a start
+service time for a request. As we focus mainly on read latencies, this is
+tunable. When a read request first enters the io scheduler, it is assigned
+a deadline that is the current time + the read_expire value in units of
+milliseconds.
+
+
+write_expire (in ms)
+-----------------------
+
+Similar to read_expire mentioned above, but for writes.
+
+
+fifo_batch (number of requests)
+------------------------------------
+
+Requests are grouped into ``batches`` of a particular data direction (read or
+write) which are serviced in increasing sector order. To limit extra seeking,
+deadline expiries are only checked between batches. fifo_batch controls the
+maximum number of requests per batch.
+
+This parameter tunes the balance between per-request latency and aggregate
+throughput. When low latency is the primary concern, smaller is better (where
+a value of 1 yields first-come first-served behaviour). Increasing fifo_batch
+generally improves throughput, at the cost of latency variation.
+
+
+writes_starved (number of dispatches)
+--------------------------------------
+
+When we have to move requests from the io scheduler queue to the block
+device dispatch queue, we always give a preference to reads. However, we
+don't want to starve writes indefinitely either. So writes_starved controls
+how many times we give preference to reads over writes. When that has been
+done writes_starved number of times, we dispatch some writes based on the
+same criteria as reads.
+
+
+front_merges (bool)
+----------------------
+
+Sometimes it happens that a request enters the io scheduler that is contiguous
+with a request that is already on the queue. Either it fits in the back of that
+request, or it fits at the front. That is called either a back merge candidate
+or a front merge candidate. Due to the way files are typically laid out,
+back merges are much more common than front merges. For some work loads, you
+may even know that it is a waste of time to spend any time attempting to
+front merge requests. Setting front_merges to 0 disables this functionality.
+Front merges may still occur due to the cached last_merge hint, but since
+that comes at basically 0 cost we leave that on. We simply disable the
+rbtree front sector lookup when the io scheduler merge function is called.
+
+
+Nov 11 2002, Jens Axboe <jens.axboe@oracle.com>
diff --git a/Documentation/block/deadline-iosched.txt b/Documentation/block/deadline-iosched.txt
deleted file mode 100644
index 2d82c80322cb..000000000000
--- a/Documentation/block/deadline-iosched.txt
+++ /dev/null
@@ -1,75 +0,0 @@
-Deadline IO scheduler tunables
-==============================
-
-This little file attempts to document how the deadline io scheduler works.
-In particular, it will clarify the meaning of the exposed tunables that may be
-of interest to power users.
-
-Selecting IO schedulers
------------------------
-Refer to Documentation/block/switching-sched.txt for information on
-selecting an io scheduler on a per-device basis.
-
-
-********************************************************************************
-
-
-read_expire (in ms)
------------
-
-The goal of the deadline io scheduler is to attempt to guarantee a start
-service time for a request. As we focus mainly on read latencies, this is
-tunable. When a read request first enters the io scheduler, it is assigned
-a deadline that is the current time + the read_expire value in units of
-milliseconds.
-
-
-write_expire (in ms)
------------
-
-Similar to read_expire mentioned above, but for writes.
-
-
-fifo_batch (number of requests)
-----------
-
-Requests are grouped into ``batches'' of a particular data direction (read or
-write) which are serviced in increasing sector order. To limit extra seeking,
-deadline expiries are only checked between batches. fifo_batch controls the
-maximum number of requests per batch.
-
-This parameter tunes the balance between per-request latency and aggregate
-throughput. When low latency is the primary concern, smaller is better (where
-a value of 1 yields first-come first-served behaviour). Increasing fifo_batch
-generally improves throughput, at the cost of latency variation.
-
-
-writes_starved (number of dispatches)
---------------
-
-When we have to move requests from the io scheduler queue to the block
-device dispatch queue, we always give a preference to reads. However, we
-don't want to starve writes indefinitely either. So writes_starved controls
-how many times we give preference to reads over writes. When that has been
-done writes_starved number of times, we dispatch some writes based on the
-same criteria as reads.
-
-
-front_merges (bool)
-------------
-
-Sometimes it happens that a request enters the io scheduler that is contiguous
-with a request that is already on the queue. Either it fits in the back of that
-request, or it fits at the front. That is called either a back merge candidate
-or a front merge candidate. Due to the way files are typically laid out,
-back merges are much more common than front merges. For some work loads, you
-may even know that it is a waste of time to spend any time attempting to
-front merge requests. Setting front_merges to 0 disables this functionality.
-Front merges may still occur due to the cached last_merge hint, but since
-that comes at basically 0 cost we leave that on. We simply disable the
-rbtree front sector lookup when the io scheduler merge function is called.
-
-
-Nov 11 2002, Jens Axboe <jens.axboe@oracle.com>
-
-
diff --git a/Documentation/block/index.rst b/Documentation/block/index.rst
new file mode 100644
index 000000000000..9fea696f9daa
--- /dev/null
+++ b/Documentation/block/index.rst
@@ -0,0 +1,24 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====
+Block
+=====
+
+.. toctree::
+ :maxdepth: 1
+
+ bfq-iosched
+ biovecs
+ blk-mq
+ cmdline-partition
+ data-integrity
+ deadline-iosched
+ inline-encryption
+ ioprio
+ kyber-iosched
+ null_blk
+ pr
+ stat
+ switching-sched
+ writeback_cache_control
+ ublk
diff --git a/Documentation/block/inline-encryption.rst b/Documentation/block/inline-encryption.rst
new file mode 100644
index 000000000000..6380e6ab492b
--- /dev/null
+++ b/Documentation/block/inline-encryption.rst
@@ -0,0 +1,550 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. _inline_encryption:
+
+=================
+Inline Encryption
+=================
+
+Background
+==========
+
+Inline encryption hardware sits logically between memory and disk, and can
+en/decrypt data as it goes in/out of the disk. For each I/O request, software
+can control exactly how the inline encryption hardware will en/decrypt the data
+in terms of key, algorithm, data unit size (the granularity of en/decryption),
+and data unit number (a value that determines the initialization vector(s)).
+
+Some inline encryption hardware accepts all encryption parameters including raw
+keys directly in low-level I/O requests. However, most inline encryption
+hardware instead has a fixed number of "keyslots" and requires that the key,
+algorithm, and data unit size first be programmed into a keyslot. Each
+low-level I/O request then just contains a keyslot index and data unit number.
+
+Note that inline encryption hardware is very different from traditional crypto
+accelerators, which are supported through the kernel crypto API. Traditional
+crypto accelerators operate on memory regions, whereas inline encryption
+hardware operates on I/O requests. Thus, inline encryption hardware needs to be
+managed by the block layer, not the kernel crypto API.
+
+Inline encryption hardware is also very different from "self-encrypting drives",
+such as those based on the TCG Opal or ATA Security standards. Self-encrypting
+drives don't provide fine-grained control of encryption and provide no way to
+verify the correctness of the resulting ciphertext. Inline encryption hardware
+provides fine-grained control of encryption, including the choice of key and
+initialization vector for each sector, and can be tested for correctness.
+
+Objective
+=========
+
+We want to support inline encryption in the kernel. To make testing easier, we
+also want support for falling back to the kernel crypto API when actual inline
+encryption hardware is absent. We also want inline encryption to work with
+layered devices like device-mapper and loopback (i.e. we want to be able to use
+the inline encryption hardware of the underlying devices if present, or else
+fall back to crypto API en/decryption).
+
+Constraints and notes
+=====================
+
+- We need a way for upper layers (e.g. filesystems) to specify an encryption
+ context to use for en/decrypting a bio, and device drivers (e.g. UFSHCD) need
+ to be able to use that encryption context when they process the request.
+ Encryption contexts also introduce constraints on bio merging; the block layer
+ needs to be aware of these constraints.
+
+- Different inline encryption hardware has different supported algorithms,
+ supported data unit sizes, maximum data unit numbers, etc. We call these
+ properties the "crypto capabilities". We need a way for device drivers to
+ advertise crypto capabilities to upper layers in a generic way.
+
+- Inline encryption hardware usually (but not always) requires that keys be
+ programmed into keyslots before being used. Since programming keyslots may be
+ slow and there may not be very many keyslots, we shouldn't just program the
+ key for every I/O request, but rather keep track of which keys are in the
+ keyslots and reuse an already-programmed keyslot when possible.
+
+- Upper layers typically define a specific end-of-life for crypto keys, e.g.
+ when an encrypted directory is locked or when a crypto mapping is torn down.
+ At these times, keys are wiped from memory. We must provide a way for upper
+ layers to also evict keys from any keyslots they are present in.
+
+- When possible, device-mapper devices must be able to pass through the inline
+ encryption support of their underlying devices. However, it doesn't make
+ sense for device-mapper devices to have keyslots themselves.
+
+Basic design
+============
+
+We introduce ``struct blk_crypto_key`` to represent an inline encryption key and
+how it will be used. This includes the type of the key (raw or
+hardware-wrapped); the actual bytes of the key; the size of the key; the
+algorithm and data unit size the key will be used with; and the number of bytes
+needed to represent the maximum data unit number the key will be used with.
+
+We introduce ``struct bio_crypt_ctx`` to represent an encryption context. It
+contains a data unit number and a pointer to a blk_crypto_key. We add pointers
+to a bio_crypt_ctx to ``struct bio`` and ``struct request``; this allows users
+of the block layer (e.g. filesystems) to provide an encryption context when
+creating a bio and have it be passed down the stack for processing by the block
+layer and device drivers. Note that the encryption context doesn't explicitly
+say whether to encrypt or decrypt, as that is implicit from the direction of the
+bio; WRITE means encrypt, and READ means decrypt.
+
+We also introduce ``struct blk_crypto_profile`` to contain all generic inline
+encryption-related state for a particular inline encryption device. The
+blk_crypto_profile serves as the way that drivers for inline encryption hardware
+advertise their crypto capabilities and provide certain functions (e.g.,
+functions to program and evict keys) to upper layers. Each device driver that
+wants to support inline encryption will construct a blk_crypto_profile, then
+associate it with the disk's request_queue.
+
+The blk_crypto_profile also manages the hardware's keyslots, when applicable.
+This happens in the block layer, so that users of the block layer can just
+specify encryption contexts and don't need to know about keyslots at all, nor do
+device drivers need to care about most details of keyslot management.
+
+Specifically, for each keyslot, the block layer (via the blk_crypto_profile)
+keeps track of which blk_crypto_key that keyslot contains (if any), and how many
+in-flight I/O requests are using it. When the block layer creates a
+``struct request`` for a bio that has an encryption context, it grabs a keyslot
+that already contains the key if possible. Otherwise it waits for an idle
+keyslot (a keyslot that isn't in-use by any I/O), then programs the key into the
+least-recently-used idle keyslot using the function the device driver provided.
+In both cases, the resulting keyslot is stored in the ``crypt_keyslot`` field of
+the request, where it is then accessible to device drivers and is released after
+the request completes.
+
+``struct request`` also contains a pointer to the original bio_crypt_ctx.
+Requests can be built from multiple bios, and the block layer must take the
+encryption context into account when trying to merge bios and requests. For two
+bios/requests to be merged, they must have compatible encryption contexts: both
+unencrypted, or both encrypted with the same key and contiguous data unit
+numbers. Only the encryption context for the first bio in a request is
+retained, since the remaining bios have been verified to be merge-compatible
+with the first bio.
+
+To make it possible for inline encryption to work with request_queue based
+layered devices, when a request is cloned, its encryption context is cloned as
+well. When the cloned request is submitted, it is then processed as usual; this
+includes getting a keyslot from the clone's target device if needed.
+
+blk-crypto-fallback
+===================
+
+It is desirable for the inline encryption support of upper layers (e.g.
+filesystems) to be testable without real inline encryption hardware, and
+likewise for the block layer's keyslot management logic. It is also desirable
+to allow upper layers to just always use inline encryption rather than have to
+implement encryption in multiple ways.
+
+Therefore, we also introduce *blk-crypto-fallback*, which is an implementation
+of inline encryption using the kernel crypto API. blk-crypto-fallback is built
+into the block layer, so it works on any block device without any special setup.
+Essentially, when a bio with an encryption context is submitted to a
+block_device that doesn't support that encryption context, the block layer will
+handle en/decryption of the bio using blk-crypto-fallback.
+
+For encryption, the data cannot be encrypted in-place, as callers usually rely
+on it being unmodified. Instead, blk-crypto-fallback allocates bounce pages,
+fills a new bio with those bounce pages, encrypts the data into those bounce
+pages, and submits that "bounce" bio. When the bounce bio completes,
+blk-crypto-fallback completes the original bio. If the original bio is too
+large, multiple bounce bios may be required; see the code for details.
+
+For decryption, blk-crypto-fallback "wraps" the bio's completion callback
+(``bi_complete``) and private data (``bi_private``) with its own, unsets the
+bio's encryption context, then submits the bio. If the read completes
+successfully, blk-crypto-fallback restores the bio's original completion
+callback and private data, then decrypts the bio's data in-place using the
+kernel crypto API. Decryption happens from a workqueue, as it may sleep.
+Afterwards, blk-crypto-fallback completes the bio.
+
+In both cases, the bios that blk-crypto-fallback submits no longer have an
+encryption context. Therefore, lower layers only see standard unencrypted I/O.
+
+blk-crypto-fallback also defines its own blk_crypto_profile and has its own
+"keyslots"; its keyslots contain ``struct crypto_skcipher`` objects. The reason
+for this is twofold. First, it allows the keyslot management logic to be tested
+without actual inline encryption hardware. Second, similar to actual inline
+encryption hardware, the crypto API doesn't accept keys directly in requests but
+rather requires that keys be set ahead of time, and setting keys can be
+expensive; moreover, allocating a crypto_skcipher can't happen on the I/O path
+at all due to the locks it takes. Therefore, the concept of keyslots still
+makes sense for blk-crypto-fallback.
+
+Note that regardless of whether real inline encryption hardware or
+blk-crypto-fallback is used, the ciphertext written to disk (and hence the
+on-disk format of data) will be the same (assuming that both the inline
+encryption hardware's implementation and the kernel crypto API's implementation
+of the algorithm being used adhere to spec and function correctly).
+
+blk-crypto-fallback is optional and is controlled by the
+``CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK`` kernel configuration option.
+
+API presented to users of the block layer
+=========================================
+
+``blk_crypto_config_supported()`` allows users to check ahead of time whether
+inline encryption with particular crypto settings will work on a particular
+block_device -- either via hardware or via blk-crypto-fallback. This function
+takes in a ``struct blk_crypto_config`` which is like blk_crypto_key, but omits
+the actual bytes of the key and instead just contains the algorithm, data unit
+size, etc. This function can be useful if blk-crypto-fallback is disabled.
+
+``blk_crypto_init_key()`` allows users to initialize a blk_crypto_key.
+
+Users must call ``blk_crypto_start_using_key()`` before actually starting to use
+a blk_crypto_key on a block_device (even if ``blk_crypto_config_supported()``
+was called earlier). This is needed to initialize blk-crypto-fallback if it
+will be needed. This must not be called from the data path, as this may have to
+allocate resources, which may deadlock in that case.
+
+Next, to attach an encryption context to a bio, users should call
+``bio_crypt_set_ctx()``. This function allocates a bio_crypt_ctx and attaches
+it to a bio, given the blk_crypto_key and the data unit number that will be used
+for en/decryption. Users don't need to worry about freeing the bio_crypt_ctx
+later, as that happens automatically when the bio is freed or reset.
+
+Finally, when done using inline encryption with a blk_crypto_key on a
+block_device, users must call ``blk_crypto_evict_key()``. This ensures that
+the key is evicted from all keyslots it may be programmed into and unlinked from
+any kernel data structures it may be linked into.
+
+In summary, for users of the block layer, the lifecycle of a blk_crypto_key is
+as follows:
+
+1. ``blk_crypto_config_supported()`` (optional)
+2. ``blk_crypto_init_key()``
+3. ``blk_crypto_start_using_key()``
+4. ``bio_crypt_set_ctx()`` (potentially many times)
+5. ``blk_crypto_evict_key()`` (after all I/O has completed)
+6. Zeroize the blk_crypto_key (this has no dedicated function)
+
+If a blk_crypto_key is being used on multiple block_devices, then
+``blk_crypto_config_supported()`` (if used), ``blk_crypto_start_using_key()``,
+and ``blk_crypto_evict_key()`` must be called on each block_device.
+
+API presented to device drivers
+===============================
+
+A device driver that wants to support inline encryption must set up a
+blk_crypto_profile in the request_queue of its device. To do this, it first
+must call ``blk_crypto_profile_init()`` (or its resource-managed variant
+``devm_blk_crypto_profile_init()``), providing the number of keyslots.
+
+Next, it must advertise its crypto capabilities by setting fields in the
+blk_crypto_profile, e.g. ``modes_supported`` and ``max_dun_bytes_supported``.
+
+It then must set function pointers in the ``ll_ops`` field of the
+blk_crypto_profile to tell upper layers how to control the inline encryption
+hardware, e.g. how to program and evict keyslots. Most drivers will need to
+implement ``keyslot_program`` and ``keyslot_evict``. For details, see the
+comments for ``struct blk_crypto_ll_ops``.
+
+Once the driver registers a blk_crypto_profile with a request_queue, I/O
+requests the driver receives via that queue may have an encryption context. All
+encryption contexts will be compatible with the crypto capabilities declared in
+the blk_crypto_profile, so drivers don't need to worry about handling
+unsupported requests. Also, if a nonzero number of keyslots was declared in the
+blk_crypto_profile, then all I/O requests that have an encryption context will
+also have a keyslot which was already programmed with the appropriate key.
+
+If the driver implements runtime suspend and its blk_crypto_ll_ops don't work
+while the device is runtime-suspended, then the driver must also set the ``dev``
+field of the blk_crypto_profile to point to the ``struct device`` that will be
+resumed before any of the low-level operations are called.
+
+If there are situations where the inline encryption hardware loses the contents
+of its keyslots, e.g. device resets, the driver must handle reprogramming the
+keyslots. To do this, the driver may call ``blk_crypto_reprogram_all_keys()``.
+
+Finally, if the driver used ``blk_crypto_profile_init()`` instead of
+``devm_blk_crypto_profile_init()``, then it is responsible for calling
+``blk_crypto_profile_destroy()`` when the crypto profile is no longer needed.
+
+Layered Devices
+===============
+
+Request queue based layered devices like dm-rq that wish to support inline
+encryption need to create their own blk_crypto_profile for their request_queue,
+and expose whatever functionality they choose. When a layered device wants to
+pass a clone of that request to another request_queue, blk-crypto will
+initialize and prepare the clone as necessary.
+
+Interaction between inline encryption and blk integrity
+=======================================================
+
+At the time of this patch, there is no real hardware that supports both these
+features. However, these features do interact with each other, and it's not
+completely trivial to make them both work together properly. In particular,
+when a WRITE bio wants to use inline encryption on a device that supports both
+features, the bio will have an encryption context specified, after which
+its integrity information is calculated (using the plaintext data, since
+the encryption will happen while data is being written), and the data and
+integrity info is sent to the device. Obviously, the integrity info must be
+verified before the data is encrypted. After the data is encrypted, the device
+must not store the integrity info that it received with the plaintext data
+since that might reveal information about the plaintext data. As such, it must
+re-generate the integrity info from the ciphertext data and store that on disk
+instead. Another issue with storing the integrity info of the plaintext data is
+that it changes the on disk format depending on whether hardware inline
+encryption support is present or the kernel crypto API fallback is used (since
+if the fallback is used, the device will receive the integrity info of the
+ciphertext, not that of the plaintext).
+
+Because there isn't any real hardware yet, it seems prudent to assume that
+hardware implementations might not implement both features together correctly,
+and disallow the combination for now. Whenever a device supports integrity, the
+kernel will pretend that the device does not support hardware inline encryption
+(by setting the blk_crypto_profile in the request_queue of the device to NULL).
+When the crypto API fallback is enabled, this means that all bios with and
+encryption context will use the fallback, and IO will complete as usual. When
+the fallback is disabled, a bio with an encryption context will be failed.
+
+.. _hardware_wrapped_keys:
+
+Hardware-wrapped keys
+=====================
+
+Motivation and threat model
+---------------------------
+
+Linux storage encryption (dm-crypt, fscrypt, eCryptfs, etc.) traditionally
+relies on the raw encryption key(s) being present in kernel memory so that the
+encryption can be performed. This traditionally isn't seen as a problem because
+the key(s) won't be present during an offline attack, which is the main type of
+attack that storage encryption is intended to protect from.
+
+However, there is an increasing desire to also protect users' data from other
+types of attacks (to the extent possible), including:
+
+- Cold boot attacks, where an attacker with physical access to a system suddenly
+ powers it off, then immediately dumps the system memory to extract recently
+ in-use encryption keys, then uses these keys to decrypt user data on-disk.
+
+- Online attacks where the attacker is able to read kernel memory without fully
+ compromising the system, followed by an offline attack where any extracted
+ keys can be used to decrypt user data on-disk. An example of such an online
+ attack would be if the attacker is able to run some code on the system that
+ exploits a Meltdown-like vulnerability but is unable to escalate privileges.
+
+- Online attacks where the attacker fully compromises the system, but their data
+ exfiltration is significantly time-limited and/or bandwidth-limited, so in
+ order to completely exfiltrate the data they need to extract the encryption
+ keys to use in a later offline attack.
+
+Hardware-wrapped keys are a feature of inline encryption hardware that is
+designed to protect users' data from the above attacks (to the extent possible),
+without introducing limitations such as a maximum number of keys.
+
+Note that it is impossible to **fully** protect users' data from these attacks.
+Even in the attacks where the attacker "just" gets read access to kernel memory,
+they can still extract any user data that is present in memory, including
+plaintext pagecache pages of encrypted files. The focus here is just on
+protecting the encryption keys, as those instantly give access to **all** user
+data in any following offline attack, rather than just some of it (where which
+data is included in that "some" might not be controlled by the attacker).
+
+Solution overview
+-----------------
+
+Inline encryption hardware typically has "keyslots" into which software can
+program keys for the hardware to use; the contents of keyslots typically can't
+be read back by software. As such, the above security goals could be achieved
+if the kernel simply erased its copy of the key(s) after programming them into
+keyslot(s) and thereafter only referred to them via keyslot number.
+
+However, that naive approach runs into a couple problems:
+
+- It limits the number of unlocked keys to the number of keyslots, which
+ typically is a small number. In cases where there is only one encryption key
+ system-wide (e.g., a full-disk encryption key), that can be tolerable.
+ However, in general there can be many logged-in users with many different
+ keys, and/or many running applications with application-specific encrypted
+ storage areas. This is especially true if file-based encryption (e.g.
+ fscrypt) is being used.
+
+- Inline crypto engines typically lose the contents of their keyslots if the
+ storage controller (usually UFS or eMMC) is reset. Resetting the storage
+ controller is a standard error recovery procedure that is executed if certain
+ types of storage errors occur, and such errors can occur at any time.
+ Therefore, when inline crypto is being used, the operating system must always
+ be ready to reprogram the keyslots without user intervention.
+
+Thus, it is important for the kernel to still have a way to "remind" the
+hardware about a key, without actually having the raw key itself.
+
+Somewhat less importantly, it is also desirable that the raw keys are never
+visible to software at all, even while being initially unlocked. This would
+ensure that a read-only compromise of system memory will never allow a key to be
+extracted to be used off-system, even if it occurs when a key is being unlocked.
+
+To solve all these problems, some vendors of inline encryption hardware have
+made their hardware support *hardware-wrapped keys*. Hardware-wrapped keys
+are encrypted keys that can only be unwrapped (decrypted) and used by hardware
+-- either by the inline encryption hardware itself, or by a dedicated hardware
+block that can directly provision keys to the inline encryption hardware.
+
+(We refer to them as "hardware-wrapped keys" rather than simply "wrapped keys"
+to add some clarity in cases where there could be other types of wrapped keys,
+such as in file-based encryption. Key wrapping is a commonly used technique.)
+
+The key which wraps (encrypts) hardware-wrapped keys is a hardware-internal key
+that is never exposed to software; it is either a persistent key (a "long-term
+wrapping key") or a per-boot key (an "ephemeral wrapping key"). The long-term
+wrapped form of the key is what is initially unlocked, but it is erased from
+memory as soon as it is converted into an ephemerally-wrapped key. In-use
+hardware-wrapped keys are always ephemerally-wrapped, not long-term wrapped.
+
+As inline encryption hardware can only be used to encrypt/decrypt data on-disk,
+the hardware also includes a level of indirection; it doesn't use the unwrapped
+key directly for inline encryption, but rather derives both an inline encryption
+key and a "software secret" from it. Software can use the "software secret" for
+tasks that can't use the inline encryption hardware, such as filenames
+encryption. The software secret is not protected from memory compromise.
+
+Key hierarchy
+-------------
+
+Here is the key hierarchy for a hardware-wrapped key::
+
+ Hardware-wrapped key
+ |
+ |
+ <Hardware KDF>
+ |
+ -----------------------------
+ | |
+ Inline encryption key Software secret
+
+The components are:
+
+- *Hardware-wrapped key*: a key for the hardware's KDF (Key Derivation
+ Function), in ephemerally-wrapped form. The key wrapping algorithm is a
+ hardware implementation detail that doesn't impact kernel operation, but a
+ strong authenticated encryption algorithm such as AES-256-GCM is recommended.
+
+- *Hardware KDF*: a KDF (Key Derivation Function) which the hardware uses to
+ derive subkeys after unwrapping the wrapped key. The hardware's choice of KDF
+ doesn't impact kernel operation, but it does need to be known for testing
+ purposes, and it's also assumed to have at least a 256-bit security strength.
+ All known hardware uses the SP800-108 KDF in Counter Mode with AES-256-CMAC,
+ with a particular choice of labels and contexts; new hardware should use this
+ already-vetted KDF.
+
+- *Inline encryption key*: a derived key which the hardware directly provisions
+ to a keyslot of the inline encryption hardware, without exposing it to
+ software. In all known hardware, this will always be an AES-256-XTS key.
+ However, in principle other encryption algorithms could be supported too.
+ Hardware must derive distinct subkeys for each supported encryption algorithm.
+
+- *Software secret*: a derived key which the hardware returns to software so
+ that software can use it for cryptographic tasks that can't use inline
+ encryption. This value is cryptographically isolated from the inline
+ encryption key, i.e. knowing one doesn't reveal the other. (The KDF ensures
+ this.) Currently, the software secret is always 32 bytes and thus is suitable
+ for cryptographic applications that require up to a 256-bit security strength.
+ Some use cases (e.g. full-disk encryption) won't require the software secret.
+
+Example: in the case of fscrypt, the fscrypt master key (the key that protects a
+particular set of encrypted directories) is made hardware-wrapped. The inline
+encryption key is used as the file contents encryption key, while the software
+secret (rather than the master key directly) is used to key fscrypt's KDF
+(HKDF-SHA512) to derive other subkeys such as filenames encryption keys.
+
+Note that currently this design assumes a single inline encryption key per
+hardware-wrapped key, without any further key derivation. Thus, in the case of
+fscrypt, currently hardware-wrapped keys are only compatible with the "inline
+encryption optimized" settings, which use one file contents encryption key per
+encryption policy rather than one per file. This design could be extended to
+make the hardware derive per-file keys using per-file nonces passed down the
+storage stack, and in fact some hardware already supports this; future work is
+planned to remove this limitation by adding the corresponding kernel support.
+
+Kernel support
+--------------
+
+The inline encryption support of the kernel's block layer ("blk-crypto") has
+been extended to support hardware-wrapped keys as an alternative to raw keys,
+when hardware support is available. This works in the following way:
+
+- A ``key_types_supported`` field is added to the crypto capabilities in
+ ``struct blk_crypto_profile``. This allows device drivers to declare that
+ they support raw keys, hardware-wrapped keys, or both.
+
+- ``struct blk_crypto_key`` can now contain a hardware-wrapped key as an
+ alternative to a raw key; a ``key_type`` field is added to
+ ``struct blk_crypto_config`` to distinguish between the different key types.
+ This allows users of blk-crypto to en/decrypt data using a hardware-wrapped
+ key in a way very similar to using a raw key.
+
+- A new method ``blk_crypto_ll_ops::derive_sw_secret`` is added. Device drivers
+ that support hardware-wrapped keys must implement this method. Users of
+ blk-crypto can call ``blk_crypto_derive_sw_secret()`` to access this method.
+
+- The programming and eviction of hardware-wrapped keys happens via
+ ``blk_crypto_ll_ops::keyslot_program`` and
+ ``blk_crypto_ll_ops::keyslot_evict``, just like it does for raw keys. If a
+ driver supports hardware-wrapped keys, then it must handle hardware-wrapped
+ keys being passed to these methods.
+
+blk-crypto-fallback doesn't support hardware-wrapped keys. Therefore,
+hardware-wrapped keys can only be used with actual inline encryption hardware.
+
+All the above deals with hardware-wrapped keys in ephemerally-wrapped form only.
+To get such keys in the first place, new block device ioctls have been added to
+provide a generic interface to creating and preparing such keys:
+
+- ``BLKCRYPTOIMPORTKEY`` converts a raw key to long-term wrapped form. It takes
+ in a pointer to a ``struct blk_crypto_import_key_arg``. The caller must set
+ ``raw_key_ptr`` and ``raw_key_size`` to the pointer and size (in bytes) of the
+ raw key to import. On success, ``BLKCRYPTOIMPORTKEY`` returns 0 and writes
+ the resulting long-term wrapped key blob to the buffer pointed to by
+ ``lt_key_ptr``, which is of maximum size ``lt_key_size``. It also updates
+ ``lt_key_size`` to be the actual size of the key. On failure, it returns -1
+ and sets errno. An errno of ``EOPNOTSUPP`` indicates that the block device
+ does not support hardware-wrapped keys. An errno of ``EOVERFLOW`` indicates
+ that the output buffer did not have enough space for the key blob.
+
+- ``BLKCRYPTOGENERATEKEY`` is like ``BLKCRYPTOIMPORTKEY``, but it has the
+ hardware generate the key instead of importing one. It takes in a pointer to
+ a ``struct blk_crypto_generate_key_arg``.
+
+- ``BLKCRYPTOPREPAREKEY`` converts a key from long-term wrapped form to
+ ephemerally-wrapped form. It takes in a pointer to a ``struct
+ blk_crypto_prepare_key_arg``. The caller must set ``lt_key_ptr`` and
+ ``lt_key_size`` to the pointer and size (in bytes) of the long-term wrapped
+ key blob to convert. On success, ``BLKCRYPTOPREPAREKEY`` returns 0 and writes
+ the resulting ephemerally-wrapped key blob to the buffer pointed to by
+ ``eph_key_ptr``, which is of maximum size ``eph_key_size``. It also updates
+ ``eph_key_size`` to be the actual size of the key. On failure, it returns -1
+ and sets errno. Errno values of ``EOPNOTSUPP`` and ``EOVERFLOW`` mean the
+ same as they do for ``BLKCRYPTOIMPORTKEY``. An errno of ``EBADMSG`` indicates
+ that the long-term wrapped key is invalid.
+
+Userspace needs to use either ``BLKCRYPTOIMPORTKEY`` or ``BLKCRYPTOGENERATEKEY``
+once to create a key, and then ``BLKCRYPTOPREPAREKEY`` each time the key is
+unlocked and added to the kernel. Note that these ioctls have no relevance for
+raw keys; they are only for hardware-wrapped keys.
+
+Testability
+-----------
+
+Both the hardware KDF and the inline encryption itself are well-defined
+algorithms that don't depend on any secrets other than the unwrapped key.
+Therefore, if the unwrapped key is known to software, these algorithms can be
+reproduced in software in order to verify the ciphertext that is written to disk
+by the inline encryption hardware.
+
+However, the unwrapped key will only be known to software for testing if the
+"import" functionality is used. Proper testing is not possible in the
+"generate" case where the hardware generates the key itself. The correct
+operation of the "generate" mode thus relies on the security and correctness of
+the hardware RNG and its use to generate the key, as well as the testing of the
+"import" mode as that should cover all parts other than the key generation.
+
+For an example of a test that verifies the ciphertext written to disk in the
+"import" mode, see the fscrypt hardware-wrapped key tests in xfstests, or
+`Android's vts_kernel_encryption_test
+<https://android.googlesource.com/platform/test/vts-testcase/kernel/+/refs/heads/main/encryption/>`_.
diff --git a/Documentation/block/ioprio.rst b/Documentation/block/ioprio.rst
new file mode 100644
index 000000000000..4662e1ff3d81
--- /dev/null
+++ b/Documentation/block/ioprio.rst
@@ -0,0 +1,178 @@
+===================
+Block io priorities
+===================
+
+
+Intro
+-----
+
+The io priority feature enables users to io nice processes or process groups,
+similar to what has been possible with cpu scheduling for ages. Support for io
+priorities is io scheduler dependent and currently supported by bfq and
+mq-deadline.
+
+Scheduling classes
+------------------
+
+Three generic scheduling classes are implemented for io priorities that
+determine how io is served for a process.
+
+IOPRIO_CLASS_RT: This is the realtime io class. This scheduling class is given
+higher priority than any other in the system, processes from this class are
+given first access to the disk every time. Thus it needs to be used with some
+care, one io RT process can starve the entire system. Within the RT class,
+there are 8 levels of class data that determine exactly how much time this
+process needs the disk for on each service. In the future this might change
+to be more directly mappable to performance, by passing in a wanted data
+rate instead.
+
+IOPRIO_CLASS_BE: This is the best-effort scheduling class, which is the default
+for any process that hasn't set a specific io priority. The class data
+determines how much io bandwidth the process will get, it's directly mappable
+to the cpu nice levels just more coarsely implemented. 0 is the highest
+BE prio level, 7 is the lowest. The mapping between cpu nice level and io
+nice level is determined as: io_nice = (cpu_nice + 20) / 5.
+
+IOPRIO_CLASS_IDLE: This is the idle scheduling class, processes running at this
+level only get io time when no one else needs the disk. The idle class has no
+class data, since it doesn't really apply here.
+
+Tools
+-----
+
+See below for a sample ionice tool. Usage::
+
+ # ionice -c<class> -n<level> -p<pid>
+
+If pid isn't given, the current process is assumed. IO priority settings
+are inherited on fork, so you can use ionice to start the process at a given
+level::
+
+ # ionice -c2 -n0 /bin/ls
+
+will run ls at the best-effort scheduling class at the highest priority.
+For a running process, you can give the pid instead::
+
+ # ionice -c1 -n2 -p100
+
+will change pid 100 to run at the realtime scheduling class, at priority 2.
+
+ionice.c tool::
+
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <errno.h>
+ #include <getopt.h>
+ #include <unistd.h>
+ #include <sys/ptrace.h>
+ #include <asm/unistd.h>
+
+ extern int sys_ioprio_set(int, int, int);
+ extern int sys_ioprio_get(int, int);
+
+ #if defined(__i386__)
+ #define __NR_ioprio_set 289
+ #define __NR_ioprio_get 290
+ #elif defined(__ppc__)
+ #define __NR_ioprio_set 273
+ #define __NR_ioprio_get 274
+ #elif defined(__x86_64__)
+ #define __NR_ioprio_set 251
+ #define __NR_ioprio_get 252
+ #else
+ #error "Unsupported arch"
+ #endif
+
+ static inline int ioprio_set(int which, int who, int ioprio)
+ {
+ return syscall(__NR_ioprio_set, which, who, ioprio);
+ }
+
+ static inline int ioprio_get(int which, int who)
+ {
+ return syscall(__NR_ioprio_get, which, who);
+ }
+
+ enum {
+ IOPRIO_CLASS_NONE,
+ IOPRIO_CLASS_RT,
+ IOPRIO_CLASS_BE,
+ IOPRIO_CLASS_IDLE,
+ };
+
+ enum {
+ IOPRIO_WHO_PROCESS = 1,
+ IOPRIO_WHO_PGRP,
+ IOPRIO_WHO_USER,
+ };
+
+ #define IOPRIO_CLASS_SHIFT 13
+
+ const char *to_prio[] = { "none", "realtime", "best-effort", "idle", };
+
+ int main(int argc, char *argv[])
+ {
+ int ioprio = 4, set = 0, ioprio_class = IOPRIO_CLASS_BE;
+ int c, pid = 0;
+
+ while ((c = getopt(argc, argv, "+n:c:p:")) != EOF) {
+ switch (c) {
+ case 'n':
+ ioprio = strtol(optarg, NULL, 10);
+ set = 1;
+ break;
+ case 'c':
+ ioprio_class = strtol(optarg, NULL, 10);
+ set = 1;
+ break;
+ case 'p':
+ pid = strtol(optarg, NULL, 10);
+ break;
+ }
+ }
+
+ switch (ioprio_class) {
+ case IOPRIO_CLASS_NONE:
+ ioprio_class = IOPRIO_CLASS_BE;
+ break;
+ case IOPRIO_CLASS_RT:
+ case IOPRIO_CLASS_BE:
+ break;
+ case IOPRIO_CLASS_IDLE:
+ ioprio = 7;
+ break;
+ default:
+ printf("bad prio class %d\n", ioprio_class);
+ return 1;
+ }
+
+ if (!set) {
+ if (!pid && argv[optind])
+ pid = strtol(argv[optind], NULL, 10);
+
+ ioprio = ioprio_get(IOPRIO_WHO_PROCESS, pid);
+
+ printf("pid=%d, %d\n", pid, ioprio);
+
+ if (ioprio == -1)
+ perror("ioprio_get");
+ else {
+ ioprio_class = ioprio >> IOPRIO_CLASS_SHIFT;
+ ioprio = ioprio & 0xff;
+ printf("%s: prio %d\n", to_prio[ioprio_class], ioprio);
+ }
+ } else {
+ if (ioprio_set(IOPRIO_WHO_PROCESS, pid, ioprio | ioprio_class << IOPRIO_CLASS_SHIFT) == -1) {
+ perror("ioprio_set");
+ return 1;
+ }
+
+ if (argv[optind])
+ execvp(argv[optind], &argv[optind]);
+ }
+
+ return 0;
+ }
+
+
+March 11 2005, Jens Axboe <jens.axboe@oracle.com>
diff --git a/Documentation/block/ioprio.txt b/Documentation/block/ioprio.txt
deleted file mode 100644
index 8ed8c59380b4..000000000000
--- a/Documentation/block/ioprio.txt
+++ /dev/null
@@ -1,183 +0,0 @@
-Block io priorities
-===================
-
-
-Intro
------
-
-With the introduction of cfq v3 (aka cfq-ts or time sliced cfq), basic io
-priorities are supported for reads on files. This enables users to io nice
-processes or process groups, similar to what has been possible with cpu
-scheduling for ages. This document mainly details the current possibilities
-with cfq; other io schedulers do not support io priorities thus far.
-
-Scheduling classes
-------------------
-
-CFQ implements three generic scheduling classes that determine how io is
-served for a process.
-
-IOPRIO_CLASS_RT: This is the realtime io class. This scheduling class is given
-higher priority than any other in the system, processes from this class are
-given first access to the disk every time. Thus it needs to be used with some
-care, one io RT process can starve the entire system. Within the RT class,
-there are 8 levels of class data that determine exactly how much time this
-process needs the disk for on each service. In the future this might change
-to be more directly mappable to performance, by passing in a wanted data
-rate instead.
-
-IOPRIO_CLASS_BE: This is the best-effort scheduling class, which is the default
-for any process that hasn't set a specific io priority. The class data
-determines how much io bandwidth the process will get, it's directly mappable
-to the cpu nice levels just more coarsely implemented. 0 is the highest
-BE prio level, 7 is the lowest. The mapping between cpu nice level and io
-nice level is determined as: io_nice = (cpu_nice + 20) / 5.
-
-IOPRIO_CLASS_IDLE: This is the idle scheduling class, processes running at this
-level only get io time when no one else needs the disk. The idle class has no
-class data, since it doesn't really apply here.
-
-Tools
------
-
-See below for a sample ionice tool. Usage:
-
-# ionice -c<class> -n<level> -p<pid>
-
-If pid isn't given, the current process is assumed. IO priority settings
-are inherited on fork, so you can use ionice to start the process at a given
-level:
-
-# ionice -c2 -n0 /bin/ls
-
-will run ls at the best-effort scheduling class at the highest priority.
-For a running process, you can give the pid instead:
-
-# ionice -c1 -n2 -p100
-
-will change pid 100 to run at the realtime scheduling class, at priority 2.
-
----> snip ionice.c tool <---
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <getopt.h>
-#include <unistd.h>
-#include <sys/ptrace.h>
-#include <asm/unistd.h>
-
-extern int sys_ioprio_set(int, int, int);
-extern int sys_ioprio_get(int, int);
-
-#if defined(__i386__)
-#define __NR_ioprio_set 289
-#define __NR_ioprio_get 290
-#elif defined(__ppc__)
-#define __NR_ioprio_set 273
-#define __NR_ioprio_get 274
-#elif defined(__x86_64__)
-#define __NR_ioprio_set 251
-#define __NR_ioprio_get 252
-#elif defined(__ia64__)
-#define __NR_ioprio_set 1274
-#define __NR_ioprio_get 1275
-#else
-#error "Unsupported arch"
-#endif
-
-static inline int ioprio_set(int which, int who, int ioprio)
-{
- return syscall(__NR_ioprio_set, which, who, ioprio);
-}
-
-static inline int ioprio_get(int which, int who)
-{
- return syscall(__NR_ioprio_get, which, who);
-}
-
-enum {
- IOPRIO_CLASS_NONE,
- IOPRIO_CLASS_RT,
- IOPRIO_CLASS_BE,
- IOPRIO_CLASS_IDLE,
-};
-
-enum {
- IOPRIO_WHO_PROCESS = 1,
- IOPRIO_WHO_PGRP,
- IOPRIO_WHO_USER,
-};
-
-#define IOPRIO_CLASS_SHIFT 13
-
-const char *to_prio[] = { "none", "realtime", "best-effort", "idle", };
-
-int main(int argc, char *argv[])
-{
- int ioprio = 4, set = 0, ioprio_class = IOPRIO_CLASS_BE;
- int c, pid = 0;
-
- while ((c = getopt(argc, argv, "+n:c:p:")) != EOF) {
- switch (c) {
- case 'n':
- ioprio = strtol(optarg, NULL, 10);
- set = 1;
- break;
- case 'c':
- ioprio_class = strtol(optarg, NULL, 10);
- set = 1;
- break;
- case 'p':
- pid = strtol(optarg, NULL, 10);
- break;
- }
- }
-
- switch (ioprio_class) {
- case IOPRIO_CLASS_NONE:
- ioprio_class = IOPRIO_CLASS_BE;
- break;
- case IOPRIO_CLASS_RT:
- case IOPRIO_CLASS_BE:
- break;
- case IOPRIO_CLASS_IDLE:
- ioprio = 7;
- break;
- default:
- printf("bad prio class %d\n", ioprio_class);
- return 1;
- }
-
- if (!set) {
- if (!pid && argv[optind])
- pid = strtol(argv[optind], NULL, 10);
-
- ioprio = ioprio_get(IOPRIO_WHO_PROCESS, pid);
-
- printf("pid=%d, %d\n", pid, ioprio);
-
- if (ioprio == -1)
- perror("ioprio_get");
- else {
- ioprio_class = ioprio >> IOPRIO_CLASS_SHIFT;
- ioprio = ioprio & 0xff;
- printf("%s: prio %d\n", to_prio[ioprio_class], ioprio);
- }
- } else {
- if (ioprio_set(IOPRIO_WHO_PROCESS, pid, ioprio | ioprio_class << IOPRIO_CLASS_SHIFT) == -1) {
- perror("ioprio_set");
- return 1;
- }
-
- if (argv[optind])
- execvp(argv[optind], &argv[optind]);
- }
-
- return 0;
-}
-
----> snip ionice.c tool <---
-
-
-March 11 2005, Jens Axboe <jens.axboe@oracle.com>
diff --git a/Documentation/block/kyber-iosched.rst b/Documentation/block/kyber-iosched.rst
new file mode 100644
index 000000000000..3e164dd0617c
--- /dev/null
+++ b/Documentation/block/kyber-iosched.rst
@@ -0,0 +1,15 @@
+============================
+Kyber I/O scheduler tunables
+============================
+
+The only two tunables for the Kyber scheduler are the target latencies for
+reads and synchronous writes. Kyber will throttle requests in order to meet
+these target latencies.
+
+read_lat_nsec
+-------------
+Target latency for reads (in nanoseconds).
+
+write_lat_nsec
+--------------
+Target latency for synchronous writes (in nanoseconds).
diff --git a/Documentation/block/kyber-iosched.txt b/Documentation/block/kyber-iosched.txt
deleted file mode 100644
index e94feacd7edc..000000000000
--- a/Documentation/block/kyber-iosched.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-Kyber I/O scheduler tunables
-===========================
-
-The only two tunables for the Kyber scheduler are the target latencies for
-reads and synchronous writes. Kyber will throttle requests in order to meet
-these target latencies.
-
-read_lat_nsec
--------------
-Target latency for reads (in nanoseconds).
-
-write_lat_nsec
---------------
-Target latency for synchronous writes (in nanoseconds).
diff --git a/Documentation/block/null_blk.rst b/Documentation/block/null_blk.rst
new file mode 100644
index 000000000000..4dd78f24d10a
--- /dev/null
+++ b/Documentation/block/null_blk.rst
@@ -0,0 +1,151 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+========================
+Null block device driver
+========================
+
+Overview
+========
+
+The null block device (``/dev/nullb*``) is used for benchmarking the various
+block-layer implementations. It emulates a block device of X gigabytes in size.
+It does not execute any read/write operation, just mark them as complete in
+the request queue. The following instances are possible:
+
+ Multi-queue block-layer
+
+ - Request-based.
+ - Configurable submission queues per device.
+
+ No block-layer (Known as bio-based)
+
+ - Bio-based. IO requests are submitted directly to the device driver.
+ - Directly accepts bio data structure and returns them.
+
+All of them have a completion queue for each core in the system.
+
+Module parameters
+=================
+
+queue_mode=[0-2]: Default: 2-Multi-queue
+ Selects which block-layer the module should instantiate with.
+
+ = ============
+ 0 Bio-based
+ 1 Single-queue (deprecated)
+ 2 Multi-queue
+ = ============
+
+home_node=[0--nr_nodes]: Default: NUMA_NO_NODE
+ Selects what CPU node the data structures are allocated from.
+
+gb=[Size in GB]: Default: 250GB
+ The size of the device reported to the system.
+
+bs=[Block size (in bytes)]: Default: 512 bytes
+ The block size reported to the system.
+
+nr_devices=[Number of devices]: Default: 1
+ Number of block devices instantiated. They are instantiated as /dev/nullb0,
+ etc.
+
+irqmode=[0-2]: Default: 1-Soft-irq
+ The completion mode used for completing IOs to the block-layer.
+
+ = ===========================================================================
+ 0 None.
+ 1 Soft-irq. Uses IPI to complete IOs across CPU nodes. Simulates the overhead
+ when IOs are issued from another CPU node than the home the device is
+ connected to.
+ 2 Timer: Waits a specific period (completion_nsec) for each IO before
+ completion.
+ = ===========================================================================
+
+completion_nsec=[ns]: Default: 10,000ns
+ Combined with irqmode=2 (timer). The time each completion event must wait.
+
+submit_queues=[1..nr_cpus]: Default: 1
+ The number of submission queues attached to the device driver. If unset, it
+ defaults to 1. For multi-queue, it is ignored when use_per_node_hctx module
+ parameter is 1.
+
+hw_queue_depth=[0..qdepth]: Default: 64
+ The hardware queue depth of the device.
+
+memory_backed=[0/1]: Default: 0
+ Whether or not to use a memory buffer to respond to IO requests
+
+ = =============================================
+ 0 Transfer no data in response to IO requests
+ 1 Use a memory buffer to respond to IO requests
+ = =============================================
+
+discard=[0/1]: Default: 0
+ Support discard operations (requires memory-backed null_blk device).
+
+ = =====================================
+ 0 Do not support discard operations
+ 1 Enable support for discard operations
+ = =====================================
+
+cache_size=[Size in MB]: Default: 0
+ Cache size in MB for memory-backed device.
+
+mbps=[Maximum bandwidth in MB/s]: Default: 0 (no limit)
+ Bandwidth limit for device performance.
+
+Multi-queue specific parameters
+-------------------------------
+
+use_per_node_hctx=[0/1]: Default: 0
+ Number of hardware context queues.
+
+ = =====================================================================
+ 0 The number of submit queues are set to the value of the submit_queues
+ parameter.
+ 1 The multi-queue block layer is instantiated with a hardware dispatch
+ queue for each CPU node in the system.
+ = =====================================================================
+
+no_sched=[0/1]: Default: 0
+ Enable/disable the io scheduler.
+
+ = ======================================
+ 0 nullb* use default blk-mq io scheduler
+ 1 nullb* doesn't use io scheduler
+ = ======================================
+
+blocking=[0/1]: Default: 0
+ Blocking behavior of the request queue.
+
+ = ===============================================================
+ 0 Register as a non-blocking blk-mq driver device.
+ 1 Register as a blocking blk-mq driver device, null_blk will set
+ the BLK_MQ_F_BLOCKING flag, indicating that it sometimes/always
+ needs to block in its ->queue_rq() function.
+ = ===============================================================
+
+shared_tags=[0/1]: Default: 0
+ Sharing tags between devices.
+
+ = ================================================================
+ 0 Tag set is not shared.
+ 1 Tag set shared between devices for blk-mq. Only makes sense with
+ nr_devices > 1, otherwise there's no tag set to share.
+ = ================================================================
+
+zoned=[0/1]: Default: 0
+ Device is a random-access or a zoned block device.
+
+ = ======================================================================
+ 0 Block device is exposed as a random-access block device.
+ 1 Block device is exposed as a host-managed zoned block device. Requires
+ CONFIG_BLK_DEV_ZONED.
+ = ======================================================================
+
+zone_size=[MB]: Default: 256
+ Per zone size when exposed as a zoned block device. Must be a power of two.
+
+zone_nr_conv=[nr_conv]: Default: 0
+ The number of conventional zones to create when block device is zoned. If
+ zone_nr_conv >= nr_zones, it will be reduced to nr_zones - 1.
diff --git a/Documentation/block/null_blk.txt b/Documentation/block/null_blk.txt
deleted file mode 100644
index ea2dafe49ae8..000000000000
--- a/Documentation/block/null_blk.txt
+++ /dev/null
@@ -1,94 +0,0 @@
-Null block device driver
-================================================================================
-
-I. Overview
-
-The null block device (/dev/nullb*) is used for benchmarking the various
-block-layer implementations. It emulates a block device of X gigabytes in size.
-The following instances are possible:
-
- Single-queue block-layer
- - Request-based.
- - Single submission queue per device.
- - Implements IO scheduling algorithms (CFQ, Deadline, noop).
- Multi-queue block-layer
- - Request-based.
- - Configurable submission queues per device.
- No block-layer (Known as bio-based)
- - Bio-based. IO requests are submitted directly to the device driver.
- - Directly accepts bio data structure and returns them.
-
-All of them have a completion queue for each core in the system.
-
-II. Module parameters applicable for all instances:
-
-queue_mode=[0-2]: Default: 2-Multi-queue
- Selects which block-layer the module should instantiate with.
-
- 0: Bio-based.
- 1: Single-queue.
- 2: Multi-queue.
-
-home_node=[0--nr_nodes]: Default: NUMA_NO_NODE
- Selects what CPU node the data structures are allocated from.
-
-gb=[Size in GB]: Default: 250GB
- The size of the device reported to the system.
-
-bs=[Block size (in bytes)]: Default: 512 bytes
- The block size reported to the system.
-
-nr_devices=[Number of devices]: Default: 1
- Number of block devices instantiated. They are instantiated as /dev/nullb0,
- etc.
-
-irqmode=[0-2]: Default: 1-Soft-irq
- The completion mode used for completing IOs to the block-layer.
-
- 0: None.
- 1: Soft-irq. Uses IPI to complete IOs across CPU nodes. Simulates the overhead
- when IOs are issued from another CPU node than the home the device is
- connected to.
- 2: Timer: Waits a specific period (completion_nsec) for each IO before
- completion.
-
-completion_nsec=[ns]: Default: 10,000ns
- Combined with irqmode=2 (timer). The time each completion event must wait.
-
-submit_queues=[1..nr_cpus]:
- The number of submission queues attached to the device driver. If unset, it
- defaults to 1. For multi-queue, it is ignored when use_per_node_hctx module
- parameter is 1.
-
-hw_queue_depth=[0..qdepth]: Default: 64
- The hardware queue depth of the device.
-
-III: Multi-queue specific parameters
-
-use_per_node_hctx=[0/1]: Default: 0
- 0: The number of submit queues are set to the value of the submit_queues
- parameter.
- 1: The multi-queue block layer is instantiated with a hardware dispatch
- queue for each CPU node in the system.
-
-no_sched=[0/1]: Default: 0
- 0: nullb* use default blk-mq io scheduler.
- 1: nullb* doesn't use io scheduler.
-
-blocking=[0/1]: Default: 0
- 0: Register as a non-blocking blk-mq driver device.
- 1: Register as a blocking blk-mq driver device, null_blk will set
- the BLK_MQ_F_BLOCKING flag, indicating that it sometimes/always
- needs to block in its ->queue_rq() function.
-
-shared_tags=[0/1]: Default: 0
- 0: Tag set is not shared.
- 1: Tag set shared between devices for blk-mq. Only makes sense with
- nr_devices > 1, otherwise there's no tag set to share.
-
-zoned=[0/1]: Default: 0
- 0: Block device is exposed as a random-access block device.
- 1: Block device is exposed as a host-managed zoned block device.
-
-zone_size=[MB]: Default: 256
- Per zone size when exposed as a zoned block device. Must be a power of two.
diff --git a/Documentation/block/pr.rst b/Documentation/block/pr.rst
new file mode 100644
index 000000000000..c893d6da8e04
--- /dev/null
+++ b/Documentation/block/pr.rst
@@ -0,0 +1,119 @@
+===============================================
+Block layer support for Persistent Reservations
+===============================================
+
+The Linux kernel supports a user space interface for simplified
+Persistent Reservations which map to block devices that support
+these (like SCSI). Persistent Reservations allow restricting
+access to block devices to specific initiators in a shared storage
+setup.
+
+This document gives a general overview of the support ioctl commands.
+For a more detailed reference please refer to the SCSI Primary
+Commands standard, specifically the section on Reservations and the
+"PERSISTENT RESERVE IN" and "PERSISTENT RESERVE OUT" commands.
+
+All implementations are expected to ensure the reservations survive
+a power loss and cover all connections in a multi path environment.
+These behaviors are optional in SPC but will be automatically applied
+by Linux.
+
+
+The following types of reservations are supported:
+--------------------------------------------------
+
+ - PR_WRITE_EXCLUSIVE
+ Only the initiator that owns the reservation can write to the
+ device. Any initiator can read from the device.
+
+ - PR_EXCLUSIVE_ACCESS
+ Only the initiator that owns the reservation can access the
+ device.
+
+ - PR_WRITE_EXCLUSIVE_REG_ONLY
+ Only initiators with a registered key can write to the device,
+ Any initiator can read from the device.
+
+ - PR_EXCLUSIVE_ACCESS_REG_ONLY
+ Only initiators with a registered key can access the device.
+
+ - PR_WRITE_EXCLUSIVE_ALL_REGS
+
+ Only initiators with a registered key can write to the device,
+ Any initiator can read from the device.
+ All initiators with a registered key are considered reservation
+ holders.
+ Please reference the SPC spec on the meaning of a reservation
+ holder if you want to use this type.
+
+ - PR_EXCLUSIVE_ACCESS_ALL_REGS
+ Only initiators with a registered key can access the device.
+ All initiators with a registered key are considered reservation
+ holders.
+ Please reference the SPC spec on the meaning of a reservation
+ holder if you want to use this type.
+
+
+The following ioctl are supported:
+----------------------------------
+
+1. IOC_PR_REGISTER
+^^^^^^^^^^^^^^^^^^
+
+This ioctl command registers a new reservation if the new_key argument
+is non-null. If no existing reservation exists old_key must be zero,
+if an existing reservation should be replaced old_key must contain
+the old reservation key.
+
+If the new_key argument is 0 it unregisters the existing reservation passed
+in old_key.
+
+
+2. IOC_PR_RESERVE
+^^^^^^^^^^^^^^^^^
+
+This ioctl command reserves the device and thus restricts access for other
+devices based on the type argument. The key argument must be the existing
+reservation key for the device as acquired by the IOC_PR_REGISTER,
+IOC_PR_REGISTER_IGNORE, IOC_PR_PREEMPT or IOC_PR_PREEMPT_ABORT commands.
+
+
+3. IOC_PR_RELEASE
+^^^^^^^^^^^^^^^^^
+
+This ioctl command releases the reservation specified by key and flags
+and thus removes any access restriction implied by it.
+
+
+4. IOC_PR_PREEMPT
+^^^^^^^^^^^^^^^^^
+
+This ioctl command releases the existing reservation referred to by
+old_key and replaces it with a new reservation of type for the
+reservation key new_key.
+
+
+5. IOC_PR_PREEMPT_ABORT
+^^^^^^^^^^^^^^^^^^^^^^^
+
+This ioctl command works like IOC_PR_PREEMPT except that it also aborts
+any outstanding command sent over a connection identified by old_key.
+
+6. IOC_PR_CLEAR
+^^^^^^^^^^^^^^^
+
+This ioctl command unregisters both key and any other reservation key
+registered with the device and drops any existing reservation.
+
+
+Flags
+-----
+
+All the ioctls have a flag field. Currently only one flag is supported:
+
+ - PR_FL_IGNORE_KEY
+ Ignore the existing reservation key. This is commonly supported for
+ IOC_PR_REGISTER, and some implementation may support the flag for
+ IOC_PR_RESERVE.
+
+For all unknown flags the kernel will return -EOPNOTSUPP.
diff --git a/Documentation/block/pr.txt b/Documentation/block/pr.txt
deleted file mode 100644
index ac9b8e70e64b..000000000000
--- a/Documentation/block/pr.txt
+++ /dev/null
@@ -1,119 +0,0 @@
-
-Block layer support for Persistent Reservations
-===============================================
-
-The Linux kernel supports a user space interface for simplified
-Persistent Reservations which map to block devices that support
-these (like SCSI). Persistent Reservations allow restricting
-access to block devices to specific initiators in a shared storage
-setup.
-
-This document gives a general overview of the support ioctl commands.
-For a more detailed reference please refer the the SCSI Primary
-Commands standard, specifically the section on Reservations and the
-"PERSISTENT RESERVE IN" and "PERSISTENT RESERVE OUT" commands.
-
-All implementations are expected to ensure the reservations survive
-a power loss and cover all connections in a multi path environment.
-These behaviors are optional in SPC but will be automatically applied
-by Linux.
-
-
-The following types of reservations are supported:
---------------------------------------------------
-
- - PR_WRITE_EXCLUSIVE
-
- Only the initiator that owns the reservation can write to the
- device. Any initiator can read from the device.
-
- - PR_EXCLUSIVE_ACCESS
-
- Only the initiator that owns the reservation can access the
- device.
-
- - PR_WRITE_EXCLUSIVE_REG_ONLY
-
- Only initiators with a registered key can write to the device,
- Any initiator can read from the device.
-
- - PR_EXCLUSIVE_ACCESS_REG_ONLY
-
- Only initiators with a registered key can access the device.
-
- - PR_WRITE_EXCLUSIVE_ALL_REGS
-
- Only initiators with a registered key can write to the device,
- Any initiator can read from the device.
- All initiators with a registered key are considered reservation
- holders.
- Please reference the SPC spec on the meaning of a reservation
- holder if you want to use this type.
-
- - PR_EXCLUSIVE_ACCESS_ALL_REGS
-
- Only initiators with a registered key can access the device.
- All initiators with a registered key are considered reservation
- holders.
- Please reference the SPC spec on the meaning of a reservation
- holder if you want to use this type.
-
-
-The following ioctl are supported:
-----------------------------------
-
-1. IOC_PR_REGISTER
-
-This ioctl command registers a new reservation if the new_key argument
-is non-null. If no existing reservation exists old_key must be zero,
-if an existing reservation should be replaced old_key must contain
-the old reservation key.
-
-If the new_key argument is 0 it unregisters the existing reservation passed
-in old_key.
-
-
-2. IOC_PR_RESERVE
-
-This ioctl command reserves the device and thus restricts access for other
-devices based on the type argument. The key argument must be the existing
-reservation key for the device as acquired by the IOC_PR_REGISTER,
-IOC_PR_REGISTER_IGNORE, IOC_PR_PREEMPT or IOC_PR_PREEMPT_ABORT commands.
-
-
-3. IOC_PR_RELEASE
-
-This ioctl command releases the reservation specified by key and flags
-and thus removes any access restriction implied by it.
-
-
-4. IOC_PR_PREEMPT
-
-This ioctl command releases the existing reservation referred to by
-old_key and replaces it with a new reservation of type for the
-reservation key new_key.
-
-
-5. IOC_PR_PREEMPT_ABORT
-
-This ioctl command works like IOC_PR_PREEMPT except that it also aborts
-any outstanding command sent over a connection identified by old_key.
-
-6. IOC_PR_CLEAR
-
-This ioctl command unregisters both key and any other reservation key
-registered with the device and drops any existing reservation.
-
-
-Flags
------
-
-All the ioctls have a flag field. Currently only one flag is supported:
-
- - PR_FL_IGNORE_KEY
-
- Ignore the existing reservation key. This is commonly supported for
- IOC_PR_REGISTER, and some implementation may support the flag for
- IOC_PR_RESERVE.
-
-For all unknown flags the kernel will return -EOPNOTSUPP.
diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt
deleted file mode 100644
index 2c1e67058fd3..000000000000
--- a/Documentation/block/queue-sysfs.txt
+++ /dev/null
@@ -1,197 +0,0 @@
-Queue sysfs files
-=================
-
-This text file will detail the queue files that are located in the sysfs tree
-for each block device. Note that stacked devices typically do not export
-any settings, since their queue merely functions are a remapping target.
-These files are the ones found in the /sys/block/xxx/queue/ directory.
-
-Files denoted with a RO postfix are readonly and the RW postfix means
-read-write.
-
-add_random (RW)
-----------------
-This file allows to turn off the disk entropy contribution. Default
-value of this file is '1'(on).
-
-dax (RO)
---------
-This file indicates whether the device supports Direct Access (DAX),
-used by CPU-addressable storage to bypass the pagecache. It shows '1'
-if true, '0' if not.
-
-discard_granularity (RO)
------------------------
-This shows the size of internal allocation of the device in bytes, if
-reported by the device. A value of '0' means device does not support
-the discard functionality.
-
-discard_max_hw_bytes (RO)
-----------------------
-Devices that support discard functionality may have internal limits on
-the number of bytes that can be trimmed or unmapped in a single operation.
-The discard_max_bytes parameter is set by the device driver to the maximum
-number of bytes that can be discarded in a single operation. Discard
-requests issued to the device must not exceed this limit. A discard_max_bytes
-value of 0 means that the device does not support discard functionality.
-
-discard_max_bytes (RW)
-----------------------
-While discard_max_hw_bytes is the hardware limit for the device, this
-setting is the software limit. Some devices exhibit large latencies when
-large discards are issued, setting this value lower will make Linux issue
-smaller discards and potentially help reduce latencies induced by large
-discard operations.
-
-hw_sector_size (RO)
--------------------
-This is the hardware sector size of the device, in bytes.
-
-io_poll (RW)
-------------
-When read, this file shows whether polling is enabled (1) or disabled
-(0). Writing '0' to this file will disable polling for this device.
-Writing any non-zero value will enable this feature.
-
-io_poll_delay (RW)
-------------------
-If polling is enabled, this controls what kind of polling will be
-performed. It defaults to -1, which is classic polling. In this mode,
-the CPU will repeatedly ask for completions without giving up any time.
-If set to 0, a hybrid polling mode is used, where the kernel will attempt
-to make an educated guess at when the IO will complete. Based on this
-guess, the kernel will put the process issuing IO to sleep for an amount
-of time, before entering a classic poll loop. This mode might be a
-little slower than pure classic polling, but it will be more efficient.
-If set to a value larger than 0, the kernel will put the process issuing
-IO to sleep for this amont of microseconds before entering classic
-polling.
-
-iostats (RW)
--------------
-This file is used to control (on/off) the iostats accounting of the
-disk.
-
-logical_block_size (RO)
------------------------
-This is the logical block size of the device, in bytes.
-
-max_hw_sectors_kb (RO)
-----------------------
-This is the maximum number of kilobytes supported in a single data transfer.
-
-max_integrity_segments (RO)
----------------------------
-When read, this file shows the max limit of integrity segments as
-set by block layer which a hardware controller can handle.
-
-max_sectors_kb (RW)
--------------------
-This is the maximum number of kilobytes that the block layer will allow
-for a filesystem request. Must be smaller than or equal to the maximum
-size allowed by the hardware.
-
-max_segments (RO)
------------------
-Maximum number of segments of the device.
-
-max_segment_size (RO)
----------------------
-Maximum segment size of the device.
-
-minimum_io_size (RO)
---------------------
-This is the smallest preferred IO size reported by the device.
-
-nomerges (RW)
--------------
-This enables the user to disable the lookup logic involved with IO
-merging requests in the block layer. By default (0) all merges are
-enabled. When set to 1 only simple one-hit merges will be tried. When
-set to 2 no merge algorithms will be tried (including one-hit or more
-complex tree/hash lookups).
-
-nr_requests (RW)
-----------------
-This controls how many requests may be allocated in the block layer for
-read or write requests. Note that the total allocated number may be twice
-this amount, since it applies only to reads or writes (not the accumulated
-sum).
-
-To avoid priority inversion through request starvation, a request
-queue maintains a separate request pool per each cgroup when
-CONFIG_BLK_CGROUP is enabled, and this parameter applies to each such
-per-block-cgroup request pool. IOW, if there are N block cgroups,
-each request queue may have up to N request pools, each independently
-regulated by nr_requests.
-
-optimal_io_size (RO)
---------------------
-This is the optimal IO size reported by the device.
-
-physical_block_size (RO)
-------------------------
-This is the physical block size of device, in bytes.
-
-read_ahead_kb (RW)
-------------------
-Maximum number of kilobytes to read-ahead for filesystems on this block
-device.
-
-rotational (RW)
----------------
-This file is used to stat if the device is of rotational type or
-non-rotational type.
-
-rq_affinity (RW)
-----------------
-If this option is '1', the block layer will migrate request completions to the
-cpu "group" that originally submitted the request. For some workloads this
-provides a significant reduction in CPU cycles due to caching effects.
-
-For storage configurations that need to maximize distribution of completion
-processing setting this option to '2' forces the completion to run on the
-requesting cpu (bypassing the "group" aggregation logic).
-
-scheduler (RW)
---------------
-When read, this file will display the current and available IO schedulers
-for this block device. The currently active IO scheduler will be enclosed
-in [] brackets. Writing an IO scheduler name to this file will switch
-control of this block device to that new IO scheduler. Note that writing
-an IO scheduler name to this file will attempt to load that IO scheduler
-module, if it isn't already present in the system.
-
-write_cache (RW)
-----------------
-When read, this file will display whether the device has write back
-caching enabled or not. It will return "write back" for the former
-case, and "write through" for the latter. Writing to this file can
-change the kernels view of the device, but it doesn't alter the
-device state. This means that it might not be safe to toggle the
-setting from "write back" to "write through", since that will also
-eliminate cache flushes issued by the kernel.
-
-write_same_max_bytes (RO)
--------------------------
-This is the number of bytes the device can write in a single write-same
-command. A value of '0' means write-same is not supported by this
-device.
-
-wb_lat_usec (RW)
-----------------
-If the device is registered for writeback throttling, then this file shows
-the target minimum read latency. If this latency is exceeded in a given
-window of time (see wb_window_usec), then the writeback throttling will start
-scaling back writes. Writing a value of '0' to this file disables the
-feature. Writing a value of '-1' to this file resets the value to the
-default setting.
-
-throttle_sample_time (RW)
--------------------------
-This is the time window that blk-throttle samples data, in millisecond.
-blk-throttle makes decision based on the samplings. Lower time means cgroups
-have more smooth throughput, but higher CPU overhead. This exists only when
-CONFIG_BLK_DEV_THROTTLING_LOW is enabled.
-
-Jens Axboe <jens.axboe@oracle.com>, February 2009
diff --git a/Documentation/block/request.txt b/Documentation/block/request.txt
deleted file mode 100644
index 754e104ed369..000000000000
--- a/Documentation/block/request.txt
+++ /dev/null
@@ -1,88 +0,0 @@
-
-struct request documentation
-
-Jens Axboe <jens.axboe@oracle.com> 27/05/02
-
-1.0
-Index
-
-2.0 Struct request members classification
-
- 2.1 struct request members explanation
-
-3.0
-
-
-2.0
-Short explanation of request members
-
-Classification flags:
-
- D driver member
- B block layer member
- I I/O scheduler member
-
-Unless an entry contains a D classification, a device driver must not access
-this member. Some members may contain D classifications, but should only be
-access through certain macros or functions (eg ->flags).
-
-<linux/blkdev.h>
-
-2.1
-Member Flag Comment
------- ---- -------
-
-struct list_head queuelist BI Organization on various internal
- queues
-
-void *elevator_private I I/O scheduler private data
-
-unsigned char cmd[16] D Driver can use this for setting up
- a cdb before execution, see
- blk_queue_prep_rq
-
-unsigned long flags DBI Contains info about data direction,
- request type, etc.
-
-int rq_status D Request status bits
-
-kdev_t rq_dev DBI Target device
-
-int errors DB Error counts
-
-sector_t sector DBI Target location
-
-unsigned long hard_nr_sectors B Used to keep sector sane
-
-unsigned long nr_sectors DBI Total number of sectors in request
-
-unsigned long hard_nr_sectors B Used to keep nr_sectors sane
-
-unsigned short nr_phys_segments DB Number of physical scatter gather
- segments in a request
-
-unsigned short nr_hw_segments DB Number of hardware scatter gather
- segments in a request
-
-unsigned int current_nr_sectors DB Number of sectors in first segment
- of request
-
-unsigned int hard_cur_sectors B Used to keep current_nr_sectors sane
-
-int tag DB TCQ tag, if assigned
-
-void *special D Free to be used by driver
-
-char *buffer D Map of first segment, also see
- section on bouncing SECTION
-
-struct completion *waiting D Can be used by driver to get signalled
- on request completion
-
-struct bio *bio DBI First bio in request
-
-struct bio *biotail DBI Last bio in request
-
-struct request_queue *q DB Request queue this request belongs to
-
-struct request_list *rl B Request list this request came from
diff --git a/Documentation/block/stat.rst b/Documentation/block/stat.rst
new file mode 100644
index 000000000000..a1cd9db2058f
--- /dev/null
+++ b/Documentation/block/stat.rst
@@ -0,0 +1,103 @@
+===============================================
+Block layer statistics in /sys/block/<dev>/stat
+===============================================
+
+This file documents the contents of the /sys/block/<dev>/stat file.
+
+The stat file provides several statistics about the state of block
+device <dev>.
+
+Q.
+ Why are there multiple statistics in a single file? Doesn't sysfs
+ normally contain a single value per file?
+
+A.
+ By having a single file, the kernel can guarantee that the statistics
+ represent a consistent snapshot of the state of the device. If the
+ statistics were exported as multiple files containing one statistic
+ each, it would be impossible to guarantee that a set of readings
+ represent a single point in time.
+
+The stat file consists of a single line of text containing 17 decimal
+values separated by whitespace. The fields are summarized in the
+following table, and described in more detail below.
+
+
+=============== ============= =================================================
+Name units description
+=============== ============= =================================================
+read I/Os requests number of read I/Os processed
+read merges requests number of read I/Os merged with in-queue I/O
+read sectors sectors number of sectors read
+read ticks milliseconds total wait time for read requests
+write I/Os requests number of write I/Os processed
+write merges requests number of write I/Os merged with in-queue I/O
+write sectors sectors number of sectors written
+write ticks milliseconds total wait time for write requests
+in_flight requests number of I/Os currently in flight
+io_ticks milliseconds total time this block device has been active
+time_in_queue milliseconds total wait time for all requests
+discard I/Os requests number of discard I/Os processed
+discard merges requests number of discard I/Os merged with in-queue I/O
+discard sectors sectors number of sectors discarded
+discard ticks milliseconds total wait time for discard requests
+flush I/Os requests number of flush I/Os processed
+flush ticks milliseconds total wait time for flush requests
+=============== ============= =================================================
+
+read I/Os, write I/Os, discard I/0s
+===================================
+
+These values increment when an I/O request completes.
+
+flush I/Os
+==========
+
+These values increment when an flush I/O request completes.
+
+Block layer combines flush requests and executes at most one at a time.
+This counts flush requests executed by disk. Not tracked for partitions.
+
+read merges, write merges, discard merges
+=========================================
+
+These values increment when an I/O request is merged with an
+already-queued I/O request.
+
+read sectors, write sectors, discard_sectors
+============================================
+
+These values count the number of sectors read from, written to, or
+discarded from this block device. The "sectors" in question are the
+standard UNIX 512-byte sectors, not any device- or filesystem-specific
+block size. The counters are incremented when the I/O completes.
+
+read ticks, write ticks, discard ticks, flush ticks
+===================================================
+
+These values count the number of milliseconds that I/O requests have
+waited on this block device. If there are multiple I/O requests waiting,
+these values will increase at a rate greater than 1000/second; for
+example, if 60 read requests wait for an average of 30 ms, the read_ticks
+field will increase by 60*30 = 1800.
+
+in_flight
+=========
+
+This value counts the number of I/O requests that have been issued to
+the device driver but have not yet completed. It does not include I/O
+requests that are in the queue but not yet issued to the device driver.
+
+io_ticks
+========
+
+This value counts the number of milliseconds during which the device has
+had I/O requests queued.
+
+time_in_queue
+=============
+
+This value counts the number of milliseconds that I/O requests have waited
+on this block device. If there are multiple I/O requests waiting, this
+value will increase as the product of the number of milliseconds times the
+number of requests waiting (see "read ticks" above for an example).
diff --git a/Documentation/block/stat.txt b/Documentation/block/stat.txt
deleted file mode 100644
index 0aace9cc536c..000000000000
--- a/Documentation/block/stat.txt
+++ /dev/null
@@ -1,86 +0,0 @@
-Block layer statistics in /sys/block/<dev>/stat
-===============================================
-
-This file documents the contents of the /sys/block/<dev>/stat file.
-
-The stat file provides several statistics about the state of block
-device <dev>.
-
-Q. Why are there multiple statistics in a single file? Doesn't sysfs
- normally contain a single value per file?
-A. By having a single file, the kernel can guarantee that the statistics
- represent a consistent snapshot of the state of the device. If the
- statistics were exported as multiple files containing one statistic
- each, it would be impossible to guarantee that a set of readings
- represent a single point in time.
-
-The stat file consists of a single line of text containing 11 decimal
-values separated by whitespace. The fields are summarized in the
-following table, and described in more detail below.
-
-Name units description
----- ----- -----------
-read I/Os requests number of read I/Os processed
-read merges requests number of read I/Os merged with in-queue I/O
-read sectors sectors number of sectors read
-read ticks milliseconds total wait time for read requests
-write I/Os requests number of write I/Os processed
-write merges requests number of write I/Os merged with in-queue I/O
-write sectors sectors number of sectors written
-write ticks milliseconds total wait time for write requests
-in_flight requests number of I/Os currently in flight
-io_ticks milliseconds total time this block device has been active
-time_in_queue milliseconds total wait time for all requests
-discard I/Os requests number of discard I/Os processed
-discard merges requests number of discard I/Os merged with in-queue I/O
-discard sectors sectors number of sectors discarded
-discard ticks milliseconds total wait time for discard requests
-
-read I/Os, write I/Os, discard I/0s
-===================================
-
-These values increment when an I/O request completes.
-
-read merges, write merges, discard merges
-=========================================
-
-These values increment when an I/O request is merged with an
-already-queued I/O request.
-
-read sectors, write sectors, discard_sectors
-============================================
-
-These values count the number of sectors read from, written to, or
-discarded from this block device. The "sectors" in question are the
-standard UNIX 512-byte sectors, not any device- or filesystem-specific
-block size. The counters are incremented when the I/O completes.
-
-read ticks, write ticks, discard ticks
-======================================
-
-These values count the number of milliseconds that I/O requests have
-waited on this block device. If there are multiple I/O requests waiting,
-these values will increase at a rate greater than 1000/second; for
-example, if 60 read requests wait for an average of 30 ms, the read_ticks
-field will increase by 60*30 = 1800.
-
-in_flight
-=========
-
-This value counts the number of I/O requests that have been issued to
-the device driver but have not yet completed. It does not include I/O
-requests that are in the queue but not yet issued to the device driver.
-
-io_ticks
-========
-
-This value counts the number of milliseconds during which the device has
-had I/O requests queued.
-
-time_in_queue
-=============
-
-This value counts the number of milliseconds that I/O requests have waited
-on this block device. If there are multiple I/O requests waiting, this
-value will increase as the product of the number of milliseconds times the
-number of requests waiting (see "read ticks" above for an example).
diff --git a/Documentation/block/switching-sched.rst b/Documentation/block/switching-sched.rst
new file mode 100644
index 000000000000..520f6b857544
--- /dev/null
+++ b/Documentation/block/switching-sched.rst
@@ -0,0 +1,35 @@
+===================
+Switching Scheduler
+===================
+
+Each io queue has a set of io scheduler tunables associated with it. These
+tunables control how the io scheduler works. You can find these entries
+in::
+
+ /sys/block/<device>/queue/iosched
+
+assuming that you have sysfs mounted on /sys. If you don't have sysfs mounted,
+you can do so by typing::
+
+ # mount none /sys -t sysfs
+
+It is possible to change the IO scheduler for a given block device on
+the fly to select one of mq-deadline, none, bfq, or kyber schedulers -
+which can improve that device's throughput.
+
+To set a specific scheduler, simply do this::
+
+ echo SCHEDNAME > /sys/block/DEV/queue/scheduler
+
+where SCHEDNAME is the name of a defined IO scheduler, and DEV is the
+device name (hda, hdb, sga, or whatever you happen to have).
+
+The list of defined schedulers can be found by simply doing
+a "cat /sys/block/DEV/queue/scheduler" - the list of valid names
+will be displayed, with the currently selected scheduler in brackets::
+
+ # cat /sys/block/sda/queue/scheduler
+ [mq-deadline] kyber bfq none
+ # echo none >/sys/block/sda/queue/scheduler
+ # cat /sys/block/sda/queue/scheduler
+ [none] mq-deadline kyber bfq
diff --git a/Documentation/block/switching-sched.txt b/Documentation/block/switching-sched.txt
deleted file mode 100644
index 3b2612e342f1..000000000000
--- a/Documentation/block/switching-sched.txt
+++ /dev/null
@@ -1,37 +0,0 @@
-To choose IO schedulers at boot time, use the argument 'elevator=deadline'.
-'noop' and 'cfq' (the default) are also available. IO schedulers are assigned
-globally at boot time only presently.
-
-Each io queue has a set of io scheduler tunables associated with it. These
-tunables control how the io scheduler works. You can find these entries
-in:
-
-/sys/block/<device>/queue/iosched
-
-assuming that you have sysfs mounted on /sys. If you don't have sysfs mounted,
-you can do so by typing:
-
-# mount none /sys -t sysfs
-
-As of the Linux 2.6.10 kernel, it is now possible to change the
-IO scheduler for a given block device on the fly (thus making it possible,
-for instance, to set the CFQ scheduler for the system default, but
-set a specific device to use the deadline or noop schedulers - which
-can improve that device's throughput).
-
-To set a specific scheduler, simply do this:
-
-echo SCHEDNAME > /sys/block/DEV/queue/scheduler
-
-where SCHEDNAME is the name of a defined IO scheduler, and DEV is the
-device name (hda, hdb, sga, or whatever you happen to have).
-
-The list of defined schedulers can be found by simply doing
-a "cat /sys/block/DEV/queue/scheduler" - the list of valid names
-will be displayed, with the currently selected scheduler in brackets:
-
-# cat /sys/block/hda/queue/scheduler
-noop deadline [cfq]
-# echo deadline > /sys/block/hda/queue/scheduler
-# cat /sys/block/hda/queue/scheduler
-noop [deadline] cfq
diff --git a/Documentation/block/ublk.rst b/Documentation/block/ublk.rst
new file mode 100644
index 000000000000..8c4030bcabb6
--- /dev/null
+++ b/Documentation/block/ublk.rst
@@ -0,0 +1,441 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================================
+Userspace block device driver (ublk driver)
+===========================================
+
+Overview
+========
+
+ublk is a generic framework for implementing block device logic from userspace.
+The motivation behind it is that moving virtual block drivers into userspace,
+such as loop, nbd and similar can be very helpful. It can help to implement
+new virtual block device such as ublk-qcow2 (there are several attempts of
+implementing qcow2 driver in kernel).
+
+Userspace block devices are attractive because:
+
+- They can be written many programming languages.
+- They can use libraries that are not available in the kernel.
+- They can be debugged with tools familiar to application developers.
+- Crashes do not kernel panic the machine.
+- Bugs are likely to have a lower security impact than bugs in kernel
+ code.
+- They can be installed and updated independently of the kernel.
+- They can be used to simulate block device easily with user specified
+ parameters/setting for test/debug purpose
+
+ublk block device (``/dev/ublkb*``) is added by ublk driver. Any IO request
+on the device will be forwarded to ublk userspace program. For convenience,
+in this document, ``ublk server`` refers to generic ublk userspace
+program. ``ublksrv`` [#userspace]_ is one of such implementation. It
+provides ``libublksrv`` [#userspace_lib]_ library for developing specific
+user block device conveniently, while also generic type block device is
+included, such as loop and null. Richard W.M. Jones wrote userspace nbd device
+``nbdublk`` [#userspace_nbdublk]_ based on ``libublksrv`` [#userspace_lib]_.
+
+After the IO is handled by userspace, the result is committed back to the
+driver, thus completing the request cycle. This way, any specific IO handling
+logic is totally done by userspace, such as loop's IO handling, NBD's IO
+communication, or qcow2's IO mapping.
+
+``/dev/ublkb*`` is driven by blk-mq request-based driver. Each request is
+assigned by one queue wide unique tag. ublk server assigns unique tag to each
+IO too, which is 1:1 mapped with IO of ``/dev/ublkb*``.
+
+Both the IO request forward and IO handling result committing are done via
+``io_uring`` passthrough command; that is why ublk is also one io_uring based
+block driver. It has been observed that using io_uring passthrough command can
+give better IOPS than block IO; which is why ublk is one of high performance
+implementation of userspace block device: not only IO request communication is
+done by io_uring, but also the preferred IO handling in ublk server is io_uring
+based approach too.
+
+ublk provides control interface to set/get ublk block device parameters.
+The interface is extendable and kabi compatible: basically any ublk request
+queue's parameter or ublk generic feature parameters can be set/get via the
+interface. Thus, ublk is generic userspace block device framework.
+For example, it is easy to setup a ublk device with specified block
+parameters from userspace.
+
+Using ublk
+==========
+
+ublk requires userspace ublk server to handle real block device logic.
+
+Below is example of using ``ublksrv`` to provide ublk-based loop device.
+
+- add a device::
+
+ ublk add -t loop -f ublk-loop.img
+
+- format with xfs, then use it::
+
+ mkfs.xfs /dev/ublkb0
+ mount /dev/ublkb0 /mnt
+ # do anything. all IOs are handled by io_uring
+ ...
+ umount /mnt
+
+- list the devices with their info::
+
+ ublk list
+
+- delete the device::
+
+ ublk del -a
+ ublk del -n $ublk_dev_id
+
+See usage details in README of ``ublksrv`` [#userspace_readme]_.
+
+Design
+======
+
+Control plane
+-------------
+
+ublk driver provides global misc device node (``/dev/ublk-control``) for
+managing and controlling ublk devices with help of several control commands:
+
+- ``UBLK_CMD_ADD_DEV``
+
+ Add a ublk char device (``/dev/ublkc*``) which is talked with ublk server
+ WRT IO command communication. Basic device info is sent together with this
+ command. It sets UAPI structure of ``ublksrv_ctrl_dev_info``,
+ such as ``nr_hw_queues``, ``queue_depth``, and max IO request buffer size,
+ for which the info is negotiated with the driver and sent back to the server.
+ When this command is completed, the basic device info is immutable.
+
+- ``UBLK_CMD_SET_PARAMS`` / ``UBLK_CMD_GET_PARAMS``
+
+ Set or get parameters of the device, which can be either generic feature
+ related, or request queue limit related, but can't be IO logic specific,
+ because the driver does not handle any IO logic. This command has to be
+ sent before sending ``UBLK_CMD_START_DEV``.
+
+- ``UBLK_CMD_START_DEV``
+
+ After the server prepares userspace resources (such as creating I/O handler
+ threads & io_uring for handling ublk IO), this command is sent to the
+ driver for allocating & exposing ``/dev/ublkb*``. Parameters set via
+ ``UBLK_CMD_SET_PARAMS`` are applied for creating the device.
+
+- ``UBLK_CMD_STOP_DEV``
+
+ Halt IO on ``/dev/ublkb*`` and remove the device. When this command returns,
+ ublk server will release resources (such as destroying I/O handler threads &
+ io_uring).
+
+- ``UBLK_CMD_DEL_DEV``
+
+ Remove ``/dev/ublkc*``. When this command returns, the allocated ublk device
+ number can be reused.
+
+- ``UBLK_CMD_GET_QUEUE_AFFINITY``
+
+ When ``/dev/ublkc`` is added, the driver creates block layer tagset, so
+ that each queue's affinity info is available. The server sends
+ ``UBLK_CMD_GET_QUEUE_AFFINITY`` to retrieve queue affinity info. It can
+ set up the per-queue context efficiently, such as bind affine CPUs with IO
+ pthread and try to allocate buffers in IO thread context.
+
+- ``UBLK_CMD_GET_DEV_INFO``
+
+ For retrieving device info via ``ublksrv_ctrl_dev_info``. It is the server's
+ responsibility to save IO target specific info in userspace.
+
+- ``UBLK_CMD_GET_DEV_INFO2``
+ Same purpose with ``UBLK_CMD_GET_DEV_INFO``, but ublk server has to
+ provide path of the char device of ``/dev/ublkc*`` for kernel to run
+ permission check, and this command is added for supporting unprivileged
+ ublk device, and introduced with ``UBLK_F_UNPRIVILEGED_DEV`` together.
+ Only the user owning the requested device can retrieve the device info.
+
+ How to deal with userspace/kernel compatibility:
+
+ 1) if kernel is capable of handling ``UBLK_F_UNPRIVILEGED_DEV``
+
+ If ublk server supports ``UBLK_F_UNPRIVILEGED_DEV``:
+
+ ublk server should send ``UBLK_CMD_GET_DEV_INFO2``, given anytime
+ unprivileged application needs to query devices the current user owns,
+ when the application has no idea if ``UBLK_F_UNPRIVILEGED_DEV`` is set
+ given the capability info is stateless, and application should always
+ retrieve it via ``UBLK_CMD_GET_DEV_INFO2``
+
+ If ublk server doesn't support ``UBLK_F_UNPRIVILEGED_DEV``:
+
+ ``UBLK_CMD_GET_DEV_INFO`` is always sent to kernel, and the feature of
+ UBLK_F_UNPRIVILEGED_DEV isn't available for user
+
+ 2) if kernel isn't capable of handling ``UBLK_F_UNPRIVILEGED_DEV``
+
+ If ublk server supports ``UBLK_F_UNPRIVILEGED_DEV``:
+
+ ``UBLK_CMD_GET_DEV_INFO2`` is tried first, and will be failed, then
+ ``UBLK_CMD_GET_DEV_INFO`` needs to be retried given
+ ``UBLK_F_UNPRIVILEGED_DEV`` can't be set
+
+ If ublk server doesn't support ``UBLK_F_UNPRIVILEGED_DEV``:
+
+ ``UBLK_CMD_GET_DEV_INFO`` is always sent to kernel, and the feature of
+ ``UBLK_F_UNPRIVILEGED_DEV`` isn't available for user
+
+- ``UBLK_CMD_START_USER_RECOVERY``
+
+ This command is valid if ``UBLK_F_USER_RECOVERY`` feature is enabled. This
+ command is accepted after the old process has exited, ublk device is quiesced
+ and ``/dev/ublkc*`` is released. User should send this command before he starts
+ a new process which re-opens ``/dev/ublkc*``. When this command returns, the
+ ublk device is ready for the new process.
+
+- ``UBLK_CMD_END_USER_RECOVERY``
+
+ This command is valid if ``UBLK_F_USER_RECOVERY`` feature is enabled. This
+ command is accepted after ublk device is quiesced and a new process has
+ opened ``/dev/ublkc*`` and get all ublk queues be ready. When this command
+ returns, ublk device is unquiesced and new I/O requests are passed to the
+ new process.
+
+- user recovery feature description
+
+ Three new features are added for user recovery: ``UBLK_F_USER_RECOVERY``,
+ ``UBLK_F_USER_RECOVERY_REISSUE``, and ``UBLK_F_USER_RECOVERY_FAIL_IO``. To
+ enable recovery of ublk devices after the ublk server exits, the ublk server
+ should specify the ``UBLK_F_USER_RECOVERY`` flag when creating the device. The
+ ublk server may additionally specify at most one of
+ ``UBLK_F_USER_RECOVERY_REISSUE`` and ``UBLK_F_USER_RECOVERY_FAIL_IO`` to
+ modify how I/O is handled while the ublk server is dying/dead (this is called
+ the ``nosrv`` case in the driver code).
+
+ With just ``UBLK_F_USER_RECOVERY`` set, after the ublk server exits,
+ ublk does not delete ``/dev/ublkb*`` during the whole
+ recovery stage and ublk device ID is kept. It is ublk server's
+ responsibility to recover the device context by its own knowledge.
+ Requests which have not been issued to userspace are requeued. Requests
+ which have been issued to userspace are aborted.
+
+ With ``UBLK_F_USER_RECOVERY_REISSUE`` additionally set, after the ublk server
+ exits, contrary to ``UBLK_F_USER_RECOVERY``,
+ requests which have been issued to userspace are requeued and will be
+ re-issued to the new process after handling ``UBLK_CMD_END_USER_RECOVERY``.
+ ``UBLK_F_USER_RECOVERY_REISSUE`` is designed for backends who tolerate
+ double-write since the driver may issue the same I/O request twice. It
+ might be useful to a read-only FS or a VM backend.
+
+ With ``UBLK_F_USER_RECOVERY_FAIL_IO`` additionally set, after the ublk server
+ exits, requests which have issued to userspace are failed, as are any
+ subsequently issued requests. Applications continuously issuing I/O against
+ devices with this flag set will see a stream of I/O errors until a new ublk
+ server recovers the device.
+
+Unprivileged ublk device is supported by passing ``UBLK_F_UNPRIVILEGED_DEV``.
+Once the flag is set, all control commands can be sent by unprivileged
+user. Except for command of ``UBLK_CMD_ADD_DEV``, permission check on
+the specified char device(``/dev/ublkc*``) is done for all other control
+commands by ublk driver, for doing that, path of the char device has to
+be provided in these commands' payload from ublk server. With this way,
+ublk device becomes container-ware, and device created in one container
+can be controlled/accessed just inside this container.
+
+Data plane
+----------
+
+The ublk server should create dedicated threads for handling I/O. Each
+thread should have its own io_uring through which it is notified of new
+I/O, and through which it can complete I/O. These dedicated threads
+should focus on IO handling and shouldn't handle any control &
+management tasks.
+
+The's IO is assigned by a unique tag, which is 1:1 mapping with IO
+request of ``/dev/ublkb*``.
+
+UAPI structure of ``ublksrv_io_desc`` is defined for describing each IO from
+the driver. A fixed mmapped area (array) on ``/dev/ublkc*`` is provided for
+exporting IO info to the server; such as IO offset, length, OP/flags and
+buffer address. Each ``ublksrv_io_desc`` instance can be indexed via queue id
+and IO tag directly.
+
+The following IO commands are communicated via io_uring passthrough command,
+and each command is only for forwarding the IO and committing the result
+with specified IO tag in the command data:
+
+- ``UBLK_IO_FETCH_REQ``
+
+ Sent from the server IO pthread for fetching future incoming IO requests
+ destined to ``/dev/ublkb*``. This command is sent only once from the server
+ IO pthread for ublk driver to setup IO forward environment.
+
+ Once a thread issues this command against a given (qid,tag) pair, the thread
+ registers itself as that I/O's daemon. In the future, only that I/O's daemon
+ is allowed to issue commands against the I/O. If any other thread attempts
+ to issue a command against a (qid,tag) pair for which the thread is not the
+ daemon, the command will fail. Daemons can be reset only be going through
+ recovery.
+
+ The ability for every (qid,tag) pair to have its own independent daemon task
+ is indicated by the ``UBLK_F_PER_IO_DAEMON`` feature. If this feature is not
+ supported by the driver, daemons must be per-queue instead - i.e. all I/Os
+ associated to a single qid must be handled by the same task.
+
+- ``UBLK_IO_COMMIT_AND_FETCH_REQ``
+
+ When an IO request is destined to ``/dev/ublkb*``, the driver stores
+ the IO's ``ublksrv_io_desc`` to the specified mapped area; then the
+ previous received IO command of this IO tag (either ``UBLK_IO_FETCH_REQ``
+ or ``UBLK_IO_COMMIT_AND_FETCH_REQ)`` is completed, so the server gets
+ the IO notification via io_uring.
+
+ After the server handles the IO, its result is committed back to the
+ driver by sending ``UBLK_IO_COMMIT_AND_FETCH_REQ`` back. Once ublkdrv
+ received this command, it parses the result and complete the request to
+ ``/dev/ublkb*``. In the meantime setup environment for fetching future
+ requests with the same IO tag. That is, ``UBLK_IO_COMMIT_AND_FETCH_REQ``
+ is reused for both fetching request and committing back IO result.
+
+- ``UBLK_IO_NEED_GET_DATA``
+
+ With ``UBLK_F_NEED_GET_DATA`` enabled, the WRITE request will be firstly
+ issued to ublk server without data copy. Then, IO backend of ublk server
+ receives the request and it can allocate data buffer and embed its addr
+ inside this new io command. After the kernel driver gets the command,
+ data copy is done from request pages to this backend's buffer. Finally,
+ backend receives the request again with data to be written and it can
+ truly handle the request.
+
+ ``UBLK_IO_NEED_GET_DATA`` adds one additional round-trip and one
+ io_uring_enter() syscall. Any user thinks that it may lower performance
+ should not enable UBLK_F_NEED_GET_DATA. ublk server pre-allocates IO
+ buffer for each IO by default. Any new project should try to use this
+ buffer to communicate with ublk driver. However, existing project may
+ break or not able to consume the new buffer interface; that's why this
+ command is added for backwards compatibility so that existing projects
+ can still consume existing buffers.
+
+- data copy between ublk server IO buffer and ublk block IO request
+
+ The driver needs to copy the block IO request pages into the server buffer
+ (pages) first for WRITE before notifying the server of the coming IO, so
+ that the server can handle WRITE request.
+
+ When the server handles READ request and sends
+ ``UBLK_IO_COMMIT_AND_FETCH_REQ`` to the server, ublkdrv needs to copy
+ the server buffer (pages) read to the IO request pages.
+
+Zero copy
+---------
+
+ublk zero copy relies on io_uring's fixed kernel buffer, which provides
+two APIs: `io_buffer_register_bvec()` and `io_buffer_unregister_bvec`.
+
+ublk adds IO command of `UBLK_IO_REGISTER_IO_BUF` to call
+`io_buffer_register_bvec()` for ublk server to register client request
+buffer into io_uring buffer table, then ublk server can submit io_uring
+IOs with the registered buffer index. IO command of `UBLK_IO_UNREGISTER_IO_BUF`
+calls `io_buffer_unregister_bvec()` to unregister the buffer, which is
+guaranteed to be live between calling `io_buffer_register_bvec()` and
+`io_buffer_unregister_bvec()`. Any io_uring operation which supports this
+kind of kernel buffer will grab one reference of the buffer until the
+operation is completed.
+
+ublk server implementing zero copy or user copy has to be CAP_SYS_ADMIN and
+be trusted, because it is ublk server's responsibility to make sure IO buffer
+filled with data for handling read command, and ublk server has to return
+correct result to ublk driver when handling READ command, and the result
+has to match with how many bytes filled to the IO buffer. Otherwise,
+uninitialized kernel IO buffer will be exposed to client application.
+
+ublk server needs to align the parameter of `struct ublk_param_dma_align`
+with backend for zero copy to work correctly.
+
+For reaching best IO performance, ublk server should align its segment
+parameter of `struct ublk_param_segment` with backend for avoiding
+unnecessary IO split, which usually hurts io_uring performance.
+
+Auto Buffer Registration
+------------------------
+
+The ``UBLK_F_AUTO_BUF_REG`` feature automatically handles buffer registration
+and unregistration for I/O requests, which simplifies the buffer management
+process and reduces overhead in the ublk server implementation.
+
+This is another feature flag for using zero copy, and it is compatible with
+``UBLK_F_SUPPORT_ZERO_COPY``.
+
+Feature Overview
+~~~~~~~~~~~~~~~~
+
+This feature automatically registers request buffers to the io_uring context
+before delivering I/O commands to the ublk server and unregisters them when
+completing I/O commands. This eliminates the need for manual buffer
+registration/unregistration via ``UBLK_IO_REGISTER_IO_BUF`` and
+``UBLK_IO_UNREGISTER_IO_BUF`` commands, then IO handling in ublk server
+can avoid dependency on the two uring_cmd operations.
+
+IOs can't be issued concurrently to io_uring if there is any dependency
+among these IOs. So this way not only simplifies ublk server implementation,
+but also makes concurrent IO handling becomes possible by removing the
+dependency on buffer registration & unregistration commands.
+
+Usage Requirements
+~~~~~~~~~~~~~~~~~~
+
+1. The ublk server must create a sparse buffer table on the same ``io_ring_ctx``
+ used for ``UBLK_IO_FETCH_REQ`` and ``UBLK_IO_COMMIT_AND_FETCH_REQ``. If
+ uring_cmd is issued on a different ``io_ring_ctx``, manual buffer
+ unregistration is required.
+
+2. Buffer registration data must be passed via uring_cmd's ``sqe->addr`` with the
+ following structure::
+
+ struct ublk_auto_buf_reg {
+ __u16 index; /* Buffer index for registration */
+ __u8 flags; /* Registration flags */
+ __u8 reserved0; /* Reserved for future use */
+ __u32 reserved1; /* Reserved for future use */
+ };
+
+ ublk_auto_buf_reg_to_sqe_addr() is for converting the above structure into
+ ``sqe->addr``.
+
+3. All reserved fields in ``ublk_auto_buf_reg`` must be zeroed.
+
+4. Optional flags can be passed via ``ublk_auto_buf_reg.flags``.
+
+Fallback Behavior
+~~~~~~~~~~~~~~~~~
+
+If auto buffer registration fails:
+
+1. When ``UBLK_AUTO_BUF_REG_FALLBACK`` is enabled:
+
+ - The uring_cmd is completed
+ - ``UBLK_IO_F_NEED_REG_BUF`` is set in ``ublksrv_io_desc.op_flags``
+ - The ublk server must manually deal with the failure, such as, register
+ the buffer manually, or using user copy feature for retrieving the data
+ for handling ublk IO
+
+2. If fallback is not enabled:
+
+ - The ublk I/O request fails silently
+ - The uring_cmd won't be completed
+
+Limitations
+~~~~~~~~~~~
+
+- Requires same ``io_ring_ctx`` for all operations
+- May require manual buffer management in fallback cases
+- io_ring_ctx buffer table has a max size of 16K, which may not be enough
+ in case that too many ublk devices are handled by this single io_ring_ctx
+ and each one has very large queue depth
+
+References
+==========
+
+.. [#userspace] https://github.com/ming1/ubdsrv
+
+.. [#userspace_lib] https://github.com/ming1/ubdsrv/tree/master/lib
+
+.. [#userspace_nbdublk] https://gitlab.com/rwmjones/libnbd/-/tree/nbdublk
+
+.. [#userspace_readme] https://github.com/ming1/ubdsrv/blob/master/README
diff --git a/Documentation/block/writeback_cache_control.rst b/Documentation/block/writeback_cache_control.rst
new file mode 100644
index 000000000000..c3707d071780
--- /dev/null
+++ b/Documentation/block/writeback_cache_control.rst
@@ -0,0 +1,95 @@
+==========================================
+Explicit volatile write back cache control
+==========================================
+
+Introduction
+------------
+
+Many storage devices, especially in the consumer market, come with volatile
+write back caches. That means the devices signal I/O completion to the
+operating system before data actually has hit the non-volatile storage. This
+behavior obviously speeds up various workloads, but it means the operating
+system needs to force data out to the non-volatile storage when it performs
+a data integrity operation like fsync, sync or an unmount.
+
+The Linux block layer provides two simple mechanisms that let filesystems
+control the caching behavior of the storage device. These mechanisms are
+a forced cache flush, and the Force Unit Access (FUA) flag for requests.
+
+
+Explicit cache flushes
+----------------------
+
+The REQ_PREFLUSH flag can be OR ed into the r/w flags of a bio submitted from
+the filesystem and will make sure the volatile cache of the storage device
+has been flushed before the actual I/O operation is started. This explicitly
+guarantees that previously completed write requests are on non-volatile
+storage before the flagged bio starts. In addition the REQ_PREFLUSH flag can be
+set on an otherwise empty bio structure, which causes only an explicit cache
+flush without any dependent I/O. It is recommend to use
+the blkdev_issue_flush() helper for a pure cache flush.
+
+
+Forced Unit Access
+------------------
+
+The REQ_FUA flag can be OR ed into the r/w flags of a bio submitted from the
+filesystem and will make sure that I/O completion for this request is only
+signaled after the data has been committed to non-volatile storage.
+
+
+Implementation details for filesystems
+--------------------------------------
+
+Filesystems can simply set the REQ_PREFLUSH and REQ_FUA bits and do not have to
+worry if the underlying devices need any explicit cache flushing and how
+the Forced Unit Access is implemented. The REQ_PREFLUSH and REQ_FUA flags
+may both be set on a single bio.
+
+Feature settings for block drivers
+----------------------------------
+
+For devices that do not support volatile write caches there is no driver
+support required, the block layer completes empty REQ_PREFLUSH requests before
+entering the driver and strips off the REQ_PREFLUSH and REQ_FUA bits from
+requests that have a payload.
+
+For devices with volatile write caches the driver needs to tell the block layer
+that it supports flushing caches by setting the
+
+ BLK_FEAT_WRITE_CACHE
+
+flag in the queue_limits feature field. For devices that also support the FUA
+bit the block layer needs to be told to pass on the REQ_FUA bit by also setting
+the
+
+ BLK_FEAT_FUA
+
+flag in the features field of the queue_limits structure.
+
+Implementation details for bio based block drivers
+--------------------------------------------------
+
+For bio based drivers the REQ_PREFLUSH and REQ_FUA bit are simply passed on to
+the driver if the driver sets the BLK_FEAT_WRITE_CACHE flag and the driver
+needs to handle them.
+
+*NOTE*: The REQ_FUA bit also gets passed on when the BLK_FEAT_FUA flags is
+_not_ set. Any bio based driver that sets BLK_FEAT_WRITE_CACHE also needs to
+handle REQ_FUA.
+
+For remapping drivers the REQ_FUA bits need to be propagated to underlying
+devices, and a global flush needs to be implemented for bios with the
+REQ_PREFLUSH bit set.
+
+Implementation details for blk-mq drivers
+-----------------------------------------
+
+When the BLK_FEAT_WRITE_CACHE flag is set, REQ_OP_WRITE | REQ_PREFLUSH requests
+with a payload are automatically turned into a sequence of a REQ_OP_FLUSH
+request followed by the actual write by the block layer.
+
+When the BLK_FEAT_FUA flags is set, the REQ_FUA bit is simply passed on for the
+REQ_OP_WRITE request, else a REQ_OP_FLUSH request is sent by the block layer
+after the completion of the write request for bio submissions with the REQ_FUA
+bit set.
diff --git a/Documentation/block/writeback_cache_control.txt b/Documentation/block/writeback_cache_control.txt
deleted file mode 100644
index 8a6bdada5f6b..000000000000
--- a/Documentation/block/writeback_cache_control.txt
+++ /dev/null
@@ -1,86 +0,0 @@
-
-Explicit volatile write back cache control
-=====================================
-
-Introduction
-------------
-
-Many storage devices, especially in the consumer market, come with volatile
-write back caches. That means the devices signal I/O completion to the
-operating system before data actually has hit the non-volatile storage. This
-behavior obviously speeds up various workloads, but it means the operating
-system needs to force data out to the non-volatile storage when it performs
-a data integrity operation like fsync, sync or an unmount.
-
-The Linux block layer provides two simple mechanisms that let filesystems
-control the caching behavior of the storage device. These mechanisms are
-a forced cache flush, and the Force Unit Access (FUA) flag for requests.
-
-
-Explicit cache flushes
-----------------------
-
-The REQ_PREFLUSH flag can be OR ed into the r/w flags of a bio submitted from
-the filesystem and will make sure the volatile cache of the storage device
-has been flushed before the actual I/O operation is started. This explicitly
-guarantees that previously completed write requests are on non-volatile
-storage before the flagged bio starts. In addition the REQ_PREFLUSH flag can be
-set on an otherwise empty bio structure, which causes only an explicit cache
-flush without any dependent I/O. It is recommend to use
-the blkdev_issue_flush() helper for a pure cache flush.
-
-
-Forced Unit Access
------------------
-
-The REQ_FUA flag can be OR ed into the r/w flags of a bio submitted from the
-filesystem and will make sure that I/O completion for this request is only
-signaled after the data has been committed to non-volatile storage.
-
-
-Implementation details for filesystems
---------------------------------------
-
-Filesystems can simply set the REQ_PREFLUSH and REQ_FUA bits and do not have to
-worry if the underlying devices need any explicit cache flushing and how
-the Forced Unit Access is implemented. The REQ_PREFLUSH and REQ_FUA flags
-may both be set on a single bio.
-
-
-Implementation details for make_request_fn based block drivers
---------------------------------------------------------------
-
-These drivers will always see the REQ_PREFLUSH and REQ_FUA bits as they sit
-directly below the submit_bio interface. For remapping drivers the REQ_FUA
-bits need to be propagated to underlying devices, and a global flush needs
-to be implemented for bios with the REQ_PREFLUSH bit set. For real device
-drivers that do not have a volatile cache the REQ_PREFLUSH and REQ_FUA bits
-on non-empty bios can simply be ignored, and REQ_PREFLUSH requests without
-data can be completed successfully without doing any work. Drivers for
-devices with volatile caches need to implement the support for these
-flags themselves without any help from the block layer.
-
-
-Implementation details for request_fn based block drivers
---------------------------------------------------------------
-
-For devices that do not support volatile write caches there is no driver
-support required, the block layer completes empty REQ_PREFLUSH requests before
-entering the driver and strips off the REQ_PREFLUSH and REQ_FUA bits from
-requests that have a payload. For devices with volatile write caches the
-driver needs to tell the block layer that it supports flushing caches by
-doing:
-
- blk_queue_write_cache(sdkp->disk->queue, true, false);
-
-and handle empty REQ_OP_FLUSH requests in its prep_fn/request_fn. Note that
-REQ_PREFLUSH requests with a payload are automatically turned into a sequence
-of an empty REQ_OP_FLUSH request followed by the actual write by the block
-layer. For devices that also support the FUA bit the block layer needs
-to be told to pass through the REQ_FUA bit using:
-
- blk_queue_write_cache(sdkp->disk->queue, true, true);
-
-and the driver must handle write requests that have the REQ_FUA bit set
-in prep_fn/request_fn. If the FUA bit is not natively supported the block
-layer turns it into an empty REQ_OP_FLUSH request after the actual write.
diff --git a/Documentation/blockdev/drbd/README.txt b/Documentation/blockdev/drbd/README.txt
deleted file mode 100644
index 627b0a1bf35e..000000000000
--- a/Documentation/blockdev/drbd/README.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-Description
-
- DRBD is a shared-nothing, synchronously replicated block device. It
- is designed to serve as a building block for high availability
- clusters and in this context, is a "drop-in" replacement for shared
- storage. Simplistically, you could see it as a network RAID 1.
-
- Please visit http://www.drbd.org to find out more.
-
-The here included files are intended to help understand the implementation
-
-DRBD-8.3-data-packets.svg, DRBD-data-packets.svg
- relates some functions, and write packets.
-
-conn-states-8.dot, disk-states-8.dot, node-states-8.dot
- The sub graphs of DRBD's state transitions
diff --git a/Documentation/blockdev/drbd/data-structure-v9.txt b/Documentation/blockdev/drbd/data-structure-v9.txt
deleted file mode 100644
index 1e52a0e32624..000000000000
--- a/Documentation/blockdev/drbd/data-structure-v9.txt
+++ /dev/null
@@ -1,38 +0,0 @@
-This describes the in kernel data structure for DRBD-9. Starting with
-Linux v3.14 we are reorganizing DRBD to use this data structure.
-
-Basic Data Structure
-====================
-
-A node has a number of DRBD resources. Each such resource has a number of
-devices (aka volumes) and connections to other nodes ("peer nodes"). Each DRBD
-device is represented by a block device locally.
-
-The DRBD objects are interconnected to form a matrix as depicted below; a
-drbd_peer_device object sits at each intersection between a drbd_device and a
-drbd_connection:
-
- /--------------+---------------+.....+---------------\
- | resource | device | | device |
- +--------------+---------------+.....+---------------+
- | connection | peer_device | | peer_device |
- +--------------+---------------+.....+---------------+
- : : : : :
- : : : : :
- +--------------+---------------+.....+---------------+
- | connection | peer_device | | peer_device |
- \--------------+---------------+.....+---------------/
-
-In this table, horizontally, devices can be accessed from resources by their
-volume number. Likewise, peer_devices can be accessed from connections by
-their volume number. Objects in the vertical direction are connected by double
-linked lists. There are back pointers from peer_devices to their connections a
-devices, and from connections and devices to their resource.
-
-All resources are in the drbd_resources double-linked list. In addition, all
-devices can be accessed by their minor device number via the drbd_devices idr.
-
-The drbd_resource, drbd_connection, and drbd_device objects are reference
-counted. The peer_device objects only serve to establish the links between
-devices and connections; their lifetime is determined by the lifetime of the
-device and connection which they reference.
diff --git a/Documentation/blockdev/drbd/node-states-8.dot b/Documentation/blockdev/drbd/node-states-8.dot
deleted file mode 100644
index 4a2b00c23547..000000000000
--- a/Documentation/blockdev/drbd/node-states-8.dot
+++ /dev/null
@@ -1,14 +0,0 @@
-digraph node_states {
- Secondary -> Primary [ label = "ioctl_set_state()" ]
- Primary -> Secondary [ label = "ioctl_set_state()" ]
-}
-
-digraph peer_states {
- Secondary -> Primary [ label = "recv state packet" ]
- Primary -> Secondary [ label = "recv state packet" ]
- Primary -> Unknown [ label = "connection lost" ]
- Secondary -> Unknown [ label = "connection lost" ]
- Unknown -> Primary [ label = "connected" ]
- Unknown -> Secondary [ label = "connected" ]
-}
-
diff --git a/Documentation/blockdev/floppy.txt b/Documentation/blockdev/floppy.txt
deleted file mode 100644
index e2240f5ab64d..000000000000
--- a/Documentation/blockdev/floppy.txt
+++ /dev/null
@@ -1,245 +0,0 @@
-This file describes the floppy driver.
-
-FAQ list:
-=========
-
- A FAQ list may be found in the fdutils package (see below), and also
-at <http://fdutils.linux.lu/faq.html>.
-
-
-LILO configuration options (Thinkpad users, read this)
-======================================================
-
- The floppy driver is configured using the 'floppy=' option in
-lilo. This option can be typed at the boot prompt, or entered in the
-lilo configuration file.
-
- Example: If your kernel is called linux-2.6.9, type the following line
-at the lilo boot prompt (if you have a thinkpad):
-
- linux-2.6.9 floppy=thinkpad
-
-You may also enter the following line in /etc/lilo.conf, in the description
-of linux-2.6.9:
-
- append = "floppy=thinkpad"
-
- Several floppy related options may be given, example:
-
- linux-2.6.9 floppy=daring floppy=two_fdc
- append = "floppy=daring floppy=two_fdc"
-
- If you give options both in the lilo config file and on the boot
-prompt, the option strings of both places are concatenated, the boot
-prompt options coming last. That's why there are also options to
-restore the default behavior.
-
-
-Module configuration options
-============================
-
- If you use the floppy driver as a module, use the following syntax:
-modprobe floppy floppy="<options>"
-
-Example:
- modprobe floppy floppy="omnibook messages"
-
- If you need certain options enabled every time you load the floppy driver,
-you can put:
-
- options floppy floppy="omnibook messages"
-
-in a configuration file in /etc/modprobe.d/.
-
-
- The floppy driver related options are:
-
- floppy=asus_pci
- Sets the bit mask to allow only units 0 and 1. (default)
-
- floppy=daring
- Tells the floppy driver that you have a well behaved floppy controller.
- This allows more efficient and smoother operation, but may fail on
- certain controllers. This may speed up certain operations.
-
- floppy=0,daring
- Tells the floppy driver that your floppy controller should be used
- with caution.
-
- floppy=one_fdc
- Tells the floppy driver that you have only one floppy controller.
- (default)
-
- floppy=two_fdc
- floppy=<address>,two_fdc
- Tells the floppy driver that you have two floppy controllers.
- The second floppy controller is assumed to be at <address>.
- This option is not needed if the second controller is at address
- 0x370, and if you use the 'cmos' option.
-
- floppy=thinkpad
- Tells the floppy driver that you have a Thinkpad. Thinkpads use an
- inverted convention for the disk change line.
-
- floppy=0,thinkpad
- Tells the floppy driver that you don't have a Thinkpad.
-
- floppy=omnibook
- floppy=nodma
- Tells the floppy driver not to use Dma for data transfers.
- This is needed on HP Omnibooks, which don't have a workable
- DMA channel for the floppy driver. This option is also useful
- if you frequently get "Unable to allocate DMA memory" messages.
- Indeed, dma memory needs to be continuous in physical memory,
- and is thus harder to find, whereas non-dma buffers may be
- allocated in virtual memory. However, I advise against this if
- you have an FDC without a FIFO (8272A or 82072). 82072A and
- later are OK. You also need at least a 486 to use nodma.
- If you use nodma mode, I suggest you also set the FIFO
- threshold to 10 or lower, in order to limit the number of data
- transfer interrupts.
-
- If you have a FIFO-able FDC, the floppy driver automatically
- falls back on non DMA mode if no DMA-able memory can be found.
- If you want to avoid this, explicitly ask for 'yesdma'.
-
- floppy=yesdma
- Tells the floppy driver that a workable DMA channel is available.
- (default)
-
- floppy=nofifo
- Disables the FIFO entirely. This is needed if you get "Bus
- master arbitration error" messages from your Ethernet card (or
- from other devices) while accessing the floppy.
-
- floppy=usefifo
- Enables the FIFO. (default)
-
- floppy=<threshold>,fifo_depth
- Sets the FIFO threshold. This is mostly relevant in DMA
- mode. If this is higher, the floppy driver tolerates more
- interrupt latency, but it triggers more interrupts (i.e. it
- imposes more load on the rest of the system). If this is
- lower, the interrupt latency should be lower too (faster
- processor). The benefit of a lower threshold is less
- interrupts.
-
- To tune the fifo threshold, switch on over/underrun messages
- using 'floppycontrol --messages'. Then access a floppy
- disk. If you get a huge amount of "Over/Underrun - retrying"
- messages, then the fifo threshold is too low. Try with a
- higher value, until you only get an occasional Over/Underrun.
- It is a good idea to compile the floppy driver as a module
- when doing this tuning. Indeed, it allows to try different
- fifo values without rebooting the machine for each test. Note
- that you need to do 'floppycontrol --messages' every time you
- re-insert the module.
-
- Usually, tuning the fifo threshold should not be needed, as
- the default (0xa) is reasonable.
-
- floppy=<drive>,<type>,cmos
- Sets the CMOS type of <drive> to <type>. This is mandatory if
- you have more than two floppy drives (only two can be
- described in the physical CMOS), or if your BIOS uses
- non-standard CMOS types. The CMOS types are:
-
- 0 - Use the value of the physical CMOS
- 1 - 5 1/4 DD
- 2 - 5 1/4 HD
- 3 - 3 1/2 DD
- 4 - 3 1/2 HD
- 5 - 3 1/2 ED
- 6 - 3 1/2 ED
- 16 - unknown or not installed
-
- (Note: there are two valid types for ED drives. This is because 5 was
- initially chosen to represent floppy *tapes*, and 6 for ED drives.
- AMI ignored this, and used 5 for ED drives. That's why the floppy
- driver handles both.)
-
- floppy=unexpected_interrupts
- Print a warning message when an unexpected interrupt is received.
- (default)
-
- floppy=no_unexpected_interrupts
- floppy=L40SX
- Don't print a message when an unexpected interrupt is received. This
- is needed on IBM L40SX laptops in certain video modes. (There seems
- to be an interaction between video and floppy. The unexpected
- interrupts affect only performance, and can be safely ignored.)
-
- floppy=broken_dcl
- Don't use the disk change line, but assume that the disk was
- changed whenever the device node is reopened. Needed on some
- boxes where the disk change line is broken or unsupported.
- This should be regarded as a stopgap measure, indeed it makes
- floppy operation less efficient due to unneeded cache
- flushings, and slightly more unreliable. Please verify your
- cable, connection and jumper settings if you have any DCL
- problems. However, some older drives, and also some laptops
- are known not to have a DCL.
-
- floppy=debug
- Print debugging messages.
-
- floppy=messages
- Print informational messages for some operations (disk change
- notifications, warnings about over and underruns, and about
- autodetection).
-
- floppy=silent_dcl_clear
- Uses a less noisy way to clear the disk change line (which
- doesn't involve seeks). Implied by 'daring' option.
-
- floppy=<nr>,irq
- Sets the floppy IRQ to <nr> instead of 6.
-
- floppy=<nr>,dma
- Sets the floppy DMA channel to <nr> instead of 2.
-
- floppy=slow
- Use PS/2 stepping rate:
- " PS/2 floppies have much slower step rates than regular floppies.
- It's been recommended that take about 1/4 of the default speed
- in some more extreme cases."
-
-
-Supporting utilities and additional documentation:
-==================================================
-
- Additional parameters of the floppy driver can be configured at
-runtime. Utilities which do this can be found in the fdutils package.
-This package also contains a new version of mtools which allows to
-access high capacity disks (up to 1992K on a high density 3 1/2 disk!).
-It also contains additional documentation about the floppy driver.
-
-The latest version can be found at fdutils homepage:
- http://fdutils.linux.lu
-
-The fdutils releases can be found at:
- http://fdutils.linux.lu/download.html
- http://www.tux.org/pub/knaff/fdutils/
- ftp://metalab.unc.edu/pub/Linux/utils/disk-management/
-
-Reporting problems about the floppy driver
-==========================================
-
- If you have a question or a bug report about the floppy driver, mail
-me at Alain.Knaff@poboxes.com . If you post to Usenet, preferably use
-comp.os.linux.hardware. As the volume in these groups is rather high,
-be sure to include the word "floppy" (or "FLOPPY") in the subject
-line. If the reported problem happens when mounting floppy disks, be
-sure to mention also the type of the filesystem in the subject line.
-
- Be sure to read the FAQ before mailing/posting any bug reports!
-
- Alain
-
-Changelog
-=========
-
-10-30-2004 : Cleanup, updating, add reference to module configuration.
- James Nelson <james4765@gmail.com>
-
-6-3-2000 : Original Document
diff --git a/Documentation/blockdev/nbd.txt b/Documentation/blockdev/nbd.txt
deleted file mode 100644
index db242ea2bce8..000000000000
--- a/Documentation/blockdev/nbd.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-Network Block Device (TCP version)
-==================================
-
-1) Overview
------------
-
-What is it: With this compiled in the kernel (or as a module), Linux
-can use a remote server as one of its block devices. So every time
-the client computer wants to read, e.g., /dev/nb0, it sends a
-request over TCP to the server, which will reply with the data read.
-This can be used for stations with low disk space (or even diskless)
-to borrow disk space from another computer.
-Unlike NFS, it is possible to put any filesystem on it, etc.
-
-For more information, or to download the nbd-client and nbd-server
-tools, go to http://nbd.sf.net/.
-
-The nbd kernel module need only be installed on the client
-system, as the nbd-server is completely in userspace. In fact,
-the nbd-server has been successfully ported to other operating
-systems, including Windows.
-
-A) NBD parameters
------------------
-
-max_part
- Number of partitions per device (default: 0).
-
-nbds_max
- Number of block devices that should be initialized (default: 16).
-
diff --git a/Documentation/blockdev/paride.txt b/Documentation/blockdev/paride.txt
deleted file mode 100644
index ee6717e3771d..000000000000
--- a/Documentation/blockdev/paride.txt
+++ /dev/null
@@ -1,417 +0,0 @@
-
- Linux and parallel port IDE devices
-
-PARIDE v1.03 (c) 1997-8 Grant Guenther <grant@torque.net>
-
-1. Introduction
-
-Owing to the simplicity and near universality of the parallel port interface
-to personal computers, many external devices such as portable hard-disk,
-CD-ROM, LS-120 and tape drives use the parallel port to connect to their
-host computer. While some devices (notably scanners) use ad-hoc methods
-to pass commands and data through the parallel port interface, most
-external devices are actually identical to an internal model, but with
-a parallel-port adapter chip added in. Some of the original parallel port
-adapters were little more than mechanisms for multiplexing a SCSI bus.
-(The Iomega PPA-3 adapter used in the ZIP drives is an example of this
-approach). Most current designs, however, take a different approach.
-The adapter chip reproduces a small ISA or IDE bus in the external device
-and the communication protocol provides operations for reading and writing
-device registers, as well as data block transfer functions. Sometimes,
-the device being addressed via the parallel cable is a standard SCSI
-controller like an NCR 5380. The "ditto" family of external tape
-drives use the ISA replicator to interface a floppy disk controller,
-which is then connected to a floppy-tape mechanism. The vast majority
-of external parallel port devices, however, are now based on standard
-IDE type devices, which require no intermediate controller. If one
-were to open up a parallel port CD-ROM drive, for instance, one would
-find a standard ATAPI CD-ROM drive, a power supply, and a single adapter
-that interconnected a standard PC parallel port cable and a standard
-IDE cable. It is usually possible to exchange the CD-ROM device with
-any other device using the IDE interface.
-
-The document describes the support in Linux for parallel port IDE
-devices. It does not cover parallel port SCSI devices, "ditto" tape
-drives or scanners. Many different devices are supported by the
-parallel port IDE subsystem, including:
-
- MicroSolutions backpack CD-ROM
- MicroSolutions backpack PD/CD
- MicroSolutions backpack hard-drives
- MicroSolutions backpack 8000t tape drive
- SyQuest EZ-135, EZ-230 & SparQ drives
- Avatar Shark
- Imation Superdisk LS-120
- Maxell Superdisk LS-120
- FreeCom Power CD
- Hewlett-Packard 5GB and 8GB tape drives
- Hewlett-Packard 7100 and 7200 CD-RW drives
-
-as well as most of the clone and no-name products on the market.
-
-To support such a wide range of devices, PARIDE, the parallel port IDE
-subsystem, is actually structured in three parts. There is a base
-paride module which provides a registry and some common methods for
-accessing the parallel ports. The second component is a set of
-high-level drivers for each of the different types of supported devices:
-
- pd IDE disk
- pcd ATAPI CD-ROM
- pf ATAPI disk
- pt ATAPI tape
- pg ATAPI generic
-
-(Currently, the pg driver is only used with CD-R drives).
-
-The high-level drivers function according to the relevant standards.
-The third component of PARIDE is a set of low-level protocol drivers
-for each of the parallel port IDE adapter chips. Thanks to the interest
-and encouragement of Linux users from many parts of the world,
-support is available for almost all known adapter protocols:
-
- aten ATEN EH-100 (HK)
- bpck Microsolutions backpack (US)
- comm DataStor (old-type) "commuter" adapter (TW)
- dstr DataStor EP-2000 (TW)
- epat Shuttle EPAT (UK)
- epia Shuttle EPIA (UK)
- fit2 FIT TD-2000 (US)
- fit3 FIT TD-3000 (US)
- friq Freecom IQ cable (DE)
- frpw Freecom Power (DE)
- kbic KingByte KBIC-951A and KBIC-971A (TW)
- ktti KT Technology PHd adapter (SG)
- on20 OnSpec 90c20 (US)
- on26 OnSpec 90c26 (US)
-
-
-2. Using the PARIDE subsystem
-
-While configuring the Linux kernel, you may choose either to build
-the PARIDE drivers into your kernel, or to build them as modules.
-
-In either case, you will need to select "Parallel port IDE device support"
-as well as at least one of the high-level drivers and at least one
-of the parallel port communication protocols. If you do not know
-what kind of parallel port adapter is used in your drive, you could
-begin by checking the file names and any text files on your DOS
-installation floppy. Alternatively, you can look at the markings on
-the adapter chip itself. That's usually sufficient to identify the
-correct device.
-
-You can actually select all the protocol modules, and allow the PARIDE
-subsystem to try them all for you.
-
-For the "brand-name" products listed above, here are the protocol
-and high-level drivers that you would use:
-
- Manufacturer Model Driver Protocol
-
- MicroSolutions CD-ROM pcd bpck
- MicroSolutions PD drive pf bpck
- MicroSolutions hard-drive pd bpck
- MicroSolutions 8000t tape pt bpck
- SyQuest EZ, SparQ pd epat
- Imation Superdisk pf epat
- Maxell Superdisk pf friq
- Avatar Shark pd epat
- FreeCom CD-ROM pcd frpw
- Hewlett-Packard 5GB Tape pt epat
- Hewlett-Packard 7200e (CD) pcd epat
- Hewlett-Packard 7200e (CD-R) pg epat
-
-2.1 Configuring built-in drivers
-
-We recommend that you get to know how the drivers work and how to
-configure them as loadable modules, before attempting to compile a
-kernel with the drivers built-in.
-
-If you built all of your PARIDE support directly into your kernel,
-and you have just a single parallel port IDE device, your kernel should
-locate it automatically for you. If you have more than one device,
-you may need to give some command line options to your bootloader
-(eg: LILO), how to do that is beyond the scope of this document.
-
-The high-level drivers accept a number of command line parameters, all
-of which are documented in the source files in linux/drivers/block/paride.
-By default, each driver will automatically try all parallel ports it
-can find, and all protocol types that have been installed, until it finds
-a parallel port IDE adapter. Once it finds one, the probe stops. So,
-if you have more than one device, you will need to tell the drivers
-how to identify them. This requires specifying the port address, the
-protocol identification number and, for some devices, the drive's
-chain ID. While your system is booting, a number of messages are
-displayed on the console. Like all such messages, they can be
-reviewed with the 'dmesg' command. Among those messages will be
-some lines like:
-
- paride: bpck registered as protocol 0
- paride: epat registered as protocol 1
-
-The numbers will always be the same until you build a new kernel with
-different protocol selections. You should note these numbers as you
-will need them to identify the devices.
-
-If you happen to be using a MicroSolutions backpack device, you will
-also need to know the unit ID number for each drive. This is usually
-the last two digits of the drive's serial number (but read MicroSolutions'
-documentation about this).
-
-As an example, let's assume that you have a MicroSolutions PD/CD drive
-with unit ID number 36 connected to the parallel port at 0x378, a SyQuest
-EZ-135 connected to the chained port on the PD/CD drive and also an
-Imation Superdisk connected to port 0x278. You could give the following
-options on your boot command:
-
- pd.drive0=0x378,1 pf.drive0=0x278,1 pf.drive1=0x378,0,36
-
-In the last option, pf.drive1 configures device /dev/pf1, the 0x378
-is the parallel port base address, the 0 is the protocol registration
-number and 36 is the chain ID.
-
-Please note: while PARIDE will work both with and without the
-PARPORT parallel port sharing system that is included by the
-"Parallel port support" option, PARPORT must be included and enabled
-if you want to use chains of devices on the same parallel port.
-
-2.2 Loading and configuring PARIDE as modules
-
-It is much faster and simpler to get to understand the PARIDE drivers
-if you use them as loadable kernel modules.
-
-Note 1: using these drivers with the "kerneld" automatic module loading
-system is not recommended for beginners, and is not documented here.
-
-Note 2: if you build PARPORT support as a loadable module, PARIDE must
-also be built as loadable modules, and PARPORT must be loaded before the
-PARIDE modules.
-
-To use PARIDE, you must begin by
-
- insmod paride
-
-this loads a base module which provides a registry for the protocols,
-among other tasks.
-
-Then, load as many of the protocol modules as you think you might need.
-As you load each module, it will register the protocols that it supports,
-and print a log message to your kernel log file and your console. For
-example:
-
- # insmod epat
- paride: epat registered as protocol 0
- # insmod kbic
- paride: k951 registered as protocol 1
- paride: k971 registered as protocol 2
-
-Finally, you can load high-level drivers for each kind of device that
-you have connected. By default, each driver will autoprobe for a single
-device, but you can support up to four similar devices by giving their
-individual co-ordinates when you load the driver.
-
-For example, if you had two no-name CD-ROM drives both using the
-KingByte KBIC-951A adapter, one on port 0x378 and the other on 0x3bc
-you could give the following command:
-
- # insmod pcd drive0=0x378,1 drive1=0x3bc,1
-
-For most adapters, giving a port address and protocol number is sufficient,
-but check the source files in linux/drivers/block/paride for more
-information. (Hopefully someone will write some man pages one day !).
-
-As another example, here's what happens when PARPORT is installed, and
-a SyQuest EZ-135 is attached to port 0x378:
-
- # insmod paride
- paride: version 1.0 installed
- # insmod epat
- paride: epat registered as protocol 0
- # insmod pd
- pd: pd version 1.0, major 45, cluster 64, nice 0
- pda: Sharing parport1 at 0x378
- pda: epat 1.0, Shuttle EPAT chip c3 at 0x378, mode 5 (EPP-32), delay 1
- pda: SyQuest EZ135A, 262144 blocks [128M], (512/16/32), removable media
- pda: pda1
-
-Note that the last line is the output from the generic partition table
-scanner - in this case it reports that it has found a disk with one partition.
-
-2.3 Using a PARIDE device
-
-Once the drivers have been loaded, you can access PARIDE devices in the
-same way as their traditional counterparts. You will probably need to
-create the device "special files". Here is a simple script that you can
-cut to a file and execute:
-
-#!/bin/bash
-#
-# mkd -- a script to create the device special files for the PARIDE subsystem
-#
-function mkdev {
- mknod $1 $2 $3 $4 ; chmod 0660 $1 ; chown root:disk $1
-}
-#
-function pd {
- D=$( printf \\$( printf "x%03x" $[ $1 + 97 ] ) )
- mkdev pd$D b 45 $[ $1 * 16 ]
- for P in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
- do mkdev pd$D$P b 45 $[ $1 * 16 + $P ]
- done
-}
-#
-cd /dev
-#
-for u in 0 1 2 3 ; do pd $u ; done
-for u in 0 1 2 3 ; do mkdev pcd$u b 46 $u ; done
-for u in 0 1 2 3 ; do mkdev pf$u b 47 $u ; done
-for u in 0 1 2 3 ; do mkdev pt$u c 96 $u ; done
-for u in 0 1 2 3 ; do mkdev npt$u c 96 $[ $u + 128 ] ; done
-for u in 0 1 2 3 ; do mkdev pg$u c 97 $u ; done
-#
-# end of mkd
-
-With the device files and drivers in place, you can access PARIDE devices
-like any other Linux device. For example, to mount a CD-ROM in pcd0, use:
-
- mount /dev/pcd0 /cdrom
-
-If you have a fresh Avatar Shark cartridge, and the drive is pda, you
-might do something like:
-
- fdisk /dev/pda -- make a new partition table with
- partition 1 of type 83
-
- mke2fs /dev/pda1 -- to build the file system
-
- mkdir /shark -- make a place to mount the disk
-
- mount /dev/pda1 /shark
-
-Devices like the Imation superdisk work in the same way, except that
-they do not have a partition table. For example to make a 120MB
-floppy that you could share with a DOS system:
-
- mkdosfs /dev/pf0
- mount /dev/pf0 /mnt
-
-
-2.4 The pf driver
-
-The pf driver is intended for use with parallel port ATAPI disk
-devices. The most common devices in this category are PD drives
-and LS-120 drives. Traditionally, media for these devices are not
-partitioned. Consequently, the pf driver does not support partitioned
-media. This may be changed in a future version of the driver.
-
-2.5 Using the pt driver
-
-The pt driver for parallel port ATAPI tape drives is a minimal driver.
-It does not yet support many of the standard tape ioctl operations.
-For best performance, a block size of 32KB should be used. You will
-probably want to set the parallel port delay to 0, if you can.
-
-2.6 Using the pg driver
-
-The pg driver can be used in conjunction with the cdrecord program
-to create CD-ROMs. Please get cdrecord version 1.6.1 or later
-from ftp://ftp.fokus.gmd.de/pub/unix/cdrecord/ . To record CD-R media
-your parallel port should ideally be set to EPP mode, and the "port delay"
-should be set to 0. With those settings it is possible to record at 2x
-speed without any buffer underruns. If you cannot get the driver to work
-in EPP mode, try to use "bidirectional" or "PS/2" mode and 1x speeds only.
-
-
-3. Troubleshooting
-
-3.1 Use EPP mode if you can
-
-The most common problems that people report with the PARIDE drivers
-concern the parallel port CMOS settings. At this time, none of the
-PARIDE protocol modules support ECP mode, or any ECP combination modes.
-If you are able to do so, please set your parallel port into EPP mode
-using your CMOS setup procedure.
-
-3.2 Check the port delay
-
-Some parallel ports cannot reliably transfer data at full speed. To
-offset the errors, the PARIDE protocol modules introduce a "port
-delay" between each access to the i/o ports. Each protocol sets
-a default value for this delay. In most cases, the user can override
-the default and set it to 0 - resulting in somewhat higher transfer
-rates. In some rare cases (especially with older 486 systems) the
-default delays are not long enough. if you experience corrupt data
-transfers, or unexpected failures, you may wish to increase the
-port delay. The delay can be programmed using the "driveN" parameters
-to each of the high-level drivers. Please see the notes above, or
-read the comments at the beginning of the driver source files in
-linux/drivers/block/paride.
-
-3.3 Some drives need a printer reset
-
-There appear to be a number of "noname" external drives on the market
-that do not always power up correctly. We have noticed this with some
-drives based on OnSpec and older Freecom adapters. In these rare cases,
-the adapter can often be reinitialised by issuing a "printer reset" on
-the parallel port. As the reset operation is potentially disruptive in
-multiple device environments, the PARIDE drivers will not do it
-automatically. You can however, force a printer reset by doing:
-
- insmod lp reset=1
- rmmod lp
-
-If you have one of these marginal cases, you should probably build
-your paride drivers as modules, and arrange to do the printer reset
-before loading the PARIDE drivers.
-
-3.4 Use the verbose option and dmesg if you need help
-
-While a lot of testing has gone into these drivers to make them work
-as smoothly as possible, problems will arise. If you do have problems,
-please check all the obvious things first: does the drive work in
-DOS with the manufacturer's drivers ? If that doesn't yield any useful
-clues, then please make sure that only one drive is hooked to your system,
-and that either (a) PARPORT is enabled or (b) no other device driver
-is using your parallel port (check in /proc/ioports). Then, load the
-appropriate drivers (you can load several protocol modules if you want)
-as in:
-
- # insmod paride
- # insmod epat
- # insmod bpck
- # insmod kbic
- ...
- # insmod pd verbose=1
-
-(using the correct driver for the type of device you have, of course).
-The verbose=1 parameter will cause the drivers to log a trace of their
-activity as they attempt to locate your drive.
-
-Use 'dmesg' to capture a log of all the PARIDE messages (any messages
-beginning with paride:, a protocol module's name or a driver's name) and
-include that with your bug report. You can submit a bug report in one
-of two ways. Either send it directly to the author of the PARIDE suite,
-by e-mail to grant@torque.net, or join the linux-parport mailing list
-and post your report there.
-
-3.5 For more information or help
-
-You can join the linux-parport mailing list by sending a mail message
-to
- linux-parport-request@torque.net
-
-with the single word
-
- subscribe
-
-in the body of the mail message (not in the subject line). Please be
-sure that your mail program is correctly set up when you do this, as
-the list manager is a robot that will subscribe you using the reply
-address in your mail headers. REMOVE any anti-spam gimmicks you may
-have in your mail headers, when sending mail to the list server.
-
-You might also find some useful information on the linux-parport
-web pages (although they are not always up to date) at
-
- http://web.archive.org/web/*/http://www.torque.net/parport/
-
-
diff --git a/Documentation/blockdev/ramdisk.txt b/Documentation/blockdev/ramdisk.txt
deleted file mode 100644
index 501e12e0323e..000000000000
--- a/Documentation/blockdev/ramdisk.txt
+++ /dev/null
@@ -1,174 +0,0 @@
-Using the RAM disk block device with Linux
-------------------------------------------
-
-Contents:
-
- 1) Overview
- 2) Kernel Command Line Parameters
- 3) Using "rdev -r"
- 4) An Example of Creating a Compressed RAM Disk
-
-
-1) Overview
------------
-
-The RAM disk driver is a way to use main system memory as a block device. It
-is required for initrd, an initial filesystem used if you need to load modules
-in order to access the root filesystem (see Documentation/admin-guide/initrd.rst). It can
-also be used for a temporary filesystem for crypto work, since the contents
-are erased on reboot.
-
-The RAM disk dynamically grows as more space is required. It does this by using
-RAM from the buffer cache. The driver marks the buffers it is using as dirty
-so that the VM subsystem does not try to reclaim them later.
-
-The RAM disk supports up to 16 RAM disks by default, and can be reconfigured
-to support an unlimited number of RAM disks (at your own risk). Just change
-the configuration symbol BLK_DEV_RAM_COUNT in the Block drivers config menu
-and (re)build the kernel.
-
-To use RAM disk support with your system, run './MAKEDEV ram' from the /dev
-directory. RAM disks are all major number 1, and start with minor number 0
-for /dev/ram0, etc. If used, modern kernels use /dev/ram0 for an initrd.
-
-The new RAM disk also has the ability to load compressed RAM disk images,
-allowing one to squeeze more programs onto an average installation or
-rescue floppy disk.
-
-
-2) Parameters
----------------------------------
-
-2a) Kernel Command Line Parameters
-
- ramdisk_size=N
- ==============
-
-This parameter tells the RAM disk driver to set up RAM disks of N k size. The
-default is 4096 (4 MB).
-
-2b) Module parameters
-
- rd_nr
- =====
- /dev/ramX devices created.
-
- max_part
- ========
- Maximum partition number.
-
- rd_size
- =======
- See ramdisk_size.
-
-3) Using "rdev -r"
-------------------
-
-The usage of the word (two bytes) that "rdev -r" sets in the kernel image is
-as follows. The low 11 bits (0 -> 10) specify an offset (in 1 k blocks) of up
-to 2 MB (2^11) of where to find the RAM disk (this used to be the size). Bit
-14 indicates that a RAM disk is to be loaded, and bit 15 indicates whether a
-prompt/wait sequence is to be given before trying to read the RAM disk. Since
-the RAM disk dynamically grows as data is being written into it, a size field
-is not required. Bits 11 to 13 are not currently used and may as well be zero.
-These numbers are no magical secrets, as seen below:
-
-./arch/x86/kernel/setup.c:#define RAMDISK_IMAGE_START_MASK 0x07FF
-./arch/x86/kernel/setup.c:#define RAMDISK_PROMPT_FLAG 0x8000
-./arch/x86/kernel/setup.c:#define RAMDISK_LOAD_FLAG 0x4000
-
-Consider a typical two floppy disk setup, where you will have the
-kernel on disk one, and have already put a RAM disk image onto disk #2.
-
-Hence you want to set bits 0 to 13 as 0, meaning that your RAM disk
-starts at an offset of 0 kB from the beginning of the floppy.
-The command line equivalent is: "ramdisk_start=0"
-
-You want bit 14 as one, indicating that a RAM disk is to be loaded.
-The command line equivalent is: "load_ramdisk=1"
-
-You want bit 15 as one, indicating that you want a prompt/keypress
-sequence so that you have a chance to switch floppy disks.
-The command line equivalent is: "prompt_ramdisk=1"
-
-Putting that together gives 2^15 + 2^14 + 0 = 49152 for an rdev word.
-So to create disk one of the set, you would do:
-
- /usr/src/linux# cat arch/x86/boot/zImage > /dev/fd0
- /usr/src/linux# rdev /dev/fd0 /dev/fd0
- /usr/src/linux# rdev -r /dev/fd0 49152
-
-If you make a boot disk that has LILO, then for the above, you would use:
- append = "ramdisk_start=0 load_ramdisk=1 prompt_ramdisk=1"
-Since the default start = 0 and the default prompt = 1, you could use:
- append = "load_ramdisk=1"
-
-
-4) An Example of Creating a Compressed RAM Disk
-----------------------------------------------
-
-To create a RAM disk image, you will need a spare block device to
-construct it on. This can be the RAM disk device itself, or an
-unused disk partition (such as an unmounted swap partition). For this
-example, we will use the RAM disk device, "/dev/ram0".
-
-Note: This technique should not be done on a machine with less than 8 MB
-of RAM. If using a spare disk partition instead of /dev/ram0, then this
-restriction does not apply.
-
-a) Decide on the RAM disk size that you want. Say 2 MB for this example.
- Create it by writing to the RAM disk device. (This step is not currently
- required, but may be in the future.) It is wise to zero out the
- area (esp. for disks) so that maximal compression is achieved for
- the unused blocks of the image that you are about to create.
-
- dd if=/dev/zero of=/dev/ram0 bs=1k count=2048
-
-b) Make a filesystem on it. Say ext2fs for this example.
-
- mke2fs -vm0 /dev/ram0 2048
-
-c) Mount it, copy the files you want to it (eg: /etc/* /dev/* ...)
- and unmount it again.
-
-d) Compress the contents of the RAM disk. The level of compression
- will be approximately 50% of the space used by the files. Unused
- space on the RAM disk will compress to almost nothing.
-
- dd if=/dev/ram0 bs=1k count=2048 | gzip -v9 > /tmp/ram_image.gz
-
-e) Put the kernel onto the floppy
-
- dd if=zImage of=/dev/fd0 bs=1k
-
-f) Put the RAM disk image onto the floppy, after the kernel. Use an offset
- that is slightly larger than the kernel, so that you can put another
- (possibly larger) kernel onto the same floppy later without overlapping
- the RAM disk image. An offset of 400 kB for kernels about 350 kB in
- size would be reasonable. Make sure offset+size of ram_image.gz is
- not larger than the total space on your floppy (usually 1440 kB).
-
- dd if=/tmp/ram_image.gz of=/dev/fd0 bs=1k seek=400
-
-g) Use "rdev" to set the boot device, RAM disk offset, prompt flag, etc.
- For prompt_ramdisk=1, load_ramdisk=1, ramdisk_start=400, one would
- have 2^15 + 2^14 + 400 = 49552.
-
- rdev /dev/fd0 /dev/fd0
- rdev -r /dev/fd0 49552
-
-That is it. You now have your boot/root compressed RAM disk floppy. Some
-users may wish to combine steps (d) and (f) by using a pipe.
-
---------------------------------------------------------------------------
- Paul Gortmaker 12/95
-
-Changelog:
-----------
-
-10-22-04 : Updated to reflect changes in command line options, remove
- obsolete references, general cleanup.
- James Nelson (james4765@gmail.com)
-
-
-12-95 : Original Document
diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.txt
deleted file mode 100644
index 3c1b5ab54bc0..000000000000
--- a/Documentation/blockdev/zram.txt
+++ /dev/null
@@ -1,271 +0,0 @@
-zram: Compressed RAM based block devices
-----------------------------------------
-
-* Introduction
-
-The zram module creates RAM based block devices named /dev/zram<id>
-(<id> = 0, 1, ...). Pages written to these disks are compressed and stored
-in memory itself. These disks allow very fast I/O and compression provides
-good amounts of memory savings. Some of the usecases include /tmp storage,
-use as swap disks, various caches under /var and maybe many more :)
-
-Statistics for individual zram devices are exported through sysfs nodes at
-/sys/block/zram<id>/
-
-* Usage
-
-There are several ways to configure and manage zram device(-s):
-a) using zram and zram_control sysfs attributes
-b) using zramctl utility, provided by util-linux (util-linux@vger.kernel.org).
-
-In this document we will describe only 'manual' zram configuration steps,
-IOW, zram and zram_control sysfs attributes.
-
-In order to get a better idea about zramctl please consult util-linux
-documentation, zramctl man-page or `zramctl --help'. Please be informed
-that zram maintainers do not develop/maintain util-linux or zramctl, should
-you have any questions please contact util-linux@vger.kernel.org
-
-Following shows a typical sequence of steps for using zram.
-
-WARNING
-=======
-For the sake of simplicity we skip error checking parts in most of the
-examples below. However, it is your sole responsibility to handle errors.
-
-zram sysfs attributes always return negative values in case of errors.
-The list of possible return codes:
--EBUSY -- an attempt to modify an attribute that cannot be changed once
-the device has been initialised. Please reset device first;
--ENOMEM -- zram was not able to allocate enough memory to fulfil your
-needs;
--EINVAL -- invalid input has been provided.
-
-If you use 'echo', the returned value that is changed by 'echo' utility,
-and, in general case, something like:
-
- echo 3 > /sys/block/zram0/max_comp_streams
- if [ $? -ne 0 ];
- handle_error
- fi
-
-should suffice.
-
-1) Load Module:
- modprobe zram num_devices=4
- This creates 4 devices: /dev/zram{0,1,2,3}
-
-num_devices parameter is optional and tells zram how many devices should be
-pre-created. Default: 1.
-
-2) Set max number of compression streams
-Regardless the value passed to this attribute, ZRAM will always
-allocate multiple compression streams - one per online CPUs - thus
-allowing several concurrent compression operations. The number of
-allocated compression streams goes down when some of the CPUs
-become offline. There is no single-compression-stream mode anymore,
-unless you are running a UP system or has only 1 CPU online.
-
-To find out how many streams are currently available:
- cat /sys/block/zram0/max_comp_streams
-
-3) Select compression algorithm
-Using comp_algorithm device attribute one can see available and
-currently selected (shown in square brackets) compression algorithms,
-change selected compression algorithm (once the device is initialised
-there is no way to change compression algorithm).
-
-Examples:
- #show supported compression algorithms
- cat /sys/block/zram0/comp_algorithm
- lzo [lz4]
-
- #select lzo compression algorithm
- echo lzo > /sys/block/zram0/comp_algorithm
-
-For the time being, the `comp_algorithm' content does not necessarily
-show every compression algorithm supported by the kernel. We keep this
-list primarily to simplify device configuration and one can configure
-a new device with a compression algorithm that is not listed in
-`comp_algorithm'. The thing is that, internally, ZRAM uses Crypto API
-and, if some of the algorithms were built as modules, it's impossible
-to list all of them using, for instance, /proc/crypto or any other
-method. This, however, has an advantage of permitting the usage of
-custom crypto compression modules (implementing S/W or H/W compression).
-
-4) Set Disksize
-Set disk size by writing the value to sysfs node 'disksize'.
-The value can be either in bytes or you can use mem suffixes.
-Examples:
- # Initialize /dev/zram0 with 50MB disksize
- echo $((50*1024*1024)) > /sys/block/zram0/disksize
-
- # Using mem suffixes
- echo 256K > /sys/block/zram0/disksize
- echo 512M > /sys/block/zram0/disksize
- echo 1G > /sys/block/zram0/disksize
-
-Note:
-There is little point creating a zram of greater than twice the size of memory
-since we expect a 2:1 compression ratio. Note that zram uses about 0.1% of the
-size of the disk when not in use so a huge zram is wasteful.
-
-5) Set memory limit: Optional
-Set memory limit by writing the value to sysfs node 'mem_limit'.
-The value can be either in bytes or you can use mem suffixes.
-In addition, you could change the value in runtime.
-Examples:
- # limit /dev/zram0 with 50MB memory
- echo $((50*1024*1024)) > /sys/block/zram0/mem_limit
-
- # Using mem suffixes
- echo 256K > /sys/block/zram0/mem_limit
- echo 512M > /sys/block/zram0/mem_limit
- echo 1G > /sys/block/zram0/mem_limit
-
- # To disable memory limit
- echo 0 > /sys/block/zram0/mem_limit
-
-6) Activate:
- mkswap /dev/zram0
- swapon /dev/zram0
-
- mkfs.ext4 /dev/zram1
- mount /dev/zram1 /tmp
-
-7) Add/remove zram devices
-
-zram provides a control interface, which enables dynamic (on-demand) device
-addition and removal.
-
-In order to add a new /dev/zramX device, perform read operation on hot_add
-attribute. This will return either new device's device id (meaning that you
-can use /dev/zram<id>) or error code.
-
-Example:
- cat /sys/class/zram-control/hot_add
- 1
-
-To remove the existing /dev/zramX device (where X is a device id)
-execute
- echo X > /sys/class/zram-control/hot_remove
-
-8) Stats:
-Per-device statistics are exported as various nodes under /sys/block/zram<id>/
-
-A brief description of exported device attributes. For more details please
-read Documentation/ABI/testing/sysfs-block-zram.
-
-Name access description
----- ------ -----------
-disksize RW show and set the device's disk size
-initstate RO shows the initialization state of the device
-reset WO trigger device reset
-mem_used_max WO reset the `mem_used_max' counter (see later)
-mem_limit WO specifies the maximum amount of memory ZRAM can use
- to store the compressed data
-max_comp_streams RW the number of possible concurrent compress operations
-comp_algorithm RW show and change the compression algorithm
-compact WO trigger memory compaction
-debug_stat RO this file is used for zram debugging purposes
-backing_dev RW set up backend storage for zram to write out
-
-
-User space is advised to use the following files to read the device statistics.
-
-File /sys/block/zram<id>/stat
-
-Represents block layer statistics. Read Documentation/block/stat.txt for
-details.
-
-File /sys/block/zram<id>/io_stat
-
-The stat file represents device's I/O statistics not accounted by block
-layer and, thus, not available in zram<id>/stat file. It consists of a
-single line of text and contains the following stats separated by
-whitespace:
- failed_reads the number of failed reads
- failed_writes the number of failed writes
- invalid_io the number of non-page-size-aligned I/O requests
- notify_free Depending on device usage scenario it may account
- a) the number of pages freed because of swap slot free
- notifications or b) the number of pages freed because of
- REQ_OP_DISCARD requests sent by bio. The former ones are
- sent to a swap block device when a swap slot is freed,
- which implies that this disk is being used as a swap disk.
- The latter ones are sent by filesystem mounted with
- discard option, whenever some data blocks are getting
- discarded.
-
-File /sys/block/zram<id>/mm_stat
-
-The stat file represents device's mm statistics. It consists of a single
-line of text and contains the following stats separated by whitespace:
- orig_data_size uncompressed size of data stored in this disk.
- This excludes same-element-filled pages (same_pages) since
- no memory is allocated for them.
- Unit: bytes
- compr_data_size compressed size of data stored in this disk
- mem_used_total the amount of memory allocated for this disk. This
- includes allocator fragmentation and metadata overhead,
- allocated for this disk. So, allocator space efficiency
- can be calculated using compr_data_size and this statistic.
- Unit: bytes
- mem_limit the maximum amount of memory ZRAM can use to store
- the compressed data
- mem_used_max the maximum amount of memory zram have consumed to
- store the data
- same_pages the number of same element filled pages written to this disk.
- No memory is allocated for such pages.
- pages_compacted the number of pages freed during compaction
- huge_pages the number of incompressible pages
-
-9) Deactivate:
- swapoff /dev/zram0
- umount /dev/zram1
-
-10) Reset:
- Write any positive value to 'reset' sysfs node
- echo 1 > /sys/block/zram0/reset
- echo 1 > /sys/block/zram1/reset
-
- This frees all the memory allocated for the given device and
- resets the disksize to zero. You must set the disksize again
- before reusing the device.
-
-* Optional Feature
-
-= writeback
-
-With incompressible pages, there is no memory saving with zram.
-Instead, with CONFIG_ZRAM_WRITEBACK, zram can write incompressible page
-to backing storage rather than keeping it in memory.
-User should set up backing device via /sys/block/zramX/backing_dev
-before disksize setting.
-
-= memory tracking
-
-With CONFIG_ZRAM_MEMORY_TRACKING, user can know information of the
-zram block. It could be useful to catch cold or incompressible
-pages of the process with*pagemap.
-If you enable the feature, you could see block state via
-/sys/kernel/debug/zram/zram0/block_state". The output is as follows,
-
- 300 75.033841 .wh
- 301 63.806904 s..
- 302 63.806919 ..h
-
-First column is zram's block index.
-Second column is access time since the system was booted
-Third column is state of the block.
-(s: same page
-w: written page to backing store
-h: huge page)
-
-First line of above example says 300th block is accessed at 75.033841sec
-and the block's state is huge so it is written back to the backing
-storage. It's a debugging feature so anyone shouldn't rely on it to work
-properly.
-
-Nitin Gupta
-ngupta@vflare.org
diff --git a/Documentation/bpf/bpf_design_QA.rst b/Documentation/bpf/bpf_design_QA.rst
index 6780a6d81745..eb19c945f4d5 100644
--- a/Documentation/bpf/bpf_design_QA.rst
+++ b/Documentation/bpf/bpf_design_QA.rst
@@ -36,27 +36,27 @@ consideration important quirks of other architectures) and
defines calling convention that is compatible with C calling
convention of the linux kernel on those architectures.
-Q: can multiple return values be supported in the future?
+Q: Can multiple return values be supported in the future?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
A: NO. BPF allows only register R0 to be used as return value.
-Q: can more than 5 function arguments be supported in the future?
+Q: Can more than 5 function arguments be supported in the future?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
A: NO. BPF calling convention only allows registers R1-R5 to be used
as arguments. BPF is not a standalone instruction set.
(unlike x64 ISA that allows msft, cdecl and other conventions)
-Q: can BPF programs access instruction pointer or return address?
+Q: Can BPF programs access instruction pointer or return address?
-----------------------------------------------------------------
A: NO.
-Q: can BPF programs access stack pointer ?
+Q: Can BPF programs access stack pointer ?
------------------------------------------
A: NO.
Only frame pointer (register R10) is accessible.
From compiler point of view it's necessary to have stack pointer.
-For example LLVM defines register R11 as stack pointer in its
+For example, LLVM defines register R11 as stack pointer in its
BPF backend, but it makes sure that generated code never uses it.
Q: Does C-calling convention diminishes possible use cases?
@@ -66,8 +66,8 @@ A: YES.
BPF design forces addition of major functionality in the form
of kernel helper functions and kernel objects like BPF maps with
seamless interoperability between them. It lets kernel call into
-BPF programs and programs call kernel helpers with zero overhead.
-As all of them were native C code. That is particularly the case
+BPF programs and programs call kernel helpers with zero overhead,
+as all of them were native C code. That is particularly the case
for JITed BPF programs that are indistinguishable from
native kernel C code.
@@ -75,9 +75,9 @@ Q: Does it mean that 'innovative' extensions to BPF code are disallowed?
------------------------------------------------------------------------
A: Soft yes.
-At least for now until BPF core has support for
+At least for now, until BPF core has support for
bpf-to-bpf calls, indirect calls, loops, global variables,
-jump tables, read only sections and all other normal constructs
+jump tables, read-only sections, and all other normal constructs
that C code can produce.
Q: Can loops be supported in a safe way?
@@ -85,8 +85,33 @@ Q: Can loops be supported in a safe way?
A: It's not clear yet.
BPF developers are trying to find a way to
-support bounded loops where the verifier can guarantee that
-the program terminates in less than 4096 instructions.
+support bounded loops.
+
+Q: What are the verifier limits?
+--------------------------------
+A: The only limit known to the user space is BPF_MAXINSNS (4096).
+It's the maximum number of instructions that the unprivileged bpf
+program can have. The verifier has various internal limits.
+Like the maximum number of instructions that can be explored during
+program analysis. Currently, that limit is set to 1 million.
+Which essentially means that the largest program can consist
+of 1 million NOP instructions. There is a limit to the maximum number
+of subsequent branches, a limit to the number of nested bpf-to-bpf
+calls, a limit to the number of the verifier states per instruction,
+a limit to the number of maps used by the program.
+All these limits can be hit with a sufficiently complex program.
+There are also non-numerical limits that can cause the program
+to be rejected. The verifier used to recognize only pointer + constant
+expressions. Now it can recognize pointer + bounded_register.
+bpf_lookup_map_elem(key) had a requirement that 'key' must be
+a pointer to the stack. Now, 'key' can be a pointer to map value.
+The verifier is steadily getting 'smarter'. The limits are
+being removed. The only way to know that the program is going to
+be accepted by the verifier is to try to load it.
+The bpf development process guarantees that the future kernel
+versions will accept all bpf programs that were accepted by
+the earlier versions.
+
Instruction level questions
---------------------------
@@ -109,17 +134,12 @@ For example why BPF_JNE and other compare and jumps are not cpu-like?
A: This was necessary to avoid introducing flags into ISA which are
impossible to make generic and efficient across CPU architectures.
-Q: why BPF_DIV instruction doesn't map to x64 div?
+Q: Why BPF_DIV instruction doesn't map to x64 div?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
A: Because if we picked one-to-one relationship to x64 it would have made
it more complicated to support on arm64 and other archs. Also it
needs div-by-zero runtime check.
-Q: why there is no BPF_SDIV for signed divide operation?
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-A: Because it would be rarely used. llvm errors in such case and
-prints a suggestion to use unsigned divide instead
-
Q: Why BPF has implicit prologue and epilogue?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
A: Because architectures like sparc have register windows and in general
@@ -147,22 +167,57 @@ registers which makes BPF inefficient virtual machine for 32-bit
CPU architectures and 32-bit HW accelerators. Can true 32-bit registers
be added to BPF in the future?
-A: NO. The first thing to improve performance on 32-bit archs is to teach
-LLVM to generate code that uses 32-bit subregisters. Then second step
-is to teach verifier to mark operations where zero-ing upper bits
-is unnecessary. Then JITs can take advantage of those markings and
-drastically reduce size of generated code and improve performance.
+A: NO.
+
+But some optimizations on zero-ing the upper 32 bits for BPF registers are
+available, and can be leveraged to improve the performance of JITed BPF
+programs for 32-bit architectures.
+
+Starting with version 7, LLVM is able to generate instructions that operate
+on 32-bit subregisters, provided the option -mattr=+alu32 is passed for
+compiling a program. Furthermore, the verifier can now mark the
+instructions for which zero-ing the upper bits of the destination register
+is required, and insert an explicit zero-extension (zext) instruction
+(a mov32 variant). This means that for architectures without zext hardware
+support, the JIT back-ends do not need to clear the upper bits for
+subregisters written by alu32 instructions or narrow loads. Instead, the
+back-ends simply need to support code generation for that mov32 variant,
+and to overwrite bpf_jit_needs_zext() to make it return "true" (in order to
+enable zext insertion in the verifier).
+
+Note that it is possible for a JIT back-end to have partial hardware
+support for zext. In that case, if verifier zext insertion is enabled,
+it could lead to the insertion of unnecessary zext instructions. Such
+instructions could be removed by creating a simple peephole inside the JIT
+back-end: if one instruction has hardware support for zext and if the next
+instruction is an explicit zext, then the latter can be skipped when doing
+the code generation.
Q: Does BPF have a stable ABI?
------------------------------
A: YES. BPF instructions, arguments to BPF programs, set of helper
functions and their arguments, recognized return codes are all part
-of ABI. However when tracing programs are using bpf_probe_read() helper
-to walk kernel internal datastructures and compile with kernel
-internal headers these accesses can and will break with newer
-kernels. The union bpf_attr -> kern_version is checked at load time
-to prevent accidentally loading kprobe-based bpf programs written
-for a different kernel. Networking programs don't do kern_version check.
+of ABI. However there is one specific exception to tracing programs
+which are using helpers like bpf_probe_read() to walk kernel internal
+data structures and compile with kernel internal headers. Both of these
+kernel internals are subject to change and can break with newer kernels
+such that the program needs to be adapted accordingly.
+
+New BPF functionality is generally added through the use of kfuncs instead of
+new helpers. Kfuncs are not considered part of the stable API, and have their own
+lifecycle expectations as described in :ref:`BPF_kfunc_lifecycle_expectations`.
+
+Q: Are tracepoints part of the stable ABI?
+------------------------------------------
+A: NO. Tracepoints are tied to internal implementation details hence they are
+subject to change and can break with newer kernels. BPF programs need to change
+accordingly when this happens.
+
+Q: Are places where kprobes can attach part of the stable ABI?
+--------------------------------------------------------------
+A: NO. The places to which kprobes can attach are internal implementation
+details, which means that they are subject to change and can break with
+newer kernels. BPF programs need to change accordingly when this happens.
Q: How much stack space a BPF program uses?
-------------------------------------------
@@ -180,8 +235,8 @@ A: NO. Classic BPF programs are converted into extend BPF instructions.
Q: Can BPF call arbitrary kernel functions?
-------------------------------------------
-A: NO. BPF programs can only call a set of helper functions which
-is defined for every program type.
+A: NO. BPF programs can only call specific functions exposed as BPF helpers or
+kfuncs. The set of available functions is defined for every program type.
Q: Can BPF overwrite arbitrary kernel memory?
---------------------------------------------
@@ -202,20 +257,95 @@ program is loaded the kernel will print warning message, so
this helper is only useful for experiments and prototypes.
Tracing BPF programs are root only.
-Q: bpf_trace_printk() helper warning
-------------------------------------
-Q: When bpf_trace_printk() helper is used the kernel prints nasty
-warning message. Why is that?
-
-A: This is done to nudge program authors into better interfaces when
-programs need to pass data to user space. Like bpf_perf_event_output()
-can be used to efficiently stream data via perf ring buffer.
-BPF maps can be used for asynchronous data sharing between kernel
-and user space. bpf_trace_printk() should only be used for debugging.
-
Q: New functionality via kernel modules?
----------------------------------------
Q: Can BPF functionality such as new program or map types, new
helpers, etc be added out of kernel module code?
+A: Yes, through kfuncs and kptrs
+
+The core BPF functionality such as program types, maps and helpers cannot be
+added to by modules. However, modules can expose functionality to BPF programs
+by exporting kfuncs (which may return pointers to module-internal data
+structures as kptrs).
+
+Q: Directly calling kernel function is an ABI?
+----------------------------------------------
+Q: Some kernel functions (e.g. tcp_slow_start) can be called
+by BPF programs. Do these kernel functions become an ABI?
+
A: NO.
+
+The kernel function protos will change and the bpf programs will be
+rejected by the verifier. Also, for example, some of the bpf-callable
+kernel functions have already been used by other kernel tcp
+cc (congestion-control) implementations. If any of these kernel
+functions has changed, both the in-tree and out-of-tree kernel tcp cc
+implementations have to be changed. The same goes for the bpf
+programs and they have to be adjusted accordingly. See
+:ref:`BPF_kfunc_lifecycle_expectations` for details.
+
+Q: Attaching to arbitrary kernel functions is an ABI?
+-----------------------------------------------------
+Q: BPF programs can be attached to many kernel functions. Do these
+kernel functions become part of the ABI?
+
+A: NO.
+
+The kernel function prototypes will change, and BPF programs attaching to
+them will need to change. The BPF compile-once-run-everywhere (CO-RE)
+should be used in order to make it easier to adapt your BPF programs to
+different versions of the kernel.
+
+Q: Marking a function with BTF_ID makes that function an ABI?
+-------------------------------------------------------------
+A: NO.
+
+The BTF_ID macro does not cause a function to become part of the ABI
+any more than does the EXPORT_SYMBOL_GPL macro.
+
+Q: What is the compatibility story for special BPF types in map values?
+-----------------------------------------------------------------------
+Q: Users are allowed to embed bpf_spin_lock, bpf_timer fields in their BPF map
+values (when using BTF support for BPF maps). This allows to use helpers for
+such objects on these fields inside map values. Users are also allowed to embed
+pointers to some kernel types (with __kptr_untrusted and __kptr BTF tags). Will the
+kernel preserve backwards compatibility for these features?
+
+A: It depends. For bpf_spin_lock, bpf_timer: YES, for kptr and everything else:
+NO, but see below.
+
+For struct types that have been added already, like bpf_spin_lock and bpf_timer,
+the kernel will preserve backwards compatibility, as they are part of UAPI.
+
+For kptrs, they are also part of UAPI, but only with respect to the kptr
+mechanism. The types that you can use with a __kptr_untrusted and __kptr tagged
+pointer in your struct are NOT part of the UAPI contract. The supported types can
+and will change across kernel releases. However, operations like accessing kptr
+fields and bpf_kptr_xchg() helper will continue to be supported across kernel
+releases for the supported types.
+
+For any other supported struct type, unless explicitly stated in this document
+and added to bpf.h UAPI header, such types can and will arbitrarily change their
+size, type, and alignment, or any other user visible API or ABI detail across
+kernel releases. The users must adapt their BPF programs to the new changes and
+update them to make sure their programs continue to work correctly.
+
+NOTE: BPF subsystem specially reserves the 'bpf\_' prefix for type names, in
+order to introduce more special fields in the future. Hence, user programs must
+avoid defining types with 'bpf\_' prefix to not be broken in future releases.
+In other words, no backwards compatibility is guaranteed if one using a type
+in BTF with 'bpf\_' prefix.
+
+Q: What is the compatibility story for special BPF types in allocated objects?
+------------------------------------------------------------------------------
+Q: Same as above, but for allocated objects (i.e. objects allocated using
+bpf_obj_new for user defined types). Will the kernel preserve backwards
+compatibility for these features?
+
+A: NO.
+
+Unlike map value types, the API to work with allocated objects and any support
+for special fields inside them is exposed through kfuncs, and thus has the same
+lifecycle expectations as the kfuncs themselves. See
+:ref:`BPF_kfunc_lifecycle_expectations` for details.
diff --git a/Documentation/bpf/bpf_devel_QA.rst b/Documentation/bpf/bpf_devel_QA.rst
index c9856b927055..45bc5c5cd793 100644
--- a/Documentation/bpf/bpf_devel_QA.rst
+++ b/Documentation/bpf/bpf_devel_QA.rst
@@ -7,8 +7,8 @@ workflows related to reporting bugs, submitting patches, and queueing
patches for stable kernels.
For general information about submitting patches, please refer to
-`Documentation/process/`_. This document only describes additional specifics
-related to BPF.
+Documentation/process/submitting-patches.rst. This document only describes
+additional specifics related to BPF.
.. contents::
:local:
@@ -20,16 +20,16 @@ Reporting bugs
Q: How do I report bugs for BPF kernel code?
--------------------------------------------
A: Since all BPF kernel development as well as bpftool and iproute2 BPF
-loader development happens through the netdev kernel mailing list,
+loader development happens through the bpf kernel mailing list,
please report any found issues around BPF to the following mailing
list:
- netdev@vger.kernel.org
+ bpf@vger.kernel.org
This may also include issues related to XDP, BPF tracing, etc.
Given netdev has a high volume of traffic, please also add the BPF
-maintainers to Cc (from kernel MAINTAINERS_ file):
+maintainers to Cc (from kernel ``MAINTAINERS`` file):
* Alexei Starovoitov <ast@kernel.org>
* Daniel Borkmann <daniel@iogearbox.net>
@@ -44,19 +44,41 @@ is a guarantee that the reported issue will be overlooked.**
Submitting patches
==================
+Q: How do I run BPF CI on my changes before sending them out for review?
+------------------------------------------------------------------------
+A: BPF CI is GitHub based and hosted at https://github.com/kernel-patches/bpf.
+While GitHub also provides a CLI that can be used to accomplish the same
+results, here we focus on the UI based workflow.
+
+The following steps lay out how to start a CI run for your patches:
+
+- Create a fork of the aforementioned repository in your own account (one time
+ action)
+
+- Clone the fork locally, check out a new branch tracking either the bpf-next
+ or bpf branch, and apply your to-be-tested patches on top of it
+
+- Push the local branch to your fork and create a pull request against
+ kernel-patches/bpf's bpf-next_base or bpf_base branch, respectively
+
+Shortly after the pull request has been created, the CI workflow will run. Note
+that capacity is shared with patches submitted upstream being checked and so
+depending on utilization the run can take a while to finish.
+
+Note furthermore that both base branches (bpf-next_base and bpf_base) will be
+updated as patches are pushed to the respective upstream branches they track. As
+such, your patch set will automatically (be attempted to) be rebased as well.
+This behavior can result in a CI run being aborted and restarted with the new
+base line.
+
Q: To which mailing list do I need to submit my BPF patches?
------------------------------------------------------------
-A: Please submit your BPF patches to the netdev kernel mailing list:
+A: Please submit your BPF patches to the bpf kernel mailing list:
- netdev@vger.kernel.org
-
-Historically, BPF came out of networking and has always been maintained
-by the kernel networking community. Although these days BPF touches
-many other subsystems as well, the patches are still routed mainly
-through the networking community.
+ bpf@vger.kernel.org
In case your patch has changes in various different subsystems (e.g.
-tracing, security, etc), make sure to Cc the related kernel mailing
+networking, tracing, security, etc), make sure to Cc the related kernel mailing
lists and maintainers from there as well, so they are able to review
the changes and provide their Acked-by's to the patches.
@@ -65,13 +87,13 @@ Q: Where can I find patches currently under discussion for BPF subsystem?
A: All patches that are Cc'ed to netdev are queued for review under netdev
patchwork project:
- http://patchwork.ozlabs.org/project/netdev/list/
+ https://patchwork.kernel.org/project/netdevbpf/list/
Those patches which target BPF, are assigned to a 'bpf' delegate for
further processing from BPF maintainers. The current queue with
patches under review can be found at:
- https://patchwork.ozlabs.org/project/netdev/list/?delegate=77147
+ https://patchwork.kernel.org/project/netdevbpf/list/?delegate=121173
Once the patches have been reviewed by the BPF community as a whole
and approved by the BPF maintainers, their status in patchwork will be
@@ -106,7 +128,8 @@ into the bpf-next tree will make their way into net-next tree. net and
net-next are both run by David S. Miller. From there, they will go
into the kernel mainline tree run by Linus Torvalds. To read up on the
process of net and net-next being merged into the mainline tree, see
-the :ref:`netdev-FAQ`
+the documentation on netdev subsystem at
+Documentation/process/maintainer-netdev.rst.
@@ -125,7 +148,8 @@ request)::
Q: How do I indicate which tree (bpf vs. bpf-next) my patch should be applied to?
---------------------------------------------------------------------------------
-A: The process is the very same as described in the :ref:`netdev-FAQ`,
+A: The process is the very same as described in the netdev subsystem
+documentation at Documentation/process/maintainer-netdev.rst,
so please read up on it. The subject line must indicate whether the
patch is a fix or rather "next-like" content in order to let the
maintainers know whether it is targeted at bpf or bpf-next.
@@ -154,7 +178,7 @@ In case the patch or patch series has to be reworked and sent out
again in a second or later revision, it is also required to add a
version number (``v2``, ``v3``, ...) into the subject prefix::
- git format-patch --subject-prefix='PATCH net-next v2' start..finish
+ git format-patch --subject-prefix='PATCH bpf-next v2' start..finish
When changes have been requested to the patch series, always send the
whole patch series again with the feedback incorporated (never send
@@ -168,7 +192,7 @@ a BPF point of view.
Be aware that this is not a final verdict that the patch will
automatically get accepted into net or net-next trees eventually:
-On the netdev kernel mailing list reviews can come in at any point
+On the bpf kernel mailing list reviews can come in at any point
in time. If discussions around a patch conclude that they cannot
get included as-is, we will either apply a follow-up fix or drop
them from the trees entirely. Therefore, we also reserve to rebase
@@ -184,8 +208,9 @@ ii) run extensive BPF test suite and
Once the BPF pull request was accepted by David S. Miller, then
the patches end up in net or net-next tree, respectively, and
make their way from there further into mainline. Again, see the
-:ref:`netdev-FAQ` for additional information e.g. on how often they are
-merged to mainline.
+documentation for netdev subsystem at
+Documentation/process/maintainer-netdev.rst for additional information
+e.g. on how often they are merged to mainline.
Q: How long do I need to wait for feedback on my BPF patches?
-------------------------------------------------------------
@@ -208,7 +233,8 @@ Q: Are patches applied to bpf-next when the merge window is open?
-----------------------------------------------------------------
A: For the time when the merge window is open, bpf-next will not be
processed. This is roughly analogous to net-next patch processing,
-so feel free to read up on the :ref:`netdev-FAQ` about further details.
+so feel free to read up on the netdev docs at
+Documentation/process/maintainer-netdev.rst about further details.
During those two weeks of merge window, we might ask you to resend
your patch series once bpf-next is open again. Once Linus released
@@ -239,11 +265,11 @@ be subject to change.
Q: samples/bpf preference vs selftests?
---------------------------------------
-Q: When should I add code to `samples/bpf/`_ and when to BPF kernel
-selftests_ ?
+Q: When should I add code to ``samples/bpf/`` and when to BPF kernel
+selftests_?
A: In general, we prefer additions to BPF kernel selftests_ rather than
-`samples/bpf/`_. The rationale is very simple: kernel selftests are
+``samples/bpf/``. The rationale is very simple: kernel selftests are
regularly run by various bots to test for kernel regressions.
The more test cases we add to BPF selftests, the better the coverage
@@ -251,9 +277,9 @@ and the less likely it is that those could accidentally break. It is
not that BPF kernel selftests cannot demo how a specific feature can
be used.
-That said, `samples/bpf/`_ may be a good place for people to get started,
+That said, ``samples/bpf/`` may be a good place for people to get started,
so it might be advisable that simple demos of features could go into
-`samples/bpf/`_, but advanced functional and corner-case testing rather
+``samples/bpf/``, but advanced functional and corner-case testing rather
into kernel selftests.
If your sample looks like a test case, then go for BPF kernel selftests
@@ -356,6 +382,14 @@ In case of new BPF instructions, once the changes have been accepted
into the Linux kernel, please implement support into LLVM's BPF back
end. See LLVM_ section below for further information.
+Q: What "BPF_INTERNAL" symbol namespace is for?
+-----------------------------------------------
+A: Symbols exported as BPF_INTERNAL can only be used by BPF infrastructure
+like preload kernel modules with light skeleton. Most symbols outside
+of BPF_INTERNAL are not expected to be used by code outside of BPF either.
+Symbols may lack the designation because they predate the namespaces,
+or due to an oversight.
+
Stable submission
=================
@@ -372,7 +406,8 @@ netdev kernel mailing list in Cc and ask for the fix to be queued up:
netdev@vger.kernel.org
The process in general is the same as on netdev itself, see also the
-:ref:`netdev-FAQ`.
+the documentation on networking subsystem at
+Documentation/process/maintainer-netdev.rst.
Q: Do you also backport to kernels not currently maintained as stable?
----------------------------------------------------------------------
@@ -388,7 +423,7 @@ Q: The BPF patch I am about to submit needs to go to stable as well
What should I do?
A: The same rules apply as with netdev patch submissions in general, see
-the :ref:`netdev-FAQ`.
+the netdev docs at Documentation/process/maintainer-netdev.rst.
Never add "``Cc: stable@vger.kernel.org``" to the patch description, but
ask the BPF maintainers to queue the patches instead. This can be done
@@ -439,8 +474,36 @@ needed::
$ sudo make run_tests
-See the kernels selftest `Documentation/dev-tools/kselftest.rst`_
-document for further documentation.
+See :doc:`kernel selftest documentation </dev-tools/kselftest>`
+for details.
+
+To maximize the number of tests passing, the .config of the kernel
+under test should match the config file fragment in
+tools/testing/selftests/bpf as closely as possible.
+
+Finally to ensure support for latest BPF Type Format features -
+discussed in Documentation/bpf/btf.rst - pahole version 1.16
+is required for kernels built with CONFIG_DEBUG_INFO_BTF=y.
+pahole is delivered in the dwarves package or can be built
+from source at
+
+https://github.com/acmel/dwarves
+
+pahole starts to use libbpf definitions and APIs since v1.13 after the
+commit 21507cd3e97b ("pahole: add libbpf as submodule under lib/bpf").
+It works well with the git repository because the libbpf submodule will
+use "git submodule update --init --recursive" to update.
+
+Unfortunately, the default github release source code does not contain
+libbpf submodule source code and this will cause build issues, the tarball
+from https://git.kernel.org/pub/scm/devel/pahole/pahole.git/ is same with
+github, you can get the source tarball with corresponding libbpf submodule
+codes from
+
+https://fedorapeople.org/~acme/dwarves
+
+Some distros have pahole version 1.16 packaged already, e.g.
+Fedora, Gentoo.
Q: Which BPF kernel selftests version should I run my kernel against?
---------------------------------------------------------------------
@@ -469,17 +532,18 @@ LLVM's static compiler lists the supported targets through
$ llc --version
LLVM (http://llvm.org/):
- LLVM version 6.0.0svn
+ LLVM version 10.0.0
Optimized build.
Default target: x86_64-unknown-linux-gnu
Host CPU: skylake
Registered Targets:
- bpf - BPF (host endian)
- bpfeb - BPF (big endian)
- bpfel - BPF (little endian)
- x86 - 32-bit X86: Pentium-Pro and above
- x86-64 - 64-bit X86: EM64T and AMD64
+ aarch64 - AArch64 (little endian)
+ bpf - BPF (host endian)
+ bpfeb - BPF (big endian)
+ bpfel - BPF (little endian)
+ x86 - 32-bit X86: Pentium-Pro and above
+ x86-64 - 64-bit X86: EM64T and AMD64
For developers in order to utilize the latest features added to LLVM's
BPF back end, it is advisable to run the latest LLVM releases. Support
@@ -490,23 +554,30 @@ All LLVM releases can be found at: http://releases.llvm.org/
Q: Got it, so how do I build LLVM manually anyway?
--------------------------------------------------
-A: You need cmake and gcc-c++ as build requisites for LLVM. Once you have
-that set up, proceed with building the latest LLVM and clang version
+A: We recommend that developers who want the fastest incremental builds
+use the Ninja build system, you can find it in your system's package
+manager, usually the package is ninja or ninja-build.
+
+You need ninja, cmake and gcc-c++ as build requisites for LLVM. Once you
+have that set up, proceed with building the latest LLVM and clang version
from the git repositories::
- $ git clone http://llvm.org/git/llvm.git
- $ cd llvm/tools
- $ git clone --depth 1 http://llvm.org/git/clang.git
- $ cd ..; mkdir build; cd build
- $ cmake .. -DLLVM_TARGETS_TO_BUILD="BPF;X86" \
- -DBUILD_SHARED_LIBS=OFF \
+ $ git clone https://github.com/llvm/llvm-project.git
+ $ mkdir -p llvm-project/llvm/build
+ $ cd llvm-project/llvm/build
+ $ cmake .. -G "Ninja" -DLLVM_TARGETS_TO_BUILD="BPF;X86" \
+ -DLLVM_ENABLE_PROJECTS="clang" \
-DCMAKE_BUILD_TYPE=Release \
-DLLVM_BUILD_RUNTIME=OFF
- $ make -j $(getconf _NPROCESSORS_ONLN)
+ $ ninja
The built binaries can then be found in the build/bin/ directory, where
you can point the PATH variable to.
+Set ``-DLLVM_TARGETS_TO_BUILD`` equal to the target you wish to build, you
+will find a full list of targets within the llvm-project/llvm/lib/Target
+directory.
+
Q: Reporting LLVM BPF issues
----------------------------
Q: Should I notify BPF kernel maintainers about issues in LLVM's BPF code
@@ -540,9 +611,10 @@ Q: I have added a new BPF instruction to the kernel, how can I integrate
it into LLVM?
A: LLVM has a ``-mcpu`` selector for the BPF back end in order to allow
-the selection of BPF instruction set extensions. By default the
-``generic`` processor target is used, which is the base instruction set
-(v1) of BPF.
+the selection of BPF instruction set extensions. Before llvm version 20,
+the ``generic`` processor target is used, which is the base instruction
+set (v1) of BPF. Since llvm 20, the default processor target has changed
+to instruction set v3.
LLVM has an option to select ``-mcpu=probe`` where it will probe the host
kernel for supported BPF instruction set extensions and selects the
@@ -572,12 +644,12 @@ test coverage.
Q: clang flag for target bpf?
-----------------------------
-Q: In some cases clang flag ``-target bpf`` is used but in other cases the
+Q: In some cases clang flag ``--target=bpf`` is used but in other cases the
default clang target, which matches the underlying architecture, is used.
What is the difference and when I should use which?
A: Although LLVM IR generation and optimization try to stay architecture
-independent, ``-target <arch>`` still has some impact on generated code:
+independent, ``--target=<arch>`` still has some impact on generated code:
- BPF program may recursively include header file(s) with file scope
inline assembly codes. The default target can handle this well,
@@ -595,7 +667,7 @@ independent, ``-target <arch>`` still has some impact on generated code:
The clang option ``-fno-jump-tables`` can be used to disable
switch table generation.
-- For clang ``-target bpf``, it is guaranteed that pointer or long /
+- For clang ``--target=bpf``, it is guaranteed that pointer or long /
unsigned long types will always have a width of 64 bit, no matter
whether underlying clang binary or default target (or kernel) is
32 bit. However, when native clang target is used, then it will
@@ -605,7 +677,7 @@ independent, ``-target <arch>`` still has some impact on generated code:
while the BPF LLVM back end still operates in 64 bit. The native
target is mostly needed in tracing for the case of walking ``pt_regs``
or other kernel structures where CPU's register width matters.
- Otherwise, ``clang -target bpf`` is generally recommended.
+ Otherwise, ``clang --target=bpf`` is generally recommended.
You should use default target when:
@@ -622,16 +694,11 @@ when:
into these structures is verified by the BPF verifier and may result
in verification failures if the native architecture is not aligned with
the BPF architecture, e.g. 64-bit. An example of this is
- BPF_PROG_TYPE_SK_MSG require ``-target bpf``
+ BPF_PROG_TYPE_SK_MSG require ``--target=bpf``
.. Links
-.. _Documentation/process/: https://www.kernel.org/doc/html/latest/process/
-.. _MAINTAINERS: ../../MAINTAINERS
-.. _netdev-FAQ: ../networking/netdev-FAQ.rst
-.. _samples/bpf/: ../../samples/bpf/
-.. _selftests: ../../tools/testing/selftests/bpf/
-.. _Documentation/dev-tools/kselftest.rst:
- https://www.kernel.org/doc/html/latest/dev-tools/kselftest.html
+.. _selftests:
+ https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/
Happy BPF hacking!
diff --git a/Documentation/bpf/bpf_iterators.rst b/Documentation/bpf/bpf_iterators.rst
new file mode 100644
index 000000000000..189e3ec1c6c8
--- /dev/null
+++ b/Documentation/bpf/bpf_iterators.rst
@@ -0,0 +1,589 @@
+=============
+BPF Iterators
+=============
+
+--------
+Overview
+--------
+
+BPF supports two separate entities collectively known as "BPF iterators": BPF
+iterator *program type* and *open-coded* BPF iterators. The former is
+a stand-alone BPF program type which, when attached and activated by user,
+will be called once for each entity (task_struct, cgroup, etc) that is being
+iterated. The latter is a set of BPF-side APIs implementing iterator
+functionality and available across multiple BPF program types. Open-coded
+iterators provide similar functionality to BPF iterator programs, but gives
+more flexibility and control to all other BPF program types. BPF iterator
+programs, on the other hand, can be used to implement anonymous or BPF
+FS-mounted special files, whose contents are generated by attached BPF iterator
+program, backed by seq_file functionality. Both are useful depending on
+specific needs.
+
+When adding a new BPF iterator program, it is expected that similar
+functionality will be added as open-coded iterator for maximum flexibility.
+It's also expected that iteration logic and code will be maximally shared and
+reused between two iterator API surfaces.
+
+------------------------
+Open-coded BPF Iterators
+------------------------
+
+Open-coded BPF iterators are implemented as tightly-coupled trios of kfuncs
+(constructor, next element fetch, destructor) and iterator-specific type
+describing on-the-stack iterator state, which is guaranteed by the BPF
+verifier to not be tampered with outside of the corresponding
+constructor/destructor/next APIs.
+
+Each kind of open-coded BPF iterator has its own associated
+struct bpf_iter_<type>, where <type> denotes a specific type of iterator.
+bpf_iter_<type> state needs to live on BPF program stack, so make sure it's
+small enough to fit on BPF stack. For performance reasons its best to avoid
+dynamic memory allocation for iterator state and size the state struct big
+enough to fit everything necessary. But if necessary, dynamic memory
+allocation is a way to bypass BPF stack limitations. Note, state struct size
+is part of iterator's user-visible API, so changing it will break backwards
+compatibility, so be deliberate about designing it.
+
+All kfuncs (constructor, next, destructor) have to be named consistently as
+bpf_iter_<type>_{new,next,destroy}(), respectively. <type> represents iterator
+type, and iterator state should be represented as a matching
+`struct bpf_iter_<type>` state type. Also, all iter kfuncs should have
+a pointer to this `struct bpf_iter_<type>` as the very first argument.
+
+Additionally:
+ - Constructor, i.e., `bpf_iter_<type>_new()`, can have arbitrary extra
+ number of arguments. Return type is not enforced either.
+ - Next method, i.e., `bpf_iter_<type>_next()`, has to return a pointer
+ type and should have exactly one argument: `struct bpf_iter_<type> *`
+ (const/volatile/restrict and typedefs are ignored).
+ - Destructor, i.e., `bpf_iter_<type>_destroy()`, should return void and
+ should have exactly one argument, similar to the next method.
+ - `struct bpf_iter_<type>` size is enforced to be positive and
+ a multiple of 8 bytes (to fit stack slots correctly).
+
+Such strictness and consistency allows to build generic helpers abstracting
+important, but boilerplate, details to be able to use open-coded iterators
+effectively and ergonomically (see libbpf's bpf_for_each() macro). This is
+enforced at kfunc registration point by the kernel.
+
+Constructor/next/destructor implementation contract is as follows:
+ - constructor, `bpf_iter_<type>_new()`, always initializes iterator state on
+ the stack. If any of the input arguments are invalid, constructor should
+ make sure to still initialize it such that subsequent next() calls will
+ return NULL. I.e., on error, *return error and construct empty iterator*.
+ Constructor kfunc is marked with KF_ITER_NEW flag.
+
+ - next method, `bpf_iter_<type>_next()`, accepts pointer to iterator state
+ and produces an element. Next method should always return a pointer. The
+ contract between BPF verifier is that next method *guarantees* that it
+ will eventually return NULL when elements are exhausted. Once NULL is
+ returned, subsequent next calls *should keep returning NULL*. Next method
+ is marked with KF_ITER_NEXT (and should also have KF_RET_NULL as
+ NULL-returning kfunc, of course).
+
+ - destructor, `bpf_iter_<type>_destroy()`, is always called once. Even if
+ constructor failed or next returned nothing. Destructor frees up any
+ resources and marks stack space used by `struct bpf_iter_<type>` as usable
+ for something else. Destructor is marked with KF_ITER_DESTROY flag.
+
+Any open-coded BPF iterator implementation has to implement at least these
+three methods. It is enforced that for any given type of iterator only
+applicable constructor/destructor/next are callable. I.e., verifier ensures
+you can't pass number iterator state into, say, cgroup iterator's next method.
+
+From a 10,000-feet BPF verification point of view, next methods are the points
+of forking a verification state, which are conceptually similar to what
+verifier is doing when validating conditional jumps. Verifier is branching out
+`call bpf_iter_<type>_next` instruction and simulates two outcomes: NULL
+(iteration is done) and non-NULL (new element is returned). NULL is simulated
+first and is supposed to reach exit without looping. After that non-NULL case
+is validated and it either reaches exit (for trivial examples with no real
+loop), or reaches another `call bpf_iter_<type>_next` instruction with the
+state equivalent to already (partially) validated one. State equivalency at
+that point means we technically are going to be looping forever without
+"breaking out" out of established "state envelope" (i.e., subsequent
+iterations don't add any new knowledge or constraints to the verifier state,
+so running 1, 2, 10, or a million of them doesn't matter). But taking into
+account the contract stating that iterator next method *has to* return NULL
+eventually, we can conclude that loop body is safe and will eventually
+terminate. Given we validated logic outside of the loop (NULL case), and
+concluded that loop body is safe (though potentially looping many times),
+verifier can claim safety of the overall program logic.
+
+------------------------
+BPF Iterators Motivation
+------------------------
+
+There are a few existing ways to dump kernel data into user space. The most
+popular one is the ``/proc`` system. For example, ``cat /proc/net/tcp6`` dumps
+all tcp6 sockets in the system, and ``cat /proc/net/netlink`` dumps all netlink
+sockets in the system. However, their output format tends to be fixed, and if
+users want more information about these sockets, they have to patch the kernel,
+which often takes time to publish upstream and release. The same is true for popular
+tools like `ss <https://man7.org/linux/man-pages/man8/ss.8.html>`_ where any
+additional information needs a kernel patch.
+
+To solve this problem, the `drgn
+<https://www.kernel.org/doc/html/latest/bpf/drgn.html>`_ tool is often used to
+dig out the kernel data with no kernel change. However, the main drawback for
+drgn is performance, as it cannot do pointer tracing inside the kernel. In
+addition, drgn cannot validate a pointer value and may read invalid data if the
+pointer becomes invalid inside the kernel.
+
+The BPF iterator solves the above problem by providing flexibility on what data
+(e.g., tasks, bpf_maps, etc.) to collect by calling BPF programs for each kernel
+data object.
+
+----------------------
+How BPF Iterators Work
+----------------------
+
+A BPF iterator is a type of BPF program that allows users to iterate over
+specific types of kernel objects. Unlike traditional BPF tracing programs that
+allow users to define callbacks that are invoked at particular points of
+execution in the kernel, BPF iterators allow users to define callbacks that
+should be executed for every entry in a variety of kernel data structures.
+
+For example, users can define a BPF iterator that iterates over every task on
+the system and dumps the total amount of CPU runtime currently used by each of
+them. Another BPF task iterator may instead dump the cgroup information for each
+task. Such flexibility is the core value of BPF iterators.
+
+A BPF program is always loaded into the kernel at the behest of a user space
+process. A user space process loads a BPF program by opening and initializing
+the program skeleton as required and then invoking a syscall to have the BPF
+program verified and loaded by the kernel.
+
+In traditional tracing programs, a program is activated by having user space
+obtain a ``bpf_link`` to the program with ``bpf_program__attach()``. Once
+activated, the program callback will be invoked whenever the tracepoint is
+triggered in the main kernel. For BPF iterator programs, a ``bpf_link`` to the
+program is obtained using ``bpf_link_create()``, and the program callback is
+invoked by issuing system calls from user space.
+
+Next, let us see how you can use the iterators to iterate on kernel objects and
+read data.
+
+------------------------
+How to Use BPF iterators
+------------------------
+
+BPF selftests are a great resource to illustrate how to use the iterators. In
+this section, we’ll walk through a BPF selftest which shows how to load and use
+a BPF iterator program. To begin, we’ll look at `bpf_iter.c
+<https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/tools/testing/selftests/bpf/prog_tests/bpf_iter.c>`_,
+which illustrates how to load and trigger BPF iterators on the user space side.
+Later, we’ll look at a BPF program that runs in kernel space.
+
+Loading a BPF iterator in the kernel from user space typically involves the
+following steps:
+
+* The BPF program is loaded into the kernel through ``libbpf``. Once the kernel
+ has verified and loaded the program, it returns a file descriptor (fd) to user
+ space.
+* Obtain a ``link_fd`` to the BPF program by calling the ``bpf_link_create()``
+ specified with the BPF program file descriptor received from the kernel.
+* Next, obtain a BPF iterator file descriptor (``bpf_iter_fd``) by calling the
+ ``bpf_iter_create()`` specified with the ``bpf_link`` received from Step 2.
+* Trigger the iteration by calling ``read(bpf_iter_fd)`` until no data is
+ available.
+* Close the iterator fd using ``close(bpf_iter_fd)``.
+* If needed to reread the data, get a new ``bpf_iter_fd`` and do the read again.
+
+The following are a few examples of selftest BPF iterator programs:
+
+* `bpf_iter_tcp4.c <https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c>`_
+* `bpf_iter_task_vmas.c <https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c>`_
+* `bpf_iter_task_file.c <https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c>`_
+
+Let us look at ``bpf_iter_task_file.c``, which runs in kernel space:
+
+Here is the definition of ``bpf_iter__task_file`` in `vmlinux.h
+<https://facebookmicrosites.github.io/bpf/blog/2020/02/19/bpf-portability-and-co-re.html#btf>`_.
+Any struct name in ``vmlinux.h`` in the format ``bpf_iter__<iter_name>``
+represents a BPF iterator. The suffix ``<iter_name>`` represents the type of
+iterator.
+
+::
+
+ struct bpf_iter__task_file {
+ union {
+ struct bpf_iter_meta *meta;
+ };
+ union {
+ struct task_struct *task;
+ };
+ u32 fd;
+ union {
+ struct file *file;
+ };
+ };
+
+In the above code, the field 'meta' contains the metadata, which is the same for
+all BPF iterator programs. The rest of the fields are specific to different
+iterators. For example, for task_file iterators, the kernel layer provides the
+'task', 'fd' and 'file' field values. The 'task' and 'file' are `reference
+counted
+<https://facebookmicrosites.github.io/bpf/blog/2018/08/31/object-lifetime.html#file-descriptors-and-reference-counters>`_,
+so they won't go away when the BPF program runs.
+
+Here is a snippet from the ``bpf_iter_task_file.c`` file:
+
+::
+
+ SEC("iter/task_file")
+ int dump_task_file(struct bpf_iter__task_file *ctx)
+ {
+ struct seq_file *seq = ctx->meta->seq;
+ struct task_struct *task = ctx->task;
+ struct file *file = ctx->file;
+ __u32 fd = ctx->fd;
+
+ if (task == NULL || file == NULL)
+ return 0;
+
+ if (ctx->meta->seq_num == 0) {
+ count = 0;
+ BPF_SEQ_PRINTF(seq, " tgid gid fd file\n");
+ }
+
+ if (tgid == task->tgid && task->tgid != task->pid)
+ count++;
+
+ if (last_tgid != task->tgid) {
+ last_tgid = task->tgid;
+ unique_tgid_count++;
+ }
+
+ BPF_SEQ_PRINTF(seq, "%8d %8d %8d %lx\n", task->tgid, task->pid, fd,
+ (long)file->f_op);
+ return 0;
+ }
+
+In the above example, the section name ``SEC(iter/task_file)``, indicates that
+the program is a BPF iterator program to iterate all files from all tasks. The
+context of the program is ``bpf_iter__task_file`` struct.
+
+The user space program invokes the BPF iterator program running in the kernel
+by issuing a ``read()`` syscall. Once invoked, the BPF
+program can export data to user space using a variety of BPF helper functions.
+You can use either ``bpf_seq_printf()`` (and BPF_SEQ_PRINTF helper macro) or
+``bpf_seq_write()`` function based on whether you need formatted output or just
+binary data, respectively. For binary-encoded data, the user space applications
+can process the data from ``bpf_seq_write()`` as needed. For the formatted data,
+you can use ``cat <path>`` to print the results similar to ``cat
+/proc/net/netlink`` after pinning the BPF iterator to the bpffs mount. Later,
+use ``rm -f <path>`` to remove the pinned iterator.
+
+For example, you can use the following command to create a BPF iterator from the
+``bpf_iter_ipv6_route.o`` object file and pin it to the ``/sys/fs/bpf/my_route``
+path:
+
+::
+
+ $ bpftool iter pin ./bpf_iter_ipv6_route.o /sys/fs/bpf/my_route
+
+And then print out the results using the following command:
+
+::
+
+ $ cat /sys/fs/bpf/my_route
+
+
+-------------------------------------------------------
+Implement Kernel Support for BPF Iterator Program Types
+-------------------------------------------------------
+
+To implement a BPF iterator in the kernel, the developer must make a one-time
+change to the following key data structure defined in the `bpf.h
+<https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/include/linux/bpf.h>`_
+file.
+
+::
+
+ struct bpf_iter_reg {
+ const char *target;
+ bpf_iter_attach_target_t attach_target;
+ bpf_iter_detach_target_t detach_target;
+ bpf_iter_show_fdinfo_t show_fdinfo;
+ bpf_iter_fill_link_info_t fill_link_info;
+ bpf_iter_get_func_proto_t get_func_proto;
+ u32 ctx_arg_info_size;
+ u32 feature;
+ struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX];
+ const struct bpf_iter_seq_info *seq_info;
+ };
+
+After filling the data structure fields, call ``bpf_iter_reg_target()`` to
+register the iterator to the main BPF iterator subsystem.
+
+The following is the breakdown for each field in struct ``bpf_iter_reg``.
+
+.. list-table::
+ :widths: 25 50
+ :header-rows: 1
+
+ * - Fields
+ - Description
+ * - target
+ - Specifies the name of the BPF iterator. For example: ``bpf_map``,
+ ``bpf_map_elem``. The name should be different from other ``bpf_iter`` target names in the kernel.
+ * - attach_target and detach_target
+ - Allows for target specific ``link_create`` action since some targets
+ may need special processing. Called during the user space link_create stage.
+ * - show_fdinfo and fill_link_info
+ - Called to fill target specific information when user tries to get link
+ info associated with the iterator.
+ * - get_func_proto
+ - Permits a BPF iterator to access BPF helpers specific to the iterator.
+ * - ctx_arg_info_size and ctx_arg_info
+ - Specifies the verifier states for BPF program arguments associated with
+ the bpf iterator.
+ * - feature
+ - Specifies certain action requests in the kernel BPF iterator
+ infrastructure. Currently, only BPF_ITER_RESCHED is supported. This means
+ that the kernel function cond_resched() is called to avoid other kernel
+ subsystem (e.g., rcu) misbehaving.
+ * - seq_info
+ - Specifies the set of seq operations for the BPF iterator and helpers to
+ initialize/free the private data for the corresponding ``seq_file``.
+
+`Click here
+<https://lore.kernel.org/bpf/20210212183107.50963-2-songliubraving@fb.com/>`_
+to see an implementation of the ``task_vma`` BPF iterator in the kernel.
+
+---------------------------------
+Parameterizing BPF Task Iterators
+---------------------------------
+
+By default, BPF iterators walk through all the objects of the specified types
+(processes, cgroups, maps, etc.) across the entire system to read relevant
+kernel data. But often, there are cases where we only care about a much smaller
+subset of iterable kernel objects, such as only iterating tasks within a
+specific process. Therefore, BPF iterator programs support filtering out objects
+from iteration by allowing user space to configure the iterator program when it
+is attached.
+
+--------------------------
+BPF Task Iterator Program
+--------------------------
+
+The following code is a BPF iterator program to print files and task information
+through the ``seq_file`` of the iterator. It is a standard BPF iterator program
+that visits every file of an iterator. We will use this BPF program in our
+example later.
+
+::
+
+ #include <vmlinux.h>
+ #include <bpf/bpf_helpers.h>
+
+ char _license[] SEC("license") = "GPL";
+
+ SEC("iter/task_file")
+ int dump_task_file(struct bpf_iter__task_file *ctx)
+ {
+ struct seq_file *seq = ctx->meta->seq;
+ struct task_struct *task = ctx->task;
+ struct file *file = ctx->file;
+ __u32 fd = ctx->fd;
+ if (task == NULL || file == NULL)
+ return 0;
+ if (ctx->meta->seq_num == 0) {
+ BPF_SEQ_PRINTF(seq, " tgid pid fd file\n");
+ }
+ BPF_SEQ_PRINTF(seq, "%8d %8d %8d %lx\n", task->tgid, task->pid, fd,
+ (long)file->f_op);
+ return 0;
+ }
+
+----------------------------------------
+Creating a File Iterator with Parameters
+----------------------------------------
+
+Now, let us look at how to create an iterator that includes only files of a
+process.
+
+First, fill the ``bpf_iter_attach_opts`` struct as shown below:
+
+::
+
+ LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ union bpf_iter_link_info linfo;
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.task.pid = getpid();
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+
+``linfo.task.pid``, if it is non-zero, directs the kernel to create an iterator
+that only includes opened files for the process with the specified ``pid``. In
+this example, we will only be iterating files for our process. If
+``linfo.task.pid`` is zero, the iterator will visit every opened file of every
+process. Similarly, ``linfo.task.tid`` directs the kernel to create an iterator
+that visits opened files of a specific thread, not a process. In this example,
+``linfo.task.tid`` is different from ``linfo.task.pid`` only if the thread has a
+separate file descriptor table. In most circumstances, all process threads share
+a single file descriptor table.
+
+Now, in the userspace program, pass the pointer of struct to the
+``bpf_program__attach_iter()``.
+
+::
+
+ link = bpf_program__attach_iter(prog, &opts);
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+
+If both *tid* and *pid* are zero, an iterator created from this struct
+``bpf_iter_attach_opts`` will include every opened file of every task in the
+system (in the namespace, actually.) It is the same as passing a NULL as the
+second argument to ``bpf_program__attach_iter()``.
+
+The whole program looks like the following code:
+
+::
+
+ #include <stdio.h>
+ #include <unistd.h>
+ #include <bpf/bpf.h>
+ #include <bpf/libbpf.h>
+ #include "bpf_iter_task_ex.skel.h"
+
+ static int do_read_opts(struct bpf_program *prog, struct bpf_iter_attach_opts *opts)
+ {
+ struct bpf_link *link;
+ char buf[16] = {};
+ int iter_fd = -1, len;
+ int ret = 0;
+
+ link = bpf_program__attach_iter(prog, opts);
+ if (!link) {
+ fprintf(stderr, "bpf_program__attach_iter() fails\n");
+ return -1;
+ }
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (iter_fd < 0) {
+ fprintf(stderr, "bpf_iter_create() fails\n");
+ ret = -1;
+ goto free_link;
+ }
+ /* not check contents, but ensure read() ends without error */
+ while ((len = read(iter_fd, buf, sizeof(buf) - 1)) > 0) {
+ buf[len] = 0;
+ printf("%s", buf);
+ }
+ printf("\n");
+ free_link:
+ if (iter_fd >= 0)
+ close(iter_fd);
+ bpf_link__destroy(link);
+ return 0;
+ }
+
+ static void test_task_file(void)
+ {
+ LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ struct bpf_iter_task_ex *skel;
+ union bpf_iter_link_info linfo;
+ skel = bpf_iter_task_ex__open_and_load();
+ if (skel == NULL)
+ return;
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.task.pid = getpid();
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+ printf("PID %d\n", getpid());
+ do_read_opts(skel->progs.dump_task_file, &opts);
+ bpf_iter_task_ex__destroy(skel);
+ }
+
+ int main(int argc, const char * const * argv)
+ {
+ test_task_file();
+ return 0;
+ }
+
+The following lines are the output of the program.
+::
+
+ PID 1859
+
+ tgid pid fd file
+ 1859 1859 0 ffffffff82270aa0
+ 1859 1859 1 ffffffff82270aa0
+ 1859 1859 2 ffffffff82270aa0
+ 1859 1859 3 ffffffff82272980
+ 1859 1859 4 ffffffff8225e120
+ 1859 1859 5 ffffffff82255120
+ 1859 1859 6 ffffffff82254f00
+ 1859 1859 7 ffffffff82254d80
+ 1859 1859 8 ffffffff8225abe0
+
+------------------
+Without Parameters
+------------------
+
+Let us look at how a BPF iterator without parameters skips files of other
+processes in the system. In this case, the BPF program has to check the pid or
+the tid of tasks, or it will receive every opened file in the system (in the
+current *pid* namespace, actually). So, we usually add a global variable in the
+BPF program to pass a *pid* to the BPF program.
+
+The BPF program would look like the following block.
+
+ ::
+
+ ......
+ int target_pid = 0;
+
+ SEC("iter/task_file")
+ int dump_task_file(struct bpf_iter__task_file *ctx)
+ {
+ ......
+ if (task->tgid != target_pid) /* Check task->pid instead to check thread IDs */
+ return 0;
+ BPF_SEQ_PRINTF(seq, "%8d %8d %8d %lx\n", task->tgid, task->pid, fd,
+ (long)file->f_op);
+ return 0;
+ }
+
+The user space program would look like the following block:
+
+ ::
+
+ ......
+ static void test_task_file(void)
+ {
+ ......
+ skel = bpf_iter_task_ex__open_and_load();
+ if (skel == NULL)
+ return;
+ skel->bss->target_pid = getpid(); /* process ID. For thread id, use gettid() */
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.task.pid = getpid();
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+ ......
+ }
+
+``target_pid`` is a global variable in the BPF program. The user space program
+should initialize the variable with a process ID to skip opened files of other
+processes in the BPF program. When you parametrize a BPF iterator, the iterator
+calls the BPF program fewer times which can save significant resources.
+
+---------------------------
+Parametrizing VMA Iterators
+---------------------------
+
+By default, a BPF VMA iterator includes every VMA in every process. However,
+you can still specify a process or a thread to include only its VMAs. Unlike
+files, a thread can not have a separate address space (since Linux 2.6.0-test6).
+Here, using *tid* makes no difference from using *pid*.
+
+----------------------------
+Parametrizing Task Iterators
+----------------------------
+
+A BPF task iterator with *pid* includes all tasks (threads) of a process. The
+BPF program receives these tasks one after another. You can specify a BPF task
+iterator with *tid* parameter to include only the tasks that match the given
+*tid*.
diff --git a/Documentation/bpf/bpf_licensing.rst b/Documentation/bpf/bpf_licensing.rst
new file mode 100644
index 000000000000..b19c433f41d2
--- /dev/null
+++ b/Documentation/bpf/bpf_licensing.rst
@@ -0,0 +1,92 @@
+=============
+BPF licensing
+=============
+
+Background
+==========
+
+* Classic BPF was BSD licensed
+
+"BPF" was originally introduced as BSD Packet Filter in
+http://www.tcpdump.org/papers/bpf-usenix93.pdf. The corresponding instruction
+set and its implementation came from BSD with BSD license. That original
+instruction set is now known as "classic BPF".
+
+However an instruction set is a specification for machine-language interaction,
+similar to a programming language. It is not a code. Therefore, the
+application of a BSD license may be misleading in a certain context, as the
+instruction set may enjoy no copyright protection.
+
+* eBPF (extended BPF) instruction set continues to be BSD
+
+In 2014, the classic BPF instruction set was significantly extended. We
+typically refer to this instruction set as eBPF to disambiguate it from cBPF.
+The eBPF instruction set is still BSD licensed.
+
+Implementations of eBPF
+=======================
+
+Using the eBPF instruction set requires implementing code in both kernel space
+and user space.
+
+In Linux Kernel
+---------------
+
+The reference implementations of the eBPF interpreter and various just-in-time
+compilers are part of Linux and are GPLv2 licensed. The implementation of
+eBPF helper functions is also GPLv2 licensed. Interpreters, JITs, helpers,
+and verifiers are called eBPF runtime.
+
+In User Space
+-------------
+
+There are also implementations of eBPF runtime (interpreter, JITs, helper
+functions) under
+Apache2 (https://github.com/iovisor/ubpf),
+MIT (https://github.com/qmonnet/rbpf), and
+BSD (https://github.com/DPDK/dpdk/blob/main/lib/librte_bpf).
+
+In HW
+-----
+
+The HW can choose to execute eBPF instruction natively and provide eBPF runtime
+in HW or via the use of implementing firmware with a proprietary license.
+
+In other operating systems
+--------------------------
+
+Other kernels or user space implementations of eBPF instruction set and runtime
+can have proprietary licenses.
+
+Using BPF programs in the Linux kernel
+======================================
+
+Linux Kernel (while being GPLv2) allows linking of proprietary kernel modules
+under these rules:
+Documentation/process/license-rules.rst
+
+When a kernel module is loaded, the linux kernel checks which functions it
+intends to use. If any function is marked as "GPL only," the corresponding
+module or program has to have GPL compatible license.
+
+Loading BPF program into the Linux kernel is similar to loading a kernel
+module. BPF is loaded at run time and not statically linked to the Linux
+kernel. BPF program loading follows the same license checking rules as kernel
+modules. BPF programs can be proprietary if they don't use "GPL only" BPF
+helper functions.
+
+Further, some BPF program types - Linux Security Modules (LSM) and TCP
+Congestion Control (struct_ops), as of Aug 2021 - are required to be GPL
+compatible even if they don't use "GPL only" helper functions directly. The
+registration step of LSM and TCP congestion control modules of the Linux
+kernel is done through EXPORT_SYMBOL_GPL kernel functions. In that sense LSM
+and struct_ops BPF programs are implicitly calling "GPL only" functions.
+The same restriction applies to BPF programs that call kernel functions
+directly via unstable interface also known as "kfunc".
+
+Packaging BPF programs with user space applications
+====================================================
+
+Generally, proprietary-licensed applications and GPL licensed BPF programs
+written for the Linux kernel in the same package can co-exist because they are
+separate executable processes. This applies to both cBPF and eBPF programs.
diff --git a/Documentation/bpf/bpf_prog_run.rst b/Documentation/bpf/bpf_prog_run.rst
new file mode 100644
index 000000000000..4868c909df5c
--- /dev/null
+++ b/Documentation/bpf/bpf_prog_run.rst
@@ -0,0 +1,117 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================================
+Running BPF programs from userspace
+===================================
+
+This document describes the ``BPF_PROG_RUN`` facility for running BPF programs
+from userspace.
+
+.. contents::
+ :local:
+ :depth: 2
+
+
+Overview
+--------
+
+The ``BPF_PROG_RUN`` command can be used through the ``bpf()`` syscall to
+execute a BPF program in the kernel and return the results to userspace. This
+can be used to unit test BPF programs against user-supplied context objects, and
+as way to explicitly execute programs in the kernel for their side effects. The
+command was previously named ``BPF_PROG_TEST_RUN``, and both constants continue
+to be defined in the UAPI header, aliased to the same value.
+
+The ``BPF_PROG_RUN`` command can be used to execute BPF programs of the
+following types:
+
+- ``BPF_PROG_TYPE_SOCKET_FILTER``
+- ``BPF_PROG_TYPE_SCHED_CLS``
+- ``BPF_PROG_TYPE_SCHED_ACT``
+- ``BPF_PROG_TYPE_XDP``
+- ``BPF_PROG_TYPE_SK_LOOKUP``
+- ``BPF_PROG_TYPE_CGROUP_SKB``
+- ``BPF_PROG_TYPE_LWT_IN``
+- ``BPF_PROG_TYPE_LWT_OUT``
+- ``BPF_PROG_TYPE_LWT_XMIT``
+- ``BPF_PROG_TYPE_LWT_SEG6LOCAL``
+- ``BPF_PROG_TYPE_FLOW_DISSECTOR``
+- ``BPF_PROG_TYPE_STRUCT_OPS``
+- ``BPF_PROG_TYPE_RAW_TRACEPOINT``
+- ``BPF_PROG_TYPE_SYSCALL``
+
+When using the ``BPF_PROG_RUN`` command, userspace supplies an input context
+object and (for program types operating on network packets) a buffer containing
+the packet data that the BPF program will operate on. The kernel will then
+execute the program and return the results to userspace. Note that programs will
+not have any side effects while being run in this mode; in particular, packets
+will not actually be redirected or dropped, the program return code will just be
+returned to userspace. A separate mode for live execution of XDP programs is
+provided, documented separately below.
+
+Running XDP programs in "live frame mode"
+-----------------------------------------
+
+The ``BPF_PROG_RUN`` command has a separate mode for running live XDP programs,
+which can be used to execute XDP programs in a way where packets will actually
+be processed by the kernel after the execution of the XDP program as if they
+arrived on a physical interface. This mode is activated by setting the
+``BPF_F_TEST_XDP_LIVE_FRAMES`` flag when supplying an XDP program to
+``BPF_PROG_RUN``.
+
+The live packet mode is optimised for high performance execution of the supplied
+XDP program many times (suitable for, e.g., running as a traffic generator),
+which means the semantics are not quite as straight-forward as the regular test
+run mode. Specifically:
+
+- When executing an XDP program in live frame mode, the result of the execution
+ will not be returned to userspace; instead, the kernel will perform the
+ operation indicated by the program's return code (drop the packet, redirect
+ it, etc). For this reason, setting the ``data_out`` or ``ctx_out`` attributes
+ in the syscall parameters when running in this mode will be rejected. In
+ addition, not all failures will be reported back to userspace directly;
+ specifically, only fatal errors in setup or during execution (like memory
+ allocation errors) will halt execution and return an error. If an error occurs
+ in packet processing, like a failure to redirect to a given interface,
+ execution will continue with the next repetition; these errors can be detected
+ via the same trace points as for regular XDP programs.
+
+- Userspace can supply an ifindex as part of the context object, just like in
+ the regular (non-live) mode. The XDP program will be executed as though the
+ packet arrived on this interface; i.e., the ``ingress_ifindex`` of the context
+ object will point to that interface. Furthermore, if the XDP program returns
+ ``XDP_PASS``, the packet will be injected into the kernel networking stack as
+ though it arrived on that ifindex, and if it returns ``XDP_TX``, the packet
+ will be transmitted *out* of that same interface. Do note, though, that
+ because the program execution is not happening in driver context, an
+ ``XDP_TX`` is actually turned into the same action as an ``XDP_REDIRECT`` to
+ that same interface (i.e., it will only work if the driver has support for the
+ ``ndo_xdp_xmit`` driver op).
+
+- When running the program with multiple repetitions, the execution will happen
+ in batches. The batch size defaults to 64 packets (which is same as the
+ maximum NAPI receive batch size), but can be specified by userspace through
+ the ``batch_size`` parameter, up to a maximum of 256 packets. For each batch,
+ the kernel executes the XDP program repeatedly, each invocation getting a
+ separate copy of the packet data. For each repetition, if the program drops
+ the packet, the data page is immediately recycled (see below). Otherwise, the
+ packet is buffered until the end of the batch, at which point all packets
+ buffered this way during the batch are transmitted at once.
+
+- When setting up the test run, the kernel will initialise a pool of memory
+ pages of the same size as the batch size. Each memory page will be initialised
+ with the initial packet data supplied by userspace at ``BPF_PROG_RUN``
+ invocation. When possible, the pages will be recycled on future program
+ invocations, to improve performance. Pages will generally be recycled a full
+ batch at a time, except when a packet is dropped (by return code or because
+ of, say, a redirection error), in which case that page will be recycled
+ immediately. If a packet ends up being passed to the regular networking stack
+ (because the XDP program returns ``XDP_PASS``, or because it ends up being
+ redirected to an interface that injects it into the stack), the page will be
+ released and a new one will be allocated when the pool is empty.
+
+ When recycling, the page content is not rewritten; only the packet boundary
+ pointers (``data``, ``data_end`` and ``data_meta``) in the context object will
+ be reset to the original values. This means that if a program rewrites the
+ packet contents, it has to be prepared to see either the original content or
+ the modified version on subsequent invocations.
diff --git a/Documentation/bpf/btf.rst b/Documentation/bpf/btf.rst
new file mode 100644
index 000000000000..3b60583f5db2
--- /dev/null
+++ b/Documentation/bpf/btf.rst
@@ -0,0 +1,1210 @@
+=====================
+BPF Type Format (BTF)
+=====================
+
+1. Introduction
+===============
+
+BTF (BPF Type Format) is the metadata format which encodes the debug info
+related to BPF program/map. The name BTF was used initially to describe data
+types. The BTF was later extended to include function info for defined
+subroutines, and line info for source/line information.
+
+The debug info is used for map pretty print, function signature, etc. The
+function signature enables better bpf program/function kernel symbol. The line
+info helps generate source annotated translated byte code, jited code and
+verifier log.
+
+The BTF specification contains two parts,
+ * BTF kernel API
+ * BTF ELF file format
+
+The kernel API is the contract between user space and kernel. The kernel
+verifies the BTF info before using it. The ELF file format is a user space
+contract between ELF file and libbpf loader.
+
+The type and string sections are part of the BTF kernel API, describing the
+debug info (mostly types related) referenced by the bpf program. These two
+sections are discussed in details in :ref:`BTF_Type_String`.
+
+.. _BTF_Type_String:
+
+2. BTF Type and String Encoding
+===============================
+
+The file ``include/uapi/linux/btf.h`` provides high-level definition of how
+types/strings are encoded.
+
+The beginning of data blob must be::
+
+ struct btf_header {
+ __u16 magic;
+ __u8 version;
+ __u8 flags;
+ __u32 hdr_len;
+
+ /* All offsets are in bytes relative to the end of this header */
+ __u32 type_off; /* offset of type section */
+ __u32 type_len; /* length of type section */
+ __u32 str_off; /* offset of string section */
+ __u32 str_len; /* length of string section */
+ };
+
+The magic is ``0xeB9F``, which has different encoding for big and little
+endian systems, and can be used to test whether BTF is generated for big- or
+little-endian target. The ``btf_header`` is designed to be extensible with
+``hdr_len`` equal to ``sizeof(struct btf_header)`` when a data blob is
+generated.
+
+2.1 String Encoding
+-------------------
+
+The first string in the string section must be a null string. The rest of
+string table is a concatenation of other null-terminated strings.
+
+2.2 Type Encoding
+-----------------
+
+The type id ``0`` is reserved for ``void`` type. The type section is parsed
+sequentially and type id is assigned to each recognized type starting from id
+``1``. Currently, the following types are supported::
+
+ #define BTF_KIND_INT 1 /* Integer */
+ #define BTF_KIND_PTR 2 /* Pointer */
+ #define BTF_KIND_ARRAY 3 /* Array */
+ #define BTF_KIND_STRUCT 4 /* Struct */
+ #define BTF_KIND_UNION 5 /* Union */
+ #define BTF_KIND_ENUM 6 /* Enumeration up to 32-bit values */
+ #define BTF_KIND_FWD 7 /* Forward */
+ #define BTF_KIND_TYPEDEF 8 /* Typedef */
+ #define BTF_KIND_VOLATILE 9 /* Volatile */
+ #define BTF_KIND_CONST 10 /* Const */
+ #define BTF_KIND_RESTRICT 11 /* Restrict */
+ #define BTF_KIND_FUNC 12 /* Function */
+ #define BTF_KIND_FUNC_PROTO 13 /* Function Proto */
+ #define BTF_KIND_VAR 14 /* Variable */
+ #define BTF_KIND_DATASEC 15 /* Section */
+ #define BTF_KIND_FLOAT 16 /* Floating point */
+ #define BTF_KIND_DECL_TAG 17 /* Decl Tag */
+ #define BTF_KIND_TYPE_TAG 18 /* Type Tag */
+ #define BTF_KIND_ENUM64 19 /* Enumeration up to 64-bit values */
+
+Note that the type section encodes debug info, not just pure types.
+``BTF_KIND_FUNC`` is not a type, and it represents a defined subprogram.
+
+Each type contains the following common data::
+
+ struct btf_type {
+ __u32 name_off;
+ /* "info" bits arrangement
+ * bits 0-15: vlen (e.g. # of struct's members)
+ * bits 16-23: unused
+ * bits 24-28: kind (e.g. int, ptr, array...etc)
+ * bits 29-30: unused
+ * bit 31: kind_flag, currently used by
+ * struct, union, enum, fwd, enum64,
+ * decl_tag and type_tag
+ */
+ __u32 info;
+ /* "size" is used by INT, ENUM, STRUCT, UNION and ENUM64.
+ * "size" tells the size of the type it is describing.
+ *
+ * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
+ * FUNC, FUNC_PROTO, DECL_TAG and TYPE_TAG.
+ * "type" is a type_id referring to another type.
+ */
+ union {
+ __u32 size;
+ __u32 type;
+ };
+ };
+
+For certain kinds, the common data are followed by kind-specific data. The
+``name_off`` in ``struct btf_type`` specifies the offset in the string table.
+The following sections detail encoding of each kind.
+
+2.2.1 BTF_KIND_INT
+~~~~~~~~~~~~~~~~~~
+
+``struct btf_type`` encoding requirement:
+ * ``name_off``: any valid offset
+ * ``info.kind_flag``: 0
+ * ``info.kind``: BTF_KIND_INT
+ * ``info.vlen``: 0
+ * ``size``: the size of the int type in bytes.
+
+``btf_type`` is followed by a ``u32`` with the following bits arrangement::
+
+ #define BTF_INT_ENCODING(VAL) (((VAL) & 0x0f000000) >> 24)
+ #define BTF_INT_OFFSET(VAL) (((VAL) & 0x00ff0000) >> 16)
+ #define BTF_INT_BITS(VAL) ((VAL) & 0x000000ff)
+
+The ``BTF_INT_ENCODING`` has the following attributes::
+
+ #define BTF_INT_SIGNED (1 << 0)
+ #define BTF_INT_CHAR (1 << 1)
+ #define BTF_INT_BOOL (1 << 2)
+
+The ``BTF_INT_ENCODING()`` provides extra information: signedness, char, or
+bool, for the int type. The char and bool encoding are mostly useful for
+pretty print. At most one encoding can be specified for the int type.
+
+The ``BTF_INT_BITS()`` specifies the number of actual bits held by this int
+type. For example, a 4-bit bitfield encodes ``BTF_INT_BITS()`` equals to 4.
+The ``btf_type.size * 8`` must be equal to or greater than ``BTF_INT_BITS()``
+for the type. The maximum value of ``BTF_INT_BITS()`` is 128.
+
+The ``BTF_INT_OFFSET()`` specifies the starting bit offset to calculate values
+for this int. For example, a bitfield struct member has:
+
+ * btf member bit offset 100 from the start of the structure,
+ * btf member pointing to an int type,
+ * the int type has ``BTF_INT_OFFSET() = 2`` and ``BTF_INT_BITS() = 4``
+
+Then in the struct memory layout, this member will occupy ``4`` bits starting
+from bits ``100 + 2 = 102``.
+
+Alternatively, the bitfield struct member can be the following to access the
+same bits as the above:
+
+ * btf member bit offset 102,
+ * btf member pointing to an int type,
+ * the int type has ``BTF_INT_OFFSET() = 0`` and ``BTF_INT_BITS() = 4``
+
+The original intention of ``BTF_INT_OFFSET()`` is to provide flexibility of
+bitfield encoding. Currently, both llvm and pahole generate
+``BTF_INT_OFFSET() = 0`` for all int types.
+
+2.2.2 BTF_KIND_PTR
+~~~~~~~~~~~~~~~~~~
+
+``struct btf_type`` encoding requirement:
+ * ``name_off``: 0
+ * ``info.kind_flag``: 0
+ * ``info.kind``: BTF_KIND_PTR
+ * ``info.vlen``: 0
+ * ``type``: the pointee type of the pointer
+
+No additional type data follow ``btf_type``.
+
+2.2.3 BTF_KIND_ARRAY
+~~~~~~~~~~~~~~~~~~~~
+
+``struct btf_type`` encoding requirement:
+ * ``name_off``: 0
+ * ``info.kind_flag``: 0
+ * ``info.kind``: BTF_KIND_ARRAY
+ * ``info.vlen``: 0
+ * ``size/type``: 0, not used
+
+``btf_type`` is followed by one ``struct btf_array``::
+
+ struct btf_array {
+ __u32 type;
+ __u32 index_type;
+ __u32 nelems;
+ };
+
+The ``struct btf_array`` encoding:
+ * ``type``: the element type
+ * ``index_type``: the index type
+ * ``nelems``: the number of elements for this array (``0`` is also allowed).
+
+The ``index_type`` can be any regular int type (``u8``, ``u16``, ``u32``,
+``u64``, ``unsigned __int128``). The original design of including
+``index_type`` follows DWARF, which has an ``index_type`` for its array type.
+Currently in BTF, beyond type verification, the ``index_type`` is not used.
+
+The ``struct btf_array`` allows chaining through element type to represent
+multidimensional arrays. For example, for ``int a[5][6]``, the following type
+information illustrates the chaining:
+
+ * [1]: int
+ * [2]: array, ``btf_array.type = [1]``, ``btf_array.nelems = 6``
+ * [3]: array, ``btf_array.type = [2]``, ``btf_array.nelems = 5``
+
+Currently, both pahole and llvm collapse multidimensional array into
+one-dimensional array, e.g., for ``a[5][6]``, the ``btf_array.nelems`` is
+equal to ``30``. This is because the original use case is map pretty print
+where the whole array is dumped out so one-dimensional array is enough. As
+more BTF usage is explored, pahole and llvm can be changed to generate proper
+chained representation for multidimensional arrays.
+
+2.2.4 BTF_KIND_STRUCT
+~~~~~~~~~~~~~~~~~~~~~
+2.2.5 BTF_KIND_UNION
+~~~~~~~~~~~~~~~~~~~~
+
+``struct btf_type`` encoding requirement:
+ * ``name_off``: 0 or offset to a valid C identifier
+ * ``info.kind_flag``: 0 or 1
+ * ``info.kind``: BTF_KIND_STRUCT or BTF_KIND_UNION
+ * ``info.vlen``: the number of struct/union members
+ * ``info.size``: the size of the struct/union in bytes
+
+``btf_type`` is followed by ``info.vlen`` number of ``struct btf_member``.::
+
+ struct btf_member {
+ __u32 name_off;
+ __u32 type;
+ __u32 offset;
+ };
+
+``struct btf_member`` encoding:
+ * ``name_off``: offset to a valid C identifier
+ * ``type``: the member type
+ * ``offset``: <see below>
+
+If the type info ``kind_flag`` is not set, the offset contains only bit offset
+of the member. Note that the base type of the bitfield can only be int or enum
+type. If the bitfield size is 32, the base type can be either int or enum
+type. If the bitfield size is not 32, the base type must be int, and int type
+``BTF_INT_BITS()`` encodes the bitfield size.
+
+If the ``kind_flag`` is set, the ``btf_member.offset`` contains both member
+bitfield size and bit offset. The bitfield size and bit offset are calculated
+as below.::
+
+ #define BTF_MEMBER_BITFIELD_SIZE(val) ((val) >> 24)
+ #define BTF_MEMBER_BIT_OFFSET(val) ((val) & 0xffffff)
+
+In this case, if the base type is an int type, it must be a regular int type:
+
+ * ``BTF_INT_OFFSET()`` must be 0.
+ * ``BTF_INT_BITS()`` must be equal to ``{1,2,4,8,16} * 8``.
+
+Commit 9d5f9f701b18 introduced ``kind_flag`` and explains why both modes
+exist.
+
+2.2.6 BTF_KIND_ENUM
+~~~~~~~~~~~~~~~~~~~
+
+``struct btf_type`` encoding requirement:
+ * ``name_off``: 0 or offset to a valid C identifier
+ * ``info.kind_flag``: 0 for unsigned, 1 for signed
+ * ``info.kind``: BTF_KIND_ENUM
+ * ``info.vlen``: number of enum values
+ * ``size``: 1/2/4/8
+
+``btf_type`` is followed by ``info.vlen`` number of ``struct btf_enum``.::
+
+ struct btf_enum {
+ __u32 name_off;
+ __s32 val;
+ };
+
+The ``btf_enum`` encoding:
+ * ``name_off``: offset to a valid C identifier
+ * ``val``: any value
+
+If the original enum value is signed and the size is less than 4,
+that value will be sign extended into 4 bytes. If the size is 8,
+the value will be truncated into 4 bytes.
+
+2.2.7 BTF_KIND_FWD
+~~~~~~~~~~~~~~~~~~
+
+``struct btf_type`` encoding requirement:
+ * ``name_off``: offset to a valid C identifier
+ * ``info.kind_flag``: 0 for struct, 1 for union
+ * ``info.kind``: BTF_KIND_FWD
+ * ``info.vlen``: 0
+ * ``type``: 0
+
+No additional type data follow ``btf_type``.
+
+2.2.8 BTF_KIND_TYPEDEF
+~~~~~~~~~~~~~~~~~~~~~~
+
+``struct btf_type`` encoding requirement:
+ * ``name_off``: offset to a valid C identifier
+ * ``info.kind_flag``: 0
+ * ``info.kind``: BTF_KIND_TYPEDEF
+ * ``info.vlen``: 0
+ * ``type``: the type which can be referred by name at ``name_off``
+
+No additional type data follow ``btf_type``.
+
+2.2.9 BTF_KIND_VOLATILE
+~~~~~~~~~~~~~~~~~~~~~~~
+
+``struct btf_type`` encoding requirement:
+ * ``name_off``: 0
+ * ``info.kind_flag``: 0
+ * ``info.kind``: BTF_KIND_VOLATILE
+ * ``info.vlen``: 0
+ * ``type``: the type with ``volatile`` qualifier
+
+No additional type data follow ``btf_type``.
+
+2.2.10 BTF_KIND_CONST
+~~~~~~~~~~~~~~~~~~~~~
+
+``struct btf_type`` encoding requirement:
+ * ``name_off``: 0
+ * ``info.kind_flag``: 0
+ * ``info.kind``: BTF_KIND_CONST
+ * ``info.vlen``: 0
+ * ``type``: the type with ``const`` qualifier
+
+No additional type data follow ``btf_type``.
+
+2.2.11 BTF_KIND_RESTRICT
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+``struct btf_type`` encoding requirement:
+ * ``name_off``: 0
+ * ``info.kind_flag``: 0
+ * ``info.kind``: BTF_KIND_RESTRICT
+ * ``info.vlen``: 0
+ * ``type``: the type with ``restrict`` qualifier
+
+No additional type data follow ``btf_type``.
+
+2.2.12 BTF_KIND_FUNC
+~~~~~~~~~~~~~~~~~~~~
+
+``struct btf_type`` encoding requirement:
+ * ``name_off``: offset to a valid C identifier
+ * ``info.kind_flag``: 0
+ * ``info.kind``: BTF_KIND_FUNC
+ * ``info.vlen``: linkage information (BTF_FUNC_STATIC, BTF_FUNC_GLOBAL
+ or BTF_FUNC_EXTERN - see :ref:`BTF_Function_Linkage_Constants`)
+ * ``type``: a BTF_KIND_FUNC_PROTO type
+
+No additional type data follow ``btf_type``.
+
+A BTF_KIND_FUNC defines not a type, but a subprogram (function) whose
+signature is defined by ``type``. The subprogram is thus an instance of that
+type. The BTF_KIND_FUNC may in turn be referenced by a func_info in the
+:ref:`BTF_Ext_Section` (ELF) or in the arguments to :ref:`BPF_Prog_Load`
+(ABI).
+
+Currently, only linkage values of BTF_FUNC_STATIC and BTF_FUNC_GLOBAL are
+supported in the kernel.
+
+2.2.13 BTF_KIND_FUNC_PROTO
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+``struct btf_type`` encoding requirement:
+ * ``name_off``: 0
+ * ``info.kind_flag``: 0
+ * ``info.kind``: BTF_KIND_FUNC_PROTO
+ * ``info.vlen``: # of parameters
+ * ``type``: the return type
+
+``btf_type`` is followed by ``info.vlen`` number of ``struct btf_param``.::
+
+ struct btf_param {
+ __u32 name_off;
+ __u32 type;
+ };
+
+If a BTF_KIND_FUNC_PROTO type is referred by a BTF_KIND_FUNC type, then
+``btf_param.name_off`` must point to a valid C identifier except for the
+possible last argument representing the variable argument. The btf_param.type
+refers to parameter type.
+
+If the function has variable arguments, the last parameter is encoded with
+``name_off = 0`` and ``type = 0``.
+
+2.2.14 BTF_KIND_VAR
+~~~~~~~~~~~~~~~~~~~
+
+``struct btf_type`` encoding requirement:
+ * ``name_off``: offset to a valid C identifier
+ * ``info.kind_flag``: 0
+ * ``info.kind``: BTF_KIND_VAR
+ * ``info.vlen``: 0
+ * ``type``: the type of the variable
+
+``btf_type`` is followed by a single ``struct btf_variable`` with the
+following data::
+
+ struct btf_var {
+ __u32 linkage;
+ };
+
+``btf_var.linkage`` may take the values: BTF_VAR_STATIC, BTF_VAR_GLOBAL_ALLOCATED or BTF_VAR_GLOBAL_EXTERN -
+see :ref:`BTF_Var_Linkage_Constants`.
+
+Not all type of global variables are supported by LLVM at this point.
+The following is currently available:
+
+ * static variables with or without section attributes
+ * global variables with section attributes
+
+The latter is for future extraction of map key/value type id's from a
+map definition.
+
+2.2.15 BTF_KIND_DATASEC
+~~~~~~~~~~~~~~~~~~~~~~~
+
+``struct btf_type`` encoding requirement:
+ * ``name_off``: offset to a valid name associated with a variable or
+ one of .data/.bss/.rodata
+ * ``info.kind_flag``: 0
+ * ``info.kind``: BTF_KIND_DATASEC
+ * ``info.vlen``: # of variables
+ * ``size``: total section size in bytes (0 at compilation time, patched
+ to actual size by BPF loaders such as libbpf)
+
+``btf_type`` is followed by ``info.vlen`` number of ``struct btf_var_secinfo``.::
+
+ struct btf_var_secinfo {
+ __u32 type;
+ __u32 offset;
+ __u32 size;
+ };
+
+``struct btf_var_secinfo`` encoding:
+ * ``type``: the type of the BTF_KIND_VAR variable
+ * ``offset``: the in-section offset of the variable
+ * ``size``: the size of the variable in bytes
+
+2.2.16 BTF_KIND_FLOAT
+~~~~~~~~~~~~~~~~~~~~~
+
+``struct btf_type`` encoding requirement:
+ * ``name_off``: any valid offset
+ * ``info.kind_flag``: 0
+ * ``info.kind``: BTF_KIND_FLOAT
+ * ``info.vlen``: 0
+ * ``size``: the size of the float type in bytes: 2, 4, 8, 12 or 16.
+
+No additional type data follow ``btf_type``.
+
+2.2.17 BTF_KIND_DECL_TAG
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+``struct btf_type`` encoding requirement:
+ * ``name_off``: offset to a non-empty string
+ * ``info.kind_flag``: 0 or 1
+ * ``info.kind``: BTF_KIND_DECL_TAG
+ * ``info.vlen``: 0
+ * ``type``: ``struct``, ``union``, ``func``, ``var`` or ``typedef``
+
+``btf_type`` is followed by ``struct btf_decl_tag``.::
+
+ struct btf_decl_tag {
+ __u32 component_idx;
+ };
+
+The ``type`` should be ``struct``, ``union``, ``func``, ``var`` or ``typedef``.
+For ``var`` or ``typedef`` type, ``btf_decl_tag.component_idx`` must be ``-1``.
+For the other three types, if the btf_decl_tag attribute is
+applied to the ``struct``, ``union`` or ``func`` itself,
+``btf_decl_tag.component_idx`` must be ``-1``. Otherwise,
+the attribute is applied to a ``struct``/``union`` member or
+a ``func`` argument, and ``btf_decl_tag.component_idx`` should be a
+valid index (starting from 0) pointing to a member or an argument.
+
+If ``info.kind_flag`` is 0, then this is a normal decl tag, and the
+``name_off`` encodes btf_decl_tag attribute string.
+
+If ``info.kind_flag`` is 1, then the decl tag represents an arbitrary
+__attribute__. In this case, ``name_off`` encodes a string
+representing the attribute-list of the attribute specifier. For
+example, for an ``__attribute__((aligned(4)))`` the string's contents
+is ``aligned(4)``.
+
+2.2.18 BTF_KIND_TYPE_TAG
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+``struct btf_type`` encoding requirement:
+ * ``name_off``: offset to a non-empty string
+ * ``info.kind_flag``: 0 or 1
+ * ``info.kind``: BTF_KIND_TYPE_TAG
+ * ``info.vlen``: 0
+ * ``type``: the type with ``btf_type_tag`` attribute
+
+Currently, ``BTF_KIND_TYPE_TAG`` is only emitted for pointer types.
+It has the following btf type chain:
+::
+
+ ptr -> [type_tag]*
+ -> [const | volatile | restrict | typedef]*
+ -> base_type
+
+Basically, a pointer type points to zero or more
+type_tag, then zero or more const/volatile/restrict/typedef
+and finally the base type. The base type is one of
+int, ptr, array, struct, union, enum, func_proto and float types.
+
+Similarly to decl tags, if the ``info.kind_flag`` is 0, then this is a
+normal type tag, and the ``name_off`` encodes btf_type_tag attribute
+string.
+
+If ``info.kind_flag`` is 1, then the type tag represents an arbitrary
+__attribute__, and the ``name_off`` encodes a string representing the
+attribute-list of the attribute specifier.
+
+2.2.19 BTF_KIND_ENUM64
+~~~~~~~~~~~~~~~~~~~~~~
+
+``struct btf_type`` encoding requirement:
+ * ``name_off``: 0 or offset to a valid C identifier
+ * ``info.kind_flag``: 0 for unsigned, 1 for signed
+ * ``info.kind``: BTF_KIND_ENUM64
+ * ``info.vlen``: number of enum values
+ * ``size``: 1/2/4/8
+
+``btf_type`` is followed by ``info.vlen`` number of ``struct btf_enum64``.::
+
+ struct btf_enum64 {
+ __u32 name_off;
+ __u32 val_lo32;
+ __u32 val_hi32;
+ };
+
+The ``btf_enum64`` encoding:
+ * ``name_off``: offset to a valid C identifier
+ * ``val_lo32``: lower 32-bit value for a 64-bit value
+ * ``val_hi32``: high 32-bit value for a 64-bit value
+
+If the original enum value is signed and the size is less than 8,
+that value will be sign extended into 8 bytes.
+
+2.3 Constant Values
+-------------------
+
+.. _BTF_Function_Linkage_Constants:
+
+2.3.1 Function Linkage Constant Values
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. table:: Function Linkage Values and Meanings
+
+ =================== ===== ===========
+ kind value description
+ =================== ===== ===========
+ ``BTF_FUNC_STATIC`` 0x0 definition of subprogram not visible outside containing compilation unit
+ ``BTF_FUNC_GLOBAL`` 0x1 definition of subprogram visible outside containing compilation unit
+ ``BTF_FUNC_EXTERN`` 0x2 declaration of a subprogram whose definition is outside the containing compilation unit
+ =================== ===== ===========
+
+
+.. _BTF_Var_Linkage_Constants:
+
+2.3.2 Variable Linkage Constant Values
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. table:: Variable Linkage Values and Meanings
+
+ ============================ ===== ===========
+ kind value description
+ ============================ ===== ===========
+ ``BTF_VAR_STATIC`` 0x0 definition of global variable not visible outside containing compilation unit
+ ``BTF_VAR_GLOBAL_ALLOCATED`` 0x1 definition of global variable visible outside containing compilation unit
+ ``BTF_VAR_GLOBAL_EXTERN`` 0x2 declaration of global variable whose definition is outside the containing compilation unit
+ ============================ ===== ===========
+
+3. BTF Kernel API
+=================
+
+The following bpf syscall command involves BTF:
+ * BPF_BTF_LOAD: load a blob of BTF data into kernel
+ * BPF_MAP_CREATE: map creation with btf key and value type info.
+ * BPF_PROG_LOAD: prog load with btf function and line info.
+ * BPF_BTF_GET_FD_BY_ID: get a btf fd
+ * BPF_OBJ_GET_INFO_BY_FD: btf, func_info, line_info
+ and other btf related info are returned.
+
+The workflow typically looks like:
+::
+
+ Application:
+ BPF_BTF_LOAD
+ |
+ v
+ BPF_MAP_CREATE and BPF_PROG_LOAD
+ |
+ V
+ ......
+
+ Introspection tool:
+ ......
+ BPF_{PROG,MAP}_GET_NEXT_ID (get prog/map id's)
+ |
+ V
+ BPF_{PROG,MAP}_GET_FD_BY_ID (get a prog/map fd)
+ |
+ V
+ BPF_OBJ_GET_INFO_BY_FD (get bpf_prog_info/bpf_map_info with btf_id)
+ | |
+ V |
+ BPF_BTF_GET_FD_BY_ID (get btf_fd) |
+ | |
+ V |
+ BPF_OBJ_GET_INFO_BY_FD (get btf) |
+ | |
+ V V
+ pretty print types, dump func signatures and line info, etc.
+
+
+3.1 BPF_BTF_LOAD
+----------------
+
+Load a blob of BTF data into kernel. A blob of data, described in
+:ref:`BTF_Type_String`, can be directly loaded into the kernel. A ``btf_fd``
+is returned to a userspace.
+
+3.2 BPF_MAP_CREATE
+------------------
+
+A map can be created with ``btf_fd`` and specified key/value type id.::
+
+ __u32 btf_fd; /* fd pointing to a BTF type data */
+ __u32 btf_key_type_id; /* BTF type_id of the key */
+ __u32 btf_value_type_id; /* BTF type_id of the value */
+
+In libbpf, the map can be defined with extra annotation like below:
+::
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct ipv_counts);
+ __uint(max_entries, 4);
+ } btf_map SEC(".maps");
+
+During ELF parsing, libbpf is able to extract key/value type_id's and assign
+them to BPF_MAP_CREATE attributes automatically.
+
+.. _BPF_Prog_Load:
+
+3.3 BPF_PROG_LOAD
+-----------------
+
+During prog_load, func_info and line_info can be passed to kernel with proper
+values for the following attributes:
+::
+
+ __u32 insn_cnt;
+ __aligned_u64 insns;
+ ......
+ __u32 prog_btf_fd; /* fd pointing to BTF type data */
+ __u32 func_info_rec_size; /* userspace bpf_func_info size */
+ __aligned_u64 func_info; /* func info */
+ __u32 func_info_cnt; /* number of bpf_func_info records */
+ __u32 line_info_rec_size; /* userspace bpf_line_info size */
+ __aligned_u64 line_info; /* line info */
+ __u32 line_info_cnt; /* number of bpf_line_info records */
+
+The func_info and line_info are an array of below, respectively.::
+
+ struct bpf_func_info {
+ __u32 insn_off; /* [0, insn_cnt - 1] */
+ __u32 type_id; /* pointing to a BTF_KIND_FUNC type */
+ };
+ struct bpf_line_info {
+ __u32 insn_off; /* [0, insn_cnt - 1] */
+ __u32 file_name_off; /* offset to string table for the filename */
+ __u32 line_off; /* offset to string table for the source line */
+ __u32 line_col; /* line number and column number */
+ };
+
+func_info_rec_size is the size of each func_info record, and
+line_info_rec_size is the size of each line_info record. Passing the record
+size to kernel make it possible to extend the record itself in the future.
+
+Below are requirements for func_info:
+ * func_info[0].insn_off must be 0.
+ * the func_info insn_off is in strictly increasing order and matches
+ bpf func boundaries.
+
+Below are requirements for line_info:
+ * the first insn in each func must have a line_info record pointing to it.
+ * the line_info insn_off is in strictly increasing order.
+
+For line_info, the line number and column number are defined as below:
+::
+
+ #define BPF_LINE_INFO_LINE_NUM(line_col) ((line_col) >> 10)
+ #define BPF_LINE_INFO_LINE_COL(line_col) ((line_col) & 0x3ff)
+
+3.4 BPF_{PROG,MAP}_GET_NEXT_ID
+------------------------------
+
+In kernel, every loaded program, map or btf has a unique id. The id won't
+change during the lifetime of a program, map, or btf.
+
+The bpf syscall command BPF_{PROG,MAP}_GET_NEXT_ID returns all id's, one for
+each command, to user space, for bpf program or maps, respectively, so an
+inspection tool can inspect all programs and maps.
+
+3.5 BPF_{PROG,MAP}_GET_FD_BY_ID
+-------------------------------
+
+An introspection tool cannot use id to get details about program or maps.
+A file descriptor needs to be obtained first for reference-counting purpose.
+
+3.6 BPF_OBJ_GET_INFO_BY_FD
+--------------------------
+
+Once a program/map fd is acquired, an introspection tool can get the detailed
+information from kernel about this fd, some of which are BTF-related. For
+example, ``bpf_map_info`` returns ``btf_id`` and key/value type ids.
+``bpf_prog_info`` returns ``btf_id``, func_info, and line info for translated
+bpf byte codes, and jited_line_info.
+
+3.7 BPF_BTF_GET_FD_BY_ID
+------------------------
+
+With ``btf_id`` obtained in ``bpf_map_info`` and ``bpf_prog_info``, bpf
+syscall command BPF_BTF_GET_FD_BY_ID can retrieve a btf fd. Then, with
+command BPF_OBJ_GET_INFO_BY_FD, the btf blob, originally loaded into the
+kernel with BPF_BTF_LOAD, can be retrieved.
+
+With the btf blob, ``bpf_map_info``, and ``bpf_prog_info``, an introspection
+tool has full btf knowledge and is able to pretty print map key/values, dump
+func signatures and line info, along with byte/jit codes.
+
+4. ELF File Format Interface
+============================
+
+4.1 .BTF section
+----------------
+
+The .BTF section contains type and string data. The format of this section is
+same as the one describe in :ref:`BTF_Type_String`.
+
+.. _BTF_Ext_Section:
+
+4.2 .BTF.ext section
+--------------------
+
+The .BTF.ext section encodes func_info, line_info and CO-RE relocations
+which needs loader manipulation before loading into the kernel.
+
+The specification for .BTF.ext section is defined at ``tools/lib/bpf/btf.h``
+and ``tools/lib/bpf/btf.c``.
+
+The current header of .BTF.ext section::
+
+ struct btf_ext_header {
+ __u16 magic;
+ __u8 version;
+ __u8 flags;
+ __u32 hdr_len;
+
+ /* All offsets are in bytes relative to the end of this header */
+ __u32 func_info_off;
+ __u32 func_info_len;
+ __u32 line_info_off;
+ __u32 line_info_len;
+
+ /* optional part of .BTF.ext header */
+ __u32 core_relo_off;
+ __u32 core_relo_len;
+ };
+
+It is very similar to .BTF section. Instead of type/string section, it
+contains func_info, line_info and core_relo sub-sections.
+See :ref:`BPF_Prog_Load` for details about func_info and line_info
+record format.
+
+The func_info is organized as below.::
+
+ func_info_rec_size /* __u32 value */
+ btf_ext_info_sec for section #1 /* func_info for section #1 */
+ btf_ext_info_sec for section #2 /* func_info for section #2 */
+ ...
+
+``func_info_rec_size`` specifies the size of ``bpf_func_info`` structure when
+.BTF.ext is generated. ``btf_ext_info_sec``, defined below, is a collection of
+func_info for each specific ELF section.::
+
+ struct btf_ext_info_sec {
+ __u32 sec_name_off; /* offset to section name */
+ __u32 num_info;
+ /* Followed by num_info * record_size number of bytes */
+ __u8 data[0];
+ };
+
+Here, num_info must be greater than 0.
+
+The line_info is organized as below.::
+
+ line_info_rec_size /* __u32 value */
+ btf_ext_info_sec for section #1 /* line_info for section #1 */
+ btf_ext_info_sec for section #2 /* line_info for section #2 */
+ ...
+
+``line_info_rec_size`` specifies the size of ``bpf_line_info`` structure when
+.BTF.ext is generated.
+
+The interpretation of ``bpf_func_info->insn_off`` and
+``bpf_line_info->insn_off`` is different between kernel API and ELF API. For
+kernel API, the ``insn_off`` is the instruction offset in the unit of ``struct
+bpf_insn``. For ELF API, the ``insn_off`` is the byte offset from the
+beginning of section (``btf_ext_info_sec->sec_name_off``).
+
+The core_relo is organized as below.::
+
+ core_relo_rec_size /* __u32 value */
+ btf_ext_info_sec for section #1 /* core_relo for section #1 */
+ btf_ext_info_sec for section #2 /* core_relo for section #2 */
+
+``core_relo_rec_size`` specifies the size of ``bpf_core_relo``
+structure when .BTF.ext is generated. All ``bpf_core_relo`` structures
+within a single ``btf_ext_info_sec`` describe relocations applied to
+section named by ``btf_ext_info_sec->sec_name_off``.
+
+See :ref:`Documentation/bpf/llvm_reloc.rst <btf-co-re-relocations>`
+for more information on CO-RE relocations.
+
+4.3 .BTF_ids section
+--------------------
+
+The .BTF_ids section encodes BTF ID values that are used within the kernel.
+
+This section is created during the kernel compilation with the help of
+macros defined in ``include/linux/btf_ids.h`` header file. Kernel code can
+use them to create lists and sets (sorted lists) of BTF ID values.
+
+The ``BTF_ID_LIST`` and ``BTF_ID`` macros define unsorted list of BTF ID values,
+with following syntax::
+
+ BTF_ID_LIST(list)
+ BTF_ID(type1, name1)
+ BTF_ID(type2, name2)
+
+resulting in following layout in .BTF_ids section::
+
+ __BTF_ID__type1__name1__1:
+ .zero 4
+ __BTF_ID__type2__name2__2:
+ .zero 4
+
+The ``u32 list[];`` variable is defined to access the list.
+
+The ``BTF_ID_UNUSED`` macro defines 4 zero bytes. It's used when we
+want to define unused entry in BTF_ID_LIST, like::
+
+ BTF_ID_LIST(bpf_skb_output_btf_ids)
+ BTF_ID(struct, sk_buff)
+ BTF_ID_UNUSED
+ BTF_ID(struct, task_struct)
+
+The ``BTF_SET_START/END`` macros pair defines sorted list of BTF ID values
+and their count, with following syntax::
+
+ BTF_SET_START(set)
+ BTF_ID(type1, name1)
+ BTF_ID(type2, name2)
+ BTF_SET_END(set)
+
+resulting in following layout in .BTF_ids section::
+
+ __BTF_ID__set__set:
+ .zero 4
+ __BTF_ID__type1__name1__3:
+ .zero 4
+ __BTF_ID__type2__name2__4:
+ .zero 4
+
+The ``struct btf_id_set set;`` variable is defined to access the list.
+
+The ``typeX`` name can be one of following::
+
+ struct, union, typedef, func
+
+and is used as a filter when resolving the BTF ID value.
+
+All the BTF ID lists and sets are compiled in the .BTF_ids section and
+resolved during the linking phase of kernel build by ``resolve_btfids`` tool.
+
+4.4 .BTF.base section
+---------------------
+Split BTF - where the .BTF section only contains types not in the associated
+base .BTF section - is an extremely efficient way to encode type information
+for kernel modules, since they generally consist of a few module-specific
+types along with a large set of shared kernel types. The former are encoded
+in split BTF, while the latter are encoded in base BTF, resulting in more
+compact representations. A type in split BTF that refers to a type in
+base BTF refers to it using its base BTF ID, and split BTF IDs start
+at last_base_BTF_ID + 1.
+
+The downside of this approach however is that this makes the split BTF
+somewhat brittle - when the base BTF changes, base BTF ID references are
+no longer valid and the split BTF itself becomes useless. The role of the
+.BTF.base section is to make split BTF more resilient for cases where
+the base BTF may change, as is the case for kernel modules not built every
+time the kernel is for example. .BTF.base contains named base types; INTs,
+FLOATs, STRUCTs, UNIONs, ENUM[64]s and FWDs. INTs and FLOATs are fully
+described in .BTF.base sections, while composite types like structs
+and unions are not fully defined - the .BTF.base type simply serves as
+a description of the type the split BTF referred to, so structs/unions
+have 0 members in the .BTF.base section. ENUM[64]s are similarly recorded
+with 0 members. Any other types are added to the split BTF. This
+distillation process then leaves us with a .BTF.base section with
+such minimal descriptions of base types and .BTF split section which refers
+to those base types. Later, we can relocate the split BTF using both the
+information stored in the .BTF.base section and the new .BTF base; the type
+information in the .BTF.base section allows us to update the split BTF
+references to point at the corresponding new base BTF IDs.
+
+BTF relocation happens on kernel module load when a kernel module has a
+.BTF.base section, and libbpf also provides a btf__relocate() API to
+accomplish this.
+
+As an example consider the following base BTF::
+
+ [1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED
+ [2] STRUCT 'foo' size=8 vlen=2
+ 'f1' type_id=1 bits_offset=0
+ 'f2' type_id=1 bits_offset=32
+
+...and associated split BTF::
+
+ [3] PTR '(anon)' type_id=2
+
+i.e. split BTF describes a pointer to struct foo { int f1; int f2 };
+
+.BTF.base will consist of::
+
+ [1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED
+ [2] STRUCT 'foo' size=8 vlen=0
+
+If we relocate the split BTF later using the following new base BTF::
+
+ [1] INT 'long unsigned int' size=8 bits_offset=0 nr_bits=64 encoding=(none)
+ [2] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED
+ [3] STRUCT 'foo' size=8 vlen=2
+ 'f1' type_id=2 bits_offset=0
+ 'f2' type_id=2 bits_offset=32
+
+...we can use our .BTF.base description to know that the split BTF reference
+is to struct foo, and relocation results in new split BTF::
+
+ [4] PTR '(anon)' type_id=3
+
+Note that we had to update BTF ID and start BTF ID for the split BTF.
+
+So we see how .BTF.base plays the role of facilitating later relocation,
+leading to more resilient split BTF.
+
+.BTF.base sections will be generated automatically for out-of-tree kernel module
+builds - i.e. where KBUILD_EXTMOD is set (as it would be for "make M=path/2/mod"
+cases). .BTF.base generation requires pahole support for the "distilled_base"
+BTF feature; this is available in pahole v1.28 and later.
+
+5. Using BTF
+============
+
+5.1 bpftool map pretty print
+----------------------------
+
+With BTF, the map key/value can be printed based on fields rather than simply
+raw bytes. This is especially valuable for large structure or if your data
+structure has bitfields. For example, for the following map,::
+
+ enum A { A1, A2, A3, A4, A5 };
+ typedef enum A ___A;
+ struct tmp_t {
+ char a1:4;
+ int a2:4;
+ int :4;
+ __u32 a3:4;
+ int b;
+ ___A b1:4;
+ enum A b2:4;
+ };
+ struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct tmp_t);
+ __uint(max_entries, 1);
+ } tmpmap SEC(".maps");
+
+bpftool is able to pretty print like below:
+::
+
+ [{
+ "key": 0,
+ "value": {
+ "a1": 0x2,
+ "a2": 0x4,
+ "a3": 0x6,
+ "b": 7,
+ "b1": 0x8,
+ "b2": 0xa
+ }
+ }
+ ]
+
+5.2 bpftool prog dump
+---------------------
+
+The following is an example showing how func_info and line_info can help prog
+dump with better kernel symbol names, function prototypes and line
+information.::
+
+ $ bpftool prog dump jited pinned /sys/fs/bpf/test_btf_haskv
+ [...]
+ int test_long_fname_2(struct dummy_tracepoint_args * arg):
+ bpf_prog_44a040bf25481309_test_long_fname_2:
+ ; static int test_long_fname_2(struct dummy_tracepoint_args *arg)
+ 0: push %rbp
+ 1: mov %rsp,%rbp
+ 4: sub $0x30,%rsp
+ b: sub $0x28,%rbp
+ f: mov %rbx,0x0(%rbp)
+ 13: mov %r13,0x8(%rbp)
+ 17: mov %r14,0x10(%rbp)
+ 1b: mov %r15,0x18(%rbp)
+ 1f: xor %eax,%eax
+ 21: mov %rax,0x20(%rbp)
+ 25: xor %esi,%esi
+ ; int key = 0;
+ 27: mov %esi,-0x4(%rbp)
+ ; if (!arg->sock)
+ 2a: mov 0x8(%rdi),%rdi
+ ; if (!arg->sock)
+ 2e: cmp $0x0,%rdi
+ 32: je 0x0000000000000070
+ 34: mov %rbp,%rsi
+ ; counts = bpf_map_lookup_elem(&btf_map, &key);
+ [...]
+
+5.3 Verifier Log
+----------------
+
+The following is an example of how line_info can help debugging verification
+failure.::
+
+ /* The code at tools/testing/selftests/bpf/test_xdp_noinline.c
+ * is modified as below.
+ */
+ data = (void *)(long)xdp->data;
+ data_end = (void *)(long)xdp->data_end;
+ /*
+ if (data + 4 > data_end)
+ return XDP_DROP;
+ */
+ *(u32 *)data = dst->dst;
+
+ $ bpftool prog load ./test_xdp_noinline.o /sys/fs/bpf/test_xdp_noinline type xdp
+ ; data = (void *)(long)xdp->data;
+ 224: (79) r2 = *(u64 *)(r10 -112)
+ 225: (61) r2 = *(u32 *)(r2 +0)
+ ; *(u32 *)data = dst->dst;
+ 226: (63) *(u32 *)(r2 +0) = r1
+ invalid access to packet, off=0 size=4, R2(id=0,off=0,r=0)
+ R2 offset is outside of the packet
+
+6. BTF Generation
+=================
+
+You need latest pahole
+
+ https://git.kernel.org/pub/scm/devel/pahole/pahole.git/
+
+or llvm (8.0 or later). The pahole acts as a dwarf2btf converter. It doesn't
+support .BTF.ext and btf BTF_KIND_FUNC type yet. For example,::
+
+ -bash-4.4$ cat t.c
+ struct t {
+ int a:2;
+ int b:3;
+ int c:2;
+ } g;
+ -bash-4.4$ gcc -c -O2 -g t.c
+ -bash-4.4$ pahole -JV t.o
+ File t.o:
+ [1] STRUCT t kind_flag=1 size=4 vlen=3
+ a type_id=2 bitfield_size=2 bits_offset=0
+ b type_id=2 bitfield_size=3 bits_offset=2
+ c type_id=2 bitfield_size=2 bits_offset=5
+ [2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
+
+The llvm is able to generate .BTF and .BTF.ext directly with -g for bpf target
+only. The assembly code (-S) is able to show the BTF encoding in assembly
+format.::
+
+ -bash-4.4$ cat t2.c
+ typedef int __int32;
+ struct t2 {
+ int a2;
+ int (*f2)(char q1, __int32 q2, ...);
+ int (*f3)();
+ } g2;
+ int main() { return 0; }
+ int test() { return 0; }
+ -bash-4.4$ clang -c -g -O2 --target=bpf t2.c
+ -bash-4.4$ readelf -S t2.o
+ ......
+ [ 8] .BTF PROGBITS 0000000000000000 00000247
+ 000000000000016e 0000000000000000 0 0 1
+ [ 9] .BTF.ext PROGBITS 0000000000000000 000003b5
+ 0000000000000060 0000000000000000 0 0 1
+ [10] .rel.BTF.ext REL 0000000000000000 000007e0
+ 0000000000000040 0000000000000010 16 9 8
+ ......
+ -bash-4.4$ clang -S -g -O2 --target=bpf t2.c
+ -bash-4.4$ cat t2.s
+ ......
+ .section .BTF,"",@progbits
+ .short 60319 # 0xeb9f
+ .byte 1
+ .byte 0
+ .long 24
+ .long 0
+ .long 220
+ .long 220
+ .long 122
+ .long 0 # BTF_KIND_FUNC_PROTO(id = 1)
+ .long 218103808 # 0xd000000
+ .long 2
+ .long 83 # BTF_KIND_INT(id = 2)
+ .long 16777216 # 0x1000000
+ .long 4
+ .long 16777248 # 0x1000020
+ ......
+ .byte 0 # string offset=0
+ .ascii ".text" # string offset=1
+ .byte 0
+ .ascii "/home/yhs/tmp-pahole/t2.c" # string offset=7
+ .byte 0
+ .ascii "int main() { return 0; }" # string offset=33
+ .byte 0
+ .ascii "int test() { return 0; }" # string offset=58
+ .byte 0
+ .ascii "int" # string offset=83
+ ......
+ .section .BTF.ext,"",@progbits
+ .short 60319 # 0xeb9f
+ .byte 1
+ .byte 0
+ .long 24
+ .long 0
+ .long 28
+ .long 28
+ .long 44
+ .long 8 # FuncInfo
+ .long 1 # FuncInfo section string offset=1
+ .long 2
+ .long .Lfunc_begin0
+ .long 3
+ .long .Lfunc_begin1
+ .long 5
+ .long 16 # LineInfo
+ .long 1 # LineInfo section string offset=1
+ .long 2
+ .long .Ltmp0
+ .long 7
+ .long 33
+ .long 7182 # Line 7 Col 14
+ .long .Ltmp3
+ .long 7
+ .long 58
+ .long 8206 # Line 8 Col 14
+
+7. Testing
+==========
+
+The kernel BPF selftest `tools/testing/selftests/bpf/prog_tests/btf.c`_
+provides an extensive set of BTF-related tests.
+
+.. Links
+.. _tools/testing/selftests/bpf/prog_tests/btf.c:
+ https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/tools/testing/selftests/bpf/prog_tests/btf.c
diff --git a/Documentation/bpf/clang-notes.rst b/Documentation/bpf/clang-notes.rst
new file mode 100644
index 000000000000..2c872a1ee08e
--- /dev/null
+++ b/Documentation/bpf/clang-notes.rst
@@ -0,0 +1,36 @@
+.. contents::
+.. sectnum::
+
+==========================
+Clang implementation notes
+==========================
+
+This document provides more details specific to the Clang/LLVM implementation of the eBPF instruction set.
+
+Versions
+========
+
+Clang defined "CPU" versions, where a CPU version of 3 corresponds to the current eBPF ISA.
+
+Clang can select the eBPF ISA version using ``-mcpu=v3`` for example to select version 3.
+
+Arithmetic instructions
+=======================
+
+For CPU versions prior to 3, Clang v7.0 and later can enable ``BPF_ALU`` support with
+``-Xclang -target-feature -Xclang +alu32``. In CPU version 3, support is automatically included.
+
+Jump instructions
+=================
+
+If ``-O0`` is used, Clang will generate the ``BPF_CALL | BPF_X | BPF_JMP`` (0x8d)
+instruction, which is not supported by the Linux kernel verifier.
+
+Atomic operations
+=================
+
+Clang can generate atomic instructions by default when ``-mcpu=v3`` is
+enabled. If a lower version for ``-mcpu`` is set, the only atomic instruction
+Clang can generate is ``BPF_ADD`` *without* ``BPF_FETCH``. If you need to enable
+the atomics features, while keeping a lower ``-mcpu`` version, you can use
+``-Xclang -target-feature -Xclang +alu32``.
diff --git a/Documentation/bpf/classic_vs_extended.rst b/Documentation/bpf/classic_vs_extended.rst
new file mode 100644
index 000000000000..2f81a81f5267
--- /dev/null
+++ b/Documentation/bpf/classic_vs_extended.rst
@@ -0,0 +1,376 @@
+
+===================
+Classic BPF vs eBPF
+===================
+
+eBPF is designed to be JITed with one to one mapping, which can also open up
+the possibility for GCC/LLVM compilers to generate optimized eBPF code through
+an eBPF backend that performs almost as fast as natively compiled code.
+
+Some core changes of the eBPF format from classic BPF:
+
+- Number of registers increase from 2 to 10:
+
+ The old format had two registers A and X, and a hidden frame pointer. The
+ new layout extends this to be 10 internal registers and a read-only frame
+ pointer. Since 64-bit CPUs are passing arguments to functions via registers
+ the number of args from eBPF program to in-kernel function is restricted
+ to 5 and one register is used to accept return value from an in-kernel
+ function. Natively, x86_64 passes first 6 arguments in registers, aarch64/
+ sparcv9/mips64 have 7 - 8 registers for arguments; x86_64 has 6 callee saved
+ registers, and aarch64/sparcv9/mips64 have 11 or more callee saved registers.
+
+ Thus, all eBPF registers map one to one to HW registers on x86_64, aarch64,
+ etc, and eBPF calling convention maps directly to ABIs used by the kernel on
+ 64-bit architectures.
+
+ On 32-bit architectures JIT may map programs that use only 32-bit arithmetic
+ and may let more complex programs to be interpreted.
+
+ R0 - R5 are scratch registers and eBPF program needs spill/fill them if
+ necessary across calls. Note that there is only one eBPF program (== one
+ eBPF main routine) and it cannot call other eBPF functions, it can only
+ call predefined in-kernel functions, though.
+
+- Register width increases from 32-bit to 64-bit:
+
+ Still, the semantics of the original 32-bit ALU operations are preserved
+ via 32-bit subregisters. All eBPF registers are 64-bit with 32-bit lower
+ subregisters that zero-extend into 64-bit if they are being written to.
+ That behavior maps directly to x86_64 and arm64 subregister definition, but
+ makes other JITs more difficult.
+
+ 32-bit architectures run 64-bit eBPF programs via interpreter.
+ Their JITs may convert BPF programs that only use 32-bit subregisters into
+ native instruction set and let the rest being interpreted.
+
+ Operation is 64-bit, because on 64-bit architectures, pointers are also
+ 64-bit wide, and we want to pass 64-bit values in/out of kernel functions,
+ so 32-bit eBPF registers would otherwise require to define register-pair
+ ABI, thus, there won't be able to use a direct eBPF register to HW register
+ mapping and JIT would need to do combine/split/move operations for every
+ register in and out of the function, which is complex, bug prone and slow.
+ Another reason is the use of atomic 64-bit counters.
+
+- Conditional jt/jf targets replaced with jt/fall-through:
+
+ While the original design has constructs such as ``if (cond) jump_true;
+ else jump_false;``, they are being replaced into alternative constructs like
+ ``if (cond) jump_true; /* else fall-through */``.
+
+- Introduces bpf_call insn and register passing convention for zero overhead
+ calls from/to other kernel functions:
+
+ Before an in-kernel function call, the eBPF program needs to
+ place function arguments into R1 to R5 registers to satisfy calling
+ convention, then the interpreter will take them from registers and pass
+ to in-kernel function. If R1 - R5 registers are mapped to CPU registers
+ that are used for argument passing on given architecture, the JIT compiler
+ doesn't need to emit extra moves. Function arguments will be in the correct
+ registers and BPF_CALL instruction will be JITed as single 'call' HW
+ instruction. This calling convention was picked to cover common call
+ situations without performance penalty.
+
+ After an in-kernel function call, R1 - R5 are reset to unreadable and R0 has
+ a return value of the function. Since R6 - R9 are callee saved, their state
+ is preserved across the call.
+
+ For example, consider three C functions::
+
+ u64 f1() { return (*_f2)(1); }
+ u64 f2(u64 a) { return f3(a + 1, a); }
+ u64 f3(u64 a, u64 b) { return a - b; }
+
+ GCC can compile f1, f3 into x86_64::
+
+ f1:
+ movl $1, %edi
+ movq _f2(%rip), %rax
+ jmp *%rax
+ f3:
+ movq %rdi, %rax
+ subq %rsi, %rax
+ ret
+
+ Function f2 in eBPF may look like::
+
+ f2:
+ bpf_mov R2, R1
+ bpf_add R1, 1
+ bpf_call f3
+ bpf_exit
+
+ If f2 is JITed and the pointer stored to ``_f2``. The calls f1 -> f2 -> f3 and
+ returns will be seamless. Without JIT, __bpf_prog_run() interpreter needs to
+ be used to call into f2.
+
+ For practical reasons all eBPF programs have only one argument 'ctx' which is
+ already placed into R1 (e.g. on __bpf_prog_run() startup) and the programs
+ can call kernel functions with up to 5 arguments. Calls with 6 or more arguments
+ are currently not supported, but these restrictions can be lifted if necessary
+ in the future.
+
+ On 64-bit architectures all register map to HW registers one to one. For
+ example, x86_64 JIT compiler can map them as ...
+
+ ::
+
+ R0 - rax
+ R1 - rdi
+ R2 - rsi
+ R3 - rdx
+ R4 - rcx
+ R5 - r8
+ R6 - rbx
+ R7 - r13
+ R8 - r14
+ R9 - r15
+ R10 - rbp
+
+ ... since x86_64 ABI mandates rdi, rsi, rdx, rcx, r8, r9 for argument passing
+ and rbx, r12 - r15 are callee saved.
+
+ Then the following eBPF pseudo-program::
+
+ bpf_mov R6, R1 /* save ctx */
+ bpf_mov R2, 2
+ bpf_mov R3, 3
+ bpf_mov R4, 4
+ bpf_mov R5, 5
+ bpf_call foo
+ bpf_mov R7, R0 /* save foo() return value */
+ bpf_mov R1, R6 /* restore ctx for next call */
+ bpf_mov R2, 6
+ bpf_mov R3, 7
+ bpf_mov R4, 8
+ bpf_mov R5, 9
+ bpf_call bar
+ bpf_add R0, R7
+ bpf_exit
+
+ After JIT to x86_64 may look like::
+
+ push %rbp
+ mov %rsp,%rbp
+ sub $0x228,%rsp
+ mov %rbx,-0x228(%rbp)
+ mov %r13,-0x220(%rbp)
+ mov %rdi,%rbx
+ mov $0x2,%esi
+ mov $0x3,%edx
+ mov $0x4,%ecx
+ mov $0x5,%r8d
+ callq foo
+ mov %rax,%r13
+ mov %rbx,%rdi
+ mov $0x6,%esi
+ mov $0x7,%edx
+ mov $0x8,%ecx
+ mov $0x9,%r8d
+ callq bar
+ add %r13,%rax
+ mov -0x228(%rbp),%rbx
+ mov -0x220(%rbp),%r13
+ leaveq
+ retq
+
+ Which is in this example equivalent in C to::
+
+ u64 bpf_filter(u64 ctx)
+ {
+ return foo(ctx, 2, 3, 4, 5) + bar(ctx, 6, 7, 8, 9);
+ }
+
+ In-kernel functions foo() and bar() with prototype: u64 (*)(u64 arg1, u64
+ arg2, u64 arg3, u64 arg4, u64 arg5); will receive arguments in proper
+ registers and place their return value into ``%rax`` which is R0 in eBPF.
+ Prologue and epilogue are emitted by JIT and are implicit in the
+ interpreter. R0-R5 are scratch registers, so eBPF program needs to preserve
+ them across the calls as defined by calling convention.
+
+ For example the following program is invalid::
+
+ bpf_mov R1, 1
+ bpf_call foo
+ bpf_mov R0, R1
+ bpf_exit
+
+ After the call the registers R1-R5 contain junk values and cannot be read.
+ An in-kernel verifier.rst is used to validate eBPF programs.
+
+Also in the new design, eBPF is limited to 4096 insns, which means that any
+program will terminate quickly and will only call a fixed number of kernel
+functions. Original BPF and eBPF are two operand instructions,
+which helps to do one-to-one mapping between eBPF insn and x86 insn during JIT.
+
+The input context pointer for invoking the interpreter function is generic,
+its content is defined by a specific use case. For seccomp register R1 points
+to seccomp_data, for converted BPF filters R1 points to a skb.
+
+A program, that is translated internally consists of the following elements::
+
+ op:16, jt:8, jf:8, k:32 ==> op:8, dst_reg:4, src_reg:4, off:16, imm:32
+
+So far 87 eBPF instructions were implemented. 8-bit 'op' opcode field
+has room for new instructions. Some of them may use 16/24/32 byte encoding. New
+instructions must be multiple of 8 bytes to preserve backward compatibility.
+
+eBPF is a general purpose RISC instruction set. Not every register and
+every instruction are used during translation from original BPF to eBPF.
+For example, socket filters are not using ``exclusive add`` instruction, but
+tracing filters may do to maintain counters of events, for example. Register R9
+is not used by socket filters either, but more complex filters may be running
+out of registers and would have to resort to spill/fill to stack.
+
+eBPF can be used as a generic assembler for last step performance
+optimizations, socket filters and seccomp are using it as assembler. Tracing
+filters may use it as assembler to generate code from kernel. In kernel usage
+may not be bounded by security considerations, since generated eBPF code
+may be optimizing internal code path and not being exposed to the user space.
+Safety of eBPF can come from the verifier.rst. In such use cases as
+described, it may be used as safe instruction set.
+
+Just like the original BPF, eBPF runs within a controlled environment,
+is deterministic and the kernel can easily prove that. The safety of the program
+can be determined in two steps: first step does depth-first-search to disallow
+loops and other CFG validation; second step starts from the first insn and
+descends all possible paths. It simulates execution of every insn and observes
+the state change of registers and stack.
+
+opcode encoding
+===============
+
+eBPF is reusing most of the opcode encoding from classic to simplify conversion
+of classic BPF to eBPF.
+
+For arithmetic and jump instructions the 8-bit 'code' field is divided into three
+parts::
+
+ +----------------+--------+--------------------+
+ | 4 bits | 1 bit | 3 bits |
+ | operation code | source | instruction class |
+ +----------------+--------+--------------------+
+ (MSB) (LSB)
+
+Three LSB bits store instruction class which is one of:
+
+ =================== ===============
+ Classic BPF classes eBPF classes
+ =================== ===============
+ BPF_LD 0x00 BPF_LD 0x00
+ BPF_LDX 0x01 BPF_LDX 0x01
+ BPF_ST 0x02 BPF_ST 0x02
+ BPF_STX 0x03 BPF_STX 0x03
+ BPF_ALU 0x04 BPF_ALU 0x04
+ BPF_JMP 0x05 BPF_JMP 0x05
+ BPF_RET 0x06 BPF_JMP32 0x06
+ BPF_MISC 0x07 BPF_ALU64 0x07
+ =================== ===============
+
+The 4th bit encodes the source operand ...
+
+ ::
+
+ BPF_K 0x00
+ BPF_X 0x08
+
+ * in classic BPF, this means::
+
+ BPF_SRC(code) == BPF_X - use register X as source operand
+ BPF_SRC(code) == BPF_K - use 32-bit immediate as source operand
+
+ * in eBPF, this means::
+
+ BPF_SRC(code) == BPF_X - use 'src_reg' register as source operand
+ BPF_SRC(code) == BPF_K - use 32-bit immediate as source operand
+
+... and four MSB bits store operation code.
+
+If BPF_CLASS(code) == BPF_ALU or BPF_ALU64 [ in eBPF ], BPF_OP(code) is one of::
+
+ BPF_ADD 0x00
+ BPF_SUB 0x10
+ BPF_MUL 0x20
+ BPF_DIV 0x30
+ BPF_OR 0x40
+ BPF_AND 0x50
+ BPF_LSH 0x60
+ BPF_RSH 0x70
+ BPF_NEG 0x80
+ BPF_MOD 0x90
+ BPF_XOR 0xa0
+ BPF_MOV 0xb0 /* eBPF only: mov reg to reg */
+ BPF_ARSH 0xc0 /* eBPF only: sign extending shift right */
+ BPF_END 0xd0 /* eBPF only: endianness conversion */
+
+If BPF_CLASS(code) == BPF_JMP or BPF_JMP32 [ in eBPF ], BPF_OP(code) is one of::
+
+ BPF_JA 0x00 /* BPF_JMP only */
+ BPF_JEQ 0x10
+ BPF_JGT 0x20
+ BPF_JGE 0x30
+ BPF_JSET 0x40
+ BPF_JNE 0x50 /* eBPF only: jump != */
+ BPF_JSGT 0x60 /* eBPF only: signed '>' */
+ BPF_JSGE 0x70 /* eBPF only: signed '>=' */
+ BPF_CALL 0x80 /* eBPF BPF_JMP only: function call */
+ BPF_EXIT 0x90 /* eBPF BPF_JMP only: function return */
+ BPF_JLT 0xa0 /* eBPF only: unsigned '<' */
+ BPF_JLE 0xb0 /* eBPF only: unsigned '<=' */
+ BPF_JSLT 0xc0 /* eBPF only: signed '<' */
+ BPF_JSLE 0xd0 /* eBPF only: signed '<=' */
+
+So BPF_ADD | BPF_X | BPF_ALU means 32-bit addition in both classic BPF
+and eBPF. There are only two registers in classic BPF, so it means A += X.
+In eBPF it means dst_reg = (u32) dst_reg + (u32) src_reg; similarly,
+BPF_XOR | BPF_K | BPF_ALU means A ^= imm32 in classic BPF and analogous
+src_reg = (u32) src_reg ^ (u32) imm32 in eBPF.
+
+Classic BPF is using BPF_MISC class to represent A = X and X = A moves.
+eBPF is using BPF_MOV | BPF_X | BPF_ALU code instead. Since there are no
+BPF_MISC operations in eBPF, the class 7 is used as BPF_ALU64 to mean
+exactly the same operations as BPF_ALU, but with 64-bit wide operands
+instead. So BPF_ADD | BPF_X | BPF_ALU64 means 64-bit addition, i.e.:
+dst_reg = dst_reg + src_reg
+
+Classic BPF wastes the whole BPF_RET class to represent a single ``ret``
+operation. Classic BPF_RET | BPF_K means copy imm32 into return register
+and perform function exit. eBPF is modeled to match CPU, so BPF_JMP | BPF_EXIT
+in eBPF means function exit only. The eBPF program needs to store return
+value into register R0 before doing a BPF_EXIT. Class 6 in eBPF is used as
+BPF_JMP32 to mean exactly the same operations as BPF_JMP, but with 32-bit wide
+operands for the comparisons instead.
+
+For load and store instructions the 8-bit 'code' field is divided as::
+
+ +--------+--------+-------------------+
+ | 3 bits | 2 bits | 3 bits |
+ | mode | size | instruction class |
+ +--------+--------+-------------------+
+ (MSB) (LSB)
+
+Size modifier is one of ...
+
+::
+
+ BPF_W 0x00 /* word */
+ BPF_H 0x08 /* half word */
+ BPF_B 0x10 /* byte */
+ BPF_DW 0x18 /* eBPF only, double word */
+
+... which encodes size of load/store operation::
+
+ B - 1 byte
+ H - 2 byte
+ W - 4 byte
+ DW - 8 byte (eBPF only)
+
+Mode modifier is one of::
+
+ BPF_IMM 0x00 /* used for 32-bit mov in classic BPF and 64-bit in eBPF */
+ BPF_ABS 0x20
+ BPF_IND 0x40
+ BPF_MEM 0x60
+ BPF_LEN 0x80 /* classic BPF only, reserved in eBPF */
+ BPF_MSH 0xa0 /* classic BPF only, reserved in eBPF */
+ BPF_ATOMIC 0xc0 /* eBPF only, atomic operations */
diff --git a/Documentation/bpf/cpumasks.rst b/Documentation/bpf/cpumasks.rst
new file mode 100644
index 000000000000..b5d47a04da5d
--- /dev/null
+++ b/Documentation/bpf/cpumasks.rst
@@ -0,0 +1,384 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. _cpumasks-header-label:
+
+==================
+BPF cpumask kfuncs
+==================
+
+1. Introduction
+===============
+
+``struct cpumask`` is a bitmap data structure in the kernel whose indices
+reflect the CPUs on the system. Commonly, cpumasks are used to track which CPUs
+a task is affinitized to, but they can also be used to e.g. track which cores
+are associated with a scheduling domain, which cores on a machine are idle,
+etc.
+
+BPF provides programs with a set of :ref:`kfuncs-header-label` that can be
+used to allocate, mutate, query, and free cpumasks.
+
+2. BPF cpumask objects
+======================
+
+There are two different types of cpumasks that can be used by BPF programs.
+
+2.1 ``struct bpf_cpumask *``
+----------------------------
+
+``struct bpf_cpumask *`` is a cpumask that is allocated by BPF, on behalf of a
+BPF program, and whose lifecycle is entirely controlled by BPF. These cpumasks
+are RCU-protected, can be mutated, can be used as kptrs, and can be safely cast
+to a ``struct cpumask *``.
+
+2.1.1 ``struct bpf_cpumask *`` lifecycle
+----------------------------------------
+
+A ``struct bpf_cpumask *`` is allocated, acquired, and released, using the
+following functions:
+
+.. kernel-doc:: kernel/bpf/cpumask.c
+ :identifiers: bpf_cpumask_create
+
+.. kernel-doc:: kernel/bpf/cpumask.c
+ :identifiers: bpf_cpumask_acquire
+
+.. kernel-doc:: kernel/bpf/cpumask.c
+ :identifiers: bpf_cpumask_release
+
+For example:
+
+.. code-block:: c
+
+ struct cpumask_map_value {
+ struct bpf_cpumask __kptr * cpumask;
+ };
+
+ struct array_map {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct cpumask_map_value);
+ __uint(max_entries, 65536);
+ } cpumask_map SEC(".maps");
+
+ static int cpumask_map_insert(struct bpf_cpumask *mask, u32 pid)
+ {
+ struct cpumask_map_value local, *v;
+ long status;
+ struct bpf_cpumask *old;
+ u32 key = pid;
+
+ local.cpumask = NULL;
+ status = bpf_map_update_elem(&cpumask_map, &key, &local, 0);
+ if (status) {
+ bpf_cpumask_release(mask);
+ return status;
+ }
+
+ v = bpf_map_lookup_elem(&cpumask_map, &key);
+ if (!v) {
+ bpf_cpumask_release(mask);
+ return -ENOENT;
+ }
+
+ old = bpf_kptr_xchg(&v->cpumask, mask);
+ if (old)
+ bpf_cpumask_release(old);
+
+ return 0;
+ }
+
+ /**
+ * A sample tracepoint showing how a task's cpumask can be queried and
+ * recorded as a kptr.
+ */
+ SEC("tp_btf/task_newtask")
+ int BPF_PROG(record_task_cpumask, struct task_struct *task, u64 clone_flags)
+ {
+ struct bpf_cpumask *cpumask;
+ int ret;
+
+ cpumask = bpf_cpumask_create();
+ if (!cpumask)
+ return -ENOMEM;
+
+ if (!bpf_cpumask_full(task->cpus_ptr))
+ bpf_printk("task %s has CPU affinity", task->comm);
+
+ bpf_cpumask_copy(cpumask, task->cpus_ptr);
+ return cpumask_map_insert(cpumask, task->pid);
+ }
+
+----
+
+2.1.1 ``struct bpf_cpumask *`` as kptrs
+---------------------------------------
+
+As mentioned and illustrated above, these ``struct bpf_cpumask *`` objects can
+also be stored in a map and used as kptrs. If a ``struct bpf_cpumask *`` is in
+a map, the reference can be removed from the map with bpf_kptr_xchg(), or
+opportunistically acquired using RCU:
+
+.. code-block:: c
+
+ /* struct containing the struct bpf_cpumask kptr which is stored in the map. */
+ struct cpumasks_kfunc_map_value {
+ struct bpf_cpumask __kptr * bpf_cpumask;
+ };
+
+ /* The map containing struct cpumasks_kfunc_map_value entries. */
+ struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct cpumasks_kfunc_map_value);
+ __uint(max_entries, 1);
+ } cpumasks_kfunc_map SEC(".maps");
+
+ /* ... */
+
+ /**
+ * A simple example tracepoint program showing how a
+ * struct bpf_cpumask * kptr that is stored in a map can
+ * be passed to kfuncs using RCU protection.
+ */
+ SEC("tp_btf/cgroup_mkdir")
+ int BPF_PROG(cgrp_ancestor_example, struct cgroup *cgrp, const char *path)
+ {
+ struct bpf_cpumask *kptr;
+ struct cpumasks_kfunc_map_value *v;
+ u32 key = 0;
+
+ /* Assume a bpf_cpumask * kptr was previously stored in the map. */
+ v = bpf_map_lookup_elem(&cpumasks_kfunc_map, &key);
+ if (!v)
+ return -ENOENT;
+
+ bpf_rcu_read_lock();
+ /* Acquire a reference to the bpf_cpumask * kptr that's already stored in the map. */
+ kptr = v->cpumask;
+ if (!kptr) {
+ /* If no bpf_cpumask was present in the map, it's because
+ * we're racing with another CPU that removed it with
+ * bpf_kptr_xchg() between the bpf_map_lookup_elem()
+ * above, and our load of the pointer from the map.
+ */
+ bpf_rcu_read_unlock();
+ return -EBUSY;
+ }
+
+ bpf_cpumask_setall(kptr);
+ bpf_rcu_read_unlock();
+
+ return 0;
+ }
+
+----
+
+2.2 ``struct cpumask``
+----------------------
+
+``struct cpumask`` is the object that actually contains the cpumask bitmap
+being queried, mutated, etc. A ``struct bpf_cpumask`` wraps a ``struct
+cpumask``, which is why it's safe to cast it as such (note however that it is
+**not** safe to cast a ``struct cpumask *`` to a ``struct bpf_cpumask *``, and
+the verifier will reject any program that tries to do so).
+
+As we'll see below, any kfunc that mutates its cpumask argument will take a
+``struct bpf_cpumask *`` as that argument. Any argument that simply queries the
+cpumask will instead take a ``struct cpumask *``.
+
+3. cpumask kfuncs
+=================
+
+Above, we described the kfuncs that can be used to allocate, acquire, release,
+etc a ``struct bpf_cpumask *``. This section of the document will describe the
+kfuncs for mutating and querying cpumasks.
+
+3.1 Mutating cpumasks
+---------------------
+
+Some cpumask kfuncs are "read-only" in that they don't mutate any of their
+arguments, whereas others mutate at least one argument (which means that the
+argument must be a ``struct bpf_cpumask *``, as described above).
+
+This section will describe all of the cpumask kfuncs which mutate at least one
+argument. :ref:`cpumasks-querying-label` below describes the read-only kfuncs.
+
+3.1.1 Setting and clearing CPUs
+-------------------------------
+
+bpf_cpumask_set_cpu() and bpf_cpumask_clear_cpu() can be used to set and clear
+a CPU in a ``struct bpf_cpumask`` respectively:
+
+.. kernel-doc:: kernel/bpf/cpumask.c
+ :identifiers: bpf_cpumask_set_cpu bpf_cpumask_clear_cpu
+
+These kfuncs are pretty straightforward, and can be used, for example, as
+follows:
+
+.. code-block:: c
+
+ /**
+ * A sample tracepoint showing how a cpumask can be queried.
+ */
+ SEC("tp_btf/task_newtask")
+ int BPF_PROG(test_set_clear_cpu, struct task_struct *task, u64 clone_flags)
+ {
+ struct bpf_cpumask *cpumask;
+
+ cpumask = bpf_cpumask_create();
+ if (!cpumask)
+ return -ENOMEM;
+
+ bpf_cpumask_set_cpu(0, cpumask);
+ if (!bpf_cpumask_test_cpu(0, cast(cpumask)))
+ /* Should never happen. */
+ goto release_exit;
+
+ bpf_cpumask_clear_cpu(0, cpumask);
+ if (bpf_cpumask_test_cpu(0, cast(cpumask)))
+ /* Should never happen. */
+ goto release_exit;
+
+ /* struct cpumask * pointers such as task->cpus_ptr can also be queried. */
+ if (bpf_cpumask_test_cpu(0, task->cpus_ptr))
+ bpf_printk("task %s can use CPU %d", task->comm, 0);
+
+ release_exit:
+ bpf_cpumask_release(cpumask);
+ return 0;
+ }
+
+----
+
+bpf_cpumask_test_and_set_cpu() and bpf_cpumask_test_and_clear_cpu() are
+complementary kfuncs that allow callers to atomically test and set (or clear)
+CPUs:
+
+.. kernel-doc:: kernel/bpf/cpumask.c
+ :identifiers: bpf_cpumask_test_and_set_cpu bpf_cpumask_test_and_clear_cpu
+
+----
+
+We can also set and clear entire ``struct bpf_cpumask *`` objects in one
+operation using bpf_cpumask_setall() and bpf_cpumask_clear():
+
+.. kernel-doc:: kernel/bpf/cpumask.c
+ :identifiers: bpf_cpumask_setall bpf_cpumask_clear
+
+3.1.2 Operations between cpumasks
+---------------------------------
+
+In addition to setting and clearing individual CPUs in a single cpumask,
+callers can also perform bitwise operations between multiple cpumasks using
+bpf_cpumask_and(), bpf_cpumask_or(), and bpf_cpumask_xor():
+
+.. kernel-doc:: kernel/bpf/cpumask.c
+ :identifiers: bpf_cpumask_and bpf_cpumask_or bpf_cpumask_xor
+
+The following is an example of how they may be used. Note that some of the
+kfuncs shown in this example will be covered in more detail below.
+
+.. code-block:: c
+
+ /**
+ * A sample tracepoint showing how a cpumask can be mutated using
+ bitwise operators (and queried).
+ */
+ SEC("tp_btf/task_newtask")
+ int BPF_PROG(test_and_or_xor, struct task_struct *task, u64 clone_flags)
+ {
+ struct bpf_cpumask *mask1, *mask2, *dst1, *dst2;
+
+ mask1 = bpf_cpumask_create();
+ if (!mask1)
+ return -ENOMEM;
+
+ mask2 = bpf_cpumask_create();
+ if (!mask2) {
+ bpf_cpumask_release(mask1);
+ return -ENOMEM;
+ }
+
+ // ...Safely create the other two masks... */
+
+ bpf_cpumask_set_cpu(0, mask1);
+ bpf_cpumask_set_cpu(1, mask2);
+ bpf_cpumask_and(dst1, (const struct cpumask *)mask1, (const struct cpumask *)mask2);
+ if (!bpf_cpumask_empty((const struct cpumask *)dst1))
+ /* Should never happen. */
+ goto release_exit;
+
+ bpf_cpumask_or(dst1, (const struct cpumask *)mask1, (const struct cpumask *)mask2);
+ if (!bpf_cpumask_test_cpu(0, (const struct cpumask *)dst1))
+ /* Should never happen. */
+ goto release_exit;
+
+ if (!bpf_cpumask_test_cpu(1, (const struct cpumask *)dst1))
+ /* Should never happen. */
+ goto release_exit;
+
+ bpf_cpumask_xor(dst2, (const struct cpumask *)mask1, (const struct cpumask *)mask2);
+ if (!bpf_cpumask_equal((const struct cpumask *)dst1,
+ (const struct cpumask *)dst2))
+ /* Should never happen. */
+ goto release_exit;
+
+ release_exit:
+ bpf_cpumask_release(mask1);
+ bpf_cpumask_release(mask2);
+ bpf_cpumask_release(dst1);
+ bpf_cpumask_release(dst2);
+ return 0;
+ }
+
+----
+
+The contents of an entire cpumask may be copied to another using
+bpf_cpumask_copy():
+
+.. kernel-doc:: kernel/bpf/cpumask.c
+ :identifiers: bpf_cpumask_copy
+
+----
+
+.. _cpumasks-querying-label:
+
+3.2 Querying cpumasks
+---------------------
+
+In addition to the above kfuncs, there is also a set of read-only kfuncs that
+can be used to query the contents of cpumasks.
+
+.. kernel-doc:: kernel/bpf/cpumask.c
+ :identifiers: bpf_cpumask_first bpf_cpumask_first_zero bpf_cpumask_first_and
+ bpf_cpumask_test_cpu bpf_cpumask_weight
+
+.. kernel-doc:: kernel/bpf/cpumask.c
+ :identifiers: bpf_cpumask_equal bpf_cpumask_intersects bpf_cpumask_subset
+ bpf_cpumask_empty bpf_cpumask_full
+
+.. kernel-doc:: kernel/bpf/cpumask.c
+ :identifiers: bpf_cpumask_any_distribute bpf_cpumask_any_and_distribute
+
+----
+
+Some example usages of these querying kfuncs were shown above. We will not
+replicate those examples here. Note, however, that all of the aforementioned
+kfuncs are tested in `tools/testing/selftests/bpf/progs/cpumask_success.c`_, so
+please take a look there if you're looking for more examples of how they can be
+used.
+
+.. _tools/testing/selftests/bpf/progs/cpumask_success.c:
+ https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/tools/testing/selftests/bpf/progs/cpumask_success.c
+
+
+4. Adding BPF cpumask kfuncs
+============================
+
+The set of supported BPF cpumask kfuncs are not (yet) a 1-1 match with the
+cpumask operations in include/linux/cpumask.h. Any of those cpumask operations
+could easily be encapsulated in a new kfunc if and when required. If you'd like
+to support a new cpumask operation, please feel free to submit a patch. If you
+do add a new cpumask kfunc, please document it here, and add any relevant
+selftest testcases to the cpumask selftest suite.
diff --git a/Documentation/bpf/drgn.rst b/Documentation/bpf/drgn.rst
new file mode 100644
index 000000000000..41f223c3161e
--- /dev/null
+++ b/Documentation/bpf/drgn.rst
@@ -0,0 +1,213 @@
+.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+==============
+BPF drgn tools
+==============
+
+drgn scripts is a convenient and easy to use mechanism to retrieve arbitrary
+kernel data structures. drgn is not relying on kernel UAPI to read the data.
+Instead it's reading directly from ``/proc/kcore`` or vmcore and pretty prints
+the data based on DWARF debug information from vmlinux.
+
+This document describes BPF related drgn tools.
+
+See `drgn/tools`_ for all tools available at the moment and `drgn/doc`_ for
+more details on drgn itself.
+
+bpf_inspect.py
+--------------
+
+Description
+===========
+
+`bpf_inspect.py`_ is a tool intended to inspect BPF programs and maps. It can
+iterate over all programs and maps in the system and print basic information
+about these objects, including id, type and name.
+
+The main use-case `bpf_inspect.py`_ covers is to show BPF programs of types
+``BPF_PROG_TYPE_EXT`` and ``BPF_PROG_TYPE_TRACING`` attached to other BPF
+programs via ``freplace``/``fentry``/``fexit`` mechanisms, since there is no
+user-space API to get this information.
+
+Getting started
+===============
+
+List BPF programs (full names are obtained from BTF)::
+
+ % sudo bpf_inspect.py prog
+ 27: BPF_PROG_TYPE_TRACEPOINT tracepoint__tcp__tcp_send_reset
+ 4632: BPF_PROG_TYPE_CGROUP_SOCK_ADDR tw_ipt_bind
+ 49464: BPF_PROG_TYPE_RAW_TRACEPOINT raw_tracepoint__sched_process_exit
+
+List BPF maps::
+
+ % sudo bpf_inspect.py map
+ 2577: BPF_MAP_TYPE_HASH tw_ipt_vips
+ 4050: BPF_MAP_TYPE_STACK_TRACE stack_traces
+ 4069: BPF_MAP_TYPE_PERCPU_ARRAY ned_dctcp_cntr
+
+Find BPF programs attached to BPF program ``test_pkt_access``::
+
+ % sudo bpf_inspect.py p | grep test_pkt_access
+ 650: BPF_PROG_TYPE_SCHED_CLS test_pkt_access
+ 654: BPF_PROG_TYPE_TRACING test_main linked:[650->25: BPF_TRAMP_FEXIT test_pkt_access->test_pkt_access()]
+ 655: BPF_PROG_TYPE_TRACING test_subprog1 linked:[650->29: BPF_TRAMP_FEXIT test_pkt_access->test_pkt_access_subprog1()]
+ 656: BPF_PROG_TYPE_TRACING test_subprog2 linked:[650->31: BPF_TRAMP_FEXIT test_pkt_access->test_pkt_access_subprog2()]
+ 657: BPF_PROG_TYPE_TRACING test_subprog3 linked:[650->21: BPF_TRAMP_FEXIT test_pkt_access->test_pkt_access_subprog3()]
+ 658: BPF_PROG_TYPE_EXT new_get_skb_len linked:[650->16: BPF_TRAMP_REPLACE test_pkt_access->get_skb_len()]
+ 659: BPF_PROG_TYPE_EXT new_get_skb_ifindex linked:[650->23: BPF_TRAMP_REPLACE test_pkt_access->get_skb_ifindex()]
+ 660: BPF_PROG_TYPE_EXT new_get_constant linked:[650->19: BPF_TRAMP_REPLACE test_pkt_access->get_constant()]
+
+It can be seen that there is a program ``test_pkt_access``, id 650 and there
+are multiple other tracing and ext programs attached to functions in
+``test_pkt_access``.
+
+For example the line::
+
+ 658: BPF_PROG_TYPE_EXT new_get_skb_len linked:[650->16: BPF_TRAMP_REPLACE test_pkt_access->get_skb_len()]
+
+, means that BPF program id 658, type ``BPF_PROG_TYPE_EXT``, name
+``new_get_skb_len`` replaces (``BPF_TRAMP_REPLACE``) function ``get_skb_len()``
+that has BTF id 16 in BPF program id 650, name ``test_pkt_access``.
+
+Getting help:
+
+.. code-block:: none
+
+ % sudo bpf_inspect.py
+ usage: bpf_inspect.py [-h] {prog,p,map,m} ...
+
+ drgn script to list BPF programs or maps and their properties
+ unavailable via kernel API.
+
+ See https://github.com/osandov/drgn/ for more details on drgn.
+
+ optional arguments:
+ -h, --help show this help message and exit
+
+ subcommands:
+ {prog,p,map,m}
+ prog (p) list BPF programs
+ map (m) list BPF maps
+
+Customization
+=============
+
+The script is intended to be customized by developers to print relevant
+information about BPF programs, maps and other objects.
+
+For example, to print ``struct bpf_prog_aux`` for BPF program id 53077:
+
+.. code-block:: none
+
+ % git diff
+ diff --git a/tools/bpf_inspect.py b/tools/bpf_inspect.py
+ index 650e228..aea2357 100755
+ --- a/tools/bpf_inspect.py
+ +++ b/tools/bpf_inspect.py
+ @@ -112,7 +112,9 @@ def list_bpf_progs(args):
+ if linked:
+ linked = f" linked:[{linked}]"
+
+ - print(f"{id_:>6}: {type_:32} {name:32} {linked}")
+ + if id_ == 53077:
+ + print(f"{id_:>6}: {type_:32} {name:32}")
+ + print(f"{bpf_prog.aux}")
+
+
+ def list_bpf_maps(args):
+
+It produces the output::
+
+ % sudo bpf_inspect.py p
+ 53077: BPF_PROG_TYPE_XDP tw_xdp_policer
+ *(struct bpf_prog_aux *)0xffff8893fad4b400 = {
+ .refcnt = (atomic64_t){
+ .counter = (long)58,
+ },
+ .used_map_cnt = (u32)1,
+ .max_ctx_offset = (u32)8,
+ .max_pkt_offset = (u32)15,
+ .max_tp_access = (u32)0,
+ .stack_depth = (u32)8,
+ .id = (u32)53077,
+ .func_cnt = (u32)0,
+ .func_idx = (u32)0,
+ .attach_btf_id = (u32)0,
+ .linked_prog = (struct bpf_prog *)0x0,
+ .verifier_zext = (bool)0,
+ .offload_requested = (bool)0,
+ .attach_btf_trace = (bool)0,
+ .func_proto_unreliable = (bool)0,
+ .trampoline_prog_type = (enum bpf_tramp_prog_type)BPF_TRAMP_FENTRY,
+ .trampoline = (struct bpf_trampoline *)0x0,
+ .tramp_hlist = (struct hlist_node){
+ .next = (struct hlist_node *)0x0,
+ .pprev = (struct hlist_node **)0x0,
+ },
+ .attach_func_proto = (const struct btf_type *)0x0,
+ .attach_func_name = (const char *)0x0,
+ .func = (struct bpf_prog **)0x0,
+ .jit_data = (void *)0x0,
+ .poke_tab = (struct bpf_jit_poke_descriptor *)0x0,
+ .size_poke_tab = (u32)0,
+ .ksym_tnode = (struct latch_tree_node){
+ .node = (struct rb_node [2]){
+ {
+ .__rb_parent_color = (unsigned long)18446612956263126665,
+ .rb_right = (struct rb_node *)0x0,
+ .rb_left = (struct rb_node *)0xffff88a0be3d0088,
+ },
+ {
+ .__rb_parent_color = (unsigned long)18446612956263126689,
+ .rb_right = (struct rb_node *)0x0,
+ .rb_left = (struct rb_node *)0xffff88a0be3d00a0,
+ },
+ },
+ },
+ .ksym_lnode = (struct list_head){
+ .next = (struct list_head *)0xffff88bf481830b8,
+ .prev = (struct list_head *)0xffff888309f536b8,
+ },
+ .ops = (const struct bpf_prog_ops *)xdp_prog_ops+0x0 = 0xffffffff820fa350,
+ .used_maps = (struct bpf_map **)0xffff889ff795de98,
+ .prog = (struct bpf_prog *)0xffffc9000cf2d000,
+ .user = (struct user_struct *)root_user+0x0 = 0xffffffff82444820,
+ .load_time = (u64)2408348759285319,
+ .cgroup_storage = (struct bpf_map *[2]){},
+ .name = (char [16])"tw_xdp_policer",
+ .security = (void *)0xffff889ff795d548,
+ .offload = (struct bpf_prog_offload *)0x0,
+ .btf = (struct btf *)0xffff8890ce6d0580,
+ .func_info = (struct bpf_func_info *)0xffff889ff795d240,
+ .func_info_aux = (struct bpf_func_info_aux *)0xffff889ff795de20,
+ .linfo = (struct bpf_line_info *)0xffff888a707afc00,
+ .jited_linfo = (void **)0xffff8893fad48600,
+ .func_info_cnt = (u32)1,
+ .nr_linfo = (u32)37,
+ .linfo_idx = (u32)0,
+ .num_exentries = (u32)0,
+ .extable = (struct exception_table_entry *)0xffffffffa032d950,
+ .stats = (struct bpf_prog_stats *)0x603fe3a1f6d0,
+ .work = (struct work_struct){
+ .data = (atomic_long_t){
+ .counter = (long)0,
+ },
+ .entry = (struct list_head){
+ .next = (struct list_head *)0x0,
+ .prev = (struct list_head *)0x0,
+ },
+ .func = (work_func_t)0x0,
+ },
+ .rcu = (struct callback_head){
+ .next = (struct callback_head *)0x0,
+ .func = (void (*)(struct callback_head *))0x0,
+ },
+ }
+
+
+.. Links
+.. _drgn/doc: https://drgn.readthedocs.io/en/latest/
+.. _drgn/tools: https://github.com/osandov/drgn/tree/master/tools
+.. _bpf_inspect.py:
+ https://github.com/osandov/drgn/blob/master/tools/bpf_inspect.py
diff --git a/Documentation/bpf/faq.rst b/Documentation/bpf/faq.rst
new file mode 100644
index 000000000000..a622602ce9ad
--- /dev/null
+++ b/Documentation/bpf/faq.rst
@@ -0,0 +1,11 @@
+================================
+Frequently asked questions (FAQ)
+================================
+
+Two sets of Questions and Answers (Q&A) are maintained.
+
+.. toctree::
+ :maxdepth: 1
+
+ bpf_design_QA
+ bpf_devel_QA
diff --git a/Documentation/bpf/fs_kfuncs.rst b/Documentation/bpf/fs_kfuncs.rst
new file mode 100644
index 000000000000..8762c3233a3d
--- /dev/null
+++ b/Documentation/bpf/fs_kfuncs.rst
@@ -0,0 +1,21 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. _fs_kfuncs-header-label:
+
+=====================
+BPF filesystem kfuncs
+=====================
+
+BPF LSM programs need to access filesystem data from LSM hooks. The following
+BPF kfuncs can be used to get these data.
+
+ * ``bpf_get_file_xattr()``
+
+ * ``bpf_get_fsverity_digest()``
+
+To avoid recursions, these kfuncs follow the following rules:
+
+1. These kfuncs are only permitted from BPF LSM function.
+2. These kfuncs should not call into other LSM hooks, i.e. security_*(). For
+ example, ``bpf_get_file_xattr()`` does not use ``vfs_getxattr()``, because
+ the latter calls LSM hook ``security_inode_getxattr``.
diff --git a/Documentation/bpf/graph_ds_impl.rst b/Documentation/bpf/graph_ds_impl.rst
new file mode 100644
index 000000000000..06288cc719b3
--- /dev/null
+++ b/Documentation/bpf/graph_ds_impl.rst
@@ -0,0 +1,267 @@
+=========================
+BPF Graph Data Structures
+=========================
+
+This document describes implementation details of new-style "graph" data
+structures (linked_list, rbtree), with particular focus on the verifier's
+implementation of semantics specific to those data structures.
+
+Although no specific verifier code is referred to in this document, the document
+assumes that the reader has general knowledge of BPF verifier internals, BPF
+maps, and BPF program writing.
+
+Note that the intent of this document is to describe the current state of
+these graph data structures. **No guarantees** of stability for either
+semantics or APIs are made or implied here.
+
+.. contents::
+ :local:
+ :depth: 2
+
+Introduction
+------------
+
+The BPF map API has historically been the main way to expose data structures
+of various types for use within BPF programs. Some data structures fit naturally
+with the map API (HASH, ARRAY), others less so. Consequently, programs
+interacting with the latter group of data structures can be hard to parse
+for kernel programmers without previous BPF experience.
+
+Luckily, some restrictions which necessitated the use of BPF map semantics are
+no longer relevant. With the introduction of kfuncs, kptrs, and the any-context
+BPF allocator, it is now possible to implement BPF data structures whose API
+and semantics more closely match those exposed to the rest of the kernel.
+
+Two such data structures - linked_list and rbtree - have many verification
+details in common. Because both have "root"s ("head" for linked_list) and
+"node"s, the verifier code and this document refer to common functionality
+as "graph_api", "graph_root", "graph_node", etc.
+
+Unless otherwise stated, examples and semantics below apply to both graph data
+structures.
+
+Unstable API
+------------
+
+Data structures implemented using the BPF map API have historically used BPF
+helper functions - either standard map API helpers like ``bpf_map_update_elem``
+or map-specific helpers. The new-style graph data structures instead use kfuncs
+to define their manipulation helpers. Because there are no stability guarantees
+for kfuncs, the API and semantics for these data structures can be evolved in
+a way that breaks backwards compatibility if necessary.
+
+Root and node types for the new data structures are opaquely defined in the
+``uapi/linux/bpf.h`` header.
+
+Locking
+-------
+
+The new-style data structures are intrusive and are defined similarly to their
+vanilla kernel counterparts:
+
+.. code-block:: c
+
+ struct node_data {
+ long key;
+ long data;
+ struct bpf_rb_node node;
+ };
+
+ struct bpf_spin_lock glock;
+ struct bpf_rb_root groot __contains(node_data, node);
+
+The "root" type for both linked_list and rbtree expects to be in a map_value
+which also contains a ``bpf_spin_lock`` - in the above example both global
+variables are placed in a single-value arraymap. The verifier considers this
+spin_lock to be associated with the ``bpf_rb_root`` by virtue of both being in
+the same map_value and will enforce that the correct lock is held when
+verifying BPF programs that manipulate the tree. Since this lock checking
+happens at verification time, there is no runtime penalty.
+
+Non-owning references
+---------------------
+
+**Motivation**
+
+Consider the following BPF code:
+
+.. code-block:: c
+
+ struct node_data *n = bpf_obj_new(typeof(*n)); /* ACQUIRED */
+
+ bpf_spin_lock(&lock);
+
+ bpf_rbtree_add(&tree, n); /* PASSED */
+
+ bpf_spin_unlock(&lock);
+
+From the verifier's perspective, the pointer ``n`` returned from ``bpf_obj_new``
+has type ``PTR_TO_BTF_ID | MEM_ALLOC``, with a ``btf_id`` of
+``struct node_data`` and a nonzero ``ref_obj_id``. Because it holds ``n``, the
+program has ownership of the pointee's (object pointed to by ``n``) lifetime.
+The BPF program must pass off ownership before exiting - either via
+``bpf_obj_drop``, which ``free``'s the object, or by adding it to ``tree`` with
+``bpf_rbtree_add``.
+
+(``ACQUIRED`` and ``PASSED`` comments in the example denote statements where
+"ownership is acquired" and "ownership is passed", respectively)
+
+What should the verifier do with ``n`` after ownership is passed off? If the
+object was ``free``'d with ``bpf_obj_drop`` the answer is obvious: the verifier
+should reject programs which attempt to access ``n`` after ``bpf_obj_drop`` as
+the object is no longer valid. The underlying memory may have been reused for
+some other allocation, unmapped, etc.
+
+When ownership is passed to ``tree`` via ``bpf_rbtree_add`` the answer is less
+obvious. The verifier could enforce the same semantics as for ``bpf_obj_drop``,
+but that would result in programs with useful, common coding patterns being
+rejected, e.g.:
+
+.. code-block:: c
+
+ int x;
+ struct node_data *n = bpf_obj_new(typeof(*n)); /* ACQUIRED */
+
+ bpf_spin_lock(&lock);
+
+ bpf_rbtree_add(&tree, n); /* PASSED */
+ x = n->data;
+ n->data = 42;
+
+ bpf_spin_unlock(&lock);
+
+Both the read from and write to ``n->data`` would be rejected. The verifier
+can do better, though, by taking advantage of two details:
+
+ * Graph data structure APIs can only be used when the ``bpf_spin_lock``
+ associated with the graph root is held
+
+ * Both graph data structures have pointer stability
+
+ * Because graph nodes are allocated with ``bpf_obj_new`` and
+ adding / removing from the root involves fiddling with the
+ ``bpf_{list,rb}_node`` field of the node struct, a graph node will
+ remain at the same address after either operation.
+
+Because the associated ``bpf_spin_lock`` must be held by any program adding
+or removing, if we're in the critical section bounded by that lock, we know
+that no other program can add or remove until the end of the critical section.
+This combined with pointer stability means that, until the critical section
+ends, we can safely access the graph node through ``n`` even after it was used
+to pass ownership.
+
+The verifier considers such a reference a *non-owning reference*. The ref
+returned by ``bpf_obj_new`` is accordingly considered an *owning reference*.
+Both terms currently only have meaning in the context of graph nodes and API.
+
+**Details**
+
+Let's enumerate the properties of both types of references.
+
+*owning reference*
+
+ * This reference controls the lifetime of the pointee
+
+ * Ownership of pointee must be 'released' by passing it to some graph API
+ kfunc, or via ``bpf_obj_drop``, which ``free``'s the pointee
+
+ * If not released before program ends, verifier considers program invalid
+
+ * Access to the pointee's memory will not page fault
+
+*non-owning reference*
+
+ * This reference does not own the pointee
+
+ * It cannot be used to add the graph node to a graph root, nor ``free``'d via
+ ``bpf_obj_drop``
+
+ * No explicit control of lifetime, but can infer valid lifetime based on
+ non-owning ref existence (see explanation below)
+
+ * Access to the pointee's memory will not page fault
+
+From verifier's perspective non-owning references can only exist
+between spin_lock and spin_unlock. Why? After spin_unlock another program
+can do arbitrary operations on the data structure like removing and ``free``-ing
+via bpf_obj_drop. A non-owning ref to some chunk of memory that was remove'd,
+``free``'d, and reused via bpf_obj_new would point to an entirely different thing.
+Or the memory could go away.
+
+To prevent this logic violation all non-owning references are invalidated by the
+verifier after a critical section ends. This is necessary to ensure the "will
+not page fault" property of non-owning references. So if the verifier hasn't
+invalidated a non-owning ref, accessing it will not page fault.
+
+Currently ``bpf_obj_drop`` is not allowed in the critical section, so
+if there's a valid non-owning ref, we must be in a critical section, and can
+conclude that the ref's memory hasn't been dropped-and- ``free``'d or
+dropped-and-reused.
+
+Any reference to a node that is in an rbtree _must_ be non-owning, since
+the tree has control of the pointee's lifetime. Similarly, any ref to a node
+that isn't in rbtree _must_ be owning. This results in a nice property:
+graph API add / remove implementations don't need to check if a node
+has already been added (or already removed), as the ownership model
+allows the verifier to prevent such a state from being valid by simply checking
+types.
+
+However, pointer aliasing poses an issue for the above "nice property".
+Consider the following example:
+
+.. code-block:: c
+
+ struct node_data *n, *m, *o, *p;
+ n = bpf_obj_new(typeof(*n)); /* 1 */
+
+ bpf_spin_lock(&lock);
+
+ bpf_rbtree_add(&tree, n); /* 2 */
+ m = bpf_rbtree_first(&tree); /* 3 */
+
+ o = bpf_rbtree_remove(&tree, n); /* 4 */
+ p = bpf_rbtree_remove(&tree, m); /* 5 */
+
+ bpf_spin_unlock(&lock);
+
+ bpf_obj_drop(o);
+ bpf_obj_drop(p); /* 6 */
+
+Assume the tree is empty before this program runs. If we track verifier state
+changes here using numbers in above comments:
+
+ 1) n is an owning reference
+
+ 2) n is a non-owning reference, it's been added to the tree
+
+ 3) n and m are non-owning references, they both point to the same node
+
+ 4) o is an owning reference, n and m non-owning, all point to same node
+
+ 5) o and p are owning, n and m non-owning, all point to the same node
+
+ 6) a double-free has occurred, since o and p point to same node and o was
+ ``free``'d in previous statement
+
+States 4 and 5 violate our "nice property", as there are non-owning refs to
+a node which is not in an rbtree. Statement 5 will try to remove a node which
+has already been removed as a result of this violation. State 6 is a dangerous
+double-free.
+
+At a minimum we should prevent state 6 from being possible. If we can't also
+prevent state 5 then we must abandon our "nice property" and check whether a
+node has already been removed at runtime.
+
+We prevent both by generalizing the "invalidate non-owning references" behavior
+of ``bpf_spin_unlock`` and doing similar invalidation after
+``bpf_rbtree_remove``. The logic here being that any graph API kfunc which:
+
+ * takes an arbitrary node argument
+
+ * removes it from the data structure
+
+ * returns an owning reference to the removed node
+
+May result in a state where some other non-owning reference points to the same
+node. So ``remove``-type kfuncs must be considered a non-owning reference
+invalidation point as well.
diff --git a/Documentation/bpf/helpers.rst b/Documentation/bpf/helpers.rst
new file mode 100644
index 000000000000..c4ee0cc20dec
--- /dev/null
+++ b/Documentation/bpf/helpers.rst
@@ -0,0 +1,7 @@
+Helper functions
+================
+
+* `bpf-helpers(7)`_ maintains a list of helpers available to eBPF programs.
+
+.. Links
+.. _bpf-helpers(7): https://man7.org/linux/man-pages/man7/bpf-helpers.7.html \ No newline at end of file
diff --git a/Documentation/bpf/index.rst b/Documentation/bpf/index.rst
index 00a8450a602f..0bb5cb8157f1 100644
--- a/Documentation/bpf/index.rst
+++ b/Documentation/bpf/index.rst
@@ -5,32 +5,41 @@ BPF Documentation
This directory contains documentation for the BPF (Berkeley Packet
Filter) facility, with a focus on the extended BPF version (eBPF).
-This kernel side documentation is still work in progress. The main
-textual documentation is (for historical reasons) described in
-`Documentation/networking/filter.txt`_, which describe both classical
-and extended BPF instruction-set.
+This kernel side documentation is still work in progress.
The Cilium project also maintains a `BPF and XDP Reference Guide`_
that goes into great technical depth about the BPF Architecture.
-The primary info for the bpf syscall is available in the `man-pages`_
-for `bpf(2)`_.
-
-
-
-Frequently asked questions (FAQ)
-================================
-
-Two sets of Questions and Answers (Q&A) are maintained.
-
.. toctree::
:maxdepth: 1
- bpf_design_QA
- bpf_devel_QA
-
+ verifier
+ libbpf/index
+ standardization/index
+ btf
+ faq
+ syscall_api
+ helpers
+ kfuncs
+ cpumasks
+ fs_kfuncs
+ programs
+ maps
+ bpf_prog_run
+ classic_vs_extended.rst
+ bpf_iterators
+ bpf_licensing
+ test_debug
+ clang-notes
+ linux-notes
+ other
+ redirect
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
.. Links:
-.. _Documentation/networking/filter.txt: ../networking/filter.txt
-.. _man-pages: https://www.kernel.org/doc/man-pages/
-.. _bpf(2): http://man7.org/linux/man-pages/man2/bpf.2.html
-.. _BPF and XDP Reference Guide: http://cilium.readthedocs.io/en/latest/bpf/
+.. _BPF and XDP Reference Guide: https://docs.cilium.io/en/latest/bpf/
diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst
new file mode 100644
index 000000000000..e38941370b90
--- /dev/null
+++ b/Documentation/bpf/kfuncs.rst
@@ -0,0 +1,712 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. _kfuncs-header-label:
+
+=============================
+BPF Kernel Functions (kfuncs)
+=============================
+
+1. Introduction
+===============
+
+BPF Kernel Functions or more commonly known as kfuncs are functions in the Linux
+kernel which are exposed for use by BPF programs. Unlike normal BPF helpers,
+kfuncs do not have a stable interface and can change from one kernel release to
+another. Hence, BPF programs need to be updated in response to changes in the
+kernel. See :ref:`BPF_kfunc_lifecycle_expectations` for more information.
+
+2. Defining a kfunc
+===================
+
+There are two ways to expose a kernel function to BPF programs, either make an
+existing function in the kernel visible, or add a new wrapper for BPF. In both
+cases, care must be taken that BPF program can only call such function in a
+valid context. To enforce this, visibility of a kfunc can be per program type.
+
+If you are not creating a BPF wrapper for existing kernel function, skip ahead
+to :ref:`BPF_kfunc_nodef`.
+
+2.1 Creating a wrapper kfunc
+----------------------------
+
+When defining a wrapper kfunc, the wrapper function should have extern linkage.
+This prevents the compiler from optimizing away dead code, as this wrapper kfunc
+is not invoked anywhere in the kernel itself. It is not necessary to provide a
+prototype in a header for the wrapper kfunc.
+
+An example is given below::
+
+ /* Disables missing prototype warnings */
+ __bpf_kfunc_start_defs();
+
+ __bpf_kfunc struct task_struct *bpf_find_get_task_by_vpid(pid_t nr)
+ {
+ return find_get_task_by_vpid(nr);
+ }
+
+ __bpf_kfunc_end_defs();
+
+A wrapper kfunc is often needed when we need to annotate parameters of the
+kfunc. Otherwise one may directly make the kfunc visible to the BPF program by
+registering it with the BPF subsystem. See :ref:`BPF_kfunc_nodef`.
+
+2.2 Annotating kfunc parameters
+-------------------------------
+
+Similar to BPF helpers, there is sometime need for additional context required
+by the verifier to make the usage of kernel functions safer and more useful.
+Hence, we can annotate a parameter by suffixing the name of the argument of the
+kfunc with a __tag, where tag may be one of the supported annotations.
+
+2.2.1 __sz Annotation
+---------------------
+
+This annotation is used to indicate a memory and size pair in the argument list.
+An example is given below::
+
+ __bpf_kfunc void bpf_memzero(void *mem, int mem__sz)
+ {
+ ...
+ }
+
+Here, the verifier will treat first argument as a PTR_TO_MEM, and second
+argument as its size. By default, without __sz annotation, the size of the type
+of the pointer is used. Without __sz annotation, a kfunc cannot accept a void
+pointer.
+
+2.2.2 __k Annotation
+--------------------
+
+This annotation is only understood for scalar arguments, where it indicates that
+the verifier must check the scalar argument to be a known constant, which does
+not indicate a size parameter, and the value of the constant is relevant to the
+safety of the program.
+
+An example is given below::
+
+ __bpf_kfunc void *bpf_obj_new(u32 local_type_id__k, ...)
+ {
+ ...
+ }
+
+Here, bpf_obj_new uses local_type_id argument to find out the size of that type
+ID in program's BTF and return a sized pointer to it. Each type ID will have a
+distinct size, hence it is crucial to treat each such call as distinct when
+values don't match during verifier state pruning checks.
+
+Hence, whenever a constant scalar argument is accepted by a kfunc which is not a
+size parameter, and the value of the constant matters for program safety, __k
+suffix should be used.
+
+2.2.3 __uninit Annotation
+-------------------------
+
+This annotation is used to indicate that the argument will be treated as
+uninitialized.
+
+An example is given below::
+
+ __bpf_kfunc int bpf_dynptr_from_skb(..., struct bpf_dynptr_kern *ptr__uninit)
+ {
+ ...
+ }
+
+Here, the dynptr will be treated as an uninitialized dynptr. Without this
+annotation, the verifier will reject the program if the dynptr passed in is
+not initialized.
+
+2.2.4 __opt Annotation
+-------------------------
+
+This annotation is used to indicate that the buffer associated with an __sz or __szk
+argument may be null. If the function is passed a nullptr in place of the buffer,
+the verifier will not check that length is appropriate for the buffer. The kfunc is
+responsible for checking if this buffer is null before using it.
+
+An example is given below::
+
+ __bpf_kfunc void *bpf_dynptr_slice(..., void *buffer__opt, u32 buffer__szk)
+ {
+ ...
+ }
+
+Here, the buffer may be null. If buffer is not null, it at least of size buffer_szk.
+Either way, the returned buffer is either NULL, or of size buffer_szk. Without this
+annotation, the verifier will reject the program if a null pointer is passed in with
+a nonzero size.
+
+2.2.5 __str Annotation
+----------------------------
+This annotation is used to indicate that the argument is a constant string.
+
+An example is given below::
+
+ __bpf_kfunc bpf_get_file_xattr(..., const char *name__str, ...)
+ {
+ ...
+ }
+
+In this case, ``bpf_get_file_xattr()`` can be called as::
+
+ bpf_get_file_xattr(..., "xattr_name", ...);
+
+Or::
+
+ const char name[] = "xattr_name"; /* This need to be global */
+ int BPF_PROG(...)
+ {
+ ...
+ bpf_get_file_xattr(..., name, ...);
+ ...
+ }
+
+2.2.6 __prog Annotation
+---------------------------
+This annotation is used to indicate that the argument needs to be fixed up to
+the bpf_prog_aux of the caller BPF program. Any value passed into this argument
+is ignored, and rewritten by the verifier.
+
+An example is given below::
+
+ __bpf_kfunc int bpf_wq_set_callback_impl(struct bpf_wq *wq,
+ int (callback_fn)(void *map, int *key, void *value),
+ unsigned int flags,
+ void *aux__prog)
+ {
+ struct bpf_prog_aux *aux = aux__prog;
+ ...
+ }
+
+.. _BPF_kfunc_nodef:
+
+2.3 Using an existing kernel function
+-------------------------------------
+
+When an existing function in the kernel is fit for consumption by BPF programs,
+it can be directly registered with the BPF subsystem. However, care must still
+be taken to review the context in which it will be invoked by the BPF program
+and whether it is safe to do so.
+
+2.4 Annotating kfuncs
+---------------------
+
+In addition to kfuncs' arguments, verifier may need more information about the
+type of kfunc(s) being registered with the BPF subsystem. To do so, we define
+flags on a set of kfuncs as follows::
+
+ BTF_KFUNCS_START(bpf_task_set)
+ BTF_ID_FLAGS(func, bpf_get_task_pid, KF_ACQUIRE | KF_RET_NULL)
+ BTF_ID_FLAGS(func, bpf_put_pid, KF_RELEASE)
+ BTF_KFUNCS_END(bpf_task_set)
+
+This set encodes the BTF ID of each kfunc listed above, and encodes the flags
+along with it. Ofcourse, it is also allowed to specify no flags.
+
+kfunc definitions should also always be annotated with the ``__bpf_kfunc``
+macro. This prevents issues such as the compiler inlining the kfunc if it's a
+static kernel function, or the function being elided in an LTO build as it's
+not used in the rest of the kernel. Developers should not manually add
+annotations to their kfunc to prevent these issues. If an annotation is
+required to prevent such an issue with your kfunc, it is a bug and should be
+added to the definition of the macro so that other kfuncs are similarly
+protected. An example is given below::
+
+ __bpf_kfunc struct task_struct *bpf_get_task_pid(s32 pid)
+ {
+ ...
+ }
+
+2.4.1 KF_ACQUIRE flag
+---------------------
+
+The KF_ACQUIRE flag is used to indicate that the kfunc returns a pointer to a
+refcounted object. The verifier will then ensure that the pointer to the object
+is eventually released using a release kfunc, or transferred to a map using a
+referenced kptr (by invoking bpf_kptr_xchg). If not, the verifier fails the
+loading of the BPF program until no lingering references remain in all possible
+explored states of the program.
+
+2.4.2 KF_RET_NULL flag
+----------------------
+
+The KF_RET_NULL flag is used to indicate that the pointer returned by the kfunc
+may be NULL. Hence, it forces the user to do a NULL check on the pointer
+returned from the kfunc before making use of it (dereferencing or passing to
+another helper). This flag is often used in pairing with KF_ACQUIRE flag, but
+both are orthogonal to each other.
+
+2.4.3 KF_RELEASE flag
+---------------------
+
+The KF_RELEASE flag is used to indicate that the kfunc releases the pointer
+passed in to it. There can be only one referenced pointer that can be passed
+in. All copies of the pointer being released are invalidated as a result of
+invoking kfunc with this flag. KF_RELEASE kfuncs automatically receive the
+protection afforded by the KF_TRUSTED_ARGS flag described below.
+
+2.4.4 KF_TRUSTED_ARGS flag
+--------------------------
+
+The KF_TRUSTED_ARGS flag is used for kfuncs taking pointer arguments. It
+indicates that the all pointer arguments are valid, and that all pointers to
+BTF objects have been passed in their unmodified form (that is, at a zero
+offset, and without having been obtained from walking another pointer, with one
+exception described below).
+
+There are two types of pointers to kernel objects which are considered "valid":
+
+1. Pointers which are passed as tracepoint or struct_ops callback arguments.
+2. Pointers which were returned from a KF_ACQUIRE kfunc.
+
+Pointers to non-BTF objects (e.g. scalar pointers) may also be passed to
+KF_TRUSTED_ARGS kfuncs, and may have a non-zero offset.
+
+The definition of "valid" pointers is subject to change at any time, and has
+absolutely no ABI stability guarantees.
+
+As mentioned above, a nested pointer obtained from walking a trusted pointer is
+no longer trusted, with one exception. If a struct type has a field that is
+guaranteed to be valid (trusted or rcu, as in KF_RCU description below) as long
+as its parent pointer is valid, the following macros can be used to express
+that to the verifier:
+
+* ``BTF_TYPE_SAFE_TRUSTED``
+* ``BTF_TYPE_SAFE_RCU``
+* ``BTF_TYPE_SAFE_RCU_OR_NULL``
+
+For example,
+
+.. code-block:: c
+
+ BTF_TYPE_SAFE_TRUSTED(struct socket) {
+ struct sock *sk;
+ };
+
+or
+
+.. code-block:: c
+
+ BTF_TYPE_SAFE_RCU(struct task_struct) {
+ const cpumask_t *cpus_ptr;
+ struct css_set __rcu *cgroups;
+ struct task_struct __rcu *real_parent;
+ struct task_struct *group_leader;
+ };
+
+In other words, you must:
+
+1. Wrap the valid pointer type in a ``BTF_TYPE_SAFE_*`` macro.
+
+2. Specify the type and name of the valid nested field. This field must match
+ the field in the original type definition exactly.
+
+A new type declared by a ``BTF_TYPE_SAFE_*`` macro also needs to be emitted so
+that it appears in BTF. For example, ``BTF_TYPE_SAFE_TRUSTED(struct socket)``
+is emitted in the ``type_is_trusted()`` function as follows:
+
+.. code-block:: c
+
+ BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct socket));
+
+
+2.4.5 KF_SLEEPABLE flag
+-----------------------
+
+The KF_SLEEPABLE flag is used for kfuncs that may sleep. Such kfuncs can only
+be called by sleepable BPF programs (BPF_F_SLEEPABLE).
+
+2.4.6 KF_DESTRUCTIVE flag
+--------------------------
+
+The KF_DESTRUCTIVE flag is used to indicate functions calling which is
+destructive to the system. For example such a call can result in system
+rebooting or panicking. Due to this additional restrictions apply to these
+calls. At the moment they only require CAP_SYS_BOOT capability, but more can be
+added later.
+
+2.4.7 KF_RCU flag
+-----------------
+
+The KF_RCU flag is a weaker version of KF_TRUSTED_ARGS. The kfuncs marked with
+KF_RCU expect either PTR_TRUSTED or MEM_RCU arguments. The verifier guarantees
+that the objects are valid and there is no use-after-free. The pointers are not
+NULL, but the object's refcount could have reached zero. The kfuncs need to
+consider doing refcnt != 0 check, especially when returning a KF_ACQUIRE
+pointer. Note as well that a KF_ACQUIRE kfunc that is KF_RCU should very likely
+also be KF_RET_NULL.
+
+2.4.8 KF_RCU_PROTECTED flag
+---------------------------
+
+The KF_RCU_PROTECTED flag is used to indicate that the kfunc must be invoked in
+an RCU critical section. This is assumed by default in non-sleepable programs,
+and must be explicitly ensured by calling ``bpf_rcu_read_lock`` for sleepable
+ones.
+
+If the kfunc returns a pointer value, this flag also enforces that the returned
+pointer is RCU protected, and can only be used while the RCU critical section is
+active.
+
+The flag is distinct from the ``KF_RCU`` flag, which only ensures that its
+arguments are at least RCU protected pointers. This may transitively imply that
+RCU protection is ensured, but it does not work in cases of kfuncs which require
+RCU protection but do not take RCU protected arguments.
+
+.. _KF_deprecated_flag:
+
+2.4.9 KF_DEPRECATED flag
+------------------------
+
+The KF_DEPRECATED flag is used for kfuncs which are scheduled to be
+changed or removed in a subsequent kernel release. A kfunc that is
+marked with KF_DEPRECATED should also have any relevant information
+captured in its kernel doc. Such information typically includes the
+kfunc's expected remaining lifespan, a recommendation for new
+functionality that can replace it if any is available, and possibly a
+rationale for why it is being removed.
+
+Note that while on some occasions, a KF_DEPRECATED kfunc may continue to be
+supported and have its KF_DEPRECATED flag removed, it is likely to be far more
+difficult to remove a KF_DEPRECATED flag after it's been added than it is to
+prevent it from being added in the first place. As described in
+:ref:`BPF_kfunc_lifecycle_expectations`, users that rely on specific kfuncs are
+encouraged to make their use-cases known as early as possible, and participate
+in upstream discussions regarding whether to keep, change, deprecate, or remove
+those kfuncs if and when such discussions occur.
+
+2.5 Registering the kfuncs
+--------------------------
+
+Once the kfunc is prepared for use, the final step to making it visible is
+registering it with the BPF subsystem. Registration is done per BPF program
+type. An example is shown below::
+
+ BTF_KFUNCS_START(bpf_task_set)
+ BTF_ID_FLAGS(func, bpf_get_task_pid, KF_ACQUIRE | KF_RET_NULL)
+ BTF_ID_FLAGS(func, bpf_put_pid, KF_RELEASE)
+ BTF_KFUNCS_END(bpf_task_set)
+
+ static const struct btf_kfunc_id_set bpf_task_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &bpf_task_set,
+ };
+
+ static int init_subsystem(void)
+ {
+ return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_task_kfunc_set);
+ }
+ late_initcall(init_subsystem);
+
+2.6 Specifying no-cast aliases with ___init
+--------------------------------------------
+
+The verifier will always enforce that the BTF type of a pointer passed to a
+kfunc by a BPF program, matches the type of pointer specified in the kfunc
+definition. The verifier, does, however, allow types that are equivalent
+according to the C standard to be passed to the same kfunc arg, even if their
+BTF_IDs differ.
+
+For example, for the following type definition:
+
+.. code-block:: c
+
+ struct bpf_cpumask {
+ cpumask_t cpumask;
+ refcount_t usage;
+ };
+
+The verifier would allow a ``struct bpf_cpumask *`` to be passed to a kfunc
+taking a ``cpumask_t *`` (which is a typedef of ``struct cpumask *``). For
+instance, both ``struct cpumask *`` and ``struct bpf_cpmuask *`` can be passed
+to bpf_cpumask_test_cpu().
+
+In some cases, this type-aliasing behavior is not desired. ``struct
+nf_conn___init`` is one such example:
+
+.. code-block:: c
+
+ struct nf_conn___init {
+ struct nf_conn ct;
+ };
+
+The C standard would consider these types to be equivalent, but it would not
+always be safe to pass either type to a trusted kfunc. ``struct
+nf_conn___init`` represents an allocated ``struct nf_conn`` object that has
+*not yet been initialized*, so it would therefore be unsafe to pass a ``struct
+nf_conn___init *`` to a kfunc that's expecting a fully initialized ``struct
+nf_conn *`` (e.g. ``bpf_ct_change_timeout()``).
+
+In order to accommodate such requirements, the verifier will enforce strict
+PTR_TO_BTF_ID type matching if two types have the exact same name, with one
+being suffixed with ``___init``.
+
+.. _BPF_kfunc_lifecycle_expectations:
+
+3. kfunc lifecycle expectations
+===============================
+
+kfuncs provide a kernel <-> kernel API, and thus are not bound by any of the
+strict stability restrictions associated with kernel <-> user UAPIs. This means
+they can be thought of as similar to EXPORT_SYMBOL_GPL, and can therefore be
+modified or removed by a maintainer of the subsystem they're defined in when
+it's deemed necessary.
+
+Like any other change to the kernel, maintainers will not change or remove a
+kfunc without having a reasonable justification. Whether or not they'll choose
+to change a kfunc will ultimately depend on a variety of factors, such as how
+widely used the kfunc is, how long the kfunc has been in the kernel, whether an
+alternative kfunc exists, what the norm is in terms of stability for the
+subsystem in question, and of course what the technical cost is of continuing
+to support the kfunc.
+
+There are several implications of this:
+
+a) kfuncs that are widely used or have been in the kernel for a long time will
+ be more difficult to justify being changed or removed by a maintainer. In
+ other words, kfuncs that are known to have a lot of users and provide
+ significant value provide stronger incentives for maintainers to invest the
+ time and complexity in supporting them. It is therefore important for
+ developers that are using kfuncs in their BPF programs to communicate and
+ explain how and why those kfuncs are being used, and to participate in
+ discussions regarding those kfuncs when they occur upstream.
+
+b) Unlike regular kernel symbols marked with EXPORT_SYMBOL_GPL, BPF programs
+ that call kfuncs are generally not part of the kernel tree. This means that
+ refactoring cannot typically change callers in-place when a kfunc changes,
+ as is done for e.g. an upstreamed driver being updated in place when a
+ kernel symbol is changed.
+
+ Unlike with regular kernel symbols, this is expected behavior for BPF
+ symbols, and out-of-tree BPF programs that use kfuncs should be considered
+ relevant to discussions and decisions around modifying and removing those
+ kfuncs. The BPF community will take an active role in participating in
+ upstream discussions when necessary to ensure that the perspectives of such
+ users are taken into account.
+
+c) A kfunc will never have any hard stability guarantees. BPF APIs cannot and
+ will not ever hard-block a change in the kernel purely for stability
+ reasons. That being said, kfuncs are features that are meant to solve
+ problems and provide value to users. The decision of whether to change or
+ remove a kfunc is a multivariate technical decision that is made on a
+ case-by-case basis, and which is informed by data points such as those
+ mentioned above. It is expected that a kfunc being removed or changed with
+ no warning will not be a common occurrence or take place without sound
+ justification, but it is a possibility that must be accepted if one is to
+ use kfuncs.
+
+3.1 kfunc deprecation
+---------------------
+
+As described above, while sometimes a maintainer may find that a kfunc must be
+changed or removed immediately to accommodate some changes in their subsystem,
+usually kfuncs will be able to accommodate a longer and more measured
+deprecation process. For example, if a new kfunc comes along which provides
+superior functionality to an existing kfunc, the existing kfunc may be
+deprecated for some period of time to allow users to migrate their BPF programs
+to use the new one. Or, if a kfunc has no known users, a decision may be made
+to remove the kfunc (without providing an alternative API) after some
+deprecation period so as to provide users with a window to notify the kfunc
+maintainer if it turns out that the kfunc is actually being used.
+
+It's expected that the common case will be that kfuncs will go through a
+deprecation period rather than being changed or removed without warning. As
+described in :ref:`KF_deprecated_flag`, the kfunc framework provides the
+KF_DEPRECATED flag to kfunc developers to signal to users that a kfunc has been
+deprecated. Once a kfunc has been marked with KF_DEPRECATED, the following
+procedure is followed for removal:
+
+1. Any relevant information for deprecated kfuncs is documented in the kfunc's
+ kernel docs. This documentation will typically include the kfunc's expected
+ remaining lifespan, a recommendation for new functionality that can replace
+ the usage of the deprecated function (or an explanation as to why no such
+ replacement exists), etc.
+
+2. The deprecated kfunc is kept in the kernel for some period of time after it
+ was first marked as deprecated. This time period will be chosen on a
+ case-by-case basis, and will typically depend on how widespread the use of
+ the kfunc is, how long it has been in the kernel, and how hard it is to move
+ to alternatives. This deprecation time period is "best effort", and as
+ described :ref:`above<BPF_kfunc_lifecycle_expectations>`, circumstances may
+ sometimes dictate that the kfunc be removed before the full intended
+ deprecation period has elapsed.
+
+3. After the deprecation period the kfunc will be removed. At this point, BPF
+ programs calling the kfunc will be rejected by the verifier.
+
+4. Core kfuncs
+==============
+
+The BPF subsystem provides a number of "core" kfuncs that are potentially
+applicable to a wide variety of different possible use cases and programs.
+Those kfuncs are documented here.
+
+4.1 struct task_struct * kfuncs
+-------------------------------
+
+There are a number of kfuncs that allow ``struct task_struct *`` objects to be
+used as kptrs:
+
+.. kernel-doc:: kernel/bpf/helpers.c
+ :identifiers: bpf_task_acquire bpf_task_release
+
+These kfuncs are useful when you want to acquire or release a reference to a
+``struct task_struct *`` that was passed as e.g. a tracepoint arg, or a
+struct_ops callback arg. For example:
+
+.. code-block:: c
+
+ /**
+ * A trivial example tracepoint program that shows how to
+ * acquire and release a struct task_struct * pointer.
+ */
+ SEC("tp_btf/task_newtask")
+ int BPF_PROG(task_acquire_release_example, struct task_struct *task, u64 clone_flags)
+ {
+ struct task_struct *acquired;
+
+ acquired = bpf_task_acquire(task);
+ if (acquired)
+ /*
+ * In a typical program you'd do something like store
+ * the task in a map, and the map will automatically
+ * release it later. Here, we release it manually.
+ */
+ bpf_task_release(acquired);
+ return 0;
+ }
+
+
+References acquired on ``struct task_struct *`` objects are RCU protected.
+Therefore, when in an RCU read region, you can obtain a pointer to a task
+embedded in a map value without having to acquire a reference:
+
+.. code-block:: c
+
+ #define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+ private(TASK) static struct task_struct *global;
+
+ /**
+ * A trivial example showing how to access a task stored
+ * in a map using RCU.
+ */
+ SEC("tp_btf/task_newtask")
+ int BPF_PROG(task_rcu_read_example, struct task_struct *task, u64 clone_flags)
+ {
+ struct task_struct *local_copy;
+
+ bpf_rcu_read_lock();
+ local_copy = global;
+ if (local_copy)
+ /*
+ * We could also pass local_copy to kfuncs or helper functions here,
+ * as we're guaranteed that local_copy will be valid until we exit
+ * the RCU read region below.
+ */
+ bpf_printk("Global task %s is valid", local_copy->comm);
+ else
+ bpf_printk("No global task found");
+ bpf_rcu_read_unlock();
+
+ /* At this point we can no longer reference local_copy. */
+
+ return 0;
+ }
+
+----
+
+A BPF program can also look up a task from a pid. This can be useful if the
+caller doesn't have a trusted pointer to a ``struct task_struct *`` object that
+it can acquire a reference on with bpf_task_acquire().
+
+.. kernel-doc:: kernel/bpf/helpers.c
+ :identifiers: bpf_task_from_pid
+
+Here is an example of it being used:
+
+.. code-block:: c
+
+ SEC("tp_btf/task_newtask")
+ int BPF_PROG(task_get_pid_example, struct task_struct *task, u64 clone_flags)
+ {
+ struct task_struct *lookup;
+
+ lookup = bpf_task_from_pid(task->pid);
+ if (!lookup)
+ /* A task should always be found, as %task is a tracepoint arg. */
+ return -ENOENT;
+
+ if (lookup->pid != task->pid) {
+ /* bpf_task_from_pid() looks up the task via its
+ * globally-unique pid from the init_pid_ns. Thus,
+ * the pid of the lookup task should always be the
+ * same as the input task.
+ */
+ bpf_task_release(lookup);
+ return -EINVAL;
+ }
+
+ /* bpf_task_from_pid() returns an acquired reference,
+ * so it must be dropped before returning from the
+ * tracepoint handler.
+ */
+ bpf_task_release(lookup);
+ return 0;
+ }
+
+4.2 struct cgroup * kfuncs
+--------------------------
+
+``struct cgroup *`` objects also have acquire and release functions:
+
+.. kernel-doc:: kernel/bpf/helpers.c
+ :identifiers: bpf_cgroup_acquire bpf_cgroup_release
+
+These kfuncs are used in exactly the same manner as bpf_task_acquire() and
+bpf_task_release() respectively, so we won't provide examples for them.
+
+----
+
+Other kfuncs available for interacting with ``struct cgroup *`` objects are
+bpf_cgroup_ancestor() and bpf_cgroup_from_id(), allowing callers to access
+the ancestor of a cgroup and find a cgroup by its ID, respectively. Both
+return a cgroup kptr.
+
+.. kernel-doc:: kernel/bpf/helpers.c
+ :identifiers: bpf_cgroup_ancestor
+
+.. kernel-doc:: kernel/bpf/helpers.c
+ :identifiers: bpf_cgroup_from_id
+
+Eventually, BPF should be updated to allow this to happen with a normal memory
+load in the program itself. This is currently not possible without more work in
+the verifier. bpf_cgroup_ancestor() can be used as follows:
+
+.. code-block:: c
+
+ /**
+ * Simple tracepoint example that illustrates how a cgroup's
+ * ancestor can be accessed using bpf_cgroup_ancestor().
+ */
+ SEC("tp_btf/cgroup_mkdir")
+ int BPF_PROG(cgrp_ancestor_example, struct cgroup *cgrp, const char *path)
+ {
+ struct cgroup *parent;
+
+ /* The parent cgroup resides at the level before the current cgroup's level. */
+ parent = bpf_cgroup_ancestor(cgrp, cgrp->level - 1);
+ if (!parent)
+ return -ENOENT;
+
+ bpf_printk("Parent id is %d", parent->self.id);
+
+ /* Return the parent cgroup that was acquired above. */
+ bpf_cgroup_release(parent);
+ return 0;
+ }
+
+4.3 struct cpumask * kfuncs
+---------------------------
+
+BPF provides a set of kfuncs that can be used to query, allocate, mutate, and
+destroy struct cpumask * objects. Please refer to :ref:`cpumasks-header-label`
+for more details.
diff --git a/Documentation/bpf/libbpf/index.rst b/Documentation/bpf/libbpf/index.rst
new file mode 100644
index 000000000000..7545a2049692
--- /dev/null
+++ b/Documentation/bpf/libbpf/index.rst
@@ -0,0 +1,33 @@
+.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+.. _libbpf:
+
+======
+libbpf
+======
+
+If you are looking to develop BPF applications using the libbpf library, this
+directory contains important documentation that you should read.
+
+To get started, it is recommended to begin with the :doc:`libbpf Overview
+<libbpf_overview>` document, which provides a high-level understanding of the
+libbpf APIs and their usage. This will give you a solid foundation to start
+exploring and utilizing the various features of libbpf to develop your BPF
+applications.
+
+.. toctree::
+ :maxdepth: 1
+
+ libbpf_overview
+ API Documentation <https://libbpf.readthedocs.io/en/latest/api.html>
+ program_types
+ libbpf_naming_convention
+ libbpf_build
+
+
+All general BPF questions, including kernel functionality, libbpf APIs and their
+application, should be sent to bpf@vger.kernel.org mailing list. You can
+`subscribe <http://vger.kernel.org/vger-lists.html#bpf>`_ to the mailing list
+search its `archive <https://lore.kernel.org/bpf/>`_. Please search the archive
+before asking new questions. It may be that this was already addressed or
+answered before.
diff --git a/Documentation/bpf/libbpf/libbpf_build.rst b/Documentation/bpf/libbpf/libbpf_build.rst
new file mode 100644
index 000000000000..8e8c23e8093d
--- /dev/null
+++ b/Documentation/bpf/libbpf/libbpf_build.rst
@@ -0,0 +1,37 @@
+.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+Building libbpf
+===============
+
+libelf and zlib are internal dependencies of libbpf and thus are required to link
+against and must be installed on the system for applications to work.
+pkg-config is used by default to find libelf, and the program called
+can be overridden with PKG_CONFIG.
+
+If using pkg-config at build time is not desired, it can be disabled by
+setting NO_PKG_CONFIG=1 when calling make.
+
+To build both static libbpf.a and shared libbpf.so:
+
+.. code-block:: bash
+
+ $ cd src
+ $ make
+
+To build only static libbpf.a library in directory build/ and install them
+together with libbpf headers in a staging directory root/:
+
+.. code-block:: bash
+
+ $ cd src
+ $ mkdir build root
+ $ BUILD_STATIC_ONLY=y OBJDIR=build DESTDIR=root make install
+
+To build both static libbpf.a and shared libbpf.so against a custom libelf
+dependency installed in /build/root/ and install them together with libbpf
+headers in a build directory /build/root/:
+
+.. code-block:: bash
+
+ $ cd src
+ $ PKG_CONFIG_PATH=/build/root/lib64/pkgconfig DESTDIR=/build/root make \ No newline at end of file
diff --git a/Documentation/bpf/libbpf/libbpf_naming_convention.rst b/Documentation/bpf/libbpf/libbpf_naming_convention.rst
new file mode 100644
index 000000000000..b5b41b61b3c0
--- /dev/null
+++ b/Documentation/bpf/libbpf/libbpf_naming_convention.rst
@@ -0,0 +1,193 @@
+.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+API naming convention
+=====================
+
+libbpf API provides access to a few logically separated groups of
+functions and types. Every group has its own naming convention
+described here. It's recommended to follow these conventions whenever a
+new function or type is added to keep libbpf API clean and consistent.
+
+All types and functions provided by libbpf API should have one of the
+following prefixes: ``bpf_``, ``btf_``, ``libbpf_``, ``btf_dump_``,
+``ring_buffer_``, ``perf_buffer_``.
+
+System call wrappers
+--------------------
+
+System call wrappers are simple wrappers for commands supported by
+sys_bpf system call. These wrappers should go to ``bpf.h`` header file
+and map one to one to corresponding commands.
+
+For example ``bpf_map_lookup_elem`` wraps ``BPF_MAP_LOOKUP_ELEM``
+command of sys_bpf, ``bpf_prog_attach`` wraps ``BPF_PROG_ATTACH``, etc.
+
+Objects
+-------
+
+Another class of types and functions provided by libbpf API is "objects"
+and functions to work with them. Objects are high-level abstractions
+such as BPF program or BPF map. They're represented by corresponding
+structures such as ``struct bpf_object``, ``struct bpf_program``,
+``struct bpf_map``, etc.
+
+Structures are forward declared and access to their fields should be
+provided via corresponding getters and setters rather than directly.
+
+These objects are associated with corresponding parts of ELF object that
+contains compiled BPF programs.
+
+For example ``struct bpf_object`` represents ELF object itself created
+from an ELF file or from a buffer, ``struct bpf_program`` represents a
+program in ELF object and ``struct bpf_map`` is a map.
+
+Functions that work with an object have names built from object name,
+double underscore and part that describes function purpose.
+
+For example ``bpf_object__open`` consists of the name of corresponding
+object, ``bpf_object``, double underscore and ``open`` that defines the
+purpose of the function to open ELF file and create ``bpf_object`` from
+it.
+
+All objects and corresponding functions other than BTF related should go
+to ``libbpf.h``. BTF types and functions should go to ``btf.h``.
+
+Auxiliary functions
+-------------------
+
+Auxiliary functions and types that don't fit well in any of categories
+described above should have ``libbpf_`` prefix, e.g.
+``libbpf_get_error`` or ``libbpf_prog_type_by_name``.
+
+ABI
+---
+
+libbpf can be both linked statically or used as DSO. To avoid possible
+conflicts with other libraries an application is linked with, all
+non-static libbpf symbols should have one of the prefixes mentioned in
+API documentation above. See API naming convention to choose the right
+name for a new symbol.
+
+Symbol visibility
+-----------------
+
+libbpf follow the model when all global symbols have visibility "hidden"
+by default and to make a symbol visible it has to be explicitly
+attributed with ``LIBBPF_API`` macro. For example:
+
+.. code-block:: c
+
+ LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id);
+
+This prevents from accidentally exporting a symbol, that is not supposed
+to be a part of ABI what, in turn, improves both libbpf developer- and
+user-experiences.
+
+ABI versioning
+--------------
+
+To make future ABI extensions possible libbpf ABI is versioned.
+Versioning is implemented by ``libbpf.map`` version script that is
+passed to linker.
+
+Version name is ``LIBBPF_`` prefix + three-component numeric version,
+starting from ``0.0.1``.
+
+Every time ABI is being changed, e.g. because a new symbol is added or
+semantic of existing symbol is changed, ABI version should be bumped.
+This bump in ABI version is at most once per kernel development cycle.
+
+For example, if current state of ``libbpf.map`` is:
+
+.. code-block:: none
+
+ LIBBPF_0.0.1 {
+ global:
+ bpf_func_a;
+ bpf_func_b;
+ local:
+ \*;
+ };
+
+, and a new symbol ``bpf_func_c`` is being introduced, then
+``libbpf.map`` should be changed like this:
+
+.. code-block:: none
+
+ LIBBPF_0.0.1 {
+ global:
+ bpf_func_a;
+ bpf_func_b;
+ local:
+ \*;
+ };
+ LIBBPF_0.0.2 {
+ global:
+ bpf_func_c;
+ } LIBBPF_0.0.1;
+
+, where new version ``LIBBPF_0.0.2`` depends on the previous
+``LIBBPF_0.0.1``.
+
+Format of version script and ways to handle ABI changes, including
+incompatible ones, described in details in [1].
+
+Stand-alone build
+-------------------
+
+Under https://github.com/libbpf/libbpf there is a (semi-)automated
+mirror of the mainline's version of libbpf for a stand-alone build.
+
+However, all changes to libbpf's code base must be upstreamed through
+the mainline kernel tree.
+
+
+API documentation convention
+============================
+
+The libbpf API is documented via comments above definitions in
+header files. These comments can be rendered by doxygen and sphinx
+for well organized html output. This section describes the
+convention in which these comments should be formatted.
+
+Here is an example from btf.h:
+
+.. code-block:: c
+
+ /**
+ * @brief **btf__new()** creates a new instance of a BTF object from the raw
+ * bytes of an ELF's BTF section
+ * @param data raw bytes
+ * @param size number of bytes passed in `data`
+ * @return new BTF object instance which has to be eventually freed with
+ * **btf__free()**
+ *
+ * On error, error-code-encoded-as-pointer is returned, not a NULL. To extract
+ * error code from such a pointer `libbpf_get_error()` should be used. If
+ * `libbpf_set_strict_mode(LIBBPF_STRICT_CLEAN_PTRS)` is enabled, NULL is
+ * returned on error instead. In both cases thread-local `errno` variable is
+ * always set to error code as well.
+ */
+
+The comment must start with a block comment of the form '/\*\*'.
+
+The documentation always starts with a @brief directive. This line is a short
+description about this API. It starts with the name of the API, denoted in bold
+like so: **api_name**. Please include an open and close parenthesis if this is a
+function. Follow with the short description of the API. A longer form description
+can be added below the last directive, at the bottom of the comment.
+
+Parameters are denoted with the @param directive, there should be one for each
+parameter. If this is a function with a non-void return, use the @return directive
+to document it.
+
+License
+-------------------
+
+libbpf is dual-licensed under LGPL 2.1 and BSD 2-Clause.
+
+Links
+-------------------
+
+[1] https://www.akkadia.org/drepper/dsohowto.pdf
+ (Chapter 3. Maintaining APIs and ABIs).
diff --git a/Documentation/bpf/libbpf/libbpf_overview.rst b/Documentation/bpf/libbpf/libbpf_overview.rst
new file mode 100644
index 000000000000..f4d22f0c62b0
--- /dev/null
+++ b/Documentation/bpf/libbpf/libbpf_overview.rst
@@ -0,0 +1,236 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============
+libbpf Overview
+===============
+
+libbpf is a C-based library containing a BPF loader that takes compiled BPF
+object files and prepares and loads them into the Linux kernel. libbpf takes the
+heavy lifting of loading, verifying, and attaching BPF programs to various
+kernel hooks, allowing BPF application developers to focus only on BPF program
+correctness and performance.
+
+The following are the high-level features supported by libbpf:
+
+* Provides high-level and low-level APIs for user space programs to interact
+ with BPF programs. The low-level APIs wrap all the bpf system call
+ functionality, which is useful when users need more fine-grained control
+ over the interactions between user space and BPF programs.
+* Provides overall support for the BPF object skeleton generated by bpftool.
+ The skeleton file simplifies the process for the user space programs to access
+ global variables and work with BPF programs.
+* Provides BPF-side APIS, including BPF helper definitions, BPF maps support,
+ and tracing helpers, allowing developers to simplify BPF code writing.
+* Supports BPF CO-RE mechanism, enabling BPF developers to write portable
+ BPF programs that can be compiled once and run across different kernel
+ versions.
+
+This document will delve into the above concepts in detail, providing a deeper
+understanding of the capabilities and advantages of libbpf and how it can help
+you develop BPF applications efficiently.
+
+BPF App Lifecycle and libbpf APIs
+==================================
+
+A BPF application consists of one or more BPF programs (either cooperating or
+completely independent), BPF maps, and global variables. The global
+variables are shared between all BPF programs, which allows them to cooperate on
+a common set of data. libbpf provides APIs that user space programs can use to
+manipulate the BPF programs by triggering different phases of a BPF application
+lifecycle.
+
+The following section provides a brief overview of each phase in the BPF life
+cycle:
+
+* **Open phase**: In this phase, libbpf parses the BPF
+ object file and discovers BPF maps, BPF programs, and global variables. After
+ a BPF app is opened, user space apps can make additional adjustments
+ (setting BPF program types, if necessary; pre-setting initial values for
+ global variables, etc.) before all the entities are created and loaded.
+
+* **Load phase**: In the load phase, libbpf creates BPF
+ maps, resolves various relocations, and verifies and loads BPF programs into
+ the kernel. At this point, libbpf validates all the parts of a BPF application
+ and loads the BPF program into the kernel, but no BPF program has yet been
+ executed. After the load phase, it’s possible to set up the initial BPF map
+ state without racing with the BPF program code execution.
+
+* **Attachment phase**: In this phase, libbpf
+ attaches BPF programs to various BPF hook points (e.g., tracepoints, kprobes,
+ cgroup hooks, network packet processing pipeline, etc.). During this
+ phase, BPF programs perform useful work such as processing
+ packets, or updating BPF maps and global variables that can be read from user
+ space.
+
+* **Tear down phase**: In the tear down phase,
+ libbpf detaches BPF programs and unloads them from the kernel. BPF maps are
+ destroyed, and all the resources used by the BPF app are freed.
+
+BPF Object Skeleton File
+========================
+
+BPF skeleton is an alternative interface to libbpf APIs for working with BPF
+objects. Skeleton code abstract away generic libbpf APIs to significantly
+simplify code for manipulating BPF programs from user space. Skeleton code
+includes a bytecode representation of the BPF object file, simplifying the
+process of distributing your BPF code. With BPF bytecode embedded, there are no
+extra files to deploy along with your application binary.
+
+You can generate the skeleton header file ``(.skel.h)`` for a specific object
+file by passing the BPF object to the bpftool. The generated BPF skeleton
+provides the following custom functions that correspond to the BPF lifecycle,
+each of them prefixed with the specific object name:
+
+* ``<name>__open()`` – creates and opens BPF application (``<name>`` stands for
+ the specific bpf object name)
+* ``<name>__load()`` – instantiates, loads,and verifies BPF application parts
+* ``<name>__attach()`` – attaches all auto-attachable BPF programs (it’s
+ optional, you can have more control by using libbpf APIs directly)
+* ``<name>__destroy()`` – detaches all BPF programs and
+ frees up all used resources
+
+Using the skeleton code is the recommended way to work with bpf programs. Keep
+in mind, BPF skeleton provides access to the underlying BPF object, so whatever
+was possible to do with generic libbpf APIs is still possible even when the BPF
+skeleton is used. It's an additive convenience feature, with no syscalls, and no
+cumbersome code.
+
+Other Advantages of Using Skeleton File
+---------------------------------------
+
+* BPF skeleton provides an interface for user space programs to work with BPF
+ global variables. The skeleton code memory maps global variables as a struct
+ into user space. The struct interface allows user space programs to initialize
+ BPF programs before the BPF load phase and fetch and update data from user
+ space afterward.
+
+* The ``skel.h`` file reflects the object file structure by listing out the
+ available maps, programs, etc. BPF skeleton provides direct access to all the
+ BPF maps and BPF programs as struct fields. This eliminates the need for
+ string-based lookups with ``bpf_object_find_map_by_name()`` and
+ ``bpf_object_find_program_by_name()`` APIs, reducing errors due to BPF source
+ code and user-space code getting out of sync.
+
+* The embedded bytecode representation of the object file ensures that the
+ skeleton and the BPF object file are always in sync.
+
+BPF Helpers
+===========
+
+libbpf provides BPF-side APIs that BPF programs can use to interact with the
+system. The BPF helpers definition allows developers to use them in BPF code as
+any other plain C function. For example, there are helper functions to print
+debugging messages, get the time since the system was booted, interact with BPF
+maps, manipulate network packets, etc.
+
+For a complete description of what the helpers do, the arguments they take, and
+the return value, see the `bpf-helpers
+<https://man7.org/linux/man-pages/man7/bpf-helpers.7.html>`_ man page.
+
+BPF CO-RE (Compile Once – Run Everywhere)
+=========================================
+
+BPF programs work in the kernel space and have access to kernel memory and data
+structures. One limitation that BPF applications come across is the lack of
+portability across different kernel versions and configurations. `BCC
+<https://github.com/iovisor/bcc/>`_ is one of the solutions for BPF
+portability. However, it comes with runtime overhead and a large binary size
+from embedding the compiler with the application.
+
+libbpf steps up the BPF program portability by supporting the BPF CO-RE concept.
+BPF CO-RE brings together BTF type information, libbpf, and the compiler to
+produce a single executable binary that you can run on multiple kernel versions
+and configurations.
+
+To make BPF programs portable libbpf relies on the BTF type information of the
+running kernel. Kernel also exposes this self-describing authoritative BTF
+information through ``sysfs`` at ``/sys/kernel/btf/vmlinux``.
+
+You can generate the BTF information for the running kernel with the following
+command:
+
+::
+
+ $ bpftool btf dump file /sys/kernel/btf/vmlinux format c > vmlinux.h
+
+The command generates a ``vmlinux.h`` header file with all kernel types
+(:doc:`BTF types <../btf>`) that the running kernel uses. Including
+``vmlinux.h`` in your BPF program eliminates dependency on system-wide kernel
+headers.
+
+libbpf enables portability of BPF programs by looking at the BPF program’s
+recorded BTF type and relocation information and matching them to BTF
+information (vmlinux) provided by the running kernel. libbpf then resolves and
+matches all the types and fields, and updates necessary offsets and other
+relocatable data to ensure that BPF program’s logic functions correctly for a
+specific kernel on the host. BPF CO-RE concept thus eliminates overhead
+associated with BPF development and allows developers to write portable BPF
+applications without modifications and runtime source code compilation on the
+target machine.
+
+The following code snippet shows how to read the parent field of a kernel
+``task_struct`` using BPF CO-RE and libbf. The basic helper to read a field in a
+CO-RE relocatable manner is ``bpf_core_read(dst, sz, src)``, which will read
+``sz`` bytes from the field referenced by ``src`` into the memory pointed to by
+``dst``.
+
+.. code-block:: C
+ :emphasize-lines: 6
+
+ //...
+ struct task_struct *task = (void *)bpf_get_current_task();
+ struct task_struct *parent_task;
+ int err;
+
+ err = bpf_core_read(&parent_task, sizeof(void *), &task->parent);
+ if (err) {
+ /* handle error */
+ }
+
+ /* parent_task contains the value of task->parent pointer */
+
+In the code snippet, we first get a pointer to the current ``task_struct`` using
+``bpf_get_current_task()``. We then use ``bpf_core_read()`` to read the parent
+field of task struct into the ``parent_task`` variable. ``bpf_core_read()`` is
+just like ``bpf_probe_read_kernel()`` BPF helper, except it records information
+about the field that should be relocated on the target kernel. i.e, if the
+``parent`` field gets shifted to a different offset within
+``struct task_struct`` due to some new field added in front of it, libbpf will
+automatically adjust the actual offset to the proper value.
+
+Getting Started with libbpf
+===========================
+
+Check out the `libbpf-bootstrap <https://github.com/libbpf/libbpf-bootstrap>`_
+repository with simple examples of using libbpf to build various BPF
+applications.
+
+See also `libbpf API documentation
+<https://libbpf.readthedocs.io/en/latest/api.html>`_.
+
+libbpf and Rust
+===============
+
+If you are building BPF applications in Rust, it is recommended to use the
+`Libbpf-rs <https://github.com/libbpf/libbpf-rs>`_ library instead of bindgen
+bindings directly to libbpf. Libbpf-rs wraps libbpf functionality in
+Rust-idiomatic interfaces and provides libbpf-cargo plugin to handle BPF code
+compilation and skeleton generation. Using Libbpf-rs will make building user
+space part of the BPF application easier. Note that the BPF program themselves
+must still be written in plain C.
+
+libbpf logging
+==============
+
+By default, libbpf logs informational and warning messages to stderr. The
+verbosity of these messages can be controlled by setting the environment
+variable LIBBPF_LOG_LEVEL to either warn, info, or debug. A custom log
+callback can be set using ``libbpf_set_print()``.
+
+Additional Documentation
+========================
+
+* `Program types and ELF Sections <https://libbpf.readthedocs.io/en/latest/program_types.html>`_
+* `API naming convention <https://libbpf.readthedocs.io/en/latest/libbpf_naming_convention.html>`_
+* `Building libbpf <https://libbpf.readthedocs.io/en/latest/libbpf_build.html>`_
+* `API documentation Convention <https://libbpf.readthedocs.io/en/latest/libbpf_naming_convention.html#api-documentation-convention>`_
diff --git a/Documentation/bpf/libbpf/program_types.rst b/Documentation/bpf/libbpf/program_types.rst
new file mode 100644
index 000000000000..218b020a2f81
--- /dev/null
+++ b/Documentation/bpf/libbpf/program_types.rst
@@ -0,0 +1,235 @@
+.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+.. _program_types_and_elf:
+
+Program Types and ELF Sections
+==============================
+
+The table below lists the program types, their attach types where relevant and the ELF section
+names supported by libbpf for them. The ELF section names follow these rules:
+
+- ``type`` is an exact match, e.g. ``SEC("socket")``
+- ``type+`` means it can be either exact ``SEC("type")`` or well-formed ``SEC("type/extras")``
+ with a '``/``' separator between ``type`` and ``extras``.
+
+When ``extras`` are specified, they provide details of how to auto-attach the BPF program. The
+format of ``extras`` depends on the program type, e.g. ``SEC("tracepoint/<category>/<name>")``
+for tracepoints or ``SEC("usdt/<path>:<provider>:<name>")`` for USDT probes. The extras are
+described in more detail in the footnotes.
+
+
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| Program Type | Attach Type | ELF Section Name | Sleepable |
++===========================================+========================================+==================================+===========+
+| ``BPF_PROG_TYPE_CGROUP_DEVICE`` | ``BPF_CGROUP_DEVICE`` | ``cgroup/dev`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_CGROUP_SKB`` | | ``cgroup/skb`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_INET_EGRESS`` | ``cgroup_skb/egress`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_INET_INGRESS`` | ``cgroup_skb/ingress`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_CGROUP_SOCKOPT`` | ``BPF_CGROUP_GETSOCKOPT`` | ``cgroup/getsockopt`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_SETSOCKOPT`` | ``cgroup/setsockopt`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_CGROUP_SOCK_ADDR`` | ``BPF_CGROUP_INET4_BIND`` | ``cgroup/bind4`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_INET4_CONNECT`` | ``cgroup/connect4`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_INET4_GETPEERNAME`` | ``cgroup/getpeername4`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_INET4_GETSOCKNAME`` | ``cgroup/getsockname4`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_INET6_BIND`` | ``cgroup/bind6`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_INET6_CONNECT`` | ``cgroup/connect6`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_INET6_GETPEERNAME`` | ``cgroup/getpeername6`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_INET6_GETSOCKNAME`` | ``cgroup/getsockname6`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_UDP4_RECVMSG`` | ``cgroup/recvmsg4`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_UDP4_SENDMSG`` | ``cgroup/sendmsg4`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_UDP6_RECVMSG`` | ``cgroup/recvmsg6`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_UDP6_SENDMSG`` | ``cgroup/sendmsg6`` | |
+| +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_UNIX_CONNECT`` | ``cgroup/connect_unix`` | |
+| +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_UNIX_SENDMSG`` | ``cgroup/sendmsg_unix`` | |
+| +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_UNIX_RECVMSG`` | ``cgroup/recvmsg_unix`` | |
+| +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_UNIX_GETPEERNAME`` | ``cgroup/getpeername_unix`` | |
+| +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_UNIX_GETSOCKNAME`` | ``cgroup/getsockname_unix`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_CGROUP_SOCK`` | ``BPF_CGROUP_INET4_POST_BIND`` | ``cgroup/post_bind4`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_INET6_POST_BIND`` | ``cgroup/post_bind6`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_INET_SOCK_CREATE`` | ``cgroup/sock_create`` | |
++ + +----------------------------------+-----------+
+| | | ``cgroup/sock`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_INET_SOCK_RELEASE`` | ``cgroup/sock_release`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_CGROUP_SYSCTL`` | ``BPF_CGROUP_SYSCTL`` | ``cgroup/sysctl`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_EXT`` | | ``freplace+`` [#fentry]_ | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_FLOW_DISSECTOR`` | ``BPF_FLOW_DISSECTOR`` | ``flow_dissector`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_KPROBE`` | | ``kprobe+`` [#kprobe]_ | |
++ + +----------------------------------+-----------+
+| | | ``kretprobe+`` [#kprobe]_ | |
++ + +----------------------------------+-----------+
+| | | ``ksyscall+`` [#ksyscall]_ | |
++ + +----------------------------------+-----------+
+| | | ``kretsyscall+`` [#ksyscall]_ | |
++ + +----------------------------------+-----------+
+| | | ``uprobe+`` [#uprobe]_ | |
++ + +----------------------------------+-----------+
+| | | ``uprobe.s+`` [#uprobe]_ | Yes |
++ + +----------------------------------+-----------+
+| | | ``uretprobe+`` [#uprobe]_ | |
++ + +----------------------------------+-----------+
+| | | ``uretprobe.s+`` [#uprobe]_ | Yes |
++ + +----------------------------------+-----------+
+| | | ``usdt+`` [#usdt]_ | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_TRACE_KPROBE_MULTI`` | ``kprobe.multi+`` [#kpmulti]_ | |
++ + +----------------------------------+-----------+
+| | | ``kretprobe.multi+`` [#kpmulti]_ | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_LIRC_MODE2`` | ``BPF_LIRC_MODE2`` | ``lirc_mode2`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_LSM`` | ``BPF_LSM_CGROUP`` | ``lsm_cgroup+`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_LSM_MAC`` | ``lsm+`` [#lsm]_ | |
++ + +----------------------------------+-----------+
+| | | ``lsm.s+`` [#lsm]_ | Yes |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_LWT_IN`` | | ``lwt_in`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_LWT_OUT`` | | ``lwt_out`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_LWT_SEG6LOCAL`` | | ``lwt_seg6local`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_LWT_XMIT`` | | ``lwt_xmit`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_NETFILTER`` | | ``netfilter`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_PERF_EVENT`` | | ``perf_event`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE`` | | ``raw_tp.w+`` [#rawtp]_ | |
++ + +----------------------------------+-----------+
+| | | ``raw_tracepoint.w+`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_RAW_TRACEPOINT`` | | ``raw_tp+`` [#rawtp]_ | |
++ + +----------------------------------+-----------+
+| | | ``raw_tracepoint+`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_SCHED_ACT`` | | ``action`` [#tc_legacy]_ | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_SCHED_CLS`` | | ``classifier`` [#tc_legacy]_ | |
++ + +----------------------------------+-----------+
+| | | ``tc`` [#tc_legacy]_ | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_NETKIT_PRIMARY`` | ``netkit/primary`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_NETKIT_PEER`` | ``netkit/peer`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_TCX_INGRESS`` | ``tc/ingress`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_TCX_EGRESS`` | ``tc/egress`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_TCX_INGRESS`` | ``tcx/ingress`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_TCX_EGRESS`` | ``tcx/egress`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_SK_LOOKUP`` | ``BPF_SK_LOOKUP`` | ``sk_lookup`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_SK_MSG`` | ``BPF_SK_MSG_VERDICT`` | ``sk_msg`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_SK_REUSEPORT`` | ``BPF_SK_REUSEPORT_SELECT_OR_MIGRATE`` | ``sk_reuseport/migrate`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_SK_REUSEPORT_SELECT`` | ``sk_reuseport`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_SK_SKB`` | | ``sk_skb`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_SK_SKB_STREAM_PARSER`` | ``sk_skb/stream_parser`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_SK_SKB_STREAM_VERDICT`` | ``sk_skb/stream_verdict`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_SOCKET_FILTER`` | | ``socket`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_SOCK_OPS`` | ``BPF_CGROUP_SOCK_OPS`` | ``sockops`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_STRUCT_OPS`` | | ``struct_ops+`` [#struct_ops]_ | |
++ + +----------------------------------+-----------+
+| | | ``struct_ops.s+`` [#struct_ops]_ | Yes |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_SYSCALL`` | | ``syscall`` | Yes |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_TRACEPOINT`` | | ``tp+`` [#tp]_ | |
++ + +----------------------------------+-----------+
+| | | ``tracepoint+`` [#tp]_ | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_TRACING`` | ``BPF_MODIFY_RETURN`` | ``fmod_ret+`` [#fentry]_ | |
++ + +----------------------------------+-----------+
+| | | ``fmod_ret.s+`` [#fentry]_ | Yes |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_TRACE_FENTRY`` | ``fentry+`` [#fentry]_ | |
++ + +----------------------------------+-----------+
+| | | ``fentry.s+`` [#fentry]_ | Yes |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_TRACE_FEXIT`` | ``fexit+`` [#fentry]_ | |
++ + +----------------------------------+-----------+
+| | | ``fexit.s+`` [#fentry]_ | Yes |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_TRACE_ITER`` | ``iter+`` [#iter]_ | |
++ + +----------------------------------+-----------+
+| | | ``iter.s+`` [#iter]_ | Yes |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_TRACE_RAW_TP`` | ``tp_btf+`` [#fentry]_ | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_XDP`` | ``BPF_XDP_CPUMAP`` | ``xdp.frags/cpumap`` | |
++ + +----------------------------------+-----------+
+| | | ``xdp/cpumap`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_XDP_DEVMAP`` | ``xdp.frags/devmap`` | |
++ + +----------------------------------+-----------+
+| | | ``xdp/devmap`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_XDP`` | ``xdp.frags`` | |
++ + +----------------------------------+-----------+
+| | | ``xdp`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+
+
+.. rubric:: Footnotes
+
+.. [#fentry] The ``fentry`` attach format is ``fentry[.s]/<function>``.
+.. [#kprobe] The ``kprobe`` attach format is ``kprobe/<function>[+<offset>]``. Valid
+ characters for ``function`` are ``a-zA-Z0-9_.`` and ``offset`` must be a valid
+ non-negative integer.
+.. [#ksyscall] The ``ksyscall`` attach format is ``ksyscall/<syscall>``.
+.. [#uprobe] The ``uprobe`` attach format is ``uprobe[.s]/<path>:<function>[+<offset>]``.
+.. [#usdt] The ``usdt`` attach format is ``usdt/<path>:<provider>:<name>``.
+.. [#kpmulti] The ``kprobe.multi`` attach format is ``kprobe.multi/<pattern>`` where ``pattern``
+ supports ``*`` and ``?`` wildcards. Valid characters for pattern are
+ ``a-zA-Z0-9_.*?``.
+.. [#lsm] The ``lsm`` attachment format is ``lsm[.s]/<hook>``.
+.. [#rawtp] The ``raw_tp`` attach format is ``raw_tracepoint[.w]/<tracepoint>``.
+.. [#tc_legacy] The ``tc``, ``classifier`` and ``action`` attach types are deprecated, use
+ ``tcx/*`` instead.
+.. [#struct_ops] The ``struct_ops`` attach format supports ``struct_ops[.s]/<name>`` convention,
+ but ``name`` is ignored and it is recommended to just use plain
+ ``SEC("struct_ops[.s]")``. The attachments are defined in a struct initializer
+ that is tagged with ``SEC(".struct_ops[.link]")``.
+.. [#tp] The ``tracepoint`` attach format is ``tracepoint/<category>/<name>``.
+.. [#iter] The ``iter`` attach format is ``iter[.s]/<struct-name>``.
diff --git a/Documentation/bpf/linux-notes.rst b/Documentation/bpf/linux-notes.rst
new file mode 100644
index 000000000000..00d2693de025
--- /dev/null
+++ b/Documentation/bpf/linux-notes.rst
@@ -0,0 +1,84 @@
+.. contents::
+.. sectnum::
+
+==========================
+Linux implementation notes
+==========================
+
+This document provides more details specific to the Linux kernel implementation of the eBPF instruction set.
+
+Byte swap instructions
+======================
+
+``BPF_FROM_LE`` and ``BPF_FROM_BE`` exist as aliases for ``BPF_TO_LE`` and ``BPF_TO_BE`` respectively.
+
+Jump instructions
+=================
+
+``BPF_CALL | BPF_X | BPF_JMP`` (0x8d), where the helper function
+integer would be read from a specified register, is not currently supported
+by the verifier. Any programs with this instruction will fail to load
+until such support is added.
+
+Maps
+====
+
+Linux only supports the 'map_val(map)' operation on array maps with a single element.
+
+Linux uses an fd_array to store maps associated with a BPF program. Thus,
+map_by_idx(imm) uses the fd at that index in the array.
+
+Variables
+=========
+
+The following 64-bit immediate instruction specifies that a variable address,
+which corresponds to some integer stored in the 'imm' field, should be loaded:
+
+========================= ====== === ========================================= =========== ==============
+opcode construction opcode src pseudocode imm type dst type
+========================= ====== === ========================================= =========== ==============
+BPF_IMM | BPF_DW | BPF_LD 0x18 0x3 dst = var_addr(imm) variable id data pointer
+========================= ====== === ========================================= =========== ==============
+
+On Linux, this integer is a BTF ID.
+
+Legacy BPF Packet access instructions
+=====================================
+
+As mentioned in the `ISA standard documentation
+<instruction-set.html#legacy-bpf-packet-access-instructions>`_,
+Linux has special eBPF instructions for access to packet data that have been
+carried over from classic BPF to retain the performance of legacy socket
+filters running in the eBPF interpreter.
+
+The instructions come in two forms: ``BPF_ABS | <size> | BPF_LD`` and
+``BPF_IND | <size> | BPF_LD``.
+
+These instructions are used to access packet data and can only be used when
+the program context is a pointer to a networking packet. ``BPF_ABS``
+accesses packet data at an absolute offset specified by the immediate data
+and ``BPF_IND`` access packet data at an offset that includes the value of
+a register in addition to the immediate data.
+
+These instructions have seven implicit operands:
+
+* Register R6 is an implicit input that must contain a pointer to a
+ struct sk_buff.
+* Register R0 is an implicit output which contains the data fetched from
+ the packet.
+* Registers R1-R5 are scratch registers that are clobbered by the
+ instruction.
+
+These instructions have an implicit program exit condition as well. If an
+eBPF program attempts access data beyond the packet boundary, the
+program execution will be aborted.
+
+``BPF_ABS | BPF_W | BPF_LD`` (0x20) means::
+
+ R0 = ntohl(*(u32 *) ((struct sk_buff *) R6->data + imm))
+
+where ``ntohl()`` converts a 32-bit value from network byte order to host byte order.
+
+``BPF_IND | BPF_W | BPF_LD`` (0x40) means::
+
+ R0 = ntohl(*(u32 *) ((struct sk_buff *) R6->data + src + imm))
diff --git a/Documentation/bpf/llvm_reloc.rst b/Documentation/bpf/llvm_reloc.rst
new file mode 100644
index 000000000000..44188e219d32
--- /dev/null
+++ b/Documentation/bpf/llvm_reloc.rst
@@ -0,0 +1,546 @@
+.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+====================
+BPF LLVM Relocations
+====================
+
+This document describes LLVM BPF backend relocation types.
+
+Relocation Record
+=================
+
+LLVM BPF backend records each relocation with the following 16-byte
+ELF structure::
+
+ typedef struct
+ {
+ Elf64_Addr r_offset; // Offset from the beginning of section.
+ Elf64_Xword r_info; // Relocation type and symbol index.
+ } Elf64_Rel;
+
+For example, for the following code::
+
+ int g1 __attribute__((section("sec")));
+ int g2 __attribute__((section("sec")));
+ static volatile int l1 __attribute__((section("sec")));
+ static volatile int l2 __attribute__((section("sec")));
+ int test() {
+ return g1 + g2 + l1 + l2;
+ }
+
+Compiled with ``clang --target=bpf -O2 -c test.c``, the following is
+the code with ``llvm-objdump -dr test.o``::
+
+ 0: 18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0 ll
+ 0000000000000000: R_BPF_64_64 g1
+ 2: 61 11 00 00 00 00 00 00 r1 = *(u32 *)(r1 + 0)
+ 3: 18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0 ll
+ 0000000000000018: R_BPF_64_64 g2
+ 5: 61 20 00 00 00 00 00 00 r0 = *(u32 *)(r2 + 0)
+ 6: 0f 10 00 00 00 00 00 00 r0 += r1
+ 7: 18 01 00 00 08 00 00 00 00 00 00 00 00 00 00 00 r1 = 8 ll
+ 0000000000000038: R_BPF_64_64 sec
+ 9: 61 11 00 00 00 00 00 00 r1 = *(u32 *)(r1 + 0)
+ 10: 0f 10 00 00 00 00 00 00 r0 += r1
+ 11: 18 01 00 00 0c 00 00 00 00 00 00 00 00 00 00 00 r1 = 12 ll
+ 0000000000000058: R_BPF_64_64 sec
+ 13: 61 11 00 00 00 00 00 00 r1 = *(u32 *)(r1 + 0)
+ 14: 0f 10 00 00 00 00 00 00 r0 += r1
+ 15: 95 00 00 00 00 00 00 00 exit
+
+There are four relocations in the above for four ``LD_imm64`` instructions.
+The following ``llvm-readelf -r test.o`` shows the binary values of the four
+relocations::
+
+ Relocation section '.rel.text' at offset 0x190 contains 4 entries:
+ Offset Info Type Symbol's Value Symbol's Name
+ 0000000000000000 0000000600000001 R_BPF_64_64 0000000000000000 g1
+ 0000000000000018 0000000700000001 R_BPF_64_64 0000000000000004 g2
+ 0000000000000038 0000000400000001 R_BPF_64_64 0000000000000000 sec
+ 0000000000000058 0000000400000001 R_BPF_64_64 0000000000000000 sec
+
+Each relocation is represented by ``Offset`` (8 bytes) and ``Info`` (8 bytes).
+For example, the first relocation corresponds to the first instruction
+(Offset 0x0) and the corresponding ``Info`` indicates the relocation type
+of ``R_BPF_64_64`` (type 1) and the entry in the symbol table (entry 6).
+The following is the symbol table with ``llvm-readelf -s test.o``::
+
+ Symbol table '.symtab' contains 8 entries:
+ Num: Value Size Type Bind Vis Ndx Name
+ 0: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND
+ 1: 0000000000000000 0 FILE LOCAL DEFAULT ABS test.c
+ 2: 0000000000000008 4 OBJECT LOCAL DEFAULT 4 l1
+ 3: 000000000000000c 4 OBJECT LOCAL DEFAULT 4 l2
+ 4: 0000000000000000 0 SECTION LOCAL DEFAULT 4 sec
+ 5: 0000000000000000 128 FUNC GLOBAL DEFAULT 2 test
+ 6: 0000000000000000 4 OBJECT GLOBAL DEFAULT 4 g1
+ 7: 0000000000000004 4 OBJECT GLOBAL DEFAULT 4 g2
+
+The 6th entry is global variable ``g1`` with value 0.
+
+Similarly, the second relocation is at ``.text`` offset ``0x18``, instruction 3,
+has a type of ``R_BPF_64_64`` and refers to entry 7 in the symbol table.
+The second relocation resolves to global variable ``g2`` which has a symbol
+value 4. The symbol value represents the offset from the start of ``.data``
+section where the initial value of the global variable ``g2`` is stored.
+
+The third and fourth relocations refer to static variables ``l1``
+and ``l2``. From the ``.rel.text`` section above, it is not clear
+to which symbols they really refer as they both refer to
+symbol table entry 4, symbol ``sec``, which has ``STT_SECTION`` type
+and represents a section. So for a static variable or function,
+the section offset is written to the original insn
+buffer, which is called ``A`` (addend). Looking at
+above insn ``7`` and ``11``, they have section offset ``8`` and ``12``.
+From symbol table, we can find that they correspond to entries ``2``
+and ``3`` for ``l1`` and ``l2``.
+
+In general, the ``A`` is 0 for global variables and functions,
+and is the section offset or some computation result based on
+section offset for static variables/functions. The non-section-offset
+case refers to function calls. See below for more details.
+
+Different Relocation Types
+==========================
+
+Six relocation types are supported. The following is an overview and
+``S`` represents the value of the symbol in the symbol table::
+
+ Enum ELF Reloc Type Description BitSize Offset Calculation
+ 0 R_BPF_NONE None
+ 1 R_BPF_64_64 ld_imm64 insn 32 r_offset + 4 S + A
+ 2 R_BPF_64_ABS64 normal data 64 r_offset S + A
+ 3 R_BPF_64_ABS32 normal data 32 r_offset S + A
+ 4 R_BPF_64_NODYLD32 .BTF[.ext] data 32 r_offset S + A
+ 10 R_BPF_64_32 call insn 32 r_offset + 4 (S + A) / 8 - 1
+
+For example, ``R_BPF_64_64`` relocation type is used for ``ld_imm64`` instruction.
+The actual to-be-relocated data (0 or section offset)
+is stored at ``r_offset + 4`` and the read/write
+data bitsize is 32 (4 bytes). The relocation can be resolved with
+the symbol value plus implicit addend. Note that the ``BitSize`` is 32 which
+means the section offset must be less than or equal to ``UINT32_MAX`` and this
+is enforced by LLVM BPF backend.
+
+In another case, ``R_BPF_64_ABS64`` relocation type is used for normal 64-bit data.
+The actual to-be-relocated data is stored at ``r_offset`` and the read/write data
+bitsize is 64 (8 bytes). The relocation can be resolved with
+the symbol value plus implicit addend.
+
+Both ``R_BPF_64_ABS32`` and ``R_BPF_64_NODYLD32`` types are for 32-bit data.
+But ``R_BPF_64_NODYLD32`` specifically refers to relocations in ``.BTF`` and
+``.BTF.ext`` sections. For cases like bcc where llvm ``ExecutionEngine RuntimeDyld``
+is involved, ``R_BPF_64_NODYLD32`` types of relocations should not be resolved
+to actual function/variable address. Otherwise, ``.BTF`` and ``.BTF.ext``
+become unusable by bcc and kernel.
+
+Type ``R_BPF_64_32`` is used for call instruction. The call target section
+offset is stored at ``r_offset + 4`` (32bit) and calculated as
+``(S + A) / 8 - 1``.
+
+Examples
+========
+
+Types ``R_BPF_64_64`` and ``R_BPF_64_32`` are used to resolve ``ld_imm64``
+and ``call`` instructions. For example::
+
+ __attribute__((noinline)) __attribute__((section("sec1")))
+ int gfunc(int a, int b) {
+ return a * b;
+ }
+ static __attribute__((noinline)) __attribute__((section("sec1")))
+ int lfunc(int a, int b) {
+ return a + b;
+ }
+ int global __attribute__((section("sec2")));
+ int test(int a, int b) {
+ return gfunc(a, b) + lfunc(a, b) + global;
+ }
+
+Compiled with ``clang --target=bpf -O2 -c test.c``, we will have
+following code with `llvm-objdump -dr test.o``::
+
+ Disassembly of section .text:
+
+ 0000000000000000 <test>:
+ 0: bf 26 00 00 00 00 00 00 r6 = r2
+ 1: bf 17 00 00 00 00 00 00 r7 = r1
+ 2: 85 10 00 00 ff ff ff ff call -1
+ 0000000000000010: R_BPF_64_32 gfunc
+ 3: bf 08 00 00 00 00 00 00 r8 = r0
+ 4: bf 71 00 00 00 00 00 00 r1 = r7
+ 5: bf 62 00 00 00 00 00 00 r2 = r6
+ 6: 85 10 00 00 02 00 00 00 call 2
+ 0000000000000030: R_BPF_64_32 sec1
+ 7: 0f 80 00 00 00 00 00 00 r0 += r8
+ 8: 18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0 ll
+ 0000000000000040: R_BPF_64_64 global
+ 10: 61 11 00 00 00 00 00 00 r1 = *(u32 *)(r1 + 0)
+ 11: 0f 10 00 00 00 00 00 00 r0 += r1
+ 12: 95 00 00 00 00 00 00 00 exit
+
+ Disassembly of section sec1:
+
+ 0000000000000000 <gfunc>:
+ 0: bf 20 00 00 00 00 00 00 r0 = r2
+ 1: 2f 10 00 00 00 00 00 00 r0 *= r1
+ 2: 95 00 00 00 00 00 00 00 exit
+
+ 0000000000000018 <lfunc>:
+ 3: bf 20 00 00 00 00 00 00 r0 = r2
+ 4: 0f 10 00 00 00 00 00 00 r0 += r1
+ 5: 95 00 00 00 00 00 00 00 exit
+
+The first relocation corresponds to ``gfunc(a, b)`` where ``gfunc`` has a value of 0,
+so the ``call`` instruction offset is ``(0 + 0)/8 - 1 = -1``.
+The second relocation corresponds to ``lfunc(a, b)`` where ``lfunc`` has a section
+offset ``0x18``, so the ``call`` instruction offset is ``(0 + 0x18)/8 - 1 = 2``.
+The third relocation corresponds to ld_imm64 of ``global``, which has a section
+offset ``0``.
+
+The following is an example to show how R_BPF_64_ABS64 could be generated::
+
+ int global() { return 0; }
+ struct t { void *g; } gbl = { global };
+
+Compiled with ``clang --target=bpf -O2 -g -c test.c``, we will see a
+relocation below in ``.data`` section with command
+``llvm-readelf -r test.o``::
+
+ Relocation section '.rel.data' at offset 0x458 contains 1 entries:
+ Offset Info Type Symbol's Value Symbol's Name
+ 0000000000000000 0000000700000002 R_BPF_64_ABS64 0000000000000000 global
+
+The relocation says the first 8-byte of ``.data`` section should be
+filled with address of ``global`` variable.
+
+With ``llvm-readelf`` output, we can see that dwarf sections have a bunch of
+``R_BPF_64_ABS32`` and ``R_BPF_64_ABS64`` relocations::
+
+ Relocation section '.rel.debug_info' at offset 0x468 contains 13 entries:
+ Offset Info Type Symbol's Value Symbol's Name
+ 0000000000000006 0000000300000003 R_BPF_64_ABS32 0000000000000000 .debug_abbrev
+ 000000000000000c 0000000400000003 R_BPF_64_ABS32 0000000000000000 .debug_str
+ 0000000000000012 0000000400000003 R_BPF_64_ABS32 0000000000000000 .debug_str
+ 0000000000000016 0000000600000003 R_BPF_64_ABS32 0000000000000000 .debug_line
+ 000000000000001a 0000000400000003 R_BPF_64_ABS32 0000000000000000 .debug_str
+ 000000000000001e 0000000200000002 R_BPF_64_ABS64 0000000000000000 .text
+ 000000000000002b 0000000400000003 R_BPF_64_ABS32 0000000000000000 .debug_str
+ 0000000000000037 0000000800000002 R_BPF_64_ABS64 0000000000000000 gbl
+ 0000000000000040 0000000400000003 R_BPF_64_ABS32 0000000000000000 .debug_str
+ ......
+
+The .BTF/.BTF.ext sections has R_BPF_64_NODYLD32 relocations::
+
+ Relocation section '.rel.BTF' at offset 0x538 contains 1 entries:
+ Offset Info Type Symbol's Value Symbol's Name
+ 0000000000000084 0000000800000004 R_BPF_64_NODYLD32 0000000000000000 gbl
+
+ Relocation section '.rel.BTF.ext' at offset 0x548 contains 2 entries:
+ Offset Info Type Symbol's Value Symbol's Name
+ 000000000000002c 0000000200000004 R_BPF_64_NODYLD32 0000000000000000 .text
+ 0000000000000040 0000000200000004 R_BPF_64_NODYLD32 0000000000000000 .text
+
+.. _btf-co-re-relocations:
+
+=================
+CO-RE Relocations
+=================
+
+From object file point of view CO-RE mechanism is implemented as a set
+of CO-RE specific relocation records. These relocation records are not
+related to ELF relocations and are encoded in .BTF.ext section.
+See :ref:`Documentation/bpf/btf.rst <BTF_Ext_Section>` for more
+information on .BTF.ext structure.
+
+CO-RE relocations are applied to BPF instructions to update immediate
+or offset fields of the instruction at load time with information
+relevant for target kernel.
+
+Field to patch is selected basing on the instruction class:
+
+* For BPF_ALU, BPF_ALU64, BPF_LD `immediate` field is patched;
+* For BPF_LDX, BPF_STX, BPF_ST `offset` field is patched;
+* BPF_JMP, BPF_JMP32 instructions **should not** be patched.
+
+Relocation kinds
+================
+
+There are several kinds of CO-RE relocations that could be split in
+three groups:
+
+* Field-based - patch instruction with field related information, e.g.
+ change offset field of the BPF_LDX instruction to reflect offset
+ of a specific structure field in the target kernel.
+
+* Type-based - patch instruction with type related information, e.g.
+ change immediate field of the BPF_ALU move instruction to 0 or 1 to
+ reflect if specific type is present in the target kernel.
+
+* Enum-based - patch instruction with enum related information, e.g.
+ change immediate field of the BPF_LD_IMM64 instruction to reflect
+ value of a specific enum literal in the target kernel.
+
+The complete list of relocation kinds is represented by the following enum:
+
+.. code-block:: c
+
+ enum bpf_core_relo_kind {
+ BPF_CORE_FIELD_BYTE_OFFSET = 0, /* field byte offset */
+ BPF_CORE_FIELD_BYTE_SIZE = 1, /* field size in bytes */
+ BPF_CORE_FIELD_EXISTS = 2, /* field existence in target kernel */
+ BPF_CORE_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */
+ BPF_CORE_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */
+ BPF_CORE_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */
+ BPF_CORE_TYPE_ID_LOCAL = 6, /* type ID in local BPF object */
+ BPF_CORE_TYPE_ID_TARGET = 7, /* type ID in target kernel */
+ BPF_CORE_TYPE_EXISTS = 8, /* type existence in target kernel */
+ BPF_CORE_TYPE_SIZE = 9, /* type size in bytes */
+ BPF_CORE_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */
+ BPF_CORE_ENUMVAL_VALUE = 11, /* enum value integer value */
+ BPF_CORE_TYPE_MATCHES = 12, /* type match in target kernel */
+ };
+
+Notes:
+
+* ``BPF_CORE_FIELD_LSHIFT_U64`` and ``BPF_CORE_FIELD_RSHIFT_U64`` are
+ supposed to be used to read bitfield values using the following
+ algorithm:
+
+ .. code-block:: c
+
+ // To read bitfield ``f`` from ``struct s``
+ is_signed = relo(s->f, BPF_CORE_FIELD_SIGNED)
+ off = relo(s->f, BPF_CORE_FIELD_BYTE_OFFSET)
+ sz = relo(s->f, BPF_CORE_FIELD_BYTE_SIZE)
+ l = relo(s->f, BPF_CORE_FIELD_LSHIFT_U64)
+ r = relo(s->f, BPF_CORE_FIELD_RSHIFT_U64)
+ // define ``v`` as signed or unsigned integer of size ``sz``
+ v = *({s|u}<sz> *)((void *)s + off)
+ v <<= l
+ v >>= r
+
+* The ``BPF_CORE_TYPE_MATCHES`` queries matching relation, defined as
+ follows:
+
+ * for integers: types match if size and signedness match;
+ * for arrays & pointers: target types are recursively matched;
+ * for structs & unions:
+
+ * local members need to exist in target with the same name;
+
+ * for each member we recursively check match unless it is already behind a
+ pointer, in which case we only check matching names and compatible kind;
+
+ * for enums:
+
+ * local variants have to have a match in target by symbolic name (but not
+ numeric value);
+
+ * size has to match (but enum may match enum64 and vice versa);
+
+ * for function pointers:
+
+ * number and position of arguments in local type has to match target;
+ * for each argument and the return value we recursively check match.
+
+CO-RE Relocation Record
+=======================
+
+Relocation record is encoded as the following structure:
+
+.. code-block:: c
+
+ struct bpf_core_relo {
+ __u32 insn_off;
+ __u32 type_id;
+ __u32 access_str_off;
+ enum bpf_core_relo_kind kind;
+ };
+
+* ``insn_off`` - instruction offset (in bytes) within a code section
+ associated with this relocation;
+
+* ``type_id`` - BTF type ID of the "root" (containing) entity of a
+ relocatable type or field;
+
+* ``access_str_off`` - offset into corresponding .BTF string section.
+ String interpretation depends on specific relocation kind:
+
+ * for field-based relocations, string encodes an accessed field using
+ a sequence of field and array indices, separated by colon (:). It's
+ conceptually very close to LLVM's `getelementptr <GEP_>`_ instruction's
+ arguments for identifying offset to a field. For example, consider the
+ following C code:
+
+ .. code-block:: c
+
+ struct sample {
+ int a;
+ int b;
+ struct { int c[10]; };
+ } __attribute__((preserve_access_index));
+ struct sample *s;
+
+ * Access to ``s[0].a`` would be encoded as ``0:0``:
+
+ * ``0``: first element of ``s`` (as if ``s`` is an array);
+ * ``0``: index of field ``a`` in ``struct sample``.
+
+ * Access to ``s->a`` would be encoded as ``0:0`` as well.
+ * Access to ``s->b`` would be encoded as ``0:1``:
+
+ * ``0``: first element of ``s``;
+ * ``1``: index of field ``b`` in ``struct sample``.
+
+ * Access to ``s[1].c[5]`` would be encoded as ``1:2:0:5``:
+
+ * ``1``: second element of ``s``;
+ * ``2``: index of anonymous structure field in ``struct sample``;
+ * ``0``: index of field ``c`` in anonymous structure;
+ * ``5``: access to array element #5.
+
+ * for type-based relocations, string is expected to be just "0";
+
+ * for enum value-based relocations, string contains an index of enum
+ value within its enum type;
+
+* ``kind`` - one of ``enum bpf_core_relo_kind``.
+
+.. _GEP: https://llvm.org/docs/LangRef.html#getelementptr-instruction
+
+.. _btf_co_re_relocation_examples:
+
+CO-RE Relocation Examples
+=========================
+
+For the following C code:
+
+.. code-block:: c
+
+ struct foo {
+ int a;
+ int b;
+ unsigned c:15;
+ } __attribute__((preserve_access_index));
+
+ enum bar { U, V };
+
+With the following BTF definitions:
+
+.. code-block::
+
+ ...
+ [2] STRUCT 'foo' size=8 vlen=2
+ 'a' type_id=3 bits_offset=0
+ 'b' type_id=3 bits_offset=32
+ 'c' type_id=4 bits_offset=64 bitfield_size=15
+ [3] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED
+ [4] INT 'unsigned int' size=4 bits_offset=0 nr_bits=32 encoding=(none)
+ ...
+ [16] ENUM 'bar' encoding=UNSIGNED size=4 vlen=2
+ 'U' val=0
+ 'V' val=1
+
+Field offset relocations are generated automatically when
+``__attribute__((preserve_access_index))`` is used, for example:
+
+.. code-block:: c
+
+ void alpha(struct foo *s, volatile unsigned long *g) {
+ *g = s->a;
+ s->a = 1;
+ }
+
+ 00 <alpha>:
+ 0: r3 = *(s32 *)(r1 + 0x0)
+ 00: CO-RE <byte_off> [2] struct foo::a (0:0)
+ 1: *(u64 *)(r2 + 0x0) = r3
+ 2: *(u32 *)(r1 + 0x0) = 0x1
+ 10: CO-RE <byte_off> [2] struct foo::a (0:0)
+ 3: exit
+
+
+All relocation kinds could be requested via built-in functions.
+E.g. field-based relocations:
+
+.. code-block:: c
+
+ void bravo(struct foo *s, volatile unsigned long *g) {
+ *g = __builtin_preserve_field_info(s->b, 0 /* field byte offset */);
+ *g = __builtin_preserve_field_info(s->b, 1 /* field byte size */);
+ *g = __builtin_preserve_field_info(s->b, 2 /* field existence */);
+ *g = __builtin_preserve_field_info(s->b, 3 /* field signedness */);
+ *g = __builtin_preserve_field_info(s->c, 4 /* bitfield left shift */);
+ *g = __builtin_preserve_field_info(s->c, 5 /* bitfield right shift */);
+ }
+
+ 20 <bravo>:
+ 4: r1 = 0x4
+ 20: CO-RE <byte_off> [2] struct foo::b (0:1)
+ 5: *(u64 *)(r2 + 0x0) = r1
+ 6: r1 = 0x4
+ 30: CO-RE <byte_sz> [2] struct foo::b (0:1)
+ 7: *(u64 *)(r2 + 0x0) = r1
+ 8: r1 = 0x1
+ 40: CO-RE <field_exists> [2] struct foo::b (0:1)
+ 9: *(u64 *)(r2 + 0x0) = r1
+ 10: r1 = 0x1
+ 50: CO-RE <signed> [2] struct foo::b (0:1)
+ 11: *(u64 *)(r2 + 0x0) = r1
+ 12: r1 = 0x31
+ 60: CO-RE <lshift_u64> [2] struct foo::c (0:2)
+ 13: *(u64 *)(r2 + 0x0) = r1
+ 14: r1 = 0x31
+ 70: CO-RE <rshift_u64> [2] struct foo::c (0:2)
+ 15: *(u64 *)(r2 + 0x0) = r1
+ 16: exit
+
+
+Type-based relocations:
+
+.. code-block:: c
+
+ void charlie(struct foo *s, volatile unsigned long *g) {
+ *g = __builtin_preserve_type_info(*s, 0 /* type existence */);
+ *g = __builtin_preserve_type_info(*s, 1 /* type size */);
+ *g = __builtin_preserve_type_info(*s, 2 /* type matches */);
+ *g = __builtin_btf_type_id(*s, 0 /* type id in this object file */);
+ *g = __builtin_btf_type_id(*s, 1 /* type id in target kernel */);
+ }
+
+ 88 <charlie>:
+ 17: r1 = 0x1
+ 88: CO-RE <type_exists> [2] struct foo
+ 18: *(u64 *)(r2 + 0x0) = r1
+ 19: r1 = 0xc
+ 98: CO-RE <type_size> [2] struct foo
+ 20: *(u64 *)(r2 + 0x0) = r1
+ 21: r1 = 0x1
+ a8: CO-RE <type_matches> [2] struct foo
+ 22: *(u64 *)(r2 + 0x0) = r1
+ 23: r1 = 0x2 ll
+ b8: CO-RE <local_type_id> [2] struct foo
+ 25: *(u64 *)(r2 + 0x0) = r1
+ 26: r1 = 0x2 ll
+ d0: CO-RE <target_type_id> [2] struct foo
+ 28: *(u64 *)(r2 + 0x0) = r1
+ 29: exit
+
+Enum-based relocations:
+
+.. code-block:: c
+
+ void delta(struct foo *s, volatile unsigned long *g) {
+ *g = __builtin_preserve_enum_value(*(enum bar *)U, 0 /* enum literal existence */);
+ *g = __builtin_preserve_enum_value(*(enum bar *)V, 1 /* enum literal value */);
+ }
+
+ f0 <delta>:
+ 30: r1 = 0x1 ll
+ f0: CO-RE <enumval_exists> [16] enum bar::U = 0
+ 32: *(u64 *)(r2 + 0x0) = r1
+ 33: r1 = 0x1 ll
+ 108: CO-RE <enumval_value> [16] enum bar::V = 1
+ 35: *(u64 *)(r2 + 0x0) = r1
+ 36: exit
diff --git a/Documentation/bpf/map_array.rst b/Documentation/bpf/map_array.rst
new file mode 100644
index 000000000000..f2f51a53e8ae
--- /dev/null
+++ b/Documentation/bpf/map_array.rst
@@ -0,0 +1,262 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. Copyright (C) 2022 Red Hat, Inc.
+
+================================================
+BPF_MAP_TYPE_ARRAY and BPF_MAP_TYPE_PERCPU_ARRAY
+================================================
+
+.. note::
+ - ``BPF_MAP_TYPE_ARRAY`` was introduced in kernel version 3.19
+ - ``BPF_MAP_TYPE_PERCPU_ARRAY`` was introduced in version 4.6
+
+``BPF_MAP_TYPE_ARRAY`` and ``BPF_MAP_TYPE_PERCPU_ARRAY`` provide generic array
+storage. The key type is an unsigned 32-bit integer (4 bytes) and the map is
+of constant size. The size of the array is defined in ``max_entries`` at
+creation time. All array elements are pre-allocated and zero initialized when
+created. ``BPF_MAP_TYPE_PERCPU_ARRAY`` uses a different memory region for each
+CPU whereas ``BPF_MAP_TYPE_ARRAY`` uses the same memory region. The value
+stored can be of any size, however, all array elements are aligned to 8
+bytes.
+
+Since kernel 5.5, memory mapping may be enabled for ``BPF_MAP_TYPE_ARRAY`` by
+setting the flag ``BPF_F_MMAPABLE``. The map definition is page-aligned and
+starts on the first page. Sufficient page-sized and page-aligned blocks of
+memory are allocated to store all array values, starting on the second page,
+which in some cases will result in over-allocation of memory. The benefit of
+using this is increased performance and ease of use since userspace programs
+would not be required to use helper functions to access and mutate data.
+
+Usage
+=====
+
+Kernel BPF
+----------
+
+bpf_map_lookup_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+ void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)
+
+Array elements can be retrieved using the ``bpf_map_lookup_elem()`` helper.
+This helper returns a pointer into the array element, so to avoid data races
+with userspace reading the value, the user must use primitives like
+``__sync_fetch_and_add()`` when updating the value in-place.
+
+bpf_map_update_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+ long bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags)
+
+Array elements can be updated using the ``bpf_map_update_elem()`` helper.
+
+``bpf_map_update_elem()`` returns 0 on success, or negative error in case of
+failure.
+
+Since the array is of constant size, ``bpf_map_delete_elem()`` is not supported.
+To clear an array element, you may use ``bpf_map_update_elem()`` to insert a
+zero value to that index.
+
+Per CPU Array
+-------------
+
+Values stored in ``BPF_MAP_TYPE_ARRAY`` can be accessed by multiple programs
+across different CPUs. To restrict storage to a single CPU, you may use a
+``BPF_MAP_TYPE_PERCPU_ARRAY``.
+
+When using a ``BPF_MAP_TYPE_PERCPU_ARRAY`` the ``bpf_map_update_elem()`` and
+``bpf_map_lookup_elem()`` helpers automatically access the slot for the current
+CPU.
+
+bpf_map_lookup_percpu_elem()
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+ void *bpf_map_lookup_percpu_elem(struct bpf_map *map, const void *key, u32 cpu)
+
+The ``bpf_map_lookup_percpu_elem()`` helper can be used to lookup the array
+value for a specific CPU. Returns value on success , or ``NULL`` if no entry was
+found or ``cpu`` is invalid.
+
+Concurrency
+-----------
+
+Since kernel version 5.1, the BPF infrastructure provides ``struct bpf_spin_lock``
+to synchronize access.
+
+Userspace
+---------
+
+Access from userspace uses libbpf APIs with the same names as above, with
+the map identified by its ``fd``.
+
+Examples
+========
+
+Please see the ``tools/testing/selftests/bpf`` directory for functional
+examples. The code samples below demonstrate API usage.
+
+Kernel BPF
+----------
+
+This snippet shows how to declare an array in a BPF program.
+
+.. code-block:: c
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, long);
+ __uint(max_entries, 256);
+ } my_map SEC(".maps");
+
+
+This example BPF program shows how to access an array element.
+
+.. code-block:: c
+
+ int bpf_prog(struct __sk_buff *skb)
+ {
+ struct iphdr ip;
+ int index;
+ long *value;
+
+ if (bpf_skb_load_bytes(skb, ETH_HLEN, &ip, sizeof(ip)) < 0)
+ return 0;
+
+ index = ip.protocol;
+ value = bpf_map_lookup_elem(&my_map, &index);
+ if (value)
+ __sync_fetch_and_add(value, skb->len);
+
+ return 0;
+ }
+
+Userspace
+---------
+
+BPF_MAP_TYPE_ARRAY
+~~~~~~~~~~~~~~~~~~
+
+This snippet shows how to create an array, using ``bpf_map_create_opts`` to
+set flags.
+
+.. code-block:: c
+
+ #include <bpf/libbpf.h>
+ #include <bpf/bpf.h>
+
+ int create_array()
+ {
+ int fd;
+ LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE);
+
+ fd = bpf_map_create(BPF_MAP_TYPE_ARRAY,
+ "example_array", /* name */
+ sizeof(__u32), /* key size */
+ sizeof(long), /* value size */
+ 256, /* max entries */
+ &opts); /* create opts */
+ return fd;
+ }
+
+This snippet shows how to initialize the elements of an array.
+
+.. code-block:: c
+
+ int initialize_array(int fd)
+ {
+ __u32 i;
+ long value;
+ int ret;
+
+ for (i = 0; i < 256; i++) {
+ value = i;
+ ret = bpf_map_update_elem(fd, &i, &value, BPF_ANY);
+ if (ret < 0)
+ return ret;
+ }
+
+ return ret;
+ }
+
+This snippet shows how to retrieve an element value from an array.
+
+.. code-block:: c
+
+ int lookup(int fd)
+ {
+ __u32 index = 42;
+ long value;
+ int ret;
+
+ ret = bpf_map_lookup_elem(fd, &index, &value);
+ if (ret < 0)
+ return ret;
+
+ /* use value here */
+ assert(value == 42);
+
+ return ret;
+ }
+
+BPF_MAP_TYPE_PERCPU_ARRAY
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This snippet shows how to initialize the elements of a per CPU array.
+
+.. code-block:: c
+
+ int initialize_array(int fd)
+ {
+ int ncpus = libbpf_num_possible_cpus();
+ long values[ncpus];
+ __u32 i, j;
+ int ret;
+
+ for (i = 0; i < 256 ; i++) {
+ for (j = 0; j < ncpus; j++)
+ values[j] = i;
+ ret = bpf_map_update_elem(fd, &i, &values, BPF_ANY);
+ if (ret < 0)
+ return ret;
+ }
+
+ return ret;
+ }
+
+This snippet shows how to access the per CPU elements of an array value.
+
+.. code-block:: c
+
+ int lookup(int fd)
+ {
+ int ncpus = libbpf_num_possible_cpus();
+ __u32 index = 42, j;
+ long values[ncpus];
+ int ret;
+
+ ret = bpf_map_lookup_elem(fd, &index, &values);
+ if (ret < 0)
+ return ret;
+
+ for (j = 0; j < ncpus; j++) {
+ /* Use per CPU value here */
+ assert(values[j] == 42);
+ }
+
+ return ret;
+ }
+
+Semantics
+=========
+
+As shown in the example above, when accessing a ``BPF_MAP_TYPE_PERCPU_ARRAY``
+in userspace, each value is an array with ``ncpus`` elements.
+
+When calling ``bpf_map_update_elem()`` the flag ``BPF_NOEXIST`` can not be used
+for these maps.
diff --git a/Documentation/bpf/map_bloom_filter.rst b/Documentation/bpf/map_bloom_filter.rst
new file mode 100644
index 000000000000..c82487f2fe0d
--- /dev/null
+++ b/Documentation/bpf/map_bloom_filter.rst
@@ -0,0 +1,174 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. Copyright (C) 2022 Red Hat, Inc.
+
+=========================
+BPF_MAP_TYPE_BLOOM_FILTER
+=========================
+
+.. note::
+ - ``BPF_MAP_TYPE_BLOOM_FILTER`` was introduced in kernel version 5.16
+
+``BPF_MAP_TYPE_BLOOM_FILTER`` provides a BPF bloom filter map. Bloom
+filters are a space-efficient probabilistic data structure used to
+quickly test whether an element exists in a set. In a bloom filter,
+false positives are possible whereas false negatives are not.
+
+The bloom filter map does not have keys, only values. When the bloom
+filter map is created, it must be created with a ``key_size`` of 0. The
+bloom filter map supports two operations:
+
+- push: adding an element to the map
+- peek: determining whether an element is present in the map
+
+BPF programs must use ``bpf_map_push_elem`` to add an element to the
+bloom filter map and ``bpf_map_peek_elem`` to query the map. These
+operations are exposed to userspace applications using the existing
+``bpf`` syscall in the following way:
+
+- ``BPF_MAP_UPDATE_ELEM`` -> push
+- ``BPF_MAP_LOOKUP_ELEM`` -> peek
+
+The ``max_entries`` size that is specified at map creation time is used
+to approximate a reasonable bitmap size for the bloom filter, and is not
+otherwise strictly enforced. If the user wishes to insert more entries
+into the bloom filter than ``max_entries``, this may lead to a higher
+false positive rate.
+
+The number of hashes to use for the bloom filter is configurable using
+the lower 4 bits of ``map_extra`` in ``union bpf_attr`` at map creation
+time. If no number is specified, the default used will be 5 hash
+functions. In general, using more hashes decreases both the false
+positive rate and the speed of a lookup.
+
+It is not possible to delete elements from a bloom filter map. A bloom
+filter map may be used as an inner map. The user is responsible for
+synchronising concurrent updates and lookups to ensure no false negative
+lookups occur.
+
+Usage
+=====
+
+Kernel BPF
+----------
+
+bpf_map_push_elem()
+~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+ long bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+
+A ``value`` can be added to a bloom filter using the
+``bpf_map_push_elem()`` helper. The ``flags`` parameter must be set to
+``BPF_ANY`` when adding an entry to the bloom filter. This helper
+returns ``0`` on success, or negative error in case of failure.
+
+bpf_map_peek_elem()
+~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+ long bpf_map_peek_elem(struct bpf_map *map, void *value)
+
+The ``bpf_map_peek_elem()`` helper is used to determine whether
+``value`` is present in the bloom filter map. This helper returns ``0``
+if ``value`` is probably present in the map, or ``-ENOENT`` if ``value``
+is definitely not present in the map.
+
+Userspace
+---------
+
+bpf_map_update_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+ int bpf_map_update_elem (int fd, const void *key, const void *value, __u64 flags)
+
+A userspace program can add a ``value`` to a bloom filter using libbpf's
+``bpf_map_update_elem`` function. The ``key`` parameter must be set to
+``NULL`` and ``flags`` must be set to ``BPF_ANY``. Returns ``0`` on
+success, or negative error in case of failure.
+
+bpf_map_lookup_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+ int bpf_map_lookup_elem (int fd, const void *key, void *value)
+
+A userspace program can determine the presence of ``value`` in a bloom
+filter using libbpf's ``bpf_map_lookup_elem`` function. The ``key``
+parameter must be set to ``NULL``. Returns ``0`` if ``value`` is
+probably present in the map, or ``-ENOENT`` if ``value`` is definitely
+not present in the map.
+
+Examples
+========
+
+Kernel BPF
+----------
+
+This snippet shows how to declare a bloom filter in a BPF program:
+
+.. code-block:: c
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_BLOOM_FILTER);
+ __type(value, __u32);
+ __uint(max_entries, 1000);
+ __uint(map_extra, 3);
+ } bloom_filter SEC(".maps");
+
+This snippet shows how to determine presence of a value in a bloom
+filter in a BPF program:
+
+.. code-block:: c
+
+ void *lookup(__u32 key)
+ {
+ if (bpf_map_peek_elem(&bloom_filter, &key) == 0) {
+ /* Verify not a false positive and fetch an associated
+ * value using a secondary lookup, e.g. in a hash table
+ */
+ return bpf_map_lookup_elem(&hash_table, &key);
+ }
+ return 0;
+ }
+
+Userspace
+---------
+
+This snippet shows how to use libbpf to create a bloom filter map from
+userspace:
+
+.. code-block:: c
+
+ int create_bloom()
+ {
+ LIBBPF_OPTS(bpf_map_create_opts, opts,
+ .map_extra = 3); /* number of hashes */
+
+ return bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER,
+ "ipv6_bloom", /* name */
+ 0, /* key size, must be zero */
+ sizeof(ipv6_addr), /* value size */
+ 10000, /* max entries */
+ &opts); /* create options */
+ }
+
+This snippet shows how to add an element to a bloom filter from
+userspace:
+
+.. code-block:: c
+
+ int add_element(struct bpf_map *bloom_map, __u32 value)
+ {
+ int bloom_fd = bpf_map__fd(bloom_map);
+ return bpf_map_update_elem(bloom_fd, NULL, &value, BPF_ANY);
+ }
+
+References
+==========
+
+https://lwn.net/ml/bpf/20210831225005.2762202-1-joannekoong@fb.com/
diff --git a/Documentation/bpf/map_cgroup_storage.rst b/Documentation/bpf/map_cgroup_storage.rst
new file mode 100644
index 000000000000..8e5fe532c07e
--- /dev/null
+++ b/Documentation/bpf/map_cgroup_storage.rst
@@ -0,0 +1,169 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. Copyright (C) 2020 Google LLC.
+
+===========================
+BPF_MAP_TYPE_CGROUP_STORAGE
+===========================
+
+The ``BPF_MAP_TYPE_CGROUP_STORAGE`` map type represents a local fix-sized
+storage. It is only available with ``CONFIG_CGROUP_BPF``, and to programs that
+attach to cgroups; the programs are made available by the same Kconfig. The
+storage is identified by the cgroup the program is attached to.
+
+The map provide a local storage at the cgroup that the BPF program is attached
+to. It provides a faster and simpler access than the general purpose hash
+table, which performs a hash table lookups, and requires user to track live
+cgroups on their own.
+
+This document describes the usage and semantics of the
+``BPF_MAP_TYPE_CGROUP_STORAGE`` map type. Some of its behaviors was changed in
+Linux 5.9 and this document will describe the differences.
+
+Usage
+=====
+
+The map uses key of type of either ``__u64 cgroup_inode_id`` or
+``struct bpf_cgroup_storage_key``, declared in ``linux/bpf.h``::
+
+ struct bpf_cgroup_storage_key {
+ __u64 cgroup_inode_id;
+ __u32 attach_type;
+ };
+
+``cgroup_inode_id`` is the inode id of the cgroup directory.
+``attach_type`` is the program's attach type.
+
+Linux 5.9 added support for type ``__u64 cgroup_inode_id`` as the key type.
+When this key type is used, then all attach types of the particular cgroup and
+map will share the same storage. Otherwise, if the type is
+``struct bpf_cgroup_storage_key``, then programs of different attach types
+be isolated and see different storages.
+
+To access the storage in a program, use ``bpf_get_local_storage``::
+
+ void *bpf_get_local_storage(void *map, u64 flags)
+
+``flags`` is reserved for future use and must be 0.
+
+There is no implicit synchronization. Storages of ``BPF_MAP_TYPE_CGROUP_STORAGE``
+can be accessed by multiple programs across different CPUs, and user should
+take care of synchronization by themselves. The bpf infrastructure provides
+``struct bpf_spin_lock`` to synchronize the storage. See
+``tools/testing/selftests/bpf/progs/test_spin_lock.c``.
+
+Examples
+========
+
+Usage with key type as ``struct bpf_cgroup_storage_key``::
+
+ #include <bpf/bpf.h>
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+ __type(key, struct bpf_cgroup_storage_key);
+ __type(value, __u32);
+ } cgroup_storage SEC(".maps");
+
+ int program(struct __sk_buff *skb)
+ {
+ __u32 *ptr = bpf_get_local_storage(&cgroup_storage, 0);
+ __sync_fetch_and_add(ptr, 1);
+
+ return 0;
+ }
+
+Userspace accessing map declared above::
+
+ #include <linux/bpf.h>
+ #include <linux/libbpf.h>
+
+ __u32 map_lookup(struct bpf_map *map, __u64 cgrp, enum bpf_attach_type type)
+ {
+ struct bpf_cgroup_storage_key = {
+ .cgroup_inode_id = cgrp,
+ .attach_type = type,
+ };
+ __u32 value;
+ bpf_map_lookup_elem(bpf_map__fd(map), &key, &value);
+ // error checking omitted
+ return value;
+ }
+
+Alternatively, using just ``__u64 cgroup_inode_id`` as key type::
+
+ #include <bpf/bpf.h>
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+ __type(key, __u64);
+ __type(value, __u32);
+ } cgroup_storage SEC(".maps");
+
+ int program(struct __sk_buff *skb)
+ {
+ __u32 *ptr = bpf_get_local_storage(&cgroup_storage, 0);
+ __sync_fetch_and_add(ptr, 1);
+
+ return 0;
+ }
+
+And userspace::
+
+ #include <linux/bpf.h>
+ #include <linux/libbpf.h>
+
+ __u32 map_lookup(struct bpf_map *map, __u64 cgrp, enum bpf_attach_type type)
+ {
+ __u32 value;
+ bpf_map_lookup_elem(bpf_map__fd(map), &cgrp, &value);
+ // error checking omitted
+ return value;
+ }
+
+Semantics
+=========
+
+``BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE`` is a variant of this map type. This
+per-CPU variant will have different memory regions for each CPU for each
+storage. The non-per-CPU will have the same memory region for each storage.
+
+Prior to Linux 5.9, the lifetime of a storage is precisely per-attachment, and
+for a single ``CGROUP_STORAGE`` map, there can be at most one program loaded
+that uses the map. A program may be attached to multiple cgroups or have
+multiple attach types, and each attach creates a fresh zeroed storage. The
+storage is freed upon detach.
+
+There is a one-to-one association between the map of each type (per-CPU and
+non-per-CPU) and the BPF program during load verification time. As a result,
+each map can only be used by one BPF program and each BPF program can only use
+one storage map of each type. Because of map can only be used by one BPF
+program, sharing of this cgroup's storage with other BPF programs were
+impossible.
+
+Since Linux 5.9, storage can be shared by multiple programs. When a program is
+attached to a cgroup, the kernel would create a new storage only if the map
+does not already contain an entry for the cgroup and attach type pair, or else
+the old storage is reused for the new attachment. If the map is attach type
+shared, then attach type is simply ignored during comparison. Storage is freed
+only when either the map or the cgroup attached to is being freed. Detaching
+will not directly free the storage, but it may cause the reference to the map
+to reach zero and indirectly freeing all storage in the map.
+
+The map is not associated with any BPF program, thus making sharing possible.
+However, the BPF program can still only associate with one map of each type
+(per-CPU and non-per-CPU). A BPF program cannot use more than one
+``BPF_MAP_TYPE_CGROUP_STORAGE`` or more than one
+``BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE``.
+
+In all versions, userspace may use the attach parameters of cgroup and
+attach type pair in ``struct bpf_cgroup_storage_key`` as the key to the BPF map
+APIs to read or update the storage for a given attachment. For Linux 5.9
+attach type shared storages, only the first value in the struct, cgroup inode
+id, is used during comparison, so userspace may just specify a ``__u64``
+directly.
+
+The storage is bound at attach time. Even if the program is attached to parent
+and triggers in child, the storage still belongs to the parent.
+
+Userspace cannot create a new entry in the map or delete an existing entry.
+Program test runs always use a temporary storage.
diff --git a/Documentation/bpf/map_cgrp_storage.rst b/Documentation/bpf/map_cgrp_storage.rst
new file mode 100644
index 000000000000..5d3f603efffa
--- /dev/null
+++ b/Documentation/bpf/map_cgrp_storage.rst
@@ -0,0 +1,109 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. Copyright (C) 2022 Meta Platforms, Inc. and affiliates.
+
+=========================
+BPF_MAP_TYPE_CGRP_STORAGE
+=========================
+
+The ``BPF_MAP_TYPE_CGRP_STORAGE`` map type represents a local fix-sized
+storage for cgroups. It is only available with ``CONFIG_CGROUPS``.
+The programs are made available by the same Kconfig. The
+data for a particular cgroup can be retrieved by looking up the map
+with that cgroup.
+
+This document describes the usage and semantics of the
+``BPF_MAP_TYPE_CGRP_STORAGE`` map type.
+
+Usage
+=====
+
+The map key must be ``sizeof(int)`` representing a cgroup fd.
+To access the storage in a program, use ``bpf_cgrp_storage_get``::
+
+ void *bpf_cgrp_storage_get(struct bpf_map *map, struct cgroup *cgroup, void *value, u64 flags)
+
+``flags`` could be 0 or ``BPF_LOCAL_STORAGE_GET_F_CREATE`` which indicates that
+a new local storage will be created if one does not exist.
+
+The local storage can be removed with ``bpf_cgrp_storage_delete``::
+
+ long bpf_cgrp_storage_delete(struct bpf_map *map, struct cgroup *cgroup)
+
+The map is available to all program types.
+
+Examples
+========
+
+A BPF program example with BPF_MAP_TYPE_CGRP_STORAGE::
+
+ #include <vmlinux.h>
+ #include <bpf/bpf_helpers.h>
+ #include <bpf/bpf_tracing.h>
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_CGRP_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, long);
+ } cgrp_storage SEC(".maps");
+
+ SEC("tp_btf/sys_enter")
+ int BPF_PROG(on_enter, struct pt_regs *regs, long id)
+ {
+ struct task_struct *task = bpf_get_current_task_btf();
+ long *ptr;
+
+ ptr = bpf_cgrp_storage_get(&cgrp_storage, task->cgroups->dfl_cgrp, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (ptr)
+ __sync_fetch_and_add(ptr, 1);
+
+ return 0;
+ }
+
+Userspace accessing map declared above::
+
+ #include <linux/bpf.h>
+ #include <linux/libbpf.h>
+
+ __u32 map_lookup(struct bpf_map *map, int cgrp_fd)
+ {
+ __u32 *value;
+ value = bpf_map_lookup_elem(bpf_map__fd(map), &cgrp_fd);
+ if (value)
+ return *value;
+ return 0;
+ }
+
+Difference Between BPF_MAP_TYPE_CGRP_STORAGE and BPF_MAP_TYPE_CGROUP_STORAGE
+============================================================================
+
+The old cgroup storage map ``BPF_MAP_TYPE_CGROUP_STORAGE`` has been marked as
+deprecated (renamed to ``BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED``). The new
+``BPF_MAP_TYPE_CGRP_STORAGE`` map should be used instead. The following
+illusates the main difference between ``BPF_MAP_TYPE_CGRP_STORAGE`` and
+``BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED``.
+
+(1). ``BPF_MAP_TYPE_CGRP_STORAGE`` can be used by all program types while
+ ``BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED`` is available only to cgroup program types
+ like BPF_CGROUP_INET_INGRESS or BPF_CGROUP_SOCK_OPS, etc.
+
+(2). ``BPF_MAP_TYPE_CGRP_STORAGE`` supports local storage for more than one
+ cgroup while ``BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED`` only supports one cgroup
+ which is attached by a BPF program.
+
+(3). ``BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED`` allocates local storage at attach time so
+ ``bpf_get_local_storage()`` always returns non-NULL local storage.
+ ``BPF_MAP_TYPE_CGRP_STORAGE`` allocates local storage at runtime so
+ it is possible that ``bpf_cgrp_storage_get()`` may return null local storage.
+ To avoid such null local storage issue, user space can do
+ ``bpf_map_update_elem()`` to pre-allocate local storage before a BPF program
+ is attached.
+
+(4). ``BPF_MAP_TYPE_CGRP_STORAGE`` supports deleting local storage by a BPF program
+ while ``BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED`` only deletes storage during
+ prog detach time.
+
+So overall, ``BPF_MAP_TYPE_CGRP_STORAGE`` supports all ``BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED``
+functionality and beyond. It is recommended to use ``BPF_MAP_TYPE_CGRP_STORAGE``
+instead of ``BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED``.
diff --git a/Documentation/bpf/map_cpumap.rst b/Documentation/bpf/map_cpumap.rst
new file mode 100644
index 000000000000..923cfc8ab51f
--- /dev/null
+++ b/Documentation/bpf/map_cpumap.rst
@@ -0,0 +1,177 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. Copyright (C) 2022 Red Hat, Inc.
+
+===================
+BPF_MAP_TYPE_CPUMAP
+===================
+
+.. note::
+ - ``BPF_MAP_TYPE_CPUMAP`` was introduced in kernel version 4.15
+
+.. kernel-doc:: kernel/bpf/cpumap.c
+ :doc: cpu map
+
+An example use-case for this map type is software based Receive Side Scaling (RSS).
+
+The CPUMAP represents the CPUs in the system indexed as the map-key, and the
+map-value is the config setting (per CPUMAP entry). Each CPUMAP entry has a dedicated
+kernel thread bound to the given CPU to represent the remote CPU execution unit.
+
+Starting from Linux kernel version 5.9 the CPUMAP can run a second XDP program
+on the remote CPU. This allows an XDP program to split its processing across
+multiple CPUs. For example, a scenario where the initial CPU (that sees/receives
+the packets) needs to do minimal packet processing and the remote CPU (to which
+the packet is directed) can afford to spend more cycles processing the frame. The
+initial CPU is where the XDP redirect program is executed. The remote CPU
+receives raw ``xdp_frame`` objects.
+
+Usage
+=====
+
+Kernel BPF
+----------
+bpf_redirect_map()
+^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+ long bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+
+Redirect the packet to the endpoint referenced by ``map`` at index ``key``.
+For ``BPF_MAP_TYPE_CPUMAP`` this map contains references to CPUs.
+
+The lower two bits of ``flags`` are used as the return code if the map lookup
+fails. This is so that the return value can be one of the XDP program return
+codes up to ``XDP_TX``, as chosen by the caller.
+
+User space
+----------
+.. note::
+ CPUMAP entries can only be updated/looked up/deleted from user space and not
+ from an eBPF program. Trying to call these functions from a kernel eBPF
+ program will result in the program failing to load and a verifier warning.
+
+bpf_map_update_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+ int bpf_map_update_elem(int fd, const void *key, const void *value, __u64 flags);
+
+CPU entries can be added or updated using the ``bpf_map_update_elem()``
+helper. This helper replaces existing elements atomically. The ``value`` parameter
+can be ``struct bpf_cpumap_val``.
+
+ .. code-block:: c
+
+ struct bpf_cpumap_val {
+ __u32 qsize; /* queue size to remote target CPU */
+ union {
+ int fd; /* prog fd on map write */
+ __u32 id; /* prog id on map read */
+ } bpf_prog;
+ };
+
+The flags argument can be one of the following:
+ - BPF_ANY: Create a new element or update an existing element.
+ - BPF_NOEXIST: Create a new element only if it did not exist.
+ - BPF_EXIST: Update an existing element.
+
+bpf_map_lookup_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+ int bpf_map_lookup_elem(int fd, const void *key, void *value);
+
+CPU entries can be retrieved using the ``bpf_map_lookup_elem()``
+helper.
+
+bpf_map_delete_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+ int bpf_map_delete_elem(int fd, const void *key);
+
+CPU entries can be deleted using the ``bpf_map_delete_elem()``
+helper. This helper will return 0 on success, or negative error in case of
+failure.
+
+Examples
+========
+Kernel
+------
+
+The following code snippet shows how to declare a ``BPF_MAP_TYPE_CPUMAP`` called
+``cpu_map`` and how to redirect packets to a remote CPU using a round robin scheme.
+
+.. code-block:: c
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_CPUMAP);
+ __type(key, __u32);
+ __type(value, struct bpf_cpumap_val);
+ __uint(max_entries, 12);
+ } cpu_map SEC(".maps");
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 12);
+ } cpus_available SEC(".maps");
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 1);
+ } cpus_iterator SEC(".maps");
+
+ SEC("xdp")
+ int xdp_redir_cpu_round_robin(struct xdp_md *ctx)
+ {
+ __u32 key = 0;
+ __u32 cpu_dest = 0;
+ __u32 *cpu_selected, *cpu_iterator;
+ __u32 cpu_idx;
+
+ cpu_iterator = bpf_map_lookup_elem(&cpus_iterator, &key);
+ if (!cpu_iterator)
+ return XDP_ABORTED;
+ cpu_idx = *cpu_iterator;
+
+ *cpu_iterator += 1;
+ if (*cpu_iterator == bpf_num_possible_cpus())
+ *cpu_iterator = 0;
+
+ cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
+ if (!cpu_selected)
+ return XDP_ABORTED;
+ cpu_dest = *cpu_selected;
+
+ if (cpu_dest >= bpf_num_possible_cpus())
+ return XDP_ABORTED;
+
+ return bpf_redirect_map(&cpu_map, cpu_dest, 0);
+ }
+
+User space
+----------
+
+The following code snippet shows how to dynamically set the max_entries for a
+CPUMAP to the max number of cpus available on the system.
+
+.. code-block:: c
+
+ int set_max_cpu_entries(struct bpf_map *cpu_map)
+ {
+ if (bpf_map__set_max_entries(cpu_map, libbpf_num_possible_cpus()) < 0) {
+ fprintf(stderr, "Failed to set max entries for cpu_map map: %s",
+ strerror(errno));
+ return -1;
+ }
+ return 0;
+ }
+
+References
+===========
+
+- https://developers.redhat.com/blog/2021/05/13/receive-side-scaling-rss-with-ebpf-and-cpumap#redirecting_into_a_cpumap
diff --git a/Documentation/bpf/map_devmap.rst b/Documentation/bpf/map_devmap.rst
new file mode 100644
index 000000000000..927312c7b8c8
--- /dev/null
+++ b/Documentation/bpf/map_devmap.rst
@@ -0,0 +1,238 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. Copyright (C) 2022 Red Hat, Inc.
+
+=================================================
+BPF_MAP_TYPE_DEVMAP and BPF_MAP_TYPE_DEVMAP_HASH
+=================================================
+
+.. note::
+ - ``BPF_MAP_TYPE_DEVMAP`` was introduced in kernel version 4.14
+ - ``BPF_MAP_TYPE_DEVMAP_HASH`` was introduced in kernel version 5.4
+
+``BPF_MAP_TYPE_DEVMAP`` and ``BPF_MAP_TYPE_DEVMAP_HASH`` are BPF maps primarily
+used as backend maps for the XDP BPF helper call ``bpf_redirect_map()``.
+``BPF_MAP_TYPE_DEVMAP`` is backed by an array that uses the key as
+the index to lookup a reference to a net device. While ``BPF_MAP_TYPE_DEVMAP_HASH``
+is backed by a hash table that uses a key to lookup a reference to a net device.
+The user provides either <``key``/ ``ifindex``> or <``key``/ ``struct bpf_devmap_val``>
+pairs to update the maps with new net devices.
+
+.. note::
+ - The key to a hash map doesn't have to be an ``ifindex``.
+ - While ``BPF_MAP_TYPE_DEVMAP_HASH`` allows for densely packing the net devices
+ it comes at the cost of a hash of the key when performing a look up.
+
+The setup and packet enqueue/send code is shared between the two types of
+devmap; only the lookup and insertion is different.
+
+Usage
+=====
+Kernel BPF
+----------
+bpf_redirect_map()
+^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+ long bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+
+Redirect the packet to the endpoint referenced by ``map`` at index ``key``.
+For ``BPF_MAP_TYPE_DEVMAP`` and ``BPF_MAP_TYPE_DEVMAP_HASH`` this map contains
+references to net devices (for forwarding packets through other ports).
+
+The lower two bits of *flags* are used as the return code if the map lookup
+fails. This is so that the return value can be one of the XDP program return
+codes up to ``XDP_TX``, as chosen by the caller. The higher bits of ``flags``
+can be set to ``BPF_F_BROADCAST`` or ``BPF_F_EXCLUDE_INGRESS`` as defined
+below.
+
+With ``BPF_F_BROADCAST`` the packet will be broadcast to all the interfaces
+in the map, with ``BPF_F_EXCLUDE_INGRESS`` the ingress interface will be excluded
+from the broadcast.
+
+.. note::
+ - The key is ignored if BPF_F_BROADCAST is set.
+ - The broadcast feature can also be used to implement multicast forwarding:
+ simply create multiple DEVMAPs, each one corresponding to a single multicast group.
+
+This helper will return ``XDP_REDIRECT`` on success, or the value of the two
+lower bits of the ``flags`` argument if the map lookup fails.
+
+More information about redirection can be found :doc:`redirect`
+
+bpf_map_lookup_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+ void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)
+
+Net device entries can be retrieved using the ``bpf_map_lookup_elem()``
+helper.
+
+User space
+----------
+.. note::
+ DEVMAP entries can only be updated/deleted from user space and not
+ from an eBPF program. Trying to call these functions from a kernel eBPF
+ program will result in the program failing to load and a verifier warning.
+
+bpf_map_update_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+ int bpf_map_update_elem(int fd, const void *key, const void *value, __u64 flags);
+
+Net device entries can be added or updated using the ``bpf_map_update_elem()``
+helper. This helper replaces existing elements atomically. The ``value`` parameter
+can be ``struct bpf_devmap_val`` or a simple ``int ifindex`` for backwards
+compatibility.
+
+ .. code-block:: c
+
+ struct bpf_devmap_val {
+ __u32 ifindex; /* device index */
+ union {
+ int fd; /* prog fd on map write */
+ __u32 id; /* prog id on map read */
+ } bpf_prog;
+ };
+
+The ``flags`` argument can be one of the following:
+ - ``BPF_ANY``: Create a new element or update an existing element.
+ - ``BPF_NOEXIST``: Create a new element only if it did not exist.
+ - ``BPF_EXIST``: Update an existing element.
+
+DEVMAPs can associate a program with a device entry by adding a ``bpf_prog.fd``
+to ``struct bpf_devmap_val``. Programs are run after ``XDP_REDIRECT`` and have
+access to both Rx device and Tx device. The program associated with the ``fd``
+must have type XDP with expected attach type ``xdp_devmap``.
+When a program is associated with a device index, the program is run on an
+``XDP_REDIRECT`` and before the buffer is added to the per-cpu queue. Examples
+of how to attach/use xdp_devmap progs can be found in the kernel selftests:
+
+- ``tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c``
+- ``tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c``
+
+bpf_map_lookup_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+.. c:function::
+ int bpf_map_lookup_elem(int fd, const void *key, void *value);
+
+Net device entries can be retrieved using the ``bpf_map_lookup_elem()``
+helper.
+
+bpf_map_delete_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+.. c:function::
+ int bpf_map_delete_elem(int fd, const void *key);
+
+Net device entries can be deleted using the ``bpf_map_delete_elem()``
+helper. This helper will return 0 on success, or negative error in case of
+failure.
+
+Examples
+========
+
+Kernel BPF
+----------
+
+The following code snippet shows how to declare a ``BPF_MAP_TYPE_DEVMAP``
+called tx_port.
+
+.. code-block:: c
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 256);
+ } tx_port SEC(".maps");
+
+The following code snippet shows how to declare a ``BPF_MAP_TYPE_DEVMAP_HASH``
+called forward_map.
+
+.. code-block:: c
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
+ __type(key, __u32);
+ __type(value, struct bpf_devmap_val);
+ __uint(max_entries, 32);
+ } forward_map SEC(".maps");
+
+.. note::
+
+ The value type in the DEVMAP above is a ``struct bpf_devmap_val``
+
+The following code snippet shows a simple xdp_redirect_map program. This program
+would work with a user space program that populates the devmap ``forward_map`` based
+on ingress ifindexes. The BPF program (below) is redirecting packets using the
+ingress ``ifindex`` as the ``key``.
+
+.. code-block:: c
+
+ SEC("xdp")
+ int xdp_redirect_map_func(struct xdp_md *ctx)
+ {
+ int index = ctx->ingress_ifindex;
+
+ return bpf_redirect_map(&forward_map, index, 0);
+ }
+
+The following code snippet shows a BPF program that is broadcasting packets to
+all the interfaces in the ``tx_port`` devmap.
+
+.. code-block:: c
+
+ SEC("xdp")
+ int xdp_redirect_map_func(struct xdp_md *ctx)
+ {
+ return bpf_redirect_map(&tx_port, 0, BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
+ }
+
+User space
+----------
+
+The following code snippet shows how to update a devmap called ``tx_port``.
+
+.. code-block:: c
+
+ int update_devmap(int ifindex, int redirect_ifindex)
+ {
+ int ret;
+
+ ret = bpf_map_update_elem(bpf_map__fd(tx_port), &ifindex, &redirect_ifindex, 0);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to update devmap_ value: %s\n",
+ strerror(errno));
+ }
+
+ return ret;
+ }
+
+The following code snippet shows how to update a hash_devmap called ``forward_map``.
+
+.. code-block:: c
+
+ int update_devmap(int ifindex, int redirect_ifindex)
+ {
+ struct bpf_devmap_val devmap_val = { .ifindex = redirect_ifindex };
+ int ret;
+
+ ret = bpf_map_update_elem(bpf_map__fd(forward_map), &ifindex, &devmap_val, 0);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to update devmap_ value: %s\n",
+ strerror(errno));
+ }
+ return ret;
+ }
+
+References
+===========
+
+- https://lwn.net/Articles/728146/
+- https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=6f9d451ab1a33728adb72d7ff66a7b374d665176
+- https://elixir.bootlin.com/linux/latest/source/net/core/filter.c#L4106
diff --git a/Documentation/bpf/map_hash.rst b/Documentation/bpf/map_hash.rst
new file mode 100644
index 000000000000..8606bf958a8c
--- /dev/null
+++ b/Documentation/bpf/map_hash.rst
@@ -0,0 +1,265 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. Copyright (C) 2022 Red Hat, Inc.
+.. Copyright (C) 2022-2023 Isovalent, Inc.
+
+===============================================
+BPF_MAP_TYPE_HASH, with PERCPU and LRU Variants
+===============================================
+
+.. note::
+ - ``BPF_MAP_TYPE_HASH`` was introduced in kernel version 3.19
+ - ``BPF_MAP_TYPE_PERCPU_HASH`` was introduced in version 4.6
+ - Both ``BPF_MAP_TYPE_LRU_HASH`` and ``BPF_MAP_TYPE_LRU_PERCPU_HASH``
+ were introduced in version 4.10
+
+``BPF_MAP_TYPE_HASH`` and ``BPF_MAP_TYPE_PERCPU_HASH`` provide general
+purpose hash map storage. Both the key and the value can be structs,
+allowing for composite keys and values.
+
+The kernel is responsible for allocating and freeing key/value pairs, up
+to the max_entries limit that you specify. Hash maps use pre-allocation
+of hash table elements by default. The ``BPF_F_NO_PREALLOC`` flag can be
+used to disable pre-allocation when it is too memory expensive.
+
+``BPF_MAP_TYPE_PERCPU_HASH`` provides a separate value slot per
+CPU. The per-cpu values are stored internally in an array.
+
+The ``BPF_MAP_TYPE_LRU_HASH`` and ``BPF_MAP_TYPE_LRU_PERCPU_HASH``
+variants add LRU semantics to their respective hash tables. An LRU hash
+will automatically evict the least recently used entries when the hash
+table reaches capacity. An LRU hash maintains an internal LRU list that
+is used to select elements for eviction. This internal LRU list is
+shared across CPUs but it is possible to request a per CPU LRU list with
+the ``BPF_F_NO_COMMON_LRU`` flag when calling ``bpf_map_create``. The
+following table outlines the properties of LRU maps depending on the a
+map type and the flags used to create the map.
+
+======================== ========================= ================================
+Flag ``BPF_MAP_TYPE_LRU_HASH`` ``BPF_MAP_TYPE_LRU_PERCPU_HASH``
+======================== ========================= ================================
+**BPF_F_NO_COMMON_LRU** Per-CPU LRU, global map Per-CPU LRU, per-cpu map
+**!BPF_F_NO_COMMON_LRU** Global LRU, global map Global LRU, per-cpu map
+======================== ========================= ================================
+
+Usage
+=====
+
+Kernel BPF
+----------
+
+bpf_map_update_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+ long bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags)
+
+Hash entries can be added or updated using the ``bpf_map_update_elem()``
+helper. This helper replaces existing elements atomically. The ``flags``
+parameter can be used to control the update behaviour:
+
+- ``BPF_ANY`` will create a new element or update an existing element
+- ``BPF_NOEXIST`` will create a new element only if one did not already
+ exist
+- ``BPF_EXIST`` will update an existing element
+
+``bpf_map_update_elem()`` returns 0 on success, or negative error in
+case of failure.
+
+bpf_map_lookup_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+ void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)
+
+Hash entries can be retrieved using the ``bpf_map_lookup_elem()``
+helper. This helper returns a pointer to the value associated with
+``key``, or ``NULL`` if no entry was found.
+
+bpf_map_delete_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+ long bpf_map_delete_elem(struct bpf_map *map, const void *key)
+
+Hash entries can be deleted using the ``bpf_map_delete_elem()``
+helper. This helper will return 0 on success, or negative error in case
+of failure.
+
+Per CPU Hashes
+--------------
+
+For ``BPF_MAP_TYPE_PERCPU_HASH`` and ``BPF_MAP_TYPE_LRU_PERCPU_HASH``
+the ``bpf_map_update_elem()`` and ``bpf_map_lookup_elem()`` helpers
+automatically access the hash slot for the current CPU.
+
+bpf_map_lookup_percpu_elem()
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+ void *bpf_map_lookup_percpu_elem(struct bpf_map *map, const void *key, u32 cpu)
+
+The ``bpf_map_lookup_percpu_elem()`` helper can be used to lookup the
+value in the hash slot for a specific CPU. Returns value associated with
+``key`` on ``cpu`` , or ``NULL`` if no entry was found or ``cpu`` is
+invalid.
+
+Concurrency
+-----------
+
+Values stored in ``BPF_MAP_TYPE_HASH`` can be accessed concurrently by
+programs running on different CPUs. Since Kernel version 5.1, the BPF
+infrastructure provides ``struct bpf_spin_lock`` to synchronise access.
+See ``tools/testing/selftests/bpf/progs/test_spin_lock.c``.
+
+Userspace
+---------
+
+bpf_map_get_next_key()
+~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+ int bpf_map_get_next_key(int fd, const void *cur_key, void *next_key)
+
+In userspace, it is possible to iterate through the keys of a hash using
+libbpf's ``bpf_map_get_next_key()`` function. The first key can be fetched by
+calling ``bpf_map_get_next_key()`` with ``cur_key`` set to
+``NULL``. Subsequent calls will fetch the next key that follows the
+current key. ``bpf_map_get_next_key()`` returns 0 on success, -ENOENT if
+cur_key is the last key in the hash, or negative error in case of
+failure.
+
+Note that if ``cur_key`` gets deleted then ``bpf_map_get_next_key()``
+will instead return the *first* key in the hash table which is
+undesirable. It is recommended to use batched lookup if there is going
+to be key deletion intermixed with ``bpf_map_get_next_key()``.
+
+Examples
+========
+
+Please see the ``tools/testing/selftests/bpf`` directory for functional
+examples. The code snippets below demonstrates API usage.
+
+This example shows how to declare an LRU Hash with a struct key and a
+struct value.
+
+.. code-block:: c
+
+ #include <linux/bpf.h>
+ #include <bpf/bpf_helpers.h>
+
+ struct key {
+ __u32 srcip;
+ };
+
+ struct value {
+ __u64 packets;
+ __u64 bytes;
+ };
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __uint(max_entries, 32);
+ __type(key, struct key);
+ __type(value, struct value);
+ } packet_stats SEC(".maps");
+
+This example shows how to create or update hash values using atomic
+instructions:
+
+.. code-block:: c
+
+ static void update_stats(__u32 srcip, int bytes)
+ {
+ struct key key = {
+ .srcip = srcip,
+ };
+ struct value *value = bpf_map_lookup_elem(&packet_stats, &key);
+
+ if (value) {
+ __sync_fetch_and_add(&value->packets, 1);
+ __sync_fetch_and_add(&value->bytes, bytes);
+ } else {
+ struct value newval = { 1, bytes };
+
+ bpf_map_update_elem(&packet_stats, &key, &newval, BPF_NOEXIST);
+ }
+ }
+
+Userspace walking the map elements from the map declared above:
+
+.. code-block:: c
+
+ #include <bpf/libbpf.h>
+ #include <bpf/bpf.h>
+
+ static void walk_hash_elements(int map_fd)
+ {
+ struct key *cur_key = NULL;
+ struct key next_key;
+ struct value value;
+ int err;
+
+ for (;;) {
+ err = bpf_map_get_next_key(map_fd, cur_key, &next_key);
+ if (err)
+ break;
+
+ bpf_map_lookup_elem(map_fd, &next_key, &value);
+
+ // Use key and value here
+
+ cur_key = &next_key;
+ }
+ }
+
+Internals
+=========
+
+This section of the document is targeted at Linux developers and describes
+aspects of the map implementations that are not considered stable ABI. The
+following details are subject to change in future versions of the kernel.
+
+``BPF_MAP_TYPE_LRU_HASH`` and variants
+--------------------------------------
+
+Updating elements in LRU maps may trigger eviction behaviour when the capacity
+of the map is reached. There are various steps that the update algorithm
+attempts in order to enforce the LRU property which have increasing impacts on
+other CPUs involved in the following operation attempts:
+
+- Attempt to use CPU-local state to batch operations
+- Attempt to fetch ``target_free`` free nodes from global lists
+- Attempt to pull any node from a global list and remove it from the hashmap
+- Attempt to pull any node from any CPU's list and remove it from the hashmap
+
+The number of nodes to borrow from the global list in a batch, ``target_free``,
+depends on the size of the map. Larger batch size reduces lock contention, but
+may also exhaust the global structure. The value is computed at map init to
+avoid exhaustion, by limiting aggregate reservation by all CPUs to half the map
+size. With a minimum of a single element and maximum budget of 128 at a time.
+
+This algorithm is described visually in the following diagram. See the
+description in commit 3a08c2fd7634 ("bpf: LRU List") for a full explanation of
+the corresponding operations:
+
+.. kernel-figure:: map_lru_hash_update.dot
+ :alt: Diagram outlining the LRU eviction steps taken during map update.
+
+ LRU hash eviction during map update for ``BPF_MAP_TYPE_LRU_HASH`` and
+ variants. See the dot file source for kernel function name code references.
+
+Map updates start from the oval in the top right "begin ``bpf_map_update()``"
+and progress through the graph towards the bottom where the result may be
+either a successful update or a failure with various error codes. The key in
+the top right provides indicators for which locks may be involved in specific
+operations. This is intended as a visual hint for reasoning about how map
+contention may impact update operations, though the map type and flags may
+impact the actual contention on those locks, based on the logic described in
+the table above. For instance, if the map is created with type
+``BPF_MAP_TYPE_LRU_PERCPU_HASH`` and flags ``BPF_F_NO_COMMON_LRU`` then all map
+properties would be per-cpu.
diff --git a/Documentation/bpf/map_lpm_trie.rst b/Documentation/bpf/map_lpm_trie.rst
new file mode 100644
index 000000000000..f9cd579496c9
--- /dev/null
+++ b/Documentation/bpf/map_lpm_trie.rst
@@ -0,0 +1,197 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. Copyright (C) 2022 Red Hat, Inc.
+
+=====================
+BPF_MAP_TYPE_LPM_TRIE
+=====================
+
+.. note::
+ - ``BPF_MAP_TYPE_LPM_TRIE`` was introduced in kernel version 4.11
+
+``BPF_MAP_TYPE_LPM_TRIE`` provides a longest prefix match algorithm that
+can be used to match IP addresses to a stored set of prefixes.
+Internally, data is stored in an unbalanced trie of nodes that uses
+``prefixlen,data`` pairs as its keys. The ``data`` is interpreted in
+network byte order, i.e. big endian, so ``data[0]`` stores the most
+significant byte.
+
+LPM tries may be created with a maximum prefix length that is a multiple
+of 8, in the range from 8 to 2048. The key used for lookup and update
+operations is a ``struct bpf_lpm_trie_key_u8``, extended by
+``max_prefixlen/8`` bytes.
+
+- For IPv4 addresses the data length is 4 bytes
+- For IPv6 addresses the data length is 16 bytes
+
+The value type stored in the LPM trie can be any user defined type.
+
+.. note::
+ When creating a map of type ``BPF_MAP_TYPE_LPM_TRIE`` you must set the
+ ``BPF_F_NO_PREALLOC`` flag.
+
+Usage
+=====
+
+Kernel BPF
+----------
+
+bpf_map_lookup_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+ void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)
+
+The longest prefix entry for a given data value can be found using the
+``bpf_map_lookup_elem()`` helper. This helper returns a pointer to the
+value associated with the longest matching ``key``, or ``NULL`` if no
+entry was found.
+
+The ``key`` should have ``prefixlen`` set to ``max_prefixlen`` when
+performing longest prefix lookups. For example, when searching for the
+longest prefix match for an IPv4 address, ``prefixlen`` should be set to
+``32``.
+
+bpf_map_update_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+ long bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags)
+
+Prefix entries can be added or updated using the ``bpf_map_update_elem()``
+helper. This helper replaces existing elements atomically.
+
+``bpf_map_update_elem()`` returns ``0`` on success, or negative error in
+case of failure.
+
+ .. note::
+ The flags parameter must be one of BPF_ANY, BPF_NOEXIST or BPF_EXIST,
+ but the value is ignored, giving BPF_ANY semantics.
+
+bpf_map_delete_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+ long bpf_map_delete_elem(struct bpf_map *map, const void *key)
+
+Prefix entries can be deleted using the ``bpf_map_delete_elem()``
+helper. This helper will return 0 on success, or negative error in case
+of failure.
+
+Userspace
+---------
+
+Access from userspace uses libbpf APIs with the same names as above, with
+the map identified by ``fd``.
+
+bpf_map_get_next_key()
+~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+ int bpf_map_get_next_key (int fd, const void *cur_key, void *next_key)
+
+A userspace program can iterate through the entries in an LPM trie using
+libbpf's ``bpf_map_get_next_key()`` function. The first key can be
+fetched by calling ``bpf_map_get_next_key()`` with ``cur_key`` set to
+``NULL``. Subsequent calls will fetch the next key that follows the
+current key. ``bpf_map_get_next_key()`` returns ``0`` on success,
+``-ENOENT`` if ``cur_key`` is the last key in the trie, or negative
+error in case of failure.
+
+``bpf_map_get_next_key()`` will iterate through the LPM trie elements
+from leftmost leaf first. This means that iteration will return more
+specific keys before less specific ones.
+
+Examples
+========
+
+Please see ``tools/testing/selftests/bpf/test_lpm_map.c`` for examples
+of LPM trie usage from userspace. The code snippets below demonstrate
+API usage.
+
+Kernel BPF
+----------
+
+The following BPF code snippet shows how to declare a new LPM trie for IPv4
+address prefixes:
+
+.. code-block:: c
+
+ #include <linux/bpf.h>
+ #include <bpf/bpf_helpers.h>
+
+ struct ipv4_lpm_key {
+ __u32 prefixlen;
+ __u32 data;
+ };
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_LPM_TRIE);
+ __type(key, struct ipv4_lpm_key);
+ __type(value, __u32);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __uint(max_entries, 255);
+ } ipv4_lpm_map SEC(".maps");
+
+The following BPF code snippet shows how to lookup by IPv4 address:
+
+.. code-block:: c
+
+ void *lookup(__u32 ipaddr)
+ {
+ struct ipv4_lpm_key key = {
+ .prefixlen = 32,
+ .data = ipaddr
+ };
+
+ return bpf_map_lookup_elem(&ipv4_lpm_map, &key);
+ }
+
+Userspace
+---------
+
+The following snippet shows how to insert an IPv4 prefix entry into an
+LPM trie:
+
+.. code-block:: c
+
+ int add_prefix_entry(int lpm_fd, __u32 addr, __u32 prefixlen, struct value *value)
+ {
+ struct ipv4_lpm_key ipv4_key = {
+ .prefixlen = prefixlen,
+ .data = addr
+ };
+ return bpf_map_update_elem(lpm_fd, &ipv4_key, value, BPF_ANY);
+ }
+
+The following snippet shows a userspace program walking through the entries
+of an LPM trie:
+
+
+.. code-block:: c
+
+ #include <bpf/libbpf.h>
+ #include <bpf/bpf.h>
+
+ void iterate_lpm_trie(int map_fd)
+ {
+ struct ipv4_lpm_key *cur_key = NULL;
+ struct ipv4_lpm_key next_key;
+ struct value value;
+ int err;
+
+ for (;;) {
+ err = bpf_map_get_next_key(map_fd, cur_key, &next_key);
+ if (err)
+ break;
+
+ bpf_map_lookup_elem(map_fd, &next_key, &value);
+
+ /* Use key and value here */
+
+ cur_key = &next_key;
+ }
+ }
diff --git a/Documentation/bpf/map_lru_hash_update.dot b/Documentation/bpf/map_lru_hash_update.dot
new file mode 100644
index 000000000000..ab10058f5b79
--- /dev/null
+++ b/Documentation/bpf/map_lru_hash_update.dot
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2022-2023 Isovalent, Inc.
+digraph {
+ node [colorscheme=accent4,style=filled] # Apply colorscheme to all nodes
+ graph [splines=ortho, nodesep=1]
+
+ subgraph cluster_key {
+ label = "Key\n(locks held during operation)";
+ rankdir = TB;
+
+ remote_lock [shape=rectangle,fillcolor=4,label="remote CPU LRU lock"]
+ hash_lock [shape=rectangle,fillcolor=3,label="hashtab lock"]
+ lru_lock [shape=rectangle,fillcolor=2,label="LRU lock"]
+ local_lock [shape=rectangle,fillcolor=1,label="local CPU LRU lock"]
+ no_lock [shape=rectangle,label="no locks held"]
+ }
+
+ begin [shape=oval,label="begin\nbpf_map_update()"]
+
+ // Nodes below with an 'fn_' prefix are roughly labeled by the C function
+ // names that initiate the corresponding logic in kernel/bpf/bpf_lru_list.c.
+ // Number suffixes and errno suffixes handle subsections of the corresponding
+ // logic in the function as of the writing of this dot.
+
+ // cf. __local_list_pop_free() / bpf_percpu_lru_pop_free()
+ local_freelist_check [shape=diamond,fillcolor=1,
+ label="Local freelist\nnode available?"];
+ use_local_node [shape=rectangle,
+ label="Use node owned\nby this CPU"]
+
+ // cf. bpf_lru_pop_free()
+ common_lru_check [shape=diamond,
+ label="Map created with\ncommon LRU?\n(!BPF_F_NO_COMMON_LRU)"];
+
+ fn_bpf_lru_list_pop_free_to_local [shape=rectangle,fillcolor=2,
+ label="Flush local pending,
+ Rotate Global list, move
+ target_free
+ from global -> local"]
+ // Also corresponds to:
+ // fn__local_list_flush()
+ // fn_bpf_lru_list_rotate()
+ fn___bpf_lru_node_move_to_free[shape=diamond,fillcolor=2,
+ label="Able to free\ntarget_free\nnodes?"]
+
+ fn___bpf_lru_list_shrink_inactive [shape=rectangle,fillcolor=3,
+ label="Shrink inactive list
+ up to remaining
+ target_free
+ (global LRU -> local)"]
+ fn___bpf_lru_list_shrink [shape=diamond,fillcolor=2,
+ label="> 0 entries in\nlocal free list?"]
+ fn___bpf_lru_list_shrink2 [shape=rectangle,fillcolor=2,
+ label="Steal one node from
+ inactive, or if empty,
+ from active global list"]
+ fn___bpf_lru_list_shrink3 [shape=rectangle,fillcolor=3,
+ label="Try to remove\nnode from hashtab"]
+
+ local_freelist_check2 [shape=diamond,label="Htab removal\nsuccessful?"]
+ common_lru_check2 [shape=diamond,
+ label="Map created with\ncommon LRU?\n(!BPF_F_NO_COMMON_LRU)"];
+
+ subgraph cluster_remote_lock {
+ label = "Iterate through CPUs\n(start from current)";
+ style = dashed;
+ rankdir=LR;
+
+ local_freelist_check5 [shape=diamond,fillcolor=4,
+ label="Steal a node from\nper-cpu freelist?"]
+ local_freelist_check6 [shape=rectangle,fillcolor=4,
+ label="Steal a node from
+ (1) Unreferenced pending, or
+ (2) Any pending node"]
+ local_freelist_check7 [shape=rectangle,fillcolor=3,
+ label="Try to remove\nnode from hashtab"]
+ fn_htab_lru_map_update_elem [shape=diamond,
+ label="Stole node\nfrom remote\nCPU?"]
+ fn_htab_lru_map_update_elem2 [shape=diamond,label="Iterated\nall CPUs?"]
+ // Also corresponds to:
+ // use_local_node()
+ // fn__local_list_pop_pending()
+ }
+
+ fn_bpf_lru_list_pop_free_to_local2 [shape=rectangle,
+ label="Use node that was\nnot recently referenced"]
+ local_freelist_check4 [shape=rectangle,
+ label="Use node that was\nactively referenced\nin global list"]
+ fn_htab_lru_map_update_elem_ENOMEM [shape=oval,label="return -ENOMEM"]
+ fn_htab_lru_map_update_elem3 [shape=rectangle,
+ label="Use node that was\nactively referenced\nin (another?) CPU's cache"]
+ fn_htab_lru_map_update_elem4 [shape=rectangle,fillcolor=3,
+ label="Update hashmap\nwith new element"]
+ fn_htab_lru_map_update_elem5 [shape=oval,label="return 0"]
+ fn_htab_lru_map_update_elem_EBUSY [shape=oval,label="return -EBUSY"]
+ fn_htab_lru_map_update_elem_EEXIST [shape=oval,label="return -EEXIST"]
+ fn_htab_lru_map_update_elem_ENOENT [shape=oval,label="return -ENOENT"]
+
+ begin -> local_freelist_check
+ local_freelist_check -> use_local_node [xlabel="Y"]
+ local_freelist_check -> common_lru_check [xlabel="N"]
+ common_lru_check -> fn_bpf_lru_list_pop_free_to_local [xlabel="Y"]
+ common_lru_check -> fn___bpf_lru_list_shrink_inactive [xlabel="N"]
+ fn_bpf_lru_list_pop_free_to_local -> fn___bpf_lru_node_move_to_free
+ fn___bpf_lru_node_move_to_free ->
+ fn_bpf_lru_list_pop_free_to_local2 [xlabel="Y"]
+ fn___bpf_lru_node_move_to_free ->
+ fn___bpf_lru_list_shrink_inactive [xlabel="N"]
+ fn___bpf_lru_list_shrink_inactive -> fn___bpf_lru_list_shrink
+ fn___bpf_lru_list_shrink -> fn_bpf_lru_list_pop_free_to_local2 [xlabel = "Y"]
+ fn___bpf_lru_list_shrink -> fn___bpf_lru_list_shrink2 [xlabel="N"]
+ fn___bpf_lru_list_shrink2 -> fn___bpf_lru_list_shrink3
+ fn___bpf_lru_list_shrink3 -> local_freelist_check2
+ local_freelist_check2 -> local_freelist_check4 [xlabel = "Y"]
+ local_freelist_check2 -> common_lru_check2 [xlabel = "N"]
+ common_lru_check2 -> local_freelist_check5 [xlabel = "Y"]
+ common_lru_check2 -> fn_htab_lru_map_update_elem_ENOMEM [xlabel = "N"]
+ local_freelist_check5 -> fn_htab_lru_map_update_elem [xlabel = "Y"]
+ local_freelist_check5 -> local_freelist_check6 [xlabel = "N"]
+ local_freelist_check6 -> local_freelist_check7
+ local_freelist_check7 -> fn_htab_lru_map_update_elem
+
+ fn_htab_lru_map_update_elem -> fn_htab_lru_map_update_elem3 [xlabel = "Y"]
+ fn_htab_lru_map_update_elem -> fn_htab_lru_map_update_elem2 [xlabel = "N"]
+ fn_htab_lru_map_update_elem2 ->
+ fn_htab_lru_map_update_elem_ENOMEM [xlabel = "Y"]
+ fn_htab_lru_map_update_elem2 -> local_freelist_check5 [xlabel = "N"]
+ fn_htab_lru_map_update_elem3 -> fn_htab_lru_map_update_elem4
+
+ use_local_node -> fn_htab_lru_map_update_elem4
+ fn_bpf_lru_list_pop_free_to_local2 -> fn_htab_lru_map_update_elem4
+ local_freelist_check4 -> fn_htab_lru_map_update_elem4
+
+ fn_htab_lru_map_update_elem4 -> fn_htab_lru_map_update_elem5 [headlabel="Success"]
+ fn_htab_lru_map_update_elem4 ->
+ fn_htab_lru_map_update_elem_EBUSY [xlabel="Hashtab lock failed"]
+ fn_htab_lru_map_update_elem4 ->
+ fn_htab_lru_map_update_elem_EEXIST [xlabel="BPF_EXIST set and\nkey already exists"]
+ fn_htab_lru_map_update_elem4 ->
+ fn_htab_lru_map_update_elem_ENOENT [headlabel="BPF_NOEXIST set\nand no such entry"]
+
+ // Create invisible pad nodes to line up various nodes
+ pad0 [style=invis]
+ pad1 [style=invis]
+ pad2 [style=invis]
+ pad3 [style=invis]
+ pad4 [style=invis]
+
+ // Line up the key with the top of the graph
+ no_lock -> local_lock [style=invis]
+ local_lock -> lru_lock [style=invis]
+ lru_lock -> hash_lock [style=invis]
+ hash_lock -> remote_lock [style=invis]
+ remote_lock -> local_freelist_check5 [style=invis]
+ remote_lock -> fn___bpf_lru_list_shrink [style=invis]
+
+ // Line up return code nodes at the bottom of the graph
+ fn_htab_lru_map_update_elem -> pad0 [style=invis]
+ pad0 -> pad1 [style=invis]
+ pad1 -> pad2 [style=invis]
+ //pad2-> fn_htab_lru_map_update_elem_ENOMEM [style=invis]
+ fn_htab_lru_map_update_elem4 -> pad3 [style=invis]
+ pad3 -> fn_htab_lru_map_update_elem5 [style=invis]
+ pad3 -> fn_htab_lru_map_update_elem_EBUSY [style=invis]
+ pad3 -> fn_htab_lru_map_update_elem_EEXIST [style=invis]
+ pad3 -> fn_htab_lru_map_update_elem_ENOENT [style=invis]
+
+ // Reduce diagram width by forcing some nodes to appear above others
+ local_freelist_check4 -> fn_htab_lru_map_update_elem3 [style=invis]
+ common_lru_check2 -> pad4 [style=invis]
+ pad4 -> local_freelist_check5 [style=invis]
+}
diff --git a/Documentation/bpf/map_of_maps.rst b/Documentation/bpf/map_of_maps.rst
new file mode 100644
index 000000000000..7b5617c2d017
--- /dev/null
+++ b/Documentation/bpf/map_of_maps.rst
@@ -0,0 +1,130 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. Copyright (C) 2022 Red Hat, Inc.
+
+========================================================
+BPF_MAP_TYPE_ARRAY_OF_MAPS and BPF_MAP_TYPE_HASH_OF_MAPS
+========================================================
+
+.. note::
+ - ``BPF_MAP_TYPE_ARRAY_OF_MAPS`` and ``BPF_MAP_TYPE_HASH_OF_MAPS`` were
+ introduced in kernel version 4.12
+
+``BPF_MAP_TYPE_ARRAY_OF_MAPS`` and ``BPF_MAP_TYPE_HASH_OF_MAPS`` provide general
+purpose support for map in map storage. One level of nesting is supported, where
+an outer map contains instances of a single type of inner map, for example
+``array_of_maps->sock_map``.
+
+When creating an outer map, an inner map instance is used to initialize the
+metadata that the outer map holds about its inner maps. This inner map has a
+separate lifetime from the outer map and can be deleted after the outer map has
+been created.
+
+The outer map supports element lookup, update and delete from user space using
+the syscall API. A BPF program is only allowed to do element lookup in the outer
+map.
+
+.. note::
+ - Multi-level nesting is not supported.
+ - Any BPF map type can be used as an inner map, except for
+ ``BPF_MAP_TYPE_PROG_ARRAY``.
+ - A BPF program cannot update or delete outer map entries.
+
+For ``BPF_MAP_TYPE_ARRAY_OF_MAPS`` the key is an unsigned 32-bit integer index
+into the array. The array is a fixed size with ``max_entries`` elements that are
+zero initialized when created.
+
+For ``BPF_MAP_TYPE_HASH_OF_MAPS`` the key type can be chosen when defining the
+map. The kernel is responsible for allocating and freeing key/value pairs, up to
+the max_entries limit that you specify. Hash maps use pre-allocation of hash
+table elements by default. The ``BPF_F_NO_PREALLOC`` flag can be used to disable
+pre-allocation when it is too memory expensive.
+
+Usage
+=====
+
+Kernel BPF Helper
+-----------------
+
+bpf_map_lookup_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+ void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)
+
+Inner maps can be retrieved using the ``bpf_map_lookup_elem()`` helper. This
+helper returns a pointer to the inner map, or ``NULL`` if no entry was found.
+
+Examples
+========
+
+Kernel BPF Example
+------------------
+
+This snippet shows how to create and initialise an array of devmaps in a BPF
+program. Note that the outer array can only be modified from user space using
+the syscall API.
+
+.. code-block:: c
+
+ struct inner_map {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(max_entries, 10);
+ __type(key, __u32);
+ __type(value, __u32);
+ } inner_map1 SEC(".maps"), inner_map2 SEC(".maps");
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __array(values, struct inner_map);
+ } outer_map SEC(".maps") = {
+ .values = { &inner_map1,
+ &inner_map2 }
+ };
+
+See ``progs/test_btf_map_in_map.c`` in ``tools/testing/selftests/bpf`` for more
+examples of declarative initialisation of outer maps.
+
+User Space
+----------
+
+This snippet shows how to create an array based outer map:
+
+.. code-block:: c
+
+ int create_outer_array(int inner_fd) {
+ LIBBPF_OPTS(bpf_map_create_opts, opts, .inner_map_fd = inner_fd);
+ int fd;
+
+ fd = bpf_map_create(BPF_MAP_TYPE_ARRAY_OF_MAPS,
+ "example_array", /* name */
+ sizeof(__u32), /* key size */
+ sizeof(__u32), /* value size */
+ 256, /* max entries */
+ &opts); /* create opts */
+ return fd;
+ }
+
+
+This snippet shows how to add an inner map to an outer map:
+
+.. code-block:: c
+
+ int add_devmap(int outer_fd, int index, const char *name) {
+ int fd;
+
+ fd = bpf_map_create(BPF_MAP_TYPE_DEVMAP, name,
+ sizeof(__u32), sizeof(__u32), 256, NULL);
+ if (fd < 0)
+ return fd;
+
+ return bpf_map_update_elem(outer_fd, &index, &fd, BPF_ANY);
+ }
+
+References
+==========
+
+- https://lore.kernel.org/netdev/20170322170035.923581-3-kafai@fb.com/
+- https://lore.kernel.org/netdev/20170322170035.923581-4-kafai@fb.com/
diff --git a/Documentation/bpf/map_queue_stack.rst b/Documentation/bpf/map_queue_stack.rst
new file mode 100644
index 000000000000..8d14ed49d6e1
--- /dev/null
+++ b/Documentation/bpf/map_queue_stack.rst
@@ -0,0 +1,146 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. Copyright (C) 2022 Red Hat, Inc.
+
+=========================================
+BPF_MAP_TYPE_QUEUE and BPF_MAP_TYPE_STACK
+=========================================
+
+.. note::
+ - ``BPF_MAP_TYPE_QUEUE`` and ``BPF_MAP_TYPE_STACK`` were introduced
+ in kernel version 4.20
+
+``BPF_MAP_TYPE_QUEUE`` provides FIFO storage and ``BPF_MAP_TYPE_STACK``
+provides LIFO storage for BPF programs. These maps support peek, pop and
+push operations that are exposed to BPF programs through the respective
+helpers. These operations are exposed to userspace applications using
+the existing ``bpf`` syscall in the following way:
+
+- ``BPF_MAP_LOOKUP_ELEM`` -> peek
+- ``BPF_MAP_LOOKUP_AND_DELETE_ELEM`` -> pop
+- ``BPF_MAP_UPDATE_ELEM`` -> push
+
+``BPF_MAP_TYPE_QUEUE`` and ``BPF_MAP_TYPE_STACK`` do not support
+``BPF_F_NO_PREALLOC``.
+
+Usage
+=====
+
+Kernel BPF
+----------
+
+bpf_map_push_elem()
+~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+ long bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+
+An element ``value`` can be added to a queue or stack using the
+``bpf_map_push_elem`` helper. The ``flags`` parameter must be set to
+``BPF_ANY`` or ``BPF_EXIST``. If ``flags`` is set to ``BPF_EXIST`` then,
+when the queue or stack is full, the oldest element will be removed to
+make room for ``value`` to be added. Returns ``0`` on success, or
+negative error in case of failure.
+
+bpf_map_peek_elem()
+~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+ long bpf_map_peek_elem(struct bpf_map *map, void *value)
+
+This helper fetches an element ``value`` from a queue or stack without
+removing it. Returns ``0`` on success, or negative error in case of
+failure.
+
+bpf_map_pop_elem()
+~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+ long bpf_map_pop_elem(struct bpf_map *map, void *value)
+
+This helper removes an element into ``value`` from a queue or
+stack. Returns ``0`` on success, or negative error in case of failure.
+
+
+Userspace
+---------
+
+bpf_map_update_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+ int bpf_map_update_elem (int fd, const void *key, const void *value, __u64 flags)
+
+A userspace program can push ``value`` onto a queue or stack using libbpf's
+``bpf_map_update_elem`` function. The ``key`` parameter must be set to
+``NULL`` and ``flags`` must be set to ``BPF_ANY`` or ``BPF_EXIST``, with the
+same semantics as the ``bpf_map_push_elem`` kernel helper. Returns ``0`` on
+success, or negative error in case of failure.
+
+bpf_map_lookup_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+ int bpf_map_lookup_elem (int fd, const void *key, void *value)
+
+A userspace program can peek at the ``value`` at the head of a queue or stack
+using the libbpf ``bpf_map_lookup_elem`` function. The ``key`` parameter must be
+set to ``NULL``. Returns ``0`` on success, or negative error in case of
+failure.
+
+bpf_map_lookup_and_delete_elem()
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+ int bpf_map_lookup_and_delete_elem (int fd, const void *key, void *value)
+
+A userspace program can pop a ``value`` from the head of a queue or stack using
+the libbpf ``bpf_map_lookup_and_delete_elem`` function. The ``key`` parameter
+must be set to ``NULL``. Returns ``0`` on success, or negative error in case of
+failure.
+
+Examples
+========
+
+Kernel BPF
+----------
+
+This snippet shows how to declare a queue in a BPF program:
+
+.. code-block:: c
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_QUEUE);
+ __type(value, __u32);
+ __uint(max_entries, 10);
+ } queue SEC(".maps");
+
+
+Userspace
+---------
+
+This snippet shows how to use libbpf's low-level API to create a queue from
+userspace:
+
+.. code-block:: c
+
+ int create_queue()
+ {
+ return bpf_map_create(BPF_MAP_TYPE_QUEUE,
+ "sample_queue", /* name */
+ 0, /* key size, must be zero */
+ sizeof(__u32), /* value size */
+ 10, /* max entries */
+ NULL); /* create options */
+ }
+
+
+References
+==========
+
+https://lwn.net/ml/netdev/153986858555.9127.14517764371945179514.stgit@kernel/
diff --git a/Documentation/bpf/map_sk_storage.rst b/Documentation/bpf/map_sk_storage.rst
new file mode 100644
index 000000000000..4e9d23ab9ecd
--- /dev/null
+++ b/Documentation/bpf/map_sk_storage.rst
@@ -0,0 +1,159 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. Copyright (C) 2022 Red Hat, Inc.
+
+=======================
+BPF_MAP_TYPE_SK_STORAGE
+=======================
+
+.. note::
+ - ``BPF_MAP_TYPE_SK_STORAGE`` was introduced in kernel version 5.2
+
+``BPF_MAP_TYPE_SK_STORAGE`` is used to provide socket-local storage for BPF
+programs. A map of type ``BPF_MAP_TYPE_SK_STORAGE`` declares the type of storage
+to be provided and acts as the handle for accessing the socket-local
+storage. The values for maps of type ``BPF_MAP_TYPE_SK_STORAGE`` are stored
+locally with each socket instead of with the map. The kernel is responsible for
+allocating storage for a socket when requested and for freeing the storage when
+either the map or the socket is deleted.
+
+.. note::
+ - The key type must be ``int`` and ``max_entries`` must be set to ``0``.
+ - The ``BPF_F_NO_PREALLOC`` flag must be used when creating a map for
+ socket-local storage.
+
+Usage
+=====
+
+Kernel BPF
+----------
+
+bpf_sk_storage_get()
+~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+ void *bpf_sk_storage_get(struct bpf_map *map, void *sk, void *value, u64 flags)
+
+Socket-local storage for ``map`` can be retrieved from socket ``sk`` using the
+``bpf_sk_storage_get()`` helper. If the ``BPF_LOCAL_STORAGE_GET_F_CREATE``
+flag is used then ``bpf_sk_storage_get()`` will create the storage for ``sk``
+if it does not already exist. ``value`` can be used together with
+``BPF_LOCAL_STORAGE_GET_F_CREATE`` to initialize the storage value, otherwise
+it will be zero initialized. Returns a pointer to the storage on success, or
+``NULL`` in case of failure.
+
+.. note::
+ - ``sk`` is a kernel ``struct sock`` pointer for LSM or tracing programs.
+ - ``sk`` is a ``struct bpf_sock`` pointer for other program types.
+
+bpf_sk_storage_delete()
+~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+ long bpf_sk_storage_delete(struct bpf_map *map, void *sk)
+
+Socket-local storage for ``map`` can be deleted from socket ``sk`` using the
+``bpf_sk_storage_delete()`` helper. Returns ``0`` on success, or negative
+error in case of failure.
+
+User space
+----------
+
+bpf_map_update_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+ int bpf_map_update_elem(int map_fd, const void *key, const void *value, __u64 flags)
+
+Socket-local storage for map ``map_fd`` can be added or updated locally to a
+socket using the ``bpf_map_update_elem()`` libbpf function. The socket is
+identified by a `socket` ``fd`` stored in the pointer ``key``. The pointer
+``value`` has the data to be added or updated to the socket ``fd``. The type
+and size of ``value`` should be the same as the value type of the map
+definition.
+
+The ``flags`` parameter can be used to control the update behaviour:
+
+- ``BPF_ANY`` will create storage for `socket` ``fd`` or update existing storage.
+- ``BPF_NOEXIST`` will create storage for `socket` ``fd`` only if it did not
+ already exist, otherwise the call will fail with ``-EEXIST``.
+- ``BPF_EXIST`` will update existing storage for `socket` ``fd`` if it already
+ exists, otherwise the call will fail with ``-ENOENT``.
+
+Returns ``0`` on success, or negative error in case of failure.
+
+bpf_map_lookup_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+ int bpf_map_lookup_elem(int map_fd, const void *key, void *value)
+
+Socket-local storage for map ``map_fd`` can be retrieved from a socket using
+the ``bpf_map_lookup_elem()`` libbpf function. The storage is retrieved from
+the socket identified by a `socket` ``fd`` stored in the pointer
+``key``. Returns ``0`` on success, or negative error in case of failure.
+
+bpf_map_delete_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+ int bpf_map_delete_elem(int map_fd, const void *key)
+
+Socket-local storage for map ``map_fd`` can be deleted from a socket using the
+``bpf_map_delete_elem()`` libbpf function. The storage is deleted from the
+socket identified by a `socket` ``fd`` stored in the pointer ``key``. Returns
+``0`` on success, or negative error in case of failure.
+
+Examples
+========
+
+Kernel BPF
+----------
+
+This snippet shows how to declare socket-local storage in a BPF program:
+
+.. code-block:: c
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct my_storage);
+ } socket_storage SEC(".maps");
+
+This snippet shows how to retrieve socket-local storage in a BPF program:
+
+.. code-block:: c
+
+ SEC("sockops")
+ int _sockops(struct bpf_sock_ops *ctx)
+ {
+ struct my_storage *storage;
+ struct bpf_sock *sk;
+
+ sk = ctx->sk;
+ if (!sk)
+ return 1;
+
+ storage = bpf_sk_storage_get(&socket_storage, sk, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!storage)
+ return 1;
+
+ /* Use 'storage' here */
+
+ return 1;
+ }
+
+
+Please see the ``tools/testing/selftests/bpf`` directory for functional
+examples.
+
+References
+==========
+
+https://lwn.net/ml/netdev/20190426171103.61892-1-kafai@fb.com/
diff --git a/Documentation/bpf/map_sockmap.rst b/Documentation/bpf/map_sockmap.rst
new file mode 100644
index 000000000000..2d630686a00b
--- /dev/null
+++ b/Documentation/bpf/map_sockmap.rst
@@ -0,0 +1,498 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. Copyright Red Hat
+
+==============================================
+BPF_MAP_TYPE_SOCKMAP and BPF_MAP_TYPE_SOCKHASH
+==============================================
+
+.. note::
+ - ``BPF_MAP_TYPE_SOCKMAP`` was introduced in kernel version 4.14
+ - ``BPF_MAP_TYPE_SOCKHASH`` was introduced in kernel version 4.18
+
+``BPF_MAP_TYPE_SOCKMAP`` and ``BPF_MAP_TYPE_SOCKHASH`` maps can be used to
+redirect skbs between sockets or to apply policy at the socket level based on
+the result of a BPF (verdict) program with the help of the BPF helpers
+``bpf_sk_redirect_map()``, ``bpf_sk_redirect_hash()``,
+``bpf_msg_redirect_map()`` and ``bpf_msg_redirect_hash()``.
+
+``BPF_MAP_TYPE_SOCKMAP`` is backed by an array that uses an integer key as the
+index to look up a reference to a ``struct sock``. The map values are socket
+descriptors. Similarly, ``BPF_MAP_TYPE_SOCKHASH`` is a hash backed BPF map that
+holds references to sockets via their socket descriptors.
+
+.. note::
+ The value type is either __u32 or __u64; the latter (__u64) is to support
+ returning socket cookies to userspace. Returning the ``struct sock *`` that
+ the map holds to user-space is neither safe nor useful.
+
+These maps may have BPF programs attached to them, specifically a parser program
+and a verdict program. The parser program determines how much data has been
+parsed and therefore how much data needs to be queued to come to a verdict. The
+verdict program is essentially the redirect program and can return a verdict
+of ``__SK_DROP``, ``__SK_PASS``, or ``__SK_REDIRECT``.
+
+When a socket is inserted into one of these maps, its socket callbacks are
+replaced and a ``struct sk_psock`` is attached to it. Additionally, this
+``sk_psock`` inherits the programs that are attached to the map.
+
+A sock object may be in multiple maps, but can only inherit a single
+parse or verdict program. If adding a sock object to a map would result
+in having multiple parser programs the update will return an EBUSY error.
+
+The supported programs to attach to these maps are:
+
+.. code-block:: c
+
+ struct sk_psock_progs {
+ struct bpf_prog *msg_parser;
+ struct bpf_prog *stream_parser;
+ struct bpf_prog *stream_verdict;
+ struct bpf_prog *skb_verdict;
+ };
+
+.. note::
+ Users are not allowed to attach ``stream_verdict`` and ``skb_verdict``
+ programs to the same map.
+
+The attach types for the map programs are:
+
+- ``msg_parser`` program - ``BPF_SK_MSG_VERDICT``.
+- ``stream_parser`` program - ``BPF_SK_SKB_STREAM_PARSER``.
+- ``stream_verdict`` program - ``BPF_SK_SKB_STREAM_VERDICT``.
+- ``skb_verdict`` program - ``BPF_SK_SKB_VERDICT``.
+
+There are additional helpers available to use with the parser and verdict
+programs: ``bpf_msg_apply_bytes()`` and ``bpf_msg_cork_bytes()``. With
+``bpf_msg_apply_bytes()`` BPF programs can tell the infrastructure how many
+bytes the given verdict should apply to. The helper ``bpf_msg_cork_bytes()``
+handles a different case where a BPF program cannot reach a verdict on a msg
+until it receives more bytes AND the program doesn't want to forward the packet
+until it is known to be good.
+
+Finally, the helpers ``bpf_msg_pull_data()`` and ``bpf_msg_push_data()`` are
+available to ``BPF_PROG_TYPE_SK_MSG`` BPF programs to pull in data and set the
+start and end pointers to given values or to add metadata to the ``struct
+sk_msg_buff *msg``.
+
+All these helpers will be described in more detail below.
+
+Usage
+=====
+Kernel BPF
+----------
+bpf_msg_redirect_map()
+^^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+ long bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags)
+
+This helper is used in programs implementing policies at the socket level. If
+the message ``msg`` is allowed to pass (i.e., if the verdict BPF program
+returns ``SK_PASS``), redirect it to the socket referenced by ``map`` (of type
+``BPF_MAP_TYPE_SOCKMAP``) at index ``key``. Both ingress and egress interfaces
+can be used for redirection. The ``BPF_F_INGRESS`` value in ``flags`` is used
+to select the ingress path otherwise the egress path is selected. This is the
+only flag supported for now.
+
+Returns ``SK_PASS`` on success, or ``SK_DROP`` on error.
+
+bpf_sk_redirect_map()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+ long bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key u64 flags)
+
+Redirect the packet to the socket referenced by ``map`` (of type
+``BPF_MAP_TYPE_SOCKMAP``) at index ``key``. Both ingress and egress interfaces
+can be used for redirection. The ``BPF_F_INGRESS`` value in ``flags`` is used
+to select the ingress path otherwise the egress path is selected. This is the
+only flag supported for now.
+
+Returns ``SK_PASS`` on success, or ``SK_DROP`` on error.
+
+bpf_map_lookup_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+ void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)
+
+socket entries of type ``struct sock *`` can be retrieved using the
+``bpf_map_lookup_elem()`` helper.
+
+bpf_sock_map_update()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+ long bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
+
+Add an entry to, or update a ``map`` referencing sockets. The ``skops`` is used
+as a new value for the entry associated to ``key``. The ``flags`` argument can
+be one of the following:
+
+- ``BPF_ANY``: Create a new element or update an existing element.
+- ``BPF_NOEXIST``: Create a new element only if it did not exist.
+- ``BPF_EXIST``: Update an existing element.
+
+If the ``map`` has BPF programs (parser and verdict), those will be inherited
+by the socket being added. If the socket is already attached to BPF programs,
+this results in an error.
+
+Returns 0 on success, or a negative error in case of failure.
+
+bpf_sock_hash_update()
+^^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+ long bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
+
+Add an entry to, or update a sockhash ``map`` referencing sockets. The ``skops``
+is used as a new value for the entry associated to ``key``.
+
+The ``flags`` argument can be one of the following:
+
+- ``BPF_ANY``: Create a new element or update an existing element.
+- ``BPF_NOEXIST``: Create a new element only if it did not exist.
+- ``BPF_EXIST``: Update an existing element.
+
+If the ``map`` has BPF programs (parser and verdict), those will be inherited
+by the socket being added. If the socket is already attached to BPF programs,
+this results in an error.
+
+Returns 0 on success, or a negative error in case of failure.
+
+bpf_msg_redirect_hash()
+^^^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+ long bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags)
+
+This helper is used in programs implementing policies at the socket level. If
+the message ``msg`` is allowed to pass (i.e., if the verdict BPF program returns
+``SK_PASS``), redirect it to the socket referenced by ``map`` (of type
+``BPF_MAP_TYPE_SOCKHASH``) using hash ``key``. Both ingress and egress
+interfaces can be used for redirection. The ``BPF_F_INGRESS`` value in
+``flags`` is used to select the ingress path otherwise the egress path is
+selected. This is the only flag supported for now.
+
+Returns ``SK_PASS`` on success, or ``SK_DROP`` on error.
+
+bpf_sk_redirect_hash()
+^^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+ long bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags)
+
+This helper is used in programs implementing policies at the skb socket level.
+If the sk_buff ``skb`` is allowed to pass (i.e., if the verdict BPF program
+returns ``SK_PASS``), redirect it to the socket referenced by ``map`` (of type
+``BPF_MAP_TYPE_SOCKHASH``) using hash ``key``. Both ingress and egress
+interfaces can be used for redirection. The ``BPF_F_INGRESS`` value in
+``flags`` is used to select the ingress path otherwise the egress path is
+selected. This is the only flag supported for now.
+
+Returns ``SK_PASS`` on success, or ``SK_DROP`` on error.
+
+bpf_msg_apply_bytes()
+^^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+ long bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes)
+
+For socket policies, apply the verdict of the BPF program to the next (number
+of ``bytes``) of message ``msg``. For example, this helper can be used in the
+following cases:
+
+- A single ``sendmsg()`` or ``sendfile()`` system call contains multiple
+ logical messages that the BPF program is supposed to read and for which it
+ should apply a verdict.
+- A BPF program only cares to read the first ``bytes`` of a ``msg``. If the
+ message has a large payload, then setting up and calling the BPF program
+ repeatedly for all bytes, even though the verdict is already known, would
+ create unnecessary overhead.
+
+Returns 0
+
+bpf_msg_cork_bytes()
+^^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+ long bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes)
+
+For socket policies, prevent the execution of the verdict BPF program for
+message ``msg`` until the number of ``bytes`` have been accumulated.
+
+This can be used when one needs a specific number of bytes before a verdict can
+be assigned, even if the data spans multiple ``sendmsg()`` or ``sendfile()``
+calls.
+
+Returns 0
+
+bpf_msg_pull_data()
+^^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+ long bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags)
+
+For socket policies, pull in non-linear data from user space for ``msg`` and set
+pointers ``msg->data`` and ``msg->data_end`` to ``start`` and ``end`` bytes
+offsets into ``msg``, respectively.
+
+If a program of type ``BPF_PROG_TYPE_SK_MSG`` is run on a ``msg`` it can only
+parse data that the (``data``, ``data_end``) pointers have already consumed.
+For ``sendmsg()`` hooks this is likely the first scatterlist element. But for
+calls relying on MSG_SPLICE_PAGES (e.g., ``sendfile()``) this will be the
+range (**0**, **0**) because the data is shared with user space and by default
+the objective is to avoid allowing user space to modify data while (or after)
+BPF verdict is being decided. This helper can be used to pull in data and to
+set the start and end pointers to given values. Data will be copied if
+necessary (i.e., if data was not linear and if start and end pointers do not
+point to the same chunk).
+
+A call to this helper is susceptible to change the underlying packet buffer.
+Therefore, at load time, all checks on pointers previously done by the verifier
+are invalidated and must be performed again, if the helper is used in
+combination with direct packet access.
+
+All values for ``flags`` are reserved for future usage, and must be left at
+zero.
+
+Returns 0 on success, or a negative error in case of failure.
+
+bpf_map_lookup_elem()
+^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: c
+
+ void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)
+
+Look up a socket entry in the sockmap or sockhash map.
+
+Returns the socket entry associated to ``key``, or NULL if no entry was found.
+
+bpf_map_update_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+ long bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags)
+
+Add or update a socket entry in a sockmap or sockhash.
+
+The flags argument can be one of the following:
+
+- BPF_ANY: Create a new element or update an existing element.
+- BPF_NOEXIST: Create a new element only if it did not exist.
+- BPF_EXIST: Update an existing element.
+
+Returns 0 on success, or a negative error in case of failure.
+
+bpf_map_delete_elem()
+^^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+ long bpf_map_delete_elem(struct bpf_map *map, const void *key)
+
+Delete a socket entry from a sockmap or a sockhash.
+
+Returns 0 on success, or a negative error in case of failure.
+
+User space
+----------
+bpf_map_update_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+ int bpf_map_update_elem(int fd, const void *key, const void *value, __u64 flags)
+
+Sockmap entries can be added or updated using the ``bpf_map_update_elem()``
+function. The ``key`` parameter is the index value of the sockmap array. And the
+``value`` parameter is the FD value of that socket.
+
+Under the hood, the sockmap update function uses the socket FD value to
+retrieve the associated socket and its attached psock.
+
+The flags argument can be one of the following:
+
+- BPF_ANY: Create a new element or update an existing element.
+- BPF_NOEXIST: Create a new element only if it did not exist.
+- BPF_EXIST: Update an existing element.
+
+bpf_map_lookup_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+ int bpf_map_lookup_elem(int fd, const void *key, void *value)
+
+Sockmap entries can be retrieved using the ``bpf_map_lookup_elem()`` function.
+
+.. note::
+ The entry returned is a socket cookie rather than a socket itself.
+
+bpf_map_delete_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+ int bpf_map_delete_elem(int fd, const void *key)
+
+Sockmap entries can be deleted using the ``bpf_map_delete_elem()``
+function.
+
+Returns 0 on success, or negative error in case of failure.
+
+Examples
+========
+
+Kernel BPF
+----------
+Several examples of the use of sockmap APIs can be found in:
+
+- `tools/testing/selftests/bpf/progs/test_sockmap_kern.h`_
+- `tools/testing/selftests/bpf/progs/sockmap_parse_prog.c`_
+- `tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c`_
+- `tools/testing/selftests/bpf/progs/test_sockmap_listen.c`_
+- `tools/testing/selftests/bpf/progs/test_sockmap_update.c`_
+
+The following code snippet shows how to declare a sockmap.
+
+.. code-block:: c
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+ } sock_map_rx SEC(".maps");
+
+The following code snippet shows a sample parser program.
+
+.. code-block:: c
+
+ SEC("sk_skb/stream_parser")
+ int bpf_prog_parser(struct __sk_buff *skb)
+ {
+ return skb->len;
+ }
+
+The following code snippet shows a simple verdict program that interacts with a
+sockmap to redirect traffic to another socket based on the local port.
+
+.. code-block:: c
+
+ SEC("sk_skb/stream_verdict")
+ int bpf_prog_verdict(struct __sk_buff *skb)
+ {
+ __u32 lport = skb->local_port;
+ __u32 idx = 0;
+
+ if (lport == 10000)
+ return bpf_sk_redirect_map(skb, &sock_map_rx, idx, 0);
+
+ return SK_PASS;
+ }
+
+The following code snippet shows how to declare a sockhash map.
+
+.. code-block:: c
+
+ struct socket_key {
+ __u32 src_ip;
+ __u32 dst_ip;
+ __u32 src_port;
+ __u32 dst_port;
+ };
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_SOCKHASH);
+ __uint(max_entries, 1);
+ __type(key, struct socket_key);
+ __type(value, __u64);
+ } sock_hash_rx SEC(".maps");
+
+The following code snippet shows a simple verdict program that interacts with a
+sockhash to redirect traffic to another socket based on a hash of some of the
+skb parameters.
+
+.. code-block:: c
+
+ static inline
+ void extract_socket_key(struct __sk_buff *skb, struct socket_key *key)
+ {
+ key->src_ip = skb->remote_ip4;
+ key->dst_ip = skb->local_ip4;
+ key->src_port = skb->remote_port >> 16;
+ key->dst_port = (bpf_htonl(skb->local_port)) >> 16;
+ }
+
+ SEC("sk_skb/stream_verdict")
+ int bpf_prog_verdict(struct __sk_buff *skb)
+ {
+ struct socket_key key;
+
+ extract_socket_key(skb, &key);
+
+ return bpf_sk_redirect_hash(skb, &sock_hash_rx, &key, 0);
+ }
+
+User space
+----------
+Several examples of the use of sockmap APIs can be found in:
+
+- `tools/testing/selftests/bpf/prog_tests/sockmap_basic.c`_
+- `tools/testing/selftests/bpf/test_sockmap.c`_
+- `tools/testing/selftests/bpf/test_maps.c`_
+
+The following code sample shows how to create a sockmap, attach a parser and
+verdict program, as well as add a socket entry.
+
+.. code-block:: c
+
+ int create_sample_sockmap(int sock, int parse_prog_fd, int verdict_prog_fd)
+ {
+ int index = 0;
+ int map, err;
+
+ map = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(int), sizeof(int), 1, NULL);
+ if (map < 0) {
+ fprintf(stderr, "Failed to create sockmap: %s\n", strerror(errno));
+ return -1;
+ }
+
+ err = bpf_prog_attach(parse_prog_fd, map, BPF_SK_SKB_STREAM_PARSER, 0);
+ if (err){
+ fprintf(stderr, "Failed to attach_parser_prog_to_map: %s\n", strerror(errno));
+ goto out;
+ }
+
+ err = bpf_prog_attach(verdict_prog_fd, map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (err){
+ fprintf(stderr, "Failed to attach_verdict_prog_to_map: %s\n", strerror(errno));
+ goto out;
+ }
+
+ err = bpf_map_update_elem(map, &index, &sock, BPF_NOEXIST);
+ if (err) {
+ fprintf(stderr, "Failed to update sockmap: %s\n", strerror(errno));
+ goto out;
+ }
+
+ out:
+ close(map);
+ return err;
+ }
+
+References
+===========
+
+- https://github.com/jrfastab/linux-kernel-xdp/commit/c89fd73cb9d2d7f3c716c3e00836f07b1aeb261f
+- https://lwn.net/Articles/731133/
+- http://vger.kernel.org/lpc_net2018_talks/ktls_bpf_paper.pdf
+- https://lwn.net/Articles/748628/
+- https://lore.kernel.org/bpf/20200218171023.844439-7-jakub@cloudflare.com/
+
+.. _`tools/testing/selftests/bpf/progs/test_sockmap_kern.h`: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
+.. _`tools/testing/selftests/bpf/progs/sockmap_parse_prog.c`: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
+.. _`tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c`: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c
+.. _`tools/testing/selftests/bpf/prog_tests/sockmap_basic.c`: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+.. _`tools/testing/selftests/bpf/test_sockmap.c`: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/test_sockmap.c
+.. _`tools/testing/selftests/bpf/test_maps.c`: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/test_maps.c
+.. _`tools/testing/selftests/bpf/progs/test_sockmap_listen.c`: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
+.. _`tools/testing/selftests/bpf/progs/test_sockmap_update.c`: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/progs/test_sockmap_update.c
diff --git a/Documentation/bpf/map_xskmap.rst b/Documentation/bpf/map_xskmap.rst
new file mode 100644
index 000000000000..dc143edd9233
--- /dev/null
+++ b/Documentation/bpf/map_xskmap.rst
@@ -0,0 +1,192 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. Copyright (C) 2022 Red Hat, Inc.
+
+===================
+BPF_MAP_TYPE_XSKMAP
+===================
+
+.. note::
+ - ``BPF_MAP_TYPE_XSKMAP`` was introduced in kernel version 4.18
+
+The ``BPF_MAP_TYPE_XSKMAP`` is used as a backend map for XDP BPF helper
+call ``bpf_redirect_map()`` and ``XDP_REDIRECT`` action, like 'devmap' and 'cpumap'.
+This map type redirects raw XDP frames to `AF_XDP`_ sockets (XSKs), a new type of
+address family in the kernel that allows redirection of frames from a driver to
+user space without having to traverse the full network stack. An AF_XDP socket
+binds to a single netdev queue. A mapping of XSKs to queues is shown below:
+
+.. code-block:: none
+
+ +---------------------------------------------------+
+ | xsk A | xsk B | xsk C |<---+ User space
+ =========================================================|==========
+ | Queue 0 | Queue 1 | Queue 2 | | Kernel
+ +---------------------------------------------------+ |
+ | Netdev eth0 | |
+ +---------------------------------------------------+ |
+ | +=============+ | |
+ | | key | xsk | | |
+ | +---------+ +=============+ | |
+ | | | | 0 | xsk A | | |
+ | | | +-------------+ | |
+ | | | | 1 | xsk B | | |
+ | | BPF |-- redirect -->+-------------+-------------+
+ | | prog | | 2 | xsk C | |
+ | | | +-------------+ |
+ | | | |
+ | | | |
+ | +---------+ |
+ | |
+ +---------------------------------------------------+
+
+.. note::
+ An AF_XDP socket that is bound to a certain <netdev/queue_id> will *only*
+ accept XDP frames from that <netdev/queue_id>. If an XDP program tries to redirect
+ from a <netdev/queue_id> other than what the socket is bound to, the frame will
+ not be received on the socket.
+
+Typically an XSKMAP is created per netdev. This map contains an array of XSK File
+Descriptors (FDs). The number of array elements is typically set or adjusted using
+the ``max_entries`` map parameter. For AF_XDP ``max_entries`` is equal to the number
+of queues supported by the netdev.
+
+.. note::
+ Both the map key and map value size must be 4 bytes.
+
+Usage
+=====
+
+Kernel BPF
+----------
+bpf_redirect_map()
+^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+ long bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+
+Redirect the packet to the endpoint referenced by ``map`` at index ``key``.
+For ``BPF_MAP_TYPE_XSKMAP`` this map contains references to XSK FDs
+for sockets attached to a netdev's queues.
+
+.. note::
+ If the map is empty at an index, the packet is dropped. This means that it is
+ necessary to have an XDP program loaded with at least one XSK in the
+ XSKMAP to be able to get any traffic to user space through the socket.
+
+bpf_map_lookup_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+ void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)
+
+XSK entry references of type ``struct xdp_sock *`` can be retrieved using the
+``bpf_map_lookup_elem()`` helper.
+
+User space
+----------
+.. note::
+ XSK entries can only be updated/deleted from user space and not from
+ a BPF program. Trying to call these functions from a kernel BPF program will
+ result in the program failing to load and a verifier warning.
+
+bpf_map_update_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+ int bpf_map_update_elem(int fd, const void *key, const void *value, __u64 flags)
+
+XSK entries can be added or updated using the ``bpf_map_update_elem()``
+helper. The ``key`` parameter is equal to the queue_id of the queue the XSK
+is attaching to. And the ``value`` parameter is the FD value of that socket.
+
+Under the hood, the XSKMAP update function uses the XSK FD value to retrieve the
+associated ``struct xdp_sock`` instance.
+
+The flags argument can be one of the following:
+
+- BPF_ANY: Create a new element or update an existing element.
+- BPF_NOEXIST: Create a new element only if it did not exist.
+- BPF_EXIST: Update an existing element.
+
+bpf_map_lookup_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+ int bpf_map_lookup_elem(int fd, const void *key, void *value)
+
+Returns ``struct xdp_sock *`` or negative error in case of failure.
+
+bpf_map_delete_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+ int bpf_map_delete_elem(int fd, const void *key)
+
+XSK entries can be deleted using the ``bpf_map_delete_elem()``
+helper. This helper will return 0 on success, or negative error in case of
+failure.
+
+.. note::
+ When `libxdp`_ deletes an XSK it also removes the associated socket
+ entry from the XSKMAP.
+
+Examples
+========
+Kernel
+------
+
+The following code snippet shows how to declare a ``BPF_MAP_TYPE_XSKMAP`` called
+``xsks_map`` and how to redirect packets to an XSK.
+
+.. code-block:: c
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_XSKMAP);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 64);
+ } xsks_map SEC(".maps");
+
+
+ SEC("xdp")
+ int xsk_redir_prog(struct xdp_md *ctx)
+ {
+ __u32 index = ctx->rx_queue_index;
+
+ if (bpf_map_lookup_elem(&xsks_map, &index))
+ return bpf_redirect_map(&xsks_map, index, 0);
+ return XDP_PASS;
+ }
+
+User space
+----------
+
+The following code snippet shows how to update an XSKMAP with an XSK entry.
+
+.. code-block:: c
+
+ int update_xsks_map(struct bpf_map *xsks_map, int queue_id, int xsk_fd)
+ {
+ int ret;
+
+ ret = bpf_map_update_elem(bpf_map__fd(xsks_map), &queue_id, &xsk_fd, 0);
+ if (ret < 0)
+ fprintf(stderr, "Failed to update xsks_map: %s\n", strerror(errno));
+
+ return ret;
+ }
+
+For an example on how create AF_XDP sockets, please see the AF_XDP-example and
+AF_XDP-forwarding programs in the `bpf-examples`_ directory in the `libxdp`_ repository.
+For a detailed explanation of the AF_XDP interface please see:
+
+- `libxdp-readme`_.
+- `AF_XDP`_ kernel documentation.
+
+.. note::
+ The most comprehensive resource for using XSKMAPs and AF_XDP is `libxdp`_.
+
+.. _libxdp: https://github.com/xdp-project/xdp-tools/tree/master/lib/libxdp
+.. _AF_XDP: https://www.kernel.org/doc/html/latest/networking/af_xdp.html
+.. _bpf-examples: https://github.com/xdp-project/bpf-examples
+.. _libxdp-readme: https://github.com/xdp-project/xdp-tools/tree/master/lib/libxdp#using-af_xdp-sockets
diff --git a/Documentation/bpf/maps.rst b/Documentation/bpf/maps.rst
new file mode 100644
index 000000000000..6f069f3d6f4b
--- /dev/null
+++ b/Documentation/bpf/maps.rst
@@ -0,0 +1,82 @@
+
+========
+BPF maps
+========
+
+BPF 'maps' provide generic storage of different types for sharing data between
+kernel and user space. There are several storage types available, including
+hash, array, bloom filter and radix-tree. Several of the map types exist to
+support specific BPF helpers that perform actions based on the map contents. The
+maps are accessed from BPF programs via BPF helpers which are documented in the
+`man-pages`_ for `bpf-helpers(7)`_.
+
+BPF maps are accessed from user space via the ``bpf`` syscall, which provides
+commands to create maps, lookup elements, update elements and delete elements.
+More details of the BPF syscall are available in `ebpf-syscall`_ and in the
+`man-pages`_ for `bpf(2)`_.
+
+Map Types
+=========
+
+.. toctree::
+ :maxdepth: 1
+ :glob:
+
+ map_*
+
+Usage Notes
+===========
+
+.. c:function::
+ int bpf(int command, union bpf_attr *attr, u32 size)
+
+Use the ``bpf()`` system call to perform the operation specified by
+``command``. The operation takes parameters provided in ``attr``. The ``size``
+argument is the size of the ``union bpf_attr`` in ``attr``.
+
+**BPF_MAP_CREATE**
+
+Create a map with the desired type and attributes in ``attr``:
+
+.. code-block:: c
+
+ int fd;
+ union bpf_attr attr = {
+ .map_type = BPF_MAP_TYPE_ARRAY; /* mandatory */
+ .key_size = sizeof(__u32); /* mandatory */
+ .value_size = sizeof(__u32); /* mandatory */
+ .max_entries = 256; /* mandatory */
+ .map_flags = BPF_F_MMAPABLE;
+ .map_name = "example_array";
+ };
+
+ fd = bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+
+Returns a process-local file descriptor on success, or negative error in case of
+failure. The map can be deleted by calling ``close(fd)``. Maps held by open
+file descriptors will be deleted automatically when a process exits.
+
+.. note:: Valid characters for ``map_name`` are ``A-Z``, ``a-z``, ``0-9``,
+ ``'_'`` and ``'.'``.
+
+**BPF_MAP_LOOKUP_ELEM**
+
+Lookup key in a given map using ``attr->map_fd``, ``attr->key``,
+``attr->value``. Returns zero and stores found elem into ``attr->value`` on
+success, or negative error on failure.
+
+**BPF_MAP_UPDATE_ELEM**
+
+Create or update key/value pair in a given map using ``attr->map_fd``, ``attr->key``,
+``attr->value``. Returns zero on success or negative error on failure.
+
+**BPF_MAP_DELETE_ELEM**
+
+Find and delete element by key in a given map using ``attr->map_fd``,
+``attr->key``. Returns zero on success or negative error on failure.
+
+.. Links:
+.. _man-pages: https://www.kernel.org/doc/man-pages/
+.. _bpf(2): https://man7.org/linux/man-pages/man2/bpf.2.html
+.. _bpf-helpers(7): https://man7.org/linux/man-pages/man7/bpf-helpers.7.html
+.. _ebpf-syscall: https://docs.kernel.org/userspace-api/ebpf/syscall.html
diff --git a/Documentation/bpf/other.rst b/Documentation/bpf/other.rst
new file mode 100644
index 000000000000..7e6b12018802
--- /dev/null
+++ b/Documentation/bpf/other.rst
@@ -0,0 +1,10 @@
+=====
+Other
+=====
+
+.. toctree::
+ :maxdepth: 1
+
+ ringbuf
+ llvm_reloc
+ graph_ds_impl
diff --git a/Documentation/bpf/prog_cgroup_sockopt.rst b/Documentation/bpf/prog_cgroup_sockopt.rst
new file mode 100644
index 000000000000..1226a94af07a
--- /dev/null
+++ b/Documentation/bpf/prog_cgroup_sockopt.rst
@@ -0,0 +1,162 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============================
+BPF_PROG_TYPE_CGROUP_SOCKOPT
+============================
+
+``BPF_PROG_TYPE_CGROUP_SOCKOPT`` program type can be attached to two
+cgroup hooks:
+
+* ``BPF_CGROUP_GETSOCKOPT`` - called every time process executes ``getsockopt``
+ system call.
+* ``BPF_CGROUP_SETSOCKOPT`` - called every time process executes ``setsockopt``
+ system call.
+
+The context (``struct bpf_sockopt``) has associated socket (``sk``) and
+all input arguments: ``level``, ``optname``, ``optval`` and ``optlen``.
+
+BPF_CGROUP_SETSOCKOPT
+=====================
+
+``BPF_CGROUP_SETSOCKOPT`` is triggered *before* the kernel handling of
+sockopt and it has writable context: it can modify the supplied arguments
+before passing them down to the kernel. This hook has access to the cgroup
+and socket local storage.
+
+If BPF program sets ``optlen`` to -1, the control will be returned
+back to the userspace after all other BPF programs in the cgroup
+chain finish (i.e. kernel ``setsockopt`` handling will *not* be executed).
+
+Note, that ``optlen`` can not be increased beyond the user-supplied
+value. It can only be decreased or set to -1. Any other value will
+trigger ``EFAULT``.
+
+Return Type
+-----------
+
+* ``0`` - reject the syscall, ``EPERM`` will be returned to the userspace.
+* ``1`` - success, continue with next BPF program in the cgroup chain.
+
+BPF_CGROUP_GETSOCKOPT
+=====================
+
+``BPF_CGROUP_GETSOCKOPT`` is triggered *after* the kernel handing of
+sockopt. The BPF hook can observe ``optval``, ``optlen`` and ``retval``
+if it's interested in whatever kernel has returned. BPF hook can override
+the values above, adjust ``optlen`` and reset ``retval`` to 0. If ``optlen``
+has been increased above initial ``getsockopt`` value (i.e. userspace
+buffer is too small), ``EFAULT`` is returned.
+
+This hook has access to the cgroup and socket local storage.
+
+Note, that the only acceptable value to set to ``retval`` is 0 and the
+original value that the kernel returned. Any other value will trigger
+``EFAULT``.
+
+Return Type
+-----------
+
+* ``0`` - reject the syscall, ``EPERM`` will be returned to the userspace.
+* ``1`` - success: copy ``optval`` and ``optlen`` to userspace, return
+ ``retval`` from the syscall (note that this can be overwritten by
+ the BPF program from the parent cgroup).
+
+Cgroup Inheritance
+==================
+
+Suppose, there is the following cgroup hierarchy where each cgroup
+has ``BPF_CGROUP_GETSOCKOPT`` attached at each level with
+``BPF_F_ALLOW_MULTI`` flag::
+
+ A (root, parent)
+ \
+ B (child)
+
+When the application calls ``getsockopt`` syscall from the cgroup B,
+the programs are executed from the bottom up: B, A. First program
+(B) sees the result of kernel's ``getsockopt``. It can optionally
+adjust ``optval``, ``optlen`` and reset ``retval`` to 0. After that
+control will be passed to the second (A) program which will see the
+same context as B including any potential modifications.
+
+Same for ``BPF_CGROUP_SETSOCKOPT``: if the program is attached to
+A and B, the trigger order is B, then A. If B does any changes
+to the input arguments (``level``, ``optname``, ``optval``, ``optlen``),
+then the next program in the chain (A) will see those changes,
+*not* the original input ``setsockopt`` arguments. The potentially
+modified values will be then passed down to the kernel.
+
+Large optval
+============
+When the ``optval`` is greater than the ``PAGE_SIZE``, the BPF program
+can access only the first ``PAGE_SIZE`` of that data. So it has to options:
+
+* Set ``optlen`` to zero, which indicates that the kernel should
+ use the original buffer from the userspace. Any modifications
+ done by the BPF program to the ``optval`` are ignored.
+* Set ``optlen`` to the value less than ``PAGE_SIZE``, which
+ indicates that the kernel should use BPF's trimmed ``optval``.
+
+When the BPF program returns with the ``optlen`` greater than
+``PAGE_SIZE``, the userspace will receive original kernel
+buffers without any modifications that the BPF program might have
+applied.
+
+Example
+=======
+
+Recommended way to handle BPF programs is as follows:
+
+.. code-block:: c
+
+ SEC("cgroup/getsockopt")
+ int getsockopt(struct bpf_sockopt *ctx)
+ {
+ /* Custom socket option. */
+ if (ctx->level == MY_SOL && ctx->optname == MY_OPTNAME) {
+ ctx->retval = 0;
+ optval[0] = ...;
+ ctx->optlen = 1;
+ return 1;
+ }
+
+ /* Modify kernel's socket option. */
+ if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
+ ctx->retval = 0;
+ optval[0] = ...;
+ ctx->optlen = 1;
+ return 1;
+ }
+
+ /* optval larger than PAGE_SIZE use kernel's buffer. */
+ if (ctx->optlen > PAGE_SIZE)
+ ctx->optlen = 0;
+
+ return 1;
+ }
+
+ SEC("cgroup/setsockopt")
+ int setsockopt(struct bpf_sockopt *ctx)
+ {
+ /* Custom socket option. */
+ if (ctx->level == MY_SOL && ctx->optname == MY_OPTNAME) {
+ /* do something */
+ ctx->optlen = -1;
+ return 1;
+ }
+
+ /* Modify kernel's socket option. */
+ if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
+ optval[0] = ...;
+ return 1;
+ }
+
+ /* optval larger than PAGE_SIZE use kernel's buffer. */
+ if (ctx->optlen > PAGE_SIZE)
+ ctx->optlen = 0;
+
+ return 1;
+ }
+
+See ``tools/testing/selftests/bpf/progs/sockopt_sk.c`` for an example
+of BPF program that handles socket options.
diff --git a/Documentation/bpf/prog_cgroup_sysctl.rst b/Documentation/bpf/prog_cgroup_sysctl.rst
new file mode 100644
index 000000000000..677d6c637cf3
--- /dev/null
+++ b/Documentation/bpf/prog_cgroup_sysctl.rst
@@ -0,0 +1,125 @@
+.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+===========================
+BPF_PROG_TYPE_CGROUP_SYSCTL
+===========================
+
+This document describes ``BPF_PROG_TYPE_CGROUP_SYSCTL`` program type that
+provides cgroup-bpf hook for sysctl.
+
+The hook has to be attached to a cgroup and will be called every time a
+process inside that cgroup tries to read from or write to sysctl knob in proc.
+
+1. Attach type
+**************
+
+``BPF_CGROUP_SYSCTL`` attach type has to be used to attach
+``BPF_PROG_TYPE_CGROUP_SYSCTL`` program to a cgroup.
+
+2. Context
+**********
+
+``BPF_PROG_TYPE_CGROUP_SYSCTL`` provides access to the following context from
+BPF program::
+
+ struct bpf_sysctl {
+ __u32 write;
+ __u32 file_pos;
+ };
+
+* ``write`` indicates whether sysctl value is being read (``0``) or written
+ (``1``). This field is read-only.
+
+* ``file_pos`` indicates file position sysctl is being accessed at, read
+ or written. This field is read-write. Writing to the field sets the starting
+ position in sysctl proc file ``read(2)`` will be reading from or ``write(2)``
+ will be writing to. Writing zero to the field can be used e.g. to override
+ whole sysctl value by ``bpf_sysctl_set_new_value()`` on ``write(2)`` even
+ when it's called by user space on ``file_pos > 0``. Writing non-zero
+ value to the field can be used to access part of sysctl value starting from
+ specified ``file_pos``. Not all sysctl support access with ``file_pos !=
+ 0``, e.g. writes to numeric sysctl entries must always be at file position
+ ``0``. See also ``kernel.sysctl_writes_strict`` sysctl.
+
+See `linux/bpf.h`_ for more details on how context field can be accessed.
+
+3. Return code
+**************
+
+``BPF_PROG_TYPE_CGROUP_SYSCTL`` program must return one of the following
+return codes:
+
+* ``0`` means "reject access to sysctl";
+* ``1`` means "proceed with access".
+
+If program returns ``0`` user space will get ``-1`` from ``read(2)`` or
+``write(2)`` and ``errno`` will be set to ``EPERM``.
+
+4. Helpers
+**********
+
+Since sysctl knob is represented by a name and a value, sysctl specific BPF
+helpers focus on providing access to these properties:
+
+* ``bpf_sysctl_get_name()`` to get sysctl name as it is visible in
+ ``/proc/sys`` into provided by BPF program buffer;
+
+* ``bpf_sysctl_get_current_value()`` to get string value currently held by
+ sysctl into provided by BPF program buffer. This helper is available on both
+ ``read(2)`` from and ``write(2)`` to sysctl;
+
+* ``bpf_sysctl_get_new_value()`` to get new string value currently being
+ written to sysctl before actual write happens. This helper can be used only
+ on ``ctx->write == 1``;
+
+* ``bpf_sysctl_set_new_value()`` to override new string value currently being
+ written to sysctl before actual write happens. Sysctl value will be
+ overridden starting from the current ``ctx->file_pos``. If the whole value
+ has to be overridden BPF program can set ``file_pos`` to zero before calling
+ to the helper. This helper can be used only on ``ctx->write == 1``. New
+ string value set by the helper is treated and verified by kernel same way as
+ an equivalent string passed by user space.
+
+BPF program sees sysctl value same way as user space does in proc filesystem,
+i.e. as a string. Since many sysctl values represent an integer or a vector
+of integers, the following helpers can be used to get numeric value from the
+string:
+
+* ``bpf_strtol()`` to convert initial part of the string to long integer
+ similar to user space `strtol(3)`_;
+* ``bpf_strtoul()`` to convert initial part of the string to unsigned long
+ integer similar to user space `strtoul(3)`_;
+
+See `linux/bpf.h`_ for more details on helpers described here.
+
+5. Examples
+***********
+
+See `test_sysctl_prog.c`_ for an example of BPF program in C that access
+sysctl name and value, parses string value to get vector of integers and uses
+the result to make decision whether to allow or deny access to sysctl.
+
+6. Notes
+********
+
+``BPF_PROG_TYPE_CGROUP_SYSCTL`` is intended to be used in **trusted** root
+environment, for example to monitor sysctl usage or catch unreasonable values
+an application, running as root in a separate cgroup, is trying to set.
+
+Since `task_dfl_cgroup(current)` is called at `sys_read` / `sys_write` time it
+may return results different from that at `sys_open` time, i.e. process that
+opened sysctl file in proc filesystem may differ from process that is trying
+to read from / write to it and two such processes may run in different
+cgroups, what means ``BPF_PROG_TYPE_CGROUP_SYSCTL`` should not be used as a
+security mechanism to limit sysctl usage.
+
+As with any cgroup-bpf program additional care should be taken if an
+application running as root in a cgroup should not be allowed to
+detach/replace BPF program attached by administrator.
+
+.. Links
+.. _linux/bpf.h: ../../include/uapi/linux/bpf.h
+.. _strtol(3): http://man7.org/linux/man-pages/man3/strtol.3p.html
+.. _strtoul(3): http://man7.org/linux/man-pages/man3/strtoul.3p.html
+.. _test_sysctl_prog.c:
+ ../../tools/testing/selftests/bpf/progs/test_sysctl_prog.c
diff --git a/Documentation/bpf/prog_flow_dissector.rst b/Documentation/bpf/prog_flow_dissector.rst
new file mode 100644
index 000000000000..f24270b8b034
--- /dev/null
+++ b/Documentation/bpf/prog_flow_dissector.rst
@@ -0,0 +1,147 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============================
+BPF_PROG_TYPE_FLOW_DISSECTOR
+============================
+
+Overview
+========
+
+Flow dissector is a routine that parses metadata out of the packets. It's
+used in the various places in the networking subsystem (RFS, flow hash, etc).
+
+BPF flow dissector is an attempt to reimplement C-based flow dissector logic
+in BPF to gain all the benefits of BPF verifier (namely, limits on the
+number of instructions and tail calls).
+
+API
+===
+
+BPF flow dissector programs operate on an ``__sk_buff``. However, only the
+limited set of fields is allowed: ``data``, ``data_end`` and ``flow_keys``.
+``flow_keys`` is ``struct bpf_flow_keys`` and contains flow dissector input
+and output arguments.
+
+The inputs are:
+ * ``nhoff`` - initial offset of the networking header
+ * ``thoff`` - initial offset of the transport header, initialized to nhoff
+ * ``n_proto`` - L3 protocol type, parsed out of L2 header
+ * ``flags`` - optional flags
+
+Flow dissector BPF program should fill out the rest of the ``struct
+bpf_flow_keys`` fields. Input arguments ``nhoff/thoff/n_proto`` should be
+also adjusted accordingly.
+
+The return code of the BPF program is either BPF_OK to indicate successful
+dissection, or BPF_DROP to indicate parsing error.
+
+__sk_buff->data
+===============
+
+In the VLAN-less case, this is what the initial state of the BPF flow
+dissector looks like::
+
+ +------+------+------------+-----------+
+ | DMAC | SMAC | ETHER_TYPE | L3_HEADER |
+ +------+------+------------+-----------+
+ ^
+ |
+ +-- flow dissector starts here
+
+
+.. code:: c
+
+ skb->data + flow_keys->nhoff point to the first byte of L3_HEADER
+ flow_keys->thoff = nhoff
+ flow_keys->n_proto = ETHER_TYPE
+
+In case of VLAN, flow dissector can be called with the two different states.
+
+Pre-VLAN parsing::
+
+ +------+------+------+-----+-----------+-----------+
+ | DMAC | SMAC | TPID | TCI |ETHER_TYPE | L3_HEADER |
+ +------+------+------+-----+-----------+-----------+
+ ^
+ |
+ +-- flow dissector starts here
+
+.. code:: c
+
+ skb->data + flow_keys->nhoff point the to first byte of TCI
+ flow_keys->thoff = nhoff
+ flow_keys->n_proto = TPID
+
+Please note that TPID can be 802.1AD and, hence, BPF program would
+have to parse VLAN information twice for double tagged packets.
+
+
+Post-VLAN parsing::
+
+ +------+------+------+-----+-----------+-----------+
+ | DMAC | SMAC | TPID | TCI |ETHER_TYPE | L3_HEADER |
+ +------+------+------+-----+-----------+-----------+
+ ^
+ |
+ +-- flow dissector starts here
+
+.. code:: c
+
+ skb->data + flow_keys->nhoff point the to first byte of L3_HEADER
+ flow_keys->thoff = nhoff
+ flow_keys->n_proto = ETHER_TYPE
+
+In this case VLAN information has been processed before the flow dissector
+and BPF flow dissector is not required to handle it.
+
+
+The takeaway here is as follows: BPF flow dissector program can be called with
+the optional VLAN header and should gracefully handle both cases: when single
+or double VLAN is present and when it is not present. The same program
+can be called for both cases and would have to be written carefully to
+handle both cases.
+
+
+Flags
+=====
+
+``flow_keys->flags`` might contain optional input flags that work as follows:
+
+* ``BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG`` - tells BPF flow dissector to
+ continue parsing first fragment; the default expected behavior is that
+ flow dissector returns as soon as it finds out that the packet is fragmented;
+ used by ``eth_get_headlen`` to estimate length of all headers for GRO.
+* ``BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL`` - tells BPF flow dissector to
+ stop parsing as soon as it reaches IPv6 flow label; used by
+ ``___skb_get_hash`` to get flow hash.
+* ``BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP`` - tells BPF flow dissector to stop
+ parsing as soon as it reaches encapsulated headers; used by routing
+ infrastructure.
+
+
+Reference Implementation
+========================
+
+See ``tools/testing/selftests/bpf/progs/bpf_flow.c`` for the reference
+implementation and ``tools/testing/selftests/bpf/flow_dissector_load.[hc]``
+for the loader. bpftool can be used to load BPF flow dissector program as well.
+
+The reference implementation is organized as follows:
+ * ``jmp_table`` map that contains sub-programs for each supported L3 protocol
+ * ``_dissect`` routine - entry point; it does input ``n_proto`` parsing and
+ does ``bpf_tail_call`` to the appropriate L3 handler
+
+Since BPF at this point doesn't support looping (or any jumping back),
+jmp_table is used instead to handle multiple levels of encapsulation (and
+IPv6 options).
+
+
+Current Limitations
+===================
+BPF flow dissector doesn't support exporting all the metadata that in-kernel
+C-based implementation can export. Notable example is single VLAN (802.1Q)
+and double VLAN (802.1AD) tags. Please refer to the ``struct bpf_flow_keys``
+for a set of information that's currently can be exported from the BPF context.
+
+When BPF flow dissector is attached to the root network namespace (machine-wide
+policy), users can't override it in their child network namespaces.
diff --git a/Documentation/bpf/prog_lsm.rst b/Documentation/bpf/prog_lsm.rst
new file mode 100644
index 000000000000..ad2be02f30c2
--- /dev/null
+++ b/Documentation/bpf/prog_lsm.rst
@@ -0,0 +1,143 @@
+.. SPDX-License-Identifier: GPL-2.0+
+.. Copyright (C) 2020 Google LLC.
+
+================
+LSM BPF Programs
+================
+
+These BPF programs allow runtime instrumentation of the LSM hooks by privileged
+users to implement system-wide MAC (Mandatory Access Control) and Audit
+policies using eBPF.
+
+Structure
+---------
+
+The example shows an eBPF program that can be attached to the ``file_mprotect``
+LSM hook:
+
+.. c:function:: int file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot);
+
+Other LSM hooks which can be instrumented can be found in
+``security/security.c``.
+
+eBPF programs that use Documentation/bpf/btf.rst do not need to include kernel
+headers for accessing information from the attached eBPF program's context.
+They can simply declare the structures in the eBPF program and only specify
+the fields that need to be accessed.
+
+.. code-block:: c
+
+ struct mm_struct {
+ unsigned long start_brk, brk, start_stack;
+ } __attribute__((preserve_access_index));
+
+ struct vm_area_struct {
+ unsigned long start_brk, brk, start_stack;
+ unsigned long vm_start, vm_end;
+ struct mm_struct *vm_mm;
+ } __attribute__((preserve_access_index));
+
+
+.. note:: The order of the fields is irrelevant.
+
+This can be further simplified (if one has access to the BTF information at
+build time) by generating the ``vmlinux.h`` with:
+
+.. code-block:: console
+
+ # bpftool btf dump file <path-to-btf-vmlinux> format c > vmlinux.h
+
+.. note:: ``path-to-btf-vmlinux`` can be ``/sys/kernel/btf/vmlinux`` if the
+ build environment matches the environment the BPF programs are
+ deployed in.
+
+The ``vmlinux.h`` can then simply be included in the BPF programs without
+requiring the definition of the types.
+
+The eBPF programs can be declared using the``BPF_PROG``
+macros defined in `tools/lib/bpf/bpf_tracing.h`_. In this
+example:
+
+ * ``"lsm/file_mprotect"`` indicates the LSM hook that the program must
+ be attached to
+ * ``mprotect_audit`` is the name of the eBPF program
+
+.. code-block:: c
+
+ SEC("lsm/file_mprotect")
+ int BPF_PROG(mprotect_audit, struct vm_area_struct *vma,
+ unsigned long reqprot, unsigned long prot, int ret)
+ {
+ /* ret is the return value from the previous BPF program
+ * or 0 if it's the first hook.
+ */
+ if (ret != 0)
+ return ret;
+
+ int is_heap;
+
+ is_heap = (vma->vm_start >= vma->vm_mm->start_brk &&
+ vma->vm_end <= vma->vm_mm->brk);
+
+ /* Return an -EPERM or write information to the perf events buffer
+ * for auditing
+ */
+ if (is_heap)
+ return -EPERM;
+ }
+
+The ``__attribute__((preserve_access_index))`` is a clang feature that allows
+the BPF verifier to update the offsets for the access at runtime using the
+Documentation/bpf/btf.rst information. Since the BPF verifier is aware of the
+types, it also validates all the accesses made to the various types in the
+eBPF program.
+
+Loading
+-------
+
+eBPF programs can be loaded with the :manpage:`bpf(2)` syscall's
+``BPF_PROG_LOAD`` operation:
+
+.. code-block:: c
+
+ struct bpf_object *obj;
+
+ obj = bpf_object__open("./my_prog.o");
+ bpf_object__load(obj);
+
+This can be simplified by using a skeleton header generated by ``bpftool``:
+
+.. code-block:: console
+
+ # bpftool gen skeleton my_prog.o > my_prog.skel.h
+
+and the program can be loaded by including ``my_prog.skel.h`` and using
+the generated helper, ``my_prog__open_and_load``.
+
+Attachment to LSM Hooks
+-----------------------
+
+The LSM allows attachment of eBPF programs as LSM hooks using :manpage:`bpf(2)`
+syscall's ``BPF_RAW_TRACEPOINT_OPEN`` operation or more simply by
+using the libbpf helper ``bpf_program__attach_lsm``.
+
+The program can be detached from the LSM hook by *destroying* the ``link``
+link returned by ``bpf_program__attach_lsm`` using ``bpf_link__destroy``.
+
+One can also use the helpers generated in ``my_prog.skel.h`` i.e.
+``my_prog__attach`` for attachment and ``my_prog__destroy`` for cleaning up.
+
+Examples
+--------
+
+An example eBPF program can be found in
+`tools/testing/selftests/bpf/progs/lsm.c`_ and the corresponding
+userspace code in `tools/testing/selftests/bpf/prog_tests/test_lsm.c`_
+
+.. Links
+.. _tools/lib/bpf/bpf_tracing.h:
+ https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/tools/lib/bpf/bpf_tracing.h
+.. _tools/testing/selftests/bpf/progs/lsm.c:
+ https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/tools/testing/selftests/bpf/progs/lsm.c
+.. _tools/testing/selftests/bpf/prog_tests/test_lsm.c:
+ https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/tools/testing/selftests/bpf/prog_tests/test_lsm.c
diff --git a/Documentation/bpf/prog_sk_lookup.rst b/Documentation/bpf/prog_sk_lookup.rst
new file mode 100644
index 000000000000..85a305c19bcd
--- /dev/null
+++ b/Documentation/bpf/prog_sk_lookup.rst
@@ -0,0 +1,98 @@
+.. SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+
+=====================
+BPF sk_lookup program
+=====================
+
+BPF sk_lookup program type (``BPF_PROG_TYPE_SK_LOOKUP``) introduces programmability
+into the socket lookup performed by the transport layer when a packet is to be
+delivered locally.
+
+When invoked BPF sk_lookup program can select a socket that will receive the
+incoming packet by calling the ``bpf_sk_assign()`` BPF helper function.
+
+Hooks for a common attach point (``BPF_SK_LOOKUP``) exist for both TCP and UDP.
+
+Motivation
+==========
+
+BPF sk_lookup program type was introduced to address setup scenarios where
+binding sockets to an address with ``bind()`` socket call is impractical, such
+as:
+
+1. receiving connections on a range of IP addresses, e.g. 192.0.2.0/24, when
+ binding to a wildcard address ``INADRR_ANY`` is not possible due to a port
+ conflict,
+2. receiving connections on all or a wide range of ports, i.e. an L7 proxy use
+ case.
+
+Such setups would require creating and ``bind()``'ing one socket to each of the
+IP address/port in the range, leading to resource consumption and potential
+latency spikes during socket lookup.
+
+Attachment
+==========
+
+BPF sk_lookup program can be attached to a network namespace with
+``bpf(BPF_LINK_CREATE, ...)`` syscall using the ``BPF_SK_LOOKUP`` attach type and a
+netns FD as attachment ``target_fd``.
+
+Multiple programs can be attached to one network namespace. Programs will be
+invoked in the same order as they were attached.
+
+Hooks
+=====
+
+The attached BPF sk_lookup programs run whenever the transport layer needs to
+find a listening (TCP) or an unconnected (UDP) socket for an incoming packet.
+
+Incoming traffic to established (TCP) and connected (UDP) sockets is delivered
+as usual without triggering the BPF sk_lookup hook.
+
+The attached BPF programs must return with either ``SK_PASS`` or ``SK_DROP``
+verdict code. As for other BPF program types that are network filters,
+``SK_PASS`` signifies that the socket lookup should continue on to regular
+hashtable-based lookup, while ``SK_DROP`` causes the transport layer to drop the
+packet.
+
+A BPF sk_lookup program can also select a socket to receive the packet by
+calling ``bpf_sk_assign()`` BPF helper. Typically, the program looks up a socket
+in a map holding sockets, such as ``SOCKMAP`` or ``SOCKHASH``, and passes a
+``struct bpf_sock *`` to ``bpf_sk_assign()`` helper to record the
+selection. Selecting a socket only takes effect if the program has terminated
+with ``SK_PASS`` code.
+
+When multiple programs are attached, the end result is determined from return
+codes of all the programs according to the following rules:
+
+1. If any program returned ``SK_PASS`` and selected a valid socket, the socket
+ is used as the result of the socket lookup.
+2. If more than one program returned ``SK_PASS`` and selected a socket, the last
+ selection takes effect.
+3. If any program returned ``SK_DROP``, and no program returned ``SK_PASS`` and
+ selected a socket, socket lookup fails.
+4. If all programs returned ``SK_PASS`` and none of them selected a socket,
+ socket lookup continues on.
+
+API
+===
+
+In its context, an instance of ``struct bpf_sk_lookup``, BPF sk_lookup program
+receives information about the packet that triggered the socket lookup. Namely:
+
+* IP version (``AF_INET`` or ``AF_INET6``),
+* L4 protocol identifier (``IPPROTO_TCP`` or ``IPPROTO_UDP``),
+* source and destination IP address,
+* source and destination L4 port,
+* the socket that has been selected with ``bpf_sk_assign()``.
+
+Refer to ``struct bpf_sk_lookup`` declaration in ``linux/bpf.h`` user API
+header, and `bpf-helpers(7)
+<https://man7.org/linux/man-pages/man7/bpf-helpers.7.html>`_ man-page section
+for ``bpf_sk_assign()`` for details.
+
+Example
+=======
+
+See ``tools/testing/selftests/bpf/prog_tests/sk_lookup.c`` for the reference
+implementation.
diff --git a/Documentation/bpf/programs.rst b/Documentation/bpf/programs.rst
new file mode 100644
index 000000000000..c99000ab6d9b
--- /dev/null
+++ b/Documentation/bpf/programs.rst
@@ -0,0 +1,12 @@
+=============
+Program Types
+=============
+
+.. toctree::
+ :maxdepth: 1
+ :glob:
+
+ prog_*
+
+For a list of all program types, see :ref:`program_types_and_elf` in
+the :ref:`libbpf` documentation.
diff --git a/Documentation/bpf/redirect.rst b/Documentation/bpf/redirect.rst
new file mode 100644
index 000000000000..2fa2b0b05004
--- /dev/null
+++ b/Documentation/bpf/redirect.rst
@@ -0,0 +1,81 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. Copyright (C) 2022 Red Hat, Inc.
+
+========
+Redirect
+========
+XDP_REDIRECT
+############
+Supported maps
+--------------
+
+XDP_REDIRECT works with the following map types:
+
+- ``BPF_MAP_TYPE_DEVMAP``
+- ``BPF_MAP_TYPE_DEVMAP_HASH``
+- ``BPF_MAP_TYPE_CPUMAP``
+- ``BPF_MAP_TYPE_XSKMAP``
+
+For more information on these maps, please see the specific map documentation.
+
+Process
+-------
+
+.. kernel-doc:: net/core/filter.c
+ :doc: xdp redirect
+
+.. note::
+ Not all drivers support transmitting frames after a redirect, and for
+ those that do, not all of them support non-linear frames. Non-linear xdp
+ bufs/frames are bufs/frames that contain more than one fragment.
+
+Debugging packet drops
+----------------------
+Silent packet drops for XDP_REDIRECT can be debugged using:
+
+- bpf_trace
+- perf_record
+
+bpf_trace
+^^^^^^^^^
+The following bpftrace command can be used to capture and count all XDP tracepoints:
+
+.. code-block:: none
+
+ sudo bpftrace -e 'tracepoint:xdp:* { @cnt[probe] = count(); }'
+ Attaching 12 probes...
+ ^C
+
+ @cnt[tracepoint:xdp:mem_connect]: 18
+ @cnt[tracepoint:xdp:mem_disconnect]: 18
+ @cnt[tracepoint:xdp:xdp_exception]: 19605
+ @cnt[tracepoint:xdp:xdp_devmap_xmit]: 1393604
+ @cnt[tracepoint:xdp:xdp_redirect]: 22292200
+
+.. note::
+ The various xdp tracepoints can be found in ``source/include/trace/events/xdp.h``
+
+The following bpftrace command can be used to extract the ``ERRNO`` being returned as
+part of the err parameter:
+
+.. code-block:: none
+
+ sudo bpftrace -e \
+ 'tracepoint:xdp:xdp_redirect*_err {@redir_errno[-args->err] = count();}
+ tracepoint:xdp:xdp_devmap_xmit {@devmap_errno[-args->err] = count();}'
+
+perf record
+^^^^^^^^^^^
+The perf tool also supports recording tracepoints:
+
+.. code-block:: none
+
+ perf record -a -e xdp:xdp_redirect_err \
+ -e xdp:xdp_redirect_map_err \
+ -e xdp:xdp_exception \
+ -e xdp:xdp_devmap_xmit
+
+References
+===========
+
+- https://github.com/xdp-project/xdp-tutorial/tree/master/tracing02-xdp-monitor
diff --git a/Documentation/bpf/ringbuf.rst b/Documentation/bpf/ringbuf.rst
new file mode 100644
index 000000000000..a99cd05d79d4
--- /dev/null
+++ b/Documentation/bpf/ringbuf.rst
@@ -0,0 +1,206 @@
+===============
+BPF ring buffer
+===============
+
+This document describes BPF ring buffer design, API, and implementation details.
+
+.. contents::
+ :local:
+ :depth: 2
+
+Motivation
+----------
+
+There are two distinctive motivators for this work, which are not satisfied by
+existing perf buffer, which prompted creation of a new ring buffer
+implementation.
+
+- more efficient memory utilization by sharing ring buffer across CPUs;
+- preserving ordering of events that happen sequentially in time, even across
+ multiple CPUs (e.g., fork/exec/exit events for a task).
+
+These two problems are independent, but perf buffer fails to satisfy both.
+Both are a result of a choice to have per-CPU perf ring buffer. Both can be
+also solved by having an MPSC implementation of ring buffer. The ordering
+problem could technically be solved for perf buffer with some in-kernel
+counting, but given the first one requires an MPSC buffer, the same solution
+would solve the second problem automatically.
+
+Semantics and APIs
+------------------
+
+Single ring buffer is presented to BPF programs as an instance of BPF map of
+type ``BPF_MAP_TYPE_RINGBUF``. Two other alternatives considered, but
+ultimately rejected.
+
+One way would be to, similar to ``BPF_MAP_TYPE_PERF_EVENT_ARRAY``, make
+``BPF_MAP_TYPE_RINGBUF`` could represent an array of ring buffers, but not
+enforce "same CPU only" rule. This would be more familiar interface compatible
+with existing perf buffer use in BPF, but would fail if application needed more
+advanced logic to lookup ring buffer by arbitrary key.
+``BPF_MAP_TYPE_HASH_OF_MAPS`` addresses this with current approach.
+Additionally, given the performance of BPF ringbuf, many use cases would just
+opt into a simple single ring buffer shared among all CPUs, for which current
+approach would be an overkill.
+
+Another approach could introduce a new concept, alongside BPF map, to represent
+generic "container" object, which doesn't necessarily have key/value interface
+with lookup/update/delete operations. This approach would add a lot of extra
+infrastructure that has to be built for observability and verifier support. It
+would also add another concept that BPF developers would have to familiarize
+themselves with, new syntax in libbpf, etc. But then would really provide no
+additional benefits over the approach of using a map. ``BPF_MAP_TYPE_RINGBUF``
+doesn't support lookup/update/delete operations, but so doesn't few other map
+types (e.g., queue and stack; array doesn't support delete, etc).
+
+The approach chosen has an advantage of re-using existing BPF map
+infrastructure (introspection APIs in kernel, libbpf support, etc), being
+familiar concept (no need to teach users a new type of object in BPF program),
+and utilizing existing tooling (bpftool). For common scenario of using a single
+ring buffer for all CPUs, it's as simple and straightforward, as would be with
+a dedicated "container" object. On the other hand, by being a map, it can be
+combined with ``ARRAY_OF_MAPS`` and ``HASH_OF_MAPS`` map-in-maps to implement
+a wide variety of topologies, from one ring buffer for each CPU (e.g., as
+a replacement for perf buffer use cases), to a complicated application
+hashing/sharding of ring buffers (e.g., having a small pool of ring buffers
+with hashed task's tgid being a look up key to preserve order, but reduce
+contention).
+
+Key and value sizes are enforced to be zero. ``max_entries`` is used to specify
+the size of ring buffer and has to be a power of 2 value.
+
+There are a bunch of similarities between perf buffer
+(``BPF_MAP_TYPE_PERF_EVENT_ARRAY``) and new BPF ring buffer semantics:
+
+- variable-length records;
+- if there is no more space left in ring buffer, reservation fails, no
+ blocking;
+- memory-mappable data area for user-space applications for ease of
+ consumption and high performance;
+- epoll notifications for new incoming data;
+- but still the ability to do busy polling for new data to achieve the
+ lowest latency, if necessary.
+
+BPF ringbuf provides two sets of APIs to BPF programs:
+
+- ``bpf_ringbuf_output()`` allows to *copy* data from one place to a ring
+ buffer, similarly to ``bpf_perf_event_output()``;
+- ``bpf_ringbuf_reserve()``/``bpf_ringbuf_commit()``/``bpf_ringbuf_discard()``
+ APIs split the whole process into two steps. First, a fixed amount of space
+ is reserved. If successful, a pointer to a data inside ring buffer data
+ area is returned, which BPF programs can use similarly to a data inside
+ array/hash maps. Once ready, this piece of memory is either committed or
+ discarded. Discard is similar to commit, but makes consumer ignore the
+ record.
+
+``bpf_ringbuf_output()`` has disadvantage of incurring extra memory copy,
+because record has to be prepared in some other place first. But it allows to
+submit records of the length that's not known to verifier beforehand. It also
+closely matches ``bpf_perf_event_output()``, so will simplify migration
+significantly.
+
+``bpf_ringbuf_reserve()`` avoids the extra copy of memory by providing a memory
+pointer directly to ring buffer memory. In a lot of cases records are larger
+than BPF stack space allows, so many programs have use extra per-CPU array as
+a temporary heap for preparing sample. bpf_ringbuf_reserve() avoid this needs
+completely. But in exchange, it only allows a known constant size of memory to
+be reserved, such that verifier can verify that BPF program can't access memory
+outside its reserved record space. bpf_ringbuf_output(), while slightly slower
+due to extra memory copy, covers some use cases that are not suitable for
+``bpf_ringbuf_reserve()``.
+
+The difference between commit and discard is very small. Discard just marks
+a record as discarded, and such records are supposed to be ignored by consumer
+code. Discard is useful for some advanced use-cases, such as ensuring
+all-or-nothing multi-record submission, or emulating temporary
+``malloc()``/``free()`` within single BPF program invocation.
+
+Each reserved record is tracked by verifier through existing
+reference-tracking logic, similar to socket ref-tracking. It is thus
+impossible to reserve a record, but forget to submit (or discard) it.
+
+``bpf_ringbuf_query()`` helper allows to query various properties of ring
+buffer. Currently 4 are supported:
+
+- ``BPF_RB_AVAIL_DATA`` returns amount of unconsumed data in ring buffer;
+- ``BPF_RB_RING_SIZE`` returns the size of ring buffer;
+- ``BPF_RB_CONS_POS``/``BPF_RB_PROD_POS`` returns current logical position
+ of consumer/producer, respectively.
+
+Returned values are momentarily snapshots of ring buffer state and could be
+off by the time helper returns, so this should be used only for
+debugging/reporting reasons or for implementing various heuristics, that take
+into account highly-changeable nature of some of those characteristics.
+
+One such heuristic might involve more fine-grained control over poll/epoll
+notifications about new data availability in ring buffer. Together with
+``BPF_RB_NO_WAKEUP``/``BPF_RB_FORCE_WAKEUP`` flags for output/commit/discard
+helpers, it allows BPF program a high degree of control and, e.g., more
+efficient batched notifications. Default self-balancing strategy, though,
+should be adequate for most applications and will work reliable and efficiently
+already.
+
+Design and Implementation
+-------------------------
+
+This reserve/commit schema allows a natural way for multiple producers, either
+on different CPUs or even on the same CPU/in the same BPF program, to reserve
+independent records and work with them without blocking other producers. This
+means that if BPF program was interrupted by another BPF program sharing the
+same ring buffer, they will both get a record reserved (provided there is
+enough space left) and can work with it and submit it independently. This
+applies to NMI context as well, except that due to using a spinlock during
+reservation, in NMI context, ``bpf_ringbuf_reserve()`` might fail to get
+a lock, in which case reservation will fail even if ring buffer is not full.
+
+The ring buffer itself internally is implemented as a power-of-2 sized
+circular buffer, with two logical and ever-increasing counters (which might
+wrap around on 32-bit architectures, that's not a problem):
+
+- consumer counter shows up to which logical position consumer consumed the
+ data;
+- producer counter denotes amount of data reserved by all producers.
+
+Each time a record is reserved, producer that "owns" the record will
+successfully advance producer counter. At that point, data is still not yet
+ready to be consumed, though. Each record has 8 byte header, which contains the
+length of reserved record, as well as two extra bits: busy bit to denote that
+record is still being worked on, and discard bit, which might be set at commit
+time if record is discarded. In the latter case, consumer is supposed to skip
+the record and move on to the next one. Record header also encodes record's
+relative offset from the beginning of ring buffer data area (in pages). This
+allows ``bpf_ringbuf_commit()``/``bpf_ringbuf_discard()`` to accept only the
+pointer to the record itself, without requiring also the pointer to ring buffer
+itself. Ring buffer memory location will be restored from record metadata
+header. This significantly simplifies verifier, as well as improving API
+usability.
+
+Producer counter increments are serialized under spinlock, so there is
+a strict ordering between reservations. Commits, on the other hand, are
+completely lockless and independent. All records become available to consumer
+in the order of reservations, but only after all previous records where
+already committed. It is thus possible for slow producers to temporarily hold
+off submitted records, that were reserved later.
+
+One interesting implementation bit, that significantly simplifies (and thus
+speeds up as well) implementation of both producers and consumers is how data
+area is mapped twice contiguously back-to-back in the virtual memory. This
+allows to not take any special measures for samples that have to wrap around
+at the end of the circular buffer data area, because the next page after the
+last data page would be first data page again, and thus the sample will still
+appear completely contiguous in virtual memory. See comment and a simple ASCII
+diagram showing this visually in ``bpf_ringbuf_area_alloc()``.
+
+Another feature that distinguishes BPF ringbuf from perf ring buffer is
+a self-pacing notifications of new data being availability.
+``bpf_ringbuf_commit()`` implementation will send a notification of new record
+being available after commit only if consumer has already caught up right up to
+the record being committed. If not, consumer still has to catch up and thus
+will see new data anyways without needing an extra poll notification.
+Benchmarks (see tools/testing/selftests/bpf/benchs/bench_ringbufs.c) show that
+this allows to achieve a very high throughput without having to resort to
+tricks like "notify only every Nth sample", which are necessary with perf
+buffer. For extreme cases, when BPF program wants more manual control of
+notifications, commit/discard/output helpers accept ``BPF_RB_NO_WAKEUP`` and
+``BPF_RB_FORCE_WAKEUP`` flags, which give full control over notifications of
+data availability, but require extra caution and diligence in using this API.
diff --git a/Documentation/bpf/s390.rst b/Documentation/bpf/s390.rst
new file mode 100644
index 000000000000..21ecb309daea
--- /dev/null
+++ b/Documentation/bpf/s390.rst
@@ -0,0 +1,205 @@
+===================
+Testing BPF on s390
+===================
+
+1. Introduction
+***************
+
+IBM Z are mainframe computers, which are descendants of IBM System/360 from
+year 1964. They are supported by the Linux kernel under the name "s390". This
+document describes how to test BPF in an s390 QEMU guest.
+
+2. One-time setup
+*****************
+
+The following is required to build and run the test suite:
+
+ * s390 GCC
+ * s390 development headers and libraries
+ * Clang with BPF support
+ * QEMU with s390 support
+ * Disk image with s390 rootfs
+
+Debian supports installing compiler and libraries for s390 out of the box.
+Users of other distros may use debootstrap in order to set up a Debian chroot::
+
+ sudo debootstrap \
+ --variant=minbase \
+ --include=sudo \
+ testing \
+ ./s390-toolchain
+ sudo mount --rbind /dev ./s390-toolchain/dev
+ sudo mount --rbind /proc ./s390-toolchain/proc
+ sudo mount --rbind /sys ./s390-toolchain/sys
+ sudo chroot ./s390-toolchain
+
+Once on Debian, the build prerequisites can be installed as follows::
+
+ sudo dpkg --add-architecture s390x
+ sudo apt-get update
+ sudo apt-get install \
+ bc \
+ bison \
+ cmake \
+ debootstrap \
+ dwarves \
+ flex \
+ g++ \
+ gcc \
+ g++-s390x-linux-gnu \
+ gcc-s390x-linux-gnu \
+ gdb-multiarch \
+ git \
+ make \
+ python3 \
+ qemu-system-misc \
+ qemu-utils \
+ rsync \
+ libcap-dev:s390x \
+ libelf-dev:s390x \
+ libncurses-dev
+
+Latest Clang targeting BPF can be installed as follows::
+
+ git clone https://github.com/llvm/llvm-project.git
+ ln -s ../../clang llvm-project/llvm/tools/
+ mkdir llvm-project-build
+ cd llvm-project-build
+ cmake \
+ -DLLVM_TARGETS_TO_BUILD=BPF \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DCMAKE_INSTALL_PREFIX=/opt/clang-bpf \
+ ../llvm-project/llvm
+ make
+ sudo make install
+ export PATH=/opt/clang-bpf/bin:$PATH
+
+The disk image can be prepared using a loopback mount and debootstrap::
+
+ qemu-img create -f raw ./s390.img 1G
+ sudo losetup -f ./s390.img
+ sudo mkfs.ext4 /dev/loopX
+ mkdir ./s390.rootfs
+ sudo mount /dev/loopX ./s390.rootfs
+ sudo debootstrap \
+ --foreign \
+ --arch=s390x \
+ --variant=minbase \
+ --include=" \
+ iproute2, \
+ iputils-ping, \
+ isc-dhcp-client, \
+ kmod, \
+ libcap2, \
+ libelf1, \
+ netcat, \
+ procps" \
+ testing \
+ ./s390.rootfs
+ sudo umount ./s390.rootfs
+ sudo losetup -d /dev/loopX
+
+3. Compilation
+**************
+
+In addition to the usual Kconfig options required to run the BPF test suite, it
+is also helpful to select::
+
+ CONFIG_NET_9P=y
+ CONFIG_9P_FS=y
+ CONFIG_NET_9P_VIRTIO=y
+ CONFIG_VIRTIO_PCI=y
+
+as that would enable a very easy way to share files with the s390 virtual
+machine.
+
+Compiling kernel, modules and testsuite, as well as preparing gdb scripts to
+simplify debugging, can be done using the following commands::
+
+ make ARCH=s390 CROSS_COMPILE=s390x-linux-gnu- menuconfig
+ make ARCH=s390 CROSS_COMPILE=s390x-linux-gnu- bzImage modules scripts_gdb
+ make ARCH=s390 CROSS_COMPILE=s390x-linux-gnu- \
+ -C tools/testing/selftests \
+ TARGETS=bpf \
+ INSTALL_PATH=$PWD/tools/testing/selftests/kselftest_install \
+ install
+
+4. Running the test suite
+*************************
+
+The virtual machine can be started as follows::
+
+ qemu-system-s390x \
+ -cpu max,zpci=on \
+ -smp 2 \
+ -m 4G \
+ -kernel linux/arch/s390/boot/compressed/vmlinux \
+ -drive file=./s390.img,if=virtio,format=raw \
+ -nographic \
+ -append 'root=/dev/vda rw console=ttyS1' \
+ -virtfs local,path=./linux,security_model=none,mount_tag=linux \
+ -object rng-random,filename=/dev/urandom,id=rng0 \
+ -device virtio-rng-ccw,rng=rng0 \
+ -netdev user,id=net0 \
+ -device virtio-net-ccw,netdev=net0
+
+When using this on a real IBM Z, ``-enable-kvm`` may be added for better
+performance. When starting the virtual machine for the first time, disk image
+setup must be finalized using the following command::
+
+ /debootstrap/debootstrap --second-stage
+
+Directory with the code built on the host as well as ``/proc`` and ``/sys``
+need to be mounted as follows::
+
+ mkdir -p /linux
+ mount -t 9p linux /linux
+ mount -t proc proc /proc
+ mount -t sysfs sys /sys
+
+After that, the test suite can be run using the following commands::
+
+ cd /linux/tools/testing/selftests/kselftest_install
+ ./run_kselftest.sh
+
+As usual, tests can be also run individually::
+
+ cd /linux/tools/testing/selftests/bpf
+ ./test_verifier
+
+5. Debugging
+************
+
+It is possible to debug the s390 kernel using QEMU GDB stub, which is activated
+by passing ``-s`` to QEMU.
+
+It is preferable to turn KASLR off, so that gdb would know where to find the
+kernel image in memory, by building the kernel with::
+
+ RANDOMIZE_BASE=n
+
+GDB can then be attached using the following command::
+
+ gdb-multiarch -ex 'target remote localhost:1234' ./vmlinux
+
+6. Network
+**********
+
+In case one needs to use the network in the virtual machine in order to e.g.
+install additional packages, it can be configured using::
+
+ dhclient eth0
+
+7. Links
+********
+
+This document is a compilation of techniques, whose more comprehensive
+descriptions can be found by following these links:
+
+- `Debootstrap <https://wiki.debian.org/EmDebian/CrossDebootstrap>`_
+- `Multiarch <https://wiki.debian.org/Multiarch/HOWTO>`_
+- `Building LLVM <https://llvm.org/docs/CMake.html>`_
+- `Cross-compiling the kernel <https://wiki.gentoo.org/wiki/Embedded_Handbook/General/Cross-compiling_the_kernel>`_
+- `QEMU s390x Guest Support <https://wiki.qemu.org/Documentation/Platforms/S390X>`_
+- `Plan 9 folder sharing over Virtio <https://wiki.qemu.org/Documentation/9psetup>`_
+- `Using GDB with QEMU <https://wiki.osdev.org/Kernel_Debugging#Use_GDB_with_QEMU>`_
diff --git a/Documentation/bpf/standardization/abi.rst b/Documentation/bpf/standardization/abi.rst
new file mode 100644
index 000000000000..41514137cb7b
--- /dev/null
+++ b/Documentation/bpf/standardization/abi.rst
@@ -0,0 +1,28 @@
+.. contents::
+.. sectnum::
+
+===================================================
+BPF ABI Recommended Conventions and Guidelines v1.0
+===================================================
+
+This is version 1.0 of an informational document containing recommended
+conventions and guidelines for producing portable BPF program binaries.
+
+Registers and calling convention
+================================
+
+BPF has 10 general purpose registers and a read-only frame pointer register,
+all of which are 64-bits wide.
+
+The BPF calling convention is defined as:
+
+* R0: return value from function calls, and exit value for BPF programs
+* R1 - R5: arguments for function calls
+* R6 - R9: callee saved registers that function calls will preserve
+* R10: read-only frame pointer to access stack
+
+R0 - R5 are scratch registers and BPF programs needs to spill/fill them if
+necessary across calls.
+
+The BPF program needs to store the return value into register R0 before doing an
+``EXIT``.
diff --git a/Documentation/bpf/standardization/index.rst b/Documentation/bpf/standardization/index.rst
new file mode 100644
index 000000000000..a50c3baf6345
--- /dev/null
+++ b/Documentation/bpf/standardization/index.rst
@@ -0,0 +1,18 @@
+.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+===================
+BPF Standardization
+===================
+
+This directory contains documents that are being iterated on as part of the BPF
+standardization effort with the IETF. See the `IETF BPF Working Group`_ page
+for the working group charter, documents, and more.
+
+.. toctree::
+ :maxdepth: 1
+
+ instruction-set
+ abi
+
+.. Links:
+.. _IETF BPF Working Group: https://datatracker.ietf.org/wg/bpf/about/
diff --git a/Documentation/bpf/standardization/instruction-set.rst b/Documentation/bpf/standardization/instruction-set.rst
new file mode 100644
index 000000000000..39c74611752b
--- /dev/null
+++ b/Documentation/bpf/standardization/instruction-set.rst
@@ -0,0 +1,790 @@
+.. contents::
+.. sectnum::
+
+======================================
+BPF Instruction Set Architecture (ISA)
+======================================
+
+eBPF, also commonly
+referred to as BPF, is a technology with origins in the Linux kernel
+that can run untrusted programs in a privileged context such as an
+operating system kernel. This document specifies the BPF instruction
+set architecture (ISA).
+
+As a historical note, BPF originally stood for Berkeley Packet Filter,
+but now that it can do so much more than packet filtering, the acronym
+no longer makes sense. BPF is now considered a standalone term that
+does not stand for anything. The original BPF is sometimes referred to
+as cBPF (classic BPF) to distinguish it from the now widely deployed
+eBPF (extended BPF).
+
+Documentation conventions
+=========================
+
+The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
+"SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED", "MAY", and
+"OPTIONAL" in this document are to be interpreted as described in
+BCP 14 `<https://www.rfc-editor.org/info/rfc2119>`_
+`<https://www.rfc-editor.org/info/rfc8174>`_
+when, and only when, they appear in all capitals, as shown here.
+
+For brevity and consistency, this document refers to families
+of types using a shorthand syntax and refers to several expository,
+mnemonic functions when describing the semantics of instructions.
+The range of valid values for those types and the semantics of those
+functions are defined in the following subsections.
+
+Types
+-----
+This document refers to integer types with the notation `SN` to specify
+a type's signedness (`S`) and bit width (`N`), respectively.
+
+.. table:: Meaning of signedness notation
+
+ ==== =========
+ S Meaning
+ ==== =========
+ u unsigned
+ s signed
+ ==== =========
+
+.. table:: Meaning of bit-width notation
+
+ ===== =========
+ N Bit width
+ ===== =========
+ 8 8 bits
+ 16 16 bits
+ 32 32 bits
+ 64 64 bits
+ 128 128 bits
+ ===== =========
+
+For example, `u32` is a type whose valid values are all the 32-bit unsigned
+numbers and `s16` is a type whose valid values are all the 16-bit signed
+numbers.
+
+Functions
+---------
+
+The following byteswap functions are direction-agnostic. That is,
+the same function is used for conversion in either direction discussed
+below.
+
+* be16: Takes an unsigned 16-bit number and converts it between
+ host byte order and big-endian
+ (`IEN137 <https://www.rfc-editor.org/ien/ien137.txt>`_) byte order.
+* be32: Takes an unsigned 32-bit number and converts it between
+ host byte order and big-endian byte order.
+* be64: Takes an unsigned 64-bit number and converts it between
+ host byte order and big-endian byte order.
+* bswap16: Takes an unsigned 16-bit number in either big- or little-endian
+ format and returns the equivalent number with the same bit width but
+ opposite endianness.
+* bswap32: Takes an unsigned 32-bit number in either big- or little-endian
+ format and returns the equivalent number with the same bit width but
+ opposite endianness.
+* bswap64: Takes an unsigned 64-bit number in either big- or little-endian
+ format and returns the equivalent number with the same bit width but
+ opposite endianness.
+* le16: Takes an unsigned 16-bit number and converts it between
+ host byte order and little-endian byte order.
+* le32: Takes an unsigned 32-bit number and converts it between
+ host byte order and little-endian byte order.
+* le64: Takes an unsigned 64-bit number and converts it between
+ host byte order and little-endian byte order.
+
+Definitions
+-----------
+
+.. glossary::
+
+ Sign Extend
+ To `sign extend an` ``X`` `-bit number, A, to a` ``Y`` `-bit number, B ,` means to
+
+ #. Copy all ``X`` bits from `A` to the lower ``X`` bits of `B`.
+ #. Set the value of the remaining ``Y`` - ``X`` bits of `B` to the value of
+ the most-significant bit of `A`.
+
+.. admonition:: Example
+
+ Sign extend an 8-bit number ``A`` to a 16-bit number ``B`` on a big-endian platform:
+ ::
+
+ A: 10000110
+ B: 11111111 10000110
+
+Conformance groups
+------------------
+
+An implementation does not need to support all instructions specified in this
+document (e.g., deprecated instructions). Instead, a number of conformance
+groups are specified. An implementation MUST support the base32 conformance
+group and MAY support additional conformance groups, where supporting a
+conformance group means it MUST support all instructions in that conformance
+group.
+
+The use of named conformance groups enables interoperability between a runtime
+that executes instructions, and tools such as compilers that generate
+instructions for the runtime. Thus, capability discovery in terms of
+conformance groups might be done manually by users or automatically by tools.
+
+Each conformance group has a short ASCII label (e.g., "base32") that
+corresponds to a set of instructions that are mandatory. That is, each
+instruction has one or more conformance groups of which it is a member.
+
+This document defines the following conformance groups:
+
+* base32: includes all instructions defined in this
+ specification unless otherwise noted.
+* base64: includes base32, plus instructions explicitly noted
+ as being in the base64 conformance group.
+* atomic32: includes 32-bit atomic operation instructions (see `Atomic operations`_).
+* atomic64: includes atomic32, plus 64-bit atomic operation instructions.
+* divmul32: includes 32-bit division, multiplication, and modulo instructions.
+* divmul64: includes divmul32, plus 64-bit division, multiplication,
+ and modulo instructions.
+* packet: deprecated packet access instructions.
+
+Instruction encoding
+====================
+
+BPF has two instruction encodings:
+
+* the basic instruction encoding, which uses 64 bits to encode an instruction
+* the wide instruction encoding, which appends a second 64 bits
+ after the basic instruction for a total of 128 bits.
+
+Basic instruction encoding
+--------------------------
+
+A basic instruction is encoded as follows::
+
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | opcode | regs | offset |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | imm |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+**opcode**
+ operation to perform, encoded as follows::
+
+ +-+-+-+-+-+-+-+-+
+ |specific |class|
+ +-+-+-+-+-+-+-+-+
+
+ **specific**
+ The format of these bits varies by instruction class
+
+ **class**
+ The instruction class (see `Instruction classes`_)
+
+**regs**
+ The source and destination register numbers, encoded as follows
+ on a little-endian host::
+
+ +-+-+-+-+-+-+-+-+
+ |src_reg|dst_reg|
+ +-+-+-+-+-+-+-+-+
+
+ and as follows on a big-endian host::
+
+ +-+-+-+-+-+-+-+-+
+ |dst_reg|src_reg|
+ +-+-+-+-+-+-+-+-+
+
+ **src_reg**
+ the source register number (0-10), except where otherwise specified
+ (`64-bit immediate instructions`_ reuse this field for other purposes)
+
+ **dst_reg**
+ destination register number (0-10), unless otherwise specified
+ (future instructions might reuse this field for other purposes)
+
+**offset**
+ signed integer offset used with pointer arithmetic, except where
+ otherwise specified (some arithmetic instructions reuse this field
+ for other purposes)
+
+**imm**
+ signed integer immediate value
+
+Note that the contents of multi-byte fields ('offset' and 'imm') are
+stored using big-endian byte ordering on big-endian hosts and
+little-endian byte ordering on little-endian hosts.
+
+For example::
+
+ opcode offset imm assembly
+ src_reg dst_reg
+ 07 0 1 00 00 44 33 22 11 r1 += 0x11223344 // little
+ dst_reg src_reg
+ 07 1 0 00 00 11 22 33 44 r1 += 0x11223344 // big
+
+Note that most instructions do not use all of the fields.
+Unused fields SHALL be cleared to zero.
+
+Wide instruction encoding
+--------------------------
+
+Some instructions are defined to use the wide instruction encoding,
+which uses two 32-bit immediate values. The 64 bits following
+the basic instruction format contain a pseudo instruction
+with 'opcode', 'dst_reg', 'src_reg', and 'offset' all set to zero.
+
+This is depicted in the following figure::
+
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | opcode | regs | offset |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | imm |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | reserved |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | next_imm |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+**opcode**
+ operation to perform, encoded as explained above
+
+**regs**
+ The source and destination register numbers (unless otherwise
+ specified), encoded as explained above
+
+**offset**
+ signed integer offset used with pointer arithmetic, unless
+ otherwise specified
+
+**imm**
+ signed integer immediate value
+
+**reserved**
+ unused, set to zero
+
+**next_imm**
+ second signed integer immediate value
+
+Instruction classes
+-------------------
+
+The three least significant bits of the 'opcode' field store the instruction class:
+
+.. table:: Instruction class
+
+ ===== ===== =============================== ===================================
+ class value description reference
+ ===== ===== =============================== ===================================
+ LD 0x0 non-standard load operations `Load and store instructions`_
+ LDX 0x1 load into register operations `Load and store instructions`_
+ ST 0x2 store from immediate operations `Load and store instructions`_
+ STX 0x3 store from register operations `Load and store instructions`_
+ ALU 0x4 32-bit arithmetic operations `Arithmetic and jump instructions`_
+ JMP 0x5 64-bit jump operations `Arithmetic and jump instructions`_
+ JMP32 0x6 32-bit jump operations `Arithmetic and jump instructions`_
+ ALU64 0x7 64-bit arithmetic operations `Arithmetic and jump instructions`_
+ ===== ===== =============================== ===================================
+
+Arithmetic and jump instructions
+================================
+
+For arithmetic and jump instructions (``ALU``, ``ALU64``, ``JMP`` and
+``JMP32``), the 8-bit 'opcode' field is divided into three parts::
+
+ +-+-+-+-+-+-+-+-+
+ | code |s|class|
+ +-+-+-+-+-+-+-+-+
+
+**code**
+ the operation code, whose meaning varies by instruction class
+
+**s (source)**
+ the source operand location, which unless otherwise specified is one of:
+
+ .. table:: Source operand location
+
+ ====== ===== ==============================================
+ source value description
+ ====== ===== ==============================================
+ K 0 use 32-bit 'imm' value as source operand
+ X 1 use 'src_reg' register value as source operand
+ ====== ===== ==============================================
+
+**instruction class**
+ the instruction class (see `Instruction classes`_)
+
+Arithmetic instructions
+-----------------------
+
+``ALU`` uses 32-bit wide operands while ``ALU64`` uses 64-bit wide operands for
+otherwise identical operations. ``ALU64`` instructions belong to the
+base64 conformance group unless noted otherwise.
+The 'code' field encodes the operation as below, where 'src' refers to the
+the source operand and 'dst' refers to the value of the destination
+register.
+
+.. table:: Arithmetic instructions
+
+ ===== ===== ======= ===================================================================================
+ name code offset description
+ ===== ===== ======= ===================================================================================
+ ADD 0x0 0 dst += src
+ SUB 0x1 0 dst -= src
+ MUL 0x2 0 dst \*= src
+ DIV 0x3 0 dst = (src != 0) ? (dst / src) : 0
+ SDIV 0x3 1 dst = (src == 0) ? 0 : ((src == -1 && dst == LLONG_MIN) ? LLONG_MIN : (dst s/ src))
+ OR 0x4 0 dst \|= src
+ AND 0x5 0 dst &= src
+ LSH 0x6 0 dst <<= (src & mask)
+ RSH 0x7 0 dst >>= (src & mask)
+ NEG 0x8 0 dst = -dst
+ MOD 0x9 0 dst = (src != 0) ? (dst % src) : dst
+ SMOD 0x9 1 dst = (src == 0) ? dst : ((src == -1 && dst == LLONG_MIN) ? 0: (dst s% src))
+ XOR 0xa 0 dst ^= src
+ MOV 0xb 0 dst = src
+ MOVSX 0xb 8/16/32 dst = (s8,s16,s32)src
+ ARSH 0xc 0 :term:`sign extending<Sign Extend>` dst >>= (src & mask)
+ END 0xd 0 byte swap operations (see `Byte swap instructions`_ below)
+ ===== ===== ======= ===================================================================================
+
+Underflow and overflow are allowed during arithmetic operations, meaning
+the 64-bit or 32-bit value will wrap. If BPF program execution would
+result in division by zero, the destination register is instead set to zero.
+Otherwise, for ``ALU64``, if execution would result in ``LLONG_MIN``
+divided by -1, the destination register is instead set to ``LLONG_MIN``. For
+``ALU``, if execution would result in ``INT_MIN`` divided by -1, the
+destination register is instead set to ``INT_MIN``.
+
+If execution would result in modulo by zero, for ``ALU64`` the value of
+the destination register is unchanged whereas for ``ALU`` the upper
+32 bits of the destination register are zeroed. Otherwise, for ``ALU64``,
+if execution would resuslt in ``LLONG_MIN`` modulo -1, the destination
+register is instead set to 0. For ``ALU``, if execution would result in
+``INT_MIN`` modulo -1, the destination register is instead set to 0.
+
+``{ADD, X, ALU}``, where 'code' = ``ADD``, 'source' = ``X``, and 'class' = ``ALU``, means::
+
+ dst = (u32) ((u32) dst + (u32) src)
+
+where '(u32)' indicates that the upper 32 bits are zeroed.
+
+``{ADD, X, ALU64}`` means::
+
+ dst = dst + src
+
+``{XOR, K, ALU}`` means::
+
+ dst = (u32) dst ^ (u32) imm
+
+``{XOR, K, ALU64}`` means::
+
+ dst = dst ^ imm
+
+Note that most arithmetic instructions have 'offset' set to 0. Only three instructions
+(``SDIV``, ``SMOD``, ``MOVSX``) have a non-zero 'offset'.
+
+Division, multiplication, and modulo operations for ``ALU`` are part
+of the "divmul32" conformance group, and division, multiplication, and
+modulo operations for ``ALU64`` are part of the "divmul64" conformance
+group.
+The division and modulo operations support both unsigned and signed flavors.
+
+For unsigned operations (``DIV`` and ``MOD``), for ``ALU``,
+'imm' is interpreted as a 32-bit unsigned value. For ``ALU64``,
+'imm' is first :term:`sign extended<Sign Extend>` from 32 to 64 bits, and then
+interpreted as a 64-bit unsigned value.
+
+For signed operations (``SDIV`` and ``SMOD``), for ``ALU``,
+'imm' is interpreted as a 32-bit signed value. For ``ALU64``, 'imm'
+is first :term:`sign extended<Sign Extend>` from 32 to 64 bits, and then
+interpreted as a 64-bit signed value.
+
+Note that there are varying definitions of the signed modulo operation
+when the dividend or divisor are negative, where implementations often
+vary by language such that Python, Ruby, etc. differ from C, Go, Java,
+etc. This specification requires that signed modulo MUST use truncated division
+(where -13 % 3 == -1) as implemented in C, Go, etc.::
+
+ a % n = a - n * trunc(a / n)
+
+The ``MOVSX`` instruction does a move operation with sign extension.
+``{MOVSX, X, ALU}`` :term:`sign extends<Sign Extend>` 8-bit and 16-bit operands into
+32-bit operands, and zeroes the remaining upper 32 bits.
+``{MOVSX, X, ALU64}`` :term:`sign extends<Sign Extend>` 8-bit, 16-bit, and 32-bit
+operands into 64-bit operands. Unlike other arithmetic instructions,
+``MOVSX`` is only defined for register source operands (``X``).
+
+``{MOV, K, ALU64}`` means::
+
+ dst = (s64)imm
+
+``{MOV, X, ALU}`` means::
+
+ dst = (u32)src
+
+``{MOVSX, X, ALU}`` with 'offset' 8 means::
+
+ dst = (u32)(s32)(s8)src
+
+
+The ``NEG`` instruction is only defined when the source bit is clear
+(``K``).
+
+Shift operations use a mask of 0x3F (63) for 64-bit operations and 0x1F (31)
+for 32-bit operations.
+
+Byte swap instructions
+----------------------
+
+The byte swap instructions use instruction classes of ``ALU`` and ``ALU64``
+and a 4-bit 'code' field of ``END``.
+
+The byte swap instructions operate on the destination register
+only and do not use a separate source register or immediate value.
+
+For ``ALU``, the 1-bit source operand field in the opcode is used to
+select what byte order the operation converts from or to. For
+``ALU64``, the 1-bit source operand field in the opcode is reserved
+and MUST be set to 0.
+
+.. table:: Byte swap instructions
+
+ ===== ======== ===== =================================================
+ class source value description
+ ===== ======== ===== =================================================
+ ALU LE 0 convert between host byte order and little endian
+ ALU BE 1 convert between host byte order and big endian
+ ALU64 Reserved 0 do byte swap unconditionally
+ ===== ======== ===== =================================================
+
+The 'imm' field encodes the width of the swap operations. The following widths
+are supported: 16, 32 and 64. Width 64 operations belong to the base64
+conformance group and other swap operations belong to the base32
+conformance group.
+
+Examples:
+
+``{END, LE, ALU}`` with 'imm' = 16/32/64 means::
+
+ dst = le16(dst)
+ dst = le32(dst)
+ dst = le64(dst)
+
+``{END, BE, ALU}`` with 'imm' = 16/32/64 means::
+
+ dst = be16(dst)
+ dst = be32(dst)
+ dst = be64(dst)
+
+``{END, TO, ALU64}`` with 'imm' = 16/32/64 means::
+
+ dst = bswap16(dst)
+ dst = bswap32(dst)
+ dst = bswap64(dst)
+
+Jump instructions
+-----------------
+
+``JMP32`` uses 32-bit wide operands and indicates the base32
+conformance group, while ``JMP`` uses 64-bit wide operands for
+otherwise identical operations, and indicates the base64 conformance
+group unless otherwise specified.
+The 'code' field encodes the operation as below:
+
+.. table:: Jump instructions
+
+ ======== ===== ======= ================================= ===================================================
+ code value src_reg description notes
+ ======== ===== ======= ================================= ===================================================
+ JA 0x0 0x0 PC += offset {JA, K, JMP} only
+ JA 0x0 0x0 PC += imm {JA, K, JMP32} only
+ JEQ 0x1 any PC += offset if dst == src
+ JGT 0x2 any PC += offset if dst > src unsigned
+ JGE 0x3 any PC += offset if dst >= src unsigned
+ JSET 0x4 any PC += offset if dst & src
+ JNE 0x5 any PC += offset if dst != src
+ JSGT 0x6 any PC += offset if dst > src signed
+ JSGE 0x7 any PC += offset if dst >= src signed
+ CALL 0x8 0x0 call helper function by static ID {CALL, K, JMP} only, see `Helper functions`_
+ CALL 0x8 0x1 call PC += imm {CALL, K, JMP} only, see `Program-local functions`_
+ CALL 0x8 0x2 call helper function by BTF ID {CALL, K, JMP} only, see `Helper functions`_
+ EXIT 0x9 0x0 return {CALL, K, JMP} only
+ JLT 0xa any PC += offset if dst < src unsigned
+ JLE 0xb any PC += offset if dst <= src unsigned
+ JSLT 0xc any PC += offset if dst < src signed
+ JSLE 0xd any PC += offset if dst <= src signed
+ ======== ===== ======= ================================= ===================================================
+
+where 'PC' denotes the program counter, and the offset to increment by
+is in units of 64-bit instructions relative to the instruction following
+the jump instruction. Thus 'PC += 1' skips execution of the next
+instruction if it's a basic instruction or results in undefined behavior
+if the next instruction is a 128-bit wide instruction.
+
+Example:
+
+``{JSGE, X, JMP32}`` means::
+
+ if (s32)dst s>= (s32)src goto +offset
+
+where 's>=' indicates a signed '>=' comparison.
+
+``{JLE, K, JMP}`` means::
+
+ if dst <= (u64)(s64)imm goto +offset
+
+``{JA, K, JMP32}`` means::
+
+ gotol +imm
+
+where 'imm' means the branch offset comes from the 'imm' field.
+
+Note that there are two flavors of ``JA`` instructions. The
+``JMP`` class permits a 16-bit jump offset specified by the 'offset'
+field, whereas the ``JMP32`` class permits a 32-bit jump offset
+specified by the 'imm' field. A > 16-bit conditional jump may be
+converted to a < 16-bit conditional jump plus a 32-bit unconditional
+jump.
+
+All ``CALL`` and ``JA`` instructions belong to the
+base32 conformance group.
+
+Helper functions
+~~~~~~~~~~~~~~~~
+
+Helper functions are a concept whereby BPF programs can call into a
+set of function calls exposed by the underlying platform.
+
+Historically, each helper function was identified by a static ID
+encoded in the 'imm' field. Further documentation of helper functions
+is outside the scope of this document and standardization is left for
+future work, but use is widely deployed and more information can be
+found in platform-specific documentation (e.g., Linux kernel documentation).
+
+Platforms that support the BPF Type Format (BTF) support identifying
+a helper function by a BTF ID encoded in the 'imm' field, where the BTF ID
+identifies the helper name and type. Further documentation of BTF
+is outside the scope of this document and standardization is left for
+future work, but use is widely deployed and more information can be
+found in platform-specific documentation (e.g., Linux kernel documentation).
+
+Program-local functions
+~~~~~~~~~~~~~~~~~~~~~~~
+Program-local functions are functions exposed by the same BPF program as the
+caller, and are referenced by offset from the instruction following the call
+instruction, similar to ``JA``. The offset is encoded in the 'imm' field of
+the call instruction. An ``EXIT`` within the program-local function will
+return to the caller.
+
+Load and store instructions
+===========================
+
+For load and store instructions (``LD``, ``LDX``, ``ST``, and ``STX``), the
+8-bit 'opcode' field is divided as follows::
+
+ +-+-+-+-+-+-+-+-+
+ |mode |sz |class|
+ +-+-+-+-+-+-+-+-+
+
+**mode**
+ The mode modifier is one of:
+
+ .. table:: Mode modifier
+
+ ============= ===== ==================================== =============
+ mode modifier value description reference
+ ============= ===== ==================================== =============
+ IMM 0 64-bit immediate instructions `64-bit immediate instructions`_
+ ABS 1 legacy BPF packet access (absolute) `Legacy BPF Packet access instructions`_
+ IND 2 legacy BPF packet access (indirect) `Legacy BPF Packet access instructions`_
+ MEM 3 regular load and store operations `Regular load and store operations`_
+ MEMSX 4 sign-extension load operations `Sign-extension load operations`_
+ ATOMIC 6 atomic operations `Atomic operations`_
+ ============= ===== ==================================== =============
+
+**sz (size)**
+ The size modifier is one of:
+
+ .. table:: Size modifier
+
+ ==== ===== =====================
+ size value description
+ ==== ===== =====================
+ W 0 word (4 bytes)
+ H 1 half word (2 bytes)
+ B 2 byte
+ DW 3 double word (8 bytes)
+ ==== ===== =====================
+
+ Instructions using ``DW`` belong to the base64 conformance group.
+
+**class**
+ The instruction class (see `Instruction classes`_)
+
+Regular load and store operations
+---------------------------------
+
+The ``MEM`` mode modifier is used to encode regular load and store
+instructions that transfer data between a register and memory.
+
+``{MEM, <size>, STX}`` means::
+
+ *(size *) (dst + offset) = src
+
+``{MEM, <size>, ST}`` means::
+
+ *(size *) (dst + offset) = imm
+
+``{MEM, <size>, LDX}`` means::
+
+ dst = *(unsigned size *) (src + offset)
+
+Where '<size>' is one of: ``B``, ``H``, ``W``, or ``DW``, and
+'unsigned size' is one of: u8, u16, u32, or u64.
+
+Sign-extension load operations
+------------------------------
+
+The ``MEMSX`` mode modifier is used to encode :term:`sign-extension<Sign Extend>` load
+instructions that transfer data between a register and memory.
+
+``{MEMSX, <size>, LDX}`` means::
+
+ dst = *(signed size *) (src + offset)
+
+Where '<size>' is one of: ``B``, ``H``, or ``W``, and
+'signed size' is one of: s8, s16, or s32.
+
+Atomic operations
+-----------------
+
+Atomic operations are operations that operate on memory and can not be
+interrupted or corrupted by other access to the same memory region
+by other BPF programs or means outside of this specification.
+
+All atomic operations supported by BPF are encoded as store operations
+that use the ``ATOMIC`` mode modifier as follows:
+
+* ``{ATOMIC, W, STX}`` for 32-bit operations, which are
+ part of the "atomic32" conformance group.
+* ``{ATOMIC, DW, STX}`` for 64-bit operations, which are
+ part of the "atomic64" conformance group.
+* 8-bit and 16-bit wide atomic operations are not supported.
+
+The 'imm' field is used to encode the actual atomic operation.
+Simple atomic operation use a subset of the values defined to encode
+arithmetic operations in the 'imm' field to encode the atomic operation:
+
+.. table:: Simple atomic operations
+
+ ======== ===== ===========
+ imm value description
+ ======== ===== ===========
+ ADD 0x00 atomic add
+ OR 0x40 atomic or
+ AND 0x50 atomic and
+ XOR 0xa0 atomic xor
+ ======== ===== ===========
+
+
+``{ATOMIC, W, STX}`` with 'imm' = ADD means::
+
+ *(u32 *)(dst + offset) += src
+
+``{ATOMIC, DW, STX}`` with 'imm' = ADD means::
+
+ *(u64 *)(dst + offset) += src
+
+In addition to the simple atomic operations, there also is a modifier and
+two complex atomic operations:
+
+.. table:: Complex atomic operations
+
+ =========== ================ ===========================
+ imm value description
+ =========== ================ ===========================
+ FETCH 0x01 modifier: return old value
+ XCHG 0xe0 | FETCH atomic exchange
+ CMPXCHG 0xf0 | FETCH atomic compare and exchange
+ =========== ================ ===========================
+
+The ``FETCH`` modifier is optional for simple atomic operations, and
+always set for the complex atomic operations. If the ``FETCH`` flag
+is set, then the operation also overwrites ``src`` with the value that
+was in memory before it was modified.
+
+The ``XCHG`` operation atomically exchanges ``src`` with the value
+addressed by ``dst + offset``.
+
+The ``CMPXCHG`` operation atomically compares the value addressed by
+``dst + offset`` with ``R0``. If they match, the value addressed by
+``dst + offset`` is replaced with ``src``. In either case, the
+value that was at ``dst + offset`` before the operation is zero-extended
+and loaded back to ``R0``.
+
+64-bit immediate instructions
+-----------------------------
+
+Instructions with the ``IMM`` 'mode' modifier use the wide instruction
+encoding defined in `Instruction encoding`_, and use the 'src_reg' field of the
+basic instruction to hold an opcode subtype.
+
+The following table defines a set of ``{IMM, DW, LD}`` instructions
+with opcode subtypes in the 'src_reg' field, using new terms such as "map"
+defined further below:
+
+.. table:: 64-bit immediate instructions
+
+ ======= ========================================= =========== ==============
+ src_reg pseudocode imm type dst type
+ ======= ========================================= =========== ==============
+ 0x0 dst = (next_imm << 32) | imm integer integer
+ 0x1 dst = map_by_fd(imm) map fd map
+ 0x2 dst = map_val(map_by_fd(imm)) + next_imm map fd data address
+ 0x3 dst = var_addr(imm) variable id data address
+ 0x4 dst = code_addr(imm) integer code address
+ 0x5 dst = map_by_idx(imm) map index map
+ 0x6 dst = map_val(map_by_idx(imm)) + next_imm map index data address
+ ======= ========================================= =========== ==============
+
+where
+
+* map_by_fd(imm) means to convert a 32-bit file descriptor into an address of a map (see `Maps`_)
+* map_by_idx(imm) means to convert a 32-bit index into an address of a map
+* map_val(map) gets the address of the first value in a given map
+* var_addr(imm) gets the address of a platform variable (see `Platform Variables`_) with a given id
+* code_addr(imm) gets the address of the instruction at a specified relative offset in number of (64-bit) instructions
+* the 'imm type' can be used by disassemblers for display
+* the 'dst type' can be used for verification and JIT compilation purposes
+
+Maps
+~~~~
+
+Maps are shared memory regions accessible by BPF programs on some platforms.
+A map can have various semantics as defined in a separate document, and may or
+may not have a single contiguous memory region, but the 'map_val(map)' is
+currently only defined for maps that do have a single contiguous memory region.
+
+Each map can have a file descriptor (fd) if supported by the platform, where
+'map_by_fd(imm)' means to get the map with the specified file descriptor. Each
+BPF program can also be defined to use a set of maps associated with the
+program at load time, and 'map_by_idx(imm)' means to get the map with the given
+index in the set associated with the BPF program containing the instruction.
+
+Platform Variables
+~~~~~~~~~~~~~~~~~~
+
+Platform variables are memory regions, identified by integer ids, exposed by
+the runtime and accessible by BPF programs on some platforms. The
+'var_addr(imm)' operation means to get the address of the memory region
+identified by the given id.
+
+Legacy BPF Packet access instructions
+-------------------------------------
+
+BPF previously introduced special instructions for access to packet data that were
+carried over from classic BPF. These instructions used an instruction
+class of ``LD``, a size modifier of ``W``, ``H``, or ``B``, and a
+mode modifier of ``ABS`` or ``IND``. The 'dst_reg' and 'offset' fields were
+set to zero, and 'src_reg' was set to zero for ``ABS``. However, these
+instructions are deprecated and SHOULD no longer be used. All legacy packet
+access instructions belong to the "packet" conformance group.
diff --git a/Documentation/bpf/syscall_api.rst b/Documentation/bpf/syscall_api.rst
new file mode 100644
index 000000000000..f0a1dff087ad
--- /dev/null
+++ b/Documentation/bpf/syscall_api.rst
@@ -0,0 +1,11 @@
+===========
+Syscall API
+===========
+
+The primary info for the bpf syscall is available in the `man-pages`_
+for `bpf(2)`_. For more information about the userspace API, see
+Documentation/userspace-api/ebpf/index.rst.
+
+.. Links:
+.. _man-pages: https://www.kernel.org/doc/man-pages/
+.. _bpf(2): https://man7.org/linux/man-pages/man2/bpf.2.html \ No newline at end of file
diff --git a/Documentation/bpf/test_debug.rst b/Documentation/bpf/test_debug.rst
new file mode 100644
index 000000000000..ebf0caceb6a6
--- /dev/null
+++ b/Documentation/bpf/test_debug.rst
@@ -0,0 +1,9 @@
+=========================
+Testing and debugging BPF
+=========================
+
+.. toctree::
+ :maxdepth: 1
+
+ drgn
+ s390
diff --git a/Documentation/bpf/verifier.rst b/Documentation/bpf/verifier.rst
new file mode 100644
index 000000000000..510d15bc697b
--- /dev/null
+++ b/Documentation/bpf/verifier.rst
@@ -0,0 +1,560 @@
+
+=============
+eBPF verifier
+=============
+
+The safety of the eBPF program is determined in two steps.
+
+First step does DAG check to disallow loops and other CFG validation.
+In particular it will detect programs that have unreachable instructions.
+(though classic BPF checker allows them)
+
+Second step starts from the first insn and descends all possible paths.
+It simulates execution of every insn and observes the state change of
+registers and stack.
+
+At the start of the program the register R1 contains a pointer to context
+and has type PTR_TO_CTX.
+If verifier sees an insn that does R2=R1, then R2 has now type
+PTR_TO_CTX as well and can be used on the right hand side of expression.
+If R1=PTR_TO_CTX and insn is R2=R1+R1, then R2=SCALAR_VALUE,
+since addition of two valid pointers makes invalid pointer.
+(In 'secure' mode verifier will reject any type of pointer arithmetic to make
+sure that kernel addresses don't leak to unprivileged users)
+
+If register was never written to, it's not readable::
+
+ bpf_mov R0 = R2
+ bpf_exit
+
+will be rejected, since R2 is unreadable at the start of the program.
+
+After kernel function call, R1-R5 are reset to unreadable and
+R0 has a return type of the function.
+
+Since R6-R9 are callee saved, their state is preserved across the call.
+
+::
+
+ bpf_mov R6 = 1
+ bpf_call foo
+ bpf_mov R0 = R6
+ bpf_exit
+
+is a correct program. If there was R1 instead of R6, it would have
+been rejected.
+
+load/store instructions are allowed only with registers of valid types, which
+are PTR_TO_CTX, PTR_TO_MAP, PTR_TO_STACK. They are bounds and alignment checked.
+For example::
+
+ bpf_mov R1 = 1
+ bpf_mov R2 = 2
+ bpf_xadd *(u32 *)(R1 + 3) += R2
+ bpf_exit
+
+will be rejected, since R1 doesn't have a valid pointer type at the time of
+execution of instruction bpf_xadd.
+
+At the start R1 type is PTR_TO_CTX (a pointer to generic ``struct bpf_context``)
+A callback is used to customize verifier to restrict eBPF program access to only
+certain fields within ctx structure with specified size and alignment.
+
+For example, the following insn::
+
+ bpf_ld R0 = *(u32 *)(R6 + 8)
+
+intends to load a word from address R6 + 8 and store it into R0
+If R6=PTR_TO_CTX, via is_valid_access() callback the verifier will know
+that offset 8 of size 4 bytes can be accessed for reading, otherwise
+the verifier will reject the program.
+If R6=PTR_TO_STACK, then access should be aligned and be within
+stack bounds, which are [-MAX_BPF_STACK, 0). In this example offset is 8,
+so it will fail verification, since it's out of bounds.
+
+The verifier will allow eBPF program to read data from stack only after
+it wrote into it.
+
+Classic BPF verifier does similar check with M[0-15] memory slots.
+For example::
+
+ bpf_ld R0 = *(u32 *)(R10 - 4)
+ bpf_exit
+
+is invalid program.
+Though R10 is correct read-only register and has type PTR_TO_STACK
+and R10 - 4 is within stack bounds, there were no stores into that location.
+
+Pointer register spill/fill is tracked as well, since four (R6-R9)
+callee saved registers may not be enough for some programs.
+
+Allowed function calls are customized with bpf_verifier_ops->get_func_proto()
+The eBPF verifier will check that registers match argument constraints.
+After the call register R0 will be set to return type of the function.
+
+Function calls is a main mechanism to extend functionality of eBPF programs.
+Socket filters may let programs to call one set of functions, whereas tracing
+filters may allow completely different set.
+
+If a function made accessible to eBPF program, it needs to be thought through
+from safety point of view. The verifier will guarantee that the function is
+called with valid arguments.
+
+seccomp vs socket filters have different security restrictions for classic BPF.
+Seccomp solves this by two stage verifier: classic BPF verifier is followed
+by seccomp verifier. In case of eBPF one configurable verifier is shared for
+all use cases.
+
+See details of eBPF verifier in kernel/bpf/verifier.c
+
+Register value tracking
+=======================
+
+In order to determine the safety of an eBPF program, the verifier must track
+the range of possible values in each register and also in each stack slot.
+This is done with ``struct bpf_reg_state``, defined in include/linux/
+bpf_verifier.h, which unifies tracking of scalar and pointer values. Each
+register state has a type, which is either NOT_INIT (the register has not been
+written to), SCALAR_VALUE (some value which is not usable as a pointer), or a
+pointer type. The types of pointers describe their base, as follows:
+
+
+ PTR_TO_CTX
+ Pointer to bpf_context.
+ CONST_PTR_TO_MAP
+ Pointer to struct bpf_map. "Const" because arithmetic
+ on these pointers is forbidden.
+ PTR_TO_MAP_VALUE
+ Pointer to the value stored in a map element.
+ PTR_TO_MAP_VALUE_OR_NULL
+ Either a pointer to a map value, or NULL; map accesses
+ (see maps.rst) return this type, which becomes a
+ PTR_TO_MAP_VALUE when checked != NULL. Arithmetic on
+ these pointers is forbidden.
+ PTR_TO_STACK
+ Frame pointer.
+ PTR_TO_PACKET
+ skb->data.
+ PTR_TO_PACKET_END
+ skb->data + headlen; arithmetic forbidden.
+ PTR_TO_SOCKET
+ Pointer to struct bpf_sock_ops, implicitly refcounted.
+ PTR_TO_SOCKET_OR_NULL
+ Either a pointer to a socket, or NULL; socket lookup
+ returns this type, which becomes a PTR_TO_SOCKET when
+ checked != NULL. PTR_TO_SOCKET is reference-counted,
+ so programs must release the reference through the
+ socket release function before the end of the program.
+ Arithmetic on these pointers is forbidden.
+
+However, a pointer may be offset from this base (as a result of pointer
+arithmetic), and this is tracked in two parts: the 'fixed offset' and 'variable
+offset'. The former is used when an exactly-known value (e.g. an immediate
+operand) is added to a pointer, while the latter is used for values which are
+not exactly known. The variable offset is also used in SCALAR_VALUEs, to track
+the range of possible values in the register.
+
+The verifier's knowledge about the variable offset consists of:
+
+* minimum and maximum values as unsigned
+* minimum and maximum values as signed
+
+* knowledge of the values of individual bits, in the form of a 'tnum': a u64
+ 'mask' and a u64 'value'. 1s in the mask represent bits whose value is unknown;
+ 1s in the value represent bits known to be 1. Bits known to be 0 have 0 in both
+ mask and value; no bit should ever be 1 in both. For example, if a byte is read
+ into a register from memory, the register's top 56 bits are known zero, while
+ the low 8 are unknown - which is represented as the tnum (0x0; 0xff). If we
+ then OR this with 0x40, we get (0x40; 0xbf), then if we add 1 we get (0x0;
+ 0x1ff), because of potential carries.
+
+Besides arithmetic, the register state can also be updated by conditional
+branches. For instance, if a SCALAR_VALUE is compared > 8, in the 'true' branch
+it will have a umin_value (unsigned minimum value) of 9, whereas in the 'false'
+branch it will have a umax_value of 8. A signed compare (with BPF_JSGT or
+BPF_JSGE) would instead update the signed minimum/maximum values. Information
+from the signed and unsigned bounds can be combined; for instance if a value is
+first tested < 8 and then tested s> 4, the verifier will conclude that the value
+is also > 4 and s< 8, since the bounds prevent crossing the sign boundary.
+
+PTR_TO_PACKETs with a variable offset part have an 'id', which is common to all
+pointers sharing that same variable offset. This is important for packet range
+checks: after adding a variable to a packet pointer register A, if you then copy
+it to another register B and then add a constant 4 to A, both registers will
+share the same 'id' but the A will have a fixed offset of +4. Then if A is
+bounds-checked and found to be less than a PTR_TO_PACKET_END, the register B is
+now known to have a safe range of at least 4 bytes. See 'Direct packet access',
+below, for more on PTR_TO_PACKET ranges.
+
+The 'id' field is also used on PTR_TO_MAP_VALUE_OR_NULL, common to all copies of
+the pointer returned from a map lookup. This means that when one copy is
+checked and found to be non-NULL, all copies can become PTR_TO_MAP_VALUEs.
+As well as range-checking, the tracked information is also used for enforcing
+alignment of pointer accesses. For instance, on most systems the packet pointer
+is 2 bytes after a 4-byte alignment. If a program adds 14 bytes to that to jump
+over the Ethernet header, then reads IHL and adds (IHL * 4), the resulting
+pointer will have a variable offset known to be 4n+2 for some n, so adding the 2
+bytes (NET_IP_ALIGN) gives a 4-byte alignment and so word-sized accesses through
+that pointer are safe.
+The 'id' field is also used on PTR_TO_SOCKET and PTR_TO_SOCKET_OR_NULL, common
+to all copies of the pointer returned from a socket lookup. This has similar
+behaviour to the handling for PTR_TO_MAP_VALUE_OR_NULL->PTR_TO_MAP_VALUE, but
+it also handles reference tracking for the pointer. PTR_TO_SOCKET implicitly
+represents a reference to the corresponding ``struct sock``. To ensure that the
+reference is not leaked, it is imperative to NULL-check the reference and in
+the non-NULL case, and pass the valid reference to the socket release function.
+
+Direct packet access
+====================
+
+In cls_bpf and act_bpf programs the verifier allows direct access to the packet
+data via skb->data and skb->data_end pointers.
+Ex::
+
+ 1: r4 = *(u32 *)(r1 +80) /* load skb->data_end */
+ 2: r3 = *(u32 *)(r1 +76) /* load skb->data */
+ 3: r5 = r3
+ 4: r5 += 14
+ 5: if r5 > r4 goto pc+16
+ R1=ctx R3=pkt(id=0,off=0,r=14) R4=pkt_end R5=pkt(id=0,off=14,r=14) R10=fp
+ 6: r0 = *(u16 *)(r3 +12) /* access 12 and 13 bytes of the packet */
+
+this 2byte load from the packet is safe to do, since the program author
+did check ``if (skb->data + 14 > skb->data_end) goto err`` at insn #5 which
+means that in the fall-through case the register R3 (which points to skb->data)
+has at least 14 directly accessible bytes. The verifier marks it
+as R3=pkt(id=0,off=0,r=14).
+id=0 means that no additional variables were added to the register.
+off=0 means that no additional constants were added.
+r=14 is the range of safe access which means that bytes [R3, R3 + 14) are ok.
+Note that R5 is marked as R5=pkt(id=0,off=14,r=14). It also points
+to the packet data, but constant 14 was added to the register, so
+it now points to ``skb->data + 14`` and accessible range is [R5, R5 + 14 - 14)
+which is zero bytes.
+
+More complex packet access may look like::
+
+
+ R0=inv1 R1=ctx R3=pkt(id=0,off=0,r=14) R4=pkt_end R5=pkt(id=0,off=14,r=14) R10=fp
+ 6: r0 = *(u8 *)(r3 +7) /* load 7th byte from the packet */
+ 7: r4 = *(u8 *)(r3 +12)
+ 8: r4 *= 14
+ 9: r3 = *(u32 *)(r1 +76) /* load skb->data */
+ 10: r3 += r4
+ 11: r2 = r1
+ 12: r2 <<= 48
+ 13: r2 >>= 48
+ 14: r3 += r2
+ 15: r2 = r3
+ 16: r2 += 8
+ 17: r1 = *(u32 *)(r1 +80) /* load skb->data_end */
+ 18: if r2 > r1 goto pc+2
+ R0=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R1=pkt_end R2=pkt(id=2,off=8,r=8) R3=pkt(id=2,off=0,r=8) R4=inv(id=0,umax_value=3570,var_off=(0x0; 0xfffe)) R5=pkt(id=0,off=14,r=14) R10=fp
+ 19: r1 = *(u8 *)(r3 +4)
+
+The state of the register R3 is R3=pkt(id=2,off=0,r=8)
+id=2 means that two ``r3 += rX`` instructions were seen, so r3 points to some
+offset within a packet and since the program author did
+``if (r3 + 8 > r1) goto err`` at insn #18, the safe range is [R3, R3 + 8).
+The verifier only allows 'add'/'sub' operations on packet registers. Any other
+operation will set the register state to 'SCALAR_VALUE' and it won't be
+available for direct packet access.
+
+Operation ``r3 += rX`` may overflow and become less than original skb->data,
+therefore the verifier has to prevent that. So when it sees ``r3 += rX``
+instruction and rX is more than 16-bit value, any subsequent bounds-check of r3
+against skb->data_end will not give us 'range' information, so attempts to read
+through the pointer will give "invalid access to packet" error.
+
+Ex. after insn ``r4 = *(u8 *)(r3 +12)`` (insn #7 above) the state of r4 is
+R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) which means that upper 56 bits
+of the register are guaranteed to be zero, and nothing is known about the lower
+8 bits. After insn ``r4 *= 14`` the state becomes
+R4=inv(id=0,umax_value=3570,var_off=(0x0; 0xfffe)), since multiplying an 8-bit
+value by constant 14 will keep upper 52 bits as zero, also the least significant
+bit will be zero as 14 is even. Similarly ``r2 >>= 48`` will make
+R2=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff)), since the shift is not sign
+extending. This logic is implemented in adjust_reg_min_max_vals() function,
+which calls adjust_ptr_min_max_vals() for adding pointer to scalar (or vice
+versa) and adjust_scalar_min_max_vals() for operations on two scalars.
+
+The end result is that bpf program author can access packet directly
+using normal C code as::
+
+ void *data = (void *)(long)skb->data;
+ void *data_end = (void *)(long)skb->data_end;
+ struct eth_hdr *eth = data;
+ struct iphdr *iph = data + sizeof(*eth);
+ struct udphdr *udp = data + sizeof(*eth) + sizeof(*iph);
+
+ if (data + sizeof(*eth) + sizeof(*iph) + sizeof(*udp) > data_end)
+ return 0;
+ if (eth->h_proto != htons(ETH_P_IP))
+ return 0;
+ if (iph->protocol != IPPROTO_UDP || iph->ihl != 5)
+ return 0;
+ if (udp->dest == 53 || udp->source == 9)
+ ...;
+
+which makes such programs easier to write comparing to LD_ABS insn
+and significantly faster.
+
+Pruning
+=======
+
+The verifier does not actually walk all possible paths through the program. For
+each new branch to analyse, the verifier looks at all the states it's previously
+been in when at this instruction. If any of them contain the current state as a
+subset, the branch is 'pruned' - that is, the fact that the previous state was
+accepted implies the current state would be as well. For instance, if in the
+previous state, r1 held a packet-pointer, and in the current state, r1 holds a
+packet-pointer with a range as long or longer and at least as strict an
+alignment, then r1 is safe. Similarly, if r2 was NOT_INIT before then it can't
+have been used by any path from that point, so any value in r2 (including
+another NOT_INIT) is safe. The implementation is in the function regsafe().
+Pruning considers not only the registers but also the stack (and any spilled
+registers it may hold). They must all be safe for the branch to be pruned.
+This is implemented in states_equal().
+
+Some technical details about state pruning implementation could be found below.
+
+Register liveness tracking
+--------------------------
+
+In order to make state pruning effective, liveness state is tracked for each
+register and stack slot. The basic idea is to track which registers and stack
+slots are actually used during subseqeuent execution of the program, until
+program exit is reached. Registers and stack slots that were never used could be
+removed from the cached state thus making more states equivalent to a cached
+state. This could be illustrated by the following program::
+
+ 0: call bpf_get_prandom_u32()
+ 1: r1 = 0
+ 2: if r0 == 0 goto +1
+ 3: r0 = 1
+ --- checkpoint ---
+ 4: r0 = r1
+ 5: exit
+
+Suppose that a state cache entry is created at instruction #4 (such entries are
+also called "checkpoints" in the text below). The verifier could reach the
+instruction with one of two possible register states:
+
+* r0 = 1, r1 = 0
+* r0 = 0, r1 = 0
+
+However, only the value of register ``r1`` is important to successfully finish
+verification. The goal of the liveness tracking algorithm is to spot this fact
+and figure out that both states are actually equivalent.
+
+Understanding eBPF verifier messages
+====================================
+
+The following are few examples of invalid eBPF programs and verifier error
+messages as seen in the log:
+
+Program with unreachable instructions::
+
+ static struct bpf_insn prog[] = {
+ BPF_EXIT_INSN(),
+ BPF_EXIT_INSN(),
+ };
+
+Error::
+
+ unreachable insn 1
+
+Program that reads uninitialized register::
+
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_EXIT_INSN(),
+
+Error::
+
+ 0: (bf) r0 = r2
+ R2 !read_ok
+
+Program that doesn't initialize R0 before exiting::
+
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
+ BPF_EXIT_INSN(),
+
+Error::
+
+ 0: (bf) r2 = r1
+ 1: (95) exit
+ R0 !read_ok
+
+Program that accesses stack out of bounds::
+
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, 8, 0),
+ BPF_EXIT_INSN(),
+
+Error::
+
+ 0: (7a) *(u64 *)(r10 +8) = 0
+ invalid stack off=8 size=8
+
+Program that doesn't initialize stack before passing its address into function::
+
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+
+Error::
+
+ 0: (bf) r2 = r10
+ 1: (07) r2 += -8
+ 2: (b7) r1 = 0x0
+ 3: (85) call 1
+ invalid indirect read from stack off -8+0 size 8
+
+Program that uses invalid map_fd=0 while calling to map_lookup_elem() function::
+
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+
+Error::
+
+ 0: (7a) *(u64 *)(r10 -8) = 0
+ 1: (bf) r2 = r10
+ 2: (07) r2 += -8
+ 3: (b7) r1 = 0x0
+ 4: (85) call 1
+ fd 0 is not pointing to valid bpf_map
+
+Program that doesn't check return value of map_lookup_elem() before accessing
+map element::
+
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ BPF_EXIT_INSN(),
+
+Error::
+
+ 0: (7a) *(u64 *)(r10 -8) = 0
+ 1: (bf) r2 = r10
+ 2: (07) r2 += -8
+ 3: (b7) r1 = 0x0
+ 4: (85) call 1
+ 5: (7a) *(u64 *)(r0 +0) = 0
+ R0 invalid mem access 'map_value_or_null'
+
+Program that correctly checks map_lookup_elem() returned value for NULL, but
+accesses the memory with incorrect alignment::
+
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
+ BPF_EXIT_INSN(),
+
+Error::
+
+ 0: (7a) *(u64 *)(r10 -8) = 0
+ 1: (bf) r2 = r10
+ 2: (07) r2 += -8
+ 3: (b7) r1 = 1
+ 4: (85) call 1
+ 5: (15) if r0 == 0x0 goto pc+1
+ R0=map_ptr R10=fp
+ 6: (7a) *(u64 *)(r0 +4) = 0
+ misaligned access off 4 size 8
+
+Program that correctly checks map_lookup_elem() returned value for NULL and
+accesses memory with correct alignment in one side of 'if' branch, but fails
+to do so in the other side of 'if' branch::
+
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ BPF_EXIT_INSN(),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+
+Error::
+
+ 0: (7a) *(u64 *)(r10 -8) = 0
+ 1: (bf) r2 = r10
+ 2: (07) r2 += -8
+ 3: (b7) r1 = 1
+ 4: (85) call 1
+ 5: (15) if r0 == 0x0 goto pc+2
+ R0=map_ptr R10=fp
+ 6: (7a) *(u64 *)(r0 +0) = 0
+ 7: (95) exit
+
+ from 5 to 8: R0=imm0 R10=fp
+ 8: (7a) *(u64 *)(r0 +0) = 1
+ R0 invalid mem access 'imm'
+
+Program that performs a socket lookup then sets the pointer to NULL without
+checking it::
+
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_MOV64_IMM(BPF_REG_3, 4),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+
+Error::
+
+ 0: (b7) r2 = 0
+ 1: (63) *(u32 *)(r10 -8) = r2
+ 2: (bf) r2 = r10
+ 3: (07) r2 += -8
+ 4: (b7) r3 = 4
+ 5: (b7) r4 = 0
+ 6: (b7) r5 = 0
+ 7: (85) call bpf_sk_lookup_tcp#65
+ 8: (b7) r0 = 0
+ 9: (95) exit
+ Unreleased reference id=1, alloc_insn=7
+
+Program that performs a socket lookup but does not NULL-check the returned
+value::
+
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_MOV64_IMM(BPF_REG_3, 4),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp),
+ BPF_EXIT_INSN(),
+
+Error::
+
+ 0: (b7) r2 = 0
+ 1: (63) *(u32 *)(r10 -8) = r2
+ 2: (bf) r2 = r10
+ 3: (07) r2 += -8
+ 4: (b7) r3 = 4
+ 5: (b7) r4 = 0
+ 6: (b7) r5 = 0
+ 7: (85) call bpf_sk_lookup_tcp#65
+ 8: (95) exit
+ Unreleased reference id=1, alloc_insn=7
diff --git a/Documentation/bt8xxgpio.txt b/Documentation/bt8xxgpio.txt
deleted file mode 100644
index a845feb074de..000000000000
--- a/Documentation/bt8xxgpio.txt
+++ /dev/null
@@ -1,62 +0,0 @@
-===================================================================
-A driver for a selfmade cheap BT8xx based PCI GPIO-card (bt8xxgpio)
-===================================================================
-
-For advanced documentation, see http://www.bu3sch.de/btgpio.php
-
-A generic digital 24-port PCI GPIO card can be built out of an ordinary
-Brooktree bt848, bt849, bt878 or bt879 based analog TV tuner card. The
-Brooktree chip is used in old analog Hauppauge WinTV PCI cards. You can easily
-find them used for low prices on the net.
-
-The bt8xx chip does have 24 digital GPIO ports.
-These ports are accessible via 24 pins on the SMD chip package.
-
-
-How to physically access the GPIO pins
-======================================
-
-The are several ways to access these pins. One might unsolder the whole chip
-and put it on a custom PCI board, or one might only unsolder each individual
-GPIO pin and solder that to some tiny wire. As the chip package really is tiny
-there are some advanced soldering skills needed in any case.
-
-The physical pinouts are drawn in the following ASCII art.
-The GPIO pins are marked with G00-G23::
-
- G G G G G G G G G G G G G G G G G G
- 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1
- 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7
- | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
- ---------------------------------------------------------------------------
- --| ^ ^ |--
- --| pin 86 pin 67 |--
- --| |--
- --| pin 61 > |-- G18
- --| |-- G19
- --| |-- G20
- --| |-- G21
- --| |-- G22
- --| pin 56 > |-- G23
- --| |--
- --| Brooktree 878/879 |--
- --| |--
- --| |--
- --| |--
- --| |--
- --| |--
- --| |--
- --| |--
- --| |--
- --| |--
- --| |--
- --| |--
- --| |--
- --| |--
- --| O |--
- --| |--
- ---------------------------------------------------------------------------
- | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
- ^
- This is pin 1
-
diff --git a/Documentation/bus-devices/ti-gpmc.txt b/Documentation/bus-devices/ti-gpmc.txt
deleted file mode 100644
index cc9ce57e0a26..000000000000
--- a/Documentation/bus-devices/ti-gpmc.txt
+++ /dev/null
@@ -1,122 +0,0 @@
-GPMC (General Purpose Memory Controller):
-=========================================
-
-GPMC is an unified memory controller dedicated to interfacing external
-memory devices like
- * Asynchronous SRAM like memories and application specific integrated
- circuit devices.
- * Asynchronous, synchronous, and page mode burst NOR flash devices
- NAND flash
- * Pseudo-SRAM devices
-
-GPMC is found on Texas Instruments SoC's (OMAP based)
-IP details: http://www.ti.com/lit/pdf/spruh73 section 7.1
-
-
-GPMC generic timing calculation:
-================================
-
-GPMC has certain timings that has to be programmed for proper
-functioning of the peripheral, while peripheral has another set of
-timings. To have peripheral work with gpmc, peripheral timings has to
-be translated to the form gpmc can understand. The way it has to be
-translated depends on the connected peripheral. Also there is a
-dependency for certain gpmc timings on gpmc clock frequency. Hence a
-generic timing routine was developed to achieve above requirements.
-
-Generic routine provides a generic method to calculate gpmc timings
-from gpmc peripheral timings. struct gpmc_device_timings fields has to
-be updated with timings from the datasheet of the peripheral that is
-connected to gpmc. A few of the peripheral timings can be fed either
-in time or in cycles, provision to handle this scenario has been
-provided (refer struct gpmc_device_timings definition). It may so
-happen that timing as specified by peripheral datasheet is not present
-in timing structure, in this scenario, try to correlate peripheral
-timing to the one available. If that doesn't work, try to add a new
-field as required by peripheral, educate generic timing routine to
-handle it, make sure that it does not break any of the existing.
-Then there may be cases where peripheral datasheet doesn't mention
-certain fields of struct gpmc_device_timings, zero those entries.
-
-Generic timing routine has been verified to work properly on
-multiple onenand's and tusb6010 peripherals.
-
-A word of caution: generic timing routine has been developed based
-on understanding of gpmc timings, peripheral timings, available
-custom timing routines, a kind of reverse engineering without
-most of the datasheets & hardware (to be exact none of those supported
-in mainline having custom timing routine) and by simulation.
-
-gpmc timing dependency on peripheral timings:
-[<gpmc_timing>: <peripheral timing1>, <peripheral timing2> ...]
-
-1. common
-cs_on: t_ceasu
-adv_on: t_avdasu, t_ceavd
-
-2. sync common
-sync_clk: clk
-page_burst_access: t_bacc
-clk_activation: t_ces, t_avds
-
-3. read async muxed
-adv_rd_off: t_avdp_r
-oe_on: t_oeasu, t_aavdh
-access: t_iaa, t_oe, t_ce, t_aa
-rd_cycle: t_rd_cycle, t_cez_r, t_oez
-
-4. read async non-muxed
-adv_rd_off: t_avdp_r
-oe_on: t_oeasu
-access: t_iaa, t_oe, t_ce, t_aa
-rd_cycle: t_rd_cycle, t_cez_r, t_oez
-
-5. read sync muxed
-adv_rd_off: t_avdp_r, t_avdh
-oe_on: t_oeasu, t_ach, cyc_aavdh_oe
-access: t_iaa, cyc_iaa, cyc_oe
-rd_cycle: t_cez_r, t_oez, t_ce_rdyz
-
-6. read sync non-muxed
-adv_rd_off: t_avdp_r
-oe_on: t_oeasu
-access: t_iaa, cyc_iaa, cyc_oe
-rd_cycle: t_cez_r, t_oez, t_ce_rdyz
-
-7. write async muxed
-adv_wr_off: t_avdp_w
-we_on, wr_data_mux_bus: t_weasu, t_aavdh, cyc_aavhd_we
-we_off: t_wpl
-cs_wr_off: t_wph
-wr_cycle: t_cez_w, t_wr_cycle
-
-8. write async non-muxed
-adv_wr_off: t_avdp_w
-we_on, wr_data_mux_bus: t_weasu
-we_off: t_wpl
-cs_wr_off: t_wph
-wr_cycle: t_cez_w, t_wr_cycle
-
-9. write sync muxed
-adv_wr_off: t_avdp_w, t_avdh
-we_on, wr_data_mux_bus: t_weasu, t_rdyo, t_aavdh, cyc_aavhd_we
-we_off: t_wpl, cyc_wpl
-cs_wr_off: t_wph
-wr_cycle: t_cez_w, t_ce_rdyz
-
-10. write sync non-muxed
-adv_wr_off: t_avdp_w
-we_on, wr_data_mux_bus: t_weasu, t_rdyo
-we_off: t_wpl, cyc_wpl
-cs_wr_off: t_wph
-wr_cycle: t_cez_w, t_ce_rdyz
-
-
-Note: Many of gpmc timings are dependent on other gpmc timings (a few
-gpmc timings purely dependent on other gpmc timings, a reason that
-some of the gpmc timings are missing above), and it will result in
-indirect dependency of peripheral timings to gpmc timings other than
-mentioned above, refer timing routine for more details. To know what
-these peripheral timings correspond to, please see explanations in
-struct gpmc_device_timings definition. And for gpmc timings refer
-IP details (link above).
diff --git a/Documentation/bus-virt-phys-mapping.txt b/Documentation/bus-virt-phys-mapping.txt
deleted file mode 100644
index 4bb07c2f3e7d..000000000000
--- a/Documentation/bus-virt-phys-mapping.txt
+++ /dev/null
@@ -1,220 +0,0 @@
-==========================================================
-How to access I/O mapped memory from within device drivers
-==========================================================
-
-:Author: Linus
-
-.. warning::
-
- The virt_to_bus() and bus_to_virt() functions have been
- superseded by the functionality provided by the PCI DMA interface
- (see Documentation/DMA-API-HOWTO.txt). They continue
- to be documented below for historical purposes, but new code
- must not use them. --davidm 00/12/12
-
-::
-
- [ This is a mail message in response to a query on IO mapping, thus the
- strange format for a "document" ]
-
-The AHA-1542 is a bus-master device, and your patch makes the driver give the
-controller the physical address of the buffers, which is correct on x86
-(because all bus master devices see the physical memory mappings directly).
-
-However, on many setups, there are actually **three** different ways of looking
-at memory addresses, and in this case we actually want the third, the
-so-called "bus address".
-
-Essentially, the three ways of addressing memory are (this is "real memory",
-that is, normal RAM--see later about other details):
-
- - CPU untranslated. This is the "physical" address. Physical address
- 0 is what the CPU sees when it drives zeroes on the memory bus.
-
- - CPU translated address. This is the "virtual" address, and is
- completely internal to the CPU itself with the CPU doing the appropriate
- translations into "CPU untranslated".
-
- - bus address. This is the address of memory as seen by OTHER devices,
- not the CPU. Now, in theory there could be many different bus
- addresses, with each device seeing memory in some device-specific way, but
- happily most hardware designers aren't actually actively trying to make
- things any more complex than necessary, so you can assume that all
- external hardware sees the memory the same way.
-
-Now, on normal PCs the bus address is exactly the same as the physical
-address, and things are very simple indeed. However, they are that simple
-because the memory and the devices share the same address space, and that is
-not generally necessarily true on other PCI/ISA setups.
-
-Now, just as an example, on the PReP (PowerPC Reference Platform), the
-CPU sees a memory map something like this (this is from memory)::
-
- 0-2 GB "real memory"
- 2 GB-3 GB "system IO" (inb/out and similar accesses on x86)
- 3 GB-4 GB "IO memory" (shared memory over the IO bus)
-
-Now, that looks simple enough. However, when you look at the same thing from
-the viewpoint of the devices, you have the reverse, and the physical memory
-address 0 actually shows up as address 2 GB for any IO master.
-
-So when the CPU wants any bus master to write to physical memory 0, it
-has to give the master address 0x80000000 as the memory address.
-
-So, for example, depending on how the kernel is actually mapped on the
-PPC, you can end up with a setup like this::
-
- physical address: 0
- virtual address: 0xC0000000
- bus address: 0x80000000
-
-where all the addresses actually point to the same thing. It's just seen
-through different translations..
-
-Similarly, on the Alpha, the normal translation is::
-
- physical address: 0
- virtual address: 0xfffffc0000000000
- bus address: 0x40000000
-
-(but there are also Alphas where the physical address and the bus address
-are the same).
-
-Anyway, the way to look up all these translations, you do::
-
- #include <asm/io.h>
-
- phys_addr = virt_to_phys(virt_addr);
- virt_addr = phys_to_virt(phys_addr);
- bus_addr = virt_to_bus(virt_addr);
- virt_addr = bus_to_virt(bus_addr);
-
-Now, when do you need these?
-
-You want the **virtual** address when you are actually going to access that
-pointer from the kernel. So you can have something like this::
-
- /*
- * this is the hardware "mailbox" we use to communicate with
- * the controller. The controller sees this directly.
- */
- struct mailbox {
- __u32 status;
- __u32 bufstart;
- __u32 buflen;
- ..
- } mbox;
-
- unsigned char * retbuffer;
-
- /* get the address from the controller */
- retbuffer = bus_to_virt(mbox.bufstart);
- switch (retbuffer[0]) {
- case STATUS_OK:
- ...
-
-on the other hand, you want the bus address when you have a buffer that
-you want to give to the controller::
-
- /* ask the controller to read the sense status into "sense_buffer" */
- mbox.bufstart = virt_to_bus(&sense_buffer);
- mbox.buflen = sizeof(sense_buffer);
- mbox.status = 0;
- notify_controller(&mbox);
-
-And you generally **never** want to use the physical address, because you can't
-use that from the CPU (the CPU only uses translated virtual addresses), and
-you can't use it from the bus master.
-
-So why do we care about the physical address at all? We do need the physical
-address in some cases, it's just not very often in normal code. The physical
-address is needed if you use memory mappings, for example, because the
-"remap_pfn_range()" mm function wants the physical address of the memory to
-be remapped as measured in units of pages, a.k.a. the pfn (the memory
-management layer doesn't know about devices outside the CPU, so it
-shouldn't need to know about "bus addresses" etc).
-
-.. note::
-
- The above is only one part of the whole equation. The above
- only talks about "real memory", that is, CPU memory (RAM).
-
-There is a completely different type of memory too, and that's the "shared
-memory" on the PCI or ISA bus. That's generally not RAM (although in the case
-of a video graphics card it can be normal DRAM that is just used for a frame
-buffer), but can be things like a packet buffer in a network card etc.
-
-This memory is called "PCI memory" or "shared memory" or "IO memory" or
-whatever, and there is only one way to access it: the readb/writeb and
-related functions. You should never take the address of such memory, because
-there is really nothing you can do with such an address: it's not
-conceptually in the same memory space as "real memory" at all, so you cannot
-just dereference a pointer. (Sadly, on x86 it **is** in the same memory space,
-so on x86 it actually works to just deference a pointer, but it's not
-portable).
-
-For such memory, you can do things like:
-
- - reading::
-
- /*
- * read first 32 bits from ISA memory at 0xC0000, aka
- * C000:0000 in DOS terms
- */
- unsigned int signature = isa_readl(0xC0000);
-
- - remapping and writing::
-
- /*
- * remap framebuffer PCI memory area at 0xFC000000,
- * size 1MB, so that we can access it: We can directly
- * access only the 640k-1MB area, so anything else
- * has to be remapped.
- */
- void __iomem *baseptr = ioremap(0xFC000000, 1024*1024);
-
- /* write a 'A' to the offset 10 of the area */
- writeb('A',baseptr+10);
-
- /* unmap when we unload the driver */
- iounmap(baseptr);
-
- - copying and clearing::
-
- /* get the 6-byte Ethernet address at ISA address E000:0040 */
- memcpy_fromio(kernel_buffer, 0xE0040, 6);
- /* write a packet to the driver */
- memcpy_toio(0xE1000, skb->data, skb->len);
- /* clear the frame buffer */
- memset_io(0xA0000, 0, 0x10000);
-
-OK, that just about covers the basics of accessing IO portably. Questions?
-Comments? You may think that all the above is overly complex, but one day you
-might find yourself with a 500 MHz Alpha in front of you, and then you'll be
-happy that your driver works ;)
-
-Note that kernel versions 2.0.x (and earlier) mistakenly called the
-ioremap() function "vremap()". ioremap() is the proper name, but I
-didn't think straight when I wrote it originally. People who have to
-support both can do something like::
-
- /* support old naming silliness */
- #if LINUX_VERSION_CODE < 0x020100
- #define ioremap vremap
- #define iounmap vfree
- #endif
-
-at the top of their source files, and then they can use the right names
-even on 2.0.x systems.
-
-And the above sounds worse than it really is. Most real drivers really
-don't do all that complex things (or rather: the complexity is not so
-much in the actual IO accesses as in error handling and timeouts etc).
-It's generally not hard to fix drivers, and in many cases the code
-actually looks better afterwards::
-
- unsigned long signature = *(unsigned int *) 0xC0000;
- vs
- unsigned long signature = readl(0xC0000);
-
-I think the second version actually is more readable, no?
diff --git a/Documentation/cdrom/Makefile b/Documentation/cdrom/Makefile
deleted file mode 100644
index a19e321928e1..000000000000
--- a/Documentation/cdrom/Makefile
+++ /dev/null
@@ -1,21 +0,0 @@
-LATEXFILE = cdrom-standard
-
-all:
- make clean
- latex $(LATEXFILE)
- latex $(LATEXFILE)
- @if [ -x `which gv` ]; then \
- `dvips -q -t letter -o $(LATEXFILE).ps $(LATEXFILE).dvi` ;\
- `gv -antialias -media letter -nocenter $(LATEXFILE).ps` ;\
- else \
- `xdvi $(LATEXFILE).dvi &` ;\
- fi
- make sortofclean
-
-clean:
- rm -f $(LATEXFILE).ps $(LATEXFILE).dvi $(LATEXFILE).aux $(LATEXFILE).log
-
-sortofclean:
- rm -f $(LATEXFILE).aux $(LATEXFILE).log
-
-
diff --git a/Documentation/cdrom/cdrom-standard.rst b/Documentation/cdrom/cdrom-standard.rst
new file mode 100644
index 000000000000..b97a4e9b9bd3
--- /dev/null
+++ b/Documentation/cdrom/cdrom-standard.rst
@@ -0,0 +1,1047 @@
+=======================
+A Linux CD-ROM standard
+=======================
+
+:Author: David van Leeuwen <david@ElseWare.cistron.nl>
+:Date: 12 March 1999
+:Updated by: Erik Andersen (andersee@debian.org)
+:Updated by: Jens Axboe (axboe@image.dk)
+
+
+Introduction
+============
+
+Linux is probably the Unix-like operating system that supports
+the widest variety of hardware devices. The reasons for this are
+presumably
+
+- The large list of hardware devices available for the many platforms
+ that Linux now supports (i.e., i386-PCs, Sparc Suns, etc.)
+- The open design of the operating system, such that anybody can write a
+ driver for Linux.
+- There is plenty of source code around as examples of how to write a driver.
+
+The openness of Linux, and the many different types of available
+hardware has allowed Linux to support many different hardware devices.
+Unfortunately, the very openness that has allowed Linux to support
+all these different devices has also allowed the behavior of each
+device driver to differ significantly from one device to another.
+This divergence of behavior has been very significant for CD-ROM
+devices; the way a particular drive reacts to a `standard` *ioctl()*
+call varies greatly from one device driver to another. To avoid making
+their drivers totally inconsistent, the writers of Linux CD-ROM
+drivers generally created new device drivers by understanding, copying,
+and then changing an existing one. Unfortunately, this practice did not
+maintain uniform behavior across all the Linux CD-ROM drivers.
+
+This document describes an effort to establish Uniform behavior across
+all the different CD-ROM device drivers for Linux. This document also
+defines the various *ioctl()'s*, and how the low-level CD-ROM device
+drivers should implement them. Currently (as of the Linux 2.1.\ *x*
+development kernels) several low-level CD-ROM device drivers, including
+both IDE/ATAPI and SCSI, now use this Uniform interface.
+
+When the CD-ROM was developed, the interface between the CD-ROM drive
+and the computer was not specified in the standards. As a result, many
+different CD-ROM interfaces were developed. Some of them had their
+own proprietary design (Sony, Mitsumi, Panasonic, Philips), other
+manufacturers adopted an existing electrical interface and changed
+the functionality (CreativeLabs/SoundBlaster, Teac, Funai) or simply
+adapted their drives to one or more of the already existing electrical
+interfaces (Aztech, Sanyo, Funai, Vertos, Longshine, Optics Storage and
+most of the `NoName` manufacturers). In cases where a new drive really
+brought its own interface or used its own command set and flow control
+scheme, either a separate driver had to be written, or an existing
+driver had to be enhanced. History has delivered us CD-ROM support for
+many of these different interfaces. Nowadays, almost all new CD-ROM
+drives are either IDE/ATAPI or SCSI, and it is very unlikely that any
+manufacturer will create a new interface. Even finding drives for the
+old proprietary interfaces is getting difficult.
+
+When (in the 1.3.70's) I looked at the existing software interface,
+which was expressed through `cdrom.h`, it appeared to be a rather wild
+set of commands and data formats [#f1]_. It seemed that many
+features of the software interface had been added to accommodate the
+capabilities of a particular drive, in an *ad hoc* manner. More
+importantly, it appeared that the behavior of the `standard` commands
+was different for most of the different drivers: e. g., some drivers
+close the tray if an *open()* call occurs when the tray is open, while
+others do not. Some drivers lock the door upon opening the device, to
+prevent an incoherent file system, but others don't, to allow software
+ejection. Undoubtedly, the capabilities of the different drives vary,
+but even when two drives have the same capability their drivers'
+behavior was usually different.
+
+.. [#f1]
+ I cannot recollect what kernel version I looked at, then,
+ presumably 1.2.13 and 1.3.34 --- the latest kernel that I was
+ indirectly involved in.
+
+I decided to start a discussion on how to make all the Linux CD-ROM
+drivers behave more uniformly. I began by contacting the developers of
+the many CD-ROM drivers found in the Linux kernel. Their reactions
+encouraged me to write the Uniform CD-ROM Driver which this document is
+intended to describe. The implementation of the Uniform CD-ROM Driver is
+in the file `cdrom.c`. This driver is intended to be an additional software
+layer that sits on top of the low-level device drivers for each CD-ROM drive.
+By adding this additional layer, it is possible to have all the different
+CD-ROM devices behave **exactly** the same (insofar as the underlying
+hardware will allow).
+
+The goal of the Uniform CD-ROM Driver is **not** to alienate driver developers
+whohave not yet taken steps to support this effort. The goal of Uniform CD-ROM
+Driver is simply to give people writing application programs for CD-ROM drives
+**one** Linux CD-ROM interface with consistent behavior for all
+CD-ROM devices. In addition, this also provides a consistent interface
+between the low-level device driver code and the Linux kernel. Care
+is taken that 100% compatibility exists with the data structures and
+programmer's interface defined in `cdrom.h`. This guide was written to
+help CD-ROM driver developers adapt their code to use the Uniform CD-ROM
+Driver code defined in `cdrom.c`.
+
+Personally, I think that the most important hardware interfaces are
+the IDE/ATAPI drives and, of course, the SCSI drives, but as prices
+of hardware drop continuously, it is also likely that people may have
+more than one CD-ROM drive, possibly of mixed types. It is important
+that these drives behave in the same way. In December 1994, one of the
+cheapest CD-ROM drives was a Philips cm206, a double-speed proprietary
+drive. In the months that I was busy writing a Linux driver for it,
+proprietary drives became obsolete and IDE/ATAPI drives became the
+standard. At the time of the last update to this document (November
+1997) it is becoming difficult to even **find** anything less than a
+16 speed CD-ROM drive, and 24 speed drives are common.
+
+.. _cdrom_api:
+
+Standardizing through another software level
+============================================
+
+At the time this document was conceived, all drivers directly
+implemented the CD-ROM *ioctl()* calls through their own routines. This
+led to the danger of different drivers forgetting to do important things
+like checking that the user was giving the driver valid data. More
+importantly, this led to the divergence of behavior, which has already
+been discussed.
+
+For this reason, the Uniform CD-ROM Driver was created to enforce consistent
+CD-ROM drive behavior, and to provide a common set of services to the various
+low-level CD-ROM device drivers. The Uniform CD-ROM Driver now provides another
+software-level, that separates the *ioctl()* and *open()* implementation
+from the actual hardware implementation. Note that this effort has
+made few changes which will affect a user's application programs. The
+greatest change involved moving the contents of the various low-level
+CD-ROM drivers\' header files to the kernel's cdrom directory. This was
+done to help ensure that the user is only presented with only one cdrom
+interface, the interface defined in `cdrom.h`.
+
+CD-ROM drives are specific enough (i. e., different from other
+block-devices such as floppy or hard disc drives), to define a set
+of common **CD-ROM device operations**, *<cdrom-device>_dops*.
+These operations are different from the classical block-device file
+operations, *<block-device>_fops*.
+
+The routines for the Uniform CD-ROM Driver interface level are implemented
+in the file `cdrom.c`. In this file, the Uniform CD-ROM Driver interfaces
+with the kernel as a block device by registering the following general
+*struct file_operations*::
+
+ struct file_operations cdrom_fops = {
+ NULL, /* lseek */
+ block _read , /* read--general block-dev read */
+ block _write, /* write--general block-dev write */
+ NULL, /* readdir */
+ NULL, /* select */
+ cdrom_ioctl, /* ioctl */
+ NULL, /* mmap */
+ cdrom_open, /* open */
+ cdrom_release, /* release */
+ NULL, /* fsync */
+ NULL, /* fasync */
+ NULL /* revalidate */
+ };
+
+Every active CD-ROM device shares this *struct*. The routines
+declared above are all implemented in `cdrom.c`, since this file is the
+place where the behavior of all CD-ROM-devices is defined and
+standardized. The actual interface to the various types of CD-ROM
+hardware is still performed by various low-level CD-ROM-device
+drivers. These routines simply implement certain **capabilities**
+that are common to all CD-ROM (and really, all removable-media
+devices).
+
+Registration of a low-level CD-ROM device driver is now done through
+the general routines in `cdrom.c`, not through the Virtual File System
+(VFS) any more. The interface implemented in `cdrom.c` is carried out
+through two general structures that contain information about the
+capabilities of the driver, and the specific drives on which the
+driver operates. The structures are:
+
+cdrom_device_ops
+ This structure contains information about the low-level driver for a
+ CD-ROM device. This structure is conceptually connected to the major
+ number of the device (although some drivers may have different
+ major numbers, as is the case for the IDE driver).
+
+cdrom_device_info
+ This structure contains information about a particular CD-ROM drive,
+ such as its device name, speed, etc. This structure is conceptually
+ connected to the minor number of the device.
+
+Registering a particular CD-ROM drive with the Uniform CD-ROM Driver
+is done by the low-level device driver though a call to::
+
+ register_cdrom(struct cdrom_device_info * <device>_info)
+
+The device information structure, *<device>_info*, contains all the
+information needed for the kernel to interface with the low-level
+CD-ROM device driver. One of the most important entries in this
+structure is a pointer to the *cdrom_device_ops* structure of the
+low-level driver.
+
+The device operations structure, *cdrom_device_ops*, contains a list
+of pointers to the functions which are implemented in the low-level
+device driver. When `cdrom.c` accesses a CD-ROM device, it does it
+through the functions in this structure. It is impossible to know all
+the capabilities of future CD-ROM drives, so it is expected that this
+list may need to be expanded from time to time as new technologies are
+developed. For example, CD-R and CD-R/W drives are beginning to become
+popular, and support will soon need to be added for them. For now, the
+current *struct* is::
+
+ struct cdrom_device_ops {
+ int (*open)(struct cdrom_device_info *, int)
+ void (*release)(struct cdrom_device_info *);
+ int (*drive_status)(struct cdrom_device_info *, int);
+ unsigned int (*check_events)(struct cdrom_device_info *,
+ unsigned int, int);
+ int (*media_changed)(struct cdrom_device_info *, int);
+ int (*tray_move)(struct cdrom_device_info *, int);
+ int (*lock_door)(struct cdrom_device_info *, int);
+ int (*select_speed)(struct cdrom_device_info *, unsigned long);
+ int (*get_last_session) (struct cdrom_device_info *,
+ struct cdrom_multisession *);
+ int (*get_mcn)(struct cdrom_device_info *, struct cdrom_mcn *);
+ int (*reset)(struct cdrom_device_info *);
+ int (*audio_ioctl)(struct cdrom_device_info *,
+ unsigned int, void *);
+ const int capability; /* capability flags */
+ int (*generic_packet)(struct cdrom_device_info *,
+ struct packet_command *);
+ };
+
+When a low-level device driver implements one of these capabilities,
+it should add a function pointer to this *struct*. When a particular
+function is not implemented, however, this *struct* should contain a
+NULL instead. The *capability* flags specify the capabilities of the
+CD-ROM hardware and/or low-level CD-ROM driver when a CD-ROM drive
+is registered with the Uniform CD-ROM Driver.
+
+Note that most functions have fewer parameters than their
+*blkdev_fops* counterparts. This is because very little of the
+information in the structures *inode* and *file* is used. For most
+drivers, the main parameter is the *struct* *cdrom_device_info*, from
+which the major and minor number can be extracted. (Most low-level
+CD-ROM drivers don't even look at the major and minor number though,
+since many of them only support one device.) This will be available
+through *dev* in *cdrom_device_info* described below.
+
+The drive-specific, minor-like information that is registered with
+`cdrom.c`, currently contains the following fields::
+
+ struct cdrom_device_info {
+ const struct cdrom_device_ops * ops; /* device operations for this major */
+ struct list_head list; /* linked list of all device_info */
+ struct gendisk * disk; /* matching block layer disk */
+ void * handle; /* driver-dependent data */
+
+ int mask; /* mask of capability: disables them */
+ int speed; /* maximum speed for reading data */
+ int capacity; /* number of discs in a jukebox */
+
+ unsigned int options:30; /* options flags */
+ unsigned mc_flags:2; /* media-change buffer flags */
+ unsigned int vfs_events; /* cached events for vfs path */
+ unsigned int ioctl_events; /* cached events for ioctl path */
+ int use_count; /* number of times device is opened */
+ char name[20]; /* name of the device type */
+
+ __u8 sanyo_slot : 2; /* Sanyo 3-CD changer support */
+ __u8 keeplocked : 1; /* CDROM_LOCKDOOR status */
+ __u8 reserved : 5; /* not used yet */
+ int cdda_method; /* see CDDA_* flags */
+ __u8 last_sense; /* saves last sense key */
+ __u8 media_written; /* dirty flag, DVD+RW bookkeeping */
+ unsigned short mmc3_profile; /* current MMC3 profile */
+ int for_data; /* unknown:TBD */
+ int mrw_mode_page; /* which MRW mode page is in use */
+ };
+
+Using this *struct*, a linked list of the registered minor devices is
+built, using the *next* field. The device number, the device operations
+struct and specifications of properties of the drive are stored in this
+structure.
+
+The *mask* flags can be used to mask out some of the capabilities listed
+in *ops->capability*, if a specific drive doesn't support a feature
+of the driver. The value *speed* specifies the maximum head-rate of the
+drive, measured in units of normal audio speed (176kB/sec raw data or
+150kB/sec file system data). The parameters are declared *const*
+because they describe properties of the drive, which don't change after
+registration.
+
+A few registers contain variables local to the CD-ROM drive. The
+flags *options* are used to specify how the general CD-ROM routines
+should behave. These various flags registers should provide enough
+flexibility to adapt to the different users' wishes (and **not** the
+`arbitrary` wishes of the author of the low-level device driver, as is
+the case in the old scheme). The register *mc_flags* is used to buffer
+the information from *media_changed()* to two separate queues. Other
+data that is specific to a minor drive, can be accessed through *handle*,
+which can point to a data structure specific to the low-level driver.
+The fields *use_count*, *next*, *options* and *mc_flags* need not be
+initialized.
+
+The intermediate software layer that `cdrom.c` forms will perform some
+additional bookkeeping. The use count of the device (the number of
+processes that have the device opened) is registered in *use_count*. The
+function *cdrom_ioctl()* will verify the appropriate user-memory regions
+for read and write, and in case a location on the CD is transferred,
+it will `sanitize` the format by making requests to the low-level
+drivers in a standard format, and translating all formats between the
+user-software and low level drivers. This relieves much of the drivers'
+memory checking and format checking and translation. Also, the necessary
+structures will be declared on the program stack.
+
+The implementation of the functions should be as defined in the
+following sections. Two functions **must** be implemented, namely
+*open()* and *release()*. Other functions may be omitted, their
+corresponding capability flags will be cleared upon registration.
+Generally, a function returns zero on success and negative on error. A
+function call should return only after the command has completed, but of
+course waiting for the device should not use processor time.
+
+::
+
+ int open(struct cdrom_device_info *cdi, int purpose)
+
+*Open()* should try to open the device for a specific *purpose*, which
+can be either:
+
+- Open for reading data, as done by `mount()` (2), or the
+ user commands `dd` or `cat`.
+- Open for *ioctl* commands, as done by audio-CD playing programs.
+
+Notice that any strategic code (closing tray upon *open()*, etc.) is
+done by the calling routine in `cdrom.c`, so the low-level routine
+should only be concerned with proper initialization, such as spinning
+up the disc, etc.
+
+::
+
+ void release(struct cdrom_device_info *cdi)
+
+Device-specific actions should be taken such as spinning down the device.
+However, strategic actions such as ejection of the tray, or unlocking
+the door, should be left over to the general routine *cdrom_release()*.
+This is the only function returning type *void*.
+
+.. _cdrom_drive_status:
+
+::
+
+ int drive_status(struct cdrom_device_info *cdi, int slot_nr)
+
+The function *drive_status*, if implemented, should provide
+information on the status of the drive (not the status of the disc,
+which may or may not be in the drive). If the drive is not a changer,
+*slot_nr* should be ignored. In `cdrom.h` the possibilities are listed::
+
+
+ CDS_NO_INFO /* no information available */
+ CDS_NO_DISC /* no disc is inserted, tray is closed */
+ CDS_TRAY_OPEN /* tray is opened */
+ CDS_DRIVE_NOT_READY /* something is wrong, tray is moving? */
+ CDS_DISC_OK /* a disc is loaded and everything is fine */
+
+::
+
+ int tray_move(struct cdrom_device_info *cdi, int position)
+
+This function, if implemented, should control the tray movement. (No
+other function should control this.) The parameter *position* controls
+the desired direction of movement:
+
+- 0 Close tray
+- 1 Open tray
+
+This function returns 0 upon success, and a non-zero value upon
+error. Note that if the tray is already in the desired position, no
+action need be taken, and the return value should be 0.
+
+::
+
+ int lock_door(struct cdrom_device_info *cdi, int lock)
+
+This function (and no other code) controls locking of the door, if the
+drive allows this. The value of *lock* controls the desired locking
+state:
+
+- 0 Unlock door, manual opening is allowed
+- 1 Lock door, tray cannot be ejected manually
+
+This function returns 0 upon success, and a non-zero value upon
+error. Note that if the door is already in the requested state, no
+action need be taken, and the return value should be 0.
+
+::
+
+ int select_speed(struct cdrom_device_info *cdi, unsigned long speed)
+
+Some CD-ROM drives are capable of changing their head-speed. There
+are several reasons for changing the speed of a CD-ROM drive. Badly
+pressed CD-ROM s may benefit from less-than-maximum head rate. Modern
+CD-ROM drives can obtain very high head rates (up to *24x* is
+common). It has been reported that these drives can make reading
+errors at these high speeds, reducing the speed can prevent data loss
+in these circumstances. Finally, some of these drives can
+make an annoyingly loud noise, which a lower speed may reduce.
+
+This function specifies the speed at which data is read or audio is
+played back. The value of *speed* specifies the head-speed of the
+drive, measured in units of standard cdrom speed (176kB/sec raw data
+or 150kB/sec file system data). So to request that a CD-ROM drive
+operate at 300kB/sec you would call the CDROM_SELECT_SPEED *ioctl*
+with *speed=2*. The special value `0` means `auto-selection`, i. e.,
+maximum data-rate or real-time audio rate. If the drive doesn't have
+this `auto-selection` capability, the decision should be made on the
+current disc loaded and the return value should be positive. A negative
+return value indicates an error.
+
+::
+
+ int get_last_session(struct cdrom_device_info *cdi,
+ struct cdrom_multisession *ms_info)
+
+This function should implement the old corresponding *ioctl()*. For
+device *cdi->dev*, the start of the last session of the current disc
+should be returned in the pointer argument *ms_info*. Note that
+routines in `cdrom.c` have sanitized this argument: its requested
+format will **always** be of the type *CDROM_LBA* (linear block
+addressing mode), whatever the calling software requested. But
+sanitization goes even further: the low-level implementation may
+return the requested information in *CDROM_MSF* format if it wishes so
+(setting the *ms_info->addr_format* field appropriately, of
+course) and the routines in `cdrom.c` will make the transformation if
+necessary. The return value is 0 upon success.
+
+::
+
+ int get_mcn(struct cdrom_device_info *cdi,
+ struct cdrom_mcn *mcn)
+
+Some discs carry a `Media Catalog Number` (MCN), also called
+`Universal Product Code` (UPC). This number should reflect the number
+that is generally found in the bar-code on the product. Unfortunately,
+the few discs that carry such a number on the disc don't even use the
+same format. The return argument to this function is a pointer to a
+pre-declared memory region of type *struct cdrom_mcn*. The MCN is
+expected as a 13-character string, terminated by a null-character.
+
+::
+
+ int reset(struct cdrom_device_info *cdi)
+
+This call should perform a hard-reset on the drive (although in
+circumstances that a hard-reset is necessary, a drive may very well not
+listen to commands anymore). Preferably, control is returned to the
+caller only after the drive has finished resetting. If the drive is no
+longer listening, it may be wise for the underlying low-level cdrom
+driver to time out.
+
+::
+
+ int audio_ioctl(struct cdrom_device_info *cdi,
+ unsigned int cmd, void *arg)
+
+Some of the CD-ROM-\ *ioctl()*\ 's defined in `cdrom.h` can be
+implemented by the routines described above, and hence the function
+*cdrom_ioctl* will use those. However, most *ioctl()*\ 's deal with
+audio-control. We have decided to leave these to be accessed through a
+single function, repeating the arguments *cmd* and *arg*. Note that
+the latter is of type *void*, rather than *unsigned long int*.
+The routine *cdrom_ioctl()* does do some useful things,
+though. It sanitizes the address format type to *CDROM_MSF* (Minutes,
+Seconds, Frames) for all audio calls. It also verifies the memory
+location of *arg*, and reserves stack-memory for the argument. This
+makes implementation of the *audio_ioctl()* much simpler than in the
+old driver scheme. For example, you may look up the function
+*cm206_audio_ioctl()* `cm206.c` that should be updated with
+this documentation.
+
+An unimplemented ioctl should return *-ENOSYS*, but a harmless request
+(e. g., *CDROMSTART*) may be ignored by returning 0 (success). Other
+errors should be according to the standards, whatever they are. When
+an error is returned by the low-level driver, the Uniform CD-ROM Driver
+tries whenever possible to return the error code to the calling program.
+(We may decide to sanitize the return value in *cdrom_ioctl()* though, in
+order to guarantee a uniform interface to the audio-player software.)
+
+::
+
+ int dev_ioctl(struct cdrom_device_info *cdi,
+ unsigned int cmd, unsigned long arg)
+
+Some *ioctl()'s* seem to be specific to certain CD-ROM drives. That is,
+they are introduced to service some capabilities of certain drives. In
+fact, there are 6 different *ioctl()'s* for reading data, either in some
+particular kind of format, or audio data. Not many drives support
+reading audio tracks as data, I believe this is because of protection
+of copyrights of artists. Moreover, I think that if audio-tracks are
+supported, it should be done through the VFS and not via *ioctl()'s*. A
+problem here could be the fact that audio-frames are 2352 bytes long,
+so either the audio-file-system should ask for 75264 bytes at once
+(the least common multiple of 512 and 2352), or the drivers should
+bend their backs to cope with this incoherence (to which I would be
+opposed). Furthermore, it is very difficult for the hardware to find
+the exact frame boundaries, since there are no synchronization headers
+in audio frames. Once these issues are resolved, this code should be
+standardized in `cdrom.c`.
+
+Because there are so many *ioctl()'s* that seem to be introduced to
+satisfy certain drivers [#f2]_, any non-standard *ioctl()*\ s
+are routed through the call *dev_ioctl()*. In principle, `private`
+*ioctl()*\ 's should be numbered after the device's major number, and not
+the general CD-ROM *ioctl* number, `0x53`. Currently the
+non-supported *ioctl()'s* are:
+
+ CDROMREADMODE1, CDROMREADMODE2, CDROMREADAUDIO, CDROMREADRAW,
+ CDROMREADCOOKED, CDROMSEEK, CDROMPLAY-BLK and CDROM-READALL
+
+.. [#f2]
+
+ Is there software around that actually uses these? I'd be interested!
+
+.. _cdrom_capabilities:
+
+CD-ROM capabilities
+-------------------
+
+Instead of just implementing some *ioctl* calls, the interface in
+`cdrom.c` supplies the possibility to indicate the **capabilities**
+of a CD-ROM drive. This can be done by ORing any number of
+capability-constants that are defined in `cdrom.h` at the registration
+phase. Currently, the capabilities are any of::
+
+ CDC_CLOSE_TRAY /* can close tray by software control */
+ CDC_OPEN_TRAY /* can open tray */
+ CDC_LOCK /* can lock and unlock the door */
+ CDC_SELECT_SPEED /* can select speed, in units of * sim*150 ,kB/s */
+ CDC_SELECT_DISC /* drive is juke-box */
+ CDC_MULTI_SESSION /* can read sessions *> rm1* */
+ CDC_MCN /* can read Media Catalog Number */
+ CDC_MEDIA_CHANGED /* can report if disc has changed */
+ CDC_PLAY_AUDIO /* can perform audio-functions (play, pause, etc) */
+ CDC_RESET /* hard reset device */
+ CDC_IOCTLS /* driver has non-standard ioctls */
+ CDC_DRIVE_STATUS /* driver implements drive status */
+
+The capability flag is declared *const*, to prevent drivers from
+accidentally tampering with the contents. The capability flags actually
+inform `cdrom.c` of what the driver can do. If the drive found
+by the driver does not have the capability, is can be masked out by
+the *cdrom_device_info* variable *mask*. For instance, the SCSI CD-ROM
+driver has implemented the code for loading and ejecting CD-ROM's, and
+hence its corresponding flags in *capability* will be set. But a SCSI
+CD-ROM drive might be a caddy system, which can't load the tray, and
+hence for this drive the *cdrom_device_info* struct will have set
+the *CDC_CLOSE_TRAY* bit in *mask*.
+
+In the file `cdrom.c` you will encounter many constructions of the type::
+
+ if (cdo->capability & ~cdi->mask & CDC _<capability>) ...
+
+There is no *ioctl* to set the mask... The reason is that
+I think it is better to control the **behavior** rather than the
+**capabilities**.
+
+Options
+-------
+
+A final flag register controls the **behavior** of the CD-ROM
+drives, in order to satisfy different users' wishes, hopefully
+independently of the ideas of the respective author who happened to
+have made the drive's support available to the Linux community. The
+current behavior options are::
+
+ CDO_AUTO_CLOSE /* try to close tray upon device open() */
+ CDO_AUTO_EJECT /* try to open tray on last device close() */
+ CDO_USE_FFLAGS /* use file_pointer->f_flags to indicate purpose for open() */
+ CDO_LOCK /* try to lock door if device is opened */
+ CDO_CHECK_TYPE /* ensure disc type is data if opened for data */
+
+The initial value of this register is
+`CDO_AUTO_CLOSE | CDO_USE_FFLAGS | CDO_LOCK`, reflecting my own view on user
+interface and software standards. Before you protest, there are two
+new *ioctl()'s* implemented in `cdrom.c`, that allow you to control the
+behavior by software. These are::
+
+ CDROM_SET_OPTIONS /* set options specified in (int)arg */
+ CDROM_CLEAR_OPTIONS /* clear options specified in (int)arg */
+
+One option needs some more explanation: *CDO_USE_FFLAGS*. In the next
+newsection we explain what the need for this option is.
+
+A software package `setcd`, available from the Debian distribution
+and `sunsite.unc.edu`, allows user level control of these flags.
+
+
+The need to know the purpose of opening the CD-ROM device
+=========================================================
+
+Traditionally, Unix devices can be used in two different `modes`,
+either by reading/writing to the device file, or by issuing
+controlling commands to the device, by the device's *ioctl()*
+call. The problem with CD-ROM drives, is that they can be used for
+two entirely different purposes. One is to mount removable
+file systems, CD-ROM's, the other is to play audio CD's. Audio commands
+are implemented entirely through *ioctl()\'s*, presumably because the
+first implementation (SUN?) has been such. In principle there is
+nothing wrong with this, but a good control of the `CD player` demands
+that the device can **always** be opened in order to give the
+*ioctl* commands, regardless of the state the drive is in.
+
+On the other hand, when used as a removable-media disc drive (what the
+original purpose of CD-ROM s is) we would like to make sure that the
+disc drive is ready for operation upon opening the device. In the old
+scheme, some CD-ROM drivers don't do any integrity checking, resulting
+in a number of i/o errors reported by the VFS to the kernel when an
+attempt for mounting a CD-ROM on an empty drive occurs. This is not a
+particularly elegant way to find out that there is no CD-ROM inserted;
+it more-or-less looks like the old IBM-PC trying to read an empty floppy
+drive for a couple of seconds, after which the system complains it
+can't read from it. Nowadays we can **sense** the existence of a
+removable medium in a drive, and we believe we should exploit that
+fact. An integrity check on opening of the device, that verifies the
+availability of a CD-ROM and its correct type (data), would be
+desirable.
+
+These two ways of using a CD-ROM drive, principally for data and
+secondarily for playing audio discs, have different demands for the
+behavior of the *open()* call. Audio use simply wants to open the
+device in order to get a file handle which is needed for issuing
+*ioctl* commands, while data use wants to open for correct and
+reliable data transfer. The only way user programs can indicate what
+their *purpose* of opening the device is, is through the *flags*
+parameter (see `open(2)`). For CD-ROM devices, these flags aren't
+implemented (some drivers implement checking for write-related flags,
+but this is not strictly necessary if the device file has correct
+permission flags). Most option flags simply don't make sense to
+CD-ROM devices: *O_CREAT*, *O_NOCTTY*, *O_TRUNC*, *O_APPEND*, and
+*O_SYNC* have no meaning to a CD-ROM.
+
+We therefore propose to use the flag *O_NONBLOCK* to indicate
+that the device is opened just for issuing *ioctl*
+commands. Strictly, the meaning of *O_NONBLOCK* is that opening and
+subsequent calls to the device don't cause the calling process to
+wait. We could interpret this as don't wait until someone has
+inserted some valid data-CD-ROM. Thus, our proposal of the
+implementation for the *open()* call for CD-ROM s is:
+
+- If no other flags are set than *O_RDONLY*, the device is opened
+ for data transfer, and the return value will be 0 only upon successful
+ initialization of the transfer. The call may even induce some actions
+ on the CD-ROM, such as closing the tray.
+- If the option flag *O_NONBLOCK* is set, opening will always be
+ successful, unless the whole device doesn't exist. The drive will take
+ no actions whatsoever.
+
+And what about standards?
+-------------------------
+
+You might hesitate to accept this proposal as it comes from the
+Linux community, and not from some standardizing institute. What
+about SUN, SGI, HP and all those other Unix and hardware vendors?
+Well, these companies are in the lucky position that they generally
+control both the hardware and software of their supported products,
+and are large enough to set their own standard. They do not have to
+deal with a dozen or more different, competing hardware
+configurations\ [#f3]_.
+
+.. [#f3]
+
+ Incidentally, I think that SUN's approach to mounting CD-ROM s is very
+ good in origin: under Solaris a volume-daemon automatically mounts a
+ newly inserted CD-ROM under `/cdrom/*<volume-name>*`.
+
+ In my opinion they should have pushed this
+ further and have **every** CD-ROM on the local area network be
+ mounted at the similar location, i. e., no matter in which particular
+ machine you insert a CD-ROM, it will always appear at the same
+ position in the directory tree, on every system. When I wanted to
+ implement such a user-program for Linux, I came across the
+ differences in behavior of the various drivers, and the need for an
+ *ioctl* informing about media changes.
+
+We believe that using *O_NONBLOCK* to indicate that a device is being opened
+for *ioctl* commands only can be easily introduced in the Linux
+community. All the CD-player authors will have to be informed, we can
+even send in our own patches to the programs. The use of *O_NONBLOCK*
+has most likely no influence on the behavior of the CD-players on
+other operating systems than Linux. Finally, a user can always revert
+to old behavior by a call to
+*ioctl(file_descriptor, CDROM_CLEAR_OPTIONS, CDO_USE_FFLAGS)*.
+
+The preferred strategy of *open()*
+----------------------------------
+
+The routines in `cdrom.c` are designed in such a way that run-time
+configuration of the behavior of CD-ROM devices (of **any** type)
+can be carried out, by the *CDROM_SET/CLEAR_OPTIONS* *ioctls*. Thus, various
+modes of operation can be set:
+
+`CDO_AUTO_CLOSE | CDO_USE_FFLAGS | CDO_LOCK`
+ This is the default setting. (With *CDO_CHECK_TYPE* it will be better, in
+ the future.) If the device is not yet opened by any other process, and if
+ the device is being opened for data (*O_NONBLOCK* is not set) and the
+ tray is found to be open, an attempt to close the tray is made. Then,
+ it is verified that a disc is in the drive and, if *CDO_CHECK_TYPE* is
+ set, that it contains tracks of type `data mode 1`. Only if all tests
+ are passed is the return value zero. The door is locked to prevent file
+ system corruption. If the drive is opened for audio (*O_NONBLOCK* is
+ set), no actions are taken and a value of 0 will be returned.
+
+`CDO_AUTO_CLOSE | CDO_AUTO_EJECT | CDO_LOCK`
+ This mimics the behavior of the current sbpcd-driver. The option flags are
+ ignored, the tray is closed on the first open, if necessary. Similarly,
+ the tray is opened on the last release, i. e., if a CD-ROM is unmounted,
+ it is automatically ejected, such that the user can replace it.
+
+We hope that these option can convince everybody (both driver
+maintainers and user program developers) to adopt the new CD-ROM
+driver scheme and option flag interpretation.
+
+Description of routines in `cdrom.c`
+====================================
+
+Only a few routines in `cdrom.c` are exported to the drivers. In this
+new section we will discuss these, as well as the functions that `take
+over` the CD-ROM interface to the kernel. The header file belonging
+to `cdrom.c` is called `cdrom.h`. Formerly, some of the contents of this
+file were placed in the file `ucdrom.h`, but this file has now been
+merged back into `cdrom.h`.
+
+::
+
+ struct file_operations cdrom_fops
+
+The contents of this structure were described in cdrom_api_.
+A pointer to this structure is assigned to the *fops* field
+of the *struct gendisk*.
+
+::
+
+ int register_cdrom(struct cdrom_device_info *cdi)
+
+This function is used in about the same way one registers *cdrom_fops*
+with the kernel, the device operations and information structures,
+as described in cdrom_api_, should be registered with the
+Uniform CD-ROM Driver::
+
+ register_cdrom(&<device>_info);
+
+
+This function returns zero upon success, and non-zero upon
+failure. The structure *<device>_info* should have a pointer to the
+driver's *<device>_dops*, as in::
+
+ struct cdrom_device_info <device>_info = {
+ <device>_dops;
+ ...
+ }
+
+Note that a driver must have one static structure, *<device>_dops*, while
+it may have as many structures *<device>_info* as there are minor devices
+active. *Register_cdrom()* builds a linked list from these.
+
+
+::
+
+ void unregister_cdrom(struct cdrom_device_info *cdi)
+
+Unregistering device *cdi* with minor number *MINOR(cdi->dev)* removes
+the minor device from the list. If it was the last registered minor for
+the low-level driver, this disconnects the registered device-operation
+routines from the CD-ROM interface. This function returns zero upon
+success, and non-zero upon failure.
+
+::
+
+ int cdrom_open(struct inode * ip, struct file * fp)
+
+This function is not called directly by the low-level drivers, it is
+listed in the standard *cdrom_fops*. If the VFS opens a file, this
+function becomes active. A strategy is implemented in this routine,
+taking care of all capabilities and options that are set in the
+*cdrom_device_ops* connected to the device. Then, the program flow is
+transferred to the device_dependent *open()* call.
+
+::
+
+ void cdrom_release(struct inode *ip, struct file *fp)
+
+This function implements the reverse-logic of *cdrom_open()*, and then
+calls the device-dependent *release()* routine. When the use-count has
+reached 0, the allocated buffers are flushed by calls to *sync_dev(dev)*
+and *invalidate_buffers(dev)*.
+
+
+.. _cdrom_ioctl:
+
+::
+
+ int cdrom_ioctl(struct inode *ip, struct file *fp,
+ unsigned int cmd, unsigned long arg)
+
+This function handles all the standard *ioctl* requests for CD-ROM
+devices in a uniform way. The different calls fall into three
+categories: *ioctl()'s* that can be directly implemented by device
+operations, ones that are routed through the call *audio_ioctl()*, and
+the remaining ones, that are presumable device-dependent. Generally, a
+negative return value indicates an error.
+
+Directly implemented *ioctl()'s*
+--------------------------------
+
+The following `old` CD-ROM *ioctl()*\ 's are implemented by directly
+calling device-operations in *cdrom_device_ops*, if implemented and
+not masked:
+
+`CDROMMULTISESSION`
+ Requests the last session on a CD-ROM.
+`CDROMEJECT`
+ Open tray.
+`CDROMCLOSETRAY`
+ Close tray.
+`CDROMEJECT_SW`
+ If *arg\not=0*, set behavior to auto-close (close
+ tray on first open) and auto-eject (eject on last release), otherwise
+ set behavior to non-moving on *open()* and *release()* calls.
+`CDROM_GET_MCN`
+ Get the Media Catalog Number from a CD.
+
+*Ioctl*s routed through *audio_ioctl()*
+---------------------------------------
+
+The following set of *ioctl()'s* are all implemented through a call to
+the *cdrom_fops* function *audio_ioctl()*. Memory checks and
+allocation are performed in *cdrom_ioctl()*, and also sanitization of
+address format (*CDROM_LBA*/*CDROM_MSF*) is done.
+
+`CDROMSUBCHNL`
+ Get sub-channel data in argument *arg* of type
+ `struct cdrom_subchnl *`.
+`CDROMREADTOCHDR`
+ Read Table of Contents header, in *arg* of type
+ `struct cdrom_tochdr *`.
+`CDROMREADTOCENTRY`
+ Read a Table of Contents entry in *arg* and specified by *arg*
+ of type `struct cdrom_tocentry *`.
+`CDROMPLAYMSF`
+ Play audio fragment specified in Minute, Second, Frame format,
+ delimited by *arg* of type `struct cdrom_msf *`.
+`CDROMPLAYTRKIND`
+ Play audio fragment in track-index format delimited by *arg*
+ of type `struct cdrom_ti *`.
+`CDROMVOLCTRL`
+ Set volume specified by *arg* of type `struct cdrom_volctrl *`.
+`CDROMVOLREAD`
+ Read volume into by *arg* of type `struct cdrom_volctrl *`.
+`CDROMSTART`
+ Spin up disc.
+`CDROMSTOP`
+ Stop playback of audio fragment.
+`CDROMPAUSE`
+ Pause playback of audio fragment.
+`CDROMRESUME`
+ Resume playing.
+
+New *ioctl()'s* in `cdrom.c`
+----------------------------
+
+The following *ioctl()'s* have been introduced to allow user programs to
+control the behavior of individual CD-ROM devices. New *ioctl*
+commands can be identified by the underscores in their names.
+
+`CDROM_SET_OPTIONS`
+ Set options specified by *arg*. Returns the option flag register
+ after modification. Use *arg = \rm0* for reading the current flags.
+`CDROM_CLEAR_OPTIONS`
+ Clear options specified by *arg*. Returns the option flag register
+ after modification.
+`CDROM_SELECT_SPEED`
+ Select head-rate speed of disc specified as by *arg* in units
+ of standard cdrom speed (176\,kB/sec raw data or
+ 150kB/sec file system data). The value 0 means `auto-select`,
+ i. e., play audio discs at real time and data discs at maximum speed.
+ The value *arg* is checked against the maximum head rate of the
+ drive found in the *cdrom_dops*.
+`CDROM_SELECT_DISC`
+ Select disc numbered *arg* from a juke-box.
+
+ First disc is numbered 0. The number *arg* is checked against the
+ maximum number of discs in the juke-box found in the *cdrom_dops*.
+`CDROM_MEDIA_CHANGED`
+ Returns 1 if a disc has been changed since the last call.
+ For juke-boxes, an extra argument *arg*
+ specifies the slot for which the information is given. The special
+ value *CDSL_CURRENT* requests that information about the currently
+ selected slot be returned.
+`CDROM_TIMED_MEDIA_CHANGE`
+ Checks whether the disc has been changed since a user supplied time
+ and returns the time of the last disc change.
+
+ *arg* is a pointer to a *cdrom_timed_media_change_info* struct.
+ *arg->last_media_change* may be set by calling code to signal
+ the timestamp of the last known media change (by the caller).
+ Upon successful return, this ioctl call will set
+ *arg->last_media_change* to the latest media change timestamp (in ms)
+ known by the kernel/driver and set *arg->has_changed* to 1 if
+ that timestamp is more recent than the timestamp set by the caller.
+`CDROM_DRIVE_STATUS`
+ Returns the status of the drive by a call to
+ *drive_status()*. Return values are defined in cdrom_drive_status_.
+ Note that this call doesn't return information on the
+ current playing activity of the drive; this can be polled through
+ an *ioctl* call to *CDROMSUBCHNL*. For juke-boxes, an extra argument
+ *arg* specifies the slot for which (possibly limited) information is
+ given. The special value *CDSL_CURRENT* requests that information
+ about the currently selected slot be returned.
+`CDROM_DISC_STATUS`
+ Returns the type of the disc currently in the drive.
+ It should be viewed as a complement to *CDROM_DRIVE_STATUS*.
+ This *ioctl* can provide *some* information about the current
+ disc that is inserted in the drive. This functionality used to be
+ implemented in the low level drivers, but is now carried out
+ entirely in Uniform CD-ROM Driver.
+
+ The history of development of the CD's use as a carrier medium for
+ various digital information has lead to many different disc types.
+ This *ioctl* is useful only in the case that CDs have \emph {only
+ one} type of data on them. While this is often the case, it is
+ also very common for CDs to have some tracks with data, and some
+ tracks with audio. Because this is an existing interface, rather
+ than fixing this interface by changing the assumptions it was made
+ under, thereby breaking all user applications that use this
+ function, the Uniform CD-ROM Driver implements this *ioctl* as
+ follows: If the CD in question has audio tracks on it, and it has
+ absolutely no CD-I, XA, or data tracks on it, it will be reported
+ as *CDS_AUDIO*. If it has both audio and data tracks, it will
+ return *CDS_MIXED*. If there are no audio tracks on the disc, and
+ if the CD in question has any CD-I tracks on it, it will be
+ reported as *CDS_XA_2_2*. Failing that, if the CD in question
+ has any XA tracks on it, it will be reported as *CDS_XA_2_1*.
+ Finally, if the CD in question has any data tracks on it,
+ it will be reported as a data CD (*CDS_DATA_1*).
+
+ This *ioctl* can return::
+
+ CDS_NO_INFO /* no information available */
+ CDS_NO_DISC /* no disc is inserted, or tray is opened */
+ CDS_AUDIO /* Audio disc (2352 audio bytes/frame) */
+ CDS_DATA_1 /* data disc, mode 1 (2048 user bytes/frame) */
+ CDS_XA_2_1 /* mixed data (XA), mode 2, form 1 (2048 user bytes) */
+ CDS_XA_2_2 /* mixed data (XA), mode 2, form 1 (2324 user bytes) */
+ CDS_MIXED /* mixed audio/data disc */
+
+ For some information concerning frame layout of the various disc
+ types, see a recent version of `cdrom.h`.
+
+`CDROM_CHANGER_NSLOTS`
+ Returns the number of slots in a juke-box.
+`CDROMRESET`
+ Reset the drive.
+`CDROM_GET_CAPABILITY`
+ Returns the *capability* flags for the drive. Refer to section
+ cdrom_capabilities_ for more information on these flags.
+`CDROM_LOCKDOOR`
+ Locks the door of the drive. `arg == 0` unlocks the door,
+ any other value locks it.
+`CDROM_DEBUG`
+ Turns on debugging info. Only root is allowed to do this.
+ Same semantics as CDROM_LOCKDOOR.
+
+
+Device dependent *ioctl()'s*
+----------------------------
+
+Finally, all other *ioctl()'s* are passed to the function *dev_ioctl()*,
+if implemented. No memory allocation or verification is carried out.
+
+How to update your driver
+=========================
+
+- Make a backup of your current driver.
+- Get hold of the files `cdrom.c` and `cdrom.h`, they should be in
+ the directory tree that came with this documentation.
+- Make sure you include `cdrom.h`.
+- Change the 3rd argument of *register_blkdev* from `&<your-drive>_fops`
+ to `&cdrom_fops`.
+- Just after that line, add the following to register with the Uniform
+ CD-ROM Driver::
+
+ register_cdrom(&<your-drive>_info);*
+
+ Similarly, add a call to *unregister_cdrom()* at the appropriate place.
+- Copy an example of the device-operations *struct* to your
+ source, e. g., from `cm206.c` *cm206_dops*, and change all
+ entries to names corresponding to your driver, or names you just
+ happen to like. If your driver doesn't support a certain function,
+ make the entry *NULL*. At the entry *capability* you should list all
+ capabilities your driver currently supports. If your driver
+ has a capability that is not listed, please send me a message.
+- Copy the *cdrom_device_info* declaration from the same example
+ driver, and modify the entries according to your needs. If your
+ driver dynamically determines the capabilities of the hardware, this
+ structure should also be declared dynamically.
+- Implement all functions in your `<device>_dops` structure,
+ according to prototypes listed in `cdrom.h`, and specifications given
+ in cdrom_api_. Most likely you have already implemented
+ the code in a large part, and you will almost certainly need to adapt the
+ prototype and return values.
+- Rename your `<device>_ioctl()` function to *audio_ioctl* and
+ change the prototype a little. Remove entries listed in the first
+ part in cdrom_ioctl_, if your code was OK, these are
+ just calls to the routines you adapted in the previous step.
+- You may remove all remaining memory checking code in the
+ *audio_ioctl()* function that deals with audio commands (these are
+ listed in the second part of cdrom_ioctl_. There is no
+ need for memory allocation either, so most *case*s in the *switch*
+ statement look similar to::
+
+ case CDROMREADTOCENTRY:
+ get_toc_entry\bigl((struct cdrom_tocentry *) arg);
+
+- All remaining *ioctl* cases must be moved to a separate
+ function, *<device>_ioctl*, the device-dependent *ioctl()'s*. Note that
+ memory checking and allocation must be kept in this code!
+- Change the prototypes of *<device>_open()* and
+ *<device>_release()*, and remove any strategic code (i. e., tray
+ movement, door locking, etc.).
+- Try to recompile the drivers. We advise you to use modules, both
+ for `cdrom.o` and your driver, as debugging is much easier this
+ way.
+
+Thanks
+======
+
+Thanks to all the people involved. First, Erik Andersen, who has
+taken over the torch in maintaining `cdrom.c` and integrating much
+CD-ROM-related code in the 2.1-kernel. Thanks to Scott Snyder and
+Gerd Knorr, who were the first to implement this interface for SCSI
+and IDE-CD drivers and added many ideas for extension of the data
+structures relative to kernel~2.0. Further thanks to Heiko Eißfeldt,
+Thomas Quinot, Jon Tombs, Ken Pizzini, Eberhard Mönkeberg and Andrew Kroll,
+the Linux CD-ROM device driver developers who were kind
+enough to give suggestions and criticisms during the writing. Finally
+of course, I want to thank Linus Torvalds for making this possible in
+the first place.
diff --git a/Documentation/cdrom/cdrom-standard.tex b/Documentation/cdrom/cdrom-standard.tex
deleted file mode 100644
index f7cd455973f7..000000000000
--- a/Documentation/cdrom/cdrom-standard.tex
+++ /dev/null
@@ -1,1026 +0,0 @@
-\documentclass{article}
-\def\version{$Id: cdrom-standard.tex,v 1.9 1997/12/28 15:42:49 david Exp $}
-\newcommand{\newsection}[1]{\newpage\section{#1}}
-
-\evensidemargin=0pt
-\oddsidemargin=0pt
-\topmargin=-\headheight \advance\topmargin by -\headsep
-\textwidth=15.99cm \textheight=24.62cm % normal A4, 1'' margin
-
-\def\linux{{\sc Linux}}
-\def\cdrom{{\sc cd-rom}}
-\def\UCD{{\sc Uniform cd-rom Driver}}
-\def\cdromc{{\tt {cdrom.c}}}
-\def\cdromh{{\tt {cdrom.h}}}
-\def\fo{\sl} % foreign words
-\def\ie{{\fo i.e.}}
-\def\eg{{\fo e.g.}}
-
-\everymath{\it} \everydisplay{\it}
-\catcode `\_=\active \def_{\_\penalty100 }
-\catcode`\<=\active \def<#1>{{\langle\hbox{\rm#1}\rangle}}
-
-\begin{document}
-\title{A \linux\ \cdrom\ standard}
-\author{David van Leeuwen\\{\normalsize\tt david@ElseWare.cistron.nl}
-\\{\footnotesize updated by Erik Andersen {\tt(andersee@debian.org)}}
-\\{\footnotesize updated by Jens Axboe {\tt(axboe@image.dk)}}}
-\date{12 March 1999}
-
-\maketitle
-
-\newsection{Introduction}
-
-\linux\ is probably the Unix-like operating system that supports
-the widest variety of hardware devices. The reasons for this are
-presumably
-\begin{itemize}
-\item
- The large list of hardware devices available for the many platforms
- that \linux\ now supports (\ie, i386-PCs, Sparc Suns, etc.)
-\item
- The open design of the operating system, such that anybody can write a
- driver for \linux.
-\item
- There is plenty of source code around as examples of how to write a driver.
-\end{itemize}
-The openness of \linux, and the many different types of available
-hardware has allowed \linux\ to support many different hardware devices.
-Unfortunately, the very openness that has allowed \linux\ to support
-all these different devices has also allowed the behavior of each
-device driver to differ significantly from one device to another.
-This divergence of behavior has been very significant for \cdrom\
-devices; the way a particular drive reacts to a `standard' $ioctl()$
-call varies greatly from one device driver to another. To avoid making
-their drivers totally inconsistent, the writers of \linux\ \cdrom\
-drivers generally created new device drivers by understanding, copying,
-and then changing an existing one. Unfortunately, this practice did not
-maintain uniform behavior across all the \linux\ \cdrom\ drivers.
-
-This document describes an effort to establish Uniform behavior across
-all the different \cdrom\ device drivers for \linux. This document also
-defines the various $ioctl$s, and how the low-level \cdrom\ device
-drivers should implement them. Currently (as of the \linux\ 2.1.$x$
-development kernels) several low-level \cdrom\ device drivers, including
-both IDE/ATAPI and SCSI, now use this Uniform interface.
-
-When the \cdrom\ was developed, the interface between the \cdrom\ drive
-and the computer was not specified in the standards. As a result, many
-different \cdrom\ interfaces were developed. Some of them had their
-own proprietary design (Sony, Mitsumi, Panasonic, Philips), other
-manufacturers adopted an existing electrical interface and changed
-the functionality (CreativeLabs/SoundBlaster, Teac, Funai) or simply
-adapted their drives to one or more of the already existing electrical
-interfaces (Aztech, Sanyo, Funai, Vertos, Longshine, Optics Storage and
-most of the `NoName' manufacturers). In cases where a new drive really
-brought its own interface or used its own command set and flow control
-scheme, either a separate driver had to be written, or an existing
-driver had to be enhanced. History has delivered us \cdrom\ support for
-many of these different interfaces. Nowadays, almost all new \cdrom\
-drives are either IDE/ATAPI or SCSI, and it is very unlikely that any
-manufacturer will create a new interface. Even finding drives for the
-old proprietary interfaces is getting difficult.
-
-When (in the 1.3.70's) I looked at the existing software interface,
-which was expressed through \cdromh, it appeared to be a rather wild
-set of commands and data formats.\footnote{I cannot recollect what
-kernel version I looked at, then, presumably 1.2.13 and 1.3.34---the
-latest kernel that I was indirectly involved in.} It seemed that many
-features of the software interface had been added to accommodate the
-capabilities of a particular drive, in an {\fo ad hoc\/} manner. More
-importantly, it appeared that the behavior of the `standard' commands
-was different for most of the different drivers: \eg, some drivers
-close the tray if an $open()$ call occurs when the tray is open, while
-others do not. Some drivers lock the door upon opening the device, to
-prevent an incoherent file system, but others don't, to allow software
-ejection. Undoubtedly, the capabilities of the different drives vary,
-but even when two drives have the same capability their drivers'
-behavior was usually different.
-
-I decided to start a discussion on how to make all the \linux\ \cdrom\
-drivers behave more uniformly. I began by contacting the developers of
-the many \cdrom\ drivers found in the \linux\ kernel. Their reactions
-encouraged me to write the \UCD\ which this document is intended to
-describe. The implementation of the \UCD\ is in the file \cdromc. This
-driver is intended to be an additional software layer that sits on top
-of the low-level device drivers for each \cdrom\ drive. By adding this
-additional layer, it is possible to have all the different \cdrom\
-devices behave {\em exactly\/} the same (insofar as the underlying
-hardware will allow).
-
-The goal of the \UCD\ is {\em not\/} to alienate driver developers who
-have not yet taken steps to support this effort. The goal of \UCD\ is
-simply to give people writing application programs for \cdrom\ drives
-{\em one\/} \linux\ \cdrom\ interface with consistent behavior for all
-\cdrom\ devices. In addition, this also provides a consistent interface
-between the low-level device driver code and the \linux\ kernel. Care
-is taken that 100\,\% compatibility exists with the data structures and
-programmer's interface defined in \cdromh. This guide was written to
-help \cdrom\ driver developers adapt their code to use the \UCD\ code
-defined in \cdromc.
-
-Personally, I think that the most important hardware interfaces are
-the IDE/ATAPI drives and, of course, the SCSI drives, but as prices
-of hardware drop continuously, it is also likely that people may have
-more than one \cdrom\ drive, possibly of mixed types. It is important
-that these drives behave in the same way. In December 1994, one of the
-cheapest \cdrom\ drives was a Philips cm206, a double-speed proprietary
-drive. In the months that I was busy writing a \linux\ driver for it,
-proprietary drives became obsolete and IDE/ATAPI drives became the
-standard. At the time of the last update to this document (November
-1997) it is becoming difficult to even {\em find} anything less than a
-16 speed \cdrom\ drive, and 24 speed drives are common.
-
-\newsection{Standardizing through another software level}
-\label{cdrom.c}
-
-At the time this document was conceived, all drivers directly
-implemented the \cdrom\ $ioctl()$ calls through their own routines. This
-led to the danger of different drivers forgetting to do important things
-like checking that the user was giving the driver valid data. More
-importantly, this led to the divergence of behavior, which has already
-been discussed.
-
-For this reason, the \UCD\ was created to enforce consistent \cdrom\
-drive behavior, and to provide a common set of services to the various
-low-level \cdrom\ device drivers. The \UCD\ now provides another
-software-level, that separates the $ioctl()$ and $open()$ implementation
-from the actual hardware implementation. Note that this effort has
-made few changes which will affect a user's application programs. The
-greatest change involved moving the contents of the various low-level
-\cdrom\ drivers' header files to the kernel's cdrom directory. This was
-done to help ensure that the user is only presented with only one cdrom
-interface, the interface defined in \cdromh.
-
-\cdrom\ drives are specific enough (\ie, different from other
-block-devices such as floppy or hard disc drives), to define a set
-of common {\em \cdrom\ device operations}, $<cdrom-device>_dops$.
-These operations are different from the classical block-device file
-operations, $<block-device>_fops$.
-
-The routines for the \UCD\ interface level are implemented in the file
-\cdromc. In this file, the \UCD\ interfaces with the kernel as a block
-device by registering the following general $struct\ file_operations$:
-$$
-\halign{$#$\ \hfil&$#$\ \hfil&$/*$ \rm# $*/$\hfil\cr
-struct& file_operations\ cdrom_fops = \{\hidewidth\cr
- &NULL, & lseek \cr
- &block_read, & read---general block-dev read \cr
- &block_write, & write---general block-dev write \cr
- &NULL, & readdir \cr
- &NULL, & select \cr
- &cdrom_ioctl, & ioctl \cr
- &NULL, & mmap \cr
- &cdrom_open, & open \cr
- &cdrom_release, & release \cr
- &NULL, & fsync \cr
- &NULL, & fasync \cr
- &cdrom_media_changed, & media change \cr
- &NULL & revalidate \cr
-\};\cr
-}
-$$
-
-Every active \cdrom\ device shares this $struct$. The routines
-declared above are all implemented in \cdromc, since this file is the
-place where the behavior of all \cdrom-devices is defined and
-standardized. The actual interface to the various types of \cdrom\
-hardware is still performed by various low-level \cdrom-device
-drivers. These routines simply implement certain {\em capabilities\/}
-that are common to all \cdrom\ (and really, all removable-media
-devices).
-
-Registration of a low-level \cdrom\ device driver is now done through
-the general routines in \cdromc, not through the Virtual File System
-(VFS) any more. The interface implemented in \cdromc\ is carried out
-through two general structures that contain information about the
-capabilities of the driver, and the specific drives on which the
-driver operates. The structures are:
-\begin{description}
-\item[$cdrom_device_ops$]
- This structure contains information about the low-level driver for a
- \cdrom\ device. This structure is conceptually connected to the major
- number of the device (although some drivers may have different
- major numbers, as is the case for the IDE driver).
-\item[$cdrom_device_info$]
- This structure contains information about a particular \cdrom\ drive,
- such as its device name, speed, etc. This structure is conceptually
- connected to the minor number of the device.
-\end{description}
-
-Registering a particular \cdrom\ drive with the \UCD\ is done by the
-low-level device driver though a call to:
-$$register_cdrom(struct\ cdrom_device_info * <device>_info)
-$$
-The device information structure, $<device>_info$, contains all the
-information needed for the kernel to interface with the low-level
-\cdrom\ device driver. One of the most important entries in this
-structure is a pointer to the $cdrom_device_ops$ structure of the
-low-level driver.
-
-The device operations structure, $cdrom_device_ops$, contains a list
-of pointers to the functions which are implemented in the low-level
-device driver. When \cdromc\ accesses a \cdrom\ device, it does it
-through the functions in this structure. It is impossible to know all
-the capabilities of future \cdrom\ drives, so it is expected that this
-list may need to be expanded from time to time as new technologies are
-developed. For example, CD-R and CD-R/W drives are beginning to become
-popular, and support will soon need to be added for them. For now, the
-current $struct$ is:
-$$
-\halign{$#$\ \hfil&$#$\ \hfil&\hbox to 10em{$#$\hss}&
- $/*$ \rm# $*/$\hfil\cr
-struct& cdrom_device_ops\ \{ \hidewidth\cr
- &int& (* open)(struct\ cdrom_device_info *, int)\cr
- &void& (* release)(struct\ cdrom_device_info *);\cr
- &int& (* drive_status)(struct\ cdrom_device_info *, int);\cr
- &unsigned\ int& (* check_events)(struct\ cdrom_device_info *, unsigned\ int, int);\cr
- &int& (* media_changed)(struct\ cdrom_device_info *, int);\cr
- &int& (* tray_move)(struct\ cdrom_device_info *, int);\cr
- &int& (* lock_door)(struct\ cdrom_device_info *, int);\cr
- &int& (* select_speed)(struct\ cdrom_device_info *, int);\cr
- &int& (* select_disc)(struct\ cdrom_device_info *, int);\cr
- &int& (* get_last_session) (struct\ cdrom_device_info *,
- struct\ cdrom_multisession *{});\cr
- &int& (* get_mcn)(struct\ cdrom_device_info *, struct\ cdrom_mcn *{});\cr
- &int& (* reset)(struct\ cdrom_device_info *);\cr
- &int& (* audio_ioctl)(struct\ cdrom_device_info *, unsigned\ int,
- void *{});\cr
-\noalign{\medskip}
- &const\ int& capability;& capability flags \cr
- &int& (* generic_packet)(struct\ cdrom_device_info *, struct\ packet_command *{});\cr
-\};\cr
-}
-$$
-When a low-level device driver implements one of these capabilities,
-it should add a function pointer to this $struct$. When a particular
-function is not implemented, however, this $struct$ should contain a
-NULL instead. The $capability$ flags specify the capabilities of the
-\cdrom\ hardware and/or low-level \cdrom\ driver when a \cdrom\ drive
-is registered with the \UCD.
-
-Note that most functions have fewer parameters than their
-$blkdev_fops$ counterparts. This is because very little of the
-information in the structures $inode$ and $file$ is used. For most
-drivers, the main parameter is the $struct$ $cdrom_device_info$, from
-which the major and minor number can be extracted. (Most low-level
-\cdrom\ drivers don't even look at the major and minor number though,
-since many of them only support one device.) This will be available
-through $dev$ in $cdrom_device_info$ described below.
-
-The drive-specific, minor-like information that is registered with
-\cdromc, currently contains the following fields:
-$$
-\halign{$#$\ \hfil&$#$\ \hfil&\hbox to 10em{$#$\hss}&
- $/*$ \rm# $*/$\hfil\cr
-struct& cdrom_device_info\ \{ \hidewidth\cr
- & const\ struct\ cdrom_device_ops *& ops;& device operations for this major\cr
- & struct\ list_head& list;& linked list of all device_info\cr
- & struct\ gendisk *& disk;& matching block layer disk\cr
- & void *& handle;& driver-dependent data\cr
-\noalign{\medskip}
- & int& mask;& mask of capability: disables them \cr
- & int& speed;& maximum speed for reading data \cr
- & int& capacity;& number of discs in a jukebox \cr
-\noalign{\medskip}
- &unsigned\ int& options : 30;& options flags \cr
- &unsigned& mc_flags : 2;& media-change buffer flags \cr
- &unsigned\ int& vfs_events;& cached events for vfs path\cr
- &unsigned\ int& ioctl_events;& cached events for ioctl path\cr
- & int& use_count;& number of times device is opened\cr
- & char& name[20];& name of the device type\cr
-\noalign{\medskip}
- &__u8& sanyo_slot : 2;& Sanyo 3-CD changer support\cr
- &__u8& keeplocked : 1;& CDROM_LOCKDOOR status\cr
- &__u8& reserved : 5;& not used yet\cr
- & int& cdda_method;& see CDDA_* flags\cr
- &__u8& last_sense;& saves last sense key\cr
- &__u8& media_written;& dirty flag, DVD+RW bookkeeping\cr
- &unsigned\ short& mmc3_profile;& current MMC3 profile\cr
- & int& for_data;& unknown:TBD\cr
- & int\ (* exit)\ (struct\ cdrom_device_info *);&& unknown:TBD\cr
- & int& mrw_mode_page;& which MRW mode page is in use\cr
-\}\cr
-}$$
-Using this $struct$, a linked list of the registered minor devices is
-built, using the $next$ field. The device number, the device operations
-struct and specifications of properties of the drive are stored in this
-structure.
-
-The $mask$ flags can be used to mask out some of the capabilities listed
-in $ops\to capability$, if a specific drive doesn't support a feature
-of the driver. The value $speed$ specifies the maximum head-rate of the
-drive, measured in units of normal audio speed (176\,kB/sec raw data or
-150\,kB/sec file system data). The parameters are declared $const$
-because they describe properties of the drive, which don't change after
-registration.
-
-A few registers contain variables local to the \cdrom\ drive. The
-flags $options$ are used to specify how the general \cdrom\ routines
-should behave. These various flags registers should provide enough
-flexibility to adapt to the different users' wishes (and {\em not\/} the
-`arbitrary' wishes of the author of the low-level device driver, as is
-the case in the old scheme). The register $mc_flags$ is used to buffer
-the information from $media_changed()$ to two separate queues. Other
-data that is specific to a minor drive, can be accessed through $handle$,
-which can point to a data structure specific to the low-level driver.
-The fields $use_count$, $next$, $options$ and $mc_flags$ need not be
-initialized.
-
-The intermediate software layer that \cdromc\ forms will perform some
-additional bookkeeping. The use count of the device (the number of
-processes that have the device opened) is registered in $use_count$. The
-function $cdrom_ioctl()$ will verify the appropriate user-memory regions
-for read and write, and in case a location on the CD is transferred,
-it will `sanitize' the format by making requests to the low-level
-drivers in a standard format, and translating all formats between the
-user-software and low level drivers. This relieves much of the drivers'
-memory checking and format checking and translation. Also, the necessary
-structures will be declared on the program stack.
-
-The implementation of the functions should be as defined in the
-following sections. Two functions {\em must\/} be implemented, namely
-$open()$ and $release()$. Other functions may be omitted, their
-corresponding capability flags will be cleared upon registration.
-Generally, a function returns zero on success and negative on error. A
-function call should return only after the command has completed, but of
-course waiting for the device should not use processor time.
-
-\subsection{$Int\ open(struct\ cdrom_device_info * cdi, int\ purpose)$}
-
-$Open()$ should try to open the device for a specific $purpose$, which
-can be either:
-\begin{itemize}
-\item[0] Open for reading data, as done by {\tt {mount()}} (2), or the
-user commands {\tt {dd}} or {\tt {cat}}.
-\item[1] Open for $ioctl$ commands, as done by audio-CD playing
-programs.
-\end{itemize}
-Notice that any strategic code (closing tray upon $open()$, etc.)\ is
-done by the calling routine in \cdromc, so the low-level routine
-should only be concerned with proper initialization, such as spinning
-up the disc, etc. % and device-use count
-
-
-\subsection{$Void\ release(struct\ cdrom_device_info * cdi)$}
-
-
-Device-specific actions should be taken such as spinning down the device.
-However, strategic actions such as ejection of the tray, or unlocking
-the door, should be left over to the general routine $cdrom_release()$.
-This is the only function returning type $void$.
-
-\subsection{$Int\ drive_status(struct\ cdrom_device_info * cdi, int\ slot_nr)$}
-\label{drive status}
-
-The function $drive_status$, if implemented, should provide
-information on the status of the drive (not the status of the disc,
-which may or may not be in the drive). If the drive is not a changer,
-$slot_nr$ should be ignored. In \cdromh\ the possibilities are listed:
-$$
-\halign{$#$\ \hfil&$/*$ \rm# $*/$\hfil\cr
-CDS_NO_INFO& no information available\cr
-CDS_NO_DISC& no disc is inserted, tray is closed\cr
-CDS_TRAY_OPEN& tray is opened\cr
-CDS_DRIVE_NOT_READY& something is wrong, tray is moving?\cr
-CDS_DISC_OK& a disc is loaded and everything is fine\cr
-}
-$$
-
-\subsection{$Int\ media_changed(struct\ cdrom_device_info * cdi, int\ disc_nr)$}
-
-This function is very similar to the original function in $struct\
-file_operations$. It returns 1 if the medium of the device $cdi\to
-dev$ has changed since the last call, and 0 otherwise. The parameter
-$disc_nr$ identifies a specific slot in a juke-box, it should be
-ignored for single-disc drives. Note that by `re-routing' this
-function through $cdrom_media_changed()$, we can implement separate
-queues for the VFS and a new $ioctl()$ function that can report device
-changes to software (\eg, an auto-mounting daemon).
-
-\subsection{$Int\ tray_move(struct\ cdrom_device_info * cdi, int\ position)$}
-
-This function, if implemented, should control the tray movement. (No
-other function should control this.) The parameter $position$ controls
-the desired direction of movement:
-\begin{itemize}
-\item[0] Close tray
-\item[1] Open tray
-\end{itemize}
-This function returns 0 upon success, and a non-zero value upon
-error. Note that if the tray is already in the desired position, no
-action need be taken, and the return value should be 0.
-
-\subsection{$Int\ lock_door(struct\ cdrom_device_info * cdi, int\ lock)$}
-
-This function (and no other code) controls locking of the door, if the
-drive allows this. The value of $lock$ controls the desired locking
-state:
-\begin{itemize}
-\item[0] Unlock door, manual opening is allowed
-\item[1] Lock door, tray cannot be ejected manually
-\end{itemize}
-This function returns 0 upon success, and a non-zero value upon
-error. Note that if the door is already in the requested state, no
-action need be taken, and the return value should be 0.
-
-\subsection{$Int\ select_speed(struct\ cdrom_device_info * cdi, int\ speed)$}
-
-Some \cdrom\ drives are capable of changing their head-speed. There
-are several reasons for changing the speed of a \cdrom\ drive. Badly
-pressed \cdrom s may benefit from less-than-maximum head rate. Modern
-\cdrom\ drives can obtain very high head rates (up to $24\times$ is
-common). It has been reported that these drives can make reading
-errors at these high speeds, reducing the speed can prevent data loss
-in these circumstances. Finally, some of these drives can
-make an annoyingly loud noise, which a lower speed may reduce. %Finally,
-%although the audio-low-pass filters probably aren't designed for it,
-%more than real-time playback of audio might be used for high-speed
-%copying of audio tracks.
-
-This function specifies the speed at which data is read or audio is
-played back. The value of $speed$ specifies the head-speed of the
-drive, measured in units of standard cdrom speed (176\,kB/sec raw data
-or 150\,kB/sec file system data). So to request that a \cdrom\ drive
-operate at 300\,kB/sec you would call the CDROM_SELECT_SPEED $ioctl$
-with $speed=2$. The special value `0' means `auto-selection', \ie,
-maximum data-rate or real-time audio rate. If the drive doesn't have
-this `auto-selection' capability, the decision should be made on the
-current disc loaded and the return value should be positive. A negative
-return value indicates an error.
-
-\subsection{$Int\ select_disc(struct\ cdrom_device_info * cdi, int\ number)$}
-
-If the drive can store multiple discs (a juke-box) this function
-will perform disc selection. It should return the number of the
-selected disc on success, a negative value on error. Currently, only
-the ide-cd driver supports this functionality.
-
-\subsection{$Int\ get_last_session(struct\ cdrom_device_info * cdi, struct\
- cdrom_multisession * ms_info)$}
-
-This function should implement the old corresponding $ioctl()$. For
-device $cdi\to dev$, the start of the last session of the current disc
-should be returned in the pointer argument $ms_info$. Note that
-routines in \cdromc\ have sanitized this argument: its requested
-format will {\em always\/} be of the type $CDROM_LBA$ (linear block
-addressing mode), whatever the calling software requested. But
-sanitization goes even further: the low-level implementation may
-return the requested information in $CDROM_MSF$ format if it wishes so
-(setting the $ms_info\rightarrow addr_format$ field appropriately, of
-course) and the routines in \cdromc\ will make the transformation if
-necessary. The return value is 0 upon success.
-
-\subsection{$Int\ get_mcn(struct\ cdrom_device_info * cdi, struct\
- cdrom_mcn * mcn)$}
-
-Some discs carry a `Media Catalog Number' (MCN), also called
-`Universal Product Code' (UPC). This number should reflect the number
-that is generally found in the bar-code on the product. Unfortunately,
-the few discs that carry such a number on the disc don't even use the
-same format. The return argument to this function is a pointer to a
-pre-declared memory region of type $struct\ cdrom_mcn$. The MCN is
-expected as a 13-character string, terminated by a null-character.
-
-\subsection{$Int\ reset(struct\ cdrom_device_info * cdi)$}
-
-This call should perform a hard-reset on the drive (although in
-circumstances that a hard-reset is necessary, a drive may very well not
-listen to commands anymore). Preferably, control is returned to the
-caller only after the drive has finished resetting. If the drive is no
-longer listening, it may be wise for the underlying low-level cdrom
-driver to time out.
-
-\subsection{$Int\ audio_ioctl(struct\ cdrom_device_info * cdi, unsigned\
- int\ cmd, void * arg)$}
-
-Some of the \cdrom-$ioctl$s defined in \cdromh\ can be
-implemented by the routines described above, and hence the function
-$cdrom_ioctl$ will use those. However, most $ioctl$s deal with
-audio-control. We have decided to leave these to be accessed through a
-single function, repeating the arguments $cmd$ and $arg$. Note that
-the latter is of type $void*{}$, rather than $unsigned\ long\
-int$. The routine $cdrom_ioctl()$ does do some useful things,
-though. It sanitizes the address format type to $CDROM_MSF$ (Minutes,
-Seconds, Frames) for all audio calls. It also verifies the memory
-location of $arg$, and reserves stack-memory for the argument. This
-makes implementation of the $audio_ioctl()$ much simpler than in the
-old driver scheme. For example, you may look up the function
-$cm206_audio_ioctl()$ in {\tt {cm206.c}} that should be updated with
-this documentation.
-
-An unimplemented ioctl should return $-ENOSYS$, but a harmless request
-(\eg, $CDROMSTART$) may be ignored by returning 0 (success). Other
-errors should be according to the standards, whatever they are. When
-an error is returned by the low-level driver, the \UCD\ tries whenever
-possible to return the error code to the calling program. (We may decide
-to sanitize the return value in $cdrom_ioctl()$ though, in order to
-guarantee a uniform interface to the audio-player software.)
-
-\subsection{$Int\ dev_ioctl(struct\ cdrom_device_info * cdi, unsigned\ int\
- cmd, unsigned\ long\ arg)$}
-
-Some $ioctl$s seem to be specific to certain \cdrom\ drives. That is,
-they are introduced to service some capabilities of certain drives. In
-fact, there are 6 different $ioctl$s for reading data, either in some
-particular kind of format, or audio data. Not many drives support
-reading audio tracks as data, I believe this is because of protection
-of copyrights of artists. Moreover, I think that if audio-tracks are
-supported, it should be done through the VFS and not via $ioctl$s. A
-problem here could be the fact that audio-frames are 2352 bytes long,
-so either the audio-file-system should ask for 75264 bytes at once
-(the least common multiple of 512 and 2352), or the drivers should
-bend their backs to cope with this incoherence (to which I would be
-opposed). Furthermore, it is very difficult for the hardware to find
-the exact frame boundaries, since there are no synchronization headers
-in audio frames. Once these issues are resolved, this code should be
-standardized in \cdromc.
-
-Because there are so many $ioctl$s that seem to be introduced to
-satisfy certain drivers,\footnote{Is there software around that
- actually uses these? I'd be interested!} any `non-standard' $ioctl$s
-are routed through the call $dev_ioctl()$. In principle, `private'
-$ioctl$s should be numbered after the device's major number, and not
-the general \cdrom\ $ioctl$ number, {\tt {0x53}}. Currently the
-non-supported $ioctl$s are: {\it CDROMREADMODE1, CDROMREADMODE2,
- CDROMREADAUDIO, CDROMREADRAW, CDROMREADCOOKED, CDROMSEEK,
- CDROMPLAY\-BLK and CDROM\-READALL}.
-
-
-\subsection{\cdrom\ capabilities}
-\label{capability}
-
-Instead of just implementing some $ioctl$ calls, the interface in
-\cdromc\ supplies the possibility to indicate the {\em capabilities\/}
-of a \cdrom\ drive. This can be done by ORing any number of
-capability-constants that are defined in \cdromh\ at the registration
-phase. Currently, the capabilities are any of:
-$$
-\halign{$#$\ \hfil&$/*$ \rm# $*/$\hfil\cr
-CDC_CLOSE_TRAY& can close tray by software control\cr
-CDC_OPEN_TRAY& can open tray\cr
-CDC_LOCK& can lock and unlock the door\cr
-CDC_SELECT_SPEED& can select speed, in units of $\sim$150\,kB/s\cr
-CDC_SELECT_DISC& drive is juke-box\cr
-CDC_MULTI_SESSION& can read sessions $>\rm1$\cr
-CDC_MCN& can read Media Catalog Number\cr
-CDC_MEDIA_CHANGED& can report if disc has changed\cr
-CDC_PLAY_AUDIO& can perform audio-functions (play, pause, etc)\cr
-CDC_RESET& hard reset device\cr
-CDC_IOCTLS& driver has non-standard ioctls\cr
-CDC_DRIVE_STATUS& driver implements drive status\cr
-}
-$$
-The capability flag is declared $const$, to prevent drivers from
-accidentally tampering with the contents. The capability fags actually
-inform \cdromc\ of what the driver can do. If the drive found
-by the driver does not have the capability, is can be masked out by
-the $cdrom_device_info$ variable $mask$. For instance, the SCSI \cdrom\
-driver has implemented the code for loading and ejecting \cdrom's, and
-hence its corresponding flags in $capability$ will be set. But a SCSI
-\cdrom\ drive might be a caddy system, which can't load the tray, and
-hence for this drive the $cdrom_device_info$ struct will have set
-the $CDC_CLOSE_TRAY$ bit in $mask$.
-
-In the file \cdromc\ you will encounter many constructions of the type
-$$\it
-if\ (cdo\rightarrow capability \mathrel\& \mathord{\sim} cdi\rightarrow mask
- \mathrel{\&} CDC_<capability>) \ldots
-$$
-There is no $ioctl$ to set the mask\dots The reason is that
-I think it is better to control the {\em behavior\/} rather than the
-{\em capabilities}.
-
-\subsection{Options}
-
-A final flag register controls the {\em behavior\/} of the \cdrom\
-drives, in order to satisfy different users' wishes, hopefully
-independently of the ideas of the respective author who happened to
-have made the drive's support available to the \linux\ community. The
-current behavior options are:
-$$
-\halign{$#$\ \hfil&$/*$ \rm# $*/$\hfil\cr
-CDO_AUTO_CLOSE& try to close tray upon device $open()$\cr
-CDO_AUTO_EJECT& try to open tray on last device $close()$\cr
-CDO_USE_FFLAGS& use $file_pointer\rightarrow f_flags$ to indicate
- purpose for $open()$\cr
-CDO_LOCK& try to lock door if device is opened\cr
-CDO_CHECK_TYPE& ensure disc type is data if opened for data\cr
-}
-$$
-
-The initial value of this register is $CDO_AUTO_CLOSE \mathrel|
-CDO_USE_FFLAGS \mathrel| CDO_LOCK$, reflecting my own view on user
-interface and software standards. Before you protest, there are two
-new $ioctl$s implemented in \cdromc, that allow you to control the
-behavior by software. These are:
-$$
-\halign{$#$\ \hfil&$/*$ \rm# $*/$\hfil\cr
-CDROM_SET_OPTIONS& set options specified in $(int)\ arg$\cr
-CDROM_CLEAR_OPTIONS& clear options specified in $(int)\ arg$\cr
-}
-$$
-One option needs some more explanation: $CDO_USE_FFLAGS$. In the next
-newsection we explain what the need for this option is.
-
-A software package {\tt setcd}, available from the Debian distribution
-and {\tt sunsite.unc.edu}, allows user level control of these flags.
-
-\newsection{The need to know the purpose of opening the \cdrom\ device}
-
-Traditionally, Unix devices can be used in two different `modes',
-either by reading/writing to the device file, or by issuing
-controlling commands to the device, by the device's $ioctl()$
-call. The problem with \cdrom\ drives, is that they can be used for
-two entirely different purposes. One is to mount removable
-file systems, \cdrom s, the other is to play audio CD's. Audio commands
-are implemented entirely through $ioctl$s, presumably because the
-first implementation (SUN?) has been such. In principle there is
-nothing wrong with this, but a good control of the `CD player' demands
-that the device can {\em always\/} be opened in order to give the
-$ioctl$ commands, regardless of the state the drive is in.
-
-On the other hand, when used as a removable-media disc drive (what the
-original purpose of \cdrom s is) we would like to make sure that the
-disc drive is ready for operation upon opening the device. In the old
-scheme, some \cdrom\ drivers don't do any integrity checking, resulting
-in a number of i/o errors reported by the VFS to the kernel when an
-attempt for mounting a \cdrom\ on an empty drive occurs. This is not a
-particularly elegant way to find out that there is no \cdrom\ inserted;
-it more-or-less looks like the old IBM-PC trying to read an empty floppy
-drive for a couple of seconds, after which the system complains it
-can't read from it. Nowadays we can {\em sense\/} the existence of a
-removable medium in a drive, and we believe we should exploit that
-fact. An integrity check on opening of the device, that verifies the
-availability of a \cdrom\ and its correct type (data), would be
-desirable.
-
-These two ways of using a \cdrom\ drive, principally for data and
-secondarily for playing audio discs, have different demands for the
-behavior of the $open()$ call. Audio use simply wants to open the
-device in order to get a file handle which is needed for issuing
-$ioctl$ commands, while data use wants to open for correct and
-reliable data transfer. The only way user programs can indicate what
-their {\em purpose\/} of opening the device is, is through the $flags$
-parameter (see {\tt {open(2)}}). For \cdrom\ devices, these flags aren't
-implemented (some drivers implement checking for write-related flags,
-but this is not strictly necessary if the device file has correct
-permission flags). Most option flags simply don't make sense to
-\cdrom\ devices: $O_CREAT$, $O_NOCTTY$, $O_TRUNC$, $O_APPEND$, and
-$O_SYNC$ have no meaning to a \cdrom.
-
-We therefore propose to use the flag $O_NONBLOCK$ to indicate
-that the device is opened just for issuing $ioctl$
-commands. Strictly, the meaning of $O_NONBLOCK$ is that opening and
-subsequent calls to the device don't cause the calling process to
-wait. We could interpret this as ``don't wait until someone has
-inserted some valid data-\cdrom.'' Thus, our proposal of the
-implementation for the $open()$ call for \cdrom s is:
-\begin{itemize}
-\item If no other flags are set than $O_RDONLY$, the device is opened
-for data transfer, and the return value will be 0 only upon successful
-initialization of the transfer. The call may even induce some actions
-on the \cdrom, such as closing the tray.
-\item If the option flag $O_NONBLOCK$ is set, opening will always be
-successful, unless the whole device doesn't exist. The drive will take
-no actions whatsoever.
-\end{itemize}
-
-\subsection{And what about standards?}
-
-You might hesitate to accept this proposal as it comes from the
-\linux\ community, and not from some standardizing institute. What
-about SUN, SGI, HP and all those other Unix and hardware vendors?
-Well, these companies are in the lucky position that they generally
-control both the hardware and software of their supported products,
-and are large enough to set their own standard. They do not have to
-deal with a dozen or more different, competing hardware
-configurations.\footnote{Incidentally, I think that SUN's approach to
-mounting \cdrom s is very good in origin: under Solaris a
-volume-daemon automatically mounts a newly inserted \cdrom\ under {\tt
-{/cdrom/$<volume-name>$/}}. In my opinion they should have pushed this
-further and have {\em every\/} \cdrom\ on the local area network be
-mounted at the similar location, \ie, no matter in which particular
-machine you insert a \cdrom, it will always appear at the same
-position in the directory tree, on every system. When I wanted to
-implement such a user-program for \linux, I came across the
-differences in behavior of the various drivers, and the need for an
-$ioctl$ informing about media changes.}
-
-We believe that using $O_NONBLOCK$ to indicate that a device is being opened
-for $ioctl$ commands only can be easily introduced in the \linux\
-community. All the CD-player authors will have to be informed, we can
-even send in our own patches to the programs. The use of $O_NONBLOCK$
-has most likely no influence on the behavior of the CD-players on
-other operating systems than \linux. Finally, a user can always revert
-to old behavior by a call to $ioctl(file_descriptor, CDROM_CLEAR_OPTIONS,
-CDO_USE_FFLAGS)$.
-
-\subsection{The preferred strategy of $open()$}
-
-The routines in \cdromc\ are designed in such a way that run-time
-configuration of the behavior of \cdrom\ devices (of {\em any\/} type)
-can be carried out, by the $CDROM_SET/CLEAR_OPTIONS$ $ioctls$. Thus, various
-modes of operation can be set:
-\begin{description}
-\item[$CDO_AUTO_CLOSE \mathrel| CDO_USE_FFLAGS \mathrel| CDO_LOCK$] This
-is the default setting. (With $CDO_CHECK_TYPE$ it will be better, in the
-future.) If the device is not yet opened by any other process, and if
-the device is being opened for data ($O_NONBLOCK$ is not set) and the
-tray is found to be open, an attempt to close the tray is made. Then,
-it is verified that a disc is in the drive and, if $CDO_CHECK_TYPE$ is
-set, that it contains tracks of type `data mode 1.' Only if all tests
-are passed is the return value zero. The door is locked to prevent file
-system corruption. If the drive is opened for audio ($O_NONBLOCK$ is
-set), no actions are taken and a value of 0 will be returned.
-\item[$CDO_AUTO_CLOSE \mathrel| CDO_AUTO_EJECT \mathrel| CDO_LOCK$] This
-mimics the behavior of the current sbpcd-driver. The option flags are
-ignored, the tray is closed on the first open, if necessary. Similarly,
-the tray is opened on the last release, \ie, if a \cdrom\ is unmounted,
-it is automatically ejected, such that the user can replace it.
-\end{description}
-We hope that these option can convince everybody (both driver
-maintainers and user program developers) to adopt the new \cdrom\
-driver scheme and option flag interpretation.
-
-\newsection{Description of routines in \cdromc}
-
-Only a few routines in \cdromc\ are exported to the drivers. In this
-new section we will discuss these, as well as the functions that `take
-over' the \cdrom\ interface to the kernel. The header file belonging
-to \cdromc\ is called \cdromh. Formerly, some of the contents of this
-file were placed in the file {\tt {ucdrom.h}}, but this file has now been
-merged back into \cdromh.
-
-\subsection{$Struct\ file_operations\ cdrom_fops$}
-
-The contents of this structure were described in section~\ref{cdrom.c}.
-A pointer to this structure is assigned to the $fops$ field
-of the $struct gendisk$.
-
-\subsection{$Int\ register_cdrom( struct\ cdrom_device_info\ * cdi)$}
-
-This function is used in about the same way one registers $cdrom_fops$
-with the kernel, the device operations and information structures,
-as described in section~\ref{cdrom.c}, should be registered with the
-\UCD:
-$$
-register_cdrom(\&<device>_info));
-$$
-This function returns zero upon success, and non-zero upon
-failure. The structure $<device>_info$ should have a pointer to the
-driver's $<device>_dops$, as in
-$$
-\vbox{\halign{&$#$\hfil\cr
-struct\ &cdrom_device_info\ <device>_info = \{\cr
-& <device>_dops;\cr
-&\ldots\cr
-\}\cr
-}}$$
-Note that a driver must have one static structure, $<device>_dops$, while
-it may have as many structures $<device>_info$ as there are minor devices
-active. $Register_cdrom()$ builds a linked list from these.
-
-\subsection{$Void\ unregister_cdrom(struct\ cdrom_device_info * cdi)$}
-
-Unregistering device $cdi$ with minor number $MINOR(cdi\to dev)$ removes
-the minor device from the list. If it was the last registered minor for
-the low-level driver, this disconnects the registered device-operation
-routines from the \cdrom\ interface. This function returns zero upon
-success, and non-zero upon failure.
-
-\subsection{$Int\ cdrom_open(struct\ inode * ip, struct\ file * fp)$}
-
-This function is not called directly by the low-level drivers, it is
-listed in the standard $cdrom_fops$. If the VFS opens a file, this
-function becomes active. A strategy is implemented in this routine,
-taking care of all capabilities and options that are set in the
-$cdrom_device_ops$ connected to the device. Then, the program flow is
-transferred to the device_dependent $open()$ call.
-
-\subsection{$Void\ cdrom_release(struct\ inode *ip, struct\ file
-*fp)$}
-
-This function implements the reverse-logic of $cdrom_open()$, and then
-calls the device-dependent $release()$ routine. When the use-count has
-reached 0, the allocated buffers are flushed by calls to $sync_dev(dev)$
-and $invalidate_buffers(dev)$.
-
-
-\subsection{$Int\ cdrom_ioctl(struct\ inode *ip, struct\ file *fp,
-unsigned\ int\ cmd, unsigned\ long\ arg)$}
-\label{cdrom-ioctl}
-
-This function handles all the standard $ioctl$ requests for \cdrom\
-devices in a uniform way. The different calls fall into three
-categories: $ioctl$s that can be directly implemented by device
-operations, ones that are routed through the call $audio_ioctl()$, and
-the remaining ones, that are presumable device-dependent. Generally, a
-negative return value indicates an error.
-
-\subsubsection{Directly implemented $ioctl$s}
-\label{ioctl-direct}
-
-The following `old' \cdrom-$ioctl$s are implemented by directly
-calling device-operations in $cdrom_device_ops$, if implemented and
-not masked:
-\begin{description}
-\item[CDROMMULTISESSION] Requests the last session on a \cdrom.
-\item[CDROMEJECT] Open tray.
-\item[CDROMCLOSETRAY] Close tray.
-\item[CDROMEJECT_SW] If $arg\not=0$, set behavior to auto-close (close
-tray on first open) and auto-eject (eject on last release), otherwise
-set behavior to non-moving on $open()$ and $release()$ calls.
-\item[CDROM_GET_MCN] Get the Media Catalog Number from a CD.
-\end{description}
-
-\subsubsection{$Ioctl$s routed through $audio_ioctl()$}
-\label{ioctl-audio}
-
-The following set of $ioctl$s are all implemented through a call to
-the $cdrom_fops$ function $audio_ioctl()$. Memory checks and
-allocation are performed in $cdrom_ioctl()$, and also sanitization of
-address format ($CDROM_LBA$/$CDROM_MSF$) is done.
-\begin{description}
-\item[CDROMSUBCHNL] Get sub-channel data in argument $arg$ of type $struct\
-cdrom_subchnl *{}$.
-\item[CDROMREADTOCHDR] Read Table of Contents header, in $arg$ of type
-$struct\ cdrom_tochdr *{}$.
-\item[CDROMREADTOCENTRY] Read a Table of Contents entry in $arg$ and
-specified by $arg$ of type $struct\ cdrom_tocentry *{}$.
-\item[CDROMPLAYMSF] Play audio fragment specified in Minute, Second,
-Frame format, delimited by $arg$ of type $struct\ cdrom_msf *{}$.
-\item[CDROMPLAYTRKIND] Play audio fragment in track-index format
-delimited by $arg$ of type $struct\ \penalty-1000 cdrom_ti *{}$.
-\item[CDROMVOLCTRL] Set volume specified by $arg$ of type $struct\
-cdrom_volctrl *{}$.
-\item[CDROMVOLREAD] Read volume into by $arg$ of type $struct\
-cdrom_volctrl *{}$.
-\item[CDROMSTART] Spin up disc.
-\item[CDROMSTOP] Stop playback of audio fragment.
-\item[CDROMPAUSE] Pause playback of audio fragment.
-\item[CDROMRESUME] Resume playing.
-\end{description}
-
-\subsubsection{New $ioctl$s in \cdromc}
-
-The following $ioctl$s have been introduced to allow user programs to
-control the behavior of individual \cdrom\ devices. New $ioctl$
-commands can be identified by the underscores in their names.
-\begin{description}
-\item[CDROM_SET_OPTIONS] Set options specified by $arg$. Returns the
-option flag register after modification. Use $arg = \rm0$ for reading
-the current flags.
-\item[CDROM_CLEAR_OPTIONS] Clear options specified by $arg$. Returns
- the option flag register after modification.
-\item[CDROM_SELECT_SPEED] Select head-rate speed of disc specified as
- by $arg$ in units of standard cdrom speed (176\,kB/sec raw data or
- 150\,kB/sec file system data). The value 0 means `auto-select', \ie,
- play audio discs at real time and data discs at maximum speed. The value
- $arg$ is checked against the maximum head rate of the drive found in the
- $cdrom_dops$.
-\item[CDROM_SELECT_DISC] Select disc numbered $arg$ from a juke-box.
- First disc is numbered 0. The number $arg$ is checked against the
- maximum number of discs in the juke-box found in the $cdrom_dops$.
-\item[CDROM_MEDIA_CHANGED] Returns 1 if a disc has been changed since
- the last call. Note that calls to $cdrom_media_changed$ by the VFS
- are treated by an independent queue, so both mechanisms will detect
- a media change once. For juke-boxes, an extra argument $arg$
- specifies the slot for which the information is given. The special
- value $CDSL_CURRENT$ requests that information about the currently
- selected slot be returned.
-\item[CDROM_DRIVE_STATUS] Returns the status of the drive by a call to
- $drive_status()$. Return values are defined in section~\ref{drive
- status}. Note that this call doesn't return information on the
- current playing activity of the drive; this can be polled through an
- $ioctl$ call to $CDROMSUBCHNL$. For juke-boxes, an extra argument
- $arg$ specifies the slot for which (possibly limited) information is
- given. The special value $CDSL_CURRENT$ requests that information
- about the currently selected slot be returned.
-\item[CDROM_DISC_STATUS] Returns the type of the disc currently in the
- drive. It should be viewed as a complement to $CDROM_DRIVE_STATUS$.
- This $ioctl$ can provide \emph {some} information about the current
- disc that is inserted in the drive. This functionality used to be
- implemented in the low level drivers, but is now carried out
- entirely in \UCD.
-
- The history of development of the CD's use as a carrier medium for
- various digital information has lead to many different disc types.
- This $ioctl$ is useful only in the case that CDs have \emph {only
- one} type of data on them. While this is often the case, it is
- also very common for CDs to have some tracks with data, and some
- tracks with audio. Because this is an existing interface, rather
- than fixing this interface by changing the assumptions it was made
- under, thereby breaking all user applications that use this
- function, the \UCD\ implements this $ioctl$ as follows: If the CD in
- question has audio tracks on it, and it has absolutely no CD-I, XA,
- or data tracks on it, it will be reported as $CDS_AUDIO$. If it has
- both audio and data tracks, it will return $CDS_MIXED$. If there
- are no audio tracks on the disc, and if the CD in question has any
- CD-I tracks on it, it will be reported as $CDS_XA_2_2$. Failing
- that, if the CD in question has any XA tracks on it, it will be
- reported as $CDS_XA_2_1$. Finally, if the CD in question has any
- data tracks on it, it will be reported as a data CD ($CDS_DATA_1$).
-
- This $ioctl$ can return:
- $$
- \halign{$#$\ \hfil&$/*$ \rm# $*/$\hfil\cr
- CDS_NO_INFO& no information available\cr
- CDS_NO_DISC& no disc is inserted, or tray is opened\cr
- CDS_AUDIO& Audio disc (2352 audio bytes/frame)\cr
- CDS_DATA_1& data disc, mode 1 (2048 user bytes/frame)\cr
- CDS_XA_2_1& mixed data (XA), mode 2, form 1 (2048 user bytes)\cr
- CDS_XA_2_2& mixed data (XA), mode 2, form 1 (2324 user bytes)\cr
- CDS_MIXED& mixed audio/data disc\cr
- }
- $$
- For some information concerning frame layout of the various disc
- types, see a recent version of \cdromh.
-
-\item[CDROM_CHANGER_NSLOTS] Returns the number of slots in a
- juke-box.
-\item[CDROMRESET] Reset the drive.
-\item[CDROM_GET_CAPABILITY] Returns the $capability$ flags for the
- drive. Refer to section \ref{capability} for more information on
- these flags.
-\item[CDROM_LOCKDOOR] Locks the door of the drive. $arg == \rm0$
- unlocks the door, any other value locks it.
-\item[CDROM_DEBUG] Turns on debugging info. Only root is allowed
- to do this. Same semantics as CDROM_LOCKDOOR.
-\end{description}
-
-\subsubsection{Device dependent $ioctl$s}
-
-Finally, all other $ioctl$s are passed to the function $dev_ioctl()$,
-if implemented. No memory allocation or verification is carried out.
-
-\newsection{How to update your driver}
-
-\begin{enumerate}
-\item Make a backup of your current driver.
-\item Get hold of the files \cdromc\ and \cdromh, they should be in
- the directory tree that came with this documentation.
-\item Make sure you include \cdromh.
-\item Change the 3rd argument of $register_blkdev$ from
-$\&<your-drive>_fops$ to $\&cdrom_fops$.
-\item Just after that line, add the following to register with the \UCD:
- $$register_cdrom(\&<your-drive>_info);$$
- Similarly, add a call to $unregister_cdrom()$ at the appropriate place.
-\item Copy an example of the device-operations $struct$ to your
- source, \eg, from {\tt {cm206.c}} $cm206_dops$, and change all
- entries to names corresponding to your driver, or names you just
- happen to like. If your driver doesn't support a certain function,
- make the entry $NULL$. At the entry $capability$ you should list all
- capabilities your driver currently supports. If your driver
- has a capability that is not listed, please send me a message.
-\item Copy the $cdrom_device_info$ declaration from the same example
- driver, and modify the entries according to your needs. If your
- driver dynamically determines the capabilities of the hardware, this
- structure should also be declared dynamically.
-\item Implement all functions in your $<device>_dops$ structure,
- according to prototypes listed in \cdromh, and specifications given
- in section~\ref{cdrom.c}. Most likely you have already implemented
- the code in a large part, and you will almost certainly need to adapt the
- prototype and return values.
-\item Rename your $<device>_ioctl()$ function to $audio_ioctl$ and
- change the prototype a little. Remove entries listed in the first
- part in section~\ref{cdrom-ioctl}, if your code was OK, these are
- just calls to the routines you adapted in the previous step.
-\item You may remove all remaining memory checking code in the
- $audio_ioctl()$ function that deals with audio commands (these are
- listed in the second part of section~\ref{cdrom-ioctl}). There is no
- need for memory allocation either, so most $case$s in the $switch$
- statement look similar to:
- $$
- case\ CDROMREADTOCENTRY\colon get_toc_entry\bigl((struct\
- cdrom_tocentry *{})\ arg\bigr);
- $$
-\item All remaining $ioctl$ cases must be moved to a separate
- function, $<device>_ioctl$, the device-dependent $ioctl$s. Note that
- memory checking and allocation must be kept in this code!
-\item Change the prototypes of $<device>_open()$ and
- $<device>_release()$, and remove any strategic code (\ie, tray
- movement, door locking, etc.).
-\item Try to recompile the drivers. We advise you to use modules, both
- for {\tt {cdrom.o}} and your driver, as debugging is much easier this
- way.
-\end{enumerate}
-
-\newsection{Thanks}
-
-Thanks to all the people involved. First, Erik Andersen, who has
-taken over the torch in maintaining \cdromc\ and integrating much
-\cdrom-related code in the 2.1-kernel. Thanks to Scott Snyder and
-Gerd Knorr, who were the first to implement this interface for SCSI
-and IDE-CD drivers and added many ideas for extension of the data
-structures relative to kernel~2.0. Further thanks to Heiko Ei{\ss}feldt,
-Thomas Quinot, Jon Tombs, Ken Pizzini, Eberhard M\"onkeberg and Andrew
-Kroll, the \linux\ \cdrom\ device driver developers who were kind
-enough to give suggestions and criticisms during the writing. Finally
-of course, I want to thank Linus Torvalds for making this possible in
-the first place.
-
-\vfill
-$ \version\ $
-\eject
-\end{document}
diff --git a/Documentation/cdrom/ide-cd b/Documentation/cdrom/ide-cd
deleted file mode 100644
index a5f2a7f1ff46..000000000000
--- a/Documentation/cdrom/ide-cd
+++ /dev/null
@@ -1,534 +0,0 @@
-IDE-CD driver documentation
-Originally by scott snyder <snyder@fnald0.fnal.gov> (19 May 1996)
-Carrying on the torch is: Erik Andersen <andersee@debian.org>
-New maintainers (19 Oct 1998): Jens Axboe <axboe@image.dk>
-
-1. Introduction
----------------
-
-The ide-cd driver should work with all ATAPI ver 1.2 to ATAPI 2.6 compliant
-CDROM drives which attach to an IDE interface. Note that some CDROM vendors
-(including Mitsumi, Sony, Creative, Aztech, and Goldstar) have made
-both ATAPI-compliant drives and drives which use a proprietary
-interface. If your drive uses one of those proprietary interfaces,
-this driver will not work with it (but one of the other CDROM drivers
-probably will). This driver will not work with `ATAPI' drives which
-attach to the parallel port. In addition, there is at least one drive
-(CyCDROM CR520ie) which attaches to the IDE port but is not ATAPI;
-this driver will not work with drives like that either (but see the
-aztcd driver).
-
-This driver provides the following features:
-
- - Reading from data tracks, and mounting ISO 9660 filesystems.
-
- - Playing audio tracks. Most of the CDROM player programs floating
- around should work; I usually use Workman.
-
- - Multisession support.
-
- - On drives which support it, reading digital audio data directly
- from audio tracks. The program cdda2wav can be used for this.
- Note, however, that only some drives actually support this.
-
- - There is now support for CDROM changers which comply with the
- ATAPI 2.6 draft standard (such as the NEC CDR-251). This additional
- functionality includes a function call to query which slot is the
- currently selected slot, a function call to query which slots contain
- CDs, etc. A sample program which demonstrates this functionality is
- appended to the end of this file. The Sanyo 3-disc changer
- (which does not conform to the standard) is also now supported.
- Please note the driver refers to the first CD as slot # 0.
-
-
-2. Installation
----------------
-
-0. The ide-cd relies on the ide disk driver. See
- Documentation/ide/ide.txt for up-to-date information on the ide
- driver.
-
-1. Make sure that the ide and ide-cd drivers are compiled into the
- kernel you're using. When configuring the kernel, in the section
- entitled "Floppy, IDE, and other block devices", say either `Y'
- (which will compile the support directly into the kernel) or `M'
- (to compile support as a module which can be loaded and unloaded)
- to the options:
-
- ATA/ATAPI/MFM/RLL support
- Include IDE/ATAPI CDROM support
-
- Depending on what type of IDE interface you have, you may need to
- specify additional configuration options. See
- Documentation/ide/ide.txt.
-
-2. You should also ensure that the iso9660 filesystem is either
- compiled into the kernel or available as a loadable module. You
- can see if a filesystem is known to the kernel by catting
- /proc/filesystems.
-
-3. The CDROM drive should be connected to the host on an IDE
- interface. Each interface on a system is defined by an I/O port
- address and an IRQ number, the standard assignments being
- 0x1f0 and 14 for the primary interface and 0x170 and 15 for the
- secondary interface. Each interface can control up to two devices,
- where each device can be a hard drive, a CDROM drive, a floppy drive,
- or a tape drive. The two devices on an interface are called `master'
- and `slave'; this is usually selectable via a jumper on the drive.
-
- Linux names these devices as follows. The master and slave devices
- on the primary IDE interface are called `hda' and `hdb',
- respectively. The drives on the secondary interface are called
- `hdc' and `hdd'. (Interfaces at other locations get other letters
- in the third position; see Documentation/ide/ide.txt.)
-
- If you want your CDROM drive to be found automatically by the
- driver, you should make sure your IDE interface uses either the
- primary or secondary addresses mentioned above. In addition, if
- the CDROM drive is the only device on the IDE interface, it should
- be jumpered as `master'. (If for some reason you cannot configure
- your system in this manner, you can probably still use the driver.
- You may have to pass extra configuration information to the kernel
- when you boot, however. See Documentation/ide/ide.txt for more
- information.)
-
-4. Boot the system. If the drive is recognized, you should see a
- message which looks like
-
- hdb: NEC CD-ROM DRIVE:260, ATAPI CDROM drive
-
- If you do not see this, see section 5 below.
-
-5. You may want to create a symbolic link /dev/cdrom pointing to the
- actual device. You can do this with the command
-
- ln -s /dev/hdX /dev/cdrom
-
- where X should be replaced by the letter indicating where your
- drive is installed.
-
-6. You should be able to see any error messages from the driver with
- the `dmesg' command.
-
-
-3. Basic usage
---------------
-
-An ISO 9660 CDROM can be mounted by putting the disc in the drive and
-typing (as root)
-
- mount -t iso9660 /dev/cdrom /mnt/cdrom
-
-where it is assumed that /dev/cdrom is a link pointing to the actual
-device (as described in step 5 of the last section) and /mnt/cdrom is
-an empty directory. You should now be able to see the contents of the
-CDROM under the /mnt/cdrom directory. If you want to eject the CDROM,
-you must first dismount it with a command like
-
- umount /mnt/cdrom
-
-Note that audio CDs cannot be mounted.
-
-Some distributions set up /etc/fstab to always try to mount a CDROM
-filesystem on bootup. It is not required to mount the CDROM in this
-manner, though, and it may be a nuisance if you change CDROMs often.
-You should feel free to remove the cdrom line from /etc/fstab and
-mount CDROMs manually if that suits you better.
-
-Multisession and photocd discs should work with no special handling.
-The hpcdtoppm package (ftp.gwdg.de:/pub/linux/hpcdtoppm/) may be
-useful for reading photocds.
-
-To play an audio CD, you should first unmount and remove any data
-CDROM. Any of the CDROM player programs should then work (workman,
-workbone, cdplayer, etc.).
-
-On a few drives, you can read digital audio directly using a program
-such as cdda2wav. The only types of drive which I've heard support
-this are Sony and Toshiba drives. You will get errors if you try to
-use this function on a drive which does not support it.
-
-For supported changers, you can use the `cdchange' program (appended to
-the end of this file) to switch between changer slots. Note that the
-drive should be unmounted before attempting this. The program takes
-two arguments: the CDROM device, and the slot number to which you wish
-to change. If the slot number is -1, the drive is unloaded.
-
-
-4. Common problems
-------------------
-
-This section discusses some common problems encountered when trying to
-use the driver, and some possible solutions. Note that if you are
-experiencing problems, you should probably also review
-Documentation/ide/ide.txt for current information about the underlying
-IDE support code. Some of these items apply only to earlier versions
-of the driver, but are mentioned here for completeness.
-
-In most cases, you should probably check with `dmesg' for any errors
-from the driver.
-
-a. Drive is not detected during booting.
-
- - Review the configuration instructions above and in
- Documentation/ide/ide.txt, and check how your hardware is
- configured.
-
- - If your drive is the only device on an IDE interface, it should
- be jumpered as master, if at all possible.
-
- - If your IDE interface is not at the standard addresses of 0x170
- or 0x1f0, you'll need to explicitly inform the driver using a
- lilo option. See Documentation/ide/ide.txt. (This feature was
- added around kernel version 1.3.30.)
-
- - If the autoprobing is not finding your drive, you can tell the
- driver to assume that one exists by using a lilo option of the
- form `hdX=cdrom', where X is the drive letter corresponding to
- where your drive is installed. Note that if you do this and you
- see a boot message like
-
- hdX: ATAPI cdrom (?)
-
- this does _not_ mean that the driver has successfully detected
- the drive; rather, it means that the driver has not detected a
- drive, but is assuming there's one there anyway because you told
- it so. If you actually try to do I/O to a drive defined at a
- nonexistent or nonresponding I/O address, you'll probably get
- errors with a status value of 0xff.
-
- - Some IDE adapters require a nonstandard initialization sequence
- before they'll function properly. (If this is the case, there
- will often be a separate MS-DOS driver just for the controller.)
- IDE interfaces on sound cards often fall into this category.
-
- Support for some interfaces needing extra initialization is
- provided in later 1.3.x kernels. You may need to turn on
- additional kernel configuration options to get them to work;
- see Documentation/ide/ide.txt.
-
- Even if support is not available for your interface, you may be
- able to get it to work with the following procedure. First boot
- MS-DOS and load the appropriate drivers. Then warm-boot linux
- (i.e., without powering off). If this works, it can be automated
- by running loadlin from the MS-DOS autoexec.
-
-
-b. Timeout/IRQ errors.
-
- - If you always get timeout errors, interrupts from the drive are
- probably not making it to the host.
-
- - IRQ problems may also be indicated by the message
- `IRQ probe failed (<n>)' while booting. If <n> is zero, that
- means that the system did not see an interrupt from the drive when
- it was expecting one (on any feasible IRQ). If <n> is negative,
- that means the system saw interrupts on multiple IRQ lines, when
- it was expecting to receive just one from the CDROM drive.
-
- - Double-check your hardware configuration to make sure that the IRQ
- number of your IDE interface matches what the driver expects.
- (The usual assignments are 14 for the primary (0x1f0) interface
- and 15 for the secondary (0x170) interface.) Also be sure that
- you don't have some other hardware which might be conflicting with
- the IRQ you're using. Also check the BIOS setup for your system;
- some have the ability to disable individual IRQ levels, and I've
- had one report of a system which was shipped with IRQ 15 disabled
- by default.
-
- - Note that many MS-DOS CDROM drivers will still function even if
- there are hardware problems with the interrupt setup; they
- apparently don't use interrupts.
-
- - If you own a Pioneer DR-A24X, you _will_ get nasty error messages
- on boot such as "irq timeout: status=0x50 { DriveReady SeekComplete }"
- The Pioneer DR-A24X CDROM drives are fairly popular these days.
- Unfortunately, these drives seem to become very confused when we perform
- the standard Linux ATA disk drive probe. If you own one of these drives,
- you can bypass the ATA probing which confuses these CDROM drives, by
- adding `append="hdX=noprobe hdX=cdrom"' to your lilo.conf file and running
- lilo (again where X is the drive letter corresponding to where your drive
- is installed.)
-
-c. System hangups.
-
- - If the system locks up when you try to access the CDROM, the most
- likely cause is that you have a buggy IDE adapter which doesn't
- properly handle simultaneous transactions on multiple interfaces.
- The most notorious of these is the CMD640B chip. This problem can
- be worked around by specifying the `serialize' option when
- booting. Recent kernels should be able to detect the need for
- this automatically in most cases, but the detection is not
- foolproof. See Documentation/ide/ide.txt for more information
- about the `serialize' option and the CMD640B.
-
- - Note that many MS-DOS CDROM drivers will work with such buggy
- hardware, apparently because they never attempt to overlap CDROM
- operations with other disk activity.
-
-
-d. Can't mount a CDROM.
-
- - If you get errors from mount, it may help to check `dmesg' to see
- if there are any more specific errors from the driver or from the
- filesystem.
-
- - Make sure there's a CDROM loaded in the drive, and that's it's an
- ISO 9660 disc. You can't mount an audio CD.
-
- - With the CDROM in the drive and unmounted, try something like
-
- cat /dev/cdrom | od | more
-
- If you see a dump, then the drive and driver are probably working
- OK, and the problem is at the filesystem level (i.e., the CDROM is
- not ISO 9660 or has errors in the filesystem structure).
-
- - If you see `not a block device' errors, check that the definitions
- of the device special files are correct. They should be as
- follows:
-
- brw-rw---- 1 root disk 3, 0 Nov 11 18:48 /dev/hda
- brw-rw---- 1 root disk 3, 64 Nov 11 18:48 /dev/hdb
- brw-rw---- 1 root disk 22, 0 Nov 11 18:48 /dev/hdc
- brw-rw---- 1 root disk 22, 64 Nov 11 18:48 /dev/hdd
-
- Some early Slackware releases had these defined incorrectly. If
- these are wrong, you can remake them by running the script
- scripts/MAKEDEV.ide. (You may have to make it executable
- with chmod first.)
-
- If you have a /dev/cdrom symbolic link, check that it is pointing
- to the correct device file.
-
- If you hear people talking of the devices `hd1a' and `hd1b', these
- were old names for what are now called hdc and hdd. Those names
- should be considered obsolete.
-
- - If mount is complaining that the iso9660 filesystem is not
- available, but you know it is (check /proc/filesystems), you
- probably need a newer version of mount. Early versions would not
- always give meaningful error messages.
-
-
-e. Directory listings are unpredictably truncated, and `dmesg' shows
- `buffer botch' error messages from the driver.
-
- - There was a bug in the version of the driver in 1.2.x kernels
- which could cause this. It was fixed in 1.3.0. If you can't
- upgrade, you can probably work around the problem by specifying a
- blocksize of 2048 when mounting. (Note that you won't be able to
- directly execute binaries off the CDROM in that case.)
-
- If you see this in kernels later than 1.3.0, please report it as a
- bug.
-
-
-f. Data corruption.
-
- - Random data corruption was occasionally observed with the Hitachi
- CDR-7730 CDROM. If you experience data corruption, using "hdx=slow"
- as a command line parameter may work around the problem, at the
- expense of low system performance.
-
-
-5. cdchange.c
--------------
-
-/*
- * cdchange.c [-v] <device> [<slot>]
- *
- * This loads a CDROM from a specified slot in a changer, and displays
- * information about the changer status. The drive should be unmounted before
- * using this program.
- *
- * Changer information is displayed if either the -v flag is specified
- * or no slot was specified.
- *
- * Based on code originally from Gerhard Zuber <zuber@berlin.snafu.de>.
- * Changer status information, and rewrite for the new Uniform CDROM driver
- * interface by Erik Andersen <andersee@debian.org>.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <string.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/ioctl.h>
-#include <linux/cdrom.h>
-
-
-int
-main (int argc, char **argv)
-{
- char *program;
- char *device;
- int fd; /* file descriptor for CD-ROM device */
- int status; /* return status for system calls */
- int verbose = 0;
- int slot=-1, x_slot;
- int total_slots_available;
-
- program = argv[0];
-
- ++argv;
- --argc;
-
- if (argc < 1 || argc > 3) {
- fprintf (stderr, "usage: %s [-v] <device> [<slot>]\n",
- program);
- fprintf (stderr, " Slots are numbered 1 -- n.\n");
- exit (1);
- }
-
- if (strcmp (argv[0], "-v") == 0) {
- verbose = 1;
- ++argv;
- --argc;
- }
-
- device = argv[0];
-
- if (argc == 2)
- slot = atoi (argv[1]) - 1;
-
- /* open device */
- fd = open(device, O_RDONLY | O_NONBLOCK);
- if (fd < 0) {
- fprintf (stderr, "%s: open failed for `%s': %s\n",
- program, device, strerror (errno));
- exit (1);
- }
-
- /* Check CD player status */
- total_slots_available = ioctl (fd, CDROM_CHANGER_NSLOTS);
- if (total_slots_available <= 1 ) {
- fprintf (stderr, "%s: Device `%s' is not an ATAPI "
- "compliant CD changer.\n", program, device);
- exit (1);
- }
-
- if (slot >= 0) {
- if (slot >= total_slots_available) {
- fprintf (stderr, "Bad slot number. "
- "Should be 1 -- %d.\n",
- total_slots_available);
- exit (1);
- }
-
- /* load */
- slot=ioctl (fd, CDROM_SELECT_DISC, slot);
- if (slot<0) {
- fflush(stdout);
- perror ("CDROM_SELECT_DISC ");
- exit(1);
- }
- }
-
- if (slot < 0 || verbose) {
-
- status=ioctl (fd, CDROM_SELECT_DISC, CDSL_CURRENT);
- if (status<0) {
- fflush(stdout);
- perror (" CDROM_SELECT_DISC");
- exit(1);
- }
- slot=status;
-
- printf ("Current slot: %d\n", slot+1);
- printf ("Total slots available: %d\n",
- total_slots_available);
-
- printf ("Drive status: ");
- status = ioctl (fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);
- if (status<0) {
- perror(" CDROM_DRIVE_STATUS");
- } else switch(status) {
- case CDS_DISC_OK:
- printf ("Ready.\n");
- break;
- case CDS_TRAY_OPEN:
- printf ("Tray Open.\n");
- break;
- case CDS_DRIVE_NOT_READY:
- printf ("Drive Not Ready.\n");
- break;
- default:
- printf ("This Should not happen!\n");
- break;
- }
-
- for (x_slot=0; x_slot<total_slots_available; x_slot++) {
- printf ("Slot %2d: ", x_slot+1);
- status = ioctl (fd, CDROM_DRIVE_STATUS, x_slot);
- if (status<0) {
- perror(" CDROM_DRIVE_STATUS");
- } else switch(status) {
- case CDS_DISC_OK:
- printf ("Disc present.");
- break;
- case CDS_NO_DISC:
- printf ("Empty slot.");
- break;
- case CDS_TRAY_OPEN:
- printf ("CD-ROM tray open.\n");
- break;
- case CDS_DRIVE_NOT_READY:
- printf ("CD-ROM drive not ready.\n");
- break;
- case CDS_NO_INFO:
- printf ("No Information available.");
- break;
- default:
- printf ("This Should not happen!\n");
- break;
- }
- if (slot == x_slot) {
- status = ioctl (fd, CDROM_DISC_STATUS);
- if (status<0) {
- perror(" CDROM_DISC_STATUS");
- }
- switch (status) {
- case CDS_AUDIO:
- printf ("\tAudio disc.\t");
- break;
- case CDS_DATA_1:
- case CDS_DATA_2:
- printf ("\tData disc type %d.\t", status-CDS_DATA_1+1);
- break;
- case CDS_XA_2_1:
- case CDS_XA_2_2:
- printf ("\tXA data disc type %d.\t", status-CDS_XA_2_1+1);
- break;
- default:
- printf ("\tUnknown disc type 0x%x!\t", status);
- break;
- }
- }
- status = ioctl (fd, CDROM_MEDIA_CHANGED, x_slot);
- if (status<0) {
- perror(" CDROM_MEDIA_CHANGED");
- }
- switch (status) {
- case 1:
- printf ("Changed.\n");
- break;
- default:
- printf ("\n");
- break;
- }
- }
- }
-
- /* close device */
- status = close (fd);
- if (status != 0) {
- fprintf (stderr, "%s: close failed for `%s': %s\n",
- program, device, strerror (errno));
- exit (1);
- }
-
- exit (0);
-}
diff --git a/Documentation/cdrom/index.rst b/Documentation/cdrom/index.rst
new file mode 100644
index 000000000000..3ac4f716612f
--- /dev/null
+++ b/Documentation/cdrom/index.rst
@@ -0,0 +1,17 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======
+CD-ROM
+======
+
+.. toctree::
+ :maxdepth: 1
+
+ cdrom-standard
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
diff --git a/Documentation/cdrom/packet-writing.txt b/Documentation/cdrom/packet-writing.txt
deleted file mode 100644
index 2834170d821e..000000000000
--- a/Documentation/cdrom/packet-writing.txt
+++ /dev/null
@@ -1,132 +0,0 @@
-Getting started quick
----------------------
-
-- Select packet support in the block device section and UDF support in
- the file system section.
-
-- Compile and install kernel and modules, reboot.
-
-- You need the udftools package (pktsetup, mkudffs, cdrwtool).
- Download from http://sourceforge.net/projects/linux-udf/
-
-- Grab a new CD-RW disc and format it (assuming CD-RW is hdc, substitute
- as appropriate):
- # cdrwtool -d /dev/hdc -q
-
-- Setup your writer
- # pktsetup dev_name /dev/hdc
-
-- Now you can mount /dev/pktcdvd/dev_name and copy files to it. Enjoy!
- # mount /dev/pktcdvd/dev_name /cdrom -t udf -o rw,noatime
-
-
-Packet writing for DVD-RW media
--------------------------------
-
-DVD-RW discs can be written to much like CD-RW discs if they are in
-the so called "restricted overwrite" mode. To put a disc in restricted
-overwrite mode, run:
-
- # dvd+rw-format /dev/hdc
-
-You can then use the disc the same way you would use a CD-RW disc:
-
- # pktsetup dev_name /dev/hdc
- # mount /dev/pktcdvd/dev_name /cdrom -t udf -o rw,noatime
-
-
-Packet writing for DVD+RW media
--------------------------------
-
-According to the DVD+RW specification, a drive supporting DVD+RW discs
-shall implement "true random writes with 2KB granularity", which means
-that it should be possible to put any filesystem with a block size >=
-2KB on such a disc. For example, it should be possible to do:
-
- # dvd+rw-format /dev/hdc (only needed if the disc has never
- been formatted)
- # mkudffs /dev/hdc
- # mount /dev/hdc /cdrom -t udf -o rw,noatime
-
-However, some drives don't follow the specification and expect the
-host to perform aligned writes at 32KB boundaries. Other drives do
-follow the specification, but suffer bad performance problems if the
-writes are not 32KB aligned.
-
-Both problems can be solved by using the pktcdvd driver, which always
-generates aligned writes.
-
- # dvd+rw-format /dev/hdc
- # pktsetup dev_name /dev/hdc
- # mkudffs /dev/pktcdvd/dev_name
- # mount /dev/pktcdvd/dev_name /cdrom -t udf -o rw,noatime
-
-
-Packet writing for DVD-RAM media
---------------------------------
-
-DVD-RAM discs are random writable, so using the pktcdvd driver is not
-necessary. However, using the pktcdvd driver can improve performance
-in the same way it does for DVD+RW media.
-
-
-Notes
------
-
-- CD-RW media can usually not be overwritten more than about 1000
- times, so to avoid unnecessary wear on the media, you should always
- use the noatime mount option.
-
-- Defect management (ie automatic remapping of bad sectors) has not
- been implemented yet, so you are likely to get at least some
- filesystem corruption if the disc wears out.
-
-- Since the pktcdvd driver makes the disc appear as a regular block
- device with a 2KB block size, you can put any filesystem you like on
- the disc. For example, run:
-
- # /sbin/mke2fs /dev/pktcdvd/dev_name
-
- to create an ext2 filesystem on the disc.
-
-
-Using the pktcdvd sysfs interface
----------------------------------
-
-Since Linux 2.6.20, the pktcdvd module has a sysfs interface
-and can be controlled by it. For example the "pktcdvd" tool uses
-this interface. (see http://tom.ist-im-web.de/download/pktcdvd )
-
-"pktcdvd" works similar to "pktsetup", e.g.:
-
- # pktcdvd -a dev_name /dev/hdc
- # mkudffs /dev/pktcdvd/dev_name
- # mount -t udf -o rw,noatime /dev/pktcdvd/dev_name /dvdram
- # cp files /dvdram
- # umount /dvdram
- # pktcdvd -r dev_name
-
-
-For a description of the sysfs interface look into the file:
-
- Documentation/ABI/testing/sysfs-class-pktcdvd
-
-
-Using the pktcdvd debugfs interface
------------------------------------
-
-To read pktcdvd device infos in human readable form, do:
-
- # cat /sys/kernel/debug/pktcdvd/pktcdvd[0-7]/info
-
-For a description of the debugfs interface look into the file:
-
- Documentation/ABI/testing/debugfs-pktcdvd
-
-
-
-Links
------
-
-See http://fy.chalmers.se/~appro/linux/DVD+RW/ for more information
-about DVD writing.
diff --git a/Documentation/cgroup-v1/blkio-controller.txt b/Documentation/cgroup-v1/blkio-controller.txt
deleted file mode 100644
index 673dc34d3f78..000000000000
--- a/Documentation/cgroup-v1/blkio-controller.txt
+++ /dev/null
@@ -1,375 +0,0 @@
- Block IO Controller
- ===================
-Overview
-========
-cgroup subsys "blkio" implements the block io controller. There seems to be
-a need of various kinds of IO control policies (like proportional BW, max BW)
-both at leaf nodes as well as at intermediate nodes in a storage hierarchy.
-Plan is to use the same cgroup based management interface for blkio controller
-and based on user options switch IO policies in the background.
-
-Currently two IO control policies are implemented. First one is proportional
-weight time based division of disk policy. It is implemented in CFQ. Hence
-this policy takes effect only on leaf nodes when CFQ is being used. The second
-one is throttling policy which can be used to specify upper IO rate limits
-on devices. This policy is implemented in generic block layer and can be
-used on leaf nodes as well as higher level logical devices like device mapper.
-
-HOWTO
-=====
-Proportional Weight division of bandwidth
------------------------------------------
-You can do a very simple testing of running two dd threads in two different
-cgroups. Here is what you can do.
-
-- Enable Block IO controller
- CONFIG_BLK_CGROUP=y
-
-- Enable group scheduling in CFQ
- CONFIG_CFQ_GROUP_IOSCHED=y
-
-- Compile and boot into kernel and mount IO controller (blkio); see
- cgroups.txt, Why are cgroups needed?.
-
- mount -t tmpfs cgroup_root /sys/fs/cgroup
- mkdir /sys/fs/cgroup/blkio
- mount -t cgroup -o blkio none /sys/fs/cgroup/blkio
-
-- Create two cgroups
- mkdir -p /sys/fs/cgroup/blkio/test1/ /sys/fs/cgroup/blkio/test2
-
-- Set weights of group test1 and test2
- echo 1000 > /sys/fs/cgroup/blkio/test1/blkio.weight
- echo 500 > /sys/fs/cgroup/blkio/test2/blkio.weight
-
-- Create two same size files (say 512MB each) on same disk (file1, file2) and
- launch two dd threads in different cgroup to read those files.
-
- sync
- echo 3 > /proc/sys/vm/drop_caches
-
- dd if=/mnt/sdb/zerofile1 of=/dev/null &
- echo $! > /sys/fs/cgroup/blkio/test1/tasks
- cat /sys/fs/cgroup/blkio/test1/tasks
-
- dd if=/mnt/sdb/zerofile2 of=/dev/null &
- echo $! > /sys/fs/cgroup/blkio/test2/tasks
- cat /sys/fs/cgroup/blkio/test2/tasks
-
-- At macro level, first dd should finish first. To get more precise data, keep
- on looking at (with the help of script), at blkio.disk_time and
- blkio.disk_sectors files of both test1 and test2 groups. This will tell how
- much disk time (in milliseconds), each group got and how many sectors each
- group dispatched to the disk. We provide fairness in terms of disk time, so
- ideally io.disk_time of cgroups should be in proportion to the weight.
-
-Throttling/Upper Limit policy
------------------------------
-- Enable Block IO controller
- CONFIG_BLK_CGROUP=y
-
-- Enable throttling in block layer
- CONFIG_BLK_DEV_THROTTLING=y
-
-- Mount blkio controller (see cgroups.txt, Why are cgroups needed?)
- mount -t cgroup -o blkio none /sys/fs/cgroup/blkio
-
-- Specify a bandwidth rate on particular device for root group. The format
- for policy is "<major>:<minor> <bytes_per_second>".
-
- echo "8:16 1048576" > /sys/fs/cgroup/blkio/blkio.throttle.read_bps_device
-
- Above will put a limit of 1MB/second on reads happening for root group
- on device having major/minor number 8:16.
-
-- Run dd to read a file and see if rate is throttled to 1MB/s or not.
-
- # dd iflag=direct if=/mnt/common/zerofile of=/dev/null bs=4K count=1024
- 1024+0 records in
- 1024+0 records out
- 4194304 bytes (4.2 MB) copied, 4.0001 s, 1.0 MB/s
-
- Limits for writes can be put using blkio.throttle.write_bps_device file.
-
-Hierarchical Cgroups
-====================
-
-Both CFQ and throttling implement hierarchy support; however,
-throttling's hierarchy support is enabled iff "sane_behavior" is
-enabled from cgroup side, which currently is a development option and
-not publicly available.
-
-If somebody created a hierarchy like as follows.
-
- root
- / \
- test1 test2
- |
- test3
-
-CFQ by default and throttling with "sane_behavior" will handle the
-hierarchy correctly. For details on CFQ hierarchy support, refer to
-Documentation/block/cfq-iosched.txt. For throttling, all limits apply
-to the whole subtree while all statistics are local to the IOs
-directly generated by tasks in that cgroup.
-
-Throttling without "sane_behavior" enabled from cgroup side will
-practically treat all groups at same level as if it looks like the
-following.
-
- pivot
- / / \ \
- root test1 test2 test3
-
-Various user visible config options
-===================================
-CONFIG_BLK_CGROUP
- - Block IO controller.
-
-CONFIG_DEBUG_BLK_CGROUP
- - Debug help. Right now some additional stats file show up in cgroup
- if this option is enabled.
-
-CONFIG_CFQ_GROUP_IOSCHED
- - Enables group scheduling in CFQ. Currently only 1 level of group
- creation is allowed.
-
-CONFIG_BLK_DEV_THROTTLING
- - Enable block device throttling support in block layer.
-
-Details of cgroup files
-=======================
-Proportional weight policy files
---------------------------------
-- blkio.weight
- - Specifies per cgroup weight. This is default weight of the group
- on all the devices until and unless overridden by per device rule.
- (See blkio.weight_device).
- Currently allowed range of weights is from 10 to 1000.
-
-- blkio.weight_device
- - One can specify per cgroup per device rules using this interface.
- These rules override the default value of group weight as specified
- by blkio.weight.
-
- Following is the format.
-
- # echo dev_maj:dev_minor weight > blkio.weight_device
- Configure weight=300 on /dev/sdb (8:16) in this cgroup
- # echo 8:16 300 > blkio.weight_device
- # cat blkio.weight_device
- dev weight
- 8:16 300
-
- Configure weight=500 on /dev/sda (8:0) in this cgroup
- # echo 8:0 500 > blkio.weight_device
- # cat blkio.weight_device
- dev weight
- 8:0 500
- 8:16 300
-
- Remove specific weight for /dev/sda in this cgroup
- # echo 8:0 0 > blkio.weight_device
- # cat blkio.weight_device
- dev weight
- 8:16 300
-
-- blkio.leaf_weight[_device]
- - Equivalents of blkio.weight[_device] for the purpose of
- deciding how much weight tasks in the given cgroup has while
- competing with the cgroup's child cgroups. For details,
- please refer to Documentation/block/cfq-iosched.txt.
-
-- blkio.time
- - disk time allocated to cgroup per device in milliseconds. First
- two fields specify the major and minor number of the device and
- third field specifies the disk time allocated to group in
- milliseconds.
-
-- blkio.sectors
- - number of sectors transferred to/from disk by the group. First
- two fields specify the major and minor number of the device and
- third field specifies the number of sectors transferred by the
- group to/from the device.
-
-- blkio.io_service_bytes
- - Number of bytes transferred to/from the disk by the group. These
- are further divided by the type of operation - read or write, sync
- or async. First two fields specify the major and minor number of the
- device, third field specifies the operation type and the fourth field
- specifies the number of bytes.
-
-- blkio.io_serviced
- - Number of IOs (bio) issued to the disk by the group. These
- are further divided by the type of operation - read or write, sync
- or async. First two fields specify the major and minor number of the
- device, third field specifies the operation type and the fourth field
- specifies the number of IOs.
-
-- blkio.io_service_time
- - Total amount of time between request dispatch and request completion
- for the IOs done by this cgroup. This is in nanoseconds to make it
- meaningful for flash devices too. For devices with queue depth of 1,
- this time represents the actual service time. When queue_depth > 1,
- that is no longer true as requests may be served out of order. This
- may cause the service time for a given IO to include the service time
- of multiple IOs when served out of order which may result in total
- io_service_time > actual time elapsed. This time is further divided by
- the type of operation - read or write, sync or async. First two fields
- specify the major and minor number of the device, third field
- specifies the operation type and the fourth field specifies the
- io_service_time in ns.
-
-- blkio.io_wait_time
- - Total amount of time the IOs for this cgroup spent waiting in the
- scheduler queues for service. This can be greater than the total time
- elapsed since it is cumulative io_wait_time for all IOs. It is not a
- measure of total time the cgroup spent waiting but rather a measure of
- the wait_time for its individual IOs. For devices with queue_depth > 1
- this metric does not include the time spent waiting for service once
- the IO is dispatched to the device but till it actually gets serviced
- (there might be a time lag here due to re-ordering of requests by the
- device). This is in nanoseconds to make it meaningful for flash
- devices too. This time is further divided by the type of operation -
- read or write, sync or async. First two fields specify the major and
- minor number of the device, third field specifies the operation type
- and the fourth field specifies the io_wait_time in ns.
-
-- blkio.io_merged
- - Total number of bios/requests merged into requests belonging to this
- cgroup. This is further divided by the type of operation - read or
- write, sync or async.
-
-- blkio.io_queued
- - Total number of requests queued up at any given instant for this
- cgroup. This is further divided by the type of operation - read or
- write, sync or async.
-
-- blkio.avg_queue_size
- - Debugging aid only enabled if CONFIG_DEBUG_BLK_CGROUP=y.
- The average queue size for this cgroup over the entire time of this
- cgroup's existence. Queue size samples are taken each time one of the
- queues of this cgroup gets a timeslice.
-
-- blkio.group_wait_time
- - Debugging aid only enabled if CONFIG_DEBUG_BLK_CGROUP=y.
- This is the amount of time the cgroup had to wait since it became busy
- (i.e., went from 0 to 1 request queued) to get a timeslice for one of
- its queues. This is different from the io_wait_time which is the
- cumulative total of the amount of time spent by each IO in that cgroup
- waiting in the scheduler queue. This is in nanoseconds. If this is
- read when the cgroup is in a waiting (for timeslice) state, the stat
- will only report the group_wait_time accumulated till the last time it
- got a timeslice and will not include the current delta.
-
-- blkio.empty_time
- - Debugging aid only enabled if CONFIG_DEBUG_BLK_CGROUP=y.
- This is the amount of time a cgroup spends without any pending
- requests when not being served, i.e., it does not include any time
- spent idling for one of the queues of the cgroup. This is in
- nanoseconds. If this is read when the cgroup is in an empty state,
- the stat will only report the empty_time accumulated till the last
- time it had a pending request and will not include the current delta.
-
-- blkio.idle_time
- - Debugging aid only enabled if CONFIG_DEBUG_BLK_CGROUP=y.
- This is the amount of time spent by the IO scheduler idling for a
- given cgroup in anticipation of a better request than the existing ones
- from other queues/cgroups. This is in nanoseconds. If this is read
- when the cgroup is in an idling state, the stat will only report the
- idle_time accumulated till the last idle period and will not include
- the current delta.
-
-- blkio.dequeue
- - Debugging aid only enabled if CONFIG_DEBUG_BLK_CGROUP=y. This
- gives the statistics about how many a times a group was dequeued
- from service tree of the device. First two fields specify the major
- and minor number of the device and third field specifies the number
- of times a group was dequeued from a particular device.
-
-- blkio.*_recursive
- - Recursive version of various stats. These files show the
- same information as their non-recursive counterparts but
- include stats from all the descendant cgroups.
-
-Throttling/Upper limit policy files
------------------------------------
-- blkio.throttle.read_bps_device
- - Specifies upper limit on READ rate from the device. IO rate is
- specified in bytes per second. Rules are per device. Following is
- the format.
-
- echo "<major>:<minor> <rate_bytes_per_second>" > /cgrp/blkio.throttle.read_bps_device
-
-- blkio.throttle.write_bps_device
- - Specifies upper limit on WRITE rate to the device. IO rate is
- specified in bytes per second. Rules are per device. Following is
- the format.
-
- echo "<major>:<minor> <rate_bytes_per_second>" > /cgrp/blkio.throttle.write_bps_device
-
-- blkio.throttle.read_iops_device
- - Specifies upper limit on READ rate from the device. IO rate is
- specified in IO per second. Rules are per device. Following is
- the format.
-
- echo "<major>:<minor> <rate_io_per_second>" > /cgrp/blkio.throttle.read_iops_device
-
-- blkio.throttle.write_iops_device
- - Specifies upper limit on WRITE rate to the device. IO rate is
- specified in io per second. Rules are per device. Following is
- the format.
-
- echo "<major>:<minor> <rate_io_per_second>" > /cgrp/blkio.throttle.write_iops_device
-
-Note: If both BW and IOPS rules are specified for a device, then IO is
- subjected to both the constraints.
-
-- blkio.throttle.io_serviced
- - Number of IOs (bio) issued to the disk by the group. These
- are further divided by the type of operation - read or write, sync
- or async. First two fields specify the major and minor number of the
- device, third field specifies the operation type and the fourth field
- specifies the number of IOs.
-
-- blkio.throttle.io_service_bytes
- - Number of bytes transferred to/from the disk by the group. These
- are further divided by the type of operation - read or write, sync
- or async. First two fields specify the major and minor number of the
- device, third field specifies the operation type and the fourth field
- specifies the number of bytes.
-
-Common files among various policies
------------------------------------
-- blkio.reset_stats
- - Writing an int to this file will result in resetting all the stats
- for that cgroup.
-
-CFQ sysfs tunable
-=================
-/sys/block/<disk>/queue/iosched/slice_idle
-------------------------------------------
-On a faster hardware CFQ can be slow, especially with sequential workload.
-This happens because CFQ idles on a single queue and single queue might not
-drive deeper request queue depths to keep the storage busy. In such scenarios
-one can try setting slice_idle=0 and that would switch CFQ to IOPS
-(IO operations per second) mode on NCQ supporting hardware.
-
-That means CFQ will not idle between cfq queues of a cfq group and hence be
-able to driver higher queue depth and achieve better throughput. That also
-means that cfq provides fairness among groups in terms of IOPS and not in
-terms of disk time.
-
-/sys/block/<disk>/queue/iosched/group_idle
-------------------------------------------
-If one disables idling on individual cfq queues and cfq service trees by
-setting slice_idle=0, group_idle kicks in. That means CFQ will still idle
-on the group in an attempt to provide fairness among groups.
-
-By default group_idle is same as slice_idle and does not do anything if
-slice_idle is enabled.
-
-One can experience an overall throughput drop if you have created multiple
-groups and put applications in that group which are not driving enough
-IO to keep disk busy. In that case set group_idle=0, and CFQ will not idle
-on individual groups and throughput should improve.
diff --git a/Documentation/cgroup-v1/cgroups.txt b/Documentation/cgroup-v1/cgroups.txt
deleted file mode 100644
index 059f7063eea6..000000000000
--- a/Documentation/cgroup-v1/cgroups.txt
+++ /dev/null
@@ -1,677 +0,0 @@
- CGROUPS
- -------
-
-Written by Paul Menage <menage@google.com> based on
-Documentation/cgroup-v1/cpusets.txt
-
-Original copyright statements from cpusets.txt:
-Portions Copyright (C) 2004 BULL SA.
-Portions Copyright (c) 2004-2006 Silicon Graphics, Inc.
-Modified by Paul Jackson <pj@sgi.com>
-Modified by Christoph Lameter <cl@linux.com>
-
-CONTENTS:
-=========
-
-1. Control Groups
- 1.1 What are cgroups ?
- 1.2 Why are cgroups needed ?
- 1.3 How are cgroups implemented ?
- 1.4 What does notify_on_release do ?
- 1.5 What does clone_children do ?
- 1.6 How do I use cgroups ?
-2. Usage Examples and Syntax
- 2.1 Basic Usage
- 2.2 Attaching processes
- 2.3 Mounting hierarchies by name
-3. Kernel API
- 3.1 Overview
- 3.2 Synchronization
- 3.3 Subsystem API
-4. Extended attributes usage
-5. Questions
-
-1. Control Groups
-=================
-
-1.1 What are cgroups ?
-----------------------
-
-Control Groups provide a mechanism for aggregating/partitioning sets of
-tasks, and all their future children, into hierarchical groups with
-specialized behaviour.
-
-Definitions:
-
-A *cgroup* associates a set of tasks with a set of parameters for one
-or more subsystems.
-
-A *subsystem* is a module that makes use of the task grouping
-facilities provided by cgroups to treat groups of tasks in
-particular ways. A subsystem is typically a "resource controller" that
-schedules a resource or applies per-cgroup limits, but it may be
-anything that wants to act on a group of processes, e.g. a
-virtualization subsystem.
-
-A *hierarchy* is a set of cgroups arranged in a tree, such that
-every task in the system is in exactly one of the cgroups in the
-hierarchy, and a set of subsystems; each subsystem has system-specific
-state attached to each cgroup in the hierarchy. Each hierarchy has
-an instance of the cgroup virtual filesystem associated with it.
-
-At any one time there may be multiple active hierarchies of task
-cgroups. Each hierarchy is a partition of all tasks in the system.
-
-User-level code may create and destroy cgroups by name in an
-instance of the cgroup virtual file system, specify and query to
-which cgroup a task is assigned, and list the task PIDs assigned to
-a cgroup. Those creations and assignments only affect the hierarchy
-associated with that instance of the cgroup file system.
-
-On their own, the only use for cgroups is for simple job
-tracking. The intention is that other subsystems hook into the generic
-cgroup support to provide new attributes for cgroups, such as
-accounting/limiting the resources which processes in a cgroup can
-access. For example, cpusets (see Documentation/cgroup-v1/cpusets.txt) allow
-you to associate a set of CPUs and a set of memory nodes with the
-tasks in each cgroup.
-
-1.2 Why are cgroups needed ?
-----------------------------
-
-There are multiple efforts to provide process aggregations in the
-Linux kernel, mainly for resource-tracking purposes. Such efforts
-include cpusets, CKRM/ResGroups, UserBeanCounters, and virtual server
-namespaces. These all require the basic notion of a
-grouping/partitioning of processes, with newly forked processes ending
-up in the same group (cgroup) as their parent process.
-
-The kernel cgroup patch provides the minimum essential kernel
-mechanisms required to efficiently implement such groups. It has
-minimal impact on the system fast paths, and provides hooks for
-specific subsystems such as cpusets to provide additional behaviour as
-desired.
-
-Multiple hierarchy support is provided to allow for situations where
-the division of tasks into cgroups is distinctly different for
-different subsystems - having parallel hierarchies allows each
-hierarchy to be a natural division of tasks, without having to handle
-complex combinations of tasks that would be present if several
-unrelated subsystems needed to be forced into the same tree of
-cgroups.
-
-At one extreme, each resource controller or subsystem could be in a
-separate hierarchy; at the other extreme, all subsystems
-would be attached to the same hierarchy.
-
-As an example of a scenario (originally proposed by vatsa@in.ibm.com)
-that can benefit from multiple hierarchies, consider a large
-university server with various users - students, professors, system
-tasks etc. The resource planning for this server could be along the
-following lines:
-
- CPU : "Top cpuset"
- / \
- CPUSet1 CPUSet2
- | |
- (Professors) (Students)
-
- In addition (system tasks) are attached to topcpuset (so
- that they can run anywhere) with a limit of 20%
-
- Memory : Professors (50%), Students (30%), system (20%)
-
- Disk : Professors (50%), Students (30%), system (20%)
-
- Network : WWW browsing (20%), Network File System (60%), others (20%)
- / \
- Professors (15%) students (5%)
-
-Browsers like Firefox/Lynx go into the WWW network class, while (k)nfsd goes
-into the NFS network class.
-
-At the same time Firefox/Lynx will share an appropriate CPU/Memory class
-depending on who launched it (prof/student).
-
-With the ability to classify tasks differently for different resources
-(by putting those resource subsystems in different hierarchies),
-the admin can easily set up a script which receives exec notifications
-and depending on who is launching the browser he can
-
- # echo browser_pid > /sys/fs/cgroup/<restype>/<userclass>/tasks
-
-With only a single hierarchy, he now would potentially have to create
-a separate cgroup for every browser launched and associate it with
-appropriate network and other resource class. This may lead to
-proliferation of such cgroups.
-
-Also let's say that the administrator would like to give enhanced network
-access temporarily to a student's browser (since it is night and the user
-wants to do online gaming :)) OR give one of the student's simulation
-apps enhanced CPU power.
-
-With ability to write PIDs directly to resource classes, it's just a
-matter of:
-
- # echo pid > /sys/fs/cgroup/network/<new_class>/tasks
- (after some time)
- # echo pid > /sys/fs/cgroup/network/<orig_class>/tasks
-
-Without this ability, the administrator would have to split the cgroup into
-multiple separate ones and then associate the new cgroups with the
-new resource classes.
-
-
-
-1.3 How are cgroups implemented ?
----------------------------------
-
-Control Groups extends the kernel as follows:
-
- - Each task in the system has a reference-counted pointer to a
- css_set.
-
- - A css_set contains a set of reference-counted pointers to
- cgroup_subsys_state objects, one for each cgroup subsystem
- registered in the system. There is no direct link from a task to
- the cgroup of which it's a member in each hierarchy, but this
- can be determined by following pointers through the
- cgroup_subsys_state objects. This is because accessing the
- subsystem state is something that's expected to happen frequently
- and in performance-critical code, whereas operations that require a
- task's actual cgroup assignments (in particular, moving between
- cgroups) are less common. A linked list runs through the cg_list
- field of each task_struct using the css_set, anchored at
- css_set->tasks.
-
- - A cgroup hierarchy filesystem can be mounted for browsing and
- manipulation from user space.
-
- - You can list all the tasks (by PID) attached to any cgroup.
-
-The implementation of cgroups requires a few, simple hooks
-into the rest of the kernel, none in performance-critical paths:
-
- - in init/main.c, to initialize the root cgroups and initial
- css_set at system boot.
-
- - in fork and exit, to attach and detach a task from its css_set.
-
-In addition, a new file system of type "cgroup" may be mounted, to
-enable browsing and modifying the cgroups presently known to the
-kernel. When mounting a cgroup hierarchy, you may specify a
-comma-separated list of subsystems to mount as the filesystem mount
-options. By default, mounting the cgroup filesystem attempts to
-mount a hierarchy containing all registered subsystems.
-
-If an active hierarchy with exactly the same set of subsystems already
-exists, it will be reused for the new mount. If no existing hierarchy
-matches, and any of the requested subsystems are in use in an existing
-hierarchy, the mount will fail with -EBUSY. Otherwise, a new hierarchy
-is activated, associated with the requested subsystems.
-
-It's not currently possible to bind a new subsystem to an active
-cgroup hierarchy, or to unbind a subsystem from an active cgroup
-hierarchy. This may be possible in future, but is fraught with nasty
-error-recovery issues.
-
-When a cgroup filesystem is unmounted, if there are any
-child cgroups created below the top-level cgroup, that hierarchy
-will remain active even though unmounted; if there are no
-child cgroups then the hierarchy will be deactivated.
-
-No new system calls are added for cgroups - all support for
-querying and modifying cgroups is via this cgroup file system.
-
-Each task under /proc has an added file named 'cgroup' displaying,
-for each active hierarchy, the subsystem names and the cgroup name
-as the path relative to the root of the cgroup file system.
-
-Each cgroup is represented by a directory in the cgroup file system
-containing the following files describing that cgroup:
-
- - tasks: list of tasks (by PID) attached to that cgroup. This list
- is not guaranteed to be sorted. Writing a thread ID into this file
- moves the thread into this cgroup.
- - cgroup.procs: list of thread group IDs in the cgroup. This list is
- not guaranteed to be sorted or free of duplicate TGIDs, and userspace
- should sort/uniquify the list if this property is required.
- Writing a thread group ID into this file moves all threads in that
- group into this cgroup.
- - notify_on_release flag: run the release agent on exit?
- - release_agent: the path to use for release notifications (this file
- exists in the top cgroup only)
-
-Other subsystems such as cpusets may add additional files in each
-cgroup dir.
-
-New cgroups are created using the mkdir system call or shell
-command. The properties of a cgroup, such as its flags, are
-modified by writing to the appropriate file in that cgroups
-directory, as listed above.
-
-The named hierarchical structure of nested cgroups allows partitioning
-a large system into nested, dynamically changeable, "soft-partitions".
-
-The attachment of each task, automatically inherited at fork by any
-children of that task, to a cgroup allows organizing the work load
-on a system into related sets of tasks. A task may be re-attached to
-any other cgroup, if allowed by the permissions on the necessary
-cgroup file system directories.
-
-When a task is moved from one cgroup to another, it gets a new
-css_set pointer - if there's an already existing css_set with the
-desired collection of cgroups then that group is reused, otherwise a new
-css_set is allocated. The appropriate existing css_set is located by
-looking into a hash table.
-
-To allow access from a cgroup to the css_sets (and hence tasks)
-that comprise it, a set of cg_cgroup_link objects form a lattice;
-each cg_cgroup_link is linked into a list of cg_cgroup_links for
-a single cgroup on its cgrp_link_list field, and a list of
-cg_cgroup_links for a single css_set on its cg_link_list.
-
-Thus the set of tasks in a cgroup can be listed by iterating over
-each css_set that references the cgroup, and sub-iterating over
-each css_set's task set.
-
-The use of a Linux virtual file system (vfs) to represent the
-cgroup hierarchy provides for a familiar permission and name space
-for cgroups, with a minimum of additional kernel code.
-
-1.4 What does notify_on_release do ?
-------------------------------------
-
-If the notify_on_release flag is enabled (1) in a cgroup, then
-whenever the last task in the cgroup leaves (exits or attaches to
-some other cgroup) and the last child cgroup of that cgroup
-is removed, then the kernel runs the command specified by the contents
-of the "release_agent" file in that hierarchy's root directory,
-supplying the pathname (relative to the mount point of the cgroup
-file system) of the abandoned cgroup. This enables automatic
-removal of abandoned cgroups. The default value of
-notify_on_release in the root cgroup at system boot is disabled
-(0). The default value of other cgroups at creation is the current
-value of their parents' notify_on_release settings. The default value of
-a cgroup hierarchy's release_agent path is empty.
-
-1.5 What does clone_children do ?
----------------------------------
-
-This flag only affects the cpuset controller. If the clone_children
-flag is enabled (1) in a cgroup, a new cpuset cgroup will copy its
-configuration from the parent during initialization.
-
-1.6 How do I use cgroups ?
---------------------------
-
-To start a new job that is to be contained within a cgroup, using
-the "cpuset" cgroup subsystem, the steps are something like:
-
- 1) mount -t tmpfs cgroup_root /sys/fs/cgroup
- 2) mkdir /sys/fs/cgroup/cpuset
- 3) mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset
- 4) Create the new cgroup by doing mkdir's and write's (or echo's) in
- the /sys/fs/cgroup/cpuset virtual file system.
- 5) Start a task that will be the "founding father" of the new job.
- 6) Attach that task to the new cgroup by writing its PID to the
- /sys/fs/cgroup/cpuset tasks file for that cgroup.
- 7) fork, exec or clone the job tasks from this founding father task.
-
-For example, the following sequence of commands will setup a cgroup
-named "Charlie", containing just CPUs 2 and 3, and Memory Node 1,
-and then start a subshell 'sh' in that cgroup:
-
- mount -t tmpfs cgroup_root /sys/fs/cgroup
- mkdir /sys/fs/cgroup/cpuset
- mount -t cgroup cpuset -ocpuset /sys/fs/cgroup/cpuset
- cd /sys/fs/cgroup/cpuset
- mkdir Charlie
- cd Charlie
- /bin/echo 2-3 > cpuset.cpus
- /bin/echo 1 > cpuset.mems
- /bin/echo $$ > tasks
- sh
- # The subshell 'sh' is now running in cgroup Charlie
- # The next line should display '/Charlie'
- cat /proc/self/cgroup
-
-2. Usage Examples and Syntax
-============================
-
-2.1 Basic Usage
----------------
-
-Creating, modifying, using cgroups can be done through the cgroup
-virtual filesystem.
-
-To mount a cgroup hierarchy with all available subsystems, type:
-# mount -t cgroup xxx /sys/fs/cgroup
-
-The "xxx" is not interpreted by the cgroup code, but will appear in
-/proc/mounts so may be any useful identifying string that you like.
-
-Note: Some subsystems do not work without some user input first. For instance,
-if cpusets are enabled the user will have to populate the cpus and mems files
-for each new cgroup created before that group can be used.
-
-As explained in section `1.2 Why are cgroups needed?' you should create
-different hierarchies of cgroups for each single resource or group of
-resources you want to control. Therefore, you should mount a tmpfs on
-/sys/fs/cgroup and create directories for each cgroup resource or resource
-group.
-
-# mount -t tmpfs cgroup_root /sys/fs/cgroup
-# mkdir /sys/fs/cgroup/rg1
-
-To mount a cgroup hierarchy with just the cpuset and memory
-subsystems, type:
-# mount -t cgroup -o cpuset,memory hier1 /sys/fs/cgroup/rg1
-
-While remounting cgroups is currently supported, it is not recommend
-to use it. Remounting allows changing bound subsystems and
-release_agent. Rebinding is hardly useful as it only works when the
-hierarchy is empty and release_agent itself should be replaced with
-conventional fsnotify. The support for remounting will be removed in
-the future.
-
-To Specify a hierarchy's release_agent:
-# mount -t cgroup -o cpuset,release_agent="/sbin/cpuset_release_agent" \
- xxx /sys/fs/cgroup/rg1
-
-Note that specifying 'release_agent' more than once will return failure.
-
-Note that changing the set of subsystems is currently only supported
-when the hierarchy consists of a single (root) cgroup. Supporting
-the ability to arbitrarily bind/unbind subsystems from an existing
-cgroup hierarchy is intended to be implemented in the future.
-
-Then under /sys/fs/cgroup/rg1 you can find a tree that corresponds to the
-tree of the cgroups in the system. For instance, /sys/fs/cgroup/rg1
-is the cgroup that holds the whole system.
-
-If you want to change the value of release_agent:
-# echo "/sbin/new_release_agent" > /sys/fs/cgroup/rg1/release_agent
-
-It can also be changed via remount.
-
-If you want to create a new cgroup under /sys/fs/cgroup/rg1:
-# cd /sys/fs/cgroup/rg1
-# mkdir my_cgroup
-
-Now you want to do something with this cgroup.
-# cd my_cgroup
-
-In this directory you can find several files:
-# ls
-cgroup.procs notify_on_release tasks
-(plus whatever files added by the attached subsystems)
-
-Now attach your shell to this cgroup:
-# /bin/echo $$ > tasks
-
-You can also create cgroups inside your cgroup by using mkdir in this
-directory.
-# mkdir my_sub_cs
-
-To remove a cgroup, just use rmdir:
-# rmdir my_sub_cs
-
-This will fail if the cgroup is in use (has cgroups inside, or
-has processes attached, or is held alive by other subsystem-specific
-reference).
-
-2.2 Attaching processes
------------------------
-
-# /bin/echo PID > tasks
-
-Note that it is PID, not PIDs. You can only attach ONE task at a time.
-If you have several tasks to attach, you have to do it one after another:
-
-# /bin/echo PID1 > tasks
-# /bin/echo PID2 > tasks
- ...
-# /bin/echo PIDn > tasks
-
-You can attach the current shell task by echoing 0:
-
-# echo 0 > tasks
-
-You can use the cgroup.procs file instead of the tasks file to move all
-threads in a threadgroup at once. Echoing the PID of any task in a
-threadgroup to cgroup.procs causes all tasks in that threadgroup to be
-attached to the cgroup. Writing 0 to cgroup.procs moves all tasks
-in the writing task's threadgroup.
-
-Note: Since every task is always a member of exactly one cgroup in each
-mounted hierarchy, to remove a task from its current cgroup you must
-move it into a new cgroup (possibly the root cgroup) by writing to the
-new cgroup's tasks file.
-
-Note: Due to some restrictions enforced by some cgroup subsystems, moving
-a process to another cgroup can fail.
-
-2.3 Mounting hierarchies by name
---------------------------------
-
-Passing the name=<x> option when mounting a cgroups hierarchy
-associates the given name with the hierarchy. This can be used when
-mounting a pre-existing hierarchy, in order to refer to it by name
-rather than by its set of active subsystems. Each hierarchy is either
-nameless, or has a unique name.
-
-The name should match [\w.-]+
-
-When passing a name=<x> option for a new hierarchy, you need to
-specify subsystems manually; the legacy behaviour of mounting all
-subsystems when none are explicitly specified is not supported when
-you give a subsystem a name.
-
-The name of the subsystem appears as part of the hierarchy description
-in /proc/mounts and /proc/<pid>/cgroups.
-
-
-3. Kernel API
-=============
-
-3.1 Overview
-------------
-
-Each kernel subsystem that wants to hook into the generic cgroup
-system needs to create a cgroup_subsys object. This contains
-various methods, which are callbacks from the cgroup system, along
-with a subsystem ID which will be assigned by the cgroup system.
-
-Other fields in the cgroup_subsys object include:
-
-- subsys_id: a unique array index for the subsystem, indicating which
- entry in cgroup->subsys[] this subsystem should be managing.
-
-- name: should be initialized to a unique subsystem name. Should be
- no longer than MAX_CGROUP_TYPE_NAMELEN.
-
-- early_init: indicate if the subsystem needs early initialization
- at system boot.
-
-Each cgroup object created by the system has an array of pointers,
-indexed by subsystem ID; this pointer is entirely managed by the
-subsystem; the generic cgroup code will never touch this pointer.
-
-3.2 Synchronization
--------------------
-
-There is a global mutex, cgroup_mutex, used by the cgroup
-system. This should be taken by anything that wants to modify a
-cgroup. It may also be taken to prevent cgroups from being
-modified, but more specific locks may be more appropriate in that
-situation.
-
-See kernel/cgroup.c for more details.
-
-Subsystems can take/release the cgroup_mutex via the functions
-cgroup_lock()/cgroup_unlock().
-
-Accessing a task's cgroup pointer may be done in the following ways:
-- while holding cgroup_mutex
-- while holding the task's alloc_lock (via task_lock())
-- inside an rcu_read_lock() section via rcu_dereference()
-
-3.3 Subsystem API
------------------
-
-Each subsystem should:
-
-- add an entry in linux/cgroup_subsys.h
-- define a cgroup_subsys object called <name>_cgrp_subsys
-
-Each subsystem may export the following methods. The only mandatory
-methods are css_alloc/free. Any others that are null are presumed to
-be successful no-ops.
-
-struct cgroup_subsys_state *css_alloc(struct cgroup *cgrp)
-(cgroup_mutex held by caller)
-
-Called to allocate a subsystem state object for a cgroup. The
-subsystem should allocate its subsystem state object for the passed
-cgroup, returning a pointer to the new object on success or a
-ERR_PTR() value. On success, the subsystem pointer should point to
-a structure of type cgroup_subsys_state (typically embedded in a
-larger subsystem-specific object), which will be initialized by the
-cgroup system. Note that this will be called at initialization to
-create the root subsystem state for this subsystem; this case can be
-identified by the passed cgroup object having a NULL parent (since
-it's the root of the hierarchy) and may be an appropriate place for
-initialization code.
-
-int css_online(struct cgroup *cgrp)
-(cgroup_mutex held by caller)
-
-Called after @cgrp successfully completed all allocations and made
-visible to cgroup_for_each_child/descendant_*() iterators. The
-subsystem may choose to fail creation by returning -errno. This
-callback can be used to implement reliable state sharing and
-propagation along the hierarchy. See the comment on
-cgroup_for_each_descendant_pre() for details.
-
-void css_offline(struct cgroup *cgrp);
-(cgroup_mutex held by caller)
-
-This is the counterpart of css_online() and called iff css_online()
-has succeeded on @cgrp. This signifies the beginning of the end of
-@cgrp. @cgrp is being removed and the subsystem should start dropping
-all references it's holding on @cgrp. When all references are dropped,
-cgroup removal will proceed to the next step - css_free(). After this
-callback, @cgrp should be considered dead to the subsystem.
-
-void css_free(struct cgroup *cgrp)
-(cgroup_mutex held by caller)
-
-The cgroup system is about to free @cgrp; the subsystem should free
-its subsystem state object. By the time this method is called, @cgrp
-is completely unused; @cgrp->parent is still valid. (Note - can also
-be called for a newly-created cgroup if an error occurs after this
-subsystem's create() method has been called for the new cgroup).
-
-int can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
-(cgroup_mutex held by caller)
-
-Called prior to moving one or more tasks into a cgroup; if the
-subsystem returns an error, this will abort the attach operation.
-@tset contains the tasks to be attached and is guaranteed to have at
-least one task in it.
-
-If there are multiple tasks in the taskset, then:
- - it's guaranteed that all are from the same thread group
- - @tset contains all tasks from the thread group whether or not
- they're switching cgroups
- - the first task is the leader
-
-Each @tset entry also contains the task's old cgroup and tasks which
-aren't switching cgroup can be skipped easily using the
-cgroup_taskset_for_each() iterator. Note that this isn't called on a
-fork. If this method returns 0 (success) then this should remain valid
-while the caller holds cgroup_mutex and it is ensured that either
-attach() or cancel_attach() will be called in future.
-
-void css_reset(struct cgroup_subsys_state *css)
-(cgroup_mutex held by caller)
-
-An optional operation which should restore @css's configuration to the
-initial state. This is currently only used on the unified hierarchy
-when a subsystem is disabled on a cgroup through
-"cgroup.subtree_control" but should remain enabled because other
-subsystems depend on it. cgroup core makes such a css invisible by
-removing the associated interface files and invokes this callback so
-that the hidden subsystem can return to the initial neutral state.
-This prevents unexpected resource control from a hidden css and
-ensures that the configuration is in the initial state when it is made
-visible again later.
-
-void cancel_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
-(cgroup_mutex held by caller)
-
-Called when a task attach operation has failed after can_attach() has succeeded.
-A subsystem whose can_attach() has some side-effects should provide this
-function, so that the subsystem can implement a rollback. If not, not necessary.
-This will be called only about subsystems whose can_attach() operation have
-succeeded. The parameters are identical to can_attach().
-
-void attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
-(cgroup_mutex held by caller)
-
-Called after the task has been attached to the cgroup, to allow any
-post-attachment activity that requires memory allocations or blocking.
-The parameters are identical to can_attach().
-
-void fork(struct task_struct *task)
-
-Called when a task is forked into a cgroup.
-
-void exit(struct task_struct *task)
-
-Called during task exit.
-
-void free(struct task_struct *task)
-
-Called when the task_struct is freed.
-
-void bind(struct cgroup *root)
-(cgroup_mutex held by caller)
-
-Called when a cgroup subsystem is rebound to a different hierarchy
-and root cgroup. Currently this will only involve movement between
-the default hierarchy (which never has sub-cgroups) and a hierarchy
-that is being created/destroyed (and hence has no sub-cgroups).
-
-4. Extended attribute usage
-===========================
-
-cgroup filesystem supports certain types of extended attributes in its
-directories and files. The current supported types are:
- - Trusted (XATTR_TRUSTED)
- - Security (XATTR_SECURITY)
-
-Both require CAP_SYS_ADMIN capability to set.
-
-Like in tmpfs, the extended attributes in cgroup filesystem are stored
-using kernel memory and it's advised to keep the usage at minimum. This
-is the reason why user defined extended attributes are not supported, since
-any user can do it and there's no limit in the value size.
-
-The current known users for this feature are SELinux to limit cgroup usage
-in containers and systemd for assorted meta data like main PID in a cgroup
-(systemd creates a cgroup per service).
-
-5. Questions
-============
-
-Q: what's up with this '/bin/echo' ?
-A: bash's builtin 'echo' command does not check calls to write() against
- errors. If you use it in the cgroup file system, you won't be
- able to tell whether a command succeeded or failed.
-
-Q: When I attach processes, only the first of the line gets really attached !
-A: We can only return one error code per call to write(). So you should also
- put only ONE PID.
-
diff --git a/Documentation/cgroup-v1/cpuacct.txt b/Documentation/cgroup-v1/cpuacct.txt
deleted file mode 100644
index 9d73cc0cadb9..000000000000
--- a/Documentation/cgroup-v1/cpuacct.txt
+++ /dev/null
@@ -1,49 +0,0 @@
-CPU Accounting Controller
--------------------------
-
-The CPU accounting controller is used to group tasks using cgroups and
-account the CPU usage of these groups of tasks.
-
-The CPU accounting controller supports multi-hierarchy groups. An accounting
-group accumulates the CPU usage of all of its child groups and the tasks
-directly present in its group.
-
-Accounting groups can be created by first mounting the cgroup filesystem.
-
-# mount -t cgroup -ocpuacct none /sys/fs/cgroup
-
-With the above step, the initial or the parent accounting group becomes
-visible at /sys/fs/cgroup. At bootup, this group includes all the tasks in
-the system. /sys/fs/cgroup/tasks lists the tasks in this cgroup.
-/sys/fs/cgroup/cpuacct.usage gives the CPU time (in nanoseconds) obtained
-by this group which is essentially the CPU time obtained by all the tasks
-in the system.
-
-New accounting groups can be created under the parent group /sys/fs/cgroup.
-
-# cd /sys/fs/cgroup
-# mkdir g1
-# echo $$ > g1/tasks
-
-The above steps create a new group g1 and move the current shell
-process (bash) into it. CPU time consumed by this bash and its children
-can be obtained from g1/cpuacct.usage and the same is accumulated in
-/sys/fs/cgroup/cpuacct.usage also.
-
-cpuacct.stat file lists a few statistics which further divide the
-CPU time obtained by the cgroup into user and system times. Currently
-the following statistics are supported:
-
-user: Time spent by tasks of the cgroup in user mode.
-system: Time spent by tasks of the cgroup in kernel mode.
-
-user and system are in USER_HZ unit.
-
-cpuacct controller uses percpu_counter interface to collect user and
-system times. This has two side effects:
-
-- It is theoretically possible to see wrong values for user and system times.
- This is because percpu_counter_read() on 32bit systems isn't safe
- against concurrent writes.
-- It is possible to see slightly outdated values for user and system times
- due to the batch processing nature of percpu_counter.
diff --git a/Documentation/cgroup-v1/cpusets.txt b/Documentation/cgroup-v1/cpusets.txt
deleted file mode 100644
index 8402dd6de8df..000000000000
--- a/Documentation/cgroup-v1/cpusets.txt
+++ /dev/null
@@ -1,839 +0,0 @@
- CPUSETS
- -------
-
-Copyright (C) 2004 BULL SA.
-Written by Simon.Derr@bull.net
-
-Portions Copyright (c) 2004-2006 Silicon Graphics, Inc.
-Modified by Paul Jackson <pj@sgi.com>
-Modified by Christoph Lameter <cl@linux.com>
-Modified by Paul Menage <menage@google.com>
-Modified by Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
-
-CONTENTS:
-=========
-
-1. Cpusets
- 1.1 What are cpusets ?
- 1.2 Why are cpusets needed ?
- 1.3 How are cpusets implemented ?
- 1.4 What are exclusive cpusets ?
- 1.5 What is memory_pressure ?
- 1.6 What is memory spread ?
- 1.7 What is sched_load_balance ?
- 1.8 What is sched_relax_domain_level ?
- 1.9 How do I use cpusets ?
-2. Usage Examples and Syntax
- 2.1 Basic Usage
- 2.2 Adding/removing cpus
- 2.3 Setting flags
- 2.4 Attaching processes
-3. Questions
-4. Contact
-
-1. Cpusets
-==========
-
-1.1 What are cpusets ?
-----------------------
-
-Cpusets provide a mechanism for assigning a set of CPUs and Memory
-Nodes to a set of tasks. In this document "Memory Node" refers to
-an on-line node that contains memory.
-
-Cpusets constrain the CPU and Memory placement of tasks to only
-the resources within a task's current cpuset. They form a nested
-hierarchy visible in a virtual file system. These are the essential
-hooks, beyond what is already present, required to manage dynamic
-job placement on large systems.
-
-Cpusets use the generic cgroup subsystem described in
-Documentation/cgroup-v1/cgroups.txt.
-
-Requests by a task, using the sched_setaffinity(2) system call to
-include CPUs in its CPU affinity mask, and using the mbind(2) and
-set_mempolicy(2) system calls to include Memory Nodes in its memory
-policy, are both filtered through that task's cpuset, filtering out any
-CPUs or Memory Nodes not in that cpuset. The scheduler will not
-schedule a task on a CPU that is not allowed in its cpus_allowed
-vector, and the kernel page allocator will not allocate a page on a
-node that is not allowed in the requesting task's mems_allowed vector.
-
-User level code may create and destroy cpusets by name in the cgroup
-virtual file system, manage the attributes and permissions of these
-cpusets and which CPUs and Memory Nodes are assigned to each cpuset,
-specify and query to which cpuset a task is assigned, and list the
-task pids assigned to a cpuset.
-
-
-1.2 Why are cpusets needed ?
-----------------------------
-
-The management of large computer systems, with many processors (CPUs),
-complex memory cache hierarchies and multiple Memory Nodes having
-non-uniform access times (NUMA) presents additional challenges for
-the efficient scheduling and memory placement of processes.
-
-Frequently more modest sized systems can be operated with adequate
-efficiency just by letting the operating system automatically share
-the available CPU and Memory resources amongst the requesting tasks.
-
-But larger systems, which benefit more from careful processor and
-memory placement to reduce memory access times and contention,
-and which typically represent a larger investment for the customer,
-can benefit from explicitly placing jobs on properly sized subsets of
-the system.
-
-This can be especially valuable on:
-
- * Web Servers running multiple instances of the same web application,
- * Servers running different applications (for instance, a web server
- and a database), or
- * NUMA systems running large HPC applications with demanding
- performance characteristics.
-
-These subsets, or "soft partitions" must be able to be dynamically
-adjusted, as the job mix changes, without impacting other concurrently
-executing jobs. The location of the running jobs pages may also be moved
-when the memory locations are changed.
-
-The kernel cpuset patch provides the minimum essential kernel
-mechanisms required to efficiently implement such subsets. It
-leverages existing CPU and Memory Placement facilities in the Linux
-kernel to avoid any additional impact on the critical scheduler or
-memory allocator code.
-
-
-1.3 How are cpusets implemented ?
----------------------------------
-
-Cpusets provide a Linux kernel mechanism to constrain which CPUs and
-Memory Nodes are used by a process or set of processes.
-
-The Linux kernel already has a pair of mechanisms to specify on which
-CPUs a task may be scheduled (sched_setaffinity) and on which Memory
-Nodes it may obtain memory (mbind, set_mempolicy).
-
-Cpusets extends these two mechanisms as follows:
-
- - Cpusets are sets of allowed CPUs and Memory Nodes, known to the
- kernel.
- - Each task in the system is attached to a cpuset, via a pointer
- in the task structure to a reference counted cgroup structure.
- - Calls to sched_setaffinity are filtered to just those CPUs
- allowed in that task's cpuset.
- - Calls to mbind and set_mempolicy are filtered to just
- those Memory Nodes allowed in that task's cpuset.
- - The root cpuset contains all the systems CPUs and Memory
- Nodes.
- - For any cpuset, one can define child cpusets containing a subset
- of the parents CPU and Memory Node resources.
- - The hierarchy of cpusets can be mounted at /dev/cpuset, for
- browsing and manipulation from user space.
- - A cpuset may be marked exclusive, which ensures that no other
- cpuset (except direct ancestors and descendants) may contain
- any overlapping CPUs or Memory Nodes.
- - You can list all the tasks (by pid) attached to any cpuset.
-
-The implementation of cpusets requires a few, simple hooks
-into the rest of the kernel, none in performance critical paths:
-
- - in init/main.c, to initialize the root cpuset at system boot.
- - in fork and exit, to attach and detach a task from its cpuset.
- - in sched_setaffinity, to mask the requested CPUs by what's
- allowed in that task's cpuset.
- - in sched.c migrate_live_tasks(), to keep migrating tasks within
- the CPUs allowed by their cpuset, if possible.
- - in the mbind and set_mempolicy system calls, to mask the requested
- Memory Nodes by what's allowed in that task's cpuset.
- - in page_alloc.c, to restrict memory to allowed nodes.
- - in vmscan.c, to restrict page recovery to the current cpuset.
-
-You should mount the "cgroup" filesystem type in order to enable
-browsing and modifying the cpusets presently known to the kernel. No
-new system calls are added for cpusets - all support for querying and
-modifying cpusets is via this cpuset file system.
-
-The /proc/<pid>/status file for each task has four added lines,
-displaying the task's cpus_allowed (on which CPUs it may be scheduled)
-and mems_allowed (on which Memory Nodes it may obtain memory),
-in the two formats seen in the following example:
-
- Cpus_allowed: ffffffff,ffffffff,ffffffff,ffffffff
- Cpus_allowed_list: 0-127
- Mems_allowed: ffffffff,ffffffff
- Mems_allowed_list: 0-63
-
-Each cpuset is represented by a directory in the cgroup file system
-containing (on top of the standard cgroup files) the following
-files describing that cpuset:
-
- - cpuset.cpus: list of CPUs in that cpuset
- - cpuset.mems: list of Memory Nodes in that cpuset
- - cpuset.memory_migrate flag: if set, move pages to cpusets nodes
- - cpuset.cpu_exclusive flag: is cpu placement exclusive?
- - cpuset.mem_exclusive flag: is memory placement exclusive?
- - cpuset.mem_hardwall flag: is memory allocation hardwalled
- - cpuset.memory_pressure: measure of how much paging pressure in cpuset
- - cpuset.memory_spread_page flag: if set, spread page cache evenly on allowed nodes
- - cpuset.memory_spread_slab flag: if set, spread slab cache evenly on allowed nodes
- - cpuset.sched_load_balance flag: if set, load balance within CPUs on that cpuset
- - cpuset.sched_relax_domain_level: the searching range when migrating tasks
-
-In addition, only the root cpuset has the following file:
- - cpuset.memory_pressure_enabled flag: compute memory_pressure?
-
-New cpusets are created using the mkdir system call or shell
-command. The properties of a cpuset, such as its flags, allowed
-CPUs and Memory Nodes, and attached tasks, are modified by writing
-to the appropriate file in that cpusets directory, as listed above.
-
-The named hierarchical structure of nested cpusets allows partitioning
-a large system into nested, dynamically changeable, "soft-partitions".
-
-The attachment of each task, automatically inherited at fork by any
-children of that task, to a cpuset allows organizing the work load
-on a system into related sets of tasks such that each set is constrained
-to using the CPUs and Memory Nodes of a particular cpuset. A task
-may be re-attached to any other cpuset, if allowed by the permissions
-on the necessary cpuset file system directories.
-
-Such management of a system "in the large" integrates smoothly with
-the detailed placement done on individual tasks and memory regions
-using the sched_setaffinity, mbind and set_mempolicy system calls.
-
-The following rules apply to each cpuset:
-
- - Its CPUs and Memory Nodes must be a subset of its parents.
- - It can't be marked exclusive unless its parent is.
- - If its cpu or memory is exclusive, they may not overlap any sibling.
-
-These rules, and the natural hierarchy of cpusets, enable efficient
-enforcement of the exclusive guarantee, without having to scan all
-cpusets every time any of them change to ensure nothing overlaps a
-exclusive cpuset. Also, the use of a Linux virtual file system (vfs)
-to represent the cpuset hierarchy provides for a familiar permission
-and name space for cpusets, with a minimum of additional kernel code.
-
-The cpus and mems files in the root (top_cpuset) cpuset are
-read-only. The cpus file automatically tracks the value of
-cpu_online_mask using a CPU hotplug notifier, and the mems file
-automatically tracks the value of node_states[N_MEMORY]--i.e.,
-nodes with memory--using the cpuset_track_online_nodes() hook.
-
-
-1.4 What are exclusive cpusets ?
---------------------------------
-
-If a cpuset is cpu or mem exclusive, no other cpuset, other than
-a direct ancestor or descendant, may share any of the same CPUs or
-Memory Nodes.
-
-A cpuset that is cpuset.mem_exclusive *or* cpuset.mem_hardwall is "hardwalled",
-i.e. it restricts kernel allocations for page, buffer and other data
-commonly shared by the kernel across multiple users. All cpusets,
-whether hardwalled or not, restrict allocations of memory for user
-space. This enables configuring a system so that several independent
-jobs can share common kernel data, such as file system pages, while
-isolating each job's user allocation in its own cpuset. To do this,
-construct a large mem_exclusive cpuset to hold all the jobs, and
-construct child, non-mem_exclusive cpusets for each individual job.
-Only a small amount of typical kernel memory, such as requests from
-interrupt handlers, is allowed to be taken outside even a
-mem_exclusive cpuset.
-
-
-1.5 What is memory_pressure ?
------------------------------
-The memory_pressure of a cpuset provides a simple per-cpuset metric
-of the rate that the tasks in a cpuset are attempting to free up in
-use memory on the nodes of the cpuset to satisfy additional memory
-requests.
-
-This enables batch managers monitoring jobs running in dedicated
-cpusets to efficiently detect what level of memory pressure that job
-is causing.
-
-This is useful both on tightly managed systems running a wide mix of
-submitted jobs, which may choose to terminate or re-prioritize jobs that
-are trying to use more memory than allowed on the nodes assigned to them,
-and with tightly coupled, long running, massively parallel scientific
-computing jobs that will dramatically fail to meet required performance
-goals if they start to use more memory than allowed to them.
-
-This mechanism provides a very economical way for the batch manager
-to monitor a cpuset for signs of memory pressure. It's up to the
-batch manager or other user code to decide what to do about it and
-take action.
-
-==> Unless this feature is enabled by writing "1" to the special file
- /dev/cpuset/memory_pressure_enabled, the hook in the rebalance
- code of __alloc_pages() for this metric reduces to simply noticing
- that the cpuset_memory_pressure_enabled flag is zero. So only
- systems that enable this feature will compute the metric.
-
-Why a per-cpuset, running average:
-
- Because this meter is per-cpuset, rather than per-task or mm,
- the system load imposed by a batch scheduler monitoring this
- metric is sharply reduced on large systems, because a scan of
- the tasklist can be avoided on each set of queries.
-
- Because this meter is a running average, instead of an accumulating
- counter, a batch scheduler can detect memory pressure with a
- single read, instead of having to read and accumulate results
- for a period of time.
-
- Because this meter is per-cpuset rather than per-task or mm,
- the batch scheduler can obtain the key information, memory
- pressure in a cpuset, with a single read, rather than having to
- query and accumulate results over all the (dynamically changing)
- set of tasks in the cpuset.
-
-A per-cpuset simple digital filter (requires a spinlock and 3 words
-of data per-cpuset) is kept, and updated by any task attached to that
-cpuset, if it enters the synchronous (direct) page reclaim code.
-
-A per-cpuset file provides an integer number representing the recent
-(half-life of 10 seconds) rate of direct page reclaims caused by
-the tasks in the cpuset, in units of reclaims attempted per second,
-times 1000.
-
-
-1.6 What is memory spread ?
----------------------------
-There are two boolean flag files per cpuset that control where the
-kernel allocates pages for the file system buffers and related in
-kernel data structures. They are called 'cpuset.memory_spread_page' and
-'cpuset.memory_spread_slab'.
-
-If the per-cpuset boolean flag file 'cpuset.memory_spread_page' is set, then
-the kernel will spread the file system buffers (page cache) evenly
-over all the nodes that the faulting task is allowed to use, instead
-of preferring to put those pages on the node where the task is running.
-
-If the per-cpuset boolean flag file 'cpuset.memory_spread_slab' is set,
-then the kernel will spread some file system related slab caches,
-such as for inodes and dentries evenly over all the nodes that the
-faulting task is allowed to use, instead of preferring to put those
-pages on the node where the task is running.
-
-The setting of these flags does not affect anonymous data segment or
-stack segment pages of a task.
-
-By default, both kinds of memory spreading are off, and memory
-pages are allocated on the node local to where the task is running,
-except perhaps as modified by the task's NUMA mempolicy or cpuset
-configuration, so long as sufficient free memory pages are available.
-
-When new cpusets are created, they inherit the memory spread settings
-of their parent.
-
-Setting memory spreading causes allocations for the affected page
-or slab caches to ignore the task's NUMA mempolicy and be spread
-instead. Tasks using mbind() or set_mempolicy() calls to set NUMA
-mempolicies will not notice any change in these calls as a result of
-their containing task's memory spread settings. If memory spreading
-is turned off, then the currently specified NUMA mempolicy once again
-applies to memory page allocations.
-
-Both 'cpuset.memory_spread_page' and 'cpuset.memory_spread_slab' are boolean flag
-files. By default they contain "0", meaning that the feature is off
-for that cpuset. If a "1" is written to that file, then that turns
-the named feature on.
-
-The implementation is simple.
-
-Setting the flag 'cpuset.memory_spread_page' turns on a per-process flag
-PFA_SPREAD_PAGE for each task that is in that cpuset or subsequently
-joins that cpuset. The page allocation calls for the page cache
-is modified to perform an inline check for this PFA_SPREAD_PAGE task
-flag, and if set, a call to a new routine cpuset_mem_spread_node()
-returns the node to prefer for the allocation.
-
-Similarly, setting 'cpuset.memory_spread_slab' turns on the flag
-PFA_SPREAD_SLAB, and appropriately marked slab caches will allocate
-pages from the node returned by cpuset_mem_spread_node().
-
-The cpuset_mem_spread_node() routine is also simple. It uses the
-value of a per-task rotor cpuset_mem_spread_rotor to select the next
-node in the current task's mems_allowed to prefer for the allocation.
-
-This memory placement policy is also known (in other contexts) as
-round-robin or interleave.
-
-This policy can provide substantial improvements for jobs that need
-to place thread local data on the corresponding node, but that need
-to access large file system data sets that need to be spread across
-the several nodes in the jobs cpuset in order to fit. Without this
-policy, especially for jobs that might have one thread reading in the
-data set, the memory allocation across the nodes in the jobs cpuset
-can become very uneven.
-
-1.7 What is sched_load_balance ?
---------------------------------
-
-The kernel scheduler (kernel/sched/core.c) automatically load balances
-tasks. If one CPU is underutilized, kernel code running on that
-CPU will look for tasks on other more overloaded CPUs and move those
-tasks to itself, within the constraints of such placement mechanisms
-as cpusets and sched_setaffinity.
-
-The algorithmic cost of load balancing and its impact on key shared
-kernel data structures such as the task list increases more than
-linearly with the number of CPUs being balanced. So the scheduler
-has support to partition the systems CPUs into a number of sched
-domains such that it only load balances within each sched domain.
-Each sched domain covers some subset of the CPUs in the system;
-no two sched domains overlap; some CPUs might not be in any sched
-domain and hence won't be load balanced.
-
-Put simply, it costs less to balance between two smaller sched domains
-than one big one, but doing so means that overloads in one of the
-two domains won't be load balanced to the other one.
-
-By default, there is one sched domain covering all CPUs, including those
-marked isolated using the kernel boot time "isolcpus=" argument. However,
-the isolated CPUs will not participate in load balancing, and will not
-have tasks running on them unless explicitly assigned.
-
-This default load balancing across all CPUs is not well suited for
-the following two situations:
- 1) On large systems, load balancing across many CPUs is expensive.
- If the system is managed using cpusets to place independent jobs
- on separate sets of CPUs, full load balancing is unnecessary.
- 2) Systems supporting realtime on some CPUs need to minimize
- system overhead on those CPUs, including avoiding task load
- balancing if that is not needed.
-
-When the per-cpuset flag "cpuset.sched_load_balance" is enabled (the default
-setting), it requests that all the CPUs in that cpusets allowed 'cpuset.cpus'
-be contained in a single sched domain, ensuring that load balancing
-can move a task (not otherwised pinned, as by sched_setaffinity)
-from any CPU in that cpuset to any other.
-
-When the per-cpuset flag "cpuset.sched_load_balance" is disabled, then the
-scheduler will avoid load balancing across the CPUs in that cpuset,
---except-- in so far as is necessary because some overlapping cpuset
-has "sched_load_balance" enabled.
-
-So, for example, if the top cpuset has the flag "cpuset.sched_load_balance"
-enabled, then the scheduler will have one sched domain covering all
-CPUs, and the setting of the "cpuset.sched_load_balance" flag in any other
-cpusets won't matter, as we're already fully load balancing.
-
-Therefore in the above two situations, the top cpuset flag
-"cpuset.sched_load_balance" should be disabled, and only some of the smaller,
-child cpusets have this flag enabled.
-
-When doing this, you don't usually want to leave any unpinned tasks in
-the top cpuset that might use non-trivial amounts of CPU, as such tasks
-may be artificially constrained to some subset of CPUs, depending on
-the particulars of this flag setting in descendant cpusets. Even if
-such a task could use spare CPU cycles in some other CPUs, the kernel
-scheduler might not consider the possibility of load balancing that
-task to that underused CPU.
-
-Of course, tasks pinned to a particular CPU can be left in a cpuset
-that disables "cpuset.sched_load_balance" as those tasks aren't going anywhere
-else anyway.
-
-There is an impedance mismatch here, between cpusets and sched domains.
-Cpusets are hierarchical and nest. Sched domains are flat; they don't
-overlap and each CPU is in at most one sched domain.
-
-It is necessary for sched domains to be flat because load balancing
-across partially overlapping sets of CPUs would risk unstable dynamics
-that would be beyond our understanding. So if each of two partially
-overlapping cpusets enables the flag 'cpuset.sched_load_balance', then we
-form a single sched domain that is a superset of both. We won't move
-a task to a CPU outside its cpuset, but the scheduler load balancing
-code might waste some compute cycles considering that possibility.
-
-This mismatch is why there is not a simple one-to-one relation
-between which cpusets have the flag "cpuset.sched_load_balance" enabled,
-and the sched domain configuration. If a cpuset enables the flag, it
-will get balancing across all its CPUs, but if it disables the flag,
-it will only be assured of no load balancing if no other overlapping
-cpuset enables the flag.
-
-If two cpusets have partially overlapping 'cpuset.cpus' allowed, and only
-one of them has this flag enabled, then the other may find its
-tasks only partially load balanced, just on the overlapping CPUs.
-This is just the general case of the top_cpuset example given a few
-paragraphs above. In the general case, as in the top cpuset case,
-don't leave tasks that might use non-trivial amounts of CPU in
-such partially load balanced cpusets, as they may be artificially
-constrained to some subset of the CPUs allowed to them, for lack of
-load balancing to the other CPUs.
-
-CPUs in "cpuset.isolcpus" were excluded from load balancing by the
-isolcpus= kernel boot option, and will never be load balanced regardless
-of the value of "cpuset.sched_load_balance" in any cpuset.
-
-1.7.1 sched_load_balance implementation details.
-------------------------------------------------
-
-The per-cpuset flag 'cpuset.sched_load_balance' defaults to enabled (contrary
-to most cpuset flags.) When enabled for a cpuset, the kernel will
-ensure that it can load balance across all the CPUs in that cpuset
-(makes sure that all the CPUs in the cpus_allowed of that cpuset are
-in the same sched domain.)
-
-If two overlapping cpusets both have 'cpuset.sched_load_balance' enabled,
-then they will be (must be) both in the same sched domain.
-
-If, as is the default, the top cpuset has 'cpuset.sched_load_balance' enabled,
-then by the above that means there is a single sched domain covering
-the whole system, regardless of any other cpuset settings.
-
-The kernel commits to user space that it will avoid load balancing
-where it can. It will pick as fine a granularity partition of sched
-domains as it can while still providing load balancing for any set
-of CPUs allowed to a cpuset having 'cpuset.sched_load_balance' enabled.
-
-The internal kernel cpuset to scheduler interface passes from the
-cpuset code to the scheduler code a partition of the load balanced
-CPUs in the system. This partition is a set of subsets (represented
-as an array of struct cpumask) of CPUs, pairwise disjoint, that cover
-all the CPUs that must be load balanced.
-
-The cpuset code builds a new such partition and passes it to the
-scheduler sched domain setup code, to have the sched domains rebuilt
-as necessary, whenever:
- - the 'cpuset.sched_load_balance' flag of a cpuset with non-empty CPUs changes,
- - or CPUs come or go from a cpuset with this flag enabled,
- - or 'cpuset.sched_relax_domain_level' value of a cpuset with non-empty CPUs
- and with this flag enabled changes,
- - or a cpuset with non-empty CPUs and with this flag enabled is removed,
- - or a cpu is offlined/onlined.
-
-This partition exactly defines what sched domains the scheduler should
-setup - one sched domain for each element (struct cpumask) in the
-partition.
-
-The scheduler remembers the currently active sched domain partitions.
-When the scheduler routine partition_sched_domains() is invoked from
-the cpuset code to update these sched domains, it compares the new
-partition requested with the current, and updates its sched domains,
-removing the old and adding the new, for each change.
-
-
-1.8 What is sched_relax_domain_level ?
---------------------------------------
-
-In sched domain, the scheduler migrates tasks in 2 ways; periodic load
-balance on tick, and at time of some schedule events.
-
-When a task is woken up, scheduler try to move the task on idle CPU.
-For example, if a task A running on CPU X activates another task B
-on the same CPU X, and if CPU Y is X's sibling and performing idle,
-then scheduler migrate task B to CPU Y so that task B can start on
-CPU Y without waiting task A on CPU X.
-
-And if a CPU run out of tasks in its runqueue, the CPU try to pull
-extra tasks from other busy CPUs to help them before it is going to
-be idle.
-
-Of course it takes some searching cost to find movable tasks and/or
-idle CPUs, the scheduler might not search all CPUs in the domain
-every time. In fact, in some architectures, the searching ranges on
-events are limited in the same socket or node where the CPU locates,
-while the load balance on tick searches all.
-
-For example, assume CPU Z is relatively far from CPU X. Even if CPU Z
-is idle while CPU X and the siblings are busy, scheduler can't migrate
-woken task B from X to Z since it is out of its searching range.
-As the result, task B on CPU X need to wait task A or wait load balance
-on the next tick. For some applications in special situation, waiting
-1 tick may be too long.
-
-The 'cpuset.sched_relax_domain_level' file allows you to request changing
-this searching range as you like. This file takes int value which
-indicates size of searching range in levels ideally as follows,
-otherwise initial value -1 that indicates the cpuset has no request.
-
- -1 : no request. use system default or follow request of others.
- 0 : no search.
- 1 : search siblings (hyperthreads in a core).
- 2 : search cores in a package.
- 3 : search cpus in a node [= system wide on non-NUMA system]
- 4 : search nodes in a chunk of node [on NUMA system]
- 5 : search system wide [on NUMA system]
-
-The system default is architecture dependent. The system default
-can be changed using the relax_domain_level= boot parameter.
-
-This file is per-cpuset and affect the sched domain where the cpuset
-belongs to. Therefore if the flag 'cpuset.sched_load_balance' of a cpuset
-is disabled, then 'cpuset.sched_relax_domain_level' have no effect since
-there is no sched domain belonging the cpuset.
-
-If multiple cpusets are overlapping and hence they form a single sched
-domain, the largest value among those is used. Be careful, if one
-requests 0 and others are -1 then 0 is used.
-
-Note that modifying this file will have both good and bad effects,
-and whether it is acceptable or not depends on your situation.
-Don't modify this file if you are not sure.
-
-If your situation is:
- - The migration costs between each cpu can be assumed considerably
- small(for you) due to your special application's behavior or
- special hardware support for CPU cache etc.
- - The searching cost doesn't have impact(for you) or you can make
- the searching cost enough small by managing cpuset to compact etc.
- - The latency is required even it sacrifices cache hit rate etc.
-then increasing 'sched_relax_domain_level' would benefit you.
-
-
-1.9 How do I use cpusets ?
---------------------------
-
-In order to minimize the impact of cpusets on critical kernel
-code, such as the scheduler, and due to the fact that the kernel
-does not support one task updating the memory placement of another
-task directly, the impact on a task of changing its cpuset CPU
-or Memory Node placement, or of changing to which cpuset a task
-is attached, is subtle.
-
-If a cpuset has its Memory Nodes modified, then for each task attached
-to that cpuset, the next time that the kernel attempts to allocate
-a page of memory for that task, the kernel will notice the change
-in the task's cpuset, and update its per-task memory placement to
-remain within the new cpusets memory placement. If the task was using
-mempolicy MPOL_BIND, and the nodes to which it was bound overlap with
-its new cpuset, then the task will continue to use whatever subset
-of MPOL_BIND nodes are still allowed in the new cpuset. If the task
-was using MPOL_BIND and now none of its MPOL_BIND nodes are allowed
-in the new cpuset, then the task will be essentially treated as if it
-was MPOL_BIND bound to the new cpuset (even though its NUMA placement,
-as queried by get_mempolicy(), doesn't change). If a task is moved
-from one cpuset to another, then the kernel will adjust the task's
-memory placement, as above, the next time that the kernel attempts
-to allocate a page of memory for that task.
-
-If a cpuset has its 'cpuset.cpus' modified, then each task in that cpuset
-will have its allowed CPU placement changed immediately. Similarly,
-if a task's pid is written to another cpuset's 'tasks' file, then its
-allowed CPU placement is changed immediately. If such a task had been
-bound to some subset of its cpuset using the sched_setaffinity() call,
-the task will be allowed to run on any CPU allowed in its new cpuset,
-negating the effect of the prior sched_setaffinity() call.
-
-In summary, the memory placement of a task whose cpuset is changed is
-updated by the kernel, on the next allocation of a page for that task,
-and the processor placement is updated immediately.
-
-Normally, once a page is allocated (given a physical page
-of main memory) then that page stays on whatever node it
-was allocated, so long as it remains allocated, even if the
-cpusets memory placement policy 'cpuset.mems' subsequently changes.
-If the cpuset flag file 'cpuset.memory_migrate' is set true, then when
-tasks are attached to that cpuset, any pages that task had
-allocated to it on nodes in its previous cpuset are migrated
-to the task's new cpuset. The relative placement of the page within
-the cpuset is preserved during these migration operations if possible.
-For example if the page was on the second valid node of the prior cpuset
-then the page will be placed on the second valid node of the new cpuset.
-
-Also if 'cpuset.memory_migrate' is set true, then if that cpuset's
-'cpuset.mems' file is modified, pages allocated to tasks in that
-cpuset, that were on nodes in the previous setting of 'cpuset.mems',
-will be moved to nodes in the new setting of 'mems.'
-Pages that were not in the task's prior cpuset, or in the cpuset's
-prior 'cpuset.mems' setting, will not be moved.
-
-There is an exception to the above. If hotplug functionality is used
-to remove all the CPUs that are currently assigned to a cpuset,
-then all the tasks in that cpuset will be moved to the nearest ancestor
-with non-empty cpus. But the moving of some (or all) tasks might fail if
-cpuset is bound with another cgroup subsystem which has some restrictions
-on task attaching. In this failing case, those tasks will stay
-in the original cpuset, and the kernel will automatically update
-their cpus_allowed to allow all online CPUs. When memory hotplug
-functionality for removing Memory Nodes is available, a similar exception
-is expected to apply there as well. In general, the kernel prefers to
-violate cpuset placement, over starving a task that has had all
-its allowed CPUs or Memory Nodes taken offline.
-
-There is a second exception to the above. GFP_ATOMIC requests are
-kernel internal allocations that must be satisfied, immediately.
-The kernel may drop some request, in rare cases even panic, if a
-GFP_ATOMIC alloc fails. If the request cannot be satisfied within
-the current task's cpuset, then we relax the cpuset, and look for
-memory anywhere we can find it. It's better to violate the cpuset
-than stress the kernel.
-
-To start a new job that is to be contained within a cpuset, the steps are:
-
- 1) mkdir /sys/fs/cgroup/cpuset
- 2) mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset
- 3) Create the new cpuset by doing mkdir's and write's (or echo's) in
- the /sys/fs/cgroup/cpuset virtual file system.
- 4) Start a task that will be the "founding father" of the new job.
- 5) Attach that task to the new cpuset by writing its pid to the
- /sys/fs/cgroup/cpuset tasks file for that cpuset.
- 6) fork, exec or clone the job tasks from this founding father task.
-
-For example, the following sequence of commands will setup a cpuset
-named "Charlie", containing just CPUs 2 and 3, and Memory Node 1,
-and then start a subshell 'sh' in that cpuset:
-
- mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset
- cd /sys/fs/cgroup/cpuset
- mkdir Charlie
- cd Charlie
- /bin/echo 2-3 > cpuset.cpus
- /bin/echo 1 > cpuset.mems
- /bin/echo $$ > tasks
- sh
- # The subshell 'sh' is now running in cpuset Charlie
- # The next line should display '/Charlie'
- cat /proc/self/cpuset
-
-There are ways to query or modify cpusets:
- - via the cpuset file system directly, using the various cd, mkdir, echo,
- cat, rmdir commands from the shell, or their equivalent from C.
- - via the C library libcpuset.
- - via the C library libcgroup.
- (http://sourceforge.net/projects/libcg/)
- - via the python application cset.
- (http://code.google.com/p/cpuset/)
-
-The sched_setaffinity calls can also be done at the shell prompt using
-SGI's runon or Robert Love's taskset. The mbind and set_mempolicy
-calls can be done at the shell prompt using the numactl command
-(part of Andi Kleen's numa package).
-
-2. Usage Examples and Syntax
-============================
-
-2.1 Basic Usage
----------------
-
-Creating, modifying, using the cpusets can be done through the cpuset
-virtual filesystem.
-
-To mount it, type:
-# mount -t cgroup -o cpuset cpuset /sys/fs/cgroup/cpuset
-
-Then under /sys/fs/cgroup/cpuset you can find a tree that corresponds to the
-tree of the cpusets in the system. For instance, /sys/fs/cgroup/cpuset
-is the cpuset that holds the whole system.
-
-If you want to create a new cpuset under /sys/fs/cgroup/cpuset:
-# cd /sys/fs/cgroup/cpuset
-# mkdir my_cpuset
-
-Now you want to do something with this cpuset.
-# cd my_cpuset
-
-In this directory you can find several files:
-# ls
-cgroup.clone_children cpuset.memory_pressure
-cgroup.event_control cpuset.memory_spread_page
-cgroup.procs cpuset.memory_spread_slab
-cpuset.cpu_exclusive cpuset.mems
-cpuset.cpus cpuset.sched_load_balance
-cpuset.mem_exclusive cpuset.sched_relax_domain_level
-cpuset.mem_hardwall notify_on_release
-cpuset.memory_migrate tasks
-
-Reading them will give you information about the state of this cpuset:
-the CPUs and Memory Nodes it can use, the processes that are using
-it, its properties. By writing to these files you can manipulate
-the cpuset.
-
-Set some flags:
-# /bin/echo 1 > cpuset.cpu_exclusive
-
-Add some cpus:
-# /bin/echo 0-7 > cpuset.cpus
-
-Add some mems:
-# /bin/echo 0-7 > cpuset.mems
-
-Now attach your shell to this cpuset:
-# /bin/echo $$ > tasks
-
-You can also create cpusets inside your cpuset by using mkdir in this
-directory.
-# mkdir my_sub_cs
-
-To remove a cpuset, just use rmdir:
-# rmdir my_sub_cs
-This will fail if the cpuset is in use (has cpusets inside, or has
-processes attached).
-
-Note that for legacy reasons, the "cpuset" filesystem exists as a
-wrapper around the cgroup filesystem.
-
-The command
-
-mount -t cpuset X /sys/fs/cgroup/cpuset
-
-is equivalent to
-
-mount -t cgroup -ocpuset,noprefix X /sys/fs/cgroup/cpuset
-echo "/sbin/cpuset_release_agent" > /sys/fs/cgroup/cpuset/release_agent
-
-2.2 Adding/removing cpus
-------------------------
-
-This is the syntax to use when writing in the cpus or mems files
-in cpuset directories:
-
-# /bin/echo 1-4 > cpuset.cpus -> set cpus list to cpus 1,2,3,4
-# /bin/echo 1,2,3,4 > cpuset.cpus -> set cpus list to cpus 1,2,3,4
-
-To add a CPU to a cpuset, write the new list of CPUs including the
-CPU to be added. To add 6 to the above cpuset:
-
-# /bin/echo 1-4,6 > cpuset.cpus -> set cpus list to cpus 1,2,3,4,6
-
-Similarly to remove a CPU from a cpuset, write the new list of CPUs
-without the CPU to be removed.
-
-To remove all the CPUs:
-
-# /bin/echo "" > cpuset.cpus -> clear cpus list
-
-2.3 Setting flags
------------------
-
-The syntax is very simple:
-
-# /bin/echo 1 > cpuset.cpu_exclusive -> set flag 'cpuset.cpu_exclusive'
-# /bin/echo 0 > cpuset.cpu_exclusive -> unset flag 'cpuset.cpu_exclusive'
-
-2.4 Attaching processes
------------------------
-
-# /bin/echo PID > tasks
-
-Note that it is PID, not PIDs. You can only attach ONE task at a time.
-If you have several tasks to attach, you have to do it one after another:
-
-# /bin/echo PID1 > tasks
-# /bin/echo PID2 > tasks
- ...
-# /bin/echo PIDn > tasks
-
-
-3. Questions
-============
-
-Q: what's up with this '/bin/echo' ?
-A: bash's builtin 'echo' command does not check calls to write() against
- errors. If you use it in the cpuset file system, you won't be
- able to tell whether a command succeeded or failed.
-
-Q: When I attach processes, only the first of the line gets really attached !
-A: We can only return one error code per call to write(). So you should also
- put only ONE pid.
-
-4. Contact
-==========
-
-Web: http://www.bullopensource.org/cpuset
diff --git a/Documentation/cgroup-v1/devices.txt b/Documentation/cgroup-v1/devices.txt
deleted file mode 100644
index 3c1095ca02ea..000000000000
--- a/Documentation/cgroup-v1/devices.txt
+++ /dev/null
@@ -1,116 +0,0 @@
-Device Whitelist Controller
-
-1. Description:
-
-Implement a cgroup to track and enforce open and mknod restrictions
-on device files. A device cgroup associates a device access
-whitelist with each cgroup. A whitelist entry has 4 fields.
-'type' is a (all), c (char), or b (block). 'all' means it applies
-to all types and all major and minor numbers. Major and minor are
-either an integer or * for all. Access is a composition of r
-(read), w (write), and m (mknod).
-
-The root device cgroup starts with rwm to 'all'. A child device
-cgroup gets a copy of the parent. Administrators can then remove
-devices from the whitelist or add new entries. A child cgroup can
-never receive a device access which is denied by its parent.
-
-2. User Interface
-
-An entry is added using devices.allow, and removed using
-devices.deny. For instance
-
- echo 'c 1:3 mr' > /sys/fs/cgroup/1/devices.allow
-
-allows cgroup 1 to read and mknod the device usually known as
-/dev/null. Doing
-
- echo a > /sys/fs/cgroup/1/devices.deny
-
-will remove the default 'a *:* rwm' entry. Doing
-
- echo a > /sys/fs/cgroup/1/devices.allow
-
-will add the 'a *:* rwm' entry to the whitelist.
-
-3. Security
-
-Any task can move itself between cgroups. This clearly won't
-suffice, but we can decide the best way to adequately restrict
-movement as people get some experience with this. We may just want
-to require CAP_SYS_ADMIN, which at least is a separate bit from
-CAP_MKNOD. We may want to just refuse moving to a cgroup which
-isn't a descendant of the current one. Or we may want to use
-CAP_MAC_ADMIN, since we really are trying to lock down root.
-
-CAP_SYS_ADMIN is needed to modify the whitelist or move another
-task to a new cgroup. (Again we'll probably want to change that).
-
-A cgroup may not be granted more permissions than the cgroup's
-parent has.
-
-4. Hierarchy
-
-device cgroups maintain hierarchy by making sure a cgroup never has more
-access permissions than its parent. Every time an entry is written to
-a cgroup's devices.deny file, all its children will have that entry removed
-from their whitelist and all the locally set whitelist entries will be
-re-evaluated. In case one of the locally set whitelist entries would provide
-more access than the cgroup's parent, it'll be removed from the whitelist.
-
-Example:
- A
- / \
- B
-
- group behavior exceptions
- A allow "b 8:* rwm", "c 116:1 rw"
- B deny "c 1:3 rwm", "c 116:2 rwm", "b 3:* rwm"
-
-If a device is denied in group A:
- # echo "c 116:* r" > A/devices.deny
-it'll propagate down and after revalidating B's entries, the whitelist entry
-"c 116:2 rwm" will be removed:
-
- group whitelist entries denied devices
- A all "b 8:* rwm", "c 116:* rw"
- B "c 1:3 rwm", "b 3:* rwm" all the rest
-
-In case parent's exceptions change and local exceptions are not allowed
-anymore, they'll be deleted.
-
-Notice that new whitelist entries will not be propagated:
- A
- / \
- B
-
- group whitelist entries denied devices
- A "c 1:3 rwm", "c 1:5 r" all the rest
- B "c 1:3 rwm", "c 1:5 r" all the rest
-
-when adding "c *:3 rwm":
- # echo "c *:3 rwm" >A/devices.allow
-
-the result:
- group whitelist entries denied devices
- A "c *:3 rwm", "c 1:5 r" all the rest
- B "c 1:3 rwm", "c 1:5 r" all the rest
-
-but now it'll be possible to add new entries to B:
- # echo "c 2:3 rwm" >B/devices.allow
- # echo "c 50:3 r" >B/devices.allow
-or even
- # echo "c *:3 rwm" >B/devices.allow
-
-Allowing or denying all by writing 'a' to devices.allow or devices.deny will
-not be possible once the device cgroups has children.
-
-4.1 Hierarchy (internal implementation)
-
-device cgroups is implemented internally using a behavior (ALLOW, DENY) and a
-list of exceptions. The internal state is controlled using the same user
-interface to preserve compatibility with the previous whitelist-only
-implementation. Removal or addition of exceptions that will reduce the access
-to devices will be propagated down the hierarchy.
-For every propagated exception, the effective rules will be re-evaluated based
-on current parent's access rules.
diff --git a/Documentation/cgroup-v1/freezer-subsystem.txt b/Documentation/cgroup-v1/freezer-subsystem.txt
deleted file mode 100644
index e831cb2b8394..000000000000
--- a/Documentation/cgroup-v1/freezer-subsystem.txt
+++ /dev/null
@@ -1,123 +0,0 @@
-The cgroup freezer is useful to batch job management system which start
-and stop sets of tasks in order to schedule the resources of a machine
-according to the desires of a system administrator. This sort of program
-is often used on HPC clusters to schedule access to the cluster as a
-whole. The cgroup freezer uses cgroups to describe the set of tasks to
-be started/stopped by the batch job management system. It also provides
-a means to start and stop the tasks composing the job.
-
-The cgroup freezer will also be useful for checkpointing running groups
-of tasks. The freezer allows the checkpoint code to obtain a consistent
-image of the tasks by attempting to force the tasks in a cgroup into a
-quiescent state. Once the tasks are quiescent another task can
-walk /proc or invoke a kernel interface to gather information about the
-quiesced tasks. Checkpointed tasks can be restarted later should a
-recoverable error occur. This also allows the checkpointed tasks to be
-migrated between nodes in a cluster by copying the gathered information
-to another node and restarting the tasks there.
-
-Sequences of SIGSTOP and SIGCONT are not always sufficient for stopping
-and resuming tasks in userspace. Both of these signals are observable
-from within the tasks we wish to freeze. While SIGSTOP cannot be caught,
-blocked, or ignored it can be seen by waiting or ptracing parent tasks.
-SIGCONT is especially unsuitable since it can be caught by the task. Any
-programs designed to watch for SIGSTOP and SIGCONT could be broken by
-attempting to use SIGSTOP and SIGCONT to stop and resume tasks. We can
-demonstrate this problem using nested bash shells:
-
- $ echo $$
- 16644
- $ bash
- $ echo $$
- 16690
-
- From a second, unrelated bash shell:
- $ kill -SIGSTOP 16690
- $ kill -SIGCONT 16690
-
- <at this point 16690 exits and causes 16644 to exit too>
-
-This happens because bash can observe both signals and choose how it
-responds to them.
-
-Another example of a program which catches and responds to these
-signals is gdb. In fact any program designed to use ptrace is likely to
-have a problem with this method of stopping and resuming tasks.
-
-In contrast, the cgroup freezer uses the kernel freezer code to
-prevent the freeze/unfreeze cycle from becoming visible to the tasks
-being frozen. This allows the bash example above and gdb to run as
-expected.
-
-The cgroup freezer is hierarchical. Freezing a cgroup freezes all
-tasks belonging to the cgroup and all its descendant cgroups. Each
-cgroup has its own state (self-state) and the state inherited from the
-parent (parent-state). Iff both states are THAWED, the cgroup is
-THAWED.
-
-The following cgroupfs files are created by cgroup freezer.
-
-* freezer.state: Read-write.
-
- When read, returns the effective state of the cgroup - "THAWED",
- "FREEZING" or "FROZEN". This is the combined self and parent-states.
- If any is freezing, the cgroup is freezing (FREEZING or FROZEN).
-
- FREEZING cgroup transitions into FROZEN state when all tasks
- belonging to the cgroup and its descendants become frozen. Note that
- a cgroup reverts to FREEZING from FROZEN after a new task is added
- to the cgroup or one of its descendant cgroups until the new task is
- frozen.
-
- When written, sets the self-state of the cgroup. Two values are
- allowed - "FROZEN" and "THAWED". If FROZEN is written, the cgroup,
- if not already freezing, enters FREEZING state along with all its
- descendant cgroups.
-
- If THAWED is written, the self-state of the cgroup is changed to
- THAWED. Note that the effective state may not change to THAWED if
- the parent-state is still freezing. If a cgroup's effective state
- becomes THAWED, all its descendants which are freezing because of
- the cgroup also leave the freezing state.
-
-* freezer.self_freezing: Read only.
-
- Shows the self-state. 0 if the self-state is THAWED; otherwise, 1.
- This value is 1 iff the last write to freezer.state was "FROZEN".
-
-* freezer.parent_freezing: Read only.
-
- Shows the parent-state. 0 if none of the cgroup's ancestors is
- frozen; otherwise, 1.
-
-The root cgroup is non-freezable and the above interface files don't
-exist.
-
-* Examples of usage :
-
- # mkdir /sys/fs/cgroup/freezer
- # mount -t cgroup -ofreezer freezer /sys/fs/cgroup/freezer
- # mkdir /sys/fs/cgroup/freezer/0
- # echo $some_pid > /sys/fs/cgroup/freezer/0/tasks
-
-to get status of the freezer subsystem :
-
- # cat /sys/fs/cgroup/freezer/0/freezer.state
- THAWED
-
-to freeze all tasks in the container :
-
- # echo FROZEN > /sys/fs/cgroup/freezer/0/freezer.state
- # cat /sys/fs/cgroup/freezer/0/freezer.state
- FREEZING
- # cat /sys/fs/cgroup/freezer/0/freezer.state
- FROZEN
-
-to unfreeze all tasks in the container :
-
- # echo THAWED > /sys/fs/cgroup/freezer/0/freezer.state
- # cat /sys/fs/cgroup/freezer/0/freezer.state
- THAWED
-
-This is the basic mechanism which should do the right thing for user space task
-in a simple scenario.
diff --git a/Documentation/cgroup-v1/hugetlb.txt b/Documentation/cgroup-v1/hugetlb.txt
deleted file mode 100644
index 106245c3aecc..000000000000
--- a/Documentation/cgroup-v1/hugetlb.txt
+++ /dev/null
@@ -1,45 +0,0 @@
-HugeTLB Controller
--------------------
-
-The HugeTLB controller allows to limit the HugeTLB usage per control group and
-enforces the controller limit during page fault. Since HugeTLB doesn't
-support page reclaim, enforcing the limit at page fault time implies that,
-the application will get SIGBUS signal if it tries to access HugeTLB pages
-beyond its limit. This requires the application to know beforehand how much
-HugeTLB pages it would require for its use.
-
-HugeTLB controller can be created by first mounting the cgroup filesystem.
-
-# mount -t cgroup -o hugetlb none /sys/fs/cgroup
-
-With the above step, the initial or the parent HugeTLB group becomes
-visible at /sys/fs/cgroup. At bootup, this group includes all the tasks in
-the system. /sys/fs/cgroup/tasks lists the tasks in this cgroup.
-
-New groups can be created under the parent group /sys/fs/cgroup.
-
-# cd /sys/fs/cgroup
-# mkdir g1
-# echo $$ > g1/tasks
-
-The above steps create a new group g1 and move the current shell
-process (bash) into it.
-
-Brief summary of control files
-
- hugetlb.<hugepagesize>.limit_in_bytes # set/show limit of "hugepagesize" hugetlb usage
- hugetlb.<hugepagesize>.max_usage_in_bytes # show max "hugepagesize" hugetlb usage recorded
- hugetlb.<hugepagesize>.usage_in_bytes # show current usage for "hugepagesize" hugetlb
- hugetlb.<hugepagesize>.failcnt # show the number of allocation failure due to HugeTLB limit
-
-For a system supporting two hugepage size (16M and 16G) the control
-files include:
-
-hugetlb.16GB.limit_in_bytes
-hugetlb.16GB.max_usage_in_bytes
-hugetlb.16GB.usage_in_bytes
-hugetlb.16GB.failcnt
-hugetlb.16MB.limit_in_bytes
-hugetlb.16MB.max_usage_in_bytes
-hugetlb.16MB.usage_in_bytes
-hugetlb.16MB.failcnt
diff --git a/Documentation/cgroup-v1/memcg_test.txt b/Documentation/cgroup-v1/memcg_test.txt
deleted file mode 100644
index 5c7f310f32bb..000000000000
--- a/Documentation/cgroup-v1/memcg_test.txt
+++ /dev/null
@@ -1,280 +0,0 @@
-Memory Resource Controller(Memcg) Implementation Memo.
-Last Updated: 2010/2
-Base Kernel Version: based on 2.6.33-rc7-mm(candidate for 34).
-
-Because VM is getting complex (one of reasons is memcg...), memcg's behavior
-is complex. This is a document for memcg's internal behavior.
-Please note that implementation details can be changed.
-
-(*) Topics on API should be in Documentation/cgroup-v1/memory.txt)
-
-0. How to record usage ?
- 2 objects are used.
-
- page_cgroup ....an object per page.
- Allocated at boot or memory hotplug. Freed at memory hot removal.
-
- swap_cgroup ... an entry per swp_entry.
- Allocated at swapon(). Freed at swapoff().
-
- The page_cgroup has USED bit and double count against a page_cgroup never
- occurs. swap_cgroup is used only when a charged page is swapped-out.
-
-1. Charge
-
- a page/swp_entry may be charged (usage += PAGE_SIZE) at
-
- mem_cgroup_try_charge()
-
-2. Uncharge
- a page/swp_entry may be uncharged (usage -= PAGE_SIZE) by
-
- mem_cgroup_uncharge()
- Called when a page's refcount goes down to 0.
-
- mem_cgroup_uncharge_swap()
- Called when swp_entry's refcnt goes down to 0. A charge against swap
- disappears.
-
-3. charge-commit-cancel
- Memcg pages are charged in two steps:
- mem_cgroup_try_charge()
- mem_cgroup_commit_charge() or mem_cgroup_cancel_charge()
-
- At try_charge(), there are no flags to say "this page is charged".
- at this point, usage += PAGE_SIZE.
-
- At commit(), the page is associated with the memcg.
-
- At cancel(), simply usage -= PAGE_SIZE.
-
-Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
-
-4. Anonymous
- Anonymous page is newly allocated at
- - page fault into MAP_ANONYMOUS mapping.
- - Copy-On-Write.
-
- 4.1 Swap-in.
- At swap-in, the page is taken from swap-cache. There are 2 cases.
-
- (a) If the SwapCache is newly allocated and read, it has no charges.
- (b) If the SwapCache has been mapped by processes, it has been
- charged already.
-
- 4.2 Swap-out.
- At swap-out, typical state transition is below.
-
- (a) add to swap cache. (marked as SwapCache)
- swp_entry's refcnt += 1.
- (b) fully unmapped.
- swp_entry's refcnt += # of ptes.
- (c) write back to swap.
- (d) delete from swap cache. (remove from SwapCache)
- swp_entry's refcnt -= 1.
-
-
- Finally, at task exit,
- (e) zap_pte() is called and swp_entry's refcnt -=1 -> 0.
-
-5. Page Cache
- Page Cache is charged at
- - add_to_page_cache_locked().
-
- The logic is very clear. (About migration, see below)
- Note: __remove_from_page_cache() is called by remove_from_page_cache()
- and __remove_mapping().
-
-6. Shmem(tmpfs) Page Cache
- The best way to understand shmem's page state transition is to read
- mm/shmem.c.
- But brief explanation of the behavior of memcg around shmem will be
- helpful to understand the logic.
-
- Shmem's page (just leaf page, not direct/indirect block) can be on
- - radix-tree of shmem's inode.
- - SwapCache.
- - Both on radix-tree and SwapCache. This happens at swap-in
- and swap-out,
-
- It's charged when...
- - A new page is added to shmem's radix-tree.
- - A swp page is read. (move a charge from swap_cgroup to page_cgroup)
-
-7. Page Migration
-
- mem_cgroup_migrate()
-
-8. LRU
- Each memcg has its own private LRU. Now, its handling is under global
- VM's control (means that it's handled under global zone_lru_lock).
- Almost all routines around memcg's LRU is called by global LRU's
- list management functions under zone_lru_lock().
-
- A special function is mem_cgroup_isolate_pages(). This scans
- memcg's private LRU and call __isolate_lru_page() to extract a page
- from LRU.
- (By __isolate_lru_page(), the page is removed from both of global and
- private LRU.)
-
-
-9. Typical Tests.
-
- Tests for racy cases.
-
- 9.1 Small limit to memcg.
- When you do test to do racy case, it's good test to set memcg's limit
- to be very small rather than GB. Many races found in the test under
- xKB or xxMB limits.
- (Memory behavior under GB and Memory behavior under MB shows very
- different situation.)
-
- 9.2 Shmem
- Historically, memcg's shmem handling was poor and we saw some amount
- of troubles here. This is because shmem is page-cache but can be
- SwapCache. Test with shmem/tmpfs is always good test.
-
- 9.3 Migration
- For NUMA, migration is an another special case. To do easy test, cpuset
- is useful. Following is a sample script to do migration.
-
- mount -t cgroup -o cpuset none /opt/cpuset
-
- mkdir /opt/cpuset/01
- echo 1 > /opt/cpuset/01/cpuset.cpus
- echo 0 > /opt/cpuset/01/cpuset.mems
- echo 1 > /opt/cpuset/01/cpuset.memory_migrate
- mkdir /opt/cpuset/02
- echo 1 > /opt/cpuset/02/cpuset.cpus
- echo 1 > /opt/cpuset/02/cpuset.mems
- echo 1 > /opt/cpuset/02/cpuset.memory_migrate
-
- In above set, when you moves a task from 01 to 02, page migration to
- node 0 to node 1 will occur. Following is a script to migrate all
- under cpuset.
- --
- move_task()
- {
- for pid in $1
- do
- /bin/echo $pid >$2/tasks 2>/dev/null
- echo -n $pid
- echo -n " "
- done
- echo END
- }
-
- G1_TASK=`cat ${G1}/tasks`
- G2_TASK=`cat ${G2}/tasks`
- move_task "${G1_TASK}" ${G2} &
- --
- 9.4 Memory hotplug.
- memory hotplug test is one of good test.
- to offline memory, do following.
- # echo offline > /sys/devices/system/memory/memoryXXX/state
- (XXX is the place of memory)
- This is an easy way to test page migration, too.
-
- 9.5 mkdir/rmdir
- When using hierarchy, mkdir/rmdir test should be done.
- Use tests like the following.
-
- echo 1 >/opt/cgroup/01/memory/use_hierarchy
- mkdir /opt/cgroup/01/child_a
- mkdir /opt/cgroup/01/child_b
-
- set limit to 01.
- add limit to 01/child_b
- run jobs under child_a and child_b
-
- create/delete following groups at random while jobs are running.
- /opt/cgroup/01/child_a/child_aa
- /opt/cgroup/01/child_b/child_bb
- /opt/cgroup/01/child_c
-
- running new jobs in new group is also good.
-
- 9.6 Mount with other subsystems.
- Mounting with other subsystems is a good test because there is a
- race and lock dependency with other cgroup subsystems.
-
- example)
- # mount -t cgroup none /cgroup -o cpuset,memory,cpu,devices
-
- and do task move, mkdir, rmdir etc...under this.
-
- 9.7 swapoff.
- Besides management of swap is one of complicated parts of memcg,
- call path of swap-in at swapoff is not same as usual swap-in path..
- It's worth to be tested explicitly.
-
- For example, test like following is good.
- (Shell-A)
- # mount -t cgroup none /cgroup -o memory
- # mkdir /cgroup/test
- # echo 40M > /cgroup/test/memory.limit_in_bytes
- # echo 0 > /cgroup/test/tasks
- Run malloc(100M) program under this. You'll see 60M of swaps.
- (Shell-B)
- # move all tasks in /cgroup/test to /cgroup
- # /sbin/swapoff -a
- # rmdir /cgroup/test
- # kill malloc task.
-
- Of course, tmpfs v.s. swapoff test should be tested, too.
-
- 9.8 OOM-Killer
- Out-of-memory caused by memcg's limit will kill tasks under
- the memcg. When hierarchy is used, a task under hierarchy
- will be killed by the kernel.
- In this case, panic_on_oom shouldn't be invoked and tasks
- in other groups shouldn't be killed.
-
- It's not difficult to cause OOM under memcg as following.
- Case A) when you can swapoff
- #swapoff -a
- #echo 50M > /memory.limit_in_bytes
- run 51M of malloc
-
- Case B) when you use mem+swap limitation.
- #echo 50M > memory.limit_in_bytes
- #echo 50M > memory.memsw.limit_in_bytes
- run 51M of malloc
-
- 9.9 Move charges at task migration
- Charges associated with a task can be moved along with task migration.
-
- (Shell-A)
- #mkdir /cgroup/A
- #echo $$ >/cgroup/A/tasks
- run some programs which uses some amount of memory in /cgroup/A.
-
- (Shell-B)
- #mkdir /cgroup/B
- #echo 1 >/cgroup/B/memory.move_charge_at_immigrate
- #echo "pid of the program running in group A" >/cgroup/B/tasks
-
- You can see charges have been moved by reading *.usage_in_bytes or
- memory.stat of both A and B.
- See 8.2 of Documentation/cgroup-v1/memory.txt to see what value should be
- written to move_charge_at_immigrate.
-
- 9.10 Memory thresholds
- Memory controller implements memory thresholds using cgroups notification
- API. You can use tools/cgroup/cgroup_event_listener.c to test it.
-
- (Shell-A) Create cgroup and run event listener
- # mkdir /cgroup/A
- # ./cgroup_event_listener /cgroup/A/memory.usage_in_bytes 5M
-
- (Shell-B) Add task to cgroup and try to allocate and free memory
- # echo $$ >/cgroup/A/tasks
- # a="$(dd if=/dev/zero bs=1M count=10)"
- # a=
-
- You will see message from cgroup_event_listener every time you cross
- the thresholds.
-
- Use /cgroup/A/memory.memsw.usage_in_bytes to test memsw thresholds.
-
- It's good idea to test root cgroup as well.
diff --git a/Documentation/cgroup-v1/memory.txt b/Documentation/cgroup-v1/memory.txt
deleted file mode 100644
index 3682e99234c2..000000000000
--- a/Documentation/cgroup-v1/memory.txt
+++ /dev/null
@@ -1,891 +0,0 @@
-Memory Resource Controller
-
-NOTE: This document is hopelessly outdated and it asks for a complete
- rewrite. It still contains a useful information so we are keeping it
- here but make sure to check the current code if you need a deeper
- understanding.
-
-NOTE: The Memory Resource Controller has generically been referred to as the
- memory controller in this document. Do not confuse memory controller
- used here with the memory controller that is used in hardware.
-
-(For editors)
-In this document:
- When we mention a cgroup (cgroupfs's directory) with memory controller,
- we call it "memory cgroup". When you see git-log and source code, you'll
- see patch's title and function names tend to use "memcg".
- In this document, we avoid using it.
-
-Benefits and Purpose of the memory controller
-
-The memory controller isolates the memory behaviour of a group of tasks
-from the rest of the system. The article on LWN [12] mentions some probable
-uses of the memory controller. The memory controller can be used to
-
-a. Isolate an application or a group of applications
- Memory-hungry applications can be isolated and limited to a smaller
- amount of memory.
-b. Create a cgroup with a limited amount of memory; this can be used
- as a good alternative to booting with mem=XXXX.
-c. Virtualization solutions can control the amount of memory they want
- to assign to a virtual machine instance.
-d. A CD/DVD burner could control the amount of memory used by the
- rest of the system to ensure that burning does not fail due to lack
- of available memory.
-e. There are several other use cases; find one or use the controller just
- for fun (to learn and hack on the VM subsystem).
-
-Current Status: linux-2.6.34-mmotm(development version of 2010/April)
-
-Features:
- - accounting anonymous pages, file caches, swap caches usage and limiting them.
- - pages are linked to per-memcg LRU exclusively, and there is no global LRU.
- - optionally, memory+swap usage can be accounted and limited.
- - hierarchical accounting
- - soft limit
- - moving (recharging) account at moving a task is selectable.
- - usage threshold notifier
- - memory pressure notifier
- - oom-killer disable knob and oom-notifier
- - Root cgroup has no limit controls.
-
- Kernel memory support is a work in progress, and the current version provides
- basically functionality. (See Section 2.7)
-
-Brief summary of control files.
-
- tasks # attach a task(thread) and show list of threads
- cgroup.procs # show list of processes
- cgroup.event_control # an interface for event_fd()
- memory.usage_in_bytes # show current usage for memory
- (See 5.5 for details)
- memory.memsw.usage_in_bytes # show current usage for memory+Swap
- (See 5.5 for details)
- memory.limit_in_bytes # set/show limit of memory usage
- memory.memsw.limit_in_bytes # set/show limit of memory+Swap usage
- memory.failcnt # show the number of memory usage hits limits
- memory.memsw.failcnt # show the number of memory+Swap hits limits
- memory.max_usage_in_bytes # show max memory usage recorded
- memory.memsw.max_usage_in_bytes # show max memory+Swap usage recorded
- memory.soft_limit_in_bytes # set/show soft limit of memory usage
- memory.stat # show various statistics
- memory.use_hierarchy # set/show hierarchical account enabled
- memory.force_empty # trigger forced move charge to parent
- memory.pressure_level # set memory pressure notifications
- memory.swappiness # set/show swappiness parameter of vmscan
- (See sysctl's vm.swappiness)
- memory.move_charge_at_immigrate # set/show controls of moving charges
- memory.oom_control # set/show oom controls.
- memory.numa_stat # show the number of memory usage per numa node
-
- memory.kmem.limit_in_bytes # set/show hard limit for kernel memory
- memory.kmem.usage_in_bytes # show current kernel memory allocation
- memory.kmem.failcnt # show the number of kernel memory usage hits limits
- memory.kmem.max_usage_in_bytes # show max kernel memory usage recorded
-
- memory.kmem.tcp.limit_in_bytes # set/show hard limit for tcp buf memory
- memory.kmem.tcp.usage_in_bytes # show current tcp buf memory allocation
- memory.kmem.tcp.failcnt # show the number of tcp buf memory usage hits limits
- memory.kmem.tcp.max_usage_in_bytes # show max tcp buf memory usage recorded
-
-1. History
-
-The memory controller has a long history. A request for comments for the memory
-controller was posted by Balbir Singh [1]. At the time the RFC was posted
-there were several implementations for memory control. The goal of the
-RFC was to build consensus and agreement for the minimal features required
-for memory control. The first RSS controller was posted by Balbir Singh[2]
-in Feb 2007. Pavel Emelianov [3][4][5] has since posted three versions of the
-RSS controller. At OLS, at the resource management BoF, everyone suggested
-that we handle both page cache and RSS together. Another request was raised
-to allow user space handling of OOM. The current memory controller is
-at version 6; it combines both mapped (RSS) and unmapped Page
-Cache Control [11].
-
-2. Memory Control
-
-Memory is a unique resource in the sense that it is present in a limited
-amount. If a task requires a lot of CPU processing, the task can spread
-its processing over a period of hours, days, months or years, but with
-memory, the same physical memory needs to be reused to accomplish the task.
-
-The memory controller implementation has been divided into phases. These
-are:
-
-1. Memory controller
-2. mlock(2) controller
-3. Kernel user memory accounting and slab control
-4. user mappings length controller
-
-The memory controller is the first controller developed.
-
-2.1. Design
-
-The core of the design is a counter called the page_counter. The
-page_counter tracks the current memory usage and limit of the group of
-processes associated with the controller. Each cgroup has a memory controller
-specific data structure (mem_cgroup) associated with it.
-
-2.2. Accounting
-
- +--------------------+
- | mem_cgroup |
- | (page_counter) |
- +--------------------+
- / ^ \
- / | \
- +---------------+ | +---------------+
- | mm_struct | |.... | mm_struct |
- | | | | |
- +---------------+ | +---------------+
- |
- + --------------+
- |
- +---------------+ +------+--------+
- | page +----------> page_cgroup|
- | | | |
- +---------------+ +---------------+
-
- (Figure 1: Hierarchy of Accounting)
-
-
-Figure 1 shows the important aspects of the controller
-
-1. Accounting happens per cgroup
-2. Each mm_struct knows about which cgroup it belongs to
-3. Each page has a pointer to the page_cgroup, which in turn knows the
- cgroup it belongs to
-
-The accounting is done as follows: mem_cgroup_charge_common() is invoked to
-set up the necessary data structures and check if the cgroup that is being
-charged is over its limit. If it is, then reclaim is invoked on the cgroup.
-More details can be found in the reclaim section of this document.
-If everything goes well, a page meta-data-structure called page_cgroup is
-updated. page_cgroup has its own LRU on cgroup.
-(*) page_cgroup structure is allocated at boot/memory-hotplug time.
-
-2.2.1 Accounting details
-
-All mapped anon pages (RSS) and cache pages (Page Cache) are accounted.
-Some pages which are never reclaimable and will not be on the LRU
-are not accounted. We just account pages under usual VM management.
-
-RSS pages are accounted at page_fault unless they've already been accounted
-for earlier. A file page will be accounted for as Page Cache when it's
-inserted into inode (radix-tree). While it's mapped into the page tables of
-processes, duplicate accounting is carefully avoided.
-
-An RSS page is unaccounted when it's fully unmapped. A PageCache page is
-unaccounted when it's removed from radix-tree. Even if RSS pages are fully
-unmapped (by kswapd), they may exist as SwapCache in the system until they
-are really freed. Such SwapCaches are also accounted.
-A swapped-in page is not accounted until it's mapped.
-
-Note: The kernel does swapin-readahead and reads multiple swaps at once.
-This means swapped-in pages may contain pages for other tasks than a task
-causing page fault. So, we avoid accounting at swap-in I/O.
-
-At page migration, accounting information is kept.
-
-Note: we just account pages-on-LRU because our purpose is to control amount
-of used pages; not-on-LRU pages tend to be out-of-control from VM view.
-
-2.3 Shared Page Accounting
-
-Shared pages are accounted on the basis of the first touch approach. The
-cgroup that first touches a page is accounted for the page. The principle
-behind this approach is that a cgroup that aggressively uses a shared
-page will eventually get charged for it (once it is uncharged from
-the cgroup that brought it in -- this will happen on memory pressure).
-
-But see section 8.2: when moving a task to another cgroup, its pages may
-be recharged to the new cgroup, if move_charge_at_immigrate has been chosen.
-
-Exception: If CONFIG_MEMCG_SWAP is not used.
-When you do swapoff and make swapped-out pages of shmem(tmpfs) to
-be backed into memory in force, charges for pages are accounted against the
-caller of swapoff rather than the users of shmem.
-
-2.4 Swap Extension (CONFIG_MEMCG_SWAP)
-
-Swap Extension allows you to record charge for swap. A swapped-in page is
-charged back to original page allocator if possible.
-
-When swap is accounted, following files are added.
- - memory.memsw.usage_in_bytes.
- - memory.memsw.limit_in_bytes.
-
-memsw means memory+swap. Usage of memory+swap is limited by
-memsw.limit_in_bytes.
-
-Example: Assume a system with 4G of swap. A task which allocates 6G of memory
-(by mistake) under 2G memory limitation will use all swap.
-In this case, setting memsw.limit_in_bytes=3G will prevent bad use of swap.
-By using the memsw limit, you can avoid system OOM which can be caused by swap
-shortage.
-
-* why 'memory+swap' rather than swap.
-The global LRU(kswapd) can swap out arbitrary pages. Swap-out means
-to move account from memory to swap...there is no change in usage of
-memory+swap. In other words, when we want to limit the usage of swap without
-affecting global LRU, memory+swap limit is better than just limiting swap from
-an OS point of view.
-
-* What happens when a cgroup hits memory.memsw.limit_in_bytes
-When a cgroup hits memory.memsw.limit_in_bytes, it's useless to do swap-out
-in this cgroup. Then, swap-out will not be done by cgroup routine and file
-caches are dropped. But as mentioned above, global LRU can do swapout memory
-from it for sanity of the system's memory management state. You can't forbid
-it by cgroup.
-
-2.5 Reclaim
-
-Each cgroup maintains a per cgroup LRU which has the same structure as
-global VM. When a cgroup goes over its limit, we first try
-to reclaim memory from the cgroup so as to make space for the new
-pages that the cgroup has touched. If the reclaim is unsuccessful,
-an OOM routine is invoked to select and kill the bulkiest task in the
-cgroup. (See 10. OOM Control below.)
-
-The reclaim algorithm has not been modified for cgroups, except that
-pages that are selected for reclaiming come from the per-cgroup LRU
-list.
-
-NOTE: Reclaim does not work for the root cgroup, since we cannot set any
-limits on the root cgroup.
-
-Note2: When panic_on_oom is set to "2", the whole system will panic.
-
-When oom event notifier is registered, event will be delivered.
-(See oom_control section)
-
-2.6 Locking
-
- lock_page_cgroup()/unlock_page_cgroup() should not be called under
- the i_pages lock.
-
- Other lock order is following:
- PG_locked.
- mm->page_table_lock
- zone_lru_lock
- lock_page_cgroup.
- In many cases, just lock_page_cgroup() is called.
- per-zone-per-cgroup LRU (cgroup's private LRU) is just guarded by
- zone_lru_lock, it has no lock of its own.
-
-2.7 Kernel Memory Extension (CONFIG_MEMCG_KMEM)
-
-With the Kernel memory extension, the Memory Controller is able to limit
-the amount of kernel memory used by the system. Kernel memory is fundamentally
-different than user memory, since it can't be swapped out, which makes it
-possible to DoS the system by consuming too much of this precious resource.
-
-Kernel memory accounting is enabled for all memory cgroups by default. But
-it can be disabled system-wide by passing cgroup.memory=nokmem to the kernel
-at boot time. In this case, kernel memory will not be accounted at all.
-
-Kernel memory limits are not imposed for the root cgroup. Usage for the root
-cgroup may or may not be accounted. The memory used is accumulated into
-memory.kmem.usage_in_bytes, or in a separate counter when it makes sense.
-(currently only for tcp).
-The main "kmem" counter is fed into the main counter, so kmem charges will
-also be visible from the user counter.
-
-Currently no soft limit is implemented for kernel memory. It is future work
-to trigger slab reclaim when those limits are reached.
-
-2.7.1 Current Kernel Memory resources accounted
-
-* stack pages: every process consumes some stack pages. By accounting into
-kernel memory, we prevent new processes from being created when the kernel
-memory usage is too high.
-
-* slab pages: pages allocated by the SLAB or SLUB allocator are tracked. A copy
-of each kmem_cache is created every time the cache is touched by the first time
-from inside the memcg. The creation is done lazily, so some objects can still be
-skipped while the cache is being created. All objects in a slab page should
-belong to the same memcg. This only fails to hold when a task is migrated to a
-different memcg during the page allocation by the cache.
-
-* sockets memory pressure: some sockets protocols have memory pressure
-thresholds. The Memory Controller allows them to be controlled individually
-per cgroup, instead of globally.
-
-* tcp memory pressure: sockets memory pressure for the tcp protocol.
-
-2.7.2 Common use cases
-
-Because the "kmem" counter is fed to the main user counter, kernel memory can
-never be limited completely independently of user memory. Say "U" is the user
-limit, and "K" the kernel limit. There are three possible ways limits can be
-set:
-
- U != 0, K = unlimited:
- This is the standard memcg limitation mechanism already present before kmem
- accounting. Kernel memory is completely ignored.
-
- U != 0, K < U:
- Kernel memory is a subset of the user memory. This setup is useful in
- deployments where the total amount of memory per-cgroup is overcommited.
- Overcommiting kernel memory limits is definitely not recommended, since the
- box can still run out of non-reclaimable memory.
- In this case, the admin could set up K so that the sum of all groups is
- never greater than the total memory, and freely set U at the cost of his
- QoS.
- WARNING: In the current implementation, memory reclaim will NOT be
- triggered for a cgroup when it hits K while staying below U, which makes
- this setup impractical.
-
- U != 0, K >= U:
- Since kmem charges will also be fed to the user counter and reclaim will be
- triggered for the cgroup for both kinds of memory. This setup gives the
- admin a unified view of memory, and it is also useful for people who just
- want to track kernel memory usage.
-
-3. User Interface
-
-3.0. Configuration
-
-a. Enable CONFIG_CGROUPS
-b. Enable CONFIG_MEMCG
-c. Enable CONFIG_MEMCG_SWAP (to use swap extension)
-d. Enable CONFIG_MEMCG_KMEM (to use kmem extension)
-
-3.1. Prepare the cgroups (see cgroups.txt, Why are cgroups needed?)
-# mount -t tmpfs none /sys/fs/cgroup
-# mkdir /sys/fs/cgroup/memory
-# mount -t cgroup none /sys/fs/cgroup/memory -o memory
-
-3.2. Make the new group and move bash into it
-# mkdir /sys/fs/cgroup/memory/0
-# echo $$ > /sys/fs/cgroup/memory/0/tasks
-
-Since now we're in the 0 cgroup, we can alter the memory limit:
-# echo 4M > /sys/fs/cgroup/memory/0/memory.limit_in_bytes
-
-NOTE: We can use a suffix (k, K, m, M, g or G) to indicate values in kilo,
-mega or gigabytes. (Here, Kilo, Mega, Giga are Kibibytes, Mebibytes, Gibibytes.)
-
-NOTE: We can write "-1" to reset the *.limit_in_bytes(unlimited).
-NOTE: We cannot set limits on the root cgroup any more.
-
-# cat /sys/fs/cgroup/memory/0/memory.limit_in_bytes
-4194304
-
-We can check the usage:
-# cat /sys/fs/cgroup/memory/0/memory.usage_in_bytes
-1216512
-
-A successful write to this file does not guarantee a successful setting of
-this limit to the value written into the file. This can be due to a
-number of factors, such as rounding up to page boundaries or the total
-availability of memory on the system. The user is required to re-read
-this file after a write to guarantee the value committed by the kernel.
-
-# echo 1 > memory.limit_in_bytes
-# cat memory.limit_in_bytes
-4096
-
-The memory.failcnt field gives the number of times that the cgroup limit was
-exceeded.
-
-The memory.stat file gives accounting information. Now, the number of
-caches, RSS and Active pages/Inactive pages are shown.
-
-4. Testing
-
-For testing features and implementation, see memcg_test.txt.
-
-Performance test is also important. To see pure memory controller's overhead,
-testing on tmpfs will give you good numbers of small overheads.
-Example: do kernel make on tmpfs.
-
-Page-fault scalability is also important. At measuring parallel
-page fault test, multi-process test may be better than multi-thread
-test because it has noise of shared objects/status.
-
-But the above two are testing extreme situations.
-Trying usual test under memory controller is always helpful.
-
-4.1 Troubleshooting
-
-Sometimes a user might find that the application under a cgroup is
-terminated by the OOM killer. There are several causes for this:
-
-1. The cgroup limit is too low (just too low to do anything useful)
-2. The user is using anonymous memory and swap is turned off or too low
-
-A sync followed by echo 1 > /proc/sys/vm/drop_caches will help get rid of
-some of the pages cached in the cgroup (page cache pages).
-
-To know what happens, disabling OOM_Kill as per "10. OOM Control" (below) and
-seeing what happens will be helpful.
-
-4.2 Task migration
-
-When a task migrates from one cgroup to another, its charge is not
-carried forward by default. The pages allocated from the original cgroup still
-remain charged to it, the charge is dropped when the page is freed or
-reclaimed.
-
-You can move charges of a task along with task migration.
-See 8. "Move charges at task migration"
-
-4.3 Removing a cgroup
-
-A cgroup can be removed by rmdir, but as discussed in sections 4.1 and 4.2, a
-cgroup might have some charge associated with it, even though all
-tasks have migrated away from it. (because we charge against pages, not
-against tasks.)
-
-We move the stats to root (if use_hierarchy==0) or parent (if
-use_hierarchy==1), and no change on the charge except uncharging
-from the child.
-
-Charges recorded in swap information is not updated at removal of cgroup.
-Recorded information is discarded and a cgroup which uses swap (swapcache)
-will be charged as a new owner of it.
-
-About use_hierarchy, see Section 6.
-
-5. Misc. interfaces.
-
-5.1 force_empty
- memory.force_empty interface is provided to make cgroup's memory usage empty.
- When writing anything to this
-
- # echo 0 > memory.force_empty
-
- the cgroup will be reclaimed and as many pages reclaimed as possible.
-
- The typical use case for this interface is before calling rmdir().
- Because rmdir() moves all pages to parent, some out-of-use page caches can be
- moved to the parent. If you want to avoid that, force_empty will be useful.
-
- Also, note that when memory.kmem.limit_in_bytes is set the charges due to
- kernel pages will still be seen. This is not considered a failure and the
- write will still return success. In this case, it is expected that
- memory.kmem.usage_in_bytes == memory.usage_in_bytes.
-
- About use_hierarchy, see Section 6.
-
-5.2 stat file
-
-memory.stat file includes following statistics
-
-# per-memory cgroup local status
-cache - # of bytes of page cache memory.
-rss - # of bytes of anonymous and swap cache memory (includes
- transparent hugepages).
-rss_huge - # of bytes of anonymous transparent hugepages.
-mapped_file - # of bytes of mapped file (includes tmpfs/shmem)
-pgpgin - # of charging events to the memory cgroup. The charging
- event happens each time a page is accounted as either mapped
- anon page(RSS) or cache page(Page Cache) to the cgroup.
-pgpgout - # of uncharging events to the memory cgroup. The uncharging
- event happens each time a page is unaccounted from the cgroup.
-swap - # of bytes of swap usage
-dirty - # of bytes that are waiting to get written back to the disk.
-writeback - # of bytes of file/anon cache that are queued for syncing to
- disk.
-inactive_anon - # of bytes of anonymous and swap cache memory on inactive
- LRU list.
-active_anon - # of bytes of anonymous and swap cache memory on active
- LRU list.
-inactive_file - # of bytes of file-backed memory on inactive LRU list.
-active_file - # of bytes of file-backed memory on active LRU list.
-unevictable - # of bytes of memory that cannot be reclaimed (mlocked etc).
-
-# status considering hierarchy (see memory.use_hierarchy settings)
-
-hierarchical_memory_limit - # of bytes of memory limit with regard to hierarchy
- under which the memory cgroup is
-hierarchical_memsw_limit - # of bytes of memory+swap limit with regard to
- hierarchy under which memory cgroup is.
-
-total_<counter> - # hierarchical version of <counter>, which in
- addition to the cgroup's own value includes the
- sum of all hierarchical children's values of
- <counter>, i.e. total_cache
-
-# The following additional stats are dependent on CONFIG_DEBUG_VM.
-
-recent_rotated_anon - VM internal parameter. (see mm/vmscan.c)
-recent_rotated_file - VM internal parameter. (see mm/vmscan.c)
-recent_scanned_anon - VM internal parameter. (see mm/vmscan.c)
-recent_scanned_file - VM internal parameter. (see mm/vmscan.c)
-
-Memo:
- recent_rotated means recent frequency of LRU rotation.
- recent_scanned means recent # of scans to LRU.
- showing for better debug please see the code for meanings.
-
-Note:
- Only anonymous and swap cache memory is listed as part of 'rss' stat.
- This should not be confused with the true 'resident set size' or the
- amount of physical memory used by the cgroup.
- 'rss + mapped_file" will give you resident set size of cgroup.
- (Note: file and shmem may be shared among other cgroups. In that case,
- mapped_file is accounted only when the memory cgroup is owner of page
- cache.)
-
-5.3 swappiness
-
-Overrides /proc/sys/vm/swappiness for the particular group. The tunable
-in the root cgroup corresponds to the global swappiness setting.
-
-Please note that unlike during the global reclaim, limit reclaim
-enforces that 0 swappiness really prevents from any swapping even if
-there is a swap storage available. This might lead to memcg OOM killer
-if there are no file pages to reclaim.
-
-5.4 failcnt
-
-A memory cgroup provides memory.failcnt and memory.memsw.failcnt files.
-This failcnt(== failure count) shows the number of times that a usage counter
-hit its limit. When a memory cgroup hits a limit, failcnt increases and
-memory under it will be reclaimed.
-
-You can reset failcnt by writing 0 to failcnt file.
-# echo 0 > .../memory.failcnt
-
-5.5 usage_in_bytes
-
-For efficiency, as other kernel components, memory cgroup uses some optimization
-to avoid unnecessary cacheline false sharing. usage_in_bytes is affected by the
-method and doesn't show 'exact' value of memory (and swap) usage, it's a fuzz
-value for efficient access. (Of course, when necessary, it's synchronized.)
-If you want to know more exact memory usage, you should use RSS+CACHE(+SWAP)
-value in memory.stat(see 5.2).
-
-5.6 numa_stat
-
-This is similar to numa_maps but operates on a per-memcg basis. This is
-useful for providing visibility into the numa locality information within
-an memcg since the pages are allowed to be allocated from any physical
-node. One of the use cases is evaluating application performance by
-combining this information with the application's CPU allocation.
-
-Each memcg's numa_stat file includes "total", "file", "anon" and "unevictable"
-per-node page counts including "hierarchical_<counter>" which sums up all
-hierarchical children's values in addition to the memcg's own value.
-
-The output format of memory.numa_stat is:
-
-total=<total pages> N0=<node 0 pages> N1=<node 1 pages> ...
-file=<total file pages> N0=<node 0 pages> N1=<node 1 pages> ...
-anon=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
-unevictable=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
-hierarchical_<counter>=<counter pages> N0=<node 0 pages> N1=<node 1 pages> ...
-
-The "total" count is sum of file + anon + unevictable.
-
-6. Hierarchy support
-
-The memory controller supports a deep hierarchy and hierarchical accounting.
-The hierarchy is created by creating the appropriate cgroups in the
-cgroup filesystem. Consider for example, the following cgroup filesystem
-hierarchy
-
- root
- / | \
- / | \
- a b c
- | \
- | \
- d e
-
-In the diagram above, with hierarchical accounting enabled, all memory
-usage of e, is accounted to its ancestors up until the root (i.e, c and root),
-that has memory.use_hierarchy enabled. If one of the ancestors goes over its
-limit, the reclaim algorithm reclaims from the tasks in the ancestor and the
-children of the ancestor.
-
-6.1 Enabling hierarchical accounting and reclaim
-
-A memory cgroup by default disables the hierarchy feature. Support
-can be enabled by writing 1 to memory.use_hierarchy file of the root cgroup
-
-# echo 1 > memory.use_hierarchy
-
-The feature can be disabled by
-
-# echo 0 > memory.use_hierarchy
-
-NOTE1: Enabling/disabling will fail if either the cgroup already has other
- cgroups created below it, or if the parent cgroup has use_hierarchy
- enabled.
-
-NOTE2: When panic_on_oom is set to "2", the whole system will panic in
- case of an OOM event in any cgroup.
-
-7. Soft limits
-
-Soft limits allow for greater sharing of memory. The idea behind soft limits
-is to allow control groups to use as much of the memory as needed, provided
-
-a. There is no memory contention
-b. They do not exceed their hard limit
-
-When the system detects memory contention or low memory, control groups
-are pushed back to their soft limits. If the soft limit of each control
-group is very high, they are pushed back as much as possible to make
-sure that one control group does not starve the others of memory.
-
-Please note that soft limits is a best-effort feature; it comes with
-no guarantees, but it does its best to make sure that when memory is
-heavily contended for, memory is allocated based on the soft limit
-hints/setup. Currently soft limit based reclaim is set up such that
-it gets invoked from balance_pgdat (kswapd).
-
-7.1 Interface
-
-Soft limits can be setup by using the following commands (in this example we
-assume a soft limit of 256 MiB)
-
-# echo 256M > memory.soft_limit_in_bytes
-
-If we want to change this to 1G, we can at any time use
-
-# echo 1G > memory.soft_limit_in_bytes
-
-NOTE1: Soft limits take effect over a long period of time, since they involve
- reclaiming memory for balancing between memory cgroups
-NOTE2: It is recommended to set the soft limit always below the hard limit,
- otherwise the hard limit will take precedence.
-
-8. Move charges at task migration
-
-Users can move charges associated with a task along with task migration, that
-is, uncharge task's pages from the old cgroup and charge them to the new cgroup.
-This feature is not supported in !CONFIG_MMU environments because of lack of
-page tables.
-
-8.1 Interface
-
-This feature is disabled by default. It can be enabled (and disabled again) by
-writing to memory.move_charge_at_immigrate of the destination cgroup.
-
-If you want to enable it:
-
-# echo (some positive value) > memory.move_charge_at_immigrate
-
-Note: Each bits of move_charge_at_immigrate has its own meaning about what type
- of charges should be moved. See 8.2 for details.
-Note: Charges are moved only when you move mm->owner, in other words,
- a leader of a thread group.
-Note: If we cannot find enough space for the task in the destination cgroup, we
- try to make space by reclaiming memory. Task migration may fail if we
- cannot make enough space.
-Note: It can take several seconds if you move charges much.
-
-And if you want disable it again:
-
-# echo 0 > memory.move_charge_at_immigrate
-
-8.2 Type of charges which can be moved
-
-Each bit in move_charge_at_immigrate has its own meaning about what type of
-charges should be moved. But in any case, it must be noted that an account of
-a page or a swap can be moved only when it is charged to the task's current
-(old) memory cgroup.
-
- bit | what type of charges would be moved ?
- -----+------------------------------------------------------------------------
- 0 | A charge of an anonymous page (or swap of it) used by the target task.
- | You must enable Swap Extension (see 2.4) to enable move of swap charges.
- -----+------------------------------------------------------------------------
- 1 | A charge of file pages (normal file, tmpfs file (e.g. ipc shared memory)
- | and swaps of tmpfs file) mmapped by the target task. Unlike the case of
- | anonymous pages, file pages (and swaps) in the range mmapped by the task
- | will be moved even if the task hasn't done page fault, i.e. they might
- | not be the task's "RSS", but other task's "RSS" that maps the same file.
- | And mapcount of the page is ignored (the page can be moved even if
- | page_mapcount(page) > 1). You must enable Swap Extension (see 2.4) to
- | enable move of swap charges.
-
-8.3 TODO
-
-- All of moving charge operations are done under cgroup_mutex. It's not good
- behavior to hold the mutex too long, so we may need some trick.
-
-9. Memory thresholds
-
-Memory cgroup implements memory thresholds using the cgroups notification
-API (see cgroups.txt). It allows to register multiple memory and memsw
-thresholds and gets notifications when it crosses.
-
-To register a threshold, an application must:
-- create an eventfd using eventfd(2);
-- open memory.usage_in_bytes or memory.memsw.usage_in_bytes;
-- write string like "<event_fd> <fd of memory.usage_in_bytes> <threshold>" to
- cgroup.event_control.
-
-Application will be notified through eventfd when memory usage crosses
-threshold in any direction.
-
-It's applicable for root and non-root cgroup.
-
-10. OOM Control
-
-memory.oom_control file is for OOM notification and other controls.
-
-Memory cgroup implements OOM notifier using the cgroup notification
-API (See cgroups.txt). It allows to register multiple OOM notification
-delivery and gets notification when OOM happens.
-
-To register a notifier, an application must:
- - create an eventfd using eventfd(2)
- - open memory.oom_control file
- - write string like "<event_fd> <fd of memory.oom_control>" to
- cgroup.event_control
-
-The application will be notified through eventfd when OOM happens.
-OOM notification doesn't work for the root cgroup.
-
-You can disable the OOM-killer by writing "1" to memory.oom_control file, as:
-
- #echo 1 > memory.oom_control
-
-If OOM-killer is disabled, tasks under cgroup will hang/sleep
-in memory cgroup's OOM-waitqueue when they request accountable memory.
-
-For running them, you have to relax the memory cgroup's OOM status by
- * enlarge limit or reduce usage.
-To reduce usage,
- * kill some tasks.
- * move some tasks to other group with account migration.
- * remove some files (on tmpfs?)
-
-Then, stopped tasks will work again.
-
-At reading, current status of OOM is shown.
- oom_kill_disable 0 or 1 (if 1, oom-killer is disabled)
- under_oom 0 or 1 (if 1, the memory cgroup is under OOM, tasks may
- be stopped.)
-
-11. Memory Pressure
-
-The pressure level notifications can be used to monitor the memory
-allocation cost; based on the pressure, applications can implement
-different strategies of managing their memory resources. The pressure
-levels are defined as following:
-
-The "low" level means that the system is reclaiming memory for new
-allocations. Monitoring this reclaiming activity might be useful for
-maintaining cache level. Upon notification, the program (typically
-"Activity Manager") might analyze vmstat and act in advance (i.e.
-prematurely shutdown unimportant services).
-
-The "medium" level means that the system is experiencing medium memory
-pressure, the system might be making swap, paging out active file caches,
-etc. Upon this event applications may decide to further analyze
-vmstat/zoneinfo/memcg or internal memory usage statistics and free any
-resources that can be easily reconstructed or re-read from a disk.
-
-The "critical" level means that the system is actively thrashing, it is
-about to out of memory (OOM) or even the in-kernel OOM killer is on its
-way to trigger. Applications should do whatever they can to help the
-system. It might be too late to consult with vmstat or any other
-statistics, so it's advisable to take an immediate action.
-
-By default, events are propagated upward until the event is handled, i.e. the
-events are not pass-through. For example, you have three cgroups: A->B->C. Now
-you set up an event listener on cgroups A, B and C, and suppose group C
-experiences some pressure. In this situation, only group C will receive the
-notification, i.e. groups A and B will not receive it. This is done to avoid
-excessive "broadcasting" of messages, which disturbs the system and which is
-especially bad if we are low on memory or thrashing. Group B, will receive
-notification only if there are no event listers for group C.
-
-There are three optional modes that specify different propagation behavior:
-
- - "default": this is the default behavior specified above. This mode is the
- same as omitting the optional mode parameter, preserved by backwards
- compatibility.
-
- - "hierarchy": events always propagate up to the root, similar to the default
- behavior, except that propagation continues regardless of whether there are
- event listeners at each level, with the "hierarchy" mode. In the above
- example, groups A, B, and C will receive notification of memory pressure.
-
- - "local": events are pass-through, i.e. they only receive notifications when
- memory pressure is experienced in the memcg for which the notification is
- registered. In the above example, group C will receive notification if
- registered for "local" notification and the group experiences memory
- pressure. However, group B will never receive notification, regardless if
- there is an event listener for group C or not, if group B is registered for
- local notification.
-
-The level and event notification mode ("hierarchy" or "local", if necessary) are
-specified by a comma-delimited string, i.e. "low,hierarchy" specifies
-hierarchical, pass-through, notification for all ancestor memcgs. Notification
-that is the default, non pass-through behavior, does not specify a mode.
-"medium,local" specifies pass-through notification for the medium level.
-
-The file memory.pressure_level is only used to setup an eventfd. To
-register a notification, an application must:
-
-- create an eventfd using eventfd(2);
-- open memory.pressure_level;
-- write string as "<event_fd> <fd of memory.pressure_level> <level[,mode]>"
- to cgroup.event_control.
-
-Application will be notified through eventfd when memory pressure is at
-the specific level (or higher). Read/write operations to
-memory.pressure_level are no implemented.
-
-Test:
-
- Here is a small script example that makes a new cgroup, sets up a
- memory limit, sets up a notification in the cgroup and then makes child
- cgroup experience a critical pressure:
-
- # cd /sys/fs/cgroup/memory/
- # mkdir foo
- # cd foo
- # cgroup_event_listener memory.pressure_level low,hierarchy &
- # echo 8000000 > memory.limit_in_bytes
- # echo 8000000 > memory.memsw.limit_in_bytes
- # echo $$ > tasks
- # dd if=/dev/zero | read x
-
- (Expect a bunch of notifications, and eventually, the oom-killer will
- trigger.)
-
-12. TODO
-
-1. Make per-cgroup scanner reclaim not-shared pages first
-2. Teach controller to account for shared-pages
-3. Start reclamation in the background when the limit is
- not yet hit but the usage is getting closer
-
-Summary
-
-Overall, the memory controller has been a stable controller and has been
-commented and discussed quite extensively in the community.
-
-References
-
-1. Singh, Balbir. RFC: Memory Controller, http://lwn.net/Articles/206697/
-2. Singh, Balbir. Memory Controller (RSS Control),
- http://lwn.net/Articles/222762/
-3. Emelianov, Pavel. Resource controllers based on process cgroups
- http://lkml.org/lkml/2007/3/6/198
-4. Emelianov, Pavel. RSS controller based on process cgroups (v2)
- http://lkml.org/lkml/2007/4/9/78
-5. Emelianov, Pavel. RSS controller based on process cgroups (v3)
- http://lkml.org/lkml/2007/5/30/244
-6. Menage, Paul. Control Groups v10, http://lwn.net/Articles/236032/
-7. Vaidyanathan, Srinivasan, Control Groups: Pagecache accounting and control
- subsystem (v3), http://lwn.net/Articles/235534/
-8. Singh, Balbir. RSS controller v2 test results (lmbench),
- http://lkml.org/lkml/2007/5/17/232
-9. Singh, Balbir. RSS controller v2 AIM9 results
- http://lkml.org/lkml/2007/5/18/1
-10. Singh, Balbir. Memory controller v6 test results,
- http://lkml.org/lkml/2007/8/19/36
-11. Singh, Balbir. Memory controller introduction (v6),
- http://lkml.org/lkml/2007/8/17/69
-12. Corbet, Jonathan, Controlling memory use in cgroups,
- http://lwn.net/Articles/243795/
diff --git a/Documentation/cgroup-v1/net_cls.txt b/Documentation/cgroup-v1/net_cls.txt
deleted file mode 100644
index ec182346dea2..000000000000
--- a/Documentation/cgroup-v1/net_cls.txt
+++ /dev/null
@@ -1,39 +0,0 @@
-Network classifier cgroup
--------------------------
-
-The Network classifier cgroup provides an interface to
-tag network packets with a class identifier (classid).
-
-The Traffic Controller (tc) can be used to assign
-different priorities to packets from different cgroups.
-Also, Netfilter (iptables) can use this tag to perform
-actions on such packets.
-
-Creating a net_cls cgroups instance creates a net_cls.classid file.
-This net_cls.classid value is initialized to 0.
-
-You can write hexadecimal values to net_cls.classid; the format for these
-values is 0xAAAABBBB; AAAA is the major handle number and BBBB
-is the minor handle number.
-Reading net_cls.classid yields a decimal result.
-
-Example:
-mkdir /sys/fs/cgroup/net_cls
-mount -t cgroup -onet_cls net_cls /sys/fs/cgroup/net_cls
-mkdir /sys/fs/cgroup/net_cls/0
-echo 0x100001 > /sys/fs/cgroup/net_cls/0/net_cls.classid
- - setting a 10:1 handle.
-
-cat /sys/fs/cgroup/net_cls/0/net_cls.classid
-1048577
-
-configuring tc:
-tc qdisc add dev eth0 root handle 10: htb
-
-tc class add dev eth0 parent 10: classid 10:1 htb rate 40mbit
- - creating traffic class 10:1
-
-tc filter add dev eth0 parent 10: protocol ip prio 10 handle 1: cgroup
-
-configuring iptables, basic example:
-iptables -A OUTPUT -m cgroup ! --cgroup 0x100001 -j DROP
diff --git a/Documentation/cgroup-v1/net_prio.txt b/Documentation/cgroup-v1/net_prio.txt
deleted file mode 100644
index a82cbd28ea8a..000000000000
--- a/Documentation/cgroup-v1/net_prio.txt
+++ /dev/null
@@ -1,55 +0,0 @@
-Network priority cgroup
--------------------------
-
-The Network priority cgroup provides an interface to allow an administrator to
-dynamically set the priority of network traffic generated by various
-applications
-
-Nominally, an application would set the priority of its traffic via the
-SO_PRIORITY socket option. This however, is not always possible because:
-
-1) The application may not have been coded to set this value
-2) The priority of application traffic is often a site-specific administrative
- decision rather than an application defined one.
-
-This cgroup allows an administrator to assign a process to a group which defines
-the priority of egress traffic on a given interface. Network priority groups can
-be created by first mounting the cgroup filesystem.
-
-# mount -t cgroup -onet_prio none /sys/fs/cgroup/net_prio
-
-With the above step, the initial group acting as the parent accounting group
-becomes visible at '/sys/fs/cgroup/net_prio'. This group includes all tasks in
-the system. '/sys/fs/cgroup/net_prio/tasks' lists the tasks in this cgroup.
-
-Each net_prio cgroup contains two files that are subsystem specific
-
-net_prio.prioidx
-This file is read-only, and is simply informative. It contains a unique integer
-value that the kernel uses as an internal representation of this cgroup.
-
-net_prio.ifpriomap
-This file contains a map of the priorities assigned to traffic originating from
-processes in this group and egressing the system on various interfaces. It
-contains a list of tuples in the form <ifname priority>. Contents of this file
-can be modified by echoing a string into the file using the same tuple format.
-for example:
-
-echo "eth0 5" > /sys/fs/cgroups/net_prio/iscsi/net_prio.ifpriomap
-
-This command would force any traffic originating from processes belonging to the
-iscsi net_prio cgroup and egressing on interface eth0 to have the priority of
-said traffic set to the value 5. The parent accounting group also has a
-writeable 'net_prio.ifpriomap' file that can be used to set a system default
-priority.
-
-Priorities are set immediately prior to queueing a frame to the device
-queueing discipline (qdisc) so priorities will be assigned prior to the hardware
-queue selection being made.
-
-One usage for the net_prio cgroup is with mqprio qdisc allowing application
-traffic to be steered to hardware/driver based traffic classes. These mappings
-can then be managed by administrators or other networking protocols such as
-DCBX.
-
-A new net_prio cgroup inherits the parent's configuration.
diff --git a/Documentation/cgroup-v1/pids.txt b/Documentation/cgroup-v1/pids.txt
deleted file mode 100644
index 1a078b5d281a..000000000000
--- a/Documentation/cgroup-v1/pids.txt
+++ /dev/null
@@ -1,85 +0,0 @@
- Process Number Controller
- =========================
-
-Abstract
---------
-
-The process number controller is used to allow a cgroup hierarchy to stop any
-new tasks from being fork()'d or clone()'d after a certain limit is reached.
-
-Since it is trivial to hit the task limit without hitting any kmemcg limits in
-place, PIDs are a fundamental resource. As such, PID exhaustion must be
-preventable in the scope of a cgroup hierarchy by allowing resource limiting of
-the number of tasks in a cgroup.
-
-Usage
------
-
-In order to use the `pids` controller, set the maximum number of tasks in
-pids.max (this is not available in the root cgroup for obvious reasons). The
-number of processes currently in the cgroup is given by pids.current.
-
-Organisational operations are not blocked by cgroup policies, so it is possible
-to have pids.current > pids.max. This can be done by either setting the limit to
-be smaller than pids.current, or attaching enough processes to the cgroup such
-that pids.current > pids.max. However, it is not possible to violate a cgroup
-policy through fork() or clone(). fork() and clone() will return -EAGAIN if the
-creation of a new process would cause a cgroup policy to be violated.
-
-To set a cgroup to have no limit, set pids.max to "max". This is the default for
-all new cgroups (N.B. that PID limits are hierarchical, so the most stringent
-limit in the hierarchy is followed).
-
-pids.current tracks all child cgroup hierarchies, so parent/pids.current is a
-superset of parent/child/pids.current.
-
-Example
--------
-
-First, we mount the pids controller:
-# mkdir -p /sys/fs/cgroup/pids
-# mount -t cgroup -o pids none /sys/fs/cgroup/pids
-
-Then we create a hierarchy, set limits and attach processes to it:
-# mkdir -p /sys/fs/cgroup/pids/parent/child
-# echo 2 > /sys/fs/cgroup/pids/parent/pids.max
-# echo $$ > /sys/fs/cgroup/pids/parent/cgroup.procs
-# cat /sys/fs/cgroup/pids/parent/pids.current
-2
-#
-
-It should be noted that attempts to overcome the set limit (2 in this case) will
-fail:
-
-# cat /sys/fs/cgroup/pids/parent/pids.current
-2
-# ( /bin/echo "Here's some processes for you." | cat )
-sh: fork: Resource temporary unavailable
-#
-
-Even if we migrate to a child cgroup (which doesn't have a set limit), we will
-not be able to overcome the most stringent limit in the hierarchy (in this case,
-parent's):
-
-# echo $$ > /sys/fs/cgroup/pids/parent/child/cgroup.procs
-# cat /sys/fs/cgroup/pids/parent/pids.current
-2
-# cat /sys/fs/cgroup/pids/parent/child/pids.current
-2
-# cat /sys/fs/cgroup/pids/parent/child/pids.max
-max
-# ( /bin/echo "Here's some processes for you." | cat )
-sh: fork: Resource temporary unavailable
-#
-
-We can set a limit that is smaller than pids.current, which will stop any new
-processes from being forked at all (note that the shell itself counts towards
-pids.current):
-
-# echo 1 > /sys/fs/cgroup/pids/parent/pids.max
-# /bin/echo "We can't even spawn a single process now."
-sh: fork: Resource temporary unavailable
-# echo 0 > /sys/fs/cgroup/pids/parent/pids.max
-# /bin/echo "We can't even spawn a single process now."
-sh: fork: Resource temporary unavailable
-#
diff --git a/Documentation/cgroup-v1/rdma.txt b/Documentation/cgroup-v1/rdma.txt
deleted file mode 100644
index 9bdb7fd03f83..000000000000
--- a/Documentation/cgroup-v1/rdma.txt
+++ /dev/null
@@ -1,109 +0,0 @@
- RDMA Controller
- ----------------
-
-Contents
---------
-
-1. Overview
- 1-1. What is RDMA controller?
- 1-2. Why RDMA controller needed?
- 1-3. How is RDMA controller implemented?
-2. Usage Examples
-
-1. Overview
-
-1-1. What is RDMA controller?
------------------------------
-
-RDMA controller allows user to limit RDMA/IB specific resources that a given
-set of processes can use. These processes are grouped using RDMA controller.
-
-RDMA controller defines two resources which can be limited for processes of a
-cgroup.
-
-1-2. Why RDMA controller needed?
---------------------------------
-
-Currently user space applications can easily take away all the rdma verb
-specific resources such as AH, CQ, QP, MR etc. Due to which other applications
-in other cgroup or kernel space ULPs may not even get chance to allocate any
-rdma resources. This can lead to service unavailability.
-
-Therefore RDMA controller is needed through which resource consumption
-of processes can be limited. Through this controller different rdma
-resources can be accounted.
-
-1-3. How is RDMA controller implemented?
-----------------------------------------
-
-RDMA cgroup allows limit configuration of resources. Rdma cgroup maintains
-resource accounting per cgroup, per device using resource pool structure.
-Each such resource pool is limited up to 64 resources in given resource pool
-by rdma cgroup, which can be extended later if required.
-
-This resource pool object is linked to the cgroup css. Typically there
-are 0 to 4 resource pool instances per cgroup, per device in most use cases.
-But nothing limits to have it more. At present hundreds of RDMA devices per
-single cgroup may not be handled optimally, however there is no
-known use case or requirement for such configuration either.
-
-Since RDMA resources can be allocated from any process and can be freed by any
-of the child processes which shares the address space, rdma resources are
-always owned by the creator cgroup css. This allows process migration from one
-to other cgroup without major complexity of transferring resource ownership;
-because such ownership is not really present due to shared nature of
-rdma resources. Linking resources around css also ensures that cgroups can be
-deleted after processes migrated. This allow progress migration as well with
-active resources, even though that is not a primary use case.
-
-Whenever RDMA resource charging occurs, owner rdma cgroup is returned to
-the caller. Same rdma cgroup should be passed while uncharging the resource.
-This also allows process migrated with active RDMA resource to charge
-to new owner cgroup for new resource. It also allows to uncharge resource of
-a process from previously charged cgroup which is migrated to new cgroup,
-even though that is not a primary use case.
-
-Resource pool object is created in following situations.
-(a) User sets the limit and no previous resource pool exist for the device
-of interest for the cgroup.
-(b) No resource limits were configured, but IB/RDMA stack tries to
-charge the resource. So that it correctly uncharge them when applications are
-running without limits and later on when limits are enforced during uncharging,
-otherwise usage count will drop to negative.
-
-Resource pool is destroyed if all the resource limits are set to max and
-it is the last resource getting deallocated.
-
-User should set all the limit to max value if it intents to remove/unconfigure
-the resource pool for a particular device.
-
-IB stack honors limits enforced by the rdma controller. When application
-query about maximum resource limits of IB device, it returns minimum of
-what is configured by user for a given cgroup and what is supported by
-IB device.
-
-Following resources can be accounted by rdma controller.
- hca_handle Maximum number of HCA Handles
- hca_object Maximum number of HCA Objects
-
-2. Usage Examples
------------------
-
-(a) Configure resource limit:
-echo mlx4_0 hca_handle=2 hca_object=2000 > /sys/fs/cgroup/rdma/1/rdma.max
-echo ocrdma1 hca_handle=3 > /sys/fs/cgroup/rdma/2/rdma.max
-
-(b) Query resource limit:
-cat /sys/fs/cgroup/rdma/2/rdma.max
-#Output:
-mlx4_0 hca_handle=2 hca_object=2000
-ocrdma1 hca_handle=3 hca_object=max
-
-(c) Query current usage:
-cat /sys/fs/cgroup/rdma/2/rdma.current
-#Output:
-mlx4_0 hca_handle=1 hca_object=20
-ocrdma1 hca_handle=1 hca_object=23
-
-(d) Delete resource limit:
-echo echo mlx4_0 hca_handle=max hca_object=max > /sys/fs/cgroup/rdma/1/rdma.max
diff --git a/Documentation/clearing-warn-once.txt b/Documentation/clearing-warn-once.txt
deleted file mode 100644
index 5b1f5d547be1..000000000000
--- a/Documentation/clearing-warn-once.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-
-WARN_ONCE / WARN_ON_ONCE only print a warning once.
-
-echo 1 > /sys/kernel/debug/clear_warn_once
-
-clears the state and allows the warnings to print once again.
-This can be useful after test suite runs to reproduce problems.
diff --git a/Documentation/cma/debugfs.txt b/Documentation/cma/debugfs.txt
deleted file mode 100644
index 6cef20a8cedc..000000000000
--- a/Documentation/cma/debugfs.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-The CMA debugfs interface is useful to retrieve basic information out of the
-different CMA areas and to test allocation/release in each of the areas.
-
-Each CMA zone represents a directory under <debugfs>/cma/, indexed by the
-kernel's CMA index. So the first CMA zone would be:
-
- <debugfs>/cma/cma-0
-
-The structure of the files created under that directory is as follows:
-
- - [RO] base_pfn: The base PFN (Page Frame Number) of the zone.
- - [RO] count: Amount of memory in the CMA area.
- - [RO] order_per_bit: Order of pages represented by one bit.
- - [RO] bitmap: The bitmap of page states in the zone.
- - [WO] alloc: Allocate N pages from that CMA area. For example:
-
- echo 5 > <debugfs>/cma/cma-2/alloc
-
-would try to allocate 5 pages from the cma-2 area.
-
- - [WO] free: Free N pages from that CMA area, similar to the above.
diff --git a/Documentation/conf.py b/Documentation/conf.py
index 72647a38b5c2..574896cca198 100644
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@@ -1,71 +1,276 @@
-# -*- coding: utf-8 -*-
-#
-# The Linux Kernel documentation build configuration file, created by
-# sphinx-quickstart on Fri Feb 12 13:51:46 2016.
-#
-# This file is execfile()d with the current directory set to its
-# containing dir.
-#
-# Note that not all possible configuration values are present in this
-# autogenerated file.
-#
-# All configuration values have a default; values that are commented out
-# serve to show the default.
+# SPDX-License-Identifier: GPL-2.0-only
+# pylint: disable=C0103,C0209
+
+"""
+The Linux Kernel documentation build configuration file.
+"""
-import sys
import os
-import sphinx
+import shutil
+import sys
-# Get Sphinx version
-major, minor, patch = sphinx.version_info[:3]
+from textwrap import dedent
+import sphinx
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
-sys.path.insert(0, os.path.abspath('sphinx'))
-from load_config import loadConfig
+sys.path.insert(0, os.path.abspath("sphinx"))
+
+from load_config import loadConfig # pylint: disable=C0413,E0401
+
+# Minimal supported version
+needs_sphinx = "3.4.3"
+
+# Get Sphinx version
+major, minor, patch = sphinx.version_info[:3] # pylint: disable=I1101
+
+# Include_patterns were added on Sphinx 5.1
+if (major < 5) or (major == 5 and minor < 1):
+ has_include_patterns = False
+else:
+ has_include_patterns = True
+ # Include patterns that don't contain directory names, in glob format
+ include_patterns = ["**.rst"]
+
+# Location of Documentation/ directory
+doctree = os.path.abspath(".")
+
+# Exclude of patterns that don't contain directory names, in glob format.
+exclude_patterns = []
+
+# List of patterns that contain directory names in glob format.
+dyn_include_patterns = []
+dyn_exclude_patterns = ["output"]
+
+# Currently, only netlink/specs has a parser for yaml.
+# Prefer using include patterns if available, as it is faster
+if has_include_patterns:
+ dyn_include_patterns.append("netlink/specs/*.yaml")
+else:
+ dyn_exclude_patterns.append("netlink/*.yaml")
+ dyn_exclude_patterns.append("devicetree/bindings/**.yaml")
+ dyn_exclude_patterns.append("core-api/kho/bindings/**.yaml")
+
+# Properly handle directory patterns and LaTeX docs
+# -------------------------------------------------
+
+def config_init(app, config):
+ """
+ Initialize path-dependent variabled
+
+ On Sphinx, all directories are relative to what it is passed as
+ SOURCEDIR parameter for sphinx-build. Due to that, all patterns
+ that have directory names on it need to be dynamically set, after
+ converting them to a relative patch.
+
+ As Sphinx doesn't include any patterns outside SOURCEDIR, we should
+ exclude relative patterns that start with "../".
+ """
+
+ # setup include_patterns dynamically
+ if has_include_patterns:
+ for p in dyn_include_patterns:
+ full = os.path.join(doctree, p)
+
+ rel_path = os.path.relpath(full, start=app.srcdir)
+ if rel_path.startswith("../"):
+ continue
+
+ config.include_patterns.append(rel_path)
+
+ # setup exclude_patterns dynamically
+ for p in dyn_exclude_patterns:
+ full = os.path.join(doctree, p)
+
+ rel_path = os.path.relpath(full, start=app.srcdir)
+ if rel_path.startswith("../"):
+ continue
+
+ config.exclude_patterns.append(rel_path)
+
+ # LaTeX and PDF output require a list of documents with are dependent
+ # of the app.srcdir. Add them here
+
+ # When SPHINXDIRS is used, we just need to get index.rst, if it exists
+ if not os.path.samefile(doctree, app.srcdir):
+ doc = os.path.basename(app.srcdir)
+ fname = "index"
+ if os.path.exists(os.path.join(app.srcdir, fname + ".rst")):
+ latex_documents.append((fname, doc + ".tex",
+ "Linux %s Documentation" % doc.capitalize(),
+ "The kernel development community",
+ "manual"))
+ return
+
+ # When building all docs, or when a main index.rst doesn't exist, seek
+ # for it on subdirectories
+ for doc in os.listdir(app.srcdir):
+ fname = os.path.join(doc, "index")
+ if not os.path.exists(os.path.join(app.srcdir, fname + ".rst")):
+ continue
+
+ has = False
+ for l in latex_documents:
+ if l[0] == fname:
+ has = True
+ break
+
+ if not has:
+ latex_documents.append((fname, doc + ".tex",
+ "Linux %s Documentation" % doc.capitalize(),
+ "The kernel development community",
+ "manual"))
+
+# helper
+# ------
+
+
+def have_command(cmd):
+ """Search ``cmd`` in the ``PATH`` environment.
+
+ If found, return True.
+ If not found, return False.
+ """
+ return shutil.which(cmd) is not None
+
# -- General configuration ------------------------------------------------
-# If your documentation needs a minimal Sphinx version, state it here.
-needs_sphinx = '1.3'
+# Add any Sphinx extensions in alphabetic order
+extensions = [
+ "automarkup",
+ "kernel_abi",
+ "kerneldoc",
+ "kernel_feat",
+ "kernel_include",
+ "kfigure",
+ "maintainers_include",
+ "parser_yaml",
+ "rstFlatTable",
+ "sphinx.ext.autosectionlabel",
+ "sphinx.ext.ifconfig",
+ "translations",
+]
+# Since Sphinx version 3, the C function parser is more pedantic with regards
+# to type checking. Due to that, having macros at c:function cause problems.
+# Those needed to be escaped by using c_id_attributes[] array
+c_id_attributes = [
+ # GCC Compiler types not parsed by Sphinx:
+ "__restrict__",
+
+ # include/linux/compiler_types.h:
+ "__iomem",
+ "__kernel",
+ "noinstr",
+ "notrace",
+ "__percpu",
+ "__rcu",
+ "__user",
+ "__force",
+ "__counted_by_le",
+ "__counted_by_be",
+
+ # include/linux/compiler_attributes.h:
+ "__alias",
+ "__aligned",
+ "__aligned_largest",
+ "__always_inline",
+ "__assume_aligned",
+ "__cold",
+ "__attribute_const__",
+ "__copy",
+ "__pure",
+ "__designated_init",
+ "__visible",
+ "__printf",
+ "__scanf",
+ "__gnu_inline",
+ "__malloc",
+ "__mode",
+ "__no_caller_saved_registers",
+ "__noclone",
+ "__nonstring",
+ "__noreturn",
+ "__packed",
+ "__pure",
+ "__section",
+ "__always_unused",
+ "__maybe_unused",
+ "__used",
+ "__weak",
+ "noinline",
+ "__fix_address",
+ "__counted_by",
+
+ # include/linux/memblock.h:
+ "__init_memblock",
+ "__meminit",
+
+ # include/linux/init.h:
+ "__init",
+ "__ref",
+
+ # include/linux/linkage.h:
+ "asmlinkage",
+
+ # include/linux/btf.h
+ "__bpf_kfunc",
+]
-# Add any Sphinx extension module names here, as strings. They can be
-# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
-# ones.
-extensions = ['kerneldoc', 'rstFlatTable', 'kernel_include', 'cdomain', 'kfigure', 'sphinx.ext.ifconfig']
+# Ensure that autosectionlabel will produce unique names
+autosectionlabel_prefix_document = True
+autosectionlabel_maxdepth = 2
+
+# Load math renderer:
+# For html builder, load imgmath only when its dependencies are met.
+# mathjax is the default math renderer since Sphinx 1.8.
+have_latex = have_command("latex")
+have_dvipng = have_command("dvipng")
+load_imgmath = have_latex and have_dvipng
+
+# Respect SPHINX_IMGMATH (for html docs only)
+if "SPHINX_IMGMATH" in os.environ:
+ env_sphinx_imgmath = os.environ["SPHINX_IMGMATH"]
+ if "yes" in env_sphinx_imgmath:
+ load_imgmath = True
+ elif "no" in env_sphinx_imgmath:
+ load_imgmath = False
+ else:
+ sys.stderr.write("Unknown env SPHINX_IMGMATH=%s ignored.\n" % env_sphinx_imgmath)
-# The name of the math extension changed on Sphinx 1.4
-if major == 1 and minor > 3:
+if load_imgmath:
extensions.append("sphinx.ext.imgmath")
+ math_renderer = "imgmath"
else:
- extensions.append("sphinx.ext.pngmath")
+ math_renderer = "mathjax"
# Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = ["sphinx/templates"]
-# The suffix(es) of source filenames.
-# You can specify multiple suffix as a list of string:
-# source_suffix = ['.rst', '.md']
-source_suffix = '.rst'
+# The suffixes of source filenames that will be automatically parsed
+source_suffix = {
+ ".rst": "restructuredtext",
+ ".yaml": "yaml",
+}
# The encoding of source files.
-#source_encoding = 'utf-8-sig'
+# source_encoding = 'utf-8-sig'
# The master toctree document.
-master_doc = 'index'
+master_doc = "index"
# General information about the project.
-project = 'The Linux Kernel'
-copyright = 'The kernel development community'
-author = 'The kernel development community'
+project = "The Linux Kernel"
+copyright = "The kernel development community" # pylint: disable=W0622
+author = "The kernel development community"
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
-# In a normal build, version and release are are set to KERNELVERSION and
+# In a normal build, version and release are set to KERNELVERSION and
# KERNELRELEASE, respectively, from the Makefile via Sphinx command line
# arguments.
#
@@ -74,360 +279,249 @@ author = 'The kernel development community'
try:
makefile_version = None
makefile_patchlevel = None
- for line in open('../Makefile'):
- key, val = [x.strip() for x in line.split('=', 2)]
- if key == 'VERSION':
- makefile_version = val
- elif key == 'PATCHLEVEL':
- makefile_patchlevel = val
- if makefile_version and makefile_patchlevel:
- break
-except:
+ with open("../Makefile", encoding="utf=8") as fp:
+ for line in fp:
+ key, val = [x.strip() for x in line.split("=", 2)]
+ if key == "VERSION":
+ makefile_version = val
+ elif key == "PATCHLEVEL":
+ makefile_patchlevel = val
+ if makefile_version and makefile_patchlevel:
+ break
+except Exception:
pass
finally:
if makefile_version and makefile_patchlevel:
- version = release = makefile_version + '.' + makefile_patchlevel
+ version = release = makefile_version + "." + makefile_patchlevel
else:
version = release = "unknown version"
+
+def get_cline_version():
+ """
+ HACK: There seems to be no easy way for us to get at the version and
+ release information passed in from the makefile...so go pawing through the
+ command-line options and find it for ourselves.
+ """
+
+ c_version = c_release = ""
+ for arg in sys.argv:
+ if arg.startswith("version="):
+ c_version = arg[8:]
+ elif arg.startswith("release="):
+ c_release = arg[8:]
+ if c_version:
+ if c_release:
+ return c_version + "-" + c_release
+ return c_version
+ return version # Whatever we came up with before
+
+
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
-language = None
+language = "en"
# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
-#today = ''
+# today = ''
# Else, today_fmt is used as the format for a strftime call.
-#today_fmt = '%B %d, %Y'
-
-# List of patterns, relative to source directory, that match files and
-# directories to ignore when looking for source files.
-exclude_patterns = ['output']
+# today_fmt = '%B %d, %Y'
# The reST default role (used for this markup: `text`) to use for all
# documents.
-#default_role = None
+# default_role = None
# If true, '()' will be appended to :func: etc. cross-reference text.
-#add_function_parentheses = True
+# add_function_parentheses = True
# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
-#add_module_names = True
+# add_module_names = True
# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
-#show_authors = False
+# show_authors = False
# The name of the Pygments (syntax highlighting) style to use.
-pygments_style = 'sphinx'
+pygments_style = "sphinx"
# A list of ignored prefixes for module index sorting.
-#modindex_common_prefix = []
+# modindex_common_prefix = []
# If true, keep warnings as "system message" paragraphs in the built documents.
-#keep_warnings = False
+# keep_warnings = False
# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = False
-primary_domain = 'c'
-highlight_language = 'none'
+primary_domain = "c"
+highlight_language = "none"
# -- Options for HTML output ----------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
-# The Read the Docs theme is available from
-# - https://github.com/snide/sphinx_rtd_theme
-# - https://pypi.python.org/pypi/sphinx_rtd_theme
-# - python-sphinx-rtd-theme package (on Debian)
-try:
- import sphinx_rtd_theme
- html_theme = 'sphinx_rtd_theme'
- html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
-except ImportError:
- sys.stderr.write('Warning: The Sphinx \'sphinx_rtd_theme\' HTML theme was not found. Make sure you have the theme installed to produce pretty HTML output. Falling back to the default theme.\n')
-
-# Theme options are theme-specific and customize the look and feel of a theme
-# further. For a list of options available for each theme, see the
-# documentation.
-#html_theme_options = {}
-
-# Add any paths that contain custom themes here, relative to this directory.
-#html_theme_path = []
-
-# The name for this set of Sphinx documents. If None, it defaults to
-# "<project> v<release> documentation".
-#html_title = None
-
-# A shorter title for the navigation bar. Default is the same as html_title.
-#html_short_title = None
-
-# The name of an image file (relative to this directory) to place at the top
-# of the sidebar.
-#html_logo = None
-
-# The name of an image file (within the static path) to use as favicon of the
-# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
-# pixels large.
-#html_favicon = None
-
-# Add any paths that contain custom static files (such as style sheets) here,
-# relative to this directory. They are copied after the builtin static files,
-# so a file named "default.css" will overwrite the builtin "default.css".
+# Default theme
+html_theme = "alabaster"
+html_css_files = []
-html_static_path = ['sphinx-static']
+if "DOCS_THEME" in os.environ:
+ html_theme = os.environ["DOCS_THEME"]
-html_context = {
- 'css_files': [
- '_static/theme_overrides.css',
- ],
-}
+if html_theme in ["sphinx_rtd_theme", "sphinx_rtd_dark_mode"]:
+ # Read the Docs theme
+ try:
+ import sphinx_rtd_theme
-# Add any extra paths that contain custom files (such as robots.txt or
-# .htaccess) here, relative to this directory. These files are copied
-# directly to the root of the documentation.
-#html_extra_path = []
+ html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
-# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
-# using the given strftime format.
-#html_last_updated_fmt = '%b %d, %Y'
+ # Add any paths that contain custom static files (such as style sheets) here,
+ # relative to this directory. They are copied after the builtin static files,
+ # so a file named "default.css" will overwrite the builtin "default.css".
+ html_css_files = [
+ "theme_overrides.css",
+ ]
-# If true, SmartyPants will be used to convert quotes and dashes to
-# typographically correct entities.
-#html_use_smartypants = True
+ # Read the Docs dark mode override theme
+ if html_theme == "sphinx_rtd_dark_mode":
+ try:
+ import sphinx_rtd_dark_mode # pylint: disable=W0611
-# Custom sidebar templates, maps document names to template names.
-#html_sidebars = {}
+ extensions.append("sphinx_rtd_dark_mode")
+ except ImportError:
+ html_theme = "sphinx_rtd_theme"
-# Additional templates that should be rendered to pages, maps page names to
-# template names.
-#html_additional_pages = {}
+ if html_theme == "sphinx_rtd_theme":
+ # Add color-specific RTD normal mode
+ html_css_files.append("theme_rtd_colors.css")
-# If false, no module index is generated.
-#html_domain_indices = True
+ html_theme_options = {
+ "navigation_depth": -1,
+ }
-# If false, no index is generated.
-#html_use_index = True
+ except ImportError:
+ html_theme = "alabaster"
-# If true, the index is split into individual pages for each letter.
-#html_split_index = False
+if "DOCS_CSS" in os.environ:
+ css = os.environ["DOCS_CSS"].split(" ")
-# If true, links to the reST sources are added to the pages.
-#html_show_sourcelink = True
+ for l in css:
+ html_css_files.append(l)
-# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
-#html_show_sphinx = True
+if html_theme == "alabaster":
+ html_theme_options = {
+ "description": get_cline_version(),
+ "page_width": "65em",
+ "sidebar_width": "15em",
+ "fixed_sidebar": "true",
+ "font_size": "inherit",
+ "font_family": "serif",
+ }
-# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
-#html_show_copyright = True
+sys.stderr.write("Using %s theme\n" % html_theme)
-# If true, an OpenSearch description file will be output, and all pages will
-# contain a <link> tag referring to it. The value of this option must be the
-# base URL from which the finished HTML is served.
-#html_use_opensearch = ''
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ["sphinx-static"]
-# This is the file name suffix for HTML files (e.g. ".xhtml").
-#html_file_suffix = None
+# If true, Docutils "smart quotes" will be used to convert quotes and dashes
+# to typographically correct entities. However, conversion of "--" to "—"
+# is not always what we want, so enable only quotes.
+smartquotes_action = "q"
-# Language to be used for generating the HTML full-text search index.
-# Sphinx supports the following languages:
-# 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja'
-# 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr'
-#html_search_language = 'en'
+# Custom sidebar templates, maps document names to template names.
+# Note that the RTD theme ignores this
+html_sidebars = {"**": ["searchbox.html",
+ "kernel-toc.html",
+ "sourcelink.html"]}
-# A dictionary with options for the search language support, empty by default.
-# Now only 'ja' uses this config value
-#html_search_options = {'type': 'default'}
+# about.html is available for alabaster theme. Add it at the front.
+if html_theme == "alabaster":
+ html_sidebars["**"].insert(0, "about.html")
-# The name of a javascript file (relative to the configuration directory) that
-# implements a search results scorer. If empty, the default will be used.
-#html_search_scorer = 'scorer.js'
+# The name of an image file (relative to this directory) to place at the top
+# of the sidebar.
+html_logo = "images/logo.svg"
# Output file base name for HTML help builder.
-htmlhelp_basename = 'TheLinuxKerneldoc'
+htmlhelp_basename = "TheLinuxKerneldoc"
# -- Options for LaTeX output ---------------------------------------------
latex_elements = {
-# The paper size ('letterpaper' or 'a4paper').
-'papersize': 'a4paper',
-
-# The font size ('10pt', '11pt' or '12pt').
-'pointsize': '11pt',
-
-# Latex figure (float) alignment
-#'figure_align': 'htbp',
-
-# Don't mangle with UTF-8 chars
-'inputenc': '',
-'utf8extra': '',
-
-# Additional stuff for the LaTeX preamble.
- 'preamble': '''
- % Use some font with UTF-8 support with XeLaTeX
- \\usepackage{fontspec}
- \\setsansfont{DejaVu Sans}
- \\setromanfont{DejaVu Serif}
- \\setmonofont{DejaVu Sans Mono}
-
- '''
+ # The paper size ('letterpaper' or 'a4paper').
+ "papersize": "a4paper",
+ "passoptionstopackages": dedent(r"""
+ \PassOptionsToPackage{svgnames}{xcolor}
+ """),
+ # The font size ('10pt', '11pt' or '12pt').
+ "pointsize": "11pt",
+ # Needed to generate a .ind file
+ "printindex": r"\footnotesize\raggedright\printindex",
+ # Latex figure (float) alignment
+ # 'figure_align': 'htbp',
+ # Don't mangle with UTF-8 chars
+ "fontenc": "",
+ "inputenc": "",
+ "utf8extra": "",
+ # Set document margins
+ "sphinxsetup": dedent(r"""
+ hmargin=0.5in, vmargin=1in,
+ parsedliteralwraps=true,
+ verbatimhintsturnover=false,
+ """),
+ #
+ # Some of our authors are fond of deep nesting; tell latex to
+ # cope.
+ #
+ "maxlistdepth": "10",
+ # For CJK One-half spacing, need to be in front of hyperref
+ "extrapackages": r"\usepackage{setspace}",
+ "fontpkg": dedent(r"""
+ \usepackage{fontspec}
+ \setmainfont{DejaVu Serif}
+ \setsansfont{DejaVu Sans}
+ \setmonofont{DejaVu Sans Mono}
+ \newfontfamily\headingfont{DejaVu Serif}
+ """),
+ "preamble": dedent(r"""
+ % Load kerneldoc specific LaTeX settings
+ \input{kerneldoc-preamble.sty}
+ """)
}
-# Fix reference escape troubles with Sphinx 1.4.x
-if major == 1 and minor > 3:
- latex_elements['preamble'] += '\\renewcommand*{\\DUrole}[2]{ #2 }\n'
-
-if major == 1 and minor <= 4:
- latex_elements['preamble'] += '\\usepackage[margin=0.5in, top=1in, bottom=1in]{geometry}'
-elif major == 1 and (minor > 5 or (minor == 5 and patch >= 3)):
- latex_elements['sphinxsetup'] = 'hmargin=0.5in, vmargin=1in'
- latex_elements['preamble'] += '\\fvset{fontsize=auto}\n'
-
-# Customize notice background colors on Sphinx < 1.6:
-if major == 1 and minor < 6:
- latex_elements['preamble'] += '''
- \\usepackage{ifthen}
-
- % Put notes in color and let them be inside a table
- \\definecolor{NoteColor}{RGB}{204,255,255}
- \\definecolor{WarningColor}{RGB}{255,204,204}
- \\definecolor{AttentionColor}{RGB}{255,255,204}
- \\definecolor{ImportantColor}{RGB}{192,255,204}
- \\definecolor{OtherColor}{RGB}{204,204,204}
- \\newlength{\\mynoticelength}
- \\makeatletter\\newenvironment{coloredbox}[1]{%
- \\setlength{\\fboxrule}{1pt}
- \\setlength{\\fboxsep}{7pt}
- \\setlength{\\mynoticelength}{\\linewidth}
- \\addtolength{\\mynoticelength}{-2\\fboxsep}
- \\addtolength{\\mynoticelength}{-2\\fboxrule}
- \\begin{lrbox}{\\@tempboxa}\\begin{minipage}{\\mynoticelength}}{\\end{minipage}\\end{lrbox}%
- \\ifthenelse%
- {\\equal{\\py@noticetype}{note}}%
- {\\colorbox{NoteColor}{\\usebox{\\@tempboxa}}}%
- {%
- \\ifthenelse%
- {\\equal{\\py@noticetype}{warning}}%
- {\\colorbox{WarningColor}{\\usebox{\\@tempboxa}}}%
- {%
- \\ifthenelse%
- {\\equal{\\py@noticetype}{attention}}%
- {\\colorbox{AttentionColor}{\\usebox{\\@tempboxa}}}%
- {%
- \\ifthenelse%
- {\\equal{\\py@noticetype}{important}}%
- {\\colorbox{ImportantColor}{\\usebox{\\@tempboxa}}}%
- {\\colorbox{OtherColor}{\\usebox{\\@tempboxa}}}%
- }%
- }%
- }%
- }\\makeatother
-
- \\makeatletter
- \\renewenvironment{notice}[2]{%
- \\def\\py@noticetype{#1}
- \\begin{coloredbox}{#1}
- \\bf\\it
- \\par\\strong{#2}
- \\csname py@noticestart@#1\\endcsname
- }
- {
- \\csname py@noticeend@\\py@noticetype\\endcsname
- \\end{coloredbox}
- }
- \\makeatother
-
- '''
-
-# With Sphinx 1.6, it is possible to change the Bg color directly
-# by using:
-# \definecolor{sphinxnoteBgColor}{RGB}{204,255,255}
-# \definecolor{sphinxwarningBgColor}{RGB}{255,204,204}
-# \definecolor{sphinxattentionBgColor}{RGB}{255,255,204}
-# \definecolor{sphinximportantBgColor}{RGB}{192,255,204}
-#
-# However, it require to use sphinx heavy box with:
-#
-# \renewenvironment{sphinxlightbox} {%
-# \\begin{sphinxheavybox}
-# }
-# \\end{sphinxheavybox}
-# }
-#
-# Unfortunately, the implementation is buggy: if a note is inside a
-# table, it isn't displayed well. So, for now, let's use boring
-# black and white notes.
-
-# Grouping the document tree into LaTeX files. List of tuples
-# (source start file, target name, title,
-# author, documentclass [howto, manual, or own class]).
-# Sorted in alphabetical order
-latex_documents = [
- ('admin-guide/index', 'linux-user.tex', 'Linux Kernel User Documentation',
- 'The kernel development community', 'manual'),
- ('core-api/index', 'core-api.tex', 'The kernel core API manual',
- 'The kernel development community', 'manual'),
- ('crypto/index', 'crypto-api.tex', 'Linux Kernel Crypto API manual',
- 'The kernel development community', 'manual'),
- ('dev-tools/index', 'dev-tools.tex', 'Development tools for the Kernel',
- 'The kernel development community', 'manual'),
- ('doc-guide/index', 'kernel-doc-guide.tex', 'Linux Kernel Documentation Guide',
- 'The kernel development community', 'manual'),
- ('driver-api/index', 'driver-api.tex', 'The kernel driver API manual',
- 'The kernel development community', 'manual'),
- ('filesystems/index', 'filesystems.tex', 'Linux Filesystems API',
- 'The kernel development community', 'manual'),
- ('admin-guide/ext4', 'ext4-admin-guide.tex', 'ext4 Administration Guide',
- 'ext4 Community', 'manual'),
- ('filesystems/ext4/index', 'ext4-data-structures.tex',
- 'ext4 Data Structures and Algorithms', 'ext4 Community', 'manual'),
- ('gpu/index', 'gpu.tex', 'Linux GPU Driver Developer\'s Guide',
- 'The kernel development community', 'manual'),
- ('input/index', 'linux-input.tex', 'The Linux input driver subsystem',
- 'The kernel development community', 'manual'),
- ('kernel-hacking/index', 'kernel-hacking.tex', 'Unreliable Guide To Hacking The Linux Kernel',
- 'The kernel development community', 'manual'),
- ('media/index', 'media.tex', 'Linux Media Subsystem Documentation',
- 'The kernel development community', 'manual'),
- ('networking/index', 'networking.tex', 'Linux Networking Documentation',
- 'The kernel development community', 'manual'),
- ('process/index', 'development-process.tex', 'Linux Kernel Development Documentation',
- 'The kernel development community', 'manual'),
- ('security/index', 'security.tex', 'The kernel security subsystem manual',
- 'The kernel development community', 'manual'),
- ('sh/index', 'sh.tex', 'SuperH architecture implementation manual',
- 'The kernel development community', 'manual'),
- ('sound/index', 'sound.tex', 'Linux Sound Subsystem Documentation',
- 'The kernel development community', 'manual'),
- ('userspace-api/index', 'userspace-api.tex', 'The Linux kernel user-space API guide',
- 'The kernel development community', 'manual'),
-]
+# This will be filled up by config-inited event
+latex_documents = []
# The name of an image file (relative to this directory) to place at the top of
# the title page.
-#latex_logo = None
+# latex_logo = None
# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
-#latex_use_parts = False
+# latex_use_parts = False
# If true, show page references after internal links.
-#latex_show_pagerefs = False
+# latex_show_pagerefs = False
# If true, show URL addresses after external links.
-#latex_show_urls = False
+# latex_show_urls = False
# Documents to append as an appendix to all manuals.
-#latex_appendices = []
+# latex_appendices = []
# If false, no module index is generated.
-#latex_domain_indices = True
+# latex_domain_indices = True
+
+# Additional LaTeX stuff to be copied to build directory
+latex_additional_files = [
+ "sphinx/kerneldoc-preamble.sty",
+]
# -- Options for manual page output ---------------------------------------
@@ -435,12 +529,11 @@ latex_documents = [
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
- (master_doc, 'thelinuxkernel', 'The Linux Kernel Documentation',
- [author], 1)
+ (master_doc, "thelinuxkernel", "The Linux Kernel Documentation", [author], 1)
]
# If true, show URL addresses after external links.
-#man_show_urls = False
+# man_show_urls = False
# -- Options for Texinfo output -------------------------------------------
@@ -448,24 +541,15 @@ man_pages = [
# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
# dir menu entry, description, category)
-texinfo_documents = [
- (master_doc, 'TheLinuxKernel', 'The Linux Kernel Documentation',
- author, 'TheLinuxKernel', 'One line description of project.',
- 'Miscellaneous'),
-]
-
-# Documents to append as an appendix to all manuals.
-#texinfo_appendices = []
-
-# If false, no module index is generated.
-#texinfo_domain_indices = True
-
-# How to display URL addresses: 'footnote', 'no', or 'inline'.
-#texinfo_show_urls = 'footnote'
-
-# If true, do not generate a @detailmenu in the "Top" node's menu.
-#texinfo_no_detailmenu = False
-
+texinfo_documents = [(
+ master_doc,
+ "TheLinuxKernel",
+ "The Linux Kernel Documentation",
+ author,
+ "TheLinuxKernel",
+ "One line description of project.",
+ "Miscellaneous",
+ ),]
# -- Options for Epub output ----------------------------------------------
@@ -475,90 +559,38 @@ epub_author = author
epub_publisher = author
epub_copyright = copyright
-# The basename for the epub file. It defaults to the project name.
-#epub_basename = project
-
-# The HTML theme for the epub output. Since the default themes are not
-# optimized for small screen space, using the same theme for HTML and epub
-# output is usually not wise. This defaults to 'epub', a theme designed to save
-# visual space.
-#epub_theme = 'epub'
-
-# The language of the text. It defaults to the language option
-# or 'en' if the language is not set.
-#epub_language = ''
-
-# The scheme of the identifier. Typical schemes are ISBN or URL.
-#epub_scheme = ''
-
-# The unique identifier of the text. This can be a ISBN number
-# or the project homepage.
-#epub_identifier = ''
-
-# A unique identification for the text.
-#epub_uid = ''
-
-# A tuple containing the cover image and cover page html template filenames.
-#epub_cover = ()
-
-# A sequence of (type, uri, title) tuples for the guide element of content.opf.
-#epub_guide = ()
-
-# HTML files that should be inserted before the pages created by sphinx.
-# The format is a list of tuples containing the path and title.
-#epub_pre_files = []
-
-# HTML files that should be inserted after the pages created by sphinx.
-# The format is a list of tuples containing the path and title.
-#epub_post_files = []
-
# A list of files that should not be packed into the epub file.
-epub_exclude_files = ['search.html']
-
-# The depth of the table of contents in toc.ncx.
-#epub_tocdepth = 3
-
-# Allow duplicate toc entries.
-#epub_tocdup = True
-
-# Choose between 'default' and 'includehidden'.
-#epub_tocscope = 'default'
+epub_exclude_files = ["search.html"]
-# Fix unsupported image types using the Pillow.
-#epub_fix_images = False
-
-# Scale large images.
-#epub_max_image_width = 0
-
-# How to display URL addresses: 'footnote', 'no', or 'inline'.
-#epub_show_urls = 'inline'
-
-# If false, no index is generated.
-#epub_use_index = True
-
-#=======
+# =======
# rst2pdf
#
# Grouping the document tree into PDF files. List of tuples
# (source start file, target name, title, author, options).
#
-# See the Sphinx chapter of http://ralsina.me/static/manual.pdf
+# See the Sphinx chapter of https://ralsina.me/static/manual.pdf
#
# FIXME: Do not add the index file here; the result will be too big. Adding
# multiple PDF files here actually tries to get the cross-referencing right
# *between* PDF files.
pdf_documents = [
- ('kernel-documentation', u'Kernel', u'Kernel', u'J. Random Bozo'),
+ ("kernel-documentation", "Kernel", "Kernel", "J. Random Bozo"),
]
# kernel-doc extension configuration for running Sphinx directly (e.g. by Read
# the Docs). In a normal build, these are supplied from the Makefile via command
# line arguments.
-kerneldoc_bin = '../scripts/kernel-doc'
-kerneldoc_srctree = '..'
+kerneldoc_bin = "../scripts/kernel-doc.py"
+kerneldoc_srctree = ".."
# ------------------------------------------------------------------------------
# Since loadConfig overwrites settings from the global namespace, it has to be
# the last statement in the conf.py file
# ------------------------------------------------------------------------------
loadConfig(globals())
+
+
+def setup(app):
+ """Patterns need to be updated at init time on older Sphinx versions"""
+
+ app.connect('config-inited', config_init)
diff --git a/Documentation/connector/connector.txt b/Documentation/connector/connector.txt
deleted file mode 100644
index ab7ca897fab7..000000000000
--- a/Documentation/connector/connector.txt
+++ /dev/null
@@ -1,196 +0,0 @@
-/*****************************************/
-Kernel Connector.
-/*****************************************/
-
-Kernel connector - new netlink based userspace <-> kernel space easy
-to use communication module.
-
-The Connector driver makes it easy to connect various agents using a
-netlink based network. One must register a callback and an identifier.
-When the driver receives a special netlink message with the appropriate
-identifier, the appropriate callback will be called.
-
-From the userspace point of view it's quite straightforward:
-
- socket();
- bind();
- send();
- recv();
-
-But if kernelspace wants to use the full power of such connections, the
-driver writer must create special sockets, must know about struct sk_buff
-handling, etc... The Connector driver allows any kernelspace agents to use
-netlink based networking for inter-process communication in a significantly
-easier way:
-
-int cn_add_callback(struct cb_id *id, char *name, void (*callback) (struct cn_msg *, struct netlink_skb_parms *));
-void cn_netlink_send_multi(struct cn_msg *msg, u16 len, u32 portid, u32 __group, int gfp_mask);
-void cn_netlink_send(struct cn_msg *msg, u32 portid, u32 __group, int gfp_mask);
-
-struct cb_id
-{
- __u32 idx;
- __u32 val;
-};
-
-idx and val are unique identifiers which must be registered in the
-connector.h header for in-kernel usage. void (*callback) (void *) is a
-callback function which will be called when a message with above idx.val
-is received by the connector core. The argument for that function must
-be dereferenced to struct cn_msg *.
-
-struct cn_msg
-{
- struct cb_id id;
-
- __u32 seq;
- __u32 ack;
-
- __u32 len; /* Length of the following data */
- __u8 data[0];
-};
-
-/*****************************************/
-Connector interfaces.
-/*****************************************/
-
-int cn_add_callback(struct cb_id *id, char *name, void (*callback) (struct cn_msg *, struct netlink_skb_parms *));
-
- Registers new callback with connector core.
-
- struct cb_id *id - unique connector's user identifier.
- It must be registered in connector.h for legal in-kernel users.
- char *name - connector's callback symbolic name.
- void (*callback) (struct cn..) - connector's callback.
- cn_msg and the sender's credentials
-
-
-void cn_del_callback(struct cb_id *id);
-
- Unregisters new callback with connector core.
-
- struct cb_id *id - unique connector's user identifier.
-
-
-int cn_netlink_send_multi(struct cn_msg *msg, u16 len, u32 portid, u32 __groups, int gfp_mask);
-int cn_netlink_send(struct cn_msg *msg, u32 portid, u32 __groups, int gfp_mask);
-
- Sends message to the specified groups. It can be safely called from
- softirq context, but may silently fail under strong memory pressure.
- If there are no listeners for given group -ESRCH can be returned.
-
- struct cn_msg * - message header(with attached data).
- u16 len - for *_multi multiple cn_msg messages can be sent
- u32 port - destination port.
- If non-zero the message will be sent to the
- given port, which should be set to the
- original sender.
- u32 __group - destination group.
- If port and __group is zero, then appropriate group will
- be searched through all registered connector users,
- and message will be delivered to the group which was
- created for user with the same ID as in msg.
- If __group is not zero, then message will be delivered
- to the specified group.
- int gfp_mask - GFP mask.
-
- Note: When registering new callback user, connector core assigns
- netlink group to the user which is equal to its id.idx.
-
-/*****************************************/
-Protocol description.
-/*****************************************/
-
-The current framework offers a transport layer with fixed headers. The
-recommended protocol which uses such a header is as following:
-
-msg->seq and msg->ack are used to determine message genealogy. When
-someone sends a message, they use a locally unique sequence and random
-acknowledge number. The sequence number may be copied into
-nlmsghdr->nlmsg_seq too.
-
-The sequence number is incremented with each message sent.
-
-If you expect a reply to the message, then the sequence number in the
-received message MUST be the same as in the original message, and the
-acknowledge number MUST be the same + 1.
-
-If we receive a message and its sequence number is not equal to one we
-are expecting, then it is a new message. If we receive a message and
-its sequence number is the same as one we are expecting, but its
-acknowledge is not equal to the sequence number in the original
-message + 1, then it is a new message.
-
-Obviously, the protocol header contains the above id.
-
-The connector allows event notification in the following form: kernel
-driver or userspace process can ask connector to notify it when
-selected ids will be turned on or off (registered or unregistered its
-callback). It is done by sending a special command to the connector
-driver (it also registers itself with id={-1, -1}).
-
-As example of this usage can be found in the cn_test.c module which
-uses the connector to request notification and to send messages.
-
-/*****************************************/
-Reliability.
-/*****************************************/
-
-Netlink itself is not a reliable protocol. That means that messages can
-be lost due to memory pressure or process' receiving queue overflowed,
-so caller is warned that it must be prepared. That is why the struct
-cn_msg [main connector's message header] contains u32 seq and u32 ack
-fields.
-
-/*****************************************/
-Userspace usage.
-/*****************************************/
-
-2.6.14 has a new netlink socket implementation, which by default does not
-allow people to send data to netlink groups other than 1.
-So, if you wish to use a netlink socket (for example using connector)
-with a different group number, the userspace application must subscribe to
-that group first. It can be achieved by the following pseudocode:
-
-s = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
-
-l_local.nl_family = AF_NETLINK;
-l_local.nl_groups = 12345;
-l_local.nl_pid = 0;
-
-if (bind(s, (struct sockaddr *)&l_local, sizeof(struct sockaddr_nl)) == -1) {
- perror("bind");
- close(s);
- return -1;
-}
-
-{
- int on = l_local.nl_groups;
- setsockopt(s, 270, 1, &on, sizeof(on));
-}
-
-Where 270 above is SOL_NETLINK, and 1 is a NETLINK_ADD_MEMBERSHIP socket
-option. To drop a multicast subscription, one should call the above socket
-option with the NETLINK_DROP_MEMBERSHIP parameter which is defined as 0.
-
-2.6.14 netlink code only allows to select a group which is less or equal to
-the maximum group number, which is used at netlink_kernel_create() time.
-In case of connector it is CN_NETLINK_USERS + 0xf, so if you want to use
-group number 12345, you must increment CN_NETLINK_USERS to that number.
-Additional 0xf numbers are allocated to be used by non-in-kernel users.
-
-Due to this limitation, group 0xffffffff does not work now, so one can
-not use add/remove connector's group notifications, but as far as I know,
-only cn_test.c test module used it.
-
-Some work in netlink area is still being done, so things can be changed in
-2.6.15 timeframe, if it will happen, documentation will be updated for that
-kernel.
-
-/*****************************************/
-Code samples
-/*****************************************/
-
-Sample code for a connector test module and user space can be found
-in samples/connector/. To build this code, enable CONFIG_CONNECTOR
-and CONFIG_SAMPLES.
diff --git a/Documentation/console/console.txt b/Documentation/console/console.txt
deleted file mode 100644
index d73c2ab4beda..000000000000
--- a/Documentation/console/console.txt
+++ /dev/null
@@ -1,145 +0,0 @@
-Console Drivers
-===============
-
-The Linux kernel has 2 general types of console drivers. The first type is
-assigned by the kernel to all the virtual consoles during the boot process.
-This type will be called 'system driver', and only one system driver is allowed
-to exist. The system driver is persistent and it can never be unloaded, though
-it may become inactive.
-
-The second type has to be explicitly loaded and unloaded. This will be called
-'modular driver' by this document. Multiple modular drivers can coexist at
-any time with each driver sharing the console with other drivers including
-the system driver. However, modular drivers cannot take over the console
-that is currently occupied by another modular driver. (Exception: Drivers that
-call do_take_over_console() will succeed in the takeover regardless of the type
-of driver occupying the consoles.) They can only take over the console that is
-occupied by the system driver. In the same token, if the modular driver is
-released by the console, the system driver will take over.
-
-Modular drivers, from the programmer's point of view, have to call:
-
- do_take_over_console() - load and bind driver to console layer
- give_up_console() - unload driver; it will only work if driver
- is fully unbound
-
-In newer kernels, the following are also available:
-
- do_register_con_driver()
- do_unregister_con_driver()
-
-If sysfs is enabled, the contents of /sys/class/vtconsole can be
-examined. This shows the console backends currently registered by the
-system which are named vtcon<n> where <n> is an integer from 0 to 15. Thus:
-
- ls /sys/class/vtconsole
- . .. vtcon0 vtcon1
-
-Each directory in /sys/class/vtconsole has 3 files:
-
- ls /sys/class/vtconsole/vtcon0
- . .. bind name uevent
-
-What do these files signify?
-
- 1. bind - this is a read/write file. It shows the status of the driver if
- read, or acts to bind or unbind the driver to the virtual consoles
- when written to. The possible values are:
-
- 0 - means the driver is not bound and if echo'ed, commands the driver
- to unbind
-
- 1 - means the driver is bound and if echo'ed, commands the driver to
- bind
-
- 2. name - read-only file. Shows the name of the driver in this format:
-
- cat /sys/class/vtconsole/vtcon0/name
- (S) VGA+
-
- '(S)' stands for a (S)ystem driver, i.e., it cannot be directly
- commanded to bind or unbind
-
- 'VGA+' is the name of the driver
-
- cat /sys/class/vtconsole/vtcon1/name
- (M) frame buffer device
-
- In this case, '(M)' stands for a (M)odular driver, one that can be
- directly commanded to bind or unbind.
-
- 3. uevent - ignore this file
-
-When unbinding, the modular driver is detached first, and then the system
-driver takes over the consoles vacated by the driver. Binding, on the other
-hand, will bind the driver to the consoles that are currently occupied by a
-system driver.
-
-NOTE1: Binding and unbinding must be selected in Kconfig. It's under:
-
-Device Drivers -> Character devices -> Support for binding and unbinding
-console drivers
-
-NOTE2: If any of the virtual consoles are in KD_GRAPHICS mode, then binding or
-unbinding will not succeed. An example of an application that sets the console
-to KD_GRAPHICS is X.
-
-How useful is this feature? This is very useful for console driver
-developers. By unbinding the driver from the console layer, one can unload the
-driver, make changes, recompile, reload and rebind the driver without any need
-for rebooting the kernel. For regular users who may want to switch from
-framebuffer console to VGA console and vice versa, this feature also makes
-this possible. (NOTE NOTE NOTE: Please read fbcon.txt under Documentation/fb
-for more details.)
-
-Notes for developers:
-=====================
-
-do_take_over_console() is now broken up into:
-
- do_register_con_driver()
- do_bind_con_driver() - private function
-
-give_up_console() is a wrapper to do_unregister_con_driver(), and a driver must
-be fully unbound for this call to succeed. con_is_bound() will check if the
-driver is bound or not.
-
-Guidelines for console driver writers:
-=====================================
-
-In order for binding to and unbinding from the console to properly work,
-console drivers must follow these guidelines:
-
-1. All drivers, except system drivers, must call either do_register_con_driver()
- or do_take_over_console(). do_register_con_driver() will just add the driver
- to the console's internal list. It won't take over the
- console. do_take_over_console(), as it name implies, will also take over (or
- bind to) the console.
-
-2. All resources allocated during con->con_init() must be released in
- con->con_deinit().
-
-3. All resources allocated in con->con_startup() must be released when the
- driver, which was previously bound, becomes unbound. The console layer
- does not have a complementary call to con->con_startup() so it's up to the
- driver to check when it's legal to release these resources. Calling
- con_is_bound() in con->con_deinit() will help. If the call returned
- false(), then it's safe to release the resources. This balance has to be
- ensured because con->con_startup() can be called again when a request to
- rebind the driver to the console arrives.
-
-4. Upon exit of the driver, ensure that the driver is totally unbound. If the
- condition is satisfied, then the driver must call do_unregister_con_driver()
- or give_up_console().
-
-5. do_unregister_con_driver() can also be called on conditions which make it
- impossible for the driver to service console requests. This can happen
- with the framebuffer console that suddenly lost all of its drivers.
-
-The current crop of console drivers should still work correctly, but binding
-and unbinding them may cause problems. With minimal fixes, these drivers can
-be made to work correctly.
-
-==========================
-Antonino Daplas <adaplas@pol.net>
-
diff --git a/Documentation/core-api/asm-annotations.rst b/Documentation/core-api/asm-annotations.rst
new file mode 100644
index 000000000000..11c96d3f9ad6
--- /dev/null
+++ b/Documentation/core-api/asm-annotations.rst
@@ -0,0 +1,222 @@
+Assembler Annotations
+=====================
+
+Copyright (c) 2017-2019 Jiri Slaby
+
+This document describes the new macros for annotation of data and code in
+assembly. In particular, it contains information about ``SYM_FUNC_START``,
+``SYM_FUNC_END``, ``SYM_CODE_START``, and similar.
+
+Rationale
+---------
+Some code like entries, trampolines, or boot code needs to be written in
+assembly. The same as in C, such code is grouped into functions and
+accompanied with data. Standard assemblers do not force users into precisely
+marking these pieces as code, data, or even specifying their length.
+Nevertheless, assemblers provide developers with such annotations to aid
+debuggers throughout assembly. On top of that, developers also want to mark
+some functions as *global* in order to be visible outside of their translation
+units.
+
+Over time, the Linux kernel has adopted macros from various projects (like
+``binutils``) to facilitate such annotations. So for historic reasons,
+developers have been using ``ENTRY``, ``END``, ``ENDPROC``, and other
+annotations in assembly. Due to the lack of their documentation, the macros
+are used in rather wrong contexts at some locations. Clearly, ``ENTRY`` was
+intended to denote the beginning of global symbols (be it data or code).
+``END`` used to mark the end of data or end of special functions with
+*non-standard* calling convention. In contrast, ``ENDPROC`` should annotate
+only ends of *standard* functions.
+
+When these macros are used correctly, they help assemblers generate a nice
+object with both sizes and types set correctly. For example, the result of
+``arch/x86/lib/putuser.S``::
+
+ Num: Value Size Type Bind Vis Ndx Name
+ 25: 0000000000000000 33 FUNC GLOBAL DEFAULT 1 __put_user_1
+ 29: 0000000000000030 37 FUNC GLOBAL DEFAULT 1 __put_user_2
+ 32: 0000000000000060 36 FUNC GLOBAL DEFAULT 1 __put_user_4
+ 35: 0000000000000090 37 FUNC GLOBAL DEFAULT 1 __put_user_8
+
+This is not only important for debugging purposes. When there are properly
+annotated objects like this, tools can be run on them to generate more useful
+information. In particular, on properly annotated objects, ``objtool`` can be
+run to check and fix the object if needed. Currently, ``objtool`` can report
+missing frame pointer setup/destruction in functions. It can also
+automatically generate annotations for the ORC unwinder
+(Documentation/arch/x86/orc-unwinder.rst)
+for most code. Both of these are especially important to support reliable
+stack traces which are in turn necessary for kernel live patching
+(Documentation/livepatch/livepatch.rst).
+
+Caveat and Discussion
+---------------------
+As one might realize, there were only three macros previously. That is indeed
+insufficient to cover all the combinations of cases:
+
+* standard/non-standard function
+* code/data
+* global/local symbol
+
+There was a discussion_ and instead of extending the current ``ENTRY/END*``
+macros, it was decided that brand new macros should be introduced instead::
+
+ So how about using macro names that actually show the purpose, instead
+ of importing all the crappy, historic, essentially randomly chosen
+ debug symbol macro names from the binutils and older kernels?
+
+.. _discussion: https://lore.kernel.org/r/20170217104757.28588-1-jslaby@suse.cz
+
+Macros Description
+------------------
+
+The new macros are prefixed with the ``SYM_`` prefix and can be divided into
+three main groups:
+
+1. ``SYM_FUNC_*`` -- to annotate C-like functions. This means functions with
+ standard C calling conventions. For example, on x86, this means that the
+ stack contains a return address at the predefined place and a return from
+ the function can happen in a standard way. When frame pointers are enabled,
+ save/restore of frame pointer shall happen at the start/end of a function,
+ respectively, too.
+
+ Checking tools like ``objtool`` should ensure such marked functions conform
+ to these rules. The tools can also easily annotate these functions with
+ debugging information (like *ORC data*) automatically.
+
+2. ``SYM_CODE_*`` -- special functions called with special stack. Be it
+ interrupt handlers with special stack content, trampolines, or startup
+ functions.
+
+ Checking tools mostly ignore checking of these functions. But some debug
+ information still can be generated automatically. For correct debug data,
+ this code needs hints like ``UNWIND_HINT_REGS`` provided by developers.
+
+3. ``SYM_DATA*`` -- obviously data belonging to ``.data`` sections and not to
+ ``.text``. Data do not contain instructions, so they have to be treated
+ specially by the tools: they should not treat the bytes as instructions,
+ nor assign any debug information to them.
+
+Instruction Macros
+~~~~~~~~~~~~~~~~~~
+This section covers ``SYM_FUNC_*`` and ``SYM_CODE_*`` enumerated above.
+
+``objtool`` requires that all code must be contained in an ELF symbol. Symbol
+names that have a ``.L`` prefix do not emit symbol table entries. ``.L``
+prefixed symbols can be used within a code region, but should be avoided for
+denoting a range of code via ``SYM_*_START/END`` annotations.
+
+* ``SYM_FUNC_START`` and ``SYM_FUNC_START_LOCAL`` are supposed to be **the
+ most frequent markings**. They are used for functions with standard calling
+ conventions -- global and local. Like in C, they both align the functions to
+ architecture specific ``__ALIGN`` bytes. There are also ``_NOALIGN`` variants
+ for special cases where developers do not want this implicit alignment.
+
+ ``SYM_FUNC_START_WEAK`` and ``SYM_FUNC_START_WEAK_NOALIGN`` markings are
+ also offered as an assembler counterpart to the *weak* attribute known from
+ C.
+
+ All of these **shall** be coupled with ``SYM_FUNC_END``. First, it marks
+ the sequence of instructions as a function and computes its size to the
+ generated object file. Second, it also eases checking and processing such
+ object files as the tools can trivially find exact function boundaries.
+
+ So in most cases, developers should write something like in the following
+ example, having some asm instructions in between the macros, of course::
+
+ SYM_FUNC_START(memset)
+ ... asm insns ...
+ SYM_FUNC_END(memset)
+
+ In fact, this kind of annotation corresponds to the now deprecated ``ENTRY``
+ and ``ENDPROC`` macros.
+
+* ``SYM_FUNC_ALIAS``, ``SYM_FUNC_ALIAS_LOCAL``, and ``SYM_FUNC_ALIAS_WEAK`` can
+ be used to define multiple names for a function. The typical use is::
+
+ SYM_FUNC_START(__memset)
+ ... asm insns ...
+ SYN_FUNC_END(__memset)
+ SYM_FUNC_ALIAS(memset, __memset)
+
+ In this example, one can call ``__memset`` or ``memset`` with the same
+ result, except the debug information for the instructions is generated to
+ the object file only once -- for the non-``ALIAS`` case.
+
+* ``SYM_CODE_START`` and ``SYM_CODE_START_LOCAL`` should be used only in
+ special cases -- if you know what you are doing. This is used exclusively
+ for interrupt handlers and similar where the calling convention is not the C
+ one. ``_NOALIGN`` variants exist too. The use is the same as for the ``FUNC``
+ category above::
+
+ SYM_CODE_START_LOCAL(bad_put_user)
+ ... asm insns ...
+ SYM_CODE_END(bad_put_user)
+
+ Again, every ``SYM_CODE_START*`` **shall** be coupled by ``SYM_CODE_END``.
+
+ To some extent, this category corresponds to deprecated ``ENTRY`` and
+ ``END``. Except ``END`` had several other meanings too.
+
+* ``SYM_INNER_LABEL*`` is used to denote a label inside some
+ ``SYM_{CODE,FUNC}_START`` and ``SYM_{CODE,FUNC}_END``. They are very similar
+ to C labels, except they can be made global. An example of use::
+
+ SYM_CODE_START(ftrace_caller)
+ /* save_mcount_regs fills in first two parameters */
+ ...
+
+ SYM_INNER_LABEL(ftrace_caller_op_ptr, SYM_L_GLOBAL)
+ /* Load the ftrace_ops into the 3rd parameter */
+ ...
+
+ SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
+ call ftrace_stub
+ ...
+ retq
+ SYM_CODE_END(ftrace_caller)
+
+Data Macros
+~~~~~~~~~~~
+Similar to instructions, there is a couple of macros to describe data in the
+assembly.
+
+* ``SYM_DATA_START`` and ``SYM_DATA_START_LOCAL`` mark the start of some data
+ and shall be used in conjunction with either ``SYM_DATA_END``, or
+ ``SYM_DATA_END_LABEL``. The latter adds also a label to the end, so that
+ people can use ``lstack`` and (local) ``lstack_end`` in the following
+ example::
+
+ SYM_DATA_START_LOCAL(lstack)
+ .skip 4096
+ SYM_DATA_END_LABEL(lstack, SYM_L_LOCAL, lstack_end)
+
+* ``SYM_DATA`` and ``SYM_DATA_LOCAL`` are variants for simple, mostly one-line
+ data::
+
+ SYM_DATA(HEAP, .long rm_heap)
+ SYM_DATA(heap_end, .long rm_stack)
+
+ In the end, they expand to ``SYM_DATA_START`` with ``SYM_DATA_END``
+ internally.
+
+Support Macros
+~~~~~~~~~~~~~~
+All the above reduce themselves to some invocation of ``SYM_START``,
+``SYM_END``, or ``SYM_ENTRY`` at last. Normally, developers should avoid using
+these.
+
+Further, in the above examples, one could see ``SYM_L_LOCAL``. There are also
+``SYM_L_GLOBAL`` and ``SYM_L_WEAK``. All are intended to denote linkage of a
+symbol marked by them. They are used either in ``_LABEL`` variants of the
+earlier macros, or in ``SYM_START``.
+
+
+Overriding Macros
+~~~~~~~~~~~~~~~~~
+Architecture can also override any of the macros in their own
+``asm/linkage.h``, including macros specifying the type of a symbol
+(``SYM_T_FUNC``, ``SYM_T_OBJECT``, and ``SYM_T_NONE``). As every macro
+described in this file is surrounded by ``#ifdef`` + ``#endif``, it is enough
+to define the macros differently in the aforementioned architecture-dependent
+header.
diff --git a/Documentation/core-api/assoc_array.rst b/Documentation/core-api/assoc_array.rst
index 8231b915c939..792bbf9939e1 100644
--- a/Documentation/core-api/assoc_array.rst
+++ b/Documentation/core-api/assoc_array.rst
@@ -34,7 +34,7 @@ properties:
8. The array can iterated over. The objects will not necessarily come out in
key order.
-9. The array can be iterated over whilst it is being modified, provided the
+9. The array can be iterated over while it is being modified, provided the
RCU readlock is being held by the iterator. Note, however, under these
circumstances, some objects may be seen more than once. If this is a
problem, the iterator should lock against modification. Objects will not
@@ -42,7 +42,7 @@ properties:
10. Objects in the array can be looked up by means of their index key.
-11. Objects can be looked up whilst the array is being modified, provided the
+11. Objects can be looked up while the array is being modified, provided the
RCU readlock is being held by the thread doing the look up.
The implementation uses a tree of 16-pointer nodes internally that are indexed
@@ -273,7 +273,7 @@ The function will return ``0`` if successful and ``-ENOMEM`` if there wasn't
enough memory.
It is possible for other threads to iterate over or search the array under
-the RCU read lock whilst this function is in progress. The caller should
+the RCU read lock while this function is in progress. The caller should
lock exclusively against other modifiers of the array.
diff --git a/Documentation/core-api/atomic_ops.rst b/Documentation/core-api/atomic_ops.rst
deleted file mode 100644
index 724583453e1f..000000000000
--- a/Documentation/core-api/atomic_ops.rst
+++ /dev/null
@@ -1,664 +0,0 @@
-=======================================================
-Semantics and Behavior of Atomic and Bitmask Operations
-=======================================================
-
-:Author: David S. Miller
-
-This document is intended to serve as a guide to Linux port
-maintainers on how to implement atomic counter, bitops, and spinlock
-interfaces properly.
-
-Atomic Type And Operations
-==========================
-
-The atomic_t type should be defined as a signed integer and
-the atomic_long_t type as a signed long integer. Also, they should
-be made opaque such that any kind of cast to a normal C integer type
-will fail. Something like the following should suffice::
-
- typedef struct { int counter; } atomic_t;
- typedef struct { long counter; } atomic_long_t;
-
-Historically, counter has been declared volatile. This is now discouraged.
-See :ref:`Documentation/process/volatile-considered-harmful.rst
-<volatile_considered_harmful>` for the complete rationale.
-
-local_t is very similar to atomic_t. If the counter is per CPU and only
-updated by one CPU, local_t is probably more appropriate. Please see
-:ref:`Documentation/core-api/local_ops.rst <local_ops>` for the semantics of
-local_t.
-
-The first operations to implement for atomic_t's are the initializers and
-plain writes. ::
-
- #define ATOMIC_INIT(i) { (i) }
- #define atomic_set(v, i) ((v)->counter = (i))
-
-The first macro is used in definitions, such as::
-
- static atomic_t my_counter = ATOMIC_INIT(1);
-
-The initializer is atomic in that the return values of the atomic operations
-are guaranteed to be correct reflecting the initialized value if the
-initializer is used before runtime. If the initializer is used at runtime, a
-proper implicit or explicit read memory barrier is needed before reading the
-value with atomic_read from another thread.
-
-As with all of the ``atomic_`` interfaces, replace the leading ``atomic_``
-with ``atomic_long_`` to operate on atomic_long_t.
-
-The second interface can be used at runtime, as in::
-
- struct foo { atomic_t counter; };
- ...
-
- struct foo *k;
-
- k = kmalloc(sizeof(*k), GFP_KERNEL);
- if (!k)
- return -ENOMEM;
- atomic_set(&k->counter, 0);
-
-The setting is atomic in that the return values of the atomic operations by
-all threads are guaranteed to be correct reflecting either the value that has
-been set with this operation or set with another operation. A proper implicit
-or explicit memory barrier is needed before the value set with the operation
-is guaranteed to be readable with atomic_read from another thread.
-
-Next, we have::
-
- #define atomic_read(v) ((v)->counter)
-
-which simply reads the counter value currently visible to the calling thread.
-The read is atomic in that the return value is guaranteed to be one of the
-values initialized or modified with the interface operations if a proper
-implicit or explicit memory barrier is used after possible runtime
-initialization by any other thread and the value is modified only with the
-interface operations. atomic_read does not guarantee that the runtime
-initialization by any other thread is visible yet, so the user of the
-interface must take care of that with a proper implicit or explicit memory
-barrier.
-
-.. warning::
-
- ``atomic_read()`` and ``atomic_set()`` DO NOT IMPLY BARRIERS!
-
- Some architectures may choose to use the volatile keyword, barriers, or
- inline assembly to guarantee some degree of immediacy for atomic_read()
- and atomic_set(). This is not uniformly guaranteed, and may change in
- the future, so all users of atomic_t should treat atomic_read() and
- atomic_set() as simple C statements that may be reordered or optimized
- away entirely by the compiler or processor, and explicitly invoke the
- appropriate compiler and/or memory barrier for each use case. Failure
- to do so will result in code that may suddenly break when used with
- different architectures or compiler optimizations, or even changes in
- unrelated code which changes how the compiler optimizes the section
- accessing atomic_t variables.
-
-Properly aligned pointers, longs, ints, and chars (and unsigned
-equivalents) may be atomically loaded from and stored to in the same
-sense as described for atomic_read() and atomic_set(). The READ_ONCE()
-and WRITE_ONCE() macros should be used to prevent the compiler from using
-optimizations that might otherwise optimize accesses out of existence on
-the one hand, or that might create unsolicited accesses on the other.
-
-For example consider the following code::
-
- while (a > 0)
- do_something();
-
-If the compiler can prove that do_something() does not store to the
-variable a, then the compiler is within its rights transforming this to
-the following::
-
- if (a > 0)
- for (;;)
- do_something();
-
-If you don't want the compiler to do this (and you probably don't), then
-you should use something like the following::
-
- while (READ_ONCE(a) > 0)
- do_something();
-
-Alternatively, you could place a barrier() call in the loop.
-
-For another example, consider the following code::
-
- tmp_a = a;
- do_something_with(tmp_a);
- do_something_else_with(tmp_a);
-
-If the compiler can prove that do_something_with() does not store to the
-variable a, then the compiler is within its rights to manufacture an
-additional load as follows::
-
- tmp_a = a;
- do_something_with(tmp_a);
- tmp_a = a;
- do_something_else_with(tmp_a);
-
-This could fatally confuse your code if it expected the same value
-to be passed to do_something_with() and do_something_else_with().
-
-The compiler would be likely to manufacture this additional load if
-do_something_with() was an inline function that made very heavy use
-of registers: reloading from variable a could save a flush to the
-stack and later reload. To prevent the compiler from attacking your
-code in this manner, write the following::
-
- tmp_a = READ_ONCE(a);
- do_something_with(tmp_a);
- do_something_else_with(tmp_a);
-
-For a final example, consider the following code, assuming that the
-variable a is set at boot time before the second CPU is brought online
-and never changed later, so that memory barriers are not needed::
-
- if (a)
- b = 9;
- else
- b = 42;
-
-The compiler is within its rights to manufacture an additional store
-by transforming the above code into the following::
-
- b = 42;
- if (a)
- b = 9;
-
-This could come as a fatal surprise to other code running concurrently
-that expected b to never have the value 42 if a was zero. To prevent
-the compiler from doing this, write something like::
-
- if (a)
- WRITE_ONCE(b, 9);
- else
- WRITE_ONCE(b, 42);
-
-Don't even -think- about doing this without proper use of memory barriers,
-locks, or atomic operations if variable a can change at runtime!
-
-.. warning::
-
- ``READ_ONCE()`` OR ``WRITE_ONCE()`` DO NOT IMPLY A BARRIER!
-
-Now, we move onto the atomic operation interfaces typically implemented with
-the help of assembly code. ::
-
- void atomic_add(int i, atomic_t *v);
- void atomic_sub(int i, atomic_t *v);
- void atomic_inc(atomic_t *v);
- void atomic_dec(atomic_t *v);
-
-These four routines add and subtract integral values to/from the given
-atomic_t value. The first two routines pass explicit integers by
-which to make the adjustment, whereas the latter two use an implicit
-adjustment value of "1".
-
-One very important aspect of these two routines is that they DO NOT
-require any explicit memory barriers. They need only perform the
-atomic_t counter update in an SMP safe manner.
-
-Next, we have::
-
- int atomic_inc_return(atomic_t *v);
- int atomic_dec_return(atomic_t *v);
-
-These routines add 1 and subtract 1, respectively, from the given
-atomic_t and return the new counter value after the operation is
-performed.
-
-Unlike the above routines, it is required that these primitives
-include explicit memory barriers that are performed before and after
-the operation. It must be done such that all memory operations before
-and after the atomic operation calls are strongly ordered with respect
-to the atomic operation itself.
-
-For example, it should behave as if a smp_mb() call existed both
-before and after the atomic operation.
-
-If the atomic instructions used in an implementation provide explicit
-memory barrier semantics which satisfy the above requirements, that is
-fine as well.
-
-Let's move on::
-
- int atomic_add_return(int i, atomic_t *v);
- int atomic_sub_return(int i, atomic_t *v);
-
-These behave just like atomic_{inc,dec}_return() except that an
-explicit counter adjustment is given instead of the implicit "1".
-This means that like atomic_{inc,dec}_return(), the memory barrier
-semantics are required.
-
-Next::
-
- int atomic_inc_and_test(atomic_t *v);
- int atomic_dec_and_test(atomic_t *v);
-
-These two routines increment and decrement by 1, respectively, the
-given atomic counter. They return a boolean indicating whether the
-resulting counter value was zero or not.
-
-Again, these primitives provide explicit memory barrier semantics around
-the atomic operation::
-
- int atomic_sub_and_test(int i, atomic_t *v);
-
-This is identical to atomic_dec_and_test() except that an explicit
-decrement is given instead of the implicit "1". This primitive must
-provide explicit memory barrier semantics around the operation::
-
- int atomic_add_negative(int i, atomic_t *v);
-
-The given increment is added to the given atomic counter value. A boolean
-is return which indicates whether the resulting counter value is negative.
-This primitive must provide explicit memory barrier semantics around
-the operation.
-
-Then::
-
- int atomic_xchg(atomic_t *v, int new);
-
-This performs an atomic exchange operation on the atomic variable v, setting
-the given new value. It returns the old value that the atomic variable v had
-just before the operation.
-
-atomic_xchg must provide explicit memory barriers around the operation. ::
-
- int atomic_cmpxchg(atomic_t *v, int old, int new);
-
-This performs an atomic compare exchange operation on the atomic value v,
-with the given old and new values. Like all atomic_xxx operations,
-atomic_cmpxchg will only satisfy its atomicity semantics as long as all
-other accesses of \*v are performed through atomic_xxx operations.
-
-atomic_cmpxchg must provide explicit memory barriers around the operation,
-although if the comparison fails then no memory ordering guarantees are
-required.
-
-The semantics for atomic_cmpxchg are the same as those defined for 'cas'
-below.
-
-Finally::
-
- int atomic_add_unless(atomic_t *v, int a, int u);
-
-If the atomic value v is not equal to u, this function adds a to v, and
-returns non zero. If v is equal to u then it returns zero. This is done as
-an atomic operation.
-
-atomic_add_unless must provide explicit memory barriers around the
-operation unless it fails (returns 0).
-
-atomic_inc_not_zero, equivalent to atomic_add_unless(v, 1, 0)
-
-
-If a caller requires memory barrier semantics around an atomic_t
-operation which does not return a value, a set of interfaces are
-defined which accomplish this::
-
- void smp_mb__before_atomic(void);
- void smp_mb__after_atomic(void);
-
-Preceding a non-value-returning read-modify-write atomic operation with
-smp_mb__before_atomic() and following it with smp_mb__after_atomic()
-provides the same full ordering that is provided by value-returning
-read-modify-write atomic operations.
-
-For example, smp_mb__before_atomic() can be used like so::
-
- obj->dead = 1;
- smp_mb__before_atomic();
- atomic_dec(&obj->ref_count);
-
-It makes sure that all memory operations preceding the atomic_dec()
-call are strongly ordered with respect to the atomic counter
-operation. In the above example, it guarantees that the assignment of
-"1" to obj->dead will be globally visible to other cpus before the
-atomic counter decrement.
-
-Without the explicit smp_mb__before_atomic() call, the
-implementation could legally allow the atomic counter update visible
-to other cpus before the "obj->dead = 1;" assignment.
-
-A missing memory barrier in the cases where they are required by the
-atomic_t implementation above can have disastrous results. Here is
-an example, which follows a pattern occurring frequently in the Linux
-kernel. It is the use of atomic counters to implement reference
-counting, and it works such that once the counter falls to zero it can
-be guaranteed that no other entity can be accessing the object::
-
- static void obj_list_add(struct obj *obj, struct list_head *head)
- {
- obj->active = 1;
- list_add(&obj->list, head);
- }
-
- static void obj_list_del(struct obj *obj)
- {
- list_del(&obj->list);
- obj->active = 0;
- }
-
- static void obj_destroy(struct obj *obj)
- {
- BUG_ON(obj->active);
- kfree(obj);
- }
-
- struct obj *obj_list_peek(struct list_head *head)
- {
- if (!list_empty(head)) {
- struct obj *obj;
-
- obj = list_entry(head->next, struct obj, list);
- atomic_inc(&obj->refcnt);
- return obj;
- }
- return NULL;
- }
-
- void obj_poke(void)
- {
- struct obj *obj;
-
- spin_lock(&global_list_lock);
- obj = obj_list_peek(&global_list);
- spin_unlock(&global_list_lock);
-
- if (obj) {
- obj->ops->poke(obj);
- if (atomic_dec_and_test(&obj->refcnt))
- obj_destroy(obj);
- }
- }
-
- void obj_timeout(struct obj *obj)
- {
- spin_lock(&global_list_lock);
- obj_list_del(obj);
- spin_unlock(&global_list_lock);
-
- if (atomic_dec_and_test(&obj->refcnt))
- obj_destroy(obj);
- }
-
-.. note::
-
- This is a simplification of the ARP queue management in the generic
- neighbour discover code of the networking. Olaf Kirch found a bug wrt.
- memory barriers in kfree_skb() that exposed the atomic_t memory barrier
- requirements quite clearly.
-
-Given the above scheme, it must be the case that the obj->active
-update done by the obj list deletion be visible to other processors
-before the atomic counter decrement is performed.
-
-Otherwise, the counter could fall to zero, yet obj->active would still
-be set, thus triggering the assertion in obj_destroy(). The error
-sequence looks like this::
-
- cpu 0 cpu 1
- obj_poke() obj_timeout()
- obj = obj_list_peek();
- ... gains ref to obj, refcnt=2
- obj_list_del(obj);
- obj->active = 0 ...
- ... visibility delayed ...
- atomic_dec_and_test()
- ... refcnt drops to 1 ...
- atomic_dec_and_test()
- ... refcount drops to 0 ...
- obj_destroy()
- BUG() triggers since obj->active
- still seen as one
- obj->active update visibility occurs
-
-With the memory barrier semantics required of the atomic_t operations
-which return values, the above sequence of memory visibility can never
-happen. Specifically, in the above case the atomic_dec_and_test()
-counter decrement would not become globally visible until the
-obj->active update does.
-
-As a historical note, 32-bit Sparc used to only allow usage of
-24-bits of its atomic_t type. This was because it used 8 bits
-as a spinlock for SMP safety. Sparc32 lacked a "compare and swap"
-type instruction. However, 32-bit Sparc has since been moved over
-to a "hash table of spinlocks" scheme, that allows the full 32-bit
-counter to be realized. Essentially, an array of spinlocks are
-indexed into based upon the address of the atomic_t being operated
-on, and that lock protects the atomic operation. Parisc uses the
-same scheme.
-
-Another note is that the atomic_t operations returning values are
-extremely slow on an old 386.
-
-
-Atomic Bitmask
-==============
-
-We will now cover the atomic bitmask operations. You will find that
-their SMP and memory barrier semantics are similar in shape and scope
-to the atomic_t ops above.
-
-Native atomic bit operations are defined to operate on objects aligned
-to the size of an "unsigned long" C data type, and are least of that
-size. The endianness of the bits within each "unsigned long" are the
-native endianness of the cpu. ::
-
- void set_bit(unsigned long nr, volatile unsigned long *addr);
- void clear_bit(unsigned long nr, volatile unsigned long *addr);
- void change_bit(unsigned long nr, volatile unsigned long *addr);
-
-These routines set, clear, and change, respectively, the bit number
-indicated by "nr" on the bit mask pointed to by "ADDR".
-
-They must execute atomically, yet there are no implicit memory barrier
-semantics required of these interfaces. ::
-
- int test_and_set_bit(unsigned long nr, volatile unsigned long *addr);
- int test_and_clear_bit(unsigned long nr, volatile unsigned long *addr);
- int test_and_change_bit(unsigned long nr, volatile unsigned long *addr);
-
-Like the above, except that these routines return a boolean which
-indicates whether the changed bit was set _BEFORE_ the atomic bit
-operation.
-
-
-.. warning::
- It is incredibly important that the value be a boolean, ie. "0" or "1".
- Do not try to be fancy and save a few instructions by declaring the
- above to return "long" and just returning something like "old_val &
- mask" because that will not work.
-
-For one thing, this return value gets truncated to int in many code
-paths using these interfaces, so on 64-bit if the bit is set in the
-upper 32-bits then testers will never see that.
-
-One great example of where this problem crops up are the thread_info
-flag operations. Routines such as test_and_set_ti_thread_flag() chop
-the return value into an int. There are other places where things
-like this occur as well.
-
-These routines, like the atomic_t counter operations returning values,
-must provide explicit memory barrier semantics around their execution.
-All memory operations before the atomic bit operation call must be
-made visible globally before the atomic bit operation is made visible.
-Likewise, the atomic bit operation must be visible globally before any
-subsequent memory operation is made visible. For example::
-
- obj->dead = 1;
- if (test_and_set_bit(0, &obj->flags))
- /* ... */;
- obj->killed = 1;
-
-The implementation of test_and_set_bit() must guarantee that
-"obj->dead = 1;" is visible to cpus before the atomic memory operation
-done by test_and_set_bit() becomes visible. Likewise, the atomic
-memory operation done by test_and_set_bit() must become visible before
-"obj->killed = 1;" is visible.
-
-Finally there is the basic operation::
-
- int test_bit(unsigned long nr, __const__ volatile unsigned long *addr);
-
-Which returns a boolean indicating if bit "nr" is set in the bitmask
-pointed to by "addr".
-
-If explicit memory barriers are required around {set,clear}_bit() (which do
-not return a value, and thus does not need to provide memory barrier
-semantics), two interfaces are provided::
-
- void smp_mb__before_atomic(void);
- void smp_mb__after_atomic(void);
-
-They are used as follows, and are akin to their atomic_t operation
-brothers::
-
- /* All memory operations before this call will
- * be globally visible before the clear_bit().
- */
- smp_mb__before_atomic();
- clear_bit( ... );
-
- /* The clear_bit() will be visible before all
- * subsequent memory operations.
- */
- smp_mb__after_atomic();
-
-There are two special bitops with lock barrier semantics (acquire/release,
-same as spinlocks). These operate in the same way as their non-_lock/unlock
-postfixed variants, except that they are to provide acquire/release semantics,
-respectively. This means they can be used for bit_spin_trylock and
-bit_spin_unlock type operations without specifying any more barriers. ::
-
- int test_and_set_bit_lock(unsigned long nr, unsigned long *addr);
- void clear_bit_unlock(unsigned long nr, unsigned long *addr);
- void __clear_bit_unlock(unsigned long nr, unsigned long *addr);
-
-The __clear_bit_unlock version is non-atomic, however it still implements
-unlock barrier semantics. This can be useful if the lock itself is protecting
-the other bits in the word.
-
-Finally, there are non-atomic versions of the bitmask operations
-provided. They are used in contexts where some other higher-level SMP
-locking scheme is being used to protect the bitmask, and thus less
-expensive non-atomic operations may be used in the implementation.
-They have names similar to the above bitmask operation interfaces,
-except that two underscores are prefixed to the interface name. ::
-
- void __set_bit(unsigned long nr, volatile unsigned long *addr);
- void __clear_bit(unsigned long nr, volatile unsigned long *addr);
- void __change_bit(unsigned long nr, volatile unsigned long *addr);
- int __test_and_set_bit(unsigned long nr, volatile unsigned long *addr);
- int __test_and_clear_bit(unsigned long nr, volatile unsigned long *addr);
- int __test_and_change_bit(unsigned long nr, volatile unsigned long *addr);
-
-These non-atomic variants also do not require any special memory
-barrier semantics.
-
-The routines xchg() and cmpxchg() must provide the same exact
-memory-barrier semantics as the atomic and bit operations returning
-values.
-
-.. note::
-
- If someone wants to use xchg(), cmpxchg() and their variants,
- linux/atomic.h should be included rather than asm/cmpxchg.h, unless the
- code is in arch/* and can take care of itself.
-
-Spinlocks and rwlocks have memory barrier expectations as well.
-The rule to follow is simple:
-
-1) When acquiring a lock, the implementation must make it globally
- visible before any subsequent memory operation.
-
-2) When releasing a lock, the implementation must make it such that
- all previous memory operations are globally visible before the
- lock release.
-
-Which finally brings us to _atomic_dec_and_lock(). There is an
-architecture-neutral version implemented in lib/dec_and_lock.c,
-but most platforms will wish to optimize this in assembler. ::
-
- int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock);
-
-Atomically decrement the given counter, and if will drop to zero
-atomically acquire the given spinlock and perform the decrement
-of the counter to zero. If it does not drop to zero, do nothing
-with the spinlock.
-
-It is actually pretty simple to get the memory barrier correct.
-Simply satisfy the spinlock grab requirements, which is make
-sure the spinlock operation is globally visible before any
-subsequent memory operation.
-
-We can demonstrate this operation more clearly if we define
-an abstract atomic operation::
-
- long cas(long *mem, long old, long new);
-
-"cas" stands for "compare and swap". It atomically:
-
-1) Compares "old" with the value currently at "mem".
-2) If they are equal, "new" is written to "mem".
-3) Regardless, the current value at "mem" is returned.
-
-As an example usage, here is what an atomic counter update
-might look like::
-
- void example_atomic_inc(long *counter)
- {
- long old, new, ret;
-
- while (1) {
- old = *counter;
- new = old + 1;
-
- ret = cas(counter, old, new);
- if (ret == old)
- break;
- }
- }
-
-Let's use cas() in order to build a pseudo-C atomic_dec_and_lock()::
-
- int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
- {
- long old, new, ret;
- int went_to_zero;
-
- went_to_zero = 0;
- while (1) {
- old = atomic_read(atomic);
- new = old - 1;
- if (new == 0) {
- went_to_zero = 1;
- spin_lock(lock);
- }
- ret = cas(atomic, old, new);
- if (ret == old)
- break;
- if (went_to_zero) {
- spin_unlock(lock);
- went_to_zero = 0;
- }
- }
-
- return went_to_zero;
- }
-
-Now, as far as memory barriers go, as long as spin_lock()
-strictly orders all subsequent memory operations (including
-the cas()) with respect to itself, things will be fine.
-
-Said another way, _atomic_dec_and_lock() must guarantee that
-a counter dropping to zero is never made visible before the
-spinlock being acquired.
-
-.. note::
-
- Note that this also means that for the case where the counter is not
- dropping to zero, there are no memory ordering requirements.
diff --git a/Documentation/core-api/cachetlb.rst b/Documentation/core-api/cachetlb.rst
index 6eb9d3f090cd..889fc84ccd1b 100644
--- a/Documentation/core-api/cachetlb.rst
+++ b/Documentation/core-api/cachetlb.rst
@@ -88,29 +88,23 @@ changes occur:
This is used primarily during fault processing.
-5) ``void update_mmu_cache(struct vm_area_struct *vma,
- unsigned long address, pte_t *ptep)``
+5) ``void update_mmu_cache_range(struct vm_fault *vmf,
+ struct vm_area_struct *vma, unsigned long address, pte_t *ptep,
+ unsigned int nr)``
- At the end of every page fault, this routine is invoked to
- tell the architecture specific code that a translation
- now exists at virtual address "address" for address space
- "vma->vm_mm", in the software page tables.
+ At the end of every page fault, this routine is invoked to tell
+ the architecture specific code that translations now exists
+ in the software page tables for address space "vma->vm_mm"
+ at virtual address "address" for "nr" consecutive pages.
+
+ This routine is also invoked in various other places which pass
+ a NULL "vmf".
A port may use this information in any way it so chooses.
For example, it could use this event to pre-load TLB
translations for software managed TLB configurations.
The sparc64 port currently does this.
-6) ``void tlb_migrate_finish(struct mm_struct *mm)``
-
- This interface is called at the end of an explicit
- process migration. This interface provides a hook
- to allow a platform to update TLB or context-specific
- information for the address space.
-
- The ia64 sn2 platform is one example of a platform
- that uses this interface.
-
Next, we have the cache flushing interfaces. In general, when Linux
is changing an existing virtual-->physical mapping to a new value,
the sequence will be in one of the following forms::
@@ -223,9 +217,9 @@ Here are the routines, one by one:
there will be no entries in the cache for the kernel address
space for virtual addresses in the range 'start' to 'end-1'.
- The first of these two routines is invoked after map_vm_area()
+ The first of these two routines is invoked after vmap_range()
has installed the page table entries. The second is invoked
- before unmap_kernel_range() deletes the page table entries.
+ before vunmap_range() deletes the page table entries.
There exists another whole class of cpu cache issues which currently
require a whole different set of interfaces to handle properly.
@@ -279,12 +273,17 @@ maps this page at its virtual address.
If D-cache aliasing is not an issue, these two routines may
simply call memcpy/memset directly and do nothing more.
- ``void flush_dcache_page(struct page *page)``
+ ``void flush_dcache_folio(struct folio *folio)``
- Any time the kernel writes to a page cache page, _OR_
- the kernel is about to read from a page cache page and
- user space shared/writable mappings of this page potentially
- exist, this routine is called.
+ This routines must be called when:
+
+ a) the kernel did write to a page that is in the page cache page
+ and / or in high memory
+ b) the kernel is about to read from a page cache page and user space
+ shared/writable mappings of this page potentially exist. Note
+ that {get,pin}_user_pages{_fast} already call flush_dcache_folio
+ on any page found in the user address space and thus driver
+ code rarely needs to take this into account.
.. note::
@@ -294,38 +293,35 @@ maps this page at its virtual address.
handling vfs symlinks in the page cache need not call
this interface at all.
- The phrase "kernel writes to a page cache page" means,
- specifically, that the kernel executes store instructions
- that dirty data in that page at the page->virtual mapping
- of that page. It is important to flush here to handle
- D-cache aliasing, to make sure these kernel stores are
- visible to user space mappings of that page.
+ The phrase "kernel writes to a page cache page" means, specifically,
+ that the kernel executes store instructions that dirty data in that
+ page at the kernel virtual mapping of that page. It is important to
+ flush here to handle D-cache aliasing, to make sure these kernel stores
+ are visible to user space mappings of that page.
- The corollary case is just as important, if there are users
- which have shared+writable mappings of this file, we must make
- sure that kernel reads of these pages will see the most recent
- stores done by the user.
+ The corollary case is just as important, if there are users which have
+ shared+writable mappings of this file, we must make sure that kernel
+ reads of these pages will see the most recent stores done by the user.
- If D-cache aliasing is not an issue, this routine may
- simply be defined as a nop on that architecture.
+ If D-cache aliasing is not an issue, this routine may simply be defined
+ as a nop on that architecture.
- There is a bit set aside in page->flags (PG_arch_1) as
- "architecture private". The kernel guarantees that,
- for pagecache pages, it will clear this bit when such
- a page first enters the pagecache.
+ There is a bit set aside in folio->flags (PG_arch_1) as "architecture
+ private". The kernel guarantees that, for pagecache pages, it will
+ clear this bit when such a page first enters the pagecache.
This allows these interfaces to be implemented much more
- efficiently. It allows one to "defer" (perhaps indefinitely)
- the actual flush if there are currently no user processes
- mapping this page. See sparc64's flush_dcache_page and
- update_mmu_cache implementations for an example of how to go
- about doing this.
-
- The idea is, first at flush_dcache_page() time, if
- page->mapping->i_mmap is an empty tree, just mark the architecture
- private page flag bit. Later, in update_mmu_cache(), a check is
- made of this flag bit, and if set the flush is done and the flag
- bit is cleared.
+ efficiently. It allows one to "defer" (perhaps indefinitely) the
+ actual flush if there are currently no user processes mapping this
+ page. See sparc64's flush_dcache_folio and update_mmu_cache_range
+ implementations for an example of how to go about doing this.
+
+ The idea is, first at flush_dcache_folio() time, if
+ folio_flush_mapping() returns a mapping, and mapping_mapped() on that
+ mapping returns %false, just mark the architecture private page
+ flag bit. Later, in update_mmu_cache_range(), a check is made
+ of this flag bit, and if set the flush is done and the flag bit
+ is cleared.
.. important::
@@ -355,25 +351,12 @@ maps this page at its virtual address.
When the kernel needs to access the contents of an anonymous
page, it calls this function (currently only
- get_user_pages()). Note: flush_dcache_page() deliberately
+ get_user_pages()). Note: flush_dcache_folio() deliberately
doesn't work for an anonymous page. The default
implementation is a nop (and should remain so for all coherent
architectures). For incoherent architectures, it should flush
the cache of the page at vmaddr.
- ``void flush_kernel_dcache_page(struct page *page)``
-
- When the kernel needs to modify a user page is has obtained
- with kmap, it calls this function after all modifications are
- complete (but before kunmapping it) to bring the underlying
- page up to date. It is assumed here that the user has no
- incoherent cached copies (i.e. the original page was obtained
- from a mechanism like get_user_pages()). The default
- implementation is a nop and should remain so on all coherent
- architectures. On incoherent architectures, this should flush
- the kernel cache for page (using page_address(page)).
-
-
``void flush_icache_range(unsigned long start, unsigned long end)``
When the kernel stores into addresses that it will execute
@@ -385,7 +368,7 @@ maps this page at its virtual address.
``void flush_icache_page(struct vm_area_struct *vma, struct page *page)``
All the functionality of flush_icache_page can be implemented in
- flush_dcache_page and update_mmu_cache. In the future, the hope
+ flush_dcache_folio and update_mmu_cache_range. In the future, the hope
is to remove this interface completely.
The final category of APIs is for I/O to deliberately aliased address
diff --git a/Documentation/core-api/cgroup.rst b/Documentation/core-api/cgroup.rst
new file mode 100644
index 000000000000..734ea21e1e17
--- /dev/null
+++ b/Documentation/core-api/cgroup.rst
@@ -0,0 +1,9 @@
+==================
+Cgroup Kernel APIs
+==================
+
+Device Memory Cgroup API (dmemcg)
+=================================
+.. kernel-doc:: kernel/cgroup/dmem.c
+ :export:
+
diff --git a/Documentation/core-api/circular-buffers.rst b/Documentation/core-api/circular-buffers.rst
index 53e51caa3347..50966f66e398 100644
--- a/Documentation/core-api/circular-buffers.rst
+++ b/Documentation/core-api/circular-buffers.rst
@@ -3,7 +3,7 @@ Circular Buffers
================
:Author: David Howells <dhowells@redhat.com>
-:Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+:Author: Paul E. McKenney <paulmck@linux.ibm.com>
Linux provides a number of features that can be used to implement circular
diff --git a/Documentation/core-api/cleanup.rst b/Documentation/core-api/cleanup.rst
new file mode 100644
index 000000000000..527eb2f8ec6e
--- /dev/null
+++ b/Documentation/core-api/cleanup.rst
@@ -0,0 +1,8 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================
+Scope-based Cleanup Helpers
+===========================
+
+.. kernel-doc:: include/linux/cleanup.h
+ :doc: scope-based cleanup helpers
diff --git a/Documentation/core-api/conf.py b/Documentation/core-api/conf.py
deleted file mode 100644
index db1f7659f3da..000000000000
--- a/Documentation/core-api/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = "Core-API Documentation"
-
-tags.add("subproject")
-
-latex_documents = [
- ('index', 'core-api.tex', project,
- 'The kernel development community', 'manual'),
-]
diff --git a/Documentation/core-api/cpu_hotplug.rst b/Documentation/core-api/cpu_hotplug.rst
index 4a50ab7817f7..e1b0eeabbb5e 100644
--- a/Documentation/core-api/cpu_hotplug.rst
+++ b/Documentation/core-api/cpu_hotplug.rst
@@ -2,12 +2,13 @@
CPU hotplug in the Kernel
=========================
-:Date: December, 2016
+:Date: September, 2021
:Author: Sebastian Andrzej Siewior <bigeasy@linutronix.de>,
- Rusty Russell <rusty@rustcorp.com.au>,
- Srivatsa Vaddagiri <vatsa@in.ibm.com>,
- Ashok Raj <ashok.raj@intel.com>,
- Joel Schopp <jschopp@austin.ibm.com>
+ Rusty Russell <rusty@rustcorp.com.au>,
+ Srivatsa Vaddagiri <vatsa@in.ibm.com>,
+ Ashok Raj <ashok.raj@intel.com>,
+ Joel Schopp <jschopp@austin.ibm.com>,
+ Thomas Gleixner <tglx@linutronix.de>
Introduction
============
@@ -30,33 +31,20 @@ which didn't support these methods.
Command Line Switches
=====================
``maxcpus=n``
- Restrict boot time CPUs to *n*. Say if you have fourV CPUs, using
+ Restrict boot time CPUs to *n*. Say if you have four CPUs, using
``maxcpus=2`` will only boot two. You can choose to bring the
other CPUs later online.
``nr_cpus=n``
- Restrict the total amount CPUs the kernel will support. If the number
- supplied here is lower than the number of physically available CPUs than
+ Restrict the total amount of CPUs the kernel will support. If the number
+ supplied here is lower than the number of physically available CPUs, then
those CPUs can not be brought online later.
-``additional_cpus=n``
- Use this to limit hotpluggable CPUs. This option sets
- ``cpu_possible_mask = cpu_present_mask + additional_cpus``
-
- This option is limited to the IA64 architecture.
-
``possible_cpus=n``
This option sets ``possible_cpus`` bits in ``cpu_possible_mask``.
This option is limited to the X86 and S390 architecture.
-``cede_offline={"off","on"}``
- Use this option to disable/enable putting offlined processors to an extended
- ``H_CEDE`` state on supported pseries platforms. If nothing is specified,
- ``cede_offline`` is set to "on".
-
- This option is limited to the PowerPC architecture.
-
``cpu0_hotplug``
Allow to shutdown CPU0.
@@ -98,9 +86,10 @@ Never use anything other than ``cpumask_t`` to represent bitmap of CPUs.
Using CPU hotplug
=================
+
The kernel option *CONFIG_HOTPLUG_CPU* needs to be enabled. It is currently
available on multiple architectures including ARM, MIPS, PowerPC and X86. The
-configuration is done via the sysfs interface: ::
+configuration is done via the sysfs interface::
$ ls -lh /sys/devices/system/cpu
total 0
@@ -120,35 +109,27 @@ configuration is done via the sysfs interface: ::
The files *offline*, *online*, *possible*, *present* represent the CPU masks.
Each CPU folder contains an *online* file which controls the logical on (1) and
-off (0) state. To logically shutdown CPU4: ::
+off (0) state. To logically shutdown CPU4::
$ echo 0 > /sys/devices/system/cpu/cpu4/online
smpboot: CPU 4 is now offline
Once the CPU is shutdown, it will be removed from */proc/interrupts*,
*/proc/cpuinfo* and should also not be shown visible by the *top* command. To
-bring CPU4 back online: ::
+bring CPU4 back online::
$ echo 1 > /sys/devices/system/cpu/cpu4/online
smpboot: Booting Node 0 Processor 4 APIC 0x1
-The CPU is usable again. This should work on all CPUs. CPU0 is often special
-and excluded from CPU hotplug. On X86 the kernel option
-*CONFIG_BOOTPARAM_HOTPLUG_CPU0* has to be enabled in order to be able to
-shutdown CPU0. Alternatively the kernel command option *cpu0_hotplug* can be
-used. Some known dependencies of CPU0:
-
-* Resume from hibernate/suspend. Hibernate/suspend will fail if CPU0 is offline.
-* PIC interrupts. CPU0 can't be removed if a PIC interrupt is detected.
-
-Please let Fenghua Yu <fenghua.yu@intel.com> know if you find any dependencies
-on CPU0.
+The CPU is usable again. This should work on all CPUs, but CPU0 is often special
+and excluded from CPU hotplug.
The CPU hotplug coordination
============================
The offline case
----------------
+
Once a CPU has been logically shutdown the teardown callbacks of registered
hotplug states will be invoked, starting with ``CPUHP_ONLINE`` and terminating
at state ``CPUHP_OFFLINE``. This includes:
@@ -163,105 +144,491 @@ at state ``CPUHP_OFFLINE``. This includes:
* Once all services are migrated, kernel calls an arch specific routine
``__cpu_disable()`` to perform arch specific cleanup.
-Using the hotplug API
----------------------
-It is possible to receive notifications once a CPU is offline or onlined. This
-might be important to certain drivers which need to perform some kind of setup
-or clean up functions based on the number of available CPUs: ::
-
- #include <linux/cpuhotplug.h>
-
- ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "X/Y:online",
- Y_online, Y_prepare_down);
-
-*X* is the subsystem and *Y* the particular driver. The *Y_online* callback
-will be invoked during registration on all online CPUs. If an error
-occurs during the online callback the *Y_prepare_down* callback will be
-invoked on all CPUs on which the online callback was previously invoked.
-After registration completed, the *Y_online* callback will be invoked
-once a CPU is brought online and *Y_prepare_down* will be invoked when a
-CPU is shutdown. All resources which were previously allocated in
-*Y_online* should be released in *Y_prepare_down*.
-The return value *ret* is negative if an error occurred during the
-registration process. Otherwise a positive value is returned which
-contains the allocated hotplug for dynamically allocated states
-(*CPUHP_AP_ONLINE_DYN*). It will return zero for predefined states.
-
-The callback can be remove by invoking ``cpuhp_remove_state()``. In case of a
-dynamically allocated state (*CPUHP_AP_ONLINE_DYN*) use the returned state.
-During the removal of a hotplug state the teardown callback will be invoked.
-
-Multiple instances
-~~~~~~~~~~~~~~~~~~
-If a driver has multiple instances and each instance needs to perform the
-callback independently then it is likely that a ''multi-state'' should be used.
-First a multi-state state needs to be registered: ::
-
- ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "X/Y:online,
- Y_online, Y_prepare_down);
- Y_hp_online = ret;
-
-The ``cpuhp_setup_state_multi()`` behaves similar to ``cpuhp_setup_state()``
-except it prepares the callbacks for a multi state and does not invoke
-the callbacks. This is a one time setup.
-Once a new instance is allocated, you need to register this new instance: ::
-
- ret = cpuhp_state_add_instance(Y_hp_online, &d->node);
-
-This function will add this instance to your previously allocated
-*Y_hp_online* state and invoke the previously registered callback
-(*Y_online*) on all online CPUs. The *node* element is a ``struct
-hlist_node`` member of your per-instance data structure.
-
-On removal of the instance: ::
- cpuhp_state_remove_instance(Y_hp_online, &d->node)
-
-should be invoked which will invoke the teardown callback on all online
-CPUs.
-
-Manual setup
-~~~~~~~~~~~~
-Usually it is handy to invoke setup and teardown callbacks on registration or
-removal of a state because usually the operation needs to performed once a CPU
-goes online (offline) and during initial setup (shutdown) of the driver. However
-each registration and removal function is also available with a ``_nocalls``
-suffix which does not invoke the provided callbacks if the invocation of the
-callbacks is not desired. During the manual setup (or teardown) the functions
-``get_online_cpus()`` and ``put_online_cpus()`` should be used to inhibit CPU
-hotplug operations.
-
-
-The ordering of the events
---------------------------
-The hotplug states are defined in ``include/linux/cpuhotplug.h``:
-
-* The states *CPUHP_OFFLINE* … *CPUHP_AP_OFFLINE* are invoked before the
- CPU is up.
-* The states *CPUHP_AP_OFFLINE* … *CPUHP_AP_ONLINE* are invoked
- just the after the CPU has been brought up. The interrupts are off and
- the scheduler is not yet active on this CPU. Starting with *CPUHP_AP_OFFLINE*
- the callbacks are invoked on the target CPU.
-* The states between *CPUHP_AP_ONLINE_DYN* and *CPUHP_AP_ONLINE_DYN_END* are
- reserved for the dynamic allocation.
-* The states are invoked in the reverse order on CPU shutdown starting with
- *CPUHP_ONLINE* and stopping at *CPUHP_OFFLINE*. Here the callbacks are
- invoked on the CPU that will be shutdown until *CPUHP_AP_OFFLINE*.
-
-A dynamically allocated state via *CPUHP_AP_ONLINE_DYN* is often enough.
-However if an earlier invocation during the bring up or shutdown is required
-then an explicit state should be acquired. An explicit state might also be
-required if the hotplug event requires specific ordering in respect to
-another hotplug event.
+
+The CPU hotplug API
+===================
+
+CPU hotplug state machine
+-------------------------
+
+CPU hotplug uses a trivial state machine with a linear state space from
+CPUHP_OFFLINE to CPUHP_ONLINE. Each state has a startup and a teardown
+callback.
+
+When a CPU is onlined, the startup callbacks are invoked sequentially until
+the state CPUHP_ONLINE is reached. They can also be invoked when the
+callbacks of a state are set up or an instance is added to a multi-instance
+state.
+
+When a CPU is offlined the teardown callbacks are invoked in the reverse
+order sequentially until the state CPUHP_OFFLINE is reached. They can also
+be invoked when the callbacks of a state are removed or an instance is
+removed from a multi-instance state.
+
+If a usage site requires only a callback in one direction of the hotplug
+operations (CPU online or CPU offline) then the other not-required callback
+can be set to NULL when the state is set up.
+
+The state space is divided into three sections:
+
+* The PREPARE section
+
+ The PREPARE section covers the state space from CPUHP_OFFLINE to
+ CPUHP_BRINGUP_CPU.
+
+ The startup callbacks in this section are invoked before the CPU is
+ started during a CPU online operation. The teardown callbacks are invoked
+ after the CPU has become dysfunctional during a CPU offline operation.
+
+ The callbacks are invoked on a control CPU as they can't obviously run on
+ the hotplugged CPU which is either not yet started or has become
+ dysfunctional already.
+
+ The startup callbacks are used to setup resources which are required to
+ bring a CPU successfully online. The teardown callbacks are used to free
+ resources or to move pending work to an online CPU after the hotplugged
+ CPU became dysfunctional.
+
+ The startup callbacks are allowed to fail. If a callback fails, the CPU
+ online operation is aborted and the CPU is brought down to the previous
+ state (usually CPUHP_OFFLINE) again.
+
+ The teardown callbacks in this section are not allowed to fail.
+
+* The STARTING section
+
+ The STARTING section covers the state space between CPUHP_BRINGUP_CPU + 1
+ and CPUHP_AP_ONLINE.
+
+ The startup callbacks in this section are invoked on the hotplugged CPU
+ with interrupts disabled during a CPU online operation in the early CPU
+ setup code. The teardown callbacks are invoked with interrupts disabled
+ on the hotplugged CPU during a CPU offline operation shortly before the
+ CPU is completely shut down.
+
+ The callbacks in this section are not allowed to fail.
+
+ The callbacks are used for low level hardware initialization/shutdown and
+ for core subsystems.
+
+* The ONLINE section
+
+ The ONLINE section covers the state space between CPUHP_AP_ONLINE + 1 and
+ CPUHP_ONLINE.
+
+ The startup callbacks in this section are invoked on the hotplugged CPU
+ during a CPU online operation. The teardown callbacks are invoked on the
+ hotplugged CPU during a CPU offline operation.
+
+ The callbacks are invoked in the context of the per CPU hotplug thread,
+ which is pinned on the hotplugged CPU. The callbacks are invoked with
+ interrupts and preemption enabled.
+
+ The callbacks are allowed to fail. When a callback fails the hotplug
+ operation is aborted and the CPU is brought back to the previous state.
+
+CPU online/offline operations
+-----------------------------
+
+A successful online operation looks like this::
+
+ [CPUHP_OFFLINE]
+ [CPUHP_OFFLINE + 1]->startup() -> success
+ [CPUHP_OFFLINE + 2]->startup() -> success
+ [CPUHP_OFFLINE + 3] -> skipped because startup == NULL
+ ...
+ [CPUHP_BRINGUP_CPU]->startup() -> success
+ === End of PREPARE section
+ [CPUHP_BRINGUP_CPU + 1]->startup() -> success
+ ...
+ [CPUHP_AP_ONLINE]->startup() -> success
+ === End of STARTUP section
+ [CPUHP_AP_ONLINE + 1]->startup() -> success
+ ...
+ [CPUHP_ONLINE - 1]->startup() -> success
+ [CPUHP_ONLINE]
+
+A successful offline operation looks like this::
+
+ [CPUHP_ONLINE]
+ [CPUHP_ONLINE - 1]->teardown() -> success
+ ...
+ [CPUHP_AP_ONLINE + 1]->teardown() -> success
+ === Start of STARTUP section
+ [CPUHP_AP_ONLINE]->teardown() -> success
+ ...
+ [CPUHP_BRINGUP_ONLINE - 1]->teardown()
+ ...
+ === Start of PREPARE section
+ [CPUHP_BRINGUP_CPU]->teardown()
+ [CPUHP_OFFLINE + 3]->teardown()
+ [CPUHP_OFFLINE + 2] -> skipped because teardown == NULL
+ [CPUHP_OFFLINE + 1]->teardown()
+ [CPUHP_OFFLINE]
+
+A failed online operation looks like this::
+
+ [CPUHP_OFFLINE]
+ [CPUHP_OFFLINE + 1]->startup() -> success
+ [CPUHP_OFFLINE + 2]->startup() -> success
+ [CPUHP_OFFLINE + 3] -> skipped because startup == NULL
+ ...
+ [CPUHP_BRINGUP_CPU]->startup() -> success
+ === End of PREPARE section
+ [CPUHP_BRINGUP_CPU + 1]->startup() -> success
+ ...
+ [CPUHP_AP_ONLINE]->startup() -> success
+ === End of STARTUP section
+ [CPUHP_AP_ONLINE + 1]->startup() -> success
+ ---
+ [CPUHP_AP_ONLINE + N]->startup() -> fail
+ [CPUHP_AP_ONLINE + (N - 1)]->teardown()
+ ...
+ [CPUHP_AP_ONLINE + 1]->teardown()
+ === Start of STARTUP section
+ [CPUHP_AP_ONLINE]->teardown()
+ ...
+ [CPUHP_BRINGUP_ONLINE - 1]->teardown()
+ ...
+ === Start of PREPARE section
+ [CPUHP_BRINGUP_CPU]->teardown()
+ [CPUHP_OFFLINE + 3]->teardown()
+ [CPUHP_OFFLINE + 2] -> skipped because teardown == NULL
+ [CPUHP_OFFLINE + 1]->teardown()
+ [CPUHP_OFFLINE]
+
+A failed offline operation looks like this::
+
+ [CPUHP_ONLINE]
+ [CPUHP_ONLINE - 1]->teardown() -> success
+ ...
+ [CPUHP_ONLINE - N]->teardown() -> fail
+ [CPUHP_ONLINE - (N - 1)]->startup()
+ ...
+ [CPUHP_ONLINE - 1]->startup()
+ [CPUHP_ONLINE]
+
+Recursive failures cannot be handled sensibly. Look at the following
+example of a recursive fail due to a failed offline operation: ::
+
+ [CPUHP_ONLINE]
+ [CPUHP_ONLINE - 1]->teardown() -> success
+ ...
+ [CPUHP_ONLINE - N]->teardown() -> fail
+ [CPUHP_ONLINE - (N - 1)]->startup() -> success
+ [CPUHP_ONLINE - (N - 2)]->startup() -> fail
+
+The CPU hotplug state machine stops right here and does not try to go back
+down again because that would likely result in an endless loop::
+
+ [CPUHP_ONLINE - (N - 1)]->teardown() -> success
+ [CPUHP_ONLINE - N]->teardown() -> fail
+ [CPUHP_ONLINE - (N - 1)]->startup() -> success
+ [CPUHP_ONLINE - (N - 2)]->startup() -> fail
+ [CPUHP_ONLINE - (N - 1)]->teardown() -> success
+ [CPUHP_ONLINE - N]->teardown() -> fail
+
+Lather, rinse and repeat. In this case the CPU left in state::
+
+ [CPUHP_ONLINE - (N - 1)]
+
+which at least lets the system make progress and gives the user a chance to
+debug or even resolve the situation.
+
+Allocating a state
+------------------
+
+There are two ways to allocate a CPU hotplug state:
+
+* Static allocation
+
+ Static allocation has to be used when the subsystem or driver has
+ ordering requirements versus other CPU hotplug states. E.g. the PERF core
+ startup callback has to be invoked before the PERF driver startup
+ callbacks during a CPU online operation. During a CPU offline operation
+ the driver teardown callbacks have to be invoked before the core teardown
+ callback. The statically allocated states are described by constants in
+ the cpuhp_state enum which can be found in include/linux/cpuhotplug.h.
+
+ Insert the state into the enum at the proper place so the ordering
+ requirements are fulfilled. The state constant has to be used for state
+ setup and removal.
+
+ Static allocation is also required when the state callbacks are not set
+ up at runtime and are part of the initializer of the CPU hotplug state
+ array in kernel/cpu.c.
+
+* Dynamic allocation
+
+ When there are no ordering requirements for the state callbacks then
+ dynamic allocation is the preferred method. The state number is allocated
+ by the setup function and returned to the caller on success.
+
+ Only the PREPARE and ONLINE sections provide a dynamic allocation
+ range. The STARTING section does not as most of the callbacks in that
+ section have explicit ordering requirements.
+
+Setup of a CPU hotplug state
+----------------------------
+
+The core code provides the following functions to setup a state:
+
+* cpuhp_setup_state(state, name, startup, teardown)
+* cpuhp_setup_state_nocalls(state, name, startup, teardown)
+* cpuhp_setup_state_cpuslocked(state, name, startup, teardown)
+* cpuhp_setup_state_nocalls_cpuslocked(state, name, startup, teardown)
+
+For cases where a driver or a subsystem has multiple instances and the same
+CPU hotplug state callbacks need to be invoked for each instance, the CPU
+hotplug core provides multi-instance support. The advantage over driver
+specific instance lists is that the instance related functions are fully
+serialized against CPU hotplug operations and provide the automatic
+invocations of the state callbacks on add and removal. To set up such a
+multi-instance state the following function is available:
+
+* cpuhp_setup_state_multi(state, name, startup, teardown)
+
+The @state argument is either a statically allocated state or one of the
+constants for dynamically allocated states - CPUHP_BP_PREPARE_DYN,
+CPUHP_AP_ONLINE_DYN - depending on the state section (PREPARE, ONLINE) for
+which a dynamic state should be allocated.
+
+The @name argument is used for sysfs output and for instrumentation. The
+naming convention is "subsys:mode" or "subsys/driver:mode",
+e.g. "perf:mode" or "perf/x86:mode". The common mode names are:
+
+======== =======================================================
+prepare For states in the PREPARE section
+
+dead For states in the PREPARE section which do not provide
+ a startup callback
+
+starting For states in the STARTING section
+
+dying For states in the STARTING section which do not provide
+ a startup callback
+
+online For states in the ONLINE section
+
+offline For states in the ONLINE section which do not provide
+ a startup callback
+======== =======================================================
+
+As the @name argument is only used for sysfs and instrumentation other mode
+descriptors can be used as well if they describe the nature of the state
+better than the common ones.
+
+Examples for @name arguments: "perf/online", "perf/x86:prepare",
+"RCU/tree:dying", "sched/waitempty"
+
+The @startup argument is a function pointer to the callback which should be
+invoked during a CPU online operation. If the usage site does not require a
+startup callback set the pointer to NULL.
+
+The @teardown argument is a function pointer to the callback which should
+be invoked during a CPU offline operation. If the usage site does not
+require a teardown callback set the pointer to NULL.
+
+The functions differ in the way how the installed callbacks are treated:
+
+ * cpuhp_setup_state_nocalls(), cpuhp_setup_state_nocalls_cpuslocked()
+ and cpuhp_setup_state_multi() only install the callbacks
+
+ * cpuhp_setup_state() and cpuhp_setup_state_cpuslocked() install the
+ callbacks and invoke the @startup callback (if not NULL) for all online
+ CPUs which have currently a state greater than the newly installed
+ state. Depending on the state section the callback is either invoked on
+ the current CPU (PREPARE section) or on each online CPU (ONLINE
+ section) in the context of the CPU's hotplug thread.
+
+ If a callback fails for CPU N then the teardown callback for CPU
+ 0 .. N-1 is invoked to rollback the operation. The state setup fails,
+ the callbacks for the state are not installed and in case of dynamic
+ allocation the allocated state is freed.
+
+The state setup and the callback invocations are serialized against CPU
+hotplug operations. If the setup function has to be called from a CPU
+hotplug read locked region, then the _cpuslocked() variants have to be
+used. These functions cannot be used from within CPU hotplug callbacks.
+
+The function return values:
+ ======== ===================================================================
+ 0 Statically allocated state was successfully set up
+
+ >0 Dynamically allocated state was successfully set up.
+
+ The returned number is the state number which was allocated. If
+ the state callbacks have to be removed later, e.g. module
+ removal, then this number has to be saved by the caller and used
+ as @state argument for the state remove function. For
+ multi-instance states the dynamically allocated state number is
+ also required as @state argument for the instance add/remove
+ operations.
+
+ <0 Operation failed
+ ======== ===================================================================
+
+Removal of a CPU hotplug state
+------------------------------
+
+To remove a previously set up state, the following functions are provided:
+
+* cpuhp_remove_state(state)
+* cpuhp_remove_state_nocalls(state)
+* cpuhp_remove_state_nocalls_cpuslocked(state)
+* cpuhp_remove_multi_state(state)
+
+The @state argument is either a statically allocated state or the state
+number which was allocated in the dynamic range by cpuhp_setup_state*(). If
+the state is in the dynamic range, then the state number is freed and
+available for dynamic allocation again.
+
+The functions differ in the way how the installed callbacks are treated:
+
+ * cpuhp_remove_state_nocalls(), cpuhp_remove_state_nocalls_cpuslocked()
+ and cpuhp_remove_multi_state() only remove the callbacks.
+
+ * cpuhp_remove_state() removes the callbacks and invokes the teardown
+ callback (if not NULL) for all online CPUs which have currently a state
+ greater than the removed state. Depending on the state section the
+ callback is either invoked on the current CPU (PREPARE section) or on
+ each online CPU (ONLINE section) in the context of the CPU's hotplug
+ thread.
+
+ In order to complete the removal, the teardown callback should not fail.
+
+The state removal and the callback invocations are serialized against CPU
+hotplug operations. If the remove function has to be called from a CPU
+hotplug read locked region, then the _cpuslocked() variants have to be
+used. These functions cannot be used from within CPU hotplug callbacks.
+
+If a multi-instance state is removed then the caller has to remove all
+instances first.
+
+Multi-Instance state instance management
+----------------------------------------
+
+Once the multi-instance state is set up, instances can be added to the
+state:
+
+ * cpuhp_state_add_instance(state, node)
+ * cpuhp_state_add_instance_nocalls(state, node)
+
+The @state argument is either a statically allocated state or the state
+number which was allocated in the dynamic range by cpuhp_setup_state_multi().
+
+The @node argument is a pointer to an hlist_node which is embedded in the
+instance's data structure. The pointer is handed to the multi-instance
+state callbacks and can be used by the callback to retrieve the instance
+via container_of().
+
+The functions differ in the way how the installed callbacks are treated:
+
+ * cpuhp_state_add_instance_nocalls() and only adds the instance to the
+ multi-instance state's node list.
+
+ * cpuhp_state_add_instance() adds the instance and invokes the startup
+ callback (if not NULL) associated with @state for all online CPUs which
+ have currently a state greater than @state. The callback is only
+ invoked for the to be added instance. Depending on the state section
+ the callback is either invoked on the current CPU (PREPARE section) or
+ on each online CPU (ONLINE section) in the context of the CPU's hotplug
+ thread.
+
+ If a callback fails for CPU N then the teardown callback for CPU
+ 0 .. N-1 is invoked to rollback the operation, the function fails and
+ the instance is not added to the node list of the multi-instance state.
+
+To remove an instance from the state's node list these functions are
+available:
+
+ * cpuhp_state_remove_instance(state, node)
+ * cpuhp_state_remove_instance_nocalls(state, node)
+
+The arguments are the same as for the cpuhp_state_add_instance*()
+variants above.
+
+The functions differ in the way how the installed callbacks are treated:
+
+ * cpuhp_state_remove_instance_nocalls() only removes the instance from the
+ state's node list.
+
+ * cpuhp_state_remove_instance() removes the instance and invokes the
+ teardown callback (if not NULL) associated with @state for all online
+ CPUs which have currently a state greater than @state. The callback is
+ only invoked for the to be removed instance. Depending on the state
+ section the callback is either invoked on the current CPU (PREPARE
+ section) or on each online CPU (ONLINE section) in the context of the
+ CPU's hotplug thread.
+
+ In order to complete the removal, the teardown callback should not fail.
+
+The node list add/remove operations and the callback invocations are
+serialized against CPU hotplug operations. These functions cannot be used
+from within CPU hotplug callbacks and CPU hotplug read locked regions.
+
+Examples
+--------
+
+Setup and teardown a statically allocated state in the STARTING section for
+notifications on online and offline operations::
+
+ ret = cpuhp_setup_state(CPUHP_SUBSYS_STARTING, "subsys:starting", subsys_cpu_starting, subsys_cpu_dying);
+ if (ret < 0)
+ return ret;
+ ....
+ cpuhp_remove_state(CPUHP_SUBSYS_STARTING);
+
+Setup and teardown a dynamically allocated state in the ONLINE section
+for notifications on offline operations::
+
+ state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "subsys:offline", NULL, subsys_cpu_offline);
+ if (state < 0)
+ return state;
+ ....
+ cpuhp_remove_state(state);
+
+Setup and teardown a dynamically allocated state in the ONLINE section
+for notifications on online operations without invoking the callbacks::
+
+ state = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "subsys:online", subsys_cpu_online, NULL);
+ if (state < 0)
+ return state;
+ ....
+ cpuhp_remove_state_nocalls(state);
+
+Setup, use and teardown a dynamically allocated multi-instance state in the
+ONLINE section for notifications on online and offline operation::
+
+ state = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "subsys:online", subsys_cpu_online, subsys_cpu_offline);
+ if (state < 0)
+ return state;
+ ....
+ ret = cpuhp_state_add_instance(state, &inst1->node);
+ if (ret)
+ return ret;
+ ....
+ ret = cpuhp_state_add_instance(state, &inst2->node);
+ if (ret)
+ return ret;
+ ....
+ cpuhp_remove_instance(state, &inst1->node);
+ ....
+ cpuhp_remove_instance(state, &inst2->node);
+ ....
+ cpuhp_remove_multi_state(state);
+
Testing of hotplug states
=========================
+
One way to verify whether a custom state is working as expected or not is to
shutdown a CPU and then put it online again. It is also possible to put the CPU
to certain state (for instance *CPUHP_AP_ONLINE*) and then go back to
*CPUHP_ONLINE*. This would simulate an error one state after *CPUHP_AP_ONLINE*
which would lead to rollback to the online state.
-All registered states are enumerated in ``/sys/devices/system/cpu/hotplug/states``: ::
+All registered states are enumerated in ``/sys/devices/system/cpu/hotplug/states`` ::
$ tail /sys/devices/system/cpu/hotplug/states
138: mm/vmscan:online
@@ -275,7 +642,7 @@ All registered states are enumerated in ``/sys/devices/system/cpu/hotplug/states
168: sched:active
169: online
-To rollback CPU4 to ``lib/percpu_cnt:online`` and back online just issue: ::
+To rollback CPU4 to ``lib/percpu_cnt:online`` and back online just issue::
$ cat /sys/devices/system/cpu/cpu4/hotplug/state
169
@@ -283,14 +650,14 @@ To rollback CPU4 to ``lib/percpu_cnt:online`` and back online just issue: ::
$ cat /sys/devices/system/cpu/cpu4/hotplug/state
140
-It is important to note that the teardown callbac of state 140 have been
-invoked. And now get back online: ::
+It is important to note that the teardown callback of state 140 have been
+invoked. And now get back online::
$ echo 169 > /sys/devices/system/cpu/cpu4/hotplug/target
$ cat /sys/devices/system/cpu/cpu4/hotplug/state
169
-With trace events enabled, the individual steps are visible, too: ::
+With trace events enabled, the individual steps are visible, too::
# TASK-PID CPU# TIMESTAMP FUNCTION
# | | | | |
@@ -325,6 +692,7 @@ trace.
Architecture's requirements
===========================
+
The following functions and configurations are required:
``CONFIG_HOTPLUG_CPU``
@@ -346,11 +714,12 @@ The following functions and configurations are required:
User Space Notification
=======================
-After CPU successfully onlined or offline udev events are sent. A udev rule like: ::
+
+After CPU successfully onlined or offline udev events are sent. A udev rule like::
SUBSYSTEM=="cpu", DRIVERS=="processor", DEVPATH=="/devices/system/cpu/*", RUN+="the_hotplug_receiver.sh"
-will receive all events. A script like: ::
+will receive all events. A script like::
#!/bin/sh
@@ -366,6 +735,26 @@ will receive all events. A script like: ::
can process the event further.
+When changes to the CPUs in the system occur, the sysfs file
+/sys/devices/system/cpu/crash_hotplug contains '1' if the kernel
+updates the kdump capture kernel list of CPUs itself (via elfcorehdr and
+other relevant kexec segment), or '0' if userspace must update the kdump
+capture kernel list of CPUs.
+
+The availability depends on the CONFIG_HOTPLUG_CPU kernel configuration
+option.
+
+To skip userspace processing of CPU hot un/plug events for kdump
+(i.e. the unload-then-reload to obtain a current list of CPUs), this sysfs
+file can be used in a udev rule as follows:
+
+ SUBSYSTEM=="cpu", ATTRS{crash_hotplug}=="1", GOTO="kdump_reload_end"
+
+For a CPU hot un/plug event, if the architecture supports kernel updates
+of the elfcorehdr (which contains the list of CPUs) and other relevant
+kexec segments, then the rule skips the unload-then-reload of the kdump
+capture kernel.
+
Kernel Inline Documentations Reference
======================================
diff --git a/Documentation/core-api/debugging-via-ohci1394.rst b/Documentation/core-api/debugging-via-ohci1394.rst
new file mode 100644
index 000000000000..cb3d3228dfc8
--- /dev/null
+++ b/Documentation/core-api/debugging-via-ohci1394.rst
@@ -0,0 +1,185 @@
+===========================================================================
+Using physical DMA provided by OHCI-1394 FireWire controllers for debugging
+===========================================================================
+
+Introduction
+------------
+
+Basically all FireWire controllers which are in use today are compliant
+to the OHCI-1394 specification which defines the controller to be a PCI
+bus master which uses DMA to offload data transfers from the CPU and has
+a "Physical Response Unit" which executes specific requests by employing
+PCI-Bus master DMA after applying filters defined by the OHCI-1394 driver.
+
+Once properly configured, remote machines can send these requests to
+ask the OHCI-1394 controller to perform read and write requests on
+physical system memory and, for read requests, send the result of
+the physical memory read back to the requester.
+
+With that, it is possible to debug issues by reading interesting memory
+locations such as buffers like the printk buffer or the process table.
+
+Retrieving a full system memory dump is also possible over the FireWire,
+using data transfer rates in the order of 10MB/s or more.
+
+With most FireWire controllers, memory access is limited to the low 4 GB
+of physical address space. This can be a problem on machines where memory is
+located mostly above that limit, but it is rarely a problem on more common
+hardware such as x86, x86-64 and PowerPC.
+
+At least LSI FW643e and FW643e2 controllers are known to support access to
+physical addresses above 4 GB, but this feature is currently not enabled by
+Linux.
+
+Together with a early initialization of the OHCI-1394 controller for debugging,
+this facility proved most useful for examining long debugs logs in the printk
+buffer on to debug early boot problems in areas like ACPI where the system
+fails to boot and other means for debugging (serial port) are either not
+available (notebooks) or too slow for extensive debug information (like ACPI).
+
+Drivers
+-------
+
+The firewire-ohci driver in drivers/firewire uses filtered physical
+DMA by default, which is more secure but not suitable for remote debugging.
+Pass the remote_dma=1 parameter to the driver to get unfiltered physical DMA.
+
+Because the firewire-ohci driver depends on the PCI enumeration to be
+completed, an initialization routine which runs pretty early has been
+implemented for x86. This routine runs long before console_init() can be
+called, i.e. before the printk buffer appears on the console.
+
+To activate it, enable CONFIG_PROVIDE_OHCI1394_DMA_INIT (Kernel hacking menu:
+Remote debugging over FireWire early on boot) and pass the parameter
+"ohci1394_dma=early" to the recompiled kernel on boot.
+
+Tools
+-----
+
+firescope - Originally developed by Benjamin Herrenschmidt, Andi Kleen ported
+it from PowerPC to x86 and x86_64 and added functionality, firescope can now
+be used to view the printk buffer of a remote machine, even with live update.
+
+Bernhard Kaindl enhanced firescope to support accessing 64-bit machines
+from 32-bit firescope and vice versa:
+- http://v3.sk/~lkundrak/firescope/
+
+and he implemented fast system dump (alpha version - read README.txt):
+- http://halobates.de/firewire/firedump-0.1.tar.bz2
+
+There is also a gdb proxy for firewire which allows to use gdb to access
+data which can be referenced from symbols found by gdb in vmlinux:
+- http://halobates.de/firewire/fireproxy-0.33.tar.bz2
+
+The latest version of this gdb proxy (fireproxy-0.34) can communicate (not
+yet stable) with kgdb over an memory-based communication module (kgdbom).
+
+Getting Started
+---------------
+
+The OHCI-1394 specification regulates that the OHCI-1394 controller must
+disable all physical DMA on each bus reset.
+
+This means that if you want to debug an issue in a system state where
+interrupts are disabled and where no polling of the OHCI-1394 controller
+for bus resets takes place, you have to establish any FireWire cable
+connections and fully initialize all FireWire hardware __before__ the
+system enters such state.
+
+Step-by-step instructions for using firescope with early OHCI initialization:
+
+1) Verify that your hardware is supported:
+
+ Load the firewire-ohci module and check your kernel logs.
+ You should see a line similar to::
+
+ firewire_ohci 0000:15:00.1: added OHCI v1.0 device as card 2, 4 IR + 4 IT
+ ... contexts, quirks 0x11
+
+ when loading the driver. If you have no supported controller, many PCI,
+ CardBus and even some Express cards which are fully compliant to OHCI-1394
+ specification are available. If it requires no driver for Windows operating
+ systems, it most likely is. Only specialized shops have cards which are not
+ compliant, they are based on TI PCILynx chips and require drivers for Windows
+ operating systems.
+
+ The mentioned kernel log message contains the string "physUB" if the
+ controller implements a writable Physical Upper Bound register. This is
+ required for physical DMA above 4 GB (but not utilized by Linux yet).
+
+2) Establish a working FireWire cable connection:
+
+ Any FireWire cable, as long at it provides electrically and mechanically
+ stable connection and has matching connectors (there are small 4-pin and
+ large 6-pin FireWire ports) will do.
+
+ If an driver is running on both machines you should see a line like::
+
+ firewire_core 0000:15:00.1: created device fw1: GUID 00061b0020105917, S400
+
+ on both machines in the kernel log when the cable is plugged in
+ and connects the two machines.
+
+3) Test physical DMA using firescope:
+
+ On the debug host, make sure that /dev/fw* is accessible,
+ then start firescope::
+
+ $ firescope
+ Port 0 (/dev/fw1) opened, 2 nodes detected
+
+ FireScope
+ ---------
+ Target : <unspecified>
+ Gen : 1
+ [Ctrl-T] choose target
+ [Ctrl-H] this menu
+ [Ctrl-Q] quit
+
+ ------> Press Ctrl-T now, the output should be similar to:
+
+ 2 nodes available, local node is: 0
+ 0: ffc0, uuid: 00000000 00000000 [LOCAL]
+ 1: ffc1, uuid: 00279000 ba4bb801
+
+ Besides the [LOCAL] node, it must show another node without error message.
+
+4) Prepare for debugging with early OHCI-1394 initialization:
+
+ 4.1) Kernel compilation and installation on debug target
+
+ Compile the kernel to be debugged with CONFIG_PROVIDE_OHCI1394_DMA_INIT
+ (Kernel hacking: Provide code for enabling DMA over FireWire early on boot)
+ enabled and install it on the machine to be debugged (debug target).
+
+ 4.2) Transfer the System.map of the debugged kernel to the debug host
+
+ Copy the System.map of the kernel be debugged to the debug host (the host
+ which is connected to the debugged machine over the FireWire cable).
+
+5) Retrieving the printk buffer contents:
+
+ With the FireWire cable connected, the OHCI-1394 driver on the debugging
+ host loaded, reboot the debugged machine, booting the kernel which has
+ CONFIG_PROVIDE_OHCI1394_DMA_INIT enabled, with the option ohci1394_dma=early.
+
+ Then, on the debugging host, run firescope, for example by using -A::
+
+ firescope -A System.map-of-debug-target-kernel
+
+ Note: -A automatically attaches to the first non-local node. It only works
+ reliably if only connected two machines are connected using FireWire.
+
+ After having attached to the debug target, press Ctrl-D to view the
+ complete printk buffer or Ctrl-U to enter auto update mode and get an
+ updated live view of recent kernel messages logged on the debug target.
+
+ Call "firescope -h" to get more information on firescope's options.
+
+Notes
+-----
+
+Documentation and specifications: http://halobates.de/firewire/
+
+FireWire is a trademark of Apple Inc. - for more information please refer to:
+https://en.wikipedia.org/wiki/FireWire
diff --git a/Documentation/core-api/dma-api-howto.rst b/Documentation/core-api/dma-api-howto.rst
new file mode 100644
index 000000000000..96fce2a9aa90
--- /dev/null
+++ b/Documentation/core-api/dma-api-howto.rst
@@ -0,0 +1,935 @@
+=========================
+Dynamic DMA mapping Guide
+=========================
+
+:Author: David S. Miller <davem@redhat.com>
+:Author: Richard Henderson <rth@cygnus.com>
+:Author: Jakub Jelinek <jakub@redhat.com>
+
+This is a guide to device driver writers on how to use the DMA API
+with example pseudo-code. For a concise description of the API, see
+Documentation/core-api/dma-api.rst.
+
+CPU and DMA addresses
+=====================
+
+There are several kinds of addresses involved in the DMA API, and it's
+important to understand the differences.
+
+The kernel normally uses virtual addresses. Any address returned by
+kmalloc(), vmalloc(), and similar interfaces is a virtual address and can
+be stored in a ``void *``.
+
+The virtual memory system (TLB, page tables, etc.) translates virtual
+addresses to CPU physical addresses, which are stored as "phys_addr_t" or
+"resource_size_t". The kernel manages device resources like registers as
+physical addresses. These are the addresses in /proc/iomem. The physical
+address is not directly useful to a driver; it must use ioremap() to map
+the space and produce a virtual address.
+
+I/O devices use a third kind of address: a "bus address". If a device has
+registers at an MMIO address, or if it performs DMA to read or write system
+memory, the addresses used by the device are bus addresses. In some
+systems, bus addresses are identical to CPU physical addresses, but in
+general they are not. IOMMUs and host bridges can produce arbitrary
+mappings between physical and bus addresses.
+
+From a device's point of view, DMA uses the bus address space, but it may
+be restricted to a subset of that space. For example, even if a system
+supports 64-bit addresses for main memory and PCI BARs, it may use an IOMMU
+so devices only need to use 32-bit DMA addresses.
+
+Here's a picture and some examples::
+
+ CPU CPU Bus
+ Virtual Physical Address
+ Address Address Space
+ Space Space
+
+ +-------+ +------+ +------+
+ | | |MMIO | Offset | |
+ | | Virtual |Space | applied | |
+ C +-------+ --------> B +------+ ----------> +------+ A
+ | | mapping | | by host | |
+ +-----+ | | | | bridge | | +--------+
+ | | | | +------+ | | | |
+ | CPU | | | | RAM | | | | Device |
+ | | | | | | | | | |
+ +-----+ +-------+ +------+ +------+ +--------+
+ | | Virtual |Buffer| Mapping | |
+ X +-------+ --------> Y +------+ <---------- +------+ Z
+ | | mapping | RAM | by IOMMU
+ | | | |
+ | | | |
+ +-------+ +------+
+
+During the enumeration process, the kernel learns about I/O devices and
+their MMIO space and the host bridges that connect them to the system. For
+example, if a PCI device has a BAR, the kernel reads the bus address (A)
+from the BAR and converts it to a CPU physical address (B). The address B
+is stored in a struct resource and usually exposed via /proc/iomem. When a
+driver claims a device, it typically uses ioremap() to map physical address
+B at a virtual address (C). It can then use, e.g., ioread32(C), to access
+the device registers at bus address A.
+
+If the device supports DMA, the driver sets up a buffer using kmalloc() or
+a similar interface, which returns a virtual address (X). The virtual
+memory system maps X to a physical address (Y) in system RAM. The driver
+can use virtual address X to access the buffer, but the device itself
+cannot because DMA doesn't go through the CPU virtual memory system.
+
+In some simple systems, the device can do DMA directly to physical address
+Y. But in many others, there is IOMMU hardware that translates DMA
+addresses to physical addresses, e.g., it translates Z to Y. This is part
+of the reason for the DMA API: the driver can give a virtual address X to
+an interface like dma_map_single(), which sets up any required IOMMU
+mapping and returns the DMA address Z. The driver then tells the device to
+do DMA to Z, and the IOMMU maps it to the buffer at address Y in system
+RAM.
+
+So that Linux can use the dynamic DMA mapping, it needs some help from the
+drivers, namely it has to take into account that DMA addresses should be
+mapped only for the time they are actually used and unmapped after the DMA
+transfer.
+
+The following API will work of course even on platforms where no such
+hardware exists.
+
+Note that the DMA API works with any bus independent of the underlying
+microprocessor architecture. You should use the DMA API rather than the
+bus-specific DMA API, i.e., use the dma_map_*() interfaces rather than the
+pci_map_*() interfaces.
+
+First of all, you should make sure::
+
+ #include <linux/dma-mapping.h>
+
+is in your driver, which provides the definition of dma_addr_t. This type
+can hold any valid DMA address for the platform and should be used
+everywhere you hold a DMA address returned from the DMA mapping functions.
+
+What memory is DMA'able?
+========================
+
+The first piece of information you must know is what kernel memory can
+be used with the DMA mapping facilities. There has been an unwritten
+set of rules regarding this, and this text is an attempt to finally
+write them down.
+
+If you acquired your memory via the page allocator
+(i.e. __get_free_page*()) or the generic memory allocators
+(i.e. kmalloc() or kmem_cache_alloc()) then you may DMA to/from
+that memory using the addresses returned from those routines.
+
+This means specifically that you may _not_ use the memory/addresses
+returned from vmalloc() for DMA. It is possible to DMA to the
+_underlying_ memory mapped into a vmalloc() area, but this requires
+walking page tables to get the physical addresses, and then
+translating each of those pages back to a kernel address using
+something like __va(). [ EDIT: Update this when we integrate
+Gerd Knorr's generic code which does this. ]
+
+This rule also means that you may use neither kernel image addresses
+(items in data/text/bss segments), nor module image addresses, nor
+stack addresses for DMA. These could all be mapped somewhere entirely
+different than the rest of physical memory. Even if those classes of
+memory could physically work with DMA, you'd need to ensure the I/O
+buffers were cacheline-aligned. Without that, you'd see cacheline
+sharing problems (data corruption) on CPUs with DMA-incoherent caches.
+(The CPU could write to one word, DMA would write to a different one
+in the same cache line, and one of them could be overwritten.)
+
+Also, this means that you cannot take the return of a kmap()
+call and DMA to/from that. This is similar to vmalloc().
+
+What about block I/O and networking buffers? The block I/O and
+networking subsystems make sure that the buffers they use are valid
+for you to DMA from/to.
+
+DMA addressing capabilities
+===========================
+
+By default, the kernel assumes that your device can address 32-bits of DMA
+addressing. For a 64-bit capable device, this needs to be increased, and for
+a device with limitations, it needs to be decreased.
+
+Special note about PCI: PCI-X specification requires PCI-X devices to support
+64-bit addressing (DAC) for all transactions. And at least one platform (SGI
+SN2) requires 64-bit coherent allocations to operate correctly when the IO
+bus is in PCI-X mode.
+
+For correct operation, you must set the DMA mask to inform the kernel about
+your devices DMA addressing capabilities.
+
+This is performed via a call to dma_set_mask_and_coherent()::
+
+ int dma_set_mask_and_coherent(struct device *dev, u64 mask);
+
+which will set the mask for both streaming and coherent APIs together. If you
+have some special requirements, then the following two separate calls can be
+used instead:
+
+ The setup for streaming mappings is performed via a call to
+ dma_set_mask()::
+
+ int dma_set_mask(struct device *dev, u64 mask);
+
+ The setup for coherent allocations is performed via a call
+ to dma_set_coherent_mask()::
+
+ int dma_set_coherent_mask(struct device *dev, u64 mask);
+
+Here, dev is a pointer to the device struct of your device, and mask is a bit
+mask describing which bits of an address your device supports. Often the
+device struct of your device is embedded in the bus-specific device struct of
+your device. For example, &pdev->dev is a pointer to the device struct of a
+PCI device (pdev is a pointer to the PCI device struct of your device).
+
+These calls usually return zero to indicate your device can perform DMA
+properly on the machine given the address mask you provided, but they might
+return an error if the mask is too small to be supportable on the given
+system. If it returns non-zero, your device cannot perform DMA properly on
+this platform, and attempting to do so will result in undefined behavior.
+You must not use DMA on this device unless the dma_set_mask family of
+functions has returned success.
+
+This means that in the failure case, you have two options:
+
+1) Use some non-DMA mode for data transfer, if possible.
+2) Ignore this device and do not initialize it.
+
+It is recommended that your driver print a kernel KERN_WARNING message when
+setting the DMA mask fails. In this manner, if a user of your driver reports
+that performance is bad or that the device is not even detected, you can ask
+them for the kernel messages to find out exactly why.
+
+The 24-bit addressing device would do something like this::
+
+ if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(24))) {
+ dev_warn(dev, "mydev: No suitable DMA available\n");
+ goto ignore_this_device;
+ }
+
+The standard 64-bit addressing device would do something like this::
+
+ dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64))
+
+dma_set_mask_and_coherent() never return fail when DMA_BIT_MASK(64). Typical
+error code like::
+
+ /* Wrong code */
+ if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)))
+ dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32))
+
+dma_set_mask_and_coherent() will never return failure when bigger than 32.
+So typical code like::
+
+ /* Recommended code */
+ if (support_64bit)
+ dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
+ else
+ dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
+
+If the device only supports 32-bit addressing for descriptors in the
+coherent allocations, but supports full 64-bits for streaming mappings
+it would look like this::
+
+ if (dma_set_mask(dev, DMA_BIT_MASK(64))) {
+ dev_warn(dev, "mydev: No suitable DMA available\n");
+ goto ignore_this_device;
+ }
+
+The coherent mask will always be able to set the same or a smaller mask as
+the streaming mask. However for the rare case that a device driver only
+uses coherent allocations, one would have to check the return value from
+dma_set_coherent_mask().
+
+Finally, if your device can only drive the low 24-bits of
+address you might do something like::
+
+ if (dma_set_mask(dev, DMA_BIT_MASK(24))) {
+ dev_warn(dev, "mydev: 24-bit DMA addressing not available\n");
+ goto ignore_this_device;
+ }
+
+When dma_set_mask() or dma_set_mask_and_coherent() is successful, and
+returns zero, the kernel saves away this mask you have provided. The
+kernel will use this information later when you make DMA mappings.
+
+There is a case which we are aware of at this time, which is worth
+mentioning in this documentation. If your device supports multiple
+functions (for example a sound card provides playback and record
+functions) and the various different functions have _different_
+DMA addressing limitations, you may wish to probe each mask and
+only provide the functionality which the machine can handle. It
+is important that the last call to dma_set_mask() be for the
+most specific mask.
+
+Here is pseudo-code showing how this might be done::
+
+ #define PLAYBACK_ADDRESS_BITS DMA_BIT_MASK(32)
+ #define RECORD_ADDRESS_BITS DMA_BIT_MASK(24)
+
+ struct my_sound_card *card;
+ struct device *dev;
+
+ ...
+ if (!dma_set_mask(dev, PLAYBACK_ADDRESS_BITS)) {
+ card->playback_enabled = 1;
+ } else {
+ card->playback_enabled = 0;
+ dev_warn(dev, "%s: Playback disabled due to DMA limitations\n",
+ card->name);
+ }
+ if (!dma_set_mask(dev, RECORD_ADDRESS_BITS)) {
+ card->record_enabled = 1;
+ } else {
+ card->record_enabled = 0;
+ dev_warn(dev, "%s: Record disabled due to DMA limitations\n",
+ card->name);
+ }
+
+A sound card was used as an example here because this genre of PCI
+devices seems to be littered with ISA chips given a PCI front end,
+and thus retaining the 16MB DMA addressing limitations of ISA.
+
+Types of DMA mappings
+=====================
+
+There are two types of DMA mappings:
+
+- Coherent DMA mappings which are usually mapped at driver
+ initialization, unmapped at the end and for which the hardware should
+ guarantee that the device and the CPU can access the data
+ in parallel and will see updates made by each other without any
+ explicit software flushing.
+
+ Think of "coherent" as "synchronous".
+
+ The current default is to return coherent memory in the low 32
+ bits of the DMA space. However, for future compatibility you should
+ set the coherent mask even if this default is fine for your
+ driver.
+
+ Good examples of what to use coherent mappings for are:
+
+ - Network card DMA ring descriptors.
+ - SCSI adapter mailbox command data structures.
+ - Device firmware microcode executed out of
+ main memory.
+
+ The invariant these examples all require is that any CPU store
+ to memory is immediately visible to the device, and vice
+ versa. Coherent mappings guarantee this.
+
+ .. important::
+
+ Coherent DMA memory does not preclude the usage of
+ proper memory barriers. The CPU may reorder stores to
+ coherent memory just as it may normal memory. Example:
+ if it is important for the device to see the first word
+ of a descriptor updated before the second, you must do
+ something like::
+
+ desc->word0 = address;
+ wmb();
+ desc->word1 = DESC_VALID;
+
+ in order to get correct behavior on all platforms.
+
+ Also, on some platforms your driver may need to flush CPU write
+ buffers in much the same way as it needs to flush write buffers
+ found in PCI bridges (such as by reading a register's value
+ after writing it).
+
+- Streaming DMA mappings which are usually mapped for one DMA
+ transfer, unmapped right after it (unless you use dma_sync_* below)
+ and for which hardware can optimize for sequential accesses.
+
+ Think of "streaming" as "asynchronous" or "outside the coherency
+ domain".
+
+ Good examples of what to use streaming mappings for are:
+
+ - Networking buffers transmitted/received by a device.
+ - Filesystem buffers written/read by a SCSI device.
+
+ The interfaces for using this type of mapping were designed in
+ such a way that an implementation can make whatever performance
+ optimizations the hardware allows. To this end, when using
+ such mappings you must be explicit about what you want to happen.
+
+Neither type of DMA mapping has alignment restrictions that come from
+the underlying bus, although some devices may have such restrictions.
+Also, systems with caches that aren't DMA-coherent will work better
+when the underlying buffers don't share cache lines with other data.
+
+
+Using Coherent DMA mappings
+===========================
+
+To allocate and map large (PAGE_SIZE or so) coherent DMA regions,
+you should do::
+
+ dma_addr_t dma_handle;
+
+ cpu_addr = dma_alloc_coherent(dev, size, &dma_handle, gfp);
+
+where device is a ``struct device *``. This may be called in interrupt
+context with the GFP_ATOMIC flag.
+
+Size is the length of the region you want to allocate, in bytes.
+
+This routine will allocate RAM for that region, so it acts similarly to
+__get_free_pages() (but takes size instead of a page order). If your
+driver needs regions sized smaller than a page, you may prefer using
+the dma_pool interface, described below.
+
+The coherent DMA mapping interfaces, will by default return a DMA address
+which is 32-bit addressable. Even if the device indicates (via the DMA mask)
+that it may address the upper 32-bits, coherent allocation will only
+return > 32-bit addresses for DMA if the coherent DMA mask has been
+explicitly changed via dma_set_coherent_mask(). This is true of the
+dma_pool interface as well.
+
+dma_alloc_coherent() returns two values: the virtual address which you
+can use to access it from the CPU and dma_handle which you pass to the
+card.
+
+The CPU virtual address and the DMA address are both
+guaranteed to be aligned to the smallest PAGE_SIZE order which
+is greater than or equal to the requested size. This invariant
+exists (for example) to guarantee that if you allocate a chunk
+which is smaller than or equal to 64 kilobytes, the extent of the
+buffer you receive will not cross a 64K boundary.
+
+To unmap and free such a DMA region, you call::
+
+ dma_free_coherent(dev, size, cpu_addr, dma_handle);
+
+where dev, size are the same as in the above call and cpu_addr and
+dma_handle are the values dma_alloc_coherent() returned to you.
+This function may not be called in interrupt context.
+
+If your driver needs lots of smaller memory regions, you can write
+custom code to subdivide pages returned by dma_alloc_coherent(),
+or you can use the dma_pool API to do that. A dma_pool is like
+a kmem_cache, but it uses dma_alloc_coherent(), not __get_free_pages().
+Also, it understands common hardware constraints for alignment,
+like queue heads needing to be aligned on N byte boundaries.
+
+Create a dma_pool like this::
+
+ struct dma_pool *pool;
+
+ pool = dma_pool_create(name, dev, size, align, boundary);
+
+The "name" is for diagnostics (like a kmem_cache name); dev and size
+are as above. The device's hardware alignment requirement for this
+type of data is "align" (which is expressed in bytes, and must be a
+power of two). If your device has no boundary crossing restrictions,
+pass 0 for boundary; passing 4096 says memory allocated from this pool
+must not cross 4KByte boundaries (but at that time it may be better to
+use dma_alloc_coherent() directly instead).
+
+Allocate memory from a DMA pool like this::
+
+ cpu_addr = dma_pool_alloc(pool, flags, &dma_handle);
+
+flags are GFP_KERNEL if blocking is permitted (not in_interrupt nor
+holding SMP locks), GFP_ATOMIC otherwise. Like dma_alloc_coherent(),
+this returns two values, cpu_addr and dma_handle.
+
+Free memory that was allocated from a dma_pool like this::
+
+ dma_pool_free(pool, cpu_addr, dma_handle);
+
+where pool is what you passed to dma_pool_alloc(), and cpu_addr and
+dma_handle are the values dma_pool_alloc() returned. This function
+may be called in interrupt context.
+
+Destroy a dma_pool by calling::
+
+ dma_pool_destroy(pool);
+
+Make sure you've called dma_pool_free() for all memory allocated
+from a pool before you destroy the pool. This function may not
+be called in interrupt context.
+
+DMA Direction
+=============
+
+The interfaces described in subsequent portions of this document
+take a DMA direction argument, which is an integer and takes on
+one of the following values::
+
+ DMA_BIDIRECTIONAL
+ DMA_TO_DEVICE
+ DMA_FROM_DEVICE
+ DMA_NONE
+
+You should provide the exact DMA direction if you know it.
+
+DMA_TO_DEVICE means "from main memory to the device"
+DMA_FROM_DEVICE means "from the device to main memory"
+It is the direction in which the data moves during the DMA
+transfer.
+
+You are _strongly_ encouraged to specify this as precisely
+as you possibly can.
+
+If you absolutely cannot know the direction of the DMA transfer,
+specify DMA_BIDIRECTIONAL. It means that the DMA can go in
+either direction. The platform guarantees that you may legally
+specify this, and that it will work, but this may be at the
+cost of performance for example.
+
+The value DMA_NONE is to be used for debugging. One can
+hold this in a data structure before you come to know the
+precise direction, and this will help catch cases where your
+direction tracking logic has failed to set things up properly.
+
+Another advantage of specifying this value precisely (outside of
+potential platform-specific optimizations of such) is for debugging.
+Some platforms actually have a write permission boolean which DMA
+mappings can be marked with, much like page protections in the user
+program address space. Such platforms can and do report errors in the
+kernel logs when the DMA controller hardware detects violation of the
+permission setting.
+
+Only streaming mappings specify a direction, coherent mappings
+implicitly have a direction attribute setting of
+DMA_BIDIRECTIONAL.
+
+The SCSI subsystem tells you the direction to use in the
+'sc_data_direction' member of the SCSI command your driver is
+working on.
+
+For Networking drivers, it's a rather simple affair. For transmit
+packets, map/unmap them with the DMA_TO_DEVICE direction
+specifier. For receive packets, just the opposite, map/unmap them
+with the DMA_FROM_DEVICE direction specifier.
+
+Using Streaming DMA mappings
+============================
+
+The streaming DMA mapping routines can be called from interrupt
+context. There are two versions of each map/unmap, one which will
+map/unmap a single memory region, and one which will map/unmap a
+scatterlist.
+
+To map a single region, you do::
+
+ struct device *dev = &my_dev->dev;
+ dma_addr_t dma_handle;
+ void *addr = buffer->ptr;
+ size_t size = buffer->len;
+
+ dma_handle = dma_map_single(dev, addr, size, direction);
+ if (dma_mapping_error(dev, dma_handle)) {
+ /*
+ * reduce current DMA mapping usage,
+ * delay and try again later or
+ * reset driver.
+ */
+ goto map_error_handling;
+ }
+
+and to unmap it::
+
+ dma_unmap_single(dev, dma_handle, size, direction);
+
+You should call dma_mapping_error() as dma_map_single() could fail and return
+error. Doing so will ensure that the mapping code will work correctly on all
+DMA implementations without any dependency on the specifics of the underlying
+implementation. Using the returned address without checking for errors could
+result in failures ranging from panics to silent data corruption. The same
+applies to dma_map_page() as well.
+
+You should call dma_unmap_single() when the DMA activity is finished, e.g.,
+from the interrupt which told you that the DMA transfer is done.
+
+Using CPU pointers like this for single mappings has a disadvantage:
+you cannot reference HIGHMEM memory in this way. Thus, there is a
+map/unmap interface pair akin to dma_{map,unmap}_single(). These
+interfaces deal with page/offset pairs instead of CPU pointers.
+Specifically::
+
+ struct device *dev = &my_dev->dev;
+ dma_addr_t dma_handle;
+ struct page *page = buffer->page;
+ unsigned long offset = buffer->offset;
+ size_t size = buffer->len;
+
+ dma_handle = dma_map_page(dev, page, offset, size, direction);
+ if (dma_mapping_error(dev, dma_handle)) {
+ /*
+ * reduce current DMA mapping usage,
+ * delay and try again later or
+ * reset driver.
+ */
+ goto map_error_handling;
+ }
+
+ ...
+
+ dma_unmap_page(dev, dma_handle, size, direction);
+
+Here, "offset" means byte offset within the given page.
+
+You should call dma_mapping_error() as dma_map_page() could fail and return
+error as outlined under the dma_map_single() discussion.
+
+You should call dma_unmap_page() when the DMA activity is finished, e.g.,
+from the interrupt which told you that the DMA transfer is done.
+
+With scatterlists, you map a region gathered from several regions by::
+
+ int i, count = dma_map_sg(dev, sglist, nents, direction);
+ struct scatterlist *sg;
+
+ for_each_sg(sglist, sg, count, i) {
+ hw_address[i] = sg_dma_address(sg);
+ hw_len[i] = sg_dma_len(sg);
+ }
+
+where nents is the number of entries in the sglist.
+
+The implementation is free to merge several consecutive sglist entries
+into one (e.g. if DMA mapping is done with PAGE_SIZE granularity, any
+consecutive sglist entries can be merged into one provided the first one
+ends and the second one starts on a page boundary - in fact this is a huge
+advantage for cards which either cannot do scatter-gather or have very
+limited number of scatter-gather entries) and returns the actual number
+of sg entries it mapped them to. On failure 0 is returned.
+
+Then you should loop count times (note: this can be less than nents times)
+and use sg_dma_address() and sg_dma_len() macros where you previously
+accessed sg->address and sg->length as shown above.
+
+To unmap a scatterlist, just call::
+
+ dma_unmap_sg(dev, sglist, nents, direction);
+
+Again, make sure DMA activity has already finished.
+
+.. note::
+
+ The 'nents' argument to the dma_unmap_sg call must be
+ the _same_ one you passed into the dma_map_sg call,
+ it should _NOT_ be the 'count' value _returned_ from the
+ dma_map_sg call.
+
+Every dma_map_{single,sg}() call should have its dma_unmap_{single,sg}()
+counterpart, because the DMA address space is a shared resource and
+you could render the machine unusable by consuming all DMA addresses.
+
+If you need to use the same streaming DMA region multiple times and touch
+the data in between the DMA transfers, the buffer needs to be synced
+properly in order for the CPU and device to see the most up-to-date and
+correct copy of the DMA buffer.
+
+So, firstly, just map it with dma_map_{single,sg}(), and after each DMA
+transfer call either::
+
+ dma_sync_single_for_cpu(dev, dma_handle, size, direction);
+
+or::
+
+ dma_sync_sg_for_cpu(dev, sglist, nents, direction);
+
+as appropriate.
+
+Then, if you wish to let the device get at the DMA area again,
+finish accessing the data with the CPU, and then before actually
+giving the buffer to the hardware call either::
+
+ dma_sync_single_for_device(dev, dma_handle, size, direction);
+
+or::
+
+ dma_sync_sg_for_device(dev, sglist, nents, direction);
+
+as appropriate.
+
+.. note::
+
+ The 'nents' argument to dma_sync_sg_for_cpu() and
+ dma_sync_sg_for_device() must be the same passed to
+ dma_map_sg(). It is _NOT_ the count returned by
+ dma_map_sg().
+
+After the last DMA transfer call one of the DMA unmap routines
+dma_unmap_{single,sg}(). If you don't touch the data from the first
+dma_map_*() call till dma_unmap_*(), then you don't have to call the
+dma_sync_*() routines at all.
+
+Here is pseudo code which shows a situation in which you would need
+to use the dma_sync_*() interfaces::
+
+ my_card_setup_receive_buffer(struct my_card *cp, char *buffer, int len)
+ {
+ dma_addr_t mapping;
+
+ mapping = dma_map_single(cp->dev, buffer, len, DMA_FROM_DEVICE);
+ if (dma_mapping_error(cp->dev, mapping)) {
+ /*
+ * reduce current DMA mapping usage,
+ * delay and try again later or
+ * reset driver.
+ */
+ goto map_error_handling;
+ }
+
+ cp->rx_buf = buffer;
+ cp->rx_len = len;
+ cp->rx_dma = mapping;
+
+ give_rx_buf_to_card(cp);
+ }
+
+ ...
+
+ my_card_interrupt_handler(int irq, void *devid, struct pt_regs *regs)
+ {
+ struct my_card *cp = devid;
+
+ ...
+ if (read_card_status(cp) == RX_BUF_TRANSFERRED) {
+ struct my_card_header *hp;
+
+ /* Examine the header to see if we wish
+ * to accept the data. But synchronize
+ * the DMA transfer with the CPU first
+ * so that we see updated contents.
+ */
+ dma_sync_single_for_cpu(&cp->dev, cp->rx_dma,
+ cp->rx_len,
+ DMA_FROM_DEVICE);
+
+ /* Now it is safe to examine the buffer. */
+ hp = (struct my_card_header *) cp->rx_buf;
+ if (header_is_ok(hp)) {
+ dma_unmap_single(&cp->dev, cp->rx_dma, cp->rx_len,
+ DMA_FROM_DEVICE);
+ pass_to_upper_layers(cp->rx_buf);
+ make_and_setup_new_rx_buf(cp);
+ } else {
+ /* CPU should not write to
+ * DMA_FROM_DEVICE-mapped area,
+ * so dma_sync_single_for_device() is
+ * not needed here. It would be required
+ * for DMA_BIDIRECTIONAL mapping if
+ * the memory was modified.
+ */
+ give_rx_buf_to_card(cp);
+ }
+ }
+ }
+
+Handling Errors
+===============
+
+DMA address space is limited on some architectures and an allocation
+failure can be determined by:
+
+- checking if dma_alloc_coherent() returns NULL or dma_map_sg returns 0
+
+- checking the dma_addr_t returned from dma_map_single() and dma_map_page()
+ by using dma_mapping_error()::
+
+ dma_addr_t dma_handle;
+
+ dma_handle = dma_map_single(dev, addr, size, direction);
+ if (dma_mapping_error(dev, dma_handle)) {
+ /*
+ * reduce current DMA mapping usage,
+ * delay and try again later or
+ * reset driver.
+ */
+ goto map_error_handling;
+ }
+
+- unmap pages that are already mapped, when mapping error occurs in the middle
+ of a multiple page mapping attempt. These example are applicable to
+ dma_map_page() as well.
+
+Example 1::
+
+ dma_addr_t dma_handle1;
+ dma_addr_t dma_handle2;
+
+ dma_handle1 = dma_map_single(dev, addr, size, direction);
+ if (dma_mapping_error(dev, dma_handle1)) {
+ /*
+ * reduce current DMA mapping usage,
+ * delay and try again later or
+ * reset driver.
+ */
+ goto map_error_handling1;
+ }
+ dma_handle2 = dma_map_single(dev, addr, size, direction);
+ if (dma_mapping_error(dev, dma_handle2)) {
+ /*
+ * reduce current DMA mapping usage,
+ * delay and try again later or
+ * reset driver.
+ */
+ goto map_error_handling2;
+ }
+
+ ...
+
+ map_error_handling2:
+ dma_unmap_single(dma_handle1);
+ map_error_handling1:
+
+Example 2::
+
+ /*
+ * if buffers are allocated in a loop, unmap all mapped buffers when
+ * mapping error is detected in the middle
+ */
+
+ dma_addr_t dma_addr;
+ dma_addr_t array[DMA_BUFFERS];
+ int save_index = 0;
+
+ for (i = 0; i < DMA_BUFFERS; i++) {
+
+ ...
+
+ dma_addr = dma_map_single(dev, addr, size, direction);
+ if (dma_mapping_error(dev, dma_addr)) {
+ /*
+ * reduce current DMA mapping usage,
+ * delay and try again later or
+ * reset driver.
+ */
+ goto map_error_handling;
+ }
+ array[i].dma_addr = dma_addr;
+ save_index++;
+ }
+
+ ...
+
+ map_error_handling:
+
+ for (i = 0; i < save_index; i++) {
+
+ ...
+
+ dma_unmap_single(array[i].dma_addr);
+ }
+
+Networking drivers must call dev_kfree_skb() to free the socket buffer
+and return NETDEV_TX_OK if the DMA mapping fails on the transmit hook
+(ndo_start_xmit). This means that the socket buffer is just dropped in
+the failure case.
+
+SCSI drivers must return SCSI_MLQUEUE_HOST_BUSY if the DMA mapping
+fails in the queuecommand hook. This means that the SCSI subsystem
+passes the command to the driver again later.
+
+Optimizing Unmap State Space Consumption
+========================================
+
+On many platforms, dma_unmap_{single,page}() is simply a nop.
+Therefore, keeping track of the mapping address and length is a waste
+of space. Instead of filling your drivers up with ifdefs and the like
+to "work around" this (which would defeat the whole purpose of a
+portable API) the following facilities are provided.
+
+Actually, instead of describing the macros one by one, we'll
+transform some example code.
+
+1) Use DEFINE_DMA_UNMAP_{ADDR,LEN} in state saving structures.
+ Example, before::
+
+ struct ring_state {
+ struct sk_buff *skb;
+ dma_addr_t mapping;
+ __u32 len;
+ };
+
+ after::
+
+ struct ring_state {
+ struct sk_buff *skb;
+ DEFINE_DMA_UNMAP_ADDR(mapping);
+ DEFINE_DMA_UNMAP_LEN(len);
+ };
+
+2) Use dma_unmap_{addr,len}_set() to set these values.
+ Example, before::
+
+ ringp->mapping = FOO;
+ ringp->len = BAR;
+
+ after::
+
+ dma_unmap_addr_set(ringp, mapping, FOO);
+ dma_unmap_len_set(ringp, len, BAR);
+
+3) Use dma_unmap_{addr,len}() to access these values.
+ Example, before::
+
+ dma_unmap_single(dev, ringp->mapping, ringp->len,
+ DMA_FROM_DEVICE);
+
+ after::
+
+ dma_unmap_single(dev,
+ dma_unmap_addr(ringp, mapping),
+ dma_unmap_len(ringp, len),
+ DMA_FROM_DEVICE);
+
+It really should be self-explanatory. We treat the ADDR and LEN
+separately, because it is possible for an implementation to only
+need the address in order to perform the unmap operation.
+
+Platform Issues
+===============
+
+If you are just writing drivers for Linux and do not maintain
+an architecture port for the kernel, you can safely skip down
+to "Closing".
+
+1) Struct scatterlist requirements.
+
+ You need to enable CONFIG_NEED_SG_DMA_LENGTH if the architecture
+ supports IOMMUs (including software IOMMU).
+
+2) ARCH_DMA_MINALIGN
+
+ Architectures must ensure that kmalloc'ed buffer is
+ DMA-safe. Drivers and subsystems depend on it. If an architecture
+ isn't fully DMA-coherent (i.e. hardware doesn't ensure that data in
+ the CPU cache is identical to data in main memory),
+ ARCH_DMA_MINALIGN must be set so that the memory allocator
+ makes sure that kmalloc'ed buffer doesn't share a cache line with
+ the others. See arch/arm/include/asm/cache.h as an example.
+
+ Note that ARCH_DMA_MINALIGN is about DMA memory alignment
+ constraints. You don't need to worry about the architecture data
+ alignment constraints (e.g. the alignment constraints about 64-bit
+ objects).
+
+Closing
+=======
+
+This document, and the API itself, would not be in its current
+form without the feedback and suggestions from numerous individuals.
+We would like to specifically mention, in no particular order, the
+following people::
+
+ Russell King <rmk@arm.linux.org.uk>
+ Leo Dagum <dagum@barrel.engr.sgi.com>
+ Ralf Baechle <ralf@oss.sgi.com>
+ Grant Grundler <grundler@cup.hp.com>
+ Jay Estabrook <Jay.Estabrook@compaq.com>
+ Thomas Sailer <sailer@ife.ee.ethz.ch>
+ Andrea Arcangeli <andrea@suse.de>
+ Jens Axboe <jens.axboe@oracle.com>
+ David Mosberger-Tang <davidm@hpl.hp.com>
diff --git a/Documentation/core-api/dma-api.rst b/Documentation/core-api/dma-api.rst
new file mode 100644
index 000000000000..ca75b3541679
--- /dev/null
+++ b/Documentation/core-api/dma-api.rst
@@ -0,0 +1,868 @@
+============================================
+Dynamic DMA mapping using the generic device
+============================================
+
+:Author: James E.J. Bottomley <James.Bottomley@HansenPartnership.com>
+
+This document describes the DMA API. For a more gentle introduction
+of the API (and actual examples), see Documentation/core-api/dma-api-howto.rst.
+
+This API is split into two pieces. Part I describes the basic API.
+Part II describes extensions for supporting non-coherent memory
+machines. Unless you know that your driver absolutely has to support
+non-coherent platforms (this is usually only legacy platforms) you
+should only use the API described in part I.
+
+Part I - DMA API
+----------------
+
+To get the DMA API, you must #include <linux/dma-mapping.h>. This
+provides dma_addr_t and the interfaces described below.
+
+A dma_addr_t can hold any valid DMA address for the platform. It can be
+given to a device to use as a DMA source or target. A CPU cannot reference
+a dma_addr_t directly because there may be translation between its physical
+address space and the DMA address space.
+
+Part Ia - Using large DMA-coherent buffers
+------------------------------------------
+
+::
+
+ void *
+ dma_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag)
+
+Coherent memory is memory for which a write by either the device or
+the processor can immediately be read by the processor or device
+without having to worry about caching effects. (You may however need
+to make sure to flush the processor's write buffers before telling
+devices to read that memory.)
+
+This routine allocates a region of <size> bytes of coherent memory.
+
+It returns a pointer to the allocated region (in the processor's virtual
+address space) or NULL if the allocation failed.
+
+It also returns a <dma_handle> which may be cast to an unsigned integer the
+same width as the bus and given to the device as the DMA address base of
+the region.
+
+Note: coherent memory can be expensive on some platforms, and the
+minimum allocation length may be as big as a page, so you should
+consolidate your requests for coherent memory as much as possible.
+The simplest way to do that is to use the dma_pool calls (see below).
+
+The flag parameter allows the caller to specify the ``GFP_`` flags (see
+kmalloc()) for the allocation (the implementation may ignore flags that affect
+the location of the returned memory, like GFP_DMA).
+
+::
+
+ void
+ dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
+ dma_addr_t dma_handle)
+
+Free a previously allocated region of coherent memory. dev, size and dma_handle
+must all be the same as those passed into dma_alloc_coherent(). cpu_addr must
+be the virtual address returned by dma_alloc_coherent().
+
+Note that unlike the sibling allocation call, this routine may only be called
+with IRQs enabled.
+
+
+Part Ib - Using small DMA-coherent buffers
+------------------------------------------
+
+To get this part of the DMA API, you must #include <linux/dmapool.h>
+
+Many drivers need lots of small DMA-coherent memory regions for DMA
+descriptors or I/O buffers. Rather than allocating in units of a page
+or more using dma_alloc_coherent(), you can use DMA pools. These work
+much like a struct kmem_cache, except that they use the DMA-coherent allocator,
+not __get_free_pages(). Also, they understand common hardware constraints
+for alignment, like queue heads needing to be aligned on N-byte boundaries.
+
+.. kernel-doc:: mm/dmapool.c
+ :export:
+
+.. kernel-doc:: include/linux/dmapool.h
+
+
+Part Ic - DMA addressing limitations
+------------------------------------
+
+DMA mask is a bit mask of the addressable region for the device. In other words,
+if applying the DMA mask (a bitwise AND operation) to the DMA address of a
+memory region does not clear any bits in the address, then the device can
+perform DMA to that memory region.
+
+All the below functions which set a DMA mask may fail if the requested mask
+cannot be used with the device, or if the device is not capable of doing DMA.
+
+::
+
+ int
+ dma_set_mask_and_coherent(struct device *dev, u64 mask)
+
+Updates both streaming and coherent DMA masks.
+
+Returns: 0 if successful and a negative error if not.
+
+::
+
+ int
+ dma_set_mask(struct device *dev, u64 mask)
+
+Updates only the streaming DMA mask.
+
+Returns: 0 if successful and a negative error if not.
+
+::
+
+ int
+ dma_set_coherent_mask(struct device *dev, u64 mask)
+
+Updates only the coherent DMA mask.
+
+Returns: 0 if successful and a negative error if not.
+
+::
+
+ u64
+ dma_get_required_mask(struct device *dev)
+
+This API returns the mask that the platform requires to
+operate efficiently. Usually this means the returned mask
+is the minimum required to cover all of memory. Examining the
+required mask gives drivers with variable descriptor sizes the
+opportunity to use smaller descriptors as necessary.
+
+Requesting the required mask does not alter the current mask. If you
+wish to take advantage of it, you should issue a dma_set_mask()
+call to set the mask to the value returned.
+
+::
+
+ size_t
+ dma_max_mapping_size(struct device *dev);
+
+Returns the maximum size of a mapping for the device. The size parameter
+of the mapping functions like dma_map_single(), dma_map_page() and
+others should not be larger than the returned value.
+
+::
+
+ size_t
+ dma_opt_mapping_size(struct device *dev);
+
+Returns the maximum optimal size of a mapping for the device.
+
+Mapping larger buffers may take much longer in certain scenarios. In
+addition, for high-rate short-lived streaming mappings, the upfront time
+spent on the mapping may account for an appreciable part of the total
+request lifetime. As such, if splitting larger requests incurs no
+significant performance penalty, then device drivers are advised to
+limit total DMA streaming mappings length to the returned value.
+
+::
+
+ bool
+ dma_need_sync(struct device *dev, dma_addr_t dma_addr);
+
+Returns %true if dma_sync_single_for_{device,cpu} calls are required to
+transfer memory ownership. Returns %false if those calls can be skipped.
+
+::
+
+ unsigned long
+ dma_get_merge_boundary(struct device *dev);
+
+Returns the DMA merge boundary. If the device cannot merge any DMA address
+segments, the function returns 0.
+
+Part Id - Streaming DMA mappings
+--------------------------------
+
+Streaming DMA allows to map an existing buffer for DMA transfers and then
+unmap it when finished. Map functions are not guaranteed to succeed, so the
+return value must be checked.
+
+.. note::
+
+ In particular, mapping may fail for memory not addressable by the
+ device, e.g. if it is not within the DMA mask of the device and/or a
+ connecting bus bridge. Streaming DMA functions try to overcome such
+ addressing constraints, either by using an IOMMU (a device which maps
+ I/O DMA addresses to physical memory addresses), or by copying the
+ data to/from a bounce buffer if the kernel is configured with a
+ :doc:`SWIOTLB <swiotlb>`. However, these methods are not always
+ available, and even if they are, they may still fail for a number of
+ reasons.
+
+ In short, a device driver may need to be wary of where buffers are
+ located in physical memory, especially if the DMA mask is less than 32
+ bits.
+
+::
+
+ dma_addr_t
+ dma_map_single(struct device *dev, void *cpu_addr, size_t size,
+ enum dma_data_direction direction)
+
+Maps a piece of processor virtual memory so it can be accessed by the
+device and returns the DMA address of the memory.
+
+The DMA API uses a strongly typed enumerator for its direction:
+
+======================= =============================================
+DMA_NONE no direction (used for debugging)
+DMA_TO_DEVICE data is going from the memory to the device
+DMA_FROM_DEVICE data is coming from the device to the memory
+DMA_BIDIRECTIONAL direction isn't known
+======================= =============================================
+
+.. note::
+
+ Contiguous kernel virtual space may not be contiguous as
+ physical memory. Since this API does not provide any scatter/gather
+ capability, it will fail if the user tries to map a non-physically
+ contiguous piece of memory. For this reason, memory to be mapped by
+ this API should be obtained from sources which guarantee it to be
+ physically contiguous (like kmalloc).
+
+.. warning::
+
+ Memory coherency operates at a granularity called the cache
+ line width. In order for memory mapped by this API to operate
+ correctly, the mapped region must begin exactly on a cache line
+ boundary and end exactly on one (to prevent two separately mapped
+ regions from sharing a single cache line). Since the cache line size
+ may not be known at compile time, the API will not enforce this
+ requirement. Therefore, it is recommended that driver writers who
+ don't take special care to determine the cache line size at run time
+ only map virtual regions that begin and end on page boundaries (which
+ are guaranteed also to be cache line boundaries).
+
+ DMA_TO_DEVICE synchronisation must be done after the last modification
+ of the memory region by the software and before it is handed off to
+ the device. Once this primitive is used, memory covered by this
+ primitive should be treated as read-only by the device. If the device
+ may write to it at any point, it should be DMA_BIDIRECTIONAL (see
+ below).
+
+ DMA_FROM_DEVICE synchronisation must be done before the driver
+ accesses data that may be changed by the device. This memory should
+ be treated as read-only by the driver. If the driver needs to write
+ to it at any point, it should be DMA_BIDIRECTIONAL (see below).
+
+ DMA_BIDIRECTIONAL requires special handling: it means that the driver
+ isn't sure if the memory was modified before being handed off to the
+ device and also isn't sure if the device will also modify it. Thus,
+ you must always sync bidirectional memory twice: once before the
+ memory is handed off to the device (to make sure all memory changes
+ are flushed from the processor) and once before the data may be
+ accessed after being used by the device (to make sure any processor
+ cache lines are updated with data that the device may have changed).
+
+::
+
+ void
+ dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
+ enum dma_data_direction direction)
+
+Unmaps the region previously mapped. All the parameters passed in
+must be identical to those passed to (and returned by) dma_map_single().
+
+::
+
+ dma_addr_t
+ dma_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction direction)
+
+ void
+ dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
+ enum dma_data_direction direction)
+
+API for mapping and unmapping for pages. All the notes and warnings
+for the other mapping APIs apply here. Also, although the <offset>
+and <size> parameters are provided to do partial page mapping, it is
+recommended that you never use these unless you really know what the
+cache width is.
+
+::
+
+ dma_addr_t
+ dma_map_resource(struct device *dev, phys_addr_t phys_addr, size_t size,
+ enum dma_data_direction dir, unsigned long attrs)
+
+ void
+ dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size,
+ enum dma_data_direction dir, unsigned long attrs)
+
+API for mapping and unmapping for MMIO resources. All the notes and
+warnings for the other mapping APIs apply here. The API should only be
+used to map device MMIO resources, mapping of RAM is not permitted.
+
+::
+
+ int
+ dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+
+In some circumstances dma_map_single(), dma_map_page() and dma_map_resource()
+will fail to create a mapping. A driver can check for these errors by testing
+the returned DMA address with dma_mapping_error(). A non-zero return value
+means the mapping could not be created and the driver should take appropriate
+action (e.g. reduce current DMA mapping usage or delay and try again later).
+
+::
+
+ int
+ dma_map_sg(struct device *dev, struct scatterlist *sg,
+ int nents, enum dma_data_direction direction)
+
+Maps a scatter/gather list for DMA. Returns the number of DMA address segments
+mapped, which may be smaller than <nents> passed in if several consecutive
+sglist entries are merged (e.g. with an IOMMU, or if some adjacent segments
+just happen to be physically contiguous).
+
+Please note that the sg cannot be mapped again if it has been mapped once.
+The mapping process is allowed to destroy information in the sg.
+
+As with the other mapping interfaces, dma_map_sg() can fail. When it
+does, 0 is returned and a driver must take appropriate action. It is
+critical that the driver do something, in the case of a block driver
+aborting the request or even oopsing is better than doing nothing and
+corrupting the filesystem.
+
+With scatterlists, you use the resulting mapping like this::
+
+ int i, count = dma_map_sg(dev, sglist, nents, direction);
+ struct scatterlist *sg;
+
+ for_each_sg(sglist, sg, count, i) {
+ hw_address[i] = sg_dma_address(sg);
+ hw_len[i] = sg_dma_len(sg);
+ }
+
+where nents is the number of entries in the sglist.
+
+The implementation is free to merge several consecutive sglist entries
+into one. The returned number is the actual number of sg entries it
+mapped them to. On failure, 0 is returned.
+
+Then you should loop count times (note: this can be less than nents times)
+and use sg_dma_address() and sg_dma_len() macros where you previously
+accessed sg->address and sg->length as shown above.
+
+::
+
+ void
+ dma_unmap_sg(struct device *dev, struct scatterlist *sg,
+ int nents, enum dma_data_direction direction)
+
+Unmap the previously mapped scatter/gather list. All the parameters
+must be the same as those and passed in to the scatter/gather mapping
+API.
+
+Note: <nents> must be the number you passed in, *not* the number of
+DMA address entries returned.
+
+::
+
+ void
+ dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
+ size_t size,
+ enum dma_data_direction direction)
+
+ void
+ dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
+ size_t size,
+ enum dma_data_direction direction)
+
+ void
+ dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
+ int nents,
+ enum dma_data_direction direction)
+
+ void
+ dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
+ int nents,
+ enum dma_data_direction direction)
+
+Synchronise a single contiguous or scatter/gather mapping for the CPU
+and device. With the sync_sg API, all the parameters must be the same
+as those passed into the sg mapping API. With the sync_single API,
+you can use dma_handle and size parameters that aren't identical to
+those passed into the single mapping API to do a partial sync.
+
+
+.. note::
+
+ You must do this:
+
+ - Before reading values that have been written by DMA from the device
+ (use the DMA_FROM_DEVICE direction)
+ - After writing values that will be written to the device using DMA
+ (use the DMA_TO_DEVICE) direction
+ - before *and* after handing memory to the device if the memory is
+ DMA_BIDIRECTIONAL
+
+See also dma_map_single().
+
+::
+
+ dma_addr_t
+ dma_map_single_attrs(struct device *dev, void *cpu_addr, size_t size,
+ enum dma_data_direction dir,
+ unsigned long attrs)
+
+ void
+ dma_unmap_single_attrs(struct device *dev, dma_addr_t dma_addr,
+ size_t size, enum dma_data_direction dir,
+ unsigned long attrs)
+
+ int
+ dma_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
+ int nents, enum dma_data_direction dir,
+ unsigned long attrs)
+
+ void
+ dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sgl,
+ int nents, enum dma_data_direction dir,
+ unsigned long attrs)
+
+The four functions above are just like the counterpart functions
+without the _attrs suffixes, except that they pass an optional
+dma_attrs.
+
+The interpretation of DMA attributes is architecture-specific, and
+each attribute should be documented in
+Documentation/core-api/dma-attributes.rst.
+
+If dma_attrs are 0, the semantics of each of these functions
+is identical to those of the corresponding function
+without the _attrs suffix. As a result dma_map_single_attrs()
+can generally replace dma_map_single(), etc.
+
+As an example of the use of the ``*_attrs`` functions, here's how
+you could pass an attribute DMA_ATTR_FOO when mapping memory
+for DMA::
+
+ #include <linux/dma-mapping.h>
+ /* DMA_ATTR_FOO should be defined in linux/dma-mapping.h and
+ * documented in Documentation/core-api/dma-attributes.rst */
+ ...
+
+ unsigned long attr;
+ attr |= DMA_ATTR_FOO;
+ ....
+ n = dma_map_sg_attrs(dev, sg, nents, DMA_TO_DEVICE, attr);
+ ....
+
+Architectures that care about DMA_ATTR_FOO would check for its
+presence in their implementations of the mapping and unmapping
+routines, e.g.:::
+
+ void whizco_dma_map_sg_attrs(struct device *dev, dma_addr_t dma_addr,
+ size_t size, enum dma_data_direction dir,
+ unsigned long attrs)
+ {
+ ....
+ if (attrs & DMA_ATTR_FOO)
+ /* twizzle the frobnozzle */
+ ....
+ }
+
+Part Ie - IOVA-based DMA mappings
+---------------------------------
+
+These APIs allow a very efficient mapping when using an IOMMU. They are an
+optional path that requires extra code and are only recommended for drivers
+where DMA mapping performance, or the space usage for storing the DMA addresses
+matter. All the considerations from the previous section apply here as well.
+
+::
+
+ bool dma_iova_try_alloc(struct device *dev, struct dma_iova_state *state,
+ phys_addr_t phys, size_t size);
+
+Is used to try to allocate IOVA space for mapping operation. If it returns
+false this API can't be used for the given device and the normal streaming
+DMA mapping API should be used. The ``struct dma_iova_state`` is allocated
+by the driver and must be kept around until unmap time.
+
+::
+
+ static inline bool dma_use_iova(struct dma_iova_state *state)
+
+Can be used by the driver to check if the IOVA-based API is used after a
+call to dma_iova_try_alloc. This can be useful in the unmap path.
+
+::
+
+ int dma_iova_link(struct device *dev, struct dma_iova_state *state,
+ phys_addr_t phys, size_t offset, size_t size,
+ enum dma_data_direction dir, unsigned long attrs);
+
+Is used to link ranges to the IOVA previously allocated. The start of all
+but the first call to dma_iova_link for a given state must be aligned
+to the DMA merge boundary returned by ``dma_get_merge_boundary())``, and
+the size of all but the last range must be aligned to the DMA merge boundary
+as well.
+
+::
+
+ int dma_iova_sync(struct device *dev, struct dma_iova_state *state,
+ size_t offset, size_t size);
+
+Must be called to sync the IOMMU page tables for IOVA-range mapped by one or
+more calls to ``dma_iova_link()``.
+
+For drivers that use a one-shot mapping, all ranges can be unmapped and the
+IOVA freed by calling:
+
+::
+
+ void dma_iova_destroy(struct device *dev, struct dma_iova_state *state,
+ size_t mapped_len, enum dma_data_direction dir,
+ unsigned long attrs);
+
+Alternatively drivers can dynamically manage the IOVA space by unmapping
+and mapping individual regions. In that case
+
+::
+
+ void dma_iova_unlink(struct device *dev, struct dma_iova_state *state,
+ size_t offset, size_t size, enum dma_data_direction dir,
+ unsigned long attrs);
+
+is used to unmap a range previously mapped, and
+
+::
+
+ void dma_iova_free(struct device *dev, struct dma_iova_state *state);
+
+is used to free the IOVA space. All regions must have been unmapped using
+``dma_iova_unlink()`` before calling ``dma_iova_free()``.
+
+Part II - Non-coherent DMA allocations
+--------------------------------------
+
+These APIs allow to allocate pages that are guaranteed to be DMA addressable
+by the passed in device, but which need explicit management of memory ownership
+for the kernel vs the device.
+
+If you don't understand how cache line coherency works between a processor and
+an I/O device, you should not be using this part of the API.
+
+::
+
+ struct page *
+ dma_alloc_pages(struct device *dev, size_t size, dma_addr_t *dma_handle,
+ enum dma_data_direction dir, gfp_t gfp)
+
+This routine allocates a region of <size> bytes of non-coherent memory. It
+returns a pointer to first struct page for the region, or NULL if the
+allocation failed. The resulting struct page can be used for everything a
+struct page is suitable for.
+
+It also returns a <dma_handle> which may be cast to an unsigned integer the
+same width as the bus and given to the device as the DMA address base of
+the region.
+
+The dir parameter specified if data is read and/or written by the device,
+see dma_map_single() for details.
+
+The gfp parameter allows the caller to specify the ``GFP_`` flags (see
+kmalloc()) for the allocation, but rejects flags used to specify a memory
+zone such as GFP_DMA or GFP_HIGHMEM.
+
+Before giving the memory to the device, dma_sync_single_for_device() needs
+to be called, and before reading memory written by the device,
+dma_sync_single_for_cpu(), just like for streaming DMA mappings that are
+reused.
+
+::
+
+ void
+ dma_free_pages(struct device *dev, size_t size, struct page *page,
+ dma_addr_t dma_handle, enum dma_data_direction dir)
+
+Free a region of memory previously allocated using dma_alloc_pages().
+dev, size, dma_handle and dir must all be the same as those passed into
+dma_alloc_pages(). page must be the pointer returned by dma_alloc_pages().
+
+::
+
+ int
+ dma_mmap_pages(struct device *dev, struct vm_area_struct *vma,
+ size_t size, struct page *page)
+
+Map an allocation returned from dma_alloc_pages() into a user address space.
+dev and size must be the same as those passed into dma_alloc_pages().
+page must be the pointer returned by dma_alloc_pages().
+
+::
+
+ void *
+ dma_alloc_noncoherent(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, enum dma_data_direction dir,
+ gfp_t gfp)
+
+This routine is a convenient wrapper around dma_alloc_pages that returns the
+kernel virtual address for the allocated memory instead of the page structure.
+
+::
+
+ void
+ dma_free_noncoherent(struct device *dev, size_t size, void *cpu_addr,
+ dma_addr_t dma_handle, enum dma_data_direction dir)
+
+Free a region of memory previously allocated using dma_alloc_noncoherent().
+dev, size, dma_handle and dir must all be the same as those passed into
+dma_alloc_noncoherent(). cpu_addr must be the virtual address returned by
+dma_alloc_noncoherent().
+
+::
+
+ struct sg_table *
+ dma_alloc_noncontiguous(struct device *dev, size_t size,
+ enum dma_data_direction dir, gfp_t gfp,
+ unsigned long attrs);
+
+This routine allocates <size> bytes of non-coherent and possibly non-contiguous
+memory. It returns a pointer to struct sg_table that describes the allocated
+and DMA mapped memory, or NULL if the allocation failed. The resulting memory
+can be used for struct page mapped into a scatterlist are suitable for.
+
+The return sg_table is guaranteed to have 1 single DMA mapped segment as
+indicated by sgt->nents, but it might have multiple CPU side segments as
+indicated by sgt->orig_nents.
+
+The dir parameter specified if data is read and/or written by the device,
+see dma_map_single() for details.
+
+The gfp parameter allows the caller to specify the ``GFP_`` flags (see
+kmalloc()) for the allocation, but rejects flags used to specify a memory
+zone such as GFP_DMA or GFP_HIGHMEM.
+
+The attrs argument must be either 0 or DMA_ATTR_ALLOC_SINGLE_PAGES.
+
+Before giving the memory to the device, dma_sync_sgtable_for_device() needs
+to be called, and before reading memory written by the device,
+dma_sync_sgtable_for_cpu(), just like for streaming DMA mappings that are
+reused.
+
+::
+
+ void
+ dma_free_noncontiguous(struct device *dev, size_t size,
+ struct sg_table *sgt,
+ enum dma_data_direction dir)
+
+Free memory previously allocated using dma_alloc_noncontiguous(). dev, size,
+and dir must all be the same as those passed into dma_alloc_noncontiguous().
+sgt must be the pointer returned by dma_alloc_noncontiguous().
+
+::
+
+ void *
+ dma_vmap_noncontiguous(struct device *dev, size_t size,
+ struct sg_table *sgt)
+
+Return a contiguous kernel mapping for an allocation returned from
+dma_alloc_noncontiguous(). dev and size must be the same as those passed into
+dma_alloc_noncontiguous(). sgt must be the pointer returned by
+dma_alloc_noncontiguous().
+
+Once a non-contiguous allocation is mapped using this function, the
+flush_kernel_vmap_range() and invalidate_kernel_vmap_range() APIs must be used
+to manage the coherency between the kernel mapping, the device and user space
+mappings (if any).
+
+::
+
+ void
+ dma_vunmap_noncontiguous(struct device *dev, void *vaddr)
+
+Unmap a kernel mapping returned by dma_vmap_noncontiguous(). dev must be the
+same the one passed into dma_alloc_noncontiguous(). vaddr must be the pointer
+returned by dma_vmap_noncontiguous().
+
+
+::
+
+ int
+ dma_mmap_noncontiguous(struct device *dev, struct vm_area_struct *vma,
+ size_t size, struct sg_table *sgt)
+
+Map an allocation returned from dma_alloc_noncontiguous() into a user address
+space. dev and size must be the same as those passed into
+dma_alloc_noncontiguous(). sgt must be the pointer returned by
+dma_alloc_noncontiguous().
+
+::
+
+ int
+ dma_get_cache_alignment(void)
+
+Returns the processor cache alignment. This is the absolute minimum
+alignment *and* width that you must observe when either mapping
+memory or doing partial flushes.
+
+.. note::
+
+ This API may return a number *larger* than the actual cache
+ line, but it will guarantee that one or more cache lines fit exactly
+ into the width returned by this call. It will also always be a power
+ of two for easy alignment.
+
+
+Part III - Debug drivers use of the DMA API
+-------------------------------------------
+
+The DMA API as described above has some constraints. DMA addresses must be
+released with the corresponding function with the same size for example. With
+the advent of hardware IOMMUs it becomes more and more important that drivers
+do not violate those constraints. In the worst case such a violation can
+result in data corruption up to destroyed filesystems.
+
+To debug drivers and find bugs in the usage of the DMA API checking code can
+be compiled into the kernel which will tell the developer about those
+violations. If your architecture supports it you can select the "Enable
+debugging of DMA API usage" option in your kernel configuration. Enabling this
+option has a performance impact. Do not enable it in production kernels.
+
+If you boot the resulting kernel will contain code which does some bookkeeping
+about what DMA memory was allocated for which device. If this code detects an
+error it prints a warning message with some details into your kernel log. An
+example warning message may look like this::
+
+ WARNING: at /data2/repos/linux-2.6-iommu/lib/dma-debug.c:448
+ check_unmap+0x203/0x490()
+ Hardware name:
+ forcedeth 0000:00:08.0: DMA-API: device driver frees DMA memory with wrong
+ function [device address=0x00000000640444be] [size=66 bytes] [mapped as
+ single] [unmapped as page]
+ Modules linked in: nfsd exportfs bridge stp llc r8169
+ Pid: 0, comm: swapper Tainted: G W 2.6.28-dmatest-09289-g8bb99c0 #1
+ Call Trace:
+ <IRQ> [<ffffffff80240b22>] warn_slowpath+0xf2/0x130
+ [<ffffffff80647b70>] _spin_unlock+0x10/0x30
+ [<ffffffff80537e75>] usb_hcd_link_urb_to_ep+0x75/0xc0
+ [<ffffffff80647c22>] _spin_unlock_irqrestore+0x12/0x40
+ [<ffffffff8055347f>] ohci_urb_enqueue+0x19f/0x7c0
+ [<ffffffff80252f96>] queue_work+0x56/0x60
+ [<ffffffff80237e10>] enqueue_task_fair+0x20/0x50
+ [<ffffffff80539279>] usb_hcd_submit_urb+0x379/0xbc0
+ [<ffffffff803b78c3>] cpumask_next_and+0x23/0x40
+ [<ffffffff80235177>] find_busiest_group+0x207/0x8a0
+ [<ffffffff8064784f>] _spin_lock_irqsave+0x1f/0x50
+ [<ffffffff803c7ea3>] check_unmap+0x203/0x490
+ [<ffffffff803c8259>] debug_dma_unmap_phys+0x49/0x50
+ [<ffffffff80485f26>] nv_tx_done_optimized+0xc6/0x2c0
+ [<ffffffff80486c13>] nv_nic_irq_optimized+0x73/0x2b0
+ [<ffffffff8026df84>] handle_IRQ_event+0x34/0x70
+ [<ffffffff8026ffe9>] handle_edge_irq+0xc9/0x150
+ [<ffffffff8020e3ab>] do_IRQ+0xcb/0x1c0
+ [<ffffffff8020c093>] ret_from_intr+0x0/0xa
+ <EOI> <4>---[ end trace f6435a98e2a38c0e ]---
+
+The driver developer can find the driver and the device including a stacktrace
+of the DMA API call which caused this warning.
+
+Per default only the first error will result in a warning message. All other
+errors will only silently counted. This limitation exist to prevent the code
+from flooding your kernel log. To support debugging a device driver this can
+be disabled via debugfs. See the debugfs interface documentation below for
+details.
+
+The debugfs directory for the DMA API debugging code is called dma-api/. In
+this directory the following files can currently be found:
+
+=============================== ===============================================
+dma-api/all_errors This file contains a numeric value. If this
+ value is not equal to zero the debugging code
+ will print a warning for every error it finds
+ into the kernel log. Be careful with this
+ option, as it can easily flood your logs.
+
+dma-api/disabled This read-only file contains the character 'Y'
+ if the debugging code is disabled. This can
+ happen when it runs out of memory or if it was
+ disabled at boot time
+
+dma-api/dump This read-only file contains current DMA
+ mappings.
+
+dma-api/error_count This file is read-only and shows the total
+ numbers of errors found.
+
+dma-api/num_errors The number in this file shows how many
+ warnings will be printed to the kernel log
+ before it stops. This number is initialized to
+ one at system boot and be set by writing into
+ this file
+
+dma-api/min_free_entries This read-only file can be read to get the
+ minimum number of free dma_debug_entries the
+ allocator has ever seen. If this value goes
+ down to zero the code will attempt to increase
+ nr_total_entries to compensate.
+
+dma-api/num_free_entries The current number of free dma_debug_entries
+ in the allocator.
+
+dma-api/nr_total_entries The total number of dma_debug_entries in the
+ allocator, both free and used.
+
+dma-api/driver_filter You can write a name of a driver into this file
+ to limit the debug output to requests from that
+ particular driver. Write an empty string to
+ that file to disable the filter and see
+ all errors again.
+=============================== ===============================================
+
+If you have this code compiled into your kernel it will be enabled by default.
+If you want to boot without the bookkeeping anyway you can provide
+'dma_debug=off' as a boot parameter. This will disable DMA API debugging.
+Notice that you can not enable it again at runtime. You have to reboot to do
+so.
+
+If you want to see debug messages only for a special device driver you can
+specify the dma_debug_driver=<drivername> parameter. This will enable the
+driver filter at boot time. The debug code will only print errors for that
+driver afterwards. This filter can be disabled or changed later using debugfs.
+
+When the code disables itself at runtime this is most likely because it ran
+out of dma_debug_entries and was unable to allocate more on-demand. 65536
+entries are preallocated at boot - if this is too low for you boot with
+'dma_debug_entries=<your_desired_number>' to overwrite the default. Note
+that the code allocates entries in batches, so the exact number of
+preallocated entries may be greater than the actual number requested. The
+code will print to the kernel log each time it has dynamically allocated
+as many entries as were initially preallocated. This is to indicate that a
+larger preallocation size may be appropriate, or if it happens continually
+that a driver may be leaking mappings.
+
+::
+
+ void
+ debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr);
+
+dma-debug interface debug_dma_mapping_error() to debug drivers that fail
+to check DMA mapping errors on addresses returned by dma_map_single() and
+dma_map_page() interfaces. This interface clears a flag set by
+debug_dma_map_phys() to indicate that dma_mapping_error() has been called by
+the driver. When driver does unmap, debug_dma_unmap() checks the flag and if
+this flag is still set, prints warning message that includes call trace that
+leads up to the unmap. This interface can be called from dma_mapping_error()
+routines to enable DMA mapping error check debugging.
+
+Functions and structures
+========================
+
+.. kernel-doc:: include/linux/scatterlist.h
+.. kernel-doc:: lib/scatterlist.c
diff --git a/Documentation/core-api/dma-attributes.rst b/Documentation/core-api/dma-attributes.rst
new file mode 100644
index 000000000000..0bdc2be65e57
--- /dev/null
+++ b/Documentation/core-api/dma-attributes.rst
@@ -0,0 +1,150 @@
+==============
+DMA attributes
+==============
+
+This document describes the semantics of the DMA attributes that are
+defined in linux/dma-mapping.h.
+
+DMA_ATTR_WEAK_ORDERING
+----------------------
+
+DMA_ATTR_WEAK_ORDERING specifies that reads and writes to the mapping
+may be weakly ordered, that is that reads and writes may pass each other.
+
+Since it is optional for platforms to implement DMA_ATTR_WEAK_ORDERING,
+those that do not will simply ignore the attribute and exhibit default
+behavior.
+
+DMA_ATTR_WRITE_COMBINE
+----------------------
+
+DMA_ATTR_WRITE_COMBINE specifies that writes to the mapping may be
+buffered to improve performance.
+
+Since it is optional for platforms to implement DMA_ATTR_WRITE_COMBINE,
+those that do not will simply ignore the attribute and exhibit default
+behavior.
+
+DMA_ATTR_NO_KERNEL_MAPPING
+--------------------------
+
+DMA_ATTR_NO_KERNEL_MAPPING lets the platform to avoid creating a kernel
+virtual mapping for the allocated buffer. On some architectures creating
+such mapping is non-trivial task and consumes very limited resources
+(like kernel virtual address space or dma consistent address space).
+Buffers allocated with this attribute can be only passed to user space
+by calling dma_mmap_attrs(). By using this API, you are guaranteeing
+that you won't dereference the pointer returned by dma_alloc_attr(). You
+can treat it as a cookie that must be passed to dma_mmap_attrs() and
+dma_free_attrs(). Make sure that both of these also get this attribute
+set on each call.
+
+Since it is optional for platforms to implement
+DMA_ATTR_NO_KERNEL_MAPPING, those that do not will simply ignore the
+attribute and exhibit default behavior.
+
+DMA_ATTR_SKIP_CPU_SYNC
+----------------------
+
+By default dma_map_{single,page,sg} functions family transfer a given
+buffer from CPU domain to device domain. Some advanced use cases might
+require sharing a buffer between more than one device. This requires
+having a mapping created separately for each device and is usually
+performed by calling dma_map_{single,page,sg} function more than once
+for the given buffer with device pointer to each device taking part in
+the buffer sharing. The first call transfers a buffer from 'CPU' domain
+to 'device' domain, what synchronizes CPU caches for the given region
+(usually it means that the cache has been flushed or invalidated
+depending on the dma direction). However, next calls to
+dma_map_{single,page,sg}() for other devices will perform exactly the
+same synchronization operation on the CPU cache. CPU cache synchronization
+might be a time consuming operation, especially if the buffers are
+large, so it is highly recommended to avoid it if possible.
+DMA_ATTR_SKIP_CPU_SYNC allows platform code to skip synchronization of
+the CPU cache for the given buffer assuming that it has been already
+transferred to 'device' domain. This attribute can be also used for
+dma_unmap_{single,page,sg} functions family to force buffer to stay in
+device domain after releasing a mapping for it. Use this attribute with
+care!
+
+DMA_ATTR_FORCE_CONTIGUOUS
+-------------------------
+
+By default DMA-mapping subsystem is allowed to assemble the buffer
+allocated by dma_alloc_attrs() function from individual pages if it can
+be mapped as contiguous chunk into device dma address space. By
+specifying this attribute the allocated buffer is forced to be contiguous
+also in physical memory.
+
+DMA_ATTR_ALLOC_SINGLE_PAGES
+---------------------------
+
+This is a hint to the DMA-mapping subsystem that it's probably not worth
+the time to try to allocate memory to in a way that gives better TLB
+efficiency (AKA it's not worth trying to build the mapping out of larger
+pages). You might want to specify this if:
+
+- You know that the accesses to this memory won't thrash the TLB.
+ You might know that the accesses are likely to be sequential or
+ that they aren't sequential but it's unlikely you'll ping-pong
+ between many addresses that are likely to be in different physical
+ pages.
+- You know that the penalty of TLB misses while accessing the
+ memory will be small enough to be inconsequential. If you are
+ doing a heavy operation like decryption or decompression this
+ might be the case.
+- You know that the DMA mapping is fairly transitory. If you expect
+ the mapping to have a short lifetime then it may be worth it to
+ optimize allocation (avoid coming up with large pages) instead of
+ getting the slight performance win of larger pages.
+
+Setting this hint doesn't guarantee that you won't get huge pages, but it
+means that we won't try quite as hard to get them.
+
+.. note:: At the moment DMA_ATTR_ALLOC_SINGLE_PAGES is only implemented on ARM,
+ though ARM64 patches will likely be posted soon.
+
+DMA_ATTR_NO_WARN
+----------------
+
+This tells the DMA-mapping subsystem to suppress allocation failure reports
+(similarly to __GFP_NOWARN).
+
+On some architectures allocation failures are reported with error messages
+to the system logs. Although this can help to identify and debug problems,
+drivers which handle failures (eg, retry later) have no problems with them,
+and can actually flood the system logs with error messages that aren't any
+problem at all, depending on the implementation of the retry mechanism.
+
+So, this provides a way for drivers to avoid those error messages on calls
+where allocation failures are not a problem, and shouldn't bother the logs.
+
+.. note:: At the moment DMA_ATTR_NO_WARN is only implemented on PowerPC.
+
+DMA_ATTR_PRIVILEGED
+-------------------
+
+Some advanced peripherals such as remote processors and GPUs perform
+accesses to DMA buffers in both privileged "supervisor" and unprivileged
+"user" modes. This attribute is used to indicate to the DMA-mapping
+subsystem that the buffer is fully accessible at the elevated privilege
+level (and ideally inaccessible or at least read-only at the
+lesser-privileged levels).
+
+DMA_ATTR_MMIO
+-------------
+
+This attribute indicates the physical address is not normal system
+memory. It may not be used with kmap*()/phys_to_virt()/phys_to_page()
+functions, it may not be cacheable, and access using CPU load/store
+instructions may not be allowed.
+
+Usually this will be used to describe MMIO addresses, or other non-cacheable
+register addresses. When DMA mapping this sort of address we call
+the operation Peer to Peer as a one device is DMA'ing to another device.
+For PCI devices the p2pdma APIs must be used to determine if
+DMA_ATTR_MMIO is appropriate.
+
+For architectures that require cache flushing for DMA coherence
+DMA_ATTR_MMIO will not perform any cache flushing. The address
+provided must never be mapped cacheable into the CPU.
diff --git a/Documentation/core-api/dma-isa-lpc.rst b/Documentation/core-api/dma-isa-lpc.rst
new file mode 100644
index 000000000000..17b193603f0a
--- /dev/null
+++ b/Documentation/core-api/dma-isa-lpc.rst
@@ -0,0 +1,152 @@
+============================
+DMA with ISA and LPC devices
+============================
+
+:Author: Pierre Ossman <drzeus@drzeus.cx>
+
+This document describes how to do DMA transfers using the old ISA DMA
+controller. Even though ISA is more or less dead today the LPC bus
+uses the same DMA system so it will be around for quite some time.
+
+Headers and dependencies
+------------------------
+
+To do ISA style DMA you need to include two headers::
+
+ #include <linux/dma-mapping.h>
+ #include <asm/dma.h>
+
+The first is the generic DMA API used to convert virtual addresses to
+bus addresses (see Documentation/core-api/dma-api.rst for details).
+
+The second contains the routines specific to ISA DMA transfers. Since
+this is not present on all platforms make sure you construct your
+Kconfig to be dependent on ISA_DMA_API (not ISA) so that nobody tries
+to build your driver on unsupported platforms.
+
+Buffer allocation
+-----------------
+
+The ISA DMA controller has some very strict requirements on which
+memory it can access so extra care must be taken when allocating
+buffers.
+
+(You usually need a special buffer for DMA transfers instead of
+transferring directly to and from your normal data structures.)
+
+The DMA-able address space is the lowest 16 MB of _physical_ memory.
+Also the transfer block may not cross page boundaries (which are 64
+or 128 KiB depending on which channel you use).
+
+In order to allocate a piece of memory that satisfies all these
+requirements you pass the flag GFP_DMA to kmalloc.
+
+Unfortunately the memory available for ISA DMA is scarce so unless you
+allocate the memory during boot-up it's a good idea to also pass
+__GFP_RETRY_MAYFAIL and __GFP_NOWARN to make the allocator try a bit harder.
+
+(This scarcity also means that you should allocate the buffer as
+early as possible and not release it until the driver is unloaded.)
+
+Address translation
+-------------------
+
+To translate the virtual address to a bus address, use the normal DMA
+API. Do _not_ use isa_virt_to_bus() even though it does the same
+thing. The reason for this is that the function isa_virt_to_bus()
+will require a Kconfig dependency to ISA, not just ISA_DMA_API which
+is really all you need. Remember that even though the DMA controller
+has its origins in ISA it is used elsewhere.
+
+Note: x86_64 had a broken DMA API when it came to ISA but has since
+been fixed. If your arch has problems then fix the DMA API instead of
+reverting to the ISA functions.
+
+Channels
+--------
+
+A normal ISA DMA controller has 8 channels. The lower four are for
+8-bit transfers and the upper four are for 16-bit transfers.
+
+(Actually the DMA controller is really two separate controllers where
+channel 4 is used to give DMA access for the second controller (0-3).
+This means that of the four 16-bits channels only three are usable.)
+
+You allocate these in a similar fashion as all basic resources:
+
+extern int request_dma(unsigned int dmanr, const char * device_id);
+extern void free_dma(unsigned int dmanr);
+
+The ability to use 16-bit or 8-bit transfers is _not_ up to you as a
+driver author but depends on what the hardware supports. Check your
+specs or test different channels.
+
+Transfer data
+-------------
+
+Now for the good stuff, the actual DMA transfer. :)
+
+Before you use any ISA DMA routines you need to claim the DMA lock
+using claim_dma_lock(). The reason is that some DMA operations are
+not atomic so only one driver may fiddle with the registers at a
+time.
+
+The first time you use the DMA controller you should call
+clear_dma_ff(). This clears an internal register in the DMA
+controller that is used for the non-atomic operations. As long as you
+(and everyone else) uses the locking functions then you only need to
+reset this once.
+
+Next, you tell the controller in which direction you intend to do the
+transfer using set_dma_mode(). Currently you have the options
+DMA_MODE_READ and DMA_MODE_WRITE.
+
+Set the address from where the transfer should start (this needs to
+be 16-bit aligned for 16-bit transfers) and how many bytes to
+transfer. Note that it's _bytes_. The DMA routines will do all the
+required translation to values that the DMA controller understands.
+
+The final step is enabling the DMA channel and releasing the DMA
+lock.
+
+Once the DMA transfer is finished (or timed out) you should disable
+the channel again. You should also check get_dma_residue() to make
+sure that all data has been transferred.
+
+Example::
+
+ int flags, residue;
+
+ flags = claim_dma_lock();
+
+ clear_dma_ff();
+
+ set_dma_mode(channel, DMA_MODE_WRITE);
+ set_dma_addr(channel, phys_addr);
+ set_dma_count(channel, num_bytes);
+
+ dma_enable(channel);
+
+ release_dma_lock(flags);
+
+ while (!device_done());
+
+ flags = claim_dma_lock();
+
+ dma_disable(channel);
+
+ residue = dma_get_residue(channel);
+ if (residue != 0)
+ printk(KERN_ERR "driver: Incomplete DMA transfer!"
+ " %d bytes left!\n", residue);
+
+ release_dma_lock(flags);
+
+Suspend/resume
+--------------
+
+It is the driver's responsibility to make sure that the machine isn't
+suspended while a DMA transfer is in progress. Also, all DMA settings
+are lost when the system suspends so if your driver relies on the DMA
+controller being in a certain state then you have to restore these
+registers upon resume.
diff --git a/Documentation/core-api/entry.rst b/Documentation/core-api/entry.rst
new file mode 100644
index 000000000000..71d8eedc0549
--- /dev/null
+++ b/Documentation/core-api/entry.rst
@@ -0,0 +1,279 @@
+Entry/exit handling for exceptions, interrupts, syscalls and KVM
+================================================================
+
+All transitions between execution domains require state updates which are
+subject to strict ordering constraints. State updates are required for the
+following:
+
+ * Lockdep
+ * RCU / Context tracking
+ * Preemption counter
+ * Tracing
+ * Time accounting
+
+The update order depends on the transition type and is explained below in
+the transition type sections: `Syscalls`_, `KVM`_, `Interrupts and regular
+exceptions`_, `NMI and NMI-like exceptions`_.
+
+Non-instrumentable code - noinstr
+---------------------------------
+
+Most instrumentation facilities depend on RCU, so instrumentation is prohibited
+for entry code before RCU starts watching and exit code after RCU stops
+watching. In addition, many architectures must save and restore register state,
+which means that (for example) a breakpoint in the breakpoint entry code would
+overwrite the debug registers of the initial breakpoint.
+
+Such code must be marked with the 'noinstr' attribute, placing that code into a
+special section inaccessible to instrumentation and debug facilities. Some
+functions are partially instrumentable, which is handled by marking them
+noinstr and using instrumentation_begin() and instrumentation_end() to flag the
+instrumentable ranges of code:
+
+.. code-block:: c
+
+ noinstr void entry(void)
+ {
+ handle_entry(); // <-- must be 'noinstr' or '__always_inline'
+ ...
+
+ instrumentation_begin();
+ handle_context(); // <-- instrumentable code
+ instrumentation_end();
+
+ ...
+ handle_exit(); // <-- must be 'noinstr' or '__always_inline'
+ }
+
+This allows verification of the 'noinstr' restrictions via objtool on
+supported architectures.
+
+Invoking non-instrumentable functions from instrumentable context has no
+restrictions and is useful to protect e.g. state switching which would
+cause malfunction if instrumented.
+
+All non-instrumentable entry/exit code sections before and after the RCU
+state transitions must run with interrupts disabled.
+
+Syscalls
+--------
+
+Syscall-entry code starts in assembly code and calls out into low-level C code
+after establishing low-level architecture-specific state and stack frames. This
+low-level C code must not be instrumented. A typical syscall handling function
+invoked from low-level assembly code looks like this:
+
+.. code-block:: c
+
+ noinstr void syscall(struct pt_regs *regs, int nr)
+ {
+ arch_syscall_enter(regs);
+ nr = syscall_enter_from_user_mode(regs, nr);
+
+ instrumentation_begin();
+ if (!invoke_syscall(regs, nr) && nr != -1)
+ result_reg(regs) = __sys_ni_syscall(regs);
+ instrumentation_end();
+
+ syscall_exit_to_user_mode(regs);
+ }
+
+syscall_enter_from_user_mode() first invokes enter_from_user_mode() which
+establishes state in the following order:
+
+ * Lockdep
+ * RCU / Context tracking
+ * Tracing
+
+and then invokes the various entry work functions like ptrace, seccomp, audit,
+syscall tracing, etc. After all that is done, the instrumentable invoke_syscall
+function can be invoked. The instrumentable code section then ends, after which
+syscall_exit_to_user_mode() is invoked.
+
+syscall_exit_to_user_mode() handles all work which needs to be done before
+returning to user space like tracing, audit, signals, task work etc. After
+that it invokes exit_to_user_mode() which again handles the state
+transition in the reverse order:
+
+ * Tracing
+ * RCU / Context tracking
+ * Lockdep
+
+syscall_enter_from_user_mode() and syscall_exit_to_user_mode() are also
+available as fine grained subfunctions in cases where the architecture code
+has to do extra work between the various steps. In such cases it has to
+ensure that enter_from_user_mode() is called first on entry and
+exit_to_user_mode() is called last on exit.
+
+Do not nest syscalls. Nested syscalls will cause RCU and/or context tracking
+to print a warning.
+
+KVM
+---
+
+Entering or exiting guest mode is very similar to syscalls. From the host
+kernel point of view the CPU goes off into user space when entering the
+guest and returns to the kernel on exit.
+
+guest_state_enter_irqoff() is a KVM-specific variant of exit_to_user_mode()
+and guest_state_exit_irqoff() is the KVM variant of enter_from_user_mode().
+The state operations have the same ordering.
+
+Task work handling is done separately for guest at the boundary of the
+vcpu_run() loop via xfer_to_guest_mode_handle_work() which is a subset of
+the work handled on return to user space.
+
+Do not nest KVM entry/exit transitions because doing so is nonsensical.
+
+Interrupts and regular exceptions
+---------------------------------
+
+Interrupts entry and exit handling is slightly more complex than syscalls
+and KVM transitions.
+
+If an interrupt is raised while the CPU executes in user space, the entry
+and exit handling is exactly the same as for syscalls.
+
+If the interrupt is raised while the CPU executes in kernel space the entry and
+exit handling is slightly different. RCU state is only updated when the
+interrupt is raised in the context of the CPU's idle task. Otherwise, RCU will
+already be watching. Lockdep and tracing have to be updated unconditionally.
+
+irqentry_enter() and irqentry_exit() provide the implementation for this.
+
+The architecture-specific part looks similar to syscall handling:
+
+.. code-block:: c
+
+ noinstr void interrupt(struct pt_regs *regs, int nr)
+ {
+ arch_interrupt_enter(regs);
+ state = irqentry_enter(regs);
+
+ instrumentation_begin();
+
+ irq_enter_rcu();
+ invoke_irq_handler(regs, nr);
+ irq_exit_rcu();
+
+ instrumentation_end();
+
+ irqentry_exit(regs, state);
+ }
+
+Note that the invocation of the actual interrupt handler is within a
+irq_enter_rcu() and irq_exit_rcu() pair.
+
+irq_enter_rcu() updates the preemption count which makes in_hardirq()
+return true, handles NOHZ tick state and interrupt time accounting. This
+means that up to the point where irq_enter_rcu() is invoked in_hardirq()
+returns false.
+
+irq_exit_rcu() handles interrupt time accounting, undoes the preemption
+count update and eventually handles soft interrupts and NOHZ tick state.
+
+In theory, the preemption count could be updated in irqentry_enter(). In
+practice, deferring this update to irq_enter_rcu() allows the preemption-count
+code to be traced, while also maintaining symmetry with irq_exit_rcu() and
+irqentry_exit(), which are described in the next paragraph. The only downside
+is that the early entry code up to irq_enter_rcu() must be aware that the
+preemption count has not yet been updated with the HARDIRQ_OFFSET state.
+
+Note that irq_exit_rcu() must remove HARDIRQ_OFFSET from the preemption count
+before it handles soft interrupts, whose handlers must run in BH context rather
+than irq-disabled context. In addition, irqentry_exit() might schedule, which
+also requires that HARDIRQ_OFFSET has been removed from the preemption count.
+
+Even though interrupt handlers are expected to run with local interrupts
+disabled, interrupt nesting is common from an entry/exit perspective. For
+example, softirq handling happens within an irqentry_{enter,exit}() block with
+local interrupts enabled. Also, although uncommon, nothing prevents an
+interrupt handler from re-enabling interrupts.
+
+Interrupt entry/exit code doesn't strictly need to handle reentrancy, since it
+runs with local interrupts disabled. But NMIs can happen anytime, and a lot of
+the entry code is shared between the two.
+
+NMI and NMI-like exceptions
+---------------------------
+
+NMIs and NMI-like exceptions (machine checks, double faults, debug
+interrupts, etc.) can hit any context and must be extra careful with
+the state.
+
+State changes for debug exceptions and machine-check exceptions depend on
+whether these exceptions happened in user-space (breakpoints or watchpoints) or
+in kernel mode (code patching). From user-space, they are treated like
+interrupts, while from kernel mode they are treated like NMIs.
+
+NMIs and other NMI-like exceptions handle state transitions without
+distinguishing between user-mode and kernel-mode origin.
+
+The state update on entry is handled in irqentry_nmi_enter() which updates
+state in the following order:
+
+ * Preemption counter
+ * Lockdep
+ * RCU / Context tracking
+ * Tracing
+
+The exit counterpart irqentry_nmi_exit() does the reverse operation in the
+reverse order.
+
+Note that the update of the preemption counter has to be the first
+operation on enter and the last operation on exit. The reason is that both
+lockdep and RCU rely on in_nmi() returning true in this case. The
+preemption count modification in the NMI entry/exit case must not be
+traced.
+
+Architecture-specific code looks like this:
+
+.. code-block:: c
+
+ noinstr void nmi(struct pt_regs *regs)
+ {
+ arch_nmi_enter(regs);
+ state = irqentry_nmi_enter(regs);
+
+ instrumentation_begin();
+ nmi_handler(regs);
+ instrumentation_end();
+
+ irqentry_nmi_exit(regs);
+ }
+
+and for e.g. a debug exception it can look like this:
+
+.. code-block:: c
+
+ noinstr void debug(struct pt_regs *regs)
+ {
+ arch_nmi_enter(regs);
+
+ debug_regs = save_debug_regs();
+
+ if (user_mode(regs)) {
+ state = irqentry_enter(regs);
+
+ instrumentation_begin();
+ user_mode_debug_handler(regs, debug_regs);
+ instrumentation_end();
+
+ irqentry_exit(regs, state);
+ } else {
+ state = irqentry_nmi_enter(regs);
+
+ instrumentation_begin();
+ kernel_mode_debug_handler(regs, debug_regs);
+ instrumentation_end();
+
+ irqentry_nmi_exit(regs, state);
+ }
+ }
+
+There is no combined irqentry_nmi_if_kernel() function available as the
+above cannot be handled in an exception-agnostic way.
+
+NMIs can happen in any context. For example, an NMI-like exception triggered
+while handling an NMI. So NMI entry code has to be reentrant and state updates
+need to handle nesting.
diff --git a/Documentation/core-api/flexible-arrays.rst b/Documentation/core-api/flexible-arrays.rst
deleted file mode 100644
index b6b85a1b518e..000000000000
--- a/Documentation/core-api/flexible-arrays.rst
+++ /dev/null
@@ -1,130 +0,0 @@
-
-===================================
-Using flexible arrays in the kernel
-===================================
-
-Large contiguous memory allocations can be unreliable in the Linux kernel.
-Kernel programmers will sometimes respond to this problem by allocating
-pages with :c:func:`vmalloc()`. This solution not ideal, though. On 32-bit
-systems, memory from vmalloc() must be mapped into a relatively small address
-space; it's easy to run out. On SMP systems, the page table changes required
-by vmalloc() allocations can require expensive cross-processor interrupts on
-all CPUs. And, on all systems, use of space in the vmalloc() range increases
-pressure on the translation lookaside buffer (TLB), reducing the performance
-of the system.
-
-In many cases, the need for memory from vmalloc() can be eliminated by piecing
-together an array from smaller parts; the flexible array library exists to make
-this task easier.
-
-A flexible array holds an arbitrary (within limits) number of fixed-sized
-objects, accessed via an integer index. Sparse arrays are handled
-reasonably well. Only single-page allocations are made, so memory
-allocation failures should be relatively rare. The down sides are that the
-arrays cannot be indexed directly, individual object size cannot exceed the
-system page size, and putting data into a flexible array requires a copy
-operation. It's also worth noting that flexible arrays do no internal
-locking at all; if concurrent access to an array is possible, then the
-caller must arrange for appropriate mutual exclusion.
-
-The creation of a flexible array is done with :c:func:`flex_array_alloc()`::
-
- #include <linux/flex_array.h>
-
- struct flex_array *flex_array_alloc(int element_size,
- unsigned int total,
- gfp_t flags);
-
-The individual object size is provided by ``element_size``, while total is the
-maximum number of objects which can be stored in the array. The flags
-argument is passed directly to the internal memory allocation calls. With
-the current code, using flags to ask for high memory is likely to lead to
-notably unpleasant side effects.
-
-It is also possible to define flexible arrays at compile time with::
-
- DEFINE_FLEX_ARRAY(name, element_size, total);
-
-This macro will result in a definition of an array with the given name; the
-element size and total will be checked for validity at compile time.
-
-Storing data into a flexible array is accomplished with a call to
-:c:func:`flex_array_put()`::
-
- int flex_array_put(struct flex_array *array, unsigned int element_nr,
- void *src, gfp_t flags);
-
-This call will copy the data from src into the array, in the position
-indicated by ``element_nr`` (which must be less than the maximum specified when
-the array was created). If any memory allocations must be performed, flags
-will be used. The return value is zero on success, a negative error code
-otherwise.
-
-There might possibly be a need to store data into a flexible array while
-running in some sort of atomic context; in this situation, sleeping in the
-memory allocator would be a bad thing. That can be avoided by using
-``GFP_ATOMIC`` for the flags value, but, often, there is a better way. The
-trick is to ensure that any needed memory allocations are done before
-entering atomic context, using :c:func:`flex_array_prealloc()`::
-
- int flex_array_prealloc(struct flex_array *array, unsigned int start,
- unsigned int nr_elements, gfp_t flags);
-
-This function will ensure that memory for the elements indexed in the range
-defined by ``start`` and ``nr_elements`` has been allocated. Thereafter, a
-``flex_array_put()`` call on an element in that range is guaranteed not to
-block.
-
-Getting data back out of the array is done with :c:func:`flex_array_get()`::
-
- void *flex_array_get(struct flex_array *fa, unsigned int element_nr);
-
-The return value is a pointer to the data element, or NULL if that
-particular element has never been allocated.
-
-Note that it is possible to get back a valid pointer for an element which
-has never been stored in the array. Memory for array elements is allocated
-one page at a time; a single allocation could provide memory for several
-adjacent elements. Flexible array elements are normally initialized to the
-value ``FLEX_ARRAY_FREE`` (defined as 0x6c in <linux/poison.h>), so errors
-involving that number probably result from use of unstored array entries.
-Note that, if array elements are allocated with ``__GFP_ZERO``, they will be
-initialized to zero and this poisoning will not happen.
-
-Individual elements in the array can be cleared with
-:c:func:`flex_array_clear()`::
-
- int flex_array_clear(struct flex_array *array, unsigned int element_nr);
-
-This function will set the given element to ``FLEX_ARRAY_FREE`` and return
-zero. If storage for the indicated element is not allocated for the array,
-``flex_array_clear()`` will return ``-EINVAL`` instead. Note that clearing an
-element does not release the storage associated with it; to reduce the
-allocated size of an array, call :c:func:`flex_array_shrink()`::
-
- int flex_array_shrink(struct flex_array *array);
-
-The return value will be the number of pages of memory actually freed.
-This function works by scanning the array for pages containing nothing but
-``FLEX_ARRAY_FREE`` bytes, so (1) it can be expensive, and (2) it will not work
-if the array's pages are allocated with ``__GFP_ZERO``.
-
-It is possible to remove all elements of an array with a call to
-:c:func:`flex_array_free_parts()`::
-
- void flex_array_free_parts(struct flex_array *array);
-
-This call frees all elements, but leaves the array itself in place.
-Freeing the entire array is done with :c:func:`flex_array_free()`::
-
- void flex_array_free(struct flex_array *array);
-
-As of this writing, there are no users of flexible arrays in the mainline
-kernel. The functions described here are also not exported to modules;
-that will probably be fixed when somebody comes up with a need for it.
-
-
-Flexible array functions
-------------------------
-
-.. kernel-doc:: include/linux/flex_array.h
diff --git a/Documentation/core-api/floating-point.rst b/Documentation/core-api/floating-point.rst
new file mode 100644
index 000000000000..a8d0d4b05052
--- /dev/null
+++ b/Documentation/core-api/floating-point.rst
@@ -0,0 +1,78 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+Floating-point API
+==================
+
+Kernel code is normally prohibited from using floating-point (FP) registers or
+instructions, including the C float and double data types. This rule reduces
+system call overhead, because the kernel does not need to save and restore the
+userspace floating-point register state.
+
+However, occasionally drivers or library functions may need to include FP code.
+This is supported by isolating the functions containing FP code to a separate
+translation unit (a separate source file), and saving/restoring the FP register
+state around calls to those functions. This creates "critical sections" of
+floating-point usage.
+
+The reason for this isolation is to prevent the compiler from generating code
+touching the FP registers outside these critical sections. Compilers sometimes
+use FP registers to optimize inlined ``memcpy`` or variable assignment, as
+floating-point registers may be wider than general-purpose registers.
+
+Usability of floating-point code within the kernel is architecture-specific.
+Additionally, because a single kernel may be configured to support platforms
+both with and without a floating-point unit, FPU availability must be checked
+both at build time and at run time.
+
+Several architectures implement the generic kernel floating-point API from
+``linux/fpu.h``, as described below. Some other architectures implement their
+own unique APIs, which are documented separately.
+
+Build-time API
+--------------
+
+Floating-point code may be built if the option ``ARCH_HAS_KERNEL_FPU_SUPPORT``
+is enabled. For C code, such code must be placed in a separate file, and that
+file must have its compilation flags adjusted using the following pattern::
+
+ CFLAGS_foo.o += $(CC_FLAGS_FPU)
+ CFLAGS_REMOVE_foo.o += $(CC_FLAGS_NO_FPU)
+
+Architectures are expected to define one or both of these variables in their
+top-level Makefile as needed. For example::
+
+ CC_FLAGS_FPU := -mhard-float
+
+or::
+
+ CC_FLAGS_NO_FPU := -msoft-float
+
+Normal kernel code is assumed to use the equivalent of ``CC_FLAGS_NO_FPU``.
+
+Runtime API
+-----------
+
+The runtime API is provided in ``linux/fpu.h``. This header cannot be included
+from files implementing FP code (those with their compilation flags adjusted as
+above). Instead, it must be included when defining the FP critical sections.
+
+.. c:function:: bool kernel_fpu_available( void )
+
+ This function reports if floating-point code can be used on this CPU or
+ platform. The value returned by this function is not expected to change
+ at runtime, so it only needs to be called once, not before every
+ critical section.
+
+.. c:function:: void kernel_fpu_begin( void )
+ void kernel_fpu_end( void )
+
+ These functions create a floating-point critical section. It is only
+ valid to call ``kernel_fpu_begin()`` after a previous call to
+ ``kernel_fpu_available()`` returned ``true``. These functions are only
+ guaranteed to be callable from (preemptible or non-preemptible) process
+ context.
+
+ Preemption may be disabled inside critical sections, so their size
+ should be minimized. They are *not* required to be reentrant. If the
+ caller expects to nest critical sections, it must implement its own
+ reference counting.
diff --git a/Documentation/core-api/folio_queue.rst b/Documentation/core-api/folio_queue.rst
new file mode 100644
index 000000000000..b7628896d2b6
--- /dev/null
+++ b/Documentation/core-api/folio_queue.rst
@@ -0,0 +1,209 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+===========
+Folio Queue
+===========
+
+:Author: David Howells <dhowells@redhat.com>
+
+.. Contents:
+
+ * Overview
+ * Initialisation
+ * Adding and removing folios
+ * Querying information about a folio
+ * Querying information about a folio_queue
+ * Folio queue iteration
+ * Folio marks
+ * Lockless simultaneous production/consumption issues
+
+
+Overview
+========
+
+The folio_queue struct forms a single segment in a segmented list of folios
+that can be used to form an I/O buffer. As such, the list can be iterated over
+using the ITER_FOLIOQ iov_iter type.
+
+The publicly accessible members of the structure are::
+
+ struct folio_queue {
+ struct folio_queue *next;
+ struct folio_queue *prev;
+ ...
+ };
+
+A pair of pointers are provided, ``next`` and ``prev``, that point to the
+segments on either side of the segment being accessed. Whilst this is a
+doubly-linked list, it is intentionally not a circular list; the outward
+sibling pointers in terminal segments should be NULL.
+
+Each segment in the list also stores:
+
+ * an ordered sequence of folio pointers,
+ * the size of each folio and
+ * three 1-bit marks per folio,
+
+but these should not be accessed directly as the underlying data structure may
+change, but rather the access functions outlined below should be used.
+
+The facility can be made accessible by::
+
+ #include <linux/folio_queue.h>
+
+and to use the iterator::
+
+ #include <linux/uio.h>
+
+
+Initialisation
+==============
+
+A segment should be initialised by calling::
+
+ void folioq_init(struct folio_queue *folioq);
+
+with a pointer to the segment to be initialised. Note that this will not
+necessarily initialise all the folio pointers, so care must be taken to check
+the number of folios added.
+
+
+Adding and removing folios
+==========================
+
+Folios can be set in the next unused slot in a segment struct by calling one
+of::
+
+ unsigned int folioq_append(struct folio_queue *folioq,
+ struct folio *folio);
+
+ unsigned int folioq_append_mark(struct folio_queue *folioq,
+ struct folio *folio);
+
+Both functions update the stored folio count, store the folio and note its
+size. The second function also sets the first mark for the folio added. Both
+functions return the number of the slot used. [!] Note that no attempt is made
+to check that the capacity wasn't overrun and the list will not be extended
+automatically.
+
+A folio can be excised by calling::
+
+ void folioq_clear(struct folio_queue *folioq, unsigned int slot);
+
+This clears the slot in the array and also clears all the marks for that folio,
+but doesn't change the folio count - so future accesses of that slot must check
+if the slot is occupied.
+
+
+Querying information about a folio
+==================================
+
+Information about the folio in a particular slot may be queried by the
+following function::
+
+ struct folio *folioq_folio(const struct folio_queue *folioq,
+ unsigned int slot);
+
+If a folio has not yet been set in that slot, this may yield an undefined
+pointer. The size of the folio in a slot may be queried with either of::
+
+ unsigned int folioq_folio_order(const struct folio_queue *folioq,
+ unsigned int slot);
+
+ size_t folioq_folio_size(const struct folio_queue *folioq,
+ unsigned int slot);
+
+The first function returns the size as an order and the second as a number of
+bytes.
+
+
+Querying information about a folio_queue
+========================================
+
+Information may be retrieved about a particular segment with the following
+functions::
+
+ unsigned int folioq_nr_slots(const struct folio_queue *folioq);
+
+ unsigned int folioq_count(struct folio_queue *folioq);
+
+ bool folioq_full(struct folio_queue *folioq);
+
+The first function returns the maximum capacity of a segment. It must not be
+assumed that this won't vary between segments. The second returns the number
+of folios added to a segments and the third is a shorthand to indicate if the
+segment has been filled to capacity.
+
+Not that the count and fullness are not affected by clearing folios from the
+segment. These are more about indicating how many slots in the array have been
+initialised, and it assumed that slots won't get reused, but rather the segment
+will get discarded as the queue is consumed.
+
+
+Folio marks
+===========
+
+Folios within a queue can also have marks assigned to them. These marks can be
+used to note information such as if a folio needs folio_put() calling upon it.
+There are three marks available to be set for each folio.
+
+The marks can be set by::
+
+ void folioq_mark(struct folio_queue *folioq, unsigned int slot);
+ void folioq_mark2(struct folio_queue *folioq, unsigned int slot);
+
+Cleared by::
+
+ void folioq_unmark(struct folio_queue *folioq, unsigned int slot);
+ void folioq_unmark2(struct folio_queue *folioq, unsigned int slot);
+
+And the marks can be queried by::
+
+ bool folioq_is_marked(const struct folio_queue *folioq, unsigned int slot);
+ bool folioq_is_marked2(const struct folio_queue *folioq, unsigned int slot);
+
+The marks can be used for any purpose and are not interpreted by this API.
+
+
+Folio queue iteration
+=====================
+
+A list of segments may be iterated over using the I/O iterator facility using
+an ``iov_iter`` iterator of ``ITER_FOLIOQ`` type. The iterator may be
+initialised with::
+
+ void iov_iter_folio_queue(struct iov_iter *i, unsigned int direction,
+ const struct folio_queue *folioq,
+ unsigned int first_slot, unsigned int offset,
+ size_t count);
+
+This may be told to start at a particular segment, slot and offset within a
+queue. The iov iterator functions will follow the next pointers when advancing
+and prev pointers when reverting when needed.
+
+
+Lockless simultaneous production/consumption issues
+===================================================
+
+If properly managed, the list can be extended by the producer at the head end
+and shortened by the consumer at the tail end simultaneously without the need
+to take locks. The ITER_FOLIOQ iterator inserts appropriate barriers to aid
+with this.
+
+Care must be taken when simultaneously producing and consuming a list. If the
+last segment is reached and the folios it refers to are entirely consumed by
+the IOV iterators, an iov_iter struct will be left pointing to the last segment
+with a slot number equal to the capacity of that segment. The iterator will
+try to continue on from this if there's another segment available when it is
+used again, but care must be taken lest the segment got removed and freed by
+the consumer before the iterator was advanced.
+
+It is recommended that the queue always contain at least one segment, even if
+that segment has never been filled or is entirely spent. This prevents the
+head and tail pointers from collapsing.
+
+
+API Function Reference
+======================
+
+.. kernel-doc:: include/linux/folio_queue.h
diff --git a/Documentation/core-api/genalloc.rst b/Documentation/core-api/genalloc.rst
index 6b38a39fab24..a5af2cbf58a5 100644
--- a/Documentation/core-api/genalloc.rst
+++ b/Documentation/core-api/genalloc.rst
@@ -23,7 +23,7 @@ begins with the creation of a pool using one of:
.. kernel-doc:: lib/genalloc.c
:functions: devm_gen_pool_create
-A call to :c:func:`gen_pool_create` will create a pool. The granularity of
+A call to gen_pool_create() will create a pool. The granularity of
allocations is set with min_alloc_order; it is a log-base-2 number like
those used by the page allocator, but it refers to bytes rather than pages.
So, if min_alloc_order is passed as 3, then all allocations will be a
@@ -32,7 +32,7 @@ required to track the memory in the pool. The nid parameter specifies
which NUMA node should be used for the allocation of the housekeeping
structures; it can be -1 if the caller doesn't care.
-The "managed" interface :c:func:`devm_gen_pool_create` ties the pool to a
+The "managed" interface devm_gen_pool_create() ties the pool to a
specific device. Among other things, it will automatically clean up the
pool when the given device is destroyed.
@@ -53,32 +53,32 @@ to the pool. That can be done with one of:
:functions: gen_pool_add
.. kernel-doc:: lib/genalloc.c
- :functions: gen_pool_add_virt
+ :functions: gen_pool_add_owner
-A call to :c:func:`gen_pool_add` will place the size bytes of memory
+A call to gen_pool_add() will place the size bytes of memory
starting at addr (in the kernel's virtual address space) into the given
pool, once again using nid as the node ID for ancillary memory allocations.
-The :c:func:`gen_pool_add_virt` variant associates an explicit physical
+The gen_pool_add_virt() variant associates an explicit physical
address with the memory; this is only necessary if the pool will be used
for DMA allocations.
The functions for allocating memory from the pool (and putting it back)
are:
-.. kernel-doc:: lib/genalloc.c
+.. kernel-doc:: include/linux/genalloc.h
:functions: gen_pool_alloc
.. kernel-doc:: lib/genalloc.c
:functions: gen_pool_dma_alloc
.. kernel-doc:: lib/genalloc.c
- :functions: gen_pool_free
+ :functions: gen_pool_free_owner
-As one would expect, :c:func:`gen_pool_alloc` will allocate size< bytes
-from the given pool. The :c:func:`gen_pool_dma_alloc` variant allocates
+As one would expect, gen_pool_alloc() will allocate size< bytes
+from the given pool. The gen_pool_dma_alloc() variant allocates
memory for use with DMA operations, returning the associated physical
address in the space pointed to by dma. This will only work if the memory
-was added with :c:func:`gen_pool_add_virt`. Note that this function
+was added with gen_pool_add_virt(). Note that this function
departs from the usual genpool pattern of using unsigned long values to
represent kernel addresses; it returns a void * instead.
@@ -89,14 +89,14 @@ return. If that sort of control is needed, the following functions will be
of interest:
.. kernel-doc:: lib/genalloc.c
- :functions: gen_pool_alloc_algo
+ :functions: gen_pool_alloc_algo_owner
.. kernel-doc:: lib/genalloc.c
:functions: gen_pool_set_algo
-Allocations with :c:func:`gen_pool_alloc_algo` specify an algorithm to be
+Allocations with gen_pool_alloc_algo() specify an algorithm to be
used to choose the memory to be allocated; the default algorithm can be set
-with :c:func:`gen_pool_set_algo`. The data value is passed to the
+with gen_pool_set_algo(). The data value is passed to the
algorithm; most ignore it, but it is occasionally needed. One can,
naturally, write a special-purpose algorithm, but there is a fair set
already available:
@@ -129,7 +129,7 @@ writing of special-purpose memory allocators in the future.
:functions: gen_pool_for_each_chunk
.. kernel-doc:: lib/genalloc.c
- :functions: addr_in_gen_pool
+ :functions: gen_pool_has_addr
.. kernel-doc:: lib/genalloc.c
:functions: gen_pool_avail
diff --git a/Documentation/core-api/generic-radix-tree.rst b/Documentation/core-api/generic-radix-tree.rst
new file mode 100644
index 000000000000..ed42839ae42f
--- /dev/null
+++ b/Documentation/core-api/generic-radix-tree.rst
@@ -0,0 +1,12 @@
+=================================
+Generic radix trees/sparse arrays
+=================================
+
+.. kernel-doc:: include/linux/generic-radix-tree.h
+ :doc: Generic radix trees/sparse arrays
+
+generic radix tree functions
+----------------------------
+
+.. kernel-doc:: include/linux/generic-radix-tree.h
+ :functions:
diff --git a/Documentation/core-api/genericirq.rst b/Documentation/core-api/genericirq.rst
index 4da67b65cecf..582bde9bf5a9 100644
--- a/Documentation/core-api/genericirq.rst
+++ b/Documentation/core-api/genericirq.rst
@@ -26,7 +26,7 @@ Rationale
=========
The original implementation of interrupt handling in Linux uses the
-:c:func:`__do_IRQ` super-handler, which is able to deal with every type of
+__do_IRQ() super-handler, which is able to deal with every type of
interrupt logic.
Originally, Russell King identified different types of handlers to build
@@ -43,7 +43,7 @@ During the implementation we identified another type:
- Fast EOI type
-In the SMP world of the :c:func:`__do_IRQ` super-handler another type was
+In the SMP world of the __do_IRQ() super-handler another type was
identified:
- Per CPU type
@@ -83,7 +83,7 @@ IRQ-flow implementation for 'level type' interrupts and add a
(sub)architecture specific 'edge type' implementation.
To make the transition to the new model easier and prevent the breakage
-of existing implementations, the :c:func:`__do_IRQ` super-handler is still
+of existing implementations, the __do_IRQ() super-handler is still
available. This leads to a kind of duality for the time being. Over time
the new model should be used in more and more architectures, as it
enables smaller and cleaner IRQ subsystems. It's deprecated for three
@@ -116,7 +116,7 @@ status information and pointers to the interrupt flow method and the
interrupt chip structure which are assigned to this interrupt.
Whenever an interrupt triggers, the low-level architecture code calls
-into the generic interrupt code by calling :c:func:`desc->handle_irq`. This
+into the generic interrupt code by calling desc->handle_irq(). This
high-level IRQ handling function only uses desc->irq_data.chip
primitives referenced by the assigned chip descriptor structure.
@@ -125,27 +125,29 @@ High-level Driver API
The high-level Driver API consists of following functions:
-- :c:func:`request_irq`
+- request_irq()
-- :c:func:`free_irq`
+- request_threaded_irq()
-- :c:func:`disable_irq`
+- free_irq()
-- :c:func:`enable_irq`
+- disable_irq()
-- :c:func:`disable_irq_nosync` (SMP only)
+- enable_irq()
-- :c:func:`synchronize_irq` (SMP only)
+- disable_irq_nosync() (SMP only)
-- :c:func:`irq_set_irq_type`
+- synchronize_irq() (SMP only)
-- :c:func:`irq_set_irq_wake`
+- irq_set_irq_type()
-- :c:func:`irq_set_handler_data`
+- irq_set_irq_wake()
-- :c:func:`irq_set_chip`
+- irq_set_handler_data()
-- :c:func:`irq_set_chip_data`
+- irq_set_chip()
+
+- irq_set_chip_data()
See the autogenerated function documentation for details.
@@ -154,19 +156,19 @@ High-level IRQ flow handlers
The generic layer provides a set of pre-defined irq-flow methods:
-- :c:func:`handle_level_irq`
+- handle_level_irq()
-- :c:func:`handle_edge_irq`
+- handle_edge_irq()
-- :c:func:`handle_fasteoi_irq`
+- handle_fasteoi_irq()
-- :c:func:`handle_simple_irq`
+- handle_simple_irq()
-- :c:func:`handle_percpu_irq`
+- handle_percpu_irq()
-- :c:func:`handle_edge_eoi_irq`
+- handle_edge_eoi_irq()
-- :c:func:`handle_bad_irq`
+- handle_bad_irq()
The interrupt flow handlers (either pre-defined or architecture
specific) are assigned to specific interrupts by the architecture either
@@ -208,7 +210,7 @@ implemented (simplified excerpt)::
}
}
- noop(struct irq_data *data))
+ noop(struct irq_data *data)
{
}
@@ -262,7 +264,7 @@ The following control flow is implemented (simplified excerpt)::
desc->irq_data.chip->irq_unmask();
desc->status &= ~pending;
handle_irq_event(desc->action);
- } while (status & pending);
+ } while (desc->status & pending);
desc->status &= ~running;
@@ -325,14 +327,14 @@ Delayed interrupt disable
This per interrupt selectable feature, which was introduced by Russell
King in the ARM interrupt implementation, does not mask an interrupt at
-the hardware level when :c:func:`disable_irq` is called. The interrupt is kept
+the hardware level when disable_irq() is called. The interrupt is kept
enabled and is masked in the flow handler when an interrupt event
happens. This prevents losing edge interrupts on hardware which does not
store an edge interrupt event while the interrupt is disabled at the
hardware level. When an interrupt arrives while the IRQ_DISABLED flag
is set, then the interrupt is masked at the hardware level and the
IRQ_PENDING bit is set. When the interrupt is re-enabled by
-:c:func:`enable_irq` the pending bit is checked and if it is set, the interrupt
+enable_irq() the pending bit is checked and if it is set, the interrupt
is resent either via hardware or by a software resend mechanism. (It's
necessary to enable CONFIG_HARDIRQS_SW_RESEND when you want to use
the delayed interrupt disable feature and your hardware is not capable
@@ -369,7 +371,7 @@ handler(s) to use these basic units of low-level functionality.
__do_IRQ entry point
====================
-The original implementation :c:func:`__do_IRQ` was an alternative entry point
+The original implementation __do_IRQ() was an alternative entry point
for all types of interrupts. It no longer exists.
This handler turned out to be not suitable for all interrupt hardware
@@ -417,6 +419,7 @@ functions which are exported.
.. kernel-doc:: kernel/irq/manage.c
.. kernel-doc:: kernel/irq/chip.c
+ :export:
Internal Functions Provided
===========================
@@ -429,6 +432,7 @@ functions.
.. kernel-doc:: kernel/irq/handle.c
.. kernel-doc:: kernel/irq/chip.c
+ :internal:
Credits
=======
diff --git a/Documentation/core-api/gfp_mask-from-fs-io.rst b/Documentation/core-api/gfp_mask-from-fs-io.rst
index e7c32a8de126..858b2fbcb36c 100644
--- a/Documentation/core-api/gfp_mask-from-fs-io.rst
+++ b/Documentation/core-api/gfp_mask-from-fs-io.rst
@@ -55,14 +55,16 @@ scope.
What about __vmalloc(GFP_NOFS)
==============================
-vmalloc doesn't support GFP_NOFS semantic because there are hardcoded
-GFP_KERNEL allocations deep inside the allocator which are quite non-trivial
-to fix up. That means that calling ``vmalloc`` with GFP_NOFS/GFP_NOIO is
-almost always a bug. The good news is that the NOFS/NOIO semantic can be
-achieved by the scope API.
+Since v5.17, and specifically after the commit 451769ebb7e79 ("mm/vmalloc:
+alloc GFP_NO{FS,IO} for vmalloc"), GFP_NOFS/GFP_NOIO are now supported in
+``[k]vmalloc`` by implicitly using scope API.
+
+In earlier kernels ``vmalloc`` didn't support GFP_NOFS semantic because there
+were hardcoded GFP_KERNEL allocations deep inside the allocator. That means
+that calling ``vmalloc`` with GFP_NOFS/GFP_NOIO was almost always a bug.
In the ideal world, upper layers should already mark dangerous contexts
-and so no special care is required and vmalloc should be called without
-any problems. Sometimes if the context is not really clear or there are
-layering violations then the recommended way around that is to wrap ``vmalloc``
-by the scope API with a comment explaining the problem.
+and so no special care is required and ``vmalloc`` should be called without any
+problems. Sometimes if the context is not really clear or there are layering
+violations then the recommended way around that (on pre-v5.17 kernels) is to
+wrap ``vmalloc`` by the scope API with a comment explaining the problem.
diff --git a/Documentation/core-api/idr.rst b/Documentation/core-api/idr.rst
index a2738050c4f0..18d724867064 100644
--- a/Documentation/core-api/idr.rst
+++ b/Documentation/core-api/idr.rst
@@ -17,51 +17,54 @@ solution to the problem to avoid everybody inventing their own. The IDR
provides the ability to map an ID to a pointer, while the IDA provides
only ID allocation, and as a result is much more memory-efficient.
+The IDR interface is deprecated; please use the :doc:`XArray <xarray>`
+instead.
+
IDR usage
=========
-Start by initialising an IDR, either with :c:func:`DEFINE_IDR`
-for statically allocated IDRs or :c:func:`idr_init` for dynamically
+Start by initialising an IDR, either with DEFINE_IDR()
+for statically allocated IDRs or idr_init() for dynamically
allocated IDRs.
-You can call :c:func:`idr_alloc` to allocate an unused ID. Look up
-the pointer you associated with the ID by calling :c:func:`idr_find`
-and free the ID by calling :c:func:`idr_remove`.
+You can call idr_alloc() to allocate an unused ID. Look up
+the pointer you associated with the ID by calling idr_find()
+and free the ID by calling idr_remove().
If you need to change the pointer associated with an ID, you can call
-:c:func:`idr_replace`. One common reason to do this is to reserve an
+idr_replace(). One common reason to do this is to reserve an
ID by passing a ``NULL`` pointer to the allocation function; initialise the
object with the reserved ID and finally insert the initialised object
into the IDR.
Some users need to allocate IDs larger than ``INT_MAX``. So far all of
these users have been content with a ``UINT_MAX`` limit, and they use
-:c:func:`idr_alloc_u32`. If you need IDs that will not fit in a u32,
+idr_alloc_u32(). If you need IDs that will not fit in a u32,
we will work with you to address your needs.
If you need to allocate IDs sequentially, you can use
-:c:func:`idr_alloc_cyclic`. The IDR becomes less efficient when dealing
+idr_alloc_cyclic(). The IDR becomes less efficient when dealing
with larger IDs, so using this function comes at a slight cost.
To perform an action on all pointers used by the IDR, you can
-either use the callback-based :c:func:`idr_for_each` or the
-iterator-style :c:func:`idr_for_each_entry`. You may need to use
-:c:func:`idr_for_each_entry_continue` to continue an iteration. You can
-also use :c:func:`idr_get_next` if the iterator doesn't fit your needs.
+either use the callback-based idr_for_each() or the
+iterator-style idr_for_each_entry(). You may need to use
+idr_for_each_entry_continue() to continue an iteration. You can
+also use idr_get_next() if the iterator doesn't fit your needs.
-When you have finished using an IDR, you can call :c:func:`idr_destroy`
+When you have finished using an IDR, you can call idr_destroy()
to release the memory used by the IDR. This will not free the objects
pointed to from the IDR; if you want to do that, use one of the iterators
to do it.
-You can use :c:func:`idr_is_empty` to find out whether there are any
+You can use idr_is_empty() to find out whether there are any
IDs currently allocated.
If you need to take a lock while allocating a new ID from the IDR,
you may need to pass a restrictive set of GFP flags, which can lead
to the IDR being unable to allocate memory. To work around this,
-you can call :c:func:`idr_preload` before taking the lock, and then
-:c:func:`idr_preload_end` after the allocation.
+you can call idr_preload() before taking the lock, and then
+idr_preload_end() after the allocation.
.. kernel-doc:: include/linux/idr.h
:doc: idr sync
diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst
index 3adee82be311..6cbdcbfa79c3 100644
--- a/Documentation/core-api/index.rst
+++ b/Documentation/core-api/index.rst
@@ -8,33 +8,116 @@ This is the beginning of a manual for core kernel APIs. The conversion
Core utilities
==============
+This section has general and "core core" documentation. The first is a
+massive grab-bag of kerneldoc info left over from the docbook days; it
+should really be broken up someday when somebody finds the energy to do
+it.
+
.. toctree::
:maxdepth: 1
kernel-api
+ workqueue
+ watch_queue
+ printk-basics
+ printk-formats
+ printk-index
+ symbol-namespaces
+ asm-annotations
+ real-time/index
+
+Data structures and low-level utilities
+=======================================
+
+Library functionality that is used throughout the kernel.
+
+.. toctree::
+ :maxdepth: 1
+
+ kobject
+ kref
+ cleanup
assoc_array
- atomic_ops
- cachetlb
- refcount-vs-atomic
- cpu_hotplug
+ folio_queue
+ xarray
+ maple_tree
idr
+ circular-buffers
+ rbtree
+ generic-radix-tree
+ packing
+ this_cpu_ops
+ timekeeping
+ errseq
+ wrappers/atomic_t
+ wrappers/atomic_bitops
+ floating-point
+ union_find
+ min_heap
+ parser
+ list
+
+Low level entry and exit
+========================
+
+.. toctree::
+ :maxdepth: 1
+
+ entry
+
+Concurrency primitives
+======================
+
+How Linux keeps everything from happening at the same time. See
+Documentation/locking/index.rst for more related documentation.
+
+.. toctree::
+ :maxdepth: 1
+
+ refcount-vs-atomic
+ irq/index
local_ops
- workqueue
+ padata
+ ../RCU/index
+ wrappers/memory-barriers.rst
+
+Low-level hardware management
+=============================
+
+Cache management, managing CPU hotplug, etc.
+
+.. toctree::
+ :maxdepth: 1
+
+ cachetlb
+ cpu_hotplug
+ memory-hotplug
genericirq
- xarray
- flexible-arrays
- librs
- genalloc
- errseq
- printk-formats
- circular-buffers
+ protection-keys
+
+Memory management
+=================
+
+How to allocate and use memory in the kernel. Note that there is a lot
+more memory-management documentation in Documentation/mm/index.rst.
+
+.. toctree::
+ :maxdepth: 1
+
memory-allocation
+ unaligned-memory-access
+ dma-api
+ dma-api-howto
+ dma-attributes
+ dma-isa-lpc
+ swiotlb
mm-api
- gfp_mask-from-fs-io
- timekeeping
+ cgroup
+ genalloc
+ pin_user_pages
boot-time-mm
- memory-hotplug
-
+ gfp_mask-from-fs-io
+ kho/index
Interfaces for kernel debugging
===============================
@@ -44,8 +127,20 @@ Interfaces for kernel debugging
debug-objects
tracepoint
+ debugging-via-ohci1394
+
+Everything else
+===============
+
+Documents that don't fit elsewhere or which have yet to be categorized.
+
+.. toctree::
+ :maxdepth: 1
+
+ librs
+ netlink
-.. only:: subproject
+.. only:: subproject and html
Indices
=======
diff --git a/Documentation/core-api/irq/concepts.rst b/Documentation/core-api/irq/concepts.rst
new file mode 100644
index 000000000000..7c4564f3cbdf
--- /dev/null
+++ b/Documentation/core-api/irq/concepts.rst
@@ -0,0 +1,25 @@
+===============
+What is an IRQ?
+===============
+
+An IRQ is an interrupt request from a device. Currently, they can come
+in over a pin, or over a packet. Several devices may be connected to
+the same pin thus sharing an IRQ. Such as on legacy PCI bus: All devices
+typically share 4 lanes/pins. Note that each device can request an
+interrupt on each of the lanes.
+
+An IRQ number is a kernel identifier used to talk about a hardware
+interrupt source. Typically, this is an index into the global irq_desc
+array or sparse_irqs tree. But except for what linux/interrupt.h
+implements, the details are architecture specific.
+
+An IRQ number is an enumeration of the possible interrupt sources on a
+machine. Typically, what is enumerated is the number of input pins on
+all of the interrupt controllers in the system. In the case of ISA,
+what is enumerated are the 8 input pins on each of the two i8259
+interrupt controllers.
+
+Architectures can assign additional meaning to the IRQ numbers, and
+are encouraged to in the case where there is any manual configuration
+of the hardware involved. The ISA IRQs are a classic example of
+assigning this kind of additional meaning.
diff --git a/Documentation/core-api/irq/index.rst b/Documentation/core-api/irq/index.rst
new file mode 100644
index 000000000000..0d65d11e5420
--- /dev/null
+++ b/Documentation/core-api/irq/index.rst
@@ -0,0 +1,11 @@
+====
+IRQs
+====
+
+.. toctree::
+ :maxdepth: 1
+
+ concepts
+ irq-affinity
+ irq-domain
+ irqflags-tracing
diff --git a/Documentation/core-api/irq/irq-affinity.rst b/Documentation/core-api/irq/irq-affinity.rst
new file mode 100644
index 000000000000..9cb460cf60b6
--- /dev/null
+++ b/Documentation/core-api/irq/irq-affinity.rst
@@ -0,0 +1,70 @@
+================
+SMP IRQ affinity
+================
+
+ChangeLog:
+ - Started by Ingo Molnar <mingo@redhat.com>
+ - Update by Max Krasnyansky <maxk@qualcomm.com>
+
+
+/proc/irq/IRQ#/smp_affinity and /proc/irq/IRQ#/smp_affinity_list specify
+which target CPUs are permitted for a given IRQ source. It's a bitmask
+(smp_affinity) or CPU list (smp_affinity_list) of allowed CPUs. It's not
+allowed to turn off all CPUs, and if an IRQ controller does not support
+IRQ affinity then the value will not change from the default of all CPUs.
+
+/proc/irq/default_smp_affinity specifies default affinity mask that applies
+to all non-active IRQs. Once IRQ is allocated/activated its affinity bitmask
+will be set to the default mask. It can then be changed as described above.
+Default mask is 0xffffffff.
+
+Here is an example of restricting IRQ44 (eth1) to CPU0-3 then restricting
+it to CPU4-7 (this is an 8-CPU SMP box)::
+
+ [root@moon 44]# cd /proc/irq/44
+ [root@moon 44]# cat smp_affinity
+ ffffffff
+
+ [root@moon 44]# echo 0f > smp_affinity
+ [root@moon 44]# cat smp_affinity
+ 0000000f
+ [root@moon 44]# ping -f h
+ PING hell (195.4.7.3): 56 data bytes
+ ...
+ --- hell ping statistics ---
+ 6029 packets transmitted, 6027 packets received, 0% packet loss
+ round-trip min/avg/max = 0.1/0.1/0.4 ms
+ [root@moon 44]# cat /proc/interrupts | grep 'CPU\|44:'
+ CPU0 CPU1 CPU2 CPU3 CPU4 CPU5 CPU6 CPU7
+ 44: 1068 1785 1785 1783 0 0 0 0 IO-APIC-level eth1
+
+As can be seen from the line above IRQ44 was delivered only to the first four
+processors (0-3).
+Now lets restrict that IRQ to CPU(4-7).
+
+::
+
+ [root@moon 44]# echo f0 > smp_affinity
+ [root@moon 44]# cat smp_affinity
+ 000000f0
+ [root@moon 44]# ping -f h
+ PING hell (195.4.7.3): 56 data bytes
+ ..
+ --- hell ping statistics ---
+ 2779 packets transmitted, 2777 packets received, 0% packet loss
+ round-trip min/avg/max = 0.1/0.5/585.4 ms
+ [root@moon 44]# cat /proc/interrupts | 'CPU\|44:'
+ CPU0 CPU1 CPU2 CPU3 CPU4 CPU5 CPU6 CPU7
+ 44: 1068 1785 1785 1783 1784 1069 1070 1069 IO-APIC-level eth1
+
+This time around IRQ44 was delivered only to the last four processors.
+i.e counters for the CPU0-3 did not change.
+
+Here is an example of limiting that same IRQ (44) to CPUs 1024 to 1031::
+
+ [root@moon 44]# echo 1024-1031 > smp_affinity_list
+ [root@moon 44]# cat smp_affinity_list
+ 1024-1031
+
+Note that to do this with a bitmask would require 32 bitmasks of zero
+to follow the pertinent one.
diff --git a/Documentation/core-api/irq/irq-domain.rst b/Documentation/core-api/irq/irq-domain.rst
new file mode 100644
index 000000000000..68eb2612e8a4
--- /dev/null
+++ b/Documentation/core-api/irq/irq-domain.rst
@@ -0,0 +1,320 @@
+===============================================
+The irq_domain Interrupt Number Mapping Library
+===============================================
+
+The current design of the Linux kernel uses a single large number
+space where each separate IRQ source is assigned a unique number.
+This is simple when there is only one interrupt controller. But in
+systems with multiple interrupt controllers, the kernel must ensure
+that each one gets assigned non-overlapping allocations of Linux
+IRQ numbers.
+
+The number of interrupt controllers registered as unique irqchips
+shows a rising tendency. For example, subdrivers of different kinds
+such as GPIO controllers avoid reimplementing identical callback
+mechanisms as the IRQ core system by modelling their interrupt
+handlers as irqchips. I.e. in effect cascading interrupt controllers.
+
+So in the past, IRQ numbers could be chosen so that they match the
+hardware IRQ line into the root interrupt controller (i.e. the
+component actually firing the interrupt line to the CPU). Nowadays,
+this number is just a number and the number has no
+relationship to hardware interrupt numbers.
+
+For this reason, we need a mechanism to separate controller-local
+interrupt numbers, called hardware IRQs, from Linux IRQ numbers.
+
+The irq_alloc_desc*() and irq_free_desc*() APIs provide allocation of
+IRQ numbers, but they don't provide any support for reverse mapping of
+the controller-local IRQ (hwirq) number into the Linux IRQ number
+space.
+
+The irq_domain library adds a mapping between hwirq and IRQ numbers on
+top of the irq_alloc_desc*() API. An irq_domain to manage the mapping
+is preferred over interrupt controller drivers open coding their own
+reverse mapping scheme.
+
+irq_domain also implements a translation from an abstract struct
+irq_fwspec to hwirq numbers (Device Tree, non-DT firmware node, ACPI
+GSI, and software node so far), and can be easily extended to support
+other IRQ topology data sources. The implementation is performed
+without any extra platform support code.
+
+irq_domain Usage
+================
+struct irq_domain could be defined as an irq domain controller. That
+is, it handles the mapping between hardware and virtual interrupt
+numbers for a given interrupt domain. The domain structure is
+generally created by the PIC code for a given PIC instance (though a
+domain can cover more than one PIC if they have a flat number model).
+It is the domain callbacks that are responsible for setting the
+irq_chip on a given irq_desc after it has been mapped.
+
+The host code and data structures use a fwnode_handle pointer to
+identify the domain. In some cases, and in order to preserve source
+code compatibility, this fwnode pointer is "upgraded" to a DT
+device_node. For those firmware infrastructures that do not provide a
+unique identifier for an interrupt controller, the irq_domain code
+offers a fwnode allocator.
+
+An interrupt controller driver creates and registers a struct irq_domain
+by calling one of the irq_domain_create_*() functions (each mapping
+method has a different allocator function, more on that later). The
+function will return a pointer to the struct irq_domain on success. The
+caller must provide the allocator function with a struct irq_domain_ops
+pointer.
+
+In most cases, the irq_domain will begin empty without any mappings
+between hwirq and IRQ numbers. Mappings are added to the irq_domain
+by calling irq_create_mapping() which accepts the irq_domain and a
+hwirq number as arguments. If a mapping for the hwirq doesn't already
+exist, irq_create_mapping() allocates a new Linux irq_desc, associates
+it with the hwirq, and calls the :c:member:`irq_domain_ops.map()`
+callback. In there, the driver can perform any required hardware
+setup.
+
+Once a mapping has been established, it can be retrieved or used via a
+variety of methods:
+
+- irq_resolve_mapping() returns a pointer to the irq_desc structure
+ for a given domain and hwirq number, or NULL if there was no
+ mapping.
+- irq_find_mapping() returns a Linux IRQ number for a given domain and
+ hwirq number, or 0 if there was no mapping
+- generic_handle_domain_irq() handles an interrupt described by a
+ domain and a hwirq number
+
+Note that irq_domain lookups must happen in contexts that are
+compatible with an RCU read-side critical section.
+
+The irq_create_mapping() function must be called *at least once*
+before any call to irq_find_mapping(), lest the descriptor will not
+be allocated.
+
+If the driver has the Linux IRQ number or the irq_data pointer, and
+needs to know the associated hwirq number (such as in the irq_chip
+callbacks) then it can be directly obtained from
+:c:member:`irq_data.hwirq`.
+
+Types of irq_domain Mappings
+============================
+
+There are several mechanisms available for reverse mapping from hwirq
+to Linux IRQ, and each mechanism uses a different allocation function.
+Which reverse map type should be used depends on the use case. Each
+of the reverse map types are described below:
+
+Linear
+------
+
+::
+
+ irq_domain_create_linear()
+
+The linear reverse map maintains a fixed-size table indexed by the
+hwirq number. When a hwirq is mapped, an irq_desc is allocated for
+the hwirq, and the IRQ number is stored in the table.
+
+The Linear map is a good choice when the maximum number of hwirqs is
+fixed and a relatively small number (~ < 256). The advantages of this
+map are fixed-time lookup for IRQ numbers, and irq_descs are only
+allocated for in-use IRQs. The disadvantage is that the table must be
+as large as the largest possible hwirq number.
+
+The majority of drivers should use the Linear map.
+
+Tree
+----
+
+::
+
+ irq_domain_create_tree()
+
+The irq_domain maintains a radix tree map from hwirq numbers to Linux
+IRQs. When an hwirq is mapped, an irq_desc is allocated and the
+hwirq is used as the lookup key for the radix tree.
+
+The Tree map is a good choice if the hwirq number can be very large
+since it doesn't need to allocate a table as large as the largest
+hwirq number. The disadvantage is that hwirq to IRQ number lookup is
+dependent on how many entries are in the table.
+
+Very few drivers should need this mapping.
+
+No Map
+------
+
+::
+
+ irq_domain_create_nomap()
+
+The No Map mapping is to be used when the hwirq number is
+programmable in the hardware. In this case it is best to program the
+Linux IRQ number into the hardware itself so that no mapping is
+required. Calling irq_create_direct_mapping() will allocate a Linux
+IRQ number and call the .map() callback so that driver can program the
+Linux IRQ number into the hardware.
+
+Most drivers cannot use this mapping, and it is now gated on the
+CONFIG_IRQ_DOMAIN_NOMAP option. Please refrain from introducing new
+users of this API.
+
+Legacy
+------
+
+::
+
+ irq_domain_create_simple()
+ irq_domain_create_legacy()
+
+The Legacy mapping is a special case for drivers that already have a
+range of irq_descs allocated for the hwirqs. It is used when the
+driver cannot be immediately converted to use the Linear mapping. For
+example, many embedded system board support files use a set of #defines
+for IRQ numbers that are passed to struct device registrations. In that
+case the Linux IRQ numbers cannot be dynamically assigned and the Legacy
+mapping should be used.
+
+As the name implies, the \*_legacy() functions are deprecated and only
+exist to ease the support of ancient platforms. No new users should be
+added. Same goes for the \*_simple() functions when their use results
+in the legacy behaviour.
+
+The Legacy map assumes a contiguous range of IRQ numbers has already
+been allocated for the controller and that the IRQ number can be
+calculated by adding a fixed offset to the hwirq number, and
+visa-versa. The disadvantage is that it requires the interrupt
+controller to manage IRQ allocations and it requires an irq_desc to be
+allocated for every hwirq, even if it is unused.
+
+The Legacy map should only be used if fixed IRQ mappings must be
+supported. For example, ISA controllers would use the Legacy map for
+mapping Linux IRQs 0-15 so that existing ISA drivers get the correct IRQ
+numbers.
+
+Most users of legacy mappings should use irq_domain_create_simple()
+which will use a legacy domain only if an IRQ range is supplied by the
+system and will otherwise use a linear domain mapping. The semantics of
+this call are such that if an IRQ range is specified then descriptors
+will be allocated on-the-fly for it, and if no range is specified it
+will fall through to irq_domain_create_linear() which means *no* IRQ
+descriptors will be allocated.
+
+A typical use case for simple domains is where an irqchip provider
+is supporting both dynamic and static IRQ assignments.
+
+In order to avoid ending up in a situation where a linear domain is
+used and no descriptor gets allocated it is very important to make sure
+that the driver using the simple domain call irq_create_mapping()
+before any irq_find_mapping() since the latter will actually work
+for the static IRQ assignment case.
+
+Hierarchy IRQ Domain
+--------------------
+
+On some architectures, there may be multiple interrupt controllers
+involved in delivering an interrupt from the device to the target CPU.
+Let's look at a typical interrupt delivery path on x86 platforms::
+
+ Device --> IOAPIC -> Interrupt remapping Controller -> Local APIC -> CPU
+
+There are three interrupt controllers involved:
+
+1) IOAPIC controller
+2) Interrupt remapping controller
+3) Local APIC controller
+
+To support such a hardware topology and make software architecture match
+hardware architecture, an irq_domain data structure is built for each
+interrupt controller and those irq_domains are organized into hierarchy.
+When building irq_domain hierarchy, the irq_domain nearest the device is
+child and the irq_domain nearest the CPU is parent. So a hierarchy structure
+as below will be built for the example above::
+
+ CPU Vector irq_domain (root irq_domain to manage CPU vectors)
+ ^
+ |
+ Interrupt Remapping irq_domain (manage irq_remapping entries)
+ ^
+ |
+ IOAPIC irq_domain (manage IOAPIC delivery entries/pins)
+
+There are four major interfaces to use hierarchy irq_domain:
+
+1) irq_domain_alloc_irqs(): allocate IRQ descriptors and interrupt
+ controller related resources to deliver these interrupts.
+2) irq_domain_free_irqs(): free IRQ descriptors and interrupt controller
+ related resources associated with these interrupts.
+3) irq_domain_activate_irq(): activate interrupt controller hardware to
+ deliver the interrupt.
+4) irq_domain_deactivate_irq(): deactivate interrupt controller hardware
+ to stop delivering the interrupt.
+
+The following is needed to support hierarchy irq_domain:
+
+1) The :c:member:`parent` field in struct irq_domain is used to
+ maintain irq_domain hierarchy information.
+2) The :c:member:`parent_data` field in struct irq_data is used to
+ build hierarchy irq_data to match hierarchy irq_domains. The
+ irq_data is used to store irq_domain pointer and hardware irq
+ number.
+3) The :c:member:`alloc()`, :c:member:`free()`, and other callbacks in
+ struct irq_domain_ops to support hierarchy irq_domain operations.
+
+With the support of hierarchy irq_domain and hierarchy irq_data ready,
+an irq_domain structure is built for each interrupt controller, and an
+irq_data structure is allocated for each irq_domain associated with an
+IRQ.
+
+For an interrupt controller driver to support hierarchy irq_domain, it
+needs to:
+
+1) Implement irq_domain_ops.alloc() and irq_domain_ops.free()
+2) Optionally, implement irq_domain_ops.activate() and
+ irq_domain_ops.deactivate().
+3) Optionally, implement an irq_chip to manage the interrupt controller
+ hardware.
+4) There is no need to implement irq_domain_ops.map() and
+ irq_domain_ops.unmap(). They are unused with hierarchy irq_domain.
+
+Note the hierarchy irq_domain is in no way x86-specific, and is
+heavily used to support other architectures, such as ARM, ARM64 etc.
+
+Stacked irq_chip
+~~~~~~~~~~~~~~~~
+
+Now, we could go one step further to support stacked (hierarchy)
+irq_chip. That is, an irq_chip is associated with each irq_data along
+the hierarchy. A child irq_chip may implement a required action by
+itself or by cooperating with its parent irq_chip.
+
+With stacked irq_chip, interrupt controller driver only needs to deal
+with the hardware managed by itself and may ask for services from its
+parent irq_chip when needed. So we could achieve a much cleaner
+software architecture.
+
+Debugging
+=========
+
+Most of the internals of the IRQ subsystem are exposed in debugfs by
+turning CONFIG_GENERIC_IRQ_DEBUGFS on.
+
+Structures and Public Functions Provided
+========================================
+
+This chapter contains the autogenerated documentation of the structures
+and exported kernel API functions which are used for IRQ domains.
+
+.. kernel-doc:: include/linux/irqdomain.h
+
+.. kernel-doc:: kernel/irq/irqdomain.c
+ :export:
+
+Internal Functions Provided
+===========================
+
+This chapter contains the autogenerated documentation of the internal
+functions.
+
+.. kernel-doc:: kernel/irq/irqdomain.c
+ :internal:
diff --git a/Documentation/irqflags-tracing.txt b/Documentation/core-api/irq/irqflags-tracing.rst
index bdd208259fb3..bdd208259fb3 100644
--- a/Documentation/irqflags-tracing.txt
+++ b/Documentation/core-api/irq/irqflags-tracing.rst
diff --git a/Documentation/core-api/kernel-api.rst b/Documentation/core-api/kernel-api.rst
index 3431337ee4e6..e8211c4ca662 100644
--- a/Documentation/core-api/kernel-api.rst
+++ b/Documentation/core-api/kernel-api.rst
@@ -3,12 +3,6 @@ The Linux Kernel API
====================
-List Management Functions
-=========================
-
-.. kernel-doc:: include/linux/list.h
- :internal:
-
Basic C Library Functions
=========================
@@ -24,21 +18,27 @@ String Conversions
.. kernel-doc:: lib/vsprintf.c
:export:
-.. kernel-doc:: include/linux/kernel.h
- :functions: kstrtol
-
-.. kernel-doc:: include/linux/kernel.h
- :functions: kstrtoul
+.. kernel-doc:: include/linux/kstrtox.h
+ :functions: kstrtol kstrtoul
.. kernel-doc:: lib/kstrtox.c
:export:
+.. kernel-doc:: lib/string_helpers.c
+ :export:
+
String Manipulation
-------------------
+.. kernel-doc:: include/linux/fortify-string.h
+ :internal:
+
.. kernel-doc:: lib/string.c
:export:
+.. kernel-doc:: include/linux/string.h
+ :internal:
+
.. kernel-doc:: mm/util.c
:functions: kstrdup kstrdup_const kstrndup kmemdup kmemdup_nul memdup_user
vmemdup_user strndup_user memdup_user_nul
@@ -51,7 +51,13 @@ The Linux kernel provides more basic utility functions.
Bit Operations
--------------
-.. kernel-doc:: arch/x86/include/asm/bitops.h
+.. kernel-doc:: include/asm-generic/bitops/instrumented-atomic.h
+ :internal:
+
+.. kernel-doc:: include/asm-generic/bitops/instrumented-non-atomic.h
+ :internal:
+
+.. kernel-doc:: include/asm-generic/bitops/instrumented-lock.h
:internal:
Bitmap Operations
@@ -84,6 +90,12 @@ Command-line Parsing
.. kernel-doc:: lib/cmdline.c
:export:
+Error Pointers
+--------------
+
+.. kernel-doc:: include/linux/err.h
+ :internal:
+
Sorting
-------
@@ -109,35 +121,54 @@ Text Searching
CRC and Math Functions in Linux
===============================
+Arithmetic Overflow Checking
+----------------------------
+
+.. kernel-doc:: include/linux/overflow.h
+ :internal:
+
CRC Functions
-------------
-.. kernel-doc:: lib/crc4.c
+.. kernel-doc:: lib/crc/crc4.c
:export:
-.. kernel-doc:: lib/crc7.c
+.. kernel-doc:: lib/crc/crc7.c
:export:
-.. kernel-doc:: lib/crc8.c
+.. kernel-doc:: lib/crc/crc8.c
:export:
-.. kernel-doc:: lib/crc16.c
+.. kernel-doc:: lib/crc/crc16.c
:export:
-.. kernel-doc:: lib/crc32.c
-
-.. kernel-doc:: lib/crc-ccitt.c
+.. kernel-doc:: lib/crc/crc-ccitt.c
:export:
-.. kernel-doc:: lib/crc-itu-t.c
+.. kernel-doc:: lib/crc/crc-itu-t.c
:export:
+.. kernel-doc:: include/linux/crc32.h
+
+.. kernel-doc:: include/linux/crc64.h
+
Base 2 log and power Functions
------------------------------
.. kernel-doc:: include/linux/log2.h
:internal:
+Integer log and power Functions
+-------------------------------
+
+.. kernel-doc:: include/linux/int_log.h
+
+.. kernel-doc:: lib/math/int_pow.c
+ :export:
+
+.. kernel-doc:: lib/math/int_sqrt.c
+ :export:
+
Division Functions
------------------
@@ -147,10 +178,7 @@ Division Functions
.. kernel-doc:: include/linux/math64.h
:internal:
-.. kernel-doc:: lib/div64.c
- :functions: div_s64_rem div64_u64_rem div64_u64 div64_s64
-
-.. kernel-doc:: lib/gcd.c
+.. kernel-doc:: lib/math/gcd.c
:export:
UUID/GUID
@@ -196,26 +224,38 @@ relay interface
Module Support
==============
-Module Loading
---------------
+Kernel module auto-loading
+--------------------------
-.. kernel-doc:: kernel/kmod.c
+.. kernel-doc:: kernel/module/kmod.c
:export:
+Module debugging
+----------------
+
+.. kernel-doc:: kernel/module/stats.c
+ :doc: module debugging statistics overview
+
+dup_failed_modules - tracks duplicate failed modules
+****************************************************
+
+.. kernel-doc:: kernel/module/stats.c
+ :doc: dup_failed_modules - tracks duplicate failed modules
+
+module statistics debugfs counters
+**********************************
+
+.. kernel-doc:: kernel/module/stats.c
+ :doc: module statistics debugfs counters
+
Inter Module support
--------------------
-Refer to the file kernel/module.c for more information.
+Refer to the files in kernel/module/ for more information.
Hardware Interfaces
===================
-Interrupt Handling
-------------------
-
-.. kernel-doc:: kernel/irq/manage.c
- :export:
-
DMA Channels
------------
@@ -267,6 +307,7 @@ Accounting Framework
Block Devices
=============
+.. kernel-doc:: include/linux/bio.h
.. kernel-doc:: block/blk-core.c
:export:
@@ -282,21 +323,12 @@ Block Devices
.. kernel-doc:: block/blk-settings.c
:export:
-.. kernel-doc:: block/blk-exec.c
- :export:
-
.. kernel-doc:: block/blk-flush.c
:export:
.. kernel-doc:: block/blk-lib.c
:export:
-.. kernel-doc:: block/blk-tag.c
- :export:
-
-.. kernel-doc:: block/blk-tag.c
- :internal:
-
.. kernel-doc:: block/blk-integrity.c
:export:
@@ -309,6 +341,9 @@ Block Devices
.. kernel-doc:: block/genhd.c
:export:
+.. kernel-doc:: block/bdev.c
+ :export:
+
Char devices
============
@@ -362,14 +397,8 @@ Read-Copy Update (RCU)
.. kernel-doc:: include/linux/rcupdate.h
-.. kernel-doc:: include/linux/rcupdate_wait.h
-
-.. kernel-doc:: include/linux/rcutree.h
-
.. kernel-doc:: kernel/rcu/tree.c
-.. kernel-doc:: kernel/rcu/tree_plugin.h
-
.. kernel-doc:: kernel/rcu/tree_exp.h
.. kernel-doc:: kernel/rcu/update.c
@@ -387,3 +416,15 @@ Read-Copy Update (RCU)
.. kernel-doc:: include/linux/rcu_sync.h
.. kernel-doc:: kernel/rcu/sync.c
+
+.. kernel-doc:: kernel/rcu/tasks.h
+
+.. kernel-doc:: kernel/rcu/tree_stall.h
+
+.. kernel-doc:: include/linux/rcupdate_trace.h
+
+.. kernel-doc:: include/linux/rcupdate_wait.h
+
+.. kernel-doc:: include/linux/rcuref.h
+
+.. kernel-doc:: include/linux/rcutree.h
diff --git a/Documentation/core-api/kho/bindings/kho.yaml b/Documentation/core-api/kho/bindings/kho.yaml
new file mode 100644
index 000000000000..11e8ab7b219d
--- /dev/null
+++ b/Documentation/core-api/kho/bindings/kho.yaml
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+title: Kexec HandOver (KHO) root tree
+
+maintainers:
+ - Mike Rapoport <rppt@kernel.org>
+ - Changyuan Lyu <changyuanl@google.com>
+
+description: |
+ System memory preserved by KHO across kexec.
+
+properties:
+ compatible:
+ enum:
+ - kho-v1
+
+ preserved-memory-map:
+ description: |
+ physical address (u64) of an in-memory structure describing all preserved
+ folios and memory ranges.
+
+patternProperties:
+ "$[0-9a-f_]+^":
+ $ref: sub-fdt.yaml#
+ description: physical address of a KHO user's own FDT.
+
+required:
+ - compatible
+ - preserved-memory-map
+
+additionalProperties: false
+
+examples:
+ - |
+ kho {
+ compatible = "kho-v1";
+ preserved-memory-map = <0xf0be16 0x1000000>;
+
+ memblock {
+ fdt = <0x80cc16 0x1000000>;
+ };
+ };
diff --git a/Documentation/core-api/kho/bindings/memblock/memblock.yaml b/Documentation/core-api/kho/bindings/memblock/memblock.yaml
new file mode 100644
index 000000000000..d388c28eb91d
--- /dev/null
+++ b/Documentation/core-api/kho/bindings/memblock/memblock.yaml
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+title: Memblock reserved memory
+
+maintainers:
+ - Mike Rapoport <rppt@kernel.org>
+
+description: |
+ Memblock can serialize its current memory reservations created with
+ reserve_mem command line option across kexec through KHO.
+ The post-KHO kernel can then consume these reservations and they are
+ guaranteed to have the same physical address.
+
+properties:
+ compatible:
+ enum:
+ - reserve-mem-v1
+
+patternProperties:
+ "$[0-9a-f_]+^":
+ $ref: reserve-mem.yaml#
+ description: reserved memory regions
+
+required:
+ - compatible
+
+additionalProperties: false
+
+examples:
+ - |
+ memblock {
+ compatible = "memblock-v1";
+ n1 {
+ compatible = "reserve-mem-v1";
+ start = <0xc06b 0x4000000>;
+ size = <0x04 0x00>;
+ };
+ };
diff --git a/Documentation/core-api/kho/bindings/memblock/reserve-mem.yaml b/Documentation/core-api/kho/bindings/memblock/reserve-mem.yaml
new file mode 100644
index 000000000000..10282d3d1bcd
--- /dev/null
+++ b/Documentation/core-api/kho/bindings/memblock/reserve-mem.yaml
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+title: Memblock reserved memory regions
+
+maintainers:
+ - Mike Rapoport <rppt@kernel.org>
+
+description: |
+ Memblock can serialize its current memory reservations created with
+ reserve_mem command line option across kexec through KHO.
+ This object describes each such region.
+
+properties:
+ compatible:
+ enum:
+ - reserve-mem-v1
+
+ start:
+ description: |
+ physical address (u64) of the reserved memory region.
+
+ size:
+ description: |
+ size (u64) of the reserved memory region.
+
+required:
+ - compatible
+ - start
+ - size
+
+additionalProperties: false
+
+examples:
+ - |
+ n1 {
+ compatible = "reserve-mem-v1";
+ start = <0xc06b 0x4000000>;
+ size = <0x04 0x00>;
+ };
diff --git a/Documentation/core-api/kho/bindings/sub-fdt.yaml b/Documentation/core-api/kho/bindings/sub-fdt.yaml
new file mode 100644
index 000000000000..b9a3d2d24850
--- /dev/null
+++ b/Documentation/core-api/kho/bindings/sub-fdt.yaml
@@ -0,0 +1,27 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+title: KHO users' FDT address
+
+maintainers:
+ - Mike Rapoport <rppt@kernel.org>
+ - Changyuan Lyu <changyuanl@google.com>
+
+description: |
+ Physical address of an FDT blob registered by a KHO user.
+
+properties:
+ fdt:
+ description: |
+ physical address (u64) of an FDT blob.
+
+required:
+ - fdt
+
+additionalProperties: false
+
+examples:
+ - |
+ memblock {
+ fdt = <0x80cc16 0x1000000>;
+ };
diff --git a/Documentation/core-api/kho/concepts.rst b/Documentation/core-api/kho/concepts.rst
new file mode 100644
index 000000000000..36d5c05cfb30
--- /dev/null
+++ b/Documentation/core-api/kho/concepts.rst
@@ -0,0 +1,74 @@
+.. SPDX-License-Identifier: GPL-2.0-or-later
+.. _kho-concepts:
+
+=======================
+Kexec Handover Concepts
+=======================
+
+Kexec HandOver (KHO) is a mechanism that allows Linux to preserve memory
+regions, which could contain serialized system states, across kexec.
+
+It introduces multiple concepts:
+
+KHO FDT
+=======
+
+Every KHO kexec carries a KHO specific flattened device tree (FDT) blob
+that describes preserved memory regions. These regions contain either
+serialized subsystem states, or in-memory data that shall not be touched
+across kexec. After KHO, subsystems can retrieve and restore preserved
+memory regions from KHO FDT.
+
+KHO only uses the FDT container format and libfdt library, but does not
+adhere to the same property semantics that normal device trees do: Properties
+are passed in native endianness and standardized properties like ``regs`` and
+``ranges`` do not exist, hence there are no ``#...-cells`` properties.
+
+KHO is still under development. The FDT schema is unstable and would change
+in the future.
+
+Scratch Regions
+===============
+
+To boot into kexec, we need to have a physically contiguous memory range that
+contains no handed over memory. Kexec then places the target kernel and initrd
+into that region. The new kernel exclusively uses this region for memory
+allocations before during boot up to the initialization of the page allocator.
+
+We guarantee that we always have such regions through the scratch regions: On
+first boot KHO allocates several physically contiguous memory regions. Since
+after kexec these regions will be used by early memory allocations, there is a
+scratch region per NUMA node plus a scratch region to satisfy allocations
+requests that do not require particular NUMA node assignment.
+By default, size of the scratch region is calculated based on amount of memory
+allocated during boot. The ``kho_scratch`` kernel command line option may be
+used to explicitly define size of the scratch regions.
+The scratch regions are declared as CMA when page allocator is initialized so
+that their memory can be used during system lifetime. CMA gives us the
+guarantee that no handover pages land in that region, because handover pages
+must be at a static physical memory location and CMA enforces that only
+movable pages can be located inside.
+
+After KHO kexec, we ignore the ``kho_scratch`` kernel command line option and
+instead reuse the exact same region that was originally allocated. This allows
+us to recursively execute any amount of KHO kexecs. Because we used this region
+for boot memory allocations and as target memory for kexec blobs, some parts
+of that memory region may be reserved. These reservations are irrelevant for
+the next KHO, because kexec can overwrite even the original kernel.
+
+.. _kho-finalization-phase:
+
+KHO finalization phase
+======================
+
+To enable user space based kexec file loader, the kernel needs to be able to
+provide the FDT that describes the current kernel's state before
+performing the actual kexec. The process of generating that FDT is
+called serialization. When the FDT is generated, some properties
+of the system may become immutable because they are already written down
+in the FDT. That state is called the KHO finalization phase.
+
+Public API
+==========
+.. kernel-doc:: kernel/kexec_handover.c
+ :export:
diff --git a/Documentation/core-api/kho/fdt.rst b/Documentation/core-api/kho/fdt.rst
new file mode 100644
index 000000000000..62505285d60d
--- /dev/null
+++ b/Documentation/core-api/kho/fdt.rst
@@ -0,0 +1,80 @@
+.. SPDX-License-Identifier: GPL-2.0-or-later
+
+=======
+KHO FDT
+=======
+
+KHO uses the flattened device tree (FDT) container format and libfdt
+library to create and parse the data that is passed between the
+kernels. The properties in KHO FDT are stored in native format.
+It includes the physical address of an in-memory structure describing
+all preserved memory regions, as well as physical addresses of KHO users'
+own FDTs. Interpreting those sub FDTs is the responsibility of KHO users.
+
+KHO nodes and properties
+========================
+
+Property ``preserved-memory-map``
+---------------------------------
+
+KHO saves a special property named ``preserved-memory-map`` under the root node.
+This node contains the physical address of an in-memory structure for KHO to
+preserve memory regions across kexec.
+
+Property ``compatible``
+-----------------------
+
+The ``compatible`` property determines compatibility between the kernel
+that created the KHO FDT and the kernel that attempts to load it.
+If the kernel that loads the KHO FDT is not compatible with it, the entire
+KHO process will be bypassed.
+
+Property ``fdt``
+----------------
+
+Generally, a KHO user serialize its state into its own FDT and instructs
+KHO to preserve the underlying memory, such that after kexec, the new kernel
+can recover its state from the preserved FDT.
+
+A KHO user thus can create a node in KHO root tree and save the physical address
+of its own FDT in that node's property ``fdt`` .
+
+Examples
+========
+
+The following example demonstrates KHO FDT that preserves two memory
+regions created with ``reserve_mem`` kernel command line parameter::
+
+ /dts-v1/;
+
+ / {
+ compatible = "kho-v1";
+
+ preserved-memory-map = <0x40be16 0x1000000>;
+
+ memblock {
+ fdt = <0x1517 0x1000000>;
+ };
+ };
+
+where the ``memblock`` node contains an FDT that is requested by the
+subsystem memblock for preservation. The FDT contains the following
+serialized data::
+
+ /dts-v1/;
+
+ / {
+ compatible = "memblock-v1";
+
+ n1 {
+ compatible = "reserve-mem-v1";
+ start = <0xc06b 0x4000000>;
+ size = <0x04 0x00>;
+ };
+
+ n2 {
+ compatible = "reserve-mem-v1";
+ start = <0xc067 0x4000000>;
+ size = <0x04 0x00>;
+ };
+ };
diff --git a/Documentation/core-api/kho/index.rst b/Documentation/core-api/kho/index.rst
new file mode 100644
index 000000000000..0c63b0c5c143
--- /dev/null
+++ b/Documentation/core-api/kho/index.rst
@@ -0,0 +1,13 @@
+.. SPDX-License-Identifier: GPL-2.0-or-later
+
+========================
+Kexec Handover Subsystem
+========================
+
+.. toctree::
+ :maxdepth: 1
+
+ concepts
+ fdt
+
+.. only:: subproject and html
diff --git a/Documentation/core-api/kobject.rst b/Documentation/core-api/kobject.rst
new file mode 100644
index 000000000000..7310247310a0
--- /dev/null
+++ b/Documentation/core-api/kobject.rst
@@ -0,0 +1,434 @@
+=====================================================================
+Everything you never wanted to know about kobjects, ksets, and ktypes
+=====================================================================
+
+:Author: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+:Last updated: December 19, 2007
+
+Based on an original article by Jon Corbet for lwn.net written October 1,
+2003 and located at https://lwn.net/Articles/51437/
+
+Part of the difficulty in understanding the driver model - and the kobject
+abstraction upon which it is built - is that there is no obvious starting
+place. Dealing with kobjects requires understanding a few different types,
+all of which make reference to each other. In an attempt to make things
+easier, we'll take a multi-pass approach, starting with vague terms and
+adding detail as we go. To that end, here are some quick definitions of
+some terms we will be working with.
+
+ - A kobject is an object of type struct kobject. Kobjects have a name
+ and a reference count. A kobject also has a parent pointer (allowing
+ objects to be arranged into hierarchies), a specific type, and,
+ usually, a representation in the sysfs virtual filesystem.
+
+ Kobjects are generally not interesting on their own; instead, they are
+ usually embedded within some other structure which contains the stuff
+ the code is really interested in.
+
+ No structure should **EVER** have more than one kobject embedded within it.
+ If it does, the reference counting for the object is sure to be messed
+ up and incorrect, and your code will be buggy. So do not do this.
+
+ - A ktype is the type of object that embeds a kobject. Every structure
+ that embeds a kobject needs a corresponding ktype. The ktype controls
+ what happens to the kobject when it is created and destroyed.
+
+ - A kset is a group of kobjects. These kobjects can be of the same ktype
+ or belong to different ktypes. The kset is the basic container type for
+ collections of kobjects. Ksets contain their own kobjects, but you can
+ safely ignore that implementation detail as the kset core code handles
+ this kobject automatically.
+
+ When you see a sysfs directory full of other directories, generally each
+ of those directories corresponds to a kobject in the same kset.
+
+We'll look at how to create and manipulate all of these types. A bottom-up
+approach will be taken, so we'll go back to kobjects.
+
+
+Embedding kobjects
+==================
+
+It is rare for kernel code to create a standalone kobject, with one major
+exception explained below. Instead, kobjects are used to control access to
+a larger, domain-specific object. To this end, kobjects will be found
+embedded in other structures. If you are used to thinking of things in
+object-oriented terms, kobjects can be seen as a top-level, abstract class
+from which other classes are derived. A kobject implements a set of
+capabilities which are not particularly useful by themselves, but are
+nice to have in other objects. The C language does not allow for the
+direct expression of inheritance, so other techniques - such as structure
+embedding - must be used.
+
+(As an aside, for those familiar with the kernel linked list implementation,
+this is analogous as to how "list_head" structs are rarely useful on
+their own, but are invariably found embedded in the larger objects of
+interest.)
+
+So, for example, the UIO code in ``drivers/uio/uio.c`` has a structure that
+defines the memory region associated with a uio device::
+
+ struct uio_map {
+ struct kobject kobj;
+ struct uio_mem *mem;
+ };
+
+If you have a struct uio_map structure, finding its embedded kobject is
+just a matter of using the kobj member. Code that works with kobjects will
+often have the opposite problem, however: given a struct kobject pointer,
+what is the pointer to the containing structure? You must avoid tricks
+(such as assuming that the kobject is at the beginning of the structure)
+and, instead, use the container_of() macro, found in ``<linux/kernel.h>``::
+
+ container_of(ptr, type, member)
+
+where:
+
+ * ``ptr`` is the pointer to the embedded kobject,
+ * ``type`` is the type of the containing structure, and
+ * ``member`` is the name of the structure field to which ``pointer`` points.
+
+The return value from container_of() is a pointer to the corresponding
+container type. So, for example, a pointer ``kp`` to a struct kobject
+embedded **within** a struct uio_map could be converted to a pointer to the
+**containing** uio_map structure with::
+
+ struct uio_map *u_map = container_of(kp, struct uio_map, kobj);
+
+For convenience, programmers often define a simple macro for **back-casting**
+kobject pointers to the containing type. Exactly this happens in the
+earlier ``drivers/uio/uio.c``, as you can see here::
+
+ struct uio_map {
+ struct kobject kobj;
+ struct uio_mem *mem;
+ };
+
+ #define to_map(map) container_of(map, struct uio_map, kobj)
+
+where the macro argument "map" is a pointer to the struct kobject in
+question. That macro is subsequently invoked with::
+
+ struct uio_map *map = to_map(kobj);
+
+
+Initialization of kobjects
+==========================
+
+Code which creates a kobject must, of course, initialize that object. Some
+of the internal fields are setup with a (mandatory) call to kobject_init()::
+
+ void kobject_init(struct kobject *kobj, const struct kobj_type *ktype);
+
+The ktype is required for a kobject to be created properly, as every kobject
+must have an associated kobj_type. After calling kobject_init(), to
+register the kobject with sysfs, the function kobject_add() must be called::
+
+ int kobject_add(struct kobject *kobj, struct kobject *parent,
+ const char *fmt, ...);
+
+This sets up the parent of the kobject and the name for the kobject
+properly. If the kobject is to be associated with a specific kset,
+kobj->kset must be assigned before calling kobject_add(). If a kset is
+associated with a kobject, then the parent for the kobject can be set to
+NULL in the call to kobject_add() and then the kobject's parent will be the
+kset itself.
+
+As the name of the kobject is set when it is added to the kernel, the name
+of the kobject should never be manipulated directly. If you must change
+the name of the kobject, call kobject_rename()::
+
+ int kobject_rename(struct kobject *kobj, const char *new_name);
+
+kobject_rename() does not perform any locking or have a solid notion of
+what names are valid so the caller must provide their own sanity checking
+and serialization.
+
+There is a function called kobject_set_name() but that is legacy cruft and
+is being removed. If your code needs to call this function, it is
+incorrect and needs to be fixed.
+
+To properly access the name of the kobject, use the function
+kobject_name()::
+
+ const char *kobject_name(const struct kobject * kobj);
+
+There is a helper function to both initialize and add the kobject to the
+kernel at the same time, called surprisingly enough kobject_init_and_add()::
+
+ int kobject_init_and_add(struct kobject *kobj, const struct kobj_type *ktype,
+ struct kobject *parent, const char *fmt, ...);
+
+The arguments are the same as the individual kobject_init() and
+kobject_add() functions described above.
+
+
+Uevents
+=======
+
+After a kobject has been registered with the kobject core, you need to
+announce to the world that it has been created. This can be done with a
+call to kobject_uevent()::
+
+ int kobject_uevent(struct kobject *kobj, enum kobject_action action);
+
+Use the **KOBJ_ADD** action for when the kobject is first added to the kernel.
+This should be done only after any attributes or children of the kobject
+have been initialized properly, as userspace will instantly start to look
+for them when this call happens.
+
+When the kobject is removed from the kernel (details on how to do that are
+below), the uevent for **KOBJ_REMOVE** will be automatically created by the
+kobject core, so the caller does not have to worry about doing that by
+hand.
+
+
+Reference counts
+================
+
+One of the key functions of a kobject is to serve as a reference counter
+for the object in which it is embedded. As long as references to the object
+exist, the object (and the code which supports it) must continue to exist.
+The low-level functions for manipulating a kobject's reference counts are::
+
+ struct kobject *kobject_get(struct kobject *kobj);
+ void kobject_put(struct kobject *kobj);
+
+A successful call to kobject_get() will increment the kobject's reference
+counter and return the pointer to the kobject.
+
+When a reference is released, the call to kobject_put() will decrement the
+reference count and, possibly, free the object. Note that kobject_init()
+sets the reference count to one, so the code which sets up the kobject will
+need to do a kobject_put() eventually to release that reference.
+
+Because kobjects are dynamic, they must not be declared statically or on
+the stack, but instead, always allocated dynamically. Future versions of
+the kernel will contain a run-time check for kobjects that are created
+statically and will warn the developer of this improper usage.
+
+If all that you want to use a kobject for is to provide a reference counter
+for your structure, please use the struct kref instead; a kobject would be
+overkill. For more information on how to use struct kref, please see the
+file Documentation/core-api/kref.rst in the Linux kernel source tree.
+
+
+Creating "simple" kobjects
+==========================
+
+Sometimes all that a developer wants is a way to create a simple directory
+in the sysfs hierarchy, and not have to mess with the whole complication of
+ksets, show and store functions, and other details. This is the one
+exception where a single kobject should be created. To create such an
+entry, use the function::
+
+ struct kobject *kobject_create_and_add(const char *name, struct kobject *parent);
+
+This function will create a kobject and place it in sysfs in the location
+underneath the specified parent kobject. To create simple attributes
+associated with this kobject, use::
+
+ int sysfs_create_file(struct kobject *kobj, const struct attribute *attr);
+
+or::
+
+ int sysfs_create_group(struct kobject *kobj, const struct attribute_group *grp);
+
+Both types of attributes used here, with a kobject that has been created
+with the kobject_create_and_add(), can be of type kobj_attribute, so no
+special custom attribute is needed to be created.
+
+See the example module, ``samples/kobject/kobject-example.c`` for an
+implementation of a simple kobject and attributes.
+
+
+
+ktypes and release methods
+==========================
+
+One important thing still missing from the discussion is what happens to a
+kobject when its reference count reaches zero. The code which created the
+kobject generally does not know when that will happen; if it did, there
+would be little point in using a kobject in the first place. Even
+predictable object lifecycles become more complicated when sysfs is brought
+in as other portions of the kernel can get a reference on any kobject that
+is registered in the system.
+
+The end result is that a structure protected by a kobject cannot be freed
+before its reference count goes to zero. The reference count is not under
+the direct control of the code which created the kobject. So that code must
+be notified asynchronously whenever the last reference to one of its
+kobjects goes away.
+
+Once you registered your kobject via kobject_add(), you must never use
+kfree() to free it directly. The only safe way is to use kobject_put(). It
+is good practice to always use kobject_put() after kobject_init() to avoid
+errors creeping in.
+
+This notification is done through a kobject's release() method. Usually
+such a method has a form like::
+
+ void my_object_release(struct kobject *kobj)
+ {
+ struct my_object *mine = container_of(kobj, struct my_object, kobj);
+
+ /* Perform any additional cleanup on this object, then... */
+ kfree(mine);
+ }
+
+One important point cannot be overstated: every kobject must have a
+release() method, and the kobject must persist (in a consistent state)
+until that method is called. If these constraints are not met, the code is
+flawed. Note that the kernel will warn you if you forget to provide a
+release() method. Do not try to get rid of this warning by providing an
+"empty" release function.
+
+If all your cleanup function needs to do is call kfree(), then you must
+create a wrapper function which uses container_of() to upcast to the correct
+type (as shown in the example above) and then calls kfree() on the overall
+structure.
+
+Note, the name of the kobject is available in the release function, but it
+must NOT be changed within this callback. Otherwise there will be a memory
+leak in the kobject core, which makes people unhappy.
+
+Interestingly, the release() method is not stored in the kobject itself;
+instead, it is associated with the ktype. So let us introduce struct
+kobj_type::
+
+ struct kobj_type {
+ void (*release)(struct kobject *kobj);
+ const struct sysfs_ops *sysfs_ops;
+ const struct attribute_group **default_groups;
+ const struct kobj_ns_type_operations *(*child_ns_type)(struct kobject *kobj);
+ const void *(*namespace)(struct kobject *kobj);
+ void (*get_ownership)(struct kobject *kobj, kuid_t *uid, kgid_t *gid);
+ };
+
+This structure is used to describe a particular type of kobject (or, more
+correctly, of containing object). Every kobject needs to have an associated
+kobj_type structure; a pointer to that structure must be specified when you
+call kobject_init() or kobject_init_and_add().
+
+The release field in struct kobj_type is, of course, a pointer to the
+release() method for this type of kobject. The other two fields (sysfs_ops
+and default_groups) control how objects of this type are represented in
+sysfs; they are beyond the scope of this document.
+
+The default_groups pointer is a list of default attributes that will be
+automatically created for any kobject that is registered with this ktype.
+
+
+ksets
+=====
+
+A kset is merely a collection of kobjects that want to be associated with
+each other. There is no restriction that they be of the same ktype, but be
+very careful if they are not.
+
+A kset serves these functions:
+
+ - It serves as a bag containing a group of objects. A kset can be used by
+ the kernel to track "all block devices" or "all PCI device drivers."
+
+ - A kset is also a subdirectory in sysfs, where the associated kobjects
+ with the kset can show up. Every kset contains a kobject which can be
+ set up to be the parent of other kobjects; the top-level directories of
+ the sysfs hierarchy are constructed in this way.
+
+ - Ksets can support the "hotplugging" of kobjects and influence how
+ uevent events are reported to user space.
+
+In object-oriented terms, "kset" is the top-level container class; ksets
+contain their own kobject, but that kobject is managed by the kset code and
+should not be manipulated by any other user.
+
+A kset keeps its children in a standard kernel linked list. Kobjects point
+back to their containing kset via their kset field. In almost all cases,
+the kobjects belonging to a kset have that kset (or, strictly, its embedded
+kobject) in their parent.
+
+As a kset contains a kobject within it, it should always be dynamically
+created and never declared statically or on the stack. To create a new
+kset use::
+
+ struct kset *kset_create_and_add(const char *name,
+ const struct kset_uevent_ops *uevent_ops,
+ struct kobject *parent_kobj);
+
+When you are finished with the kset, call::
+
+ void kset_unregister(struct kset *k);
+
+to destroy it. This removes the kset from sysfs and decrements its reference
+count. When the reference count goes to zero, the kset will be released.
+Because other references to the kset may still exist, the release may happen
+after kset_unregister() returns.
+
+An example of using a kset can be seen in the
+``samples/kobject/kset-example.c`` file in the kernel tree.
+
+If a kset wishes to control the uevent operations of the kobjects
+associated with it, it can use the struct kset_uevent_ops to handle it::
+
+ struct kset_uevent_ops {
+ int (* const filter)(struct kobject *kobj);
+ const char *(* const name)(struct kobject *kobj);
+ int (* const uevent)(struct kobject *kobj, struct kobj_uevent_env *env);
+ };
+
+
+The filter function allows a kset to prevent a uevent from being emitted to
+userspace for a specific kobject. If the function returns 0, the uevent
+will not be emitted.
+
+The name function will be called to override the default name of the kset
+that the uevent sends to userspace. By default, the name will be the same
+as the kset itself, but this function, if present, can override that name.
+
+The uevent function will be called when the uevent is about to be sent to
+userspace to allow more environment variables to be added to the uevent.
+
+One might ask how, exactly, a kobject is added to a kset, given that no
+functions which perform that function have been presented. The answer is
+that this task is handled by kobject_add(). When a kobject is passed to
+kobject_add(), its kset member should point to the kset to which the
+kobject will belong. kobject_add() will handle the rest.
+
+If the kobject belonging to a kset has no parent kobject set, it will be
+added to the kset's directory. Not all members of a kset do necessarily
+live in the kset directory. If an explicit parent kobject is assigned
+before the kobject is added, the kobject is registered with the kset, but
+added below the parent kobject.
+
+
+Kobject removal
+===============
+
+After a kobject has been registered with the kobject core successfully, it
+must be cleaned up when the code is finished with it. To do that, call
+kobject_put(). By doing this, the kobject core will automatically clean up
+all of the memory allocated by this kobject. If a ``KOBJ_ADD`` uevent has been
+sent for the object, a corresponding ``KOBJ_REMOVE`` uevent will be sent, and
+any other sysfs housekeeping will be handled for the caller properly.
+
+If you need to do a two-stage delete of the kobject (say you are not
+allowed to sleep when you need to destroy the object), then call
+kobject_del() which will unregister the kobject from sysfs. This makes the
+kobject "invisible", but it is not cleaned up, and the reference count of
+the object is still the same. At a later time call kobject_put() to finish
+the cleanup of the memory associated with the kobject.
+
+kobject_del() can be used to drop the reference to the parent object, if
+circular references are constructed. It is valid in some cases, that a
+parent objects references a child. Circular references _must_ be broken
+with an explicit call to kobject_del(), so that a release functions will be
+called, and the objects in the former circle release each other.
+
+
+Example code to copy from
+=========================
+
+For a more complete example of using ksets and kobjects properly, see the
+example programs ``samples/kobject/{kobject-example.c,kset-example.c}``,
+which will be built as loadable modules if you select ``CONFIG_SAMPLE_KOBJECT``.
diff --git a/Documentation/core-api/kref.rst b/Documentation/core-api/kref.rst
new file mode 100644
index 000000000000..8db9ff03d952
--- /dev/null
+++ b/Documentation/core-api/kref.rst
@@ -0,0 +1,328 @@
+===================================================
+Adding reference counters (krefs) to kernel objects
+===================================================
+
+:Author: Corey Minyard <minyard@acm.org>
+:Author: Thomas Hellström <thomas.hellstrom@linux.intel.com>
+
+A lot of this was lifted from Greg Kroah-Hartman's 2004 OLS paper and
+presentation on krefs, which can be found at:
+
+ - http://www.kroah.com/linux/talks/ols_2004_kref_paper/Reprint-Kroah-Hartman-OLS2004.pdf
+ - http://www.kroah.com/linux/talks/ols_2004_kref_talk/
+
+Introduction
+============
+
+krefs allow you to add reference counters to your objects. If you
+have objects that are used in multiple places and passed around, and
+you don't have refcounts, your code is almost certainly broken. If
+you want refcounts, krefs are the way to go.
+
+To use a kref, add one to your data structures like::
+
+ struct my_data
+ {
+ .
+ .
+ struct kref refcount;
+ .
+ .
+ };
+
+The kref can occur anywhere within the data structure.
+
+Initialization
+==============
+
+You must initialize the kref after you allocate it. To do this, call
+kref_init as so::
+
+ struct my_data *data;
+
+ data = kmalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+ kref_init(&data->refcount);
+
+This sets the refcount in the kref to 1.
+
+Kref rules
+==========
+
+Once you have an initialized kref, you must follow the following
+rules:
+
+1) If you make a non-temporary copy of a pointer, especially if
+ it can be passed to another thread of execution, you must
+ increment the refcount with kref_get() before passing it off::
+
+ kref_get(&data->refcount);
+
+ If you already have a valid pointer to a kref-ed structure (the
+ refcount cannot go to zero) you may do this without a lock.
+
+2) When you are done with a pointer, you must call kref_put()::
+
+ kref_put(&data->refcount, data_release);
+
+ If this is the last reference to the pointer, the release
+ routine will be called. If the code never tries to get
+ a valid pointer to a kref-ed structure without already
+ holding a valid pointer, it is safe to do this without
+ a lock.
+
+3) If the code attempts to gain a reference to a kref-ed structure
+ without already holding a valid pointer, it must serialize access
+ where a kref_put() cannot occur during the kref_get(), and the
+ structure must remain valid during the kref_get().
+
+For example, if you allocate some data and then pass it to another
+thread to process::
+
+ void data_release(struct kref *ref)
+ {
+ struct my_data *data = container_of(ref, struct my_data, refcount);
+ kfree(data);
+ }
+
+ void more_data_handling(void *cb_data)
+ {
+ struct my_data *data = cb_data;
+ .
+ . do stuff with data here
+ .
+ kref_put(&data->refcount, data_release);
+ }
+
+ int my_data_handler(void)
+ {
+ int rv = 0;
+ struct my_data *data;
+ struct task_struct *task;
+ data = kmalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+ kref_init(&data->refcount);
+
+ kref_get(&data->refcount);
+ task = kthread_run(more_data_handling, data, "more_data_handling");
+ if (task == ERR_PTR(-ENOMEM)) {
+ rv = -ENOMEM;
+ kref_put(&data->refcount, data_release);
+ goto out;
+ }
+
+ .
+ . do stuff with data here
+ .
+ out:
+ kref_put(&data->refcount, data_release);
+ return rv;
+ }
+
+This way, it doesn't matter what order the two threads handle the
+data, the kref_put() handles knowing when the data is not referenced
+any more and releasing it. The kref_get() does not require a lock,
+since we already have a valid pointer that we own a refcount for. The
+put needs no lock because nothing tries to get the data without
+already holding a pointer.
+
+In the above example, kref_put() will be called 2 times in both success
+and error paths. This is necessary because the reference count got
+incremented 2 times by kref_init() and kref_get().
+
+Note that the "before" in rule 1 is very important. You should never
+do something like::
+
+ task = kthread_run(more_data_handling, data, "more_data_handling");
+ if (task == ERR_PTR(-ENOMEM)) {
+ rv = -ENOMEM;
+ goto out;
+ } else
+ /* BAD BAD BAD - get is after the handoff */
+ kref_get(&data->refcount);
+
+Don't assume you know what you are doing and use the above construct.
+First of all, you may not know what you are doing. Second, you may
+know what you are doing (there are some situations where locking is
+involved where the above may be legal) but someone else who doesn't
+know what they are doing may change the code or copy the code. It's
+bad style. Don't do it.
+
+There are some situations where you can optimize the gets and puts.
+For instance, if you are done with an object and enqueuing it for
+something else or passing it off to something else, there is no reason
+to do a get then a put::
+
+ /* Silly extra get and put */
+ kref_get(&obj->ref);
+ enqueue(obj);
+ kref_put(&obj->ref, obj_cleanup);
+
+Just do the enqueue. A comment about this is always welcome::
+
+ enqueue(obj);
+ /* We are done with obj, so we pass our refcount off
+ to the queue. DON'T TOUCH obj AFTER HERE! */
+
+The last rule (rule 3) is the nastiest one to handle. Say, for
+instance, you have a list of items that are each kref-ed, and you wish
+to get the first one. You can't just pull the first item off the list
+and kref_get() it. That violates rule 3 because you are not already
+holding a valid pointer. You must add a mutex (or some other lock).
+For instance::
+
+ static DEFINE_MUTEX(mutex);
+ static LIST_HEAD(q);
+ struct my_data
+ {
+ struct kref refcount;
+ struct list_head link;
+ };
+
+ static struct my_data *get_entry()
+ {
+ struct my_data *entry = NULL;
+ mutex_lock(&mutex);
+ if (!list_empty(&q)) {
+ entry = container_of(q.next, struct my_data, link);
+ kref_get(&entry->refcount);
+ }
+ mutex_unlock(&mutex);
+ return entry;
+ }
+
+ static void release_entry(struct kref *ref)
+ {
+ struct my_data *entry = container_of(ref, struct my_data, refcount);
+
+ list_del(&entry->link);
+ kfree(entry);
+ }
+
+ static void put_entry(struct my_data *entry)
+ {
+ mutex_lock(&mutex);
+ kref_put(&entry->refcount, release_entry);
+ mutex_unlock(&mutex);
+ }
+
+The kref_put() return value is useful if you do not want to hold the
+lock during the whole release operation. Say you didn't want to call
+kfree() with the lock held in the example above (since it is kind of
+pointless to do so). You could use kref_put() as follows::
+
+ static void release_entry(struct kref *ref)
+ {
+ /* All work is done after the return from kref_put(). */
+ }
+
+ static void put_entry(struct my_data *entry)
+ {
+ mutex_lock(&mutex);
+ if (kref_put(&entry->refcount, release_entry)) {
+ list_del(&entry->link);
+ mutex_unlock(&mutex);
+ kfree(entry);
+ } else
+ mutex_unlock(&mutex);
+ }
+
+This is really more useful if you have to call other routines as part
+of the free operations that could take a long time or might claim the
+same lock. Note that doing everything in the release routine is still
+preferred as it is a little neater.
+
+The above example could also be optimized using kref_get_unless_zero() in
+the following way::
+
+ static struct my_data *get_entry()
+ {
+ struct my_data *entry = NULL;
+ mutex_lock(&mutex);
+ if (!list_empty(&q)) {
+ entry = container_of(q.next, struct my_data, link);
+ if (!kref_get_unless_zero(&entry->refcount))
+ entry = NULL;
+ }
+ mutex_unlock(&mutex);
+ return entry;
+ }
+
+ static void release_entry(struct kref *ref)
+ {
+ struct my_data *entry = container_of(ref, struct my_data, refcount);
+
+ mutex_lock(&mutex);
+ list_del(&entry->link);
+ mutex_unlock(&mutex);
+ kfree(entry);
+ }
+
+ static void put_entry(struct my_data *entry)
+ {
+ kref_put(&entry->refcount, release_entry);
+ }
+
+Which is useful to remove the mutex lock around kref_put() in put_entry(), but
+it's important that kref_get_unless_zero is enclosed in the same critical
+section that finds the entry in the lookup table,
+otherwise kref_get_unless_zero may reference already freed memory.
+Note that it is illegal to use kref_get_unless_zero without checking its
+return value. If you are sure (by already having a valid pointer) that
+kref_get_unless_zero() will return true, then use kref_get() instead.
+
+Krefs and RCU
+=============
+
+The function kref_get_unless_zero also makes it possible to use rcu
+locking for lookups in the above example::
+
+ struct my_data
+ {
+ struct rcu_head rhead;
+ .
+ struct kref refcount;
+ .
+ .
+ };
+
+ static struct my_data *get_entry_rcu()
+ {
+ struct my_data *entry = NULL;
+ rcu_read_lock();
+ if (!list_empty(&q)) {
+ entry = container_of(q.next, struct my_data, link);
+ if (!kref_get_unless_zero(&entry->refcount))
+ entry = NULL;
+ }
+ rcu_read_unlock();
+ return entry;
+ }
+
+ static void release_entry_rcu(struct kref *ref)
+ {
+ struct my_data *entry = container_of(ref, struct my_data, refcount);
+
+ mutex_lock(&mutex);
+ list_del_rcu(&entry->link);
+ mutex_unlock(&mutex);
+ kfree_rcu(entry, rhead);
+ }
+
+ static void put_entry(struct my_data *entry)
+ {
+ kref_put(&entry->refcount, release_entry_rcu);
+ }
+
+But note that the struct kref member needs to remain in valid memory for a
+rcu grace period after release_entry_rcu was called. That can be accomplished
+by using kfree_rcu(entry, rhead) as done above, or by calling synchronize_rcu()
+before using kfree, but note that synchronize_rcu() may sleep for a
+substantial amount of time.
+
+Functions and structures
+========================
+
+.. kernel-doc:: include/linux/kref.h
diff --git a/Documentation/core-api/list.rst b/Documentation/core-api/list.rst
new file mode 100644
index 000000000000..86873ce9adbf
--- /dev/null
+++ b/Documentation/core-api/list.rst
@@ -0,0 +1,776 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+=====================
+Linked Lists in Linux
+=====================
+
+:Author: Nicolas Frattaroli <nicolas.frattaroli@collabora.com>
+
+.. contents::
+
+Introduction
+============
+
+Linked lists are one of the most basic data structures used in many programs.
+The Linux kernel implements several different flavours of linked lists. The
+purpose of this document is not to explain linked lists in general, but to show
+new kernel developers how to use the Linux kernel implementations of linked
+lists.
+
+Please note that while linked lists certainly are ubiquitous, they are rarely
+the best data structure to use in cases where a simple array doesn't already
+suffice. In particular, due to their poor data locality, linked lists are a bad
+choice in situations where performance may be of consideration. Familiarizing
+oneself with other in-kernel generic data structures, especially for concurrent
+accesses, is highly encouraged.
+
+Linux implementation of doubly linked lists
+===========================================
+
+Linux's linked list implementations can be used by including the header file
+``<linux/list.h>``.
+
+The doubly-linked list will likely be the most familiar to many readers. It's a
+list that can efficiently be traversed forwards and backwards.
+
+The Linux kernel's doubly-linked list is circular in nature. This means that to
+get from the head node to the tail, we can just travel one edge backwards.
+Similarly, to get from the tail node to the head, we can simply travel forwards
+"beyond" the tail and arrive back at the head.
+
+Declaring a node
+----------------
+
+A node in a doubly-linked list is declared by adding a struct list_head
+member to the data structure you wish to be contained in the list:
+
+.. code-block:: c
+
+ struct clown {
+ unsigned long long shoe_size;
+ const char *name;
+ struct list_head node; /* the aforementioned member */
+ };
+
+This may be an unfamiliar approach to some, as the classical explanation of a
+linked list is a list node data structure with pointers to the previous and next
+list node, as well the payload data. Linux chooses this approach because it
+allows for generic list modification code regardless of what data structure is
+contained within the list. Since the struct list_head member is not a pointer
+but part of the data structure proper, the container_of() pattern can be used by
+the list implementation to access the payload data regardless of its type, while
+staying oblivious to what said type actually is.
+
+Declaring and initializing a list
+---------------------------------
+
+A doubly-linked list can then be declared as just another struct list_head,
+and initialized with the LIST_HEAD_INIT() macro during initial assignment, or
+with the INIT_LIST_HEAD() function later:
+
+.. code-block:: c
+
+ struct clown_car {
+ int tyre_pressure[4];
+ struct list_head clowns; /* Looks like a node! */
+ };
+
+ /* ... Somewhere later in our driver ... */
+
+ static int circus_init(struct circus_priv *circus)
+ {
+ struct clown_car other_car = {
+ .tyre_pressure = {10, 12, 11, 9},
+ .clowns = LIST_HEAD_INIT(other_car.clowns)
+ };
+
+ INIT_LIST_HEAD(&circus->car.clowns);
+
+ return 0;
+ }
+
+A further point of confusion to some may be that the list itself doesn't really
+have its own type. The concept of the entire linked list and a
+struct list_head member that points to other entries in the list are one and
+the same.
+
+Adding nodes to the list
+------------------------
+
+Adding a node to the linked list is done through the list_add() macro.
+
+We'll return to our clown car example to illustrate how nodes get added to the
+list:
+
+.. code-block:: c
+
+ static int circus_fill_car(struct circus_priv *circus)
+ {
+ struct clown_car *car = &circus->car;
+ struct clown *grock;
+ struct clown *dimitri;
+
+ /* State 1 */
+
+ grock = kzalloc(sizeof(*grock), GFP_KERNEL);
+ if (!grock)
+ return -ENOMEM;
+ grock->name = "Grock";
+ grock->shoe_size = 1000;
+
+ /* Note that we're adding the "node" member */
+ list_add(&grock->node, &car->clowns);
+
+ /* State 2 */
+
+ dimitri = kzalloc(sizeof(*dimitri), GFP_KERNEL);
+ if (!dimitri)
+ return -ENOMEM;
+ dimitri->name = "Dimitri";
+ dimitri->shoe_size = 50;
+
+ list_add(&dimitri->node, &car->clowns);
+
+ /* State 3 */
+
+ return 0;
+ }
+
+In State 1, our list of clowns is still empty::
+
+ .------.
+ v |
+ .--------. |
+ | clowns |--'
+ '--------'
+
+This diagram shows the singular "clowns" node pointing at itself. In this
+diagram, and all following diagrams, only the forward edges are shown, to aid in
+clarity.
+
+In State 2, we've added Grock after the list head::
+
+ .--------------------.
+ v |
+ .--------. .-------. |
+ | clowns |---->| Grock |--'
+ '--------' '-------'
+
+This diagram shows the "clowns" node pointing at a new node labeled "Grock".
+The Grock node is pointing back at the "clowns" node.
+
+In State 3, we've added Dimitri after the list head, resulting in the following::
+
+ .------------------------------------.
+ v |
+ .--------. .---------. .-------. |
+ | clowns |---->| Dimitri |---->| Grock |--'
+ '--------' '---------' '-------'
+
+This diagram shows the "clowns" node pointing at a new node labeled "Dimitri",
+which then points at the node labeled "Grock". The "Grock" node still points
+back at the "clowns" node.
+
+If we wanted to have Dimitri inserted at the end of the list instead, we'd use
+list_add_tail(). Our code would then look like this:
+
+.. code-block:: c
+
+ static int circus_fill_car(struct circus_priv *circus)
+ {
+ /* ... */
+
+ list_add_tail(&dimitri->node, &car->clowns);
+
+ /* State 3b */
+
+ return 0;
+ }
+
+This results in the following list::
+
+ .------------------------------------.
+ v |
+ .--------. .-------. .---------. |
+ | clowns |---->| Grock |---->| Dimitri |--'
+ '--------' '-------' '---------'
+
+This diagram shows the "clowns" node pointing at the node labeled "Grock",
+which points at the new node labeled "Dimitri". The node labeled "Dimitri"
+points back at the "clowns" node.
+
+Traversing the list
+-------------------
+
+To iterate the list, we can loop through all nodes within the list with
+list_for_each().
+
+In our clown example, this results in the following somewhat awkward code:
+
+.. code-block:: c
+
+ static unsigned long long circus_get_max_shoe_size(struct circus_priv *circus)
+ {
+ unsigned long long res = 0;
+ struct clown *e;
+ struct list_head *cur;
+
+ list_for_each(cur, &circus->car.clowns) {
+ e = list_entry(cur, struct clown, node);
+ if (e->shoe_size > res)
+ res = e->shoe_size;
+ }
+
+ return res;
+ }
+
+The list_entry() macro internally uses the aforementioned container_of() to
+retrieve the data structure instance that ``node`` is a member of.
+
+Note how the additional list_entry() call is a little awkward here. It's only
+there because we're iterating through the ``node`` members, but we really want
+to iterate through the payload, i.e. the ``struct clown`` that contains each
+node's struct list_head. For this reason, there is a second macro:
+list_for_each_entry()
+
+Using it would change our code to something like this:
+
+.. code-block:: c
+
+ static unsigned long long circus_get_max_shoe_size(struct circus_priv *circus)
+ {
+ unsigned long long res = 0;
+ struct clown *e;
+
+ list_for_each_entry(e, &circus->car.clowns, node) {
+ if (e->shoe_size > res)
+ res = e->shoe_size;
+ }
+
+ return res;
+ }
+
+This eliminates the need for the list_entry() step, and our loop cursor is now
+of the type of our payload. The macro is given the member name that corresponds
+to the list's struct list_head within the clown data structure so that it can
+still walk the list.
+
+Removing nodes from the list
+----------------------------
+
+The list_del() function can be used to remove entries from the list. It not only
+removes the given entry from the list, but poisons the entry's ``prev`` and
+``next`` pointers, so that unintended use of the entry after removal does not
+go unnoticed.
+
+We can extend our previous example to remove one of the entries:
+
+.. code-block:: c
+
+ static int circus_fill_car(struct circus_priv *circus)
+ {
+ /* ... */
+
+ list_add(&dimitri->node, &car->clowns);
+
+ /* State 3 */
+
+ list_del(&dimitri->node);
+
+ /* State 4 */
+
+ return 0;
+ }
+
+The result of this would be this::
+
+ .--------------------.
+ v |
+ .--------. .-------. | .---------.
+ | clowns |---->| Grock |--' | Dimitri |
+ '--------' '-------' '---------'
+
+This diagram shows the "clowns" node pointing at the node labeled "Grock",
+which points back at the "clowns" node. Off to the side is a lone node labeled
+"Dimitri", which has no arrows pointing anywhere.
+
+Note how the Dimitri node does not point to itself; its pointers are
+intentionally set to a "poison" value that the list code refuses to traverse.
+
+If we wanted to reinitialize the removed node instead to make it point at itself
+again like an empty list head, we can use list_del_init() instead:
+
+.. code-block:: c
+
+ static int circus_fill_car(struct circus_priv *circus)
+ {
+ /* ... */
+
+ list_add(&dimitri->node, &car->clowns);
+
+ /* State 3 */
+
+ list_del_init(&dimitri->node);
+
+ /* State 4b */
+
+ return 0;
+ }
+
+This results in the deleted node pointing to itself again::
+
+ .--------------------. .-------.
+ v | v |
+ .--------. .-------. | .---------. |
+ | clowns |---->| Grock |--' | Dimitri |--'
+ '--------' '-------' '---------'
+
+This diagram shows the "clowns" node pointing at the node labeled "Grock",
+which points back at the "clowns" node. Off to the side is a lone node labeled
+"Dimitri", which points to itself.
+
+Traversing whilst removing nodes
+--------------------------------
+
+Deleting entries while we're traversing the list will cause problems if we use
+list_for_each() and list_for_each_entry(), as deleting the current entry would
+modify the ``next`` pointer of it, which means the traversal can't properly
+advance to the next list entry.
+
+There is a solution to this however: list_for_each_safe() and
+list_for_each_entry_safe(). These take an additional parameter of a pointer to
+a struct list_head to use as temporary storage for the next entry during
+iteration, solving the issue.
+
+An example of how to use it:
+
+.. code-block:: c
+
+ static void circus_eject_insufficient_clowns(struct circus_priv *circus)
+ {
+ struct clown *e;
+ struct clown *n; /* temporary storage for safe iteration */
+
+ list_for_each_entry_safe(e, n, &circus->car.clowns, node) {
+ if (e->shoe_size < 500)
+ list_del(&e->node);
+ }
+ }
+
+Proper memory management (i.e. freeing the deleted node while making sure
+nothing still references it) in this case is left as an exercise to the reader.
+
+Cutting a list
+--------------
+
+There are two helper functions to cut lists with. Both take elements from the
+list ``head``, and replace the contents of the list ``list``.
+
+The first such function is list_cut_position(). It removes all list entries from
+``head`` up to and including ``entry``, placing them in ``list`` instead.
+
+In this example, it's assumed we start with the following list::
+
+ .----------------------------------------------------------------.
+ v |
+ .--------. .-------. .---------. .-----. .---------. |
+ | clowns |---->| Grock |---->| Dimitri |---->| Pic |---->| Alfredo |--'
+ '--------' '-------' '---------' '-----' '---------'
+
+With the following code, every clown up to and including "Pic" is moved from
+the "clowns" list head to a separate struct list_head initialized at local
+stack variable ``retirement``:
+
+.. code-block:: c
+
+ static void circus_retire_clowns(struct circus_priv *circus)
+ {
+ struct list_head retirement = LIST_HEAD_INIT(retirement);
+ struct clown *grock, *dimitri, *pic, *alfredo;
+ struct clown_car *car = &circus->car;
+
+ /* ... clown initialization, list adding ... */
+
+ list_cut_position(&retirement, &car->clowns, &pic->node);
+
+ /* State 1 */
+ }
+
+The resulting ``car->clowns`` list would be this::
+
+ .----------------------.
+ v |
+ .--------. .---------. |
+ | clowns |---->| Alfredo |--'
+ '--------' '---------'
+
+Meanwhile, the ``retirement`` list is transformed to the following::
+
+ .--------------------------------------------------.
+ v |
+ .------------. .-------. .---------. .-----. |
+ | retirement |---->| Grock |---->| Dimitri |---->| Pic |--'
+ '------------' '-------' '---------' '-----'
+
+The second function, list_cut_before(), is much the same, except it cuts before
+the ``entry`` node, i.e. it removes all list entries from ``head`` up to but
+excluding ``entry``, placing them in ``list`` instead. This example assumes the
+same initial starting list as the previous example:
+
+.. code-block:: c
+
+ static void circus_retire_clowns(struct circus_priv *circus)
+ {
+ struct list_head retirement = LIST_HEAD_INIT(retirement);
+ struct clown *grock, *dimitri, *pic, *alfredo;
+ struct clown_car *car = &circus->car;
+
+ /* ... clown initialization, list adding ... */
+
+ list_cut_before(&retirement, &car->clowns, &pic->node);
+
+ /* State 1b */
+ }
+
+The resulting ``car->clowns`` list would be this::
+
+ .----------------------------------.
+ v |
+ .--------. .-----. .---------. |
+ | clowns |---->| Pic |---->| Alfredo |--'
+ '--------' '-----' '---------'
+
+Meanwhile, the ``retirement`` list is transformed to the following::
+
+ .--------------------------------------.
+ v |
+ .------------. .-------. .---------. |
+ | retirement |---->| Grock |---->| Dimitri |--'
+ '------------' '-------' '---------'
+
+It should be noted that both functions will destroy links to any existing nodes
+in the destination ``struct list_head *list``.
+
+Moving entries and partial lists
+--------------------------------
+
+The list_move() and list_move_tail() functions can be used to move an entry
+from one list to another, to either the start or end respectively.
+
+In the following example, we'll assume we start with two lists ("clowns" and
+"sidewalk" in the following initial state "State 0"::
+
+ .----------------------------------------------------------------.
+ v |
+ .--------. .-------. .---------. .-----. .---------. |
+ | clowns |---->| Grock |---->| Dimitri |---->| Pic |---->| Alfredo |--'
+ '--------' '-------' '---------' '-----' '---------'
+
+ .-------------------.
+ v |
+ .----------. .-----. |
+ | sidewalk |---->| Pio |--'
+ '----------' '-----'
+
+We apply the following example code to the two lists:
+
+.. code-block:: c
+
+ static void circus_clowns_exit_car(struct circus_priv *circus)
+ {
+ struct list_head sidewalk = LIST_HEAD_INIT(sidewalk);
+ struct clown *grock, *dimitri, *pic, *alfredo, *pio;
+ struct clown_car *car = &circus->car;
+
+ /* ... clown initialization, list adding ... */
+
+ /* State 0 */
+
+ list_move(&pic->node, &sidewalk);
+
+ /* State 1 */
+
+ list_move_tail(&dimitri->node, &sidewalk);
+
+ /* State 2 */
+ }
+
+In State 1, we arrive at the following situation::
+
+ .-----------------------------------------------------.
+ | |
+ v |
+ .--------. .-------. .---------. .---------. |
+ | clowns |---->| Grock |---->| Dimitri |---->| Alfredo |--'
+ '--------' '-------' '---------' '---------'
+
+ .-------------------------------.
+ v |
+ .----------. .-----. .-----. |
+ | sidewalk |---->| Pic |---->| Pio |--'
+ '----------' '-----' '-----'
+
+In State 2, after we've moved Dimitri to the tail of sidewalk, the situation
+changes as follows::
+
+ .-------------------------------------.
+ | |
+ v |
+ .--------. .-------. .---------. |
+ | clowns |---->| Grock |---->| Alfredo |--'
+ '--------' '-------' '---------'
+
+ .-----------------------------------------------.
+ v |
+ .----------. .-----. .-----. .---------. |
+ | sidewalk |---->| Pic |---->| Pio |---->| Dimitri |--'
+ '----------' '-----' '-----' '---------'
+
+As long as the source and destination list head are part of the same list, we
+can also efficiently bulk move a segment of the list to the tail end of the
+list. We continue the previous example by adding a list_bulk_move_tail() after
+State 2, moving Pic and Pio to the tail end of the sidewalk list.
+
+.. code-block:: c
+
+ static void circus_clowns_exit_car(struct circus_priv *circus)
+ {
+ struct list_head sidewalk = LIST_HEAD_INIT(sidewalk);
+ struct clown *grock, *dimitri, *pic, *alfredo, *pio;
+ struct clown_car *car = &circus->car;
+
+ /* ... clown initialization, list adding ... */
+
+ /* State 0 */
+
+ list_move(&pic->node, &sidewalk);
+
+ /* State 1 */
+
+ list_move_tail(&dimitri->node, &sidewalk);
+
+ /* State 2 */
+
+ list_bulk_move_tail(&sidewalk, &pic->node, &pio->node);
+
+ /* State 3 */
+ }
+
+For the sake of brevity, only the altered "sidewalk" list at State 3 is depicted
+in the following diagram::
+
+ .-----------------------------------------------.
+ v |
+ .----------. .---------. .-----. .-----. |
+ | sidewalk |---->| Dimitri |---->| Pic |---->| Pio |--'
+ '----------' '---------' '-----' '-----'
+
+Do note that list_bulk_move_tail() does not do any checking as to whether all
+three supplied ``struct list_head *`` parameters really do belong to the same
+list. If you use it outside the constraints the documentation gives, then the
+result is a matter between you and the implementation.
+
+Rotating entries
+----------------
+
+A common write operation on lists, especially when using them as queues, is
+to rotate it. A list rotation means entries at the front are sent to the back.
+
+For rotation, Linux provides us with two functions: list_rotate_left() and
+list_rotate_to_front(). The former can be pictured like a bicycle chain, taking
+the entry after the supplied ``struct list_head *`` and moving it to the tail,
+which in essence means the entire list, due to its circular nature, rotates by
+one position.
+
+The latter, list_rotate_to_front(), takes the same concept one step further:
+instead of advancing the list by one entry, it advances it *until* the specified
+entry is the new front.
+
+In the following example, our starting state, State 0, is the following::
+
+ .-----------------------------------------------------------------.
+ v |
+ .--------. .-------. .---------. .-----. .---------. .-----. |
+ | clowns |-->| Grock |-->| Dimitri |-->| Pic |-->| Alfredo |-->| Pio |-'
+ '--------' '-------' '---------' '-----' '---------' '-----'
+
+The example code being used to demonstrate list rotations is the following:
+
+.. code-block:: c
+
+ static void circus_clowns_rotate(struct circus_priv *circus)
+ {
+ struct clown *grock, *dimitri, *pic, *alfredo, *pio;
+ struct clown_car *car = &circus->car;
+
+ /* ... clown initialization, list adding ... */
+
+ /* State 0 */
+
+ list_rotate_left(&car->clowns);
+
+ /* State 1 */
+
+ list_rotate_to_front(&alfredo->node, &car->clowns);
+
+ /* State 2 */
+
+ }
+
+In State 1, we arrive at the following situation::
+
+ .-----------------------------------------------------------------.
+ v |
+ .--------. .---------. .-----. .---------. .-----. .-------. |
+ | clowns |-->| Dimitri |-->| Pic |-->| Alfredo |-->| Pio |-->| Grock |-'
+ '--------' '---------' '-----' '---------' '-----' '-------'
+
+Next, after the list_rotate_to_front() call, we arrive in the following
+State 2::
+
+ .-----------------------------------------------------------------.
+ v |
+ .--------. .---------. .-----. .-------. .---------. .-----. |
+ | clowns |-->| Alfredo |-->| Pio |-->| Grock |-->| Dimitri |-->| Pic |-'
+ '--------' '---------' '-----' '-------' '---------' '-----'
+
+As is hopefully evident from the diagrams, the entries in front of "Alfredo"
+were cycled to the tail end of the list.
+
+Swapping entries
+----------------
+
+Another common operation is that two entries need to be swapped with each other.
+
+For this, Linux provides us with list_swap().
+
+In the following example, we have a list with three entries, and swap two of
+them. This is our starting state in "State 0"::
+
+ .-----------------------------------------.
+ v |
+ .--------. .-------. .---------. .-----. |
+ | clowns |-->| Grock |-->| Dimitri |-->| Pic |-'
+ '--------' '-------' '---------' '-----'
+
+.. code-block:: c
+
+ static void circus_clowns_swap(struct circus_priv *circus)
+ {
+ struct clown *grock, *dimitri, *pic;
+ struct clown_car *car = &circus->car;
+
+ /* ... clown initialization, list adding ... */
+
+ /* State 0 */
+
+ list_swap(&dimitri->node, &pic->node);
+
+ /* State 1 */
+ }
+
+The resulting list at State 1 is the following::
+
+ .-----------------------------------------.
+ v |
+ .--------. .-------. .-----. .---------. |
+ | clowns |-->| Grock |-->| Pic |-->| Dimitri |-'
+ '--------' '-------' '-----' '---------'
+
+As is evident by comparing the diagrams, the "Pic" and "Dimitri" nodes have
+traded places.
+
+Splicing two lists together
+---------------------------
+
+Say we have two lists, in the following example one represented by a list head
+we call "knie" and one we call "stey". In a hypothetical circus acquisition,
+the two list of clowns should be spliced together. The following is our
+situation in "State 0"::
+
+ .-----------------------------------------.
+ | |
+ v |
+ .------. .-------. .---------. .-----. |
+ | knie |-->| Grock |-->| Dimitri |-->| Pic |--'
+ '------' '-------' '---------' '-----'
+
+ .-----------------------------.
+ v |
+ .------. .---------. .-----. |
+ | stey |-->| Alfredo |-->| Pio |--'
+ '------' '---------' '-----'
+
+The function to splice these two lists together is list_splice(). Our example
+code is as follows:
+
+.. code-block:: c
+
+ static void circus_clowns_splice(void)
+ {
+ struct clown *grock, *dimitri, *pic, *alfredo, *pio;
+ struct list_head knie = LIST_HEAD_INIT(knie);
+ struct list_head stey = LIST_HEAD_INIT(stey);
+
+ /* ... Clown allocation and initialization here ... */
+
+ list_add_tail(&grock->node, &knie);
+ list_add_tail(&dimitri->node, &knie);
+ list_add_tail(&pic->node, &knie);
+ list_add_tail(&alfredo->node, &stey);
+ list_add_tail(&pio->node, &stey);
+
+ /* State 0 */
+
+ list_splice(&stey, &dimitri->node);
+
+ /* State 1 */
+ }
+
+The list_splice() call here adds all the entries in ``stey`` to the list
+``dimitri``'s ``node`` list_head is in, after the ``node`` of ``dimitri``. A
+somewhat surprising diagram of the resulting "State 1" follows::
+
+ .-----------------------------------------------------------------.
+ | |
+ v |
+ .------. .-------. .---------. .---------. .-----. .-----. |
+ | knie |-->| Grock |-->| Dimitri |-->| Alfredo |-->| Pio |-->| Pic |--'
+ '------' '-------' '---------' '---------' '-----' '-----'
+ ^
+ .-------------------------------'
+ |
+ .------. |
+ | stey |--'
+ '------'
+
+Traversing the ``stey`` list no longer results in correct behavior. A call of
+list_for_each() on ``stey`` results in an infinite loop, as it never returns
+back to the ``stey`` list head.
+
+This is because list_splice() did not reinitialize the list_head it took
+entries from, leaving its pointer pointing into what is now a different list.
+
+If we want to avoid this situation, list_splice_init() can be used. It does the
+same thing as list_splice(), except reinitalizes the donor list_head after the
+transplant.
+
+Concurrency considerations
+--------------------------
+
+Concurrent access and modification of a list needs to be protected with a lock
+in most cases. Alternatively and preferably, one may use the RCU primitives for
+lists in read-mostly use-cases, where read accesses to the list are common but
+modifications to the list less so. See Documentation/RCU/listRCU.rst for more
+details.
+
+Further reading
+---------------
+
+* `How does the kernel implements Linked Lists? - KernelNewbies <https://kernelnewbies.org/FAQ/LinkedLists>`_
+
+Full List API
+=============
+
+.. kernel-doc:: include/linux/list.h
+ :internal:
diff --git a/Documentation/core-api/local_ops.rst b/Documentation/core-api/local_ops.rst
index 2ac3f9f29845..0b42ceaaf3c4 100644
--- a/Documentation/core-api/local_ops.rst
+++ b/Documentation/core-api/local_ops.rst
@@ -191,7 +191,7 @@ Here is a sample module which implements a basic per cpu counter using
static void __exit test_exit(void)
{
- del_timer_sync(&test_timer);
+ timer_shutdown_sync(&test_timer);
}
module_init(test_init);
diff --git a/Documentation/core-api/maple_tree.rst b/Documentation/core-api/maple_tree.rst
new file mode 100644
index 000000000000..ccdd1615cf97
--- /dev/null
+++ b/Documentation/core-api/maple_tree.rst
@@ -0,0 +1,221 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+
+==========
+Maple Tree
+==========
+
+:Author: Liam R. Howlett
+
+Overview
+========
+
+The Maple Tree is a B-Tree data type which is optimized for storing
+non-overlapping ranges, including ranges of size 1. The tree was designed to
+be simple to use and does not require a user written search method. It
+supports iterating over a range of entries and going to the previous or next
+entry in a cache-efficient manner. The tree can also be put into an RCU-safe
+mode of operation which allows reading and writing concurrently. Writers must
+synchronize on a lock, which can be the default spinlock, or the user can set
+the lock to an external lock of a different type.
+
+The Maple Tree maintains a small memory footprint and was designed to use
+modern processor cache efficiently. The majority of the users will be able to
+use the normal API. An :ref:`maple-tree-advanced-api` exists for more complex
+scenarios. The most important usage of the Maple Tree is the tracking of the
+virtual memory areas.
+
+The Maple Tree can store values between ``0`` and ``ULONG_MAX``. The Maple
+Tree reserves values with the bottom two bits set to '10' which are below 4096
+(ie 2, 6, 10 .. 4094) for internal use. If the entries may use reserved
+entries then the users can convert the entries using xa_mk_value() and convert
+them back by calling xa_to_value(). If the user needs to use a reserved
+value, then the user can convert the value when using the
+:ref:`maple-tree-advanced-api`, but are blocked by the normal API.
+
+The Maple Tree can also be configured to support searching for a gap of a given
+size (or larger).
+
+Pre-allocating of nodes is also supported using the
+:ref:`maple-tree-advanced-api`. This is useful for users who must guarantee a
+successful store operation within a given
+code segment when allocating cannot be done. Allocations of nodes are
+relatively small at around 256 bytes.
+
+.. _maple-tree-normal-api:
+
+Normal API
+==========
+
+Start by initialising a maple tree, either with DEFINE_MTREE() for statically
+allocated maple trees or mt_init() for dynamically allocated ones. A
+freshly-initialised maple tree contains a ``NULL`` pointer for the range ``0``
+- ``ULONG_MAX``. There are currently two types of maple trees supported: the
+allocation tree and the regular tree. The regular tree has a higher branching
+factor for internal nodes. The allocation tree has a lower branching factor
+but allows the user to search for a gap of a given size or larger from either
+``0`` upwards or ``ULONG_MAX`` down. An allocation tree can be used by
+passing in the ``MT_FLAGS_ALLOC_RANGE`` flag when initialising the tree.
+
+You can then set entries using mtree_store() or mtree_store_range().
+mtree_store() will overwrite any entry with the new entry and return 0 on
+success or an error code otherwise. mtree_store_range() works in the same way
+but takes a range. mtree_load() is used to retrieve the entry stored at a
+given index. You can use mtree_erase() to erase an entire range by only
+knowing one value within that range, or mtree_store() call with an entry of
+NULL may be used to partially erase a range or many ranges at once.
+
+If you want to only store a new entry to a range (or index) if that range is
+currently ``NULL``, you can use mtree_insert_range() or mtree_insert() which
+return -EEXIST if the range is not empty.
+
+You can search for an entry from an index upwards by using mt_find().
+
+You can walk each entry within a range by calling mt_for_each(). You must
+provide a temporary variable to store a cursor. If you want to walk each
+element of the tree then ``0`` and ``ULONG_MAX`` may be used as the range. If
+the caller is going to hold the lock for the duration of the walk then it is
+worth looking at the mas_for_each() API in the :ref:`maple-tree-advanced-api`
+section.
+
+Sometimes it is necessary to ensure the next call to store to a maple tree does
+not allocate memory, please see :ref:`maple-tree-advanced-api` for this use case.
+
+You can use mtree_dup() to duplicate an entire maple tree. It is a more
+efficient way than inserting all elements one by one into a new tree.
+
+Finally, you can remove all entries from a maple tree by calling
+mtree_destroy(). If the maple tree entries are pointers, you may wish to free
+the entries first.
+
+Allocating Nodes
+----------------
+
+The allocations are handled by the internal tree code. See
+:ref:`maple-tree-advanced-alloc` for other options.
+
+Locking
+-------
+
+You do not have to worry about locking. See :ref:`maple-tree-advanced-locks`
+for other options.
+
+The Maple Tree uses RCU and an internal spinlock to synchronise access:
+
+Takes RCU read lock:
+ * mtree_load()
+ * mt_find()
+ * mt_for_each()
+ * mt_next()
+ * mt_prev()
+
+Takes ma_lock internally:
+ * mtree_store()
+ * mtree_store_range()
+ * mtree_insert()
+ * mtree_insert_range()
+ * mtree_erase()
+ * mtree_dup()
+ * mtree_destroy()
+ * mt_set_in_rcu()
+ * mt_clear_in_rcu()
+
+If you want to take advantage of the internal lock to protect the data
+structures that you are storing in the Maple Tree, you can call mtree_lock()
+before calling mtree_load(), then take a reference count on the object you
+have found before calling mtree_unlock(). This will prevent stores from
+removing the object from the tree between looking up the object and
+incrementing the refcount. You can also use RCU to avoid dereferencing
+freed memory, but an explanation of that is beyond the scope of this
+document.
+
+.. _maple-tree-advanced-api:
+
+Advanced API
+============
+
+The advanced API offers more flexibility and better performance at the
+cost of an interface which can be harder to use and has fewer safeguards.
+You must take care of your own locking while using the advanced API.
+You can use the ma_lock, RCU or an external lock for protection.
+You can mix advanced and normal operations on the same array, as long
+as the locking is compatible. The :ref:`maple-tree-normal-api` is implemented
+in terms of the advanced API.
+
+The advanced API is based around the ma_state, this is where the 'mas'
+prefix originates. The ma_state struct keeps track of tree operations to make
+life easier for both internal and external tree users.
+
+Initialising the maple tree is the same as in the :ref:`maple-tree-normal-api`.
+Please see above.
+
+The maple state keeps track of the range start and end in mas->index and
+mas->last, respectively.
+
+mas_walk() will walk the tree to the location of mas->index and set the
+mas->index and mas->last according to the range for the entry.
+
+You can set entries using mas_store(). mas_store() will overwrite any entry
+with the new entry and return the first existing entry that is overwritten.
+The range is passed in as members of the maple state: index and last.
+
+You can use mas_erase() to erase an entire range by setting index and
+last of the maple state to the desired range to erase. This will erase
+the first range that is found in that range, set the maple state index
+and last as the range that was erased and return the entry that existed
+at that location.
+
+You can walk each entry within a range by using mas_for_each(). If you want
+to walk each element of the tree then ``0`` and ``ULONG_MAX`` may be used as
+the range. If the lock needs to be periodically dropped, see the locking
+section mas_pause().
+
+Using a maple state allows mas_next() and mas_prev() to function as if the
+tree was a linked list. With such a high branching factor the amortized
+performance penalty is outweighed by cache optimization. mas_next() will
+return the next entry which occurs after the entry at index. mas_prev()
+will return the previous entry which occurs before the entry at index.
+
+mas_find() will find the first entry which exists at or above index on
+the first call, and the next entry from every subsequent calls.
+
+mas_find_rev() will find the first entry which exists at or below the last on
+the first call, and the previous entry from every subsequent calls.
+
+If the user needs to yield the lock during an operation, then the maple state
+must be paused using mas_pause().
+
+There are a few extra interfaces provided when using an allocation tree.
+If you wish to search for a gap within a range, then mas_empty_area()
+or mas_empty_area_rev() can be used. mas_empty_area() searches for a gap
+starting at the lowest index given up to the maximum of the range.
+mas_empty_area_rev() searches for a gap starting at the highest index given
+and continues downward to the lower bound of the range.
+
+.. _maple-tree-advanced-alloc:
+
+Advanced Allocating Nodes
+-------------------------
+
+Allocations are usually handled internally to the tree, however if allocations
+need to occur before a write occurs then calling mas_expected_entries() will
+allocate the worst-case number of needed nodes to insert the provided number of
+ranges. This also causes the tree to enter mass insertion mode. Once
+insertions are complete calling mas_destroy() on the maple state will free the
+unused allocations.
+
+.. _maple-tree-advanced-locks:
+
+Advanced Locking
+----------------
+
+The maple tree uses a spinlock by default, but external locks can be used for
+tree updates as well. To use an external lock, the tree must be initialized
+with the ``MT_FLAGS_LOCK_EXTERN flag``, this is usually done with the
+MTREE_INIT_EXT() #define, which takes an external lock as an argument.
+
+Functions and structures
+========================
+
+.. kernel-doc:: include/linux/maple_tree.h
+.. kernel-doc:: lib/maple_tree.c
diff --git a/Documentation/core-api/memory-allocation.rst b/Documentation/core-api/memory-allocation.rst
index f8bb9aa120c4..0f19dd524323 100644
--- a/Documentation/core-api/memory-allocation.rst
+++ b/Documentation/core-api/memory-allocation.rst
@@ -1,3 +1,5 @@
+.. _memory_allocation:
+
=======================
Memory Allocation Guide
=======================
@@ -43,8 +45,9 @@ here we briefly outline their recommended usage:
* If the allocation is performed from an atomic context, e.g interrupt
handler, use ``GFP_NOWAIT``. This flag prevents direct reclaim and
IO or filesystem operations. Consequently, under memory pressure
- ``GFP_NOWAIT`` allocation is likely to fail. Allocations which
- have a reasonable fallback should be using ``GFP_NOWARN``.
+ ``GFP_NOWAIT`` allocation is likely to fail. Users of this flag need
+ to provide a suitable fallback to cope with such failures where
+ appropriate.
* If you think that accessing memory reserves is justified and the kernel
will be stressed unless allocation succeeds, you may use ``GFP_ATOMIC``.
* Untrusted allocations triggered from userspace should be a subject
@@ -82,41 +85,104 @@ driver for a device with such restrictions, avoid using these flags.
And even with hardware with restrictions it is preferable to use
`dma_alloc*` APIs.
+GFP flags and reclaim behavior
+------------------------------
+Memory allocations may trigger direct or background reclaim and it is
+useful to understand how hard the page allocator will try to satisfy that
+or another request.
+
+ * ``GFP_KERNEL & ~__GFP_RECLAIM`` - optimistic allocation without _any_
+ attempt to free memory at all. The most light weight mode which even
+ doesn't kick the background reclaim. Should be used carefully because it
+ might deplete the memory and the next user might hit the more aggressive
+ reclaim.
+
+ * ``GFP_KERNEL & ~__GFP_DIRECT_RECLAIM`` (or ``GFP_NOWAIT``)- optimistic
+ allocation without any attempt to free memory from the current
+ context but can wake kswapd to reclaim memory if the zone is below
+ the low watermark. Can be used from either atomic contexts or when
+ the request is a performance optimization and there is another
+ fallback for a slow path.
+
+ * ``(GFP_KERNEL|__GFP_HIGH) & ~__GFP_DIRECT_RECLAIM`` (aka ``GFP_ATOMIC``) -
+ non sleeping allocation with an expensive fallback so it can access
+ some portion of memory reserves. Usually used from interrupt/bottom-half
+ context with an expensive slow path fallback.
+
+ * ``GFP_KERNEL`` - both background and direct reclaim are allowed and the
+ **default** page allocator behavior is used. That means that not costly
+ allocation requests are basically no-fail but there is no guarantee of
+ that behavior so failures have to be checked properly by callers
+ (e.g. OOM killer victim is allowed to fail currently).
+
+ * ``GFP_KERNEL | __GFP_NORETRY`` - overrides the default allocator behavior
+ and all allocation requests fail early rather than cause disruptive
+ reclaim (one round of reclaim in this implementation). The OOM killer
+ is not invoked.
+
+ * ``GFP_KERNEL | __GFP_RETRY_MAYFAIL`` - overrides the default allocator
+ behavior and all allocation requests try really hard. The request
+ will fail if the reclaim cannot make any progress. The OOM killer
+ won't be triggered.
+
+ * ``GFP_KERNEL | __GFP_NOFAIL`` - overrides the default allocator behavior
+ and all allocation requests will loop endlessly until they succeed.
+ This might be really dangerous especially for larger orders.
+
Selecting memory allocator
==========================
The most straightforward way to allocate memory is to use a function
-from the :c:func:`kmalloc` family. And, to be on the safe size it's
-best to use routines that set memory to zero, like
-:c:func:`kzalloc`. If you need to allocate memory for an array, there
-are :c:func:`kmalloc_array` and :c:func:`kcalloc` helpers.
+from the kmalloc() family. And, to be on the safe side it's best to use
+routines that set memory to zero, like kzalloc(). If you need to
+allocate memory for an array, there are kmalloc_array() and kcalloc()
+helpers. The helpers struct_size(), array_size() and array3_size() can
+be used to safely calculate object sizes without overflowing.
The maximal size of a chunk that can be allocated with `kmalloc` is
limited. The actual limit depends on the hardware and the kernel
configuration, but it is a good practice to use `kmalloc` for objects
smaller than page size.
-For large allocations you can use :c:func:`vmalloc` and
-:c:func:`vzalloc`, or directly request pages from the page
-allocator. The memory allocated by `vmalloc` and related functions is
-not physically contiguous.
+The address of a chunk allocated with `kmalloc` is aligned to at least
+ARCH_KMALLOC_MINALIGN bytes. For sizes which are a power of two, the
+alignment is also guaranteed to be at least the respective size. For other
+sizes, the alignment is guaranteed to be at least the largest power-of-two
+divisor of the size.
+
+Chunks allocated with kmalloc() can be resized with krealloc(). Similarly
+to kmalloc_array(): a helper for resizing arrays is provided in the form of
+krealloc_array().
+
+For large allocations you can use vmalloc() and vzalloc(), or directly
+request pages from the page allocator. The memory allocated by `vmalloc`
+and related functions is not physically contiguous.
If you are not sure whether the allocation size is too large for
-`kmalloc`, it is possible to use :c:func:`kvmalloc` and its
-derivatives. It will try to allocate memory with `kmalloc` and if the
-allocation fails it will be retried with `vmalloc`. There are
-restrictions on which GFP flags can be used with `kvmalloc`; please
-see :c:func:`kvmalloc_node` reference documentation. Note that
-`kvmalloc` may return memory that is not physically contiguous.
+`kmalloc`, it is possible to use kvmalloc() and its derivatives. It will
+try to allocate memory with `kmalloc` and if the allocation fails it
+will be retried with `vmalloc`. There are restrictions on which GFP
+flags can be used with `kvmalloc`; please see kvmalloc_node() reference
+documentation. Note that `kvmalloc` may return memory that is not
+physically contiguous.
If you need to allocate many identical objects you can use the slab
-cache allocator. The cache should be set up with
-:c:func:`kmem_cache_create` before it can be used. Afterwards
-:c:func:`kmem_cache_alloc` and its convenience wrappers can allocate
-memory from that cache.
-
-When the allocated memory is no longer needed it must be freed. You
-can use :c:func:`kvfree` for the memory allocated with `kmalloc`,
-`vmalloc` and `kvmalloc`. The slab caches should be freed with
-:c:func:`kmem_cache_free`. And don't forget to destroy the cache with
-:c:func:`kmem_cache_destroy`.
+cache allocator. The cache should be set up with kmem_cache_create() or
+kmem_cache_create_usercopy() before it can be used. The second function
+should be used if a part of the cache might be copied to the userspace.
+After the cache is created kmem_cache_alloc() and its convenience
+wrappers can allocate memory from that cache.
+
+When the allocated memory is no longer needed it must be freed.
+
+Objects allocated by `kmalloc` can be freed by `kfree` or `kvfree`. Objects
+allocated by `kmem_cache_alloc` can be freed with `kmem_cache_free`, `kfree`
+or `kvfree`, where the latter two might be more convenient thanks to not
+needing the kmem_cache pointer.
+
+The same rules apply to _bulk and _rcu flavors of freeing functions.
+
+Memory allocated by `vmalloc` can be freed with `vfree` or `kvfree`.
+Memory allocated by `kvmalloc` can be freed with `kvfree`.
+Caches created by `kmem_cache_create` should be freed with
+`kmem_cache_destroy` only after freeing all the allocated objects first.
diff --git a/Documentation/core-api/memory-hotplug.rst b/Documentation/core-api/memory-hotplug.rst
index de7467e48067..8fc97c2379de 100644
--- a/Documentation/core-api/memory-hotplug.rst
+++ b/Documentation/core-api/memory-hotplug.rst
@@ -9,6 +9,9 @@ Memory hotplug event notifier
Hotplugging events are sent to a notification queue.
+Memory notifier
+----------------
+
There are six types of notification defined in ``include/linux/memory.h``:
MEM_GOING_ONLINE
@@ -56,23 +59,18 @@ The third argument (arg) passes a pointer of struct memory_notify::
struct memory_notify {
unsigned long start_pfn;
unsigned long nr_pages;
- int status_change_nid_normal;
- int status_change_nid_high;
- int status_change_nid;
}
- start_pfn is start_pfn of online/offline memory.
- nr_pages is # of pages of online/offline memory.
-- status_change_nid_normal is set node id when N_NORMAL_MEMORY of nodemask
- is (will be) set/clear, if this is -1, then nodemask status is not changed.
-- status_change_nid_high is set node id when N_HIGH_MEMORY of nodemask
- is (will be) set/clear, if this is -1, then nodemask status is not changed.
-- status_change_nid is set node id when N_MEMORY of nodemask is (will be)
- set/clear. It means a new(memoryless) node gets new memory by online and a
- node loses all memory. If this is -1, then nodemask status is not changed.
- If status_changed_nid* >= 0, callback should create/discard structures for the
- node if necessary.
+It is possible to get notified for MEM_CANCEL_ONLINE without having been notified
+for MEM_GOING_ONLINE, and the same applies to MEM_CANCEL_OFFLINE and
+MEM_GOING_OFFLINE.
+This can happen when a consumer fails, meaning we break the callchain and we
+stop calling the remaining consumers of the notifier.
+It is then important that users of memory_notify make no assumptions and get
+prepared to handle such cases.
The callback routine shall return one of the values
NOTIFY_DONE, NOTIFY_OK, NOTIFY_BAD, NOTIFY_STOP
@@ -86,6 +84,78 @@ further processing of the notification queue.
NOTIFY_STOP stops further processing of the notification queue.
+Numa node notifier
+------------------
+
+There are six types of notification defined in ``include/linux/node.h``:
+
+NODE_ADDING_FIRST_MEMORY
+ Generated before memory becomes available to this node for the first time.
+
+NODE_CANCEL_ADDING_FIRST_MEMORY
+ Generated if NODE_ADDING_FIRST_MEMORY fails.
+
+NODE_ADDED_FIRST_MEMORY
+ Generated when memory has become available fo this node for the first time.
+
+NODE_REMOVING_LAST_MEMORY
+ Generated when the last memory available to this node is about to be offlined.
+
+NODE_CANCEL_REMOVING_LAST_MEMORY
+ Generated when NODE_CANCEL_REMOVING_LAST_MEMORY fails.
+
+NODE_REMOVED_LAST_MEMORY
+ Generated when the last memory available to this node has been offlined.
+
+A callback routine can be registered by calling::
+
+ hotplug_node_notifier(callback_func, priority)
+
+Callback functions with higher values of priority are called before callback
+functions with lower values.
+
+A callback function must have the following prototype::
+
+ int callback_func(
+
+ struct notifier_block *self, unsigned long action, void *arg);
+
+The first argument of the callback function (self) is a pointer to the block
+of the notifier chain that points to the callback function itself.
+The second argument (action) is one of the event types described above.
+The third argument (arg) passes a pointer of struct node_notify::
+
+ struct node_notify {
+ int nid;
+ }
+
+- nid is the node we are adding or removing memory to.
+
+It is possible to get notified for NODE_CANCEL_ADDING_FIRST_MEMORY without
+having been notified for NODE_ADDING_FIRST_MEMORY, and the same applies to
+NODE_CANCEL_REMOVING_LAST_MEMORY and NODE_REMOVING_LAST_MEMORY.
+This can happen when a consumer fails, meaning we break the callchain and we
+stop calling the remaining consumers of the notifier.
+It is then important that users of node_notify make no assumptions and get
+prepared to handle such cases.
+
+The callback routine shall return one of the values
+NOTIFY_DONE, NOTIFY_OK, NOTIFY_BAD, NOTIFY_STOP
+defined in ``include/linux/notifier.h``
+
+NOTIFY_DONE and NOTIFY_OK have no effect on the further processing.
+
+NOTIFY_BAD is used as response to the NODE_ADDING_FIRST_MEMORY,
+NODE_REMOVING_LAST_MEMORY, NODE_ADDED_FIRST_MEMORY or
+NODE_REMOVED_LAST_MEMORY action to cancel hotplugging.
+It stops further processing of the notification queue.
+
+NOTIFY_STOP stops further processing of the notification queue.
+
+Please note that we should not fail for NODE_ADDED_FIRST_MEMORY /
+NODE_REMOVED_FIRST_MEMORY, as memory_hotplug code cannot rollback at that
+point anymore.
+
Locking Internals
=================
diff --git a/Documentation/core-api/min_heap.rst b/Documentation/core-api/min_heap.rst
new file mode 100644
index 000000000000..9f57766581df
--- /dev/null
+++ b/Documentation/core-api/min_heap.rst
@@ -0,0 +1,302 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============
+Min Heap API
+============
+
+:Author: Kuan-Wei Chiu <visitorckw@gmail.com>
+
+Introduction
+============
+
+The Min Heap API provides a set of functions and macros for managing min-heaps
+in the Linux kernel. A min-heap is a binary tree structure where the value of
+each node is less than or equal to the values of its children, ensuring that
+the smallest element is always at the root.
+
+This document provides a guide to the Min Heap API, detailing how to define and
+use min-heaps. Users should not directly call functions with **__min_heap_*()**
+prefixes, but should instead use the provided macro wrappers.
+
+In addition to the standard version of the functions, the API also includes a
+set of inline versions for performance-critical scenarios. These inline
+functions have the same names as their non-inline counterparts but include an
+**_inline** suffix. For example, **__min_heap_init_inline** and its
+corresponding macro wrapper **min_heap_init_inline**. The inline versions allow
+custom comparison and swap functions to be called directly, rather than through
+indirect function calls. This can significantly reduce overhead, especially
+when CONFIG_MITIGATION_RETPOLINE is enabled, as indirect function calls become
+more expensive. As with the non-inline versions, it is important to use the
+macro wrappers for inline functions instead of directly calling the functions
+themselves.
+
+Data Structures
+===============
+
+Min-Heap Definition
+-------------------
+
+The core data structure for representing a min-heap is defined using the
+**MIN_HEAP_PREALLOCATED** and **DEFINE_MIN_HEAP** macros. These macros allow
+you to define a min-heap with a preallocated buffer or dynamically allocated
+memory.
+
+Example:
+
+.. code-block:: c
+
+ #define MIN_HEAP_PREALLOCATED(_type, _name, _nr)
+ struct _name {
+ size_t nr; /* Number of elements in the heap */
+ size_t size; /* Maximum number of elements that can be held */
+ _type *data; /* Pointer to the heap data */
+ _type preallocated[_nr]; /* Static preallocated array */
+ }
+
+ #define DEFINE_MIN_HEAP(_type, _name) MIN_HEAP_PREALLOCATED(_type, _name, 0)
+
+A typical heap structure will include a counter for the number of elements
+(`nr`), the maximum capacity of the heap (`size`), and a pointer to an array of
+elements (`data`). Optionally, you can specify a static array for preallocated
+heap storage using **MIN_HEAP_PREALLOCATED**.
+
+Min Heap Callbacks
+------------------
+
+The **struct min_heap_callbacks** provides customization options for ordering
+elements in the heap and swapping them. It contains two function pointers:
+
+.. code-block:: c
+
+ struct min_heap_callbacks {
+ bool (*less)(const void *lhs, const void *rhs, void *args);
+ void (*swp)(void *lhs, void *rhs, void *args);
+ };
+
+- **less** is the comparison function used to establish the order of elements.
+- **swp** is a function for swapping elements in the heap. If swp is set to
+ NULL, the default swap function will be used, which swaps the elements based on their size
+
+Macro Wrappers
+==============
+
+The following macro wrappers are provided for interacting with the heap in a
+user-friendly manner. Each macro corresponds to a function that operates on the
+heap, and they abstract away direct calls to internal functions.
+
+Each macro accepts various parameters that are detailed below.
+
+Heap Initialization
+--------------------
+
+.. code-block:: c
+
+ min_heap_init(heap, data, size);
+
+- **heap**: A pointer to the min-heap structure to be initialized.
+- **data**: A pointer to the buffer where the heap elements will be stored. If
+ `NULL`, the preallocated buffer within the heap structure will be used.
+- **size**: The maximum number of elements the heap can hold.
+
+This macro initializes the heap, setting its initial state. If `data` is
+`NULL`, the preallocated memory inside the heap structure will be used for
+storage. Otherwise, the user-provided buffer is used. The operation is **O(1)**.
+
+**Inline Version:** min_heap_init_inline(heap, data, size)
+
+Accessing the Top Element
+-------------------------
+
+.. code-block:: c
+
+ element = min_heap_peek(heap);
+
+- **heap**: A pointer to the min-heap from which to retrieve the smallest
+ element.
+
+This macro returns a pointer to the smallest element (the root) of the heap, or
+`NULL` if the heap is empty. The operation is **O(1)**.
+
+**Inline Version:** min_heap_peek_inline(heap)
+
+Heap Insertion
+--------------
+
+.. code-block:: c
+
+ success = min_heap_push(heap, element, callbacks, args);
+
+- **heap**: A pointer to the min-heap into which the element should be inserted.
+- **element**: A pointer to the element to be inserted into the heap.
+- **callbacks**: A pointer to a `struct min_heap_callbacks` providing the
+ `less` and `swp` functions.
+- **args**: Optional arguments passed to the `less` and `swp` functions.
+
+This macro inserts an element into the heap. It returns `true` if the insertion
+was successful and `false` if the heap is full. The operation is **O(log n)**.
+
+**Inline Version:** min_heap_push_inline(heap, element, callbacks, args)
+
+Heap Removal
+------------
+
+.. code-block:: c
+
+ success = min_heap_pop(heap, callbacks, args);
+
+- **heap**: A pointer to the min-heap from which to remove the smallest element.
+- **callbacks**: A pointer to a `struct min_heap_callbacks` providing the
+ `less` and `swp` functions.
+- **args**: Optional arguments passed to the `less` and `swp` functions.
+
+This macro removes the smallest element (the root) from the heap. It returns
+`true` if the element was successfully removed, or `false` if the heap is
+empty. The operation is **O(log n)**.
+
+**Inline Version:** min_heap_pop_inline(heap, callbacks, args)
+
+Heap Maintenance
+----------------
+
+You can use the following macros to maintain the heap's structure:
+
+.. code-block:: c
+
+ min_heap_sift_down(heap, pos, callbacks, args);
+
+- **heap**: A pointer to the min-heap.
+- **pos**: The index from which to start sifting down.
+- **callbacks**: A pointer to a `struct min_heap_callbacks` providing the
+ `less` and `swp` functions.
+- **args**: Optional arguments passed to the `less` and `swp` functions.
+
+This macro restores the heap property by moving the element at the specified
+index (`pos`) down the heap until it is in the correct position. The operation
+is **O(log n)**.
+
+**Inline Version:** min_heap_sift_down_inline(heap, pos, callbacks, args)
+
+.. code-block:: c
+
+ min_heap_sift_up(heap, idx, callbacks, args);
+
+- **heap**: A pointer to the min-heap.
+- **idx**: The index of the element to sift up.
+- **callbacks**: A pointer to a `struct min_heap_callbacks` providing the
+ `less` and `swp` functions.
+- **args**: Optional arguments passed to the `less` and `swp` functions.
+
+This macro restores the heap property by moving the element at the specified
+index (`idx`) up the heap. The operation is **O(log n)**.
+
+**Inline Version:** min_heap_sift_up_inline(heap, idx, callbacks, args)
+
+.. code-block:: c
+
+ min_heapify_all(heap, callbacks, args);
+
+- **heap**: A pointer to the min-heap.
+- **callbacks**: A pointer to a `struct min_heap_callbacks` providing the
+ `less` and `swp` functions.
+- **args**: Optional arguments passed to the `less` and `swp` functions.
+
+This macro ensures that the entire heap satisfies the heap property. It is
+called when the heap is built from scratch or after many modifications. The
+operation is **O(n)**.
+
+**Inline Version:** min_heapify_all_inline(heap, callbacks, args)
+
+Removing Specific Elements
+--------------------------
+
+.. code-block:: c
+
+ success = min_heap_del(heap, idx, callbacks, args);
+
+- **heap**: A pointer to the min-heap.
+- **idx**: The index of the element to delete.
+- **callbacks**: A pointer to a `struct min_heap_callbacks` providing the
+ `less` and `swp` functions.
+- **args**: Optional arguments passed to the `less` and `swp` functions.
+
+This macro removes an element at the specified index (`idx`) from the heap and
+restores the heap property. The operation is **O(log n)**.
+
+**Inline Version:** min_heap_del_inline(heap, idx, callbacks, args)
+
+Other Utilities
+===============
+
+- **min_heap_full(heap)**: Checks whether the heap is full.
+ Complexity: **O(1)**.
+
+.. code-block:: c
+
+ bool full = min_heap_full(heap);
+
+- `heap`: A pointer to the min-heap to check.
+
+This macro returns `true` if the heap is full, otherwise `false`.
+
+**Inline Version:** min_heap_full_inline(heap)
+
+- **min_heap_empty(heap)**: Checks whether the heap is empty.
+ Complexity: **O(1)**.
+
+.. code-block:: c
+
+ bool empty = min_heap_empty(heap);
+
+- `heap`: A pointer to the min-heap to check.
+
+This macro returns `true` if the heap is empty, otherwise `false`.
+
+**Inline Version:** min_heap_empty_inline(heap)
+
+Example Usage
+=============
+
+An example usage of the min-heap API would involve defining a heap structure,
+initializing it, and inserting and removing elements as needed.
+
+.. code-block:: c
+
+ #include <linux/min_heap.h>
+
+ int my_less_function(const void *lhs, const void *rhs, void *args) {
+ return (*(int *)lhs < *(int *)rhs);
+ }
+
+ struct min_heap_callbacks heap_cb = {
+ .less = my_less_function, /* Comparison function for heap order */
+ .swp = NULL, /* Use default swap function */
+ };
+
+ void example_usage(void) {
+ /* Pre-populate the buffer with elements */
+ int buffer[5] = {5, 2, 8, 1, 3};
+ /* Declare a min-heap */
+ DEFINE_MIN_HEAP(int, my_heap);
+
+ /* Initialize the heap with preallocated buffer and size */
+ min_heap_init(&my_heap, buffer, 5);
+
+ /* Build the heap using min_heapify_all */
+ my_heap.nr = 5; /* Set the number of elements in the heap */
+ min_heapify_all(&my_heap, &heap_cb, NULL);
+
+ /* Peek at the top element (should be 1 in this case) */
+ int *top = min_heap_peek(&my_heap);
+ pr_info("Top element: %d\n", *top);
+
+ /* Pop the top element (1) and get the new top (2) */
+ min_heap_pop(&my_heap, &heap_cb, NULL);
+ top = min_heap_peek(&my_heap);
+ pr_info("New top element: %d\n", *top);
+
+ /* Insert a new element (0) and recheck the top */
+ int new_element = 0;
+ min_heap_push(&my_heap, &new_element, &heap_cb, NULL);
+ top = min_heap_peek(&my_heap);
+ pr_info("Top element after insertion: %d\n", *top);
+ }
diff --git a/Documentation/core-api/mm-api.rst b/Documentation/core-api/mm-api.rst
index 5ce1ec1dd066..68193a4cfcf5 100644
--- a/Documentation/core-api/mm-api.rst
+++ b/Documentation/core-api/mm-api.rst
@@ -11,7 +11,7 @@ User Space Memory Access
.. kernel-doc:: arch/x86/lib/usercopy_32.c
:export:
-.. kernel-doc:: mm/util.c
+.. kernel-doc:: mm/gup.c
:functions: get_user_pages_fast
.. _mm-api-gfp-flags:
@@ -19,23 +19,17 @@ User Space Memory Access
Memory Allocation Controls
==========================
-Functions which need to allocate memory often use GFP flags to express
-how that memory should be allocated. The GFP acronym stands for "get
-free pages", the underlying memory allocation function. Not every GFP
-flag is allowed to every function which may allocate memory. Most
-users will want to use a plain ``GFP_KERNEL``.
-
-.. kernel-doc:: include/linux/gfp.h
+.. kernel-doc:: include/linux/gfp_types.h
:doc: Page mobility and placement hints
-.. kernel-doc:: include/linux/gfp.h
+.. kernel-doc:: include/linux/gfp_types.h
:doc: Watermark modifiers
-.. kernel-doc:: include/linux/gfp.h
+.. kernel-doc:: include/linux/gfp_types.h
:doc: Reclaim modifiers
-.. kernel-doc:: include/linux/gfp.h
- :doc: Common combinations
+.. kernel-doc:: include/linux/gfp_types.h
+ :doc: Useful GFP flag combinations
The Slab Cache
==============
@@ -43,38 +37,98 @@ The Slab Cache
.. kernel-doc:: include/linux/slab.h
:internal:
-.. kernel-doc:: mm/slab.c
+.. kernel-doc:: mm/slub.c
+ :export:
+
+.. kernel-doc:: mm/slab_common.c
:export:
.. kernel-doc:: mm/util.c
:functions: kfree_const kvmalloc_node kvfree
-More Memory Management Functions
-================================
+Virtually Contiguous Mappings
+=============================
-.. kernel-doc:: mm/readahead.c
+.. kernel-doc:: mm/vmalloc.c
:export:
+File Mapping and Page Cache
+===========================
+
+Filemap
+-------
+
.. kernel-doc:: mm/filemap.c
:export:
-.. kernel-doc:: mm/memory.c
+Readahead
+---------
+
+.. kernel-doc:: mm/readahead.c
+ :doc: Readahead Overview
+
+.. kernel-doc:: mm/readahead.c
:export:
-.. kernel-doc:: mm/vmalloc.c
+Writeback
+---------
+
+.. kernel-doc:: mm/page-writeback.c
:export:
-.. kernel-doc:: mm/page_alloc.c
+Truncate
+--------
+
+.. kernel-doc:: mm/truncate.c
+ :export:
+
+.. kernel-doc:: include/linux/pagemap.h
:internal:
+Memory pools
+============
+
.. kernel-doc:: mm/mempool.c
:export:
-.. kernel-doc:: mm/dmapool.c
- :export:
+More Memory Management Functions
+================================
-.. kernel-doc:: mm/page-writeback.c
+.. kernel-doc:: mm/memory.c
:export:
-.. kernel-doc:: mm/truncate.c
- :export:
+.. kernel-doc:: mm/page_alloc.c
+.. kernel-doc:: mm/mempolicy.c
+.. kernel-doc:: include/linux/mm_types.h
+ :internal:
+.. kernel-doc:: include/linux/mm_inline.h
+.. kernel-doc:: include/linux/page-flags.h
+.. kernel-doc:: include/linux/mm.h
+ :internal:
+.. kernel-doc:: include/linux/page_ref.h
+.. kernel-doc:: include/linux/mmzone.h
+.. kernel-doc:: mm/util.c
+ :functions: folio_mapping
+
+.. kernel-doc:: mm/rmap.c
+.. kernel-doc:: mm/migrate.c
+.. kernel-doc:: mm/mmap.c
+.. kernel-doc:: mm/kmemleak.c
+.. #kernel-doc:: mm/hmm.c (build warnings)
+.. kernel-doc:: mm/memremap.c
+.. kernel-doc:: mm/hugetlb.c
+.. kernel-doc:: mm/swap.c
+.. kernel-doc:: mm/memcontrol.c
+.. #kernel-doc:: mm/memory-tiers.c (build warnings)
+.. kernel-doc:: mm/shmem.c
+.. kernel-doc:: mm/migrate_device.c
+.. #kernel-doc:: mm/nommu.c (duplicates kernel-doc from other files)
+.. kernel-doc:: mm/mapping_dirty_helpers.c
+.. #kernel-doc:: mm/memory-failure.c (build warnings)
+.. kernel-doc:: mm/percpu.c
+.. kernel-doc:: mm/maccess.c
+.. kernel-doc:: mm/vmscan.c
+.. kernel-doc:: mm/memory_hotplug.c
+.. kernel-doc:: mm/mmu_notifier.c
+.. kernel-doc:: mm/balloon_compaction.c
+.. kernel-doc:: mm/huge_memory.c
diff --git a/Documentation/core-api/netlink.rst b/Documentation/core-api/netlink.rst
new file mode 100644
index 000000000000..9f692b02bfe6
--- /dev/null
+++ b/Documentation/core-api/netlink.rst
@@ -0,0 +1,102 @@
+.. SPDX-License-Identifier: BSD-3-Clause
+
+.. _kernel_netlink:
+
+===================================
+Netlink notes for kernel developers
+===================================
+
+General guidance
+================
+
+Attribute enums
+---------------
+
+Older families often define "null" attributes and commands with value
+of ``0`` and named ``unspec``. This is supported (``type: unused``)
+but should be avoided in new families. The ``unspec`` enum values are
+not used in practice, so just set the value of the first attribute to ``1``.
+
+Message enums
+-------------
+
+Use the same command IDs for requests and replies. This makes it easier
+to match them up, and we have plenty of ID space.
+
+Use separate command IDs for notifications. This makes it easier to
+sort the notifications from replies (and present them to the user
+application via a different API than replies).
+
+Answer requests
+---------------
+
+Older families do not reply to all of the commands, especially NEW / ADD
+commands. User only gets information whether the operation succeeded or
+not via the ACK. Try to find useful data to return. Once the command is
+added whether it replies with a full message or only an ACK is uAPI and
+cannot be changed. It's better to err on the side of replying.
+
+Specifically NEW and ADD commands should reply with information identifying
+the created object such as the allocated object's ID (without having to
+resort to using ``NLM_F_ECHO``).
+
+NLM_F_ECHO
+----------
+
+Make sure to pass the request info to genl_notify() to allow ``NLM_F_ECHO``
+to take effect. This is useful for programs that need precise feedback
+from the kernel (for example for logging purposes).
+
+Support dump consistency
+------------------------
+
+If iterating over objects during dump may skip over objects or repeat
+them - make sure to report dump inconsistency with ``NLM_F_DUMP_INTR``.
+This is usually implemented by maintaining a generation id for the
+structure and recording it in the ``seq`` member of struct netlink_callback.
+
+Netlink specification
+=====================
+
+Documentation of the Netlink specification parts which are only relevant
+to the kernel space.
+
+Globals
+-------
+
+kernel-policy
+~~~~~~~~~~~~~
+
+Defines whether the kernel validation policy is ``global`` i.e. the same for all
+operations of the family, defined for each operation individually - ``per-op``,
+or separately for each operation and operation type (do vs dump) - ``split``.
+New families should use ``per-op`` (default) to be able to narrow down the
+attributes accepted by a specific command.
+
+checks
+------
+
+Documentation for the ``checks`` sub-sections of attribute specs.
+
+unterminated-ok
+~~~~~~~~~~~~~~~
+
+Accept strings without the null-termination (for legacy families only).
+Switches from the ``NLA_NUL_STRING`` to ``NLA_STRING`` policy type.
+
+max-len
+~~~~~~~
+
+Defines max length for a binary or string attribute (corresponding
+to the ``len`` member of struct nla_policy). For string attributes terminating
+null character is not counted towards ``max-len``.
+
+The field may either be a literal integer value or a name of a defined
+constant. String types may reduce the constant by one
+(i.e. specify ``max-len: CONST - 1``) to reserve space for the terminating
+character so implementations should recognize such pattern.
+
+min-len
+~~~~~~~
+
+Similar to ``max-len`` but defines minimum length.
diff --git a/Documentation/core-api/packing.rst b/Documentation/core-api/packing.rst
new file mode 100644
index 000000000000..f68f1e08fef9
--- /dev/null
+++ b/Documentation/core-api/packing.rst
@@ -0,0 +1,345 @@
+================================================
+Generic bitfield packing and unpacking functions
+================================================
+
+Problem statement
+-----------------
+
+When working with hardware, one has to choose between several approaches of
+interfacing with it.
+One can memory-map a pointer to a carefully crafted struct over the hardware
+device's memory region, and access its fields as struct members (potentially
+declared as bitfields). But writing code this way would make it less portable,
+due to potential endianness mismatches between the CPU and the hardware device.
+Additionally, one has to pay close attention when translating register
+definitions from the hardware documentation into bit field indices for the
+structs. Also, some hardware (typically networking equipment) tends to group
+its register fields in ways that violate any reasonable word boundaries
+(sometimes even 64 bit ones). This creates the inconvenience of having to
+define "high" and "low" portions of register fields within the struct.
+A more robust alternative to struct field definitions would be to extract the
+required fields by shifting the appropriate number of bits. But this would
+still not protect from endianness mismatches, except if all memory accesses
+were performed byte-by-byte. Also the code can easily get cluttered, and the
+high-level idea might get lost among the many bit shifts required.
+Many drivers take the bit-shifting approach and then attempt to reduce the
+clutter with tailored macros, but more often than not these macros take
+shortcuts that still prevent the code from being truly portable.
+
+The solution
+------------
+
+This API deals with 2 basic operations:
+
+ - Packing a CPU-usable number into a memory buffer (with hardware
+ constraints/quirks)
+ - Unpacking a memory buffer (which has hardware constraints/quirks)
+ into a CPU-usable number.
+
+The API offers an abstraction over said hardware constraints and quirks,
+over CPU endianness and therefore between possible mismatches between
+the two.
+
+The basic unit of these API functions is the u64. From the CPU's
+perspective, bit 63 always means bit offset 7 of byte 7, albeit only
+logically. The question is: where do we lay this bit out in memory?
+
+The following examples cover the memory layout of a packed u64 field.
+The byte offsets in the packed buffer are always implicitly 0, 1, ... 7.
+What the examples show is where the logical bytes and bits sit.
+
+1. Normally (no quirks), we would do it like this:
+
+::
+
+ 63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34 33 32
+ 7 6 5 4
+ 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
+ 3 2 1 0
+
+That is, the MSByte (7) of the CPU-usable u64 sits at memory offset 0, and the
+LSByte (0) of the u64 sits at memory offset 7.
+This corresponds to what most folks would regard to as "big endian", where
+bit i corresponds to the number 2^i. This is also referred to in the code
+comments as "logical" notation.
+
+
+2. If QUIRK_MSB_ON_THE_RIGHT is set, we do it like this:
+
+::
+
+ 56 57 58 59 60 61 62 63 48 49 50 51 52 53 54 55 40 41 42 43 44 45 46 47 32 33 34 35 36 37 38 39
+ 7 6 5 4
+ 24 25 26 27 28 29 30 31 16 17 18 19 20 21 22 23 8 9 10 11 12 13 14 15 0 1 2 3 4 5 6 7
+ 3 2 1 0
+
+That is, QUIRK_MSB_ON_THE_RIGHT does not affect byte positioning, but
+inverts bit offsets inside a byte.
+
+
+3. If QUIRK_LITTLE_ENDIAN is set, we do it like this:
+
+::
+
+ 39 38 37 36 35 34 33 32 47 46 45 44 43 42 41 40 55 54 53 52 51 50 49 48 63 62 61 60 59 58 57 56
+ 4 5 6 7
+ 7 6 5 4 3 2 1 0 15 14 13 12 11 10 9 8 23 22 21 20 19 18 17 16 31 30 29 28 27 26 25 24
+ 0 1 2 3
+
+Therefore, QUIRK_LITTLE_ENDIAN means that inside the memory region, every
+byte from each 4-byte word is placed at its mirrored position compared to
+the boundary of that word.
+
+4. If QUIRK_MSB_ON_THE_RIGHT and QUIRK_LITTLE_ENDIAN are both set, we do it
+ like this:
+
+::
+
+ 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
+ 4 5 6 7
+ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+ 0 1 2 3
+
+
+5. If just QUIRK_LSW32_IS_FIRST is set, we do it like this:
+
+::
+
+ 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
+ 3 2 1 0
+ 63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34 33 32
+ 7 6 5 4
+
+In this case the 8 byte memory region is interpreted as follows: first
+4 bytes correspond to the least significant 4-byte word, next 4 bytes to
+the more significant 4-byte word.
+
+
+6. If QUIRK_LSW32_IS_FIRST and QUIRK_MSB_ON_THE_RIGHT are set, we do it like
+ this:
+
+::
+
+ 24 25 26 27 28 29 30 31 16 17 18 19 20 21 22 23 8 9 10 11 12 13 14 15 0 1 2 3 4 5 6 7
+ 3 2 1 0
+ 56 57 58 59 60 61 62 63 48 49 50 51 52 53 54 55 40 41 42 43 44 45 46 47 32 33 34 35 36 37 38 39
+ 7 6 5 4
+
+
+7. If QUIRK_LSW32_IS_FIRST and QUIRK_LITTLE_ENDIAN are set, it looks like
+ this:
+
+::
+
+ 7 6 5 4 3 2 1 0 15 14 13 12 11 10 9 8 23 22 21 20 19 18 17 16 31 30 29 28 27 26 25 24
+ 0 1 2 3
+ 39 38 37 36 35 34 33 32 47 46 45 44 43 42 41 40 55 54 53 52 51 50 49 48 63 62 61 60 59 58 57 56
+ 4 5 6 7
+
+
+8. If QUIRK_LSW32_IS_FIRST, QUIRK_LITTLE_ENDIAN and QUIRK_MSB_ON_THE_RIGHT
+ are set, it looks like this:
+
+::
+
+ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+ 0 1 2 3
+ 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
+ 4 5 6 7
+
+
+We always think of our offsets as if there were no quirk, and we translate
+them afterwards, before accessing the memory region.
+
+Note on buffer lengths not multiple of 4
+----------------------------------------
+
+To deal with memory layout quirks where groups of 4 bytes are laid out "little
+endian" relative to each other, but "big endian" within the group itself, the
+concept of groups of 4 bytes is intrinsic to the packing API (not to be
+confused with the memory access, which is performed byte by byte, though).
+
+With buffer lengths not multiple of 4, this means one group will be incomplete.
+Depending on the quirks, this may lead to discontinuities in the bit fields
+accessible through the buffer. The packing API assumes discontinuities were not
+the intention of the memory layout, so it avoids them by effectively logically
+shortening the most significant group of 4 octets to the number of octets
+actually available.
+
+Example with a 31 byte sized buffer given below. Physical buffer offsets are
+implicit, and increase from left to right within a group, and from top to
+bottom within a column.
+
+No quirks:
+
+::
+
+ 31 29 28 | Group 7 (most significant)
+ 27 26 25 24 | Group 6
+ 23 22 21 20 | Group 5
+ 19 18 17 16 | Group 4
+ 15 14 13 12 | Group 3
+ 11 10 9 8 | Group 2
+ 7 6 5 4 | Group 1
+ 3 2 1 0 | Group 0 (least significant)
+
+QUIRK_LSW32_IS_FIRST:
+
+::
+
+ 3 2 1 0 | Group 0 (least significant)
+ 7 6 5 4 | Group 1
+ 11 10 9 8 | Group 2
+ 15 14 13 12 | Group 3
+ 19 18 17 16 | Group 4
+ 23 22 21 20 | Group 5
+ 27 26 25 24 | Group 6
+ 30 29 28 | Group 7 (most significant)
+
+QUIRK_LITTLE_ENDIAN:
+
+::
+
+ 30 28 29 | Group 7 (most significant)
+ 24 25 26 27 | Group 6
+ 20 21 22 23 | Group 5
+ 16 17 18 19 | Group 4
+ 12 13 14 15 | Group 3
+ 8 9 10 11 | Group 2
+ 4 5 6 7 | Group 1
+ 0 1 2 3 | Group 0 (least significant)
+
+QUIRK_LITTLE_ENDIAN | QUIRK_LSW32_IS_FIRST:
+
+::
+
+ 0 1 2 3 | Group 0 (least significant)
+ 4 5 6 7 | Group 1
+ 8 9 10 11 | Group 2
+ 12 13 14 15 | Group 3
+ 16 17 18 19 | Group 4
+ 20 21 22 23 | Group 5
+ 24 25 26 27 | Group 6
+ 28 29 30 | Group 7 (most significant)
+
+Intended use
+------------
+
+Drivers that opt to use this API first need to identify which of the above 3
+quirk combinations (for a total of 8) match what the hardware documentation
+describes.
+
+There are 3 supported usage patterns, detailed below.
+
+packing()
+^^^^^^^^^
+
+This API function is deprecated.
+
+The packing() function returns an int-encoded error code, which protects the
+programmer against incorrect API use. The errors are not expected to occur
+during runtime, therefore it is reasonable to wrap packing() into a custom
+function which returns void and swallows those errors. Optionally it can
+dump stack or print the error description.
+
+.. code-block:: c
+
+ void my_packing(void *buf, u64 *val, int startbit, int endbit,
+ size_t len, enum packing_op op)
+ {
+ int err;
+
+ /* Adjust quirks accordingly */
+ err = packing(buf, val, startbit, endbit, len, op, QUIRK_LSW32_IS_FIRST);
+ if (likely(!err))
+ return;
+
+ if (err == -EINVAL) {
+ pr_err("Start bit (%d) expected to be larger than end (%d)\n",
+ startbit, endbit);
+ } else if (err == -ERANGE) {
+ if ((startbit - endbit + 1) > 64)
+ pr_err("Field %d-%d too large for 64 bits!\n",
+ startbit, endbit);
+ else
+ pr_err("Cannot store %llx inside bits %d-%d (would truncate)\n",
+ *val, startbit, endbit);
+ }
+ dump_stack();
+ }
+
+pack() and unpack()
+^^^^^^^^^^^^^^^^^^^
+
+These are const-correct variants of packing(), and eliminate the last "enum
+packing_op op" argument.
+
+Calling pack(...) is equivalent, and preferred, to calling packing(..., PACK).
+
+Calling unpack(...) is equivalent, and preferred, to calling packing(..., UNPACK).
+
+pack_fields() and unpack_fields()
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The library exposes optimized functions for the scenario where there are many
+fields represented in a buffer, and it encourages consumer drivers to avoid
+repetitive calls to pack() and unpack() for each field, but instead use
+pack_fields() and unpack_fields(), which reduces the code footprint.
+
+These APIs use field definitions in arrays of ``struct packed_field_u8`` or
+``struct packed_field_u16``, allowing consumer drivers to minimize the size
+of these arrays according to their custom requirements.
+
+The pack_fields() and unpack_fields() API functions are actually macros which
+automatically select the appropriate function at compile time, based on the
+type of the fields array passed in.
+
+An additional benefit over pack() and unpack() is that sanity checks on the
+field definitions are handled at compile time with ``BUILD_BUG_ON`` rather
+than only when the offending code is executed. These functions return void and
+wrapping them to handle unexpected errors is not necessary.
+
+It is recommended, but not required, that you wrap your packed buffer into a
+structured type with a fixed size. This generally makes it easier for the
+compiler to enforce that the correct size buffer is used.
+
+Here is an example of how to use the fields APIs:
+
+.. code-block:: c
+
+ /* Ordering inside the unpacked structure is flexible and can be different
+ * from the packed buffer. Here, it is optimized to reduce padding.
+ */
+ struct data {
+ u64 field3;
+ u32 field4;
+ u16 field1;
+ u8 field2;
+ };
+
+ #define SIZE 13
+
+ typedef struct __packed { u8 buf[SIZE]; } packed_buf_t;
+
+ static const struct packed_field_u8 fields[] = {
+ PACKED_FIELD(100, 90, struct data, field1),
+ PACKED_FIELD(90, 87, struct data, field2),
+ PACKED_FIELD(86, 30, struct data, field3),
+ PACKED_FIELD(29, 0, struct data, field4),
+ };
+
+ void unpack_your_data(const packed_buf_t *buf, struct data *unpacked)
+ {
+ BUILD_BUG_ON(sizeof(*buf) != SIZE;
+
+ unpack_fields(buf, sizeof(*buf), unpacked, fields,
+ QUIRK_LITTLE_ENDIAN);
+ }
+
+ void pack_your_data(const struct data *unpacked, packed_buf_t *buf)
+ {
+ BUILD_BUG_ON(sizeof(*buf) != SIZE;
+
+ pack_fields(buf, sizeof(*buf), unpacked, fields,
+ QUIRK_LITTLE_ENDIAN);
+ }
diff --git a/Documentation/core-api/padata.rst b/Documentation/core-api/padata.rst
new file mode 100644
index 000000000000..05b73c6c105f
--- /dev/null
+++ b/Documentation/core-api/padata.rst
@@ -0,0 +1,178 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======================================
+The padata parallel execution mechanism
+=======================================
+
+:Date: May 2020
+
+Padata is a mechanism by which the kernel can farm jobs out to be done in
+parallel on multiple CPUs while optionally retaining their ordering.
+
+It was originally developed for IPsec, which needs to perform encryption and
+decryption on large numbers of packets without reordering those packets. This
+is currently the sole consumer of padata's serialized job support.
+
+Padata also supports multithreaded jobs, splitting up the job evenly while load
+balancing and coordinating between threads.
+
+Running Serialized Jobs
+=======================
+
+Initializing
+------------
+
+The first step in using padata to run serialized jobs is to set up a
+padata_instance structure for overall control of how jobs are to be run::
+
+ #include <linux/padata.h>
+
+ struct padata_instance *padata_alloc(const char *name);
+
+'name' simply identifies the instance.
+
+Then, complete padata initialization by allocating a padata_shell::
+
+ struct padata_shell *padata_alloc_shell(struct padata_instance *pinst);
+
+A padata_shell is used to submit a job to padata and allows a series of such
+jobs to be serialized independently. A padata_instance may have one or more
+padata_shells associated with it, each allowing a separate series of jobs.
+
+Modifying cpumasks
+------------------
+
+The CPUs used to run jobs can be changed in two ways, programmatically with
+padata_set_cpumask() or via sysfs. The former is defined::
+
+ int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
+ cpumask_var_t cpumask);
+
+Here cpumask_type is one of PADATA_CPU_PARALLEL or PADATA_CPU_SERIAL, where a
+parallel cpumask describes which processors will be used to execute jobs
+submitted to this instance in parallel and a serial cpumask defines which
+processors are allowed to be used as the serialization callback processor.
+cpumask specifies the new cpumask to use.
+
+There may be sysfs files for an instance's cpumasks. For example, pcrypt's
+live in /sys/kernel/pcrypt/<instance-name>. Within an instance's directory
+there are two files, parallel_cpumask and serial_cpumask, and either cpumask
+may be changed by echoing a bitmask into the file, for example::
+
+ echo f > /sys/kernel/pcrypt/pencrypt/parallel_cpumask
+
+Reading one of these files shows the user-supplied cpumask, which may be
+different from the 'usable' cpumask.
+
+Padata maintains two pairs of cpumasks internally, the user-supplied cpumasks
+and the 'usable' cpumasks. (Each pair consists of a parallel and a serial
+cpumask.) The user-supplied cpumasks default to all possible CPUs on instance
+allocation and may be changed as above. The usable cpumasks are always a
+subset of the user-supplied cpumasks and contain only the online CPUs in the
+user-supplied masks; these are the cpumasks padata actually uses. So it is
+legal to supply a cpumask to padata that contains offline CPUs. Once an
+offline CPU in the user-supplied cpumask comes online, padata is going to use
+it.
+
+Changing the CPU masks are expensive operations, so it should not be done with
+great frequency.
+
+Running A Job
+-------------
+
+Actually submitting work to the padata instance requires the creation of a
+padata_priv structure, which represents one job::
+
+ struct padata_priv {
+ /* Other stuff here... */
+ void (*parallel)(struct padata_priv *padata);
+ void (*serial)(struct padata_priv *padata);
+ };
+
+This structure will almost certainly be embedded within some larger
+structure specific to the work to be done. Most of its fields are private to
+padata, but the structure should be zeroed at initialisation time, and the
+parallel() and serial() functions should be provided. Those functions will
+be called in the process of getting the work done as we will see
+momentarily.
+
+The submission of the job is done with::
+
+ int padata_do_parallel(struct padata_shell *ps,
+ struct padata_priv *padata, int *cb_cpu);
+
+The ps and padata structures must be set up as described above; cb_cpu
+points to the preferred CPU to be used for the final callback when the job is
+done; it must be in the current instance's CPU mask (if not the cb_cpu pointer
+is updated to point to the CPU actually chosen). The return value from
+padata_do_parallel() is zero on success, indicating that the job is in
+progress. -EBUSY means that somebody, somewhere else is messing with the
+instance's CPU mask, while -EINVAL is a complaint about cb_cpu not being in the
+serial cpumask, no online CPUs in the parallel or serial cpumasks, or a stopped
+instance.
+
+Each job submitted to padata_do_parallel() will, in turn, be passed to
+exactly one call to the above-mentioned parallel() function, on one CPU, so
+true parallelism is achieved by submitting multiple jobs. parallel() runs with
+software interrupts disabled and thus cannot sleep. The parallel()
+function gets the padata_priv structure pointer as its lone parameter;
+information about the actual work to be done is probably obtained by using
+container_of() to find the enclosing structure.
+
+Note that parallel() has no return value; the padata subsystem assumes that
+parallel() will take responsibility for the job from this point. The job
+need not be completed during this call, but, if parallel() leaves work
+outstanding, it should be prepared to be called again with a new job before
+the previous one completes.
+
+Serializing Jobs
+----------------
+
+When a job does complete, parallel() (or whatever function actually finishes
+the work) should inform padata of the fact with a call to::
+
+ void padata_do_serial(struct padata_priv *padata);
+
+At some point in the future, padata_do_serial() will trigger a call to the
+serial() function in the padata_priv structure. That call will happen on
+the CPU requested in the initial call to padata_do_parallel(); it, too, is
+run with local software interrupts disabled.
+Note that this call may be deferred for a while since the padata code takes
+pains to ensure that jobs are completed in the order in which they were
+submitted.
+
+Destroying
+----------
+
+Cleaning up a padata instance predictably involves calling the two free
+functions that correspond to the allocation in reverse::
+
+ void padata_free_shell(struct padata_shell *ps);
+ void padata_free(struct padata_instance *pinst);
+
+It is the user's responsibility to ensure all outstanding jobs are complete
+before any of the above are called.
+
+Running Multithreaded Jobs
+==========================
+
+A multithreaded job has a main thread and zero or more helper threads, with the
+main thread participating in the job and then waiting until all helpers have
+finished. padata splits the job into units called chunks, where a chunk is a
+piece of the job that one thread completes in one call to the thread function.
+
+A user has to do three things to run a multithreaded job. First, describe the
+job by defining a padata_mt_job structure, which is explained in the Interface
+section. This includes a pointer to the thread function, which padata will
+call each time it assigns a job chunk to a thread. Then, define the thread
+function, which accepts three arguments, ``start``, ``end``, and ``arg``, where
+the first two delimit the range that the thread operates on and the last is a
+pointer to the job's shared state, if any. Prepare the shared state, which is
+typically allocated on the main thread's stack. Last, call
+padata_do_multithreaded(), which will return once the job is finished.
+
+Interface
+=========
+
+.. kernel-doc:: include/linux/padata.h
+.. kernel-doc:: kernel/padata.c
diff --git a/Documentation/core-api/parser.rst b/Documentation/core-api/parser.rst
new file mode 100644
index 000000000000..45750d04b895
--- /dev/null
+++ b/Documentation/core-api/parser.rst
@@ -0,0 +1,17 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+==============
+Generic parser
+==============
+
+Overview
+========
+
+The generic parser is a simple parser for parsing mount options,
+filesystem options, driver options, subsystem options, etc.
+
+Parser API
+==========
+
+.. kernel-doc:: lib/parser.c
+ :export:
diff --git a/Documentation/core-api/pin_user_pages.rst b/Documentation/core-api/pin_user_pages.rst
new file mode 100644
index 000000000000..c16ca163b55e
--- /dev/null
+++ b/Documentation/core-api/pin_user_pages.rst
@@ -0,0 +1,286 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================================================
+pin_user_pages() and related calls
+====================================================
+
+.. contents:: :local:
+
+Overview
+========
+
+This document describes the following functions::
+
+ pin_user_pages()
+ pin_user_pages_fast()
+ pin_user_pages_remote()
+
+Basic description of FOLL_PIN
+=============================
+
+FOLL_PIN and FOLL_LONGTERM are flags that can be passed to the get_user_pages*()
+("gup") family of functions. FOLL_PIN has significant interactions and
+interdependencies with FOLL_LONGTERM, so both are covered here.
+
+FOLL_PIN is internal to gup, meaning that it should not appear at the gup call
+sites. This allows the associated wrapper functions (pin_user_pages*() and
+others) to set the correct combination of these flags, and to check for problems
+as well.
+
+FOLL_LONGTERM, on the other hand, *is* allowed to be set at the gup call sites.
+This is in order to avoid creating a large number of wrapper functions to cover
+all combinations of get*(), pin*(), FOLL_LONGTERM, and more. Also, the
+pin_user_pages*() APIs are clearly distinct from the get_user_pages*() APIs, so
+that's a natural dividing line, and a good point to make separate wrapper calls.
+In other words, use pin_user_pages*() for DMA-pinned pages, and
+get_user_pages*() for other cases. There are five cases described later on in
+this document, to further clarify that concept.
+
+FOLL_PIN and FOLL_GET are mutually exclusive for a given gup call. However,
+multiple threads and call sites are free to pin the same struct pages, via both
+FOLL_PIN and FOLL_GET. It's just the call site that needs to choose one or the
+other, not the struct page(s).
+
+The FOLL_PIN implementation is nearly the same as FOLL_GET, except that FOLL_PIN
+uses a different reference counting technique.
+
+FOLL_PIN is a prerequisite to FOLL_LONGTERM. Another way of saying that is,
+FOLL_LONGTERM is a specific case, more restrictive case of FOLL_PIN.
+
+Which flags are set by each wrapper
+===================================
+
+For these pin_user_pages*() functions, FOLL_PIN is OR'd in with whatever gup
+flags the caller provides. The caller is required to pass in a non-null struct
+pages* array, and the function then pins pages by incrementing each by a special
+value: GUP_PIN_COUNTING_BIAS.
+
+For large folios, the GUP_PIN_COUNTING_BIAS scheme is not used. Instead,
+the extra space available in the struct folio is used to store the
+pincount directly.
+
+This approach for large folios avoids the counting upper limit problems
+that are discussed below. Those limitations would have been aggravated
+severely by huge pages, because each tail page adds a refcount to the
+head page. And in fact, testing revealed that, without a separate pincount
+field, refcount overflows were seen in some huge page stress tests.
+
+This also means that huge pages and large folios do not suffer
+from the false positives problem that is mentioned below.::
+
+ Function
+ --------
+ pin_user_pages FOLL_PIN is always set internally by this function.
+ pin_user_pages_fast FOLL_PIN is always set internally by this function.
+ pin_user_pages_remote FOLL_PIN is always set internally by this function.
+
+For these get_user_pages*() functions, FOLL_GET might not even be specified.
+Behavior is a little more complex than above. If FOLL_GET was *not* specified,
+but the caller passed in a non-null struct pages* array, then the function
+sets FOLL_GET for you, and proceeds to pin pages by incrementing the refcount
+of each page by +1.::
+
+ Function
+ --------
+ get_user_pages FOLL_GET is sometimes set internally by this function.
+ get_user_pages_fast FOLL_GET is sometimes set internally by this function.
+ get_user_pages_remote FOLL_GET is sometimes set internally by this function.
+
+Tracking dma-pinned pages
+=========================
+
+Some of the key design constraints, and solutions, for tracking dma-pinned
+pages:
+
+* An actual reference count, per struct page, is required. This is because
+ multiple processes may pin and unpin a page.
+
+* False positives (reporting that a page is dma-pinned, when in fact it is not)
+ are acceptable, but false negatives are not.
+
+* struct page may not be increased in size for this, and all fields are already
+ used.
+
+* Given the above, we can overload the page->_refcount field by using, sort of,
+ the upper bits in that field for a dma-pinned count. "Sort of", means that,
+ rather than dividing page->_refcount into bit fields, we simple add a medium-
+ large value (GUP_PIN_COUNTING_BIAS, initially chosen to be 1024: 10 bits) to
+ page->_refcount. This provides fuzzy behavior: if a page has get_page() called
+ on it 1024 times, then it will appear to have a single dma-pinned count.
+ And again, that's acceptable.
+
+This also leads to limitations: there are only 31-10==21 bits available for a
+counter that increments 10 bits at a time.
+
+* Because of that limitation, special handling is applied to the zero pages
+ when using FOLL_PIN. We only pretend to pin a zero page - we don't alter its
+ refcount or pincount at all (it is permanent, so there's no need). The
+ unpinning functions also don't do anything to a zero page. This is
+ transparent to the caller.
+
+* Callers must specifically request "dma-pinned tracking of pages". In other
+ words, just calling get_user_pages() will not suffice; a new set of functions,
+ pin_user_page() and related, must be used.
+
+FOLL_PIN, FOLL_GET, FOLL_LONGTERM: when to use which flags
+==========================================================
+
+Thanks to Jan Kara, Vlastimil Babka and several other -mm people, for describing
+these categories:
+
+CASE 1: Direct IO (DIO)
+-----------------------
+There are GUP references to pages that are serving
+as DIO buffers. These buffers are needed for a relatively short time (so they
+are not "long term"). No special synchronization with folio_mkclean() or
+munmap() is provided. Therefore, flags to set at the call site are: ::
+
+ FOLL_PIN
+
+...but rather than setting FOLL_PIN directly, call sites should use one of
+the pin_user_pages*() routines that set FOLL_PIN.
+
+CASE 2: RDMA
+------------
+There are GUP references to pages that are serving as DMA
+buffers. These buffers are needed for a long time ("long term"). No special
+synchronization with folio_mkclean() or munmap() is provided. Therefore, flags
+to set at the call site are: ::
+
+ FOLL_PIN | FOLL_LONGTERM
+
+NOTE: Some pages, such as DAX pages, cannot be pinned with longterm pins. That's
+because DAX pages do not have a separate page cache, and so "pinning" implies
+locking down file system blocks, which is not (yet) supported in that way.
+
+.. _mmu-notifier-registration-case:
+
+CASE 3: MMU notifier registration, with or without page faulting hardware
+-------------------------------------------------------------------------
+Device drivers can pin pages via get_user_pages*(), and register for mmu
+notifier callbacks for the memory range. Then, upon receiving a notifier
+"invalidate range" callback , stop the device from using the range, and unpin
+the pages. There may be other possible schemes, such as for example explicitly
+synchronizing against pending IO, that accomplish approximately the same thing.
+
+Or, if the hardware supports replayable page faults, then the device driver can
+avoid pinning entirely (this is ideal), as follows: register for mmu notifier
+callbacks as above, but instead of stopping the device and unpinning in the
+callback, simply remove the range from the device's page tables.
+
+Either way, as long as the driver unpins the pages upon mmu notifier callback,
+then there is proper synchronization with both filesystem and mm
+(folio_mkclean(), munmap(), etc). Therefore, neither flag needs to be set.
+
+CASE 4: Pinning for struct page manipulation only
+-------------------------------------------------
+If only struct page data (as opposed to the actual memory contents that a page
+is tracking) is affected, then normal GUP calls are sufficient, and neither flag
+needs to be set.
+
+CASE 5: Pinning in order to write to the data within the page
+-------------------------------------------------------------
+Even though neither DMA nor Direct IO is involved, just a simple case of "pin,
+write to a page's data, unpin" can cause a problem. Case 5 may be considered a
+superset of Case 1, plus Case 2, plus anything that invokes that pattern. In
+other words, if the code is neither Case 1 nor Case 2, it may still require
+FOLL_PIN, for patterns like this:
+
+Correct (uses FOLL_PIN calls):
+ pin_user_pages()
+ write to the data within the pages
+ unpin_user_pages()
+
+INCORRECT (uses FOLL_GET calls):
+ get_user_pages()
+ write to the data within the pages
+ put_page()
+
+folio_maybe_dma_pinned(): the whole point of pinning
+====================================================
+
+The whole point of marking folios as "DMA-pinned" or "gup-pinned" is to be able
+to query, "is this folio DMA-pinned?" That allows code such as folio_mkclean()
+(and file system writeback code in general) to make informed decisions about
+what to do when a folio cannot be unmapped due to such pins.
+
+What to do in those cases is the subject of a years-long series of discussions
+and debates (see the References at the end of this document). It's a TODO item
+here: fill in the details once that's worked out. Meanwhile, it's safe to say
+that having this available: ::
+
+ static inline bool folio_maybe_dma_pinned(struct folio *folio)
+
+...is a prerequisite to solving the long-running gup+DMA problem.
+
+Another way of thinking about FOLL_GET, FOLL_PIN, and FOLL_LONGTERM
+===================================================================
+
+Another way of thinking about these flags is as a progression of restrictions:
+FOLL_GET is for struct page manipulation, without affecting the data that the
+struct page refers to. FOLL_PIN is a *replacement* for FOLL_GET, and is for
+short term pins on pages whose data *will* get accessed. As such, FOLL_PIN is
+a "more severe" form of pinning. And finally, FOLL_LONGTERM is an even more
+restrictive case that has FOLL_PIN as a prerequisite: this is for pages that
+will be pinned longterm, and whose data will be accessed.
+
+Unit testing
+============
+This file::
+
+ tools/testing/selftests/mm/gup_test.c
+
+has the following new calls to exercise the new pin*() wrapper functions:
+
+* PIN_FAST_BENCHMARK (./gup_test -a)
+* PIN_BASIC_TEST (./gup_test -b)
+
+You can monitor how many total dma-pinned pages have been acquired and released
+since the system was booted, via two new /proc/vmstat entries: ::
+
+ /proc/vmstat/nr_foll_pin_acquired
+ /proc/vmstat/nr_foll_pin_released
+
+Under normal conditions, these two values will be equal unless there are any
+long-term [R]DMA pins in place, or during pin/unpin transitions.
+
+* nr_foll_pin_acquired: This is the number of logical pins that have been
+ acquired since the system was powered on. For huge pages, the head page is
+ pinned once for each page (head page and each tail page) within the huge page.
+ This follows the same sort of behavior that get_user_pages() uses for huge
+ pages: the head page is refcounted once for each tail or head page in the huge
+ page, when get_user_pages() is applied to a huge page.
+
+* nr_foll_pin_released: The number of logical pins that have been released since
+ the system was powered on. Note that pages are released (unpinned) on a
+ PAGE_SIZE granularity, even if the original pin was applied to a huge page.
+ Becaused of the pin count behavior described above in "nr_foll_pin_acquired",
+ the accounting balances out, so that after doing this::
+
+ pin_user_pages(huge_page);
+ for (each page in huge_page)
+ unpin_user_page(page);
+
+...the following is expected::
+
+ nr_foll_pin_released == nr_foll_pin_acquired
+
+(...unless it was already out of balance due to a long-term RDMA pin being in
+place.)
+
+Other diagnostics
+=================
+
+dump_page() has been enhanced slightly to handle these new counting
+fields, and to better report on large folios in general. Specifically,
+for large folios, the exact pincount is reported.
+
+References
+==========
+
+* `Some slow progress on get_user_pages() (Apr 2, 2019) <https://lwn.net/Articles/784574/>`_
+* `DMA and get_user_pages() (LPC: Dec 12, 2018) <https://lwn.net/Articles/774411/>`_
+* `The trouble with get_user_pages() (Apr 30, 2018) <https://lwn.net/Articles/753027/>`_
+* `LWN kernel index: get_user_pages() <https://lwn.net/Kernel/Index/#Memory_management-get_user_pages>`_
+
+John Hubbard, October, 2019
diff --git a/Documentation/core-api/printk-basics.rst b/Documentation/core-api/printk-basics.rst
new file mode 100644
index 000000000000..2dde24ca7d9f
--- /dev/null
+++ b/Documentation/core-api/printk-basics.rst
@@ -0,0 +1,112 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================
+Message logging with printk
+===========================
+
+printk() is one of the most widely known functions in the Linux kernel. It's the
+standard tool we have for printing messages and usually the most basic way of
+tracing and debugging. If you're familiar with printf(3) you can tell printk()
+is based on it, although it has some functional differences:
+
+ - printk() messages can specify a log level.
+
+ - the format string, while largely compatible with C99, doesn't follow the
+ exact same specification. It has some extensions and a few limitations
+ (no ``%n`` or floating point conversion specifiers). See :ref:`How to get
+ printk format specifiers right <printk-specifiers>`.
+
+All printk() messages are printed to the kernel log buffer, which is a ring
+buffer exported to userspace through /dev/kmsg. The usual way to read it is
+using ``dmesg``.
+
+printk() is typically used like this::
+
+ printk(KERN_INFO "Message: %s\n", arg);
+
+where ``KERN_INFO`` is the log level (note that it's concatenated to the format
+string, the log level is not a separate argument). The available log levels are:
+
++----------------+--------+-----------------------------------------------+
+| Name | String | Alias function |
++================+========+===============================================+
+| KERN_EMERG | "0" | pr_emerg() |
++----------------+--------+-----------------------------------------------+
+| KERN_ALERT | "1" | pr_alert() |
++----------------+--------+-----------------------------------------------+
+| KERN_CRIT | "2" | pr_crit() |
++----------------+--------+-----------------------------------------------+
+| KERN_ERR | "3" | pr_err() |
++----------------+--------+-----------------------------------------------+
+| KERN_WARNING | "4" | pr_warn() |
++----------------+--------+-----------------------------------------------+
+| KERN_NOTICE | "5" | pr_notice() |
++----------------+--------+-----------------------------------------------+
+| KERN_INFO | "6" | pr_info() |
++----------------+--------+-----------------------------------------------+
+| KERN_DEBUG | "7" | pr_debug() and pr_devel() if DEBUG is defined |
++----------------+--------+-----------------------------------------------+
+| KERN_DEFAULT | "" | |
++----------------+--------+-----------------------------------------------+
+| KERN_CONT | "c" | pr_cont() |
++----------------+--------+-----------------------------------------------+
+
+
+The log level specifies the importance of a message. The kernel decides whether
+to show the message immediately (printing it to the current console) depending
+on its log level and the current *console_loglevel* (a kernel variable). If the
+message priority is higher (lower log level value) than the *console_loglevel*
+the message will be printed to the console.
+
+If the log level is omitted, the message is printed with ``KERN_DEFAULT``
+level.
+
+You can check the current *console_loglevel* with::
+
+ $ cat /proc/sys/kernel/printk
+ 4 4 1 7
+
+The result shows the *current*, *default*, *minimum* and *boot-time-default* log
+levels.
+
+To change the current console_loglevel simply write the desired level to
+``/proc/sys/kernel/printk``. For example, to print all messages to the console::
+
+ # echo 8 > /proc/sys/kernel/printk
+
+Another way, using ``dmesg``::
+
+ # dmesg -n 5
+
+sets the console_loglevel to print KERN_WARNING (4) or more severe messages to
+console. See ``dmesg(1)`` for more information.
+
+As an alternative to printk() you can use the ``pr_*()`` aliases for
+logging. This family of macros embed the log level in the macro names. For
+example::
+
+ pr_info("Info message no. %d\n", msg_num);
+
+prints a ``KERN_INFO`` message.
+
+Besides being more concise than the equivalent printk() calls, they can use a
+common definition for the format string through the pr_fmt() macro. For
+instance, defining this at the top of a source file (before any ``#include``
+directive)::
+
+ #define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__
+
+would prefix every pr_*() message in that file with the module and function name
+that originated the message.
+
+For debugging purposes there are also two conditionally-compiled macros:
+pr_debug() and pr_devel(), which are compiled-out unless ``DEBUG`` (or
+also ``CONFIG_DYNAMIC_DEBUG`` in the case of pr_debug()) is defined.
+
+
+Function reference
+==================
+
+.. kernel-doc:: include/linux/printk.h
+ :functions: printk pr_emerg pr_alert pr_crit pr_err pr_warn pr_notice pr_info
+ pr_fmt pr_debug pr_devel pr_cont
diff --git a/Documentation/core-api/printk-formats.rst b/Documentation/core-api/printk-formats.rst
index ff48b55040ef..7f2f11b48286 100644
--- a/Documentation/core-api/printk-formats.rst
+++ b/Documentation/core-api/printk-formats.rst
@@ -2,6 +2,8 @@
How to get printk format specifiers right
=========================================
+.. _printk-specifiers:
+
:Author: Randy Dunlap <rdunlap@infradead.org>
:Author: Andrew Murray <amurray@mpc-data.co.uk>
@@ -13,6 +15,11 @@ Integer types
If variable is of Type, use printk format specifier:
------------------------------------------------------------
+ signed char %d or %hhx
+ unsigned char %u or %x
+ char %u or %x
+ short int %d or %hx
+ unsigned short int %u or %x
int %d or %x
unsigned int %u or %x
long %ld or %lx
@@ -21,20 +28,23 @@ Integer types
unsigned long long %llu or %llx
size_t %zu or %zx
ssize_t %zd or %zx
+ s8 %d or %hhx
+ u8 %u or %x
+ s16 %d or %hx
+ u16 %u or %x
s32 %d or %x
u32 %u or %x
s64 %lld or %llx
u64 %llu or %llx
-If <type> is dependent on a config option for its size (e.g., sector_t,
-blkcnt_t) or is architecture-dependent for its size (e.g., tcflag_t), use a
-format specifier of its largest possible type and explicitly cast to it.
+If <type> is architecture-dependent for its size (e.g., cycles_t, tcflag_t) or
+is dependent on a config option for its size (e.g., blk_status_t), use a format
+specifier of its largest possible type and explicitly cast to it.
Example::
- printk("test: sector number/total blocks: %llu/%llu\n",
- (unsigned long long)sector, (unsigned long long)blockcount);
+ printk("test: latency: %llu cycles\n", (unsigned long long)time);
Reminder: sizeof() returns type size_t.
@@ -50,6 +60,14 @@ A raw pointer value may be printed with %p which will hash the address
before printing. The kernel also supports extended specifiers for printing
pointers of different types.
+Some of the extended specifiers print the data on the given address instead
+of printing the address itself. In this case, the following error messages
+might be printed instead of the unreachable information::
+
+ (null) data on plain NULL address
+ (efault) data on invalid address
+ (einval) invalid data on a valid address
+
Plain Pointers
--------------
@@ -61,7 +79,31 @@ Pointers printed without a specifier extension (i.e unadorned %p) are
hashed to prevent leaking information about the kernel memory layout. This
has the added benefit of providing a unique identifier. On 64-bit machines
the first 32 bits are zeroed. The kernel will print ``(ptrval)`` until it
-gathers enough entropy. If you *really* want the address see %px below.
+gathers enough entropy.
+
+When possible, use specialised modifiers such as %pS or %pB (described below)
+to avoid the need of providing an unhashed address that has to be interpreted
+post-hoc. If not possible, and the aim of printing the address is to provide
+more information for debugging, use %p and boot the kernel with the
+``no_hash_pointers`` parameter during debugging, which will print all %p
+addresses unmodified. If you *really* always want the unmodified address, see
+%px below.
+
+If (and only if) you are printing addresses as a content of a virtual file in
+e.g. procfs or sysfs (using e.g. seq_printf(), not printk()) read by a
+userspace process, use the %pK modifier described below instead of %p or %px.
+
+Error Pointers
+--------------
+
+::
+
+ %pe -ENOSPC
+
+For printing error pointers (i.e. a pointer for which IS_ERR() is true)
+as a symbolic error name. Error values for which no symbolic name is
+known are printed in decimal, while a non-ERR_PTR passed as the
+argument to %pe gets treated as ordinary %p.
Symbols/Function Pointers
-------------------------
@@ -70,8 +112,6 @@ Symbols/Function Pointers
%pS versatile_init+0x0/0x110
%ps versatile_init
- %pF versatile_init+0x0/0x110
- %pf versatile_init
%pSR versatile_init+0x9/0x110
(with __builtin_extract_return_addr() translation)
%pB prev_fn_of_versatile_init+0x88/0x88
@@ -81,19 +121,37 @@ The ``S`` and ``s`` specifiers are used for printing a pointer in symbolic
format. They result in the symbol name with (S) or without (s)
offsets. If KALLSYMS are disabled then the symbol address is printed instead.
-Note, that the ``F`` and ``f`` specifiers are identical to ``S`` (``s``)
-and thus deprecated. We have ``F`` and ``f`` because on ia64, ppc64 and
-parisc64 function pointers are indirect and, in fact, are function
-descriptors, which require additional dereferencing before we can lookup
-the symbol. As of now, ``S`` and ``s`` perform dereferencing on those
-platforms (when needed), so ``F`` and ``f`` exist for compatibility
-reasons only.
-
The ``B`` specifier results in the symbol name with offsets and should be
used when printing stack backtraces. The specifier takes into
consideration the effect of compiler optimisations which may occur
when tail-calls are used and marked with the noreturn GCC attribute.
+If the pointer is within a module, the module name and optionally build ID is
+printed after the symbol name with an extra ``b`` appended to the end of the
+specifier.
+
+::
+
+ %pS versatile_init+0x0/0x110 [module_name]
+ %pSb versatile_init+0x0/0x110 [module_name ed5019fdf5e53be37cb1ba7899292d7e143b259e]
+ %pSRb versatile_init+0x9/0x110 [module_name ed5019fdf5e53be37cb1ba7899292d7e143b259e]
+ (with __builtin_extract_return_addr() translation)
+ %pBb prev_fn_of_versatile_init+0x88/0x88 [module_name ed5019fdf5e53be37cb1ba7899292d7e143b259e]
+
+Probed Pointers from BPF / tracing
+----------------------------------
+
+::
+
+ %pks kernel string
+ %pus user string
+
+The ``k`` and ``u`` specifiers are used for printing prior probed memory from
+either kernel memory (k) or user memory (u). The subsequent ``s`` specifier
+results in printing a string. For direct use in regular vsnprintf() the (k)
+and (u) annotation is ignored, however, when used out of BPF's bpf_trace_printk(),
+for example, it reads the memory it is pointing to without faulting.
+
Kernel Pointers
---------------
@@ -103,7 +161,12 @@ Kernel Pointers
For printing kernel pointers which should be hidden from unprivileged
users. The behaviour of %pK depends on the kptr_restrict sysctl - see
-Documentation/sysctl/kernel.txt for more details.
+Documentation/admin-guide/sysctl/kernel.rst for more details.
+
+This modifier is *only* intended when producing content of a file read by
+userspace from e.g. procfs or sysfs, not for dmesg. Please refer to the
+section about %p above for discussion about how to manage hashing pointers
+in printk().
Unmodified Addresses
--------------------
@@ -119,18 +182,44 @@ equivalent to %lx (or %lu). %px is preferred because it is more uniquely
grep'able. If in the future we need to modify the way the kernel handles
printing pointers we will be better equipped to find the call sites.
+Before using %px, consider if using %p is sufficient together with enabling the
+``no_hash_pointers`` kernel parameter during debugging sessions (see the %p
+description above). One valid scenario for %px might be printing information
+immediately before a panic, which prevents any sensitive information to be
+exploited anyway, and with %px there would be no need to reproduce the panic
+with no_hash_pointers.
+
+Pointer Differences
+-------------------
+
+::
+
+ %td 2560
+ %tx a00
+
+For printing the pointer differences, use the %t modifier for ptrdiff_t.
+
+Example::
+
+ printk("test: difference between pointers: %td\n", ptr2 - ptr1);
+
Struct Resources
----------------
::
%pr [mem 0x60000000-0x6fffffff flags 0x2200] or
+ [mem 0x60000000 flags 0x2200] or
[mem 0x0000000060000000-0x000000006fffffff flags 0x2200]
+ [mem 0x0000000060000000 flags 0x2200]
%pR [mem 0x60000000-0x6fffffff pref] or
+ [mem 0x60000000 pref] or
[mem 0x0000000060000000-0x000000006fffffff pref]
+ [mem 0x0000000060000000 pref]
For printing struct resources. The ``R`` and ``r`` specifiers result in a
-printed resource with (R) or without (r) a decoded flags member.
+printed resource with (R) or without (r) a decoded flags member. If start is
+equal to end only print the start value.
Passed by reference.
@@ -147,6 +236,19 @@ width of the CPU data path.
Passed by reference.
+Struct Range
+------------
+
+::
+
+ %pra [range 0x0000000060000000-0x000000006fffffff] or
+ [range 0x0000000060000000]
+
+For printing struct range. struct range holds an arbitrary range of u64
+values. If start is equal to end only print the start value.
+
+Passed by reference.
+
DMA address types dma_addr_t
----------------------------
@@ -269,7 +371,7 @@ colon-separators. Leading zeros are always used.
The additional ``c`` specifier can be used with the ``I`` specifier to
print a compressed IPv6 address as described by
-http://tools.ietf.org/html/rfc5952
+https://tools.ietf.org/html/rfc5952
Passed by reference.
@@ -293,7 +395,7 @@ The additional ``p``, ``f``, and ``s`` specifiers are used to specify port
flowinfo a ``/`` and scope a ``%``, each followed by the actual value.
In case of an IPv6 address the compressed IPv6 address as described by
-http://tools.ietf.org/html/rfc5952 is being used if the additional
+https://tools.ietf.org/html/rfc5952 is being used if the additional
specifier ``c`` is given. The IPv6 address is surrounded by ``[``, ``]`` in
case of additional specifiers ``p``, ``f`` or ``s`` as suggested by
https://tools.ietf.org/html/draft-ietf-6man-text-addr-representation-07
@@ -412,15 +514,65 @@ Examples::
Passed by reference.
+Fwnode handles
+--------------
+
+::
+
+ %pfw[fP]
+
+For printing information on an fwnode_handle. The default is to print the full
+node name, including the path. The modifiers are functionally equivalent to
+%pOF above.
+
+ - f - full name of the node, including the path
+ - P - the name of the node including an address (if there is one)
+
+Examples (ACPI)::
+
+ %pfwf \_SB.PCI0.CIO2.port@1.endpoint@0 - Full node name
+ %pfwP endpoint@0 - Node name
+
+Examples (OF)::
+
+ %pfwf /ocp@68000000/i2c@48072000/camera@10/port/endpoint - Full name
+ %pfwP endpoint - Node name
+
+Time and date
+-------------
+
+::
+
+ %pt[RT] YYYY-mm-ddTHH:MM:SS
+ %pt[RT]s YYYY-mm-dd HH:MM:SS
+ %pt[RT]d YYYY-mm-dd
+ %pt[RT]t HH:MM:SS
+ %pt[RT][dt][r][s]
+
+For printing date and time as represented by::
+
+ R struct rtc_time structure
+ T time64_t type
+
+in human readable format.
+
+By default year will be incremented by 1900 and month by 1.
+Use %pt[RT]r (raw) to suppress this behaviour.
+
+The %pt[RT]s (space) will override ISO 8601 separator by using ' ' (space)
+instead of 'T' (Capital T) between date and time. It won't have any effect
+when date or time is omitted.
+
+Passed by reference.
+
struct clk
----------
::
%pC pll1
- %pCn pll1
-For printing struct clk structures. %pC and %pCn print the name of the clock
+For printing struct clk structures. %pC prints the name of the clock
(Common Clock Framework) or a unique 32-bit ID (legacy clock framework).
Passed by reference.
@@ -437,22 +589,28 @@ For printing bitmap and its derivatives such as cpumask and nodemask,
%*pb outputs the bitmap with field width as the number of bits and %*pbl
output the bitmap as range list with field width as the number of bits.
-Passed by reference.
+The field width is passed by value, the bitmap is passed by reference.
+Helper macros cpumask_pr_args() and nodemask_pr_args() are available to ease
+printing cpumask and nodemask.
-Flags bitfields such as page flags, gfp_flags
----------------------------------------------
+Flags bitfields such as page flags and gfp_flags
+--------------------------------------------------------
::
- %pGp referenced|uptodate|lru|active|private
+ %pGp 0x17ffffc0002036(referenced|uptodate|lru|active|private|node=0|zone=2|lastcpupid=0x1fffff)
%pGg GFP_USER|GFP_DMA32|GFP_NOWARN
%pGv read|exec|mayread|maywrite|mayexec|denywrite
For printing flags bitfields as a collection of symbolic constants that
would construct the value. The type of flags is given by the third
-character. Currently supported are [p]age flags, [v]ma_flags (both
-expect ``unsigned long *``) and [g]fp_flags (expects ``gfp_t *``). The flag
-names and print order depends on the particular type.
+character. Currently supported are:
+
+ - p - [p]age flags, expects value of type (``unsigned long *``)
+ - v - [v]ma_flags, expects value of type (``unsigned long *``)
+ - g - [g]fp_flags, expects value of type (``gfp_t *``)
+
+The flag names and print order depends on the particular type.
Note that this format should not be used directly in the
:c:func:`TP_printk()` part of a tracepoint. Instead, use the show_*_flags()
@@ -471,10 +629,70 @@ For printing netdev_features_t.
Passed by reference.
+V4L2 and DRM FourCC code (pixel format)
+---------------------------------------
+
+::
+
+ %p4cc
+
+Print a FourCC code used by V4L2 or DRM, including format endianness and
+its numerical value as hexadecimal.
+
+Passed by reference.
+
+Examples::
+
+ %p4cc BG12 little-endian (0x32314742)
+ %p4cc Y10 little-endian (0x20303159)
+ %p4cc NV12 big-endian (0xb231564e)
+
+Generic FourCC code
+-------------------
+
+::
+ %p4c[h[R]lb] gP00 (0x67503030)
+
+Print a generic FourCC code, as both ASCII characters and its numerical
+value as hexadecimal.
+
+The generic FourCC code is always printed in the big-endian format,
+the most significant byte first. This is the opposite of V4L/DRM FourCCs.
+
+The additional ``h``, ``hR``, ``l``, and ``b`` specifiers define what
+endianness is used to load the stored bytes. The data might be interpreted
+using the host, reversed host byte order, little-endian, or big-endian.
+
+Passed by reference.
+
+Examples for a little-endian machine, given &(u32)0x67503030::
+
+ %p4ch gP00 (0x67503030)
+ %p4chR 00Pg (0x30305067)
+ %p4cl gP00 (0x67503030)
+ %p4cb 00Pg (0x30305067)
+
+Examples for a big-endian machine, given &(u32)0x67503030::
+
+ %p4ch gP00 (0x67503030)
+ %p4chR 00Pg (0x30305067)
+ %p4cl 00Pg (0x30305067)
+ %p4cb gP00 (0x67503030)
+
+Rust
+----
+
+::
+
+ %pA
+
+Only intended to be used from Rust code to format ``core::fmt::Arguments``.
+Do *not* use it from C.
+
Thanks
======
-If you add other %p extensions, please extend <lib/test_printf.c> with
-one or more test cases, if at all feasible.
+If you add other %p extensions, please extend <lib/tests/printf_kunit.c>
+with one or more test cases, if at all feasible.
Thank you for your cooperation and attention.
diff --git a/Documentation/core-api/printk-index.rst b/Documentation/core-api/printk-index.rst
new file mode 100644
index 000000000000..1979c5dd32fe
--- /dev/null
+++ b/Documentation/core-api/printk-index.rst
@@ -0,0 +1,137 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============
+Printk Index
+============
+
+There are many ways to monitor the state of the system. One important
+source of information is the system log. It provides a lot of information,
+including more or less important warnings and error messages.
+
+There are monitoring tools that filter and take action based on messages
+logged.
+
+The kernel messages are evolving together with the code. As a result,
+particular kernel messages are not KABI and never will be!
+
+It is a huge challenge for maintaining the system log monitors. It requires
+knowing what messages were updated in a particular kernel version and why.
+Finding these changes in the sources would require non-trivial parsers.
+Also it would require matching the sources with the binary kernel which
+is not always trivial. Various changes might be backported. Various kernel
+versions might be used on different monitored systems.
+
+This is where the printk index feature might become useful. It provides
+a dump of printk formats used all over the source code used for the kernel
+and modules on the running system. It is accessible at runtime via debugfs.
+
+The printk index helps to find changes in the message formats. Also it helps
+to track the strings back to the kernel sources and the related commit.
+
+
+User Interface
+==============
+
+The index of printk formats are split in into separate files. The files are
+named according to the binaries where the printk formats are built-in. There
+is always "vmlinux" and optionally also modules, for example::
+
+ /sys/kernel/debug/printk/index/vmlinux
+ /sys/kernel/debug/printk/index/ext4
+ /sys/kernel/debug/printk/index/scsi_mod
+
+Note that only loaded modules are shown. Also printk formats from a module
+might appear in "vmlinux" when the module is built-in.
+
+The content is inspired by the dynamic debug interface and looks like::
+
+ $> head -1 /sys/kernel/debug/printk/index/vmlinux; shuf -n 5 vmlinux
+ # <level[,flags]> filename:line function "format"
+ <5> block/blk-settings.c:661 disk_stack_limits "%s: Warning: Device %s is misaligned\n"
+ <4> kernel/trace/trace.c:8296 trace_create_file "Could not create tracefs '%s' entry\n"
+ <6> arch/x86/kernel/hpet.c:144 _hpet_print_config "hpet: %s(%d):\n"
+ <6> init/do_mounts.c:605 prepare_namespace "Waiting for root device %s...\n"
+ <6> drivers/acpi/osl.c:1410 acpi_no_auto_serialize_setup "ACPI: auto-serialization disabled\n"
+
+, where the meaning is:
+
+ - :level: log level value: 0-7 for particular severity, -1 as default,
+ 'c' as continuous line without an explicit log level
+ - :flags: optional flags: currently only 'c' for KERN_CONT
+ - :filename\:line: source filename and line number of the related
+ printk() call. Note that there are many wrappers, for example,
+ pr_warn(), pr_warn_once(), dev_warn().
+ - :function: function name where the printk() call is used.
+ - :format: format string
+
+The extra information makes it a bit harder to find differences
+between various kernels. Especially the line number might change
+very often. On the other hand, it helps a lot to confirm that
+it is the same string or find the commit that is responsible
+for eventual changes.
+
+
+printk() Is Not a Stable KABI
+=============================
+
+Several developers are afraid that exporting all these implementation
+details into the user space will transform particular printk() calls
+into KABI.
+
+But it is exactly the opposite. printk() calls must _not_ be KABI.
+And the printk index helps user space tools to deal with this.
+
+
+Subsystem specific printk wrappers
+==================================
+
+The printk index is generated using extra metadata that are stored in
+a dedicated .elf section ".printk_index". It is achieved using macro
+wrappers doing __printk_index_emit() together with the real printk()
+call. The same technique is used also for the metadata used by
+the dynamic debug feature.
+
+The metadata are stored for a particular message only when it is printed
+using these special wrappers. It is implemented for the commonly
+used printk() calls, including, for example, pr_warn(), or pr_once().
+
+Additional changes are necessary for various subsystem specific wrappers
+that call the original printk() via a common helper function. These needs
+their own wrappers adding __printk_index_emit().
+
+Only few subsystem specific wrappers have been updated so far,
+for example, dev_printk(). As a result, the printk formats from
+some subsystems can be missing in the printk index.
+
+
+Subsystem specific prefix
+=========================
+
+The macro pr_fmt() macro allows to define a prefix that is printed
+before the string generated by the related printk() calls.
+
+Subsystem specific wrappers usually add even more complicated
+prefixes.
+
+These prefixes can be stored into the printk index metadata
+by an optional parameter of __printk_index_emit(). The debugfs
+interface might then show the printk formats including these prefixes.
+For example, drivers/acpi/osl.c contains::
+
+ #define pr_fmt(fmt) "ACPI: OSL: " fmt
+
+ static int __init acpi_no_auto_serialize_setup(char *str)
+ {
+ acpi_gbl_auto_serialize_methods = FALSE;
+ pr_info("Auto-serialization disabled\n");
+
+ return 1;
+ }
+
+This results in the following printk index entry::
+
+ <6> drivers/acpi/osl.c:1410 acpi_no_auto_serialize_setup "ACPI: auto-serialization disabled\n"
+
+It helps matching messages from the real log with printk index.
+Then the source file name, line number, and function name can
+be used to match the string with the source code.
diff --git a/Documentation/core-api/protection-keys.rst b/Documentation/core-api/protection-keys.rst
new file mode 100644
index 000000000000..7eb7c6023e09
--- /dev/null
+++ b/Documentation/core-api/protection-keys.rst
@@ -0,0 +1,120 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================
+Memory Protection Keys
+======================
+
+Memory Protection Keys provide a mechanism for enforcing page-based
+protections, but without requiring modification of the page tables when an
+application changes protection domains.
+
+Pkeys Userspace (PKU) is a feature which can be found on:
+ * Intel server CPUs, Skylake and later
+ * Intel client CPUs, Tiger Lake (11th Gen Core) and later
+ * Future AMD CPUs
+ * arm64 CPUs implementing the Permission Overlay Extension (FEAT_S1POE)
+
+x86_64
+======
+Pkeys work by dedicating 4 previously Reserved bits in each page table entry to
+a "protection key", giving 16 possible keys.
+
+Protections for each key are defined with a per-CPU user-accessible register
+(PKRU). Each of these is a 32-bit register storing two bits (Access Disable
+and Write Disable) for each of 16 keys.
+
+Being a CPU register, PKRU is inherently thread-local, potentially giving each
+thread a different set of protections from every other thread.
+
+There are two instructions (RDPKRU/WRPKRU) for reading and writing to the
+register. The feature is only available in 64-bit mode, even though there is
+theoretically space in the PAE PTEs. These permissions are enforced on data
+access only and have no effect on instruction fetches.
+
+arm64
+=====
+
+Pkeys use 3 bits in each page table entry, to encode a "protection key index",
+giving 8 possible keys.
+
+Protections for each key are defined with a per-CPU user-writable system
+register (POR_EL0). This is a 64-bit register encoding read, write and execute
+overlay permissions for each protection key index.
+
+Being a CPU register, POR_EL0 is inherently thread-local, potentially giving
+each thread a different set of protections from every other thread.
+
+Unlike x86_64, the protection key permissions also apply to instruction
+fetches.
+
+Syscalls
+========
+
+There are 3 system calls which directly interact with pkeys::
+
+ int pkey_alloc(unsigned long flags, unsigned long init_access_rights)
+ int pkey_free(int pkey);
+ int pkey_mprotect(unsigned long start, size_t len,
+ unsigned long prot, int pkey);
+
+Before a pkey can be used, it must first be allocated with pkey_alloc(). An
+application writes to the architecture specific CPU register directly in order
+to change access permissions to memory covered with a key. In this example
+this is wrapped by a C function called pkey_set().
+::
+
+ int real_prot = PROT_READ|PROT_WRITE;
+ pkey = pkey_alloc(0, PKEY_DISABLE_WRITE);
+ ptr = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+ ret = pkey_mprotect(ptr, PAGE_SIZE, real_prot, pkey);
+ ... application runs here
+
+Now, if the application needs to update the data at 'ptr', it can
+gain access, do the update, then remove its write access::
+
+ pkey_set(pkey, 0); // clear PKEY_DISABLE_WRITE
+ *ptr = foo; // assign something
+ pkey_set(pkey, PKEY_DISABLE_WRITE); // set PKEY_DISABLE_WRITE again
+
+Now when it frees the memory, it will also free the pkey since it
+is no longer in use::
+
+ munmap(ptr, PAGE_SIZE);
+ pkey_free(pkey);
+
+.. note:: pkey_set() is a wrapper around writing to the CPU register.
+ Example implementations can be found in
+ tools/testing/selftests/mm/pkey-{arm64,powerpc,x86}.h
+
+Behavior
+========
+
+The kernel attempts to make protection keys consistent with the
+behavior of a plain mprotect(). For instance if you do this::
+
+ mprotect(ptr, size, PROT_NONE);
+ something(ptr);
+
+you can expect the same effects with protection keys when doing this::
+
+ pkey = pkey_alloc(0, PKEY_DISABLE_WRITE | PKEY_DISABLE_READ);
+ pkey_mprotect(ptr, size, PROT_READ|PROT_WRITE, pkey);
+ something(ptr);
+
+That should be true whether something() is a direct access to 'ptr'
+like::
+
+ *ptr = foo;
+
+or when the kernel does the access on the application's behalf like
+with a read()::
+
+ read(fd, ptr, 1);
+
+The kernel will send a SIGSEGV in both cases, but si_code will be set
+to SEGV_PKERR when violating protection keys versus SEGV_ACCERR when
+the plain mprotect() permissions are violated.
+
+Note that kernel accesses from a kthread (such as io_uring) will use a default
+value for the protection key register and so will not be consistent with
+userspace's value of the register or mprotect().
diff --git a/Documentation/core-api/rbtree.rst b/Documentation/core-api/rbtree.rst
new file mode 100644
index 000000000000..ed1a9fbc779e
--- /dev/null
+++ b/Documentation/core-api/rbtree.rst
@@ -0,0 +1,429 @@
+=================================
+Red-black Trees (rbtree) in Linux
+=================================
+
+
+:Date: January 18, 2007
+:Author: Rob Landley <rob@landley.net>
+
+What are red-black trees, and what are they for?
+------------------------------------------------
+
+Red-black trees are a type of self-balancing binary search tree, used for
+storing sortable key/value data pairs. This differs from radix trees (which
+are used to efficiently store sparse arrays and thus use long integer indexes
+to insert/access/delete nodes) and hash tables (which are not kept sorted to
+be easily traversed in order, and must be tuned for a specific size and
+hash function where rbtrees scale gracefully storing arbitrary keys).
+
+Red-black trees are similar to AVL trees, but provide faster real-time bounded
+worst case performance for insertion and deletion (at most two rotations and
+three rotations, respectively, to balance the tree), with slightly slower
+(but still O(log n)) lookup time.
+
+To quote Linux Weekly News:
+
+ There are a number of red-black trees in use in the kernel.
+ The deadline and CFQ I/O schedulers employ rbtrees to
+ track requests; the packet CD/DVD driver does the same.
+ The high-resolution timer code uses an rbtree to organize outstanding
+ timer requests. The ext3 filesystem tracks directory entries in a
+ red-black tree. Virtual memory areas (VMAs) are tracked with red-black
+ trees, as are epoll file descriptors, cryptographic keys, and network
+ packets in the "hierarchical token bucket" scheduler.
+
+This document covers use of the Linux rbtree implementation. For more
+information on the nature and implementation of Red Black Trees, see:
+
+ Linux Weekly News article on red-black trees
+ https://lwn.net/Articles/184495/
+
+ Wikipedia entry on red-black trees
+ https://en.wikipedia.org/wiki/Red-black_tree
+
+Linux implementation of red-black trees
+---------------------------------------
+
+Linux's rbtree implementation lives in the file "lib/rbtree.c". To use it,
+"#include <linux/rbtree.h>".
+
+The Linux rbtree implementation is optimized for speed, and thus has one
+less layer of indirection (and better cache locality) than more traditional
+tree implementations. Instead of using pointers to separate rb_node and data
+structures, each instance of struct rb_node is embedded in the data structure
+it organizes. And instead of using a comparison callback function pointer,
+users are expected to write their own tree search and insert functions
+which call the provided rbtree functions. Locking is also left up to the
+user of the rbtree code.
+
+Creating a new rbtree
+---------------------
+
+Data nodes in an rbtree tree are structures containing a struct rb_node member::
+
+ struct mytype {
+ struct rb_node node;
+ char *keystring;
+ };
+
+When dealing with a pointer to the embedded struct rb_node, the containing data
+structure may be accessed with the standard container_of() macro. In addition,
+individual members may be accessed directly via rb_entry(node, type, member).
+
+At the root of each rbtree is an rb_root structure, which is initialized to be
+empty via:
+
+ struct rb_root mytree = RB_ROOT;
+
+Searching for a value in an rbtree
+----------------------------------
+
+Writing a search function for your tree is fairly straightforward: start at the
+root, compare each value, and follow the left or right branch as necessary.
+
+Example::
+
+ struct mytype *my_search(struct rb_root *root, char *string)
+ {
+ struct rb_node *node = root->rb_node;
+
+ while (node) {
+ struct mytype *data = container_of(node, struct mytype, node);
+ int result;
+
+ result = strcmp(string, data->keystring);
+
+ if (result < 0)
+ node = node->rb_left;
+ else if (result > 0)
+ node = node->rb_right;
+ else
+ return data;
+ }
+ return NULL;
+ }
+
+Inserting data into an rbtree
+-----------------------------
+
+Inserting data in the tree involves first searching for the place to insert the
+new node, then inserting the node and rebalancing ("recoloring") the tree.
+
+The search for insertion differs from the previous search by finding the
+location of the pointer on which to graft the new node. The new node also
+needs a link to its parent node for rebalancing purposes.
+
+Example::
+
+ int my_insert(struct rb_root *root, struct mytype *data)
+ {
+ struct rb_node **new = &(root->rb_node), *parent = NULL;
+
+ /* Figure out where to put new node */
+ while (*new) {
+ struct mytype *this = container_of(*new, struct mytype, node);
+ int result = strcmp(data->keystring, this->keystring);
+
+ parent = *new;
+ if (result < 0)
+ new = &((*new)->rb_left);
+ else if (result > 0)
+ new = &((*new)->rb_right);
+ else
+ return FALSE;
+ }
+
+ /* Add new node and rebalance tree. */
+ rb_link_node(&data->node, parent, new);
+ rb_insert_color(&data->node, root);
+
+ return TRUE;
+ }
+
+Removing or replacing existing data in an rbtree
+------------------------------------------------
+
+To remove an existing node from a tree, call::
+
+ void rb_erase(struct rb_node *victim, struct rb_root *tree);
+
+Example::
+
+ struct mytype *data = mysearch(&mytree, "walrus");
+
+ if (data) {
+ rb_erase(&data->node, &mytree);
+ myfree(data);
+ }
+
+To replace an existing node in a tree with a new one with the same key, call::
+
+ void rb_replace_node(struct rb_node *old, struct rb_node *new,
+ struct rb_root *tree);
+
+Replacing a node this way does not re-sort the tree: If the new node doesn't
+have the same key as the old node, the rbtree will probably become corrupted.
+
+Iterating through the elements stored in an rbtree (in sort order)
+------------------------------------------------------------------
+
+Four functions are provided for iterating through an rbtree's contents in
+sorted order. These work on arbitrary trees, and should not need to be
+modified or wrapped (except for locking purposes)::
+
+ struct rb_node *rb_first(struct rb_root *tree);
+ struct rb_node *rb_last(struct rb_root *tree);
+ struct rb_node *rb_next(struct rb_node *node);
+ struct rb_node *rb_prev(struct rb_node *node);
+
+To start iterating, call rb_first() or rb_last() with a pointer to the root
+of the tree, which will return a pointer to the node structure contained in
+the first or last element in the tree. To continue, fetch the next or previous
+node by calling rb_next() or rb_prev() on the current node. This will return
+NULL when there are no more nodes left.
+
+The iterator functions return a pointer to the embedded struct rb_node, from
+which the containing data structure may be accessed with the container_of()
+macro, and individual members may be accessed directly via
+rb_entry(node, type, member).
+
+Example::
+
+ struct rb_node *node;
+ for (node = rb_first(&mytree); node; node = rb_next(node))
+ printk("key=%s\n", rb_entry(node, struct mytype, node)->keystring);
+
+Cached rbtrees
+--------------
+
+Computing the leftmost (smallest) node is quite a common task for binary
+search trees, such as for traversals or users relying on a the particular
+order for their own logic. To this end, users can use 'struct rb_root_cached'
+to optimize O(logN) rb_first() calls to a simple pointer fetch avoiding
+potentially expensive tree iterations. This is done at negligible runtime
+overhead for maintenance; albeit larger memory footprint.
+
+Similar to the rb_root structure, cached rbtrees are initialized to be
+empty via::
+
+ struct rb_root_cached mytree = RB_ROOT_CACHED;
+
+Cached rbtree is simply a regular rb_root with an extra pointer to cache the
+leftmost node. This allows rb_root_cached to exist wherever rb_root does,
+which permits augmented trees to be supported as well as only a few extra
+interfaces::
+
+ struct rb_node *rb_first_cached(struct rb_root_cached *tree);
+ void rb_insert_color_cached(struct rb_node *, struct rb_root_cached *, bool);
+ void rb_erase_cached(struct rb_node *node, struct rb_root_cached *);
+
+Both insert and erase calls have their respective counterpart of augmented
+trees::
+
+ void rb_insert_augmented_cached(struct rb_node *node, struct rb_root_cached *,
+ bool, struct rb_augment_callbacks *);
+ void rb_erase_augmented_cached(struct rb_node *, struct rb_root_cached *,
+ struct rb_augment_callbacks *);
+
+
+Support for Augmented rbtrees
+-----------------------------
+
+Augmented rbtree is an rbtree with "some" additional data stored in
+each node, where the additional data for node N must be a function of
+the contents of all nodes in the subtree rooted at N. This data can
+be used to augment some new functionality to rbtree. Augmented rbtree
+is an optional feature built on top of basic rbtree infrastructure.
+An rbtree user who wants this feature will have to call the augmentation
+functions with the user provided augmentation callback when inserting
+and erasing nodes.
+
+C files implementing augmented rbtree manipulation must include
+<linux/rbtree_augmented.h> instead of <linux/rbtree.h>. Note that
+linux/rbtree_augmented.h exposes some rbtree implementations details
+you are not expected to rely on; please stick to the documented APIs
+there and do not include <linux/rbtree_augmented.h> from header files
+either so as to minimize chances of your users accidentally relying on
+such implementation details.
+
+On insertion, the user must update the augmented information on the path
+leading to the inserted node, then call rb_link_node() as usual and
+rb_augment_inserted() instead of the usual rb_insert_color() call.
+If rb_augment_inserted() rebalances the rbtree, it will callback into
+a user provided function to update the augmented information on the
+affected subtrees.
+
+When erasing a node, the user must call rb_erase_augmented() instead of
+rb_erase(). rb_erase_augmented() calls back into user provided functions
+to updated the augmented information on affected subtrees.
+
+In both cases, the callbacks are provided through struct rb_augment_callbacks.
+3 callbacks must be defined:
+
+- A propagation callback, which updates the augmented value for a given
+ node and its ancestors, up to a given stop point (or NULL to update
+ all the way to the root).
+
+- A copy callback, which copies the augmented value for a given subtree
+ to a newly assigned subtree root.
+
+- A tree rotation callback, which copies the augmented value for a given
+ subtree to a newly assigned subtree root AND recomputes the augmented
+ information for the former subtree root.
+
+The compiled code for rb_erase_augmented() may inline the propagation and
+copy callbacks, which results in a large function, so each augmented rbtree
+user should have a single rb_erase_augmented() call site in order to limit
+compiled code size.
+
+
+Sample usage
+^^^^^^^^^^^^
+
+Interval tree is an example of augmented rb tree. Reference -
+"Introduction to Algorithms" by Cormen, Leiserson, Rivest and Stein.
+More details about interval trees:
+
+Classical rbtree has a single key and it cannot be directly used to store
+interval ranges like [lo:hi] and do a quick lookup for any overlap with a new
+lo:hi or to find whether there is an exact match for a new lo:hi.
+
+However, rbtree can be augmented to store such interval ranges in a structured
+way making it possible to do efficient lookup and exact match.
+
+This "extra information" stored in each node is the maximum hi
+(max_hi) value among all the nodes that are its descendants. This
+information can be maintained at each node just be looking at the node
+and its immediate children. And this will be used in O(log n) lookup
+for lowest match (lowest start address among all possible matches)
+with something like::
+
+ struct interval_tree_node *
+ interval_tree_first_match(struct rb_root *root,
+ unsigned long start, unsigned long last)
+ {
+ struct interval_tree_node *node;
+
+ if (!root->rb_node)
+ return NULL;
+ node = rb_entry(root->rb_node, struct interval_tree_node, rb);
+
+ while (true) {
+ if (node->rb.rb_left) {
+ struct interval_tree_node *left =
+ rb_entry(node->rb.rb_left,
+ struct interval_tree_node, rb);
+ if (left->__subtree_last >= start) {
+ /*
+ * Some nodes in left subtree satisfy Cond2.
+ * Iterate to find the leftmost such node N.
+ * If it also satisfies Cond1, that's the match
+ * we are looking for. Otherwise, there is no
+ * matching interval as nodes to the right of N
+ * can't satisfy Cond1 either.
+ */
+ node = left;
+ continue;
+ }
+ }
+ if (node->start <= last) { /* Cond1 */
+ if (node->last >= start) /* Cond2 */
+ return node; /* node is leftmost match */
+ if (node->rb.rb_right) {
+ node = rb_entry(node->rb.rb_right,
+ struct interval_tree_node, rb);
+ if (node->__subtree_last >= start)
+ continue;
+ }
+ }
+ return NULL; /* No match */
+ }
+ }
+
+Insertion/removal are defined using the following augmented callbacks::
+
+ static inline unsigned long
+ compute_subtree_last(struct interval_tree_node *node)
+ {
+ unsigned long max = node->last, subtree_last;
+ if (node->rb.rb_left) {
+ subtree_last = rb_entry(node->rb.rb_left,
+ struct interval_tree_node, rb)->__subtree_last;
+ if (max < subtree_last)
+ max = subtree_last;
+ }
+ if (node->rb.rb_right) {
+ subtree_last = rb_entry(node->rb.rb_right,
+ struct interval_tree_node, rb)->__subtree_last;
+ if (max < subtree_last)
+ max = subtree_last;
+ }
+ return max;
+ }
+
+ static void augment_propagate(struct rb_node *rb, struct rb_node *stop)
+ {
+ while (rb != stop) {
+ struct interval_tree_node *node =
+ rb_entry(rb, struct interval_tree_node, rb);
+ unsigned long subtree_last = compute_subtree_last(node);
+ if (node->__subtree_last == subtree_last)
+ break;
+ node->__subtree_last = subtree_last;
+ rb = rb_parent(&node->rb);
+ }
+ }
+
+ static void augment_copy(struct rb_node *rb_old, struct rb_node *rb_new)
+ {
+ struct interval_tree_node *old =
+ rb_entry(rb_old, struct interval_tree_node, rb);
+ struct interval_tree_node *new =
+ rb_entry(rb_new, struct interval_tree_node, rb);
+
+ new->__subtree_last = old->__subtree_last;
+ }
+
+ static void augment_rotate(struct rb_node *rb_old, struct rb_node *rb_new)
+ {
+ struct interval_tree_node *old =
+ rb_entry(rb_old, struct interval_tree_node, rb);
+ struct interval_tree_node *new =
+ rb_entry(rb_new, struct interval_tree_node, rb);
+
+ new->__subtree_last = old->__subtree_last;
+ old->__subtree_last = compute_subtree_last(old);
+ }
+
+ static const struct rb_augment_callbacks augment_callbacks = {
+ augment_propagate, augment_copy, augment_rotate
+ };
+
+ void interval_tree_insert(struct interval_tree_node *node,
+ struct rb_root *root)
+ {
+ struct rb_node **link = &root->rb_node, *rb_parent = NULL;
+ unsigned long start = node->start, last = node->last;
+ struct interval_tree_node *parent;
+
+ while (*link) {
+ rb_parent = *link;
+ parent = rb_entry(rb_parent, struct interval_tree_node, rb);
+ if (parent->__subtree_last < last)
+ parent->__subtree_last = last;
+ if (start < parent->start)
+ link = &parent->rb.rb_left;
+ else
+ link = &parent->rb.rb_right;
+ }
+
+ node->__subtree_last = last;
+ rb_link_node(&node->rb, rb_parent, link);
+ rb_insert_augmented(&node->rb, root, &augment_callbacks);
+ }
+
+ void interval_tree_remove(struct interval_tree_node *node,
+ struct rb_root *root)
+ {
+ rb_erase_augmented(&node->rb, root, &augment_callbacks);
+ }
diff --git a/Documentation/core-api/real-time/architecture-porting.rst b/Documentation/core-api/real-time/architecture-porting.rst
new file mode 100644
index 000000000000..d822fac29922
--- /dev/null
+++ b/Documentation/core-api/real-time/architecture-porting.rst
@@ -0,0 +1,109 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============================================
+Porting an architecture to support PREEMPT_RT
+=============================================
+
+:Author: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+
+This list outlines the architecture specific requirements that must be
+implemented in order to enable PREEMPT_RT. Once all required features are
+implemented, ARCH_SUPPORTS_RT can be selected in architecture’s Kconfig to make
+PREEMPT_RT selectable.
+Many prerequisites (genirq support for example) are enforced by the common code
+and are omitted here.
+
+The optional features are not strictly required but it is worth to consider
+them.
+
+Requirements
+------------
+
+Forced threaded interrupts
+ CONFIG_IRQ_FORCED_THREADING must be selected. Any interrupts that must
+ remain in hard-IRQ context must be marked with IRQF_NO_THREAD. This
+ requirement applies for instance to clocksource event interrupts,
+ perf interrupts and cascading interrupt-controller handlers.
+
+PREEMPTION support
+ Kernel preemption must be supported and requires that
+ CONFIG_ARCH_NO_PREEMPT remain unselected. Scheduling requests, such as those
+ issued from an interrupt or other exception handler, must be processed
+ immediately.
+
+POSIX CPU timers and KVM
+ POSIX CPU timers must expire from thread context rather than directly within
+ the timer interrupt. This behavior is enabled by setting the configuration
+ option CONFIG_HAVE_POSIX_CPU_TIMERS_TASK_WORK.
+ When KVM is enabled, CONFIG_KVM_XFER_TO_GUEST_WORK must also be set to ensure
+ that any pending work, such as POSIX timer expiration, is handled before
+ transitioning into guest mode.
+
+Hard-IRQ and Soft-IRQ stacks
+ Soft interrupts are handled in the thread context in which they are raised. If
+ a soft interrupt is triggered from hard-IRQ context, its execution is deferred
+ to the ksoftirqd thread. Preemption is never disabled during soft interrupt
+ handling, which makes soft interrupts preemptible.
+ If an architecture provides a custom __do_softirq() implementation that uses a
+ separate stack, it must select CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK. The
+ functionality should only be enabled when CONFIG_SOFTIRQ_ON_OWN_STACK is set.
+
+FPU and SIMD access in kernel mode
+ FPU and SIMD registers are typically not used in kernel mode and are therefore
+ not saved during kernel preemption. As a result, any kernel code that uses
+ these registers must be enclosed within a kernel_fpu_begin() and
+ kernel_fpu_end() section.
+ The kernel_fpu_begin() function usually invokes local_bh_disable() to prevent
+ interruptions from softirqs and to disable regular preemption. This allows the
+ protected code to run safely in both thread and softirq contexts.
+ On PREEMPT_RT kernels, however, kernel_fpu_begin() must not call
+ local_bh_disable(). Instead, it should use preempt_disable(), since softirqs
+ are always handled in thread context under PREEMPT_RT. In this case, disabling
+ preemption alone is sufficient.
+ The crypto subsystem operates on memory pages and requires users to "walk and
+ map" these pages while processing a request. This operation must occur outside
+ the kernel_fpu_begin()/ kernel_fpu_end() section because it requires preemption
+ to be enabled. These preemption points are generally sufficient to avoid
+ excessive scheduling latency.
+
+Exception handlers
+ Exception handlers, such as the page fault handler, typically enable interrupts
+ early, before invoking any generic code to process the exception. This is
+ necessary because handling a page fault may involve operations that can sleep.
+ Enabling interrupts is especially important on PREEMPT_RT, where certain
+ locks, such as spinlock_t, become sleepable. For example, handling an
+ invalid opcode may result in sending a SIGILL signal to the user task. A
+ debug excpetion will send a SIGTRAP signal.
+ In both cases, if the exception occurred in user space, it is safe to enable
+ interrupts early. Sending a signal requires both interrupts and kernel
+ preemption to be enabled.
+
+Optional features
+-----------------
+
+Timer and clocksource
+ A high-resolution clocksource and clockevents device are recommended. The
+ clockevents device should support the CLOCK_EVT_FEAT_ONESHOT feature for
+ optimal timer behavior. In most cases, microsecond-level accuracy is
+ sufficient
+
+Lazy preemption
+ This mechanism allows an in-kernel scheduling request for non-real-time tasks
+ to be delayed until the task is about to return to user space. It helps avoid
+ preempting a task that holds a sleeping lock at the time of the scheduling
+ request.
+ With CONFIG_GENERIC_IRQ_ENTRY enabled, supporting this feature requires
+ defining a bit for TIF_NEED_RESCHED_LAZY, preferably near TIF_NEED_RESCHED.
+
+Serial console with NBCON
+ With PREEMPT_RT enabled, all console output is handled by a dedicated thread
+ rather than directly from the context in which printk() is invoked. This design
+ allows printk() to be safely used in atomic contexts.
+ However, this also means that if the kernel crashes and cannot switch to the
+ printing thread, no output will be visible preventing the system from printing
+ its final messages.
+ There are exceptions for immediate output, such as during panic() handling. To
+ support this, the console driver must implement new-style lock handling. This
+ involves setting the CON_NBCON flag in console::flags and providing
+ implementations for the write_atomic, write_thread, device_lock, and
+ device_unlock callbacks.
diff --git a/Documentation/core-api/real-time/differences.rst b/Documentation/core-api/real-time/differences.rst
new file mode 100644
index 000000000000..83ec9aa1c61a
--- /dev/null
+++ b/Documentation/core-api/real-time/differences.rst
@@ -0,0 +1,242 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================
+How realtime kernels differ
+===========================
+
+:Author: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+
+Preface
+=======
+
+With forced-threaded interrupts and sleeping spin locks, code paths that
+previously caused long scheduling latencies have been made preemptible and
+moved into process context. This allows the scheduler to manage them more
+effectively and respond to higher-priority tasks with reduced latency.
+
+The following chapters provide an overview of key differences between a
+PREEMPT_RT kernel and a standard, non-PREEMPT_RT kernel.
+
+Locking
+=======
+
+Spinning locks such as spinlock_t are used to provide synchronization for data
+structures accessed from both interrupt context and process context. For this
+reason, locking functions are also available with the _irq() or _irqsave()
+suffixes, which disable interrupts before acquiring the lock. This ensures that
+the lock can be safely acquired in process context when interrupts are enabled.
+
+However, on a PREEMPT_RT system, interrupts are forced-threaded and no longer
+run in hard IRQ context. As a result, there is no need to disable interrupts as
+part of the locking procedure when using spinlock_t.
+
+For low-level core components such as interrupt handling, the scheduler, or the
+timer subsystem the kernel uses raw_spinlock_t. This lock type preserves
+traditional semantics: it disables preemption and, when used with _irq() or
+_irqsave(), also disables interrupts. This ensures proper synchronization in
+critical sections that must remain non-preemptible or with interrupts disabled.
+
+Execution context
+=================
+
+Interrupt handling in a PREEMPT_RT system is invoked in process context through
+the use of threaded interrupts. Other parts of the kernel also shift their
+execution into threaded context by different mechanisms. The goal is to keep
+execution paths preemptible, allowing the scheduler to interrupt them when a
+higher-priority task needs to run.
+
+Below is an overview of the kernel subsystems involved in this transition to
+threaded, preemptible execution.
+
+Interrupt handling
+------------------
+
+All interrupts are forced-threaded in a PREEMPT_RT system. The exceptions are
+interrupts that are requested with the IRQF_NO_THREAD, IRQF_PERCPU, or
+IRQF_ONESHOT flags.
+
+The IRQF_ONESHOT flag is used together with threaded interrupts, meaning those
+registered using request_threaded_irq() and providing only a threaded handler.
+Its purpose is to keep the interrupt line masked until the threaded handler has
+completed.
+
+If a primary handler is also provided in this case, it is essential that the
+handler does not acquire any sleeping locks, as it will not be threaded. The
+handler should be minimal and must avoid introducing delays, such as
+busy-waiting on hardware registers.
+
+
+Soft interrupts, bottom half handling
+-------------------------------------
+
+Soft interrupts are raised by the interrupt handler and are executed after the
+handler returns. Since they run in thread context, they can be preempted by
+other threads. Do not assume that softirq context runs with preemption
+disabled. This means you must not rely on mechanisms like local_bh_disable() in
+process context to protect per-CPU variables. Because softirq handlers are
+preemptible under PREEMPT_RT, this approach does not provide reliable
+synchronization.
+
+If this kind of protection is required for performance reasons, consider using
+local_lock_nested_bh(). On non-PREEMPT_RT kernels, this allows lockdep to
+verify that bottom halves are disabled. On PREEMPT_RT systems, it adds the
+necessary locking to ensure proper protection.
+
+Using local_lock_nested_bh() also makes the locking scope explicit and easier
+for readers and maintainers to understand.
+
+
+per-CPU variables
+-----------------
+
+Protecting access to per-CPU variables solely by using preempt_disable() should
+be avoided, especially if the critical section has unbounded runtime or may
+call APIs that can sleep.
+
+If using a spinlock_t is considered too costly for performance reasons,
+consider using local_lock_t. On non-PREEMPT_RT configurations, this introduces
+no runtime overhead when lockdep is disabled. With lockdep enabled, it verifies
+that the lock is only acquired in process context and never from softirq or
+hard IRQ context.
+
+On a PREEMPT_RT kernel, local_lock_t is implemented using a per-CPU spinlock_t,
+which provides safe local protection for per-CPU data while keeping the system
+preemptible.
+
+Because spinlock_t on PREEMPT_RT does not disable preemption, it cannot be used
+to protect per-CPU data by relying on implicit preemption disabling. If this
+inherited preemption disabling is essential and if local_lock_t cannot be used
+due to performance constraints, brevity of the code, or abstraction boundaries
+within an API then preempt_disable_nested() may be a suitable alternative. On
+non-PREEMPT_RT kernels, it verifies with lockdep that preemption is already
+disabled. On PREEMPT_RT, it explicitly disables preemption.
+
+Timers
+------
+
+By default, an hrtimer is executed in hard interrupt context. The exception is
+timers initialized with the HRTIMER_MODE_SOFT flag, which are executed in
+softirq context.
+
+On a PREEMPT_RT kernel, this behavior is reversed: hrtimers are executed in
+softirq context by default, typically within the ktimersd thread. This thread
+runs at the lowest real-time priority, ensuring it executes before any
+SCHED_OTHER tasks but does not interfere with higher-priority real-time
+threads. To explicitly request execution in hard interrupt context on
+PREEMPT_RT, the timer must be marked with the HRTIMER_MODE_HARD flag.
+
+Memory allocation
+-----------------
+
+The memory allocation APIs, such as kmalloc() and alloc_pages(), require a
+gfp_t flag to indicate the allocation context. On non-PREEMPT_RT kernels, it is
+necessary to use GFP_ATOMIC when allocating memory from interrupt context or
+from sections where preemption is disabled. This is because the allocator must
+not sleep in these contexts waiting for memory to become available.
+
+However, this approach does not work on PREEMPT_RT kernels. The memory
+allocator in PREEMPT_RT uses sleeping locks internally, which cannot be
+acquired when preemption is disabled. Fortunately, this is generally not a
+problem, because PREEMPT_RT moves most contexts that would traditionally run
+with preemption or interrupts disabled into threaded context, where sleeping is
+allowed.
+
+What remains problematic is code that explicitly disables preemption or
+interrupts. In such cases, memory allocation must be performed outside the
+critical section.
+
+This restriction also applies to memory deallocation routines such as kfree()
+and free_pages(), which may also involve internal locking and must not be
+called from non-preemptible contexts.
+
+IRQ work
+--------
+
+The irq_work API provides a mechanism to schedule a callback in interrupt
+context. It is designed for use in contexts where traditional scheduling is not
+possible, such as from within NMI handlers or from inside the scheduler, where
+using a workqueue would be unsafe.
+
+On non-PREEMPT_RT systems, all irq_work items are executed immediately in
+interrupt context. Items marked with IRQ_WORK_LAZY are deferred until the next
+timer tick but are still executed in interrupt context.
+
+On PREEMPT_RT systems, the execution model changes. Because irq_work callbacks
+may acquire sleeping locks or have unbounded execution time, they are handled
+in thread context by a per-CPU irq_work kernel thread. This thread runs at the
+lowest real-time priority, ensuring it executes before any SCHED_OTHER tasks
+but does not interfere with higher-priority real-time threads.
+
+The exception are work items marked with IRQ_WORK_HARD_IRQ, which are still
+executed in hard interrupt context. Lazy items (IRQ_WORK_LAZY) continue to be
+deferred until the next timer tick and are also executed by the irq_work/
+thread.
+
+RCU callbacks
+-------------
+
+RCU callbacks are invoked by default in softirq context. Their execution is
+important because, depending on the use case, they either free memory or ensure
+progress in state transitions. Running these callbacks as part of the softirq
+chain can lead to undesired situations, such as contention for CPU resources
+with other SCHED_OTHER tasks when executed within ksoftirqd.
+
+To avoid running callbacks in softirq context, the RCU subsystem provides a
+mechanism to execute them in process context instead. This behavior can be
+enabled by setting the boot command-line parameter rcutree.use_softirq=0. This
+setting is enforced in kernels configured with PREEMPT_RT.
+
+Spin until ready
+================
+
+The "spin until ready" pattern involves repeatedly checking (spinning on) the
+state of a data structure until it becomes available. This pattern assumes that
+preemption, soft interrupts, or interrupts are disabled. If the data structure
+is marked busy, it is presumed to be in use by another CPU, and spinning should
+eventually succeed as that CPU makes progress.
+
+Some examples are hrtimer_cancel() or timer_delete_sync(). These functions
+cancel timers that execute with interrupts or soft interrupts disabled. If a
+thread attempts to cancel a timer and finds it active, spinning until the
+callback completes is safe because the callback can only run on another CPU and
+will eventually finish.
+
+On PREEMPT_RT kernels, however, timer callbacks run in thread context. This
+introduces a challenge: a higher-priority thread attempting to cancel the timer
+may preempt the timer callback thread. Since the scheduler cannot migrate the
+callback thread to another CPU due to affinity constraints, spinning can result
+in livelock even on multiprocessor systems.
+
+To avoid this, both the canceling and callback sides must use a handshake
+mechanism that supports priority inheritance. This allows the canceling thread
+to suspend until the callback completes, ensuring forward progress without
+risking livelock.
+
+In order to solve the problem at the API level, the sequence locks were extended
+to allow a proper handover between the the spinning reader and the maybe
+blocked writer.
+
+Sequence locks
+--------------
+
+Sequence counters and sequential locks are documented in
+Documentation/locking/seqlock.rst.
+
+The interface has been extended to ensure proper preemption states for the
+writer and spinning reader contexts. This is achieved by embedding the writer
+serialization lock directly into the sequence counter type, resulting in
+composite types such as seqcount_spinlock_t or seqcount_mutex_t.
+
+These composite types allow readers to detect an ongoing write and actively
+boost the writer’s priority to help it complete its update instead of spinning
+and waiting for its completion.
+
+If the plain seqcount_t is used, extra care must be taken to synchronize the
+reader with the writer during updates. The writer must ensure its update is
+serialized and non-preemptible relative to the reader. This cannot be achieved
+using a regular spinlock_t because spinlock_t on PREEMPT_RT does not disable
+preemption. In such cases, using seqcount_spinlock_t is the preferred solution.
+
+However, if there is no spinning involved i.e., if the reader only needs to
+detect whether a write has started and not serialize against it then using
+seqcount_t is reasonable.
diff --git a/Documentation/core-api/real-time/index.rst b/Documentation/core-api/real-time/index.rst
new file mode 100644
index 000000000000..7e14c4ea3d59
--- /dev/null
+++ b/Documentation/core-api/real-time/index.rst
@@ -0,0 +1,16 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================
+Real-time preemption
+=====================
+
+This documentation is intended for Linux kernel developers and contributors
+interested in the inner workings of PREEMPT_RT. It explains key concepts and
+the required changes compared to a non-PREEMPT_RT configuration.
+
+.. toctree::
+ :maxdepth: 2
+
+ theory
+ differences
+ architecture-porting
diff --git a/Documentation/core-api/real-time/theory.rst b/Documentation/core-api/real-time/theory.rst
new file mode 100644
index 000000000000..43d0120737f8
--- /dev/null
+++ b/Documentation/core-api/real-time/theory.rst
@@ -0,0 +1,116 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================
+Theory of operation
+=====================
+
+:Author: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+
+Preface
+=======
+
+PREEMPT_RT transforms the Linux kernel into a real-time kernel. It achieves
+this by replacing locking primitives, such as spinlock_t, with a preemptible
+and priority-inheritance aware implementation known as rtmutex, and by enforcing
+the use of threaded interrupts. As a result, the kernel becomes fully
+preemptible, with the exception of a few critical code paths, including entry
+code, the scheduler, and low-level interrupt handling routines.
+
+This transformation places the majority of kernel execution contexts under the
+control of the scheduler and significantly increasing the number of preemption
+points. Consequently, it reduces the latency between a high-priority task
+becoming runnable and its actual execution on the CPU.
+
+Scheduling
+==========
+
+The core principles of Linux scheduling and the associated user-space API are
+documented in the man page sched(7)
+`sched(7) <https://man7.org/linux/man-pages/man7/sched.7.html>`_.
+By default, the Linux kernel uses the SCHED_OTHER scheduling policy. Under
+this policy, a task is preempted when the scheduler determines that it has
+consumed a fair share of CPU time relative to other runnable tasks. However,
+the policy does not guarantee immediate preemption when a new SCHED_OTHER task
+becomes runnable. The currently running task may continue executing.
+
+This behavior differs from that of real-time scheduling policies such as
+SCHED_FIFO. When a task with a real-time policy becomes runnable, the
+scheduler immediately selects it for execution if it has a higher priority than
+the currently running task. The task continues to run until it voluntarily
+yields the CPU, typically by blocking on an event.
+
+Sleeping spin locks
+===================
+
+The various lock types and their behavior under real-time configurations are
+described in detail in Documentation/locking/locktypes.rst.
+In a non-PREEMPT_RT configuration, a spinlock_t is acquired by first disabling
+preemption and then actively spinning until the lock becomes available. Once
+the lock is released, preemption is enabled. From a real-time perspective,
+this approach is undesirable because disabling preemption prevents the
+scheduler from switching to a higher-priority task, potentially increasing
+latency.
+
+To address this, PREEMPT_RT replaces spinning locks with sleeping spin locks
+that do not disable preemption. On PREEMPT_RT, spinlock_t is implemented using
+rtmutex. Instead of spinning, a task attempting to acquire a contended lock
+disables CPU migration, donates its priority to the lock owner (priority
+inheritance), and voluntarily schedules out while waiting for the lock to
+become available.
+
+Disabling CPU migration provides the same effect as disabling preemption, while
+still allowing preemption and ensuring that the task continues to run on the
+same CPU while holding a sleeping lock.
+
+Priority inheritance
+====================
+
+Lock types such as spinlock_t and mutex_t in a PREEMPT_RT enabled kernel are
+implemented on top of rtmutex, which provides support for priority inheritance
+(PI). When a task blocks on such a lock, the PI mechanism temporarily
+propagates the blocked task’s scheduling parameters to the lock owner.
+
+For example, if a SCHED_FIFO task A blocks on a lock currently held by a
+SCHED_OTHER task B, task A’s scheduling policy and priority are temporarily
+inherited by task B. After this inheritance, task A is put to sleep while
+waiting for the lock, and task B effectively becomes the highest-priority task
+in the system. This allows B to continue executing, make progress, and
+eventually release the lock.
+
+Once B releases the lock, it reverts to its original scheduling parameters, and
+task A can resume execution.
+
+Threaded interrupts
+===================
+
+Interrupt handlers are another source of code that executes with preemption
+disabled and outside the control of the scheduler. To bring interrupt handling
+under scheduler control, PREEMPT_RT enforces threaded interrupt handlers.
+
+With forced threading, interrupt handling is split into two stages. The first
+stage, the primary handler, is executed in IRQ context with interrupts disabled.
+Its sole responsibility is to wake the associated threaded handler. The second
+stage, the threaded handler, is the function passed to request_irq() as the
+interrupt handler. It runs in process context, scheduled by the kernel.
+
+From waking the interrupt thread until threaded handling is completed, the
+interrupt source is masked in the interrupt controller. This ensures that the
+device interrupt remains pending but does not retrigger the CPU, allowing the
+system to exit IRQ context and handle the interrupt in a scheduled thread.
+
+By default, the threaded handler executes with the SCHED_FIFO scheduling policy
+and a priority of 50 (MAX_RT_PRIO / 2), which is midway between the minimum and
+maximum real-time priorities.
+
+If the threaded interrupt handler raises any soft interrupts during its
+execution, those soft interrupt routines are invoked after the threaded handler
+completes, within the same thread. Preemption remains enabled during the
+execution of the soft interrupt handler.
+
+Summary
+=======
+
+By using sleeping locks and forced-threaded interrupts, PREEMPT_RT
+significantly reduces sections of code where interrupts or preemption is
+disabled, allowing the scheduler to preempt the current execution context and
+switch to a higher-priority task.
diff --git a/Documentation/core-api/refcount-vs-atomic.rst b/Documentation/core-api/refcount-vs-atomic.rst
index 322851bada16..94e628c1eb49 100644
--- a/Documentation/core-api/refcount-vs-atomic.rst
+++ b/Documentation/core-api/refcount-vs-atomic.rst
@@ -35,7 +35,7 @@ atomics & refcounters only provide atomicity and
program order (po) relation (on the same CPU). It guarantees that
each ``atomic_*()`` and ``refcount_*()`` operation is atomic and instructions
are executed in program order on a single CPU.
-This is implemented using :c:func:`READ_ONCE`/:c:func:`WRITE_ONCE` and
+This is implemented using READ_ONCE()/WRITE_ONCE() and
compare-and-swap primitives.
A strong (full) memory ordering guarantees that all prior loads and
@@ -44,7 +44,7 @@ before any po-later instruction is executed on the same CPU.
It also guarantees that all po-earlier stores on the same CPU
and all propagated stores from other CPUs must propagate to all
other CPUs before any po-later instruction is executed on the original
-CPU (A-cumulative property). This is implemented using :c:func:`smp_mb`.
+CPU (A-cumulative property). This is implemented using smp_mb().
A RELEASE memory ordering guarantees that all prior loads and
stores (all po-earlier instructions) on the same CPU are completed
@@ -52,7 +52,14 @@ before the operation. It also guarantees that all po-earlier
stores on the same CPU and all propagated stores from other CPUs
must propagate to all other CPUs before the release operation
(A-cumulative property). This is implemented using
-:c:func:`smp_store_release`.
+smp_store_release().
+
+An ACQUIRE memory ordering guarantees that all post loads and
+stores (all po-later instructions) on the same CPU are
+completed after the acquire operation. It also guarantees that all
+po-later stores on the same CPU must propagate to all other CPUs
+after the acquire operation executes. This is implemented using
+smp_acquire__after_ctrl_dep().
A control dependency (on success) for refcounters guarantees that
if a reference for an object was successfully obtained (reference
@@ -71,45 +78,57 @@ case 1) - non-"Read/Modify/Write" (RMW) ops
Function changes:
- * :c:func:`atomic_set` --> :c:func:`refcount_set`
- * :c:func:`atomic_read` --> :c:func:`refcount_read`
+ * atomic_set() --> refcount_set()
+ * atomic_read() --> refcount_read()
Memory ordering guarantee changes:
* none (both fully unordered)
-case 2) - increment-based ops that return no value
+case 2) - non-"Read/Modify/Write" (RMW) ops with release ordering
+-----------------------------------------------------------------
+
+Function changes:
+
+ * atomic_set_release() --> refcount_set_release()
+
+Memory ordering guarantee changes:
+
+ * none (both provide RELEASE ordering)
+
+
+case 3) - increment-based ops that return no value
--------------------------------------------------
Function changes:
- * :c:func:`atomic_inc` --> :c:func:`refcount_inc`
- * :c:func:`atomic_add` --> :c:func:`refcount_add`
+ * atomic_inc() --> refcount_inc()
+ * atomic_add() --> refcount_add()
Memory ordering guarantee changes:
* none (both fully unordered)
-case 3) - decrement-based RMW ops that return no value
+case 4) - decrement-based RMW ops that return no value
------------------------------------------------------
Function changes:
- * :c:func:`atomic_dec` --> :c:func:`refcount_dec`
+ * atomic_dec() --> refcount_dec()
Memory ordering guarantee changes:
* fully unordered --> RELEASE ordering
-case 4) - increment-based RMW ops that return a value
+case 5) - increment-based RMW ops that return a value
-----------------------------------------------------
Function changes:
- * :c:func:`atomic_inc_not_zero` --> :c:func:`refcount_inc_not_zero`
- * no atomic counterpart --> :c:func:`refcount_add_not_zero`
+ * atomic_inc_not_zero() --> refcount_inc_not_zero()
+ * no atomic counterpart --> refcount_add_not_zero()
Memory ordering guarantees changes:
@@ -119,32 +138,56 @@ Memory ordering guarantees changes:
result of obtaining pointer to the object!
-case 5) - decrement-based RMW ops that return a value
------------------------------------------------------
+case 6) - increment-based RMW ops with acquire ordering that return a value
+---------------------------------------------------------------------------
+
+Function changes:
+
+ * atomic_inc_not_zero() --> refcount_inc_not_zero_acquire()
+ * no atomic counterpart --> refcount_add_not_zero_acquire()
+
+Memory ordering guarantees changes:
+
+ * fully ordered --> ACQUIRE ordering on success
+
+
+case 7) - generic dec/sub decrement-based RMW ops that return a value
+---------------------------------------------------------------------
+
+Function changes:
+
+ * atomic_dec_and_test() --> refcount_dec_and_test()
+ * atomic_sub_and_test() --> refcount_sub_and_test()
+
+Memory ordering guarantees changes:
+
+ * fully ordered --> RELEASE ordering + ACQUIRE ordering on success
+
+
+case 8) other decrement-based RMW ops that return a value
+---------------------------------------------------------
Function changes:
- * :c:func:`atomic_dec_and_test` --> :c:func:`refcount_dec_and_test`
- * :c:func:`atomic_sub_and_test` --> :c:func:`refcount_sub_and_test`
- * no atomic counterpart --> :c:func:`refcount_dec_if_one`
+ * no atomic counterpart --> refcount_dec_if_one()
* ``atomic_add_unless(&var, -1, 1)`` --> ``refcount_dec_not_one(&var)``
Memory ordering guarantees changes:
* fully ordered --> RELEASE ordering + control dependency
-.. note:: :c:func:`atomic_add_unless` only provides full order on success.
+.. note:: atomic_add_unless() only provides full order on success.
-case 6) - lock-based RMW
+case 9) - lock-based RMW
------------------------
Function changes:
- * :c:func:`atomic_dec_and_lock` --> :c:func:`refcount_dec_and_lock`
- * :c:func:`atomic_dec_and_mutex_lock` --> :c:func:`refcount_dec_and_mutex_lock`
+ * atomic_dec_and_lock() --> refcount_dec_and_lock()
+ * atomic_dec_and_mutex_lock() --> refcount_dec_and_mutex_lock()
Memory ordering guarantees changes:
* fully ordered --> RELEASE ordering + control dependency + hold
- :c:func:`spin_lock` on success
+ spin_lock() on success
diff --git a/Documentation/core-api/swiotlb.rst b/Documentation/core-api/swiotlb.rst
new file mode 100644
index 000000000000..9e0fe027dd3b
--- /dev/null
+++ b/Documentation/core-api/swiotlb.rst
@@ -0,0 +1,321 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============
+DMA and swiotlb
+===============
+
+swiotlb is a memory buffer allocator used by the Linux kernel DMA layer. It is
+typically used when a device doing DMA can't directly access the target memory
+buffer because of hardware limitations or other requirements. In such a case,
+the DMA layer calls swiotlb to allocate a temporary memory buffer that conforms
+to the limitations. The DMA is done to/from this temporary memory buffer, and
+the CPU copies the data between the temporary buffer and the original target
+memory buffer. This approach is generically called "bounce buffering", and the
+temporary memory buffer is called a "bounce buffer".
+
+Device drivers don't interact directly with swiotlb. Instead, drivers inform
+the DMA layer of the DMA attributes of the devices they are managing, and use
+the normal DMA map, unmap, and sync APIs when programming a device to do DMA.
+These APIs use the device DMA attributes and kernel-wide settings to determine
+if bounce buffering is necessary. If so, the DMA layer manages the allocation,
+freeing, and sync'ing of bounce buffers. Since the DMA attributes are per
+device, some devices in a system may use bounce buffering while others do not.
+
+Because the CPU copies data between the bounce buffer and the original target
+memory buffer, doing bounce buffering is slower than doing DMA directly to the
+original memory buffer, and it consumes more CPU resources. So it is used only
+when necessary for providing DMA functionality.
+
+Usage Scenarios
+---------------
+swiotlb was originally created to handle DMA for devices with addressing
+limitations. As physical memory sizes grew beyond 4 GiB, some devices could
+only provide 32-bit DMA addresses. By allocating bounce buffer memory below
+the 4 GiB line, these devices with addressing limitations could still work and
+do DMA.
+
+More recently, Confidential Computing (CoCo) VMs have the guest VM's memory
+encrypted by default, and the memory is not accessible by the host hypervisor
+and VMM. For the host to do I/O on behalf of the guest, the I/O must be
+directed to guest memory that is unencrypted. CoCo VMs set a kernel-wide option
+to force all DMA I/O to use bounce buffers, and the bounce buffer memory is set
+up as unencrypted. The host does DMA I/O to/from the bounce buffer memory, and
+the Linux kernel DMA layer does "sync" operations to cause the CPU to copy the
+data to/from the original target memory buffer. The CPU copying bridges between
+the unencrypted and the encrypted memory. This use of bounce buffers allows
+device drivers to "just work" in a CoCo VM, with no modifications
+needed to handle the memory encryption complexity.
+
+Other edge case scenarios arise for bounce buffers. For example, when IOMMU
+mappings are set up for a DMA operation to/from a device that is considered
+"untrusted", the device should be given access only to the memory containing
+the data being transferred. But if that memory occupies only part of an IOMMU
+granule, other parts of the granule may contain unrelated kernel data. Since
+IOMMU access control is per-granule, the untrusted device can gain access to
+the unrelated kernel data. This problem is solved by bounce buffering the DMA
+operation and ensuring that unused portions of the bounce buffers do not
+contain any unrelated kernel data.
+
+Core Functionality
+------------------
+The primary swiotlb APIs are swiotlb_tbl_map_single() and
+swiotlb_tbl_unmap_single(). The "map" API allocates a bounce buffer of a
+specified size in bytes and returns the physical address of the buffer. The
+buffer memory is physically contiguous. The expectation is that the DMA layer
+maps the physical memory address to a DMA address, and returns the DMA address
+to the driver for programming into the device. If a DMA operation specifies
+multiple memory buffer segments, a separate bounce buffer must be allocated for
+each segment. swiotlb_tbl_map_single() always does a "sync" operation (i.e., a
+CPU copy) to initialize the bounce buffer to match the contents of the original
+buffer.
+
+swiotlb_tbl_unmap_single() does the reverse. If the DMA operation might have
+updated the bounce buffer memory and DMA_ATTR_SKIP_CPU_SYNC is not set, the
+unmap does a "sync" operation to cause a CPU copy of the data from the bounce
+buffer back to the original buffer. Then the bounce buffer memory is freed.
+
+swiotlb also provides "sync" APIs that correspond to the dma_sync_*() APIs that
+a driver may use when control of a buffer transitions between the CPU and the
+device. The swiotlb "sync" APIs cause a CPU copy of the data between the
+original buffer and the bounce buffer. Like the dma_sync_*() APIs, the swiotlb
+"sync" APIs support doing a partial sync, where only a subset of the bounce
+buffer is copied to/from the original buffer.
+
+Core Functionality Constraints
+------------------------------
+The swiotlb map/unmap/sync APIs must operate without blocking, as they are
+called by the corresponding DMA APIs which may run in contexts that cannot
+block. Hence the default memory pool for swiotlb allocations must be
+pre-allocated at boot time (but see Dynamic swiotlb below). Because swiotlb
+allocations must be physically contiguous, the entire default memory pool is
+allocated as a single contiguous block.
+
+The need to pre-allocate the default swiotlb pool creates a boot-time tradeoff.
+The pool should be large enough to ensure that bounce buffer requests can
+always be satisfied, as the non-blocking requirement means requests can't wait
+for space to become available. But a large pool potentially wastes memory, as
+this pre-allocated memory is not available for other uses in the system. The
+tradeoff is particularly acute in CoCo VMs that use bounce buffers for all DMA
+I/O. These VMs use a heuristic to set the default pool size to ~6% of memory,
+with a max of 1 GiB, which has the potential to be very wasteful of memory.
+Conversely, the heuristic might produce a size that is insufficient, depending
+on the I/O patterns of the workload in the VM. The dynamic swiotlb feature
+described below can help, but has limitations. Better management of the swiotlb
+default memory pool size remains an open issue.
+
+A single allocation from swiotlb is limited to IO_TLB_SIZE * IO_TLB_SEGSIZE
+bytes, which is 256 KiB with current definitions. When a device's DMA settings
+are such that the device might use swiotlb, the maximum size of a DMA segment
+must be limited to that 256 KiB. This value is communicated to higher-level
+kernel code via dma_map_mapping_size() and swiotlb_max_mapping_size(). If the
+higher-level code fails to account for this limit, it may make requests that
+are too large for swiotlb, and get a "swiotlb full" error.
+
+A key device DMA setting is "min_align_mask", which is a power of 2 minus 1
+so that some number of low order bits are set, or it may be zero. swiotlb
+allocations ensure these min_align_mask bits of the physical address of the
+bounce buffer match the same bits in the address of the original buffer. When
+min_align_mask is non-zero, it may produce an "alignment offset" in the address
+of the bounce buffer that slightly reduces the maximum size of an allocation.
+This potential alignment offset is reflected in the value returned by
+swiotlb_max_mapping_size(), which can show up in places like
+/sys/block/<device>/queue/max_sectors_kb. For example, if a device does not use
+swiotlb, max_sectors_kb might be 512 KiB or larger. If a device might use
+swiotlb, max_sectors_kb will be 256 KiB. When min_align_mask is non-zero,
+max_sectors_kb might be even smaller, such as 252 KiB.
+
+swiotlb_tbl_map_single() also takes an "alloc_align_mask" parameter. This
+parameter specifies the allocation of bounce buffer space must start at a
+physical address with the alloc_align_mask bits set to zero. But the actual
+bounce buffer might start at a larger address if min_align_mask is non-zero.
+Hence there may be pre-padding space that is allocated prior to the start of
+the bounce buffer. Similarly, the end of the bounce buffer is rounded up to an
+alloc_align_mask boundary, potentially resulting in post-padding space. Any
+pre-padding or post-padding space is not initialized by swiotlb code. The
+"alloc_align_mask" parameter is used by IOMMU code when mapping for untrusted
+devices. It is set to the granule size - 1 so that the bounce buffer is
+allocated entirely from granules that are not used for any other purpose.
+
+Data structures concepts
+------------------------
+Memory used for swiotlb bounce buffers is allocated from overall system memory
+as one or more "pools". The default pool is allocated during system boot with a
+default size of 64 MiB. The default pool size may be modified with the
+"swiotlb=" kernel boot line parameter. The default size may also be adjusted
+due to other conditions, such as running in a CoCo VM, as described above. If
+CONFIG_SWIOTLB_DYNAMIC is enabled, additional pools may be allocated later in
+the life of the system. Each pool must be a contiguous range of physical
+memory. The default pool is allocated below the 4 GiB physical address line so
+it works for devices that can only address 32-bits of physical memory (unless
+architecture-specific code provides the SWIOTLB_ANY flag). In a CoCo VM, the
+pool memory must be decrypted before swiotlb is used.
+
+Each pool is divided into "slots" of size IO_TLB_SIZE, which is 2 KiB with
+current definitions. IO_TLB_SEGSIZE contiguous slots (128 slots) constitute
+what might be called a "slot set". When a bounce buffer is allocated, it
+occupies one or more contiguous slots. A slot is never shared by multiple
+bounce buffers. Furthermore, a bounce buffer must be allocated from a single
+slot set, which leads to the maximum bounce buffer size being IO_TLB_SIZE *
+IO_TLB_SEGSIZE. Multiple smaller bounce buffers may co-exist in a single slot
+set if the alignment and size constraints can be met.
+
+Slots are also grouped into "areas", with the constraint that a slot set exists
+entirely in a single area. Each area has its own spin lock that must be held to
+manipulate the slots in that area. The division into areas avoids contending
+for a single global spin lock when swiotlb is heavily used, such as in a CoCo
+VM. The number of areas defaults to the number of CPUs in the system for
+maximum parallelism, but since an area can't be smaller than IO_TLB_SEGSIZE
+slots, it might be necessary to assign multiple CPUs to the same area. The
+number of areas can also be set via the "swiotlb=" kernel boot parameter.
+
+When allocating a bounce buffer, if the area associated with the calling CPU
+does not have enough free space, areas associated with other CPUs are tried
+sequentially. For each area tried, the area's spin lock must be obtained before
+trying an allocation, so contention may occur if swiotlb is relatively busy
+overall. But an allocation request does not fail unless all areas do not have
+enough free space.
+
+IO_TLB_SIZE, IO_TLB_SEGSIZE, and the number of areas must all be powers of 2 as
+the code uses shifting and bit masking to do many of the calculations. The
+number of areas is rounded up to a power of 2 if necessary to meet this
+requirement.
+
+The default pool is allocated with PAGE_SIZE alignment. If an alloc_align_mask
+argument to swiotlb_tbl_map_single() specifies a larger alignment, one or more
+initial slots in each slot set might not meet the alloc_align_mask criterium.
+Because a bounce buffer allocation can't cross a slot set boundary, eliminating
+those initial slots effectively reduces the max size of a bounce buffer.
+Currently, there's no problem because alloc_align_mask is set based on IOMMU
+granule size, and granules cannot be larger than PAGE_SIZE. But if that were to
+change in the future, the initial pool allocation might need to be done with
+alignment larger than PAGE_SIZE.
+
+Dynamic swiotlb
+---------------
+When CONFIG_SWIOTLB_DYNAMIC is enabled, swiotlb can do on-demand expansion of
+the amount of memory available for allocation as bounce buffers. If a bounce
+buffer request fails due to lack of available space, an asynchronous background
+task is kicked off to allocate memory from general system memory and turn it
+into an swiotlb pool. Creating an additional pool must be done asynchronously
+because the memory allocation may block, and as noted above, swiotlb requests
+are not allowed to block. Once the background task is kicked off, the bounce
+buffer request creates a "transient pool" to avoid returning an "swiotlb full"
+error. A transient pool has the size of the bounce buffer request, and is
+deleted when the bounce buffer is freed. Memory for this transient pool comes
+from the general system memory atomic pool so that creation does not block.
+Creating a transient pool has relatively high cost, particularly in a CoCo VM
+where the memory must be decrypted, so it is done only as a stopgap until the
+background task can add another non-transient pool.
+
+Adding a dynamic pool has limitations. Like with the default pool, the memory
+must be physically contiguous, so the size is limited to MAX_PAGE_ORDER pages
+(e.g., 4 MiB on a typical x86 system). Due to memory fragmentation, a max size
+allocation may not be available. The dynamic pool allocator tries smaller sizes
+until it succeeds, but with a minimum size of 1 MiB. Given sufficient system
+memory fragmentation, dynamically adding a pool might not succeed at all.
+
+The number of areas in a dynamic pool may be different from the number of areas
+in the default pool. Because the new pool size is typically a few MiB at most,
+the number of areas will likely be smaller. For example, with a new pool size
+of 4 MiB and the 256 KiB minimum area size, only 16 areas can be created. If
+the system has more than 16 CPUs, multiple CPUs must share an area, creating
+more lock contention.
+
+New pools added via dynamic swiotlb are linked together in a linear list.
+swiotlb code frequently must search for the pool containing a particular
+swiotlb physical address, so that search is linear and not performant with a
+large number of dynamic pools. The data structures could be improved for
+faster searches.
+
+Overall, dynamic swiotlb works best for small configurations with relatively
+few CPUs. It allows the default swiotlb pool to be smaller so that memory is
+not wasted, with dynamic pools making more space available if needed (as long
+as fragmentation isn't an obstacle). It is less useful for large CoCo VMs.
+
+Data Structure Details
+----------------------
+swiotlb is managed with four primary data structures: io_tlb_mem, io_tlb_pool,
+io_tlb_area, and io_tlb_slot. io_tlb_mem describes a swiotlb memory allocator,
+which includes the default memory pool and any dynamic or transient pools
+linked to it. Limited statistics on swiotlb usage are kept per memory allocator
+and are stored in this data structure. These statistics are available under
+/sys/kernel/debug/swiotlb when CONFIG_DEBUG_FS is set.
+
+io_tlb_pool describes a memory pool, either the default pool, a dynamic pool,
+or a transient pool. The description includes the start and end addresses of
+the memory in the pool, a pointer to an array of io_tlb_area structures, and a
+pointer to an array of io_tlb_slot structures that are associated with the pool.
+
+io_tlb_area describes an area. The primary field is the spin lock used to
+serialize access to slots in the area. The io_tlb_area array for a pool has an
+entry for each area, and is accessed using a 0-based area index derived from the
+calling processor ID. Areas exist solely to allow parallel access to swiotlb
+from multiple CPUs.
+
+io_tlb_slot describes an individual memory slot in the pool, with size
+IO_TLB_SIZE (2 KiB currently). The io_tlb_slot array is indexed by the slot
+index computed from the bounce buffer address relative to the starting memory
+address of the pool. The size of struct io_tlb_slot is 24 bytes, so the
+overhead is about 1% of the slot size.
+
+The io_tlb_slot array is designed to meet several requirements. First, the DMA
+APIs and the corresponding swiotlb APIs use the bounce buffer address as the
+identifier for a bounce buffer. This address is returned by
+swiotlb_tbl_map_single(), and then passed as an argument to
+swiotlb_tbl_unmap_single() and the swiotlb_sync_*() functions. The original
+memory buffer address obviously must be passed as an argument to
+swiotlb_tbl_map_single(), but it is not passed to the other APIs. Consequently,
+swiotlb data structures must save the original memory buffer address so that it
+can be used when doing sync operations. This original address is saved in the
+io_tlb_slot array.
+
+Second, the io_tlb_slot array must handle partial sync requests. In such cases,
+the argument to swiotlb_sync_*() is not the address of the start of the bounce
+buffer but an address somewhere in the middle of the bounce buffer, and the
+address of the start of the bounce buffer isn't known to swiotlb code. But
+swiotlb code must be able to calculate the corresponding original memory buffer
+address to do the CPU copy dictated by the "sync". So an adjusted original
+memory buffer address is populated into the struct io_tlb_slot for each slot
+occupied by the bounce buffer. An adjusted "alloc_size" of the bounce buffer is
+also recorded in each struct io_tlb_slot so a sanity check can be performed on
+the size of the "sync" operation. The "alloc_size" field is not used except for
+the sanity check.
+
+Third, the io_tlb_slot array is used to track available slots. The "list" field
+in struct io_tlb_slot records how many contiguous available slots exist starting
+at that slot. A "0" indicates that the slot is occupied. A value of "1"
+indicates only the current slot is available. A value of "2" indicates the
+current slot and the next slot are available, etc. The maximum value is
+IO_TLB_SEGSIZE, which can appear in the first slot in a slot set, and indicates
+that the entire slot set is available. These values are used when searching for
+available slots to use for a new bounce buffer. They are updated when allocating
+a new bounce buffer and when freeing a bounce buffer. At pool creation time, the
+"list" field is initialized to IO_TLB_SEGSIZE down to 1 for the slots in every
+slot set.
+
+Fourth, the io_tlb_slot array keeps track of any "padding slots" allocated to
+meet alloc_align_mask requirements described above. When
+swiotlb_tbl_map_single() allocates bounce buffer space to meet alloc_align_mask
+requirements, it may allocate pre-padding space across zero or more slots. But
+when swiotlb_tbl_unmap_single() is called with the bounce buffer address, the
+alloc_align_mask value that governed the allocation, and therefore the
+allocation of any padding slots, is not known. The "pad_slots" field records
+the number of padding slots so that swiotlb_tbl_unmap_single() can free them.
+The "pad_slots" value is recorded only in the first non-padding slot allocated
+to the bounce buffer.
+
+Restricted pools
+----------------
+The swiotlb machinery is also used for "restricted pools", which are pools of
+memory separate from the default swiotlb pool, and that are dedicated for DMA
+use by a particular device. Restricted pools provide a level of DMA memory
+protection on systems with limited hardware protection capabilities, such as
+those lacking an IOMMU. Such usage is specified by DeviceTree entries and
+requires that CONFIG_DMA_RESTRICTED_POOL is set. Each restricted pool is based
+on its own io_tlb_mem data structure that is independent of the main swiotlb
+io_tlb_mem.
+
+Restricted pools add swiotlb_alloc() and swiotlb_free() APIs, which are called
+from the dma_alloc_*() and dma_free_*() APIs. The swiotlb_alloc/free() APIs
+allocate/free slots from/to the restricted pool directly and do not go through
+swiotlb_tbl_map/unmap_single().
diff --git a/Documentation/core-api/symbol-namespaces.rst b/Documentation/core-api/symbol-namespaces.rst
new file mode 100644
index 000000000000..034898e81ba2
--- /dev/null
+++ b/Documentation/core-api/symbol-namespaces.rst
@@ -0,0 +1,168 @@
+=================
+Symbol Namespaces
+=================
+
+The following document describes how to use Symbol Namespaces to structure the
+export surface of in-kernel symbols exported through the family of
+EXPORT_SYMBOL() macros.
+
+Introduction
+============
+
+Symbol Namespaces have been introduced as a means to structure the export
+surface of the in-kernel API. It allows subsystem maintainers to partition
+their exported symbols into separate namespaces. That is useful for
+documentation purposes (think of the SUBSYSTEM_DEBUG namespace) as well as for
+limiting the availability of a set of symbols for use in other parts of the
+kernel. As of today, modules that make use of symbols exported into namespaces,
+are required to import the namespace. Otherwise the kernel will, depending on
+its configuration, reject loading the module or warn about a missing import.
+
+Additionally, it is possible to put symbols into a module namespace, strictly
+limiting which modules are allowed to use these symbols.
+
+How to define Symbol Namespaces
+===============================
+
+Symbols can be exported into namespace using different methods. All of them are
+changing the way EXPORT_SYMBOL and friends are instrumented to create ksymtab
+entries.
+
+Using the EXPORT_SYMBOL macros
+------------------------------
+
+In addition to the macros EXPORT_SYMBOL() and EXPORT_SYMBOL_GPL(), that allow
+exporting of kernel symbols to the kernel symbol table, variants of these are
+available to export symbols into a certain namespace: EXPORT_SYMBOL_NS() and
+EXPORT_SYMBOL_NS_GPL(). They take one additional argument: the namespace as a
+string constant. Note that this string must not contain whitespaces.
+E.g. to export the symbol ``usb_stor_suspend`` into the
+namespace ``USB_STORAGE``, use::
+
+ EXPORT_SYMBOL_NS(usb_stor_suspend, "USB_STORAGE");
+
+The corresponding ksymtab entry struct ``kernel_symbol`` will have the member
+``namespace`` set accordingly. A symbol that is exported without a namespace will
+refer to ``NULL``. There is no default namespace if none is defined. ``modpost``
+and kernel/module/main.c make use the namespace at build time or module load
+time, respectively.
+
+Using the DEFAULT_SYMBOL_NAMESPACE define
+-----------------------------------------
+
+Defining namespaces for all symbols of a subsystem can be very verbose and may
+become hard to maintain. Therefore a default define (DEFAULT_SYMBOL_NAMESPACE)
+is been provided, that, if set, will become the default for all EXPORT_SYMBOL()
+and EXPORT_SYMBOL_GPL() macro expansions that do not specify a namespace.
+
+There are multiple ways of specifying this define and it depends on the
+subsystem and the maintainer's preference, which one to use. The first option
+is to define the default namespace in the ``Makefile`` of the subsystem. E.g. to
+export all symbols defined in usb-common into the namespace USB_COMMON, add a
+line like this to drivers/usb/common/Makefile::
+
+ ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE='"USB_COMMON"'
+
+That will affect all EXPORT_SYMBOL() and EXPORT_SYMBOL_GPL() statements. A
+symbol exported with EXPORT_SYMBOL_NS() while this definition is present, will
+still be exported into the namespace that is passed as the namespace argument
+as this argument has preference over a default symbol namespace.
+
+A second option to define the default namespace is directly in the compilation
+unit as preprocessor statement. The above example would then read::
+
+ #define DEFAULT_SYMBOL_NAMESPACE "USB_COMMON"
+
+within the corresponding compilation unit before the #include for
+<linux/export.h>. Typically it's placed before the first #include statement.
+
+Using the EXPORT_SYMBOL_FOR_MODULES() macro
+-------------------------------------------
+
+Symbols exported using this macro are put into a module namespace. This
+namespace cannot be imported. These exports are GPL-only as they are only
+intended for in-tree modules.
+
+The macro takes a comma separated list of module names, allowing only those
+modules to access this symbol. Simple tail-globs are supported.
+
+For example::
+
+ EXPORT_SYMBOL_FOR_MODULES(preempt_notifier_inc, "kvm,kvm-*")
+
+will limit usage of this symbol to modules whose name matches the given
+patterns.
+
+How to use Symbols exported in Namespaces
+=========================================
+
+In order to use symbols that are exported into namespaces, kernel modules need
+to explicitly import these namespaces. Otherwise the kernel might reject to
+load the module. The module code is required to use the macro MODULE_IMPORT_NS
+for the namespaces it uses symbols from. E.g. a module using the
+usb_stor_suspend symbol from above, needs to import the namespace USB_STORAGE
+using a statement like::
+
+ MODULE_IMPORT_NS("USB_STORAGE");
+
+This will create a ``modinfo`` tag in the module for each imported namespace.
+This has the side effect, that the imported namespaces of a module can be
+inspected with modinfo::
+
+ $ modinfo drivers/usb/storage/ums-karma.ko
+ [...]
+ import_ns: USB_STORAGE
+ [...]
+
+
+It is advisable to add the MODULE_IMPORT_NS() statement close to other module
+metadata definitions like MODULE_AUTHOR() or MODULE_LICENSE().
+
+Loading Modules that use namespaced Symbols
+===========================================
+
+At module loading time (e.g. ``insmod``), the kernel will check each symbol
+referenced from the module for its availability and whether the namespace it
+might be exported to has been imported by the module. The default behaviour of
+the kernel is to reject loading modules that don't specify sufficient imports.
+An error will be logged and loading will be failed with EINVAL. In order to
+allow loading of modules that don't satisfy this precondition, a configuration
+option is available: Setting MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS=y will
+enable loading regardless, but will emit a warning.
+
+Automatically creating MODULE_IMPORT_NS statements
+==================================================
+
+Missing namespaces imports can easily be detected at build time. In fact,
+modpost will emit a warning if a module uses a symbol from a namespace
+without importing it.
+MODULE_IMPORT_NS() statements will usually be added at a definite location
+(along with other module meta data). To make the life of module authors (and
+subsystem maintainers) easier, a script and make target is available to fixup
+missing imports. Fixing missing imports can be done with::
+
+ $ make nsdeps
+
+A typical scenario for module authors would be::
+
+ - write code that depends on a symbol from a not imported namespace
+ - ``make``
+ - notice the warning of modpost telling about a missing import
+ - run ``make nsdeps`` to add the import to the correct code location
+
+For subsystem maintainers introducing a namespace, the steps are very similar.
+Again, ``make nsdeps`` will eventually add the missing namespace imports for
+in-tree modules::
+
+ - move or add symbols to a namespace (e.g. with EXPORT_SYMBOL_NS())
+ - ``make`` (preferably with an allmodconfig to cover all in-kernel
+ modules)
+ - notice the warning of modpost telling about a missing import
+ - run ``make nsdeps`` to add the import to the correct code location
+
+You can also run nsdeps for external module builds. A typical usage is::
+
+ $ make -C <path_to_kernel_src> M=$PWD nsdeps
+
+Note: it will happily generate an import statement for the module namespace;
+which will not work and generates build and runtime failures.
diff --git a/Documentation/core-api/this_cpu_ops.rst b/Documentation/core-api/this_cpu_ops.rst
new file mode 100644
index 000000000000..533ac5dd5750
--- /dev/null
+++ b/Documentation/core-api/this_cpu_ops.rst
@@ -0,0 +1,347 @@
+===================
+this_cpu operations
+===================
+
+:Author: Christoph Lameter, August 4th, 2014
+:Author: Pranith Kumar, Aug 2nd, 2014
+
+this_cpu operations are a way of optimizing access to per cpu
+variables associated with the *currently* executing processor. This is
+done through the use of segment registers (or a dedicated register where
+the cpu permanently stored the beginning of the per cpu area for a
+specific processor).
+
+this_cpu operations add a per cpu variable offset to the processor
+specific per cpu base and encode that operation in the instruction
+operating on the per cpu variable.
+
+This means that there are no atomicity issues between the calculation of
+the offset and the operation on the data. Therefore it is not
+necessary to disable preemption or interrupts to ensure that the
+processor is not changed between the calculation of the address and
+the operation on the data.
+
+Read-modify-write operations are of particular interest. Frequently
+processors have special lower latency instructions that can operate
+without the typical synchronization overhead, but still provide some
+sort of relaxed atomicity guarantees. The x86, for example, can execute
+RMW (Read Modify Write) instructions like inc/dec/cmpxchg without the
+lock prefix and the associated latency penalty.
+
+Access to the variable without the lock prefix is not synchronized but
+synchronization is not necessary since we are dealing with per cpu
+data specific to the currently executing processor. Only the current
+processor should be accessing that variable and therefore there are no
+concurrency issues with other processors in the system.
+
+Please note that accesses by remote processors to a per cpu area are
+exceptional situations and may impact performance and/or correctness
+(remote write operations) of local RMW operations via this_cpu_*.
+
+The main use of the this_cpu operations has been to optimize counter
+operations.
+
+The following this_cpu() operations with implied preemption protection
+are defined. These operations can be used without worrying about
+preemption and interrupts::
+
+ this_cpu_read(pcp)
+ this_cpu_write(pcp, val)
+ this_cpu_add(pcp, val)
+ this_cpu_and(pcp, val)
+ this_cpu_or(pcp, val)
+ this_cpu_add_return(pcp, val)
+ this_cpu_xchg(pcp, nval)
+ this_cpu_cmpxchg(pcp, oval, nval)
+ this_cpu_sub(pcp, val)
+ this_cpu_inc(pcp)
+ this_cpu_dec(pcp)
+ this_cpu_sub_return(pcp, val)
+ this_cpu_inc_return(pcp)
+ this_cpu_dec_return(pcp)
+
+
+Inner working of this_cpu operations
+------------------------------------
+
+On x86 the fs: or the gs: segment registers contain the base of the
+per cpu area. It is then possible to simply use the segment override
+to relocate a per cpu relative address to the proper per cpu area for
+the processor. So the relocation to the per cpu base is encoded in the
+instruction via a segment register prefix.
+
+For example::
+
+ DEFINE_PER_CPU(int, x);
+ int z;
+
+ z = this_cpu_read(x);
+
+results in a single instruction::
+
+ mov ax, gs:[x]
+
+instead of a sequence of calculation of the address and then a fetch
+from that address which occurs with the per cpu operations. Before
+this_cpu_ops such sequence also required preempt disable/enable to
+prevent the kernel from moving the thread to a different processor
+while the calculation is performed.
+
+Consider the following this_cpu operation::
+
+ this_cpu_inc(x)
+
+The above results in the following single instruction (no lock prefix!)::
+
+ inc gs:[x]
+
+instead of the following operations required if there is no segment
+register::
+
+ int *y;
+ int cpu;
+
+ cpu = get_cpu();
+ y = per_cpu_ptr(&x, cpu);
+ (*y)++;
+ put_cpu();
+
+Note that these operations can only be used on per cpu data that is
+reserved for a specific processor. Without disabling preemption in the
+surrounding code this_cpu_inc() will only guarantee that one of the
+per cpu counters is correctly incremented. However, there is no
+guarantee that the OS will not move the process directly before or
+after the this_cpu instruction is executed. In general this means that
+the value of the individual counters for each processor are
+meaningless. The sum of all the per cpu counters is the only value
+that is of interest.
+
+Per cpu variables are used for performance reasons. Bouncing cache
+lines can be avoided if multiple processors concurrently go through
+the same code paths. Since each processor has its own per cpu
+variables no concurrent cache line updates take place. The price that
+has to be paid for this optimization is the need to add up the per cpu
+counters when the value of a counter is needed.
+
+
+Special operations
+------------------
+
+::
+
+ y = this_cpu_ptr(&x)
+
+Takes the offset of a per cpu variable (&x !) and returns the address
+of the per cpu variable that belongs to the currently executing
+processor. this_cpu_ptr avoids multiple steps that the common
+get_cpu/put_cpu sequence requires. No processor number is
+available. Instead, the offset of the local per cpu area is simply
+added to the per cpu offset.
+
+Note that this operation can only be used in code segments where
+smp_processor_id() may be used, for example, where preemption has been
+disabled. The pointer is then used to access local per cpu data in a
+critical section. When preemption is re-enabled this pointer is usually
+no longer useful since it may no longer point to per cpu data of the
+current processor.
+
+The special cases where it makes sense to obtain a per-CPU pointer in
+preemptible code are addressed by raw_cpu_ptr(), but such use cases need
+to handle cases where two different CPUs are accessing the same per cpu
+variable, which might well be that of a third CPU. These use cases are
+typically performance optimizations. For example, SRCU implements a pair
+of counters as a pair of per-CPU variables, and rcu_read_lock_nmisafe()
+uses raw_cpu_ptr() to get a pointer to some CPU's counter, and uses
+atomic_inc_long() to handle migration between the raw_cpu_ptr() and
+the atomic_inc_long().
+
+Per cpu variables and offsets
+-----------------------------
+
+Per cpu variables have *offsets* to the beginning of the per cpu
+area. They do not have addresses although they look like that in the
+code. Offsets cannot be directly dereferenced. The offset must be
+added to a base pointer of a per cpu area of a processor in order to
+form a valid address.
+
+Therefore the use of x or &x outside of the context of per cpu
+operations is invalid and will generally be treated like a NULL
+pointer dereference.
+
+::
+
+ DEFINE_PER_CPU(int, x);
+
+In the context of per cpu operations the above implies that x is a per
+cpu variable. Most this_cpu operations take a cpu variable.
+
+::
+
+ int __percpu *p = &x;
+
+&x and hence p is the *offset* of a per cpu variable. this_cpu_ptr()
+takes the offset of a per cpu variable which makes this look a bit
+strange.
+
+
+Operations on a field of a per cpu structure
+--------------------------------------------
+
+Let's say we have a percpu structure::
+
+ struct s {
+ int n,m;
+ };
+
+ DEFINE_PER_CPU(struct s, p);
+
+
+Operations on these fields are straightforward::
+
+ this_cpu_inc(p.m)
+
+ z = this_cpu_cmpxchg(p.m, 0, 1);
+
+
+If we have an offset to struct s::
+
+ struct s __percpu *ps = &p;
+
+ this_cpu_dec(ps->m);
+
+ z = this_cpu_inc_return(ps->n);
+
+
+The calculation of the pointer may require the use of this_cpu_ptr()
+if we do not make use of this_cpu ops later to manipulate fields::
+
+ struct s *pp;
+
+ pp = this_cpu_ptr(&p);
+
+ pp->m--;
+
+ z = pp->n++;
+
+
+Variants of this_cpu ops
+------------------------
+
+this_cpu ops are interrupt safe. Some architectures do not support
+these per cpu local operations. In that case the operation must be
+replaced by code that disables interrupts, then does the operations
+that are guaranteed to be atomic and then re-enable interrupts. Doing
+so is expensive. If there are other reasons why the scheduler cannot
+change the processor we are executing on then there is no reason to
+disable interrupts. For that purpose the following __this_cpu operations
+are provided.
+
+These operations have no guarantee against concurrent interrupts or
+preemption. If a per cpu variable is not used in an interrupt context
+and the scheduler cannot preempt, then they are safe. If any interrupts
+still occur while an operation is in progress and if the interrupt too
+modifies the variable, then RMW actions can not be guaranteed to be
+safe::
+
+ __this_cpu_read(pcp)
+ __this_cpu_write(pcp, val)
+ __this_cpu_add(pcp, val)
+ __this_cpu_and(pcp, val)
+ __this_cpu_or(pcp, val)
+ __this_cpu_add_return(pcp, val)
+ __this_cpu_xchg(pcp, nval)
+ __this_cpu_cmpxchg(pcp, oval, nval)
+ __this_cpu_sub(pcp, val)
+ __this_cpu_inc(pcp)
+ __this_cpu_dec(pcp)
+ __this_cpu_sub_return(pcp, val)
+ __this_cpu_inc_return(pcp)
+ __this_cpu_dec_return(pcp)
+
+
+Will increment x and will not fall-back to code that disables
+interrupts on platforms that cannot accomplish atomicity through
+address relocation and a Read-Modify-Write operation in the same
+instruction.
+
+
+&this_cpu_ptr(pp)->n vs this_cpu_ptr(&pp->n)
+--------------------------------------------
+
+The first operation takes the offset and forms an address and then
+adds the offset of the n field. This may result in two add
+instructions emitted by the compiler.
+
+The second one first adds the two offsets and then does the
+relocation. IMHO the second form looks cleaner and has an easier time
+with (). The second form also is consistent with the way
+this_cpu_read() and friends are used.
+
+
+Remote access to per cpu data
+------------------------------
+
+Per cpu data structures are designed to be used by one cpu exclusively.
+If you use the variables as intended, this_cpu_ops() are guaranteed to
+be "atomic" as no other CPU has access to these data structures.
+
+There are special cases where you might need to access per cpu data
+structures remotely. It is usually safe to do a remote read access
+and that is frequently done to summarize counters. Remote write access
+something which could be problematic because this_cpu ops do not
+have lock semantics. A remote write may interfere with a this_cpu
+RMW operation.
+
+Remote write accesses to percpu data structures are highly discouraged
+unless absolutely necessary. Please consider using an IPI to wake up
+the remote CPU and perform the update to its per cpu area.
+
+To access per-cpu data structure remotely, typically the per_cpu_ptr()
+function is used::
+
+
+ DEFINE_PER_CPU(struct data, datap);
+
+ struct data *p = per_cpu_ptr(&datap, cpu);
+
+This makes it explicit that we are getting ready to access a percpu
+area remotely.
+
+You can also do the following to convert the datap offset to an address::
+
+ struct data *p = this_cpu_ptr(&datap);
+
+but, passing of pointers calculated via this_cpu_ptr to other cpus is
+unusual and should be avoided.
+
+Remote access are typically only for reading the status of another cpus
+per cpu data. Write accesses can cause unique problems due to the
+relaxed synchronization requirements for this_cpu operations.
+
+One example that illustrates some concerns with write operations is
+the following scenario that occurs because two per cpu variables
+share a cache-line but the relaxed synchronization is applied to
+only one process updating the cache-line.
+
+Consider the following example::
+
+
+ struct test {
+ atomic_t a;
+ int b;
+ };
+
+ DEFINE_PER_CPU(struct test, onecacheline);
+
+There is some concern about what would happen if the field 'a' is updated
+remotely from one processor and the local processor would use this_cpu ops
+to update field b. Care should be taken that such simultaneous accesses to
+data within the same cache line are avoided. Also costly synchronization
+may be necessary. IPIs are generally recommended in such scenarios instead
+of a remote write to the per cpu area of another processor.
+
+Even in cases where the remote writes are rare, please bear in
+mind that a remote write will evict the cache line from the processor
+that most likely will access it. If the processor wakes up and finds a
+missing local cache line of a per cpu area, its performance and hence
+the wake up times will be affected.
diff --git a/Documentation/core-api/timekeeping.rst b/Documentation/core-api/timekeeping.rst
index 93cbeb9daec0..22ec68f24421 100644
--- a/Documentation/core-api/timekeeping.rst
+++ b/Documentation/core-api/timekeeping.rst
@@ -65,7 +65,7 @@ different format depending on what is required by the user:
.. c:function:: u64 ktime_get_ns( void )
u64 ktime_get_boottime_ns( void )
u64 ktime_get_real_ns( void )
- u64 ktime_get_tai_ns( void )
+ u64 ktime_get_clocktai_ns( void )
u64 ktime_get_raw_ns( void )
Same as the plain ktime_get functions, but returning a u64 number
@@ -99,19 +99,23 @@ Coarse and fast_ns access
Some additional variants exist for more specialized cases:
-.. c:function:: ktime_t ktime_get_coarse_boottime( void )
+.. c:function:: ktime_t ktime_get_coarse( void )
+ ktime_t ktime_get_coarse_boottime( void )
ktime_t ktime_get_coarse_real( void )
ktime_t ktime_get_coarse_clocktai( void )
- ktime_t ktime_get_coarse_raw( void )
+
+.. c:function:: u64 ktime_get_coarse_ns( void )
+ u64 ktime_get_coarse_boottime_ns( void )
+ u64 ktime_get_coarse_real_ns( void )
+ u64 ktime_get_coarse_clocktai_ns( void )
.. c:function:: void ktime_get_coarse_ts64( struct timespec64 * )
void ktime_get_coarse_boottime_ts64( struct timespec64 * )
void ktime_get_coarse_real_ts64( struct timespec64 * )
void ktime_get_coarse_clocktai_ts64( struct timespec64 * )
- void ktime_get_coarse_raw_ts64( struct timespec64 * )
These are quicker than the non-coarse versions, but less accurate,
- corresponding to CLOCK_MONONOTNIC_COARSE and CLOCK_REALTIME_COARSE
+ corresponding to CLOCK_MONOTONIC_COARSE and CLOCK_REALTIME_COARSE
in user space, along with the equivalent boottime/tai/raw
timebase not available in user space.
@@ -128,6 +132,7 @@ Some additional variants exist for more specialized cases:
.. c:function:: u64 ktime_get_mono_fast_ns( void )
u64 ktime_get_raw_fast_ns( void )
u64 ktime_get_boot_fast_ns( void )
+ u64 ktime_get_tai_fast_ns( void )
u64 ktime_get_real_fast_ns( void )
These variants are safe to call from any context, including from
@@ -150,9 +155,9 @@ architectures. These are the recommended replacements:
Use ktime_get() or ktime_get_ts64() instead.
-.. c:function:: struct timeval do_gettimeofday( void )
- struct timespec getnstimeofday( void )
- struct timespec64 getnstimeofday64( void )
+.. c:function:: void do_gettimeofday( struct timeval * )
+ void getnstimeofday( struct timespec * )
+ void getnstimeofday64( struct timespec64 * )
void ktime_get_real_ts( struct timespec * )
ktime_get_real_ts64() is a direct replacement, but consider using
diff --git a/Documentation/core-api/unaligned-memory-access.rst b/Documentation/core-api/unaligned-memory-access.rst
new file mode 100644
index 000000000000..5ceeb80eb539
--- /dev/null
+++ b/Documentation/core-api/unaligned-memory-access.rst
@@ -0,0 +1,265 @@
+=========================
+Unaligned Memory Accesses
+=========================
+
+:Author: Daniel Drake <dsd@gentoo.org>,
+:Author: Johannes Berg <johannes@sipsolutions.net>
+
+:With help from: Alan Cox, Avuton Olrich, Heikki Orsila, Jan Engelhardt,
+ Kyle McMartin, Kyle Moffett, Randy Dunlap, Robert Hancock, Uli Kunitz,
+ Vadim Lobanov
+
+
+Linux runs on a wide variety of architectures which have varying behaviour
+when it comes to memory access. This document presents some details about
+unaligned accesses, why you need to write code that doesn't cause them,
+and how to write such code!
+
+
+The definition of an unaligned access
+=====================================
+
+Unaligned memory accesses occur when you try to read N bytes of data starting
+from an address that is not evenly divisible by N (i.e. addr % N != 0).
+For example, reading 4 bytes of data from address 0x10004 is fine, but
+reading 4 bytes of data from address 0x10005 would be an unaligned memory
+access.
+
+The above may seem a little vague, as memory access can happen in different
+ways. The context here is at the machine code level: certain instructions read
+or write a number of bytes to or from memory (e.g. movb, movw, movl in x86
+assembly). As will become clear, it is relatively easy to spot C statements
+which will compile to multiple-byte memory access instructions, namely when
+dealing with types such as u16, u32 and u64.
+
+
+Natural alignment
+=================
+
+The rule mentioned above forms what we refer to as natural alignment:
+When accessing N bytes of memory, the base memory address must be evenly
+divisible by N, i.e. addr % N == 0.
+
+When writing code, assume the target architecture has natural alignment
+requirements.
+
+In reality, only a few architectures require natural alignment on all sizes
+of memory access. However, we must consider ALL supported architectures;
+writing code that satisfies natural alignment requirements is the easiest way
+to achieve full portability.
+
+
+Why unaligned access is bad
+===========================
+
+The effects of performing an unaligned memory access vary from architecture
+to architecture. It would be easy to write a whole document on the differences
+here; a summary of the common scenarios is presented below:
+
+ - Some architectures are able to perform unaligned memory accesses
+ transparently, but there is usually a significant performance cost.
+ - Some architectures raise processor exceptions when unaligned accesses
+ happen. The exception handler is able to correct the unaligned access,
+ at significant cost to performance.
+ - Some architectures raise processor exceptions when unaligned accesses
+ happen, but the exceptions do not contain enough information for the
+ unaligned access to be corrected.
+ - Some architectures are not capable of unaligned memory access, but will
+ silently perform a different memory access to the one that was requested,
+ resulting in a subtle code bug that is hard to detect!
+
+It should be obvious from the above that if your code causes unaligned
+memory accesses to happen, your code will not work correctly on certain
+platforms and will cause performance problems on others.
+
+
+Code that does not cause unaligned access
+=========================================
+
+At first, the concepts above may seem a little hard to relate to actual
+coding practice. After all, you don't have a great deal of control over
+memory addresses of certain variables, etc.
+
+Fortunately things are not too complex, as in most cases, the compiler
+ensures that things will work for you. For example, take the following
+structure::
+
+ struct foo {
+ u16 field1;
+ u32 field2;
+ u8 field3;
+ };
+
+Let us assume that an instance of the above structure resides in memory
+starting at address 0x10000. With a basic level of understanding, it would
+not be unreasonable to expect that accessing field2 would cause an unaligned
+access. You'd be expecting field2 to be located at offset 2 bytes into the
+structure, i.e. address 0x10002, but that address is not evenly divisible
+by 4 (remember, we're reading a 4 byte value here).
+
+Fortunately, the compiler understands the alignment constraints, so in the
+above case it would insert 2 bytes of padding in between field1 and field2.
+Therefore, for standard structure types you can always rely on the compiler
+to pad structures so that accesses to fields are suitably aligned (assuming
+you do not cast the field to a type of different length).
+
+Similarly, you can also rely on the compiler to align variables and function
+parameters to a naturally aligned scheme, based on the size of the type of
+the variable.
+
+At this point, it should be clear that accessing a single byte (u8 or char)
+will never cause an unaligned access, because all memory addresses are evenly
+divisible by one.
+
+On a related topic, with the above considerations in mind you may observe
+that you could reorder the fields in the structure in order to place fields
+where padding would otherwise be inserted, and hence reduce the overall
+resident memory size of structure instances. The optimal layout of the
+above example is::
+
+ struct foo {
+ u32 field2;
+ u16 field1;
+ u8 field3;
+ };
+
+For a natural alignment scheme, the compiler would only have to add a single
+byte of padding at the end of the structure. This padding is added in order
+to satisfy alignment constraints for arrays of these structures.
+
+Another point worth mentioning is the use of __attribute__((packed)) on a
+structure type. This GCC-specific attribute tells the compiler never to
+insert any padding within structures, useful when you want to use a C struct
+to represent some data that comes in a fixed arrangement 'off the wire'.
+
+You might be inclined to believe that usage of this attribute can easily
+lead to unaligned accesses when accessing fields that do not satisfy
+architectural alignment requirements. However, again, the compiler is aware
+of the alignment constraints and will generate extra instructions to perform
+the memory access in a way that does not cause unaligned access. Of course,
+the extra instructions obviously cause a loss in performance compared to the
+non-packed case, so the packed attribute should only be used when avoiding
+structure padding is of importance.
+
+
+Code that causes unaligned access
+=================================
+
+With the above in mind, let's move onto a real life example of a function
+that can cause an unaligned memory access. The following function taken
+from include/linux/etherdevice.h is an optimized routine to compare two
+ethernet MAC addresses for equality::
+
+ bool ether_addr_equal(const u8 *addr1, const u8 *addr2)
+ {
+ #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ u32 fold = ((*(const u32 *)addr1) ^ (*(const u32 *)addr2)) |
+ ((*(const u16 *)(addr1 + 4)) ^ (*(const u16 *)(addr2 + 4)));
+
+ return fold == 0;
+ #else
+ const u16 *a = (const u16 *)addr1;
+ const u16 *b = (const u16 *)addr2;
+ return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) == 0;
+ #endif
+ }
+
+In the above function, when the hardware has efficient unaligned access
+capability, there is no issue with this code. But when the hardware isn't
+able to access memory on arbitrary boundaries, the reference to a[0] causes
+2 bytes (16 bits) to be read from memory starting at address addr1.
+
+Think about what would happen if addr1 was an odd address such as 0x10003.
+(Hint: it'd be an unaligned access.)
+
+Despite the potential unaligned access problems with the above function, it
+is included in the kernel anyway but is understood to only work normally on
+16-bit-aligned addresses. It is up to the caller to ensure this alignment or
+not use this function at all. This alignment-unsafe function is still useful
+as it is a decent optimization for the cases when you can ensure alignment,
+which is true almost all of the time in ethernet networking context.
+
+
+Here is another example of some code that could cause unaligned accesses::
+
+ void myfunc(u8 *data, u32 value)
+ {
+ [...]
+ *((u32 *) data) = cpu_to_le32(value);
+ [...]
+ }
+
+This code will cause unaligned accesses every time the data parameter points
+to an address that is not evenly divisible by 4.
+
+In summary, the 2 main scenarios where you may run into unaligned access
+problems involve:
+
+ 1. Casting variables to types of different lengths
+ 2. Pointer arithmetic followed by access to at least 2 bytes of data
+
+
+Avoiding unaligned accesses
+===========================
+
+The easiest way to avoid unaligned access is to use the get_unaligned() and
+put_unaligned() macros provided by the <linux/unaligned.h> header file.
+
+Going back to an earlier example of code that potentially causes unaligned
+access::
+
+ void myfunc(u8 *data, u32 value)
+ {
+ [...]
+ *((u32 *) data) = cpu_to_le32(value);
+ [...]
+ }
+
+To avoid the unaligned memory access, you would rewrite it as follows::
+
+ void myfunc(u8 *data, u32 value)
+ {
+ [...]
+ value = cpu_to_le32(value);
+ put_unaligned(value, (u32 *) data);
+ [...]
+ }
+
+The get_unaligned() macro works similarly. Assuming 'data' is a pointer to
+memory and you wish to avoid unaligned access, its usage is as follows::
+
+ u32 value = get_unaligned((u32 *) data);
+
+These macros work for memory accesses of any length (not just 32 bits as
+in the examples above). Be aware that when compared to standard access of
+aligned memory, using these macros to access unaligned memory can be costly in
+terms of performance.
+
+If use of such macros is not convenient, another option is to use memcpy(),
+where the source or destination (or both) are of type u8* or unsigned char*.
+Due to the byte-wise nature of this operation, unaligned accesses are avoided.
+
+
+Alignment vs. Networking
+========================
+
+On architectures that require aligned loads, networking requires that the IP
+header is aligned on a four-byte boundary to optimise the IP stack. For
+regular ethernet hardware, the constant NET_IP_ALIGN is used. On most
+architectures this constant has the value 2 because the normal ethernet
+header is 14 bytes long, so in order to get proper alignment one needs to
+DMA to an address which can be expressed as 4*n + 2. One notable exception
+here is powerpc which defines NET_IP_ALIGN to 0 because DMA to unaligned
+addresses can be very expensive and dwarf the cost of unaligned loads.
+
+For some ethernet hardware that cannot DMA to unaligned addresses like
+4*n+2 or non-ethernet hardware, this can be a problem, and it is then
+required to copy the incoming frame into an aligned buffer. Because this is
+unnecessary on architectures that can do unaligned accesses, the code can be
+made dependent on CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS like so::
+
+ #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ skb = original skb
+ #else
+ skb = copy skb
+ #endif
diff --git a/Documentation/core-api/union_find.rst b/Documentation/core-api/union_find.rst
new file mode 100644
index 000000000000..6df8b94fdb5a
--- /dev/null
+++ b/Documentation/core-api/union_find.rst
@@ -0,0 +1,106 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================
+Union-Find in Linux
+====================
+
+
+:Date: June 21, 2024
+:Author: Xavier <xavier_qy@163.com>
+
+What is union-find, and what is it used for?
+------------------------------------------------
+
+Union-find is a data structure used to handle the merging and querying
+of disjoint sets. The primary operations supported by union-find are:
+
+ Initialization: Resetting each element as an individual set, with
+ each set's initial parent node pointing to itself.
+
+ Find: Determine which set a particular element belongs to, usually by
+ returning a “representative element” of that set. This operation
+ is used to check if two elements are in the same set.
+
+ Union: Merge two sets into one.
+
+As a data structure used to maintain sets (groups), union-find is commonly
+utilized to solve problems related to offline queries, dynamic connectivity,
+and graph theory. It is also a key component in Kruskal's algorithm for
+computing the minimum spanning tree, which is crucial in scenarios like
+network routing. Consequently, union-find is widely referenced. Additionally,
+union-find has applications in symbolic computation, register allocation,
+and more.
+
+Space Complexity: O(n), where n is the number of nodes.
+
+Time Complexity: Using path compression can reduce the time complexity of
+the find operation, and using union by rank can reduce the time complexity
+of the union operation. These optimizations reduce the average time
+complexity of each find and union operation to O(α(n)), where α(n) is the
+inverse Ackermann function. This can be roughly considered a constant time
+complexity for practical purposes.
+
+This document covers use of the Linux union-find implementation. For more
+information on the nature and implementation of union-find, see:
+
+ Wikipedia entry on union-find
+ https://en.wikipedia.org/wiki/Disjoint-set_data_structure
+
+Linux implementation of union-find
+-----------------------------------
+
+Linux's union-find implementation resides in the file "lib/union_find.c".
+To use it, "#include <linux/union_find.h>".
+
+The union-find data structure is defined as follows::
+
+ struct uf_node {
+ struct uf_node *parent;
+ unsigned int rank;
+ };
+
+In this structure, parent points to the parent node of the current node.
+The rank field represents the height of the current tree. During a union
+operation, the tree with the smaller rank is attached under the tree with the
+larger rank to maintain balance.
+
+Initializing union-find
+-----------------------
+
+You can complete the initialization using either static or initialization
+interface. Initialize the parent pointer to point to itself and set the rank
+to 0.
+Example::
+
+ struct uf_node my_node = UF_INIT_NODE(my_node);
+
+or
+
+ uf_node_init(&my_node);
+
+Find the Root Node of union-find
+--------------------------------
+
+This operation is mainly used to determine whether two nodes belong to the same
+set in the union-find. If they have the same root, they are in the same set.
+During the find operation, path compression is performed to improve the
+efficiency of subsequent find operations.
+Example::
+
+ int connected;
+ struct uf_node *root1 = uf_find(&node_1);
+ struct uf_node *root2 = uf_find(&node_2);
+ if (root1 == root2)
+ connected = 1;
+ else
+ connected = 0;
+
+Union Two Sets in union-find
+----------------------------
+
+To union two sets in the union-find, you first find their respective root nodes
+and then link the smaller node to the larger node based on the rank of the root
+nodes.
+Example::
+
+ uf_union(&node_1, &node_2);
diff --git a/Documentation/core-api/watch_queue.rst b/Documentation/core-api/watch_queue.rst
new file mode 100644
index 000000000000..54f13ad5fc17
--- /dev/null
+++ b/Documentation/core-api/watch_queue.rst
@@ -0,0 +1,343 @@
+==============================
+General notification mechanism
+==============================
+
+The general notification mechanism is built on top of the standard pipe driver
+whereby it effectively splices notification messages from the kernel into pipes
+opened by userspace. This can be used in conjunction with::
+
+ * Key/keyring notifications
+
+
+The notifications buffers can be enabled by:
+
+ "General setup"/"General notification queue"
+ (CONFIG_WATCH_QUEUE)
+
+This document has the following sections:
+
+.. contents:: :local:
+
+
+Overview
+========
+
+This facility appears as a pipe that is opened in a special mode. The pipe's
+internal ring buffer is used to hold messages that are generated by the kernel.
+These messages are then read out by read(). Splice and similar are disabled on
+such pipes due to them wanting to, under some circumstances, revert their
+additions to the ring - which might end up interleaved with notification
+messages.
+
+The owner of the pipe has to tell the kernel which sources it would like to
+watch through that pipe. Only sources that have been connected to a pipe will
+insert messages into it. Note that a source may be bound to multiple pipes and
+insert messages into all of them simultaneously.
+
+Filters may also be emplaced on a pipe so that certain source types and
+subevents can be ignored if they're not of interest.
+
+A message will be discarded if there isn't a slot available in the ring or if
+no preallocated message buffer is available. In both of these cases, read()
+will insert a WATCH_META_LOSS_NOTIFICATION message into the output buffer after
+the last message currently in the buffer has been read.
+
+Note that when producing a notification, the kernel does not wait for the
+consumers to collect it, but rather just continues on. This means that
+notifications can be generated whilst spinlocks are held and also protects the
+kernel from being held up indefinitely by a userspace malfunction.
+
+
+Message Structure
+=================
+
+Notification messages begin with a short header::
+
+ struct watch_notification {
+ __u32 type:24;
+ __u32 subtype:8;
+ __u32 info;
+ };
+
+"type" indicates the source of the notification record and "subtype" indicates
+the type of record from that source (see the Watch Sources section below). The
+type may also be "WATCH_TYPE_META". This is a special record type generated
+internally by the watch queue itself. There are two subtypes:
+
+ * WATCH_META_REMOVAL_NOTIFICATION
+ * WATCH_META_LOSS_NOTIFICATION
+
+The first indicates that an object on which a watch was installed was removed
+or destroyed and the second indicates that some messages have been lost.
+
+"info" indicates a bunch of things, including:
+
+ * The length of the message in bytes, including the header (mask with
+ WATCH_INFO_LENGTH and shift by WATCH_INFO_LENGTH__SHIFT). This indicates
+ the size of the record, which may be between 8 and 127 bytes.
+
+ * The watch ID (mask with WATCH_INFO_ID and shift by WATCH_INFO_ID__SHIFT).
+ This indicates that caller's ID of the watch, which may be between 0
+ and 255. Multiple watches may share a queue, and this provides a means to
+ distinguish them.
+
+ * A type-specific field (WATCH_INFO_TYPE_INFO). This is set by the
+ notification producer to indicate some meaning specific to the type and
+ subtype.
+
+Everything in info apart from the length can be used for filtering.
+
+The header can be followed by supplementary information. The format of this is
+at the discretion is defined by the type and subtype.
+
+
+Watch List (Notification Source) API
+====================================
+
+A "watch list" is a list of watchers that are subscribed to a source of
+notifications. A list may be attached to an object (say a key or a superblock)
+or may be global (say for device events). From a userspace perspective, a
+non-global watch list is typically referred to by reference to the object it
+belongs to (such as using KEYCTL_NOTIFY and giving it a key serial number to
+watch that specific key).
+
+To manage a watch list, the following functions are provided:
+
+ * ::
+
+ void init_watch_list(struct watch_list *wlist,
+ void (*release_watch)(struct watch *wlist));
+
+ Initialise a watch list. If ``release_watch`` is not NULL, then this
+ indicates a function that should be called when the watch_list object is
+ destroyed to discard any references the watch list holds on the watched
+ object.
+
+ * ``void remove_watch_list(struct watch_list *wlist);``
+
+ This removes all of the watches subscribed to a watch_list and frees them
+ and then destroys the watch_list object itself.
+
+
+Watch Queue (Notification Output) API
+=====================================
+
+A "watch queue" is the buffer allocated by an application that notification
+records will be written into. The workings of this are hidden entirely inside
+of the pipe device driver, but it is necessary to gain a reference to it to set
+a watch. These can be managed with:
+
+ * ``struct watch_queue *get_watch_queue(int fd);``
+
+ Since watch queues are indicated to the kernel by the fd of the pipe that
+ implements the buffer, userspace must hand that fd through a system call.
+ This can be used to look up an opaque pointer to the watch queue from the
+ system call.
+
+ * ``void put_watch_queue(struct watch_queue *wqueue);``
+
+ This discards the reference obtained from ``get_watch_queue()``.
+
+
+Watch Subscription API
+======================
+
+A "watch" is a subscription on a watch list, indicating the watch queue, and
+thus the buffer, into which notification records should be written. The watch
+queue object may also carry filtering rules for that object, as set by
+userspace. Some parts of the watch struct can be set by the driver::
+
+ struct watch {
+ union {
+ u32 info_id; /* ID to be OR'd in to info field */
+ ...
+ };
+ void *private; /* Private data for the watched object */
+ u64 id; /* Internal identifier */
+ ...
+ };
+
+The ``info_id`` value should be an 8-bit number obtained from userspace and
+shifted by WATCH_INFO_ID__SHIFT. This is OR'd into the WATCH_INFO_ID field of
+struct watch_notification::info when and if the notification is written into
+the associated watch queue buffer.
+
+The ``private`` field is the driver's data associated with the watch_list and
+is cleaned up by the ``watch_list::release_watch()`` method.
+
+The ``id`` field is the source's ID. Notifications that are posted with a
+different ID are ignored.
+
+The following functions are provided to manage watches:
+
+ * ``void init_watch(struct watch *watch, struct watch_queue *wqueue);``
+
+ Initialise a watch object, setting its pointer to the watch queue, using
+ appropriate barriering to avoid lockdep complaints.
+
+ * ``int add_watch_to_object(struct watch *watch, struct watch_list *wlist);``
+
+ Subscribe a watch to a watch list (notification source). The
+ driver-settable fields in the watch struct must have been set before this
+ is called.
+
+ * ::
+
+ int remove_watch_from_object(struct watch_list *wlist,
+ struct watch_queue *wqueue,
+ u64 id, false);
+
+ Remove a watch from a watch list, where the watch must match the specified
+ watch queue (``wqueue``) and object identifier (``id``). A notification
+ (``WATCH_META_REMOVAL_NOTIFICATION``) is sent to the watch queue to
+ indicate that the watch got removed.
+
+ * ``int remove_watch_from_object(struct watch_list *wlist, NULL, 0, true);``
+
+ Remove all the watches from a watch list. It is expected that this will be
+ called preparatory to destruction and that the watch list will be
+ inaccessible to new watches by this point. A notification
+ (``WATCH_META_REMOVAL_NOTIFICATION``) is sent to the watch queue of each
+ subscribed watch to indicate that the watch got removed.
+
+
+Notification Posting API
+========================
+
+To post a notification to watch list so that the subscribed watches can see it,
+the following function should be used::
+
+ void post_watch_notification(struct watch_list *wlist,
+ struct watch_notification *n,
+ const struct cred *cred,
+ u64 id);
+
+The notification should be preformatted and a pointer to the header (``n``)
+should be passed in. The notification may be larger than this and the size in
+units of buffer slots is noted in ``n->info & WATCH_INFO_LENGTH``.
+
+The ``cred`` struct indicates the credentials of the source (subject) and is
+passed to the LSMs, such as SELinux, to allow or suppress the recording of the
+note in each individual queue according to the credentials of that queue
+(object).
+
+The ``id`` is the ID of the source object (such as the serial number on a key).
+Only watches that have the same ID set in them will see this notification.
+
+
+Watch Sources
+=============
+
+Any particular buffer can be fed from multiple sources. Sources include:
+
+ * WATCH_TYPE_KEY_NOTIFY
+
+ Notifications of this type indicate changes to keys and keyrings, including
+ the changes of keyring contents or the attributes of keys.
+
+ See Documentation/security/keys/core.rst for more information.
+
+
+Event Filtering
+===============
+
+Once a watch queue has been created, a set of filters can be applied to limit
+the events that are received using::
+
+ struct watch_notification_filter filter = {
+ ...
+ };
+ ioctl(fd, IOC_WATCH_QUEUE_SET_FILTER, &filter)
+
+The filter description is a variable of type::
+
+ struct watch_notification_filter {
+ __u32 nr_filters;
+ __u32 __reserved;
+ struct watch_notification_type_filter filters[];
+ };
+
+Where "nr_filters" is the number of filters in filters[] and "__reserved"
+should be 0. The "filters" array has elements of the following type::
+
+ struct watch_notification_type_filter {
+ __u32 type;
+ __u32 info_filter;
+ __u32 info_mask;
+ __u32 subtype_filter[8];
+ };
+
+Where:
+
+ * ``type`` is the event type to filter for and should be something like
+ "WATCH_TYPE_KEY_NOTIFY"
+
+ * ``info_filter`` and ``info_mask`` act as a filter on the info field of the
+ notification record. The notification is only written into the buffer if::
+
+ (watch.info & info_mask) == info_filter
+
+ This could be used, for example, to ignore events that are not exactly on
+ the watched point in a mount tree.
+
+ * ``subtype_filter`` is a bitmask indicating the subtypes that are of
+ interest. Bit 0 of subtype_filter[0] corresponds to subtype 0, bit 1 to
+ subtype 1, and so on.
+
+If the argument to the ioctl() is NULL, then the filters will be removed and
+all events from the watched sources will come through.
+
+
+Userspace Code Example
+======================
+
+A buffer is created with something like the following::
+
+ pipe2(fds, O_TMPFILE);
+ ioctl(fds[1], IOC_WATCH_QUEUE_SET_SIZE, 256);
+
+It can then be set to receive keyring change notifications::
+
+ keyctl(KEYCTL_WATCH_KEY, KEY_SPEC_SESSION_KEYRING, fds[1], 0x01);
+
+The notifications can then be consumed by something like the following::
+
+ static void consumer(int rfd, struct watch_queue_buffer *buf)
+ {
+ unsigned char buffer[128];
+ ssize_t buf_len;
+
+ while (buf_len = read(rfd, buffer, sizeof(buffer)),
+ buf_len > 0
+ ) {
+ void *p = buffer;
+ void *end = buffer + buf_len;
+ while (p < end) {
+ union {
+ struct watch_notification n;
+ unsigned char buf1[128];
+ } n;
+ size_t largest, len;
+
+ largest = end - p;
+ if (largest > 128)
+ largest = 128;
+ memcpy(&n, p, largest);
+
+ len = (n->info & WATCH_INFO_LENGTH) >>
+ WATCH_INFO_LENGTH__SHIFT;
+ if (len == 0 || len > largest)
+ return;
+
+ switch (n.n.type) {
+ case WATCH_TYPE_META:
+ got_meta(&n.n);
+ case WATCH_TYPE_KEY_NOTIFY:
+ saw_key_change(&n.n);
+ break;
+ }
+
+ p += len;
+ }
+ }
+ }
diff --git a/Documentation/core-api/workqueue.rst b/Documentation/core-api/workqueue.rst
index 00a5ba51e63f..165ca73e8351 100644
--- a/Documentation/core-api/workqueue.rst
+++ b/Documentation/core-api/workqueue.rst
@@ -1,6 +1,6 @@
-====================================
-Concurrency Managed Workqueue (cmwq)
-====================================
+=========
+Workqueue
+=========
:Date: September, 2010
:Author: Tejun Heo <tj@kernel.org>
@@ -25,8 +25,8 @@ there is no work item left on the workqueue the worker becomes idle.
When a new work item gets queued, the worker begins executing again.
-Why cmwq?
-=========
+Why Concurrency Managed Workqueue?
+==================================
In the original wq implementation, a multi threaded (MT) wq had one
worker thread per CPU and a single threaded (ST) wq had one worker
@@ -77,10 +77,12 @@ wants a function to be executed asynchronously it has to set up a work
item pointing to that function and queue that work item on a
workqueue.
-Special purpose threads, called worker threads, execute the functions
-off of the queue, one after the other. If no work is queued, the
-worker threads become idle. These worker threads are managed in so
-called worker-pools.
+A work item can be executed in either a thread or the BH (softirq) context.
+
+For threaded workqueues, special purpose threads, called [k]workers, execute
+the functions off of the queue, one after the other. If no work is queued,
+the worker threads become idle. These worker threads are managed in
+worker-pools.
The cmwq design differentiates between the user-facing workqueues that
subsystems and drivers queue work items on and the backend mechanism
@@ -91,6 +93,12 @@ for high priority ones, for each possible CPU and some extra
worker-pools to serve work items queued on unbound workqueues - the
number of these backing pools is dynamic.
+BH workqueues use the same framework. However, as there can only be one
+concurrent execution context, there's no need to worry about concurrency.
+Each per-CPU BH worker pool contains only one pseudo worker which represents
+the BH execution context. A BH workqueue can be considered a convenience
+interface to softirq.
+
Subsystems and drivers can create and queue work items through special
workqueue API functions as they see fit. They can influence some
aspects of the way the work items are executed by setting flags on the
@@ -106,7 +114,7 @@ unless specifically overridden, a work item of a bound workqueue will
be queued on the worklist of either normal or highpri worker-pool that
is associated to the CPU the issuer is running on.
-For any worker pool implementation, managing the concurrency level
+For any thread pool implementation, managing the concurrency level
(how many execution contexts are active) is an important issue. cmwq
tries to keep the concurrency at a minimal but sufficient level.
Minimal to save resources and sufficient in that the system is used at
@@ -164,6 +172,23 @@ resources, scheduled and executed.
``flags``
---------
+``WQ_BH``
+ BH workqueues can be considered a convenience interface to softirq. BH
+ workqueues are always per-CPU and all BH work items are executed in the
+ queueing CPU's softirq context in the queueing order.
+
+ All BH workqueues must have 0 ``max_active`` and ``WQ_HIGHPRI`` is the
+ only allowed additional flag.
+
+ BH work items cannot sleep. All other features such as delayed queueing,
+ flushing and canceling are supported.
+
+``WQ_PERCPU``
+ Work items queued to a per-cpu wq are bound to a specific CPU.
+ This flag is the right choice when cpu locality is important.
+
+ This flag is the complement of ``WQ_UNBOUND``.
+
``WQ_UNBOUND``
Work items queued to an unbound wq are served by the special
worker-pools which host workers which are not bound to any
@@ -216,25 +241,20 @@ resources, scheduled and executed.
This flag is meaningless for unbound wq.
-Note that the flag ``WQ_NON_REENTRANT`` no longer exists as all
-workqueues are now non-reentrant - any work item is guaranteed to be
-executed by at most one worker system-wide at any given time.
-
``max_active``
--------------
-``@max_active`` determines the maximum number of execution contexts
-per CPU which can be assigned to the work items of a wq. For example,
-with ``@max_active`` of 16, at most 16 work items of the wq can be
-executing at the same time per CPU.
+``@max_active`` determines the maximum number of execution contexts per
+CPU which can be assigned to the work items of a wq. For example, with
+``@max_active`` of 16, at most 16 work items of the wq can be executing
+at the same time per CPU. This is always a per-CPU attribute, even for
+unbound workqueues.
-Currently, for a bound wq, the maximum limit for ``@max_active`` is
-512 and the default value used when 0 is specified is 256. For an
-unbound wq, the limit is higher of 512 and 4 *
-``num_possible_cpus()``. These values are chosen sufficiently high
-such that they are not the limiting factor while providing protection
-in runaway cases.
+The maximum limit for ``@max_active`` is 2048 and the default value used
+when 0 is specified is 1024. These values are chosen sufficiently high
+such that they are not the limiting factor while providing protection in
+runaway cases.
The number of active work items of a wq is usually regulated by the
users of the wq, more specifically, by how many work items the users
@@ -242,15 +262,11 @@ may queue at the same time. Unless there is a specific need for
throttling the number of active work items, specifying '0' is
recommended.
-Some users depend on the strict execution ordering of ST wq. The
-combination of ``@max_active`` of 1 and ``WQ_UNBOUND`` used to
-achieve this behavior. Work items on such wq were always queued to the
-unbound worker-pools and only one work item could be active at any given
-time thus achieving the same ordering property as ST wq.
-
-In the current implementation the above configuration only guarantees
-ST behavior within a given NUMA node. Instead ``alloc_ordered_queue()`` should
-be used to achieve system-wide ST behavior.
+Some users depend on strict execution ordering where only one work item
+is in flight at any given time and the work items are processed in
+queueing order. While the combination of ``@max_active`` of 1 and
+``WQ_UNBOUND`` used to achieve this behavior, this is no longer the
+case. Use alloc_ordered_workqueue() instead.
Example Execution Scenarios
@@ -347,11 +363,366 @@ Guidelines
difference in execution characteristics between using a dedicated wq
and a system wq.
+ Note: If something may generate more than @max_active outstanding
+ work items (do stress test your producers), it may saturate a system
+ wq and potentially lead to deadlock. It should utilize its own
+ dedicated workqueue rather than the system wq.
+
* Unless work items are expected to consume a huge amount of CPU
cycles, using a bound wq is usually beneficial due to the increased
level of locality in wq operations and work item execution.
+Affinity Scopes
+===============
+
+An unbound workqueue groups CPUs according to its affinity scope to improve
+cache locality. For example, if a workqueue is using the default affinity
+scope of "cache", it will group CPUs according to last level cache
+boundaries. A work item queued on the workqueue will be assigned to a worker
+on one of the CPUs which share the last level cache with the issuing CPU.
+Once started, the worker may or may not be allowed to move outside the scope
+depending on the ``affinity_strict`` setting of the scope.
+
+Workqueue currently supports the following affinity scopes.
+
+``default``
+ Use the scope in module parameter ``workqueue.default_affinity_scope``
+ which is always set to one of the scopes below.
+
+``cpu``
+ CPUs are not grouped. A work item issued on one CPU is processed by a
+ worker on the same CPU. This makes unbound workqueues behave as per-cpu
+ workqueues without concurrency management.
+
+``smt``
+ CPUs are grouped according to SMT boundaries. This usually means that the
+ logical threads of each physical CPU core are grouped together.
+
+``cache``
+ CPUs are grouped according to cache boundaries. Which specific cache
+ boundary is used is determined by the arch code. L3 is used in a lot of
+ cases. This is the default affinity scope.
+
+``numa``
+ CPUs are grouped according to NUMA boundaries.
+
+``system``
+ All CPUs are put in the same group. Workqueue makes no effort to process a
+ work item on a CPU close to the issuing CPU.
+
+The default affinity scope can be changed with the module parameter
+``workqueue.default_affinity_scope`` and a specific workqueue's affinity
+scope can be changed using ``apply_workqueue_attrs()``.
+
+If ``WQ_SYSFS`` is set, the workqueue will have the following affinity scope
+related interface files under its ``/sys/devices/virtual/workqueue/WQ_NAME/``
+directory.
+
+``affinity_scope``
+ Read to see the current affinity scope. Write to change.
+
+ When default is the current scope, reading this file will also show the
+ current effective scope in parentheses, for example, ``default (cache)``.
+
+``affinity_strict``
+ 0 by default indicating that affinity scopes are not strict. When a work
+ item starts execution, workqueue makes a best-effort attempt to ensure
+ that the worker is inside its affinity scope, which is called
+ repatriation. Once started, the scheduler is free to move the worker
+ anywhere in the system as it sees fit. This enables benefiting from scope
+ locality while still being able to utilize other CPUs if necessary and
+ available.
+
+ If set to 1, all workers of the scope are guaranteed always to be in the
+ scope. This may be useful when crossing affinity scopes has other
+ implications, for example, in terms of power consumption or workload
+ isolation. Strict NUMA scope can also be used to match the workqueue
+ behavior of older kernels.
+
+
+Affinity Scopes and Performance
+===============================
+
+It'd be ideal if an unbound workqueue's behavior is optimal for vast
+majority of use cases without further tuning. Unfortunately, in the current
+kernel, there exists a pronounced trade-off between locality and utilization
+necessitating explicit configurations when workqueues are heavily used.
+
+Higher locality leads to higher efficiency where more work is performed for
+the same number of consumed CPU cycles. However, higher locality may also
+cause lower overall system utilization if the work items are not spread
+enough across the affinity scopes by the issuers. The following performance
+testing with dm-crypt clearly illustrates this trade-off.
+
+The tests are run on a CPU with 12-cores/24-threads split across four L3
+caches (AMD Ryzen 9 3900x). CPU clock boost is turned off for consistency.
+``/dev/dm-0`` is a dm-crypt device created on NVME SSD (Samsung 990 PRO) and
+opened with ``cryptsetup`` with default settings.
+
+
+Scenario 1: Enough issuers and work spread across the machine
+-------------------------------------------------------------
+
+The command used: ::
+
+ $ fio --filename=/dev/dm-0 --direct=1 --rw=randrw --bs=32k --ioengine=libaio \
+ --iodepth=64 --runtime=60 --numjobs=24 --time_based --group_reporting \
+ --name=iops-test-job --verify=sha512
+
+There are 24 issuers, each issuing 64 IOs concurrently. ``--verify=sha512``
+makes ``fio`` generate and read back the content each time which makes
+execution locality matter between the issuer and ``kcryptd``. The following
+are the read bandwidths and CPU utilizations depending on different affinity
+scope settings on ``kcryptd`` measured over five runs. Bandwidths are in
+MiBps, and CPU util in percents.
+
+.. list-table::
+ :widths: 16 20 20
+ :header-rows: 1
+
+ * - Affinity
+ - Bandwidth (MiBps)
+ - CPU util (%)
+
+ * - system
+ - 1159.40 ±1.34
+ - 99.31 ±0.02
+
+ * - cache
+ - 1166.40 ±0.89
+ - 99.34 ±0.01
+
+ * - cache (strict)
+ - 1166.00 ±0.71
+ - 99.35 ±0.01
+
+With enough issuers spread across the system, there is no downside to
+"cache", strict or otherwise. All three configurations saturate the whole
+machine but the cache-affine ones outperform by 0.6% thanks to improved
+locality.
+
+
+Scenario 2: Fewer issuers, enough work for saturation
+-----------------------------------------------------
+
+The command used: ::
+
+ $ fio --filename=/dev/dm-0 --direct=1 --rw=randrw --bs=32k \
+ --ioengine=libaio --iodepth=64 --runtime=60 --numjobs=8 \
+ --time_based --group_reporting --name=iops-test-job --verify=sha512
+
+The only difference from the previous scenario is ``--numjobs=8``. There are
+a third of the issuers but is still enough total work to saturate the
+system.
+
+.. list-table::
+ :widths: 16 20 20
+ :header-rows: 1
+
+ * - Affinity
+ - Bandwidth (MiBps)
+ - CPU util (%)
+
+ * - system
+ - 1155.40 ±0.89
+ - 97.41 ±0.05
+
+ * - cache
+ - 1154.40 ±1.14
+ - 96.15 ±0.09
+
+ * - cache (strict)
+ - 1112.00 ±4.64
+ - 93.26 ±0.35
+
+This is more than enough work to saturate the system. Both "system" and
+"cache" are nearly saturating the machine but not fully. "cache" is using
+less CPU but the better efficiency puts it at the same bandwidth as
+"system".
+
+Eight issuers moving around over four L3 cache scope still allow "cache
+(strict)" to mostly saturate the machine but the loss of work conservation
+is now starting to hurt with 3.7% bandwidth loss.
+
+
+Scenario 3: Even fewer issuers, not enough work to saturate
+-----------------------------------------------------------
+
+The command used: ::
+
+ $ fio --filename=/dev/dm-0 --direct=1 --rw=randrw --bs=32k \
+ --ioengine=libaio --iodepth=64 --runtime=60 --numjobs=4 \
+ --time_based --group_reporting --name=iops-test-job --verify=sha512
+
+Again, the only difference is ``--numjobs=4``. With the number of issuers
+reduced to four, there now isn't enough work to saturate the whole system
+and the bandwidth becomes dependent on completion latencies.
+
+.. list-table::
+ :widths: 16 20 20
+ :header-rows: 1
+
+ * - Affinity
+ - Bandwidth (MiBps)
+ - CPU util (%)
+
+ * - system
+ - 993.60 ±1.82
+ - 75.49 ±0.06
+
+ * - cache
+ - 973.40 ±1.52
+ - 74.90 ±0.07
+
+ * - cache (strict)
+ - 828.20 ±4.49
+ - 66.84 ±0.29
+
+Now, the tradeoff between locality and utilization is clearer. "cache" shows
+2% bandwidth loss compared to "system" and "cache (struct)" whopping 20%.
+
+
+Conclusion and Recommendations
+------------------------------
+
+In the above experiments, the efficiency advantage of the "cache" affinity
+scope over "system" is, while consistent and noticeable, small. However, the
+impact is dependent on the distances between the scopes and may be more
+pronounced in processors with more complex topologies.
+
+While the loss of work-conservation in certain scenarios hurts, it is a lot
+better than "cache (strict)" and maximizing workqueue utilization is
+unlikely to be the common case anyway. As such, "cache" is the default
+affinity scope for unbound pools.
+
+* As there is no one option which is great for most cases, workqueue usages
+ that may consume a significant amount of CPU are recommended to configure
+ the workqueues using ``apply_workqueue_attrs()`` and/or enable
+ ``WQ_SYSFS``.
+
+* An unbound workqueue with strict "cpu" affinity scope behaves the same as
+ ``WQ_CPU_INTENSIVE`` per-cpu workqueue. There is no real advanage to the
+ latter and an unbound workqueue provides a lot more flexibility.
+
+* Affinity scopes are introduced in Linux v6.5. To emulate the previous
+ behavior, use strict "numa" affinity scope.
+
+* The loss of work-conservation in non-strict affinity scopes is likely
+ originating from the scheduler. There is no theoretical reason why the
+ kernel wouldn't be able to do the right thing and maintain
+ work-conservation in most cases. As such, it is possible that future
+ scheduler improvements may make most of these tunables unnecessary.
+
+
+Examining Configuration
+=======================
+
+Use tools/workqueue/wq_dump.py to examine unbound CPU affinity
+configuration, worker pools and how workqueues map to the pools: ::
+
+ $ tools/workqueue/wq_dump.py
+ Affinity Scopes
+ ===============
+ wq_unbound_cpumask=0000000f
+
+ CPU
+ nr_pods 4
+ pod_cpus [0]=00000001 [1]=00000002 [2]=00000004 [3]=00000008
+ pod_node [0]=0 [1]=0 [2]=1 [3]=1
+ cpu_pod [0]=0 [1]=1 [2]=2 [3]=3
+
+ SMT
+ nr_pods 4
+ pod_cpus [0]=00000001 [1]=00000002 [2]=00000004 [3]=00000008
+ pod_node [0]=0 [1]=0 [2]=1 [3]=1
+ cpu_pod [0]=0 [1]=1 [2]=2 [3]=3
+
+ CACHE (default)
+ nr_pods 2
+ pod_cpus [0]=00000003 [1]=0000000c
+ pod_node [0]=0 [1]=1
+ cpu_pod [0]=0 [1]=0 [2]=1 [3]=1
+
+ NUMA
+ nr_pods 2
+ pod_cpus [0]=00000003 [1]=0000000c
+ pod_node [0]=0 [1]=1
+ cpu_pod [0]=0 [1]=0 [2]=1 [3]=1
+
+ SYSTEM
+ nr_pods 1
+ pod_cpus [0]=0000000f
+ pod_node [0]=-1
+ cpu_pod [0]=0 [1]=0 [2]=0 [3]=0
+
+ Worker Pools
+ ============
+ pool[00] ref= 1 nice= 0 idle/workers= 4/ 4 cpu= 0
+ pool[01] ref= 1 nice=-20 idle/workers= 2/ 2 cpu= 0
+ pool[02] ref= 1 nice= 0 idle/workers= 4/ 4 cpu= 1
+ pool[03] ref= 1 nice=-20 idle/workers= 2/ 2 cpu= 1
+ pool[04] ref= 1 nice= 0 idle/workers= 4/ 4 cpu= 2
+ pool[05] ref= 1 nice=-20 idle/workers= 2/ 2 cpu= 2
+ pool[06] ref= 1 nice= 0 idle/workers= 3/ 3 cpu= 3
+ pool[07] ref= 1 nice=-20 idle/workers= 2/ 2 cpu= 3
+ pool[08] ref=42 nice= 0 idle/workers= 6/ 6 cpus=0000000f
+ pool[09] ref=28 nice= 0 idle/workers= 3/ 3 cpus=00000003
+ pool[10] ref=28 nice= 0 idle/workers= 17/ 17 cpus=0000000c
+ pool[11] ref= 1 nice=-20 idle/workers= 1/ 1 cpus=0000000f
+ pool[12] ref= 2 nice=-20 idle/workers= 1/ 1 cpus=00000003
+ pool[13] ref= 2 nice=-20 idle/workers= 1/ 1 cpus=0000000c
+
+ Workqueue CPU -> pool
+ =====================
+ [ workqueue \ CPU 0 1 2 3 dfl]
+ events percpu 0 2 4 6
+ events_highpri percpu 1 3 5 7
+ events_long percpu 0 2 4 6
+ events_unbound unbound 9 9 10 10 8
+ events_freezable percpu 0 2 4 6
+ events_power_efficient percpu 0 2 4 6
+ events_freezable_pwr_ef percpu 0 2 4 6
+ rcu_gp percpu 0 2 4 6
+ rcu_par_gp percpu 0 2 4 6
+ slub_flushwq percpu 0 2 4 6
+ netns ordered 8 8 8 8 8
+ ...
+
+See the command's help message for more info.
+
+
+Monitoring
+==========
+
+Use tools/workqueue/wq_monitor.py to monitor workqueue operations: ::
+
+ $ tools/workqueue/wq_monitor.py events
+ total infl CPUtime CPUhog CMW/RPR mayday rescued
+ events 18545 0 6.1 0 5 - -
+ events_highpri 8 0 0.0 0 0 - -
+ events_long 3 0 0.0 0 0 - -
+ events_unbound 38306 0 0.1 - 7 - -
+ events_freezable 0 0 0.0 0 0 - -
+ events_power_efficient 29598 0 0.2 0 0 - -
+ events_freezable_pwr_ef 10 0 0.0 0 0 - -
+ sock_diag_events 0 0 0.0 0 0 - -
+
+ total infl CPUtime CPUhog CMW/RPR mayday rescued
+ events 18548 0 6.1 0 5 - -
+ events_highpri 8 0 0.0 0 0 - -
+ events_long 3 0 0.0 0 0 - -
+ events_unbound 38322 0 0.1 - 7 - -
+ events_freezable 0 0 0.0 0 0 - -
+ events_power_efficient 29603 0 0.2 0 0 - -
+ events_freezable_pwr_ef 10 0 0.0 0 0 - -
+ sock_diag_events 0 0 0.0 0 0 - -
+
+ ...
+
+See the command's help message for more info.
+
+
Debugging
=========
@@ -374,8 +745,8 @@ of possible problems:
The first one can be tracked using tracing: ::
- $ echo workqueue:workqueue_queue_work > /sys/kernel/debug/tracing/set_event
- $ cat /sys/kernel/debug/tracing/trace_pipe > out.txt
+ $ echo workqueue:workqueue_queue_work > /sys/kernel/tracing/set_event
+ $ cat /sys/kernel/tracing/trace_pipe > out.txt
(wait a few secs)
^C
@@ -392,7 +763,27 @@ The work item's function should be trivially visible in the stack
trace.
+Non-reentrance Conditions
+=========================
+
+Workqueue guarantees that a work item cannot be re-entrant if the following
+conditions hold after a work item gets queued:
+
+ 1. The work function hasn't been changed.
+ 2. No one queues the work item to another workqueue.
+ 3. The work item hasn't been reinitiated.
+
+In other words, if the above conditions hold, the work item is guaranteed to be
+executed by at most one worker system-wide at any given time.
+
+Note that requeuing the work item (to the same queue) in the self function
+doesn't break these conditions, so it's safe to do. Otherwise, caution is
+required when breaking the conditions inside a work function.
+
+
Kernel Inline Documentations Reference
======================================
.. kernel-doc:: include/linux/workqueue.h
+
+.. kernel-doc:: kernel/workqueue.c
diff --git a/Documentation/core-api/wrappers/atomic_bitops.rst b/Documentation/core-api/wrappers/atomic_bitops.rst
new file mode 100644
index 000000000000..bf24e4081a8f
--- /dev/null
+++ b/Documentation/core-api/wrappers/atomic_bitops.rst
@@ -0,0 +1,18 @@
+.. SPDX-License-Identifier: GPL-2.0
+ This is a simple wrapper to bring atomic_bitops.txt into the RST world
+ until such a time as that file can be converted directly.
+
+=============
+Atomic bitops
+=============
+
+.. raw:: latex
+
+ \footnotesize
+
+.. include:: ../../atomic_bitops.txt
+ :literal:
+
+.. raw:: latex
+
+ \normalsize
diff --git a/Documentation/core-api/wrappers/atomic_t.rst b/Documentation/core-api/wrappers/atomic_t.rst
new file mode 100644
index 000000000000..ed109a964c77
--- /dev/null
+++ b/Documentation/core-api/wrappers/atomic_t.rst
@@ -0,0 +1,19 @@
+.. SPDX-License-Identifier: GPL-2.0
+ This is a simple wrapper to bring atomic_t.txt into the RST world
+ until such a time as that file can be converted directly.
+
+============
+Atomic types
+============
+
+.. raw:: latex
+
+ \footnotesize
+
+.. include:: ../../atomic_t.txt
+ :literal:
+
+.. raw:: latex
+
+ \normalsize
+
diff --git a/Documentation/core-api/wrappers/memory-barriers.rst b/Documentation/core-api/wrappers/memory-barriers.rst
new file mode 100644
index 000000000000..532460b5e3eb
--- /dev/null
+++ b/Documentation/core-api/wrappers/memory-barriers.rst
@@ -0,0 +1,18 @@
+.. SPDX-License-Identifier: GPL-2.0
+ This is a simple wrapper to bring memory-barriers.txt into the RST world
+ until such a time as that file can be converted directly.
+
+============================
+Linux kernel memory barriers
+============================
+
+.. raw:: latex
+
+ \footnotesize
+
+.. include:: ../../memory-barriers.txt
+ :literal:
+
+.. raw:: latex
+
+ \normalsize
diff --git a/Documentation/core-api/xarray.rst b/Documentation/core-api/xarray.rst
index dbe96cb5558e..c6c91cbd0c3c 100644
--- a/Documentation/core-api/xarray.rst
+++ b/Documentation/core-api/xarray.rst
@@ -25,122 +25,144 @@ good performance with large indices. If your index can be larger than
``ULONG_MAX`` then the XArray is not the data type for you. The most
important user of the XArray is the page cache.
-Each non-``NULL`` entry in the array has three bits associated with
-it called marks. Each mark may be set or cleared independently of
-the others. You can iterate over entries which are marked.
-
Normal pointers may be stored in the XArray directly. They must be 4-byte
-aligned, which is true for any pointer returned from :c:func:`kmalloc` and
-:c:func:`alloc_page`. It isn't true for arbitrary user-space pointers,
+aligned, which is true for any pointer returned from kmalloc() and
+alloc_page(). It isn't true for arbitrary user-space pointers,
nor for function pointers. You can store pointers to statically allocated
objects, as long as those objects have an alignment of at least 4.
You can also store integers between 0 and ``LONG_MAX`` in the XArray.
-You must first convert it into an entry using :c:func:`xa_mk_value`.
+You must first convert it into an entry using xa_mk_value().
When you retrieve an entry from the XArray, you can check whether it is
-a value entry by calling :c:func:`xa_is_value`, and convert it back to
-an integer by calling :c:func:`xa_to_value`.
-
-Some users want to store tagged pointers instead of using the marks
-described above. They can call :c:func:`xa_tag_pointer` to create an
-entry with a tag, :c:func:`xa_untag_pointer` to turn a tagged entry
-back into an untagged pointer and :c:func:`xa_pointer_tag` to retrieve
-the tag of an entry. Tagged pointers use the same bits that are used
-to distinguish value entries from normal pointers, so each user must
-decide whether they want to store value entries or tagged pointers in
-any particular XArray.
-
-The XArray does not support storing :c:func:`IS_ERR` pointers as some
+a value entry by calling xa_is_value(), and convert it back to
+an integer by calling xa_to_value().
+
+Some users want to tag the pointers they store in the XArray. You can
+call xa_tag_pointer() to create an entry with a tag, xa_untag_pointer()
+to turn a tagged entry back into an untagged pointer and xa_pointer_tag()
+to retrieve the tag of an entry. Tagged pointers use the same bits that
+are used to distinguish value entries from normal pointers, so you must
+decide whether you want to store value entries or tagged pointers in any
+particular XArray.
+
+The XArray does not support storing IS_ERR() pointers as some
conflict with value entries or internal entries.
An unusual feature of the XArray is the ability to create entries which
occupy a range of indices. Once stored to, looking up any index in
the range will return the same entry as looking up any other index in
-the range. Setting a mark on one index will set it on all of them.
-Storing to any index will store to all of them. Multi-index entries can
-be explicitly split into smaller entries, or storing ``NULL`` into any
-entry will cause the XArray to forget about the range.
+the range. Storing to any index will store to all of them. Multi-index
+entries can be explicitly split into smaller entries. Unsetting (using
+xa_erase() or xa_store() with ``NULL``) any entry will cause the XArray
+to forget about the range.
Normal API
==========
-Start by initialising an XArray, either with :c:func:`DEFINE_XARRAY`
-for statically allocated XArrays or :c:func:`xa_init` for dynamically
+Start by initialising an XArray, either with DEFINE_XARRAY()
+for statically allocated XArrays or xa_init() for dynamically
allocated ones. A freshly-initialised XArray contains a ``NULL``
pointer at every index.
-You can then set entries using :c:func:`xa_store` and get entries
-using :c:func:`xa_load`. xa_store will overwrite any entry with the
-new entry and return the previous entry stored at that index. You can
-use :c:func:`xa_erase` instead of calling :c:func:`xa_store` with a
-``NULL`` entry. There is no difference between an entry that has never
-been stored to, one that has been erased and one that has most recently
-had ``NULL`` stored to it.
+You can then set entries using xa_store() and get entries using
+xa_load(). xa_store() will overwrite any entry with the new entry and
+return the previous entry stored at that index. You can unset entries
+using xa_erase() or by setting the entry to ``NULL`` using xa_store().
+There is no difference between an entry that has never been stored to
+and one that has been erased with xa_erase(); an entry that has most
+recently had ``NULL`` stored to it is also equivalent except if the
+XArray was initialized with ``XA_FLAGS_ALLOC``.
You can conditionally replace an entry at an index by using
-:c:func:`xa_cmpxchg`. Like :c:func:`cmpxchg`, it will only succeed if
+xa_cmpxchg(). Like cmpxchg(), it will only succeed if
the entry at that index has the 'old' value. It also returns the entry
which was at that index; if it returns the same entry which was passed as
-'old', then :c:func:`xa_cmpxchg` succeeded.
+'old', then xa_cmpxchg() succeeded.
If you want to only store a new entry to an index if the current entry
-at that index is ``NULL``, you can use :c:func:`xa_insert` which
-returns ``-EEXIST`` if the entry is not empty.
-
-You can enquire whether a mark is set on an entry by using
-:c:func:`xa_get_mark`. If the entry is not ``NULL``, you can set a mark
-on it by using :c:func:`xa_set_mark` and remove the mark from an entry by
-calling :c:func:`xa_clear_mark`. You can ask whether any entry in the
-XArray has a particular mark set by calling :c:func:`xa_marked`.
+at that index is ``NULL``, you can use xa_insert() which
+returns ``-EBUSY`` if the entry is not empty.
You can copy entries out of the XArray into a plain array by calling
-:c:func:`xa_extract`. Or you can iterate over the present entries in
-the XArray by calling :c:func:`xa_for_each`. You may prefer to use
-:c:func:`xa_find` or :c:func:`xa_find_after` to move to the next present
-entry in the XArray.
+xa_extract(). Or you can iterate over the present entries in the XArray
+by calling xa_for_each(), xa_for_each_start() or xa_for_each_range().
+You may prefer to use xa_find() or xa_find_after() to move to the next
+present entry in the XArray.
-Calling :c:func:`xa_store_range` stores the same entry in a range
+Calling xa_store_range() stores the same entry in a range
of indices. If you do this, some of the other operations will behave
in a slightly odd way. For example, marking the entry at one index
may result in the entry being marked at some, but not all of the other
indices. Storing into one index may result in the entry retrieved by
some, but not all of the other indices changing.
-Sometimes you need to ensure that a subsequent call to :c:func:`xa_store`
-will not need to allocate memory. The :c:func:`xa_reserve` function
-will store a reserved entry at the indicated index. Users of the normal
-API will see this entry as containing ``NULL``. If you do not need to
-use the reserved entry, you can call :c:func:`xa_release` to remove the
-unused entry. If another user has stored to the entry in the meantime,
-:c:func:`xa_release` will do nothing; if instead you want the entry to
-become ``NULL``, you should use :c:func:`xa_erase`.
-
-If all entries in the array are ``NULL``, the :c:func:`xa_empty` function
+Sometimes you need to ensure that a subsequent call to xa_store()
+will not need to allocate memory. The xa_reserve() function
+will store a reserved entry at the indicated index. Users of the
+normal API will see this entry as containing ``NULL``. If you do
+not need to use the reserved entry, you can call xa_release()
+to remove the unused entry. If another user has stored to the entry
+in the meantime, xa_release() will do nothing; if instead you
+want the entry to become ``NULL``, you should use xa_erase().
+Using xa_insert() on a reserved entry will fail.
+
+If all entries in the array are ``NULL``, the xa_empty() function
will return ``true``.
Finally, you can remove all entries from an XArray by calling
-:c:func:`xa_destroy`. If the XArray entries are pointers, you may wish
+xa_destroy(). If the XArray entries are pointers, you may wish
to free the entries first. You can do this by iterating over all present
-entries in the XArray using the :c:func:`xa_for_each` iterator.
+entries in the XArray using the xa_for_each() iterator.
+
+Search Marks
+------------
+
+Each entry in the array has three bits associated with it called marks.
+Each mark may be set or cleared independently of the others. You can
+iterate over marked entries by using the xa_for_each_marked() iterator.
+
+You can enquire whether a mark is set on an entry by using
+xa_get_mark(). If the entry is not ``NULL``, you can set a mark on it
+by using xa_set_mark() and remove the mark from an entry by calling
+xa_clear_mark(). You can ask whether any entry in the XArray has a
+particular mark set by calling xa_marked(). Erasing an entry from the
+XArray causes all marks associated with that entry to be cleared.
+
+Setting or clearing a mark on any index of a multi-index entry will
+affect all indices covered by that entry. Querying the mark on any
+index will return the same result.
+
+There is no way to iterate over entries which are not marked; the data
+structure does not allow this to be implemented efficiently. There are
+not currently iterators to search for logical combinations of bits (eg
+iterate over all entries which have both ``XA_MARK_1`` and ``XA_MARK_2``
+set, or iterate over all entries which have ``XA_MARK_0`` or ``XA_MARK_2``
+set). It would be possible to add these if a user arises.
Allocating XArrays
------------------
-If you use :c:func:`DEFINE_XARRAY_ALLOC` to define the XArray, or
-initialise it by passing ``XA_FLAGS_ALLOC`` to :c:func:`xa_init_flags`,
+If you use DEFINE_XARRAY_ALLOC() to define the XArray, or
+initialise it by passing ``XA_FLAGS_ALLOC`` to xa_init_flags(),
the XArray changes to track whether entries are in use or not.
-You can call :c:func:`xa_alloc` to store the entry at any unused index
+You can call xa_alloc() to store the entry at an unused index
in the XArray. If you need to modify the array from interrupt context,
-you can use :c:func:`xa_alloc_bh` or :c:func:`xa_alloc_irq` to disable
+you can use xa_alloc_bh() or xa_alloc_irq() to disable
interrupts while allocating the ID.
-Using :c:func:`xa_store`, :c:func:`xa_cmpxchg` or :c:func:`xa_insert`
-will mark the entry as being allocated. Unlike a normal XArray, storing
-``NULL`` will mark the entry as being in use, like :c:func:`xa_reserve`.
-To free an entry, use :c:func:`xa_erase` (or :c:func:`xa_release` if
+Using xa_store(), xa_cmpxchg() or xa_insert() will
+also mark the entry as being allocated. Unlike a normal XArray, storing
+``NULL`` will mark the entry as being in use, like xa_reserve().
+To free an entry, use xa_erase() (or xa_release() if
you only want to free the entry if it's ``NULL``).
+By default, the lowest free entry is allocated starting from 0. If you
+want to allocate entries starting at 1, it is more efficient to use
+DEFINE_XARRAY_ALLOC1() or ``XA_FLAGS_ALLOC1``. If you want to
+allocate IDs up to a maximum, then wrap back around to the lowest free
+ID, you can use xa_alloc_cyclic().
+
You cannot use ``XA_MARK_0`` with an allocating XArray as this mark
is used to track whether an entry is free or not. The other marks are
available for your use.
@@ -148,17 +170,17 @@ available for your use.
Memory allocation
-----------------
-The :c:func:`xa_store`, :c:func:`xa_cmpxchg`, :c:func:`xa_alloc`,
-:c:func:`xa_reserve` and :c:func:`xa_insert` functions take a gfp_t
+The xa_store(), xa_cmpxchg(), xa_alloc(),
+xa_reserve() and xa_insert() functions take a gfp_t
parameter in case the XArray needs to allocate memory to store this entry.
If the entry is being deleted, no memory allocation needs to be performed,
and the GFP flags specified will be ignored.
It is possible for no memory to be allocatable, particularly if you pass
a restrictive set of GFP flags. In that case, the functions return a
-special value which can be turned into an errno using :c:func:`xa_err`.
+special value which can be turned into an errno using xa_err().
If you don't need to know exactly which error occurred, using
-:c:func:`xa_is_err` is slightly more efficient.
+xa_is_err() is slightly more efficient.
Locking
-------
@@ -167,51 +189,56 @@ When using the Normal API, you do not have to worry about locking.
The XArray uses RCU and an internal spinlock to synchronise access:
No lock needed:
- * :c:func:`xa_empty`
- * :c:func:`xa_marked`
+ * xa_empty()
+ * xa_marked()
Takes RCU read lock:
- * :c:func:`xa_load`
- * :c:func:`xa_for_each`
- * :c:func:`xa_find`
- * :c:func:`xa_find_after`
- * :c:func:`xa_extract`
- * :c:func:`xa_get_mark`
+ * xa_load()
+ * xa_for_each()
+ * xa_for_each_start()
+ * xa_for_each_range()
+ * xa_find()
+ * xa_find_after()
+ * xa_extract()
+ * xa_get_mark()
Takes xa_lock internally:
- * :c:func:`xa_store`
- * :c:func:`xa_store_bh`
- * :c:func:`xa_store_irq`
- * :c:func:`xa_insert`
- * :c:func:`xa_erase`
- * :c:func:`xa_erase_bh`
- * :c:func:`xa_erase_irq`
- * :c:func:`xa_cmpxchg`
- * :c:func:`xa_store_range`
- * :c:func:`xa_alloc`
- * :c:func:`xa_alloc_bh`
- * :c:func:`xa_alloc_irq`
- * :c:func:`xa_reserve`
- * :c:func:`xa_reserve_bh`
- * :c:func:`xa_reserve_irq`
- * :c:func:`xa_destroy`
- * :c:func:`xa_set_mark`
- * :c:func:`xa_clear_mark`
+ * xa_store()
+ * xa_store_bh()
+ * xa_store_irq()
+ * xa_insert()
+ * xa_insert_bh()
+ * xa_insert_irq()
+ * xa_erase()
+ * xa_erase_bh()
+ * xa_erase_irq()
+ * xa_cmpxchg()
+ * xa_cmpxchg_bh()
+ * xa_cmpxchg_irq()
+ * xa_store_range()
+ * xa_alloc()
+ * xa_alloc_bh()
+ * xa_alloc_irq()
+ * xa_reserve()
+ * xa_reserve_bh()
+ * xa_reserve_irq()
+ * xa_destroy()
+ * xa_set_mark()
+ * xa_clear_mark()
Assumes xa_lock held on entry:
- * :c:func:`__xa_store`
- * :c:func:`__xa_insert`
- * :c:func:`__xa_erase`
- * :c:func:`__xa_cmpxchg`
- * :c:func:`__xa_alloc`
- * :c:func:`__xa_reserve`
- * :c:func:`__xa_set_mark`
- * :c:func:`__xa_clear_mark`
+ * __xa_store()
+ * __xa_insert()
+ * __xa_erase()
+ * __xa_cmpxchg()
+ * __xa_alloc()
+ * __xa_set_mark()
+ * __xa_clear_mark()
If you want to take advantage of the lock to protect the data structures
-that you are storing in the XArray, you can call :c:func:`xa_lock`
-before calling :c:func:`xa_load`, then take a reference count on the
-object you have found before calling :c:func:`xa_unlock`. This will
+that you are storing in the XArray, you can call xa_lock()
+before calling xa_load(), then take a reference count on the
+object you have found before calling xa_unlock(). This will
prevent stores from removing the object from the array between looking
up the object and incrementing the refcount. You can also use RCU to
avoid dereferencing freed memory, but an explanation of that is beyond
@@ -251,7 +278,7 @@ context and then erase them in softirq context, you can do that this way::
}
If you are going to modify the XArray from interrupt or softirq context,
-you need to initialise the array using :c:func:`xa_init_flags`, passing
+you need to initialise the array using xa_init_flags(), passing
``XA_FLAGS_LOCK_IRQ`` or ``XA_FLAGS_LOCK_BH``.
The above example also shows a common pattern of wanting to extend the
@@ -259,19 +286,20 @@ coverage of the xa_lock on the store side to protect some statistics
associated with the array.
Sharing the XArray with interrupt context is also possible, either
-using :c:func:`xa_lock_irqsave` in both the interrupt handler and process
-context, or :c:func:`xa_lock_irq` in process context and :c:func:`xa_lock`
+using xa_lock_irqsave() in both the interrupt handler and process
+context, or xa_lock_irq() in process context and xa_lock()
in the interrupt handler. Some of the more common patterns have helper
-functions such as :c:func:`xa_store_bh`, :c:func:`xa_store_irq`,
-:c:func:`xa_erase_bh` and :c:func:`xa_erase_irq`.
+functions such as xa_store_bh(), xa_store_irq(),
+xa_erase_bh(), xa_erase_irq(), xa_cmpxchg_bh()
+and xa_cmpxchg_irq().
Sometimes you need to protect access to the XArray with a mutex because
that lock sits above another mutex in the locking hierarchy. That does
-not entitle you to use functions like :c:func:`__xa_erase` without taking
+not entitle you to use functions like __xa_erase() without taking
the xa_lock; the xa_lock is used for lockdep validation and will be used
for other purposes in the future.
-The :c:func:`__xa_set_mark` and :c:func:`__xa_clear_mark` functions are also
+The __xa_set_mark() and __xa_clear_mark() functions are also
available for situations where you look up an entry and want to atomically
set or clear a mark. It may be more efficient to use the advanced API
in this case, as it will save you from walking the tree twice.
@@ -289,27 +317,31 @@ indeed the normal API is implemented in terms of the advanced API. The
advanced API is only available to modules with a GPL-compatible license.
The advanced API is based around the xa_state. This is an opaque data
-structure which you declare on the stack using the :c:func:`XA_STATE`
-macro. This macro initialises the xa_state ready to start walking
-around the XArray. It is used as a cursor to maintain the position
-in the XArray and let you compose various operations together without
-having to restart from the top every time.
+structure which you declare on the stack using the XA_STATE() macro.
+This macro initialises the xa_state ready to start walking around the
+XArray. It is used as a cursor to maintain the position in the XArray
+and let you compose various operations together without having to restart
+from the top every time. The contents of the xa_state are protected by
+the rcu_read_lock() or the xas_lock(). If you need to drop whichever of
+those locks is protecting your state and tree, you must call xas_pause()
+so that future calls do not rely on the parts of the state which were
+left unprotected.
The xa_state is also used to store errors. You can call
-:c:func:`xas_error` to retrieve the error. All operations check whether
+xas_error() to retrieve the error. All operations check whether
the xa_state is in an error state before proceeding, so there's no need
for you to check for an error after each call; you can make multiple
calls in succession and only check at a convenient point. The only
errors currently generated by the XArray code itself are ``ENOMEM`` and
``EINVAL``, but it supports arbitrary errors in case you want to call
-:c:func:`xas_set_err` yourself.
+xas_set_err() yourself.
-If the xa_state is holding an ``ENOMEM`` error, calling :c:func:`xas_nomem`
+If the xa_state is holding an ``ENOMEM`` error, calling xas_nomem()
will attempt to allocate more memory using the specified gfp flags and
cache it in the xa_state for the next attempt. The idea is that you take
the xa_lock, attempt the operation and drop the lock. The operation
attempts to allocate memory while holding the lock, but it is more
-likely to fail. Once you have dropped the lock, :c:func:`xas_nomem`
+likely to fail. Once you have dropped the lock, xas_nomem()
can try harder to allocate more memory. It will return ``true`` if it
is worth retrying the operation (i.e. that there was a memory error *and*
more memory was allocated). If it has previously allocated memory, and
@@ -322,7 +354,7 @@ Internal Entries
The XArray reserves some entries for its own purposes. These are never
exposed through the normal API, but when using the advanced API, it's
possible to see them. Usually the best way to handle them is to pass them
-to :c:func:`xas_retry`, and retry the operation if it returns ``true``.
+to xas_retry(), and retry the operation if it returns ``true``.
.. flat-table::
:widths: 1 1 6
@@ -332,89 +364,88 @@ to :c:func:`xas_retry`, and retry the operation if it returns ``true``.
- Usage
* - Node
- - :c:func:`xa_is_node`
+ - xa_is_node()
- An XArray node. May be visible when using a multi-index xa_state.
* - Sibling
- - :c:func:`xa_is_sibling`
+ - xa_is_sibling()
- A non-canonical entry for a multi-index entry. The value indicates
which slot in this node has the canonical entry.
* - Retry
- - :c:func:`xa_is_retry`
+ - xa_is_retry()
- This entry is currently being modified by a thread which has the
xa_lock. The node containing this entry may be freed at the end
of this RCU period. You should restart the lookup from the head
of the array.
* - Zero
- - :c:func:`xa_is_zero`
+ - xa_is_zero()
- Zero entries appear as ``NULL`` through the Normal API, but occupy
an entry in the XArray which can be used to reserve the index for
future use. This is used by allocating XArrays for allocated entries
which are ``NULL``.
Other internal entries may be added in the future. As far as possible, they
-will be handled by :c:func:`xas_retry`.
+will be handled by xas_retry().
Additional functionality
------------------------
-The :c:func:`xas_create_range` function allocates all the necessary memory
+The xas_create_range() function allocates all the necessary memory
to store every entry in a range. It will set ENOMEM in the xa_state if
it cannot allocate memory.
-You can use :c:func:`xas_init_marks` to reset the marks on an entry
+You can use xas_init_marks() to reset the marks on an entry
to their default state. This is usually all marks clear, unless the
XArray is marked with ``XA_FLAGS_TRACK_FREE``, in which case mark 0 is set
and all other marks are clear. Replacing one entry with another using
-:c:func:`xas_store` will not reset the marks on that entry; if you want
+xas_store() will not reset the marks on that entry; if you want
the marks reset, you should do that explicitly.
-The :c:func:`xas_load` will walk the xa_state as close to the entry
+The xas_load() will walk the xa_state as close to the entry
as it can. If you know the xa_state has already been walked to the
entry and need to check that the entry hasn't changed, you can use
-:c:func:`xas_reload` to save a function call.
+xas_reload() to save a function call.
If you need to move to a different index in the XArray, call
-:c:func:`xas_set`. This resets the cursor to the top of the tree, which
+xas_set(). This resets the cursor to the top of the tree, which
will generally make the next operation walk the cursor to the desired
spot in the tree. If you want to move to the next or previous index,
-call :c:func:`xas_next` or :c:func:`xas_prev`. Setting the index does
+call xas_next() or xas_prev(). Setting the index does
not walk the cursor around the array so does not require a lock to be
held, while moving to the next or previous index does.
-You can search for the next present entry using :c:func:`xas_find`. This
-is the equivalent of both :c:func:`xa_find` and :c:func:`xa_find_after`;
+You can search for the next present entry using xas_find(). This
+is the equivalent of both xa_find() and xa_find_after();
if the cursor has been walked to an entry, then it will find the next
entry after the one currently referenced. If not, it will return the
-entry at the index of the xa_state. Using :c:func:`xas_next_entry` to
-move to the next present entry instead of :c:func:`xas_find` will save
+entry at the index of the xa_state. Using xas_next_entry() to
+move to the next present entry instead of xas_find() will save
a function call in the majority of cases at the expense of emitting more
inline code.
-The :c:func:`xas_find_marked` function is similar. If the xa_state has
+The xas_find_marked() function is similar. If the xa_state has
not been walked, it will return the entry at the index of the xa_state,
if it is marked. Otherwise, it will return the first marked entry after
-the entry referenced by the xa_state. The :c:func:`xas_next_marked`
-function is the equivalent of :c:func:`xas_next_entry`.
+the entry referenced by the xa_state. The xas_next_marked()
+function is the equivalent of xas_next_entry().
-When iterating over a range of the XArray using :c:func:`xas_for_each`
-or :c:func:`xas_for_each_marked`, it may be necessary to temporarily stop
-the iteration. The :c:func:`xas_pause` function exists for this purpose.
+When iterating over a range of the XArray using xas_for_each()
+or xas_for_each_marked(), it may be necessary to temporarily stop
+the iteration. The xas_pause() function exists for this purpose.
After you have done the necessary work and wish to resume, the xa_state
is in an appropriate state to continue the iteration after the entry
you last processed. If you have interrupts disabled while iterating,
then it is good manners to pause the iteration and reenable interrupts
every ``XA_CHECK_SCHED`` entries.
-The :c:func:`xas_get_mark`, :c:func:`xas_set_mark` and
-:c:func:`xas_clear_mark` functions require the xa_state cursor to have
-been moved to the appropriate location in the xarray; they will do
-nothing if you have called :c:func:`xas_pause` or :c:func:`xas_set`
+The xas_get_mark(), xas_set_mark() and xas_clear_mark() functions require
+the xa_state cursor to have been moved to the appropriate location in the
+XArray; they will do nothing if you have called xas_pause() or xas_set()
immediately before.
-You can call :c:func:`xas_set_update` to have a callback function
+You can call xas_set_update() to have a callback function
called each time the XArray updates a node. This is used by the page
cache workingset code to maintain its list of nodes which contain only
shadow entries.
@@ -432,31 +463,45 @@ eg indices 64-127 may be tied together, but 2-6 may not be. This may
save substantial quantities of memory; for example tying 512 entries
together will save over 4kB.
-You can create a multi-index entry by using :c:func:`XA_STATE_ORDER`
-or :c:func:`xas_set_order` followed by a call to :c:func:`xas_store`.
-Calling :c:func:`xas_load` with a multi-index xa_state will walk the
+You can create a multi-index entry by using XA_STATE_ORDER()
+or xas_set_order() followed by a call to xas_store().
+Calling xas_load() with a multi-index xa_state will walk the
xa_state to the right location in the tree, but the return value is not
meaningful, potentially being an internal entry or ``NULL`` even when there
-is an entry stored within the range. Calling :c:func:`xas_find_conflict`
+is an entry stored within the range. Calling xas_find_conflict()
will return the first entry within the range or ``NULL`` if there are no
-entries in the range. The :c:func:`xas_for_each_conflict` iterator will
+entries in the range. The xas_for_each_conflict() iterator will
iterate over every entry which overlaps the specified range.
-If :c:func:`xas_load` encounters a multi-index entry, the xa_index
+If xas_load() encounters a multi-index entry, the xa_index
in the xa_state will not be changed. When iterating over an XArray
-or calling :c:func:`xas_find`, if the initial index is in the middle
+or calling xas_find(), if the initial index is in the middle
of a multi-index entry, it will not be altered. Subsequent calls
or iterations will move the index to the first index in the range.
Each entry will only be returned once, no matter how many indices it
occupies.
-Using :c:func:`xas_next` or :c:func:`xas_prev` with a multi-index xa_state
-is not supported. Using either of these functions on a multi-index entry
-will reveal sibling entries; these should be skipped over by the caller.
-
-Storing ``NULL`` into any index of a multi-index entry will set the entry
-at every index to ``NULL`` and dissolve the tie. Splitting a multi-index
-entry into entries occupying smaller ranges is not yet supported.
+Using xas_next() or xas_prev() with a multi-index xa_state is not
+supported. Using either of these functions on a multi-index entry will
+reveal sibling entries; these should be skipped over by the caller.
+
+Storing ``NULL`` into any index of a multi-index entry will set the
+entry at every index to ``NULL`` and dissolve the tie. A multi-index
+entry can be split into entries occupying smaller ranges by calling
+xas_split_alloc() without the xa_lock held, followed by taking the lock
+and calling xas_split() or calling xas_try_split() with xa_lock. The
+difference between xas_split_alloc()+xas_split() and xas_try_alloc() is
+that xas_split_alloc() + xas_split() split the entry from the original
+order to the new order in one shot uniformly, whereas xas_try_split()
+iteratively splits the entry containing the index non-uniformly.
+For example, to split an order-9 entry, which takes 2^(9-6)=8 slots,
+assuming ``XA_CHUNK_SHIFT`` is 6, xas_split_alloc() + xas_split() need
+8 xa_node. xas_try_split() splits the order-9 entry into
+2 order-8 entries, then split one order-8 entry, based on the given index,
+to 2 order-7 entries, ..., and split one order-1 entry to 2 order-0 entries.
+When splitting the order-6 entry and a new xa_node is needed, xas_try_split()
+will try to allocate one if possible. As a result, xas_try_split() would only
+need 1 xa_node instead of 8.
Functions and structures
========================
diff --git a/Documentation/cpu-freq/amd-powernow.txt b/Documentation/cpu-freq/amd-powernow.txt
deleted file mode 100644
index 254da155fa47..000000000000
--- a/Documentation/cpu-freq/amd-powernow.txt
+++ /dev/null
@@ -1,38 +0,0 @@
-
-PowerNow! and Cool'n'Quiet are AMD names for frequency
-management capabilities in AMD processors. As the hardware
-implementation changes in new generations of the processors,
-there is a different cpu-freq driver for each generation.
-
-Note that the driver's will not load on the "wrong" hardware,
-so it is safe to try each driver in turn when in doubt as to
-which is the correct driver.
-
-Note that the functionality to change frequency (and voltage)
-is not available in all processors. The drivers will refuse
-to load on processors without this capability. The capability
-is detected with the cpuid instruction.
-
-The drivers use BIOS supplied tables to obtain frequency and
-voltage information appropriate for a particular platform.
-Frequency transitions will be unavailable if the BIOS does
-not supply these tables.
-
-6th Generation: powernow-k6
-
-7th Generation: powernow-k7: Athlon, Duron, Geode.
-
-8th Generation: powernow-k8: Athlon, Athlon 64, Opteron, Sempron.
-Documentation on this functionality in 8th generation processors
-is available in the "BIOS and Kernel Developer's Guide", publication
-26094, in chapter 9, available for download from www.amd.com.
-
-BIOS supplied data, for powernow-k7 and for powernow-k8, may be
-from either the PSB table or from ACPI objects. The ACPI support
-is only available if the kernel config sets CONFIG_ACPI_PROCESSOR.
-The powernow-k8 driver will attempt to use ACPI if so configured,
-and fall back to PST if that fails.
-The powernow-k7 driver will try to use the PSB support first, and
-fall back to ACPI if the PSB support fails. A module parameter,
-acpi_force, is provided to force ACPI support to be used instead
-of PSB support.
diff --git a/Documentation/cpu-freq/core.rst b/Documentation/cpu-freq/core.rst
new file mode 100644
index 000000000000..4ceef8e7217c
--- /dev/null
+++ b/Documentation/cpu-freq/core.rst
@@ -0,0 +1,113 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============================================================
+General description of the CPUFreq core and CPUFreq notifiers
+=============================================================
+
+Authors:
+ - Dominik Brodowski <linux@brodo.de>
+ - David Kimdon <dwhedon@debian.org>
+ - Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+ - Viresh Kumar <viresh.kumar@linaro.org>
+
+.. Contents:
+
+ 1. CPUFreq core and interfaces
+ 2. CPUFreq notifiers
+ 3. CPUFreq Table Generation with Operating Performance Point (OPP)
+
+1. General Information
+======================
+
+The CPUFreq core code is located in drivers/cpufreq/cpufreq.c. This
+cpufreq code offers a standardized interface for the CPUFreq
+architecture drivers (those pieces of code that do actual
+frequency transitions), as well as to "notifiers". These are device
+drivers or other part of the kernel that need to be informed of
+policy changes (ex. thermal modules like ACPI) or of all
+frequency changes (ex. timing code) or even need to force certain
+speed limits (like LCD drivers on ARM architecture). Additionally, the
+kernel "constant" loops_per_jiffy is updated on frequency changes
+here.
+
+Reference counting of the cpufreq policies is done by cpufreq_cpu_get
+and cpufreq_cpu_put, which make sure that the cpufreq driver is
+correctly registered with the core, and will not be unloaded until
+cpufreq_put_cpu is called. That also ensures that the respective cpufreq
+policy doesn't get freed while being used.
+
+2. CPUFreq notifiers
+====================
+
+CPUFreq notifiers conform to the standard kernel notifier interface.
+See linux/include/linux/notifier.h for details on notifiers.
+
+There are two different CPUFreq notifiers - policy notifiers and
+transition notifiers.
+
+
+2.1 CPUFreq policy notifiers
+----------------------------
+
+These are notified when a new policy is created or removed.
+
+The phase is specified in the second argument to the notifier. The phase is
+CPUFREQ_CREATE_POLICY when the policy is first created and it is
+CPUFREQ_REMOVE_POLICY when the policy is removed.
+
+The third argument, a ``void *pointer``, points to a struct cpufreq_policy
+consisting of several values, including min, max (the lower and upper
+frequencies (in kHz) of the new policy).
+
+
+2.2 CPUFreq transition notifiers
+--------------------------------
+
+These are notified twice for each online CPU in the policy, when the
+CPUfreq driver switches the CPU core frequency and this change has no
+any external implications.
+
+The second argument specifies the phase - CPUFREQ_PRECHANGE or
+CPUFREQ_POSTCHANGE.
+
+The third argument is a struct cpufreq_freqs with the following
+values:
+
+====== ======================================
+policy a pointer to the struct cpufreq_policy
+old old frequency
+new new frequency
+flags flags of the cpufreq driver
+====== ======================================
+
+3. CPUFreq Table Generation with Operating Performance Point (OPP)
+==================================================================
+For details about OPP, see Documentation/power/opp.rst
+
+dev_pm_opp_init_cpufreq_table -
+ This function provides a ready to use conversion routine to translate
+ the OPP layer's internal information about the available frequencies
+ into a format readily providable to cpufreq.
+
+ .. Warning::
+
+ Do not use this function in interrupt context.
+
+ Example::
+
+ soc_pm_init()
+ {
+ /* Do things */
+ r = dev_pm_opp_init_cpufreq_table(dev, &freq_table);
+ if (!r)
+ policy->freq_table = freq_table;
+ /* Do other things */
+ }
+
+ .. note::
+
+ This function is available only if CONFIG_CPU_FREQ is enabled in
+ addition to CONFIG_PM_OPP.
+
+dev_pm_opp_free_cpufreq_table
+ Free up the table allocated by dev_pm_opp_init_cpufreq_table
diff --git a/Documentation/cpu-freq/core.txt b/Documentation/cpu-freq/core.txt
deleted file mode 100644
index 073f128af5a7..000000000000
--- a/Documentation/cpu-freq/core.txt
+++ /dev/null
@@ -1,120 +0,0 @@
- CPU frequency and voltage scaling code in the Linux(TM) kernel
-
-
- L i n u x C P U F r e q
-
- C P U F r e q C o r e
-
-
- Dominik Brodowski <linux@brodo.de>
- David Kimdon <dwhedon@debian.org>
- Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Viresh Kumar <viresh.kumar@linaro.org>
-
-
-
- Clock scaling allows you to change the clock speed of the CPUs on the
- fly. This is a nice method to save battery power, because the lower
- the clock speed, the less power the CPU consumes.
-
-
-Contents:
----------
-1. CPUFreq core and interfaces
-2. CPUFreq notifiers
-3. CPUFreq Table Generation with Operating Performance Point (OPP)
-
-1. General Information
-=======================
-
-The CPUFreq core code is located in drivers/cpufreq/cpufreq.c. This
-cpufreq code offers a standardized interface for the CPUFreq
-architecture drivers (those pieces of code that do actual
-frequency transitions), as well as to "notifiers". These are device
-drivers or other part of the kernel that need to be informed of
-policy changes (ex. thermal modules like ACPI) or of all
-frequency changes (ex. timing code) or even need to force certain
-speed limits (like LCD drivers on ARM architecture). Additionally, the
-kernel "constant" loops_per_jiffy is updated on frequency changes
-here.
-
-Reference counting of the cpufreq policies is done by cpufreq_cpu_get
-and cpufreq_cpu_put, which make sure that the cpufreq driver is
-correctly registered with the core, and will not be unloaded until
-cpufreq_put_cpu is called. That also ensures that the respective cpufreq
-policy doesn't get freed while being used.
-
-2. CPUFreq notifiers
-====================
-
-CPUFreq notifiers conform to the standard kernel notifier interface.
-See linux/include/linux/notifier.h for details on notifiers.
-
-There are two different CPUFreq notifiers - policy notifiers and
-transition notifiers.
-
-
-2.1 CPUFreq policy notifiers
-----------------------------
-
-These are notified when a new policy is intended to be set. Each
-CPUFreq policy notifier is called twice for a policy transition:
-
-1.) During CPUFREQ_ADJUST all CPUFreq notifiers may change the limit if
- they see a need for this - may it be thermal considerations or
- hardware limitations.
-
-2.) And during CPUFREQ_NOTIFY all notifiers are informed of the new policy
- - if two hardware drivers failed to agree on a new policy before this
- stage, the incompatible hardware shall be shut down, and the user
- informed of this.
-
-The phase is specified in the second argument to the notifier.
-
-The third argument, a void *pointer, points to a struct cpufreq_policy
-consisting of several values, including min, max (the lower and upper
-frequencies (in kHz) of the new policy).
-
-
-2.2 CPUFreq transition notifiers
---------------------------------
-
-These are notified twice for each online CPU in the policy, when the
-CPUfreq driver switches the CPU core frequency and this change has no
-any external implications.
-
-The second argument specifies the phase - CPUFREQ_PRECHANGE or
-CPUFREQ_POSTCHANGE.
-
-The third argument is a struct cpufreq_freqs with the following
-values:
-cpu - number of the affected CPU
-old - old frequency
-new - new frequency
-flags - flags of the cpufreq driver
-
-3. CPUFreq Table Generation with Operating Performance Point (OPP)
-==================================================================
-For details about OPP, see Documentation/power/opp.txt
-
-dev_pm_opp_init_cpufreq_table -
- This function provides a ready to use conversion routine to translate
- the OPP layer's internal information about the available frequencies
- into a format readily providable to cpufreq.
-
- WARNING: Do not use this function in interrupt context.
-
- Example:
- soc_pm_init()
- {
- /* Do things */
- r = dev_pm_opp_init_cpufreq_table(dev, &freq_table);
- if (!r)
- policy->freq_table = freq_table;
- /* Do other things */
- }
-
- NOTE: This function is available only if CONFIG_CPU_FREQ is enabled in
- addition to CONFIG_PM_OPP.
-
-dev_pm_opp_free_cpufreq_table - Free up the table allocated by dev_pm_opp_init_cpufreq_table
diff --git a/Documentation/cpu-freq/cpu-drivers.rst b/Documentation/cpu-freq/cpu-drivers.rst
new file mode 100644
index 000000000000..c5635ac3de54
--- /dev/null
+++ b/Documentation/cpu-freq/cpu-drivers.rst
@@ -0,0 +1,285 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============================================
+How to Implement a new CPUFreq Processor Driver
+===============================================
+
+Authors:
+
+
+ - Dominik Brodowski <linux@brodo.de>
+ - Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+ - Viresh Kumar <viresh.kumar@linaro.org>
+
+.. Contents
+
+ 1. What To Do?
+ 1.1 Initialization
+ 1.2 Per-CPU Initialization
+ 1.3 verify
+ 1.4 target/target_index or setpolicy?
+ 1.5 target/target_index
+ 1.6 setpolicy
+ 1.7 get_intermediate and target_intermediate
+ 2. Frequency Table Helpers
+
+
+
+1. What To Do?
+==============
+
+So, you just got a brand-new CPU / chipset with datasheets and want to
+add cpufreq support for this CPU / chipset? Great. Here are some hints
+on what is necessary:
+
+
+1.1 Initialization
+------------------
+
+First of all, in an __initcall level 7 (module_init()) or later
+function check whether this kernel runs on the right CPU and the right
+chipset. If so, register a struct cpufreq_driver with the CPUfreq core
+using cpufreq_register_driver()
+
+What shall this struct cpufreq_driver contain?
+
+ .name - The name of this driver.
+
+ .init - A pointer to the per-policy initialization function.
+
+ .verify - A pointer to a "verification" function.
+
+ .setpolicy _or_ .fast_switch _or_ .target _or_ .target_index - See
+ below on the differences.
+
+And optionally
+
+ .flags - Hints for the cpufreq core.
+
+ .driver_data - cpufreq driver specific data.
+
+ .get_intermediate and target_intermediate - Used to switch to stable
+ frequency while changing CPU frequency.
+
+ .get - Returns current frequency of the CPU.
+
+ .bios_limit - Returns HW/BIOS max frequency limitations for the CPU.
+
+ .exit - A pointer to a per-policy cleanup function called during
+ CPU_POST_DEAD phase of cpu hotplug process.
+
+ .suspend - A pointer to a per-policy suspend function which is called
+ with interrupts disabled and _after_ the governor is stopped for the
+ policy.
+
+ .resume - A pointer to a per-policy resume function which is called
+ with interrupts disabled and _before_ the governor is started again.
+
+ .ready - A pointer to a per-policy ready function which is called after
+ the policy is fully initialized.
+
+ .attr - A pointer to a NULL-terminated list of "struct freq_attr" which
+ allow to export values to sysfs.
+
+ .boost_enabled - If set, boost frequencies are enabled.
+
+ .set_boost - A pointer to a per-policy function to enable/disable boost
+ frequencies.
+
+
+1.2 Per-CPU Initialization
+--------------------------
+
+Whenever a new CPU is registered with the device model, or after the
+cpufreq driver registers itself, the per-policy initialization function
+cpufreq_driver.init is called if no cpufreq policy existed for the CPU.
+Note that the .init() and .exit() routines are called only once for the
+policy and not for each CPU managed by the policy. It takes a ``struct
+cpufreq_policy *policy`` as argument. What to do now?
+
+If necessary, activate the CPUfreq support on your CPU.
+
+Then, the driver must fill in the following values:
+
++-----------------------------------+--------------------------------------+
+|policy->cpuinfo.min_freq _and_ | |
+|policy->cpuinfo.max_freq | the minimum and maximum frequency |
+| | (in kHz) which is supported by |
+| | this CPU |
++-----------------------------------+--------------------------------------+
+|policy->cpuinfo.transition_latency | the time it takes on this CPU to |
+| | switch between two frequencies in |
+| | nanoseconds |
++-----------------------------------+--------------------------------------+
+|policy->cur | The current operating frequency of |
+| | this CPU (if appropriate) |
++-----------------------------------+--------------------------------------+
+|policy->min, | |
+|policy->max, | |
+|policy->policy and, if necessary, | |
+|policy->governor | must contain the "default policy" for|
+| | this CPU. A few moments later, |
+| | cpufreq_driver.verify and either |
+| | cpufreq_driver.setpolicy or |
+| | cpufreq_driver.target/target_index is|
+| | called with these values. |
++-----------------------------------+--------------------------------------+
+|policy->cpus | Update this with the masks of the |
+| | (online + offline) CPUs that do DVFS |
+| | along with this CPU (i.e. that share|
+| | clock/voltage rails with it). |
++-----------------------------------+--------------------------------------+
+
+For setting some of these values (cpuinfo.min[max]_freq, policy->min[max]), the
+frequency table helpers might be helpful. See the section 2 for more information
+on them.
+
+
+1.3 verify
+----------
+
+When the user decides a new policy (consisting of
+"policy,governor,min,max") shall be set, this policy must be validated
+so that incompatible values can be corrected. For verifying these
+values cpufreq_verify_within_limits(``struct cpufreq_policy *policy``,
+``unsigned int min_freq``, ``unsigned int max_freq``) function might be helpful.
+See section 2 for details on frequency table helpers.
+
+You need to make sure that at least one valid frequency (or operating
+range) is within policy->min and policy->max. If necessary, increase
+policy->max first, and only if this is no solution, decrease policy->min.
+
+
+1.4 target or target_index or setpolicy or fast_switch?
+-------------------------------------------------------
+
+Most cpufreq drivers or even most cpu frequency scaling algorithms
+only allow the CPU frequency to be set to predefined fixed values. For
+these, you use the ->target(), ->target_index() or ->fast_switch()
+callbacks.
+
+Some cpufreq capable processors switch the frequency between certain
+limits on their own. These shall use the ->setpolicy() callback.
+
+
+1.5. target/target_index
+------------------------
+
+The target_index call has two arguments: ``struct cpufreq_policy *policy``,
+and ``unsigned int`` index (into the exposed frequency table).
+
+The CPUfreq driver must set the new frequency when called here. The
+actual frequency must be determined by freq_table[index].frequency.
+
+It should always restore to earlier frequency (i.e. policy->restore_freq) in
+case of errors, even if we switched to intermediate frequency earlier.
+
+Deprecated
+----------
+The target call has three arguments: ``struct cpufreq_policy *policy``,
+unsigned int target_frequency, unsigned int relation.
+
+The CPUfreq driver must set the new frequency when called here. The
+actual frequency must be determined using the following rules:
+
+- keep close to "target_freq"
+- policy->min <= new_freq <= policy->max (THIS MUST BE VALID!!!)
+- if relation==CPUFREQ_REL_L, try to select a new_freq higher than or equal
+ target_freq. ("L for lowest, but no lower than")
+- if relation==CPUFREQ_REL_H, try to select a new_freq lower than or equal
+ target_freq. ("H for highest, but no higher than")
+
+Here again the frequency table helper might assist you - see section 2
+for details.
+
+1.6. fast_switch
+----------------
+
+This function is used for frequency switching from scheduler's context.
+Not all drivers are expected to implement it, as sleeping from within
+this callback isn't allowed. This callback must be highly optimized to
+do switching as fast as possible.
+
+This function has two arguments: ``struct cpufreq_policy *policy`` and
+``unsigned int target_frequency``.
+
+
+1.7 setpolicy
+-------------
+
+The setpolicy call only takes a ``struct cpufreq_policy *policy`` as
+argument. You need to set the lower limit of the in-processor or
+in-chipset dynamic frequency switching to policy->min, the upper limit
+to policy->max, and -if supported- select a performance-oriented
+setting when policy->policy is CPUFREQ_POLICY_PERFORMANCE, and a
+powersaving-oriented setting when CPUFREQ_POLICY_POWERSAVE. Also check
+the reference implementation in drivers/cpufreq/longrun.c
+
+1.8 get_intermediate and target_intermediate
+--------------------------------------------
+
+Only for drivers with target_index() and CPUFREQ_ASYNC_NOTIFICATION unset.
+
+get_intermediate should return a stable intermediate frequency platform wants to
+switch to, and target_intermediate() should set CPU to that frequency, before
+jumping to the frequency corresponding to 'index'. Core will take care of
+sending notifications and driver doesn't have to handle them in
+target_intermediate() or target_index().
+
+Drivers can return '0' from get_intermediate() in case they don't wish to switch
+to intermediate frequency for some target frequency. In that case core will
+directly call ->target_index().
+
+NOTE: ->target_index() should restore to policy->restore_freq in case of
+failures as core would send notifications for that.
+
+
+2. Frequency Table Helpers
+==========================
+
+As most cpufreq processors only allow for being set to a few specific
+frequencies, a "frequency table" with some functions might assist in
+some work of the processor driver. Such a "frequency table" consists of
+an array of struct cpufreq_frequency_table entries, with driver specific
+values in "driver_data", the corresponding frequency in "frequency" and
+flags set. At the end of the table, you need to add a
+cpufreq_frequency_table entry with frequency set to CPUFREQ_TABLE_END.
+And if you want to skip one entry in the table, set the frequency to
+CPUFREQ_ENTRY_INVALID. The entries don't need to be in sorted in any
+particular order, but if they are cpufreq core will do DVFS a bit
+quickly for them as search for best match is faster.
+
+The cpufreq table is verified automatically by the core if the policy contains a
+valid pointer in its policy->freq_table field.
+
+cpufreq_frequency_table_verify() assures that at least one valid
+frequency is within policy->min and policy->max, and all other criteria
+are met. This is helpful for the ->verify call.
+
+cpufreq_frequency_table_target() is the corresponding frequency table
+helper for the ->target stage. Just pass the values to this function,
+and this function returns the of the frequency table entry which
+contains the frequency the CPU shall be set to.
+
+The following macros can be used as iterators over cpufreq_frequency_table:
+
+cpufreq_for_each_entry(pos, table) - iterates over all entries of frequency
+table.
+
+cpufreq_for_each_valid_entry(pos, table) - iterates over all entries,
+excluding CPUFREQ_ENTRY_INVALID frequencies.
+Use arguments "pos" - a ``cpufreq_frequency_table *`` as a loop cursor and
+"table" - the ``cpufreq_frequency_table *`` you want to iterate over.
+
+For example::
+
+ struct cpufreq_frequency_table *pos, *driver_freq_table;
+
+ cpufreq_for_each_entry(pos, driver_freq_table) {
+ /* Do something with pos */
+ pos->frequency = ...
+ }
+
+If you need to work with the position of pos within driver_freq_table,
+do not subtract the pointers, as it is quite costly. Instead, use the
+macros cpufreq_for_each_entry_idx() and cpufreq_for_each_valid_entry_idx().
diff --git a/Documentation/cpu-freq/cpu-drivers.txt b/Documentation/cpu-freq/cpu-drivers.txt
deleted file mode 100644
index 6e353d00cdc6..000000000000
--- a/Documentation/cpu-freq/cpu-drivers.txt
+++ /dev/null
@@ -1,295 +0,0 @@
- CPU frequency and voltage scaling code in the Linux(TM) kernel
-
-
- L i n u x C P U F r e q
-
- C P U D r i v e r s
-
- - information for developers -
-
-
- Dominik Brodowski <linux@brodo.de>
- Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Viresh Kumar <viresh.kumar@linaro.org>
-
-
-
- Clock scaling allows you to change the clock speed of the CPUs on the
- fly. This is a nice method to save battery power, because the lower
- the clock speed, the less power the CPU consumes.
-
-
-Contents:
----------
-1. What To Do?
-1.1 Initialization
-1.2 Per-CPU Initialization
-1.3 verify
-1.4 target/target_index or setpolicy?
-1.5 target/target_index
-1.6 setpolicy
-1.7 get_intermediate and target_intermediate
-2. Frequency Table Helpers
-
-
-
-1. What To Do?
-==============
-
-So, you just got a brand-new CPU / chipset with datasheets and want to
-add cpufreq support for this CPU / chipset? Great. Here are some hints
-on what is necessary:
-
-
-1.1 Initialization
-------------------
-
-First of all, in an __initcall level 7 (module_init()) or later
-function check whether this kernel runs on the right CPU and the right
-chipset. If so, register a struct cpufreq_driver with the CPUfreq core
-using cpufreq_register_driver()
-
-What shall this struct cpufreq_driver contain?
-
- .name - The name of this driver.
-
- .init - A pointer to the per-policy initialization function.
-
- .verify - A pointer to a "verification" function.
-
- .setpolicy _or_ .fast_switch _or_ .target _or_ .target_index - See
- below on the differences.
-
-And optionally
-
- .flags - Hints for the cpufreq core.
-
- .driver_data - cpufreq driver specific data.
-
- .resolve_freq - Returns the most appropriate frequency for a target
- frequency. Doesn't change the frequency though.
-
- .get_intermediate and target_intermediate - Used to switch to stable
- frequency while changing CPU frequency.
-
- .get - Returns current frequency of the CPU.
-
- .bios_limit - Returns HW/BIOS max frequency limitations for the CPU.
-
- .exit - A pointer to a per-policy cleanup function called during
- CPU_POST_DEAD phase of cpu hotplug process.
-
- .stop_cpu - A pointer to a per-policy stop function called during
- CPU_DOWN_PREPARE phase of cpu hotplug process.
-
- .suspend - A pointer to a per-policy suspend function which is called
- with interrupts disabled and _after_ the governor is stopped for the
- policy.
-
- .resume - A pointer to a per-policy resume function which is called
- with interrupts disabled and _before_ the governor is started again.
-
- .ready - A pointer to a per-policy ready function which is called after
- the policy is fully initialized.
-
- .attr - A pointer to a NULL-terminated list of "struct freq_attr" which
- allow to export values to sysfs.
-
- .boost_enabled - If set, boost frequencies are enabled.
-
- .set_boost - A pointer to a per-policy function to enable/disable boost
- frequencies.
-
-
-1.2 Per-CPU Initialization
---------------------------
-
-Whenever a new CPU is registered with the device model, or after the
-cpufreq driver registers itself, the per-policy initialization function
-cpufreq_driver.init is called if no cpufreq policy existed for the CPU.
-Note that the .init() and .exit() routines are called only once for the
-policy and not for each CPU managed by the policy. It takes a struct
-cpufreq_policy *policy as argument. What to do now?
-
-If necessary, activate the CPUfreq support on your CPU.
-
-Then, the driver must fill in the following values:
-
-policy->cpuinfo.min_freq _and_
-policy->cpuinfo.max_freq - the minimum and maximum frequency
- (in kHz) which is supported by
- this CPU
-policy->cpuinfo.transition_latency the time it takes on this CPU to
- switch between two frequencies in
- nanoseconds (if appropriate, else
- specify CPUFREQ_ETERNAL)
-
-policy->cur The current operating frequency of
- this CPU (if appropriate)
-policy->min,
-policy->max,
-policy->policy and, if necessary,
-policy->governor must contain the "default policy" for
- this CPU. A few moments later,
- cpufreq_driver.verify and either
- cpufreq_driver.setpolicy or
- cpufreq_driver.target/target_index is called
- with these values.
-policy->cpus Update this with the masks of the
- (online + offline) CPUs that do DVFS
- along with this CPU (i.e. that share
- clock/voltage rails with it).
-
-For setting some of these values (cpuinfo.min[max]_freq, policy->min[max]), the
-frequency table helpers might be helpful. See the section 2 for more information
-on them.
-
-
-1.3 verify
-----------
-
-When the user decides a new policy (consisting of
-"policy,governor,min,max") shall be set, this policy must be validated
-so that incompatible values can be corrected. For verifying these
-values cpufreq_verify_within_limits(struct cpufreq_policy *policy,
-unsigned int min_freq, unsigned int max_freq) function might be helpful.
-See section 2 for details on frequency table helpers.
-
-You need to make sure that at least one valid frequency (or operating
-range) is within policy->min and policy->max. If necessary, increase
-policy->max first, and only if this is no solution, decrease policy->min.
-
-
-1.4 target or target_index or setpolicy or fast_switch?
--------------------------------------------------------
-
-Most cpufreq drivers or even most cpu frequency scaling algorithms
-only allow the CPU frequency to be set to predefined fixed values. For
-these, you use the ->target(), ->target_index() or ->fast_switch()
-callbacks.
-
-Some cpufreq capable processors switch the frequency between certain
-limits on their own. These shall use the ->setpolicy() callback.
-
-
-1.5. target/target_index
-------------------------
-
-The target_index call has two arguments: struct cpufreq_policy *policy,
-and unsigned int index (into the exposed frequency table).
-
-The CPUfreq driver must set the new frequency when called here. The
-actual frequency must be determined by freq_table[index].frequency.
-
-It should always restore to earlier frequency (i.e. policy->restore_freq) in
-case of errors, even if we switched to intermediate frequency earlier.
-
-Deprecated:
-----------
-The target call has three arguments: struct cpufreq_policy *policy,
-unsigned int target_frequency, unsigned int relation.
-
-The CPUfreq driver must set the new frequency when called here. The
-actual frequency must be determined using the following rules:
-
-- keep close to "target_freq"
-- policy->min <= new_freq <= policy->max (THIS MUST BE VALID!!!)
-- if relation==CPUFREQ_REL_L, try to select a new_freq higher than or equal
- target_freq. ("L for lowest, but no lower than")
-- if relation==CPUFREQ_REL_H, try to select a new_freq lower than or equal
- target_freq. ("H for highest, but no higher than")
-
-Here again the frequency table helper might assist you - see section 2
-for details.
-
-1.6. fast_switch
-----------------
-
-This function is used for frequency switching from scheduler's context.
-Not all drivers are expected to implement it, as sleeping from within
-this callback isn't allowed. This callback must be highly optimized to
-do switching as fast as possible.
-
-This function has two arguments: struct cpufreq_policy *policy and
-unsigned int target_frequency.
-
-
-1.7 setpolicy
--------------
-
-The setpolicy call only takes a struct cpufreq_policy *policy as
-argument. You need to set the lower limit of the in-processor or
-in-chipset dynamic frequency switching to policy->min, the upper limit
-to policy->max, and -if supported- select a performance-oriented
-setting when policy->policy is CPUFREQ_POLICY_PERFORMANCE, and a
-powersaving-oriented setting when CPUFREQ_POLICY_POWERSAVE. Also check
-the reference implementation in drivers/cpufreq/longrun.c
-
-1.8 get_intermediate and target_intermediate
---------------------------------------------
-
-Only for drivers with target_index() and CPUFREQ_ASYNC_NOTIFICATION unset.
-
-get_intermediate should return a stable intermediate frequency platform wants to
-switch to, and target_intermediate() should set CPU to that frequency, before
-jumping to the frequency corresponding to 'index'. Core will take care of
-sending notifications and driver doesn't have to handle them in
-target_intermediate() or target_index().
-
-Drivers can return '0' from get_intermediate() in case they don't wish to switch
-to intermediate frequency for some target frequency. In that case core will
-directly call ->target_index().
-
-NOTE: ->target_index() should restore to policy->restore_freq in case of
-failures as core would send notifications for that.
-
-
-2. Frequency Table Helpers
-==========================
-
-As most cpufreq processors only allow for being set to a few specific
-frequencies, a "frequency table" with some functions might assist in
-some work of the processor driver. Such a "frequency table" consists of
-an array of struct cpufreq_frequency_table entries, with driver specific
-values in "driver_data", the corresponding frequency in "frequency" and
-flags set. At the end of the table, you need to add a
-cpufreq_frequency_table entry with frequency set to CPUFREQ_TABLE_END.
-And if you want to skip one entry in the table, set the frequency to
-CPUFREQ_ENTRY_INVALID. The entries don't need to be in sorted in any
-particular order, but if they are cpufreq core will do DVFS a bit
-quickly for them as search for best match is faster.
-
-The cpufreq table is verified automatically by the core if the policy contains a
-valid pointer in its policy->freq_table field.
-
-cpufreq_frequency_table_verify() assures that at least one valid
-frequency is within policy->min and policy->max, and all other criteria
-are met. This is helpful for the ->verify call.
-
-cpufreq_frequency_table_target() is the corresponding frequency table
-helper for the ->target stage. Just pass the values to this function,
-and this function returns the of the frequency table entry which
-contains the frequency the CPU shall be set to.
-
-The following macros can be used as iterators over cpufreq_frequency_table:
-
-cpufreq_for_each_entry(pos, table) - iterates over all entries of frequency
-table.
-
-cpufreq_for_each_valid_entry(pos, table) - iterates over all entries,
-excluding CPUFREQ_ENTRY_INVALID frequencies.
-Use arguments "pos" - a cpufreq_frequency_table * as a loop cursor and
-"table" - the cpufreq_frequency_table * you want to iterate over.
-
-For example:
-
- struct cpufreq_frequency_table *pos, *driver_freq_table;
-
- cpufreq_for_each_entry(pos, driver_freq_table) {
- /* Do something with pos */
- pos->frequency = ...
- }
-
-If you need to work with the position of pos within driver_freq_table,
-do not subtract the pointers, as it is quite costly. Instead, use the
-macros cpufreq_for_each_entry_idx() and cpufreq_for_each_valid_entry_idx().
diff --git a/Documentation/cpu-freq/cpufreq-nforce2.txt b/Documentation/cpu-freq/cpufreq-nforce2.txt
deleted file mode 100644
index babce1315026..000000000000
--- a/Documentation/cpu-freq/cpufreq-nforce2.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-
-The cpufreq-nforce2 driver changes the FSB on nVidia nForce2 platforms.
-
-This works better than on other platforms, because the FSB of the CPU
-can be controlled independently from the PCI/AGP clock.
-
-The module has two options:
-
- fid: multiplier * 10 (for example 8.5 = 85)
- min_fsb: minimum FSB
-
-If not set, fid is calculated from the current CPU speed and the FSB.
-min_fsb defaults to FSB at boot time - 50 MHz.
-
-IMPORTANT: The available range is limited downwards!
- Also the minimum available FSB can differ, for systems
- booting with 200 MHz, 150 should always work.
-
-
diff --git a/Documentation/cpu-freq/cpufreq-stats.rst b/Documentation/cpu-freq/cpufreq-stats.rst
new file mode 100644
index 000000000000..9ad695b1c7db
--- /dev/null
+++ b/Documentation/cpu-freq/cpufreq-stats.rst
@@ -0,0 +1,136 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========================================
+General Description of sysfs CPUFreq Stats
+==========================================
+
+information for users
+
+
+Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+
+.. Contents
+
+ 1. Introduction
+ 2. Statistics Provided (with example)
+ 3. Configuring cpufreq-stats
+
+
+1. Introduction
+===============
+
+cpufreq-stats is a driver that provides CPU frequency statistics for each CPU.
+These statistics are provided in /sysfs as a bunch of read_only interfaces. This
+interface (when configured) will appear in a separate directory under cpufreq
+in /sysfs (<sysfs root>/devices/system/cpu/cpuX/cpufreq/stats/) for each CPU.
+Various statistics will form read_only files under this directory.
+
+This driver is designed to be independent of any particular cpufreq_driver
+that may be running on your CPU. So, it will work with any cpufreq_driver.
+
+
+2. Statistics Provided (with example)
+=====================================
+
+cpufreq stats provides following statistics (explained in detail below).
+
+- time_in_state
+- total_trans
+- trans_table
+
+All the statistics will be from the time the stats driver has been inserted
+(or the time the stats were reset) to the time when a read of a particular
+statistic is done. Obviously, stats driver will not have any information
+about the frequency transitions before the stats driver insertion.
+
+::
+
+ <mysystem>:/sys/devices/system/cpu/cpu0/cpufreq/stats # ls -l
+ total 0
+ drwxr-xr-x 2 root root 0 May 14 16:06 .
+ drwxr-xr-x 3 root root 0 May 14 15:58 ..
+ --w------- 1 root root 4096 May 14 16:06 reset
+ -r--r--r-- 1 root root 4096 May 14 16:06 time_in_state
+ -r--r--r-- 1 root root 4096 May 14 16:06 total_trans
+ -r--r--r-- 1 root root 4096 May 14 16:06 trans_table
+
+- **reset**
+
+Write-only attribute that can be used to reset the stat counters. This can be
+useful for evaluating system behaviour under different governors without the
+need for a reboot.
+
+- **time_in_state**
+
+This gives the amount of time spent in each of the frequencies supported by
+this CPU. The cat output will have "<frequency> <time>" pair in each line, which
+will mean this CPU spent <time> usertime units of time at <frequency>. Output
+will have one line for each of the supported frequencies. usertime units here
+is 10mS (similar to other time exported in /proc).
+
+::
+
+ <mysystem>:/sys/devices/system/cpu/cpu0/cpufreq/stats # cat time_in_state
+ 3600000 2089
+ 3400000 136
+ 3200000 34
+ 3000000 67
+ 2800000 172488
+
+
+- **total_trans**
+
+This gives the total number of frequency transitions on this CPU. The cat
+output will have a single count which is the total number of frequency
+transitions.
+
+::
+
+ <mysystem>:/sys/devices/system/cpu/cpu0/cpufreq/stats # cat total_trans
+ 20
+
+- **trans_table**
+
+This will give a fine grained information about all the CPU frequency
+transitions. The cat output here is a two dimensional matrix, where an entry
+<i,j> (row i, column j) represents the count of number of transitions from
+Freq_i to Freq_j. Freq_i rows and Freq_j columns follow the sorting order in
+which the driver has provided the frequency table initially to the cpufreq core
+and so can be sorted (ascending or descending) or unsorted. The output here
+also contains the actual freq values for each row and column for better
+readability.
+
+If the transition table is bigger than PAGE_SIZE, reading this will
+return an -EFBIG error.
+
+::
+
+ <mysystem>:/sys/devices/system/cpu/cpu0/cpufreq/stats # cat trans_table
+ From : To
+ : 3600000 3400000 3200000 3000000 2800000
+ 3600000: 0 5 0 0 0
+ 3400000: 4 0 2 0 0
+ 3200000: 0 1 0 2 0
+ 3000000: 0 0 1 0 3
+ 2800000: 0 0 0 2 0
+
+3. Configuring cpufreq-stats
+============================
+
+To configure cpufreq-stats in your kernel::
+
+ Config Main Menu
+ Power management options (ACPI, APM) --->
+ CPU Frequency scaling --->
+ [*] CPU Frequency scaling
+ [*] CPU frequency translation statistics
+
+
+"CPU Frequency scaling" (CONFIG_CPU_FREQ) should be enabled to configure
+cpufreq-stats.
+
+"CPU frequency translation statistics" (CONFIG_CPU_FREQ_STAT) provides the
+statistics which includes time_in_state, total_trans and trans_table.
+
+Once this option is enabled and your CPU supports cpufrequency, you
+will be able to see the CPU frequency statistics in /sysfs.
diff --git a/Documentation/cpu-freq/cpufreq-stats.txt b/Documentation/cpu-freq/cpufreq-stats.txt
deleted file mode 100644
index 14378cecb172..000000000000
--- a/Documentation/cpu-freq/cpufreq-stats.txt
+++ /dev/null
@@ -1,127 +0,0 @@
-
- CPU frequency and voltage scaling statistics in the Linux(TM) kernel
-
-
- L i n u x c p u f r e q - s t a t s d r i v e r
-
- - information for users -
-
-
- Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
-
-Contents
-1. Introduction
-2. Statistics Provided (with example)
-3. Configuring cpufreq-stats
-
-
-1. Introduction
-
-cpufreq-stats is a driver that provides CPU frequency statistics for each CPU.
-These statistics are provided in /sysfs as a bunch of read_only interfaces. This
-interface (when configured) will appear in a separate directory under cpufreq
-in /sysfs (<sysfs root>/devices/system/cpu/cpuX/cpufreq/stats/) for each CPU.
-Various statistics will form read_only files under this directory.
-
-This driver is designed to be independent of any particular cpufreq_driver
-that may be running on your CPU. So, it will work with any cpufreq_driver.
-
-
-2. Statistics Provided (with example)
-
-cpufreq stats provides following statistics (explained in detail below).
-- time_in_state
-- total_trans
-- trans_table
-
-All the statistics will be from the time the stats driver has been inserted
-(or the time the stats were reset) to the time when a read of a particular
-statistic is done. Obviously, stats driver will not have any information
-about the frequency transitions before the stats driver insertion.
-
---------------------------------------------------------------------------------
-<mysystem>:/sys/devices/system/cpu/cpu0/cpufreq/stats # ls -l
-total 0
-drwxr-xr-x 2 root root 0 May 14 16:06 .
-drwxr-xr-x 3 root root 0 May 14 15:58 ..
---w------- 1 root root 4096 May 14 16:06 reset
--r--r--r-- 1 root root 4096 May 14 16:06 time_in_state
--r--r--r-- 1 root root 4096 May 14 16:06 total_trans
--r--r--r-- 1 root root 4096 May 14 16:06 trans_table
---------------------------------------------------------------------------------
-
-- reset
-Write-only attribute that can be used to reset the stat counters. This can be
-useful for evaluating system behaviour under different governors without the
-need for a reboot.
-
-- time_in_state
-This gives the amount of time spent in each of the frequencies supported by
-this CPU. The cat output will have "<frequency> <time>" pair in each line, which
-will mean this CPU spent <time> usertime units of time at <frequency>. Output
-will have one line for each of the supported frequencies. usertime units here
-is 10mS (similar to other time exported in /proc).
-
---------------------------------------------------------------------------------
-<mysystem>:/sys/devices/system/cpu/cpu0/cpufreq/stats # cat time_in_state
-3600000 2089
-3400000 136
-3200000 34
-3000000 67
-2800000 172488
---------------------------------------------------------------------------------
-
-
-- total_trans
-This gives the total number of frequency transitions on this CPU. The cat
-output will have a single count which is the total number of frequency
-transitions.
-
---------------------------------------------------------------------------------
-<mysystem>:/sys/devices/system/cpu/cpu0/cpufreq/stats # cat total_trans
-20
---------------------------------------------------------------------------------
-
-- trans_table
-This will give a fine grained information about all the CPU frequency
-transitions. The cat output here is a two dimensional matrix, where an entry
-<i,j> (row i, column j) represents the count of number of transitions from
-Freq_i to Freq_j. Freq_i rows and Freq_j columns follow the sorting order in
-which the driver has provided the frequency table initially to the cpufreq core
-and so can be sorted (ascending or descending) or unsorted. The output here
-also contains the actual freq values for each row and column for better
-readability.
-
-If the transition table is bigger than PAGE_SIZE, reading this will
-return an -EFBIG error.
-
---------------------------------------------------------------------------------
-<mysystem>:/sys/devices/system/cpu/cpu0/cpufreq/stats # cat trans_table
- From : To
- : 3600000 3400000 3200000 3000000 2800000
- 3600000: 0 5 0 0 0
- 3400000: 4 0 2 0 0
- 3200000: 0 1 0 2 0
- 3000000: 0 0 1 0 3
- 2800000: 0 0 0 2 0
---------------------------------------------------------------------------------
-
-
-3. Configuring cpufreq-stats
-
-To configure cpufreq-stats in your kernel
-Config Main Menu
- Power management options (ACPI, APM) --->
- CPU Frequency scaling --->
- [*] CPU Frequency scaling
- [*] CPU frequency translation statistics
-
-
-"CPU Frequency scaling" (CONFIG_CPU_FREQ) should be enabled to configure
-cpufreq-stats.
-
-"CPU frequency translation statistics" (CONFIG_CPU_FREQ_STAT) provides the
-statistics which includes time_in_state, total_trans and trans_table.
-
-Once this option is enabled and your CPU supports cpufrequency, you
-will be able to see the CPU frequency statistics in /sysfs.
diff --git a/Documentation/cpu-freq/index.rst b/Documentation/cpu-freq/index.rst
new file mode 100644
index 000000000000..de25740651f7
--- /dev/null
+++ b/Documentation/cpu-freq/index.rst
@@ -0,0 +1,36 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+========================================================================
+CPUFreq - CPU frequency and voltage scaling code in the Linux(TM) kernel
+========================================================================
+
+Author: Dominik Brodowski <linux@brodo.de>
+
+ Clock scaling allows you to change the clock speed of the CPUs on the
+ fly. This is a nice method to save battery power, because the lower
+ the clock speed, the less power the CPU consumes.
+
+
+.. toctree::
+ :maxdepth: 1
+
+ core
+ cpu-drivers
+ cpufreq-stats
+
+Mailing List
+------------
+There is a CPU frequency general list where you can report bugs,
+problems or submit patches. To post a message, send an email to
+linux-pm@vger.kernel.org.
+
+Links
+-----
+the FTP archives:
+* ftp://ftp.linux.org.uk/pub/linux/cpufreq/
+
+the CPUFreq Mailing list:
+* http://vger.kernel.org/vger-lists.html#linux-pm
+
+Clock and voltage scaling for the SA-1100:
+* http://www.lartmaker.nl/projects/scaling
diff --git a/Documentation/cpu-freq/index.txt b/Documentation/cpu-freq/index.txt
deleted file mode 100644
index c15e75386a05..000000000000
--- a/Documentation/cpu-freq/index.txt
+++ /dev/null
@@ -1,56 +0,0 @@
- CPU frequency and voltage scaling code in the Linux(TM) kernel
-
-
- L i n u x C P U F r e q
-
-
-
-
- Dominik Brodowski <linux@brodo.de>
-
-
-
- Clock scaling allows you to change the clock speed of the CPUs on the
- fly. This is a nice method to save battery power, because the lower
- the clock speed, the less power the CPU consumes.
-
-
-
-Documents in this directory:
-----------------------------
-
-amd-powernow.txt - AMD powernow driver specific file.
-
-core.txt - General description of the CPUFreq core and
- of CPUFreq notifiers.
-
-cpu-drivers.txt - How to implement a new cpufreq processor driver.
-
-cpufreq-nforce2.txt - nVidia nForce2 platform specific file.
-
-cpufreq-stats.txt - General description of sysfs cpufreq stats.
-
-index.txt - File index, Mailing list and Links (this document)
-
-pcc-cpufreq.txt - PCC cpufreq driver specific file.
-
-
-Mailing List
-------------
-There is a CPU frequency changing CVS commit and general list where
-you can report bugs, problems or submit patches. To post a message,
-send an email to linux-pm@vger.kernel.org.
-
-Links
------
-the FTP archives:
-* ftp://ftp.linux.org.uk/pub/linux/cpufreq/
-
-how to access the CVS repository:
-* http://cvs.arm.linux.org.uk/
-
-the CPUFreq Mailing list:
-* http://vger.kernel.org/vger-lists.html#linux-pm
-
-Clock and voltage scaling for the SA-1100:
-* http://www.lartmaker.nl/projects/scaling
diff --git a/Documentation/cpu-freq/pcc-cpufreq.txt b/Documentation/cpu-freq/pcc-cpufreq.txt
deleted file mode 100644
index 9e3c3b33514c..000000000000
--- a/Documentation/cpu-freq/pcc-cpufreq.txt
+++ /dev/null
@@ -1,207 +0,0 @@
-/*
- * pcc-cpufreq.txt - PCC interface documentation
- *
- * Copyright (C) 2009 Red Hat, Matthew Garrett <mjg@redhat.com>
- * Copyright (C) 2009 Hewlett-Packard Development Company, L.P.
- * Nagananda Chumbalkar <nagananda.chumbalkar@hp.com>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or NON
- * INFRINGEMENT. See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- */
-
-
- Processor Clocking Control Driver
- ---------------------------------
-
-Contents:
----------
-1. Introduction
-1.1 PCC interface
-1.1.1 Get Average Frequency
-1.1.2 Set Desired Frequency
-1.2 Platforms affected
-2. Driver and /sys details
-2.1 scaling_available_frequencies
-2.2 cpuinfo_transition_latency
-2.3 cpuinfo_cur_freq
-2.4 related_cpus
-3. Caveats
-
-1. Introduction:
-----------------
-Processor Clocking Control (PCC) is an interface between the platform
-firmware and OSPM. It is a mechanism for coordinating processor
-performance (ie: frequency) between the platform firmware and the OS.
-
-The PCC driver (pcc-cpufreq) allows OSPM to take advantage of the PCC
-interface.
-
-OS utilizes the PCC interface to inform platform firmware what frequency the
-OS wants for a logical processor. The platform firmware attempts to achieve
-the requested frequency. If the request for the target frequency could not be
-satisfied by platform firmware, then it usually means that power budget
-conditions are in place, and "power capping" is taking place.
-
-1.1 PCC interface:
-------------------
-The complete PCC specification is available here:
-http://www.acpica.org/download/Processor-Clocking-Control-v1p0.pdf
-
-PCC relies on a shared memory region that provides a channel for communication
-between the OS and platform firmware. PCC also implements a "doorbell" that
-is used by the OS to inform the platform firmware that a command has been
-sent.
-
-The ACPI PCCH() method is used to discover the location of the PCC shared
-memory region. The shared memory region header contains the "command" and
-"status" interface. PCCH() also contains details on how to access the platform
-doorbell.
-
-The following commands are supported by the PCC interface:
-* Get Average Frequency
-* Set Desired Frequency
-
-The ACPI PCCP() method is implemented for each logical processor and is
-used to discover the offsets for the input and output buffers in the shared
-memory region.
-
-When PCC mode is enabled, the platform will not expose processor performance
-or throttle states (_PSS, _TSS and related ACPI objects) to OSPM. Therefore,
-the native P-state driver (such as acpi-cpufreq for Intel, powernow-k8 for
-AMD) will not load.
-
-However, OSPM remains in control of policy. The governor (eg: "ondemand")
-computes the required performance for each processor based on server workload.
-The PCC driver fills in the command interface, and the input buffer and
-communicates the request to the platform firmware. The platform firmware is
-responsible for delivering the requested performance.
-
-Each PCC command is "global" in scope and can affect all the logical CPUs in
-the system. Therefore, PCC is capable of performing "group" updates. With PCC
-the OS is capable of getting/setting the frequency of all the logical CPUs in
-the system with a single call to the BIOS.
-
-1.1.1 Get Average Frequency:
-----------------------------
-This command is used by the OSPM to query the running frequency of the
-processor since the last time this command was completed. The output buffer
-indicates the average unhalted frequency of the logical processor expressed as
-a percentage of the nominal (ie: maximum) CPU frequency. The output buffer
-also signifies if the CPU frequency is limited by a power budget condition.
-
-1.1.2 Set Desired Frequency:
-----------------------------
-This command is used by the OSPM to communicate to the platform firmware the
-desired frequency for a logical processor. The output buffer is currently
-ignored by OSPM. The next invocation of "Get Average Frequency" will inform
-OSPM if the desired frequency was achieved or not.
-
-1.2 Platforms affected:
------------------------
-The PCC driver will load on any system where the platform firmware:
-* supports the PCC interface, and the associated PCCH() and PCCP() methods
-* assumes responsibility for managing the hardware clocking controls in order
-to deliver the requested processor performance
-
-Currently, certain HP ProLiant platforms implement the PCC interface. On those
-platforms PCC is the "default" choice.
-
-However, it is possible to disable this interface via a BIOS setting. In
-such an instance, as is also the case on platforms where the PCC interface
-is not implemented, the PCC driver will fail to load silently.
-
-2. Driver and /sys details:
----------------------------
-When the driver loads, it merely prints the lowest and the highest CPU
-frequencies supported by the platform firmware.
-
-The PCC driver loads with a message such as:
-pcc-cpufreq: (v1.00.00) driver loaded with frequency limits: 1600 MHz, 2933
-MHz
-
-This means that the OPSM can request the CPU to run at any frequency in
-between the limits (1600 MHz, and 2933 MHz) specified in the message.
-
-Internally, there is no need for the driver to convert the "target" frequency
-to a corresponding P-state.
-
-The VERSION number for the driver will be of the format v.xy.ab.
-eg: 1.00.02
- ----- --
- | |
- | -- this will increase with bug fixes/enhancements to the driver
- |-- this is the version of the PCC specification the driver adheres to
-
-
-The following is a brief discussion on some of the fields exported via the
-/sys filesystem and how their values are affected by the PCC driver:
-
-2.1 scaling_available_frequencies:
-----------------------------------
-scaling_available_frequencies is not created in /sys. No intermediate
-frequencies need to be listed because the BIOS will try to achieve any
-frequency, within limits, requested by the governor. A frequency does not have
-to be strictly associated with a P-state.
-
-2.2 cpuinfo_transition_latency:
--------------------------------
-The cpuinfo_transition_latency field is 0. The PCC specification does
-not include a field to expose this value currently.
-
-2.3 cpuinfo_cur_freq:
----------------------
-A) Often cpuinfo_cur_freq will show a value different than what is declared
-in the scaling_available_frequencies or scaling_cur_freq, or scaling_max_freq.
-This is due to "turbo boost" available on recent Intel processors. If certain
-conditions are met the BIOS can achieve a slightly higher speed than requested
-by OSPM. An example:
-
-scaling_cur_freq : 2933000
-cpuinfo_cur_freq : 3196000
-
-B) There is a round-off error associated with the cpuinfo_cur_freq value.
-Since the driver obtains the current frequency as a "percentage" (%) of the
-nominal frequency from the BIOS, sometimes, the values displayed by
-scaling_cur_freq and cpuinfo_cur_freq may not match. An example:
-
-scaling_cur_freq : 1600000
-cpuinfo_cur_freq : 1583000
-
-In this example, the nominal frequency is 2933 MHz. The driver obtains the
-current frequency, cpuinfo_cur_freq, as 54% of the nominal frequency:
-
- 54% of 2933 MHz = 1583 MHz
-
-Nominal frequency is the maximum frequency of the processor, and it usually
-corresponds to the frequency of the P0 P-state.
-
-2.4 related_cpus:
------------------
-The related_cpus field is identical to affected_cpus.
-
-affected_cpus : 4
-related_cpus : 4
-
-Currently, the PCC driver does not evaluate _PSD. The platforms that support
-PCC do not implement SW_ALL. So OSPM doesn't need to perform any coordination
-to ensure that the same frequency is requested of all dependent CPUs.
-
-3. Caveats:
------------
-The "cpufreq_stats" module in its present form cannot be loaded and
-expected to work with the PCC driver. Since the "cpufreq_stats" module
-provides information wrt each P-state, it is not applicable to the PCC driver.
diff --git a/Documentation/cpu-load.txt b/Documentation/cpu-load.txt
deleted file mode 100644
index 2d01ce43d2a2..000000000000
--- a/Documentation/cpu-load.txt
+++ /dev/null
@@ -1,114 +0,0 @@
-========
-CPU load
-========
-
-Linux exports various bits of information via ``/proc/stat`` and
-``/proc/uptime`` that userland tools, such as top(1), use to calculate
-the average time system spent in a particular state, for example::
-
- $ iostat
- Linux 2.6.18.3-exp (linmac) 02/20/2007
-
- avg-cpu: %user %nice %system %iowait %steal %idle
- 10.01 0.00 2.92 5.44 0.00 81.63
-
- ...
-
-Here the system thinks that over the default sampling period the
-system spent 10.01% of the time doing work in user space, 2.92% in the
-kernel, and was overall 81.63% of the time idle.
-
-In most cases the ``/proc/stat`` information reflects the reality quite
-closely, however due to the nature of how/when the kernel collects
-this data sometimes it can not be trusted at all.
-
-So how is this information collected? Whenever timer interrupt is
-signalled the kernel looks what kind of task was running at this
-moment and increments the counter that corresponds to this tasks
-kind/state. The problem with this is that the system could have
-switched between various states multiple times between two timer
-interrupts yet the counter is incremented only for the last state.
-
-
-Example
--------
-
-If we imagine the system with one task that periodically burns cycles
-in the following manner::
-
- time line between two timer interrupts
- |--------------------------------------|
- ^ ^
- |_ something begins working |
- |_ something goes to sleep
- (only to be awaken quite soon)
-
-In the above situation the system will be 0% loaded according to the
-``/proc/stat`` (since the timer interrupt will always happen when the
-system is executing the idle handler), but in reality the load is
-closer to 99%.
-
-One can imagine many more situations where this behavior of the kernel
-will lead to quite erratic information inside ``/proc/stat``::
-
-
- /* gcc -o hog smallhog.c */
- #include <time.h>
- #include <limits.h>
- #include <signal.h>
- #include <sys/time.h>
- #define HIST 10
-
- static volatile sig_atomic_t stop;
-
- static void sighandler (int signr)
- {
- (void) signr;
- stop = 1;
- }
- static unsigned long hog (unsigned long niters)
- {
- stop = 0;
- while (!stop && --niters);
- return niters;
- }
- int main (void)
- {
- int i;
- struct itimerval it = { .it_interval = { .tv_sec = 0, .tv_usec = 1 },
- .it_value = { .tv_sec = 0, .tv_usec = 1 } };
- sigset_t set;
- unsigned long v[HIST];
- double tmp = 0.0;
- unsigned long n;
- signal (SIGALRM, &sighandler);
- setitimer (ITIMER_REAL, &it, NULL);
-
- hog (ULONG_MAX);
- for (i = 0; i < HIST; ++i) v[i] = ULONG_MAX - hog (ULONG_MAX);
- for (i = 0; i < HIST; ++i) tmp += v[i];
- tmp /= HIST;
- n = tmp - (tmp / 3.0);
-
- sigemptyset (&set);
- sigaddset (&set, SIGALRM);
-
- for (;;) {
- hog (n);
- sigwait (&set, &i);
- }
- return 0;
- }
-
-
-References
-----------
-
-- http://lkml.org/lkml/2007/2/12/6
-- Documentation/filesystems/proc.txt (1.8)
-
-
-Thanks
-------
-
-Con Kolivas, Pavel Machek
diff --git a/Documentation/cpuidle/core.txt b/Documentation/cpuidle/core.txt
deleted file mode 100644
index 63ecc5dc9d8a..000000000000
--- a/Documentation/cpuidle/core.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-
- Supporting multiple CPU idle levels in kernel
-
- cpuidle
-
-General Information:
-
-Various CPUs today support multiple idle levels that are differentiated
-by varying exit latencies and power consumption during idle.
-cpuidle is a generic in-kernel infrastructure that separates
-idle policy (governor) from idle mechanism (driver) and provides a
-standardized infrastructure to support independent development of
-governors and drivers.
-
-cpuidle resides under drivers/cpuidle.
-
-Boot options:
-"cpuidle_sysfs_switch"
-enables current_governor interface in /sys/devices/system/cpu/cpuidle/,
-which can be used to switch governors at run time. This boot option
-is meant for developer testing only. In normal usage, kernel picks the
-best governor based on governor ratings.
-SEE ALSO: sysfs.txt in this directory.
diff --git a/Documentation/cpuidle/driver.txt b/Documentation/cpuidle/driver.txt
deleted file mode 100644
index 1b0d81d92583..000000000000
--- a/Documentation/cpuidle/driver.txt
+++ /dev/null
@@ -1,37 +0,0 @@
-
-
- Supporting multiple CPU idle levels in kernel
-
- cpuidle drivers
-
-
-
-
-cpuidle driver hooks into the cpuidle infrastructure and handles the
-architecture/platform dependent part of CPU idle states. Driver
-provides the platform idle state detection capability and also
-has mechanisms in place to support actual entry-exit into CPU idle states.
-
-cpuidle driver initializes the cpuidle_device structure for each CPU device
-and registers with cpuidle using cpuidle_register_device.
-
-If all the idle states are the same, the wrapper function cpuidle_register
-could be used instead.
-
-It can also support the dynamic changes (like battery <-> AC), by using
-cpuidle_pause_and_lock, cpuidle_disable_device and cpuidle_enable_device,
-cpuidle_resume_and_unlock.
-
-Interfaces:
-extern int cpuidle_register(struct cpuidle_driver *drv,
- const struct cpumask *const coupled_cpus);
-extern int cpuidle_unregister(struct cpuidle_driver *drv);
-extern int cpuidle_register_driver(struct cpuidle_driver *drv);
-extern void cpuidle_unregister_driver(struct cpuidle_driver *drv);
-extern int cpuidle_register_device(struct cpuidle_device *dev);
-extern void cpuidle_unregister_device(struct cpuidle_device *dev);
-
-extern void cpuidle_pause_and_lock(void);
-extern void cpuidle_resume_and_unlock(void);
-extern int cpuidle_enable_device(struct cpuidle_device *dev);
-extern void cpuidle_disable_device(struct cpuidle_device *dev);
diff --git a/Documentation/cpuidle/governor.txt b/Documentation/cpuidle/governor.txt
deleted file mode 100644
index d9020f5e847b..000000000000
--- a/Documentation/cpuidle/governor.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-
-
-
- Supporting multiple CPU idle levels in kernel
-
- cpuidle governors
-
-
-
-
-cpuidle governor is policy routine that decides what idle state to enter at
-any given time. cpuidle core uses different callbacks to the governor.
-
-* enable() to enable governor for a particular device
-* disable() to disable governor for a particular device
-* select() to select an idle state to enter
-* reflect() called after returning from the idle state, which can be used
- by the governor for some record keeping.
-
-More than one governor can be registered at the same time and
-users can switch between drivers using /sysfs interface (when enabled).
-More than one governor part is supported for developers to easily experiment
-with different governors. By default, most optimal governor based on your
-kernel configuration and platform will be selected by cpuidle.
-
-Interfaces:
-extern int cpuidle_register_governor(struct cpuidle_governor *gov);
-struct cpuidle_governor
diff --git a/Documentation/cpuidle/sysfs.txt b/Documentation/cpuidle/sysfs.txt
deleted file mode 100644
index d1587f434e7b..000000000000
--- a/Documentation/cpuidle/sysfs.txt
+++ /dev/null
@@ -1,98 +0,0 @@
-
-
- Supporting multiple CPU idle levels in kernel
-
- cpuidle sysfs
-
-System global cpuidle related information and tunables are under
-/sys/devices/system/cpu/cpuidle
-
-The current interfaces in this directory has self-explanatory names:
-* current_driver
-* current_governor_ro
-
-With cpuidle_sysfs_switch boot option (meant for developer testing)
-following objects are visible instead.
-* current_driver
-* available_governors
-* current_governor
-In this case users can switch the governor at run time by writing
-to current_governor.
-
-
-Per logical CPU specific cpuidle information are under
-/sys/devices/system/cpu/cpuX/cpuidle
-for each online cpu X
-
---------------------------------------------------------------------------------
-# ls -lR /sys/devices/system/cpu/cpu0/cpuidle/
-/sys/devices/system/cpu/cpu0/cpuidle/:
-total 0
-drwxr-xr-x 2 root root 0 Feb 8 10:42 state0
-drwxr-xr-x 2 root root 0 Feb 8 10:42 state1
-drwxr-xr-x 2 root root 0 Feb 8 10:42 state2
-drwxr-xr-x 2 root root 0 Feb 8 10:42 state3
-
-/sys/devices/system/cpu/cpu0/cpuidle/state0:
-total 0
--r--r--r-- 1 root root 4096 Feb 8 10:42 desc
--rw-r--r-- 1 root root 4096 Feb 8 10:42 disable
--r--r--r-- 1 root root 4096 Feb 8 10:42 latency
--r--r--r-- 1 root root 4096 Feb 8 10:42 name
--r--r--r-- 1 root root 4096 Feb 8 10:42 power
--r--r--r-- 1 root root 4096 Feb 8 10:42 residency
--r--r--r-- 1 root root 4096 Feb 8 10:42 time
--r--r--r-- 1 root root 4096 Feb 8 10:42 usage
-
-/sys/devices/system/cpu/cpu0/cpuidle/state1:
-total 0
--r--r--r-- 1 root root 4096 Feb 8 10:42 desc
--rw-r--r-- 1 root root 4096 Feb 8 10:42 disable
--r--r--r-- 1 root root 4096 Feb 8 10:42 latency
--r--r--r-- 1 root root 4096 Feb 8 10:42 name
--r--r--r-- 1 root root 4096 Feb 8 10:42 power
--r--r--r-- 1 root root 4096 Feb 8 10:42 residency
--r--r--r-- 1 root root 4096 Feb 8 10:42 time
--r--r--r-- 1 root root 4096 Feb 8 10:42 usage
-
-/sys/devices/system/cpu/cpu0/cpuidle/state2:
-total 0
--r--r--r-- 1 root root 4096 Feb 8 10:42 desc
--rw-r--r-- 1 root root 4096 Feb 8 10:42 disable
--r--r--r-- 1 root root 4096 Feb 8 10:42 latency
--r--r--r-- 1 root root 4096 Feb 8 10:42 name
--r--r--r-- 1 root root 4096 Feb 8 10:42 power
--r--r--r-- 1 root root 4096 Feb 8 10:42 residency
--r--r--r-- 1 root root 4096 Feb 8 10:42 time
--r--r--r-- 1 root root 4096 Feb 8 10:42 usage
-
-/sys/devices/system/cpu/cpu0/cpuidle/state3:
-total 0
--r--r--r-- 1 root root 4096 Feb 8 10:42 desc
--rw-r--r-- 1 root root 4096 Feb 8 10:42 disable
--r--r--r-- 1 root root 4096 Feb 8 10:42 latency
--r--r--r-- 1 root root 4096 Feb 8 10:42 name
--r--r--r-- 1 root root 4096 Feb 8 10:42 power
--r--r--r-- 1 root root 4096 Feb 8 10:42 residency
--r--r--r-- 1 root root 4096 Feb 8 10:42 time
--r--r--r-- 1 root root 4096 Feb 8 10:42 usage
---------------------------------------------------------------------------------
-
-
-* desc : Small description about the idle state (string)
-* disable : Option to disable this idle state (bool) -> see note below
-* latency : Latency to exit out of this idle state (in microseconds)
-* residency : Time after which a state becomes more effecient than any
- shallower state (in microseconds)
-* name : Name of the idle state (string)
-* power : Power consumed while in this idle state (in milliwatts)
-* time : Total time spent in this idle state (in microseconds)
-* usage : Number of times this state was entered (count)
-
-Note:
-The behavior and the effect of the disable variable depends on the
-implementation of a particular governor. In the ladder governor, for
-example, it is not coherent, i.e. if one is disabling a light state,
-then all deeper states are disabled as well, but the disable variable
-does not reflect it. Likewise, if one enables a deep state but a lighter
-state still is disabled, then this has no effect.
diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt
deleted file mode 100644
index c6e7e9196a8b..000000000000
--- a/Documentation/cputopology.txt
+++ /dev/null
@@ -1,159 +0,0 @@
-===========================================
-How CPU topology info is exported via sysfs
-===========================================
-
-Export CPU topology info via sysfs. Items (attributes) are similar
-to /proc/cpuinfo output of some architectures:
-
-1) /sys/devices/system/cpu/cpuX/topology/physical_package_id:
-
- physical package id of cpuX. Typically corresponds to a physical
- socket number, but the actual value is architecture and platform
- dependent.
-
-2) /sys/devices/system/cpu/cpuX/topology/core_id:
-
- the CPU core ID of cpuX. Typically it is the hardware platform's
- identifier (rather than the kernel's). The actual value is
- architecture and platform dependent.
-
-3) /sys/devices/system/cpu/cpuX/topology/book_id:
-
- the book ID of cpuX. Typically it is the hardware platform's
- identifier (rather than the kernel's). The actual value is
- architecture and platform dependent.
-
-4) /sys/devices/system/cpu/cpuX/topology/drawer_id:
-
- the drawer ID of cpuX. Typically it is the hardware platform's
- identifier (rather than the kernel's). The actual value is
- architecture and platform dependent.
-
-5) /sys/devices/system/cpu/cpuX/topology/thread_siblings:
-
- internal kernel map of cpuX's hardware threads within the same
- core as cpuX.
-
-6) /sys/devices/system/cpu/cpuX/topology/thread_siblings_list:
-
- human-readable list of cpuX's hardware threads within the same
- core as cpuX.
-
-7) /sys/devices/system/cpu/cpuX/topology/core_siblings:
-
- internal kernel map of cpuX's hardware threads within the same
- physical_package_id.
-
-8) /sys/devices/system/cpu/cpuX/topology/core_siblings_list:
-
- human-readable list of cpuX's hardware threads within the same
- physical_package_id.
-
-9) /sys/devices/system/cpu/cpuX/topology/book_siblings:
-
- internal kernel map of cpuX's hardware threads within the same
- book_id.
-
-10) /sys/devices/system/cpu/cpuX/topology/book_siblings_list:
-
- human-readable list of cpuX's hardware threads within the same
- book_id.
-
-11) /sys/devices/system/cpu/cpuX/topology/drawer_siblings:
-
- internal kernel map of cpuX's hardware threads within the same
- drawer_id.
-
-12) /sys/devices/system/cpu/cpuX/topology/drawer_siblings_list:
-
- human-readable list of cpuX's hardware threads within the same
- drawer_id.
-
-To implement it in an architecture-neutral way, a new source file,
-drivers/base/topology.c, is to export the 6 to 12 attributes. The book
-and drawer related sysfs files will only be created if CONFIG_SCHED_BOOK
-and CONFIG_SCHED_DRAWER are selected.
-
-CONFIG_SCHED_BOOK and CONFIG_DRAWER are currently only used on s390, where
-they reflect the cpu and cache hierarchy.
-
-For an architecture to support this feature, it must define some of
-these macros in include/asm-XXX/topology.h::
-
- #define topology_physical_package_id(cpu)
- #define topology_core_id(cpu)
- #define topology_book_id(cpu)
- #define topology_drawer_id(cpu)
- #define topology_sibling_cpumask(cpu)
- #define topology_core_cpumask(cpu)
- #define topology_book_cpumask(cpu)
- #define topology_drawer_cpumask(cpu)
-
-The type of ``**_id macros`` is int.
-The type of ``**_cpumask macros`` is ``(const) struct cpumask *``. The latter
-correspond with appropriate ``**_siblings`` sysfs attributes (except for
-topology_sibling_cpumask() which corresponds with thread_siblings).
-
-To be consistent on all architectures, include/linux/topology.h
-provides default definitions for any of the above macros that are
-not defined by include/asm-XXX/topology.h:
-
-1) physical_package_id: -1
-2) core_id: 0
-3) sibling_cpumask: just the given CPU
-4) core_cpumask: just the given CPU
-
-For architectures that don't support books (CONFIG_SCHED_BOOK) there are no
-default definitions for topology_book_id() and topology_book_cpumask().
-For architectures that don't support drawers (CONFIG_SCHED_DRAWER) there are
-no default definitions for topology_drawer_id() and topology_drawer_cpumask().
-
-Additionally, CPU topology information is provided under
-/sys/devices/system/cpu and includes these files. The internal
-source for the output is in brackets ("[]").
-
- =========== ==========================================================
- kernel_max: the maximum CPU index allowed by the kernel configuration.
- [NR_CPUS-1]
-
- offline: CPUs that are not online because they have been
- HOTPLUGGED off (see cpu-hotplug.txt) or exceed the limit
- of CPUs allowed by the kernel configuration (kernel_max
- above). [~cpu_online_mask + cpus >= NR_CPUS]
-
- online: CPUs that are online and being scheduled [cpu_online_mask]
-
- possible: CPUs that have been allocated resources and can be
- brought online if they are present. [cpu_possible_mask]
-
- present: CPUs that have been identified as being present in the
- system. [cpu_present_mask]
- =========== ==========================================================
-
-The format for the above output is compatible with cpulist_parse()
-[see <linux/cpumask.h>]. Some examples follow.
-
-In this example, there are 64 CPUs in the system but cpus 32-63 exceed
-the kernel max which is limited to 0..31 by the NR_CPUS config option
-being 32. Note also that CPUs 2 and 4-31 are not online but could be
-brought online as they are both present and possible::
-
- kernel_max: 31
- offline: 2,4-31,32-63
- online: 0-1,3
- possible: 0-31
- present: 0-31
-
-In this example, the NR_CPUS config option is 128, but the kernel was
-started with possible_cpus=144. There are 4 CPUs in the system and cpu2
-was manually taken offline (and is the only CPU that can be brought
-online.)::
-
- kernel_max: 127
- offline: 2,4-127,128-143
- online: 0-1,3
- possible: 0-127
- present: 0-3
-
-See cpu-hotplug.txt for the possible_cpus=NUM kernel start parameter
-as well as more information on the various cpumasks.
diff --git a/Documentation/crc32.txt b/Documentation/crc32.txt
deleted file mode 100644
index 8a6860f33b4e..000000000000
--- a/Documentation/crc32.txt
+++ /dev/null
@@ -1,189 +0,0 @@
-=================================
-brief tutorial on CRC computation
-=================================
-
-A CRC is a long-division remainder. You add the CRC to the message,
-and the whole thing (message+CRC) is a multiple of the given
-CRC polynomial. To check the CRC, you can either check that the
-CRC matches the recomputed value, *or* you can check that the
-remainder computed on the message+CRC is 0. This latter approach
-is used by a lot of hardware implementations, and is why so many
-protocols put the end-of-frame flag after the CRC.
-
-It's actually the same long division you learned in school, except that:
-
-- We're working in binary, so the digits are only 0 and 1, and
-- When dividing polynomials, there are no carries. Rather than add and
- subtract, we just xor. Thus, we tend to get a bit sloppy about
- the difference between adding and subtracting.
-
-Like all division, the remainder is always smaller than the divisor.
-To produce a 32-bit CRC, the divisor is actually a 33-bit CRC polynomial.
-Since it's 33 bits long, bit 32 is always going to be set, so usually the
-CRC is written in hex with the most significant bit omitted. (If you're
-familiar with the IEEE 754 floating-point format, it's the same idea.)
-
-Note that a CRC is computed over a string of *bits*, so you have
-to decide on the endianness of the bits within each byte. To get
-the best error-detecting properties, this should correspond to the
-order they're actually sent. For example, standard RS-232 serial is
-little-endian; the most significant bit (sometimes used for parity)
-is sent last. And when appending a CRC word to a message, you should
-do it in the right order, matching the endianness.
-
-Just like with ordinary division, you proceed one digit (bit) at a time.
-Each step of the division you take one more digit (bit) of the dividend
-and append it to the current remainder. Then you figure out the
-appropriate multiple of the divisor to subtract to being the remainder
-back into range. In binary, this is easy - it has to be either 0 or 1,
-and to make the XOR cancel, it's just a copy of bit 32 of the remainder.
-
-When computing a CRC, we don't care about the quotient, so we can
-throw the quotient bit away, but subtract the appropriate multiple of
-the polynomial from the remainder and we're back to where we started,
-ready to process the next bit.
-
-A big-endian CRC written this way would be coded like::
-
- for (i = 0; i < input_bits; i++) {
- multiple = remainder & 0x80000000 ? CRCPOLY : 0;
- remainder = (remainder << 1 | next_input_bit()) ^ multiple;
- }
-
-Notice how, to get at bit 32 of the shifted remainder, we look
-at bit 31 of the remainder *before* shifting it.
-
-But also notice how the next_input_bit() bits we're shifting into
-the remainder don't actually affect any decision-making until
-32 bits later. Thus, the first 32 cycles of this are pretty boring.
-Also, to add the CRC to a message, we need a 32-bit-long hole for it at
-the end, so we have to add 32 extra cycles shifting in zeros at the
-end of every message.
-
-These details lead to a standard trick: rearrange merging in the
-next_input_bit() until the moment it's needed. Then the first 32 cycles
-can be precomputed, and merging in the final 32 zero bits to make room
-for the CRC can be skipped entirely. This changes the code to::
-
- for (i = 0; i < input_bits; i++) {
- remainder ^= next_input_bit() << 31;
- multiple = (remainder & 0x80000000) ? CRCPOLY : 0;
- remainder = (remainder << 1) ^ multiple;
- }
-
-With this optimization, the little-endian code is particularly simple::
-
- for (i = 0; i < input_bits; i++) {
- remainder ^= next_input_bit();
- multiple = (remainder & 1) ? CRCPOLY : 0;
- remainder = (remainder >> 1) ^ multiple;
- }
-
-The most significant coefficient of the remainder polynomial is stored
-in the least significant bit of the binary "remainder" variable.
-The other details of endianness have been hidden in CRCPOLY (which must
-be bit-reversed) and next_input_bit().
-
-As long as next_input_bit is returning the bits in a sensible order, we don't
-*have* to wait until the last possible moment to merge in additional bits.
-We can do it 8 bits at a time rather than 1 bit at a time::
-
- for (i = 0; i < input_bytes; i++) {
- remainder ^= next_input_byte() << 24;
- for (j = 0; j < 8; j++) {
- multiple = (remainder & 0x80000000) ? CRCPOLY : 0;
- remainder = (remainder << 1) ^ multiple;
- }
- }
-
-Or in little-endian::
-
- for (i = 0; i < input_bytes; i++) {
- remainder ^= next_input_byte();
- for (j = 0; j < 8; j++) {
- multiple = (remainder & 1) ? CRCPOLY : 0;
- remainder = (remainder >> 1) ^ multiple;
- }
- }
-
-If the input is a multiple of 32 bits, you can even XOR in a 32-bit
-word at a time and increase the inner loop count to 32.
-
-You can also mix and match the two loop styles, for example doing the
-bulk of a message byte-at-a-time and adding bit-at-a-time processing
-for any fractional bytes at the end.
-
-To reduce the number of conditional branches, software commonly uses
-the byte-at-a-time table method, popularized by Dilip V. Sarwate,
-"Computation of Cyclic Redundancy Checks via Table Look-Up", Comm. ACM
-v.31 no.8 (August 1998) p. 1008-1013.
-
-Here, rather than just shifting one bit of the remainder to decide
-in the correct multiple to subtract, we can shift a byte at a time.
-This produces a 40-bit (rather than a 33-bit) intermediate remainder,
-and the correct multiple of the polynomial to subtract is found using
-a 256-entry lookup table indexed by the high 8 bits.
-
-(The table entries are simply the CRC-32 of the given one-byte messages.)
-
-When space is more constrained, smaller tables can be used, e.g. two
-4-bit shifts followed by a lookup in a 16-entry table.
-
-It is not practical to process much more than 8 bits at a time using this
-technique, because tables larger than 256 entries use too much memory and,
-more importantly, too much of the L1 cache.
-
-To get higher software performance, a "slicing" technique can be used.
-See "High Octane CRC Generation with the Intel Slicing-by-8 Algorithm",
-ftp://download.intel.com/technology/comms/perfnet/download/slicing-by-8.pdf
-
-This does not change the number of table lookups, but does increase
-the parallelism. With the classic Sarwate algorithm, each table lookup
-must be completed before the index of the next can be computed.
-
-A "slicing by 2" technique would shift the remainder 16 bits at a time,
-producing a 48-bit intermediate remainder. Rather than doing a single
-lookup in a 65536-entry table, the two high bytes are looked up in
-two different 256-entry tables. Each contains the remainder required
-to cancel out the corresponding byte. The tables are different because the
-polynomials to cancel are different. One has non-zero coefficients from
-x^32 to x^39, while the other goes from x^40 to x^47.
-
-Since modern processors can handle many parallel memory operations, this
-takes barely longer than a single table look-up and thus performs almost
-twice as fast as the basic Sarwate algorithm.
-
-This can be extended to "slicing by 4" using 4 256-entry tables.
-Each step, 32 bits of data is fetched, XORed with the CRC, and the result
-broken into bytes and looked up in the tables. Because the 32-bit shift
-leaves the low-order bits of the intermediate remainder zero, the
-final CRC is simply the XOR of the 4 table look-ups.
-
-But this still enforces sequential execution: a second group of table
-look-ups cannot begin until the previous groups 4 table look-ups have all
-been completed. Thus, the processor's load/store unit is sometimes idle.
-
-To make maximum use of the processor, "slicing by 8" performs 8 look-ups
-in parallel. Each step, the 32-bit CRC is shifted 64 bits and XORed
-with 64 bits of input data. What is important to note is that 4 of
-those 8 bytes are simply copies of the input data; they do not depend
-on the previous CRC at all. Thus, those 4 table look-ups may commence
-immediately, without waiting for the previous loop iteration.
-
-By always having 4 loads in flight, a modern superscalar processor can
-be kept busy and make full use of its L1 cache.
-
-Two more details about CRC implementation in the real world:
-
-Normally, appending zero bits to a message which is already a multiple
-of a polynomial produces a larger multiple of that polynomial. Thus,
-a basic CRC will not detect appended zero bits (or bytes). To enable
-a CRC to detect this condition, it's common to invert the CRC before
-appending it. This makes the remainder of the message+crc come out not
-as zero, but some fixed non-zero value. (The CRC of the inversion
-pattern, 0xffffffff.)
-
-The same problem applies to zero bits prepended to the message, and a
-similar solution is used. Instead of starting the CRC computation with
-a remainder of 0, an initial remainder of all ones is used. As long as
-you start the same way on decoding, it doesn't make a difference.
diff --git a/Documentation/crypto/api-aead.rst b/Documentation/crypto/api-aead.rst
index d15256f1ae36..78d073319f96 100644
--- a/Documentation/crypto/api-aead.rst
+++ b/Documentation/crypto/api-aead.rst
@@ -1,3 +1,6 @@
+Authenticated Encryption With Associated Data (AEAD)
+====================================================
+
Authenticated Encryption With Associated Data (AEAD) Algorithm Definitions
--------------------------------------------------------------------------
diff --git a/Documentation/crypto/api-akcipher.rst b/Documentation/crypto/api-akcipher.rst
index 40aa8746e2a1..a31f5aef7667 100644
--- a/Documentation/crypto/api-akcipher.rst
+++ b/Documentation/crypto/api-akcipher.rst
@@ -1,3 +1,6 @@
+Asymmetric Cipher
+=================
+
Asymmetric Cipher Algorithm Definitions
---------------------------------------
@@ -8,10 +11,10 @@ Asymmetric Cipher API
---------------------
.. kernel-doc:: include/crypto/akcipher.h
- :doc: Generic Public Key API
+ :doc: Generic Public Key Cipher API
.. kernel-doc:: include/crypto/akcipher.h
- :functions: crypto_alloc_akcipher crypto_free_akcipher crypto_akcipher_set_pub_key crypto_akcipher_set_priv_key crypto_akcipher_maxsize crypto_akcipher_encrypt crypto_akcipher_decrypt crypto_akcipher_sign crypto_akcipher_verify
+ :functions: crypto_alloc_akcipher crypto_free_akcipher crypto_akcipher_set_pub_key crypto_akcipher_set_priv_key crypto_akcipher_maxsize crypto_akcipher_encrypt crypto_akcipher_decrypt
Asymmetric Cipher Request Handle
--------------------------------
diff --git a/Documentation/crypto/api-digest.rst b/Documentation/crypto/api-digest.rst
index 7a1e670d6ce1..02a2bcc26a64 100644
--- a/Documentation/crypto/api-digest.rst
+++ b/Documentation/crypto/api-digest.rst
@@ -1,3 +1,6 @@
+Message Digest
+==============
+
Message Digest Algorithm Definitions
------------------------------------
diff --git a/Documentation/crypto/api-intro.rst b/Documentation/crypto/api-intro.rst
new file mode 100644
index 000000000000..15201be0b811
--- /dev/null
+++ b/Documentation/crypto/api-intro.rst
@@ -0,0 +1,262 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============================
+Scatterlist Cryptographic API
+=============================
+
+Introduction
+============
+
+The Scatterlist Crypto API takes page vectors (scatterlists) as
+arguments, and works directly on pages. In some cases (e.g. ECB
+mode ciphers), this will allow for pages to be encrypted in-place
+with no copying.
+
+One of the initial goals of this design was to readily support IPsec,
+so that processing can be applied to paged skb's without the need
+for linearization.
+
+
+Details
+=======
+
+At the lowest level are algorithms, which register dynamically with the
+API.
+
+'Transforms' are user-instantiated objects, which maintain state, handle all
+of the implementation logic (e.g. manipulating page vectors) and provide an
+abstraction to the underlying algorithms. However, at the user
+level they are very simple.
+
+Conceptually, the API layering looks like this::
+
+ [transform api] (user interface)
+ [transform ops] (per-type logic glue e.g. cipher.c, compress.c)
+ [algorithm api] (for registering algorithms)
+
+The idea is to make the user interface and algorithm registration API
+very simple, while hiding the core logic from both. Many good ideas
+from existing APIs such as Cryptoapi and Nettle have been adapted for this.
+
+The API currently supports five main types of transforms: AEAD (Authenticated
+Encryption with Associated Data), Block Ciphers, Ciphers, Compressors and
+Hashes.
+
+Please note that Block Ciphers is somewhat of a misnomer. It is in fact
+meant to support all ciphers including stream ciphers. The difference
+between Block Ciphers and Ciphers is that the latter operates on exactly
+one block while the former can operate on an arbitrary amount of data,
+subject to block size requirements (i.e., non-stream ciphers can only
+process multiples of blocks).
+
+Here's an example of how to use the API::
+
+ #include <crypto/hash.h>
+ #include <linux/err.h>
+ #include <linux/scatterlist.h>
+
+ struct scatterlist sg[2];
+ char result[128];
+ struct crypto_ahash *tfm;
+ struct ahash_request *req;
+
+ tfm = crypto_alloc_ahash("md5", 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(tfm))
+ fail();
+
+ /* ... set up the scatterlists ... */
+
+ req = ahash_request_alloc(tfm, GFP_ATOMIC);
+ if (!req)
+ fail();
+
+ ahash_request_set_callback(req, 0, NULL, NULL);
+ ahash_request_set_crypt(req, sg, result, 2);
+
+ if (crypto_ahash_digest(req))
+ fail();
+
+ ahash_request_free(req);
+ crypto_free_ahash(tfm);
+
+
+Many real examples are available in the regression test module (tcrypt.c).
+
+
+Developer Notes
+===============
+
+Transforms may only be allocated in user context, and cryptographic
+methods may only be called from softirq and user contexts. For
+transforms with a setkey method it too should only be called from
+user context.
+
+When using the API for ciphers, performance will be optimal if each
+scatterlist contains data which is a multiple of the cipher's block
+size (typically 8 bytes). This prevents having to do any copying
+across non-aligned page fragment boundaries.
+
+
+Adding New Algorithms
+=====================
+
+When submitting a new algorithm for inclusion, a mandatory requirement
+is that at least a few test vectors from known sources (preferably
+standards) be included.
+
+Converting existing well known code is preferred, as it is more likely
+to have been reviewed and widely tested. If submitting code from LGPL
+sources, please consider changing the license to GPL (see section 3 of
+the LGPL).
+
+Algorithms submitted must also be generally patent-free (e.g. IDEA
+will not be included in the mainline until around 2011), and be based
+on a recognized standard and/or have been subjected to appropriate
+peer review.
+
+Also check for any RFCs which may relate to the use of specific algorithms,
+as well as general application notes such as RFC2451 ("The ESP CBC-Mode
+Cipher Algorithms").
+
+It's a good idea to avoid using lots of macros and use inlined functions
+instead, as gcc does a good job with inlining, while excessive use of
+macros can cause compilation problems on some platforms.
+
+Also check the TODO list at the web site listed below to see what people
+might already be working on.
+
+
+Bugs
+====
+
+Send bug reports to:
+ linux-crypto@vger.kernel.org
+
+Cc:
+ Herbert Xu <herbert@gondor.apana.org.au>,
+ David S. Miller <davem@redhat.com>
+
+
+Further Information
+===================
+
+For further patches and various updates, including the current TODO
+list, see:
+http://gondor.apana.org.au/~herbert/crypto/
+
+
+Authors
+=======
+
+- James Morris
+- David S. Miller
+- Herbert Xu
+
+
+Credits
+=======
+
+The following people provided invaluable feedback during the development
+of the API:
+
+ - Alexey Kuznetzov
+ - Rusty Russell
+ - Herbert Valerio Riedel
+ - Jeff Garzik
+ - Michael Richardson
+ - Andrew Morton
+ - Ingo Oeser
+ - Christoph Hellwig
+
+Portions of this API were derived from the following projects:
+
+ Kerneli Cryptoapi (http://www.kerneli.org/)
+ - Alexander Kjeldaas
+ - Herbert Valerio Riedel
+ - Kyle McMartin
+ - Jean-Luc Cooke
+ - David Bryson
+ - Clemens Fruhwirth
+ - Tobias Ringstrom
+ - Harald Welte
+
+and;
+
+ Nettle (https://www.lysator.liu.se/~nisse/nettle/)
+ - Niels Möller
+
+Original developers of the crypto algorithms:
+
+ - Dana L. How (DES)
+ - Andrew Tridgell and Steve French (MD4)
+ - Colin Plumb (MD5)
+ - Steve Reid (SHA1)
+ - Jean-Luc Cooke (SHA256, SHA384, SHA512)
+ - Kazunori Miyazawa / USAGI (HMAC)
+ - Matthew Skala (Twofish)
+ - Dag Arne Osvik (Serpent)
+ - Brian Gladman (AES)
+ - Kartikey Mahendra Bhatt (CAST6)
+ - Jon Oberheide (ARC4)
+ - Jouni Malinen (Michael MIC)
+ - NTT(Nippon Telegraph and Telephone Corporation) (Camellia)
+
+SHA1 algorithm contributors:
+ - Jean-Francois Dive
+
+DES algorithm contributors:
+ - Raimar Falke
+ - Gisle Sælensminde
+ - Niels Möller
+
+Blowfish algorithm contributors:
+ - Herbert Valerio Riedel
+ - Kyle McMartin
+
+Twofish algorithm contributors:
+ - Werner Koch
+ - Marc Mutz
+
+SHA256/384/512 algorithm contributors:
+ - Andrew McDonald
+ - Kyle McMartin
+ - Herbert Valerio Riedel
+
+AES algorithm contributors:
+ - Alexander Kjeldaas
+ - Herbert Valerio Riedel
+ - Kyle McMartin
+ - Adam J. Richter
+ - Fruhwirth Clemens (i586)
+ - Linus Torvalds (i586)
+
+CAST5 algorithm contributors:
+ - Kartikey Mahendra Bhatt (original developers unknown, FSF copyright).
+
+TEA/XTEA algorithm contributors:
+ - Aaron Grothe
+ - Michael Ringe
+
+Khazad algorithm contributors:
+ - Aaron Grothe
+
+Whirlpool algorithm contributors:
+ - Aaron Grothe
+ - Jean-Luc Cooke
+
+Anubis algorithm contributors:
+ - Aaron Grothe
+
+Tiger algorithm contributors:
+ - Aaron Grothe
+
+VIA PadLock contributors:
+ - Michal Ludvig
+
+Camellia algorithm contributors:
+ - NTT(Nippon Telegraph and Telephone Corporation) (Camellia)
+
+Generic scatterwalk code by Adam J. Richter <adam@yggdrasil.com>
+
+Please send any credits updates or corrections to:
+Herbert Xu <herbert@gondor.apana.org.au>
diff --git a/Documentation/crypto/api-intro.txt b/Documentation/crypto/api-intro.txt
deleted file mode 100644
index 45d943fcae5b..000000000000
--- a/Documentation/crypto/api-intro.txt
+++ /dev/null
@@ -1,250 +0,0 @@
-
- Scatterlist Cryptographic API
-
-INTRODUCTION
-
-The Scatterlist Crypto API takes page vectors (scatterlists) as
-arguments, and works directly on pages. In some cases (e.g. ECB
-mode ciphers), this will allow for pages to be encrypted in-place
-with no copying.
-
-One of the initial goals of this design was to readily support IPsec,
-so that processing can be applied to paged skb's without the need
-for linearization.
-
-
-DETAILS
-
-At the lowest level are algorithms, which register dynamically with the
-API.
-
-'Transforms' are user-instantiated objects, which maintain state, handle all
-of the implementation logic (e.g. manipulating page vectors) and provide an
-abstraction to the underlying algorithms. However, at the user
-level they are very simple.
-
-Conceptually, the API layering looks like this:
-
- [transform api] (user interface)
- [transform ops] (per-type logic glue e.g. cipher.c, compress.c)
- [algorithm api] (for registering algorithms)
-
-The idea is to make the user interface and algorithm registration API
-very simple, while hiding the core logic from both. Many good ideas
-from existing APIs such as Cryptoapi and Nettle have been adapted for this.
-
-The API currently supports five main types of transforms: AEAD (Authenticated
-Encryption with Associated Data), Block Ciphers, Ciphers, Compressors and
-Hashes.
-
-Please note that Block Ciphers is somewhat of a misnomer. It is in fact
-meant to support all ciphers including stream ciphers. The difference
-between Block Ciphers and Ciphers is that the latter operates on exactly
-one block while the former can operate on an arbitrary amount of data,
-subject to block size requirements (i.e., non-stream ciphers can only
-process multiples of blocks).
-
-Here's an example of how to use the API:
-
- #include <crypto/hash.h>
- #include <linux/err.h>
- #include <linux/scatterlist.h>
-
- struct scatterlist sg[2];
- char result[128];
- struct crypto_ahash *tfm;
- struct ahash_request *req;
-
- tfm = crypto_alloc_ahash("md5", 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR(tfm))
- fail();
-
- /* ... set up the scatterlists ... */
-
- req = ahash_request_alloc(tfm, GFP_ATOMIC);
- if (!req)
- fail();
-
- ahash_request_set_callback(req, 0, NULL, NULL);
- ahash_request_set_crypt(req, sg, result, 2);
-
- if (crypto_ahash_digest(req))
- fail();
-
- ahash_request_free(req);
- crypto_free_ahash(tfm);
-
-
-Many real examples are available in the regression test module (tcrypt.c).
-
-
-DEVELOPER NOTES
-
-Transforms may only be allocated in user context, and cryptographic
-methods may only be called from softirq and user contexts. For
-transforms with a setkey method it too should only be called from
-user context.
-
-When using the API for ciphers, performance will be optimal if each
-scatterlist contains data which is a multiple of the cipher's block
-size (typically 8 bytes). This prevents having to do any copying
-across non-aligned page fragment boundaries.
-
-
-ADDING NEW ALGORITHMS
-
-When submitting a new algorithm for inclusion, a mandatory requirement
-is that at least a few test vectors from known sources (preferably
-standards) be included.
-
-Converting existing well known code is preferred, as it is more likely
-to have been reviewed and widely tested. If submitting code from LGPL
-sources, please consider changing the license to GPL (see section 3 of
-the LGPL).
-
-Algorithms submitted must also be generally patent-free (e.g. IDEA
-will not be included in the mainline until around 2011), and be based
-on a recognized standard and/or have been subjected to appropriate
-peer review.
-
-Also check for any RFCs which may relate to the use of specific algorithms,
-as well as general application notes such as RFC2451 ("The ESP CBC-Mode
-Cipher Algorithms").
-
-It's a good idea to avoid using lots of macros and use inlined functions
-instead, as gcc does a good job with inlining, while excessive use of
-macros can cause compilation problems on some platforms.
-
-Also check the TODO list at the web site listed below to see what people
-might already be working on.
-
-
-BUGS
-
-Send bug reports to:
-linux-crypto@vger.kernel.org
-Cc: Herbert Xu <herbert@gondor.apana.org.au>,
- David S. Miller <davem@redhat.com>
-
-
-FURTHER INFORMATION
-
-For further patches and various updates, including the current TODO
-list, see:
-http://gondor.apana.org.au/~herbert/crypto/
-
-
-AUTHORS
-
-James Morris
-David S. Miller
-Herbert Xu
-
-
-CREDITS
-
-The following people provided invaluable feedback during the development
-of the API:
-
- Alexey Kuznetzov
- Rusty Russell
- Herbert Valerio Riedel
- Jeff Garzik
- Michael Richardson
- Andrew Morton
- Ingo Oeser
- Christoph Hellwig
-
-Portions of this API were derived from the following projects:
-
- Kerneli Cryptoapi (http://www.kerneli.org/)
- Alexander Kjeldaas
- Herbert Valerio Riedel
- Kyle McMartin
- Jean-Luc Cooke
- David Bryson
- Clemens Fruhwirth
- Tobias Ringstrom
- Harald Welte
-
-and;
-
- Nettle (http://www.lysator.liu.se/~nisse/nettle/)
- Niels Möller
-
-Original developers of the crypto algorithms:
-
- Dana L. How (DES)
- Andrew Tridgell and Steve French (MD4)
- Colin Plumb (MD5)
- Steve Reid (SHA1)
- Jean-Luc Cooke (SHA256, SHA384, SHA512)
- Kazunori Miyazawa / USAGI (HMAC)
- Matthew Skala (Twofish)
- Dag Arne Osvik (Serpent)
- Brian Gladman (AES)
- Kartikey Mahendra Bhatt (CAST6)
- Jon Oberheide (ARC4)
- Jouni Malinen (Michael MIC)
- NTT(Nippon Telegraph and Telephone Corporation) (Camellia)
-
-SHA1 algorithm contributors:
- Jean-Francois Dive
-
-DES algorithm contributors:
- Raimar Falke
- Gisle Sælensminde
- Niels Möller
-
-Blowfish algorithm contributors:
- Herbert Valerio Riedel
- Kyle McMartin
-
-Twofish algorithm contributors:
- Werner Koch
- Marc Mutz
-
-SHA256/384/512 algorithm contributors:
- Andrew McDonald
- Kyle McMartin
- Herbert Valerio Riedel
-
-AES algorithm contributors:
- Alexander Kjeldaas
- Herbert Valerio Riedel
- Kyle McMartin
- Adam J. Richter
- Fruhwirth Clemens (i586)
- Linus Torvalds (i586)
-
-CAST5 algorithm contributors:
- Kartikey Mahendra Bhatt (original developers unknown, FSF copyright).
-
-TEA/XTEA algorithm contributors:
- Aaron Grothe
- Michael Ringe
-
-Khazad algorithm contributors:
- Aaron Grothe
-
-Whirlpool algorithm contributors:
- Aaron Grothe
- Jean-Luc Cooke
-
-Anubis algorithm contributors:
- Aaron Grothe
-
-Tiger algorithm contributors:
- Aaron Grothe
-
-VIA PadLock contributors:
- Michal Ludvig
-
-Camellia algorithm contributors:
- NTT(Nippon Telegraph and Telephone Corporation) (Camellia)
-
-Generic scatterwalk code by Adam J. Richter <adam@yggdrasil.com>
-
-Please send any credits updates or corrections to:
-Herbert Xu <herbert@gondor.apana.org.au>
-
diff --git a/Documentation/crypto/api-kpp.rst b/Documentation/crypto/api-kpp.rst
index 7d86ab906bdf..5794e2d10c95 100644
--- a/Documentation/crypto/api-kpp.rst
+++ b/Documentation/crypto/api-kpp.rst
@@ -1,3 +1,6 @@
+Key-agreement Protocol Primitives (KPP)
+=======================================
+
Key-agreement Protocol Primitives (KPP) Cipher Algorithm Definitions
--------------------------------------------------------------------
diff --git a/Documentation/crypto/api-rng.rst b/Documentation/crypto/api-rng.rst
index 10ba7436cee4..23a94c0b272e 100644
--- a/Documentation/crypto/api-rng.rst
+++ b/Documentation/crypto/api-rng.rst
@@ -1,3 +1,6 @@
+Random Number Generator (RNG)
+=============================
+
Random Number Algorithm Definitions
-----------------------------------
diff --git a/Documentation/crypto/api-samples.rst b/Documentation/crypto/api-samples.rst
index 0f6ca8b7261e..e923f17bc2bd 100644
--- a/Documentation/crypto/api-samples.rst
+++ b/Documentation/crypto/api-samples.rst
@@ -4,111 +4,89 @@ Code Examples
Code Example For Symmetric Key Cipher Operation
-----------------------------------------------
-::
-
-
- /* tie all data structures together */
- struct skcipher_def {
- struct scatterlist sg;
- struct crypto_skcipher *tfm;
- struct skcipher_request *req;
- struct crypto_wait wait;
- };
-
- /* Perform cipher operation */
- static unsigned int test_skcipher_encdec(struct skcipher_def *sk,
- int enc)
- {
- int rc;
-
- if (enc)
- rc = crypto_wait_req(crypto_skcipher_encrypt(sk->req), &sk->wait);
- else
- rc = crypto_wait_req(crypto_skcipher_decrypt(sk->req), &sk->wait);
-
- if (rc)
- pr_info("skcipher encrypt returned with result %d\n", rc);
+This code encrypts some data with AES-256-XTS. For sake of example,
+all inputs are random bytes, the encryption is done in-place, and it's
+assumed the code is running in a context where it can sleep.
- return rc;
- }
+::
- /* Initialize and trigger cipher operation */
static int test_skcipher(void)
{
- struct skcipher_def sk;
- struct crypto_skcipher *skcipher = NULL;
- struct skcipher_request *req = NULL;
- char *scratchpad = NULL;
- char *ivdata = NULL;
- unsigned char key[32];
- int ret = -EFAULT;
-
- skcipher = crypto_alloc_skcipher("cbc-aes-aesni", 0, 0);
- if (IS_ERR(skcipher)) {
- pr_info("could not allocate skcipher handle\n");
- return PTR_ERR(skcipher);
- }
-
- req = skcipher_request_alloc(skcipher, GFP_KERNEL);
- if (!req) {
- pr_info("could not allocate skcipher request\n");
- ret = -ENOMEM;
- goto out;
- }
-
- skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
- crypto_req_done,
- &sk.wait);
-
- /* AES 256 with random key */
- get_random_bytes(&key, 32);
- if (crypto_skcipher_setkey(skcipher, key, 32)) {
- pr_info("key could not be set\n");
- ret = -EAGAIN;
- goto out;
- }
-
- /* IV will be random */
- ivdata = kmalloc(16, GFP_KERNEL);
- if (!ivdata) {
- pr_info("could not allocate ivdata\n");
- goto out;
- }
- get_random_bytes(ivdata, 16);
-
- /* Input data will be random */
- scratchpad = kmalloc(16, GFP_KERNEL);
- if (!scratchpad) {
- pr_info("could not allocate scratchpad\n");
- goto out;
- }
- get_random_bytes(scratchpad, 16);
-
- sk.tfm = skcipher;
- sk.req = req;
-
- /* We encrypt one block */
- sg_init_one(&sk.sg, scratchpad, 16);
- skcipher_request_set_crypt(req, &sk.sg, &sk.sg, 16, ivdata);
- crypto_init_wait(&sk.wait);
-
- /* encrypt data */
- ret = test_skcipher_encdec(&sk, 1);
- if (ret)
- goto out;
-
- pr_info("Encryption triggered successfully\n");
-
+ struct crypto_skcipher *tfm = NULL;
+ struct skcipher_request *req = NULL;
+ u8 *data = NULL;
+ const size_t datasize = 512; /* data size in bytes */
+ struct scatterlist sg;
+ DECLARE_CRYPTO_WAIT(wait);
+ u8 iv[16]; /* AES-256-XTS takes a 16-byte IV */
+ u8 key[64]; /* AES-256-XTS takes a 64-byte key */
+ int err;
+
+ /*
+ * Allocate a tfm (a transformation object) and set the key.
+ *
+ * In real-world use, a tfm and key are typically used for many
+ * encryption/decryption operations. But in this example, we'll just do a
+ * single encryption operation with it (which is not very efficient).
+ */
+
+ tfm = crypto_alloc_skcipher("xts(aes)", 0, 0);
+ if (IS_ERR(tfm)) {
+ pr_err("Error allocating xts(aes) handle: %ld\n", PTR_ERR(tfm));
+ return PTR_ERR(tfm);
+ }
+
+ get_random_bytes(key, sizeof(key));
+ err = crypto_skcipher_setkey(tfm, key, sizeof(key));
+ if (err) {
+ pr_err("Error setting key: %d\n", err);
+ goto out;
+ }
+
+ /* Allocate a request object */
+ req = skcipher_request_alloc(tfm, GFP_KERNEL);
+ if (!req) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ /* Prepare the input data */
+ data = kmalloc(datasize, GFP_KERNEL);
+ if (!data) {
+ err = -ENOMEM;
+ goto out;
+ }
+ get_random_bytes(data, datasize);
+
+ /* Initialize the IV */
+ get_random_bytes(iv, sizeof(iv));
+
+ /*
+ * Encrypt the data in-place.
+ *
+ * For simplicity, in this example we wait for the request to complete
+ * before proceeding, even if the underlying implementation is asynchronous.
+ *
+ * To decrypt instead of encrypt, just change crypto_skcipher_encrypt() to
+ * crypto_skcipher_decrypt().
+ */
+ sg_init_one(&sg, data, datasize);
+ skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
+ CRYPTO_TFM_REQ_MAY_SLEEP,
+ crypto_req_done, &wait);
+ skcipher_request_set_crypt(req, &sg, &sg, datasize, iv);
+ err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
+ if (err) {
+ pr_err("Error encrypting data: %d\n", err);
+ goto out;
+ }
+
+ pr_debug("Encryption was successful\n");
out:
- if (skcipher)
- crypto_free_skcipher(skcipher);
- if (req)
+ crypto_free_skcipher(tfm);
skcipher_request_free(req);
- if (ivdata)
- kfree(ivdata);
- if (scratchpad)
- kfree(scratchpad);
- return ret;
+ kfree(data);
+ return err;
}
@@ -133,7 +111,6 @@ Code Example For Use of Operational State Memory With SHASH
if (!sdesc)
return ERR_PTR(-ENOMEM);
sdesc->shash.tfm = alg;
- sdesc->shash.flags = 0x0;
return sdesc;
}
diff --git a/Documentation/crypto/api-sig.rst b/Documentation/crypto/api-sig.rst
new file mode 100644
index 000000000000..4d8aba8aee8e
--- /dev/null
+++ b/Documentation/crypto/api-sig.rst
@@ -0,0 +1,18 @@
+Asymmetric Signature
+====================
+
+Asymmetric Signature Algorithm Definitions
+------------------------------------------
+
+.. kernel-doc:: include/crypto/sig.h
+ :functions: sig_alg
+
+Asymmetric Signature API
+------------------------
+
+.. kernel-doc:: include/crypto/sig.h
+ :doc: Generic Public Key Signature API
+
+.. kernel-doc:: include/crypto/sig.h
+ :functions: crypto_alloc_sig crypto_free_sig crypto_sig_set_pubkey crypto_sig_set_privkey crypto_sig_keysize crypto_sig_maxsize crypto_sig_digestsize crypto_sig_sign crypto_sig_verify
+
diff --git a/Documentation/crypto/api-skcipher.rst b/Documentation/crypto/api-skcipher.rst
index 4eec4a93f7e3..4b7c8160790a 100644
--- a/Documentation/crypto/api-skcipher.rst
+++ b/Documentation/crypto/api-skcipher.rst
@@ -1,3 +1,6 @@
+Symmetric Key Cipher
+====================
+
Block Cipher Algorithm Definitions
----------------------------------
@@ -5,7 +8,7 @@ Block Cipher Algorithm Definitions
:doc: Block Cipher Algorithm Definitions
.. kernel-doc:: include/linux/crypto.h
- :functions: crypto_alg ablkcipher_alg blkcipher_alg cipher_alg
+ :functions: crypto_alg cipher_alg compress_alg
Symmetric Key Cipher API
------------------------
@@ -28,35 +31,8 @@ Symmetric Key Cipher Request Handle
Single Block Cipher API
-----------------------
-.. kernel-doc:: include/linux/crypto.h
+.. kernel-doc:: include/crypto/internal/cipher.h
:doc: Single Block Cipher API
-.. kernel-doc:: include/linux/crypto.h
+.. kernel-doc:: include/crypto/internal/cipher.h
:functions: crypto_alloc_cipher crypto_free_cipher crypto_has_cipher crypto_cipher_blocksize crypto_cipher_setkey crypto_cipher_encrypt_one crypto_cipher_decrypt_one
-
-Asynchronous Block Cipher API - Deprecated
-------------------------------------------
-
-.. kernel-doc:: include/linux/crypto.h
- :doc: Asynchronous Block Cipher API
-
-.. kernel-doc:: include/linux/crypto.h
- :functions: crypto_free_ablkcipher crypto_has_ablkcipher crypto_ablkcipher_ivsize crypto_ablkcipher_blocksize crypto_ablkcipher_setkey crypto_ablkcipher_reqtfm crypto_ablkcipher_encrypt crypto_ablkcipher_decrypt
-
-Asynchronous Cipher Request Handle - Deprecated
------------------------------------------------
-
-.. kernel-doc:: include/linux/crypto.h
- :doc: Asynchronous Cipher Request Handle
-
-.. kernel-doc:: include/linux/crypto.h
- :functions: crypto_ablkcipher_reqsize ablkcipher_request_set_tfm ablkcipher_request_alloc ablkcipher_request_free ablkcipher_request_set_callback ablkcipher_request_set_crypt
-
-Synchronous Block Cipher API - Deprecated
------------------------------------------
-
-.. kernel-doc:: include/linux/crypto.h
- :doc: Synchronous Block Cipher API
-
-.. kernel-doc:: include/linux/crypto.h
- :functions: crypto_alloc_blkcipher crypto_free_blkcipher crypto_has_blkcipher crypto_blkcipher_name crypto_blkcipher_ivsize crypto_blkcipher_blocksize crypto_blkcipher_setkey crypto_blkcipher_encrypt crypto_blkcipher_encrypt_iv crypto_blkcipher_decrypt crypto_blkcipher_decrypt_iv crypto_blkcipher_set_iv crypto_blkcipher_get_iv
diff --git a/Documentation/crypto/api.rst b/Documentation/crypto/api.rst
index 2e519193ab4a..8b2a90521886 100644
--- a/Documentation/crypto/api.rst
+++ b/Documentation/crypto/api.rst
@@ -1,20 +1,8 @@
Programming Interface
=====================
-Please note that the kernel crypto API contains the AEAD givcrypt API
-(crypto_aead_giv\* and aead_givcrypt\* function calls in
-include/crypto/aead.h). This API is obsolete and will be removed in the
-future. To obtain the functionality of an AEAD cipher with internal IV
-generation, use the IV generator as a regular cipher. For example,
-rfc4106(gcm(aes)) is the AEAD cipher with external IV generation and
-seqniv(rfc4106(gcm(aes))) implies that the kernel crypto API generates
-the IV. Different IV generators are available.
-
-.. class:: toc-title
-
- Table of contents
-
.. toctree::
+ :caption: Table of contents
:maxdepth: 2
api-skcipher
@@ -22,4 +10,5 @@ the IV. Different IV generators are available.
api-digest
api-rng
api-akcipher
+ api-sig
api-kpp
diff --git a/Documentation/crypto/architecture.rst b/Documentation/crypto/architecture.rst
index ca2d09b991f5..249b54d0849f 100644
--- a/Documentation/crypto/architecture.rst
+++ b/Documentation/crypto/architecture.rst
@@ -157,10 +157,6 @@ applicable to a cipher, it is not displayed:
- rng for random number generator
- - givcipher for cipher with associated IV generator (see the geniv
- entry below for the specification of the IV generator type used by
- the cipher implementation)
-
- kpp for a Key-agreement Protocol Primitive (KPP) cipher such as
an ECDH or DH implementation
@@ -174,16 +170,7 @@ applicable to a cipher, it is not displayed:
- digestsize: output size of the message digest
-- geniv: IV generation type:
-
- - eseqiv for encrypted sequence number based IV generation
-
- - seqiv for sequence number based IV generation
-
- - chainiv for chain iv generation
-
- - <builtin> is a marker that the cipher implements IV generation and
- handling as it is specific to the given cipher
+- geniv: IV generator (obsolete)
Key Sizes
---------
@@ -209,25 +196,13 @@ the aforementioned cipher types:
- CRYPTO_ALG_TYPE_CIPHER Single block cipher
-- CRYPTO_ALG_TYPE_COMPRESS Compression
-
- CRYPTO_ALG_TYPE_AEAD Authenticated Encryption with Associated Data
(MAC)
-- CRYPTO_ALG_TYPE_BLKCIPHER Synchronous multi-block cipher
-
-- CRYPTO_ALG_TYPE_ABLKCIPHER Asynchronous multi-block cipher
-
-- CRYPTO_ALG_TYPE_GIVCIPHER Asynchronous multi-block cipher packed
- together with an IV generator (see geniv field in the /proc/crypto
- listing for the known IV generators)
-
- CRYPTO_ALG_TYPE_KPP Key-agreement Protocol Primitive (KPP) such as
an ECDH or DH implementation
-- CRYPTO_ALG_TYPE_DIGEST Raw message digest
-
-- CRYPTO_ALG_TYPE_HASH Alias for CRYPTO_ALG_TYPE_DIGEST
+- CRYPTO_ALG_TYPE_HASH Raw message digest
- CRYPTO_ALG_TYPE_SHASH Synchronous multi-block hash
@@ -237,6 +212,8 @@ the aforementioned cipher types:
- CRYPTO_ALG_TYPE_AKCIPHER Asymmetric cipher
+- CRYPTO_ALG_TYPE_SIG Asymmetric signature
+
- CRYPTO_ALG_TYPE_PCOMPRESS Enhanced version of
CRYPTO_ALG_TYPE_COMPRESS allowing for segmented compression /
decompression instead of performing the operation on one segment
@@ -338,18 +315,14 @@ uses the API applicable to the cipher type specified for the block.
The following call sequence is applicable when the IPSEC layer triggers
an encryption operation with the esp_output function. During
-configuration, the administrator set up the use of rfc4106(gcm(aes)) as
-the cipher for ESP. The following call sequence is now depicted in the
-ASCII art above:
+configuration, the administrator set up the use of seqiv(rfc4106(gcm(aes)))
+as the cipher for ESP. The following call sequence is now depicted in
+the ASCII art above:
1. esp_output() invokes crypto_aead_encrypt() to trigger an
encryption operation of the AEAD cipher with IV generator.
- In case of GCM, the SEQIV implementation is registered as GIVCIPHER
- in crypto_rfc4106_alloc().
-
- The SEQIV performs its operation to generate an IV where the core
- function is seqiv_geniv().
+ The SEQIV generates the IV.
2. Now, SEQIV uses the AEAD API function calls to invoke the associated
AEAD cipher. In our case, during the instantiation of SEQIV, the
diff --git a/Documentation/crypto/asymmetric-keys.rst b/Documentation/crypto/asymmetric-keys.rst
new file mode 100644
index 000000000000..349f44a29392
--- /dev/null
+++ b/Documentation/crypto/asymmetric-keys.rst
@@ -0,0 +1,424 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============================================
+Asymmetric / Public-key Cryptography Key Type
+=============================================
+
+.. Contents:
+
+ - Overview.
+ - Key identification.
+ - Accessing asymmetric keys.
+ - Signature verification.
+ - Asymmetric key subtypes.
+ - Instantiation data parsers.
+ - Keyring link restrictions.
+
+
+Overview
+========
+
+The "asymmetric" key type is designed to be a container for the keys used in
+public-key cryptography, without imposing any particular restrictions on the
+form or mechanism of the cryptography or form of the key.
+
+The asymmetric key is given a subtype that defines what sort of data is
+associated with the key and provides operations to describe and destroy it.
+However, no requirement is made that the key data actually be stored in the
+key.
+
+A completely in-kernel key retention and operation subtype can be defined, but
+it would also be possible to provide access to cryptographic hardware (such as
+a TPM) that might be used to both retain the relevant key and perform
+operations using that key. In such a case, the asymmetric key would then
+merely be an interface to the TPM driver.
+
+Also provided is the concept of a data parser. Data parsers are responsible
+for extracting information from the blobs of data passed to the instantiation
+function. The first data parser that recognises the blob gets to set the
+subtype of the key and define the operations that can be done on that key.
+
+A data parser may interpret the data blob as containing the bits representing a
+key, or it may interpret it as a reference to a key held somewhere else in the
+system (for example, a TPM).
+
+
+Key Identification
+==================
+
+If a key is added with an empty name, the instantiation data parsers are given
+the opportunity to pre-parse a key and to determine the description the key
+should be given from the content of the key.
+
+This can then be used to refer to the key, either by complete match or by
+partial match. The key type may also use other criteria to refer to a key.
+
+The asymmetric key type's match function can then perform a wider range of
+comparisons than just the straightforward comparison of the description with
+the criterion string:
+
+ 1) If the criterion string is of the form "id:<hexdigits>" then the match
+ function will examine a key's fingerprint to see if the hex digits given
+ after the "id:" match the tail. For instance::
+
+ keyctl search @s asymmetric id:5acc2142
+
+ will match a key with fingerprint::
+
+ 1A00 2040 7601 7889 DE11 882C 3823 04AD 5ACC 2142
+
+ 2) If the criterion string is of the form "<subtype>:<hexdigits>" then the
+ match will match the ID as in (1), but with the added restriction that
+ only keys of the specified subtype (e.g. tpm) will be matched. For
+ instance::
+
+ keyctl search @s asymmetric tpm:5acc2142
+
+Looking in /proc/keys, the last 8 hex digits of the key fingerprint are
+displayed, along with the subtype::
+
+ 1a39e171 I----- 1 perm 3f010000 0 0 asymmetric modsign.0: DSA 5acc2142 []
+
+
+Accessing Asymmetric Keys
+=========================
+
+For general access to asymmetric keys from within the kernel, the following
+inclusion is required::
+
+ #include <crypto/public_key.h>
+
+This gives access to functions for dealing with asymmetric / public keys.
+Three enums are defined there for representing public-key cryptography
+algorithms::
+
+ enum pkey_algo
+
+digest algorithms used by those::
+
+ enum pkey_hash_algo
+
+and key identifier representations::
+
+ enum pkey_id_type
+
+Note that the key type representation types are required because key
+identifiers from different standards aren't necessarily compatible. For
+instance, PGP generates key identifiers by hashing the key data plus some
+PGP-specific metadata, whereas X.509 has arbitrary certificate identifiers.
+
+The operations defined upon a key are:
+
+ 1) Signature verification.
+
+Other operations are possible (such as encryption) with the same key data
+required for verification, but not currently supported, and others
+(eg. decryption and signature generation) require extra key data.
+
+
+Signature Verification
+----------------------
+
+An operation is provided to perform cryptographic signature verification, using
+an asymmetric key to provide or to provide access to the public key::
+
+ int verify_signature(const struct key *key,
+ const struct public_key_signature *sig);
+
+The caller must have already obtained the key from some source and can then use
+it to check the signature. The caller must have parsed the signature and
+transferred the relevant bits to the structure pointed to by sig::
+
+ struct public_key_signature {
+ u8 *digest;
+ u8 digest_size;
+ enum pkey_hash_algo pkey_hash_algo : 8;
+ u8 nr_mpi;
+ union {
+ MPI mpi[2];
+ ...
+ };
+ };
+
+The algorithm used must be noted in sig->pkey_hash_algo, and all the MPIs that
+make up the actual signature must be stored in sig->mpi[] and the count of MPIs
+placed in sig->nr_mpi.
+
+In addition, the data must have been digested by the caller and the resulting
+hash must be pointed to by sig->digest and the size of the hash be placed in
+sig->digest_size.
+
+The function will return 0 upon success or -EKEYREJECTED if the signature
+doesn't match.
+
+The function may also return -ENOTSUPP if an unsupported public-key algorithm
+or public-key/hash algorithm combination is specified or the key doesn't
+support the operation; -EBADMSG or -ERANGE if some of the parameters have weird
+data; or -ENOMEM if an allocation can't be performed. -EINVAL can be returned
+if the key argument is the wrong type or is incompletely set up.
+
+
+Asymmetric Key Subtypes
+=======================
+
+Asymmetric keys have a subtype that defines the set of operations that can be
+performed on that key and that determines what data is attached as the key
+payload. The payload format is entirely at the whim of the subtype.
+
+The subtype is selected by the key data parser and the parser must initialise
+the data required for it. The asymmetric key retains a reference on the
+subtype module.
+
+The subtype definition structure can be found in::
+
+ #include <keys/asymmetric-subtype.h>
+
+and looks like the following::
+
+ struct asymmetric_key_subtype {
+ struct module *owner;
+ const char *name;
+
+ void (*describe)(const struct key *key, struct seq_file *m);
+ void (*destroy)(void *payload);
+ int (*query)(const struct kernel_pkey_params *params,
+ struct kernel_pkey_query *info);
+ int (*eds_op)(struct kernel_pkey_params *params,
+ const void *in, void *out);
+ int (*verify_signature)(const struct key *key,
+ const struct public_key_signature *sig);
+ };
+
+Asymmetric keys point to this with their payload[asym_subtype] member.
+
+The owner and name fields should be set to the owning module and the name of
+the subtype. Currently, the name is only used for print statements.
+
+There are a number of operations defined by the subtype:
+
+ 1) describe().
+
+ Mandatory. This allows the subtype to display something in /proc/keys
+ against the key. For instance the name of the public key algorithm type
+ could be displayed. The key type will display the tail of the key
+ identity string after this.
+
+ 2) destroy().
+
+ Mandatory. This should free the memory associated with the key. The
+ asymmetric key will look after freeing the fingerprint and releasing the
+ reference on the subtype module.
+
+ 3) query().
+
+ Mandatory. This is a function for querying the capabilities of a key.
+
+ 4) eds_op().
+
+ Optional. This is the entry point for the encryption, decryption and
+ signature creation operations (which are distinguished by the operation ID
+ in the parameter struct). The subtype may do anything it likes to
+ implement an operation, including offloading to hardware.
+
+ 5) verify_signature().
+
+ Optional. This is the entry point for signature verification. The
+ subtype may do anything it likes to implement an operation, including
+ offloading to hardware.
+
+Instantiation Data Parsers
+==========================
+
+The asymmetric key type doesn't generally want to store or to deal with a raw
+blob of data that holds the key data. It would have to parse it and error
+check it each time it wanted to use it. Further, the contents of the blob may
+have various checks that can be performed on it (eg. self-signatures, validity
+dates) and may contain useful data about the key (identifiers, capabilities).
+
+Also, the blob may represent a pointer to some hardware containing the key
+rather than the key itself.
+
+Examples of blob formats for which parsers could be implemented include:
+
+ - OpenPGP packet stream [RFC 4880].
+ - X.509 ASN.1 stream.
+ - Pointer to TPM key.
+ - Pointer to UEFI key.
+ - PKCS#8 private key [RFC 5208].
+ - PKCS#5 encrypted private key [RFC 2898].
+
+During key instantiation each parser in the list is tried until one doesn't
+return -EBADMSG.
+
+The parser definition structure can be found in::
+
+ #include <keys/asymmetric-parser.h>
+
+and looks like the following::
+
+ struct asymmetric_key_parser {
+ struct module *owner;
+ const char *name;
+
+ int (*parse)(struct key_preparsed_payload *prep);
+ };
+
+The owner and name fields should be set to the owning module and the name of
+the parser.
+
+There is currently only a single operation defined by the parser, and it is
+mandatory:
+
+ 1) parse().
+
+ This is called to preparse the key from the key creation and update paths.
+ In particular, it is called during the key creation _before_ a key is
+ allocated, and as such, is permitted to provide the key's description in
+ the case that the caller declines to do so.
+
+ The caller passes a pointer to the following struct with all of the fields
+ cleared, except for data, datalen and quotalen [see
+ Documentation/security/keys/core.rst]::
+
+ struct key_preparsed_payload {
+ char *description;
+ void *payload[4];
+ const void *data;
+ size_t datalen;
+ size_t quotalen;
+ };
+
+ The instantiation data is in a blob pointed to by data and is datalen in
+ size. The parse() function is not permitted to change these two values at
+ all, and shouldn't change any of the other values _unless_ they are
+ recognise the blob format and will not return -EBADMSG to indicate it is
+ not theirs.
+
+ If the parser is happy with the blob, it should propose a description for
+ the key and attach it to ->description, ->payload[asym_subtype] should be
+ set to point to the subtype to be used, ->payload[asym_crypto] should be
+ set to point to the initialised data for that subtype,
+ ->payload[asym_key_ids] should point to one or more hex fingerprints and
+ quotalen should be updated to indicate how much quota this key should
+ account for.
+
+ When clearing up, the data attached to ->payload[asym_key_ids] and
+ ->description will be kfree()'d and the data attached to
+ ->payload[asm_crypto] will be passed to the subtype's ->destroy() method
+ to be disposed of. A module reference for the subtype pointed to by
+ ->payload[asym_subtype] will be put.
+
+
+ If the data format is not recognised, -EBADMSG should be returned. If it
+ is recognised, but the key cannot for some reason be set up, some other
+ negative error code should be returned. On success, 0 should be returned.
+
+ The key's fingerprint string may be partially matched upon. For a
+ public-key algorithm such as RSA and DSA this will likely be a printable
+ hex version of the key's fingerprint.
+
+Functions are provided to register and unregister parsers::
+
+ int register_asymmetric_key_parser(struct asymmetric_key_parser *parser);
+ void unregister_asymmetric_key_parser(struct asymmetric_key_parser *subtype);
+
+Parsers may not have the same name. The names are otherwise only used for
+displaying in debugging messages.
+
+
+Keyring Link Restrictions
+=========================
+
+Keyrings created from userspace using add_key can be configured to check the
+signature of the key being linked. Keys without a valid signature are not
+allowed to link.
+
+Several restriction methods are available:
+
+ 1) Restrict using the kernel builtin trusted keyring
+
+ - Option string used with KEYCTL_RESTRICT_KEYRING:
+ - "builtin_trusted"
+
+ The kernel builtin trusted keyring will be searched for the signing key.
+ If the builtin trusted keyring is not configured, all links will be
+ rejected. The ca_keys kernel parameter also affects which keys are used
+ for signature verification.
+
+ 2) Restrict using the kernel builtin and secondary trusted keyrings
+
+ - Option string used with KEYCTL_RESTRICT_KEYRING:
+ - "builtin_and_secondary_trusted"
+
+ The kernel builtin and secondary trusted keyrings will be searched for the
+ signing key. If the secondary trusted keyring is not configured, this
+ restriction will behave like the "builtin_trusted" option. The ca_keys
+ kernel parameter also affects which keys are used for signature
+ verification.
+
+ 3) Restrict using a separate key or keyring
+
+ - Option string used with KEYCTL_RESTRICT_KEYRING:
+ - "key_or_keyring:<key or keyring serial number>[:chain]"
+
+ Whenever a key link is requested, the link will only succeed if the key
+ being linked is signed by one of the designated keys. This key may be
+ specified directly by providing a serial number for one asymmetric key, or
+ a group of keys may be searched for the signing key by providing the
+ serial number for a keyring.
+
+ When the "chain" option is provided at the end of the string, the keys
+ within the destination keyring will also be searched for signing keys.
+ This allows for verification of certificate chains by adding each
+ certificate in order (starting closest to the root) to a keyring. For
+ instance, one keyring can be populated with links to a set of root
+ certificates, with a separate, restricted keyring set up for each
+ certificate chain to be validated::
+
+ # Create and populate a keyring for root certificates
+ root_id=`keyctl add keyring root-certs "" @s`
+ keyctl padd asymmetric "" $root_id < root1.cert
+ keyctl padd asymmetric "" $root_id < root2.cert
+
+ # Create and restrict a keyring for the certificate chain
+ chain_id=`keyctl add keyring chain "" @s`
+ keyctl restrict_keyring $chain_id asymmetric key_or_keyring:$root_id:chain
+
+ # Attempt to add each certificate in the chain, starting with the
+ # certificate closest to the root.
+ keyctl padd asymmetric "" $chain_id < intermediateA.cert
+ keyctl padd asymmetric "" $chain_id < intermediateB.cert
+ keyctl padd asymmetric "" $chain_id < end-entity.cert
+
+ If the final end-entity certificate is successfully added to the "chain"
+ keyring, we can be certain that it has a valid signing chain going back to
+ one of the root certificates.
+
+ A single keyring can be used to verify a chain of signatures by
+ restricting the keyring after linking the root certificate::
+
+ # Create a keyring for the certificate chain and add the root
+ chain2_id=`keyctl add keyring chain2 "" @s`
+ keyctl padd asymmetric "" $chain2_id < root1.cert
+
+ # Restrict the keyring that already has root1.cert linked. The cert
+ # will remain linked by the keyring.
+ keyctl restrict_keyring $chain2_id asymmetric key_or_keyring:0:chain
+
+ # Attempt to add each certificate in the chain, starting with the
+ # certificate closest to the root.
+ keyctl padd asymmetric "" $chain2_id < intermediateA.cert
+ keyctl padd asymmetric "" $chain2_id < intermediateB.cert
+ keyctl padd asymmetric "" $chain2_id < end-entity.cert
+
+ If the final end-entity certificate is successfully added to the "chain2"
+ keyring, we can be certain that there is a valid signing chain going back
+ to the root certificate that was added before the keyring was restricted.
+
+
+In all of these cases, if the signing key is found the signature of the key to
+be linked will be verified using the signing key. The requested key is added
+to the keyring only if the signature is successfully verified. -ENOKEY is
+returned if the parent certificate could not be found, or -EKEYREJECTED is
+returned if the signature check fails or the key is blacklisted. Other errors
+may be returned if the signature check could not be performed.
diff --git a/Documentation/crypto/asymmetric-keys.txt b/Documentation/crypto/asymmetric-keys.txt
deleted file mode 100644
index 8763866b11cf..000000000000
--- a/Documentation/crypto/asymmetric-keys.txt
+++ /dev/null
@@ -1,429 +0,0 @@
- =============================================
- ASYMMETRIC / PUBLIC-KEY CRYPTOGRAPHY KEY TYPE
- =============================================
-
-Contents:
-
- - Overview.
- - Key identification.
- - Accessing asymmetric keys.
- - Signature verification.
- - Asymmetric key subtypes.
- - Instantiation data parsers.
- - Keyring link restrictions.
-
-
-========
-OVERVIEW
-========
-
-The "asymmetric" key type is designed to be a container for the keys used in
-public-key cryptography, without imposing any particular restrictions on the
-form or mechanism of the cryptography or form of the key.
-
-The asymmetric key is given a subtype that defines what sort of data is
-associated with the key and provides operations to describe and destroy it.
-However, no requirement is made that the key data actually be stored in the
-key.
-
-A completely in-kernel key retention and operation subtype can be defined, but
-it would also be possible to provide access to cryptographic hardware (such as
-a TPM) that might be used to both retain the relevant key and perform
-operations using that key. In such a case, the asymmetric key would then
-merely be an interface to the TPM driver.
-
-Also provided is the concept of a data parser. Data parsers are responsible
-for extracting information from the blobs of data passed to the instantiation
-function. The first data parser that recognises the blob gets to set the
-subtype of the key and define the operations that can be done on that key.
-
-A data parser may interpret the data blob as containing the bits representing a
-key, or it may interpret it as a reference to a key held somewhere else in the
-system (for example, a TPM).
-
-
-==================
-KEY IDENTIFICATION
-==================
-
-If a key is added with an empty name, the instantiation data parsers are given
-the opportunity to pre-parse a key and to determine the description the key
-should be given from the content of the key.
-
-This can then be used to refer to the key, either by complete match or by
-partial match. The key type may also use other criteria to refer to a key.
-
-The asymmetric key type's match function can then perform a wider range of
-comparisons than just the straightforward comparison of the description with
-the criterion string:
-
- (1) If the criterion string is of the form "id:<hexdigits>" then the match
- function will examine a key's fingerprint to see if the hex digits given
- after the "id:" match the tail. For instance:
-
- keyctl search @s asymmetric id:5acc2142
-
- will match a key with fingerprint:
-
- 1A00 2040 7601 7889 DE11 882C 3823 04AD 5ACC 2142
-
- (2) If the criterion string is of the form "<subtype>:<hexdigits>" then the
- match will match the ID as in (1), but with the added restriction that
- only keys of the specified subtype (e.g. tpm) will be matched. For
- instance:
-
- keyctl search @s asymmetric tpm:5acc2142
-
-Looking in /proc/keys, the last 8 hex digits of the key fingerprint are
-displayed, along with the subtype:
-
- 1a39e171 I----- 1 perm 3f010000 0 0 asymmetric modsign.0: DSA 5acc2142 []
-
-
-=========================
-ACCESSING ASYMMETRIC KEYS
-=========================
-
-For general access to asymmetric keys from within the kernel, the following
-inclusion is required:
-
- #include <crypto/public_key.h>
-
-This gives access to functions for dealing with asymmetric / public keys.
-Three enums are defined there for representing public-key cryptography
-algorithms:
-
- enum pkey_algo
-
-digest algorithms used by those:
-
- enum pkey_hash_algo
-
-and key identifier representations:
-
- enum pkey_id_type
-
-Note that the key type representation types are required because key
-identifiers from different standards aren't necessarily compatible. For
-instance, PGP generates key identifiers by hashing the key data plus some
-PGP-specific metadata, whereas X.509 has arbitrary certificate identifiers.
-
-The operations defined upon a key are:
-
- (1) Signature verification.
-
-Other operations are possible (such as encryption) with the same key data
-required for verification, but not currently supported, and others
-(eg. decryption and signature generation) require extra key data.
-
-
-SIGNATURE VERIFICATION
-----------------------
-
-An operation is provided to perform cryptographic signature verification, using
-an asymmetric key to provide or to provide access to the public key.
-
- int verify_signature(const struct key *key,
- const struct public_key_signature *sig);
-
-The caller must have already obtained the key from some source and can then use
-it to check the signature. The caller must have parsed the signature and
-transferred the relevant bits to the structure pointed to by sig.
-
- struct public_key_signature {
- u8 *digest;
- u8 digest_size;
- enum pkey_hash_algo pkey_hash_algo : 8;
- u8 nr_mpi;
- union {
- MPI mpi[2];
- ...
- };
- };
-
-The algorithm used must be noted in sig->pkey_hash_algo, and all the MPIs that
-make up the actual signature must be stored in sig->mpi[] and the count of MPIs
-placed in sig->nr_mpi.
-
-In addition, the data must have been digested by the caller and the resulting
-hash must be pointed to by sig->digest and the size of the hash be placed in
-sig->digest_size.
-
-The function will return 0 upon success or -EKEYREJECTED if the signature
-doesn't match.
-
-The function may also return -ENOTSUPP if an unsupported public-key algorithm
-or public-key/hash algorithm combination is specified or the key doesn't
-support the operation; -EBADMSG or -ERANGE if some of the parameters have weird
-data; or -ENOMEM if an allocation can't be performed. -EINVAL can be returned
-if the key argument is the wrong type or is incompletely set up.
-
-
-=======================
-ASYMMETRIC KEY SUBTYPES
-=======================
-
-Asymmetric keys have a subtype that defines the set of operations that can be
-performed on that key and that determines what data is attached as the key
-payload. The payload format is entirely at the whim of the subtype.
-
-The subtype is selected by the key data parser and the parser must initialise
-the data required for it. The asymmetric key retains a reference on the
-subtype module.
-
-The subtype definition structure can be found in:
-
- #include <keys/asymmetric-subtype.h>
-
-and looks like the following:
-
- struct asymmetric_key_subtype {
- struct module *owner;
- const char *name;
-
- void (*describe)(const struct key *key, struct seq_file *m);
- void (*destroy)(void *payload);
- int (*query)(const struct kernel_pkey_params *params,
- struct kernel_pkey_query *info);
- int (*eds_op)(struct kernel_pkey_params *params,
- const void *in, void *out);
- int (*verify_signature)(const struct key *key,
- const struct public_key_signature *sig);
- };
-
-Asymmetric keys point to this with their payload[asym_subtype] member.
-
-The owner and name fields should be set to the owning module and the name of
-the subtype. Currently, the name is only used for print statements.
-
-There are a number of operations defined by the subtype:
-
- (1) describe().
-
- Mandatory. This allows the subtype to display something in /proc/keys
- against the key. For instance the name of the public key algorithm type
- could be displayed. The key type will display the tail of the key
- identity string after this.
-
- (2) destroy().
-
- Mandatory. This should free the memory associated with the key. The
- asymmetric key will look after freeing the fingerprint and releasing the
- reference on the subtype module.
-
- (3) query().
-
- Mandatory. This is a function for querying the capabilities of a key.
-
- (4) eds_op().
-
- Optional. This is the entry point for the encryption, decryption and
- signature creation operations (which are distinguished by the operation ID
- in the parameter struct). The subtype may do anything it likes to
- implement an operation, including offloading to hardware.
-
- (5) verify_signature().
-
- Optional. This is the entry point for signature verification. The
- subtype may do anything it likes to implement an operation, including
- offloading to hardware.
-
-
-==========================
-INSTANTIATION DATA PARSERS
-==========================
-
-The asymmetric key type doesn't generally want to store or to deal with a raw
-blob of data that holds the key data. It would have to parse it and error
-check it each time it wanted to use it. Further, the contents of the blob may
-have various checks that can be performed on it (eg. self-signatures, validity
-dates) and may contain useful data about the key (identifiers, capabilities).
-
-Also, the blob may represent a pointer to some hardware containing the key
-rather than the key itself.
-
-Examples of blob formats for which parsers could be implemented include:
-
- - OpenPGP packet stream [RFC 4880].
- - X.509 ASN.1 stream.
- - Pointer to TPM key.
- - Pointer to UEFI key.
- - PKCS#8 private key [RFC 5208].
- - PKCS#5 encrypted private key [RFC 2898].
-
-During key instantiation each parser in the list is tried until one doesn't
-return -EBADMSG.
-
-The parser definition structure can be found in:
-
- #include <keys/asymmetric-parser.h>
-
-and looks like the following:
-
- struct asymmetric_key_parser {
- struct module *owner;
- const char *name;
-
- int (*parse)(struct key_preparsed_payload *prep);
- };
-
-The owner and name fields should be set to the owning module and the name of
-the parser.
-
-There is currently only a single operation defined by the parser, and it is
-mandatory:
-
- (1) parse().
-
- This is called to preparse the key from the key creation and update paths.
- In particular, it is called during the key creation _before_ a key is
- allocated, and as such, is permitted to provide the key's description in
- the case that the caller declines to do so.
-
- The caller passes a pointer to the following struct with all of the fields
- cleared, except for data, datalen and quotalen [see
- Documentation/security/keys/core.rst].
-
- struct key_preparsed_payload {
- char *description;
- void *payload[4];
- const void *data;
- size_t datalen;
- size_t quotalen;
- };
-
- The instantiation data is in a blob pointed to by data and is datalen in
- size. The parse() function is not permitted to change these two values at
- all, and shouldn't change any of the other values _unless_ they are
- recognise the blob format and will not return -EBADMSG to indicate it is
- not theirs.
-
- If the parser is happy with the blob, it should propose a description for
- the key and attach it to ->description, ->payload[asym_subtype] should be
- set to point to the subtype to be used, ->payload[asym_crypto] should be
- set to point to the initialised data for that subtype,
- ->payload[asym_key_ids] should point to one or more hex fingerprints and
- quotalen should be updated to indicate how much quota this key should
- account for.
-
- When clearing up, the data attached to ->payload[asym_key_ids] and
- ->description will be kfree()'d and the data attached to
- ->payload[asm_crypto] will be passed to the subtype's ->destroy() method
- to be disposed of. A module reference for the subtype pointed to by
- ->payload[asym_subtype] will be put.
-
-
- If the data format is not recognised, -EBADMSG should be returned. If it
- is recognised, but the key cannot for some reason be set up, some other
- negative error code should be returned. On success, 0 should be returned.
-
- The key's fingerprint string may be partially matched upon. For a
- public-key algorithm such as RSA and DSA this will likely be a printable
- hex version of the key's fingerprint.
-
-Functions are provided to register and unregister parsers:
-
- int register_asymmetric_key_parser(struct asymmetric_key_parser *parser);
- void unregister_asymmetric_key_parser(struct asymmetric_key_parser *subtype);
-
-Parsers may not have the same name. The names are otherwise only used for
-displaying in debugging messages.
-
-
-=========================
-KEYRING LINK RESTRICTIONS
-=========================
-
-Keyrings created from userspace using add_key can be configured to check the
-signature of the key being linked. Keys without a valid signature are not
-allowed to link.
-
-Several restriction methods are available:
-
- (1) Restrict using the kernel builtin trusted keyring
-
- - Option string used with KEYCTL_RESTRICT_KEYRING:
- - "builtin_trusted"
-
- The kernel builtin trusted keyring will be searched for the signing key.
- If the builtin trusted keyring is not configured, all links will be
- rejected. The ca_keys kernel parameter also affects which keys are used
- for signature verification.
-
- (2) Restrict using the kernel builtin and secondary trusted keyrings
-
- - Option string used with KEYCTL_RESTRICT_KEYRING:
- - "builtin_and_secondary_trusted"
-
- The kernel builtin and secondary trusted keyrings will be searched for the
- signing key. If the secondary trusted keyring is not configured, this
- restriction will behave like the "builtin_trusted" option. The ca_keys
- kernel parameter also affects which keys are used for signature
- verification.
-
- (3) Restrict using a separate key or keyring
-
- - Option string used with KEYCTL_RESTRICT_KEYRING:
- - "key_or_keyring:<key or keyring serial number>[:chain]"
-
- Whenever a key link is requested, the link will only succeed if the key
- being linked is signed by one of the designated keys. This key may be
- specified directly by providing a serial number for one asymmetric key, or
- a group of keys may be searched for the signing key by providing the
- serial number for a keyring.
-
- When the "chain" option is provided at the end of the string, the keys
- within the destination keyring will also be searched for signing keys.
- This allows for verification of certificate chains by adding each
- certificate in order (starting closest to the root) to a keyring. For
- instance, one keyring can be populated with links to a set of root
- certificates, with a separate, restricted keyring set up for each
- certificate chain to be validated:
-
- # Create and populate a keyring for root certificates
- root_id=`keyctl add keyring root-certs "" @s`
- keyctl padd asymmetric "" $root_id < root1.cert
- keyctl padd asymmetric "" $root_id < root2.cert
-
- # Create and restrict a keyring for the certificate chain
- chain_id=`keyctl add keyring chain "" @s`
- keyctl restrict_keyring $chain_id asymmetric key_or_keyring:$root_id:chain
-
- # Attempt to add each certificate in the chain, starting with the
- # certificate closest to the root.
- keyctl padd asymmetric "" $chain_id < intermediateA.cert
- keyctl padd asymmetric "" $chain_id < intermediateB.cert
- keyctl padd asymmetric "" $chain_id < end-entity.cert
-
- If the final end-entity certificate is successfully added to the "chain"
- keyring, we can be certain that it has a valid signing chain going back to
- one of the root certificates.
-
- A single keyring can be used to verify a chain of signatures by
- restricting the keyring after linking the root certificate:
-
- # Create a keyring for the certificate chain and add the root
- chain2_id=`keyctl add keyring chain2 "" @s`
- keyctl padd asymmetric "" $chain2_id < root1.cert
-
- # Restrict the keyring that already has root1.cert linked. The cert
- # will remain linked by the keyring.
- keyctl restrict_keyring $chain2_id asymmetric key_or_keyring:0:chain
-
- # Attempt to add each certificate in the chain, starting with the
- # certificate closest to the root.
- keyctl padd asymmetric "" $chain2_id < intermediateA.cert
- keyctl padd asymmetric "" $chain2_id < intermediateB.cert
- keyctl padd asymmetric "" $chain2_id < end-entity.cert
-
- If the final end-entity certificate is successfully added to the "chain2"
- keyring, we can be certain that there is a valid signing chain going back
- to the root certificate that was added before the keyring was restricted.
-
-
-In all of these cases, if the signing key is found the signature of the key to
-be linked will be verified using the signing key. The requested key is added
-to the keyring only if the signature is successfully verified. -ENOKEY is
-returned if the parent certificate could not be found, or -EKEYREJECTED is
-returned if the signature check fails or the key is blacklisted. Other errors
-may be returned if the signature check could not be performed.
diff --git a/Documentation/crypto/async-tx-api.rst b/Documentation/crypto/async-tx-api.rst
new file mode 100644
index 000000000000..f88a7809385e
--- /dev/null
+++ b/Documentation/crypto/async-tx-api.rst
@@ -0,0 +1,270 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================================
+Asynchronous Transfers/Transforms API
+=====================================
+
+.. Contents
+
+ 1. INTRODUCTION
+
+ 2 GENEALOGY
+
+ 3 USAGE
+ 3.1 General format of the API
+ 3.2 Supported operations
+ 3.3 Descriptor management
+ 3.4 When does the operation execute?
+ 3.5 When does the operation complete?
+ 3.6 Constraints
+ 3.7 Example
+
+ 4 DMAENGINE DRIVER DEVELOPER NOTES
+ 4.1 Conformance points
+ 4.2 "My application needs exclusive control of hardware channels"
+
+ 5 SOURCE
+
+1. Introduction
+===============
+
+The async_tx API provides methods for describing a chain of asynchronous
+bulk memory transfers/transforms with support for inter-transactional
+dependencies. It is implemented as a dmaengine client that smooths over
+the details of different hardware offload engine implementations. Code
+that is written to the API can optimize for asynchronous operation and
+the API will fit the chain of operations to the available offload
+resources.
+
+2.Genealogy
+===========
+
+The API was initially designed to offload the memory copy and
+xor-parity-calculations of the md-raid5 driver using the offload engines
+present in the Intel(R) Xscale series of I/O processors. It also built
+on the 'dmaengine' layer developed for offloading memory copies in the
+network stack using Intel(R) I/OAT engines. The following design
+features surfaced as a result:
+
+1. implicit synchronous path: users of the API do not need to know if
+ the platform they are running on has offload capabilities. The
+ operation will be offloaded when an engine is available and carried out
+ in software otherwise.
+2. cross channel dependency chains: the API allows a chain of dependent
+ operations to be submitted, like xor->copy->xor in the raid5 case. The
+ API automatically handles cases where the transition from one operation
+ to another implies a hardware channel switch.
+3. dmaengine extensions to support multiple clients and operation types
+ beyond 'memcpy'
+
+3. Usage
+========
+
+3.1 General format of the API
+-----------------------------
+
+::
+
+ struct dma_async_tx_descriptor *
+ async_<operation>(<op specific parameters>, struct async_submit_ctl *submit)
+
+3.2 Supported operations
+------------------------
+
+======== ====================================================================
+memcpy memory copy between a source and a destination buffer
+memset fill a destination buffer with a byte value
+xor xor a series of source buffers and write the result to a
+ destination buffer
+xor_val xor a series of source buffers and set a flag if the
+ result is zero. The implementation attempts to prevent
+ writes to memory
+pq generate the p+q (raid6 syndrome) from a series of source buffers
+pq_val validate that a p and or q buffer are in sync with a given series of
+ sources
+datap (raid6_datap_recov) recover a raid6 data block and the p block
+ from the given sources
+2data (raid6_2data_recov) recover 2 raid6 data blocks from the given
+ sources
+======== ====================================================================
+
+3.3 Descriptor management
+-------------------------
+
+The return value is non-NULL and points to a 'descriptor' when the operation
+has been queued to execute asynchronously. Descriptors are recycled
+resources, under control of the offload engine driver, to be reused as
+operations complete. When an application needs to submit a chain of
+operations it must guarantee that the descriptor is not automatically recycled
+before the dependency is submitted. This requires that all descriptors be
+acknowledged by the application before the offload engine driver is allowed to
+recycle (or free) the descriptor. A descriptor can be acked by one of the
+following methods:
+
+1. setting the ASYNC_TX_ACK flag if no child operations are to be submitted
+2. submitting an unacknowledged descriptor as a dependency to another
+ async_tx call will implicitly set the acknowledged state.
+3. calling async_tx_ack() on the descriptor.
+
+3.4 When does the operation execute?
+------------------------------------
+
+Operations do not immediately issue after return from the
+async_<operation> call. Offload engine drivers batch operations to
+improve performance by reducing the number of mmio cycles needed to
+manage the channel. Once a driver-specific threshold is met the driver
+automatically issues pending operations. An application can force this
+event by calling async_tx_issue_pending_all(). This operates on all
+channels since the application has no knowledge of channel to operation
+mapping.
+
+3.5 When does the operation complete?
+-------------------------------------
+
+There are two methods for an application to learn about the completion
+of an operation.
+
+1. Call dma_wait_for_async_tx(). This call causes the CPU to spin while
+ it polls for the completion of the operation. It handles dependency
+ chains and issuing pending operations.
+2. Specify a completion callback. The callback routine runs in tasklet
+ context if the offload engine driver supports interrupts, or it is
+ called in application context if the operation is carried out
+ synchronously in software. The callback can be set in the call to
+ async_<operation>, or when the application needs to submit a chain of
+ unknown length it can use the async_trigger_callback() routine to set a
+ completion interrupt/callback at the end of the chain.
+
+3.6 Constraints
+---------------
+
+1. Calls to async_<operation> are not permitted in IRQ context. Other
+ contexts are permitted provided constraint #2 is not violated.
+2. Completion callback routines cannot submit new operations. This
+ results in recursion in the synchronous case and spin_locks being
+ acquired twice in the asynchronous case.
+
+3.7 Example
+-----------
+
+Perform a xor->copy->xor operation where each operation depends on the
+result from the previous operation::
+
+ #include <linux/async_tx.h>
+
+ static void callback(void *param)
+ {
+ complete(param);
+ }
+
+ #define NDISKS 2
+
+ static void run_xor_copy_xor(struct page **xor_srcs,
+ struct page *xor_dest,
+ size_t xor_len,
+ struct page *copy_src,
+ struct page *copy_dest,
+ size_t copy_len)
+ {
+ struct dma_async_tx_descriptor *tx;
+ struct async_submit_ctl submit;
+ addr_conv_t addr_conv[NDISKS];
+ struct completion cmp;
+
+ init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL,
+ addr_conv);
+ tx = async_xor(xor_dest, xor_srcs, 0, NDISKS, xor_len, &submit);
+
+ submit.depend_tx = tx;
+ tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len, &submit);
+
+ init_completion(&cmp);
+ init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST | ASYNC_TX_ACK, tx,
+ callback, &cmp, addr_conv);
+ tx = async_xor(xor_dest, xor_srcs, 0, NDISKS, xor_len, &submit);
+
+ async_tx_issue_pending_all();
+
+ wait_for_completion(&cmp);
+ }
+
+See include/linux/async_tx.h for more information on the flags. See the
+ops_run_* and ops_complete_* routines in drivers/md/raid5.c for more
+implementation examples.
+
+4. Driver Development Notes
+===========================
+
+4.1 Conformance points
+----------------------
+
+There are a few conformance points required in dmaengine drivers to
+accommodate assumptions made by applications using the async_tx API:
+
+1. Completion callbacks are expected to happen in tasklet context
+2. dma_async_tx_descriptor fields are never manipulated in IRQ context
+3. Use async_tx_run_dependencies() in the descriptor clean up path to
+ handle submission of dependent operations
+
+4.2 "My application needs exclusive control of hardware channels"
+-----------------------------------------------------------------
+
+Primarily this requirement arises from cases where a DMA engine driver
+is being used to support device-to-memory operations. A channel that is
+performing these operations cannot, for many platform specific reasons,
+be shared. For these cases the dma_request_channel() interface is
+provided.
+
+The interface is::
+
+ struct dma_chan *dma_request_channel(dma_cap_mask_t mask,
+ dma_filter_fn filter_fn,
+ void *filter_param);
+
+Where dma_filter_fn is defined as::
+
+ typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param);
+
+When the optional 'filter_fn' parameter is set to NULL
+dma_request_channel simply returns the first channel that satisfies the
+capability mask. Otherwise, when the mask parameter is insufficient for
+specifying the necessary channel, the filter_fn routine can be used to
+disposition the available channels in the system. The filter_fn routine
+is called once for each free channel in the system. Upon seeing a
+suitable channel filter_fn returns DMA_ACK which flags that channel to
+be the return value from dma_request_channel. A channel allocated via
+this interface is exclusive to the caller, until dma_release_channel()
+is called.
+
+The DMA_PRIVATE capability flag is used to tag dma devices that should
+not be used by the general-purpose allocator. It can be set at
+initialization time if it is known that a channel will always be
+private. Alternatively, it is set when dma_request_channel() finds an
+unused "public" channel.
+
+A couple caveats to note when implementing a driver and consumer:
+
+1. Once a channel has been privately allocated it will no longer be
+ considered by the general-purpose allocator even after a call to
+ dma_release_channel().
+2. Since capabilities are specified at the device level a dma_device
+ with multiple channels will either have all channels public, or all
+ channels private.
+
+5. Source
+---------
+
+include/linux/dmaengine.h:
+ core header file for DMA drivers and api users
+drivers/dma/dmaengine.c:
+ offload engine channel management routines
+drivers/dma/:
+ location for offload engine drivers
+include/linux/async_tx.h:
+ core header file for the async_tx api
+crypto/async_tx/async_tx.c:
+ async_tx interface to dmaengine and common code
+crypto/async_tx/async_memcpy.c:
+ copy offload
+crypto/async_tx/async_xor.c:
+ xor and xor zero sum offload
diff --git a/Documentation/crypto/async-tx-api.txt b/Documentation/crypto/async-tx-api.txt
deleted file mode 100644
index 7bf1be20d93a..000000000000
--- a/Documentation/crypto/async-tx-api.txt
+++ /dev/null
@@ -1,225 +0,0 @@
- Asynchronous Transfers/Transforms API
-
-1 INTRODUCTION
-
-2 GENEALOGY
-
-3 USAGE
-3.1 General format of the API
-3.2 Supported operations
-3.3 Descriptor management
-3.4 When does the operation execute?
-3.5 When does the operation complete?
-3.6 Constraints
-3.7 Example
-
-4 DMAENGINE DRIVER DEVELOPER NOTES
-4.1 Conformance points
-4.2 "My application needs exclusive control of hardware channels"
-
-5 SOURCE
-
----
-
-1 INTRODUCTION
-
-The async_tx API provides methods for describing a chain of asynchronous
-bulk memory transfers/transforms with support for inter-transactional
-dependencies. It is implemented as a dmaengine client that smooths over
-the details of different hardware offload engine implementations. Code
-that is written to the API can optimize for asynchronous operation and
-the API will fit the chain of operations to the available offload
-resources.
-
-2 GENEALOGY
-
-The API was initially designed to offload the memory copy and
-xor-parity-calculations of the md-raid5 driver using the offload engines
-present in the Intel(R) Xscale series of I/O processors. It also built
-on the 'dmaengine' layer developed for offloading memory copies in the
-network stack using Intel(R) I/OAT engines. The following design
-features surfaced as a result:
-1/ implicit synchronous path: users of the API do not need to know if
- the platform they are running on has offload capabilities. The
- operation will be offloaded when an engine is available and carried out
- in software otherwise.
-2/ cross channel dependency chains: the API allows a chain of dependent
- operations to be submitted, like xor->copy->xor in the raid5 case. The
- API automatically handles cases where the transition from one operation
- to another implies a hardware channel switch.
-3/ dmaengine extensions to support multiple clients and operation types
- beyond 'memcpy'
-
-3 USAGE
-
-3.1 General format of the API:
-struct dma_async_tx_descriptor *
-async_<operation>(<op specific parameters>, struct async_submit ctl *submit)
-
-3.2 Supported operations:
-memcpy - memory copy between a source and a destination buffer
-memset - fill a destination buffer with a byte value
-xor - xor a series of source buffers and write the result to a
- destination buffer
-xor_val - xor a series of source buffers and set a flag if the
- result is zero. The implementation attempts to prevent
- writes to memory
-pq - generate the p+q (raid6 syndrome) from a series of source buffers
-pq_val - validate that a p and or q buffer are in sync with a given series of
- sources
-datap - (raid6_datap_recov) recover a raid6 data block and the p block
- from the given sources
-2data - (raid6_2data_recov) recover 2 raid6 data blocks from the given
- sources
-
-3.3 Descriptor management:
-The return value is non-NULL and points to a 'descriptor' when the operation
-has been queued to execute asynchronously. Descriptors are recycled
-resources, under control of the offload engine driver, to be reused as
-operations complete. When an application needs to submit a chain of
-operations it must guarantee that the descriptor is not automatically recycled
-before the dependency is submitted. This requires that all descriptors be
-acknowledged by the application before the offload engine driver is allowed to
-recycle (or free) the descriptor. A descriptor can be acked by one of the
-following methods:
-1/ setting the ASYNC_TX_ACK flag if no child operations are to be submitted
-2/ submitting an unacknowledged descriptor as a dependency to another
- async_tx call will implicitly set the acknowledged state.
-3/ calling async_tx_ack() on the descriptor.
-
-3.4 When does the operation execute?
-Operations do not immediately issue after return from the
-async_<operation> call. Offload engine drivers batch operations to
-improve performance by reducing the number of mmio cycles needed to
-manage the channel. Once a driver-specific threshold is met the driver
-automatically issues pending operations. An application can force this
-event by calling async_tx_issue_pending_all(). This operates on all
-channels since the application has no knowledge of channel to operation
-mapping.
-
-3.5 When does the operation complete?
-There are two methods for an application to learn about the completion
-of an operation.
-1/ Call dma_wait_for_async_tx(). This call causes the CPU to spin while
- it polls for the completion of the operation. It handles dependency
- chains and issuing pending operations.
-2/ Specify a completion callback. The callback routine runs in tasklet
- context if the offload engine driver supports interrupts, or it is
- called in application context if the operation is carried out
- synchronously in software. The callback can be set in the call to
- async_<operation>, or when the application needs to submit a chain of
- unknown length it can use the async_trigger_callback() routine to set a
- completion interrupt/callback at the end of the chain.
-
-3.6 Constraints:
-1/ Calls to async_<operation> are not permitted in IRQ context. Other
- contexts are permitted provided constraint #2 is not violated.
-2/ Completion callback routines cannot submit new operations. This
- results in recursion in the synchronous case and spin_locks being
- acquired twice in the asynchronous case.
-
-3.7 Example:
-Perform a xor->copy->xor operation where each operation depends on the
-result from the previous operation:
-
-void callback(void *param)
-{
- struct completion *cmp = param;
-
- complete(cmp);
-}
-
-void run_xor_copy_xor(struct page **xor_srcs,
- int xor_src_cnt,
- struct page *xor_dest,
- size_t xor_len,
- struct page *copy_src,
- struct page *copy_dest,
- size_t copy_len)
-{
- struct dma_async_tx_descriptor *tx;
- addr_conv_t addr_conv[xor_src_cnt];
- struct async_submit_ctl submit;
- addr_conv_t addr_conv[NDISKS];
- struct completion cmp;
-
- init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL,
- addr_conv);
- tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, &submit)
-
- submit->depend_tx = tx;
- tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len, &submit);
-
- init_completion(&cmp);
- init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST | ASYNC_TX_ACK, tx,
- callback, &cmp, addr_conv);
- tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, &submit);
-
- async_tx_issue_pending_all();
-
- wait_for_completion(&cmp);
-}
-
-See include/linux/async_tx.h for more information on the flags. See the
-ops_run_* and ops_complete_* routines in drivers/md/raid5.c for more
-implementation examples.
-
-4 DRIVER DEVELOPMENT NOTES
-
-4.1 Conformance points:
-There are a few conformance points required in dmaengine drivers to
-accommodate assumptions made by applications using the async_tx API:
-1/ Completion callbacks are expected to happen in tasklet context
-2/ dma_async_tx_descriptor fields are never manipulated in IRQ context
-3/ Use async_tx_run_dependencies() in the descriptor clean up path to
- handle submission of dependent operations
-
-4.2 "My application needs exclusive control of hardware channels"
-Primarily this requirement arises from cases where a DMA engine driver
-is being used to support device-to-memory operations. A channel that is
-performing these operations cannot, for many platform specific reasons,
-be shared. For these cases the dma_request_channel() interface is
-provided.
-
-The interface is:
-struct dma_chan *dma_request_channel(dma_cap_mask_t mask,
- dma_filter_fn filter_fn,
- void *filter_param);
-
-Where dma_filter_fn is defined as:
-typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param);
-
-When the optional 'filter_fn' parameter is set to NULL
-dma_request_channel simply returns the first channel that satisfies the
-capability mask. Otherwise, when the mask parameter is insufficient for
-specifying the necessary channel, the filter_fn routine can be used to
-disposition the available channels in the system. The filter_fn routine
-is called once for each free channel in the system. Upon seeing a
-suitable channel filter_fn returns DMA_ACK which flags that channel to
-be the return value from dma_request_channel. A channel allocated via
-this interface is exclusive to the caller, until dma_release_channel()
-is called.
-
-The DMA_PRIVATE capability flag is used to tag dma devices that should
-not be used by the general-purpose allocator. It can be set at
-initialization time if it is known that a channel will always be
-private. Alternatively, it is set when dma_request_channel() finds an
-unused "public" channel.
-
-A couple caveats to note when implementing a driver and consumer:
-1/ Once a channel has been privately allocated it will no longer be
- considered by the general-purpose allocator even after a call to
- dma_release_channel().
-2/ Since capabilities are specified at the device level a dma_device
- with multiple channels will either have all channels public, or all
- channels private.
-
-5 SOURCE
-
-include/linux/dmaengine.h: core header file for DMA drivers and api users
-drivers/dma/dmaengine.c: offload engine channel management routines
-drivers/dma/: location for offload engine drivers
-include/linux/async_tx.h: core header file for the async_tx api
-crypto/async_tx/async_tx.c: async_tx interface to dmaengine and common code
-crypto/async_tx/async_memcpy.c: copy offload
-crypto/async_tx/async_xor.c: xor and xor zero sum offload
diff --git a/Documentation/crypto/conf.py b/Documentation/crypto/conf.py
deleted file mode 100644
index 4335d251ddf3..000000000000
--- a/Documentation/crypto/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = 'Linux Kernel Crypto API'
-
-tags.add("subproject")
-
-latex_documents = [
- ('index', 'crypto-api.tex', 'Linux Kernel Crypto API manual',
- 'The kernel development community', 'manual'),
-]
diff --git a/Documentation/crypto/crypto_engine.rst b/Documentation/crypto/crypto_engine.rst
index 1d56221dfe35..7ef850e28016 100644
--- a/Documentation/crypto/crypto_engine.rst
+++ b/Documentation/crypto/crypto_engine.rst
@@ -1,50 +1,80 @@
-=============
-CRYPTO ENGINE
+.. SPDX-License-Identifier: GPL-2.0
+
+Crypto Engine
=============
Overview
--------
-The crypto engine API (CE), is a crypto queue manager.
+The crypto engine (CE) API is a crypto queue manager.
Requirement
-----------
-You have to put at start of your tfm_ctx the struct crypto_engine_ctx::
+You must put, at the start of your transform context your_tfm_ctx, the structure
+crypto_engine:
+
+::
- struct your_tfm_ctx {
- struct crypto_engine_ctx enginectx;
- ...
- };
+ struct your_tfm_ctx {
+ struct crypto_engine engine;
+ ...
+ };
-Why: Since CE manage only crypto_async_request, it cannot know the underlying
-request_type and so have access only on the TFM.
-So using container_of for accessing __ctx is impossible.
-Furthermore, the crypto engine cannot know the "struct your_tfm_ctx",
-so it must assume that crypto_engine_ctx is at start of it.
+The crypto engine only manages asynchronous requests in the form of
+crypto_async_request. It cannot know the underlying request type and thus only
+has access to the transform structure. It is not possible to access the context
+using container_of. In addition, the engine knows nothing about your
+structure "``struct your_tfm_ctx``". The engine assumes (requires) the placement
+of the known member ``struct crypto_engine`` at the beginning.
Order of operations
-------------------
-You have to obtain a struct crypto_engine via crypto_engine_alloc_init().
-And start it via crypto_engine_start().
-
-Before transferring any request, you have to fill the enginectx.
-- prepare_request: (taking a function pointer) If you need to do some processing before doing the request
-- unprepare_request: (taking a function pointer) Undoing what's done in prepare_request
-- do_one_request: (taking a function pointer) Do encryption for current request
-
-Note: that those three functions get the crypto_async_request associated with the received request.
-So your need to get the original request via container_of(areq, struct yourrequesttype_request, base);
-
-When your driver receive a crypto_request, you have to transfer it to
-the cryptoengine via one of:
-- crypto_transfer_ablkcipher_request_to_engine()
-- crypto_transfer_aead_request_to_engine()
-- crypto_transfer_akcipher_request_to_engine()
-- crypto_transfer_hash_request_to_engine()
-- crypto_transfer_skcipher_request_to_engine()
-
-At the end of the request process, a call to one of the following function is needed:
-- crypto_finalize_ablkcipher_request
-- crypto_finalize_aead_request
-- crypto_finalize_akcipher_request
-- crypto_finalize_hash_request
-- crypto_finalize_skcipher_request
+You are required to obtain a struct crypto_engine via ``crypto_engine_alloc_init()``.
+Start it via ``crypto_engine_start()``. When finished with your work, shut down the
+engine using ``crypto_engine_stop()`` and destroy the engine with
+``crypto_engine_exit()``.
+
+Before transferring any request, you have to fill the context enginectx by
+providing functions for the following:
+
+* ``prepare_cipher_request``/``prepare_hash_request``: Called before each
+ corresponding request is performed. If some processing or other preparatory
+ work is required, do it here.
+
+* ``unprepare_cipher_request``/``unprepare_hash_request``: Called after each
+ request is handled. Clean up / undo what was done in the prepare function.
+
+* ``cipher_one_request``/``hash_one_request``: Handle the current request by
+ performing the operation.
+
+Note that these functions access the crypto_async_request structure
+associated with the received request. You are able to retrieve the original
+request by using:
+
+::
+
+ container_of(areq, struct yourrequesttype_request, base);
+
+When your driver receives a crypto_request, you must to transfer it to
+the crypto engine via one of:
+
+* crypto_transfer_aead_request_to_engine()
+
+* crypto_transfer_akcipher_request_to_engine()
+
+* crypto_transfer_hash_request_to_engine()
+
+* crypto_transfer_kpp_request_to_engine()
+
+* crypto_transfer_skcipher_request_to_engine()
+
+At the end of the request process, a call to one of the following functions is needed:
+
+* crypto_finalize_aead_request()
+
+* crypto_finalize_akcipher_request()
+
+* crypto_finalize_hash_request()
+
+* crypto_finalize_kpp_request()
+
+* crypto_finalize_skcipher_request()
diff --git a/Documentation/crypto/descore-readme.rst b/Documentation/crypto/descore-readme.rst
new file mode 100644
index 000000000000..45bd9c8babf4
--- /dev/null
+++ b/Documentation/crypto/descore-readme.rst
@@ -0,0 +1,414 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+===========================================
+Fast & Portable DES encryption & decryption
+===========================================
+
+.. note::
+
+ Below is the original README file from the descore.shar package,
+ converted to ReST format.
+
+------------------------------------------------------------------------------
+
+des - fast & portable DES encryption & decryption.
+
+Copyright |copy| 1992 Dana L. How
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Author's address: how@isl.stanford.edu
+
+.. README,v 1.15 1992/05/20 00:25:32 how E
+
+==>> To compile after untarring/unsharring, just ``make`` <<==
+
+This package was designed with the following goals:
+
+1. Highest possible encryption/decryption PERFORMANCE.
+2. PORTABILITY to any byte-addressable host with a 32bit unsigned C type
+3. Plug-compatible replacement for KERBEROS's low-level routines.
+
+This second release includes a number of performance enhancements for
+register-starved machines. My discussions with Richard Outerbridge,
+71755.204@compuserve.com, sparked a number of these enhancements.
+
+To more rapidly understand the code in this package, inspect desSmallFips.i
+(created by typing ``make``) BEFORE you tackle desCode.h. The latter is set
+up in a parameterized fashion so it can easily be modified by speed-daemon
+hackers in pursuit of that last microsecond. You will find it more
+illuminating to inspect one specific implementation,
+and then move on to the common abstract skeleton with this one in mind.
+
+
+performance comparison to other available des code which i could
+compile on a SPARCStation 1 (cc -O4, gcc -O2):
+
+this code (byte-order independent):
+
+ - 30us per encryption (options: 64k tables, no IP/FP)
+ - 33us per encryption (options: 64k tables, FIPS standard bit ordering)
+ - 45us per encryption (options: 2k tables, no IP/FP)
+ - 48us per encryption (options: 2k tables, FIPS standard bit ordering)
+ - 275us to set a new key (uses 1k of key tables)
+
+ this has the quickest encryption/decryption routines i've seen.
+ since i was interested in fast des filters rather than crypt(3)
+ and password cracking, i haven't really bothered yet to speed up
+ the key setting routine. also, i have no interest in re-implementing
+ all the other junk in the mit kerberos des library, so i've just
+ provided my routines with little stub interfaces so they can be
+ used as drop-in replacements with mit's code or any of the mit-
+ compatible packages below. (note that the first two timings above
+ are highly variable because of cache effects).
+
+kerberos des replacement from australia (version 1.95):
+
+ - 53us per encryption (uses 2k of tables)
+ - 96us to set a new key (uses 2.25k of key tables)
+
+ so despite the author's inclusion of some of the performance
+ improvements i had suggested to him, this package's
+ encryption/decryption is still slower on the sparc and 68000.
+ more specifically, 19-40% slower on the 68020 and 11-35% slower
+ on the sparc, depending on the compiler;
+ in full gory detail (ALT_ECB is a libdes variant):
+
+ =============== ============== =============== =================
+ compiler machine desCore libdes ALT_ECB slower by
+ =============== ============== =============== =================
+ gcc 2.1 -O2 Sun 3/110 304 uS 369.5uS 461.8uS 22%
+ cc -O1 Sun 3/110 336 uS 436.6uS 399.3uS 19%
+ cc -O2 Sun 3/110 360 uS 532.4uS 505.1uS 40%
+ cc -O4 Sun 3/110 365 uS 532.3uS 505.3uS 38%
+ gcc 2.1 -O2 Sun 4/50 48 uS 53.4uS 57.5uS 11%
+ cc -O2 Sun 4/50 48 uS 64.6uS 64.7uS 35%
+ cc -O4 Sun 4/50 48 uS 64.7uS 64.9uS 35%
+ =============== ============== =============== =================
+
+ (my time measurements are not as accurate as his).
+
+ the comments in my first release of desCore on version 1.92:
+
+ - 68us per encryption (uses 2k of tables)
+ - 96us to set a new key (uses 2.25k of key tables)
+
+ this is a very nice package which implements the most important
+ of the optimizations which i did in my encryption routines.
+ it's a bit weak on common low-level optimizations which is why
+ it's 39%-106% slower. because he was interested in fast crypt(3) and
+ password-cracking applications, he also used the same ideas to
+ speed up the key-setting routines with impressive results.
+ (at some point i may do the same in my package). he also implements
+ the rest of the mit des library.
+
+ (code from eay@psych.psy.uq.oz.au via comp.sources.misc)
+
+fast crypt(3) package from denmark:
+
+ the des routine here is buried inside a loop to do the
+ crypt function and i didn't feel like ripping it out and measuring
+ performance. his code takes 26 sparc instructions to compute one
+ des iteration; above, Quick (64k) takes 21 and Small (2k) takes 37.
+ he claims to use 280k of tables but the iteration calculation seems
+ to use only 128k. his tables and code are machine independent.
+
+ (code from glad@daimi.aau.dk via alt.sources or comp.sources.misc)
+
+swedish reimplementation of Kerberos des library
+
+ - 108us per encryption (uses 34k worth of tables)
+ - 134us to set a new key (uses 32k of key tables to get this speed!)
+
+ the tables used seem to be machine-independent;
+ he seems to have included a lot of special case code
+ so that, e.g., ``long`` loads can be used instead of 4 ``char`` loads
+ when the machine's architecture allows it.
+
+ (code obtained from chalmers.se:pub/des)
+
+crack 3.3c package from england:
+
+ as in crypt above, the des routine is buried in a loop. it's
+ also very modified for crypt. his iteration code uses 16k
+ of tables and appears to be slow.
+
+ (code obtained from aem@aber.ac.uk via alt.sources or comp.sources.misc)
+
+``highly optimized`` and tweaked Kerberos/Athena code (byte-order dependent):
+
+ - 165us per encryption (uses 6k worth of tables)
+ - 478us to set a new key (uses <1k of key tables)
+
+ so despite the comments in this code, it was possible to get
+ faster code AND smaller tables, as well as making the tables
+ machine-independent.
+ (code obtained from prep.ai.mit.edu)
+
+UC Berkeley code (depends on machine-endedness):
+ - 226us per encryption
+ - 10848us to set a new key
+
+ table sizes are unclear, but they don't look very small
+ (code obtained from wuarchive.wustl.edu)
+
+
+motivation and history
+======================
+
+a while ago i wanted some des routines and the routines documented on sun's
+man pages either didn't exist or dumped core. i had heard of kerberos,
+and knew that it used des, so i figured i'd use its routines. but once
+i got it and looked at the code, it really set off a lot of pet peeves -
+it was too convoluted, the code had been written without taking
+advantage of the regular structure of operations such as IP, E, and FP
+(i.e. the author didn't sit down and think before coding),
+it was excessively slow, the author had attempted to clarify the code
+by adding MORE statements to make the data movement more ``consistent``
+instead of simplifying his implementation and cutting down on all data
+movement (in particular, his use of L1, R1, L2, R2), and it was full of
+idiotic ``tweaks`` for particular machines which failed to deliver significant
+speedups but which did obfuscate everything. so i took the test data
+from his verification program and rewrote everything else.
+
+a while later i ran across the great crypt(3) package mentioned above.
+the fact that this guy was computing 2 sboxes per table lookup rather
+than one (and using a MUCH larger table in the process) emboldened me to
+do the same - it was a trivial change from which i had been scared away
+by the larger table size. in his case he didn't realize you don't need to keep
+the working data in TWO forms, one for easy use of half the sboxes in
+indexing, the other for easy use of the other half; instead you can keep
+it in the form for the first half and use a simple rotate to get the other
+half. this means i have (almost) half the data manipulation and half
+the table size. in fairness though he might be encoding something particular
+to crypt(3) in his tables - i didn't check.
+
+i'm glad that i implemented it the way i did, because this C version is
+portable (the ifdef's are performance enhancements) and it is faster
+than versions hand-written in assembly for the sparc!
+
+
+porting notes
+=============
+
+one thing i did not want to do was write an enormous mess
+which depended on endedness and other machine quirks,
+and which necessarily produced different code and different lookup tables
+for different machines. see the kerberos code for an example
+of what i didn't want to do; all their endedness-specific ``optimizations``
+obfuscate the code and in the end were slower than a simpler machine
+independent approach. however, there are always some portability
+considerations of some kind, and i have included some options
+for varying numbers of register variables.
+perhaps some will still regard the result as a mess!
+
+1) i assume everything is byte addressable, although i don't actually
+ depend on the byte order, and that bytes are 8 bits.
+ i assume word pointers can be freely cast to and from char pointers.
+ note that 99% of C programs make these assumptions.
+ i always use unsigned char's if the high bit could be set.
+2) the typedef ``word`` means a 32 bit unsigned integral type.
+ if ``unsigned long`` is not 32 bits, change the typedef in desCore.h.
+ i assume sizeof(word) == 4 EVERYWHERE.
+
+the (worst-case) cost of my NOT doing endedness-specific optimizations
+in the data loading and storing code surrounding the key iterations
+is less than 12%. also, there is the added benefit that
+the input and output work areas do not need to be word-aligned.
+
+
+OPTIONAL performance optimizations
+==================================
+
+1) you should define one of ``i386,`` ``vax,`` ``mc68000,`` or ``sparc,``
+ whichever one is closest to the capabilities of your machine.
+ see the start of desCode.h to see exactly what this selection implies.
+ note that if you select the wrong one, the des code will still work;
+ these are just performance tweaks.
+2) for those with functional ``asm`` keywords: you should change the
+ ROR and ROL macros to use machine rotate instructions if you have them.
+ this will save 2 instructions and a temporary per use,
+ or about 32 to 40 instructions per en/decryption.
+
+ note that gcc is smart enough to translate the ROL/R macros into
+ machine rotates!
+
+these optimizations are all rather persnickety, yet with them you should
+be able to get performance equal to assembly-coding, except that:
+
+1) with the lack of a bit rotate operator in C, rotates have to be synthesized
+ from shifts. so access to ``asm`` will speed things up if your machine
+ has rotates, as explained above in (3) (not necessary if you use gcc).
+2) if your machine has less than 12 32-bit registers i doubt your compiler will
+ generate good code.
+
+ ``i386`` tries to configure the code for a 386 by only declaring 3 registers
+ (it appears that gcc can use ebx, esi and edi to hold register variables).
+ however, if you like assembly coding, the 386 does have 7 32-bit registers,
+ and if you use ALL of them, use ``scaled by 8`` address modes with displacement
+ and other tricks, you can get reasonable routines for DesQuickCore... with
+ about 250 instructions apiece. For DesSmall... it will help to rearrange
+ des_keymap, i.e., now the sbox # is the high part of the index and
+ the 6 bits of data is the low part; it helps to exchange these.
+
+ since i have no way to conveniently test it i have not provided my
+ shoehorned 386 version. note that with this release of desCore, gcc is able
+ to put everything in registers(!), and generate about 370 instructions apiece
+ for the DesQuickCore... routines!
+
+coding notes
+============
+
+the en/decryption routines each use 6 necessary register variables,
+with 4 being actively used at once during the inner iterations.
+if you don't have 4 register variables get a new machine.
+up to 8 more registers are used to hold constants in some configurations.
+
+i assume that the use of a constant is more expensive than using a register:
+
+a) additionally, i have tried to put the larger constants in registers.
+ registering priority was by the following:
+
+ - anything more than 12 bits (bad for RISC and CISC)
+ - greater than 127 in value (can't use movq or byte immediate on CISC)
+ - 9-127 (may not be able to use CISC shift immediate or add/sub quick),
+ - 1-8 were never registered, being the cheapest constants.
+
+b) the compiler may be too stupid to realize table and table+256 should
+ be assigned to different constant registers and instead repetitively
+ do the arithmetic, so i assign these to explicit ``m`` register variables
+ when possible and helpful.
+
+i assume that indexing is cheaper or equivalent to auto increment/decrement,
+where the index is 7 bits unsigned or smaller.
+this assumption is reversed for 68k and vax.
+
+i assume that addresses can be cheaply formed from two registers,
+or from a register and a small constant.
+for the 68000, the ``two registers and small offset`` form is used sparingly.
+all index scaling is done explicitly - no hidden shifts by log2(sizeof).
+
+the code is written so that even a dumb compiler
+should never need more than one hidden temporary,
+increasing the chance that everything will fit in the registers.
+KEEP THIS MORE SUBTLE POINT IN MIND IF YOU REWRITE ANYTHING.
+
+(actually, there are some code fragments now which do require two temps,
+but fixing it would either break the structure of the macros or
+require declaring another temporary).
+
+
+special efficient data format
+==============================
+
+bits are manipulated in this arrangement most of the time (S7 S5 S3 S1)::
+
+ 003130292827xxxx242322212019xxxx161514131211xxxx080706050403xxxx
+
+(the x bits are still there, i'm just emphasizing where the S boxes are).
+bits are rotated left 4 when computing S6 S4 S2 S0::
+
+ 282726252423xxxx201918171615xxxx121110090807xxxx040302010031xxxx
+
+the rightmost two bits are usually cleared so the lower byte can be used
+as an index into an sbox mapping table. the next two x'd bits are set
+to various values to access different parts of the tables.
+
+
+how to use the routines
+
+datatypes:
+ pointer to 8 byte area of type DesData
+ used to hold keys and input/output blocks to des.
+
+ pointer to 128 byte area of type DesKeys
+ used to hold full 768-bit key.
+ must be long-aligned.
+
+DesQuickInit()
+ call this before using any other routine with ``Quick`` in its name.
+ it generates the special 64k table these routines need.
+DesQuickDone()
+ frees this table
+
+DesMethod(m, k)
+ m points to a 128byte block, k points to an 8 byte des key
+ which must have odd parity (or -1 is returned) and which must
+ not be a (semi-)weak key (or -2 is returned).
+ normally DesMethod() returns 0.
+
+ m is filled in from k so that when one of the routines below
+ is called with m, the routine will act like standard des
+ en/decryption with the key k. if you use DesMethod,
+ you supply a standard 56bit key; however, if you fill in
+ m yourself, you will get a 768bit key - but then it won't
+ be standard. it's 768bits not 1024 because the least significant
+ two bits of each byte are not used. note that these two bits
+ will be set to magic constants which speed up the encryption/decryption
+ on some machines. and yes, each byte controls
+ a specific sbox during a specific iteration.
+
+ you really shouldn't use the 768bit format directly; i should
+ provide a routine that converts 128 6-bit bytes (specified in
+ S-box mapping order or something) into the right format for you.
+ this would entail some byte concatenation and rotation.
+
+Des{Small|Quick}{Fips|Core}{Encrypt|Decrypt}(d, m, s)
+ performs des on the 8 bytes at s into the 8 bytes at
+ ``d. (d,s: char *)``.
+
+ uses m as a 768bit key as explained above.
+
+ the Encrypt|Decrypt choice is obvious.
+
+ Fips|Core determines whether a completely standard FIPS initial
+ and final permutation is done; if not, then the data is loaded
+ and stored in a nonstandard bit order (FIPS w/o IP/FP).
+
+ Fips slows down Quick by 10%, Small by 9%.
+
+ Small|Quick determines whether you use the normal routine
+ or the crazy quick one which gobbles up 64k more of memory.
+ Small is 50% slower then Quick, but Quick needs 32 times as much
+ memory. Quick is included for programs that do nothing but DES,
+ e.g., encryption filters, etc.
+
+
+Getting it to compile on your machine
+=====================================
+
+there are no machine-dependencies in the code (see porting),
+except perhaps the ``now()`` macro in desTest.c.
+ALL generated tables are machine independent.
+you should edit the Makefile with the appropriate optimization flags
+for your compiler (MAX optimization).
+
+
+Speeding up kerberos (and/or its des library)
+=============================================
+
+note that i have included a kerberos-compatible interface in desUtil.c
+through the functions des_key_sched() and des_ecb_encrypt().
+to use these with kerberos or kerberos-compatible code put desCore.a
+ahead of the kerberos-compatible library on your linker's command line.
+you should not need to #include desCore.h; just include the header
+file provided with the kerberos library.
+
+Other uses
+==========
+
+the macros in desCode.h would be very useful for putting inline des
+functions in more complicated encryption routines.
diff --git a/Documentation/crypto/descore-readme.txt b/Documentation/crypto/descore-readme.txt
deleted file mode 100644
index 16e9e6350755..000000000000
--- a/Documentation/crypto/descore-readme.txt
+++ /dev/null
@@ -1,352 +0,0 @@
-Below is the original README file from the descore.shar package.
-------------------------------------------------------------------------------
-
-des - fast & portable DES encryption & decryption.
-Copyright (C) 1992 Dana L. How
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU Library General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Library General Public License for more details.
-
-You should have received a copy of the GNU Library General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
-Author's address: how@isl.stanford.edu
-
-$Id: README,v 1.15 1992/05/20 00:25:32 how E $
-
-
-==>> To compile after untarring/unsharring, just `make' <<==
-
-
-This package was designed with the following goals:
-1. Highest possible encryption/decryption PERFORMANCE.
-2. PORTABILITY to any byte-addressable host with a 32bit unsigned C type
-3. Plug-compatible replacement for KERBEROS's low-level routines.
-
-This second release includes a number of performance enhancements for
-register-starved machines. My discussions with Richard Outerbridge,
-71755.204@compuserve.com, sparked a number of these enhancements.
-
-To more rapidly understand the code in this package, inspect desSmallFips.i
-(created by typing `make') BEFORE you tackle desCode.h. The latter is set
-up in a parameterized fashion so it can easily be modified by speed-daemon
-hackers in pursuit of that last microsecond. You will find it more
-illuminating to inspect one specific implementation,
-and then move on to the common abstract skeleton with this one in mind.
-
-
-performance comparison to other available des code which i could
-compile on a SPARCStation 1 (cc -O4, gcc -O2):
-
-this code (byte-order independent):
- 30us per encryption (options: 64k tables, no IP/FP)
- 33us per encryption (options: 64k tables, FIPS standard bit ordering)
- 45us per encryption (options: 2k tables, no IP/FP)
- 48us per encryption (options: 2k tables, FIPS standard bit ordering)
- 275us to set a new key (uses 1k of key tables)
- this has the quickest encryption/decryption routines i've seen.
- since i was interested in fast des filters rather than crypt(3)
- and password cracking, i haven't really bothered yet to speed up
- the key setting routine. also, i have no interest in re-implementing
- all the other junk in the mit kerberos des library, so i've just
- provided my routines with little stub interfaces so they can be
- used as drop-in replacements with mit's code or any of the mit-
- compatible packages below. (note that the first two timings above
- are highly variable because of cache effects).
-
-kerberos des replacement from australia (version 1.95):
- 53us per encryption (uses 2k of tables)
- 96us to set a new key (uses 2.25k of key tables)
- so despite the author's inclusion of some of the performance
- improvements i had suggested to him, this package's
- encryption/decryption is still slower on the sparc and 68000.
- more specifically, 19-40% slower on the 68020 and 11-35% slower
- on the sparc, depending on the compiler;
- in full gory detail (ALT_ECB is a libdes variant):
- compiler machine desCore libdes ALT_ECB slower by
- gcc 2.1 -O2 Sun 3/110 304 uS 369.5uS 461.8uS 22%
- cc -O1 Sun 3/110 336 uS 436.6uS 399.3uS 19%
- cc -O2 Sun 3/110 360 uS 532.4uS 505.1uS 40%
- cc -O4 Sun 3/110 365 uS 532.3uS 505.3uS 38%
- gcc 2.1 -O2 Sun 4/50 48 uS 53.4uS 57.5uS 11%
- cc -O2 Sun 4/50 48 uS 64.6uS 64.7uS 35%
- cc -O4 Sun 4/50 48 uS 64.7uS 64.9uS 35%
- (my time measurements are not as accurate as his).
- the comments in my first release of desCore on version 1.92:
- 68us per encryption (uses 2k of tables)
- 96us to set a new key (uses 2.25k of key tables)
- this is a very nice package which implements the most important
- of the optimizations which i did in my encryption routines.
- it's a bit weak on common low-level optimizations which is why
- it's 39%-106% slower. because he was interested in fast crypt(3) and
- password-cracking applications, he also used the same ideas to
- speed up the key-setting routines with impressive results.
- (at some point i may do the same in my package). he also implements
- the rest of the mit des library.
- (code from eay@psych.psy.uq.oz.au via comp.sources.misc)
-
-fast crypt(3) package from denmark:
- the des routine here is buried inside a loop to do the
- crypt function and i didn't feel like ripping it out and measuring
- performance. his code takes 26 sparc instructions to compute one
- des iteration; above, Quick (64k) takes 21 and Small (2k) takes 37.
- he claims to use 280k of tables but the iteration calculation seems
- to use only 128k. his tables and code are machine independent.
- (code from glad@daimi.aau.dk via alt.sources or comp.sources.misc)
-
-swedish reimplementation of Kerberos des library
- 108us per encryption (uses 34k worth of tables)
- 134us to set a new key (uses 32k of key tables to get this speed!)
- the tables used seem to be machine-independent;
- he seems to have included a lot of special case code
- so that, e.g., `long' loads can be used instead of 4 `char' loads
- when the machine's architecture allows it.
- (code obtained from chalmers.se:pub/des)
-
-crack 3.3c package from england:
- as in crypt above, the des routine is buried in a loop. it's
- also very modified for crypt. his iteration code uses 16k
- of tables and appears to be slow.
- (code obtained from aem@aber.ac.uk via alt.sources or comp.sources.misc)
-
-``highly optimized'' and tweaked Kerberos/Athena code (byte-order dependent):
- 165us per encryption (uses 6k worth of tables)
- 478us to set a new key (uses <1k of key tables)
- so despite the comments in this code, it was possible to get
- faster code AND smaller tables, as well as making the tables
- machine-independent.
- (code obtained from prep.ai.mit.edu)
-
-UC Berkeley code (depends on machine-endedness):
- 226us per encryption
-10848us to set a new key
- table sizes are unclear, but they don't look very small
- (code obtained from wuarchive.wustl.edu)
-
-
-motivation and history
-
-a while ago i wanted some des routines and the routines documented on sun's
-man pages either didn't exist or dumped core. i had heard of kerberos,
-and knew that it used des, so i figured i'd use its routines. but once
-i got it and looked at the code, it really set off a lot of pet peeves -
-it was too convoluted, the code had been written without taking
-advantage of the regular structure of operations such as IP, E, and FP
-(i.e. the author didn't sit down and think before coding),
-it was excessively slow, the author had attempted to clarify the code
-by adding MORE statements to make the data movement more `consistent'
-instead of simplifying his implementation and cutting down on all data
-movement (in particular, his use of L1, R1, L2, R2), and it was full of
-idiotic `tweaks' for particular machines which failed to deliver significant
-speedups but which did obfuscate everything. so i took the test data
-from his verification program and rewrote everything else.
-
-a while later i ran across the great crypt(3) package mentioned above.
-the fact that this guy was computing 2 sboxes per table lookup rather
-than one (and using a MUCH larger table in the process) emboldened me to
-do the same - it was a trivial change from which i had been scared away
-by the larger table size. in his case he didn't realize you don't need to keep
-the working data in TWO forms, one for easy use of half the sboxes in
-indexing, the other for easy use of the other half; instead you can keep
-it in the form for the first half and use a simple rotate to get the other
-half. this means i have (almost) half the data manipulation and half
-the table size. in fairness though he might be encoding something particular
-to crypt(3) in his tables - i didn't check.
-
-i'm glad that i implemented it the way i did, because this C version is
-portable (the ifdef's are performance enhancements) and it is faster
-than versions hand-written in assembly for the sparc!
-
-
-porting notes
-
-one thing i did not want to do was write an enormous mess
-which depended on endedness and other machine quirks,
-and which necessarily produced different code and different lookup tables
-for different machines. see the kerberos code for an example
-of what i didn't want to do; all their endedness-specific `optimizations'
-obfuscate the code and in the end were slower than a simpler machine
-independent approach. however, there are always some portability
-considerations of some kind, and i have included some options
-for varying numbers of register variables.
-perhaps some will still regard the result as a mess!
-
-1) i assume everything is byte addressable, although i don't actually
- depend on the byte order, and that bytes are 8 bits.
- i assume word pointers can be freely cast to and from char pointers.
- note that 99% of C programs make these assumptions.
- i always use unsigned char's if the high bit could be set.
-2) the typedef `word' means a 32 bit unsigned integral type.
- if `unsigned long' is not 32 bits, change the typedef in desCore.h.
- i assume sizeof(word) == 4 EVERYWHERE.
-
-the (worst-case) cost of my NOT doing endedness-specific optimizations
-in the data loading and storing code surrounding the key iterations
-is less than 12%. also, there is the added benefit that
-the input and output work areas do not need to be word-aligned.
-
-
-OPTIONAL performance optimizations
-
-1) you should define one of `i386,' `vax,' `mc68000,' or `sparc,'
- whichever one is closest to the capabilities of your machine.
- see the start of desCode.h to see exactly what this selection implies.
- note that if you select the wrong one, the des code will still work;
- these are just performance tweaks.
-2) for those with functional `asm' keywords: you should change the
- ROR and ROL macros to use machine rotate instructions if you have them.
- this will save 2 instructions and a temporary per use,
- or about 32 to 40 instructions per en/decryption.
- note that gcc is smart enough to translate the ROL/R macros into
- machine rotates!
-
-these optimizations are all rather persnickety, yet with them you should
-be able to get performance equal to assembly-coding, except that:
-1) with the lack of a bit rotate operator in C, rotates have to be synthesized
- from shifts. so access to `asm' will speed things up if your machine
- has rotates, as explained above in (3) (not necessary if you use gcc).
-2) if your machine has less than 12 32-bit registers i doubt your compiler will
- generate good code.
- `i386' tries to configure the code for a 386 by only declaring 3 registers
- (it appears that gcc can use ebx, esi and edi to hold register variables).
- however, if you like assembly coding, the 386 does have 7 32-bit registers,
- and if you use ALL of them, use `scaled by 8' address modes with displacement
- and other tricks, you can get reasonable routines for DesQuickCore... with
- about 250 instructions apiece. For DesSmall... it will help to rearrange
- des_keymap, i.e., now the sbox # is the high part of the index and
- the 6 bits of data is the low part; it helps to exchange these.
- since i have no way to conveniently test it i have not provided my
- shoehorned 386 version. note that with this release of desCore, gcc is able
- to put everything in registers(!), and generate about 370 instructions apiece
- for the DesQuickCore... routines!
-
-coding notes
-
-the en/decryption routines each use 6 necessary register variables,
-with 4 being actively used at once during the inner iterations.
-if you don't have 4 register variables get a new machine.
-up to 8 more registers are used to hold constants in some configurations.
-
-i assume that the use of a constant is more expensive than using a register:
-a) additionally, i have tried to put the larger constants in registers.
- registering priority was by the following:
- anything more than 12 bits (bad for RISC and CISC)
- greater than 127 in value (can't use movq or byte immediate on CISC)
- 9-127 (may not be able to use CISC shift immediate or add/sub quick),
- 1-8 were never registered, being the cheapest constants.
-b) the compiler may be too stupid to realize table and table+256 should
- be assigned to different constant registers and instead repetitively
- do the arithmetic, so i assign these to explicit `m' register variables
- when possible and helpful.
-
-i assume that indexing is cheaper or equivalent to auto increment/decrement,
-where the index is 7 bits unsigned or smaller.
-this assumption is reversed for 68k and vax.
-
-i assume that addresses can be cheaply formed from two registers,
-or from a register and a small constant.
-for the 68000, the `two registers and small offset' form is used sparingly.
-all index scaling is done explicitly - no hidden shifts by log2(sizeof).
-
-the code is written so that even a dumb compiler
-should never need more than one hidden temporary,
-increasing the chance that everything will fit in the registers.
-KEEP THIS MORE SUBTLE POINT IN MIND IF YOU REWRITE ANYTHING.
-(actually, there are some code fragments now which do require two temps,
-but fixing it would either break the structure of the macros or
-require declaring another temporary).
-
-
-special efficient data format
-
-bits are manipulated in this arrangement most of the time (S7 S5 S3 S1):
- 003130292827xxxx242322212019xxxx161514131211xxxx080706050403xxxx
-(the x bits are still there, i'm just emphasizing where the S boxes are).
-bits are rotated left 4 when computing S6 S4 S2 S0:
- 282726252423xxxx201918171615xxxx121110090807xxxx040302010031xxxx
-the rightmost two bits are usually cleared so the lower byte can be used
-as an index into an sbox mapping table. the next two x'd bits are set
-to various values to access different parts of the tables.
-
-
-how to use the routines
-
-datatypes:
- pointer to 8 byte area of type DesData
- used to hold keys and input/output blocks to des.
-
- pointer to 128 byte area of type DesKeys
- used to hold full 768-bit key.
- must be long-aligned.
-
-DesQuickInit()
- call this before using any other routine with `Quick' in its name.
- it generates the special 64k table these routines need.
-DesQuickDone()
- frees this table
-
-DesMethod(m, k)
- m points to a 128byte block, k points to an 8 byte des key
- which must have odd parity (or -1 is returned) and which must
- not be a (semi-)weak key (or -2 is returned).
- normally DesMethod() returns 0.
- m is filled in from k so that when one of the routines below
- is called with m, the routine will act like standard des
- en/decryption with the key k. if you use DesMethod,
- you supply a standard 56bit key; however, if you fill in
- m yourself, you will get a 768bit key - but then it won't
- be standard. it's 768bits not 1024 because the least significant
- two bits of each byte are not used. note that these two bits
- will be set to magic constants which speed up the encryption/decryption
- on some machines. and yes, each byte controls
- a specific sbox during a specific iteration.
- you really shouldn't use the 768bit format directly; i should
- provide a routine that converts 128 6-bit bytes (specified in
- S-box mapping order or something) into the right format for you.
- this would entail some byte concatenation and rotation.
-
-Des{Small|Quick}{Fips|Core}{Encrypt|Decrypt}(d, m, s)
- performs des on the 8 bytes at s into the 8 bytes at d. (d,s: char *).
- uses m as a 768bit key as explained above.
- the Encrypt|Decrypt choice is obvious.
- Fips|Core determines whether a completely standard FIPS initial
- and final permutation is done; if not, then the data is loaded
- and stored in a nonstandard bit order (FIPS w/o IP/FP).
- Fips slows down Quick by 10%, Small by 9%.
- Small|Quick determines whether you use the normal routine
- or the crazy quick one which gobbles up 64k more of memory.
- Small is 50% slower then Quick, but Quick needs 32 times as much
- memory. Quick is included for programs that do nothing but DES,
- e.g., encryption filters, etc.
-
-
-Getting it to compile on your machine
-
-there are no machine-dependencies in the code (see porting),
-except perhaps the `now()' macro in desTest.c.
-ALL generated tables are machine independent.
-you should edit the Makefile with the appropriate optimization flags
-for your compiler (MAX optimization).
-
-
-Speeding up kerberos (and/or its des library)
-
-note that i have included a kerberos-compatible interface in desUtil.c
-through the functions des_key_sched() and des_ecb_encrypt().
-to use these with kerberos or kerberos-compatible code put desCore.a
-ahead of the kerberos-compatible library on your linker's command line.
-you should not need to #include desCore.h; just include the header
-file provided with the kerberos library.
-
-Other uses
-
-the macros in desCode.h would be very useful for putting inline des
-functions in more complicated encryption routines.
diff --git a/Documentation/crypto/devel-algos.rst b/Documentation/crypto/devel-algos.rst
index c45c6f400dbd..9b7782f4f6e0 100644
--- a/Documentation/crypto/devel-algos.rst
+++ b/Documentation/crypto/devel-algos.rst
@@ -31,33 +31,23 @@ The counterparts to those functions are listed below.
::
- int crypto_unregister_alg(struct crypto_alg *alg);
- int crypto_unregister_algs(struct crypto_alg *algs, int count);
+ void crypto_unregister_alg(struct crypto_alg *alg);
+ void crypto_unregister_algs(struct crypto_alg *algs, int count);
-Notice that both registration and unregistration functions do return a
-value, so make sure to handle errors. A return code of zero implies
-success. Any return code < 0 implies an error.
+The registration functions return 0 on success, or a negative errno
+value on failure. crypto_register_algs() succeeds only if it
+successfully registered all the given algorithms; if it fails partway
+through, then any changes are rolled back.
-The bulk registration/unregistration functions register/unregister each
-transformation in the given array of length count. They handle errors as
-follows:
-
-- crypto_register_algs() succeeds if and only if it successfully
- registers all the given transformations. If an error occurs partway
- through, then it rolls back successful registrations before returning
- the error code. Note that if a driver needs to handle registration
- errors for individual transformations, then it will need to use the
- non-bulk function crypto_register_alg() instead.
-
-- crypto_unregister_algs() tries to unregister all the given
- transformations, continuing on error. It logs errors and always
- returns zero.
+The unregistration functions always succeed, so they don't have a
+return value. Don't try to unregister algorithms that aren't
+currently registered.
Single-Block Symmetric Ciphers [CIPHER]
---------------------------------------
-Example of transformations: aes, arc4, ...
+Example of transformations: aes, serpent, ...
This section describes the simplest of all transformation
implementations, that being the CIPHER type used for symmetric ciphers.
@@ -108,7 +98,7 @@ is also valid:
Multi-Block Ciphers
-------------------
-Example of transformations: cbc(aes), ecb(arc4), ...
+Example of transformations: cbc(aes), chacha20, ...
This section describes the multi-block cipher transformation
implementations. The multi-block ciphers are used for transformations
@@ -128,25 +118,20 @@ process requests that are unaligned. This implies, however, additional
overhead as the kernel crypto API needs to perform the realignment of
the data which may imply moving of data.
-Cipher Definition With struct blkcipher_alg and ablkcipher_alg
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Struct blkcipher_alg defines a synchronous block cipher whereas struct
-ablkcipher_alg defines an asynchronous block cipher.
-
-Please refer to the single block cipher description for schematics of
-the block cipher usage.
+Cipher Definition With struct skcipher_alg
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Specifics Of Asynchronous Multi-Block Cipher
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Struct skcipher_alg defines a multi-block cipher, or more generally, a
+length-preserving symmetric cipher algorithm.
-There are a couple of specifics to the asynchronous interface.
+Scatterlist handling
+~~~~~~~~~~~~~~~~~~~~
-First of all, some of the drivers will want to use the Generic
-ScatterWalk in case the hardware needs to be fed separate chunks of the
-scatterlist which contains the plaintext and will contain the
-ciphertext. Please refer to the ScatterWalk interface offered by the
-Linux kernel scatter / gather list implementation.
+Some drivers will want to use the Generic ScatterWalk in case the
+hardware needs to be fed separate chunks of the scatterlist which
+contains the plaintext and will contain the ciphertext. Please refer
+to the ScatterWalk interface offered by the Linux kernel scatter /
+gather list implementation.
Hashing [HASH]
--------------
@@ -174,10 +159,10 @@ are as follows:
::
- int crypto_unregister_ahash(struct ahash_alg *alg);
+ void crypto_unregister_ahash(struct ahash_alg *alg);
- int crypto_unregister_shash(struct shash_alg *alg);
- int crypto_unregister_shashes(struct shash_alg *algs, int count);
+ void crypto_unregister_shash(struct shash_alg *alg);
+ void crypto_unregister_shashes(struct shash_alg *algs, int count);
Cipher Definition With struct shash_alg and ahash_alg
@@ -187,7 +172,7 @@ Here are schematics of how these functions are called when operated from
other part of the kernel. Note that the .setkey() call might happen
before or after any of these schematics happen, but must not happen
during any of these are in-flight. Please note that calling .init()
-followed immediately by .finish() is also a perfectly valid
+followed immediately by .final() is also a perfectly valid
transformation.
::
@@ -250,6 +235,4 @@ Specifics Of Asynchronous HASH Transformation
Some of the drivers will want to use the Generic ScatterWalk in case the
implementation needs to be fed separate chunks of the scatterlist which
-contains the input data. The buffer containing the resulting hash will
-always be properly aligned to .cra_alignmask so there is no need to
-worry about this.
+contains the input data.
diff --git a/Documentation/crypto/device_drivers/index.rst b/Documentation/crypto/device_drivers/index.rst
new file mode 100644
index 000000000000..c81d311ac61b
--- /dev/null
+++ b/Documentation/crypto/device_drivers/index.rst
@@ -0,0 +1,9 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Hardware Device Driver Specific Documentation
+---------------------------------------------
+
+.. toctree::
+ :maxdepth: 1
+
+ octeontx2
diff --git a/Documentation/crypto/device_drivers/octeontx2.rst b/Documentation/crypto/device_drivers/octeontx2.rst
new file mode 100644
index 000000000000..7e469b173ac8
--- /dev/null
+++ b/Documentation/crypto/device_drivers/octeontx2.rst
@@ -0,0 +1,25 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================
+octeontx2 devlink support
+=========================
+
+This document describes the devlink features implemented by the ``octeontx2 CPT``
+device drivers.
+
+Parameters
+==========
+
+The ``octeontx2`` driver implements the following driver-specific parameters.
+
+.. list-table:: Driver-specific parameters implemented
+ :widths: 5 5 5 85
+
+ * - Name
+ - Type
+ - Mode
+ - Description
+ * - ``t106_mode``
+ - u8
+ - runtime
+ - Used to configure CN10KA B0/CN10KB CPT to work as CN10KA A0/A1.
diff --git a/Documentation/crypto/index.rst b/Documentation/crypto/index.rst
index c4ff5d791233..100b47d049c0 100644
--- a/Documentation/crypto/index.rst
+++ b/Documentation/crypto/index.rst
@@ -1,6 +1,6 @@
-=======================
-Linux Kernel Crypto API
-=======================
+==========
+Crypto API
+==========
:Author: Stephan Mueller
:Author: Marek Vasut
@@ -9,17 +9,21 @@ This documentation outlines the Linux kernel crypto API with its
concepts, details about developing cipher implementations, employment of the API
for cryptographic use cases, as well as programming examples.
-.. class:: toc-title
-
- Table of contents
-
.. toctree::
+ :caption: Table of contents
:maxdepth: 2
intro
+ api-intro
architecture
+
+ async-tx-api
+ asymmetric-keys
devel-algos
userspace-if
crypto_engine
api
api-samples
+ descore-readme
+ device_drivers/index
+ krb5
diff --git a/Documentation/crypto/krb5.rst b/Documentation/crypto/krb5.rst
new file mode 100644
index 000000000000..beffa0133446
--- /dev/null
+++ b/Documentation/crypto/krb5.rst
@@ -0,0 +1,262 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================
+Kerberos V Cryptography API
+===========================
+
+.. Contents:
+
+ - Overview.
+ - Small Buffer.
+ - Encoding Type.
+ - Key Derivation.
+ - PRF+ Calculation.
+ - Kc, Ke And Ki Derivation.
+ - Crypto Functions.
+ - Preparation Functions.
+ - Encryption Mode.
+ - Checksum Mode.
+ - The krb5enc AEAD algorithm
+
+Overview
+========
+
+This API provides Kerberos 5-style cryptography for key derivation, encryption
+and checksumming for use in network filesystems and can be used to implement
+the low-level crypto that's needed for GSSAPI.
+
+The following crypto types are supported::
+
+ KRB5_ENCTYPE_AES128_CTS_HMAC_SHA1_96
+ KRB5_ENCTYPE_AES256_CTS_HMAC_SHA1_96
+ KRB5_ENCTYPE_AES128_CTS_HMAC_SHA256_128
+ KRB5_ENCTYPE_AES256_CTS_HMAC_SHA384_192
+ KRB5_ENCTYPE_CAMELLIA128_CTS_CMAC
+ KRB5_ENCTYPE_CAMELLIA256_CTS_CMAC
+
+ KRB5_CKSUMTYPE_HMAC_SHA1_96_AES128
+ KRB5_CKSUMTYPE_HMAC_SHA1_96_AES256
+ KRB5_CKSUMTYPE_CMAC_CAMELLIA128
+ KRB5_CKSUMTYPE_CMAC_CAMELLIA256
+ KRB5_CKSUMTYPE_HMAC_SHA256_128_AES128
+ KRB5_CKSUMTYPE_HMAC_SHA384_192_AES256
+
+The API can be included by::
+
+ #include <crypto/krb5.h>
+
+Small Buffer
+------------
+
+To pass small pieces of data about, such as keys, a buffer structure is
+defined, giving a pointer to the data and the size of that data::
+
+ struct krb5_buffer {
+ unsigned int len;
+ void *data;
+ };
+
+Encoding Type
+=============
+
+The encoding type is defined by the following structure::
+
+ struct krb5_enctype {
+ int etype;
+ int ctype;
+ const char *name;
+ u16 key_bytes;
+ u16 key_len;
+ u16 Kc_len;
+ u16 Ke_len;
+ u16 Ki_len;
+ u16 prf_len;
+ u16 block_len;
+ u16 conf_len;
+ u16 cksum_len;
+ ...
+ };
+
+The fields of interest to the user of the API are as follows:
+
+ * ``etype`` and ``ctype`` indicate the protocol number for this encoding
+ type for encryption and checksumming respectively. They hold
+ ``KRB5_ENCTYPE_*`` and ``KRB5_CKSUMTYPE_*`` constants.
+
+ * ``name`` is the formal name of the encoding.
+
+ * ``key_len`` and ``key_bytes`` are the input key length and the derived key
+ length. (I think they only differ for DES, which isn't supported here).
+
+ * ``Kc_len``, ``Ke_len`` and ``Ki_len`` are the sizes of the derived Kc, Ke
+ and Ki keys. Kc is used for in checksum mode; Ke and Ki are used in
+ encryption mode.
+
+ * ``prf_len`` is the size of the result from the PRF+ function calculation.
+
+ * ``block_len``, ``conf_len`` and ``cksum_len`` are the encryption block
+ length, confounder length and checksum length respectively. All three are
+ used in encryption mode, but only the checksum length is used in checksum
+ mode.
+
+The encoding type is looked up by number using the following function::
+
+ const struct krb5_enctype *crypto_krb5_find_enctype(u32 enctype);
+
+Key Derivation
+==============
+
+Once the application has selected an encryption type, the keys that will be
+used to do the actual crypto can be derived from the transport key.
+
+PRF+ Calculation
+----------------
+
+To aid in key derivation, a function to calculate the Kerberos GSSAPI
+mechanism's PRF+ is provided::
+
+ int crypto_krb5_calc_PRFplus(const struct krb5_enctype *krb5,
+ const struct krb5_buffer *K,
+ unsigned int L,
+ const struct krb5_buffer *S,
+ struct krb5_buffer *result,
+ gfp_t gfp);
+
+This can be used to derive the transport key from a source key plus additional
+data to limit its use.
+
+Crypto Functions
+================
+
+Once the keys have been derived, crypto can be performed on the data. The
+caller must leave gaps in the buffer for the storage of the confounder (if
+needed) and the checksum when preparing a message for transmission. An enum
+and a pair of functions are provided to aid in this::
+
+ enum krb5_crypto_mode {
+ KRB5_CHECKSUM_MODE,
+ KRB5_ENCRYPT_MODE,
+ };
+
+ size_t crypto_krb5_how_much_buffer(const struct krb5_enctype *krb5,
+ enum krb5_crypto_mode mode,
+ size_t data_size, size_t *_offset);
+
+ size_t crypto_krb5_how_much_data(const struct krb5_enctype *krb5,
+ enum krb5_crypto_mode mode,
+ size_t *_buffer_size, size_t *_offset);
+
+All these functions take the encoding type and an indication the mode of crypto
+(checksum-only or full encryption).
+
+The first function returns how big the buffer will need to be to house a given
+amount of data; the second function returns how much data will fit in a buffer
+of a particular size, and adjusts down the size of the required buffer
+accordingly. In both cases, the offset of the data within the buffer is also
+returned.
+
+When a message has been received, the location and size of the data with the
+message can be determined by calling::
+
+ void crypto_krb5_where_is_the_data(const struct krb5_enctype *krb5,
+ enum krb5_crypto_mode mode,
+ size_t *_offset, size_t *_len);
+
+The caller provides the offset and length of the message to the function, which
+then alters those values to indicate the region containing the data (plus any
+padding). It is up to the caller to determine how much padding there is.
+
+Preparation Functions
+---------------------
+
+Two functions are provided to allocated and prepare a crypto object for use by
+the action functions::
+
+ struct crypto_aead *
+ crypto_krb5_prepare_encryption(const struct krb5_enctype *krb5,
+ const struct krb5_buffer *TK,
+ u32 usage, gfp_t gfp);
+ struct crypto_shash *
+ crypto_krb5_prepare_checksum(const struct krb5_enctype *krb5,
+ const struct krb5_buffer *TK,
+ u32 usage, gfp_t gfp);
+
+Both of these functions take the encoding type, the transport key and the usage
+value used to derive the appropriate subkey(s). They create an appropriate
+crypto object, an AEAD template for encryption and a synchronous hash for
+checksumming, set the key(s) on it and configure it. The caller is expected to
+pass these handles to the action functions below.
+
+Encryption Mode
+---------------
+
+A pair of functions are provided to encrypt and decrypt a message::
+
+ ssize_t crypto_krb5_encrypt(const struct krb5_enctype *krb5,
+ struct crypto_aead *aead,
+ struct scatterlist *sg, unsigned int nr_sg,
+ size_t sg_len,
+ size_t data_offset, size_t data_len,
+ bool preconfounded);
+ int crypto_krb5_decrypt(const struct krb5_enctype *krb5,
+ struct crypto_aead *aead,
+ struct scatterlist *sg, unsigned int nr_sg,
+ size_t *_offset, size_t *_len);
+
+In both cases, the input and output buffers are indicated by the same
+scatterlist.
+
+For the encryption function, the output buffer may be larger than is needed
+(the amount of output generated is returned) and the location and size of the
+data are indicated (which must match the encoding). If no confounder is set,
+the function will insert one.
+
+For the decryption function, the offset and length of the message in buffer are
+supplied and these are shrunk to fit the data. The decryption function will
+verify any checksums within the message and give an error if they don't match.
+
+Checksum Mode
+-------------
+
+A pair of function are provided to generate the checksum on a message and to
+verify that checksum::
+
+ ssize_t crypto_krb5_get_mic(const struct krb5_enctype *krb5,
+ struct crypto_shash *shash,
+ const struct krb5_buffer *metadata,
+ struct scatterlist *sg, unsigned int nr_sg,
+ size_t sg_len,
+ size_t data_offset, size_t data_len);
+ int crypto_krb5_verify_mic(const struct krb5_enctype *krb5,
+ struct crypto_shash *shash,
+ const struct krb5_buffer *metadata,
+ struct scatterlist *sg, unsigned int nr_sg,
+ size_t *_offset, size_t *_len);
+
+In both cases, the input and output buffers are indicated by the same
+scatterlist. Additional metadata can be passed in which will get added to the
+hash before the data.
+
+For the get_mic function, the output buffer may be larger than is needed (the
+amount of output generated is returned) and the location and size of the data
+are indicated (which must match the encoding).
+
+For the verification function, the offset and length of the message in buffer
+are supplied and these are shrunk to fit the data. An error will be returned
+if the checksums don't match.
+
+The krb5enc AEAD algorithm
+==========================
+
+A template AEAD crypto algorithm, called "krb5enc", is provided that hashes the
+plaintext before encrypting it (the reverse of authenc). The handle returned
+by ``crypto_krb5_prepare_encryption()`` may be one of these, but there's no
+requirement for the user of this API to interact with it directly.
+
+For reference, its key format begins with a BE32 of the format number. Only
+format 1 is provided and that continues with a BE32 of the Ke key length
+followed by a BE32 of the Ki key length, followed by the bytes from the Ke key
+and then the Ki key.
+
+Using specifically ordered words means that the static test data doesn't
+require byteswapping.
diff --git a/Documentation/crypto/userspace-if.rst b/Documentation/crypto/userspace-if.rst
index ff86befa61e0..f80f243e227e 100644
--- a/Documentation/crypto/userspace-if.rst
+++ b/Documentation/crypto/userspace-if.rst
@@ -23,7 +23,7 @@ user space, however. This includes the difference between synchronous
and asynchronous invocations. The user space API call is fully
synchronous.
-[1] http://www.chronox.de/libkcapi.html
+[1] https://www.chronox.de/libkcapi.html
User Space API General Remarks
------------------------------
@@ -131,9 +131,9 @@ from the kernel crypto API. If the buffer is too small for the message
digest, the flag MSG_TRUNC is set by the kernel.
In order to set a message digest key, the calling application must use
-the setsockopt() option of ALG_SET_KEY. If the key is not set the HMAC
-operation is performed without the initial HMAC state change caused by
-the key.
+the setsockopt() option of ALG_SET_KEY or ALG_SET_KEY_BY_KEY_SERIAL. If the
+key is not set the HMAC operation is performed without the initial HMAC state
+change caused by the key.
Symmetric Cipher API
--------------------
@@ -296,15 +296,16 @@ follows:
struct sockaddr_alg sa = {
.salg_family = AF_ALG,
- .salg_type = "rng", /* this selects the symmetric cipher */
- .salg_name = "drbg_nopr_sha256" /* this is the cipher name */
+ .salg_type = "rng", /* this selects the random number generator */
+ .salg_name = "drbg_nopr_sha256" /* this is the RNG name */
};
Depending on the RNG type, the RNG must be seeded. The seed is provided
using the setsockopt interface to set the key. For example, the
ansi_cprng requires a seed. The DRBGs do not require a seed, but may be
-seeded.
+seeded. The seed is also known as a *Personalization String* in NIST SP 800-90A
+standard.
Using the read()/recvmsg() system calls, random numbers can be obtained.
The kernel generates at most 128 bytes in one call. If user space
@@ -314,6 +315,16 @@ WARNING: The user space caller may invoke the initially mentioned accept
system call multiple times. In this case, the returned file descriptors
have the same state.
+Following CAVP testing interfaces are enabled when kernel is built with
+CRYPTO_USER_API_RNG_CAVP option:
+
+- the concatenation of *Entropy* and *Nonce* can be provided to the RNG via
+ ALG_SET_DRBG_ENTROPY setsockopt interface. Setting the entropy requires
+ CAP_SYS_ADMIN permission.
+
+- *Additional Data* can be provided using the send()/sendmsg() system calls,
+ but only after the entropy has been set.
+
Zero-Copy Interface
-------------------
@@ -371,12 +382,24 @@ mentioned optname:
- the RNG cipher type to provide the seed
+- ALG_SET_KEY_BY_KEY_SERIAL -- Setting the key via keyring key_serial_t.
+ This operation behaves the same as ALG_SET_KEY. The decrypted
+ data is copied from a keyring key, and uses that data as the
+ key for symmetric encryption.
+
+ The passed in key_serial_t must have the KEY_(POS|USR|GRP|OTH)_SEARCH
+ permission set, otherwise -EPERM is returned. Supports key types: user,
+ logon, encrypted, and trusted.
+
- ALG_SET_AEAD_AUTHSIZE -- Setting the authentication tag size for
AEAD ciphers. For a encryption operation, the authentication tag of
the given size will be generated. For a decryption operation, the
provided ciphertext is assumed to contain an authentication tag of
the given size (see section about AEAD memory layout below).
+- ALG_SET_DRBG_ENTROPY -- Setting the entropy of the random number generator.
+ This option is applicable to RNG cipher type only.
+
User space API example
----------------------
@@ -384,4 +407,4 @@ Please see [1] for libkcapi which provides an easy-to-use wrapper around
the aforementioned Netlink kernel interface. [1] also contains a test
application that invokes all libkcapi API calls.
-[1] http://www.chronox.de/libkcapi.html
+[1] https://www.chronox.de/libkcapi.html
diff --git a/Documentation/debugging-modules.txt b/Documentation/debugging-modules.txt
deleted file mode 100644
index 172ad4aec493..000000000000
--- a/Documentation/debugging-modules.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-Debugging Modules after 2.6.3
------------------------------
-
-In almost all distributions, the kernel asks for modules which don't
-exist, such as "net-pf-10" or whatever. Changing "modprobe -q" to
-"succeed" in this case is hacky and breaks some setups, and also we
-want to know if it failed for the fallback code for old aliases in
-fs/char_dev.c, for example.
-
-In the past a debugging message which would fill people's logs was
-emitted. This debugging message has been removed. The correct way
-of debugging module problems is something like this:
-
-echo '#! /bin/sh' > /tmp/modprobe
-echo 'echo "$@" >> /tmp/modprobe.log' >> /tmp/modprobe
-echo 'exec /sbin/modprobe "$@"' >> /tmp/modprobe
-chmod a+x /tmp/modprobe
-echo /tmp/modprobe > /proc/sys/kernel/modprobe
-
-Note that the above applies only when the *kernel* is requesting
-that the module be loaded -- it won't have any effect if that module
-is being loaded explicitly using "modprobe" from userspace.
diff --git a/Documentation/debugging-via-ohci1394.txt b/Documentation/debugging-via-ohci1394.txt
deleted file mode 100644
index 981ad4f89fd3..000000000000
--- a/Documentation/debugging-via-ohci1394.txt
+++ /dev/null
@@ -1,185 +0,0 @@
-===========================================================================
-Using physical DMA provided by OHCI-1394 FireWire controllers for debugging
-===========================================================================
-
-Introduction
-------------
-
-Basically all FireWire controllers which are in use today are compliant
-to the OHCI-1394 specification which defines the controller to be a PCI
-bus master which uses DMA to offload data transfers from the CPU and has
-a "Physical Response Unit" which executes specific requests by employing
-PCI-Bus master DMA after applying filters defined by the OHCI-1394 driver.
-
-Once properly configured, remote machines can send these requests to
-ask the OHCI-1394 controller to perform read and write requests on
-physical system memory and, for read requests, send the result of
-the physical memory read back to the requester.
-
-With that, it is possible to debug issues by reading interesting memory
-locations such as buffers like the printk buffer or the process table.
-
-Retrieving a full system memory dump is also possible over the FireWire,
-using data transfer rates in the order of 10MB/s or more.
-
-With most FireWire controllers, memory access is limited to the low 4 GB
-of physical address space. This can be a problem on IA64 machines where
-memory is located mostly above that limit, but it is rarely a problem on
-more common hardware such as x86, x86-64 and PowerPC.
-
-At least LSI FW643e and FW643e2 controllers are known to support access to
-physical addresses above 4 GB, but this feature is currently not enabled by
-Linux.
-
-Together with a early initialization of the OHCI-1394 controller for debugging,
-this facility proved most useful for examining long debugs logs in the printk
-buffer on to debug early boot problems in areas like ACPI where the system
-fails to boot and other means for debugging (serial port) are either not
-available (notebooks) or too slow for extensive debug information (like ACPI).
-
-Drivers
--------
-
-The firewire-ohci driver in drivers/firewire uses filtered physical
-DMA by default, which is more secure but not suitable for remote debugging.
-Pass the remote_dma=1 parameter to the driver to get unfiltered physical DMA.
-
-Because the firewire-ohci driver depends on the PCI enumeration to be
-completed, an initialization routine which runs pretty early has been
-implemented for x86. This routine runs long before console_init() can be
-called, i.e. before the printk buffer appears on the console.
-
-To activate it, enable CONFIG_PROVIDE_OHCI1394_DMA_INIT (Kernel hacking menu:
-Remote debugging over FireWire early on boot) and pass the parameter
-"ohci1394_dma=early" to the recompiled kernel on boot.
-
-Tools
------
-
-firescope - Originally developed by Benjamin Herrenschmidt, Andi Kleen ported
-it from PowerPC to x86 and x86_64 and added functionality, firescope can now
-be used to view the printk buffer of a remote machine, even with live update.
-
-Bernhard Kaindl enhanced firescope to support accessing 64-bit machines
-from 32-bit firescope and vice versa:
-- http://v3.sk/~lkundrak/firescope/
-
-and he implemented fast system dump (alpha version - read README.txt):
-- http://halobates.de/firewire/firedump-0.1.tar.bz2
-
-There is also a gdb proxy for firewire which allows to use gdb to access
-data which can be referenced from symbols found by gdb in vmlinux:
-- http://halobates.de/firewire/fireproxy-0.33.tar.bz2
-
-The latest version of this gdb proxy (fireproxy-0.34) can communicate (not
-yet stable) with kgdb over an memory-based communication module (kgdbom).
-
-Getting Started
----------------
-
-The OHCI-1394 specification regulates that the OHCI-1394 controller must
-disable all physical DMA on each bus reset.
-
-This means that if you want to debug an issue in a system state where
-interrupts are disabled and where no polling of the OHCI-1394 controller
-for bus resets takes place, you have to establish any FireWire cable
-connections and fully initialize all FireWire hardware __before__ the
-system enters such state.
-
-Step-by-step instructions for using firescope with early OHCI initialization:
-
-1) Verify that your hardware is supported:
-
- Load the firewire-ohci module and check your kernel logs.
- You should see a line similar to::
-
- firewire_ohci 0000:15:00.1: added OHCI v1.0 device as card 2, 4 IR + 4 IT
- ... contexts, quirks 0x11
-
- when loading the driver. If you have no supported controller, many PCI,
- CardBus and even some Express cards which are fully compliant to OHCI-1394
- specification are available. If it requires no driver for Windows operating
- systems, it most likely is. Only specialized shops have cards which are not
- compliant, they are based on TI PCILynx chips and require drivers for Windows
- operating systems.
-
- The mentioned kernel log message contains the string "physUB" if the
- controller implements a writable Physical Upper Bound register. This is
- required for physical DMA above 4 GB (but not utilized by Linux yet).
-
-2) Establish a working FireWire cable connection:
-
- Any FireWire cable, as long at it provides electrically and mechanically
- stable connection and has matching connectors (there are small 4-pin and
- large 6-pin FireWire ports) will do.
-
- If an driver is running on both machines you should see a line like::
-
- firewire_core 0000:15:00.1: created device fw1: GUID 00061b0020105917, S400
-
- on both machines in the kernel log when the cable is plugged in
- and connects the two machines.
-
-3) Test physical DMA using firescope:
-
- On the debug host, make sure that /dev/fw* is accessible,
- then start firescope::
-
- $ firescope
- Port 0 (/dev/fw1) opened, 2 nodes detected
-
- FireScope
- ---------
- Target : <unspecified>
- Gen : 1
- [Ctrl-T] choose target
- [Ctrl-H] this menu
- [Ctrl-Q] quit
-
- ------> Press Ctrl-T now, the output should be similar to:
-
- 2 nodes available, local node is: 0
- 0: ffc0, uuid: 00000000 00000000 [LOCAL]
- 1: ffc1, uuid: 00279000 ba4bb801
-
- Besides the [LOCAL] node, it must show another node without error message.
-
-4) Prepare for debugging with early OHCI-1394 initialization:
-
- 4.1) Kernel compilation and installation on debug target
-
- Compile the kernel to be debugged with CONFIG_PROVIDE_OHCI1394_DMA_INIT
- (Kernel hacking: Provide code for enabling DMA over FireWire early on boot)
- enabled and install it on the machine to be debugged (debug target).
-
- 4.2) Transfer the System.map of the debugged kernel to the debug host
-
- Copy the System.map of the kernel be debugged to the debug host (the host
- which is connected to the debugged machine over the FireWire cable).
-
-5) Retrieving the printk buffer contents:
-
- With the FireWire cable connected, the OHCI-1394 driver on the debugging
- host loaded, reboot the debugged machine, booting the kernel which has
- CONFIG_PROVIDE_OHCI1394_DMA_INIT enabled, with the option ohci1394_dma=early.
-
- Then, on the debugging host, run firescope, for example by using -A::
-
- firescope -A System.map-of-debug-target-kernel
-
- Note: -A automatically attaches to the first non-local node. It only works
- reliably if only connected two machines are connected using FireWire.
-
- After having attached to the debug target, press Ctrl-D to view the
- complete printk buffer or Ctrl-U to enter auto update mode and get an
- updated live view of recent kernel messages logged on the debug target.
-
- Call "firescope -h" to get more information on firescope's options.
-
-Notes
------
-
-Documentation and specifications: http://halobates.de/firewire/
-
-FireWire is a trademark of Apple Inc. - for more information please refer to:
-https://en.wikipedia.org/wiki/FireWire
diff --git a/Documentation/dell_rbu.txt b/Documentation/dell_rbu.txt
deleted file mode 100644
index 5d1ce7bcd04d..000000000000
--- a/Documentation/dell_rbu.txt
+++ /dev/null
@@ -1,128 +0,0 @@
-=============================================================
-Usage of the new open sourced rbu (Remote BIOS Update) driver
-=============================================================
-
-Purpose
-=======
-
-Document demonstrating the use of the Dell Remote BIOS Update driver.
-for updating BIOS images on Dell servers and desktops.
-
-Scope
-=====
-
-This document discusses the functionality of the rbu driver only.
-It does not cover the support needed from applications to enable the BIOS to
-update itself with the image downloaded in to the memory.
-
-Overview
-========
-
-This driver works with Dell OpenManage or Dell Update Packages for updating
-the BIOS on Dell servers (starting from servers sold since 1999), desktops
-and notebooks (starting from those sold in 2005).
-
-Please go to http://support.dell.com register and you can find info on
-OpenManage and Dell Update packages (DUP).
-
-Libsmbios can also be used to update BIOS on Dell systems go to
-http://linux.dell.com/libsmbios/ for details.
-
-Dell_RBU driver supports BIOS update using the monolithic image and packetized
-image methods. In case of monolithic the driver allocates a contiguous chunk
-of physical pages having the BIOS image. In case of packetized the app
-using the driver breaks the image in to packets of fixed sizes and the driver
-would place each packet in contiguous physical memory. The driver also
-maintains a link list of packets for reading them back.
-
-If the dell_rbu driver is unloaded all the allocated memory is freed.
-
-The rbu driver needs to have an application (as mentioned above)which will
-inform the BIOS to enable the update in the next system reboot.
-
-The user should not unload the rbu driver after downloading the BIOS image
-or updating.
-
-The driver load creates the following directories under the /sys file system::
-
- /sys/class/firmware/dell_rbu/loading
- /sys/class/firmware/dell_rbu/data
- /sys/devices/platform/dell_rbu/image_type
- /sys/devices/platform/dell_rbu/data
- /sys/devices/platform/dell_rbu/packet_size
-
-The driver supports two types of update mechanism; monolithic and packetized.
-These update mechanism depends upon the BIOS currently running on the system.
-Most of the Dell systems support a monolithic update where the BIOS image is
-copied to a single contiguous block of physical memory.
-
-In case of packet mechanism the single memory can be broken in smaller chunks
-of contiguous memory and the BIOS image is scattered in these packets.
-
-By default the driver uses monolithic memory for the update type. This can be
-changed to packets during the driver load time by specifying the load
-parameter image_type=packet. This can also be changed later as below::
-
- echo packet > /sys/devices/platform/dell_rbu/image_type
-
-In packet update mode the packet size has to be given before any packets can
-be downloaded. It is done as below::
-
- echo XXXX > /sys/devices/platform/dell_rbu/packet_size
-
-In the packet update mechanism, the user needs to create a new file having
-packets of data arranged back to back. It can be done as follows
-The user creates packets header, gets the chunk of the BIOS image and
-places it next to the packetheader; now, the packetheader + BIOS image chunk
-added together should match the specified packet_size. This makes one
-packet, the user needs to create more such packets out of the entire BIOS
-image file and then arrange all these packets back to back in to one single
-file.
-
-This file is then copied to /sys/class/firmware/dell_rbu/data.
-Once this file gets to the driver, the driver extracts packet_size data from
-the file and spreads it across the physical memory in contiguous packet_sized
-space.
-
-This method makes sure that all the packets get to the driver in a single operation.
-
-In monolithic update the user simply get the BIOS image (.hdr file) and copies
-to the data file as is without any change to the BIOS image itself.
-
-Do the steps below to download the BIOS image.
-
-1) echo 1 > /sys/class/firmware/dell_rbu/loading
-2) cp bios_image.hdr /sys/class/firmware/dell_rbu/data
-3) echo 0 > /sys/class/firmware/dell_rbu/loading
-
-The /sys/class/firmware/dell_rbu/ entries will remain till the following is
-done.
-
-::
-
- echo -1 > /sys/class/firmware/dell_rbu/loading
-
-Until this step is completed the driver cannot be unloaded.
-
-Also echoing either mono, packet or init in to image_type will free up the
-memory allocated by the driver.
-
-If a user by accident executes steps 1 and 3 above without executing step 2;
-it will make the /sys/class/firmware/dell_rbu/ entries disappear.
-
-The entries can be recreated by doing the following::
-
- echo init > /sys/devices/platform/dell_rbu/image_type
-
-.. note:: echoing init in image_type does not change it original value.
-
-Also the driver provides /sys/devices/platform/dell_rbu/data readonly file to
-read back the image downloaded.
-
-.. note::
-
- After updating the BIOS image a user mode application needs to execute
- code which sends the BIOS update request to the BIOS. So on the next reboot
- the BIOS knows about the new image downloaded and it updates itself.
- Also don't unload the rbu driver if the image has to be updated.
-
diff --git a/Documentation/dev-tools/autofdo.rst b/Documentation/dev-tools/autofdo.rst
new file mode 100644
index 000000000000..bcf06e7d6ffa
--- /dev/null
+++ b/Documentation/dev-tools/autofdo.rst
@@ -0,0 +1,168 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================================
+Using AutoFDO with the Linux kernel
+===================================
+
+This enables AutoFDO build support for the kernel when using
+the Clang compiler. AutoFDO (Auto-Feedback-Directed Optimization)
+is a type of profile-guided optimization (PGO) used to enhance the
+performance of binary executables. It gathers information about the
+frequency of execution of various code paths within a binary using
+hardware sampling. This data is then used to guide the compiler's
+optimization decisions, resulting in a more efficient binary. AutoFDO
+is a powerful optimization technique, and data indicates that it can
+significantly improve kernel performance. It's especially beneficial
+for workloads affected by front-end stalls.
+
+For AutoFDO builds, unlike non-FDO builds, the user must supply a
+profile. Acquiring an AutoFDO profile can be done in several ways.
+AutoFDO profiles are created by converting hardware sampling using
+the "perf" tool. It is crucial that the workload used to create these
+perf files is representative; they must exhibit runtime
+characteristics similar to the workloads that are intended to be
+optimized. Failure to do so will result in the compiler optimizing
+for the wrong objective.
+
+The AutoFDO profile often encapsulates the program's behavior. If the
+performance-critical codes are architecture-independent, the profile
+can be applied across platforms to achieve performance gains. For
+instance, using the profile generated on Intel architecture to build
+a kernel for AMD architecture can also yield performance improvements.
+
+There are two methods for acquiring a representative profile:
+(1) Sample real workloads using a production environment.
+(2) Generate the profile using a representative load test.
+When enabling the AutoFDO build configuration without providing an
+AutoFDO profile, the compiler only modifies the dwarf information in
+the kernel without impacting runtime performance. It's advisable to
+use a kernel binary built with the same AutoFDO configuration to
+collect the perf profile. While it's possible to use a kernel built
+with different options, it may result in inferior performance.
+
+One can collect profiles using AutoFDO build for the previous kernel.
+AutoFDO employs relative line numbers to match the profiles, offering
+some tolerance for source changes. This mode is commonly used in a
+production environment for profile collection.
+
+In a profile collection based on a load test, the AutoFDO collection
+process consists of the following steps:
+
+#. Initial build: The kernel is built with AutoFDO options
+ without a profile.
+
+#. Profiling: The above kernel is then run with a representative
+ workload to gather execution frequency data. This data is
+ collected using hardware sampling, via perf. AutoFDO is most
+ effective on platforms supporting advanced PMU features like
+ LBR on Intel machines.
+
+#. AutoFDO profile generation: Perf output file is converted to
+ the AutoFDO profile via offline tools.
+
+The support requires a Clang compiler LLVM 17 or later.
+
+Preparation
+===========
+
+Configure the kernel with::
+
+ CONFIG_AUTOFDO_CLANG=y
+
+Customization
+=============
+
+The default CONFIG_AUTOFDO_CLANG setting covers kernel space objects for
+AutoFDO builds. One can, however, enable or disable AutoFDO build for
+individual files and directories by adding a line similar to the following
+to the respective kernel Makefile:
+
+- For enabling a single file (e.g. foo.o) ::
+
+ AUTOFDO_PROFILE_foo.o := y
+
+- For enabling all files in one directory ::
+
+ AUTOFDO_PROFILE := y
+
+- For disabling one file ::
+
+ AUTOFDO_PROFILE_foo.o := n
+
+- For disabling all files in one directory ::
+
+ AUTOFDO_PROFILE := n
+
+Workflow
+========
+
+Here is an example workflow for AutoFDO kernel:
+
+1) Build the kernel on the host machine with LLVM enabled,
+ for example, ::
+
+ $ make menuconfig LLVM=1
+
+ Turn on AutoFDO build config::
+
+ CONFIG_AUTOFDO_CLANG=y
+
+ With a configuration that with LLVM enabled, use the following command::
+
+ $ scripts/config -e AUTOFDO_CLANG
+
+ After getting the config, build with ::
+
+ $ make LLVM=1
+
+2) Install the kernel on the test machine.
+
+3) Run the load tests. The '-c' option in perf specifies the sample
+ event period. We suggest using a suitable prime number, like 500009,
+ for this purpose.
+
+ - For Intel platforms::
+
+ $ perf record -e BR_INST_RETIRED.NEAR_TAKEN:k -a -N -b -c <count> -o <perf_file> -- <loadtest>
+
+ - For AMD platforms:
+
+ The supported systems are: Zen3 with BRS, or Zen4 with amd_lbr_v2. To check,
+
+ For Zen3::
+
+ $ cat /proc/cpuinfo | grep " brs"
+
+ For Zen4::
+
+ $ cat /proc/cpuinfo | grep amd_lbr_v2
+
+ The following command generated the perf data file::
+
+ $ perf record --pfm-events RETIRED_TAKEN_BRANCH_INSTRUCTIONS:k -a -N -b -c <count> -o <perf_file> -- <loadtest>
+
+4) (Optional) Download the raw perf file to the host machine.
+
+5) To generate an AutoFDO profile, two offline tools are available:
+ create_llvm_prof and llvm_profgen. The create_llvm_prof tool is part
+ of the AutoFDO project and can be found on GitHub
+ (https://github.com/google/autofdo), version v0.30.1 or later.
+ The llvm_profgen tool is included in the LLVM compiler itself. It's
+ important to note that the version of llvm_profgen doesn't need to match
+ the version of Clang. It needs to be the LLVM 19 release of Clang
+ or later, or just from the LLVM trunk. ::
+
+ $ llvm-profgen --kernel --binary=<vmlinux> --perfdata=<perf_file> -o <profile_file>
+
+ or ::
+
+ $ create_llvm_prof --binary=<vmlinux> --profile=<perf_file> --format=extbinary --out=<profile_file>
+
+ Note that multiple AutoFDO profile files can be merged into one via::
+
+ $ llvm-profdata merge -o <profile_file> <profile_1> <profile_2> ... <profile_n>
+
+6) Rebuild the kernel using the AutoFDO profile file with the same config as step 1,
+ (Note CONFIG_AUTOFDO_CLANG needs to be enabled)::
+
+ $ make LLVM=1 CLANG_AUTOFDO_PROFILE=<profile_file>
diff --git a/Documentation/dev-tools/checkpatch.rst b/Documentation/dev-tools/checkpatch.rst
new file mode 100644
index 000000000000..d5c47e560324
--- /dev/null
+++ b/Documentation/dev-tools/checkpatch.rst
@@ -0,0 +1,1259 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+
+==========
+Checkpatch
+==========
+
+Checkpatch (scripts/checkpatch.pl) is a perl script which checks for trivial
+style violations in patches and optionally corrects them. Checkpatch can
+also be run on file contexts and without the kernel tree.
+
+Checkpatch is not always right. Your judgement takes precedence over checkpatch
+messages. If your code looks better with the violations, then its probably
+best left alone.
+
+
+Options
+=======
+
+This section will describe the options checkpatch can be run with.
+
+Usage::
+
+ ./scripts/checkpatch.pl [OPTION]... [FILE]...
+
+Available options:
+
+ - -q, --quiet
+
+ Enable quiet mode.
+
+ - -v, --verbose
+ Enable verbose mode. Additional verbose test descriptions are output
+ so as to provide information on why that particular message is shown.
+
+ - --no-tree
+
+ Run checkpatch without the kernel tree.
+
+ - --no-signoff
+
+ Disable the 'Signed-off-by' line check. The sign-off is a simple line at
+ the end of the explanation for the patch, which certifies that you wrote it
+ or otherwise have the right to pass it on as an open-source patch.
+
+ Example::
+
+ Signed-off-by: Random J Developer <random@developer.example.org>
+
+ Setting this flag effectively stops a message for a missing signed-off-by
+ line in a patch context.
+
+ - --patch
+
+ Treat FILE as a patch. This is the default option and need not be
+ explicitly specified.
+
+ - --emacs
+
+ Set output to emacs compile window format. This allows emacs users to jump
+ from the error in the compile window directly to the offending line in the
+ patch.
+
+ - --terse
+
+ Output only one line per report.
+
+ - --showfile
+
+ Show the diffed file position instead of the input file position.
+
+ - -g, --git
+
+ Treat FILE as a single commit or a git revision range.
+
+ Single commit with:
+
+ - <rev>
+ - <rev>^
+ - <rev>~n
+
+ Multiple commits with:
+
+ - <rev1>..<rev2>
+ - <rev1>...<rev2>
+ - <rev>-<count>
+
+ - -f, --file
+
+ Treat FILE as a regular source file. This option must be used when running
+ checkpatch on source files in the kernel.
+
+ - --subjective, --strict
+
+ Enable stricter tests in checkpatch. By default the tests emitted as CHECK
+ do not activate by default. Use this flag to activate the CHECK tests.
+
+ - --list-types
+
+ Every message emitted by checkpatch has an associated TYPE. Add this flag
+ to display all the types in checkpatch.
+
+ Note that when this flag is active, checkpatch does not read the input FILE,
+ and no message is emitted. Only a list of types in checkpatch is output.
+
+ - --types TYPE(,TYPE2...)
+
+ Only display messages with the given types.
+
+ Example::
+
+ ./scripts/checkpatch.pl mypatch.patch --types EMAIL_SUBJECT,BRACES
+
+ - --ignore TYPE(,TYPE2...)
+
+ Checkpatch will not emit messages for the specified types.
+
+ Example::
+
+ ./scripts/checkpatch.pl mypatch.patch --ignore EMAIL_SUBJECT,BRACES
+
+ - --show-types
+
+ By default checkpatch doesn't display the type associated with the messages.
+ Set this flag to show the message type in the output.
+
+ - --max-line-length=n
+
+ Set the max line length (default 100). If a line exceeds the specified
+ length, a LONG_LINE message is emitted.
+
+
+ The message level is different for patch and file contexts. For patches,
+ a WARNING is emitted. While a milder CHECK is emitted for files. So for
+ file contexts, the --strict flag must also be enabled.
+
+ - --min-conf-desc-length=n
+
+ Set the Kconfig entry minimum description length, if shorter, warn.
+
+ - --tab-size=n
+
+ Set the number of spaces for tab (default 8).
+
+ - --root=PATH
+
+ PATH to the kernel tree root.
+
+ This option must be specified when invoking checkpatch from outside
+ the kernel root.
+
+ - --no-summary
+
+ Suppress the per file summary.
+
+ - --mailback
+
+ Only produce a report in case of Warnings or Errors. Milder Checks are
+ excluded from this.
+
+ - --summary-file
+
+ Include the filename in summary.
+
+ - --debug KEY=[0|1]
+
+ Turn on/off debugging of KEY, where KEY is one of 'values', 'possible',
+ 'type', and 'attr' (default is all off).
+
+ - --fix
+
+ This is an EXPERIMENTAL feature. If correctable errors exist, a file
+ <inputfile>.EXPERIMENTAL-checkpatch-fixes is created which has the
+ automatically fixable errors corrected.
+
+ - --fix-inplace
+
+ EXPERIMENTAL - Similar to --fix but input file is overwritten with fixes.
+
+ DO NOT USE this flag unless you are absolutely sure and you have a backup
+ in place.
+
+ - --ignore-perl-version
+
+ Override checking of perl version. Runtime errors may be encountered after
+ enabling this flag if the perl version does not meet the minimum specified.
+
+ - --codespell
+
+ Use the codespell dictionary for checking spelling errors.
+
+ - --codespellfile
+
+ Use the specified codespell file.
+ Default is '/usr/share/codespell/dictionary.txt'.
+
+ - --typedefsfile
+
+ Read additional types from this file.
+
+ - --color[=WHEN]
+
+ Use colors 'always', 'never', or only when output is a terminal ('auto').
+ Default is 'auto'.
+
+ - --kconfig-prefix=WORD
+
+ Use WORD as a prefix for Kconfig symbols (default is `CONFIG_`).
+
+ - -h, --help, --version
+
+ Display the help text.
+
+Message Levels
+==============
+
+Messages in checkpatch are divided into three levels. The levels of messages
+in checkpatch denote the severity of the error. They are:
+
+ - ERROR
+
+ This is the most strict level. Messages of type ERROR must be taken
+ seriously as they denote things that are very likely to be wrong.
+
+ - WARNING
+
+ This is the next stricter level. Messages of type WARNING requires a
+ more careful review. But it is milder than an ERROR.
+
+ - CHECK
+
+ This is the mildest level. These are things which may require some thought.
+
+Type Descriptions
+=================
+
+This section contains a description of all the message types in checkpatch.
+
+.. Types in this section are also parsed by checkpatch.
+.. The types are grouped into subsections based on use.
+
+
+Allocation style
+----------------
+
+ **ALLOC_ARRAY_ARGS**
+ The first argument for kcalloc or kmalloc_array should be the
+ number of elements. sizeof() as the first argument is generally
+ wrong.
+
+ See: https://www.kernel.org/doc/html/latest/core-api/memory-allocation.html
+
+ **ALLOC_SIZEOF_STRUCT**
+ The allocation style is bad. In general for family of
+ allocation functions using sizeof() to get memory size,
+ constructs like::
+
+ p = alloc(sizeof(struct foo), ...)
+
+ should be::
+
+ p = alloc(sizeof(*p), ...)
+
+ See: https://www.kernel.org/doc/html/latest/process/coding-style.html#allocating-memory
+
+ **ALLOC_WITH_MULTIPLY**
+ Prefer kmalloc_array/kcalloc over kmalloc/kzalloc with a
+ sizeof multiply.
+
+ See: https://www.kernel.org/doc/html/latest/core-api/memory-allocation.html
+
+
+API usage
+---------
+
+ **ARCH_DEFINES**
+ Architecture specific defines should be avoided wherever
+ possible.
+
+ **ARCH_INCLUDE_LINUX**
+ Whenever asm/file.h is included and linux/file.h exists, a
+ conversion can be made when linux/file.h includes asm/file.h.
+ However this is not always the case (See signal.h).
+ This message type is emitted only for includes from arch/.
+
+ **AVOID_BUG**
+ BUG() or BUG_ON() should be avoided totally.
+ Use WARN() and WARN_ON() instead, and handle the "impossible"
+ error condition as gracefully as possible.
+
+ See: https://www.kernel.org/doc/html/latest/process/deprecated.html#bug-and-bug-on
+
+ **CONSIDER_KSTRTO**
+ The simple_strtol(), simple_strtoll(), simple_strtoul(), and
+ simple_strtoull() functions explicitly ignore overflows, which
+ may lead to unexpected results in callers. The respective kstrtol(),
+ kstrtoll(), kstrtoul(), and kstrtoull() functions tend to be the
+ correct replacements.
+
+ See: https://www.kernel.org/doc/html/latest/process/deprecated.html#simple-strtol-simple-strtoll-simple-strtoul-simple-strtoull
+
+ **CONSTANT_CONVERSION**
+ Use of __constant_<foo> form is discouraged for the following functions::
+
+ __constant_cpu_to_be[x]
+ __constant_cpu_to_le[x]
+ __constant_be[x]_to_cpu
+ __constant_le[x]_to_cpu
+ __constant_htons
+ __constant_ntohs
+
+ Using any of these outside of include/uapi/ is not preferred as using the
+ function without __constant_ is identical when the argument is a
+ constant.
+
+ In big endian systems, the macros like __constant_cpu_to_be32(x) and
+ cpu_to_be32(x) expand to the same expression::
+
+ #define __constant_cpu_to_be32(x) ((__force __be32)(__u32)(x))
+ #define __cpu_to_be32(x) ((__force __be32)(__u32)(x))
+
+ In little endian systems, the macros __constant_cpu_to_be32(x) and
+ cpu_to_be32(x) expand to __constant_swab32 and __swab32. __swab32
+ has a __builtin_constant_p check::
+
+ #define __swab32(x) \
+ (__builtin_constant_p((__u32)(x)) ? \
+ ___constant_swab32(x) : \
+ __fswab32(x))
+
+ So ultimately they have a special case for constants.
+ Similar is the case with all of the macros in the list. Thus
+ using the __constant_... forms are unnecessarily verbose and
+ not preferred outside of include/uapi.
+
+ See: https://lore.kernel.org/lkml/1400106425.12666.6.camel@joe-AO725/
+
+ **DEPRECATED_API**
+ Usage of a deprecated RCU API is detected. It is recommended to replace
+ old flavourful RCU APIs by their new vanilla-RCU counterparts.
+
+ The full list of available RCU APIs can be viewed from the kernel docs.
+
+ See: https://www.kernel.org/doc/html/latest/RCU/whatisRCU.html#full-list-of-rcu-apis
+
+ **DEVICE_ATTR_FUNCTIONS**
+ The function names used in DEVICE_ATTR is unusual.
+ Typically, the store and show functions are used with <attr>_store and
+ <attr>_show, where <attr> is a named attribute variable of the device.
+
+ Consider the following examples::
+
+ static DEVICE_ATTR(type, 0444, type_show, NULL);
+ static DEVICE_ATTR(power, 0644, power_show, power_store);
+
+ The function names should preferably follow the above pattern.
+
+ See: https://www.kernel.org/doc/html/latest/driver-api/driver-model/device.html#attributes
+
+ **DEVICE_ATTR_RO**
+ The DEVICE_ATTR_RO(name) helper macro can be used instead of
+ DEVICE_ATTR(name, 0444, name_show, NULL);
+
+ Note that the macro automatically appends _show to the named
+ attribute variable of the device for the show method.
+
+ See: https://www.kernel.org/doc/html/latest/driver-api/driver-model/device.html#attributes
+
+ **DEVICE_ATTR_RW**
+ The DEVICE_ATTR_RW(name) helper macro can be used instead of
+ DEVICE_ATTR(name, 0644, name_show, name_store);
+
+ Note that the macro automatically appends _show and _store to the
+ named attribute variable of the device for the show and store methods.
+
+ See: https://www.kernel.org/doc/html/latest/driver-api/driver-model/device.html#attributes
+
+ **DEVICE_ATTR_WO**
+ The DEVICE_AATR_WO(name) helper macro can be used instead of
+ DEVICE_ATTR(name, 0200, NULL, name_store);
+
+ Note that the macro automatically appends _store to the
+ named attribute variable of the device for the store method.
+
+ See: https://www.kernel.org/doc/html/latest/driver-api/driver-model/device.html#attributes
+
+ **DUPLICATED_SYSCTL_CONST**
+ Commit d91bff3011cf ("proc/sysctl: add shared variables for range
+ check") added some shared const variables to be used instead of a local
+ copy in each source file.
+
+ Consider replacing the sysctl range checking value with the shared
+ one in include/linux/sysctl.h. The following conversion scheme may
+ be used::
+
+ &zero -> SYSCTL_ZERO
+ &one -> SYSCTL_ONE
+ &int_max -> SYSCTL_INT_MAX
+
+ See:
+
+ 1. https://lore.kernel.org/lkml/20190430180111.10688-1-mcroce@redhat.com/
+ 2. https://lore.kernel.org/lkml/20190531131422.14970-1-mcroce@redhat.com/
+
+ **ENOSYS**
+ ENOSYS means that a nonexistent system call was called.
+ Earlier, it was wrongly used for things like invalid operations on
+ otherwise valid syscalls. This should be avoided in new code.
+
+ See: https://lore.kernel.org/lkml/5eb299021dec23c1a48fa7d9f2c8b794e967766d.1408730669.git.luto@amacapital.net/
+
+ **ENOTSUPP**
+ ENOTSUPP is not a standard error code and should be avoided in new patches.
+ EOPNOTSUPP should be used instead.
+
+ See: https://lore.kernel.org/netdev/20200510182252.GA411829@lunn.ch/
+
+ **EXPORT_SYMBOL**
+ EXPORT_SYMBOL should immediately follow the symbol to be exported.
+
+ **IN_ATOMIC**
+ in_atomic() is not for driver use so any such use is reported as an ERROR.
+ Also in_atomic() is often used to determine if sleeping is permitted,
+ but it is not reliable in this use model. Therefore its use is
+ strongly discouraged.
+
+ However, in_atomic() is ok for core kernel use.
+
+ See: https://lore.kernel.org/lkml/20080320201723.b87b3732.akpm@linux-foundation.org/
+
+ **LOCKDEP**
+ The lockdep_no_validate class was added as a temporary measure to
+ prevent warnings on conversion of device->sem to device->mutex.
+ It should not be used for any other purpose.
+
+ See: https://lore.kernel.org/lkml/1268959062.9440.467.camel@laptop/
+
+ **MALFORMED_INCLUDE**
+ The #include statement has a malformed path. This has happened
+ because the author has included a double slash "//" in the pathname
+ accidentally.
+
+ **USE_LOCKDEP**
+ lockdep_assert_held() annotations should be preferred over
+ assertions based on spin_is_locked()
+
+ See: https://www.kernel.org/doc/html/latest/locking/lockdep-design.html#annotations
+
+ **UAPI_INCLUDE**
+ No #include statements in include/uapi should use a uapi/ path.
+
+ **USLEEP_RANGE**
+ usleep_range() should be preferred over udelay(). The proper way of
+ using usleep_range() is mentioned in the kernel docs.
+
+
+Comments
+--------
+
+ **BLOCK_COMMENT_STYLE**
+ The comment style is incorrect. The preferred style for multi-
+ line comments is::
+
+ /*
+ * This is the preferred style
+ * for multi line comments.
+ */
+
+ The networking comment style is a bit different, with the first line
+ not empty like the former::
+
+ /* This is the preferred comment style
+ * for files in net/ and drivers/net/
+ */
+
+ See: https://www.kernel.org/doc/html/latest/process/coding-style.html#commenting
+
+ **C99_COMMENTS**
+ C99 style single line comments (//) should not be used.
+ Prefer the block comment style instead.
+
+ See: https://www.kernel.org/doc/html/latest/process/coding-style.html#commenting
+
+ **DATA_RACE**
+ Applications of data_race() should have a comment so as to document the
+ reasoning behind why it was deemed safe.
+
+ See: https://lore.kernel.org/lkml/20200401101714.44781-1-elver@google.com/
+
+ **FSF_MAILING_ADDRESS**
+ Kernel maintainers reject new instances of the GPL boilerplate paragraph
+ directing people to write to the FSF for a copy of the GPL, since the
+ FSF has moved in the past and may do so again.
+ So do not write paragraphs about writing to the Free Software Foundation's
+ mailing address.
+
+ See: https://lore.kernel.org/lkml/20131006222342.GT19510@leaf/
+
+ **UNCOMMENTED_RGMII_MODE**
+ Historically, the RGMII PHY modes specified in Device Trees have been
+ used inconsistently, often referring to the usage of delays on the PHY
+ side rather than describing the board.
+
+ PHY modes "rgmii", "rgmii-rxid" and "rgmii-txid" modes require the clock
+ signal to be delayed on the PCB; this unusual configuration should be
+ described in a comment. If they are not (meaning that the delay is realized
+ internally in the MAC or PHY), "rgmii-id" is the correct PHY mode.
+
+Commit message
+--------------
+
+ **BAD_SIGN_OFF**
+ The signed-off-by line does not fall in line with the standards
+ specified by the community.
+
+ See: https://www.kernel.org/doc/html/latest/process/submitting-patches.html#developer-s-certificate-of-origin-1-1
+
+ **BAD_STABLE_ADDRESS_STYLE**
+ The email format for stable is incorrect.
+ Some valid options for stable address are::
+
+ 1. stable@vger.kernel.org
+ 2. stable@kernel.org
+
+ For adding version info, the following comment style should be used::
+
+ stable@vger.kernel.org # version info
+
+ **COMMIT_COMMENT_SYMBOL**
+ Commit log lines starting with a '#' are ignored by git as
+ comments. To solve this problem addition of a single space
+ infront of the log line is enough.
+
+ **COMMIT_MESSAGE**
+ The patch is missing a commit description. A brief
+ description of the changes made by the patch should be added.
+
+ See: https://www.kernel.org/doc/html/latest/process/submitting-patches.html#describe-your-changes
+
+ **EMAIL_SUBJECT**
+ Naming the tool that found the issue is not very useful in the
+ subject line. A good subject line summarizes the change that
+ the patch brings.
+
+ See: https://www.kernel.org/doc/html/latest/process/submitting-patches.html#describe-your-changes
+
+ **FROM_SIGN_OFF_MISMATCH**
+ The author's email does not match with that in the Signed-off-by:
+ line(s). This can be sometimes caused due to an improperly configured
+ email client.
+
+ This message is emitted due to any of the following reasons::
+
+ - The email names do not match.
+ - The email addresses do not match.
+ - The email subaddresses do not match.
+ - The email comments do not match.
+
+ **MISSING_SIGN_OFF**
+ The patch is missing a Signed-off-by line. A signed-off-by
+ line should be added according to Developer's certificate of
+ Origin.
+
+ See: https://www.kernel.org/doc/html/latest/process/submitting-patches.html#sign-your-work-the-developer-s-certificate-of-origin
+
+ **NO_AUTHOR_SIGN_OFF**
+ The author of the patch has not signed off the patch. It is
+ required that a simple sign off line should be present at the
+ end of explanation of the patch to denote that the author has
+ written it or otherwise has the rights to pass it on as an open
+ source patch.
+
+ See: https://www.kernel.org/doc/html/latest/process/submitting-patches.html#sign-your-work-the-developer-s-certificate-of-origin
+
+ **DIFF_IN_COMMIT_MSG**
+ Avoid having diff content in commit message.
+ This causes problems when one tries to apply a file containing both
+ the changelog and the diff because patch(1) tries to apply the diff
+ which it found in the changelog.
+
+ See: https://lore.kernel.org/lkml/20150611134006.9df79a893e3636019ad2759e@linux-foundation.org/
+
+ **GERRIT_CHANGE_ID**
+ To be picked up by gerrit, the footer of the commit message might
+ have a Change-Id like::
+
+ Change-Id: Ic8aaa0728a43936cd4c6e1ed590e01ba8f0fbf5b
+ Signed-off-by: A. U. Thor <author@example.com>
+
+ The Change-Id line must be removed before submitting.
+
+ **GIT_COMMIT_ID**
+ The proper way to reference a commit id is:
+ commit <12+ chars of sha1> ("<title line>")
+
+ An example may be::
+
+ Commit e21d2170f36602ae2708 ("video: remove unnecessary
+ platform_set_drvdata()") removed the unnecessary
+ platform_set_drvdata(), but left the variable "dev" unused,
+ delete it.
+
+ See: https://www.kernel.org/doc/html/latest/process/submitting-patches.html#describe-your-changes
+
+ **BAD_FIXES_TAG**
+ The Fixes: tag is malformed or does not follow the community conventions.
+ This can occur if the tag have been split into multiple lines (e.g., when
+ pasted in an email program with word wrapping enabled).
+
+ See: https://www.kernel.org/doc/html/latest/process/submitting-patches.html#describe-your-changes
+
+
+Comparison style
+----------------
+
+ **ASSIGN_IN_IF**
+ Do not use assignments in if condition.
+ Example::
+
+ if ((foo = bar(...)) < BAZ) {
+
+ should be written as::
+
+ foo = bar(...);
+ if (foo < BAZ) {
+
+ **BOOL_COMPARISON**
+ Comparisons of A to true and false are better written
+ as A and !A.
+
+ See: https://lore.kernel.org/lkml/1365563834.27174.12.camel@joe-AO722/
+
+ **COMPARISON_TO_NULL**
+ Comparisons to NULL in the form (foo == NULL) or (foo != NULL)
+ are better written as (!foo) and (foo).
+
+ **CONSTANT_COMPARISON**
+ Comparisons with a constant or upper case identifier on the left
+ side of the test should be avoided.
+
+
+Indentation and Line Breaks
+---------------------------
+
+ **CODE_INDENT**
+ Code indent should use tabs instead of spaces.
+ Outside of comments, documentation and Kconfig,
+ spaces are never used for indentation.
+
+ See: https://www.kernel.org/doc/html/latest/process/coding-style.html#indentation
+
+ **DEEP_INDENTATION**
+ Indentation with 6 or more tabs usually indicate overly indented
+ code.
+
+ It is suggested to refactor excessive indentation of
+ if/else/for/do/while/switch statements.
+
+ See: https://lore.kernel.org/lkml/1328311239.21255.24.camel@joe2Laptop/
+
+ **SWITCH_CASE_INDENT_LEVEL**
+ switch should be at the same indent as case.
+ Example::
+
+ switch (suffix) {
+ case 'G':
+ case 'g':
+ mem <<= 30;
+ break;
+ case 'M':
+ case 'm':
+ mem <<= 20;
+ break;
+ case 'K':
+ case 'k':
+ mem <<= 10;
+ fallthrough;
+ default:
+ break;
+ }
+
+ See: https://www.kernel.org/doc/html/latest/process/coding-style.html#indentation
+
+ **LONG_LINE**
+ The line has exceeded the specified maximum length.
+ To use a different maximum line length, the --max-line-length=n option
+ may be added while invoking checkpatch.
+
+ Earlier, the default line length was 80 columns. Commit bdc48fa11e46
+ ("checkpatch/coding-style: deprecate 80-column warning") increased the
+ limit to 100 columns. This is not a hard limit either and it's
+ preferable to stay within 80 columns whenever possible.
+
+ See: https://www.kernel.org/doc/html/latest/process/coding-style.html#breaking-long-lines-and-strings
+
+ **LONG_LINE_STRING**
+ A string starts before but extends beyond the maximum line length.
+ To use a different maximum line length, the --max-line-length=n option
+ may be added while invoking checkpatch.
+
+ See: https://www.kernel.org/doc/html/latest/process/coding-style.html#breaking-long-lines-and-strings
+
+ **LONG_LINE_COMMENT**
+ A comment starts before but extends beyond the maximum line length.
+ To use a different maximum line length, the --max-line-length=n option
+ may be added while invoking checkpatch.
+
+ See: https://www.kernel.org/doc/html/latest/process/coding-style.html#breaking-long-lines-and-strings
+
+ **SPLIT_STRING**
+ Quoted strings that appear as messages in userspace and can be
+ grepped, should not be split across multiple lines.
+
+ See: https://lore.kernel.org/lkml/20120203052727.GA15035@leaf/
+
+ **MULTILINE_DEREFERENCE**
+ A single dereferencing identifier spanned on multiple lines like::
+
+ struct_identifier->member[index].
+ member = <foo>;
+
+ is generally hard to follow. It can easily lead to typos and so makes
+ the code vulnerable to bugs.
+
+ If fixing the multiple line dereferencing leads to an 80 column
+ violation, then either rewrite the code in a more simple way or if the
+ starting part of the dereferencing identifier is the same and used at
+ multiple places then store it in a temporary variable, and use that
+ temporary variable only at all the places. For example, if there are
+ two dereferencing identifiers::
+
+ member1->member2->member3.foo1;
+ member1->member2->member3.foo2;
+
+ then store the member1->member2->member3 part in a temporary variable.
+ It not only helps to avoid the 80 column violation but also reduces
+ the program size by removing the unnecessary dereferences.
+
+ But if none of the above methods work then ignore the 80 column
+ violation because it is much easier to read a dereferencing identifier
+ on a single line.
+
+ **TRAILING_STATEMENTS**
+ Trailing statements (for example after any conditional) should be
+ on the next line.
+ Statements, such as::
+
+ if (x == y) break;
+
+ should be::
+
+ if (x == y)
+ break;
+
+
+Macros, Attributes and Symbols
+------------------------------
+
+ **ARRAY_SIZE**
+ The ARRAY_SIZE(foo) macro should be preferred over
+ sizeof(foo)/sizeof(foo[0]) for finding number of elements in an
+ array.
+
+ The macro is defined in include/linux/kernel.h::
+
+ #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+ **AVOID_EXTERNS**
+ Function prototypes don't need to be declared extern in .h
+ files. It's assumed by the compiler and is unnecessary.
+
+ **AVOID_L_PREFIX**
+ Local symbol names that are prefixed with `.L` should be avoided,
+ as this has special meaning for the assembler; a symbol entry will
+ not be emitted into the symbol table. This can prevent `objtool`
+ from generating correct unwind info.
+
+ Symbols with STB_LOCAL binding may still be used, and `.L` prefixed
+ local symbol names are still generally usable within a function,
+ but `.L` prefixed local symbol names should not be used to denote
+ the beginning or end of code regions via
+ `SYM_CODE_START_LOCAL`/`SYM_CODE_END`
+
+ **BIT_MACRO**
+ Defines like: 1 << <digit> could be BIT(digit).
+ The BIT() macro is defined via include/linux/bits.h::
+
+ #define BIT(nr) (1UL << (nr))
+
+ **CONST_READ_MOSTLY**
+ When a variable is tagged with the __read_mostly annotation, it is a
+ signal to the compiler that accesses to the variable will be mostly
+ reads and rarely(but NOT never) a write.
+
+ const __read_mostly does not make any sense as const data is already
+ read-only. The __read_mostly annotation thus should be removed.
+
+ **DATE_TIME**
+ It is generally desirable that building the same source code with
+ the same set of tools is reproducible, i.e. the output is always
+ exactly the same.
+
+ The kernel does *not* use the ``__DATE__`` and ``__TIME__`` macros,
+ and enables warnings if they are used as they can lead to
+ non-deterministic builds.
+
+ See: https://www.kernel.org/doc/html/latest/kbuild/reproducible-builds.html#timestamps
+
+ **DEFINE_ARCH_HAS**
+ The ARCH_HAS_xyz and ARCH_HAVE_xyz patterns are wrong.
+
+ For big conceptual features use Kconfig symbols instead. And for
+ smaller things where we have compatibility fallback functions but
+ want architectures able to override them with optimized ones, we
+ should either use weak functions (appropriate for some cases), or
+ the symbol that protects them should be the same symbol we use.
+
+ See: https://lore.kernel.org/lkml/CA+55aFycQ9XJvEOsiM3txHL5bjUc8CeKWJNR_H+MiicaddB42Q@mail.gmail.com/
+
+ **DO_WHILE_MACRO_WITH_TRAILING_SEMICOLON**
+ do {} while(0) macros should not have a trailing semicolon.
+
+ **INIT_ATTRIBUTE**
+ Const init definitions should use __initconst instead of
+ __initdata.
+
+ Similarly init definitions without const require a separate
+ use of const.
+
+ **INLINE_LOCATION**
+ The inline keyword should sit between storage class and type.
+
+ For example, the following segment::
+
+ inline static int example_function(void)
+ {
+ ...
+ }
+
+ should be::
+
+ static inline int example_function(void)
+ {
+ ...
+ }
+
+ **MISPLACED_INIT**
+ It is possible to use section markers on variables in a way
+ which gcc doesn't understand (or at least not the way the
+ developer intended)::
+
+ static struct __initdata samsung_pll_clock exynos4_plls[nr_plls] = {
+
+ does not put exynos4_plls in the .initdata section. The __initdata
+ marker can be virtually anywhere on the line, except right after
+ "struct". The preferred location is before the "=" sign if there is
+ one, or before the trailing ";" otherwise.
+
+ See: https://lore.kernel.org/lkml/1377655732.3619.19.camel@joe-AO722/
+
+ **MULTISTATEMENT_MACRO_USE_DO_WHILE**
+ Macros with multiple statements should be enclosed in a
+ do - while block. Same should also be the case for macros
+ starting with `if` to avoid logic defects::
+
+ #define macrofun(a, b, c) \
+ do { \
+ if (a == 5) \
+ do_this(b, c); \
+ } while (0)
+
+ See: https://www.kernel.org/doc/html/latest/process/coding-style.html#macros-enums-and-rtl
+
+ **PREFER_FALLTHROUGH**
+ Use the `fallthrough;` pseudo keyword instead of
+ `/* fallthrough */` like comments.
+
+ **TRAILING_SEMICOLON**
+ Macro definition should not end with a semicolon. The macro
+ invocation style should be consistent with function calls.
+ This can prevent any unexpected code paths::
+
+ #define MAC do_something;
+
+ If this macro is used within a if else statement, like::
+
+ if (some_condition)
+ MAC;
+
+ else
+ do_something;
+
+ Then there would be a compilation error, because when the macro is
+ expanded there are two trailing semicolons, so the else branch gets
+ orphaned.
+
+ See: https://lore.kernel.org/lkml/1399671106.2912.21.camel@joe-AO725/
+
+ **MACRO_ARG_UNUSED**
+ If function-like macros do not utilize a parameter, it might result
+ in a build warning. We advocate for utilizing static inline functions
+ to replace such macros.
+ For example, for a macro such as the one below::
+
+ #define test(a) do { } while (0)
+
+ there would be a warning like below::
+
+ WARNING: Argument 'a' is not used in function-like macro.
+
+ See: https://www.kernel.org/doc/html/latest/process/coding-style.html#macros-enums-and-rtl
+
+ **SINGLE_STATEMENT_DO_WHILE_MACRO**
+ For the multi-statement macros, it is necessary to use the do-while
+ loop to avoid unpredictable code paths. The do-while loop helps to
+ group the multiple statements into a single one so that a
+ function-like macro can be used as a function only.
+
+ But for the single statement macros, it is unnecessary to use the
+ do-while loop. Although the code is syntactically correct but using
+ the do-while loop is redundant. So remove the do-while loop for single
+ statement macros.
+
+ **WEAK_DECLARATION**
+ Using weak declarations like __attribute__((weak)) or __weak
+ can have unintended link defects. Avoid using them.
+
+
+Functions and Variables
+-----------------------
+
+ **CAMELCASE**
+ Avoid CamelCase Identifiers.
+
+ See: https://www.kernel.org/doc/html/latest/process/coding-style.html#naming
+
+ **CONST_CONST**
+ Using `const <type> const *` is generally meant to be
+ written `const <type> * const`.
+
+ **CONST_STRUCT**
+ Using const is generally a good idea. Checkpatch reads
+ a list of frequently used structs that are always or
+ almost always constant.
+
+ The existing structs list can be viewed from
+ `scripts/const_structs.checkpatch`.
+
+ See: https://lore.kernel.org/lkml/alpine.DEB.2.10.1608281509480.3321@hadrien/
+
+ **EMBEDDED_FUNCTION_NAME**
+ Embedded function names are less appropriate to use as
+ refactoring can cause function renaming. Prefer the use of
+ "%s", __func__ to embedded function names.
+
+ Note that this does not work with -f (--file) checkpatch option
+ as it depends on patch context providing the function name.
+
+ **FUNCTION_ARGUMENTS**
+ This warning is emitted due to any of the following reasons:
+
+ 1. Arguments for the function declaration do not follow
+ the identifier name. Example::
+
+ void foo
+ (int bar, int baz)
+
+ This should be corrected to::
+
+ void foo(int bar, int baz)
+
+ 2. Some arguments for the function definition do not
+ have an identifier name. Example::
+
+ void foo(int)
+
+ All arguments should have identifier names.
+
+ **FUNCTION_WITHOUT_ARGS**
+ Function declarations without arguments like::
+
+ int foo()
+
+ should be::
+
+ int foo(void)
+
+ **GLOBAL_INITIALISERS**
+ Global variables should not be initialized explicitly to
+ 0 (or NULL, false, etc.). Your compiler (or rather your
+ loader, which is responsible for zeroing out the relevant
+ sections) automatically does it for you.
+
+ **INITIALISED_STATIC**
+ Static variables should not be initialized explicitly to zero.
+ Your compiler (or rather your loader) automatically does
+ it for you.
+
+ **MULTIPLE_ASSIGNMENTS**
+ Multiple assignments on a single line makes the code unnecessarily
+ complicated. So on a single line assign value to a single variable
+ only, this makes the code more readable and helps avoid typos.
+
+ **RETURN_PARENTHESES**
+ return is not a function and as such doesn't need parentheses::
+
+ return (bar);
+
+ can simply be::
+
+ return bar;
+
+
+Permissions
+-----------
+
+ **DEVICE_ATTR_PERMS**
+ The permissions used in DEVICE_ATTR are unusual.
+ Typically only three permissions are used - 0644 (RW), 0444 (RO)
+ and 0200 (WO).
+
+ See: https://www.kernel.org/doc/html/latest/filesystems/sysfs.html#attributes
+
+ **EXECUTE_PERMISSIONS**
+ There is no reason for source files to be executable. The executable
+ bit can be removed safely.
+
+ **EXPORTED_WORLD_WRITABLE**
+ Exporting world writable sysfs/debugfs files is usually a bad thing.
+ When done arbitrarily they can introduce serious security bugs.
+ In the past, some of the debugfs vulnerabilities would seemingly allow
+ any local user to write arbitrary values into device registers - a
+ situation from which little good can be expected to emerge.
+
+ See: https://lore.kernel.org/linux-arm-kernel/cover.1296818921.git.segoon@openwall.com/
+
+ **NON_OCTAL_PERMISSIONS**
+ Permission bits should use 4 digit octal permissions (like 0700 or 0444).
+ Avoid using any other base like decimal.
+
+ **SYMBOLIC_PERMS**
+ Permission bits in the octal form are more readable and easier to
+ understand than their symbolic counterparts because many command-line
+ tools use this notation. Experienced kernel developers have been using
+ these traditional Unix permission bits for decades and so they find it
+ easier to understand the octal notation than the symbolic macros.
+ For example, it is harder to read S_IWUSR|S_IRUGO than 0644, which
+ obscures the developer's intent rather than clarifying it.
+
+ See: https://lore.kernel.org/lkml/CA+55aFw5v23T-zvDZp-MmD_EYxF8WbafwwB59934FV7g21uMGQ@mail.gmail.com/
+
+
+Spacing and Brackets
+--------------------
+
+ **ASSIGNMENT_CONTINUATIONS**
+ Assignment operators should not be written at the start of a
+ line but should follow the operand at the previous line.
+
+ **BRACES**
+ The placement of braces is stylistically incorrect.
+ The preferred way is to put the opening brace last on the line,
+ and put the closing brace first::
+
+ if (x is true) {
+ we do y
+ }
+
+ This applies for all non-functional blocks.
+ However, there is one special case, namely functions: they have the
+ opening brace at the beginning of the next line, thus::
+
+ int function(int x)
+ {
+ body of function
+ }
+
+ See: https://www.kernel.org/doc/html/latest/process/coding-style.html#placing-braces-and-spaces
+
+ **BRACKET_SPACE**
+ Whitespace before opening bracket '[' is prohibited.
+ There are some exceptions:
+
+ 1. With a type on the left::
+
+ int [] a;
+
+ 2. At the beginning of a line for slice initialisers::
+
+ [0...10] = 5,
+
+ 3. Inside a curly brace::
+
+ = { [0...10] = 5 }
+
+ **CONCATENATED_STRING**
+ Concatenated elements should have a space in between.
+ Example::
+
+ printk(KERN_INFO"bar");
+
+ should be::
+
+ printk(KERN_INFO "bar");
+
+ **ELSE_AFTER_BRACE**
+ `else {` should follow the closing block `}` on the same line.
+
+ See: https://www.kernel.org/doc/html/latest/process/coding-style.html#placing-braces-and-spaces
+
+ **LINE_SPACING**
+ Vertical space is wasted given the limited number of lines an
+ editor window can display when multiple blank lines are used.
+
+ See: https://www.kernel.org/doc/html/latest/process/coding-style.html#spaces
+
+ **OPEN_BRACE**
+ The opening brace should be following the function definitions on the
+ next line. For any non-functional block it should be on the same line
+ as the last construct.
+
+ See: https://www.kernel.org/doc/html/latest/process/coding-style.html#placing-braces-and-spaces
+
+ **POINTER_LOCATION**
+ When using pointer data or a function that returns a pointer type,
+ the preferred use of * is adjacent to the data name or function name
+ and not adjacent to the type name.
+ Examples::
+
+ char *linux_banner;
+ unsigned long long memparse(char *ptr, char **retptr);
+ char *match_strdup(substring_t *s);
+
+ See: https://www.kernel.org/doc/html/latest/process/coding-style.html#spaces
+
+ **SPACING**
+ Whitespace style used in the kernel sources is described in kernel docs.
+
+ See: https://www.kernel.org/doc/html/latest/process/coding-style.html#spaces
+
+ **TRAILING_WHITESPACE**
+ Trailing whitespace should always be removed.
+ Some editors highlight the trailing whitespace and cause visual
+ distractions when editing files.
+
+ See: https://www.kernel.org/doc/html/latest/process/coding-style.html#spaces
+
+ **UNNECESSARY_PARENTHESES**
+ Parentheses are not required in the following cases:
+
+ 1. Function pointer uses::
+
+ (foo->bar)();
+
+ could be::
+
+ foo->bar();
+
+ 2. Comparisons in if::
+
+ if ((foo->bar) && (foo->baz))
+ if ((foo == bar))
+
+ could be::
+
+ if (foo->bar && foo->baz)
+ if (foo == bar)
+
+ 3. addressof/dereference single Lvalues::
+
+ &(foo->bar)
+ *(foo->bar)
+
+ could be::
+
+ &foo->bar
+ *foo->bar
+
+ **WHILE_AFTER_BRACE**
+ while should follow the closing bracket on the same line::
+
+ do {
+ ...
+ } while(something);
+
+ See: https://www.kernel.org/doc/html/latest/process/coding-style.html#placing-braces-and-spaces
+
+
+Others
+------
+
+ **CONFIG_DESCRIPTION**
+ Kconfig symbols should have a help text which fully describes
+ it.
+
+ **CORRUPTED_PATCH**
+ The patch seems to be corrupted or lines are wrapped.
+ Please regenerate the patch file before sending it to the maintainer.
+
+ **CVS_KEYWORD**
+ Since linux moved to git, the CVS markers are no longer used.
+ So, CVS style keywords ($Id$, $Revision$, $Log$) should not be
+ added.
+
+ **DEFAULT_NO_BREAK**
+ switch default case is sometimes written as "default:;". This can
+ cause new cases added below default to be defective.
+
+ A "break;" should be added after empty default statement to avoid
+ unwanted fallthrough.
+
+ **DOS_LINE_ENDINGS**
+ For DOS-formatted patches, there are extra ^M symbols at the end of
+ the line. These should be removed.
+
+ **DT_SCHEMA_BINDING_PATCH**
+ DT bindings moved to a json-schema based format instead of
+ freeform text.
+
+ See: https://www.kernel.org/doc/html/latest/devicetree/bindings/writing-schema.html
+
+ **DT_SPLIT_BINDING_PATCH**
+ Devicetree bindings should be their own patch. This is because
+ bindings are logically independent from a driver implementation,
+ they have a different maintainer (even though they often
+ are applied via the same tree), and it makes for a cleaner history in the
+ DT only tree created with git-filter-branch.
+
+ See: https://www.kernel.org/doc/html/latest/devicetree/bindings/submitting-patches.html#i-for-patch-submitters
+
+ **EMBEDDED_FILENAME**
+ Embedding the complete filename path inside the file isn't particularly
+ useful as often the path is moved around and becomes incorrect.
+
+ **FILE_PATH_CHANGES**
+ Whenever files are added, moved, or deleted, the MAINTAINERS file
+ patterns can be out of sync or outdated.
+
+ So MAINTAINERS might need updating in these cases.
+
+ **MEMSET**
+ The memset use appears to be incorrect. This may be caused due to
+ badly ordered parameters. Please recheck the usage.
+
+ **NOT_UNIFIED_DIFF**
+ The patch file does not appear to be in unified-diff format. Please
+ regenerate the patch file before sending it to the maintainer.
+
+ **PRINTF_0XDECIMAL**
+ Prefixing 0x with decimal output is defective and should be corrected.
+
+ **SPDX_LICENSE_TAG**
+ The source file is missing or has an improper SPDX identifier tag.
+ The Linux kernel requires the precise SPDX identifier in all source files,
+ and it is thoroughly documented in the kernel docs.
+
+ See: https://www.kernel.org/doc/html/latest/process/license-rules.html
+
+ **TYPO_SPELLING**
+ Some words may have been misspelled. Consider reviewing them.
diff --git a/Documentation/dev-tools/checkuapi.rst b/Documentation/dev-tools/checkuapi.rst
new file mode 100644
index 000000000000..9072f21b50b0
--- /dev/null
+++ b/Documentation/dev-tools/checkuapi.rst
@@ -0,0 +1,477 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+
+============
+UAPI Checker
+============
+
+The UAPI checker (``scripts/check-uapi.sh``) is a shell script which
+checks UAPI header files for userspace backwards-compatibility across
+the git tree.
+
+Options
+=======
+
+This section will describe the options with which ``check-uapi.sh``
+can be run.
+
+Usage::
+
+ check-uapi.sh [-b BASE_REF] [-p PAST_REF] [-j N] [-l ERROR_LOG] [-i] [-q] [-v]
+
+Available options::
+
+ -b BASE_REF Base git reference to use for comparison. If unspecified or empty,
+ will use any dirty changes in tree to UAPI files. If there are no
+ dirty changes, HEAD will be used.
+ -p PAST_REF Compare BASE_REF to PAST_REF (e.g. -p v6.1). If unspecified or empty,
+ will use BASE_REF^1. Must be an ancestor of BASE_REF. Only headers
+ that exist on PAST_REF will be checked for compatibility.
+ -j JOBS Number of checks to run in parallel (default: number of CPU cores).
+ -l ERROR_LOG Write error log to file (default: no error log is generated).
+ -i Ignore ambiguous changes that may or may not break UAPI compatibility.
+ -q Quiet operation.
+ -v Verbose operation (print more information about each header being checked).
+
+Environmental args::
+
+ ABIDIFF Custom path to abidiff binary
+ CC C compiler (default is "gcc")
+ ARCH Target architecture of C compiler (default is host arch)
+
+Exit codes::
+
+ 0) Success
+ 1) ABI difference detected
+ 2) Prerequisite not met
+
+Examples
+========
+
+Basic Usage
+-----------
+
+First, let's try making a change to a UAPI header file that obviously
+won't break userspace::
+
+ cat << 'EOF' | patch -l -p1
+ --- a/include/uapi/linux/acct.h
+ +++ b/include/uapi/linux/acct.h
+ @@ -21,7 +21,9 @@
+ #include <asm/param.h>
+ #include <asm/byteorder.h>
+
+ -/*
+ +#define FOO
+ +
+ +/*
+ * comp_t is a 16-bit "floating" point number with a 3-bit base 8
+ * exponent and a 13-bit fraction.
+ * comp2_t is 24-bit with 5-bit base 2 exponent and 20 bit fraction
+ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
+ EOF
+
+Now, let's use the script to validate::
+
+ % ./scripts/check-uapi.sh
+ Installing user-facing UAPI headers from dirty tree... OK
+ Installing user-facing UAPI headers from HEAD... OK
+ Checking changes to UAPI headers between HEAD and dirty tree...
+ All 912 UAPI headers compatible with x86 appear to be backwards compatible
+
+Let's add another change that *might* break userspace::
+
+ cat << 'EOF' | patch -l -p1
+ --- a/include/uapi/linux/bpf.h
+ +++ b/include/uapi/linux/bpf.h
+ @@ -74,7 +74,7 @@ struct bpf_insn {
+ __u8 dst_reg:4; /* dest register */
+ __u8 src_reg:4; /* source register */
+ __s16 off; /* signed offset */
+ - __s32 imm; /* signed immediate constant */
+ + __u32 imm; /* unsigned immediate constant */
+ };
+
+ /* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */
+ EOF
+
+The script will catch this::
+
+ % ./scripts/check-uapi.sh
+ Installing user-facing UAPI headers from dirty tree... OK
+ Installing user-facing UAPI headers from HEAD... OK
+ Checking changes to UAPI headers between HEAD and dirty tree...
+ ==== ABI differences detected in include/linux/bpf.h from HEAD -> dirty tree ====
+ [C] 'struct bpf_insn' changed:
+ type size hasn't changed
+ 1 data member change:
+ type of '__s32 imm' changed:
+ typedef name changed from __s32 to __u32 at int-ll64.h:27:1
+ underlying type 'int' changed:
+ type name changed from 'int' to 'unsigned int'
+ type size hasn't changed
+ ==================================================================================
+
+ error - 1/912 UAPI headers compatible with x86 appear _not_ to be backwards compatible
+
+In this case, the script is reporting the type change because it could
+break a userspace program that passes in a negative number. Now, let's
+say you know that no userspace program could possibly be using a negative
+value in ``imm``, so changing to an unsigned type there shouldn't hurt
+anything. You can pass the ``-i`` flag to the script to ignore changes
+in which the userspace backwards compatibility is ambiguous::
+
+ % ./scripts/check-uapi.sh -i
+ Installing user-facing UAPI headers from dirty tree... OK
+ Installing user-facing UAPI headers from HEAD... OK
+ Checking changes to UAPI headers between HEAD and dirty tree...
+ All 912 UAPI headers compatible with x86 appear to be backwards compatible
+
+Now, let's make a similar change that *will* break userspace::
+
+ cat << 'EOF' | patch -l -p1
+ --- a/include/uapi/linux/bpf.h
+ +++ b/include/uapi/linux/bpf.h
+ @@ -71,8 +71,8 @@ enum {
+
+ struct bpf_insn {
+ __u8 code; /* opcode */
+ - __u8 dst_reg:4; /* dest register */
+ __u8 src_reg:4; /* source register */
+ + __u8 dst_reg:4; /* dest register */
+ __s16 off; /* signed offset */
+ __s32 imm; /* signed immediate constant */
+ };
+ EOF
+
+Since we're re-ordering an existing struct member, there's no ambiguity,
+and the script will report the breakage even if you pass ``-i``::
+
+ % ./scripts/check-uapi.sh -i
+ Installing user-facing UAPI headers from dirty tree... OK
+ Installing user-facing UAPI headers from HEAD... OK
+ Checking changes to UAPI headers between HEAD and dirty tree...
+ ==== ABI differences detected in include/linux/bpf.h from HEAD -> dirty tree ====
+ [C] 'struct bpf_insn' changed:
+ type size hasn't changed
+ 2 data member changes:
+ '__u8 dst_reg' offset changed from 8 to 12 (in bits) (by +4 bits)
+ '__u8 src_reg' offset changed from 12 to 8 (in bits) (by -4 bits)
+ ==================================================================================
+
+ error - 1/912 UAPI headers compatible with x86 appear _not_ to be backwards compatible
+
+Let's commit the breaking change, then commit the innocuous change::
+
+ % git commit -m 'Breaking UAPI change' include/uapi/linux/bpf.h
+ [detached HEAD f758e574663a] Breaking UAPI change
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+ % git commit -m 'Innocuous UAPI change' include/uapi/linux/acct.h
+ [detached HEAD 2e87df769081] Innocuous UAPI change
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+Now, let's run the script again with no arguments::
+
+ % ./scripts/check-uapi.sh
+ Installing user-facing UAPI headers from HEAD... OK
+ Installing user-facing UAPI headers from HEAD^1... OK
+ Checking changes to UAPI headers between HEAD^1 and HEAD...
+ All 912 UAPI headers compatible with x86 appear to be backwards compatible
+
+It doesn't catch any breaking change because, by default, it only
+compares ``HEAD`` to ``HEAD^1``. The breaking change was committed on
+``HEAD~2``. If we wanted the search scope to go back further, we'd have to
+use the ``-p`` option to pass a different past reference. In this case,
+let's pass ``-p HEAD~2`` to the script so it checks UAPI changes between
+``HEAD~2`` and ``HEAD``::
+
+ % ./scripts/check-uapi.sh -p HEAD~2
+ Installing user-facing UAPI headers from HEAD... OK
+ Installing user-facing UAPI headers from HEAD~2... OK
+ Checking changes to UAPI headers between HEAD~2 and HEAD...
+ ==== ABI differences detected in include/linux/bpf.h from HEAD~2 -> HEAD ====
+ [C] 'struct bpf_insn' changed:
+ type size hasn't changed
+ 2 data member changes:
+ '__u8 dst_reg' offset changed from 8 to 12 (in bits) (by +4 bits)
+ '__u8 src_reg' offset changed from 12 to 8 (in bits) (by -4 bits)
+ ==============================================================================
+
+ error - 1/912 UAPI headers compatible with x86 appear _not_ to be backwards compatible
+
+Alternatively, we could have also run with ``-b HEAD~``. This would set the
+base reference to ``HEAD~`` so then the script would compare it to ``HEAD~^1``.
+
+Architecture-specific Headers
+-----------------------------
+
+Consider this change::
+
+ cat << 'EOF' | patch -l -p1
+ --- a/arch/arm64/include/uapi/asm/sigcontext.h
+ +++ b/arch/arm64/include/uapi/asm/sigcontext.h
+ @@ -70,6 +70,7 @@ struct sigcontext {
+ struct _aarch64_ctx {
+ __u32 magic;
+ __u32 size;
+ + __u32 new_var;
+ };
+
+ #define FPSIMD_MAGIC 0x46508001
+ EOF
+
+This is a change to an arm64-specific UAPI header file. In this example, I'm
+running the script from an x86 machine with an x86 compiler, so, by default,
+the script only checks x86-compatible UAPI header files::
+
+ % ./scripts/check-uapi.sh
+ Installing user-facing UAPI headers from dirty tree... OK
+ Installing user-facing UAPI headers from HEAD... OK
+ No changes to UAPI headers were applied between HEAD and dirty tree
+
+With an x86 compiler, we can't check header files in ``arch/arm64``, so the
+script doesn't even try.
+
+If we want to check the header file, we'll have to use an arm64 compiler and
+set ``ARCH`` accordingly::
+
+ % CC=aarch64-linux-gnu-gcc ARCH=arm64 ./scripts/check-uapi.sh
+ Installing user-facing UAPI headers from dirty tree... OK
+ Installing user-facing UAPI headers from HEAD... OK
+ Checking changes to UAPI headers between HEAD and dirty tree...
+ ==== ABI differences detected in include/asm/sigcontext.h from HEAD -> dirty tree ====
+ [C] 'struct _aarch64_ctx' changed:
+ type size changed from 64 to 96 (in bits)
+ 1 data member insertion:
+ '__u32 new_var', at offset 64 (in bits) at sigcontext.h:73:1
+ -- snip --
+ [C] 'struct zt_context' changed:
+ type size changed from 128 to 160 (in bits)
+ 2 data member changes (1 filtered):
+ '__u16 nregs' offset changed from 64 to 96 (in bits) (by +32 bits)
+ '__u16 __reserved[3]' offset changed from 80 to 112 (in bits) (by +32 bits)
+ =======================================================================================
+
+ error - 1/884 UAPI headers compatible with arm64 appear _not_ to be backwards compatible
+
+We can see with ``ARCH`` and ``CC`` set properly for the file, the ABI
+change is reported properly. Also notice that the total number of UAPI
+header files checked by the script changes. This is because the number
+of headers installed for arm64 platforms is different than x86.
+
+Cross-Dependency Breakages
+--------------------------
+
+Consider this change::
+
+ cat << 'EOF' | patch -l -p1
+ --- a/include/uapi/linux/types.h
+ +++ b/include/uapi/linux/types.h
+ @@ -52,7 +52,7 @@ typedef __u32 __bitwise __wsum;
+ #define __aligned_be64 __be64 __attribute__((aligned(8)))
+ #define __aligned_le64 __le64 __attribute__((aligned(8)))
+
+ -typedef unsigned __bitwise __poll_t;
+ +typedef unsigned short __bitwise __poll_t;
+
+ #endif /* __ASSEMBLY__ */
+ #endif /* _UAPI_LINUX_TYPES_H */
+ EOF
+
+Here, we're changing a ``typedef`` in ``types.h``. This doesn't break
+a UAPI in ``types.h``, but other UAPIs in the tree may break due to
+this change::
+
+ % ./scripts/check-uapi.sh
+ Installing user-facing UAPI headers from dirty tree... OK
+ Installing user-facing UAPI headers from HEAD... OK
+ Checking changes to UAPI headers between HEAD and dirty tree...
+ ==== ABI differences detected in include/linux/eventpoll.h from HEAD -> dirty tree ====
+ [C] 'struct epoll_event' changed:
+ type size changed from 96 to 80 (in bits)
+ 2 data member changes:
+ type of '__poll_t events' changed:
+ underlying type 'unsigned int' changed:
+ type name changed from 'unsigned int' to 'unsigned short int'
+ type size changed from 32 to 16 (in bits)
+ '__u64 data' offset changed from 32 to 16 (in bits) (by -16 bits)
+ ========================================================================================
+ include/linux/eventpoll.h did not change between HEAD and dirty tree...
+ It's possible a change to one of the headers it includes caused this error:
+ #include <linux/fcntl.h>
+ #include <linux/types.h>
+
+Note that the script noticed the failing header file did not change,
+so it assumes one of its includes must have caused the breakage. Indeed,
+we can see ``linux/types.h`` is used from ``eventpoll.h``.
+
+UAPI Header Removals
+--------------------
+
+Consider this change::
+
+ cat << 'EOF' | patch -l -p1
+ diff --git a/include/uapi/asm-generic/Kbuild b/include/uapi/asm-generic/Kbuild
+ index ebb180aac74e..a9c88b0a8b3b 100644
+ --- a/include/uapi/asm-generic/Kbuild
+ +++ b/include/uapi/asm-generic/Kbuild
+ @@ -31,6 +31,6 @@ mandatory-y += stat.h
+ mandatory-y += statfs.h
+ mandatory-y += swab.h
+ mandatory-y += termbits.h
+ -mandatory-y += termios.h
+ +#mandatory-y += termios.h
+ mandatory-y += types.h
+ mandatory-y += unistd.h
+ EOF
+
+This script removes a UAPI header file from the install list. Let's run
+the script::
+
+ % ./scripts/check-uapi.sh
+ Installing user-facing UAPI headers from dirty tree... OK
+ Installing user-facing UAPI headers from HEAD... OK
+ Checking changes to UAPI headers between HEAD and dirty tree...
+ ==== UAPI header include/asm/termios.h was removed between HEAD and dirty tree ====
+
+ error - 1/912 UAPI headers compatible with x86 appear _not_ to be backwards compatible
+
+Removing a UAPI header is considered a breaking change, and the script
+will flag it as such.
+
+Checking Historic UAPI Compatibility
+------------------------------------
+
+You can use the ``-b`` and ``-p`` options to examine different chunks of your
+git tree. For example, to check all changed UAPI header files between tags
+v6.0 and v6.1, you'd run::
+
+ % ./scripts/check-uapi.sh -b v6.1 -p v6.0
+ Installing user-facing UAPI headers from v6.1... OK
+ Installing user-facing UAPI headers from v6.0... OK
+ Checking changes to UAPI headers between v6.0 and v6.1...
+
+ --- snip ---
+ error - 37/907 UAPI headers compatible with x86 appear _not_ to be backwards compatible
+
+Note: Before v5.3, a header file needed by the script is not present,
+so the script is unable to check changes before then.
+
+You'll notice that the script detected many UAPI changes that are not
+backwards compatible. Knowing that kernel UAPIs are supposed to be stable
+forever, this is an alarming result. This brings us to the next section:
+caveats.
+
+Caveats
+=======
+
+The UAPI checker makes no assumptions about the author's intention, so some
+types of changes may be flagged even though they intentionally break UAPI.
+
+Removals For Refactoring or Deprecation
+---------------------------------------
+
+Sometimes drivers for very old hardware are removed, such as in this example::
+
+ % ./scripts/check-uapi.sh -b ba47652ba655
+ Installing user-facing UAPI headers from ba47652ba655... OK
+ Installing user-facing UAPI headers from ba47652ba655^1... OK
+ Checking changes to UAPI headers between ba47652ba655^1 and ba47652ba655...
+ ==== UAPI header include/linux/meye.h was removed between ba47652ba655^1 and ba47652ba655 ====
+
+ error - 1/910 UAPI headers compatible with x86 appear _not_ to be backwards compatible
+
+The script will always flag removals (even if they're intentional).
+
+Struct Expansions
+-----------------
+
+Depending on how a structure is handled in kernelspace, a change which
+expands a struct could be non-breaking.
+
+If a struct is used as the argument to an ioctl, then the kernel driver
+must be able to handle ioctl commands of any size. Beyond that, you need
+to be careful when copying data from the user. Say, for example, that
+``struct foo`` is changed like this::
+
+ struct foo {
+ __u64 a; /* added in version 1 */
+ + __u32 b; /* added in version 2 */
+ + __u32 c; /* added in version 2 */
+ }
+
+By default, the script will flag this kind of change for further review::
+
+ [C] 'struct foo' changed:
+ type size changed from 64 to 128 (in bits)
+ 2 data member insertions:
+ '__u32 b', at offset 64 (in bits)
+ '__u32 c', at offset 96 (in bits)
+
+However, it is possible that this change was made safely.
+
+If a userspace program was built with version 1, it will think
+``sizeof(struct foo)`` is 8. That size will be encoded in the
+ioctl value that gets sent to the kernel. If the kernel is built
+with version 2, it will think the ``sizeof(struct foo)`` is 16.
+
+The kernel can use the ``_IOC_SIZE`` macro to get the size encoded
+in the ioctl code that the user passed in and then use
+``copy_struct_from_user()`` to safely copy the value::
+
+ int handle_ioctl(unsigned long cmd, unsigned long arg)
+ {
+ switch _IOC_NR(cmd) {
+ 0x01: {
+ struct foo my_cmd; /* size 16 in the kernel */
+
+ ret = copy_struct_from_user(&my_cmd, arg, sizeof(struct foo), _IOC_SIZE(cmd));
+ ...
+
+``copy_struct_from_user`` will zero the struct in the kernel and then copy
+only the bytes passed in from the user (leaving new members zeroized).
+If the user passed in a larger struct, the extra members are ignored.
+
+If you know this situation is accounted for in the kernel code, you can
+pass ``-i`` to the script, and struct expansions like this will be ignored.
+
+Flex Array Migration
+--------------------
+
+While the script handles expansion into an existing flex array, it does
+still flag initial migration to flex arrays from 1-element fake flex
+arrays. For example::
+
+ struct foo {
+ __u32 x;
+ - __u32 flex[1]; /* fake flex */
+ + __u32 flex[]; /* real flex */
+ };
+
+This change would be flagged by the script::
+
+ [C] 'struct foo' changed:
+ type size changed from 64 to 32 (in bits)
+ 1 data member change:
+ type of '__u32 flex[1]' changed:
+ type name changed from '__u32[1]' to '__u32[]'
+ array type size changed from 32 to 'unknown'
+ array type subrange 1 changed length from 1 to 'unknown'
+
+At this time, there's no way to filter these types of changes, so be
+aware of this possible false positive.
+
+Summary
+-------
+
+While many types of false positives are filtered out by the script,
+it's possible there are some cases where the script flags a change
+which does not break UAPI. It's also possible a change which *does*
+break userspace would not be flagged by this script. While the script
+has been run on much of the kernel history, there could still be corner
+cases that are not accounted for.
+
+The intention is for this script to be used as a quick check for
+maintainers or automated tooling, not as the end-all authority on
+patch compatibility. It's best to remember: use your best judgment
+(and ideally a unit test in userspace) to make sure your UAPI changes
+are backwards-compatible!
diff --git a/Documentation/dev-tools/clang-format.rst b/Documentation/dev-tools/clang-format.rst
new file mode 100644
index 000000000000..1d089a847c1b
--- /dev/null
+++ b/Documentation/dev-tools/clang-format.rst
@@ -0,0 +1,184 @@
+.. _clangformat:
+
+clang-format
+============
+
+``clang-format`` is a tool to format C/C++/... code according to
+a set of rules and heuristics. Like most tools, it is not perfect
+nor covers every single case, but it is good enough to be helpful.
+
+``clang-format`` can be used for several purposes:
+
+ - Quickly reformat a block of code to the kernel style. Specially useful
+ when moving code around and aligning/sorting. See clangformatreformat_.
+
+ - Spot style mistakes, typos and possible improvements in files
+ you maintain, patches you review, diffs, etc. See clangformatreview_.
+
+ - Help you follow the coding style rules, specially useful for those
+ new to kernel development or working at the same time in several
+ projects with different coding styles.
+
+Its configuration file is ``.clang-format`` in the root of the kernel tree.
+The rules contained there try to approximate the most common kernel
+coding style. They also try to follow :ref:`Documentation/process/coding-style.rst <codingstyle>`
+as much as possible. Since not all the kernel follows the same style,
+it is possible that you may want to tweak the defaults for a particular
+subsystem or folder. To do so, you can override the defaults by writing
+another ``.clang-format`` file in a subfolder.
+
+The tool itself has already been included in the repositories of popular
+Linux distributions for a long time. Search for ``clang-format`` in
+your repositories. Otherwise, you can either download pre-built
+LLVM/clang binaries or build the source code from:
+
+ https://releases.llvm.org/download.html
+
+See more information about the tool at:
+
+ https://clang.llvm.org/docs/ClangFormat.html
+
+ https://clang.llvm.org/docs/ClangFormatStyleOptions.html
+
+
+.. _clangformatreview:
+
+Review files and patches for coding style
+-----------------------------------------
+
+By running the tool in its inline mode, you can review full subsystems,
+folders or individual files for code style mistakes, typos or improvements.
+
+To do so, you can run something like::
+
+ # Make sure your working directory is clean!
+ clang-format -i kernel/*.[ch]
+
+And then take a look at the git diff.
+
+Counting the lines of such a diff is also useful for improving/tweaking
+the style options in the configuration file; as well as testing new
+``clang-format`` features/versions.
+
+``clang-format`` also supports reading unified diffs, so you can review
+patches and git diffs easily. See the documentation at:
+
+ https://clang.llvm.org/docs/ClangFormat.html#script-for-patch-reformatting
+
+To avoid ``clang-format`` formatting some portion of a file, you can do::
+
+ int formatted_code;
+ // clang-format off
+ void unformatted_code ;
+ // clang-format on
+ void formatted_code_again;
+
+While it might be tempting to use this to keep a file always in sync with
+``clang-format``, specially if you are writing new files or if you are
+a maintainer, please note that people might be running different
+``clang-format`` versions or not have it available at all. Therefore,
+you should probably refrain yourself from using this in kernel sources;
+at least until we see if ``clang-format`` becomes commonplace.
+
+
+.. _clangformatreformat:
+
+Reformatting blocks of code
+---------------------------
+
+By using an integration with your text editor, you can reformat arbitrary
+blocks (selections) of code with a single keystroke. This is specially
+useful when moving code around, for complex code that is deeply intended,
+for multi-line macros (and aligning their backslashes), etc.
+
+Remember that you can always tweak the changes afterwards in those cases
+where the tool did not do an optimal job. But as a first approximation,
+it can be very useful.
+
+There are integrations for many popular text editors. For some of them,
+like vim, emacs, BBEdit and Visual Studio you can find support built-in.
+For instructions, read the appropriate section at:
+
+ https://clang.llvm.org/docs/ClangFormat.html
+
+For Atom, Eclipse, Sublime Text, Visual Studio Code, XCode and other
+editors and IDEs you should be able to find ready-to-use plugins.
+
+For this use case, consider using a secondary ``.clang-format``
+so that you can tweak a few options. See clangformatextra_.
+
+
+.. _clangformatmissing:
+
+Missing support
+---------------
+
+``clang-format`` is missing support for some things that are common
+in kernel code. They are easy to remember, so if you use the tool
+regularly, you will quickly learn to avoid/ignore those.
+
+In particular, some very common ones you will notice are:
+
+ - Aligned blocks of one-line ``#defines``, e.g.::
+
+ #define TRACING_MAP_BITS_DEFAULT 11
+ #define TRACING_MAP_BITS_MAX 17
+ #define TRACING_MAP_BITS_MIN 7
+
+ vs.::
+
+ #define TRACING_MAP_BITS_DEFAULT 11
+ #define TRACING_MAP_BITS_MAX 17
+ #define TRACING_MAP_BITS_MIN 7
+
+ - Aligned designated initializers, e.g.::
+
+ static const struct file_operations uprobe_events_ops = {
+ .owner = THIS_MODULE,
+ .open = probes_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+ .write = probes_write,
+ };
+
+ vs.::
+
+ static const struct file_operations uprobe_events_ops = {
+ .owner = THIS_MODULE,
+ .open = probes_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+ .write = probes_write,
+ };
+
+
+.. _clangformatextra:
+
+Extra features/options
+----------------------
+
+Some features/style options are not enabled by default in the configuration
+file in order to minimize the differences between the output and the current
+code. In other words, to make the difference as small as possible,
+which makes reviewing full-file style, as well diffs and patches as easy
+as possible.
+
+In other cases (e.g. particular subsystems/folders/files), the kernel style
+might be different and enabling some of these options may approximate
+better the style there.
+
+For instance:
+
+ - Aligning assignments (``AlignConsecutiveAssignments``).
+
+ - Aligning declarations (``AlignConsecutiveDeclarations``).
+
+ - Reflowing text in comments (``ReflowComments``).
+
+ - Sorting ``#includes`` (``SortIncludes``).
+
+They are typically useful for block re-formatting, rather than full-file.
+You might want to create another ``.clang-format`` file and use that one
+from your editor/IDE instead.
diff --git a/Documentation/dev-tools/coccinelle.rst b/Documentation/dev-tools/coccinelle.rst
index aa14f05cabb1..6e70a1e9a3c0 100644
--- a/Documentation/dev-tools/coccinelle.rst
+++ b/Documentation/dev-tools/coccinelle.rst
@@ -4,6 +4,8 @@
.. highlight:: none
+.. _devtools_coccinelle:
+
Coccinelle
==========
@@ -12,7 +14,7 @@ many uses in kernel development, including the application of complex,
tree-wide patches and detection of problematic programming patterns.
Getting Coccinelle
--------------------
+------------------
The semantic patches included in the kernel use features and options
which are provided by Coccinelle version 1.0.0-rc11 and above.
@@ -54,7 +56,7 @@ found at:
https://github.com/coccinelle/coccinelle/blob/master/install.txt
Supplemental documentation
----------------------------
+--------------------------
For supplemental documentation refer to the wiki:
@@ -64,7 +66,7 @@ The wiki documentation always refers to the linux-next version of the script.
For Semantic Patch Language(SmPL) grammar documentation refer to:
-http://coccinelle.lip6.fr/documentation.php
+https://coccinelle.gitlabpages.inria.fr/website/docs/main_grammar.html
Using Coccinelle on the Linux kernel
------------------------------------
@@ -83,7 +85,7 @@ Four basic modes are defined: ``patch``, ``report``, ``context``, and
file:line:column-column: message
- ``context`` highlights lines of interest and their context in a
- diff-like style.Lines of interest are indicated with ``-``.
+ diff-like style. Lines of interest are indicated with ``-``.
- ``org`` generates a report in the Org mode format of Emacs.
@@ -117,7 +119,7 @@ For each semantic patch, a commit message is proposed. It gives a
description of the problem being checked by the semantic patch, and
includes a reference to Coccinelle.
-As any static code analyzer, Coccinelle produces false
+As with any static code analyzer, Coccinelle produces false
positives. Thus, reports must be carefully checked, and patches
reviewed.
@@ -126,25 +128,25 @@ To enable verbose messages set the V= variable, for example::
make coccicheck MODE=report V=1
Coccinelle parallelization
----------------------------
+--------------------------
By default, coccicheck tries to run as parallel as possible. To change
the parallelism, set the J= variable. For example, to run across 4 CPUs::
make coccicheck MODE=report J=4
-As of Coccinelle 1.0.2 Coccinelle uses Ocaml parmap for parallelization,
+As of Coccinelle 1.0.2 Coccinelle uses Ocaml parmap for parallelization;
if support for this is detected you will benefit from parmap parallelization.
When parmap is enabled coccicheck will enable dynamic load balancing by using
-``--chunksize 1`` argument, this ensures we keep feeding threads with work
+``--chunksize 1`` argument. This ensures we keep feeding threads with work
one by one, so that we avoid the situation where most work gets done by only
a few threads. With dynamic load balancing, if a thread finishes early we keep
feeding it more work.
When parmap is enabled, if an error occurs in Coccinelle, this error
-value is propagated back, the return value of the ``make coccicheck``
-captures this return value.
+value is propagated back, and the return value of the ``make coccicheck``
+command captures this return value.
Using Coccinelle with a single semantic patch
---------------------------------------------
@@ -173,15 +175,22 @@ For example, to check drivers/net/wireless/ one may write::
make coccicheck M=drivers/net/wireless/
To apply Coccinelle on a file basis, instead of a directory basis, the
-following command may be used::
+C variable is used by the makefile to select which files to work with.
+This variable can be used to run scripts for the entire kernel, a
+specific directory, or for a single file.
+
+For example, to check drivers/bluetooth/bfusb.c, the value 1 is
+passed to the C variable to check files that make considers
+need to be compiled.::
- make C=1 CHECK="scripts/coccicheck"
+ make C=1 CHECK=scripts/coccicheck drivers/bluetooth/bfusb.o
-To check only newly edited code, use the value 2 for the C flag, i.e.::
+The value 2 is passed to the C variable to check files regardless of
+whether they need to be compiled or not.::
- make C=2 CHECK="scripts/coccicheck"
+ make C=2 CHECK=scripts/coccicheck drivers/bluetooth/bfusb.o
-In these modes, which works on a file basis, there is no information
+In these modes, which work on a file basis, there is no information
about semantic patches displayed, and no commit message proposed.
This runs every semantic patch in scripts/coccinelle by default. The
@@ -196,12 +205,12 @@ Debugging Coccinelle SmPL patches
Using coccicheck is best as it provides in the spatch command line
include options matching the options used when we compile the kernel.
-You can learn what these options are by using V=1, you could then
+You can learn what these options are by using V=1; you could then
manually run Coccinelle with debug options added.
Alternatively you can debug running Coccinelle against SmPL patches
-by asking for stderr to be redirected to stderr, by default stderr
-is redirected to /dev/null, if you'd like to capture stderr you
+by asking for stderr to be redirected to stderr. By default stderr
+is redirected to /dev/null; if you'd like to capture stderr you
can specify the ``DEBUG_FILE="file.txt"`` option to coccicheck. For
instance::
@@ -209,62 +218,61 @@ instance::
make coccicheck COCCI=scripts/coccinelle/free/kfree.cocci MODE=report DEBUG_FILE=cocci.err
cat cocci.err
-You can use SPFLAGS to add debugging flags, for instance you may want to
-add both --profile --show-trying to SPFLAGS when debugging. For instance
+You can use SPFLAGS to add debugging flags; for instance you may want to
+add both ``--profile --show-trying`` to SPFLAGS when debugging. For example
you may want to use::
rm -f err.log
export COCCI=scripts/coccinelle/misc/irqf_oneshot.cocci
- make coccicheck DEBUG_FILE="err.log" MODE=report SPFLAGS="--profile --show-trying" M=./drivers/mfd/arizona-irq.c
+ make coccicheck DEBUG_FILE="err.log" MODE=report SPFLAGS="--profile --show-trying" M=./drivers/mfd
err.log will now have the profiling information, while stdout will
provide some progress information as Coccinelle moves forward with
work.
+NOTE:
+
DEBUG_FILE support is only supported when using coccinelle >= 1.0.2.
+Currently, DEBUG_FILE support is only available to check folders, and
+not single files. This is because checking a single file requires spatch
+to be called twice leading to DEBUG_FILE being set both times to the same value,
+giving rise to an error.
+
.cocciconfig support
--------------------
Coccinelle supports reading .cocciconfig for default Coccinelle options that
-should be used every time spatch is spawned, the order of precedence for
+should be used every time spatch is spawned. The order of precedence for
variables for .cocciconfig is as follows:
- Your current user's home directory is processed first
- Your directory from which spatch is called is processed next
-- The directory provided with the --dir option is processed last, if used
-
-Since coccicheck runs through make, it naturally runs from the kernel
-proper dir, as such the second rule above would be implied for picking up a
-.cocciconfig when using ``make coccicheck``.
+- The directory provided with the ``--dir`` option is processed last, if used
``make coccicheck`` also supports using M= targets. If you do not supply
any M= target, it is assumed you want to target the entire kernel.
The kernel coccicheck script has::
- if [ "$KBUILD_EXTMOD" = "" ] ; then
- OPTIONS="--dir $srctree $COCCIINCLUDE"
- else
- OPTIONS="--dir $KBUILD_EXTMOD $COCCIINCLUDE"
- fi
+ OPTIONS="--dir $srcroot $COCCIINCLUDE"
-KBUILD_EXTMOD is set when an explicit target with M= is used. For both cases
-the spatch --dir argument is used, as such third rule applies when whether M=
-is used or not, and when M= is used the target directory can have its own
-.cocciconfig file. When M= is not passed as an argument to coccicheck the
-target directory is the same as the directory from where spatch was called.
+Here, $srcroot refers to the source directory of the target: it points to the
+external module's source directory when M= used, and otherwise, to the kernel
+source directory. The third rule ensures the spatch reads the .cocciconfig from
+the target directory, allowing external modules to have their own .cocciconfig
+file.
If not using the kernel's coccicheck target, keep the above precedence
order logic of .cocciconfig reading. If using the kernel's coccicheck target,
override any of the kernel's .coccicheck's settings using SPFLAGS.
-We help Coccinelle when used against Linux with a set of sensible defaults
+We help Coccinelle when used against Linux with a set of sensible default
options for Linux with our own Linux .cocciconfig. This hints to coccinelle
-git can be used for ``git grep`` queries over coccigrep. A timeout of 200
+that git can be used for ``git grep`` queries over coccigrep. A timeout of 200
seconds should suffice for now.
The options picked up by coccinelle when reading a .cocciconfig do not appear
-as arguments to spatch processes running on your system, to confirm what
+as arguments to spatch processes running on your system. To confirm what
options will be used by Coccinelle run::
spatch --print-options-only
@@ -288,7 +296,7 @@ given to it when options are in conflict. ::
Coccinelle supports idutils as well but requires coccinelle >= 1.0.6.
When no ID file is specified coccinelle assumes your ID database file
-is in the file .id-utils.index on the top level of the kernel, coccinelle
+is in the file .id-utils.index on the top level of the kernel. Coccinelle
carries a script scripts/idutils_index.sh which creates the database with::
mkid -i C --output .id-utils.index
@@ -315,7 +323,7 @@ SmPL patch specific options
---------------------------
SmPL patches can have their own requirements for options passed
-to Coccinelle. SmPL patch specific options can be provided by
+to Coccinelle. SmPL patch-specific options can be provided by
providing them at the top of the SmPL patch, for instance::
// Options: --no-includes --include-headers
@@ -325,13 +333,13 @@ SmPL patch Coccinelle requirements
As Coccinelle features get added some more advanced SmPL patches
may require newer versions of Coccinelle. If an SmPL patch requires
-at least a version of Coccinelle, this can be specified as follows,
+a minimum version of Coccinelle, this can be specified as follows,
as an example if requiring at least Coccinelle >= 1.0.5::
// Requires: 1.0.5
Proposing new semantic patches
--------------------------------
+------------------------------
New semantic patches can be proposed and submitted by kernel
developers. For sake of clarity, they should be organized in the
diff --git a/Documentation/dev-tools/conf.py b/Documentation/dev-tools/conf.py
deleted file mode 100644
index 7faafa3f7888..000000000000
--- a/Documentation/dev-tools/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = "Development tools for the kernel"
-
-tags.add("subproject")
-
-latex_documents = [
- ('index', 'dev-tools.tex', project,
- 'The kernel development community', 'manual'),
-]
diff --git a/Documentation/dev-tools/gcov.rst b/Documentation/dev-tools/gcov.rst
index 69a7d90c320a..075df6a4598d 100644
--- a/Documentation/dev-tools/gcov.rst
+++ b/Documentation/dev-tools/gcov.rst
@@ -22,8 +22,8 @@ Possible uses:
* minimizing kernel configurations (do I need this option if the
associated code is never run?)
-.. _gcov: http://gcc.gnu.org/onlinedocs/gcc/Gcov.html
-.. _lcov: http://ltp.sourceforge.net/coverage/lcov.php
+.. _gcov: https://gcc.gnu.org/onlinedocs/gcc/Gcov.html
+.. _lcov: https://github.com/linux-test-project/lcov
Preparation
@@ -34,10 +34,6 @@ Configure the kernel with::
CONFIG_DEBUG_FS=y
CONFIG_GCOV_KERNEL=y
-select the gcc's gcov format, default is autodetect based on gcc version::
-
- CONFIG_GCOV_FORMAT_AUTODETECT=y
-
and to get coverage data for the entire kernel::
CONFIG_GCOV_PROFILE_ALL=y
@@ -79,6 +75,17 @@ Only files which are linked to the main kernel image or are compiled as
kernel modules are supported by this mechanism.
+Module specific configs
+-----------------------
+
+Gcov kernel configs for specific modules are described below:
+
+CONFIG_GCOV_PROFILE_RDS:
+ Enables GCOV profiling on RDS for checking which functions or
+ lines are executed. This config is used by the rds selftest to
+ generate coverage reports. If left unset the report is omitted.
+
+
Files
-----
@@ -128,6 +135,8 @@ box for setups where kernels are built and run on the same machine. In
cases where the kernel runs on a separate machine, special preparations
must be made, depending on where the gcov tool is used:
+.. _gcov-test:
+
a) gcov is run on the TEST machine
The gcov tool version on the test machine must be compatible with the
@@ -147,6 +156,8 @@ a) gcov is run on the TEST machine
machine. If any of the path components is symbolic link, the actual
directory needs to be used instead (due to make's CURDIR handling).
+.. _gcov-build:
+
b) gcov is run on the BUILD machine
The following files need to be copied after each test case from test
@@ -169,6 +180,20 @@ b) gcov is run on the BUILD machine
[user@build] gcov -o /tmp/coverage/tmp/out/init main.c
+Note on compilers
+-----------------
+
+GCC and LLVM gcov tools are not necessarily compatible. Use gcov_ to work with
+GCC-generated .gcno and .gcda files, and use llvm-cov_ for Clang.
+
+.. _gcov: https://gcc.gnu.org/onlinedocs/gcc/Gcov.html
+.. _llvm-cov: https://llvm.org/docs/CommandGuide/llvm-cov.html
+
+Build differences between GCC and Clang gcov are handled by Kconfig. It
+automatically selects the appropriate gcov format depending on the detected
+toolchain.
+
+
Troubleshooting
---------------
@@ -193,7 +218,7 @@ Cause
may not correctly copy files from sysfs.
Solution
- Use ``cat``' to read ``.gcda`` files and ``cp -d`` to copy links.
+ Use ``cat`` to read ``.gcda`` files and ``cp -d`` to copy links.
Alternatively use the mechanism shown in Appendix B.
@@ -201,7 +226,7 @@ Appendix A: gather_on_build.sh
------------------------------
Sample script to gather coverage meta files on the build machine
-(see 6a):
+(see :ref:`Separated build and test machines a. <gcov-test>`):
.. code-block:: sh
@@ -234,7 +259,7 @@ Appendix B: gather_on_test.sh
-----------------------------
Sample script to gather coverage data files on the test machine
-(see 6b):
+(see :ref:`Separated build and test machines b. <gcov-build>`):
.. code-block:: sh
diff --git a/Documentation/dev-tools/gdb-kernel-debugging.rst b/Documentation/dev-tools/gdb-kernel-debugging.rst
deleted file mode 100644
index 19df79286f00..000000000000
--- a/Documentation/dev-tools/gdb-kernel-debugging.rst
+++ /dev/null
@@ -1,175 +0,0 @@
-.. highlight:: none
-
-Debugging kernel and modules via gdb
-====================================
-
-The kernel debugger kgdb, hypervisors like QEMU or JTAG-based hardware
-interfaces allow to debug the Linux kernel and its modules during runtime
-using gdb. Gdb comes with a powerful scripting interface for python. The
-kernel provides a collection of helper scripts that can simplify typical
-kernel debugging steps. This is a short tutorial about how to enable and use
-them. It focuses on QEMU/KVM virtual machines as target, but the examples can
-be transferred to the other gdb stubs as well.
-
-
-Requirements
-------------
-
-- gdb 7.2+ (recommended: 7.4+) with python support enabled (typically true
- for distributions)
-
-
-Setup
------
-
-- Create a virtual Linux machine for QEMU/KVM (see www.linux-kvm.org and
- www.qemu.org for more details). For cross-development,
- http://landley.net/aboriginal/bin keeps a pool of machine images and
- toolchains that can be helpful to start from.
-
-- Build the kernel with CONFIG_GDB_SCRIPTS enabled, but leave
- CONFIG_DEBUG_INFO_REDUCED off. If your architecture supports
- CONFIG_FRAME_POINTER, keep it enabled.
-
-- Install that kernel on the guest, turn off KASLR if necessary by adding
- "nokaslr" to the kernel command line.
- Alternatively, QEMU allows to boot the kernel directly using -kernel,
- -append, -initrd command line switches. This is generally only useful if
- you do not depend on modules. See QEMU documentation for more details on
- this mode. In this case, you should build the kernel with
- CONFIG_RANDOMIZE_BASE disabled if the architecture supports KASLR.
-
-- Enable the gdb stub of QEMU/KVM, either
-
- - at VM startup time by appending "-s" to the QEMU command line
-
- or
-
- - during runtime by issuing "gdbserver" from the QEMU monitor
- console
-
-- cd /path/to/linux-build
-
-- Start gdb: gdb vmlinux
-
- Note: Some distros may restrict auto-loading of gdb scripts to known safe
- directories. In case gdb reports to refuse loading vmlinux-gdb.py, add::
-
- add-auto-load-safe-path /path/to/linux-build
-
- to ~/.gdbinit. See gdb help for more details.
-
-- Attach to the booted guest::
-
- (gdb) target remote :1234
-
-
-Examples of using the Linux-provided gdb helpers
-------------------------------------------------
-
-- Load module (and main kernel) symbols::
-
- (gdb) lx-symbols
- loading vmlinux
- scanning for modules in /home/user/linux/build
- loading @0xffffffffa0020000: /home/user/linux/build/net/netfilter/xt_tcpudp.ko
- loading @0xffffffffa0016000: /home/user/linux/build/net/netfilter/xt_pkttype.ko
- loading @0xffffffffa0002000: /home/user/linux/build/net/netfilter/xt_limit.ko
- loading @0xffffffffa00ca000: /home/user/linux/build/net/packet/af_packet.ko
- loading @0xffffffffa003c000: /home/user/linux/build/fs/fuse/fuse.ko
- ...
- loading @0xffffffffa0000000: /home/user/linux/build/drivers/ata/ata_generic.ko
-
-- Set a breakpoint on some not yet loaded module function, e.g.::
-
- (gdb) b btrfs_init_sysfs
- Function "btrfs_init_sysfs" not defined.
- Make breakpoint pending on future shared library load? (y or [n]) y
- Breakpoint 1 (btrfs_init_sysfs) pending.
-
-- Continue the target::
-
- (gdb) c
-
-- Load the module on the target and watch the symbols being loaded as well as
- the breakpoint hit::
-
- loading @0xffffffffa0034000: /home/user/linux/build/lib/libcrc32c.ko
- loading @0xffffffffa0050000: /home/user/linux/build/lib/lzo/lzo_compress.ko
- loading @0xffffffffa006e000: /home/user/linux/build/lib/zlib_deflate/zlib_deflate.ko
- loading @0xffffffffa01b1000: /home/user/linux/build/fs/btrfs/btrfs.ko
-
- Breakpoint 1, btrfs_init_sysfs () at /home/user/linux/fs/btrfs/sysfs.c:36
- 36 btrfs_kset = kset_create_and_add("btrfs", NULL, fs_kobj);
-
-- Dump the log buffer of the target kernel::
-
- (gdb) lx-dmesg
- [ 0.000000] Initializing cgroup subsys cpuset
- [ 0.000000] Initializing cgroup subsys cpu
- [ 0.000000] Linux version 3.8.0-rc4-dbg+ (...
- [ 0.000000] Command line: root=/dev/sda2 resume=/dev/sda1 vga=0x314
- [ 0.000000] e820: BIOS-provided physical RAM map:
- [ 0.000000] BIOS-e820: [mem 0x0000000000000000-0x000000000009fbff] usable
- [ 0.000000] BIOS-e820: [mem 0x000000000009fc00-0x000000000009ffff] reserved
- ....
-
-- Examine fields of the current task struct::
-
- (gdb) p $lx_current().pid
- $1 = 4998
- (gdb) p $lx_current().comm
- $2 = "modprobe\000\000\000\000\000\000\000"
-
-- Make use of the per-cpu function for the current or a specified CPU::
-
- (gdb) p $lx_per_cpu("runqueues").nr_running
- $3 = 1
- (gdb) p $lx_per_cpu("runqueues", 2).nr_running
- $4 = 0
-
-- Dig into hrtimers using the container_of helper::
-
- (gdb) set $next = $lx_per_cpu("hrtimer_bases").clock_base[0].active.next
- (gdb) p *$container_of($next, "struct hrtimer", "node")
- $5 = {
- node = {
- node = {
- __rb_parent_color = 18446612133355256072,
- rb_right = 0x0 <irq_stack_union>,
- rb_left = 0x0 <irq_stack_union>
- },
- expires = {
- tv64 = 1835268000000
- }
- },
- _softexpires = {
- tv64 = 1835268000000
- },
- function = 0xffffffff81078232 <tick_sched_timer>,
- base = 0xffff88003fd0d6f0,
- state = 1,
- start_pid = 0,
- start_site = 0xffffffff81055c1f <hrtimer_start_range_ns+20>,
- start_comm = "swapper/2\000\000\000\000\000\000"
- }
-
-
-List of commands and functions
-------------------------------
-
-The number of commands and convenience functions may evolve over the time,
-this is just a snapshot of the initial version::
-
- (gdb) apropos lx
- function lx_current -- Return current task
- function lx_module -- Find module by name and return the module variable
- function lx_per_cpu -- Return per-cpu variable
- function lx_task_by_pid -- Find Linux task by PID and return the task_struct variable
- function lx_thread_info -- Calculate Linux thread_info from task variable
- lx-dmesg -- Print Linux kernel log buffer
- lx-lsmod -- List currently loaded modules
- lx-symbols -- (Re-)load symbols of Linux kernel and currently loaded modules
-
-Detailed help can be obtained via "help <command-name>" for commands and "help
-function <function-name>" for convenience functions.
diff --git a/Documentation/dev-tools/gpio-sloppy-logic-analyzer.rst b/Documentation/dev-tools/gpio-sloppy-logic-analyzer.rst
new file mode 100644
index 000000000000..d69f24c0d9e1
--- /dev/null
+++ b/Documentation/dev-tools/gpio-sloppy-logic-analyzer.rst
@@ -0,0 +1,93 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============================================
+Linux Kernel GPIO based sloppy logic analyzer
+=============================================
+
+:Author: Wolfram Sang
+
+Introduction
+============
+
+This document briefly describes how to run the GPIO based in-kernel sloppy
+logic analyzer running on an isolated CPU.
+
+The sloppy logic analyzer will utilize a few GPIO lines in input mode on a
+system to rapidly sample these digital lines, which will, if the Nyquist
+criteria is met, result in a time series log with approximate waveforms as they
+appeared on these lines. One way to use it is to analyze external traffic
+connected to these GPIO lines with wires (i.e. digital probes), acting as a
+common logic analyzer.
+
+Another feature is to snoop on on-chip peripherals if the I/O cells of these
+peripherals can be used in GPIO input mode at the same time as they are being
+used as inputs or outputs for the peripheral. That means you could e.g. snoop
+I2C traffic without any wiring (if your hardware supports it). In the pin
+control subsystem such pin controllers are called "non-strict": a certain pin
+can be used with a certain peripheral and as a GPIO input line at the same
+time.
+
+Note that this is a last resort analyzer which can be affected by latencies,
+non-deterministic code paths and non-maskable interrupts. It is called 'sloppy'
+for a reason. However, for e.g. remote development, it may be useful to get a
+first view and aid further debugging.
+
+Setup
+=====
+
+Your kernel must have CONFIG_DEBUG_FS and CONFIG_CPUSETS enabled. Ideally, your
+runtime environment does not utilize cpusets otherwise, then isolation of a CPU
+core is easiest. If you do need cpusets, check that helper script for the
+sloppy logic analyzer does not interfere with your other settings.
+
+Tell the kernel which GPIOs are used as probes. For a Device Tree based system,
+you need to use the following bindings. Because these bindings are only for
+debugging, there is no official schema::
+
+ i2c-analyzer {
+ compatible = "gpio-sloppy-logic-analyzer";
+ probe-gpios = <&gpio6 21 GPIO_OPEN_DRAIN>, <&gpio6 4 GPIO_OPEN_DRAIN>;
+ probe-names = "SCL", "SDA";
+ };
+
+Note that you must provide a name for every GPIO specified. Currently a
+maximum of 8 probes are supported. 32 are likely possible but are not
+implemented yet.
+
+Usage
+=====
+
+The logic analyzer is configurable via files in debugfs. However, it is
+strongly recommended to not use them directly, but to use the script
+``tools/gpio/gpio-sloppy-logic-analyzer``. Besides checking parameters more
+extensively, it will isolate the CPU core so you will have the least
+disturbance while measuring.
+
+The script has a help option explaining the parameters. For the above DT
+snippet which analyzes an I2C bus at 400kHz on a Renesas Salvator-XS board, the
+following settings are used: The isolated CPU shall be CPU1 because it is a big
+core in a big.LITTLE setup. Because CPU1 is the default, we don't need a
+parameter. The bus speed is 400kHz. So, the sampling theorem says we need to
+sample at least at 800kHz. However, falling edges of both signals in an I2C
+start condition happen faster, so we need a higher sampling frequency, e.g.
+``-s 1500000`` for 1.5MHz. Also, we don't want to sample right away but wait
+for a start condition on an idle bus. So, we need to set a trigger to a falling
+edge on SDA while SCL stays high, i.e. ``-t 1H+2F``. Last is the duration, let
+us assume 15ms here which results in the parameter ``-d 15000``. So,
+altogether::
+
+ gpio-sloppy-logic-analyzer -s 1500000 -t 1H+2F -d 15000
+
+Note that the process will return you back to the prompt but a sub-process is
+still sampling in the background. Unless this has finished, you will not find a
+result file in the current or specified directory. For the above example, we
+will then need to trigger I2C communication::
+
+ i2cdetect -y -r <your bus number>
+
+Result is a .sr file to be consumed with PulseView or sigrok-cli from the free
+`sigrok`_ project. It is a zip file which also contains the binary sample data
+which may be consumed by other software. The filename is the logic analyzer
+instance name plus a since-epoch timestamp.
+
+.. _sigrok: https://sigrok.org/
diff --git a/Documentation/dev-tools/index.rst b/Documentation/dev-tools/index.rst
index e313925fb0fa..4b8425e348ab 100644
--- a/Documentation/dev-tools/index.rst
+++ b/Documentation/dev-tools/index.rst
@@ -3,27 +3,41 @@ Development tools for the kernel
================================
This document is a collection of documents about development tools that can
-be used to work on the kernel. For now, the documents have been pulled
-together without any significant effot to integrate them into a coherent
+be used to work on the kernel. For now, the documents have been pulled
+together without any significant effort to integrate them into a coherent
whole; patches welcome!
-.. class:: toc-title
+A brief overview of testing-specific tools can be found in
+Documentation/dev-tools/testing-overview.rst
- Table of contents
+Tools that are specific to debugging can be found in
+Documentation/process/debugging/index.rst
.. toctree::
+ :caption: Table of contents
:maxdepth: 2
+ testing-overview
+ checkpatch
+ clang-format
coccinelle
sparse
kcov
gcov
kasan
+ kmsan
ubsan
kmemleak
- gdb-kernel-debugging
- kgdb
+ kcsan
+ lkmm/index
+ kfence
kselftest
+ kunit/index
+ ktap
+ checkuapi
+ gpio-sloppy-logic-analyzer
+ autofdo
+ propeller
.. only:: subproject and html
diff --git a/Documentation/dev-tools/kasan.rst b/Documentation/dev-tools/kasan.rst
index aabc8738b3d8..a034700da7c4 100644
--- a/Documentation/dev-tools/kasan.rst
+++ b/Documentation/dev-tools/kasan.rst
@@ -1,173 +1,571 @@
-The Kernel Address Sanitizer (KASAN)
-====================================
+.. SPDX-License-Identifier: GPL-2.0
+.. Copyright (C) 2023, Google LLC.
+
+Kernel Address Sanitizer (KASAN)
+================================
Overview
--------
-KernelAddressSANitizer (KASAN) is a dynamic memory error detector. It provides
-a fast and comprehensive solution for finding use-after-free and out-of-bounds
-bugs.
+Kernel Address Sanitizer (KASAN) is a dynamic memory safety error detector
+designed to find out-of-bounds and use-after-free bugs.
-KASAN uses compile-time instrumentation for checking every memory access,
-therefore you will need a GCC version 4.9.2 or later. GCC 5.0 or later is
-required for detection of out-of-bounds accesses to stack or global variables.
+KASAN has three modes:
-Currently KASAN is supported only for the x86_64 and arm64 architectures.
+1. Generic KASAN
+2. Software Tag-Based KASAN
+3. Hardware Tag-Based KASAN
-Usage
------
+Generic KASAN, enabled with CONFIG_KASAN_GENERIC, is the mode intended for
+debugging, similar to userspace ASan. This mode is supported on many CPU
+architectures, but it has significant performance and memory overheads.
-To enable KASAN configure kernel with::
+Software Tag-Based KASAN or SW_TAGS KASAN, enabled with CONFIG_KASAN_SW_TAGS,
+can be used for both debugging and dogfood testing, similar to userspace HWASan.
+This mode is only supported for arm64, but its moderate memory overhead allows
+using it for testing on memory-restricted devices with real workloads.
- CONFIG_KASAN = y
+Hardware Tag-Based KASAN or HW_TAGS KASAN, enabled with CONFIG_KASAN_HW_TAGS,
+is the mode intended to be used as an in-field memory bug detector or as a
+security mitigation. This mode only works on arm64 CPUs that support MTE
+(Memory Tagging Extension), but it has low memory and performance overheads and
+thus can be used in production.
-and choose between CONFIG_KASAN_OUTLINE and CONFIG_KASAN_INLINE. Outline and
-inline are compiler instrumentation types. The former produces smaller binary
-the latter is 1.1 - 2 times faster. Inline instrumentation requires a GCC
-version 5.0 or later.
+For details about the memory and performance impact of each KASAN mode, see the
+descriptions of the corresponding Kconfig options.
-KASAN works with both SLUB and SLAB memory allocators.
-For better bug detection and nicer reporting, enable CONFIG_STACKTRACE.
+The Generic and the Software Tag-Based modes are commonly referred to as the
+software modes. The Software Tag-Based and the Hardware Tag-Based modes are
+referred to as the tag-based modes.
-To disable instrumentation for specific files or directories, add a line
-similar to the following to the respective kernel Makefile:
+Support
+-------
-- For a single file (e.g. main.o)::
+Architectures
+~~~~~~~~~~~~~
- KASAN_SANITIZE_main.o := n
+Generic KASAN is supported on x86_64, arm, arm64, powerpc, riscv, s390, xtensa,
+and loongarch, and the tag-based KASAN modes are supported only on arm64.
-- For all files in one directory::
+Compilers
+~~~~~~~~~
- KASAN_SANITIZE := n
+Software KASAN modes use compile-time instrumentation to insert validity checks
+before every memory access and thus require a compiler version that provides
+support for that. The Hardware Tag-Based mode relies on hardware to perform
+these checks but still requires a compiler version that supports the memory
+tagging instructions.
+
+Generic KASAN requires GCC version 8.3.0 or later
+or any Clang version supported by the kernel.
+
+Software Tag-Based KASAN requires GCC 11+
+or any Clang version supported by the kernel.
+
+Hardware Tag-Based KASAN requires GCC 10+ or Clang 12+.
+
+Memory types
+~~~~~~~~~~~~
+
+Generic KASAN supports finding bugs in all of slab, page_alloc, vmap, vmalloc,
+stack, and global memory.
+
+Software Tag-Based KASAN supports slab, page_alloc, vmalloc, and stack memory.
+
+Hardware Tag-Based KASAN supports slab, page_alloc, and non-executable vmalloc
+memory.
+
+For slab, both software KASAN modes support SLUB and SLAB allocators, while
+Hardware Tag-Based KASAN only supports SLUB.
+
+Usage
+-----
+
+To enable KASAN, configure the kernel with::
+
+ CONFIG_KASAN=y
+
+and choose between ``CONFIG_KASAN_GENERIC`` (to enable Generic KASAN),
+``CONFIG_KASAN_SW_TAGS`` (to enable Software Tag-Based KASAN), and
+``CONFIG_KASAN_HW_TAGS`` (to enable Hardware Tag-Based KASAN).
+
+For the software modes, also choose between ``CONFIG_KASAN_OUTLINE`` and
+``CONFIG_KASAN_INLINE``. Outline and inline are compiler instrumentation types.
+The former produces a smaller binary while the latter is up to 2 times faster.
+
+To include alloc and free stack traces of affected slab objects into reports,
+enable ``CONFIG_STACKTRACE``. To include alloc and free stack traces of affected
+physical pages, enable ``CONFIG_PAGE_OWNER`` and boot with ``page_owner=on``.
+
+Boot parameters
+~~~~~~~~~~~~~~~
+
+KASAN is affected by the generic ``panic_on_warn`` command line parameter.
+When it is enabled, KASAN panics the kernel after printing a bug report.
+
+By default, KASAN prints a bug report only for the first invalid memory access.
+With ``kasan_multi_shot``, KASAN prints a report on every invalid access. This
+effectively disables ``panic_on_warn`` for KASAN reports.
+
+Alternatively, independent of ``panic_on_warn``, the ``kasan.fault=`` boot
+parameter can be used to control panic and reporting behaviour:
+
+- ``kasan.fault=report``, ``=panic``, or ``=panic_on_write`` controls whether
+ to only print a KASAN report, panic the kernel, or panic the kernel on
+ invalid writes only (default: ``report``). The panic happens even if
+ ``kasan_multi_shot`` is enabled. Note that when using asynchronous mode of
+ Hardware Tag-Based KASAN, ``kasan.fault=panic_on_write`` always panics on
+ asynchronously checked accesses (including reads).
+
+Software and Hardware Tag-Based KASAN modes (see the section about various
+modes below) support altering stack trace collection behavior:
+
+- ``kasan.stacktrace=off`` or ``=on`` disables or enables alloc and free stack
+ traces collection (default: ``on``).
+- ``kasan.stack_ring_size=<number of entries>`` specifies the number of entries
+ in the stack ring (default: ``32768``).
+
+Hardware Tag-Based KASAN mode is intended for use in production as a security
+mitigation. Therefore, it supports additional boot parameters that allow
+disabling KASAN altogether or controlling its features:
+
+- ``kasan=off`` or ``=on`` controls whether KASAN is enabled (default: ``on``).
+
+- ``kasan.mode=sync``, ``=async`` or ``=asymm`` controls whether KASAN
+ is configured in synchronous, asynchronous or asymmetric mode of
+ execution (default: ``sync``).
+ Synchronous mode: a bad access is detected immediately when a tag
+ check fault occurs.
+ Asynchronous mode: a bad access detection is delayed. When a tag check
+ fault occurs, the information is stored in hardware (in the TFSR_EL1
+ register for arm64). The kernel periodically checks the hardware and
+ only reports tag faults during these checks.
+ Asymmetric mode: a bad access is detected synchronously on reads and
+ asynchronously on writes.
+
+- ``kasan.write_only=off`` or ``kasan.write_only=on`` controls whether KASAN
+ checks the write (store) accesses only or all accesses (default: ``off``).
+
+- ``kasan.vmalloc=off`` or ``=on`` disables or enables tagging of vmalloc
+ allocations (default: ``on``).
+
+- ``kasan.page_alloc.sample=<sampling interval>`` makes KASAN tag only every
+ Nth page_alloc allocation with the order equal or greater than
+ ``kasan.page_alloc.sample.order``, where N is the value of the ``sample``
+ parameter (default: ``1``, or tag every such allocation).
+ This parameter is intended to mitigate the performance overhead introduced
+ by KASAN.
+ Note that enabling this parameter makes Hardware Tag-Based KASAN skip checks
+ of allocations chosen by sampling and thus miss bad accesses to these
+ allocations. Use the default value for accurate bug detection.
+
+- ``kasan.page_alloc.sample.order=<minimum page order>`` specifies the minimum
+ order of allocations that are affected by sampling (default: ``3``).
+ Only applies when ``kasan.page_alloc.sample`` is set to a value greater
+ than ``1``.
+ This parameter is intended to allow sampling only large page_alloc
+ allocations, which is the biggest source of the performance overhead.
Error reports
~~~~~~~~~~~~~
-A typical out of bounds access report looks like this::
+A typical KASAN report looks like this::
==================================================================
- BUG: AddressSanitizer: out of bounds access in kmalloc_oob_right+0x65/0x75 [test_kasan] at addr ffff8800693bc5d3
- Write of size 1 by task modprobe/1689
- =============================================================================
- BUG kmalloc-128 (Not tainted): kasan error
- -----------------------------------------------------------------------------
-
- Disabling lock debugging due to kernel taint
- INFO: Allocated in kmalloc_oob_right+0x3d/0x75 [test_kasan] age=0 cpu=0 pid=1689
- __slab_alloc+0x4b4/0x4f0
- kmem_cache_alloc_trace+0x10b/0x190
- kmalloc_oob_right+0x3d/0x75 [test_kasan]
- init_module+0x9/0x47 [test_kasan]
- do_one_initcall+0x99/0x200
- load_module+0x2cb3/0x3b20
- SyS_finit_module+0x76/0x80
- system_call_fastpath+0x12/0x17
- INFO: Slab 0xffffea0001a4ef00 objects=17 used=7 fp=0xffff8800693bd728 flags=0x100000000004080
- INFO: Object 0xffff8800693bc558 @offset=1368 fp=0xffff8800693bc720
-
- Bytes b4 ffff8800693bc548: 00 00 00 00 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a ........ZZZZZZZZ
- Object ffff8800693bc558: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
- Object ffff8800693bc568: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
- Object ffff8800693bc578: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
- Object ffff8800693bc588: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
- Object ffff8800693bc598: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
- Object ffff8800693bc5a8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
- Object ffff8800693bc5b8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
- Object ffff8800693bc5c8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b a5 kkkkkkkkkkkkkkk.
- Redzone ffff8800693bc5d8: cc cc cc cc cc cc cc cc ........
- Padding ffff8800693bc718: 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ
- CPU: 0 PID: 1689 Comm: modprobe Tainted: G B 3.18.0-rc1-mm1+ #98
- Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014
- ffff8800693bc000 0000000000000000 ffff8800693bc558 ffff88006923bb78
- ffffffff81cc68ae 00000000000000f3 ffff88006d407600 ffff88006923bba8
- ffffffff811fd848 ffff88006d407600 ffffea0001a4ef00 ffff8800693bc558
+ BUG: KASAN: slab-out-of-bounds in kmalloc_oob_right+0xa8/0xbc [kasan_test]
+ Write of size 1 at addr ffff8801f44ec37b by task insmod/2760
+
+ CPU: 1 PID: 2760 Comm: insmod Not tainted 4.19.0-rc3+ #698
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014
Call Trace:
- [<ffffffff81cc68ae>] dump_stack+0x46/0x58
- [<ffffffff811fd848>] print_trailer+0xf8/0x160
- [<ffffffffa00026a7>] ? kmem_cache_oob+0xc3/0xc3 [test_kasan]
- [<ffffffff811ff0f5>] object_err+0x35/0x40
- [<ffffffffa0002065>] ? kmalloc_oob_right+0x65/0x75 [test_kasan]
- [<ffffffff8120b9fa>] kasan_report_error+0x38a/0x3f0
- [<ffffffff8120a79f>] ? kasan_poison_shadow+0x2f/0x40
- [<ffffffff8120b344>] ? kasan_unpoison_shadow+0x14/0x40
- [<ffffffff8120a79f>] ? kasan_poison_shadow+0x2f/0x40
- [<ffffffffa00026a7>] ? kmem_cache_oob+0xc3/0xc3 [test_kasan]
- [<ffffffff8120a995>] __asan_store1+0x75/0xb0
- [<ffffffffa0002601>] ? kmem_cache_oob+0x1d/0xc3 [test_kasan]
- [<ffffffffa0002065>] ? kmalloc_oob_right+0x65/0x75 [test_kasan]
- [<ffffffffa0002065>] kmalloc_oob_right+0x65/0x75 [test_kasan]
- [<ffffffffa00026b0>] init_module+0x9/0x47 [test_kasan]
- [<ffffffff810002d9>] do_one_initcall+0x99/0x200
- [<ffffffff811e4e5c>] ? __vunmap+0xec/0x160
- [<ffffffff81114f63>] load_module+0x2cb3/0x3b20
- [<ffffffff8110fd70>] ? m_show+0x240/0x240
- [<ffffffff81115f06>] SyS_finit_module+0x76/0x80
- [<ffffffff81cd3129>] system_call_fastpath+0x12/0x17
+ dump_stack+0x94/0xd8
+ print_address_description+0x73/0x280
+ kasan_report+0x144/0x187
+ __asan_report_store1_noabort+0x17/0x20
+ kmalloc_oob_right+0xa8/0xbc [kasan_test]
+ kmalloc_tests_init+0x16/0x700 [kasan_test]
+ do_one_initcall+0xa5/0x3ae
+ do_init_module+0x1b6/0x547
+ load_module+0x75df/0x8070
+ __do_sys_init_module+0x1c6/0x200
+ __x64_sys_init_module+0x6e/0xb0
+ do_syscall_64+0x9f/0x2c0
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+ RIP: 0033:0x7f96443109da
+ RSP: 002b:00007ffcf0b51b08 EFLAGS: 00000202 ORIG_RAX: 00000000000000af
+ RAX: ffffffffffffffda RBX: 000055dc3ee521a0 RCX: 00007f96443109da
+ RDX: 00007f96445cff88 RSI: 0000000000057a50 RDI: 00007f9644992000
+ RBP: 000055dc3ee510b0 R08: 0000000000000003 R09: 0000000000000000
+ R10: 00007f964430cd0a R11: 0000000000000202 R12: 00007f96445cff88
+ R13: 000055dc3ee51090 R14: 0000000000000000 R15: 0000000000000000
+
+ Allocated by task 2760:
+ save_stack+0x43/0xd0
+ kasan_kmalloc+0xa7/0xd0
+ kmem_cache_alloc_trace+0xe1/0x1b0
+ kmalloc_oob_right+0x56/0xbc [kasan_test]
+ kmalloc_tests_init+0x16/0x700 [kasan_test]
+ do_one_initcall+0xa5/0x3ae
+ do_init_module+0x1b6/0x547
+ load_module+0x75df/0x8070
+ __do_sys_init_module+0x1c6/0x200
+ __x64_sys_init_module+0x6e/0xb0
+ do_syscall_64+0x9f/0x2c0
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+ Freed by task 815:
+ save_stack+0x43/0xd0
+ __kasan_slab_free+0x135/0x190
+ kasan_slab_free+0xe/0x10
+ kfree+0x93/0x1a0
+ umh_complete+0x6a/0xa0
+ call_usermodehelper_exec_async+0x4c3/0x640
+ ret_from_fork+0x35/0x40
+
+ The buggy address belongs to the object at ffff8801f44ec300
+ which belongs to the cache kmalloc-128 of size 128
+ The buggy address is located 123 bytes inside of
+ 128-byte region [ffff8801f44ec300, ffff8801f44ec380)
+ The buggy address belongs to the page:
+ page:ffffea0007d13b00 count:1 mapcount:0 mapping:ffff8801f7001640 index:0x0
+ flags: 0x200000000000100(slab)
+ raw: 0200000000000100 ffffea0007d11dc0 0000001a0000001a ffff8801f7001640
+ raw: 0000000000000000 0000000080150015 00000001ffffffff 0000000000000000
+ page dumped because: kasan: bad access detected
+
Memory state around the buggy address:
- ffff8800693bc300: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
- ffff8800693bc380: fc fc 00 00 00 00 00 00 00 00 00 00 00 00 00 fc
- ffff8800693bc400: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
- ffff8800693bc480: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
- ffff8800693bc500: fc fc fc fc fc fc fc fc fc fc fc 00 00 00 00 00
- >ffff8800693bc580: 00 00 00 00 00 00 00 00 00 00 03 fc fc fc fc fc
- ^
- ffff8800693bc600: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
- ffff8800693bc680: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
- ffff8800693bc700: fc fc fc fc fb fb fb fb fb fb fb fb fb fb fb fb
- ffff8800693bc780: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
- ffff8800693bc800: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ ffff8801f44ec200: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb
+ ffff8801f44ec280: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
+ >ffff8801f44ec300: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 03
+ ^
+ ffff8801f44ec380: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb
+ ffff8801f44ec400: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
==================================================================
-The header of the report discribe what kind of bug happened and what kind of
-access caused it. It's followed by the description of the accessed slub object
-(see 'SLUB Debug output' section in Documentation/vm/slub.rst for details) and
-the description of the accessed memory page.
-
-In the last section the report shows memory state around the accessed address.
-Reading this part requires some understanding of how KASAN works.
-
-The state of each 8 aligned bytes of memory is encoded in one shadow byte.
-Those 8 bytes can be accessible, partially accessible, freed or be a redzone.
-We use the following encoding for each shadow byte: 0 means that all 8 bytes
-of the corresponding memory region are accessible; number N (1 <= N <= 7) means
-that the first N bytes are accessible, and other (8 - N) bytes are not;
-any negative value indicates that the entire 8-byte word is inaccessible.
-We use different negative values to distinguish between different kinds of
-inaccessible memory like redzones or freed memory (see mm/kasan/kasan.h).
-
-In the report above the arrows point to the shadow byte 03, which means that
-the accessed address is partially accessible.
+The report header summarizes what kind of bug happened and what kind of access
+caused it. It is followed by a stack trace of the bad access, a stack trace of
+where the accessed memory was allocated (in case a slab object was accessed),
+and a stack trace of where the object was freed (in case of a use-after-free
+bug report). Next comes a description of the accessed slab object and the
+information about the accessed memory page.
+
+In the end, the report shows the memory state around the accessed address.
+Internally, KASAN tracks memory state separately for each memory granule, which
+is either 8 or 16 aligned bytes depending on KASAN mode. Each number in the
+memory state section of the report shows the state of one of the memory
+granules that surround the accessed address.
+
+For Generic KASAN, the size of each memory granule is 8. The state of each
+granule is encoded in one shadow byte. Those 8 bytes can be accessible,
+partially accessible, freed, or be a part of a redzone. KASAN uses the following
+encoding for each shadow byte: 00 means that all 8 bytes of the corresponding
+memory region are accessible; number N (1 <= N <= 7) means that the first N
+bytes are accessible, and other (8 - N) bytes are not; any negative value
+indicates that the entire 8-byte word is inaccessible. KASAN uses different
+negative values to distinguish between different kinds of inaccessible memory
+like redzones or freed memory (see mm/kasan/kasan.h).
+
+In the report above, the arrow points to the shadow byte ``03``, which means
+that the accessed address is partially accessible.
+
+For tag-based KASAN modes, this last report section shows the memory tags around
+the accessed address (see the `Implementation details`_ section).
+
+Note that KASAN bug titles (like ``slab-out-of-bounds`` or ``use-after-free``)
+are best-effort: KASAN prints the most probable bug type based on the limited
+information it has. The actual type of the bug might be different.
+
+Generic KASAN also reports up to two auxiliary call stack traces. These stack
+traces point to places in code that interacted with the object but that are not
+directly present in the bad access stack trace. Currently, this includes
+call_rcu() and workqueue queuing.
+
+CONFIG_KASAN_EXTRA_INFO
+~~~~~~~~~~~~~~~~~~~~~~~
+
+Enabling CONFIG_KASAN_EXTRA_INFO allows KASAN to record and report more
+information. The extra information currently supported is the CPU number and
+timestamp at allocation and free. More information can help find the cause of
+the bug and correlate the error with other system events, at the cost of using
+extra memory to record more information (more cost details in the help text of
+CONFIG_KASAN_EXTRA_INFO).
+
+Here is the report with CONFIG_KASAN_EXTRA_INFO enabled (only the
+different parts are shown)::
+ ==================================================================
+ ...
+ Allocated by task 134 on cpu 5 at 229.133855s:
+ ...
+ Freed by task 136 on cpu 3 at 230.199335s:
+ ...
+ ==================================================================
Implementation details
----------------------
-From a high level, our approach to memory error detection is similar to that
-of kmemcheck: use shadow memory to record whether each byte of memory is safe
-to access, and use compile-time instrumentation to check shadow memory on each
-memory access.
+Generic KASAN
+~~~~~~~~~~~~~
+
+Software KASAN modes use shadow memory to record whether each byte of memory is
+safe to access and use compile-time instrumentation to insert shadow memory
+checks before each memory access.
-AddressSanitizer dedicates 1/8 of kernel memory to its shadow memory
-(e.g. 16TB to cover 128TB on x86_64) and uses direct mapping with a scale and
-offset to translate a memory address to its corresponding shadow address.
+Generic KASAN dedicates 1/8th of kernel memory to its shadow memory (16TB
+to cover 128TB on x86_64) and uses direct mapping with a scale and offset to
+translate a memory address to its corresponding shadow address.
Here is the function which translates an address to its corresponding shadow
address::
static inline void *kasan_mem_to_shadow(const void *addr)
{
- return ((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT)
+ return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT)
+ KASAN_SHADOW_OFFSET;
}
where ``KASAN_SHADOW_SCALE_SHIFT = 3``.
-Compile-time instrumentation used for checking memory accesses. Compiler inserts
-function calls (__asan_load*(addr), __asan_store*(addr)) before each memory
-access of size 1, 2, 4, 8 or 16. These functions check whether memory access is
-valid or not by checking corresponding shadow memory.
+Compile-time instrumentation is used to insert memory access checks. Compiler
+inserts function calls (``__asan_load*(addr)``, ``__asan_store*(addr)``) before
+each memory access of size 1, 2, 4, 8, or 16. These functions check whether
+memory accesses are valid or not by checking corresponding shadow memory.
+
+With inline instrumentation, instead of making function calls, the compiler
+directly inserts the code to check shadow memory. This option significantly
+enlarges the kernel, but it gives an x1.1-x2 performance boost over the
+outline-instrumented kernel.
+
+Generic KASAN is the only mode that delays the reuse of freed objects via
+quarantine (see mm/kasan/quarantine.c for implementation).
+
+Software Tag-Based KASAN
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+Software Tag-Based KASAN uses a software memory tagging approach to checking
+access validity. It is currently only implemented for the arm64 architecture.
+
+Software Tag-Based KASAN uses the Top Byte Ignore (TBI) feature of arm64 CPUs
+to store a pointer tag in the top byte of kernel pointers. It uses shadow memory
+to store memory tags associated with each 16-byte memory cell (therefore, it
+dedicates 1/16th of the kernel memory for shadow memory).
+
+On each memory allocation, Software Tag-Based KASAN generates a random tag, tags
+the allocated memory with this tag, and embeds the same tag into the returned
+pointer.
+
+Software Tag-Based KASAN uses compile-time instrumentation to insert checks
+before each memory access. These checks make sure that the tag of the memory
+that is being accessed is equal to the tag of the pointer that is used to access
+this memory. In case of a tag mismatch, Software Tag-Based KASAN prints a bug
+report.
+
+Software Tag-Based KASAN also has two instrumentation modes (outline, which
+emits callbacks to check memory accesses; and inline, which performs the shadow
+memory checks inline). With outline instrumentation mode, a bug report is
+printed from the function that performs the access check. With inline
+instrumentation, a ``brk`` instruction is emitted by the compiler, and a
+dedicated ``brk`` handler is used to print bug reports.
+
+Software Tag-Based KASAN uses 0xFF as a match-all pointer tag (accesses through
+pointers with the 0xFF pointer tag are not checked). The value 0xFE is currently
+reserved to tag freed memory regions.
+
+Hardware Tag-Based KASAN
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+Hardware Tag-Based KASAN is similar to the software mode in concept but uses
+hardware memory tagging support instead of compiler instrumentation and
+shadow memory.
+
+Hardware Tag-Based KASAN is currently only implemented for arm64 architecture
+and based on both arm64 Memory Tagging Extension (MTE) introduced in ARMv8.5
+Instruction Set Architecture and Top Byte Ignore (TBI).
+
+Special arm64 instructions are used to assign memory tags for each allocation.
+Same tags are assigned to pointers to those allocations. On every memory
+access, hardware makes sure that the tag of the memory that is being accessed is
+equal to the tag of the pointer that is used to access this memory. In case of a
+tag mismatch, a fault is generated, and a report is printed.
+
+Hardware Tag-Based KASAN uses 0xFF as a match-all pointer tag (accesses through
+pointers with the 0xFF pointer tag are not checked). The value 0xFE is currently
+reserved to tag freed memory regions.
+
+If the hardware does not support MTE (pre ARMv8.5), Hardware Tag-Based KASAN
+will not be enabled. In this case, all KASAN boot parameters are ignored.
+
+Note that enabling CONFIG_KASAN_HW_TAGS always results in in-kernel TBI being
+enabled. Even when ``kasan.mode=off`` is provided or when the hardware does not
+support MTE (but supports TBI).
+
+Hardware Tag-Based KASAN only reports the first found bug. After that, MTE tag
+checking gets disabled.
+
+Shadow memory
+-------------
+
+The contents of this section are only applicable to software KASAN modes.
+
+The kernel maps memory in several different parts of the address space.
+The range of kernel virtual addresses is large: there is not enough real
+memory to support a real shadow region for every address that could be
+accessed by the kernel. Therefore, KASAN only maps real shadow for certain
+parts of the address space.
+
+Default behaviour
+~~~~~~~~~~~~~~~~~
+
+By default, architectures only map real memory over the shadow region
+for the linear mapping (and potentially other small areas). For all
+other areas - such as vmalloc and vmemmap space - a single read-only
+page is mapped over the shadow area. This read-only shadow page
+declares all memory accesses as permitted.
+
+This presents a problem for modules: they do not live in the linear
+mapping but in a dedicated module space. By hooking into the module
+allocator, KASAN temporarily maps real shadow memory to cover them.
+This allows detection of invalid accesses to module globals, for example.
+
+This also creates an incompatibility with ``VMAP_STACK``: if the stack
+lives in vmalloc space, it will be shadowed by the read-only page, and
+the kernel will fault when trying to set up the shadow data for stack
+variables.
+
+CONFIG_KASAN_VMALLOC
+~~~~~~~~~~~~~~~~~~~~
+
+With ``CONFIG_KASAN_VMALLOC``, KASAN can cover vmalloc space at the
+cost of greater memory usage. Currently, this is supported on x86,
+arm64, riscv, s390, and powerpc.
+
+This works by hooking into vmalloc and vmap and dynamically
+allocating real shadow memory to back the mappings.
+
+Most mappings in vmalloc space are small, requiring less than a full
+page of shadow space. Allocating a full shadow page per mapping would
+therefore be wasteful. Furthermore, to ensure that different mappings
+use different shadow pages, mappings would have to be aligned to
+``KASAN_GRANULE_SIZE * PAGE_SIZE``.
+
+Instead, KASAN shares backing space across multiple mappings. It allocates
+a backing page when a mapping in vmalloc space uses a particular page
+of the shadow region. This page can be shared by other vmalloc
+mappings later on.
+
+KASAN hooks into the vmap infrastructure to lazily clean up unused shadow
+memory.
+
+To avoid the difficulties around swapping mappings around, KASAN expects
+that the part of the shadow region that covers the vmalloc space will
+not be covered by the early shadow page but will be left unmapped.
+This will require changes in arch-specific code.
+
+This allows ``VMAP_STACK`` support on x86 and can simplify support of
+architectures that do not have a fixed module region.
+
+For developers
+--------------
+
+Ignoring accesses
+~~~~~~~~~~~~~~~~~
+
+Software KASAN modes use compiler instrumentation to insert validity checks.
+Such instrumentation might be incompatible with some parts of the kernel, and
+therefore needs to be disabled.
+
+Other parts of the kernel might access metadata for allocated objects.
+Normally, KASAN detects and reports such accesses, but in some cases (e.g.,
+in memory allocators), these accesses are valid.
+
+For software KASAN modes, to disable instrumentation for a specific file or
+directory, add a ``KASAN_SANITIZE`` annotation to the respective kernel
+Makefile:
+
+- For a single file (e.g., main.o)::
+
+ KASAN_SANITIZE_main.o := n
+
+- For all files in one directory::
+
+ KASAN_SANITIZE := n
+
+For software KASAN modes, to disable instrumentation on a per-function basis,
+use the KASAN-specific ``__no_sanitize_address`` function attribute or the
+generic ``noinstr`` one.
+
+Note that disabling compiler instrumentation (either on a per-file or a
+per-function basis) makes KASAN ignore the accesses that happen directly in
+that code for software KASAN modes. It does not help when the accesses happen
+indirectly (through calls to instrumented functions) or with Hardware
+Tag-Based KASAN, which does not use compiler instrumentation.
+
+For software KASAN modes, to disable KASAN reports in a part of the kernel code
+for the current task, annotate this part of the code with a
+``kasan_disable_current()``/``kasan_enable_current()`` section. This also
+disables the reports for indirect accesses that happen through function calls.
+
+For tag-based KASAN modes, to disable access checking, use
+``kasan_reset_tag()`` or ``page_kasan_tag_reset()``. Note that temporarily
+disabling access checking via ``page_kasan_tag_reset()`` requires saving and
+restoring the per-page KASAN tag via ``page_kasan_tag``/``page_kasan_tag_set``.
+
+Tests
+~~~~~
+
+There are KASAN tests that allow verifying that KASAN works and can detect
+certain types of memory corruptions.
+
+All KASAN tests are integrated with the KUnit Test Framework and can be enabled
+via ``CONFIG_KASAN_KUNIT_TEST``. The tests can be run and partially verified
+automatically in a few different ways; see the instructions below.
+
+Each KASAN test prints one of multiple KASAN reports if an error is detected.
+Then the test prints its number and status.
+
+When a test passes::
+
+ ok 28 - kmalloc_double_kzfree
+
+When a test fails due to a failed ``kmalloc``::
+
+ # kmalloc_large_oob_right: ASSERTION FAILED at mm/kasan/kasan_test.c:245
+ Expected ptr is not null, but is
+ not ok 5 - kmalloc_large_oob_right
+
+When a test fails due to a missing KASAN report::
+
+ # kmalloc_double_kzfree: EXPECTATION FAILED at mm/kasan/kasan_test.c:709
+ KASAN failure expected in "kfree_sensitive(ptr)", but none occurred
+ not ok 28 - kmalloc_double_kzfree
+
+
+At the end the cumulative status of all KASAN tests is printed. On success::
+
+ ok 1 - kasan
+
+Or, if one of the tests failed::
+
+ not ok 1 - kasan
+
+There are a few ways to run the KASAN tests.
+
+1. Loadable module
+
+ With ``CONFIG_KUNIT`` enabled, the tests can be built as a loadable module
+ and run by loading ``kasan_test.ko`` with ``insmod`` or ``modprobe``.
+
+2. Built-In
+
+ With ``CONFIG_KUNIT`` built-in, the tests can be built-in as well.
+ In this case, the tests will run at boot as a late-init call.
+
+3. Using kunit_tool
+
+ With ``CONFIG_KUNIT`` and ``CONFIG_KASAN_KUNIT_TEST`` built-in, it is also
+ possible to use ``kunit_tool`` to see the results of KUnit tests in a more
+ readable way. This will not print the KASAN reports of the tests that passed.
+ See `KUnit documentation <https://www.kernel.org/doc/html/latest/dev-tools/kunit/index.html>`_
+ for more up-to-date information on ``kunit_tool``.
-GCC 5.0 has possibility to perform inline instrumentation. Instead of making
-function calls GCC directly inserts the code to check the shadow memory.
-This option significantly enlarges kernel but it gives x1.1-x2 performance
-boost over outline instrumented kernel.
+.. _KUnit: https://www.kernel.org/doc/html/latest/dev-tools/kunit/index.html
diff --git a/Documentation/dev-tools/kcov.rst b/Documentation/dev-tools/kcov.rst
index c2f6452e38ed..8127849d40f5 100644
--- a/Documentation/dev-tools/kcov.rst
+++ b/Documentation/dev-tools/kcov.rst
@@ -1,41 +1,50 @@
-kcov: code coverage for fuzzing
+KCOV: code coverage for fuzzing
===============================
-kcov exposes kernel code coverage information in a form suitable for coverage-
-guided fuzzing (randomized testing). Coverage data of a running kernel is
-exported via the "kcov" debugfs file. Coverage collection is enabled on a task
-basis, and thus it can capture precise coverage of a single system call.
+KCOV collects and exposes kernel code coverage information in a form suitable
+for coverage-guided fuzzing. Coverage data of a running kernel is exported via
+the ``kcov`` debugfs file. Coverage collection is enabled on a task basis, and
+thus KCOV can capture precise coverage of a single system call.
-Note that kcov does not aim to collect as much coverage as possible. It aims
-to collect more or less stable coverage that is function of syscall inputs.
-To achieve this goal it does not collect coverage in soft/hard interrupts
-and instrumentation of some inherently non-deterministic parts of kernel is
-disabled (e.g. scheduler, locking).
+Note that KCOV does not aim to collect as much coverage as possible. It aims
+to collect more or less stable coverage that is a function of syscall inputs.
+To achieve this goal, it does not collect coverage in soft/hard interrupts
+(unless remove coverage collection is enabled, see below) and from some
+inherently non-deterministic parts of the kernel (e.g. scheduler, locking).
-kcov is also able to collect comparison operands from the instrumented code
-(this feature currently requires that the kernel is compiled with clang).
+Besides collecting code coverage, KCOV can also collect comparison operands.
+See the "Comparison operands collection" section for details.
+
+Besides collecting coverage data from syscall handlers, KCOV can also collect
+coverage for annotated parts of the kernel executing in background kernel
+tasks or soft interrupts. See the "Remote coverage collection" section for
+details.
Prerequisites
-------------
-Configure the kernel with::
+KCOV relies on compiler instrumentation and requires GCC 6.1.0 or later
+or any Clang version supported by the kernel.
- CONFIG_KCOV=y
+Collecting comparison operands is supported with GCC 8+ or with Clang.
-CONFIG_KCOV requires gcc built on revision 231296 or later.
+To enable KCOV, configure the kernel with::
-If the comparison operands need to be collected, set::
+ CONFIG_KCOV=y
+
+To enable comparison operands collection, set::
CONFIG_KCOV_ENABLE_COMPARISONS=y
-Profiling data will only become accessible once debugfs has been mounted::
+Coverage data only becomes accessible once debugfs has been mounted::
mount -t debugfs none /sys/kernel/debug
Coverage collection
-------------------
-The following program demonstrates coverage collection from within a test
-program using kcov:
+
+The following program demonstrates how to use KCOV to collect coverage for a
+single syscall from within a test program:
.. code-block:: c
@@ -49,6 +58,7 @@ program using kcov:
#include <sys/mman.h>
#include <unistd.h>
#include <fcntl.h>
+ #include <linux/types.h>
#define KCOV_INIT_TRACE _IOR('c', 1, unsigned long)
#define KCOV_ENABLE _IO('c', 100)
@@ -82,7 +92,7 @@ program using kcov:
perror("ioctl"), exit(1);
/* Reset coverage from the tail of the ioctl() call. */
__atomic_store_n(&cover[0], 0, __ATOMIC_RELAXED);
- /* That's the target syscal call. */
+ /* Call the target syscall call. */
read(-1, NULL, 0);
/* Read number of PCs collected. */
n = __atomic_load_n(&cover[0], __ATOMIC_RELAXED);
@@ -101,7 +111,7 @@ program using kcov:
return 0;
}
-After piping through addr2line output of the program looks as follows::
+After piping through ``addr2line`` the output of the program looks as follows::
SyS_read
fs/read_write.c:562
@@ -119,15 +129,17 @@ After piping through addr2line output of the program looks as follows::
fs/read_write.c:562
If a program needs to collect coverage from several threads (independently),
-it needs to open /sys/kernel/debug/kcov in each thread separately.
+it needs to open ``/sys/kernel/debug/kcov`` in each thread separately.
The interface is fine-grained to allow efficient forking of test processes.
-That is, a parent process opens /sys/kernel/debug/kcov, enables trace mode,
-mmaps coverage buffer and then forks child processes in a loop. Child processes
-only need to enable coverage (disable happens automatically on thread end).
+That is, a parent process opens ``/sys/kernel/debug/kcov``, enables trace mode,
+mmaps coverage buffer, and then forks child processes in a loop. The child
+processes only need to enable coverage (it gets disabled automatically when
+a thread exits).
Comparison operands collection
------------------------------
+
Comparison operands collection is similar to coverage collection:
.. code-block:: c
@@ -175,6 +187,8 @@ Comparison operands collection is similar to coverage collection:
/* Read number of comparisons collected. */
n = __atomic_load_n(&cover[0], __ATOMIC_RELAXED);
for (i = 0; i < n; i++) {
+ uint64_t ip;
+
type = cover[i * KCOV_WORDS_PER_CMP + 1];
/* arg1 and arg2 - operands of the comparison. */
arg1 = cover[i * KCOV_WORDS_PER_CMP + 2];
@@ -200,5 +214,166 @@ Comparison operands collection is similar to coverage collection:
return 0;
}
-Note that the kcov modes (coverage collection or comparison operands) are
-mutually exclusive.
+Note that the KCOV modes (collection of code coverage or comparison operands)
+are mutually exclusive.
+
+Remote coverage collection
+--------------------------
+
+Besides collecting coverage data from handlers of syscalls issued from a
+userspace process, KCOV can also collect coverage for parts of the kernel
+executing in other contexts - so-called "remote" coverage.
+
+Using KCOV to collect remote coverage requires:
+
+1. Modifying kernel code to annotate the code section from where coverage
+ should be collected with ``kcov_remote_start`` and ``kcov_remote_stop``.
+
+2. Using ``KCOV_REMOTE_ENABLE`` instead of ``KCOV_ENABLE`` in the userspace
+ process that collects coverage.
+
+Both ``kcov_remote_start`` and ``kcov_remote_stop`` annotations and the
+``KCOV_REMOTE_ENABLE`` ioctl accept handles that identify particular coverage
+collection sections. The way a handle is used depends on the context where the
+matching code section executes.
+
+KCOV supports collecting remote coverage from the following contexts:
+
+1. Global kernel background tasks. These are the tasks that are spawned during
+ kernel boot in a limited number of instances (e.g. one USB ``hub_event``
+ worker is spawned per one USB HCD).
+
+2. Local kernel background tasks. These are spawned when a userspace process
+ interacts with some kernel interface and are usually killed when the process
+ exits (e.g. vhost workers).
+
+3. Soft interrupts.
+
+For #1 and #3, a unique global handle must be chosen and passed to the
+corresponding ``kcov_remote_start`` call. Then a userspace process must pass
+this handle to ``KCOV_REMOTE_ENABLE`` in the ``handles`` array field of the
+``kcov_remote_arg`` struct. This will attach the used KCOV device to the code
+section referenced by this handle. Multiple global handles identifying
+different code sections can be passed at once.
+
+For #2, the userspace process instead must pass a non-zero handle through the
+``common_handle`` field of the ``kcov_remote_arg`` struct. This common handle
+gets saved to the ``kcov_handle`` field in the current ``task_struct`` and
+needs to be passed to the newly spawned local tasks via custom kernel code
+modifications. Those tasks should in turn use the passed handle in their
+``kcov_remote_start`` and ``kcov_remote_stop`` annotations.
+
+KCOV follows a predefined format for both global and common handles. Each
+handle is a ``u64`` integer. Currently, only the one top and the lower 4 bytes
+are used. Bytes 4-7 are reserved and must be zero.
+
+For global handles, the top byte of the handle denotes the id of a subsystem
+this handle belongs to. For example, KCOV uses ``1`` as the USB subsystem id.
+The lower 4 bytes of a global handle denote the id of a task instance within
+that subsystem. For example, each ``hub_event`` worker uses the USB bus number
+as the task instance id.
+
+For common handles, a reserved value ``0`` is used as a subsystem id, as such
+handles don't belong to a particular subsystem. The lower 4 bytes of a common
+handle identify a collective instance of all local tasks spawned by the
+userspace process that passed a common handle to ``KCOV_REMOTE_ENABLE``.
+
+In practice, any value can be used for common handle instance id if coverage
+is only collected from a single userspace process on the system. However, if
+common handles are used by multiple processes, unique instance ids must be
+used for each process. One option is to use the process id as the common
+handle instance id.
+
+The following program demonstrates using KCOV to collect coverage from both
+local tasks spawned by the process and the global task that handles USB bus #1:
+
+.. code-block:: c
+
+ /* Same includes and defines as above. */
+
+ struct kcov_remote_arg {
+ __u32 trace_mode;
+ __u32 area_size;
+ __u32 num_handles;
+ __aligned_u64 common_handle;
+ __aligned_u64 handles[0];
+ };
+
+ #define KCOV_INIT_TRACE _IOR('c', 1, unsigned long)
+ #define KCOV_DISABLE _IO('c', 101)
+ #define KCOV_REMOTE_ENABLE _IOW('c', 102, struct kcov_remote_arg)
+
+ #define COVER_SIZE (64 << 10)
+
+ #define KCOV_TRACE_PC 0
+
+ #define KCOV_SUBSYSTEM_COMMON (0x00ull << 56)
+ #define KCOV_SUBSYSTEM_USB (0x01ull << 56)
+
+ #define KCOV_SUBSYSTEM_MASK (0xffull << 56)
+ #define KCOV_INSTANCE_MASK (0xffffffffull)
+
+ static inline __u64 kcov_remote_handle(__u64 subsys, __u64 inst)
+ {
+ if (subsys & ~KCOV_SUBSYSTEM_MASK || inst & ~KCOV_INSTANCE_MASK)
+ return 0;
+ return subsys | inst;
+ }
+
+ #define KCOV_COMMON_ID 0x42
+ #define KCOV_USB_BUS_NUM 1
+
+ int main(int argc, char **argv)
+ {
+ int fd;
+ unsigned long *cover, n, i;
+ struct kcov_remote_arg *arg;
+
+ fd = open("/sys/kernel/debug/kcov", O_RDWR);
+ if (fd == -1)
+ perror("open"), exit(1);
+ if (ioctl(fd, KCOV_INIT_TRACE, COVER_SIZE))
+ perror("ioctl"), exit(1);
+ cover = (unsigned long*)mmap(NULL, COVER_SIZE * sizeof(unsigned long),
+ PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if ((void*)cover == MAP_FAILED)
+ perror("mmap"), exit(1);
+
+ /* Enable coverage collection via common handle and from USB bus #1. */
+ arg = calloc(1, sizeof(*arg) + sizeof(uint64_t));
+ if (!arg)
+ perror("calloc"), exit(1);
+ arg->trace_mode = KCOV_TRACE_PC;
+ arg->area_size = COVER_SIZE;
+ arg->num_handles = 1;
+ arg->common_handle = kcov_remote_handle(KCOV_SUBSYSTEM_COMMON,
+ KCOV_COMMON_ID);
+ arg->handles[0] = kcov_remote_handle(KCOV_SUBSYSTEM_USB,
+ KCOV_USB_BUS_NUM);
+ if (ioctl(fd, KCOV_REMOTE_ENABLE, arg))
+ perror("ioctl"), free(arg), exit(1);
+ free(arg);
+
+ /*
+ * Here the user needs to trigger execution of a kernel code section
+ * that is either annotated with the common handle, or to trigger some
+ * activity on USB bus #1.
+ */
+ sleep(2);
+
+ /*
+ * The load to the coverage count should be an acquire to pair with
+ * pair with the corresponding write memory barrier (smp_wmb()) on
+ * the kernel-side in kcov_move_area().
+ */
+ n = __atomic_load_n(&cover[0], __ATOMIC_ACQUIRE);
+ for (i = 0; i < n; i++)
+ printf("0x%lx\n", cover[i + 1]);
+ if (ioctl(fd, KCOV_DISABLE, 0))
+ perror("ioctl"), exit(1);
+ if (munmap(cover, COVER_SIZE * sizeof(unsigned long)))
+ perror("munmap"), exit(1);
+ if (close(fd))
+ perror("close"), exit(1);
+ return 0;
+ }
diff --git a/Documentation/dev-tools/kcsan.rst b/Documentation/dev-tools/kcsan.rst
new file mode 100644
index 000000000000..8575178aa87f
--- /dev/null
+++ b/Documentation/dev-tools/kcsan.rst
@@ -0,0 +1,377 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. Copyright (C) 2019, Google LLC.
+
+Kernel Concurrency Sanitizer (KCSAN)
+====================================
+
+The Kernel Concurrency Sanitizer (KCSAN) is a dynamic race detector, which
+relies on compile-time instrumentation, and uses a watchpoint-based sampling
+approach to detect races. KCSAN's primary purpose is to detect `data races`_.
+
+Usage
+-----
+
+KCSAN is supported by both GCC and Clang. With GCC we require version 11 or
+later, and with Clang also require version 11 or later.
+
+To enable KCSAN configure the kernel with::
+
+ CONFIG_KCSAN = y
+
+KCSAN provides several other configuration options to customize behaviour (see
+the respective help text in ``lib/Kconfig.kcsan`` for more info).
+
+Error reports
+~~~~~~~~~~~~~
+
+A typical data race report looks like this::
+
+ ==================================================================
+ BUG: KCSAN: data-race in test_kernel_read / test_kernel_write
+
+ write to 0xffffffffc009a628 of 8 bytes by task 487 on cpu 0:
+ test_kernel_write+0x1d/0x30
+ access_thread+0x89/0xd0
+ kthread+0x23e/0x260
+ ret_from_fork+0x22/0x30
+
+ read to 0xffffffffc009a628 of 8 bytes by task 488 on cpu 6:
+ test_kernel_read+0x10/0x20
+ access_thread+0x89/0xd0
+ kthread+0x23e/0x260
+ ret_from_fork+0x22/0x30
+
+ value changed: 0x00000000000009a6 -> 0x00000000000009b2
+
+ Reported by Kernel Concurrency Sanitizer on:
+ CPU: 6 PID: 488 Comm: access_thread Not tainted 5.12.0-rc2+ #1
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014
+ ==================================================================
+
+The header of the report provides a short summary of the functions involved in
+the race. It is followed by the access types and stack traces of the 2 threads
+involved in the data race. If KCSAN also observed a value change, the observed
+old value and new value are shown on the "value changed" line respectively.
+
+The other less common type of data race report looks like this::
+
+ ==================================================================
+ BUG: KCSAN: data-race in test_kernel_rmw_array+0x71/0xd0
+
+ race at unknown origin, with read to 0xffffffffc009bdb0 of 8 bytes by task 515 on cpu 2:
+ test_kernel_rmw_array+0x71/0xd0
+ access_thread+0x89/0xd0
+ kthread+0x23e/0x260
+ ret_from_fork+0x22/0x30
+
+ value changed: 0x0000000000002328 -> 0x0000000000002329
+
+ Reported by Kernel Concurrency Sanitizer on:
+ CPU: 2 PID: 515 Comm: access_thread Not tainted 5.12.0-rc2+ #1
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014
+ ==================================================================
+
+This report is generated where it was not possible to determine the other
+racing thread, but a race was inferred due to the data value of the watched
+memory location having changed. These reports always show a "value changed"
+line. A common reason for reports of this type are missing instrumentation in
+the racing thread, but could also occur due to e.g. DMA accesses. Such reports
+are shown only if ``CONFIG_KCSAN_REPORT_RACE_UNKNOWN_ORIGIN=y``, which is
+enabled by default.
+
+Selective analysis
+~~~~~~~~~~~~~~~~~~
+
+It may be desirable to disable data race detection for specific accesses,
+functions, compilation units, or entire subsystems. For static blacklisting,
+the below options are available:
+
+* KCSAN understands the ``data_race(expr)`` annotation, which tells KCSAN that
+ any data races due to accesses in ``expr`` should be ignored and resulting
+ behaviour when encountering a data race is deemed safe. Please see
+ `"Marking Shared-Memory Accesses" in the LKMM`_ for more information.
+
+* Similar to ``data_race(...)``, the type qualifier ``__data_racy`` can be used
+ to document that all data races due to accesses to a variable are intended
+ and should be ignored by KCSAN::
+
+ struct foo {
+ ...
+ int __data_racy stats_counter;
+ ...
+ };
+
+* Disabling data race detection for entire functions can be accomplished by
+ using the function attribute ``__no_kcsan``::
+
+ __no_kcsan
+ void foo(void) {
+ ...
+
+ To dynamically limit for which functions to generate reports, see the
+ `DebugFS interface`_ blacklist/whitelist feature.
+
+* To disable data race detection for a particular compilation unit, add to the
+ ``Makefile``::
+
+ KCSAN_SANITIZE_file.o := n
+
+* To disable data race detection for all compilation units listed in a
+ ``Makefile``, add to the respective ``Makefile``::
+
+ KCSAN_SANITIZE := n
+
+.. _"Marking Shared-Memory Accesses" in the LKMM: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/memory-model/Documentation/access-marking.txt
+
+Furthermore, it is possible to tell KCSAN to show or hide entire classes of
+data races, depending on preferences. These can be changed via the following
+Kconfig options:
+
+* ``CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY``: If enabled and a conflicting write
+ is observed via a watchpoint, but the data value of the memory location was
+ observed to remain unchanged, do not report the data race.
+
+* ``CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC``: Assume that plain aligned writes
+ up to word size are atomic by default. Assumes that such writes are not
+ subject to unsafe compiler optimizations resulting in data races. The option
+ causes KCSAN to not report data races due to conflicts where the only plain
+ accesses are aligned writes up to word size.
+
+* ``CONFIG_KCSAN_PERMISSIVE``: Enable additional permissive rules to ignore
+ certain classes of common data races. Unlike the above, the rules are more
+ complex involving value-change patterns, access type, and address. This
+ option depends on ``CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=y``. For details
+ please see the ``kernel/kcsan/permissive.h``. Testers and maintainers that
+ only focus on reports from specific subsystems and not the whole kernel are
+ recommended to disable this option.
+
+To use the strictest possible rules, select ``CONFIG_KCSAN_STRICT=y``, which
+configures KCSAN to follow the Linux-kernel memory consistency model (LKMM) as
+closely as possible.
+
+DebugFS interface
+~~~~~~~~~~~~~~~~~
+
+The file ``/sys/kernel/debug/kcsan`` provides the following interface:
+
+* Reading ``/sys/kernel/debug/kcsan`` returns various runtime statistics.
+
+* Writing ``on`` or ``off`` to ``/sys/kernel/debug/kcsan`` allows turning KCSAN
+ on or off, respectively.
+
+* Writing ``!some_func_name`` to ``/sys/kernel/debug/kcsan`` adds
+ ``some_func_name`` to the report filter list, which (by default) blacklists
+ reporting data races where either one of the top stackframes are a function
+ in the list.
+
+* Writing either ``blacklist`` or ``whitelist`` to ``/sys/kernel/debug/kcsan``
+ changes the report filtering behaviour. For example, the blacklist feature
+ can be used to silence frequently occurring data races; the whitelist feature
+ can help with reproduction and testing of fixes.
+
+Tuning performance
+~~~~~~~~~~~~~~~~~~
+
+Core parameters that affect KCSAN's overall performance and bug detection
+ability are exposed as kernel command-line arguments whose defaults can also be
+changed via the corresponding Kconfig options.
+
+* ``kcsan.skip_watch`` (``CONFIG_KCSAN_SKIP_WATCH``): Number of per-CPU memory
+ operations to skip, before another watchpoint is set up. Setting up
+ watchpoints more frequently will result in the likelihood of races to be
+ observed to increase. This parameter has the most significant impact on
+ overall system performance and race detection ability.
+
+* ``kcsan.udelay_task`` (``CONFIG_KCSAN_UDELAY_TASK``): For tasks, the
+ microsecond delay to stall execution after a watchpoint has been set up.
+ Larger values result in the window in which we may observe a race to
+ increase.
+
+* ``kcsan.udelay_interrupt`` (``CONFIG_KCSAN_UDELAY_INTERRUPT``): For
+ interrupts, the microsecond delay to stall execution after a watchpoint has
+ been set up. Interrupts have tighter latency requirements, and their delay
+ should generally be smaller than the one chosen for tasks.
+
+They may be tweaked at runtime via ``/sys/module/kcsan/parameters/``.
+
+Data Races
+----------
+
+In an execution, two memory accesses form a *data race* if they *conflict*,
+they happen concurrently in different threads, and at least one of them is a
+*plain access*; they *conflict* if both access the same memory location, and at
+least one is a write. For a more thorough discussion and definition, see `"Plain
+Accesses and Data Races" in the LKMM`_.
+
+.. _"Plain Accesses and Data Races" in the LKMM: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/memory-model/Documentation/explanation.txt?id=8f6629c004b193d23612641c3607e785819e97ab#n2164
+
+Relationship with the Linux-Kernel Memory Consistency Model (LKMM)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The LKMM defines the propagation and ordering rules of various memory
+operations, which gives developers the ability to reason about concurrent code.
+Ultimately this allows to determine the possible executions of concurrent code,
+and if that code is free from data races.
+
+KCSAN is aware of *marked atomic operations* (``READ_ONCE``, ``WRITE_ONCE``,
+``atomic_*``, etc.), and a subset of ordering guarantees implied by memory
+barriers. With ``CONFIG_KCSAN_WEAK_MEMORY=y``, KCSAN models load or store
+buffering, and can detect missing ``smp_mb()``, ``smp_wmb()``, ``smp_rmb()``,
+``smp_store_release()``, and all ``atomic_*`` operations with equivalent
+implied barriers.
+
+Note, KCSAN will not report all data races due to missing memory ordering,
+specifically where a memory barrier would be required to prohibit subsequent
+memory operation from reordering before the barrier. Developers should
+therefore carefully consider the required memory ordering requirements that
+remain unchecked.
+
+Race Detection Beyond Data Races
+--------------------------------
+
+For code with complex concurrency design, race-condition bugs may not always
+manifest as data races. Race conditions occur if concurrently executing
+operations result in unexpected system behaviour. On the other hand, data races
+are defined at the C-language level. The following macros can be used to check
+properties of concurrent code where bugs would not manifest as data races.
+
+.. kernel-doc:: include/linux/kcsan-checks.h
+ :functions: ASSERT_EXCLUSIVE_WRITER ASSERT_EXCLUSIVE_WRITER_SCOPED
+ ASSERT_EXCLUSIVE_ACCESS ASSERT_EXCLUSIVE_ACCESS_SCOPED
+ ASSERT_EXCLUSIVE_BITS
+
+Implementation Details
+----------------------
+
+KCSAN relies on observing that two accesses happen concurrently. Crucially, we
+want to (a) increase the chances of observing races (especially for races that
+manifest rarely), and (b) be able to actually observe them. We can accomplish
+(a) by injecting various delays, and (b) by using address watchpoints (or
+breakpoints).
+
+If we deliberately stall a memory access, while we have a watchpoint for its
+address set up, and then observe the watchpoint to fire, two accesses to the
+same address just raced. Using hardware watchpoints, this is the approach taken
+in `DataCollider
+<http://usenix.org/legacy/events/osdi10/tech/full_papers/Erickson.pdf>`_.
+Unlike DataCollider, KCSAN does not use hardware watchpoints, but instead
+relies on compiler instrumentation and "soft watchpoints".
+
+In KCSAN, watchpoints are implemented using an efficient encoding that stores
+access type, size, and address in a long; the benefits of using "soft
+watchpoints" are portability and greater flexibility. KCSAN then relies on the
+compiler instrumenting plain accesses. For each instrumented plain access:
+
+1. Check if a matching watchpoint exists; if yes, and at least one access is a
+ write, then we encountered a racing access.
+
+2. Periodically, if no matching watchpoint exists, set up a watchpoint and
+ stall for a small randomized delay.
+
+3. Also check the data value before the delay, and re-check the data value
+ after delay; if the values mismatch, we infer a race of unknown origin.
+
+To detect data races between plain and marked accesses, KCSAN also annotates
+marked accesses, but only to check if a watchpoint exists; i.e. KCSAN never
+sets up a watchpoint on marked accesses. By never setting up watchpoints for
+marked operations, if all accesses to a variable that is accessed concurrently
+are properly marked, KCSAN will never trigger a watchpoint and therefore never
+report the accesses.
+
+Modeling Weak Memory
+~~~~~~~~~~~~~~~~~~~~
+
+KCSAN's approach to detecting data races due to missing memory barriers is
+based on modeling access reordering (with ``CONFIG_KCSAN_WEAK_MEMORY=y``).
+Each plain memory access for which a watchpoint is set up, is also selected for
+simulated reordering within the scope of its function (at most 1 in-flight
+access).
+
+Once an access has been selected for reordering, it is checked along every
+other access until the end of the function scope. If an appropriate memory
+barrier is encountered, the access will no longer be considered for simulated
+reordering.
+
+When the result of a memory operation should be ordered by a barrier, KCSAN can
+then detect data races where the conflict only occurs as a result of a missing
+barrier. Consider the example::
+
+ int x, flag;
+ void T1(void)
+ {
+ x = 1; // data race!
+ WRITE_ONCE(flag, 1); // correct: smp_store_release(&flag, 1)
+ }
+ void T2(void)
+ {
+ while (!READ_ONCE(flag)); // correct: smp_load_acquire(&flag)
+ ... = x; // data race!
+ }
+
+When weak memory modeling is enabled, KCSAN can consider ``x`` in ``T1`` for
+simulated reordering. After the write of ``flag``, ``x`` is again checked for
+concurrent accesses: because ``T2`` is able to proceed after the write of
+``flag``, a data race is detected. With the correct barriers in place, ``x``
+would not be considered for reordering after the proper release of ``flag``,
+and no data race would be detected.
+
+Deliberate trade-offs in complexity but also practical limitations mean only a
+subset of data races due to missing memory barriers can be detected. With
+currently available compiler support, the implementation is limited to modeling
+the effects of "buffering" (delaying accesses), since the runtime cannot
+"prefetch" accesses. Also recall that watchpoints are only set up for plain
+accesses, and the only access type for which KCSAN simulates reordering. This
+means reordering of marked accesses is not modeled.
+
+A consequence of the above is that acquire operations do not require barrier
+instrumentation (no prefetching). Furthermore, marked accesses introducing
+address or control dependencies do not require special handling (the marked
+access cannot be reordered, later dependent accesses cannot be prefetched).
+
+Key Properties
+~~~~~~~~~~~~~~
+
+1. **Memory Overhead:** The overall memory overhead is only a few MiB
+ depending on configuration. The current implementation uses a small array of
+ longs to encode watchpoint information, which is negligible.
+
+2. **Performance Overhead:** KCSAN's runtime aims to be minimal, using an
+ efficient watchpoint encoding that does not require acquiring any shared
+ locks in the fast-path. For kernel boot on a system with 8 CPUs:
+
+ - 5.0x slow-down with the default KCSAN config;
+ - 2.8x slow-down from runtime fast-path overhead only (set very large
+ ``KCSAN_SKIP_WATCH`` and unset ``KCSAN_SKIP_WATCH_RANDOMIZE``).
+
+3. **Annotation Overheads:** Minimal annotations are required outside the KCSAN
+ runtime. As a result, maintenance overheads are minimal as the kernel
+ evolves.
+
+4. **Detects Racy Writes from Devices:** Due to checking data values upon
+ setting up watchpoints, racy writes from devices can also be detected.
+
+5. **Memory Ordering:** KCSAN is aware of only a subset of LKMM ordering rules;
+ this may result in missed data races (false negatives).
+
+6. **Analysis Accuracy:** For observed executions, due to using a sampling
+ strategy, the analysis is *unsound* (false negatives possible), but aims to
+ be complete (no false positives).
+
+Alternatives Considered
+-----------------------
+
+An alternative data race detection approach for the kernel can be found in the
+`Kernel Thread Sanitizer (KTSAN)
+<https://github.com/google/kernel-sanitizers/blob/master/KTSAN.md>`_.
+KTSAN is a happens-before data race detector, which explicitly establishes the
+happens-before order between memory operations, which can then be used to
+determine data races as defined in `Data Races`_.
+
+To build a correct happens-before relation, KTSAN must be aware of all ordering
+rules of the LKMM and synchronization primitives. Unfortunately, any omission
+leads to large numbers of false positives, which is especially detrimental in
+the context of the kernel which includes numerous custom synchronization
+mechanisms. To track the happens-before relation, KTSAN's implementation
+requires metadata for each memory location (shadow memory), which for each page
+corresponds to 4 pages of shadow memory, and can translate into overhead of
+tens of GiB on a large system.
diff --git a/Documentation/dev-tools/kfence.rst b/Documentation/dev-tools/kfence.rst
new file mode 100644
index 000000000000..541899353865
--- /dev/null
+++ b/Documentation/dev-tools/kfence.rst
@@ -0,0 +1,340 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. Copyright (C) 2020, Google LLC.
+
+Kernel Electric-Fence (KFENCE)
+==============================
+
+Kernel Electric-Fence (KFENCE) is a low-overhead sampling-based memory safety
+error detector. KFENCE detects heap out-of-bounds access, use-after-free, and
+invalid-free errors.
+
+KFENCE is designed to be enabled in production kernels, and has near zero
+performance overhead. Compared to KASAN, KFENCE trades performance for
+precision. The main motivation behind KFENCE's design, is that with enough
+total uptime KFENCE will detect bugs in code paths not typically exercised by
+non-production test workloads. One way to quickly achieve a large enough total
+uptime is when the tool is deployed across a large fleet of machines.
+
+Usage
+-----
+
+To enable KFENCE, configure the kernel with::
+
+ CONFIG_KFENCE=y
+
+To build a kernel with KFENCE support, but disabled by default (to enable, set
+``kfence.sample_interval`` to non-zero value), configure the kernel with::
+
+ CONFIG_KFENCE=y
+ CONFIG_KFENCE_SAMPLE_INTERVAL=0
+
+KFENCE provides several other configuration options to customize behaviour (see
+the respective help text in ``lib/Kconfig.kfence`` for more info).
+
+Tuning performance
+~~~~~~~~~~~~~~~~~~
+
+The most important parameter is KFENCE's sample interval, which can be set via
+the kernel boot parameter ``kfence.sample_interval`` in milliseconds. The
+sample interval determines the frequency with which heap allocations will be
+guarded by KFENCE. The default is configurable via the Kconfig option
+``CONFIG_KFENCE_SAMPLE_INTERVAL``. Setting ``kfence.sample_interval=0``
+disables KFENCE.
+
+The sample interval controls a timer that sets up KFENCE allocations. By
+default, to keep the real sample interval predictable, the normal timer also
+causes CPU wake-ups when the system is completely idle. This may be undesirable
+on power-constrained systems. The boot parameter ``kfence.deferrable=1``
+instead switches to a "deferrable" timer which does not force CPU wake-ups on
+idle systems, at the risk of unpredictable sample intervals. The default is
+configurable via the Kconfig option ``CONFIG_KFENCE_DEFERRABLE``.
+
+.. warning::
+ The KUnit test suite is very likely to fail when using a deferrable timer
+ since it currently causes very unpredictable sample intervals.
+
+By default KFENCE will only sample 1 heap allocation within each sample
+interval. *Burst mode* allows to sample successive heap allocations, where the
+kernel boot parameter ``kfence.burst`` can be set to a non-zero value which
+denotes the *additional* successive allocations within a sample interval;
+setting ``kfence.burst=N`` means that ``1 + N`` successive allocations are
+attempted through KFENCE for each sample interval.
+
+The KFENCE memory pool is of fixed size, and if the pool is exhausted, no
+further KFENCE allocations occur. With ``CONFIG_KFENCE_NUM_OBJECTS`` (default
+255), the number of available guarded objects can be controlled. Each object
+requires 2 pages, one for the object itself and the other one used as a guard
+page; object pages are interleaved with guard pages, and every object page is
+therefore surrounded by two guard pages.
+
+The total memory dedicated to the KFENCE memory pool can be computed as::
+
+ ( #objects + 1 ) * 2 * PAGE_SIZE
+
+Using the default config, and assuming a page size of 4 KiB, results in
+dedicating 2 MiB to the KFENCE memory pool.
+
+Note: On architectures that support huge pages, KFENCE will ensure that the
+pool is using pages of size ``PAGE_SIZE``. This will result in additional page
+tables being allocated.
+
+Error reports
+~~~~~~~~~~~~~
+
+A typical out-of-bounds access looks like this::
+
+ ==================================================================
+ BUG: KFENCE: out-of-bounds read in test_out_of_bounds_read+0xa6/0x234
+
+ Out-of-bounds read at 0xffff8c3f2e291fff (1B left of kfence-#72):
+ test_out_of_bounds_read+0xa6/0x234
+ kunit_try_run_case+0x61/0xa0
+ kunit_generic_run_threadfn_adapter+0x16/0x30
+ kthread+0x176/0x1b0
+ ret_from_fork+0x22/0x30
+
+ kfence-#72: 0xffff8c3f2e292000-0xffff8c3f2e29201f, size=32, cache=kmalloc-32
+
+ allocated by task 484 on cpu 0 at 32.919330s:
+ test_alloc+0xfe/0x738
+ test_out_of_bounds_read+0x9b/0x234
+ kunit_try_run_case+0x61/0xa0
+ kunit_generic_run_threadfn_adapter+0x16/0x30
+ kthread+0x176/0x1b0
+ ret_from_fork+0x22/0x30
+
+ CPU: 0 PID: 484 Comm: kunit_try_catch Not tainted 5.13.0-rc3+ #7
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014
+ ==================================================================
+
+The header of the report provides a short summary of the function involved in
+the access. It is followed by more detailed information about the access and
+its origin. Note that, real kernel addresses are only shown when using the
+kernel command line option ``no_hash_pointers``.
+
+Use-after-free accesses are reported as::
+
+ ==================================================================
+ BUG: KFENCE: use-after-free read in test_use_after_free_read+0xb3/0x143
+
+ Use-after-free read at 0xffff8c3f2e2a0000 (in kfence-#79):
+ test_use_after_free_read+0xb3/0x143
+ kunit_try_run_case+0x61/0xa0
+ kunit_generic_run_threadfn_adapter+0x16/0x30
+ kthread+0x176/0x1b0
+ ret_from_fork+0x22/0x30
+
+ kfence-#79: 0xffff8c3f2e2a0000-0xffff8c3f2e2a001f, size=32, cache=kmalloc-32
+
+ allocated by task 488 on cpu 2 at 33.871326s:
+ test_alloc+0xfe/0x738
+ test_use_after_free_read+0x76/0x143
+ kunit_try_run_case+0x61/0xa0
+ kunit_generic_run_threadfn_adapter+0x16/0x30
+ kthread+0x176/0x1b0
+ ret_from_fork+0x22/0x30
+
+ freed by task 488 on cpu 2 at 33.871358s:
+ test_use_after_free_read+0xa8/0x143
+ kunit_try_run_case+0x61/0xa0
+ kunit_generic_run_threadfn_adapter+0x16/0x30
+ kthread+0x176/0x1b0
+ ret_from_fork+0x22/0x30
+
+ CPU: 2 PID: 488 Comm: kunit_try_catch Tainted: G B 5.13.0-rc3+ #7
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014
+ ==================================================================
+
+KFENCE also reports on invalid frees, such as double-frees::
+
+ ==================================================================
+ BUG: KFENCE: invalid free in test_double_free+0xdc/0x171
+
+ Invalid free of 0xffff8c3f2e2a4000 (in kfence-#81):
+ test_double_free+0xdc/0x171
+ kunit_try_run_case+0x61/0xa0
+ kunit_generic_run_threadfn_adapter+0x16/0x30
+ kthread+0x176/0x1b0
+ ret_from_fork+0x22/0x30
+
+ kfence-#81: 0xffff8c3f2e2a4000-0xffff8c3f2e2a401f, size=32, cache=kmalloc-32
+
+ allocated by task 490 on cpu 1 at 34.175321s:
+ test_alloc+0xfe/0x738
+ test_double_free+0x76/0x171
+ kunit_try_run_case+0x61/0xa0
+ kunit_generic_run_threadfn_adapter+0x16/0x30
+ kthread+0x176/0x1b0
+ ret_from_fork+0x22/0x30
+
+ freed by task 490 on cpu 1 at 34.175348s:
+ test_double_free+0xa8/0x171
+ kunit_try_run_case+0x61/0xa0
+ kunit_generic_run_threadfn_adapter+0x16/0x30
+ kthread+0x176/0x1b0
+ ret_from_fork+0x22/0x30
+
+ CPU: 1 PID: 490 Comm: kunit_try_catch Tainted: G B 5.13.0-rc3+ #7
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014
+ ==================================================================
+
+KFENCE also uses pattern-based redzones on the other side of an object's guard
+page, to detect out-of-bounds writes on the unprotected side of the object.
+These are reported on frees::
+
+ ==================================================================
+ BUG: KFENCE: memory corruption in test_kmalloc_aligned_oob_write+0xef/0x184
+
+ Corrupted memory at 0xffff8c3f2e33aff9 [ 0xac . . . . . . ] (in kfence-#156):
+ test_kmalloc_aligned_oob_write+0xef/0x184
+ kunit_try_run_case+0x61/0xa0
+ kunit_generic_run_threadfn_adapter+0x16/0x30
+ kthread+0x176/0x1b0
+ ret_from_fork+0x22/0x30
+
+ kfence-#156: 0xffff8c3f2e33afb0-0xffff8c3f2e33aff8, size=73, cache=kmalloc-96
+
+ allocated by task 502 on cpu 7 at 42.159302s:
+ test_alloc+0xfe/0x738
+ test_kmalloc_aligned_oob_write+0x57/0x184
+ kunit_try_run_case+0x61/0xa0
+ kunit_generic_run_threadfn_adapter+0x16/0x30
+ kthread+0x176/0x1b0
+ ret_from_fork+0x22/0x30
+
+ CPU: 7 PID: 502 Comm: kunit_try_catch Tainted: G B 5.13.0-rc3+ #7
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014
+ ==================================================================
+
+For such errors, the address where the corruption occurred as well as the
+invalidly written bytes (offset from the address) are shown; in this
+representation, '.' denote untouched bytes. In the example above ``0xac`` is
+the value written to the invalid address at offset 0, and the remaining '.'
+denote that no following bytes have been touched. Note that, real values are
+only shown if the kernel was booted with ``no_hash_pointers``; to avoid
+information disclosure otherwise, '!' is used instead to denote invalidly
+written bytes.
+
+And finally, KFENCE may also report on invalid accesses to any protected page
+where it was not possible to determine an associated object, e.g. if adjacent
+object pages had not yet been allocated::
+
+ ==================================================================
+ BUG: KFENCE: invalid read in test_invalid_access+0x26/0xe0
+
+ Invalid read at 0xffffffffb670b00a:
+ test_invalid_access+0x26/0xe0
+ kunit_try_run_case+0x51/0x85
+ kunit_generic_run_threadfn_adapter+0x16/0x30
+ kthread+0x137/0x160
+ ret_from_fork+0x22/0x30
+
+ CPU: 4 PID: 124 Comm: kunit_try_catch Tainted: G W 5.8.0-rc6+ #7
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 04/01/2014
+ ==================================================================
+
+DebugFS interface
+~~~~~~~~~~~~~~~~~
+
+Some debugging information is exposed via debugfs:
+
+* The file ``/sys/kernel/debug/kfence/stats`` provides runtime statistics.
+
+* The file ``/sys/kernel/debug/kfence/objects`` provides a list of objects
+ allocated via KFENCE, including those already freed but protected.
+
+Implementation Details
+----------------------
+
+Guarded allocations are set up based on the sample interval. After expiration
+of the sample interval, the next allocation through the main allocator (SLAB or
+SLUB) returns a guarded allocation from the KFENCE object pool (allocation
+sizes up to PAGE_SIZE are supported). At this point, the timer is reset, and
+the next allocation is set up after the expiration of the interval.
+
+When using ``CONFIG_KFENCE_STATIC_KEYS=y``, KFENCE allocations are "gated"
+through the main allocator's fast-path by relying on static branches via the
+static keys infrastructure. The static branch is toggled to redirect the
+allocation to KFENCE. Depending on sample interval, target workloads, and
+system architecture, this may perform better than the simple dynamic branch.
+Careful benchmarking is recommended.
+
+KFENCE objects each reside on a dedicated page, at either the left or right
+page boundaries selected at random. The pages to the left and right of the
+object page are "guard pages", whose attributes are changed to a protected
+state, and cause page faults on any attempted access. Such page faults are then
+intercepted by KFENCE, which handles the fault gracefully by reporting an
+out-of-bounds access, and marking the page as accessible so that the faulting
+code can (wrongly) continue executing (set ``panic_on_warn`` to panic instead).
+
+To detect out-of-bounds writes to memory within the object's page itself,
+KFENCE also uses pattern-based redzones. For each object page, a redzone is set
+up for all non-object memory. For typical alignments, the redzone is only
+required on the unguarded side of an object. Because KFENCE must honor the
+cache's requested alignment, special alignments may result in unprotected gaps
+on either side of an object, all of which are redzoned.
+
+The following figure illustrates the page layout::
+
+ ---+-----------+-----------+-----------+-----------+-----------+---
+ | xxxxxxxxx | O : | xxxxxxxxx | : O | xxxxxxxxx |
+ | xxxxxxxxx | B : | xxxxxxxxx | : B | xxxxxxxxx |
+ | x GUARD x | J : RED- | x GUARD x | RED- : J | x GUARD x |
+ | xxxxxxxxx | E : ZONE | xxxxxxxxx | ZONE : E | xxxxxxxxx |
+ | xxxxxxxxx | C : | xxxxxxxxx | : C | xxxxxxxxx |
+ | xxxxxxxxx | T : | xxxxxxxxx | : T | xxxxxxxxx |
+ ---+-----------+-----------+-----------+-----------+-----------+---
+
+Upon deallocation of a KFENCE object, the object's page is again protected and
+the object is marked as freed. Any further access to the object causes a fault
+and KFENCE reports a use-after-free access. Freed objects are inserted at the
+tail of KFENCE's freelist, so that the least recently freed objects are reused
+first, and the chances of detecting use-after-frees of recently freed objects
+is increased.
+
+If pool utilization reaches 75% (default) or above, to reduce the risk of the
+pool eventually being fully occupied by allocated objects yet ensure diverse
+coverage of allocations, KFENCE limits currently covered allocations of the
+same source from further filling up the pool. The "source" of an allocation is
+based on its partial allocation stack trace. A side-effect is that this also
+limits frequent long-lived allocations (e.g. pagecache) of the same source
+filling up the pool permanently, which is the most common risk for the pool
+becoming full and the sampled allocation rate dropping to zero. The threshold
+at which to start limiting currently covered allocations can be configured via
+the boot parameter ``kfence.skip_covered_thresh`` (pool usage%).
+
+Interface
+---------
+
+The following describes the functions which are used by allocators as well as
+page handling code to set up and deal with KFENCE allocations.
+
+.. kernel-doc:: include/linux/kfence.h
+ :functions: is_kfence_address
+ kfence_shutdown_cache
+ kfence_alloc kfence_free __kfence_free
+ kfence_ksize kfence_object_start
+ kfence_handle_page_fault
+
+Related Tools
+-------------
+
+In userspace, a similar approach is taken by `GWP-ASan
+<http://llvm.org/docs/GwpAsan.html>`_. GWP-ASan also relies on guard pages and
+a sampling strategy to detect memory unsafety bugs at scale. KFENCE's design is
+directly influenced by GWP-ASan, and can be seen as its kernel sibling. Another
+similar but non-sampling approach, that also inspired the name "KFENCE", can be
+found in the userspace `Electric Fence Malloc Debugger
+<https://linux.die.net/man/3/efence>`_.
+
+In the kernel, several tools exist to debug memory access errors, and in
+particular KASAN can detect all bug classes that KFENCE can detect. While KASAN
+is more precise, relying on compiler instrumentation, this comes at a
+performance cost.
+
+It is worth highlighting that KASAN and KFENCE are complementary, with
+different target environments. For instance, KASAN is the better debugging-aid,
+where test cases or reproducers exists: due to the lower chance to detect the
+error, it would require more effort using KFENCE to debug. Deployments at scale
+that cannot afford to enable KASAN, however, would benefit from using KFENCE to
+discover bugs due to code paths not exercised by test cases or fuzzers.
diff --git a/Documentation/dev-tools/kmemleak.rst b/Documentation/dev-tools/kmemleak.rst
index e6f51260ff32..7d784e03f3f9 100644
--- a/Documentation/dev-tools/kmemleak.rst
+++ b/Documentation/dev-tools/kmemleak.rst
@@ -2,23 +2,25 @@ Kernel Memory Leak Detector
===========================
Kmemleak provides a way of detecting possible kernel memory leaks in a
-way similar to a tracing garbage collector
-(https://en.wikipedia.org/wiki/Garbage_collection_%28computer_science%29#Tracing_garbage_collectors),
+way similar to a `tracing garbage collector
+<https://en.wikipedia.org/wiki/Tracing_garbage_collection>`_,
with the difference that the orphan objects are not freed but only
reported via /sys/kernel/debug/kmemleak. A similar method is used by the
Valgrind tool (``memcheck --leak-check``) to detect the memory leaks in
user-space applications.
-Kmemleak is supported on x86, arm, powerpc, sparc, sh, microblaze, ppc, mips, s390 and tile.
Usage
-----
CONFIG_DEBUG_KMEMLEAK in "Kernel hacking" has to be enabled. A kernel
thread scans the memory every 10 minutes (by default) and prints the
-number of new unreferenced objects found. To display the details of all
-the possible memory leaks::
+number of new unreferenced objects found. If the ``debugfs`` isn't already
+mounted, mount with::
# mount -t debugfs nodev /sys/kernel/debug/
+
+To display the details of all the possible scanned memory leaks::
+
# cat /sys/kernel/debug/kmemleak
To trigger an intermediate memory scan::
@@ -66,12 +68,15 @@ the kernel command line.
Memory may be allocated or freed before kmemleak is initialised and
these actions are stored in an early log buffer. The size of this buffer
-is configured via the CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE option.
+is configured via the CONFIG_DEBUG_KMEMLEAK_MEM_POOL_SIZE option.
If CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF are enabled, the kmemleak is
disabled by default. Passing ``kmemleak=on`` on the kernel command
line enables the function.
+If you are getting errors like "Error while writing to stdout" or "write_loop:
+Invalid argument", make sure kmemleak is properly enabled.
+
Basic Algorithm
---------------
@@ -156,6 +161,7 @@ See the include/linux/kmemleak.h header for the functions prototype.
- ``kmemleak_free_percpu`` - notify of a percpu memory block freeing
- ``kmemleak_update_trace`` - update object allocation stack trace
- ``kmemleak_not_leak`` - mark an object as not a leak
+- ``kmemleak_transient_leak`` - mark an object as a transient leak
- ``kmemleak_ignore`` - do not scan or report an object as leak
- ``kmemleak_scan_area`` - add scan areas inside a memory block
- ``kmemleak_no_scan`` - do not scan a memory block
@@ -169,7 +175,6 @@ mapping:
- ``kmemleak_alloc_phys``
- ``kmemleak_free_part_phys``
-- ``kmemleak_not_leak_phys``
- ``kmemleak_ignore_phys``
Dealing with false positives/negatives
@@ -218,3 +223,37 @@ the pointer is calculated by other methods than the usual container_of
macro or the pointer is stored in a location not scanned by kmemleak.
Page allocations and ioremap are not tracked.
+
+Testing with kmemleak-test
+--------------------------
+
+To check if you have all set up to use kmemleak, you can use the kmemleak-test
+module, a module that deliberately leaks memory. Set CONFIG_SAMPLE_KMEMLEAK
+as module (it can't be used as built-in) and boot the kernel with kmemleak
+enabled. Load the module and perform a scan with::
+
+ # modprobe kmemleak-test
+ # echo scan > /sys/kernel/debug/kmemleak
+
+Note that the you may not get results instantly or on the first scanning. When
+kmemleak gets results, it'll log ``kmemleak: <count of leaks> new suspected
+memory leaks``. Then read the file to see then::
+
+ # cat /sys/kernel/debug/kmemleak
+ unreferenced object 0xffff89862ca702e8 (size 32):
+ comm "modprobe", pid 2088, jiffies 4294680594 (age 375.486s)
+ hex dump (first 32 bytes):
+ 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
+ 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b a5 kkkkkkkkkkkkkkk.
+ backtrace:
+ [<00000000e0a73ec7>] 0xffffffffc01d2036
+ [<000000000c5d2a46>] do_one_initcall+0x41/0x1df
+ [<0000000046db7e0a>] do_init_module+0x55/0x200
+ [<00000000542b9814>] load_module+0x203c/0x2480
+ [<00000000c2850256>] __do_sys_finit_module+0xba/0xe0
+ [<000000006564e7ef>] do_syscall_64+0x43/0x110
+ [<000000007c873fa6>] entry_SYSCALL_64_after_hwframe+0x44/0xa9
+ ...
+
+Removing the module with ``rmmod kmemleak_test`` should also trigger some
+kmemleak results.
diff --git a/Documentation/dev-tools/kmsan.rst b/Documentation/dev-tools/kmsan.rst
new file mode 100644
index 000000000000..0dc668b183f6
--- /dev/null
+++ b/Documentation/dev-tools/kmsan.rst
@@ -0,0 +1,435 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. Copyright (C) 2022, Google LLC.
+
+===============================
+Kernel Memory Sanitizer (KMSAN)
+===============================
+
+KMSAN is a dynamic error detector aimed at finding uses of uninitialized
+values. It is based on compiler instrumentation, and is quite similar to the
+userspace `MemorySanitizer tool`_.
+
+An important note is that KMSAN is not intended for production use, because it
+drastically increases kernel memory footprint and slows the whole system down.
+
+Usage
+=====
+
+Building the kernel
+-------------------
+
+In order to build a kernel with KMSAN you will need a fresh Clang (14.0.6+).
+Please refer to `LLVM documentation`_ for the instructions on how to build Clang.
+
+Now configure and build the kernel with CONFIG_KMSAN enabled.
+
+Example report
+--------------
+
+Here is an example of a KMSAN report::
+
+ =====================================================
+ BUG: KMSAN: uninit-value in test_uninit_kmsan_check_memory+0x1be/0x380 [kmsan_test]
+ test_uninit_kmsan_check_memory+0x1be/0x380 mm/kmsan/kmsan_test.c:273
+ kunit_run_case_internal lib/kunit/test.c:333
+ kunit_try_run_case+0x206/0x420 lib/kunit/test.c:374
+ kunit_generic_run_threadfn_adapter+0x6d/0xc0 lib/kunit/try-catch.c:28
+ kthread+0x721/0x850 kernel/kthread.c:327
+ ret_from_fork+0x1f/0x30 ??:?
+
+ Uninit was stored to memory at:
+ do_uninit_local_array+0xfa/0x110 mm/kmsan/kmsan_test.c:260
+ test_uninit_kmsan_check_memory+0x1a2/0x380 mm/kmsan/kmsan_test.c:271
+ kunit_run_case_internal lib/kunit/test.c:333
+ kunit_try_run_case+0x206/0x420 lib/kunit/test.c:374
+ kunit_generic_run_threadfn_adapter+0x6d/0xc0 lib/kunit/try-catch.c:28
+ kthread+0x721/0x850 kernel/kthread.c:327
+ ret_from_fork+0x1f/0x30 ??:?
+
+ Local variable uninit created at:
+ do_uninit_local_array+0x4a/0x110 mm/kmsan/kmsan_test.c:256
+ test_uninit_kmsan_check_memory+0x1a2/0x380 mm/kmsan/kmsan_test.c:271
+
+ Bytes 4-7 of 8 are uninitialized
+ Memory access of size 8 starts at ffff888083fe3da0
+
+ CPU: 0 PID: 6731 Comm: kunit_try_catch Tainted: G B E 5.16.0-rc3+ #104
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014
+ =====================================================
+
+The report says that the local variable ``uninit`` was created uninitialized in
+``do_uninit_local_array()``. The third stack trace corresponds to the place
+where this variable was created.
+
+The first stack trace shows where the uninit value was used (in
+``test_uninit_kmsan_check_memory()``). The tool shows the bytes which were left
+uninitialized in the local variable, as well as the stack where the value was
+copied to another memory location before use.
+
+A use of uninitialized value ``v`` is reported by KMSAN in the following cases:
+
+ - in a condition, e.g. ``if (v) { ... }``;
+ - in an indexing or pointer dereferencing, e.g. ``array[v]`` or ``*v``;
+ - when it is copied to userspace or hardware, e.g. ``copy_to_user(..., &v, ...)``;
+ - when it is passed as an argument to a function, and
+ ``CONFIG_KMSAN_CHECK_PARAM_RETVAL`` is enabled (see below).
+
+The mentioned cases (apart from copying data to userspace or hardware, which is
+a security issue) are considered undefined behavior from the C11 Standard point
+of view.
+
+Disabling the instrumentation
+-----------------------------
+
+A function can be marked with ``__no_kmsan_checks``. Doing so makes KMSAN
+ignore uninitialized values in that function and mark its output as initialized.
+As a result, the user will not get KMSAN reports related to that function.
+
+Another function attribute supported by KMSAN is ``__no_sanitize_memory``.
+Applying this attribute to a function will result in KMSAN not instrumenting
+it, which can be helpful if we do not want the compiler to interfere with some
+low-level code (e.g. that marked with ``noinstr`` which implicitly adds
+``__no_sanitize_memory``).
+
+This however comes at a cost: stack allocations from such functions will have
+incorrect shadow/origin values, likely leading to false positives. Functions
+called from non-instrumented code may also receive incorrect metadata for their
+parameters.
+
+As a rule of thumb, avoid using ``__no_sanitize_memory`` explicitly.
+
+It is also possible to disable KMSAN for a single file (e.g. main.o)::
+
+ KMSAN_SANITIZE_main.o := n
+
+or for the whole directory::
+
+ KMSAN_SANITIZE := n
+
+in the Makefile. Think of this as applying ``__no_sanitize_memory`` to every
+function in the file or directory. Most users won't need KMSAN_SANITIZE, unless
+their code gets broken by KMSAN (e.g. runs at early boot time).
+
+KMSAN checks can also be temporarily disabled for the current task using
+``kmsan_disable_current()`` and ``kmsan_enable_current()`` calls. Each
+``kmsan_enable_current()`` call must be preceded by a
+``kmsan_disable_current()`` call; these call pairs may be nested. One needs to
+be careful with these calls, keeping the regions short and preferring other
+ways to disable instrumentation, where possible.
+
+Support
+=======
+
+In order for KMSAN to work the kernel must be built with Clang, which so far is
+the only compiler that has KMSAN support. The kernel instrumentation pass is
+based on the userspace `MemorySanitizer tool`_.
+
+The runtime library only supports x86_64 at the moment.
+
+How KMSAN works
+===============
+
+KMSAN shadow memory
+-------------------
+
+KMSAN associates a metadata byte (also called shadow byte) with every byte of
+kernel memory. A bit in the shadow byte is set if the corresponding bit of the
+kernel memory byte is uninitialized. Marking the memory uninitialized (i.e.
+setting its shadow bytes to ``0xff``) is called poisoning, marking it
+initialized (setting the shadow bytes to ``0x00``) is called unpoisoning.
+
+When a new variable is allocated on the stack, it is poisoned by default by
+instrumentation code inserted by the compiler (unless it is a stack variable
+that is immediately initialized). Any new heap allocation done without
+``__GFP_ZERO`` is also poisoned.
+
+Compiler instrumentation also tracks the shadow values as they are used along
+the code. When needed, instrumentation code invokes the runtime library in
+``mm/kmsan/`` to persist shadow values.
+
+The shadow value of a basic or compound type is an array of bytes of the same
+length. When a constant value is written into memory, that memory is unpoisoned.
+When a value is read from memory, its shadow memory is also obtained and
+propagated into all the operations which use that value. For every instruction
+that takes one or more values the compiler generates code that calculates the
+shadow of the result depending on those values and their shadows.
+
+Example::
+
+ int a = 0xff; // i.e. 0x000000ff
+ int b;
+ int c = a | b;
+
+In this case the shadow of ``a`` is ``0``, shadow of ``b`` is ``0xffffffff``,
+shadow of ``c`` is ``0xffffff00``. This means that the upper three bytes of
+``c`` are uninitialized, while the lower byte is initialized.
+
+Origin tracking
+---------------
+
+Every four bytes of kernel memory also have a so-called origin mapped to them.
+This origin describes the point in program execution at which the uninitialized
+value was created. Every origin is associated with either the full allocation
+stack (for heap-allocated memory), or the function containing the uninitialized
+variable (for locals).
+
+When an uninitialized variable is allocated on stack or heap, a new origin
+value is created, and that variable's origin is filled with that value. When a
+value is read from memory, its origin is also read and kept together with the
+shadow. For every instruction that takes one or more values, the origin of the
+result is one of the origins corresponding to any of the uninitialized inputs.
+If a poisoned value is written into memory, its origin is written to the
+corresponding storage as well.
+
+Example 1::
+
+ int a = 42;
+ int b;
+ int c = a + b;
+
+In this case the origin of ``b`` is generated upon function entry, and is
+stored to the origin of ``c`` right before the addition result is written into
+memory.
+
+Several variables may share the same origin address, if they are stored in the
+same four-byte chunk. In this case every write to either variable updates the
+origin for all of them. We have to sacrifice precision in this case, because
+storing origins for individual bits (and even bytes) would be too costly.
+
+Example 2::
+
+ int combine(short a, short b) {
+ union ret_t {
+ int i;
+ short s[2];
+ } ret;
+ ret.s[0] = a;
+ ret.s[1] = b;
+ return ret.i;
+ }
+
+If ``a`` is initialized and ``b`` is not, the shadow of the result would be
+0xffff0000, and the origin of the result would be the origin of ``b``.
+``ret.s[0]`` would have the same origin, but it will never be used, because
+that variable is initialized.
+
+If both function arguments are uninitialized, only the origin of the second
+argument is preserved.
+
+Origin chaining
+~~~~~~~~~~~~~~~
+
+To ease debugging, KMSAN creates a new origin for every store of an
+uninitialized value to memory. The new origin references both its creation stack
+and the previous origin the value had. This may cause increased memory
+consumption, so we limit the length of origin chains in the runtime.
+
+Clang instrumentation API
+-------------------------
+
+Clang instrumentation pass inserts calls to functions defined in
+``mm/kmsan/nstrumentation.c`` into the kernel code.
+
+Shadow manipulation
+~~~~~~~~~~~~~~~~~~~
+
+For every memory access the compiler emits a call to a function that returns a
+pair of pointers to the shadow and origin addresses of the given memory::
+
+ typedef struct {
+ void *shadow, *origin;
+ } shadow_origin_ptr_t
+
+ shadow_origin_ptr_t __msan_metadata_ptr_for_load_{1,2,4,8}(void *addr)
+ shadow_origin_ptr_t __msan_metadata_ptr_for_store_{1,2,4,8}(void *addr)
+ shadow_origin_ptr_t __msan_metadata_ptr_for_load_n(void *addr, uintptr_t size)
+ shadow_origin_ptr_t __msan_metadata_ptr_for_store_n(void *addr, uintptr_t size)
+
+The function name depends on the memory access size.
+
+The compiler makes sure that for every loaded value its shadow and origin
+values are read from memory. When a value is stored to memory, its shadow and
+origin are also stored using the metadata pointers.
+
+Handling locals
+~~~~~~~~~~~~~~~
+
+A special function is used to create a new origin value for a local variable and
+set the origin of that variable to that value::
+
+ void __msan_poison_alloca(void *addr, uintptr_t size, char *descr)
+
+Access to per-task data
+~~~~~~~~~~~~~~~~~~~~~~~
+
+At the beginning of every instrumented function KMSAN inserts a call to
+``__msan_get_context_state()``::
+
+ kmsan_context_state *__msan_get_context_state(void)
+
+``kmsan_context_state`` is declared in ``include/linux/kmsan.h``::
+
+ struct kmsan_context_state {
+ char param_tls[KMSAN_PARAM_SIZE];
+ char retval_tls[KMSAN_RETVAL_SIZE];
+ char va_arg_tls[KMSAN_PARAM_SIZE];
+ char va_arg_origin_tls[KMSAN_PARAM_SIZE];
+ u64 va_arg_overflow_size_tls;
+ char param_origin_tls[KMSAN_PARAM_SIZE];
+ depot_stack_handle_t retval_origin_tls;
+ };
+
+This structure is used by KMSAN to pass parameter shadows and origins between
+instrumented functions (unless the parameters are checked immediately by
+``CONFIG_KMSAN_CHECK_PARAM_RETVAL``).
+
+Passing uninitialized values to functions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Clang's MemorySanitizer instrumentation has an option,
+``-fsanitize-memory-param-retval``, which makes the compiler check function
+parameters passed by value, as well as function return values.
+
+The option is controlled by ``CONFIG_KMSAN_CHECK_PARAM_RETVAL``, which is
+enabled by default to let KMSAN report uninitialized values earlier.
+Please refer to the `LKML discussion`_ for more details.
+
+Because of the way the checks are implemented in LLVM (they are only applied to
+parameters marked as ``noundef``), not all parameters are guaranteed to be
+checked, so we cannot give up the metadata storage in ``kmsan_context_state``.
+
+String functions
+~~~~~~~~~~~~~~~~
+
+The compiler replaces calls to ``memcpy()``/``memmove()``/``memset()`` with the
+following functions. These functions are also called when data structures are
+initialized or copied, making sure shadow and origin values are copied alongside
+with the data::
+
+ void *__msan_memcpy(void *dst, void *src, uintptr_t n)
+ void *__msan_memmove(void *dst, void *src, uintptr_t n)
+ void *__msan_memset(void *dst, int c, uintptr_t n)
+
+Error reporting
+~~~~~~~~~~~~~~~
+
+For each use of a value the compiler emits a shadow check that calls
+``__msan_warning()`` in the case that value is poisoned::
+
+ void __msan_warning(u32 origin)
+
+``__msan_warning()`` causes KMSAN runtime to print an error report.
+
+Inline assembly instrumentation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+KMSAN instruments every inline assembly output with a call to::
+
+ void __msan_instrument_asm_store(void *addr, uintptr_t size)
+
+, which unpoisons the memory region.
+
+This approach may mask certain errors, but it also helps to avoid a lot of
+false positives in bitwise operations, atomics etc.
+
+Sometimes the pointers passed into inline assembly do not point to valid memory.
+In such cases they are ignored at runtime.
+
+
+Runtime library
+---------------
+
+The code is located in ``mm/kmsan/``.
+
+Per-task KMSAN state
+~~~~~~~~~~~~~~~~~~~~
+
+Every task_struct has an associated KMSAN task state that holds the KMSAN
+context (see above) and a per-task counter disallowing KMSAN reports::
+
+ struct kmsan_context {
+ ...
+ unsigned int depth;
+ struct kmsan_context_state cstate;
+ ...
+ }
+
+ struct task_struct {
+ ...
+ struct kmsan_context kmsan;
+ ...
+ }
+
+KMSAN contexts
+~~~~~~~~~~~~~~
+
+When running in a kernel task context, KMSAN uses ``current->kmsan.cstate`` to
+hold the metadata for function parameters and return values.
+
+But in the case the kernel is running in the interrupt, softirq or NMI context,
+where ``current`` is unavailable, KMSAN switches to per-cpu interrupt state::
+
+ DEFINE_PER_CPU(struct kmsan_ctx, kmsan_percpu_ctx);
+
+Metadata allocation
+~~~~~~~~~~~~~~~~~~~
+
+There are several places in the kernel for which the metadata is stored.
+
+1. Each ``struct page`` instance contains two pointers to its shadow and
+origin pages::
+
+ struct page {
+ ...
+ struct page *shadow, *origin;
+ ...
+ };
+
+At boot-time, the kernel allocates shadow and origin pages for every available
+kernel page. This is done quite late, when the kernel address space is already
+fragmented, so normal data pages may arbitrarily interleave with the metadata
+pages.
+
+This means that in general for two contiguous memory pages their shadow/origin
+pages may not be contiguous. Consequently, if a memory access crosses the
+boundary of a memory block, accesses to shadow/origin memory may potentially
+corrupt other pages or read incorrect values from them.
+
+In practice, contiguous memory pages returned by the same ``alloc_pages()``
+call will have contiguous metadata, whereas if these pages belong to two
+different allocations their metadata pages can be fragmented.
+
+For the kernel data (``.data``, ``.bss`` etc.) and percpu memory regions
+there also are no guarantees on metadata contiguity.
+
+In the case ``__msan_metadata_ptr_for_XXX_YYY()`` hits the border between two
+pages with non-contiguous metadata, it returns pointers to fake shadow/origin regions::
+
+ char dummy_load_page[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));
+ char dummy_store_page[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));
+
+``dummy_load_page`` is zero-initialized, so reads from it always yield zeroes.
+All stores to ``dummy_store_page`` are ignored.
+
+2. For vmalloc memory and modules, there is a direct mapping between the memory
+range, its shadow and origin. KMSAN reduces the vmalloc area by 3/4, making only
+the first quarter available to ``vmalloc()``. The second quarter of the vmalloc
+area contains shadow memory for the first quarter, the third one holds the
+origins. A small part of the fourth quarter contains shadow and origins for the
+kernel modules. Please refer to ``arch/x86/include/asm/pgtable_64_types.h`` for
+more details.
+
+When an array of pages is mapped into a contiguous virtual memory space, their
+shadow and origin pages are similarly mapped into contiguous regions.
+
+References
+==========
+
+E. Stepanov, K. Serebryany. `MemorySanitizer: fast detector of uninitialized
+memory use in C++
+<https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/43308.pdf>`_.
+In Proceedings of CGO 2015.
+
+.. _MemorySanitizer tool: https://clang.llvm.org/docs/MemorySanitizer.html
+.. _LLVM documentation: https://llvm.org/docs/GettingStarted.html
+.. _LKML discussion: https://lore.kernel.org/all/20220614144853.3693273-1-glider@google.com/
diff --git a/Documentation/dev-tools/kselftest.rst b/Documentation/dev-tools/kselftest.rst
index dad1bb8711e2..18c2da67fae4 100644
--- a/Documentation/dev-tools/kselftest.rst
+++ b/Documentation/dev-tools/kselftest.rst
@@ -7,18 +7,45 @@ directory. These are intended to be small tests to exercise individual code
paths in the kernel. Tests are intended to be run after building, installing
and booting a kernel.
+Kselftest from mainline can be run on older stable kernels. Running tests
+from mainline offers the best coverage. Several test rings run mainline
+kselftest suite on stable releases. The reason is that when a new test
+gets added to test existing code to regression test a bug, we should be
+able to run that test on an older kernel. Hence, it is important to keep
+code that can still test an older kernel and make sure it skips the test
+gracefully on newer releases.
+
+You can find additional information on Kselftest framework, how to
+write new tests using the framework on Kselftest wiki:
+
+https://kselftest.wiki.kernel.org/
+
On some systems, hot-plug tests could hang forever waiting for cpu and
memory to be ready to be offlined. A special hot-plug target is created
-to run full range of hot-plug tests. In default mode, hot-plug tests run
+to run the full range of hot-plug tests. In default mode, hot-plug tests run
in safe mode with a limited scope. In limited mode, cpu-hotplug test is
run on a single cpu as opposed to all hotplug capable cpus, and memory
hotplug test is run on 2% of hotplug capable memory instead of 10%.
+kselftest runs as a userspace process. Tests that can be written/run in
+userspace may wish to use the `Test Harness`_. Tests that need to be
+run in kernel space may wish to use a `Test Module`_.
+
+Documentation on the tests
+==========================
+
+For documentation on the kselftests themselves, see:
+
+.. toctree::
+
+ testing-devices
+
Running the selftests (hotplug tests are run in limited mode)
=============================================================
To build the tests::
+ $ make headers
$ make -C tools/testing/selftests
To run the tests::
@@ -31,17 +58,32 @@ To build and run the tests with a single command, use::
Note that some tests will require root privileges.
-Build and run from user specific object directory (make O=dir)::
+Kselftest supports saving output files in a separate directory and then
+running tests. To locate output files in a separate directory two syntaxes
+are supported. In both cases the working directory must be the root of the
+kernel src. This is applicable to "Running a subset of selftests" section
+below.
+
+To build, save output files in a separate directory with O= ::
$ make O=/tmp/kselftest kselftest
-Build and run KBUILD_OUTPUT directory (make KBUILD_OUTPUT=)::
+To build, save output files in a separate directory with KBUILD_OUTPUT ::
+
+ $ export KBUILD_OUTPUT=/tmp/kselftest; make kselftest
- $ make KBUILD_OUTPUT=/tmp/kselftest kselftest
+The O= assignment takes precedence over the KBUILD_OUTPUT environment
+variable.
-The above commands run the tests and print pass/fail summary to make it
-easier to understand the test results. Please find the detailed individual
-test results for each test in /tmp/testname file(s).
+The above commands by default run the tests and print full pass/fail report.
+Kselftest supports "summary" option to make it easier to understand the test
+results. Please find the detailed individual test results for each test in
+/tmp/testname file(s) when summary option is specified. This is applicable
+to "Running a subset of selftests" section below.
+
+To run kselftest with summary option enabled ::
+
+ $ make summary=1 kselftest
Running a subset of selftests
=============================
@@ -57,17 +99,29 @@ You can specify multiple tests to build and run::
$ make TARGETS="size timers" kselftest
-Build and run from user specific object directory (make O=dir)::
+To build, save output files in a separate directory with O= ::
$ make O=/tmp/kselftest TARGETS="size timers" kselftest
-Build and run KBUILD_OUTPUT directory (make KBUILD_OUTPUT=)::
+To build, save output files in a separate directory with KBUILD_OUTPUT ::
+
+ $ export KBUILD_OUTPUT=/tmp/kselftest; make TARGETS="size timers" kselftest
+
+Additionally you can use the "SKIP_TARGETS" variable on the make command
+line to specify one or more targets to exclude from the TARGETS list.
+
+To run all tests but a single subsystem::
- $ make KBUILD_OUTPUT=/tmp/kselftest TARGETS="size timers" kselftest
+ $ make -C tools/testing/selftests SKIP_TARGETS=ptrace run_tests
-The above commands run the tests and print pass/fail summary to make it
-easier to understand the test results. Please find the detailed individual
-test results for each test in /tmp/testname file(s).
+You can specify multiple tests to skip::
+
+ $ make SKIP_TARGETS="size timers" kselftest
+
+You can also specify a restricted list of tests to run together with a
+dedicated skiplist::
+
+ $ make TARGETS="breakpoints size timers" SKIP_TARGETS=size kselftest
See the top-level tools/testing/selftests/Makefile for the list of all
possible targets.
@@ -89,32 +143,86 @@ Note that some tests will require root privileges.
Install selftests
=================
-You can use kselftest_install.sh tool installs selftests in default
-location which is tools/testing/selftests/kselftest or a user specified
-location.
+You can use the "install" target of "make" (which calls the `kselftest_install.sh`
+tool) to install selftests in the default location (`tools/testing/selftests/kselftest_install`),
+or in a user specified location via the `INSTALL_PATH` "make" variable.
To install selftests in default location::
- $ cd tools/testing/selftests
- $ ./kselftest_install.sh
+ $ make -C tools/testing/selftests install
To install selftests in a user specified location::
- $ cd tools/testing/selftests
- $ ./kselftest_install.sh install_dir
+ $ make -C tools/testing/selftests install INSTALL_PATH=/some/other/path
Running installed selftests
===========================
-Kselftest install as well as the Kselftest tarball provide a script
-named "run_kselftest.sh" to run the tests.
+Found in the install directory, as well as in the Kselftest tarball,
+is a script named `run_kselftest.sh` to run the tests.
-You can simply do the following to run the installed Kselftests. Please
+You can simply do the following to run the installed Kselftests. Please
note some tests will require root privileges::
- $ cd kselftest
+ $ cd kselftest_install
$ ./run_kselftest.sh
+To see the list of available tests, the `-l` option can be used::
+
+ $ ./run_kselftest.sh -l
+
+The `-c` option can be used to run all the tests from a test collection, or
+the `-t` option for specific single tests. Either can be used multiple times::
+
+ $ ./run_kselftest.sh -c size -c seccomp -t timers:posix_timers -t timer:nanosleep
+
+For other features see the script usage output, seen with the `-h` option.
+
+Timeout for selftests
+=====================
+
+Selftests are designed to be quick and so a default timeout is used of 45
+seconds for each test. Tests can override the default timeout by adding
+a settings file in their directory and set a timeout variable there to the
+configured a desired upper timeout for the test. Only a few tests override
+the timeout with a value higher than 45 seconds, selftests strives to keep
+it that way. Timeouts in selftests are not considered fatal because the
+system under which a test runs may change and this can also modify the
+expected time it takes to run a test. If you have control over the systems
+which will run the tests you can configure a test runner on those systems to
+use a greater or lower timeout on the command line as with the `-o` or
+the `--override-timeout` argument. For example to use 165 seconds instead
+one would use::
+
+ $ ./run_kselftest.sh --override-timeout 165
+
+You can look at the TAP output to see if you ran into the timeout. Test
+runners which know a test must run under a specific time can then optionally
+treat these timeouts then as fatal.
+
+Packaging selftests
+===================
+
+In some cases packaging is desired, such as when tests need to run on a
+different system. To package selftests, run::
+
+ $ make -C tools/testing/selftests gen_tar
+
+This generates a tarball in the `INSTALL_PATH/kselftest-packages` directory. By
+default, `.gz` format is used. The tar compression format can be overridden by
+specifying a `FORMAT` make variable. Any value recognized by `tar's auto-compress`_
+option is supported, such as::
+
+ $ make -C tools/testing/selftests gen_tar FORMAT=.xz
+
+`make gen_tar` invokes `make install` so you can use it to package a subset of
+tests by using variables specified in `Running a subset of selftests`_
+section::
+
+ $ make -C tools/testing/selftests gen_tar TARGETS="size" FORMAT=.xz
+
+.. _tar's auto-compress: https://www.gnu.org/software/tar/manual/html_node/gzip.html#auto_002dcompress
+
Contributing new tests
======================
@@ -129,17 +237,36 @@ In general, the rules for selftests are
* Don't cause the top-level "make run_tests" to fail if your feature is
unconfigured.
+ * The output of tests must conform to the TAP standard to ensure high
+ testing quality and to capture failures/errors with specific details.
+ The kselftest.h and kselftest_harness.h headers provide wrappers for
+ outputting test results. These wrappers should be used for pass,
+ fail, exit, and skip messages. CI systems can easily parse TAP output
+ messages to detect test results.
+
Contributing new tests (details)
================================
+ * In your Makefile, use facilities from lib.mk by including it instead of
+ reinventing the wheel. Specify flags and binaries generation flags on
+ need basis before including lib.mk. ::
+
+ CFLAGS = $(KHDR_INCLUDES)
+ TEST_GEN_PROGS := close_range_test
+ include ../lib.mk
+
* Use TEST_GEN_XXX if such binaries or files are generated during
compiling.
TEST_PROGS, TEST_GEN_PROGS mean it is the executable tested by
default.
+ TEST_GEN_MODS_DIR should be used by tests that require modules to be built
+ before the test starts. The variable will contain the name of the directory
+ containing the modules.
+
TEST_CUSTOM_PROGS should be used by tests that require custom build
- rule and prevent common build rule use.
+ rules and prevent common build rule use.
TEST_PROGS are for test shell scripts. Please ensure shell script has
its exec bit set. Otherwise, lib.mk run_tests will generate a warning.
@@ -148,24 +275,148 @@ Contributing new tests (details)
TEST_PROGS_EXTENDED, TEST_GEN_PROGS_EXTENDED mean it is the
executable which is not tested by default.
+
TEST_FILES, TEST_GEN_FILES mean it is the file which is used by
test.
+ TEST_INCLUDES is similar to TEST_FILES, it lists files which should be
+ included when exporting or installing the tests, with the following
+ differences:
+
+ * symlinks to files in other directories are preserved
+ * the part of paths below tools/testing/selftests/ is preserved when
+ copying the files to the output directory
+
+ TEST_INCLUDES is meant to list dependencies located in other directories of
+ the selftests hierarchy.
+
* First use the headers inside the kernel source and/or git repo, and then the
system headers. Headers for the kernel release as opposed to headers
installed by the distro on the system should be the primary focus to be able
- to find regressions.
+ to find regressions. Use KHDR_INCLUDES in Makefile to include headers from
+ the kernel source.
* If a test needs specific kernel config options enabled, add a config file in
the test directory to enable them.
e.g: tools/testing/selftests/android/config
+ * Create a .gitignore file inside test directory and add all generated objects
+ in it.
+
+ * Add new test name in TARGETS in selftests/Makefile::
+
+ TARGETS += android
+
+ * All changes should pass::
+
+ kselftest-{all,install,clean,gen_tar}
+ kselftest-{all,install,clean,gen_tar} O=abo_path
+ kselftest-{all,install,clean,gen_tar} O=rel_path
+ make -C tools/testing/selftests {all,install,clean,gen_tar}
+ make -C tools/testing/selftests {all,install,clean,gen_tar} O=abs_path
+ make -C tools/testing/selftests {all,install,clean,gen_tar} O=rel_path
+
+Test Module
+===========
+
+Kselftest tests the kernel from userspace. Sometimes things need
+testing from within the kernel, one method of doing this is to create a
+test module. We can tie the module into the kselftest framework by
+using a shell script test runner. ``kselftest/module.sh`` is designed
+to facilitate this process. There is also a header file provided to
+assist writing kernel modules that are for use with kselftest:
+
+- ``tools/testing/selftests/kselftest_module.h``
+- ``tools/testing/selftests/kselftest/module.sh``
+
+Note that test modules should taint the kernel with TAINT_TEST. This will
+happen automatically for modules which are in the ``tools/testing/``
+directory, or for modules which use the ``kselftest_module.h`` header above.
+Otherwise, you'll need to add ``MODULE_INFO(test, "Y")`` to your module
+source. selftests which do not load modules typically should not taint the
+kernel, but in cases where a non-test module is loaded, TEST_TAINT can be
+applied from userspace by writing to ``/proc/sys/kernel/tainted``.
+
+How to use
+----------
+
+Here we show the typical steps to create a test module and tie it into
+kselftest. We use kselftests for lib/ as an example.
+
+1. Create the test module
+
+2. Create the test script that will run (load/unload) the module
+ e.g. ``tools/testing/selftests/lib/bitmap.sh``
+
+3. Add line to config file e.g. ``tools/testing/selftests/lib/config``
+
+4. Add test script to makefile e.g. ``tools/testing/selftests/lib/Makefile``
+
+5. Verify it works:
+
+.. code-block:: sh
+
+ # Assumes you have booted a fresh build of this kernel tree
+ cd /path/to/linux/tree
+ make kselftest-merge
+ make modules
+ sudo make modules_install
+ make TARGETS=lib kselftest
+
+Example Module
+--------------
+
+A bare bones test module might look like this:
+
+.. code-block:: c
+
+ // SPDX-License-Identifier: GPL-2.0+
+
+ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+ #include "../tools/testing/selftests/kselftest_module.h"
+
+ KSTM_MODULE_GLOBALS();
+
+ /*
+ * Kernel module for testing the foobinator
+ */
+
+ static int __init test_function()
+ {
+ ...
+ }
+
+ static void __init selftest(void)
+ {
+ KSTM_CHECK_ZERO(do_test_case("", 0));
+ }
+
+ KSTM_MODULE_LOADERS(test_foo);
+ MODULE_AUTHOR("John Developer <jd@fooman.org>");
+ MODULE_LICENSE("GPL");
+ MODULE_INFO(test, "Y");
+
+Example test script
+-------------------
+
+.. code-block:: sh
+
+ #!/bin/bash
+ # SPDX-License-Identifier: GPL-2.0+
+ $(dirname $0)/../kselftest/module.sh "foo" test_foo
+
+
Test Harness
============
-The kselftest_harness.h file contains useful helpers to build tests. The tests
-from tools/testing/selftests/seccomp/seccomp_bpf.c can be used as example.
+The kselftest_harness.h file contains useful helpers to build tests. The
+test harness is for userspace testing, for kernel space testing see `Test
+Module`_ above.
+
+The tests from tools/testing/selftests/seccomp/seccomp_bpf.c can be used as
+example.
Example
-------
@@ -179,7 +430,8 @@ Helpers
.. kernel-doc:: tools/testing/selftests/kselftest_harness.h
:functions: TH_LOG TEST TEST_SIGNAL FIXTURE FIXTURE_DATA FIXTURE_SETUP
- FIXTURE_TEARDOWN TEST_F TEST_HARNESS_MAIN
+ FIXTURE_TEARDOWN TEST_F TEST_HARNESS_MAIN FIXTURE_VARIANT
+ FIXTURE_VARIANT_ADD
Operators
---------
diff --git a/Documentation/dev-tools/ktap.rst b/Documentation/dev-tools/ktap.rst
new file mode 100644
index 000000000000..a9810bed5fd4
--- /dev/null
+++ b/Documentation/dev-tools/ktap.rst
@@ -0,0 +1,314 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================================================
+The Kernel Test Anything Protocol (KTAP), version 1
+===================================================
+
+TAP, or the Test Anything Protocol is a format for specifying test results used
+by a number of projects. Its website and specification are found at this `link
+<https://testanything.org/>`_. The Linux Kernel largely uses TAP output for test
+results. However, Kernel testing frameworks have special needs for test results
+which don't align with the original TAP specification. Thus, a "Kernel TAP"
+(KTAP) format is specified to extend and alter TAP to support these use-cases.
+This specification describes the generally accepted format of KTAP as it is
+currently used in the kernel.
+
+KTAP test results describe a series of tests (which may be nested: i.e., test
+can have subtests), each of which can contain both diagnostic data -- e.g., log
+lines -- and a final result. The test structure and results are
+machine-readable, whereas the diagnostic data is unstructured and is there to
+aid human debugging.
+
+KTAP output is built from four different types of lines:
+
+- Version lines
+- Plan lines
+- Test case result lines
+- Diagnostic lines
+
+In general, valid KTAP output should also form valid TAP output, but some
+information, in particular nested test results, may be lost. Also note that
+there is a stagnant draft specification for TAP14, KTAP diverges from this in
+a couple of places (notably the "Subtest" header), which are described where
+relevant later in this document.
+
+Version lines
+-------------
+
+All KTAP-formatted results begin with a "version line" which specifies which
+version of the (K)TAP standard the result is compliant with.
+
+For example:
+
+- "KTAP version 1"
+- "TAP version 13"
+- "TAP version 14"
+
+Note that, in KTAP, subtests also begin with a version line, which denotes the
+start of the nested test results. This differs from TAP14, which uses a
+separate "Subtest" line.
+
+While, going forward, "KTAP version 1" should be used by compliant tests, it
+is expected that most parsers and other tooling will accept the other versions
+listed here for compatibility with existing tests and frameworks.
+
+Plan lines
+----------
+
+A test plan provides the number of tests (or subtests) in the KTAP output.
+
+Plan lines must follow the format of "1..N" where N is the number of tests or subtests.
+Plan lines follow version lines to indicate the number of nested tests.
+
+While there are cases where the number of tests is not known in advance -- in
+which case the test plan may be omitted -- it is strongly recommended one is
+present where possible.
+
+Test case result lines
+----------------------
+
+Test case result lines indicate the final status of a test.
+They are required and must have the format:
+
+.. code-block:: none
+
+ <result> <number> [<description>][ # [<directive>] [<diagnostic data>]]
+
+The result can be either "ok", which indicates the test case passed,
+or "not ok", which indicates that the test case failed.
+
+<number> represents the number of the test being performed. The first test must
+have the number 1 and the number then must increase by 1 for each additional
+subtest within the same test at the same nesting level.
+
+The description is a description of the test, generally the name of
+the test, and can be any string of characters other than # or a
+newline. The description is optional, but recommended.
+
+The directive and any diagnostic data is optional. If either are present, they
+must follow a hash sign, "#".
+
+A directive is a keyword that indicates a different outcome for a test other
+than passed and failed. The directive is optional, and consists of a single
+keyword preceding the diagnostic data. In the event that a parser encounters
+a directive it doesn't support, it should fall back to the "ok" / "not ok"
+result.
+
+Currently accepted directives are:
+
+- "SKIP", which indicates a test was skipped (note the result of the test case
+ result line can be either "ok" or "not ok" if the SKIP directive is used)
+- "TODO", which indicates that a test is not expected to pass at the moment,
+ e.g. because the feature it is testing is known to be broken. While this
+ directive is inherited from TAP, its use in the kernel is discouraged.
+- "XFAIL", which indicates that a test is expected to fail. This is similar
+ to "TODO", above, and is used by some kselftest tests.
+- “TIMEOUT”, which indicates a test has timed out (note the result of the test
+ case result line should be “not ok” if the TIMEOUT directive is used)
+- “ERROR”, which indicates that the execution of a test has failed due to a
+ specific error that is included in the diagnostic data. (note the result of
+ the test case result line should be “not ok” if the ERROR directive is used)
+
+The diagnostic data is a plain-text field which contains any additional details
+about why this result was produced. This is typically an error message for ERROR
+or failed tests, or a description of missing dependencies for a SKIP result.
+
+The diagnostic data field is optional, and results which have neither a
+directive nor any diagnostic data do not need to include the "#" field
+separator.
+
+Example result lines include::
+
+ ok 1 test_case_name
+
+The test "test_case_name" passed.
+
+::
+
+ not ok 1 test_case_name
+
+The test "test_case_name" failed.
+
+::
+
+ ok 1 test # SKIP necessary dependency unavailable
+
+The test "test" was SKIPPED with the diagnostic message "necessary dependency
+unavailable".
+
+::
+
+ not ok 1 test # TIMEOUT 30 seconds
+
+The test "test" timed out, with diagnostic data "30 seconds".
+
+::
+
+ ok 5 check return code # rcode=0
+
+The test "check return code" passed, with additional diagnostic data “rcode=0”
+
+
+Diagnostic lines
+----------------
+
+If tests wish to output any further information, they should do so using
+"diagnostic lines". Diagnostic lines are optional, freeform text, and are
+often used to describe what is being tested and any intermediate results in
+more detail than the final result and diagnostic data line provides.
+
+Diagnostic lines are formatted as "# <diagnostic_description>", where the
+description can be any string. Diagnostic lines can be anywhere in the test
+output. As a rule, diagnostic lines regarding a test are directly before the
+test result line for that test.
+
+Note that most tools will treat unknown lines (see below) as diagnostic lines,
+even if they do not start with a "#": this is to capture any other useful
+kernel output which may help debug the test. It is nevertheless recommended
+that tests always prefix any diagnostic output they have with a "#" character.
+
+Unknown lines
+-------------
+
+There may be lines within KTAP output that do not follow the format of one of
+the four formats for lines described above. This is allowed, however, they will
+not influence the status of the tests.
+
+This is an important difference from TAP. Kernel tests may print messages
+to the system console or a log file. Both of these destinations may contain
+messages either from unrelated kernel or userspace activity, or kernel
+messages from non-test code that is invoked by the test. The kernel code
+invoked by the test likely is not aware that a test is in progress and
+thus can not print the message as a diagnostic message.
+
+Nested tests
+------------
+
+In KTAP, tests can be nested. This is done by having a test include within its
+output an entire set of KTAP-formatted results. This can be used to categorize
+and group related tests, or to split out different results from the same test.
+
+The "parent" test's result should consist of all of its subtests' results,
+starting with another KTAP version line and test plan, and end with the overall
+result. If one of the subtests fail, for example, the parent test should also
+fail.
+
+Additionally, all lines in a subtest should be indented. One level of
+indentation is two spaces: " ". The indentation should begin at the version
+line and should end before the parent test's result line.
+
+"Unknown lines" are not considered to be lines in a subtest and thus are
+allowed to be either indented or not indented.
+
+An example of a test with two nested subtests:
+
+::
+
+ KTAP version 1
+ 1..1
+ KTAP version 1
+ 1..2
+ ok 1 test_1
+ not ok 2 test_2
+ # example failed
+ not ok 1 example
+
+An example format with multiple levels of nested testing:
+
+::
+
+ KTAP version 1
+ 1..2
+ KTAP version 1
+ 1..2
+ KTAP version 1
+ 1..2
+ not ok 1 test_1
+ ok 2 test_2
+ not ok 1 test_3
+ ok 2 test_4 # SKIP
+ not ok 1 example_test_1
+ ok 2 example_test_2
+
+
+Major differences between TAP and KTAP
+--------------------------------------
+
+================================================== ========= ===============
+Feature TAP KTAP
+================================================== ========= ===============
+yaml and json in diagnosic message ok not recommended
+TODO directive ok not recognized
+allows an arbitrary number of tests to be nested no yes
+"Unknown lines" are in category of "Anything else" yes no
+"Unknown lines" are incorrect allowed
+================================================== ========= ===============
+
+The TAP14 specification does permit nested tests, but instead of using another
+nested version line, uses a line of the form
+"Subtest: <name>" where <name> is the name of the parent test.
+
+Example KTAP output
+--------------------
+::
+
+ KTAP version 1
+ 1..1
+ KTAP version 1
+ 1..3
+ KTAP version 1
+ 1..1
+ # test_1: initializing test_1
+ ok 1 test_1
+ ok 1 example_test_1
+ KTAP version 1
+ 1..2
+ ok 1 test_1 # SKIP test_1 skipped
+ ok 2 test_2
+ ok 2 example_test_2
+ KTAP version 1
+ 1..3
+ ok 1 test_1
+ # test_2: FAIL
+ not ok 2 test_2
+ ok 3 test_3 # SKIP test_3 skipped
+ not ok 3 example_test_3
+ not ok 1 main_test
+
+This output defines the following hierarchy:
+
+A single test called "main_test", which fails, and has three subtests:
+
+- "example_test_1", which passes, and has one subtest:
+
+ - "test_1", which passes, and outputs the diagnostic message "test_1: initializing test_1"
+
+- "example_test_2", which passes, and has two subtests:
+
+ - "test_1", which is skipped, with the explanation "test_1 skipped"
+ - "test_2", which passes
+
+- "example_test_3", which fails, and has three subtests
+
+ - "test_1", which passes
+ - "test_2", which outputs the diagnostic line "test_2: FAIL", and fails.
+ - "test_3", which is skipped with the explanation "test_3 skipped"
+
+Note that the individual subtests with the same names do not conflict, as they
+are found in different parent tests. This output also exhibits some sensible
+rules for "bubbling up" test results: a test fails if any of its subtests fail.
+Skipped tests do not affect the result of the parent test (though it often
+makes sense for a test to be marked skipped if _all_ of its subtests have been
+skipped).
+
+See also:
+---------
+
+- The TAP specification:
+ https://testanything.org/tap-version-13-specification.html
+- The (stagnant) TAP version 14 specification:
+ https://github.com/TestAnything/Specification/blob/tap-14-specification/specification.md
+- The kselftest documentation:
+ Documentation/dev-tools/kselftest.rst
+- The KUnit documentation:
+ Documentation/dev-tools/kunit/index.rst
diff --git a/Documentation/dev-tools/kunit/api/clk.rst b/Documentation/dev-tools/kunit/api/clk.rst
new file mode 100644
index 000000000000..eeaa50089453
--- /dev/null
+++ b/Documentation/dev-tools/kunit/api/clk.rst
@@ -0,0 +1,10 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+========
+Clk API
+========
+
+The KUnit clk API is used to test clk providers and clk consumers.
+
+.. kernel-doc:: drivers/clk/clk_kunit_helpers.c
+ :export:
diff --git a/Documentation/dev-tools/kunit/api/functionredirection.rst b/Documentation/dev-tools/kunit/api/functionredirection.rst
new file mode 100644
index 000000000000..3791efc2fcca
--- /dev/null
+++ b/Documentation/dev-tools/kunit/api/functionredirection.rst
@@ -0,0 +1,162 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+========================
+Function Redirection API
+========================
+
+Overview
+========
+
+When writing unit tests, it's important to be able to isolate the code being
+tested from other parts of the kernel. This ensures the reliability of the test
+(it won't be affected by external factors), reduces dependencies on specific
+hardware or config options (making the test easier to run), and protects the
+stability of the rest of the system (making it less likely for test-specific
+state to interfere with the rest of the system).
+
+While for some code (typically generic data structures, helpers, and other
+"pure functions") this is trivial, for others (like device drivers,
+filesystems, core subsystems) the code is heavily coupled with other parts of
+the kernel.
+
+This coupling is often due to global state in some way: be it a global list of
+devices, the filesystem, or some hardware state. Tests need to either carefully
+manage, isolate, and restore state, or they can avoid it altogether by
+replacing access to and mutation of this state with a "fake" or "mock" variant.
+
+By refactoring access to such state, such as by introducing a layer of
+indirection which can use or emulate a separate set of test state. However,
+such refactoring comes with its own costs (and undertaking significant
+refactoring before being able to write tests is suboptimal).
+
+A simpler way to intercept and replace some of the function calls is to use
+function redirection via static stubs.
+
+
+Static Stubs
+============
+
+Static stubs are a way of redirecting calls to one function (the "real"
+function) to another function (the "replacement" function).
+
+It works by adding a macro to the "real" function which checks to see if a test
+is running, and if a replacement function is available. If so, that function is
+called in place of the original.
+
+Using static stubs is pretty straightforward:
+
+1. Add the KUNIT_STATIC_STUB_REDIRECT() macro to the start of the "real"
+ function.
+
+ This should be the first statement in the function, after any variable
+ declarations. KUNIT_STATIC_STUB_REDIRECT() takes the name of the
+ function, followed by all of the arguments passed to the real function.
+
+ For example:
+
+ .. code-block:: c
+
+ void send_data_to_hardware(const char *str)
+ {
+ KUNIT_STATIC_STUB_REDIRECT(send_data_to_hardware, str);
+ /* real implementation */
+ }
+
+2. Write one or more replacement functions.
+
+ These functions should have the same function signature as the real function.
+ In the event they need to access or modify test-specific state, they can use
+ kunit_get_current_test() to get a struct kunit pointer. This can then
+ be passed to the expectation/assertion macros, or used to look up KUnit
+ resources.
+
+ For example:
+
+ .. code-block:: c
+
+ void fake_send_data_to_hardware(const char *str)
+ {
+ struct kunit *test = kunit_get_current_test();
+ KUNIT_EXPECT_STREQ(test, str, "Hello World!");
+ }
+
+3. Activate the static stub from your test.
+
+ From within a test, the redirection can be enabled with
+ kunit_activate_static_stub(), which accepts a struct kunit pointer,
+ the real function, and the replacement function. You can call this several
+ times with different replacement functions to swap out implementations of the
+ function.
+
+ In our example, this would be
+
+ .. code-block:: c
+
+ kunit_activate_static_stub(test,
+ send_data_to_hardware,
+ fake_send_data_to_hardware);
+
+4. Call (perhaps indirectly) the real function.
+
+ Once the redirection is activated, any call to the real function will call
+ the replacement function instead. Such calls may be buried deep in the
+ implementation of another function, but must occur from the test's kthread.
+
+ For example:
+
+ .. code-block:: c
+
+ send_data_to_hardware("Hello World!"); /* Succeeds */
+ send_data_to_hardware("Something else"); /* Fails the test. */
+
+5. (Optionally) disable the stub.
+
+ When you no longer need it, disable the redirection (and hence resume the
+ original behaviour of the 'real' function) using
+ kunit_deactivate_static_stub(). Otherwise, it will be automatically disabled
+ when the test exits.
+
+ For example:
+
+ .. code-block:: c
+
+ kunit_deactivate_static_stub(test, send_data_to_hardware);
+
+
+It's also possible to use these replacement functions to test to see if a
+function is called at all, for example:
+
+.. code-block:: c
+
+ void send_data_to_hardware(const char *str)
+ {
+ KUNIT_STATIC_STUB_REDIRECT(send_data_to_hardware, str);
+ /* real implementation */
+ }
+
+ /* In test file */
+ int times_called = 0;
+ void fake_send_data_to_hardware(const char *str)
+ {
+ times_called++;
+ }
+ ...
+ /* In the test case, redirect calls for the duration of the test */
+ kunit_activate_static_stub(test, send_data_to_hardware, fake_send_data_to_hardware);
+
+ send_data_to_hardware("hello");
+ KUNIT_EXPECT_EQ(test, times_called, 1);
+
+ /* Can also deactivate the stub early, if wanted */
+ kunit_deactivate_static_stub(test, send_data_to_hardware);
+
+ send_data_to_hardware("hello again");
+ KUNIT_EXPECT_EQ(test, times_called, 1);
+
+
+
+API Reference
+=============
+
+.. kernel-doc:: include/kunit/static_stub.h
+ :internal:
diff --git a/Documentation/dev-tools/kunit/api/index.rst b/Documentation/dev-tools/kunit/api/index.rst
new file mode 100644
index 000000000000..5cdb552a0808
--- /dev/null
+++ b/Documentation/dev-tools/kunit/api/index.rst
@@ -0,0 +1,48 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============
+API Reference
+=============
+.. toctree::
+ :hidden:
+
+ test
+ resource
+ functionredirection
+ clk
+ of
+ platformdevice
+
+
+This page documents the KUnit kernel testing API. It is divided into the
+following sections:
+
+Core KUnit API
+==============
+
+Documentation/dev-tools/kunit/api/test.rst
+
+ - Documents all of the standard testing API
+
+Documentation/dev-tools/kunit/api/resource.rst
+
+ - Documents the KUnit resource API
+
+Documentation/dev-tools/kunit/api/functionredirection.rst
+
+ - Documents the KUnit Function Redirection API
+
+Driver KUnit API
+================
+
+Documentation/dev-tools/kunit/api/clk.rst
+
+ - Documents the KUnit clk API
+
+Documentation/dev-tools/kunit/api/of.rst
+
+ - Documents the KUnit device tree (OF) API
+
+Documentation/dev-tools/kunit/api/platformdevice.rst
+
+ - Documents the KUnit platform device API
diff --git a/Documentation/dev-tools/kunit/api/of.rst b/Documentation/dev-tools/kunit/api/of.rst
new file mode 100644
index 000000000000..cb4193dcddbb
--- /dev/null
+++ b/Documentation/dev-tools/kunit/api/of.rst
@@ -0,0 +1,13 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================
+Device Tree (OF) API
+====================
+
+The KUnit device tree API is used to test device tree (of_*) dependent code.
+
+.. kernel-doc:: include/kunit/of.h
+ :internal:
+
+.. kernel-doc:: drivers/of/of_kunit_helpers.c
+ :export:
diff --git a/Documentation/dev-tools/kunit/api/platformdevice.rst b/Documentation/dev-tools/kunit/api/platformdevice.rst
new file mode 100644
index 000000000000..49ddd5729003
--- /dev/null
+++ b/Documentation/dev-tools/kunit/api/platformdevice.rst
@@ -0,0 +1,10 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================
+Platform Device API
+===================
+
+The KUnit platform device API is used to test platform devices.
+
+.. kernel-doc:: lib/kunit/platform.c
+ :export:
diff --git a/Documentation/dev-tools/kunit/api/resource.rst b/Documentation/dev-tools/kunit/api/resource.rst
new file mode 100644
index 000000000000..ec6002a6b0db
--- /dev/null
+++ b/Documentation/dev-tools/kunit/api/resource.rst
@@ -0,0 +1,22 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============
+Resource API
+============
+
+This file documents the KUnit resource API.
+
+Most users won't need to use this API directly, power users can use it to store
+state on a per-test basis, register custom cleanup actions, and more.
+
+.. kernel-doc:: include/kunit/resource.h
+ :internal:
+
+Managed Devices
+---------------
+
+Functions for using KUnit-managed struct device and struct device_driver.
+Include ``kunit/device.h`` to use these.
+
+.. kernel-doc:: include/kunit/device.h
+ :internal:
diff --git a/Documentation/dev-tools/kunit/api/test.rst b/Documentation/dev-tools/kunit/api/test.rst
new file mode 100644
index 000000000000..c5eca423e8b6
--- /dev/null
+++ b/Documentation/dev-tools/kunit/api/test.rst
@@ -0,0 +1,10 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+========
+Test API
+========
+
+This file documents all of the standard testing API.
+
+.. kernel-doc:: include/kunit/test.h
+ :internal:
diff --git a/Documentation/dev-tools/kunit/architecture.rst b/Documentation/dev-tools/kunit/architecture.rst
new file mode 100644
index 000000000000..f335f883f8f6
--- /dev/null
+++ b/Documentation/dev-tools/kunit/architecture.rst
@@ -0,0 +1,196 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==================
+KUnit Architecture
+==================
+
+The KUnit architecture is divided into two parts:
+
+- `In-Kernel Testing Framework`_
+- `kunit_tool (Command-line Test Harness)`_
+
+In-Kernel Testing Framework
+===========================
+
+The kernel testing library supports KUnit tests written in C using
+KUnit. These KUnit tests are kernel code. KUnit performs the following
+tasks:
+
+- Organizes tests
+- Reports test results
+- Provides test utilities
+
+Test Cases
+----------
+
+The test case is the fundamental unit in KUnit. KUnit test cases are organised
+into suites. A KUnit test case is a function with type signature
+``void (*)(struct kunit *test)``. These test case functions are wrapped in a
+struct called struct kunit_case.
+
+.. note:
+ ``generate_params`` is optional for non-parameterized tests.
+
+Each KUnit test case receives a ``struct kunit`` context object that tracks a
+running test. The KUnit assertion macros and other KUnit utilities use the
+``struct kunit`` context object. As an exception, there are two fields:
+
+- ``->priv``: The setup functions can use it to store arbitrary test
+ user data.
+
+- ``->param_value``: It contains the parameter value which can be
+ retrieved in the parameterized tests.
+
+Test Suites
+-----------
+
+A KUnit suite includes a collection of test cases. The KUnit suites
+are represented by the ``struct kunit_suite``. For example:
+
+.. code-block:: c
+
+ static struct kunit_case example_test_cases[] = {
+ KUNIT_CASE(example_test_foo),
+ KUNIT_CASE(example_test_bar),
+ KUNIT_CASE(example_test_baz),
+ {}
+ };
+
+ static struct kunit_suite example_test_suite = {
+ .name = "example",
+ .init = example_test_init,
+ .exit = example_test_exit,
+ .test_cases = example_test_cases,
+ };
+ kunit_test_suite(example_test_suite);
+
+In the above example, the test suite ``example_test_suite``, runs the
+test cases ``example_test_foo``, ``example_test_bar``, and
+``example_test_baz``. Before running the test, the ``example_test_init``
+is called and after running the test, ``example_test_exit`` is called.
+The ``kunit_test_suite(example_test_suite)`` registers the test suite
+with the KUnit test framework.
+
+Executor
+--------
+
+The KUnit executor can list and run built-in KUnit tests on boot.
+The Test suites are stored in a linker section
+called ``.kunit_test_suites``. For the code, see ``KUNIT_TABLE()`` macro
+definition in
+`include/asm-generic/vmlinux.lds.h <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/asm-generic/vmlinux.lds.h?h=v6.0#n950>`_.
+The linker section consists of an array of pointers to
+``struct kunit_suite``, and is populated by the ``kunit_test_suites()``
+macro. The KUnit executor iterates over the linker section array in order to
+run all the tests that are compiled into the kernel.
+
+.. kernel-figure:: kunit_suitememorydiagram.svg
+ :alt: KUnit Suite Memory
+
+ KUnit Suite Memory Diagram
+
+On the kernel boot, the KUnit executor uses the start and end addresses
+of this section to iterate over and run all tests. For the implementation of the
+executor, see
+`lib/kunit/executor.c <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/lib/kunit/executor.c>`_.
+When built as a module, the ``kunit_test_suites()`` macro defines a
+``module_init()`` function, which runs all the tests in the compilation
+unit instead of utilizing the executor.
+
+In KUnit tests, some error classes do not affect other tests
+or parts of the kernel, each KUnit case executes in a separate thread
+context. See the ``kunit_try_catch_run()`` function in
+`lib/kunit/try-catch.c <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/lib/kunit/try-catch.c?h=v5.15#n58>`_.
+
+Assertion Macros
+----------------
+
+KUnit tests verify state using expectations/assertions.
+All expectations/assertions are formatted as:
+``KUNIT_{EXPECT|ASSERT}_<op>[_MSG](kunit, property[, message])``
+
+- ``{EXPECT|ASSERT}`` determines whether the check is an assertion or an
+ expectation.
+ In the event of a failure, the testing flow differs as follows:
+
+ - For expectations, the test is marked as failed and the failure is logged.
+
+ - Failing assertions, on the other hand, result in the test case being
+ terminated immediately.
+
+ - Assertions call the function:
+ ``void __noreturn __kunit_abort(struct kunit *)``.
+
+ - ``__kunit_abort`` calls the function:
+ ``void __noreturn kunit_try_catch_throw(struct kunit_try_catch *try_catch)``.
+
+ - ``kunit_try_catch_throw`` calls the function:
+ ``void kthread_complete_and_exit(struct completion *, long) __noreturn;``
+ and terminates the special thread context.
+
+- ``<op>`` denotes a check with options: ``TRUE`` (supplied property
+ has the boolean value "true"), ``EQ`` (two supplied properties are
+ equal), ``NOT_ERR_OR_NULL`` (supplied pointer is not null and does not
+ contain an "err" value).
+
+- ``[_MSG]`` prints a custom message on failure.
+
+Test Result Reporting
+---------------------
+KUnit prints the test results in KTAP format. KTAP is based on TAP14, see
+Documentation/dev-tools/ktap.rst.
+KTAP works with KUnit and Kselftest. The KUnit executor prints KTAP results to
+dmesg, and debugfs (if configured).
+
+Parameterized Tests
+-------------------
+
+Each KUnit parameterized test is associated with a collection of
+parameters. The test is invoked multiple times, once for each parameter
+value and the parameter is stored in the ``param_value`` field.
+The test case includes a KUNIT_CASE_PARAM() macro that accepts a
+generator function. The generator function is passed the previous parameter
+and returns the next parameter. It also includes a macro for generating
+array-based common-case generators.
+
+kunit_tool (Command-line Test Harness)
+======================================
+
+``kunit_tool`` is a Python script, found in ``tools/testing/kunit/kunit.py``. It
+is used to configure, build, execute, parse test results and run all of the
+previous commands in correct order (i.e., configure, build, execute and parse).
+You have two options for running KUnit tests: either build the kernel with KUnit
+enabled and manually parse the results (see
+Documentation/dev-tools/kunit/run_manual.rst) or use ``kunit_tool``
+(see Documentation/dev-tools/kunit/run_wrapper.rst).
+
+- ``configure`` command generates the kernel ``.config`` from a
+ ``.kunitconfig`` file (and any architecture-specific options).
+ The Python scripts available in ``qemu_configs`` folder
+ (for example, ``tools/testing/kunit/qemu configs/powerpc.py``) contains
+ additional configuration options for specific architectures.
+ It parses both the existing ``.config`` and the ``.kunitconfig`` files
+ to ensure that ``.config`` is a superset of ``.kunitconfig``.
+ If not, it will combine the two and run ``make olddefconfig`` to regenerate
+ the ``.config`` file. It then checks to see if ``.config`` has become a superset.
+ This verifies that all the Kconfig dependencies are correctly specified in the
+ file ``.kunitconfig``. The ``kunit_config.py`` script contains the code for parsing
+ Kconfigs. The code which runs ``make olddefconfig`` is part of the
+ ``kunit_kernel.py`` script. You can invoke this command through:
+ ``./tools/testing/kunit/kunit.py config`` and
+ generate a ``.config`` file.
+- ``build`` runs ``make`` on the kernel tree with required options
+ (depends on the architecture and some options, for example: build_dir)
+ and reports any errors.
+ To build a KUnit kernel from the current ``.config``, you can use the
+ ``build`` argument: ``./tools/testing/kunit/kunit.py build``.
+- ``exec`` command executes kernel results either directly (using
+ User-mode Linux configuration), or through an emulator such
+ as QEMU. It reads results from the log using standard
+ output (stdout), and passes them to ``parse`` to be parsed.
+ If you already have built a kernel with built-in KUnit tests,
+ you can run the kernel and display the test results with the ``exec``
+ argument: ``./tools/testing/kunit/kunit.py exec``.
+- ``parse`` extracts the KTAP output from a kernel log, parses
+ the test results, and prints a summary. For failed tests, any
+ diagnostic output will be included.
diff --git a/Documentation/dev-tools/kunit/faq.rst b/Documentation/dev-tools/kunit/faq.rst
new file mode 100644
index 000000000000..fae426f2634a
--- /dev/null
+++ b/Documentation/dev-tools/kunit/faq.rst
@@ -0,0 +1,104 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========================
+Frequently Asked Questions
+==========================
+
+How is this different from Autotest, kselftest, and so on?
+==========================================================
+KUnit is a unit testing framework. Autotest, kselftest (and some others) are
+not.
+
+A `unit test <https://martinfowler.com/bliki/UnitTest.html>`_ is supposed to
+test a single unit of code in isolation and hence the name *unit test*. A unit
+test should be the finest granularity of testing and should allow all possible
+code paths to be tested in the code under test. This is only possible if the
+code under test is small and does not have any external dependencies outside of
+the test's control like hardware.
+
+There are no testing frameworks currently available for the kernel that do not
+require installing the kernel on a test machine or in a virtual machine. All
+testing frameworks require tests to be written in userspace and run on the
+kernel under test. This is true for Autotest, kselftest, and some others,
+disqualifying any of them from being considered unit testing frameworks.
+
+Does KUnit support running on architectures other than UML?
+===========================================================
+
+Yes, mostly.
+
+For the most part, the KUnit core framework (what we use to write the tests)
+can compile to any architecture. It compiles like just another part of the
+kernel and runs when the kernel boots, or when built as a module, when the
+module is loaded. However, there is infrastructure, like the KUnit Wrapper
+(``tools/testing/kunit/kunit.py``) that might not support some architectures
+(see :ref:`kunit-on-qemu`).
+
+In short, yes, you can run KUnit on other architectures, but it might require
+more work than using KUnit on UML.
+
+For more information, see :ref:`kunit-on-non-uml`.
+
+.. _kinds-of-tests:
+
+What is the difference between a unit test and other kinds of tests?
+====================================================================
+Most existing tests for the Linux kernel would be categorized as an integration
+test, or an end-to-end test.
+
+- A unit test is supposed to test a single unit of code in isolation. A unit
+ test should be the finest granularity of testing and, as such, allows all
+ possible code paths to be tested in the code under test. This is only possible
+ if the code under test is small and does not have any external dependencies
+ outside of the test's control like hardware.
+- An integration test tests the interaction between a minimal set of components,
+ usually just two or three. For example, someone might write an integration
+ test to test the interaction between a driver and a piece of hardware, or to
+ test the interaction between the userspace libraries the kernel provides and
+ the kernel itself. However, one of these tests would probably not test the
+ entire kernel along with hardware interactions and interactions with the
+ userspace.
+- An end-to-end test usually tests the entire system from the perspective of the
+ code under test. For example, someone might write an end-to-end test for the
+ kernel by installing a production configuration of the kernel on production
+ hardware with a production userspace and then trying to exercise some behavior
+ that depends on interactions between the hardware, the kernel, and userspace.
+
+KUnit is not working, what should I do?
+=======================================
+
+Unfortunately, there are a number of things which can break, but here are some
+things to try.
+
+1. Run ``./tools/testing/kunit/kunit.py run`` with the ``--raw_output``
+ parameter. This might show details or error messages hidden by the kunit_tool
+ parser.
+2. Instead of running ``kunit.py run``, try running ``kunit.py config``,
+ ``kunit.py build``, and ``kunit.py exec`` independently. This can help track
+ down where an issue is occurring. (If you think the parser is at fault, you
+ can run it manually against ``stdin`` or a file with ``kunit.py parse``.)
+3. Running the UML kernel directly can often reveal issues or error messages,
+ ``kunit_tool`` ignores. This should be as simple as running ``./vmlinux``
+ after building the UML kernel (for example, by using ``kunit.py build``).
+ Note that UML has some unusual requirements (such as the host having a tmpfs
+ filesystem mounted), and has had issues in the past when built statically and
+ the host has KASLR enabled. (On older host kernels, you may need to run
+ ``setarch `uname -m` -R ./vmlinux`` to disable KASLR.)
+4. Make sure the kernel .config has ``CONFIG_KUNIT=y`` and at least one test
+ (e.g. ``CONFIG_KUNIT_EXAMPLE_TEST=y``). kunit_tool will keep its .config
+ around, so you can see what config was used after running ``kunit.py run``.
+ It also preserves any config changes you might make, so you can
+ enable/disable things with ``make ARCH=um menuconfig`` or similar, and then
+ re-run kunit_tool.
+5. Try to run ``make ARCH=um defconfig`` before running ``kunit.py run``. This
+ may help clean up any residual config items which could be causing problems.
+6. Finally, try running KUnit outside UML. KUnit and KUnit tests can be
+ built into any kernel, or can be built as a module and loaded at runtime.
+ Doing so should allow you to determine if UML is causing the issue you're
+ seeing. When tests are built-in, they will execute when the kernel boots, and
+ modules will automatically execute associated tests when loaded. Test results
+ can be collected from ``/sys/kernel/debug/kunit/<test suite>/results``, and
+ can be parsed with ``kunit.py parse``. For more details, see :ref:`kunit-on-qemu`.
+
+If none of the above tricks help, you are always welcome to email any issues to
+kunit-dev@googlegroups.com.
diff --git a/Documentation/dev-tools/kunit/index.rst b/Documentation/dev-tools/kunit/index.rst
new file mode 100644
index 000000000000..b3593ae29ace
--- /dev/null
+++ b/Documentation/dev-tools/kunit/index.rst
@@ -0,0 +1,109 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=================================
+KUnit - Linux Kernel Unit Testing
+=================================
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ start
+ architecture
+ run_wrapper
+ run_manual
+ usage
+ api/index
+ style
+ faq
+ running_tips
+
+This section details the kernel unit testing framework.
+
+Introduction
+============
+
+KUnit (Kernel unit testing framework) provides a common framework for
+unit tests within the Linux kernel. Using KUnit, you can define groups
+of test cases called test suites. The tests either run on kernel boot
+if built-in, or load as a module. KUnit automatically flags and reports
+failed test cases in the kernel log. The test results appear in
+:doc:`KTAP (Kernel - Test Anything Protocol) format</dev-tools/ktap>`.
+It is inspired by JUnit, Python’s unittest.mock, and GoogleTest/GoogleMock
+(C++ unit testing framework).
+
+KUnit tests are part of the kernel, written in the C (programming)
+language, and test parts of the Kernel implementation (example: a C
+language function). Excluding build time, from invocation to
+completion, KUnit can run around 100 tests in less than 10 seconds.
+KUnit can test any kernel component, for example: file system, system
+calls, memory management, device drivers and so on.
+
+KUnit follows the white-box testing approach. The test has access to
+internal system functionality. KUnit runs in kernel space and is not
+restricted to things exposed to user-space.
+
+In addition, KUnit has kunit_tool, a script (``tools/testing/kunit/kunit.py``)
+that configures the Linux kernel, runs KUnit tests under QEMU or UML
+(:doc:`User Mode Linux </virt/uml/user_mode_linux_howto_v2>`),
+parses the test results and
+displays them in a user friendly manner.
+
+Features
+--------
+
+- Provides a framework for writing unit tests.
+- Runs tests on any kernel architecture.
+- Runs a test in milliseconds.
+
+Prerequisites
+-------------
+
+- Any Linux kernel compatible hardware.
+- For Kernel under test, Linux kernel version 5.5 or greater.
+
+Unit Testing
+============
+
+A unit test tests a single unit of code in isolation. A unit test is the finest
+granularity of testing and allows all possible code paths to be tested in the
+code under test. This is possible if the code under test is small and does not
+have any external dependencies outside of the test's control like hardware.
+
+
+Write Unit Tests
+----------------
+
+To write good unit tests, there is a simple but powerful pattern:
+Arrange-Act-Assert. This is a great way to structure test cases and
+defines an order of operations.
+
+- Arrange inputs and targets: At the start of the test, arrange the data
+ that allows a function to work. Example: initialize a statement or
+ object.
+- Act on the target behavior: Call your function/code under test.
+- Assert expected outcome: Verify that the result (or resulting state) is as
+ expected.
+
+Unit Testing Advantages
+-----------------------
+
+- Increases testing speed and development in the long run.
+- Detects bugs at initial stage and therefore decreases bug fix cost
+ compared to acceptance testing.
+- Improves code quality.
+- Encourages writing testable code.
+
+Read also :ref:`kinds-of-tests`.
+
+How do I use it?
+================
+
+You can find a step-by-step guide to writing and running KUnit tests in
+Documentation/dev-tools/kunit/start.rst
+
+Alternatively, feel free to look through the rest of the KUnit documentation,
+or to experiment with tools/testing/kunit/kunit.py and the example test under
+lib/kunit/kunit-example-test.c
+
+Happy testing!
diff --git a/Documentation/dev-tools/kunit/kunit_suitememorydiagram.svg b/Documentation/dev-tools/kunit/kunit_suitememorydiagram.svg
new file mode 100644
index 000000000000..cf8fddc27500
--- /dev/null
+++ b/Documentation/dev-tools/kunit/kunit_suitememorydiagram.svg
@@ -0,0 +1,81 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<svg width="796.93" height="555.73" version="1.1" viewBox="0 0 796.93 555.73" xmlns="http://www.w3.org/2000/svg">
+ <g transform="translate(-13.724 -17.943)">
+ <g fill="#dad4d4" fill-opacity=".91765" stroke="#1a1a1a">
+ <rect x="323.56" y="18.443" width="115.75" height="41.331"/>
+ <rect x="323.56" y="463.09" width="115.75" height="41.331"/>
+ <rect x="323.56" y="531.84" width="115.75" height="41.331"/>
+ <rect x="323.56" y="88.931" width="115.75" height="74.231"/>
+ </g>
+ <g>
+ <rect x="323.56" y="421.76" width="115.75" height="41.331" fill="#b9dbc6" stroke="#1a1a1a"/>
+ <text x="328.00888" y="446.61826" fill="#000000" font-family="sans-serif" font-size="16px" style="line-height:1.25" xml:space="preserve"><tspan x="328.00888" y="446.61826" font-family="monospace" font-size="16px">kunit_suite</tspan></text>
+ </g>
+ <g transform="translate(0 -258.6)">
+ <rect x="323.56" y="421.76" width="115.75" height="41.331" fill="#b9dbc6" stroke="#1a1a1a"/>
+ <text x="328.00888" y="446.61826" fill="#000000" font-family="sans-serif" font-size="16px" style="line-height:1.25" xml:space="preserve"><tspan x="328.00888" y="446.61826" font-family="monospace" font-size="16px">kunit_suite</tspan></text>
+ </g>
+ <g transform="translate(0 -217.27)">
+ <rect x="323.56" y="421.76" width="115.75" height="41.331" fill="#b9dbc6" stroke="#1a1a1a"/>
+ <text x="328.00888" y="446.61826" fill="#000000" font-family="sans-serif" font-size="16px" style="line-height:1.25" xml:space="preserve"><tspan x="328.00888" y="446.61826" font-family="monospace" font-size="16px">kunit_suite</tspan></text>
+ </g>
+ <g transform="translate(0 -175.94)">
+ <rect x="323.56" y="421.76" width="115.75" height="41.331" fill="#b9dbc6" stroke="#1a1a1a"/>
+ <text x="328.00888" y="446.61826" fill="#000000" font-family="sans-serif" font-size="16px" style="line-height:1.25" xml:space="preserve"><tspan x="328.00888" y="446.61826" font-family="monospace" font-size="16px">kunit_suite</tspan></text>
+ </g>
+ <g transform="translate(0 -134.61)">
+ <rect x="323.56" y="421.76" width="115.75" height="41.331" fill="#b9dbc6" stroke="#1a1a1a"/>
+ <text x="328.00888" y="446.61826" fill="#000000" font-family="sans-serif" font-size="16px" style="line-height:1.25" xml:space="preserve"><tspan x="328.00888" y="446.61826" font-family="monospace" font-size="16px">kunit_suite</tspan></text>
+ </g>
+ <g transform="translate(0 -41.331)">
+ <rect x="323.56" y="421.76" width="115.75" height="41.331" fill="#b9dbc6" stroke="#1a1a1a"/>
+ <text x="328.00888" y="446.61826" fill="#000000" font-family="sans-serif" font-size="16px" style="line-height:1.25" xml:space="preserve"><tspan x="328.00888" y="446.61826" font-family="monospace" font-size="16px">kunit_suite</tspan></text>
+ </g>
+ <g transform="translate(3.4459e-5 -.71088)">
+ <rect x="502.19" y="143.16" width="201.13" height="41.331" fill="#dad4d4" fill-opacity=".91765" stroke="#1a1a1a"/>
+ <text x="512.02319" y="168.02026" font-family="sans-serif" font-size="16px" style="line-height:1.25" xml:space="preserve"><tspan x="512.02319" y="168.02026" font-family="monospace">_kunit_suites_start</tspan></text>
+ </g>
+ <g transform="translate(3.0518e-5 -3.1753)">
+ <rect x="502.19" y="445.69" width="201.13" height="41.331" fill="#dad4d4" fill-opacity=".91765" stroke="#1a1a1a"/>
+ <text x="521.61694" y="470.54846" font-family="sans-serif" font-size="16px" style="line-height:1.25" xml:space="preserve"><tspan x="521.61694" y="470.54846" font-family="monospace">_kunit_suites_end</tspan></text>
+ </g>
+ <rect x="14.224" y="277.78" width="134.47" height="41.331" fill="#dad4d4" fill-opacity=".91765" stroke="#1a1a1a"/>
+ <text x="32.062176" y="304.41287" font-family="sans-serif" font-size="16px" style="line-height:1.25" xml:space="preserve"><tspan x="32.062176" y="304.41287" font-family="monospace">.init.data</tspan></text>
+ <g transform="translate(217.98 145.12)" stroke="#1a1a1a">
+ <circle cx="149.97" cy="373.01" r="3.4012"/>
+ <circle cx="163.46" cy="373.01" r="3.4012"/>
+ <circle cx="176.95" cy="373.01" r="3.4012"/>
+ </g>
+ <g transform="translate(217.98 -298.66)" stroke="#1a1a1a">
+ <circle cx="149.97" cy="373.01" r="3.4012"/>
+ <circle cx="163.46" cy="373.01" r="3.4012"/>
+ <circle cx="176.95" cy="373.01" r="3.4012"/>
+ </g>
+ <g stroke="#1a1a1a">
+ <rect x="323.56" y="328.49" width="115.75" height="51.549" fill="#b9dbc6"/>
+ <g transform="translate(217.98 -18.75)">
+ <circle cx="149.97" cy="373.01" r="3.4012"/>
+ <circle cx="163.46" cy="373.01" r="3.4012"/>
+ <circle cx="176.95" cy="373.01" r="3.4012"/>
+ </g>
+ </g>
+ <g transform="scale(1.0933 .9147)" stroke-width="32.937" aria-label="{">
+ <path d="m275.49 545.57c-35.836-8.432-47.43-24.769-47.957-64.821v-88.536c-0.527-44.795-10.54-57.97-49.538-67.456 38.998-10.013 49.011-23.715 49.538-67.983v-88.536c0.527-40.052 12.121-56.389 47.957-64.821v-5.797c-65.348 0-85.901 17.391-86.955 73.253v93.806c-0.527 36.89-10.013 50.065-44.795 59.551 34.782 10.013 44.268 23.188 44.795 60.078v93.279c1.581 56.389 21.607 73.78 86.955 73.78z"/>
+ </g>
+ <g transform="scale(1.1071 .90325)" stroke-width="14.44" aria-label="{">
+ <path d="m461.46 443.55c-15.711-3.6967-20.794-10.859-21.025-28.418v-38.815c-0.23104-19.639-4.6209-25.415-21.718-29.574 17.097-4.3898 21.487-10.397 21.718-29.805v-38.815c0.23105-17.559 5.314-24.722 21.025-28.418v-2.5415c-28.649 0-37.66 7.6244-38.122 32.115v41.126c-0.23105 16.173-4.3898 21.949-19.639 26.108 15.249 4.3898 19.408 10.166 19.639 26.339v40.895c0.69313 24.722 9.4728 32.346 38.122 32.346z"/>
+ </g>
+ <path d="m449.55 161.84v2.5h49.504v-2.5z" color="#000000" style="-inkscape-stroke:none"/>
+ <g fill-rule="evenodd">
+ <path d="m443.78 163.09 8.65-5v10z" color="#000000" stroke-width="1pt" style="-inkscape-stroke:none"/>
+ <path d="m453.1 156.94-10.648 6.1543 0.99804 0.57812 9.6504 5.5781zm-1.334 2.3125v7.6856l-6.6504-3.8438z" color="#000000" style="-inkscape-stroke:none"/>
+ </g>
+ <path d="m449.55 461.91v2.5h49.504v-2.5z" color="#000000" style="-inkscape-stroke:none"/>
+ <g fill-rule="evenodd">
+ <path d="m443.78 463.16 8.65-5v10z" color="#000000" stroke-width="1pt" style="-inkscape-stroke:none"/>
+ <path d="m453.1 457-10.648 6.1562 0.99804 0.57617 9.6504 5.5781zm-1.334 2.3125v7.6856l-6.6504-3.8438z" color="#000000" style="-inkscape-stroke:none"/>
+ </g>
+ <rect x="515.64" y="223.9" width="294.52" height="178.49" fill="#dad4d4" fill-opacity=".91765" stroke="#1a1a1a"/>
+ <text x="523.33319" y="262.52542" font-family="monospace" font-size="14.667px" style="line-height:1.25" xml:space="preserve"><tspan x="523.33319" y="262.52542"><tspan fill="#008000" font-family="monospace" font-size="14.667px" font-weight="bold">struct</tspan> kunit_suite {</tspan><tspan x="523.33319" y="280.8588"><tspan fill="#008000" font-family="monospace" font-size="14.667px" font-weight="bold"> const char</tspan> name[<tspan fill="#ff00ff" font-size="14.667px">256</tspan>];</tspan><tspan x="523.33319" y="299.19217"> <tspan fill="#008000" font-family="monospace" font-size="14.667px" font-weight="bold">int</tspan> (*init)(<tspan fill="#008000" font-family="monospace" font-size="14.667px" font-weight="bold">struct</tspan> kunit *);</tspan><tspan x="523.33319" y="317.52554"> <tspan fill="#008000" font-family="monospace" font-size="14.667px" font-weight="bold">void</tspan> (*exit)(<tspan fill="#008000" font-family="monospace" font-size="14.667px" font-weight="bold">struct</tspan> kunit *);</tspan><tspan x="523.33319" y="335.85892"> <tspan fill="#008000" font-family="monospace" font-size="14.667px" font-weight="bold">struct</tspan> kunit_case *test_cases;</tspan><tspan x="523.33319" y="354.19229"> ...</tspan><tspan x="523.33319" y="372.52567">};</tspan></text>
+ </g>
+</svg>
diff --git a/Documentation/dev-tools/kunit/run_manual.rst b/Documentation/dev-tools/kunit/run_manual.rst
new file mode 100644
index 000000000000..699d92885075
--- /dev/null
+++ b/Documentation/dev-tools/kunit/run_manual.rst
@@ -0,0 +1,100 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============================
+Run Tests without kunit_tool
+============================
+
+If we do not want to use kunit_tool (For example: we want to integrate
+with other systems, or run tests on real hardware), we can
+include KUnit in any kernel, read out results, and parse manually.
+
+.. note:: KUnit is not designed for use in a production system. It is
+ possible that tests may reduce the stability or security of
+ the system.
+
+Configure the Kernel
+====================
+
+KUnit tests can run without kunit_tool. This can be useful, if:
+
+- We have an existing kernel configuration to test.
+- Need to run on real hardware (or using an emulator/VM kunit_tool
+ does not support).
+- Wish to integrate with some existing testing systems.
+
+KUnit is configured with the ``CONFIG_KUNIT`` option, and individual
+tests can also be built by enabling their config options in our
+``.config``. KUnit tests usually (but don't always) have config options
+ending in ``_KUNIT_TEST``. Most tests can either be built as a module,
+or be built into the kernel.
+
+.. note ::
+
+ We can enable the ``KUNIT_ALL_TESTS`` config option to
+ automatically enable all tests with satisfied dependencies. This is
+ a good way of quickly testing everything applicable to the current
+ config.
+
+Once we have built our kernel (and/or modules), it is simple to run
+the tests. If the tests are built-in, they will run automatically on the
+kernel boot. The results will be written to the kernel log (``dmesg``)
+in TAP format.
+
+If the tests are built as modules, they will run when the module is
+loaded.
+
+.. code-block :: bash
+
+ # modprobe example-test
+
+The results will appear in TAP format in ``dmesg``.
+
+debugfs
+=======
+
+KUnit can be accessed from userspace via the debugfs filesystem (See more
+information about debugfs at Documentation/filesystems/debugfs.rst).
+
+If ``CONFIG_KUNIT_DEBUGFS`` is enabled, the KUnit debugfs filesystem is
+mounted at /sys/kernel/debug/kunit. You can use this filesystem to perform
+the following actions.
+
+Retrieve Test Results
+=====================
+
+You can use debugfs to retrieve KUnit test results. The test results are
+accessible from the debugfs filesystem in the following read-only file:
+
+.. code-block :: bash
+
+ /sys/kernel/debug/kunit/<test_suite>/results
+
+The test results are printed in a KTAP document. Note this document is separate
+to the kernel log and thus, may have different test suite numbering.
+
+Run Tests After Kernel Has Booted
+=================================
+
+You can use the debugfs filesystem to trigger built-in tests to run after
+boot. To run the test suite, you can use the following command to write to
+the ``/sys/kernel/debug/kunit/<test_suite>/run`` file:
+
+.. code-block :: bash
+
+ echo "any string" > /sys/kernel/debugfs/kunit/<test_suite>/run
+
+As a result, the test suite runs and the results are printed to the kernel
+log.
+
+However, this feature is not available with KUnit suites that use init data,
+because init data may have been discarded after the kernel boots. KUnit
+suites that use init data should be defined using the
+kunit_test_init_section_suites() macro.
+
+Also, you cannot use this feature to run tests concurrently. Instead a test
+will wait to run until other tests have completed or failed.
+
+.. note ::
+
+ For test authors, to use this feature, tests will need to correctly initialise
+ and/or clean up any data, so the test runs correctly a second time.
diff --git a/Documentation/dev-tools/kunit/run_wrapper.rst b/Documentation/dev-tools/kunit/run_wrapper.rst
new file mode 100644
index 000000000000..6697c71ee8ca
--- /dev/null
+++ b/Documentation/dev-tools/kunit/run_wrapper.rst
@@ -0,0 +1,337 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============================
+Running tests with kunit_tool
+=============================
+
+We can either run KUnit tests using kunit_tool or can run tests
+manually, and then use kunit_tool to parse the results. To run tests
+manually, see: Documentation/dev-tools/kunit/run_manual.rst.
+As long as we can build the kernel, we can run KUnit.
+
+kunit_tool is a Python script which configures and builds a kernel, runs
+tests, and formats the test results.
+
+Run command:
+
+.. code-block::
+
+ ./tools/testing/kunit/kunit.py run
+
+We should see the following:
+
+.. code-block::
+
+ Configuring KUnit Kernel ...
+ Building KUnit kernel...
+ Starting KUnit kernel...
+
+We may want to use the following options:
+
+.. code-block::
+
+ ./tools/testing/kunit/kunit.py run --timeout=30 --jobs=`nproc --all`
+
+- ``--timeout`` sets a maximum amount of time for tests to run.
+- ``--jobs`` sets the number of threads to build the kernel.
+
+kunit_tool will generate a ``.kunitconfig`` with a default
+configuration, if no other ``.kunitconfig`` file exists
+(in the build directory). In addition, it verifies that the
+generated ``.config`` file contains the ``CONFIG`` options in the
+``.kunitconfig``.
+It is also possible to pass a separate ``.kunitconfig`` fragment to
+kunit_tool. This is useful if we have several different groups of
+tests we want to run independently, or if we want to use pre-defined
+test configs for certain subsystems.
+
+To use a different ``.kunitconfig`` file (such as one
+provided to test a particular subsystem), pass it as an option:
+
+.. code-block::
+
+ ./tools/testing/kunit/kunit.py run --kunitconfig=fs/ext4/.kunitconfig
+
+To view kunit_tool flags (optional command-line arguments), run:
+
+.. code-block::
+
+ ./tools/testing/kunit/kunit.py run --help
+
+Creating a ``.kunitconfig`` file
+================================
+
+If we want to run a specific set of tests (rather than those listed
+in the KUnit ``defconfig``), we can provide Kconfig options in the
+``.kunitconfig`` file. For default .kunitconfig, see:
+https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/kunit/configs/default.config.
+A ``.kunitconfig`` is a ``minconfig`` (a .config
+generated by running ``make savedefconfig``), used for running a
+specific set of tests. This file contains the regular Kernel configs
+with specific test targets. The ``.kunitconfig`` also
+contains any other config options required by the tests (For example:
+dependencies for features under tests, configs that enable/disable
+certain code blocks, arch configs and so on).
+
+To create a ``.kunitconfig``, using the KUnit ``defconfig``:
+
+.. code-block::
+
+ cd $PATH_TO_LINUX_REPO
+ cp tools/testing/kunit/configs/default.config .kunit/.kunitconfig
+
+We can then add any other Kconfig options. For example:
+
+.. code-block::
+
+ CONFIG_LIST_KUNIT_TEST=y
+
+kunit_tool ensures that all config options in ``.kunitconfig`` are
+set in the kernel ``.config`` before running the tests. It warns if we
+have not included the options dependencies.
+
+.. note:: Removing something from the ``.kunitconfig`` will
+ not rebuild the ``.config file``. The configuration is only
+ updated if the ``.kunitconfig`` is not a subset of ``.config``.
+ This means that we can use other tools
+ (For example: ``make menuconfig``) to adjust other config options.
+ The build dir needs to be set for ``make menuconfig`` to
+ work, therefore by default use ``make O=.kunit menuconfig``.
+
+Configuring, building, and running tests
+========================================
+
+If we want to make manual changes to the KUnit build process, we
+can run part of the KUnit build process independently.
+When running kunit_tool, from a ``.kunitconfig``, we can generate a
+``.config`` by using the ``config`` argument:
+
+.. code-block::
+
+ ./tools/testing/kunit/kunit.py config
+
+To build a KUnit kernel from the current ``.config``, we can use the
+``build`` argument:
+
+.. code-block::
+
+ ./tools/testing/kunit/kunit.py build
+
+If we already have built UML kernel with built-in KUnit tests, we
+can run the kernel, and display the test results with the ``exec``
+argument:
+
+.. code-block::
+
+ ./tools/testing/kunit/kunit.py exec
+
+The ``run`` command discussed in section: **Running tests with kunit_tool**,
+is equivalent to running the above three commands in sequence.
+
+Parsing test results
+====================
+
+KUnit tests output displays results in TAP (Test Anything Protocol)
+format. When running tests, kunit_tool parses this output and prints
+a summary. To see the raw test results in TAP format, we can pass the
+``--raw_output`` argument:
+
+.. code-block::
+
+ ./tools/testing/kunit/kunit.py run --raw_output
+
+If we have KUnit results in the raw TAP format, we can parse them and
+print the human-readable summary with the ``parse`` command for
+kunit_tool. This accepts a filename for an argument, or will read from
+standard input.
+
+.. code-block:: bash
+
+ # Reading from a file
+ ./tools/testing/kunit/kunit.py parse /var/log/dmesg
+ # Reading from stdin
+ dmesg | ./tools/testing/kunit/kunit.py parse
+
+Filtering tests
+===============
+
+By passing a bash style glob filter to the ``exec`` or ``run``
+commands, we can run a subset of the tests built into a kernel . For
+example: if we only want to run KUnit resource tests, use:
+
+.. code-block::
+
+ ./tools/testing/kunit/kunit.py run 'kunit-resource*'
+
+This uses the standard glob format with wildcard characters.
+
+.. _kunit-on-qemu:
+
+Running tests on QEMU
+=====================
+
+kunit_tool supports running tests on qemu as well as
+via UML. To run tests on qemu, by default it requires two flags:
+
+- ``--arch``: Selects a configs collection (Kconfig, qemu config options
+ and so on), that allow KUnit tests to be run on the specified
+ architecture in a minimal way. The architecture argument is same as
+ the option name passed to the ``ARCH`` variable used by Kbuild.
+ Not all architectures currently support this flag, but we can use
+ ``--qemu_config`` to handle it. If ``um`` is passed (or this flag
+ is ignored), the tests will run via UML. Non-UML architectures,
+ for example: i386, x86_64, arm and so on; run on qemu.
+
+ ``--arch help`` lists all valid ``--arch`` values.
+
+- ``--cross_compile``: Specifies the Kbuild toolchain. It passes the
+ same argument as passed to the ``CROSS_COMPILE`` variable used by
+ Kbuild. As a reminder, this will be the prefix for the toolchain
+ binaries such as GCC. For example:
+
+ - ``sparc64-linux-gnu`` if we have the sparc toolchain installed on
+ our system.
+
+ - ``$HOME/toolchains/microblaze/gcc-9.2.0-nolibc/microblaze-linux/bin/microblaze-linux``
+ if we have downloaded the microblaze toolchain from the 0-day
+ website to a directory in our home directory called toolchains.
+
+This means that for most architectures, running under qemu is as simple as:
+
+.. code-block:: bash
+
+ ./tools/testing/kunit/kunit.py run --arch=x86_64
+
+When cross-compiling, we'll likely need to specify a different toolchain, for
+example:
+
+.. code-block:: bash
+
+ ./tools/testing/kunit/kunit.py run \
+ --arch=s390 \
+ --cross_compile=s390x-linux-gnu-
+
+If we want to run KUnit tests on an architecture not supported by
+the ``--arch`` flag, or want to run KUnit tests on qemu using a
+non-default configuration; then we can write our own``QemuConfig``.
+These ``QemuConfigs`` are written in Python. They have an import line
+``from..qemu_config import QemuArchParams`` at the top of the file.
+The file must contain a variable called ``QEMU_ARCH`` that has an
+instance of ``QemuArchParams`` assigned to it. See example in:
+``tools/testing/kunit/qemu_configs/x86_64.py``.
+
+Once we have a ``QemuConfig``, we can pass it into kunit_tool,
+using the ``--qemu_config`` flag. When used, this flag replaces the
+``--arch`` flag. For example: using
+``tools/testing/kunit/qemu_configs/x86_64.py``, the invocation appear
+as
+
+.. code-block:: bash
+
+ ./tools/testing/kunit/kunit.py run \
+ --timeout=60 \
+ --jobs=12 \
+ --qemu_config=./tools/testing/kunit/qemu_configs/x86_64.py
+
+Running command-line arguments
+==============================
+
+kunit_tool has a number of other command-line arguments which can
+be useful for our test environment. Below are the most commonly used
+command line arguments:
+
+- ``--help``: Lists all available options. To list common options,
+ place ``--help`` before the command. To list options specific to that
+ command, place ``--help`` after the command.
+
+ .. note:: Different commands (``config``, ``build``, ``run``, etc)
+ have different supported options.
+- ``--build_dir``: Specifies kunit_tool build directory. It includes
+ the ``.kunitconfig``, ``.config`` files and compiled kernel.
+
+- ``--make_options``: Specifies additional options to pass to make, when
+ compiling a kernel (using ``build`` or ``run`` commands). For example:
+ to enable compiler warnings, we can pass ``--make_options W=1``.
+
+- ``--alltests``: Enable a predefined set of options in order to build
+ as many tests as possible.
+
+ .. note:: The list of enabled options can be found in
+ ``tools/testing/kunit/configs/all_tests.config``.
+
+ If you only want to enable all tests with otherwise satisfied
+ dependencies, instead add ``CONFIG_KUNIT_ALL_TESTS=y`` to your
+ ``.kunitconfig``.
+
+- ``--kunitconfig``: Specifies the path or the directory of the ``.kunitconfig``
+ file. For example:
+
+ - ``lib/kunit/.kunitconfig`` can be the path of the file.
+
+ - ``lib/kunit`` can be the directory in which the file is located.
+
+ This file is used to build and run with a predefined set of tests
+ and their dependencies. For example, to run tests for a given subsystem.
+
+- ``--kconfig_add``: Specifies additional configuration options to be
+ appended to the ``.kunitconfig`` file. For example:
+
+ .. code-block::
+
+ ./tools/testing/kunit/kunit.py run --kconfig_add CONFIG_KASAN=y
+
+- ``--arch``: Runs tests on the specified architecture. The architecture
+ argument is same as the Kbuild ARCH environment variable.
+ For example, i386, x86_64, arm, um, etc. Non-UML architectures run on qemu.
+ Default is `um`.
+
+- ``--cross_compile``: Specifies the Kbuild toolchain. It passes the
+ same argument as passed to the ``CROSS_COMPILE`` variable used by
+ Kbuild. This will be the prefix for the toolchain
+ binaries such as GCC. For example:
+
+ - ``sparc64-linux-gnu-`` if we have the sparc toolchain installed on
+ our system.
+
+ - ``$HOME/toolchains/microblaze/gcc-9.2.0-nolibc/microblaze-linux/bin/microblaze-linux``
+ if we have downloaded the microblaze toolchain from the 0-day
+ website to a specified path in our home directory called toolchains.
+
+- ``--qemu_config``: Specifies the path to a file containing a
+ custom qemu architecture definition. This should be a python file
+ containing a `QemuArchParams` object.
+
+- ``--qemu_args``: Specifies additional qemu arguments, for example, ``-smp 8``.
+
+- ``--jobs``: Specifies the number of jobs (commands) to run simultaneously.
+ By default, this is set to the number of cores on your system.
+
+- ``--timeout``: Specifies the maximum number of seconds allowed for all tests to run.
+ This does not include the time taken to build the tests.
+
+- ``--kernel_args``: Specifies additional kernel command-line arguments. May be repeated.
+
+- ``--run_isolated``: If set, boots the kernel for each individual suite/test.
+ This is useful for debugging a non-hermetic test, one that
+ might pass/fail based on what ran before it.
+
+- ``--raw_output``: If set, generates unformatted output from kernel. Possible options are:
+
+ - ``all``: To view the full kernel output, use ``--raw_output=all``.
+
+ - ``kunit``: This is the default option and filters to KUnit output. Use ``--raw_output`` or ``--raw_output=kunit``.
+
+- ``--json``: If set, stores the test results in a JSON format and prints to `stdout` or
+ saves to a file if a filename is specified.
+
+- ``--filter``: Specifies filters on test attributes, for example, ``speed!=slow``.
+ Multiple filters can be used by wrapping input in quotes and separating filters
+ by commas. Example: ``--filter "speed>slow, module=example"``.
+
+- ``--filter_action``: If set to ``skip``, filtered tests will be shown as skipped
+ in the output rather than showing no output.
+
+- ``--list_tests``: If set, lists all tests that will be run.
+
+- ``--list_tests_attr``: If set, lists all tests that will be run and all of their
+ attributes.
diff --git a/Documentation/dev-tools/kunit/running_tips.rst b/Documentation/dev-tools/kunit/running_tips.rst
new file mode 100644
index 000000000000..bd689db6fdd2
--- /dev/null
+++ b/Documentation/dev-tools/kunit/running_tips.rst
@@ -0,0 +1,448 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============================
+Tips For Running KUnit Tests
+============================
+
+Using ``kunit.py run`` ("kunit tool")
+=====================================
+
+Running from any directory
+--------------------------
+
+It can be handy to create a bash function like:
+
+.. code-block:: bash
+
+ function run_kunit() {
+ ( cd "$(git rev-parse --show-toplevel)" && ./tools/testing/kunit/kunit.py run "$@" )
+ }
+
+.. note::
+ Early versions of ``kunit.py`` (before 5.6) didn't work unless run from
+ the kernel root, hence the use of a subshell and ``cd``.
+
+Running a subset of tests
+-------------------------
+
+``kunit.py run`` accepts an optional glob argument to filter tests. The format
+is ``"<suite_glob>[.test_glob]"``.
+
+Say that we wanted to run the sysctl tests, we could do so via:
+
+.. code-block:: bash
+
+ $ echo -e 'CONFIG_KUNIT=y\nCONFIG_KUNIT_ALL_TESTS=y' > .kunit/.kunitconfig
+ $ ./tools/testing/kunit/kunit.py run 'sysctl*'
+
+We can filter down to just the "write" tests via:
+
+.. code-block:: bash
+
+ $ echo -e 'CONFIG_KUNIT=y\nCONFIG_KUNIT_ALL_TESTS=y' > .kunit/.kunitconfig
+ $ ./tools/testing/kunit/kunit.py run 'sysctl*.*write*'
+
+We're paying the cost of building more tests than we need this way, but it's
+easier than fiddling with ``.kunitconfig`` files or commenting out
+``kunit_suite``'s.
+
+However, if we wanted to define a set of tests in a less ad hoc way, the next
+tip is useful.
+
+Defining a set of tests
+-----------------------
+
+``kunit.py run`` (along with ``build``, and ``config``) supports a
+``--kunitconfig`` flag. So if you have a set of tests that you want to run on a
+regular basis (especially if they have other dependencies), you can create a
+specific ``.kunitconfig`` for them.
+
+E.g. kunit has one for its tests:
+
+.. code-block:: bash
+
+ $ ./tools/testing/kunit/kunit.py run --kunitconfig=lib/kunit/.kunitconfig
+
+Alternatively, if you're following the convention of naming your
+file ``.kunitconfig``, you can just pass in the dir, e.g.
+
+.. code-block:: bash
+
+ $ ./tools/testing/kunit/kunit.py run --kunitconfig=lib/kunit
+
+.. note::
+ This is a relatively new feature (5.12+) so we don't have any
+ conventions yet about on what files should be checked in versus just
+ kept around locally. It's up to you and your maintainer to decide if a
+ config is useful enough to submit (and therefore have to maintain).
+
+.. note::
+ Having ``.kunitconfig`` fragments in a parent and child directory is
+ iffy. There's discussion about adding an "import" statement in these
+ files to make it possible to have a top-level config run tests from all
+ child directories. But that would mean ``.kunitconfig`` files are no
+ longer just simple .config fragments.
+
+ One alternative would be to have kunit tool recursively combine configs
+ automagically, but tests could theoretically depend on incompatible
+ options, so handling that would be tricky.
+
+Setting kernel commandline parameters
+-------------------------------------
+
+You can use ``--kernel_args`` to pass arbitrary kernel arguments, e.g.
+
+.. code-block:: bash
+
+ $ ./tools/testing/kunit/kunit.py run --kernel_args=param=42 --kernel_args=param2=false
+
+
+Generating code coverage reports under UML
+------------------------------------------
+
+.. note::
+ TODO(brendanhiggins@google.com): There are various issues with UML and
+ versions of gcc 7 and up. You're likely to run into missing ``.gcda``
+ files or compile errors.
+
+This is different from the "normal" way of getting coverage information that is
+documented in Documentation/dev-tools/gcov.rst.
+
+Instead of enabling ``CONFIG_GCOV_KERNEL=y``, we can set these options:
+
+.. code-block:: none
+
+ CONFIG_DEBUG_KERNEL=y
+ CONFIG_DEBUG_INFO=y
+ CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+ CONFIG_GCOV=y
+
+
+Putting it together into a copy-pastable sequence of commands:
+
+.. code-block:: bash
+
+ # Append coverage options to the current config
+ $ ./tools/testing/kunit/kunit.py run --kunitconfig=.kunit/ --kunitconfig=tools/testing/kunit/configs/coverage_uml.config
+ # Extract the coverage information from the build dir (.kunit/)
+ $ lcov -t "my_kunit_tests" -o coverage.info -c -d .kunit/
+
+ # From here on, it's the same process as with CONFIG_GCOV_KERNEL=y
+ # E.g. can generate an HTML report in a tmp dir like so:
+ $ genhtml -o /tmp/coverage_html coverage.info
+
+
+If your installed version of gcc doesn't work, you can tweak the steps:
+
+.. code-block:: bash
+
+ $ ./tools/testing/kunit/kunit.py run --make_options=CC=/usr/bin/gcc-6
+ $ lcov -t "my_kunit_tests" -o coverage.info -c -d .kunit/ --gcov-tool=/usr/bin/gcov-6
+
+Alternatively, LLVM-based toolchains can also be used:
+
+.. code-block:: bash
+
+ # Build with LLVM and append coverage options to the current config
+ $ ./tools/testing/kunit/kunit.py run --make_options LLVM=1 --kunitconfig=.kunit/ --kunitconfig=tools/testing/kunit/configs/coverage_uml.config
+ $ llvm-profdata merge -sparse default.profraw -o default.profdata
+ $ llvm-cov export --format=lcov .kunit/vmlinux -instr-profile default.profdata > coverage.info
+ # The coverage.info file is in lcov-compatible format and it can be used to e.g. generate HTML report
+ $ genhtml -o /tmp/coverage_html coverage.info
+
+
+Running tests manually
+======================
+
+Running tests without using ``kunit.py run`` is also an important use case.
+Currently it's your only option if you want to test on architectures other than
+UML.
+
+As running the tests under UML is fairly straightforward (configure and compile
+the kernel, run the ``./linux`` binary), this section will focus on testing
+non-UML architectures.
+
+
+Running built-in tests
+----------------------
+
+When setting tests to ``=y``, the tests will run as part of boot and print
+results to dmesg in TAP format. So you just need to add your tests to your
+``.config``, build and boot your kernel as normal.
+
+So if we compiled our kernel with:
+
+.. code-block:: none
+
+ CONFIG_KUNIT=y
+ CONFIG_KUNIT_EXAMPLE_TEST=y
+
+Then we'd see output like this in dmesg signaling the test ran and passed:
+
+.. code-block:: none
+
+ TAP version 14
+ 1..1
+ # Subtest: example
+ 1..1
+ # example_simple_test: initializing
+ ok 1 - example_simple_test
+ ok 1 - example
+
+Running tests as modules
+------------------------
+
+Depending on the tests, you can build them as loadable modules.
+
+For example, we'd change the config options from before to
+
+.. code-block:: none
+
+ CONFIG_KUNIT=y
+ CONFIG_KUNIT_EXAMPLE_TEST=m
+
+Then after booting into our kernel, we can run the test via
+
+.. code-block:: none
+
+ $ modprobe kunit-example-test
+
+This will then cause it to print TAP output to stdout.
+
+.. note::
+ The ``modprobe`` will *not* have a non-zero exit code if any test
+ failed (as of 5.13). But ``kunit.py parse`` would, see below.
+
+.. note::
+ You can set ``CONFIG_KUNIT=m`` as well, however, some features will not
+ work and thus some tests might break. Ideally tests would specify they
+ depend on ``KUNIT=y`` in their ``Kconfig``'s, but this is an edge case
+ most test authors won't think about.
+ As of 5.13, the only difference is that ``current->kunit_test`` will
+ not exist.
+
+Pretty-printing results
+-----------------------
+
+You can use ``kunit.py parse`` to parse dmesg for test output and print out
+results in the same familiar format that ``kunit.py run`` does.
+
+.. code-block:: bash
+
+ $ ./tools/testing/kunit/kunit.py parse /var/log/dmesg
+
+
+Retrieving per suite results
+----------------------------
+
+Regardless of how you're running your tests, you can enable
+``CONFIG_KUNIT_DEBUGFS`` to expose per-suite TAP-formatted results:
+
+.. code-block:: none
+
+ CONFIG_KUNIT=y
+ CONFIG_KUNIT_EXAMPLE_TEST=m
+ CONFIG_KUNIT_DEBUGFS=y
+
+The results for each suite will be exposed under
+``/sys/kernel/debug/kunit/<suite>/results``.
+So using our example config:
+
+.. code-block:: bash
+
+ $ modprobe kunit-example-test > /dev/null
+ $ cat /sys/kernel/debug/kunit/example/results
+ ... <TAP output> ...
+
+ # After removing the module, the corresponding files will go away
+ $ modprobe -r kunit-example-test
+ $ cat /sys/kernel/debug/kunit/example/results
+ /sys/kernel/debug/kunit/example/results: No such file or directory
+
+Generating code coverage reports
+--------------------------------
+
+See Documentation/dev-tools/gcov.rst for details on how to do this.
+
+The only vaguely KUnit-specific advice here is that you probably want to build
+your tests as modules. That way you can isolate the coverage from tests from
+other code executed during boot, e.g.
+
+.. code-block:: bash
+
+ # Reset coverage counters before running the test.
+ $ echo 0 > /sys/kernel/debug/gcov/reset
+ $ modprobe kunit-example-test
+
+
+Test Attributes and Filtering
+=============================
+
+Test suites and cases can be marked with test attributes, such as speed of
+test. These attributes will later be printed in test output and can be used to
+filter test execution.
+
+Marking Test Attributes
+-----------------------
+
+Tests are marked with an attribute by including a ``kunit_attributes`` object
+in the test definition.
+
+Test cases can be marked using the ``KUNIT_CASE_ATTR(test_name, attributes)``
+macro to define the test case instead of ``KUNIT_CASE(test_name)``.
+
+.. code-block:: c
+
+ static const struct kunit_attributes example_attr = {
+ .speed = KUNIT_VERY_SLOW,
+ };
+
+ static struct kunit_case example_test_cases[] = {
+ KUNIT_CASE_ATTR(example_test, example_attr),
+ };
+
+.. note::
+ To mark a test case as slow, you can also use ``KUNIT_CASE_SLOW(test_name)``.
+ This is a helpful macro as the slow attribute is the most commonly used.
+
+Test suites can be marked with an attribute by setting the "attr" field in the
+suite definition.
+
+.. code-block:: c
+
+ static const struct kunit_attributes example_attr = {
+ .speed = KUNIT_VERY_SLOW,
+ };
+
+ static struct kunit_suite example_test_suite = {
+ ...,
+ .attr = example_attr,
+ };
+
+.. note::
+ Not all attributes need to be set in a ``kunit_attributes`` object. Unset
+ attributes will remain uninitialized and act as though the attribute is set
+ to 0 or NULL. Thus, if an attribute is set to 0, it is treated as unset.
+ These unset attributes will not be reported and may act as a default value
+ for filtering purposes.
+
+Reporting Attributes
+--------------------
+
+When a user runs tests, attributes will be present in the raw kernel output (in
+KTAP format). Note that attributes will be hidden by default in kunit.py output
+for all passing tests but the raw kernel output can be accessed using the
+``--raw_output`` flag. This is an example of how test attributes for test cases
+will be formatted in kernel output:
+
+.. code-block:: none
+
+ # example_test.speed: slow
+ ok 1 example_test
+
+This is an example of how test attributes for test suites will be formatted in
+kernel output:
+
+.. code-block:: none
+
+ KTAP version 2
+ # Subtest: example_suite
+ # module: kunit_example_test
+ 1..3
+ ...
+ ok 1 example_suite
+
+Additionally, users can output a full attribute report of tests with their
+attributes, using the command line flag ``--list_tests_attr``:
+
+.. code-block:: bash
+
+ kunit.py run "example" --list_tests_attr
+
+.. note::
+ This report can be accessed when running KUnit manually by passing in the
+ module_param ``kunit.action=list_attr``.
+
+Filtering
+---------
+
+Users can filter tests using the ``--filter`` command line flag when running
+tests. As an example:
+
+.. code-block:: bash
+
+ kunit.py run --filter speed=slow
+
+
+You can also use the following operations on filters: "<", ">", "<=", ">=",
+"!=", and "=". Example:
+
+.. code-block:: bash
+
+ kunit.py run --filter "speed>slow"
+
+This example will run all tests with speeds faster than slow. Note that the
+characters < and > are often interpreted by the shell, so they may need to be
+quoted or escaped, as above.
+
+Additionally, you can use multiple filters at once. Simply separate filters
+using commas. Example:
+
+.. code-block:: bash
+
+ kunit.py run --filter "speed>slow, module=kunit_example_test"
+
+.. note::
+ You can use this filtering feature when running KUnit manually by passing
+ the filter as a module param: ``kunit.filter="speed>slow, speed<=normal"``.
+
+Filtered tests will not run or show up in the test output. You can use the
+``--filter_action=skip`` flag to skip filtered tests instead. These tests will be
+shown in the test output in the test but will not run. To use this feature when
+running KUnit manually, use the module param ``kunit.filter_action=skip``.
+
+Rules of Filtering Procedure
+----------------------------
+
+Since both suites and test cases can have attributes, there may be conflicts
+between attributes during filtering. The process of filtering follows these
+rules:
+
+- Filtering always operates at a per-test level.
+
+- If a test has an attribute set, then the test's value is filtered on.
+
+- Otherwise, the value falls back to the suite's value.
+
+- If neither are set, the attribute has a global "default" value, which is used.
+
+List of Current Attributes
+--------------------------
+
+``speed``
+
+This attribute indicates the speed of a test's execution (how slow or fast the
+test is).
+
+This attribute is saved as an enum with the following categories: "normal",
+"slow", or "very_slow". The assumed default speed for tests is "normal". This
+indicates that the test takes a relatively trivial amount of time (less than
+1 second), regardless of the machine it is running on. Any test slower than
+this could be marked as "slow" or "very_slow".
+
+The macro ``KUNIT_CASE_SLOW(test_name)`` can be easily used to set the speed
+of a test case to "slow".
+
+``module``
+
+This attribute indicates the name of the module associated with the test.
+
+This attribute is automatically saved as a string and is printed for each suite.
+Tests can also be filtered using this attribute.
+
+``is_init``
+
+This attribute indicates whether the test uses init data or functions.
+
+This attribute is automatically saved as a boolean and tests can also be
+filtered using this attribute.
diff --git a/Documentation/dev-tools/kunit/start.rst b/Documentation/dev-tools/kunit/start.rst
new file mode 100644
index 000000000000..a98235326bab
--- /dev/null
+++ b/Documentation/dev-tools/kunit/start.rst
@@ -0,0 +1,309 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============
+Getting Started
+===============
+
+This page contains an overview of the kunit_tool and KUnit framework,
+teaching how to run existing tests and then how to write a simple test case,
+and covers common problems users face when using KUnit for the first time.
+
+Installing Dependencies
+=======================
+KUnit has the same dependencies as the Linux kernel. As long as you can
+build the kernel, you can run KUnit.
+
+Running tests with kunit_tool
+=============================
+kunit_tool is a Python script, which configures and builds a kernel, runs
+tests, and formats the test results. From the kernel repository, you
+can run kunit_tool:
+
+.. code-block:: bash
+
+ ./tools/testing/kunit/kunit.py run
+
+.. note ::
+ You may see the following error:
+ "The source tree is not clean, please run 'make ARCH=um mrproper'"
+
+ This happens because internally kunit.py specifies ``.kunit``
+ (default option) as the build directory in the command ``make O=output/dir``
+ through the argument ``--build_dir``. Hence, before starting an
+ out-of-tree build, the source tree must be clean.
+
+ There is also the same caveat mentioned in the "Build directory for
+ the kernel" section of the :doc:`admin-guide </admin-guide/README>`,
+ that is, its use, it must be used for all invocations of ``make``.
+ The good news is that it can indeed be solved by running
+ ``make ARCH=um mrproper``, just be aware that this will delete the
+ current configuration and all generated files.
+
+If everything worked correctly, you should see the following:
+
+.. code-block::
+
+ Configuring KUnit Kernel ...
+ Building KUnit Kernel ...
+ Starting KUnit Kernel ...
+
+The tests will pass or fail.
+
+.. note ::
+ Because it is building a lot of sources for the first time,
+ the ``Building KUnit Kernel`` step may take a while.
+
+For detailed information on this wrapper, see:
+Documentation/dev-tools/kunit/run_wrapper.rst.
+
+Selecting which tests to run
+----------------------------
+
+By default, kunit_tool runs all tests reachable with minimal configuration,
+that is, using default values for most of the kconfig options. However,
+you can select which tests to run by:
+
+- `Customizing Kconfig`_ used to compile the kernel, or
+- `Filtering tests by name`_ to select specifically which compiled tests to run.
+
+Customizing Kconfig
+~~~~~~~~~~~~~~~~~~~
+A good starting point for the ``.kunitconfig`` is the KUnit default config.
+If you didn't run ``kunit.py run`` yet, you can generate it by running:
+
+.. code-block:: bash
+
+ cd $PATH_TO_LINUX_REPO
+ tools/testing/kunit/kunit.py config
+ cat .kunit/.kunitconfig
+
+.. note ::
+ ``.kunitconfig`` lives in the ``--build_dir`` used by kunit.py, which is
+ ``.kunit`` by default.
+
+Before running the tests, kunit_tool ensures that all config options
+set in ``.kunitconfig`` are set in the kernel ``.config``. It will warn
+you if you have not included dependencies for the options used.
+
+There are many ways to customize the configurations:
+
+a. Edit ``.kunit/.kunitconfig``. The file should contain the list of kconfig
+ options required to run the desired tests, including their dependencies.
+ You may want to remove CONFIG_KUNIT_ALL_TESTS from the ``.kunitconfig`` as
+ it will enable a number of additional tests that you may not want.
+ If you need to run on an architecture other than UML see :ref:`kunit-on-qemu`.
+
+b. Enable additional kconfig options on top of ``.kunit/.kunitconfig``.
+ For example, to include the kernel's linked-list test you can run::
+
+ ./tools/testing/kunit/kunit.py run \
+ --kconfig_add CONFIG_LIST_KUNIT_TEST=y
+
+c. Provide the path of one or more .kunitconfig files from the tree.
+ For example, to run only ``FAT_FS`` and ``EXT4`` tests you can run::
+
+ ./tools/testing/kunit/kunit.py run \
+ --kunitconfig ./fs/fat/.kunitconfig \
+ --kunitconfig ./fs/ext4/.kunitconfig
+
+d. If you change the ``.kunitconfig``, kunit.py will trigger a rebuild of the
+ ``.config`` file. But you can edit the ``.config`` file directly or with
+ tools like ``make menuconfig O=.kunit``. As long as its a superset of
+ ``.kunitconfig``, kunit.py won't overwrite your changes.
+
+
+.. note ::
+
+ To save a .kunitconfig after finding a satisfactory configuration::
+
+ make savedefconfig O=.kunit
+ cp .kunit/defconfig .kunit/.kunitconfig
+
+Filtering tests by name
+~~~~~~~~~~~~~~~~~~~~~~~
+If you want to be more specific than Kconfig can provide, it is also possible
+to select which tests to execute at boot-time by passing a glob filter
+(read instructions regarding the pattern in the manpage :manpage:`glob(7)`).
+If there is a ``"."`` (period) in the filter, it will be interpreted as a
+separator between the name of the test suite and the test case,
+otherwise, it will be interpreted as the name of the test suite.
+For example, let's assume we are using the default config:
+
+a. inform the name of a test suite, like ``"kunit_executor_test"``,
+ to run every test case it contains::
+
+ ./tools/testing/kunit/kunit.py run "kunit_executor_test"
+
+b. inform the name of a test case prefixed by its test suite,
+ like ``"example.example_simple_test"``, to run specifically that test case::
+
+ ./tools/testing/kunit/kunit.py run "example.example_simple_test"
+
+c. use wildcard characters (``*?[``) to run any test case that matches the pattern,
+ like ``"*.*64*"`` to run test cases containing ``"64"`` in the name inside
+ any test suite::
+
+ ./tools/testing/kunit/kunit.py run "*.*64*"
+
+Running Tests without the KUnit Wrapper
+=======================================
+If you do not want to use the KUnit Wrapper (for example: you want code
+under test to integrate with other systems, or use a different/
+unsupported architecture or configuration), KUnit can be included in
+any kernel, and the results are read out and parsed manually.
+
+.. note ::
+ ``CONFIG_KUNIT`` should not be enabled in a production environment.
+ Enabling KUnit disables Kernel Address-Space Layout Randomization
+ (KASLR), and tests may affect the state of the kernel in ways not
+ suitable for production.
+
+Configuring the Kernel
+----------------------
+To enable KUnit itself, you need to enable the ``CONFIG_KUNIT`` Kconfig
+option (under Kernel Hacking/Kernel Testing and Coverage in
+``menuconfig``). From there, you can enable any KUnit tests. They
+usually have config options ending in ``_KUNIT_TEST``.
+
+KUnit and KUnit tests can be compiled as modules. The tests in a module
+will run when the module is loaded.
+
+Running Tests (without KUnit Wrapper)
+-------------------------------------
+Build and run your kernel. In the kernel log, the test output is printed
+out in the TAP format. This will only happen by default if KUnit/tests
+are built-in. Otherwise the module will need to be loaded.
+
+.. note ::
+ Some lines and/or data may get interspersed in the TAP output.
+
+Writing Your First Test
+=======================
+In your kernel repository, let's add some code that we can test.
+
+1. Create a file ``drivers/misc/example.h``, which includes:
+
+.. code-block:: c
+
+ int misc_example_add(int left, int right);
+
+2. Create a file ``drivers/misc/example.c``, which includes:
+
+.. code-block:: c
+
+ #include <linux/errno.h>
+
+ #include "example.h"
+
+ int misc_example_add(int left, int right)
+ {
+ return left + right;
+ }
+
+3. Add the following lines to ``drivers/misc/Kconfig``:
+
+.. code-block:: kconfig
+
+ config MISC_EXAMPLE
+ bool "My example"
+
+4. Add the following lines to ``drivers/misc/Makefile``:
+
+.. code-block:: make
+
+ obj-$(CONFIG_MISC_EXAMPLE) += example.o
+
+Now we are ready to write the test cases.
+
+1. Add the below test case in ``drivers/misc/example_test.c``:
+
+.. code-block:: c
+
+ #include <kunit/test.h>
+ #include "example.h"
+
+ /* Define the test cases. */
+
+ static void misc_example_add_test_basic(struct kunit *test)
+ {
+ KUNIT_EXPECT_EQ(test, 1, misc_example_add(1, 0));
+ KUNIT_EXPECT_EQ(test, 2, misc_example_add(1, 1));
+ KUNIT_EXPECT_EQ(test, 0, misc_example_add(-1, 1));
+ KUNIT_EXPECT_EQ(test, INT_MAX, misc_example_add(0, INT_MAX));
+ KUNIT_EXPECT_EQ(test, -1, misc_example_add(INT_MAX, INT_MIN));
+ }
+
+ static void misc_example_test_failure(struct kunit *test)
+ {
+ KUNIT_FAIL(test, "This test never passes.");
+ }
+
+ static struct kunit_case misc_example_test_cases[] = {
+ KUNIT_CASE(misc_example_add_test_basic),
+ KUNIT_CASE(misc_example_test_failure),
+ {}
+ };
+
+ static struct kunit_suite misc_example_test_suite = {
+ .name = "misc-example",
+ .test_cases = misc_example_test_cases,
+ };
+ kunit_test_suite(misc_example_test_suite);
+
+ MODULE_LICENSE("GPL");
+
+2. Add the following lines to ``drivers/misc/Kconfig``:
+
+.. code-block:: kconfig
+
+ config MISC_EXAMPLE_TEST
+ tristate "Test for my example" if !KUNIT_ALL_TESTS
+ depends on MISC_EXAMPLE && KUNIT
+ default KUNIT_ALL_TESTS
+
+Note: If your test does not support being built as a loadable module (which is
+discouraged), replace tristate by bool, and depend on KUNIT=y instead of KUNIT.
+
+3. Add the following lines to ``drivers/misc/Makefile``:
+
+.. code-block:: make
+
+ obj-$(CONFIG_MISC_EXAMPLE_TEST) += example_test.o
+
+4. Add the following lines to ``.kunit/.kunitconfig``:
+
+.. code-block:: none
+
+ CONFIG_MISC_EXAMPLE=y
+ CONFIG_MISC_EXAMPLE_TEST=y
+
+5. Run the test:
+
+.. code-block:: bash
+
+ ./tools/testing/kunit/kunit.py run
+
+You should see the following failure:
+
+.. code-block:: none
+
+ ...
+ [16:08:57] [PASSED] misc-example:misc_example_add_test_basic
+ [16:08:57] [FAILED] misc-example:misc_example_test_failure
+ [16:08:57] EXPECTATION FAILED at drivers/misc/example-test.c:17
+ [16:08:57] This test never passes.
+ ...
+
+Congrats! You just wrote your first KUnit test.
+
+Next Steps
+==========
+
+If you're interested in using some of the more advanced features of kunit.py,
+take a look at Documentation/dev-tools/kunit/run_wrapper.rst
+
+If you'd like to run tests without using kunit.py, check out
+Documentation/dev-tools/kunit/run_manual.rst
+
+For more information on writing KUnit tests (including some common techniques
+for testing different things), see Documentation/dev-tools/kunit/usage.rst
diff --git a/Documentation/dev-tools/kunit/style.rst b/Documentation/dev-tools/kunit/style.rst
new file mode 100644
index 000000000000..eac81a714a29
--- /dev/null
+++ b/Documentation/dev-tools/kunit/style.rst
@@ -0,0 +1,213 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================
+Test Style and Nomenclature
+===========================
+
+To make finding, writing, and using KUnit tests as simple as possible, it is
+strongly encouraged that they are named and written according to the guidelines
+below. While it is possible to write KUnit tests which do not follow these rules,
+they may break some tooling, may conflict with other tests, and may not be run
+automatically by testing systems.
+
+It is recommended that you only deviate from these guidelines when:
+
+1. Porting tests to KUnit which are already known with an existing name.
+2. Writing tests which would cause serious problems if automatically run. For
+ example, non-deterministically producing false positives or negatives, or
+ taking a long time to run.
+
+Subsystems, Suites, and Tests
+=============================
+
+To make tests easy to find, they are grouped into suites and subsystems. A test
+suite is a group of tests which test a related area of the kernel. A subsystem
+is a set of test suites which test different parts of a kernel subsystem
+or a driver.
+
+Subsystems
+----------
+
+Every test suite must belong to a subsystem. A subsystem is a collection of one
+or more KUnit test suites which test the same driver or part of the kernel. A
+test subsystem should match a single kernel module. If the code being tested
+cannot be compiled as a module, in many cases the subsystem should correspond to
+a directory in the source tree or an entry in the ``MAINTAINERS`` file. If
+unsure, follow the conventions set by tests in similar areas.
+
+Test subsystems should be named after the code being tested, either after the
+module (wherever possible), or after the directory or files being tested. Test
+subsystems should be named to avoid ambiguity where necessary.
+
+If a test subsystem name has multiple components, they should be separated by
+underscores. *Do not* include "test" or "kunit" directly in the subsystem name
+unless we are actually testing other tests or the kunit framework itself. For
+example, subsystems could be called:
+
+``ext4``
+ Matches the module and filesystem name.
+``apparmor``
+ Matches the module name and LSM name.
+``kasan``
+ Common name for the tool, prominent part of the path ``mm/kasan``
+``snd_hda_codec_hdmi``
+ Has several components (``snd``, ``hda``, ``codec``, ``hdmi``) separated by
+ underscores. Matches the module name.
+
+Avoid names as shown in examples below:
+
+``linear-ranges``
+ Names should use underscores, not dashes, to separate words. Prefer
+ ``linear_ranges``.
+``qos-kunit-test``
+ This name should use underscores, and not have "kunit-test" as a
+ suffix. ``qos`` is also ambiguous as a subsystem name, because several parts
+ of the kernel have a ``qos`` subsystem. ``power_qos`` would be a better name.
+``pc_parallel_port``
+ The corresponding module name is ``parport_pc``, so this subsystem should also
+ be named ``parport_pc``.
+
+.. note::
+ The KUnit API and tools do not explicitly know about subsystems. They are
+ a way of categorizing test suites and naming modules which provides a
+ simple, consistent way for humans to find and run tests. This may change
+ in the future.
+
+Suites
+------
+
+KUnit tests are grouped into test suites, which cover a specific area of
+functionality being tested. Test suites can have shared initialization and
+shutdown code which is run for all tests in the suite. Not all subsystems need
+to be split into multiple test suites (for example, simple drivers).
+
+Test suites are named after the subsystem they are part of. If a subsystem
+contains several suites, the specific area under test should be appended to the
+subsystem name, separated by an underscore.
+
+In the event that there are multiple types of test using KUnit within a
+subsystem (for example, both unit tests and integration tests), they should be
+put into separate suites, with the type of test as the last element in the suite
+name. Unless these tests are actually present, avoid using ``_test``, ``_unittest``
+or similar in the suite name.
+
+The full test suite name (including the subsystem name) should be specified as
+the ``.name`` member of the ``kunit_suite`` struct, and forms the base for the
+module name. For example, test suites could include:
+
+``ext4_inode``
+ Part of the ``ext4`` subsystem, testing the ``inode`` area.
+``kunit_try_catch``
+ Part of the ``kunit`` implementation itself, testing the ``try_catch`` area.
+``apparmor_property_entry``
+ Part of the ``apparmor`` subsystem, testing the ``property_entry`` area.
+``kasan``
+ The ``kasan`` subsystem has only one suite, so the suite name is the same as
+ the subsystem name.
+
+Avoid names, for example:
+
+``ext4_ext4_inode``
+ There is no reason to state the subsystem twice.
+``property_entry``
+ The suite name is ambiguous without the subsystem name.
+``kasan_integration_test``
+ Because there is only one suite in the ``kasan`` subsystem, the suite should
+ just be called as ``kasan``. Do not redundantly add
+ ``integration_test``. It should be a separate test suite. For example, if the
+ unit tests are added, then that suite could be named as ``kasan_unittest`` or
+ similar.
+
+Test Cases
+----------
+
+Individual tests consist of a single function which tests a constrained
+codepath, property, or function. In the test output, an individual test's
+results will show up as subtests of the suite's results.
+
+Tests should be named after what they are testing. This is often the name of the
+function being tested, with a description of the input or codepath being tested.
+As tests are C functions, they should be named and written in accordance with
+the kernel coding style.
+
+.. note::
+ As tests are themselves functions, their names cannot conflict with
+ other C identifiers in the kernel. This may require some creative
+ naming. It is a good idea to make your test functions `static` to avoid
+ polluting the global namespace.
+
+Example test names include:
+
+``unpack_u32_with_null_name``
+ Tests the ``unpack_u32`` function when a NULL name is passed in.
+``test_list_splice``
+ Tests the ``list_splice`` macro. It has the prefix ``test_`` to avoid a
+ name conflict with the macro itself.
+
+
+Should it be necessary to refer to a test outside the context of its test suite,
+the *fully-qualified* name of a test should be the suite name followed by the
+test name, separated by a colon (i.e. ``suite:test``).
+
+Test Kconfig Entries
+====================
+
+Every test suite should be tied to a Kconfig entry.
+
+This Kconfig entry must:
+
+* be named ``CONFIG_<name>_KUNIT_TEST``: where <name> is the name of the test
+ suite.
+* be listed either alongside the config entries for the driver/subsystem being
+ tested, or be under [Kernel Hacking]->[Kernel Testing and Coverage]
+* depend on ``CONFIG_KUNIT``.
+* be visible only if ``CONFIG_KUNIT_ALL_TESTS`` is not enabled.
+* have a default value of ``CONFIG_KUNIT_ALL_TESTS``.
+* have a brief description of KUnit in the help text.
+
+If we are not able to meet above conditions (for example, the test is unable to
+be built as a module), Kconfig entries for tests should be tristate.
+
+For example, a Kconfig entry might look like:
+
+.. code-block:: none
+
+ config FOO_KUNIT_TEST
+ tristate "KUnit test for foo" if !KUNIT_ALL_TESTS
+ depends on KUNIT
+ default KUNIT_ALL_TESTS
+ help
+ This builds unit tests for foo.
+
+ For more information on KUnit and unit tests in general,
+ please refer to the KUnit documentation in Documentation/dev-tools/kunit/.
+
+ If unsure, say N.
+
+
+Test File and Module Names
+==========================
+
+KUnit tests are often compiled as a separate module. To avoid conflicting
+with regular modules, KUnit modules should be named after the test suite,
+followed by ``_kunit`` (e.g. if "foobar" is the core module, then
+"foobar_kunit" is the KUnit test module).
+
+Test source files, whether compiled as a separate module or an
+``#include`` in another source file, are best kept in a ``tests/``
+subdirectory to not conflict with other source files (e.g. for
+tab-completion).
+
+Note that the ``_test`` suffix has also been used in some existing
+tests. The ``_kunit`` suffix is preferred, as it makes the distinction
+between KUnit and non-KUnit tests clearer.
+
+So for the common case, name the file containing the test suite
+``tests/<suite>_kunit.c``. The ``tests`` directory should be placed at
+the same level as the code under test. For example, tests for
+``lib/string.c`` live in ``lib/tests/string_kunit.c``.
+
+If the suite name contains some or all of the name of the test's parent
+directory, it may make sense to modify the source filename to reduce
+redundancy. For example, a ``foo_firmware`` suite could be in the
+``foo/tests/firmware_kunit.c`` file.
diff --git a/Documentation/dev-tools/kunit/usage.rst b/Documentation/dev-tools/kunit/usage.rst
new file mode 100644
index 000000000000..ebd06f5ea455
--- /dev/null
+++ b/Documentation/dev-tools/kunit/usage.rst
@@ -0,0 +1,1214 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Writing Tests
+=============
+
+Test Cases
+----------
+
+The fundamental unit in KUnit is the test case. A test case is a function with
+the signature ``void (*)(struct kunit *test)``. It calls the function under test
+and then sets *expectations* for what should happen. For example:
+
+.. code-block:: c
+
+ void example_test_success(struct kunit *test)
+ {
+ }
+
+ void example_test_failure(struct kunit *test)
+ {
+ KUNIT_FAIL(test, "This test never passes.");
+ }
+
+In the above example, ``example_test_success`` always passes because it does
+nothing; no expectations are set, and therefore all expectations pass. On the
+other hand ``example_test_failure`` always fails because it calls ``KUNIT_FAIL``,
+which is a special expectation that logs a message and causes the test case to
+fail.
+
+Expectations
+~~~~~~~~~~~~
+An *expectation* specifies that we expect a piece of code to do something in a
+test. An expectation is called like a function. A test is made by setting
+expectations about the behavior of a piece of code under test. When one or more
+expectations fail, the test case fails and information about the failure is
+logged. For example:
+
+.. code-block:: c
+
+ void add_test_basic(struct kunit *test)
+ {
+ KUNIT_EXPECT_EQ(test, 1, add(1, 0));
+ KUNIT_EXPECT_EQ(test, 2, add(1, 1));
+ }
+
+In the above example, ``add_test_basic`` makes a number of assertions about the
+behavior of a function called ``add``. The first parameter is always of type
+``struct kunit *``, which contains information about the current test context.
+The second parameter, in this case, is what the value is expected to be. The
+last value is what the value actually is. If ``add`` passes all of these
+expectations, the test case, ``add_test_basic`` will pass; if any one of these
+expectations fails, the test case will fail.
+
+A test case *fails* when any expectation is violated; however, the test will
+continue to run, and try other expectations until the test case ends or is
+otherwise terminated. This is as opposed to *assertions* which are discussed
+later.
+
+To learn about more KUnit expectations, see Documentation/dev-tools/kunit/api/test.rst.
+
+.. note::
+ A single test case should be short, easy to understand, and focused on a
+ single behavior.
+
+For example, if we want to rigorously test the ``add`` function above, create
+additional tests cases which would test each property that an ``add`` function
+should have as shown below:
+
+.. code-block:: c
+
+ void add_test_basic(struct kunit *test)
+ {
+ KUNIT_EXPECT_EQ(test, 1, add(1, 0));
+ KUNIT_EXPECT_EQ(test, 2, add(1, 1));
+ }
+
+ void add_test_negative(struct kunit *test)
+ {
+ KUNIT_EXPECT_EQ(test, 0, add(-1, 1));
+ }
+
+ void add_test_max(struct kunit *test)
+ {
+ KUNIT_EXPECT_EQ(test, INT_MAX, add(0, INT_MAX));
+ KUNIT_EXPECT_EQ(test, -1, add(INT_MAX, INT_MIN));
+ }
+
+ void add_test_overflow(struct kunit *test)
+ {
+ KUNIT_EXPECT_EQ(test, INT_MIN, add(INT_MAX, 1));
+ }
+
+Assertions
+~~~~~~~~~~
+
+An assertion is like an expectation, except that the assertion immediately
+terminates the test case if the condition is not satisfied. For example:
+
+.. code-block:: c
+
+ static void test_sort(struct kunit *test)
+ {
+ int *a, i, r = 1;
+ a = kunit_kmalloc_array(test, TEST_LEN, sizeof(*a), GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, a);
+ for (i = 0; i < TEST_LEN; i++) {
+ r = (r * 725861) % 6599;
+ a[i] = r;
+ }
+ sort(a, TEST_LEN, sizeof(*a), cmpint, NULL);
+ for (i = 0; i < TEST_LEN-1; i++)
+ KUNIT_EXPECT_LE(test, a[i], a[i + 1]);
+ }
+
+In this example, we need to be able to allocate an array to test the ``sort()``
+function. So we use ``KUNIT_ASSERT_NOT_ERR_OR_NULL()`` to abort the test if
+there's an allocation error.
+
+.. note::
+ In other test frameworks, ``ASSERT`` macros are often implemented by calling
+ ``return`` so they only work from the test function. In KUnit, we stop the
+ current kthread on failure, so you can call them from anywhere.
+
+.. note::
+ Warning: There is an exception to the above rule. You shouldn't use assertions
+ in the suite's exit() function, or in the free function for a resource. These
+ run when a test is shutting down, and an assertion here prevents further
+ cleanup code from running, potentially leading to a memory leak.
+
+Customizing error messages
+--------------------------
+
+Each of the ``KUNIT_EXPECT`` and ``KUNIT_ASSERT`` macros have a ``_MSG``
+variant. These take a format string and arguments to provide additional
+context to the automatically generated error messages.
+
+.. code-block:: c
+
+ char some_str[41];
+ generate_sha1_hex_string(some_str);
+
+ /* Before. Not easy to tell why the test failed. */
+ KUNIT_EXPECT_EQ(test, strlen(some_str), 40);
+
+ /* After. Now we see the offending string. */
+ KUNIT_EXPECT_EQ_MSG(test, strlen(some_str), 40, "some_str='%s'", some_str);
+
+Alternatively, one can take full control over the error message by using
+``KUNIT_FAIL()``, e.g.
+
+.. code-block:: c
+
+ /* Before */
+ KUNIT_EXPECT_EQ(test, some_setup_function(), 0);
+
+ /* After: full control over the failure message. */
+ if (some_setup_function())
+ KUNIT_FAIL(test, "Failed to setup thing for testing");
+
+
+Test Suites
+~~~~~~~~~~~
+
+We need many test cases covering all the unit's behaviors. It is common to have
+many similar tests. In order to reduce duplication in these closely related
+tests, most unit testing frameworks (including KUnit) provide the concept of a
+*test suite*. A test suite is a collection of test cases for a unit of code
+with optional setup and teardown functions that run before/after the whole
+suite and/or every test case.
+
+.. note::
+ A test case will only run if it is associated with a test suite.
+
+For example:
+
+.. code-block:: c
+
+ static struct kunit_case example_test_cases[] = {
+ KUNIT_CASE(example_test_foo),
+ KUNIT_CASE(example_test_bar),
+ KUNIT_CASE(example_test_baz),
+ {}
+ };
+
+ static struct kunit_suite example_test_suite = {
+ .name = "example",
+ .init = example_test_init,
+ .exit = example_test_exit,
+ .suite_init = example_suite_init,
+ .suite_exit = example_suite_exit,
+ .test_cases = example_test_cases,
+ };
+ kunit_test_suite(example_test_suite);
+
+In the above example, the test suite ``example_test_suite`` would first run
+``example_suite_init``, then run the test cases ``example_test_foo``,
+``example_test_bar``, and ``example_test_baz``. Each would have
+``example_test_init`` called immediately before it and ``example_test_exit``
+called immediately after it. Finally, ``example_suite_exit`` would be called
+after everything else. ``kunit_test_suite(example_test_suite)`` registers the
+test suite with the KUnit test framework.
+
+.. note::
+ The ``exit`` and ``suite_exit`` functions will run even if ``init`` or
+ ``suite_init`` fail. Make sure that they can handle any inconsistent
+ state which may result from ``init`` or ``suite_init`` encountering errors
+ or exiting early.
+
+``kunit_test_suite(...)`` is a macro which tells the linker to put the
+specified test suite in a special linker section so that it can be run by KUnit
+either after ``late_init``, or when the test module is loaded (if the test was
+built as a module).
+
+For more information, see Documentation/dev-tools/kunit/api/test.rst.
+
+.. _kunit-on-non-uml:
+
+Writing Tests For Other Architectures
+-------------------------------------
+
+It is better to write tests that run on UML to tests that only run under a
+particular architecture. It is better to write tests that run under QEMU or
+another easy to obtain (and monetarily free) software environment to a specific
+piece of hardware.
+
+Nevertheless, there are still valid reasons to write a test that is architecture
+or hardware specific. For example, we might want to test code that really
+belongs in ``arch/some-arch/*``. Even so, try to write the test so that it does
+not depend on physical hardware. Some of our test cases may not need hardware,
+only few tests actually require the hardware to test it. When hardware is not
+available, instead of disabling tests, we can skip them.
+
+Now that we have narrowed down exactly what bits are hardware specific, the
+actual procedure for writing and running the tests is same as writing normal
+KUnit tests.
+
+.. important::
+ We may have to reset hardware state. If this is not possible, we may only
+ be able to run one test case per invocation.
+
+.. TODO(brendanhiggins@google.com): Add an actual example of an architecture-
+ dependent KUnit test.
+
+Common Patterns
+===============
+
+Isolating Behavior
+------------------
+
+Unit testing limits the amount of code under test to a single unit. It controls
+what code gets run when the unit under test calls a function. Where a function
+is exposed as part of an API such that the definition of that function can be
+changed without affecting the rest of the code base. In the kernel, this comes
+from two constructs: classes, which are structs that contain function pointers
+provided by the implementer, and architecture-specific functions, which have
+definitions selected at compile time.
+
+Classes
+~~~~~~~
+
+Classes are not a construct that is built into the C programming language;
+however, it is an easily derived concept. Accordingly, in most cases, every
+project that does not use a standardized object oriented library (like GNOME's
+GObject) has their own slightly different way of doing object oriented
+programming; the Linux kernel is no exception.
+
+The central concept in kernel object oriented programming is the class. In the
+kernel, a *class* is a struct that contains function pointers. This creates a
+contract between *implementers* and *users* since it forces them to use the
+same function signature without having to call the function directly. To be a
+class, the function pointers must specify that a pointer to the class, known as
+a *class handle*, be one of the parameters. Thus the member functions (also
+known as *methods*) have access to member variables (also known as *fields*)
+allowing the same implementation to have multiple *instances*.
+
+A class can be *overridden* by *child classes* by embedding the *parent class*
+in the child class. Then when the child class *method* is called, the child
+implementation knows that the pointer passed to it is of a parent contained
+within the child. Thus, the child can compute the pointer to itself because the
+pointer to the parent is always a fixed offset from the pointer to the child.
+This offset is the offset of the parent contained in the child struct. For
+example:
+
+.. code-block:: c
+
+ struct shape {
+ int (*area)(struct shape *this);
+ };
+
+ struct rectangle {
+ struct shape parent;
+ int length;
+ int width;
+ };
+
+ int rectangle_area(struct shape *this)
+ {
+ struct rectangle *self = container_of(this, struct rectangle, parent);
+
+ return self->length * self->width;
+ };
+
+ void rectangle_new(struct rectangle *self, int length, int width)
+ {
+ self->parent.area = rectangle_area;
+ self->length = length;
+ self->width = width;
+ }
+
+In this example, computing the pointer to the child from the pointer to the
+parent is done by ``container_of``.
+
+Faking Classes
+~~~~~~~~~~~~~~
+
+In order to unit test a piece of code that calls a method in a class, the
+behavior of the method must be controllable, otherwise the test ceases to be a
+unit test and becomes an integration test.
+
+A fake class implements a piece of code that is different than what runs in a
+production instance, but behaves identical from the standpoint of the callers.
+This is done to replace a dependency that is hard to deal with, or is slow. For
+example, implementing a fake EEPROM that stores the "contents" in an
+internal buffer. Assume we have a class that represents an EEPROM:
+
+.. code-block:: c
+
+ struct eeprom {
+ ssize_t (*read)(struct eeprom *this, size_t offset, char *buffer, size_t count);
+ ssize_t (*write)(struct eeprom *this, size_t offset, const char *buffer, size_t count);
+ };
+
+And we want to test code that buffers writes to the EEPROM:
+
+.. code-block:: c
+
+ struct eeprom_buffer {
+ ssize_t (*write)(struct eeprom_buffer *this, const char *buffer, size_t count);
+ int flush(struct eeprom_buffer *this);
+ size_t flush_count; /* Flushes when buffer exceeds flush_count. */
+ };
+
+ struct eeprom_buffer *new_eeprom_buffer(struct eeprom *eeprom);
+ void destroy_eeprom_buffer(struct eeprom *eeprom);
+
+We can test this code by *faking out* the underlying EEPROM:
+
+.. code-block:: c
+
+ struct fake_eeprom {
+ struct eeprom parent;
+ char contents[FAKE_EEPROM_CONTENTS_SIZE];
+ };
+
+ ssize_t fake_eeprom_read(struct eeprom *parent, size_t offset, char *buffer, size_t count)
+ {
+ struct fake_eeprom *this = container_of(parent, struct fake_eeprom, parent);
+
+ count = min(count, FAKE_EEPROM_CONTENTS_SIZE - offset);
+ memcpy(buffer, this->contents + offset, count);
+
+ return count;
+ }
+
+ ssize_t fake_eeprom_write(struct eeprom *parent, size_t offset, const char *buffer, size_t count)
+ {
+ struct fake_eeprom *this = container_of(parent, struct fake_eeprom, parent);
+
+ count = min(count, FAKE_EEPROM_CONTENTS_SIZE - offset);
+ memcpy(this->contents + offset, buffer, count);
+
+ return count;
+ }
+
+ void fake_eeprom_init(struct fake_eeprom *this)
+ {
+ this->parent.read = fake_eeprom_read;
+ this->parent.write = fake_eeprom_write;
+ memset(this->contents, 0, FAKE_EEPROM_CONTENTS_SIZE);
+ }
+
+We can now use it to test ``struct eeprom_buffer``:
+
+.. code-block:: c
+
+ struct eeprom_buffer_test {
+ struct fake_eeprom *fake_eeprom;
+ struct eeprom_buffer *eeprom_buffer;
+ };
+
+ static void eeprom_buffer_test_does_not_write_until_flush(struct kunit *test)
+ {
+ struct eeprom_buffer_test *ctx = test->priv;
+ struct eeprom_buffer *eeprom_buffer = ctx->eeprom_buffer;
+ struct fake_eeprom *fake_eeprom = ctx->fake_eeprom;
+ char buffer[] = {0xff};
+
+ eeprom_buffer->flush_count = SIZE_MAX;
+
+ eeprom_buffer->write(eeprom_buffer, buffer, 1);
+ KUNIT_EXPECT_EQ(test, fake_eeprom->contents[0], 0);
+
+ eeprom_buffer->write(eeprom_buffer, buffer, 1);
+ KUNIT_EXPECT_EQ(test, fake_eeprom->contents[1], 0);
+
+ eeprom_buffer->flush(eeprom_buffer);
+ KUNIT_EXPECT_EQ(test, fake_eeprom->contents[0], 0xff);
+ KUNIT_EXPECT_EQ(test, fake_eeprom->contents[1], 0xff);
+ }
+
+ static void eeprom_buffer_test_flushes_after_flush_count_met(struct kunit *test)
+ {
+ struct eeprom_buffer_test *ctx = test->priv;
+ struct eeprom_buffer *eeprom_buffer = ctx->eeprom_buffer;
+ struct fake_eeprom *fake_eeprom = ctx->fake_eeprom;
+ char buffer[] = {0xff};
+
+ eeprom_buffer->flush_count = 2;
+
+ eeprom_buffer->write(eeprom_buffer, buffer, 1);
+ KUNIT_EXPECT_EQ(test, fake_eeprom->contents[0], 0);
+
+ eeprom_buffer->write(eeprom_buffer, buffer, 1);
+ KUNIT_EXPECT_EQ(test, fake_eeprom->contents[0], 0xff);
+ KUNIT_EXPECT_EQ(test, fake_eeprom->contents[1], 0xff);
+ }
+
+ static void eeprom_buffer_test_flushes_increments_of_flush_count(struct kunit *test)
+ {
+ struct eeprom_buffer_test *ctx = test->priv;
+ struct eeprom_buffer *eeprom_buffer = ctx->eeprom_buffer;
+ struct fake_eeprom *fake_eeprom = ctx->fake_eeprom;
+ char buffer[] = {0xff, 0xff};
+
+ eeprom_buffer->flush_count = 2;
+
+ eeprom_buffer->write(eeprom_buffer, buffer, 1);
+ KUNIT_EXPECT_EQ(test, fake_eeprom->contents[0], 0);
+
+ eeprom_buffer->write(eeprom_buffer, buffer, 2);
+ KUNIT_EXPECT_EQ(test, fake_eeprom->contents[0], 0xff);
+ KUNIT_EXPECT_EQ(test, fake_eeprom->contents[1], 0xff);
+ /* Should have only flushed the first two bytes. */
+ KUNIT_EXPECT_EQ(test, fake_eeprom->contents[2], 0);
+ }
+
+ static int eeprom_buffer_test_init(struct kunit *test)
+ {
+ struct eeprom_buffer_test *ctx;
+
+ ctx = kunit_kzalloc(test, sizeof(*ctx), GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx);
+
+ ctx->fake_eeprom = kunit_kzalloc(test, sizeof(*ctx->fake_eeprom), GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx->fake_eeprom);
+ fake_eeprom_init(ctx->fake_eeprom);
+
+ ctx->eeprom_buffer = new_eeprom_buffer(&ctx->fake_eeprom->parent);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx->eeprom_buffer);
+
+ test->priv = ctx;
+
+ return 0;
+ }
+
+ static void eeprom_buffer_test_exit(struct kunit *test)
+ {
+ struct eeprom_buffer_test *ctx = test->priv;
+
+ destroy_eeprom_buffer(ctx->eeprom_buffer);
+ }
+
+Testing Against Multiple Inputs
+-------------------------------
+
+Testing just a few inputs is not enough to ensure that the code works correctly,
+for example: testing a hash function.
+
+We can write a helper macro or function. The function is called for each input.
+For example, to test ``sha1sum(1)``, we can write:
+
+.. code-block:: c
+
+ #define TEST_SHA1(in, want) \
+ sha1sum(in, out); \
+ KUNIT_EXPECT_STREQ_MSG(test, out, want, "sha1sum(%s)", in);
+
+ char out[40];
+ TEST_SHA1("hello world", "2aae6c35c94fcfb415dbe95f408b9ce91ee846ed");
+ TEST_SHA1("hello world!", "430ce34d020724ed75a196dfc2ad67c77772d169");
+
+Note the use of the ``_MSG`` version of ``KUNIT_EXPECT_STREQ`` to print a more
+detailed error and make the assertions clearer within the helper macros.
+
+The ``_MSG`` variants are useful when the same expectation is called multiple
+times (in a loop or helper function) and thus the line number is not enough to
+identify what failed, as shown below.
+
+In complicated cases, we recommend using a *table-driven test* compared to the
+helper macro variation, for example:
+
+.. code-block:: c
+
+ int i;
+ char out[40];
+
+ struct sha1_test_case {
+ const char *str;
+ const char *sha1;
+ };
+
+ struct sha1_test_case cases[] = {
+ {
+ .str = "hello world",
+ .sha1 = "2aae6c35c94fcfb415dbe95f408b9ce91ee846ed",
+ },
+ {
+ .str = "hello world!",
+ .sha1 = "430ce34d020724ed75a196dfc2ad67c77772d169",
+ },
+ };
+ for (i = 0; i < ARRAY_SIZE(cases); ++i) {
+ sha1sum(cases[i].str, out);
+ KUNIT_EXPECT_STREQ_MSG(test, out, cases[i].sha1,
+ "sha1sum(%s)", cases[i].str);
+ }
+
+
+There is more boilerplate code involved, but it can:
+
+* be more readable when there are multiple inputs/outputs (due to field names).
+
+ * For example, see ``fs/ext4/inode-test.c``.
+
+* reduce duplication if test cases are shared across multiple tests.
+
+ * For example: if we want to test ``sha256sum``, we could add a ``sha256``
+ field and reuse ``cases``.
+
+* be converted to a "parameterized test".
+
+Parameterized Testing
+~~~~~~~~~~~~~~~~~~~~~
+
+To run a test case against multiple inputs, KUnit provides a parameterized
+testing framework. This feature formalizes and extends the concept of
+table-driven tests discussed previously.
+
+A KUnit test is determined to be parameterized if a parameter generator function
+is provided when registering the test case. A test user can either write their
+own generator function or use one that is provided by KUnit. The generator
+function is stored in ``kunit_case->generate_params`` and can be set using the
+macros described in the section below.
+
+To establish the terminology, a "parameterized test" is a test which is run
+multiple times (once per "parameter" or "parameter run"). Each parameter run has
+both its own independent ``struct kunit`` (the "parameter run context") and
+access to a shared parent ``struct kunit`` (the "parameterized test context").
+
+Passing Parameters to a Test
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+There are three ways to provide the parameters to a test:
+
+Array Parameter Macros:
+
+ KUnit provides special support for the common table-driven testing pattern.
+ By applying either ``KUNIT_ARRAY_PARAM`` or ``KUNIT_ARRAY_PARAM_DESC`` to the
+ ``cases`` array from the previous section, we can create a parameterized test
+ as shown below:
+
+.. code-block:: c
+
+ // This is copy-pasted from above.
+ struct sha1_test_case {
+ const char *str;
+ const char *sha1;
+ };
+ static const struct sha1_test_case cases[] = {
+ {
+ .str = "hello world",
+ .sha1 = "2aae6c35c94fcfb415dbe95f408b9ce91ee846ed",
+ },
+ {
+ .str = "hello world!",
+ .sha1 = "430ce34d020724ed75a196dfc2ad67c77772d169",
+ },
+ };
+
+ // Creates `sha1_gen_params()` to iterate over `cases` while using
+ // the struct member `str` for the case description.
+ KUNIT_ARRAY_PARAM_DESC(sha1, cases, str);
+
+ // Looks no different from a normal test.
+ static void sha1_test(struct kunit *test)
+ {
+ // This function can just contain the body of the for-loop.
+ // The former `cases[i]` is accessible under test->param_value.
+ char out[40];
+ struct sha1_test_case *test_param = (struct sha1_test_case *)(test->param_value);
+
+ sha1sum(test_param->str, out);
+ KUNIT_EXPECT_STREQ_MSG(test, out, test_param->sha1,
+ "sha1sum(%s)", test_param->str);
+ }
+
+ // Instead of KUNIT_CASE, we use KUNIT_CASE_PARAM and pass in the
+ // function declared by KUNIT_ARRAY_PARAM or KUNIT_ARRAY_PARAM_DESC.
+ static struct kunit_case sha1_test_cases[] = {
+ KUNIT_CASE_PARAM(sha1_test, sha1_gen_params),
+ {}
+ };
+
+Custom Parameter Generator Function:
+
+ The generator function is responsible for generating parameters one-by-one
+ and has the following signature:
+ ``const void* (*)(struct kunit *test, const void *prev, char *desc)``.
+ You can pass the generator function to the ``KUNIT_CASE_PARAM``
+ or ``KUNIT_CASE_PARAM_WITH_INIT`` macros.
+
+ The function receives the previously generated parameter as the ``prev`` argument
+ (which is ``NULL`` on the first call) and can also access the parameterized
+ test context passed as the ``test`` argument. KUnit calls this function
+ repeatedly until it returns ``NULL``, which signifies that a parameterized
+ test ended.
+
+ Below is an example of how it works:
+
+.. code-block:: c
+
+ #define MAX_TEST_BUFFER_SIZE 8
+
+ // Example generator function. It produces a sequence of buffer sizes that
+ // are powers of two, starting at 1 (e.g., 1, 2, 4, 8).
+ static const void *buffer_size_gen_params(struct kunit *test, const void *prev, char *desc)
+ {
+ long prev_buffer_size = (long)prev;
+ long next_buffer_size = 1; // Start with an initial size of 1.
+
+ // Stop generating parameters if the limit is reached or exceeded.
+ if (prev_buffer_size >= MAX_TEST_BUFFER_SIZE)
+ return NULL;
+
+ // For subsequent calls, calculate the next size by doubling the previous one.
+ if (prev)
+ next_buffer_size = prev_buffer_size << 1;
+
+ return (void *)next_buffer_size;
+ }
+
+ // Simple test to validate that kunit_kzalloc provides zeroed memory.
+ static void buffer_zero_test(struct kunit *test)
+ {
+ long buffer_size = (long)test->param_value;
+ // Use kunit_kzalloc to allocate a zero-initialized buffer. This makes the
+ // memory "parameter run managed," meaning it's automatically cleaned up at
+ // the end of each parameter run.
+ int *buf = kunit_kzalloc(test, buffer_size * sizeof(int), GFP_KERNEL);
+
+ // Ensure the allocation was successful.
+ KUNIT_ASSERT_NOT_NULL(test, buf);
+
+ // Loop through the buffer and confirm every element is zero.
+ for (int i = 0; i < buffer_size; i++)
+ KUNIT_EXPECT_EQ(test, buf[i], 0);
+ }
+
+ static struct kunit_case buffer_test_cases[] = {
+ KUNIT_CASE_PARAM(buffer_zero_test, buffer_size_gen_params),
+ {}
+ };
+
+Runtime Parameter Array Registration in the Init Function:
+
+ For scenarios where you might need to initialize a parameterized test, you
+ can directly register a parameter array to the parameterized test context.
+
+ To do this, you must pass the parameterized test context, the array itself,
+ the array size, and a ``get_description()`` function to the
+ ``kunit_register_params_array()`` macro. This macro populates
+ ``struct kunit_params`` within the parameterized test context, effectively
+ storing a parameter array object. The ``get_description()`` function will
+ be used for populating parameter descriptions and has the following signature:
+ ``void (*)(struct kunit *test, const void *param, char *desc)``. Note that it
+ also has access to the parameterized test context.
+
+ .. important::
+ When using this way to register a parameter array, you will need to
+ manually pass ``kunit_array_gen_params()`` as the generator function to
+ ``KUNIT_CASE_PARAM_WITH_INIT``. ``kunit_array_gen_params()`` is a KUnit
+ helper that will use the registered array to generate the parameters.
+
+ If needed, instead of passing the KUnit helper, you can also pass your
+ own custom generator function that utilizes the parameter array. To
+ access the parameter array from within the parameter generator
+ function use ``test->params_array.params``.
+
+ The ``kunit_register_params_array()`` macro should be called within a
+ ``param_init()`` function that initializes the parameterized test and has
+ the following signature ``int (*)(struct kunit *test)``. For a detailed
+ explanation of this mechanism please refer to the "Adding Shared Resources"
+ section that is after this one. This method supports registering both
+ dynamically built and static parameter arrays.
+
+ The code snippet below shows the ``example_param_init_dynamic_arr`` test that
+ utilizes ``make_fibonacci_params()`` to create a dynamic array, which is then
+ registered using ``kunit_register_params_array()``. To see the full code
+ please refer to lib/kunit/kunit-example-test.c.
+
+.. code-block:: c
+
+ /*
+ * Example of a parameterized test param_init() function that registers a dynamic
+ * array of parameters.
+ */
+ static int example_param_init_dynamic_arr(struct kunit *test)
+ {
+ size_t seq_size;
+ int *fibonacci_params;
+
+ kunit_info(test, "initializing parameterized test\n");
+
+ seq_size = 6;
+ fibonacci_params = make_fibonacci_params(test, seq_size);
+ if (!fibonacci_params)
+ return -ENOMEM;
+ /*
+ * Passes the dynamic parameter array information to the parameterized test
+ * context struct kunit. The array and its metadata will be stored in
+ * test->parent->params_array. The array itself will be located in
+ * params_data.params.
+ */
+ kunit_register_params_array(test, fibonacci_params, seq_size,
+ example_param_dynamic_arr_get_desc);
+ return 0;
+ }
+
+ static struct kunit_case example_test_cases[] = {
+ /*
+ * Note how we pass kunit_array_gen_params() to use the array we
+ * registered in example_param_init_dynamic_arr() to generate
+ * parameters.
+ */
+ KUNIT_CASE_PARAM_WITH_INIT(example_params_test_with_init_dynamic_arr,
+ kunit_array_gen_params,
+ example_param_init_dynamic_arr,
+ example_param_exit_dynamic_arr),
+ {}
+ };
+
+Adding Shared Resources
+^^^^^^^^^^^^^^^^^^^^^^^
+All parameter runs in this framework hold a reference to the parameterized test
+context, which can be accessed using the parent ``struct kunit`` pointer. The
+parameterized test context is not used to execute any test logic itself; instead,
+it serves as a container for shared resources.
+
+It's possible to add resources to share between parameter runs within a
+parameterized test by using ``KUNIT_CASE_PARAM_WITH_INIT``, to which you pass
+custom ``param_init()`` and ``param_exit()`` functions. These functions run once
+before and once after the parameterized test, respectively.
+
+The ``param_init()`` function, with the signature ``int (*)(struct kunit *test)``,
+can be used for adding resources to the ``resources`` or ``priv`` fields of
+the parameterized test context, registering the parameter array, and any other
+initialization logic.
+
+The ``param_exit()`` function, with the signature ``void (*)(struct kunit *test)``,
+can be used to release any resources that were not parameterized test managed (i.e.
+not automatically cleaned up after the parameterized test ends) and for any other
+exit logic.
+
+Both ``param_init()`` and ``param_exit()`` are passed the parameterized test
+context behind the scenes. However, the test case function receives the parameter
+run context. Therefore, to manage and access shared resources from within a test
+case function, you must use ``test->parent``.
+
+For instance, finding a shared resource allocated by the Resource API requires
+passing ``test->parent`` to ``kunit_find_resource()``. This principle extends to
+all other APIs that might be used in the test case function, including
+``kunit_kzalloc()``, ``kunit_kmalloc_array()``, and others (see
+Documentation/dev-tools/kunit/api/test.rst and the
+Documentation/dev-tools/kunit/api/resource.rst).
+
+.. note::
+ The ``suite->init()`` function, which executes before each parameter run,
+ receives the parameter run context. Therefore, any resources set up in
+ ``suite->init()`` are cleaned up after each parameter run.
+
+The code below shows how you can add the shared resources. Note that this code
+utilizes the Resource API, which you can read more about here:
+Documentation/dev-tools/kunit/api/resource.rst. To see the full version of this
+code please refer to lib/kunit/kunit-example-test.c.
+
+.. code-block:: c
+
+ static int example_resource_init(struct kunit_resource *res, void *context)
+ {
+ ... /* Code that allocates memory and stores context in res->data. */
+ }
+
+ /* This function deallocates memory for the kunit_resource->data field. */
+ static void example_resource_free(struct kunit_resource *res)
+ {
+ kfree(res->data);
+ }
+
+ /* This match function locates a test resource based on defined criteria. */
+ static bool example_resource_alloc_match(struct kunit *test, struct kunit_resource *res,
+ void *match_data)
+ {
+ return res->data && res->free == example_resource_free;
+ }
+
+ /* Function to initialize the parameterized test. */
+ static int example_param_init(struct kunit *test)
+ {
+ int ctx = 3; /* Data to be stored. */
+ void *data = kunit_alloc_resource(test, example_resource_init,
+ example_resource_free,
+ GFP_KERNEL, &ctx);
+ if (!data)
+ return -ENOMEM;
+ kunit_register_params_array(test, example_params_array,
+ ARRAY_SIZE(example_params_array));
+ return 0;
+ }
+
+ /* Example test that uses shared resources in test->resources. */
+ static void example_params_test_with_init(struct kunit *test)
+ {
+ int threshold;
+ const struct example_param *param = test->param_value;
+ /* Here we pass test->parent to access the parameterized test context. */
+ struct kunit_resource *res = kunit_find_resource(test->parent,
+ example_resource_alloc_match,
+ NULL);
+
+ threshold = *((int *)res->data);
+ KUNIT_ASSERT_LE(test, param->value, threshold);
+ kunit_put_resource(res);
+ }
+
+ static struct kunit_case example_test_cases[] = {
+ KUNIT_CASE_PARAM_WITH_INIT(example_params_test_with_init, kunit_array_gen_params,
+ example_param_init, NULL),
+ {}
+ };
+
+As an alternative to using the KUnit Resource API for sharing resources, you can
+place them in ``test->parent->priv``. This serves as a more lightweight method
+for resource storage, best for scenarios where complex resource management is
+not required.
+
+As stated previously ``param_init()`` and ``param_exit()`` get the parameterized
+test context. So, you can directly use ``test->priv`` within ``param_init/exit``
+to manage shared resources. However, from within the test case function, you must
+navigate up to the parent ``struct kunit`` i.e. the parameterized test context.
+Therefore, you need to use ``test->parent->priv`` to access those same
+resources.
+
+The resources placed in ``test->parent->priv`` will need to be allocated in
+memory to persist across the parameter runs. If memory is allocated using the
+KUnit memory allocation APIs (described more in the "Allocating Memory" section
+below), you won't need to worry about deallocation. The APIs will make the memory
+parameterized test 'managed', ensuring that it will automatically get cleaned up
+after the parameterized test concludes.
+
+The code below demonstrates example usage of the ``priv`` field for shared
+resources:
+
+.. code-block:: c
+
+ static const struct example_param {
+ int value;
+ } example_params_array[] = {
+ { .value = 3, },
+ { .value = 2, },
+ { .value = 1, },
+ { .value = 0, },
+ };
+
+ /* Initialize the parameterized test context. */
+ static int example_param_init_priv(struct kunit *test)
+ {
+ int ctx = 3; /* Data to be stored. */
+ int arr_size = ARRAY_SIZE(example_params_array);
+
+ /*
+ * Allocate memory using kunit_kzalloc(). Since the `param_init`
+ * function receives the parameterized test context, this memory
+ * allocation will be scoped to the lifetime of the parameterized test.
+ */
+ test->priv = kunit_kzalloc(test, sizeof(int), GFP_KERNEL);
+
+ /* Assign the context value to test->priv.*/
+ *((int *)test->priv) = ctx;
+
+ /* Register the parameter array. */
+ kunit_register_params_array(test, example_params_array, arr_size, NULL);
+ return 0;
+ }
+
+ static void example_params_test_with_init_priv(struct kunit *test)
+ {
+ int threshold;
+ const struct example_param *param = test->param_value;
+
+ /* By design, test->parent will not be NULL. */
+ KUNIT_ASSERT_NOT_NULL(test, test->parent);
+
+ /* Here we use test->parent->priv to access the shared resource. */
+ threshold = *(int *)test->parent->priv;
+
+ KUNIT_ASSERT_LE(test, param->value, threshold);
+ }
+
+ static struct kunit_case example_tests[] = {
+ KUNIT_CASE_PARAM_WITH_INIT(example_params_test_with_init_priv,
+ kunit_array_gen_params,
+ example_param_init_priv, NULL),
+ {}
+ };
+
+Allocating Memory
+-----------------
+
+Where you might use ``kzalloc``, you can instead use ``kunit_kzalloc`` as KUnit
+will then ensure that the memory is freed once the test completes.
+
+This is useful because it lets us use the ``KUNIT_ASSERT_EQ`` macros to exit
+early from a test without having to worry about remembering to call ``kfree``.
+For example:
+
+.. code-block:: c
+
+ void example_test_allocation(struct kunit *test)
+ {
+ char *buffer = kunit_kzalloc(test, 16, GFP_KERNEL);
+ /* Ensure allocation succeeded. */
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, buffer);
+
+ KUNIT_ASSERT_STREQ(test, buffer, "");
+ }
+
+Registering Cleanup Actions
+---------------------------
+
+If you need to perform some cleanup beyond simple use of ``kunit_kzalloc``,
+you can register a custom "deferred action", which is a cleanup function
+run when the test exits (whether cleanly, or via a failed assertion).
+
+Actions are simple functions with no return value, and a single ``void*``
+context argument, and fulfill the same role as "cleanup" functions in Python
+and Go tests, "defer" statements in languages which support them, and
+(in some cases) destructors in RAII languages.
+
+These are very useful for unregistering things from global lists, closing
+files or other resources, or freeing resources.
+
+For example:
+
+.. code-block:: C
+
+ static void cleanup_device(void *ctx)
+ {
+ struct device *dev = (struct device *)ctx;
+
+ device_unregister(dev);
+ }
+
+ void example_device_test(struct kunit *test)
+ {
+ struct my_device dev;
+
+ device_register(&dev);
+
+ kunit_add_action(test, &cleanup_device, &dev);
+ }
+
+Note that, for functions like device_unregister which only accept a single
+pointer-sized argument, it's possible to automatically generate a wrapper
+with the ``KUNIT_DEFINE_ACTION_WRAPPER()`` macro, for example:
+
+.. code-block:: C
+
+ KUNIT_DEFINE_ACTION_WRAPPER(device_unregister, device_unregister_wrapper, struct device *);
+ kunit_add_action(test, &device_unregister_wrapper, &dev);
+
+You should do this in preference to manually casting to the ``kunit_action_t`` type,
+as casting function pointers will break Control Flow Integrity (CFI).
+
+``kunit_add_action`` can fail if, for example, the system is out of memory.
+You can use ``kunit_add_action_or_reset`` instead which runs the action
+immediately if it cannot be deferred.
+
+If you need more control over when the cleanup function is called, you
+can trigger it early using ``kunit_release_action``, or cancel it entirely
+with ``kunit_remove_action``.
+
+
+Testing Static Functions
+------------------------
+
+If you want to test static functions without exposing those functions outside of
+testing, one option is conditionally export the symbol. When KUnit is enabled,
+the symbol is exposed but remains static otherwise. To use this method, follow
+the template below.
+
+.. code-block:: c
+
+ /* In the file containing functions to test "my_file.c" */
+
+ #include <kunit/visibility.h>
+ #include <my_file.h>
+ ...
+ VISIBLE_IF_KUNIT int do_interesting_thing()
+ {
+ ...
+ }
+ EXPORT_SYMBOL_IF_KUNIT(do_interesting_thing);
+
+ /* In the header file "my_file.h" */
+
+ #if IS_ENABLED(CONFIG_KUNIT)
+ int do_interesting_thing(void);
+ #endif
+
+ /* In the KUnit test file "my_file_test.c" */
+
+ #include <kunit/visibility.h>
+ #include <my_file.h>
+ ...
+ MODULE_IMPORT_NS("EXPORTED_FOR_KUNIT_TESTING");
+ ...
+ // Use do_interesting_thing() in tests
+
+For a full example, see this `patch <https://lore.kernel.org/all/20221207014024.340230-3-rmoar@google.com/>`_
+where a test is modified to conditionally expose static functions for testing
+using the macros above.
+
+As an **alternative** to the method above, you could conditionally ``#include``
+the test file at the end of your .c file. This is not recommended but works
+if needed. For example:
+
+.. code-block:: c
+
+ /* In "my_file.c" */
+
+ static int do_interesting_thing();
+
+ #ifdef CONFIG_MY_KUNIT_TEST
+ #include "my_kunit_test.c"
+ #endif
+
+Injecting Test-Only Code
+------------------------
+
+Similar to as shown above, we can add test-specific logic. For example:
+
+.. code-block:: c
+
+ /* In my_file.h */
+
+ #ifdef CONFIG_MY_KUNIT_TEST
+ /* Defined in my_kunit_test.c */
+ void test_only_hook(void);
+ #else
+ void test_only_hook(void) { }
+ #endif
+
+This test-only code can be made more useful by accessing the current ``kunit_test``
+as shown in next section: *Accessing The Current Test*.
+
+Accessing The Current Test
+--------------------------
+
+In some cases, we need to call test-only code from outside the test file. This
+is helpful, for example, when providing a fake implementation of a function, or
+to fail any current test from within an error handler.
+We can do this via the ``kunit_test`` field in ``task_struct``, which we can
+access using the ``kunit_get_current_test()`` function in ``kunit/test-bug.h``.
+
+``kunit_get_current_test()`` is safe to call even if KUnit is not enabled. If
+KUnit is not enabled, or if no test is running in the current task, it will
+return ``NULL``. This compiles down to either a no-op or a static key check,
+so will have a negligible performance impact when no test is running.
+
+The example below uses this to implement a "mock" implementation of a function, ``foo``:
+
+.. code-block:: c
+
+ #include <kunit/test-bug.h> /* for kunit_get_current_test */
+
+ struct test_data {
+ int foo_result;
+ int want_foo_called_with;
+ };
+
+ static int fake_foo(int arg)
+ {
+ struct kunit *test = kunit_get_current_test();
+ struct test_data *test_data = test->priv;
+
+ KUNIT_EXPECT_EQ(test, test_data->want_foo_called_with, arg);
+ return test_data->foo_result;
+ }
+
+ static void example_simple_test(struct kunit *test)
+ {
+ /* Assume priv (private, a member used to pass test data from
+ * the init function) is allocated in the suite's .init */
+ struct test_data *test_data = test->priv;
+
+ test_data->foo_result = 42;
+ test_data->want_foo_called_with = 1;
+
+ /* In a real test, we'd probably pass a pointer to fake_foo somewhere
+ * like an ops struct, etc. instead of calling it directly. */
+ KUNIT_EXPECT_EQ(test, fake_foo(1), 42);
+ }
+
+In this example, we are using the ``priv`` member of ``struct kunit`` as a way
+of passing data to the test from the init function. In general ``priv`` is
+pointer that can be used for any user data. This is preferred over static
+variables, as it avoids concurrency issues.
+
+Had we wanted something more flexible, we could have used a named ``kunit_resource``.
+Each test can have multiple resources which have string names providing the same
+flexibility as a ``priv`` member, but also, for example, allowing helper
+functions to create resources without conflicting with each other. It is also
+possible to define a clean up function for each resource, making it easy to
+avoid resource leaks. For more information, see Documentation/dev-tools/kunit/api/resource.rst.
+
+Failing The Current Test
+------------------------
+
+If we want to fail the current test, we can use ``kunit_fail_current_test(fmt, args...)``
+which is defined in ``<kunit/test-bug.h>`` and does not require pulling in ``<kunit/test.h>``.
+For example, we have an option to enable some extra debug checks on some data
+structures as shown below:
+
+.. code-block:: c
+
+ #include <kunit/test-bug.h>
+
+ #ifdef CONFIG_EXTRA_DEBUG_CHECKS
+ static void validate_my_data(struct data *data)
+ {
+ if (is_valid(data))
+ return;
+
+ kunit_fail_current_test("data %p is invalid", data);
+
+ /* Normal, non-KUnit, error reporting code here. */
+ }
+ #else
+ static void my_debug_function(void) { }
+ #endif
+
+``kunit_fail_current_test()`` is safe to call even if KUnit is not enabled. If
+KUnit is not enabled, or if no test is running in the current task, it will do
+nothing. This compiles down to either a no-op or a static key check, so will
+have a negligible performance impact when no test is running.
+
+Managing Fake Devices and Drivers
+---------------------------------
+
+When testing drivers or code which interacts with drivers, many functions will
+require a ``struct device`` or ``struct device_driver``. In many cases, setting
+up a real device is not required to test any given function, so a fake device
+can be used instead.
+
+KUnit provides helper functions to create and manage these fake devices, which
+are internally of type ``struct kunit_device``, and are attached to a special
+``kunit_bus``. These devices support managed device resources (devres), as
+described in Documentation/driver-api/driver-model/devres.rst
+
+To create a KUnit-managed ``struct device_driver``, use ``kunit_driver_create()``,
+which will create a driver with the given name, on the ``kunit_bus``. This driver
+will automatically be destroyed when the corresponding test finishes, but can also
+be manually destroyed with ``driver_unregister()``.
+
+To create a fake device, use the ``kunit_device_register()``, which will create
+and register a device, using a new KUnit-managed driver created with ``kunit_driver_create()``.
+To provide a specific, non-KUnit-managed driver, use ``kunit_device_register_with_driver()``
+instead. Like with managed drivers, KUnit-managed fake devices are automatically
+cleaned up when the test finishes, but can be manually cleaned up early with
+``kunit_device_unregister()``.
+
+The KUnit devices should be used in preference to ``root_device_register()``, and
+instead of ``platform_device_register()`` in cases where the device is not otherwise
+a platform device.
+
+For example:
+
+.. code-block:: c
+
+ #include <kunit/device.h>
+
+ static void test_my_device(struct kunit *test)
+ {
+ struct device *fake_device;
+ const char *dev_managed_string;
+
+ // Create a fake device.
+ fake_device = kunit_device_register(test, "my_device");
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, fake_device)
+
+ // Pass it to functions which need a device.
+ dev_managed_string = devm_kstrdup(fake_device, "Hello, World!");
+
+ // Everything is cleaned up automatically when the test ends.
+ } \ No newline at end of file
diff --git a/Documentation/dev-tools/lkmm/docs/access-marking.rst b/Documentation/dev-tools/lkmm/docs/access-marking.rst
new file mode 100644
index 000000000000..80058a4da980
--- /dev/null
+++ b/Documentation/dev-tools/lkmm/docs/access-marking.rst
@@ -0,0 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Access Marking
+--------------
+
+Literal include of ``tools/memory-model/Documentation/access-marking.txt``.
+
+------------------------------------------------------------------
+
+.. kernel-include:: tools/memory-model/Documentation/access-marking.txt
+ :literal:
diff --git a/Documentation/dev-tools/lkmm/docs/cheatsheet.rst b/Documentation/dev-tools/lkmm/docs/cheatsheet.rst
new file mode 100644
index 000000000000..37681f6a6a8c
--- /dev/null
+++ b/Documentation/dev-tools/lkmm/docs/cheatsheet.rst
@@ -0,0 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Cheatsheet
+----------
+
+Literal include of ``tools/memory-model/Documentation/cheatsheet.txt``.
+
+------------------------------------------------------------------
+
+.. kernel-include:: tools/memory-model/Documentation/cheatsheet.txt
+ :literal:
diff --git a/Documentation/dev-tools/lkmm/docs/control-dependencies.rst b/Documentation/dev-tools/lkmm/docs/control-dependencies.rst
new file mode 100644
index 000000000000..5ae97e8861eb
--- /dev/null
+++ b/Documentation/dev-tools/lkmm/docs/control-dependencies.rst
@@ -0,0 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Control Dependencies
+--------------------
+
+Literal include of ``tools/memory-model/Documentation/control-dependencies.txt``.
+
+------------------------------------------------------------------
+
+.. kernel-include:: tools/memory-model/Documentation/control-dependencies.txt
+ :literal:
diff --git a/Documentation/dev-tools/lkmm/docs/explanation.rst b/Documentation/dev-tools/lkmm/docs/explanation.rst
new file mode 100644
index 000000000000..0bcba9a5ddf7
--- /dev/null
+++ b/Documentation/dev-tools/lkmm/docs/explanation.rst
@@ -0,0 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Explanation
+-----------
+
+Literal include of ``tools/memory-model/Documentation/explanation.txt``.
+
+------------------------------------------------------------------
+
+.. kernel-include:: tools/memory-model/Documentation/explanation.txt
+ :literal:
diff --git a/Documentation/dev-tools/lkmm/docs/glossary.rst b/Documentation/dev-tools/lkmm/docs/glossary.rst
new file mode 100644
index 000000000000..849aefdf3d6e
--- /dev/null
+++ b/Documentation/dev-tools/lkmm/docs/glossary.rst
@@ -0,0 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Glossary
+--------
+
+Literal include of ``tools/memory-model/Documentation/glossary.txt``.
+
+------------------------------------------------------------------
+
+.. kernel-include:: tools/memory-model/Documentation/glossary.txt
+ :literal:
diff --git a/Documentation/dev-tools/lkmm/docs/herd-representation.rst b/Documentation/dev-tools/lkmm/docs/herd-representation.rst
new file mode 100644
index 000000000000..f7b41f286eb9
--- /dev/null
+++ b/Documentation/dev-tools/lkmm/docs/herd-representation.rst
@@ -0,0 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+herd-representation
+-------------------
+
+Literal include of ``tools/memory-model/Documentation/herd-representation.txt``.
+
+------------------------------------------------------------------
+
+.. kernel-include:: tools/memory-model/Documentation/herd-representation.txt
+ :literal:
diff --git a/Documentation/dev-tools/lkmm/docs/index.rst b/Documentation/dev-tools/lkmm/docs/index.rst
new file mode 100644
index 000000000000..abbddcc009de
--- /dev/null
+++ b/Documentation/dev-tools/lkmm/docs/index.rst
@@ -0,0 +1,21 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Documentation
+=============
+
+.. toctree::
+ :maxdepth: 1
+
+ readme
+ simple
+ ordering
+ litmus-tests
+ locking
+ recipes
+ control-dependencies
+ access-marking
+ cheatsheet
+ explanation
+ herd-representation
+ glossary
+ references
diff --git a/Documentation/dev-tools/lkmm/docs/litmus-tests.rst b/Documentation/dev-tools/lkmm/docs/litmus-tests.rst
new file mode 100644
index 000000000000..3293f4540156
--- /dev/null
+++ b/Documentation/dev-tools/lkmm/docs/litmus-tests.rst
@@ -0,0 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Litmus Tests
+------------
+
+Literal include of ``tools/memory-model/Documentation/litmus-tests.txt``.
+
+------------------------------------------------------------------
+
+.. kernel-include:: tools/memory-model/Documentation/litmus-tests.txt
+ :literal:
diff --git a/Documentation/dev-tools/lkmm/docs/locking.rst b/Documentation/dev-tools/lkmm/docs/locking.rst
new file mode 100644
index 000000000000..b5eae4c0acb7
--- /dev/null
+++ b/Documentation/dev-tools/lkmm/docs/locking.rst
@@ -0,0 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Locking
+-------
+
+Literal include of ``tools/memory-model/Documentation/locking.txt``.
+
+------------------------------------------------------------------
+
+.. kernel-include:: tools/memory-model/Documentation/locking.txt
+ :literal:
diff --git a/Documentation/dev-tools/lkmm/docs/ordering.rst b/Documentation/dev-tools/lkmm/docs/ordering.rst
new file mode 100644
index 000000000000..a2343c12462d
--- /dev/null
+++ b/Documentation/dev-tools/lkmm/docs/ordering.rst
@@ -0,0 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Ordering
+--------
+
+Literal include of ``tools/memory-model/Documentation/ordering.txt``.
+
+------------------------------------------------------------------
+
+.. kernel-include:: tools/memory-model/Documentation/ordering.txt
+ :literal:
diff --git a/Documentation/dev-tools/lkmm/docs/readme.rst b/Documentation/dev-tools/lkmm/docs/readme.rst
new file mode 100644
index 000000000000..51e7a64e4435
--- /dev/null
+++ b/Documentation/dev-tools/lkmm/docs/readme.rst
@@ -0,0 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+README (for LKMM Documentation)
+-------------------------------
+
+Literal include of ``tools/memory-model/Documentation/README``.
+
+------------------------------------------------------------------
+
+.. kernel-include:: tools/memory-model/Documentation/README
+ :literal:
diff --git a/Documentation/dev-tools/lkmm/docs/recipes.rst b/Documentation/dev-tools/lkmm/docs/recipes.rst
new file mode 100644
index 000000000000..e55952640047
--- /dev/null
+++ b/Documentation/dev-tools/lkmm/docs/recipes.rst
@@ -0,0 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Recipes
+-------
+
+Literal include of ``tools/memory-model/Documentation/recipes.txt``.
+
+------------------------------------------------------------------
+
+.. kernel-include:: tools/memory-model/Documentation/recipes.txt
+ :literal:
diff --git a/Documentation/dev-tools/lkmm/docs/references.rst b/Documentation/dev-tools/lkmm/docs/references.rst
new file mode 100644
index 000000000000..c6831b3c9c02
--- /dev/null
+++ b/Documentation/dev-tools/lkmm/docs/references.rst
@@ -0,0 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+References
+----------
+
+Literal include of ``tools/memory-model/Documentation/references.txt``.
+
+------------------------------------------------------------------
+
+.. kernel-include:: tools/memory-model/Documentation/references.txt
+ :literal:
diff --git a/Documentation/dev-tools/lkmm/docs/simple.rst b/Documentation/dev-tools/lkmm/docs/simple.rst
new file mode 100644
index 000000000000..5c1094c95f45
--- /dev/null
+++ b/Documentation/dev-tools/lkmm/docs/simple.rst
@@ -0,0 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Simple
+------
+
+Literal include of ``tools/memory-model/Documentation/simple.txt``.
+
+------------------------------------------------------------------
+
+.. kernel-include:: tools/memory-model/Documentation/simple.txt
+ :literal:
diff --git a/Documentation/dev-tools/lkmm/index.rst b/Documentation/dev-tools/lkmm/index.rst
new file mode 100644
index 000000000000..e52782449ca3
--- /dev/null
+++ b/Documentation/dev-tools/lkmm/index.rst
@@ -0,0 +1,15 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============================================
+Linux Kernel Memory Consistency Model (LKMM)
+============================================
+
+This section literally renders documents under ``tools/memory-model/``
+and ``tools/memory-model/Documentation/``, which are maintained in
+the *pure* plain text form.
+
+.. toctree::
+ :maxdepth: 2
+
+ readme
+ docs/index
diff --git a/Documentation/dev-tools/lkmm/readme.rst b/Documentation/dev-tools/lkmm/readme.rst
new file mode 100644
index 000000000000..a7f847109584
--- /dev/null
+++ b/Documentation/dev-tools/lkmm/readme.rst
@@ -0,0 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+README (for LKMM)
+=================
+
+Literal include of ``tools/memory-model/README``.
+
+------------------------------------------------------------
+
+.. kernel-include:: tools/memory-model/README
+ :literal:
diff --git a/Documentation/dev-tools/propeller.rst b/Documentation/dev-tools/propeller.rst
new file mode 100644
index 000000000000..92195958e3db
--- /dev/null
+++ b/Documentation/dev-tools/propeller.rst
@@ -0,0 +1,162 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================================
+Using Propeller with the Linux kernel
+=====================================
+
+This enables Propeller build support for the kernel when using Clang
+compiler. Propeller is a profile-guided optimization (PGO) method used
+to optimize binary executables. Like AutoFDO, it utilizes hardware
+sampling to gather information about the frequency of execution of
+different code paths within a binary. Unlike AutoFDO, this information
+is then used right before linking phase to optimize (among others)
+block layout within and across functions.
+
+A few important notes about adopting Propeller optimization:
+
+#. Although it can be used as a standalone optimization step, it is
+ strongly recommended to apply Propeller on top of AutoFDO,
+ AutoFDO+ThinLTO or Instrument FDO. The rest of this document
+ assumes this paradigm.
+
+#. Propeller uses another round of profiling on top of
+ AutoFDO/AutoFDO+ThinLTO/iFDO. The whole build process involves
+ "build-afdo - train-afdo - build-propeller - train-propeller -
+ build-optimized".
+
+#. Propeller requires LLVM 19 release or later for Clang/Clang++
+ and the linker(ld.lld).
+
+#. In addition to LLVM toolchain, Propeller requires a profiling
+ conversion tool: https://github.com/google/autofdo with a release
+ after v0.30.1: https://github.com/google/autofdo/releases/tag/v0.30.1.
+
+The Propeller optimization process involves the following steps:
+
+#. Initial building: Build the AutoFDO or AutoFDO+ThinLTO binary as
+ you would normally do, but with a set of compile-time / link-time
+ flags, so that a special metadata section is created within the
+ kernel binary. The special section is only intend to be used by the
+ profiling tool, it is not part of the runtime image, nor does it
+ change kernel run time text sections.
+
+#. Profiling: The above kernel is then run with a representative
+ workload to gather execution frequency data. This data is collected
+ using hardware sampling, via perf. Propeller is most effective on
+ platforms supporting advanced PMU features like LBR on Intel
+ machines. This step is the same as profiling the kernel for AutoFDO
+ (the exact perf parameters can be different).
+
+#. Propeller profile generation: Perf output file is converted to a
+ pair of Propeller profiles via an offline tool.
+
+#. Optimized build: Build the AutoFDO or AutoFDO+ThinLTO optimized
+ binary as you would normally do, but with a compile-time /
+ link-time flag to pick up the Propeller compile time and link time
+ profiles. This build step uses 3 profiles - the AutoFDO profile,
+ the Propeller compile-time profile and the Propeller link-time
+ profile.
+
+#. Deployment: The optimized kernel binary is deployed and used
+ in production environments, providing improved performance
+ and reduced latency.
+
+Preparation
+===========
+
+Configure the kernel with::
+
+ CONFIG_AUTOFDO_CLANG=y
+ CONFIG_PROPELLER_CLANG=y
+
+Customization
+=============
+
+The default CONFIG_PROPELLER_CLANG setting covers kernel space objects
+for Propeller builds. One can, however, enable or disable Propeller build
+for individual files and directories by adding a line similar to the
+following to the respective kernel Makefile:
+
+- For enabling a single file (e.g. foo.o)::
+
+ PROPELLER_PROFILE_foo.o := y
+
+- For enabling all files in one directory::
+
+ PROPELLER_PROFILE := y
+
+- For disabling one file::
+
+ PROPELLER_PROFILE_foo.o := n
+
+- For disabling all files in one directory::
+
+ PROPELLER__PROFILE := n
+
+
+Workflow
+========
+
+Here is an example workflow for building an AutoFDO+Propeller kernel:
+
+1) Assuming an AutoFDO profile is already collected following
+ instructions in the AutoFDO document, build the kernel on the host
+ machine, with AutoFDO and Propeller build configs ::
+
+ CONFIG_AUTOFDO_CLANG=y
+ CONFIG_PROPELLER_CLANG=y
+
+ and ::
+
+ $ make LLVM=1 CLANG_AUTOFDO_PROFILE=<autofdo-profile-name>
+
+2) Install the kernel on the test machine.
+
+3) Run the load tests. The '-c' option in perf specifies the sample
+ event period. We suggest using a suitable prime number, like 500009,
+ for this purpose.
+
+ - For Intel platforms::
+
+ $ perf record -e BR_INST_RETIRED.NEAR_TAKEN:k -a -N -b -c <count> -o <perf_file> -- <loadtest>
+
+ - For AMD platforms::
+
+ $ perf record --pfm-event RETIRED_TAKEN_BRANCH_INSTRUCTIONS:k -a -N -b -c <count> -o <perf_file> -- <loadtest>
+
+ Note you can repeat the above steps to collect multiple <perf_file>s.
+
+4) (Optional) Download the raw perf file(s) to the host machine.
+
+5) Use the create_llvm_prof tool (https://github.com/google/autofdo) to
+ generate Propeller profile. ::
+
+ $ create_llvm_prof --binary=<vmlinux> --profile=<perf_file>
+ --format=propeller --propeller_output_module_name
+ --out=<propeller_profile_prefix>_cc_profile.txt
+ --propeller_symorder=<propeller_profile_prefix>_ld_profile.txt
+
+ "<propeller_profile_prefix>" can be something like "/home/user/dir/any_string".
+
+ This command generates a pair of Propeller profiles:
+ "<propeller_profile_prefix>_cc_profile.txt" and
+ "<propeller_profile_prefix>_ld_profile.txt".
+
+ If there are more than 1 perf_file collected in the previous step,
+ you can create a temp list file "<perf_file_list>" with each line
+ containing one perf file name and run::
+
+ $ create_llvm_prof --binary=<vmlinux> --profile=@<perf_file_list>
+ --format=propeller --propeller_output_module_name
+ --out=<propeller_profile_prefix>_cc_profile.txt
+ --propeller_symorder=<propeller_profile_prefix>_ld_profile.txt
+
+6) Rebuild the kernel using the AutoFDO and Propeller
+ profiles. ::
+
+ CONFIG_AUTOFDO_CLANG=y
+ CONFIG_PROPELLER_CLANG=y
+
+ and ::
+
+ $ make LLVM=1 CLANG_AUTOFDO_PROFILE=<profile_file> CLANG_PROPELLER_PROFILE_PREFIX=<propeller_profile_prefix>
diff --git a/Documentation/dev-tools/sparse.rst b/Documentation/dev-tools/sparse.rst
index c401c952a340..dc791c8d84d1 100644
--- a/Documentation/dev-tools/sparse.rst
+++ b/Documentation/dev-tools/sparse.rst
@@ -9,6 +9,8 @@ Sparse is a semantic checker for C programs; it can be used to find a
number of potential problems with kernel code. See
https://lwn.net/Articles/689907/ for an overview of sparse; this document
contains some kernel-specific sparse information.
+More information on sparse, mainly about its internals, can be found in
+its official pages at https://sparse.docs.kernel.org.
Using sparse for typechecking
@@ -73,19 +75,14 @@ sparse would otherwise report a context imbalance.
Getting sparse
--------------
-You can get latest released versions from the Sparse homepage at
-https://sparse.wiki.kernel.org/index.php/Main_Page
+You can get tarballs of the latest released versions from:
+https://www.kernel.org/pub/software/devel/sparse/dist/
Alternatively, you can get snapshots of the latest development version
of sparse using git to clone::
git://git.kernel.org/pub/scm/devel/sparse/sparse.git
-DaveJ has hourly generated tarballs of the git tree available at::
-
- http://www.codemonkey.org.uk/projects/git-snapshots/sparse/
-
-
Once you have it, just do::
make
@@ -103,3 +100,5 @@ have already built it.
The optional make variable CF can be used to pass arguments to sparse. The
build system passes -Wbitwise to sparse automatically.
+
+Note that sparse defines the __CHECKER__ preprocessor symbol.
diff --git a/Documentation/dev-tools/testing-devices.rst b/Documentation/dev-tools/testing-devices.rst
new file mode 100644
index 000000000000..ab26adb99051
--- /dev/null
+++ b/Documentation/dev-tools/testing-devices.rst
@@ -0,0 +1,47 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. Copyright (c) 2024 Collabora Ltd
+
+=============================
+Device testing with kselftest
+=============================
+
+
+There are a few different kselftests available for testing devices generically,
+with some overlap in coverage and different requirements. This document aims to
+give an overview of each one.
+
+Note: Paths in this document are relative to the kselftest folder
+(``tools/testing/selftests``).
+
+Device oriented kselftests:
+
+* Devicetree (``dt``)
+
+ * **Coverage**: Probe status for devices described in Devicetree
+ * **Requirements**: None
+
+* Error logs (``devices/error_logs``)
+
+ * **Coverage**: Error (or more critical) log messages presence coming from any
+ device
+ * **Requirements**: None
+
+* Discoverable bus (``devices/probe``)
+
+ * **Coverage**: Presence and probe status of USB or PCI devices that have been
+ described in the reference file
+ * **Requirements**: Manually describe the devices that should be tested in a
+ YAML reference file (see ``devices/probe/boards/google,spherion.yaml`` for
+ an example)
+
+* Exist (``devices/exist``)
+
+ * **Coverage**: Presence of all devices
+ * **Requirements**: Generate the reference (see ``devices/exist/README.rst``
+ for details) on a known-good kernel
+
+Therefore, the suggestion is to enable the error log and devicetree tests on all
+(DT-based) platforms, since they don't have any requirements. Then to greatly
+improve coverage, generate the reference for each platform and enable the exist
+test. The discoverable bus test can be used to verify the probe status of
+specific USB or PCI devices, but is probably not worth it for most cases.
diff --git a/Documentation/dev-tools/testing-overview.rst b/Documentation/dev-tools/testing-overview.rst
new file mode 100644
index 000000000000..1619e5e5cc9c
--- /dev/null
+++ b/Documentation/dev-tools/testing-overview.rst
@@ -0,0 +1,182 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================
+Kernel Testing Guide
+====================
+
+
+There are a number of different tools for testing the Linux kernel, so knowing
+when to use each of them can be a challenge. This document provides a rough
+overview of their differences, and how they fit together.
+
+
+Writing and Running Tests
+=========================
+
+The bulk of kernel tests are written using either the kselftest or KUnit
+frameworks. These both provide infrastructure to help make running tests and
+groups of tests easier, as well as providing helpers to aid in writing new
+tests.
+
+If you're looking to verify the behaviour of the Kernel — particularly specific
+parts of the kernel — then you'll want to use KUnit or kselftest.
+
+
+The Difference Between KUnit and kselftest
+------------------------------------------
+
+KUnit (Documentation/dev-tools/kunit/index.rst) is an entirely in-kernel system
+for "white box" testing: because test code is part of the kernel, it can access
+internal structures and functions which aren't exposed to userspace.
+
+KUnit tests therefore are best written against small, self-contained parts
+of the kernel, which can be tested in isolation. This aligns well with the
+concept of 'unit' testing.
+
+For example, a KUnit test might test an individual kernel function (or even a
+single codepath through a function, such as an error handling case), rather
+than a feature as a whole.
+
+This also makes KUnit tests very fast to build and run, allowing them to be
+run frequently as part of the development process.
+
+There is a KUnit test style guide which may give further pointers in
+Documentation/dev-tools/kunit/style.rst
+
+
+kselftest (Documentation/dev-tools/kselftest.rst), on the other hand, is
+largely implemented in userspace, and tests are normal userspace scripts or
+programs.
+
+This makes it easier to write more complicated tests, or tests which need to
+manipulate the overall system state more (e.g., spawning processes, etc.).
+However, it's not possible to call kernel functions directly from kselftest.
+This means that only kernel functionality which is exposed to userspace somehow
+(e.g. by a syscall, device, filesystem, etc.) can be tested with kselftest. To
+work around this, some tests include a companion kernel module which exposes
+more information or functionality. If a test runs mostly or entirely within the
+kernel, however, KUnit may be the more appropriate tool.
+
+kselftest is therefore suited well to tests of whole features, as these will
+expose an interface to userspace, which can be tested, but not implementation
+details. This aligns well with 'system' or 'end-to-end' testing.
+
+For example, all new system calls should be accompanied by kselftest tests.
+
+Code Coverage Tools
+===================
+
+The Linux Kernel supports two different code coverage measurement tools. These
+can be used to verify that a test is executing particular functions or lines
+of code. This is useful for determining how much of the kernel is being tested,
+and for finding corner-cases which are not covered by the appropriate test.
+
+Documentation/dev-tools/gcov.rst is GCC's coverage testing tool, which can be
+used with the kernel to get global or per-module coverage. Unlike KCOV, it
+does not record per-task coverage. Coverage data can be read from debugfs,
+and interpreted using the usual gcov tooling.
+
+Documentation/dev-tools/kcov.rst is a feature which can be built in to the
+kernel to allow capturing coverage on a per-task level. It's therefore useful
+for fuzzing and other situations where information about code executed during,
+for example, a single syscall is useful.
+
+
+Dynamic Analysis Tools
+======================
+
+The kernel also supports a number of dynamic analysis tools, which attempt to
+detect classes of issues when they occur in a running kernel. These typically
+each look for a different class of bugs, such as invalid memory accesses,
+concurrency issues such as data races, or other undefined behaviour like
+integer overflows.
+
+Some of these tools are listed below:
+
+* kmemleak detects possible memory leaks. See
+ Documentation/dev-tools/kmemleak.rst
+* KASAN detects invalid memory accesses such as out-of-bounds and
+ use-after-free errors. See Documentation/dev-tools/kasan.rst
+* UBSAN detects behaviour that is undefined by the C standard, like integer
+ overflows. See Documentation/dev-tools/ubsan.rst
+* KCSAN detects data races. See Documentation/dev-tools/kcsan.rst
+* KFENCE is a low-overhead detector of memory issues, which is much faster than
+ KASAN and can be used in production. See Documentation/dev-tools/kfence.rst
+* lockdep is a locking correctness validator. See
+ Documentation/locking/lockdep-design.rst
+* Runtime Verification (RV) supports checking specific behaviours for a given
+ subsystem. See Documentation/trace/rv/runtime-verification.rst
+* There are several other pieces of debug instrumentation in the kernel, many
+ of which can be found in lib/Kconfig.debug
+
+These tools tend to test the kernel as a whole, and do not "pass" like
+kselftest or KUnit tests. They can be combined with KUnit or kselftest by
+running tests on a kernel with these tools enabled: you can then be sure
+that none of these errors are occurring during the test.
+
+Some of these tools integrate with KUnit or kselftest and will
+automatically fail tests if an issue is detected.
+
+Static Analysis Tools
+=====================
+
+In addition to testing a running kernel, one can also analyze kernel source code
+directly (**at compile time**) using **static analysis** tools. The tools
+commonly used in the kernel allow one to inspect the whole source tree or just
+specific files within it. They make it easier to detect and fix problems during
+the development process.
+
+Sparse can help test the kernel by performing type-checking, lock checking,
+value range checking, in addition to reporting various errors and warnings while
+examining the code. See the Documentation/dev-tools/sparse.rst documentation
+page for details on how to use it.
+
+Smatch extends Sparse and provides additional checks for programming logic
+mistakes such as missing breaks in switch statements, unused return values on
+error checking, forgetting to set an error code in the return of an error path,
+etc. Smatch also has tests against more serious issues such as integer
+overflows, null pointer dereferences, and memory leaks. See the project page at
+http://smatch.sourceforge.net/.
+
+Coccinelle is another static analyzer at our disposal. Coccinelle is often used
+to aid refactoring and collateral evolution of source code, but it can also help
+to avoid certain bugs that occur in common code patterns. The types of tests
+available include API tests, tests for correct usage of kernel iterators, checks
+for the soundness of free operations, analysis of locking behavior, and further
+tests known to help keep consistent kernel usage. See the
+Documentation/dev-tools/coccinelle.rst documentation page for details.
+
+Beware, though, that static analysis tools suffer from **false positives**.
+Errors and warns need to be evaluated carefully before attempting to fix them.
+
+When to use Sparse and Smatch
+-----------------------------
+
+Sparse does type checking, such as verifying that annotated variables do not
+cause endianness bugs, detecting places that use ``__user`` pointers improperly,
+and analyzing the compatibility of symbol initializers.
+
+Smatch does flow analysis and, if allowed to build the function database, it
+also does cross function analysis. Smatch tries to answer questions like where
+is this buffer allocated? How big is it? Can this index be controlled by the
+user? Is this variable larger than that variable?
+
+It's generally easier to write checks in Smatch than it is to write checks in
+Sparse. Nevertheless, there are some overlaps between Sparse and Smatch checks.
+
+Strong points of Smatch and Coccinelle
+--------------------------------------
+
+Coccinelle is probably the easiest for writing checks. It works before the
+pre-processor so it's easier to check for bugs in macros using Coccinelle.
+Coccinelle also creates patches for you, which no other tool does.
+
+For example, with Coccinelle you can do a mass conversion from
+``kmalloc(x * size, GFP_KERNEL)`` to ``kmalloc_array(x, size, GFP_KERNEL)``, and
+that's really useful. If you just created a Smatch warning and try to push the
+work of converting on to the maintainers they would be annoyed. You'd have to
+argue about each warning if can really overflow or not.
+
+Coccinelle does no analysis of variable values, which is the strong point of
+Smatch. On the other hand, Coccinelle allows you to do simple things in a simple
+way.
diff --git a/Documentation/dev-tools/ubsan.rst b/Documentation/dev-tools/ubsan.rst
index 655e6b63c227..e3591f8e9d5b 100644
--- a/Documentation/dev-tools/ubsan.rst
+++ b/Documentation/dev-tools/ubsan.rst
@@ -1,5 +1,7 @@
-The Undefined Behavior Sanitizer - UBSAN
-========================================
+.. SPDX-License-Identifier: GPL-2.0
+
+Undefined Behavior Sanitizer - UBSAN
+====================================
UBSAN is a runtime undefined behaviour checker.
@@ -47,34 +49,22 @@ Report example
Usage
-----
-To enable UBSAN configure kernel with::
-
- CONFIG_UBSAN=y
-
-and to check the entire kernel::
-
- CONFIG_UBSAN_SANITIZE_ALL=y
-
-To enable instrumentation for specific files or directories, add a line
-similar to the following to the respective kernel Makefile:
+To enable UBSAN, configure the kernel with::
-- For a single file (e.g. main.o)::
+ CONFIG_UBSAN=y
- UBSAN_SANITIZE_main.o := y
-
-- For all files in one directory::
-
- UBSAN_SANITIZE := y
-
-To exclude files from being instrumented even if
-``CONFIG_UBSAN_SANITIZE_ALL=y``, use::
+To exclude files from being instrumented use::
UBSAN_SANITIZE_main.o := n
-and::
+and to exclude all targets in one directory use::
UBSAN_SANITIZE := n
+When disabled for all targets, specific files can be enabled using::
+
+ UBSAN_SANITIZE_main.o := y
+
Detection of unaligned accesses controlled through the separate option -
CONFIG_UBSAN_ALIGNMENT. It's off by default on architectures that support
unaligned accesses (CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y). One could
@@ -86,3 +76,4 @@ References
.. _1: https://gcc.gnu.org/onlinedocs/gcc-4.9.0/gcc/Debugging-Options.html
.. _2: https://gcc.gnu.org/onlinedocs/gcc/Debugging-Options.html
+.. _3: https://clang.llvm.org/docs/UndefinedBehaviorSanitizer.html
diff --git a/Documentation/device-mapper/cache-policies.txt b/Documentation/device-mapper/cache-policies.txt
deleted file mode 100644
index 86786d87d9a8..000000000000
--- a/Documentation/device-mapper/cache-policies.txt
+++ /dev/null
@@ -1,121 +0,0 @@
-Guidance for writing policies
-=============================
-
-Try to keep transactionality out of it. The core is careful to
-avoid asking about anything that is migrating. This is a pain, but
-makes it easier to write the policies.
-
-Mappings are loaded into the policy at construction time.
-
-Every bio that is mapped by the target is referred to the policy.
-The policy can return a simple HIT or MISS or issue a migration.
-
-Currently there's no way for the policy to issue background work,
-e.g. to start writing back dirty blocks that are going to be evicted
-soon.
-
-Because we map bios, rather than requests it's easy for the policy
-to get fooled by many small bios. For this reason the core target
-issues periodic ticks to the policy. It's suggested that the policy
-doesn't update states (eg, hit counts) for a block more than once
-for each tick. The core ticks by watching bios complete, and so
-trying to see when the io scheduler has let the ios run.
-
-
-Overview of supplied cache replacement policies
-===============================================
-
-multiqueue (mq)
----------------
-
-This policy is now an alias for smq (see below).
-
-The following tunables are accepted, but have no effect:
-
- 'sequential_threshold <#nr_sequential_ios>'
- 'random_threshold <#nr_random_ios>'
- 'read_promote_adjustment <value>'
- 'write_promote_adjustment <value>'
- 'discard_promote_adjustment <value>'
-
-Stochastic multiqueue (smq)
----------------------------
-
-This policy is the default.
-
-The stochastic multi-queue (smq) policy addresses some of the problems
-with the multiqueue (mq) policy.
-
-The smq policy (vs mq) offers the promise of less memory utilization,
-improved performance and increased adaptability in the face of changing
-workloads. smq also does not have any cumbersome tuning knobs.
-
-Users may switch from "mq" to "smq" simply by appropriately reloading a
-DM table that is using the cache target. Doing so will cause all of the
-mq policy's hints to be dropped. Also, performance of the cache may
-degrade slightly until smq recalculates the origin device's hotspots
-that should be cached.
-
-Memory usage:
-The mq policy used a lot of memory; 88 bytes per cache block on a 64
-bit machine.
-
-smq uses 28bit indexes to implement its data structures rather than
-pointers. It avoids storing an explicit hit count for each block. It
-has a 'hotspot' queue, rather than a pre-cache, which uses a quarter of
-the entries (each hotspot block covers a larger area than a single
-cache block).
-
-All this means smq uses ~25bytes per cache block. Still a lot of
-memory, but a substantial improvement nontheless.
-
-Level balancing:
-mq placed entries in different levels of the multiqueue structures
-based on their hit count (~ln(hit count)). This meant the bottom
-levels generally had the most entries, and the top ones had very
-few. Having unbalanced levels like this reduced the efficacy of the
-multiqueue.
-
-smq does not maintain a hit count, instead it swaps hit entries with
-the least recently used entry from the level above. The overall
-ordering being a side effect of this stochastic process. With this
-scheme we can decide how many entries occupy each multiqueue level,
-resulting in better promotion/demotion decisions.
-
-Adaptability:
-The mq policy maintained a hit count for each cache block. For a
-different block to get promoted to the cache its hit count has to
-exceed the lowest currently in the cache. This meant it could take a
-long time for the cache to adapt between varying IO patterns.
-
-smq doesn't maintain hit counts, so a lot of this problem just goes
-away. In addition it tracks performance of the hotspot queue, which
-is used to decide which blocks to promote. If the hotspot queue is
-performing badly then it starts moving entries more quickly between
-levels. This lets it adapt to new IO patterns very quickly.
-
-Performance:
-Testing smq shows substantially better performance than mq.
-
-cleaner
--------
-
-The cleaner writes back all dirty blocks in a cache to decommission it.
-
-Examples
-========
-
-The syntax for a table is:
- cache <metadata dev> <cache dev> <origin dev> <block size>
- <#feature_args> [<feature arg>]*
- <policy> <#policy_args> [<policy arg>]*
-
-The syntax to send a message using the dmsetup command is:
- dmsetup message <mapped device> 0 sequential_threshold 1024
- dmsetup message <mapped device> 0 random_threshold 8
-
-Using dmsetup:
- dmsetup create blah --table "0 268435456 cache /dev/sdb /dev/sdc \
- /dev/sdd 512 0 mq 4 sequential_threshold 1024 random_threshold 8"
- creates a 128GB large mapped device named 'blah' with the
- sequential threshold set to 1024 and the random_threshold set to 8.
diff --git a/Documentation/device-mapper/cache.txt b/Documentation/device-mapper/cache.txt
deleted file mode 100644
index ff0841711fd5..000000000000
--- a/Documentation/device-mapper/cache.txt
+++ /dev/null
@@ -1,308 +0,0 @@
-Introduction
-============
-
-dm-cache is a device mapper target written by Joe Thornber, Heinz
-Mauelshagen, and Mike Snitzer.
-
-It aims to improve performance of a block device (eg, a spindle) by
-dynamically migrating some of its data to a faster, smaller device
-(eg, an SSD).
-
-This device-mapper solution allows us to insert this caching at
-different levels of the dm stack, for instance above the data device for
-a thin-provisioning pool. Caching solutions that are integrated more
-closely with the virtual memory system should give better performance.
-
-The target reuses the metadata library used in the thin-provisioning
-library.
-
-The decision as to what data to migrate and when is left to a plug-in
-policy module. Several of these have been written as we experiment,
-and we hope other people will contribute others for specific io
-scenarios (eg. a vm image server).
-
-Glossary
-========
-
- Migration - Movement of the primary copy of a logical block from one
- device to the other.
- Promotion - Migration from slow device to fast device.
- Demotion - Migration from fast device to slow device.
-
-The origin device always contains a copy of the logical block, which
-may be out of date or kept in sync with the copy on the cache device
-(depending on policy).
-
-Design
-======
-
-Sub-devices
------------
-
-The target is constructed by passing three devices to it (along with
-other parameters detailed later):
-
-1. An origin device - the big, slow one.
-
-2. A cache device - the small, fast one.
-
-3. A small metadata device - records which blocks are in the cache,
- which are dirty, and extra hints for use by the policy object.
- This information could be put on the cache device, but having it
- separate allows the volume manager to configure it differently,
- e.g. as a mirror for extra robustness. This metadata device may only
- be used by a single cache device.
-
-Fixed block size
-----------------
-
-The origin is divided up into blocks of a fixed size. This block size
-is configurable when you first create the cache. Typically we've been
-using block sizes of 256KB - 1024KB. The block size must be between 64
-sectors (32KB) and 2097152 sectors (1GB) and a multiple of 64 sectors (32KB).
-
-Having a fixed block size simplifies the target a lot. But it is
-something of a compromise. For instance, a small part of a block may be
-getting hit a lot, yet the whole block will be promoted to the cache.
-So large block sizes are bad because they waste cache space. And small
-block sizes are bad because they increase the amount of metadata (both
-in core and on disk).
-
-Cache operating modes
----------------------
-
-The cache has three operating modes: writeback, writethrough and
-passthrough.
-
-If writeback, the default, is selected then a write to a block that is
-cached will go only to the cache and the block will be marked dirty in
-the metadata.
-
-If writethrough is selected then a write to a cached block will not
-complete until it has hit both the origin and cache devices. Clean
-blocks should remain clean.
-
-If passthrough is selected, useful when the cache contents are not known
-to be coherent with the origin device, then all reads are served from
-the origin device (all reads miss the cache) and all writes are
-forwarded to the origin device; additionally, write hits cause cache
-block invalidates. To enable passthrough mode the cache must be clean.
-Passthrough mode allows a cache device to be activated without having to
-worry about coherency. Coherency that exists is maintained, although
-the cache will gradually cool as writes take place. If the coherency of
-the cache can later be verified, or established through use of the
-"invalidate_cblocks" message, the cache device can be transitioned to
-writethrough or writeback mode while still warm. Otherwise, the cache
-contents can be discarded prior to transitioning to the desired
-operating mode.
-
-A simple cleaner policy is provided, which will clean (write back) all
-dirty blocks in a cache. Useful for decommissioning a cache or when
-shrinking a cache. Shrinking the cache's fast device requires all cache
-blocks, in the area of the cache being removed, to be clean. If the
-area being removed from the cache still contains dirty blocks the resize
-will fail. Care must be taken to never reduce the volume used for the
-cache's fast device until the cache is clean. This is of particular
-importance if writeback mode is used. Writethrough and passthrough
-modes already maintain a clean cache. Future support to partially clean
-the cache, above a specified threshold, will allow for keeping the cache
-warm and in writeback mode during resize.
-
-Migration throttling
---------------------
-
-Migrating data between the origin and cache device uses bandwidth.
-The user can set a throttle to prevent more than a certain amount of
-migration occurring at any one time. Currently we're not taking any
-account of normal io traffic going to the devices. More work needs
-doing here to avoid migrating during those peak io moments.
-
-For the time being, a message "migration_threshold <#sectors>"
-can be used to set the maximum number of sectors being migrated,
-the default being 2048 sectors (1MB).
-
-Updating on-disk metadata
--------------------------
-
-On-disk metadata is committed every time a FLUSH or FUA bio is written.
-If no such requests are made then commits will occur every second. This
-means the cache behaves like a physical disk that has a volatile write
-cache. If power is lost you may lose some recent writes. The metadata
-should always be consistent in spite of any crash.
-
-The 'dirty' state for a cache block changes far too frequently for us
-to keep updating it on the fly. So we treat it as a hint. In normal
-operation it will be written when the dm device is suspended. If the
-system crashes all cache blocks will be assumed dirty when restarted.
-
-Per-block policy hints
-----------------------
-
-Policy plug-ins can store a chunk of data per cache block. It's up to
-the policy how big this chunk is, but it should be kept small. Like the
-dirty flags this data is lost if there's a crash so a safe fallback
-value should always be possible.
-
-Policy hints affect performance, not correctness.
-
-Policy messaging
-----------------
-
-Policies will have different tunables, specific to each one, so we
-need a generic way of getting and setting these. Device-mapper
-messages are used. Refer to cache-policies.txt.
-
-Discard bitset resolution
--------------------------
-
-We can avoid copying data during migration if we know the block has
-been discarded. A prime example of this is when mkfs discards the
-whole block device. We store a bitset tracking the discard state of
-blocks. However, we allow this bitset to have a different block size
-from the cache blocks. This is because we need to track the discard
-state for all of the origin device (compare with the dirty bitset
-which is just for the smaller cache device).
-
-Target interface
-================
-
-Constructor
------------
-
- cache <metadata dev> <cache dev> <origin dev> <block size>
- <#feature args> [<feature arg>]*
- <policy> <#policy args> [policy args]*
-
- metadata dev : fast device holding the persistent metadata
- cache dev : fast device holding cached data blocks
- origin dev : slow device holding original data blocks
- block size : cache unit size in sectors
-
- #feature args : number of feature arguments passed
- feature args : writethrough or passthrough (The default is writeback.)
-
- policy : the replacement policy to use
- #policy args : an even number of arguments corresponding to
- key/value pairs passed to the policy
- policy args : key/value pairs passed to the policy
- E.g. 'sequential_threshold 1024'
- See cache-policies.txt for details.
-
-Optional feature arguments are:
- writethrough : write through caching that prohibits cache block
- content from being different from origin block content.
- Without this argument, the default behaviour is to write
- back cache block contents later for performance reasons,
- so they may differ from the corresponding origin blocks.
-
- passthrough : a degraded mode useful for various cache coherency
- situations (e.g., rolling back snapshots of
- underlying storage). Reads and writes always go to
- the origin. If a write goes to a cached origin
- block, then the cache block is invalidated.
- To enable passthrough mode the cache must be clean.
-
- metadata2 : use version 2 of the metadata. This stores the dirty bits
- in a separate btree, which improves speed of shutting
- down the cache.
-
-A policy called 'default' is always registered. This is an alias for
-the policy we currently think is giving best all round performance.
-
-As the default policy could vary between kernels, if you are relying on
-the characteristics of a specific policy, always request it by name.
-
-Status
-------
-
-<metadata block size> <#used metadata blocks>/<#total metadata blocks>
-<cache block size> <#used cache blocks>/<#total cache blocks>
-<#read hits> <#read misses> <#write hits> <#write misses>
-<#demotions> <#promotions> <#dirty> <#features> <features>*
-<#core args> <core args>* <policy name> <#policy args> <policy args>*
-<cache metadata mode>
-
-metadata block size : Fixed block size for each metadata block in
- sectors
-#used metadata blocks : Number of metadata blocks used
-#total metadata blocks : Total number of metadata blocks
-cache block size : Configurable block size for the cache device
- in sectors
-#used cache blocks : Number of blocks resident in the cache
-#total cache blocks : Total number of cache blocks
-#read hits : Number of times a READ bio has been mapped
- to the cache
-#read misses : Number of times a READ bio has been mapped
- to the origin
-#write hits : Number of times a WRITE bio has been mapped
- to the cache
-#write misses : Number of times a WRITE bio has been
- mapped to the origin
-#demotions : Number of times a block has been removed
- from the cache
-#promotions : Number of times a block has been moved to
- the cache
-#dirty : Number of blocks in the cache that differ
- from the origin
-#feature args : Number of feature args to follow
-feature args : 'writethrough' (optional)
-#core args : Number of core arguments (must be even)
-core args : Key/value pairs for tuning the core
- e.g. migration_threshold
-policy name : Name of the policy
-#policy args : Number of policy arguments to follow (must be even)
-policy args : Key/value pairs e.g. sequential_threshold
-cache metadata mode : ro if read-only, rw if read-write
- In serious cases where even a read-only mode is deemed unsafe
- no further I/O will be permitted and the status will just
- contain the string 'Fail'. The userspace recovery tools
- should then be used.
-needs_check : 'needs_check' if set, '-' if not set
- A metadata operation has failed, resulting in the needs_check
- flag being set in the metadata's superblock. The metadata
- device must be deactivated and checked/repaired before the
- cache can be made fully operational again. '-' indicates
- needs_check is not set.
-
-Messages
---------
-
-Policies will have different tunables, specific to each one, so we
-need a generic way of getting and setting these. Device-mapper
-messages are used. (A sysfs interface would also be possible.)
-
-The message format is:
-
- <key> <value>
-
-E.g.
- dmsetup message my_cache 0 sequential_threshold 1024
-
-
-Invalidation is removing an entry from the cache without writing it
-back. Cache blocks can be invalidated via the invalidate_cblocks
-message, which takes an arbitrary number of cblock ranges. Each cblock
-range's end value is "one past the end", meaning 5-10 expresses a range
-of values from 5 to 9. Each cblock must be expressed as a decimal
-value, in the future a variant message that takes cblock ranges
-expressed in hexadecimal may be needed to better support efficient
-invalidation of larger caches. The cache must be in passthrough mode
-when invalidate_cblocks is used.
-
- invalidate_cblocks [<cblock>|<cblock begin>-<cblock end>]*
-
-E.g.
- dmsetup message my_cache 0 invalidate_cblocks 2345 3456-4567 5678-6789
-
-Examples
-========
-
-The test suite can be found here:
-
-https://github.com/jthornber/device-mapper-test-suite
-
-dmsetup create my_cache --table '0 41943040 cache /dev/mapper/metadata \
- /dev/mapper/ssd /dev/mapper/origin 512 1 writeback default 0'
-dmsetup create my_cache --table '0 41943040 cache /dev/mapper/metadata \
- /dev/mapper/ssd /dev/mapper/origin 1024 1 writeback \
- mq 4 sequential_threshold 1024 random_threshold 8'
diff --git a/Documentation/device-mapper/delay.txt b/Documentation/device-mapper/delay.txt
deleted file mode 100644
index 6426c45273cb..000000000000
--- a/Documentation/device-mapper/delay.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-dm-delay
-========
-
-Device-Mapper's "delay" target delays reads and/or writes
-and maps them to different devices.
-
-Parameters:
- <device> <offset> <delay> [<write_device> <write_offset> <write_delay>
- [<flush_device> <flush_offset> <flush_delay>]]
-
-With separate write parameters, the first set is only used for reads.
-Offsets are specified in sectors.
-Delays are specified in milliseconds.
-
-Example scripts
-===============
-[[
-#!/bin/sh
-# Create device delaying rw operation for 500ms
-echo "0 `blockdev --getsz $1` delay $1 0 500" | dmsetup create delayed
-]]
-
-[[
-#!/bin/sh
-# Create device delaying only write operation for 500ms and
-# splitting reads and writes to different devices $1 $2
-echo "0 `blockdev --getsz $1` delay $1 0 0 $2 0 500" | dmsetup create delayed
-]]
diff --git a/Documentation/device-mapper/dm-crypt.txt b/Documentation/device-mapper/dm-crypt.txt
deleted file mode 100644
index 3b3e1de21c9c..000000000000
--- a/Documentation/device-mapper/dm-crypt.txt
+++ /dev/null
@@ -1,162 +0,0 @@
-dm-crypt
-=========
-
-Device-Mapper's "crypt" target provides transparent encryption of block devices
-using the kernel crypto API.
-
-For a more detailed description of supported parameters see:
-https://gitlab.com/cryptsetup/cryptsetup/wikis/DMCrypt
-
-Parameters: <cipher> <key> <iv_offset> <device path> \
- <offset> [<#opt_params> <opt_params>]
-
-<cipher>
- Encryption cipher, encryption mode and Initial Vector (IV) generator.
-
- The cipher specifications format is:
- cipher[:keycount]-chainmode-ivmode[:ivopts]
- Examples:
- aes-cbc-essiv:sha256
- aes-xts-plain64
- serpent-xts-plain64
-
- Cipher format also supports direct specification with kernel crypt API
- format (selected by capi: prefix). The IV specification is the same
- as for the first format type.
- This format is mainly used for specification of authenticated modes.
-
- The crypto API cipher specifications format is:
- capi:cipher_api_spec-ivmode[:ivopts]
- Examples:
- capi:cbc(aes)-essiv:sha256
- capi:xts(aes)-plain64
- Examples of authenticated modes:
- capi:gcm(aes)-random
- capi:authenc(hmac(sha256),xts(aes))-random
- capi:rfc7539(chacha20,poly1305)-random
-
- The /proc/crypto contains a list of curently loaded crypto modes.
-
-<key>
- Key used for encryption. It is encoded either as a hexadecimal number
- or it can be passed as <key_string> prefixed with single colon
- character (':') for keys residing in kernel keyring service.
- You can only use key sizes that are valid for the selected cipher
- in combination with the selected iv mode.
- Note that for some iv modes the key string can contain additional
- keys (for example IV seed) so the key contains more parts concatenated
- into a single string.
-
-<key_string>
- The kernel keyring key is identified by string in following format:
- <key_size>:<key_type>:<key_description>.
-
-<key_size>
- The encryption key size in bytes. The kernel key payload size must match
- the value passed in <key_size>.
-
-<key_type>
- Either 'logon' or 'user' kernel key type.
-
-<key_description>
- The kernel keyring key description crypt target should look for
- when loading key of <key_type>.
-
-<keycount>
- Multi-key compatibility mode. You can define <keycount> keys and
- then sectors are encrypted according to their offsets (sector 0 uses key0;
- sector 1 uses key1 etc.). <keycount> must be a power of two.
-
-<iv_offset>
- The IV offset is a sector count that is added to the sector number
- before creating the IV.
-
-<device path>
- This is the device that is going to be used as backend and contains the
- encrypted data. You can specify it as a path like /dev/xxx or a device
- number <major>:<minor>.
-
-<offset>
- Starting sector within the device where the encrypted data begins.
-
-<#opt_params>
- Number of optional parameters. If there are no optional parameters,
- the optional paramaters section can be skipped or #opt_params can be zero.
- Otherwise #opt_params is the number of following arguments.
-
- Example of optional parameters section:
- 3 allow_discards same_cpu_crypt submit_from_crypt_cpus
-
-allow_discards
- Block discard requests (a.k.a. TRIM) are passed through the crypt device.
- The default is to ignore discard requests.
-
- WARNING: Assess the specific security risks carefully before enabling this
- option. For example, allowing discards on encrypted devices may lead to
- the leak of information about the ciphertext device (filesystem type,
- used space etc.) if the discarded blocks can be located easily on the
- device later.
-
-same_cpu_crypt
- Perform encryption using the same cpu that IO was submitted on.
- The default is to use an unbound workqueue so that encryption work
- is automatically balanced between available CPUs.
-
-submit_from_crypt_cpus
- Disable offloading writes to a separate thread after encryption.
- There are some situations where offloading write bios from the
- encryption threads to a single thread degrades performance
- significantly. The default is to offload write bios to the same
- thread because it benefits CFQ to have writes submitted using the
- same context.
-
-integrity:<bytes>:<type>
- The device requires additional <bytes> metadata per-sector stored
- in per-bio integrity structure. This metadata must by provided
- by underlying dm-integrity target.
-
- The <type> can be "none" if metadata is used only for persistent IV.
-
- For Authenticated Encryption with Additional Data (AEAD)
- the <type> is "aead". An AEAD mode additionally calculates and verifies
- integrity for the encrypted device. The additional space is then
- used for storing authentication tag (and persistent IV if needed).
-
-sector_size:<bytes>
- Use <bytes> as the encryption unit instead of 512 bytes sectors.
- This option can be in range 512 - 4096 bytes and must be power of two.
- Virtual device will announce this size as a minimal IO and logical sector.
-
-iv_large_sectors
- IV generators will use sector number counted in <sector_size> units
- instead of default 512 bytes sectors.
-
- For example, if <sector_size> is 4096 bytes, plain64 IV for the second
- sector will be 8 (without flag) and 1 if iv_large_sectors is present.
- The <iv_offset> must be multiple of <sector_size> (in 512 bytes units)
- if this flag is specified.
-
-Example scripts
-===============
-LUKS (Linux Unified Key Setup) is now the preferred way to set up disk
-encryption with dm-crypt using the 'cryptsetup' utility, see
-https://gitlab.com/cryptsetup/cryptsetup
-
-[[
-#!/bin/sh
-# Create a crypt device using dmsetup
-dmsetup create crypt1 --table "0 `blockdev --getsz $1` crypt aes-cbc-essiv:sha256 babebabebabebabebabebabebabebabe 0 $1 0"
-]]
-
-[[
-#!/bin/sh
-# Create a crypt device using dmsetup when encryption key is stored in keyring service
-dmsetup create crypt2 --table "0 `blockdev --getsize $1` crypt aes-cbc-essiv:sha256 :32:logon:my_prefix:my_key 0 $1 0"
-]]
-
-[[
-#!/bin/sh
-# Create a crypt device using cryptsetup and LUKS header with default cipher
-cryptsetup luksFormat $1
-cryptsetup luksOpen $1 crypt1
-]]
diff --git a/Documentation/device-mapper/dm-flakey.txt b/Documentation/device-mapper/dm-flakey.txt
deleted file mode 100644
index 9f0e247d0877..000000000000
--- a/Documentation/device-mapper/dm-flakey.txt
+++ /dev/null
@@ -1,57 +0,0 @@
-dm-flakey
-=========
-
-This target is the same as the linear target except that it exhibits
-unreliable behaviour periodically. It's been found useful in simulating
-failing devices for testing purposes.
-
-Starting from the time the table is loaded, the device is available for
-<up interval> seconds, then exhibits unreliable behaviour for <down
-interval> seconds, and then this cycle repeats.
-
-Also, consider using this in combination with the dm-delay target too,
-which can delay reads and writes and/or send them to different
-underlying devices.
-
-Table parameters
-----------------
- <dev path> <offset> <up interval> <down interval> \
- [<num_features> [<feature arguments>]]
-
-Mandatory parameters:
- <dev path>: Full pathname to the underlying block-device, or a
- "major:minor" device-number.
- <offset>: Starting sector within the device.
- <up interval>: Number of seconds device is available.
- <down interval>: Number of seconds device returns errors.
-
-Optional feature parameters:
- If no feature parameters are present, during the periods of
- unreliability, all I/O returns errors.
-
- drop_writes:
- All write I/O is silently ignored.
- Read I/O is handled correctly.
-
- error_writes:
- All write I/O is failed with an error signalled.
- Read I/O is handled correctly.
-
- corrupt_bio_byte <Nth_byte> <direction> <value> <flags>:
- During <down interval>, replace <Nth_byte> of the data of
- each matching bio with <value>.
-
- <Nth_byte>: The offset of the byte to replace.
- Counting starts at 1, to replace the first byte.
- <direction>: Either 'r' to corrupt reads or 'w' to corrupt writes.
- 'w' is incompatible with drop_writes.
- <value>: The value (from 0-255) to write.
- <flags>: Perform the replacement only if bio->bi_opf has all the
- selected flags set.
-
-Examples:
- corrupt_bio_byte 32 r 1 0
- - replaces the 32nd byte of READ bios with the value 1
-
- corrupt_bio_byte 224 w 0 32
- - replaces the 224th byte of REQ_META (=32) bios with the value 0
diff --git a/Documentation/device-mapper/dm-integrity.txt b/Documentation/device-mapper/dm-integrity.txt
deleted file mode 100644
index 297251b0d2d5..000000000000
--- a/Documentation/device-mapper/dm-integrity.txt
+++ /dev/null
@@ -1,203 +0,0 @@
-The dm-integrity target emulates a block device that has additional
-per-sector tags that can be used for storing integrity information.
-
-A general problem with storing integrity tags with every sector is that
-writing the sector and the integrity tag must be atomic - i.e. in case of
-crash, either both sector and integrity tag or none of them is written.
-
-To guarantee write atomicity, the dm-integrity target uses journal, it
-writes sector data and integrity tags into a journal, commits the journal
-and then copies the data and integrity tags to their respective location.
-
-The dm-integrity target can be used with the dm-crypt target - in this
-situation the dm-crypt target creates the integrity data and passes them
-to the dm-integrity target via bio_integrity_payload attached to the bio.
-In this mode, the dm-crypt and dm-integrity targets provide authenticated
-disk encryption - if the attacker modifies the encrypted device, an I/O
-error is returned instead of random data.
-
-The dm-integrity target can also be used as a standalone target, in this
-mode it calculates and verifies the integrity tag internally. In this
-mode, the dm-integrity target can be used to detect silent data
-corruption on the disk or in the I/O path.
-
-
-When loading the target for the first time, the kernel driver will format
-the device. But it will only format the device if the superblock contains
-zeroes. If the superblock is neither valid nor zeroed, the dm-integrity
-target can't be loaded.
-
-To use the target for the first time:
-1. overwrite the superblock with zeroes
-2. load the dm-integrity target with one-sector size, the kernel driver
- will format the device
-3. unload the dm-integrity target
-4. read the "provided_data_sectors" value from the superblock
-5. load the dm-integrity target with the the target size
- "provided_data_sectors"
-6. if you want to use dm-integrity with dm-crypt, load the dm-crypt target
- with the size "provided_data_sectors"
-
-
-Target arguments:
-
-1. the underlying block device
-
-2. the number of reserved sector at the beginning of the device - the
- dm-integrity won't read of write these sectors
-
-3. the size of the integrity tag (if "-" is used, the size is taken from
- the internal-hash algorithm)
-
-4. mode:
- D - direct writes (without journal) - in this mode, journaling is
- not used and data sectors and integrity tags are written
- separately. In case of crash, it is possible that the data
- and integrity tag doesn't match.
- J - journaled writes - data and integrity tags are written to the
- journal and atomicity is guaranteed. In case of crash,
- either both data and tag or none of them are written. The
- journaled mode degrades write throughput twice because the
- data have to be written twice.
- R - recovery mode - in this mode, journal is not replayed,
- checksums are not checked and writes to the device are not
- allowed. This mode is useful for data recovery if the
- device cannot be activated in any of the other standard
- modes.
-
-5. the number of additional arguments
-
-Additional arguments:
-
-journal_sectors:number
- The size of journal, this argument is used only if formatting the
- device. If the device is already formatted, the value from the
- superblock is used.
-
-interleave_sectors:number
- The number of interleaved sectors. This values is rounded down to
- a power of two. If the device is already formatted, the value from
- the superblock is used.
-
-buffer_sectors:number
- The number of sectors in one buffer. The value is rounded down to
- a power of two.
-
- The tag area is accessed using buffers, the buffer size is
- configurable. The large buffer size means that the I/O size will
- be larger, but there could be less I/Os issued.
-
-journal_watermark:number
- The journal watermark in percents. When the size of the journal
- exceeds this watermark, the thread that flushes the journal will
- be started.
-
-commit_time:number
- Commit time in milliseconds. When this time passes, the journal is
- written. The journal is also written immediatelly if the FLUSH
- request is received.
-
-internal_hash:algorithm(:key) (the key is optional)
- Use internal hash or crc.
- When this argument is used, the dm-integrity target won't accept
- integrity tags from the upper target, but it will automatically
- generate and verify the integrity tags.
-
- You can use a crc algorithm (such as crc32), then integrity target
- will protect the data against accidental corruption.
- You can also use a hmac algorithm (for example
- "hmac(sha256):0123456789abcdef"), in this mode it will provide
- cryptographic authentication of the data without encryption.
-
- When this argument is not used, the integrity tags are accepted
- from an upper layer target, such as dm-crypt. The upper layer
- target should check the validity of the integrity tags.
-
-recalculate
- Recalculate the integrity tags automatically. It is only valid
- when using internal hash.
-
-journal_crypt:algorithm(:key) (the key is optional)
- Encrypt the journal using given algorithm to make sure that the
- attacker can't read the journal. You can use a block cipher here
- (such as "cbc(aes)") or a stream cipher (for example "chacha20",
- "salsa20", "ctr(aes)" or "ecb(arc4)").
-
- The journal contains history of last writes to the block device,
- an attacker reading the journal could see the last sector nubmers
- that were written. From the sector numbers, the attacker can infer
- the size of files that were written. To protect against this
- situation, you can encrypt the journal.
-
-journal_mac:algorithm(:key) (the key is optional)
- Protect sector numbers in the journal from accidental or malicious
- modification. To protect against accidental modification, use a
- crc algorithm, to protect against malicious modification, use a
- hmac algorithm with a key.
-
- This option is not needed when using internal-hash because in this
- mode, the integrity of journal entries is checked when replaying
- the journal. Thus, modified sector number would be detected at
- this stage.
-
-block_size:number
- The size of a data block in bytes. The larger the block size the
- less overhead there is for per-block integrity metadata.
- Supported values are 512, 1024, 2048 and 4096 bytes. If not
- specified the default block size is 512 bytes.
-
-The journal mode (D/J), buffer_sectors, journal_watermark, commit_time can
-be changed when reloading the target (load an inactive table and swap the
-tables with suspend and resume). The other arguments should not be changed
-when reloading the target because the layout of disk data depend on them
-and the reloaded target would be non-functional.
-
-
-The layout of the formatted block device:
-* reserved sectors (they are not used by this target, they can be used for
- storing LUKS metadata or for other purpose), the size of the reserved
- area is specified in the target arguments
-* superblock (4kiB)
- * magic string - identifies that the device was formatted
- * version
- * log2(interleave sectors)
- * integrity tag size
- * the number of journal sections
- * provided data sectors - the number of sectors that this target
- provides (i.e. the size of the device minus the size of all
- metadata and padding). The user of this target should not send
- bios that access data beyond the "provided data sectors" limit.
- * flags - a flag is set if journal_mac is used
-* journal
- The journal is divided into sections, each section contains:
- * metadata area (4kiB), it contains journal entries
- every journal entry contains:
- * logical sector (specifies where the data and tag should
- be written)
- * last 8 bytes of data
- * integrity tag (the size is specified in the superblock)
- every metadata sector ends with
- * mac (8-bytes), all the macs in 8 metadata sectors form a
- 64-byte value. It is used to store hmac of sector
- numbers in the journal section, to protect against a
- possibility that the attacker tampers with sector
- numbers in the journal.
- * commit id
- * data area (the size is variable; it depends on how many journal
- entries fit into the metadata area)
- every sector in the data area contains:
- * data (504 bytes of data, the last 8 bytes are stored in
- the journal entry)
- * commit id
- To test if the whole journal section was written correctly, every
- 512-byte sector of the journal ends with 8-byte commit id. If the
- commit id matches on all sectors in a journal section, then it is
- assumed that the section was written correctly. If the commit id
- doesn't match, the section was written partially and it should not
- be replayed.
-* one or more runs of interleaved tags and data. Each run contains:
- * tag area - it contains integrity tags. There is one tag for each
- sector in the data area
- * data area - it contains data sectors. The number of data sectors
- in one run must be a power of two. log2 of this value is stored
- in the superblock.
diff --git a/Documentation/device-mapper/dm-io.txt b/Documentation/device-mapper/dm-io.txt
deleted file mode 100644
index 3b5d9a52cdcf..000000000000
--- a/Documentation/device-mapper/dm-io.txt
+++ /dev/null
@@ -1,75 +0,0 @@
-dm-io
-=====
-
-Dm-io provides synchronous and asynchronous I/O services. There are three
-types of I/O services available, and each type has a sync and an async
-version.
-
-The user must set up an io_region structure to describe the desired location
-of the I/O. Each io_region indicates a block-device along with the starting
-sector and size of the region.
-
- struct io_region {
- struct block_device *bdev;
- sector_t sector;
- sector_t count;
- };
-
-Dm-io can read from one io_region or write to one or more io_regions. Writes
-to multiple regions are specified by an array of io_region structures.
-
-The first I/O service type takes a list of memory pages as the data buffer for
-the I/O, along with an offset into the first page.
-
- struct page_list {
- struct page_list *next;
- struct page *page;
- };
-
- int dm_io_sync(unsigned int num_regions, struct io_region *where, int rw,
- struct page_list *pl, unsigned int offset,
- unsigned long *error_bits);
- int dm_io_async(unsigned int num_regions, struct io_region *where, int rw,
- struct page_list *pl, unsigned int offset,
- io_notify_fn fn, void *context);
-
-The second I/O service type takes an array of bio vectors as the data buffer
-for the I/O. This service can be handy if the caller has a pre-assembled bio,
-but wants to direct different portions of the bio to different devices.
-
- int dm_io_sync_bvec(unsigned int num_regions, struct io_region *where,
- int rw, struct bio_vec *bvec,
- unsigned long *error_bits);
- int dm_io_async_bvec(unsigned int num_regions, struct io_region *where,
- int rw, struct bio_vec *bvec,
- io_notify_fn fn, void *context);
-
-The third I/O service type takes a pointer to a vmalloc'd memory buffer as the
-data buffer for the I/O. This service can be handy if the caller needs to do
-I/O to a large region but doesn't want to allocate a large number of individual
-memory pages.
-
- int dm_io_sync_vm(unsigned int num_regions, struct io_region *where, int rw,
- void *data, unsigned long *error_bits);
- int dm_io_async_vm(unsigned int num_regions, struct io_region *where, int rw,
- void *data, io_notify_fn fn, void *context);
-
-Callers of the asynchronous I/O services must include the name of a completion
-callback routine and a pointer to some context data for the I/O.
-
- typedef void (*io_notify_fn)(unsigned long error, void *context);
-
-The "error" parameter in this callback, as well as the "*error" parameter in
-all of the synchronous versions, is a bitset (instead of a simple error value).
-In the case of an write-I/O to multiple regions, this bitset allows dm-io to
-indicate success or failure on each individual region.
-
-Before using any of the dm-io services, the user should call dm_io_get()
-and specify the number of pages they expect to perform I/O on concurrently.
-Dm-io will attempt to resize its mempool to make sure enough pages are
-always available in order to avoid unnecessary waiting while performing I/O.
-
-When the user is finished using the dm-io services, they should call
-dm_io_put() and specify the same number of pages that were given on the
-dm_io_get() call.
-
diff --git a/Documentation/device-mapper/dm-log.txt b/Documentation/device-mapper/dm-log.txt
deleted file mode 100644
index c155ac569c44..000000000000
--- a/Documentation/device-mapper/dm-log.txt
+++ /dev/null
@@ -1,54 +0,0 @@
-Device-Mapper Logging
-=====================
-The device-mapper logging code is used by some of the device-mapper
-RAID targets to track regions of the disk that are not consistent.
-A region (or portion of the address space) of the disk may be
-inconsistent because a RAID stripe is currently being operated on or
-a machine died while the region was being altered. In the case of
-mirrors, a region would be considered dirty/inconsistent while you
-are writing to it because the writes need to be replicated for all
-the legs of the mirror and may not reach the legs at the same time.
-Once all writes are complete, the region is considered clean again.
-
-There is a generic logging interface that the device-mapper RAID
-implementations use to perform logging operations (see
-dm_dirty_log_type in include/linux/dm-dirty-log.h). Various different
-logging implementations are available and provide different
-capabilities. The list includes:
-
-Type Files
-==== =====
-disk drivers/md/dm-log.c
-core drivers/md/dm-log.c
-userspace drivers/md/dm-log-userspace* include/linux/dm-log-userspace.h
-
-The "disk" log type
--------------------
-This log implementation commits the log state to disk. This way, the
-logging state survives reboots/crashes.
-
-The "core" log type
--------------------
-This log implementation keeps the log state in memory. The log state
-will not survive a reboot or crash, but there may be a small boost in
-performance. This method can also be used if no storage device is
-available for storing log state.
-
-The "userspace" log type
-------------------------
-This log type simply provides a way to export the log API to userspace,
-so log implementations can be done there. This is done by forwarding most
-logging requests to userspace, where a daemon receives and processes the
-request.
-
-The structure used for communication between kernel and userspace are
-located in include/linux/dm-log-userspace.h. Due to the frequency,
-diversity, and 2-way communication nature of the exchanges between
-kernel and userspace, 'connector' is used as the interface for
-communication.
-
-There are currently two userspace log implementations that leverage this
-framework - "clustered-disk" and "clustered-core". These implementations
-provide a cluster-coherent log for shared-storage. Device-mapper mirroring
-can be used in a shared-storage environment when the cluster log implementations
-are employed.
diff --git a/Documentation/device-mapper/dm-queue-length.txt b/Documentation/device-mapper/dm-queue-length.txt
deleted file mode 100644
index f4db2562175c..000000000000
--- a/Documentation/device-mapper/dm-queue-length.txt
+++ /dev/null
@@ -1,39 +0,0 @@
-dm-queue-length
-===============
-
-dm-queue-length is a path selector module for device-mapper targets,
-which selects a path with the least number of in-flight I/Os.
-The path selector name is 'queue-length'.
-
-Table parameters for each path: [<repeat_count>]
- <repeat_count>: The number of I/Os to dispatch using the selected
- path before switching to the next path.
- If not given, internal default is used. To check
- the default value, see the activated table.
-
-Status for each path: <status> <fail-count> <in-flight>
- <status>: 'A' if the path is active, 'F' if the path is failed.
- <fail-count>: The number of path failures.
- <in-flight>: The number of in-flight I/Os on the path.
-
-
-Algorithm
-=========
-
-dm-queue-length increments/decrements 'in-flight' when an I/O is
-dispatched/completed respectively.
-dm-queue-length selects a path with the minimum 'in-flight'.
-
-
-Examples
-========
-In case that 2 paths (sda and sdb) are used with repeat_count == 128.
-
-# echo "0 10 multipath 0 0 1 1 queue-length 0 2 1 8:0 128 8:16 128" \
- dmsetup create test
-#
-# dmsetup table
-test: 0 10 multipath 0 0 1 1 queue-length 0 2 1 8:0 128 8:16 128
-#
-# dmsetup status
-test: 0 10 multipath 2 0 0 0 1 1 E 0 2 1 8:0 A 0 0 8:16 A 0 0
diff --git a/Documentation/device-mapper/dm-raid.txt b/Documentation/device-mapper/dm-raid.txt
deleted file mode 100644
index 52a719b49afd..000000000000
--- a/Documentation/device-mapper/dm-raid.txt
+++ /dev/null
@@ -1,354 +0,0 @@
-dm-raid
-=======
-
-The device-mapper RAID (dm-raid) target provides a bridge from DM to MD.
-It allows the MD RAID drivers to be accessed using a device-mapper
-interface.
-
-
-Mapping Table Interface
------------------------
-The target is named "raid" and it accepts the following parameters:
-
- <raid_type> <#raid_params> <raid_params> \
- <#raid_devs> <metadata_dev0> <dev0> [.. <metadata_devN> <devN>]
-
-<raid_type>:
- raid0 RAID0 striping (no resilience)
- raid1 RAID1 mirroring
- raid4 RAID4 with dedicated last parity disk
- raid5_n RAID5 with dedicated last parity disk supporting takeover
- Same as raid4
- -Transitory layout
- raid5_la RAID5 left asymmetric
- - rotating parity 0 with data continuation
- raid5_ra RAID5 right asymmetric
- - rotating parity N with data continuation
- raid5_ls RAID5 left symmetric
- - rotating parity 0 with data restart
- raid5_rs RAID5 right symmetric
- - rotating parity N with data restart
- raid6_zr RAID6 zero restart
- - rotating parity zero (left-to-right) with data restart
- raid6_nr RAID6 N restart
- - rotating parity N (right-to-left) with data restart
- raid6_nc RAID6 N continue
- - rotating parity N (right-to-left) with data continuation
- raid6_n_6 RAID6 with dedicate parity disks
- - parity and Q-syndrome on the last 2 disks;
- layout for takeover from/to raid4/raid5_n
- raid6_la_6 Same as "raid_la" plus dedicated last Q-syndrome disk
- - layout for takeover from raid5_la from/to raid6
- raid6_ra_6 Same as "raid5_ra" dedicated last Q-syndrome disk
- - layout for takeover from raid5_ra from/to raid6
- raid6_ls_6 Same as "raid5_ls" dedicated last Q-syndrome disk
- - layout for takeover from raid5_ls from/to raid6
- raid6_rs_6 Same as "raid5_rs" dedicated last Q-syndrome disk
- - layout for takeover from raid5_rs from/to raid6
- raid10 Various RAID10 inspired algorithms chosen by additional params
- (see raid10_format and raid10_copies below)
- - RAID10: Striped Mirrors (aka 'Striping on top of mirrors')
- - RAID1E: Integrated Adjacent Stripe Mirroring
- - RAID1E: Integrated Offset Stripe Mirroring
- - and other similar RAID10 variants
-
- Reference: Chapter 4 of
- http://www.snia.org/sites/default/files/SNIA_DDF_Technical_Position_v2.0.pdf
-
-<#raid_params>: The number of parameters that follow.
-
-<raid_params> consists of
- Mandatory parameters:
- <chunk_size>: Chunk size in sectors. This parameter is often known as
- "stripe size". It is the only mandatory parameter and
- is placed first.
-
- followed by optional parameters (in any order):
- [sync|nosync] Force or prevent RAID initialization.
-
- [rebuild <idx>] Rebuild drive number 'idx' (first drive is 0).
-
- [daemon_sleep <ms>]
- Interval between runs of the bitmap daemon that
- clear bits. A longer interval means less bitmap I/O but
- resyncing after a failure is likely to take longer.
-
- [min_recovery_rate <kB/sec/disk>] Throttle RAID initialization
- [max_recovery_rate <kB/sec/disk>] Throttle RAID initialization
- [write_mostly <idx>] Mark drive index 'idx' write-mostly.
- [max_write_behind <sectors>] See '--write-behind=' (man mdadm)
- [stripe_cache <sectors>] Stripe cache size (RAID 4/5/6 only)
- [region_size <sectors>]
- The region_size multiplied by the number of regions is the
- logical size of the array. The bitmap records the device
- synchronisation state for each region.
-
- [raid10_copies <# copies>]
- [raid10_format <near|far|offset>]
- These two options are used to alter the default layout of
- a RAID10 configuration. The number of copies is can be
- specified, but the default is 2. There are also three
- variations to how the copies are laid down - the default
- is "near". Near copies are what most people think of with
- respect to mirroring. If these options are left unspecified,
- or 'raid10_copies 2' and/or 'raid10_format near' are given,
- then the layouts for 2, 3 and 4 devices are:
- 2 drives 3 drives 4 drives
- -------- ---------- --------------
- A1 A1 A1 A1 A2 A1 A1 A2 A2
- A2 A2 A2 A3 A3 A3 A3 A4 A4
- A3 A3 A4 A4 A5 A5 A5 A6 A6
- A4 A4 A5 A6 A6 A7 A7 A8 A8
- .. .. .. .. .. .. .. .. ..
- The 2-device layout is equivalent 2-way RAID1. The 4-device
- layout is what a traditional RAID10 would look like. The
- 3-device layout is what might be called a 'RAID1E - Integrated
- Adjacent Stripe Mirroring'.
-
- If 'raid10_copies 2' and 'raid10_format far', then the layouts
- for 2, 3 and 4 devices are:
- 2 drives 3 drives 4 drives
- -------- -------------- --------------------
- A1 A2 A1 A2 A3 A1 A2 A3 A4
- A3 A4 A4 A5 A6 A5 A6 A7 A8
- A5 A6 A7 A8 A9 A9 A10 A11 A12
- .. .. .. .. .. .. .. .. ..
- A2 A1 A3 A1 A2 A2 A1 A4 A3
- A4 A3 A6 A4 A5 A6 A5 A8 A7
- A6 A5 A9 A7 A8 A10 A9 A12 A11
- .. .. .. .. .. .. .. .. ..
-
- If 'raid10_copies 2' and 'raid10_format offset', then the
- layouts for 2, 3 and 4 devices are:
- 2 drives 3 drives 4 drives
- -------- ------------ -----------------
- A1 A2 A1 A2 A3 A1 A2 A3 A4
- A2 A1 A3 A1 A2 A2 A1 A4 A3
- A3 A4 A4 A5 A6 A5 A6 A7 A8
- A4 A3 A6 A4 A5 A6 A5 A8 A7
- A5 A6 A7 A8 A9 A9 A10 A11 A12
- A6 A5 A9 A7 A8 A10 A9 A12 A11
- .. .. .. .. .. .. .. .. ..
- Here we see layouts closely akin to 'RAID1E - Integrated
- Offset Stripe Mirroring'.
-
- [delta_disks <N>]
- The delta_disks option value (-251 < N < +251) triggers
- device removal (negative value) or device addition (positive
- value) to any reshape supporting raid levels 4/5/6 and 10.
- RAID levels 4/5/6 allow for addition of devices (metadata
- and data device tuple), raid10_near and raid10_offset only
- allow for device addition. raid10_far does not support any
- reshaping at all.
- A minimum of devices have to be kept to enforce resilience,
- which is 3 devices for raid4/5 and 4 devices for raid6.
-
- [data_offset <sectors>]
- This option value defines the offset into each data device
- where the data starts. This is used to provide out-of-place
- reshaping space to avoid writing over data whilst
- changing the layout of stripes, hence an interruption/crash
- may happen at any time without the risk of losing data.
- E.g. when adding devices to an existing raid set during
- forward reshaping, the out-of-place space will be allocated
- at the beginning of each raid device. The kernel raid4/5/6/10
- MD personalities supporting such device addition will read the data from
- the existing first stripes (those with smaller number of stripes)
- starting at data_offset to fill up a new stripe with the larger
- number of stripes, calculate the redundancy blocks (CRC/Q-syndrome)
- and write that new stripe to offset 0. Same will be applied to all
- N-1 other new stripes. This out-of-place scheme is used to change
- the RAID type (i.e. the allocation algorithm) as well, e.g.
- changing from raid5_ls to raid5_n.
-
- [journal_dev <dev>]
- This option adds a journal device to raid4/5/6 raid sets and
- uses it to close the 'write hole' caused by the non-atomic updates
- to the component devices which can cause data loss during recovery.
- The journal device is used as writethrough thus causing writes to
- be throttled versus non-journaled raid4/5/6 sets.
- Takeover/reshape is not possible with a raid4/5/6 journal device;
- it has to be deconfigured before requesting these.
-
- [journal_mode <mode>]
- This option sets the caching mode on journaled raid4/5/6 raid sets
- (see 'journal_dev <dev>' above) to 'writethrough' or 'writeback'.
- If 'writeback' is selected the journal device has to be resilient
- and must not suffer from the 'write hole' problem itself (e.g. use
- raid1 or raid10) to avoid a single point of failure.
-
-<#raid_devs>: The number of devices composing the array.
- Each device consists of two entries. The first is the device
- containing the metadata (if any); the second is the one containing the
- data. A Maximum of 64 metadata/data device entries are supported
- up to target version 1.8.0.
- 1.9.0 supports up to 253 which is enforced by the used MD kernel runtime.
-
- If a drive has failed or is missing at creation time, a '-' can be
- given for both the metadata and data drives for a given position.
-
-
-Example Tables
---------------
-# RAID4 - 4 data drives, 1 parity (no metadata devices)
-# No metadata devices specified to hold superblock/bitmap info
-# Chunk size of 1MiB
-# (Lines separated for easy reading)
-
-0 1960893648 raid \
- raid4 1 2048 \
- 5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81
-
-# RAID4 - 4 data drives, 1 parity (with metadata devices)
-# Chunk size of 1MiB, force RAID initialization,
-# min recovery rate at 20 kiB/sec/disk
-
-0 1960893648 raid \
- raid4 4 2048 sync min_recovery_rate 20 \
- 5 8:17 8:18 8:33 8:34 8:49 8:50 8:65 8:66 8:81 8:82
-
-
-Status Output
--------------
-'dmsetup table' displays the table used to construct the mapping.
-The optional parameters are always printed in the order listed
-above with "sync" or "nosync" always output ahead of the other
-arguments, regardless of the order used when originally loading the table.
-Arguments that can be repeated are ordered by value.
-
-
-'dmsetup status' yields information on the state and health of the array.
-The output is as follows (normally a single line, but expanded here for
-clarity):
-1: <s> <l> raid \
-2: <raid_type> <#devices> <health_chars> \
-3: <sync_ratio> <sync_action> <mismatch_cnt>
-
-Line 1 is the standard output produced by device-mapper.
-Line 2 & 3 are produced by the raid target and are best explained by example:
- 0 1960893648 raid raid4 5 AAAAA 2/490221568 init 0
-Here we can see the RAID type is raid4, there are 5 devices - all of
-which are 'A'live, and the array is 2/490221568 complete with its initial
-recovery. Here is a fuller description of the individual fields:
- <raid_type> Same as the <raid_type> used to create the array.
- <health_chars> One char for each device, indicating: 'A' = alive and
- in-sync, 'a' = alive but not in-sync, 'D' = dead/failed.
- <sync_ratio> The ratio indicating how much of the array has undergone
- the process described by 'sync_action'. If the
- 'sync_action' is "check" or "repair", then the process
- of "resync" or "recover" can be considered complete.
- <sync_action> One of the following possible states:
- idle - No synchronization action is being performed.
- frozen - The current action has been halted.
- resync - Array is undergoing its initial synchronization
- or is resynchronizing after an unclean shutdown
- (possibly aided by a bitmap).
- recover - A device in the array is being rebuilt or
- replaced.
- check - A user-initiated full check of the array is
- being performed. All blocks are read and
- checked for consistency. The number of
- discrepancies found are recorded in
- <mismatch_cnt>. No changes are made to the
- array by this action.
- repair - The same as "check", but discrepancies are
- corrected.
- reshape - The array is undergoing a reshape.
- <mismatch_cnt> The number of discrepancies found between mirror copies
- in RAID1/10 or wrong parity values found in RAID4/5/6.
- This value is valid only after a "check" of the array
- is performed. A healthy array has a 'mismatch_cnt' of 0.
- <data_offset> The current data offset to the start of the user data on
- each component device of a raid set (see the respective
- raid parameter to support out-of-place reshaping).
- <journal_char> 'A' - active write-through journal device.
- 'a' - active write-back journal device.
- 'D' - dead journal device.
- '-' - no journal device.
-
-
-Message Interface
------------------
-The dm-raid target will accept certain actions through the 'message' interface.
-('man dmsetup' for more information on the message interface.) These actions
-include:
- "idle" - Halt the current sync action.
- "frozen" - Freeze the current sync action.
- "resync" - Initiate/continue a resync.
- "recover"- Initiate/continue a recover process.
- "check" - Initiate a check (i.e. a "scrub") of the array.
- "repair" - Initiate a repair of the array.
-
-
-Discard Support
----------------
-The implementation of discard support among hardware vendors varies.
-When a block is discarded, some storage devices will return zeroes when
-the block is read. These devices set the 'discard_zeroes_data'
-attribute. Other devices will return random data. Confusingly, some
-devices that advertise 'discard_zeroes_data' will not reliably return
-zeroes when discarded blocks are read! Since RAID 4/5/6 uses blocks
-from a number of devices to calculate parity blocks and (for performance
-reasons) relies on 'discard_zeroes_data' being reliable, it is important
-that the devices be consistent. Blocks may be discarded in the middle
-of a RAID 4/5/6 stripe and if subsequent read results are not
-consistent, the parity blocks may be calculated differently at any time;
-making the parity blocks useless for redundancy. It is important to
-understand how your hardware behaves with discards if you are going to
-enable discards with RAID 4/5/6.
-
-Since the behavior of storage devices is unreliable in this respect,
-even when reporting 'discard_zeroes_data', by default RAID 4/5/6
-discard support is disabled -- this ensures data integrity at the
-expense of losing some performance.
-
-Storage devices that properly support 'discard_zeroes_data' are
-increasingly whitelisted in the kernel and can thus be trusted.
-
-For trusted devices, the following dm-raid module parameter can be set
-to safely enable discard support for RAID 4/5/6:
- 'devices_handle_discards_safely'
-
-
-Version History
----------------
-1.0.0 Initial version. Support for RAID 4/5/6
-1.1.0 Added support for RAID 1
-1.2.0 Handle creation of arrays that contain failed devices.
-1.3.0 Added support for RAID 10
-1.3.1 Allow device replacement/rebuild for RAID 10
-1.3.2 Fix/improve redundancy checking for RAID10
-1.4.0 Non-functional change. Removes arg from mapping function.
-1.4.1 RAID10 fix redundancy validation checks (commit 55ebbb5).
-1.4.2 Add RAID10 "far" and "offset" algorithm support.
-1.5.0 Add message interface to allow manipulation of the sync_action.
- New status (STATUSTYPE_INFO) fields: sync_action and mismatch_cnt.
-1.5.1 Add ability to restore transiently failed devices on resume.
-1.5.2 'mismatch_cnt' is zero unless [last_]sync_action is "check".
-1.6.0 Add discard support (and devices_handle_discard_safely module param).
-1.7.0 Add support for MD RAID0 mappings.
-1.8.0 Explicitly check for compatible flags in the superblock metadata
- and reject to start the raid set if any are set by a newer
- target version, thus avoiding data corruption on a raid set
- with a reshape in progress.
-1.9.0 Add support for RAID level takeover/reshape/region size
- and set size reduction.
-1.9.1 Fix activation of existing RAID 4/10 mapped devices
-1.9.2 Don't emit '- -' on the status table line in case the constructor
- fails reading a superblock. Correctly emit 'maj:min1 maj:min2' and
- 'D' on the status line. If '- -' is passed into the constructor, emit
- '- -' on the table line and '-' as the status line health character.
-1.10.0 Add support for raid4/5/6 journal device
-1.10.1 Fix data corruption on reshape request
-1.11.0 Fix table line argument order
- (wrong raid10_copies/raid10_format sequence)
-1.11.1 Add raid4/5/6 journal write-back support via journal_mode option
-1.12.1 Fix for MD deadlock between mddev_suspend() and md_write_start() available
-1.13.0 Fix dev_health status at end of "recover" (was 'a', now 'A')
-1.13.1 Fix deadlock caused by early md_stop_writes(). Also fix size an
- state races.
-1.13.2 Fix raid redundancy validation and avoid keeping raid set frozen
-1.14.0 Fix reshape race on small devices. Fix stripe adding reshape
- deadlock/potential data corruption. Update superblock when
- specific devices are requested via rebuild. Fix RAID leg
- rebuild errors.
diff --git a/Documentation/device-mapper/dm-service-time.txt b/Documentation/device-mapper/dm-service-time.txt
deleted file mode 100644
index fb1d4a0cf122..000000000000
--- a/Documentation/device-mapper/dm-service-time.txt
+++ /dev/null
@@ -1,91 +0,0 @@
-dm-service-time
-===============
-
-dm-service-time is a path selector module for device-mapper targets,
-which selects a path with the shortest estimated service time for
-the incoming I/O.
-
-The service time for each path is estimated by dividing the total size
-of in-flight I/Os on a path with the performance value of the path.
-The performance value is a relative throughput value among all paths
-in a path-group, and it can be specified as a table argument.
-
-The path selector name is 'service-time'.
-
-Table parameters for each path: [<repeat_count> [<relative_throughput>]]
- <repeat_count>: The number of I/Os to dispatch using the selected
- path before switching to the next path.
- If not given, internal default is used. To check
- the default value, see the activated table.
- <relative_throughput>: The relative throughput value of the path
- among all paths in the path-group.
- The valid range is 0-100.
- If not given, minimum value '1' is used.
- If '0' is given, the path isn't selected while
- other paths having a positive value are available.
-
-Status for each path: <status> <fail-count> <in-flight-size> \
- <relative_throughput>
- <status>: 'A' if the path is active, 'F' if the path is failed.
- <fail-count>: The number of path failures.
- <in-flight-size>: The size of in-flight I/Os on the path.
- <relative_throughput>: The relative throughput value of the path
- among all paths in the path-group.
-
-
-Algorithm
-=========
-
-dm-service-time adds the I/O size to 'in-flight-size' when the I/O is
-dispatched and subtracts when completed.
-Basically, dm-service-time selects a path having minimum service time
-which is calculated by:
-
- ('in-flight-size' + 'size-of-incoming-io') / 'relative_throughput'
-
-However, some optimizations below are used to reduce the calculation
-as much as possible.
-
- 1. If the paths have the same 'relative_throughput', skip
- the division and just compare the 'in-flight-size'.
-
- 2. If the paths have the same 'in-flight-size', skip the division
- and just compare the 'relative_throughput'.
-
- 3. If some paths have non-zero 'relative_throughput' and others
- have zero 'relative_throughput', ignore those paths with zero
- 'relative_throughput'.
-
-If such optimizations can't be applied, calculate service time, and
-compare service time.
-If calculated service time is equal, the path having maximum
-'relative_throughput' may be better. So compare 'relative_throughput'
-then.
-
-
-Examples
-========
-In case that 2 paths (sda and sdb) are used with repeat_count == 128
-and sda has an average throughput 1GB/s and sdb has 4GB/s,
-'relative_throughput' value may be '1' for sda and '4' for sdb.
-
-# echo "0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 1 8:16 128 4" \
- dmsetup create test
-#
-# dmsetup table
-test: 0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 1 8:16 128 4
-#
-# dmsetup status
-test: 0 10 multipath 2 0 0 0 1 1 E 0 2 2 8:0 A 0 0 1 8:16 A 0 0 4
-
-
-Or '2' for sda and '8' for sdb would be also true.
-
-# echo "0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 2 8:16 128 8" \
- dmsetup create test
-#
-# dmsetup table
-test: 0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 2 8:16 128 8
-#
-# dmsetup status
-test: 0 10 multipath 2 0 0 0 1 1 E 0 2 2 8:0 A 0 0 2 8:16 A 0 0 8
diff --git a/Documentation/device-mapper/dm-uevent.txt b/Documentation/device-mapper/dm-uevent.txt
deleted file mode 100644
index 07edbd85c714..000000000000
--- a/Documentation/device-mapper/dm-uevent.txt
+++ /dev/null
@@ -1,97 +0,0 @@
-The device-mapper uevent code adds the capability to device-mapper to create
-and send kobject uevents (uevents). Previously device-mapper events were only
-available through the ioctl interface. The advantage of the uevents interface
-is the event contains environment attributes providing increased context for
-the event avoiding the need to query the state of the device-mapper device after
-the event is received.
-
-There are two functions currently for device-mapper events. The first function
-listed creates the event and the second function sends the event(s).
-
-void dm_path_uevent(enum dm_uevent_type event_type, struct dm_target *ti,
- const char *path, unsigned nr_valid_paths)
-
-void dm_send_uevents(struct list_head *events, struct kobject *kobj)
-
-
-The variables added to the uevent environment are:
-
-Variable Name: DM_TARGET
-Uevent Action(s): KOBJ_CHANGE
-Type: string
-Description:
-Value: Name of device-mapper target that generated the event.
-
-Variable Name: DM_ACTION
-Uevent Action(s): KOBJ_CHANGE
-Type: string
-Description:
-Value: Device-mapper specific action that caused the uevent action.
- PATH_FAILED - A path has failed.
- PATH_REINSTATED - A path has been reinstated.
-
-Variable Name: DM_SEQNUM
-Uevent Action(s): KOBJ_CHANGE
-Type: unsigned integer
-Description: A sequence number for this specific device-mapper device.
-Value: Valid unsigned integer range.
-
-Variable Name: DM_PATH
-Uevent Action(s): KOBJ_CHANGE
-Type: string
-Description: Major and minor number of the path device pertaining to this
-event.
-Value: Path name in the form of "Major:Minor"
-
-Variable Name: DM_NR_VALID_PATHS
-Uevent Action(s): KOBJ_CHANGE
-Type: unsigned integer
-Description:
-Value: Valid unsigned integer range.
-
-Variable Name: DM_NAME
-Uevent Action(s): KOBJ_CHANGE
-Type: string
-Description: Name of the device-mapper device.
-Value: Name
-
-Variable Name: DM_UUID
-Uevent Action(s): KOBJ_CHANGE
-Type: string
-Description: UUID of the device-mapper device.
-Value: UUID. (Empty string if there isn't one.)
-
-An example of the uevents generated as captured by udevmonitor is shown
-below.
-
-1.) Path failure.
-UEVENT[1192521009.711215] change@/block/dm-3
-ACTION=change
-DEVPATH=/block/dm-3
-SUBSYSTEM=block
-DM_TARGET=multipath
-DM_ACTION=PATH_FAILED
-DM_SEQNUM=1
-DM_PATH=8:32
-DM_NR_VALID_PATHS=0
-DM_NAME=mpath2
-DM_UUID=mpath-35333333000002328
-MINOR=3
-MAJOR=253
-SEQNUM=1130
-
-2.) Path reinstate.
-UEVENT[1192521132.989927] change@/block/dm-3
-ACTION=change
-DEVPATH=/block/dm-3
-SUBSYSTEM=block
-DM_TARGET=multipath
-DM_ACTION=PATH_REINSTATED
-DM_SEQNUM=2
-DM_PATH=8:32
-DM_NR_VALID_PATHS=1
-DM_NAME=mpath2
-DM_UUID=mpath-35333333000002328
-MINOR=3
-MAJOR=253
-SEQNUM=1131
diff --git a/Documentation/device-mapper/dm-zoned.txt b/Documentation/device-mapper/dm-zoned.txt
deleted file mode 100644
index 736fcc78d193..000000000000
--- a/Documentation/device-mapper/dm-zoned.txt
+++ /dev/null
@@ -1,144 +0,0 @@
-dm-zoned
-========
-
-The dm-zoned device mapper target exposes a zoned block device (ZBC and
-ZAC compliant devices) as a regular block device without any write
-pattern constraints. In effect, it implements a drive-managed zoned
-block device which hides from the user (a file system or an application
-doing raw block device accesses) the sequential write constraints of
-host-managed zoned block devices and can mitigate the potential
-device-side performance degradation due to excessive random writes on
-host-aware zoned block devices.
-
-For a more detailed description of the zoned block device models and
-their constraints see (for SCSI devices):
-
-http://www.t10.org/drafts.htm#ZBC_Family
-
-and (for ATA devices):
-
-http://www.t13.org/Documents/UploadedDocuments/docs2015/di537r05-Zoned_Device_ATA_Command_Set_ZAC.pdf
-
-The dm-zoned implementation is simple and minimizes system overhead (CPU
-and memory usage as well as storage capacity loss). For a 10TB
-host-managed disk with 256 MB zones, dm-zoned memory usage per disk
-instance is at most 4.5 MB and as little as 5 zones will be used
-internally for storing metadata and performaing reclaim operations.
-
-dm-zoned target devices are formatted and checked using the dmzadm
-utility available at:
-
-https://github.com/hgst/dm-zoned-tools
-
-Algorithm
-=========
-
-dm-zoned implements an on-disk buffering scheme to handle non-sequential
-write accesses to the sequential zones of a zoned block device.
-Conventional zones are used for caching as well as for storing internal
-metadata.
-
-The zones of the device are separated into 2 types:
-
-1) Metadata zones: these are conventional zones used to store metadata.
-Metadata zones are not reported as useable capacity to the user.
-
-2) Data zones: all remaining zones, the vast majority of which will be
-sequential zones used exclusively to store user data. The conventional
-zones of the device may be used also for buffering user random writes.
-Data in these zones may be directly mapped to the conventional zone, but
-later moved to a sequential zone so that the conventional zone can be
-reused for buffering incoming random writes.
-
-dm-zoned exposes a logical device with a sector size of 4096 bytes,
-irrespective of the physical sector size of the backend zoned block
-device being used. This allows reducing the amount of metadata needed to
-manage valid blocks (blocks written).
-
-The on-disk metadata format is as follows:
-
-1) The first block of the first conventional zone found contains the
-super block which describes the on disk amount and position of metadata
-blocks.
-
-2) Following the super block, a set of blocks is used to describe the
-mapping of the logical device blocks. The mapping is done per chunk of
-blocks, with the chunk size equal to the zoned block device size. The
-mapping table is indexed by chunk number and each mapping entry
-indicates the zone number of the device storing the chunk of data. Each
-mapping entry may also indicate if the zone number of a conventional
-zone used to buffer random modification to the data zone.
-
-3) A set of blocks used to store bitmaps indicating the validity of
-blocks in the data zones follows the mapping table. A valid block is
-defined as a block that was written and not discarded. For a buffered
-data chunk, a block is always valid only in the data zone mapping the
-chunk or in the buffer zone of the chunk.
-
-For a logical chunk mapped to a conventional zone, all write operations
-are processed by directly writing to the zone. If the mapping zone is a
-sequential zone, the write operation is processed directly only if the
-write offset within the logical chunk is equal to the write pointer
-offset within of the sequential data zone (i.e. the write operation is
-aligned on the zone write pointer). Otherwise, write operations are
-processed indirectly using a buffer zone. In that case, an unused
-conventional zone is allocated and assigned to the chunk being
-accessed. Writing a block to the buffer zone of a chunk will
-automatically invalidate the same block in the sequential zone mapping
-the chunk. If all blocks of the sequential zone become invalid, the zone
-is freed and the chunk buffer zone becomes the primary zone mapping the
-chunk, resulting in native random write performance similar to a regular
-block device.
-
-Read operations are processed according to the block validity
-information provided by the bitmaps. Valid blocks are read either from
-the sequential zone mapping a chunk, or if the chunk is buffered, from
-the buffer zone assigned. If the accessed chunk has no mapping, or the
-accessed blocks are invalid, the read buffer is zeroed and the read
-operation terminated.
-
-After some time, the limited number of convnetional zones available may
-be exhausted (all used to map chunks or buffer sequential zones) and
-unaligned writes to unbuffered chunks become impossible. To avoid this
-situation, a reclaim process regularly scans used conventional zones and
-tries to reclaim the least recently used zones by copying the valid
-blocks of the buffer zone to a free sequential zone. Once the copy
-completes, the chunk mapping is updated to point to the sequential zone
-and the buffer zone freed for reuse.
-
-Metadata Protection
-===================
-
-To protect metadata against corruption in case of sudden power loss or
-system crash, 2 sets of metadata zones are used. One set, the primary
-set, is used as the main metadata region, while the secondary set is
-used as a staging area. Modified metadata is first written to the
-secondary set and validated by updating the super block in the secondary
-set, a generation counter is used to indicate that this set contains the
-newest metadata. Once this operation completes, in place of metadata
-block updates can be done in the primary metadata set. This ensures that
-one of the set is always consistent (all modifications committed or none
-at all). Flush operations are used as a commit point. Upon reception of
-a flush request, metadata modification activity is temporarily blocked
-(for both incoming BIO processing and reclaim process) and all dirty
-metadata blocks are staged and updated. Normal operation is then
-resumed. Flushing metadata thus only temporarily delays write and
-discard requests. Read requests can be processed concurrently while
-metadata flush is being executed.
-
-Usage
-=====
-
-A zoned block device must first be formatted using the dmzadm tool. This
-will analyze the device zone configuration, determine where to place the
-metadata sets on the device and initialize the metadata sets.
-
-Ex:
-
-dmzadm --format /dev/sdxx
-
-For a formatted device, the target can be created normally with the
-dmsetup utility. The only parameter that dm-zoned requires is the
-underlying zoned block device name. Ex:
-
-echo "0 `blockdev --getsize ${dev}` zoned ${dev}" | dmsetup create dmz-`basename ${dev}`
diff --git a/Documentation/device-mapper/era.txt b/Documentation/device-mapper/era.txt
deleted file mode 100644
index 3c6d01be3560..000000000000
--- a/Documentation/device-mapper/era.txt
+++ /dev/null
@@ -1,108 +0,0 @@
-Introduction
-============
-
-dm-era is a target that behaves similar to the linear target. In
-addition it keeps track of which blocks were written within a user
-defined period of time called an 'era'. Each era target instance
-maintains the current era as a monotonically increasing 32-bit
-counter.
-
-Use cases include tracking changed blocks for backup software, and
-partially invalidating the contents of a cache to restore cache
-coherency after rolling back a vendor snapshot.
-
-Constructor
-===========
-
- era <metadata dev> <origin dev> <block size>
-
- metadata dev : fast device holding the persistent metadata
- origin dev : device holding data blocks that may change
- block size : block size of origin data device, granularity that is
- tracked by the target
-
-Messages
-========
-
-None of the dm messages take any arguments.
-
-checkpoint
-----------
-
-Possibly move to a new era. You shouldn't assume the era has
-incremented. After sending this message, you should check the
-current era via the status line.
-
-take_metadata_snap
-------------------
-
-Create a clone of the metadata, to allow a userland process to read it.
-
-drop_metadata_snap
-------------------
-
-Drop the metadata snapshot.
-
-Status
-======
-
-<metadata block size> <#used metadata blocks>/<#total metadata blocks>
-<current era> <held metadata root | '-'>
-
-metadata block size : Fixed block size for each metadata block in
- sectors
-#used metadata blocks : Number of metadata blocks used
-#total metadata blocks : Total number of metadata blocks
-current era : The current era
-held metadata root : The location, in blocks, of the metadata root
- that has been 'held' for userspace read
- access. '-' indicates there is no held root
-
-Detailed use case
-=================
-
-The scenario of invalidating a cache when rolling back a vendor
-snapshot was the primary use case when developing this target:
-
-Taking a vendor snapshot
-------------------------
-
-- Send a checkpoint message to the era target
-- Make a note of the current era in its status line
-- Take vendor snapshot (the era and snapshot should be forever
- associated now).
-
-Rolling back to an vendor snapshot
-----------------------------------
-
-- Cache enters passthrough mode (see: dm-cache's docs in cache.txt)
-- Rollback vendor storage
-- Take metadata snapshot
-- Ascertain which blocks have been written since the snapshot was taken
- by checking each block's era
-- Invalidate those blocks in the caching software
-- Cache returns to writeback/writethrough mode
-
-Memory usage
-============
-
-The target uses a bitset to record writes in the current era. It also
-has a spare bitset ready for switching over to a new era. Other than
-that it uses a few 4k blocks for updating metadata.
-
- (4 * nr_blocks) bytes + buffers
-
-Resilience
-==========
-
-Metadata is updated on disk before a write to a previously unwritten
-block is performed. As such dm-era should not be effected by a hard
-crash such as power failure.
-
-Userland tools
-==============
-
-Userland tools are found in the increasingly poorly named
-thin-provisioning-tools project:
-
- https://github.com/jthornber/thin-provisioning-tools
diff --git a/Documentation/device-mapper/kcopyd.txt b/Documentation/device-mapper/kcopyd.txt
deleted file mode 100644
index 820382c4cecf..000000000000
--- a/Documentation/device-mapper/kcopyd.txt
+++ /dev/null
@@ -1,47 +0,0 @@
-kcopyd
-======
-
-Kcopyd provides the ability to copy a range of sectors from one block-device
-to one or more other block-devices, with an asynchronous completion
-notification. It is used by dm-snapshot and dm-mirror.
-
-Users of kcopyd must first create a client and indicate how many memory pages
-to set aside for their copy jobs. This is done with a call to
-kcopyd_client_create().
-
- int kcopyd_client_create(unsigned int num_pages,
- struct kcopyd_client **result);
-
-To start a copy job, the user must set up io_region structures to describe
-the source and destinations of the copy. Each io_region indicates a
-block-device along with the starting sector and size of the region. The source
-of the copy is given as one io_region structure, and the destinations of the
-copy are given as an array of io_region structures.
-
- struct io_region {
- struct block_device *bdev;
- sector_t sector;
- sector_t count;
- };
-
-To start the copy, the user calls kcopyd_copy(), passing in the client
-pointer, pointers to the source and destination io_regions, the name of a
-completion callback routine, and a pointer to some context data for the copy.
-
- int kcopyd_copy(struct kcopyd_client *kc, struct io_region *from,
- unsigned int num_dests, struct io_region *dests,
- unsigned int flags, kcopyd_notify_fn fn, void *context);
-
- typedef void (*kcopyd_notify_fn)(int read_err, unsigned int write_err,
- void *context);
-
-When the copy completes, kcopyd will call the user's completion routine,
-passing back the user's context pointer. It will also indicate if a read or
-write error occurred during the copy.
-
-When a user is done with all their copy jobs, they should call
-kcopyd_client_destroy() to delete the kcopyd client, which will release the
-associated memory pages.
-
- void kcopyd_client_destroy(struct kcopyd_client *kc);
-
diff --git a/Documentation/device-mapper/linear.txt b/Documentation/device-mapper/linear.txt
deleted file mode 100644
index 7cb98d89d3f8..000000000000
--- a/Documentation/device-mapper/linear.txt
+++ /dev/null
@@ -1,61 +0,0 @@
-dm-linear
-=========
-
-Device-Mapper's "linear" target maps a linear range of the Device-Mapper
-device onto a linear range of another device. This is the basic building
-block of logical volume managers.
-
-Parameters: <dev path> <offset>
- <dev path>: Full pathname to the underlying block-device, or a
- "major:minor" device-number.
- <offset>: Starting sector within the device.
-
-
-Example scripts
-===============
-[[
-#!/bin/sh
-# Create an identity mapping for a device
-echo "0 `blockdev --getsz $1` linear $1 0" | dmsetup create identity
-]]
-
-
-[[
-#!/bin/sh
-# Join 2 devices together
-size1=`blockdev --getsz $1`
-size2=`blockdev --getsz $2`
-echo "0 $size1 linear $1 0
-$size1 $size2 linear $2 0" | dmsetup create joined
-]]
-
-
-[[
-#!/usr/bin/perl -w
-# Split a device into 4M chunks and then join them together in reverse order.
-
-my $name = "reverse";
-my $extent_size = 4 * 1024 * 2;
-my $dev = $ARGV[0];
-my $table = "";
-my $count = 0;
-
-if (!defined($dev)) {
- die("Please specify a device.\n");
-}
-
-my $dev_size = `blockdev --getsz $dev`;
-my $extents = int($dev_size / $extent_size) -
- (($dev_size % $extent_size) ? 1 : 0);
-
-while ($extents > 0) {
- my $this_start = $count * $extent_size;
- $extents--;
- $count++;
- my $this_offset = $extents * $extent_size;
-
- $table .= "$this_start $extent_size linear $dev $this_offset\n";
-}
-
-`echo \"$table\" | dmsetup create $name`;
-]]
diff --git a/Documentation/device-mapper/log-writes.txt b/Documentation/device-mapper/log-writes.txt
deleted file mode 100644
index b638d124be6a..000000000000
--- a/Documentation/device-mapper/log-writes.txt
+++ /dev/null
@@ -1,140 +0,0 @@
-dm-log-writes
-=============
-
-This target takes 2 devices, one to pass all IO to normally, and one to log all
-of the write operations to. This is intended for file system developers wishing
-to verify the integrity of metadata or data as the file system is written to.
-There is a log_write_entry written for every WRITE request and the target is
-able to take arbitrary data from userspace to insert into the log. The data
-that is in the WRITE requests is copied into the log to make the replay happen
-exactly as it happened originally.
-
-Log Ordering
-============
-
-We log things in order of completion once we are sure the write is no longer in
-cache. This means that normal WRITE requests are not actually logged until the
-next REQ_PREFLUSH request. This is to make it easier for userspace to replay
-the log in a way that correlates to what is on disk and not what is in cache,
-to make it easier to detect improper waiting/flushing.
-
-This works by attaching all WRITE requests to a list once the write completes.
-Once we see a REQ_PREFLUSH request we splice this list onto the request and once
-the FLUSH request completes we log all of the WRITEs and then the FLUSH. Only
-completed WRITEs, at the time the REQ_PREFLUSH is issued, are added in order to
-simulate the worst case scenario with regard to power failures. Consider the
-following example (W means write, C means complete):
-
-W1,W2,W3,C3,C2,Wflush,C1,Cflush
-
-The log would show the following
-
-W3,W2,flush,W1....
-
-Again this is to simulate what is actually on disk, this allows us to detect
-cases where a power failure at a particular point in time would create an
-inconsistent file system.
-
-Any REQ_FUA requests bypass this flushing mechanism and are logged as soon as
-they complete as those requests will obviously bypass the device cache.
-
-Any REQ_OP_DISCARD requests are treated like WRITE requests. Otherwise we would
-have all the DISCARD requests, and then the WRITE requests and then the FLUSH
-request. Consider the following example:
-
-WRITE block 1, DISCARD block 1, FLUSH
-
-If we logged DISCARD when it completed, the replay would look like this
-
-DISCARD 1, WRITE 1, FLUSH
-
-which isn't quite what happened and wouldn't be caught during the log replay.
-
-Target interface
-================
-
-i) Constructor
-
- log-writes <dev_path> <log_dev_path>
-
- dev_path : Device that all of the IO will go to normally.
- log_dev_path : Device where the log entries are written to.
-
-ii) Status
-
- <#logged entries> <highest allocated sector>
-
- #logged entries : Number of logged entries
- highest allocated sector : Highest allocated sector
-
-iii) Messages
-
- mark <description>
-
- You can use a dmsetup message to set an arbitrary mark in a log.
- For example say you want to fsck a file system after every
- write, but first you need to replay up to the mkfs to make sure
- we're fsck'ing something reasonable, you would do something like
- this:
-
- mkfs.btrfs -f /dev/mapper/log
- dmsetup message log 0 mark mkfs
- <run test>
-
- This would allow you to replay the log up to the mkfs mark and
- then replay from that point on doing the fsck check in the
- interval that you want.
-
- Every log has a mark at the end labeled "dm-log-writes-end".
-
-Userspace component
-===================
-
-There is a userspace tool that will replay the log for you in various ways.
-It can be found here: https://github.com/josefbacik/log-writes
-
-Example usage
-=============
-
-Say you want to test fsync on your file system. You would do something like
-this:
-
-TABLE="0 $(blockdev --getsz /dev/sdb) log-writes /dev/sdb /dev/sdc"
-dmsetup create log --table "$TABLE"
-mkfs.btrfs -f /dev/mapper/log
-dmsetup message log 0 mark mkfs
-
-mount /dev/mapper/log /mnt/btrfs-test
-<some test that does fsync at the end>
-dmsetup message log 0 mark fsync
-md5sum /mnt/btrfs-test/foo
-umount /mnt/btrfs-test
-
-dmsetup remove log
-replay-log --log /dev/sdc --replay /dev/sdb --end-mark fsync
-mount /dev/sdb /mnt/btrfs-test
-md5sum /mnt/btrfs-test/foo
-<verify md5sum's are correct>
-
-Another option is to do a complicated file system operation and verify the file
-system is consistent during the entire operation. You could do this with:
-
-TABLE="0 $(blockdev --getsz /dev/sdb) log-writes /dev/sdb /dev/sdc"
-dmsetup create log --table "$TABLE"
-mkfs.btrfs -f /dev/mapper/log
-dmsetup message log 0 mark mkfs
-
-mount /dev/mapper/log /mnt/btrfs-test
-<fsstress to dirty the fs>
-btrfs filesystem balance /mnt/btrfs-test
-umount /mnt/btrfs-test
-dmsetup remove log
-
-replay-log --log /dev/sdc --replay /dev/sdb --end-mark mkfs
-btrfsck /dev/sdb
-replay-log --log /dev/sdc --replay /dev/sdb --start-mark mkfs \
- --fsck "btrfsck /dev/sdb" --check fua
-
-And that will replay the log until it sees a FUA request, run the fsck command
-and if the fsck passes it will replay to the next FUA, until it is completed or
-the fsck command exists abnormally.
diff --git a/Documentation/device-mapper/persistent-data.txt b/Documentation/device-mapper/persistent-data.txt
deleted file mode 100644
index a333bcb3a6c2..000000000000
--- a/Documentation/device-mapper/persistent-data.txt
+++ /dev/null
@@ -1,84 +0,0 @@
-Introduction
-============
-
-The more-sophisticated device-mapper targets require complex metadata
-that is managed in kernel. In late 2010 we were seeing that various
-different targets were rolling their own data structures, for example:
-
-- Mikulas Patocka's multisnap implementation
-- Heinz Mauelshagen's thin provisioning target
-- Another btree-based caching target posted to dm-devel
-- Another multi-snapshot target based on a design of Daniel Phillips
-
-Maintaining these data structures takes a lot of work, so if possible
-we'd like to reduce the number.
-
-The persistent-data library is an attempt to provide a re-usable
-framework for people who want to store metadata in device-mapper
-targets. It's currently used by the thin-provisioning target and an
-upcoming hierarchical storage target.
-
-Overview
-========
-
-The main documentation is in the header files which can all be found
-under drivers/md/persistent-data.
-
-The block manager
------------------
-
-dm-block-manager.[hc]
-
-This provides access to the data on disk in fixed sized-blocks. There
-is a read/write locking interface to prevent concurrent accesses, and
-keep data that is being used in the cache.
-
-Clients of persistent-data are unlikely to use this directly.
-
-The transaction manager
------------------------
-
-dm-transaction-manager.[hc]
-
-This restricts access to blocks and enforces copy-on-write semantics.
-The only way you can get hold of a writable block through the
-transaction manager is by shadowing an existing block (ie. doing
-copy-on-write) or allocating a fresh one. Shadowing is elided within
-the same transaction so performance is reasonable. The commit method
-ensures that all data is flushed before it writes the superblock.
-On power failure your metadata will be as it was when last committed.
-
-The Space Maps
---------------
-
-dm-space-map.h
-dm-space-map-metadata.[hc]
-dm-space-map-disk.[hc]
-
-On-disk data structures that keep track of reference counts of blocks.
-Also acts as the allocator of new blocks. Currently two
-implementations: a simpler one for managing blocks on a different
-device (eg. thinly-provisioned data blocks); and one for managing
-the metadata space. The latter is complicated by the need to store
-its own data within the space it's managing.
-
-The data structures
--------------------
-
-dm-btree.[hc]
-dm-btree-remove.c
-dm-btree-spine.c
-dm-btree-internal.h
-
-Currently there is only one data structure, a hierarchical btree.
-There are plans to add more. For example, something with an
-array-like interface would see a lot of use.
-
-The btree is 'hierarchical' in that you can define it to be composed
-of nested btrees, and take multiple keys. For example, the
-thin-provisioning target uses a btree with two levels of nesting.
-The first maps a device id to a mapping tree, and that in turn maps a
-virtual block to a physical block.
-
-Values stored in the btrees can have arbitrary size. Keys are always
-64bits, although nesting allows you to use multiple keys.
diff --git a/Documentation/device-mapper/snapshot.txt b/Documentation/device-mapper/snapshot.txt
deleted file mode 100644
index b8bbb516f989..000000000000
--- a/Documentation/device-mapper/snapshot.txt
+++ /dev/null
@@ -1,176 +0,0 @@
-Device-mapper snapshot support
-==============================
-
-Device-mapper allows you, without massive data copying:
-
-*) To create snapshots of any block device i.e. mountable, saved states of
-the block device which are also writable without interfering with the
-original content;
-*) To create device "forks", i.e. multiple different versions of the
-same data stream.
-*) To merge a snapshot of a block device back into the snapshot's origin
-device.
-
-In the first two cases, dm copies only the chunks of data that get
-changed and uses a separate copy-on-write (COW) block device for
-storage.
-
-For snapshot merge the contents of the COW storage are merged back into
-the origin device.
-
-
-There are three dm targets available:
-snapshot, snapshot-origin, and snapshot-merge.
-
-*) snapshot-origin <origin>
-
-which will normally have one or more snapshots based on it.
-Reads will be mapped directly to the backing device. For each write, the
-original data will be saved in the <COW device> of each snapshot to keep
-its visible content unchanged, at least until the <COW device> fills up.
-
-
-*) snapshot <origin> <COW device> <persistent?> <chunksize>
-
-A snapshot of the <origin> block device is created. Changed chunks of
-<chunksize> sectors will be stored on the <COW device>. Writes will
-only go to the <COW device>. Reads will come from the <COW device> or
-from <origin> for unchanged data. <COW device> will often be
-smaller than the origin and if it fills up the snapshot will become
-useless and be disabled, returning errors. So it is important to monitor
-the amount of free space and expand the <COW device> before it fills up.
-
-<persistent?> is P (Persistent) or N (Not persistent - will not survive
-after reboot). O (Overflow) can be added as a persistent store option
-to allow userspace to advertise its support for seeing "Overflow" in the
-snapshot status. So supported store types are "P", "PO" and "N".
-
-The difference between persistent and transient is with transient
-snapshots less metadata must be saved on disk - they can be kept in
-memory by the kernel.
-
-When loading or unloading the snapshot target, the corresponding
-snapshot-origin or snapshot-merge target must be suspended. A failure to
-suspend the origin target could result in data corruption.
-
-
-* snapshot-merge <origin> <COW device> <persistent> <chunksize>
-
-takes the same table arguments as the snapshot target except it only
-works with persistent snapshots. This target assumes the role of the
-"snapshot-origin" target and must not be loaded if the "snapshot-origin"
-is still present for <origin>.
-
-Creates a merging snapshot that takes control of the changed chunks
-stored in the <COW device> of an existing snapshot, through a handover
-procedure, and merges these chunks back into the <origin>. Once merging
-has started (in the background) the <origin> may be opened and the merge
-will continue while I/O is flowing to it. Changes to the <origin> are
-deferred until the merging snapshot's corresponding chunk(s) have been
-merged. Once merging has started the snapshot device, associated with
-the "snapshot" target, will return -EIO when accessed.
-
-
-How snapshot is used by LVM2
-============================
-When you create the first LVM2 snapshot of a volume, four dm devices are used:
-
-1) a device containing the original mapping table of the source volume;
-2) a device used as the <COW device>;
-3) a "snapshot" device, combining #1 and #2, which is the visible snapshot
- volume;
-4) the "original" volume (which uses the device number used by the original
- source volume), whose table is replaced by a "snapshot-origin" mapping
- from device #1.
-
-A fixed naming scheme is used, so with the following commands:
-
-lvcreate -L 1G -n base volumeGroup
-lvcreate -L 100M --snapshot -n snap volumeGroup/base
-
-we'll have this situation (with volumes in above order):
-
-# dmsetup table|grep volumeGroup
-
-volumeGroup-base-real: 0 2097152 linear 8:19 384
-volumeGroup-snap-cow: 0 204800 linear 8:19 2097536
-volumeGroup-snap: 0 2097152 snapshot 254:11 254:12 P 16
-volumeGroup-base: 0 2097152 snapshot-origin 254:11
-
-# ls -lL /dev/mapper/volumeGroup-*
-brw------- 1 root root 254, 11 29 ago 18:15 /dev/mapper/volumeGroup-base-real
-brw------- 1 root root 254, 12 29 ago 18:15 /dev/mapper/volumeGroup-snap-cow
-brw------- 1 root root 254, 13 29 ago 18:15 /dev/mapper/volumeGroup-snap
-brw------- 1 root root 254, 10 29 ago 18:14 /dev/mapper/volumeGroup-base
-
-
-How snapshot-merge is used by LVM2
-==================================
-A merging snapshot assumes the role of the "snapshot-origin" while
-merging. As such the "snapshot-origin" is replaced with
-"snapshot-merge". The "-real" device is not changed and the "-cow"
-device is renamed to <origin name>-cow to aid LVM2's cleanup of the
-merging snapshot after it completes. The "snapshot" that hands over its
-COW device to the "snapshot-merge" is deactivated (unless using lvchange
---refresh); but if it is left active it will simply return I/O errors.
-
-A snapshot will merge into its origin with the following command:
-
-lvconvert --merge volumeGroup/snap
-
-we'll now have this situation:
-
-# dmsetup table|grep volumeGroup
-
-volumeGroup-base-real: 0 2097152 linear 8:19 384
-volumeGroup-base-cow: 0 204800 linear 8:19 2097536
-volumeGroup-base: 0 2097152 snapshot-merge 254:11 254:12 P 16
-
-# ls -lL /dev/mapper/volumeGroup-*
-brw------- 1 root root 254, 11 29 ago 18:15 /dev/mapper/volumeGroup-base-real
-brw------- 1 root root 254, 12 29 ago 18:16 /dev/mapper/volumeGroup-base-cow
-brw------- 1 root root 254, 10 29 ago 18:16 /dev/mapper/volumeGroup-base
-
-
-How to determine when a merging is complete
-===========================================
-The snapshot-merge and snapshot status lines end with:
- <sectors_allocated>/<total_sectors> <metadata_sectors>
-
-Both <sectors_allocated> and <total_sectors> include both data and metadata.
-During merging, the number of sectors allocated gets smaller and
-smaller. Merging has finished when the number of sectors holding data
-is zero, in other words <sectors_allocated> == <metadata_sectors>.
-
-Here is a practical example (using a hybrid of lvm and dmsetup commands):
-
-# lvs
- LV VG Attr LSize Origin Snap% Move Log Copy% Convert
- base volumeGroup owi-a- 4.00g
- snap volumeGroup swi-a- 1.00g base 18.97
-
-# dmsetup status volumeGroup-snap
-0 8388608 snapshot 397896/2097152 1560
- ^^^^ metadata sectors
-
-# lvconvert --merge -b volumeGroup/snap
- Merging of volume snap started.
-
-# lvs volumeGroup/snap
- LV VG Attr LSize Origin Snap% Move Log Copy% Convert
- base volumeGroup Owi-a- 4.00g 17.23
-
-# dmsetup status volumeGroup-base
-0 8388608 snapshot-merge 281688/2097152 1104
-
-# dmsetup status volumeGroup-base
-0 8388608 snapshot-merge 180480/2097152 712
-
-# dmsetup status volumeGroup-base
-0 8388608 snapshot-merge 16/2097152 16
-
-Merging has finished.
-
-# lvs
- LV VG Attr LSize Origin Snap% Move Log Copy% Convert
- base volumeGroup owi-a- 4.00g
diff --git a/Documentation/device-mapper/statistics.txt b/Documentation/device-mapper/statistics.txt
deleted file mode 100644
index 170ac02a1f50..000000000000
--- a/Documentation/device-mapper/statistics.txt
+++ /dev/null
@@ -1,223 +0,0 @@
-DM statistics
-=============
-
-Device Mapper supports the collection of I/O statistics on user-defined
-regions of a DM device. If no regions are defined no statistics are
-collected so there isn't any performance impact. Only bio-based DM
-devices are currently supported.
-
-Each user-defined region specifies a starting sector, length and step.
-Individual statistics will be collected for each step-sized area within
-the range specified.
-
-The I/O statistics counters for each step-sized area of a region are
-in the same format as /sys/block/*/stat or /proc/diskstats (see:
-Documentation/iostats.txt). But two extra counters (12 and 13) are
-provided: total time spent reading and writing. When the histogram
-argument is used, the 14th parameter is reported that represents the
-histogram of latencies. All these counters may be accessed by sending
-the @stats_print message to the appropriate DM device via dmsetup.
-
-The reported times are in milliseconds and the granularity depends on
-the kernel ticks. When the option precise_timestamps is used, the
-reported times are in nanoseconds.
-
-Each region has a corresponding unique identifier, which we call a
-region_id, that is assigned when the region is created. The region_id
-must be supplied when querying statistics about the region, deleting the
-region, etc. Unique region_ids enable multiple userspace programs to
-request and process statistics for the same DM device without stepping
-on each other's data.
-
-The creation of DM statistics will allocate memory via kmalloc or
-fallback to using vmalloc space. At most, 1/4 of the overall system
-memory may be allocated by DM statistics. The admin can see how much
-memory is used by reading
-/sys/module/dm_mod/parameters/stats_current_allocated_bytes
-
-Messages
-========
-
- @stats_create <range> <step>
- [<number_of_optional_arguments> <optional_arguments>...]
- [<program_id> [<aux_data>]]
-
- Create a new region and return the region_id.
-
- <range>
- "-" - whole device
- "<start_sector>+<length>" - a range of <length> 512-byte sectors
- starting with <start_sector>.
-
- <step>
- "<area_size>" - the range is subdivided into areas each containing
- <area_size> sectors.
- "/<number_of_areas>" - the range is subdivided into the specified
- number of areas.
-
- <number_of_optional_arguments>
- The number of optional arguments
-
- <optional_arguments>
- The following optional arguments are supported
- precise_timestamps - use precise timer with nanosecond resolution
- instead of the "jiffies" variable. When this argument is
- used, the resulting times are in nanoseconds instead of
- milliseconds. Precise timestamps are a little bit slower
- to obtain than jiffies-based timestamps.
- histogram:n1,n2,n3,n4,... - collect histogram of latencies. The
- numbers n1, n2, etc are times that represent the boundaries
- of the histogram. If precise_timestamps is not used, the
- times are in milliseconds, otherwise they are in
- nanoseconds. For each range, the kernel will report the
- number of requests that completed within this range. For
- example, if we use "histogram:10,20,30", the kernel will
- report four numbers a:b:c:d. a is the number of requests
- that took 0-10 ms to complete, b is the number of requests
- that took 10-20 ms to complete, c is the number of requests
- that took 20-30 ms to complete and d is the number of
- requests that took more than 30 ms to complete.
-
- <program_id>
- An optional parameter. A name that uniquely identifies
- the userspace owner of the range. This groups ranges together
- so that userspace programs can identify the ranges they
- created and ignore those created by others.
- The kernel returns this string back in the output of
- @stats_list message, but it doesn't use it for anything else.
- If we omit the number of optional arguments, program id must not
- be a number, otherwise it would be interpreted as the number of
- optional arguments.
-
- <aux_data>
- An optional parameter. A word that provides auxiliary data
- that is useful to the client program that created the range.
- The kernel returns this string back in the output of
- @stats_list message, but it doesn't use this value for anything.
-
- @stats_delete <region_id>
-
- Delete the region with the specified id.
-
- <region_id>
- region_id returned from @stats_create
-
- @stats_clear <region_id>
-
- Clear all the counters except the in-flight i/o counters.
-
- <region_id>
- region_id returned from @stats_create
-
- @stats_list [<program_id>]
-
- List all regions registered with @stats_create.
-
- <program_id>
- An optional parameter.
- If this parameter is specified, only matching regions
- are returned.
- If it is not specified, all regions are returned.
-
- Output format:
- <region_id>: <start_sector>+<length> <step> <program_id> <aux_data>
- precise_timestamps histogram:n1,n2,n3,...
-
- The strings "precise_timestamps" and "histogram" are printed only
- if they were specified when creating the region.
-
- @stats_print <region_id> [<starting_line> <number_of_lines>]
-
- Print counters for each step-sized area of a region.
-
- <region_id>
- region_id returned from @stats_create
-
- <starting_line>
- The index of the starting line in the output.
- If omitted, all lines are returned.
-
- <number_of_lines>
- The number of lines to include in the output.
- If omitted, all lines are returned.
-
- Output format for each step-sized area of a region:
-
- <start_sector>+<length> counters
-
- The first 11 counters have the same meaning as
- /sys/block/*/stat or /proc/diskstats.
-
- Please refer to Documentation/iostats.txt for details.
-
- 1. the number of reads completed
- 2. the number of reads merged
- 3. the number of sectors read
- 4. the number of milliseconds spent reading
- 5. the number of writes completed
- 6. the number of writes merged
- 7. the number of sectors written
- 8. the number of milliseconds spent writing
- 9. the number of I/Os currently in progress
- 10. the number of milliseconds spent doing I/Os
- 11. the weighted number of milliseconds spent doing I/Os
-
- Additional counters:
- 12. the total time spent reading in milliseconds
- 13. the total time spent writing in milliseconds
-
- @stats_print_clear <region_id> [<starting_line> <number_of_lines>]
-
- Atomically print and then clear all the counters except the
- in-flight i/o counters. Useful when the client consuming the
- statistics does not want to lose any statistics (those updated
- between printing and clearing).
-
- <region_id>
- region_id returned from @stats_create
-
- <starting_line>
- The index of the starting line in the output.
- If omitted, all lines are printed and then cleared.
-
- <number_of_lines>
- The number of lines to process.
- If omitted, all lines are printed and then cleared.
-
- @stats_set_aux <region_id> <aux_data>
-
- Store auxiliary data aux_data for the specified region.
-
- <region_id>
- region_id returned from @stats_create
-
- <aux_data>
- The string that identifies data which is useful to the client
- program that created the range. The kernel returns this
- string back in the output of @stats_list message, but it
- doesn't use this value for anything.
-
-Examples
-========
-
-Subdivide the DM device 'vol' into 100 pieces and start collecting
-statistics on them:
-
- dmsetup message vol 0 @stats_create - /100
-
-Set the auxiliary data string to "foo bar baz" (the escape for each
-space must also be escaped, otherwise the shell will consume them):
-
- dmsetup message vol 0 @stats_set_aux 0 foo\\ bar\\ baz
-
-List the statistics:
-
- dmsetup message vol 0 @stats_list
-
-Print the statistics:
-
- dmsetup message vol 0 @stats_print 0
-
-Delete the statistics:
-
- dmsetup message vol 0 @stats_delete 0
diff --git a/Documentation/device-mapper/striped.txt b/Documentation/device-mapper/striped.txt
deleted file mode 100644
index 07ec492cceee..000000000000
--- a/Documentation/device-mapper/striped.txt
+++ /dev/null
@@ -1,57 +0,0 @@
-dm-stripe
-=========
-
-Device-Mapper's "striped" target is used to create a striped (i.e. RAID-0)
-device across one or more underlying devices. Data is written in "chunks",
-with consecutive chunks rotating among the underlying devices. This can
-potentially provide improved I/O throughput by utilizing several physical
-devices in parallel.
-
-Parameters: <num devs> <chunk size> [<dev path> <offset>]+
- <num devs>: Number of underlying devices.
- <chunk size>: Size of each chunk of data. Must be at least as
- large as the system's PAGE_SIZE.
- <dev path>: Full pathname to the underlying block-device, or a
- "major:minor" device-number.
- <offset>: Starting sector within the device.
-
-One or more underlying devices can be specified. The striped device size must
-be a multiple of the chunk size multiplied by the number of underlying devices.
-
-
-Example scripts
-===============
-
-[[
-#!/usr/bin/perl -w
-# Create a striped device across any number of underlying devices. The device
-# will be called "stripe_dev" and have a chunk-size of 128k.
-
-my $chunk_size = 128 * 2;
-my $dev_name = "stripe_dev";
-my $num_devs = @ARGV;
-my @devs = @ARGV;
-my ($min_dev_size, $stripe_dev_size, $i);
-
-if (!$num_devs) {
- die("Specify at least one device\n");
-}
-
-$min_dev_size = `blockdev --getsz $devs[0]`;
-for ($i = 1; $i < $num_devs; $i++) {
- my $this_size = `blockdev --getsz $devs[$i]`;
- $min_dev_size = ($min_dev_size < $this_size) ?
- $min_dev_size : $this_size;
-}
-
-$stripe_dev_size = $min_dev_size * $num_devs;
-$stripe_dev_size -= $stripe_dev_size % ($chunk_size * $num_devs);
-
-$table = "0 $stripe_dev_size striped $num_devs $chunk_size";
-for ($i = 0; $i < $num_devs; $i++) {
- $table .= " $devs[$i] 0";
-}
-
-`echo $table | dmsetup create $dev_name`;
-]]
-
diff --git a/Documentation/device-mapper/switch.txt b/Documentation/device-mapper/switch.txt
deleted file mode 100644
index 5bd4831db4a8..000000000000
--- a/Documentation/device-mapper/switch.txt
+++ /dev/null
@@ -1,138 +0,0 @@
-dm-switch
-=========
-
-The device-mapper switch target creates a device that supports an
-arbitrary mapping of fixed-size regions of I/O across a fixed set of
-paths. The path used for any specific region can be switched
-dynamically by sending the target a message.
-
-It maps I/O to underlying block devices efficiently when there is a large
-number of fixed-sized address regions but there is no simple pattern
-that would allow for a compact representation of the mapping such as
-dm-stripe.
-
-Background
-----------
-
-Dell EqualLogic and some other iSCSI storage arrays use a distributed
-frameless architecture. In this architecture, the storage group
-consists of a number of distinct storage arrays ("members") each having
-independent controllers, disk storage and network adapters. When a LUN
-is created it is spread across multiple members. The details of the
-spreading are hidden from initiators connected to this storage system.
-The storage group exposes a single target discovery portal, no matter
-how many members are being used. When iSCSI sessions are created, each
-session is connected to an eth port on a single member. Data to a LUN
-can be sent on any iSCSI session, and if the blocks being accessed are
-stored on another member the I/O will be forwarded as required. This
-forwarding is invisible to the initiator. The storage layout is also
-dynamic, and the blocks stored on disk may be moved from member to
-member as needed to balance the load.
-
-This architecture simplifies the management and configuration of both
-the storage group and initiators. In a multipathing configuration, it
-is possible to set up multiple iSCSI sessions to use multiple network
-interfaces on both the host and target to take advantage of the
-increased network bandwidth. An initiator could use a simple round
-robin algorithm to send I/O across all paths and let the storage array
-members forward it as necessary, but there is a performance advantage to
-sending data directly to the correct member.
-
-A device-mapper table already lets you map different regions of a
-device onto different targets. However in this architecture the LUN is
-spread with an address region size on the order of 10s of MBs, which
-means the resulting table could have more than a million entries and
-consume far too much memory.
-
-Using this device-mapper switch target we can now build a two-layer
-device hierarchy:
-
- Upper Tier - Determine which array member the I/O should be sent to.
- Lower Tier - Load balance amongst paths to a particular member.
-
-The lower tier consists of a single dm multipath device for each member.
-Each of these multipath devices contains the set of paths directly to
-the array member in one priority group, and leverages existing path
-selectors to load balance amongst these paths. We also build a
-non-preferred priority group containing paths to other array members for
-failover reasons.
-
-The upper tier consists of a single dm-switch device. This device uses
-a bitmap to look up the location of the I/O and choose the appropriate
-lower tier device to route the I/O. By using a bitmap we are able to
-use 4 bits for each address range in a 16 member group (which is very
-large for us). This is a much denser representation than the dm table
-b-tree can achieve.
-
-Construction Parameters
-=======================
-
- <num_paths> <region_size> <num_optional_args> [<optional_args>...]
- [<dev_path> <offset>]+
-
-<num_paths>
- The number of paths across which to distribute the I/O.
-
-<region_size>
- The number of 512-byte sectors in a region. Each region can be redirected
- to any of the available paths.
-
-<num_optional_args>
- The number of optional arguments. Currently, no optional arguments
- are supported and so this must be zero.
-
-<dev_path>
- The block device that represents a specific path to the device.
-
-<offset>
- The offset of the start of data on the specific <dev_path> (in units
- of 512-byte sectors). This number is added to the sector number when
- forwarding the request to the specific path. Typically it is zero.
-
-Messages
-========
-
-set_region_mappings <index>:<path_nr> [<index>]:<path_nr> [<index>]:<path_nr>...
-
-Modify the region table by specifying which regions are redirected to
-which paths.
-
-<index>
- The region number (region size was specified in constructor parameters).
- If index is omitted, the next region (previous index + 1) is used.
- Expressed in hexadecimal (WITHOUT any prefix like 0x).
-
-<path_nr>
- The path number in the range 0 ... (<num_paths> - 1).
- Expressed in hexadecimal (WITHOUT any prefix like 0x).
-
-R<n>,<m>
- This parameter allows repetitive patterns to be loaded quickly. <n> and <m>
- are hexadecimal numbers. The last <n> mappings are repeated in the next <m>
- slots.
-
-Status
-======
-
-No status line is reported.
-
-Example
-=======
-
-Assume that you have volumes vg1/switch0 vg1/switch1 vg1/switch2 with
-the same size.
-
-Create a switch device with 64kB region size:
- dmsetup create switch --table "0 `blockdev --getsz /dev/vg1/switch0`
- switch 3 128 0 /dev/vg1/switch0 0 /dev/vg1/switch1 0 /dev/vg1/switch2 0"
-
-Set mappings for the first 7 entries to point to devices switch0, switch1,
-switch2, switch0, switch1, switch2, switch1:
- dmsetup message switch 0 set_region_mappings 0:0 :1 :2 :0 :1 :2 :1
-
-Set repetitive mapping. This command:
- dmsetup message switch 0 set_region_mappings 1000:1 :2 R2,10
-is equivalent to:
- dmsetup message switch 0 set_region_mappings 1000:1 :2 :1 :2 :1 :2 :1 :2 \
- :1 :2 :1 :2 :1 :2 :1 :2 :1 :2
-
diff --git a/Documentation/device-mapper/thin-provisioning.txt b/Documentation/device-mapper/thin-provisioning.txt
deleted file mode 100644
index 883e7ca5f745..000000000000
--- a/Documentation/device-mapper/thin-provisioning.txt
+++ /dev/null
@@ -1,411 +0,0 @@
-Introduction
-============
-
-This document describes a collection of device-mapper targets that
-between them implement thin-provisioning and snapshots.
-
-The main highlight of this implementation, compared to the previous
-implementation of snapshots, is that it allows many virtual devices to
-be stored on the same data volume. This simplifies administration and
-allows the sharing of data between volumes, thus reducing disk usage.
-
-Another significant feature is support for an arbitrary depth of
-recursive snapshots (snapshots of snapshots of snapshots ...). The
-previous implementation of snapshots did this by chaining together
-lookup tables, and so performance was O(depth). This new
-implementation uses a single data structure to avoid this degradation
-with depth. Fragmentation may still be an issue, however, in some
-scenarios.
-
-Metadata is stored on a separate device from data, giving the
-administrator some freedom, for example to:
-
-- Improve metadata resilience by storing metadata on a mirrored volume
- but data on a non-mirrored one.
-
-- Improve performance by storing the metadata on SSD.
-
-Status
-======
-
-These targets are considered safe for production use. But different use
-cases will have different performance characteristics, for example due
-to fragmentation of the data volume.
-
-If you find this software is not performing as expected please mail
-dm-devel@redhat.com with details and we'll try our best to improve
-things for you.
-
-Userspace tools for checking and repairing the metadata have been fully
-developed and are available as 'thin_check' and 'thin_repair'. The name
-of the package that provides these utilities varies by distribution (on
-a Red Hat distribution it is named 'device-mapper-persistent-data').
-
-Cookbook
-========
-
-This section describes some quick recipes for using thin provisioning.
-They use the dmsetup program to control the device-mapper driver
-directly. End users will be advised to use a higher-level volume
-manager such as LVM2 once support has been added.
-
-Pool device
------------
-
-The pool device ties together the metadata volume and the data volume.
-It maps I/O linearly to the data volume and updates the metadata via
-two mechanisms:
-
-- Function calls from the thin targets
-
-- Device-mapper 'messages' from userspace which control the creation of new
- virtual devices amongst other things.
-
-Setting up a fresh pool device
-------------------------------
-
-Setting up a pool device requires a valid metadata device, and a
-data device. If you do not have an existing metadata device you can
-make one by zeroing the first 4k to indicate empty metadata.
-
- dd if=/dev/zero of=$metadata_dev bs=4096 count=1
-
-The amount of metadata you need will vary according to how many blocks
-are shared between thin devices (i.e. through snapshots). If you have
-less sharing than average you'll need a larger-than-average metadata device.
-
-As a guide, we suggest you calculate the number of bytes to use in the
-metadata device as 48 * $data_dev_size / $data_block_size but round it up
-to 2MB if the answer is smaller. If you're creating large numbers of
-snapshots which are recording large amounts of change, you may find you
-need to increase this.
-
-The largest size supported is 16GB: If the device is larger,
-a warning will be issued and the excess space will not be used.
-
-Reloading a pool table
-----------------------
-
-You may reload a pool's table, indeed this is how the pool is resized
-if it runs out of space. (N.B. While specifying a different metadata
-device when reloading is not forbidden at the moment, things will go
-wrong if it does not route I/O to exactly the same on-disk location as
-previously.)
-
-Using an existing pool device
------------------------------
-
- dmsetup create pool \
- --table "0 20971520 thin-pool $metadata_dev $data_dev \
- $data_block_size $low_water_mark"
-
-$data_block_size gives the smallest unit of disk space that can be
-allocated at a time expressed in units of 512-byte sectors.
-$data_block_size must be between 128 (64KB) and 2097152 (1GB) and a
-multiple of 128 (64KB). $data_block_size cannot be changed after the
-thin-pool is created. People primarily interested in thin provisioning
-may want to use a value such as 1024 (512KB). People doing lots of
-snapshotting may want a smaller value such as 128 (64KB). If you are
-not zeroing newly-allocated data, a larger $data_block_size in the
-region of 256000 (128MB) is suggested.
-
-$low_water_mark is expressed in blocks of size $data_block_size. If
-free space on the data device drops below this level then a dm event
-will be triggered which a userspace daemon should catch allowing it to
-extend the pool device. Only one such event will be sent.
-
-No special event is triggered if a just resumed device's free space is below
-the low water mark. However, resuming a device always triggers an
-event; a userspace daemon should verify that free space exceeds the low
-water mark when handling this event.
-
-A low water mark for the metadata device is maintained in the kernel and
-will trigger a dm event if free space on the metadata device drops below
-it.
-
-Updating on-disk metadata
--------------------------
-
-On-disk metadata is committed every time a FLUSH or FUA bio is written.
-If no such requests are made then commits will occur every second. This
-means the thin-provisioning target behaves like a physical disk that has
-a volatile write cache. If power is lost you may lose some recent
-writes. The metadata should always be consistent in spite of any crash.
-
-If data space is exhausted the pool will either error or queue IO
-according to the configuration (see: error_if_no_space). If metadata
-space is exhausted or a metadata operation fails: the pool will error IO
-until the pool is taken offline and repair is performed to 1) fix any
-potential inconsistencies and 2) clear the flag that imposes repair.
-Once the pool's metadata device is repaired it may be resized, which
-will allow the pool to return to normal operation. Note that if a pool
-is flagged as needing repair, the pool's data and metadata devices
-cannot be resized until repair is performed. It should also be noted
-that when the pool's metadata space is exhausted the current metadata
-transaction is aborted. Given that the pool will cache IO whose
-completion may have already been acknowledged to upper IO layers
-(e.g. filesystem) it is strongly suggested that consistency checks
-(e.g. fsck) be performed on those layers when repair of the pool is
-required.
-
-Thin provisioning
------------------
-
-i) Creating a new thinly-provisioned volume.
-
- To create a new thinly- provisioned volume you must send a message to an
- active pool device, /dev/mapper/pool in this example.
-
- dmsetup message /dev/mapper/pool 0 "create_thin 0"
-
- Here '0' is an identifier for the volume, a 24-bit number. It's up
- to the caller to allocate and manage these identifiers. If the
- identifier is already in use, the message will fail with -EEXIST.
-
-ii) Using a thinly-provisioned volume.
-
- Thinly-provisioned volumes are activated using the 'thin' target:
-
- dmsetup create thin --table "0 2097152 thin /dev/mapper/pool 0"
-
- The last parameter is the identifier for the thinp device.
-
-Internal snapshots
-------------------
-
-i) Creating an internal snapshot.
-
- Snapshots are created with another message to the pool.
-
- N.B. If the origin device that you wish to snapshot is active, you
- must suspend it before creating the snapshot to avoid corruption.
- This is NOT enforced at the moment, so please be careful!
-
- dmsetup suspend /dev/mapper/thin
- dmsetup message /dev/mapper/pool 0 "create_snap 1 0"
- dmsetup resume /dev/mapper/thin
-
- Here '1' is the identifier for the volume, a 24-bit number. '0' is the
- identifier for the origin device.
-
-ii) Using an internal snapshot.
-
- Once created, the user doesn't have to worry about any connection
- between the origin and the snapshot. Indeed the snapshot is no
- different from any other thinly-provisioned device and can be
- snapshotted itself via the same method. It's perfectly legal to
- have only one of them active, and there's no ordering requirement on
- activating or removing them both. (This differs from conventional
- device-mapper snapshots.)
-
- Activate it exactly the same way as any other thinly-provisioned volume:
-
- dmsetup create snap --table "0 2097152 thin /dev/mapper/pool 1"
-
-External snapshots
-------------------
-
-You can use an external _read only_ device as an origin for a
-thinly-provisioned volume. Any read to an unprovisioned area of the
-thin device will be passed through to the origin. Writes trigger
-the allocation of new blocks as usual.
-
-One use case for this is VM hosts that want to run guests on
-thinly-provisioned volumes but have the base image on another device
-(possibly shared between many VMs).
-
-You must not write to the origin device if you use this technique!
-Of course, you may write to the thin device and take internal snapshots
-of the thin volume.
-
-i) Creating a snapshot of an external device
-
- This is the same as creating a thin device.
- You don't mention the origin at this stage.
-
- dmsetup message /dev/mapper/pool 0 "create_thin 0"
-
-ii) Using a snapshot of an external device.
-
- Append an extra parameter to the thin target specifying the origin:
-
- dmsetup create snap --table "0 2097152 thin /dev/mapper/pool 0 /dev/image"
-
- N.B. All descendants (internal snapshots) of this snapshot require the
- same extra origin parameter.
-
-Deactivation
-------------
-
-All devices using a pool must be deactivated before the pool itself
-can be.
-
- dmsetup remove thin
- dmsetup remove snap
- dmsetup remove pool
-
-Reference
-=========
-
-'thin-pool' target
-------------------
-
-i) Constructor
-
- thin-pool <metadata dev> <data dev> <data block size (sectors)> \
- <low water mark (blocks)> [<number of feature args> [<arg>]*]
-
- Optional feature arguments:
-
- skip_block_zeroing: Skip the zeroing of newly-provisioned blocks.
-
- ignore_discard: Disable discard support.
-
- no_discard_passdown: Don't pass discards down to the underlying
- data device, but just remove the mapping.
-
- read_only: Don't allow any changes to be made to the pool
- metadata. This mode is only available after the
- thin-pool has been created and first used in full
- read/write mode. It cannot be specified on initial
- thin-pool creation.
-
- error_if_no_space: Error IOs, instead of queueing, if no space.
-
- Data block size must be between 64KB (128 sectors) and 1GB
- (2097152 sectors) inclusive.
-
-
-ii) Status
-
- <transaction id> <used metadata blocks>/<total metadata blocks>
- <used data blocks>/<total data blocks> <held metadata root>
- ro|rw|out_of_data_space [no_]discard_passdown [error|queue]_if_no_space
- needs_check|- metadata_low_watermark
-
- transaction id:
- A 64-bit number used by userspace to help synchronise with metadata
- from volume managers.
-
- used data blocks / total data blocks
- If the number of free blocks drops below the pool's low water mark a
- dm event will be sent to userspace. This event is edge-triggered and
- it will occur only once after each resume so volume manager writers
- should register for the event and then check the target's status.
-
- held metadata root:
- The location, in blocks, of the metadata root that has been
- 'held' for userspace read access. '-' indicates there is no
- held root.
-
- discard_passdown|no_discard_passdown
- Whether or not discards are actually being passed down to the
- underlying device. When this is enabled when loading the table,
- it can get disabled if the underlying device doesn't support it.
-
- ro|rw|out_of_data_space
- If the pool encounters certain types of device failures it will
- drop into a read-only metadata mode in which no changes to
- the pool metadata (like allocating new blocks) are permitted.
-
- In serious cases where even a read-only mode is deemed unsafe
- no further I/O will be permitted and the status will just
- contain the string 'Fail'. The userspace recovery tools
- should then be used.
-
- error_if_no_space|queue_if_no_space
- If the pool runs out of data or metadata space, the pool will
- either queue or error the IO destined to the data device. The
- default is to queue the IO until more space is added or the
- 'no_space_timeout' expires. The 'no_space_timeout' dm-thin-pool
- module parameter can be used to change this timeout -- it
- defaults to 60 seconds but may be disabled using a value of 0.
-
- needs_check
- A metadata operation has failed, resulting in the needs_check
- flag being set in the metadata's superblock. The metadata
- device must be deactivated and checked/repaired before the
- thin-pool can be made fully operational again. '-' indicates
- needs_check is not set.
-
- metadata_low_watermark:
- Value of metadata low watermark in blocks. The kernel sets this
- value internally but userspace needs to know this value to
- determine if an event was caused by crossing this threshold.
-
-iii) Messages
-
- create_thin <dev id>
-
- Create a new thinly-provisioned device.
- <dev id> is an arbitrary unique 24-bit identifier chosen by
- the caller.
-
- create_snap <dev id> <origin id>
-
- Create a new snapshot of another thinly-provisioned device.
- <dev id> is an arbitrary unique 24-bit identifier chosen by
- the caller.
- <origin id> is the identifier of the thinly-provisioned device
- of which the new device will be a snapshot.
-
- delete <dev id>
-
- Deletes a thin device. Irreversible.
-
- set_transaction_id <current id> <new id>
-
- Userland volume managers, such as LVM, need a way to
- synchronise their external metadata with the internal metadata of the
- pool target. The thin-pool target offers to store an
- arbitrary 64-bit transaction id and return it on the target's
- status line. To avoid races you must provide what you think
- the current transaction id is when you change it with this
- compare-and-swap message.
-
- reserve_metadata_snap
-
- Reserve a copy of the data mapping btree for use by userland.
- This allows userland to inspect the mappings as they were when
- this message was executed. Use the pool's status command to
- get the root block associated with the metadata snapshot.
-
- release_metadata_snap
-
- Release a previously reserved copy of the data mapping btree.
-
-'thin' target
--------------
-
-i) Constructor
-
- thin <pool dev> <dev id> [<external origin dev>]
-
- pool dev:
- the thin-pool device, e.g. /dev/mapper/my_pool or 253:0
-
- dev id:
- the internal device identifier of the device to be
- activated.
-
- external origin dev:
- an optional block device outside the pool to be treated as a
- read-only snapshot origin: reads to unprovisioned areas of the
- thin target will be mapped to this device.
-
-The pool doesn't store any size against the thin devices. If you
-load a thin target that is smaller than you've been using previously,
-then you'll have no access to blocks mapped beyond the end. If you
-load a target that is bigger than before, then extra blocks will be
-provisioned as and when needed.
-
-ii) Status
-
- <nr mapped sectors> <highest mapped sector>
-
- If the pool has encountered device errors and failed, the status
- will just contain the string 'Fail'. The userspace recovery
- tools should then be used.
-
- In the case where <nr mapped sectors> is 0, there is no highest
- mapped sector and the value of <highest mapped sector> is unspecified.
diff --git a/Documentation/device-mapper/unstriped.txt b/Documentation/device-mapper/unstriped.txt
deleted file mode 100644
index 0b2a306c54ee..000000000000
--- a/Documentation/device-mapper/unstriped.txt
+++ /dev/null
@@ -1,124 +0,0 @@
-Introduction
-============
-
-The device-mapper "unstriped" target provides a transparent mechanism to
-unstripe a device-mapper "striped" target to access the underlying disks
-without having to touch the true backing block-device. It can also be
-used to unstripe a hardware RAID-0 to access backing disks.
-
-Parameters:
-<number of stripes> <chunk size> <stripe #> <dev_path> <offset>
-
-<number of stripes>
- The number of stripes in the RAID 0.
-
-<chunk size>
- The amount of 512B sectors in the chunk striping.
-
-<dev_path>
- The block device you wish to unstripe.
-
-<stripe #>
- The stripe number within the device that corresponds to physical
- drive you wish to unstripe. This must be 0 indexed.
-
-
-Why use this module?
-====================
-
-An example of undoing an existing dm-stripe
--------------------------------------------
-
-This small bash script will setup 4 loop devices and use the existing
-striped target to combine the 4 devices into one. It then will use
-the unstriped target ontop of the striped device to access the
-individual backing loop devices. We write data to the newly exposed
-unstriped devices and verify the data written matches the correct
-underlying device on the striped array.
-
-#!/bin/bash
-
-MEMBER_SIZE=$((128 * 1024 * 1024))
-NUM=4
-SEQ_END=$((${NUM}-1))
-CHUNK=256
-BS=4096
-
-RAID_SIZE=$((${MEMBER_SIZE}*${NUM}/512))
-DM_PARMS="0 ${RAID_SIZE} striped ${NUM} ${CHUNK}"
-COUNT=$((${MEMBER_SIZE} / ${BS}))
-
-for i in $(seq 0 ${SEQ_END}); do
- dd if=/dev/zero of=member-${i} bs=${MEMBER_SIZE} count=1 oflag=direct
- losetup /dev/loop${i} member-${i}
- DM_PARMS+=" /dev/loop${i} 0"
-done
-
-echo $DM_PARMS | dmsetup create raid0
-for i in $(seq 0 ${SEQ_END}); do
- echo "0 1 unstriped ${NUM} ${CHUNK} ${i} /dev/mapper/raid0 0" | dmsetup create set-${i}
-done;
-
-for i in $(seq 0 ${SEQ_END}); do
- dd if=/dev/urandom of=/dev/mapper/set-${i} bs=${BS} count=${COUNT} oflag=direct
- diff /dev/mapper/set-${i} member-${i}
-done;
-
-for i in $(seq 0 ${SEQ_END}); do
- dmsetup remove set-${i}
-done
-
-dmsetup remove raid0
-
-for i in $(seq 0 ${SEQ_END}); do
- losetup -d /dev/loop${i}
- rm -f member-${i}
-done
-
-Another example
----------------
-
-Intel NVMe drives contain two cores on the physical device.
-Each core of the drive has segregated access to its LBA range.
-The current LBA model has a RAID 0 128k chunk on each core, resulting
-in a 256k stripe across the two cores:
-
- Core 0: Core 1:
- __________ __________
- | LBA 512| | LBA 768|
- | LBA 0 | | LBA 256|
- ---------- ----------
-
-The purpose of this unstriping is to provide better QoS in noisy
-neighbor environments. When two partitions are created on the
-aggregate drive without this unstriping, reads on one partition
-can affect writes on another partition. This is because the partitions
-are striped across the two cores. When we unstripe this hardware RAID 0
-and make partitions on each new exposed device the two partitions are now
-physically separated.
-
-With the dm-unstriped target we're able to segregate an fio script that
-has read and write jobs that are independent of each other. Compared to
-when we run the test on a combined drive with partitions, we were able
-to get a 92% reduction in read latency using this device mapper target.
-
-
-Example dmsetup usage
-=====================
-
-unstriped ontop of Intel NVMe device that has 2 cores
------------------------------------------------------
-dmsetup create nvmset0 --table '0 512 unstriped 2 256 0 /dev/nvme0n1 0'
-dmsetup create nvmset1 --table '0 512 unstriped 2 256 1 /dev/nvme0n1 0'
-
-There will now be two devices that expose Intel NVMe core 0 and 1
-respectively:
-/dev/mapper/nvmset0
-/dev/mapper/nvmset1
-
-unstriped ontop of striped with 4 drives using 128K chunk size
---------------------------------------------------------------
-dmsetup create raid_disk0 --table '0 512 unstriped 4 256 0 /dev/mapper/striped 0'
-dmsetup create raid_disk1 --table '0 512 unstriped 4 256 1 /dev/mapper/striped 0'
-dmsetup create raid_disk2 --table '0 512 unstriped 4 256 2 /dev/mapper/striped 0'
-dmsetup create raid_disk3 --table '0 512 unstriped 4 256 3 /dev/mapper/striped 0'
diff --git a/Documentation/device-mapper/verity.txt b/Documentation/device-mapper/verity.txt
deleted file mode 100644
index b3d2e4a42255..000000000000
--- a/Documentation/device-mapper/verity.txt
+++ /dev/null
@@ -1,219 +0,0 @@
-dm-verity
-==========
-
-Device-Mapper's "verity" target provides transparent integrity checking of
-block devices using a cryptographic digest provided by the kernel crypto API.
-This target is read-only.
-
-Construction Parameters
-=======================
- <version> <dev> <hash_dev>
- <data_block_size> <hash_block_size>
- <num_data_blocks> <hash_start_block>
- <algorithm> <digest> <salt>
- [<#opt_params> <opt_params>]
-
-<version>
- This is the type of the on-disk hash format.
-
- 0 is the original format used in the Chromium OS.
- The salt is appended when hashing, digests are stored continuously and
- the rest of the block is padded with zeroes.
-
- 1 is the current format that should be used for new devices.
- The salt is prepended when hashing and each digest is
- padded with zeroes to the power of two.
-
-<dev>
- This is the device containing data, the integrity of which needs to be
- checked. It may be specified as a path, like /dev/sdaX, or a device number,
- <major>:<minor>.
-
-<hash_dev>
- This is the device that supplies the hash tree data. It may be
- specified similarly to the device path and may be the same device. If the
- same device is used, the hash_start should be outside the configured
- dm-verity device.
-
-<data_block_size>
- The block size on a data device in bytes.
- Each block corresponds to one digest on the hash device.
-
-<hash_block_size>
- The size of a hash block in bytes.
-
-<num_data_blocks>
- The number of data blocks on the data device. Additional blocks are
- inaccessible. You can place hashes to the same partition as data, in this
- case hashes are placed after <num_data_blocks>.
-
-<hash_start_block>
- This is the offset, in <hash_block_size>-blocks, from the start of hash_dev
- to the root block of the hash tree.
-
-<algorithm>
- The cryptographic hash algorithm used for this device. This should
- be the name of the algorithm, like "sha1".
-
-<digest>
- The hexadecimal encoding of the cryptographic hash of the root hash block
- and the salt. This hash should be trusted as there is no other authenticity
- beyond this point.
-
-<salt>
- The hexadecimal encoding of the salt value.
-
-<#opt_params>
- Number of optional parameters. If there are no optional parameters,
- the optional paramaters section can be skipped or #opt_params can be zero.
- Otherwise #opt_params is the number of following arguments.
-
- Example of optional parameters section:
- 1 ignore_corruption
-
-ignore_corruption
- Log corrupted blocks, but allow read operations to proceed normally.
-
-restart_on_corruption
- Restart the system when a corrupted block is discovered. This option is
- not compatible with ignore_corruption and requires user space support to
- avoid restart loops.
-
-ignore_zero_blocks
- Do not verify blocks that are expected to contain zeroes and always return
- zeroes instead. This may be useful if the partition contains unused blocks
- that are not guaranteed to contain zeroes.
-
-use_fec_from_device <fec_dev>
- Use forward error correction (FEC) to recover from corruption if hash
- verification fails. Use encoding data from the specified device. This
- may be the same device where data and hash blocks reside, in which case
- fec_start must be outside data and hash areas.
-
- If the encoding data covers additional metadata, it must be accessible
- on the hash device after the hash blocks.
-
- Note: block sizes for data and hash devices must match. Also, if the
- verity <dev> is encrypted the <fec_dev> should be too.
-
-fec_roots <num>
- Number of generator roots. This equals to the number of parity bytes in
- the encoding data. For example, in RS(M, N) encoding, the number of roots
- is M-N.
-
-fec_blocks <num>
- The number of encoding data blocks on the FEC device. The block size for
- the FEC device is <data_block_size>.
-
-fec_start <offset>
- This is the offset, in <data_block_size> blocks, from the start of the
- FEC device to the beginning of the encoding data.
-
-check_at_most_once
- Verify data blocks only the first time they are read from the data device,
- rather than every time. This reduces the overhead of dm-verity so that it
- can be used on systems that are memory and/or CPU constrained. However, it
- provides a reduced level of security because only offline tampering of the
- data device's content will be detected, not online tampering.
-
- Hash blocks are still verified each time they are read from the hash device,
- since verification of hash blocks is less performance critical than data
- blocks, and a hash block will not be verified any more after all the data
- blocks it covers have been verified anyway.
-
-Theory of operation
-===================
-
-dm-verity is meant to be set up as part of a verified boot path. This
-may be anything ranging from a boot using tboot or trustedgrub to just
-booting from a known-good device (like a USB drive or CD).
-
-When a dm-verity device is configured, it is expected that the caller
-has been authenticated in some way (cryptographic signatures, etc).
-After instantiation, all hashes will be verified on-demand during
-disk access. If they cannot be verified up to the root node of the
-tree, the root hash, then the I/O will fail. This should detect
-tampering with any data on the device and the hash data.
-
-Cryptographic hashes are used to assert the integrity of the device on a
-per-block basis. This allows for a lightweight hash computation on first read
-into the page cache. Block hashes are stored linearly, aligned to the nearest
-block size.
-
-If forward error correction (FEC) support is enabled any recovery of
-corrupted data will be verified using the cryptographic hash of the
-corresponding data. This is why combining error correction with
-integrity checking is essential.
-
-Hash Tree
----------
-
-Each node in the tree is a cryptographic hash. If it is a leaf node, the hash
-of some data block on disk is calculated. If it is an intermediary node,
-the hash of a number of child nodes is calculated.
-
-Each entry in the tree is a collection of neighboring nodes that fit in one
-block. The number is determined based on block_size and the size of the
-selected cryptographic digest algorithm. The hashes are linearly-ordered in
-this entry and any unaligned trailing space is ignored but included when
-calculating the parent node.
-
-The tree looks something like:
-
-alg = sha256, num_blocks = 32768, block_size = 4096
-
- [ root ]
- / . . . \
- [entry_0] [entry_1]
- / . . . \ . . . \
- [entry_0_0] . . . [entry_0_127] . . . . [entry_1_127]
- / ... \ / . . . \ / \
- blk_0 ... blk_127 blk_16256 blk_16383 blk_32640 . . . blk_32767
-
-
-On-disk format
-==============
-
-The verity kernel code does not read the verity metadata on-disk header.
-It only reads the hash blocks which directly follow the header.
-It is expected that a user-space tool will verify the integrity of the
-verity header.
-
-Alternatively, the header can be omitted and the dmsetup parameters can
-be passed via the kernel command-line in a rooted chain of trust where
-the command-line is verified.
-
-Directly following the header (and with sector number padded to the next hash
-block boundary) are the hash blocks which are stored a depth at a time
-(starting from the root), sorted in order of increasing index.
-
-The full specification of kernel parameters and on-disk metadata format
-is available at the cryptsetup project's wiki page
- https://gitlab.com/cryptsetup/cryptsetup/wikis/DMVerity
-
-Status
-======
-V (for Valid) is returned if every check performed so far was valid.
-If any check failed, C (for Corruption) is returned.
-
-Example
-=======
-Set up a device:
- # dmsetup create vroot --readonly --table \
- "0 2097152 verity 1 /dev/sda1 /dev/sda2 4096 4096 262144 1 sha256 "\
- "4392712ba01368efdf14b05c76f9e4df0d53664630b5d48632ed17a137f39076 "\
- "1234000000000000000000000000000000000000000000000000000000000000"
-
-A command line tool veritysetup is available to compute or verify
-the hash tree or activate the kernel device. This is available from
-the cryptsetup upstream repository https://gitlab.com/cryptsetup/cryptsetup/
-(as a libcryptsetup extension).
-
-Create hash on the device:
- # veritysetup format /dev/sda1 /dev/sda2
- ...
- Root hash: 4392712ba01368efdf14b05c76f9e4df0d53664630b5d48632ed17a137f39076
-
-Activate the device:
- # veritysetup create vroot /dev/sda1 /dev/sda2 \
- 4392712ba01368efdf14b05c76f9e4df0d53664630b5d48632ed17a137f39076
diff --git a/Documentation/device-mapper/writecache.txt b/Documentation/device-mapper/writecache.txt
deleted file mode 100644
index 01532b3008ae..000000000000
--- a/Documentation/device-mapper/writecache.txt
+++ /dev/null
@@ -1,70 +0,0 @@
-The writecache target caches writes on persistent memory or on SSD. It
-doesn't cache reads because reads are supposed to be cached in page cache
-in normal RAM.
-
-When the device is constructed, the first sector should be zeroed or the
-first sector should contain valid superblock from previous invocation.
-
-Constructor parameters:
-1. type of the cache device - "p" or "s"
- p - persistent memory
- s - SSD
-2. the underlying device that will be cached
-3. the cache device
-4. block size (4096 is recommended; the maximum block size is the page
- size)
-5. the number of optional parameters (the parameters with an argument
- count as two)
- start_sector n (default: 0)
- offset from the start of cache device in 512-byte sectors
- high_watermark n (default: 50)
- start writeback when the number of used blocks reach this
- watermark
- low_watermark x (default: 45)
- stop writeback when the number of used blocks drops below
- this watermark
- writeback_jobs n (default: unlimited)
- limit the number of blocks that are in flight during
- writeback. Setting this value reduces writeback
- throughput, but it may improve latency of read requests
- autocommit_blocks n (default: 64 for pmem, 65536 for ssd)
- when the application writes this amount of blocks without
- issuing the FLUSH request, the blocks are automatically
- commited
- autocommit_time ms (default: 1000)
- autocommit time in milliseconds. The data is automatically
- commited if this time passes and no FLUSH request is
- received
- fua (by default on)
- applicable only to persistent memory - use the FUA flag
- when writing data from persistent memory back to the
- underlying device
- nofua
- applicable only to persistent memory - don't use the FUA
- flag when writing back data and send the FLUSH request
- afterwards
- - some underlying devices perform better with fua, some
- with nofua. The user should test it
-
-Status:
-1. error indicator - 0 if there was no error, otherwise error number
-2. the number of blocks
-3. the number of free blocks
-4. the number of blocks under writeback
-
-Messages:
- flush
- flush the cache device. The message returns successfully
- if the cache device was flushed without an error
- flush_on_suspend
- flush the cache device on next suspend. Use this message
- when you are going to remove the cache device. The proper
- sequence for removing the cache device is:
- 1. send the "flush_on_suspend" message
- 2. load an inactive table with a linear target that maps
- to the underlying device
- 3. suspend the device
- 4. ask for status and verify that there are no errors
- 5. resume the device, so that it will use the linear
- target
- 6. the cache device is now inactive and it can be deleted
diff --git a/Documentation/device-mapper/zero.txt b/Documentation/device-mapper/zero.txt
deleted file mode 100644
index 20fb38e7fa7e..000000000000
--- a/Documentation/device-mapper/zero.txt
+++ /dev/null
@@ -1,37 +0,0 @@
-dm-zero
-=======
-
-Device-Mapper's "zero" target provides a block-device that always returns
-zero'd data on reads and silently drops writes. This is similar behavior to
-/dev/zero, but as a block-device instead of a character-device.
-
-Dm-zero has no target-specific parameters.
-
-One very interesting use of dm-zero is for creating "sparse" devices in
-conjunction with dm-snapshot. A sparse device reports a device-size larger
-than the amount of actual storage space available for that device. A user can
-write data anywhere within the sparse device and read it back like a normal
-device. Reads to previously unwritten areas will return a zero'd buffer. When
-enough data has been written to fill up the actual storage space, the sparse
-device is deactivated. This can be very useful for testing device and
-filesystem limitations.
-
-To create a sparse device, start by creating a dm-zero device that's the
-desired size of the sparse device. For this example, we'll assume a 10TB
-sparse device.
-
-TEN_TERABYTES=`expr 10 \* 1024 \* 1024 \* 1024 \* 2` # 10 TB in sectors
-echo "0 $TEN_TERABYTES zero" | dmsetup create zero1
-
-Then create a snapshot of the zero device, using any available block-device as
-the COW device. The size of the COW device will determine the amount of real
-space available to the sparse device. For this example, we'll assume /dev/sdb1
-is an available 10GB partition.
-
-echo "0 $TEN_TERABYTES snapshot /dev/mapper/zero1 /dev/sdb1 p 128" | \
- dmsetup create sparse1
-
-This will create a 10TB sparse device called /dev/mapper/sparse1 that has
-10GB of actual storage space available. If more than 10GB of data is written
-to this device, it will start returning I/O errors.
-
diff --git a/Documentation/devicetree/bindings/.gitignore b/Documentation/devicetree/bindings/.gitignore
new file mode 100644
index 000000000000..51ddb26d93f0
--- /dev/null
+++ b/Documentation/devicetree/bindings/.gitignore
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0-only
+*.example.dts
+/processed-schema*.yaml
+/processed-schema*.json
+
+#
+# We don't want to ignore the following even if they are dot-files
+#
+!.yamllint
diff --git a/Documentation/devicetree/bindings/.yamllint b/Documentation/devicetree/bindings/.yamllint
new file mode 100644
index 000000000000..532799501800
--- /dev/null
+++ b/Documentation/devicetree/bindings/.yamllint
@@ -0,0 +1,44 @@
+extends: relaxed
+
+rules:
+ quoted-strings:
+ required: only-when-needed
+ extra-allowed:
+ - '[$^[]'
+ - '^/$'
+ line-length:
+ # 80 chars should be enough, but don't fail if a line is longer
+ max: 110
+ allow-non-breakable-words: true
+ level: warning
+ braces:
+ min-spaces-inside: 0
+ max-spaces-inside: 1
+ min-spaces-inside-empty: 0
+ max-spaces-inside-empty: 0
+ brackets:
+ min-spaces-inside: 0
+ max-spaces-inside: 1
+ min-spaces-inside-empty: 0
+ max-spaces-inside-empty: 0
+ colons: {max-spaces-before: 0, max-spaces-after: 1}
+ commas: {min-spaces-after: 1, max-spaces-after: 1}
+ comments:
+ require-starting-space: true
+ min-spaces-from-content: 1
+ comments-indentation: disable
+ document-start:
+ present: true
+ empty-lines:
+ max: 3
+ max-end: 1
+ empty-values:
+ forbid-in-block-mappings: true
+ forbid-in-flow-mappings: true
+ hyphens:
+ max-spaces-after: 1
+ indentation:
+ spaces: 2
+ indent-sequences: true
+ check-multi-line-strings: false
+ trailing-spaces: false
diff --git a/Documentation/devicetree/bindings/ABI.rst b/Documentation/devicetree/bindings/ABI.rst
new file mode 100644
index 000000000000..a885713cf184
--- /dev/null
+++ b/Documentation/devicetree/bindings/ABI.rst
@@ -0,0 +1,42 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================
+Devicetree (DT) ABI
+===================
+
+I. Regarding stable bindings/ABI, we quote from the 2013 ARM mini-summit
+ summary document:
+
+ "That still leaves the question of, what does a stable binding look
+ like? Certainly a stable binding means that a newer kernel will not
+ break on an older device tree, but that doesn't mean the binding is
+ frozen for all time. Grant said there are ways to change bindings that
+ don't result in breakage. For instance, if a new property is added,
+ then default to the previous behaviour if it is missing. If a binding
+ truly needs an incompatible change, then change the compatible string
+ at the same time. The driver can bind against both the old and the
+ new. These guidelines aren't new, but they desperately need to be
+ documented."
+
+II. General binding rules
+
+ 1) Maintainers, don't let perfect be the enemy of good. Don't hold up a
+ binding because it isn't perfect.
+
+ 2) Use specific compatible strings so that if we need to add a feature (DMA)
+ in the future, we can create a new compatible string. See I.
+
+ 3) Bindings can be augmented, but the driver shouldn't break when given
+ the old binding. ie. add additional properties, but don't change the
+ meaning of an existing property. For drivers, default to the original
+ behaviour when a newly added property is missing.
+
+ 4) Don't submit bindings for staging or unstable. That will be decided by
+ the devicetree maintainers *after* discussion on the mailinglist.
+
+III. Notes
+
+ 1) This document is intended as a general familiarization with the process as
+ decided at the 2013 Kernel Summit. When in doubt, the current word of the
+ devicetree maintainers overrules this document. In that situation, a patch
+ updating this document would be appreciated.
diff --git a/Documentation/devicetree/bindings/ABI.txt b/Documentation/devicetree/bindings/ABI.txt
deleted file mode 100644
index d25f8d379680..000000000000
--- a/Documentation/devicetree/bindings/ABI.txt
+++ /dev/null
@@ -1,39 +0,0 @@
-
- Devicetree (DT) ABI
-
-I. Regarding stable bindings/ABI, we quote from the 2013 ARM mini-summit
- summary document:
-
- "That still leaves the question of, what does a stable binding look
- like? Certainly a stable binding means that a newer kernel will not
- break on an older device tree, but that doesn't mean the binding is
- frozen for all time. Grant said there are ways to change bindings that
- don't result in breakage. For instance, if a new property is added,
- then default to the previous behaviour if it is missing. If a binding
- truly needs an incompatible change, then change the compatible string
- at the same time. The driver can bind against both the old and the
- new. These guidelines aren't new, but they desperately need to be
- documented."
-
-II. General binding rules
-
- 1) Maintainers, don't let perfect be the enemy of good. Don't hold up a
- binding because it isn't perfect.
-
- 2) Use specific compatible strings so that if we need to add a feature (DMA)
- in the future, we can create a new compatible string. See I.
-
- 3) Bindings can be augmented, but the driver shouldn't break when given
- the old binding. ie. add additional properties, but don't change the
- meaning of an existing property. For drivers, default to the original
- behaviour when a newly added property is missing.
-
- 4) Don't submit bindings for staging or unstable. That will be decided by
- the devicetree maintainers *after* discussion on the mailinglist.
-
-III. Notes
-
- 1) This document is intended as a general familiarization with the process as
- decided at the 2013 Kernel Summit. When in doubt, the current word of the
- devicetree maintainers overrules this document. In that situation, a patch
- updating this document would be appreciated.
diff --git a/Documentation/devicetree/bindings/Makefile b/Documentation/devicetree/bindings/Makefile
new file mode 100644
index 000000000000..8390d6c00030
--- /dev/null
+++ b/Documentation/devicetree/bindings/Makefile
@@ -0,0 +1,85 @@
+# SPDX-License-Identifier: GPL-2.0
+DT_DOC_CHECKER ?= dt-doc-validate
+DT_EXTRACT_EX ?= dt-extract-example
+DT_MK_SCHEMA ?= dt-mk-schema
+
+DT_SCHEMA_LINT = $(shell which yamllint || \
+ echo "warning: python package 'yamllint' not installed, skipping" >&2)
+
+DT_SCHEMA_MIN_VERSION = 2023.9
+
+PHONY += check_dtschema_version
+check_dtschema_version:
+ @which $(DT_DOC_CHECKER) >/dev/null || \
+ { echo "Error: '$(DT_DOC_CHECKER)' not found!" >&2; \
+ echo "Ensure dtschema python package is installed and in your PATH." >&2; \
+ echo "Current PATH is:" >&2; \
+ echo "$$PATH" >&2; false; }
+ @{ echo $(DT_SCHEMA_MIN_VERSION); \
+ $(DT_DOC_CHECKER) --version 2>/dev/null || echo 0; } | sort -Vc >/dev/null || \
+ { echo "ERROR: dtschema minimum version is v$(DT_SCHEMA_MIN_VERSION)" >&2; false; }
+
+quiet_cmd_extract_ex = DTEX $@
+ cmd_extract_ex = $(DT_EXTRACT_EX) $< > $@
+
+$(obj)/%.example.dts: $(src)/%.yaml check_dtschema_version FORCE
+ $(call if_changed,extract_ex)
+
+find_all_cmd = find $(src) \( -name '*.yaml' ! \
+ -name 'processed-schema*' \)
+
+find_cmd = $(find_all_cmd) | \
+ sed 's|^$(srctree)/||' | \
+ grep -F -e "$(subst :," -e ",$(DT_SCHEMA_FILES))" | \
+ sed 's|^|$(srctree)/|'
+CHK_DT_EXAMPLES := $(patsubst $(srctree)/%.yaml,%.example.dtb, $(shell $(find_cmd)))
+
+quiet_cmd_yamllint = LINT $(src)
+ cmd_yamllint = ($(find_cmd) | \
+ xargs -n200 -P$$(nproc) \
+ $(DT_SCHEMA_LINT) -f parsable -c $(src)/.yamllint >&2) \
+ && touch $@ || true
+
+quiet_cmd_chk_bindings = CHKDT $(src)
+ cmd_chk_bindings = ($(find_cmd) | \
+ xargs -n200 -P$$(nproc) $(DT_DOC_CHECKER) -u $(src)) \
+ && touch $@ || true
+
+quiet_cmd_mk_schema = SCHEMA $@
+ cmd_mk_schema = f=$$(mktemp) ; \
+ $(find_all_cmd) > $$f ; \
+ $(DT_MK_SCHEMA) -j $(DT_MK_SCHEMA_FLAGS) @$$f > $@ ; \
+ rm -f $$f
+
+DT_DOCS = $(patsubst $(srctree)/%,%,$(shell $(find_all_cmd)))
+
+override DTC_FLAGS := \
+ -Wno-avoid_unnecessary_addr_size \
+ -Wno-graph_child_address \
+ -Wno-unique_unit_address \
+ -Wunique_unit_address_if_enabled
+
+$(obj)/processed-schema.json: $(DT_DOCS) check_dtschema_version FORCE
+ $(call if_changed,mk_schema)
+
+targets += .dt-binding.checked .yamllint.checked
+$(obj)/.yamllint.checked: $(DT_DOCS) $(src)/.yamllint FORCE
+ $(if $(DT_SCHEMA_LINT),$(call if_changed,yamllint),)
+
+$(obj)/.dt-binding.checked: $(DT_DOCS) FORCE
+ $(call if_changed,chk_bindings)
+
+always-y += processed-schema.json
+targets += $(patsubst $(obj)/%,%, $(CHK_DT_EXAMPLES))
+targets += $(patsubst $(obj)/%.dtb,%.dts, $(CHK_DT_EXAMPLES))
+
+# Hack: avoid 'Argument list too long' error for 'make clean'. Remove most of
+# build artifacts here before they are processed by scripts/Makefile.clean
+clean-files = $(shell find $(obj) \( -name '*.example.dts' -o \
+ -name '*.example.dtb' \) -delete 2>/dev/null)
+
+dt_compatible_check: $(obj)/processed-schema.json
+ $(Q)$(srctree)/scripts/dtc/dt-extract-compatibles $(srctree) | xargs dt-check-compatible -v -s $<
+
+PHONY += dt_binding_check
+dt_binding_check: $(obj)/.dt-binding.checked $(obj)/.yamllint.checked $(CHK_DT_EXAMPLES)
diff --git a/Documentation/devicetree/bindings/access-controllers/access-controllers.yaml b/Documentation/devicetree/bindings/access-controllers/access-controllers.yaml
new file mode 100644
index 000000000000..99e2865f0e46
--- /dev/null
+++ b/Documentation/devicetree/bindings/access-controllers/access-controllers.yaml
@@ -0,0 +1,84 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/access-controllers/access-controllers.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Generic Domain Access Controllers
+
+maintainers:
+ - Oleksii Moisieiev <oleksii_moisieiev@epam.com>
+
+description: |+
+ Common access controllers properties
+
+ Access controllers are in charge of stating which of the hardware blocks under
+ their responsibility (their domain) can be accesssed by which compartment. A
+ compartment can be a cluster of CPUs (or coprocessors), a range of addresses
+ or a group of hardware blocks. An access controller's domain is the set of
+ resources covered by the access controller.
+
+ This device tree binding can be used to bind devices to their access
+ controller provided by access-controllers property. In this case, the device
+ is a consumer and the access controller is the provider.
+
+ An access controller can be represented by any node in the device tree and
+ can provide one or more configuration parameters, needed to control parameters
+ of the consumer device. A consumer node can refer to the provider by phandle
+ and a set of phandle arguments, specified by '#access-controller-cells'
+ property in the access controller node.
+
+ Access controllers are typically used to set/read the permissions of a
+ hardware block and grant access to it. Any of which depends on the access
+ controller. The capabilities of each access controller are defined by the
+ binding of the access controller device.
+
+ Each node can be a consumer for the several access controllers.
+
+# always select the core schema
+select: true
+
+properties:
+ "#access-controller-cells":
+ description:
+ Number of cells in an access-controllers specifier;
+ Can be any value as specified by device tree binding documentation
+ of a particular provider. The node is an access controller.
+
+ access-controller-names:
+ $ref: /schemas/types.yaml#/definitions/string-array
+ description:
+ A list of access-controllers names, sorted in the same order as
+ access-controllers entries. Consumer drivers will use
+ access-controller-names to match with existing access-controllers entries.
+
+ access-controllers:
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ description:
+ A list of access controller specifiers, as defined by the
+ bindings of the access-controllers provider.
+
+additionalProperties: true
+
+examples:
+ - |
+ clock_controller: access-controllers@50000 {
+ reg = <0x50000 0x400>;
+ #access-controller-cells = <2>;
+ };
+
+ bus_controller: bus@60000 {
+ reg = <0x60000 0x10000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+ #access-controller-cells = <3>;
+
+ uart4: serial@60100 {
+ reg = <0x60100 0x400>;
+ clocks = <&clk_serial>;
+ access-controllers = <&clock_controller 1 2>,
+ <&bus_controller 1 3 5>;
+ access-controller-names = "clock", "bus";
+ };
+ };
diff --git a/Documentation/devicetree/bindings/arc/archs-pct.txt b/Documentation/devicetree/bindings/arc/archs-pct.txt
deleted file mode 100644
index e4b9dcee6d41..000000000000
--- a/Documentation/devicetree/bindings/arc/archs-pct.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-* ARC HS Performance Counters
-
-The ARC HS can be configured with a pipeline performance monitor for counting
-CPU and cache events like cache misses and hits. Like conventional PCT there
-are 100+ hardware conditions dynamically mapped to up to 32 counters.
-It also supports overflow interrupts.
-
-Required properties:
-
-- compatible : should contain
- "snps,archs-pct"
-
-Example:
-
-pmu {
- compatible = "snps,archs-pct";
-};
diff --git a/Documentation/devicetree/bindings/arc/snps,archs-pct.yaml b/Documentation/devicetree/bindings/arc/snps,archs-pct.yaml
new file mode 100644
index 000000000000..532f7584f59f
--- /dev/null
+++ b/Documentation/devicetree/bindings/arc/snps,archs-pct.yaml
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arc/snps,archs-pct.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARC HS Performance Counters
+
+maintainers:
+ - Aryabhatta Dey <aryabhattadey35@gmail.com>
+
+description:
+ The ARC HS can be configured with a pipeline performance monitor for counting
+ CPU and cache events like cache misses and hits. Like conventional PCT there
+ are 100+ hardware conditions dynamically mapped to up to 32 counters.
+ It also supports overflow interrupts.
+
+properties:
+ compatible:
+ const: snps,archs-pct
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+
+additionalProperties: false
diff --git a/Documentation/devicetree/bindings/arm/actions.txt b/Documentation/devicetree/bindings/arm/actions.txt
deleted file mode 100644
index d54f33c4e0da..000000000000
--- a/Documentation/devicetree/bindings/arm/actions.txt
+++ /dev/null
@@ -1,56 +0,0 @@
-Actions Semi platforms device tree bindings
--------------------------------------------
-
-
-S500 SoC
-========
-
-Required root node properties:
-
- - compatible : must contain "actions,s500"
-
-
-Modules:
-
-Root node property compatible must contain, depending on module:
-
- - LeMaker Guitar: "lemaker,guitar"
-
-
-Boards:
-
-Root node property compatible must contain, depending on board:
-
- - Allo.com Sparky: "allo,sparky"
- - Cubietech CubieBoard6: "cubietech,cubieboard6"
- - LeMaker Guitar Base Board rev. B: "lemaker,guitar-bb-rev-b", "lemaker,guitar"
-
-
-S700 SoC
-========
-
-Required root node properties:
-
-- compatible : must contain "actions,s700"
-
-
-Boards:
-
-Root node property compatible must contain, depending on board:
-
- - Cubietech CubieBoard7: "cubietech,cubieboard7"
-
-
-S900 SoC
-========
-
-Required root node properties:
-
-- compatible : must contain "actions,s900"
-
-
-Boards:
-
-Root node property compatible must contain, depending on board:
-
- - uCRobotics Bubblegum-96: "ucrobotics,bubblegum-96"
diff --git a/Documentation/devicetree/bindings/arm/actions.yaml b/Documentation/devicetree/bindings/arm/actions.yaml
new file mode 100644
index 000000000000..e012f612f039
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/actions.yaml
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: GPL-2.0-or-later OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/actions.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Actions Semi platforms
+
+maintainers:
+ - Andreas Färber <afaerber@suse.de>
+ - Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+
+properties:
+ $nodename:
+ const: "/"
+ compatible:
+ oneOf:
+ # The Actions Semi S500 is a quad-core ARM Cortex-A9 SoC.
+ - items:
+ - enum:
+ - allo,sparky # Allo.com Sparky
+ - cubietech,cubieboard6 # Cubietech CubieBoard6
+ - roseapplepi,roseapplepi # RoseapplePi.org RoseapplePi
+ - const: actions,s500
+ - items:
+ - enum:
+ - caninos,labrador-base-m # Labrador Base Board M v1
+ - const: caninos,labrador-v2 # Labrador Core v2
+ - const: actions,s500
+ - items:
+ - enum:
+ - lemaker,guitar-bb-rev-b # LeMaker Guitar Base Board rev. B
+ - const: lemaker,guitar
+ - const: actions,s500
+
+ # The Actions Semi S700 is a quad-core ARM Cortex-A53 SoC.
+ - items:
+ - enum:
+ - caninos,labrador-base-m2 # Labrador Base Board M v2
+ - const: caninos,labrador-v3 # Labrador Core v3
+ - const: actions,s700
+ - items:
+ - enum:
+ - cubietech,cubieboard7 # Cubietech CubieBoard7
+ - const: actions,s700
+
+ # The Actions Semi S900 is a quad-core ARM Cortex-A53 SoC.
+ - items:
+ - enum:
+ - ucrobotics,bubblegum-96 # uCRobotics Bubblegum-96
+ - const: actions,s900
+
+additionalProperties: true
diff --git a/Documentation/devicetree/bindings/arm/airoha,en7581-chip-scu.yaml b/Documentation/devicetree/bindings/arm/airoha,en7581-chip-scu.yaml
new file mode 100644
index 000000000000..67c449d804c2
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/airoha,en7581-chip-scu.yaml
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/airoha,en7581-chip-scu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Airoha Chip SCU Controller for EN7581 SoC
+
+maintainers:
+ - Lorenzo Bianconi <lorenzo@kernel.org>
+
+description:
+ The airoha chip-scu block provides a configuration interface for clock,
+ io-muxing and other functionalities used by multiple controllers (e.g. clock,
+ pinctrl, ecc) on EN7581 SoC.
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - airoha,en7581-chip-scu
+ - const: syscon
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ syscon@1fa20000 {
+ compatible = "airoha,en7581-chip-scu", "syscon";
+ reg = <0x0 0x1fa20000 0x0 0x388>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/arm/airoha.yaml b/Documentation/devicetree/bindings/arm/airoha.yaml
new file mode 100644
index 000000000000..7c38c08dbf3f
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/airoha.yaml
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/airoha.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Airoha SoC based Platforms
+
+maintainers:
+ - Felix Fietkau <nbd@nbd.name>
+ - John Crispin <john@phrozen.org>
+
+description:
+ Boards with an Airoha SoC shall have the following properties.
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - airoha,en7523-evb
+ - const: airoha,en7523
+ - items:
+ - enum:
+ - airoha,en7581-evb
+ - const: airoha,en7581
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/al,alpine.txt b/Documentation/devicetree/bindings/arm/al,alpine.txt
deleted file mode 100644
index d00debe2e86f..000000000000
--- a/Documentation/devicetree/bindings/arm/al,alpine.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-Annapurna Labs Alpine Platform Device Tree Bindings
----------------------------------------------------------------
-
-Boards in the Alpine family shall have the following properties:
-
-* Required root node properties:
-compatible: must contain "al,alpine"
-
-* Example:
-
-/ {
- model = "Annapurna Labs Alpine Dev Board";
- compatible = "al,alpine";
-
- ...
-}
diff --git a/Documentation/devicetree/bindings/arm/altera.txt b/Documentation/devicetree/bindings/arm/altera.txt
deleted file mode 100644
index 558735aacca8..000000000000
--- a/Documentation/devicetree/bindings/arm/altera.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-Altera's SoCFPGA platform device tree bindings
----------------------------------------------
-
-Boards with Cyclone 5 SoC:
-Required root node properties:
-compatible = "altr,socfpga-cyclone5", "altr,socfpga";
-
-Boards with Arria 5 SoC:
-Required root node properties:
-compatible = "altr,socfpga-arria5", "altr,socfpga";
-
-Boards with Arria 10 SoC:
-Required root node properties:
-compatible = "altr,socfpga-arria10", "altr,socfpga";
diff --git a/Documentation/devicetree/bindings/arm/altera.yaml b/Documentation/devicetree/bindings/arm/altera.yaml
new file mode 100644
index 000000000000..30c44a0e6407
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/altera.yaml
@@ -0,0 +1,69 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/altera.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Altera's SoCFPGA platform
+
+maintainers:
+ - Dinh Nguyen <dinguyen@kernel.org>
+
+properties:
+ $nodename:
+ const: "/"
+ compatible:
+ oneOf:
+ - description: Arria 5 boards
+ items:
+ - enum:
+ - altr,socfpga-arria5-socdk
+ - const: altr,socfpga-arria5
+ - const: altr,socfpga
+
+ - description: Arria 10 boards
+ items:
+ - enum:
+ - altr,socfpga-arria10-socdk
+ - const: altr,socfpga-arria10
+ - const: altr,socfpga
+
+ - description: Mercury+ AA1 boards
+ items:
+ - enum:
+ - enclustra,mercury-pe1
+ - google,chameleon-v3
+ - const: enclustra,mercury-aa1
+ - const: altr,socfpga-arria10
+ - const: altr,socfpga
+
+ - description: Cyclone 5 boards
+ items:
+ - enum:
+ - altr,socfpga-cyclone5-socdk
+ - denx,mcvevk
+ - ebv,socrates
+ - macnica,sodia
+ - novtech,chameleon96
+ - samtec,vining
+ - terasic,de0-atlas
+ - terasic,de10-nano
+ - terasic,socfpga-cyclone5-sockit
+ - const: altr,socfpga-cyclone5
+ - const: altr,socfpga
+
+ - description: Stratix 10 boards
+ items:
+ - enum:
+ - altr,socfpga-stratix10-socdk
+ - altr,socfpga-stratix10-swvp
+ - const: altr,socfpga-stratix10
+
+ - description: SoCFPGA VT
+ items:
+ - const: altr,socfpga-vt
+ - const: altr,socfpga
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/altera/socfpga-clk-manager.txt b/Documentation/devicetree/bindings/arm/altera/socfpga-clk-manager.txt
deleted file mode 100644
index 2c28f1d12f45..000000000000
--- a/Documentation/devicetree/bindings/arm/altera/socfpga-clk-manager.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-Altera SOCFPGA Clock Manager
-
-Required properties:
-- compatible : "altr,clk-mgr"
-- reg : Should contain base address and length for Clock Manager
-
-Example:
- clkmgr@ffd04000 {
- compatible = "altr,clk-mgr";
- reg = <0xffd04000 0x1000>;
- };
diff --git a/Documentation/devicetree/bindings/arm/altera/socfpga-clk-manager.yaml b/Documentation/devicetree/bindings/arm/altera/socfpga-clk-manager.yaml
new file mode 100644
index 000000000000..a758f4bb2bb3
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/altera/socfpga-clk-manager.yaml
@@ -0,0 +1,133 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/altera/socfpga-clk-manager.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Altera SOCFPGA Clock Manager
+
+maintainers:
+ - Dinh Nguyen <dinguyen@kernel.org>
+
+description:
+ This binding describes the Altera SOCFGPA Clock Manager and its associated
+ tree of clocks, pll's, and clock gates for the Cyclone5, Arria5 and Arria10
+ chip families.
+
+properties:
+ compatible:
+ items:
+ - const: altr,clk-mgr
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ type: object
+ additionalProperties: false
+
+ properties:
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 0
+
+ patternProperties:
+ "^osc[0-9]$":
+ type: object
+
+ "^[a-z0-9,_]+(clk|pll|clk_gate|clk_divided)(@[a-f0-9]+)?$":
+ type: object
+ $ref: '#/$defs/clock-props'
+ unevaluatedProperties: false
+
+ properties:
+ compatible:
+ enum:
+ - altr,socfpga-pll-clock
+ - altr,socfpga-perip-clk
+ - altr,socfpga-gate-clk
+ - altr,socfpga-a10-pll-clock
+ - altr,socfpga-a10-perip-clk
+ - altr,socfpga-a10-gate-clk
+ - fixed-clock
+
+ clocks:
+ description: one or more phandles to input clock
+ minItems: 1
+ maxItems: 5
+
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 0
+
+ patternProperties:
+ "^[a-z0-9,_]+(clk|pll)(@[a-f0-9]+)?$":
+ type: object
+ $ref: '#/$defs/clock-props'
+ unevaluatedProperties: false
+
+ properties:
+ compatible:
+ enum:
+ - altr,socfpga-perip-clk
+ - altr,socfpga-gate-clk
+ - altr,socfpga-a10-perip-clk
+ - altr,socfpga-a10-gate-clk
+
+ clocks:
+ description: one or more phandles to input clock
+ minItems: 1
+ maxItems: 4
+
+ required:
+ - compatible
+ - clocks
+ - "#clock-cells"
+
+ required:
+ - compatible
+ - "#clock-cells"
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+$defs:
+ clock-props:
+ properties:
+ reg:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 0
+
+ clk-gate:
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ items:
+ - description: gating register offset
+ - description: bit index
+
+ div-reg:
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ items:
+ - description: divider register offset
+ - description: bit shift
+ - description: bit width
+
+ fixed-divider:
+ $ref: /schemas/types.yaml#/definitions/uint32
+
+examples:
+ - |
+ clkmgr@ffd04000 {
+ compatible = "altr,clk-mgr";
+ reg = <0xffd04000 0x1000>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/arm/altera/socfpga-sdram-controller.txt b/Documentation/devicetree/bindings/arm/altera/socfpga-sdram-controller.txt
deleted file mode 100644
index 77ca635765e1..000000000000
--- a/Documentation/devicetree/bindings/arm/altera/socfpga-sdram-controller.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-Altera SOCFPGA SDRAM Controller
-
-Required properties:
-- compatible : Should contain "altr,sdr-ctl" and "syscon".
- syscon is required by the Altera SOCFPGA SDRAM EDAC.
-- reg : Should contain 1 register range (address and length)
-
-Example:
- sdr: sdr@ffc25000 {
- compatible = "altr,sdr-ctl", "syscon";
- reg = <0xffc25000 0x1000>;
- };
diff --git a/Documentation/devicetree/bindings/arm/altera/socfpga-sdram-edac.txt b/Documentation/devicetree/bindings/arm/altera/socfpga-sdram-edac.txt
deleted file mode 100644
index f5ad0ff69fae..000000000000
--- a/Documentation/devicetree/bindings/arm/altera/socfpga-sdram-edac.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-Altera SOCFPGA SDRAM Error Detection & Correction [EDAC]
-The EDAC accesses a range of registers in the SDRAM controller.
-
-Required properties:
-- compatible : should contain "altr,sdram-edac" or "altr,sdram-edac-a10"
-- altr,sdr-syscon : phandle of the sdr module
-- interrupts : Should contain the SDRAM ECC IRQ in the
- appropriate format for the IRQ controller.
-
-Example:
- sdramedac {
- compatible = "altr,sdram-edac";
- altr,sdr-syscon = <&sdr>;
- interrupts = <0 39 4>;
- };
diff --git a/Documentation/devicetree/bindings/arm/altera/socfpga-system.txt b/Documentation/devicetree/bindings/arm/altera/socfpga-system.txt
deleted file mode 100644
index f4d04a067282..000000000000
--- a/Documentation/devicetree/bindings/arm/altera/socfpga-system.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-Altera SOCFPGA System Manager
-
-Required properties:
-- compatible : "altr,sys-mgr"
-- reg : Should contain 1 register ranges(address and length)
-- cpu1-start-addr : CPU1 start address in hex.
-
-Example:
- sysmgr@ffd08000 {
- compatible = "altr,sys-mgr";
- reg = <0xffd08000 0x1000>;
- cpu1-start-addr = <0xffd080c4>;
- };
diff --git a/Documentation/devicetree/bindings/arm/amazon,al.yaml b/Documentation/devicetree/bindings/arm/amazon,al.yaml
new file mode 100644
index 000000000000..37dbb4768e5b
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/amazon,al.yaml
@@ -0,0 +1,35 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/amazon,al.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Amazon's Annapurna Labs Alpine Platform
+
+maintainers:
+ - Hanna Hawa <hhhawa@amazon.com>
+ - Talel Shenhar <talel@amazon.com>, <talelshenhar@gmail.com>
+ - Ronen Krupnik <ronenk@amazon.com>
+
+properties:
+ compatible:
+ oneOf:
+ - description: Boards with Alpine V1 SoC
+ items:
+ - const: al,alpine
+
+ - description: Boards with Alpine V2 SoC
+ items:
+ - enum:
+ - al,alpine-v2-evp
+ - const: al,alpine-v2
+
+ - description: Boards with Alpine V3 SoC
+ items:
+ - enum:
+ - amazon,al-alpine-v3-evp
+ - const: amazon,al-alpine-v3
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/amd,pensando.yaml b/Documentation/devicetree/bindings/arm/amd,pensando.yaml
new file mode 100644
index 000000000000..e5c2591834a8
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/amd,pensando.yaml
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/amd,pensando.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: AMD Pensando SoC Platforms
+
+maintainers:
+ - Brad Larson <blarson@amd.com>
+
+properties:
+ $nodename:
+ const: "/"
+ compatible:
+ oneOf:
+
+ - description: Boards with Pensando Elba SoC
+ items:
+ - enum:
+ - amd,pensando-elba-ortano
+ - const: amd,pensando-elba
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/amlogic,scpi.txt b/Documentation/devicetree/bindings/arm/amlogic,scpi.txt
deleted file mode 100644
index 7b9a861e9306..000000000000
--- a/Documentation/devicetree/bindings/arm/amlogic,scpi.txt
+++ /dev/null
@@ -1,20 +0,0 @@
-System Control and Power Interface (SCPI) Message Protocol
-(in addition to the standard binding in [0])
-----------------------------------------------------------
-Required properties
-
-- compatible : should be "amlogic,meson-gxbb-scpi"
-
-AMLOGIC SRAM and Shared Memory for SCPI
-------------------------------------
-
-Required properties:
-- compatible : should be "amlogic,meson-gxbb-sram"
-
-Each sub-node represents the reserved area for SCPI.
-
-Required sub-node properties:
-- compatible : should be "amlogic,meson-gxbb-scp-shmem" for SRAM based shared
- memory on Amlogic GXBB SoC.
-
-[0] Documentation/devicetree/bindings/arm/arm,scpi.txt
diff --git a/Documentation/devicetree/bindings/arm/amlogic.txt b/Documentation/devicetree/bindings/arm/amlogic.txt
deleted file mode 100644
index 4498292b833d..000000000000
--- a/Documentation/devicetree/bindings/arm/amlogic.txt
+++ /dev/null
@@ -1,138 +0,0 @@
-Amlogic MesonX device tree bindings
--------------------------------------------
-
-Work in progress statement:
-
-Device tree files and bindings applying to Amlogic SoCs and boards are
-considered "unstable". Any Amlogic device tree binding may change at
-any time. Be sure to use a device tree binary and a kernel image
-generated from the same source tree.
-
-Please refer to Documentation/devicetree/bindings/ABI.txt for a definition of a
-stable binding/ABI.
-
----------------------------------------------------------------
-
-Boards with the Amlogic Meson6 SoC shall have the following properties:
- Required root node property:
- compatible: "amlogic,meson6"
-
-Boards with the Amlogic Meson8 SoC shall have the following properties:
- Required root node property:
- compatible: "amlogic,meson8";
-
-Boards with the Amlogic Meson8b SoC shall have the following properties:
- Required root node property:
- compatible: "amlogic,meson8b";
-
-Boards with the Amlogic Meson8m2 SoC shall have the following properties:
- Required root node property:
- compatible: "amlogic,meson8m2";
-
-Boards with the Amlogic Meson GXBaby SoC shall have the following properties:
- Required root node property:
- compatible: "amlogic,meson-gxbb";
-
-Boards with the Amlogic Meson GXL S905X SoC shall have the following properties:
- Required root node property:
- compatible: "amlogic,s905x", "amlogic,meson-gxl";
-
-Boards with the Amlogic Meson GXL S905D SoC shall have the following properties:
- Required root node property:
- compatible: "amlogic,s905d", "amlogic,meson-gxl";
-
-Boards with the Amlogic Meson GXL S805X SoC shall have the following properties:
- Required root node property:
- compatible: "amlogic,s805x", "amlogic,meson-gxl";
-
-Boards with the Amlogic Meson GXL S905W SoC shall have the following properties:
- Required root node property:
- compatible: "amlogic,s905w", "amlogic,meson-gxl";
-
-Boards with the Amlogic Meson GXM S912 SoC shall have the following properties:
- Required root node property:
- compatible: "amlogic,s912", "amlogic,meson-gxm";
-
-Boards with the Amlogic Meson AXG A113D SoC shall have the following properties:
- Required root node property:
- compatible: "amlogic,a113d", "amlogic,meson-axg";
-
-Boards with the Amlogic Meson G12A S905D2 SoC shall have the following properties:
- Required root node property:
- compatible: "amlogic,g12a";
-
-Board compatible values (alphabetically, grouped by SoC):
-
- - "geniatech,atv1200" (Meson6)
-
- - "minix,neo-x8" (Meson8)
-
- - "endless,ec100" (Meson8b)
- - "hardkernel,odroid-c1" (Meson8b)
- - "tronfy,mxq" (Meson8b)
-
- - "tronsmart,mxiii-plus" (Meson8m2)
-
- - "amlogic,p200" (Meson gxbb)
- - "amlogic,p201" (Meson gxbb)
- - "friendlyarm,nanopi-k2" (Meson gxbb)
- - "hardkernel,odroid-c2" (Meson gxbb)
- - "nexbox,a95x" (Meson gxbb or Meson gxl s905x)
- - "tronsmart,vega-s95-pro", "tronsmart,vega-s95" (Meson gxbb)
- - "tronsmart,vega-s95-meta", "tronsmart,vega-s95" (Meson gxbb)
- - "tronsmart,vega-s95-telos", "tronsmart,vega-s95" (Meson gxbb)
- - "wetek,hub" (Meson gxbb)
- - "wetek,play2" (Meson gxbb)
-
- - "amlogic,p212" (Meson gxl s905x)
- - "hwacom,amazetv" (Meson gxl s905x)
- - "khadas,vim" (Meson gxl s905x)
- - "libretech,cc" (Meson gxl s905x)
-
- - "amlogic,p230" (Meson gxl s905d)
- - "amlogic,p231" (Meson gxl s905d)
-
- - "amlogic,p241" (Meson gxl s805x)
-
- - "amlogic,p281" (Meson gxl s905w)
- - "oranth,tx3-mini" (Meson gxl s905w)
-
- - "amlogic,q200" (Meson gxm s912)
- - "amlogic,q201" (Meson gxm s912)
- - "khadas,vim2" (Meson gxm s912)
- - "kingnovel,r-box-pro" (Meson gxm S912)
- - "nexbox,a1" (Meson gxm s912)
- - "tronsmart,vega-s96" (Meson gxm s912)
-
- - "amlogic,s400" (Meson axg a113d)
-
- - "amlogic,u200" (Meson g12a s905d2)
-
-Amlogic Meson Firmware registers Interface
-------------------------------------------
-
-The Meson SoCs have a register bank with status and data shared with the
-secure firmware.
-
-Required properties:
- - compatible: For Meson GX SoCs, must be "amlogic,meson-gx-ao-secure", "syscon"
-
-Properties should indentify components of this register interface :
-
-Meson GX SoC Information
-------------------------
-A firmware register encodes the SoC type, package and revision information on
-the Meson GX SoCs.
-If present, the following property should be added :
-
-Optional properties:
- - amlogic,has-chip-id: If present, the interface gives the current SoC version.
-
-Example
--------
-
-ao-secure@140 {
- compatible = "amlogic,meson-gx-ao-secure", "syscon";
- reg = <0x0 0x140 0x0 0x140>;
- amlogic,has-chip-id;
-};
diff --git a/Documentation/devicetree/bindings/arm/amlogic.yaml b/Documentation/devicetree/bindings/arm/amlogic.yaml
new file mode 100644
index 000000000000..2a096e060ed3
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/amlogic.yaml
@@ -0,0 +1,277 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/amlogic.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Amlogic SoC based Platforms
+
+maintainers:
+ - Neil Armstrong <neil.armstrong@linaro.org>
+ - Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+ - Jerome Brunet <jbrunet@baylibre.com>
+ - Kevin Hilman <khilman@baylibre.com>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ - description: Boards with the Amlogic Meson6 SoC
+ items:
+ - enum:
+ - geniatech,atv1200
+ - const: amlogic,meson6
+
+ - description: Boards with the Amlogic Meson8 SoC
+ items:
+ - enum:
+ - minix,neo-x8
+ - tcu,fernsehfee3
+ - const: amlogic,meson8
+
+ - description: Boards with the Amlogic Meson8m2 SoC
+ items:
+ - enum:
+ - tronsmart,mxiii-plus
+ - const: amlogic,meson8m2
+
+ - description: Boards with the Amlogic Meson8b SoC
+ items:
+ - enum:
+ - endless,ec100
+ - hardkernel,odroid-c1
+ - tronfy,mxq
+ - const: amlogic,meson8b
+
+ - description: Boards with the Amlogic Meson GXBaby SoC
+ items:
+ - enum:
+ - amlogic,p200
+ - amlogic,p201
+ - friendlyarm,nanopi-k2
+ - hardkernel,odroid-c2
+ - nexbox,a95x
+ - videostrong,kii-pro
+ - wetek,hub
+ - wetek,play2
+ - const: amlogic,meson-gxbb
+
+ - description: Tronsmart Vega S95 devices
+ items:
+ - enum:
+ - tronsmart,vega-s95-pro
+ - tronsmart,vega-s95-meta
+ - tronsmart,vega-s95-telos
+ - const: tronsmart,vega-s95
+ - const: amlogic,meson-gxbb
+
+ - description: Boards with the Amlogic Meson GXL S805X SoC
+ items:
+ - enum:
+ - amlogic,p241
+ - libretech,aml-s805x-ac
+ - const: amlogic,s805x
+ - const: amlogic,meson-gxl
+
+ - description: Boards with the Amlogic Meson GXL S805Y SoC
+ items:
+ - enum:
+ - xiaomi,aquaman
+ - const: amlogic,s805y
+ - const: amlogic,meson-gxl
+
+ - description: Boards with the Amlogic Meson GXL S905W SoC
+ items:
+ - enum:
+ - amlogic,p281
+ - oranth,tx3-mini
+ - jethome,jethub-j80
+ - const: amlogic,s905w
+ - const: amlogic,meson-gxl
+
+ - description: Boards with the Amlogic Meson GXL S905X SoC
+ items:
+ - enum:
+ - amlogic,p212
+ - hwacom,amazetv
+ - khadas,vim
+ - libretech,aml-s905x-cc
+ - libretech,aml-s905x-cc-v2
+ - nexbox,a95x
+ - osmc,vero4k
+ - const: amlogic,s905x
+ - const: amlogic,meson-gxl
+
+ - description: Boards with the Amlogic Meson GXL S905D SoC
+ items:
+ - enum:
+ - amlogic,p230
+ - amlogic,p231
+ - libretech,aml-s905d-pc
+ - osmc,vero4k-plus
+ - phicomm,n1
+ - smartlabs,sml5442tw
+ - videostrong,gxl-kii-pro
+ - const: amlogic,s905d
+ - const: amlogic,meson-gxl
+
+ - description: Boards with the Amlogic Meson GXLX S905L SoC
+ items:
+ - enum:
+ - amlogic,p271
+ - const: amlogic,s905l
+ - const: amlogic,meson-gxlx
+
+ - description: Boards with the Amlogic Meson GXM S912 SoC
+ items:
+ - enum:
+ - amlogic,q200
+ - amlogic,q201
+ - azw,gt1-ultimate
+ - khadas,vim2
+ - kingnovel,r-box-pro
+ - libretech,aml-s912-pc
+ - minix,neo-u9h
+ - nexbox,a1
+ - tronsmart,vega-s96
+ - ugoos,am3
+ - videostrong,gxm-kiii-pro
+ - wetek,core2
+ - const: amlogic,s912
+ - const: amlogic,meson-gxm
+
+ - description: Boards with the Amlogic Meson AXG A113D SoC
+ items:
+ - enum:
+ - amlogic,s400
+ - jethome,jethub-j100
+ - jethome,jethub-j110
+ - const: amlogic,a113d
+ - const: amlogic,meson-axg
+
+ - description: Boards with the Amlogic Meson G12A S905D2/X2/Y2 SoC
+ items:
+ - enum:
+ - amediatech,x96-max
+ - amlogic,u200
+ - freebox,fbx8am
+ - radxa,zero
+ - seirobotics,sei510
+ - const: amlogic,g12a
+
+ - description: Boards with the Amlogic Meson G12B A311D SoC
+ items:
+ - enum:
+ - bananapi,bpi-m2s
+ - khadas,vim3
+ - libretech,aml-a311d-cc
+ - radxa,zero2
+ - const: amlogic,a311d
+ - const: amlogic,g12b
+
+ - description: Boards using the BPI-CM4 module with Amlogic Meson G12B A311D SoC
+ items:
+ - enum:
+ - bananapi,bpi-cm4io
+ - mntre,reform2-cm4
+ - const: bananapi,bpi-cm4
+ - const: amlogic,a311d
+ - const: amlogic,g12b
+
+ - description: Boards with the Amlogic Meson G12B S922X SoC
+ items:
+ - enum:
+ - azw,gsking-x
+ - azw,gtking
+ - azw,gtking-pro
+ - bananapi,bpi-m2s
+ - dream,dreambox-one
+ - dream,dreambox-two
+ - hardkernel,odroid-go-ultra
+ - hardkernel,odroid-n2
+ - hardkernel,odroid-n2l
+ - hardkernel,odroid-n2-plus
+ - khadas,vim3
+ - ugoos,am6
+ - const: amlogic,s922x
+ - const: amlogic,g12b
+
+ - description: Boards with the Amlogic Meson SM1 S905X3/D3/Y3 SoC
+ items:
+ - enum:
+ - amediatech,x96-air
+ - amediatech,x96-air-gbit
+ - bananapi,bpi-m2-pro
+ - bananapi,bpi-m5
+ - cyx,a95xf3-air
+ - cyx,a95xf3-air-gbit
+ - hardkernel,odroid-c4
+ - hardkernel,odroid-hc4
+ - haochuangyi,h96-max
+ - khadas,vim3l
+ - libretech,aml-s905d3-cc
+ - seirobotics,sei610
+ - const: amlogic,sm1
+
+ - description: Boards with the Amlogic Meson A1 A113L SoC
+ items:
+ - enum:
+ - amlogic,ad401
+ - amlogic,ad402
+ - const: amlogic,a1
+
+ - description: Boards with the Amlogic A4 A113L2 SoC
+ items:
+ - enum:
+ - amlogic,ba400
+ - const: amlogic,a4
+
+ - description: Boards with the Amlogic A5 A113X2 SoC
+ items:
+ - enum:
+ - amlogic,av400
+ - const: amlogic,a5
+
+ - description: Boards with the Amlogic C3 C302X/C308L SoC
+ items:
+ - enum:
+ - amlogic,aw409
+ - amlogic,aw419
+ - const: amlogic,c3
+
+ - description: Boards with the Amlogic Meson S4 S805X2 SoC
+ items:
+ - enum:
+ - amlogic,aq222
+ - const: amlogic,s4
+
+ - description: Boards with the Amlogic S6 S905X5 SoC
+ items:
+ - enum:
+ - amlogic,bl209
+ - const: amlogic,s6
+
+ - description: Boards with the Amlogic S7 S805X3 SoC
+ items:
+ - enum:
+ - amlogic,bp201
+ - const: amlogic,s7
+
+ - description: Boards with the Amlogic S7D S905X5M SoC
+ items:
+ - enum:
+ - amlogic,bm202
+ - const: amlogic,s7d
+
+ - description: Boards with the Amlogic T7 A311D2 SoC
+ items:
+ - enum:
+ - amlogic,an400
+ - khadas,vim4
+ - const: amlogic,a311d2
+ - const: amlogic,t7
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/amlogic/amlogic,meson-gx-ao-secure.yaml b/Documentation/devicetree/bindings/arm/amlogic/amlogic,meson-gx-ao-secure.yaml
new file mode 100644
index 000000000000..b4f6695a6015
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/amlogic/amlogic,meson-gx-ao-secure.yaml
@@ -0,0 +1,62 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright 2019 BayLibre, SAS
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/amlogic/amlogic,meson-gx-ao-secure.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Amlogic Meson Firmware registers Interface
+
+maintainers:
+ - Neil Armstrong <neil.armstrong@linaro.org>
+
+description: |
+ The Meson SoCs have a register bank with status and data shared with the
+ secure firmware.
+
+# We need a select here so we don't match all nodes with 'syscon'
+select:
+ properties:
+ compatible:
+ contains:
+ const: amlogic,meson-gx-ao-secure
+ required:
+ - compatible
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - const: amlogic,meson-gx-ao-secure
+ - const: syscon
+ - items:
+ - enum:
+ - amlogic,a4-ao-secure
+ - amlogic,c3-ao-secure
+ - amlogic,s4-ao-secure
+ - amlogic,t7-ao-secure
+ - const: amlogic,meson-gx-ao-secure
+ - const: syscon
+ reg:
+ maxItems: 1
+
+ amlogic,has-chip-id:
+ description: |
+ A firmware register encodes the SoC type, package and revision
+ information on the Meson GX SoCs. If present, the interface gives
+ the current SoC version.
+ type: boolean
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ ao-secure@140 {
+ compatible = "amlogic,meson-gx-ao-secure", "syscon";
+ reg = <0x140 0x140>;
+ amlogic,has-chip-id;
+ };
diff --git a/Documentation/devicetree/bindings/arm/amlogic/amlogic,meson-mx-secbus2.yaml b/Documentation/devicetree/bindings/arm/amlogic/amlogic,meson-mx-secbus2.yaml
new file mode 100644
index 000000000000..09b27e98d4c9
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/amlogic/amlogic,meson-mx-secbus2.yaml
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/amlogic/amlogic,meson-mx-secbus2.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Amlogic Meson8/Meson8b/Meson8m2 SECBUS2 register interface
+
+maintainers:
+ - Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+
+description: |
+ The Meson8/Meson8b/Meson8m2 SoCs have a register bank called SECBUS2 which
+ contains registers for various IP blocks such as pin-controller bits for
+ the BSD_EN and TEST_N GPIOs as well as some AO ARC core control bits.
+ The registers can be accessed directly when not running in "secure mode".
+ When "secure mode" is enabled then these registers have to be accessed
+ through secure monitor calls.
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - amlogic,meson8-secbus2
+ - amlogic,meson8b-secbus2
+ - const: syscon
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ secbus2: system-controller@4000 {
+ compatible = "amlogic,meson8-secbus2", "syscon";
+ reg = <0x4000 0x2000>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/amlogic/analog-top.txt b/Documentation/devicetree/bindings/arm/amlogic/analog-top.txt
deleted file mode 100644
index 101dc21014ec..000000000000
--- a/Documentation/devicetree/bindings/arm/amlogic/analog-top.txt
+++ /dev/null
@@ -1,20 +0,0 @@
-Amlogic Meson8 and Meson8b "analog top" registers:
---------------------------------------------------
-
-The analog top registers contain information about the so-called
-"metal revision" (which encodes the "minor version") of the SoC.
-
-Required properties:
-- reg: the register range of the analog top registers
-- compatible: depending on the SoC this should be one of:
- - "amlogic,meson8-analog-top"
- - "amlogic,meson8b-analog-top"
- along with "syscon"
-
-
-Example:
-
- analog_top: analog-top@81a8 {
- compatible = "amlogic,meson8-analog-top", "syscon";
- reg = <0x81a8 0x14>;
- };
diff --git a/Documentation/devicetree/bindings/arm/amlogic/assist.txt b/Documentation/devicetree/bindings/arm/amlogic/assist.txt
deleted file mode 100644
index 7656812b67b9..000000000000
--- a/Documentation/devicetree/bindings/arm/amlogic/assist.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-Amlogic Meson6/Meson8/Meson8b assist registers:
------------------------------------------------
-
-The assist registers contain basic information about the SoC,
-for example the encoded SoC part number.
-
-Required properties:
-- reg: the register range of the assist registers
-- compatible: should be "amlogic,meson-mx-assist" along with "syscon"
-
-
-Example:
-
- assist: assist@7c00 {
- compatible = "amlogic,meson-mx-assist", "syscon";
- reg = <0x7c00 0x200>;
- };
diff --git a/Documentation/devicetree/bindings/arm/amlogic/bootrom.txt b/Documentation/devicetree/bindings/arm/amlogic/bootrom.txt
deleted file mode 100644
index 407e27f230ab..000000000000
--- a/Documentation/devicetree/bindings/arm/amlogic/bootrom.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-Amlogic Meson6/Meson8/Meson8b bootrom:
---------------------------------------
-
-The bootrom register area can be used to access SoC specific
-information, such as the "misc version".
-
-Required properties:
-- reg: the register range of the bootrom registers
-- compatible: should be "amlogic,meson-mx-bootrom" along with "syscon"
-
-
-Example:
-
- bootrom: bootrom@d9040000 {
- compatible = "amlogic,meson-mx-bootrom", "syscon";
- reg = <0xd9040000 0x10000>;
- };
diff --git a/Documentation/devicetree/bindings/arm/amlogic/pmu.txt b/Documentation/devicetree/bindings/arm/amlogic/pmu.txt
deleted file mode 100644
index 72f8d08198b6..000000000000
--- a/Documentation/devicetree/bindings/arm/amlogic/pmu.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-Amlogic Meson8 and Meson8b power-management-unit:
--------------------------------------------------
-
-The pmu is used to turn off and on different power domains of the SoCs
-This includes the power to the CPU cores.
-
-Required node properties:
-- compatible value : depending on the SoC this should be one of:
- "amlogic,meson8-pmu"
- "amlogic,meson8b-pmu"
-- reg : physical base address and the size of the registers window
-
-Example:
-
- pmu@c81000e4 {
- compatible = "amlogic,meson8b-pmu", "syscon";
- reg = <0xc81000e0 0x18>;
- };
diff --git a/Documentation/devicetree/bindings/arm/amlogic/smp-sram.txt b/Documentation/devicetree/bindings/arm/amlogic/smp-sram.txt
deleted file mode 100644
index 3473ddaadfac..000000000000
--- a/Documentation/devicetree/bindings/arm/amlogic/smp-sram.txt
+++ /dev/null
@@ -1,32 +0,0 @@
-Amlogic Meson8 and Meson8b SRAM for smp bringup:
-------------------------------------------------
-
-Amlogic's SMP-capable SoCs use part of the sram for the bringup of the cores.
-Once the core gets powered up it executes the code that is residing at a
-specific location.
-
-Therefore a reserved section sub-node has to be added to the mmio-sram
-declaration.
-
-Required sub-node properties:
-- compatible : depending on the SoC this should be one of:
- "amlogic,meson8-smp-sram"
- "amlogic,meson8b-smp-sram"
-
-The rest of the properties should follow the generic mmio-sram discription
-found in ../../misc/sram.txt
-
-Example:
-
- sram: sram@d9000000 {
- compatible = "mmio-sram";
- reg = <0xd9000000 0x20000>;
- #address-cells = <1>;
- #size-cells = <1>;
- ranges = <0 0xd9000000 0x20000>;
-
- smp-sram@1ff80 {
- compatible = "amlogic,meson8b-smp-sram";
- reg = <0x1ff80 0x8>;
- };
- };
diff --git a/Documentation/devicetree/bindings/arm/apm/scu.txt b/Documentation/devicetree/bindings/arm/apm/scu.txt
deleted file mode 100644
index b45be06625fd..000000000000
--- a/Documentation/devicetree/bindings/arm/apm/scu.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-APM X-GENE SoC series SCU Registers
-
-This system clock unit contain various register that control block resets,
-clock enable/disables, clock divisors and other deepsleep registers.
-
-Properties:
- - compatible : should contain two values. First value must be:
- - "apm,xgene-scu"
- second value must be always "syscon".
-
- - reg : offset and length of the register set.
-
-Example :
- scu: system-clk-controller@17000000 {
- compatible = "apm,xgene-scu","syscon";
- reg = <0x0 0x17000000 0x0 0x400>;
- };
diff --git a/Documentation/devicetree/bindings/arm/apple.yaml b/Documentation/devicetree/bindings/arm/apple.yaml
new file mode 100644
index 000000000000..5c2629ec3d4c
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/apple.yaml
@@ -0,0 +1,352 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/apple.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Apple ARM Machine
+
+maintainers:
+ - Hector Martin <marcan@marcan.st>
+
+description: |
+ ARM platforms using SoCs designed by Apple Inc., branded "Apple Silicon".
+
+ This currently includes devices based on the "A7" SoC:
+
+ - iPhone 5s
+ - iPad Air (1)
+ - iPad mini 2
+ - iPad mini 3
+
+ Devices based on the "A8" SoC:
+
+ - iPhone 6
+ - iPhone 6 Plus
+ - iPad mini 4
+ - iPod touch 6
+ - Apple TV HD
+
+ Device based on the "A8X" SoC:
+
+ - iPad Air 2
+
+ Devices based on the "A9" SoC:
+
+ - iPhone 6s
+ - iPhone 6s Plus
+ - iPhone SE (2016)
+ - iPad 5
+
+ Devices based on the "A9X" SoC:
+
+ - iPad Pro (9.7-inch)
+ - iPad Pro (12.9-inch)
+
+ Devices based on the "A10" SoC:
+
+ - iPhone 7
+ - iPhone 7 Plus
+ - iPod touch 7
+ - iPad 6
+ - iPad 7
+
+ Devices based on the "A10X" SoC:
+
+ - Apple TV 4K (1st generation)
+ - iPad Pro (2nd Generation) (10.5 Inch)
+ - iPad Pro (2nd Generation) (12.9 Inch)
+
+ Devices based on the "T2" SoC:
+
+ - Apple T2 MacBookPro15,2 (j132)
+ - Apple T2 iMacPro1,1 (j137)
+ - Apple T2 MacBookAir8,2 (j140a)
+ - Apple T2 MacBookAir8,1 (j140k)
+ - Apple T2 MacBookPro16,1 (j152f)
+ - Apple T2 MacPro7,1 (j160)
+ - Apple T2 Macmini8,1 (j174)
+ - Apple T2 iMac20,1 (j185)
+ - Apple T2 iMac20,2 (j185f)
+ - Apple T2 MacBookPro15,4 (j213)
+ - Apple T2 MacBookPro16,2 (j214k)
+ - Apple T2 MacBookPro16,4 (j215)
+ - Apple T2 MacBookPro16,3 (j223)
+ - Apple T2 MacBookAir9,1 (j230k)
+ - Apple T2 MacBookPro15,1 (j680)
+ - Apple T2 MacBookPro15,3 (j780)
+
+ Devices based on the "A11" SoC:
+
+ - iPhone 8
+ - iPhone 8 Plus
+ - iPhone X
+
+ Devices based on the "M1" SoC:
+
+ - Mac mini (M1, 2020)
+ - MacBook Pro (13-inch, M1, 2020)
+ - MacBook Air (M1, 2020)
+ - iMac (24-inch, M1, 2021)
+
+ Devices based on the "M2" SoC:
+
+ - MacBook Air (M2, 2022)
+ - MacBook Air (15-inch, M2, 2023)
+ - MacBook Pro (13-inch, M2, 2022)
+ - Mac mini (M2, 2023)
+
+ Devices based on the "M1 Pro", "M1 Max" and "M1 Ultra" SoCs:
+
+ - MacBook Pro (14-inch, M1 Pro, 2021)
+ - MacBook Pro (14-inch, M1 Max, 2021)
+ - MacBook Pro (16-inch, M1 Pro, 2021)
+ - MacBook Pro (16-inch, M1 Max, 2021)
+ - Mac Studio (M1 Max, 2022)
+ - Mac Studio (M1 Ultra, 2022)
+
+ Devices based on the "M2 Pro", "M2 Max" and "M2 Ultra" SoCs:
+
+ - MacBook Pro (14-inch, M2 Pro, 2023)
+ - MacBook Pro (14-inch, M2 Max, 2023)
+ - MacBook Pro (16-inch, M2 Pro, 2023)
+ - MacBook Pro (16-inch, M2 Max, 2023)
+ - Mac mini (M2 Pro, 2023)
+ - Mac Studio (M2 Max, 2023)
+ - Mac Studio (M2 Ultra, 2023)
+ - Mac Pro (M2 Ultra, 2023)
+
+ The compatible property should follow this format:
+
+ compatible = "apple,<targettype>", "apple,<socid>", "apple,arm-platform";
+
+ <targettype> represents the board/device and comes from the `target-type`
+ property of the root node of the Apple Device Tree, lowercased. It can be
+ queried on macOS using the following command:
+
+ $ ioreg -d2 -l | grep target-type
+
+ <socid> is the lowercased SoC ID. Apple uses at least *five* different
+ names for their SoCs:
+
+ - Marketing name ("M1")
+ - Internal name ("H13G")
+ - Codename ("Tonga")
+ - SoC ID ("T8103")
+ - Package/IC part number ("APL1102")
+
+ Devicetrees should use the lowercased SoC ID, to avoid confusion if
+ multiple SoCs share the same marketing name. This can be obtained from
+ the `compatible` property of the arm-io node of the Apple Device Tree,
+ which can be queried as follows on macOS:
+
+ $ ioreg -n arm-io | grep compatible
+
+properties:
+ $nodename:
+ const: "/"
+ compatible:
+ oneOf:
+ - description: Apple A7 SoC based platforms
+ items:
+ - enum:
+ - apple,j71 # iPad Air (Wi-Fi)
+ - apple,j72 # iPad Air (Cellular)
+ - apple,j73 # iPad Air (Cellular, China)
+ - apple,j85 # iPad mini 2 (Wi-Fi)
+ - apple,j85m # iPad mini 3 (Wi-Fi)
+ - apple,j86 # iPad mini 2 (Cellular)
+ - apple,j86m # iPad mini 3 (Cellular)
+ - apple,j87 # iPad mini 2 (Cellular, China)
+ - apple,j87m # iPad mini 3 (Cellular, China)
+ - apple,n51 # iPhone 5s (GSM)
+ - apple,n53 # iPhone 5s (LTE)
+ - const: apple,s5l8960x
+ - const: apple,arm-platform
+
+ - description: Apple A8 SoC based platforms
+ items:
+ - enum:
+ - apple,j42d # Apple TV HD
+ - apple,j96 # iPad mini 4 (Wi-Fi)
+ - apple,j97 # iPad mini 4 (Cellular)
+ - apple,n56 # iPhone 6 Plus
+ - apple,n61 # iPhone 6
+ - apple,n102 # iPod touch 6
+ - const: apple,t7000
+ - const: apple,arm-platform
+
+ - description: Apple A8X SoC based platforms
+ items:
+ - enum:
+ - apple,j81 # iPad Air 2 (Wi-Fi)
+ - apple,j82 # iPad Air 2 (Cellular)
+ - const: apple,t7001
+ - const: apple,arm-platform
+
+ - description: Apple Samsung A9 SoC based platforms
+ items:
+ - enum:
+ - apple,j71s # iPad 5 (Wi-Fi) (S8000)
+ - apple,j72s # iPad 5 (Cellular) (S8000)
+ - apple,n66 # iPhone 6s Plus (S8000)
+ - apple,n69u # iPhone SE (S8000)
+ - apple,n71 # iPhone 6S (S8000)
+ - const: apple,s8000
+ - const: apple,arm-platform
+
+ - description: Apple TSMC A9 SoC based platforms
+ items:
+ - enum:
+ - apple,j71t # iPad 5 (Wi-Fi) (S8003)
+ - apple,j72t # iPad 5 (Cellular) (S8003)
+ - apple,n66m # iPhone 6s Plus (S8003)
+ - apple,n69 # iPhone SE (S8003)
+ - apple,n71m # iPhone 6S (S8003)
+ - const: apple,s8003
+ - const: apple,arm-platform
+
+ - description: Apple A9X SoC based platforms
+ items:
+ - enum:
+ - apple,j127 # iPad Pro (9.7-inch) (Wi-Fi)
+ - apple,j128 # iPad Pro (9.7-inch) (Cellular)
+ - apple,j98a # iPad Pro (12.9-inch) (Wi-Fi)
+ - apple,j99a # iPad Pro (12.9-inch) (Cellular)
+ - const: apple,s8001
+ - const: apple,arm-platform
+
+ - description: Apple A10 SoC based platforms
+ items:
+ - enum:
+ - apple,d10 # iPhone 7 (Qualcomm)
+ - apple,d11 # iPhone 7 (Intel)
+ - apple,d101 # iPhone 7 Plus (Qualcomm)
+ - apple,d111 # iPhone 7 Plus (Intel)
+ - apple,j71b # iPad 6 (Wi-Fi)
+ - apple,j72b # iPad 6 (Cellular)
+ - apple,j171 # iPad 7 (Wi-Fi)
+ - apple,j172 # iPad 7 (Cellular)
+ - apple,n112 # iPod touch 7
+ - const: apple,t8010
+ - const: apple,arm-platform
+
+ - description: Apple A10X SoC based platforms
+ items:
+ - enum:
+ - apple,j105a # Apple TV 4K (1st Generation)
+ - apple,j120 # iPad Pro 2 (12.9-inch) (Wi-Fi)
+ - apple,j121 # iPad Pro 2 (12.9-inch) (Cellular)
+ - apple,j207 # iPad Pro 2 (10.5-inch) (Wi-Fi)
+ - apple,j208 # iPad Pro 2 (10.5-inch) (Cellular)
+ - const: apple,t8011
+ - const: apple,arm-platform
+
+ - description: Apple T2 SoC based platforms
+ items:
+ - enum:
+ - apple,j132 # Apple T2 MacBookPro15,2 (j132)
+ - apple,j137 # Apple T2 iMacPro1,1 (j137)
+ - apple,j140a # Apple T2 MacBookAir8,2 (j140a)
+ - apple,j140k # Apple T2 MacBookAir8,1 (j140k)
+ - apple,j152f # Apple T2 MacBookPro16,1 (j152f)
+ - apple,j160 # Apple T2 MacPro7,1 (j160)
+ - apple,j174 # Apple T2 Macmini8,1 (j174)
+ - apple,j185 # Apple T2 iMac20,1 (j185)
+ - apple,j185f # Apple T2 iMac20,2 (j185f)
+ - apple,j213 # Apple T2 MacBookPro15,4 (j213)
+ - apple,j214k # Apple T2 MacBookPro16,2 (j214k)
+ - apple,j215 # Apple T2 MacBookPro16,4 (j215)
+ - apple,j223 # Apple T2 MacBookPro16,3 (j223)
+ - apple,j230k # Apple T2 MacBookAir9,1 (j230k)
+ - apple,j680 # Apple T2 MacBookPro15,1 (j680)
+ - apple,j780 # Apple T2 MacBookPro15,3 (j780)
+ - const: apple,t8012
+ - const: apple,arm-platform
+
+ - description: Apple A11 SoC based platforms
+ items:
+ - enum:
+ - apple,d20 # iPhone 8 (Global)
+ - apple,d21 # iPhone 8 Plus (Global)
+ - apple,d22 # iPhone X (Global)
+ - apple,d201 # iPhone 8 (GSM)
+ - apple,d211 # iPhone 8 Plus (GSM)
+ - apple,d221 # iPhone X (GSM)
+ - const: apple,t8015
+ - const: apple,arm-platform
+
+ - description: Apple M1 SoC based platforms
+ items:
+ - enum:
+ - apple,j274 # Mac mini (M1, 2020)
+ - apple,j293 # MacBook Pro (13-inch, M1, 2020)
+ - apple,j313 # MacBook Air (M1, 2020)
+ - apple,j456 # iMac (24-inch, 4x USB-C, M1, 2021)
+ - apple,j457 # iMac (24-inch, 2x USB-C, M1, 2021)
+ - const: apple,t8103
+ - const: apple,arm-platform
+
+ - description: Apple M2 SoC based platforms
+ items:
+ - enum:
+ - apple,j413 # MacBook Air (M2, 2022)
+ - apple,j415 # MacBook Air (15-inch, M2, 2023)
+ - apple,j473 # Mac mini (M2, 2023)
+ - apple,j493 # MacBook Pro (13-inch, M2, 2022)
+ - const: apple,t8112
+ - const: apple,arm-platform
+
+ - description: Apple M1 Pro SoC based platforms
+ items:
+ - enum:
+ - apple,j314s # MacBook Pro (14-inch, M1 Pro, 2021)
+ - apple,j316s # MacBook Pro (16-inch, M1 Pro, 2021)
+ - const: apple,t6000
+ - const: apple,arm-platform
+
+ - description: Apple M1 Max SoC based platforms
+ items:
+ - enum:
+ - apple,j314c # MacBook Pro (14-inch, M1 Max, 2021)
+ - apple,j316c # MacBook Pro (16-inch, M1 Max, 2021)
+ - apple,j375c # Mac Studio (M1 Max, 2022)
+ - const: apple,t6001
+ - const: apple,arm-platform
+
+ - description: Apple M1 Ultra SoC based platforms
+ items:
+ - enum:
+ - apple,j375d # Mac Studio (M1 Ultra, 2022)
+ - const: apple,t6002
+ - const: apple,arm-platform
+
+ - description: Apple M2 Pro SoC based platforms
+ items:
+ - enum:
+ - apple,j414s # MacBook Pro (14-inch, M2 Pro, 2023)
+ - apple,j416s # MacBook Pro (16-inch, M2 Pro, 2023)
+ - apple,j474s # Mac mini (M2 Pro, 2023)
+ - const: apple,t6020
+ - const: apple,arm-platform
+
+ - description: Apple M2 Max SoC based platforms
+ items:
+ - enum:
+ - apple,j414c # MacBook Pro (14-inch, M2 Max, 2023)
+ - apple,j416c # MacBook Pro (16-inch, M2 Max, 2023)
+ - apple,j475c # Mac Studio (M2 Max, 2023)
+ - const: apple,t6021
+ - const: apple,arm-platform
+
+ - description: Apple M2 Ultra SoC based platforms
+ items:
+ - enum:
+ - apple,j180d # Mac Pro (M2 Ultra, 2023)
+ - apple,j475d # Mac Studio (M2 Ultra, 2023)
+ - const: apple,t6022
+ - const: apple,arm-platform
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/apple/apple,pmgr.yaml b/Documentation/devicetree/bindings/arm/apple/apple,pmgr.yaml
new file mode 100644
index 000000000000..b88f41a225a3
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/apple/apple,pmgr.yaml
@@ -0,0 +1,147 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/apple/apple,pmgr.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Apple SoC Power Manager (PMGR)
+
+maintainers:
+ - Hector Martin <marcan@marcan.st>
+
+description: |
+ Apple SoCs include PMGR blocks responsible for power management,
+ which can control various clocks, resets, power states, and
+ performance features. This node represents the PMGR as a syscon,
+ with sub-nodes representing individual features.
+
+properties:
+ $nodename:
+ pattern: "^power-management@[0-9a-f]+$"
+
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ # Do not add additional SoC to this list.
+ - apple,s5l8960x-pmgr
+ - apple,t7000-pmgr
+ - apple,s8000-pmgr
+ - apple,t8010-pmgr
+ - apple,t8015-pmgr
+ - apple,t8103-pmgr
+ - apple,t8112-pmgr
+ - apple,t6000-pmgr
+ - const: apple,pmgr
+ - const: syscon
+ - const: simple-mfd
+ - items:
+ - const: apple,t6020-pmgr
+ - const: apple,t8103-pmgr
+ - const: syscon
+ - const: simple-mfd
+
+ reg:
+ maxItems: 1
+
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 1
+
+patternProperties:
+ "power-controller@[0-9a-f]+$":
+ description:
+ The individual power management domains within this controller
+ type: object
+ $ref: /schemas/power/apple,pmgr-pwrstate.yaml#
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ power-management@23b700000 {
+ compatible = "apple,t8103-pmgr", "apple,pmgr", "syscon", "simple-mfd";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0x2 0x3b700000 0x0 0x14000>;
+
+ ps_sio: power-controller@1c0 {
+ compatible = "apple,t8103-pmgr-pwrstate", "apple,pmgr-pwrstate";
+ reg = <0x1c0 8>;
+ #power-domain-cells = <0>;
+ #reset-cells = <0>;
+ label = "sio";
+ apple,always-on;
+ };
+
+ ps_uart_p: power-controller@220 {
+ compatible = "apple,t8103-pmgr-pwrstate", "apple,pmgr-pwrstate";
+ reg = <0x220 8>;
+ #power-domain-cells = <0>;
+ #reset-cells = <0>;
+ label = "uart_p";
+ power-domains = <&ps_sio>;
+ };
+
+ ps_uart0: power-controller@270 {
+ compatible = "apple,t8103-pmgr-pwrstate", "apple,pmgr-pwrstate";
+ reg = <0x270 8>;
+ #power-domain-cells = <0>;
+ #reset-cells = <0>;
+ label = "uart0";
+ power-domains = <&ps_uart_p>;
+ };
+ };
+
+ power-management@23d280000 {
+ compatible = "apple,t8103-pmgr", "apple,pmgr", "syscon", "simple-mfd";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0x2 0x3d280000 0x0 0xc000>;
+
+ ps_aop_filter: power-controller@4000 {
+ compatible = "apple,t8103-pmgr-pwrstate", "apple,pmgr-pwrstate";
+ reg = <0x4000 8>;
+ #power-domain-cells = <0>;
+ #reset-cells = <0>;
+ label = "aop_filter";
+ };
+
+ ps_aop_base: power-controller@4010 {
+ compatible = "apple,t8103-pmgr-pwrstate", "apple,pmgr-pwrstate";
+ reg = <0x4010 8>;
+ #power-domain-cells = <0>;
+ #reset-cells = <0>;
+ label = "aop_base";
+ power-domains = <&ps_aop_filter>;
+ };
+
+ ps_aop_shim: power-controller@4038 {
+ compatible = "apple,t8103-pmgr-pwrstate", "apple,pmgr-pwrstate";
+ reg = <0x4038 8>;
+ #power-domain-cells = <0>;
+ #reset-cells = <0>;
+ label = "aop_shim";
+ power-domains = <&ps_aop_base>;
+ };
+
+ ps_aop_uart0: power-controller@4048 {
+ compatible = "apple,t8103-pmgr-pwrstate", "apple,pmgr-pwrstate";
+ reg = <0x4048 8>;
+ #power-domain-cells = <0>;
+ #reset-cells = <0>;
+ label = "aop_uart0";
+ power-domains = <&ps_aop_shim>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/arm/arm,cci-400.yaml b/Documentation/devicetree/bindings/arm/arm,cci-400.yaml
new file mode 100644
index 000000000000..d28303d909e1
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/arm,cci-400.yaml
@@ -0,0 +1,211 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/arm,cci-400.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM CCI Cache Coherent Interconnect
+
+maintainers:
+ - Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+
+description: >
+ ARM multi-cluster systems maintain intra-cluster coherency through a cache
+ coherent interconnect (CCI) that is capable of monitoring bus transactions
+ and manage coherency, TLB invalidations and memory barriers.
+
+ It allows snooping and distributed virtual memory message broadcast across
+ clusters, through memory mapped interface, with a global control register
+ space and multiple sets of interface control registers, one per slave
+ interface.
+
+properties:
+ $nodename:
+ pattern: "^cci(@[0-9a-f]+)?$"
+
+ compatible:
+ enum:
+ - arm,cci-400
+ - arm,cci-500
+ - arm,cci-550
+
+ reg:
+ maxItems: 1
+ description: >
+ Specifies base physical address of CCI control registers common to all
+ interfaces.
+
+ "#address-cells": true
+ "#size-cells": true
+ ranges: true
+
+patternProperties:
+ "^slave-if@[0-9a-f]+$":
+ type: object
+
+ properties:
+ compatible:
+ const: arm,cci-400-ctrl-if
+
+ interface-type:
+ enum:
+ - ace
+ - ace-lite
+
+ reg:
+ maxItems: 1
+
+ required:
+ - compatible
+ - interface-type
+ - reg
+
+ additionalProperties: false
+
+ "^pmu@[0-9a-f]+$":
+ type: object
+
+ properties:
+ compatible:
+ oneOf:
+ - const: arm,cci-400-pmu,r0
+ - const: arm,cci-400-pmu,r1
+ - const: arm,cci-400-pmu
+ deprecated: true
+ description: >
+ Permitted only where OS has secure access to CCI registers
+ - const: arm,cci-500-pmu,r0
+ - const: arm,cci-550-pmu,r0
+
+ interrupts:
+ minItems: 1
+ maxItems: 8
+ description: >
+ List of counter overflow interrupts, one per counter. The interrupts
+ must be specified starting with the cycle counter overflow interrupt,
+ followed by counter0 overflow interrupt, counter1 overflow
+ interrupt,... ,counterN overflow interrupt.
+
+ The CCI PMU has an interrupt signal for each counter. The number of
+ interrupts must be equal to the number of counters.
+
+ reg:
+ maxItems: 1
+
+ required:
+ - compatible
+ - interrupts
+ - reg
+
+ additionalProperties: false
+
+required:
+ - "#address-cells"
+ - "#size-cells"
+ - compatible
+ - ranges
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ / {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ compatible = "arm,vexpress,v2p-ca15_a7", "arm,vexpress";
+ model = "V2P-CA15_CA7";
+ arm,hbi = <0x249>;
+ interrupt-parent = <&gic>;
+
+ gic: interrupt-controller {
+ interrupt-controller;
+ #interrupt-cells = <3>;
+ };
+
+ /*
+ * This CCI node corresponds to a CCI component whose control
+ * registers sits at address 0x000000002c090000.
+ *
+ * CCI slave interface @0x000000002c091000 is connected to dma
+ * controller dma0.
+ *
+ * CCI slave interface @0x000000002c094000 is connected to CPUs
+ * {CPU0, CPU1};
+ *
+ * CCI slave interface @0x000000002c095000 is connected to CPUs
+ * {CPU2, CPU3};
+ */
+
+ cpus {
+ #size-cells = <0>;
+ #address-cells = <1>;
+
+ CPU0: cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ cci-control-port = <&cci_control1>;
+ reg = <0x0>;
+ };
+
+ CPU1: cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ cci-control-port = <&cci_control1>;
+ reg = <0x1>;
+ };
+
+ CPU2: cpu@100 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a7";
+ cci-control-port = <&cci_control2>;
+ reg = <0x100>;
+ };
+
+ CPU3: cpu@101 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a7";
+ cci-control-port = <&cci_control2>;
+ reg = <0x101>;
+ };
+ };
+
+ cci@2c090000 {
+ compatible = "arm,cci-400";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0x0 0x2c090000 0 0x1000>;
+ ranges = <0x0 0x0 0x2c090000 0x10000>;
+
+ cci_control0: slave-if@1000 {
+ compatible = "arm,cci-400-ctrl-if";
+ interface-type = "ace-lite";
+ reg = <0x1000 0x1000>;
+ };
+
+ cci_control1: slave-if@4000 {
+ compatible = "arm,cci-400-ctrl-if";
+ interface-type = "ace";
+ reg = <0x4000 0x1000>;
+ };
+
+ cci_control2: slave-if@5000 {
+ compatible = "arm,cci-400-ctrl-if";
+ interface-type = "ace";
+ reg = <0x5000 0x1000>;
+ };
+
+ pmu@9000 {
+ compatible = "arm,cci-400-pmu";
+ reg = <0x9000 0x5000>;
+ interrupts = <0 101 4>,
+ <0 102 4>,
+ <0 103 4>,
+ <0 104 4>,
+ <0 105 4>;
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/arm/arm,coresight-catu.yaml b/Documentation/devicetree/bindings/arm/arm,coresight-catu.yaml
new file mode 100644
index 000000000000..2bae06eed693
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/arm,coresight-catu.yaml
@@ -0,0 +1,104 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/arm,coresight-catu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Arm Coresight Address Translation Unit (CATU)
+
+maintainers:
+ - Mathieu Poirier <mathieu.poirier@linaro.org>
+ - Mike Leach <mike.leach@linaro.org>
+ - Leo Yan <leo.yan@linaro.org>
+ - Suzuki K Poulose <suzuki.poulose@arm.com>
+
+description: |
+ CoreSight components are compliant with the ARM CoreSight architecture
+ specification and can be connected in various topologies to suit a particular
+ SoCs tracing needs. These trace components can generally be classified as
+ sinks, links and sources. Trace data produced by one or more sources flows
+ through the intermediate links connecting the source to the currently selected
+ sink.
+
+ The CoreSight Address Translation Unit (CATU) translates addresses between an
+ AXI master and system memory. The CATU is normally used along with the TMC to
+ implement scattering of virtual trace buffers in physical memory. The CATU
+ translates contiguous Virtual Addresses (VAs) from an AXI master into
+ non-contiguous Physical Addresses (PAs) that are intended for system memory.
+
+# Need a custom select here or 'arm,primecell' will match on lots of nodes
+select:
+ properties:
+ compatible:
+ contains:
+ const: arm,coresight-catu
+ required:
+ - compatible
+
+allOf:
+ - $ref: /schemas/arm/primecell.yaml#
+
+properties:
+ compatible:
+ items:
+ - const: arm,coresight-catu
+ - const: arm,primecell
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ minItems: 1
+ maxItems: 2
+
+ clock-names:
+ minItems: 1
+ items:
+ - const: apb_pclk
+ - const: atclk
+
+ interrupts:
+ maxItems: 1
+ description: Address translation error interrupt
+
+ power-domains:
+ maxItems: 1
+
+ in-ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+ additionalProperties: false
+
+ properties:
+ port:
+ description: AXI Slave connected to another Coresight component
+ $ref: /schemas/graph.yaml#/properties/port
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - in-ports
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ catu@207e0000 {
+ compatible = "arm,coresight-catu", "arm,primecell";
+ reg = <0x207e0000 0x1000>;
+
+ clocks = <&oscclk6a>;
+ clock-names = "apb_pclk";
+
+ interrupts = <GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>;
+ in-ports {
+ port {
+ catu_in_port: endpoint {
+ remote-endpoint = <&etr_out_port>;
+ };
+ };
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/arm/arm,coresight-cpu-debug.yaml b/Documentation/devicetree/bindings/arm/arm,coresight-cpu-debug.yaml
new file mode 100644
index 000000000000..0a6bc03ebe00
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/arm,coresight-cpu-debug.yaml
@@ -0,0 +1,81 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/arm,coresight-cpu-debug.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: CoreSight CPU Debug Component
+
+maintainers:
+ - Mathieu Poirier <mathieu.poirier@linaro.org>
+ - Mike Leach <mike.leach@linaro.org>
+ - Leo Yan <leo.yan@linaro.org>
+ - Suzuki K Poulose <suzuki.poulose@arm.com>
+
+description: |
+ CoreSight CPU debug component are compliant with the ARMv8 architecture
+ reference manual (ARM DDI 0487A.k) Chapter 'Part H: External debug'. The
+ external debug module is mainly used for two modes: self-hosted debug and
+ external debug, and it can be accessed from mmio region from Coresight and
+ eventually the debug module connects with CPU for debugging. And the debug
+ module provides sample-based profiling extension, which can be used to sample
+ CPU program counter, secure state and exception level, etc; usually every CPU
+ has one dedicated debug module to be connected.
+
+select:
+ properties:
+ compatible:
+ contains:
+ const: arm,coresight-cpu-debug
+ required:
+ - compatible
+
+allOf:
+ - $ref: /schemas/arm/primecell.yaml#
+
+properties:
+ compatible:
+ items:
+ - const: arm,coresight-cpu-debug
+ - const: arm,primecell
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ maxItems: 1
+
+ cpu:
+ description:
+ A phandle to the cpu this debug component is bound to.
+ $ref: /schemas/types.yaml#/definitions/phandle
+
+ power-domains:
+ maxItems: 1
+ description:
+ A phandle to the debug power domain if the debug logic has its own
+ dedicated power domain. CPU idle states may also need to be separately
+ constrained to keep CPU cores powered.
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - cpu
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ debug@f6590000 {
+ compatible = "arm,coresight-cpu-debug", "arm,primecell";
+ reg = <0xf6590000 0x1000>;
+ clocks = <&sys_ctrl 1>;
+ clock-names = "apb_pclk";
+ cpu = <&cpu0>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/arm/arm,coresight-cti.yaml b/Documentation/devicetree/bindings/arm/arm,coresight-cti.yaml
new file mode 100644
index 000000000000..2a91670ccb8c
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/arm,coresight-cti.yaml
@@ -0,0 +1,352 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+# Copyright 2019 Linaro Ltd.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/arm,coresight-cti.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM Coresight Cross Trigger Interface (CTI) device.
+
+description: |
+ The CoreSight Embedded Cross Trigger (ECT) consists of CTI devices connected
+ to one or more CoreSight components and/or a CPU, with CTIs interconnected in
+ a star topology via the Cross Trigger Matrix (CTM), which is not programmable.
+ The ECT components are not part of the trace generation data path and are thus
+ not part of the CoreSight graph.
+
+ The CTI component properties define the connections between the individual
+ CTI and the components it is directly connected to, consisting of input and
+ output hardware trigger signals. CTIs can have a maximum number of input and
+ output hardware trigger signals (8 each for v1 CTI, 32 each for v2 CTI). The
+ number is defined at design time, the maximum of each defined in the DEVID
+ register.
+
+ CTIs are interconnected in a star topology via the CTM, using a number of
+ programmable channels, usually 4, but again implementation defined and
+ described in the DEVID register. The star topology is not required to be
+ described in the bindings as the actual connections are software
+ programmable.
+
+ In general the connections between CTI and components via the trigger signals
+ are implementation defined, except when the CTI is connected to an ARM v8
+ architecture core and optional ETM.
+
+ In this case the ARM v8 architecture defines the required signal connections
+ between CTI and the CPU core and ETM if present. In the case of a v8
+ architecturally connected CTI an additional compatible string is used to
+ indicate this feature (arm,coresight-cti-v8-arch).
+
+ When CTI trigger connection information is unavailable then a minimal driver
+ binding can be declared with no explicit trigger signals. This will result
+ the driver detecting the maximum available triggers and channels from the
+ DEVID register and make them all available for use as a single default
+ connection. Any user / client application will require additional information
+ on the connections between the CTI and other components for correct operation.
+ This information might be found by enabling the Integration Test registers in
+ the driver (set CONFIG_CORESIGHT_CTI_INTEGRATION_TEST in Kernel
+ configuration). These registers may be used to explore the trigger connections
+ between CTI and other CoreSight components.
+
+ Certain triggers between CoreSight devices and the CTI have specific types
+ and usages. These can be defined along with the signal indexes with the
+ constants defined in <dt-bindings/arm/coresight-cti-dt.h>
+
+ For example a CTI connected to a core will usually have a DBGREQ signal. This
+ is defined in the binding as type PE_EDBGREQ. These types will appear in an
+ optional array alongside the signal indexes. Omitting types will default all
+ signals to GEN_IO.
+
+ Note that some hardware trigger signals can be connected to non-CoreSight
+ components (e.g. UART etc) depending on hardware implementation.
+
+maintainers:
+ - Mike Leach <mike.leach@linaro.org>
+
+allOf:
+ - $ref: /schemas/arm/primecell.yaml#
+
+# Need a custom select here or 'arm,primecell' will match on lots of nodes
+select:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - arm,coresight-cti
+ required:
+ - compatible
+
+properties:
+ $nodename:
+ pattern: "^cti(@[0-9a-f]+)$"
+ compatible:
+ oneOf:
+ - items:
+ - const: arm,coresight-cti
+ - const: arm,primecell
+ - items:
+ - const: arm,coresight-cti-v8-arch
+ - const: arm,coresight-cti
+ - const: arm,primecell
+
+ reg:
+ maxItems: 1
+
+ cpu:
+ description:
+ Handle to cpu this CTI is associated with.
+
+ power-domains:
+ maxItems: 1
+
+ label:
+ description:
+ Description of a coresight device.
+
+ arm,cti-ctm-id:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ Defines the CTM this CTI is connected to, in large systems with multiple
+ separate CTI/CTM nets. Typically multi-socket systems where the CTM is
+ propagated between sockets.
+
+ arm,cs-dev-assoc:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ defines a phandle reference to an associated CoreSight trace device.
+ When the associated trace device is enabled, then the respective CTI
+ will be enabled. Use in CTI base node when compatible string
+ arm,coresight-cti-v8-arch used. If the associated device has not been
+ registered then the node name will be stored as the connection name for
+ later resolution. If the associated device is not a CoreSight device or
+ not registered then the node name will remain the connection name and
+ automatic enabling will not occur.
+
+ # size cells and address cells required if trig-conns node present.
+ "#size-cells":
+ const: 0
+
+ "#address-cells":
+ const: 1
+
+patternProperties:
+ '^trig-conns@([0-9]+)$':
+ type: object
+ additionalProperties: false
+
+ description:
+ A trigger connections child node which describes the trigger signals
+ between this CTI and another hardware device. This device may be a CPU,
+ CoreSight device, any other hardware device or simple external IO lines.
+ The connection may have both input and output triggers, or only one or the
+ other.
+
+ properties:
+ reg:
+ maxItems: 1
+
+ cpu:
+ description:
+ Handle to cpu this trigger connection is associated with.
+
+ arm,cs-dev-assoc:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ defines a phandle reference to an associated CoreSight trace device.
+ When the associated trace device is enabled, then the respective CTI
+ will be enabled. If the associated device has not been registered
+ then the node name will be stored as the connection name for later
+ resolution. If the associated device is not a CoreSight device or
+ not registered then the node name will remain the connection name
+ and automatic enabling will not occur.
+
+ arm,trig-in-sigs:
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ minItems: 1
+ maxItems: 32
+ description:
+ List of CTI trigger in signal numbers in use by a trig-conns node.
+
+ arm,trig-in-types:
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ minItems: 1
+ maxItems: 32
+ description:
+ List of constants representing the types for the CTI trigger in
+ signals. Types in this array match to the corresponding signal in the
+ arm,trig-in-sigs array. If the -types array is smaller, or omitted
+ completely, then the types will default to GEN_IO.
+
+ arm,trig-out-sigs:
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ minItems: 1
+ maxItems: 32
+ description:
+ List of CTI trigger out signal numbers in use by a trig-conns node.
+
+ arm,trig-out-types:
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ minItems: 1
+ maxItems: 32
+ description:
+ List of constants representing the types for the CTI trigger out
+ signals. Types in this array match to the corresponding signal
+ in the arm,trig-out-sigs array. If the "-types" array is smaller,
+ or omitted completely, then the types will default to GEN_IO.
+
+ arm,trig-filters:
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ minItems: 1
+ maxItems: 32
+ description:
+ List of CTI trigger out signals that will be blocked from becoming
+ active, unless filtering is disabled on the driver.
+
+ arm,trig-conn-name:
+ $ref: /schemas/types.yaml#/definitions/string
+ description:
+ Defines a connection name that will be displayed, if the cpu or
+ arm,cs-dev-assoc properties are not being used in this connection.
+ Principle use for CTI that are connected to non-CoreSight devices, or
+ external IO.
+
+ anyOf:
+ - required:
+ - arm,trig-in-sigs
+ - required:
+ - arm,trig-out-sigs
+ oneOf:
+ - required:
+ - arm,trig-conn-name
+ - required:
+ - cpu
+ - required:
+ - arm,cs-dev-assoc
+ required:
+ - reg
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+
+if:
+ properties:
+ compatible:
+ contains:
+ const: arm,coresight-cti-v8-arch
+
+then:
+ required:
+ - cpu
+
+unevaluatedProperties: false
+
+examples:
+ # minimum CTI definition. DEVID register used to set number of triggers.
+ - |
+ cti@20020000 {
+ compatible = "arm,coresight-cti", "arm,primecell";
+ reg = <0x20020000 0x1000>;
+
+ clocks = <&soc_smc50mhz>;
+ clock-names = "apb_pclk";
+ };
+ # v8 architecturally defined CTI - CPU + ETM connections generated by the
+ # driver according to the v8 architecture specification.
+ - |
+ cti@859000 {
+ compatible = "arm,coresight-cti-v8-arch", "arm,coresight-cti",
+ "arm,primecell";
+ reg = <0x859000 0x1000>;
+
+ clocks = <&soc_smc50mhz>;
+ clock-names = "apb_pclk";
+
+ cpu = <&CPU1>;
+ arm,cs-dev-assoc = <&etm1>;
+ };
+ # Implementation defined CTI - CPU + ETM connections explicitly defined..
+ # Shows use of type constants from dt-bindings/arm/coresight-cti-dt.h
+ # #size-cells and #address-cells are required if trig-conns@ nodes present.
+ - |
+ #include <dt-bindings/arm/coresight-cti-dt.h>
+
+ cti@858000 {
+ compatible = "arm,coresight-cti", "arm,primecell";
+ reg = <0x858000 0x1000>;
+
+ clocks = <&soc_smc50mhz>;
+ clock-names = "apb_pclk";
+
+ arm,cti-ctm-id = <1>;
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ trig-conns@0 {
+ reg = <0>;
+ arm,trig-in-sigs = <4 5 6 7>;
+ arm,trig-in-types = <ETM_EXTOUT
+ ETM_EXTOUT
+ ETM_EXTOUT
+ ETM_EXTOUT>;
+ arm,trig-out-sigs = <4 5 6 7>;
+ arm,trig-out-types = <ETM_EXTIN
+ ETM_EXTIN
+ ETM_EXTIN
+ ETM_EXTIN>;
+ arm,cs-dev-assoc = <&etm0>;
+ };
+
+ trig-conns@1 {
+ reg = <1>;
+ cpu = <&CPU0>;
+ arm,trig-in-sigs = <0 1>;
+ arm,trig-in-types = <PE_DBGTRIGGER
+ PE_PMUIRQ>;
+ arm,trig-out-sigs = <0 1 2 >;
+ arm,trig-out-types = <PE_EDBGREQ
+ PE_DBGRESTART
+ PE_CTIIRQ>;
+
+ arm,trig-filters = <0>;
+ };
+ };
+ # Implementation defined CTI - non CoreSight component connections.
+ - |
+ cti@20110000 {
+ compatible = "arm,coresight-cti", "arm,primecell";
+ reg = <0x20110000 0x1000>;
+
+ clocks = <&soc_smc50mhz>;
+ clock-names = "apb_pclk";
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ trig-conns@0 {
+ reg = <0>;
+ arm,trig-in-sigs = <0>;
+ arm,trig-in-types = <GEN_INTREQ>;
+ arm,trig-out-sigs = <0>;
+ arm,trig-out-types = <GEN_HALTREQ>;
+ arm,trig-conn-name = "sys_profiler";
+ };
+
+ trig-conns@1 {
+ reg = <1>;
+ arm,trig-out-sigs = <2 3>;
+ arm,trig-out-types = <GEN_HALTREQ GEN_RESTARTREQ>;
+ arm,trig-conn-name = "watchdog";
+ };
+
+ trig-conns@2 {
+ reg = <2>;
+ arm,trig-in-sigs = <1 6>;
+ arm,trig-in-types = <GEN_HALTREQ GEN_RESTARTREQ>;
+ arm,trig-conn-name = "g_counter";
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/arm/arm,coresight-dummy-sink.yaml b/Documentation/devicetree/bindings/arm/arm,coresight-dummy-sink.yaml
new file mode 100644
index 000000000000..ed091dc0c10a
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/arm,coresight-dummy-sink.yaml
@@ -0,0 +1,77 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/arm,coresight-dummy-sink.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM Coresight Dummy sink component
+
+description: |
+ CoreSight components are compliant with the ARM CoreSight architecture
+ specification and can be connected in various topologies to suit a particular
+ SoCs tracing needs. These trace components can generally be classified as
+ sinks, links and sources. Trace data produced by one or more sources flows
+ through the intermediate links connecting the source to the currently selected
+ sink.
+
+ The Coresight dummy sink component is for the specific coresight sink devices
+ kernel don't have permission to access or configure, e.g., CoreSight EUD on
+ Qualcomm platforms. It is a mini-USB hub implemented to support the USB-based
+ debug and trace capabilities. For this device, a dummy driver is needed to
+ register it as Coresight sink device in kernel side, so that path can be
+ created in the driver. Then the trace flow would be transferred to EUD via
+ coresight link of AP processor. It provides Coresight API for operations on
+ dummy source devices, such as enabling and disabling them. It also provides
+ the Coresight dummy source paths for debugging.
+
+ The primary use case of the coresight dummy sink is to build path in kernel
+ side for dummy sink component.
+
+maintainers:
+ - Mike Leach <mike.leach@linaro.org>
+ - Suzuki K Poulose <suzuki.poulose@arm.com>
+ - James Clark <james.clark@linaro.org>
+ - Mao Jinlong <quic_jinlmao@quicinc.com>
+ - Hao Zhang <quic_hazha@quicinc.com>
+
+properties:
+ compatible:
+ enum:
+ - arm,coresight-dummy-sink
+
+ label:
+ description:
+ Description of a coresight device.
+
+ in-ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port:
+ description: Input connection from the Coresight Trace bus to
+ dummy sink, such as Embedded USB debugger(EUD).
+
+ $ref: /schemas/graph.yaml#/properties/port
+
+required:
+ - compatible
+ - in-ports
+
+additionalProperties: false
+
+examples:
+ # Minimum dummy sink definition. Dummy sink connect to coresight replicator.
+ - |
+ sink {
+ compatible = "arm,coresight-dummy-sink";
+
+ in-ports {
+ port {
+ eud_in_replicator_swao: endpoint {
+ remote-endpoint = <&replicator_swao_out_eud>;
+ };
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/arm/arm,coresight-dummy-source.yaml b/Documentation/devicetree/bindings/arm/arm,coresight-dummy-source.yaml
new file mode 100644
index 000000000000..78337be42b55
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/arm,coresight-dummy-source.yaml
@@ -0,0 +1,81 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/arm,coresight-dummy-source.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM Coresight Dummy source component
+
+description: |
+ CoreSight components are compliant with the ARM CoreSight architecture
+ specification and can be connected in various topologies to suit a particular
+ SoCs tracing needs. These trace components can generally be classified as
+ sinks, links and sources. Trace data produced by one or more sources flows
+ through the intermediate links connecting the source to the currently selected
+ sink.
+
+ The Coresight dummy source component is for the specific coresight source
+ devices kernel don't have permission to access or configure. For some SOCs,
+ there would be Coresight source trace components on sub-processor which
+ are connected to AP processor via debug bus. For these devices, a dummy driver
+ is needed to register them as Coresight source devices, so that paths can be
+ created in the driver. It provides Coresight API for operations on dummy
+ source devices, such as enabling and disabling them. It also provides the
+ Coresight dummy source paths for debugging.
+
+ The primary use case of the coresight dummy source is to build path in kernel
+ side for dummy source component.
+
+maintainers:
+ - Mike Leach <mike.leach@linaro.org>
+ - Suzuki K Poulose <suzuki.poulose@arm.com>
+ - James Clark <james.clark@linaro.org>
+ - Mao Jinlong <quic_jinlmao@quicinc.com>
+ - Hao Zhang <quic_hazha@quicinc.com>
+
+properties:
+ compatible:
+ enum:
+ - arm,coresight-dummy-source
+
+ label:
+ description:
+ Description of a coresight device.
+
+ arm,static-trace-id:
+ description: If dummy source needs static id support, use this to set trace id.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 1
+ maximum: 111
+
+ out-ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port:
+ description: Output connection from the source to Coresight
+ Trace bus.
+ $ref: /schemas/graph.yaml#/properties/port
+
+required:
+ - compatible
+ - out-ports
+
+additionalProperties: false
+
+examples:
+ # Minimum dummy source definition. Dummy source connect to coresight funnel.
+ - |
+ source {
+ compatible = "arm,coresight-dummy-source";
+
+ out-ports {
+ port {
+ dummy_riscv_out_funnel_swao: endpoint {
+ remote-endpoint = <&funnel_swao_in_dummy_riscv>;
+ };
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/arm/arm,coresight-dynamic-funnel.yaml b/Documentation/devicetree/bindings/arm/arm,coresight-dynamic-funnel.yaml
new file mode 100644
index 000000000000..b74db15e5f8a
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/arm,coresight-dynamic-funnel.yaml
@@ -0,0 +1,133 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/arm,coresight-dynamic-funnel.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Arm CoreSight Programmable Trace Bus Funnel
+
+maintainers:
+ - Mathieu Poirier <mathieu.poirier@linaro.org>
+ - Mike Leach <mike.leach@linaro.org>
+ - Leo Yan <leo.yan@linaro.org>
+ - Suzuki K Poulose <suzuki.poulose@arm.com>
+
+description: |
+ CoreSight components are compliant with the ARM CoreSight architecture
+ specification and can be connected in various topologies to suit a particular
+ SoCs tracing needs. These trace components can generally be classified as
+ sinks, links and sources. Trace data produced by one or more sources flows
+ through the intermediate links connecting the source to the currently selected
+ sink.
+
+ The Coresight funnel merges 2-8 trace sources into a single trace
+ stream with programmable enable and priority of input ports.
+
+# Need a custom select here or 'arm,primecell' will match on lots of nodes
+select:
+ properties:
+ compatible:
+ contains:
+ const: arm,coresight-dynamic-funnel
+ required:
+ - compatible
+
+allOf:
+ - $ref: /schemas/arm/primecell.yaml#
+
+properties:
+ compatible:
+ items:
+ - const: arm,coresight-dynamic-funnel
+ - const: arm,primecell
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ minItems: 1
+ maxItems: 2
+
+ clock-names:
+ minItems: 1
+ items:
+ - const: apb_pclk
+ - const: atclk
+
+ power-domains:
+ maxItems: 1
+
+ label:
+ description:
+ Description of a coresight device.
+
+ in-ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ patternProperties:
+ '^port(@[0-7])?$':
+ description: Input connections from CoreSight Trace bus
+ $ref: /schemas/graph.yaml#/properties/port
+
+ out-ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+ additionalProperties: false
+
+ properties:
+ port:
+ description: Output connection to CoreSight Trace bus
+ $ref: /schemas/graph.yaml#/properties/port
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - in-ports
+ - out-ports
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ funnel@20040000 {
+ compatible = "arm,coresight-dynamic-funnel", "arm,primecell";
+ reg = <0x20040000 0x1000>;
+
+ clocks = <&oscclk6a>;
+ clock-names = "apb_pclk";
+ out-ports {
+ port {
+ funnel_out_port0: endpoint {
+ remote-endpoint = <&replicator_in_port0>;
+ };
+ };
+ };
+
+ in-ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ funnel_in_port0: endpoint {
+ remote-endpoint = <&ptm0_out_port>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ funnel_in_port1: endpoint {
+ remote-endpoint = <&ptm1_out_port>;
+ };
+ };
+
+ port@2 {
+ reg = <2>;
+ funnel_in_port2: endpoint {
+ remote-endpoint = <&etm0_out_port>;
+ };
+ };
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/arm/arm,coresight-dynamic-replicator.yaml b/Documentation/devicetree/bindings/arm/arm,coresight-dynamic-replicator.yaml
new file mode 100644
index 000000000000..17ea936b796f
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/arm,coresight-dynamic-replicator.yaml
@@ -0,0 +1,133 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/arm,coresight-dynamic-replicator.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Arm Coresight Programmable Trace Bus Replicator
+
+maintainers:
+ - Mathieu Poirier <mathieu.poirier@linaro.org>
+ - Mike Leach <mike.leach@linaro.org>
+ - Leo Yan <leo.yan@linaro.org>
+ - Suzuki K Poulose <suzuki.poulose@arm.com>
+
+description: |
+ CoreSight components are compliant with the ARM CoreSight architecture
+ specification and can be connected in various topologies to suit a particular
+ SoCs tracing needs. These trace components can generally be classified as
+ sinks, links and sources. Trace data produced by one or more sources flows
+ through the intermediate links connecting the source to the currently selected
+ sink.
+
+ The Coresight replicator splits a single trace stream into two trace streams
+ for systems that have more than one trace sink component.
+
+# Need a custom select here or 'arm,primecell' will match on lots of nodes
+select:
+ properties:
+ compatible:
+ contains:
+ const: arm,coresight-dynamic-replicator
+ required:
+ - compatible
+
+allOf:
+ - $ref: /schemas/arm/primecell.yaml#
+
+properties:
+ compatible:
+ items:
+ - const: arm,coresight-dynamic-replicator
+ - const: arm,primecell
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ minItems: 1
+ maxItems: 2
+
+ clock-names:
+ minItems: 1
+ items:
+ - const: apb_pclk
+ - const: atclk
+
+ label:
+ description:
+ Description of a coresight device.
+
+ power-domains:
+ maxItems: 1
+
+ qcom,replicator-loses-context:
+ type: boolean
+ description:
+ Indicates that the replicator will lose register context when AMBA clock
+ is removed which is observed in some replicator designs.
+
+ in-ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+ additionalProperties: false
+
+ properties:
+ port:
+ description: Input connection from CoreSight Trace bus
+ $ref: /schemas/graph.yaml#/properties/port
+
+ out-ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ patternProperties:
+ '^port(@[01])?$':
+ description: Output connections to CoreSight Trace bus
+ $ref: /schemas/graph.yaml#/properties/port
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - in-ports
+ - out-ports
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ replicator@20120000 {
+ compatible = "arm,coresight-dynamic-replicator", "arm,primecell";
+ reg = <0x20120000 0x1000>;
+
+ clocks = <&soc_smc50mhz>;
+ clock-names = "apb_pclk";
+
+ out-ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ /* replicator output ports */
+ port@0 {
+ reg = <0>;
+ replicator_out_port0: endpoint {
+ remote-endpoint = <&tpiu_in_port>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ replicator_out_port1: endpoint {
+ remote-endpoint = <&etr_in_port>;
+ };
+ };
+ };
+ in-ports {
+ port {
+ replicator_in_port0: endpoint {
+ remote-endpoint = <&csys2_funnel_out_port>;
+ };
+ };
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/arm/arm,coresight-etb10.yaml b/Documentation/devicetree/bindings/arm/arm,coresight-etb10.yaml
new file mode 100644
index 000000000000..892df7aca1ac
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/arm,coresight-etb10.yaml
@@ -0,0 +1,99 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/arm,coresight-etb10.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Arm CoreSight Embedded Trace Buffer
+
+maintainers:
+ - Mathieu Poirier <mathieu.poirier@linaro.org>
+ - Mike Leach <mike.leach@linaro.org>
+ - Leo Yan <leo.yan@linaro.org>
+ - Suzuki K Poulose <suzuki.poulose@arm.com>
+
+description: |
+ CoreSight components are compliant with the ARM CoreSight architecture
+ specification and can be connected in various topologies to suit a particular
+ SoCs tracing needs. These trace components can generally be classified as
+ sinks, links and sources. Trace data produced by one or more sources flows
+ through the intermediate links connecting the source to the currently selected
+ sink.
+
+ The CoreSight Embedded Trace Buffer stores traces in a dedicated SRAM that is
+ used as a circular buffer.
+
+# Need a custom select here or 'arm,primecell' will match on lots of nodes
+select:
+ properties:
+ compatible:
+ contains:
+ const: arm,coresight-etb10
+ required:
+ - compatible
+
+allOf:
+ - $ref: /schemas/arm/primecell.yaml#
+
+properties:
+ compatible:
+ items:
+ - const: arm,coresight-etb10
+ - const: arm,primecell
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ minItems: 1
+ maxItems: 2
+
+ clock-names:
+ minItems: 1
+ items:
+ - const: apb_pclk
+ - const: atclk
+
+ label:
+ description:
+ Description of a coresight device.
+
+ power-domains:
+ maxItems: 1
+
+ in-ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+ additionalProperties: false
+
+ properties:
+ port:
+ description: Input connection from CoreSight Trace bus.
+ $ref: /schemas/graph.yaml#/properties/port
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - in-ports
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ etb@20010000 {
+ compatible = "arm,coresight-etb10", "arm,primecell";
+ reg = <0x20010000 0x1000>;
+
+ clocks = <&oscclk6a>;
+ clock-names = "apb_pclk";
+ in-ports {
+ port {
+ etb_in_port: endpoint {
+ remote-endpoint = <&replicator_out_port0>;
+ };
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/arm/arm,coresight-etm.yaml b/Documentation/devicetree/bindings/arm/arm,coresight-etm.yaml
new file mode 100644
index 000000000000..71f2e1ed27e5
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/arm,coresight-etm.yaml
@@ -0,0 +1,163 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/arm,coresight-etm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Arm CoreSight Embedded Trace MacroCell
+
+maintainers:
+ - Mathieu Poirier <mathieu.poirier@linaro.org>
+ - Mike Leach <mike.leach@linaro.org>
+ - Leo Yan <leo.yan@linaro.org>
+ - Suzuki K Poulose <suzuki.poulose@arm.com>
+
+description: |
+ CoreSight components are compliant with the ARM CoreSight architecture
+ specification and can be connected in various topologies to suit a particular
+ SoCs tracing needs. These trace components can generally be classified as
+ sinks, links and sources. Trace data produced by one or more sources flows
+ through the intermediate links connecting the source to the currently selected
+ sink.
+
+ The Embedded Trace Macrocell (ETM) is a real-time trace module providing
+ instruction and data tracing of a processor.
+
+select:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - arm,coresight-etm3x
+ - arm,coresight-etm4x
+ - arm,coresight-etm4x-sysreg
+ required:
+ - compatible
+
+allOf:
+ - if:
+ not:
+ properties:
+ compatible:
+ contains:
+ const: arm,coresight-etm4x-sysreg
+ then:
+ $ref: /schemas/arm/primecell.yaml#
+ required:
+ - reg
+
+properties:
+ compatible:
+ oneOf:
+ - description:
+ Embedded Trace Macrocell with memory mapped access.
+ items:
+ - enum:
+ - arm,coresight-etm3x
+ - arm,coresight-etm4x
+ - const: arm,primecell
+ - description:
+ Embedded Trace Macrocell (version 4.x), with system register access only
+ const: arm,coresight-etm4x-sysreg
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ minItems: 1
+ maxItems: 2
+
+ clock-names:
+ minItems: 1
+ items:
+ - const: apb_pclk
+ - const: atclk
+
+ power-domains:
+ maxItems: 1
+
+ arm,coresight-loses-context-with-cpu:
+ type: boolean
+ description:
+ Indicates that the hardware will lose register context on CPU power down
+ (e.g. CPUIdle). An example of where this may be needed are systems which
+ contain a coresight component and CPU in the same power domain. When the
+ CPU powers down the coresight component also powers down and loses its
+ context.
+
+ label:
+ description:
+ Description of a coresight device.
+
+ arm,cp14:
+ type: boolean
+ description:
+ Must be present if the system accesses ETM/PTM management registers via
+ co-processor 14.
+
+ qcom,skip-power-up:
+ type: boolean
+ description:
+ Indicates that an implementation can skip powering up the trace unit.
+ TRCPDCR.PU does not have to be set on Qualcomm Technologies Inc. systems
+ since ETMs are in the same power domain as their CPU cores. This property
+ is required to identify such systems with hardware errata where the CPU
+ watchdog counter is stopped when TRCPDCR.PU is set.
+
+ cpu:
+ description:
+ phandle to the cpu this ETM is bound to.
+ $ref: /schemas/types.yaml#/definitions/phandle
+
+ out-ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+ additionalProperties: false
+
+ properties:
+ port:
+ description: Output connection from the ETM to CoreSight Trace bus.
+ $ref: /schemas/graph.yaml#/properties/port
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - cpu
+ - out-ports
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ ptm@2201c000 {
+ compatible = "arm,coresight-etm3x", "arm,primecell";
+ reg = <0x2201c000 0x1000>;
+
+ cpu = <&cpu0>;
+ clocks = <&oscclk6a>;
+ clock-names = "apb_pclk";
+ out-ports {
+ port {
+ ptm0_out_port: endpoint {
+ remote-endpoint = <&funnel_in_port0>;
+ };
+ };
+ };
+ };
+
+ ptm@2201d000 {
+ compatible = "arm,coresight-etm3x", "arm,primecell";
+ reg = <0x2201d000 0x1000>;
+
+ cpu = <&cpu1>;
+ clocks = <&oscclk6a>;
+ clock-names = "apb_pclk";
+ out-ports {
+ port {
+ ptm1_out_port: endpoint {
+ remote-endpoint = <&funnel_in_port1>;
+ };
+ };
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/arm/arm,coresight-static-funnel.yaml b/Documentation/devicetree/bindings/arm/arm,coresight-static-funnel.yaml
new file mode 100644
index 000000000000..9598a3d0a95b
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/arm,coresight-static-funnel.yaml
@@ -0,0 +1,97 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/arm,coresight-static-funnel.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Arm CoreSight Static Trace Bus Funnel
+
+maintainers:
+ - Mathieu Poirier <mathieu.poirier@linaro.org>
+ - Mike Leach <mike.leach@linaro.org>
+ - Leo Yan <leo.yan@linaro.org>
+ - Suzuki K Poulose <suzuki.poulose@arm.com>
+
+description: |
+ CoreSight components are compliant with the ARM CoreSight architecture
+ specification and can be connected in various topologies to suit a particular
+ SoCs tracing needs. These trace components can generally be classified as
+ sinks, links and sources. Trace data produced by one or more sources flows
+ through the intermediate links connecting the source to the currently selected
+ sink.
+
+ The Coresight static funnel merges 2-8 trace sources into a single trace
+ stream.
+
+properties:
+ compatible:
+ const: arm,coresight-static-funnel
+
+ power-domains:
+ maxItems: 1
+
+ label:
+ description:
+ Description of a coresight device.
+
+ in-ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ patternProperties:
+ '^port@[0-7]$':
+ description: Input connections from CoreSight Trace bus
+ $ref: /schemas/graph.yaml#/properties/port
+
+ out-ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+ additionalProperties: false
+
+ properties:
+ port:
+ description: Output connection to CoreSight Trace bus
+ $ref: /schemas/graph.yaml#/properties/port
+
+required:
+ - compatible
+ - in-ports
+ - out-ports
+
+additionalProperties: false
+
+examples:
+ - |
+ funnel {
+ /*
+ * non-configurable replicators don't show up on the
+ * AMBA bus. As such no need to add "arm,primecell".
+ */
+ compatible = "arm,coresight-static-funnel";
+
+ out-ports {
+ port {
+ combo_funnel_out: endpoint {
+ remote-endpoint = <&top_funnel_in>;
+ };
+ };
+ };
+
+ in-ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ combo_funnel_in0: endpoint {
+ remote-endpoint = <&cluster0_etf_out>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ combo_funnel_in1: endpoint {
+ remote-endpoint = <&cluster1_etf_out>;
+ };
+ };
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/arm/arm,coresight-static-replicator.yaml b/Documentation/devicetree/bindings/arm/arm,coresight-static-replicator.yaml
new file mode 100644
index 000000000000..b81851b26c74
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/arm,coresight-static-replicator.yaml
@@ -0,0 +1,128 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/arm,coresight-static-replicator.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Arm CoreSight Static Trace Bus Replicator
+
+maintainers:
+ - Mathieu Poirier <mathieu.poirier@linaro.org>
+ - Mike Leach <mike.leach@linaro.org>
+ - Leo Yan <leo.yan@linaro.org>
+ - Suzuki K Poulose <suzuki.poulose@arm.com>
+
+description: |
+ CoreSight components are compliant with the ARM CoreSight architecture
+ specification and can be connected in various topologies to suit a particular
+ SoCs tracing needs. These trace components can generally be classified as
+ sinks, links and sources. Trace data produced by one or more sources flows
+ through the intermediate links connecting the source to the currently selected
+ sink.
+
+ The Coresight replicator splits a single trace stream into two trace streams
+ for systems that have more than one trace sink component.
+
+properties:
+ compatible:
+ const: arm,coresight-static-replicator
+
+ power-domains:
+ maxItems: 1
+
+ clocks:
+ minItems: 1
+ maxItems: 3
+
+ clock-names:
+ oneOf:
+ - items:
+ - enum: [apb_pclk, atclk]
+ - items: # Zynq-700
+ - const: apb_pclk
+ - const: dbg_trc
+ - const: dbg_apb
+
+ label:
+ description:
+ Description of a coresight device.
+
+ in-ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+ additionalProperties: false
+
+ properties:
+ port:
+ description: Input connection from CoreSight Trace bus
+ $ref: /schemas/graph.yaml#/properties/port
+
+ out-ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ patternProperties:
+ '^port@[01]$':
+ description: Output connections to CoreSight Trace bus
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ unevaluatedProperties: false
+
+ properties:
+ endpoint:
+ $ref: /schemas/graph.yaml#/$defs/endpoint-base
+ unevaluatedProperties: false
+
+ properties:
+ filter-source:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ phandle to the coresight trace source device matching the
+ hard coded filtering for this port
+
+ remote-endpoint: true
+
+required:
+ - compatible
+ - in-ports
+ - out-ports
+
+additionalProperties: false
+
+examples:
+ - |
+ replicator {
+ /*
+ * non-configurable replicators don't show up on the
+ * AMBA bus. As such no need to add "arm,primecell".
+ */
+ compatible = "arm,coresight-static-replicator";
+
+ out-ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ /* replicator output ports */
+ port@0 {
+ reg = <0>;
+ replicator_out_port0: endpoint {
+ remote-endpoint = <&etb_in_port>;
+ filter-source = <&tpdm_video>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ replicator_out_port1: endpoint {
+ remote-endpoint = <&tpiu_in_port>;
+ filter-source = <&tpdm_mdss>;
+ };
+ };
+ };
+
+ in-ports {
+ port {
+ replicator_in_port0: endpoint {
+ remote-endpoint = <&funnel_out_port0>;
+ };
+ };
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/arm/arm,coresight-stm.yaml b/Documentation/devicetree/bindings/arm/arm,coresight-stm.yaml
new file mode 100644
index 000000000000..378380c3f5aa
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/arm,coresight-stm.yaml
@@ -0,0 +1,104 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/arm,coresight-stm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Arm CoreSight System Trace MacroCell
+
+maintainers:
+ - Mathieu Poirier <mathieu.poirier@linaro.org>
+ - Mike Leach <mike.leach@linaro.org>
+ - Leo Yan <leo.yan@linaro.org>
+ - Suzuki K Poulose <suzuki.poulose@arm.com>
+
+description: |
+ CoreSight components are compliant with the ARM CoreSight architecture
+ specification and can be connected in various topologies to suit a particular
+ SoCs tracing needs. These trace components can generally be classified as
+ sinks, links and sources. Trace data produced by one or more sources flows
+ through the intermediate links connecting the source to the currently selected
+ sink.
+
+ The STM is a trace source that is integrated into a CoreSight system, designed
+ primarily for high-bandwidth trace of instrumentation embedded into software.
+ This instrumentation is made up of memory-mapped writes to the STM Advanced
+ eXtensible Interface (AXI) slave, which carry information about the behavior
+ of the software.
+
+select:
+ properties:
+ compatible:
+ contains:
+ const: arm,coresight-stm
+ required:
+ - compatible
+
+allOf:
+ - $ref: /schemas/arm/primecell.yaml#
+
+properties:
+ compatible:
+ items:
+ - const: arm,coresight-stm
+ - const: arm,primecell
+
+ reg:
+ maxItems: 2
+
+ reg-names:
+ items:
+ - const: stm-base
+ - const: stm-stimulus-base
+
+ clocks:
+ minItems: 1
+ maxItems: 2
+
+ clock-names:
+ minItems: 1
+ items:
+ - const: apb_pclk
+ - const: atclk
+
+ power-domains:
+ maxItems: 1
+
+ out-ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+ additionalProperties: false
+
+ properties:
+ port:
+ description: Output connection to the CoreSight Trace bus.
+ $ref: /schemas/graph.yaml#/properties/port
+
+required:
+ - compatible
+ - reg
+ - reg-names
+ - clocks
+ - clock-names
+ - out-ports
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ stm@20100000 {
+ compatible = "arm,coresight-stm", "arm,primecell";
+ reg = <0x20100000 0x1000>,
+ <0x28000000 0x180000>;
+ reg-names = "stm-base", "stm-stimulus-base";
+
+ clocks = <&soc_smc50mhz>;
+ clock-names = "apb_pclk";
+ out-ports {
+ port {
+ stm_out_port: endpoint {
+ remote-endpoint = <&main_funnel_in_port2>;
+ };
+ };
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/arm/arm,coresight-tmc.yaml b/Documentation/devicetree/bindings/arm/arm,coresight-tmc.yaml
new file mode 100644
index 000000000000..96dd5b5f771a
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/arm,coresight-tmc.yaml
@@ -0,0 +1,167 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/arm,coresight-tmc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Arm CoreSight Trace Memory Controller
+
+maintainers:
+ - Mathieu Poirier <mathieu.poirier@linaro.org>
+ - Mike Leach <mike.leach@linaro.org>
+ - Leo Yan <leo.yan@linaro.org>
+ - Suzuki K Poulose <suzuki.poulose@arm.com>
+
+description: |
+ CoreSight components are compliant with the ARM CoreSight architecture
+ specification and can be connected in various topologies to suit a particular
+ SoCs tracing needs. These trace components can generally be classified as
+ sinks, links and sources. Trace data produced by one or more sources flows
+ through the intermediate links connecting the source to the currently selected
+ sink.
+
+ Trace Memory Controller is used for Embedded Trace Buffer(ETB), Embedded Trace
+ FIFO(ETF) and Embedded Trace Router(ETR) configurations. The configuration
+ mode (ETB, ETF, ETR) is discovered at boot time when the device is probed.
+
+# Need a custom select here or 'arm,primecell' will match on lots of nodes
+select:
+ properties:
+ compatible:
+ contains:
+ const: arm,coresight-tmc
+ required:
+ - compatible
+
+allOf:
+ - $ref: /schemas/arm/primecell.yaml#
+
+properties:
+ compatible:
+ items:
+ - const: arm,coresight-tmc
+ - const: arm,primecell
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ minItems: 1
+ maxItems: 2
+
+ clock-names:
+ minItems: 1
+ items:
+ - const: apb_pclk
+ - const: atclk
+
+ label:
+ description:
+ Description of a coresight device.
+
+ iommus:
+ maxItems: 1
+
+ power-domains:
+ maxItems: 1
+
+ arm,buffer-size:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ deprecated: true
+ description:
+ Size of contiguous buffer space for TMC ETR (embedded trace router). The
+ buffer size can be configured dynamically via buffer_size property in
+ sysfs instead.
+
+ arm,scatter-gather:
+ type: boolean
+ description:
+ Indicates that the TMC-ETR can safely use the SG mode on this system.
+
+ arm,max-burst-size:
+ description:
+ The maximum burst size initiated by TMC on the AXI master interface. The
+ burst size can be in the range [0..15], the setting supports one data
+ transfer per burst up to a maximum of 16 data transfers per burst.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ maximum: 15
+
+ in-ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+ additionalProperties: false
+
+ properties:
+ port:
+ description: Input connection from the CoreSight Trace bus.
+ $ref: /schemas/graph.yaml#/properties/port
+
+ out-ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+ additionalProperties: false
+
+ properties:
+ port:
+ description: AXI or ATB Master output connection. Used for ETR
+ and ETF configurations.
+ $ref: /schemas/graph.yaml#/properties/port
+
+ memory-region:
+ items:
+ - description: Reserved trace buffer memory for ETR and ETF sinks.
+ For ETR, this reserved memory region is used for trace data capture.
+ Same region is used for trace data retention as well after a panic
+ or watchdog reset.
+ This reserved memory region is used as trace buffer or used for trace
+ data retention only if specifically selected by the user in sysfs
+ interface.
+ The default memory usage models for ETR in sysfs/perf modes are
+ otherwise unaltered.
+
+ For ETF, this reserved memory region is used by default for
+ retention of trace data synced from internal SRAM after a panic
+ or watchdog reset.
+ - description: Reserved meta data memory. Used for ETR and ETF sinks
+ for storing metadata.
+
+ memory-region-names:
+ items:
+ - const: tracedata
+ - const: metadata
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - in-ports
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ etr@20070000 {
+ compatible = "arm,coresight-tmc", "arm,primecell";
+ reg = <0x20070000 0x1000>;
+ memory-region = <&etr_trace_mem_reserved>,
+ <&etr_mdata_mem_reserved>;
+ memory-region-names = "tracedata", "metadata";
+
+ clocks = <&oscclk6a>;
+ clock-names = "apb_pclk";
+ in-ports {
+ port {
+ etr_in_port: endpoint {
+ remote-endpoint = <&replicator2_out_port0>;
+ };
+ };
+ };
+
+ out-ports {
+ port {
+ etr_out_port: endpoint {
+ remote-endpoint = <&catu_in_port>;
+ };
+ };
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/arm/arm,coresight-tpiu.yaml b/Documentation/devicetree/bindings/arm/arm,coresight-tpiu.yaml
new file mode 100644
index 000000000000..a207f6899e67
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/arm,coresight-tpiu.yaml
@@ -0,0 +1,98 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/arm,coresight-tpiu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Arm CoreSight Trace Port Interface Unit
+
+maintainers:
+ - Mathieu Poirier <mathieu.poirier@linaro.org>
+ - Mike Leach <mike.leach@linaro.org>
+ - Leo Yan <leo.yan@linaro.org>
+ - Suzuki K Poulose <suzuki.poulose@arm.com>
+
+description: |
+ CoreSight components are compliant with the ARM CoreSight architecture
+ specification and can be connected in various topologies to suit a particular
+ SoCs tracing needs. These trace components can generally be classified as
+ sinks, links and sources. Trace data produced by one or more sources flows
+ through the intermediate links connecting the source to the currently selected
+ sink.
+
+ The CoreSight Trace Port Interface Unit captures trace data from the trace bus
+ and outputs it to an external trace port.
+
+# Need a custom select here or 'arm,primecell' will match on lots of nodes
+select:
+ properties:
+ compatible:
+ contains:
+ const: arm,coresight-tpiu
+ required:
+ - compatible
+
+allOf:
+ - $ref: /schemas/arm/primecell.yaml#
+
+properties:
+ compatible:
+ items:
+ - const: arm,coresight-tpiu
+ - const: arm,primecell
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ minItems: 1
+ maxItems: 2
+
+ clock-names:
+ minItems: 1
+ items:
+ - const: apb_pclk
+ - const: atclk
+
+ label:
+ description:
+ Description of a coresight device.
+
+ power-domains:
+ maxItems: 1
+
+ in-ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+ additionalProperties: false
+
+ properties:
+ port:
+ description: Input connection from the CoreSight Trace bus.
+ $ref: /schemas/graph.yaml#/properties/port
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - in-ports
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ tpiu@e3c05000 {
+ compatible = "arm,coresight-tpiu", "arm,primecell";
+ reg = <0xe3c05000 0x1000>;
+
+ clocks = <&clk_375m>;
+ clock-names = "apb_pclk";
+ in-ports {
+ port {
+ tpiu_in_port: endpoint {
+ remote-endpoint = <&funnel4_out_port0>;
+ };
+ };
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/arm/arm,corstone1000.yaml b/Documentation/devicetree/bindings/arm/arm,corstone1000.yaml
new file mode 100644
index 000000000000..cff1cdaadb13
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/arm,corstone1000.yaml
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/arm,corstone1000.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM Corstone1000
+
+maintainers:
+ - Abdellatif El Khlifi <abdellatif.elkhlifi@arm.com>
+ - Hugues Kamba Mpiana <hugues.kambampiana@arm.com>
+
+description: |+
+ ARM's Corstone1000 includes pre-verified Corstone SSE-710 subsystem that
+ provides a flexible compute architecture that combines Cortex‑A and Cortex‑M
+ processors.
+
+ Support for Cortex‑A32, Cortex‑A35 and Cortex‑A53 processors. Two expansion
+ systems for M-Class (or other) processors for adding sensors, connectivity,
+ video, audio and machine learning at the edge System and security IPs to build
+ a secure SoC for a range of rich IoT applications, for example gateways, smart
+ cameras and embedded systems.
+
+ Integrated Secure Enclave providing hardware Root of Trust and supporting
+ seamless integration of the optional CryptoCell™-312 cryptographic
+ accelerator.
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ - description: Corstone1000 MPS3 it has 1 Cortex-A35 CPU core in a FPGA
+ implementation of the Corstone1000 in the MPS3 prototyping board. See
+ ARM document DAI0550.
+ items:
+ - const: arm,corstone1000-mps3
+ - description: Corstone1000 FVP is the Fixed Virtual Platform
+ implementation of this system. See ARM ecosystems FVP's.
+ items:
+ - const: arm,corstone1000-fvp
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/arm,embedded-trace-extension.yaml b/Documentation/devicetree/bindings/arm/arm,embedded-trace-extension.yaml
new file mode 100644
index 000000000000..9c2c9ac9705a
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/arm,embedded-trace-extension.yaml
@@ -0,0 +1,77 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+# Copyright 2021, Arm Ltd
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/arm,embedded-trace-extension.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM Embedded Trace Extensions
+
+maintainers:
+ - Suzuki K Poulose <suzuki.poulose@arm.com>
+ - Mathieu Poirier <mathieu.poirier@linaro.org>
+
+description: |
+ Arm Embedded Trace Extension(ETE) is a per CPU trace component that
+ allows tracing the CPU execution. It overlaps with the CoreSight ETMv4
+ architecture and has extended support for future architecture changes.
+ The trace generated by the ETE could be stored via legacy CoreSight
+ components (e.g, TMC-ETR) or other means (e.g, using a per CPU buffer
+ Arm Trace Buffer Extension (TRBE)). Since the ETE can be connected to
+ legacy CoreSight components, a node must be listed per instance, along
+ with any optional connection graph as per the coresight bindings.
+
+properties:
+ $nodename:
+ pattern: "^ete(-[0-9]+)?$"
+ compatible:
+ items:
+ - const: arm,embedded-trace-extension
+
+ cpu:
+ description: |
+ Handle to the cpu this ETE is bound to.
+ $ref: /schemas/types.yaml#/definitions/phandle
+
+ power-domains:
+ maxItems: 1
+
+ out-ports:
+ description: |
+ Output connections from the ETE to legacy CoreSight trace bus.
+ $ref: /schemas/graph.yaml#/properties/ports
+ properties:
+ port:
+ description: Output connection from the ETE to legacy CoreSight Trace bus.
+ $ref: /schemas/graph.yaml#/properties/port
+
+required:
+ - compatible
+ - cpu
+
+additionalProperties: false
+
+examples:
+
+# An ETE node without legacy CoreSight connections
+ - |
+ ete-0 {
+ compatible = "arm,embedded-trace-extension";
+ cpu = <&cpu_0>;
+ };
+# An ETE node with legacy CoreSight connections
+ - |
+ ete-1 {
+ compatible = "arm,embedded-trace-extension";
+ cpu = <&cpu_1>;
+
+ out-ports { /* legacy coresight connection */
+ port {
+ ete1_out_port: endpoint {
+ remote-endpoint = <&funnel_in_port0>;
+ };
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/arm/arm,integrator.yaml b/Documentation/devicetree/bindings/arm/arm,integrator.yaml
new file mode 100644
index 000000000000..1bdbd1b7ee38
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/arm,integrator.yaml
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/arm,integrator.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM Integrator Boards
+
+maintainers:
+ - Linus Walleij <linus.walleij@linaro.org>
+
+description: |+
+ These were the first ARM platforms officially supported by ARM Ltd.
+ They are ARMv4, ARMv5 and ARMv6-capable using different core tiles,
+ so the system is modular and can host a variety of CPU tiles called
+ "core tiles" and referred to in the device tree as "core modules".
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ - description: ARM Integrator Application Platform, this board has a PCI
+ host and several PCI slots, as well as a number of slots for logical
+ expansion modules, it is referred to as an "ASIC Development
+ Motherboard" and is extended with custom FPGA and is intended for
+ rapid prototyping. See ARM DUI 0098B. This board can physically come
+ pre-packaged in a PC Tower form factor called Integrator/PP1 or a
+ special metal fixture called Integrator/PP2, see ARM DUI 0169A.
+ items:
+ - const: arm,integrator-ap
+ - description: ARM Integrator Compact Platform (HBI-0086), this board has
+ a compact form factor and mainly consists of the bare minimum
+ peripherals to make use of the core module. See ARM DUI 0159B.
+ items:
+ - const: arm,integrator-cp
+ - description: ARM Integrator Standard Development Board (SDB) Platform,
+ this board is a PCI-based board conforming to the Microsoft SDB
+ (HARP) specification. See ARM DUI 0099A.
+ items:
+ - const: arm,integrator-sp
+
+required:
+ - compatible
+ - core-module@10000000
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/arm,juno-fpga-apb-regs.yaml b/Documentation/devicetree/bindings/arm/arm,juno-fpga-apb-regs.yaml
new file mode 100644
index 000000000000..ce5f2e1ec1ea
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/arm,juno-fpga-apb-regs.yaml
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/arm,juno-fpga-apb-regs.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM Juno FPGA APB Registers
+
+maintainers:
+ - Sudeep Holla <sudeep.holla@arm.com>
+
+properties:
+ compatible:
+ items:
+ - const: arm,juno-fpga-apb-regs
+ - const: syscon
+ - const: simple-mfd
+
+ reg:
+ maxItems: 1
+
+ ranges: true
+
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 1
+
+patternProperties:
+ "^led@[0-9a-f]+,[0-9a-f]$":
+ $ref: /schemas/leds/register-bit-led.yaml#
+
+required:
+ - compatible
+ - reg
+ - ranges
+ - "#address-cells"
+ - "#size-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ syscon@10000 {
+ compatible = "arm,juno-fpga-apb-regs", "syscon", "simple-mfd";
+ reg = <0x010000 0x1000>;
+ ranges = <0x0 0x10000 0x1000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ led@8,0 {
+ compatible = "register-bit-led";
+ reg = <0x08 0x04>;
+ offset = <0x08>;
+ mask = <0x01>;
+ label = "vexpress:0";
+ linux,default-trigger = "heartbeat";
+ default-state = "on";
+ };
+ };
diff --git a/Documentation/devicetree/bindings/arm/arm,morello.yaml b/Documentation/devicetree/bindings/arm/arm,morello.yaml
new file mode 100644
index 000000000000..e843b97fa485
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/arm,morello.yaml
@@ -0,0 +1,35 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/arm,morello.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM Morello Platforms
+
+maintainers:
+ - Vincenzo Frascino <vincenzo.frascino@arm.com>
+
+description: |+
+ The Morello architecture is an experimental extension to Armv8.2-A,
+ which extends the AArch64 state with the principles proposed in
+ version 7 of the Capability Hardware Enhanced RISC Instructions
+ (CHERI) ISA.
+
+ ARM's Morello Platforms are built as a research project to explore
+ capability architectures based on arm.
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ - description: Arm Morello System Platforms
+ items:
+ - enum:
+ - arm,morello-sdp
+ - arm,morello-fvp
+ - const: arm,morello
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/arm,realview.yaml b/Documentation/devicetree/bindings/arm/arm,realview.yaml
new file mode 100644
index 000000000000..3c5f1688dbd7
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/arm,realview.yaml
@@ -0,0 +1,88 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/arm,realview.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM RealView Boards
+
+maintainers:
+ - Linus Walleij <linus.walleij@linaro.org>
+
+description: |+
+ The ARM RealView series of reference designs were built to explore the Arm11,
+ Cortex-A8, and Cortex-A9 CPUs. This included new features compared to the
+ earlier CPUs such as TrustZone and multicore (MPCore).
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ - description: ARM RealView Emulation Baseboard (HBI-0140) was created
+ as a generic platform to test different FPGA designs, and has
+ pluggable CPU modules, see ARM DUI 0303E.
+ items:
+ - const: arm,realview-eb
+ - description: ARM RealView Platform Baseboard for ARM1176JZF-S
+ (HBI-0147) was created as a development board to test ARM TrustZone,
+ CoreSight and Intelligent Energy Management (IEM) see ARM DUI 0425F.
+ items:
+ - const: arm,realview-pb1176
+ - description: ARM RealView Platform Baseboard for ARM 11 MPCore
+ (HBI-0159, HBI-0175 and HBI-0176) was created to showcase
+ multiprocessing with ARM11 using MPCore using symmetric
+ multiprocessing (SMP). See ARM DUI 0351E.
+ items:
+ - const: arm,realview-pb11mp
+ - description: ARM RealView Platform Baseboard for Cortex-A8 (HBI-0178,
+ HBI-0176 and HBI-0175) was the first reference platform for the
+ Cortex CPU family, including a Cortex-A8 test chip.
+ items:
+ - const: arm,realview-pba8
+ - description: ARM RealView Platform Baseboard Explore for Cortex-A9
+ (HBI-0182 and HBI-0183) was the reference platform for the Cortex-A9
+ CPU.
+ items:
+ - const: arm,realview-pbx
+
+ soc:
+ description: All RealView boards must provide a soc node in the root of the
+ device tree, representing the System-on-Chip since these test chips are
+ rather complex.
+ type: object
+ properties:
+ compatible:
+ oneOf:
+ - items:
+ - const: arm,realview-eb-soc
+ - const: simple-bus
+ - items:
+ - const: arm,realview-pb1176-soc
+ - const: simple-bus
+ - items:
+ - const: arm,realview-pb11mp-soc
+ - const: simple-bus
+ - items:
+ - const: arm,realview-pba8-soc
+ - const: simple-bus
+ - items:
+ - const: arm,realview-pbx-soc
+ - const: simple-bus
+
+ patternProperties:
+ "^.*syscon@[0-9a-f]+$":
+ type: object
+ description: All RealView boards must provide a syscon system controller
+ node inside the soc node.
+
+ required:
+ - compatible
+
+required:
+ - compatible
+ - soc
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/arm,scmi.txt b/Documentation/devicetree/bindings/arm/arm,scmi.txt
deleted file mode 100644
index 5f3719ab7075..000000000000
--- a/Documentation/devicetree/bindings/arm/arm,scmi.txt
+++ /dev/null
@@ -1,179 +0,0 @@
-System Control and Management Interface (SCMI) Message Protocol
-----------------------------------------------------------
-
-The SCMI is intended to allow agents such as OSPM to manage various functions
-that are provided by the hardware platform it is running on, including power
-and performance functions.
-
-This binding is intended to define the interface the firmware implementing
-the SCMI as described in ARM document number ARM DUI 0922B ("ARM System Control
-and Management Interface Platform Design Document")[0] provide for OSPM in
-the device tree.
-
-Required properties:
-
-The scmi node with the following properties shall be under the /firmware/ node.
-
-- compatible : shall be "arm,scmi"
-- mboxes: List of phandle and mailbox channel specifiers. It should contain
- exactly one or two mailboxes, one for transmitting messages("tx")
- and another optional for receiving the notifications("rx") if
- supported.
-- shmem : List of phandle pointing to the shared memory(SHM) area as per
- generic mailbox client binding.
-- #address-cells : should be '1' if the device has sub-nodes, maps to
- protocol identifier for a given sub-node.
-- #size-cells : should be '0' as 'reg' property doesn't have any size
- associated with it.
-
-Optional properties:
-
-- mbox-names: shall be "tx" or "rx" depending on mboxes entries.
-
-See Documentation/devicetree/bindings/mailbox/mailbox.txt for more details
-about the generic mailbox controller and client driver bindings.
-
-The mailbox is the only permitted method of calling the SCMI firmware.
-Mailbox doorbell is used as a mechanism to alert the presence of a
-messages and/or notification.
-
-Each protocol supported shall have a sub-node with corresponding compatible
-as described in the following sections. If the platform supports dedicated
-communication channel for a particular protocol, the 3 properties namely:
-mboxes, mbox-names and shmem shall be present in the sub-node corresponding
-to that protocol.
-
-Clock/Performance bindings for the clocks/OPPs based on SCMI Message Protocol
-------------------------------------------------------------
-
-This binding uses the common clock binding[1].
-
-Required properties:
-- #clock-cells : Should be 1. Contains the Clock ID value used by SCMI commands.
-
-Power domain bindings for the power domains based on SCMI Message Protocol
-------------------------------------------------------------
-
-This binding for the SCMI power domain providers uses the generic power
-domain binding[2].
-
-Required properties:
- - #power-domain-cells : Should be 1. Contains the device or the power
- domain ID value used by SCMI commands.
-
-Sensor bindings for the sensors based on SCMI Message Protocol
---------------------------------------------------------------
-SCMI provides an API to access the various sensors on the SoC.
-
-Required properties:
-- #thermal-sensor-cells: should be set to 1. This property follows the
- thermal device tree bindings[3].
-
- Valid cell values are raw identifiers (Sensor ID)
- as used by the firmware. Refer to platform details
- for your implementation for the IDs to use.
-
-SRAM and Shared Memory for SCMI
--------------------------------
-
-A small area of SRAM is reserved for SCMI communication between application
-processors and SCP.
-
-The properties should follow the generic mmio-sram description found in [4]
-
-Each sub-node represents the reserved area for SCMI.
-
-Required sub-node properties:
-- reg : The base offset and size of the reserved area with the SRAM
-- compatible : should be "arm,scmi-shmem" for Non-secure SRAM based
- shared memory
-
-[0] http://infocenter.arm.com/help/topic/com.arm.doc.den0056a/index.html
-[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-[2] Documentation/devicetree/bindings/power/power_domain.txt
-[3] Documentation/devicetree/bindings/thermal/thermal.txt
-[4] Documentation/devicetree/bindings/sram/sram.txt
-
-Example:
-
-sram@50000000 {
- compatible = "mmio-sram";
- reg = <0x0 0x50000000 0x0 0x10000>;
-
- #address-cells = <1>;
- #size-cells = <1>;
- ranges = <0 0x0 0x50000000 0x10000>;
-
- cpu_scp_lpri: scp-shmem@0 {
- compatible = "arm,scmi-shmem";
- reg = <0x0 0x200>;
- };
-
- cpu_scp_hpri: scp-shmem@200 {
- compatible = "arm,scmi-shmem";
- reg = <0x200 0x200>;
- };
-};
-
-mailbox@40000000 {
- ....
- #mbox-cells = <1>;
- reg = <0x0 0x40000000 0x0 0x10000>;
-};
-
-firmware {
-
- ...
-
- scmi {
- compatible = "arm,scmi";
- mboxes = <&mailbox 0 &mailbox 1>;
- mbox-names = "tx", "rx";
- shmem = <&cpu_scp_lpri &cpu_scp_hpri>;
- #address-cells = <1>;
- #size-cells = <0>;
-
- scmi_devpd: protocol@11 {
- reg = <0x11>;
- #power-domain-cells = <1>;
- };
-
- scmi_dvfs: protocol@13 {
- reg = <0x13>;
- #clock-cells = <1>;
- };
-
- scmi_clk: protocol@14 {
- reg = <0x14>;
- #clock-cells = <1>;
- };
-
- scmi_sensors0: protocol@15 {
- reg = <0x15>;
- #thermal-sensor-cells = <1>;
- };
- };
-};
-
-cpu@0 {
- ...
- reg = <0 0>;
- clocks = <&scmi_dvfs 0>;
-};
-
-hdlcd@7ff60000 {
- ...
- reg = <0 0x7ff60000 0 0x1000>;
- clocks = <&scmi_clk 4>;
- power-domains = <&scmi_devpd 1>;
-};
-
-thermal-zones {
- soc_thermal {
- polling-delay-passive = <100>;
- polling-delay = <1000>;
- /* sensor ID */
- thermal-sensors = <&scmi_sensors0 3>;
- ...
- };
-};
diff --git a/Documentation/devicetree/bindings/arm/arm,scpi.txt b/Documentation/devicetree/bindings/arm/arm,scpi.txt
deleted file mode 100644
index 401831973638..000000000000
--- a/Documentation/devicetree/bindings/arm/arm,scpi.txt
+++ /dev/null
@@ -1,219 +0,0 @@
-System Control and Power Interface (SCPI) Message Protocol
-----------------------------------------------------------
-
-Firmware implementing the SCPI described in ARM document number ARM DUI 0922B
-("ARM Compute Subsystem SCP: Message Interface Protocols")[0] can be used
-by Linux to initiate various system control and power operations.
-
-Required properties:
-
-- compatible : should be
- * "arm,scpi" : For implementations complying to SCPI v1.0 or above
- * "arm,scpi-pre-1.0" : For implementations complying to all
- unversioned releases prior to SCPI v1.0
-- mboxes: List of phandle and mailbox channel specifiers
- All the channels reserved by remote SCP firmware for use by
- SCPI message protocol should be specified in any order
-- shmem : List of phandle pointing to the shared memory(SHM) area between the
- processors using these mailboxes for IPC, one for each mailbox
- SHM can be any memory reserved for the purpose of this communication
- between the processors.
-
-See Documentation/devicetree/bindings/mailbox/mailbox.txt
-for more details about the generic mailbox controller and
-client driver bindings.
-
-Clock bindings for the clocks based on SCPI Message Protocol
-------------------------------------------------------------
-
-This binding uses the common clock binding[1].
-
-Container Node
-==============
-Required properties:
-- compatible : should be "arm,scpi-clocks"
- All the clocks provided by SCP firmware via SCPI message
- protocol much be listed as sub-nodes under this node.
-
-Sub-nodes
-=========
-Required properties:
-- compatible : shall include one of the following
- "arm,scpi-dvfs-clocks" - all the clocks that are variable and index based.
- These clocks don't provide an entire range of values between the
- limits but only discrete points within the range. The firmware
- provides the mapping for each such operating frequency and the
- index associated with it. The firmware also manages the
- voltage scaling appropriately with the clock scaling.
- "arm,scpi-variable-clocks" - all the clocks that are variable and provide full
- range within the specified range. The firmware provides the
- range of values within a specified range.
-
-Other required properties for all clocks(all from common clock binding):
-- #clock-cells : Should be 1. Contains the Clock ID value used by SCPI commands.
-- clock-output-names : shall be the corresponding names of the outputs.
-- clock-indices: The identifying number for the clocks(i.e.clock_id) in the
- node. It can be non linear and hence provide the mapping of identifiers
- into the clock-output-names array.
-
-SRAM and Shared Memory for SCPI
--------------------------------
-
-A small area of SRAM is reserved for SCPI communication between application
-processors and SCP.
-
-The properties should follow the generic mmio-sram description found in [3]
-
-Each sub-node represents the reserved area for SCPI.
-
-Required sub-node properties:
-- reg : The base offset and size of the reserved area with the SRAM
-- compatible : should be "arm,scp-shmem" for Non-secure SRAM based
- shared memory
-
-Sensor bindings for the sensors based on SCPI Message Protocol
---------------------------------------------------------------
-SCPI provides an API to access the various sensors on the SoC.
-
-Required properties:
-- compatible : should be "arm,scpi-sensors".
-- #thermal-sensor-cells: should be set to 1. This property follows the
- thermal device tree bindings[2].
-
- Valid cell values are raw identifiers (Sensor ID)
- as used by the firmware. Refer to platform details
- for your implementation for the IDs to use.
-
-Power domain bindings for the power domains based on SCPI Message Protocol
-------------------------------------------------------------
-
-This binding uses the generic power domain binding[4].
-
-PM domain providers
-===================
-
-Required properties:
- - #power-domain-cells : Should be 1. Contains the device or the power
- domain ID value used by SCPI commands.
- - num-domains: Total number of power domains provided by SCPI. This is
- needed as the SCPI message protocol lacks a mechanism to
- query this information at runtime.
-
-PM domain consumers
-===================
-
-Required properties:
- - power-domains : A phandle and PM domain specifier as defined by bindings of
- the power controller specified by phandle.
-
-[0] http://infocenter.arm.com/help/topic/com.arm.doc.dui0922b/index.html
-[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-[2] Documentation/devicetree/bindings/thermal/thermal.txt
-[3] Documentation/devicetree/bindings/sram/sram.txt
-[4] Documentation/devicetree/bindings/power/power_domain.txt
-
-Example:
-
-sram: sram@50000000 {
- compatible = "arm,juno-sram-ns", "mmio-sram";
- reg = <0x0 0x50000000 0x0 0x10000>;
-
- #address-cells = <1>;
- #size-cells = <1>;
- ranges = <0 0x0 0x50000000 0x10000>;
-
- cpu_scp_lpri: scp-shmem@0 {
- compatible = "arm,juno-scp-shmem";
- reg = <0x0 0x200>;
- };
-
- cpu_scp_hpri: scp-shmem@200 {
- compatible = "arm,juno-scp-shmem";
- reg = <0x200 0x200>;
- };
-};
-
-mailbox: mailbox0@40000000 {
- ....
- #mbox-cells = <1>;
-};
-
-scpi_protocol: scpi@2e000000 {
- compatible = "arm,scpi";
- mboxes = <&mailbox 0 &mailbox 1>;
- shmem = <&cpu_scp_lpri &cpu_scp_hpri>;
-
- clocks {
- compatible = "arm,scpi-clocks";
-
- scpi_dvfs: scpi_clocks@0 {
- compatible = "arm,scpi-dvfs-clocks";
- #clock-cells = <1>;
- clock-indices = <0>, <1>, <2>;
- clock-output-names = "atlclk", "aplclk","gpuclk";
- };
- scpi_clk: scpi_clocks@3 {
- compatible = "arm,scpi-variable-clocks";
- #clock-cells = <1>;
- clock-indices = <3>, <4>;
- clock-output-names = "pxlclk0", "pxlclk1";
- };
- };
-
- scpi_sensors0: sensors {
- compatible = "arm,scpi-sensors";
- #thermal-sensor-cells = <1>;
- };
-
- scpi_devpd: scpi-power-domains {
- compatible = "arm,scpi-power-domains";
- num-domains = <2>;
- #power-domain-cells = <1>;
- };
-};
-
-cpu@0 {
- ...
- reg = <0 0>;
- clocks = <&scpi_dvfs 0>;
-};
-
-hdlcd@7ff60000 {
- ...
- reg = <0 0x7ff60000 0 0x1000>;
- clocks = <&scpi_clk 4>;
- power-domains = <&scpi_devpd 1>;
-};
-
-thermal-zones {
- soc_thermal {
- polling-delay-passive = <100>;
- polling-delay = <1000>;
-
- /* sensor ID */
- thermal-sensors = <&scpi_sensors0 3>;
- ...
- };
-};
-
-In the above example, the #clock-cells is set to 1 as required.
-scpi_dvfs has 3 output clocks namely: atlclk, aplclk, and gpuclk with 0,
-1 and 2 as clock-indices. scpi_clk has 2 output clocks namely: pxlclk0
-and pxlclk1 with 3 and 4 as clock-indices.
-
-The first consumer in the example is cpu@0 and it has '0' as the clock
-specifier which points to the first entry in the output clocks of
-scpi_dvfs i.e. "atlclk".
-
-Similarly the second example is hdlcd@7ff60000 and it has pxlclk1 as input
-clock. '4' in the clock specifier here points to the second entry
-in the output clocks of scpi_clocks i.e. "pxlclk1"
-
-The thermal-sensors property in the soc_thermal node uses the
-temperature sensor provided by SCP firmware to setup a thermal
-zone. The ID "3" is the sensor identifier for the temperature sensor
-as used by the firmware.
-
-The num-domains property in scpi-power-domains domain specifies that
-SCPI provides 2 power domains. The hdlcd node uses the power domain with
-domain ID 1.
diff --git a/Documentation/devicetree/bindings/arm/arm,scu.yaml b/Documentation/devicetree/bindings/arm/arm,scu.yaml
new file mode 100644
index 000000000000..dae2aa27e641
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/arm,scu.yaml
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/arm,scu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM Snoop Control Unit (SCU)
+
+maintainers:
+ - Linus Walleij <linus.walleij@linaro.org>
+
+description: |
+ As part of the MPCore complex, Cortex-A5 and Cortex-A9 are provided
+ with a Snoop Control Unit. The register range is usually 256 (0x100)
+ bytes.
+
+ References:
+ - Cortex-A9: see DDI0407E Cortex-A9 MPCore Technical Reference Manual
+ Revision r2p0
+ - Cortex-A5: see DDI0434B Cortex-A5 MPCore Technical Reference Manual
+ Revision r0p1
+ - ARM11 MPCore: see DDI0360F ARM 11 MPCore Processor Technical Reference
+ Manial Revision r2p0
+
+properties:
+ compatible:
+ enum:
+ - arm,cortex-a9-scu
+ - arm,cortex-a5-scu
+ - arm,arm11mp-scu
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ scu@a0410000 {
+ compatible = "arm,cortex-a9-scu";
+ reg = <0xa0410000 0x100>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/arm,trace-buffer-extension.yaml b/Documentation/devicetree/bindings/arm/arm,trace-buffer-extension.yaml
new file mode 100644
index 000000000000..f5b54b4fc55d
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/arm,trace-buffer-extension.yaml
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+# Copyright 2021, Arm Ltd
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/arm,trace-buffer-extension.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM Trace Buffer Extensions
+
+maintainers:
+ - Anshuman Khandual <anshuman.khandual@arm.com>
+
+description: |
+ Arm Trace Buffer Extension (TRBE) is a per CPU component
+ for storing trace generated on the CPU to memory. It is
+ accessed via CPU system registers. The software can verify
+ if it is permitted to use the component by checking the
+ TRBIDR register.
+
+properties:
+ $nodename:
+ const: trbe
+
+ compatible:
+ items:
+ - const: arm,trace-buffer-extension
+
+ interrupts:
+ description: |
+ Exactly 1 PPI must be listed. For heterogeneous systems where
+ TRBE is only supported on a subset of the CPUs, please consult
+ the arm,gic-v3 binding for details on describing a PPI partition.
+ maxItems: 1
+
+required:
+ - compatible
+ - interrupts
+
+additionalProperties: false
+
+examples:
+
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ trbe {
+ compatible = "arm,trace-buffer-extension";
+ interrupts = <GIC_PPI 15 IRQ_TYPE_LEVEL_HIGH>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/arm/arm,versatile-sysreg.yaml b/Documentation/devicetree/bindings/arm/arm,versatile-sysreg.yaml
new file mode 100644
index 000000000000..3b060c36b90c
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/arm,versatile-sysreg.yaml
@@ -0,0 +1,35 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/arm,versatile-sysreg.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Arm Versatile system registers
+
+maintainers:
+ - Linus Walleij <linus.walleij@linaro.org>
+
+description:
+ This is a system control registers block, providing multiple low level
+ platform functions like board detection and identification, software
+ interrupt generation, MMC and NOR Flash control, etc.
+
+properties:
+ compatible:
+ items:
+ - const: arm,versatile-sysreg
+ - const: syscon
+ - const: simple-mfd
+
+ reg:
+ maxItems: 1
+
+ panel:
+ type: object
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+...
diff --git a/Documentation/devicetree/bindings/arm/arm,versatile.yaml b/Documentation/devicetree/bindings/arm/arm,versatile.yaml
new file mode 100644
index 000000000000..7a3caf6af200
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/arm,versatile.yaml
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/arm,versatile.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM Versatile Boards
+
+maintainers:
+ - Linus Walleij <linus.walleij@linaro.org>
+
+description: |+
+ The ARM Versatile boards are two variants of ARM926EJ-S evaluation boards
+ with various pluggable interface boards, in essence the Versatile PB version
+ is a superset of the Versatile AB version.
+
+ The root node in the Versatile platforms must contain a core module child
+ node. They are always at physical address 0x10000000 in all the Versatile
+ variants.
+
+ When fitted with the IB2 Interface Board, the Versatile AB will present an
+ optional system controller node which controls the extra peripherals on the
+ interface board.
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ - description: The ARM Versatile Application Baseboard (HBI-0118) is an
+ evaluation board specifically for the ARM926EJ-S. It can be connected
+ to an IB1 interface board for a touchscreen-type use case or an IB2
+ for a candybar phone-type use case. See ARM DUI 0225D.
+ items:
+ - const: arm,versatile-ab
+ - description: The ARM Versatile Platform Baseboard (HBI-0117) is an
+ extension of the Versatile Application Baseboard that includes a
+ PCI host controller. Like the sibling board, it is done specifically
+ for ARM926EJ-S. See ARM DUI 0224B.
+ items:
+ - const: arm,versatile-pb
+
+required:
+ - compatible
+ - core-module@10000000
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/arm,vexpress-juno.yaml b/Documentation/devicetree/bindings/arm/arm,vexpress-juno.yaml
new file mode 100644
index 000000000000..4cdca5320544
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/arm,vexpress-juno.yaml
@@ -0,0 +1,231 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/arm,vexpress-juno.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM Versatile Express and Juno Boards
+
+maintainers:
+ - Sudeep Holla <sudeep.holla@arm.com>
+ - Linus Walleij <linus.walleij@linaro.org>
+
+description: |+
+ ARM's Versatile Express platform were built as reference designs for exploring
+ multicore Cortex-A class systems. The Versatile Express family contains both
+ 32 bit (Aarch32) and 64 bit (Aarch64) systems.
+
+ The board consist of a motherboard and one or more daughterboards (tiles). The
+ motherboard provides a set of peripherals. Processor and RAM "live" on the
+ tiles.
+
+ The motherboard and each core tile should be described by a separate Device
+ Tree source file, with the tile's description including the motherboard file
+ using an include directive. As the motherboard can be initialized in one of
+ two different configurations ("memory maps"), care must be taken to include
+ the correct one.
+
+ When a new generation of boards were introduced under the name "Juno", these
+ shared to many common characteristics with the Versatile Express that the
+ "arm,vexpress" compatible was retained in the root node, and these are
+ included in this binding schema as well.
+
+ The root node indicates the CPU SoC on the core tile, and this
+ is a daughterboard to the main motherboard. The name used in the compatible
+ string shall match the name given in the core tile's technical reference
+ manual, followed by "arm,vexpress" as an additional compatible value. If
+ further subvariants are released of the core tile, even more fine-granular
+ compatible strings with up to three compatible strings are used.
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ - description: CoreTile Express A9x4 (V2P-CA9) has 4 Cortex A9 CPU cores
+ in MPCore configuration in a test chip on the core tile. See ARM
+ DUI 0448I. This was the first Versatile Express platform.
+ items:
+ - const: arm,vexpress,v2p-ca9
+ - const: arm,vexpress
+ - description: CoreTile Express A5x2 (V2P-CA5s) has 2 Cortex A5 CPU cores
+ in a test chip on the core tile. It is intended to evaluate NEON, FPU
+ and Jazelle support in the Cortex A5 family. See ARM DUI 0541C.
+ items:
+ - const: arm,vexpress,v2p-ca5s
+ - const: arm,vexpress
+ - description: Coretile Express A15x2 (V2P-CA15) has 2 Cortex A15 CPU
+ cores in a MPCore configuration in a test chip on the core tile. See
+ ARM DUI 0604F.
+ items:
+ - const: arm,vexpress,v2p-ca15
+ - const: arm,vexpress
+ - description: CoreTile Express A15x4 (V2P-CA15, HBI-0237A) has 4 Cortex
+ A15 CPU cores in a test chip on the core tile. This is the first test
+ chip called "TC1".
+ items:
+ - const: arm,vexpress,v2p-ca15,tc1
+ - const: arm,vexpress,v2p-ca15
+ - const: arm,vexpress
+ - description: Coretile Express A15x2 A7x3 (V2P-CA15_A7) has 2 Cortex A15
+ CPU cores and 3 Cortex A7 cores in a big.LITTLE MPCore configuration
+ in a test chip on the core tile. See ARM DDI 0503I.
+ items:
+ - const: arm,vexpress,v2p-ca15_a7
+ - const: arm,vexpress
+ - description: LogicTile Express 20MG (V2F-1XV7) has 2 Cortex A53 CPU
+ cores in a test chip on the core tile. See ARM DDI 0498D.
+ items:
+ - const: arm,vexpress,v2f-1xv7,ca53x2
+ - const: arm,vexpress,v2f-1xv7
+ - const: arm,vexpress
+ - description: Arm Versatile Express Juno "r0" (the first Juno board,
+ V2M-Juno) was introduced as a vehicle for evaluating big.LITTLE on
+ AArch64 CPU cores. It has 2 Cortex A57 CPU cores and 4 Cortex A53
+ cores in a big.LITTLE configuration. It also features the MALI T624
+ GPU. See ARM document 100113_0000_07_en.
+ items:
+ - const: arm,juno
+ - const: arm,vexpress
+ - description: Arm Versatile Express Juno r1 Development Platform
+ (V2M-Juno r1) was introduced mainly aimed at development of PCIe
+ based systems. Juno r1 also has support for AXI masters placed on
+ the TLX connectors to join the coherency domain. Otherwise it is the
+ same configuration as Juno r0. See ARM document 100122_0100_06_en.
+ items:
+ - const: arm,juno-r1
+ - const: arm,juno
+ - const: arm,vexpress
+ - description: Arm Versatile Express Juno r2 Development Platform
+ (V2M-Juno r2). It has the same feature set as Juno r0 and r1. See
+ ARM document 100114_0200_04_en.
+ items:
+ - const: arm,juno-r2
+ - const: arm,juno
+ - const: arm,vexpress
+ - description: Arm AEMv8a (Architecture Envelope Model)
+ Versatile Express Real-Time System Model (VE RTSM)
+ is a programmers view of the Versatile Express with Arm
+ v8A hardware. See ARM DUI 0575D.
+ items:
+ - const: arm,rtsm_ve,aemv8a
+ - const: arm,vexpress
+ - description: Arm FVP (Fixed Virtual Platform) base model revision C
+ See ARM Document 100964_1190_00_en.
+ items:
+ - const: arm,fvp-base-revc
+ - const: arm,vexpress
+ - description: Arm Foundation model for Aarch64
+ items:
+ - const: arm,foundation-aarch64
+ - const: arm,vexpress
+
+ arm,vexpress,position:
+ description: When daughterboards are stacked on one site, their position
+ in the stack be be described this attribute.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 3
+
+ arm,vexpress,dcc:
+ description: When describing tiles consisting of more than one DCC, its
+ number can be specified with this attribute.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 3
+
+patternProperties:
+ "^bus@[0-9a-f]+$":
+ description: Static Memory Bus (SMB) node, if this exists it describes
+ the connection between the motherboard and any tiles. Sometimes the
+ compatible is placed directly under this node, sometimes it is placed
+ in a subnode named "motherboard-bus". Sometimes the compatible includes
+ "arm,vexpress,v2?-p1" sometimes (on software models) it is just
+ "simple-bus". If the compatible is placed in the "motherboard-bus" node,
+ it is stricter and always has two compatibles.
+ type: object
+ $ref: /schemas/simple-bus.yaml
+ unevaluatedProperties: false
+
+ properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - arm,vexpress,v2m-p1
+ - arm,vexpress,v2p-p1
+ - const: simple-bus
+ - const: simple-bus
+
+ patternProperties:
+ '^motherboard-bus@':
+ type: object
+ description: The motherboard description provides a single "motherboard"
+ node using 2 address cells corresponding to the Static Memory Bus
+ used between the motherboard and the tile. The first cell defines the
+ Chip Select (CS) line number, the second cell address offset within
+ the CS. All interrupt lines between the motherboard and the tile
+ are active high and are described using single cell.
+ properties:
+ "#address-cells":
+ const: 2
+ "#size-cells":
+ const: 1
+ ranges: true
+
+ compatible:
+ items:
+ - enum:
+ - arm,vexpress,v2m-p1
+ - arm,vexpress,v2p-p1
+ - const: simple-bus
+ arm,v2m-memory-map:
+ description: This describes the memory map type.
+ $ref: /schemas/types.yaml#/definitions/string
+ enum:
+ - rs1
+ - rs2
+
+ arm,hbi:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: This indicates the ARM HBI (Hardware Board ID), this is
+ ARM's unique board model ID, visible on the PCB's silkscreen.
+
+ arm,vexpress,site:
+ description: As Versatile Express can be configured in number of physically
+ different setups, the device tree should describe platform topology.
+ For this reason the root node and main motherboard node must define this
+ property, describing the physical location of the children nodes.
+ 0 means motherboard site, while 1 and 2 are daughterboard sites, and
+ 0xf means "sisterboard" which is the site containing the main CPU tile.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 15
+
+ required:
+ - compatible
+
+ additionalProperties:
+ type: object
+
+ required:
+ - compatible
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - arm,vexpress,v2p-ca9
+ - arm,vexpress,v2p-ca5s
+ - arm,vexpress,v2p-ca15
+ - arm,vexpress,v2p-ca15_a7
+ - arm,vexpress,v2f-1xv7,ca53x2
+ then:
+ required:
+ - arm,hbi
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/arm-boards b/Documentation/devicetree/bindings/arm/arm-boards
deleted file mode 100644
index b6e810c2781a..000000000000
--- a/Documentation/devicetree/bindings/arm/arm-boards
+++ /dev/null
@@ -1,237 +0,0 @@
-ARM Integrator/AP (Application Platform) and Integrator/CP (Compact Platform)
------------------------------------------------------------------------------
-ARM's oldest Linux-supported platform with connectors for different core
-tiles of ARMv4, ARMv5 and ARMv6 type.
-
-Required properties (in root node):
- compatible = "arm,integrator-ap"; /* Application Platform */
- compatible = "arm,integrator-cp"; /* Compact Platform */
-
-FPGA type interrupt controllers, see the versatile-fpga-irq binding doc.
-
-Required nodes:
-
-- core-module: the root node to the Integrator platforms must have
- a core-module with regs and the compatible string
- "arm,core-module-integrator"
-- external-bus-interface: the root node to the Integrator platforms
- must have an external bus interface with regs and the
- compatible-string "arm,external-bus-interface"
-
- Required properties for the core module:
- - regs: the location and size of the core module registers, one
- range of 0x200 bytes.
-
-- syscon: the root node of the Integrator platforms must have a
- system controller node pointing to the control registers,
- with the compatible string
- "arm,integrator-ap-syscon"
- "arm,integrator-cp-syscon"
- respectively.
-
- Required properties for the system controller:
- - regs: the location and size of the system controller registers,
- one range of 0x100 bytes.
-
- Required properties for the AP system controller:
- - interrupts: the AP syscon node must include the logical module
- interrupts, stated in order of module instance <module 0>,
- <module 1>, <module 2> ... for the CP system controller this
- is not required not of any use.
-
-/dts-v1/;
-/include/ "integrator.dtsi"
-
-/ {
- model = "ARM Integrator/AP";
- compatible = "arm,integrator-ap";
-
- core-module@10000000 {
- compatible = "arm,core-module-integrator";
- reg = <0x10000000 0x200>;
- };
-
- ebi@12000000 {
- compatible = "arm,external-bus-interface";
- reg = <0x12000000 0x100>;
- };
-
- syscon {
- compatible = "arm,integrator-ap-syscon";
- reg = <0x11000000 0x100>;
- interrupt-parent = <&pic>;
- /* These are the logic module IRQs */
- interrupts = <9>, <10>, <11>, <12>;
- };
-};
-
-
-ARM Versatile Application and Platform Baseboards
--------------------------------------------------
-ARM's development hardware platform with connectors for customizable
-core tiles. The hardware configuration of the Versatile boards is
-highly customizable.
-
-Required properties (in root node):
- compatible = "arm,versatile-ab"; /* Application baseboard */
- compatible = "arm,versatile-pb"; /* Platform baseboard */
-
-Interrupt controllers:
-- VIC required properties:
- compatible = "arm,versatile-vic";
- interrupt-controller;
- #interrupt-cells = <1>;
-
-- SIC required properties:
- compatible = "arm,versatile-sic";
- interrupt-controller;
- #interrupt-cells = <1>;
-
-Required nodes:
-
-- core-module: the root node to the Versatile platforms must have
- a core-module with regs and the compatible strings
- "arm,core-module-versatile", "syscon"
-
-Optional nodes:
-
-- arm,versatile-ib2-syscon : if the Versatile has an IB2 interface
- board mounted, this has a separate system controller that is
- defined in this node.
- Required properties:
- compatible = "arm,versatile-ib2-syscon", "syscon"
-
-ARM RealView Boards
--------------------
-The RealView boards cover tailored evaluation boards that are used to explore
-the ARM11 and Cortex A-8 and Cortex A-9 processors.
-
-Required properties (in root node):
- /* RealView Emulation Baseboard */
- compatible = "arm,realview-eb";
- /* RealView Platform Baseboard for ARM1176JZF-S */
- compatible = "arm,realview-pb1176";
- /* RealView Platform Baseboard for ARM11 MPCore */
- compatible = "arm,realview-pb11mp";
- /* RealView Platform Baseboard for Cortex A-8 */
- compatible = "arm,realview-pba8";
- /* RealView Platform Baseboard Explore for Cortex A-9 */
- compatible = "arm,realview-pbx";
-
-Required nodes:
-
-- soc: some node of the RealView platforms must be the SoC
- node that contain the SoC-specific devices, withe the compatible
- string set to one of these tuples:
- "arm,realview-eb-soc", "simple-bus"
- "arm,realview-pb1176-soc", "simple-bus"
- "arm,realview-pb11mp-soc", "simple-bus"
- "arm,realview-pba8-soc", "simple-bus"
- "arm,realview-pbx-soc", "simple-bus"
-
-- syscon: some subnode of the RealView SoC node must be a
- system controller node pointing to the control registers,
- with the compatible string set to one of these:
- "arm,realview-eb11mp-revb-syscon", "arm,realview-eb-syscon", "syscon"
- "arm,realview-eb11mp-revc-syscon", "arm,realview-eb-syscon", "syscon"
- "arm,realview-eb-syscon", "syscon"
- "arm,realview-pb1176-syscon", "syscon"
- "arm,realview-pb11mp-syscon", "syscon"
- "arm,realview-pba8-syscon", "syscon"
- "arm,realview-pbx-syscon", "syscon"
-
- Required properties for the system controller:
- - regs: the location and size of the system controller registers,
- one range of 0x1000 bytes.
-
-Example:
-
-/dts-v1/;
-#include <dt-bindings/interrupt-controller/irq.h>
-
-/ {
- model = "ARM RealView PB1176 with device tree";
- compatible = "arm,realview-pb1176";
- #address-cells = <1>;
- #size-cells = <1>;
-
- soc {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "arm,realview-pb1176-soc", "simple-bus";
- ranges;
-
- syscon: syscon@10000000 {
- compatible = "arm,realview-syscon", "syscon";
- reg = <0x10000000 0x1000>;
- };
-
- };
-};
-
-ARM Versatile Express Boards
------------------------------
-For details on the device tree bindings for ARM Versatile Express boards
-please consult the vexpress.txt file in the same directory as this file.
-
-ARM Juno Boards
-----------------
-The Juno boards are targeting development for AArch64 systems. The first
-iteration, Juno r0, is a vehicle for evaluating big.LITTLE on AArch64,
-with the second iteration, Juno r1, mainly aimed at development of PCIe
-based systems. Juno r1 also has support for AXI masters placed on the TLX
-connectors to join the coherency domain.
-
-Juno boards are described in a similar way to ARM Versatile Express boards,
-with the motherboard part of the hardware being described in a separate file
-to highlight the fact that is part of the support infrastructure for the SoC.
-Juno device tree bindings also share the Versatile Express bindings as
-described under the RS1 memory mapping.
-
-Required properties (in root node):
- compatible = "arm,juno"; /* For Juno r0 board */
- compatible = "arm,juno-r1"; /* For Juno r1 board */
- compatible = "arm,juno-r2"; /* For Juno r2 board */
-
-Required nodes:
-The description for the board must include:
- - a "psci" node describing the boot method used for the secondary CPUs.
- A detailed description of the bindings used for "psci" nodes is present
- in the psci.txt file.
- - a "cpus" node describing the available cores and their associated
- "enable-method"s. For more details see cpus.txt file.
-
-Example:
-
-/dts-v1/;
-/ {
- model = "ARM Juno development board (r0)";
- compatible = "arm,juno", "arm,vexpress";
- interrupt-parent = <&gic>;
- #address-cells = <2>;
- #size-cells = <2>;
-
- cpus {
- #address-cells = <2>;
- #size-cells = <0>;
-
- A57_0: cpu@0 {
- compatible = "arm,cortex-a57","arm,armv8";
- reg = <0x0 0x0>;
- device_type = "cpu";
- enable-method = "psci";
- };
-
- .....
-
- A53_0: cpu@100 {
- compatible = "arm,cortex-a53","arm,armv8";
- reg = <0x0 0x100>;
- device_type = "cpu";
- enable-method = "psci";
- };
-
- .....
- };
-
-};
diff --git a/Documentation/devicetree/bindings/arm/arm-dsu-pmu.txt b/Documentation/devicetree/bindings/arm/arm-dsu-pmu.txt
deleted file mode 100644
index 6efabba530f1..000000000000
--- a/Documentation/devicetree/bindings/arm/arm-dsu-pmu.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-* ARM DynamIQ Shared Unit (DSU) Performance Monitor Unit (PMU)
-
-ARM DyanmIQ Shared Unit (DSU) integrates one or more CPU cores
-with a shared L3 memory system, control logic and external interfaces to
-form a multicore cluster. The PMU enables to gather various statistics on
-the operations of the DSU. The PMU provides independent 32bit counters that
-can count any of the supported events, along with a 64bit cycle counter.
-The PMU is accessed via CPU system registers and has no MMIO component.
-
-** DSU PMU required properties:
-
-- compatible : should be one of :
-
- "arm,dsu-pmu"
-
-- interrupts : Exactly 1 SPI must be listed.
-
-- cpus : List of phandles for the CPUs connected to this DSU instance.
-
-
-** Example:
-
-dsu-pmu-0 {
- compatible = "arm,dsu-pmu";
- interrupts = <GIC_SPI 02 IRQ_TYPE_LEVEL_HIGH>;
- cpus = <&cpu_0>, <&cpu_1>;
-};
diff --git a/Documentation/devicetree/bindings/arm/armadeus.txt b/Documentation/devicetree/bindings/arm/armadeus.txt
deleted file mode 100644
index 9821283ff516..000000000000
--- a/Documentation/devicetree/bindings/arm/armadeus.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Armadeus i.MX Platforms Device Tree Bindings
------------------------------------------------
-
-APF51: i.MX51 based module.
-Required root node properties:
- - compatible = "armadeus,imx51-apf51", "fsl,imx51";
diff --git a/Documentation/devicetree/bindings/arm/aspeed/aspeed,sbc.yaml b/Documentation/devicetree/bindings/arm/aspeed/aspeed,sbc.yaml
new file mode 100644
index 000000000000..b8c5cacb09bd
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/aspeed/aspeed,sbc.yaml
@@ -0,0 +1,37 @@
+# SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause)
+# Copyright 2021 Joel Stanley, IBM Corp.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/aspeed/aspeed,sbc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ASPEED Secure Boot Controller
+
+maintainers:
+ - Joel Stanley <joel@jms.id.au>
+ - Andrew Jeffery <andrew@aj.id.au>
+
+description: |
+ The ASPEED SoCs have a register bank for interacting with the secure boot
+ controller.
+
+properties:
+ compatible:
+ items:
+ - const: aspeed,ast2600-sbc
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ sbc: secure-boot-controller@1e6f2000 {
+ compatible = "aspeed,ast2600-sbc";
+ reg = <0x1e6f2000 0x1000>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/aspeed/aspeed.yaml b/Documentation/devicetree/bindings/arm/aspeed/aspeed.yaml
new file mode 100644
index 000000000000..aedefca7cf4a
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/aspeed/aspeed.yaml
@@ -0,0 +1,112 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/aspeed/aspeed.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Aspeed SoC based boards
+
+maintainers:
+ - Joel Stanley <joel@jms.id.au>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ - description: AST2400 based boards
+ items:
+ - enum:
+ - delta,ahe50dc-bmc
+ - facebook,galaxy100-bmc
+ - facebook,wedge100-bmc
+ - facebook,wedge40-bmc
+ - microsoft,olympus-bmc
+ - quanta,q71l-bmc
+ - tyan,palmetto-bmc
+ - yadro,vesnin-bmc
+ - const: aspeed,ast2400
+
+ - description: AST2500 based boards
+ items:
+ - enum:
+ - amd,daytonax-bmc
+ - amd,ethanolx-bmc
+ - ampere,mtjade-bmc
+ - aspeed,ast2500-evb
+ - asrock,e3c246d4i-bmc
+ - asrock,e3c256d4i-bmc
+ - asrock,romed8hm3-bmc
+ - asrock,spc621d8hm3-bmc
+ - asrock,x570d4u-bmc
+ - bytedance,g220a-bmc
+ - facebook,cmm-bmc
+ - facebook,minipack-bmc
+ - facebook,tiogapass-bmc
+ - facebook,yamp-bmc
+ - facebook,yosemitev2-bmc
+ - facebook,wedge400-bmc
+ - facebook,wedge400-data64-bmc
+ - hxt,stardragon4800-rep2-bmc
+ - ibm,mihawk-bmc
+ - ibm,mowgli-bmc
+ - ibm,romulus-bmc
+ - ibm,swift-bmc
+ - ibm,witherspoon-bmc
+ - ingrasys,zaius-bmc
+ - inspur,fp5280g2-bmc
+ - inspur,nf5280m6-bmc
+ - inspur,on5263m5-bmc
+ - intel,s2600wf-bmc
+ - inventec,lanyang-bmc
+ - lenovo,hr630-bmc
+ - lenovo,hr855xg2-bmc
+ - portwell,neptune-bmc
+ - qcom,centriq2400-rep-bmc
+ - supermicro,x11spi-bmc
+ - tyan,s7106-bmc
+ - tyan,s8036-bmc
+ - yadro,nicole-bmc
+ - yadro,vegman-n110-bmc
+ - yadro,vegman-rx20-bmc
+ - yadro,vegman-sx20-bmc
+ - const: aspeed,ast2500
+
+ - description: AST2600 based boards
+ items:
+ - enum:
+ - ampere,mtjefferson-bmc
+ - ampere,mtmitchell-bmc
+ - aspeed,ast2600-evb
+ - aspeed,ast2600-evb-a1
+ - asus,x4tf-bmc
+ - facebook,bletchley-bmc
+ - facebook,catalina-bmc
+ - facebook,clemente-bmc
+ - facebook,cloudripper-bmc
+ - facebook,darwin-bmc
+ - facebook,elbert-bmc
+ - facebook,fuji-bmc
+ - facebook,fuji-data64-bmc
+ - facebook,greatlakes-bmc
+ - facebook,harma-bmc
+ - facebook,minerva-cmc
+ - facebook,santabarbara-bmc
+ - facebook,yosemite4-bmc
+ - ibm,blueridge-bmc
+ - ibm,everest-bmc
+ - ibm,fuji-bmc
+ - ibm,rainier-bmc
+ - ibm,sbp1-bmc
+ - ibm,system1-bmc
+ - ibm,tacoma-bmc
+ - inventec,starscream-bmc
+ - inventec,transformer-bmc
+ - jabil,rbp-bmc
+ - nvidia,gb200nvl-bmc
+ - qcom,dc-scm-v1-bmc
+ - quanta,s6q-bmc
+ - ufispace,ncplite-bmc
+ - const: aspeed,ast2600
+
+additionalProperties: true
diff --git a/Documentation/devicetree/bindings/arm/atmel,sama5d2-secumod.yaml b/Documentation/devicetree/bindings/arm/atmel,sama5d2-secumod.yaml
new file mode 100644
index 000000000000..ad4a98a4ee67
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/atmel,sama5d2-secumod.yaml
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/atmel,sama5d2-secumod.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Microchip AT91 Security Module (SECUMOD)
+
+maintainers:
+ - Nicolas Ferre <nicolas.ferre@microchip.com>
+
+description:
+ The Security Module also offers the PIOBU pins which can be used as GPIO pins.
+ Note that they maintain their voltage during Backup/Self-refresh.
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - const: atmel,sama5d2-secumod
+ - const: syscon
+ - items:
+ - enum:
+ - microchip,sama7d65-secumod
+ - microchip,sama7g5-secumod
+ - const: atmel,sama5d2-secumod
+ - const: syscon
+ reg:
+ maxItems: 1
+
+ gpio-controller: true
+
+ "#gpio-cells":
+ const: 2
+
+required:
+ - compatible
+ - reg
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ security-module@fc040000 {
+ compatible = "atmel,sama5d2-secumod", "syscon";
+ reg = <0xfc040000 0x100>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/atmel-at91.txt b/Documentation/devicetree/bindings/arm/atmel-at91.txt
deleted file mode 100644
index 4bf1b4da7659..000000000000
--- a/Documentation/devicetree/bindings/arm/atmel-at91.txt
+++ /dev/null
@@ -1,72 +0,0 @@
-Atmel AT91 device tree bindings.
-================================
-
-Boards with a SoC of the Atmel AT91 or SMART family shall have the following
-properties:
-
-Required root node properties:
-compatible: must be one of:
- * "atmel,at91rm9200"
-
- * "atmel,at91sam9" for SoCs using an ARM926EJ-S core, shall be extended with
- the specific SoC family or compatible:
- o "atmel,at91sam9260"
- o "atmel,at91sam9261"
- o "atmel,at91sam9263"
- o "atmel,at91sam9x5" for the 5 series, shall be extended with the specific
- SoC compatible:
- - "atmel,at91sam9g15"
- - "atmel,at91sam9g25"
- - "atmel,at91sam9g35"
- - "atmel,at91sam9x25"
- - "atmel,at91sam9x35"
- o "atmel,at91sam9g20"
- o "atmel,at91sam9g45"
- o "atmel,at91sam9n12"
- o "atmel,at91sam9rl"
- o "atmel,at91sam9xe"
- * "atmel,sama5" for SoCs using a Cortex-A5, shall be extended with the specific
- SoC family:
- o "atmel,sama5d2" shall be extended with the specific SoC compatible:
- - "atmel,sama5d27"
- o "atmel,sama5d3" shall be extended with the specific SoC compatible:
- - "atmel,sama5d31"
- - "atmel,sama5d33"
- - "atmel,sama5d34"
- - "atmel,sama5d35"
- - "atmel,sama5d36"
- o "atmel,sama5d4" shall be extended with the specific SoC compatible:
- - "atmel,sama5d41"
- - "atmel,sama5d42"
- - "atmel,sama5d43"
- - "atmel,sama5d44"
-
- * "atmel,samv7" for MCUs using a Cortex-M7, shall be extended with the specific
- SoC family:
- o "atmel,sams70" shall be extended with the specific MCU compatible:
- - "atmel,sams70j19"
- - "atmel,sams70j20"
- - "atmel,sams70j21"
- - "atmel,sams70n19"
- - "atmel,sams70n20"
- - "atmel,sams70n21"
- - "atmel,sams70q19"
- - "atmel,sams70q20"
- - "atmel,sams70q21"
- o "atmel,samv70" shall be extended with the specific MCU compatible:
- - "atmel,samv70j19"
- - "atmel,samv70j20"
- - "atmel,samv70n19"
- - "atmel,samv70n20"
- - "atmel,samv70q19"
- - "atmel,samv70q20"
- o "atmel,samv71" shall be extended with the specific MCU compatible:
- - "atmel,samv71j19"
- - "atmel,samv71j20"
- - "atmel,samv71j21"
- - "atmel,samv71n19"
- - "atmel,samv71n20"
- - "atmel,samv71n21"
- - "atmel,samv71q19"
- - "atmel,samv71q20"
- - "atmel,samv71q21"
diff --git a/Documentation/devicetree/bindings/arm/atmel-at91.yaml b/Documentation/devicetree/bindings/arm/atmel-at91.yaml
new file mode 100644
index 000000000000..3a34b7a2e8d4
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/atmel-at91.yaml
@@ -0,0 +1,294 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/atmel-at91.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Atmel AT91.
+
+maintainers:
+ - Alexandre Belloni <alexandre.belloni@bootlin.com>
+ - Claudiu Beznea <claudiu.beznea@microchip.com>
+ - Nicolas Ferre <nicolas.ferre@microchip.com>
+
+description: |
+ Boards with a SoC of the Atmel AT91 or SMART family shall have the following
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ - items:
+ - const: atmel,at91rm9200
+ - items:
+ - enum:
+ - atmel,at91sam9260
+ - atmel,at91sam9261
+ - atmel,at91sam9263
+ - atmel,at91sam9g20
+ - atmel,at91sam9g45
+ - atmel,at91sam9n12
+ - atmel,at91sam9rl
+ - atmel,at91sam9xe
+ - atmel,at91sam9x60
+ - const: atmel,at91sam9
+
+ - description: Olimex SAM9-L9260
+ items:
+ - const: olimex,sam9-l9260
+ - const: atmel,at91sam9260
+ - const: atmel,at91sam9
+
+ - description: Calao USB A9260
+ items:
+ - const: calao,usb-a9260
+ - const: atmel,at91sam9260
+ - const: atmel,at91sam9
+
+ - description: Calao USB A9263
+ items:
+ - const: calao,usb-a9263
+ - const: atmel,at91sam9263
+ - const: atmel,at91sam9
+
+ - description: Calao USB A9G20
+ items:
+ - const: calao,usb-a9g20
+ - const: atmel,at91sam9g20
+ - const: atmel,at91sam9
+
+ - description: Calao USB A9G20-LPW
+ items:
+ - const: calao,usb-a9g20-lpw
+ - const: calao,usb-a9g20
+ - const: atmel,at91sam9g20
+ - const: atmel,at91sam9
+
+ - items:
+ - enum:
+ - overkiz,kizboxmini-base # Overkiz kizbox Mini Base Board
+ - overkiz,kizboxmini-mb # Overkiz kizbox Mini Mother Board
+ - overkiz,kizboxmini-rd # Overkiz kizbox Mini RailDIN
+ - overkiz,smartkiz # Overkiz SmartKiz Board
+ - gardena,smart-gateway-at91sam # GARDENA smart Gateway (Article No. 19000)
+ - const: atmel,at91sam9g25
+ - const: atmel,at91sam9x5
+ - const: atmel,at91sam9
+
+ - items:
+ - enum:
+ - atmel,at91sam9g15
+ - atmel,at91sam9g25
+ - atmel,at91sam9g35
+ - atmel,at91sam9x25
+ - atmel,at91sam9x35
+ - const: atmel,at91sam9x5
+ - const: atmel,at91sam9
+
+ - description: Overkiz kizbox3 board
+ items:
+ - const: overkiz,kizbox3-hs
+ - const: atmel,sama5d27
+ - const: atmel,sama5d2
+ - const: atmel,sama5
+
+ - description: Microchip SAMA5D27 WLSOM1
+ items:
+ - const: microchip,sama5d27-wlsom1
+ - const: atmel,sama5d27
+ - const: atmel,sama5d2
+ - const: atmel,sama5
+
+ - description: Microchip SAMA5D27 WLSOM1 Evaluation Kit
+ items:
+ - const: microchip,sama5d27-wlsom1-ek
+ - const: microchip,sama5d27-wlsom1
+ - const: atmel,sama5d27
+ - const: atmel,sama5d2
+ - const: atmel,sama5
+
+ - description: Microchip SAMA5D29 Curiosity
+ items:
+ - const: microchip,sama5d29-curiosity
+ - const: atmel,sama5d29
+ - const: atmel,sama5d2
+ - const: atmel,sama5
+
+ - items:
+ - const: atmel,sama5d27
+ - const: atmel,sama5d2
+ - const: atmel,sama5
+
+ - description: Microchip SAMA5D2 Industrial Connectivity Platform
+ items:
+ - const: microchip,sama5d2-icp
+ - const: atmel,sama5d27
+ - const: atmel,sama5d2
+ - const: atmel,sama5
+
+ - description: Microchip SAM9X60 Evaluation Boards
+ items:
+ - enum:
+ - microchip,sam9x60ek
+ - microchip,sam9x60-curiosity
+ - const: microchip,sam9x60
+ - const: atmel,at91sam9
+
+ - description: Microchip SAM9X7 Evaluation Boards
+ items:
+ - const: microchip,sam9x75-curiosity
+ - const: microchip,sam9x7
+ - const: atmel,at91sam9
+
+ - description: Nattis v2 board with Natte v2 power board
+ items:
+ - const: axentia,nattis-2
+ - const: axentia,natte-2
+ - const: axentia,linea
+ - const: atmel,sama5d31
+ - const: atmel,sama5d3
+ - const: atmel,sama5
+
+ - description: TSE-850 v3 board
+ items:
+ - const: axentia,tse850v3
+ - const: axentia,linea
+ - const: atmel,sama5d31
+ - const: atmel,sama5d3
+ - const: atmel,sama5
+
+ - items:
+ - const: axentia,linea
+ - const: atmel,sama5d31
+ - const: atmel,sama5d3
+ - const: atmel,sama5
+
+ - description: Overkiz kizbox2 board with two heads
+ items:
+ - const: overkiz,kizbox2-2
+ - const: atmel,sama5d31
+ - const: atmel,sama5d3
+ - const: atmel,sama5
+
+ - description: Microchip SAMA5D3 Ethernet Development System Board
+ items:
+ - const: microchip,sama5d3-eds
+ - const: atmel,sama5d36
+ - const: atmel,sama5d3
+ - const: atmel,sama5
+
+ - description: CalAmp LMU5000 board
+ items:
+ - const: calamp,lmu5000
+ - const: atmel,at91sam9g20
+ - const: atmel,at91sam9
+
+ - description: Exegin Q5xR5 board
+ items:
+ - const: exegin,q5xr5
+ - const: atmel,at91sam9g20
+ - const: atmel,at91sam9
+
+ - items:
+ - enum:
+ - atmel,sama5d31
+ - atmel,sama5d33
+ - atmel,sama5d34
+ - atmel,sama5d35
+ - atmel,sama5d36
+ - const: atmel,sama5d3
+ - const: atmel,sama5
+
+ - items:
+ - enum:
+ - atmel,sama5d41
+ - atmel,sama5d42
+ - atmel,sama5d43
+ - atmel,sama5d44
+ - const: atmel,sama5d4
+ - const: atmel,sama5
+
+ - description: Microchip SAMA7D65 Curiosity Board
+ items:
+ - const: microchip,sama7d65-curiosity
+ - const: microchip,sama7d65
+ - const: microchip,sama7d6
+ - const: microchip,sama7
+
+ - items:
+ - const: microchip,sama7g5ek # SAMA7G5 Evaluation Kit
+ - const: microchip,sama7g5
+ - const: microchip,sama7
+
+ - description: Microchip SAMA7G54 Curiosity Board
+ items:
+ - const: microchip,sama7g54-curiosity
+ - const: microchip,sama7g5
+ - const: microchip,sama7
+
+ - description: Microchip LAN9662 Evaluation Boards.
+ items:
+ - enum:
+ - microchip,lan9662-pcb8291
+ - microchip,lan9662-pcb8309
+ - const: microchip,lan9662
+ - const: microchip,lan966
+
+ - description: Microchip LAN9668 PCB8290 Evaluation Board.
+ items:
+ - const: microchip,lan9668-pcb8290
+ - const: microchip,lan9668
+ - const: microchip,lan966
+
+ - description: Kontron KSwitch D10 MMT series
+ items:
+ - enum:
+ - kontron,kswitch-d10-mmt-8g
+ - kontron,kswitch-d10-mmt-6g-2gs
+ - const: kontron,s1921
+ - const: microchip,lan9668
+ - const: microchip,lan966
+
+ - items:
+ - enum:
+ - atmel,sams70j19
+ - atmel,sams70j20
+ - atmel,sams70j21
+ - atmel,sams70n19
+ - atmel,sams70n20
+ - atmel,sams70n21
+ - atmel,sams70q19
+ - atmel,sams70q20
+ - atmel,sams70q21
+ - const: atmel,sams70
+ - const: atmel,samv7
+
+ - items:
+ - enum:
+ - atmel,samv70j19
+ - atmel,samv70j20
+ - atmel,samv70n19
+ - atmel,samv70n20
+ - atmel,samv70q19
+ - atmel,samv70q20
+ - const: atmel,samv70
+ - const: atmel,samv7
+
+ - items:
+ - enum:
+ - atmel,samv71j19
+ - atmel,samv71j20
+ - atmel,samv71j21
+ - atmel,samv71n19
+ - atmel,samv71n20
+ - atmel,samv71n21
+ - atmel,samv71q19
+ - atmel,samv71q20
+ - atmel,samv71q21
+ - const: atmel,samv71
+ - const: atmel,samv7
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/atmel-sysregs.txt b/Documentation/devicetree/bindings/arm/atmel-sysregs.txt
index 4b96608ad692..5ce54f9befe6 100644
--- a/Documentation/devicetree/bindings/arm/atmel-sysregs.txt
+++ b/Documentation/devicetree/bindings/arm/atmel-sysregs.txt
@@ -1,7 +1,8 @@
Atmel system registers
Chipid required properties:
-- compatible: Should be "atmel,sama5d2-chipid"
+- compatible: Should be "atmel,sama5d2-chipid" or "microchip,sama7g5-chipid"
+ "microchip,sama7d65-chipid"
- reg : Should contain registers location and length
PIT Timer required properties:
@@ -10,6 +11,14 @@ PIT Timer required properties:
- interrupts: Should contain interrupt for the PIT which is the IRQ line
shared across all System Controller members.
+PIT64B Timer required properties:
+- compatible: Should be "microchip,sam9x60-pit64b" or
+ "microchip,sam9x7-pit64b", "microchip,sam9x60-pit64b"
+ "microchip,sama7d65-pit64b", "microchip,sam9x60-pit64b"
+- reg: Should contain registers location and length
+- interrupts: Should contain interrupt for PIT64B timer
+- clocks: Should contain the available clock sources for PIT64B timer.
+
System Timer (ST) required properties:
- compatible: Should be "atmel,at91rm9200-st", "syscon", "simple-mfd"
- reg: Should contain registers location and length
@@ -19,25 +28,15 @@ System Timer (ST) required properties:
Its subnodes can be:
- watchdog: compatible should be "atmel,at91rm9200-wdt"
-RSTC Reset Controller required properties:
-- compatible: Should be "atmel,<chip>-rstc".
- <chip> can be "at91sam9260" or "at91sam9g45" or "sama5d3"
-- reg: Should contain registers location and length
-- clocks: phandle to input clock.
-
-Example:
-
- rstc@fffffd00 {
- compatible = "atmel,at91sam9260-rstc";
- reg = <0xfffffd00 0x10>;
- clocks = <&clk32k>;
- };
-
RAMC SDRAM/DDR Controller required properties:
-- compatible: Should be "atmel,at91rm9200-sdramc", "syscon"
- "atmel,at91sam9260-sdramc",
- "atmel,at91sam9g45-ddramc",
- "atmel,sama5d3-ddramc",
+- compatible: Should be "atmel,at91rm9200-sdramc", "syscon" or
+ "atmel,at91sam9260-sdramc" or
+ "atmel,at91sam9g45-ddramc" or
+ "atmel,sama5d3-ddramc" or
+ "microchip,sam9x60-ddramc" or
+ "microchip,sama7g5-uddrc" or
+ "microchip,sama7d65-uddrc", "microchip,sama7g5-uddrc" or
+ "microchip,sam9x7-ddramc", "atmel,sama5d3-ddramc".
- reg: Should contain registers location and length
Examples:
@@ -47,125 +46,3 @@ Examples:
reg = <0xffffe800 0x200>;
};
-SHDWC Shutdown Controller
-
-required properties:
-- compatible: Should be "atmel,<chip>-shdwc".
- <chip> can be "at91sam9260", "at91sam9rl" or "at91sam9x5".
-- reg: Should contain registers location and length
-- clocks: phandle to input clock.
-
-optional properties:
-- atmel,wakeup-mode: String, operation mode of the wakeup mode.
- Supported values are: "none", "high", "low", "any".
-- atmel,wakeup-counter: Counter on Wake-up 0 (between 0x0 and 0xf).
-
-optional at91sam9260 properties:
-- atmel,wakeup-rtt-timer: boolean to enable Real-time Timer Wake-up.
-
-optional at91sam9rl properties:
-- atmel,wakeup-rtc-timer: boolean to enable Real-time Clock Wake-up.
-- atmel,wakeup-rtt-timer: boolean to enable Real-time Timer Wake-up.
-
-optional at91sam9x5 properties:
-- atmel,wakeup-rtc-timer: boolean to enable Real-time Clock Wake-up.
-
-Example:
-
- shdwc@fffffd10 {
- compatible = "atmel,at91sam9260-shdwc";
- reg = <0xfffffd10 0x10>;
- clocks = <&clk32k>;
- };
-
-SHDWC SAMA5D2-Compatible Shutdown Controller
-
-1) shdwc node
-
-required properties:
-- compatible: should be "atmel,sama5d2-shdwc".
-- reg: should contain registers location and length
-- clocks: phandle to input clock.
-- #address-cells: should be one. The cell is the wake-up input index.
-- #size-cells: should be zero.
-
-optional properties:
-
-- debounce-delay-us: minimum wake-up inputs debouncer period in
- microseconds. It's usually a board-related property.
-- atmel,wakeup-rtc-timer: boolean to enable Real-Time Clock wake-up.
-
-The node contains child nodes for each wake-up input that the platform uses.
-
-2) input nodes
-
-Wake-up input nodes are usually described in the "board" part of the Device
-Tree. Note also that input 0 is linked to the wake-up pin and is frequently
-used.
-
-Required properties:
-- reg: should contain the wake-up input index [0 - 15].
-
-Optional properties:
-- atmel,wakeup-active-high: boolean, the corresponding wake-up input described
- by the child, forces the wake-up of the core power supply on a high level.
- The default is to be active low.
-
-Example:
-
-On the SoC side:
- shdwc@f8048010 {
- compatible = "atmel,sama5d2-shdwc";
- reg = <0xf8048010 0x10>;
- clocks = <&clk32k>;
- #address-cells = <1>;
- #size-cells = <0>;
- atmel,wakeup-rtc-timer;
- };
-
-On the board side:
- shdwc@f8048010 {
- debounce-delay-us = <976>;
-
- input@0 {
- reg = <0>;
- };
-
- input@1 {
- reg = <1>;
- atmel,wakeup-active-high;
- };
- };
-
-Special Function Registers (SFR)
-
-Special Function Registers (SFR) manage specific aspects of the integrated
-memory, bridge implementations, processor and other functionality not controlled
-elsewhere.
-
-required properties:
-- compatible: Should be "atmel,<chip>-sfr", "syscon" or
- "atmel,<chip>-sfrbu", "syscon"
- <chip> can be "sama5d3", "sama5d4" or "sama5d2".
-- reg: Should contain registers location and length
-
- sfr@f0038000 {
- compatible = "atmel,sama5d3-sfr", "syscon";
- reg = <0xf0038000 0x60>;
- };
-
-Security Module (SECUMOD)
-
-The Security Module macrocell provides all necessary secure functions to avoid
-voltage, temperature, frequency and mechanical attacks on the chip. It also
-embeds secure memories that can be scrambled
-
-required properties:
-- compatible: Should be "atmel,<chip>-secumod", "syscon".
- <chip> can be "sama5d2".
-- reg: Should contain registers location and length
-
- secumod@fc040000 {
- compatible = "atmel,sama5d2-secumod", "syscon";
- reg = <0xfc040000 0x100>;
- };
diff --git a/Documentation/devicetree/bindings/arm/axentia.txt b/Documentation/devicetree/bindings/arm/axentia.txt
deleted file mode 100644
index de58f2463880..000000000000
--- a/Documentation/devicetree/bindings/arm/axentia.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-Device tree bindings for Axentia ARM devices
-============================================
-
-Linea CPU module
-----------------
-
-Required root node properties:
-compatible = "axentia,linea",
- "atmel,sama5d31", "atmel,sama5d3", "atmel,sama5";
-and following the rules from atmel-at91.txt for a sama5d31 SoC.
-
-
-Nattis v2 board with Natte v2 power board
------------------------------------------
-
-Required root node properties:
-compatible = "axentia,nattis-2", "axentia,natte-2", "axentia,linea",
- "atmel,sama5d31", "atmel,sama5d3", "atmel,sama5";
-and following the rules from above for the axentia,linea CPU module.
-
-
-TSE-850 v3 board
-----------------
-
-Required root node properties:
-compatible = "axentia,tse850v3", "axentia,linea",
- "atmel,sama5d31", "atmel,sama5d3", "atmel,sama5";
-and following the rules from above for the axentia,linea CPU module.
diff --git a/Documentation/devicetree/bindings/arm/axiado.yaml b/Documentation/devicetree/bindings/arm/axiado.yaml
new file mode 100644
index 000000000000..bfabe7b32e65
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/axiado.yaml
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/axiado.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Axiado Platforms
+
+maintainers:
+ - Harshit Shah <hshah@axiado.com>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ - description: AX3000 based boards
+ items:
+ - enum:
+ - axiado,ax3000-evk # Axiado AX3000 Evaluation Board
+ - const: axiado,ax3000 # Axiado AX3000 SoC
+
+additionalProperties: true
diff --git a/Documentation/devicetree/bindings/arm/axis.txt b/Documentation/devicetree/bindings/arm/axis.txt
deleted file mode 100644
index ae345e1c8d2b..000000000000
--- a/Documentation/devicetree/bindings/arm/axis.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-Axis Communications AB
-ARTPEC series SoC Device Tree Bindings
-
-ARTPEC-6 ARM SoC
-================
-
-Required root node properties:
-- compatible = "axis,artpec6";
-
-ARTPEC-6 System Controller
---------------------------
-
-The ARTPEC-6 has a system controller with mixed functions controlling DMA, PCIe
-and resets.
-
-Required properties:
-- compatible: "axis,artpec6-syscon", "syscon"
-- reg: Address and length of the register bank.
-
-Example:
- syscon {
- compatible = "axis,artpec6-syscon", "syscon";
- reg = <0xf8000000 0x48>;
- };
-
-ARTPEC-6 Development board:
----------------------------
-Required root node properties:
-- compatible = "axis,artpec6-dev-board", "axis,artpec6";
diff --git a/Documentation/devicetree/bindings/arm/axis.yaml b/Documentation/devicetree/bindings/arm/axis.yaml
new file mode 100644
index 000000000000..63e9aca85db7
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/axis.yaml
@@ -0,0 +1,36 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/axis.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Axis ARTPEC platforms
+
+maintainers:
+ - Jesper Nilsson <jesper.nilsson@axis.com>
+ - Lars Persson <lars.persson@axis.com>
+ - linux-arm-kernel@axis.com
+
+description: |
+ ARM platforms using SoCs designed by Axis branded as "ARTPEC".
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ - description: Axis ARTPEC-6 SoC board
+ items:
+ - enum:
+ - axis,artpec6-dev-board
+ - const: axis,artpec6
+
+ - description: Axis ARTPEC-8 SoC board
+ items:
+ - enum:
+ - axis,artpec8-grizzly
+ - const: axis,artpec8
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/axxia.txt b/Documentation/devicetree/bindings/arm/axxia.txt
deleted file mode 100644
index 7b4ef9c07696..000000000000
--- a/Documentation/devicetree/bindings/arm/axxia.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-Axxia AXM55xx device tree bindings
-
-Boards using the AXM55xx SoC need to have the following properties:
-
-Required root node property:
-
- - compatible = "lsi,axm5516"
-
-Boards:
-
- LSI AXM5516 Validation board (Amarillo)
- compatible = "lsi,axm5516-amarillo", "lsi,axm5516"
diff --git a/Documentation/devicetree/bindings/arm/axxia.yaml b/Documentation/devicetree/bindings/arm/axxia.yaml
new file mode 100644
index 000000000000..d60907e43efc
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/axxia.yaml
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/axxia.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Axxia AXM55xx
+
+maintainers:
+ - Anders Berg <anders.berg@lsi.com>
+
+properties:
+ $nodename:
+ const: "/"
+ compatible:
+ description: LSI AXM5516 Validation board (Amarillo)
+ items:
+ - const: lsi,axm5516-amarillo
+ - const: lsi,axm5516
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/bcm/bcm2835.yaml b/Documentation/devicetree/bindings/arm/bcm/bcm2835.yaml
new file mode 100644
index 000000000000..2729a542c4f3
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/bcm/bcm2835.yaml
@@ -0,0 +1,66 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/bcm/bcm2835.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom BCM2711/BCM2835 Platforms
+
+maintainers:
+ - Eric Anholt <eric@anholt.net>
+ - Stefan Wahren <wahrenst@gmx.net>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ - description: BCM2711 based Boards
+ items:
+ - enum:
+ - raspberrypi,400
+ - raspberrypi,4-compute-module
+ - raspberrypi,4-model-b
+ - const: brcm,bcm2711
+
+ - description: BCM2712 based Boards
+ items:
+ - enum:
+ - raspberrypi,5-model-b
+ - const: brcm,bcm2712
+
+ - description: BCM2835 based Boards
+ items:
+ - enum:
+ - raspberrypi,model-a
+ - raspberrypi,model-a-plus
+ - raspberrypi,model-b
+ - raspberrypi,model-b-i2c0 # Raspberry Pi Model B (no P5)
+ - raspberrypi,model-b-rev2
+ - raspberrypi,model-b-plus
+ - raspberrypi,compute-module
+ - raspberrypi,model-zero
+ - raspberrypi,model-zero-w
+ - const: brcm,bcm2835
+
+ - description: BCM2836 based Boards
+ items:
+ - enum:
+ - raspberrypi,2-model-b
+ - const: brcm,bcm2836
+
+ - description: BCM2837 based Boards
+ items:
+ - enum:
+ - raspberrypi,2-model-b-rev2
+ - raspberrypi,3-model-a-plus
+ - raspberrypi,3-model-b
+ - raspberrypi,3-model-b-plus
+ - raspberrypi,3-compute-module
+ - raspberrypi,3-compute-module-lite
+ - raspberrypi,model-zero-2-w
+ - const: brcm,bcm2837
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm11351-cpu-method.txt b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm11351-cpu-method.txt
deleted file mode 100644
index e3f996920403..000000000000
--- a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm11351-cpu-method.txt
+++ /dev/null
@@ -1,36 +0,0 @@
-Broadcom Kona Family CPU Enable Method
---------------------------------------
-This binding defines the enable method used for starting secondary
-CPUs in the following Broadcom SoCs:
- BCM11130, BCM11140, BCM11351, BCM28145, BCM28155, BCM21664
-
-The enable method is specified by defining the following required
-properties in the "cpu" device tree node:
- - enable-method = "brcm,bcm11351-cpu-method";
- - secondary-boot-reg = <...>;
-
-The secondary-boot-reg property is a u32 value that specifies the
-physical address of the register used to request the ROM holding pen
-code release a secondary CPU. The value written to the register is
-formed by encoding the target CPU id into the low bits of the
-physical start address it should jump to.
-
-Example:
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- cpu0: cpu@0 {
- device_type = "cpu";
- compatible = "arm,cortex-a9";
- reg = <0>;
- };
-
- cpu1: cpu@1 {
- device_type = "cpu";
- compatible = "arm,cortex-a9";
- reg = <1>;
- enable-method = "brcm,bcm11351-cpu-method";
- secondary-boot-reg = <0x3500417c>;
- };
- };
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm11351.txt b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm11351.txt
deleted file mode 100644
index 0ff6560e6094..000000000000
--- a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm11351.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-Broadcom BCM11351 device tree bindings
--------------------------------------------
-
-Boards with the bcm281xx SoC family (which includes bcm11130, bcm11140,
-bcm11351, bcm28145, bcm28155 SoCs) shall have the following properties:
-
-Required root node property:
-
-compatible = "brcm,bcm11351";
-DEPRECATED: compatible = "bcm,bcm11351";
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm11351.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm11351.yaml
new file mode 100644
index 000000000000..f2bcac0096b7
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm11351.yaml
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/bcm/brcm,bcm11351.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom BCM11351
+
+maintainers:
+ - Florian Fainelli <f.fainelli@gmail.com>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ items:
+ - enum:
+ - brcm,bcm28155-ap
+ - const: brcm,bcm11351
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm21664.txt b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm21664.txt
deleted file mode 100644
index e0774255e1a6..000000000000
--- a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm21664.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-Broadcom BCM21664 device tree bindings
---------------------------------------
-
-This document describes the device tree bindings for boards with the BCM21664
-SoC.
-
-Required root node property:
- - compatible: brcm,bcm21664
-
-Example:
- / {
- model = "BCM21664 SoC";
- compatible = "brcm,bcm21664";
- [...]
- }
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm21664.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm21664.yaml
new file mode 100644
index 000000000000..cf4e254e32f1
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm21664.yaml
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/bcm/brcm,bcm21664.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom BCM21664
+
+maintainers:
+ - Florian Fainelli <f.fainelli@gmail.com>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ items:
+ - enum:
+ - brcm,bcm21664-garnet
+ - const: brcm,bcm21664
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm23550-cpu-method.txt b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm23550-cpu-method.txt
deleted file mode 100644
index a3af54c0e404..000000000000
--- a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm23550-cpu-method.txt
+++ /dev/null
@@ -1,36 +0,0 @@
-Broadcom Kona Family CPU Enable Method
---------------------------------------
-This binding defines the enable method used for starting secondary
-CPUs in the following Broadcom SoCs:
- BCM23550
-
-The enable method is specified by defining the following required
-properties in the "cpu" device tree node:
- - enable-method = "brcm,bcm23550";
- - secondary-boot-reg = <...>;
-
-The secondary-boot-reg property is a u32 value that specifies the
-physical address of the register used to request the ROM holding pen
-code release a secondary CPU. The value written to the register is
-formed by encoding the target CPU id into the low bits of the
-physical start address it should jump to.
-
-Example:
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- cpu0: cpu@0 {
- device_type = "cpu";
- compatible = "arm,cortex-a9";
- reg = <0>;
- };
-
- cpu1: cpu@1 {
- device_type = "cpu";
- compatible = "arm,cortex-a9";
- reg = <1>;
- enable-method = "brcm,bcm23550";
- secondary-boot-reg = <0x3500417c>;
- };
- };
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm23550.txt b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm23550.txt
deleted file mode 100644
index 080baad923d6..000000000000
--- a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm23550.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-Broadcom BCM23550 device tree bindings
---------------------------------------
-
-This document describes the device tree bindings for boards with the BCM23550
-SoC.
-
-Required root node property:
- - compatible: brcm,bcm23550
-
-Example:
- / {
- model = "BCM23550 SoC";
- compatible = "brcm,bcm23550";
- [...]
- }
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm23550.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm23550.yaml
new file mode 100644
index 000000000000..eafec29ba7ab
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm23550.yaml
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/bcm/brcm,bcm23550.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom BCM23550
+
+maintainers:
+ - Florian Fainelli <f.fainelli@gmail.com>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ items:
+ - enum:
+ - brcm,bcm23550-sparrow
+ - const: brcm,bcm23550
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm2835.txt b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm2835.txt
deleted file mode 100644
index 0dcc3ea5adff..000000000000
--- a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm2835.txt
+++ /dev/null
@@ -1,63 +0,0 @@
-Broadcom BCM2835 device tree bindings
--------------------------------------------
-
-Raspberry Pi Model A
-Required root node properties:
-compatible = "raspberrypi,model-a", "brcm,bcm2835";
-
-Raspberry Pi Model A+
-Required root node properties:
-compatible = "raspberrypi,model-a-plus", "brcm,bcm2835";
-
-Raspberry Pi Model B
-Required root node properties:
-compatible = "raspberrypi,model-b", "brcm,bcm2835";
-
-Raspberry Pi Model B (no P5)
-early model B with I2C0 rather than I2C1 routed to the expansion header
-Required root node properties:
-compatible = "raspberrypi,model-b-i2c0", "brcm,bcm2835";
-
-Raspberry Pi Model B rev2
-Required root node properties:
-compatible = "raspberrypi,model-b-rev2", "brcm,bcm2835";
-
-Raspberry Pi Model B+
-Required root node properties:
-compatible = "raspberrypi,model-b-plus", "brcm,bcm2835";
-
-Raspberry Pi 2 Model B
-Required root node properties:
-compatible = "raspberrypi,2-model-b", "brcm,bcm2836";
-
-Raspberry Pi 3 Model B
-Required root node properties:
-compatible = "raspberrypi,3-model-b", "brcm,bcm2837";
-
-Raspberry Pi 3 Model B+
-Required root node properties:
-compatible = "raspberrypi,3-model-b-plus", "brcm,bcm2837";
-
-Raspberry Pi Compute Module
-Required root node properties:
-compatible = "raspberrypi,compute-module", "brcm,bcm2835";
-
-Raspberry Pi Compute Module 3
-Required root node properties:
-compatible = "raspberrypi,3-compute-module", "brcm,bcm2837";
-
-Raspberry Pi Compute Module 3 Lite
-Required root node properties:
-compatible = "raspberrypi,3-compute-module-lite", "brcm,bcm2837";
-
-Raspberry Pi Zero
-Required root node properties:
-compatible = "raspberrypi,model-zero", "brcm,bcm2835";
-
-Raspberry Pi Zero W
-Required root node properties:
-compatible = "raspberrypi,model-zero-w", "brcm,bcm2835";
-
-Generic BCM2835 board
-Required root node properties:
-compatible = "brcm,bcm2835";
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm4708.txt b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm4708.txt
deleted file mode 100644
index 8608a776caa7..000000000000
--- a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm4708.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-Broadcom BCM4708 device tree bindings
--------------------------------------------
-
-Boards with the BCM4708 SoC shall have the following properties:
-
-Required root node property:
-
-bcm4708
-compatible = "brcm,bcm4708";
-
-bcm4709
-compatible = "brcm,bcm4709";
-
-bcm53012
-compatible = "brcm,bcm53012";
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm4708.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm4708.yaml
new file mode 100644
index 000000000000..f47d74a5b0b6
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm4708.yaml
@@ -0,0 +1,114 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/bcm/brcm,bcm4708.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom BCM4708
+
+description:
+ Broadcom BCM4708/47081/4709/47094/53012 Wi-Fi/network SoCs based
+ on the iProc architecture (Northstar).
+
+maintainers:
+ - Florian Fainelli <f.fainelli@gmail.com>
+ - Hauke Mehrtens <hauke@hauke-m.de>
+ - Rafal Milecki <zajec5@gmail.com>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ - description: BCM4708 based boards
+ items:
+ - enum:
+ - asus,rt-ac56u
+ - asus,rt-ac68u
+ - buffalo,wxr-1750dhp
+ - buffalo,wzr-1166dhp
+ - buffalo,wzr-1166dhp2
+ - buffalo,wzr-1750dhp
+ - linksys,ea6300-v1
+ - linksys,ea6500-v2
+ - luxul,xap-1510-v1
+ - luxul,xwc-1000
+ - netgear,r6250-v1
+ - netgear,r6300-v2
+ - smartrg,sr400ac
+ - brcm,bcm94708
+ - const: brcm,bcm4708
+
+ - description: BCM47081 based boards
+ items:
+ - enum:
+ - asus,rt-n18u
+ - buffalo,wzr-600dhp2
+ - buffalo,wzr-900dhp
+ - luxul,xap-1410-v1
+ - luxul,xwr-1200-v1
+ - tplink,archer-c5-v2
+ - const: brcm,bcm47081
+ - const: brcm,bcm4708
+
+ - description: BCM4709 based boards
+ items:
+ - enum:
+ - asus,rt-ac3200
+ - asus,rt-ac87u
+ - buffalo,wxr-1900dhp
+ - linksys,ea9200
+ - netgear,r7000
+ - netgear,r8000
+ - tplink,archer-c9-v1
+ - brcm,bcm94709
+ - const: brcm,bcm4709
+ - const: brcm,bcm4708
+
+ - description: BCM47094 based boards
+ items:
+ - enum:
+ - asus,rt-ac3100
+ - asus,rt-ac5300
+ - asus,rt-ac88u
+ - dlink,dir-885l
+ - dlink,dir-890l
+ - linksys,panamera
+ - luxul,abr-4500-v1
+ - luxul,xap-1610-v1
+ - luxul,xbr-4500-v1
+ - luxul,xwc-2000-v1
+ - luxul,xwr-3100-v1
+ - luxul,xwr-3150-v1
+ - netgear,r8500
+ - phicomm,k3
+ - const: brcm,bcm47094
+ - const: brcm,bcm4708
+
+ - description: BCM53012 based boards
+ items:
+ - enum:
+ - brcm,bcm953012er
+ - brcm,bcm953012hr
+ - brcm,bcm953012k
+ - const: brcm,bcm53012
+ - const: brcm,bcm4708
+
+ - description: BCM53015 based boards
+ items:
+ - enum:
+ - meraki,mr26
+ - const: brcm,bcm53015
+ - const: brcm,bcm4708
+
+ - description: BCM53016 based boards
+ items:
+ - enum:
+ - dlink,dwl-8610ap
+ - meraki,mr32
+ - const: brcm,bcm53016
+ - const: brcm,bcm4708
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm53573.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm53573.yaml
new file mode 100644
index 000000000000..81b9a4a641c1
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm53573.yaml
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/bcm/brcm,bcm53573.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom BCM53573 SoCs family
+
+description:
+ Broadcom BCM53573 / BCM47189 Wi-Fi SoCs derived from Northstar.
+
+maintainers:
+ - Rafał Miłecki <rafal@milecki.pl>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ - description: BCM53573 based boards
+ items:
+ - enum:
+ - tenda,ac6-v1
+ - tenda,w15e-v1
+ - const: brcm,bcm53573
+
+ - description: BCM47189 based boards
+ items:
+ - enum:
+ - brcm,bcm947189acdbmr
+ - luxul,xap-810-v1
+ - luxul,xap-1440-v1
+ - tenda,ac9
+ - const: brcm,bcm47189
+ - const: brcm,bcm53573
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm63138.txt b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm63138.txt
index b82b6a0ae6f7..a8866c6e9d46 100644
--- a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm63138.txt
+++ b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm63138.txt
@@ -30,7 +30,7 @@ Example:
cpus {
cpu@0 {
- compatible = "arm,cotex-a9";
+ compatible = "arm,cortex-a9";
reg = <0>;
...
enable-method = "brcm,bcm63138";
@@ -62,7 +62,7 @@ Timer node:
Syscon reboot node:
-See Documentation/devicetree/bindings/power/reset/syscon-reboot.txt for the
+See Documentation/devicetree/bindings/power/reset/syscon-reboot.yaml for the
detailed list of properties, the two values defined below are specific to the
BCM6328-style timer:
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,bcmbca.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,bcmbca.yaml
new file mode 100644
index 000000000000..354bb1420cdd
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/bcm/brcm,bcmbca.yaml
@@ -0,0 +1,153 @@
+# SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/bcm/brcm,bcmbca.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom Broadband SoC
+
+description:
+ Broadcom Broadband SoCs include family of high performance DSL/PON/Wireless
+ chips that can be used as home gateway, router and WLAN AP for residential,
+ enterprise and carrier applications.
+
+maintainers:
+ - William Zhang <william.zhang@broadcom.com>
+ - Anand Gore <anand.gore@broadcom.com>
+ - Kursad Oney <kursad.oney@broadcom.com>
+ - Rafał Miłecki <rafal@milecki.pl>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ - description: BCM47622 based boards
+ items:
+ - enum:
+ - brcm,bcm947622
+ - const: brcm,bcm47622
+ - const: brcm,bcmbca
+
+ - description: BCM4906 based boards
+ items:
+ - enum:
+ - netgear,r8000p
+ - tplink,archer-c2300-v1
+ - zyxel,ex3510b
+ - const: brcm,bcm4906
+ - const: brcm,bcm4908
+ - const: brcm,bcmbca
+
+ - description: BCM4908 based boards
+ items:
+ - enum:
+ - asus,gt-ac5300
+ - brcm,bcm94908
+ - netgear,raxe500
+ - const: brcm,bcm4908
+ - const: brcm,bcmbca
+
+ - description: BCM49408 based boards
+ items:
+ - const: brcm,bcm49408
+ - const: brcm,bcm4908
+ - const: brcm,bcmbca
+
+ - description: BCM4912 based boards
+ items:
+ - enum:
+ - asus,gt-ax6000
+ - brcm,bcm94912
+ - const: brcm,bcm4912
+ - const: brcm,bcmbca
+
+ - description: BCM63138 based boards
+ items:
+ - enum:
+ - brcm,bcm963138
+ - brcm,BCM963138DVT
+ - const: brcm,bcm63138
+ - const: brcm,bcmbca
+
+ - description: BCM63146 based boards
+ items:
+ - enum:
+ - brcm,bcm963146
+ - const: brcm,bcm63146
+ - const: brcm,bcmbca
+
+ - description: BCM63148 based boards
+ items:
+ - enum:
+ - brcm,bcm963148
+ - const: brcm,bcm63148
+ - const: brcm,bcmbca
+
+ - description: BCM63158 based boards
+ items:
+ - enum:
+ - brcm,bcm963158
+ - const: brcm,bcm63158
+ - const: brcm,bcmbca
+
+ - description: BCM63178 based boards
+ items:
+ - enum:
+ - brcm,bcm963178
+ - const: brcm,bcm63178
+ - const: brcm,bcmbca
+
+ - description: BCM6756 based boards
+ items:
+ - enum:
+ - brcm,bcm96756
+ - const: brcm,bcm6756
+ - const: brcm,bcmbca
+
+ - description: BCM6813 based boards
+ items:
+ - enum:
+ - brcm,bcm96813
+ - const: brcm,bcm6813
+ - const: brcm,bcmbca
+
+ - description: BCM6846 based boards
+ items:
+ - enum:
+ - brcm,bcm96846
+ - genexis,xg6846b
+ - const: brcm,bcm6846
+ - const: brcm,bcmbca
+
+ - description: BCM6855 based boards
+ items:
+ - enum:
+ - brcm,bcm96855
+ - const: brcm,bcm6855
+ - const: brcm,bcmbca
+
+ - description: BCM6856 based boards
+ items:
+ - enum:
+ - brcm,bcm96856
+ - const: brcm,bcm6856
+ - const: brcm,bcmbca
+
+ - description: BCM6858 based boards
+ items:
+ - enum:
+ - brcm,bcm96858
+ - const: brcm,bcm6858
+ - const: brcm,bcmbca
+
+ - description: BCM6878 based boards
+ items:
+ - enum:
+ - brcm,bcm96878
+ - const: brcm,bcm6878
+ - const: brcm,bcmbca
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,brcmstb.txt b/Documentation/devicetree/bindings/arm/bcm/brcm,brcmstb.txt
index 104cc9b41df4..071421dbc4d0 100644
--- a/Documentation/devicetree/bindings/arm/bcm/brcm,brcmstb.txt
+++ b/Documentation/devicetree/bindings/arm/bcm/brcm,brcmstb.txt
@@ -187,15 +187,8 @@ Required properties:
Sequencer DRAM parameters and control registers. Used for Self-Refresh
Power-Down (SRPD), among other things.
-Required properties:
-- compatible : should contain one of these
- "brcm,brcmstb-memc-ddr-rev-b.2.1"
- "brcm,brcmstb-memc-ddr-rev-b.2.2"
- "brcm,brcmstb-memc-ddr-rev-b.2.3"
- "brcm,brcmstb-memc-ddr-rev-b.3.0"
- "brcm,brcmstb-memc-ddr-rev-b.3.1"
- "brcm,brcmstb-memc-ddr"
-- reg : the MEMC DDR register range
+See Documentation/devicetree/bindings/memory-controllers/brcm,brcmstb-memc-ddr.yaml for a
+full list of supported compatible strings and properties.
Example:
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,cygnus.txt b/Documentation/devicetree/bindings/arm/bcm/brcm,cygnus.txt
deleted file mode 100644
index 4c77169bb534..000000000000
--- a/Documentation/devicetree/bindings/arm/bcm/brcm,cygnus.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-Broadcom Cygnus device tree bindings
-------------------------------------
-
-
-Boards with Cygnus SoCs shall have the following properties:
-
-Required root node property:
-
-BCM11300
-compatible = "brcm,bcm11300", "brcm,cygnus";
-
-BCM11320
-compatible = "brcm,bcm11320", "brcm,cygnus";
-
-BCM11350
-compatible = "brcm,bcm11350", "brcm,cygnus";
-
-BCM11360
-compatible = "brcm,bcm11360", "brcm,cygnus";
-
-BCM58300
-compatible = "brcm,bcm58300", "brcm,cygnus";
-
-BCM58302
-compatible = "brcm,bcm58302", "brcm,cygnus";
-
-BCM58303
-compatible = "brcm,bcm58303", "brcm,cygnus";
-
-BCM58305
-compatible = "brcm,bcm58305", "brcm,cygnus";
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,cygnus.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,cygnus.yaml
new file mode 100644
index 000000000000..a0a3f32db54e
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/bcm/brcm,cygnus.yaml
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/bcm/brcm,cygnus.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom Cygnus
+
+maintainers:
+ - Ray Jui <rjui@broadcom.com>
+ - Scott Branden <sbranden@broadcom.com>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ items:
+ - enum:
+ - brcm,bcm11300
+ - brcm,bcm11320
+ - brcm,bcm11350
+ - brcm,bcm11360
+ - brcm,bcm58300
+ - brcm,bcm58302
+ - brcm,bcm58303
+ - brcm,bcm58305
+ - const: brcm,cygnus
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,hr2.txt b/Documentation/devicetree/bindings/arm/bcm/brcm,hr2.txt
deleted file mode 100644
index a124c7fc4dcd..000000000000
--- a/Documentation/devicetree/bindings/arm/bcm/brcm,hr2.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-Broadcom Hurricane 2 device tree bindings
----------------------------------------
-
-Broadcom Hurricane 2 family of SoCs are used for switching control. These SoCs
-are based on Broadcom's iProc SoC architecture and feature a single core Cortex
-A9 ARM CPUs, DDR2/DDR3 memory, PCIe GEN-2, USB 2.0 and USB 3.0, serial and NAND
-flash and a PCIe attached integrated switching engine.
-
-Boards with Hurricane SoCs shall have the following properties:
-
-Required root node property:
-
-BCM53342
-compatible = "brcm,bcm53342", "brcm,hr2";
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,hr2.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,hr2.yaml
new file mode 100644
index 000000000000..cc6add0e933a
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/bcm/brcm,hr2.yaml
@@ -0,0 +1,30 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/bcm/brcm,hr2.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom Hurricane 2
+
+description:
+ Broadcom Hurricane 2 family of SoCs are used for switching control. These SoCs
+ are based on Broadcom's iProc SoC architecture and feature a single core Cortex
+ A9 ARM CPUs, DDR2/DDR3 memory, PCIe GEN-2, USB 2.0 and USB 3.0, serial and NAND
+ flash and a PCIe attached integrated switching engine.
+
+maintainers:
+ - Florian Fainelli <f.fainelli@gmail.com>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ items:
+ - enum:
+ - ubnt,unifi-switch8
+ - const: brcm,bcm53342
+ - const: brcm,hr2
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,ns2.txt b/Documentation/devicetree/bindings/arm/bcm/brcm,ns2.txt
deleted file mode 100644
index 35f056f4a1c3..000000000000
--- a/Documentation/devicetree/bindings/arm/bcm/brcm,ns2.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-Broadcom North Star 2 (NS2) device tree bindings
-------------------------------------------------
-
-Boards with NS2 shall have the following properties:
-
-Required root node property:
-
-NS2 SVK board
-compatible = "brcm,ns2-svk", "brcm,ns2";
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,ns2.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,ns2.yaml
new file mode 100644
index 000000000000..6696598eca0e
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/bcm/brcm,ns2.yaml
@@ -0,0 +1,25 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/bcm/brcm,ns2.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom North Star 2 (NS2)
+
+maintainers:
+ - Ray Jui <rjui@broadcom.com>
+ - Scott Branden <sbranden@broadcom.com>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ items:
+ - enum:
+ - brcm,ns2-svk
+ - brcm,ns2-xmc
+ - const: brcm,ns2
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,nsp-cpu-method.txt b/Documentation/devicetree/bindings/arm/bcm/brcm,nsp-cpu-method.txt
deleted file mode 100644
index 677ef9d9f445..000000000000
--- a/Documentation/devicetree/bindings/arm/bcm/brcm,nsp-cpu-method.txt
+++ /dev/null
@@ -1,39 +0,0 @@
-Broadcom Northstar Plus SoC CPU Enable Method
----------------------------------------------
-This binding defines the enable method used for starting secondary
-CPU in the following Broadcom SoCs:
- BCM58522, BCM58525, BCM58535, BCM58622, BCM58623, BCM58625, BCM88312
-
-The enable method is specified by defining the following required
-properties in the corresponding secondary "cpu" device tree node:
- - enable-method = "brcm,bcm-nsp-smp";
- - secondary-boot-reg = <...>;
-
-The secondary-boot-reg property is a u32 value that specifies the
-physical address of the register which should hold the common
-entry point for a secondary CPU. This entry is cpu node specific
-and should be added per cpu. E.g., in case of NSP (BCM58625) which
-is a dual core CPU SoC, this entry should be added to cpu1 node.
-
-
-Example:
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- cpu0: cpu@0 {
- device_type = "cpu";
- compatible = "arm,cortex-a9";
- next-level-cache = <&L2>;
- reg = <0>;
- };
-
- cpu1: cpu@1 {
- device_type = "cpu";
- compatible = "arm,cortex-a9";
- next-level-cache = <&L2>;
- enable-method = "brcm,bcm-nsp-smp";
- secondary-boot-reg = <0xffff042c>;
- reg = <1>;
- };
- };
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,nsp.txt b/Documentation/devicetree/bindings/arm/bcm/brcm,nsp.txt
deleted file mode 100644
index eae53e4556be..000000000000
--- a/Documentation/devicetree/bindings/arm/bcm/brcm,nsp.txt
+++ /dev/null
@@ -1,34 +0,0 @@
-Broadcom Northstar Plus device tree bindings
---------------------------------------------
-
-Broadcom Northstar Plus family of SoCs are used for switching control
-and management applications as well as residential router/gateway
-applications. The SoC features dual core Cortex A9 ARM CPUs, integrating
-several peripheral interfaces including multiple Gigabit Ethernet PHYs,
-DDR3 memory, PCIE Gen-2, USB 2.0 and USB 3.0, serial and NAND flash,
-SATA and several other IO controllers.
-
-Boards with Northstar Plus SoCs shall have the following properties:
-
-Required root node property:
-
-BCM58522
-compatible = "brcm,bcm58522", "brcm,nsp";
-
-BCM58525
-compatible = "brcm,bcm58525", "brcm,nsp";
-
-BCM58535
-compatible = "brcm,bcm58535", "brcm,nsp";
-
-BCM58622
-compatible = "brcm,bcm58622", "brcm,nsp";
-
-BCM58623
-compatible = "brcm,bcm58623", "brcm,nsp";
-
-BCM58625
-compatible = "brcm,bcm58625", "brcm,nsp";
-
-BCM88312
-compatible = "brcm,bcm88312", "brcm,nsp";
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,nsp.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,nsp.yaml
new file mode 100644
index 000000000000..a43b2d4d936b
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/bcm/brcm,nsp.yaml
@@ -0,0 +1,83 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/bcm/brcm,nsp.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom Northstar Plus
+
+description:
+ Broadcom Northstar Plus family of SoCs are used for switching control
+ and management applications as well as residential router/gateway
+ applications. The SoC features dual core Cortex A9 ARM CPUs, integrating
+ several peripheral interfaces including multiple Gigabit Ethernet PHYs,
+ DDR3 memory, PCIE Gen-2, USB 2.0 and USB 3.0, serial and NAND flash,
+ SATA and several other IO controllers.
+
+maintainers:
+ - Ray Jui <rjui@broadcom.com>
+ - Scott Branden <sbranden@broadcom.com>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ - description: BCM58522 based boards
+ items:
+ - enum:
+ - brcm,bcm958522er
+ - const: brcm,bcm58522
+ - const: brcm,nsp
+
+ - description: BCM58525 based boards
+ items:
+ - enum:
+ - brcm,bcm958525er
+ - brcm,bcm958525xmc
+ - const: brcm,bcm58525
+ - const: brcm,nsp
+
+ - description: BCM58535 based boards
+ items:
+ - const: brcm,bcm58535
+ - const: brcm,nsp
+
+ - description: BCM58622 based boards
+ items:
+ - enum:
+ - brcm,bcm958622hr
+ - const: brcm,bcm58622
+ - const: brcm,nsp
+
+ - description: BCM58623 based boards
+ items:
+ - enum:
+ - brcm,bcm958623hr
+ - const: brcm,bcm58623
+ - const: brcm,nsp
+
+ - description: BCM58625 based boards
+ items:
+ - enum:
+ - brcm,bcm958625hr
+ - brcm,bcm958625k
+ - meraki,mx64
+ - meraki,mx64-a0
+ - meraki,mx64w
+ - meraki,mx64w-a0
+ - meraki,mx65
+ - meraki,mx65w
+ - const: brcm,bcm58625
+ - const: brcm,nsp
+
+ - description: BCM88312 based boards
+ items:
+ - enum:
+ - brcm,bcm988312hr
+ - const: brcm,bcm88312
+ - const: brcm,nsp
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,stingray.txt b/Documentation/devicetree/bindings/arm/bcm/brcm,stingray.txt
deleted file mode 100644
index 23a02178dd44..000000000000
--- a/Documentation/devicetree/bindings/arm/bcm/brcm,stingray.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-Broadcom Stingray device tree bindings
-------------------------------------------------
-
-Boards with Stingray shall have the following properties:
-
-Required root node property:
-
-Stingray Combo SVK board
-compatible = "brcm,bcm958742k", "brcm,stingray";
-
-Stingray SST100 board
-compatible = "brcm,bcm958742t", "brcm,stingray";
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,stingray.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,stingray.yaml
new file mode 100644
index 000000000000..c6ccb78aab0a
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/bcm/brcm,stingray.yaml
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/bcm/brcm,stingray.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom Stingray
+
+maintainers:
+ - Ray Jui <rjui@broadcom.com>
+ - Scott Branden <sbranden@broadcom.com>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ items:
+ - enum:
+ - brcm,bcm958742k
+ - brcm,bcm958742t
+ - brcm,bcm958802a802x
+ - const: brcm,stingray
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,vulcan-soc.txt b/Documentation/devicetree/bindings/arm/bcm/brcm,vulcan-soc.txt
deleted file mode 100644
index 223ed3471c08..000000000000
--- a/Documentation/devicetree/bindings/arm/bcm/brcm,vulcan-soc.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-Broadcom Vulcan device tree bindings
-------------------------------------
-
-Boards with Broadcom Vulcan shall have the following root property:
-
-Broadcom Vulcan Evaluation Board:
- compatible = "brcm,vulcan-eval", "brcm,vulcan-soc";
-
-Generic Vulcan board:
- compatible = "brcm,vulcan-soc";
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,vulcan-soc.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,vulcan-soc.yaml
new file mode 100644
index 000000000000..3f441352fbf0
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/bcm/brcm,vulcan-soc.yaml
@@ -0,0 +1,24 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/bcm/brcm,vulcan-soc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom Vulcan
+
+maintainers:
+ - Robert Richter <rrichter@marvell.com>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ items:
+ - enum:
+ - brcm,vulcan-eval
+ - cavium,thunderx2-cn9900
+ - const: brcm,vulcan-soc
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/bcm/raspberrypi,bcm2835-firmware.txt b/Documentation/devicetree/bindings/arm/bcm/raspberrypi,bcm2835-firmware.txt
deleted file mode 100644
index 6824b3180ffb..000000000000
--- a/Documentation/devicetree/bindings/arm/bcm/raspberrypi,bcm2835-firmware.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-Raspberry Pi VideoCore firmware driver
-
-Required properties:
-
-- compatible: Should be "raspberrypi,bcm2835-firmware"
-- mboxes: Phandle to the firmware device's Mailbox.
- (See: ../mailbox/mailbox.txt for more information)
-
-Example:
-
-firmware {
- compatible = "raspberrypi,bcm2835-firmware";
- mboxes = <&mailbox>;
-};
diff --git a/Documentation/devicetree/bindings/arm/bcm/raspberrypi,bcm2835-firmware.yaml b/Documentation/devicetree/bindings/arm/bcm/raspberrypi,bcm2835-firmware.yaml
new file mode 100644
index 000000000000..8349c0a854d9
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/bcm/raspberrypi,bcm2835-firmware.yaml
@@ -0,0 +1,167 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/bcm/raspberrypi,bcm2835-firmware.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Raspberry Pi VideoCore firmware driver
+
+maintainers:
+ - Eric Anholt <eric@anholt.net>
+ - Stefan Wahren <wahrenst@gmx.net>
+
+select:
+ properties:
+ compatible:
+ contains:
+ const: raspberrypi,bcm2835-firmware
+
+ required:
+ - compatible
+
+properties:
+ compatible:
+ items:
+ - const: raspberrypi,bcm2835-firmware
+ - const: simple-mfd
+
+ mboxes:
+ maxItems: 1
+
+ clocks:
+ type: object
+ additionalProperties: false
+
+ properties:
+ compatible:
+ const: raspberrypi,firmware-clocks
+
+ "#clock-cells":
+ const: 1
+ description: >
+ The argument is the ID of the clocks contained by the
+ firmware messages.
+
+ required:
+ - compatible
+ - "#clock-cells"
+
+ gpio:
+ type: object
+ additionalProperties: false
+
+ properties:
+ compatible:
+ const: raspberrypi,firmware-gpio
+
+ gpio-controller: true
+
+ "#gpio-cells":
+ const: 2
+ description:
+ The first cell is the pin number, and the second cell is used to
+ specify the gpio polarity (GPIO_ACTIVE_HIGH or GPIO_ACTIVE_LOW).
+
+ gpio-line-names:
+ minItems: 8
+
+ required:
+ - compatible
+ - gpio-controller
+ - "#gpio-cells"
+
+ reset:
+ type: object
+ additionalProperties: false
+
+ properties:
+ compatible:
+ const: raspberrypi,firmware-reset
+
+ "#reset-cells":
+ const: 1
+ description: >
+ The argument is the ID of the firmware reset line to affect.
+
+ required:
+ - compatible
+ - "#reset-cells"
+
+ pwm:
+ type: object
+ additionalProperties: false
+
+ properties:
+ compatible:
+ const: raspberrypi,firmware-poe-pwm
+
+ "#pwm-cells":
+ # See pwm.yaml in this directory for a description of the cells format.
+ const: 2
+
+ required:
+ - compatible
+ - "#pwm-cells"
+
+ touchscreen:
+ type: object
+ $ref: /schemas/input/touchscreen/touchscreen.yaml#
+ additionalProperties: false
+
+ properties:
+ compatible:
+ const: raspberrypi,firmware-ts
+
+ firmware:
+ deprecated: true
+ description: Phandle to RPi's firmware device node.
+
+ touchscreen-size-x: true
+ touchscreen-size-y: true
+ touchscreen-inverted-x: true
+ touchscreen-inverted-y: true
+ touchscreen-swapped-x-y: true
+
+ required:
+ - compatible
+
+required:
+ - compatible
+ - mboxes
+
+additionalProperties: false
+
+examples:
+ - |
+ firmware {
+ compatible = "raspberrypi,bcm2835-firmware", "simple-mfd";
+ mboxes = <&mailbox>;
+
+ firmware_clocks: clocks {
+ compatible = "raspberrypi,firmware-clocks";
+ #clock-cells = <1>;
+ };
+
+ expgpio: gpio {
+ compatible = "raspberrypi,firmware-gpio";
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
+
+ reset: reset {
+ compatible = "raspberrypi,firmware-reset";
+ #reset-cells = <1>;
+ };
+
+ pwm: pwm {
+ compatible = "raspberrypi,firmware-poe-pwm";
+ #pwm-cells = <2>;
+ };
+
+ ts: touchscreen {
+ compatible = "raspberrypi,firmware-ts";
+ touchscreen-size-x = <800>;
+ touchscreen-size-y = <480>;
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/arm/bhf.txt b/Documentation/devicetree/bindings/arm/bhf.txt
deleted file mode 100644
index 886b503caf9c..000000000000
--- a/Documentation/devicetree/bindings/arm/bhf.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Beckhoff Automation Platforms Device Tree Bindings
---------------------------------------------------
-
-CX9020 Embedded PC
-Required root node properties:
- - compatible = "bhf,cx9020", "fsl,imx53";
diff --git a/Documentation/devicetree/bindings/arm/bitmain.yaml b/Documentation/devicetree/bindings/arm/bitmain.yaml
new file mode 100644
index 000000000000..55a5a570b5bc
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/bitmain.yaml
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/bitmain.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Bitmain platform
+
+maintainers:
+ - Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+
+properties:
+ $nodename:
+ const: "/"
+ compatible:
+ items:
+ - enum:
+ - bitmain,sophon-edge
+ - const: bitmain,bm1880
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/blaize.yaml b/Documentation/devicetree/bindings/arm/blaize.yaml
new file mode 100644
index 000000000000..af39e2756407
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/blaize.yaml
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/blaize.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Blaize Platforms
+
+maintainers:
+ - James Cowgill <james.cowgill@blaize.com>
+ - Matt Redfearn <matt.redfearn@blaize.com>
+ - Neil Jones <neil.jones@blaize.com>
+ - Nikolaos Pasaloukos <nikolaos.pasaloukos@blaize.com>
+
+description: |
+ Blaize Platforms using SoCs designed by Blaize Inc.
+
+ The products based on the BLZP1600 SoC:
+
+ - BLZP1600-SoM: SoM (System on Module)
+ - BLZP1600-CB2: Development board CB2 based on BLZP1600-SoM
+
+ BLZP1600 SoC integrates a dual core ARM Cortex A53 cluster
+ and a Blaize Graph Streaming Processor for AI and ML workloads,
+ plus a suite of connectivity and other peripherals.
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ - description: Blaize BLZP1600 based boards
+ items:
+ - enum:
+ - blaize,blzp1600-cb2
+ - const: blaize,blzp1600
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/calxeda.txt b/Documentation/devicetree/bindings/arm/calxeda.txt
deleted file mode 100644
index 25fcf96795ca..000000000000
--- a/Documentation/devicetree/bindings/arm/calxeda.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-Calxeda Platforms Device Tree Bindings
------------------------------------------------
-
-Boards with Calxeda Cortex-A9 based ECX-1000 (Highbank) SOC shall have the
-following properties.
-
-Required root node properties:
- - compatible = "calxeda,highbank";
-
-
-Boards with Calxeda Cortex-A15 based ECX-2000 SOC shall have the following
-properties.
-
-Required root node properties:
- - compatible = "calxeda,ecx-2000";
diff --git a/Documentation/devicetree/bindings/arm/calxeda.yaml b/Documentation/devicetree/bindings/arm/calxeda.yaml
new file mode 100644
index 000000000000..3e9f5e1d862e
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/calxeda.yaml
@@ -0,0 +1,24 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/calxeda.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Calxeda Platforms
+
+maintainers:
+ - Rob Herring <robh@kernel.org>
+description: |+
+ Bindings for boards with Calxeda Cortex-A9 based ECX-1000 (Highbank) SOC
+ or Cortex-A15 based ECX-2000 SOCs
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ items:
+ - enum:
+ - calxeda,highbank
+ - calxeda,ecx-2000
+
+additionalProperties: true
diff --git a/Documentation/devicetree/bindings/arm/calxeda/hb-sregs.yaml b/Documentation/devicetree/bindings/arm/calxeda/hb-sregs.yaml
new file mode 100644
index 000000000000..dfdc97083efb
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/calxeda/hb-sregs.yaml
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/calxeda/hb-sregs.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Calxeda Highbank system registers
+
+description: |
+ The Calxeda Highbank system has a block of MMIO registers controlling
+ several generic system aspects. Those can be used to control some power
+ management, they also contain some gate and PLL clocks.
+
+maintainers:
+ - Andre Przywara <andre.przywara@arm.com>
+
+properties:
+ compatible:
+ const: calxeda,hb-sregs
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ type: object
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ sregs@fff3c000 {
+ compatible = "calxeda,hb-sregs";
+ reg = <0xfff3c000 0x1000>;
+
+ clocks {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ osc: oscillator {
+ #clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <33333000>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/arm/calxeda/l2ecc.txt b/Documentation/devicetree/bindings/arm/calxeda/l2ecc.txt
deleted file mode 100644
index 94e642a33db0..000000000000
--- a/Documentation/devicetree/bindings/arm/calxeda/l2ecc.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-Calxeda Highbank L2 cache ECC
-
-Properties:
-- compatible : Should be "calxeda,hb-sregs-l2-ecc"
-- reg : Address and size for ECC error interrupt clear registers.
-- interrupts : Should be single bit error interrupt, then double bit error
- interrupt.
-
-Example:
-
- sregs@fff3c200 {
- compatible = "calxeda,hb-sregs-l2-ecc";
- reg = <0xfff3c200 0x100>;
- interrupts = <0 71 4 0 72 4>;
- };
diff --git a/Documentation/devicetree/bindings/arm/calxeda/l2ecc.yaml b/Documentation/devicetree/bindings/arm/calxeda/l2ecc.yaml
new file mode 100644
index 000000000000..76b65ea149b6
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/calxeda/l2ecc.yaml
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/calxeda/l2ecc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Calxeda Highbank L2 cache ECC
+
+description: |
+ Binding for the Calxeda Highbank L2 cache controller ECC device.
+ This does not cover the actual L2 cache controller control registers,
+ but just the error reporting functionality.
+
+maintainers:
+ - Andre Przywara <andre.przywara@arm.com>
+
+properties:
+ compatible:
+ const: calxeda,hb-sregs-l2-ecc
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ items:
+ - description: single bit error interrupt
+ - description: double bit error interrupt
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ sregs@fff3c200 {
+ compatible = "calxeda,hb-sregs-l2-ecc";
+ reg = <0xfff3c200 0x100>;
+ interrupts = <0 71 4>, <0 72 4>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/cavium,thunder-88xx.yaml b/Documentation/devicetree/bindings/arm/cavium,thunder-88xx.yaml
new file mode 100644
index 000000000000..d7c813118c1c
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/cavium,thunder-88xx.yaml
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/cavium,thunder-88xx.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Cavium Thunder 88xx SoC
+
+maintainers:
+ - Robert Richter <rric@kernel.org>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ items:
+ - const: cavium,thunder-88xx
+
+additionalProperties: true
diff --git a/Documentation/devicetree/bindings/arm/cavium-thunder.txt b/Documentation/devicetree/bindings/arm/cavium-thunder.txt
deleted file mode 100644
index 6f63a5866902..000000000000
--- a/Documentation/devicetree/bindings/arm/cavium-thunder.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-Cavium Thunder platform device tree bindings
---------------------------------------------
-
-Boards with Cavium's Thunder SoC shall have following properties.
-
-Root Node
----------
-Required root node properties:
-
- - compatible = "cavium,thunder-88xx";
diff --git a/Documentation/devicetree/bindings/arm/cavium-thunder2.txt b/Documentation/devicetree/bindings/arm/cavium-thunder2.txt
deleted file mode 100644
index dc5dd65cbce7..000000000000
--- a/Documentation/devicetree/bindings/arm/cavium-thunder2.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-Cavium ThunderX2 CN99XX platform tree bindings
-----------------------------------------------
-
-Boards with Cavium ThunderX2 CN99XX SoC shall have the root property:
- compatible = "cavium,thunderx2-cn9900", "brcm,vulcan-soc";
-
-These SoC uses the "cavium,thunder2" core which will be compatible
-with "brcm,vulcan".
diff --git a/Documentation/devicetree/bindings/arm/cci-control-port.yaml b/Documentation/devicetree/bindings/arm/cci-control-port.yaml
new file mode 100644
index 000000000000..c29d250a6d77
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/cci-control-port.yaml
@@ -0,0 +1,38 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/cci-control-port.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: CCI Interconnect Bus Masters
+
+maintainers:
+ - Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+
+description: |
+ Masters in the device tree connected to a CCI port (inclusive of CPUs
+ and their cpu nodes).
+
+select: true
+
+properties:
+ cci-control-port:
+ $ref: /schemas/types.yaml#/definitions/phandle
+
+additionalProperties: true
+
+examples:
+ - |
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ compatible = "arm,cortex-a15";
+ device_type = "cpu";
+ cci-control-port = <&cci_control1>;
+ reg = <0>;
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/arm/cci.txt b/Documentation/devicetree/bindings/arm/cci.txt
deleted file mode 100644
index 9600761f2d5b..000000000000
--- a/Documentation/devicetree/bindings/arm/cci.txt
+++ /dev/null
@@ -1,224 +0,0 @@
-=======================================================
-ARM CCI cache coherent interconnect binding description
-=======================================================
-
-ARM multi-cluster systems maintain intra-cluster coherency through a
-cache coherent interconnect (CCI) that is capable of monitoring bus
-transactions and manage coherency, TLB invalidations and memory barriers.
-
-It allows snooping and distributed virtual memory message broadcast across
-clusters, through memory mapped interface, with a global control register
-space and multiple sets of interface control registers, one per slave
-interface.
-
-* CCI interconnect node
-
- Description: Describes a CCI cache coherent Interconnect component
-
- Node name must be "cci".
- Node's parent must be the root node /, and the address space visible
- through the CCI interconnect is the same as the one seen from the
- root node (ie from CPUs perspective as per DT standard).
- Every CCI node has to define the following properties:
-
- - compatible
- Usage: required
- Value type: <string>
- Definition: must contain one of the following:
- "arm,cci-400"
- "arm,cci-500"
- "arm,cci-550"
-
- - reg
- Usage: required
- Value type: Integer cells. A register entry, expressed as a pair
- of cells, containing base and size.
- Definition: A standard property. Specifies base physical
- address of CCI control registers common to all
- interfaces.
-
- - ranges:
- Usage: required
- Value type: Integer cells. An array of range entries, expressed
- as a tuple of cells, containing child address,
- parent address and the size of the region in the
- child address space.
- Definition: A standard property. Follow rules in the Devicetree
- Specification for hierarchical bus addressing. CCI
- interfaces addresses refer to the parent node
- addressing scheme to declare their register bases.
-
- CCI interconnect node can define the following child nodes:
-
- - CCI control interface nodes
-
- Node name must be "slave-if".
- Parent node must be CCI interconnect node.
-
- A CCI control interface node must contain the following
- properties:
-
- - compatible
- Usage: required
- Value type: <string>
- Definition: must be set to
- "arm,cci-400-ctrl-if"
-
- - interface-type:
- Usage: required
- Value type: <string>
- Definition: must be set to one of {"ace", "ace-lite"}
- depending on the interface type the node
- represents.
-
- - reg:
- Usage: required
- Value type: Integer cells. A register entry, expressed
- as a pair of cells, containing base and
- size.
- Definition: the base address and size of the
- corresponding interface programming
- registers.
-
- - CCI PMU node
-
- Parent node must be CCI interconnect node.
-
- A CCI pmu node must contain the following properties:
-
- - compatible
- Usage: required
- Value type: <string>
- Definition: Must contain one of:
- "arm,cci-400-pmu,r0"
- "arm,cci-400-pmu,r1"
- "arm,cci-400-pmu" - DEPRECATED, permitted only where OS has
- secure access to CCI registers
- "arm,cci-500-pmu,r0"
- "arm,cci-550-pmu,r0"
- - reg:
- Usage: required
- Value type: Integer cells. A register entry, expressed
- as a pair of cells, containing base and
- size.
- Definition: the base address and size of the
- corresponding interface programming
- registers.
-
- - interrupts:
- Usage: required
- Value type: Integer cells. Array of interrupt specifier
- entries, as defined in
- ../interrupt-controller/interrupts.txt.
- Definition: list of counter overflow interrupts, one per
- counter. The interrupts must be specified
- starting with the cycle counter overflow
- interrupt, followed by counter0 overflow
- interrupt, counter1 overflow interrupt,...
- ,counterN overflow interrupt.
-
- The CCI PMU has an interrupt signal for each
- counter. The number of interrupts must be
- equal to the number of counters.
-
-* CCI interconnect bus masters
-
- Description: masters in the device tree connected to a CCI port
- (inclusive of CPUs and their cpu nodes).
-
- A CCI interconnect bus master node must contain the following
- properties:
-
- - cci-control-port:
- Usage: required
- Value type: <phandle>
- Definition: a phandle containing the CCI control interface node
- the master is connected to.
-
-Example:
-
- cpus {
- #size-cells = <0>;
- #address-cells = <1>;
-
- CPU0: cpu@0 {
- device_type = "cpu";
- compatible = "arm,cortex-a15";
- cci-control-port = <&cci_control1>;
- reg = <0x0>;
- };
-
- CPU1: cpu@1 {
- device_type = "cpu";
- compatible = "arm,cortex-a15";
- cci-control-port = <&cci_control1>;
- reg = <0x1>;
- };
-
- CPU2: cpu@100 {
- device_type = "cpu";
- compatible = "arm,cortex-a7";
- cci-control-port = <&cci_control2>;
- reg = <0x100>;
- };
-
- CPU3: cpu@101 {
- device_type = "cpu";
- compatible = "arm,cortex-a7";
- cci-control-port = <&cci_control2>;
- reg = <0x101>;
- };
-
- };
-
- dma0: dma@3000000 {
- compatible = "arm,pl330", "arm,primecell";
- cci-control-port = <&cci_control0>;
- reg = <0x0 0x3000000 0x0 0x1000>;
- interrupts = <10>;
- #dma-cells = <1>;
- #dma-channels = <8>;
- #dma-requests = <32>;
- };
-
- cci@2c090000 {
- compatible = "arm,cci-400";
- #address-cells = <1>;
- #size-cells = <1>;
- reg = <0x0 0x2c090000 0 0x1000>;
- ranges = <0x0 0x0 0x2c090000 0x10000>;
-
- cci_control0: slave-if@1000 {
- compatible = "arm,cci-400-ctrl-if";
- interface-type = "ace-lite";
- reg = <0x1000 0x1000>;
- };
-
- cci_control1: slave-if@4000 {
- compatible = "arm,cci-400-ctrl-if";
- interface-type = "ace";
- reg = <0x4000 0x1000>;
- };
-
- cci_control2: slave-if@5000 {
- compatible = "arm,cci-400-ctrl-if";
- interface-type = "ace";
- reg = <0x5000 0x1000>;
- };
-
- pmu@9000 {
- compatible = "arm,cci-400-pmu";
- reg = <0x9000 0x5000>;
- interrupts = <0 101 4>,
- <0 102 4>,
- <0 103 4>,
- <0 104 4>,
- <0 105 4>;
- };
- };
-
-This CCI node corresponds to a CCI component whose control registers sits
-at address 0x000000002c090000.
-CCI slave interface @0x000000002c091000 is connected to dma controller dma0.
-CCI slave interface @0x000000002c094000 is connected to CPUs {CPU0, CPU1};
-CCI slave interface @0x000000002c095000 is connected to CPUs {CPU2, CPU3};
diff --git a/Documentation/devicetree/bindings/arm/cirrus/cirrus,ep9301.yaml b/Documentation/devicetree/bindings/arm/cirrus/cirrus,ep9301.yaml
new file mode 100644
index 000000000000..170aad5dd7ed
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/cirrus/cirrus,ep9301.yaml
@@ -0,0 +1,38 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/cirrus/cirrus,ep9301.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Cirrus Logic EP93xx platforms
+
+description:
+ The EP93xx SoC is a ARMv4T-based with 200 MHz ARM9 CPU.
+
+maintainers:
+ - Alexander Sverdlin <alexander.sverdlin@gmail.com>
+ - Nikita Shubin <nikita.shubin@maquefel.me>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ - description: The TS-7250 is a compact, full-featured Single Board
+ Computer (SBC) based upon the Cirrus EP9302 ARM9 CPU
+ items:
+ - const: technologic,ts7250
+ - const: cirrus,ep9301
+
+ - description: The Liebherr BK3 is a derivate from ts7250 board
+ items:
+ - const: liebherr,bk3
+ - const: cirrus,ep9301
+
+ - description: EDB302 is an evaluation board by Cirrus Logic,
+ based on a Cirrus Logic EP9302 CPU
+ items:
+ - const: cirrus,edb9302
+ - const: cirrus,ep9301
+
+additionalProperties: true
diff --git a/Documentation/devicetree/bindings/arm/cix.yaml b/Documentation/devicetree/bindings/arm/cix.yaml
new file mode 100644
index 000000000000..114dab4bc4d2
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/cix.yaml
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/cix.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: CIX platforms
+
+maintainers:
+ - Peter Chen <peter.chen@cixtech.com>
+ - Fugang Duan <fugang.duan@cixtech.com>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+
+ - description: Radxa Orion O6
+ items:
+ - const: radxa,orion-o6
+ - const: cix,sky1
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/compulab-boards.txt b/Documentation/devicetree/bindings/arm/compulab-boards.txt
deleted file mode 100644
index 42a10285af9c..000000000000
--- a/Documentation/devicetree/bindings/arm/compulab-boards.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-CompuLab SB-SOM is a multi-module baseboard capable of carrying:
- - CM-T43
- - CM-T54
- - CM-QS600
- - CL-SOM-AM57x
- - CL-SOM-iMX7
-modules with minor modifications to the SB-SOM assembly.
-
-Required root node properties:
- - compatible = should be "compulab,sb-som"
-
-Compulab CL-SOM-iMX7 is a miniature System-on-Module (SoM) based on
-Freescale i.MX7 ARM Cortex-A7 System-on-Chip.
-
-Required root node properties:
- - compatible = "compulab,cl-som-imx7", "fsl,imx7d";
-
-Compulab SBC-iMX7 is a single board computer based on the
-Freescale i.MX7 system-on-chip. SBC-iMX7 is implemented with
-the CL-SOM-iMX7 System-on-Module providing most of the functions,
-and SB-SOM-iMX7 carrier board providing additional peripheral
-functions and connectors.
-
-Required root node properties:
- - compatible = "compulab,sbc-imx7", "compulab,cl-som-imx7", "fsl,imx7d";
diff --git a/Documentation/devicetree/bindings/arm/coresight-cpu-debug.txt b/Documentation/devicetree/bindings/arm/coresight-cpu-debug.txt
deleted file mode 100644
index 298291211ea4..000000000000
--- a/Documentation/devicetree/bindings/arm/coresight-cpu-debug.txt
+++ /dev/null
@@ -1,49 +0,0 @@
-* CoreSight CPU Debug Component:
-
-CoreSight CPU debug component are compliant with the ARMv8 architecture
-reference manual (ARM DDI 0487A.k) Chapter 'Part H: External debug'. The
-external debug module is mainly used for two modes: self-hosted debug and
-external debug, and it can be accessed from mmio region from Coresight
-and eventually the debug module connects with CPU for debugging. And the
-debug module provides sample-based profiling extension, which can be used
-to sample CPU program counter, secure state and exception level, etc;
-usually every CPU has one dedicated debug module to be connected.
-
-Required properties:
-
-- compatible : should be "arm,coresight-cpu-debug"; supplemented with
- "arm,primecell" since this driver is using the AMBA bus
- interface.
-
-- reg : physical base address and length of the register set.
-
-- clocks : the clock associated to this component.
-
-- clock-names : the name of the clock referenced by the code. Since we are
- using the AMBA framework, the name of the clock providing
- the interconnect should be "apb_pclk" and the clock is
- mandatory. The interface between the debug logic and the
- processor core is clocked by the internal CPU clock, so it
- is enabled with CPU clock by default.
-
-- cpu : the CPU phandle the debug module is affined to. When omitted
- the module is considered to belong to CPU0.
-
-Optional properties:
-
-- power-domains: a phandle to the debug power domain. We use "power-domains"
- binding to turn on the debug logic if it has own dedicated
- power domain and if necessary to use "cpuidle.off=1" or
- "nohlt" in the kernel command line or sysfs node to
- constrain idle states to ensure registers in the CPU power
- domain are accessible.
-
-Example:
-
- debug@f6590000 {
- compatible = "arm,coresight-cpu-debug","arm,primecell";
- reg = <0 0xf6590000 0 0x1000>;
- clocks = <&sys_ctrl HI6220_DAPB_CLK>;
- clock-names = "apb_pclk";
- cpu = <&cpu0>;
- };
diff --git a/Documentation/devicetree/bindings/arm/coresight.txt b/Documentation/devicetree/bindings/arm/coresight.txt
deleted file mode 100644
index f8aff65ab921..000000000000
--- a/Documentation/devicetree/bindings/arm/coresight.txt
+++ /dev/null
@@ -1,317 +0,0 @@
-* CoreSight Components:
-
-CoreSight components are compliant with the ARM CoreSight architecture
-specification and can be connected in various topologies to suit a particular
-SoCs tracing needs. These trace components can generally be classified as
-sinks, links and sources. Trace data produced by one or more sources flows
-through the intermediate links connecting the source to the currently selected
-sink. Each CoreSight component device should use these properties to describe
-its hardware characteristcs.
-
-* Required properties for all components *except* non-configurable replicators:
-
- * compatible: These have to be supplemented with "arm,primecell" as
- drivers are using the AMBA bus interface. Possible values include:
- - Embedded Trace Buffer (version 1.0):
- "arm,coresight-etb10", "arm,primecell";
-
- - Trace Port Interface Unit:
- "arm,coresight-tpiu", "arm,primecell";
-
- - Trace Memory Controller, used for Embedded Trace Buffer(ETB),
- Embedded Trace FIFO(ETF) and Embedded Trace Router(ETR)
- configuration. The configuration mode (ETB, ETF, ETR) is
- discovered at boot time when the device is probed.
- "arm,coresight-tmc", "arm,primecell";
-
- - Trace Funnel:
- "arm,coresight-funnel", "arm,primecell";
-
- - Embedded Trace Macrocell (version 3.x) and
- Program Flow Trace Macrocell:
- "arm,coresight-etm3x", "arm,primecell";
-
- - Embedded Trace Macrocell (version 4.x):
- "arm,coresight-etm4x", "arm,primecell";
-
- - Coresight programmable Replicator :
- "arm,coresight-dynamic-replicator", "arm,primecell";
-
- - System Trace Macrocell:
- "arm,coresight-stm", "arm,primecell"; [1]
- - Coresight Address Translation Unit (CATU)
- "arm,coresight-catu", "arm,primecell";
-
- * reg: physical base address and length of the register
- set(s) of the component.
-
- * clocks: the clocks associated to this component.
-
- * clock-names: the name of the clocks referenced by the code.
- Since we are using the AMBA framework, the name of the clock
- providing the interconnect should be "apb_pclk", and some
- coresight blocks also have an additional clock "atclk", which
- clocks the core of that coresight component. The latter clock
- is optional.
-
- * port or ports: see "Graph bindings for Coresight" below.
-
-* Additional required properties for System Trace Macrocells (STM):
- * reg: along with the physical base address and length of the register
- set as described above, another entry is required to describe the
- mapping of the extended stimulus port area.
-
- * reg-names: the only acceptable values are "stm-base" and
- "stm-stimulus-base", each corresponding to the areas defined in "reg".
-
-* Required properties for devices that don't show up on the AMBA bus, such as
- non-configurable replicators:
-
- * compatible: Currently supported value is (note the absence of the
- AMBA markee):
- - "arm,coresight-replicator"
-
- * port or ports: see "Graph bindings for Coresight" below.
-
-* Optional properties for ETM/PTMs:
-
- * arm,cp14: must be present if the system accesses ETM/PTM management
- registers via co-processor 14.
-
- * cpu: the cpu phandle this ETM/PTM is affined to. When omitted the
- source is considered to belong to CPU0.
-
-* Optional property for TMC:
-
- * arm,buffer-size: size of contiguous buffer space for TMC ETR
- (embedded trace router). This property is obsolete. The buffer size
- can be configured dynamically via buffer_size property in sysfs.
-
- * arm,scatter-gather: boolean. Indicates that the TMC-ETR can safely
- use the SG mode on this system.
-
-* Optional property for CATU :
- * interrupts : Exactly one SPI may be listed for reporting the address
- error
-
-Graph bindings for Coresight
--------------------------------
-
-Coresight components are interconnected to create a data path for the flow of
-trace data generated from the "sources" to their collection points "sink".
-Each coresight component must describe the "input" and "output" connections.
-The connections must be described via generic DT graph bindings as described
-by the "bindings/graph.txt", where each "port" along with an "endpoint"
-component represents a hardware port and the connection.
-
- * All output ports must be listed inside a child node named "out-ports"
- * All input ports must be listed inside a child node named "in-ports".
- * Port address must match the hardware port number.
-
-Example:
-
-1. Sinks
- etb@20010000 {
- compatible = "arm,coresight-etb10", "arm,primecell";
- reg = <0 0x20010000 0 0x1000>;
-
- clocks = <&oscclk6a>;
- clock-names = "apb_pclk";
- in-ports {
- port {
- etb_in_port: endpoint@0 {
- remote-endpoint = <&replicator_out_port0>;
- };
- };
- };
- };
-
- tpiu@20030000 {
- compatible = "arm,coresight-tpiu", "arm,primecell";
- reg = <0 0x20030000 0 0x1000>;
-
- clocks = <&oscclk6a>;
- clock-names = "apb_pclk";
- in-ports {
- port {
- tpiu_in_port: endpoint@0 {
- remote-endpoint = <&replicator_out_port1>;
- };
- };
- };
- };
-
- etr@20070000 {
- compatible = "arm,coresight-tmc", "arm,primecell";
- reg = <0 0x20070000 0 0x1000>;
-
- clocks = <&oscclk6a>;
- clock-names = "apb_pclk";
- in-ports {
- port {
- etr_in_port: endpoint {
- remote-endpoint = <&replicator2_out_port0>;
- };
- };
- };
-
- out-ports {
- port {
- etr_out_port: endpoint {
- remote-endpoint = <&catu_in_port>;
- };
- };
- };
- };
-
-2. Links
- replicator {
- /* non-configurable replicators don't show up on the
- * AMBA bus. As such no need to add "arm,primecell".
- */
- compatible = "arm,coresight-replicator";
-
- out-ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- /* replicator output ports */
- port@0 {
- reg = <0>;
- replicator_out_port0: endpoint {
- remote-endpoint = <&etb_in_port>;
- };
- };
-
- port@1 {
- reg = <1>;
- replicator_out_port1: endpoint {
- remote-endpoint = <&tpiu_in_port>;
- };
- };
- };
-
- in-ports {
- port {
- replicator_in_port0: endpoint {
- remote-endpoint = <&funnel_out_port0>;
- };
- };
- };
- };
-
- funnel@20040000 {
- compatible = "arm,coresight-funnel", "arm,primecell";
- reg = <0 0x20040000 0 0x1000>;
-
- clocks = <&oscclk6a>;
- clock-names = "apb_pclk";
- out-ports {
- port {
- funnel_out_port0: endpoint {
- remote-endpoint =
- <&replicator_in_port0>;
- };
- };
- };
-
- in-ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
- funnel_in_port0: endpoint {
- remote-endpoint = <&ptm0_out_port>;
- };
- };
-
- port@1 {
- reg = <1>;
- funnel_in_port1: endpoint {
- remote-endpoint = <&ptm1_out_port>;
- };
- };
-
- port@2 {
- reg = <2>;
- funnel_in_port2: endpoint {
- remote-endpoint = <&etm0_out_port>;
- };
- };
-
- };
- };
-
-3. Sources
- ptm@2201c000 {
- compatible = "arm,coresight-etm3x", "arm,primecell";
- reg = <0 0x2201c000 0 0x1000>;
-
- cpu = <&cpu0>;
- clocks = <&oscclk6a>;
- clock-names = "apb_pclk";
- out-ports {
- port {
- ptm0_out_port: endpoint {
- remote-endpoint = <&funnel_in_port0>;
- };
- };
- };
- };
-
- ptm@2201d000 {
- compatible = "arm,coresight-etm3x", "arm,primecell";
- reg = <0 0x2201d000 0 0x1000>;
-
- cpu = <&cpu1>;
- clocks = <&oscclk6a>;
- clock-names = "apb_pclk";
- out-ports {
- port {
- ptm1_out_port: endpoint {
- remote-endpoint = <&funnel_in_port1>;
- };
- };
- };
- };
-
-4. STM
- stm@20100000 {
- compatible = "arm,coresight-stm", "arm,primecell";
- reg = <0 0x20100000 0 0x1000>,
- <0 0x28000000 0 0x180000>;
- reg-names = "stm-base", "stm-stimulus-base";
-
- clocks = <&soc_smc50mhz>;
- clock-names = "apb_pclk";
- out-ports {
- port {
- stm_out_port: endpoint {
- remote-endpoint = <&main_funnel_in_port2>;
- };
- };
- };
- };
-
-5. CATU
-
- catu@207e0000 {
- compatible = "arm,coresight-catu", "arm,primecell";
- reg = <0 0x207e0000 0 0x1000>;
-
- clocks = <&oscclk6a>;
- clock-names = "apb_pclk";
-
- interrupts = <GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>;
- in-ports {
- port {
- catu_in_port: endpoint {
- remote-endpoint = <&etr_out_port>;
- };
- };
- };
- };
-
-[1]. There is currently two version of STM: STM32 and STM500. Both
-have the same HW interface and as such don't need an explicit binding name.
diff --git a/Documentation/devicetree/bindings/arm/cpu-enable-method/al,alpine-smp b/Documentation/devicetree/bindings/arm/cpu-enable-method/al,alpine-smp
index 35e5afb6d9ad..cc7b1402a31f 100644
--- a/Documentation/devicetree/bindings/arm/cpu-enable-method/al,alpine-smp
+++ b/Documentation/devicetree/bindings/arm/cpu-enable-method/al,alpine-smp
@@ -27,16 +27,6 @@ Properties:
- reg : Offset and length of the register set for the device
-* Alpine System-Fabric Service Registers
-
-The System-Fabric Service Registers allow various operation on CPU and
-system fabric, like powering CPUs off.
-
-Properties:
-- compatible : Should contain "al,alpine-sysfabric-service" and "syscon".
-- reg : Offset and length of the register set for the device
-
-
Example:
cpus {
diff --git a/Documentation/devicetree/bindings/arm/cpus.txt b/Documentation/devicetree/bindings/arm/cpus.txt
deleted file mode 100644
index b0198a1cf403..000000000000
--- a/Documentation/devicetree/bindings/arm/cpus.txt
+++ /dev/null
@@ -1,490 +0,0 @@
-=================
-ARM CPUs bindings
-=================
-
-The device tree allows to describe the layout of CPUs in a system through
-the "cpus" node, which in turn contains a number of subnodes (ie "cpu")
-defining properties for every cpu.
-
-Bindings for CPU nodes follow the Devicetree Specification, available from:
-
-https://www.devicetree.org/specifications/
-
-with updates for 32-bit and 64-bit ARM systems provided in this document.
-
-================================
-Convention used in this document
-================================
-
-This document follows the conventions described in the Devicetree
-Specification, with the addition:
-
-- square brackets define bitfields, eg reg[7:0] value of the bitfield in
- the reg property contained in bits 7 down to 0
-
-=====================================
-cpus and cpu node bindings definition
-=====================================
-
-The ARM architecture, in accordance with the Devicetree Specification,
-requires the cpus and cpu nodes to be present and contain the properties
-described below.
-
-- cpus node
-
- Description: Container of cpu nodes
-
- The node name must be "cpus".
-
- A cpus node must define the following properties:
-
- - #address-cells
- Usage: required
- Value type: <u32>
-
- Definition depends on ARM architecture version and
- configuration:
-
- # On uniprocessor ARM architectures previous to v7
- value must be 1, to enable a simple enumeration
- scheme for processors that do not have a HW CPU
- identification register.
- # On 32-bit ARM 11 MPcore, ARM v7 or later systems
- value must be 1, that corresponds to CPUID/MPIDR
- registers sizes.
- # On ARM v8 64-bit systems value should be set to 2,
- that corresponds to the MPIDR_EL1 register size.
- If MPIDR_EL1[63:32] value is equal to 0 on all CPUs
- in the system, #address-cells can be set to 1, since
- MPIDR_EL1[63:32] bits are not used for CPUs
- identification.
- - #size-cells
- Usage: required
- Value type: <u32>
- Definition: must be set to 0
-
-- cpu node
-
- Description: Describes a CPU in an ARM based system
-
- PROPERTIES
-
- - device_type
- Usage: required
- Value type: <string>
- Definition: must be "cpu"
- - reg
- Usage and definition depend on ARM architecture version and
- configuration:
-
- # On uniprocessor ARM architectures previous to v7
- this property is required and must be set to 0.
-
- # On ARM 11 MPcore based systems this property is
- required and matches the CPUID[11:0] register bits.
-
- Bits [11:0] in the reg cell must be set to
- bits [11:0] in CPU ID register.
-
- All other bits in the reg cell must be set to 0.
-
- # On 32-bit ARM v7 or later systems this property is
- required and matches the CPU MPIDR[23:0] register
- bits.
-
- Bits [23:0] in the reg cell must be set to
- bits [23:0] in MPIDR.
-
- All other bits in the reg cell must be set to 0.
-
- # On ARM v8 64-bit systems this property is required
- and matches the MPIDR_EL1 register affinity bits.
-
- * If cpus node's #address-cells property is set to 2
-
- The first reg cell bits [7:0] must be set to
- bits [39:32] of MPIDR_EL1.
-
- The second reg cell bits [23:0] must be set to
- bits [23:0] of MPIDR_EL1.
-
- * If cpus node's #address-cells property is set to 1
-
- The reg cell bits [23:0] must be set to bits [23:0]
- of MPIDR_EL1.
-
- All other bits in the reg cells must be set to 0.
-
- - compatible:
- Usage: required
- Value type: <string>
- Definition: should be one of:
- "arm,arm710t"
- "arm,arm720t"
- "arm,arm740t"
- "arm,arm7ej-s"
- "arm,arm7tdmi"
- "arm,arm7tdmi-s"
- "arm,arm9es"
- "arm,arm9ej-s"
- "arm,arm920t"
- "arm,arm922t"
- "arm,arm925"
- "arm,arm926e-s"
- "arm,arm926ej-s"
- "arm,arm940t"
- "arm,arm946e-s"
- "arm,arm966e-s"
- "arm,arm968e-s"
- "arm,arm9tdmi"
- "arm,arm1020e"
- "arm,arm1020t"
- "arm,arm1022e"
- "arm,arm1026ej-s"
- "arm,arm1136j-s"
- "arm,arm1136jf-s"
- "arm,arm1156t2-s"
- "arm,arm1156t2f-s"
- "arm,arm1176jzf"
- "arm,arm1176jz-s"
- "arm,arm1176jzf-s"
- "arm,arm11mpcore"
- "arm,cortex-a5"
- "arm,cortex-a7"
- "arm,cortex-a8"
- "arm,cortex-a9"
- "arm,cortex-a12"
- "arm,cortex-a15"
- "arm,cortex-a17"
- "arm,cortex-a53"
- "arm,cortex-a57"
- "arm,cortex-a72"
- "arm,cortex-a73"
- "arm,cortex-m0"
- "arm,cortex-m0+"
- "arm,cortex-m1"
- "arm,cortex-m3"
- "arm,cortex-m4"
- "arm,cortex-r4"
- "arm,cortex-r5"
- "arm,cortex-r7"
- "brcm,brahma-b15"
- "brcm,brahma-b53"
- "brcm,vulcan"
- "cavium,thunder"
- "cavium,thunder2"
- "faraday,fa526"
- "intel,sa110"
- "intel,sa1100"
- "marvell,feroceon"
- "marvell,mohawk"
- "marvell,pj4a"
- "marvell,pj4b"
- "marvell,sheeva-v5"
- "nvidia,tegra132-denver"
- "nvidia,tegra186-denver"
- "nvidia,tegra194-carmel"
- "qcom,krait"
- "qcom,kryo"
- "qcom,kryo385"
- "qcom,scorpion"
- - enable-method
- Value type: <stringlist>
- Usage and definition depend on ARM architecture version.
- # On ARM v8 64-bit this property is required and must
- be one of:
- "psci"
- "spin-table"
- # On ARM 32-bit systems this property is optional and
- can be one of:
- "actions,s500-smp"
- "allwinner,sun6i-a31"
- "allwinner,sun8i-a23"
- "allwinner,sun9i-a80-smp"
- "amlogic,meson8-smp"
- "amlogic,meson8b-smp"
- "arm,realview-smp"
- "brcm,bcm11351-cpu-method"
- "brcm,bcm23550"
- "brcm,bcm2836-smp"
- "brcm,bcm-nsp-smp"
- "brcm,brahma-b15"
- "marvell,armada-375-smp"
- "marvell,armada-380-smp"
- "marvell,armada-390-smp"
- "marvell,armada-xp-smp"
- "marvell,98dx3236-smp"
- "mediatek,mt6589-smp"
- "mediatek,mt81xx-tz-smp"
- "qcom,gcc-msm8660"
- "qcom,kpss-acc-v1"
- "qcom,kpss-acc-v2"
- "renesas,apmu"
- "renesas,r9a06g032-smp"
- "rockchip,rk3036-smp"
- "rockchip,rk3066-smp"
- "ste,dbx500-smp"
-
- - cpu-release-addr
- Usage: required for systems that have an "enable-method"
- property value of "spin-table".
- Value type: <prop-encoded-array>
- Definition:
- # On ARM v8 64-bit systems must be a two cell
- property identifying a 64-bit zero-initialised
- memory location.
-
- - qcom,saw
- Usage: required for systems that have an "enable-method"
- property value of "qcom,kpss-acc-v1" or
- "qcom,kpss-acc-v2"
- Value type: <phandle>
- Definition: Specifies the SAW[1] node associated with this CPU.
-
- - qcom,acc
- Usage: required for systems that have an "enable-method"
- property value of "qcom,kpss-acc-v1" or
- "qcom,kpss-acc-v2"
- Value type: <phandle>
- Definition: Specifies the ACC[2] node associated with this CPU.
-
- - cpu-idle-states
- Usage: Optional
- Value type: <prop-encoded-array>
- Definition:
- # List of phandles to idle state nodes supported
- by this cpu [3].
-
- - capacity-dmips-mhz
- Usage: Optional
- Value type: <u32>
- Definition:
- # u32 value representing CPU capacity [4] in
- DMIPS/MHz, relative to highest capacity-dmips-mhz
- in the system.
-
- - rockchip,pmu
- Usage: optional for systems that have an "enable-method"
- property value of "rockchip,rk3066-smp"
- While optional, it is the preferred way to get access to
- the cpu-core power-domains.
- Value type: <phandle>
- Definition: Specifies the syscon node controlling the cpu core
- power domains.
-
- - dynamic-power-coefficient
- Usage: optional
- Value type: <prop-encoded-array>
- Definition: A u32 value that represents the running time dynamic
- power coefficient in units of uW/MHz/V^2. The
- coefficient can either be calculated from power
- measurements or derived by analysis.
-
- The dynamic power consumption of the CPU is
- proportional to the square of the Voltage (V) and
- the clock frequency (f). The coefficient is used to
- calculate the dynamic power as below -
-
- Pdyn = dynamic-power-coefficient * V^2 * f
-
- where voltage is in V, frequency is in MHz.
-
-Example 1 (dual-cluster big.LITTLE system 32-bit):
-
- cpus {
- #size-cells = <0>;
- #address-cells = <1>;
-
- cpu@0 {
- device_type = "cpu";
- compatible = "arm,cortex-a15";
- reg = <0x0>;
- };
-
- cpu@1 {
- device_type = "cpu";
- compatible = "arm,cortex-a15";
- reg = <0x1>;
- };
-
- cpu@100 {
- device_type = "cpu";
- compatible = "arm,cortex-a7";
- reg = <0x100>;
- };
-
- cpu@101 {
- device_type = "cpu";
- compatible = "arm,cortex-a7";
- reg = <0x101>;
- };
- };
-
-Example 2 (Cortex-A8 uniprocessor 32-bit system):
-
- cpus {
- #size-cells = <0>;
- #address-cells = <1>;
-
- cpu@0 {
- device_type = "cpu";
- compatible = "arm,cortex-a8";
- reg = <0x0>;
- };
- };
-
-Example 3 (ARM 926EJ-S uniprocessor 32-bit system):
-
- cpus {
- #size-cells = <0>;
- #address-cells = <1>;
-
- cpu@0 {
- device_type = "cpu";
- compatible = "arm,arm926ej-s";
- reg = <0x0>;
- };
- };
-
-Example 4 (ARM Cortex-A57 64-bit system):
-
-cpus {
- #size-cells = <0>;
- #address-cells = <2>;
-
- cpu@0 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x0 0x0>;
- enable-method = "spin-table";
- cpu-release-addr = <0 0x20000000>;
- };
-
- cpu@1 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x0 0x1>;
- enable-method = "spin-table";
- cpu-release-addr = <0 0x20000000>;
- };
-
- cpu@100 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x0 0x100>;
- enable-method = "spin-table";
- cpu-release-addr = <0 0x20000000>;
- };
-
- cpu@101 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x0 0x101>;
- enable-method = "spin-table";
- cpu-release-addr = <0 0x20000000>;
- };
-
- cpu@10000 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x0 0x10000>;
- enable-method = "spin-table";
- cpu-release-addr = <0 0x20000000>;
- };
-
- cpu@10001 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x0 0x10001>;
- enable-method = "spin-table";
- cpu-release-addr = <0 0x20000000>;
- };
-
- cpu@10100 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x0 0x10100>;
- enable-method = "spin-table";
- cpu-release-addr = <0 0x20000000>;
- };
-
- cpu@10101 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x0 0x10101>;
- enable-method = "spin-table";
- cpu-release-addr = <0 0x20000000>;
- };
-
- cpu@100000000 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x1 0x0>;
- enable-method = "spin-table";
- cpu-release-addr = <0 0x20000000>;
- };
-
- cpu@100000001 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x1 0x1>;
- enable-method = "spin-table";
- cpu-release-addr = <0 0x20000000>;
- };
-
- cpu@100000100 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x1 0x100>;
- enable-method = "spin-table";
- cpu-release-addr = <0 0x20000000>;
- };
-
- cpu@100000101 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x1 0x101>;
- enable-method = "spin-table";
- cpu-release-addr = <0 0x20000000>;
- };
-
- cpu@100010000 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x1 0x10000>;
- enable-method = "spin-table";
- cpu-release-addr = <0 0x20000000>;
- };
-
- cpu@100010001 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x1 0x10001>;
- enable-method = "spin-table";
- cpu-release-addr = <0 0x20000000>;
- };
-
- cpu@100010100 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x1 0x10100>;
- enable-method = "spin-table";
- cpu-release-addr = <0 0x20000000>;
- };
-
- cpu@100010101 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x1 0x10101>;
- enable-method = "spin-table";
- cpu-release-addr = <0 0x20000000>;
- };
-};
-
---
-[1] arm/msm/qcom,saw2.txt
-[2] arm/msm/qcom,kpss-acc.txt
-[3] ARM Linux kernel documentation - idle states bindings
- Documentation/devicetree/bindings/arm/idle-states.txt
-[4] ARM Linux kernel documentation - cpu capacity bindings
- Documentation/devicetree/bindings/arm/cpu-capacity.txt
diff --git a/Documentation/devicetree/bindings/arm/cpus.yaml b/Documentation/devicetree/bindings/arm/cpus.yaml
new file mode 100644
index 000000000000..736b7ab1bd0a
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/cpus.yaml
@@ -0,0 +1,701 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/cpus.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM CPUs
+
+maintainers:
+ - Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+
+description: |+
+ The device tree allows to describe the layout of CPUs in a system through the
+ "cpus" node, which in turn contains a number of subnodes (ie "cpu") defining
+ properties for every cpu.
+
+ Bindings for CPU nodes follow the Devicetree Specification, available from:
+
+ https://www.devicetree.org/specifications/
+
+ with updates for 32-bit and 64-bit ARM systems provided in this document.
+
+ ================================
+ Convention used in this document
+ ================================
+
+ This document follows the conventions described in the Devicetree
+ Specification, with the addition:
+
+ - square brackets define bitfields, eg reg[7:0] value of the bitfield in
+ the reg property contained in bits 7 down to 0
+
+ =====================================
+ cpus and cpu node bindings definition
+ =====================================
+
+ The ARM architecture, in accordance with the Devicetree Specification,
+ requires the cpus and cpu nodes to be present and contain the properties
+ described below.
+
+properties:
+ reg:
+ maxItems: 1
+ description: >
+ Usage and definition depend on ARM architecture version and configuration:
+
+ On uniprocessor ARM architectures previous to v7 this property is required
+ and must be set to 0.
+
+ On ARM 11 MPcore based systems this property is required and matches the
+ CPUID[11:0] register bits.
+
+ Bits [11:0] in the reg cell must be set to bits [11:0] in CPU ID register.
+
+ All other bits in the reg cell must be set to 0.
+
+ On 32-bit ARM v7 or later systems this property is required and matches
+ the CPU MPIDR[23:0] register bits.
+
+ Bits [23:0] in the reg cell must be set to bits [23:0] in MPIDR.
+
+ All other bits in the reg cell must be set to 0.
+
+ On ARM v8 64-bit systems this property is required and matches the
+ MPIDR_EL1 register affinity bits.
+
+ * If cpus node's #address-cells property is set to 2
+
+ The first reg cell bits [7:0] must be set to bits [39:32] of
+ MPIDR_EL1.
+
+ The second reg cell bits [23:0] must be set to bits [23:0] of
+ MPIDR_EL1.
+
+ * If cpus node's #address-cells property is set to 1
+
+ The reg cell bits [23:0] must be set to bits [23:0] of MPIDR_EL1.
+
+ All other bits in the reg cells must be set to 0.
+
+ compatible:
+ enum:
+ - apm,potenza
+ - apm,strega
+ - apple,avalanche
+ - apple,blizzard
+ - apple,cyclone
+ - apple,firestorm
+ - apple,hurricane-zephyr
+ - apple,icestorm
+ - apple,mistral
+ - apple,monsoon
+ - apple,twister
+ - apple,typhoon
+ - arm,arm710t
+ - arm,arm720t
+ - arm,arm740t
+ - arm,arm7ej-s
+ - arm,arm7tdmi
+ - arm,arm7tdmi-s
+ - arm,arm9es
+ - arm,arm9ej-s
+ - arm,arm920t
+ - arm,arm922t
+ - arm,arm925
+ - arm,arm926e-s
+ - arm,arm926ej-s
+ - arm,arm940t
+ - arm,arm946e-s
+ - arm,arm966e-s
+ - arm,arm968e-s
+ - arm,arm9tdmi
+ - arm,arm1020e
+ - arm,arm1020t
+ - arm,arm1022e
+ - arm,arm1026ej-s
+ - arm,arm1136j-s
+ - arm,arm1136jf-s
+ - arm,arm1156t2-s
+ - arm,arm1156t2f-s
+ - arm,arm1176jzf
+ - arm,arm1176jz-s
+ - arm,arm1176jzf-s
+ - arm,arm11mpcore
+ - arm,armv8 # Only for s/w models
+ - arm,c1-nano
+ - arm,c1-premium
+ - arm,c1-pro
+ - arm,c1-ultra
+ - arm,cortex-a5
+ - arm,cortex-a7
+ - arm,cortex-a8
+ - arm,cortex-a9
+ - arm,cortex-a12
+ - arm,cortex-a15
+ - arm,cortex-a17
+ - arm,cortex-a32
+ - arm,cortex-a34
+ - arm,cortex-a35
+ - arm,cortex-a53
+ - arm,cortex-a55
+ - arm,cortex-a57
+ - arm,cortex-a65
+ - arm,cortex-a72
+ - arm,cortex-a73
+ - arm,cortex-a75
+ - arm,cortex-a76
+ - arm,cortex-a77
+ - arm,cortex-a78
+ - arm,cortex-a78ae
+ - arm,cortex-a78c
+ - arm,cortex-a320
+ - arm,cortex-a510
+ - arm,cortex-a520
+ - arm,cortex-a520ae
+ - arm,cortex-a710
+ - arm,cortex-a715
+ - arm,cortex-a720
+ - arm,cortex-a720ae
+ - arm,cortex-a725
+ - arm,cortex-m0
+ - arm,cortex-m0+
+ - arm,cortex-m1
+ - arm,cortex-m3
+ - arm,cortex-m4
+ - arm,cortex-r4
+ - arm,cortex-r5
+ - arm,cortex-r7
+ - arm,cortex-r52
+ - arm,cortex-x1
+ - arm,cortex-x1c
+ - arm,cortex-x2
+ - arm,cortex-x3
+ - arm,cortex-x4
+ - arm,cortex-x925
+ - arm,neoverse-e1
+ - arm,neoverse-n1
+ - arm,neoverse-n2
+ - arm,neoverse-n3
+ - arm,neoverse-v1
+ - arm,neoverse-v2
+ - arm,neoverse-v3
+ - arm,neoverse-v3ae
+ - arm,rainier
+ - brcm,brahma-b15
+ - brcm,brahma-b53
+ - brcm,vulcan
+ - cavium,thunder
+ - cavium,thunder2
+ - faraday,fa526
+ - intel,sa110
+ - intel,sa1100
+ - marvell,feroceon
+ - marvell,mohawk
+ - marvell,pj4a
+ - marvell,pj4b
+ - marvell,sheeva-v5
+ - marvell,sheeva-v7
+ - nvidia,tegra132-denver
+ - nvidia,tegra186-denver
+ - nvidia,tegra194-carmel
+ - qcom,krait
+ - qcom,kryo
+ - qcom,kryo240
+ - qcom,kryo250
+ - qcom,kryo260
+ - qcom,kryo280
+ - qcom,kryo360
+ - qcom,kryo385
+ - qcom,kryo465
+ - qcom,kryo468
+ - qcom,kryo470
+ - qcom,kryo485
+ - qcom,kryo560
+ - qcom,kryo570
+ - qcom,kryo660
+ - qcom,kryo670
+ - qcom,kryo685
+ - qcom,kryo780
+ - qcom,oryon
+ - qcom,scorpion
+ - samsung,mongoose-m2
+ - samsung,mongoose-m3
+ - samsung,mongoose-m5
+
+ enable-method:
+ $ref: /schemas/types.yaml#/definitions/string
+ oneOf:
+ # On ARM v8 64-bit this property is required
+ - enum:
+ - psci
+ - spin-table
+ # On ARM 32-bit systems this property is optional
+ - enum:
+ - actions,s500-smp
+ - allwinner,sun6i-a31
+ - allwinner,sun8i-a23
+ - allwinner,sun9i-a80-smp
+ - allwinner,sun8i-a83t-smp
+ - amlogic,meson8-smp
+ - amlogic,meson8b-smp
+ - arm,realview-smp
+ - aspeed,ast2600-smp
+ - brcm,bcm11351-cpu-method
+ - brcm,bcm23550
+ - brcm,bcm2836-smp
+ - brcm,bcm63138
+ - brcm,bcm-nsp-smp
+ - brcm,brahma-b15
+ - marvell,armada-375-smp
+ - marvell,armada-380-smp
+ - marvell,armada-390-smp
+ - marvell,armada-xp-smp
+ - marvell,98dx3236-smp
+ - marvell,mmp3-smp
+ - mediatek,mt6589-smp
+ - mediatek,mt81xx-tz-smp
+ - qcom,gcc-msm8660
+ - qcom,kpss-acc-v1
+ - qcom,kpss-acc-v2
+ - qcom,msm8226-smp
+ - qcom,msm8909-smp
+ # Only valid on ARM 32-bit, see above for ARM v8 64-bit
+ - qcom,msm8916-smp
+ - renesas,apmu
+ - renesas,r9a06g032-smp
+ - rockchip,rk3036-smp
+ - rockchip,rk3066-smp
+ - socionext,milbeaut-m10v-smp
+ - ste,dbx500-smp
+ - ti,am3352
+ - ti,am4372
+
+ cpu-release-addr:
+ oneOf:
+ - $ref: /schemas/types.yaml#/definitions/uint32
+ - $ref: /schemas/types.yaml#/definitions/uint64
+ description:
+ The DT specification defines this as 64-bit always, but some 32-bit Arm
+ systems have used a 32-bit value which must be supported.
+
+ cpu-idle-states:
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ items:
+ maxItems: 1
+ description:
+ List of phandles to idle state nodes supported by this cpu (see
+ ./idle-states.yaml).
+
+ capacity-dmips-mhz:
+ description:
+ u32 value representing CPU capacity (see ../cpu/cpu-capacity.txt) in
+ DMIPS/MHz, relative to highest capacity-dmips-mhz in the system.
+
+ cci-control-port: true
+
+ dynamic-power-coefficient:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: >
+ A u32 value that represents the running time dynamic power coefficient in
+ units of uW/MHz/V^2. The coefficient can either be calculated from power
+ measurements or derived by analysis.
+
+ The dynamic power consumption of the CPU is proportional to the square of
+ the Voltage (V) and the clock frequency (f). The coefficient is used to
+ calculate the dynamic power as below -
+
+ Pdyn = dynamic-power-coefficient * V^2 * f
+
+ where voltage is in V, frequency is in MHz.
+
+ interconnects:
+ minItems: 1
+ maxItems: 3
+
+ nvmem-cells:
+ maxItems: 1
+
+ nvmem-cell-names:
+ const: speed_grade
+
+ performance-domains:
+ maxItems: 1
+
+ power-domains:
+ minItems: 1
+ maxItems: 2
+
+ power-domain-names:
+ description:
+ For PSCI based platforms, the name corresponding to the index of the PSCI
+ PM domain provider, must be "psci". For SCMI based platforms, the name
+ corresponding to the index of an SCMI performance domain provider, must be
+ "perf".
+ minItems: 1
+ maxItems: 2
+ items:
+ enum: [ psci, perf, cpr ]
+
+ resets:
+ maxItems: 1
+
+ arm-supply:
+ deprecated: true
+ description: Use 'cpu-supply' instead
+
+ cpu0-supply:
+ deprecated: true
+ description: Use 'cpu-supply' instead
+
+ mem-supply: true
+
+ proc-supply:
+ deprecated: true
+ description: Use 'cpu-supply' instead
+
+ pu-supply:
+ deprecated: true
+ description: Only for i.MX6Q/DL/SL SoCs.
+
+ soc-supply:
+ deprecated: true
+ description: Only for i.MX6/7 Soc.
+
+ sram-supply:
+ deprecated: true
+ description: Use 'mem-supply' instead
+
+ fsl,soc-operating-points:
+ $ref: /schemas/types.yaml#/definitions/uint32-matrix
+ description: FSL i.MX6 Soc operation-points when change cpu frequency
+ deprecated: true
+ items:
+ items:
+ - description: Frequency in kHz
+ - description: Voltage for OPP in uV
+
+ mediatek,cci:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description: Link to Mediatek Cache Coherent Interconnect
+
+ edac-enabled:
+ $ref: /schemas/types.yaml#/definitions/flag
+ description:
+ A72 CPUs support Error Detection And Correction (EDAC) on their L1 and
+ L2 caches. This flag marks this function as usable.
+
+ qcom,saw:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ Specifies the SAW node associated with this CPU.
+
+ qcom,acc:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ Specifies the ACC node associated with this CPU.
+
+ qcom,freq-domain:
+ description: Specifies the QCom CPUFREQ HW associated with the CPU.
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ maxItems: 1
+
+ rockchip,pmu:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description: >
+ Specifies the syscon node controlling the cpu core power domains.
+
+ Optional for systems that have an "enable-method" property value of
+ "rockchip,rk3066-smp". While optional, it is the preferred way to get
+ access to the cpu-core power-domains.
+
+ secondary-boot-reg:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: >
+ Required for systems that have an "enable-method" property value of
+ "brcm,bcm11351-cpu-method", "brcm,bcm23550" or "brcm,bcm-nsp-smp".
+
+ This includes the following SoCs:
+ BCM11130, BCM11140, BCM11351, BCM28145, BCM28155, BCM21664, BCM23550,
+ BCM58522, BCM58525, BCM58535, BCM58622, BCM58623, BCM58625, BCM88312
+
+ The secondary-boot-reg property is a u32 value that specifies the
+ physical address of the register used to request the ROM holding pen
+ code release a secondary CPU. The value written to the register is
+ formed by encoding the target CPU id into the low bits of the
+ physical start address it should jump to.
+
+ thermal-idle:
+ type: object
+
+allOf:
+ - $ref: /schemas/cpu.yaml#
+ - $ref: /schemas/opp/opp-v1.yaml#
+ - if:
+ not:
+ properties:
+ compatible:
+ contains:
+ const: arm,cortex-a72
+ then:
+ # Allow edac-enabled only for Cortex A72
+ properties:
+ edac-enabled: false
+
+ - if:
+ # If the enable-method property contains one of those values
+ properties:
+ enable-method:
+ contains:
+ enum:
+ - brcm,bcm11351-cpu-method
+ - brcm,bcm23550
+ - brcm,bcm-nsp-smp
+ # and if enable-method is present
+ required:
+ - enable-method
+ then:
+ required:
+ - secondary-boot-reg
+ - if:
+ properties:
+ enable-method:
+ enum:
+ - spin-table
+ - renesas,r9a06g032-smp
+ required:
+ - enable-method
+ then:
+ required:
+ - cpu-release-addr
+ - if:
+ properties:
+ enable-method:
+ enum:
+ - qcom,kpss-acc-v1
+ - qcom,kpss-acc-v2
+ - qcom,msm8226-smp
+ - qcom,msm8916-smp
+ required:
+ - enable-method
+ then:
+ required:
+ - qcom,acc
+ - qcom,saw
+ else:
+ if:
+ # 2 Qualcomm platforms bootloaders need qcom,acc and qcom,saw yet use
+ # "spin-table" or "psci" enable-methods. Disallowing the properties for
+ # all other CPUs is the best we can do as there's not any way to
+ # distinguish these Qualcomm platforms.
+ not:
+ properties:
+ compatible:
+ const: arm,cortex-a53
+ then:
+ properties:
+ qcom,acc: false
+ qcom,saw: false
+
+required:
+ - device_type
+ - reg
+ - compatible
+
+dependencies:
+ rockchip,pmu: [enable-method]
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ cpus {
+ #size-cells = <0>;
+ #address-cells = <1>;
+
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <0x0>;
+ };
+
+ cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <0x1>;
+ };
+
+ cpu@100 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a7";
+ reg = <0x100>;
+ };
+
+ cpu@101 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a7";
+ reg = <0x101>;
+ };
+ };
+
+ - |
+ // Example 2 (Cortex-A8 uniprocessor 32-bit system):
+ cpus {
+ #size-cells = <0>;
+ #address-cells = <1>;
+
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a8";
+ reg = <0x0>;
+ };
+ };
+
+ - |
+ // Example 3 (ARM 926EJ-S uniprocessor 32-bit system):
+ cpus {
+ #size-cells = <0>;
+ #address-cells = <1>;
+
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,arm926ej-s";
+ reg = <0x0>;
+ };
+ };
+
+ - |
+ // Example 4 (ARM Cortex-A57 64-bit system):
+ cpus {
+ #size-cells = <0>;
+ #address-cells = <2>;
+
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x0 0x0>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x0 0x1>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ cpu@100 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x0 0x100>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ cpu@101 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x0 0x101>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ cpu@10000 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x0 0x10000>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ cpu@10001 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x0 0x10001>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ cpu@10100 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x0 0x10100>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ cpu@10101 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x0 0x10101>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ cpu@100000000 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x1 0x0>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ cpu@100000001 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x1 0x1>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ cpu@100000100 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x1 0x100>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ cpu@100000101 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x1 0x101>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ cpu@100010000 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x1 0x10000>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ cpu@100010001 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x1 0x10001>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ cpu@100010100 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x1 0x10100>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ cpu@100010101 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x1 0x10101>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/arm/davinci.txt b/Documentation/devicetree/bindings/arm/davinci.txt
deleted file mode 100644
index 715622c36260..000000000000
--- a/Documentation/devicetree/bindings/arm/davinci.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-Texas Instruments DaVinci Platforms Device Tree Bindings
---------------------------------------------------------
-
-DA850/OMAP-L138/AM18x Evaluation Module (EVM) board
-Required root node properties:
- - compatible = "ti,da850-evm", "ti,da850";
-
-DA850/OMAP-L138/AM18x L138/C6748 Development Kit (LCDK) board
-Required root node properties:
- - compatible = "ti,da850-lcdk", "ti,da850";
-
-EnBW AM1808 based CMC board
-Required root node properties:
- - compatible = "enbw,cmc", "ti,da850;
-
-LEGO MINDSTORMS EV3 (AM1808 based)
-Required root node properties:
- - compatible = "lego,ev3", "ti,da850";
-
-Generic DaVinci Boards
-----------------------
-
-DA850/OMAP-L138/AM18x generic board
-Required root node properties:
- - compatible = "ti,da850";
diff --git a/Documentation/devicetree/bindings/arm/digicolor.txt b/Documentation/devicetree/bindings/arm/digicolor.txt
deleted file mode 100644
index 658553f40b23..000000000000
--- a/Documentation/devicetree/bindings/arm/digicolor.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Conexant Digicolor Platforms Device Tree Bindings
-
-Each device tree must specify which Conexant Digicolor SoC it uses.
-Must be the following compatible string:
-
- cnxt,cx92755
diff --git a/Documentation/devicetree/bindings/arm/digicolor.yaml b/Documentation/devicetree/bindings/arm/digicolor.yaml
new file mode 100644
index 000000000000..0cf9ddaa527e
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/digicolor.yaml
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/digicolor.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Conexant Digicolor Platforms
+
+maintainers:
+ - Baruch Siach <baruch@tkos.co.il>
+
+properties:
+ $nodename:
+ const: "/"
+ compatible:
+ const: cnxt,cx92755
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/firmware/linaro,optee-tz.txt b/Documentation/devicetree/bindings/arm/firmware/linaro,optee-tz.txt
deleted file mode 100644
index d38834c67dff..000000000000
--- a/Documentation/devicetree/bindings/arm/firmware/linaro,optee-tz.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-OP-TEE Device Tree Bindings
-
-OP-TEE is a piece of software using hardware features to provide a Trusted
-Execution Environment. The security can be provided with ARM TrustZone, but
-also by virtualization or a separate chip.
-
-We're using "linaro" as the first part of the compatible property for
-the reference implementation maintained by Linaro.
-
-* OP-TEE based on ARM TrustZone required properties:
-
-- compatible : should contain "linaro,optee-tz"
-
-- method : The method of calling the OP-TEE Trusted OS. Permitted
- values are:
-
- "smc" : SMC #0, with the register assignments specified
- in drivers/tee/optee/optee_smc.h
-
- "hvc" : HVC #0, with the register assignments specified
- in drivers/tee/optee/optee_smc.h
-
-
-
-Example:
- firmware {
- optee {
- compatible = "linaro,optee-tz";
- method = "smc";
- };
- };
diff --git a/Documentation/devicetree/bindings/arm/firmware/linaro,optee-tz.yaml b/Documentation/devicetree/bindings/arm/firmware/linaro,optee-tz.yaml
new file mode 100644
index 000000000000..5d033570b57b
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/firmware/linaro,optee-tz.yaml
@@ -0,0 +1,67 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/firmware/linaro,optee-tz.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: OP-TEE
+
+maintainers:
+ - Jens Wiklander <jens.wiklander@linaro.org>
+
+description: |
+ OP-TEE is a piece of software using hardware features to provide a Trusted
+ Execution Environment. The security can be provided with ARM TrustZone, but
+ also by virtualization or a separate chip.
+
+ We're using "linaro" as the first part of the compatible property for
+ the reference implementation maintained by Linaro.
+
+properties:
+ $nodename:
+ const: optee
+
+ compatible:
+ const: linaro,optee-tz
+
+ interrupts:
+ maxItems: 1
+ description: |
+ This interrupt which is used to signal an event by the secure world
+ software is expected to be either a per-cpu interrupt or an
+ edge-triggered peripheral interrupt.
+
+ method:
+ enum: [smc, hvc]
+ description: |
+ The method of calling the OP-TEE Trusted OS depending on smc or hvc
+ instruction usage.
+ SMC #0, register assignments
+ or
+ HVC #0, register assignments
+ register assignments are specified in drivers/tee/optee/optee_smc.h
+
+required:
+ - compatible
+ - method
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ firmware {
+ optee {
+ compatible = "linaro,optee-tz";
+ method = "smc";
+ interrupts = <GIC_SPI 187 IRQ_TYPE_EDGE_RISING>;
+ };
+ };
+
+ - |
+ firmware {
+ optee {
+ compatible = "linaro,optee-tz";
+ method = "hvc";
+ };
+ };
diff --git a/Documentation/devicetree/bindings/arm/firmware/tlm,trusted-foundations.txt b/Documentation/devicetree/bindings/arm/firmware/tlm,trusted-foundations.txt
deleted file mode 100644
index 780d0392a66b..000000000000
--- a/Documentation/devicetree/bindings/arm/firmware/tlm,trusted-foundations.txt
+++ /dev/null
@@ -1,20 +0,0 @@
-Trusted Foundations
--------------------
-
-Boards that use the Trusted Foundations secure monitor can signal its
-presence by declaring a node compatible with "tlm,trusted-foundations"
-under the /firmware/ node
-
-Required properties:
-- compatible: "tlm,trusted-foundations"
-- tlm,version-major: major version number of Trusted Foundations firmware
-- tlm,version-minor: minor version number of Trusted Foundations firmware
-
-Example:
- firmware {
- trusted-foundations {
- compatible = "tlm,trusted-foundations";
- tlm,version-major = <2>;
- tlm,version-minor = <8>;
- };
- };
diff --git a/Documentation/devicetree/bindings/arm/firmware/tlm,trusted-foundations.yaml b/Documentation/devicetree/bindings/arm/firmware/tlm,trusted-foundations.yaml
new file mode 100644
index 000000000000..e3980b659f63
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/firmware/tlm,trusted-foundations.yaml
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/firmware/tlm,trusted-foundations.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Trusted Foundations
+
+description: |
+ Boards that use the Trusted Foundations secure monitor can signal its
+ presence by declaring a node compatible under the /firmware/ node
+
+maintainers:
+ - Stephen Warren <swarren@nvidia.com>
+
+properties:
+ $nodename:
+ const: trusted-foundations
+
+ compatible:
+ const: tlm,trusted-foundations
+
+ tlm,version-major:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: major version number of Trusted Foundations firmware
+
+ tlm,version-minor:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: minor version number of Trusted Foundations firmware
+
+required:
+ - compatible
+ - tlm,version-major
+ - tlm,version-minor
+
+additionalProperties: false
+
+examples:
+ - |
+ firmware {
+ trusted-foundations {
+ compatible = "tlm,trusted-foundations";
+ tlm,version-major = <2>;
+ tlm,version-minor = <8>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/arm/freescale/fsl,imx51-m4if.yaml b/Documentation/devicetree/bindings/arm/freescale/fsl,imx51-m4if.yaml
new file mode 100644
index 000000000000..1f515bea3959
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/freescale/fsl,imx51-m4if.yaml
@@ -0,0 +1,41 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/freescale/fsl,imx51-m4if.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale Multi Master Multi Memory Interface (M4IF) and Tigerp module
+
+description: collect the imx devices, which only have compatible and reg property
+
+maintainers:
+ - Frank Li <Frank.Li@nxp.com>
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - fsl,imx51-m4if
+ - fsl,imx51-tigerp
+ - fsl,imx51-aipstz
+ - fsl,imx53-aipstz
+ - fsl,imx7d-pcie-phy
+ - items:
+ - const: fsl,imx53-tigerp
+ - const: fsl,imx51-tigerp
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ m4if@83fd8000 {
+ compatible = "fsl,imx51-m4if";
+ reg = <0x83fd8000 0x1000>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/freescale/fsl,imx7ulp-pm.yaml b/Documentation/devicetree/bindings/arm/freescale/fsl,imx7ulp-pm.yaml
new file mode 100644
index 000000000000..9d377e193c12
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/freescale/fsl,imx7ulp-pm.yaml
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/freescale/fsl,imx7ulp-pm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX7ULP Power Management Components
+
+maintainers:
+ - A.s. Dong <aisheng.dong@nxp.com>
+
+description: |
+ The Multi-System Mode Controller (MSMC) is responsible for sequencing
+ the MCU into and out of all stop and run power modes. Specifically, it
+ monitors events to trigger transitions between power modes while
+ controlling the power, clocks, and memories of the MCU to achieve the
+ power consumption and functionality of that mode.
+
+ The WFI or WFE instruction is used to invoke a Sleep, Deep Sleep or
+ Standby modes for either Cortex family. Run, Wait, and Stop are the
+ common terms used for the primary operating modes of Kinetis
+ microcontrollers.
+
+properties:
+ compatible:
+ const: fsl,imx7ulp-smc1
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 2
+
+ clock-names:
+ items:
+ - const: divcore
+ - const: hsrun_divcore
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ smc1@40410000 {
+ compatible = "fsl,imx7ulp-smc1";
+ reg = <0x40410000 0x1000>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/freescale/fsl,imx7ulp-sim.yaml b/Documentation/devicetree/bindings/arm/freescale/fsl,imx7ulp-sim.yaml
new file mode 100644
index 000000000000..bd39cf107f3e
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/freescale/fsl,imx7ulp-sim.yaml
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/freescale/fsl,imx7ulp-sim.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX7ULP System Integration Module
+
+maintainers:
+ - Shawn Guo <shawnguo@kernel.org>
+ - Sascha Hauer <s.hauer@pengutronix.de>
+ - Fabio Estevam <festevam@gmail.com>
+
+description: |
+ The system integration module (SIM) provides system control and chip configuration
+ registers. In this module, chip revision information is located in JTAG ID register,
+ and a set of registers have been made available in DGO domain for SW use, with the
+ objective to maintain its value between system resets.
+
+properties:
+ compatible:
+ items:
+ - const: fsl,imx7ulp-sim
+ - const: syscon
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ sim@410a3000 {
+ compatible = "fsl,imx7ulp-sim", "syscon";
+ reg = <0x410a3000 0x1000>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/freescale/fsl,layerscape-dcfg.txt b/Documentation/devicetree/bindings/arm/freescale/fsl,layerscape-dcfg.txt
deleted file mode 100644
index b5cb374dc47d..000000000000
--- a/Documentation/devicetree/bindings/arm/freescale/fsl,layerscape-dcfg.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-Freescale DCFG
-
-DCFG is the device configuration unit, that provides general purpose
-configuration and status for the device. Such as setting the secondary
-core start address and release the secondary core from holdoff and startup.
-
-Required properties:
- - compatible: Should contain a chip-specific compatible string,
- Chip-specific strings are of the form "fsl,<chip>-dcfg",
- The following <chip>s are known to be supported:
- ls1012a, ls1021a, ls1043a, ls1046a, ls2080a.
-
- - reg : should contain base address and length of DCFG memory-mapped registers
-
-Example:
- dcfg: dcfg@1ee0000 {
- compatible = "fsl,ls1021a-dcfg";
- reg = <0x0 0x1ee0000 0x0 0x10000>;
- };
diff --git a/Documentation/devicetree/bindings/arm/freescale/fsl,layerscape-scfg.txt b/Documentation/devicetree/bindings/arm/freescale/fsl,layerscape-scfg.txt
deleted file mode 100644
index 0ab67b0b216d..000000000000
--- a/Documentation/devicetree/bindings/arm/freescale/fsl,layerscape-scfg.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-Freescale SCFG
-
-SCFG is the supplemental configuration unit, that provides SoC specific
-configuration and status registers for the chip. Such as getting PEX port
-status.
-
-Required properties:
- - compatible: Should contain a chip-specific compatible string,
- Chip-specific strings are of the form "fsl,<chip>-scfg",
- The following <chip>s are known to be supported:
- ls1012a, ls1021a, ls1043a, ls1046a, ls2080a.
-
- - reg: should contain base address and length of SCFG memory-mapped registers
-
-Example:
- scfg: scfg@1570000 {
- compatible = "fsl,ls1021a-scfg";
- reg = <0x0 0x1570000 0x0 0x10000>;
- };
diff --git a/Documentation/devicetree/bindings/arm/freescale/fsl,scu.txt b/Documentation/devicetree/bindings/arm/freescale/fsl,scu.txt
deleted file mode 100644
index 46d0af1f0872..000000000000
--- a/Documentation/devicetree/bindings/arm/freescale/fsl,scu.txt
+++ /dev/null
@@ -1,183 +0,0 @@
-NXP i.MX System Controller Firmware (SCFW)
---------------------------------------------------------------------
-
-The System Controller Firmware (SCFW) is a low-level system function
-which runs on a dedicated Cortex-M core to provide power, clock, and
-resource management. It exists on some i.MX8 processors. e.g. i.MX8QM
-(QM, QP), and i.MX8QX (QXP, DX).
-
-The AP communicates with the SC using a multi-ported MU module found
-in the LSIO subsystem. The current definition of this MU module provides
-5 remote AP connections to the SC to support up to 5 execution environments
-(TZ, HV, standard Linux, etc.). The SC side of this MU module interfaces
-with the LSIO DSC IP bus. The SC firmware will communicate with this MU
-using the MSI bus.
-
-System Controller Device Node:
-============================================================
-
-The scu node with the following properties shall be under the /firmware/ node.
-
-Required properties:
--------------------
-- compatible: should be "fsl,imx-scu".
-- mbox-names: should include "tx0", "tx1", "tx2", "tx3",
- "rx0", "rx1", "rx2", "rx3".
-- mboxes: List of phandle of 4 MU channels for tx and 4 MU channels
- for rx. All 8 MU channels must be in the same MU instance.
- Cross instances are not allowed. The MU instance can only
- be one of LSIO MU0~M4 for imx8qxp and imx8qm. Users need
- to make sure use the one which is not conflict with other
- execution environments. e.g. ATF.
- Note:
- Channel 0 must be "tx0" or "rx0".
- Channel 1 must be "tx1" or "rx1".
- Channel 2 must be "tx2" or "rx2".
- Channel 3 must be "tx3" or "rx3".
- e.g.
- mboxes = <&lsio_mu1 0 0
- &lsio_mu1 0 1
- &lsio_mu1 0 2
- &lsio_mu1 0 3
- &lsio_mu1 1 0
- &lsio_mu1 1 1
- &lsio_mu1 1 2
- &lsio_mu1 1 3>;
- See Documentation/devicetree/bindings/mailbox/fsl,mu.txt
- for detailed mailbox binding.
-
-i.MX SCU Client Device Node:
-============================================================
-
-Client nodes are maintained as children of the relevant IMX-SCU device node.
-
-Power domain bindings based on SCU Message Protocol
-------------------------------------------------------------
-
-This binding for the SCU power domain providers uses the generic power
-domain binding[2].
-
-Required properties:
-- compatible: Should be "fsl,scu-pd".
-- #address-cells: Should be 1.
-- #size-cells: Should be 0.
-
-Required properties for power domain sub nodes:
-- #power-domain-cells: Must be 0.
-
-Optional Properties:
-- reg: Resource ID of this power domain.
- No exist means uncontrollable by user.
- See detailed Resource ID list from:
- include/dt-bindings/power/imx-rsrc.h
-- power-domains: phandle pointing to the parent power domain.
-
-Clock bindings based on SCU Message Protocol
-------------------------------------------------------------
-
-This binding uses the common clock binding[1].
-
-Required properties:
-- compatible: Should be "fsl,imx8qxp-clock".
-- #clock-cells: Should be 1. Contains the Clock ID value.
-- clocks: List of clock specifiers, must contain an entry for
- each required entry in clock-names
-- clock-names: Should include entries "xtal_32KHz", "xtal_24MHz"
-
-The clock consumer should specify the desired clock by having the clock
-ID in its "clocks" phandle cell.
-
-See the full list of clock IDs from:
-include/dt-bindings/clock/imx8qxp-clock.h
-
-Pinctrl bindings based on SCU Message Protocol
-------------------------------------------------------------
-
-This binding uses the i.MX common pinctrl binding[3].
-
-Required properties:
-- compatible: Should be "fsl,imx8qxp-iomuxc".
-
-Required properties for Pinctrl sub nodes:
-- fsl,pins: Each entry consists of 3 integers which represents
- the mux and config setting for one pin. The first 2
- integers <pin_id mux_mode> are specified using a
- PIN_FUNC_ID macro, which can be found in
- <dt-bindings/pinctrl/pads-imx8qxp.h>.
- The last integer CONFIG is the pad setting value like
- pull-up on this pin.
-
- Please refer to i.MX8QXP Reference Manual for detailed
- CONFIG settings.
-
-[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-[2] Documentation/devicetree/bindings/power/power_domain.txt
-[3] Documentation/devicetree/bindings/pinctrl/fsl,imx-pinctrl.txt
-
-Example (imx8qxp):
--------------
-lsio_mu1: mailbox@5d1c0000 {
- ...
- #mbox-cells = <2>;
-};
-
-firmware {
- scu {
- compatible = "fsl,imx-scu";
- mbox-names = "tx0", "tx1", "tx2", "tx3",
- "rx0", "rx1", "rx2", "rx3";
- mboxes = <&lsio_mu1 0 0
- &lsio_mu1 0 1
- &lsio_mu1 0 2
- &lsio_mu1 0 3
- &lsio_mu1 1 0
- &lsio_mu1 1 1
- &lsio_mu1 1 2
- &lsio_mu1 1 3>;
-
- clk: clk {
- compatible = "fsl,imx8qxp-clk";
- #clock-cells = <1>;
- };
-
- iomuxc {
- compatible = "fsl,imx8qxp-iomuxc";
-
- pinctrl_lpuart0: lpuart0grp {
- fsl,pins = <
- SC_P_UART0_RX_ADMA_UART0_RX 0x06000020
- SC_P_UART0_TX_ADMA_UART0_TX 0x06000020
- >;
- };
- ...
- };
-
- imx8qx-pm {
- compatible = "fsl,scu-pd";
- #address-cells = <1>;
- #size-cells = <0>;
-
- pd_dma: dma-power-domain {
- #power-domain-cells = <0>;
-
- pd_dma_lpuart0: dma-lpuart0@57 {
- reg = <SC_R_UART_0>;
- #power-domain-cells = <0>;
- power-domains = <&pd_dma>;
- };
- ...
- };
- ...
- };
- };
-};
-
-serial@5a060000 {
- ...
- pinctrl-names = "default";
- pinctrl-0 = <&pinctrl_lpuart0>;
- clocks = <&clk IMX8QXP_UART0_CLK>,
- <&clk IMX8QXP_UART0_IPG_CLK>;
- clock-names = "per", "ipg";
- power-domains = <&pd_dma_lpuart0>;
-};
diff --git a/Documentation/devicetree/bindings/arm/freescale/fsl,vf610-mscm-cpucfg.txt b/Documentation/devicetree/bindings/arm/freescale/fsl,vf610-mscm-cpucfg.txt
deleted file mode 100644
index 44aa3c451ccf..000000000000
--- a/Documentation/devicetree/bindings/arm/freescale/fsl,vf610-mscm-cpucfg.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-Freescale Vybrid Miscellaneous System Control - CPU Configuration
-
-The MSCM IP contains multiple sub modules, this binding describes the first
-block of registers which contains CPU configuration information.
-
-Required properties:
-- compatible: "fsl,vf610-mscm-cpucfg", "syscon"
-- reg: the register range of the MSCM CPU configuration registers
-
-Example:
- mscm_cpucfg: cpucfg@40001000 {
- compatible = "fsl,vf610-mscm-cpucfg", "syscon";
- reg = <0x40001000 0x800>;
- }
diff --git a/Documentation/devicetree/bindings/arm/freescale/fsl,vf610-mscm-ir.txt b/Documentation/devicetree/bindings/arm/freescale/fsl,vf610-mscm-ir.txt
deleted file mode 100644
index 6dd6f399236d..000000000000
--- a/Documentation/devicetree/bindings/arm/freescale/fsl,vf610-mscm-ir.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-Freescale Vybrid Miscellaneous System Control - Interrupt Router
-
-The MSCM IP contains multiple sub modules, this binding describes the second
-block of registers which control the interrupt router. The interrupt router
-allows to configure the recipient of each peripheral interrupt. Furthermore
-it controls the directed processor interrupts. The module is available in all
-Vybrid SoC's but is only really useful in dual core configurations (VF6xx
-which comes with a Cortex-A5/Cortex-M4 combination).
-
-Required properties:
-- compatible: "fsl,vf610-mscm-ir"
-- reg: the register range of the MSCM Interrupt Router
-- fsl,cpucfg: The handle to the MSCM CPU configuration node, required
- to get the current CPU ID
-- interrupt-controller: Identifies the node as an interrupt controller
-- #interrupt-cells: Two cells, interrupt number and cells.
- The hardware interrupt number according to interrupt
- assignment of the interrupt router is required.
- Flags get passed only when using GIC as parent. Flags
- encoding as documented by the GIC bindings.
-
-Example:
- mscm_ir: interrupt-controller@40001800 {
- compatible = "fsl,vf610-mscm-ir";
- reg = <0x40001800 0x400>;
- fsl,cpucfg = <&mscm_cpucfg>;
- interrupt-controller;
- #interrupt-cells = <2>;
- interrupt-parent = <&intc>;
- }
diff --git a/Documentation/devicetree/bindings/arm/freescale/m4if.txt b/Documentation/devicetree/bindings/arm/freescale/m4if.txt
deleted file mode 100644
index 93bd7b867a53..000000000000
--- a/Documentation/devicetree/bindings/arm/freescale/m4if.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-* Freescale Multi Master Multi Memory Interface (M4IF) module
-
-Required properties:
-- compatible : Should be "fsl,imx51-m4if"
-- reg : Address and length of the register set for the device
-
-Example:
-
-m4if: m4if@83fd8000 {
- compatible = "fsl,imx51-m4if";
- reg = <0x83fd8000 0x1000>;
-};
diff --git a/Documentation/devicetree/bindings/arm/freescale/tigerp.txt b/Documentation/devicetree/bindings/arm/freescale/tigerp.txt
deleted file mode 100644
index 19e2aad63d6e..000000000000
--- a/Documentation/devicetree/bindings/arm/freescale/tigerp.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-* Freescale Tigerp platform module
-
-Required properties:
-- compatible : Should be "fsl,imx51-tigerp"
-- reg : Address and length of the register set for the device
-
-Example:
-
-tigerp: tigerp@83fa0000 {
- compatible = "fsl,imx51-tigerp";
- reg = <0x83fa0000 0x28>;
-};
diff --git a/Documentation/devicetree/bindings/arm/fsl.txt b/Documentation/devicetree/bindings/arm/fsl.txt
deleted file mode 100644
index 5074aeecd327..000000000000
--- a/Documentation/devicetree/bindings/arm/fsl.txt
+++ /dev/null
@@ -1,229 +0,0 @@
-Freescale i.MX Platforms Device Tree Bindings
------------------------------------------------
-
-i.MX23 Evaluation Kit
-Required root node properties:
- - compatible = "fsl,imx23-evk", "fsl,imx23";
-
-i.MX25 Product Development Kit
-Required root node properties:
- - compatible = "fsl,imx25-pdk", "fsl,imx25";
-
-i.MX27 Product Development Kit
-Required root node properties:
- - compatible = "fsl,imx27-pdk", "fsl,imx27";
-
-i.MX28 Evaluation Kit
-Required root node properties:
- - compatible = "fsl,imx28-evk", "fsl,imx28";
-
-i.MX51 Babbage Board
-Required root node properties:
- - compatible = "fsl,imx51-babbage", "fsl,imx51";
-
-i.MX53 Automotive Reference Design Board
-Required root node properties:
- - compatible = "fsl,imx53-ard", "fsl,imx53";
-
-i.MX53 Evaluation Kit
-Required root node properties:
- - compatible = "fsl,imx53-evk", "fsl,imx53";
-
-i.MX53 Quick Start Board
-Required root node properties:
- - compatible = "fsl,imx53-qsb", "fsl,imx53";
-
-i.MX53 Smart Mobile Reference Design Board
-Required root node properties:
- - compatible = "fsl,imx53-smd", "fsl,imx53";
-
-i.MX6 Quad Armadillo2 Board
-Required root node properties:
- - compatible = "fsl,imx6q-arm2", "fsl,imx6q";
-
-i.MX6 Quad SABRE Lite Board
-Required root node properties:
- - compatible = "fsl,imx6q-sabrelite", "fsl,imx6q";
-
-i.MX6 Quad SABRE Smart Device Board
-Required root node properties:
- - compatible = "fsl,imx6q-sabresd", "fsl,imx6q";
-
-i.MX6 Quad SABRE Automotive Board
-Required root node properties:
- - compatible = "fsl,imx6q-sabreauto", "fsl,imx6q";
-
-i.MX6SLL EVK board
-Required root node properties:
- - compatible = "fsl,imx6sll-evk", "fsl,imx6sll";
-
-i.MX6 Quad Plus SABRE Smart Device Board
-Required root node properties:
- - compatible = "fsl,imx6qp-sabresd", "fsl,imx6qp";
-
-i.MX6 Quad Plus SABRE Automotive Board
-Required root node properties:
- - compatible = "fsl,imx6qp-sabreauto", "fsl,imx6qp";
-
-i.MX6 DualLite SABRE Smart Device Board
-Required root node properties:
- - compatible = "fsl,imx6dl-sabresd", "fsl,imx6dl";
-
-i.MX6 DualLite/Solo SABRE Automotive Board
-Required root node properties:
- - compatible = "fsl,imx6dl-sabreauto", "fsl,imx6dl";
-
-i.MX6 SoloLite EVK Board
-Required root node properties:
- - compatible = "fsl,imx6sl-evk", "fsl,imx6sl";
-
-i.MX6 UltraLite 14x14 EVK Board
-Required root node properties:
- - compatible = "fsl,imx6ul-14x14-evk", "fsl,imx6ul";
-
-i.MX6 UltraLiteLite 14x14 EVK Board
-Required root node properties:
- - compatible = "fsl,imx6ull-14x14-evk", "fsl,imx6ull";
-
-i.MX6 ULZ 14x14 EVK Board
-Required root node properties:
- - compatible = "fsl,imx6ulz-14x14-evk", "fsl,imx6ull", "fsl,imx6ulz";
-
-i.MX6 SoloX SDB Board
-Required root node properties:
- - compatible = "fsl,imx6sx-sdb", "fsl,imx6sx";
-
-i.MX6 SoloX Sabre Auto Board
-Required root node properties:
- - compatible = "fsl,imx6sx-sabreauto", "fsl,imx6sx";
-
-i.MX7 SabreSD Board
-Required root node properties:
- - compatible = "fsl,imx7d-sdb", "fsl,imx7d";
-
-Generic i.MX boards
--------------------
-
-No iomux setup is done for these boards, so this must have been configured
-by the bootloader for boards to work with the generic bindings.
-
-i.MX27 generic board
-Required root node properties:
- - compatible = "fsl,imx27";
-
-i.MX51 generic board
-Required root node properties:
- - compatible = "fsl,imx51";
-
-i.MX53 generic board
-Required root node properties:
- - compatible = "fsl,imx53";
-
-i.MX6q generic board
-Required root node properties:
- - compatible = "fsl,imx6q";
-
-Freescale Vybrid Platform Device Tree Bindings
-----------------------------------------------
-
-For the Vybrid SoC familiy all variants with DDR controller are supported,
-which is the VF5xx and VF6xx series. Out of historical reasons, in most
-places the kernel uses vf610 to refer to the whole familiy.
-The compatible string "fsl,vf610m4" is used for the secondary Cortex-M4
-core support.
-
-Required root node compatible property (one of them):
- - compatible = "fsl,vf500";
- - compatible = "fsl,vf510";
- - compatible = "fsl,vf600";
- - compatible = "fsl,vf610";
- - compatible = "fsl,vf610m4";
-
-Freescale LS1021A Platform Device Tree Bindings
-------------------------------------------------
-
-Required root node compatible properties:
- - compatible = "fsl,ls1021a";
-
-Freescale ARMv8 based Layerscape SoC family Device Tree Bindings
-----------------------------------------------------------------
-
-LS1012A SoC
-Required root node properties:
- - compatible = "fsl,ls1012a";
-
-LS1012A ARMv8 based RDB Board
-Required root node properties:
- - compatible = "fsl,ls1012a-rdb", "fsl,ls1012a";
-
-LS1012A ARMv8 based FRDM Board
-Required root node properties:
- - compatible = "fsl,ls1012a-frdm", "fsl,ls1012a";
-
-LS1012A ARMv8 based QDS Board
-Required root node properties:
- - compatible = "fsl,ls1012a-qds", "fsl,ls1012a";
-
-LS1043A SoC
-Required root node properties:
- - compatible = "fsl,ls1043a";
-
-LS1043A ARMv8 based RDB Board
-Required root node properties:
- - compatible = "fsl,ls1043a-rdb", "fsl,ls1043a";
-
-LS1043A ARMv8 based QDS Board
-Required root node properties:
- - compatible = "fsl,ls1043a-qds", "fsl,ls1043a";
-
-LS1046A SoC
-Required root node properties:
- - compatible = "fsl,ls1046a";
-
-LS1046A ARMv8 based QDS Board
-Required root node properties:
- - compatible = "fsl,ls1046a-qds", "fsl,ls1046a";
-
-LS1046A ARMv8 based RDB Board
-Required root node properties:
- - compatible = "fsl,ls1046a-rdb", "fsl,ls1046a";
-
-LS1088A SoC
-Required root node properties:
- - compatible = "fsl,ls1088a";
-
-LS1088A ARMv8 based QDS Board
-Required root node properties:
- - compatible = "fsl,ls1088a-qds", "fsl,ls1088a";
-
-LS1088A ARMv8 based RDB Board
-Required root node properties:
- - compatible = "fsl,ls1088a-rdb", "fsl,ls1088a";
-
-LS2080A SoC
-Required root node properties:
- - compatible = "fsl,ls2080a";
-
-LS2080A ARMv8 based Simulator model
-Required root node properties:
- - compatible = "fsl,ls2080a-simu", "fsl,ls2080a";
-
-LS2080A ARMv8 based QDS Board
-Required root node properties:
- - compatible = "fsl,ls2080a-qds", "fsl,ls2080a";
-
-LS2080A ARMv8 based RDB Board
-Required root node properties:
- - compatible = "fsl,ls2080a-rdb", "fsl,ls2080a";
-
-LS2088A SoC
-Required root node properties:
- - compatible = "fsl,ls2088a";
-
-LS2088A ARMv8 based QDS Board
-Required root node properties:
- - compatible = "fsl,ls2088a-qds", "fsl,ls2088a";
-
-LS2088A ARMv8 based RDB Board
-Required root node properties:
- - compatible = "fsl,ls2088a-rdb", "fsl,ls2088a";
diff --git a/Documentation/devicetree/bindings/arm/fsl.yaml b/Documentation/devicetree/bindings/arm/fsl.yaml
new file mode 100644
index 000000000000..00cdf490b062
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/fsl.yaml
@@ -0,0 +1,1784 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/fsl.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX Platforms
+
+maintainers:
+ - Shawn Guo <shawnguo@kernel.org>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ - description: i.MX1 based Boards
+ items:
+ - enum:
+ - armadeus,imx1-apf9328
+ - fsl,imx1ads
+ - const: fsl,imx1
+
+ - description: i.MX23 based Boards
+ items:
+ - enum:
+ - creative,x-fi3
+ - fsl,imx23-evk
+ - fsl,stmp378x-devb
+ - olimex,imx23-olinuxino
+ - sandisk,sansa_fuze_plus
+ - const: fsl,imx23
+
+ - description: i.MX25 Product Development Kit
+ items:
+ - enum:
+ - fsl,imx25-pdk
+ - karo,imx25-tx25
+ - const: fsl,imx25
+
+ - description: i.MX25 Eukrea CPUIMX25 Boards
+ items:
+ - enum:
+ - eukrea,mbimxsd25-baseboard # Eukrea MBIMXSD25
+ - const: eukrea,cpuimx25
+ - const: fsl,imx25
+
+ - description: i.MX25 Eukrea MBIMXSD25 Boards
+ items:
+ - enum:
+ - eukrea,mbimxsd25-baseboard-cmo-qvga
+ - eukrea,mbimxsd25-baseboard-dvi-svga
+ - eukrea,mbimxsd25-baseboard-dvi-vga
+ - const: eukrea,mbimxsd25-baseboard
+ - const: eukrea,cpuimx25
+ - const: fsl,imx25
+
+ - description: i.MX27 based Boards
+ items:
+ - enum:
+ - armadeus,imx27-apf27 # APF27 SoM
+ - fsl,imx27-pdk
+ - const: fsl,imx27
+
+ - description: i.MX27 APF27 SoM Board
+ items:
+ - const: armadeus,imx27-apf27dev
+ - const: armadeus,imx27-apf27
+ - const: fsl,imx27
+
+ - description: i.MX27 Eukrea CPUIMX27 SoM Board
+ items:
+ - const: eukrea,mbimxsd27-baseboard
+ - const: eukrea,cpuimx27
+ - const: fsl,imx27
+
+ - description: i.MX27 Phytec pca100 Board
+ items:
+ - const: phytec,imx27-pca100-rdk
+ - const: phytec,imx27-pca100
+ - const: fsl,imx27
+
+ - description: i.MX27 Phytec pcm970 Board
+ items:
+ - const: phytec,imx27-pcm970
+ - const: phytec,imx27-pcm038
+ - const: fsl,imx27
+
+ - description: i.MX28 based Boards
+ items:
+ - enum:
+ - amarula,imx28-rmm
+ - armadeus,imx28-apf28 # APF28 SoM
+ - bluegiga,apx4devkit # Bluegiga APx4 SoM on dev board
+ - crystalfontz,cfa10036 # Crystalfontz CFA-10036 SoM
+ - eukrea,mbmx28lc
+ - fsl,imx28-evk
+ - i2se,duckbill
+ - i2se,duckbill-2
+ - karo,tx28 # Ka-Ro electronics TX28 module
+ - lwn,imx28-btt3
+ - lwn,imx28-xea
+ - msr,m28cu3 # M28 SoM with custom base board
+ - schulercontrol,imx28-sps1
+ - technologic,imx28-ts4600
+ - const: fsl,imx28
+
+ - description: i.MX28 Aries M28 SoM Board
+ items:
+ - const: aries,m28
+ - const: denx,m28
+ - const: fsl,imx28
+
+ - description: i.MX28 Aries M28EVK Board
+ items:
+ - const: aries,m28evk
+ - const: denx,m28evk
+ - const: fsl,imx28
+
+ - description: i.MX28 Armadeus Systems APF28Dev Board
+ items:
+ - const: armadeus,imx28-apf28dev
+ - const: armadeus,imx28-apf28
+ - const: fsl,imx28
+
+ - description: i.MX28 Crystalfontz CFA-10036 based Boards
+ items:
+ - enum:
+ - crystalfontz,cfa10037
+ - crystalfontz,cfa10049
+ - crystalfontz,cfa10057
+ - crystalfontz,cfa10058
+ - const: crystalfontz,cfa10036
+ - const: fsl,imx28
+
+ - description: i.MX28 Crystalfontz CFA-10037 based Boards
+ items:
+ - enum:
+ - crystalfontz,cfa10055
+ - crystalfontz,cfa10056
+ - const: crystalfontz,cfa10037
+ - const: crystalfontz,cfa10036
+ - const: fsl,imx28
+
+ - description: i.MX28 Duckbill 2 based Boards
+ items:
+ - enum:
+ - i2se,duckbill-2-485
+ - i2se,duckbill-2-enocean
+ - i2se,duckbill-2-spi
+ - const: i2se,duckbill-2
+ - const: fsl,imx28
+
+ - description: i.MX28 Eukrea Electromatique MBMX283LC Board
+ items:
+ - const: eukrea,mbmx283lc
+ - const: eukrea,mbmx28lc
+ - const: fsl,imx28
+
+ - description: i.MX28 Eukrea Electromatique MBMX287LC Board
+ items:
+ - const: eukrea,mbmx287lc
+ - const: eukrea,mbmx283lc
+ - const: eukrea,mbmx28lc
+ - const: fsl,imx28
+
+ - description: i.MX31 based Boards
+ items:
+ - enum:
+ - buglabs,imx31-bug
+ - logicpd,imx31-lite
+ - const: fsl,imx31
+
+ - description: i.MX35 based Boards
+ items:
+ - enum:
+ - fsl,imx35-pdk
+ - const: fsl,imx35
+
+ - description: i.MX35 Eukrea CPUIMX35 Board
+ items:
+ - const: eukrea,mbimxsd35-baseboard
+ - const: eukrea,cpuimx35
+ - const: fsl,imx35
+
+ - description: i.MX50 based Boards
+ items:
+ - enum:
+ - fsl,imx50-evk
+ - kobo,aura
+ - const: fsl,imx50
+
+ - description: i.MX51 based Boards
+ items:
+ - enum:
+ - armadeus,imx51-apf51 # Armadeus Systems APF51 module
+ - fsl,imx51-babbage
+ - technologic,imx51-ts4800
+ - zii,imx51-scu3-esb
+ - zii,imx51-scu2-mezz
+ - zii,imx51-rdu1
+ - const: fsl,imx51
+
+ - description: i.MX51 based Armadeus Systems APF51Dev Board
+ items:
+ - const: armadeus,imx51-apf51dev
+ - const: armadeus,imx51-apf51
+ - const: fsl,imx51
+
+ - description: i.MX51 based Digi ConnectCore CC(W)-MX51 JSK Board
+ items:
+ - const: digi,connectcore-ccxmx51-jsk
+ - const: digi,connectcore-ccxmx51-som
+ - const: fsl,imx51
+
+ - description: i.MX51 based Eukrea CPUIMX51 Board
+ items:
+ - const: eukrea,mbimxsd51
+ - const: eukrea,cpuimx51
+ - const: fsl,imx51
+
+ - description: i.MX53 based Boards
+ items:
+ - enum:
+ - bhf,cx9020
+ - fsl,imx53-ard
+ - fsl,imx53-evk
+ - fsl,imx53-qsb
+ - fsl,imx53-qsrb # Freescale i.MX53 Quick Start-R Board
+ - fsl,imx53-smd
+ - ge,imx53-cpuvo # General Electric CS ONE
+ - inversepath,imx53-usbarmory # Inverse Path USB armory
+ - karo,tx53 # Ka-Ro electronics TX53 module
+ - kiebackpeter,imx53-ddc # K+P imx53 DDC
+ - kiebackpeter,imx53-hsc # K+P imx53 HSC
+ - menlo,m53menlo # i.MX53 Menlo board
+ - starterkit,sk-imx53
+ - voipac,imx53-dmm-668 # Voipac i.MX53 X53-DMM-668
+ - const: fsl,imx53
+
+ - description: i.MX53 based Aries/DENX M53EVK Board
+ items:
+ - const: aries,imx53-m53evk
+ - const: denx,imx53-m53evk
+ - const: fsl,imx53
+
+ - description: i.MX53 based TQ MBa53 Board
+ items:
+ - const: tq,mba53
+ - const: tq,tqma53
+ - const: fsl,imx53
+
+ - description: i.MX6Q based Boards
+ items:
+ - enum:
+ - auvidea,h100 # Auvidea H100
+ - bosch,imx6q-acc # Bosch ACC i.MX6 Dual
+ - boundary,imx6q-nitrogen6_max
+ - boundary,imx6q-nitrogen6_som2
+ - boundary,imx6q-nitrogen6x
+ - compulab,cm-fx6 # CompuLab CM-FX6
+ - dmo,imx6q-edmqmx6 # Data Modul eDM-QMX6 Board
+ - ds,imx6q-sbc # Da Sheng COM-9XX Modules
+ - embest,imx6q-marsboard # Embest MarS Board i.MX6Dual
+ - emtrion,emcon-mx6 # emCON-MX6D or emCON-MX6Q SoM
+ - emtrion,emcon-mx6-avari # emCON-MX6D or emCON-MX6Q SoM on Avari Base
+ - engicam,imx6-icore # Engicam i.CoreM6 Starter Kit
+ - engicam,imx6-icore-rqs # Engicam i.CoreM6 RQS Starter Kit
+ - fsl,imx6q-arm2
+ - fsl,imx6q-sabreauto
+ - fsl,imx6q-sabrelite
+ - fsl,imx6q-sabresd
+ - karo,imx6q-tx6q # Ka-Ro electronics TX6Q Modules
+ - kiebackpeter,imx6q-tpc # K+P i.MX6 Quad TPC Board
+ - kontron,imx6q-samx6i # Kontron i.MX6 Dual/Quad SMARC Module
+ - kosagi,imx6q-novena # Kosagi Novena Dual/Quad
+ - kvg,vicut1q # Kverneland UT1Q board
+ - logicpd,imx6q-logicpd
+ - lwn,display5 # Liebherr Display5 i.MX6 Quad Board
+ - lwn,mccmon6 # Liebherr Monitor6 i.MX6 Quad Board
+ - nutsboard,imx6q-pistachio # NutsBoard i.MX6 Quad Pistachio
+ - microsys,sbc6x # MicroSys sbc6x board
+ - poslab,imx6q-savageboard # Poslab SavageBoard Quad
+ - prt,prti6q # Protonic PRTI6Q board
+ - prt,prtwd2 # Protonic WD2 board
+ - rex,imx6q-rex-pro # Rex Pro i.MX6 Quad Board
+ - skov,imx6q-skov-revc-lt2 # SKOV IMX6 CPU QuadCore lt2
+ - skov,imx6q-skov-revc-lt6 # SKOV IMX6 CPU QuadCore lt6
+ - skov,imx6q-skov-reve-mi1010ait-1cp1 # SKOV IMX6 CPU QuadCore mi1010ait-1cp1
+ - solidrun,cubox-i/q # SolidRun Cubox-i Dual/Quad
+ - solidrun,hummingboard/q
+ - solidrun,hummingboard2/q
+ - solidrun,solidsense/q # SolidRun SolidSense Dual/Quad
+ - tbs,imx6q-tbs2910 # TBS2910 Matrix ARM mini PC
+ - technexion,imx6q-pico-dwarf # TechNexion i.MX6Q Pico-Dwarf
+ - technexion,imx6q-pico-hobbit # TechNexion i.MX6Q Pico-Hobbit
+ - technexion,imx6q-pico-nymph # TechNexion i.MX6Q Pico-Nymph
+ - technexion,imx6q-pico-pi # TechNexion i.MX6Q Pico-Pi
+ - technologic,imx6q-ts4900
+ - technologic,imx6q-ts7970
+ - udoo,imx6q-udoo # Udoo i.MX6 Quad Board
+ - uniwest,imx6q-evi # Uniwest Evi
+ - variscite,dt6customboard
+ - wand,imx6q-wandboard # Wandboard i.MX6 Quad Board
+ - ysoft,imx6q-yapp4-crux # i.MX6 Quad Y Soft IOTA Crux board
+ - ysoft,imx6q-yapp4-pegasus # i.MX6 Quad Y Soft IOTA Pegasus board
+ - zealz,imx6q-gk802 # Zealz GK802
+ - zii,imx6q-zii-rdu2 # ZII RDU2 Board
+ - const: fsl,imx6q
+
+ - description: i.MX6Q Advantech DMS-BA16 Boards
+ items:
+ - enum:
+ - advantech,imx6q-dms-ba16 # Advantech DMS-BA16
+ - ge,imx6q-b450v3 # General Electric B450v3
+ - ge,imx6q-b650v3 # General Electric B650v3
+ - ge,imx6q-b850v3 # General Electric B850v3
+ - const: advantech,imx6q-ba16
+ - const: fsl,imx6q
+
+ - description: i.MX6Q Armadeus APF6 Boards
+ items:
+ - const: armadeus,imx6q-apf6dev
+ - const: armadeus,imx6q-apf6
+ - const: fsl,imx6q
+
+ - description: i.MX6Q CompuLab Utilite Pro Board
+ items:
+ - const: compulab,utilite-pro
+ - const: compulab,cm-fx6
+ - const: fsl,imx6q
+
+ - description: i.MX6Q DFI FS700-M60-6QD Board
+ items:
+ - const: dfi,fs700-m60-6qd
+ - const: dfi,fs700e-m60
+ - const: fsl,imx6q
+
+ - description: i.MX6Q DHCOM Premium Developer Kit Board
+ items:
+ - const: dh,imx6q-dhcom-pdk2
+ - const: dh,imx6q-dhcom-som
+ - const: fsl,imx6q
+
+ - description: i.MX6Q Gateworks Ventana Boards
+ items:
+ - enum:
+ - gw,imx6q-gw51xx
+ - gw,imx6q-gw52xx
+ - gw,imx6q-gw53xx
+ - gw,imx6q-gw5400-a
+ - gw,imx6q-gw54xx
+ - gw,imx6q-gw551x
+ - gw,imx6q-gw552x
+ - gw,imx6q-gw553x
+ - gw,imx6q-gw560x
+ - gw,imx6q-gw5903
+ - gw,imx6q-gw5904
+ - gw,imx6q-gw5907
+ - gw,imx6q-gw5910
+ - gw,imx6q-gw5912
+ - gw,imx6q-gw5913
+ - const: gw,ventana
+ - const: fsl,imx6q
+
+ - description: i.MX6Q Kontron SMARC-sAMX6i on SMARC Eval Carrier 2.0
+ items:
+ - const: kontron,imx6q-samx6i-ads2
+ - const: kontron,imx6q-samx6i
+ - const: fsl,imx6q
+
+ - description: i.MX6Q PHYTEC phyBOARD-Mira
+ items:
+ - enum:
+ - phytec,imx6q-pbac06-emmc # PHYTEC phyBOARD-Mira eMMC RDK
+ - phytec,imx6q-pbac06-nand # PHYTEC phyBOARD-Mira NAND RDK
+ - const: phytec,imx6q-pbac06 # PHYTEC phyBOARD-Mira
+ - const: phytec,imx6qdl-pcm058 # PHYTEC phyCORE-i.MX6
+ - const: fsl,imx6q
+
+ - description: i.MX6Q PHYTEC phyFLEX-i.MX6
+ items:
+ - enum:
+ - comvetia,imx6q-lxr # Comvetia LXR board
+ - phytec,imx6q-pbab01 # PHYTEC phyFLEX carrier board
+ - const: phytec,imx6q-pfla02 # PHYTEC phyFLEX-i.MX6 Quad
+ - const: fsl,imx6q
+
+ - description: i.MX6Q Boards with Toradex Apalis iMX6Q/D Modules
+ items:
+ - enum:
+ - toradex,apalis_imx6q-ixora # Apalis iMX6Q/D Module on Ixora Carrier Board
+ - toradex,apalis_imx6q-ixora-v1.1 # Apalis iMX6Q/D Module on Ixora V1.1 Carrier Board
+ - toradex,apalis_imx6q-ixora-v1.2 # Apalis iMX6Q/D Module on Ixora V1.2 Carrier Board
+ - toradex,apalis_imx6q-eval # Apalis iMX6Q/D Module on Apalis Evaluation Board v1.0/v1.1
+ - toradex,apalis_imx6q-eval-v1.2 # Apalis iMX6Q/D Module on Apalis Evaluation Board v1.2
+ - const: toradex,apalis_imx6q
+ - const: fsl,imx6q
+
+ - description: i.MX6Q Variscite VAR-SOM-MX6 Boards
+ items:
+ - const: variscite,mx6customboard
+ - const: variscite,var-som-imx6q
+ - const: fsl,imx6q
+
+ - description: TQ-Systems TQMa6Q SoM (variant A) on MBa6x
+ items:
+ - const: tq,imx6q-mba6x-a
+ - const: tq,mba6a # Expected by bootloader, to be removed in the future
+ - const: tq,imx6q-tqma6q-a
+ - const: fsl,imx6q
+
+ - description: TQ-Systems TQMa6Q SoM (variant B) on MBa6x
+ items:
+ - const: tq,imx6q-mba6x-b
+ - const: tq,mba6b # Expected by bootloader, to be removed in the future
+ - const: tq,imx6q-tqma6q-b
+ - const: fsl,imx6q
+
+ - description: i.MX6QP based Boards
+ items:
+ - enum:
+ - boundary,imx6qp-nitrogen6_max
+ - boundary,imx6qp-nitrogen6_som2
+ - fsl,imx6qp-sabreauto # i.MX6 Quad Plus SABRE Automotive Board
+ - fsl,imx6qp-sabresd # i.MX6 Quad Plus SABRE Smart Device Board
+ - karo,imx6qp-tx6qp # Ka-Ro electronics TX6QP-8037 Module
+ - kvg,vicutp # Kverneland UT1P board
+ - prt,prtwd3 # Protonic WD3 board
+ - wand,imx6qp-wandboard # Wandboard i.MX6 QuadPlus Board
+ - ysoft,imx6qp-yapp4-crux-plus # i.MX6 Quad Plus Y Soft IOTA Crux+ board
+ - ysoft,imx6qp-yapp4-pegasus-plus # i.MX6 Quad Plus Y Soft IOTA Pegasus+ board
+ - zii,imx6qp-zii-rdu2 # ZII RDU2+ Board
+ - const: fsl,imx6qp
+
+ - description: i.MX6QP PHYTEC phyBOARD-Mira
+ items:
+ - const: phytec,imx6qp-pbac06-nand
+ - const: phytec,imx6qp-pbac06 # PHYTEC phyBOARD-Mira
+ - const: phytec,imx6qdl-pcm058 # PHYTEC phyCORE-i.MX6
+ - const: fsl,imx6qp
+
+ - description: TQ-Systems TQMa6QP SoM on MBa6x
+ items:
+ - const: tq,imx6qp-mba6x-b
+ - const: tq,mba6b # Expected by bootloader, to be removed in the future
+ - const: tq,imx6qp-tqma6qp-b
+ - const: fsl,imx6qp
+
+ - description: i.MX6DL based Boards
+ items:
+ - enum:
+ - abb,aristainetos-imx6dl-4 # aristainetos i.MX6 Dual Lite Board 4
+ - abb,aristainetos-imx6dl-7 # aristainetos i.MX6 Dual Lite Board 7
+ - abb,aristainetos2-imx6dl-4 # aristainetos2 i.MX6 Dual Lite Board 4
+ - abb,aristainetos2-imx6dl-7 # aristainetos2 i.MX6 Dual Lite Board 7
+ - alt,alti6p # Altesco I6P Board
+ - boundary,imx6dl-nit6xlite # Boundary Devices Nitrogen6 Lite
+ - boundary,imx6dl-nitrogen6x # Boundary Devices Nitrogen6x
+ - bticino,imx6dl-mamoj # BTicino i.MX6DL Mamoj
+ - eckelmann,imx6dl-ci4x10
+ - emtrion,emcon-mx6 # emCON-MX6S or emCON-MX6DL SoM
+ - emtrion,emcon-mx6-avari # emCON-MX6S or emCON-MX6DL SoM on Avari Base
+ - engicam,imx6-icore # Engicam i.CoreM6 Starter Kit
+ - engicam,imx6-icore-rqs # Engicam i.CoreM6 RQS Starter Kit
+ - fsl,imx6dl-sabreauto # i.MX6 DualLite/Solo SABRE Automotive Board
+ - fsl,imx6dl-sabrelite # i.MX6 DualLite SABRE Lite Board
+ - fsl,imx6dl-sabresd # i.MX6 DualLite SABRE Smart Device Board
+ - karo,imx6dl-tx6dl # Ka-Ro electronics TX6U Modules
+ - kontron,imx6dl-samx6i # Kontron i.MX6 Solo SMARC Module
+ - kvg,victgo # Kverneland TGO
+ - kvg,vicut1 # Kverneland UT1 board
+ - ply,plybas # Plymovent BAS board
+ - ply,plym2m # Plymovent M2M board
+ - poslab,imx6dl-savageboard # Poslab SavageBoard Dual
+ - prt,prtmvt # Protonic MVT board
+ - prt,prtrvt # Protonic RVT board
+ - prt,prtvt7 # Protonic VT7 board
+ - rex,imx6dl-rex-basic # Rex Basic i.MX6 Dual Lite Board
+ - riot,imx6s-riotboard # RIoTboard i.MX6S
+ - sielaff,imx6dl-board # Sielaff i.MX6 Solo Board
+ - skov,imx6dl-skov-revc-lt2 # SKOV IMX6 CPU SoloCore lt2
+ - skov,imx6dl-skov-revc-lt6 # SKOV IMX6 CPU SoloCore lt6
+ - solidrun,cubox-i/dl # SolidRun Cubox-i Solo/DualLite
+ - solidrun,hummingboard/dl
+ - solidrun,hummingboard2/dl # SolidRun HummingBoard2 Solo/DualLite
+ - solidrun,solidsense/dl # SolidRun SolidSense Solo/DualLite
+ - technexion,imx6dl-pico-dwarf # TechNexion i.MX6DL Pico-Dwarf
+ - technexion,imx6dl-pico-hobbit # TechNexion i.MX6DL Pico-Hobbit
+ - technexion,imx6dl-pico-nymph # TechNexion i.MX6DL Pico-Nymph
+ - technexion,imx6dl-pico-pi # TechNexion i.MX6DL Pico-Pi
+ - technologic,imx6dl-ts4900
+ - technologic,imx6dl-ts7970
+ - udoo,imx6dl-udoo # Udoo i.MX6 Dual-lite Board
+ - vdl,lanmcu # Van der Laan LANMCU board
+ - wand,imx6dl-wandboard # Wandboard i.MX6 Dual Lite Board
+ - ysoft,imx6dl-yapp4-draco # i.MX6 Solo Y Soft IOTA Draco board
+ - ysoft,imx6dl-yapp4-hydra # i.MX6 DualLite Y Soft IOTA Hydra board
+ - ysoft,imx6dl-yapp4-lynx # i.MX6 DualLite Y Soft IOTA Lynx board
+ - ysoft,imx6dl-yapp4-orion # i.MX6 DualLite Y Soft IOTA Orion board
+ - ysoft,imx6dl-yapp4-phoenix # i.MX6 DualLite Y Soft IOTA Phoenix board
+ - ysoft,imx6dl-yapp4-ursa # i.MX6 Solo Y Soft IOTA Ursa board
+ - const: fsl,imx6dl
+
+ - description: i.MX6DL based Armadeus AFP6 Board
+ items:
+ - const: armadeus,imx6dl-apf6dev
+ - const: armadeus,imx6dl-apf6 # APF6 (Solo) SoM
+ - const: fsl,imx6dl
+
+ - description: i.MX6DL based congatec QMX6 Boards
+ items:
+ - enum:
+ - ge,imx6dl-b105v2 # General Electric B105v2
+ - ge,imx6dl-b105pv2 # General Electric B105Pv2
+ - ge,imx6dl-b125v2 # General Electric B125v2
+ - ge,imx6dl-b125pv2 # General Electric B125Pv2
+ - ge,imx6dl-b155v2 # General Electric B155v2
+ - const: congatec,qmx6
+ - const: fsl,imx6dl
+
+ - description: i.MX6DL based DFI FS700-M60-6DL Board
+ items:
+ - const: dfi,fs700-m60-6dl
+ - const: dfi,fs700e-m60
+ - const: fsl,imx6dl
+
+ - description: i.MX6DL DHCOM based Boards
+ items:
+ - enum:
+ - dh,imx6dl-dhcom-pdk2 # i.MX6DL DHCOM SoM on PDK2 board
+ - dh,imx6dl-dhcom-picoitx # i.MX6DL DHCOM SoM on PicoITX board
+ - const: dh,imx6dl-dhcom-som
+ - const: fsl,imx6dl
+
+ - description: i.MX6DL Gateworks Ventana Boards
+ items:
+ - enum:
+ - gw,imx6dl-gw51xx
+ - gw,imx6dl-gw52xx
+ - gw,imx6dl-gw53xx
+ - gw,imx6dl-gw54xx
+ - gw,imx6dl-gw551x
+ - gw,imx6dl-gw552x
+ - gw,imx6dl-gw553x
+ - gw,imx6dl-gw560x
+ - gw,imx6dl-gw5903
+ - gw,imx6dl-gw5904
+ - gw,imx6dl-gw5907
+ - gw,imx6dl-gw5910
+ - gw,imx6dl-gw5912
+ - gw,imx6dl-gw5913
+ - const: gw,ventana
+ - const: fsl,imx6dl
+
+ - description: i.MX6DL Kontron SMARC-sAMX6i on SMARC Eval Carrier 2.0
+ items:
+ - const: kontron,imx6dl-samx6i-ads2
+ - const: kontron,imx6dl-samx6i
+ - const: fsl,imx6dl
+
+ - description: i.MX6DL PHYTEC phyBOARD-Mira
+ items:
+ - enum:
+ - phytec,imx6dl-pbac06-emmc # PHYTEC phyBOARD-Mira eMMC RDK
+ - phytec,imx6dl-pbac06-nand # PHYTEC phyBOARD-Mira NAND RDK
+ - const: phytec,imx6dl-pbac06 # PHYTEC phyBOARD-Mira
+ - const: phytec,imx6qdl-pcm058 # PHYTEC phyCORE-i.MX6
+ - const: fsl,imx6dl
+
+ - description: i.MX6DL PHYTEC phyFLEX-i.MX6
+ items:
+ - const: phytec,imx6dl-pbab01 # PHYTEC phyFLEX carrier board
+ - const: phytec,imx6dl-pfla02 # PHYTEC phyFLEX-i.MX6 Quad
+ - const: fsl,imx6dl
+
+ - description: i.MX6DL Boards with Toradex Colibri iMX6DL/S Modules
+ items:
+ - enum:
+ - toradex,colibri_imx6dl-aster # Colibri iMX6DL/S Module on Aster Board
+ - toradex,colibri_imx6dl-eval-v3 # Colibri iMX6DL/S Module on Colibri Evaluation Board V3
+ - toradex,colibri_imx6dl-iris # Colibri iMX6DL/S Module on Iris Board
+ - toradex,colibri_imx6dl-iris-v2 # Colibri iMX6DL/S Module on Iris Board V2
+ - const: toradex,colibri_imx6dl # Colibri iMX6DL/S Module
+ - const: fsl,imx6dl
+
+ - description: i.MX6S DHCOM DRC02 Board
+ items:
+ - const: dh,imx6s-dhcom-drc02
+ - const: dh,imx6s-dhcom-som
+ - const: fsl,imx6dl
+
+ - description: TQ-Systems TQMa6DL SoM (variant A) on MBa6x
+ items:
+ - const: tq,imx6dl-mba6x-a
+ - const: tq,mba6a # Expected by bootloader, to be removed in the future
+ - const: tq,imx6dl-tqma6dl-a
+ - const: fsl,imx6dl
+
+ - description: TQ-Systems TQMa6DL SoM (variant B) on MBa6x
+ items:
+ - const: tq,imx6dl-mba6x-b
+ - const: tq,mba6b # Expected by bootloader, to be removed in the future
+ - const: tq,imx6dl-tqma6dl-b
+ - const: fsl,imx6dl
+
+ - description: i.MX6SL based Boards
+ items:
+ - enum:
+ - fsl,imx6sl-evk # i.MX6 SoloLite EVK Board
+ - kobo,aura2
+ - kobo,tolino-shine2hd
+ - kobo,tolino-shine3
+ - kobo,tolino-vision
+ - kobo,tolino-vision5
+ - revotics,imx6sl-warp # Revotics WaRP Board
+ - const: fsl,imx6sl
+
+ - description: i.MX6SLL based Boards
+ items:
+ - enum:
+ - fsl,imx6sll-evk
+ - kobo,clarahd
+ - kobo,librah2o
+ - const: fsl,imx6sll
+
+ - description: i.MX6SLL Kobo Clara 2e Rev. A/B
+ items:
+ - enum:
+ - kobo,clara2e-a
+ - kobo,clara2e-b
+ - const: kobo,clara2e
+ - const: fsl,imx6sll
+
+ - description: i.MX6SX based Boards
+ items:
+ - enum:
+ - boundary,imx6sx-nitrogen6sx
+ - fsl,imx6sx-sabreauto # i.MX6 SoloX Sabre Auto Board
+ - fsl,imx6sx-sdb # i.MX6 SoloX SDB Board
+ - fsl,imx6sx-sdb-reva # i.MX6 SoloX SDB Rev-A Board
+ - samtec,imx6sx-vining-2000 # Softing VIN|ING 2000 Board
+ - udoo,neobasic # UDOO Neo Basic Board
+ - udoo,neoextended # UDOO Neo Extended
+ - udoo,neofull # UDOO Neo Full
+ - const: fsl,imx6sx
+
+ - description: i.MX6UL based Boards
+ items:
+ - enum:
+ - engicam,imx6ul-geam # Engicam GEAM6UL Starter Kit
+ - engicam,imx6ul-isiot # Engicam Is.IoT MX6UL eMMC/NAND Starter kit
+ - fsl,imx6ul-14x14-evk # i.MX6 UltraLite 14x14 EVK Board
+ - karo,imx6ul-tx6ul # Ka-Ro electronics TXUL-0010 Module
+ - kontron,sl-imx6ul # Kontron SL i.MX6UL SoM
+ - prt,prti6g # Protonic PRTI6G Board
+ - technexion,imx6ul-pico-dwarf # TechNexion i.MX6UL Pico-Dwarf
+ - technexion,imx6ul-pico-hobbit # TechNexion i.MX6UL Pico-Hobbit
+ - technexion,imx6ul-pico-pi # TechNexion i.MX6UL Pico-Pi
+ - const: fsl,imx6ul
+
+ - description: i.MX6UL Armadeus Systems OPOS6UL SoM Board
+ items:
+ - const: armadeus,imx6ul-opos6uldev # OPOS6UL (i.MX6UL) SoM on OPOS6ULDev board
+ - const: armadeus,imx6ul-opos6ul # OPOS6UL (i.MX6UL) SoM
+ - const: fsl,imx6ul
+
+ - description: i.MX6UL Digi International ConnectCore 6UL Boards
+ items:
+ - enum:
+ - digi,ccimx6ulsbcexpress # Digi International ConnectCore 6UL SBC Express
+ - digi,ccimx6ulsbcpro # Digi International ConnectCore 6UL SBC Pro
+ - const: digi,ccimx6ulsom
+ - const: fsl,imx6ul
+
+ - description: i.MX6UL Grinn liteBoard
+ items:
+ - const: grinn,imx6ul-liteboard
+ - const: grinn,imx6ul-litesom
+ - const: fsl,imx6ul
+
+ - description: i.MX6UL PHYTEC phyBOARD-Segin
+ items:
+ - enum:
+ - phytec,imx6ul-pbacd10-emmc
+ - phytec,imx6ul-pbacd10-nand
+ - const: phytec,imx6ul-pbacd10 # PHYTEC phyBOARD-Segin with i.MX6 UL
+ - const: phytec,imx6ul-pcl063 # PHYTEC phyCORE-i.MX 6UL
+ - const: fsl,imx6ul
+
+ - description: i.MX6UL Variscite VAR-SOM-MX6 Boards
+ items:
+ - const: variscite,mx6ulconcerto
+ - const: variscite,var-som-imx6ul
+ - const: fsl,imx6ul
+
+ - description: Kontron BL i.MX6UL (N631X S) Board
+ items:
+ - const: kontron,bl-imx6ul # Kontron BL i.MX6UL Carrier Board
+ - const: kontron,sl-imx6ul # Kontron SL i.MX6UL SoM
+ - const: fsl,imx6ul
+
+ - description: Kontron BL i.MX6UL 43 (N631X S 43) Board
+ items:
+ - const: kontron,bl-imx6ul-43 # Kontron BL i.MX6UL Carrier Board with 4.3" Display
+ - const: kontron,bl-imx6ul # Kontron BL i.MX6UL Carrier Board
+ - const: kontron,sl-imx6ul # Kontron SL i.MX6UL SoM
+ - const: fsl,imx6ul
+
+ - description: TQ-Systems TQMa6UL1 SoM on MBa6ULx board
+ items:
+ - enum:
+ - tq,imx6ul-tqma6ul1-mba6ulx
+ - const: tq,imx6ul-tqma6ul1 # MCIMX6G1
+ - const: fsl,imx6ul
+
+ - description: TQ-Systems TQMa6UL2 SoM on MBa6ULx board
+ items:
+ - enum:
+ - tq,imx6ul-tqma6ul2-mba6ulx
+ - const: tq,imx6ul-tqma6ul2 # MCIMX6G2
+ - const: fsl,imx6ul
+
+ - description: TQ-Systems TQMa6ULxL SoM on MBa6ULx[L] board
+ items:
+ - enum:
+ - tq,imx6ul-tqma6ul2l-mba6ulx # using LGA adapter
+ - tq,imx6ul-tqma6ul2l-mba6ulxl
+ - const: tq,imx6ul-tqma6ul2l # MCIMX6G2, LGA SoM variant
+ - const: fsl,imx6ul
+
+ - description: i.MX6ULL based Boards
+ items:
+ - enum:
+ - fsl,imx6ull-14x14-evk # i.MX6 UltraLiteLite 14x14 EVK Board
+ - joz,jozacp # JOZ Access Point
+ - kontron,sl-imx6ull # Kontron SL i.MX6ULL SoM
+ - myir,imx6ull-mys-6ulx-eval # MYiR Tech iMX6ULL Evaluation Board
+ - uni-t,uti260b # UNI-T UTi260B Thermal Camera
+ - const: fsl,imx6ull
+
+ - description: i.MX6ULL Armadeus Systems OPOS6ULDev Board
+ items:
+ - const: armadeus,imx6ull-opos6uldev # OPOS6UL (i.MX6ULL) SoM on OPOS6ULDev board
+ - const: armadeus,imx6ull-opos6ul # OPOS6UL (i.MX6ULL) SoM
+ - const: fsl,imx6ull
+
+ - description: i.MX6ULL chargebyte Tarragon Boards
+ items:
+ - enum:
+ - chargebyte,imx6ull-tarragon-master
+ - chargebyte,imx6ull-tarragon-micro
+ - chargebyte,imx6ull-tarragon-slave
+ - chargebyte,imx6ull-tarragon-slavext
+ - const: fsl,imx6ull
+
+ - description: i.MX6ULL DHCOM SoM based Boards
+ items:
+ - enum:
+ - dh,imx6ull-dhcom-drc02
+ - dh,imx6ull-dhcom-pdk2
+ - dh,imx6ull-dhcom-picoitx
+ - const: dh,imx6ull-dhcom-som # The DHCOR is soldered on the DHCOM
+ - const: dh,imx6ull-dhcor-som
+ - const: fsl,imx6ull
+
+ - description: i.MX6ULL DHCOR SoM based Boards
+ items:
+ - const: marantec,imx6ull-dhcor-maveo-box
+ - const: dh,imx6ull-dhcor-som
+ - const: fsl,imx6ull
+
+ - description: i.MX6ULL Engicam MicroGEA SoM based boards
+ items:
+ - enum:
+ - engicam,microgea-imx6ull-bmm # i.MX6ULL Engicam MicroGEA BMM Board
+ - engicam,microgea-imx6ull-gtw # i.MX6ULL Engicam MicroGEA GTW Board
+ - engicam,microgea-imx6ull-rmm # i.MX6ULL Engicam MicroGEA RMM Board
+ - const: engicam,microgea-imx6ull # i.MX6ULL Engicam MicroGEA SoM
+ - const: fsl,imx6ull
+
+ - description: i.MX6ULL PHYTEC phyBOARD-Segin
+ items:
+ - enum:
+ - phytec,imx6ull-pbacd10-emmc
+ - phytec,imx6ull-pbacd10-nand
+ - const: phytec,imx6ull-pbacd10 # PHYTEC phyBOARD-Segin with i.MX6 ULL
+ - const: phytec,imx6ull-pcl063 # PHYTEC phyCORE-i.MX 6ULL
+ - const: fsl,imx6ull
+
+ - description: i.MX6ULL PHYTEC phyGATE-Tauri
+ items:
+ - enum:
+ - phytec,imx6ull-phygate-tauri-emmc
+ - phytec,imx6ull-phygate-tauri-nand
+ - const: phytec,imx6ull-phygate-tauri # PHYTEC phyGATE-Tauri with i.MX6 ULL
+ - const: phytec,imx6ull-pcl063 # PHYTEC phyCORE-i.MX 6ULL
+ - const: fsl,imx6ull
+
+ - description: i.MX6ULL Boards with Toradex Colibri iMX6ULL Modules
+ items:
+ - enum:
+ - toradex,colibri-imx6ull-aster # Aster Carrier Board
+ - toradex,colibri-imx6ull-eval # Colibri Evaluation Board V3
+ - toradex,colibri-imx6ull-iris # Iris Carrier Board
+ - toradex,colibri-imx6ull-iris-v2 # Iris V2 Carrier Board
+ - const: toradex,colibri-imx6ull # Colibri iMX6ULL Module
+ - const: fsl,imx6ull
+
+ - description: i.MX6ULL Boards with Toradex Colibri iMX6ULL 1GB (eMMC) Module
+ items:
+ - enum:
+ - toradex,colibri-imx6ull-emmc-aster # Aster Carrier Board
+ - toradex,colibri-imx6ull-emmc-eval # Colibri Evaluation B. V3
+ - toradex,colibri-imx6ull-emmc-iris # Iris Carrier Board
+ - toradex,colibri-imx6ull-emmc-iris-v2 # Iris V2 Carrier Board
+ - const: toradex,colibri-imx6ull-emmc # Colibri iMX6ULL 1GB (eMMC) Module
+ - const: fsl,imx6ull
+
+ - description: i.MX6ULL Boards with Toradex Colibri iMX6ULL Wi-Fi / BT Modules
+ items:
+ - enum:
+ - toradex,colibri-imx6ull-wifi-eval # Colibri Eval. B. V3
+ - toradex,colibri-imx6ull-wifi-aster # Aster Carrier Board
+ - toradex,colibri-imx6ull-wifi-iris # Iris Carrier Board
+ - toradex,colibri-imx6ull-wifi-iris-v2 # Iris V2 Carrier Board
+ - const: toradex,colibri-imx6ull-wifi # Colibri iMX6ULL Wi-Fi / BT Module
+ - const: fsl,imx6ull
+
+ - description: Kontron BL i.MX6ULL (N6411 S) Board
+ items:
+ - const: kontron,bl-imx6ull # Kontron BL i.MX6ULL Carrier Board
+ - const: kontron,sl-imx6ull # Kontron SL i.MX6ULL SoM
+ - const: fsl,imx6ull
+
+ - description: TQ-Systems TQMa6ULLx SoM on MBa6ULx board
+ items:
+ - enum:
+ - tq,imx6ull-tqma6ull2-mba6ulx # TQMa6ULL socketable SoM with MCIMX6Y2 on MBa6ULx EVK
+ - const: tq,imx6ull-tqma6ull2 # TQMa6ULL socketable SoM with MCIMX6Y2
+ - const: fsl,imx6ull
+
+ - description: TQ-Systems TQMa6ULLxL SoM on MBa6ULx[L] board
+ items:
+ - enum:
+ - tq,imx6ull-tqma6ull2l-mba6ulx # TQMa6ULLxL LGA SoM with socketable Adapter on MBa6ULx EVK
+ - tq,imx6ull-tqma6ull2l-mba6ulxl # TQMa6ULLxL LGA SoM on MBa6ULxL gateway board
+ - const: tq,imx6ull-tqma6ull2l # TQMa6ULLxL LGA SoM with MCIMX6Y2
+ - const: fsl,imx6ull
+
+ - description: Seeed Stuido i.MX6ULL SoM on dev boards
+ items:
+ - enum:
+ - seeed,imx6ull-seeed-npi-emmc
+ - seeed,imx6ull-seeed-npi-nand
+ - const: seeed,imx6ull-seeed-npi
+ - const: fsl,imx6ull
+
+ - description: i.MX6ULZ based Boards
+ items:
+ - enum:
+ - bsh,imx6ulz-bsh-smm-m2 # i.MX6 ULZ BSH SystemMaster
+ - fsl,imx6ulz-14x14-evk # i.MX6 ULZ 14x14 EVK Board
+ - const: fsl,imx6ull # This seems odd. Should be last?
+ - const: fsl,imx6ulz
+
+ - description: i.MX7S based Boards
+ items:
+ - enum:
+ - element14,imx7s-warp # Element14 Warp i.MX7 Board
+ - toradex,colibri-imx7s # Colibri iMX7S Module
+ - const: fsl,imx7s
+
+ - description: i.MX7S Boards with Toradex Colibri iMX7S Module
+ items:
+ - enum:
+ - toradex,colibri-imx7s-aster # Module on Aster Carrier Board
+ - toradex,colibri-imx7s-eval-v3 # Module on Colibri Evaluation Board V3
+ - toradex,colibri-imx7s-iris # Module on Iris Carrier Board
+ - toradex,colibri-imx7s-iris-v2 # Module on Iris Carrier Board V2
+ - const: toradex,colibri-imx7s
+ - const: fsl,imx7s
+
+ - description: TQ-Systems TQMa7S SoM on MBa7x board
+ items:
+ - const: tq,imx7s-mba7
+ - const: tq,imx7s-tqma7
+ - const: fsl,imx7s
+
+ - description: i.MX7D based Boards
+ items:
+ - enum:
+ - boundary,imx7d-nitrogen7
+ - compulab,cl-som-imx7 # CompuLab CL-SOM-iMX7
+ - fsl,imx7d-sdb # i.MX7 SabreSD Board
+ - fsl,imx7d-sdb-reva # i.MX7 SabreSD Rev-A Board
+ - kam,imx7d-flex-concentrator # Kamstrup OMNIA Flex Concentrator
+ - kam,imx7d-flex-concentrator-mfg # Kamstrup OMNIA Flex Concentrator in manufacturing mode
+ - novtech,imx7d-meerkat96 # i.MX7 Meerkat96 Board
+ - remarkable,imx7d-remarkable2 # i.MX7D ReMarkable 2 E-Ink Tablet
+ - storopack,imx7d-smegw01 # Storopack i.MX7D SMEGW01
+ - technexion,imx7d-pico-dwarf # TechNexion i.MX7D Pico-Dwarf
+ - technexion,imx7d-pico-hobbit # TechNexion i.MX7D Pico-Hobbit
+ - technexion,imx7d-pico-nymph # TechNexion i.MX7D Pico-Nymph
+ - technexion,imx7d-pico-pi # TechNexion i.MX7D Pico-Pi
+ - zii,imx7d-rmu2 # ZII RMU2 Board
+ - zii,imx7d-rpu2 # ZII RPU2 Board
+ - const: fsl,imx7d
+
+ - description: TQ-Systems TQMa7D SoM on MBa7x board
+ items:
+ - const: tq,imx7d-mba7
+ - const: tq,imx7d-tqma7
+ - const: fsl,imx7d
+
+ - description:
+ Compulab SBC-iMX7 is a single board computer based on the
+ Freescale i.MX7 system-on-chip. SBC-iMX7 is implemented with
+ the CL-SOM-iMX7 System-on-Module providing most of the functions,
+ and SB-SOM-iMX7 carrier board providing additional peripheral
+ functions and connectors.
+ items:
+ - const: compulab,sbc-imx7
+ - const: compulab,cl-som-imx7
+ - const: fsl,imx7d
+
+ - description: i.MX7D Boards with Toradex Colibri i.MX7D Module
+ items:
+ - enum:
+ - toradex,colibri-imx7d-aster # Aster Carrier Board
+ - toradex,colibri-imx7d-eval-v3 # Colibri Evaluation Board V3
+ - toradex,colibri-imx7d-iris # Iris Carrier Board
+ - toradex,colibri-imx7d-iris-v2 # Iris Carrier Board V2
+ - const: toradex,colibri-imx7d
+ - const: fsl,imx7d
+
+ - description: i.MX7D Boards with Toradex Colibri i.MX7D 1GB (eMMC) Module
+ items:
+ - enum:
+ - toradex,colibri-imx7d-emmc-aster # Module on Aster Carrier Board
+ - toradex,colibri-imx7d-emmc-eval-v3 # Module on Colibri Evaluation Board V3
+ - toradex,colibri-imx7d-emmc-iris # Module on Iris Carrier Board
+ - toradex,colibri-imx7d-emmc-iris-v2 # Module on Iris Carrier Board V2
+ - const: toradex,colibri-imx7d-emmc
+ - const: fsl,imx7d
+
+ - description: i.MX7ULP based Boards
+ items:
+ - enum:
+ - ea,imx7ulp-com # i.MX7ULP Embedded Artists COM Board
+ - fsl,imx7ulp-evk # i.MX7ULP Evaluation Kit
+ - const: fsl,imx7ulp
+
+ - description: i.MX8MM based Boards
+ items:
+ - enum:
+ - beacon,imx8mm-beacon-kit # i.MX8MM Beacon Development Kit
+ - boundary,imx8mm-nitrogen8mm # i.MX8MM Nitrogen Board
+ - dmo,imx8mm-data-modul-edm-sbc # i.MX8MM eDM SBC
+ - emtrion,emcon-mx8mm-avari # emCON-MX8MM SoM on Avari Base
+ - fsl,imx8mm-ddr4-evk # i.MX8MM DDR4 EVK Board
+ - fsl,imx8mm-evk # i.MX8MM EVK Board
+ - fsl,imx8mm-evkb # i.MX8MM EVKB Board
+ - gateworks,imx8mm-gw75xx-0x # i.MX8MM Gateworks Board
+ - gateworks,imx8mm-gw7904
+ - gw,imx8mm-gw71xx-0x # i.MX8MM Gateworks Development Kit
+ - gw,imx8mm-gw72xx-0x # i.MX8MM Gateworks Development Kit
+ - gw,imx8mm-gw73xx-0x # i.MX8MM Gateworks Development Kit
+ - gw,imx8mm-gw7901 # i.MX8MM Gateworks Board
+ - gw,imx8mm-gw7902 # i.MX8MM Gateworks Board
+ - gw,imx8mm-gw7903 # i.MX8MM Gateworks Board
+ - innocomm,wb15-evk # i.MX8MM Innocomm EVK board with WB15 SoM
+ - kontron,imx8mm-sl # i.MX8MM Kontron SL (N801X) SOM
+ - kontron,imx8mm-osm-s # i.MX8MM Kontron OSM-S (N802X) SOM
+ - prt,prt8mm # i.MX8MM Protonic PRT8MM Board
+ - const: fsl,imx8mm
+
+ - description: Compulab i.MX8MM UCM SoM based boards
+ items:
+ - enum:
+ - compulab,imx8mm-iot-gateway # i.MX8MM Compulab IoT-Gateway
+ - const: compulab,imx8mm-ucm-som # i.MX8MM Compulab UCM SoM
+ - const: fsl,imx8mm
+
+ - description: Emtop i.MX8MM based Boards
+ items:
+ - const: ees,imx8mm-emtop-baseboard # i.MX8MM Emtop SoM on i.MX8M Mini Baseboard V1
+ - const: ees,imx8mm-emtop-som # i.MX8MM Emtop SOM-IMX8MMLPD4 module
+ - const: fsl,imx8mm
+
+ - description: Engicam i.Core MX8M Mini SoM based boards
+ items:
+ - enum:
+ - engicam,icore-mx8mm-ctouch2 # i.MX8MM Engicam i.Core MX8M Mini C.TOUCH 2.0
+ - engicam,icore-mx8mm-edimm2.2 # i.MX8MM Engicam i.Core MX8M Mini EDIMM2.2 Starter Kit
+ - const: engicam,icore-mx8mm # i.MX8MM Engicam i.Core MX8M Mini SoM
+ - const: fsl,imx8mm
+
+ - description: Kontron BL i.MX8MM (N801X S) Board
+ items:
+ - const: kontron,imx8mm-bl
+ - const: kontron,imx8mm-sl
+ - const: fsl,imx8mm
+
+ - description: Kontron BL i.MX8MM OSM-S (N802X S) Board
+ items:
+ - const: kontron,imx8mm-bl-osm-s
+ - const: kontron,imx8mm-osm-s
+ - const: fsl,imx8mm
+
+ - description: Toradex Boards with Verdin iMX8M Mini Modules
+ items:
+ - enum:
+ - menlo,mx8menlo # Verdin iMX8M Mini Module on i.MX8MM Menlo board
+ - toradex,verdin-imx8mm-nonwifi-dahlia # Verdin iMX8M Mini Module on Dahlia
+ - toradex,verdin-imx8mm-nonwifi-dev # Verdin iMX8M Mini Module on Verdin Development Board
+ - toradex,verdin-imx8mm-nonwifi-ivy # Verdin iMX8M Mini Module on Ivy
+ - toradex,verdin-imx8mm-nonwifi-mallow # Verdin iMX8M Mini Module on Mallow
+ - toradex,verdin-imx8mm-nonwifi-yavia # Verdin iMX8M Mini Module on Yavia
+ - const: toradex,verdin-imx8mm-nonwifi # Verdin iMX8M Mini Module without Wi-Fi / BT
+ - const: toradex,verdin-imx8mm # Verdin iMX8M Mini Module
+ - const: fsl,imx8mm
+
+ - description: Toradex Boards with Verdin iMX8M Mini Wi-Fi / BT Modules
+ items:
+ - enum:
+ - toradex,verdin-imx8mm-wifi-dahlia # Verdin iMX8M Mini Wi-Fi / BT Module on Dahlia
+ - toradex,verdin-imx8mm-wifi-dev # Verdin iMX8M Mini Wi-Fi / BT M. on Verdin Development B.
+ - toradex,verdin-imx8mm-wifi-ivy # Verdin iMX8M Mini Wi-Fi / BT Module on Ivy
+ - toradex,verdin-imx8mm-wifi-mallow # Verdin iMX8M Mini Wi-Fi / BT Module on Mallow
+ - toradex,verdin-imx8mm-wifi-yavia # Verdin iMX8M Mini Wi-Fi / BT Module on Yavia
+ - const: toradex,verdin-imx8mm-wifi # Verdin iMX8M Mini Wi-Fi / BT Module
+ - const: toradex,verdin-imx8mm # Verdin iMX8M Mini Module
+ - const: fsl,imx8mm
+
+ - description: PHYTEC phyCORE-i.MX8MM SoM based boards
+ items:
+ - enum:
+ - phytec,imx8mm-phyboard-polis-rdk # phyBOARD-Polis RDK
+ - phytec,imx8mm-phygate-tauri-l # phyGATE-Tauri-L Gateway
+ - const: phytec,imx8mm-phycore-som # phyCORE-i.MX8MM SoM
+ - const: fsl,imx8mm
+
+ - description: Variscite VAR-SOM-MX8MM based boards
+ items:
+ - const: variscite,var-som-mx8mm-symphony
+ - const: variscite,var-som-mx8mm
+ - const: fsl,imx8mm
+
+ - description:
+ TQMa8MxML is a series of SOM featuring NXP i.MX8MM system-on-chip
+ variants. It is designed to be soldered on different carrier boards.
+ All variants (TQMa8M[Q,D,S][L]ML) use the same device tree, hence only
+ one compatible is needed.
+ items:
+ - enum:
+ - cloos,imx8mm-phg # i.MX8MM Cloos PHG Board
+ - tq,imx8mm-tqma8mqml-mba8mx # TQ-Systems GmbH i.MX8MM TQMa8MQML SOM on MBa8Mx
+ - const: tq,imx8mm-tqma8mqml # TQ-Systems GmbH i.MX8MM TQMa8MQML SOM
+ - const: fsl,imx8mm
+
+ - description: i.MX8MN based Boards
+ items:
+ - enum:
+ - beacon,imx8mn-beacon-kit # i.MX8MN Beacon Development Kit
+ - bsh,imx8mn-bsh-smm-s2 # i.MX8MN BSH SystemMaster S2
+ - bsh,imx8mn-bsh-smm-s2pro # i.MX8MN BSH SystemMaster S2 PRO
+ - fsl,imx8mn-ddr3l-evk # i.MX8MN DDR3L EVK Board
+ - fsl,imx8mn-ddr4-evk # i.MX8MN DDR4 EVK Board
+ - fsl,imx8mn-evk # i.MX8MN LPDDR4 EVK Board
+ - gw,imx8mn-gw7902 # i.MX8MM Gateworks Board
+ - const: fsl,imx8mn
+
+ - description: Variscite VAR-SOM-MX8MN based boards
+ items:
+ - enum:
+ - dimonoff,gateway-evk # i.MX8MN Dimonoff Gateway EVK Board
+ - rve,gateway # i.MX8MN RVE Gateway Board
+ - variscite,var-som-mx8mn-symphony
+ - const: variscite,var-som-mx8mn
+ - const: fsl,imx8mn
+
+ - description:
+ TQMa8MxNL is a series of SOM featuring NXP i.MX8MN system-on-chip
+ variants. It is designed to be soldered on different carrier boards.
+ All variants (TQMa8M[Q,D,S][L]NL) use the same device tree, hence only
+ one compatible is needed.
+ items:
+ - enum:
+ - tq,imx8mn-tqma8mqnl-mba8mx # TQ-Systems GmbH i.MX8MN TQMa8MQNL SOM on MBa8Mx
+ - const: tq,imx8mn-tqma8mqnl # TQ-Systems GmbH i.MX8MN TQMa8MQNL SOM
+ - const: fsl,imx8mn
+
+ - description: i.MX8MP based Boards
+ items:
+ - enum:
+ - beacon,imx8mp-beacon-kit # i.MX8MP Beacon Development Kit
+ - dmo,imx8mp-data-modul-edm-sbc # i.MX8MP eDM SBC
+ - emcraft,imx8mp-navqp # i.MX8MP Emcraft Systems NavQ+ Kit
+ - fsl,imx8mp-evk # i.MX8MP EVK Board
+ - fsl,imx8mp-evk-revb4 # i.MX8MP EVK Rev B4 Board
+ - gateworks,imx8mp-gw71xx-2x # i.MX8MP Gateworks Board
+ - gateworks,imx8mp-gw72xx-2x # i.MX8MP Gateworks Board
+ - gateworks,imx8mp-gw73xx-2x # i.MX8MP Gateworks Board
+ - gateworks,imx8mp-gw74xx # i.MX8MP Gateworks Board
+ - gateworks,imx8mp-gw75xx-2x # i.MX8MP Gateworks Board
+ - gateworks,imx8mp-gw82xx-2x # i.MX8MP Gateworks Board
+ - gocontroll,moduline-display # GOcontroll Moduline Display controller
+ - skov,imx8mp-skov-basic # SKOV i.MX8MP baseboard without frontplate
+ - skov,imx8mp-skov-revb-hdmi # SKOV i.MX8MP climate control without panel
+ - skov,imx8mp-skov-revb-lt6 # SKOV i.MX8MP climate control with 7” panel
+ - skov,imx8mp-skov-revb-mi1010ait-1cp1 # SKOV i.MX8MP climate control with 10.1" panel
+ - skov,imx8mp-skov-revc-bd500 # SKOV i.MX8MP climate control with LED frontplate
+ - skov,imx8mp-skov-revc-tian-g07017 # SKOV i.MX8MP climate control with 7" panel
+ - ultratronik,imx8mp-ultra-mach-sbc # Ultratronik SBC i.MX8MP based board
+ - ysoft,imx8mp-iota2-lumpy # Y Soft i.MX8MP IOTA2 Lumpy Board
+ - const: fsl,imx8mp
+
+ - description: ABB Boards with i.MX8M Plus Modules from ADLink
+ items:
+ - enum:
+ - abb,imx8mp-aristanetos3-adpismarc # i.MX8MP ABB SoM on PI SMARC Board
+ - abb,imx8mp-aristanetos3-helios # i.MX8MP ABB SoM on helios Board
+ - abb,imx8mp-aristanetos3-proton2s # i.MX8MP ABB SoM on proton2s Board
+ - const: abb,imx8mp-aristanetos3-som # i.MX8MP ABB SoM
+ - const: fsl,imx8mp
+
+ - description: Avnet (MSC Branded) Boards with SM2S i.MX8M Plus Modules
+ items:
+ - const: avnet,sm2s-imx8mp-14N0600E-ep1 # SM2S-IMX8PLUS-14N0600E on SM2-MB-EP1 Carrier Board
+ - const: avnet,sm2s-imx8mp-14N0600E # 14N0600E variant of SM2S-IMX8PLUS SoM
+ - const: avnet,sm2s-imx8mp # SM2S-IMX8PLUS SoM
+ - const: fsl,imx8mp
+
+ - description: Boundary Devices Nitrogen8M Plus ENC Carrier Board
+ items:
+ - const: boundary,imx8mp-nitrogen-enc-carrier-board
+ - const: boundary,imx8mp-nitrogen-som
+ - const: fsl,imx8mp
+
+ - description: Boundary Device Nitrogen8MP Universal SMARC Carrier Board
+ items:
+ - const: boundary,imx8mp-nitrogen-smarc-universal-board
+ - const: boundary,imx8mp-nitrogen-smarc-som
+ - const: fsl,imx8mp
+
+ - description: i.MX8MP DHCOM based Boards
+ items:
+ - enum:
+ - dh,imx8mp-dhcom-drc02 # i.MX8MP DHCOM SoM on DRC02 board
+ - dh,imx8mp-dhcom-pdk2 # i.MX8MP DHCOM SoM on PDK2 board
+ - dh,imx8mp-dhcom-pdk3 # i.MX8MP DHCOM SoM on PDK3 board
+ - dh,imx8mp-dhcom-picoitx # i.MX8MP DHCOM SoM on PicoITX board
+ - const: dh,imx8mp-dhcom-som # i.MX8MP DHCOM SoM
+ - const: fsl,imx8mp
+
+ - description: Engicam i.Core MX8M Plus SoM based boards
+ items:
+ - enum:
+ - engicam,icore-mx8mp-edimm2.2 # i.MX8MP Engicam i.Core MX8M Plus EDIMM2.2 Starter Kit
+ - const: engicam,icore-mx8mp # i.MX8MP Engicam i.Core MX8M Plus SoM
+ - const: fsl,imx8mp
+
+ - description: Kontron i.MX8MP OSM-S SoM based Boards
+ items:
+ - const: kontron,imx8mp-bl-osm-s # Kontron BL i.MX8MP OSM-S Board
+ - const: kontron,imx8mp-osm-s # Kontron i.MX8MP OSM-S SoM
+ - const: fsl,imx8mp
+
+ - description: Kontron i.MX8MP SMARC based Boards
+ items:
+ - const: kontron,imx8mp-smarc-eval-carrier # Kontron i.MX8MP SMARC Eval Carrier
+ - const: kontron,imx8mp-smarc # Kontron i.MX8MP SMARC Module
+ - const: kontron,imx8mp-osm-s # Kontron i.MX8MP OSM-S SoM
+ - const: fsl,imx8mp
+
+ - description: PHYTEC phyCORE-i.MX8MP FPSC based boards
+ items:
+ - enum:
+ - phytec,imx8mp-libra-rdk-fpsc # i.MX 8M Plus Libra RDK
+ - const: phytec,imx8mp-phycore-fpsc # phyCORE-i.MX 8M Plus FPSC
+ - const: fsl,imx8mp
+
+ - description: PHYTEC phyCORE-i.MX8MP SoM based boards
+ items:
+ - const: phytec,imx8mp-phyboard-pollux-rdk # phyBOARD-Pollux RDK
+ - const: phytec,imx8mp-phycore-som # phyCORE-i.MX8MP SoM
+ - const: fsl,imx8mp
+
+ - description: Polyhex DEBIX i.MX8MP based SBCs
+ items:
+ - enum:
+ - polyhex,imx8mp-debix-model-a # Polyhex Debix Model A Board
+ - const: polyhex,imx8mp-debix # Polyhex i.MX8MP Debix SBCs
+ - const: fsl,imx8mp
+
+ - description: Polyhex DEBIX i.MX8MP SOM A based boards
+ items:
+ - enum:
+ - polyhex,imx8mp-debix-som-a-bmb-08 # Polyhex Debix SOM A on SOM A I/O board
+ - const: polyhex,imx8mp-debix-som-a # Polyhex Debix SOM A
+ - const: fsl,imx8mp
+
+ - description: SolidRun i.MX8MP SoM based boards
+ items:
+ - enum:
+ - solidrun,imx8mp-cubox-m # SolidRun i.MX8MP SoM on CuBox-M
+ - solidrun,imx8mp-hummingboard-mate # SolidRun i.MX8MP SoM on HummingBoard Mate
+ - solidrun,imx8mp-hummingboard-pro # SolidRun i.MX8MP SoM on HummingBoard Pro
+ - solidrun,imx8mp-hummingboard-pulse # SolidRun i.MX8MP SoM on HummingBoard Pulse
+ - solidrun,imx8mp-hummingboard-ripple # SolidRun i.MX8MP SoM on HummingBoard Ripple
+ - const: solidrun,imx8mp-sr-som
+ - const: fsl,imx8mp
+
+ - description: TechNexion EDM-G-IMX8M-PLUS SoM based boards
+ items:
+ - enum:
+ - technexion,edm-g-imx8mp-wb # TechNexion EDM-G-IMX8MP SOM on WB-EDM-G
+ - const: technexion,edm-g-imx8mp # TechNexion EDM-G-IMX8MP SOM
+ - const: fsl,imx8mp
+
+ - description: Toradex Boards with SMARC iMX8M Plus Modules
+ items:
+ - const: toradex,smarc-imx8mp-dev # Toradex SMARC iMX8M Plus on Toradex SMARC Development Board
+ - const: toradex,smarc-imx8mp # Toradex SMARC iMX8M Plus Module
+ - const: fsl,imx8mp
+
+ - description: Toradex Boards with Verdin iMX8M Plus Modules
+ items:
+ - enum:
+ - toradex,verdin-imx8mp-nonwifi-dahlia # Verdin iMX8M Plus Module on Dahlia
+ - toradex,verdin-imx8mp-nonwifi-dev # Verdin iMX8M Plus Module on Verdin Development Board
+ - toradex,verdin-imx8mp-nonwifi-ivy # Verdin iMX8M Plus Module on Ivy
+ - toradex,verdin-imx8mp-nonwifi-mallow # Verdin iMX8M Plus Module on Mallow
+ - toradex,verdin-imx8mp-nonwifi-yavia # Verdin iMX8M Plus Module on Yavia
+ - const: toradex,verdin-imx8mp-nonwifi # Verdin iMX8M Plus Module without Wi-Fi / BT
+ - const: toradex,verdin-imx8mp # Verdin iMX8M Plus Module
+ - const: fsl,imx8mp
+
+ - description: Toradex Boards with Verdin iMX8M Plus Wi-Fi / BT Modules
+ items:
+ - enum:
+ - toradex,verdin-imx8mp-wifi-dahlia # Verdin iMX8M Plus Wi-Fi / BT Module on Dahlia
+ - toradex,verdin-imx8mp-wifi-dev # Verdin iMX8M Plus Wi-Fi / BT M. on Verdin Development B.
+ - toradex,verdin-imx8mp-wifi-ivy # Verdin iMX8M Plus Wi-Fi / BT Module on Ivy
+ - toradex,verdin-imx8mp-wifi-mallow # Verdin iMX8M Plus Wi-Fi / BT Module on Mallow
+ - toradex,verdin-imx8mp-wifi-yavia # Verdin iMX8M Plus Wi-Fi / BT Module on Yavia
+ - const: toradex,verdin-imx8mp-wifi # Verdin iMX8M Plus Wi-Fi / BT Module
+ - const: toradex,verdin-imx8mp # Verdin iMX8M Plus Module
+ - const: fsl,imx8mp
+
+ - description:
+ TQMa8MPxL is a series of LGA SOM featuring NXP i.MX8MP system-on-chip
+ variants. It is designed to be soldered on different carrier boards.
+ All CPU variants use the same device tree hence only one compatible
+ is needed. MBa8MPxL mainboard can be used as starterkit or in a boxed
+ version as an industrial computing device.
+ items:
+ - enum:
+ - tq,imx8mp-tqma8mpql-mba8mpxl # TQ-Systems GmbH i.MX8MP TQMa8MPQL SOM on MBa8MPxL
+ - tq,imx8mp-tqma8mpql-mba8mp-ras314 # TQ-Systems GmbH i.MX8MP TQMa8MPQL SOM on MBa8MP-RAS314
+ - const: tq,imx8mp-tqma8mpql # TQ-Systems GmbH i.MX8MP TQMa8MPQL SOM
+ - const: fsl,imx8mp
+
+ - description: Variscite VAR-SOM-MX8M Plus based boards
+ items:
+ - const: variscite,var-som-mx8mp-symphony
+ - const: variscite,var-som-mx8mp
+ - const: fsl,imx8mp
+
+ - description: i.MX8MQ based Boards
+ items:
+ - enum:
+ - boundary,imx8mq-nitrogen8m # i.MX8MQ NITROGEN Board
+ - boundary,imx8mq-nitrogen8m-som # i.MX8MQ NITROGEN SoM
+ - einfochips,imx8mq-thor96 # i.MX8MQ Thor96 Board
+ - fsl,imx8mq-evk # i.MX8MQ EVK Board
+ - google,imx8mq-phanbell # Google Coral Edge TPU
+ - kontron,pitx-imx8m # Kontron pITX-imx8m Board
+ - purism,librem5-devkit # Purism Librem5 devkit
+ - solidrun,hummingboard-pulse # SolidRun Hummingboard Pulse
+ - technexion,pico-pi-imx8m # TechNexion PICO-PI-8M evk
+ - const: fsl,imx8mq
+
+ - description: i.MX8MQ NITROGEN SoM based Boards
+ items:
+ - const: mntre,reform2 # MNT Reform2 Laptop
+ - const: boundary,imx8mq-nitrogen8m-som # i.MX8MQ NITROGEN SoM
+ - const: fsl,imx8mq
+
+ - description: Purism Librem5 phones
+ items:
+ - enum:
+ - purism,librem5r2 # Purism Librem5 phone "Chestnut"
+ - purism,librem5r3 # Purism Librem5 phone "Dogwood"
+ - purism,librem5r4 # Purism Librem5 phone "Evergreen"
+ - const: purism,librem5
+ - const: fsl,imx8mq
+
+ - description:
+ TQMa8Mx is a series of SOM featuring NXP i.MX8MQ system-on-chip
+ variants. It is designed to be clicked on different carrier boards.
+ items:
+ - enum:
+ - tq,imx8mq-tqma8mq-mba8mx # TQ-Systems GmbH i.MX8MQ TQMa8Mx SOM on MBa8Mx
+ - const: tq,imx8mq-tqma8mq # TQ-Systems GmbH i.MX8MQ TQMa8Mx SOM
+ - const: fsl,imx8mq
+
+ - description: Zodiac Inflight Innovations Ultra Boards
+ items:
+ - enum:
+ - zii,imx8mq-ultra-rmb3
+ - zii,imx8mq-ultra-zest
+ - const: zii,imx8mq-ultra
+ - const: fsl,imx8mq
+
+ - description: i.MX8QM based Boards
+ items:
+ - enum:
+ - fsl,imx8qm-mek # i.MX8QM MEK Board
+ - fsl,imx8qm-mek-revd # i.MX8QM MEK Rev D Board
+ - const: fsl,imx8qm
+
+ - description: i.MX8QM Boards with Toradex Apalis iMX8 Modules
+ items:
+ - enum:
+ - toradex,apalis-imx8-eval # Apalis iMX8 Module on Apalis Evaluation V1.0/V1.1 Board
+ - toradex,apalis-imx8-eval-v1.2 # Apalis iMX8 Module on Apalis Evaluation V1.2 Board
+ - toradex,apalis-imx8-ixora-v1.1 # Apalis iMX8 Module on Ixora V1.1 Carrier Board
+ - const: toradex,apalis-imx8
+ - const: fsl,imx8qm
+
+ - description: i.MX8QM Boards with Toradex Apalis iMX8 V1.1 Modules
+ items:
+ - enum:
+ - toradex,apalis-imx8-v1.1-eval # Apalis iMX8 V1.1 Module on Apalis Eval. V1.0/V1.1 Board
+ - toradex,apalis-imx8-v1.1-eval-v1.2 # Apalis iMX8 V1.1 Module on Apalis Eval. V1.2 Board
+ - toradex,apalis-imx8-v1.1-ixora-v1.1 # Apalis iMX8 V1.1 Module on Ixora V1.1 C. Board
+ - toradex,apalis-imx8-v1.1-ixora-v1.2 # Apalis iMX8 V1.1 Module on Ixora V1.2 C. Board
+ - const: toradex,apalis-imx8-v1.1
+ - const: fsl,imx8qm
+
+ - description: i.MX8QXP based Boards
+ items:
+ - enum:
+ - einfochips,imx8qxp-ai_ml # i.MX8QXP AI_ML Board
+ - fsl,imx8qxp-mek # i.MX8QXP MEK Board
+ - fsl,imx8qxp-mek-wcpu # i.MX8QXP MEK WCPU Board
+ - const: fsl,imx8qxp
+
+ - description: i.MX8DXL based Boards
+ items:
+ - enum:
+ - fsl,imx8dxl-evk # i.MX8DXL EVK Board
+ - const: fsl,imx8dxl
+
+ - description: i.MX8QXP/i.MX8DX Boards with Toradex Colibri iMX8X Modules
+ items:
+ - enum:
+ - toradex,colibri-imx8x-aster # Colibri iMX8X Module on Aster Board
+ - toradex,colibri-imx8x-eval-v3 # Colibri iMX8X Module on Colibri Evaluation Board V3
+ - toradex,colibri-imx8x-iris # Colibri iMX8X Module on Iris Board
+ - toradex,colibri-imx8x-iris-v2 # Colibri iMX8X Module on Iris Board V2
+ - const: toradex,colibri-imx8x
+ - enum:
+ - fsl,imx8qxp
+ - fsl,imx8dx
+
+ - description:
+ TQMa8Xx is a series of SOM featuring NXP i.MX8X system-on-chip
+ variants. It is designed to be clicked on different carrier boards
+ MBa8Xx is the starterkit
+ oneOf:
+ - items:
+ - enum:
+ - tq,imx8dxp-tqma8xdp-mba8xx # TQ-Systems GmbH TQMa8XDP SOM on MBa8Xx
+ - const: tq,imx8dxp-tqma8xdp # TQ-Systems GmbH TQMa8XDP SOM (with i.MX8DXP)
+ - const: fsl,imx8dxp
+ - items:
+ - enum:
+ - tq,imx8qxp-tqma8xqp-mba8xx # TQ-Systems GmbH TQMa8XQP SOM on MBa8Xx
+ - const: tq,imx8qxp-tqma8xqp # TQ-Systems GmbH TQMa8XQP SOM (with i.MX8QXP)
+ - const: fsl,imx8qxp
+
+ - description:
+ TQMa8XxS is a series of SOM featuring NXP i.MX8X system-on-chip
+ variants. It has the SMARC-2.0 form factor and is designed to be placed on
+ different carrier boards. MB-SMARC-2 is a carrier reference design.
+ oneOf:
+ - items:
+ - enum:
+ - tq,imx8qxp-tqma8xqps-mb-smarc-2 # TQ-Systems GmbH TQMa8QXPS SOM on MB-SMARC-2
+ - const: tq,imx8qxp-tqma8xqps # TQ-Systems GmbH TQMa8QXPS SOM
+ - const: fsl,imx8qxp
+ - items:
+ - enum:
+ - tq,imx8dxp-tqma8xdps-mb-smarc-2 # TQ-Systems GmbH TQMa8XDPS SOM on MB-SMARC-2
+ - const: tq,imx8dxp-tqma8xdps # TQ-Systems GmbH TQMa8XDPS SOM
+ - const: fsl,imx8dxp
+
+ - description: i.MX8ULP based Boards
+ items:
+ - enum:
+ - fsl,imx8ulp-9x9-evk # i.MX8ULP EVK9 Board
+ - fsl,imx8ulp-evk # i.MX8ULP EVK Board
+ - const: fsl,imx8ulp
+
+ - description: i.MX91 based Boards
+ items:
+ - enum:
+ - fsl,imx91-11x11-evk # i.MX91 11x11 EVK Board
+ - const: fsl,imx91
+
+ - description: i.MX93 based Boards
+ items:
+ - enum:
+ - fsl,imx93-9x9-qsb # i.MX93 9x9 QSB Board
+ - fsl,imx93-11x11-evk # i.MX93 11x11 EVK Board
+ - fsl,imx93-14x14-evk # i.MX93 14x14 EVK Board
+ - const: fsl,imx93
+
+ - description: i.MX94 based Boards
+ items:
+ - enum:
+ - fsl,imx943-evk # i.MX943 EVK Board
+ - const: fsl,imx94
+
+ - description: i.MX95 based Boards
+ items:
+ - enum:
+ - fsl,imx95-15x15-evk # i.MX95 15x15 EVK Board
+ - fsl,imx95-19x19-evk # i.MX95 19x19 EVK Board
+ - const: fsl,imx95
+
+ - description: PHYTEC i.MX 95 FPSC based Boards
+ items:
+ - enum:
+ - phytec,imx95-libra-rdk-fpsc # Libra-i.MX 95 FPSC
+ - const: phytec,imx95-phycore-fpsc # phyCORE-i.MX 95 FPSC
+ - const: fsl,imx95
+
+ - description: i.MXRT1050 based Boards
+ items:
+ - enum:
+ - fsl,imxrt1050-evk # i.MXRT1050 EVK Board
+ - const: fsl,imxrt1050
+
+ - description: i.MXRT1170 based Boards
+ items:
+ - enum:
+ - fsl,imxrt1170-evk # i.MXRT1170 EVK Board
+ - const: fsl,imxrt1170
+
+ - description:
+ TQMa91xxLA and TQMa91xxCA are two series of feature compatible SOM
+ using NXP i.MX91 SOC in 11x11 mm package.
+ TQMa91xxLA is designed to be soldered on different carrier boards.
+ TQMa91xxCA is a compatible variant using board to board connectors.
+ All SOM and CPU variants use the same device tree hence only one
+ compatible is needed. Bootloader disables all features not present
+ in the assembled SOC.
+ MBa91xxCA mainboard can be used as starterkit for the SOM
+ soldered on an adapter board or for the connector variant
+ MBa91xxLA mainboard is a single board computer using the solderable
+ SOM variant
+ items:
+ - enum:
+ - tq,imx91-tqma9131-mba91xxca # TQ-Systems GmbH i.MX91 TQMa91xxCA/LA SOM on MBa91xxCA
+ - const: tq,imx91-tqma9131 # TQ-Systems GmbH i.MX91 TQMa91xxCA/LA SOM
+ - const: fsl,imx91
+
+ - description:
+ TQMa93xxLA and TQMa93xxCA are two series of feature compatible SOM
+ using NXP i.MX93 SOC in 11x11 mm package.
+ TQMa93xxLA is designed to be soldered on different carrier boards.
+ TQMa93xxCA is a compatible variant using board to board connectors.
+ All SOM and CPU variants use the same device tree hence only one
+ compatible is needed. Bootloader disables all features not present
+ in the assembled SOC.
+ MBa91xxCA mainboard can be used as starterkit for the SOM
+ soldered on an adapter board or for the connector variant
+ to evaluate RGB display support.
+ MBa93xxCA mainboard can be used as starterkit for the SOM
+ soldered on an adapter board or for the connector variant
+ MBa93xxLA mainboard is a single board computer using the solderable
+ SOM variant
+ items:
+ - enum:
+ - tq,imx93-tqma9352-mba91xxca # TQ-Systems GmbH i.MX93 TQMa93xxCA/LA SOM on MBa91xxCA
+ - tq,imx93-tqma9352-mba93xxca # TQ-Systems GmbH i.MX93 TQMa93xxCA/LA SOM on MBa93xxCA
+ - tq,imx93-tqma9352-mba93xxla # TQ-Systems GmbH i.MX93 TQMa93xxLA SOM on MBa93xxLA SBC
+ - const: tq,imx93-tqma9352 # TQ-Systems GmbH i.MX93 TQMa93xxCA/LA SOM
+ - const: fsl,imx93
+
+ - description: PHYTEC phyCORE-i.MX93 SoM based boards
+ items:
+ - enum:
+ - phytec,imx93-phyboard-nash # phyBOARD-Nash-i.MX93
+ - phytec,imx93-phyboard-segin # phyBOARD-Segin with i.MX93
+ - const: phytec,imx93-phycore-som # phyCORE-i.MX93 SoM
+ - const: fsl,imx93
+
+ - description: Variscite VAR-SOM-MX93 based boards
+ items:
+ - const: variscite,var-som-mx93-symphony
+ - const: variscite,var-som-mx93
+ - const: fsl,imx93
+
+ - description: Kontron OSM-S i.MX93 SoM based boards
+ items:
+ - const: kontron,imx93-bl-osm-s # Kontron BL i.MX93 OSM-S board
+ - const: kontron,imx93-osm-s # Kontron OSM-S i.MX93 SoM
+ - const: fsl,imx93
+
+ - description:
+ TQMa95xxSA is a series of SOM featuring NXP i.MX95 SoC variants.
+ It has the SMARC form factor and is designed to be placed on
+ different carrier boards. MB-SMARC-2 is a carrier reference design.
+ items:
+ - enum:
+ - tq,imx95-tqma9596sa-mb-smarc-2 # TQ-Systems GmbH i.MX95 TQMa95xxSA SOM on MB-SMARC-2
+ - const: tq,imx95-tqma9596sa # TQ-Systems GmbH i.MX95 TQMa95xxSA SOM
+ - const: fsl,imx95
+
+ - description:
+ Freescale Vybrid Platform Device Tree Bindings
+
+ For the Vybrid SoC family all variants with DDR controller are supported,
+ which is the VF5xx and VF6xx series. Out of historical reasons, in most
+ places the kernel uses vf610 to refer to the whole family.
+ The compatible string "fsl,vf610m4" is used for the secondary Cortex-M4
+ core support.
+ items:
+ - enum:
+ - fsl,vf500
+ - fsl,vf510
+ - fsl,vf600
+ - fsl,vf610
+ - fsl,vf610m4
+
+ - description: Toradex Colibri VF50 Module on Colibri Evaluation Board
+ items:
+ - const: toradex,vf500-colibri_vf50-on-eval
+ - const: toradex,vf500-colibri_vf50
+ - const: fsl,vf500
+
+ - description: VF610 based Boards
+ items:
+ - enum:
+ - fsl,vf610-twr # VF610 Tower Board
+ - lwn,bk4 # Liebherr BK4 controller
+ - phytec,vf610-cosmic # PHYTEC Cosmic/Cosmic+ Board
+ - const: fsl,vf610
+
+ - description: Toradex Colibri VF61 Module on Colibri Evaluation Board
+ items:
+ - const: toradex,vf610-colibri_vf61-on-eval
+ - const: toradex,vf610-colibri_vf61
+ - const: fsl,vf610
+
+ - description: ZII's VF610 based Boards
+ items:
+ - enum:
+ - zii,vf610cfu1 # ZII VF610 CFU1 Board
+ - zii,vf610dev-c # ZII VF610 Development Board, Rev C
+ - zii,vf610dev-b # ZII VF610 Development Board, Rev B
+ - zii,vf610scu4-aib # ZII VF610 SCU4 AIB
+ - zii,vf610dtu # ZII VF610 SSMB DTU Board
+ - zii,vf610spu3 # ZII VF610 SSMB SPU3 Board
+ - zii,vf610spb4 # ZII VF610 SPB4 Board
+ - const: zii,vf610dev
+ - const: fsl,vf610
+
+ - description: LS1012A based Boards
+ items:
+ - enum:
+ - ebs-systart,oxalis
+ - fsl,ls1012a-rdb
+ - fsl,ls1012a-frdm
+ - fsl,ls1012a-frwy
+ - fsl,ls1012a-qds
+ - const: fsl,ls1012a
+
+ - description: TQ Systems TQMLS12AL SoM on MBLS1012AL board
+ items:
+ - const: tq,ls1012a-tqmls1012al-mbls1012al
+ - const: tq,ls1012a-tqmls1012al
+ - const: fsl,ls1012a
+
+ - description: LS1021A based Boards
+ items:
+ - enum:
+ - fsl,ls1021a-iot
+ - fsl,ls1021a-moxa-uc-8410a
+ - fsl,ls1021a-qds
+ - fsl,ls1021a-tsn
+ - fsl,ls1021a-twr
+ - const: fsl,ls1021a
+
+ - description:
+ TQ-Systems TQMLS102xA is a series of socketable SOM featuring
+ LS102x system-on-chip variants. MBLS102xA mainboard can be used as
+ starterkit.
+ items:
+ - enum:
+ - tq,ls1021a-tqmls1021a-mbls102xa
+ - const: tq,ls1021a-tqmls1021a
+ - const: fsl,ls1021a
+
+ - description: LS1028A based Boards
+ items:
+ - enum:
+ - fsl,ls1028a-qds
+ - fsl,ls1028a-rdb
+ - const: fsl,ls1028a
+
+ - description: Kontron KBox A-230-LS
+ items:
+ - const: kontron,kbox-a-230-ls
+ - const: kontron,sl28-var4
+ - const: kontron,sl28
+ - const: fsl,ls1028a
+ - description:
+ Kontron SMARC-sAL28 board on the SMARC Eval Carrier 2.0
+ items:
+ - enum:
+ - kontron,sl28-var1-ads2
+ - kontron,sl28-var2-ads2
+ - kontron,sl28-var3-ads2
+ - kontron,sl28-var4-ads2
+ - enum:
+ - kontron,sl28-var1
+ - kontron,sl28-var2
+ - kontron,sl28-var3
+ - kontron,sl28-var4
+ - const: kontron,sl28
+ - const: fsl,ls1028a
+
+ - description:
+ Kontron SMARC-sAL28 board (on a generic/undefined carrier)
+ items:
+ - enum:
+ - kontron,sl28-var1
+ - kontron,sl28-var2
+ - kontron,sl28-var3
+ - kontron,sl28-var4
+ - const: kontron,sl28
+ - const: fsl,ls1028a
+
+ - description:
+ Kontron SMARC-sAL28 board (base). This is used in the base device
+ tree which is compatible with the overlays provided by the
+ vendor.
+ items:
+ - const: kontron,sl28
+ - const: fsl,ls1028a
+
+ - description: LS1043A based Boards
+ items:
+ - enum:
+ - fsl,ls1043a-rdb
+ - fsl,ls1043a-qds
+ - const: fsl,ls1043a
+
+ - description: TQ-Systems LS1043A based Boards
+ items:
+ - enum:
+ - tq,ls1043a-tqmls1043a-mbls10xxa
+ - const: tq,ls1043a-tqmls1043a
+ - const: fsl,ls1043a
+
+ - description: LS1046A based Boards
+ items:
+ - enum:
+ - fsl,ls1046a-frwy
+ - fsl,ls1046a-qds
+ - fsl,ls1046a-rdb
+ - const: fsl,ls1046a
+
+ - description: TQ-Systems LS1046A based Boards
+ items:
+ - enum:
+ - tq,ls1046a-tqmls1046a-mbls10xxa
+ - const: tq,ls1046a-tqmls1046a
+ - const: fsl,ls1046a
+
+ - description: LS1088A based Boards
+ items:
+ - enum:
+ - fsl,ls1088a-qds
+ - fsl,ls1088a-rdb
+ - const: fsl,ls1088a
+
+ - description: TQ-Systems LS1088A based Boards
+ items:
+ - enum:
+ - tq,ls1088a-tqmls1088a-mbls10xxa
+ - const: tq,ls1088a-tqmls1088a
+ - const: fsl,ls1088a
+
+ - description: LS2080A based Boards
+ items:
+ - enum:
+ - fsl,ls2080a-simu
+ - fsl,ls2080a-qds
+ - fsl,ls2080a-rdb
+ - const: fsl,ls2080a
+
+ - description: LS2081A based Boards
+ items:
+ - enum:
+ - fsl,ls2081a-rdb
+ - const: fsl,ls2081a
+
+ - description: LS2088A based Boards
+ items:
+ - enum:
+ - fsl,ls2088a-qds
+ - fsl,ls2088a-rdb
+ - const: fsl,ls2088a
+
+ - description: LX2160A based Boards
+ items:
+ - enum:
+ - fsl,lx2160a-bluebox3
+ - fsl,lx2160a-bluebox3-rev-a
+ - fsl,lx2160a-qds
+ - fsl,lx2160a-rdb
+ - fsl,lx2162a-qds
+ - const: fsl,lx2160a
+
+ - description: SolidRun LX2160A CEX-7 based Boards
+ items:
+ - enum:
+ - solidrun,clearfog-cx
+ - solidrun,honeycomb
+ - const: solidrun,lx2160a-cex7
+ - const: fsl,lx2160a
+
+ - description: SolidRun LX2162A SoM based Boards
+ items:
+ - enum:
+ - solidrun,lx2162a-clearfog
+ - const: solidrun,lx2162a-som
+ - const: fsl,lx2160a
+
+ - description:
+ TQ-Systems TQMLX2160A is a series of socketable SOM featuring
+ LX2160A system-on-chip variants. MBLX2160A mainboard can be used a
+ starterkit.
+ items:
+ - enum:
+ - tq,lx2160a-tqmlx2160a-mblx2160a
+ - const: tq,lx2160a-tqmlx2160a
+ - const: fsl,lx2160a
+
+ - description: S32G2 based Boards
+ items:
+ - enum:
+ - nxp,s32g274a-evb
+ - nxp,s32g274a-rdb2
+ - const: nxp,s32g2
+
+ - description: S32G3 based Boards
+ items:
+ - enum:
+ - nxp,s32g399a-rdb3
+ - const: nxp,s32g3
+
+ - description: S32V234 based Boards
+ items:
+ - enum:
+ - fsl,s32v234-evb # S32V234-EVB2 Customer Evaluation Board
+ - const: fsl,s32v234
+
+ - description: Traverse LS1088A based Boards
+ items:
+ - enum:
+ - traverse,ten64 # Ten64 Networking Appliance / Board
+ - const: fsl,ls1088a
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/fw-cfg.txt b/Documentation/devicetree/bindings/arm/fw-cfg.txt
deleted file mode 100644
index fd54e1db2156..000000000000
--- a/Documentation/devicetree/bindings/arm/fw-cfg.txt
+++ /dev/null
@@ -1,38 +0,0 @@
-* QEMU Firmware Configuration bindings for ARM
-
-QEMU's arm-softmmu and aarch64-softmmu emulation / virtualization targets
-provide the following Firmware Configuration interface on the "virt" machine
-type:
-
-- A write-only, 16-bit wide selector (or control) register,
-- a read-write, 64-bit wide data register.
-
-QEMU exposes the control and data register to ARM guests as memory mapped
-registers; their location is communicated to the guest's UEFI firmware in the
-DTB that QEMU places at the bottom of the guest's DRAM.
-
-The authoritative guest-side hardware interface documentation to the fw_cfg
-device can be found in "docs/specs/fw_cfg.txt" in the QEMU source tree.
-
-
-Required properties:
-
-- compatible: "qemu,fw-cfg-mmio".
-
-- reg: the MMIO region used by the device.
- * Bytes 0x0 to 0x7 cover the data register.
- * Bytes 0x8 to 0x9 cover the selector register.
- * Further registers may be appended to the region in case of future interface
- revisions / feature bits.
-
-Example:
-
-/ {
- #size-cells = <0x2>;
- #address-cells = <0x2>;
-
- fw-cfg@9020000 {
- compatible = "qemu,fw-cfg-mmio";
- reg = <0x0 0x9020000 0x0 0xa>;
- };
-};
diff --git a/Documentation/devicetree/bindings/arm/gemini.txt b/Documentation/devicetree/bindings/arm/gemini.txt
deleted file mode 100644
index 55bf7ce96c44..000000000000
--- a/Documentation/devicetree/bindings/arm/gemini.txt
+++ /dev/null
@@ -1,108 +0,0 @@
-Cortina systems Gemini platforms
-
-The Gemini SoC is the project name for an ARMv4 FA525-based SoC originally
-produced by Storlink Semiconductor around 2005. The company was renamed
-later renamed Storm Semiconductor. The chip product name is Storlink SL3516.
-It was derived from earlier products from Storm named SL3316 (Centroid) and
-SL3512 (Bulverde).
-
-Storm Semiconductor was acquired by Cortina Systems in 2008 and the SoC was
-produced and used for NAS and similar usecases. In 2014 Cortina Systems was
-in turn acquired by Inphi, who seem to have discontinued this product family.
-
-Many of the IP blocks used in the SoC comes from Faraday Technology.
-
-Required properties (in root node):
- compatible = "cortina,gemini";
-
-Required nodes:
-
-- soc: the SoC should be represented by a simple bus encompassing all the
- onchip devices, this is referred to as the soc bus node.
-
-- syscon: the soc bus node must have a system controller node pointing to the
- global control registers, with the compatible string
- "cortina,gemini-syscon", "syscon";
-
- Required properties on the syscon:
- - reg: syscon register location and size.
- - #clock-cells: should be set to <1> - the system controller is also a
- clock provider.
- - #reset-cells: should be set to <1> - the system controller is also a
- reset line provider.
-
- The clock sources have shorthand defines in the include file:
- <dt-bindings/clock/cortina,gemini-clock.h>
-
- The reset lines have shorthand defines in the include file:
- <dt-bindings/reset/cortina,gemini-reset.h>
-
-- timer: the soc bus node must have a timer node pointing to the SoC timer
- block, with the compatible string "cortina,gemini-timer"
- See: clocksource/cortina,gemini-timer.txt
-
-- interrupt-controller: the sob bus node must have an interrupt controller
- node pointing to the SoC interrupt controller block, with the compatible
- string "cortina,gemini-interrupt-controller"
- See interrupt-controller/cortina,gemini-interrupt-controller.txt
-
-Example:
-
-/ {
- model = "Foo Gemini Machine";
- compatible = "cortina,gemini";
- #address-cells = <1>;
- #size-cells = <1>;
-
- memory {
- device_type = "memory";
- reg = <0x00000000 0x8000000>;
- };
-
- soc {
- #address-cells = <1>;
- #size-cells = <1>;
- ranges;
- compatible = "simple-bus";
- interrupt-parent = <&intcon>;
-
- syscon: syscon@40000000 {
- compatible = "cortina,gemini-syscon", "syscon";
- reg = <0x40000000 0x1000>;
- #clock-cells = <1>;
- #reset-cells = <1>;
- };
-
- uart0: serial@42000000 {
- compatible = "ns16550a";
- reg = <0x42000000 0x100>;
- resets = <&syscon GEMINI_RESET_UART>;
- clocks = <&syscon GEMINI_CLK_UART>;
- interrupts = <18 IRQ_TYPE_LEVEL_HIGH>;
- reg-shift = <2>;
- };
-
- timer@43000000 {
- compatible = "cortina,gemini-timer";
- reg = <0x43000000 0x1000>;
- interrupt-parent = <&intcon>;
- interrupts = <14 IRQ_TYPE_EDGE_FALLING>, /* Timer 1 */
- <15 IRQ_TYPE_EDGE_FALLING>, /* Timer 2 */
- <16 IRQ_TYPE_EDGE_FALLING>; /* Timer 3 */
- resets = <&syscon GEMINI_RESET_TIMER>;
- /* APB clock or RTC clock */
- clocks = <&syscon GEMINI_CLK_APB>,
- <&syscon GEMINI_CLK_RTC>;
- clock-names = "PCLK", "EXTCLK";
- syscon = <&syscon>;
- };
-
- intcon: interrupt-controller@48000000 {
- compatible = "cortina,gemini-interrupt-controller";
- reg = <0x48000000 0x1000>;
- resets = <&syscon GEMINI_RESET_INTCON0>;
- interrupt-controller;
- #interrupt-cells = <2>;
- };
- };
-};
diff --git a/Documentation/devicetree/bindings/arm/gemini.yaml b/Documentation/devicetree/bindings/arm/gemini.yaml
new file mode 100644
index 000000000000..f6a0b675830f
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/gemini.yaml
@@ -0,0 +1,95 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/gemini.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Cortina systems Gemini platforms
+
+description: |
+ The Gemini SoC is the project name for an ARMv4 FA525-based SoC originally
+ produced by Storlink Semiconductor around 2005. The company was renamed
+ later renamed Storm Semiconductor. The chip product name is Storlink SL3516.
+ It was derived from earlier products from Storm named SL3316 (Centroid) and
+ SL3512 (Bulverde).
+
+ Storm Semiconductor was acquired by Cortina Systems in 2008 and the SoC was
+ produced and used for NAS and similar usecases. In 2014 Cortina Systems was
+ in turn acquired by Inphi, who seem to have discontinued this product family.
+
+ Many of the IP blocks used in the SoC comes from Faraday Technology.
+
+maintainers:
+ - Linus Walleij <linus.walleij@linaro.org>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+
+ - description: Storlink Semiconductor Gemini324 EV-Board also known
+ as Storm Semiconductor SL93512R_BRD
+ items:
+ - const: storlink,gemini324
+ - const: storm,sl93512r
+ - const: cortina,gemini
+
+ - description: D-Link DIR-685 Xtreme N Storage Router
+ items:
+ - const: dlink,dir-685
+ - const: cortina,gemini
+
+ - description: D-Link DNS-313 1-Bay Network Storage Enclosure
+ items:
+ - const: dlink,dns-313
+ - const: cortina,gemini
+
+ - description: Edimax NS-2502
+ items:
+ - const: edimax,ns-2502
+ - const: cortina,gemini
+
+ - description: ITian Square One SQ201
+ items:
+ - const: itian,sq201
+ - const: cortina,gemini
+
+ - description: Raidsonic NAS IB-4220-B
+ items:
+ - const: raidsonic,ib-4220-b
+ - const: cortina,gemini
+
+ - description: SSI 1328
+ items:
+ - const: ssi,1328
+ - const: cortina,gemini
+
+ - description: Teltonika RUT1xx Mobile Router
+ items:
+ - const: teltonika,rut1xx
+ - const: cortina,gemini
+
+ - description: Wiligear Wiliboard WBD-111
+ items:
+ - const: wiligear,wiliboard-wbd111
+ - const: cortina,gemini
+
+ - description: Wiligear Wiliboard WBD-222
+ items:
+ - const: wiligear,wiliboard-wbd222
+ - const: cortina,gemini
+
+ - description: Wiligear Wiliboard WBD-111 - old incorrect binding
+ items:
+ - const: wiliboard,wbd111
+ - const: cortina,gemini
+ deprecated: true
+
+ - description: Wiligear Wiliboard WBD-222 - old incorrect binding
+ items:
+ - const: wiliboard,wbd222
+ - const: cortina,gemini
+ deprecated: true
+
+additionalProperties: true
diff --git a/Documentation/devicetree/bindings/arm/google.yaml b/Documentation/devicetree/bindings/arm/google.yaml
new file mode 100644
index 000000000000..99961e5282e5
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/google.yaml
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/google.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Google Tensor platforms
+
+maintainers:
+ - Peter Griffin <peter.griffin@linaro.org>
+
+description: |
+ ARM platforms using SoCs designed by Google branded "Tensor" used in Pixel
+ devices.
+
+ Currently upstream this is devices using "gs101" SoC which is found in Pixel
+ 6, Pixel 6 Pro and Pixel 6a.
+
+ Google have a few different names for the SoC:
+ - Marketing name ("Tensor")
+ - Codename ("Whitechapel")
+ - SoC ID ("gs101")
+ - Die ID ("S5P9845")
+
+ Likewise there are a couple of names for the actual device
+ - Marketing name ("Pixel 6")
+ - Codename ("Oriole")
+
+ Devicetrees should use the lowercased SoC ID and lowercased board codename,
+ e.g. gs101 and gs101-oriole.
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ - description: Google Pixel 6 or 6 Pro (Oriole or Raven)
+ items:
+ - enum:
+ - google,gs101-oriole
+ - google,gs101-raven
+ - const: google,gs101
+
+ # Bootloader requires empty ect node to be present
+ ect:
+ type: object
+ additionalProperties: false
+
+required:
+ - ect
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/hisilicon/controller/cpuctrl.yaml b/Documentation/devicetree/bindings/arm/hisilicon/controller/cpuctrl.yaml
new file mode 100644
index 000000000000..4fc208d3995e
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/hisilicon/controller/cpuctrl.yaml
@@ -0,0 +1,74 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/hisilicon/controller/cpuctrl.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Hisilicon CPU controller
+
+maintainers:
+ - Wei Xu <xuwei5@hisilicon.com>
+
+description: |
+ The clock registers and power registers of secondary cores are defined
+ in CPU controller, especially in HIX5HD2 SoC.
+
+properties:
+ compatible:
+ items:
+ - const: hisilicon,cpuctrl
+
+ reg:
+ maxItems: 1
+
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 1
+
+ ranges: true
+
+patternProperties:
+ "^clock@[0-9a-f]+$":
+ type: object
+ additionalProperties: false
+
+ properties:
+ compatible:
+ const: hisilicon,hix5hd2-clock
+
+ reg:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 1
+
+ required:
+ - compatible
+ - reg
+ - "#clock-cells"
+
+required:
+ - compatible
+ - reg
+
+additionalProperties:
+ type: object
+
+examples:
+ - |
+ cpuctrl@a22000 {
+ compatible = "hisilicon,cpuctrl";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0x00a22000 0x2000>;
+ ranges = <0 0x00a22000 0x2000>;
+
+ clock: clock@0 {
+ compatible = "hisilicon,hix5hd2-clock";
+ reg = <0 0x2000>;
+ #clock-cells = <1>;
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/arm/hisilicon/controller/hi3798cv200-perictrl.yaml b/Documentation/devicetree/bindings/arm/hisilicon/controller/hi3798cv200-perictrl.yaml
new file mode 100644
index 000000000000..cba1937aad9a
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/hisilicon/controller/hi3798cv200-perictrl.yaml
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/hisilicon/controller/hi3798cv200-perictrl.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Hisilicon Hi3798CV200 Peripheral Controller
+
+maintainers:
+ - Wei Xu <xuwei5@hisilicon.com>
+
+description: |
+ The Hi3798CV200 Peripheral Controller controls peripherals, queries
+ their status, and configures some functions of peripherals.
+
+properties:
+ compatible:
+ items:
+ - const: hisilicon,hi3798cv200-perictrl
+ - const: syscon
+ - const: simple-mfd
+
+ reg:
+ maxItems: 1
+
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 1
+
+ ranges: true
+
+required:
+ - compatible
+ - reg
+ - "#address-cells"
+ - "#size-cells"
+ - ranges
+
+additionalProperties:
+ type: object
+
+examples:
+ - |
+ peripheral-controller@8a20000 {
+ compatible = "hisilicon,hi3798cv200-perictrl", "syscon", "simple-mfd";
+ reg = <0x8a20000 0x1000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0x0 0x8a20000 0x1000>;
+
+ phy@850 {
+ compatible = "hisilicon,hi3798cv200-combphy";
+ reg = <0x850 0x8>;
+ #phy-cells = <1>;
+ clocks = <&crg 42>;
+ resets = <&crg 0x188 4>;
+ assigned-clocks = <&crg 42>;
+ assigned-clock-rates = <100000000>;
+ hisilicon,fixed-mode = <4>;
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/arm/hisilicon/controller/hi6220-domain-ctrl.yaml b/Documentation/devicetree/bindings/arm/hisilicon/controller/hi6220-domain-ctrl.yaml
new file mode 100644
index 000000000000..6ea6d7ee7a14
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/hisilicon/controller/hi6220-domain-ctrl.yaml
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/hisilicon/controller/hi6220-domain-ctrl.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Hisilicon Hi6220 domain controller
+
+maintainers:
+ - Wei Xu <xuwei5@hisilicon.com>
+
+description: |
+ Hisilicon designs some special domain controllers for mobile platform,
+ such as: the power Always On domain controller, the Media domain
+ controller(e.g. codec, G3D ...) and the Power Management domain
+ controller.
+
+ The compatible names of each domain controller are as follows:
+ Power Always ON domain controller --> hisilicon,hi6220-aoctrl
+ Media domain controller --> hisilicon,hi6220-mediactrl
+ Power Management domain controller --> hisilicon,hi6220-pmctrl
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - hisilicon,hi6220-aoctrl
+ - hisilicon,hi6220-mediactrl
+ - hisilicon,hi6220-pmctrl
+ - const: syscon
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ '#reset-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ ao_ctrl@f7800000 {
+ compatible = "hisilicon,hi6220-aoctrl", "syscon";
+ reg = <0xf7800000 0x2000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
+
+ media_ctrl@f4410000 {
+ compatible = "hisilicon,hi6220-mediactrl", "syscon";
+ reg = <0xf4410000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ pm_ctrl@f7032000 {
+ compatible = "hisilicon,hi6220-pmctrl", "syscon";
+ reg = <0xf7032000 0x1000>;
+ #clock-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/arm/hisilicon/controller/hip04-bootwrapper.yaml b/Documentation/devicetree/bindings/arm/hisilicon/controller/hip04-bootwrapper.yaml
new file mode 100644
index 000000000000..483caf0ce25b
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/hisilicon/controller/hip04-bootwrapper.yaml
@@ -0,0 +1,35 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/hisilicon/controller/hip04-bootwrapper.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Bootwrapper boot method
+
+maintainers:
+ - Wei Xu <xuwei5@hisilicon.com>
+
+description: Bootwrapper boot method (software protocol on SMP)
+
+properties:
+ compatible:
+ items:
+ - const: hisilicon,hip04-bootwrapper
+
+ boot-method:
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ description: |
+ Address and size of boot method.
+ [0]: bootwrapper physical address
+ [1]: bootwrapper size
+ [2]: relocation physical address
+ [3]: relocation size
+ minItems: 2
+ maxItems: 4
+
+required:
+ - compatible
+ - boot-method
+
+additionalProperties: false
+...
diff --git a/Documentation/devicetree/bindings/arm/hisilicon/controller/hip04-fabric.yaml b/Documentation/devicetree/bindings/arm/hisilicon/controller/hip04-fabric.yaml
new file mode 100644
index 000000000000..60c516a04ad5
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/hisilicon/controller/hip04-fabric.yaml
@@ -0,0 +1,27 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/hisilicon/controller/hip04-fabric.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Hisilicon Fabric controller
+
+maintainers:
+ - Wei Xu <xuwei5@hisilicon.com>
+
+description: Hisilicon Fabric controller
+
+properties:
+ compatible:
+ items:
+ - const: hisilicon,hip04-fabric
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+...
diff --git a/Documentation/devicetree/bindings/arm/hisilicon/controller/pctrl.yaml b/Documentation/devicetree/bindings/arm/hisilicon/controller/pctrl.yaml
new file mode 100644
index 000000000000..6d5065872809
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/hisilicon/controller/pctrl.yaml
@@ -0,0 +1,34 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/hisilicon/controller/pctrl.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Peripheral misc control register
+
+maintainers:
+ - Wei Xu <xuwei5@hisilicon.com>
+
+description: Peripheral misc control register
+
+properties:
+ compatible:
+ items:
+ - const: hisilicon,pctrl
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ pctrl@fca09000 {
+ compatible = "hisilicon,pctrl";
+ reg = <0xfca09000 0x1000>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/arm/hisilicon/controller/sysctrl.yaml b/Documentation/devicetree/bindings/arm/hisilicon/controller/sysctrl.yaml
new file mode 100644
index 000000000000..7a221e1c09df
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/hisilicon/controller/sysctrl.yaml
@@ -0,0 +1,149 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/hisilicon/controller/sysctrl.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Hisilicon system controller
+
+maintainers:
+ - Wei Xu <xuwei5@hisilicon.com>
+
+description: |
+ The Hisilicon system controller is used on many Hisilicon boards, it can be
+ used to assist the slave core startup, reboot the system, etc.
+
+ There are some variants of the Hisilicon system controller, such as HiP01,
+ Hi3519, Hi6220 system controller, each of them is mostly compatible with the
+ Hisilicon system controller, but some same registers located at different
+ offset. In addition, the HiP01 system controller has some specific control
+ registers for HIP01 SoC family, such as slave core boot.
+
+ The compatible names of each system controller are as follows:
+ Hisilicon system controller --> hisilicon,sysctrl
+ HiP01 system controller --> hisilicon,hip01-sysctrl
+ Hi6220 system controller --> hisilicon,hi6220-sysctrl
+ Hi3519 system controller --> hisilicon,hi3519-sysctrl
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: hisilicon,hi6220-sysctrl
+ then:
+ required:
+ - '#clock-cells'
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - hisilicon,sysctrl
+ - hisilicon,hi6220-sysctrl
+ - hisilicon,hi3519-sysctrl
+ - const: syscon
+ - items:
+ - const: hisilicon,hip01-sysctrl
+ - const: hisilicon,sysctrl
+
+ reg:
+ maxItems: 1
+
+ smp-offset:
+ description: |
+ offset in sysctrl for notifying slave cpu booting
+ cpu 1, reg;
+ cpu 2, reg + 0x4;
+ cpu 3, reg + 0x8;
+ If reg value is not zero, cpun exit wfi and go
+ $ref: /schemas/types.yaml#/definitions/uint32
+
+ resume-offset:
+ description: offset in sysctrl for notifying cpu0 when resume
+ $ref: /schemas/types.yaml#/definitions/uint32
+
+ reboot-offset:
+ description: offset in sysctrl for system reboot
+ $ref: /schemas/types.yaml#/definitions/uint32
+
+ '#clock-cells':
+ const: 1
+
+ '#reset-cells':
+ const: 1
+
+ '#address-cells':
+ const: 1
+
+ '#size-cells':
+ const: 1
+
+ ranges: true
+
+patternProperties:
+ '^clock@':
+ type: object
+ additionalProperties: false
+
+ properties:
+ compatible:
+ enum:
+ - hisilicon,hi3620-clock
+ - hisilicon,hi3620-mmc-clock
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+
+additionalProperties:
+ type: object
+
+examples:
+ - |
+ /* Hisilicon system controller */
+ system-controller@802000 {
+ compatible = "hisilicon,sysctrl", "syscon";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0 0x802000 0x1000>;
+ reg = <0x802000 0x1000>;
+
+ smp-offset = <0x31c>;
+ resume-offset = <0x308>;
+ reboot-offset = <0x4>;
+
+ clock: clock@0 {
+ compatible = "hisilicon,hi3620-clock";
+ reg = <0 0x10000>;
+ #clock-cells = <1>;
+ };
+ };
+
+ /* HiP01 system controller */
+ system-controller@10000000 {
+ compatible = "hisilicon,hip01-sysctrl", "hisilicon,sysctrl";
+ reg = <0x10000000 0x1000>;
+ reboot-offset = <0x4>;
+ };
+
+ /* Hi6220 system controller */
+ system-controller@f7030000 {
+ compatible = "hisilicon,hi6220-sysctrl", "syscon";
+ reg = <0xf7030000 0x2000>;
+ #clock-cells = <1>;
+ };
+
+ /* Hi3519 system controller */
+ system-controller@12010000 {
+ compatible = "hisilicon,hi3519-sysctrl", "syscon";
+ reg = <0x12010000 0x1000>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/arm/hisilicon/hi3519-sysctrl.txt b/Documentation/devicetree/bindings/arm/hisilicon/hi3519-sysctrl.txt
deleted file mode 100644
index 115c5be0bd0b..000000000000
--- a/Documentation/devicetree/bindings/arm/hisilicon/hi3519-sysctrl.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-* Hisilicon Hi3519 System Controller Block
-
-This bindings use the following binding:
-Documentation/devicetree/bindings/mfd/syscon.txt
-
-Required properties:
-- compatible: "hisilicon,hi3519-sysctrl".
-- reg: the register region of this block
-
-Examples:
-sysctrl: system-controller@12010000 {
- compatible = "hisilicon,hi3519-sysctrl", "syscon";
- reg = <0x12010000 0x1000>;
-};
diff --git a/Documentation/devicetree/bindings/arm/hisilicon/hisilicon-low-pin-count.txt b/Documentation/devicetree/bindings/arm/hisilicon/hisilicon-low-pin-count.txt
deleted file mode 100644
index 10bd35f9207f..000000000000
--- a/Documentation/devicetree/bindings/arm/hisilicon/hisilicon-low-pin-count.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-Hisilicon Hip06 Low Pin Count device
- Hisilicon Hip06 SoCs implement a Low Pin Count (LPC) controller, which
- provides I/O access to some legacy ISA devices.
- Hip06 is based on arm64 architecture where there is no I/O space. So, the
- I/O ports here are not CPU addresses, and there is no 'ranges' property in
- LPC device node.
-
-Required properties:
-- compatible: value should be as follows:
- (a) "hisilicon,hip06-lpc"
- (b) "hisilicon,hip07-lpc"
-- #address-cells: must be 2 which stick to the ISA/EISA binding doc.
-- #size-cells: must be 1 which stick to the ISA/EISA binding doc.
-- reg: base memory range where the LPC register set is mapped.
-
-Note:
- The node name before '@' must be "isa" to represent the binding stick to the
- ISA/EISA binding specification.
-
-Example:
-
-isa@a01b0000 {
- compatible = "hisilicon,hip06-lpc";
- #address-cells = <2>;
- #size-cells = <1>;
- reg = <0x0 0xa01b0000 0x0 0x1000>;
-
- ipmi0: bt@e4 {
- compatible = "ipmi-bt";
- device_type = "ipmi";
- reg = <0x01 0xe4 0x04>;
- };
-};
diff --git a/Documentation/devicetree/bindings/arm/hisilicon/hisilicon.txt b/Documentation/devicetree/bindings/arm/hisilicon/hisilicon.txt
deleted file mode 100644
index a97f643e7d1c..000000000000
--- a/Documentation/devicetree/bindings/arm/hisilicon/hisilicon.txt
+++ /dev/null
@@ -1,319 +0,0 @@
-Hisilicon Platforms Device Tree Bindings
-----------------------------------------------------
-Hi3660 SoC
-Required root node properties:
- - compatible = "hisilicon,hi3660";
-
-HiKey960 Board
-Required root node properties:
- - compatible = "hisilicon,hi3660-hikey960", "hisilicon,hi3660";
-
-Hi3670 SoC
-Required root node properties:
- - compatible = "hisilicon,hi3670";
-
-HiKey970 Board
-Required root node properties:
- - compatible = "hisilicon,hi3670-hikey970", "hisilicon,hi3670";
-
-Hi3798cv200 SoC
-Required root node properties:
- - compatible = "hisilicon,hi3798cv200";
-
-Hi3798cv200 Poplar Board
-Required root node properties:
- - compatible = "hisilicon,hi3798cv200-poplar", "hisilicon,hi3798cv200";
-
-Hi4511 Board
-Required root node properties:
- - compatible = "hisilicon,hi3620-hi4511";
-
-Hi6220 SoC
-Required root node properties:
- - compatible = "hisilicon,hi6220";
-
-HiKey Board
-Required root node properties:
- - compatible = "hisilicon,hi6220-hikey", "hisilicon,hi6220";
-
-HiP01 ca9x2 Board
-Required root node properties:
- - compatible = "hisilicon,hip01-ca9x2";
-
-HiP04 D01 Board
-Required root node properties:
- - compatible = "hisilicon,hip04-d01";
-
-HiP05 D02 Board
-Required root node properties:
- - compatible = "hisilicon,hip05-d02";
-
-HiP06 D03 Board
-Required root node properties:
- - compatible = "hisilicon,hip06-d03";
-
-HiP07 D05 Board
-Required root node properties:
- - compatible = "hisilicon,hip07-d05";
-
-Hisilicon system controller
-
-Required properties:
-- compatible : "hisilicon,sysctrl"
-- reg : Register address and size
-
-Optional properties:
-- smp-offset : offset in sysctrl for notifying slave cpu booting
- cpu 1, reg;
- cpu 2, reg + 0x4;
- cpu 3, reg + 0x8;
- If reg value is not zero, cpun exit wfi and go
-- resume-offset : offset in sysctrl for notifying cpu0 when resume
-- reboot-offset : offset in sysctrl for system reboot
-
-Example:
-
- /* for Hi3620 */
- sysctrl: system-controller@fc802000 {
- compatible = "hisilicon,sysctrl";
- reg = <0xfc802000 0x1000>;
- smp-offset = <0x31c>;
- resume-offset = <0x308>;
- reboot-offset = <0x4>;
- };
-
------------------------------------------------------------------------
-Hisilicon Hi3798CV200 Peripheral Controller
-
-The Hi3798CV200 Peripheral Controller controls peripherals, queries
-their status, and configures some functions of peripherals.
-
-Required properties:
-- compatible: Should contain "hisilicon,hi3798cv200-perictrl", "syscon"
- and "simple-mfd".
-- reg: Register address and size of Peripheral Controller.
-- #address-cells: Should be 1.
-- #size-cells: Should be 1.
-
-Examples:
-
- perictrl: peripheral-controller@8a20000 {
- compatible = "hisilicon,hi3798cv200-perictrl", "syscon",
- "simple-mfd";
- reg = <0x8a20000 0x1000>;
- #address-cells = <1>;
- #size-cells = <1>;
- };
-
------------------------------------------------------------------------
-Hisilicon Hi6220 system controller
-
-Required properties:
-- compatible : "hisilicon,hi6220-sysctrl"
-- reg : Register address and size
-- #clock-cells: should be set to 1, many clock registers are defined
- under this controller and this property must be present.
-
-Hisilicon designs this controller as one of the system controllers,
-its main functions are the same as Hisilicon system controller, but
-the register offset of some core modules are different.
-
-Example:
- /*for Hi6220*/
- sys_ctrl: sys_ctrl@f7030000 {
- compatible = "hisilicon,hi6220-sysctrl", "syscon";
- reg = <0x0 0xf7030000 0x0 0x2000>;
- #clock-cells = <1>;
- };
-
-
-Hisilicon Hi6220 Power Always ON domain controller
-
-Required properties:
-- compatible : "hisilicon,hi6220-aoctrl"
-- reg : Register address and size
-- #clock-cells: should be set to 1, many clock registers are defined
- under this controller and this property must be present.
-
-Hisilicon designs this system controller to control the power always
-on domain for mobile platform.
-
-Example:
- /*for Hi6220*/
- ao_ctrl: ao_ctrl@f7800000 {
- compatible = "hisilicon,hi6220-aoctrl", "syscon";
- reg = <0x0 0xf7800000 0x0 0x2000>;
- #clock-cells = <1>;
- };
-
-
-Hisilicon Hi6220 Media domain controller
-
-Required properties:
-- compatible : "hisilicon,hi6220-mediactrl"
-- reg : Register address and size
-- #clock-cells: should be set to 1, many clock registers are defined
- under this controller and this property must be present.
-
-Hisilicon designs this system controller to control the multimedia
-domain(e.g. codec, G3D ...) for mobile platform.
-
-Example:
- /*for Hi6220*/
- media_ctrl: media_ctrl@f4410000 {
- compatible = "hisilicon,hi6220-mediactrl", "syscon";
- reg = <0x0 0xf4410000 0x0 0x1000>;
- #clock-cells = <1>;
- };
-
-
-Hisilicon Hi6220 Power Management domain controller
-
-Required properties:
-- compatible : "hisilicon,hi6220-pmctrl"
-- reg : Register address and size
-- #clock-cells: should be set to 1, some clock registers are define
- under this controller and this property must be present.
-
-Hisilicon designs this system controller to control the power management
-domain for mobile platform.
-
-Example:
- /*for Hi6220*/
- pm_ctrl: pm_ctrl@f7032000 {
- compatible = "hisilicon,hi6220-pmctrl", "syscon";
- reg = <0x0 0xf7032000 0x0 0x1000>;
- #clock-cells = <1>;
- };
-
-
-Hisilicon Hi6220 SRAM controller
-
-Required properties:
-- compatible : "hisilicon,hi6220-sramctrl", "syscon"
-- reg : Register address and size
-
-Hisilicon's SoCs use sram for multiple purpose; on Hi6220 there have several
-SRAM banks for power management, modem, security, etc. Further, use "syscon"
-managing the common sram which can be shared by multiple modules.
-
-Example:
- /*for Hi6220*/
- sram: sram@fff80000 {
- compatible = "hisilicon,hi6220-sramctrl", "syscon";
- reg = <0x0 0xfff80000 0x0 0x12000>;
- };
-
------------------------------------------------------------------------
-Hisilicon HiP01 system controller
-
-Required properties:
-- compatible : "hisilicon,hip01-sysctrl"
-- reg : Register address and size
-
-The HiP01 system controller is mostly compatible with hisilicon
-system controller,but it has some specific control registers for
-HIP01 SoC family, such as slave core boot, and also some same
-registers located at different offset.
-
-Example:
-
- /* for hip01-ca9x2 */
- sysctrl: system-controller@10000000 {
- compatible = "hisilicon,hip01-sysctrl", "hisilicon,sysctrl";
- reg = <0x10000000 0x1000>;
- reboot-offset = <0x4>;
- };
-
------------------------------------------------------------------------
-Hisilicon HiP05/HiP06 PCIe-SAS sub system controller
-
-Required properties:
-- compatible : "hisilicon,pcie-sas-subctrl", "syscon";
-- reg : Register address and size
-
-The PCIe-SAS sub system controller is shared by PCIe and SAS controllers in
-HiP05 or HiP06 Soc to implement some basic configurations.
-
-Example:
- /* for HiP05 PCIe-SAS sub system */
- pcie_sas: system_controller@b0000000 {
- compatible = "hisilicon,pcie-sas-subctrl", "syscon";
- reg = <0xb0000000 0x10000>;
- };
-
-Hisilicon HiP05/HiP06 PERI sub system controller
-
-Required properties:
-- compatible : "hisilicon,peri-subctrl", "syscon";
-- reg : Register address and size
-
-The PERI sub system controller is shared by peripheral controllers in
-HiP05 or HiP06 Soc to implement some basic configurations. The peripheral
-controllers include mdio, ddr, iic, uart, timer and so on.
-
-Example:
- /* for HiP05 sub peri system */
- peri_c_subctrl: syscon@80000000 {
- compatible = "hisilicon,peri-subctrl", "syscon";
- reg = <0x0 0x80000000 0x0 0x10000>;
- };
-
-Hisilicon HiP05/HiP06 DSA sub system controller
-
-Required properties:
-- compatible : "hisilicon,dsa-subctrl", "syscon";
-- reg : Register address and size
-
-The DSA sub system controller is shared by peripheral controllers in
-HiP05 or HiP06 Soc to implement some basic configurations.
-
-Example:
- /* for HiP05 dsa sub system */
- pcie_sas: system_controller@a0000000 {
- compatible = "hisilicon,dsa-subctrl", "syscon";
- reg = <0xa0000000 0x10000>;
- };
-
------------------------------------------------------------------------
-Hisilicon CPU controller
-
-Required properties:
-- compatible : "hisilicon,cpuctrl"
-- reg : Register address and size
-
-The clock registers and power registers of secondary cores are defined
-in CPU controller, especially in HIX5HD2 SoC.
-
------------------------------------------------------------------------
-PCTRL: Peripheral misc control register
-
-Required Properties:
-- compatible: "hisilicon,pctrl"
-- reg: Address and size of pctrl.
-
-Example:
-
- /* for Hi3620 */
- pctrl: pctrl@fca09000 {
- compatible = "hisilicon,pctrl";
- reg = <0xfca09000 0x1000>;
- };
-
------------------------------------------------------------------------
-Fabric:
-
-Required Properties:
-- compatible: "hisilicon,hip04-fabric";
-- reg: Address and size of Fabric
-
------------------------------------------------------------------------
-Bootwrapper boot method (software protocol on SMP):
-
-Required Properties:
-- compatible: "hisilicon,hip04-bootwrapper";
-- boot-method: Address and size of boot method.
- [0]: bootwrapper physical address
- [1]: bootwrapper size
- [2]: relocation physical address
- [3]: relocation size
diff --git a/Documentation/devicetree/bindings/arm/hisilicon/hisilicon.yaml b/Documentation/devicetree/bindings/arm/hisilicon/hisilicon.yaml
new file mode 100644
index 000000000000..540876322040
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/hisilicon/hisilicon.yaml
@@ -0,0 +1,70 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/hisilicon/hisilicon.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Hisilicon Platforms
+
+maintainers:
+ - Wei Xu <xuwei5@hisilicon.com>
+
+properties:
+ $nodename:
+ const: '/'
+
+ compatible:
+ oneOf:
+ - description: Hi3660 based boards.
+ items:
+ - const: hisilicon,hi3660-hikey960
+ - const: hisilicon,hi3660
+
+ - description: Hi3670 based boards.
+ items:
+ - const: hisilicon,hi3670-hikey970
+ - const: hisilicon,hi3670
+
+ - description: Hi3798cv200 based boards.
+ items:
+ - const: hisilicon,hi3798cv200-poplar
+ - const: hisilicon,hi3798cv200
+
+ - description: Hi4511 Board
+ items:
+ - const: hisilicon,hi3620-hi4511
+
+ - description: Hi6220 based boards.
+ items:
+ - const: hisilicon,hi6220-hikey
+ - const: hisilicon,hi6220
+
+ - description: HiP01 based boards.
+ items:
+ - const: hisilicon,hip01-ca9x2
+ - const: hisilicon,hip01
+
+ - description: HiP04 D01 Board
+ items:
+ - const: hisilicon,hip04-d01
+
+ - description: HiP05 D02 Board
+ items:
+ - const: hisilicon,hip05-d02
+
+ - description: HiP06 D03 Board
+ items:
+ - const: hisilicon,hip06-d03
+
+ - description: HiP07 D05 Board
+ items:
+ - const: hisilicon,hip07-d05
+
+ - description: SD5203 based boards
+ items:
+ - const: H836ASDJ
+ - const: hisilicon,sd5203
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/hisilicon/low-pin-count.yaml b/Documentation/devicetree/bindings/arm/hisilicon/low-pin-count.yaml
new file mode 100644
index 000000000000..3b36e683bb15
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/hisilicon/low-pin-count.yaml
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/hisilicon/low-pin-count.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Hisilicon HiP06 Low Pin Count device
+
+maintainers:
+ - Wei Xu <xuwei5@hisilicon.com>
+
+description: |
+ Hisilicon HiP06 SoCs implement a Low Pin Count (LPC) controller, which
+ provides I/O access to some legacy ISA devices.
+ HiP06 is based on arm64 architecture where there is no I/O space. So, the
+ I/O ports here are not CPU addresses, and there is no 'ranges' property in
+ LPC device node.
+
+properties:
+ $nodename:
+ pattern: '^isa@[0-9a-f]+$'
+ description: |
+ The node name before '@' must be "isa" to represent the binding stick
+ to the ISA/EISA binding specification.
+
+ compatible:
+ enum:
+ - hisilicon,hip06-lpc
+ - hisilicon,hip07-lpc
+
+ reg:
+ maxItems: 1
+
+ '#address-cells':
+ const: 2
+
+ '#size-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+
+additionalProperties:
+ type: object
+
+examples:
+ - |
+ isa@a01b0000 {
+ compatible = "hisilicon,hip06-lpc";
+ #address-cells = <2>;
+ #size-cells = <1>;
+ reg = <0xa01b0000 0x1000>;
+
+ ipmi0: bt@e4 {
+ compatible = "ipmi-bt";
+ device_type = "ipmi";
+ reg = <0x01 0xe4 0x04>;
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/arm/hpe,gxp.yaml b/Documentation/devicetree/bindings/arm/hpe,gxp.yaml
new file mode 100644
index 000000000000..224bbcb93f95
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/hpe,gxp.yaml
@@ -0,0 +1,27 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/hpe,gxp.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: HPE BMC GXP platforms
+
+maintainers:
+ - Nick Hawkins <nick.hawkins@hpe.com>
+ - Jean-Marie Verdun <verdun@hpe.com>
+
+properties:
+ compatible:
+ oneOf:
+ - description: GXP Based Boards
+ items:
+ - enum:
+ - hpe,gxp-dl360gen10
+ - const: hpe,gxp
+
+required:
+ - compatible
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/i2se.txt b/Documentation/devicetree/bindings/arm/i2se.txt
deleted file mode 100644
index dbd54a3aa07d..000000000000
--- a/Documentation/devicetree/bindings/arm/i2se.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-I2SE Device Tree Bindings
--------------------------
-
-Duckbill Board
-Required root node properties:
- - compatible = "i2se,duckbill", "fsl,imx28";
-
-Duckbill 2 Board
-Required root node properties:
- - compatible = "i2se,duckbill-2", "fsl,imx28";
-
-Duckbill 2 485 Board
-Required root node properties:
- - compatible = "i2se,duckbill-2-485", "i2se,duckbill-2", "fsl,imx28";
-
-Duckbill 2 EnOcean Board
-Required root node properties:
- - compatible = "i2se,duckbill-2-enocean", "i2se,duckbill-2", "fsl,imx28";
-
-Duckbill 2 SPI Board
-Required root node properties:
- - compatible = "i2se,duckbill-2-spi", "i2se,duckbill-2", "fsl,imx28";
diff --git a/Documentation/devicetree/bindings/arm/idle-states.txt b/Documentation/devicetree/bindings/arm/idle-states.txt
deleted file mode 100644
index 2c73847499ab..000000000000
--- a/Documentation/devicetree/bindings/arm/idle-states.txt
+++ /dev/null
@@ -1,699 +0,0 @@
-==========================================
-ARM idle states binding description
-==========================================
-
-==========================================
-1 - Introduction
-==========================================
-
-ARM systems contain HW capable of managing power consumption dynamically,
-where cores can be put in different low-power states (ranging from simple
-wfi to power gating) according to OS PM policies. The CPU states representing
-the range of dynamic idle states that a processor can enter at run-time, can be
-specified through device tree bindings representing the parameters required
-to enter/exit specific idle states on a given processor.
-
-According to the Server Base System Architecture document (SBSA, [3]), the
-power states an ARM CPU can be put into are identified by the following list:
-
-- Running
-- Idle_standby
-- Idle_retention
-- Sleep
-- Off
-
-The power states described in the SBSA document define the basic CPU states on
-top of which ARM platforms implement power management schemes that allow an OS
-PM implementation to put the processor in different idle states (which include
-states listed above; "off" state is not an idle state since it does not have
-wake-up capabilities, hence it is not considered in this document).
-
-Idle state parameters (eg entry latency) are platform specific and need to be
-characterized with bindings that provide the required information to OS PM
-code so that it can build the required tables and use them at runtime.
-
-The device tree binding definition for ARM idle states is the subject of this
-document.
-
-===========================================
-2 - idle-states definitions
-===========================================
-
-Idle states are characterized for a specific system through a set of
-timing and energy related properties, that underline the HW behaviour
-triggered upon idle states entry and exit.
-
-The following diagram depicts the CPU execution phases and related timing
-properties required to enter and exit an idle state:
-
-..__[EXEC]__|__[PREP]__|__[ENTRY]__|__[IDLE]__|__[EXIT]__|__[EXEC]__..
- | | | | |
-
- |<------ entry ------->|
- | latency |
- |<- exit ->|
- | latency |
- |<-------- min-residency -------->|
- |<------- wakeup-latency ------->|
-
- Diagram 1: CPU idle state execution phases
-
-EXEC: Normal CPU execution.
-
-PREP: Preparation phase before committing the hardware to idle mode
- like cache flushing. This is abortable on pending wake-up
- event conditions. The abort latency is assumed to be negligible
- (i.e. less than the ENTRY + EXIT duration). If aborted, CPU
- goes back to EXEC. This phase is optional. If not abortable,
- this should be included in the ENTRY phase instead.
-
-ENTRY: The hardware is committed to idle mode. This period must run
- to completion up to IDLE before anything else can happen.
-
-IDLE: This is the actual energy-saving idle period. This may last
- between 0 and infinite time, until a wake-up event occurs.
-
-EXIT: Period during which the CPU is brought back to operational
- mode (EXEC).
-
-entry-latency: Worst case latency required to enter the idle state. The
-exit-latency may be guaranteed only after entry-latency has passed.
-
-min-residency: Minimum period, including preparation and entry, for a given
-idle state to be worthwhile energywise.
-
-wakeup-latency: Maximum delay between the signaling of a wake-up event and the
-CPU being able to execute normal code again. If not specified, this is assumed
-to be entry-latency + exit-latency.
-
-These timing parameters can be used by an OS in different circumstances.
-
-An idle CPU requires the expected min-residency time to select the most
-appropriate idle state based on the expected expiry time of the next IRQ
-(ie wake-up) that causes the CPU to return to the EXEC phase.
-
-An operating system scheduler may need to compute the shortest wake-up delay
-for CPUs in the system by detecting how long will it take to get a CPU out
-of an idle state, eg:
-
-wakeup-delay = exit-latency + max(entry-latency - (now - entry-timestamp), 0)
-
-In other words, the scheduler can make its scheduling decision by selecting
-(eg waking-up) the CPU with the shortest wake-up latency.
-The wake-up latency must take into account the entry latency if that period
-has not expired. The abortable nature of the PREP period can be ignored
-if it cannot be relied upon (e.g. the PREP deadline may occur much sooner than
-the worst case since it depends on the CPU operating conditions, ie caches
-state).
-
-An OS has to reliably probe the wakeup-latency since some devices can enforce
-latency constraints guarantees to work properly, so the OS has to detect the
-worst case wake-up latency it can incur if a CPU is allowed to enter an
-idle state, and possibly to prevent that to guarantee reliable device
-functioning.
-
-The min-residency time parameter deserves further explanation since it is
-expressed in time units but must factor in energy consumption coefficients.
-
-The energy consumption of a cpu when it enters a power state can be roughly
-characterised by the following graph:
-
- |
- |
- |
- e |
- n | /---
- e | /------
- r | /------
- g | /-----
- y | /------
- | ----
- | /|
- | / |
- | / |
- | / |
- | / |
- | / |
- |/ |
- -----|-------+----------------------------------
- 0| 1 time(ms)
-
- Graph 1: Energy vs time example
-
-The graph is split in two parts delimited by time 1ms on the X-axis.
-The graph curve with X-axis values = { x | 0 < x < 1ms } has a steep slope
-and denotes the energy costs incurred whilst entering and leaving the idle
-state.
-The graph curve in the area delimited by X-axis values = {x | x > 1ms } has
-shallower slope and essentially represents the energy consumption of the idle
-state.
-
-min-residency is defined for a given idle state as the minimum expected
-residency time for a state (inclusive of preparation and entry) after
-which choosing that state become the most energy efficient option. A good
-way to visualise this, is by taking the same graph above and comparing some
-states energy consumptions plots.
-
-For sake of simplicity, let's consider a system with two idle states IDLE1,
-and IDLE2:
-
- |
- |
- |
- | /-- IDLE1
- e | /---
- n | /----
- e | /---
- r | /-----/--------- IDLE2
- g | /-------/---------
- y | ------------ /---|
- | / /---- |
- | / /--- |
- | / /---- |
- | / /--- |
- | --- |
- | / |
- | / |
- |/ | time
- ---/----------------------------+------------------------
- |IDLE1-energy < IDLE2-energy | IDLE2-energy < IDLE1-energy
- |
- IDLE2-min-residency
-
- Graph 2: idle states min-residency example
-
-In graph 2 above, that takes into account idle states entry/exit energy
-costs, it is clear that if the idle state residency time (ie time till next
-wake-up IRQ) is less than IDLE2-min-residency, IDLE1 is the better idle state
-choice energywise.
-
-This is mainly down to the fact that IDLE1 entry/exit energy costs are lower
-than IDLE2.
-
-However, the lower power consumption (ie shallower energy curve slope) of idle
-state IDLE2 implies that after a suitable time, IDLE2 becomes more energy
-efficient.
-
-The time at which IDLE2 becomes more energy efficient than IDLE1 (and other
-shallower states in a system with multiple idle states) is defined
-IDLE2-min-residency and corresponds to the time when energy consumption of
-IDLE1 and IDLE2 states breaks even.
-
-The definitions provided in this section underpin the idle states
-properties specification that is the subject of the following sections.
-
-===========================================
-3 - idle-states node
-===========================================
-
-ARM processor idle states are defined within the idle-states node, which is
-a direct child of the cpus node [1] and provides a container where the
-processor idle states, defined as device tree nodes, are listed.
-
-- idle-states node
-
- Usage: Optional - On ARM systems, it is a container of processor idle
- states nodes. If the system does not provide CPU
- power management capabilities or the processor just
- supports idle_standby an idle-states node is not
- required.
-
- Description: idle-states node is a container node, where its
- subnodes describe the CPU idle states.
-
- Node name must be "idle-states".
-
- The idle-states node's parent node must be the cpus node.
-
- The idle-states node's child nodes can be:
-
- - one or more state nodes
-
- Any other configuration is considered invalid.
-
- An idle-states node defines the following properties:
-
- - entry-method
- Value type: <stringlist>
- Usage and definition depend on ARM architecture version.
- # On ARM v8 64-bit this property is required and must
- be:
- - "psci"
- # On ARM 32-bit systems this property is optional
-
-The nodes describing the idle states (state) can only be defined within the
-idle-states node, any other configuration is considered invalid and therefore
-must be ignored.
-
-===========================================
-4 - state node
-===========================================
-
-A state node represents an idle state description and must be defined as
-follows:
-
-- state node
-
- Description: must be child of the idle-states node
-
- The state node name shall follow standard device tree naming
- rules ([5], 2.2.1 "Node names"), in particular state nodes which
- are siblings within a single common parent must be given a unique name.
-
- The idle state entered by executing the wfi instruction (idle_standby
- SBSA,[3][4]) is considered standard on all ARM platforms and therefore
- must not be listed.
-
- With the definitions provided above, the following list represents
- the valid properties for a state node:
-
- - compatible
- Usage: Required
- Value type: <stringlist>
- Definition: Must be "arm,idle-state".
-
- - local-timer-stop
- Usage: See definition
- Value type: <none>
- Definition: if present the CPU local timer control logic is
- lost on state entry, otherwise it is retained.
-
- - entry-latency-us
- Usage: Required
- Value type: <prop-encoded-array>
- Definition: u32 value representing worst case latency in
- microseconds required to enter the idle state.
- The exit-latency-us duration may be guaranteed
- only after entry-latency-us has passed.
-
- - exit-latency-us
- Usage: Required
- Value type: <prop-encoded-array>
- Definition: u32 value representing worst case latency
- in microseconds required to exit the idle state.
-
- - min-residency-us
- Usage: Required
- Value type: <prop-encoded-array>
- Definition: u32 value representing minimum residency duration
- in microseconds, inclusive of preparation and
- entry, for this idle state to be considered
- worthwhile energy wise (refer to section 2 of
- this document for a complete description).
-
- - wakeup-latency-us:
- Usage: Optional
- Value type: <prop-encoded-array>
- Definition: u32 value representing maximum delay between the
- signaling of a wake-up event and the CPU being
- able to execute normal code again. If omitted,
- this is assumed to be equal to:
-
- entry-latency-us + exit-latency-us
-
- It is important to supply this value on systems
- where the duration of PREP phase (see diagram 1,
- section 2) is non-neglibigle.
- In such systems entry-latency-us + exit-latency-us
- will exceed wakeup-latency-us by this duration.
-
- - status:
- Usage: Optional
- Value type: <string>
- Definition: A standard device tree property [5] that indicates
- the operational status of an idle-state.
- If present, it shall be:
- "okay": to indicate that the idle state is
- operational.
- "disabled": to indicate that the idle state has
- been disabled in firmware so it is not
- operational.
- If the property is not present the idle-state must
- be considered operational.
-
- - idle-state-name:
- Usage: Optional
- Value type: <string>
- Definition: A string used as a descriptive name for the idle
- state.
-
- In addition to the properties listed above, a state node may require
- additional properties specifics to the entry-method defined in the
- idle-states node, please refer to the entry-method bindings
- documentation for properties definitions.
-
-===========================================
-4 - Examples
-===========================================
-
-Example 1 (ARM 64-bit, 16-cpu system, PSCI enable-method):
-
-cpus {
- #size-cells = <0>;
- #address-cells = <2>;
-
- CPU0: cpu@0 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x0 0x0>;
- enable-method = "psci";
- cpu-idle-states = <&CPU_RETENTION_0_0 &CPU_SLEEP_0_0
- &CLUSTER_RETENTION_0 &CLUSTER_SLEEP_0>;
- };
-
- CPU1: cpu@1 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x0 0x1>;
- enable-method = "psci";
- cpu-idle-states = <&CPU_RETENTION_0_0 &CPU_SLEEP_0_0
- &CLUSTER_RETENTION_0 &CLUSTER_SLEEP_0>;
- };
-
- CPU2: cpu@100 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x0 0x100>;
- enable-method = "psci";
- cpu-idle-states = <&CPU_RETENTION_0_0 &CPU_SLEEP_0_0
- &CLUSTER_RETENTION_0 &CLUSTER_SLEEP_0>;
- };
-
- CPU3: cpu@101 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x0 0x101>;
- enable-method = "psci";
- cpu-idle-states = <&CPU_RETENTION_0_0 &CPU_SLEEP_0_0
- &CLUSTER_RETENTION_0 &CLUSTER_SLEEP_0>;
- };
-
- CPU4: cpu@10000 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x0 0x10000>;
- enable-method = "psci";
- cpu-idle-states = <&CPU_RETENTION_0_0 &CPU_SLEEP_0_0
- &CLUSTER_RETENTION_0 &CLUSTER_SLEEP_0>;
- };
-
- CPU5: cpu@10001 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x0 0x10001>;
- enable-method = "psci";
- cpu-idle-states = <&CPU_RETENTION_0_0 &CPU_SLEEP_0_0
- &CLUSTER_RETENTION_0 &CLUSTER_SLEEP_0>;
- };
-
- CPU6: cpu@10100 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x0 0x10100>;
- enable-method = "psci";
- cpu-idle-states = <&CPU_RETENTION_0_0 &CPU_SLEEP_0_0
- &CLUSTER_RETENTION_0 &CLUSTER_SLEEP_0>;
- };
-
- CPU7: cpu@10101 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x0 0x10101>;
- enable-method = "psci";
- cpu-idle-states = <&CPU_RETENTION_0_0 &CPU_SLEEP_0_0
- &CLUSTER_RETENTION_0 &CLUSTER_SLEEP_0>;
- };
-
- CPU8: cpu@100000000 {
- device_type = "cpu";
- compatible = "arm,cortex-a53";
- reg = <0x1 0x0>;
- enable-method = "psci";
- cpu-idle-states = <&CPU_RETENTION_1_0 &CPU_SLEEP_1_0
- &CLUSTER_RETENTION_1 &CLUSTER_SLEEP_1>;
- };
-
- CPU9: cpu@100000001 {
- device_type = "cpu";
- compatible = "arm,cortex-a53";
- reg = <0x1 0x1>;
- enable-method = "psci";
- cpu-idle-states = <&CPU_RETENTION_1_0 &CPU_SLEEP_1_0
- &CLUSTER_RETENTION_1 &CLUSTER_SLEEP_1>;
- };
-
- CPU10: cpu@100000100 {
- device_type = "cpu";
- compatible = "arm,cortex-a53";
- reg = <0x1 0x100>;
- enable-method = "psci";
- cpu-idle-states = <&CPU_RETENTION_1_0 &CPU_SLEEP_1_0
- &CLUSTER_RETENTION_1 &CLUSTER_SLEEP_1>;
- };
-
- CPU11: cpu@100000101 {
- device_type = "cpu";
- compatible = "arm,cortex-a53";
- reg = <0x1 0x101>;
- enable-method = "psci";
- cpu-idle-states = <&CPU_RETENTION_1_0 &CPU_SLEEP_1_0
- &CLUSTER_RETENTION_1 &CLUSTER_SLEEP_1>;
- };
-
- CPU12: cpu@100010000 {
- device_type = "cpu";
- compatible = "arm,cortex-a53";
- reg = <0x1 0x10000>;
- enable-method = "psci";
- cpu-idle-states = <&CPU_RETENTION_1_0 &CPU_SLEEP_1_0
- &CLUSTER_RETENTION_1 &CLUSTER_SLEEP_1>;
- };
-
- CPU13: cpu@100010001 {
- device_type = "cpu";
- compatible = "arm,cortex-a53";
- reg = <0x1 0x10001>;
- enable-method = "psci";
- cpu-idle-states = <&CPU_RETENTION_1_0 &CPU_SLEEP_1_0
- &CLUSTER_RETENTION_1 &CLUSTER_SLEEP_1>;
- };
-
- CPU14: cpu@100010100 {
- device_type = "cpu";
- compatible = "arm,cortex-a53";
- reg = <0x1 0x10100>;
- enable-method = "psci";
- cpu-idle-states = <&CPU_RETENTION_1_0 &CPU_SLEEP_1_0
- &CLUSTER_RETENTION_1 &CLUSTER_SLEEP_1>;
- };
-
- CPU15: cpu@100010101 {
- device_type = "cpu";
- compatible = "arm,cortex-a53";
- reg = <0x1 0x10101>;
- enable-method = "psci";
- cpu-idle-states = <&CPU_RETENTION_1_0 &CPU_SLEEP_1_0
- &CLUSTER_RETENTION_1 &CLUSTER_SLEEP_1>;
- };
-
- idle-states {
- entry-method = "psci";
-
- CPU_RETENTION_0_0: cpu-retention-0-0 {
- compatible = "arm,idle-state";
- arm,psci-suspend-param = <0x0010000>;
- entry-latency-us = <20>;
- exit-latency-us = <40>;
- min-residency-us = <80>;
- };
-
- CLUSTER_RETENTION_0: cluster-retention-0 {
- compatible = "arm,idle-state";
- local-timer-stop;
- arm,psci-suspend-param = <0x1010000>;
- entry-latency-us = <50>;
- exit-latency-us = <100>;
- min-residency-us = <250>;
- wakeup-latency-us = <130>;
- };
-
- CPU_SLEEP_0_0: cpu-sleep-0-0 {
- compatible = "arm,idle-state";
- local-timer-stop;
- arm,psci-suspend-param = <0x0010000>;
- entry-latency-us = <250>;
- exit-latency-us = <500>;
- min-residency-us = <950>;
- };
-
- CLUSTER_SLEEP_0: cluster-sleep-0 {
- compatible = "arm,idle-state";
- local-timer-stop;
- arm,psci-suspend-param = <0x1010000>;
- entry-latency-us = <600>;
- exit-latency-us = <1100>;
- min-residency-us = <2700>;
- wakeup-latency-us = <1500>;
- };
-
- CPU_RETENTION_1_0: cpu-retention-1-0 {
- compatible = "arm,idle-state";
- arm,psci-suspend-param = <0x0010000>;
- entry-latency-us = <20>;
- exit-latency-us = <40>;
- min-residency-us = <90>;
- };
-
- CLUSTER_RETENTION_1: cluster-retention-1 {
- compatible = "arm,idle-state";
- local-timer-stop;
- arm,psci-suspend-param = <0x1010000>;
- entry-latency-us = <50>;
- exit-latency-us = <100>;
- min-residency-us = <270>;
- wakeup-latency-us = <100>;
- };
-
- CPU_SLEEP_1_0: cpu-sleep-1-0 {
- compatible = "arm,idle-state";
- local-timer-stop;
- arm,psci-suspend-param = <0x0010000>;
- entry-latency-us = <70>;
- exit-latency-us = <100>;
- min-residency-us = <300>;
- wakeup-latency-us = <150>;
- };
-
- CLUSTER_SLEEP_1: cluster-sleep-1 {
- compatible = "arm,idle-state";
- local-timer-stop;
- arm,psci-suspend-param = <0x1010000>;
- entry-latency-us = <500>;
- exit-latency-us = <1200>;
- min-residency-us = <3500>;
- wakeup-latency-us = <1300>;
- };
- };
-
-};
-
-Example 2 (ARM 32-bit, 8-cpu system, two clusters):
-
-cpus {
- #size-cells = <0>;
- #address-cells = <1>;
-
- CPU0: cpu@0 {
- device_type = "cpu";
- compatible = "arm,cortex-a15";
- reg = <0x0>;
- cpu-idle-states = <&CPU_SLEEP_0_0 &CLUSTER_SLEEP_0>;
- };
-
- CPU1: cpu@1 {
- device_type = "cpu";
- compatible = "arm,cortex-a15";
- reg = <0x1>;
- cpu-idle-states = <&CPU_SLEEP_0_0 &CLUSTER_SLEEP_0>;
- };
-
- CPU2: cpu@2 {
- device_type = "cpu";
- compatible = "arm,cortex-a15";
- reg = <0x2>;
- cpu-idle-states = <&CPU_SLEEP_0_0 &CLUSTER_SLEEP_0>;
- };
-
- CPU3: cpu@3 {
- device_type = "cpu";
- compatible = "arm,cortex-a15";
- reg = <0x3>;
- cpu-idle-states = <&CPU_SLEEP_0_0 &CLUSTER_SLEEP_0>;
- };
-
- CPU4: cpu@100 {
- device_type = "cpu";
- compatible = "arm,cortex-a7";
- reg = <0x100>;
- cpu-idle-states = <&CPU_SLEEP_1_0 &CLUSTER_SLEEP_1>;
- };
-
- CPU5: cpu@101 {
- device_type = "cpu";
- compatible = "arm,cortex-a7";
- reg = <0x101>;
- cpu-idle-states = <&CPU_SLEEP_1_0 &CLUSTER_SLEEP_1>;
- };
-
- CPU6: cpu@102 {
- device_type = "cpu";
- compatible = "arm,cortex-a7";
- reg = <0x102>;
- cpu-idle-states = <&CPU_SLEEP_1_0 &CLUSTER_SLEEP_1>;
- };
-
- CPU7: cpu@103 {
- device_type = "cpu";
- compatible = "arm,cortex-a7";
- reg = <0x103>;
- cpu-idle-states = <&CPU_SLEEP_1_0 &CLUSTER_SLEEP_1>;
- };
-
- idle-states {
- CPU_SLEEP_0_0: cpu-sleep-0-0 {
- compatible = "arm,idle-state";
- local-timer-stop;
- entry-latency-us = <200>;
- exit-latency-us = <100>;
- min-residency-us = <400>;
- wakeup-latency-us = <250>;
- };
-
- CLUSTER_SLEEP_0: cluster-sleep-0 {
- compatible = "arm,idle-state";
- local-timer-stop;
- entry-latency-us = <500>;
- exit-latency-us = <1500>;
- min-residency-us = <2500>;
- wakeup-latency-us = <1700>;
- };
-
- CPU_SLEEP_1_0: cpu-sleep-1-0 {
- compatible = "arm,idle-state";
- local-timer-stop;
- entry-latency-us = <300>;
- exit-latency-us = <500>;
- min-residency-us = <900>;
- wakeup-latency-us = <600>;
- };
-
- CLUSTER_SLEEP_1: cluster-sleep-1 {
- compatible = "arm,idle-state";
- local-timer-stop;
- entry-latency-us = <800>;
- exit-latency-us = <2000>;
- min-residency-us = <6500>;
- wakeup-latency-us = <2300>;
- };
- };
-
-};
-
-===========================================
-5 - References
-===========================================
-
-[1] ARM Linux Kernel documentation - CPUs bindings
- Documentation/devicetree/bindings/arm/cpus.txt
-
-[2] ARM Linux Kernel documentation - PSCI bindings
- Documentation/devicetree/bindings/arm/psci.txt
-
-[3] ARM Server Base System Architecture (SBSA)
- http://infocenter.arm.com/help/index.jsp
-
-[4] ARM Architecture Reference Manuals
- http://infocenter.arm.com/help/index.jsp
-
-[5] Devicetree Specification
- https://www.devicetree.org/specifications/
diff --git a/Documentation/devicetree/bindings/arm/intel,keembay.yaml b/Documentation/devicetree/bindings/arm/intel,keembay.yaml
new file mode 100644
index 000000000000..53d2ce02b207
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/intel,keembay.yaml
@@ -0,0 +1,24 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/intel,keembay.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Keem Bay platform
+
+maintainers:
+ - Paul J. Murphy <paul.j.murphy@intel.com>
+ - Daniele Alessandrelli <daniele.alessandrelli@intel.com>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ items:
+ - enum:
+ - intel,keembay-evm
+ - const: intel,keembay
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/intel,socfpga.yaml b/Documentation/devicetree/bindings/arm/intel,socfpga.yaml
new file mode 100644
index 000000000000..c75cd7d29f1a
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/intel,socfpga.yaml
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/intel,socfpga.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Intel SoCFPGA platform
+
+maintainers:
+ - Dinh Nguyen <dinguyen@kernel.org>
+
+properties:
+ $nodename:
+ const: "/"
+ compatible:
+ oneOf:
+ - description: AgileX boards
+ items:
+ - enum:
+ - intel,n5x-socdk
+ - intel,socfpga-agilex-n6000
+ - intel,socfpga-agilex-socdk
+ - const: intel,socfpga-agilex
+ - description: Agilex5 boards
+ items:
+ - enum:
+ - intel,socfpga-agilex5-socdk
+ - intel,socfpga-agilex5-socdk-nand
+ - const: intel,socfpga-agilex5
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/intel-ixp4xx.yaml b/Documentation/devicetree/bindings/arm/intel-ixp4xx.yaml
new file mode 100644
index 000000000000..b7b430896596
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/intel-ixp4xx.yaml
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/intel-ixp4xx.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Intel IXP4xx
+
+maintainers:
+ - Linus Walleij <linus.walleij@linaro.org>
+
+properties:
+ $nodename:
+ const: "/"
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - actiontec,mi424wr-ac
+ - actiontec,mi424wr-d
+ - adieng,coyote
+ - arcom,vulcan
+ - dlink,dsm-g600-a
+ - freecom,fsg-3
+ - gateway,7001
+ - gateworks,gw2348
+ - goramo,multilink-router
+ - intel,ixdp425
+ - intel,ixdpg425
+ - iom,nas-100d
+ - linksys,nslu2
+ - netgear,wg302v1
+ - netgear,wg302v2
+ - usr,8200
+ - welltech,epbx100
+ - linksys,wrv54g
+ - gemtek,gtwx5715
+ - const: intel,ixp42x
+ - items:
+ - enum:
+ - gateworks,gw2358
+ - intel,kixrp435
+ - const: intel,ixp43x
+
+additionalProperties: true
diff --git a/Documentation/devicetree/bindings/arm/juno,scpi.txt b/Documentation/devicetree/bindings/arm/juno,scpi.txt
deleted file mode 100644
index 2ace8696bbee..000000000000
--- a/Documentation/devicetree/bindings/arm/juno,scpi.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-System Control and Power Interface (SCPI) Message Protocol
-(in addition to the standard binding in [0])
-
-Juno SRAM and Shared Memory for SCPI
-------------------------------------
-
-Required properties:
-- compatible : should be "arm,juno-sram-ns" for Non-secure SRAM
-
-Each sub-node represents the reserved area for SCPI.
-
-Required sub-node properties:
-- reg : The base offset and size of the reserved area with the SRAM
-- compatible : should be "arm,juno-scp-shmem" for Non-secure SRAM based
- shared memory on Juno platforms
-
-Sensor bindings for the sensors based on SCPI Message Protocol
---------------------------------------------------------------
-Required properties:
-- compatible : should be "arm,scpi-sensors".
-- #thermal-sensor-cells: should be set to 1.
- For Juno R0 and Juno R1 refer to [1] for the
- sensor identifiers
-
-[0] Documentation/devicetree/bindings/arm/arm,scpi.txt
-[1] http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0922b/apas03s22.html
diff --git a/Documentation/devicetree/bindings/arm/keystone/keystone.txt b/Documentation/devicetree/bindings/arm/keystone/keystone.txt
deleted file mode 100644
index f310bad04483..000000000000
--- a/Documentation/devicetree/bindings/arm/keystone/keystone.txt
+++ /dev/null
@@ -1,42 +0,0 @@
-TI Keystone Platforms Device Tree Bindings
------------------------------------------------
-
-Boards with Keystone2 based devices (TCI66xxK2H) SOC shall have the
-following properties.
-
-Required properties:
- - compatible: All TI specific devices present in Keystone SOC should be in
- the form "ti,keystone-*". Generic devices like gic, arch_timers, ns16550
- type UART should use the specified compatible for those devices.
-
-SoC families:
-
-- Keystone 2 generic SoC:
- compatible = "ti,keystone"
-
-SoCs:
-
-- Keystone 2 Hawking/Kepler
- compatible = "ti,k2hk", "ti,keystone"
-- Keystone 2 Lamarr
- compatible = "ti,k2l", "ti,keystone"
-- Keystone 2 Edison
- compatible = "ti,k2e", "ti,keystone"
-- K2G
- compatible = "ti,k2g", "ti,keystone"
-
-Boards:
-- Keystone 2 Hawking/Kepler EVM
- compatible = "ti,k2hk-evm", "ti,k2hk", "ti,keystone"
-
-- Keystone 2 Lamarr EVM
- compatible = "ti,k2l-evm", "ti, k2l", "ti,keystone"
-
-- Keystone 2 Edison EVM
- compatible = "ti,k2e-evm", "ti,k2e", "ti,keystone"
-
-- K2G EVM
- compatible = "ti,k2g-evm", "ti,k2g", "ti-keystone"
-
-- K2G Industrial Communication Engine EVM
- compatible = "ti,k2g-ice", "ti,k2g", "ti-keystone"
diff --git a/Documentation/devicetree/bindings/arm/keystone/ti,k3-sci-common.yaml b/Documentation/devicetree/bindings/arm/keystone/ti,k3-sci-common.yaml
new file mode 100644
index 000000000000..4a323e8c785d
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/keystone/ti,k3-sci-common.yaml
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/keystone/ti,k3-sci-common.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Common K3 TI-SCI
+
+maintainers:
+ - Nishanth Menon <nm@ti.com>
+
+description: |
+ The TI K3 family of SoCs usually have a central System Controller Processor
+ that is responsible for managing various SoC-level resources like clocks,
+ resets, interrupts etc. The communication with that processor is performed
+ through the TI-SCI protocol.
+
+ Each specific device management node like a clock controller node, a reset
+ controller node or an interrupt-controller node should define a common set
+ of properties that enables them to implement the corresponding functionality
+ over the TI-SCI protocol. The following are some of the common properties
+ needed by such individual nodes. The required properties for each device
+ management node is defined in the respective binding.
+
+properties:
+ ti,sci:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ Should be a phandle to the TI-SCI System Controller node
+
+ ti,sci-dev-id:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: |
+ Should contain the TI-SCI device id corresponding to the device. Please
+ refer to the corresponding System Controller documentation for valid
+ values for the desired device.
+
+ ti,sci-proc-ids:
+ description: Should contain a single tuple of <proc_id host_id>.
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ items:
+ - description: TI-SCI processor id for the remote processor device
+ - description: TI-SCI host id to which processor control ownership
+ should be transferred to
+
+additionalProperties: true
diff --git a/Documentation/devicetree/bindings/arm/keystone/ti,sci.txt b/Documentation/devicetree/bindings/arm/keystone/ti,sci.txt
deleted file mode 100644
index b56a02c10ae6..000000000000
--- a/Documentation/devicetree/bindings/arm/keystone/ti,sci.txt
+++ /dev/null
@@ -1,85 +0,0 @@
-Texas Instruments System Control Interface (TI-SCI) Message Protocol
---------------------------------------------------------------------
-
-Texas Instrument's processors including those belonging to Keystone generation
-of processors have separate hardware entity which is now responsible for the
-management of the System on Chip (SoC) system. These include various system
-level functions as well.
-
-An example of such an SoC is K2G, which contains the system control hardware
-block called Power Management Micro Controller (PMMC). This hardware block is
-initialized early into boot process and provides services to Operating Systems
-on multiple processors including ones running Linux.
-
-See http://processors.wiki.ti.com/index.php/TISCI for protocol definition.
-
-TI-SCI controller Device Node:
-=============================
-
-The TI-SCI node describes the Texas Instrument's System Controller entity node.
-This parent node may optionally have additional children nodes which describe
-specific functionality such as clocks, power domain, reset or additional
-functionality as may be required for the SoC. This hierarchy also describes the
-relationship between the TI-SCI parent node to the child node.
-
-Required properties:
--------------------
-- compatible: should be "ti,k2g-sci"
-- mbox-names:
- "rx" - Mailbox corresponding to receive path
- "tx" - Mailbox corresponding to transmit path
-
-- mboxes: Mailboxes corresponding to the mbox-names. Each value of the mboxes
- property should contain a phandle to the mailbox controller device
- node and an args specifier that will be the phandle to the intended
- sub-mailbox child node to be used for communication.
-
-See Documentation/devicetree/bindings/mailbox/mailbox.txt for more details
-about the generic mailbox controller and client driver bindings. Also see
-Documentation/devicetree/bindings/mailbox/ti,message-manager.txt for typical
-controller that is used to communicate with this System controllers.
-
-Optional Properties:
--------------------
-- reg-names:
- debug_messages - Map the Debug message region
-- reg: register space corresponding to the debug_messages
-- ti,system-reboot-controller: If system reboot can be triggered by SoC reboot
-- ti,host-id: Integer value corresponding to the host ID assigned by Firmware
- for identification of host processing entities such as virtual
- machines
-
-Example (K2G):
--------------
- pmmc: pmmc {
- compatible = "ti,k2g-sci";
- ti,host-id = <2>;
- mbox-names = "rx", "tx";
- mboxes= <&msgmgr &msgmgr_proxy_pmmc_rx>,
- <&msgmgr &msgmgr_proxy_pmmc_tx>;
- reg-names = "debug_messages";
- reg = <0x02921800 0x800>;
- };
-
-
-TI-SCI Client Device Node:
-=========================
-
-Client nodes are maintained as children of the relevant TI-SCI device node.
-
-Example (K2G):
--------------
- pmmc: pmmc {
- compatible = "ti,k2g-sci";
- ...
-
- my_clk_node: clk_node {
- ...
- ...
- };
-
- my_pd_node: pd_node {
- ...
- ...
- };
- };
diff --git a/Documentation/devicetree/bindings/arm/keystone/ti,sci.yaml b/Documentation/devicetree/bindings/arm/keystone/ti,sci.yaml
new file mode 100644
index 000000000000..25a2b42105e5
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/keystone/ti,sci.yaml
@@ -0,0 +1,124 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/keystone/ti,sci.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: TI-SCI controller
+
+maintainers:
+ - Nishanth Menon <nm@ti.com>
+
+description: |
+ Texas Instrument's processors including those belonging to Keystone generation
+ of processors have separate hardware entity which is now responsible for the
+ management of the System on Chip (SoC) system. These include various system
+ level functions as well.
+
+ An example of such an SoC is K2G, which contains the system control hardware
+ block called Power Management Micro Controller (PMMC). This hardware block is
+ initialized early into boot process and provides services to Operating Systems
+ on multiple processors including ones running Linux.
+
+ See https://software-dl.ti.com/tisci/esd/latest/index.html for protocol definition.
+
+ The TI-SCI node describes the Texas Instrument's System Controller entity node.
+ This parent node may optionally have additional children nodes which describe
+ specific functionality such as clocks, power domain, reset or additional
+ functionality as may be required for the SoC. This hierarchy also describes the
+ relationship between the TI-SCI parent node to the child node.
+
+properties:
+ $nodename:
+ pattern: "^system-controller@[0-9a-f]+$"
+
+ compatible:
+ oneOf:
+ - description: System controller on TI 66AK2G SoC and other K3 SoCs
+ items:
+ - const: ti,k2g-sci
+ - description: System controller on TI AM654 SoC
+ items:
+ - const: ti,am654-sci
+
+ reg-names:
+ description: |
+ Specifies the debug messages memory mapped region that is optionally
+ made available from TI-SCI controller.
+ const: debug_messages
+
+ reg:
+ minItems: 1
+
+ mbox-names:
+ description: |
+ Specifies the mailboxes used to communicate with TI-SCI Controller
+ made available from TI-SCI controller.
+ items:
+ - const: rx
+ - const: tx
+
+ mboxes:
+ minItems: 2
+
+ ti,host-id:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: |
+ Value corresponding to the host ID assigned by Firmware
+ for identification of host processing entities such as virtual machines.
+
+ power-controller:
+ type: object
+ $ref: /schemas/soc/ti/sci-pm-domain.yaml#
+
+ clock-controller:
+ type: object
+ $ref: /schemas/clock/ti,sci-clk.yaml#
+
+ reset-controller:
+ type: object
+ $ref: /schemas/reset/ti,sci-reset.yaml#
+
+required:
+ - compatible
+ - mbox-names
+ - mboxes
+
+additionalProperties: false
+
+examples:
+ - |
+ pmmc: system-controller@2921800 {
+ compatible = "ti,k2g-sci";
+ mbox-names = "rx", "tx";
+ mboxes = <&msgmgr 5 2>,
+ <&msgmgr 0 0>;
+ reg-names = "debug_messages";
+ reg = <0x02921800 0x800>;
+ };
+
+ - |
+ dmsc: system-controller@44083000 {
+ compatible = "ti,k2g-sci";
+ ti,host-id = <12>;
+ mbox-names = "rx", "tx";
+ mboxes = <&secure_proxy_main 11>,
+ <&secure_proxy_main 13>;
+ reg-names = "debug_messages";
+ reg = <0x44083000 0x1000>;
+
+ k3_pds: power-controller {
+ compatible = "ti,sci-pm-domain";
+ #power-domain-cells = <2>;
+ };
+
+ k3_clks: clock-controller {
+ compatible = "ti,k2g-sci-clk";
+ #clock-cells = <2>;
+ };
+
+ k3_reset: reset-controller {
+ compatible = "ti,sci-reset";
+ #reset-cells = <2>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/arm/l2c2x0.txt b/Documentation/devicetree/bindings/arm/l2c2x0.txt
deleted file mode 100644
index fbe6cb21f4cf..000000000000
--- a/Documentation/devicetree/bindings/arm/l2c2x0.txt
+++ /dev/null
@@ -1,114 +0,0 @@
-* ARM L2 Cache Controller
-
-ARM cores often have a separate L2C210/L2C220/L2C310 (also known as PL210/PL220/
-PL310 and variants) based level 2 cache controller. All these various implementations
-of the L2 cache controller have compatible programming models (Note 1).
-Some of the properties that are just prefixed "cache-*" are taken from section
-3.7.3 of the Devicetree Specification which can be found at:
-https://www.devicetree.org/specifications/
-
-The ARM L2 cache representation in the device tree should be done as follows:
-
-Required properties:
-
-- compatible : should be one of:
- "arm,pl310-cache"
- "arm,l220-cache"
- "arm,l210-cache"
- "bcm,bcm11351-a2-pl310-cache": DEPRECATED by "brcm,bcm11351-a2-pl310-cache"
- "brcm,bcm11351-a2-pl310-cache": For Broadcom bcm11351 chipset where an
- offset needs to be added to the address before passing down to the L2
- cache controller
- "marvell,aurora-system-cache": Marvell Controller designed to be
- compatible with the ARM one, with system cache mode (meaning
- maintenance operations on L1 are broadcasted to the L2 and L2
- performs the same operation).
- "marvell,aurora-outer-cache": Marvell Controller designed to be
- compatible with the ARM one with outer cache mode.
- "marvell,tauros3-cache": Marvell Tauros3 cache controller, compatible
- with arm,pl310-cache controller.
-- cache-unified : Specifies the cache is a unified cache.
-- cache-level : Should be set to 2 for a level 2 cache.
-- reg : Physical base address and size of cache controller's memory mapped
- registers.
-
-Optional properties:
-
-- arm,data-latency : Cycles of latency for Data RAM accesses. Specifies 3 cells of
- read, write and setup latencies. Minimum valid values are 1. Controllers
- without setup latency control should use a value of 0.
-- arm,tag-latency : Cycles of latency for Tag RAM accesses. Specifies 3 cells of
- read, write and setup latencies. Controllers without setup latency control
- should use 0. Controllers without separate read and write Tag RAM latency
- values should only use the first cell.
-- arm,dirty-latency : Cycles of latency for Dirty RAMs. This is a single cell.
-- arm,filter-ranges : <start length> Starting address and length of window to
- filter. Addresses in the filter window are directed to the M1 port. Other
- addresses will go to the M0 port.
-- arm,io-coherent : indicates that the system is operating in an hardware
- I/O coherent mode. Valid only when the arm,pl310-cache compatible
- string is used.
-- interrupts : 1 combined interrupt.
-- cache-size : specifies the size in bytes of the cache
-- cache-sets : specifies the number of associativity sets of the cache
-- cache-block-size : specifies the size in bytes of a cache block
-- cache-line-size : specifies the size in bytes of a line in the cache,
- if this is not specified, the line size is assumed to be equal to the
- cache block size
-- cache-id-part: cache id part number to be used if it is not present
- on hardware
-- wt-override: If present then L2 is forced to Write through mode
-- arm,double-linefill : Override double linefill enable setting. Enable if
- non-zero, disable if zero.
-- arm,double-linefill-incr : Override double linefill on INCR read. Enable
- if non-zero, disable if zero.
-- arm,double-linefill-wrap : Override double linefill on WRAP read. Enable
- if non-zero, disable if zero.
-- arm,prefetch-drop : Override prefetch drop enable setting. Enable if non-zero,
- disable if zero.
-- arm,prefetch-offset : Override prefetch offset value. Valid values are
- 0-7, 15, 23, and 31.
-- arm,shared-override : The default behavior of the L220 or PL310 cache
- controllers with respect to the shareable attribute is to transform "normal
- memory non-cacheable transactions" into "cacheable no allocate" (for reads)
- or "write through no write allocate" (for writes).
- On systems where this may cause DMA buffer corruption, this property must be
- specified to indicate that such transforms are precluded.
-- arm,parity-enable : enable parity checking on the L2 cache (L220 or PL310).
-- arm,parity-disable : disable parity checking on the L2 cache (L220 or PL310).
-- arm,outer-sync-disable : disable the outer sync operation on the L2 cache.
- Some core tiles, especially ARM PB11MPCore have a faulty L220 cache that
- will randomly hang unless outer sync operations are disabled.
-- prefetch-data : Data prefetch. Value: <0> (forcibly disable), <1>
- (forcibly enable), property absent (retain settings set by firmware)
-- prefetch-instr : Instruction prefetch. Value: <0> (forcibly disable),
- <1> (forcibly enable), property absent (retain settings set by
- firmware)
-- arm,dynamic-clock-gating : L2 dynamic clock gating. Value: <0> (forcibly
- disable), <1> (forcibly enable), property absent (OS specific behavior,
- preferably retain firmware settings)
-- arm,standby-mode: L2 standby mode enable. Value <0> (forcibly disable),
- <1> (forcibly enable), property absent (OS specific behavior,
- preferably retain firmware settings)
-- arm,early-bresp-disable : Disable the CA9 optimization Early BRESP (PL310)
-- arm,full-line-zero-disable : Disable the CA9 optimization Full line of zero
- write (PL310)
-
-Example:
-
-L2: cache-controller {
- compatible = "arm,pl310-cache";
- reg = <0xfff12000 0x1000>;
- arm,data-latency = <1 1 1>;
- arm,tag-latency = <2 2 2>;
- arm,filter-ranges = <0x80000000 0x8000000>;
- cache-unified;
- cache-level = <2>;
- interrupts = <45>;
-};
-
-Note 1: The description in this document doesn't apply to integrated L2
- cache controllers as found in e.g. Cortex-A15/A7/A57/A53. These
- integrated L2 controllers are assumed to be all preconfigured by
- early secure boot code. Thus no need to deal with their configuration
- in the kernel at all.
diff --git a/Documentation/devicetree/bindings/arm/linux,dummy-virt.yaml b/Documentation/devicetree/bindings/arm/linux,dummy-virt.yaml
new file mode 100644
index 000000000000..c7c5eb48fc7e
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/linux,dummy-virt.yaml
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/linux,dummy-virt.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: QEMU virt machine
+
+maintainers:
+ - Rob Herring <robh@kernel.org>
+
+properties:
+ $nodename:
+ const: "/"
+ compatible:
+ const: linux,dummy-virt
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/marvell,berlin.yaml b/Documentation/devicetree/bindings/arm/marvell,berlin.yaml
new file mode 100644
index 000000000000..4e8442980dcb
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/marvell,berlin.yaml
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/marvell,berlin.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Synaptics/Marvell Berlin SoC
+
+maintainers:
+ - Jisheng Zhang <jszhang@kernel.org>
+
+description:
+ According to https://www.synaptics.com/company/news/conexant-marvell
+ Synaptics has acquired the Multimedia Solutions Business of Marvell, so
+ Berlin SoCs are now Synaptics' SoCs.
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - sony,nsz-gs7
+ - const: marvell,berlin2
+ - const: marvell,berlin
+ - items:
+ - enum:
+ - google,chromecast
+ - valve,steamlink
+ - const: marvell,berlin2cd
+ - const: marvell,berlin
+ - items:
+ - enum:
+ - marvell,berlin2q-dmp
+ - const: marvell,berlin2q
+ - const: marvell,berlin
+ - items:
+ - enum:
+ - marvell,berlin4ct-dmp
+ - marvell,berlin4ct-stb
+ - const: marvell,berlin4ct
+ - const: marvell,berlin
+
+additionalProperties: true
diff --git a/Documentation/devicetree/bindings/arm/marvell/98dx3236.txt b/Documentation/devicetree/bindings/arm/marvell/98dx3236.txt
deleted file mode 100644
index 64e8c73fc5ab..000000000000
--- a/Documentation/devicetree/bindings/arm/marvell/98dx3236.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-Marvell 98DX3236, 98DX3336 and 98DX4251 Platforms Device Tree Bindings
-----------------------------------------------------------------------
-
-Boards with a SoC of the Marvell 98DX3236, 98DX3336 and 98DX4251 families
-shall have the following property:
-
-Required root node property:
-
-compatible: must contain "marvell,armadaxp-98dx3236"
-
-In addition, boards using the Marvell 98DX3336 SoC shall have the
-following property:
-
-Required root node property:
-
-compatible: must contain "marvell,armadaxp-98dx3336"
-
-In addition, boards using the Marvell 98DX4251 SoC shall have the
-following property:
-
-Required root node property:
-
-compatible: must contain "marvell,armadaxp-98dx4251"
diff --git a/Documentation/devicetree/bindings/arm/marvell/ap806-system-controller.txt b/Documentation/devicetree/bindings/arm/marvell/ap806-system-controller.txt
deleted file mode 100644
index 3fd21bb7cb37..000000000000
--- a/Documentation/devicetree/bindings/arm/marvell/ap806-system-controller.txt
+++ /dev/null
@@ -1,138 +0,0 @@
-Marvell Armada AP806 System Controller
-======================================
-
-The AP806 is one of the two core HW blocks of the Marvell Armada 7K/8K
-SoCs. It contains system controllers, which provide several registers
-giving access to numerous features: clocks, pin-muxing and many other
-SoC configuration items. This DT binding allows to describe these
-system controllers.
-
-For the top level node:
- - compatible: must be: "syscon", "simple-mfd";
- - reg: register area of the AP806 system controller
-
-SYSTEM CONTROLLER 0
-===================
-
-Clocks:
--------
-
-
-The Device Tree node representing the AP806 system controller provides
-a number of clocks:
-
- - 0: clock of CPU cluster 0
- - 1: clock of CPU cluster 1
- - 2: fixed PLL at 1200 Mhz
- - 3: MSS clock, derived from the fixed PLL
-
-Required properties:
-
- - compatible: must be: "marvell,ap806-clock"
- - #clock-cells: must be set to 1
-
-Pinctrl:
---------
-
-For common binding part and usage, refer to
-Documentation/devicetree/bindings/pinctrl/marvell,mvebu-pinctrl.txt.
-
-Required properties:
-- compatible must be "marvell,ap806-pinctrl",
-
-Available mpp pins/groups and functions:
-Note: brackets (x) are not part of the mpp name for marvell,function and given
-only for more detailed description in this document.
-
-name pins functions
-================================================================================
-mpp0 0 gpio, sdio(clk), spi0(clk)
-mpp1 1 gpio, sdio(cmd), spi0(miso)
-mpp2 2 gpio, sdio(d0), spi0(mosi)
-mpp3 3 gpio, sdio(d1), spi0(cs0n)
-mpp4 4 gpio, sdio(d2), i2c0(sda)
-mpp5 5 gpio, sdio(d3), i2c0(sdk)
-mpp6 6 gpio, sdio(ds)
-mpp7 7 gpio, sdio(d4), uart1(rxd)
-mpp8 8 gpio, sdio(d5), uart1(txd)
-mpp9 9 gpio, sdio(d6), spi0(cs1n)
-mpp10 10 gpio, sdio(d7)
-mpp11 11 gpio, uart0(txd)
-mpp12 12 gpio, sdio(pw_off), sdio(hw_rst)
-mpp13 13 gpio
-mpp14 14 gpio
-mpp15 15 gpio
-mpp16 16 gpio
-mpp17 17 gpio
-mpp18 18 gpio
-mpp19 19 gpio, uart0(rxd), sdio(pw_off)
-
-GPIO:
------
-For common binding part and usage, refer to
-Documentation/devicetree/bindings/gpio/gpio-mvebu.txt.
-
-Required properties:
-
-- compatible: "marvell,armada-8k-gpio"
-
-- offset: offset address inside the syscon block
-
-Example:
-ap_syscon: system-controller@6f4000 {
- compatible = "syscon", "simple-mfd";
- reg = <0x6f4000 0x1000>;
-
- ap_clk: clock {
- compatible = "marvell,ap806-clock";
- #clock-cells = <1>;
- };
-
- ap_pinctrl: pinctrl {
- compatible = "marvell,ap806-pinctrl";
- };
-
- ap_gpio: gpio {
- compatible = "marvell,armada-8k-gpio";
- offset = <0x1040>;
- ngpios = <19>;
- gpio-controller;
- #gpio-cells = <2>;
- gpio-ranges = <&ap_pinctrl 0 0 19>;
- };
-};
-
-SYSTEM CONTROLLER 1
-===================
-
-Thermal:
---------
-
-For common binding part and usage, refer to
-Documentation/devicetree/bindings/thermal/thermal.txt
-
-The thermal IP can probe the temperature all around the processor. It
-may feature several channels, each of them wired to one sensor.
-
-Required properties:
-- compatible: must be one of:
- * marvell,armada-ap806-thermal
-- reg: register range associated with the thermal functions.
-
-Optional properties:
-- #thermal-sensor-cells: shall be <1> when thermal-zones subnodes refer
- to this IP and represents the channel ID. There is one sensor per
- channel. O refers to the thermal IP internal channel, while positive
- IDs refer to each CPU.
-
-Example:
-ap_syscon1: system-controller@6f8000 {
- compatible = "syscon", "simple-mfd";
- reg = <0x6f8000 0x1000>;
-
- ap_thermal: thermal-sensor@80 {
- compatible = "marvell,armada-ap806-thermal";
- reg = <0x80 0x10>;
- #thermal-sensor-cells = <1>;
- };
-};
diff --git a/Documentation/devicetree/bindings/arm/marvell/ap80x-system-controller.txt b/Documentation/devicetree/bindings/arm/marvell/ap80x-system-controller.txt
new file mode 100644
index 000000000000..72de11bd2ef0
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/marvell/ap80x-system-controller.txt
@@ -0,0 +1,146 @@
+Marvell Armada AP80x System Controller
+======================================
+
+The AP806/AP807 is one of the two core HW blocks of the Marvell Armada
+7K/8K/931x SoCs. It contains system controllers, which provide several
+registers giving access to numerous features: clocks, pin-muxing and
+many other SoC configuration items. This DT binding allows to describe
+these system controllers.
+
+For the top level node:
+ - compatible: must be: "syscon", "simple-mfd";
+ - reg: register area of the AP80x system controller
+
+SYSTEM CONTROLLER 0
+===================
+
+Clocks:
+-------
+
+
+The Device Tree node representing the AP806/AP807 system controller
+provides a number of clocks:
+
+ - 0: reference clock of CPU cluster 0
+ - 1: reference clock of CPU cluster 1
+ - 2: fixed PLL at 1200 Mhz
+ - 3: MSS clock, derived from the fixed PLL
+
+Required properties:
+
+ - compatible: must be one of:
+ * "marvell,ap806-clock"
+ * "marvell,ap807-clock"
+ - #clock-cells: must be set to 1
+
+Pinctrl:
+--------
+
+For common binding part and usage, refer to
+Documentation/devicetree/bindings/pinctrl/marvell,mvebu-pinctrl.txt.
+
+Required properties:
+- compatible must be "marvell,ap806-pinctrl",
+
+Available mpp pins/groups and functions:
+Note: brackets (x) are not part of the mpp name for marvell,function and given
+only for more detailed description in this document.
+
+name pins functions
+================================================================================
+mpp0 0 gpio, sdio(clk), spi0(clk)
+mpp1 1 gpio, sdio(cmd), spi0(miso)
+mpp2 2 gpio, sdio(d0), spi0(mosi)
+mpp3 3 gpio, sdio(d1), spi0(cs0n)
+mpp4 4 gpio, sdio(d2), i2c0(sda)
+mpp5 5 gpio, sdio(d3), i2c0(sdk)
+mpp6 6 gpio, sdio(ds)
+mpp7 7 gpio, sdio(d4), uart1(rxd)
+mpp8 8 gpio, sdio(d5), uart1(txd)
+mpp9 9 gpio, sdio(d6), spi0(cs1n)
+mpp10 10 gpio, sdio(d7)
+mpp11 11 gpio, uart0(txd)
+mpp12 12 gpio, sdio(pw_off), sdio(hw_rst)
+mpp13 13 gpio
+mpp14 14 gpio
+mpp15 15 gpio
+mpp16 16 gpio
+mpp17 17 gpio
+mpp18 18 gpio
+mpp19 19 gpio, uart0(rxd), sdio(pw_off)
+
+GPIO:
+-----
+For common binding part and usage, refer to
+Documentation/devicetree/bindings/gpio/gpio-mvebu.yaml.
+
+Required properties:
+
+- compatible: "marvell,armada-8k-gpio"
+
+- offset: offset address inside the syscon block
+
+Optional properties:
+
+- marvell,pwm-offset: offset address of PWM duration control registers inside
+ the syscon block
+
+Example:
+ap_syscon: system-controller@6f4000 {
+ compatible = "syscon", "simple-mfd";
+ reg = <0x6f4000 0x1000>;
+
+ ap_clk: clock {
+ compatible = "marvell,ap806-clock";
+ #clock-cells = <1>;
+ };
+
+ ap_pinctrl: pinctrl {
+ compatible = "marvell,ap806-pinctrl";
+ };
+
+ ap_gpio: gpio {
+ compatible = "marvell,armada-8k-gpio";
+ offset = <0x1040>;
+ ngpios = <19>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ gpio-ranges = <&ap_pinctrl 0 0 19>;
+ marvell,pwm-offset = <0x10c0>;
+ #pwm-cells = <2>;
+ clocks = <&ap_clk 3>;
+ };
+};
+
+SYSTEM CONTROLLER 1
+===================
+
+Cluster clocks:
+---------------
+
+Device Tree Clock bindings for cluster clock of Marvell
+AP806/AP807. Each cluster contain up to 2 CPUs running at the same
+frequency.
+
+Required properties:
+ - compatible: must be one of:
+ * "marvell,ap806-cpu-clock"
+ * "marvell,ap807-cpu-clock"
+- #clock-cells : should be set to 1.
+
+- clocks : shall be the input parent clock(s) phandle for the clock
+ (one per cluster)
+
+- reg: register range associated with the cluster clocks
+
+ap_syscon1: system-controller@6f8000 {
+ compatible = "marvell,armada-ap806-syscon1", "syscon", "simple-mfd";
+ reg = <0x6f8000 0x1000>;
+
+ cpu_clk: clock-cpu@278 {
+ compatible = "marvell,ap806-cpu-clock";
+ clocks = <&ap_clk 0>, <&ap_clk 1>;
+ #clock-cells = <1>;
+ reg = <0x278 0xa30>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/arm/marvell/armada-370-xp.txt b/Documentation/devicetree/bindings/arm/marvell/armada-370-xp.txt
deleted file mode 100644
index c6ed90ea6e17..000000000000
--- a/Documentation/devicetree/bindings/arm/marvell/armada-370-xp.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-Marvell Armada 370 and Armada XP Platforms Device Tree Bindings
----------------------------------------------------------------
-
-Boards with a SoC of the Marvell Armada 370 and Armada XP families
-shall have the following property:
-
-Required root node property:
-
-compatible: must contain "marvell,armada-370-xp"
-
-In addition, boards using the Marvell Armada 370 SoC shall have the
-following property:
-
-Required root node property:
-
-compatible: must contain "marvell,armada370"
-
-In addition, boards using the Marvell Armada XP SoC shall have the
-following property:
-
-Required root node property:
-
-compatible: must contain "marvell,armadaxp"
-
diff --git a/Documentation/devicetree/bindings/arm/marvell/armada-375.txt b/Documentation/devicetree/bindings/arm/marvell/armada-375.txt
deleted file mode 100644
index 867d0b80cb8f..000000000000
--- a/Documentation/devicetree/bindings/arm/marvell/armada-375.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-Marvell Armada 375 Platforms Device Tree Bindings
--------------------------------------------------
-
-Boards with a SoC of the Marvell Armada 375 family shall have the
-following property:
-
-Required root node property:
-
-compatible: must contain "marvell,armada375"
diff --git a/Documentation/devicetree/bindings/arm/marvell/armada-37xx.txt b/Documentation/devicetree/bindings/arm/marvell/armada-37xx.txt
deleted file mode 100644
index eddde4faef01..000000000000
--- a/Documentation/devicetree/bindings/arm/marvell/armada-37xx.txt
+++ /dev/null
@@ -1,50 +0,0 @@
-Marvell Armada 37xx Platforms Device Tree Bindings
---------------------------------------------------
-
-Boards using a SoC of the Marvell Armada 37xx family must carry the
-following root node property:
-
- - compatible: must contain "marvell,armada3710"
-
-In addition, boards using the Marvell Armada 3720 SoC shall have the
-following property before the previous one:
-
- - compatible: must contain "marvell,armada3720"
-
-Example:
-
-compatible = "marvell,armada-3720-db", "marvell,armada3720", "marvell,armada3710";
-
-
-Power management
-----------------
-
-For power management (particularly DVFS and AVS), the North Bridge
-Power Management component is needed:
-
-Required properties:
-- compatible : should contain "marvell,armada-3700-nb-pm", "syscon";
-- reg : the register start and length for the North Bridge
- Power Management
-
-Example:
-
-nb_pm: syscon@14000 {
- compatible = "marvell,armada-3700-nb-pm", "syscon";
- reg = <0x14000 0x60>;
-}
-
-AVS
----
-
-For AVS an other component is needed:
-
-Required properties:
-- compatible : should contain "marvell,armada-3700-avs", "syscon";
-- reg : the register start and length for the AVS
-
-Example:
-avs: avs@11500 {
- compatible = "marvell,armada-3700-avs", "syscon";
- reg = <0x11500 0x40>;
-}
diff --git a/Documentation/devicetree/bindings/arm/marvell/armada-37xx.yaml b/Documentation/devicetree/bindings/arm/marvell/armada-37xx.yaml
new file mode 100644
index 000000000000..b2f4fe81b97c
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/marvell/armada-37xx.yaml
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/marvell/armada-37xx.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell Armada 37xx Platforms
+
+maintainers:
+ - Robert Marko <robert.marko@sartura.hr>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ - description: Armada 3720 SoC boards
+ items:
+ - enum:
+ - cznic,turris-mox
+ - glinet,gl-mv1000
+ - globalscale,espressobin
+ - marvell,armada-3720-db
+ - methode,edpu
+ - methode,udpu
+ - ripe,atlas-v5
+ - const: marvell,armada3720
+ - const: marvell,armada3710
+
+ - description: Globalscale Espressobin boards
+ items:
+ - enum:
+ - globalscale,espressobin-emmc
+ - globalscale,espressobin-ultra
+ - globalscale,espressobin-v7
+ - const: globalscale,espressobin
+ - const: marvell,armada3720
+ - const: marvell,armada3710
+
+ - description: Globalscale Espressobin V7 boards
+ items:
+ - enum:
+ - globalscale,espressobin-v7-emmc
+ - const: globalscale,espressobin-v7
+ - const: globalscale,espressobin
+ - const: marvell,armada3720
+ - const: marvell,armada3710
+
+additionalProperties: true
diff --git a/Documentation/devicetree/bindings/arm/marvell/armada-38x.txt b/Documentation/devicetree/bindings/arm/marvell/armada-38x.txt
deleted file mode 100644
index 202953f1887e..000000000000
--- a/Documentation/devicetree/bindings/arm/marvell/armada-38x.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-Marvell Armada 38x Platforms Device Tree Bindings
--------------------------------------------------
-
-Boards with a SoC of the Marvell Armada 38x family shall have the
-following property:
-
-Required root node property:
-
- - compatible: must contain "marvell,armada380"
-
-In addition, boards using the Marvell Armada 385 SoC shall have the
-following property before the previous one:
-
-Required root node property:
-
-compatible: must contain "marvell,armada385"
-
-In addition, boards using the Marvell Armada 388 SoC shall have the
-following property before the previous one:
-
-Required root node property:
-
-compatible: must contain "marvell,armada388"
-
-Example:
-
-compatible = "marvell,a385-rd", "marvell,armada385", "marvell,armada380";
diff --git a/Documentation/devicetree/bindings/arm/marvell/armada-38x.yaml b/Documentation/devicetree/bindings/arm/marvell/armada-38x.yaml
new file mode 100644
index 000000000000..cdf805b5db95
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/marvell/armada-38x.yaml
@@ -0,0 +1,70 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/marvell/armada-38x.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell Armada 38x Platforms
+
+maintainers:
+ - Gregory CLEMENT <gregory.clement@bootlin.com>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+
+ - description:
+ Netgear Armada 380 GS110EM Managed Switch.
+ items:
+ - const: netgear,gs110emx
+ - const: marvell,armada380
+
+ - description:
+ Marvell Armada 385 Development Boards.
+ items:
+ - enum:
+ - marvell,a385-db-amc
+ - marvell,a385-db-ap
+ - const: marvell,armada385
+ - const: marvell,armada380
+
+ - description:
+ SolidRun Armada 385 based single-board computers.
+ items:
+ - enum:
+ - solidrun,clearfog-gtr-l8
+ - solidrun,clearfog-gtr-s4
+ - const: marvell,armada385
+ - const: marvell,armada380
+
+ - description:
+ Kobol Armada 388 based Helios-4 NAS.
+ items:
+ - const: kobol,helios4
+ - const: marvell,armada388
+ - const: marvell,armada385
+ - const: marvell,armada380
+
+ - description:
+ Marvell Armada 388 Development Boards.
+ items:
+ - enum:
+ - marvell,a388-gp
+ - const: marvell,armada388
+ - const: marvell,armada385
+ - const: marvell,armada380
+
+ - description:
+ SolidRun Armada 388 clearfog family single-board computers.
+ items:
+ - enum:
+ - solidrun,clearfog-base-a1
+ - solidrun,clearfog-pro-a1
+ - const: solidrun,clearfog-a1
+ - const: marvell,armada388
+ - const: marvell,armada385
+ - const: marvell,armada380
+
+additionalProperties: true
diff --git a/Documentation/devicetree/bindings/arm/marvell/armada-39x.txt b/Documentation/devicetree/bindings/arm/marvell/armada-39x.txt
deleted file mode 100644
index 89468664f6ea..000000000000
--- a/Documentation/devicetree/bindings/arm/marvell/armada-39x.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-Marvell Armada 39x Platforms Device Tree Bindings
--------------------------------------------------
-
-Boards with a SoC of the Marvell Armada 39x family shall have the
-following property:
-
-Required root node property:
-
- - compatible: must contain "marvell,armada390"
-
-In addition, boards using the Marvell Armada 395 SoC shall have the
-following property before the common "marvell,armada390" one:
-
-Required root node property:
-
-compatible: must contain "marvell,armada395"
-
-Example:
-
-compatible = "marvell,a395-gp", "marvell,armada395", "marvell,armada390";
-
-Boards using the Marvell Armada 398 SoC shall have the following
-property before the common "marvell,armada390" one:
-
-Required root node property:
-
-compatible: must contain "marvell,armada398"
-
-Example:
-
-compatible = "marvell,a398-db", "marvell,armada398", "marvell,armada390";
diff --git a/Documentation/devicetree/bindings/arm/marvell/armada-7k-8k.txt b/Documentation/devicetree/bindings/arm/marvell/armada-7k-8k.txt
deleted file mode 100644
index df98a9c82a8c..000000000000
--- a/Documentation/devicetree/bindings/arm/marvell/armada-7k-8k.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-Marvell Armada 7K/8K Platforms Device Tree Bindings
----------------------------------------------------
-
-Boards using a SoC of the Marvell Armada 7K or 8K families must carry
-the following root node property:
-
- - compatible, with one of the following values:
-
- - "marvell,armada7020", "marvell,armada-ap806-dual", "marvell,armada-ap806"
- when the SoC being used is the Armada 7020
-
- - "marvell,armada7040", "marvell,armada-ap806-quad", "marvell,armada-ap806"
- when the SoC being used is the Armada 7040
-
- - "marvell,armada8020", "marvell,armada-ap806-dual", "marvell,armada-ap806"
- when the SoC being used is the Armada 8020
-
- - "marvell,armada8040", "marvell,armada-ap806-quad", "marvell,armada-ap806"
- when the SoC being used is the Armada 8040
-
-Example:
-
-compatible = "marvell,armada7040-db", "marvell,armada7040",
- "marvell,armada-ap806-quad", "marvell,armada-ap806";
diff --git a/Documentation/devicetree/bindings/arm/marvell/armada-7k-8k.yaml b/Documentation/devicetree/bindings/arm/marvell/armada-7k-8k.yaml
new file mode 100644
index 000000000000..4bc7454a5d3a
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/marvell/armada-7k-8k.yaml
@@ -0,0 +1,128 @@
+# SPDX-License-Identifier: (GPL-2.0+ OR X11)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/marvell/armada-7k-8k.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell Armada 7K/8K Platforms
+
+maintainers:
+ - Gregory CLEMENT <gregory.clement@bootlin.com>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+
+ - description: Armada 7020 SoC
+ items:
+ - const: marvell,armada7020
+ - const: marvell,armada-ap806-dual
+ - const: marvell,armada-ap806
+
+ - description: Armada 7040 SoC
+ items:
+ - enum:
+ - globalscale,mochabin
+ - marvell,armada7040-db
+ - const: marvell,armada7040
+ - const: marvell,armada-ap806-quad
+ - const: marvell,armada-ap806
+
+ - description: Armada 8020 SoC
+ items:
+ - const: marvell,armada8020
+ - const: marvell,armada-ap806-dual
+ - const: marvell,armada-ap806
+
+ - description: Armada 8040 SoC
+ items:
+ - enum:
+ - iei,puzzle-m801
+ - marvell,armada8040-db
+ - solidrun,clearfog-gt-8k
+ - const: marvell,armada8040
+ - const: marvell,armada-ap806-quad
+ - const: marvell,armada-ap806
+
+ - description: Armada 8040 SoC MACCHIATOBin Boards
+ items:
+ - enum:
+ - marvell,armada8040-mcbin-doubleshot
+ - marvell,armada8040-mcbin-singleshot
+ - const: marvell,armada8040-mcbin
+ - const: marvell,armada8040
+ - const: marvell,armada-ap806-quad
+ - const: marvell,armada-ap806
+
+ - description: Armada 8080 SoC
+ items:
+ - enum:
+ - marvell,armada-8080-db
+ - const: marvell,armada-8080
+ - const: marvell,armada-ap810-octa
+ - const: marvell,armada-ap810
+
+ - description: Armada CN9130 SoC with no external CP
+ items:
+ - const: marvell,cn9130
+ - const: marvell,armada-ap807-quad
+ - const: marvell,armada-ap807
+
+ - description: Armada CN9131 SoC with one external CP
+ items:
+ - const: marvell,cn9131
+ - const: marvell,cn9130
+ - const: marvell,armada-ap807-quad
+ - const: marvell,armada-ap807
+
+ - description: Armada CN9132 SoC with two external CPs
+ items:
+ - const: marvell,cn9132
+ - const: marvell,cn9131
+ - const: marvell,cn9130
+ - const: marvell,armada-ap807-quad
+ - const: marvell,armada-ap807
+
+ - description:
+ Alleycat5X (98DX35xx) Reference Design as COM Express Carrier plus
+ Armada CN9130 COM Express CPU module
+ items:
+ - const: marvell,cn9130-ac5x-carrier
+ - const: marvell,rd-ac5x-carrier
+ - const: marvell,cn9130-cpu-module
+ - const: marvell,cn9130
+ - const: marvell,armada-ap807-quad
+ - const: marvell,armada-ap807
+
+ - description:
+ Alleycat5X (98DX35xx) Reference Design as COM Express Carrier plus
+ Armada CN9131 COM Express CPU module
+ items:
+ - const: marvell,cn9131-ac5x-carrier
+ - const: marvell,rd-ac5x-carrier
+ - const: marvell,cn9131-cpu-module
+ - const: marvell,cn9131
+ - const: marvell,armada-ap807-quad
+ - const: marvell,armada-ap807
+
+ - description:
+ SolidRun CN9130 SoM based single-board computers
+ items:
+ - enum:
+ - solidrun,cn9130-clearfog-base
+ - solidrun,cn9130-clearfog-pro
+ - solidrun,cn9131-solidwan
+ - const: solidrun,cn9130-sr-som
+ - const: marvell,cn9130
+
+ - description:
+ SolidRun CN9132 COM-Express Type 7 based single-board computers
+ items:
+ - enum:
+ - solidrun,cn9132-clearfog
+ - const: solidrun,cn9132-sr-cex7
+ - const: marvell,cn9130
+
+additionalProperties: true
diff --git a/Documentation/devicetree/bindings/arm/marvell/armada-8kp.txt b/Documentation/devicetree/bindings/arm/marvell/armada-8kp.txt
deleted file mode 100644
index f3e9624534c6..000000000000
--- a/Documentation/devicetree/bindings/arm/marvell/armada-8kp.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-Marvell Armada 8KPlus Platforms Device Tree Bindings
-----------------------------------------------------
-
-Boards using a SoC of the Marvell Armada 8KP families must carry
-the following root node property:
-
- - compatible, with one of the following values:
-
- - "marvell,armada-8080", "marvell,armada-ap810-octa", "marvell,armada-ap810"
- when the SoC being used is the Armada 8080
-
-Example:
-
-compatible = "marvell,armada-8080-db", "marvell,armada-8080",
- "marvell,armada-ap810-octa", "marvell,armada-ap810"
diff --git a/Documentation/devicetree/bindings/arm/marvell/cp110-system-controller.txt b/Documentation/devicetree/bindings/arm/marvell/cp110-system-controller.txt
index 81ce742d2760..54ff9f218328 100644
--- a/Documentation/devicetree/bindings/arm/marvell/cp110-system-controller.txt
+++ b/Documentation/devicetree/bindings/arm/marvell/cp110-system-controller.txt
@@ -21,13 +21,13 @@ The Device Tree node representing this System Controller 0 provides a
number of clocks:
- a set of core clocks
- - a set of gatable clocks
+ - a set of gateable clocks
Those clocks can be referenced by other Device Tree nodes using two
cells:
- The first cell must be 0 or 1. 0 for the core clocks and 1 for the
- gatable clocks.
- - The second cell identifies the particular core clock or gatable
+ gateable clocks.
+ - The second cell identifies the particular core clock or gateable
clocks.
The following clocks are available:
@@ -38,7 +38,7 @@ The following clocks are available:
- 0 3 Core
- 0 4 NAND core
- 0 5 SDIO core
- - Gatable clocks
+ - Gateable clocks
- 1 0 Audio
- 1 1 Comm Unit
- 1 2 NAND
@@ -78,8 +78,8 @@ Documentation/devicetree/bindings/pinctrl/marvell,mvebu-pinctrl.txt.
Required properties:
-- compatible: "marvell,armada-7k-pinctrl",
- "marvell,armada-8k-cpm-pinctrl" or "marvell,armada-8k-cps-pinctrl"
+- compatible: "marvell,armada-7k-pinctrl", "marvell,armada-8k-cpm-pinctrl",
+ "marvell,armada-8k-cps-pinctrl" or "marvell,cp115-standalone-pinctrl"
depending on the specific variant of the SoC being used.
Available mpp pins/groups and functions:
@@ -142,8 +142,8 @@ mpp50 50 gpio, ge1(rxclk), mss_i2c(sda), spi1(csn0), uart2(txd), uart0(rxd), xg(
mpp51 51 gpio, ge1(rxd0), mss_i2c(sck), spi1(csn1), uart2(rxd), uart0(cts), sdio(pwr10)
mpp52 52 gpio, ge1(rxd1), synce1(clk), synce2(clk), spi1(csn2), uart1(cts), led(clk), pcie(rstoutn), pcie0(clkreq)
mpp53 53 gpio, ge1(rxd2), ptp(clk), spi1(csn3), uart1(rxd), led(stb), sdio(led)
-mpp54 54 gpio, ge1(rxd3), synce2(clk), ptp(pclk_out), synce1(clk), led(data), sdio(hw_rst), sdio(wr_protect)
-mpp55 55 gpio, ge1(rxctl_rxdv), ptp(pulse), sdio(led), sdio(card_detect)
+mpp54 54 gpio, ge1(rxd3), synce2(clk), ptp(pclk_out), synce1(clk), led(data), sdio(hw_rst), sdio_wp(wr_protect)
+mpp55 55 gpio, ge1(rxctl_rxdv), ptp(pulse), sdio(led), sdio_cd(card_detect)
mpp56 56 gpio, tdm(drx), au(i2sdo_spdifo), spi0(clk), uart1(rxd), sata1(present_act), sdio(clk)
mpp57 57 gpio, mss_i2c(sda), ptp(pclk_out), tdm(intn), au(i2sbclk), spi0(mosi), uart1(txd), sata0(present_act), sdio(cmd)
mpp58 58 gpio, mss_i2c(sck), ptp(clk), tdm(rstn), au(i2sdi), spi0(miso), uart1(cts), led(clk), sdio(d0)
@@ -156,7 +156,7 @@ GPIO:
-----
For common binding part and usage, refer to
-Documentation/devicetree/bindings/gpio/gpio-mvebu.txt.
+Documentation/devicetree/bindings/gpio/gpio-mvebu.yaml.
Required properties:
@@ -189,37 +189,3 @@ CP110_LABEL(syscon0): system-controller@440000 {
};
};
-
-SYSTEM CONTROLLER 1
-===================
-
-Thermal:
---------
-
-The thermal IP can probe the temperature all around the processor. It
-may feature several channels, each of them wired to one sensor.
-
-For common binding part and usage, refer to
-Documentation/devicetree/bindings/thermal/thermal.txt
-
-Required properties:
-- compatible: must be one of:
- * marvell,armada-cp110-thermal
-- reg: register range associated with the thermal functions.
-
-Optional properties:
-- #thermal-sensor-cells: shall be <1> when thermal-zones subnodes refer
- to this IP and represents the channel ID. There is one sensor per
- channel. O refers to the thermal IP internal channel.
-
-Example:
-CP110_LABEL(syscon1): system-controller@6f8000 {
- compatible = "syscon", "simple-mfd";
- reg = <0x6f8000 0x1000>;
-
- CP110_LABEL(thermal): thermal-sensor@70 {
- compatible = "marvell,armada-cp110-thermal";
- reg = <0x70 0x10>;
- #thermal-sensor-cells = <1>;
- };
-};
diff --git a/Documentation/devicetree/bindings/arm/marvell/kirkwood.txt b/Documentation/devicetree/bindings/arm/marvell/kirkwood.txt
deleted file mode 100644
index 98cce9a653eb..000000000000
--- a/Documentation/devicetree/bindings/arm/marvell/kirkwood.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-Marvell Kirkwood Platforms Device Tree Bindings
------------------------------------------------
-
-Boards with a SoC of the Marvell Kirkwood
-shall have the following property:
-
-Required root node property:
-
-compatible: must contain "marvell,kirkwood";
-
-In order to support the kirkwood cpufreq driver, there must be a node
-cpus/cpu@0 with three clocks, "cpu_clk", "ddrclk" and "powersave",
-where the "powersave" clock is a gating clock used to switch the CPU
-between the "cpu_clk" and the "ddrclk".
-
-Example:
-
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- cpu@0 {
- device_type = "cpu";
- compatible = "marvell,sheeva-88SV131";
- clocks = <&core_clk 1>, <&core_clk 3>, <&gate_clk 11>;
- clock-names = "cpu_clk", "ddrclk", "powersave";
- };
diff --git a/Documentation/devicetree/bindings/arm/marvell/marvell,ac5.yaml b/Documentation/devicetree/bindings/arm/marvell/marvell,ac5.yaml
new file mode 100644
index 000000000000..8960fb8b2b2f
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/marvell/marvell,ac5.yaml
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/marvell/marvell,ac5.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell Alleycat5/5X Platforms
+
+maintainers:
+ - Chris Packham <chris.packham@alliedtelesis.co.nz>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ - description: Alleycat5 (98DX25xx) Reference Design
+ items:
+ - enum:
+ - marvell,rd-ac5
+ - const: marvell,ac5
+
+ - description: Alleycat5X (98DX35xx) Reference Design
+ items:
+ - enum:
+ - marvell,rd-ac5x
+ - const: marvell,ac5x
+ - const: marvell,ac5
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/marvell/marvell,armada-370-xp.yaml b/Documentation/devicetree/bindings/arm/marvell/marvell,armada-370-xp.yaml
new file mode 100644
index 000000000000..e65eadfbd097
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/marvell/marvell,armada-370-xp.yaml
@@ -0,0 +1,78 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+---
+$id: http://devicetree.org/schemas/arm/marvell/marvell,armada-370-xp.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell Armada 370 and Armada XP platforms
+
+maintainers:
+ - Andrew Lunn <andrew@lunn.ch>
+ - Gregory Clement <gregory.clement@bootlin.com>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - ctera,c200-v2
+ - dlink,dns327l
+ - globalscale,mirabox
+ - netgear,readynas-102
+ - netgear,readynas-104
+ - marvell,a370-db
+ - marvell,a370-rd
+ - seagate,dart-2
+ - seagate,dart-4
+ - seagate,cumulus-max
+ - seagate,cumulus
+ - synology,ds213j
+ - const: marvell,armada370
+ - const: marvell,armada-370-xp
+
+ - items:
+ - enum:
+ - mikrotik,crs305-1g-4s
+ - mikrotik,crs326-24g-2s
+ - mikrotik,crs328-4c-20s-4s
+ - const: marvell,armadaxp-98dx3236
+ - const: marvell,armada-370-xp
+
+ - items:
+ - const: marvell,db-xc3-24g4xg
+ - const: marvell,armadaxp-98dx3336
+ - const: marvell,armada-370-xp
+
+ - items:
+ - const: marvell,db-dxbc2
+ - const: marvell,armadaxp-98dx4251
+ - const: marvell,armada-370-xp
+
+ - items:
+ - enum:
+ - lenovo,ix4-300d
+ - linksys,mamba
+ - marvell,rd-axpwifiap
+ - netgear,readynas-2120
+ - synology,ds414
+ - const: marvell,armadaxp-mv78230
+ - const: marvell,armadaxp
+ - const: marvell,armada-370-xp
+
+ - items:
+ - const: plathome,openblocks-ax3-4
+ - const: marvell,armadaxp-mv78260
+ - const: marvell,armadaxp
+ - const: marvell,armada-370-xp
+
+ - items:
+ - enum:
+ - marvell,axp-db
+ - marvell,axp-gp
+ - marvell,axp-matrix
+ - const: marvell,armadaxp-mv78460
+ - const: marvell,armadaxp
+ - const: marvell,armada-370-xp
+
+additionalProperties: true
diff --git a/Documentation/devicetree/bindings/arm/marvell/marvell,armada375.yaml b/Documentation/devicetree/bindings/arm/marvell/marvell,armada375.yaml
new file mode 100644
index 000000000000..81c33e46fecc
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/marvell/marvell,armada375.yaml
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/marvell/marvell,armada375.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell Armada 375 Platform
+
+maintainers:
+ - Andrew Lunn <andrew@lunn.ch>
+ - Gregory Clement <gregory.clement@bootlin.com>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ items:
+ - const: marvell,a375-db
+ - const: marvell,armada375
+
+additionalProperties: true
diff --git a/Documentation/devicetree/bindings/arm/marvell/marvell,armada390.yaml b/Documentation/devicetree/bindings/arm/marvell/marvell,armada390.yaml
new file mode 100644
index 000000000000..5ff6a5439525
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/marvell/marvell,armada390.yaml
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/marvell/marvell,armada390.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell Armada 39x Platforms
+
+maintainers:
+ - Andrew Lunn <andrew@lunn.ch>
+ - Gregory Clement <gregory.clement@bootlin.com>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ - items:
+ - const: marvell,a390-db
+ - const: marvell,armada390
+ - items:
+ - enum:
+ - marvell,a398-db
+ - const: marvell,armada398
+ - const: marvell,armada390
+ - items:
+ - enum:
+ - marvell,a395-gp
+ - const: marvell,armada395
+ - const: marvell,armada390
+
+additionalProperties: true
diff --git a/Documentation/devicetree/bindings/arm/marvell/marvell,dove.txt b/Documentation/devicetree/bindings/arm/marvell/marvell,dove.txt
deleted file mode 100644
index aaaf64c56e44..000000000000
--- a/Documentation/devicetree/bindings/arm/marvell/marvell,dove.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-Marvell Dove Platforms Device Tree Bindings
------------------------------------------------
-
-Boards with a Marvell Dove SoC shall have the following properties:
-
-Required root node property:
-- compatible: must contain "marvell,dove";
-
-* Global Configuration registers
-
-Global Configuration registers of Dove SoC are shared by a syscon node.
-
-Required properties:
-- compatible: must contain "marvell,dove-global-config" and "syscon".
-- reg: base address and size of the Global Configuration registers.
-
-Example:
-
-gconf: global-config@e802c {
- compatible = "marvell,dove-global-config", "syscon";
- reg = <0xe802c 0x14>;
-};
diff --git a/Documentation/devicetree/bindings/arm/marvell/marvell,dove.yaml b/Documentation/devicetree/bindings/arm/marvell/marvell,dove.yaml
new file mode 100644
index 000000000000..a37804fb30c4
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/marvell/marvell,dove.yaml
@@ -0,0 +1,35 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/marvell/marvell,dove.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell Dove SoC
+
+maintainers:
+ - Andrew Lunn <andrew@lunn.ch>
+ - Gregory Clement <gregory.clement@bootlin.com>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - compulab,cm-a510
+ - solidrun,cubox
+ - globalscale,d2plug
+ - globalscale,d3plug
+ - marvell,dove-db
+ - const: marvell,dove
+ - items:
+ - const: solidrun,cubox-es
+ - const: solidrun,cubox
+ - const: marvell,dove
+ - items:
+ - const: compulab,sbc-a510
+ - const: compulab,cm-a510
+ - const: marvell,dove
+
+additionalProperties: true
diff --git a/Documentation/devicetree/bindings/arm/marvell/marvell,kirkwood.txt b/Documentation/devicetree/bindings/arm/marvell/marvell,kirkwood.txt
deleted file mode 100644
index 7d28fe4bf654..000000000000
--- a/Documentation/devicetree/bindings/arm/marvell/marvell,kirkwood.txt
+++ /dev/null
@@ -1,105 +0,0 @@
-Marvell Kirkwood SoC Family Device Tree Bindings
-------------------------------------------------
-
-Boards with a SoC of the Marvell Kirkwook family, eg 88f6281
-
-* Required root node properties:
-compatible: must contain "marvell,kirkwood"
-
-In addition, the above compatible shall be extended with the specific
-SoC. Currently known SoC compatibles are:
-
-"marvell,kirkwood-88f6192"
-"marvell,kirkwood-88f6281"
-"marvell,kirkwood-88f6282"
-"marvell,kirkwood-88f6283"
-"marvell,kirkwood-88f6702"
-"marvell,kirkwood-98DX4122"
-
-And in addition, the compatible shall be extended with the specific
-board. Currently known boards are:
-
-"buffalo,linkstation-lsqvl"
-"buffalo,linkstation-lsvl"
-"buffalo,linkstation-lswsxl"
-"buffalo,linkstation-lswxl"
-"buffalo,linkstation-lswvl"
-"buffalo,lschlv2"
-"buffalo,lsxhl"
-"buffalo,lsxl"
-"cloudengines,pogo02"
-"cloudengines,pogoplugv4"
-"dlink,dns-320"
-"dlink,dns-320-a1"
-"dlink,dns-325"
-"dlink,dns-325-a1"
-"dlink,dns-kirkwood"
-"excito,b3"
-"globalscale,dreamplug-003-ds2001"
-"globalscale,guruplug"
-"globalscale,guruplug-server-plus"
-"globalscale,sheevaplug"
-"globalscale,sheevaplug"
-"globalscale,sheevaplug-esata"
-"globalscale,sheevaplug-esata-rev13"
-"iom,iconnect"
-"iom,iconnect-1.1"
-"iom,ix2-200"
-"keymile,km_kirkwood"
-"lacie,cloudbox"
-"lacie,inetspace_v2"
-"lacie,laplug"
-"lacie,nas2big"
-"lacie,netspace_lite_v2"
-"lacie,netspace_max_v2"
-"lacie,netspace_mini_v2"
-"lacie,netspace_v2"
-"marvell,db-88f6281-bp"
-"marvell,db-88f6282-bp"
-"marvell,mv88f6281gtw-ge"
-"marvell,rd88f6281"
-"marvell,rd88f6281"
-"marvell,rd88f6281-a0"
-"marvell,rd88f6281-a1"
-"mpl,cec4"
-"mpl,cec4-10"
-"netgear,readynas"
-"netgear,readynas"
-"netgear,readynas-duo-v2"
-"netgear,readynas-nv+-v2"
-"plathome,openblocks-a6"
-"plathome,openblocks-a7"
-"raidsonic,ib-nas6210"
-"raidsonic,ib-nas6210-b"
-"raidsonic,ib-nas6220"
-"raidsonic,ib-nas6220-b"
-"raidsonic,ib-nas62x0"
-"seagate,dockstar"
-"seagate,goflexnet"
-"synology,ds109"
-"synology,ds110jv10"
-"synology,ds110jv20"
-"synology,ds110jv30"
-"synology,ds111"
-"synology,ds209"
-"synology,ds210jv10"
-"synology,ds210jv20"
-"synology,ds212"
-"synology,ds212jv10"
-"synology,ds212jv20"
-"synology,ds212pv10"
-"synology,ds409"
-"synology,ds409slim"
-"synology,ds410j"
-"synology,ds411"
-"synology,ds411j"
-"synology,ds411slim"
-"synology,ds413jv10"
-"synology,rs212"
-"synology,rs409"
-"synology,rs411"
-"synology,rs812"
-"usi,topkick"
-"usi,topkick-1281P2"
-"zyxel,nsa310"
-"zyxel,nsa310a"
diff --git a/Documentation/devicetree/bindings/arm/marvell/marvell,kirkwood.yaml b/Documentation/devicetree/bindings/arm/marvell/marvell,kirkwood.yaml
new file mode 100644
index 000000000000..120784066833
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/marvell/marvell,kirkwood.yaml
@@ -0,0 +1,266 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/marvell/marvell,kirkwood.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell Kirkwood SoC Family
+
+maintainers:
+ - Andrew Lunn <andrew@lunn.ch>
+ - Gregory Clement <gregory.clement@bootlin.com>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - qnap,ts219
+ - qnap,ts419
+ - synology,ds110
+ - synology,ds111
+ - synology,ds209
+ - synology,ds409slim
+ - synology,ds411j
+ - synology,ds411slim
+ - synology,rs212
+ - synology,rs409
+ - const: marvell,kirkwood
+
+ - items:
+ - const: synology,ds109
+ - const: synology,ds110jv20
+ - const: synology,ds110
+ - const: marvell,kirkwood
+
+ - items:
+ - const: synology,ds110jv10
+ - const: synology,ds110jv30
+ - const: marvell,kirkwood
+
+ - items:
+ - const: synology,ds210jv10
+ - const: synology,ds210jv20
+ - const: synology,ds210jv30
+ - const: synology,ds211j
+ - const: marvell,kirkwood
+
+ - items:
+ - const: synology,ds212jv10
+ - const: synology,ds212jv20
+ - const: marvell,kirkwood
+
+ - items:
+ - const: synology,ds212
+ - const: synology,ds212pv10
+ - const: synology,ds212pv10
+ - const: synology,ds212pv20
+ - const: synology,ds213airv10
+ - const: synology,ds213v10
+ - const: marvell,kirkwood
+
+ - items:
+ - const: synology,ds409
+ - const: synology,ds410j
+ - const: marvell,kirkwood
+
+ - items:
+ - const: synology,ds411
+ - const: synology,ds413jv10
+ - const: marvell,kirkwood
+
+ - items:
+ - const: synology,rs411
+ - const: synology,rs812
+ - const: marvell,kirkwood
+
+ - items:
+ - enum:
+ - cloudengines,pogoplugv4
+ - lacie,laplug
+ - lacie,netspace_lite_v2
+ - lacie,netspace_mini_v2
+ - marvell,rd88f6192
+ - seagate,blackarmor-nas220
+ - enum:
+ - marvell,kirkwood-88f6192
+ - const: marvell,kirkwood
+
+ - items:
+ - enum:
+ - buffalo,lswsxl
+ - buffalo,lswxl
+ - checkpoint,l-50
+ - cloudengines,pogoe02
+ - ctera,c200-v1
+ - dlink,dir-665
+ - endian,4i-edge-200
+ - excito,b3
+ - globalscale,sheevaplug
+ - hp,t5325
+ - iom,ix2-200
+ - lacie,inetspace_v2
+ - lacie,netspace_v2
+ - lacie,netspace_max_v2
+ - marvell,db-88f6281-bp
+ - marvell,mv88f6281gtw-ge
+ - seagate,dockstar
+ - seagate,goflexnet
+ - zyxel,nsa310
+ - zyxel,nsa320
+ - const: marvell,kirkwood-88f6281
+ - const: marvell,kirkwood
+
+ - items:
+ - enum:
+ - buffalo,lschlv2
+ - buffalo,lsxhl
+ - const: buffalo,lsxl
+ - const: marvell,kirkwood-88f6281
+ - const: marvell,kirkwood
+
+ - items:
+ - const: dlink,dns-320-a1
+ - const: dlink,dns-320
+ - const: dlink,dns-kirkwood
+ - const: marvell,kirkwood-88f6281
+ - const: marvell,kirkwood
+
+ - items:
+ - const: dlink,dns-325-a1
+ - const: dlink,dns-325
+ - const: dlink,dns-kirkwood
+ - const: marvell,kirkwood-88f6281
+ - const: marvell,kirkwood
+
+ - items:
+ - const: globalscale,dreamplug-003-ds2001
+ - const: globalscale,dreamplug
+ - const: marvell,kirkwood-88f6281
+ - const: marvell,kirkwood
+
+ - items:
+ - const: globalscale,guruplug-server-plus
+ - const: globalscale,guruplug
+ - const: marvell,kirkwood-88f6281
+ - const: marvell,kirkwood
+
+ - items:
+ - const: globalscale,sheevaplug-esata-rev13
+ - const: globalscale,sheevaplug-esata
+ - const: globalscale,sheevaplug
+ - const: marvell,kirkwood-88f6281
+ - const: marvell,kirkwood
+
+ - items:
+ - const: iom,iconnect-1.1
+ - const: iom,iconnect
+ - const: marvell,kirkwood-88f6281
+ - const: marvell,kirkwood
+
+ - items:
+ - const: lacie,d2net_v2
+ - const: lacie,netxbig
+ - const: marvell,kirkwood-88f6281
+ - const: marvell,kirkwood
+ - items:
+ - enum:
+ - lacie,net2big_v2
+ - lacie,net5big_v2
+ - const: lacie,netxbig
+ - const: marvell,kirkwood-88f6281
+ - const: marvell,kirkwood
+
+ - items:
+ - enum:
+ - marvell,openrd-base
+ - marvell,openrd-client
+ - marvell,openrd-ultimate
+ - const: marvell,openrd
+ - const: marvell,kirkwood-88f6281
+ - const: marvell,kirkwood
+
+ - items:
+ - enum:
+ - marvell,rd88f6281-a
+ - marvell,rd88f6281-z0
+ - const: marvell,rd88f6281
+ - const: marvell,kirkwood-88f6281
+ - const: marvell,kirkwood
+
+ - items:
+ - const: mpl,cec4-10
+ - const: mpl,cec4
+ - const: marvell,kirkwood-88f6281
+ - const: marvell,kirkwood
+
+ - items:
+ - const: raidsonic,ib-nas6210-b
+ - const: raidsonic,ib-nas6220-b
+ - const: raidsonic,ib-nas6210
+ - const: raidsonic,ib-nas6220
+ - const: raidsonic,ib-nas62x0
+ - const: marvell,kirkwood-88f6281
+ - const: marvell,kirkwood
+
+ - items:
+ - const: zyxel,nsa310a
+ - const: zyxel,nsa310
+ - const: marvell,kirkwood-88f6281
+ - const: marvell,kirkwood
+
+ - items:
+ - enum:
+ - buffalo,lsqvl
+ - buffalo,lsvl
+ - buffalo,lswvl
+ - linksys,viper
+ - marvell,db-88f6282-bp
+ - zyxel,nsa325
+ - const: marvell,kirkwood-88f6282
+ - const: marvell,kirkwood
+
+ - items:
+ - const: lacie,nas2big
+ - const: lacie,netxbig
+ - const: marvell,kirkwood-88f6282
+ - const: marvell,kirkwood
+
+ - items:
+ - enum:
+ - netgear,readynas-duo-v2
+ - netgear,readynas-nv+-v2
+ - const: netgear,readynas
+ - const: marvell,kirkwood-88f6282
+ - const: marvell,kirkwood
+
+ - items:
+ - const: usi,topkick-1281P2
+ - const: usi,topkick
+ - const: marvell,kirkwood-88f6282
+ - const: marvell,kirkwood
+
+ - items:
+ - enum:
+ - plathome,openblocks-a6
+ - plathome,openblocks-a7
+ - const: marvell,kirkwood-88f6283
+ - const: marvell,kirkwood
+
+ - items:
+ - enum:
+ - lacie,cloudbox
+ - zyxel,nsa310s
+ - const: marvell,kirkwood-88f6702
+ - const: marvell,kirkwood
+
+ - items:
+ - enum:
+ - keymile,km_fixedeth
+ - keymile,km_kirkwood
+ - const: marvell,kirkwood-98DX4122
+ - const: marvell,kirkwood
+
+additionalProperties: true
diff --git a/Documentation/devicetree/bindings/arm/marvell/marvell,orion5x.txt b/Documentation/devicetree/bindings/arm/marvell/marvell,orion5x.txt
deleted file mode 100644
index 748a8f287462..000000000000
--- a/Documentation/devicetree/bindings/arm/marvell/marvell,orion5x.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-Marvell Orion SoC Family Device Tree Bindings
----------------------------------------------
-
-Boards with a SoC of the Marvell Orion family, eg 88f5181
-
-* Required root node properties:
-compatible: must contain "marvell,orion5x"
-
-In addition, the above compatible shall be extended with the specific
-SoC. Currently known SoC compatibles are:
-
-"marvell,orion5x-88f5181"
-"marvell,orion5x-88f5182"
-
-And in addition, the compatible shall be extended with the specific
-board. Currently known boards are:
-
-"buffalo,lsgl"
-"buffalo,lswsgl"
-"buffalo,lswtgl"
-"lacie,ethernet-disk-mini-v2"
-"lacie,d2-network"
-"marvell,rd-88f5182-nas"
-"maxtor,shared-storage-2"
-"netgear,wnr854t"
diff --git a/Documentation/devicetree/bindings/arm/marvell/marvell,orion5x.yaml b/Documentation/devicetree/bindings/arm/marvell/marvell,orion5x.yaml
new file mode 100644
index 000000000000..c0417591b2be
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/marvell/marvell,orion5x.yaml
@@ -0,0 +1,37 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/marvell/marvell,orion5x.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell Orion5x SoC Family
+
+maintainers:
+ - Andrew Lunn <andrew@lunn.ch>
+ - Gregory Clement <gregory.clement@bootlin.com>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - netgear,wnr854t
+ - const: marvell,orion5x-88f5181
+ - const: marvell,orion5x
+ - items:
+ - enum:
+ - buffalo,kurobox-pro
+ - buffalo,lschl
+ - buffalo,lsgl
+ - buffalo,lswsgl
+ - buffalo,lswtgl
+ - lacie,ethernet-disk-mini-v2
+ - lacie,d2-network
+ - marvell,rd-88f5182-nas
+ - maxtor,shared-storage-2
+ - const: marvell,orion5x-88f5182
+ - const: marvell,orion5x
+
+additionalProperties: true
diff --git a/Documentation/devicetree/bindings/arm/mediatek.txt b/Documentation/devicetree/bindings/arm/mediatek.txt
deleted file mode 100644
index 8f260e5cfd16..000000000000
--- a/Documentation/devicetree/bindings/arm/mediatek.txt
+++ /dev/null
@@ -1,79 +0,0 @@
-MediaTek SoC based Platforms Device Tree Bindings
-
-Boards with a MediaTek SoC shall have the following property:
-
-Required root node property:
-
-compatible: Must contain one of
- "mediatek,mt2701"
- "mediatek,mt2712"
- "mediatek,mt6580"
- "mediatek,mt6589"
- "mediatek,mt6592"
- "mediatek,mt6755"
- "mediatek,mt6765"
- "mediatek,mt6795"
- "mediatek,mt6797"
- "mediatek,mt7622"
- "mediatek,mt7623" which is referred to MT7623N SoC
- "mediatek,mt7623a"
- "mediatek,mt8127"
- "mediatek,mt8135"
- "mediatek,mt8173"
-
-
-Supported boards:
-
-- Evaluation board for MT2701:
- Required root node properties:
- - compatible = "mediatek,mt2701-evb", "mediatek,mt2701";
-- Evaluation board for MT2712:
- Required root node properties:
- - compatible = "mediatek,mt2712-evb", "mediatek,mt2712";
-- Evaluation board for MT6580:
- Required root node properties:
- - compatible = "mediatek,mt6580-evbp1", "mediatek,mt6580";
-- bq Aquaris5 smart phone:
- Required root node properties:
- - compatible = "mundoreader,bq-aquaris5", "mediatek,mt6589";
-- Evaluation board for MT6592:
- Required root node properties:
- - compatible = "mediatek,mt6592-evb", "mediatek,mt6592";
-- Evaluation phone for MT6755(Helio P10):
- Required root node properties:
- - compatible = "mediatek,mt6755-evb", "mediatek,mt6755";
-- Evaluation board for MT6765(Helio P22):
- Required root node properties:
- - compatible = "mediatek,mt6765-evb", "mediatek,mt6765";
-- Evaluation board for MT6795(Helio X10):
- Required root node properties:
- - compatible = "mediatek,mt6795-evb", "mediatek,mt6795";
-- Evaluation board for MT6797(Helio X20):
- Required root node properties:
- - compatible = "mediatek,mt6797-evb", "mediatek,mt6797";
-- Mediatek X20 Development Board:
- Required root node properties:
- - compatible = "archermind,mt6797-x20-dev", "mediatek,mt6797";
-- Reference board variant 1 for MT7622:
- Required root node properties:
- - compatible = "mediatek,mt7622-rfb1", "mediatek,mt7622";
-- Reference board for MT7623a with eMMC:
- Required root node properties:
- - compatible = "mediatek,mt7623a-rfb-emmc", "mediatek,mt7623";
-- Reference board for MT7623a with NAND:
- Required root node properties:
- - compatible = "mediatek,mt7623a-rfb-nand", "mediatek,mt7623";
-- Reference board for MT7623n with eMMC:
- Required root node properties:
- - compatible = "mediatek,mt7623n-rfb-emmc", "mediatek,mt7623";
-- Bananapi BPI-R2 board:
- - compatible = "bananapi,bpi-r2", "mediatek,mt7623";
-- MTK mt8127 tablet moose EVB:
- Required root node properties:
- - compatible = "mediatek,mt8127-moose", "mediatek,mt8127";
-- MTK mt8135 tablet EVB:
- Required root node properties:
- - compatible = "mediatek,mt8135-evbp1", "mediatek,mt8135";
-- MTK mt8173 tablet EVB:
- Required root node properties:
- - compatible = "mediatek,mt8173-evb", "mediatek,mt8173";
diff --git a/Documentation/devicetree/bindings/arm/mediatek.yaml b/Documentation/devicetree/bindings/arm/mediatek.yaml
new file mode 100644
index 000000000000..f04277873694
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/mediatek.yaml
@@ -0,0 +1,458 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/mediatek.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek SoC based Platforms
+
+maintainers:
+ - Sean Wang <sean.wang@mediatek.com>
+ - Matthias Brugger <matthias.bgg@gmail.com>
+description: |
+ Boards with a MediaTek SoC shall have the following properties.
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ # Sort by SoC (last) compatible, then board compatible
+ - items:
+ - enum:
+ - mediatek,mt2701-evb
+ - const: mediatek,mt2701
+
+ - items:
+ - enum:
+ - mediatek,mt2712-evb
+ - const: mediatek,mt2712
+ - items:
+ - enum:
+ - jty,d101
+ - lenovo,a369i
+ - const: mediatek,mt6572
+ - items:
+ - enum:
+ - mediatek,mt6580-evbp1
+ - const: mediatek,mt6580
+ - items:
+ - enum:
+ - prestigio,pmt5008-3g
+ - const: mediatek,mt6582
+ - items:
+ - enum:
+ - fairphone,fp1
+ - mundoreader,bq-aquaris5
+ - const: mediatek,mt6589
+ - items:
+ - enum:
+ - mediatek,mt6592-evb
+ - const: mediatek,mt6592
+ - items:
+ - enum:
+ - mediatek,mt6755-evb
+ - const: mediatek,mt6755
+ - items:
+ - enum:
+ - mediatek,mt6765-evb
+ - const: mediatek,mt6765
+ - items:
+ - enum:
+ - mediatek,mt6779-evb
+ - const: mediatek,mt6779
+ - items:
+ - enum:
+ - mediatek,mt6795-evb
+ - sony,xperia-m5
+ - const: mediatek,mt6795
+ - items:
+ - enum:
+ - archermind,mt6797-x20-dev
+ - mediatek,mt6797-evb
+ - const: mediatek,mt6797
+ - items:
+ - enum:
+ - bananapi,bpi-r64
+ - mediatek,mt7622-rfb1
+ - const: mediatek,mt7622
+ - items:
+ - enum:
+ - mediatek,mt7623a-rfb-emmc
+ - mediatek,mt7623a-rfb-nand
+ - mediatek,mt7623n-rfb-emmc
+ - bananapi,bpi-r2
+ - const: mediatek,mt7623
+
+ - items:
+ - enum:
+ - mediatek,mt7629-rfb
+ - const: mediatek,mt7629
+ - items:
+ - enum:
+ - cudy,wr3000-v1
+ - openwrt,one
+ - xiaomi,ax3000t
+ - const: mediatek,mt7981b
+ - items:
+ - enum:
+ - acelink,ew-7886cax
+ - bananapi,bpi-r3
+ - bananapi,bpi-r3mini
+ - mediatek,mt7986a-rfb
+ - const: mediatek,mt7986a
+ - items:
+ - enum:
+ - mediatek,mt7986b-rfb
+ - const: mediatek,mt7986b
+ - items:
+ - enum:
+ - bananapi,bpi-r4
+ - const: mediatek,mt7988a
+ - items:
+ - const: bananapi,bpi-r4-2g5
+ - const: bananapi,bpi-r4
+ - const: mediatek,mt7988a
+ - items:
+ - enum:
+ - mediatek,mt8127-moose
+ - const: mediatek,mt8127
+ - items:
+ - enum:
+ - mediatek,mt8135-evbp1
+ - const: mediatek,mt8135
+ - items:
+ - enum:
+ - mediatek,mt8167-pumpkin
+ - const: mediatek,mt8167
+ - description: Google Elm (Acer Chromebook R13)
+ items:
+ - const: google,elm-rev8
+ - const: google,elm-rev7
+ - const: google,elm-rev6
+ - const: google,elm-rev5
+ - const: google,elm-rev4
+ - const: google,elm-rev3
+ - const: google,elm
+ - const: mediatek,mt8173
+ - description: Google Hana (Lenovo Chromebook N23 Yoga, C330, 300e,...)
+ items:
+ - const: google,hana-rev6
+ - const: google,hana-rev5
+ - const: google,hana-rev4
+ - const: google,hana-rev3
+ - const: google,hana
+ - const: mediatek,mt8173
+ - description: Google Hana rev7 (Poin2 Chromebook 11C)
+ items:
+ - const: google,hana-rev7
+ - const: mediatek,mt8173
+ - items:
+ - enum:
+ - mediatek,mt8173-evb
+ - const: mediatek,mt8173
+ - description: Google Burnet (HP Chromebook x360 11MK G3 EE)
+ items:
+ - const: google,burnet
+ - const: mediatek,mt8183
+ - description: Google Cozmo (Acer Chromebook 314)
+ items:
+ - const: google,cozmo
+ - const: mediatek,mt8183
+ - description: Google Damu (ASUS Chromebook Flip CM3)
+ items:
+ - const: google,damu
+ - const: mediatek,mt8183
+ - description: Google Fennel (Lenovo IdeaPad 3 Chromebook)
+ items:
+ - enum:
+ - google,fennel-sku0
+ - google,fennel-sku1
+ - google,fennel-sku2
+ - google,fennel-sku6
+ - google,fennel-sku7
+ - const: google,fennel
+ - const: mediatek,mt8183
+ - description: Google Juniper (Acer Chromebook Spin 311) / Kenzo (Acer Chromebook 311)
+ items:
+ - enum:
+ - google,juniper-sku16
+ - google,juniper-sku17
+ - const: google,juniper
+ - const: mediatek,mt8183
+ - description: Google Kakadu (ASUS Chromebook Detachable CM3)
+ items:
+ - const: google,kakadu-rev3
+ - const: google,kakadu-rev2
+ - const: google,kakadu
+ - const: mediatek,mt8183
+ - description: Google Kakadu (ASUS Chromebook Detachable CM3)
+ items:
+ - const: google,kakadu-rev3-sku22
+ - const: google,kakadu-rev2-sku22
+ - const: google,kakadu
+ - const: mediatek,mt8183
+ - description: Google Kappa (HP Chromebook 11a)
+ items:
+ - const: google,kappa
+ - const: mediatek,mt8183
+ - description: Google Katsu (ASUS Chromebook Detachable CZ1)
+ items:
+ - enum:
+ - google,katsu-sku32
+ - google,katsu-sku38
+ - const: google,katsu
+ - const: mediatek,mt8183
+ - description: Google Kodama (Lenovo 10e Chromebook Tablet)
+ items:
+ - enum:
+ - google,kodama-sku16
+ - google,kodama-sku272
+ - google,kodama-sku288
+ - google,kodama-sku32
+ - const: google,kodama
+ - const: mediatek,mt8183
+ - description: Google Krane (Lenovo IdeaPad Duet, 10e,...)
+ items:
+ - enum:
+ - google,krane-sku0
+ - google,krane-sku176
+ - const: google,krane
+ - const: mediatek,mt8183
+ - description: Google Makomo (Lenovo 100e Chromebook 2nd Gen MTK 2)
+ items:
+ - enum:
+ - google,makomo-sku0
+ - google,makomo-sku1
+ - const: google,makomo
+ - const: mediatek,mt8183
+ - description: Google Pico (Acer Chromebook Spin 311)
+ items:
+ - enum:
+ - google,pico-sku1
+ - google,pico-sku2
+ - const: google,pico
+ - const: mediatek,mt8183
+ - description: Google Willow (Acer Chromebook 311 C722/C722T)
+ items:
+ - enum:
+ - google,willow-sku0
+ - google,willow-sku1
+ - const: google,willow
+ - const: mediatek,mt8183
+ - items:
+ - enum:
+ - mediatek,mt8183-evb
+ - const: mediatek,mt8183
+ - items:
+ - enum:
+ - mediatek,mt8183-pumpkin
+ - const: mediatek,mt8183
+ - description: Google Chinchou (Asus Chromebook CZ1104CM2A/CZ1204CM2A)
+ items:
+ - const: google,chinchou-sku0
+ - const: google,chinchou-sku2
+ - const: google,chinchou-sku4
+ - const: google,chinchou-sku5
+ - const: google,chinchou
+ - const: mediatek,mt8186
+ - description: Google Chinchou (Asus Chromebook CZ1104FM2A/CZ1204FM2A/CZ1104CM2A/CZ1204CM2A)
+ items:
+ - const: google,chinchou-sku1
+ - const: google,chinchou-sku3
+ - const: google,chinchou-sku6
+ - const: google,chinchou-sku7
+ - const: google,chinchou-sku17
+ - const: google,chinchou-sku20
+ - const: google,chinchou-sku22
+ - const: google,chinchou-sku23
+ - const: google,chinchou
+ - const: mediatek,mt8186
+ - description: Google Chinchou360 (Asus Chromebook CZ1104FM2A/CZ1204FM2A Flip)
+ items:
+ - const: google,chinchou-sku16
+ - const: google,chinchou-sku18
+ - const: google,chinchou-sku19
+ - const: google,chinchou-sku21
+ - const: google,chinchou
+ - const: mediatek,mt8186
+ - description: Google Magneton (Lenovo IdeaPad Slim 3 Chromebook (14M868))
+ items:
+ - const: google,steelix-sku393219
+ - const: google,steelix-sku393216
+ - const: google,steelix
+ - const: mediatek,mt8186
+ - description: Google Magneton (Lenovo IdeaPad Slim 3 Chromebook (14M868))
+ items:
+ - const: google,steelix-sku393220
+ - const: google,steelix-sku393217
+ - const: google,steelix
+ - const: mediatek,mt8186
+ - description: Google Magneton (Lenovo IdeaPad Slim 3 Chromebook (14M868))
+ items:
+ - const: google,steelix-sku393221
+ - const: google,steelix-sku393218
+ - const: google,steelix
+ - const: mediatek,mt8186
+ - description: Google Ponyta
+ items:
+ - enum:
+ - google,ponyta-sku0
+ - google,ponyta-sku1
+ - const: google,ponyta
+ - const: mediatek,mt8186
+ - description: Google Rusty (Lenovo 100e Chromebook Gen 4)
+ items:
+ - const: google,steelix-sku196609
+ - const: google,steelix-sku196608
+ - const: google,steelix
+ - const: mediatek,mt8186
+ - description: Google Squirtle (Acer Chromebook Spin 311 (R724T)
+ items:
+ - const: google,squirtle
+ - const: mediatek,mt8186
+ - description: Google Starmie (ASUS Chromebook Enterprise CM30 (CM3001))
+ items:
+ - const: google,starmie-sku0
+ - const: google,starmie-sku2
+ - const: google,starmie-sku3
+ - const: google,starmie
+ - const: mediatek,mt8186
+ - description: Google Starmie (ASUS Chromebook Enterprise CM30 (CM3001))
+ items:
+ - const: google,starmie-sku1
+ - const: google,starmie-sku4
+ - const: google,starmie
+ - const: mediatek,mt8186
+ - description: Google Steelix (Lenovo 300e Yoga Chromebook Gen 4)
+ items:
+ - enum:
+ - google,steelix-sku131072
+ - google,steelix-sku131073
+ - const: google,steelix
+ - const: mediatek,mt8186
+ - description: Google Tentacruel (ASUS Chromebook CM14 Flip CM1402F)
+ items:
+ - const: google,tentacruel-sku262147
+ - const: google,tentacruel-sku262146
+ - const: google,tentacruel-sku262145
+ - const: google,tentacruel-sku262144
+ - const: google,tentacruel
+ - const: mediatek,mt8186
+ - description: Google Tentacruel (ASUS Chromebook CM14 Flip CM1402F)
+ items:
+ - const: google,tentacruel-sku262151
+ - const: google,tentacruel-sku262150
+ - const: google,tentacruel-sku262149
+ - const: google,tentacruel-sku262148
+ - const: google,tentacruel
+ - const: mediatek,mt8186
+ - description: Google Tentacool (ASUS Chromebook CM14 CM1402C)
+ items:
+ - const: google,tentacruel-sku327681
+ - const: google,tentacruel
+ - const: mediatek,mt8186
+ - description: Google Tentacool (ASUS Chromebook CM14 CM1402C)
+ items:
+ - const: google,tentacruel-sku327683
+ - const: google,tentacruel
+ - const: mediatek,mt8186
+ - description: Google Voltorb (Acer Chromebook 311 C723/C732T)
+ items:
+ - const: google,voltorb
+ - const: mediatek,mt8186
+ - items:
+ - enum:
+ - mediatek,mt8186-evb
+ - const: mediatek,mt8186
+ - description: Google Ciri (Lenovo Chromebook Duet (11", 9))
+ items:
+ - enum:
+ - google,ciri-sku0
+ - google,ciri-sku1
+ - google,ciri-sku2
+ - google,ciri-sku3
+ - google,ciri-sku4
+ - google,ciri-sku5
+ - google,ciri-sku6
+ - google,ciri-sku7
+ - const: google,ciri
+ - const: mediatek,mt8188
+ - items:
+ - enum:
+ - mediatek,mt8188-evb
+ - const: mediatek,mt8188
+ - description: Google Hayato
+ items:
+ - const: google,hayato-rev1
+ - const: google,hayato
+ - const: mediatek,mt8192
+ - description: Google Spherion (Acer Chromebook 514)
+ items:
+ - const: google,spherion-rev3
+ - const: google,spherion-rev2
+ - const: google,spherion-rev1
+ - const: google,spherion-rev0
+ - const: google,spherion
+ - const: mediatek,mt8192
+ - items:
+ - enum:
+ - mediatek,mt8192-evb
+ - const: mediatek,mt8192
+ - description: Acer Tomato (Acer Chromebook Spin 513 CP513-2H)
+ items:
+ - enum:
+ - google,tomato-rev2
+ - google,tomato-rev1
+ - const: google,tomato
+ - const: mediatek,mt8195
+ - description: Acer Tomato rev3 - 4 (Acer Chromebook Spin 513 CP513-2H)
+ items:
+ - const: google,tomato-rev4
+ - const: google,tomato-rev3
+ - const: google,tomato
+ - const: mediatek,mt8195
+ - description: HP Dojo sku1, 3, 5, 7 (HP Chromebook x360 13b-ca0002sa)
+ items:
+ - const: google,dojo-sku7
+ - const: google,dojo-sku5
+ - const: google,dojo-sku3
+ - const: google,dojo-sku1
+ - const: google,dojo
+ - const: mediatek,mt8195
+ - items:
+ - enum:
+ - mediatek,mt8195-demo
+ - mediatek,mt8195-evb
+ - const: mediatek,mt8195
+ - items:
+ - enum:
+ - mediatek,mt8365-evk
+ - const: mediatek,mt8365
+ - items:
+ - enum:
+ - grinn,genio-510-sbc
+ - mediatek,mt8370-evk
+ - const: mediatek,mt8370
+ - const: mediatek,mt8188
+ - items:
+ - enum:
+ - grinn,genio-700-sbc
+ - mediatek,mt8390-evk
+ - const: mediatek,mt8390
+ - const: mediatek,mt8188
+ - items:
+ - enum:
+ - kontron,3-5-sbc-i1200
+ - mediatek,mt8395-evk
+ - radxa,nio-12l
+ - const: mediatek,mt8395
+ - const: mediatek,mt8195
+ - items:
+ - enum:
+ - mediatek,mt8516-pumpkin
+ - const: mediatek,mt8516
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,apmixedsys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,apmixedsys.txt
deleted file mode 100644
index 4e4a3c0ab9ab..000000000000
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,apmixedsys.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-Mediatek apmixedsys controller
-==============================
-
-The Mediatek apmixedsys controller provides the PLLs to the system.
-
-Required Properties:
-
-- compatible: Should be one of:
- - "mediatek,mt2701-apmixedsys"
- - "mediatek,mt2712-apmixedsys", "syscon"
- - "mediatek,mt6797-apmixedsys"
- - "mediatek,mt7622-apmixedsys"
- - "mediatek,mt7623-apmixedsys", "mediatek,mt2701-apmixedsys"
- - "mediatek,mt8135-apmixedsys"
- - "mediatek,mt8173-apmixedsys"
-- #clock-cells: Must be 1
-
-The apmixedsys controller uses the common clk binding from
-Documentation/devicetree/bindings/clock/clock-bindings.txt
-The available clocks are defined in dt-bindings/clock/mt*-clk.h.
-
-Example:
-
-apmixedsys: clock-controller@10209000 {
- compatible = "mediatek,mt8173-apmixedsys";
- reg = <0 0x10209000 0 0x1000>;
- #clock-cells = <1>;
-};
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,audsys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,audsys.txt
deleted file mode 100644
index d1606b2c3e63..000000000000
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,audsys.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-MediaTek AUDSYS controller
-============================
-
-The MediaTek AUDSYS controller provides various clocks to the system.
-
-Required Properties:
-
-- compatible: Should be one of:
- - "mediatek,mt2701-audsys", "syscon"
- - "mediatek,mt7622-audsys", "syscon"
- - "mediatek,mt7623-audsys", "mediatek,mt2701-audsys", "syscon"
-- #clock-cells: Must be 1
-
-The AUDSYS controller uses the common clk binding from
-Documentation/devicetree/bindings/clock/clock-bindings.txt
-The available clocks are defined in dt-bindings/clock/mt*-clk.h.
-
-Required sub-nodes:
--------
-For common binding part and usage, refer to
-../sonud/mt2701-afe-pcm.txt.
-
-Example:
-
- audsys: clock-controller@11220000 {
- compatible = "mediatek,mt7622-audsys", "syscon";
- reg = <0 0x11220000 0 0x2000>;
- #clock-cells = <1>;
-
- afe: audio-controller {
- ...
- };
- };
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,audsys.yaml b/Documentation/devicetree/bindings/arm/mediatek/mediatek,audsys.yaml
new file mode 100644
index 000000000000..f3a761cbd0fd
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,audsys.yaml
@@ -0,0 +1,167 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/mediatek/mediatek,audsys.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek AUDSYS controller
+
+maintainers:
+ - Eugen Hristev <eugen.hristev@collabora.com>
+
+description:
+ The MediaTek AUDSYS controller provides various clocks to the system.
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - mediatek,mt2701-audsys
+ - mediatek,mt6765-audsys
+ - mediatek,mt6779-audsys
+ - mediatek,mt7622-audsys
+ - mediatek,mt8167-audsys
+ - mediatek,mt8173-audsys
+ - mediatek,mt8183-audiosys
+ - mediatek,mt8183-audsys
+ - mediatek,mt8186-audsys
+ - mediatek,mt8192-audsys
+ - mediatek,mt8516-audsys
+ - const: syscon
+ - items:
+ # Special case for mt7623 for backward compatibility
+ - const: mediatek,mt7623-audsys
+ - const: mediatek,mt2701-audsys
+ - const: syscon
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ audio-controller:
+ type: object
+
+required:
+ - compatible
+ - '#clock-cells'
+
+if:
+ properties:
+ compatible:
+ contains:
+ const: mediatek,mt8183-audiosys
+then:
+ properties:
+ audio-controller:
+ $ref: /schemas/sound/mediatek,mt8183-audio.yaml#
+else:
+ properties:
+ audio-controller:
+ $ref: /schemas/sound/mediatek,mt2701-audio.yaml#
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/power/mt2701-power.h>
+ #include <dt-bindings/clock/mt2701-clk.h>
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ audsys: clock-controller@11220000 {
+ compatible = "mediatek,mt7622-audsys", "syscon";
+ reg = <0 0x11220000 0 0x2000>;
+ #clock-cells = <1>;
+
+ afe: audio-controller {
+ compatible = "mediatek,mt2701-audio";
+ interrupts = <GIC_SPI 104 IRQ_TYPE_LEVEL_LOW>,
+ <GIC_SPI 132 IRQ_TYPE_LEVEL_LOW>;
+ interrupt-names = "afe", "asys";
+ power-domains = <&scpsys MT2701_POWER_DOMAIN_IFR_MSC>;
+
+ clocks = <&infracfg CLK_INFRA_AUDIO>,
+ <&topckgen CLK_TOP_AUD_MUX1_SEL>,
+ <&topckgen CLK_TOP_AUD_MUX2_SEL>,
+ <&topckgen CLK_TOP_AUD_48K_TIMING>,
+ <&topckgen CLK_TOP_AUD_44K_TIMING>,
+ <&topckgen CLK_TOP_AUD_K1_SRC_SEL>,
+ <&topckgen CLK_TOP_AUD_K2_SRC_SEL>,
+ <&topckgen CLK_TOP_AUD_K3_SRC_SEL>,
+ <&topckgen CLK_TOP_AUD_K4_SRC_SEL>,
+ <&topckgen CLK_TOP_AUD_K1_SRC_DIV>,
+ <&topckgen CLK_TOP_AUD_K2_SRC_DIV>,
+ <&topckgen CLK_TOP_AUD_K3_SRC_DIV>,
+ <&topckgen CLK_TOP_AUD_K4_SRC_DIV>,
+ <&topckgen CLK_TOP_AUD_I2S1_MCLK>,
+ <&topckgen CLK_TOP_AUD_I2S2_MCLK>,
+ <&topckgen CLK_TOP_AUD_I2S3_MCLK>,
+ <&topckgen CLK_TOP_AUD_I2S4_MCLK>,
+ <&audsys CLK_AUD_I2SO1>,
+ <&audsys CLK_AUD_I2SO2>,
+ <&audsys CLK_AUD_I2SO3>,
+ <&audsys CLK_AUD_I2SO4>,
+ <&audsys CLK_AUD_I2SIN1>,
+ <&audsys CLK_AUD_I2SIN2>,
+ <&audsys CLK_AUD_I2SIN3>,
+ <&audsys CLK_AUD_I2SIN4>,
+ <&audsys CLK_AUD_ASRCO1>,
+ <&audsys CLK_AUD_ASRCO2>,
+ <&audsys CLK_AUD_ASRCO3>,
+ <&audsys CLK_AUD_ASRCO4>,
+ <&audsys CLK_AUD_AFE>,
+ <&audsys CLK_AUD_AFE_CONN>,
+ <&audsys CLK_AUD_A1SYS>,
+ <&audsys CLK_AUD_A2SYS>,
+ <&audsys CLK_AUD_AFE_MRGIF>;
+
+ clock-names = "infra_sys_audio_clk",
+ "top_audio_mux1_sel",
+ "top_audio_mux2_sel",
+ "top_audio_a1sys_hp",
+ "top_audio_a2sys_hp",
+ "i2s0_src_sel",
+ "i2s1_src_sel",
+ "i2s2_src_sel",
+ "i2s3_src_sel",
+ "i2s0_src_div",
+ "i2s1_src_div",
+ "i2s2_src_div",
+ "i2s3_src_div",
+ "i2s0_mclk_en",
+ "i2s1_mclk_en",
+ "i2s2_mclk_en",
+ "i2s3_mclk_en",
+ "i2so0_hop_ck",
+ "i2so1_hop_ck",
+ "i2so2_hop_ck",
+ "i2so3_hop_ck",
+ "i2si0_hop_ck",
+ "i2si1_hop_ck",
+ "i2si2_hop_ck",
+ "i2si3_hop_ck",
+ "asrc0_out_ck",
+ "asrc1_out_ck",
+ "asrc2_out_ck",
+ "asrc3_out_ck",
+ "audio_afe_pd",
+ "audio_afe_conn_pd",
+ "audio_a1sys_pd",
+ "audio_a2sys_pd",
+ "audio_mrgif_pd";
+
+ assigned-clocks = <&topckgen CLK_TOP_AUD_MUX1_SEL>,
+ <&topckgen CLK_TOP_AUD_MUX2_SEL>,
+ <&topckgen CLK_TOP_AUD_MUX1_DIV>,
+ <&topckgen CLK_TOP_AUD_MUX2_DIV>;
+ assigned-clock-parents = <&topckgen CLK_TOP_AUD1PLL_98M>,
+ <&topckgen CLK_TOP_AUD2PLL_90M>;
+ assigned-clock-rates = <0>, <0>, <49152000>, <45158400>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,bdpsys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,bdpsys.txt
deleted file mode 100644
index 149567a38215..000000000000
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,bdpsys.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-Mediatek bdpsys controller
-============================
-
-The Mediatek bdpsys controller provides various clocks to the system.
-
-Required Properties:
-
-- compatible: Should be:
- - "mediatek,mt2701-bdpsys", "syscon"
- - "mediatek,mt2712-bdpsys", "syscon"
- - "mediatek,mt7623-bdpsys", "mediatek,mt2701-bdpsys", "syscon"
-- #clock-cells: Must be 1
-
-The bdpsys controller uses the common clk binding from
-Documentation/devicetree/bindings/clock/clock-bindings.txt
-The available clocks are defined in dt-bindings/clock/mt*-clk.h.
-
-Example:
-
-bdpsys: clock-controller@1c000000 {
- compatible = "mediatek,mt2701-bdpsys", "syscon";
- reg = <0 0x1c000000 0 0x1000>;
- #clock-cells = <1>;
-};
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,ethsys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,ethsys.txt
deleted file mode 100644
index f17cfe64255d..000000000000
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,ethsys.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-Mediatek ethsys controller
-============================
-
-The Mediatek ethsys controller provides various clocks to the system.
-
-Required Properties:
-
-- compatible: Should be:
- - "mediatek,mt2701-ethsys", "syscon"
- - "mediatek,mt7622-ethsys", "syscon"
- - "mediatek,mt7623-ethsys", "mediatek,mt2701-ethsys", "syscon"
-- #clock-cells: Must be 1
-- #reset-cells: Must be 1
-
-The ethsys controller uses the common clk binding from
-Documentation/devicetree/bindings/clock/clock-bindings.txt
-The available clocks are defined in dt-bindings/clock/mt*-clk.h.
-
-Example:
-
-ethsys: clock-controller@1b000000 {
- compatible = "mediatek,mt2701-ethsys", "syscon";
- reg = <0 0x1b000000 0 0x1000>;
- #clock-cells = <1>;
- #reset-cells = <1>;
-};
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,hifsys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,hifsys.txt
deleted file mode 100644
index 323905af82c3..000000000000
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,hifsys.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-Mediatek hifsys controller
-============================
-
-The Mediatek hifsys controller provides various clocks and reset
-outputs to the system.
-
-Required Properties:
-
-- compatible: Should be:
- - "mediatek,mt2701-hifsys", "syscon"
- - "mediatek,mt7622-hifsys", "syscon"
- - "mediatek,mt7623-hifsys", "mediatek,mt2701-hifsys", "syscon"
-- #clock-cells: Must be 1
-
-The hifsys controller uses the common clk binding from
-Documentation/devicetree/bindings/clock/clock-bindings.txt
-The available clocks are defined in dt-bindings/clock/mt*-clk.h.
-
-Example:
-
-hifsys: clock-controller@1a000000 {
- compatible = "mediatek,mt2701-hifsys", "syscon";
- reg = <0 0x1a000000 0 0x1000>;
- #clock-cells = <1>;
- #reset-cells = <1>;
-};
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,imgsys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,imgsys.txt
deleted file mode 100644
index 3f99672163e3..000000000000
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,imgsys.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-Mediatek imgsys controller
-============================
-
-The Mediatek imgsys controller provides various clocks to the system.
-
-Required Properties:
-
-- compatible: Should be one of:
- - "mediatek,mt2701-imgsys", "syscon"
- - "mediatek,mt2712-imgsys", "syscon"
- - "mediatek,mt6797-imgsys", "syscon"
- - "mediatek,mt7623-imgsys", "mediatek,mt2701-imgsys", "syscon"
- - "mediatek,mt8173-imgsys", "syscon"
-- #clock-cells: Must be 1
-
-The imgsys controller uses the common clk binding from
-Documentation/devicetree/bindings/clock/clock-bindings.txt
-The available clocks are defined in dt-bindings/clock/mt*-clk.h.
-
-Example:
-
-imgsys: clock-controller@15000000 {
- compatible = "mediatek,mt8173-imgsys", "syscon";
- reg = <0 0x15000000 0 0x1000>;
- #clock-cells = <1>;
-};
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,infracfg.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,infracfg.txt
deleted file mode 100644
index 89f4272a1441..000000000000
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,infracfg.txt
+++ /dev/null
@@ -1,35 +0,0 @@
-Mediatek infracfg controller
-============================
-
-The Mediatek infracfg controller provides various clocks and reset
-outputs to the system.
-
-Required Properties:
-
-- compatible: Should be one of:
- - "mediatek,mt2701-infracfg", "syscon"
- - "mediatek,mt2712-infracfg", "syscon"
- - "mediatek,mt6797-infracfg", "syscon"
- - "mediatek,mt7622-infracfg", "syscon"
- - "mediatek,mt7623-infracfg", "mediatek,mt2701-infracfg", "syscon"
- - "mediatek,mt8135-infracfg", "syscon"
- - "mediatek,mt8173-infracfg", "syscon"
-- #clock-cells: Must be 1
-- #reset-cells: Must be 1
-
-The infracfg controller uses the common clk binding from
-Documentation/devicetree/bindings/clock/clock-bindings.txt
-The available clocks are defined in dt-bindings/clock/mt*-clk.h.
-Also it uses the common reset controller binding from
-Documentation/devicetree/bindings/reset/reset.txt.
-The available reset outputs are defined in
-dt-bindings/reset/mt*-resets.h
-
-Example:
-
-infracfg: power-controller@10001000 {
- compatible = "mediatek,mt8173-infracfg", "syscon";
- reg = <0 0x10001000 0 0x1000>;
- #clock-cells = <1>;
- #reset-cells = <1>;
-};
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,jpgdecsys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,jpgdecsys.txt
deleted file mode 100644
index 2df799cd06a7..000000000000
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,jpgdecsys.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-Mediatek jpgdecsys controller
-============================
-
-The Mediatek jpgdecsys controller provides various clocks to the system.
-
-Required Properties:
-
-- compatible: Should be:
- - "mediatek,mt2712-jpgdecsys", "syscon"
-- #clock-cells: Must be 1
-
-The jpgdecsys controller uses the common clk binding from
-Documentation/devicetree/bindings/clock/clock-bindings.txt
-The available clocks are defined in dt-bindings/clock/mt*-clk.h.
-
-Example:
-
-jpgdecsys: syscon@19000000 {
- compatible = "mediatek,mt2712-jpgdecsys", "syscon";
- reg = <0 0x19000000 0 0x1000>;
- #clock-cells = <1>;
-};
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mcucfg.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mcucfg.txt
deleted file mode 100644
index b8fb03f3613e..000000000000
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mcucfg.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-Mediatek mcucfg controller
-============================
-
-The Mediatek mcucfg controller provides various clocks to the system.
-
-Required Properties:
-
-- compatible: Should be one of:
- - "mediatek,mt2712-mcucfg", "syscon"
-- #clock-cells: Must be 1
-
-The mcucfg controller uses the common clk binding from
-Documentation/devicetree/bindings/clock/clock-bindings.txt
-The available clocks are defined in dt-bindings/clock/mt*-clk.h.
-
-Example:
-
-mcucfg: syscon@10220000 {
- compatible = "mediatek,mt2712-mcucfg", "syscon";
- reg = <0 0x10220000 0 0x1000>;
- #clock-cells = <1>;
-};
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mfgcfg.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mfgcfg.txt
deleted file mode 100644
index 859e67b416d5..000000000000
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mfgcfg.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-Mediatek mfgcfg controller
-============================
-
-The Mediatek mfgcfg controller provides various clocks to the system.
-
-Required Properties:
-
-- compatible: Should be one of:
- - "mediatek,mt2712-mfgcfg", "syscon"
-- #clock-cells: Must be 1
-
-The mfgcfg controller uses the common clk binding from
-Documentation/devicetree/bindings/clock/clock-bindings.txt
-The available clocks are defined in dt-bindings/clock/mt*-clk.h.
-
-Example:
-
-mfgcfg: syscon@13000000 {
- compatible = "mediatek,mt2712-mfgcfg", "syscon";
- reg = <0 0x13000000 0 0x1000>;
- #clock-cells = <1>;
-};
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.txt
deleted file mode 100644
index 15d977afad31..000000000000
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-Mediatek mmsys controller
-============================
-
-The Mediatek mmsys controller provides various clocks to the system.
-
-Required Properties:
-
-- compatible: Should be one of:
- - "mediatek,mt2701-mmsys", "syscon"
- - "mediatek,mt2712-mmsys", "syscon"
- - "mediatek,mt6797-mmsys", "syscon"
- - "mediatek,mt7623-mmsys", "mediatek,mt2701-mmsys", "syscon"
- - "mediatek,mt8173-mmsys", "syscon"
-- #clock-cells: Must be 1
-
-The mmsys controller uses the common clk binding from
-Documentation/devicetree/bindings/clock/clock-bindings.txt
-The available clocks are defined in dt-bindings/clock/mt*-clk.h.
-
-Example:
-
-mmsys: clock-controller@14000000 {
- compatible = "mediatek,mt8173-mmsys", "syscon";
- reg = <0 0x14000000 0 0x1000>;
- #clock-cells = <1>;
-};
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.yaml b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.yaml
new file mode 100644
index 000000000000..3f4262e93c78
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.yaml
@@ -0,0 +1,145 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/mediatek/mediatek,mmsys.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek mmsys controller
+
+maintainers:
+ - Matthias Brugger <matthias.bgg@gmail.com>
+
+description:
+ The MediaTek mmsys system controller provides clock control, routing control,
+ and miscellaneous control in mmsys partition.
+
+properties:
+ $nodename:
+ pattern: "^syscon@[0-9a-f]+$"
+
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - mediatek,mt2701-mmsys
+ - mediatek,mt2712-mmsys
+ - mediatek,mt6765-mmsys
+ - mediatek,mt6779-mmsys
+ - mediatek,mt6795-mmsys
+ - mediatek,mt6797-mmsys
+ - mediatek,mt8167-mmsys
+ - mediatek,mt8173-mmsys
+ - mediatek,mt8183-mmsys
+ - mediatek,mt8186-mmsys
+ - mediatek,mt8188-vdosys0
+ - mediatek,mt8188-vdosys1
+ - mediatek,mt8188-vppsys0
+ - mediatek,mt8188-vppsys1
+ - mediatek,mt8192-mmsys
+ - mediatek,mt8195-vdosys1
+ - mediatek,mt8195-vppsys0
+ - mediatek,mt8195-vppsys1
+ - mediatek,mt8365-mmsys
+ - const: syscon
+
+ - description: vdosys0 and vdosys1 are 2 display HW pipelines,
+ so mt8195 binding should be deprecated.
+ deprecated: true
+ items:
+ - const: mediatek,mt8195-mmsys
+ - const: syscon
+
+ - items:
+ - const: mediatek,mt7623-mmsys
+ - const: mediatek,mt2701-mmsys
+ - const: syscon
+
+ - items:
+ - const: mediatek,mt8195-vdosys0
+ - const: mediatek,mt8195-mmsys
+ - const: syscon
+
+ reg:
+ maxItems: 1
+
+ power-domains:
+ description:
+ A phandle and PM domain specifier as defined by bindings
+ of the power controller specified by phandle. See
+ Documentation/devicetree/bindings/power/power-domain.yaml for details.
+
+ mboxes:
+ description:
+ Using mailbox to communicate with GCE, it should have this
+ property and list of phandle, mailbox specifiers. See
+ Documentation/devicetree/bindings/mailbox/mediatek,gce-mailbox.yaml
+ for details.
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+
+ mediatek,gce-client-reg:
+ description:
+ The register of client driver can be configured by gce with 4 arguments
+ defined in this property, such as phandle of gce, subsys id,
+ register offset and size.
+ Each subsys id is mapping to a base address of display function blocks
+ register which is defined in the gce header
+ include/dt-bindings/gce/<chip>-gce.h.
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ maxItems: 1
+
+ "#clock-cells":
+ const: 1
+
+ '#reset-cells':
+ const: 1
+
+ port:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Output port node. This port connects the MMSYS/VDOSYS output to
+ the first component of one display pipeline, for example one of
+ the available OVL or RDMA blocks.
+ Some MediaTek SoCs support multiple display outputs per MMSYS.
+ properties:
+ endpoint@0:
+ $ref: /schemas/graph.yaml#/properties/endpoint
+ description: Output to the primary display pipeline
+
+ endpoint@1:
+ $ref: /schemas/graph.yaml#/properties/endpoint
+ description: Output to the secondary display pipeline
+
+ endpoint@2:
+ $ref: /schemas/graph.yaml#/properties/endpoint
+ description: Output to the tertiary display pipeline
+
+ anyOf:
+ - required:
+ - endpoint@0
+ - required:
+ - endpoint@1
+ - required:
+ - endpoint@2
+
+required:
+ - compatible
+ - reg
+ - "#clock-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/power/mt8173-power.h>
+ #include <dt-bindings/gce/mt8173-gce.h>
+
+ mmsys: syscon@14000000 {
+ compatible = "mediatek,mt8173-mmsys", "syscon";
+ reg = <0x14000000 0x1000>;
+ power-domains = <&spm MT8173_POWER_DOMAIN_MM>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ mboxes = <&gce 0 CMDQ_THR_PRIO_HIGHEST>,
+ <&gce 1 CMDQ_THR_PRIO_HIGHEST>;
+ mediatek,gce-client-reg = <&gce SUBSYS_1400XXXX 0 0x1000>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-pcie-mirror.yaml b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-pcie-mirror.yaml
new file mode 100644
index 000000000000..d89848a8f478
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-pcie-mirror.yaml
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/mediatek/mediatek,mt7622-pcie-mirror.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek PCIE Mirror Controller for MT7622
+
+maintainers:
+ - Lorenzo Bianconi <lorenzo@kernel.org>
+ - Felix Fietkau <nbd@nbd.name>
+
+description:
+ The mediatek PCIE mirror provides a configuration interface for PCIE
+ controller on MT7622 soc.
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - mediatek,mt7622-pcie-mirror
+ - const: syscon
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ pcie_mirror: pcie-mirror@10000400 {
+ compatible = "mediatek,mt7622-pcie-mirror", "syscon";
+ reg = <0 0x10000400 0 0x10>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-wed.yaml b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-wed.yaml
new file mode 100644
index 000000000000..e7720caf31b3
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-wed.yaml
@@ -0,0 +1,105 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/mediatek/mediatek,mt7622-wed.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek Wireless Ethernet Dispatch Controller for MT7622
+
+maintainers:
+ - Lorenzo Bianconi <lorenzo@kernel.org>
+ - Felix Fietkau <nbd@nbd.name>
+
+description:
+ The mediatek wireless ethernet dispatch controller can be configured to
+ intercept and handle access to the WLAN DMA queues and PCIe interrupts
+ and implement hardware flow offloading from ethernet to WLAN.
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - mediatek,mt7622-wed
+ - mediatek,mt7981-wed
+ - mediatek,mt7986-wed
+ - mediatek,mt7988-wed
+ - const: syscon
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ memory-region:
+ items:
+ - description: firmware EMI region
+ - description: firmware ILM region
+ - description: firmware DLM region
+ - description: firmware CPU DATA region
+ - description: firmware BOOT region
+
+ memory-region-names:
+ items:
+ - const: wo-emi
+ - const: wo-ilm
+ - const: wo-dlm
+ - const: wo-data
+ - const: wo-boot
+
+ mediatek,wo-ccif:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description: mediatek wed-wo controller interface.
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: mediatek,mt7622-wed
+ then:
+ properties:
+ memory-region-names: false
+ memory-region: false
+ mediatek,wo-ccif: false
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ wed0: wed@1020a000 {
+ compatible = "mediatek,mt7622-wed","syscon";
+ reg = <0 0x1020a000 0 0x1000>;
+ interrupts = <GIC_SPI 214 IRQ_TYPE_LEVEL_LOW>;
+ };
+ };
+
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ wed@15010000 {
+ compatible = "mediatek,mt7986-wed", "syscon";
+ reg = <0 0x15010000 0 0x1000>;
+ interrupts = <GIC_SPI 205 IRQ_TYPE_LEVEL_HIGH>;
+
+ memory-region = <&wo_emi>, <&wo_ilm>, <&wo_dlm>,
+ <&wo_data>, <&wo_boot>;
+ memory-region-names = "wo-emi", "wo-ilm", "wo-dlm",
+ "wo-data", "wo-boot";
+ mediatek,wo-ccif = <&wo_ccif0>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7986-wed-pcie.yaml b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7986-wed-pcie.yaml
new file mode 100644
index 000000000000..82f64469a601
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7986-wed-pcie.yaml
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/mediatek/mediatek,mt7986-wed-pcie.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek PCIE WED Controller for MT7986
+
+maintainers:
+ - Lorenzo Bianconi <lorenzo@kernel.org>
+ - Felix Fietkau <nbd@nbd.name>
+
+description:
+ The mediatek WED PCIE provides a configuration interface for PCIE
+ controller on MT7986 soc.
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - mediatek,mt7986-wed-pcie
+ - const: syscon
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ wed_pcie: wed-pcie@10003000 {
+ compatible = "mediatek,mt7986-wed-pcie",
+ "syscon";
+ reg = <0 0x10003000 0 0x10>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,pciesys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,pciesys.txt
deleted file mode 100644
index 7fe5dc6097a6..000000000000
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,pciesys.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-MediaTek PCIESYS controller
-============================
-
-The MediaTek PCIESYS controller provides various clocks to the system.
-
-Required Properties:
-
-- compatible: Should be:
- - "mediatek,mt7622-pciesys", "syscon"
-- #clock-cells: Must be 1
-- #reset-cells: Must be 1
-
-The PCIESYS controller uses the common clk binding from
-Documentation/devicetree/bindings/clock/clock-bindings.txt
-The available clocks are defined in dt-bindings/clock/mt*-clk.h.
-
-Example:
-
-pciesys: pciesys@1a100800 {
- compatible = "mediatek,mt7622-pciesys", "syscon";
- reg = <0 0x1a100800 0 0x1000>;
- #clock-cells = <1>;
- #reset-cells = <1>;
-};
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.txt
deleted file mode 100644
index 6755514deb80..000000000000
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.txt
+++ /dev/null
@@ -1,34 +0,0 @@
-Mediatek pericfg controller
-===========================
-
-The Mediatek pericfg controller provides various clocks and reset
-outputs to the system.
-
-Required Properties:
-
-- compatible: Should be one of:
- - "mediatek,mt2701-pericfg", "syscon"
- - "mediatek,mt2712-pericfg", "syscon"
- - "mediatek,mt7622-pericfg", "syscon"
- - "mediatek,mt7623-pericfg", "mediatek,mt2701-pericfg", "syscon"
- - "mediatek,mt8135-pericfg", "syscon"
- - "mediatek,mt8173-pericfg", "syscon"
-- #clock-cells: Must be 1
-- #reset-cells: Must be 1
-
-The pericfg controller uses the common clk binding from
-Documentation/devicetree/bindings/clock/clock-bindings.txt
-The available clocks are defined in dt-bindings/clock/mt*-clk.h.
-Also it uses the common reset controller binding from
-Documentation/devicetree/bindings/reset/reset.txt.
-The available reset outputs are defined in
-dt-bindings/reset/mt*-resets.h
-
-Example:
-
-pericfg: power-controller@10003000 {
- compatible = "mediatek,mt8173-pericfg", "syscon";
- reg = <0 0x10003000 0 0x1000>;
- #clock-cells = <1>;
- #reset-cells = <1>;
-};
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,sgmiisys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,sgmiisys.txt
deleted file mode 100644
index d113b8e741f3..000000000000
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,sgmiisys.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-MediaTek SGMIISYS controller
-============================
-
-The MediaTek SGMIISYS controller provides various clocks to the system.
-
-Required Properties:
-
-- compatible: Should be:
- - "mediatek,mt7622-sgmiisys", "syscon"
-- #clock-cells: Must be 1
-
-The SGMIISYS controller uses the common clk binding from
-Documentation/devicetree/bindings/clock/clock-bindings.txt
-The available clocks are defined in dt-bindings/clock/mt*-clk.h.
-
-Example:
-
-sgmiisys: sgmiisys@1b128000 {
- compatible = "mediatek,mt7622-sgmiisys", "syscon";
- reg = <0 0x1b128000 0 0x1000>;
- #clock-cells = <1>;
-};
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,ssusbsys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,ssusbsys.txt
deleted file mode 100644
index b8184da2508c..000000000000
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,ssusbsys.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-MediaTek SSUSBSYS controller
-============================
-
-The MediaTek SSUSBSYS controller provides various clocks to the system.
-
-Required Properties:
-
-- compatible: Should be:
- - "mediatek,mt7622-ssusbsys", "syscon"
-- #clock-cells: Must be 1
-- #reset-cells: Must be 1
-
-The SSUSBSYS controller uses the common clk binding from
-Documentation/devicetree/bindings/clock/clock-bindings.txt
-The available clocks are defined in dt-bindings/clock/mt*-clk.h.
-
-Example:
-
-ssusbsys: ssusbsys@1a000000 {
- compatible = "mediatek,mt7622-ssusbsys", "syscon";
- reg = <0 0x1a000000 0 0x1000>;
- #clock-cells = <1>;
- #reset-cells = <1>;
-};
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,topckgen.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,topckgen.txt
deleted file mode 100644
index d849465b8c99..000000000000
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,topckgen.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-Mediatek topckgen controller
-============================
-
-The Mediatek topckgen controller provides various clocks to the system.
-
-Required Properties:
-
-- compatible: Should be one of:
- - "mediatek,mt2701-topckgen"
- - "mediatek,mt2712-topckgen", "syscon"
- - "mediatek,mt6797-topckgen"
- - "mediatek,mt7622-topckgen"
- - "mediatek,mt7623-topckgen", "mediatek,mt2701-topckgen"
- - "mediatek,mt8135-topckgen"
- - "mediatek,mt8173-topckgen"
-- #clock-cells: Must be 1
-
-The topckgen controller uses the common clk binding from
-Documentation/devicetree/bindings/clock/clock-bindings.txt
-The available clocks are defined in dt-bindings/clock/mt*-clk.h.
-
-Example:
-
-topckgen: power-controller@10000000 {
- compatible = "mediatek,mt8173-topckgen";
- reg = <0 0x10000000 0 0x1000>;
- #clock-cells = <1>;
-};
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,vdecsys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,vdecsys.txt
deleted file mode 100644
index 3212afc753c8..000000000000
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,vdecsys.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-Mediatek vdecsys controller
-============================
-
-The Mediatek vdecsys controller provides various clocks to the system.
-
-Required Properties:
-
-- compatible: Should be one of:
- - "mediatek,mt2701-vdecsys", "syscon"
- - "mediatek,mt2712-vdecsys", "syscon"
- - "mediatek,mt6797-vdecsys", "syscon"
- - "mediatek,mt7623-vdecsys", "mediatek,mt2701-vdecsys", "syscon"
- - "mediatek,mt8173-vdecsys", "syscon"
-- #clock-cells: Must be 1
-
-The vdecsys controller uses the common clk binding from
-Documentation/devicetree/bindings/clock/clock-bindings.txt
-The available clocks are defined in dt-bindings/clock/mt*-clk.h.
-
-Example:
-
-vdecsys: clock-controller@16000000 {
- compatible = "mediatek,mt8173-vdecsys", "syscon";
- reg = <0 0x16000000 0 0x1000>;
- #clock-cells = <1>;
-};
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,vencltsys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,vencltsys.txt
deleted file mode 100644
index 3cc299fd7857..000000000000
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,vencltsys.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-Mediatek vencltsys controller
-============================
-
-The Mediatek vencltsys controller provides various clocks to the system.
-
-Required Properties:
-
-- compatible: Should be:
- - "mediatek,mt8173-vencltsys", "syscon"
-- #clock-cells: Must be 1
-
-The vencltsys controller uses the common clk binding from
-Documentation/devicetree/bindings/clock/clock-bindings.txt
-The available clocks are defined in dt-bindings/clock/mt*-clk.h.
-
-Example:
-
-vencltsys: clock-controller@19000000 {
- compatible = "mediatek,mt8173-vencltsys", "syscon";
- reg = <0 0x19000000 0 0x1000>;
- #clock-cells = <1>;
-};
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,vencsys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,vencsys.txt
deleted file mode 100644
index 851545357e94..000000000000
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,vencsys.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-Mediatek vencsys controller
-============================
-
-The Mediatek vencsys controller provides various clocks to the system.
-
-Required Properties:
-
-- compatible: Should be one of:
- - "mediatek,mt2712-vencsys", "syscon"
- - "mediatek,mt6797-vencsys", "syscon"
- - "mediatek,mt8173-vencsys", "syscon"
-- #clock-cells: Must be 1
-
-The vencsys controller uses the common clk binding from
-Documentation/devicetree/bindings/clock/clock-bindings.txt
-The available clocks are defined in dt-bindings/clock/mt*-clk.h.
-
-Example:
-
-vencsys: clock-controller@18000000 {
- compatible = "mediatek,mt8173-vencsys", "syscon";
- reg = <0 0x18000000 0 0x1000>;
- #clock-cells = <1>;
-};
diff --git a/Documentation/devicetree/bindings/arm/microchip,sparx5.yaml b/Documentation/devicetree/bindings/arm/microchip,sparx5.yaml
new file mode 100644
index 000000000000..9a0d54e9799c
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/microchip,sparx5.yaml
@@ -0,0 +1,67 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/microchip,sparx5.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Microchip Sparx5 Boards
+
+maintainers:
+ - Lars Povlsen <lars.povlsen@microchip.com>
+
+description: |+
+ The Microchip Sparx5 SoC is a ARMv8-based used in a family of
+ gigabit TSN-capable gigabit switches.
+
+ The SparX-5 Ethernet switch family provides a rich set of switching
+ features such as advanced TCAM-based VLAN and QoS processing
+ enabling delivery of differentiated services, and security through
+ TCAM-based frame processing using versatile content aware processor
+ (VCAP)
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ - description: The Sparx5 pcb125 board is a modular board,
+ which has both spi-nor and eMMC storage. The modular design
+ allows for connection of different network ports.
+ items:
+ - const: microchip,sparx5-pcb125
+ - const: microchip,sparx5
+
+ - description: The Sparx5 pcb134 is a pizzabox form factor
+ gigabit switch with 20 SFP ports. It features spi-nor and
+ either spi-nand or eMMC storage (mount option).
+ items:
+ - const: microchip,sparx5-pcb134
+ - const: microchip,sparx5
+
+ - description: The Sparx5 pcb135 is a pizzabox form factor
+ gigabit switch with 48+4 Cu ports. It features spi-nor and
+ either spi-nand or eMMC storage (mount option).
+ items:
+ - const: microchip,sparx5-pcb135
+ - const: microchip,sparx5
+
+ axi@600000000:
+ type: object
+ description: the root node in the Sparx5 platforms must contain
+ an axi bus child node. They are always at physical address
+ 0x600000000 in all the Sparx5 variants.
+ properties:
+ compatible:
+ items:
+ - const: simple-bus
+
+ required:
+ - compatible
+
+required:
+ - compatible
+ - axi@600000000
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/moxart.txt b/Documentation/devicetree/bindings/arm/moxart.txt
deleted file mode 100644
index 11087edb0658..000000000000
--- a/Documentation/devicetree/bindings/arm/moxart.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-MOXA ART device tree bindings
-
-Boards with the MOXA ART SoC shall have the following properties:
-
-Required root node property:
-
-compatible = "moxa,moxart";
-
-Boards:
-
-- UC-7112-LX: embedded computer
- compatible = "moxa,moxart-uc-7112-lx", "moxa,moxart"
diff --git a/Documentation/devicetree/bindings/arm/moxart.yaml b/Documentation/devicetree/bindings/arm/moxart.yaml
new file mode 100644
index 000000000000..42565280914c
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/moxart.yaml
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/moxart.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MOXA ART
+
+maintainers:
+ - Jonas Jensen <jonas.jensen@gmail.com>
+
+properties:
+ compatible:
+ description: UC-7112-LX embedded computer
+ items:
+ - const: moxa,moxart-uc-7112-lx
+ - const: moxa,moxart
+
+additionalProperties: true
+...
diff --git a/Documentation/devicetree/bindings/arm/mrvl/feroceon.txt b/Documentation/devicetree/bindings/arm/mrvl/feroceon.txt
deleted file mode 100644
index 0d244b999d10..000000000000
--- a/Documentation/devicetree/bindings/arm/mrvl/feroceon.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-* Marvell Feroceon Cache
-
-Required properties:
-- compatible : Should be either "marvell,feroceon-cache" or
- "marvell,kirkwood-cache".
-
-Optional properties:
-- reg : Address of the L2 cache control register. Mandatory for
- "marvell,kirkwood-cache", not used by "marvell,feroceon-cache"
-
-
-Example:
- l2: l2-cache@20128 {
- compatible = "marvell,kirkwood-cache";
- reg = <0x20128 0x4>;
- };
diff --git a/Documentation/devicetree/bindings/arm/mrvl/mrvl.txt b/Documentation/devicetree/bindings/arm/mrvl/mrvl.txt
deleted file mode 100644
index 117d741a2e4f..000000000000
--- a/Documentation/devicetree/bindings/arm/mrvl/mrvl.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-Marvell Platforms Device Tree Bindings
-----------------------------------------------------
-
-PXA168 Aspenite Board
-Required root node properties:
- - compatible = "mrvl,pxa168-aspenite", "mrvl,pxa168";
-
-PXA910 DKB Board
-Required root node properties:
- - compatible = "mrvl,pxa910-dkb";
-
-MMP2 Brownstone Board
-Required root node properties:
- - compatible = "mrvl,mmp2-brownstone";
diff --git a/Documentation/devicetree/bindings/arm/mrvl/mrvl.yaml b/Documentation/devicetree/bindings/arm/mrvl/mrvl.yaml
new file mode 100644
index 000000000000..f73bb8ec3a1a
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/mrvl/mrvl.yaml
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/mrvl/mrvl.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell Platforms
+
+maintainers:
+ - Lubomir Rintel <lkundrak@v3.sk>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ - description: PXA168 Aspenite Board
+ items:
+ - enum:
+ - mrvl,pxa168-aspenite
+ - const: mrvl,pxa168
+ - description: PXA910 DKB Board
+ items:
+ - enum:
+ - mrvl,pxa910-dkb
+ - const: mrvl,pxa910
+ - description: MMP2 based boards
+ items:
+ - enum:
+ - mrvl,mmp2-brownstone
+ - olpc,xo-1.75
+ - const: mrvl,mmp2
+ - description: MMP3 based boards
+ items:
+ - enum:
+ - dell,wyse-ariel
+ - const: marvell,mmp3
+ - description: PXA1908 based boards
+ items:
+ - enum:
+ - samsung,coreprimevelte
+ - const: marvell,pxa1908
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/mrvl/tauros2.txt b/Documentation/devicetree/bindings/arm/mrvl/tauros2.txt
deleted file mode 100644
index 31af1cbb60bd..000000000000
--- a/Documentation/devicetree/bindings/arm/mrvl/tauros2.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-* Marvell Tauros2 Cache
-
-Required properties:
-- compatible : Should be "marvell,tauros2-cache".
-- marvell,tauros2-cache-features : Specify the features supported for the
- tauros2 cache.
- The features including
- CACHE_TAUROS2_PREFETCH_ON (1 << 0)
- CACHE_TAUROS2_LINEFILL_BURST8 (1 << 1)
- The definition can be found at
- arch/arm/include/asm/hardware/cache-tauros2.h
-
-Example:
- L2: l2-cache {
- compatible = "marvell,tauros2-cache";
- marvell,tauros2-cache-features = <0x3>;
- };
diff --git a/Documentation/devicetree/bindings/arm/msm/qcom,idle-state.txt b/Documentation/devicetree/bindings/arm/msm/qcom,idle-state.txt
deleted file mode 100644
index 06df04cc827a..000000000000
--- a/Documentation/devicetree/bindings/arm/msm/qcom,idle-state.txt
+++ /dev/null
@@ -1,84 +0,0 @@
-QCOM Idle States for cpuidle driver
-
-ARM provides idle-state node to define the cpuidle states, as defined in [1].
-cpuidle-qcom is the cpuidle driver for Qualcomm SoCs and uses these idle
-states. Idle states have different enter/exit latency and residency values.
-The idle states supported by the QCOM SoC are defined as -
-
- * Standby
- * Retention
- * Standalone Power Collapse (Standalone PC or SPC)
- * Power Collapse (PC)
-
-Standby: Standby does a little more in addition to architectural clock gating.
-When the WFI instruction is executed the ARM core would gate its internal
-clocks. In addition to gating the clocks, QCOM cpus use this instruction as a
-trigger to execute the SPM state machine. The SPM state machine waits for the
-interrupt to trigger the core back in to active. This triggers the cache
-hierarchy to enter standby states, when all cpus are idle. An interrupt brings
-the SPM state machine out of its wait, the next step is to ensure that the
-cache hierarchy is also out of standby, and then the cpu is allowed to resume
-execution. This state is defined as a generic ARM WFI state by the ARM cpuidle
-driver and is not defined in the DT. The SPM state machine should be
-configured to execute this state by default and after executing every other
-state below.
-
-Retention: Retention is a low power state where the core is clock gated and
-the memory and the registers associated with the core are retained. The
-voltage may be reduced to the minimum value needed to keep the processor
-registers active. The SPM should be configured to execute the retention
-sequence and would wait for interrupt, before restoring the cpu to execution
-state. Retention may have a slightly higher latency than Standby.
-
-Standalone PC: A cpu can power down and warmboot if there is a sufficient time
-between the time it enters idle and the next known wake up. SPC mode is used
-to indicate a core entering a power down state without consulting any other
-cpu or the system resources. This helps save power only on that core. The SPM
-sequence for this idle state is programmed to power down the supply to the
-core, wait for the interrupt, restore power to the core, and ensure the
-system state including cache hierarchy is ready before allowing core to
-resume. Applying power and resetting the core causes the core to warmboot
-back into Elevation Level (EL) which trampolines the control back to the
-kernel. Entering a power down state for the cpu, needs to be done by trapping
-into a EL. Failing to do so, would result in a crash enforced by the warm boot
-code in the EL for the SoC. On SoCs with write-back L1 cache, the cache has to
-be flushed in s/w, before powering down the core.
-
-Power Collapse: This state is similar to the SPC mode, but distinguishes
-itself in that the cpu acknowledges and permits the SoC to enter deeper sleep
-modes. In a hierarchical power domain SoC, this means L2 and other caches can
-be flushed, system bus, clocks - lowered, and SoC main XO clock gated and
-voltages reduced, provided all cpus enter this state. Since the span of low
-power modes possible at this state is vast, the exit latency and the residency
-of this low power mode would be considered high even though at a cpu level,
-this essentially is cpu power down. The SPM in this state also may handshake
-with the Resource power manager (RPM) processor in the SoC to indicate a
-complete application processor subsystem shut down.
-
-The idle-state for QCOM SoCs are distinguished by the compatible property of
-the idle-states device node.
-
-The devicetree representation of the idle state should be -
-
-Required properties:
-
-- compatible: Must be one of -
- "qcom,idle-state-ret",
- "qcom,idle-state-spc",
- "qcom,idle-state-pc",
- and "arm,idle-state".
-
-Other required and optional properties are specified in [1].
-
-Example:
-
- idle-states {
- CPU_SPC: spc {
- compatible = "qcom,idle-state-spc", "arm,idle-state";
- entry-latency-us = <150>;
- exit-latency-us = <200>;
- min-residency-us = <2000>;
- };
- };
-
-[1]. Documentation/devicetree/bindings/arm/idle-states.txt
diff --git a/Documentation/devicetree/bindings/arm/msm/qcom,kpss-acc.txt b/Documentation/devicetree/bindings/arm/msm/qcom,kpss-acc.txt
deleted file mode 100644
index 7f696362a4a1..000000000000
--- a/Documentation/devicetree/bindings/arm/msm/qcom,kpss-acc.txt
+++ /dev/null
@@ -1,49 +0,0 @@
-Krait Processor Sub-system (KPSS) Application Clock Controller (ACC)
-
-The KPSS ACC provides clock, power domain, and reset control to a Krait CPU.
-There is one ACC register region per CPU within the KPSS remapped region as
-well as an alias register region that remaps accesses to the ACC associated
-with the CPU accessing the region.
-
-PROPERTIES
-
-- compatible:
- Usage: required
- Value type: <string>
- Definition: should be one of:
- "qcom,kpss-acc-v1"
- "qcom,kpss-acc-v2"
-
-- reg:
- Usage: required
- Value type: <prop-encoded-array>
- Definition: the first element specifies the base address and size of
- the register region. An optional second element specifies
- the base address and size of the alias register region.
-
-- clocks:
- Usage: required
- Value type: <prop-encoded-array>
- Definition: reference to the pll parents.
-
-- clock-names:
- Usage: required
- Value type: <stringlist>
- Definition: must be "pll8_vote", "pxo".
-
-- clock-output-names:
- Usage: optional
- Value type: <string>
- Definition: Name of the output clock. Typically acpuX_aux where X is a
- CPU number starting at 0.
-
-Example:
-
- clock-controller@2088000 {
- compatible = "qcom,kpss-acc-v2";
- reg = <0x02088000 0x1000>,
- <0x02008000 0x1000>;
- clocks = <&gcc PLL8_VOTE>, <&gcc PXO_SRC>;
- clock-names = "pll8_vote", "pxo";
- clock-output-names = "acpu0_aux";
- };
diff --git a/Documentation/devicetree/bindings/arm/msm/qcom,kpss-gcc.txt b/Documentation/devicetree/bindings/arm/msm/qcom,kpss-gcc.txt
deleted file mode 100644
index e628758950e1..000000000000
--- a/Documentation/devicetree/bindings/arm/msm/qcom,kpss-gcc.txt
+++ /dev/null
@@ -1,44 +0,0 @@
-Krait Processor Sub-system (KPSS) Global Clock Controller (GCC)
-
-PROPERTIES
-
-- compatible:
- Usage: required
- Value type: <string>
- Definition: should be one of the following. The generic compatible
- "qcom,kpss-gcc" should also be included.
- "qcom,kpss-gcc-ipq8064", "qcom,kpss-gcc"
- "qcom,kpss-gcc-apq8064", "qcom,kpss-gcc"
- "qcom,kpss-gcc-msm8974", "qcom,kpss-gcc"
- "qcom,kpss-gcc-msm8960", "qcom,kpss-gcc"
-
-- reg:
- Usage: required
- Value type: <prop-encoded-array>
- Definition: base address and size of the register region
-
-- clocks:
- Usage: required
- Value type: <prop-encoded-array>
- Definition: reference to the pll parents.
-
-- clock-names:
- Usage: required
- Value type: <stringlist>
- Definition: must be "pll8_vote", "pxo".
-
-- clock-output-names:
- Usage: required
- Value type: <string>
- Definition: Name of the output clock. Typically acpu_l2_aux indicating
- an L2 cache auxiliary clock.
-
-Example:
-
- l2cc: clock-controller@2011000 {
- compatible = "qcom,kpss-gcc-ipq8064", "qcom,kpss-gcc";
- reg = <0x2011000 0x1000>;
- clocks = <&gcc PLL8_VOTE>, <&gcc PXO_SRC>;
- clock-names = "pll8_vote", "pxo";
- clock-output-names = "acpu_l2_aux";
- };
diff --git a/Documentation/devicetree/bindings/arm/msm/qcom,llcc.txt b/Documentation/devicetree/bindings/arm/msm/qcom,llcc.txt
deleted file mode 100644
index eaee06b2d8f2..000000000000
--- a/Documentation/devicetree/bindings/arm/msm/qcom,llcc.txt
+++ /dev/null
@@ -1,41 +0,0 @@
-== Introduction==
-
-LLCC (Last Level Cache Controller) provides last level of cache memory in SOC,
-that can be shared by multiple clients. Clients here are different cores in the
-SOC, the idea is to minimize the local caches at the clients and migrate to
-common pool of memory. Cache memory is divided into partitions called slices
-which are assigned to clients. Clients can query the slice details, activate
-and deactivate them.
-
-Properties:
-- compatible:
- Usage: required
- Value type: <string>
- Definition: must be "qcom,sdm845-llcc"
-
-- reg:
- Usage: required
- Value Type: <prop-encoded-array>
- Definition: The first element specifies the llcc base start address and
- the size of the register region. The second element specifies
- the llcc broadcast base address and size of the register region.
-
-- reg-names:
- Usage: required
- Value Type: <stringlist>
- Definition: Register region names. Must be "llcc_base", "llcc_broadcast_base".
-
-- interrupts:
- Usage: required
- Definition: The interrupt is associated with the llcc edac device.
- It's used for llcc cache single and double bit error detection
- and reporting.
-
-Example:
-
- cache-controller@1100000 {
- compatible = "qcom,sdm845-llcc";
- reg = <0x1100000 0x200000>, <0x1300000 0x50000> ;
- reg-names = "llcc_base", "llcc_broadcast_base";
- interrupts = <GIC_SPI 582 IRQ_TYPE_LEVEL_HIGH>;
- };
diff --git a/Documentation/devicetree/bindings/arm/msm/qcom,saw2.txt b/Documentation/devicetree/bindings/arm/msm/qcom,saw2.txt
deleted file mode 100644
index ae4afc6dcfe0..000000000000
--- a/Documentation/devicetree/bindings/arm/msm/qcom,saw2.txt
+++ /dev/null
@@ -1,57 +0,0 @@
-SPM AVS Wrapper 2 (SAW2)
-
-The SAW2 is a wrapper around the Subsystem Power Manager (SPM) and the
-Adaptive Voltage Scaling (AVS) hardware. The SPM is a programmable
-power-controller that transitions a piece of hardware (like a processor or
-subsystem) into and out of low power modes via a direct connection to
-the PMIC. It can also be wired up to interact with other processors in the
-system, notifying them when a low power state is entered or exited.
-
-Multiple revisions of the SAW hardware are supported using these Device Nodes.
-SAW2 revisions differ in the register offset and configuration data. Also, the
-same revision of the SAW in different SoCs may have different configuration
-data due the the differences in hardware capabilities. Hence the SoC name, the
-version of the SAW hardware in that SoC and the distinction between cpu (big
-or Little) or cache, may be needed to uniquely identify the SAW register
-configuration and initialization data. The compatible string is used to
-indicate this parameter.
-
-PROPERTIES
-
-- compatible:
- Usage: required
- Value type: <string>
- Definition: Must have
- "qcom,saw2"
- A more specific value could be one of:
- "qcom,apq8064-saw2-v1.1-cpu"
- "qcom,msm8974-saw2-v2.1-cpu"
- "qcom,apq8084-saw2-v2.1-cpu"
-
-- reg:
- Usage: required
- Value type: <prop-encoded-array>
- Definition: the first element specifies the base address and size of
- the register region. An optional second element specifies
- the base address and size of the alias register region.
-
-- regulator:
- Usage: optional
- Value type: boolean
- Definition: Indicates that this SPM device acts as a regulator device
- device for the core (CPU or Cache) the SPM is attached
- to.
-
-Example 1:
-
- power-controller@2099000 {
- compatible = "qcom,saw2";
- reg = <0x02099000 0x1000>, <0x02009000 0x1000>;
- regulator;
- };
-
-Example 2:
- saw0: power-controller@f9089000 {
- compatible = "qcom,apq8084-saw2-v2.1-cpu", "qcom,saw2";
- reg = <0xf9089000 0x1000>, <0xf9009000 0x1000>;
- };
diff --git a/Documentation/devicetree/bindings/arm/msm/ssbi.txt b/Documentation/devicetree/bindings/arm/msm/ssbi.txt
deleted file mode 100644
index 54fd5ced3401..000000000000
--- a/Documentation/devicetree/bindings/arm/msm/ssbi.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-* Qualcomm SSBI
-
-Some Qualcomm MSM devices contain a point-to-point serial bus used to
-communicate with a limited range of devices (mostly power management
-chips).
-
-These require the following properties:
-
-- compatible: "qcom,ssbi"
-
-- qcom,controller-type
- indicates the SSBI bus variant the controller should use to talk
- with the slave device. This should be one of "ssbi", "ssbi2", or
- "pmic-arbiter". The type chosen is determined by the attached
- slave.
-
-The slave device should be the single child node of the ssbi device
-with a compatible field.
diff --git a/Documentation/devicetree/bindings/arm/mstar/mstar,l3bridge.yaml b/Documentation/devicetree/bindings/arm/mstar/mstar,l3bridge.yaml
new file mode 100644
index 000000000000..a8ac4a2d672d
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/mstar/mstar,l3bridge.yaml
@@ -0,0 +1,44 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright 2020 thingy.jp.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/mstar/mstar,l3bridge.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MStar/SigmaStar Armv7 SoC l3bridge
+
+maintainers:
+ - Daniel Palmer <daniel@thingy.jp>
+
+description: |
+ MStar/SigmaStar's Armv7 SoCs have a pipeline in the interface
+ between the CPU and memory. This means that before DMA capable
+ devices are allowed to run the pipeline must be flushed to ensure
+ everything is in memory.
+
+ The l3bridge region contains registers that allow such a flush
+ to be triggered.
+
+ This node is used by the platform code to find where the registers
+ are and install a barrier that triggers the required pipeline flush.
+
+properties:
+ compatible:
+ items:
+ - const: mstar,l3bridge
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ l3bridge: l3bridge@1f204400 {
+ compatible = "mstar,l3bridge";
+ reg = <0x1f204400 0x200>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/mstar/mstar,smpctrl.yaml b/Documentation/devicetree/bindings/arm/mstar/mstar,smpctrl.yaml
new file mode 100644
index 000000000000..5739848000b1
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/mstar/mstar,smpctrl.yaml
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright 2020 thingy.jp.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/mstar/mstar,smpctrl.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MStar/SigmaStar Armv7 SoC SMP control registers
+
+maintainers:
+ - Daniel Palmer <daniel@thingy.jp>
+
+description: |
+ MStar/SigmaStar's Armv7 SoCs that have more than one processor
+ have a region of registers that allow setting the boot address
+ and a magic number that allows secondary processors to leave
+ the loop they are parked in by the boot ROM.
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - sstar,ssd201-smpctrl # SSD201/SSD202D
+ - const: mstar,smpctrl
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ smpctrl@204000 {
+ compatible = "sstar,ssd201-smpctrl", "mstar,smpctrl";
+ reg = <0x204000 0x200>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/mstar/mstar.yaml b/Documentation/devicetree/bindings/arm/mstar/mstar.yaml
new file mode 100644
index 000000000000..937059fcc7b3
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/mstar/mstar.yaml
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/mstar/mstar.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MStar platforms
+
+maintainers:
+ - Daniel Palmer <daniel@thingy.jp>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ - description: infinity boards
+ items:
+ - enum:
+ - thingyjp,breadbee-crust # thingy.jp BreadBee Crust
+ - const: mstar,infinity
+
+ - description: infinity2m boards
+ items:
+ - enum:
+ - 100ask,dongshanpione # 100ask DongShanPiOne
+ - honestar,ssd201htv2 # Honestar SSD201_HT_V2 devkit
+ - m5stack,unitv2 # M5Stack UnitV2
+ - miyoo,miyoo-mini # Miyoo Mini
+ - wirelesstag,ido-som2d01 # Wireless Tag IDO-SOM2D01
+ - wirelesstag,ido-sbc2d06-v1b-22w # Wireless Tag IDO-SBC2D06-1VB-22W
+ - const: mstar,infinity2m
+
+ - description: infinity3 boards
+ items:
+ - enum:
+ - thingyjp,breadbee # thingy.jp BreadBee
+ - const: mstar,infinity3
+
+ - description: mercury5 boards
+ items:
+ - enum:
+ - 70mai,midrived08 # 70mai midrive d08
+ - const: mstar,mercury5
+
+additionalProperties: true
diff --git a/Documentation/devicetree/bindings/arm/npcm/npcm.txt b/Documentation/devicetree/bindings/arm/npcm/npcm.txt
deleted file mode 100644
index 2d87d9ecea85..000000000000
--- a/Documentation/devicetree/bindings/arm/npcm/npcm.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-NPCM Platforms Device Tree Bindings
------------------------------------
-NPCM750 SoC
-Required root node properties:
- - compatible = "nuvoton,npcm750";
-
diff --git a/Documentation/devicetree/bindings/arm/nspire.txt b/Documentation/devicetree/bindings/arm/nspire.txt
deleted file mode 100644
index 4d08518bd176..000000000000
--- a/Documentation/devicetree/bindings/arm/nspire.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-TI-NSPIRE calculators
-
-Required properties:
-- compatible: Compatible property value should contain "ti,nspire".
- CX models should have "ti,nspire-cx"
- Touchpad models should have "ti,nspire-tp"
- Clickpad models should have "ti,nspire-clp"
-
-Example:
-
-/ {
- model = "TI-NSPIRE CX";
- compatible = "ti,nspire-cx";
- ...
diff --git a/Documentation/devicetree/bindings/arm/nuvoton/nuvoton,ma35d1.yaml b/Documentation/devicetree/bindings/arm/nuvoton/nuvoton,ma35d1.yaml
new file mode 100644
index 000000000000..fb190db61525
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/nuvoton/nuvoton,ma35d1.yaml
@@ -0,0 +1,30 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/nuvoton/nuvoton,ma35d1.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Nuvoton MA35 series SoC based platforms
+
+maintainers:
+ - Jacky Huang <ychuang3@nuvoton.com>
+
+description: |
+ Boards with an ARMv8 based Nuvoton MA35 series SoC shall have
+ the following properties.
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+
+ - description: MA35D1 based boards
+ items:
+ - enum:
+ - nuvoton,ma35d1-iot
+ - nuvoton,ma35d1-som
+ - const: nuvoton,ma35d1
+
+additionalProperties: true
+...
diff --git a/Documentation/devicetree/bindings/arm/nuvoton/nuvoton,npcm.yaml b/Documentation/devicetree/bindings/arm/nuvoton/nuvoton,npcm.yaml
new file mode 100644
index 000000000000..d386744c8815
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/nuvoton/nuvoton,npcm.yaml
@@ -0,0 +1,36 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/nuvoton/nuvoton,npcm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NPCM Platforms
+
+maintainers:
+ - Jonathan Neuschäfer <j.neuschaefer@gmx.net>
+ - Tomer Maimon <tmaimon77@gmail.com>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ - description: WPCM450 based boards
+ items:
+ - enum:
+ - supermicro,x9sci-ln4f-bmc # Supermicro X9SCI-LN4F server's BMC
+ - const: nuvoton,wpcm450
+
+ - description: NPCM750 based boards
+ items:
+ - enum:
+ - nuvoton,npcm750-evb # NPCM750 evaluation board
+ - const: nuvoton,npcm750
+
+ - description: NPCM845 based boards
+ items:
+ - enum:
+ - nuvoton,npcm845-evb # NPCM845 evaluation board
+ - const: nuvoton,npcm845
+
+additionalProperties: true
diff --git a/Documentation/devicetree/bindings/arm/nvidia,tegra194-ccplex.yaml b/Documentation/devicetree/bindings/arm/nvidia,tegra194-ccplex.yaml
new file mode 100644
index 000000000000..84dc6b7512af
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/nvidia,tegra194-ccplex.yaml
@@ -0,0 +1,71 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/nvidia,tegra194-ccplex.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NVIDIA Tegra194 CPU Complex
+
+maintainers:
+ - Thierry Reding <thierry.reding@gmail.com>
+ - Jonathan Hunter <jonathanh@nvidia.com>
+ - Sumit Gupta <sumitg@nvidia.com>
+
+description: |+
+ Tegra194 SOC has homogeneous architecture where each cluster has two
+ symmetric cores. Compatible string in "cpus" node represents the CPU
+ Complex having all clusters.
+
+properties:
+ $nodename:
+ const: cpus
+
+ compatible:
+ enum:
+ - nvidia,tegra194-ccplex
+
+ nvidia,bpmp:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description: |
+ Specifies the bpmp node that needs to be queried to get
+ operating point data for all CPUs.
+
+additionalProperties: true
+
+examples:
+ - |
+ cpus {
+ compatible = "nvidia,tegra194-ccplex";
+ nvidia,bpmp = <&bpmp>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu0_0: cpu@0 {
+ compatible = "nvidia,tegra194-carmel";
+ device_type = "cpu";
+ reg = <0x0>;
+ enable-method = "psci";
+ };
+
+ cpu0_1: cpu@1 {
+ compatible = "nvidia,tegra194-carmel";
+ device_type = "cpu";
+ reg = <0x001>;
+ enable-method = "psci";
+ };
+
+ cpu1_0: cpu@100 {
+ compatible = "nvidia,tegra194-carmel";
+ device_type = "cpu";
+ reg = <0x100>;
+ enable-method = "psci";
+ };
+
+ cpu1_1: cpu@101 {
+ compatible = "nvidia,tegra194-carmel";
+ device_type = "cpu";
+ reg = <0x101>;
+ enable-method = "psci";
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/arm/nxp/lpc32xx.txt b/Documentation/devicetree/bindings/arm/nxp/lpc32xx.txt
deleted file mode 100644
index 56ec8ddc4a3b..000000000000
--- a/Documentation/devicetree/bindings/arm/nxp/lpc32xx.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-NXP LPC32xx Platforms Device Tree Bindings
-------------------------------------------
-
-Boards with the NXP LPC32xx SoC shall have the following properties:
-
-Required root node property:
-
-compatible: must be "nxp,lpc3220", "nxp,lpc3230", "nxp,lpc3240" or "nxp,lpc3250"
diff --git a/Documentation/devicetree/bindings/arm/nxp/lpc32xx.yaml b/Documentation/devicetree/bindings/arm/nxp/lpc32xx.yaml
new file mode 100644
index 000000000000..6b7f5e6f99cf
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/nxp/lpc32xx.yaml
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/nxp/lpc32xx.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NXP LPC32xx Platforms
+
+maintainers:
+ - Vladimir Zapolskiy <vz@mleia.com>
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - nxp,lpc3220
+ - nxp,lpc3230
+ - nxp,lpc3240
+ - items:
+ - enum:
+ - ea,ea3250
+ - phytec,phy3250
+ - const: nxp,lpc3250
+
+additionalProperties: true
+...
diff --git a/Documentation/devicetree/bindings/arm/olimex.txt b/Documentation/devicetree/bindings/arm/olimex.txt
deleted file mode 100644
index d726aeca56be..000000000000
--- a/Documentation/devicetree/bindings/arm/olimex.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-Olimex Device Tree Bindings
----------------------------
-
-SAM9-L9260 Board
-Required root node properties:
- - compatible = "olimex,sam9-l9260", "atmel,at91sam9260";
-
-i.MX23 Olinuxino Low Cost Board
-Required root node properties:
- - compatible = "olimex,imx23-olinuxino", "fsl,imx23";
diff --git a/Documentation/devicetree/bindings/arm/omap/crossbar.txt b/Documentation/devicetree/bindings/arm/omap/crossbar.txt
index 4cd5d873fc3a..a43e4c7aba3d 100644
--- a/Documentation/devicetree/bindings/arm/omap/crossbar.txt
+++ b/Documentation/devicetree/bindings/arm/omap/crossbar.txt
@@ -41,7 +41,7 @@ Examples:
Consumer:
========
See Documentation/devicetree/bindings/interrupt-controller/interrupts.txt and
-Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt for
+Documentation/devicetree/bindings/interrupt-controller/arm,gic.yaml for
further details.
An interrupt consumer on an SoC using crossbar will use:
diff --git a/Documentation/devicetree/bindings/arm/omap/ctrl.txt b/Documentation/devicetree/bindings/arm/omap/ctrl.txt
index f35b77920786..0ce6665df4a2 100644
--- a/Documentation/devicetree/bindings/arm/omap/ctrl.txt
+++ b/Documentation/devicetree/bindings/arm/omap/ctrl.txt
@@ -8,7 +8,7 @@ control module driver itself.
See [2] for documentation about clock/clockdomain nodes.
-[1] Documentation/devicetree/bindings/pinctrl/pinctrl-single.txt
+[1] Documentation/devicetree/bindings/pinctrl/pinctrl-single.yaml
[2] Documentation/devicetree/bindings/clock/ti/*
Required properties:
diff --git a/Documentation/devicetree/bindings/arm/omap/mpu.txt b/Documentation/devicetree/bindings/arm/omap/mpu.txt
index f301e636fd52..e41490e6979c 100644
--- a/Documentation/devicetree/bindings/arm/omap/mpu.txt
+++ b/Documentation/devicetree/bindings/arm/omap/mpu.txt
@@ -17,7 +17,7 @@ am335x and am437x only:
- pm-sram: Phandles to ocmcram nodes to be used for power management.
First should be type 'protect-exec' for the driver to use to copy
and run PM functions, second should be regular pool to be used for
- data region for code. See Documentation/devicetree/bindings/sram/sram.txt
+ data region for code. See Documentation/devicetree/bindings/sram/sram.yaml
for more details.
Examples:
diff --git a/Documentation/devicetree/bindings/arm/omap/omap.txt b/Documentation/devicetree/bindings/arm/omap/omap.txt
index 2ecc712bf707..c863ec07cbbb 100644
--- a/Documentation/devicetree/bindings/arm/omap/omap.txt
+++ b/Documentation/devicetree/bindings/arm/omap/omap.txt
@@ -41,45 +41,15 @@ SoC Type (optional):
SoC Families:
-- OMAP2 generic - defaults to OMAP2420
- compatible = "ti,omap2"
-- OMAP3 generic - defaults to OMAP3430
- compatible = "ti,omap3"
-- OMAP4 generic - defaults to OMAP4430
- compatible = "ti,omap4"
-- OMAP5 generic - defaults to OMAP5430
- compatible = "ti,omap5"
- DRA7 generic - defaults to DRA742
compatible = "ti,dra7"
+- AM33x generic
+ compatible = "ti,am33xx"
- AM43x generic - defaults to AM4372
compatible = "ti,am43"
SoCs:
-- OMAP2420
- compatible = "ti,omap2420", "ti,omap2"
-- OMAP2430
- compatible = "ti,omap2430", "ti,omap2"
-
-- OMAP3430
- compatible = "ti,omap3430", "ti,omap3"
-- AM3517
- compatible = "ti,am3517", "ti,omap3"
-- OMAP3630
- compatible = "ti,omap36xx", "ti,omap3"
-- AM33xx
- compatible = "ti,am33xx", "ti,omap3"
-
-- OMAP4430
- compatible = "ti,omap4430", "ti,omap4"
-- OMAP4460
- compatible = "ti,omap4460", "ti,omap4"
-
-- OMAP5430
- compatible = "ti,omap5430", "ti,omap5"
-- OMAP5432
- compatible = "ti,omap5432", "ti,omap5"
-
- DRA762
compatible = "ti,dra762", "ti,dra7"
@@ -92,6 +62,9 @@ SoCs:
- DRA718
compatible = "ti,dra718", "ti,dra722", "ti,dra72", "ti,dra7"
+- AM5748
+ compatible = "ti,am5748", "ti,dra762", "ti,dra7"
+
- AM5728
compatible = "ti,am5728", "ti,dra742", "ti,dra74", "ti,dra7"
@@ -107,61 +80,8 @@ SoCs:
- AM4372
compatible = "ti,am4372", "ti,am43"
-Boards:
-
-- OMAP3 BeagleBoard : Low cost community board
- compatible = "ti,omap3-beagle", "ti,omap3"
-
-- OMAP3 Tobi with Overo : Commercial expansion board with daughter board
- compatible = "gumstix,omap3-overo-tobi", "gumstix,omap3-overo", "ti,omap3"
-
-- OMAP4 SDP : Software Development Board
- compatible = "ti,omap4-sdp", "ti,omap4430"
-
-- OMAP4 PandaBoard : Low cost community board
- compatible = "ti,omap4-panda", "ti,omap4430"
-
-- OMAP4 DuoVero with Parlor : Commercial expansion board with daughter board
- compatible = "gumstix,omap4-duovero-parlor", "gumstix,omap4-duovero", "ti,omap4430", "ti,omap4";
-
-- OMAP4 VAR-STK-OM44 : Commercial dev kit with VAR-OM44CustomBoard and VAR-SOM-OM44 w/WLAN
- compatible = "variscite,var-stk-om44", "variscite,var-som-om44", "ti,omap4460", "ti,omap4";
-
-- OMAP4 VAR-DVK-OM44 : Commercial dev kit with VAR-OM44CustomBoard, VAR-SOM-OM44 w/WLAN and LCD touchscreen
- compatible = "variscite,var-dvk-om44", "variscite,var-som-om44", "ti,omap4460", "ti,omap4";
-
-- OMAP3 EVM : Software Development Board for OMAP35x, AM/DM37x
- compatible = "ti,omap3-evm", "ti,omap3"
-
-- AM335X EVM : Software Development Board for AM335x
- compatible = "ti,am335x-evm", "ti,am33xx", "ti,omap3"
+Boards (incomplete list of examples):
-- AM335X Bone : Low cost community board
- compatible = "ti,am335x-bone", "ti,am33xx", "ti,omap3"
-
-- AM3359 ICEv2 : Low cost Industrial Communication Engine EVM.
- compatible = "ti,am3359-icev2", "ti,am33xx", "ti,omap3"
-
-- AM335X OrionLXm : Substation Automation Platform
- compatible = "novatech,am335x-lxm", "ti,am33xx"
-
-- AM335X phyBOARD-WEGA: Single Board Computer dev kit
- compatible = "phytec,am335x-wega", "phytec,am335x-phycore-som", "ti,am33xx"
-
-- AM335X CM-T335 : System On Module, built around the Sitara AM3352/4
- compatible = "compulab,cm-t335", "ti,am33xx"
-
-- AM335X SBC-T335 : single board computer, built around the Sitara AM3352/4
- compatible = "compulab,sbc-t335", "compulab,cm-t335", "ti,am33xx"
-
-- AM335X phyCORE-AM335x: Development kit
- compatible = "phytec,am335x-pcm-953", "phytec,am335x-phycore-som", "ti,am33xx"
-
-- AM335X UC-8100-ME-T: Communication-centric industrial computing platform
- compatible = "moxa,uc-8100-me-t", "ti,am33xx";
-
-- OMAP5 EVM : Evaluation Module
- compatible = "ti,omap5-evm", "ti,omap5"
- AM437x CM-T43
compatible = "compulab,am437x-cm-t43", "ti,am4372", "ti,am43"
@@ -184,6 +104,9 @@ Boards:
- AM57XX SBC-AM57x
compatible = "compulab,sbc-am57x", "compulab,cl-som-am57x", "ti,am5728", "ti,dra742", "ti,dra74", "ti,dra7"
+- AM5748 IDK
+ compatible = "ti,am5748-idk", "ti,am5748", "ti,dra762", "ti,dra7";
+
- AM5728 IDK
compatible = "ti,am5728-idk", "ti,am5728", "ti,dra742", "ti,dra74", "ti,dra7"
@@ -201,9 +124,3 @@ Boards:
- DRA718 EVM: Software Development Board for DRA718
compatible = "ti,dra718-evm", "ti,dra718", "ti,dra722", "ti,dra72", "ti,dra7"
-
-- DM3730 Logic PD Torpedo + Wireless: Commercial System on Module with WiFi and Bluetooth
- compatible = "logicpd,dm3730-torpedo-devkit", "ti,omap3630", "ti,omap3"
-
-- DM3730 Logic PD SOM-LV: Commercial System on Module with WiFi and Bluetooth
- compatible = "logicpd,dm3730-som-lv-devkit", "ti,omap3630", "ti,omap3"
diff --git a/Documentation/devicetree/bindings/arm/omap/prcm.txt b/Documentation/devicetree/bindings/arm/omap/prcm.txt
index 3eb6d7afff14..431ef8c56a13 100644
--- a/Documentation/devicetree/bindings/arm/omap/prcm.txt
+++ b/Documentation/devicetree/bindings/arm/omap/prcm.txt
@@ -31,12 +31,17 @@ Required properties:
(base address and length)
- clocks: clocks for this module
- clockdomains: clockdomains for this module
+- #clock-cells: From common clock binding
+- clock-output-names: From common clock binding
+
Example:
-cm: cm@48004000 {
+cm: clock@48004000 {
compatible = "ti,omap3-cm";
reg = <0x48004000 0x4000>;
+ #clock-cells = <0>;
+ clock-output-names = "cm";
cm_clocks: clocks {
#address-cells = <1>;
diff --git a/Documentation/devicetree/bindings/arm/omap/prm-inst.txt b/Documentation/devicetree/bindings/arm/omap/prm-inst.txt
new file mode 100644
index 000000000000..42db138e091a
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/omap/prm-inst.txt
@@ -0,0 +1,31 @@
+OMAP PRM instance bindings
+
+Power and Reset Manager is an IP block on OMAP family of devices which
+handle the power domains and their current state, and provide reset
+handling for the domains and/or separate IP blocks under the power domain
+hierarchy.
+
+Required properties:
+- compatible: Must contain one of the following:
+ "ti,am3-prm-inst"
+ "ti,am4-prm-inst"
+ "ti,omap4-prm-inst"
+ "ti,omap5-prm-inst"
+ "ti,dra7-prm-inst"
+ and additionally must contain:
+ "ti,omap-prm-inst"
+- reg: Contains PRM instance register address range
+ (base address and length)
+
+Optional properties:
+- #power-domain-cells: Should be 0 if the instance is a power domain provider.
+- #reset-cells: Should be 1 if the PRM instance in question supports resets.
+
+Example:
+
+prm_dsp2: prm@1b00 {
+ compatible = "ti,dra7-prm-inst", "ti,omap-prm-inst";
+ reg = <0x1b00 0x40>;
+ #power-domain-cells = <0>;
+ #reset-cells = <1>;
+};
diff --git a/Documentation/devicetree/bindings/arm/oxnas.txt b/Documentation/devicetree/bindings/arm/oxnas.txt
deleted file mode 100644
index ac64e60f99f1..000000000000
--- a/Documentation/devicetree/bindings/arm/oxnas.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-Oxford Semiconductor OXNAS SoCs Family device tree bindings
--------------------------------------------
-
-Boards with the OX810SE SoC shall have the following properties:
- Required root node property:
- compatible: "oxsemi,ox810se"
-
-Boards with the OX820 SoC shall have the following properties:
- Required root node property:
- compatible: "oxsemi,ox820"
-
-Board compatible values:
- - "wd,mbwe" (OX810SE)
- - "cloudengines,pogoplugv3" (OX820)
diff --git a/Documentation/devicetree/bindings/arm/picoxcell.txt b/Documentation/devicetree/bindings/arm/picoxcell.txt
deleted file mode 100644
index e75c0ef51e69..000000000000
--- a/Documentation/devicetree/bindings/arm/picoxcell.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-Picochip picoXcell device tree bindings.
-========================================
-
-Required root node properties:
- - compatible:
- - "picochip,pc7302-pc3x3" : PC7302 development board with PC3X3 device.
- - "picochip,pc7302-pc3x2" : PC7302 development board with PC3X2 device.
- - "picochip,pc3x3" : picoXcell PC3X3 device based board.
- - "picochip,pc3x2" : picoXcell PC3X2 device based board.
-
-Timers required properties:
- - compatible = "picochip,pc3x2-timer"
- - interrupts : The single IRQ line for the timer.
- - clock-freq : The frequency in HZ of the timer.
- - reg : The register bank for the timer.
-
-Note: two timers are required - one for the scheduler clock and one for the
-event tick/NOHZ.
-
-VIC required properties:
- - compatible = "arm,pl192-vic".
- - interrupt-controller.
- - reg : The register bank for the device.
- - #interrupt-cells : Must be 1.
diff --git a/Documentation/devicetree/bindings/arm/pmu.txt b/Documentation/devicetree/bindings/arm/pmu.txt
deleted file mode 100644
index 13611a8199bb..000000000000
--- a/Documentation/devicetree/bindings/arm/pmu.txt
+++ /dev/null
@@ -1,70 +0,0 @@
-* ARM Performance Monitor Units
-
-ARM cores often have a PMU for counting cpu and cache events like cache misses
-and hits. The interface to the PMU is part of the ARM ARM. The ARM PMU
-representation in the device tree should be done as under:-
-
-Required properties:
-
-- compatible : should be one of
- "apm,potenza-pmu"
- "arm,armv8-pmuv3"
- "arm,cortex-a73-pmu"
- "arm,cortex-a72-pmu"
- "arm,cortex-a57-pmu"
- "arm,cortex-a53-pmu"
- "arm,cortex-a35-pmu"
- "arm,cortex-a17-pmu"
- "arm,cortex-a15-pmu"
- "arm,cortex-a12-pmu"
- "arm,cortex-a9-pmu"
- "arm,cortex-a8-pmu"
- "arm,cortex-a7-pmu"
- "arm,cortex-a5-pmu"
- "arm,arm11mpcore-pmu"
- "arm,arm1176-pmu"
- "arm,arm1136-pmu"
- "brcm,vulcan-pmu"
- "cavium,thunder-pmu"
- "qcom,scorpion-pmu"
- "qcom,scorpion-mp-pmu"
- "qcom,krait-pmu"
-- interrupts : 1 combined interrupt or 1 per core. If the interrupt is a per-cpu
- interrupt (PPI) then 1 interrupt should be specified.
-
-Optional properties:
-
-- interrupt-affinity : When using SPIs, specifies a list of phandles to CPU
- nodes corresponding directly to the affinity of
- the SPIs listed in the interrupts property.
-
- When using a PPI, specifies a list of phandles to CPU
- nodes corresponding to the set of CPUs which have
- a PMU of this type signalling the PPI listed in the
- interrupts property, unless this is already specified
- by the PPI interrupt specifier itself (in which case
- the interrupt-affinity property shouldn't be present).
-
- This property should be present when there is more than
- a single SPI.
-
-
-- qcom,no-pc-write : Indicates that this PMU doesn't support the 0xc and 0xd
- events.
-
-- secure-reg-access : Indicates that the ARMv7 Secure Debug Enable Register
- (SDER) is accessible. This will cause the driver to do
- any setup required that is only possible in ARMv7 secure
- state. If not present the ARMv7 SDER will not be touched,
- which means the PMU may fail to operate unless external
- code (bootloader or security monitor) has performed the
- appropriate initialisation. Note that this property is
- not valid for non-ARMv7 CPUs or ARMv7 CPUs booting Linux
- in Non-secure state.
-
-Example:
-
-pmu {
- compatible = "arm,cortex-a9-pmu";
- interrupts = <100 101>;
-};
diff --git a/Documentation/devicetree/bindings/arm/pmu.yaml b/Documentation/devicetree/bindings/arm/pmu.yaml
new file mode 100644
index 000000000000..f47baaefcdac
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/pmu.yaml
@@ -0,0 +1,133 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/pmu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM Performance Monitor Units
+
+maintainers:
+ - Mark Rutland <mark.rutland@arm.com>
+ - Will Deacon <will.deacon@arm.com>
+
+description: |+
+ ARM cores often have a PMU for counting cpu and cache events like cache misses
+ and hits. The interface to the PMU is part of the ARM ARM. The ARM PMU
+ representation in the device tree should be done as under:-
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - apm,potenza-pmu
+ - apple,avalanche-pmu
+ - apple,blizzard-pmu
+ - apple,firestorm-pmu
+ - apple,icestorm-pmu
+ - arm,armv8-pmuv3 # Only for s/w models
+ - arm,arm1136-pmu
+ - arm,arm1176-pmu
+ - arm,arm11mpcore-pmu
+ - arm,c1-nano-pmu
+ - arm,c1-premium-pmu
+ - arm,c1-pro-pmu
+ - arm,c1-ultra-pmu
+ - arm,cortex-a5-pmu
+ - arm,cortex-a7-pmu
+ - arm,cortex-a8-pmu
+ - arm,cortex-a9-pmu
+ - arm,cortex-a12-pmu
+ - arm,cortex-a15-pmu
+ - arm,cortex-a17-pmu
+ - arm,cortex-a32-pmu
+ - arm,cortex-a34-pmu
+ - arm,cortex-a35-pmu
+ - arm,cortex-a53-pmu
+ - arm,cortex-a55-pmu
+ - arm,cortex-a57-pmu
+ - arm,cortex-a65-pmu
+ - arm,cortex-a72-pmu
+ - arm,cortex-a73-pmu
+ - arm,cortex-a75-pmu
+ - arm,cortex-a76-pmu
+ - arm,cortex-a77-pmu
+ - arm,cortex-a78-pmu
+ - arm,cortex-a320-pmu
+ - arm,cortex-a510-pmu
+ - arm,cortex-a520-pmu
+ - arm,cortex-a520ae-pmu
+ - arm,cortex-a710-pmu
+ - arm,cortex-a715-pmu
+ - arm,cortex-a720-pmu
+ - arm,cortex-a720ae-pmu
+ - arm,cortex-a725-pmu
+ - arm,cortex-x1-pmu
+ - arm,cortex-x2-pmu
+ - arm,cortex-x3-pmu
+ - arm,cortex-x4-pmu
+ - arm,cortex-x925-pmu
+ - arm,neoverse-e1-pmu
+ - arm,neoverse-n1-pmu
+ - arm,neoverse-n2-pmu
+ - arm,neoverse-n3-pmu
+ - arm,neoverse-v1-pmu
+ - arm,neoverse-v2-pmu
+ - arm,neoverse-v3-pmu
+ - arm,neoverse-v3ae-pmu
+ - arm,rainier-pmu
+ - brcm,vulcan-pmu
+ - cavium,thunder-pmu
+ - nvidia,denver-pmu
+ - nvidia,carmel-pmu
+ - qcom,krait-pmu
+ - qcom,scorpion-pmu
+ - qcom,scorpion-mp-pmu
+ - samsung,mongoose-pmu
+
+ interrupts:
+ # Don't know how many CPUs, so no constraints to specify
+ description: 1 per-cpu interrupt (PPI) or 1 interrupt per core.
+
+ interrupt-affinity:
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ items:
+ maxItems: 1
+ description:
+ When using SPIs, specifies a list of phandles to CPU
+ nodes corresponding directly to the affinity of
+ the SPIs listed in the interrupts property.
+
+ When using a PPI, specifies a list of phandles to CPU
+ nodes corresponding to the set of CPUs which have
+ a PMU of this type signalling the PPI listed in the
+ interrupts property, unless this is already specified
+ by the PPI interrupt specifier itself (in which case
+ the interrupt-affinity property shouldn't be present).
+
+ This property should be present when there is more than
+ a single SPI.
+
+ qcom,no-pc-write:
+ type: boolean
+ description:
+ Indicates that this PMU doesn't support the 0xc and 0xd events.
+
+ secure-reg-access:
+ type: boolean
+ description:
+ Indicates that the ARMv7 Secure Debug Enable Register
+ (SDER) is accessible. This will cause the driver to do
+ any setup required that is only possible in ARMv7 secure
+ state. If not present the ARMv7 SDER will not be touched,
+ which means the PMU may fail to operate unless external
+ code (bootloader or security monitor) has performed the
+ appropriate initialisation. Note that this property is
+ not valid for non-ARMv7 CPUs or ARMv7 CPUs booting Linux
+ in Non-secure state.
+
+required:
+ - compatible
+
+additionalProperties: false
+
+...
diff --git a/Documentation/devicetree/bindings/arm/primecell.txt b/Documentation/devicetree/bindings/arm/primecell.txt
deleted file mode 100644
index 0df6acacfaea..000000000000
--- a/Documentation/devicetree/bindings/arm/primecell.txt
+++ /dev/null
@@ -1,46 +0,0 @@
-* ARM Primecell Peripherals
-
-ARM, Ltd. Primecell peripherals have a standard id register that can be used to
-identify the peripheral type, vendor, and revision. This value can be used for
-driver matching.
-
-Required properties:
-
-- compatible : should be a specific name for the peripheral and
- "arm,primecell". The specific name will match the ARM
- engineering name for the logic block in the form: "arm,pl???"
-
-Optional properties:
-
-- arm,primecell-periphid : Value to override the h/w value with
-- clocks : From common clock binding. First clock is phandle to clock for apb
- pclk. Additional clocks are optional and specific to those peripherals.
-- clock-names : From common clock binding. Shall be "apb_pclk" for first clock.
-- dmas : From common DMA binding. If present, refers to one or more dma channels.
-- dma-names : From common DMA binding, needs to match the 'dmas' property.
- Devices with exactly one receive and transmit channel shall name
- these "rx" and "tx", respectively.
-- pinctrl-<n> : Pinctrl states as described in bindings/pinctrl/pinctrl-bindings.txt
-- pinctrl-names : Names corresponding to the numbered pinctrl states
-- interrupts : one or more interrupt specifiers
-- interrupt-names : names corresponding to the interrupts properties
-
-Example:
-
-serial@fff36000 {
- compatible = "arm,pl011", "arm,primecell";
- arm,primecell-periphid = <0x00341011>;
-
- clocks = <&pclk>;
- clock-names = "apb_pclk";
-
- dmas = <&dma-controller 4>, <&dma-controller 5>;
- dma-names = "rx", "tx";
-
- pinctrl-0 = <&uart0_default_mux>, <&uart0_default_mode>;
- pinctrl-1 = <&uart0_sleep_mode>;
- pinctrl-names = "default","sleep";
-
- interrupts = <0 11 0x4>;
-};
-
diff --git a/Documentation/devicetree/bindings/arm/primecell.yaml b/Documentation/devicetree/bindings/arm/primecell.yaml
new file mode 100644
index 000000000000..e15fe00aafb2
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/primecell.yaml
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/primecell.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM Primecell Peripherals
+
+maintainers:
+ - Rob Herring <robh@kernel.org>
+
+description: |+
+ ARM, Ltd. Primecell peripherals have a standard id register that can be used to
+ identify the peripheral type, vendor, and revision. This value can be used for
+ driver matching.
+
+properties:
+ compatible:
+ contains:
+ const: arm,primecell
+ description:
+ Should be a specific name for the peripheral followed by "arm,primecell".
+ The specific name will match the ARM engineering name for the logic block
+ in the form "arm,pl???"
+
+ arm,primecell-periphid:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: Value to override the h/w ID value
+ clocks:
+ minItems: 1
+ maxItems: 32
+ clock-names:
+ contains:
+ const: apb_pclk
+ additionalItems: true
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/psci.txt b/Documentation/devicetree/bindings/arm/psci.txt
deleted file mode 100644
index a2c4f1d52492..000000000000
--- a/Documentation/devicetree/bindings/arm/psci.txt
+++ /dev/null
@@ -1,111 +0,0 @@
-* Power State Coordination Interface (PSCI)
-
-Firmware implementing the PSCI functions described in ARM document number
-ARM DEN 0022A ("Power State Coordination Interface System Software on ARM
-processors") can be used by Linux to initiate various CPU-centric power
-operations.
-
-Issue A of the specification describes functions for CPU suspend, hotplug
-and migration of secure software.
-
-Functions are invoked by trapping to the privilege level of the PSCI
-firmware (specified as part of the binding below) and passing arguments
-in a manner similar to that specified by AAPCS:
-
- r0 => 32-bit Function ID / return value
- {r1 - r3} => Parameters
-
-Note that the immediate field of the trapping instruction must be set
-to #0.
-
-
-Main node required properties:
-
- - compatible : should contain at least one of:
-
- * "arm,psci" : For implementations complying to PSCI versions prior
- to 0.2.
- For these cases function IDs must be provided.
-
- * "arm,psci-0.2" : For implementations complying to PSCI 0.2.
- Function IDs are not required and should be ignored by
- an OS with PSCI 0.2 support, but are permitted to be
- present for compatibility with existing software when
- "arm,psci" is later in the compatible list.
-
- * "arm,psci-1.0" : For implementations complying to PSCI 1.0.
- PSCI 1.0 is backward compatible with PSCI 0.2 with
- minor specification updates, as defined in the PSCI
- specification[2].
-
- - method : The method of calling the PSCI firmware. Permitted
- values are:
-
- "smc" : SMC #0, with the register assignments specified
- in this binding.
-
- "hvc" : HVC #0, with the register assignments specified
- in this binding.
-
-Main node optional properties:
-
- - cpu_suspend : Function ID for CPU_SUSPEND operation
-
- - cpu_off : Function ID for CPU_OFF operation
-
- - cpu_on : Function ID for CPU_ON operation
-
- - migrate : Function ID for MIGRATE operation
-
-Device tree nodes that require usage of PSCI CPU_SUSPEND function (ie idle
-state nodes, as per bindings in [1]) must specify the following properties:
-
-- arm,psci-suspend-param
- Usage: Required for state nodes[1] if the corresponding
- idle-states node entry-method property is set
- to "psci".
- Value type: <u32>
- Definition: power_state parameter to pass to the PSCI
- suspend call.
-
-Example:
-
-Case 1: PSCI v0.1 only.
-
- psci {
- compatible = "arm,psci";
- method = "smc";
- cpu_suspend = <0x95c10000>;
- cpu_off = <0x95c10001>;
- cpu_on = <0x95c10002>;
- migrate = <0x95c10003>;
- };
-
-Case 2: PSCI v0.2 only
-
- psci {
- compatible = "arm,psci-0.2";
- method = "smc";
- };
-
-Case 3: PSCI v0.2 and PSCI v0.1.
-
- A DTB may provide IDs for use by kernels without PSCI 0.2 support,
- enabling firmware and hypervisors to support existing and new kernels.
- These IDs will be ignored by kernels with PSCI 0.2 support, which will
- use the standard PSCI 0.2 IDs exclusively.
-
- psci {
- compatible = "arm,psci-0.2", "arm,psci";
- method = "hvc";
-
- cpu_on = < arbitrary value >;
- cpu_off = < arbitrary value >;
-
- ...
- };
-
-[1] Kernel documentation - ARM idle states bindings
- Documentation/devicetree/bindings/arm/idle-states.txt
-[2] Power State Coordination Interface (PSCI) specification
- http://infocenter.arm.com/help/topic/com.arm.doc.den0022c/DEN0022C_Power_State_Coordination_Interface.pdf
diff --git a/Documentation/devicetree/bindings/arm/psci.yaml b/Documentation/devicetree/bindings/arm/psci.yaml
new file mode 100644
index 000000000000..7360a2849b5b
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/psci.yaml
@@ -0,0 +1,264 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/psci.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Power State Coordination Interface (PSCI)
+
+maintainers:
+ - Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+
+description: |+
+ Firmware implementing the PSCI functions described in ARM document number
+ ARM DEN 0022A ("Power State Coordination Interface System Software on ARM
+ processors") can be used by Linux to initiate various CPU-centric power
+ operations.
+
+ Issue A of the specification describes functions for CPU suspend, hotplug
+ and migration of secure software.
+
+ Functions are invoked by trapping to the privilege level of the PSCI
+ firmware (specified as part of the binding below) and passing arguments
+ in a manner similar to that specified by AAPCS:
+
+ r0 => 32-bit Function ID / return value
+ {r1 - r3} => Parameters
+
+ Note that the immediate field of the trapping instruction must be set
+ to #0.
+
+ [2] Power State Coordination Interface (PSCI) specification
+ http://infocenter.arm.com/help/topic/com.arm.doc.den0022c/DEN0022C_Power_State_Coordination_Interface.pdf
+
+properties:
+ $nodename:
+ const: psci
+
+ compatible:
+ oneOf:
+ - description:
+ For implementations complying to PSCI versions prior to 0.2.
+ const: arm,psci
+
+ - description:
+ For implementations complying to PSCI 0.2.
+ Function IDs are not required and should be ignored by an OS with
+ PSCI 0.2 support, but are permitted to be present for compatibility
+ with existing software when "arm,psci" is later in the compatible
+ list.
+ minItems: 1
+ items:
+ - const: arm,psci-0.2
+ - const: arm,psci
+
+ - description:
+ For implementations complying to PSCI 1.0.
+ PSCI 1.0 is backward compatible with PSCI 0.2 with minor
+ specification updates, as defined in the PSCI specification[2].
+ minItems: 1
+ items:
+ - const: arm,psci-1.0
+ - const: arm,psci-0.2
+ - const: arm,psci
+
+ method:
+ description: The method of calling the PSCI firmware.
+ $ref: /schemas/types.yaml#/definitions/string-array
+ enum:
+ - smc
+ # HVC #0, with the register assignments specified in this binding.
+ - hvc
+
+ cpu_suspend:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: Function ID for CPU_SUSPEND operation
+
+ cpu_off:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: Function ID for CPU_OFF operation
+
+ cpu_on:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: Function ID for CPU_ON operation
+
+ migrate:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: Function ID for MIGRATE operation
+
+ arm,psci-suspend-param:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: |
+ power_state parameter to pass to the PSCI suspend call.
+
+ Device tree nodes that require usage of PSCI CPU_SUSPEND function (ie
+ idle state nodes with entry-method property is set to "psci", as per
+ bindings in [1]) must specify this property.
+
+ [1] Kernel documentation - ARM idle states bindings
+ Documentation/devicetree/bindings/cpu/idle-states.yaml
+
+patternProperties:
+ "^power-domain-":
+ $ref: /schemas/power/power-domain.yaml#
+ unevaluatedProperties: false
+
+ type: object
+ description: |
+ ARM systems can have multiple cores, sometimes in an hierarchical
+ arrangement. This often, but not always, maps directly to the processor
+ power topology of the system. Individual nodes in a topology have their
+ own specific power states and can be better represented hierarchically.
+
+ For these cases, the definitions of the idle states for the CPUs and the
+ CPU topology, must conform to the binding in [3]. The idle states
+ themselves must conform to the binding in [4] and must specify the
+ arm,psci-suspend-param property.
+
+ It should also be noted that, in PSCI firmware v1.0 the OS-Initiated
+ (OSI) CPU suspend mode is introduced. Using a hierarchical representation
+ helps to implement support for OSI mode and OS implementations may choose
+ to mandate it.
+
+ [3] Documentation/devicetree/bindings/power/power-domain.yaml
+ [4] Documentation/devicetree/bindings/power/domain-idle-state.yaml
+
+required:
+ - compatible
+ - method
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: arm,psci
+ then:
+ required:
+ - cpu_off
+ - cpu_on
+
+additionalProperties: false
+
+examples:
+ - |+
+
+ // Case 1: PSCI v0.1 only.
+
+ psci {
+ compatible = "arm,psci";
+ method = "smc";
+ cpu_suspend = <0x95c10000>;
+ cpu_off = <0x95c10001>;
+ cpu_on = <0x95c10002>;
+ migrate = <0x95c10003>;
+ };
+
+ - |+
+
+ // Case 2: PSCI v0.2 only
+
+ psci {
+ compatible = "arm,psci-0.2";
+ method = "smc";
+ };
+
+
+ - |+
+
+ // Case 3: PSCI v0.2 and PSCI v0.1.
+
+ /*
+ * A DTB may provide IDs for use by kernels without PSCI 0.2 support,
+ * enabling firmware and hypervisors to support existing and new kernels.
+ * These IDs will be ignored by kernels with PSCI 0.2 support, which will
+ * use the standard PSCI 0.2 IDs exclusively.
+ */
+
+ psci {
+ compatible = "arm,psci-0.2", "arm,psci";
+ method = "hvc";
+
+ cpu_on = <0x95c10002>;
+ cpu_off = <0x95c10001>;
+ };
+
+ - |+
+
+ // Case 4: CPUs and CPU idle states described using the hierarchical model.
+
+ cpus {
+ #size-cells = <0>;
+ #address-cells = <1>;
+
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53";
+ reg = <0x0>;
+ enable-method = "psci";
+ power-domains = <&cpu_pd0>;
+ power-domain-names = "psci";
+ };
+
+ cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53";
+ reg = <0x100>;
+ enable-method = "psci";
+ power-domains = <&cpu_pd1>;
+ power-domain-names = "psci";
+ };
+
+ idle-states {
+
+ cpu_pwrdn: cpu-power-down {
+ compatible = "arm,idle-state";
+ arm,psci-suspend-param = <0x0000001>;
+ entry-latency-us = <10>;
+ exit-latency-us = <10>;
+ min-residency-us = <100>;
+ };
+ };
+
+ domain-idle-states {
+
+ cluster_ret: cluster-retention {
+ compatible = "domain-idle-state";
+ arm,psci-suspend-param = <0x1000011>;
+ entry-latency-us = <500>;
+ exit-latency-us = <500>;
+ min-residency-us = <2000>;
+ };
+
+ cluster_pwrdn: cluster-power-down {
+ compatible = "domain-idle-state";
+ arm,psci-suspend-param = <0x1000031>;
+ entry-latency-us = <2000>;
+ exit-latency-us = <2000>;
+ min-residency-us = <6000>;
+ };
+ };
+ };
+
+ psci {
+ compatible = "arm,psci-1.0";
+ method = "smc";
+
+ cpu_pd0: power-domain-cpu0 {
+ #power-domain-cells = <0>;
+ domain-idle-states = <&cpu_pwrdn>;
+ power-domains = <&cluster_pd>;
+ };
+
+ cpu_pd1: power-domain-cpu1 {
+ #power-domain-cells = <0>;
+ domain-idle-states = <&cpu_pwrdn>;
+ power-domains = <&cluster_pd>;
+ };
+
+ cluster_pd: power-domain-cluster {
+ #power-domain-cells = <0>;
+ domain-idle-states = <&cluster_ret>, <&cluster_pwrdn>;
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/arm/qcom,coresight-ctcu.yaml b/Documentation/devicetree/bindings/arm/qcom,coresight-ctcu.yaml
new file mode 100644
index 000000000000..c969c16c21ef
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/qcom,coresight-ctcu.yaml
@@ -0,0 +1,88 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/qcom,coresight-ctcu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: CoreSight TMC Control Unit
+
+maintainers:
+ - Yuanfang Zhang <quic_yuanfang@quicinc.com>
+ - Mao Jinlong <quic_jinlmao@quicinc.com>
+ - Jie Gan <quic_jiegan@quicinc.com>
+
+description: |
+ The Trace Memory Controller(TMC) is used for Embedded Trace Buffer(ETB),
+ Embedded Trace FIFO(ETF) and Embedded Trace Router(ETR) configurations.
+ The configuration mode (ETB, ETF, ETR) is discovered at boot time when
+ the device is probed.
+
+ The Coresight TMC Control unit controls various Coresight behaviors.
+ It works as a helper device when connected to TMC ETR device.
+ It is responsible for controlling the data filter function based on
+ the source device's Trace ID for TMC ETR device. The trace data with
+ that Trace id can get into ETR's buffer while other trace data gets
+ ignored.
+
+properties:
+ compatible:
+ enum:
+ - qcom,sa8775p-ctcu
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ items:
+ - const: apb
+
+ label:
+ description:
+ Description of a coresight device.
+
+ in-ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ patternProperties:
+ '^port(@[0-1])?$':
+ description: Input connections from CoreSight Trace bus
+ $ref: /schemas/graph.yaml#/properties/port
+
+required:
+ - compatible
+ - reg
+ - in-ports
+
+additionalProperties: false
+
+examples:
+ - |
+ ctcu@1001000 {
+ compatible = "qcom,sa8775p-ctcu";
+ reg = <0x1001000 0x1000>;
+
+ clocks = <&aoss_qmp>;
+ clock-names = "apb";
+
+ in-ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ ctcu_in_port0: endpoint {
+ remote-endpoint = <&etr0_out_port>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ ctcu_in_port1: endpoint {
+ remote-endpoint = <&etr1_out_port>;
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/arm/qcom,coresight-remote-etm.yaml b/Documentation/devicetree/bindings/arm/qcom,coresight-remote-etm.yaml
new file mode 100644
index 000000000000..ffe613efeabe
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/qcom,coresight-remote-etm.yaml
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/qcom,coresight-remote-etm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Coresight Remote ETM(Embedded Trace Macrocell)
+
+maintainers:
+ - Jinlong Mao <quic_jinlmao@quicinc.com>
+ - Tao Zhang <quic_taozha@quicinc.com>
+
+description:
+ Support for ETM trace collection on remote processor using coresight
+ framework. Enabling this will allow turning on ETM tracing on remote
+ processor like modem processor via sysfs and collecting the trace
+ via coresight TMC sinks.
+
+properties:
+ compatible:
+ const: qcom,coresight-remote-etm
+
+ label:
+ description:
+ Description of a coresight device.
+
+ out-ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+ additionalProperties: false
+
+ properties:
+ port:
+ description: Output connection to the CoreSight Trace bus.
+ $ref: /schemas/graph.yaml#/properties/port
+
+required:
+ - compatible
+ - out-ports
+
+additionalProperties: false
+
+examples:
+ - |
+ etm {
+ compatible = "qcom,coresight-remote-etm";
+
+ out-ports {
+ port {
+ modem_etm0_out_funnel_modem: endpoint {
+ remote-endpoint = <&funnel_modem_in_modem_etm0>;
+ };
+ };
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/arm/qcom,coresight-tnoc.yaml b/Documentation/devicetree/bindings/arm/qcom,coresight-tnoc.yaml
new file mode 100644
index 000000000000..9d1c93a9ade3
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/qcom,coresight-tnoc.yaml
@@ -0,0 +1,113 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/qcom,coresight-tnoc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Trace Network On Chip - TNOC
+
+maintainers:
+ - Yuanfang Zhang <quic_yuanfang@quicinc.com>
+
+description: >
+ The Trace Network On Chip (TNOC) is an integration hierarchy hardware
+ component that integrates the functionalities of TPDA and funnels.
+
+ It sits in the different subsystem of SOC and aggregates the trace and
+ transports it to Aggregation TNOC or to coresight trace sink eventually.
+ TNOC embeds bridges for all the interfaces APB, ATB, TPDA and NTS (Narrow
+ Time Stamp).
+
+ TNOC can take inputs from different trace sources i.e. ATB, TPDM.
+
+ Note this binding is specifically intended for Aggregator TNOC instances.
+
+# Need a custom select here or 'arm,primecell' will match on lots of nodes
+select:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,coresight-tnoc
+ required:
+ - compatible
+
+properties:
+ $nodename:
+ pattern: "^tn(@[0-9a-f]+)$"
+
+ compatible:
+ items:
+ - const: qcom,coresight-tnoc
+ - const: arm,primecell
+
+ reg:
+ maxItems: 1
+
+ clock-names:
+ items:
+ - const: apb_pclk
+
+ clocks:
+ items:
+ - description: APB register access clock
+
+ in-ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ patternProperties:
+ '^port(@[0-9a-f]{1,2})?$':
+ description: Input connections from CoreSight Trace Bus
+ $ref: /schemas/graph.yaml#/properties/port
+
+ out-ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+ additionalProperties: false
+
+ properties:
+ port:
+ description:
+ Output connection to CoreSight Trace Bus
+ $ref: /schemas/graph.yaml#/properties/port
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - in-ports
+ - out-ports
+
+additionalProperties: false
+
+examples:
+ - |
+ tn@109ab000 {
+ compatible = "qcom,coresight-tnoc", "arm,primecell";
+ reg = <0x109ab000 0x4200>;
+
+ clocks = <&aoss_qmp>;
+ clock-names = "apb_pclk";
+
+ in-ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ tn_ag_in_tpdm_gcc: endpoint {
+ remote-endpoint = <&tpdm_gcc_out_tn_ag>;
+ };
+ };
+ };
+
+ out-ports {
+ port {
+ tn_ag_out_funnel_in1: endpoint {
+ remote-endpoint = <&funnel_in1_in_tn_ag>;
+ };
+ };
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/arm/qcom,coresight-tpda.yaml b/Documentation/devicetree/bindings/arm/qcom,coresight-tpda.yaml
new file mode 100644
index 000000000000..a48c9ac3eaa9
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/qcom,coresight-tpda.yaml
@@ -0,0 +1,128 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+# Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/qcom,coresight-tpda.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Trace, Profiling and Diagnostics Aggregator - TPDA
+
+description: |
+ TPDAs are responsible for packetization and timestamping of data sets
+ utilizing the MIPI STPv2 packet protocol. Pulling data sets from one or
+ more attached TPDM and pushing the resultant (packetized) data out a
+ master ATB interface. Performing an arbitrated ATB interleaving (funneling)
+ task for free-flowing data from TPDM (i.e. CMB and DSB data set flows).
+
+ There is no strict binding between TPDM and TPDA. TPDA can have multiple
+ TPDMs connect to it. But There must be only one TPDA in the path from the
+ TPDM source to TMC sink. TPDM can directly connect to TPDA's inport or
+ connect to funnel which will connect to TPDA's inport.
+
+ We can use the commands are similar to the below to validate TPDMs.
+ Enable coresight sink first.
+
+ echo 1 > /sys/bus/coresight/devices/tmc_etf0/enable_sink
+ echo 1 > /sys/bus/coresight/devices/tpdm0/enable_source
+ echo 1 > /sys/bus/coresight/devices/tpdm0/integration_test
+ echo 2 > /sys/bus/coresight/devices/tpdm0/integration_test
+
+ The test data will be collected in the coresight sink which is enabled.
+ If rwp register of the sink is keeping updating when do integration_test
+ (by cat tmc_etf0/mgmt/rwp), it means there is data generated from TPDM
+ to sink.
+
+maintainers:
+ - Mao Jinlong <quic_jinlmao@quicinc.com>
+ - Tao Zhang <quic_taozha@quicinc.com>
+
+# Need a custom select here or 'arm,primecell' will match on lots of nodes
+select:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,coresight-tpda
+ required:
+ - compatible
+
+properties:
+ $nodename:
+ pattern: "^tpda(@[0-9a-f]+)$"
+ compatible:
+ items:
+ - const: qcom,coresight-tpda
+ - const: arm,primecell
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ items:
+ - const: apb_pclk
+
+ label:
+ description:
+ Description of a coresight device.
+
+ in-ports:
+ description: |
+ Input connections from TPDM to TPDA
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ out-ports:
+ description: |
+ Output connections from the TPDA to legacy CoreSight trace bus.
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port:
+ description:
+ Output connection from the TPDA to legacy CoreSight Trace bus.
+ $ref: /schemas/graph.yaml#/properties/port
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - in-ports
+ - out-ports
+
+additionalProperties: false
+
+examples:
+ # minimum tpda definition.
+ - |
+ tpda@6004000 {
+ compatible = "qcom,coresight-tpda", "arm,primecell";
+ reg = <0x6004000 0x1000>;
+
+ clocks = <&aoss_qmp>;
+ clock-names = "apb_pclk";
+
+ in-ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ tpda_qdss_0_in_tpdm_dcc: endpoint {
+ remote-endpoint = <&tpdm_dcc_out_tpda_qdss_0>;
+ };
+ };
+ };
+
+ out-ports {
+ port {
+ tpda_qdss_out_funnel_in0: endpoint {
+ remote-endpoint = <&funnel_in0_in_tpda_qdss>;
+ };
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/arm/qcom,coresight-tpdm.yaml b/Documentation/devicetree/bindings/arm/qcom,coresight-tpdm.yaml
new file mode 100644
index 000000000000..4edc47483851
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/qcom,coresight-tpdm.yaml
@@ -0,0 +1,150 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+# Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/qcom,coresight-tpdm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Trace, Profiling and Diagnostics Monitor - TPDM
+
+description: |
+ The TPDM or Monitor serves as data collection component for various dataset
+ types specified in the QPMDA spec. It covers Implementation defined ((ImplDef),
+ Basic Counts (BC), Tenure Counts (TC), Continuous Multi-Bit (CMB), and Discrete
+ Single Bit (DSB). It performs data collection in the data producing clock
+ domain and transfers it to the data collection time domain, generally ATB
+ clock domain.
+
+ The primary use case of the TPDM is to collect data from different data
+ sources and send it to a TPDA for packetization, timestamping, and funneling.
+
+maintainers:
+ - Mao Jinlong <quic_jinlmao@quicinc.com>
+ - Tao Zhang <quic_taozha@quicinc.com>
+
+# Need a custom select here or 'arm,primecell' will match on lots of nodes
+select:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,coresight-tpdm
+ required:
+ - compatible
+
+properties:
+ $nodename:
+ pattern: "^tpdm(@[0-9a-f]+)$"
+ compatible:
+ items:
+ - const: qcom,coresight-tpdm
+ - const: arm,primecell
+
+ reg:
+ maxItems: 1
+
+ qcom,dsb-element-bits:
+ description:
+ Specifies the DSB(Discrete Single Bit) element size supported by
+ the monitor. The associated aggregator will read this size before it
+ is enabled. DSB element size currently only supports 32-bit and 64-bit.
+ enum: [32, 64]
+
+ qcom,cmb-element-bits:
+ description:
+ Specifies the CMB(Continuous Multi-Bit) element size supported by
+ the monitor. The associated aggregator will read this size before it
+ is enabled. CMB element size currently only supports 8-bit, 32-bit
+ and 64-bit.
+ enum: [8, 32, 64]
+
+ qcom,dsb-msrs-num:
+ description:
+ Specifies the number of DSB(Discrete Single Bit) MSR(mux select register)
+ registers supported by the monitor. If this property is not configured
+ or set to 0, it means this DSB TPDM doesn't support MSR.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 32
+
+ qcom,cmb-msrs-num:
+ description:
+ Specifies the number of CMB MSR(mux select register) registers supported
+ by the monitor. If this property is not configured or set to 0, it means
+ this TPDM doesn't support CMB MSR.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 32
+
+ label:
+ description:
+ Description of a coresight device.
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ items:
+ - const: apb_pclk
+
+ out-ports:
+ description: |
+ Output connections from the TPDM to coresight funnel/TPDA.
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port:
+ description: Output connection from the TPDM to coresight
+ funnel/TPDA.
+ $ref: /schemas/graph.yaml#/properties/port
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ # minimum TPDM definition. TPDM connect to coresight TPDA.
+ - |
+ tpdm@684c000 {
+ compatible = "qcom,coresight-tpdm", "arm,primecell";
+ reg = <0x0684c000 0x1000>;
+
+ qcom,dsb-element-bits = <32>;
+ qcom,dsb-msrs-num = <16>;
+
+ clocks = <&aoss_qmp>;
+ clock-names = "apb_pclk";
+
+ out-ports {
+ port {
+ tpdm_prng_out_tpda_qdss: endpoint {
+ remote-endpoint =
+ <&tpda_qdss_in_tpdm_prng>;
+ };
+ };
+ };
+ };
+
+ tpdm@6c29000 {
+ compatible = "qcom,coresight-tpdm", "arm,primecell";
+ reg = <0x06c29000 0x1000>;
+
+ qcom,cmb-element-bits = <64>;
+ qcom,cmb-msrs-num = <32>;
+
+ clocks = <&aoss_qmp>;
+ clock-names = "apb_pclk";
+
+ out-ports {
+ port {
+ tpdm_ipcc_out_funnel_center: endpoint {
+ remote-endpoint = <&funnel_center_in_tpdm_ipcc>;
+ };
+ };
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/arm/qcom-soc.yaml b/Documentation/devicetree/bindings/arm/qcom-soc.yaml
new file mode 100644
index 000000000000..27261039d56f
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/qcom-soc.yaml
@@ -0,0 +1,70 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/qcom-soc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm SoC compatibles naming convention
+
+maintainers:
+ - Bjorn Andersson <andersson@kernel.org>
+
+description: |
+ Guidelines for new compatibles for SoC blocks/components.
+ When adding new compatibles in new bindings, use the format::
+ qcom,SoC-IP
+
+ For example::
+ qcom,sdm845-llcc-bwmon
+
+ When adding new compatibles to existing bindings, use the format in the
+ existing binding, even if it contradicts the above.
+
+select:
+ properties:
+ compatible:
+ oneOf:
+ - pattern: "^qcom,.*(apq|ipq|mdm|msm|qcm|qcs|q[dr]u|sa|sar|sc|sd[amx]|sm|x1[ep])[0-9]+.*$"
+ - pattern: "^qcom,.*(glymur|milos).*$"
+ required:
+ - compatible
+
+properties:
+ compatible:
+ oneOf:
+ # Preferred naming style for compatibles of SoC components:
+ - pattern: "^qcom,(apq|ipq|mdm|msm|qcm|qcs|q[dr]u|sa|sc|sd[amx]|sm|x1[ep])[0-9]+(pro)?-.*$"
+ - pattern: "^qcom,sar[0-9]+[a-z]?-.*$"
+ - pattern: "^qcom,(sa|sc)8[0-9]+[a-z][a-z]?-.*$"
+ - pattern: "^qcom,(glymur|milos)-.*$"
+
+ # Legacy namings - variations of existing patterns/compatibles are OK,
+ # but do not add completely new entries to these:
+ - pattern: "^qcom,[ak]pss-wdt-(apq|ipq|mdm|msm|qcm|qcs|q[dr]u|sa|sc|sd[amx]|sm)[0-9]+.*$"
+ - pattern: "^qcom,gcc-(apq|ipq|mdm|msm|qcm|qcs|q[dr]u|sa|sc|sd[amx]|sm)[0-9]+.*$"
+ - pattern: "^qcom,mmcc-(apq|ipq|mdm|msm|qcm|qcs|q[dr]u|sa|sc|sd[amx]|sm)[0-9]+.*$"
+ - pattern: "^qcom,pcie-(apq|ipq|mdm|msm|qcm|qcs|q[dr]u|sa|sc|sd[amx]|sm|x1[ep])[0-9]+.*$"
+ - pattern: "^qcom,rpm-(apq|ipq|mdm|msm|qcm|qcs|q[dr]u|sa|sc|sd[amx]|sm)[0-9]+.*$"
+ - pattern: "^qcom,scm-(apq|ipq|mdm|msm|qcm|qcs|q[dr]u|sa|sc|sd[amx]|sm|x1[ep])[0-9]+.*$"
+ - enum:
+ - qcom,dsi-ctrl-6g-qcm2290
+ - qcom,gpucc-sdm630
+ - qcom,gpucc-sdm660
+ - qcom,lcc-apq8064
+ - qcom,lcc-ipq8064
+ - qcom,lcc-mdm9615
+ - qcom,lcc-msm8960
+ - qcom,lpass-cpu-apq8016
+ - qcom,usb-ss-ipq4019-phy
+ - qcom,usb-hs-ipq4019-phy
+ - qcom,vqmmc-ipq4019-regulator
+
+ # Legacy compatibles with wild-cards - list cannot grow with new bindings:
+ - enum:
+ - qcom,ipq806x-gmac
+ - qcom,ipq806x-nand
+ - qcom,ipq806x-sata-phy
+ - qcom,ipq806x-usb-phy-ss
+ - qcom,ipq806x-usb-phy-hs
+
+additionalProperties: true
diff --git a/Documentation/devicetree/bindings/arm/qcom.txt b/Documentation/devicetree/bindings/arm/qcom.txt
deleted file mode 100644
index ee532e705d6c..000000000000
--- a/Documentation/devicetree/bindings/arm/qcom.txt
+++ /dev/null
@@ -1,57 +0,0 @@
-QCOM device tree bindings
--------------------------
-
-Some qcom based bootloaders identify the dtb blob based on a set of
-device properties like SoC and platform and revisions of those components.
-To support this scheme, we encode this information into the board compatible
-string.
-
-Each board must specify a top-level board compatible string with the following
-format:
-
- compatible = "qcom,<SoC>[-<soc_version>][-<foundry_id>]-<board>[/<subtype>][-<board_version>]"
-
-The 'SoC' and 'board' elements are required. All other elements are optional.
-
-The 'SoC' element must be one of the following strings:
-
- apq8016
- apq8074
- apq8084
- apq8096
- msm8916
- msm8974
- msm8992
- msm8994
- msm8996
- mdm9615
- ipq8074
- sdm845
-
-The 'board' element must be one of the following strings:
-
- cdp
- liquid
- dragonboard
- mtp
- sbc
- hk01
-
-The 'soc_version' and 'board_version' elements take the form of v<Major>.<Minor>
-where the minor number may be omitted when it's zero, i.e. v1.0 is the same
-as v1. If all versions of the 'board_version' elements match, then a
-wildcard '*' should be used, e.g. 'v*'.
-
-The 'foundry_id' and 'subtype' elements are one or more digits from 0 to 9.
-
-Examples:
-
- "qcom,msm8916-v1-cdp-pm8916-v2.1"
-
-A CDP board with an msm8916 SoC, version 1 paired with a pm8916 PMIC of version
-2.1.
-
- "qcom,apq8074-v2.0-2-dragonboard/1-v0.1"
-
-A dragonboard board v0.1 of subtype 1 with an apq8074 SoC version 2, made in
-foundry 2.
diff --git a/Documentation/devicetree/bindings/arm/qcom.yaml b/Documentation/devicetree/bindings/arm/qcom.yaml
new file mode 100644
index 000000000000..18b5ed044f9f
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/qcom.yaml
@@ -0,0 +1,1226 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/qcom.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: QCOM
+
+maintainers:
+ - Bjorn Andersson <bjorn.andersson@linaro.org>
+
+description: |
+ There are many devices in the list below that run the standard ChromeOS
+ bootloader setup and use the open source depthcharge bootloader to boot the
+ OS. These devices use the bootflow explained at
+ https://docs.kernel.org/arch/arm/google/chromebook-boot-flow.html
+
+properties:
+ $nodename:
+ const: "/"
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - qcom,apq8016-sbc
+ - schneider,apq8016-hmibsc
+ - const: qcom,apq8016
+
+ - items:
+ - enum:
+ - asus,sparrow
+ - huawei,sturgeon
+ - lg,lenok
+ - samsung,matisse-wifi
+ - samsung,milletwifi
+ - const: qcom,apq8026
+
+ - items:
+ - enum:
+ - asus,nexus7-flo
+ - lg,nexus4-mako
+ - sony,xperia-yuga
+ - qcom,apq8064-cm-qs600
+ - qcom,apq8064-ifc6410
+ - const: qcom,apq8064
+
+ - items:
+ - enum:
+ - qcom,apq8074-dragonboard
+ - const: qcom,apq8074
+
+ - items:
+ - enum:
+ - qcom,apq8060-dragonboard
+ - qcom,msm8660-surf
+ - const: qcom,msm8660
+
+ - items:
+ - enum:
+ - qcom,apq8084-mtp
+ - qcom,apq8084-sbc
+ - const: qcom,apq8084
+
+ - items:
+ - enum:
+ - microsoft,dempsey
+ - microsoft,makepeace
+ - microsoft,moneypenny
+ - motorola,falcon
+ - samsung,ms013g
+ - samsung,s3ve3g
+ - const: qcom,msm8226
+
+ - items:
+ - enum:
+ - htc,memul
+ - microsoft,superman-lte
+ - microsoft,tesla
+ - motorola,peregrine
+ - samsung,matisselte
+ - const: qcom,msm8926
+ - const: qcom,msm8226
+
+ - items:
+ - enum:
+ - wingtech,wt82918hd
+ - const: qcom,msm8929
+
+ - items:
+ - enum:
+ - huawei,kiwi
+ - longcheer,l9100
+ - samsung,a7
+ - sony,kanuti-tulip
+ - square,apq8039-t2
+ - wingtech,wt82918
+ - wingtech,wt82918hdhw39
+ - const: qcom,msm8939
+
+ - items:
+ - enum:
+ - sony,kugo-row
+ - sony,suzu-row
+ - const: qcom,msm8956
+
+ - items:
+ - enum:
+ - qcom,msm8960-cdp
+ - samsung,expressatt
+ - const: qcom,msm8960
+
+ - items:
+ - enum:
+ - sony,huashan
+ - const: qcom,msm8960t
+ - const: qcom,msm8960
+
+ - items:
+ - enum:
+ - lge,hammerhead
+ - samsung,hlte
+ - sony,xperia-amami
+ - sony,xperia-honami
+ - sony,xperia-togari
+ - const: qcom,msm8974
+
+ - items:
+ - enum:
+ - fairphone,fp2
+ - htc,m8
+ - oneplus,bacon
+ - samsung,klte
+ - sony,xperia-aries
+ - sony,xperia-castor
+ - sony,xperia-leo
+ - const: qcom,msm8974pro
+ - const: qcom,msm8974
+
+ - items:
+ - enum:
+ - samsung,kltechn
+ - const: samsung,klte
+ - const: qcom,msm8974pro
+ - const: qcom,msm8974
+
+ - items:
+ - enum:
+ - longcheer,l9360
+ - const: qcom,msm8976
+
+ - items:
+ - enum:
+ - acer,a1-724
+ - alcatel,idol347
+ - asus,z00l
+ - gplus,fl8005a
+ - huawei,g7
+ - lg,c50
+ - lg,m216
+ - longcheer,l8910
+ - longcheer,l8150
+ - motorola,harpia
+ - motorola,osprey
+ - motorola,surnia
+ - qcom,msm8916-mtp
+ - samsung,a3u-eur
+ - samsung,a5u-eur
+ - samsung,e5
+ - samsung,e7
+ - samsung,fortuna3g
+ - samsung,gprimeltecan
+ - samsung,grandmax
+ - samsung,grandprimelte
+ - samsung,gt510
+ - samsung,gt58
+ - samsung,j3ltetw
+ - samsung,j5
+ - samsung,j5x
+ - samsung,rossa
+ - samsung,serranove
+ - thwc,uf896
+ - thwc,ufi001c
+ - wingtech,wt86518
+ - wingtech,wt86528
+ - wingtech,wt88047
+ - yiming,uz801-v3
+ - const: qcom,msm8916
+
+ - items:
+ - enum:
+ - xiaomi,riva
+ - const: qcom,msm8917
+
+ - items:
+ - enum:
+ - flipkart,rimob
+ - motorola,potter
+ - xiaomi,daisy
+ - xiaomi,mido
+ - xiaomi,tissot
+ - xiaomi,vince
+ - const: qcom,msm8953
+
+ - items:
+ - enum:
+ - lg,bullhead
+ - lg,h815
+ - microsoft,talkman
+ - xiaomi,libra
+ - const: qcom,msm8992
+
+ - items:
+ - enum:
+ - sony,karin_windy
+ - const: qcom,apq8094
+
+ - items:
+ - enum:
+ - huawei,angler
+ - microsoft,cityman
+ - sony,ivy-row
+ - sony,karin-row
+ - sony,satsuki-row
+ - sony,sumire-row
+ - sony,suzuran-row
+ - const: qcom,msm8994
+
+ - items:
+ - enum:
+ - arrow,apq8096-db820c
+ - inforce,ifc6640
+ - const: qcom,apq8096-sbc
+ - const: qcom,apq8096
+
+ - items:
+ - enum:
+ - oneplus,oneplus3
+ - oneplus,oneplus3t
+ - qcom,msm8996-mtp
+ - sony,dora-row
+ - sony,kagura-row
+ - sony,keyaki-row
+ - xiaomi,gemini
+ - const: qcom,msm8996
+
+ - items:
+ - enum:
+ - xiaomi,natrium
+ - xiaomi,scorpio
+ - const: qcom,msm8996pro
+ - const: qcom,msm8996
+
+ - items:
+ - enum:
+ - asus,novago-tp370ql
+ - fxtec,pro1
+ - hp,envy-x2
+ - lenovo,miix-630
+ - oneplus,cheeseburger
+ - oneplus,dumpling
+ - qcom,msm8998-mtp
+ - sony,xperia-lilac
+ - sony,xperia-maple
+ - sony,xperia-poplar
+ - xiaomi,sagit
+ - const: qcom,msm8998
+
+ - items:
+ - enum:
+ - 8dev,jalapeno
+ - alfa-network,ap120c-ac
+ - const: qcom,ipq4018
+
+ - items:
+ - enum:
+ - qcom,ipq4019-ap-dk01.1-c1
+ - qcom,ipq4019-ap-dk04.1-c3
+ - qcom,ipq4019-ap-dk07.1-c1
+ - qcom,ipq4019-ap-dk07.1-c2
+ - qcom,ipq4019-dk04.1-c1
+ - const: qcom,ipq4019
+
+ - items:
+ - enum:
+ - qcom,ipq5018-rdp432-c2
+ - tplink,archer-ax55-v1
+ - const: qcom,ipq5018
+
+ - items:
+ - enum:
+ - qcom,ipq5332-ap-mi01.2
+ - qcom,ipq5332-ap-mi01.3
+ - qcom,ipq5332-ap-mi01.6
+ - qcom,ipq5332-ap-mi01.9
+ - const: qcom,ipq5332
+
+ - items:
+ - enum:
+ - qcom,ipq5424-rdp466
+ - const: qcom,ipq5424
+
+ - items:
+ - enum:
+ - mikrotik,rb3011
+ - qcom,ipq8064-ap148
+ - const: qcom,ipq8064
+
+ - items:
+ - enum:
+ - qcom,ipq8074-hk01
+ - qcom,ipq8074-hk10-c1
+ - qcom,ipq8074-hk10-c2
+ - const: qcom,ipq8074
+
+ - items:
+ - enum:
+ - qcom,ipq9574-ap-al02-c2
+ - qcom,ipq9574-ap-al02-c6
+ - qcom,ipq9574-ap-al02-c7
+ - qcom,ipq9574-ap-al02-c8
+ - qcom,ipq9574-ap-al02-c9
+ - const: qcom,ipq9574
+
+ - description: Sierra Wireless MangOH Green with WP8548 Module
+ items:
+ - const: swir,mangoh-green-wp8548
+ - const: swir,wp8548
+ - const: qcom,mdm9615
+
+ - description: Qualcomm Technologies, Inc. Robotics RB1
+ items:
+ - enum:
+ - qcom,qrb2210-rb1
+ - const: qcom,qrb2210
+ - const: qcom,qcm2290
+
+ - items:
+ - enum:
+ - fairphone,fp5
+ - particle,tachyon
+ - qcom,qcm6490-idp
+ - qcom,qcs6490-rb3gen2
+ - shift,otter
+ - const: qcom,qcm6490
+
+ - description: Qualcomm Technologies, Inc. Distributed Unit 1000 platform
+ items:
+ - enum:
+ - qcom,qdu1000-idp
+ - qcom,qdu1000-x100
+ - const: qcom,qdu1000
+
+ - description: Qualcomm Technologies, Inc. Radio Unit 1000 platform
+ items:
+ - enum:
+ - qcom,qru1000-idp
+ - const: qcom,qru1000
+
+ - description: Qualcomm AR2 Gen1 platform
+ items:
+ - enum:
+ - qcom,qar2130p
+ - const: qcom,sar2130p
+
+ - items:
+ - enum:
+ - acer,aspire1
+ - qcom,sc7180-idp
+ - const: qcom,sc7180
+
+ - description: HP Chromebook x2 11c (rev1 - 2)
+ items:
+ - const: google,coachz-rev1
+ - const: google,coachz-rev2
+ - const: qcom,sc7180
+
+ - description: HP Chromebook x2 11c (newest rev)
+ items:
+ - const: google,coachz
+ - const: qcom,sc7180
+
+ - description: HP Chromebook x2 11c with LTE (rev1 - 2)
+ items:
+ - const: google,coachz-rev1-sku0
+ - const: google,coachz-rev2-sku0
+ - const: qcom,sc7180
+
+ - description: HP Chromebook x2 11c with LTE (newest rev)
+ items:
+ - const: google,coachz-sku0
+ - const: qcom,sc7180
+
+ - description: Lenovo Chromebook Duet 5 13 (rev2)
+ items:
+ - const: google,homestar-rev2
+ - const: google,homestar-rev23
+ - const: qcom,sc7180
+
+ - description: Lenovo Chromebook Duet 5 13 (rev3)
+ items:
+ - const: google,homestar-rev3
+ - const: qcom,sc7180
+
+ - description: Lenovo Chromebook Duet 5 13 (newest rev)
+ items:
+ - const: google,homestar
+ - const: qcom,sc7180
+
+ - description: Google Kingoftown (rev0)
+ items:
+ - const: google,kingoftown-rev0
+ - const: qcom,sc7180
+
+ - description: Google Kingoftown (newest rev)
+ items:
+ - const: google,kingoftown
+ - const: qcom,sc7180
+
+ - description: Acer Chromebook Spin 513 (rev0)
+ items:
+ - const: google,lazor-rev0
+ - const: qcom,sc7180
+
+ - description: Acer Chromebook Spin 513 (rev1 - 2)
+ items:
+ - const: google,lazor-rev1
+ - const: google,lazor-rev2
+ - const: qcom,sc7180
+
+ - description: Acer Chromebook Spin 513 (rev3 - 8)
+ items:
+ - const: google,lazor-rev3
+ - const: google,lazor-rev4
+ - const: google,lazor-rev5
+ - const: google,lazor-rev6
+ - const: google,lazor-rev7
+ - const: google,lazor-rev8
+ - const: qcom,sc7180
+
+ - description: Acer Chromebook Spin 513 (rev9)
+ items:
+ - const: google,lazor-rev9
+ - const: qcom,sc7180
+
+ - description: Acer Chromebook Spin 513 (newest rev)
+ items:
+ - const: google,lazor
+ - const: qcom,sc7180
+
+ - description: Acer Chromebook Spin 513 with KB Backlight (rev1 - 2)
+ items:
+ - const: google,lazor-rev1-sku2
+ - const: google,lazor-rev2-sku2
+ - const: qcom,sc7180
+
+ - description: Acer Chromebook Spin 513 with KB Backlight (rev3 - 8)
+ items:
+ - const: google,lazor-rev3-sku2
+ - const: google,lazor-rev4-sku2
+ - const: google,lazor-rev5-sku2
+ - const: google,lazor-rev6-sku2
+ - const: google,lazor-rev7-sku2
+ - const: google,lazor-rev8-sku2
+ - const: qcom,sc7180
+
+ - description: Acer Chromebook Spin 513 with KB Backlight (rev9)
+ items:
+ - const: google,lazor-rev9-sku2
+ - const: qcom,sc7180
+
+ - description: Acer Chromebook Spin 513 with KB Backlight (newest rev)
+ items:
+ - const: google,lazor-sku2
+ - const: qcom,sc7180
+
+ - description: Acer Chromebook Spin 513 with LTE (rev1 - 2)
+ items:
+ - const: google,lazor-rev1-sku0
+ - const: google,lazor-rev2-sku0
+ - const: qcom,sc7180
+
+ - description: Acer Chromebook Spin 513 with LTE (rev3 - 8)
+ items:
+ - const: google,lazor-rev3-sku0
+ - const: google,lazor-rev4-sku0
+ - const: google,lazor-rev5-sku0
+ - const: google,lazor-rev6-sku0
+ - const: google,lazor-rev7-sku0
+ - const: google,lazor-rev8-sku0
+ - const: qcom,sc7180
+
+ - description: Acer Chromebook Spin 513 with LTE (rev9)
+ items:
+ - const: google,lazor-rev9-sku0
+ - const: google,lazor-rev9-sku10
+ - const: qcom,sc7180
+
+ - description: Acer Chromebook Spin 513 with LTE (newest rev)
+ items:
+ - const: google,lazor-sku0
+ - const: google,lazor-sku10
+ - const: qcom,sc7180
+
+ - description: Acer Chromebook 511 (rev4 - rev8)
+ items:
+ - const: google,lazor-rev4-sku4
+ - const: google,lazor-rev5-sku4
+ - const: google,lazor-rev6-sku4
+ - const: google,lazor-rev7-sku4
+ - const: google,lazor-rev8-sku4
+ - const: qcom,sc7180
+
+ - description: Acer Chromebook 511 (rev9)
+ items:
+ - const: google,lazor-rev9-sku4
+ - const: google,lazor-rev9-sku15
+ - const: qcom,sc7180
+
+ - description: Acer Chromebook 511 (newest rev)
+ items:
+ - const: google,lazor-sku4
+ - const: google,lazor-sku15
+ - const: qcom,sc7180
+
+ - description: Acer Chromebook 511 without Touchscreen (rev4)
+ items:
+ - const: google,lazor-rev4-sku5
+ - const: qcom,sc7180
+
+ - description: Acer Chromebook 511 without Touchscreen (rev5 - rev8)
+ items:
+ - const: google,lazor-rev5-sku5
+ - const: google,lazor-rev5-sku6
+ - const: google,lazor-rev6-sku6
+ - const: google,lazor-rev7-sku6
+ - const: google,lazor-rev8-sku6
+ - const: qcom,sc7180
+
+ - description: Acer Chromebook 511 without Touchscreen (rev9)
+ items:
+ - const: google,lazor-rev9-sku6
+ - const: google,lazor-rev9-sku18
+ - const: qcom,sc7180
+
+ - description: Acer Chromebook 511 without Touchscreen (newest rev)
+ items:
+ - const: google,lazor-sku6
+ - const: google,lazor-sku18
+ - const: qcom,sc7180
+
+ - description: Google Mrbland with AUO panel (rev0)
+ items:
+ - const: google,mrbland-rev0-sku0
+ - const: qcom,sc7180
+
+ - description: Google Mrbland with AUO panel (newest rev)
+ items:
+ - const: google,mrbland-sku1536
+ - const: qcom,sc7180
+
+ - description: Google Mrbland with BOE panel (rev0)
+ items:
+ - const: google,mrbland-rev0-sku16
+ - const: qcom,sc7180
+
+ - description: Google Mrbland with BOE panel (newest rev)
+ items:
+ - const: google,mrbland-sku1024
+ - const: google,mrbland-sku768
+ - const: qcom,sc7180
+
+ - description: Google Pazquel with Parade (newest rev)
+ items:
+ - const: google,pazquel-sku5
+ - const: qcom,sc7180
+
+ - description: Google Pazquel with TI (newest rev)
+ items:
+ - const: google,pazquel-sku1
+ - const: qcom,sc7180
+
+ - description: Google Pazquel with LTE and Parade (newest rev)
+ items:
+ - const: google,pazquel-sku6
+ - const: google,pazquel-sku4
+ - const: qcom,sc7180
+
+ - description: Google Pazquel with LTE and TI (newest rev)
+ items:
+ - const: google,pazquel-sku0
+ - const: google,pazquel-sku2
+ - const: qcom,sc7180
+
+ - description: Google Pazquel360 with LTE (newest rev)
+ items:
+ - const: google,pazquel-sku22
+ - const: google,pazquel-sku20
+ - const: qcom,sc7180
+
+ - description: Google Pazquel360 with WiFi (newest rev)
+ items:
+ - const: google,pazquel-sku21
+ - const: qcom,sc7180
+
+ - description: Sharp Dynabook Chromebook C1 (rev1)
+ items:
+ - const: google,pompom-rev1
+ - const: qcom,sc7180
+
+ - description: Sharp Dynabook Chromebook C1 (rev2)
+ items:
+ - const: google,pompom-rev2
+ - const: qcom,sc7180
+
+ - description: Sharp Dynabook Chromebook C1 (newest rev)
+ items:
+ - const: google,pompom
+ - const: qcom,sc7180
+
+ - description: Sharp Dynabook Chromebook C1 with LTE (rev1)
+ items:
+ - const: google,pompom-rev1-sku0
+ - const: qcom,sc7180
+
+ - description: Sharp Dynabook Chromebook C1 with LTE (rev2)
+ items:
+ - const: google,pompom-rev2-sku0
+ - const: qcom,sc7180
+
+ - description: Sharp Dynabook Chromebook C1 with LTE (newest rev)
+ items:
+ - const: google,pompom-sku0
+ - const: qcom,sc7180
+
+ - description: Google Quackingstick (newest rev)
+ items:
+ - const: google,quackingstick-sku1537
+ - const: qcom,sc7180
+
+ - description: Google Quackingstick with LTE (newest rev)
+ items:
+ - const: google,quackingstick-sku1536
+ - const: qcom,sc7180
+
+ - description: Google Trogdor (newest rev)
+ items:
+ - const: google,trogdor
+ - const: qcom,sc7180
+
+ - description: Google Trogdor with LTE (newest rev)
+ items:
+ - const: google,trogdor-sku0
+ - const: qcom,sc7180
+
+ - description: Lenovo IdeaPad Chromebook Duet 3 with BOE panel (rev0)
+ items:
+ - const: google,wormdingler-rev0-sku16
+ - const: qcom,sc7180
+
+ - description: Lenovo IdeaPad Chromebook Duet 3 with BOE panel (newest rev)
+ items:
+ - const: google,wormdingler-sku1024
+ - const: qcom,sc7180
+
+ - description: Lenovo IdeaPad Chromebook Duet 3 with BOE panel and rt5682s (newest rev)
+ items:
+ - const: google,wormdingler-sku1025
+ - const: qcom,sc7180
+
+ - description: Lenovo IdeaPad Chromebook Duet 3 with INX panel (rev0)
+ items:
+ - const: google,wormdingler-rev0-sku0
+ - const: qcom,sc7180
+
+ - description: Lenovo IdeaPad Chromebook Duet 3 with INX panel (newest rev)
+ items:
+ - const: google,wormdingler-sku0
+ - const: qcom,sc7180
+
+ - description: Lenovo IdeaPad Chromebook Duet 3 with INX panel and rt5682s (newest rev)
+ items:
+ - const: google,wormdingler-sku1
+ - const: qcom,sc7180
+
+ - description: Qualcomm Technologies, Inc. sc7280 CRD platform (rev3 - 4)
+ items:
+ - const: qcom,sc7280-crd
+ - const: google,hoglin-rev3
+ - const: google,hoglin-rev4
+ - const: google,piglin-rev3
+ - const: google,piglin-rev4
+ - const: qcom,sc7280
+
+ - description: Qualcomm Technologies, Inc. sc7280 CRD platform (newest rev)
+ items:
+ - const: google,zoglin
+ - const: google,hoglin
+ - const: qcom,sc7280
+
+ - description: Qualcomm Technologies, Inc. sc7280 CRD Pro platform (newest rev)
+ items:
+ - const: google,zoglin-sku1536
+ - const: google,hoglin-sku1536
+ - const: qcom,sc7280
+
+ - description: Qualcomm Technologies, Inc. sc7280 IDP SKU1 platform
+ items:
+ - const: qcom,sc7280-idp
+ - const: google,senor
+ - const: qcom,sc7280
+
+ - description: Qualcomm Technologies, Inc. sc7280 IDP SKU2 platform
+ items:
+ - const: qcom,sc7280-idp2
+ - const: google,piglin
+ - const: qcom,sc7280
+
+ - description: Google Evoker (newest rev)
+ items:
+ - const: google,evoker
+ - const: qcom,sc7280
+
+ - description: Google Evoker with LTE (newest rev)
+ items:
+ - const: google,evoker-sku512
+ - const: qcom,sc7280
+
+ - description: Google Herobrine (newest rev)
+ items:
+ - const: google,herobrine
+ - const: qcom,sc7280
+
+ - description: Google Villager (rev0)
+ items:
+ - const: google,villager-rev0
+ - const: qcom,sc7280
+
+ - description: Google Villager (newest rev)
+ items:
+ - const: google,villager
+ - const: qcom,sc7280
+
+ - description: Google Villager with LTE (newest rev)
+ items:
+ - const: google,villager-sku512
+ - const: qcom,sc7280
+
+ - description: Google Zombie (newest rev)
+ items:
+ - const: google,zombie
+ - const: qcom,sc7280
+
+ - description: Google Zombie with LTE (newest rev)
+ items:
+ - const: google,zombie-sku512
+ - const: qcom,sc7280
+
+ - description: Google Zombie with NVMe (newest rev)
+ items:
+ - const: google,zombie-sku2
+ - const: google,zombie-sku3
+ - const: google,zombie-sku515
+ - const: qcom,sc7280
+
+ - description: Google Zombie with LTE and NVMe (newest rev)
+ items:
+ - const: google,zombie-sku514
+ - const: qcom,sc7280
+
+ - items:
+ - enum:
+ - lenovo,flex-5g
+ - microsoft,surface-prox
+ - qcom,sc8180x-primus
+ - const: qcom,sc8180x
+
+ - items:
+ - enum:
+ - huawei,gaokun3
+ - lenovo,thinkpad-x13s
+ - microsoft,arcata
+ - microsoft,blackrock
+ - qcom,sc8280xp-crd
+ - qcom,sc8280xp-qrd
+ - const: qcom,sc8280xp
+
+ - items:
+ - enum:
+ - lenovo,tbx605f
+ - motorola,ali
+ - const: qcom,sdm450
+
+ - items:
+ - enum:
+ - sony,discovery-row
+ - sony,kirin-row
+ - sony,pioneer-row
+ - sony,voyager-row
+ - const: qcom,sdm630
+
+ - items:
+ - enum:
+ - inforce,ifc6560
+ - const: qcom,sda660
+
+ - items:
+ - enum:
+ - fairphone,fp3
+ - motorola,ocean
+ - const: qcom,sdm632
+
+ - items:
+ - enum:
+ - sony,mermaid-row
+ - const: qcom,sdm636
+
+ - items:
+ - enum:
+ - xiaomi,lavender
+ - const: qcom,sdm660
+
+ - items:
+ - enum:
+ - google,sargo
+ - const: qcom,sdm670
+
+ - items:
+ - enum:
+ - qcom,sdx55-mtp
+ - qcom,sdx55-telit-fn980-tlb
+ - qcom,sdx55-t55
+ - const: qcom,sdx55
+
+ - items:
+ - enum:
+ - qcom,sdx65-mtp
+ - const: qcom,sdx65
+
+ - items:
+ - enum:
+ - qcom,sdx75-idp
+ - const: qcom,sdx75
+
+ - items:
+ - enum:
+ - qcom,ipq6018-cp01
+ - qcom,ipq6018-cp01-c1
+ - const: qcom,ipq6018
+
+ - items:
+ - enum:
+ - qcom,qcs404-evb-1000
+ - qcom,qcs404-evb-4000
+ - const: qcom,qcs404-evb
+ - const: qcom,qcs404
+
+ - items:
+ - enum:
+ - qcom,monaco-evk
+ - qcom,qcs8300-ride
+ - const: qcom,qcs8300
+
+ - items:
+ - enum:
+ - qcom,qcs615-ride
+ - const: qcom,qcs615
+ - const: qcom,sm6150
+
+ - items:
+ - enum:
+ - qcom,sa8155p-adp
+ - const: qcom,sa8155p
+
+ - items:
+ - enum:
+ - qcom,sa8295p-adp
+ - qcom,sa8540p-ride
+ - const: qcom,sa8540p
+
+ - items:
+ - enum:
+ - qcom,sa8775p-ride
+ - qcom,sa8775p-ride-r3
+ - const: qcom,sa8775p
+
+ - items:
+ - enum:
+ - qcom,lemans-evk
+ - qcom,qcs9100-ride
+ - qcom,qcs9100-ride-r3
+ - const: qcom,qcs9100
+ - const: qcom,sa8775p
+
+ - items:
+ - enum:
+ - lenovo,yoga-c630
+ - lg,judyln
+ - lg,judyp
+ - oneplus,enchilada
+ - oneplus,fajita
+ - qcom,sdm845-mtp
+ - shift,axolotl
+ - samsung,starqltechn
+ - samsung,w737
+ - sony,akari-row
+ - sony,akatsuki-row
+ - sony,apollo-row
+ - thundercomm,db845c
+ - xiaomi,beryllium
+ - xiaomi,beryllium-ebbg
+ - xiaomi,polaris
+ - const: qcom,sdm845
+
+ - items:
+ - enum:
+ - oneplus,billie2
+ - const: qcom,sm4250
+
+ - items:
+ - enum:
+ - qcom,qrb4210-rb2
+ - const: qcom,qrb4210
+ - const: qcom,sm4250
+
+ - items:
+ - enum:
+ - qcom,sm4450-qrd
+ - const: qcom,sm4450
+
+ - items:
+ - enum:
+ - fxtec,pro1x
+ - const: qcom,sm6115
+
+ - items:
+ - enum:
+ - lenovo,j606f
+ - const: qcom,sm6115p
+ - const: qcom,sm6115
+
+ - items:
+ - enum:
+ - sony,pdx201
+ - xiaomi,ginkgo
+ - xiaomi,laurel-sprout
+ - const: qcom,sm6125
+
+ - items:
+ - enum:
+ - sony,pdx213
+ - const: qcom,sm6350
+
+ - items:
+ - enum:
+ - sony,pdx225
+ - const: qcom,sm6375
+
+ - items:
+ - enum:
+ - xiaomi,curtana
+ - xiaomi,joyeuse
+ - const: qcom,sm7125
+
+ - items:
+ - enum:
+ - google,sunfish
+ - const: qcom,sm7150
+
+ - items:
+ - enum:
+ - fairphone,fp4
+ - const: qcom,sm7225
+
+ - items:
+ - enum:
+ - nothing,spacewar
+ - const: qcom,sm7325
+
+ - items:
+ - enum:
+ - microsoft,surface-duo
+ - qcom,sm8150-hdk
+ - qcom,sm8150-mtp
+ - sony,bahamut-generic
+ - sony,griffin-generic
+ - const: qcom,sm8150
+
+ - items:
+ - enum:
+ - qcom,qrb5165-rb5
+ - qcom,sm8250-hdk
+ - qcom,sm8250-mtp
+ - samsung,r8q
+ - samsung,x1q
+ - sony,pdx203-generic
+ - sony,pdx206-generic
+ - xiaomi,elish
+ - xiaomi,pipa
+ - const: qcom,sm8250
+
+ - items:
+ - enum:
+ - microsoft,surface-duo2
+ - qcom,sm8350-hdk
+ - qcom,sm8350-mtp
+ - sony,pdx214-generic
+ - sony,pdx215-generic
+ - const: qcom,sm8350
+
+ - items:
+ - enum:
+ - qcom,sm8450-hdk
+ - qcom,sm8450-qrd
+ - samsung,r0q
+ - sony,pdx223
+ - sony,pdx224
+ - const: qcom,sm8450
+
+ - items:
+ - enum:
+ - qcom,sm8550-hdk
+ - qcom,sm8550-mtp
+ - qcom,sm8550-qrd
+ - samsung,q5q
+ - sony,pdx234
+ - const: qcom,sm8550
+
+ - items:
+ - enum:
+ - qcom,qcs8550-aim300-aiot
+ - const: qcom,qcs8550-aim300
+ - const: qcom,qcs8550
+ - const: qcom,sm8550
+
+ - items:
+ - enum:
+ - qcom,sm8650-hdk
+ - qcom,sm8650-mtp
+ - qcom,sm8650-qrd
+ - const: qcom,sm8650
+
+ - items:
+ - enum:
+ - qcom,sm8750-mtp
+ - qcom,sm8750-qrd
+ - const: qcom,sm8750
+
+ - items:
+ - enum:
+ - qcom,x1e001de-devkit
+ - const: qcom,x1e001de
+ - const: qcom,x1e80100
+
+ - items:
+ - enum:
+ - lenovo,thinkpad-t14s-lcd
+ - lenovo,thinkpad-t14s-oled
+ - const: lenovo,thinkpad-t14s
+ - const: qcom,x1e78100
+ - const: qcom,x1e80100
+
+ - items:
+ - enum:
+ - asus,vivobook-s15
+ - asus,zenbook-a14-ux3407ra
+ - dell,inspiron-14-plus-7441
+ - dell,latitude-7455
+ - dell,xps13-9345
+ - hp,elitebook-ultra-g1q
+ - hp,omnibook-x14
+ - lenovo,yoga-slim7x
+ - microsoft,romulus13
+ - microsoft,romulus15
+ - qcom,x1e80100-crd
+ - qcom,x1e80100-qcp
+ - const: qcom,x1e80100
+
+ - items:
+ - enum:
+ - qcom,hamoa-iot-evk
+ - const: qcom,hamoa-iot-som
+ - const: qcom,x1e80100
+
+ - items:
+ - enum:
+ - asus,zenbook-a14-ux3407qa
+ - hp,omnibook-x14-fe1
+ - lenovo,thinkbook-16
+ - qcom,x1p42100-crd
+ - const: qcom,x1p42100
+
+ # Board compatibles go above
+
+ qcom,msm-id:
+ $ref: /schemas/types.yaml#/definitions/uint32-matrix
+ minItems: 1
+ maxItems: 8
+ items:
+ items:
+ - description: |
+ MSM chipset ID - an exact match value consisting of two bitfields::
+ - bits 0-15 - The unique MSM chipset ID
+ - bits 16-31 - Reserved; should be 0
+ - description: |
+ Hardware revision ID - a chipset specific 32-bit ID representing
+ the version of the chipset. It is best a match value - the
+ bootloader will look for the closest possible match.
+ deprecated: true
+ description:
+ The MSM chipset and hardware revision used Qualcomm bootloaders. It
+ can optionally be an array of these to indicate multiple hardware that
+ use the same device tree. It is expected that the bootloader will use
+ this information at boot-up to decide which device tree to use when given
+ multiple device trees, some of which may not be compatible with the
+ actual hardware. It is the bootloader's responsibility to pass the
+ correct device tree to the kernel.
+ The property is deprecated.
+
+ qcom,board-id:
+ $ref: /schemas/types.yaml#/definitions/uint32-matrix
+ minItems: 1
+ maxItems: 8
+ oneOf:
+ - items:
+ - items:
+ - description: |
+ Board ID consisting of three bitfields::
+ - bits 31-24 - Unused
+ - bits 23-16 - Platform Version Major
+ - bits 15-8 - Platform Version Minor
+ - bits 7-0 - Platform Type
+ Platform Type field is an exact match value. The
+ Platform Major/Minor field is a best match. The bootloader will
+ look for the closest possible match.
+ - description: |
+ Subtype ID unique to a Platform Type/Chipset ID. For a given
+ Platform Type, there will typically only be a single board and the
+ subtype_id will be 0. However in some cases board variants may
+ need to be distinguished by different subtype_id values.
+ - items:
+ # OnePlus uses a variant of board-id with four elements:
+ - items:
+ - const: 8
+ - const: 0
+ - description: OnePlus board ID
+ - description: OnePlus subtype ID
+ deprecated: true
+ description:
+ The board type and revision information. It can optionally be an array
+ of these to indicate multiple boards that use the same device tree. It
+ is expected that the bootloader will use this information at boot-up to
+ decide which device tree to use when given multiple device trees, some of
+ which may not be compatible with the actual hardware. It is the
+ bootloader's responsibility to pass the correct device tree to the
+ kernel
+ The property is deprecated.
+
+allOf:
+ # Explicit allow-list for older SoCs. The legacy properties are not allowed
+ # on newer SoCs.
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,apq8026
+ - qcom,apq8094
+ - qcom,apq8096
+ - qcom,msm8917
+ - qcom,msm8939
+ - qcom,msm8953
+ - qcom,msm8956
+ - qcom,msm8992
+ - qcom,msm8994
+ - qcom,msm8996
+ - qcom,msm8998
+ - qcom,sdm450
+ - qcom,sdm630
+ - qcom,sdm632
+ - qcom,sdm636
+ - qcom,sdm845
+ - qcom,sdx55
+ - qcom,sdx65
+ - qcom,sdx75
+ - qcom,sm4250
+ - qcom,sm6115
+ - qcom,sm6125
+ - qcom,sm6350
+ - qcom,sm7125
+ - qcom,sm7225
+ - qcom,sm8150
+ - qcom,sm8250
+ then:
+ properties:
+ qcom,board-id: true
+ qcom,msm-id: true
+ else:
+ properties:
+ qcom,board-id: false
+ qcom,msm-id: false
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - oneplus,cheeseburger
+ - oneplus,dumpling
+ - oneplus,enchilada
+ - oneplus,fajita
+ - oneplus,oneplus3
+ - oneplus,oneplus3t
+ then:
+ properties:
+ qcom,board-id:
+ items:
+ minItems: 4
+ else:
+ properties:
+ qcom,board-id:
+ items:
+ maxItems: 2
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/rda.yaml b/Documentation/devicetree/bindings/arm/rda.yaml
new file mode 100644
index 000000000000..09241ea1d228
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/rda.yaml
@@ -0,0 +1,24 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/rda.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: RDA Micro platforms
+
+maintainers:
+ - Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+
+properties:
+ $nodename:
+ const: "/"
+ compatible:
+ items:
+ - enum:
+ - xunlong,orangepi-2g-iot # Orange Pi 2G-IoT
+ - xunlong,orangepi-i96 # Orange Pi i96
+ - const: rda,8810pl
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/realtek.txt b/Documentation/devicetree/bindings/arm/realtek.txt
deleted file mode 100644
index 95839e19ae92..000000000000
--- a/Documentation/devicetree/bindings/arm/realtek.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-Realtek platforms device tree bindings
---------------------------------------
-
-
-RTD1295 SoC
-===========
-
-Required root node properties:
-
- - compatible : must contain "realtek,rtd1295"
-
-
-Root node property compatible must contain, depending on board:
-
- - MeLE V9: "mele,v9"
- - ProBox2 AVA: "probox2,ava"
- - Zidoo X9S: "zidoo,x9s"
-
-
-Example:
-
- compatible = "zidoo,x9s", "realtek,rtd1295";
diff --git a/Documentation/devicetree/bindings/arm/realtek.yaml b/Documentation/devicetree/bindings/arm/realtek.yaml
new file mode 100644
index 000000000000..ddd9a85099e9
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/realtek.yaml
@@ -0,0 +1,60 @@
+# SPDX-License-Identifier: GPL-2.0-or-later OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/realtek.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Realtek platforms
+
+maintainers:
+ - Andreas Färber <afaerber@suse.de>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ # RTD1195 SoC based boards
+ - items:
+ - enum:
+ - mele,x1000 # MeLE X1000
+ - realtek,horseradish # Realtek Horseradish EVB
+ - const: realtek,rtd1195
+
+ # RTD1293 SoC based boards
+ - items:
+ - enum:
+ - synology,ds418j # Synology DiskStation DS418j
+ - const: realtek,rtd1293
+
+ # RTD1295 SoC based boards
+ - items:
+ - enum:
+ - mele,v9 # MeLE V9
+ - probox2,ava # ProBox2 AVA
+ - xnano,x5 # Xnano X5
+ - zidoo,x9s # Zidoo X9S
+ - const: realtek,rtd1295
+
+ # RTD1296 SoC based boards
+ - items:
+ - enum:
+ - synology,ds418 # Synology DiskStation DS418
+ - const: realtek,rtd1296
+
+ # RTD1395 SoC based boards
+ - items:
+ - enum:
+ - bananapi,bpi-m4 # Banana Pi BPI-M4
+ - realtek,lion-skin # Realtek Lion Skin EVB
+ - const: realtek,rtd1395
+
+ # RTD1619 SoC based boards
+ - items:
+ - enum:
+ - realtek,mjolnir # Realtek Mjolnir EVB
+ - const: realtek,rtd1619
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/rockchip.txt b/Documentation/devicetree/bindings/arm/rockchip.txt
deleted file mode 100644
index 0cc71236d639..000000000000
--- a/Documentation/devicetree/bindings/arm/rockchip.txt
+++ /dev/null
@@ -1,240 +0,0 @@
-Rockchip platforms device tree bindings
----------------------------------------
-
-- 96boards RK3399 Ficus (ROCK960 Enterprise Edition)
- Required root node properties:
- - compatible = "vamrs,ficus", "rockchip,rk3399";
-
-- 96boards RK3399 Rock960 (ROCK960 Consumer Edition)
- Required root node properties:
- - compatible = "vamrs,rock960", "rockchip,rk3399";
-
-- Amarula Vyasa RK3288 board
- Required root node properties:
- - compatible = "amarula,vyasa-rk3288", "rockchip,rk3288";
-
-- Asus Tinker board
- Required root node properties:
- - compatible = "asus,rk3288-tinker", "rockchip,rk3288";
-
-- Asus Tinker board S
- Required root node properties:
- - compatible = "asus,rk3288-tinker-s", "rockchip,rk3288";
-
-- Kylin RK3036 board:
- Required root node properties:
- - compatible = "rockchip,kylin-rk3036", "rockchip,rk3036";
-
-- MarsBoard RK3066 board:
- Required root node properties:
- - compatible = "haoyu,marsboard-rk3066", "rockchip,rk3066a";
-
-- bq Curie 2 tablet:
- Required root node properties:
- - compatible = "mundoreader,bq-curie2", "rockchip,rk3066a";
-
-- ChipSPARK Rayeager PX2 board:
- Required root node properties:
- - compatible = "chipspark,rayeager-px2", "rockchip,rk3066a";
-
-- Radxa Rock board:
- Required root node properties:
- - compatible = "radxa,rock", "rockchip,rk3188";
-
-- Radxa Rock2 Square board:
- Required root node properties:
- - compatible = "radxa,rock2-square", "rockchip,rk3288";
-
-- Rikomagic MK808 v1 board:
- Required root node properties:
- - compatible = "rikomagic,mk808", "rockchip,rk3066a";
-
-- Firefly Firefly-RK3288 board:
- Required root node properties:
- - compatible = "firefly,firefly-rk3288", "rockchip,rk3288";
- or
- - compatible = "firefly,firefly-rk3288-beta", "rockchip,rk3288";
-
-- Firefly Firefly-RK3288 Reload board:
- Required root node properties:
- - compatible = "firefly,firefly-rk3288-reload", "rockchip,rk3288";
-
-- Firefly Firefly-RK3399 board:
- Required root node properties:
- - compatible = "firefly,firefly-rk3399", "rockchip,rk3399";
-
-- Firefly roc-rk3328-cc board:
- Required root node properties:
- - compatible = "firefly,roc-rk3328-cc", "rockchip,rk3328";
-
-- Firefly ROC-RK3399-PC board:
- Required root node properties:
- - compatible = "firefly,roc-rk3399-pc", "rockchip,rk3399";
-
-- ChipSPARK PopMetal-RK3288 board:
- Required root node properties:
- - compatible = "chipspark,popmetal-rk3288", "rockchip,rk3288";
-
-- Netxeon R89 board:
- Required root node properties:
- - compatible = "netxeon,r89", "rockchip,rk3288";
-
-- GeekBuying GeekBox:
- Required root node properties:
- - compatible = "geekbuying,geekbox", "rockchip,rk3368";
-
-- Google Bob (Asus Chromebook Flip C101PA):
- Required root node properties:
- compatible = "google,bob-rev13", "google,bob-rev12",
- "google,bob-rev11", "google,bob-rev10",
- "google,bob-rev9", "google,bob-rev8",
- "google,bob-rev7", "google,bob-rev6",
- "google,bob-rev5", "google,bob-rev4",
- "google,bob", "google,gru", "rockchip,rk3399";
-
-- Google Brain (dev-board):
- Required root node properties:
- - compatible = "google,veyron-brain-rev0", "google,veyron-brain",
- "google,veyron", "rockchip,rk3288";
-
-- Google Gru (dev-board):
- Required root node properties:
- - compatible = "google,gru-rev15", "google,gru-rev14",
- "google,gru-rev13", "google,gru-rev12",
- "google,gru-rev11", "google,gru-rev10",
- "google,gru-rev9", "google,gru-rev8",
- "google,gru-rev7", "google,gru-rev6",
- "google,gru-rev5", "google,gru-rev4",
- "google,gru-rev3", "google,gru-rev2",
- "google,gru", "rockchip,rk3399";
-
-- Google Jaq (Haier Chromebook 11 and more):
- Required root node properties:
- - compatible = "google,veyron-jaq-rev5", "google,veyron-jaq-rev4",
- "google,veyron-jaq-rev3", "google,veyron-jaq-rev2",
- "google,veyron-jaq-rev1", "google,veyron-jaq",
- "google,veyron", "rockchip,rk3288";
-
-- Google Jerry (Hisense Chromebook C11 and more):
- Required root node properties:
- - compatible = "google,veyron-jerry-rev7", "google,veyron-jerry-rev6",
- "google,veyron-jerry-rev5", "google,veyron-jerry-rev4",
- "google,veyron-jerry-rev3", "google,veyron-jerry",
- "google,veyron", "rockchip,rk3288";
-
-- Google Kevin (Samsung Chromebook Plus):
- Required root node properties:
- - compatible = "google,kevin-rev15", "google,kevin-rev14",
- "google,kevin-rev13", "google,kevin-rev12",
- "google,kevin-rev11", "google,kevin-rev10",
- "google,kevin-rev9", "google,kevin-rev8",
- "google,kevin-rev7", "google,kevin-rev6",
- "google,kevin", "google,gru", "rockchip,rk3399";
-
-- Google Mickey (Asus Chromebit CS10):
- Required root node properties:
- - compatible = "google,veyron-mickey-rev8", "google,veyron-mickey-rev7",
- "google,veyron-mickey-rev6", "google,veyron-mickey-rev5",
- "google,veyron-mickey-rev4", "google,veyron-mickey-rev3",
- "google,veyron-mickey-rev2", "google,veyron-mickey-rev1",
- "google,veyron-mickey-rev0", "google,veyron-mickey",
- "google,veyron", "rockchip,rk3288";
-
-- Google Minnie (Asus Chromebook Flip C100P):
- Required root node properties:
- - compatible = "google,veyron-minnie-rev4", "google,veyron-minnie-rev3",
- "google,veyron-minnie-rev2", "google,veyron-minnie-rev1",
- "google,veyron-minnie-rev0", "google,veyron-minnie",
- "google,veyron", "rockchip,rk3288";
-
-- Google Pinky (dev-board):
- Required root node properties:
- - compatible = "google,veyron-pinky-rev2", "google,veyron-pinky",
- "google,veyron", "rockchip,rk3288";
-
-- Google Speedy (Asus C201 Chromebook):
- Required root node properties:
- - compatible = "google,veyron-speedy-rev9", "google,veyron-speedy-rev8",
- "google,veyron-speedy-rev7", "google,veyron-speedy-rev6",
- "google,veyron-speedy-rev5", "google,veyron-speedy-rev4",
- "google,veyron-speedy-rev3", "google,veyron-speedy-rev2",
- "google,veyron-speedy", "google,veyron", "rockchip,rk3288";
-
-- mqmaker MiQi:
- Required root node properties:
- - compatible = "mqmaker,miqi", "rockchip,rk3288";
-
-- Phytec phyCORE-RK3288: Rapid Development Kit
- Required root node properties:
- - compatible = "phytec,rk3288-pcm-947", "phytec,rk3288-phycore-som", "rockchip,rk3288";
-
-- Pine64 Rock64 board:
- Required root node properties:
- - compatible = "pine64,rock64", "rockchip,rk3328";
-
-- Pine64 RockPro64 board:
- Required root node properties:
- - compatible = "pine64,rockpro64", "rockchip,rk3399";
-
-- Rockchip PX3 Evaluation board:
- Required root node properties:
- - compatible = "rockchip,px3-evb", "rockchip,px3", "rockchip,rk3188";
-
-- Rockchip PX5 Evaluation board:
- Required root node properties:
- - compatible = "rockchip,px5-evb", "rockchip,px5", "rockchip,rk3368";
-
-- Rockchip PX30 Evaluation board:
- Required root node properties:
- - compatible = "rockchip,px30-evb", "rockchip,px30";
-
-- Rockchip RV1108 Evaluation board
- Required root node properties:
- - compatible = "rockchip,rv1108-evb", "rockchip,rv1108";
-
-- Rockchip RK3368 evb:
- Required root node properties:
- - compatible = "rockchip,rk3368-evb-act8846", "rockchip,rk3368";
-
-- Rockchip R88 board:
- Required root node properties:
- - compatible = "rockchip,r88", "rockchip,rk3368";
-
-- Rockchip RK3228 Evaluation board:
- Required root node properties:
- - compatible = "rockchip,rk3228-evb", "rockchip,rk3228";
-
-- Rockchip RK3229 Evaluation board:
- - compatible = "rockchip,rk3229-evb", "rockchip,rk3229";
-
-- Rockchip RK3288 Fennec board:
- Required root node properties:
- - compatible = "rockchip,rk3288-fennec", "rockchip,rk3288";
-
-- Rockchip RK3328 evb:
- Required root node properties:
- - compatible = "rockchip,rk3328-evb", "rockchip,rk3328";
-
-- Rockchip RK3399 evb:
- Required root node properties:
- - compatible = "rockchip,rk3399-evb", "rockchip,rk3399";
-
-- Rockchip RK3399 Sapphire board standalone:
- Required root node properties:
- - compatible = "rockchip,rk3399-sapphire", "rockchip,rk3399";
-
-- Rockchip RK3399 Sapphire Excavator board:
- Required root node properties:
- - compatible = "rockchip,rk3399-sapphire-excavator", "rockchip,rk3399";
-
-- Theobroma Systems RK3368-uQ7 Haikou Baseboard:
- Required root node properties:
- - compatible = "tsd,rk3368-uq7-haikou", "rockchip,rk3368";
-
-- Theobroma Systems RK3399-Q7 Haikou Baseboard:
- Required root node properties:
- - compatible = "tsd,rk3399-q7-haikou", "rockchip,rk3399";
-
-- Tronsmart Orion R68 Meta
- Required root node properties:
- - compatible = "tronsmart,orion-r68-meta", "rockchip,rk3368";
diff --git a/Documentation/devicetree/bindings/arm/rockchip.yaml b/Documentation/devicetree/bindings/arm/rockchip.yaml
new file mode 100644
index 000000000000..6aceaa8acbb2
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/rockchip.yaml
@@ -0,0 +1,1282 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/rockchip.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip platforms
+
+maintainers:
+ - Heiko Stuebner <heiko@sntech.de>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+
+ - description: 96boards RK3399 Ficus (ROCK960 Enterprise Edition)
+ items:
+ - const: vamrs,ficus
+ - const: rockchip,rk3399
+
+ - description: 96boards RK3399 Rock960 (ROCK960 Consumer Edition)
+ items:
+ - const: vamrs,rock960
+ - const: rockchip,rk3399
+
+ - description: Amarula Vyasa RK3288
+ items:
+ - const: amarula,vyasa-rk3288
+ - const: rockchip,rk3288
+
+ - description: Anbernic RK3326 Handheld Gaming Console
+ items:
+ - enum:
+ - anbernic,rg351m
+ - anbernic,rg351v
+ - const: rockchip,rk3326
+
+ - description: Anbernic RK3566 Handheld Gaming Console
+ items:
+ - enum:
+ - anbernic,rg353p
+ - anbernic,rg353ps
+ - anbernic,rg353v
+ - anbernic,rg353vs
+ - anbernic,rg503
+ - anbernic,rg-arc-d
+ - anbernic,rg-arc-s
+ - const: rockchip,rk3566
+
+ - description: Ariaboard Photonicat
+ items:
+ - const: ariaboard,photonicat
+ - const: rockchip,rk3568
+
+ - description: ArmSoM Sige1 board
+ items:
+ - const: armsom,sige1
+ - const: rockchip,rk3528
+
+ - description: ArmSoM Sige5 board
+ items:
+ - const: armsom,sige5
+ - const: rockchip,rk3576
+
+ - description: ArmSoM Sige7 board
+ items:
+ - const: armsom,sige7
+ - const: rockchip,rk3588
+
+ - description: ArmSoM LM7 SoM
+ items:
+ - enum:
+ - armsom,w3
+ - const: armsom,lm7
+ - const: rockchip,rk3588
+
+ - description: Asus Tinker board
+ items:
+ - const: asus,rk3288-tinker
+ - const: rockchip,rk3288
+
+ - description: Asus Tinker board S
+ items:
+ - const: asus,rk3288-tinker-s
+ - const: rockchip,rk3288
+
+ - description: Beelink A1
+ items:
+ - const: azw,beelink-a1
+ - const: rockchip,rk3328
+
+ - description: BigTreeTech CB2 Manta M4/8P
+ items:
+ - const: bigtreetech,cb2-manta
+ - const: bigtreetech,cb2
+ - const: rockchip,rk3566
+
+ - description: BigTreeTech Pi 2
+ items:
+ - const: bigtreetech,pi2
+ - const: rockchip,rk3566
+
+ - description: bq Curie 2 tablet
+ items:
+ - const: mundoreader,bq-curie2
+ - const: rockchip,rk3066a
+
+ - description: bq Edison 2 Quad-Core tablet
+ items:
+ - const: mundoreader,bq-edison2qc
+ - const: rockchip,rk3188
+
+ - description: ChipSPARK PopMetal-RK3288
+ items:
+ - const: chipspark,popmetal-rk3288
+ - const: rockchip,rk3288
+
+ - description: ChipSPARK Rayeager PX2
+ items:
+ - const: chipspark,rayeager-px2
+ - const: rockchip,rk3066a
+
+ - description: Cool Pi Compute Module 5(CM5) EVB
+ items:
+ - enum:
+ - coolpi,pi-cm5-evb
+ - const: coolpi,pi-cm5
+ - const: rockchip,rk3588
+
+ - description: Cool Pi CM5 GenBook
+ items:
+ - enum:
+ - coolpi,pi-cm5-genbook
+ - const: coolpi,pi-cm5
+ - const: rockchip,rk3588
+
+ - description: Cool Pi 4 Model B
+ items:
+ - const: coolpi,pi-4b
+ - const: rockchip,rk3588s
+
+ - description: Edgeble Neural Compute Module 2(Neu2) SoM based boards
+ items:
+ - const: edgeble,neural-compute-module-2-io # Edgeble Neural Compute Module 2 IO Board
+ - const: edgeble,neural-compute-module-2 # Edgeble Neural Compute Module 2 SoM
+ - const: rockchip,rv1126
+
+ - description: Edgeble Neural Compute Module 6(Neu6) SoM based boards
+ items:
+ - const: edgeble,neural-compute-module-6a-io # Edgeble NCM6A-IO Board
+ - enum:
+ - edgeble,neural-compute-module-6a # Edgeble Neural Compute Module 6A SoM
+ - edgeble,neural-compute-module-6b # Edgeble Neural Compute Module 6B SoM
+ - const: rockchip,rk3588
+
+ - description: Elgin RV1108 R1
+ items:
+ - const: elgin,rv1108-r1
+ - const: rockchip,rv1108
+
+ - description: EmbedFire LubanCat 1
+ items:
+ - const: embedfire,lubancat-1
+ - const: rockchip,rk3566
+
+ - description: EmbedFire LubanCat 2
+ items:
+ - const: embedfire,lubancat-2
+ - const: rockchip,rk3568
+
+ - description: Engicam PX30.Core C.TOUCH 2.0
+ items:
+ - const: engicam,px30-core-ctouch2
+ - const: engicam,px30-core
+ - const: rockchip,px30
+
+ - description: Engicam PX30.Core C.TOUCH 2.0 10.1" Open Frame
+ items:
+ - const: engicam,px30-core-ctouch2-of10
+ - const: engicam,px30-core
+ - const: rockchip,px30
+
+ - description: Engicam PX30.Core EDIMM2.2 Starter Kit
+ items:
+ - const: engicam,px30-core-edimm2.2
+ - const: engicam,px30-core
+ - const: rockchip,px30
+
+ - description: Firefly iCore-3588Q-based boards
+ items:
+ - enum:
+ - mntre,reform2-rcore
+ - const: firefly,icore-3588q
+ - const: rockchip,rk3588
+
+ - description: Firefly Core-3588J-based boards
+ items:
+ - enum:
+ - firefly,itx-3588j
+ - const: firefly,core-3588j
+ - const: rockchip,rk3588
+
+ - description: Firefly Core-PX30-JD4 on MB-JD4-PX30 baseboard
+ items:
+ - const: firefly,px30-jd4-core-mb
+ - const: firefly,px30-jd4-core
+ - const: rockchip,px30
+
+ - description: Firefly Firefly-RK3288
+ items:
+ - enum:
+ - firefly,firefly-rk3288
+ - firefly,firefly-rk3288-beta
+ - const: rockchip,rk3288
+
+ - description: Firefly Firefly-RK3288 Reload
+ items:
+ - const: firefly,firefly-rk3288-reload
+ - const: rockchip,rk3288
+
+ - description: Firefly Firefly-RK3399
+ items:
+ - const: firefly,firefly-rk3399
+ - const: rockchip,rk3399
+
+ - description: Firefly ROC-RK3308-CC
+ items:
+ - const: firefly,roc-rk3308-cc
+ - const: rockchip,rk3308
+
+ - description: Firefly roc-rk3328-cc
+ items:
+ - const: firefly,roc-rk3328-cc
+ - const: rockchip,rk3328
+
+ - description: Firefly ROC-RK3328-PC
+ items:
+ - const: firefly,roc-rk3328-pc
+ - const: rockchip,rk3328
+
+ - description: Firefly ROC-RK3399-PC
+ items:
+ - enum:
+ - firefly,roc-rk3399-pc
+ - firefly,roc-rk3399-pc-mezzanine
+ - const: rockchip,rk3399
+
+ - description: Firefly ROC-RK3399-PC-PLUS
+ items:
+ - enum:
+ - firefly,roc-rk3399-pc-plus
+ - const: rockchip,rk3399
+
+ - description: Firefly ROC-RK3576-PC
+ items:
+ - const: firefly,roc-rk3576-pc
+ - const: rockchip,rk3576
+
+ - description: Firefly ROC-RK3588-RT
+ items:
+ - const: firefly,roc-rk3588-rt
+ - const: rockchip,rk3588
+
+ - description: Firefly Station M2
+ items:
+ - const: firefly,rk3566-roc-pc
+ - const: rockchip,rk3566
+
+ - description: Firefly Station M3
+ items:
+ - const: firefly,rk3588s-roc-pc
+ - const: rockchip,rk3588s
+
+ - description: Firefly Station P2
+ items:
+ - const: firefly,rk3568-roc-pc
+ - const: rockchip,rk3568
+
+ - description: Forlinx FET3588-C SoM
+ items:
+ - enum:
+ - forlinx,ok3588-c
+ - const: forlinx,fet3588-c
+ - const: rockchip,rk3588
+
+ - description: FriendlyElec NanoPi R2 series boards
+ items:
+ - enum:
+ - friendlyarm,nanopi-r2c
+ - friendlyarm,nanopi-r2c-plus
+ - friendlyarm,nanopi-r2s
+ - friendlyarm,nanopi-r2s-plus
+ - const: rockchip,rk3328
+
+ - description: FriendlyElec NanoPi R3S
+ items:
+ - const: friendlyarm,nanopi-r3s
+ - const: rockchip,rk3566
+
+ - description: FriendlyElec NanoPi4 series boards
+ items:
+ - enum:
+ - friendlyarm,nanopc-t4
+ - friendlyarm,nanopi-m4
+ - friendlyarm,nanopi-m4b
+ - friendlyarm,nanopi-neo4
+ - friendlyarm,nanopi-r4s
+ - friendlyarm,nanopi-r4s-enterprise
+ - const: rockchip,rk3399
+
+ - description: FriendlyElec NanoPi M5 series boards
+ items:
+ - enum:
+ - friendlyarm,nanopi-m5
+ - const: rockchip,rk3576
+
+ - description: FriendlyElec NanoPi R5 series boards
+ items:
+ - enum:
+ - friendlyarm,nanopi-r5c
+ - friendlyarm,nanopi-r5s
+ - const: rockchip,rk3568
+
+ - description: FriendlyElec NanoPi R6 series boards
+ items:
+ - enum:
+ - friendlyarm,nanopi-r6c
+ - friendlyarm,nanopi-r6s
+ - const: rockchip,rk3588s
+
+ - description: FriendlyElec NanoPi Zero2
+ items:
+ - const: friendlyarm,nanopi-zero2
+ - const: rockchip,rk3528
+
+ - description: FriendlyElec NanoPC T6 series boards
+ items:
+ - enum:
+ - friendlyarm,nanopc-t6
+ - friendlyarm,nanopc-t6-lts
+ - const: rockchip,rk3588
+
+ - description: FriendlyElec CM3588-based boards
+ items:
+ - enum:
+ - friendlyarm,cm3588-nas
+ - const: friendlyarm,cm3588
+ - const: rockchip,rk3588
+
+ - description: GameForce Ace
+ items:
+ - const: gameforce,ace
+ - const: rockchip,rk3588s
+
+ - description: GameForce Chi
+ items:
+ - const: gameforce,chi
+ - const: rockchip,rk3326
+
+ - description: GeekBuying GeekBox
+ items:
+ - const: geekbuying,geekbox
+ - const: rockchip,rk3368
+
+ - description: Geniatech XPI-3128
+ items:
+ - const: geniatech,xpi-3128
+ - const: rockchip,rk3128
+
+ - description: Google Bob (Asus Chromebook Flip C101PA)
+ items:
+ - const: google,bob-rev13
+ - const: google,bob-rev12
+ - const: google,bob-rev11
+ - const: google,bob-rev10
+ - const: google,bob-rev9
+ - const: google,bob-rev8
+ - const: google,bob-rev7
+ - const: google,bob-rev6
+ - const: google,bob-rev5
+ - const: google,bob-rev4
+ - const: google,bob
+ - const: google,gru
+ - const: rockchip,rk3399
+
+ - description: Google Brain (dev-board)
+ items:
+ - const: google,veyron-brain-rev0
+ - const: google,veyron-brain
+ - const: google,veyron
+ - const: rockchip,rk3288
+
+ - description: Google Fievel (AOPEN Chromebox Mini)
+ items:
+ - const: google,veyron-fievel-rev8
+ - const: google,veyron-fievel-rev7
+ - const: google,veyron-fievel-rev6
+ - const: google,veyron-fievel-rev5
+ - const: google,veyron-fievel-rev4
+ - const: google,veyron-fievel-rev3
+ - const: google,veyron-fievel-rev2
+ - const: google,veyron-fievel-rev1
+ - const: google,veyron-fievel-rev0
+ - const: google,veyron-fievel
+ - const: google,veyron
+ - const: rockchip,rk3288
+
+ - description: Google Gru (dev-board)
+ items:
+ - const: google,gru-rev15
+ - const: google,gru-rev14
+ - const: google,gru-rev13
+ - const: google,gru-rev12
+ - const: google,gru-rev11
+ - const: google,gru-rev10
+ - const: google,gru-rev9
+ - const: google,gru-rev8
+ - const: google,gru-rev7
+ - const: google,gru-rev6
+ - const: google,gru-rev5
+ - const: google,gru-rev4
+ - const: google,gru-rev3
+ - const: google,gru-rev2
+ - const: google,gru
+ - const: rockchip,rk3399
+
+ - description: Google Jaq (Haier Chromebook 11 and more w/ uSD)
+ items:
+ - const: google,veyron-jaq-rev5
+ - const: google,veyron-jaq-rev4
+ - const: google,veyron-jaq-rev3
+ - const: google,veyron-jaq-rev2
+ - const: google,veyron-jaq-rev1
+ - const: google,veyron-jaq
+ - const: google,veyron
+ - const: rockchip,rk3288
+
+ - description: Google Jerry (Hisense Chromebook C11 and more)
+ items:
+ - const: google,veyron-jerry-rev15
+ - const: google,veyron-jerry-rev14
+ - const: google,veyron-jerry-rev13
+ - const: google,veyron-jerry-rev12
+ - const: google,veyron-jerry-rev11
+ - const: google,veyron-jerry-rev10
+ - const: google,veyron-jerry-rev7
+ - const: google,veyron-jerry-rev6
+ - const: google,veyron-jerry-rev5
+ - const: google,veyron-jerry-rev4
+ - const: google,veyron-jerry-rev3
+ - const: google,veyron-jerry
+ - const: google,veyron
+ - const: rockchip,rk3288
+
+ - description: Google Kevin (Samsung Chromebook Plus)
+ items:
+ - const: google,kevin-rev15
+ - const: google,kevin-rev14
+ - const: google,kevin-rev13
+ - const: google,kevin-rev12
+ - const: google,kevin-rev11
+ - const: google,kevin-rev10
+ - const: google,kevin-rev9
+ - const: google,kevin-rev8
+ - const: google,kevin-rev7
+ - const: google,kevin-rev6
+ - const: google,kevin
+ - const: google,gru
+ - const: rockchip,rk3399
+
+ - description: Google Mickey (Asus Chromebit CS10)
+ items:
+ - const: google,veyron-mickey-rev8
+ - const: google,veyron-mickey-rev7
+ - const: google,veyron-mickey-rev6
+ - const: google,veyron-mickey-rev5
+ - const: google,veyron-mickey-rev4
+ - const: google,veyron-mickey-rev3
+ - const: google,veyron-mickey-rev2
+ - const: google,veyron-mickey-rev1
+ - const: google,veyron-mickey-rev0
+ - const: google,veyron-mickey
+ - const: google,veyron
+ - const: rockchip,rk3288
+
+ - description: Google Mighty (Haier Chromebook 11 and more w/ SD)
+ items:
+ - const: google,veyron-mighty-rev5
+ - const: google,veyron-mighty-rev4
+ - const: google,veyron-mighty-rev3
+ - const: google,veyron-mighty-rev2
+ - const: google,veyron-mighty-rev1
+ - const: google,veyron-mighty
+ - const: google,veyron
+ - const: rockchip,rk3288
+
+ - description: Google Minnie (Asus Chromebook Flip C100P)
+ items:
+ - const: google,veyron-minnie-rev4
+ - const: google,veyron-minnie-rev3
+ - const: google,veyron-minnie-rev2
+ - const: google,veyron-minnie-rev1
+ - const: google,veyron-minnie-rev0
+ - const: google,veyron-minnie
+ - const: google,veyron
+ - const: rockchip,rk3288
+
+ - description: Google Pinky (dev-board)
+ items:
+ - const: google,veyron-pinky-rev2
+ - const: google,veyron-pinky
+ - const: google,veyron
+ - const: rockchip,rk3288
+
+ - description: Google Scarlet - Dumo (ASUS Chromebook Tablet CT100)
+ items:
+ - const: google,scarlet-rev15-sku0
+ - const: google,scarlet-rev15
+ - const: google,scarlet-rev14-sku0
+ - const: google,scarlet-rev14
+ - const: google,scarlet-rev13-sku0
+ - const: google,scarlet-rev13
+ - const: google,scarlet-rev12-sku0
+ - const: google,scarlet-rev12
+ - const: google,scarlet-rev11-sku0
+ - const: google,scarlet-rev11
+ - const: google,scarlet-rev10-sku0
+ - const: google,scarlet-rev10
+ - const: google,scarlet-rev9-sku0
+ - const: google,scarlet-rev9
+ - const: google,scarlet-rev8-sku0
+ - const: google,scarlet-rev8
+ - const: google,scarlet-rev7-sku0
+ - const: google,scarlet-rev7
+ - const: google,scarlet-rev6-sku0
+ - const: google,scarlet-rev6
+ - const: google,scarlet-rev5-sku0
+ - const: google,scarlet-rev5
+ - const: google,scarlet
+ - const: google,gru
+ - const: rockchip,rk3399
+
+ - description: Google Scarlet - Kingdisplay (Acer Chromebook Tab 10)
+ items:
+ - const: google,scarlet-rev15-sku7
+ - const: google,scarlet-rev15
+ - const: google,scarlet-rev14-sku7
+ - const: google,scarlet-rev14
+ - const: google,scarlet-rev13-sku7
+ - const: google,scarlet-rev13
+ - const: google,scarlet-rev12-sku7
+ - const: google,scarlet-rev12
+ - const: google,scarlet-rev11-sku7
+ - const: google,scarlet-rev11
+ - const: google,scarlet-rev10-sku7
+ - const: google,scarlet-rev10
+ - const: google,scarlet-rev9-sku7
+ - const: google,scarlet-rev9
+ - const: google,scarlet-rev8-sku7
+ - const: google,scarlet-rev8
+ - const: google,scarlet-rev7-sku7
+ - const: google,scarlet-rev7
+ - const: google,scarlet-rev6-sku7
+ - const: google,scarlet-rev6
+ - const: google,scarlet-rev5-sku7
+ - const: google,scarlet-rev5
+ - const: google,scarlet-rev4-sku7
+ - const: google,scarlet-rev4
+ - const: google,scarlet-rev3-sku7
+ - const: google,scarlet-rev3
+ - const: google,scarlet
+ - const: google,gru
+ - const: rockchip,rk3399
+
+ - description: |
+ Google Scarlet - Innolux display (Acer Chromebook Tab 10 and more)
+ items:
+ - const: google,scarlet-rev15-sku2
+ - const: google,scarlet-rev15-sku4
+ - const: google,scarlet-rev15-sku6
+ - const: google,scarlet-rev15
+ - const: google,scarlet-rev14-sku2
+ - const: google,scarlet-rev14-sku4
+ - const: google,scarlet-rev14-sku6
+ - const: google,scarlet-rev14
+ - const: google,scarlet-rev13-sku2
+ - const: google,scarlet-rev13-sku4
+ - const: google,scarlet-rev13-sku6
+ - const: google,scarlet-rev13
+ - const: google,scarlet-rev12-sku2
+ - const: google,scarlet-rev12-sku4
+ - const: google,scarlet-rev12-sku6
+ - const: google,scarlet-rev12
+ - const: google,scarlet-rev11-sku2
+ - const: google,scarlet-rev11-sku4
+ - const: google,scarlet-rev11-sku6
+ - const: google,scarlet-rev11
+ - const: google,scarlet-rev10-sku2
+ - const: google,scarlet-rev10-sku4
+ - const: google,scarlet-rev10-sku6
+ - const: google,scarlet-rev10
+ - const: google,scarlet-rev9-sku2
+ - const: google,scarlet-rev9-sku4
+ - const: google,scarlet-rev9-sku6
+ - const: google,scarlet-rev9
+ - const: google,scarlet-rev8-sku2
+ - const: google,scarlet-rev8-sku4
+ - const: google,scarlet-rev8-sku6
+ - const: google,scarlet-rev8
+ - const: google,scarlet-rev7-sku2
+ - const: google,scarlet-rev7-sku4
+ - const: google,scarlet-rev7-sku6
+ - const: google,scarlet-rev7
+ - const: google,scarlet-rev6-sku2
+ - const: google,scarlet-rev6-sku4
+ - const: google,scarlet-rev6-sku6
+ - const: google,scarlet-rev6
+ - const: google,scarlet-rev5-sku2
+ - const: google,scarlet-rev5-sku4
+ - const: google,scarlet-rev5-sku6
+ - const: google,scarlet-rev5
+ - const: google,scarlet-rev4-sku2
+ - const: google,scarlet-rev4-sku4
+ - const: google,scarlet-rev4-sku6
+ - const: google,scarlet-rev4
+ - const: google,scarlet
+ - const: google,gru
+ - const: rockchip,rk3399
+
+ - description: Google Speedy (Asus C201 Chromebook)
+ items:
+ - const: google,veyron-speedy-rev9
+ - const: google,veyron-speedy-rev8
+ - const: google,veyron-speedy-rev7
+ - const: google,veyron-speedy-rev6
+ - const: google,veyron-speedy-rev5
+ - const: google,veyron-speedy-rev4
+ - const: google,veyron-speedy-rev3
+ - const: google,veyron-speedy-rev2
+ - const: google,veyron-speedy
+ - const: google,veyron
+ - const: rockchip,rk3288
+
+ - description: Google Tiger (AOpen Chromebase Mini)
+ items:
+ - const: google,veyron-tiger-rev8
+ - const: google,veyron-tiger-rev7
+ - const: google,veyron-tiger-rev6
+ - const: google,veyron-tiger-rev5
+ - const: google,veyron-tiger-rev4
+ - const: google,veyron-tiger-rev3
+ - const: google,veyron-tiger-rev2
+ - const: google,veyron-tiger-rev1
+ - const: google,veyron-tiger-rev0
+ - const: google,veyron-tiger
+ - const: google,veyron
+ - const: rockchip,rk3288
+
+ - description: H96 Max V58 TV Box
+ items:
+ - const: haochuangyi,h96-max-v58
+ - const: rockchip,rk3588
+
+ - description: Haoyu MarsBoard RK3066
+ items:
+ - const: haoyu,marsboard-rk3066
+ - const: rockchip,rk3066a
+
+ - description: Hardkernel Odroid Go Advance
+ items:
+ - const: hardkernel,rk3326-odroid-go2
+ - const: rockchip,rk3326
+
+ - description: Hardkernel Odroid Go Advance Black Edition
+ items:
+ - const: hardkernel,rk3326-odroid-go2-v11
+ - const: rockchip,rk3326
+
+ - description: Hardkernel Odroid Go Super
+ items:
+ - const: hardkernel,rk3326-odroid-go3
+ - const: rockchip,rk3326
+
+ - description: Hardkernel Odroid M1
+ items:
+ - const: hardkernel,odroid-m1
+ - const: rockchip,rk3568
+
+ - description: Hardkernel Odroid M1S
+ items:
+ - const: hardkernel,odroid-m1s
+ - const: rockchip,rk3566
+
+ - description: Hardkernel Odroid M2
+ items:
+ - const: hardkernel,odroid-m2
+ - const: rockchip,rk3588s
+
+ - description: HINLINK H66K / H68K
+ items:
+ - enum:
+ - hinlink,h66k
+ - hinlink,h68k
+ - const: rockchip,rk3568
+
+ - description: Hugsun X99 TV Box
+ items:
+ - const: hugsun,x99
+ - const: rockchip,rk3399
+
+ - description: Indiedroid Nova SBC
+ items:
+ - const: indiedroid,nova
+ - const: rockchip,rk3588s
+
+ - description: Khadas Edge series boards
+ items:
+ - enum:
+ - khadas,edge
+ - khadas,edge-captain
+ - khadas,edge-v
+ - const: rockchip,rk3399
+
+ - description: Khadas Edge2 series boards
+ items:
+ - const: khadas,edge2
+ - const: rockchip,rk3588s
+
+ - description: Kobol Helios64
+ items:
+ - const: kobol,helios64
+ - const: rockchip,rk3399
+
+ - description: Mecer Xtreme Mini S6
+ items:
+ - const: mecer,xms6
+ - const: rockchip,rk3229
+
+ - description: Leez RK3399 P710
+ items:
+ - const: leez,p710
+ - const: rockchip,rk3399
+
+ - description: LCKFB Taishan Pi RK3566
+ items:
+ - const: lckfb,tspi-rk3566
+ - const: rockchip,rk3566
+
+ - description: Luckfox Core3576 Module based boards
+ items:
+ - enum:
+ - luckfox,omni3576
+ - const: luckfox,core3576
+ - const: rockchip,rk3576
+
+ - description: Lunzn FastRhino R66S / R68S
+ items:
+ - enum:
+ - lunzn,fastrhino-r66s
+ - lunzn,fastrhino-r68s
+ - const: rockchip,rk3568
+
+ - description: mqmaker MiQi
+ items:
+ - const: mqmaker,miqi
+ - const: rockchip,rk3288
+
+ - description: Neardi LBA3368
+ items:
+ - const: neardi,lba3368
+ - const: rockchip,rk3368
+
+ - description: Netxeon R89 board
+ items:
+ - const: netxeon,r89
+ - const: rockchip,rk3288
+
+ - description: OPEN AI LAB EAIDK-610
+ items:
+ - const: openailab,eaidk-610
+ - const: rockchip,rk3399
+
+ - description: Xunlong Orange Pi RK3399 board
+ items:
+ - const: xunlong,rk3399-orangepi
+ - const: rockchip,rk3399
+
+ - description: Phytec phyCORE-RK3288 Rapid Development Kit
+ items:
+ - const: phytec,rk3288-pcm-947
+ - const: phytec,rk3288-phycore-som
+ - const: rockchip,rk3288
+
+ - description: Pine64 Pinebook Pro
+ items:
+ - const: pine64,pinebook-pro
+ - const: rockchip,rk3399
+
+ - description: Pine64 PineNote
+ items:
+ - enum:
+ - pine64,pinenote-v1.1
+ - pine64,pinenote-v1.2
+ - const: pine64,pinenote
+ - const: rockchip,rk3566
+
+ - description: Pine64 PinePhone Pro
+ items:
+ - const: pine64,pinephone-pro
+ - const: rockchip,rk3399
+
+ - description: Pine64 PineTab2
+ items:
+ - enum:
+ - pine64,pinetab2-v0.1
+ - pine64,pinetab2-v2.0
+ - const: pine64,pinetab2
+ - const: rockchip,rk3566
+
+ - description: Pine64 Rock64
+ items:
+ - const: pine64,rock64
+ - const: rockchip,rk3328
+
+ - description: Pine64 RockPro64
+ items:
+ - enum:
+ - pine64,rockpro64-v2.1
+ - pine64,rockpro64-v2.0
+ - const: pine64,rockpro64
+ - const: rockchip,rk3399
+
+ - description: Pine64 Quartz64 Model A/B
+ items:
+ - enum:
+ - pine64,quartz64-a
+ - pine64,quartz64-b
+ - const: rockchip,rk3566
+
+ - description: Pine64 QuartzPro64
+ items:
+ - const: pine64,quartzpro64
+ - const: rockchip,rk3588
+
+ - description: Pine64 SOQuartz
+ items:
+ - enum:
+ - pine64,soquartz-blade
+ - pine64,soquartz-cm4io
+ - pine64,soquartz-model-a
+ - const: pine64,soquartz
+ - const: rockchip,rk3566
+
+ - description: Powkiddy RK3566 Handheld Gaming Console
+ items:
+ - enum:
+ - powkiddy,rgb10max3
+ - powkiddy,rgb20sx
+ - powkiddy,rgb30
+ - powkiddy,rk2023
+ - powkiddy,x55
+ - const: rockchip,rk3566
+
+ - description: Protonic MECSBC board
+ items:
+ - const: prt,mecsbc
+ - const: rockchip,rk3568
+
+ - description: QNAP TS-433-4G 4-Bay NAS
+ items:
+ - const: qnap,ts433
+ - const: rockchip,rk3568
+
+ - description: Radxa Compute Module 3 (CM3)
+ items:
+ - enum:
+ - radxa,cm3-io
+ - const: radxa,cm3
+ - const: rockchip,rk3566
+
+ - description: Radxa CM3 Industrial
+ items:
+ - enum:
+ - radxa,e25
+ - const: radxa,cm3i
+ - const: rockchip,rk3568
+
+ - description: Radxa E20C
+ items:
+ - const: radxa,e20c
+ - const: rockchip,rk3528
+
+ - description: Radxa E52C
+ items:
+ - const: radxa,e52c
+ - const: rockchip,rk3582
+ - const: rockchip,rk3588s
+
+ - description: Radxa Rock
+ items:
+ - const: radxa,rock
+ - const: rockchip,rk3188
+
+ - description: Radxa ROCK 2A/2F
+ items:
+ - enum:
+ - radxa,rock-2a
+ - radxa,rock-2f
+ - const: rockchip,rk3528
+
+ - description: Radxa ROCK Pi 4A/A+/B/B+/C
+ items:
+ - enum:
+ - radxa,rockpi4a
+ - radxa,rockpi4a-plus
+ - radxa,rockpi4b
+ - radxa,rockpi4b-plus
+ - radxa,rockpi4c
+ - const: radxa,rockpi4
+ - const: rockchip,rk3399
+
+ - description: Radxa ROCK 4C+
+ items:
+ - const: radxa,rock-4c-plus
+ - const: rockchip,rk3399
+
+ - description: Radxa ROCK 4D
+ items:
+ - const: radxa,rock-4d
+ - const: rockchip,rk3576
+
+ - description: Radxa ROCK 4SE
+ items:
+ - const: radxa,rock-4se
+ - const: rockchip,rk3399
+
+ - description: Radxa ROCK Pi E
+ items:
+ - const: radxa,rockpi-e
+ - const: rockchip,rk3328
+
+ - description: Radxa ROCK Pi N8
+ items:
+ - const: radxa,rockpi-n8
+ - const: vamrs,rk3288-vmarc-som
+ - const: rockchip,rk3288
+
+ - description: Radxa ROCK Pi N10
+ items:
+ - const: radxa,rockpi-n10
+ - const: vamrs,rk3399pro-vmarc-som
+ - const: rockchip,rk3399pro
+
+ - description: Radxa ROCK Pi S
+ items:
+ - const: radxa,rockpis
+ - const: rockchip,rk3308
+
+ - description: Radxa Rock 2 Square
+ items:
+ - const: radxa,rock2-square
+ - const: rockchip,rk3288
+
+ - description: Radxa ROCK 3A
+ items:
+ - const: radxa,rock3a
+ - const: rockchip,rk3568
+
+ - description: Radxa ROCK 3B
+ items:
+ - const: radxa,rock-3b
+ - const: rockchip,rk3568
+
+ - description: Radxa ROCK 3C
+ items:
+ - const: radxa,rock-3c
+ - const: rockchip,rk3566
+
+ - description: Radxa ROCK 5 ITX
+ items:
+ - const: radxa,rock-5-itx
+ - const: rockchip,rk3588
+
+ - description: Radxa ROCK 5A
+ items:
+ - const: radxa,rock-5a
+ - const: rockchip,rk3588s
+
+ - description: Radxa ROCK 5B
+ items:
+ - const: radxa,rock-5b
+ - const: rockchip,rk3588
+
+ - description: Radxa ROCK 5B+
+ items:
+ - const: radxa,rock-5b-plus
+ - const: rockchip,rk3588
+
+ - description: Radxa ROCK 5C
+ items:
+ - const: radxa,rock-5c
+ - const: rockchip,rk3588s
+
+ - description: Radxa ROCK S0
+ items:
+ - const: radxa,rock-s0
+ - const: rockchip,rk3308
+
+ - description: Radxa ROCK 5T
+ items:
+ - const: radxa,rock-5t
+ - const: rockchip,rk3588
+
+ - description: Radxa ZERO 3W/3E
+ items:
+ - enum:
+ - radxa,zero-3e
+ - radxa,zero-3w
+ - const: rockchip,rk3566
+
+ - description: Relfor SAIB board
+ items:
+ - const: relfor,saib
+ - const: rockchip,rv1109
+
+ - description: Rikomagic MK808 v1
+ items:
+ - const: rikomagic,mk808
+ - const: rockchip,rk3066a
+
+ - description: Rockchip Kylin
+ items:
+ - const: rockchip,rk3036-kylin
+ - const: rockchip,rk3036
+
+ - description: Rockchip PX3 Evaluation board
+ items:
+ - const: rockchip,px3-evb
+ - const: rockchip,px3
+ - const: rockchip,rk3188
+
+ - description: Rockchip PX30 Evaluation board
+ items:
+ - const: rockchip,px30-evb
+ - const: rockchip,px30
+
+ - description: Rockchip PX5 Evaluation board
+ items:
+ - const: rockchip,px5-evb
+ - const: rockchip,px5
+ - const: rockchip,rk3368
+
+ - description: Rockchip R88
+ items:
+ - const: rockchip,r88
+ - const: rockchip,rk3368
+
+ - description: Rockchip RK3036 Evaluation board
+ items:
+ - const: rockchip,rk3036-evb
+ - const: rockchip,rk3036
+
+ - description: Rockchip RK3128 Evaluation board
+ items:
+ - const: rockchip,rk3128-evb
+ - const: rockchip,rk3128
+
+ - description: Rockchip RK3228 Evaluation board
+ items:
+ - const: rockchip,rk3228-evb
+ - const: rockchip,rk3228
+
+ - description: Rockchip RK3229 Evaluation board
+ items:
+ - const: rockchip,rk3229-evb
+ - const: rockchip,rk3229
+
+ - description: Rockchip RK3288 Evaluation board
+ items:
+ - enum:
+ - rockchip,rk3288-evb-act8846
+ - rockchip,rk3288-evb-rk808
+ - const: rockchip,rk3288
+
+ - description: Rockchip RK3308 Evaluation board
+ items:
+ - const: rockchip,rk3308-evb
+ - const: rockchip,rk3308
+
+ - description: Rockchip RK3328 Evaluation board
+ items:
+ - const: rockchip,rk3328-evb
+ - const: rockchip,rk3328
+
+ - description: Rockchip RK3368 Evaluation board (act8846 pmic)
+ items:
+ - const: rockchip,rk3368-evb-act8846
+ - const: rockchip,rk3368
+
+ - description: Rockchip RK3399 Evaluation board
+ items:
+ - const: rockchip,rk3399-evb
+ - const: rockchip,rk3399
+
+ - description: Rockchip RK3399 Industry Evaluation board
+ items:
+ - const: rockchip,rk3399-evb-ind
+ - const: rockchip,rk3399
+
+ - description: Rockchip RK3399 Sapphire standalone
+ items:
+ - const: rockchip,rk3399-sapphire
+ - const: rockchip,rk3399
+
+ - description: Rockchip RK3399 Sapphire with Excavator Baseboard
+ items:
+ - const: rockchip,rk3399-sapphire-excavator
+ - const: rockchip,rk3399
+
+ - description: Rockchip RK3562 Evaluation board 2
+ items:
+ - const: rockchip,rk3562-evb2-v10
+ - const: rockchip,rk3562
+
+ - description: Rockchip RK3566 BOX Evaluation Demo board
+ items:
+ - const: rockchip,rk3566-box-demo
+ - const: rockchip,rk3566
+
+ - description: Rockchip RK3568 Evaluation board
+ items:
+ - const: rockchip,rk3568-evb1-v10
+ - const: rockchip,rk3568
+
+ - description: Rockchip RK3576 Evaluation board
+ items:
+ - const: rockchip,rk3576-evb1-v10
+ - const: rockchip,rk3576
+
+ - description: Rockchip RK3588 Evaluation board
+ items:
+ - enum:
+ - rockchip,rk3588-evb1-v10
+ - rockchip,rk3588-evb2-v10
+ - const: rockchip,rk3588
+
+ - description: Rockchip RK3588S Evaluation board
+ items:
+ - const: rockchip,rk3588s-evb1-v10
+ - const: rockchip,rk3588s
+
+ - description: Rockchip RV1108 Evaluation board
+ items:
+ - const: rockchip,rv1108-evb
+ - const: rockchip,rv1108
+
+ - description: Rockchip Toybrick TB-RK3588X board
+ items:
+ - const: rockchip,rk3588-toybrick-x0
+ - const: rockchip,rk3588
+
+ - description: Sakura Pi RK3308B
+ items:
+ - const: sakurapi,rk3308-sakurapi-rk3308b
+ - const: rockchip,rk3308
+
+ - description: Sinovoip RK3308 Banana Pi P2 Pro
+ items:
+ - const: sinovoip,rk3308-bpi-p2pro
+ - const: rockchip,rk3308
+
+ - description: Sinovoip RK3568 Banana Pi R2 Pro
+ items:
+ - const: sinovoip,rk3568-bpi-r2pro
+ - const: rockchip,rk3568
+
+ - description: Sonoff iHost Smart Home Hub
+ items:
+ - const: itead,sonoff-ihost
+ - enum:
+ - rockchip,rv1126
+ - rockchip,rv1109
+
+ - description: Theobroma Systems PX30-Cobra
+ items:
+ - enum:
+ - tsd,px30-cobra-ltk050h3146w
+ - tsd,px30-cobra-ltk050h3146w-a2
+ - tsd,px30-cobra-ltk050h3148w
+ - tsd,px30-cobra-ltk500hd1829
+ - const: tsd,px30-cobra
+ - const: rockchip,px30
+
+ - description: Theobroma Systems PX30-PP1516
+ items:
+ - enum:
+ - tsd,px30-pp1516-ltk050h3146w-a2
+ - tsd,px30-pp1516-ltk050h3148w
+ - const: tsd,px30-pp1516
+ - const: rockchip,px30
+
+ - description: Theobroma Systems PX30-uQ7 with Haikou baseboard
+ items:
+ - const: tsd,px30-ringneck-haikou
+ - const: rockchip,px30
+
+ - description: Theobroma Systems RK3368-uQ7 with Haikou baseboard
+ items:
+ - const: tsd,rk3368-lion-haikou
+ - const: rockchip,rk3368
+
+ - description: Theobroma Systems RK3399-Q7 with Haikou baseboard
+ items:
+ - const: tsd,rk3399-puma-haikou
+ - const: rockchip,rk3399
+
+ - description: Theobroma Systems RK3588-SBC Jaguar
+ items:
+ - const: tsd,rk3588-jaguar
+ - const: rockchip,rk3588
+
+ - description: Theobroma Systems RK3588-Q7 with Haikou baseboard
+ items:
+ - const: tsd,rk3588-tiger-haikou
+ - const: tsd,rk3588-tiger
+ - const: rockchip,rk3588
+
+ - description: Tronsmart Orion R68 Meta
+ items:
+ - const: tronsmart,orion-r68-meta
+ - const: rockchip,rk3368
+
+ - description: Turing RK1
+ items:
+ - const: turing,rk1
+ - const: rockchip,rk3588
+
+ - description: WolfVision PF5 mainboard
+ items:
+ - const: wolfvision,rk3568-pf5
+ - const: rockchip,rk3568
+
+ - description: Xunlong Orange Pi 3B
+ items:
+ - enum:
+ - xunlong,orangepi-3b-v1.1
+ - xunlong,orangepi-3b-v2.1
+ - const: xunlong,orangepi-3b
+ - const: rockchip,rk3566
+
+ - description: Xunlong Orange Pi 5 Max/Plus/Ultra
+ items:
+ - enum:
+ - xunlong,orangepi-5-max
+ - xunlong,orangepi-5-plus
+ - xunlong,orangepi-5-ultra
+ - const: rockchip,rk3588
+
+ - description: Xunlong Orange Pi R1 Plus / LTS
+ items:
+ - enum:
+ - xunlong,orangepi-r1-plus
+ - xunlong,orangepi-r1-plus-lts
+ - const: rockchip,rk3328
+
+ - description: Xunlong Orange Pi 5
+ items:
+ - enum:
+ - xunlong,orangepi-5
+ - xunlong,orangepi-5b
+ - const: rockchip,rk3588s
+
+ - description: Zkmagic A95X Z2
+ items:
+ - const: zkmagic,a95x-z2
+ - const: rockchip,rk3318
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/rockchip/pmu.txt b/Documentation/devicetree/bindings/arm/rockchip/pmu.txt
deleted file mode 100644
index 3ee9b428b2f7..000000000000
--- a/Documentation/devicetree/bindings/arm/rockchip/pmu.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-Rockchip power-management-unit:
--------------------------------
-
-The pmu is used to turn off and on different power domains of the SoCs
-This includes the power to the CPU cores.
-
-Required node properties:
-- compatible value : = "rockchip,rk3066-pmu";
-- reg : physical base address and the size of the registers window
-
-Example:
-
- pmu@20004000 {
- compatible = "rockchip,rk3066-pmu";
- reg = <0x20004000 0x100>;
- };
diff --git a/Documentation/devicetree/bindings/arm/rockchip/pmu.yaml b/Documentation/devicetree/bindings/arm/rockchip/pmu.yaml
new file mode 100644
index 000000000000..55b2200d6e75
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/rockchip/pmu.yaml
@@ -0,0 +1,77 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/rockchip/pmu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip Power Management Unit (PMU)
+
+maintainers:
+ - Elaine Zhang <zhangqing@rock-chips.com>
+ - Heiko Stuebner <heiko@sntech.de>
+
+description: |
+ The PMU is used to turn on and off different power domains of the SoCs.
+ This includes the power to the CPU cores.
+
+select:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - rockchip,px30-pmu
+ - rockchip,rk3066-pmu
+ - rockchip,rk3128-pmu
+ - rockchip,rk3288-pmu
+ - rockchip,rk3368-pmu
+ - rockchip,rk3399-pmu
+ - rockchip,rk3528-pmu
+ - rockchip,rk3562-pmu
+ - rockchip,rk3568-pmu
+ - rockchip,rk3576-pmu
+ - rockchip,rk3588-pmu
+ - rockchip,rv1126-pmu
+
+ required:
+ - compatible
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - rockchip,px30-pmu
+ - rockchip,rk3066-pmu
+ - rockchip,rk3128-pmu
+ - rockchip,rk3288-pmu
+ - rockchip,rk3368-pmu
+ - rockchip,rk3399-pmu
+ - rockchip,rk3528-pmu
+ - rockchip,rk3562-pmu
+ - rockchip,rk3568-pmu
+ - rockchip,rk3576-pmu
+ - rockchip,rk3588-pmu
+ - rockchip,rv1126-pmu
+ - const: syscon
+ - const: simple-mfd
+
+ reg:
+ maxItems: 1
+
+ power-controller:
+ type: object
+
+ reboot-mode:
+ type: object
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ pmu@20004000 {
+ compatible = "rockchip,rk3066-pmu", "syscon", "simple-mfd";
+ reg = <0x20004000 0x100>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/rtsm-dcscb.txt b/Documentation/devicetree/bindings/arm/rtsm-dcscb.txt
deleted file mode 100644
index 3b8fbf3c00c5..000000000000
--- a/Documentation/devicetree/bindings/arm/rtsm-dcscb.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-ARM Dual Cluster System Configuration Block
--------------------------------------------
-
-The Dual Cluster System Configuration Block (DCSCB) provides basic
-functionality for controlling clocks, resets and configuration pins in
-the Dual Cluster System implemented by the Real-Time System Model (RTSM).
-
-Required properties:
-
-- compatible : should be "arm,rtsm,dcscb"
-
-- reg : physical base address and the size of the registers window
-
-Example:
-
- dcscb@60000000 {
- compatible = "arm,rtsm,dcscb";
- reg = <0x60000000 0x1000>;
- };
diff --git a/Documentation/devicetree/bindings/arm/samsung/exynos-chipid.txt b/Documentation/devicetree/bindings/arm/samsung/exynos-chipid.txt
deleted file mode 100644
index 85c5dfd4a720..000000000000
--- a/Documentation/devicetree/bindings/arm/samsung/exynos-chipid.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-SAMSUNG Exynos SoCs Chipid driver.
-
-Required properties:
-- compatible : Should at least contain "samsung,exynos4210-chipid".
-
-- reg: offset and length of the register set
-
-Example:
- chipid@10000000 {
- compatible = "samsung,exynos4210-chipid";
- reg = <0x10000000 0x100>;
- };
diff --git a/Documentation/devicetree/bindings/arm/samsung/pmu.txt b/Documentation/devicetree/bindings/arm/samsung/pmu.txt
deleted file mode 100644
index 433bfd7593ac..000000000000
--- a/Documentation/devicetree/bindings/arm/samsung/pmu.txt
+++ /dev/null
@@ -1,72 +0,0 @@
-SAMSUNG Exynos SoC series PMU Registers
-
-Properties:
- - compatible : should contain two values. First value must be one from following list:
- - "samsung,exynos3250-pmu" - for Exynos3250 SoC,
- - "samsung,exynos4210-pmu" - for Exynos4210 SoC,
- - "samsung,exynos4412-pmu" - for Exynos4412 SoC,
- - "samsung,exynos5250-pmu" - for Exynos5250 SoC,
- - "samsung,exynos5260-pmu" - for Exynos5260 SoC.
- - "samsung,exynos5410-pmu" - for Exynos5410 SoC,
- - "samsung,exynos5420-pmu" - for Exynos5420 SoC.
- - "samsung,exynos5433-pmu" - for Exynos5433 SoC.
- - "samsung,exynos7-pmu" - for Exynos7 SoC.
- second value must be always "syscon".
-
- - reg : offset and length of the register set.
-
- - #clock-cells : must be <1>, since PMU requires once cell as clock specifier.
- The single specifier cell is used as index to list of clocks
- provided by PMU, which is currently:
- 0 : SoC clock output (CLKOUT pin)
-
- - clock-names : list of clock names for particular CLKOUT mux inputs in
- following format:
- "clkoutN", where N is a decimal number corresponding to
- CLKOUT mux control bits value for given input, e.g.
- "clkout0", "clkout7", "clkout15".
-
- - clocks : list of phandles and specifiers to all input clocks listed in
- clock-names property.
-
-Optional properties:
-
-Some PMUs are capable of behaving as an interrupt controller (mostly
-to wake up a suspended PMU). In which case, they can have the
-following properties:
-
-- interrupt-controller: indicate that said PMU is an interrupt controller
-
-- #interrupt-cells: must be identical to the that of the parent interrupt
- controller.
-
-
-Optional nodes:
-
-- nodes defining the restart and poweroff syscon children
-
-
-Example :
-pmu_system_controller: system-controller@10040000 {
- compatible = "samsung,exynos5250-pmu", "syscon";
- reg = <0x10040000 0x5000>;
- interrupt-controller;
- #interrupt-cells = <3>;
- interrupt-parent = <&gic>;
- #clock-cells = <1>;
- clock-names = "clkout0", "clkout1", "clkout2", "clkout3",
- "clkout4", "clkout8", "clkout9";
- clocks = <&clock CLK_OUT_DMC>, <&clock CLK_OUT_TOP>,
- <&clock CLK_OUT_LEFTBUS>, <&clock CLK_OUT_RIGHTBUS>,
- <&clock CLK_OUT_CPU>, <&clock CLK_XXTI>,
- <&clock CLK_XUSBXTI>;
-};
-
-Example of clock consumer :
-
-usb3503: usb3503@8 {
- /* ... */
- clock-names = "refclk";
- clocks = <&pmu_system_controller 0>;
- /* ... */
-};
diff --git a/Documentation/devicetree/bindings/arm/samsung/samsung-boards.txt b/Documentation/devicetree/bindings/arm/samsung/samsung-boards.txt
deleted file mode 100644
index 56021bf2a916..000000000000
--- a/Documentation/devicetree/bindings/arm/samsung/samsung-boards.txt
+++ /dev/null
@@ -1,83 +0,0 @@
-* Samsung's Exynos and S5P SoC based boards
-
-Required root node properties:
- - compatible = should be one or more of the following.
- - "samsung,aries" - for S5PV210-based Samsung Aries board.
- - "samsung,fascinate4g" - for S5PV210-based Samsung Galaxy S Fascinate 4G (SGH-T959P) board.
- - "samsung,galaxys" - for S5PV210-based Samsung Galaxy S (i9000) board.
- - "samsung,artik5" - for Exynos3250-based Samsung ARTIK5 module.
- - "samsung,artik5-eval" - for Exynos3250-based Samsung ARTIK5 eval board.
- - "samsung,monk" - for Exynos3250-based Samsung Simband board.
- - "samsung,rinato" - for Exynos3250-based Samsung Gear2 board.
- - "samsung,smdkv310" - for Exynos4210-based Samsung SMDKV310 eval board.
- - "samsung,trats" - for Exynos4210-based Tizen Reference board.
- - "samsung,universal_c210" - for Exynos4210-based Samsung board.
- - "samsung,i9300" - for Exynos4412-based Samsung GT-I9300 board.
- - "samsung,i9305" - for Exynos4412-based Samsung GT-I9305 board.
- - "samsung,midas" - for Exynos4412-based Samsung Midas board.
- - "samsung,smdk4412", - for Exynos4412-based Samsung SMDK4412 eval board.
- - "samsung,n710x" - for Exynos4412-based Samsung GT-N7100/GT-N7105 board.
- - "samsung,trats2" - for Exynos4412-based Tizen Reference board.
- - "samsung,smdk5250" - for Exynos5250-based Samsung SMDK5250 eval board.
- - "samsung,xyref5260" - for Exynos5260-based Samsung board.
- - "samsung,smdk5410" - for Exynos5410-based Samsung SMDK5410 eval board.
- - "samsung,smdk5420" - for Exynos5420-based Samsung SMDK5420 eval board.
- - "samsung,tm2" - for Exynos5433-based Samsung TM2 board.
- - "samsung,tm2e" - for Exynos5433-based Samsung TM2E board.
-
-* Other companies Exynos SoC based
- * FriendlyARM
- - "friendlyarm,tiny4412" - for Exynos4412-based FriendlyARM
- TINY4412 board.
- * TOPEET
- - "topeet,itop4412-elite" - for Exynos4412-based TOPEET
- Elite base board.
-
- * Google
- - "google,pi" - for Exynos5800-based Google Peach Pi
- Rev 10+ board,
- also: "google,pi-rev16", "google,pi-rev15", "google,pi-rev14",
- "google,pi-rev13", "google,pi-rev12", "google,pi-rev11",
- "google,pi-rev10", "google,peach".
-
- - "google,pit" - for Exynos5420-based Google Peach Pit
- Rev 6+ (Exynos5420),
- also: "google,pit-rev16", "google,pit-rev15", "google,pit-rev14",
- "google,pit-rev13", "google,pit-rev12", "google,pit-rev11",
- "google,pit-rev10", "google,pit-rev9", "google,pit-rev8",
- "google,pit-rev7", "google,pit-rev6", "google,peach".
-
- - "google,snow-rev4" - for Exynos5250-based Google Snow board,
- also: "google,snow"
- - "google,snow-rev5" - for Exynos5250-based Google Snow
- Rev 5+ board.
- - "google,spring" - for Exynos5250-based Google Spring board.
-
- * Hardkernel
- - "hardkernel,odroid-u3" - for Exynos4412-based Hardkernel Odroid U3.
- - "hardkernel,odroid-x" - for Exynos4412-based Hardkernel Odroid X.
- - "hardkernel,odroid-x2" - for Exynos4412-based Hardkernel Odroid X2.
- - "hardkernel,odroid-xu" - for Exynos5410-based Hardkernel Odroid XU.
- - "hardkernel,odroid-xu3" - for Exynos5422-based Hardkernel Odroid XU3.
- - "hardkernel,odroid-xu3-lite" - for Exynos5422-based Hardkernel
- Odroid XU3 Lite board.
- - "hardkernel,odroid-xu4" - for Exynos5422-based Hardkernel Odroid XU4.
- - "hardkernel,odroid-hc1" - for Exynos5422-based Hardkernel Odroid HC1.
-
- * Insignal
- - "insignal,arndale" - for Exynos5250-based Insignal Arndale board.
- - "insignal,arndale-octa" - for Exynos5420-based Insignal Arndale
- Octa board.
- - "insignal,origen" - for Exynos4210-based Insignal Origen board.
- - "insignal,origen4412" - for Exynos4412-based Insignal Origen board.
-
-
-Optional nodes:
- - firmware node, specifying presence and type of secure firmware:
- - compatible: only "samsung,secure-firmware" is currently supported
- - reg: address of non-secure SYSRAM used for communication with firmware
-
- firmware@203f000 {
- compatible = "samsung,secure-firmware";
- reg = <0x0203F000 0x1000>;
- };
diff --git a/Documentation/devicetree/bindings/arm/samsung/samsung-boards.yaml b/Documentation/devicetree/bindings/arm/samsung/samsung-boards.yaml
new file mode 100644
index 000000000000..f8e20e602c20
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/samsung/samsung-boards.yaml
@@ -0,0 +1,271 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/samsung/samsung-boards.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos and S5P SoC based boards
+
+maintainers:
+ - Krzysztof Kozlowski <krzk@kernel.org>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ - description: S3C6410 based boards
+ items:
+ - enum:
+ - friendlyarm,mini6410 # FriendlyARM Mini6410
+ - samsung,smdk6410 # Samsung SMDK6410
+ - const: samsung,s3c6410
+
+ - description: S5PV210 based boards
+ items:
+ - enum:
+ - aesop,torbreck # aESOP Torbreck based on S5PV210
+ - samsung,aquila # Samsung Aquila based on S5PC110
+ - samsung,goni # Samsung Goni based on S5PC110
+ - yic,smdkc110 # YIC System SMDKC110 based on S5PC110
+ - yic,smdkv210 # YIC System SMDKV210 based on S5PV210
+ - const: samsung,s5pv210
+
+ - description: S5PV210 based Aries boards
+ items:
+ - enum:
+ - samsung,fascinate4g # Samsung Galaxy S Fascinate 4G (SGH-T959P)
+ - samsung,galaxys # Samsung Galaxy S (i9000)
+ - const: samsung,aries
+ - const: samsung,s5pv210
+
+ - description: Exynos2200 based boards
+ items:
+ - enum:
+ - samsung,g0s # Samsung Galaxy S22+ (SM-S906B)
+ - const: samsung,exynos2200
+
+ - description: Exynos3250 based boards
+ items:
+ - enum:
+ - samsung,monk # Samsung Simband
+ - samsung,rinato # Samsung Gear2
+ - const: samsung,exynos3250
+ - const: samsung,exynos3
+
+ - description: Samsung ARTIK5 boards
+ items:
+ - enum:
+ - samsung,artik5-eval # Samsung ARTIK5 eval board
+ - const: samsung,artik5 # Samsung ARTIK5 module
+ - const: samsung,exynos3250
+ - const: samsung,exynos3
+
+ - description: Exynos4210 based boards
+ items:
+ - enum:
+ - insignal,origen # Insignal Origen
+ - samsung,i9100 # Samsung Galaxy S2 (GT-I9100)
+ - samsung,smdkv310 # Samsung SMDKV310 eval
+ - samsung,trats # Samsung Tizen Reference
+ - samsung,universal_c210 # Samsung C210
+ - const: samsung,exynos4210
+ - const: samsung,exynos4
+
+ - description: Samsung Galaxy Tab3 family boards
+ items:
+ - enum:
+ - samsung,t310 # Samsung Galaxy Tab 3 8.0 WiFi (SM-T310)
+ - samsung,t311 # Samsung Galaxy Tab 3 8.0 3G (SM-T311)
+ - samsung,t315 # Samsung Galaxy Tab 3 8.0 LTE (SM-T315)
+ - const: samsung,tab3
+ - const: samsung,exynos4212
+ - const: samsung,exynos4
+
+ - description: Exynos4412 based boards
+ items:
+ - enum:
+ - friendlyarm,tiny4412 # FriendlyARM TINY4412
+ - hardkernel,odroid-u3 # Hardkernel Odroid U3
+ - hardkernel,odroid-x # Hardkernel Odroid X
+ - hardkernel,odroid-x2 # Hardkernel Odroid X2
+ - insignal,origen4412 # Insignal Origen
+ - samsung,smdk4412 # Samsung SMDK4412 eval
+ - topeet,itop4412-elite # TOPEET Elite base
+ - const: samsung,exynos4412
+ - const: samsung,exynos4
+
+ - description: Samsung Midas family boards
+ items:
+ - enum:
+ - samsung,i9300 # Samsung GT-I9300
+ - samsung,i9305 # Samsung GT-I9305
+ - samsung,n710x # Samsung GT-N7100/GT-N7105
+ - samsung,trats2 # Samsung Tizen Reference
+ - const: samsung,midas
+ - const: samsung,exynos4412
+ - const: samsung,exynos4
+
+ - description: Samsung p4note family boards
+ items:
+ - enum:
+ - samsung,n8010 # Samsung GT-N8010/GT-N8013
+ - const: samsung,p4note
+ - const: samsung,exynos4412
+ - const: samsung,exynos4
+
+ - description: Exynos5250 based boards
+ items:
+ - enum:
+ - google,snow-rev5 # Google Snow Rev 5+
+ - google,spring # Google Spring
+ - insignal,arndale # Insignal Arndale
+ - samsung,smdk5250 # Samsung SMDK5250 eval
+ - const: samsung,exynos5250
+ - const: samsung,exynos5
+
+ - description: Google Snow Boards (Rev 4+)
+ items:
+ - const: google,snow-rev4
+ - const: google,snow
+ - const: samsung,exynos5250
+ - const: samsung,exynos5
+
+ - description: Exynos5260 based boards
+ items:
+ - enum:
+ - samsung,xyref5260 # Samsung Xyref5260 eval
+ - const: samsung,exynos5260
+ - const: samsung,exynos5
+
+ - description: Exynos5410 based boards
+ items:
+ - enum:
+ - hardkernel,odroid-xu # Hardkernel Odroid XU
+ - samsung,smdk5410 # Samsung SMDK5410 eval
+ - const: samsung,exynos5410
+ - const: samsung,exynos5
+
+ - description: Exynos5420 based boards
+ items:
+ - enum:
+ - insignal,arndale-octa # Insignal Arndale Octa
+ - samsung,chagall-wifi # Samsung SM-T800
+ - samsung,klimt-wifi # Samsung SM-T700
+ - samsung,smdk5420 # Samsung SMDK5420 eval
+ - const: samsung,exynos5420
+ - const: samsung,exynos5
+
+ - description: Google Peach Pit Boards (Rev 6+)
+ items:
+ - const: google,pit-rev16
+ - const: google,pit-rev15
+ - const: google,pit-rev14
+ - const: google,pit-rev13
+ - const: google,pit-rev12
+ - const: google,pit-rev11
+ - const: google,pit-rev10
+ - const: google,pit-rev9
+ - const: google,pit-rev8
+ - const: google,pit-rev7
+ - const: google,pit-rev6
+ - const: google,pit
+ - const: google,peach
+ - const: samsung,exynos5420
+ - const: samsung,exynos5
+
+ - description: Exynos5800 based boards
+ items:
+ - enum:
+ - hardkernel,odroid-xu3 # Hardkernel Odroid XU3
+ - hardkernel,odroid-xu3-lite # Hardkernel Odroid XU3 Lite
+ - hardkernel,odroid-xu4 # Hardkernel Odroid XU4
+ - hardkernel,odroid-hc1 # Hardkernel Odroid HC1
+ - samsung,k3g # Samsung Galaxy S5 (SM-G900H)
+ - const: samsung,exynos5800
+ - const: samsung,exynos5
+
+ - description: Google Peach Pi Boards (Rev 10+)
+ items:
+ - const: google,pi-rev16
+ - const: google,pi-rev15
+ - const: google,pi-rev14
+ - const: google,pi-rev13
+ - const: google,pi-rev12
+ - const: google,pi-rev11
+ - const: google,pi-rev10
+ - const: google,pi
+ - const: google,peach
+ - const: samsung,exynos5800
+ - const: samsung,exynos5
+
+ - description: Exynos5433 based boards
+ items:
+ - enum:
+ - samsung,tm2 # Samsung TM2
+ - samsung,tm2e # Samsung TM2E
+ - const: samsung,exynos5433
+
+ - description: Exynos7 based boards
+ items:
+ - enum:
+ - samsung,exynos7-espresso # Samsung Exynos7 Espresso
+ - const: samsung,exynos7
+
+ - description: Exynos7870 based boards
+ items:
+ - enum:
+ - samsung,a2corelte # Samsung Galaxy A2 Core
+ - samsung,j6lte # Samsung Galaxy J6
+ - samsung,on7xelte # Samsung Galaxy J7 Prime
+ - const: samsung,exynos7870
+
+ - description: Exynos7885 based boards
+ items:
+ - enum:
+ - samsung,jackpotlte # Samsung Galaxy A8 (2018)
+ - const: samsung,exynos7885
+
+ - description: Exynos850 based boards
+ items:
+ - enum:
+ - winlink,e850-96 # WinLink E850-96
+ - const: samsung,exynos850
+
+ - description: Exynos8895 based boards
+ items:
+ - enum:
+ - samsung,dreamlte # Samsung Galaxy S8 (SM-G950F)
+ - const: samsung,exynos8895
+
+ - description: Exynos9810 based boards
+ items:
+ - enum:
+ - samsung,starlte # Samsung Galaxy S9 (SM-G960F)
+ - const: samsung,exynos9810
+
+ - description: Exynos990 based boards
+ items:
+ - enum:
+ - samsung,c1s # Samsung Galaxy Note20 5G (SM-N981B)
+ - samsung,r8s # Samsung Galaxy S20 FE (SM-G780F)
+ - samsung,x1s # Samsung Galaxy S20 5G (SM-G981B)
+ - samsung,x1slte # Samsung Galaxy S20 (SM-G980F)
+ - const: samsung,exynos990
+
+ - description: Exynos Auto v9 based boards
+ items:
+ - enum:
+ - samsung,exynosautov9-sadk # Samsung Exynos Auto v9 SADK
+ - const: samsung,exynosautov9
+
+ - description: Exynos Auto v920 based boards
+ items:
+ - enum:
+ - samsung,exynosautov920-sadk # Samsung Exynos Auto v920 SADK
+ - const: samsung,exynosautov920
+
+required:
+ - compatible
+
+additionalProperties: true
diff --git a/Documentation/devicetree/bindings/arm/samsung/samsung-secure-firmware.yaml b/Documentation/devicetree/bindings/arm/samsung/samsung-secure-firmware.yaml
new file mode 100644
index 000000000000..3d9abad3c749
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/samsung/samsung-secure-firmware.yaml
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/samsung/samsung-secure-firmware.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos Secure Firmware
+
+maintainers:
+ - Krzysztof Kozlowski <krzk@kernel.org>
+
+properties:
+ compatible:
+ items:
+ - const: samsung,secure-firmware
+
+ reg:
+ description:
+ Address of non-secure SYSRAM used for communication with firmware.
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ firmware@203f000 {
+ compatible = "samsung,secure-firmware";
+ reg = <0x0203f000 0x1000>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/samsung/samsung-soc.yaml b/Documentation/devicetree/bindings/arm/samsung/samsung-soc.yaml
new file mode 100644
index 000000000000..653f85997643
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/samsung/samsung-soc.yaml
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/samsung/samsung-soc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung S3C, S5P and Exynos SoC compatibles naming convention
+
+maintainers:
+ - Krzysztof Kozlowski <krzk@kernel.org>
+
+description: |
+ Guidelines for new compatibles for SoC blocks/components.
+ When adding new compatibles in new bindings, use the format::
+ samsung,SoC-IP
+
+ For example::
+ samsung,exynos5433-cmu-isp
+
+select:
+ properties:
+ compatible:
+ pattern: "^samsung,.*(s3c|s5pv|exynos)[0-9a-z]+.*$"
+ required:
+ - compatible
+
+properties:
+ compatible:
+ oneOf:
+ - description: Preferred naming style for compatibles of SoC components
+ pattern: "^samsung,(s3c|s5pv|exynos|exynosautov)[0-9]+-.*$"
+
+ # Legacy compatibles with wild-cards - list cannot grow with new bindings:
+ - enum:
+ - samsung,exynos4x12-pinctrl
+ - samsung,exynos4x12-usb2-phy
+ - samsung,s3c64xx-pinctrl
+ - samsung,s3c64xx-wakeup-eint
+
+additionalProperties: true
diff --git a/Documentation/devicetree/bindings/arm/samsung/sysreg.txt b/Documentation/devicetree/bindings/arm/samsung/sysreg.txt
deleted file mode 100644
index 4fced6e9d5e4..000000000000
--- a/Documentation/devicetree/bindings/arm/samsung/sysreg.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-SAMSUNG S5P/Exynos SoC series System Registers (SYSREG)
-
-Properties:
- - compatible : should contain two values. First value must be one from following list:
- - "samsung,exynos4-sysreg" - for Exynos4 based SoCs,
- - "samsung,exynos5-sysreg" - for Exynos5 based SoCs.
- second value must be always "syscon".
- - reg : offset and length of the register set.
-
-Example:
- syscon@10010000 {
- compatible = "samsung,exynos4-sysreg", "syscon";
- reg = <0x10010000 0x400>;
- };
-
- syscon@10050000 {
- compatible = "samsung,exynos5-sysreg", "syscon";
- reg = <0x10050000 0x5000>;
- };
diff --git a/Documentation/devicetree/bindings/arm/scu.txt b/Documentation/devicetree/bindings/arm/scu.txt
deleted file mode 100644
index 74d0a780ce51..000000000000
--- a/Documentation/devicetree/bindings/arm/scu.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-* ARM Snoop Control Unit (SCU)
-
-As part of the MPCore complex, Cortex-A5 and Cortex-A9 are provided
-with a Snoop Control Unit. The register range is usually 256 (0x100)
-bytes.
-
-References:
-
-- Cortex-A9: see DDI0407E Cortex-A9 MPCore Technical Reference Manual
- Revision r2p0
-- Cortex-A5: see DDI0434B Cortex-A5 MPCore Technical Reference Manual
- Revision r0p1
-- ARM11 MPCore: see DDI0360F ARM 11 MPCore Processor Technical Reference
- Manial Revision r2p0
-
-- compatible : Should be:
- "arm,cortex-a9-scu"
- "arm,cortex-a5-scu"
- "arm,arm11mp-scu"
-
-- reg : Specify the base address and the size of the SCU register window.
-
-Example:
-
-scu@a0410000 {
- compatible = "arm,cortex-a9-scu";
- reg = <0xa0410000 0x100>;
-};
diff --git a/Documentation/devicetree/bindings/arm/shmobile.txt b/Documentation/devicetree/bindings/arm/shmobile.txt
deleted file mode 100644
index 58c4256d37a3..000000000000
--- a/Documentation/devicetree/bindings/arm/shmobile.txt
+++ /dev/null
@@ -1,169 +0,0 @@
-Renesas SH-Mobile, R-Mobile, and R-Car Platform Device Tree Bindings
---------------------------------------------------------------------
-
-SoCs:
-
- - Emma Mobile EV2
- compatible = "renesas,emev2"
- - RZ/A1H (R7S72100)
- compatible = "renesas,r7s72100"
- - RZ/A2 (R7S9210)
- compatible = "renesas,r7s9210"
- - SH-Mobile AG5 (R8A73A00/SH73A0)
- compatible = "renesas,sh73a0"
- - R-Mobile APE6 (R8A73A40)
- compatible = "renesas,r8a73a4"
- - R-Mobile A1 (R8A77400)
- compatible = "renesas,r8a7740"
- - RZ/G1H (R8A77420)
- compatible = "renesas,r8a7742"
- - RZ/G1M (R8A77430)
- compatible = "renesas,r8a7743"
- - RZ/G1N (R8A77440)
- compatible = "renesas,r8a7744"
- - RZ/G1E (R8A77450)
- compatible = "renesas,r8a7745"
- - RZ/G1C (R8A77470)
- compatible = "renesas,r8a77470"
- - RZ/G2M (R8A774A1)
- compatible = "renesas,r8a774a1"
- - RZ/G2E (R8A774C0)
- compatible = "renesas,r8a774c0"
- - R-Car M1A (R8A77781)
- compatible = "renesas,r8a7778"
- - R-Car H1 (R8A77790)
- compatible = "renesas,r8a7779"
- - R-Car H2 (R8A77900)
- compatible = "renesas,r8a7790"
- - R-Car M2-W (R8A77910)
- compatible = "renesas,r8a7791"
- - R-Car V2H (R8A77920)
- compatible = "renesas,r8a7792"
- - R-Car M2-N (R8A77930)
- compatible = "renesas,r8a7793"
- - R-Car E2 (R8A77940)
- compatible = "renesas,r8a7794"
- - R-Car H3 (R8A77950)
- compatible = "renesas,r8a7795"
- - R-Car M3-W (R8A77960)
- compatible = "renesas,r8a7796"
- - R-Car M3-N (R8A77965)
- compatible = "renesas,r8a77965"
- - R-Car V3M (R8A77970)
- compatible = "renesas,r8a77970"
- - R-Car V3H (R8A77980)
- compatible = "renesas,r8a77980"
- - R-Car E3 (R8A77990)
- compatible = "renesas,r8a77990"
- - R-Car D3 (R8A77995)
- compatible = "renesas,r8a77995"
- - RZ/N1D (R9A06G032)
- compatible = "renesas,r9a06g032"
-
-Boards:
-
- - Alt (RTP0RC7794SEB00010S)
- compatible = "renesas,alt", "renesas,r8a7794"
- - APE6-EVM
- compatible = "renesas,ape6evm", "renesas,r8a73a4"
- - Atmark Techno Armadillo-800 EVA
- compatible = "renesas,armadillo800eva", "renesas,r8a7740"
- - Blanche (RTP0RC7792SEB00010S)
- compatible = "renesas,blanche", "renesas,r8a7792"
- - BOCK-W
- compatible = "renesas,bockw", "renesas,r8a7778"
- - Condor (RTP0RC77980SEB0010SS/RTP0RC77980SEB0010SA01)
- compatible = "renesas,condor", "renesas,r8a77980"
- - Draak (RTP0RC77995SEB0010S)
- compatible = "renesas,draak", "renesas,r8a77995"
- - Eagle (RTP0RC77970SEB0010S)
- compatible = "renesas,eagle", "renesas,r8a77970"
- - Ebisu (RTP0RC77990SEB0010S)
- compatible = "renesas,ebisu", "renesas,r8a77990"
- - Genmai (RTK772100BC00000BR)
- compatible = "renesas,genmai", "renesas,r7s72100"
- - GR-Peach (X28A-M01-E/F)
- compatible = "renesas,gr-peach", "renesas,r7s72100"
- - Gose (RTP0RC7793SEB00010S)
- compatible = "renesas,gose", "renesas,r8a7793"
- - H3ULCB (R-Car Starter Kit Premier, RTP0RC7795SKBX0010SA00 (H3 ES1.1))
- H3ULCB (R-Car Starter Kit Premier, RTP0RC77951SKBX010SA00 (H3 ES2.0))
- compatible = "renesas,h3ulcb", "renesas,r8a7795"
- - Henninger
- compatible = "renesas,henninger", "renesas,r8a7791"
- - iWave Systems RZ/G1C Single Board Computer (iW-RainboW-G23S)
- compatible = "iwave,g23s", "renesas,r8a77470"
- - iWave Systems RZ/G1E SODIMM SOM Development Platform (iW-RainboW-G22D)
- compatible = "iwave,g22d", "iwave,g22m", "renesas,r8a7745"
- - iWave Systems RZ/G1E SODIMM System On Module (iW-RainboW-G22M-SM)
- compatible = "iwave,g22m", "renesas,r8a7745"
- - iWave Systems RZ/G1M Qseven Development Platform (iW-RainboW-G20D-Qseven)
- compatible = "iwave,g20d", "iwave,g20m", "renesas,r8a7743"
- - iWave Systems RZ/G1M Qseven System On Module (iW-RainboW-G20M-Qseven)
- compatible = "iwave,g20m", "renesas,r8a7743"
- - Kingfisher (SBEV-RCAR-KF-M03)
- compatible = "shimafuji,kingfisher"
- - Koelsch (RTP0RC7791SEB00010S)
- compatible = "renesas,koelsch", "renesas,r8a7791"
- - Kyoto Microcomputer Co. KZM-A9-Dual
- compatible = "renesas,kzm9d", "renesas,emev2"
- - Kyoto Microcomputer Co. KZM-A9-GT
- compatible = "renesas,kzm9g", "renesas,sh73a0"
- - Lager (RTP0RC7790SEB00010S)
- compatible = "renesas,lager", "renesas,r8a7790"
- - M3ULCB (R-Car Starter Kit Pro, RTP0RC7796SKBX0010SA09 (M3 ES1.0))
- compatible = "renesas,m3ulcb", "renesas,r8a7796"
- - M3NULCB (R-Car Starter Kit Pro, RTP0RC77965SKBX010SA00 (M3-N ES1.1))
- compatible = "renesas,m3nulcb", "renesas,r8a77965"
- - Marzen (R0P7779A00010S)
- compatible = "renesas,marzen", "renesas,r8a7779"
- - Porter (M2-LCDP)
- compatible = "renesas,porter", "renesas,r8a7791"
- - RSKRZA1 (YR0K77210C000BE)
- compatible = "renesas,rskrza1", "renesas,r7s72100"
- - RZN1D-DB (RZ/N1D Demo Board for the RZ/N1D 400 pins package)
- compatible = "renesas,rzn1d400-db", "renesas,r9a06g032"
- - Salvator-X (RTP0RC7795SIPB0010S)
- compatible = "renesas,salvator-x", "renesas,r8a7795"
- - Salvator-X (RTP0RC7796SIPB0011S)
- compatible = "renesas,salvator-x", "renesas,r8a7796"
- - Salvator-X (RTP0RC7796SIPB0011S (M3-N))
- compatible = "renesas,salvator-x", "renesas,r8a77965"
- - Salvator-XS (Salvator-X 2nd version, RTP0RC7795SIPB0012S)
- compatible = "renesas,salvator-xs", "renesas,r8a7795"
- - Salvator-XS (Salvator-X 2nd version, RTP0RC7796SIPB0012S)
- compatible = "renesas,salvator-xs", "renesas,r8a7796"
- - Salvator-XS (Salvator-X 2nd version, RTP0RC77965SIPB012S)
- compatible = "renesas,salvator-xs", "renesas,r8a77965"
- - SILK (RTP0RC7794LCB00011S)
- compatible = "renesas,silk", "renesas,r8a7794"
- - SK-RZG1E (YR8A77450S000BE)
- compatible = "renesas,sk-rzg1e", "renesas,r8a7745"
- - SK-RZG1M (YR8A77430S000BE)
- compatible = "renesas,sk-rzg1m", "renesas,r8a7743"
- - Stout (ADAS Starterkit, Y-R-CAR-ADAS-SKH2-BOARD)
- compatible = "renesas,stout", "renesas,r8a7790"
- - V3HSK (Y-ASK-RCAR-V3H-WS10)
- compatible = "renesas,v3hsk", "renesas,r8a77980"
- - V3MSK (Y-ASK-RCAR-V3M-WS10)
- compatible = "renesas,v3msk", "renesas,r8a77970"
- - Wheat (RTP0RC7792ASKB0000JE)
- compatible = "renesas,wheat", "renesas,r8a7792"
-
-
-Most Renesas ARM SoCs have a Product Register or Boundary Scan ID Register that
-allows to retrieve SoC product and revision information. If present, a device
-node for this register should be added.
-
-Required properties:
- - compatible: Must be "renesas,prr" or "renesas,bsid"
- - reg: Base address and length of the register block.
-
-
-Examples
---------
-
- prr: chipid@ff000044 {
- compatible = "renesas,prr";
- reg = <0 0xff000044 0 4>;
- };
diff --git a/Documentation/devicetree/bindings/arm/sirf.txt b/Documentation/devicetree/bindings/arm/sirf.txt
deleted file mode 100644
index 7b28ee6fee91..000000000000
--- a/Documentation/devicetree/bindings/arm/sirf.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-CSR SiRFprimaII and SiRFmarco device tree bindings.
-========================================
-
-Required root node properties:
- - compatible:
- - "sirf,atlas6-cb" : atlas6 "cb" evaluation board
- - "sirf,atlas6" : atlas6 device based board
- - "sirf,atlas7-cb" : atlas7 "cb" evaluation board
- - "sirf,atlas7" : atlas7 device based board
- - "sirf,prima2-cb" : prima2 "cb" evaluation board
- - "sirf,prima2" : prima2 device based board
diff --git a/Documentation/devicetree/bindings/arm/socionext/milbeaut.yaml b/Documentation/devicetree/bindings/arm/socionext/milbeaut.yaml
new file mode 100644
index 000000000000..5a428a885760
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/socionext/milbeaut.yaml
@@ -0,0 +1,25 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/socionext/milbeaut.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Milbeaut platforms
+
+maintainers:
+ - Taichi Sugaya <sugaya.taichi@socionext.com>
+ - Takao Orito <orito.takao@socionext.com>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - socionext,milbeaut-m10v-evb
+ - const: socionext,sc2000a
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/socionext/synquacer.yaml b/Documentation/devicetree/bindings/arm/socionext/synquacer.yaml
new file mode 100644
index 000000000000..72554a4f1c92
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/socionext/synquacer.yaml
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/socionext/synquacer.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Socionext Synquacer platform
+
+maintainers:
+ - Masahisa Kojima <masahisa.kojima@linaro.org>
+ - Jassi Brar <jaswinder.singh@linaro.org>
+
+description:
+ Socionext SC2A11B (Synquacer) SoC based boards
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - socionext,developer-box
+ - const: socionext,synquacer
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/socionext/uniphier.yaml b/Documentation/devicetree/bindings/arm/socionext/uniphier.yaml
new file mode 100644
index 000000000000..3e7f3d927ec7
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/socionext/uniphier.yaml
@@ -0,0 +1,70 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/socionext/uniphier.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Socionext UniPhier platform
+
+maintainers:
+ - Masahiro Yamada <yamada.masahiro@socionext.com>
+
+properties:
+ $nodename:
+ const: /
+ compatible:
+ oneOf:
+ - description: LD4 SoC boards
+ items:
+ - enum:
+ - socionext,uniphier-ld4-ref
+ - const: socionext,uniphier-ld4
+ - description: Pro4 SoC boards
+ items:
+ - enum:
+ - socionext,uniphier-pro4-ace
+ - socionext,uniphier-pro4-ref
+ - socionext,uniphier-pro4-sanji
+ - const: socionext,uniphier-pro4
+ - description: Pro5 SoC boards
+ items:
+ - enum:
+ - socionext,uniphier-pro5-epcore
+ - socionext,uniphier-pro5-proex
+ - const: socionext,uniphier-pro5
+ - description: sLD8 SoC boards
+ items:
+ - enum:
+ - socionext,uniphier-sld8-ref
+ - const: socionext,uniphier-sld8
+ - description: PXs2 SoC boards
+ items:
+ - enum:
+ - socionext,uniphier-pxs2-gentil
+ - socionext,uniphier-pxs2-vodka
+ - const: socionext,uniphier-pxs2
+ - description: LD6b SoC boards
+ items:
+ - enum:
+ - socionext,uniphier-ld6b-ref
+ - const: socionext,uniphier-ld6b
+ - description: LD11 SoC boards
+ items:
+ - enum:
+ - socionext,uniphier-ld11-global
+ - socionext,uniphier-ld11-ref
+ - const: socionext,uniphier-ld11
+ - description: LD20 SoC boards
+ items:
+ - enum:
+ - socionext,uniphier-ld20-akebi96
+ - socionext,uniphier-ld20-global
+ - socionext,uniphier-ld20-ref
+ - const: socionext,uniphier-ld20
+ - description: PXs3 SoC boards
+ items:
+ - enum:
+ - socionext,uniphier-pxs3-ref
+ - const: socionext,uniphier-pxs3
+
+additionalProperties: true
diff --git a/Documentation/devicetree/bindings/arm/sp810.txt b/Documentation/devicetree/bindings/arm/sp810.txt
deleted file mode 100644
index 1b2ab1ff5587..000000000000
--- a/Documentation/devicetree/bindings/arm/sp810.txt
+++ /dev/null
@@ -1,46 +0,0 @@
-SP810 System Controller
------------------------
-
-Required properties:
-
-- compatible: standard compatible string for a Primecell peripheral,
- see Documentation/devicetree/bindings/arm/primecell.txt
- for more details
- should be: "arm,sp810", "arm,primecell"
-
-- reg: standard registers property, physical address and size
- of the control registers
-
-- clock-names: from the common clock bindings, for more details see
- Documentation/devicetree/bindings/clock/clock-bindings.txt;
- should be: "refclk", "timclk", "apb_pclk"
-
-- clocks: from the common clock bindings, phandle and clock
- specifier pairs for the entries of clock-names property
-
-- #clock-cells: from the common clock bindings;
- should be: <1>
-
-- clock-output-names: from the common clock bindings;
- should be: "timerclken0", "timerclken1", "timerclken2", "timerclken3"
-
-- assigned-clocks: from the common clock binding;
- should be: clock specifier for each output clock of this
- provider node
-
-- assigned-clock-parents: from the common clock binding;
- should be: phandle of input clock listed in clocks
- property with the highest frequency
-
-Example:
- v2m_sysctl: sysctl@20000 {
- compatible = "arm,sp810", "arm,primecell";
- reg = <0x020000 0x1000>;
- clocks = <&v2m_refclk32khz>, <&v2m_refclk1mhz>, <&smbclk>;
- clock-names = "refclk", "timclk", "apb_pclk";
- #clock-cells = <1>;
- clock-output-names = "timerclken0", "timerclken1", "timerclken2", "timerclken3";
- assigned-clocks = <&v2m_sysctl 0>, <&v2m_sysctl 1>, <&v2m_sysctl 3>, <&v2m_sysctl 3>;
- assigned-clock-parents = <&v2m_refclk1mhz>, <&v2m_refclk1mhz>, <&v2m_refclk1mhz>, <&v2m_refclk1mhz>;
-
- };
diff --git a/Documentation/devicetree/bindings/arm/sp810.yaml b/Documentation/devicetree/bindings/arm/sp810.yaml
new file mode 100644
index 000000000000..c9094e5ec565
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/sp810.yaml
@@ -0,0 +1,80 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/sp810.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM Versatile Express SP810 System Controller
+
+maintainers:
+ - Andre Przywara <andre.przywara@arm.com>
+
+description:
+ The Arm SP810 system controller provides clocks, timers and a watchdog.
+
+# We need a select here so we don't match all nodes with 'arm,primecell'
+select:
+ properties:
+ compatible:
+ contains:
+ const: arm,sp810
+ required:
+ - compatible
+
+properties:
+ compatible:
+ items:
+ - const: arm,sp810
+ - const: arm,primecell
+
+ reg:
+ maxItems: 1
+
+ clock-names:
+ items:
+ - const: refclk
+ - const: timclk
+ - const: apb_pclk
+
+ clocks:
+ items:
+ - description: reference clock
+ - description: timer clock
+ - description: APB register access clock
+
+ "#clock-cells":
+ const: 1
+
+ clock-output-names:
+ maxItems: 4
+
+ assigned-clocks:
+ maxItems: 4
+
+ assigned-clock-parents:
+ maxItems: 4
+
+additionalProperties: false
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - "#clock-cells"
+
+examples:
+ - |
+ sysctl@20000 {
+ compatible = "arm,sp810", "arm,primecell";
+ reg = <0x020000 0x1000>;
+ clocks = <&v2m_refclk32khz>, <&v2m_refclk1mhz>, <&smbclk>;
+ clock-names = "refclk", "timclk", "apb_pclk";
+ #clock-cells = <1>;
+ clock-output-names = "timerclken0", "timerclken1",
+ "timerclken2", "timerclken3";
+ assigned-clocks = <&v2m_sysctl 0>, <&v2m_sysctl 1>,
+ <&v2m_sysctl 3>, <&v2m_sysctl 3>;
+ assigned-clock-parents = <&v2m_refclk1mhz>, <&v2m_refclk1mhz>,
+ <&v2m_refclk1mhz>, <&v2m_refclk1mhz>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/spe-pmu.txt b/Documentation/devicetree/bindings/arm/spe-pmu.txt
deleted file mode 100644
index 93372f2a7df9..000000000000
--- a/Documentation/devicetree/bindings/arm/spe-pmu.txt
+++ /dev/null
@@ -1,20 +0,0 @@
-* ARMv8.2 Statistical Profiling Extension (SPE) Performance Monitor Units (PMU)
-
-ARMv8.2 introduces the optional Statistical Profiling Extension for collecting
-performance sample data using an in-memory trace buffer.
-
-** SPE Required properties:
-
-- compatible : should be one of:
- "arm,statistical-profiling-extension-v1"
-
-- interrupts : Exactly 1 PPI must be listed. For heterogeneous systems where
- SPE is only supported on a subset of the CPUs, please consult
- the arm,gic-v3 binding for details on describing a PPI partition.
-
-** Example:
-
-spe-pmu {
- compatible = "arm,statistical-profiling-extension-v1";
- interrupts = <GIC_PPI 05 IRQ_TYPE_LEVEL_HIGH &part1>;
-};
diff --git a/Documentation/devicetree/bindings/arm/spear-misc.txt b/Documentation/devicetree/bindings/arm/spear-misc.txt
deleted file mode 100644
index e404e2556b4a..000000000000
--- a/Documentation/devicetree/bindings/arm/spear-misc.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-SPEAr Misc configuration
-===========================
-SPEAr SOCs have some miscellaneous registers which are used to configure
-few properties of different peripheral controllers.
-
-misc node required properties:
-
-- compatible Should be "st,spear1340-misc", "syscon".
-- reg: Address range of misc space up to 8K
diff --git a/Documentation/devicetree/bindings/arm/spear.txt b/Documentation/devicetree/bindings/arm/spear.txt
deleted file mode 100644
index 0d42949df6c2..000000000000
--- a/Documentation/devicetree/bindings/arm/spear.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-ST SPEAr Platforms Device Tree Bindings
----------------------------------------
-
-Boards with the ST SPEAr600 SoC shall have the following properties:
-Required root node property:
-compatible = "st,spear600";
-
-Boards with the ST SPEAr300 SoC shall have the following properties:
-Required root node property:
-compatible = "st,spear300";
-
-Boards with the ST SPEAr310 SoC shall have the following properties:
-Required root node property:
-compatible = "st,spear310";
-
-Boards with the ST SPEAr320 SoC shall have the following properties:
-Required root node property:
-compatible = "st,spear320";
-
-Boards with the ST SPEAr1310 SoC shall have the following properties:
-Required root node property:
-compatible = "st,spear1310";
-
-Boards with the ST SPEAr1340 SoC shall have the following properties:
-Required root node property:
-compatible = "st,spear1340";
diff --git a/Documentation/devicetree/bindings/arm/spear.yaml b/Documentation/devicetree/bindings/arm/spear.yaml
new file mode 100644
index 000000000000..a465c9eca76e
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/spear.yaml
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/spear.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ST SPEAr Platforms
+
+maintainers:
+ - Viresh Kumar <vireshk@kernel.org>
+ - Stefan Roese <sr@denx.de>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ items:
+ - enum:
+ - st,spear600
+ - st,spear300
+ - st,spear310
+ - st,spear320
+ - st,spear1310
+ - st,spear1340
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/sprd.txt b/Documentation/devicetree/bindings/arm/sprd.txt
deleted file mode 100644
index 3df034b13e28..000000000000
--- a/Documentation/devicetree/bindings/arm/sprd.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-Spreadtrum SoC Platforms Device Tree Bindings
-----------------------------------------------------
-
-SC9836 openphone Board
-Required root node properties:
- - compatible = "sprd,sc9836-openphone", "sprd,sc9836";
-
-SC9860 SoC
-Required root node properties:
- - compatible = "sprd,sc9860"
-
-SP9860G 3GFHD Board
-Required root node properties:
- - compatible = "sprd,sp9860g-1h10", "sprd,sc9860";
diff --git a/Documentation/devicetree/bindings/arm/sprd/sprd.yaml b/Documentation/devicetree/bindings/arm/sprd/sprd.yaml
new file mode 100644
index 000000000000..40fc3c8b9dce
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/sprd/sprd.yaml
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright 2019 Unisoc Inc.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/sprd/sprd.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Unisoc platforms
+
+maintainers:
+ - Orson Zhai <orsonzhai@gmail.com>
+ - Baolin Wang <baolin.wang7@gmail.com>
+ - Chunyan Zhang <zhang.lyra@gmail.com>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - sprd,sc9836-openphone
+ - const: sprd,sc9836
+ - items:
+ - enum:
+ - sprd,sp9860g-1h10
+ - const: sprd,sc9860
+ - items:
+ - enum:
+ - sprd,sp9863a-1h10
+ - const: sprd,sc9863a
+
+ - items:
+ - enum:
+ - sprd,ums512-1h10
+ - const: sprd,ums512
+
+ - items:
+ - enum:
+ - sprd,ums9620-2h10
+ - const: sprd,ums9620
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/ste-u300.txt b/Documentation/devicetree/bindings/arm/ste-u300.txt
deleted file mode 100644
index d11d80006a19..000000000000
--- a/Documentation/devicetree/bindings/arm/ste-u300.txt
+++ /dev/null
@@ -1,46 +0,0 @@
-ST-Ericsson U300 Device Tree Bindings
-
-For various board the "board" node may contain specific properties
-that pertain to this particular board, such as board-specific GPIOs
-or board power regulator supplies.
-
-Required root node property:
-
-compatible="stericsson,u300";
-
-Required node: syscon
-This contains the system controller.
-- compatible: must be "stericsson,u300-syscon".
-- reg: the base address and size of the system controller.
-
-Boards with the U300 SoC include:
-
-S365 "Small Board U365":
-
-Required node: s365
-This contains the board-specific information.
-- compatible: must be "stericsson,s365".
-- vana15-supply: the regulator supplying the 1.5V to drive the
- board.
-- syscon: a pointer to the syscon node so we can access the
- syscon registers to set the board as self-powered.
-
-Example:
-
-/ {
- model = "ST-Ericsson U300";
- compatible = "stericsson,u300";
- #address-cells = <1>;
- #size-cells = <1>;
-
- s365 {
- compatible = "stericsson,s365";
- vana15-supply = <&ab3100_ldo_d_reg>;
- syscon = <&syscon>;
- };
-
- syscon: syscon@c0011000 {
- compatible = "stericsson,u300-syscon";
- reg = <0xc0011000 0x1000>;
- };
-};
diff --git a/Documentation/devicetree/bindings/arm/sti.txt b/Documentation/devicetree/bindings/arm/sti.txt
deleted file mode 100644
index 8d27f6b084c7..000000000000
--- a/Documentation/devicetree/bindings/arm/sti.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-ST STi Platforms Device Tree Bindings
----------------------------------------
-
-Boards with the ST STiH415 SoC shall have the following properties:
-Required root node property:
-compatible = "st,stih415";
-
-Boards with the ST STiH416 SoC shall have the following properties:
-Required root node property:
-compatible = "st,stih416";
-
-Boards with the ST STiH407 SoC shall have the following properties:
-Required root node property:
-compatible = "st,stih407";
-
-Boards with the ST STiH410 SoC shall have the following properties:
-Required root node property:
-compatible = "st,stih410";
-
-Boards with the ST STiH418 SoC shall have the following properties:
-Required root node property:
-compatible = "st,stih418";
-
diff --git a/Documentation/devicetree/bindings/arm/sti.yaml b/Documentation/devicetree/bindings/arm/sti.yaml
new file mode 100644
index 000000000000..177358895fe1
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/sti.yaml
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/sti.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ST STi Platforms
+
+maintainers:
+ - Patrice Chotard <patrice.chotard@foss.st.com>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - st,stih410-b2260
+ - const: st,stih410
+ - items:
+ - enum:
+ - st,stih418-b2199
+ - st,stih418-b2264
+ - const: st,stih418
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/stm32/st,mlahb.yaml b/Documentation/devicetree/bindings/arm/stm32/st,mlahb.yaml
new file mode 100644
index 000000000000..4970b9167d1c
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/stm32/st,mlahb.yaml
@@ -0,0 +1,71 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/stm32/st,mlahb.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STMicroelectronics STM32 ML-AHB interconnect
+
+maintainers:
+ - Fabien Dessenne <fabien.dessenne@foss.st.com>
+ - Arnaud Pouliquen <arnaud.pouliquen@foss.st.com>
+
+description: |
+ These bindings describe the STM32 SoCs ML-AHB interconnect bus which connects
+ a Cortex-M subsystem with dedicated memories. The MCU SRAM and RETRAM memory
+ parts can be accessed through different addresses (see "RAM aliases" in [1])
+ using different buses (see [2]): balancing the Cortex-M firmware accesses
+ among those ports allows to tune the system performance.
+ [1]: https://www.st.com/resource/en/reference_manual/dm00327659.pdf
+ [2]: https://wiki.st.com/stm32mpu/wiki/STM32MP15_RAM_mapping
+
+allOf:
+ - $ref: /schemas/simple-bus.yaml#
+
+properties:
+ compatible:
+ contains:
+ enum:
+ - st,mlahb
+
+ dma-ranges:
+ description: |
+ Describe memory addresses translation between the local CPU and the
+ remote Cortex-M processor. Each memory region, is declared with
+ 3 parameters:
+ - param 1: device base address (Cortex-M processor address)
+ - param 2: physical base address (local CPU address)
+ - param 3: size of the memory region.
+ maxItems: 3
+
+ '#address-cells':
+ const: 1
+
+ '#size-cells':
+ const: 1
+
+required:
+ - compatible
+ - '#address-cells'
+ - '#size-cells'
+ - dma-ranges
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ ahb {
+ compatible = "st,mlahb", "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+ dma-ranges = <0x00000000 0x38000000 0x10000>,
+ <0x10000000 0x10000000 0x60000>,
+ <0x30000000 0x30000000 0x60000>;
+
+ m4_rproc: m4@10000000 {
+ reg = <0x10000000 0x40000>;
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/arm/stm32/st,stm32-syscon.yaml b/Documentation/devicetree/bindings/arm/stm32/st,stm32-syscon.yaml
new file mode 100644
index 000000000000..95d2319afe23
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/stm32/st,stm32-syscon.yaml
@@ -0,0 +1,76 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/stm32/st,stm32-syscon.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STMicroelectronics STM32 Platforms System Controller
+
+maintainers:
+ - Alexandre Torgue <alexandre.torgue@foss.st.com>
+ - Christophe Roullier <christophe.roullier@foss.st.com>
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - st,stm32-power-config
+ - st,stm32-syscfg
+ - st,stm32-tamp
+ - st,stm32f4-gcan
+ - st,stm32mp151-pwr-mcu
+ - st,stm32mp157-syscfg
+ - st,stm32mp21-syscfg
+ - st,stm32mp23-syscfg
+ - st,stm32mp25-syscfg
+ - const: syscon
+ - items:
+ - const: st,stm32-tamp
+ - const: syscon
+ - const: simple-mfd
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 0
+
+required:
+ - compatible
+ - reg
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - st,stm32mp157-syscfg
+ - st,stm32f4-gcan
+ then:
+ required:
+ - clocks
+ - if:
+ properties:
+ compatible:
+ const: st,stm32mp25-syscfg
+ then:
+ required:
+ - "#clock-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/stm32mp1-clks.h>
+ syscfg: syscon@50020000 {
+ compatible = "st,stm32mp157-syscfg", "syscon";
+ reg = <0x50020000 0x400>;
+ clocks = <&rcc SYSCFG>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/arm/stm32/stm32-syscon.txt b/Documentation/devicetree/bindings/arm/stm32/stm32-syscon.txt
deleted file mode 100644
index 99980aee26e5..000000000000
--- a/Documentation/devicetree/bindings/arm/stm32/stm32-syscon.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-STMicroelectronics STM32 Platforms System Controller
-
-Properties:
- - compatible : should contain two values. First value must be :
- - " st,stm32mp157-syscfg " - for stm32mp157 based SoCs,
- second value must be always "syscon".
- - reg : offset and length of the register set.
-
- Example:
- syscfg: syscon@50020000 {
- compatible = "st,stm32mp157-syscfg", "syscon";
- reg = <0x50020000 0x400>;
- };
-
diff --git a/Documentation/devicetree/bindings/arm/stm32/stm32.txt b/Documentation/devicetree/bindings/arm/stm32/stm32.txt
deleted file mode 100644
index 6808ed9ddfd5..000000000000
--- a/Documentation/devicetree/bindings/arm/stm32/stm32.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-STMicroelectronics STM32 Platforms Device Tree Bindings
-
-Each device tree must specify which STM32 SoC it uses,
-using one of the following compatible strings:
-
- st,stm32f429
- st,stm32f469
- st,stm32f746
- st,stm32h743
- st,stm32mp157
diff --git a/Documentation/devicetree/bindings/arm/stm32/stm32.yaml b/Documentation/devicetree/bindings/arm/stm32/stm32.yaml
new file mode 100644
index 000000000000..ad144c02eb7e
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/stm32/stm32.yaml
@@ -0,0 +1,218 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/stm32/stm32.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STMicroelectronics STM32 Platforms
+
+maintainers:
+ - Alexandre Torgue <alexandre.torgue@foss.st.com>
+
+properties:
+ $nodename:
+ const: "/"
+ compatible:
+ oneOf:
+ - description: emtrion STM32MP1 Argon based Boards
+ items:
+ - const: emtrion,stm32mp157c-emsbc-argon
+ - const: emtrion,stm32mp157c-emstamp-argon
+ - const: st,stm32mp157
+ - items:
+ - enum:
+ - st,stm32f429i-disco
+ - st,stm32429i-eval
+ - const: st,stm32f429
+ - items:
+ - enum:
+ - st,stm32f469i-disco
+ - const: st,stm32f469
+ - items:
+ - enum:
+ - st,stm32f746-disco
+ - st,stm32746g-eval
+ - const: st,stm32f746
+ - items:
+ - enum:
+ - st,stm32f769-disco
+ - const: st,stm32f769
+ - items:
+ - enum:
+ - st,stm32h743i-disco
+ - st,stm32h743i-eval
+ - const: st,stm32h743
+ - items:
+ - enum:
+ - st,stm32h747i-disco
+ - const: st,stm32h747
+ - items:
+ - enum:
+ - st,stm32h750i-art-pi
+ - const: st,stm32h750
+ - items:
+ - enum:
+ - st,stm32mp135f-dk
+ - const: st,stm32mp135
+
+ - description: ST STM32MP133 based Boards
+ items:
+ - enum:
+ - pri,prihmb # Priva E-Measuringbox board
+ - const: st,stm32mp133
+
+ - description: ST STM32MP151 based Boards
+ items:
+ - enum:
+ - ply,plyaqm # Plymovent AQM board
+ - prt,mecio1r0 # Protonic MECIO1r0
+ - prt,mect1s # Protonic MECT1S
+ - prt,prtt1a # Protonic PRTT1A
+ - prt,prtt1c # Protonic PRTT1C
+ - prt,prtt1s # Protonic PRTT1S
+ - const: st,stm32mp151
+
+ - description: DH STM32MP135 DHCOR SoM based Boards
+ items:
+ - const: dh,stm32mp135f-dhcor-dhsbc
+ - const: dh,stm32mp135f-dhcor-som
+ - const: st,stm32mp135
+
+ - description: DH STM32MP151 DHCOR SoM based Boards
+ items:
+ - const: dh,stm32mp151a-dhcor-testbench
+ - const: dh,stm32mp151a-dhcor-som
+ - const: st,stm32mp151
+
+ - description: ST STM32MP153 based Boards
+ items:
+ - enum:
+ - prt,mecio1r1 # Protonic MECIO1r1
+ - const: st,stm32mp153
+
+ - description: DH STM32MP153 DHCOM SoM based Boards
+ items:
+ - const: dh,stm32mp153c-dhcom-drc02
+ - const: dh,stm32mp153c-dhcom-som
+ - const: st,stm32mp153
+
+ - description: DH STM32MP153 DHCOR SoM based Boards
+ items:
+ - const: dh,stm32mp153c-dhcor-drc-compact
+ - const: dh,stm32mp153c-dhcor-som
+ - const: st,stm32mp153
+
+ - description: Octavo OSD32MP153 System-in-Package based boards
+ items:
+ - enum:
+ - lxa,stm32mp153c-fairytux2-gen1 # Linux Automation FairyTux 2 (Generation 1)
+ - lxa,stm32mp153c-fairytux2-gen2 # Linux Automation FairyTux 2 (Generation 2)
+ - lxa,stm32mp153c-tac-gen3 # Linux Automation TAC (Generation 3)
+ - const: oct,stm32mp153x-osd32
+ - const: st,stm32mp153
+
+ - items:
+ - enum:
+ - shiratech,stm32mp157a-iot-box # IoT Box
+ - shiratech,stm32mp157a-stinger96 # Stinger96
+ - st,stm32mp157c-ed1
+ - st,stm32mp157c-ed1-scmi
+ - st,stm32mp157a-dk1
+ - st,stm32mp157a-dk1-scmi
+ - st,stm32mp157c-dk2
+ - st,stm32mp157c-dk2-scmi
+ - st,stm32mp157f-dk2
+ - const: st,stm32mp157
+
+ - items:
+ - const: st,stm32mp157c-ev1
+ - const: st,stm32mp157c-ed1
+ - const: st,stm32mp157
+ - items:
+ - const: st,stm32mp157c-ev1-scmi
+ - const: st,stm32mp157c-ed1
+ - const: st,stm32mp157
+
+ - description: DH STM32MP1 SoM based Boards
+ items:
+ - enum:
+ - arrow,stm32mp157a-avenger96 # Avenger96
+ - const: dh,stm32mp157a-dhcor-som
+ - const: st,stm32mp157
+
+ - description: DH STM32MP1 SoM based Boards
+ items:
+ - enum:
+ - dh,stm32mp157c-dhcom-pdk2
+ - dh,stm32mp157c-dhcom-picoitx
+ - const: dh,stm32mp157c-dhcom-som
+ - const: st,stm32mp157
+
+ - description: Engicam i.Core STM32MP1 SoM based Boards
+ items:
+ - enum:
+ - engicam,icore-stm32mp1-ctouch2 # STM32MP1 Engicam i.Core STM32MP1 C.TOUCH 2.0
+ - engicam,icore-stm32mp1-ctouch2-of10 # STM32MP1 Engicam i.Core STM32MP1 C.TOUCH 2.0 10.1" OF
+ - engicam,icore-stm32mp1-edimm2.2 # STM32MP1 Engicam i.Core STM32MP1 EDIMM2.2 Starter Kit
+ - const: engicam,icore-stm32mp1 # STM32MP1 Engicam i.Core STM32MP1 SoM
+ - const: st,stm32mp157
+
+ - description: Engicam MicroGEA STM32MP1 SoM based Boards
+ items:
+ - enum:
+ - engicam,microgea-stm32mp1-microdev2.0
+ - engicam,microgea-stm32mp1-microdev2.0-of7
+ - const: engicam,microgea-stm32mp1
+ - const: st,stm32mp157
+
+ - description: Octavo OSD32MP15x System-in-Package based boards
+ items:
+ - enum:
+ - lxa,stm32mp157c-mc1 # Linux Automation MC-1
+ - lxa,stm32mp157c-tac-gen1 # Linux Automation TAC (Generation 1)
+ - lxa,stm32mp157c-tac-gen2 # Linux Automation TAC (Generation 2)
+ - oct,stm32mp157c-osd32-red # Octavo OSD32MP1 RED board
+ - const: oct,stm32mp15xx-osd32
+ - enum:
+ - st,stm32mp157
+
+ - description: Odyssey STM32MP1 SoM based Boards
+ items:
+ - enum:
+ - seeed,stm32mp157c-odyssey
+ - const: seeed,stm32mp157c-odyssey-som
+ - const: st,stm32mp157
+
+ - description: Phytec STM32MP1 SoM based Boards
+ items:
+ - const: phytec,phycore-stm32mp1-3
+ - const: phytec,phycore-stm32mp157c-som
+ - const: st,stm32mp157
+
+ - description: Ultratronik STM32MP1 SBC based Boards
+ items:
+ - const: ultratronik,stm32mp157c-ultra-fly-sbc
+ - const: st,stm32mp157
+
+ - description: ST STM32MP257 based Boards
+ items:
+ - enum:
+ - st,stm32mp257f-dk
+ - st,stm32mp257f-ev1
+ - const: st,stm32mp257
+
+ - description: ST STM32MP235 based Boards
+ items:
+ - enum:
+ - st,stm32mp235f-dk
+ - const: st,stm32mp235
+
+ - description: ST STM32MP215 based Boards
+ items:
+ - enum:
+ - st,stm32mp215f-dk
+ - const: st,stm32mp215
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/sunplus,sp7021.yaml b/Documentation/devicetree/bindings/arm/sunplus,sp7021.yaml
new file mode 100644
index 000000000000..def7d0cfeb31
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/sunplus,sp7021.yaml
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (C) Sunplus Co., Ltd. 2021
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/sunplus,sp7021.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Sunplus SP7021 Boards
+
+maintainers:
+ - qinjian <qinjian@cqplus1.com>
+
+description: |
+ ARM platforms using Sunplus SP7021, an ARM Cortex A7 (4-cores) based SoC.
+ Wiki: https://sunplus-tibbo.atlassian.net/wiki/spaces/doc/overview
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ items:
+ - enum:
+ - sunplus,sp7021-achip
+ - sunplus,sp7021-demo-v3
+ - const: sunplus,sp7021
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/sunxi.txt b/Documentation/devicetree/bindings/arm/sunxi.txt
deleted file mode 100644
index e4beec3d9ad3..000000000000
--- a/Documentation/devicetree/bindings/arm/sunxi.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-Allwinner sunXi Platforms Device Tree Bindings
-
-Each device tree must specify which Allwinner SoC it uses,
-using one of the following compatible strings:
-
- allwinner,sun4i-a10
- allwinner,sun5i-a10s
- allwinner,sun5i-a13
- allwinner,sun5i-r8
- allwinner,sun6i-a31
- allwinner,sun7i-a20
- allwinner,sun8i-a23
- allwinner,sun8i-a33
- allwinner,sun8i-a83t
- allwinner,sun8i-h2-plus
- allwinner,sun8i-h3
- allwinner-sun8i-r40
- allwinner,sun8i-v3s
- allwinner,sun9i-a80
- allwinner,sun50i-a64
- nextthing,gr8
diff --git a/Documentation/devicetree/bindings/arm/sunxi.yaml b/Documentation/devicetree/bindings/arm/sunxi.yaml
new file mode 100644
index 000000000000..9e4627f97d7e
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/sunxi.yaml
@@ -0,0 +1,1114 @@
+# SPDX-License-Identifier: (GPL-2.0+ OR X11)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/sunxi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner platforms
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+
+ - description: Allwinner A100 Perf1 Board
+ items:
+ - const: allwinner,a100-perf1
+ - const: allwinner,sun50i-a100
+
+ - description: Allwinner A23 Evaluation Board
+ items:
+ - const: allwinner,sun8i-a23-evb
+ - const: allwinner,sun8i-a23
+
+ - description: Allwinner A31 APP4 Evaluation Board
+ items:
+ - const: allwinner,app4-evb1
+ - const: allwinner,sun6i-a31
+
+ - description: Allwinner A83t Homlet Evaluation Board v2
+ items:
+ - const: allwinner,h8homlet-v2
+ - const: allwinner,sun8i-a83t
+
+ - description: Allwinner GA10H Quad Core Tablet v1.1
+ items:
+ - const: allwinner,ga10h-v1.1
+ - const: allwinner,sun8i-a33
+
+ - description: Allwinner GT90H Tablet v4
+ items:
+ - const: allwinner,gt90h-v4
+ - const: allwinner,sun8i-a23
+
+ - description: Allwinner R16 EVB (Parrot)
+ items:
+ - const: allwinner,parrot
+ - const: allwinner,sun8i-a33
+
+ - description: Anbernic RG-Nano
+ items:
+ - const: anbernic,rg-nano
+ - const: allwinner,sun8i-v3s
+
+ - description: Anbernic RG35XX (2024)
+ items:
+ - const: anbernic,rg35xx-2024
+ - const: allwinner,sun50i-h700
+
+ - description: Anbernic RG35XX H
+ items:
+ - const: anbernic,rg35xx-h
+ - const: allwinner,sun50i-h700
+
+ - description: Anbernic RG35XX Plus
+ items:
+ - const: anbernic,rg35xx-plus
+ - const: allwinner,sun50i-h700
+
+ - description: Anbernic RG35XX SP
+ items:
+ - const: anbernic,rg35xx-sp
+ - const: allwinner,sun50i-h700
+
+ - description: Amarula A64 Relic
+ items:
+ - const: amarula,a64-relic
+ - const: allwinner,sun50i-a64
+
+ - description: Auxtek T003 A10s HDMI TV Stick
+ items:
+ - const: allwinner,auxtek-t003
+ - const: allwinner,sun5i-a10s
+
+ - description: Auxtek T004 A10s HDMI TV Stick
+ items:
+ - const: allwinner,auxtek-t004
+ - const: allwinner,sun5i-a10s
+
+ - description: BA10 TV Box
+ items:
+ - const: allwinner,ba10-tvbox
+ - const: allwinner,sun4i-a10
+
+ - description: BananaPi
+ items:
+ - const: lemaker,bananapi
+ - const: allwinner,sun7i-a20
+
+ - description: BananaPi M1 Plus
+ items:
+ - const: sinovoip,bpi-m1-plus
+ - const: allwinner,sun7i-a20
+
+ - description: BananaPi M2
+ items:
+ - const: sinovoip,bpi-m2
+ - const: allwinner,sun6i-a31s
+
+ - description: BananaPi M2 Berry
+ items:
+ - const: sinovoip,bpi-m2-berry
+ - const: allwinner,sun8i-r40
+
+ - description: BananaPi M2 Plus
+ items:
+ - const: sinovoip,bpi-m2-plus
+ - const: allwinner,sun8i-h3
+
+ - description: BananaPi M2 Plus
+ items:
+ - const: sinovoip,bpi-m2-plus
+ - const: allwinner,sun50i-h5
+
+ - description: BananaPi M2 Plus v1.2
+ items:
+ - const: bananapi,bpi-m2-plus-v1.2
+ - const: allwinner,sun8i-h3
+
+ - description: BananaPi M2 Plus v1.2
+ items:
+ - const: bananapi,bpi-m2-plus-v1.2
+ - const: allwinner,sun50i-h5
+
+ - description: BananaPi M2 Magic
+ items:
+ - const: sinovoip,bananapi-m2m
+ - const: allwinner,sun8i-a33
+
+ - description: BananaPi M2 Ultra
+ items:
+ - const: sinovoip,bpi-m2-ultra
+ - const: allwinner,sun8i-r40
+
+ - description: BananaPi M2 Zero
+ items:
+ - const: sinovoip,bpi-m2-zero
+ - const: allwinner,sun8i-h2-plus
+
+ - description: BananaPi M3
+ items:
+ - const: sinovoip,bpi-m3
+ - const: allwinner,sun8i-a83t
+
+ - description: BananaPi M64
+ items:
+ - const: sinovoip,bananapi-m64
+ - const: allwinner,sun50i-a64
+
+ - description: BananaPro
+ items:
+ - const: lemaker,bananapro
+ - const: allwinner,sun7i-a20
+
+ - description: Beelink GS1
+ items:
+ - const: azw,beelink-gs1
+ - const: allwinner,sun50i-h6
+
+ - description: Beelink X2
+ items:
+ - const: roofull,beelink-x2
+ - const: allwinner,sun8i-h3
+
+ - description: BigTreeTech Manta M4/8P
+ items:
+ - const: bigtreetech,cb1-manta
+ - const: bigtreetech,cb1
+ - const: allwinner,sun50i-h616
+
+ - description: BigTreeTech Pi
+ items:
+ - const: bigtreetech,pi
+ - const: allwinner,sun50i-h616
+
+ - description: Chuwi V7 CW0825
+ items:
+ - const: chuwi,v7-cw0825
+ - const: allwinner,sun4i-a10
+
+ - description: Colorfly E708 Q1 Tablet
+ items:
+ - const: colorfly,e708-q1
+ - const: allwinner,sun6i-a31s
+
+ - description: CSQ CS908 Set Top Box
+ items:
+ - const: csq,cs908
+ - const: allwinner,sun6i-a31s
+
+ - description: Cubietech Cubieboard
+ items:
+ - const: cubietech,a10-cubieboard
+ - const: allwinner,sun4i-a10
+
+ - description: Cubietech Cubieboard2
+ items:
+ - const: cubietech,cubieboard2
+ - const: allwinner,sun7i-a20
+
+ - description: Cubietech Cubieboard4
+ items:
+ - const: cubietech,a80-cubieboard4
+ - const: allwinner,sun9i-a80
+
+ - description: Cubietech Cubietruck
+ items:
+ - const: cubietech,cubietruck
+ - const: allwinner,sun7i-a20
+
+ - description: Cubietech Cubietruck Plus
+ items:
+ - const: cubietech,cubietruck-plus
+ - const: allwinner,sun8i-a83t
+
+ - description: Difrnce DIT4350
+ items:
+ - const: difrnce,dit4350
+ - const: allwinner,sun5i-a13
+
+ - description: Dserve DSRV9703C
+ items:
+ - const: dserve,dsrv9703c
+ - const: allwinner,sun4i-a10
+
+ - description: Elimo Engineering Impetus SoM
+ items:
+ - const: elimo,impetus
+ - const: sochip,s3
+ - const: allwinner,sun8i-v3
+
+ - description: Elimo Engineering Initium
+ items:
+ - const: elimo,initium
+ - const: elimo,impetus
+ - const: sochip,s3
+ - const: allwinner,sun8i-v3
+
+ - description: Empire Electronix D709 Tablet
+ items:
+ - const: empire-electronix,d709
+ - const: allwinner,sun5i-a13
+
+ - description: Empire Electronix M712 Tablet
+ items:
+ - const: empire-electronix,m712
+ - const: allwinner,sun5i-a13
+
+ - description: Forlinx OKA40i-C Development board
+ items:
+ - const: forlinx,oka40i-c
+ - const: forlinx,feta40i-c
+ - const: allwinner,sun8i-r40
+
+ - description: FriendlyARM NanoPi A64
+ items:
+ - const: friendlyarm,nanopi-a64
+ - const: allwinner,sun50i-a64
+
+ - description: FriendlyARM NanoPi Duo2
+ items:
+ - const: friendlyarm,nanopi-duo2
+ - const: allwinner,sun8i-h3
+
+ - description: FriendlyARM NanoPi M1
+ items:
+ - const: friendlyarm,nanopi-m1
+ - const: allwinner,sun8i-h3
+
+ - description: FriendlyARM NanoPi M1 Plus
+ items:
+ - const: friendlyarm,nanopi-m1-plus
+ - const: allwinner,sun8i-h3
+
+ - description: FriendlyARM NanoPi Neo
+ items:
+ - const: friendlyarm,nanopi-neo
+ - const: allwinner,sun8i-h3
+
+ - description: FriendlyARM NanoPi Neo 2
+ items:
+ - const: friendlyarm,nanopi-neo2
+ - const: allwinner,sun50i-h5
+
+ - description: FriendlyARM NanoPi Neo Air
+ items:
+ - const: friendlyarm,nanopi-neo-air
+ - const: allwinner,sun8i-h3
+
+ - description: FriendlyARM NanoPi Neo Plus2
+ items:
+ - const: friendlyarm,nanopi-neo-plus2
+ - const: allwinner,sun50i-h5
+
+ - description: FriendlyARM NanoPi R1
+ items:
+ - const: friendlyarm,nanopi-r1
+ - const: allwinner,sun8i-h3
+
+ - description: FriendlyARM NanoPi R1S H5
+ items:
+ - const: friendlyarm,nanopi-r1s-h5
+ - const: allwinner,sun50i-h5
+
+ - description: FriendlyARM ZeroPi
+ items:
+ - const: friendlyarm,zeropi
+ - const: allwinner,sun8i-h3
+
+ - description: Gemei G9 Tablet
+ items:
+ - const: gemei,g9
+ - const: allwinner,sun4i-a10
+
+ - description: Hyundai A7HD
+ items:
+ - const: hyundai,a7hd
+ - const: allwinner,sun4i-a10
+
+ - description: HSG H702
+ items:
+ - const: hsg,h702
+ - const: allwinner,sun5i-a13
+
+ - description: I12 TV Box
+ items:
+ - const: allwinner,i12-tvbox
+ - const: allwinner,sun7i-a20
+
+ - description: ICnova A20
+ items:
+ - enum:
+ - incircuit,icnova-a20-adb4006
+ - incircuit,icnova-a20-swac
+ - const: incircuit,icnova-a20
+ - const: allwinner,sun7i-a20
+
+ - description: INet-1
+ items:
+ - const: inet-tek,inet1
+ - const: allwinner,sun4i-a10
+
+ - description: iNet-86DZ Rev 01
+ items:
+ - const: primux,inet86dz
+ - const: allwinner,sun8i-a23
+
+ - description: iNet-9F Rev 03
+ items:
+ - const: inet-tek,inet9f-rev03
+ - const: allwinner,sun4i-a10
+
+ - description: iNet-97F Rev 02
+ items:
+ - const: primux,inet97fv2
+ - const: allwinner,sun4i-a10
+
+ - description: iNet-98V Rev 02
+ items:
+ - const: primux,inet98v-rev2
+ - const: allwinner,sun5i-a13
+
+ - description: iNet D978 Rev 02 Tablet
+ items:
+ - const: primux,inet-d978-rev2
+ - const: allwinner,sun8i-a33
+
+ - description: iNet Q972 Tablet
+ items:
+ - const: inet-tek,inet-q972
+ - const: allwinner,sun6i-a31s
+
+ - description: Itead Ibox A20
+ items:
+ - const: itead,itead-ibox-a20
+ - const: allwinner,sun7i-a20
+
+ - description: Itead Iteaduino Plus A10
+ items:
+ - const: itead,iteaduino-plus-a10
+ - const: allwinner,sun4i-a10
+
+ - description: Jesurun Q5
+ items:
+ - const: jesurun,q5
+ - const: allwinner,sun4i-a10
+
+ - description: Lamobo R1
+ items:
+ - const: lamobo,lamobo-r1
+ - const: allwinner,sun7i-a20
+
+ - description: Lctech Pi F1C200s
+ items:
+ - const: lctech,pi-f1c200s
+ - const: allwinner,suniv-f1c200s
+ - const: allwinner,suniv-f1c100s
+
+ - description: Libre Computer Board ALL-H3-CC H2+
+ items:
+ - const: libretech,all-h3-cc-h2-plus
+ - const: allwinner,sun8i-h2-plus
+
+ - description: Libre Computer Board ALL-H3-CC H3
+ items:
+ - const: libretech,all-h3-cc-h3
+ - const: allwinner,sun8i-h3
+
+ - description: Libre Computer Board ALL-H3-CC H5
+ items:
+ - const: libretech,all-h3-cc-h5
+ - const: allwinner,sun50i-h5
+
+ - description: Libre Computer Board ALL-H3-IT H5
+ items:
+ - const: libretech,all-h3-it-h5
+ - const: allwinner,sun50i-h5
+
+ - description: Libre Computer Board ALL-H5-CC H5
+ items:
+ - const: libretech,all-h5-cc-h5
+ - const: allwinner,sun50i-h5
+
+ - description: Lichee Pi Nano
+ items:
+ - const: licheepi,licheepi-nano
+ - const: allwinner,suniv-f1c100s
+
+ - description: Lichee Pi One
+ items:
+ - const: licheepi,licheepi-one
+ - const: allwinner,sun5i-a13
+
+ - description: Lichee Pi Zero
+ items:
+ - const: licheepi,licheepi-zero
+ - const: allwinner,sun8i-v3s
+
+ - description: Lichee Pi Zero (with Dock)
+ items:
+ - const: licheepi,licheepi-zero-dock
+ - const: licheepi,licheepi-zero
+ - const: allwinner,sun8i-v3s
+
+ - description: Lichee Zero Plus (with S3, without eMMC/SPI Flash)
+ items:
+ - const: sipeed,lichee-zero-plus
+ - const: sochip,s3
+ - const: allwinner,sun8i-v3
+
+ - description: Linksprite PCDuino
+ items:
+ - const: linksprite,a10-pcduino
+ - const: allwinner,sun4i-a10
+
+ - description: Linksprite PCDuino2
+ items:
+ - const: linksprite,a10-pcduino2
+ - const: allwinner,sun4i-a10
+
+ - description: Linksprite PCDuino3
+ items:
+ - const: linksprite,pcduino3
+ - const: allwinner,sun7i-a20
+
+ - description: Linksprite PCDuino3 Nano
+ items:
+ - const: linksprite,pcduino3-nano
+ - const: allwinner,sun7i-a20
+
+ - description: Linutronix Testbox v2
+ items:
+ - const: linutronix,testbox-v2
+ - const: lamobo,lamobo-r1
+ - const: allwinner,sun7i-a20
+
+ - description: Liontron H-A133L
+ items:
+ - const: liontron,h-a133l
+ - const: allwinner,sun50i-a100
+
+ - description: HAOYU Electronics Marsboard A10
+ items:
+ - const: haoyu,a10-marsboard
+ - const: allwinner,sun4i-a10
+
+ - description: HAOYU Electronics Marsboard A20
+ items:
+ - const: haoyu,a20-marsboard
+ - const: allwinner,sun7i-a20
+
+ - description: MapleBoard MP130
+ items:
+ - const: mapleboard,mp130
+ - const: allwinner,sun8i-h3
+
+ - description: Mele A1000
+ items:
+ - const: mele,a1000
+ - const: allwinner,sun4i-a10
+
+ - description: Mele A1000G Quad Set Top Box
+ items:
+ - const: mele,a1000g-quad
+ - const: allwinner,sun6i-a31
+
+ - description: Mele I7 Quad Set Top Box
+ items:
+ - const: mele,i7
+ - const: allwinner,sun6i-a31
+
+ - description: Mele M3
+ items:
+ - const: mele,m3
+ - const: allwinner,sun7i-a20
+
+ - description: Mele M9 Set Top Box
+ items:
+ - const: mele,m9
+ - const: allwinner,sun6i-a31
+
+ - description: Merrii A20 Hummingboard
+ items:
+ - const: merrii,a20-hummingbird
+ - const: allwinner,sun7i-a20
+
+ - description: Merrii A31 Hummingboard
+ items:
+ - const: merrii,a31-hummingbird
+ - const: allwinner,sun6i-a31
+
+ - description: Merrii A80 Optimus
+ items:
+ - const: merrii,a80-optimus
+ - const: allwinner,sun9i-a80
+
+ - description: Miniand Hackberry
+ items:
+ - const: miniand,hackberry
+ - const: allwinner,sun4i-a10
+
+ - description: MK802
+ items:
+ - const: allwinner,mk802
+ - const: allwinner,sun4i-a10
+
+ - description: MK802-A10s
+ items:
+ - const: allwinner,a10s-mk802
+ - const: allwinner,sun5i-a10s
+
+ - description: MK802-II
+ items:
+ - const: allwinner,mk802ii
+ - const: allwinner,sun4i-a10
+
+ - description: MK808c
+ items:
+ - const: allwinner,mk808c
+ - const: allwinner,sun7i-a20
+
+ - description: MSI Primo81 Tablet
+ items:
+ - const: msi,primo81
+ - const: allwinner,sun6i-a31s
+
+ - description: Emlid Neutis N5 Developer Board
+ items:
+ - const: emlid,neutis-n5-devboard
+ - const: emlid,neutis-n5
+ - const: allwinner,sun50i-h5
+
+ - description: Emlid Neutis N5H3 Developer Board
+ items:
+ - const: emlid,neutis-n5h3-devboard
+ - const: emlid,neutis-n5h3
+ - const: allwinner,sun8i-h3
+
+ - description: NetCube Systems Kumquat
+ items:
+ - const: netcube,kumquat
+ - const: allwinner,sun8i-v3s
+
+ - description: NetCube Systems Nagami SoM based boards
+ items:
+ - enum:
+ - netcube,nagami-basic-carrier
+ - netcube,nagami-keypad-carrier
+ - const: netcube,nagami
+ - const: allwinner,sun8i-t113s
+
+ - description: NextThing Co. CHIP
+ items:
+ - const: nextthing,chip
+ - const: allwinner,sun5i-r8
+ - const: allwinner,sun5i-a13
+
+ - description: NextThing Co. CHIP Pro
+ items:
+ - const: nextthing,chip-pro
+ - const: nextthing,gr8
+
+ - description: NextThing Co. GR8 Evaluation Board
+ items:
+ - const: nextthing,gr8-evb
+ - const: nextthing,gr8
+
+ - description: Nintendo NES Classic
+ items:
+ - const: nintendo,nes-classic
+ - const: allwinner,sun8i-r16
+ - const: allwinner,sun8i-a33
+
+ - description: Nintendo Super NES Classic
+ items:
+ - const: nintendo,super-nes-classic
+ - const: nintendo,nes-classic
+ - const: allwinner,sun8i-r16
+ - const: allwinner,sun8i-a33
+
+ - description: Oceanic 5inMFD (5205)
+ items:
+ - const: oceanic,5205-5inmfd
+ - const: allwinner,sun50i-a64
+
+ - description: Olimex A10-OlinuXino LIME
+ items:
+ - const: olimex,a10-olinuxino-lime
+ - const: allwinner,sun4i-a10
+
+ - description: Olimex A10s-OlinuXino Micro
+ items:
+ - const: olimex,a10s-olinuxino-micro
+ - const: allwinner,sun5i-a10s
+
+ - description: Olimex A13-OlinuXino
+ items:
+ - const: olimex,a13-olinuxino
+ - const: allwinner,sun5i-a13
+
+ - description: Olimex A13-OlinuXino Micro
+ items:
+ - const: olimex,a13-olinuxino-micro
+ - const: allwinner,sun5i-a13
+
+ - description: Olimex A20-Olimex SOM Evaluation Board
+ items:
+ - const: olimex,a20-olimex-som-evb
+ - const: allwinner,sun7i-a20
+
+ - description: Olimex A20-Olimex SOM Evaluation Board (with eMMC)
+ items:
+ - const: olimex,a20-olimex-som-evb-emmc
+ - const: allwinner,sun7i-a20
+
+ - description: Olimex A20-OlinuXino LIME
+ items:
+ - const: olimex,a20-olinuxino-lime
+ - const: allwinner,sun7i-a20
+
+ - description: Olimex A20-OlinuXino LIME (with eMMC)
+ items:
+ - const: olimex,a20-olinuxino-lime-emmc
+ - const: allwinner,sun7i-a20
+
+ - description: Olimex A20-OlinuXino LIME2
+ items:
+ - const: olimex,a20-olinuxino-lime2
+ - const: allwinner,sun7i-a20
+
+ - description: Olimex A20-OlinuXino LIME2 (with eMMC)
+ items:
+ - const: olimex,a20-olinuxino-lime2-emmc
+ - const: allwinner,sun7i-a20
+
+ - description: Olimex A20-OlinuXino Micro
+ items:
+ - const: olimex,a20-olinuxino-micro
+ - const: allwinner,sun7i-a20
+
+ - description: Olimex A20-OlinuXino Micro (with eMMC)
+ items:
+ - const: olimex,a20-olinuxino-micro-emmc
+ - const: allwinner,sun7i-a20
+
+ - description: Olimex A20-SOM204 Evaluation Board
+ items:
+ - const: olimex,a20-olimex-som204-evb
+ - const: allwinner,sun7i-a20
+
+ - description: Olimex A20-SOM204 Evaluation Board (with eMMC)
+ items:
+ - const: olimex,a20-olimex-som204-evb-emmc
+ - const: allwinner,sun7i-a20
+
+ - description: Olimex A33-OlinuXino
+ items:
+ - const: olimex,a33-olinuxino
+ - const: allwinner,sun8i-a33
+
+ - description: Olimex A64-OlinuXino
+ items:
+ - const: olimex,a64-olinuxino
+ - const: allwinner,sun50i-a64
+
+ - description: Olimex A64-OlinuXino (with eMMC)
+ items:
+ - const: olimex,a64-olinuxino-emmc
+ - const: allwinner,sun50i-a64
+
+ - description: Olimex A64 Teres-I
+ items:
+ - const: olimex,a64-teres-i
+ - const: allwinner,sun50i-a64
+
+ - description: Pine64 PINE A64
+ items:
+ - const: pine64,pine64
+ - const: allwinner,sun50i-a64
+
+ - description: Pine64 PINE A64+
+ items:
+ - const: pine64,pine64-plus
+ - const: allwinner,sun50i-a64
+
+ - description: Pine64 PineCube
+ items:
+ - const: pine64,pinecube
+ - const: sochip,s3
+ - const: allwinner,sun8i-v3
+
+ - description: Pine64 PINE H64 Model A
+ items:
+ - const: pine64,pine-h64
+ - const: allwinner,sun50i-h6
+
+ - description: Pine64 PINE H64 Model B
+ items:
+ - const: pine64,pine-h64-model-b
+ - const: allwinner,sun50i-h6
+
+ - description: Pine64 PINE A64 LTS
+ items:
+ - const: pine64,pine64-lts
+ - const: allwinner,sun50i-r18
+ - const: allwinner,sun50i-a64
+
+ - description: Pine64 Pinebook
+ items:
+ - const: pine64,pinebook
+ - const: allwinner,sun50i-a64
+
+ - description: Pine64 PinePhone
+ items:
+ - enum:
+ - pine64,pinephone-1.0 # Developer Batch (1.0)
+ - pine64,pinephone-1.1 # Braveheart (1.1)
+ - pine64,pinephone-1.2
+ - const: pine64,pinephone
+ - const: allwinner,sun50i-a64
+
+ - description: Pine64 PineTab Developer Sample
+ items:
+ - const: pine64,pinetab
+ - const: allwinner,sun50i-a64
+
+ - description: Pine64 PineTab Early Adopter
+ items:
+ - const: pine64,pinetab-early-adopter
+ - const: allwinner,sun50i-a64
+
+ - description: Pine64 SOPINE
+ items:
+ - const: pine64,sopine-baseboard
+ - const: pine64,sopine
+ - const: allwinner,sun50i-a64
+
+ - description: PineRiver Mini X-Plus
+ items:
+ - const: pineriver,mini-xplus
+ - const: allwinner,sun4i-a10
+
+ - description: PocketBook Touch Lux 3
+ items:
+ - const: pocketbook,touch-lux-3
+ - const: allwinner,sun5i-a13
+
+ - description: PocketBook 614 Plus
+ items:
+ - const: pocketbook,614-plus
+ - const: allwinner,sun5i-a13
+
+ - description: Point of View Protab2-IPS9
+ items:
+ - const: pov,protab2-ips9
+ - const: allwinner,sun4i-a10
+
+ - description: Polaroid MID2407PXE03 Tablet
+ items:
+ - const: polaroid,mid2407pxe03
+ - const: allwinner,sun8i-a23
+
+ - description: Polaroid MID2809PXE04 Tablet
+ items:
+ - const: polaroid,mid2809pxe04
+ - const: allwinner,sun8i-a23
+
+ - description: Q8 A13 Tablet
+ items:
+ - const: allwinner,q8-a13
+ - const: allwinner,sun5i-a13
+
+ - description: Q8 A23 Tablet
+ items:
+ - const: allwinner,q8-a23
+ - const: allwinner,sun8i-a23
+
+ - description: Q8 A33 Tablet
+ items:
+ - const: allwinner,q8-a33
+ - const: allwinner,sun8i-a33
+
+ - description: Qihua CQA3T BV3
+ items:
+ - const: qihua,t3-cqa3t-bv3
+ - const: allwinner,sun8i-t3
+ - const: allwinner,sun8i-r40
+
+ - description: R7 A10s HDMI TV Stick
+ items:
+ - const: allwinner,r7-tv-dongle
+ - const: allwinner,sun5i-a10s
+
+ - description: Radxa Cubie A5E
+ items:
+ - const: radxa,cubie-a5e
+ - const: allwinner,sun55i-a527
+
+ - description: Remix Mini PC
+ items:
+ - const: jide,remix-mini-pc
+ - const: allwinner,sun50i-h64
+ - const: allwinner,sun50i-a64
+
+ - description: RerVision A33-Vstar (with A33-Core1 SoM)
+ items:
+ - const: rervision,a33-vstar
+ - const: rervision,a33-core1
+ - const: allwinner,sun8i-a33
+
+ - description: RerVision H3-DVK
+ items:
+ - const: rervision,h3-dvk
+ - const: allwinner,sun8i-h3
+
+ - description: Sinlinx SinA31s Core Board
+ items:
+ - const: sinlinx,sina31s
+ - const: allwinner,sun6i-a31s
+
+ - description: Sinlinx SinA31s Development Board
+ items:
+ - const: sinlinx,sina31s-sdk
+ - const: allwinner,sun6i-a31s
+
+ - description: Sinlinx SinA33
+ items:
+ - const: sinlinx,sina33
+ - const: allwinner,sun8i-a33
+
+ - description: Sipeed Longan Pi 3H board for the Sipeed Longan Module 3H
+ items:
+ - const: sipeed,longan-pi-3h
+ - const: sipeed,longan-module-3h
+ - const: allwinner,sun50i-h618
+
+ - description: SourceParts PopStick v1.1
+ items:
+ - const: sourceparts,popstick-v1.1
+ - const: sourceparts,popstick
+ - const: allwinner,suniv-f1c200s
+ - const: allwinner,suniv-f1c100s
+
+ - description: SL631 Action Camera with IMX179
+ items:
+ - const: allwinner,sl631-imx179
+ - const: allwinner,sl631
+ - const: allwinner,sun8i-v3
+
+ - description: Tanix TX1
+ items:
+ - const: oranth,tanix-tx1
+ - const: allwinner,sun50i-h616
+
+ - description: Tanix TX6
+ items:
+ - const: oranth,tanix-tx6
+ - const: allwinner,sun50i-h6
+
+ - description: Tanix TX6 mini
+ items:
+ - const: oranth,tanix-tx6-mini
+ - const: allwinner,sun50i-h6
+
+ - description: TBS A711 Tablet
+ items:
+ - const: tbs-biometrics,a711
+ - const: allwinner,sun8i-a83t
+
+ - description: Topwise A721 Tablet
+ items:
+ - const: topwise,a721
+ - const: allwinner,sun4i-a10
+
+ - description: Transpeed 8K618-T
+ items:
+ - const: transpeed,8k618-t
+ - const: allwinner,sun50i-h618
+
+ - description: Utoo P66
+ items:
+ - const: utoo,p66
+ - const: allwinner,sun5i-a13
+
+ - description: Wexler TAB7200
+ items:
+ - const: wexler,tab7200
+ - const: allwinner,sun7i-a20
+
+ - description: MangoPi MQ-R board
+ items:
+ - const: widora,mangopi-mq-r-t113
+ - const: allwinner,sun8i-t113s
+
+ - description: WITS A31 Colombus Evaluation Board
+ items:
+ - const: wits,colombus
+ - const: allwinner,sun6i-a31
+
+ - description: WITS Pro A20 DKT
+ items:
+ - const: wits,pro-a20-dkt
+ - const: allwinner,sun7i-a20
+
+ - description: Wobo i5
+ items:
+ - const: wobo,a10s-wobo-i5
+ - const: allwinner,sun5i-a10s
+
+ - description: Yones TopTech BS1078 v2 Tablet
+ items:
+ - const: yones-toptech,bs1078-v2
+ - const: allwinner,sun6i-a31s
+
+ - description: X96 Mate TV box
+ items:
+ - const: hechuang,x96-mate
+ - const: allwinner,sun50i-h616
+
+ - description: X96Q
+ items:
+ - const: amediatech,x96q
+ - const: allwinner,sun50i-h616
+
+ - description: X96Q Pro+
+ items:
+ - const: amediatech,x96q-pro-plus
+ - const: allwinner,sun55i-h728
+
+ - description: Xunlong OrangePi
+ items:
+ - const: xunlong,orangepi
+ - const: allwinner,sun7i-a20
+
+ - description: Xunlong OrangePi 2
+ items:
+ - const: xunlong,orangepi-2
+ - const: allwinner,sun8i-h3
+
+ - description: Xunlong OrangePi 3
+ items:
+ - const: xunlong,orangepi-3
+ - const: allwinner,sun50i-h6
+
+ - description: Xunlong OrangePi 4A
+ items:
+ - const: xunlong,orangepi-4a
+ - const: allwinner,sun55i-t527
+
+ - description: Xunlong OrangePi Lite
+ items:
+ - const: xunlong,orangepi-lite
+ - const: allwinner,sun8i-h3
+
+ - description: Xunlong OrangePi Lite2
+ items:
+ - const: xunlong,orangepi-lite2
+ - const: allwinner,sun50i-h6
+
+ - description: Xunlong OrangePi Mini
+ items:
+ - const: xunlong,orangepi-mini
+ - const: allwinner,sun7i-a20
+
+ - description: Xunlong OrangePi One
+ items:
+ - const: xunlong,orangepi-one
+ - const: allwinner,sun8i-h3
+
+ - description: Xunlong OrangePi One Plus
+ items:
+ - const: xunlong,orangepi-one-plus
+ - const: allwinner,sun50i-h6
+
+ - description: Xunlong OrangePi PC
+ items:
+ - const: xunlong,orangepi-pc
+ - const: allwinner,sun8i-h3
+
+ - description: Xunlong OrangePi PC 2
+ items:
+ - const: xunlong,orangepi-pc2
+ - const: allwinner,sun50i-h5
+
+ - description: Xunlong OrangePi PC Plus
+ items:
+ - const: xunlong,orangepi-pc-plus
+ - const: allwinner,sun8i-h3
+
+ - description: Xunlong OrangePi Plus
+ items:
+ - const: xunlong,orangepi-plus
+ - const: allwinner,sun8i-h3
+
+ - description: Xunlong OrangePi Plus 2E
+ items:
+ - const: xunlong,orangepi-plus2e
+ - const: allwinner,sun8i-h3
+
+ - description: Xunlong OrangePi Prime
+ items:
+ - const: xunlong,orangepi-prime
+ - const: allwinner,sun50i-h5
+
+ - description: Xunlong OrangePi R1
+ items:
+ - const: xunlong,orangepi-r1
+ - const: allwinner,sun8i-h2-plus
+
+ - description: Xunlong OrangePi Win
+ items:
+ - const: xunlong,orangepi-win
+ - const: allwinner,sun50i-a64
+
+ - description: Xunlong OrangePi Zero
+ items:
+ - const: xunlong,orangepi-zero
+ - const: allwinner,sun8i-h2-plus
+
+ - description: Xunlong OrangePi Zero Plus
+ items:
+ - const: xunlong,orangepi-zero-plus
+ - const: allwinner,sun50i-h5
+
+ - description: Xunlong OrangePi Zero Plus2
+ items:
+ - const: xunlong,orangepi-zero-plus2
+ - const: allwinner,sun50i-h5
+
+ - description: Xunlong OrangePi Zero Plus2
+ items:
+ - const: xunlong,orangepi-zero-plus2-h3
+ - const: allwinner,sun8i-h3
+
+ - description: Xunlong OrangePi Zero 2
+ items:
+ - const: xunlong,orangepi-zero2
+ - const: allwinner,sun50i-h616
+
+ - description: Xunlong OrangePi Zero 2W
+ items:
+ - const: xunlong,orangepi-zero2w
+ - const: allwinner,sun50i-h618
+
+ - description: Xunlong OrangePi Zero 3
+ items:
+ - const: xunlong,orangepi-zero3
+ - const: allwinner,sun50i-h618
+
+ - description: YuzukiHD Avaota A1
+ items:
+ - const: yuzukihd,avaota-a1
+ - const: allwinner,sun55i-t527
+
+ - description: YuzukiHD Chameleon
+ items:
+ - const: yuzukihd,chameleon
+ - const: allwinner,sun50i-h618
+
+additionalProperties: true
diff --git a/Documentation/devicetree/bindings/arm/sunxi/allwinner,sun4i-a10-mbus.yaml b/Documentation/devicetree/bindings/arm/sunxi/allwinner,sun4i-a10-mbus.yaml
new file mode 100644
index 000000000000..99566688d033
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/sunxi/allwinner,sun4i-a10-mbus.yaml
@@ -0,0 +1,168 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/sunxi/allwinner,sun4i-a10-mbus.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner Memory Bus (MBUS) controller
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+description: |
+ The MBUS controller drives the MBUS that other devices in the SoC
+ will use to perform DMA. It also has a register interface that
+ allows to monitor and control the bandwidth and priorities for
+ masters on that bus.
+
+ Each device having to perform their DMA through the MBUS must have
+ the interconnects and interconnect-names properties set to the MBUS
+ controller and with "dma-mem" as the interconnect name.
+
+properties:
+ "#interconnect-cells":
+ const: 1
+ description:
+ The content of the cell is the MBUS ID.
+
+ compatible:
+ enum:
+ - allwinner,sun5i-a13-mbus
+ - allwinner,sun8i-a33-mbus
+ - allwinner,sun8i-a50-mbus
+ - allwinner,sun8i-a83t-mbus
+ - allwinner,sun8i-h3-mbus
+ - allwinner,sun8i-r40-mbus
+ - allwinner,sun8i-v3s-mbus
+ - allwinner,sun8i-v536-mbus
+ - allwinner,sun20i-d1-mbus
+ - allwinner,sun50i-a64-mbus
+ - allwinner,sun50i-a100-mbus
+ - allwinner,sun50i-h5-mbus
+ - allwinner,sun50i-h6-mbus
+ - allwinner,sun50i-h616-mbus
+ - allwinner,sun50i-r329-mbus
+
+ reg:
+ minItems: 1
+ items:
+ - description: MBUS interconnect/bandwidth limit/PMU registers
+ - description: DRAM controller/PHY registers
+
+ reg-names:
+ minItems: 1
+ items:
+ - const: mbus
+ - const: dram
+
+ clocks:
+ minItems: 1
+ items:
+ - description: MBUS interconnect module clock
+ - description: DRAM controller/PHY module clock
+ - description: Register bus clock, shared by MBUS and DRAM
+
+ clock-names:
+ minItems: 1
+ items:
+ - const: mbus
+ - const: dram
+ - const: bus
+
+ interrupts:
+ maxItems: 1
+ description:
+ MBUS PMU activity interrupt.
+
+ dma-ranges:
+ description:
+ See section 2.3.9 of the DeviceTree Specification.
+
+ '#address-cells': true
+
+ '#size-cells': true
+
+required:
+ - "#interconnect-cells"
+ - compatible
+ - reg
+ - clocks
+ - dma-ranges
+
+if:
+ not:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - allwinner,sun5i-a13-mbus
+ - allwinner,sun8i-r40-mbus
+
+then:
+ properties:
+ reg:
+ minItems: 2
+
+ reg-names:
+ minItems: 2
+
+ clocks:
+ minItems: 3
+
+ clock-names:
+ minItems: 3
+
+ required:
+ - reg-names
+ - clock-names
+
+else:
+ properties:
+ reg:
+ maxItems: 1
+
+ reg-names:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ maxItems: 1
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/sun50i-a64-ccu.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ dram-controller@1c01000 {
+ compatible = "allwinner,sun5i-a13-mbus";
+ reg = <0x01c01000 0x1000>;
+ clocks = <&ccu CLK_MBUS>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ dma-ranges = <0x00000000 0x40000000 0x20000000>;
+ #interconnect-cells = <1>;
+ };
+
+ - |
+ dram-controller@1c62000 {
+ compatible = "allwinner,sun50i-a64-mbus";
+ reg = <0x01c62000 0x1000>,
+ <0x01c63000 0x1000>;
+ reg-names = "mbus", "dram";
+ clocks = <&ccu CLK_MBUS>,
+ <&ccu CLK_DRAM>,
+ <&ccu CLK_BUS_DRAM>;
+ clock-names = "mbus", "dram", "bus";
+ interrupts = <GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ dma-ranges = <0x00000000 0x40000000 0xc0000000>;
+ #interconnect-cells = <1>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/arm/sunxi/allwinner,sun6i-a31-cpuconfig.yaml b/Documentation/devicetree/bindings/arm/sunxi/allwinner,sun6i-a31-cpuconfig.yaml
new file mode 100644
index 000000000000..d805c4508b4e
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/sunxi/allwinner,sun6i-a31-cpuconfig.yaml
@@ -0,0 +1,38 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/sunxi/allwinner,sun6i-a31-cpuconfig.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner CPU Configuration Controller
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+properties:
+ compatible:
+ enum:
+ - allwinner,sun6i-a31-cpuconfig
+ - allwinner,sun8i-a23-cpuconfig
+ - allwinner,sun8i-a83t-cpucfg
+ - allwinner,sun8i-a83t-r-cpucfg
+ - allwinner,sun9i-a80-cpucfg
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ cpucfg@1f01c00 {
+ compatible = "allwinner,sun6i-a31-cpuconfig";
+ reg = <0x01f01c00 0x300>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/arm/sunxi/allwinner,sun9i-a80-prcm.yaml b/Documentation/devicetree/bindings/arm/sunxi/allwinner,sun9i-a80-prcm.yaml
new file mode 100644
index 000000000000..644f391afb32
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/sunxi/allwinner,sun9i-a80-prcm.yaml
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/sunxi/allwinner,sun9i-a80-prcm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A80 PRCM
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+properties:
+ compatible:
+ const: allwinner,sun9i-a80-prcm
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ prcm@8001400 {
+ compatible = "allwinner,sun9i-a80-prcm";
+ reg = <0x08001400 0x200>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/arm/sunxi/smp-sram.txt b/Documentation/devicetree/bindings/arm/sunxi/smp-sram.txt
deleted file mode 100644
index 082e6a9382d3..000000000000
--- a/Documentation/devicetree/bindings/arm/sunxi/smp-sram.txt
+++ /dev/null
@@ -1,44 +0,0 @@
-Allwinner SRAM for smp bringup:
-------------------------------------------------
-
-Allwinner's A80 SoC uses part of the secure sram for hotplugging of the
-primary core (cpu0). Once the core gets powered up it checks if a magic
-value is set at a specific location. If it is then the BROM will jump
-to the software entry address, instead of executing a standard boot.
-
-Therefore a reserved section sub-node has to be added to the mmio-sram
-declaration.
-
-Note that this is separate from the Allwinner SRAM controller found in
-../../sram/sunxi-sram.txt. This SRAM is secure only and not mappable to
-any device.
-
-Also there are no "secure-only" properties. The implementation should
-check if this SRAM is usable first.
-
-Required sub-node properties:
-- compatible : depending on the SoC this should be one of:
- "allwinner,sun9i-a80-smp-sram"
-
-The rest of the properties should follow the generic mmio-sram discription
-found in ../../misc/sram.txt
-
-Example:
-
- sram_b: sram@20000 {
- /* 256 KiB secure SRAM at 0x20000 */
- compatible = "mmio-sram";
- reg = <0x00020000 0x40000>;
- #address-cells = <1>;
- #size-cells = <1>;
- ranges = <0 0x00020000 0x40000>;
-
- smp-sram@1000 {
- /*
- * This is checked by BROM to determine if
- * cpu0 should jump to SMP entry vector
- */
- compatible = "allwinner,sun9i-a80-smp-sram";
- reg = <0x1000 0x8>;
- };
- };
diff --git a/Documentation/devicetree/bindings/arm/swir.txt b/Documentation/devicetree/bindings/arm/swir.txt
deleted file mode 100644
index 042be73a95d3..000000000000
--- a/Documentation/devicetree/bindings/arm/swir.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-Sierra Wireless Modules device tree bindings
---------------------------------------------
-
-Supported Modules :
- - WP8548 : Includes MDM9615 and PM8018 in a module
-
-Sierra Wireless modules shall have the following properties :
- Required root node property
- - compatible: "swir,wp8548" for the WP8548 CF3 Module
-
-Board compatible values:
- - "swir,mangoh-green-wp8548" for the mangOH green board with the WP8548 module
diff --git a/Documentation/devicetree/bindings/arm/syna.txt b/Documentation/devicetree/bindings/arm/syna.txt
deleted file mode 100644
index 2face46a5f64..000000000000
--- a/Documentation/devicetree/bindings/arm/syna.txt
+++ /dev/null
@@ -1,105 +0,0 @@
-Synaptics SoC Device Tree Bindings
-
-According to https://www.synaptics.com/company/news/conexant-marvell
-Synaptics has acquired the Multimedia Solutions Business of Marvell, so
-berlin SoCs are now Synaptics' SoCs now.
-
----------------------------------------------------------------
-
-Work in progress statement:
-
-Device tree files and bindings applying to Marvell Berlin SoCs and boards are
-considered "unstable". Any Marvell Berlin device tree binding may change at any
-time. Be sure to use a device tree binary and a kernel image generated from the
-same source tree.
-
-Please refer to Documentation/devicetree/bindings/ABI.txt for a definition of a
-stable binding/ABI.
-
----------------------------------------------------------------
-
-Boards with the Synaptics AS370 SoC shall have the following properties:
- Required root node property:
- compatible: "syna,as370"
-
-Boards with a SoC of the Marvell Berlin family, e.g. Armada 1500
-shall have the following properties:
-
-* Required root node properties:
-compatible: must contain "marvell,berlin"
-
-In addition, the above compatible shall be extended with the specific
-SoC and board used. Currently known SoC compatibles are:
- "marvell,berlin2" for Marvell Armada 1500 (BG2, 88DE3100),
- "marvell,berlin2cd" for Marvell Armada 1500-mini (BG2CD, 88DE3005)
- "marvell,berlin2ct" for Marvell Armada ? (BG2CT, 88DE????)
- "marvell,berlin2q" for Marvell Armada 1500-pro (BG2Q, 88DE3114)
- "marvell,berlin3" for Marvell Armada ? (BG3, 88DE????)
-
-* Example:
-
-/ {
- model = "Sony NSZ-GS7";
- compatible = "sony,nsz-gs7", "marvell,berlin2", "marvell,berlin";
-
- ...
-}
-
-* Marvell Berlin CPU control bindings
-
-CPU control register allows various operations on CPUs, like resetting them
-independently.
-
-Required properties:
-- compatible: should be "marvell,berlin-cpu-ctrl"
-- reg: address and length of the register set
-
-Example:
-
-cpu-ctrl@f7dd0000 {
- compatible = "marvell,berlin-cpu-ctrl";
- reg = <0xf7dd0000 0x10000>;
-};
-
-* Marvell Berlin2 chip control binding
-
-Marvell Berlin SoCs have a chip control register set providing several
-individual registers dealing with pinmux, padmux, clock, reset, and secondary
-CPU boot address. Unfortunately, the individual registers are spread among the
-chip control registers, so there should be a single DT node only providing the
-different functions which are described below.
-
-Required properties:
-- compatible:
- * the first and second values must be:
- "simple-mfd", "syscon"
-- reg: address and length of following register sets for
- BG2/BG2CD: chip control register set
- BG2Q: chip control register set and cpu pll registers
-
-* Marvell Berlin2 system control binding
-
-Marvell Berlin SoCs have a system control register set providing several
-individual registers dealing with pinmux, padmux, and reset.
-
-Required properties:
-- compatible:
- * the first and second values must be:
- "simple-mfd", "syscon"
-- reg: address and length of the system control register set
-
-Example:
-
-chip: chip-control@ea0000 {
- compatible = "simple-mfd", "syscon";
- reg = <0xea0000 0x400>;
-
- /* sub-device nodes */
-};
-
-sysctrl: system-controller@d000 {
- compatible = "simple-mfd", "syscon";
- reg = <0xd000 0x100>;
-
- /* sub-device nodes */
-};
diff --git a/Documentation/devicetree/bindings/arm/technologic.txt b/Documentation/devicetree/bindings/arm/technologic.txt
deleted file mode 100644
index f1cedc00dcab..000000000000
--- a/Documentation/devicetree/bindings/arm/technologic.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-Technologic Systems Platforms Device Tree Bindings
---------------------------------------------------
-
-TS-4600 is a System-on-Module based on the Freescale i.MX28 System-on-Chip.
-It can be mounted on a carrier board providing additional peripheral connectors.
-Required root node properties:
- - compatible = "technologic,imx28-ts4600", "fsl,imx28"
-
-TS-4800 board
-Required root node properties:
- - compatible = "technologic,imx51-ts4800", "fsl,imx51";
-
-TS-4900 is a System-on-Module based on the Freescale i.MX6 System-on-Chip.
-It can be mounted on a carrier board providing additional peripheral connectors.
-Required root node properties:
- - compatible = "technologic,imx6dl-ts4900", "fsl,imx6dl"
- - compatible = "technologic,imx6q-ts4900", "fsl,imx6q"
-
-TS-7970 is a System-on-Module based on the Freescale i.MX6 System-on-Chip.
-It can be mounted on a carrier board providing additional peripheral connectors.
-Required root node properties:
- - compatible = "technologic,imx6dl-ts7970", "fsl,imx6dl"
- - compatible = "technologic,imx6q-ts7970", "fsl,imx6q"
diff --git a/Documentation/devicetree/bindings/arm/tegra.txt b/Documentation/devicetree/bindings/arm/tegra.txt
deleted file mode 100644
index c59b15f64346..000000000000
--- a/Documentation/devicetree/bindings/arm/tegra.txt
+++ /dev/null
@@ -1,65 +0,0 @@
-NVIDIA Tegra device tree bindings
--------------------------------------------
-
-SoCs
--------------------------------------------
-
-Each device tree must specify which Tegra SoC it uses, using one of the
-following compatible values:
-
- nvidia,tegra20
- nvidia,tegra30
- nvidia,tegra114
- nvidia,tegra124
- nvidia,tegra132
- nvidia,tegra210
- nvidia,tegra186
- nvidia,tegra194
-
-Boards
--------------------------------------------
-
-Each device tree must specify which one or more of the following
-board-specific compatible values:
-
- ad,medcom-wide
- ad,plutux
- ad,tamonten
- ad,tec
- compal,paz00
- compulab,trimslice
- nvidia,beaver
- nvidia,cardhu
- nvidia,cardhu-a02
- nvidia,cardhu-a04
- nvidia,dalmore
- nvidia,harmony
- nvidia,jetson-tk1
- nvidia,norrin
- nvidia,p2371-0000
- nvidia,p2371-2180
- nvidia,p2571
- nvidia,p2771-0000
- nvidia,p2972-0000
- nvidia,roth
- nvidia,seaboard
- nvidia,tn7
- nvidia,ventana
- toradex,apalis_t30
- toradex,apalis_t30-eval
- toradex,apalis_t30-v1.1
- toradex,apalis_t30-v1.1-eval
- toradex,apalis-tk1
- toradex,apalis-tk1-eval
- toradex,apalis-tk1-v1.2
- toradex,apalis-tk1-v1.2-eval
- toradex,colibri_t20
- toradex,colibri_t20-eval-v3
- toradex,colibri_t20-iris
- toradex,colibri_t30
- toradex,colibri_t30-eval-v3
-
-Trusted Foundations
--------------------------------------------
-Tegra supports the Trusted Foundation secure monitor. See the
-"tlm,trusted-foundations" binding's documentation for more details.
diff --git a/Documentation/devicetree/bindings/arm/tegra.yaml b/Documentation/devicetree/bindings/arm/tegra.yaml
new file mode 100644
index 000000000000..6139407c2cbf
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/tegra.yaml
@@ -0,0 +1,267 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/tegra.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NVIDIA Tegra
+
+maintainers:
+ - Thierry Reding <thierry.reding@gmail.com>
+ - Jonathan Hunter <jonathanh@nvidia.com>
+
+properties:
+ $nodename:
+ const: "/"
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - compal,paz00
+ - compulab,trimslice
+ - nvidia,harmony
+ - nvidia,seaboard
+ - nvidia,ventana
+ - const: nvidia,tegra20
+ - items:
+ - enum:
+ - ad,medcom-wide
+ - ad,plutux
+ - ad,tec
+ - const: ad,tamonten
+ - const: nvidia,tegra20
+ - items:
+ - enum:
+ - toradex,colibri_t20-eval-v3
+ - toradex,colibri_t20-iris
+ - const: toradex,colibri_t20
+ - const: nvidia,tegra20
+ - description: ASUS Transformers T20 Device family
+ items:
+ - enum:
+ - asus,sl101
+ - asus,tf101
+ - asus,tf101g
+ - const: nvidia,tegra20
+ - items:
+ - const: acer,picasso
+ - const: nvidia,tegra20
+ - items:
+ - enum:
+ - nvidia,beaver
+ - const: nvidia,tegra30
+ - items:
+ - enum:
+ - nvidia,cardhu-a02
+ - nvidia,cardhu-a04
+ - const: nvidia,cardhu
+ - const: nvidia,tegra30
+ - description: ASUS Portable AiO P1801-T
+ items:
+ - const: asus,p1801-t
+ - const: nvidia,tegra30
+ - description: ASUS Transformers Device family
+ items:
+ - enum:
+ - asus,tf201
+ - asus,tf300t
+ - asus,tf300tg
+ - asus,tf300tl
+ - asus,tf700t
+ - const: nvidia,tegra30
+ - description: Asus VivoTab RT
+ items:
+ - const: asus,tf600t
+ - const: nvidia,tegra30
+ - description: LG Optimus 4X P880
+ items:
+ - const: lg,p880
+ - const: nvidia,tegra30
+ - description: LG Optimus Vu P895
+ items:
+ - const: lg,p895
+ - const: nvidia,tegra30
+ - items:
+ - const: toradex,apalis_t30-eval
+ - const: toradex,apalis_t30
+ - const: nvidia,tegra30
+ - items:
+ - const: toradex,apalis_t30-v1.1-eval
+ - const: toradex,apalis_t30-eval
+ - const: toradex,apalis_t30-v1.1
+ - const: toradex,apalis_t30
+ - const: nvidia,tegra30
+ - items:
+ - enum:
+ - toradex,colibri_t30-eval-v3
+ - const: toradex,colibri_t30
+ - const: nvidia,tegra30
+ - items:
+ - const: asus,grouper
+ - const: nvidia,tegra30
+ - items:
+ - const: asus,tilapia
+ - const: asus,grouper
+ - const: nvidia,tegra30
+ - items:
+ - const: ouya,ouya
+ - const: nvidia,tegra30
+ - items:
+ - const: pegatron,chagall
+ - const: nvidia,tegra30
+ - items:
+ - enum:
+ - asus,tf701t
+ - nvidia,dalmore
+ - nvidia,roth
+ - nvidia,tn7
+ - const: nvidia,tegra114
+ - items:
+ - enum:
+ - nvidia,jetson-tk1
+ - nvidia,venice2
+ - const: nvidia,tegra124
+ - items:
+ - const: toradex,apalis-tk1-eval
+ - const: toradex,apalis-tk1
+ - const: nvidia,tegra124
+ - items:
+ - const: toradex,apalis-tk1-v1.2-eval
+ - const: toradex,apalis-tk1-eval
+ - const: toradex,apalis-tk1-v1.2
+ - const: toradex,apalis-tk1
+ - const: nvidia,tegra124
+ - items:
+ - enum:
+ - nvidia,norrin
+ - const: nvidia,tegra132
+ - const: nvidia,tegra124
+ - items:
+ - const: google,nyan-blaze-rev10
+ - const: google,nyan-blaze-rev9
+ - const: google,nyan-blaze-rev8
+ - const: google,nyan-blaze-rev7
+ - const: google,nyan-blaze-rev6
+ - const: google,nyan-blaze-rev5
+ - const: google,nyan-blaze-rev4
+ - const: google,nyan-blaze-rev3
+ - const: google,nyan-blaze-rev2
+ - const: google,nyan-blaze-rev1
+ - const: google,nyan-blaze-rev0
+ - const: google,nyan-blaze
+ - const: google,nyan
+ - const: nvidia,tegra124
+ - items:
+ - const: google,nyan-big-rev10
+ - const: google,nyan-big-rev9
+ - const: google,nyan-big-rev8
+ - const: google,nyan-big-rev7
+ - const: google,nyan-big-rev6
+ - const: google,nyan-big-rev5
+ - const: google,nyan-big-rev4
+ - const: google,nyan-big-rev3
+ - const: google,nyan-big-rev2
+ - const: google,nyan-big-rev1
+ - const: google,nyan-big-rev0
+ - const: google,nyan-big
+ - const: google,nyan
+ - const: nvidia,tegra124
+ - items:
+ - const: google,nyan-big-rev7
+ - const: google,nyan-big-rev6
+ - const: google,nyan-big-rev5
+ - const: google,nyan-big-rev4
+ - const: google,nyan-big-rev3
+ - const: google,nyan-big-rev2
+ - const: google,nyan-big-rev1
+ - const: google,nyan-big-rev0
+ - const: google,nyan-big
+ - const: google,nyan
+ - const: nvidia,tegra124
+ - description: Xiaomi Mi Pad (A0101)
+ items:
+ - const: xiaomi,mocha
+ - const: nvidia,tegra124
+ - items:
+ - enum:
+ - nvidia,darcy
+ - nvidia,p2371-0000
+ - nvidia,p2371-2180
+ - nvidia,p2571
+ - nvidia,p2894-0050-a08
+ - const: nvidia,tegra210
+ - description: Jetson TX2 Developer Kit
+ items:
+ - const: nvidia,p2771-0000
+ - const: nvidia,tegra186
+ - description: Jetson TX2 NX Developer Kit
+ items:
+ - const: nvidia,p3509-0000+p3636-0001
+ - const: nvidia,tegra186
+ - description: Jetson AGX Xavier Developer Kit
+ items:
+ - const: nvidia,p2972-0000
+ - const: nvidia,tegra194
+ - description: Jetson Xavier NX
+ items:
+ - const: nvidia,p3668-0000
+ - const: nvidia,tegra194
+ - description: Jetson Xavier NX (eMMC)
+ items:
+ - const: nvidia,p3668-0001
+ - const: nvidia,tegra194
+ - description: Jetson Xavier NX Developer Kit
+ items:
+ - const: nvidia,p3509-0000+p3668-0000
+ - const: nvidia,tegra194
+ - description: Jetson Xavier NX Developer Kit (eMMC)
+ items:
+ - const: nvidia,p3509-0000+p3668-0001
+ - const: nvidia,tegra194
+ - items:
+ - const: nvidia,tegra234-vdk
+ - const: nvidia,tegra234
+ - description: Jetson AGX Orin
+ items:
+ - const: nvidia,p3701-0000
+ - const: nvidia,tegra234
+ - description: Jetson AGX Orin Developer Kit
+ items:
+ - const: nvidia,p3737-0000+p3701-0000
+ - const: nvidia,p3701-0000
+ - const: nvidia,tegra234
+ - description: Jetson AGX Orin Developer Kit with Industrial Module
+ items:
+ - const: nvidia,p3737-0000+p3701-0008
+ - const: nvidia,p3701-0008
+ - const: nvidia,tegra234
+ - description: NVIDIA IGX Orin Development Kit
+ items:
+ - const: nvidia,p3740-0002+p3701-0008
+ - const: nvidia,p3701-0008
+ - const: nvidia,tegra234
+ - description: Jetson Orin NX
+ items:
+ - const: nvidia,p3767-0000
+ - const: nvidia,tegra234
+ - description: Jetson Orin NX Engineering Reference Developer Kit
+ items:
+ - const: nvidia,p3768-0000+p3767-0000
+ - const: nvidia,p3767-0000
+ - const: nvidia,tegra234
+ - description: Jetson Orin Nano
+ items:
+ - const: nvidia,p3767-0005
+ - const: nvidia,tegra234
+ - description: Jetson Orin Nano Developer Kit
+ items:
+ - const: nvidia,p3768-0000+p3767-0005
+ - const: nvidia,p3767-0005
+ - const: nvidia,tegra234
+ - description: NVIDIA P3971-0089+P3834-0008 Engineering Reference Platform
+ items:
+ - const: nvidia,p3971-0089+p3834-0008
+ - const: nvidia,p3834-0008
+ - const: nvidia,tegra264
+
+additionalProperties: true
diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,nvec.txt b/Documentation/devicetree/bindings/arm/tegra/nvidia,nvec.txt
deleted file mode 100644
index 5ae601e7f51f..000000000000
--- a/Documentation/devicetree/bindings/arm/tegra/nvidia,nvec.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-NVIDIA compliant embedded controller
-
-Required properties:
-- compatible : should be "nvidia,nvec".
-- reg : the iomem of the i2c slave controller
-- interrupts : the interrupt line of the i2c slave controller
-- clock-frequency : the frequency of the i2c bus
-- gpios : the gpio used for ec request
-- slave-addr: the i2c address of the slave controller
-- clocks : Must contain an entry for each entry in clock-names.
- See ../clocks/clock-bindings.txt for details.
-- clock-names : Must include the following entries:
- Tegra20/Tegra30:
- - div-clk
- - fast-clk
- Tegra114:
- - div-clk
-- resets : Must contain an entry for each entry in reset-names.
- See ../reset/reset.txt for details.
-- reset-names : Must include the following entries:
- - i2c
diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra-ccplex-cluster.yaml b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra-ccplex-cluster.yaml
new file mode 100644
index 000000000000..36dbd0838f2d
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra-ccplex-cluster.yaml
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/tegra/nvidia,tegra-ccplex-cluster.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NVIDIA Tegra CPU COMPLEX CLUSTER area
+
+maintainers:
+ - Sumit Gupta <sumitg@nvidia.com>
+ - Mikko Perttunen <mperttunen@nvidia.com>
+ - Jon Hunter <jonathanh@nvidia.com>
+ - Thierry Reding <thierry.reding@gmail.com>
+
+description: |+
+ The Tegra CPU COMPLEX CLUSTER area contains memory-mapped
+ registers that initiate CPU frequency/voltage transitions.
+
+properties:
+ $nodename:
+ pattern: "ccplex@([0-9a-f]+)$"
+
+ compatible:
+ enum:
+ - nvidia,tegra186-ccplex-cluster
+ - nvidia,tegra234-ccplex-cluster
+
+ reg:
+ maxItems: 1
+
+ nvidia,bpmp:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description: |
+ Specifies the BPMP node that needs to be queried to get
+ operating point data for all CPUs.
+
+additionalProperties: false
+
+required:
+ - compatible
+ - reg
+ - nvidia,bpmp
+
+examples:
+ - |
+ ccplex@e000000 {
+ compatible = "nvidia,tegra234-ccplex-cluster";
+ reg = <0x0e000000 0x5ffff>;
+ nvidia,bpmp = <&bpmp>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra186-pmc.txt b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra186-pmc.txt
deleted file mode 100644
index c9fd6d1de57e..000000000000
--- a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra186-pmc.txt
+++ /dev/null
@@ -1,129 +0,0 @@
-NVIDIA Tegra Power Management Controller (PMC)
-
-Required properties:
-- compatible: Should contain one of the following:
- - "nvidia,tegra186-pmc": for Tegra186
- - "nvidia,tegra194-pmc": for Tegra194
-- reg: Must contain an (offset, length) pair of the register set for each
- entry in reg-names.
-- reg-names: Must include the following entries:
- - "pmc"
- - "wake"
- - "aotag"
- - "scratch"
- - "misc" (Only for Tegra194)
-
-Optional properties:
-- nvidia,invert-interrupt: If present, inverts the PMU interrupt signal.
-
-Example:
-
-SoC DTSI:
-
- pmc@c3600000 {
- compatible = "nvidia,tegra186-pmc";
- reg = <0 0x0c360000 0 0x10000>,
- <0 0x0c370000 0 0x10000>,
- <0 0x0c380000 0 0x10000>,
- <0 0x0c390000 0 0x10000>;
- reg-names = "pmc", "wake", "aotag", "scratch";
- };
-
-Board DTS:
-
- pmc@c360000 {
- nvidia,invert-interrupt;
- };
-
-== Pad Control ==
-
-On Tegra SoCs a pad is a set of pins which are configured as a group.
-The pin grouping is a fixed attribute of the hardware. The PMC can be
-used to set pad power state and signaling voltage. A pad can be either
-in active or power down mode. The support for power state and signaling
-voltage configuration varies depending on the pad in question. 3.3 V and
-1.8 V signaling voltages are supported on pins where software
-controllable signaling voltage switching is available.
-
-Pad configurations are described with pin configuration nodes which
-are placed under the pmc node and they are referred to by the pinctrl
-client properties. For more information see
-Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt.
-
-The following pads are present on Tegra186:
-csia csib dsi mipi-bias
-pex-clk-bias pex-clk3 pex-clk2 pex-clk1
-usb0 usb1 usb2 usb-bias
-uart audio hsic dbg
-hdmi-dp0 hdmi-dp1 pex-cntrl sdmmc2-hv
-sdmmc4 cam dsib dsic
-dsid csic csid csie
-dsif spi ufs dmic-hv
-edp sdmmc1-hv sdmmc3-hv conn
-audio-hv ao-hv
-
-Required pin configuration properties:
- - pins: A list of strings, each of which contains the name of a pad
- to be configured.
-
-Optional pin configuration properties:
- - low-power-enable: Configure the pad into power down mode
- - low-power-disable: Configure the pad into active mode
- - power-source: Must contain either TEGRA_IO_PAD_VOLTAGE_1V8 or
- TEGRA_IO_PAD_VOLTAGE_3V3 to select between signaling voltages.
- The values are defined in
- include/dt-bindings/pinctrl/pinctrl-tegra-io-pad.h.
-
-Note: The power state can be configured on all of the above pads except
- for ao-hv. Following pads have software configurable signaling
- voltages: sdmmc2-hv, dmic-hv, sdmmc1-hv, sdmmc3-hv, audio-hv,
- ao-hv.
-
-Pad configuration state example:
- pmc: pmc@7000e400 {
- compatible = "nvidia,tegra186-pmc";
- reg = <0 0x0c360000 0 0x10000>,
- <0 0x0c370000 0 0x10000>,
- <0 0x0c380000 0 0x10000>,
- <0 0x0c390000 0 0x10000>;
- reg-names = "pmc", "wake", "aotag", "scratch";
-
- ...
-
- sdmmc1_3v3: sdmmc1-3v3 {
- pins = "sdmmc1-hv";
- power-source = <TEGRA_IO_PAD_VOLTAGE_3V3>;
- };
-
- sdmmc1_1v8: sdmmc1-1v8 {
- pins = "sdmmc1-hv";
- power-source = <TEGRA_IO_PAD_VOLTAGE_1V8>;
- };
-
- hdmi_off: hdmi-off {
- pins = "hdmi";
- low-power-enable;
- }
-
- hdmi_on: hdmi-on {
- pins = "hdmi";
- low-power-disable;
- }
- };
-
-Pinctrl client example:
- sdmmc1: sdhci@3400000 {
- ...
- pinctrl-names = "sdmmc-3v3", "sdmmc-1v8";
- pinctrl-0 = <&sdmmc1_3v3>;
- pinctrl-1 = <&sdmmc1_1v8>;
- };
-
- ...
-
- sor0: sor@15540000 {
- ...
- pinctrl-0 = <&hdmi_off>;
- pinctrl-1 = <&hdmi_on>;
- pinctrl-names = "hdmi-on", "hdmi-off";
- };
diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra186-pmc.yaml b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra186-pmc.yaml
new file mode 100644
index 000000000000..be70819020c5
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra186-pmc.yaml
@@ -0,0 +1,217 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/tegra/nvidia,tegra186-pmc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NVIDIA Tegra Power Management Controller (PMC)
+
+maintainers:
+ - Thierry Reding <thierry.reding@gmail.com>
+ - Jon Hunter <jonathanh@nvidia.com>
+
+properties:
+ compatible:
+ enum:
+ - nvidia,tegra186-pmc
+ - nvidia,tegra194-pmc
+ - nvidia,tegra234-pmc
+ - nvidia,tegra264-pmc
+
+ reg:
+ minItems: 4
+ maxItems: 5
+
+ reg-names:
+ minItems: 4
+ items:
+ - const: pmc
+ - const: wake
+ - const: aotag
+ - enum: [ scratch, misc ]
+ - const: misc
+
+ interrupt-controller: true
+
+ "#interrupt-cells":
+ description: Specifies the number of cells needed to encode an
+ interrupt source. The value must be 2.
+ const: 2
+
+ nvidia,invert-interrupt:
+ description: If present, inverts the PMU interrupt signal.
+ $ref: /schemas/types.yaml#/definitions/flag
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: nvidia,tegra186-pmc
+ then:
+ properties:
+ reg:
+ maxItems: 4
+ reg-names:
+ maxItems: 4
+ contains:
+ const: scratch
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: nvidia,tegra194-pmc
+ then:
+ properties:
+ reg:
+ minItems: 5
+ reg-names:
+ minItems: 5
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: nvidia,tegra234-pmc
+ then:
+ properties:
+ reg-names:
+ contains:
+ const: misc
+
+patternProperties:
+ "^[a-z0-9]+-[a-z0-9]+$":
+ if:
+ type: object
+ then:
+ description: |
+ These are pad configuration nodes. On Tegra SoCs a pad is a set of
+ pins which are configured as a group. The pin grouping is a fixed
+ attribute of the hardware. The PMC can be used to set pad power
+ state and signaling voltage. A pad can be either in active or
+ power down mode. The support for power state and signaling voltage
+ configuration varies depending on the pad in question. 3.3 V and
+ 1.8 V signaling voltages are supported on pins where software
+ controllable signaling voltage switching is available.
+
+ Pad configurations are described with pin configuration nodes
+ which are placed under the pmc node and they are referred to by
+ the pinctrl client properties. For more information see
+
+ Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
+
+ The following pads are present on Tegra186:
+
+ csia, csib, dsi, mipi-bias, pex-clk-bias, pex-clk3, pex-clk2,
+ pex-clk1, usb0, usb1, usb2, usb-bias, uart, audio, hsic, dbg,
+ hdmi-dp0, hdmi-dp1, pex-cntrl, sdmmc2-hv, sdmmc4, cam, dsib,
+ dsic, dsid, csic, csid, csie, dsif, spi, ufs, dmic-hv, edp,
+ sdmmc1-hv, sdmmc3-hv, conn, audio-hv, ao-hv
+
+ The following pads are present on Tegra194:
+
+ csia, csib, mipi-bias, pex-clk-bias, pex-clk3, pex-clk2,
+ pex-clk1, eqos, pex-clk-2-bias, pex-clk-2, dap3, dap5, uart,
+ pwr-ctl, soc-gpio53, audio, gp-pwm2, gp-pwm3, soc-gpio12,
+ soc-gpio13, soc-gpio10, uart4, uart5, dbg, hdmi-dp3, hdmi-dp2,
+ hdmi-dp0, hdmi-dp1, pex-cntrl, pex-ctl2, pex-l0-rst,
+ pex-l1-rst, sdmmc4, pex-l5-rst, cam, csic, csid, csie, csif,
+ spi, ufs, csig, csih, edp, sdmmc1-hv, sdmmc3-hv, conn,
+ audio-hv, ao-hv
+
+ properties:
+ pins:
+ $ref: /schemas/types.yaml#/definitions/string
+ description: Must contain the name of the pad(s) to be
+ configured.
+
+ low-power-enable:
+ description: Configure the pad into power down mode.
+ $ref: /schemas/types.yaml#/definitions/flag
+
+ low-power-disable:
+ description: Configure the pad into active mode.
+ $ref: /schemas/types.yaml#/definitions/flag
+
+ power-source:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: |
+ Must contain either TEGRA_IO_PAD_VOLTAGE_1V8 or
+ TEGRA_IO_PAD_VOLTAGE_3V3 to select between signalling
+ voltages.
+
+ The values are defined in
+
+ include/dt-bindings/pinctrl/pinctrl-tegra-io-pad.h
+
+ The power state can be configured on all of the above pads
+ except for ao-hv. Following pads have software configurable
+ signaling voltages: sdmmc2-hv, dmic-hv, sdmmc1-hv, sdmmc3-hv,
+ audio-hv, ao-hv.
+
+ phandle: true
+
+ required:
+ - pins
+
+ additionalProperties: false
+
+required:
+ - compatible
+ - reg
+ - reg-names
+
+additionalProperties: false
+
+dependencies:
+ interrupt-controller: ['#interrupt-cells']
+ "#interrupt-cells":
+ required:
+ - interrupt-controller
+
+examples:
+ - |
+ #include <dt-bindings/clock/tegra186-clock.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/pinctrl/pinctrl-tegra-io-pad.h>
+ #include <dt-bindings/memory/tegra186-mc.h>
+ #include <dt-bindings/reset/tegra186-reset.h>
+
+ pmc@c3600000 {
+ compatible = "nvidia,tegra186-pmc";
+ reg = <0x0c360000 0x10000>,
+ <0x0c370000 0x10000>,
+ <0x0c380000 0x10000>,
+ <0x0c390000 0x10000>;
+ reg-names = "pmc", "wake", "aotag", "scratch";
+ nvidia,invert-interrupt;
+
+ sdmmc1_3v3: sdmmc1-3v3 {
+ pins = "sdmmc1-hv";
+ power-source = <TEGRA_IO_PAD_VOLTAGE_3V3>;
+ };
+
+ sdmmc1_1v8: sdmmc1-1v8 {
+ pins = "sdmmc1-hv";
+ power-source = <TEGRA_IO_PAD_VOLTAGE_1V8>;
+ };
+ };
+
+ sdmmc1: mmc@3400000 {
+ compatible = "nvidia,tegra186-sdhci";
+ reg = <0x03400000 0x10000>;
+ interrupts = <GIC_SPI 62 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&bpmp TEGRA186_CLK_SDMMC1>,
+ <&bpmp TEGRA186_CLK_SDMMC_LEGACY_TM>;
+ clock-names = "sdhci", "tmclk";
+ resets = <&bpmp TEGRA186_RESET_SDMMC1>;
+ reset-names = "sdhci";
+ interconnects = <&mc TEGRA186_MEMORY_CLIENT_SDMMCRA &emc>,
+ <&mc TEGRA186_MEMORY_CLIENT_SDMMCWA &emc>;
+ interconnect-names = "dma-mem", "write";
+ iommus = <&smmu TEGRA186_SID_SDMMC1>;
+ pinctrl-names = "sdmmc-3v3", "sdmmc-1v8";
+ pinctrl-0 = <&sdmmc1_3v3>;
+ pinctrl-1 = <&sdmmc1_1v8>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra194-axi2apb.yaml b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra194-axi2apb.yaml
new file mode 100644
index 000000000000..5e0f1dc542b0
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra194-axi2apb.yaml
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/tegra/nvidia,tegra194-axi2apb.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NVIDIA Tegra194 AXI2APB bridge
+
+maintainers:
+ - Sumit Gupta <sumitg@nvidia.com>
+
+properties:
+ $nodename:
+ pattern: "^axi2apb@([0-9a-f]+)$"
+
+ compatible:
+ enum:
+ - nvidia,tegra194-axi2apb
+
+ reg:
+ maxItems: 6
+ description: Physical base address and length of registers for all bridges
+
+additionalProperties: false
+
+required:
+ - compatible
+ - reg
+
+examples:
+ - |
+ axi2apb: axi2apb@2390000 {
+ compatible = "nvidia,tegra194-axi2apb";
+ reg = <0x02390000 0x1000>,
+ <0x023a0000 0x1000>,
+ <0x023b0000 0x1000>,
+ <0x023c0000 0x1000>,
+ <0x023d0000 0x1000>,
+ <0x023e0000 0x1000>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra194-cbb.yaml b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra194-cbb.yaml
new file mode 100644
index 000000000000..d9c54c32c6b9
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra194-cbb.yaml
@@ -0,0 +1,97 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/tegra/nvidia,tegra194-cbb.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NVIDIA Tegra194 CBB 1.0
+
+maintainers:
+ - Sumit Gupta <sumitg@nvidia.com>
+
+description: |+
+ The Control Backbone (CBB) is comprised of the physical path from an
+ initiator to a target's register configuration space. CBB 1.0 has
+ multiple hierarchical sub-NOCs (Network-on-Chip) and connects various
+ initiators and targets using different bridges like AXIP2P, AXI2APB.
+
+ This driver handles errors due to illegal register accesses reported
+ by the NOCs inside the CBB. NOCs reporting errors are cluster NOCs
+ "AON-NOC, SCE-NOC, RCE-NOC, BPMP-NOC, CV-NOC" and "CBB Central NOC"
+ which is the main NOC.
+
+ By default, the access issuing initiator is informed about the error
+ using SError or Data Abort exception unless the ERD (Error Response
+ Disable) is enabled/set for that initiator. If the ERD is enabled, then
+ SError or Data Abort is masked and the error is reported with interrupt.
+
+ - For CCPLEX (CPU Complex) initiator, the driver sets ERD bit. So, the
+ errors due to illegal accesses from CCPLEX are reported by interrupts.
+ If ERD is not set, then error is reported by SError.
+ - For other initiators, the ERD is disabled. So, the access issuing
+ initiator is informed about the illegal access by Data Abort exception.
+ In addition, an interrupt is also generated to CCPLEX. These initiators
+ include all engines using Cortex-R5 (which is ARMv7 CPU cluster) and
+ engines like TSEC (Security co-processor), NVDEC (NVIDIA Video Decoder
+ engine) etc which can initiate transactions.
+
+ The driver prints relevant debug information like Error Code, Error
+ Description, Master, Address, AXI ID, Cache, Protection, Security Group
+ etc on receiving error notification.
+
+properties:
+ $nodename:
+ pattern: "^[a-z]+-noc@[0-9a-f]+$"
+
+ compatible:
+ enum:
+ - nvidia,tegra194-cbb-noc
+ - nvidia,tegra194-aon-noc
+ - nvidia,tegra194-bpmp-noc
+ - nvidia,tegra194-rce-noc
+ - nvidia,tegra194-sce-noc
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ description:
+ CCPLEX receives secure or nonsecure interrupt depending on error type.
+ A secure interrupt is received for SEC(firewall) & SLV errors and a
+ non-secure interrupt is received for TMO & DEC errors.
+ items:
+ - description: non-secure interrupt
+ - description: secure interrupt
+
+ nvidia,axi2apb:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ Specifies the node having all axi2apb bridges which need to be checked
+ for any error logged in their status register.
+
+ nvidia,apbmisc:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ Specifies the apbmisc node which need to be used for reading the ERD
+ register.
+
+additionalProperties: false
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - nvidia,apbmisc
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ cbb-noc@2300000 {
+ compatible = "nvidia,tegra194-cbb-noc";
+ reg = <0x02300000 0x1000>;
+ interrupts = <GIC_SPI 230 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 231 IRQ_TYPE_LEVEL_HIGH>;
+ nvidia,axi2apb = <&axi2apb>;
+ nvidia,apbmisc = <&apbmisc>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-ahb.txt b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-ahb.txt
deleted file mode 100644
index 9a4295b54539..000000000000
--- a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-ahb.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-NVIDIA Tegra AHB
-
-Required properties:
-- compatible : For Tegra20, must contain "nvidia,tegra20-ahb". For
- Tegra30, must contain "nvidia,tegra30-ahb". Otherwise, must contain
- '"nvidia,<chip>-ahb", "nvidia,tegra30-ahb"' where <chip> is tegra124,
- tegra132, or tegra210.
-- reg : Should contain 1 register ranges(address and length). For
- Tegra20, Tegra30, and Tegra114 chips, the value must be <0x6000c004
- 0x10c>. For Tegra124, Tegra132 and Tegra210 chips, the value should
- be be <0x6000c000 0x150>.
-
-Example (for a Tegra20 chip):
- ahb: ahb@6000c004 {
- compatible = "nvidia,tegra20-ahb";
- reg = <0x6000c004 0x10c>; /* AHB Arbitration + Gizmo Controller */
- };
diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-emc.txt b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-emc.txt
deleted file mode 100644
index 4c33b29dc660..000000000000
--- a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-emc.txt
+++ /dev/null
@@ -1,100 +0,0 @@
-Embedded Memory Controller
-
-Properties:
-- name : Should be emc
-- #address-cells : Should be 1
-- #size-cells : Should be 0
-- compatible : Should contain "nvidia,tegra20-emc".
-- reg : Offset and length of the register set for the device
-- nvidia,use-ram-code : If present, the sub-nodes will be addressed
- and chosen using the ramcode board selector. If omitted, only one
- set of tables can be present and said tables will be used
- irrespective of ram-code configuration.
-
-Child device nodes describe the memory settings for different configurations and clock rates.
-
-Example:
-
- memory-controller@7000f400 {
- #address-cells = < 1 >;
- #size-cells = < 0 >;
- compatible = "nvidia,tegra20-emc";
- reg = <0x7000f4000 0x200>;
- }
-
-
-Embedded Memory Controller ram-code table
-
-If the emc node has the nvidia,use-ram-code property present, then the
-next level of nodes below the emc table are used to specify which settings
-apply for which ram-code settings.
-
-If the emc node lacks the nvidia,use-ram-code property, this level is omitted
-and the tables are stored directly under the emc node (see below).
-
-Properties:
-
-- name : Should be emc-tables
-- nvidia,ram-code : the binary representation of the ram-code board strappings
- for which this node (and children) are valid.
-
-
-
-Embedded Memory Controller configuration table
-
-This is a table containing the EMC register settings for the various
-operating speeds of the memory controller. They are always located as
-subnodes of the emc controller node.
-
-There are two ways of specifying which tables to use:
-
-* The simplest is if there is just one set of tables in the device tree,
- and they will always be used (based on which frequency is used).
- This is the preferred method, especially when firmware can fill in
- this information based on the specific system information and just
- pass it on to the kernel.
-
-* The slightly more complex one is when more than one memory configuration
- might exist on the system. The Tegra20 platform handles this during
- early boot by selecting one out of possible 4 memory settings based
- on a 2-pin "ram code" bootstrap setting on the board. The values of
- these strappings can be read through a register in the SoC, and thus
- used to select which tables to use.
-
-Properties:
-- name : Should be emc-table
-- compatible : Should contain "nvidia,tegra20-emc-table".
-- reg : either an opaque enumerator to tell different tables apart, or
- the valid frequency for which the table should be used (in kHz).
-- clock-frequency : the clock frequency for the EMC at which this
- table should be used (in kHz).
-- nvidia,emc-registers : a 46 word array of EMC registers to be programmed
- for operation at the 'clock-frequency' setting.
- The order and contents of the registers are:
- RC, RFC, RAS, RP, R2W, W2R, R2P, W2P, RD_RCD, WR_RCD, RRD, REXT,
- WDV, QUSE, QRST, QSAFE, RDV, REFRESH, BURST_REFRESH_NUM, PDEX2WR,
- PDEX2RD, PCHG2PDEN, ACT2PDEN, AR2PDEN, RW2PDEN, TXSR, TCKE, TFAW,
- TRPAB, TCLKSTABLE, TCLKSTOP, TREFBW, QUSE_EXTRA, FBIO_CFG6, ODT_WRITE,
- ODT_READ, FBIO_CFG5, CFG_DIG_DLL, DLL_XFORM_DQS, DLL_XFORM_QUSE,
- ZCAL_REF_CNT, ZCAL_WAIT_CNT, AUTO_CAL_INTERVAL, CFG_CLKTRIM_0,
- CFG_CLKTRIM_1, CFG_CLKTRIM_2
-
- emc-table@166000 {
- reg = <166000>;
- compatible = "nvidia,tegra20-emc-table";
- clock-frequency = < 166000 >;
- nvidia,emc-registers = < 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 >;
- };
-
- emc-table@333000 {
- reg = <333000>;
- compatible = "nvidia,tegra20-emc-table";
- clock-frequency = < 333000 >;
- nvidia,emc-registers = < 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 >;
- };
diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-flowctrl.txt b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-flowctrl.txt
deleted file mode 100644
index a855c1bffc0f..000000000000
--- a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-flowctrl.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-NVIDIA Tegra Flow Controller
-
-Required properties:
-- compatible: Should contain one of the following:
- - "nvidia,tegra20-flowctrl": for Tegra20
- - "nvidia,tegra30-flowctrl": for Tegra30
- - "nvidia,tegra114-flowctrl": for Tegra114
- - "nvidia,tegra124-flowctrl": for Tegra124
- - "nvidia,tegra132-flowctrl", "nvidia,tegra124-flowctrl": for Tegra132
- - "nvidia,tegra210-flowctrl": for Tegra210
-- reg: Should contain one register range (address and length)
-
-Example:
-
- flow-controller@60007000 {
- compatible = "nvidia,tegra20-flowctrl";
- reg = <0x60007000 0x1000>;
- };
diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.txt b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.txt
deleted file mode 100644
index cb12f33a247f..000000000000
--- a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.txt
+++ /dev/null
@@ -1,300 +0,0 @@
-NVIDIA Tegra Power Management Controller (PMC)
-
-== Power Management Controller Node ==
-
-The PMC block interacts with an external Power Management Unit. The PMC
-mostly controls the entry and exit of the system from different sleep
-modes. It provides power-gating controllers for SoC and CPU power-islands.
-
-Required properties:
-- name : Should be pmc
-- compatible : Should contain one of the following:
- For Tegra20 must contain "nvidia,tegra20-pmc".
- For Tegra30 must contain "nvidia,tegra30-pmc".
- For Tegra114 must contain "nvidia,tegra114-pmc"
- For Tegra124 must contain "nvidia,tegra124-pmc"
- For Tegra132 must contain "nvidia,tegra124-pmc"
- For Tegra210 must contain "nvidia,tegra210-pmc"
-- reg : Offset and length of the register set for the device
-- clocks : Must contain an entry for each entry in clock-names.
- See ../clocks/clock-bindings.txt for details.
-- clock-names : Must include the following entries:
- "pclk" (The Tegra clock of that name),
- "clk32k_in" (The 32KHz clock input to Tegra).
-
-Optional properties:
-- nvidia,invert-interrupt : If present, inverts the PMU interrupt signal.
- The PMU is an external Power Management Unit, whose interrupt output
- signal is fed into the PMC. This signal is optionally inverted, and then
- fed into the ARM GIC. The PMC is not involved in the detection or
- handling of this interrupt signal, merely its inversion.
-- nvidia,suspend-mode : The suspend mode that the platform should use.
- Valid values are 0, 1 and 2:
- 0 (LP0): CPU + Core voltage off and DRAM in self-refresh
- 1 (LP1): CPU voltage off and DRAM in self-refresh
- 2 (LP2): CPU voltage off
-- nvidia,core-power-req-active-high : Boolean, core power request active-high
-- nvidia,sys-clock-req-active-high : Boolean, system clock request active-high
-- nvidia,combined-power-req : Boolean, combined power request for CPU & Core
-- nvidia,cpu-pwr-good-en : Boolean, CPU power good signal (from PMIC to PMC)
- is enabled.
-
-Required properties when nvidia,suspend-mode is specified:
-- nvidia,cpu-pwr-good-time : CPU power good time in uS.
-- nvidia,cpu-pwr-off-time : CPU power off time in uS.
-- nvidia,core-pwr-good-time : <Oscillator-stable-time Power-stable-time>
- Core power good time in uS.
-- nvidia,core-pwr-off-time : Core power off time in uS.
-
-Required properties when nvidia,suspend-mode=<0>:
-- nvidia,lp0-vec : <start length> Starting address and length of LP0 vector
- The LP0 vector contains the warm boot code that is executed by AVP when
- resuming from the LP0 state. The AVP (Audio-Video Processor) is an ARM7
- processor and always being the first boot processor when chip is power on
- or resume from deep sleep mode. When the system is resumed from the deep
- sleep mode, the warm boot code will restore some PLLs, clocks and then
- bring up CPU0 for resuming the system.
-
-Hardware-triggered thermal reset:
-On Tegra30, Tegra114 and Tegra124, if the 'i2c-thermtrip' subnode exists,
-hardware-triggered thermal reset will be enabled.
-
-Required properties for hardware-triggered thermal reset (inside 'i2c-thermtrip'):
-- nvidia,i2c-controller-id : ID of I2C controller to send poweroff command to. Valid values are
- described in section 9.2.148 "APBDEV_PMC_SCRATCH53_0" of the
- Tegra K1 Technical Reference Manual.
-- nvidia,bus-addr : Bus address of the PMU on the I2C bus
-- nvidia,reg-addr : I2C register address to write poweroff command to
-- nvidia,reg-data : Poweroff command to write to PMU
-
-Optional properties for hardware-triggered thermal reset (inside 'i2c-thermtrip'):
-- nvidia,pinmux-id : Pinmux used by the hardware when issuing poweroff command.
- Defaults to 0. Valid values are described in section 12.5.2
- "Pinmux Support" of the Tegra4 Technical Reference Manual.
-
-Optional nodes:
-- powergates : This node contains a hierarchy of power domain nodes, which
- should match the powergates on the Tegra SoC. See "Powergate
- Nodes" below.
-
-Example:
-
-/ SoC dts including file
-pmc@7000f400 {
- compatible = "nvidia,tegra20-pmc";
- reg = <0x7000e400 0x400>;
- clocks = <&tegra_car 110>, <&clk32k_in>;
- clock-names = "pclk", "clk32k_in";
- nvidia,invert-interrupt;
- nvidia,suspend-mode = <1>;
- nvidia,cpu-pwr-good-time = <2000>;
- nvidia,cpu-pwr-off-time = <100>;
- nvidia,core-pwr-good-time = <3845 3845>;
- nvidia,core-pwr-off-time = <458>;
- nvidia,core-power-req-active-high;
- nvidia,sys-clock-req-active-high;
- nvidia,lp0-vec = <0xbdffd000 0x2000>;
-};
-
-/ Tegra board dts file
-{
- ...
- pmc@7000f400 {
- i2c-thermtrip {
- nvidia,i2c-controller-id = <4>;
- nvidia,bus-addr = <0x40>;
- nvidia,reg-addr = <0x36>;
- nvidia,reg-data = <0x2>;
- };
- };
- ...
- clocks {
- compatible = "simple-bus";
- #address-cells = <1>;
- #size-cells = <0>;
-
- clk32k_in: clock {
- compatible = "fixed-clock";
- reg=<0>;
- #clock-cells = <0>;
- clock-frequency = <32768>;
- };
- };
- ...
-};
-
-
-== Powergate Nodes ==
-
-Each of the powergate nodes represents a power-domain on the Tegra SoC
-that can be power-gated by the Tegra PMC. The name of the powergate node
-should be one of the below. Note that not every powergate is applicable
-to all Tegra devices and the following list shows which powergates are
-applicable to which devices. Please refer to the Tegra TRM for more
-details on the various powergates.
-
- Name Description Devices Applicable
- 3d 3D Graphics Tegra20/114/124/210
- 3d0 3D Graphics 0 Tegra30
- 3d1 3D Graphics 1 Tegra30
- aud Audio Tegra210
- dfd Debug Tegra210
- dis Display A Tegra114/124/210
- disb Display B Tegra114/124/210
- heg 2D Graphics Tegra30/114/124/210
- iram Internal RAM Tegra124/210
- mpe MPEG Encode All
- nvdec NVIDIA Video Decode Engine Tegra210
- nvjpg NVIDIA JPEG Engine Tegra210
- pcie PCIE Tegra20/30/124/210
- sata SATA Tegra30/124/210
- sor Display interfaces Tegra124/210
- ve2 Video Encode Engine 2 Tegra210
- venc Video Encode Engine All
- vdec Video Decode Engine Tegra20/30/114/124
- vic Video Imaging Compositor Tegra124/210
- xusba USB Partition A Tegra114/124/210
- xusbb USB Partition B Tegra114/124/210
- xusbc USB Partition C Tegra114/124/210
-
-Required properties:
- - clocks: Must contain an entry for each clock required by the PMC for
- controlling a power-gate. See ../clocks/clock-bindings.txt for details.
- - resets: Must contain an entry for each reset required by the PMC for
- controlling a power-gate. See ../reset/reset.txt for details.
- - #power-domain-cells: Must be 0.
-
-Example:
-
- pmc: pmc@7000e400 {
- compatible = "nvidia,tegra210-pmc";
- reg = <0x0 0x7000e400 0x0 0x400>;
- clocks = <&tegra_car TEGRA210_CLK_PCLK>, <&clk32k_in>;
- clock-names = "pclk", "clk32k_in";
-
- powergates {
- pd_audio: aud {
- clocks = <&tegra_car TEGRA210_CLK_APE>,
- <&tegra_car TEGRA210_CLK_APB2APE>;
- resets = <&tegra_car 198>;
- #power-domain-cells = <0>;
- };
- };
- };
-
-
-== Powergate Clients ==
-
-Hardware blocks belonging to a power domain should contain a "power-domains"
-property that is a phandle pointing to the corresponding powergate node.
-
-Example:
-
- adma: adma@702e2000 {
- ...
- power-domains = <&pd_audio>;
- ...
- };
-
-== Pad Control ==
-
-On Tegra SoCs a pad is a set of pins which are configured as a group.
-The pin grouping is a fixed attribute of the hardware. The PMC can be
-used to set pad power state and signaling voltage. A pad can be either
-in active or power down mode. The support for power state and signaling
-voltage configuration varies depending on the pad in question. 3.3 V and
-1.8 V signaling voltages are supported on pins where software
-controllable signaling voltage switching is available.
-
-The pad configuration state nodes are placed under the pmc node and they
-are referred to by the pinctrl client properties. For more information
-see Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt.
-The pad name should be used as the value of the pins property in pin
-configuration nodes.
-
-The following pads are present on Tegra124 and Tegra132:
-audio bb cam comp
-csia csb cse dsi
-dsib dsic dsid hdmi
-hsic hv lvds mipi-bias
-nand pex-bias pex-clk1 pex-clk2
-pex-cntrl sdmmc1 sdmmc3 sdmmc4
-sys_ddc uart usb0 usb1
-usb2 usb_bias
-
-The following pads are present on Tegra210:
-audio audio-hv cam csia
-csib csic csid csie
-csif dbg debug-nonao dmic
-dp dsi dsib dsic
-dsid emmc emmc2 gpio
-hdmi hsic lvds mipi-bias
-pex-bias pex-clk1 pex-clk2 pex-cntrl
-sdmmc1 sdmmc3 spi spi-hv
-uart usb0 usb1 usb2
-usb3 usb-bias
-
-Required pin configuration properties:
- - pins: Must contain name of the pad(s) to be configured.
-
-Optional pin configuration properties:
- - low-power-enable: Configure the pad into power down mode
- - low-power-disable: Configure the pad into active mode
- - power-source: Must contain either TEGRA_IO_PAD_VOLTAGE_1V8
- or TEGRA_IO_PAD_VOLTAGE_3V3 to select between signaling voltages.
- The values are defined in
- include/dt-bindings/pinctrl/pinctrl-tegra-io-pad.h.
-
-Note: The power state can be configured on all of the Tegra124 and
- Tegra132 pads. None of the Tegra124 or Tegra132 pads support
- signaling voltage switching.
-
-Note: All of the listed Tegra210 pads except pex-cntrl support power
- state configuration. Signaling voltage switching is supported on
- following Tegra210 pads: audio, audio-hv, cam, dbg, dmic, gpio,
- pex-cntrl, sdmmc1, sdmmc3, spi, spi-hv, and uart.
-
-Pad configuration state example:
- pmc: pmc@7000e400 {
- compatible = "nvidia,tegra210-pmc";
- reg = <0x0 0x7000e400 0x0 0x400>;
- clocks = <&tegra_car TEGRA210_CLK_PCLK>, <&clk32k_in>;
- clock-names = "pclk", "clk32k_in";
-
- ...
-
- sdmmc1_3v3: sdmmc1-3v3 {
- pins = "sdmmc1";
- power-source = <TEGRA_IO_PAD_VOLTAGE_3V3>;
- };
-
- sdmmc1_1v8: sdmmc1-1v8 {
- pins = "sdmmc1";
- power-source = <TEGRA_IO_PAD_VOLTAGE_1V8>;
- };
-
- hdmi_off: hdmi-off {
- pins = "hdmi";
- low-power-enable;
- }
-
- hdmi_on: hdmi-on {
- pins = "hdmi";
- low-power-disable;
- }
- };
-
-Pinctrl client example:
- sdmmc1: sdhci@700b0000 {
- ...
- pinctrl-names = "sdmmc-3v3", "sdmmc-1v8";
- pinctrl-0 = <&sdmmc1_3v3>;
- pinctrl-1 = <&sdmmc1_1v8>;
- };
- ...
- sor@54540000 {
- ...
- pinctrl-0 = <&hdmi_off>;
- pinctrl-1 = <&hdmi_on>;
- pinctrl-names = "hdmi-on", "hdmi-off";
- };
diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra234-cbb.yaml b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra234-cbb.yaml
new file mode 100644
index 000000000000..fcdf03131323
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra234-cbb.yaml
@@ -0,0 +1,74 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/tegra/nvidia,tegra234-cbb.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NVIDIA Tegra CBB 2.0
+
+maintainers:
+ - Sumit Gupta <sumitg@nvidia.com>
+
+description: |+
+ The Control Backbone (CBB) is comprised of the physical path from an
+ initiator to a target's register configuration space. CBB 2.0 consists
+ of multiple sub-blocks connected to each other to create a topology.
+ The Tegra234 SoC has different fabrics based on CBB 2.0 architecture
+ which include cluster fabrics BPMP, AON, PSC, SCE, RCE, DCE, FSI and
+ "CBB central fabric".
+
+ In CBB 2.0, each initiator which can issue transactions connects to a
+ Root Master Node (MN) before it connects to any other element of the
+ fabric. Each Root MN contains a Error Monitor (EM) which detects and
+ logs error. Interrupts from various EM blocks are collated by Error
+ Notifier (EN) which is per fabric and presents a single interrupt from
+ fabric to the SoC interrupt controller.
+
+ The driver handles errors from CBB due to illegal register accesses
+ and prints debug information about failed transaction on receiving
+ the interrupt from EN. Debug information includes Error Code, Error
+ Description, MasterID, Fabric, SlaveID, Address, Cache, Protection,
+ Security Group etc on receiving error notification.
+
+ If the Error Response Disable (ERD) is set/enabled for an initiator,
+ then SError or Data abort exception error response is masked and an
+ interrupt is used for reporting errors due to illegal accesses from
+ that initiator. The value returned on read failures is '0xFFFFFFFF'
+ for compatibility with PCIE.
+
+properties:
+ $nodename:
+ pattern: "^[a-z]+-fabric@[0-9a-f]+$"
+
+ compatible:
+ enum:
+ - nvidia,tegra234-aon-fabric
+ - nvidia,tegra234-bpmp-fabric
+ - nvidia,tegra234-cbb-fabric
+ - nvidia,tegra234-dce-fabric
+ - nvidia,tegra234-rce-fabric
+ - nvidia,tegra234-sce-fabric
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ items:
+ - description: secure interrupt from error notifier
+
+additionalProperties: false
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ cbb-fabric@1300000 {
+ compatible = "nvidia,tegra234-cbb-fabric";
+ reg = <0x13a00000 0x400000>;
+ interrupts = <GIC_SPI 231 IRQ_TYPE_LEVEL_HIGH>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra30-actmon.txt b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra30-actmon.txt
deleted file mode 100644
index ea670a5d7ee3..000000000000
--- a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra30-actmon.txt
+++ /dev/null
@@ -1,32 +0,0 @@
-NVIDIA Tegra Activity Monitor
-
-The activity monitor block collects statistics about the behaviour of other
-components in the system. This information can be used to derive the rate at
-which the external memory needs to be clocked in order to serve all requests
-from the monitored clients.
-
-Required properties:
-- compatible: should be "nvidia,tegra<chip>-actmon"
-- reg: offset and length of the register set for the device
-- interrupts: standard interrupt property
-- clocks: Must contain a phandle and clock specifier pair for each entry in
-clock-names. See ../../clock/clock-bindings.txt for details.
-- clock-names: Must include the following entries:
- - actmon
- - emc
-- resets: Must contain an entry for each entry in reset-names. See
-../../reset/reset.txt for details.
-- reset-names: Must include the following entries:
- - actmon
-
-Example:
- actmon@6000c800 {
- compatible = "nvidia,tegra124-actmon";
- reg = <0x0 0x6000c800 0x0 0x400>;
- interrupts = <GIC_SPI 45 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&tegra_car TEGRA124_CLK_ACTMON>,
- <&tegra_car TEGRA124_CLK_EMC>;
- clock-names = "actmon", "emc";
- resets = <&tegra_car 119>;
- reset-names = "actmon";
- };
diff --git a/Documentation/devicetree/bindings/arm/tesla.yaml b/Documentation/devicetree/bindings/arm/tesla.yaml
new file mode 100644
index 000000000000..d670a0d56222
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/tesla.yaml
@@ -0,0 +1,27 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/tesla.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Tesla Full Self Driving(FSD) platforms
+
+maintainers:
+ - Alim Akhtar <alim.akhtar@samsung.com>
+ - linux-fsd@tesla.com
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+
+ - description: FSD SoC board
+ items:
+ - enum:
+ - tesla,fsd-evb # Tesla FSD Evaluation
+ - const: tesla,fsd
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/ti/k3.txt b/Documentation/devicetree/bindings/arm/ti/k3.txt
deleted file mode 100644
index 6a059cabb2da..000000000000
--- a/Documentation/devicetree/bindings/arm/ti/k3.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-Texas Instruments K3 Multicore SoC architecture device tree bindings
---------------------------------------------------------------------
-
-Platforms based on Texas Instruments K3 Multicore SoC architecture
-shall follow the following scheme:
-
-SoCs
-----
-
-Each device tree root node must specify which exact SoC in K3 Multicore SoC
-architecture it uses, using one of the following compatible values:
-
-- AM654
- compatible = "ti,am654";
-
-Boards
-------
-
-In addition, each device tree root node must specify which one or more
-of the following board-specific compatible values:
-
-- AM654 EVM
- compatible = "ti,am654-evm", "ti,am654";
diff --git a/Documentation/devicetree/bindings/arm/ti/k3.yaml b/Documentation/devicetree/bindings/arm/ti/k3.yaml
new file mode 100644
index 000000000000..0105dcda6e04
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/ti/k3.yaml
@@ -0,0 +1,215 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/ti/k3.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Texas Instruments K3 Multicore SoC architecture
+
+maintainers:
+ - Nishanth Menon <nm@ti.com>
+
+description: |
+ Platforms based on Texas Instruments K3 Multicore SoC architecture
+ shall have the following properties.
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+
+ - description: K3 AM62A7 SoC
+ items:
+ - enum:
+ - ti,am62a7-sk
+ - const: ti,am62a7
+
+ - description: K3 AM62D2 SoC and Boards
+ items:
+ - enum:
+ - ti,am62d2-evm
+ - const: ti,am62d2
+
+ - description: K3 AM62A7 SoC PHYTEC phyBOARD-Lyra
+ items:
+ - const: phytec,am62a7-phyboard-lyra-rdk
+ - const: phytec,am62a-phycore-som
+ - const: ti,am62a7
+
+ - description: K3 AM62P5 SoC and Boards
+ items:
+ - enum:
+ - ti,am62p5-sk
+ - const: ti,am62p5
+
+ - description: K3 AM625 SoC PHYTEC phyBOARD-Lyra
+ items:
+ - const: phytec,am625-phyboard-lyra-rdk
+ - const: phytec,am62-phycore-som
+ - const: ti,am625
+
+ - description: K3 AM625 SoC
+ items:
+ - enum:
+ - beagle,am62-pocketbeagle2
+ - beagle,am625-beagleplay
+ - ti,am625-sk
+ - ti,am62-lp-sk
+ - const: ti,am625
+
+ - description: K3 AM6254atl SiP
+ items:
+ - enum:
+ - ti,am6254atl-sk
+ - const: ti,am6254atl
+ - const: ti,am625
+
+ - description: K3 AM62x SoC Toradex Verdin Modules and Carrier Boards
+ items:
+ - enum:
+ - toradex,verdin-am62-nonwifi-dahlia # Verdin AM62 Module on Dahlia
+ - toradex,verdin-am62-nonwifi-dev # Verdin AM62 Module on Verdin Development Board
+ - toradex,verdin-am62-nonwifi-ivy # Verdin AM62 Module on Ivy
+ - toradex,verdin-am62-nonwifi-mallow # Verdin AM62 Module on Mallow
+ - toradex,verdin-am62-nonwifi-yavia # Verdin AM62 Module on Yavia
+ - const: toradex,verdin-am62-nonwifi # Verdin AM62 Module without Wi-Fi / BT
+ - const: toradex,verdin-am62 # Verdin AM62 Module
+ - const: ti,am625
+
+ - description: K3 AM62x SoC Toradex Verdin Modules and Carrier Boards with Wi-Fi / BT
+ items:
+ - enum:
+ - toradex,verdin-am62-wifi-dahlia # Verdin AM62 Wi-Fi / BT Module on Dahlia
+ - toradex,verdin-am62-wifi-dev # Verdin AM62 Wi-Fi / BT M. on Verdin Development B.
+ - toradex,verdin-am62-wifi-ivy # Verdin AM62 Wi-Fi / BT Module on Ivy
+ - toradex,verdin-am62-wifi-mallow # Verdin AM62 Wi-Fi / BT Module on Mallow
+ - toradex,verdin-am62-wifi-yavia # Verdin AM62 Wi-Fi / BT Module on Yavia
+ - const: toradex,verdin-am62-wifi # Verdin AM62 Wi-Fi / BT Module
+ - const: toradex,verdin-am62 # Verdin AM62 Module
+ - const: ti,am625
+
+ - description: K3 AM62P5 SoC Toradex Verdin Modules and Carrier Boards
+ items:
+ - enum:
+ - toradex,verdin-am62p-nonwifi-dahlia # Verdin AM62P Module on Dahlia
+ - toradex,verdin-am62p-nonwifi-dev # Verdin AM62P Module on Verdin Development Board
+ - toradex,verdin-am62p-nonwifi-ivy # Verdin AM62P Module on Ivy
+ - toradex,verdin-am62p-nonwifi-mallow # Verdin AM62P Module on Mallow
+ - toradex,verdin-am62p-nonwifi-yavia # Verdin AM62P Module on Yavia
+ - const: toradex,verdin-am62p-nonwifi # Verdin AM62P Module without Wi-Fi / BT
+ - const: toradex,verdin-am62p # Verdin AM62P Module
+ - const: ti,am62p5
+
+ - description: K3 AM62P5 SoC Toradex Verdin Modules and Carrier Boards with Wi-Fi / BT
+ items:
+ - enum:
+ - toradex,verdin-am62p-wifi-dahlia # Verdin AM62P Wi-Fi / BT Module on Dahlia
+ - toradex,verdin-am62p-wifi-dev # Verdin AM62P Wi-Fi / BT M. on Verdin Development B.
+ - toradex,verdin-am62p-wifi-ivy # Verdin AM62P Wi-Fi / BT Module on Ivy
+ - toradex,verdin-am62p-wifi-mallow # Verdin AM62P Wi-Fi / BT Module on Mallow
+ - toradex,verdin-am62p-wifi-yavia # Verdin AM62P Wi-Fi / BT Module on Yavia
+ - const: toradex,verdin-am62p-wifi # Verdin AM62P Wi-Fi / BT Module
+ - const: toradex,verdin-am62p # Verdin AM62P Module
+ - const: ti,am62p5
+
+ - description: K3 AM62P5 SoC Variscite SOM and Carrier Boards
+ items:
+ - const: variscite,var-som-am62p-symphony
+ - const: variscite,var-som-am62p
+ - const: ti,am62p5
+
+ - description: K3 AM642 SoC
+ items:
+ - enum:
+ - ti,am642-evm
+ - ti,am642-sk
+ - const: ti,am642
+
+ - description: K3 AM642 SoC PHYTEC phyBOARD-Electra
+ items:
+ - const: phytec,am642-phyboard-electra-rdk
+ - const: phytec,am64-phycore-som
+ - const: ti,am642
+
+ - description: K3 AM642 SoC on TQ-Systems TQMaX4XxL SoM
+ items:
+ - enum:
+ - tq,am642-tqma6442l-mbax4xxl # MBaX4XxL base board
+ - const: tq,am642-tqma6442l
+ - const: ti,am642
+
+ - description: K3 AM642 SoC SolidRun SoM based boards
+ items:
+ - enum:
+ - solidrun,am642-hummingboard-t
+ - const: solidrun,am642-sr-som
+ - const: ti,am642
+
+ - description: K3 AM654 SoC
+ items:
+ - enum:
+ - siemens,iot2050-advanced
+ - siemens,iot2050-advanced-m2
+ - siemens,iot2050-advanced-pg2
+ - siemens,iot2050-advanced-sm
+ - siemens,iot2050-basic
+ - siemens,iot2050-basic-pg2
+ - ti,am654-evm
+ - const: ti,am654
+
+ - description: K3 J7200 SoC
+ oneOf:
+ - const: ti,j7200
+ - items:
+ - enum:
+ - ti,j7200-evm
+ - const: ti,j7200
+
+ - description: K3 J721E SoC
+ oneOf:
+ - const: ti,j721e
+ - items:
+ - enum:
+ - beagle,j721e-beagleboneai64
+ - ti,j721e-evm
+ - ti,j721e-sk
+ - const: ti,j721e
+
+ - description: K3 J721s2 SoC
+ items:
+ - enum:
+ - ti,am68-sk
+ - ti,j721s2-evm
+ - const: ti,j721s2
+
+ - description: K3 J721s2 SoC Phytec SoM based boards
+ items:
+ - enum:
+ - phytec,am68-phyboard-izar
+ - const: phytec,am68-phycore-som
+ - const: ti,j721s2
+
+ - description: K3 J722S SoC and Boards
+ items:
+ - enum:
+ - beagle,am67a-beagley-ai
+ - ti,j722s-evm
+ - const: ti,j722s
+
+ - description: K3 J742S2 SoC
+ items:
+ - enum:
+ - ti,j742s2-evm
+ - const: ti,j742s2
+
+ - description: K3 J784s4 SoC
+ items:
+ - enum:
+ - ti,am69-sk
+ - ti,j784s4-evm
+ - const: ti,j784s4
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/ti/nspire.yaml b/Documentation/devicetree/bindings/arm/ti/nspire.yaml
new file mode 100644
index 000000000000..cc2023bb7fa6
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/ti/nspire.yaml
@@ -0,0 +1,27 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/ti/nspire.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: TI-NSPIRE calculators
+
+maintainers:
+ - Daniel Tang <dt.tangr@gmail.com>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ items:
+ - enum:
+ # CX models
+ - ti,nspire-cx
+ # Touchpad models
+ - ti,nspire-tp
+ # Clickpad models
+ - ti,nspire-clp
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/ti/omap.yaml b/Documentation/devicetree/bindings/arm/ti/omap.yaml
new file mode 100644
index 000000000000..aa5df4692e37
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/ti/omap.yaml
@@ -0,0 +1,186 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/ti/omap.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Texas Instruments OMAP SoC architecture
+
+maintainers:
+ - Tony Lindgren <tony@atomide.com>
+
+description: Platforms based on Texas Instruments OMAP SoC architecture.
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+
+ - description: TI OMAP2420 SoC based platforms
+ items:
+ - enum:
+ - nokia,n800
+ - nokia,n810
+ - nokia,n810-wimax
+ - ti,omap2420-h4
+ - const: ti,omap2420
+ - const: ti,omap2
+
+ - description: TI OMAP2430 SoC based platforms
+ items:
+ - enum:
+ - ti,omap2430-sdp # TI OMAP2430 SDP
+ - const: ti,omap2430
+ - const: ti,omap2
+
+ - description: TI OMAP3430 SoC based platforms
+ items:
+ - enum:
+ - compulab,omap3-cm-t3530
+ - logicpd,dm3730-som-lv-devkit # LogicPD Zoom OMAP35xx SOM-LV Development Kit
+ - logicpd,dm3730-torpedo-devkit # LogicPD Zoom OMAP35xx Torpedo Development Kit
+ - nokia,omap3-n900
+ - openpandora,omap3-pandora-600mhz
+ - ti,omap3430-sdp
+ - ti,omap3-beagle
+ - ti,omap3-evm # TI OMAP35XX EVM (TMDSEVM3530)
+ - ti,omap3-ldp # TI OMAP3430 LDP (Zoom1 Labrador)
+ - timll,omap3-devkit8000
+ - const: ti,omap3430
+ - const: ti,omap3
+
+ - description: Early BeagleBoard revisions A to B4 with a timer quirk
+ items:
+ - const: ti,omap3-beagle-ab4
+ - const: ti,omap3-beagle
+ - const: ti,omap3430
+ - const: ti,omap3
+
+ - description: Gumstix Overo TI OMAP 3430/3630 boards + expansion boards
+ items:
+ - enum:
+ - gumstix,omap3-overo-alto35
+ - gumstix,omap3-overo-chestnut43
+ - gumstix,omap3-overo-gallop43
+ - gumstix,omap3-overo-palo35
+ - gumstix,omap3-overo-palo43
+ - gumstix,omap3-overo-summit
+ - gumstix,omap3-overo-tobi
+ - gumstix,omap3-overo-tobiduo
+ - const: gumstix,omap3-overo
+ - enum:
+ - ti,omap3430
+ - ti,omap3630
+
+ - description: TI OMAP3630 SoC based platforms
+ items:
+ - enum:
+ - amazon,omap3-echo # Amazon Echo (first generation)
+ - compulab,omap3-cm-t3730
+ - goldelico,gta04
+ - lg,omap3-sniper # LG Optimus Black
+ - logicpd,dm3730-som-lv-devkit # LogicPD Zoom DM3730 SOM-LV Development Kit
+ - logicpd,dm3730-torpedo-devkit # LogicPD Zoom DM3730 Torpedo + Wireless Development Kit
+ - nokia,omap3-n9
+ - nokia,omap3-n950
+ - openpandora,omap3-pandora-1ghz
+ - ti,omap3-beagle-xm
+ - ti,omap3-evm-37xx # TI OMAP37XX EVM (TMDSEVM3730)
+ - ti,omap3-zoom3
+ - const: ti,omap3630
+ - const: ti,omap3
+
+ - description: TI AM35 SoC based platforms
+ items:
+ - enum:
+ - compulab,omap3-sbc-t3517 # CompuLab SBC-T3517 with CM-T3517
+ - teejet,mt_ventoux
+ - ti,am3517-craneboard # TI AM3517 CraneBoard (TMDSEVM3517)
+ - ti,am3517-evm # TI AM3517 EVM (AM3517/05 TMDSEVM3517)
+ - const: ti,am3517
+ - const: ti,omap3
+
+ - description: TI AM33 based platform
+ items:
+ - enum:
+ - compulab,cm-t335
+ - moxa,uc-8100-me-t
+ - novatech,am335x-lxm
+ - seeed,am335x-bone-green-eco
+ - ti,am335x-bone
+ - ti,am335x-evm
+ - ti,am3359-icev2
+ - const: ti,am33xx
+
+ - description: Compulab board variants based on TI AM33
+ items:
+ - enum:
+ - compulab,sbc-t335
+ - const: compulab,cm-t335
+ - const: ti,am33xx
+
+ - description: Phytec boards based on TI AM33
+ items:
+ - enum:
+ - phytec,am335x-wega
+ - phytec,am335x-pcm-953
+ - phytec,am335x-regor
+ - const: phytec,am335x-phycore-som
+ - const: ti,am33xx
+
+ - description: TI OMAP4430 SoC based platforms
+ items:
+ - enum:
+ - amazon,omap4-kc1 # Amazon Kindle Fire (first generation)
+ - motorola,droid4 # Motorola Droid 4 XT894
+ - motorola,droid-bionic # Motorola Droid Bionic XT875
+ - motorola,xyboard-mz609
+ - motorola,xyboard-mz617
+ - ti,omap4-panda
+ - ti,omap4-sdp
+ - const: ti,omap4430
+ - const: ti,omap4
+
+ - description: OMAP4 PandaBoard Revision A4 and later
+ items:
+ - const: ti,omap4-panda-a4
+ - const: ti,omap4-panda
+ - const: ti,omap4430
+ - const: ti,omap4
+
+ - description: OMAP4 DuoVero with Parlor expansion board/daughter board
+ items:
+ - const: gumstix,omap4-duovero-parlor
+ - const: gumstix,omap4-duovero
+ - const: ti,omap4430
+ - const: ti,omap4
+
+ - description: TI OMAP4460 SoC based platforms
+ items:
+ - enum:
+ - epson,embt2ws # Epson Moverio BT-200
+ - ti,omap4-panda-es
+ - const: ti,omap4460
+ - const: ti,omap4
+
+ - description: VAR-OM44 boards
+ items:
+ - enum:
+ - variscite,var-dvk-om44
+ - variscite,var-stk-om44
+ - const: variscite,var-som-om44
+ - const: ti,omap4460
+ - const: ti,omap4
+
+ - description: TI OMAP5 SoC based platforms
+ items:
+ - enum:
+ - compulab,omap5-cm-t54
+ - isee,omap5-igep0050
+ - ti,omap5-uevm
+ - const: ti,omap5
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/ti/ti,davinci.yaml b/Documentation/devicetree/bindings/arm/ti/ti,davinci.yaml
new file mode 100644
index 000000000000..1656d1a4476f
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/ti/ti,davinci.yaml
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/ti/ti,davinci.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Texas Instruments DaVinci Platforms
+
+maintainers:
+ - Sekhar Nori <nsekhar@ti.com>
+
+description:
+ DA850/OMAP-L138/AM18x based boards
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ items:
+ - enum:
+ - ti,da850-evm # DA850/OMAP-L138/AM18x Evaluation Module (EVM) board
+ - ti,da850-lcdk # DA850/OMAP-L138/AM18x L138/C6748 Development Kit (LCDK) board
+ - enbw,cmc # EnBW AM1808 based CMC board
+ - lego,ev3 # LEGO MINDSTORMS EV3 (AM1808 based)
+ - const: ti,da850
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/ti/ti,keystone.yaml b/Documentation/devicetree/bindings/arm/ti/ti,keystone.yaml
new file mode 100644
index 000000000000..20d4084f4506
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/ti/ti,keystone.yaml
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/ti/ti,keystone.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: TI Keystone Platforms
+
+maintainers:
+ - Nishanth Menon <nm@ti.com>
+ - Santosh Shilimkar <ssantosh@kernel.org>
+
+properties:
+ compatible:
+ oneOf:
+ - description: K2G
+ items:
+ - enum:
+ - ti,k2g-evm
+ - ti,k2g-ice
+ - const: ti,k2g
+ - const: ti,keystone
+ - description: Keystone 2 Edison
+ items:
+ - enum:
+ - ti,k2e-evm
+ - const: ti,k2e
+ - const: ti,keystone
+ - description: Keystone 2 Lamarr
+ items:
+ - enum:
+ - ti,k2l-evm
+ - const: ti,k2l
+ - const: ti,keystone
+ - description: Keystone 2 Hawking/Kepler
+ items:
+ - enum:
+ - ti,k2hk-evm
+ - const: ti,k2hk
+ - const: ti,keystone
+
+additionalProperties: true
diff --git a/Documentation/devicetree/bindings/arm/topology.txt b/Documentation/devicetree/bindings/arm/topology.txt
deleted file mode 100644
index de9eb0486630..000000000000
--- a/Documentation/devicetree/bindings/arm/topology.txt
+++ /dev/null
@@ -1,475 +0,0 @@
-===========================================
-ARM topology binding description
-===========================================
-
-===========================================
-1 - Introduction
-===========================================
-
-In an ARM system, the hierarchy of CPUs is defined through three entities that
-are used to describe the layout of physical CPUs in the system:
-
-- cluster
-- core
-- thread
-
-The cpu nodes (bindings defined in [1]) represent the devices that
-correspond to physical CPUs and are to be mapped to the hierarchy levels.
-
-The bottom hierarchy level sits at core or thread level depending on whether
-symmetric multi-threading (SMT) is supported or not.
-
-For instance in a system where CPUs support SMT, "cpu" nodes represent all
-threads existing in the system and map to the hierarchy level "thread" above.
-In systems where SMT is not supported "cpu" nodes represent all cores present
-in the system and map to the hierarchy level "core" above.
-
-ARM topology bindings allow one to associate cpu nodes with hierarchical groups
-corresponding to the system hierarchy; syntactically they are defined as device
-tree nodes.
-
-The remainder of this document provides the topology bindings for ARM, based
-on the Devicetree Specification, available from:
-
-https://www.devicetree.org/specifications/
-
-If not stated otherwise, whenever a reference to a cpu node phandle is made its
-value must point to a cpu node compliant with the cpu node bindings as
-documented in [1].
-A topology description containing phandles to cpu nodes that are not compliant
-with bindings standardized in [1] is therefore considered invalid.
-
-===========================================
-2 - cpu-map node
-===========================================
-
-The ARM CPU topology is defined within the cpu-map node, which is a direct
-child of the cpus node and provides a container where the actual topology
-nodes are listed.
-
-- cpu-map node
-
- Usage: Optional - On ARM SMP systems provide CPUs topology to the OS.
- ARM uniprocessor systems do not require a topology
- description and therefore should not define a
- cpu-map node.
-
- Description: The cpu-map node is just a container node where its
- subnodes describe the CPU topology.
-
- Node name must be "cpu-map".
-
- The cpu-map node's parent node must be the cpus node.
-
- The cpu-map node's child nodes can be:
-
- - one or more cluster nodes
-
- Any other configuration is considered invalid.
-
-The cpu-map node can only contain three types of child nodes:
-
-- cluster node
-- core node
-- thread node
-
-whose bindings are described in paragraph 3.
-
-The nodes describing the CPU topology (cluster/core/thread) can only
-be defined within the cpu-map node and every core/thread in the system
-must be defined within the topology. Any other configuration is
-invalid and therefore must be ignored.
-
-===========================================
-2.1 - cpu-map child nodes naming convention
-===========================================
-
-cpu-map child nodes must follow a naming convention where the node name
-must be "clusterN", "coreN", "threadN" depending on the node type (ie
-cluster/core/thread) (where N = {0, 1, ...} is the node number; nodes which
-are siblings within a single common parent node must be given a unique and
-sequential N value, starting from 0).
-cpu-map child nodes which do not share a common parent node can have the same
-name (ie same number N as other cpu-map child nodes at different device tree
-levels) since name uniqueness will be guaranteed by the device tree hierarchy.
-
-===========================================
-3 - cluster/core/thread node bindings
-===========================================
-
-Bindings for cluster/cpu/thread nodes are defined as follows:
-
-- cluster node
-
- Description: must be declared within a cpu-map node, one node
- per cluster. A system can contain several layers of
- clustering and cluster nodes can be contained in parent
- cluster nodes.
-
- The cluster node name must be "clusterN" as described in 2.1 above.
- A cluster node can not be a leaf node.
-
- A cluster node's child nodes must be:
-
- - one or more cluster nodes; or
- - one or more core nodes
-
- Any other configuration is considered invalid.
-
-- core node
-
- Description: must be declared in a cluster node, one node per core in
- the cluster. If the system does not support SMT, core
- nodes are leaf nodes, otherwise they become containers of
- thread nodes.
-
- The core node name must be "coreN" as described in 2.1 above.
-
- A core node must be a leaf node if SMT is not supported.
-
- Properties for core nodes that are leaf nodes:
-
- - cpu
- Usage: required
- Value type: <phandle>
- Definition: a phandle to the cpu node that corresponds to the
- core node.
-
- If a core node is not a leaf node (CPUs supporting SMT) a core node's
- child nodes can be:
-
- - one or more thread nodes
-
- Any other configuration is considered invalid.
-
-- thread node
-
- Description: must be declared in a core node, one node per thread
- in the core if the system supports SMT. Thread nodes are
- always leaf nodes in the device tree.
-
- The thread node name must be "threadN" as described in 2.1 above.
-
- A thread node must be a leaf node.
-
- A thread node must contain the following property:
-
- - cpu
- Usage: required
- Value type: <phandle>
- Definition: a phandle to the cpu node that corresponds to
- the thread node.
-
-===========================================
-4 - Example dts
-===========================================
-
-Example 1 (ARM 64-bit, 16-cpu system, two clusters of clusters):
-
-cpus {
- #size-cells = <0>;
- #address-cells = <2>;
-
- cpu-map {
- cluster0 {
- cluster0 {
- core0 {
- thread0 {
- cpu = <&CPU0>;
- };
- thread1 {
- cpu = <&CPU1>;
- };
- };
-
- core1 {
- thread0 {
- cpu = <&CPU2>;
- };
- thread1 {
- cpu = <&CPU3>;
- };
- };
- };
-
- cluster1 {
- core0 {
- thread0 {
- cpu = <&CPU4>;
- };
- thread1 {
- cpu = <&CPU5>;
- };
- };
-
- core1 {
- thread0 {
- cpu = <&CPU6>;
- };
- thread1 {
- cpu = <&CPU7>;
- };
- };
- };
- };
-
- cluster1 {
- cluster0 {
- core0 {
- thread0 {
- cpu = <&CPU8>;
- };
- thread1 {
- cpu = <&CPU9>;
- };
- };
- core1 {
- thread0 {
- cpu = <&CPU10>;
- };
- thread1 {
- cpu = <&CPU11>;
- };
- };
- };
-
- cluster1 {
- core0 {
- thread0 {
- cpu = <&CPU12>;
- };
- thread1 {
- cpu = <&CPU13>;
- };
- };
- core1 {
- thread0 {
- cpu = <&CPU14>;
- };
- thread1 {
- cpu = <&CPU15>;
- };
- };
- };
- };
- };
-
- CPU0: cpu@0 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x0 0x0>;
- enable-method = "spin-table";
- cpu-release-addr = <0 0x20000000>;
- };
-
- CPU1: cpu@1 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x0 0x1>;
- enable-method = "spin-table";
- cpu-release-addr = <0 0x20000000>;
- };
-
- CPU2: cpu@100 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x0 0x100>;
- enable-method = "spin-table";
- cpu-release-addr = <0 0x20000000>;
- };
-
- CPU3: cpu@101 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x0 0x101>;
- enable-method = "spin-table";
- cpu-release-addr = <0 0x20000000>;
- };
-
- CPU4: cpu@10000 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x0 0x10000>;
- enable-method = "spin-table";
- cpu-release-addr = <0 0x20000000>;
- };
-
- CPU5: cpu@10001 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x0 0x10001>;
- enable-method = "spin-table";
- cpu-release-addr = <0 0x20000000>;
- };
-
- CPU6: cpu@10100 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x0 0x10100>;
- enable-method = "spin-table";
- cpu-release-addr = <0 0x20000000>;
- };
-
- CPU7: cpu@10101 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x0 0x10101>;
- enable-method = "spin-table";
- cpu-release-addr = <0 0x20000000>;
- };
-
- CPU8: cpu@100000000 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x1 0x0>;
- enable-method = "spin-table";
- cpu-release-addr = <0 0x20000000>;
- };
-
- CPU9: cpu@100000001 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x1 0x1>;
- enable-method = "spin-table";
- cpu-release-addr = <0 0x20000000>;
- };
-
- CPU10: cpu@100000100 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x1 0x100>;
- enable-method = "spin-table";
- cpu-release-addr = <0 0x20000000>;
- };
-
- CPU11: cpu@100000101 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x1 0x101>;
- enable-method = "spin-table";
- cpu-release-addr = <0 0x20000000>;
- };
-
- CPU12: cpu@100010000 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x1 0x10000>;
- enable-method = "spin-table";
- cpu-release-addr = <0 0x20000000>;
- };
-
- CPU13: cpu@100010001 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x1 0x10001>;
- enable-method = "spin-table";
- cpu-release-addr = <0 0x20000000>;
- };
-
- CPU14: cpu@100010100 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x1 0x10100>;
- enable-method = "spin-table";
- cpu-release-addr = <0 0x20000000>;
- };
-
- CPU15: cpu@100010101 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x1 0x10101>;
- enable-method = "spin-table";
- cpu-release-addr = <0 0x20000000>;
- };
-};
-
-Example 2 (ARM 32-bit, dual-cluster, 8-cpu system, no SMT):
-
-cpus {
- #size-cells = <0>;
- #address-cells = <1>;
-
- cpu-map {
- cluster0 {
- core0 {
- cpu = <&CPU0>;
- };
- core1 {
- cpu = <&CPU1>;
- };
- core2 {
- cpu = <&CPU2>;
- };
- core3 {
- cpu = <&CPU3>;
- };
- };
-
- cluster1 {
- core0 {
- cpu = <&CPU4>;
- };
- core1 {
- cpu = <&CPU5>;
- };
- core2 {
- cpu = <&CPU6>;
- };
- core3 {
- cpu = <&CPU7>;
- };
- };
- };
-
- CPU0: cpu@0 {
- device_type = "cpu";
- compatible = "arm,cortex-a15";
- reg = <0x0>;
- };
-
- CPU1: cpu@1 {
- device_type = "cpu";
- compatible = "arm,cortex-a15";
- reg = <0x1>;
- };
-
- CPU2: cpu@2 {
- device_type = "cpu";
- compatible = "arm,cortex-a15";
- reg = <0x2>;
- };
-
- CPU3: cpu@3 {
- device_type = "cpu";
- compatible = "arm,cortex-a15";
- reg = <0x3>;
- };
-
- CPU4: cpu@100 {
- device_type = "cpu";
- compatible = "arm,cortex-a7";
- reg = <0x100>;
- };
-
- CPU5: cpu@101 {
- device_type = "cpu";
- compatible = "arm,cortex-a7";
- reg = <0x101>;
- };
-
- CPU6: cpu@102 {
- device_type = "cpu";
- compatible = "arm,cortex-a7";
- reg = <0x102>;
- };
-
- CPU7: cpu@103 {
- device_type = "cpu";
- compatible = "arm,cortex-a7";
- reg = <0x103>;
- };
-};
-
-===============================================================================
-[1] ARM Linux kernel documentation
- Documentation/devicetree/bindings/arm/cpus.txt
diff --git a/Documentation/devicetree/bindings/arm/toshiba.yaml b/Documentation/devicetree/bindings/arm/toshiba.yaml
new file mode 100644
index 000000000000..716ba4a3cab4
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/toshiba.yaml
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/toshiba.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Toshiba Visconti Platform
+
+maintainers:
+ - Nobuhiro Iwamatsu <nobuhiro1.iwamatsu@toshiba.co.jp>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ - description: Visconti5 TMPV7708
+ items:
+ - enum:
+ - toshiba,tmpv7708-rm-mbrc # TMPV7708 RM main board
+ - toshiba,tmpv7708-visrobo-vrb # TMPV7708 VisROBO VRB board
+ - const: toshiba,tmpv7708
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/uniphier/cache-uniphier.txt b/Documentation/devicetree/bindings/arm/uniphier/cache-uniphier.txt
deleted file mode 100644
index d27a646f48a9..000000000000
--- a/Documentation/devicetree/bindings/arm/uniphier/cache-uniphier.txt
+++ /dev/null
@@ -1,60 +0,0 @@
-UniPhier outer cache controller
-
-UniPhier SoCs are integrated with a full-custom outer cache controller system.
-All of them have a level 2 cache controller, and some have a level 3 cache
-controller as well.
-
-Required properties:
-- compatible: should be "socionext,uniphier-system-cache"
-- reg: offsets and lengths of the register sets for the device. It should
- contain 3 regions: control register, revision register, operation register,
- in this order.
-- cache-unified: specifies the cache is a unified cache.
-- cache-size: specifies the size in bytes of the cache
-- cache-sets: specifies the number of associativity sets of the cache
-- cache-line-size: specifies the line size in bytes
-- cache-level: specifies the level in the cache hierarchy. The value should
- be 2 for L2 cache, 3 for L3 cache, etc.
-
-Optional properties:
-- next-level-cache: phandle to the next level cache if present. The next level
- cache should be also compatible with "socionext,uniphier-system-cache".
-
-The L2 cache must exist to use the L3 cache; the cache hierarchy must be
-indicated correctly with "next-level-cache" properties.
-
-Example 1 (system with L2):
- l2: l2-cache@500c0000 {
- compatible = "socionext,uniphier-system-cache";
- reg = <0x500c0000 0x2000>, <0x503c0100 0x4>,
- <0x506c0000 0x400>;
- cache-unified;
- cache-size = <0x80000>;
- cache-sets = <256>;
- cache-line-size = <128>;
- cache-level = <2>;
- };
-
-Example 2 (system with L2 and L3):
- l2: l2-cache@500c0000 {
- compatible = "socionext,uniphier-system-cache";
- reg = <0x500c0000 0x2000>, <0x503c0100 0x8>,
- <0x506c0000 0x400>;
- cache-unified;
- cache-size = <0x200000>;
- cache-sets = <512>;
- cache-line-size = <128>;
- cache-level = <2>;
- next-level-cache = <&l3>;
- };
-
- l3: l3-cache@500c8000 {
- compatible = "socionext,uniphier-system-cache";
- reg = <0x500c8000 0x2000>, <0x503c8100 0x8>,
- <0x506c8000 0x400>;
- cache-unified;
- cache-size = <0x400000>;
- cache-sets = <512>;
- cache-line-size = <256>;
- cache-level = <3>;
- };
diff --git a/Documentation/devicetree/bindings/arm/ux500.yaml b/Documentation/devicetree/bindings/arm/ux500.yaml
new file mode 100644
index 000000000000..b42d20fa4359
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/ux500.yaml
@@ -0,0 +1,73 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/ux500.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Ux500 platforms
+
+maintainers:
+ - Linus Walleij <linus.walleij@linaro.org>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+
+ - description: ST-Ericsson HREF (pre-v60)
+ items:
+ - const: st-ericsson,mop500
+ - const: st-ericsson,u8500
+
+ - description: ST-Ericsson HREF520
+ items:
+ - const: st-ericsson,href520
+ - const: st-ericsson,u8500
+
+ - description: ST-Ericsson HREF (v60+)
+ items:
+ - const: st-ericsson,hrefv60+
+ - const: st-ericsson,u8500
+
+ - description: Calao Systems Snowball
+ items:
+ - const: calaosystems,snowball-a9500
+ - const: st-ericsson,u9500
+
+ - description: Samsung Galaxy Ace 2 (GT-I8160)
+ items:
+ - const: samsung,codina
+ - const: st-ericsson,u8500
+
+ - description: Samsung Galaxy Exhibit (SGH-T599)
+ items:
+ - const: samsung,codina-tmo
+ - const: st-ericsson,u8500
+
+ - description: Samsung Galaxy Beam (GT-I8530)
+ items:
+ - const: samsung,gavini
+ - const: st-ericsson,u8500
+
+ - description: Samsung Galaxy S III mini (GT-I8190)
+ items:
+ - const: samsung,golden
+ - const: st-ericsson,u8500
+
+ - description: Samsung Galaxy S Advance (GT-I9070)
+ items:
+ - const: samsung,janice
+ - const: st-ericsson,u8500
+
+ - description: Samsung Galaxy Amp (SGH-I407)
+ items:
+ - const: samsung,kyle
+ - const: st-ericsson,u8500
+
+ - description: Samsung Galaxy XCover 2 (GT-S7710)
+ items:
+ - const: samsung,skomer
+ - const: st-ericsson,u8500
+
+additionalProperties: true
diff --git a/Documentation/devicetree/bindings/arm/ux500/boards.txt b/Documentation/devicetree/bindings/arm/ux500/boards.txt
index 89408de55bfd..18d55532d31e 100644
--- a/Documentation/devicetree/bindings/arm/ux500/boards.txt
+++ b/Documentation/devicetree/bindings/arm/ux500/boards.txt
@@ -20,13 +20,13 @@ during retention, system won't boot without this):
compatible = "ste,dbx500-backupram"
scu:
- see binding for arm/scu.txt
+ see binding for arm/arm,scu.yaml
interrupt-controller:
see binding for interrupt-controller/arm,gic.txt
timer:
- see binding for timer/arm,twd.txt
+ see binding for timer/arm,twd-timer.yaml
clocks:
see binding for clocks/ux500.txt
diff --git a/Documentation/devicetree/bindings/arm/versatile-sysreg.txt b/Documentation/devicetree/bindings/arm/versatile-sysreg.txt
deleted file mode 100644
index a4f15262d717..000000000000
--- a/Documentation/devicetree/bindings/arm/versatile-sysreg.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-ARM Versatile system registers
---------------------------------------
-
-This is a system control registers block, providing multiple low level
-platform functions like board detection and identification, software
-interrupt generation, MMC and NOR Flash control etc.
-
-Required node properties:
-- compatible value : = "arm,versatile-sysreg", "syscon"
-- reg : physical base address and the size of the registers window
diff --git a/Documentation/devicetree/bindings/arm/vexpress-config.yaml b/Documentation/devicetree/bindings/arm/vexpress-config.yaml
new file mode 100644
index 000000000000..b74380da3198
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/vexpress-config.yaml
@@ -0,0 +1,285 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/vexpress-config.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM Versatile Express configuration bus
+
+maintainers:
+ - Andre Przywara <andre.przywara@arm.com>
+
+description:
+ This is a system control register block, acting as a bridge to the
+ platform's configuration bus via "system control" interface, addressing
+ devices with site number, position in the board stack, config controller,
+ function and device numbers - see motherboard's TRM for more details.
+
+properties:
+ compatible:
+ const: arm,vexpress,config-bus
+
+ arm,vexpress,config-bridge:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ Phandle to the sysreg node.
+
+ muxfpga:
+ type: object
+ properties:
+ compatible:
+ const: arm,vexpress-muxfpga
+
+ arm,vexpress-sysreg,func:
+ description: FPGA specifier
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ items:
+ - const: 7
+ - description: device number
+
+ additionalProperties: false
+ required:
+ - compatible
+ - arm,vexpress-sysreg,func
+
+ shutdown:
+ type: object
+ properties:
+ compatible:
+ const: arm,vexpress-shutdown
+
+ arm,vexpress-sysreg,func:
+ description: shutdown identifier
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ items:
+ - const: 8
+ - description: device number
+
+ additionalProperties: false
+ required:
+ - compatible
+ - arm,vexpress-sysreg,func
+
+ reboot:
+ type: object
+ properties:
+ compatible:
+ const: arm,vexpress-reboot
+
+ arm,vexpress-sysreg,func:
+ description: reboot identifier
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ items:
+ - const: 9
+ - description: device number
+
+ additionalProperties: false
+ required:
+ - compatible
+ - arm,vexpress-sysreg,func
+
+ dvimode:
+ type: object
+ properties:
+ compatible:
+ const: arm,vexpress-dvimode
+
+ arm,vexpress-sysreg,func:
+ description: DVI mode identifier
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ items:
+ - const: 11
+ - description: device number
+
+ additionalProperties: false
+ required:
+ - compatible
+ - arm,vexpress-sysreg,func
+
+additionalProperties: false
+
+required:
+ - compatible
+ - arm,vexpress,config-bridge
+
+patternProperties:
+ 'clk[0-9]*$':
+ type: object
+ description:
+ clocks
+
+ properties:
+ compatible:
+ const: arm,vexpress-osc
+
+ arm,vexpress-sysreg,func:
+ description: clock specifier
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ items:
+ - const: 1
+ - description: clock number
+
+ freq-range:
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ items:
+ - description: minimal clock frequency
+ - description: maximum clock frequency
+
+ "#clock-cells":
+ const: 0
+
+ clock-output-names:
+ maxItems: 1
+
+ additionalProperties: false
+ required:
+ - compatible
+ - arm,vexpress-sysreg,func
+ - "#clock-cells"
+
+ "^volt-.+$":
+ $ref: /schemas/regulator/regulator.yaml#
+ properties:
+ compatible:
+ const: arm,vexpress-volt
+
+ arm,vexpress-sysreg,func:
+ description: regulator specifier
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ items:
+ - const: 2
+ - description: device number
+
+ label:
+ maxItems: 1
+
+ unevaluatedProperties: false
+ required:
+ - compatible
+ - arm,vexpress-sysreg,func
+
+ "^amp-.+$":
+ type: object
+ properties:
+ compatible:
+ const: arm,vexpress-amp
+
+ arm,vexpress-sysreg,func:
+ description: current sensor identifier
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ items:
+ - const: 3
+ - description: device number
+
+ label:
+ maxItems: 1
+
+ additionalProperties: false
+ required:
+ - compatible
+ - arm,vexpress-sysreg,func
+
+ "^temp-.+$":
+ type: object
+ properties:
+ compatible:
+ const: arm,vexpress-temp
+
+ arm,vexpress-sysreg,func:
+ description: temperature sensor identifier
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ items:
+ - const: 4
+ - description: device number
+
+ label:
+ maxItems: 1
+
+ additionalProperties: false
+ required:
+ - compatible
+ - arm,vexpress-sysreg,func
+
+ "^reset[0-9]*$":
+ type: object
+ properties:
+ compatible:
+ const: arm,vexpress-reset
+
+ arm,vexpress-sysreg,func:
+ description: reset specifier
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ items:
+ - const: 5
+ - description: reset device number
+
+ additionalProperties: false
+ required:
+ - compatible
+ - arm,vexpress-sysreg,func
+
+ "^power-.+$":
+ type: object
+ properties:
+ compatible:
+ const: arm,vexpress-power
+
+ arm,vexpress-sysreg,func:
+ description: power sensor identifier
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ items:
+ - const: 12
+ - description: device number
+
+ label:
+ maxItems: 1
+
+ additionalProperties: false
+ required:
+ - compatible
+ - arm,vexpress-sysreg,func
+
+ "^energy(-.+)?$":
+ type: object
+ properties:
+ compatible:
+ const: arm,vexpress-energy
+
+ arm,vexpress-sysreg,func:
+ description: energy sensor identifier
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ oneOf:
+ - items:
+ - const: 13
+ - description: device number
+ - items:
+ - const: 13
+ - description: device number
+ - const: 13
+ - description: second device number
+
+ label:
+ maxItems: 1
+
+ additionalProperties: false
+ required:
+ - compatible
+ - arm,vexpress-sysreg,func
+
+examples:
+ - |
+ mcc {
+ compatible = "arm,vexpress,config-bus";
+ arm,vexpress,config-bridge = <&v2m_sysreg>;
+
+ clk0 {
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 0>;
+ #clock-cells = <0>;
+ };
+
+ energy {
+ compatible = "arm,vexpress-energy";
+ arm,vexpress-sysreg,func = <13 0>, <13 1>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/arm/vexpress-sysreg.txt b/Documentation/devicetree/bindings/arm/vexpress-sysreg.txt
deleted file mode 100644
index 50095802fb4a..000000000000
--- a/Documentation/devicetree/bindings/arm/vexpress-sysreg.txt
+++ /dev/null
@@ -1,103 +0,0 @@
-ARM Versatile Express system registers
---------------------------------------
-
-This is a system control registers block, providing multiple low level
-platform functions like board detection and identification, software
-interrupt generation, MMC and NOR Flash control etc.
-
-Required node properties:
-- compatible value : = "arm,vexpress,sysreg";
-- reg : physical base address and the size of the registers window
-
-Deprecated properties, replaced by GPIO subnodes (see below):
-- gpio-controller : specifies that the node is a GPIO controller
-- #gpio-cells : size of the GPIO specifier, should be 2:
- - first cell is the pseudo-GPIO line number:
- 0 - MMC CARDIN
- 1 - MMC WPROT
- 2 - NOR FLASH WPn
- - second cell can take standard GPIO flags (currently ignored).
-
-Control registers providing pseudo-GPIO lines must be represented
-by subnodes, each of them requiring the following properties:
-- compatible value : one of
- "arm,vexpress-sysreg,sys_led"
- "arm,vexpress-sysreg,sys_mci"
- "arm,vexpress-sysreg,sys_flash"
-- gpio-controller : makes the node a GPIO controller
-- #gpio-cells : size of the GPIO specifier, must be 2:
- - first cell is the function number:
- - for sys_led : 0..7 = LED 0..7
- - for sys_mci : 0 = MMC CARDIN, 1 = MMC WPROT
- - for sys_flash : 0 = NOR FLASH WPn
- - second cell can take standard GPIO flags (currently ignored).
-
-Example:
- v2m_sysreg: sysreg@10000000 {
- compatible = "arm,vexpress-sysreg";
- reg = <0x10000000 0x1000>;
-
- v2m_led_gpios: sys_led@8 {
- compatible = "arm,vexpress-sysreg,sys_led";
- gpio-controller;
- #gpio-cells = <2>;
- };
-
- v2m_mmc_gpios: sys_mci@48 {
- compatible = "arm,vexpress-sysreg,sys_mci";
- gpio-controller;
- #gpio-cells = <2>;
- };
-
- v2m_flash_gpios: sys_flash@4c {
- compatible = "arm,vexpress-sysreg,sys_flash";
- gpio-controller;
- #gpio-cells = <2>;
- };
- };
-
-This block also can also act a bridge to the platform's configuration
-bus via "system control" interface, addressing devices with site number,
-position in the board stack, config controller, function and device
-numbers - see motherboard's TRM for more details. All configuration
-controller accessible via this interface must reference the sysreg
-node via "arm,vexpress,config-bridge" phandle and define appropriate
-topology properties - see main vexpress node documentation for more
-details. Each child of such node describes one function and must
-define the following properties:
-- compatible value : must be one of (corresponding to the TRM):
- "arm,vexpress-amp"
- "arm,vexpress-dvimode"
- "arm,vexpress-energy"
- "arm,vexpress-muxfpga"
- "arm,vexpress-osc"
- "arm,vexpress-power"
- "arm,vexpress-reboot"
- "arm,vexpress-reset"
- "arm,vexpress-scc"
- "arm,vexpress-shutdown"
- "arm,vexpress-temp"
- "arm,vexpress-volt"
-- arm,vexpress-sysreg,func : must contain a set of two cells long groups:
- - first cell of each group defines the function number
- (eg. 1 for clock generator, 2 for voltage regulators etc.)
- - second cell of each group defines device number (eg. osc 0,
- osc 1 etc.)
- - some functions (eg. energy meter, with its 64 bit long counter)
- are using more than one function/device number pair
-
-Example:
- mcc {
- compatible = "arm,vexpress,config-bus";
- arm,vexpress,config-bridge = <&v2m_sysreg>;
-
- osc@0 {
- compatible = "arm,vexpress-osc";
- arm,vexpress-sysreg,func = <1 0>;
- };
-
- energy@0 {
- compatible = "arm,vexpress-energy";
- arm,vexpress-sysreg,func = <13 0>, <13 1>;
- };
- };
diff --git a/Documentation/devicetree/bindings/arm/vexpress-sysreg.yaml b/Documentation/devicetree/bindings/arm/vexpress-sysreg.yaml
new file mode 100644
index 000000000000..be6e3b542569
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/vexpress-sysreg.yaml
@@ -0,0 +1,96 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/vexpress-sysreg.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM Versatile Express system registers
+
+maintainers:
+ - Andre Przywara <andre.przywara@arm.com>
+
+description:
+ This is a system control registers block, providing multiple low level
+ platform functions like board detection and identification, software
+ interrupt generation, MMC and NOR Flash control, etc.
+
+properties:
+ compatible:
+ const: arm,vexpress-sysreg
+
+ reg:
+ maxItems: 1
+
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 1
+
+ ranges: true
+
+ gpio-controller:
+ deprecated: true
+
+ "#gpio-cells":
+ deprecated: true
+ const: 2
+
+additionalProperties: false
+
+patternProperties:
+ '^gpio@[0-9a-f]+$':
+ type: object
+ additionalProperties: false
+ description:
+ GPIO children
+
+ properties:
+ compatible:
+ enum:
+ - arm,vexpress-sysreg,sys_led
+ - arm,vexpress-sysreg,sys_mci
+ - arm,vexpress-sysreg,sys_flash
+
+ gpio-controller: true
+
+ "#gpio-cells":
+ const: 2
+ description: |
+ The first cell is the function number:
+ for sys_led : 0..7 = LED 0..7
+ for sys_mci : 0 = MMC CARDIN, 1 = MMC WPROT
+ for sys_flash : 0 = NOR FLASH WPn
+ The second cell can take standard GPIO flags.
+
+ reg:
+ maxItems: 1
+
+ required:
+ - compatible
+ - reg
+ - gpio-controller
+ - "#gpio-cells"
+
+required:
+ - compatible
+ - reg
+
+examples:
+ - |
+ sysreg@0 {
+ compatible = "arm,vexpress-sysreg";
+ reg = <0x00000 0x1000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0 0 0x1000>;
+
+ v2m_led_gpios: gpio@8 {
+ compatible = "arm,vexpress-sysreg,sys_led";
+ reg = <0x008 4>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/arm/vexpress.txt b/Documentation/devicetree/bindings/arm/vexpress.txt
deleted file mode 100644
index 39844cd0bcce..000000000000
--- a/Documentation/devicetree/bindings/arm/vexpress.txt
+++ /dev/null
@@ -1,229 +0,0 @@
-ARM Versatile Express boards family
------------------------------------
-
-ARM's Versatile Express platform consists of a motherboard and one
-or more daughterboards (tiles). The motherboard provides a set of
-peripherals. Processor and RAM "live" on the tiles.
-
-The motherboard and each core tile should be described by a separate
-Device Tree source file, with the tile's description including
-the motherboard file using a /include/ directive. As the motherboard
-can be initialized in one of two different configurations ("memory
-maps"), care must be taken to include the correct one.
-
-
-Root node
----------
-
-Required properties in the root node:
-- compatible value:
- compatible = "arm,vexpress,<model>", "arm,vexpress";
- where <model> is the full tile model name (as used in the tile's
- Technical Reference Manual), eg.:
- - for Coretile Express A5x2 (V2P-CA5s):
- compatible = "arm,vexpress,v2p-ca5s", "arm,vexpress";
- - for Coretile Express A9x4 (V2P-CA9):
- compatible = "arm,vexpress,v2p-ca9", "arm,vexpress";
- If a tile comes in several variants or can be used in more then one
- configuration, the compatible value should be:
- compatible = "arm,vexpress,<model>,<variant>", \
- "arm,vexpress,<model>", "arm,vexpress";
- eg:
- - Coretile Express A15x2 (V2P-CA15) with Tech Chip 1:
- compatible = "arm,vexpress,v2p-ca15,tc1", \
- "arm,vexpress,v2p-ca15", "arm,vexpress";
- - LogicTile Express 13MG (V2F-2XV6) running Cortex-A7 (3 cores) SMM:
- compatible = "arm,vexpress,v2f-2xv6,ca7x3", \
- "arm,vexpress,v2f-2xv6", "arm,vexpress";
-
-Optional properties in the root node:
-- tile model name (use name from the tile's Technical Reference
- Manual, eg. "V2P-CA5s")
- model = "<model>";
-- tile's HBI number (unique ARM's board model ID, visible on the
- PCB's silkscreen) in hexadecimal transcription:
- arm,hbi = <0xhbi>
- eg:
- - for Coretile Express A5x2 (V2P-CA5s) HBI-0191:
- arm,hbi = <0x191>;
- - Coretile Express A9x4 (V2P-CA9) HBI-0225:
- arm,hbi = <0x225>;
-
-
-CPU nodes
----------
-
-Top-level standard "cpus" node is required. It must contain a node
-with device_type = "cpu" property for every available core, eg.:
-
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- cpu@0 {
- device_type = "cpu";
- compatible = "arm,cortex-a5";
- reg = <0>;
- };
- };
-
-
-Configuration infrastructure
-----------------------------
-
-The platform has an elaborated configuration system, consisting of
-microcontrollers residing on the mother- and daughterboards known
-as Motherboard/Daughterboard Configuration Controller (MCC and DCC).
-The controllers are responsible for the platform initialization
-(reset generation, flash programming, FPGA bitfiles loading etc.)
-but also control clock generators, voltage regulators, gather
-environmental data like temperature, power consumption etc. Even
-the video output switch (FPGA) is controlled that way.
-
-The controllers are not mapped into normal memory address space
-and must be accessed through bridges - other devices capable
-of generating transactions on the configuration bus.
-
-The nodes describing configuration controllers must define
-the following properties:
-- compatible value:
- compatible = "arm,vexpress,config-bus";
-- bridge phandle:
- arm,vexpress,config-bridge = <phandle>;
-and children describing available functions.
-
-
-Platform topology
------------------
-
-As Versatile Express can be configured in number of physically
-different setups, the device tree should describe platform topology.
-Root node and main motherboard node must define the following
-property, describing physical location of the children nodes:
-- site number:
- arm,vexpress,site = <number>;
- where 0 means motherboard, 1 or 2 are daugtherboard sites,
- 0xf means "master" site (site containing main CPU tile)
-- when daughterboards are stacked on one site, their position
- in the stack be be described with:
- arm,vexpress,position = <number>;
-- when describing tiles consisting more than one DCC, its number
- can be described with:
- arm,vexpress,dcc = <number>;
-
-Any of the numbers above defaults to zero if not defined in
-the node or any of its parent.
-
-
-Motherboard
------------
-
-The motherboard description file provides a single "motherboard" node
-using 2 address cells corresponding to the Static Memory Bus used
-between the motherboard and the tile. The first cell defines the Chip
-Select (CS) line number, the second cell address offset within the CS.
-All interrupt lines between the motherboard and the tile are active
-high and are described using single cell.
-
-Optional properties of the "motherboard" node:
-- motherboard's memory map variant:
- arm,v2m-memory-map = "<name>";
- where name is one of:
- - "rs1" - for RS1 map (i.a. peripherals on CS3); this map is also
- referred to as "ARM Cortex-A Series memory map":
- arm,v2m-memory-map = "rs1";
- When this property is missing, the motherboard is using the original
- memory map (also known as the "Legacy memory map", primarily used
- with the original CoreTile Express A9x4) with peripherals on CS7.
-
-Motherboard .dtsi files provide a set of labelled peripherals that
-can be used to obtain required phandle in the tile's "aliases" node:
-- UARTs, note that the numbers correspond to the physical connectors
- on the motherboard's back panel:
- v2m_serial0, v2m_serial1, v2m_serial2 and v2m_serial3
-- I2C controllers:
- v2m_i2c_dvi and v2m_i2c_pcie
-- SP804 timers:
- v2m_timer01 and v2m_timer23
-
-The tile description should define a "smb" node, describing the
-Static Memory Bus between the tile and motherboard. It must define
-the following properties:
-- "simple-bus" compatible value (to ensure creation of the children)
- compatible = "simple-bus";
-- mapping of the SMB CS/offset addresses into main address space:
- #address-cells = <2>;
- #size-cells = <1>;
- ranges = <...>;
-- interrupts mapping:
- #interrupt-cells = <1>;
- interrupt-map-mask = <0 0 63>;
- interrupt-map = <...>;
-
-
-Example of a VE tile description (simplified)
----------------------------------------------
-
-/dts-v1/;
-
-/ {
- model = "V2P-CA5s";
- arm,hbi = <0x225>;
- arm,vexpress,site = <0xf>;
- compatible = "arm,vexpress-v2p-ca5s", "arm,vexpress";
- interrupt-parent = <&gic>;
- #address-cells = <1>;
- #size-cells = <1>;
-
- chosen { };
-
- aliases {
- serial0 = &v2m_serial0;
- };
-
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- cpu@0 {
- device_type = "cpu";
- compatible = "arm,cortex-a5";
- reg = <0>;
- };
- };
-
- gic: interrupt-controller@2c001000 {
- compatible = "arm,cortex-a9-gic";
- #interrupt-cells = <3>;
- #address-cells = <0>;
- interrupt-controller;
- reg = <0x2c001000 0x1000>,
- <0x2c000100 0x100>;
- };
-
- dcc {
- compatible = "arm,vexpress,config-bus";
- arm,vexpress,config-bridge = <&v2m_sysreg>;
-
- osc@0 {
- compatible = "arm,vexpress-osc";
- };
- };
-
- smb {
- compatible = "simple-bus";
-
- #address-cells = <2>;
- #size-cells = <1>;
- /* CS0 is visible at 0x08000000 */
- ranges = <0 0 0x08000000 0x04000000>;
-
- #interrupt-cells = <1>;
- interrupt-map-mask = <0 0 63>;
- /* Active high IRQ 0 is connected to GIC's SPI0 */
- interrupt-map = <0 0 0 &gic 0 0 4>;
-
- /include/ "vexpress-v2m-rs1.dtsi"
- };
-};
-
diff --git a/Documentation/devicetree/bindings/arm/vt8500.txt b/Documentation/devicetree/bindings/arm/vt8500.txt
deleted file mode 100644
index 87dc1ddf4770..000000000000
--- a/Documentation/devicetree/bindings/arm/vt8500.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-VIA/Wondermedia VT8500 Platforms Device Tree Bindings
----------------------------------------
-
-Boards with the VIA VT8500 SoC shall have the following properties:
-Required root node property:
-compatible = "via,vt8500";
-
-Boards with the Wondermedia WM8505 SoC shall have the following properties:
-Required root node property:
-compatible = "wm,wm8505";
-
-Boards with the Wondermedia WM8650 SoC shall have the following properties:
-Required root node property:
-compatible = "wm,wm8650";
-
-Boards with the Wondermedia WM8750 SoC shall have the following properties:
-Required root node property:
-compatible = "wm,wm8750";
-
-Boards with the Wondermedia WM8850 SoC shall have the following properties:
-Required root node property:
-compatible = "wm,wm8850";
diff --git a/Documentation/devicetree/bindings/arm/vt8500.yaml b/Documentation/devicetree/bindings/arm/vt8500.yaml
new file mode 100644
index 000000000000..fa47b8989bbf
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/vt8500.yaml
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/vt8500.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: VIA/Wondermedia VT8500 Platforms
+
+maintainers:
+ - Alexey Charkov <alchark@gmail.com>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ - enum:
+ - via,vt8500
+ - wm,wm8505
+ - wm,wm8650
+ - wm,wm8750
+ - wm,wm8850
+
+ - description: VIA APC Rock and Paper boards
+ items:
+ - const: via,apc-rock
+ - const: wm,wm8950
+
+additionalProperties: true
diff --git a/Documentation/devicetree/bindings/arm/xen.txt b/Documentation/devicetree/bindings/arm/xen.txt
index c9b9321434ea..f925290d4641 100644
--- a/Documentation/devicetree/bindings/arm/xen.txt
+++ b/Documentation/devicetree/bindings/arm/xen.txt
@@ -7,15 +7,17 @@ the following properties:
compatible = "xen,xen-<version>", "xen,xen";
where <version> is the version of the Xen ABI of the platform.
-- reg: specifies the base physical address and size of a region in
- memory where the grant table should be mapped to, using an
- HYPERVISOR_memory_op hypercall. The memory region is large enough to map
- the whole grant table (it is larger or equal to gnttab_max_grant_frames()).
- This property is unnecessary when booting Dom0 using ACPI.
+- reg: specifies the base physical address and size of the regions in memory
+ where the special resources should be mapped to, using an HYPERVISOR_memory_op
+ hypercall.
+ Region 0 is reserved for mapping grant table, it must be always present.
+ The memory region is large enough to map the whole grant table (it is larger
+ or equal to gnttab_max_grant_frames()).
+ Regions 1...N are extended regions (unused address space) for mapping foreign
+ GFNs and grants, they might be absent if there is nothing to expose.
- interrupts: the interrupt used by Xen to inject event notifications.
A GIC node is also required.
- This property is unnecessary when booting Dom0 using ACPI.
To support UEFI on Xen ARM virtual platforms, Xen populates the FDT "uefi" node
under /hypervisor with following parameters:
@@ -54,7 +56,7 @@ hypervisor {
};
The format and meaning of the "xen,uefi-*" parameters are similar to those in
-Documentation/arm/uefi.txt, which are provided by the regular UEFI stub. However
+Documentation/arch/arm/uefi.rst, which are provided by the regular UEFI stub. However
they differ because they are provided by the Xen hypervisor, together with a set
of UEFI runtime services implemented via hypercalls, see
http://xenbits.xen.org/docs/unstable/hypercall/x86_64/include,public,platform.h.html.
diff --git a/Documentation/devicetree/bindings/arm/xilinx.txt b/Documentation/devicetree/bindings/arm/xilinx.txt
deleted file mode 100644
index 26fe5ecc4332..000000000000
--- a/Documentation/devicetree/bindings/arm/xilinx.txt
+++ /dev/null
@@ -1,83 +0,0 @@
-Xilinx Zynq Platforms Device Tree Bindings
-
-Boards with Zynq-7000 SOC based on an ARM Cortex A9 processor
-shall have the following properties.
-
-Required root node properties:
- - compatible = "xlnx,zynq-7000";
-
-Additional compatible strings:
-
-- Adapteva Parallella board
- "adapteva,parallella"
-
-- Avnet MicroZed board
- "avnet,zynq-microzed"
- "xlnx,zynq-microzed"
-
-- Avnet ZedBoard board
- "avnet,zynq-zed"
- "xlnx,zynq-zed"
-
-- Digilent Zybo board
- "digilent,zynq-zybo"
-
-- Digilent Zybo Z7 board
- "digilent,zynq-zybo-z7"
-
-- Xilinx CC108 internal board
- "xlnx,zynq-cc108"
-
-- Xilinx ZC702 internal board
- "xlnx,zynq-zc702"
-
-- Xilinx ZC706 internal board
- "xlnx,zynq-zc706"
-
-- Xilinx ZC770 internal board, with different FMC cards
- "xlnx,zynq-zc770-xm010"
- "xlnx,zynq-zc770-xm011"
- "xlnx,zynq-zc770-xm012"
- "xlnx,zynq-zc770-xm013"
-
----------------------------------------------------------------
-
-Xilinx Zynq UltraScale+ MPSoC Platforms Device Tree Bindings
-
-Boards with ZynqMP SOC based on an ARM Cortex A53 processor
-shall have the following properties.
-
-Required root node properties:
- - compatible = "xlnx,zynqmp";
-
-
-Additional compatible strings:
-
-- Xilinx internal board zc1232
- "xlnx,zynqmp-zc1232-revA", "xlnx,zynqmp-zc1232"
-
-- Xilinx internal board zc1254
- "xlnx,zynqmp-zc1254-revA", "xlnx,zynqmp-zc1254"
-
-- Xilinx internal board zc1275
- "xlnx,zynqmp-zc1275-revA", "xlnx,zynqmp-zc1275"
-
-- Xilinx internal board zc1751
- "xlnx,zynqmp-zc1751"
-
-- Xilinx 96boards compatible board zcu100
- "xlnx,zynqmp-zcu100-revC", "xlnx,zynqmp-zcu100"
-
-- Xilinx evaluation board zcu102
- "xlnx,zynqmp-zcu102-revA", "xlnx,zynqmp-zcu102"
- "xlnx,zynqmp-zcu102-revB", "xlnx,zynqmp-zcu102"
- "xlnx,zynqmp-zcu102-rev1.0", "xlnx,zynqmp-zcu102"
-
-- Xilinx evaluation board zcu104
- "xlnx,zynqmp-zcu104-revA", "xlnx,zynqmp-zcu104"
-
-- Xilinx evaluation board zcu106
- "xlnx,zynqmp-zcu106-revA", "xlnx,zynqmp-zcu106"
-
-- Xilinx evaluation board zcu111
- "xlnx,zynqmp-zcu111-revA", "xlnx,zynqmp-zcu111"
diff --git a/Documentation/devicetree/bindings/arm/zte,sysctrl.txt b/Documentation/devicetree/bindings/arm/zte,sysctrl.txt
deleted file mode 100644
index 7e66b7f7ba96..000000000000
--- a/Documentation/devicetree/bindings/arm/zte,sysctrl.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-ZTE sysctrl Registers
-
-Registers for 'zte,zx296702' SoC:
-
-System management required properties:
- - compatible = "zte,sysctrl"
-
-Low power management required properties:
- - compatible = "zte,zx296702-pcu"
-
-Bus matrix required properties:
- - compatible = "zte,zx-bus-matrix"
-
-
-Registers for 'zte,zx296718' SoC:
-
-System management required properties:
- - compatible = "zte,zx296718-aon-sysctrl"
- - compatible = "zte,zx296718-sysctrl"
-
-Example:
-aon_sysctrl: aon-sysctrl@116000 {
- compatible = "zte,zx296718-aon-sysctrl", "syscon";
- reg = <0x116000 0x1000>;
-};
-
-sysctrl: sysctrl@1463000 {
- compatible = "zte,zx296718-sysctrl", "syscon";
- reg = <0x1463000 0x1000>;
-};
diff --git a/Documentation/devicetree/bindings/arm/zte.txt b/Documentation/devicetree/bindings/arm/zte.txt
deleted file mode 100644
index 340612794a37..000000000000
--- a/Documentation/devicetree/bindings/arm/zte.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-ZTE platforms device tree bindings
-
----------------------------------------
-- ZX296702 board:
- Required root node properties:
- - compatible = "zte,zx296702-ad1", "zte,zx296702"
-
----------------------------------------
-- ZX296718 SoC:
- Required root node properties:
- - compatible = "zte,zx296718"
-
-ZX296718 EVB board:
- - "zte,zx296718-evb"
diff --git a/Documentation/devicetree/bindings/ata/ahci-ceva.txt b/Documentation/devicetree/bindings/ata/ahci-ceva.txt
deleted file mode 100644
index 7561cc4de371..000000000000
--- a/Documentation/devicetree/bindings/ata/ahci-ceva.txt
+++ /dev/null
@@ -1,59 +0,0 @@
-Binding for CEVA AHCI SATA Controller
-
-Required properties:
- - reg: Physical base address and size of the controller's register area.
- - compatible: Compatibility string. Must be 'ceva,ahci-1v84'.
- - clocks: Input clock specifier. Refer to common clock bindings.
- - interrupts: Interrupt specifier. Refer to interrupt binding.
- - ceva,p0-cominit-params: OOB timing value for COMINIT parameter for port 0.
- - ceva,p1-cominit-params: OOB timing value for COMINIT parameter for port 1.
- The fields for the above parameter must be as shown below:
- ceva,pN-cominit-params = /bits/ 8 <CIBGMN CIBGMX CIBGN CINMP>;
- CINMP : COMINIT Negate Minimum Period.
- CIBGN : COMINIT Burst Gap Nominal.
- CIBGMX: COMINIT Burst Gap Maximum.
- CIBGMN: COMINIT Burst Gap Minimum.
- - ceva,p0-comwake-params: OOB timing value for COMWAKE parameter for port 0.
- - ceva,p1-comwake-params: OOB timing value for COMWAKE parameter for port 1.
- The fields for the above parameter must be as shown below:
- ceva,pN-comwake-params = /bits/ 8 <CWBGMN CWBGMX CWBGN CWNMP>;
- CWBGMN: COMWAKE Burst Gap Minimum.
- CWBGMX: COMWAKE Burst Gap Maximum.
- CWBGN: COMWAKE Burst Gap Nominal.
- CWNMP: COMWAKE Negate Minimum Period.
- - ceva,p0-burst-params: Burst timing value for COM parameter for port 0.
- - ceva,p1-burst-params: Burst timing value for COM parameter for port 1.
- The fields for the above parameter must be as shown below:
- ceva,pN-burst-params = /bits/ 8 <BMX BNM SFD PTST>;
- BMX: COM Burst Maximum.
- BNM: COM Burst Nominal.
- SFD: Signal Failure Detection value.
- PTST: Partial to Slumber timer value.
- - ceva,p0-retry-params: Retry interval timing value for port 0.
- - ceva,p1-retry-params: Retry interval timing value for port 1.
- The fields for the above parameter must be as shown below:
- ceva,pN-retry-params = /bits/ 16 <RIT RCT>;
- RIT: Retry Interval Timer.
- RCT: Rate Change Timer.
-
-Optional properties:
- - ceva,broken-gen2: limit to gen1 speed instead of gen2.
-
-Examples:
- ahci@fd0c0000 {
- compatible = "ceva,ahci-1v84";
- reg = <0xfd0c0000 0x200>;
- interrupt-parent = <&gic>;
- interrupts = <0 133 4>;
- clocks = <&clkc SATA_CLK_ID>;
- ceva,p0-cominit-params = /bits/ 8 <0x0F 0x25 0x18 0x29>;
- ceva,p0-comwake-params = /bits/ 8 <0x04 0x0B 0x08 0x0F>;
- ceva,p0-burst-params = /bits/ 8 <0x0A 0x08 0x4A 0x06>;
- ceva,p0-retry-params = /bits/ 16 <0x0216 0x7F06>;
-
- ceva,p1-cominit-params = /bits/ 8 <0x0F 0x25 0x18 0x29>;
- ceva,p1-comwake-params = /bits/ 8 <0x04 0x0B 0x08 0x0F>;
- ceva,p1-burst-params = /bits/ 8 <0x0A 0x08 0x4A 0x06>;
- ceva,p1-retry-params = /bits/ 16 <0x0216 0x7F06>;
- ceva,broken-gen2;
- };
diff --git a/Documentation/devicetree/bindings/ata/ahci-common.yaml b/Documentation/devicetree/bindings/ata/ahci-common.yaml
new file mode 100644
index 000000000000..38770c4c85fd
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/ahci-common.yaml
@@ -0,0 +1,123 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ata/ahci-common.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Common Properties for Serial ATA AHCI controllers
+
+maintainers:
+ - Hans de Goede <hdegoede@redhat.com>
+ - Damien Le Moal <dlemoal@kernel.org>
+
+description:
+ This document defines device tree properties for a common AHCI SATA
+ controller implementation. It's hardware interface is supposed to
+ conform to the technical standard defined by Intel (see Serial ATA
+ Advanced Host Controller Interface specification for details). The
+ document doesn't constitute a DT-node binding by itself but merely
+ defines a set of common properties for the AHCI-compatible devices.
+
+select: false
+
+allOf:
+ - $ref: sata-common.yaml#
+
+properties:
+ reg:
+ description:
+ Generic AHCI registers space conforming to the Serial ATA AHCI
+ specification.
+
+ reg-names:
+ description: CSR space IDs
+ contains:
+ const: ahci
+
+ interrupts:
+ description:
+ Generic AHCI state change interrupt. Can be implemented either as a
+ single line attached to the controller or as a set of the signals
+ indicating the particular port events.
+ minItems: 1
+ maxItems: 32
+
+ ahci-supply:
+ description: Power regulator for AHCI controller
+
+ target-supply:
+ description: Power regulator for SATA target device
+
+ phy-supply:
+ description: Power regulator for SATA PHY
+
+ phys:
+ description: Reference to the SATA PHY node
+ maxItems: 1
+
+ phy-names:
+ const: sata-phy
+
+ hba-cap:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ Bitfield of the HBA generic platform capabilities like Staggered
+ Spin-up or Mechanical Presence Switch support. It can be used to
+ appropriately initialize the HWinit fields of the HBA CAP register
+ in case if the system firmware hasn't done it.
+
+ ports-implemented:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ Mask that indicates which ports the HBA supports. Useful if PI is not
+ programmed by the BIOS, which is true for some embedded SoC's.
+
+patternProperties:
+ "^sata-port@[0-9a-f]+$":
+ $ref: '#/$defs/ahci-port'
+ description:
+ It is optionally possible to describe the ports as sub-nodes so
+ to enable each port independently when dealing with multiple PHYs.
+
+required:
+ - reg
+ - interrupts
+
+additionalProperties: true
+
+$defs:
+ ahci-port:
+ $ref: /schemas/ata/sata-common.yaml#/$defs/sata-port
+
+ properties:
+ reg:
+ description:
+ AHCI SATA port identifier. By design AHCI controller can't have
+ more than 32 ports due to the CAP.NP fields and PI register size
+ constraints.
+ minimum: 0
+ maximum: 31
+
+ phys:
+ description: Individual AHCI SATA port PHY
+ maxItems: 1
+
+ phy-names:
+ description: AHCI SATA port PHY ID
+ const: sata-phy
+
+ target-supply:
+ description: Power regulator for SATA port target device
+
+ hba-port-cap:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ Bitfield of the HBA port-specific platform capabilities like Hot
+ plugging, eSATA, FIS-based Switching, etc (see AHCI specification
+ for details). It can be used to initialize the HWinit fields of
+ the PxCMD register in case if the system firmware hasn't done it.
+
+ required:
+ - reg
+
+...
diff --git a/Documentation/devicetree/bindings/ata/ahci-da850.txt b/Documentation/devicetree/bindings/ata/ahci-da850.txt
deleted file mode 100644
index 5f8193417725..000000000000
--- a/Documentation/devicetree/bindings/ata/ahci-da850.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-Device tree binding for the TI DA850 AHCI SATA Controller
----------------------------------------------------------
-
-Required properties:
- - compatible: must be "ti,da850-ahci"
- - reg: physical base addresses and sizes of the two register regions
- used by the controller: the register map as defined by the
- AHCI 1.1 standard and the Power Down Control Register (PWRDN)
- for enabling/disabling the SATA clock receiver
- - interrupts: interrupt specifier (refer to the interrupt binding)
-
-Example:
-
- sata: sata@218000 {
- compatible = "ti,da850-ahci";
- reg = <0x218000 0x2000>, <0x22c018 0x4>;
- interrupts = <67>;
- };
diff --git a/Documentation/devicetree/bindings/ata/ahci-dm816.txt b/Documentation/devicetree/bindings/ata/ahci-dm816.txt
deleted file mode 100644
index f8c535f3541f..000000000000
--- a/Documentation/devicetree/bindings/ata/ahci-dm816.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-Device tree binding for the TI DM816 AHCI SATA Controller
----------------------------------------------------------
-
-Required properties:
- - compatible: must be "ti,dm816-ahci"
- - reg: physical base address and size of the register region used by
- the controller (as defined by the AHCI 1.1 standard)
- - interrupts: interrupt specifier (refer to the interrupt binding)
- - clocks: list of phandle and clock specifier pairs (or only
- phandles for clock providers with '0' defined for
- #clock-cells); two clocks must be specified: the functional
- clock and an external reference clock
-
-Example:
-
- sata: sata@4a140000 {
- compatible = "ti,dm816-ahci";
- reg = <0x4a140000 0x10000>;
- interrupts = <16>;
- clocks = <&sysclk5_ck>, <&sata_refclk>;
- };
diff --git a/Documentation/devicetree/bindings/ata/ahci-fsl-qoriq.txt b/Documentation/devicetree/bindings/ata/ahci-fsl-qoriq.txt
deleted file mode 100644
index 7c3ca0e13de0..000000000000
--- a/Documentation/devicetree/bindings/ata/ahci-fsl-qoriq.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-Binding for Freescale QorIQ AHCI SATA Controller
-
-Required properties:
- - reg: Physical base address and size of the controller's register area.
- - compatible: Compatibility string. Must be 'fsl,<chip>-ahci', where
- chip could be ls1021a, ls1043a, ls1046a, ls1088a, ls2080a etc.
- - clocks: Input clock specifier. Refer to common clock bindings.
- - interrupts: Interrupt specifier. Refer to interrupt binding.
-
-Optional properties:
- - dma-coherent: Enable AHCI coherent DMA operation.
- - reg-names: register area names when there are more than 1 register area.
-
-Examples:
- sata@3200000 {
- compatible = "fsl,ls1021a-ahci";
- reg = <0x0 0x3200000 0x0 0x10000>;
- interrupts = <GIC_SPI 101 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&platform_clk 1>;
- dma-coherent;
- };
diff --git a/Documentation/devicetree/bindings/ata/ahci-mtk.txt b/Documentation/devicetree/bindings/ata/ahci-mtk.txt
deleted file mode 100644
index d2aa696b161b..000000000000
--- a/Documentation/devicetree/bindings/ata/ahci-mtk.txt
+++ /dev/null
@@ -1,51 +0,0 @@
-MediaTek Serial ATA controller
-
-Required properties:
- - compatible : Must be "mediatek,<chip>-ahci", "mediatek,mtk-ahci".
- When using "mediatek,mtk-ahci" compatible strings, you
- need SoC specific ones in addition, one of:
- - "mediatek,mt7622-ahci"
- - reg : Physical base addresses and length of register sets.
- - interrupts : Interrupt associated with the SATA device.
- - interrupt-names : Associated name must be: "hostc".
- - clocks : A list of phandle and clock specifier pairs, one for each
- entry in clock-names.
- - clock-names : Associated names must be: "ahb", "axi", "asic", "rbc", "pm".
- - phys : A phandle and PHY specifier pair for the PHY port.
- - phy-names : Associated name must be: "sata-phy".
- - ports-implemented : See ./ahci-platform.txt for details.
-
-Optional properties:
- - power-domains : A phandle and power domain specifier pair to the power
- domain which is responsible for collapsing and restoring
- power to the peripheral.
- - resets : Must contain an entry for each entry in reset-names.
- See ../reset/reset.txt for details.
- - reset-names : Associated names must be: "axi", "sw", "reg".
- - mediatek,phy-mode : A phandle to the system controller, used to enable
- SATA function.
-
-Example:
-
- sata: sata@1a200000 {
- compatible = "mediatek,mt7622-ahci",
- "mediatek,mtk-ahci";
- reg = <0 0x1a200000 0 0x1100>;
- interrupts = <GIC_SPI 233 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "hostc";
- clocks = <&pciesys CLK_SATA_AHB_EN>,
- <&pciesys CLK_SATA_AXI_EN>,
- <&pciesys CLK_SATA_ASIC_EN>,
- <&pciesys CLK_SATA_RBC_EN>,
- <&pciesys CLK_SATA_PM_EN>;
- clock-names = "ahb", "axi", "asic", "rbc", "pm";
- phys = <&u3port1 PHY_TYPE_SATA>;
- phy-names = "sata-phy";
- ports-implemented = <0x1>;
- power-domains = <&scpsys MT7622_POWER_DOMAIN_HIF0>;
- resets = <&pciesys MT7622_SATA_AXI_BUS_RST>,
- <&pciesys MT7622_SATA_PHY_SW_RST>,
- <&pciesys MT7622_SATA_PHY_REG_RST>;
- reset-names = "axi", "sw", "reg";
- mediatek,phy-mode = <&pciesys>;
- };
diff --git a/Documentation/devicetree/bindings/ata/ahci-platform.txt b/Documentation/devicetree/bindings/ata/ahci-platform.txt
deleted file mode 100644
index e30fd106df4f..000000000000
--- a/Documentation/devicetree/bindings/ata/ahci-platform.txt
+++ /dev/null
@@ -1,91 +0,0 @@
-* AHCI SATA Controller
-
-SATA nodes are defined to describe on-chip Serial ATA controllers.
-Each SATA controller should have its own node.
-
-It is possible, but not required, to represent each port as a sub-node.
-It allows to enable each port independently when dealing with multiple
-PHYs.
-
-Required properties:
-- compatible : compatible string, one of:
- - "allwinner,sun4i-a10-ahci"
- - "allwinner,sun8i-r40-ahci"
- - "brcm,iproc-ahci"
- - "hisilicon,hisi-ahci"
- - "cavium,octeon-7130-ahci"
- - "ibm,476gtr-ahci"
- - "marvell,armada-380-ahci"
- - "marvell,armada-3700-ahci"
- - "snps,dwc-ahci"
- - "snps,spear-ahci"
- - "generic-ahci"
-- interrupts : <interrupt mapping for SATA IRQ>
-- reg : <registers mapping>
-
-Please note that when using "generic-ahci" you must also specify a SoC specific
-compatible:
- compatible = "manufacturer,soc-model-ahci", "generic-ahci";
-
-Optional properties:
-- dma-coherent : Present if dma operations are coherent
-- clocks : a list of phandle + clock specifier pairs
-- resets : a list of phandle + reset specifier pairs
-- target-supply : regulator for SATA target power
-- phy-supply : regulator for PHY power
-- phys : reference to the SATA PHY node
-- phy-names : must be "sata-phy"
-- ahci-supply : regulator for AHCI controller
-- ports-implemented : Mask that indicates which ports that the HBA supports
- are available for software to use. Useful if PORTS_IMPL
- is not programmed by the BIOS, which is true with
- some embedded SOC's.
-
-Required properties when using sub-nodes:
-- #address-cells : number of cells to encode an address
-- #size-cells : number of cells representing the size of an address
-
-For allwinner,sun8i-r40-ahci, the reset propertie must be present.
-
-Sub-nodes required properties:
-- reg : the port number
-And at least one of the following properties:
-- phys : reference to the SATA PHY node
-- target-supply : regulator for SATA target power
-
-Examples:
- sata@ffe08000 {
- compatible = "snps,spear-ahci";
- reg = <0xffe08000 0x1000>;
- interrupts = <115>;
- };
-
- ahci: sata@1c18000 {
- compatible = "allwinner,sun4i-a10-ahci";
- reg = <0x01c18000 0x1000>;
- interrupts = <56>;
- clocks = <&pll6 0>, <&ahb_gates 25>;
- target-supply = <&reg_ahci_5v>;
- };
-
-With sub-nodes:
- sata@f7e90000 {
- compatible = "marvell,berlin2q-achi", "generic-ahci";
- reg = <0xe90000 0x1000>;
- interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&chip CLKID_SATA>;
- #address-cells = <1>;
- #size-cells = <0>;
-
- sata0: sata-port@0 {
- reg = <0>;
- phys = <&sata_phy 0>;
- target-supply = <&reg_sata0>;
- };
-
- sata1: sata-port@1 {
- reg = <1>;
- phys = <&sata_phy 1>;
- target-supply = <&reg_sata1>;;
- };
- };
diff --git a/Documentation/devicetree/bindings/ata/ahci-platform.yaml b/Documentation/devicetree/bindings/ata/ahci-platform.yaml
new file mode 100644
index 000000000000..cc35cdc02840
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/ahci-platform.yaml
@@ -0,0 +1,208 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ata/ahci-platform.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: AHCI SATA Controller
+
+description: |
+ SATA nodes are defined to describe on-chip Serial ATA controllers.
+ Each SATA controller should have its own node.
+
+ It is possible, but not required, to represent each port as a sub-node.
+ It allows to enable each port independently when dealing with multiple
+ PHYs.
+
+maintainers:
+ - Hans de Goede <hdegoede@redhat.com>
+ - Jens Axboe <axboe@kernel.dk>
+
+select:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - brcm,iproc-ahci
+ - cavium,octeon-7130-ahci
+ - hisilicon,hisi-ahci
+ - ibm,476gtr-ahci
+ - marvell,armada-3700-ahci
+ - marvell,armada-8k-ahci
+ - marvell,berlin2q-ahci
+ - qcom,apq8064-ahci
+ - qcom,ipq806x-ahci
+ - socionext,uniphier-pro4-ahci
+ - socionext,uniphier-pxs2-ahci
+ - socionext,uniphier-pxs3-ahci
+ required:
+ - compatible
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - brcm,iproc-ahci
+ - marvell,armada-8k-ahci
+ - marvell,berlin2-ahci
+ - marvell,berlin2q-ahci
+ - qcom,apq8064-ahci
+ - qcom,ipq806x-ahci
+ - socionext,uniphier-pro4-ahci
+ - socionext,uniphier-pxs2-ahci
+ - socionext,uniphier-pxs3-ahci
+ - const: generic-ahci
+ - enum:
+ - cavium,octeon-7130-ahci
+ - hisilicon,hisi-ahci
+ - ibm,476gtr-ahci
+ - marvell,armada-3700-ahci
+
+ reg:
+ minItems: 1
+ maxItems: 2
+
+ reg-names:
+ maxItems: 1
+
+ clocks:
+ minItems: 1
+ maxItems: 5
+
+ clock-names:
+ minItems: 1
+ maxItems: 5
+
+ interrupts:
+ maxItems: 1
+
+ power-domains:
+ maxItems: 1
+
+ resets:
+ minItems: 1
+ maxItems: 3
+
+ iommus:
+ maxItems: 1
+
+patternProperties:
+ "^sata-port@[0-9a-f]+$":
+ $ref: /schemas/ata/ahci-common.yaml#/$defs/ahci-port
+
+ anyOf:
+ - required: [ phys ]
+ - required: [ target-supply ]
+
+ unevaluatedProperties: false
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+allOf:
+ - $ref: ahci-common.yaml#
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,apq8064-ahci
+ - qcom,ipq806x-ahci
+ then:
+ properties:
+ clocks:
+ minItems: 5
+ clock-names:
+ items:
+ - const: slave_iface
+ - const: iface
+ - const: core
+ - const: rxoob
+ - const: pmalive
+ required:
+ - phys
+ - phy-names
+ - clocks
+ - clock-names
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: socionext,uniphier-pro4-ahci
+ then:
+ properties:
+ resets:
+ items:
+ - description: reset line for the parent
+ - description: reset line for the glue logic
+ - description: reset line for the controller
+ required:
+ - resets
+ else:
+ if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - socionext,uniphier-pxs2-ahci
+ - socionext,uniphier-pxs3-ahci
+ then:
+ properties:
+ resets:
+ items:
+ - description: reset for the glue logic
+ - description: reset for the controller
+ required:
+ - resets
+ else:
+ properties:
+ resets:
+ maxItems: 1
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ sata@ffe08000 {
+ compatible = "snps,spear-ahci";
+ reg = <0xffe08000 0x1000>;
+ interrupts = <115>;
+ };
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/clock/berlin2q.h>
+ #include <dt-bindings/ata/ahci.h>
+
+ sata@f7e90000 {
+ compatible = "marvell,berlin2q-ahci", "generic-ahci";
+ reg = <0xf7e90000 0x1000>;
+ interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&chip CLKID_SATA>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ hba-cap = <HBA_SMPS>;
+
+ sata0: sata-port@0 {
+ reg = <0>;
+
+ phys = <&sata_phy 0>;
+ target-supply = <&reg_sata0>;
+
+ hba-port-cap = <(HBA_PORT_FBSCP | HBA_PORT_ESP)>;
+ };
+
+ sata1: sata-port@1 {
+ reg = <1>;
+
+ phys = <&sata_phy 1>;
+ target-supply = <&reg_sata1>;
+
+ hba-port-cap = <(HBA_PORT_HPCP | HBA_PORT_MPSP | HBA_PORT_FBSCP)>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/ata/ahci-st.txt b/Documentation/devicetree/bindings/ata/ahci-st.txt
deleted file mode 100644
index 909c9935360d..000000000000
--- a/Documentation/devicetree/bindings/ata/ahci-st.txt
+++ /dev/null
@@ -1,35 +0,0 @@
-STMicroelectronics STi SATA controller
-
-This binding describes a SATA device.
-
-Required properties:
- - compatible : Must be "st,ahci"
- - reg : Physical base addresses and length of register sets
- - interrupts : Interrupt associated with the SATA device
- - interrupt-names : Associated name must be; "hostc"
- - clocks : The phandle for the clock
- - clock-names : Associated name must be; "ahci_clk"
- - phys : The phandle for the PHY port
- - phy-names : Associated name must be; "ahci_phy"
-
-Optional properties:
- - resets : The power-down, soft-reset and power-reset lines of SATA IP
- - reset-names : Associated names must be; "pwr-dwn", "sw-rst" and "pwr-rst"
-
-Example:
-
- /* Example for stih407 family silicon */
- sata0: sata@9b20000 {
- compatible = "st,ahci";
- reg = <0x9b20000 0x1000>;
- interrupts = <GIC_SPI 159 IRQ_TYPE_NONE>;
- interrupt-names = "hostc";
- phys = <&phy_port0 PHY_TYPE_SATA>;
- phy-names = "ahci_phy";
- resets = <&powerdown STIH407_SATA0_POWERDOWN>,
- <&softreset STIH407_SATA0_SOFTRESET>,
- <&softreset STIH407_SATA0_PWR_SOFTRESET>;
- reset-names = "pwr-dwn", "sw-rst", "pwr-rst";
- clocks = <&clk_s_c0_flexgen CLK_ICN_REG>;
- clock-names = "ahci_clk";
- };
diff --git a/Documentation/devicetree/bindings/ata/allwinner,sun4i-a10-ahci.yaml b/Documentation/devicetree/bindings/ata/allwinner,sun4i-a10-ahci.yaml
new file mode 100644
index 000000000000..2011bd03cdcd
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/allwinner,sun4i-a10-ahci.yaml
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ata/allwinner,sun4i-a10-ahci.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 AHCI SATA Controller
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+properties:
+ compatible:
+ const: allwinner,sun4i-a10-ahci
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: AHCI Bus Clock
+ - description: AHCI Module Clock
+
+ interrupts:
+ maxItems: 1
+
+ target-supply:
+ description: Regulator for SATA target power
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ ahci: sata@1c18000 {
+ compatible = "allwinner,sun4i-a10-ahci";
+ reg = <0x01c18000 0x1000>;
+ interrupts = <56>;
+ clocks = <&pll6 0>, <&ahb_gates 25>;
+ target-supply = <&reg_ahci_5v>;
+ };
diff --git a/Documentation/devicetree/bindings/ata/allwinner,sun8i-r40-ahci.yaml b/Documentation/devicetree/bindings/ata/allwinner,sun8i-r40-ahci.yaml
new file mode 100644
index 000000000000..a2afe2ad6063
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/allwinner,sun8i-r40-ahci.yaml
@@ -0,0 +1,67 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ata/allwinner,sun8i-r40-ahci.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner R40 AHCI SATA Controller
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+properties:
+ compatible:
+ const: allwinner,sun8i-r40-ahci
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: AHCI Bus Clock
+ - description: AHCI Module Clock
+
+ interrupts:
+ maxItems: 1
+
+ resets:
+ maxItems: 1
+
+ reset-names:
+ const: ahci
+
+ ahci-supply:
+ description: Regulator for the AHCI controller
+
+ phy-supply:
+ description: Regulator for the SATA PHY power
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - interrupts
+ - resets
+ - reset-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/clock/sun8i-r40-ccu.h>
+ #include <dt-bindings/reset/sun8i-r40-ccu.h>
+
+ ahci: sata@1c18000 {
+ compatible = "allwinner,sun8i-r40-ahci";
+ reg = <0x01c18000 0x1000>;
+ interrupts = <GIC_SPI 56 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&ccu CLK_BUS_SATA>, <&ccu CLK_SATA>;
+ resets = <&ccu RST_BUS_SATA>;
+ reset-names = "ahci";
+ ahci-supply = <&reg_dldo4>;
+ phy-supply = <&reg_eldo3>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/ata/apm,xgene-ahci.yaml b/Documentation/devicetree/bindings/ata/apm,xgene-ahci.yaml
new file mode 100644
index 000000000000..dc631381f9e1
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/apm,xgene-ahci.yaml
@@ -0,0 +1,65 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ata/apm,xgene-ahci.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: APM X-Gene 6.0 Gb/s SATA host controller
+
+maintainers:
+ - Rob Herring <robh@kernel.org>
+
+properties:
+ compatible:
+ enum:
+ - apm,xgene-ahci
+ - apm,xgene-ahci-v2
+
+ reg:
+ minItems: 4
+ items:
+ - description: AHCI memory resource
+ - description: Host controller core
+ - description: Host controller diagnostic
+ - description: Host controller AXI
+ - description: Host controller MUX
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+required:
+ - compatible
+
+unevaluatedProperties: false
+
+allOf:
+ - $ref: ahci-common.yaml#
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: apm,xgene-ahci
+ then:
+ required:
+ - clocks
+ - phys
+ - phy-names
+
+examples:
+ - |
+ sata@1a400000 {
+ compatible = "apm,xgene-ahci";
+ reg = <0x1a400000 0x1000>,
+ <0x1f220000 0x1000>,
+ <0x1f22d000 0x1000>,
+ <0x1f22e000 0x1000>,
+ <0x1f227000 0x1000>;
+ clocks = <&sataclk 0>;
+ dma-coherent;
+ interrupts = <0x0 0x87 0x4>;
+ phys = <&phy2 0>;
+ phy-names = "sata-phy";
+ };
diff --git a/Documentation/devicetree/bindings/ata/apm-xgene.txt b/Documentation/devicetree/bindings/ata/apm-xgene.txt
deleted file mode 100644
index 02e690a675db..000000000000
--- a/Documentation/devicetree/bindings/ata/apm-xgene.txt
+++ /dev/null
@@ -1,77 +0,0 @@
-* APM X-Gene 6.0 Gb/s SATA host controller nodes
-
-SATA host controller nodes are defined to describe on-chip Serial ATA
-controllers. Each SATA controller (pair of ports) have its own node.
-
-Required properties:
-- compatible : Shall contain:
- * "apm,xgene-ahci"
-- reg : First memory resource shall be the AHCI memory
- resource.
- Second memory resource shall be the host controller
- core memory resource.
- Third memory resource shall be the host controller
- diagnostic memory resource.
- 4th memory resource shall be the host controller
- AXI memory resource.
- 5th optional memory resource shall be the host
- controller MUX memory resource if required.
-- interrupts : Interrupt-specifier for SATA host controller IRQ.
-- clocks : Reference to the clock entry.
-- phys : A list of phandles + phy-specifiers, one for each
- entry in phy-names.
-- phy-names : Should contain:
- * "sata-phy" for the SATA 6.0Gbps PHY
-
-Optional properties:
-- dma-coherent : Present if dma operations are coherent
-- status : Shall be "ok" if enabled or "disabled" if disabled.
- Default is "ok".
-
-Example:
- sataclk: sataclk {
- compatible = "fixed-clock";
- #clock-cells = <1>;
- clock-frequency = <100000000>;
- clock-output-names = "sataclk";
- };
-
- phy2: phy@1f22a000 {
- compatible = "apm,xgene-phy";
- reg = <0x0 0x1f22a000 0x0 0x100>;
- #phy-cells = <1>;
- };
-
- phy3: phy@1f23a000 {
- compatible = "apm,xgene-phy";
- reg = <0x0 0x1f23a000 0x0 0x100>;
- #phy-cells = <1>;
- };
-
- sata2: sata@1a400000 {
- compatible = "apm,xgene-ahci";
- reg = <0x0 0x1a400000 0x0 0x1000>,
- <0x0 0x1f220000 0x0 0x1000>,
- <0x0 0x1f22d000 0x0 0x1000>,
- <0x0 0x1f22e000 0x0 0x1000>,
- <0x0 0x1f227000 0x0 0x1000>;
- interrupts = <0x0 0x87 0x4>;
- dma-coherent;
- clocks = <&sataclk 0>;
- phys = <&phy2 0>;
- phy-names = "sata-phy";
- };
-
- sata3: sata@1a800000 {
- compatible = "apm,xgene-ahci-pcie";
- reg = <0x0 0x1a800000 0x0 0x1000>,
- <0x0 0x1f230000 0x0 0x1000>,
- <0x0 0x1f23d000 0x0 0x1000>,
- <0x0 0x1f23e000 0x0 0x1000>,
- <0x0 0x1f237000 0x0 0x1000>;
- interrupts = <0x0 0x88 0x4>;
- dma-coherent;
- clocks = <&sataclk 0>;
- phys = <&phy3 0>;
- phy-names = "sata-phy";
- };
diff --git a/Documentation/devicetree/bindings/ata/arasan,cf-spear1340.yaml b/Documentation/devicetree/bindings/ata/arasan,cf-spear1340.yaml
new file mode 100644
index 000000000000..4d7017452dda
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/arasan,cf-spear1340.yaml
@@ -0,0 +1,70 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ata/arasan,cf-spear1340.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Arasan PATA Compact Flash Controller
+
+maintainers:
+ - Viresh Kumar <viresh.kumar@linaro.org>
+
+properties:
+ compatible:
+ const: arasan,cf-spear1340
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ arasan,broken-udma:
+ description: UDMA mode is unusable
+ type: boolean
+
+ arasan,broken-mwdma:
+ description: MWDMA mode is unusable
+ type: boolean
+
+ arasan,broken-pio:
+ description: PIO mode is unusable
+ type: boolean
+
+ dmas:
+ maxItems: 1
+
+ dma-names:
+ items:
+ - const: data
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+additionalProperties: false
+
+allOf:
+ - if:
+ not:
+ required:
+ - arasan,broken-udma
+ - arasan,broken-mwdma
+ then:
+ required:
+ - dmas
+ - dma-names
+
+examples:
+ - |
+ cf@fc000000 {
+ compatible = "arasan,cf-spear1340";
+ reg = <0xfc000000 0x1000>;
+ interrupts = <12>;
+ dmas = <&dma 23>;
+ dma-names = "data";
+ };
diff --git a/Documentation/devicetree/bindings/ata/ata-generic.yaml b/Documentation/devicetree/bindings/ata/ata-generic.yaml
new file mode 100644
index 000000000000..0697927f3d7e
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/ata-generic.yaml
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ata/ata-generic.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Generic Parallel ATA Controller
+
+maintainers:
+ - Linus Walleij <linus.walleij@linaro.org>
+
+description:
+ Generic Parallel ATA controllers supporting PIO modes only.
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - arm,vexpress-cf
+ - fsl,mpc8349emitx-pata
+ - const: ata-generic
+
+ reg:
+ items:
+ - description: Command interface registers
+ - description: Control interface registers
+
+ reg-shift:
+ enum: [ 1, 2 ]
+
+ interrupts:
+ maxItems: 1
+
+ ata-generic,use16bit:
+ type: boolean
+ description: Use 16-bit accesses instead of 32-bit for data transfers
+
+ pio-mode:
+ description: Maximum ATA PIO transfer mode
+ $ref: /schemas/types.yaml#/definitions/uint32
+ maximum: 6
+ default: 0
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ compact-flash@1a000 {
+ compatible = "arm,vexpress-cf", "ata-generic";
+ reg = <0x1a000 0x100>,
+ <0x1a100 0xf00>;
+ reg-shift = <2>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/ata/atmel-at91_cf.txt b/Documentation/devicetree/bindings/ata/atmel-at91_cf.txt
deleted file mode 100644
index c1d22b3ae134..000000000000
--- a/Documentation/devicetree/bindings/ata/atmel-at91_cf.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-Atmel AT91RM9200 CompactFlash
-
-Required properties:
-- compatible : "atmel,at91rm9200-cf".
-- reg : should specify localbus address and size used.
-- gpios : specifies the gpio pins to control the CF device. Detect
- and reset gpio's are mandatory while irq and vcc gpio's are
- optional and may be set to 0 if not present.
-
-Example:
-compact-flash@50000000 {
- compatible = "atmel,at91rm9200-cf";
- reg = <0x50000000 0x30000000>;
- gpios = <&pioC 13 0 /* irq */
- &pioC 15 0 /* detect */
- 0 /* vcc */
- &pioC 5 0 /* reset */
- >;
-};
diff --git a/Documentation/devicetree/bindings/ata/baikal,bt1-ahci.yaml b/Documentation/devicetree/bindings/ata/baikal,bt1-ahci.yaml
new file mode 100644
index 000000000000..9b7ca4759bd7
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/baikal,bt1-ahci.yaml
@@ -0,0 +1,115 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ata/baikal,bt1-ahci.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Baikal-T1 SoC AHCI SATA controller
+
+maintainers:
+ - Serge Semin <fancer.lancer@gmail.com>
+
+description:
+ AHCI SATA controller embedded into the Baikal-T1 SoC is based on the
+ DWC AHCI SATA v4.10a IP-core.
+
+allOf:
+ - $ref: snps,dwc-ahci-common.yaml#
+
+properties:
+ compatible:
+ const: baikal,bt1-ahci
+
+ clocks:
+ items:
+ - description: Peripheral APB bus clock
+ - description: Application AXI BIU clock
+ - description: SATA Ports reference clock
+
+ clock-names:
+ items:
+ - const: pclk
+ - const: aclk
+ - const: ref
+
+ resets:
+ items:
+ - description: Application AXI BIU domain reset
+ - description: SATA Ports clock domain reset
+
+ reset-names:
+ items:
+ - const: arst
+ - const: ref
+
+ ports-implemented:
+ maximum: 0x3
+
+patternProperties:
+ "^sata-port@[0-1]$":
+ $ref: /schemas/ata/snps,dwc-ahci-common.yaml#/$defs/dwc-ahci-port
+
+ properties:
+ reg:
+ minimum: 0
+ maximum: 1
+
+ snps,tx-ts-max:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ Due to having AXI3 bus interface utilized the maximum Tx DMA
+ transaction size can't exceed 16 beats (AxLEN[3:0]).
+ enum: [ 1, 2, 4, 8, 16 ]
+
+ snps,rx-ts-max:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ Due to having AXI3 bus interface utilized the maximum Rx DMA
+ transaction size can't exceed 16 beats (AxLEN[3:0]).
+ enum: [ 1, 2, 4, 8, 16 ]
+
+ unevaluatedProperties: false
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+ - resets
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ sata@1f050000 {
+ compatible = "baikal,bt1-ahci";
+ reg = <0x1f050000 0x2000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ interrupts = <0 64 4>;
+
+ clocks = <&ccu_sys 1>, <&ccu_axi 2>, <&sata_ref_clk>;
+ clock-names = "pclk", "aclk", "ref";
+
+ resets = <&ccu_axi 2>, <&ccu_sys 0>;
+ reset-names = "arst", "ref";
+
+ ports-implemented = <0x3>;
+
+ sata-port@0 {
+ reg = <0>;
+
+ snps,tx-ts-max = <4>;
+ snps,rx-ts-max = <4>;
+ };
+
+ sata-port@1 {
+ reg = <1>;
+
+ snps,tx-ts-max = <4>;
+ snps,rx-ts-max = <4>;
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/ata/brcm,sata-brcm.txt b/Documentation/devicetree/bindings/ata/brcm,sata-brcm.txt
deleted file mode 100644
index 7713a413c6a7..000000000000
--- a/Documentation/devicetree/bindings/ata/brcm,sata-brcm.txt
+++ /dev/null
@@ -1,38 +0,0 @@
-* Broadcom SATA3 AHCI Controller
-
-SATA nodes are defined to describe on-chip Serial ATA controllers.
-Each SATA controller should have its own node.
-
-Required properties:
-- compatible : should be one or more of
- "brcm,bcm7425-ahci"
- "brcm,bcm7445-ahci"
- "brcm,bcm-nsp-ahci"
- "brcm,sata3-ahci"
- "brcm,bcm63138-ahci"
-- reg : register mappings for AHCI and SATA_TOP_CTRL
-- reg-names : "ahci" and "top-ctrl"
-- interrupts : interrupt mapping for SATA IRQ
-
-Also see ahci-platform.txt.
-
-Example:
-
- sata@f045a000 {
- compatible = "brcm,bcm7445-ahci", "brcm,sata3-ahci";
- reg = <0xf045a000 0xa9c>, <0xf0458040 0x24>;
- reg-names = "ahci", "top-ctrl";
- interrupts = <0 30 0>;
- #address-cells = <1>;
- #size-cells = <0>;
-
- sata0: sata-port@0 {
- reg = <0>;
- phys = <&sata_phy 0>;
- };
-
- sata1: sata-port@1 {
- reg = <1>;
- phys = <&sata_phy 1>;
- };
- };
diff --git a/Documentation/devicetree/bindings/ata/brcm,sata-brcm.yaml b/Documentation/devicetree/bindings/ata/brcm,sata-brcm.yaml
new file mode 100644
index 000000000000..fe7f091e744f
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/brcm,sata-brcm.yaml
@@ -0,0 +1,87 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ata/brcm,sata-brcm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom SATA3 AHCI Controller
+
+description:
+ SATA nodes are defined to describe on-chip Serial ATA controllers.
+ Each SATA controller should have its own node.
+
+maintainers:
+ - Florian Fainelli <f.fainelli@gmail.com>
+
+allOf:
+ - $ref: ahci-common.yaml#
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - brcm,bcm7216-ahci
+ - brcm,bcm7445-ahci
+ - brcm,bcm7425-ahci
+ - brcm,bcm63138-ahci
+ - const: brcm,sata3-ahci
+ - items:
+ - const: brcm,bcm-nsp-ahci
+
+ reg:
+ maxItems: 2
+
+ reg-names:
+ items:
+ - const: ahci
+ - const: top-ctrl
+
+ interrupts:
+ maxItems: 1
+
+if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - brcm,bcm7216-ahci
+ - brcm,bcm63138-ahci
+then:
+ properties:
+ resets:
+ maxItems: 1
+ reset-names:
+ enum:
+ - rescal
+ - ahci
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - "#address-cells"
+ - "#size-cells"
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ sata@f045a000 {
+ compatible = "brcm,bcm7445-ahci", "brcm,sata3-ahci";
+ reg = <0xf045a000 0xa9c>, <0xf0458040 0x24>;
+ reg-names = "ahci", "top-ctrl";
+ interrupts = <0 30 0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ sata0: sata-port@0 {
+ reg = <0>;
+ phys = <&sata_phy 0>;
+ };
+
+ sata1: sata-port@1 {
+ reg = <1>;
+ phys = <&sata_phy 1>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/ata/cavium,ebt3000-compact-flash.yaml b/Documentation/devicetree/bindings/ata/cavium,ebt3000-compact-flash.yaml
new file mode 100644
index 000000000000..349f289b81e6
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/cavium,ebt3000-compact-flash.yaml
@@ -0,0 +1,59 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ata/cavium,ebt3000-compact-flash.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Cavium Compact Flash
+
+maintainers:
+ - Rob Herring <robh@kernel.org>
+
+description:
+ The Cavium Compact Flash device is connected to the Octeon Boot Bus, and is
+ thus a child of the Boot Bus device. It can read and write industry standard
+ compact flash devices.
+
+properties:
+ compatible:
+ const: cavium,ebt3000-compact-flash
+
+ reg:
+ description: The base address of the CF chip select banks.
+ items:
+ - description: CF chip select bank 0
+ - description: CF chip select bank 1
+
+ cavium,bus-width:
+ description: The width of the connection to the CF devices.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [8, 16]
+
+ cavium,true-ide:
+ description: True IDE mode when present.
+ type: boolean
+
+ cavium,dma-engine-handle:
+ description: A phandle for the DMA Engine connected to this device.
+ $ref: /schemas/types.yaml#/definitions/phandle
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ bus {
+ #address-cells = <2>;
+ #size-cells = <1>;
+
+ compact-flash@5,0 {
+ compatible = "cavium,ebt3000-compact-flash";
+ reg = <5 0 0x10000>, <6 0 0x10000>;
+ cavium,bus-width = <16>;
+ cavium,true-ide;
+ cavium,dma-engine-handle = <&dma0>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/ata/cavium-compact-flash.txt b/Documentation/devicetree/bindings/ata/cavium-compact-flash.txt
deleted file mode 100644
index 3bacc8e0931e..000000000000
--- a/Documentation/devicetree/bindings/ata/cavium-compact-flash.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-* Compact Flash
-
-The Cavium Compact Flash device is connected to the Octeon Boot Bus,
-and is thus a child of the Boot Bus device. It can read and write
-industry standard compact flash devices.
-
-Properties:
-- compatible: "cavium,ebt3000-compact-flash";
-
- Compatibility with many Cavium evaluation boards.
-
-- reg: The base address of the CF chip select banks. Depending on
- the device configuration, there may be one or two banks.
-
-- cavium,bus-width: The width of the connection to the CF devices. Valid
- values are 8 and 16.
-
-- cavium,true-ide: Optional, if present the CF connection is in True IDE mode.
-
-- cavium,dma-engine-handle: Optional, a phandle for the DMA Engine connected
- to this device.
-
-Example:
- compact-flash@5,0 {
- compatible = "cavium,ebt3000-compact-flash";
- reg = <5 0 0x10000>, <6 0 0x10000>;
- cavium,bus-width = <16>;
- cavium,true-ide;
- cavium,dma-engine-handle = <&dma0>;
- };
diff --git a/Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml b/Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml
new file mode 100644
index 000000000000..c92341888a28
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml
@@ -0,0 +1,187 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ata/ceva,ahci-1v84.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Ceva AHCI SATA Controller
+
+maintainers:
+ - Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
+
+description: |
+ The Ceva SATA controller mostly conforms to the AHCI interface with some
+ special extensions to add functionality, is a high-performance dual-port
+ SATA host controller with an AHCI compliant command layer which supports
+ advanced features such as native command queuing and frame information
+ structure (FIS) based switching for systems employing port multipliers.
+
+properties:
+ compatible:
+ const: ceva,ahci-1v84
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ dma-coherent: true
+
+ interrupts:
+ maxItems: 1
+
+ iommus:
+ maxItems: 4
+
+ power-domains:
+ maxItems: 1
+
+ ceva,p0-cominit-params:
+ $ref: /schemas/types.yaml#/definitions/uint8-array
+ description: |
+ OOB timing value for COMINIT parameter for port 0.
+ The fields for the above parameter must be as shown below:-
+ ceva,p0-cominit-params = /bits/ 8 <CIBGMN CIBGMX CIBGN CINMP>;
+ items:
+ - description: CINMP - COMINIT Negate Minimum Period.
+ - description: CIBGN - COMINIT Burst Gap Nominal.
+ - description: CIBGMX - COMINIT Burst Gap Maximum.
+ - description: CIBGMN - COMINIT Burst Gap Minimum.
+
+ ceva,p0-comwake-params:
+ $ref: /schemas/types.yaml#/definitions/uint8-array
+ description: |
+ OOB timing value for COMWAKE parameter for port 0.
+ The fields for the above parameter must be as shown below:-
+ ceva,p0-comwake-params = /bits/ 8 <CWBGMN CWBGMX CWBGN CWNMP>;
+ items:
+ - description: CWBGMN - COMWAKE Burst Gap Minimum.
+ - description: CWBGMX - COMWAKE Burst Gap Maximum.
+ - description: CWBGN - COMWAKE Burst Gap Nominal.
+ - description: CWNMP - COMWAKE Negate Minimum Period.
+
+ ceva,p0-burst-params:
+ $ref: /schemas/types.yaml#/definitions/uint8-array
+ description: |
+ Burst timing value for COM parameter for port 0.
+ The fields for the above parameter must be as shown below:-
+ ceva,p0-burst-params = /bits/ 8 <BMX BNM SFD PTST>;
+ items:
+ - description: BMX - COM Burst Maximum.
+ - description: BNM - COM Burst Nominal.
+ - description: SFD - Signal Failure Detection value.
+ - description: PTST - Partial to Slumber timer value.
+
+ ceva,p0-retry-params:
+ $ref: /schemas/types.yaml#/definitions/uint16-array
+ description: |
+ Retry interval timing value for port 0.
+ The fields for the above parameter must be as shown below:-
+ ceva,p0-retry-params = /bits/ 16 <RIT RCT>;
+ items:
+ - description: RIT - Retry Interval Timer.
+ - description: RCT - Rate Change Timer.
+
+ ceva,p1-cominit-params:
+ $ref: /schemas/types.yaml#/definitions/uint8-array
+ description: |
+ OOB timing value for COMINIT parameter for port 1.
+ The fields for the above parameter must be as shown below:-
+ ceva,p1-cominit-params = /bits/ 8 <CIBGMN CIBGMX CIBGN CINMP>;
+ items:
+ - description: CINMP - COMINIT Negate Minimum Period.
+ - description: CIBGN - COMINIT Burst Gap Nominal.
+ - description: CIBGMX - COMINIT Burst Gap Maximum.
+ - description: CIBGMN - COMINIT Burst Gap Minimum.
+
+ ceva,p1-comwake-params:
+ $ref: /schemas/types.yaml#/definitions/uint8-array
+ description: |
+ OOB timing value for COMWAKE parameter for port 1.
+ The fields for the above parameter must be as shown below:-
+ ceva,p1-comwake-params = /bits/ 8 <CWBGMN CWBGMX CWBGN CWNMP>;
+ items:
+ - description: CWBGMN - COMWAKE Burst Gap Minimum.
+ - description: CWBGMX - COMWAKE Burst Gap Maximum.
+ - description: CWBGN - COMWAKE Burst Gap Nominal.
+ - description: CWNMP - COMWAKE Negate Minimum Period.
+
+ ceva,p1-burst-params:
+ $ref: /schemas/types.yaml#/definitions/uint8-array
+ description: |
+ Burst timing value for COM parameter for port 1.
+ The fields for the above parameter must be as shown below:-
+ ceva,p1-burst-params = /bits/ 8 <BMX BNM SFD PTST>;
+ items:
+ - description: BMX - COM Burst Maximum.
+ - description: BNM - COM Burst Nominal.
+ - description: SFD - Signal Failure Detection value.
+ - description: PTST - Partial to Slumber timer value.
+
+ ceva,p1-retry-params:
+ $ref: /schemas/types.yaml#/definitions/uint16-array
+ description: |
+ Retry interval timing value for port 1.
+ The fields for the above parameter must be as shown below:-
+ ceva,pN-retry-params = /bits/ 16 <RIT RCT>;
+ items:
+ - description: RIT - Retry Interval Timer.
+ - description: RCT - Rate Change Timer.
+
+ ceva,broken-gen2:
+ $ref: /schemas/types.yaml#/definitions/flag
+ description: |
+ limit to gen1 speed instead of gen2.
+
+ phys:
+ maxItems: 1
+
+ phy-names:
+ items:
+ - const: sata-phy
+
+ resets:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - interrupts
+ - ceva,p0-cominit-params
+ - ceva,p0-comwake-params
+ - ceva,p0-burst-params
+ - ceva,p0-retry-params
+ - ceva,p1-cominit-params
+ - ceva,p1-comwake-params
+ - ceva,p1-burst-params
+ - ceva,p1-retry-params
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/power/xlnx-zynqmp-power.h>
+ #include <dt-bindings/reset/xlnx-zynqmp-resets.h>
+ #include <dt-bindings/phy/phy.h>
+
+ sata: ahci@fd0c0000 {
+ compatible = "ceva,ahci-1v84";
+ reg = <0xfd0c0000 0x200>;
+ interrupt-parent = <&gic>;
+ interrupts = <0 133 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&zynqmp_clk 22>;
+ ceva,p0-cominit-params = /bits/ 8 <0x0F 0x25 0x18 0x29>;
+ ceva,p0-comwake-params = /bits/ 8 <0x04 0x0B 0x08 0x0F>;
+ ceva,p0-burst-params = /bits/ 8 <0x0A 0x08 0x4A 0x06>;
+ ceva,p0-retry-params = /bits/ 16 <0x0216 0x7F06>;
+ ceva,p1-cominit-params = /bits/ 8 <0x0F 0x25 0x18 0x29>;
+ ceva,p1-comwake-params = /bits/ 8 <0x04 0x0B 0x08 0x0F>;
+ ceva,p1-burst-params = /bits/ 8 <0x0A 0x08 0x4A 0x06>;
+ ceva,p1-retry-params = /bits/ 16 <0x0216 0x7F06>;
+ ceva,broken-gen2;
+ phys = <&psgtr 1 PHY_TYPE_SATA 1 1>;
+ resets = <&zynqmp_reset ZYNQMP_RESET_SATA>;
+ };
diff --git a/Documentation/devicetree/bindings/ata/cirrus,ep9312-pata.yaml b/Documentation/devicetree/bindings/ata/cirrus,ep9312-pata.yaml
new file mode 100644
index 000000000000..8130923fdc72
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/cirrus,ep9312-pata.yaml
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ata/cirrus,ep9312-pata.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Cirrus Logic EP9312 PATA controller
+
+maintainers:
+ - Damien Le Moal <dlemoal@kernel.org>
+
+properties:
+ compatible:
+ oneOf:
+ - const: cirrus,ep9312-pata
+ - items:
+ - const: cirrus,ep9315-pata
+ - const: cirrus,ep9312-pata
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ ide@800a0000 {
+ compatible = "cirrus,ep9312-pata";
+ reg = <0x800a0000 0x38>;
+ interrupt-parent = <&vic1>;
+ interrupts = <8>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&ide_default_pins>;
+ };
diff --git a/Documentation/devicetree/bindings/ata/cortina,gemini-sata-bridge.txt b/Documentation/devicetree/bindings/ata/cortina,gemini-sata-bridge.txt
deleted file mode 100644
index 1c3d3cc70051..000000000000
--- a/Documentation/devicetree/bindings/ata/cortina,gemini-sata-bridge.txt
+++ /dev/null
@@ -1,55 +0,0 @@
-* Cortina Systems Gemini SATA Bridge
-
-The Gemini SATA bridge in a SoC-internal PATA to SATA bridge that
-takes two Faraday Technology FTIDE010 PATA controllers and bridges
-them in different configurations to two SATA ports.
-
-Required properties:
-- compatible: should be
- "cortina,gemini-sata-bridge"
-- reg: registers and size for the block
-- resets: phandles to the reset lines for both SATA bridges
-- reset-names: must be "sata0", "sata1"
-- clocks: phandles to the compulsory peripheral clocks
-- clock-names: must be "SATA0_PCLK", "SATA1_PCLK"
-- syscon: a phandle to the global Gemini system controller
-- cortina,gemini-ata-muxmode: tell the desired multiplexing mode for
- the ATA controller and SATA bridges. Values 0..3:
- Mode 0: ata0 master <-> sata0
- ata1 master <-> sata1
- ata0 slave interface brought out on IDE pads
- Mode 1: ata0 master <-> sata0
- ata1 master <-> sata1
- ata1 slave interface brought out on IDE pads
- Mode 2: ata1 master <-> sata1
- ata1 slave <-> sata0
- ata0 master and slave interfaces brought out
- on IDE pads
- Mode 3: ata0 master <-> sata0
- ata0 slave <-> sata1
- ata1 master and slave interfaces brought out
- on IDE pads
-
-Optional boolean properties:
-- cortina,gemini-enable-ide-pins: enables the PATA to IDE connection.
- The muxmode setting decides whether ATA0 or ATA1 is brought out,
- and whether master, slave or both interfaces get brought out.
-- cortina,gemini-enable-sata-bridge: enables the PATA to SATA bridge
- inside the Gemnini SoC. The Muxmode decides what PATA blocks will
- be muxed out and how.
-
-Example:
-
-sata: sata@46000000 {
- compatible = "cortina,gemini-sata-bridge";
- reg = <0x46000000 0x100>;
- resets = <&rcon 26>, <&rcon 27>;
- reset-names = "sata0", "sata1";
- clocks = <&gcc GEMINI_CLK_GATE_SATA0>,
- <&gcc GEMINI_CLK_GATE_SATA1>;
- clock-names = "SATA0_PCLK", "SATA1_PCLK";
- syscon = <&syscon>;
- cortina,gemini-ata-muxmode = <3>;
- cortina,gemini-enable-ide-pins;
- cortina,gemini-enable-sata-bridge;
-};
diff --git a/Documentation/devicetree/bindings/ata/cortina,gemini-sata-bridge.yaml b/Documentation/devicetree/bindings/ata/cortina,gemini-sata-bridge.yaml
new file mode 100644
index 000000000000..529093666508
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/cortina,gemini-sata-bridge.yaml
@@ -0,0 +1,107 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ata/cortina,gemini-sata-bridge.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Cortina Systems Gemini SATA Bridge
+
+maintainers:
+ - Linus Walleij <linus.walleij@linaro.org>
+
+description: |
+ The Gemini SATA bridge in a SoC-internal PATA to SATA bridge that
+ takes two Faraday Technology FTIDE010 PATA controllers and bridges
+ them in different configurations to two SATA ports.
+
+properties:
+ compatible:
+ const: cortina,gemini-sata-bridge
+
+ reg:
+ maxItems: 1
+
+ resets:
+ maxItems: 2
+ description: phandles to the reset lines for both SATA bridges
+
+ reset-names:
+ items:
+ - const: sata0
+ - const: sata1
+
+ clocks:
+ maxItems: 2
+ description: phandles to the compulsory peripheral clocks
+
+ clock-names:
+ items:
+ - const: SATA0_PCLK
+ - const: SATA1_PCLK
+
+ syscon:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description: a phandle to the global Gemini system controller
+
+ cortina,gemini-ata-muxmode:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum:
+ - 0
+ - 1
+ - 2
+ - 3
+ description: |
+ Tell the desired multiplexing mode for the ATA controller and SATA
+ bridges.
+ Mode 0: ata0 master <-> sata0
+ ata1 master <-> sata1
+ ata0 slave interface brought out on IDE pads
+ Mode 1: ata0 master <-> sata0
+ ata1 master <-> sata1
+ ata1 slave interface brought out on IDE pads
+ Mode 2: ata1 master <-> sata1
+ ata1 slave <-> sata0
+ ata0 master and slave interfaces brought out on IDE pads
+ Mode 3: ata0 master <-> sata0
+ ata0 slave <-> sata1
+ ata1 master and slave interfaces brought out on IDE pads
+
+ cortina,gemini-enable-ide-pins:
+ type: boolean
+ description: Enables the PATA to IDE connection.
+ The muxmode setting decides whether ATA0 or ATA1 is brought out,
+ and whether master, slave or both interfaces get brought out.
+
+ cortina,gemini-enable-sata-bridge:
+ type: boolean
+ description: Enables the PATA to SATA bridge inside the Gemnini SoC.
+ The Muxmode decides what PATA blocks will be muxed out and how.
+
+required:
+ - clocks
+ - clock-names
+ - cortina,gemini-ata-muxmode
+ - resets
+ - reset-names
+ - compatible
+ - reg
+ - syscon
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/cortina,gemini-clock.h>
+ sata@46000000 {
+ compatible = "cortina,gemini-sata-bridge";
+ reg = <0x46000000 0x100>;
+ resets = <&rcon 26>, <&rcon 27>;
+ reset-names = "sata0", "sata1";
+ clocks = <&gcc GEMINI_CLK_GATE_SATA0>,
+ <&gcc GEMINI_CLK_GATE_SATA1>;
+ clock-names = "SATA0_PCLK", "SATA1_PCLK";
+ syscon = <&syscon>;
+ cortina,gemini-ata-muxmode = <3>;
+ cortina,gemini-enable-ide-pins;
+ cortina,gemini-enable-sata-bridge;
+ };
diff --git a/Documentation/devicetree/bindings/ata/exynos-sata.txt b/Documentation/devicetree/bindings/ata/exynos-sata.txt
deleted file mode 100644
index cb48448247ea..000000000000
--- a/Documentation/devicetree/bindings/ata/exynos-sata.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-* Samsung AHCI SATA Controller
-
-SATA nodes are defined to describe on-chip Serial ATA controllers.
-Each SATA controller should have its own node.
-
-Required properties:
-- compatible : compatible list, contains "samsung,exynos5-sata"
-- interrupts : <interrupt mapping for SATA IRQ>
-- reg : <registers mapping>
-- samsung,sata-freq : <frequency in MHz>
-- phys : Must contain exactly one entry as specified
- in phy-bindings.txt
-- phy-names : Must be "sata-phy"
-
-Optional properties:
-- clocks : Must contain an entry for each entry in clock-names.
-- clock-names : Shall be "sata" for the external SATA bus clock,
- and "sclk_sata" for the internal controller clock.
-
-Example:
- sata@122f0000 {
- compatible = "snps,dwc-ahci";
- samsung,sata-freq = <66>;
- reg = <0x122f0000 0x1ff>;
- interrupts = <0 115 0>;
- clocks = <&clock 277>, <&clock 143>;
- clock-names = "sata", "sclk_sata";
- phys = <&sata_phy>;
- phy-names = "sata-phy";
- };
diff --git a/Documentation/devicetree/bindings/ata/faraday,ftide010.txt b/Documentation/devicetree/bindings/ata/faraday,ftide010.txt
deleted file mode 100644
index a0c64a29104d..000000000000
--- a/Documentation/devicetree/bindings/ata/faraday,ftide010.txt
+++ /dev/null
@@ -1,38 +0,0 @@
-* Faraday Technology FTIDE010 PATA controller
-
-This controller is the first Faraday IDE interface block, used in the
-StorLink SL2312 and SL3516, later known as the Cortina Systems Gemini
-platform. The controller can do PIO modes 0 through 4, Multi-word DMA
-(MWDM)modes 0 through 2 and Ultra DMA modes 0 through 6.
-
-On the Gemini platform, this PATA block is accompanied by a PATA to
-SATA bridge in order to support SATA. This is why a phandle to that
-controller is compulsory on that platform.
-
-The timing properties are unique per-SoC, not per-board.
-
-Required properties:
-- compatible: should be one of
- "cortina,gemini-pata", "faraday,ftide010"
- "faraday,ftide010"
-- interrupts: interrupt for the block
-- reg: registers and size for the block
-
-Optional properties:
-- clocks: a SoC clock running the peripheral.
-- clock-names: should be set to "PCLK" for the peripheral clock.
-
-Required properties for "cortina,gemini-pata" compatible:
-- sata: a phande to the Gemini PATA to SATA bridge, see
- cortina,gemini-sata-bridge.txt for details.
-
-Example:
-
-ata@63000000 {
- compatible = "cortina,gemini-pata", "faraday,ftide010";
- reg = <0x63000000 0x100>;
- interrupts = <4 IRQ_TYPE_EDGE_RISING>;
- clocks = <&gcc GEMINI_CLK_GATE_IDE>;
- clock-names = "PCLK";
- sata = <&sata>;
-};
diff --git a/Documentation/devicetree/bindings/ata/faraday,ftide010.yaml b/Documentation/devicetree/bindings/ata/faraday,ftide010.yaml
new file mode 100644
index 000000000000..fa16f3767c6a
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/faraday,ftide010.yaml
@@ -0,0 +1,91 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ata/faraday,ftide010.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Faraday Technology FTIDE010 PATA controller
+
+maintainers:
+ - Linus Walleij <linus.walleij@linaro.org>
+
+description: |
+ This controller is the first Faraday IDE interface block, used in the
+ StorLink SL3512 and SL3516, later known as the Cortina Systems Gemini
+ platform. The controller can do PIO modes 0 through 4, Multi-word DMA
+ (MWDM) modes 0 through 2 and Ultra DMA modes 0 through 6.
+
+ On the Gemini platform, this PATA block is accompanied by a PATA to
+ SATA bridge in order to support SATA. This is why a phandle to that
+ controller is compulsory on that platform.
+
+ The timing properties are unique per-SoC, not per-board.
+
+properties:
+ compatible:
+ oneOf:
+ - const: faraday,ftide010
+ - items:
+ - const: cortina,gemini-pata
+ - const: faraday,ftide010
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ minItems: 1
+
+ clock-names:
+ const: PCLK
+
+ sata:
+ description:
+ phandle to the Gemini PATA to SATA bridge, if available
+ $ref: /schemas/types.yaml#/definitions/phandle
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+allOf:
+ - $ref: pata-common.yaml#
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: cortina,gemini-pata
+
+ then:
+ required:
+ - sata
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/clock/cortina,gemini-clock.h>
+
+ ide@63000000 {
+ compatible = "cortina,gemini-pata", "faraday,ftide010";
+ reg = <0x63000000 0x100>;
+ interrupts = <4 IRQ_TYPE_EDGE_RISING>;
+ clocks = <&gcc GEMINI_CLK_GATE_IDE>;
+ clock-names = "PCLK";
+ sata = <&sata>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ ide-port@0 {
+ reg = <0>;
+ };
+ ide-port@1 {
+ reg = <1>;
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/ata/fsl,ahci.yaml b/Documentation/devicetree/bindings/ata/fsl,ahci.yaml
new file mode 100644
index 000000000000..ea4428bc1742
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/fsl,ahci.yaml
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ata/fsl,ahci.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale QorIQ AHCI SATA Controller
+
+maintainers:
+ - Frank Li <Frank.Li@nxp.com>
+
+properties:
+ compatible:
+ oneOf:
+ - description: SATA controller for ls1012a
+ items:
+ - const: fsl,ls1012a-ahci
+ - const: fsl,ls1043a-ahci
+ - enum:
+ - fsl,ls1021a-ahci
+ - fsl,ls1028a-ahci
+ - fsl,ls1043a-ahci
+ - fsl,ls1046a-ahci
+ - fsl,ls1088a-ahci
+ - fsl,ls2080a-ahci
+ - fsl,lx2160a-ahci
+
+ reg:
+ minItems: 1
+ maxItems: 2
+
+ reg-names:
+ items:
+ - const: ahci
+ - const: sata-ecc
+ minItems: 1
+
+ clocks:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ dma-coherent: true
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ sata@3200000 {
+ compatible = "fsl,ls1021a-ahci";
+ reg = <0x3200000 0x10000>;
+ interrupts = <GIC_SPI 101 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&platform_clk 1>;
+ dma-coherent;
+ };
diff --git a/Documentation/devicetree/bindings/ata/fsl,imx-pata.yaml b/Documentation/devicetree/bindings/ata/fsl,imx-pata.yaml
new file mode 100644
index 000000000000..324e2413bba8
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/fsl,imx-pata.yaml
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ata/fsl,imx-pata.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX PATA Controller
+
+maintainers:
+ - Animesh Agarwal <animeshagarwal28@gmail.com>
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - fsl,imx31-pata
+ - fsl,imx51-pata
+ - const: fsl,imx27-pata
+ - const: fsl,imx27-pata
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ items:
+ - description: PATA Controller interrupts
+
+ clocks:
+ items:
+ - description: PATA Controller clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ pata: pata@83fe0000 {
+ compatible = "fsl,imx51-pata", "fsl,imx27-pata";
+ reg = <0x83fe0000 0x4000>;
+ interrupts = <70>;
+ clocks = <&clks 161>;
+ };
diff --git a/Documentation/devicetree/bindings/ata/fsl,pq-sata.yaml b/Documentation/devicetree/bindings/ata/fsl,pq-sata.yaml
new file mode 100644
index 000000000000..1d19ee832f0c
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/fsl,pq-sata.yaml
@@ -0,0 +1,60 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ata/fsl,pq-sata.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale 8xxx/3.0 Gb/s SATA nodes
+
+maintainers:
+ - J. Neuschäfer <j.ne@posteo.net>
+
+description:
+ SATA nodes are defined to describe on-chip Serial ATA controllers.
+ Each SATA controller should have its own node.
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - fsl,mpc8377-sata
+ - fsl,mpc8536-sata
+ - fsl,mpc8315-sata
+ - fsl,mpc8379-sata
+ - const: fsl,pq-sata
+ - const: fsl,pq-sata-v2
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ cell-index:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [1, 2, 3, 4]
+ description: |
+ 1 for controller @ 0x18000
+ 2 for controller @ 0x19000
+ 3 for controller @ 0x1a000
+ 4 for controller @ 0x1b000
+
+required:
+ - compatible
+ - interrupts
+ - cell-index
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ sata@18000 {
+ compatible = "fsl,mpc8379-sata", "fsl,pq-sata";
+ reg = <0x18000 0x1000>;
+ cell-index = <1>;
+ interrupts = <44 IRQ_TYPE_LEVEL_LOW>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/ata/fsl-sata.txt b/Documentation/devicetree/bindings/ata/fsl-sata.txt
deleted file mode 100644
index fd63bb3becc9..000000000000
--- a/Documentation/devicetree/bindings/ata/fsl-sata.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-* Freescale 8xxx/3.0 Gb/s SATA nodes
-
-SATA nodes are defined to describe on-chip Serial ATA controllers.
-Each SATA port should have its own node.
-
-Required properties:
-- compatible : compatible list, contains 2 entries, first is
- "fsl,CHIP-sata", where CHIP is the processor
- (mpc8315, mpc8379, etc.) and the second is
- "fsl,pq-sata"
-- interrupts : <interrupt mapping for SATA IRQ>
-- cell-index : controller index.
- 1 for controller @ 0x18000
- 2 for controller @ 0x19000
- 3 for controller @ 0x1a000
- 4 for controller @ 0x1b000
-
-Optional properties:
-- reg : <registers mapping>
-
-Example:
- sata@18000 {
- compatible = "fsl,mpc8379-sata", "fsl,pq-sata";
- reg = <0x18000 0x1000>;
- cell-index = <1>;
- interrupts = <2c 8>;
- interrupt-parent = < &ipic >;
- };
diff --git a/Documentation/devicetree/bindings/ata/imx-pata.txt b/Documentation/devicetree/bindings/ata/imx-pata.txt
deleted file mode 100644
index f1172f00188a..000000000000
--- a/Documentation/devicetree/bindings/ata/imx-pata.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-* Freescale i.MX PATA Controller
-
-Required properties:
-- compatible: "fsl,imx27-pata"
-- reg: Address range of the PATA Controller
-- interrupts: The interrupt of the PATA Controller
-- clocks: the clocks for the PATA Controller
-
-Example:
-
- pata: pata@83fe0000 {
- compatible = "fsl,imx51-pata", "fsl,imx27-pata";
- reg = <0x83fe0000 0x4000>;
- interrupts = <70>;
- clocks = <&clks 161>;
- };
diff --git a/Documentation/devicetree/bindings/ata/imx-sata.txt b/Documentation/devicetree/bindings/ata/imx-sata.txt
deleted file mode 100644
index 781f88751762..000000000000
--- a/Documentation/devicetree/bindings/ata/imx-sata.txt
+++ /dev/null
@@ -1,37 +0,0 @@
-* Freescale i.MX AHCI SATA Controller
-
-The Freescale i.MX SATA controller mostly conforms to the AHCI interface
-with some special extensions at integration level.
-
-Required properties:
-- compatible : should be one of the following:
- - "fsl,imx53-ahci" for i.MX53 SATA controller
- - "fsl,imx6q-ahci" for i.MX6Q SATA controller
- - "fsl,imx6qp-ahci" for i.MX6QP SATA controller
-- interrupts : interrupt mapping for SATA IRQ
-- reg : registers mapping
-- clocks : list of clock specifiers, must contain an entry for each
- required entry in clock-names
-- clock-names : should include "sata", "sata_ref" and "ahb" entries
-
-Optional properties:
-- fsl,transmit-level-mV : transmit voltage level, in millivolts.
-- fsl,transmit-boost-mdB : transmit boost level, in milli-decibels
-- fsl,transmit-atten-16ths : transmit attenuation, in 16ths
-- fsl,receive-eq-mdB : receive equalisation, in milli-decibels
- Please refer to the technical documentation or the driver source code
- for the list of legal values for these options.
-- fsl,no-spread-spectrum : disable spread-spectrum clocking on the SATA
- link.
-
-Examples:
-
-sata@2200000 {
- compatible = "fsl,imx6q-ahci";
- reg = <0x02200000 0x4000>;
- interrupts = <0 39 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&clks IMX6QDL_CLK_SATA>,
- <&clks IMX6QDL_CLK_SATA_REF_100M>,
- <&clks IMX6QDL_CLK_AHB>;
- clock-names = "sata", "sata_ref", "ahb";
-};
diff --git a/Documentation/devicetree/bindings/ata/imx-sata.yaml b/Documentation/devicetree/bindings/ata/imx-sata.yaml
new file mode 100644
index 000000000000..31c43374763a
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/imx-sata.yaml
@@ -0,0 +1,133 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ata/imx-sata.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX AHCI SATA Controller
+
+maintainers:
+ - Shawn Guo <shawn.guo@linaro.org>
+
+description: |
+ The Freescale i.MX SATA controller mostly conforms to the AHCI interface
+ with some special extensions at integration level.
+
+properties:
+ compatible:
+ enum:
+ - fsl,imx53-ahci
+ - fsl,imx6q-ahci
+ - fsl,imx6qp-ahci
+ - fsl,imx8qm-ahci
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ minItems: 2
+ items:
+ - description: sata clock
+ - description: sata reference clock
+ - description: ahb clock
+
+ clock-names:
+ minItems: 2
+ items:
+ - const: sata
+ - const: sata_ref
+ - const: ahb
+
+ fsl,transmit-level-mV:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: transmit voltage level, in millivolts.
+
+ fsl,transmit-boost-mdB:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: transmit boost level, in milli-decibels.
+
+ fsl,transmit-atten-16ths:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: transmit attenuation, in 16ths.
+
+ fsl,receive-eq-mdB:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: receive equalisation, in milli-decibels.
+
+ fsl,no-spread-spectrum:
+ $ref: /schemas/types.yaml#/definitions/flag
+ description: if present, disable spread-spectrum clocking on the SATA link.
+
+ phys:
+ items:
+ - description: phandle to SATA PHY.
+ Since "REXT" pin is only present for first lane of i.MX8QM PHY, it's
+ calibration result will be stored, passed through second lane, and
+ shared with all three lanes PHY. The first two lanes PHY are used as
+ calibration PHYs, although only the third lane PHY is used by SATA.
+ - description: phandle to the first lane PHY of i.MX8QM.
+ - description: phandle to the second lane PHY of i.MX8QM.
+
+ phy-names:
+ items:
+ - const: sata-phy
+ - const: cali-phy0
+ - const: cali-phy1
+
+ power-domains:
+ maxItems: 1
+
+ target-supply:
+ description: Power regulator for the SATA target device.
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - fsl,imx53-ahci
+ - fsl,imx6q-ahci
+ - fsl,imx6qp-ahci
+ then:
+ properties:
+ clock-names:
+ minItems: 3
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - fsl,imx8qm-ahci
+ then:
+ properties:
+ clock-names:
+ minItems: 2
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/imx6qdl-clock.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ sata@2200000 {
+ compatible = "fsl,imx6q-ahci";
+ reg = <0x02200000 0x4000>;
+ interrupts = <0 39 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clks IMX6QDL_CLK_SATA>,
+ <&clks IMX6QDL_CLK_SATA_REF_100M>,
+ <&clks IMX6QDL_CLK_AHB>;
+ clock-names = "sata", "sata_ref", "ahb";
+ };
diff --git a/Documentation/devicetree/bindings/ata/intel,ixp4xx-compact-flash.yaml b/Documentation/devicetree/bindings/ata/intel,ixp4xx-compact-flash.yaml
new file mode 100644
index 000000000000..378692010c56
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/intel,ixp4xx-compact-flash.yaml
@@ -0,0 +1,62 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ata/intel,ixp4xx-compact-flash.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Intel IXP4xx CompactFlash Card Controller
+
+maintainers:
+ - Linus Walleij <linus.walleij@linaro.org>
+
+description: |
+ The IXP4xx network processors have a CompactFlash interface that presents
+ a CompactFlash card to the system as a true IDE (parallel ATA) device. The
+ device is always connected to the expansion bus of the IXP4xx SoCs using one
+ or two chip select areas and address translating logic on the board. The
+ node must be placed inside a chip select node on the IXP4xx expansion bus.
+
+properties:
+ compatible:
+ const: intel,ixp4xx-compact-flash
+
+ reg:
+ items:
+ - description: Command interface registers
+ - description: Control interface registers
+
+ interrupts:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+allOf:
+ - $ref: pata-common.yaml#
+ - $ref: /schemas/memory-controllers/intel,ixp4xx-expansion-peripheral-props.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ bus@c4000000 {
+ compatible = "intel,ixp43x-expansion-bus-controller", "syscon";
+ reg = <0xc4000000 0x1000>;
+ native-endian;
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges = <0 0x0 0x50000000 0x01000000>, <1 0x0 0x51000000 0x01000000>;
+ dma-ranges = <0 0x0 0x50000000 0x01000000>, <1 0x0 0x51000000 0x01000000>;
+ ide@1,0 {
+ compatible = "intel,ixp4xx-compact-flash";
+ reg = <1 0x00000000 0x1000>, <1 0x00040000 0x1000>;
+ interrupt-parent = <&gpio0>;
+ interrupts = <12 IRQ_TYPE_EDGE_RISING>;
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/ata/marvell,orion-sata.yaml b/Documentation/devicetree/bindings/ata/marvell,orion-sata.yaml
new file mode 100644
index 000000000000..f656ea9223d6
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/marvell,orion-sata.yaml
@@ -0,0 +1,83 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ata/marvell,orion-sata.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell Orion SATA
+
+maintainers:
+ - Andrew Lunn <andrew@lunn.ch>
+ - Gregory Clement <gregory.clement@bootlin.com>
+
+allOf:
+ - $ref: sata-common.yaml#
+
+properties:
+ compatible:
+ enum:
+ - marvell,orion-sata
+ - marvell,armada-370-sata
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ minItems: 1
+ maxItems: 8
+
+ clock-names:
+ minItems: 1
+ items:
+ - const: '0'
+ - const: '1'
+ - const: '2'
+ - const: '3'
+ - const: '4'
+ - const: '5'
+ - const: '6'
+ - const: '7'
+
+ interrupts:
+ maxItems: 1
+
+ nr-ports:
+ description:
+ Number of SATA ports in use.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ maximum: 8
+
+ phys:
+ minItems: 1
+ maxItems: 8
+
+ phy-names:
+ minItems: 1
+ items:
+ - const: port0
+ - const: port1
+ - const: port2
+ - const: port3
+ - const: port4
+ - const: port5
+ - const: port6
+ - const: port7
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - nr-ports
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ sata@80000 {
+ compatible = "marvell,orion-sata";
+ reg = <0x80000 0x5000>;
+ interrupts = <21>;
+ phys = <&sata_phy0>, <&sata_phy1>;
+ phy-names = "port0", "port1";
+ nr-ports = <2>;
+ };
diff --git a/Documentation/devicetree/bindings/ata/marvell.txt b/Documentation/devicetree/bindings/ata/marvell.txt
deleted file mode 100644
index b460edd12766..000000000000
--- a/Documentation/devicetree/bindings/ata/marvell.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-* Marvell Orion SATA
-
-Required Properties:
-- compatibility : "marvell,orion-sata" or "marvell,armada-370-sata"
-- reg : Address range of controller
-- interrupts : Interrupt controller is using
-- nr-ports : Number of SATA ports in use.
-
-Optional Properties:
-- phys : List of phandles to sata phys
-- phy-names : Should be "0", "1", etc, one number per phandle
-
-Example:
-
- sata@80000 {
- compatible = "marvell,orion-sata";
- reg = <0x80000 0x5000>;
- interrupts = <21>;
- phys = <&sata_phy0>, <&sata_phy1>;
- phy-names = "0", "1";
- nr-ports = <2>;
- }
diff --git a/Documentation/devicetree/bindings/ata/mediatek,mtk-ahci.yaml b/Documentation/devicetree/bindings/ata/mediatek,mtk-ahci.yaml
new file mode 100644
index 000000000000..a34bd2e9c352
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/mediatek,mtk-ahci.yaml
@@ -0,0 +1,98 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ata/mediatek,mtk-ahci.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek Serial ATA controller
+
+maintainers:
+ - Ryder Lee <ryder.lee@mediatek.com>
+
+allOf:
+ - $ref: ahci-common.yaml#
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - mediatek,mt7622-ahci
+ - const: mediatek,mtk-ahci
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ interrupt-names:
+ const: hostc
+
+ clocks:
+ maxItems: 5
+
+ clock-names:
+ items:
+ - const: ahb
+ - const: axi
+ - const: asic
+ - const: rbc
+ - const: pm
+
+ power-domains:
+ maxItems: 1
+
+ resets:
+ maxItems: 3
+
+ reset-names:
+ items:
+ - const: axi
+ - const: sw
+ - const: reg
+
+ mediatek,phy-mode:
+ description: System controller phandle, used to enable SATA function
+ $ref: /schemas/types.yaml#/definitions/phandle
+
+required:
+ - reg
+ - interrupts
+ - interrupt-names
+ - clocks
+ - clock-names
+ - phys
+ - phy-names
+ - ports-implemented
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/mt7622-clk.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/phy/phy.h>
+ #include <dt-bindings/power/mt7622-power.h>
+ #include <dt-bindings/reset/mt7622-reset.h>
+
+ sata@1a200000 {
+ compatible = "mediatek,mt7622-ahci", "mediatek,mtk-ahci";
+ reg = <0x1a200000 0x1100>;
+ interrupts = <GIC_SPI 233 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "hostc";
+ clocks = <&pciesys CLK_SATA_AHB_EN>,
+ <&pciesys CLK_SATA_AXI_EN>,
+ <&pciesys CLK_SATA_ASIC_EN>,
+ <&pciesys CLK_SATA_RBC_EN>,
+ <&pciesys CLK_SATA_PM_EN>;
+ clock-names = "ahb", "axi", "asic", "rbc", "pm";
+ phys = <&u3port1 PHY_TYPE_SATA>;
+ phy-names = "sata-phy";
+ ports-implemented = <0x1>;
+ power-domains = <&scpsys MT7622_POWER_DOMAIN_HIF0>;
+ resets = <&pciesys MT7622_SATA_AXI_BUS_RST>,
+ <&pciesys MT7622_SATA_PHY_SW_RST>,
+ <&pciesys MT7622_SATA_PHY_REG_RST>;
+ reset-names = "axi", "sw", "reg";
+ mediatek,phy-mode = <&pciesys>;
+ };
diff --git a/Documentation/devicetree/bindings/ata/nvidia,tegra-ahci.yaml b/Documentation/devicetree/bindings/ata/nvidia,tegra-ahci.yaml
new file mode 100644
index 000000000000..a17297cbefcb
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/nvidia,tegra-ahci.yaml
@@ -0,0 +1,175 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ata/nvidia,tegra-ahci.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Tegra AHCI SATA Controller
+
+maintainers:
+ - Thierry Reding <thierry.reding@gmail.com>
+ - Jonathan Hunter <jonathanh@nvidia.com>
+
+properties:
+ compatible:
+ enum:
+ - nvidia,tegra124-ahci
+ - nvidia,tegra132-ahci
+ - nvidia,tegra210-ahci
+ - nvidia,tegra186-ahci
+
+ reg:
+ minItems: 2
+ items:
+ - description: AHCI registers
+ - description: SATA configuration and IPFS registers
+ - description: SATA AUX registers
+
+ interrupts:
+ maxItems: 1
+
+ clock-names:
+ items:
+ - const: sata
+ - const: sata-oob
+
+ clocks:
+ maxItems: 2
+
+ reset-names:
+ minItems: 2
+ items:
+ - const: sata
+ - const: sata-cold
+ - const: sata-oob
+
+ resets:
+ minItems: 2
+ maxItems: 3
+
+ iommus:
+ maxItems: 1
+
+ interconnect-names:
+ items:
+ - const: dma-mem
+ - const: write
+
+ interconnects:
+ maxItems: 2
+
+ power-domains:
+ items:
+ - description: SAX power-domain
+
+ phy-names:
+ items:
+ - const: sata-0
+
+ phys:
+ maxItems: 1
+
+ hvdd-supply:
+ description: SATA HVDD regulator supply.
+
+ vddio-supply:
+ description: SATA VDDIO regulator supply.
+
+ avdd-supply:
+ description: SATA AVDD regulator supply.
+
+ target-5v-supply:
+ description: SATA 5V power regulator supply.
+
+ target-12v-supply:
+ description: SATA 12V power regulator supply.
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clock-names
+ - clocks
+ - reset-names
+ - resets
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - nvidia,tegra124-ahci
+ - nvidia,tegra132-ahci
+ then:
+ properties:
+ reg:
+ maxItems: 2
+ reset-names:
+ minItems: 3
+ resets:
+ minItems: 3
+ required:
+ - phys
+ - phy-names
+ - hvdd-supply
+ - vddio-supply
+ - avdd-supply
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - nvidia,tegra210-ahci
+ then:
+ properties:
+ reg:
+ minItems: 3
+ reset-names:
+ minItems: 3
+ resets:
+ minItems: 3
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - nvidia,tegra186-ahci
+ then:
+ properties:
+ reg:
+ minItems: 3
+ reset-names:
+ maxItems: 2
+ resets:
+ maxItems: 2
+ required:
+ - iommus
+ - interconnect-names
+ - interconnects
+ - power-domains
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/tegra210-car.h>
+ #include <dt-bindings/reset/tegra210-car.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ sata@70020000 {
+ compatible = "nvidia,tegra210-ahci";
+ reg = <0x70027000 0x00002000>, /* AHCI */
+ <0x70020000 0x00007000>, /* SATA */
+ <0x70001100 0x00010000>; /* SATA AUX */
+ interrupts = <GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA210_CLK_SATA>,
+ <&tegra_car TEGRA210_CLK_SATA_OOB>;
+ clock-names = "sata", "sata-oob";
+ resets = <&tegra_car 124>,
+ <&tegra_car 129>,
+ <&tegra_car 123>;
+ reset-names = "sata", "sata-cold", "sata-oob";
+ };
diff --git a/Documentation/devicetree/bindings/ata/nvidia,tegra124-ahci.txt b/Documentation/devicetree/bindings/ata/nvidia,tegra124-ahci.txt
deleted file mode 100644
index 12ab2f723eb0..000000000000
--- a/Documentation/devicetree/bindings/ata/nvidia,tegra124-ahci.txt
+++ /dev/null
@@ -1,44 +0,0 @@
-Tegra SoC SATA AHCI controller
-
-Required properties :
-- compatible : Must be one of:
- - Tegra124 : "nvidia,tegra124-ahci"
- - Tegra132 : "nvidia,tegra132-ahci", "nvidia,tegra124-ahci"
- - Tegra210 : "nvidia,tegra210-ahci"
-- reg : Should contain 2 entries:
- - AHCI register set (SATA BAR5)
- - SATA register set
-- interrupts : Defines the interrupt used by SATA
-- clocks : Must contain an entry for each entry in clock-names.
- See ../clocks/clock-bindings.txt for details.
-- clock-names : Must include the following entries:
- - sata
- - sata-oob
-- resets : Must contain an entry for each entry in reset-names.
- See ../reset/reset.txt for details.
-- reset-names : Must include the following entries:
- - sata
- - sata-oob
- - sata-cold
-- phys : Must contain an entry for each entry in phy-names.
- See ../phy/phy-bindings.txt for details.
-- phy-names : Must include the following entries:
- - For Tegra124 and Tegra132:
- - sata-phy : XUSB PADCTL SATA PHY
-- For Tegra124 and Tegra132:
- - hvdd-supply : Defines the SATA HVDD regulator
- - vddio-supply : Defines the SATA VDDIO regulator
- - avdd-supply : Defines the SATA AVDD regulator
- - target-5v-supply : Defines the SATA 5V power regulator
- - target-12v-supply : Defines the SATA 12V power regulator
-
-Optional properties:
-- reg :
- - AUX register set
-- clock-names :
- - cml1 :
- cml1 clock should be defined here if the PHY driver
- doesn't manage them. If it does, they should not be.
-- phy-names :
- - For T210:
- - sata-phy
diff --git a/Documentation/devicetree/bindings/ata/pata-arasan.txt b/Documentation/devicetree/bindings/ata/pata-arasan.txt
deleted file mode 100644
index 872edc105680..000000000000
--- a/Documentation/devicetree/bindings/ata/pata-arasan.txt
+++ /dev/null
@@ -1,37 +0,0 @@
-* ARASAN PATA COMPACT FLASH CONTROLLER
-
-Required properties:
-- compatible: "arasan,cf-spear1340"
-- reg: Address range of the CF registers
-- interrupt: Should contain the CF interrupt number
-- clock-frequency: Interface clock rate, in Hz, one of
- 25000000
- 33000000
- 40000000
- 50000000
- 66000000
- 75000000
- 100000000
- 125000000
- 150000000
- 166000000
- 200000000
-
-Optional properties:
-- arasan,broken-udma: if present, UDMA mode is unusable
-- arasan,broken-mwdma: if present, MWDMA mode is unusable
-- arasan,broken-pio: if present, PIO mode is unusable
-- dmas: one DMA channel, as described in bindings/dma/dma.txt
- required unless both UDMA and MWDMA mode are broken
-- dma-names: the corresponding channel name, must be "data"
-
-Example:
-
- cf@fc000000 {
- compatible = "arasan,cf-spear1340";
- reg = <0xfc000000 0x1000>;
- interrupt-parent = <&vic1>;
- interrupts = <12>;
- dmas = <&dma-controller 23>;
- dma-names = "data";
- };
diff --git a/Documentation/devicetree/bindings/ata/pata-common.yaml b/Documentation/devicetree/bindings/ata/pata-common.yaml
new file mode 100644
index 000000000000..4e867dd4d402
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/pata-common.yaml
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ata/pata-common.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Common Properties for Parallel AT attachment (PATA) controllers
+
+maintainers:
+ - Linus Walleij <linus.walleij@linaro.org>
+
+description: |
+ This document defines device tree properties common to most Parallel
+ ATA (PATA, also known as IDE) AT attachment storage devices.
+ It doesn't constitute a device tree binding specification by itself but is
+ meant to be referenced by device tree bindings.
+
+ The PATA (IDE) controller-specific device tree bindings are responsible for
+ defining whether each property is required or optional.
+
+properties:
+ $nodename:
+ pattern: "^ide(@.*)?$"
+ description:
+ Specifies the host controller node. PATA host controller nodes are named
+ "ide".
+
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 0
+
+patternProperties:
+ "^ide-port@[0-1]$":
+ description: |
+ DT nodes for ports connected on the PATA host. The master drive will have
+ ID number 0 and the slave drive will have ID number 1. The PATA port
+ nodes will be named "ide-port".
+ type: object
+ additionalProperties: false
+
+ properties:
+ reg:
+ minimum: 0
+ maximum: 1
+ description:
+ The ID number of the drive port, 0 for the master port and 1 for the
+ slave port.
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/ata/qcom-sata.txt b/Documentation/devicetree/bindings/ata/qcom-sata.txt
deleted file mode 100644
index 094de91cd9fd..000000000000
--- a/Documentation/devicetree/bindings/ata/qcom-sata.txt
+++ /dev/null
@@ -1,48 +0,0 @@
-* Qualcomm AHCI SATA Controller
-
-SATA nodes are defined to describe on-chip Serial ATA controllers.
-Each SATA controller should have its own node.
-
-Required properties:
-- compatible : compatible list, must contain "generic-ahci"
-- interrupts : <interrupt mapping for SATA IRQ>
-- reg : <registers mapping>
-- phys : Must contain exactly one entry as specified
- in phy-bindings.txt
-- phy-names : Must be "sata-phy"
-
-Required properties for "qcom,ipq806x-ahci" compatible:
-- clocks : Must contain an entry for each entry in clock-names.
-- clock-names : Shall be:
- "slave_iface" - Fabric port AHB clock for SATA
- "iface" - AHB clock
- "core" - core clock
- "rxoob" - RX out-of-band clock
- "pmalive" - Power Module Alive clock
-- assigned-clocks : Shall be:
- SATA_RXOOB_CLK
- SATA_PMALIVE_CLK
-- assigned-clock-rates : Shall be:
- 100Mhz (100000000) for SATA_RXOOB_CLK
- 100Mhz (100000000) for SATA_PMALIVE_CLK
-
-Example:
- sata@29000000 {
- compatible = "qcom,ipq806x-ahci", "generic-ahci";
- reg = <0x29000000 0x180>;
-
- interrupts = <0 209 0x0>;
-
- clocks = <&gcc SFAB_SATA_S_H_CLK>,
- <&gcc SATA_H_CLK>,
- <&gcc SATA_A_CLK>,
- <&gcc SATA_RXOOB_CLK>,
- <&gcc SATA_PMALIVE_CLK>;
- clock-names = "slave_iface", "iface", "core",
- "rxoob", "pmalive";
- assigned-clocks = <&gcc SATA_RXOOB_CLK>, <&gcc SATA_PMALIVE_CLK>;
- assigned-clock-rates = <100000000>, <100000000>;
-
- phys = <&sata_phy>;
- phy-names = "sata-phy";
- };
diff --git a/Documentation/devicetree/bindings/ata/renesas,rcar-sata.yaml b/Documentation/devicetree/bindings/ata/renesas,rcar-sata.yaml
new file mode 100644
index 000000000000..fe0909554790
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/renesas,rcar-sata.yaml
@@ -0,0 +1,84 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ata/renesas,rcar-sata.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas R-Car Serial-ATA Interface
+
+maintainers:
+ - Geert Uytterhoeven <geert+renesas@glider.be>
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - renesas,sata-r8a7779 # R-Car H1
+ - items:
+ - enum:
+ - renesas,sata-r8a7742 # RZ/G1H
+ - renesas,sata-r8a7790-es1 # R-Car H2 ES1
+ - renesas,sata-r8a7790 # R-Car H2 other than ES1
+ - renesas,sata-r8a7791 # R-Car M2-W
+ - renesas,sata-r8a7793 # R-Car M2-N
+ - const: renesas,rcar-gen2-sata # generic R-Car Gen2
+ - items:
+ - enum:
+ - renesas,sata-r8a774b1 # RZ/G2N
+ - renesas,sata-r8a774e1 # RZ/G2H
+ - renesas,sata-r8a7795 # R-Car H3
+ - renesas,sata-r8a77965 # R-Car M3-N
+ - const: renesas,rcar-gen3-sata # generic R-Car Gen3 or RZ/G2
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ iommus:
+ maxItems: 1
+
+ power-domains:
+ maxItems: 1
+
+ resets:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - power-domains
+
+if:
+ not:
+ properties:
+ compatible:
+ contains:
+ const: renesas,sata-r8a7779
+then:
+ required:
+ - resets
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/r8a7791-cpg-mssr.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/power/r8a7791-sysc.h>
+
+ sata@ee300000 {
+ compatible = "renesas,sata-r8a7791", "renesas,rcar-gen2-sata";
+ reg = <0xee300000 0x200000>;
+ interrupts = <GIC_SPI 105 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&cpg CPG_MOD 815>;
+ power-domains = <&sysc R8A7791_PD_ALWAYS_ON>;
+ resets = <&cpg 815>;
+ };
diff --git a/Documentation/devicetree/bindings/ata/rockchip,dwc-ahci.yaml b/Documentation/devicetree/bindings/ata/rockchip,dwc-ahci.yaml
new file mode 100644
index 000000000000..b5ecaabfe2e2
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/rockchip,dwc-ahci.yaml
@@ -0,0 +1,130 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ata/rockchip,dwc-ahci.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Synopsys DWC AHCI SATA controller for Rockchip devices
+
+maintainers:
+ - Serge Semin <fancer.lancer@gmail.com>
+
+description:
+ This document defines device tree bindings for the Synopsys DWC
+ implementation of the AHCI SATA controller found in Rockchip
+ devices.
+
+select:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - rockchip,rk3568-dwc-ahci
+ - rockchip,rk3576-dwc-ahci
+ - rockchip,rk3588-dwc-ahci
+ required:
+ - compatible
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - rockchip,rk3568-dwc-ahci
+ - rockchip,rk3576-dwc-ahci
+ - rockchip,rk3588-dwc-ahci
+ - const: snps,dwc-ahci
+
+ ports-implemented:
+ const: 1
+
+ power-domains:
+ maxItems: 1
+
+ sata-port@0:
+ $ref: /schemas/ata/snps,dwc-ahci-common.yaml#/$defs/dwc-ahci-port
+
+ properties:
+ reg:
+ const: 0
+
+ unevaluatedProperties: false
+
+patternProperties:
+ "^sata-port@[1-9a-e]$": false
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+ - ports-implemented
+
+allOf:
+ - $ref: snps,dwc-ahci-common.yaml#
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - rockchip,rk3588-dwc-ahci
+ then:
+ properties:
+ clocks:
+ maxItems: 5
+ clock-names:
+ items:
+ - const: sata
+ - const: pmalive
+ - const: rxoob
+ - const: ref
+ - const: asic
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - rockchip,rk3568-dwc-ahci
+ - rockchip,rk3576-dwc-ahci
+ then:
+ properties:
+ clocks:
+ maxItems: 3
+ clock-names:
+ items:
+ - const: sata
+ - const: pmalive
+ - const: rxoob
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/rockchip,rk3588-cru.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/ata/ahci.h>
+ #include <dt-bindings/phy/phy.h>
+
+ sata@fe210000 {
+ compatible = "rockchip,rk3588-dwc-ahci", "snps,dwc-ahci";
+ reg = <0xfe210000 0x1000>;
+ clocks = <&cru ACLK_SATA0>, <&cru CLK_PMALIVE0>,
+ <&cru CLK_RXOOB0>, <&cru CLK_PIPEPHY0_REF>,
+ <&cru CLK_PIPEPHY0_PIPE_ASIC_G>;
+ clock-names = "sata", "pmalive", "rxoob", "ref", "asic";
+ interrupts = <GIC_SPI 273 IRQ_TYPE_LEVEL_HIGH 0>;
+ ports-implemented = <0x1>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ sata-port@0 {
+ reg = <0>;
+ hba-port-cap = <HBA_PORT_FBSCP>;
+ phys = <&combphy0_ps PHY_TYPE_SATA>;
+ phy-names = "sata-phy";
+ snps,rx-ts-max = <32>;
+ snps,tx-ts-max = <32>;
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/ata/sata-common.yaml b/Documentation/devicetree/bindings/ata/sata-common.yaml
new file mode 100644
index 000000000000..58c9342b9925
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/sata-common.yaml
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ata/sata-common.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Common Properties for Serial AT attachment (SATA) controllers
+
+maintainers:
+ - Linus Walleij <linus.walleij@linaro.org>
+
+description: |
+ This document defines device tree properties common to most Serial
+ AT attachment (SATA) storage devices. It doesn't constitute a device tree
+ binding specification by itself but is meant to be referenced by device
+ tree bindings.
+
+ The SATA controller-specific device tree bindings are responsible for
+ defining whether each property is required or optional.
+
+properties:
+ $nodename:
+ pattern: "^sata(@.*)?$"
+ description:
+ Specifies the host controller node. SATA host controller nodes are named
+ "sata"
+
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 0
+
+ dma-coherent: true
+
+patternProperties:
+ "^sata-port@[0-9a-e]$":
+ $ref: '#/$defs/sata-port'
+ description: |
+ DT nodes for ports connected on the SATA host. The SATA port
+ nodes will be named "sata-port".
+
+additionalProperties: true
+
+$defs:
+ sata-port:
+ type: object
+
+ properties:
+ reg:
+ minimum: 0
+ description:
+ The ID number of the SATA port. Aside with being directly used,
+ each port can have a Port Multiplier attached thus allowing to
+ access more than one drive by means of a single SATA port.
+
+...
diff --git a/Documentation/devicetree/bindings/ata/sata_highbank.txt b/Documentation/devicetree/bindings/ata/sata_highbank.txt
deleted file mode 100644
index aa83407cb7a4..000000000000
--- a/Documentation/devicetree/bindings/ata/sata_highbank.txt
+++ /dev/null
@@ -1,44 +0,0 @@
-* Calxeda AHCI SATA Controller
-
-SATA nodes are defined to describe on-chip Serial ATA controllers.
-The Calxeda SATA controller mostly conforms to the AHCI interface
-with some special extensions to add functionality.
-Each SATA controller should have its own node.
-
-Required properties:
-- compatible : compatible list, contains "calxeda,hb-ahci"
-- interrupts : <interrupt mapping for SATA IRQ>
-- reg : <registers mapping>
-
-Optional properties:
-- dma-coherent : Present if dma operations are coherent
-- calxeda,port-phys : phandle-combophy and lane assignment, which maps each
- SATA port to a combophy and a lane within that
- combophy
-- calxeda,sgpio-gpio: phandle-gpio bank, bit offset, and default on or off,
- which indicates that the driver supports SGPIO
- indicator lights using the indicated GPIOs
-- calxeda,led-order : a u32 array that map port numbers to offsets within the
- SGPIO bitstream.
-- calxeda,tx-atten : a u32 array that contains TX attenuation override
- codes, one per port. The upper 3 bytes are always
- 0 and thus ignored.
-- calxeda,pre-clocks : a u32 that indicates the number of additional clock
- cycles to transmit before sending an SGPIO pattern
-- calxeda,post-clocks: a u32 that indicates the number of additional clock
- cycles to transmit after sending an SGPIO pattern
-
-Example:
- sata@ffe08000 {
- compatible = "calxeda,hb-ahci";
- reg = <0xffe08000 0x1000>;
- interrupts = <115>;
- dma-coherent;
- calxeda,port-phys = <&combophy5 0 &combophy0 0 &combophy0 1
- &combophy0 2 &combophy0 3>;
- calxeda,sgpio-gpio =<&gpioh 5 1 &gpioh 6 1 &gpioh 7 1>;
- calxeda,led-order = <4 0 1 2 3>;
- calxeda,tx-atten = <0xff 22 0xff 0xff 23>;
- calxeda,pre-clocks = <10>;
- calxeda,post-clocks = <0>;
- };
diff --git a/Documentation/devicetree/bindings/ata/sata_highbank.yaml b/Documentation/devicetree/bindings/ata/sata_highbank.yaml
new file mode 100644
index 000000000000..48bdca0f5577
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/sata_highbank.yaml
@@ -0,0 +1,95 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ata/sata_highbank.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Calxeda AHCI SATA Controller
+
+description: |
+ The Calxeda SATA controller mostly conforms to the AHCI interface
+ with some special extensions to add functionality, to map GPIOs for
+ activity LEDs and for mapping the ComboPHYs.
+
+maintainers:
+ - Andre Przywara <andre.przywara@arm.com>
+
+properties:
+ compatible:
+ const: calxeda,hb-ahci
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ dma-coherent: true
+
+ calxeda,pre-clocks:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: |
+ Indicates the number of additional clock cycles to transmit before
+ sending an SGPIO pattern.
+
+ calxeda,post-clocks:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: |
+ Indicates the number of additional clock cycles to transmit after
+ sending an SGPIO pattern.
+
+ calxeda,led-order:
+ description: Maps port numbers to offsets within the SGPIO bitstream.
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ minItems: 1
+ maxItems: 8
+
+ calxeda,port-phys:
+ description: |
+ phandle-combophy and lane assignment, which maps each SATA port to a
+ combophy and a lane within that combophy
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ minItems: 1
+ maxItems: 8
+ items:
+ maxItems: 2
+
+ calxeda,tx-atten:
+ description: |
+ Contains TX attenuation override codes, one per port.
+ The upper 24 bits of each entry are always 0 and thus ignored.
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ minItems: 1
+ maxItems: 8
+
+ calxeda,sgpio-gpio:
+ maxItems: 3
+ description: |
+ phandle-gpio bank, bit offset, and default on or off, which indicates
+ that the driver supports SGPIO indicator lights using the indicated
+ GPIOs.
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ sata@ffe08000 {
+ compatible = "calxeda,hb-ahci";
+ reg = <0xffe08000 0x1000>;
+ interrupts = <115>;
+ dma-coherent;
+ calxeda,port-phys = <&combophy5 0>, <&combophy0 0>, <&combophy0 1>,
+ <&combophy0 2>, <&combophy0 3>;
+ calxeda,sgpio-gpio = <&gpioh 5 1>, <&gpioh 6 1>, <&gpioh 7 1>;
+ calxeda,led-order = <4 0 1 2 3>;
+ calxeda,tx-atten = <0xff 22 0xff 0xff 23>;
+ calxeda,pre-clocks = <10>;
+ calxeda,post-clocks = <0>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/ata/sata_rcar.txt b/Documentation/devicetree/bindings/ata/sata_rcar.txt
deleted file mode 100644
index 4268e17d2411..000000000000
--- a/Documentation/devicetree/bindings/ata/sata_rcar.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-* Renesas R-Car SATA
-
-Required properties:
-- compatible : should contain one or more of the following:
- - "renesas,sata-r8a7779" for R-Car H1
- - "renesas,sata-r8a7790-es1" for R-Car H2 ES1
- - "renesas,sata-r8a7790" for R-Car H2 other than ES1
- - "renesas,sata-r8a7791" for R-Car M2-W
- - "renesas,sata-r8a7793" for R-Car M2-N
- - "renesas,sata-r8a7795" for R-Car H3
- - "renesas,sata-r8a77965" for R-Car M3-N
- - "renesas,rcar-gen2-sata" for a generic R-Car Gen2 compatible device
- - "renesas,rcar-gen3-sata" for a generic R-Car Gen3 compatible device
- - "renesas,rcar-sata" is deprecated
-
- When compatible with the generic version nodes
- must list the SoC-specific version corresponding
- to the platform first followed by the generic
- version.
-
-- reg : address and length of the SATA registers;
-- interrupts : must consist of one interrupt specifier.
-- clocks : must contain a reference to the functional clock.
-
-Example:
-
-sata0: sata@ee300000 {
- compatible = "renesas,sata-r8a7791", "renesas,rcar-gen2-sata";
- reg = <0 0xee300000 0 0x2000>;
- interrupt-parent = <&gic>;
- interrupts = <0 105 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&mstp8_clks R8A7791_CLK_SATA0>;
-};
diff --git a/Documentation/devicetree/bindings/ata/snps,dwc-ahci-common.yaml b/Documentation/devicetree/bindings/ata/snps,dwc-ahci-common.yaml
new file mode 100644
index 000000000000..34c5bf65b02d
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/snps,dwc-ahci-common.yaml
@@ -0,0 +1,106 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ata/snps,dwc-ahci-common.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Synopsys DWC AHCI SATA controller properties
+
+maintainers:
+ - Serge Semin <fancer.lancer@gmail.com>
+
+description:
+ This document defines device tree schema for the generic Synopsys DWC
+ AHCI controller properties.
+
+select: false
+
+allOf:
+ - $ref: ahci-common.yaml#
+
+properties:
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ description:
+ Basic DWC AHCI SATA clock sources like application AXI/AHB BIU clock,
+ PM-alive clock, RxOOB detection clock, embedded PHYs reference (Rx/Tx)
+ clock, etc.
+ minItems: 1
+ maxItems: 6
+
+ clock-names:
+ minItems: 1
+ maxItems: 6
+ items:
+ oneOf:
+ - description: Application APB/AHB/AXI BIU clock
+ enum:
+ - pclk
+ - aclk
+ - hclk
+ - sata
+ - description: Power Module keep-alive clock
+ const: pmalive
+ - description: RxOOB detection clock
+ const: rxoob
+ - description: PHY Transmit Clock
+ const: asic
+ - description: PHY Receive Clock
+ const: rbc
+ - description: SATA Ports reference clock
+ const: ref
+
+ resets:
+ description:
+ At least basic application and reference clock domains resets are
+ normally supported by the DWC AHCI SATA controller.
+ minItems: 1
+ maxItems: 4
+
+ reset-names:
+ minItems: 1
+ maxItems: 4
+ items:
+ oneOf:
+ - description: Application AHB/AXI BIU clock domain reset control
+ enum:
+ - arst
+ - hrst
+ - description: Power Module keep-alive clock domain reset control
+ const: pmalive
+ - description: RxOOB detection clock domain reset control
+ const: rxoob
+ - description: Reference clock domain reset control
+ const: ref
+
+patternProperties:
+ "^sata-port@[0-9a-e]$":
+ $ref: '#/$defs/dwc-ahci-port'
+
+additionalProperties: true
+
+$defs:
+ dwc-ahci-port:
+ $ref: /schemas/ata/ahci-common.yaml#/$defs/ahci-port
+
+ properties:
+ reg:
+ minimum: 0
+ maximum: 7
+
+ snps,tx-ts-max:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: Maximal size of Tx DMA transactions in FIFO words
+ enum: [ 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024 ]
+
+ snps,rx-ts-max:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: Maximal size of Rx DMA transactions in FIFO words
+ enum: [ 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024 ]
+
+...
diff --git a/Documentation/devicetree/bindings/ata/snps,dwc-ahci.yaml b/Documentation/devicetree/bindings/ata/snps,dwc-ahci.yaml
new file mode 100644
index 000000000000..4c848fcb5a5d
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/snps,dwc-ahci.yaml
@@ -0,0 +1,80 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ata/snps,dwc-ahci.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Synopsys DWC AHCI SATA controller
+
+maintainers:
+ - Serge Semin <fancer.lancer@gmail.com>
+
+description:
+ This document defines device tree bindings for the generic Synopsys DWC
+ implementation of the AHCI SATA controller.
+
+select:
+ properties:
+ compatible:
+ enum:
+ - snps,dwc-ahci
+ - snps,spear-ahci
+ required:
+ - compatible
+
+allOf:
+ - $ref: snps,dwc-ahci-common.yaml#
+
+properties:
+ compatible:
+ oneOf:
+ - description: Synopsys AHCI SATA-compatible devices
+ const: snps,dwc-ahci
+ - description: SPEAr1340 AHCI SATA device
+ const: snps,spear-ahci
+
+patternProperties:
+ "^sata-port@[0-9a-e]$":
+ $ref: /schemas/ata/snps,dwc-ahci-common.yaml#/$defs/dwc-ahci-port
+
+ unevaluatedProperties: false
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/ata/ahci.h>
+
+ sata@122f0000 {
+ compatible = "snps,dwc-ahci";
+ reg = <0x122F0000 0x1ff>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ interrupts = <GIC_SPI 115 IRQ_TYPE_LEVEL_HIGH>;
+
+ clocks = <&clock1>, <&clock2>;
+ clock-names = "aclk", "ref";
+
+ phys = <&sata_phy>;
+ phy-names = "sata-phy";
+
+ ports-implemented = <0x1>;
+
+ sata-port@0 {
+ reg = <0>;
+
+ hba-port-cap = <HBA_PORT_FBSCP>;
+
+ snps,tx-ts-max = <512>;
+ snps,rx-ts-max = <512>;
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/ata/st,ahci.yaml b/Documentation/devicetree/bindings/ata/st,ahci.yaml
new file mode 100644
index 000000000000..6e8e4b4f3d6c
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/st,ahci.yaml
@@ -0,0 +1,72 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ata/st,ahci.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STMicroelectronics STi SATA controller
+
+maintainers:
+ - Patrice Chotard <patrice.chotard@foss.st.com>
+
+allOf:
+ - $ref: ahci-common.yaml#
+
+properties:
+ compatible:
+ const: st,ahci
+
+ interrupt-names:
+ items:
+ - const: hostc
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ items:
+ - const: ahci_clk
+
+ resets:
+ items:
+ - description: Power-down line
+ - description: Soft-reset line
+ - description: Power-reset line
+
+ reset-names:
+ items:
+ - const: pwr-dwn
+ - const: sw-rst
+ - const: pwr-rst
+
+required:
+ - compatible
+ - interrupt-names
+ - phys
+ - phy-names
+ - clocks
+ - clock-names
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/phy/phy.h>
+ #include <dt-bindings/reset/stih407-resets.h>
+ #include <dt-bindings/clock/stih407-clks.h>
+
+ sata@9b20000 {
+ compatible = "st,ahci";
+ reg = <0x9b20000 0x1000>;
+ interrupts = <GIC_SPI 159 IRQ_TYPE_NONE>;
+ interrupt-names = "hostc";
+ phys = <&phy_port0 PHY_TYPE_SATA>;
+ phy-names = "sata-phy";
+ resets = <&powerdown STIH407_SATA0_POWERDOWN>,
+ <&softreset STIH407_SATA0_SOFTRESET>,
+ <&softreset STIH407_SATA0_PWR_SOFTRESET>;
+ reset-names = "pwr-dwn", "sw-rst", "pwr-rst";
+ clocks = <&clk_s_c0_flexgen CLK_ICN_REG>;
+ clock-names = "ahci_clk";
+ };
diff --git a/Documentation/devicetree/bindings/ata/ti,da850-ahci.yaml b/Documentation/devicetree/bindings/ata/ti,da850-ahci.yaml
new file mode 100644
index 000000000000..ce13c76bdffb
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/ti,da850-ahci.yaml
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ata/ti,da850-ahci.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: TI DA850 AHCI SATA Controller
+
+maintainers:
+ - Animesh Agarwal <animeshagarwal28@gmail.com>
+
+properties:
+ compatible:
+ const: ti,da850-ahci
+
+ reg:
+ items:
+ - description: Address and size of the register map as defined by the AHCI 1.1 standard.
+ - description:
+ Address and size of Power Down Control Register (PWRDN) for enabling/disabling the SATA clock
+ receiver.
+
+ interrupts:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ sata@218000 {
+ compatible = "ti,da850-ahci";
+ reg = <0x218000 0x2000>, <0x22c018 0x4>;
+ interrupts = <67>;
+ };
diff --git a/Documentation/devicetree/bindings/ata/ti,dm816-ahci.yaml b/Documentation/devicetree/bindings/ata/ti,dm816-ahci.yaml
new file mode 100644
index 000000000000..d0ff9e78afe6
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/ti,dm816-ahci.yaml
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ata/ti,dm816-ahci.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: TI DM816 AHCI SATA Controller
+
+maintainers:
+ - Bartosz Golaszewski <brgl@bgdev.pl>
+
+allOf:
+ - $ref: ahci-common.yaml#
+
+properties:
+ compatible:
+ const: ti,dm816-ahci
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: functional clock
+ - description: external reference clock
+
+ ti,hwmods:
+ const: sata
+
+required:
+ - compatible
+ - clocks
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ sata@4a140000 {
+ compatible = "ti,dm816-ahci";
+ reg = <0x4a140000 0x10000>;
+ interrupts = <16>;
+ clocks = <&sysclk5_ck>, <&sata_refclk>;
+ };
diff --git a/Documentation/devicetree/bindings/auxdisplay/arm,versatile-lcd.yaml b/Documentation/devicetree/bindings/auxdisplay/arm,versatile-lcd.yaml
new file mode 100644
index 000000000000..439f7b811a94
--- /dev/null
+++ b/Documentation/devicetree/bindings/auxdisplay/arm,versatile-lcd.yaml
@@ -0,0 +1,44 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/auxdisplay/arm,versatile-lcd.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM Versatile Character LCD
+
+maintainers:
+ - Linus Walleij <linus.walleij@linaro.org>
+ - Rob Herring <robh@kernel.org>
+
+description:
+ This binding defines the character LCD interface found on ARM Versatile AB
+ and PB reference platforms.
+
+properties:
+ compatible:
+ const: arm,versatile-lcd
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ lcd@10008000 {
+ compatible = "arm,versatile-lcd";
+ reg = <0x10008000 0x1000>;
+ };
diff --git a/Documentation/devicetree/bindings/auxdisplay/arm-charlcd.txt b/Documentation/devicetree/bindings/auxdisplay/arm-charlcd.txt
deleted file mode 100644
index e28e2aac47f1..000000000000
--- a/Documentation/devicetree/bindings/auxdisplay/arm-charlcd.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-ARM Versatile Character LCD
------------------------------------------------------
-This binding defines the character LCD interface found on ARM Versatile AB
-and PB reference platforms.
-
-Required properties:
-- compatible : "arm,versatile-clcd"
-- reg : Location and size of character LCD registers
-
-Optional properties:
-- interrupts - single interrupt for character LCD. The character LCD can
- operate in polled mode without an interrupt.
-
-Example:
- lcd@10008000 {
- compatible = "arm,versatile-lcd";
- reg = <0x10008000 0x1000>;
- };
diff --git a/Documentation/devicetree/bindings/auxdisplay/gpio-7-segment.yaml b/Documentation/devicetree/bindings/auxdisplay/gpio-7-segment.yaml
new file mode 100644
index 000000000000..328954893c64
--- /dev/null
+++ b/Documentation/devicetree/bindings/auxdisplay/gpio-7-segment.yaml
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/auxdisplay/gpio-7-segment.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: GPIO based LED segment display
+
+maintainers:
+ - Chris Packham <chris.packham@alliedtelesis.co.nz>
+
+properties:
+ compatible:
+ const: gpio-7-segment
+
+ segment-gpios:
+ description: |
+ An array of GPIOs one per segment. The first GPIO corresponds to the A
+ segment, the seventh GPIO corresponds to the G segment. Some LED blocks
+ also have a decimal point which can be specified as an optional eighth
+ segment.
+
+ -a-
+ | |
+ f b
+ | |
+ -g-
+ | |
+ e c
+ | |
+ -d- dp
+
+ minItems: 7
+ maxItems: 8
+
+required:
+ - segment-gpios
+
+additionalProperties: false
+
+examples:
+ - |
+
+ #include <dt-bindings/gpio/gpio.h>
+
+ led-7seg {
+ compatible = "gpio-7-segment";
+ segment-gpios = <&gpio 0 GPIO_ACTIVE_LOW>,
+ <&gpio 1 GPIO_ACTIVE_LOW>,
+ <&gpio 2 GPIO_ACTIVE_LOW>,
+ <&gpio 3 GPIO_ACTIVE_LOW>,
+ <&gpio 4 GPIO_ACTIVE_LOW>,
+ <&gpio 5 GPIO_ACTIVE_LOW>,
+ <&gpio 6 GPIO_ACTIVE_LOW>;
+ };
diff --git a/Documentation/devicetree/bindings/auxdisplay/hit,hd44780.txt b/Documentation/devicetree/bindings/auxdisplay/hit,hd44780.txt
deleted file mode 100644
index 2aa24b889923..000000000000
--- a/Documentation/devicetree/bindings/auxdisplay/hit,hd44780.txt
+++ /dev/null
@@ -1,45 +0,0 @@
-DT bindings for the Hitachi HD44780 Character LCD Controller
-
-The Hitachi HD44780 Character LCD Controller is commonly used on character LCDs
-that can display one or more lines of text. It exposes an M6800 bus interface,
-which can be used in either 4-bit or 8-bit mode.
-
-Required properties:
- - compatible: Must contain "hit,hd44780",
- - data-gpios: Must contain an array of either 4 or 8 GPIO specifiers,
- referring to the GPIO pins connected to the data signal lines DB0-DB7
- (8-bit mode) or DB4-DB7 (4-bit mode) of the LCD Controller's bus interface,
- - enable-gpios: Must contain a GPIO specifier, referring to the GPIO pin
- connected to the "E" (Enable) signal line of the LCD Controller's bus
- interface,
- - rs-gpios: Must contain a GPIO specifier, referring to the GPIO pin
- connected to the "RS" (Register Select) signal line of the LCD Controller's
- bus interface,
- - display-height-chars: Height of the display, in character cells,
- - display-width-chars: Width of the display, in character cells.
-
-Optional properties:
- - rw-gpios: Must contain a GPIO specifier, referring to the GPIO pin
- connected to the "RW" (Read/Write) signal line of the LCD Controller's bus
- interface,
- - backlight-gpios: Must contain a GPIO specifier, referring to the GPIO pin
- used for enabling the LCD's backlight,
- - internal-buffer-width: Internal buffer width (default is 40 for displays
- with 1 or 2 lines, and display-width-chars for displays with more than 2
- lines).
-
-Example:
-
- auxdisplay {
- compatible = "hit,hd44780";
-
- data-gpios = <&hc595 0 GPIO_ACTIVE_HIGH>,
- <&hc595 1 GPIO_ACTIVE_HIGH>,
- <&hc595 2 GPIO_ACTIVE_HIGH>,
- <&hc595 3 GPIO_ACTIVE_HIGH>;
- enable-gpios = <&hc595 4 GPIO_ACTIVE_HIGH>;
- rs-gpios = <&hc595 5 GPIO_ACTIVE_HIGH>;
-
- display-height-chars = <2>;
- display-width-chars = <16>;
- };
diff --git a/Documentation/devicetree/bindings/auxdisplay/hit,hd44780.yaml b/Documentation/devicetree/bindings/auxdisplay/hit,hd44780.yaml
new file mode 100644
index 000000000000..3ca0e9863d83
--- /dev/null
+++ b/Documentation/devicetree/bindings/auxdisplay/hit,hd44780.yaml
@@ -0,0 +1,127 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/auxdisplay/hit,hd44780.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Hitachi HD44780 Character LCD Controller
+
+maintainers:
+ - Geert Uytterhoeven <geert@linux-m68k.org>
+
+description:
+ The Hitachi HD44780 Character LCD Controller is commonly used on character
+ LCDs that can display one or more lines of text. It exposes an M6800 bus
+ interface, which can be used in either 4-bit or 8-bit mode. By using a
+ GPIO expander it is possible to use the driver with one of the popular I2C
+ expander boards based on the PCF8574 available for these displays. For
+ an example see below.
+
+properties:
+ compatible:
+ const: hit,hd44780
+
+ data-gpios:
+ description:
+ GPIO pins connected to the data signal lines DB0-DB7 (8-bit mode) or
+ DB4-DB7 (4-bit mode) of the LCD Controller's bus interface.
+ oneOf:
+ - maxItems: 4
+ - maxItems: 8
+
+ enable-gpios:
+ description:
+ GPIO pin connected to the "E" (Enable) signal line of the LCD
+ Controller's bus interface.
+ maxItems: 1
+
+ rs-gpios:
+ description:
+ GPIO pin connected to the "RS" (Register Select) signal line of the LCD
+ Controller's bus interface.
+ maxItems: 1
+
+ rw-gpios:
+ description:
+ GPIO pin connected to the "RW" (Read/Write) signal line of the LCD
+ Controller's bus interface.
+ maxItems: 1
+
+ backlight-gpios:
+ description: GPIO pin used for enabling the LCD's backlight.
+ maxItems: 1
+
+ display-height-chars:
+ description: Height of the display, in character cells,
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 1
+ maximum: 4
+
+ display-width-chars:
+ description: Width of the display, in character cells.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 1
+ maximum: 64
+
+ internal-buffer-width:
+ description:
+ Internal buffer width (default is 40 for displays with 1 or 2 lines, and
+ display-width-chars for displays with more than 2 lines).
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 1
+ maximum: 64
+
+required:
+ - compatible
+ - data-gpios
+ - enable-gpios
+ - rs-gpios
+ - display-height-chars
+ - display-width-chars
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+ display-controller {
+ compatible = "hit,hd44780";
+
+ data-gpios = <&hc595 0 GPIO_ACTIVE_HIGH>,
+ <&hc595 1 GPIO_ACTIVE_HIGH>,
+ <&hc595 2 GPIO_ACTIVE_HIGH>,
+ <&hc595 3 GPIO_ACTIVE_HIGH>;
+ enable-gpios = <&hc595 4 GPIO_ACTIVE_HIGH>;
+ rs-gpios = <&hc595 5 GPIO_ACTIVE_HIGH>;
+
+ display-height-chars = <2>;
+ display-width-chars = <16>;
+ };
+
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ pcf8574: gpio-expander@27 {
+ compatible = "nxp,pcf8574";
+ reg = <0x27>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
+ };
+
+ display-controller {
+ compatible = "hit,hd44780";
+ display-height-chars = <2>;
+ display-width-chars = <16>;
+ data-gpios = <&pcf8574 4 GPIO_ACTIVE_HIGH>,
+ <&pcf8574 5 GPIO_ACTIVE_HIGH>,
+ <&pcf8574 6 GPIO_ACTIVE_HIGH>,
+ <&pcf8574 7 GPIO_ACTIVE_HIGH>;
+ enable-gpios = <&pcf8574 2 GPIO_ACTIVE_HIGH>;
+ rs-gpios = <&pcf8574 0 GPIO_ACTIVE_HIGH>;
+ rw-gpios = <&pcf8574 1 GPIO_ACTIVE_HIGH>;
+ backlight-gpios = <&pcf8574 3 GPIO_ACTIVE_HIGH>;
+ };
diff --git a/Documentation/devicetree/bindings/auxdisplay/holtek,ht16k33.yaml b/Documentation/devicetree/bindings/auxdisplay/holtek,ht16k33.yaml
new file mode 100644
index 000000000000..b90eec2077b4
--- /dev/null
+++ b/Documentation/devicetree/bindings/auxdisplay/holtek,ht16k33.yaml
@@ -0,0 +1,104 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/auxdisplay/holtek,ht16k33.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Holtek HT16K33 RAM mapping 16*8 LED controller with keyscan
+
+maintainers:
+ - Robin van der Gracht <robin@protonic.nl>
+
+allOf:
+ - $ref: /schemas/input/matrix-keymap.yaml#
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - adafruit,3108 # 0.56" 4-Digit 7-Segment FeatherWing Display (Red)
+ - adafruit,3130 # 0.54" Quad Alphanumeric FeatherWing Display (Red)
+ - const: holtek,ht16k33
+
+ - const: holtek,ht16k33 # Generic 16*8 LED controller with dot-matrix display
+
+ reg:
+ maxItems: 1
+
+ refresh-rate-hz:
+ maxItems: 1
+ description: Display update interval in Hertz for dot-matrix displays
+
+ interrupts:
+ maxItems: 1
+
+ debounce-delay-ms:
+ maxItems: 1
+ description: Debouncing interval time in milliseconds
+
+ linux,keymap: true
+
+ linux,no-autorepeat:
+ type: boolean
+ description: Disable keyrepeat
+
+ default-brightness-level:
+ minimum: 1
+ maximum: 16
+ default: 16
+ description: Initial brightness level
+
+ led:
+ type: object
+ $ref: /schemas/leds/common.yaml#
+ unevaluatedProperties: false
+
+required:
+ - compatible
+ - reg
+
+if:
+ properties:
+ compatible:
+ const: holtek,ht16k33
+then:
+ required:
+ - refresh-rate-hz
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/input/input.h>
+ #include <dt-bindings/leds/common.h>
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ display-controller@70 {
+ compatible = "holtek,ht16k33";
+ reg = <0x70>;
+ refresh-rate-hz = <20>;
+ interrupt-parent = <&gpio4>;
+ interrupts = <5 (IRQ_TYPE_LEVEL_HIGH | IRQ_TYPE_EDGE_RISING)>;
+ debounce-delay-ms = <50>;
+ linux,keymap = <MATRIX_KEY(2, 0, KEY_F6)>,
+ <MATRIX_KEY(3, 0, KEY_F8)>,
+ <MATRIX_KEY(4, 0, KEY_F10)>,
+ <MATRIX_KEY(5, 0, KEY_F4)>,
+ <MATRIX_KEY(6, 0, KEY_F2)>,
+ <MATRIX_KEY(2, 1, KEY_F5)>,
+ <MATRIX_KEY(3, 1, KEY_F7)>,
+ <MATRIX_KEY(4, 1, KEY_F9)>,
+ <MATRIX_KEY(5, 1, KEY_F3)>,
+ <MATRIX_KEY(6, 1, KEY_F1)>;
+
+ led {
+ color = <LED_COLOR_ID_RED>;
+ function = LED_FUNCTION_BACKLIGHT;
+ linux,default-trigger = "backlight";
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/auxdisplay/img,ascii-lcd.yaml b/Documentation/devicetree/bindings/auxdisplay/img,ascii-lcd.yaml
new file mode 100644
index 000000000000..55e9831b3f67
--- /dev/null
+++ b/Documentation/devicetree/bindings/auxdisplay/img,ascii-lcd.yaml
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/auxdisplay/img,ascii-lcd.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ASCII LCD displays on Imagination Technologies boards
+
+maintainers:
+ - Paul Burton <paulburton@kernel.org>
+
+properties:
+ compatible:
+ enum:
+ - img,boston-lcd
+ - mti,malta-lcd
+ - mti,sead3-lcd
+
+ reg:
+ maxItems: 1
+
+ offset:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ Offset in bytes to the LCD registers within the system controller
+
+required:
+ - compatible
+
+oneOf:
+ - required:
+ - reg
+ - required:
+ - offset
+
+if:
+ properties:
+ compatible:
+ contains:
+ const: img,boston-lcd
+then:
+ required:
+ - reg
+else:
+ required:
+ - offset
+
+additionalProperties: false
+
+examples:
+ - |
+ lcd: lcd@17fff000 {
+ compatible = "img,boston-lcd";
+ reg = <0x17fff000 0x8>;
+ };
diff --git a/Documentation/devicetree/bindings/auxdisplay/img-ascii-lcd.txt b/Documentation/devicetree/bindings/auxdisplay/img-ascii-lcd.txt
deleted file mode 100644
index b69bb68992fd..000000000000
--- a/Documentation/devicetree/bindings/auxdisplay/img-ascii-lcd.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-Binding for ASCII LCD displays on Imagination Technologies boards
-
-Required properties:
-- compatible : should be one of:
- "img,boston-lcd"
- "mti,malta-lcd"
- "mti,sead3-lcd"
-
-Required properties for "img,boston-lcd":
-- reg : memory region locating the device registers
-
-Required properties for "mti,malta-lcd" or "mti,sead3-lcd":
-- regmap: phandle of the system controller containing the LCD registers
-- offset: offset in bytes to the LCD registers within the system controller
-
-The layout of the registers & properties of the display are determined
-from the compatible string, making this binding somewhat trivial.
diff --git a/Documentation/devicetree/bindings/auxdisplay/maxim,max6959.yaml b/Documentation/devicetree/bindings/auxdisplay/maxim,max6959.yaml
new file mode 100644
index 000000000000..20dd9e8c8190
--- /dev/null
+++ b/Documentation/devicetree/bindings/auxdisplay/maxim,max6959.yaml
@@ -0,0 +1,44 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/auxdisplay/maxim,max6959.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MAX6958/6959 7-segment LED display controller
+
+maintainers:
+ - Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+
+description:
+ The Maxim MAX6958/6959 7-segment LED display controller provides
+ an I2C interface to up to four 7-segment LED digits. The MAX6959,
+ in comparison to MAX6958, adds input support. Type of the chip can
+ be autodetected via specific register read, and hence the features
+ may be enabled in the driver at run-time, in case they are requested
+ via Device Tree. A given hardware is simple and does not provide
+ any additional pins, such as reset or power enable.
+
+properties:
+ compatible:
+ const: maxim,max6959
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ display-controller@38 {
+ compatible = "maxim,max6959";
+ reg = <0x38>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/auxdisplay/modtronix,lcd2s.yaml b/Documentation/devicetree/bindings/auxdisplay/modtronix,lcd2s.yaml
new file mode 100644
index 000000000000..a1d55a2634a5
--- /dev/null
+++ b/Documentation/devicetree/bindings/auxdisplay/modtronix,lcd2s.yaml
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/auxdisplay/modtronix,lcd2s.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Modtronix engineering LCD2S Character LCD Display
+
+maintainers:
+ - Lars Poeschel <poeschel@lemonage.de>
+
+description:
+ The LCD2S is a Character LCD Display manufactured by Modtronix Engineering.
+ The display supports a serial I2C and SPI interface. The driver currently
+ only supports the I2C interface.
+
+properties:
+ compatible:
+ const: modtronix,lcd2s
+
+ reg:
+ maxItems: 1
+ description:
+ I2C bus address of the display.
+
+ display-height-chars:
+ description: Height of the display, in character cells.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 1
+ maximum: 4
+
+ display-width-chars:
+ description: Width of the display, in character cells.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 16
+ maximum: 20
+
+required:
+ - compatible
+ - reg
+ - display-height-chars
+ - display-width-chars
+
+additionalProperties: false
+
+examples:
+ - |
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ lcd2s: auxdisplay@28 {
+ compatible = "modtronix,lcd2s";
+ reg = <0x28>;
+ display-height-chars = <4>;
+ display-width-chars = <20>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/board/fsl,bcsr.yaml b/Documentation/devicetree/bindings/board/fsl,bcsr.yaml
new file mode 100644
index 000000000000..df3dd8399671
--- /dev/null
+++ b/Documentation/devicetree/bindings/board/fsl,bcsr.yaml
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/board/fsl,bcsr.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Board Control and Status
+
+maintainers:
+ - Frank Li <Frank.Li@nxp.com>
+
+properties:
+ compatible:
+ enum:
+ - fsl,mpc8360mds-bcsr
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ board@f8000000 {
+ compatible = "fsl,mpc8360mds-bcsr";
+ reg = <0xf8000000 0x8000>;
+ };
+
diff --git a/Documentation/devicetree/bindings/board/fsl,fpga-qixis-i2c.yaml b/Documentation/devicetree/bindings/board/fsl,fpga-qixis-i2c.yaml
new file mode 100644
index 000000000000..28b37772fb65
--- /dev/null
+++ b/Documentation/devicetree/bindings/board/fsl,fpga-qixis-i2c.yaml
@@ -0,0 +1,70 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/board/fsl,fpga-qixis-i2c.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale on-board FPGA connected on I2C bus
+
+maintainers:
+ - Frank Li <Frank.Li@nxp.com>
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - fsl,bsc9132qds-fpga
+ - const: fsl,fpga-qixis-i2c
+ - items:
+ - enum:
+ - fsl,ls1028aqds-fpga
+ - fsl,lx2160aqds-fpga
+ - const: fsl,fpga-qixis-i2c
+ - const: simple-mfd
+
+ interrupts:
+ maxItems: 1
+
+ reg:
+ maxItems: 1
+
+ mux-controller:
+ $ref: /schemas/mux/reg-mux.yaml
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ board-control@66 {
+ compatible = "fsl,bsc9132qds-fpga", "fsl,fpga-qixis-i2c";
+ reg = <0x66>;
+ };
+ };
+
+ - |
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ board-control@66 {
+ compatible = "fsl,ls1028aqds-fpga", "fsl,fpga-qixis-i2c",
+ "simple-mfd";
+ reg = <0x66>;
+
+ mux-controller {
+ compatible = "reg-mux";
+ #mux-control-cells = <1>;
+ mux-reg-masks = <0x54 0xf0>; /* 0: reg 0x54, bits 7:4 */
+ };
+ };
+ };
+
diff --git a/Documentation/devicetree/bindings/board/fsl,fpga-qixis.yaml b/Documentation/devicetree/bindings/board/fsl,fpga-qixis.yaml
new file mode 100644
index 000000000000..5a3cd431ef6e
--- /dev/null
+++ b/Documentation/devicetree/bindings/board/fsl,fpga-qixis.yaml
@@ -0,0 +1,81 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/board/fsl,fpga-qixis.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale on-board FPGA/CPLD
+
+maintainers:
+ - Frank Li <Frank.Li@nxp.com>
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - const: fsl,p1022ds-fpga
+ - const: fsl,fpga-ngpixis
+ - items:
+ - enum:
+ - fsl,ls1088aqds-fpga
+ - fsl,ls1088ardb-fpga
+ - fsl,ls2080aqds-fpga
+ - fsl,ls2080ardb-fpga
+ - const: fsl,fpga-qixis
+ - items:
+ - enum:
+ - fsl,ls1043aqds-fpga
+ - fsl,ls1043ardb-fpga
+ - fsl,ls1046aqds-fpga
+ - fsl,ls1046ardb-fpga
+ - fsl,ls208xaqds-fpga
+ - const: fsl,fpga-qixis
+ - const: simple-mfd
+ - enum:
+ - fsl,ls1043ardb-cpld
+ - fsl,ls1046ardb-cpld
+ - fsl,t1040rdb-cpld
+ - fsl,t1042rdb-cpld
+ - fsl,t1042rdb_pi-cpld
+
+ interrupts:
+ maxItems: 1
+
+ reg:
+ maxItems: 1
+
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 1
+
+ ranges:
+ maxItems: 1
+
+patternProperties:
+ '^mdio-mux@[a-f0-9,]+$':
+ $ref: /schemas/net/mdio-mux-mmioreg.yaml
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ board-control@3 {
+ compatible = "fsl,p1022ds-fpga", "fsl,fpga-ngpixis";
+ reg = <3 0x30>;
+ interrupt-parent = <&mpic>;
+ interrupts = <8 IRQ_TYPE_LEVEL_LOW 0 0>;
+ };
+
+ - |
+ board-control@3 {
+ compatible = "fsl,ls2080ardb-fpga", "fsl,fpga-qixis";
+ reg = <0x3 0x10000>;
+ };
+
diff --git a/Documentation/devicetree/bindings/board/fsl-board.txt b/Documentation/devicetree/bindings/board/fsl-board.txt
deleted file mode 100644
index eb52f6b35159..000000000000
--- a/Documentation/devicetree/bindings/board/fsl-board.txt
+++ /dev/null
@@ -1,111 +0,0 @@
-Freescale Reference Board Bindings
-
-This document describes device tree bindings for various devices that
-exist on some Freescale reference boards.
-
-* Board Control and Status (BCSR)
-
-Required properties:
-
- - compatible : Should be "fsl,<board>-bcsr"
- - reg : Offset and length of the register set for the device
-
-Example:
-
- bcsr@f8000000 {
- compatible = "fsl,mpc8360mds-bcsr";
- reg = <f8000000 8000>;
- };
-
-* Freescale on-board FPGA
-
-This is the memory-mapped registers for on board FPGA.
-
-Required properties:
-- compatible: should be a board-specific string followed by a string
- indicating the type of FPGA. Example:
- "fsl,<board>-fpga", "fsl,fpga-pixis", or
- "fsl,<board>-fpga", "fsl,fpga-qixis"
-- reg: should contain the address and the length of the FPGA register set.
-
-Optional properties:
-- interrupts: should specify event (wakeup) IRQ.
-
-Example (P1022DS):
-
- board-control@3,0 {
- compatible = "fsl,p1022ds-fpga", "fsl,fpga-ngpixis";
- reg = <3 0 0x30>;
- interrupt-parent = <&mpic>;
- interrupts = <8 8 0 0>;
- };
-
-Example (LS2080A-RDB):
-
- cpld@3,0 {
- compatible = "fsl,ls2080ardb-fpga", "fsl,fpga-qixis";
- reg = <0x3 0 0x10000>;
- };
-
-* Freescale BCSR GPIO banks
-
-Some BCSR registers act as simple GPIO controllers, each such
-register can be represented by the gpio-controller node.
-
-Required properities:
-- compatible : Should be "fsl,<board>-bcsr-gpio".
-- reg : Should contain the address and the length of the GPIO bank
- register.
-- #gpio-cells : Should be two. The first cell is the pin number and the
- second cell is used to specify optional parameters (currently unused).
-- gpio-controller : Marks the port as GPIO controller.
-
-Example:
-
- bcsr@1,0 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,mpc8360mds-bcsr";
- reg = <1 0 0x8000>;
- ranges = <0 1 0 0x8000>;
-
- bcsr13: gpio-controller@d {
- #gpio-cells = <2>;
- compatible = "fsl,mpc8360mds-bcsr-gpio";
- reg = <0xd 1>;
- gpio-controller;
- };
- };
-
-* Freescale on-board FPGA connected on I2C bus
-
-Some Freescale boards like BSC9132QDS have on board FPGA connected on
-the i2c bus.
-
-Required properties:
-- compatible: Should be a board-specific string followed by a string
- indicating the type of FPGA. Example:
- "fsl,<board>-fpga", "fsl,fpga-qixis-i2c"
-- reg: Should contain the address of the FPGA
-
-Example:
- fpga: fpga@66 {
- compatible = "fsl,bsc9132qds-fpga", "fsl,fpga-qixis-i2c";
- reg = <0x66>;
- };
-
-* Freescale on-board CPLD
-
-Some Freescale boards like T1040RDB have an on board CPLD connected.
-
-Required properties:
-- compatible: Should be a board-specific string like "fsl,<board>-cpld"
- Example:
- "fsl,t1040rdb-cpld", "fsl,t1042rdb-cpld", "fsl,t1042rdb_pi-cpld"
-- reg: should describe CPLD registers
-
-Example:
- cpld@3,0 {
- compatible = "fsl,t1040rdb-cpld";
- reg = <3 0 0x300>;
- };
diff --git a/Documentation/devicetree/bindings/bus/allwinner,sun50i-a64-de2.yaml b/Documentation/devicetree/bindings/bus/allwinner,sun50i-a64-de2.yaml
new file mode 100644
index 000000000000..232252e8825e
--- /dev/null
+++ b/Documentation/devicetree/bindings/bus/allwinner,sun50i-a64-de2.yaml
@@ -0,0 +1,88 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/bus/allwinner,sun50i-a64-de2.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A64 Display Engine Bus
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+properties:
+ $nodename:
+ pattern: "^bus(@[0-9a-f]+)?$"
+
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 1
+
+ compatible:
+ oneOf:
+ - const: allwinner,sun50i-a64-de2
+ - items:
+ - const: allwinner,sun50i-h6-de3
+ - const: allwinner,sun50i-a64-de2
+
+ reg:
+ maxItems: 1
+
+ allwinner,sram:
+ description:
+ The SRAM that needs to be claimed to access the display engine
+ bus.
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ items:
+ - items:
+ - description: phandle to SRAM
+ - description: register value for device
+
+ ranges: true
+
+patternProperties:
+ # All other properties should be child nodes with unit-address and 'reg'
+ "@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+ properties:
+ reg:
+ maxItems: 1
+
+ required:
+ - reg
+
+required:
+ - compatible
+ - reg
+ - "#address-cells"
+ - "#size-cells"
+ - ranges
+ - allwinner,sram
+
+additionalProperties: false
+
+examples:
+ - |
+ bus@1000000 {
+ compatible = "allwinner,sun50i-a64-de2";
+ reg = <0x1000000 0x400000>;
+ allwinner,sram = <&de2_sram 1>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0 0x1000000 0x400000>;
+
+ display_clocks: clock@0 {
+ compatible = "allwinner,sun50i-a64-de2-clk";
+ reg = <0x0 0x100000>;
+ clocks = <&ccu 52>, <&ccu 99>;
+ clock-names = "bus", "mod";
+ resets = <&ccu 30>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/bus/allwinner,sun8i-a23-rsb.yaml b/Documentation/devicetree/bindings/bus/allwinner,sun8i-a23-rsb.yaml
new file mode 100644
index 000000000000..24c939f59091
--- /dev/null
+++ b/Documentation/devicetree/bindings/bus/allwinner,sun8i-a23-rsb.yaml
@@ -0,0 +1,82 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/bus/allwinner,sun8i-a23-rsb.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A23 RSB
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+properties:
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 0
+
+ compatible:
+ oneOf:
+ - const: allwinner,sun8i-a23-rsb
+ - items:
+ - enum:
+ - allwinner,sun8i-a83t-rsb
+ - allwinner,sun50i-h616-rsb
+ - const: allwinner,sun8i-a23-rsb
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ resets:
+ maxItems: 1
+
+ clock-frequency:
+ minimum: 1
+ maximum: 20000000
+
+patternProperties:
+ "^.*@[0-9a-fA-F]+$":
+ type: object
+ additionalProperties: true
+ properties:
+ reg:
+ maxItems: 1
+
+ required:
+ - reg
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - resets
+
+examples:
+ - |
+ rsb@1f03400 {
+ compatible = "allwinner,sun8i-a23-rsb";
+ reg = <0x01f03400 0x400>;
+ interrupts = <0 39 4>;
+ clocks = <&apb0_gates 3>;
+ clock-frequency = <3000000>;
+ resets = <&apb0_rst 3>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ pmic@3e3 {
+ reg = <0x3e3>;
+
+ /* ... */
+ };
+ };
+
+additionalProperties: false
diff --git a/Documentation/devicetree/bindings/bus/arm,integrator-ap-lm.yaml b/Documentation/devicetree/bindings/bus/arm,integrator-ap-lm.yaml
new file mode 100644
index 000000000000..47227427c1c0
--- /dev/null
+++ b/Documentation/devicetree/bindings/bus/arm,integrator-ap-lm.yaml
@@ -0,0 +1,83 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/bus/arm,integrator-ap-lm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Integrator/AP Logic Module extension bus
+
+maintainers:
+ - Linus Walleij <linusw@kernel.org>
+
+description: The Integrator/AP is a prototyping platform and as such has a
+ site for stacking up to four logic modules (LM) designed specifically for
+ use with this platform. A special system controller register can be read to
+ determine if a logic module is connected at index 0, 1, 2 or 3. The logic
+ module connector is described in this binding. The logic modules per se
+ then have their own specific per-module bindings and they will be described
+ as subnodes under this logic module extension bus.
+
+properties:
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 1
+
+ compatible:
+ items:
+ - const: arm,integrator-ap-lm
+
+ ranges: true
+ dma-ranges: true
+
+patternProperties:
+ "^bus(@[0-9a-f]*)?$":
+ description: Nodes on the Logic Module bus represent logic modules
+ and are named with bus. The first module is at 0xc0000000, the second
+ at 0xd0000000 and so on until the top of the memory of the system at
+ 0xffffffff. All information about the memory used by the module is
+ in ranges and dma-ranges.
+ type: object
+
+ required:
+ - compatible
+
+required:
+ - compatible
+
+examples:
+ - |
+ bus@c0000000 {
+ compatible = "arm,integrator-ap-lm";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0xc0000000 0xc0000000 0x40000000>;
+ dma-ranges;
+
+ bus@c0000000 {
+ compatible = "simple-bus";
+ ranges = <0x00000000 0xc0000000 0x10000000>;
+ /* The Logic Modules sees the Core Module 0 RAM @80000000 */
+ dma-ranges = <0x00000000 0x80000000 0x10000000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ serial@100000 {
+ compatible = "arm,pl011", "arm,primecell";
+ reg = <0x00100000 0x1000>;
+ interrupts-extended = <&impd1_vic 1>;
+ };
+
+ impd1_vic: interrupt-controller@3000000 {
+ compatible = "arm,pl192-vic";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ reg = <0x03000000 0x1000>;
+ valid-mask = <0x00000bff>;
+ interrupts-extended = <&pic 9>;
+ };
+ };
+ };
+
+additionalProperties: false
diff --git a/Documentation/devicetree/bindings/bus/aspeed,ast2600-ahbc.yaml b/Documentation/devicetree/bindings/bus/aspeed,ast2600-ahbc.yaml
new file mode 100644
index 000000000000..2894256c976d
--- /dev/null
+++ b/Documentation/devicetree/bindings/bus/aspeed,ast2600-ahbc.yaml
@@ -0,0 +1,37 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/bus/aspeed,ast2600-ahbc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ASPEED Advanced High-Performance Bus Controller (AHBC)
+
+maintainers:
+ - Neal Liu <neal_liu@aspeedtech.com>
+ - Chia-Wei Wang <chiawei_wang@aspeedtech.com>
+
+description: |
+ Advanced High-performance Bus Controller (AHBC) supports plenty of mechanisms
+ including a priority arbiter, an address decoder and a data multiplexer
+ to control the overall operations of Advanced High-performance Bus (AHB).
+
+properties:
+ compatible:
+ enum:
+ - aspeed,ast2600-ahbc
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ ahbc@1e600000 {
+ compatible = "aspeed,ast2600-ahbc";
+ reg = <0x1e600000 0x100>;
+ };
diff --git a/Documentation/devicetree/bindings/bus/baikal,bt1-apb.yaml b/Documentation/devicetree/bindings/bus/baikal,bt1-apb.yaml
new file mode 100644
index 000000000000..37ba3337f944
--- /dev/null
+++ b/Documentation/devicetree/bindings/bus/baikal,bt1-apb.yaml
@@ -0,0 +1,90 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (C) 2020 BAIKAL ELECTRONICS, JSC
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/bus/baikal,bt1-apb.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Baikal-T1 APB-bus
+
+maintainers:
+ - Serge Semin <fancer.lancer@gmail.com>
+
+description: |
+ Baikal-T1 CPU or DMAC MMIO requests are handled by the AMBA 3 AXI Interconnect
+ which routes them to the AXI-APB bridge. This interface is a single master
+ multiple slaves bus in turn serializing IO accesses and routing them to the
+ addressed APB slave devices. In case of any APB protocol collisions, slave
+ device not responding on timeout an IRQ is raised with an erroneous address
+ reported to the APB terminator (APB Errors Handler Block).
+
+allOf:
+ - $ref: /schemas/simple-bus.yaml#
+
+properties:
+ compatible:
+ contains:
+ const: baikal,bt1-apb
+
+ reg:
+ items:
+ - description: APB EHB MMIO registers
+ - description: APB MMIO region with no any device mapped
+
+ reg-names:
+ items:
+ - const: ehb
+ - const: nodev
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: APB reference clock
+
+ clock-names:
+ items:
+ - const: pclk
+
+ resets:
+ items:
+ - description: APB domain reset line
+
+ reset-names:
+ items:
+ - const: prst
+
+unevaluatedProperties: false
+
+required:
+ - compatible
+ - reg
+ - reg-names
+ - interrupts
+ - clocks
+ - clock-names
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/mips-gic.h>
+
+ bus@1f059000 {
+ compatible = "baikal,bt1-apb", "simple-bus";
+ reg = <0x1f059000 0x1000>,
+ <0x1d000000 0x2040000>;
+ reg-names = "ehb", "nodev";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ ranges;
+
+ interrupts = <GIC_SHARED 16 IRQ_TYPE_LEVEL_HIGH>;
+
+ clocks = <&ccu_sys 1>;
+ clock-names = "pclk";
+
+ resets = <&ccu_sys 1>;
+ reset-names = "prst";
+ };
+...
diff --git a/Documentation/devicetree/bindings/bus/baikal,bt1-axi.yaml b/Documentation/devicetree/bindings/bus/baikal,bt1-axi.yaml
new file mode 100644
index 000000000000..4ac78b44e45e
--- /dev/null
+++ b/Documentation/devicetree/bindings/bus/baikal,bt1-axi.yaml
@@ -0,0 +1,107 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (C) 2020 BAIKAL ELECTRONICS, JSC
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/bus/baikal,bt1-axi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Baikal-T1 AXI-bus
+
+maintainers:
+ - Serge Semin <fancer.lancer@gmail.com>
+
+description: |
+ AXI3-bus is the main communication bus of Baikal-T1 SoC connecting all
+ high-speed peripheral IP-cores with RAM controller and with MIPS P5600
+ cores. Traffic arbitration is done by means of DW AXI Interconnect (so
+ called AXI Main Interconnect) routing IO requests from one block to
+ another: from CPU to SoC peripherals and between some SoC peripherals
+ (mostly between peripheral devices and RAM, but also between DMA and
+ some peripherals). In case of any protocol error, device not responding
+ an IRQ is raised and a faulty situation is reported to the AXI EHB
+ (Errors Handler Block) embedded on top of the DW AXI Interconnect and
+ accessible by means of the Baikal-T1 System Controller.
+
+allOf:
+ - $ref: /schemas/simple-bus.yaml#
+
+properties:
+ compatible:
+ contains:
+ const: baikal,bt1-axi
+
+ reg:
+ minItems: 1
+ items:
+ - description: Synopsys DesignWare AXI Interconnect QoS registers
+ - description: AXI EHB MMIO system controller registers
+
+ reg-names:
+ minItems: 1
+ items:
+ - const: qos
+ - const: ehb
+
+ '#interconnect-cells':
+ const: 1
+
+ syscon:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description: Phandle to the Baikal-T1 System Controller DT node
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: Main Interconnect uplink reference clock
+
+ clock-names:
+ items:
+ - const: aclk
+
+ resets:
+ items:
+ - description: Main Interconnect reset line
+
+ reset-names:
+ items:
+ - const: arst
+
+unevaluatedProperties: false
+
+required:
+ - compatible
+ - reg
+ - reg-names
+ - syscon
+ - interrupts
+ - clocks
+ - clock-names
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/mips-gic.h>
+
+ bus@1f05a000 {
+ compatible = "baikal,bt1-axi", "simple-bus";
+ reg = <0x1f05a000 0x1000>,
+ <0x1f04d110 0x8>;
+ reg-names = "qos", "ehb";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ #interconnect-cells = <1>;
+
+ syscon = <&syscon>;
+
+ ranges;
+
+ interrupts = <GIC_SHARED 127 IRQ_TYPE_LEVEL_HIGH>;
+
+ clocks = <&ccu_axi 0>;
+ clock-names = "aclk";
+
+ resets = <&ccu_axi 0>;
+ reset-names = "arst";
+ };
+...
diff --git a/Documentation/devicetree/bindings/bus/brcm,gisb-arb.txt b/Documentation/devicetree/bindings/bus/brcm,gisb-arb.txt
deleted file mode 100644
index 729def62f0c5..000000000000
--- a/Documentation/devicetree/bindings/bus/brcm,gisb-arb.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-Broadcom GISB bus Arbiter controller
-
-Required properties:
-
-- compatible:
- "brcm,bcm7278-gisb-arb" for V7 28nm chips
- "brcm,gisb-arb" or "brcm,bcm7445-gisb-arb" for other 28nm chips
- "brcm,bcm7435-gisb-arb" for newer 40nm chips
- "brcm,bcm7400-gisb-arb" for older 40nm chips and all 65nm chips
- "brcm,bcm7038-gisb-arb" for 130nm chips
-- reg: specifies the base physical address and size of the registers
-- interrupts: specifies the two interrupts (timeout and TEA) to be used from
- the parent interrupt controller
-
-Optional properties:
-
-- brcm,gisb-arb-master-mask: 32-bits wide bitmask used to specify which GISB
- masters are valid at the system level
-- brcm,gisb-arb-master-names: string list of the litteral name of the GISB
- masters. Should match the number of bits set in brcm,gisb-master-mask and
- the order in which they appear
-
-Example:
-
-gisb-arb@f0400000 {
- compatible = "brcm,gisb-arb";
- reg = <0xf0400000 0x800>;
- interrupts = <0>, <2>;
- interrupt-parent = <&sun_l2_intc>;
-
- brcm,gisb-arb-master-mask = <0x7>;
- brcm,gisb-arb-master-names = "bsp_0", "scpu_0", "cpu_0";
-};
diff --git a/Documentation/devicetree/bindings/bus/brcm,gisb-arb.yaml b/Documentation/devicetree/bindings/bus/brcm,gisb-arb.yaml
new file mode 100644
index 000000000000..9017c5a3f3d2
--- /dev/null
+++ b/Documentation/devicetree/bindings/bus/brcm,gisb-arb.yaml
@@ -0,0 +1,67 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/bus/brcm,gisb-arb.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom GISB bus Arbiter controller
+
+maintainers:
+ - Florian Fainelli <f.fainelli@gmail.com>
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - brcm,bcm7445-gisb-arb # for other 28nm chips
+ - const: brcm,gisb-arb
+ - items:
+ - enum:
+ - brcm,bcm74165-gisb-arb # for V7 new style 16nm chips
+ - brcm,bcm7278-gisb-arb # for V7 28nm chips
+ - brcm,bcm7435-gisb-arb # for newer 40nm chips
+ - brcm,bcm7400-gisb-arb # for older 40nm chips and all 65nm chips
+ - brcm,bcm7038-gisb-arb # for 130nm chips
+ - brcm,gisb-arb # fallback compatible
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ minItems: 2
+ items:
+ - description: timeout interrupt line
+ - description: target abort interrupt line
+ - description: breakpoint interrupt line
+
+ brcm,gisb-arb-master-mask:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: >
+ 32-bits wide bitmask used to specify which GISB masters are valid at the
+ system level
+
+ brcm,gisb-arb-master-names:
+ $ref: /schemas/types.yaml#/definitions/string-array
+ description: >
+ String list of the literal name of the GISB masters. Should match the
+ number of bits set in brcm,gisb-master-mask and the order in which they
+ appear from MSB to LSB.
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ gisb-arb@f0400000 {
+ compatible = "brcm,gisb-arb";
+ reg = <0xf0400000 0x800>;
+ interrupts = <0>, <2>;
+ interrupt-parent = <&sun_l2_intc>;
+ brcm,gisb-arb-master-mask = <0x7>;
+ brcm,gisb-arb-master-names = "bsp_0", "scpu_0", "cpu_0";
+ };
diff --git a/Documentation/devicetree/bindings/bus/fsl,imx8mp-aipstz.yaml b/Documentation/devicetree/bindings/bus/fsl,imx8mp-aipstz.yaml
new file mode 100644
index 000000000000..993293ebc4d3
--- /dev/null
+++ b/Documentation/devicetree/bindings/bus/fsl,imx8mp-aipstz.yaml
@@ -0,0 +1,104 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/bus/fsl,imx8mp-aipstz.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Secure AHB to IP Slave bus (AIPSTZ) bridge
+
+description:
+ The secure AIPS bridge (AIPSTZ) acts as a bridge for AHB masters issuing
+ transactions to IP Slave peripherals. Additionally, this module offers access
+ control configurations meant to restrict which peripherals a master can
+ access.
+
+maintainers:
+ - Laurentiu Mihalcea <laurentiu.mihalcea@nxp.com>
+
+properties:
+ compatible:
+ const: fsl,imx8mp-aipstz
+
+ reg:
+ maxItems: 1
+
+ power-domains:
+ maxItems: 1
+
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 1
+
+ "#access-controller-cells":
+ const: 3
+ description:
+ First cell - consumer ID
+ Second cell - consumer type (master or peripheral)
+ Third cell - configuration value
+
+ ranges: true
+
+# borrowed from simple-bus.yaml, no additional requirements for children
+patternProperties:
+ "@(0|[1-9a-f][0-9a-f]*)$":
+ type: object
+ additionalProperties: true
+ properties:
+ reg:
+ items:
+ minItems: 2
+ maxItems: 4
+ minItems: 1
+ maxItems: 1024
+ ranges:
+ oneOf:
+ - items:
+ minItems: 3
+ maxItems: 7
+ minItems: 1
+ maxItems: 1024
+ - $ref: /schemas/types.yaml#/definitions/flag
+ anyOf:
+ - required:
+ - reg
+ - required:
+ - ranges
+
+required:
+ - compatible
+ - reg
+ - power-domains
+ - "#address-cells"
+ - "#size-cells"
+ - "#access-controller-cells"
+ - ranges
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/imx8mp-clock.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ bus@30df0000 {
+ compatible = "fsl,imx8mp-aipstz";
+ reg = <0x30df0000 0x10000>;
+ ranges = <0x30c00000 0x30c00000 0x400000>;
+ power-domains = <&pgc_audio>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ #access-controller-cells = <3>;
+
+ dma-controller@30e00000 {
+ compatible = "fsl,imx8mp-sdma", "fsl,imx8mq-sdma";
+ reg = <0x30e00000 0x10000>;
+ #dma-cells = <3>;
+ clocks = <&audio_blk_ctrl IMX8MP_CLK_AUDIOMIX_SDMA3_ROOT>,
+ <&clk IMX8MP_CLK_AUDIO_ROOT>;
+ clock-names = "ipg", "ahb";
+ interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>;
+ fsl,sdma-ram-script-name = "imx/sdma/sdma-imx7d.bin";
+ };
+ };
diff --git a/Documentation/devicetree/bindings/bus/fsl,imx8qxp-pixel-link-msi-bus.yaml b/Documentation/devicetree/bindings/bus/fsl,imx8qxp-pixel-link-msi-bus.yaml
new file mode 100644
index 000000000000..4adbb7afa889
--- /dev/null
+++ b/Documentation/devicetree/bindings/bus/fsl,imx8qxp-pixel-link-msi-bus.yaml
@@ -0,0 +1,232 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/bus/fsl,imx8qxp-pixel-link-msi-bus.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX8qxp Pixel Link Medium Speed Interconnect (MSI) Bus
+
+maintainers:
+ - Liu Ying <victor.liu@nxp.com>
+
+description: |
+ i.MX8qxp pixel link MSI bus is used to control settings of PHYs, I/Os
+ sitting together with the PHYs. It is not the same as the MSI bus coming
+ from i.MX8 System Controller Unit (SCU) which is used to control power,
+ clock and reset through the i.MX8 Distributed Slave System Controller (DSC).
+
+ i.MX8qxp pixel link MSI bus is a simple memory-mapped bus. Two input clocks,
+ that is, MSI clock and AHB clock, need to be enabled so that peripherals
+ connected to the bus can be accessed. Also, the bus is part of a power
+ domain. The power domain needs to be enabled before the peripherals can
+ be accessed.
+
+ Peripherals in i.MX8qm/qxp imaging, LVDS, MIPI DSI and HDMI TX subsystems,
+ like I2C controller, PWM controller, MIPI DSI controller and Control and
+ Status Registers (CSR) module, are accessed through the bus.
+
+ The i.MX System Controller Firmware (SCFW) owns and uses the i.MX8qm/qxp
+ pixel link MSI bus controller and does not allow SCFW user to control it.
+ So, the controller's registers cannot be accessed by SCFW user. Hence,
+ the interrupts generated by the controller don't make any sense from SCFW
+ user's point of view.
+
+allOf:
+ - $ref: simple-pm-bus.yaml#
+
+# We need a select here so we don't match all nodes with 'simple-pm-bus'.
+select:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - fsl,imx8qxp-display-pixel-link-msi-bus
+ - fsl,imx8qm-display-pixel-link-msi-bus
+ required:
+ - compatible
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - fsl,imx8qxp-display-pixel-link-msi-bus
+ - fsl,imx8qm-display-pixel-link-msi-bus
+ - const: simple-pm-bus
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: master gated clock from system
+ - description: AHB clock
+
+ clock-names:
+ items:
+ - const: msi
+ - const: ahb
+
+patternProperties:
+ "^.*@[0-9a-f]+$":
+ description: Devices attached to the bus
+ type: object
+
+ required:
+ - reg
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - power-domains
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/imx8-lpcg.h>
+ #include <dt-bindings/firmware/imx/rsrc.h>
+ bus@56200000 {
+ compatible = "fsl,imx8qxp-display-pixel-link-msi-bus", "simple-pm-bus";
+ reg = <0x56200000 0x20000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ interrupt-parent = <&dc0_irqsteer>;
+ interrupts = <320>;
+ ranges;
+ clocks = <&dc0_disp_ctrl_link_mst0_lpcg IMX_LPCG_CLK_4>,
+ <&dc0_disp_ctrl_link_mst0_lpcg IMX_LPCG_CLK_4>;
+ clock-names = "msi", "ahb";
+ power-domains = <&pd IMX_SC_R_DC_0>;
+
+ bus@56221000 {
+ compatible = "simple-pm-bus", "syscon";
+ reg = <0x56221000 0x1000>;
+ clocks = <&mipi_lvds_0_di_mipi_lvds_regs_lpcg IMX_LPCG_CLK_4>;
+ clock-names = "ipg";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ pxl2dpi {
+ compatible = "fsl,imx8qxp-pxl2dpi";
+ fsl,sc-resource = <IMX_SC_R_MIPI_0>;
+ power-domains = <&pd IMX_SC_R_MIPI_0>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+
+ mipi_lvds_0_pxl2dpi_dc0_pixel_link0: endpoint@0 {
+ reg = <0>;
+ remote-endpoint = <&dc0_pixel_link0_mipi_lvds_0_pxl2dpi>;
+ };
+
+ mipi_lvds_0_pxl2dpi_dc0_pixel_link1: endpoint@1 {
+ reg = <1>;
+ remote-endpoint = <&dc0_pixel_link1_mipi_lvds_0_pxl2dpi>;
+ };
+ };
+
+ port@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+
+ mipi_lvds_0_pxl2dpi_mipi_lvds_0_ldb_ch0: endpoint@0 {
+ reg = <0>;
+ remote-endpoint = <&mipi_lvds_0_ldb_ch0_mipi_lvds_0_pxl2dpi>;
+ };
+
+ mipi_lvds_0_pxl2dpi_mipi_lvds_0_ldb_ch1: endpoint@1 {
+ reg = <1>;
+ remote-endpoint = <&mipi_lvds_0_ldb_ch1_mipi_lvds_0_pxl2dpi>;
+ };
+ };
+ };
+ };
+
+ ldb {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,imx8qxp-ldb";
+ clocks = <&clk IMX_SC_R_LVDS_0 IMX_SC_PM_CLK_MISC2>,
+ <&clk IMX_SC_R_LVDS_0 IMX_SC_PM_CLK_BYPASS>;
+ clock-names = "pixel", "bypass";
+ power-domains = <&pd IMX_SC_R_LVDS_0>;
+
+ channel@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+ phys = <&mipi_lvds_0_phy>;
+ phy-names = "lvds_phy";
+
+ port@0 {
+ reg = <0>;
+
+ mipi_lvds_0_ldb_ch0_mipi_lvds_0_pxl2dpi: endpoint {
+ remote-endpoint = <&mipi_lvds_0_pxl2dpi_mipi_lvds_0_ldb_ch0>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ /* ... */
+ };
+ };
+
+ channel@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+ phys = <&mipi_lvds_0_phy>;
+ phy-names = "lvds_phy";
+
+ port@0 {
+ reg = <0>;
+
+ mipi_lvds_0_ldb_ch1_mipi_lvds_0_pxl2dpi: endpoint {
+ remote-endpoint = <&mipi_lvds_0_pxl2dpi_mipi_lvds_0_ldb_ch1>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ /* ... */
+ };
+ };
+ };
+ };
+
+ clock-controller@56223004 {
+ compatible = "fsl,imx8qxp-lpcg";
+ reg = <0x56223004 0x4>;
+ #clock-cells = <1>;
+ clocks = <&mipi_lvds_0_ipg_clk>;
+ clock-indices = <IMX_LPCG_CLK_4>;
+ clock-output-names = "mipi_lvds_0_di_mipi_lvds_regs_lpcg_ipg_clk";
+ power-domains = <&pd IMX_SC_R_MIPI_0>;
+ };
+
+ phy@56228300 {
+ compatible = "fsl,imx8qxp-mipi-dphy";
+ reg = <0x56228300 0x100>;
+ clocks = <&clk IMX_SC_R_LVDS_0 IMX_SC_PM_CLK_PHY>;
+ clock-names = "phy_ref";
+ #phy-cells = <0>;
+ fsl,syscon = <&mipi_lvds_0_csr>;
+ power-domains = <&pd IMX_SC_R_MIPI_0>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/bus/fsl,spba-bus.yaml b/Documentation/devicetree/bindings/bus/fsl,spba-bus.yaml
new file mode 100644
index 000000000000..d42dbb0bbc2e
--- /dev/null
+++ b/Documentation/devicetree/bindings/bus/fsl,spba-bus.yaml
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/bus/fsl,spba-bus.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Shared Peripherals Bus Interface
+
+maintainers:
+ - Shawn Guo <shawnguo@kernel.org>
+
+description: |
+ A simple bus enabling access to shared peripherals.
+
+ The "spba-bus" follows the "simple-bus" set of properties, as
+ specified in the Devicetree Specification. It is an extension of
+ "simple-bus" because the SDMA controller uses this compatible flag to
+ determine which peripherals are available to it and the range over which
+ the SDMA can access. There are no special clocks for the bus, because
+ the SDMA controller itself has its interrupt and clock assignments.
+
+select:
+ properties:
+ compatible:
+ contains:
+ const: fsl,spba-bus
+ required:
+ - compatible
+
+properties:
+ $nodename:
+ pattern: "^spba-bus(@[0-9a-f]+)?$"
+
+ compatible:
+ items:
+ - const: fsl,spba-bus
+ - const: simple-bus
+
+ '#address-cells':
+ enum: [ 1, 2 ]
+
+ '#size-cells':
+ enum: [ 1, 2 ]
+
+ reg:
+ maxItems: 1
+
+ ranges: true
+
+required:
+ - compatible
+ - '#address-cells'
+ - '#size-cells'
+ - reg
+ - ranges
+
+additionalProperties:
+ type: object
+
+examples:
+ - |
+ spba-bus@30000000 {
+ compatible = "fsl,spba-bus", "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0x30000000 0x100000>;
+ ranges;
+ };
diff --git a/Documentation/devicetree/bindings/bus/imx-weim.txt b/Documentation/devicetree/bindings/bus/imx-weim.txt
deleted file mode 100644
index 683eaf3aed79..000000000000
--- a/Documentation/devicetree/bindings/bus/imx-weim.txt
+++ /dev/null
@@ -1,82 +0,0 @@
-Device tree bindings for i.MX Wireless External Interface Module (WEIM)
-
-The term "wireless" does not imply that the WEIM is literally an interface
-without wires. It simply means that this module was originally designed for
-wireless and mobile applications that use low-power technology.
-
-The actual devices are instantiated from the child nodes of a WEIM node.
-
-Required properties:
-
- - compatible: Should contain one of the following:
- "fsl,imx1-weim"
- "fsl,imx27-weim"
- "fsl,imx51-weim"
- "fsl,imx50-weim"
- "fsl,imx6q-weim"
- - reg: A resource specifier for the register space
- (see the example below)
- - clocks: the clock, see the example below.
- - #address-cells: Must be set to 2 to allow memory address translation
- - #size-cells: Must be set to 1 to allow CS address passing
- - ranges: Must be set up to reflect the memory layout with four
- integer values for each chip-select line in use:
-
- <cs-number> 0 <physical address of mapping> <size>
-
-Optional properties:
-
- - fsl,weim-cs-gpr: For "fsl,imx50-weim" and "fsl,imx6q-weim" type of
- devices, it should be the phandle to the system General
- Purpose Register controller that contains WEIM CS GPR
- register, e.g. IOMUXC_GPR1 on i.MX6Q. IOMUXC_GPR1[11:0]
- should be set up as one of the following 4 possible
- values depending on the CS space configuration.
-
- IOMUXC_GPR1[11:0] CS0 CS1 CS2 CS3
- ---------------------------------------------
- 05 128M 0M 0M 0M
- 033 64M 64M 0M 0M
- 0113 64M 32M 32M 0M
- 01111 32M 32M 32M 32M
-
- In case that the property is absent, the reset value or
- what bootloader sets up in IOMUXC_GPR1[11:0] will be
- used.
-
-Timing property for child nodes. It is mandatory, not optional.
-
- - fsl,weim-cs-timing: The timing array, contains timing values for the
- child node. We can get the CS index from the child
- node's "reg" property. The number of registers depends
- on the selected chip.
- For i.MX1, i.MX21 ("fsl,imx1-weim") there are two
- registers: CSxU, CSxL.
- For i.MX25, i.MX27, i.MX31 and i.MX35 ("fsl,imx27-weim")
- there are three registers: CSCRxU, CSCRxL, CSCRxA.
- For i.MX50, i.MX53 ("fsl,imx50-weim"),
- i.MX51 ("fsl,imx51-weim") and i.MX6Q ("fsl,imx6q-weim")
- there are six registers: CSxGCR1, CSxGCR2, CSxRCR1,
- CSxRCR2, CSxWCR1, CSxWCR2.
-
-Example for an imx6q-sabreauto board, the NOR flash connected to the WEIM:
-
- weim: weim@21b8000 {
- compatible = "fsl,imx6q-weim";
- reg = <0x021b8000 0x4000>;
- clocks = <&clks 196>;
- #address-cells = <2>;
- #size-cells = <1>;
- ranges = <0 0 0x08000000 0x08000000>;
- fsl,weim-cs-gpr = <&gpr>;
-
- nor@0,0 {
- compatible = "cfi-flash";
- reg = <0 0 0x02000000>;
- #address-cells = <1>;
- #size-cells = <1>;
- bank-width = <2>;
- fsl,weim-cs-timing = <0x00620081 0x00000001 0x1c022000
- 0x0000c000 0x1404a38e 0x00000000>;
- };
- };
diff --git a/Documentation/devicetree/bindings/bus/microsoft,vmbus.yaml b/Documentation/devicetree/bindings/bus/microsoft,vmbus.yaml
new file mode 100644
index 000000000000..0bea4f5287ce
--- /dev/null
+++ b/Documentation/devicetree/bindings/bus/microsoft,vmbus.yaml
@@ -0,0 +1,66 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/bus/microsoft,vmbus.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Microsoft Hyper-V VMBus
+
+maintainers:
+ - Saurabh Sengar <ssengar@linux.microsoft.com>
+
+description:
+ VMBus is a software bus that implements the protocols for communication
+ between the root or host OS and guest OS'es (virtual machines).
+
+properties:
+ compatible:
+ const: microsoft,vmbus
+
+ ranges: true
+
+ '#address-cells':
+ const: 2
+
+ '#size-cells':
+ const: 1
+
+ dma-coherent: true
+
+ interrupts:
+ maxItems: 1
+ description: Interrupt is used to report a message from the host.
+
+required:
+ - compatible
+ - ranges
+ - interrupts
+ - '#address-cells'
+ - '#size-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ soc {
+ #address-cells = <2>;
+ #size-cells = <1>;
+ bus {
+ compatible = "simple-bus";
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges;
+
+ vmbus@ff0000000 {
+ compatible = "microsoft,vmbus";
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges = <0x0f 0xf0000000 0x0f 0xf0000000 0x10000000>;
+ dma-coherent;
+ interrupt-parent = <&gic>;
+ interrupts = <GIC_PPI 2 IRQ_TYPE_EDGE_RISING>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/bus/moxtet.txt b/Documentation/devicetree/bindings/bus/moxtet.txt
new file mode 100644
index 000000000000..fb50fc865336
--- /dev/null
+++ b/Documentation/devicetree/bindings/bus/moxtet.txt
@@ -0,0 +1,46 @@
+Turris Mox module status and configuration bus (over SPI)
+
+Required properties:
+ - compatible : Should be "cznic,moxtet"
+ - #address-cells : Has to be 1
+ - #size-cells : Has to be 0
+ - spi-cpol : Required inverted clock polarity
+ - spi-cpha : Required shifted clock phase
+ - interrupts : Must contain reference to the shared interrupt line
+ - interrupt-controller : Required
+ - #interrupt-cells : Has to be 1
+
+For other required and optional properties of SPI slave nodes please refer to
+../spi/spi-bus.txt.
+
+Required properties of subnodes:
+ - reg : Should be position on the Moxtet bus (how many Moxtet
+ modules are between this module and CPU module, so
+ either 0 or a positive integer)
+
+The driver finds the devices connected to the bus by itself, but it may be
+needed to reference some of them from other parts of the device tree. In that
+case the devices can be defined as subnodes of the moxtet node.
+
+Example:
+
+ moxtet@1 {
+ compatible = "cznic,moxtet";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+ spi-max-frequency = <10000000>;
+ spi-cpol;
+ spi-cpha;
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ interrupt-parent = <&gpiosb>;
+ interrupts = <5 IRQ_TYPE_EDGE_FALLING>;
+
+ moxtet_sfp: gpio@0 {
+ compatible = "cznic,moxtet-gpio";
+ gpio-controller;
+ #gpio-cells = <2>;
+ reg = <0>;
+ }
+ };
diff --git a/Documentation/devicetree/bindings/bus/mti,mips-cdmm.yaml b/Documentation/devicetree/bindings/bus/mti,mips-cdmm.yaml
new file mode 100644
index 000000000000..6a7b26b049f1
--- /dev/null
+++ b/Documentation/devicetree/bindings/bus/mti,mips-cdmm.yaml
@@ -0,0 +1,37 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/bus/mti,mips-cdmm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MIPS Common Device Memory Map
+
+description: |
+ Defines a location of the MIPS Common Device Memory Map registers.
+
+maintainers:
+ - James Hogan <jhogan@kernel.org>
+
+properties:
+ compatible:
+ const: mti,mips-cdmm
+
+ reg:
+ description: |
+ Base address and size of an unoccupied memory region, which will be
+ used to map the MIPS CDMM registers block.
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ cdmm@1bde8000 {
+ compatible = "mti,mips-cdmm";
+ reg = <0x1bde8000 0x8000>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/bus/nvidia,tegra210-aconnect.txt b/Documentation/devicetree/bindings/bus/nvidia,tegra210-aconnect.txt
deleted file mode 100644
index 3108d03802ee..000000000000
--- a/Documentation/devicetree/bindings/bus/nvidia,tegra210-aconnect.txt
+++ /dev/null
@@ -1,44 +0,0 @@
-NVIDIA Tegra ACONNECT Bus
-
-The Tegra ACONNECT bus is an AXI switch which is used to connnect various
-components inside the Audio Processing Engine (APE). All CPU accesses to
-the APE subsystem go through the ACONNECT via an APB to AXI wrapper.
-
-Required properties:
-- compatible: Must be "nvidia,tegra210-aconnect".
-- clocks: Must contain the entries for the APE clock (TEGRA210_CLK_APE),
- and APE interface clock (TEGRA210_CLK_APB2APE).
-- clock-names: Must contain the names "ape" and "apb2ape" for the corresponding
- 'clocks' entries.
-- power-domains: Must contain a phandle that points to the audio powergate
- (namely 'aud') for Tegra210.
-- #address-cells: The number of cells used to represent physical base addresses
- in the aconnect address space. Should be 1.
-- #size-cells: The number of cells used to represent the size of an address
- range in the aconnect address space. Should be 1.
-- ranges: Mapping of the aconnect address space to the CPU address space.
-
-All devices accessed via the ACONNNECT are described by child-nodes.
-
-Example:
-
- aconnect@702c0000 {
- compatible = "nvidia,tegra210-aconnect";
- clocks = <&tegra_car TEGRA210_CLK_APE>,
- <&tegra_car TEGRA210_CLK_APB2APE>;
- clock-names = "ape", "apb2ape";
- power-domains = <&pd_audio>;
-
- #address-cells = <1>;
- #size-cells = <1>;
- ranges = <0x702c0000 0x0 0x702c0000 0x00040000>;
-
-
- child1 {
- ...
- };
-
- child2 {
- ...
- };
- };
diff --git a/Documentation/devicetree/bindings/bus/nvidia,tegra210-aconnect.yaml b/Documentation/devicetree/bindings/bus/nvidia,tegra210-aconnect.yaml
new file mode 100644
index 000000000000..81a65e9f93f1
--- /dev/null
+++ b/Documentation/devicetree/bindings/bus/nvidia,tegra210-aconnect.yaml
@@ -0,0 +1,84 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/bus/nvidia,tegra210-aconnect.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NVIDIA Tegra ACONNECT Bus
+
+description: |
+ The Tegra ACONNECT bus is an AXI switch which is used to connect various
+ components inside the Audio Processing Engine (APE). All CPU accesses to
+ the APE subsystem go through the ACONNECT via an APB to AXI wrapper. All
+ devices accessed via the ACONNECT are described by child-nodes.
+
+maintainers:
+ - Jon Hunter <jonathanh@nvidia.com>
+
+properties:
+ compatible:
+ oneOf:
+ - const: nvidia,tegra210-aconnect
+ - items:
+ - enum:
+ - nvidia,tegra264-aconnect
+ - nvidia,tegra234-aconnect
+ - nvidia,tegra186-aconnect
+ - nvidia,tegra194-aconnect
+ - const: nvidia,tegra210-aconnect
+
+ clocks:
+ items:
+ - description: Must contain the entry for APE clock
+ - description: Must contain the entry for APE interface clock
+
+ clock-names:
+ items:
+ - const: ape
+ - const: apb2ape
+
+ power-domains:
+ maxItems: 1
+
+ "#address-cells":
+ enum: [ 1, 2 ]
+
+ "#size-cells":
+ enum: [ 1, 2 ]
+
+ ranges: true
+
+patternProperties:
+ "@[0-9a-f]+$":
+ type: object
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - power-domains
+ - "#address-cells"
+ - "#size-cells"
+ - ranges
+
+additionalProperties: false
+
+examples:
+ - |
+ #include<dt-bindings/clock/tegra210-car.h>
+
+ aconnect@702c0000 {
+ compatible = "nvidia,tegra210-aconnect";
+ clocks = <&tegra_car TEGRA210_CLK_APE>,
+ <&tegra_car TEGRA210_CLK_APB2APE>;
+ clock-names = "ape", "apb2ape";
+ power-domains = <&pd_audio>;
+
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0x702c0000 0x702c0000 0x00040000>;
+
+ // Child device nodes follow ...
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/bus/palmbus.yaml b/Documentation/devicetree/bindings/bus/palmbus.yaml
new file mode 100644
index 000000000000..c36c1e92a573
--- /dev/null
+++ b/Documentation/devicetree/bindings/bus/palmbus.yaml
@@ -0,0 +1,80 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/bus/palmbus.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Ralink PalmBus
+
+maintainers:
+ - Sergio Paracuellos <sergio.paracuellos@gmail.com>
+
+description: |
+ The ralink palmbus controller can be found in all ralink MIPS
+ SoCs. It provides an external bus for connecting multiple
+ external devices to the SoC.
+
+properties:
+ $nodename:
+ pattern: "^palmbus(@[0-9a-f]+)?$"
+
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 1
+
+ compatible:
+ const: palmbus
+
+ reg:
+ maxItems: 1
+
+ ranges: true
+
+patternProperties:
+ # All other properties should be child nodes with unit-address and 'reg'
+ "@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+ properties:
+ reg:
+ maxItems: 1
+
+ required:
+ - reg
+
+required:
+ - compatible
+ - reg
+ - "#address-cells"
+ - "#size-cells"
+ - ranges
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/mips-gic.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+ palmbus@1e000000 {
+ compatible = "palmbus";
+ reg = <0x1e000000 0x100000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0x0 0x1e000000 0x0fffff>;
+
+ gpio@600 {
+ #gpio-cells = <2>;
+ #interrupt-cells = <2>;
+ compatible = "mediatek,mt7621-gpio";
+ gpio-controller;
+ gpio-ranges = <&pinctrl 0 0 95>;
+ interrupt-controller;
+ reg = <0x600 0x100>;
+ interrupt-parent = <&gic>;
+ interrupts = <GIC_SHARED 12 IRQ_TYPE_LEVEL_HIGH>;
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/bus/qcom,ebi2.txt b/Documentation/devicetree/bindings/bus/qcom,ebi2.txt
deleted file mode 100644
index 5a7d567f6833..000000000000
--- a/Documentation/devicetree/bindings/bus/qcom,ebi2.txt
+++ /dev/null
@@ -1,138 +0,0 @@
-Qualcomm External Bus Interface 2 (EBI2)
-
-The EBI2 contains two peripheral blocks: XMEM and LCDC. The XMEM handles any
-external memory (such as NAND or other memory-mapped peripherals) whereas
-LCDC handles LCD displays.
-
-As it says it connects devices to an external bus interface, meaning address
-lines (up to 9 address lines so can only address 1KiB external memory space),
-data lines (16 bits), OE (output enable), ADV (address valid, used on some
-NOR flash memories), WE (write enable). This on top of 6 different chip selects
-(CS0 thru CS5) so that in theory 6 different devices can be connected.
-
-Apparently this bus is clocked at 64MHz. It has dedicated pins on the package
-and the bus can only come out on these pins, however if some of the pins are
-unused they can be left unconnected or remuxed to be used as GPIO or in some
-cases other orthogonal functions as well.
-
-Also CS1 and CS2 has -A and -B signals. Why they have that is unclear to me.
-
-The chip selects have the following memory range assignments. This region of
-memory is referred to as "Chip Peripheral SS FPB0" and is 168MB big.
-
-Chip Select Physical address base
-CS0 GPIO134 0x1a800000-0x1b000000 (8MB)
-CS1 GPIO39 (A) / GPIO123 (B) 0x1b000000-0x1b800000 (8MB)
-CS2 GPIO40 (A) / GPIO124 (B) 0x1b800000-0x1c000000 (8MB)
-CS3 GPIO133 0x1d000000-0x25000000 (128 MB)
-CS4 GPIO132 0x1c800000-0x1d000000 (8MB)
-CS5 GPIO131 0x1c000000-0x1c800000 (8MB)
-
-The APQ8060 Qualcomm Application Processor User Guide, 80-N7150-14 Rev. A,
-August 6, 2012 contains some incomplete documentation of the EBI2.
-
-FIXME: the manual mentions "write precharge cycles" and "precharge cycles".
-We have not been able to figure out which bit fields these correspond to
-in the hardware, or what valid values exist. The current hypothesis is that
-this is something just used on the FAST chip selects and that the SLOW
-chip selects are understood fully. There is also a "byte device enable"
-flag somewhere for 8bit memories.
-
-FIXME: The chipselects have SLOW and FAST configuration registers. It's a bit
-unclear what this means, if they are mutually exclusive or can be used
-together, or if some chip selects are hardwired to be FAST and others are SLOW
-by design.
-
-The XMEM registers are totally undocumented but could be partially decoded
-because the Cypress AN49576 Antioch Westbridge apparently has suspiciously
-similar register layout, see: http://www.cypress.com/file/105771/download
-
-Required properties:
-- compatible: should be one of:
- "qcom,msm8660-ebi2"
- "qcom,apq8060-ebi2"
-- #address-cells: should be <2>: the first cell is the chipselect,
- the second cell is the offset inside the memory range
-- #size-cells: should be <1>
-- ranges: should be set to:
- ranges = <0 0x0 0x1a800000 0x00800000>,
- <1 0x0 0x1b000000 0x00800000>,
- <2 0x0 0x1b800000 0x00800000>,
- <3 0x0 0x1d000000 0x08000000>,
- <4 0x0 0x1c800000 0x00800000>,
- <5 0x0 0x1c000000 0x00800000>;
-- reg: two ranges of registers: EBI2 config and XMEM config areas
-- reg-names: should be "ebi2", "xmem"
-- clocks: two clocks, EBI_2X and EBI
-- clock-names: should be "ebi2x", "ebi2"
-
-Optional subnodes:
-- Nodes inside the EBI2 will be considered device nodes.
-
-The following optional properties are properties that can be tagged onto
-any device subnode. We are assuming that there can be only ONE device per
-chipselect subnode, else the properties will become ambigous.
-
-Optional properties arrays for SLOW chip selects:
-- qcom,xmem-recovery-cycles: recovery cycles is the time the memory continues to
- drive the data bus after OE is de-asserted, in order to avoid contention on
- the data bus. They are inserted when reading one CS and switching to another
- CS or read followed by write on the same CS. Valid values 0 thru 15. Minimum
- value is actually 1, so a value of 0 will still yield 1 recovery cycle.
-- qcom,xmem-write-hold-cycles: write hold cycles, these are extra cycles
- inserted after every write minimum 1. The data out is driven from the time
- WE is asserted until CS is asserted. With a hold of 1 (value = 0), the CS
- stays active for 1 extra cycle etc. Valid values 0 thru 15.
-- qcom,xmem-write-delta-cycles: initial latency for write cycles inserted for
- the first write to a page or burst memory. Valid values 0 thru 255.
-- qcom,xmem-read-delta-cycles: initial latency for read cycles inserted for the
- first read to a page or burst memory. Valid values 0 thru 255.
-- qcom,xmem-write-wait-cycles: number of wait cycles for every write access, 0=1
- cycle. Valid values 0 thru 15.
-- qcom,xmem-read-wait-cycles: number of wait cycles for every read access, 0=1
- cycle. Valid values 0 thru 15.
-
-Optional properties arrays for FAST chip selects:
-- qcom,xmem-address-hold-enable: this is a boolean property stating that we
- shall hold the address for an extra cycle to meet hold time requirements
- with ADV assertion.
-- qcom,xmem-adv-to-oe-recovery-cycles: the number of cycles elapsed before an OE
- assertion, with respect to the cycle where ADV (address valid) is asserted.
- 2 means 2 cycles between ADV and OE. Valid values 0, 1, 2 or 3.
-- qcom,xmem-read-hold-cycles: the length in cycles of the first segment of a
- read transfer. For a single read transfer this will be the time from CS
- assertion to OE assertion. Valid values 0 thru 15.
-
-
-Example:
-
-ebi2@1a100000 {
- compatible = "qcom,apq8060-ebi2";
- #address-cells = <2>;
- #size-cells = <1>;
- ranges = <0 0x0 0x1a800000 0x00800000>,
- <1 0x0 0x1b000000 0x00800000>,
- <2 0x0 0x1b800000 0x00800000>,
- <3 0x0 0x1d000000 0x08000000>,
- <4 0x0 0x1c800000 0x00800000>,
- <5 0x0 0x1c000000 0x00800000>;
- reg = <0x1a100000 0x1000>, <0x1a110000 0x1000>;
- reg-names = "ebi2", "xmem";
- clocks = <&gcc EBI2_2X_CLK>, <&gcc EBI2_CLK>;
- clock-names = "ebi2x", "ebi2";
- /* Make sure to set up the pin control for the EBI2 */
- pinctrl-names = "default";
- pinctrl-0 = <&foo_ebi2_pins>;
-
- foo-ebi2@2,0 {
- compatible = "foo";
- reg = <2 0x0 0x100>;
- (...)
- qcom,xmem-recovery-cycles = <0>;
- qcom,xmem-write-hold-cycles = <3>;
- qcom,xmem-write-delta-cycles = <31>;
- qcom,xmem-read-delta-cycles = <28>;
- qcom,xmem-write-wait-cycles = <9>;
- qcom,xmem-read-wait-cycles = <9>;
- };
-};
diff --git a/Documentation/devicetree/bindings/bus/qcom,ssbi.yaml b/Documentation/devicetree/bindings/bus/qcom,ssbi.yaml
new file mode 100644
index 000000000000..693cfa9696b5
--- /dev/null
+++ b/Documentation/devicetree/bindings/bus/qcom,ssbi.yaml
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/bus/qcom,ssbi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Single-wire Serial Bus Interface (SSBI)
+
+description:
+ Some Qualcomm MSM devices contain a point-to-point serial bus used to
+ communicate with a limited range of devices (mostly power management
+ chips).
+
+maintainers:
+ - Andy Gross <agross@kernel.org>
+ - Bjorn Andersson <andersson@kernel.org>
+
+properties:
+ compatible:
+ const: qcom,ssbi
+
+ reg:
+ maxItems: 1
+
+ qcom,controller-type:
+ description:
+ Indicates the SSBI bus variant the controller should use to talk
+ with the slave device. The type chosen is determined by the attached
+ slave.
+ enum:
+ - ssbi
+ - ssbi2
+ - pmic-arbiter
+
+ pmic:
+ $ref: /schemas/mfd/qcom-pm8xxx.yaml#
+
+required:
+ - compatible
+ - reg
+ - qcom,controller-type
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ ssbi@c00000 {
+ compatible = "qcom,ssbi";
+ reg = <0x00c00000 0x1000>;
+ qcom,controller-type = "pmic-arbiter";
+
+ pmic {
+ compatible = "qcom,pm8821";
+ interrupt-parent = <&msmgpio>;
+ interrupts = <76 IRQ_TYPE_LEVEL_LOW>;
+ #interrupt-cells = <2>;
+ interrupt-controller;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/bus/qcom,ssc-block-bus.yaml b/Documentation/devicetree/bindings/bus/qcom,ssc-block-bus.yaml
new file mode 100644
index 000000000000..8e9e6ff35d7d
--- /dev/null
+++ b/Documentation/devicetree/bindings/bus/qcom,ssc-block-bus.yaml
@@ -0,0 +1,144 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/bus/qcom,ssc-block-bus.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: The AHB Bus Providing a Global View of the SSC Block on (some) qcom SoCs
+
+maintainers:
+ - Michael Srba <Michael.Srba@seznam.cz>
+
+description: |
+ This binding describes the dependencies (clocks, resets, power domains) which
+ need to be turned on in a sequence before communication over the AHB bus
+ becomes possible.
+
+ Additionally, the reg property is used to pass to the driver the location of
+ two sadly undocumented registers which need to be poked as part of the sequence.
+
+ The SSC (Snapdragon Sensor Core) block contains a gpio controller, i2c/spi/uart
+ controllers, a hexagon core, and a clock controller which provides clocks for
+ the above.
+
+properties:
+ compatible:
+ items:
+ - const: qcom,msm8998-ssc-block-bus
+ - const: qcom,ssc-block-bus
+
+ reg:
+ items:
+ - description: SSCAON_CONFIG0 registers
+ - description: SSCAON_CONFIG1 registers
+
+ reg-names:
+ items:
+ - const: mpm_sscaon_config0
+ - const: mpm_sscaon_config1
+
+ '#address-cells':
+ enum: [ 1, 2 ]
+
+ '#size-cells':
+ enum: [ 1, 2 ]
+
+ ranges: true
+
+ clocks:
+ maxItems: 6
+
+ clock-names:
+ items:
+ - const: xo
+ - const: aggre2
+ - const: gcc_im_sleep
+ - const: aggre2_north
+ - const: ssc_xo
+ - const: ssc_ahbs
+
+ power-domains:
+ items:
+ - description: CX power domain
+ - description: MX power domain
+
+ power-domain-names:
+ items:
+ - const: ssc_cx
+ - const: ssc_mx
+
+ resets:
+ items:
+ - description: Main reset
+ - description:
+ SSC Branch Control Register reset (associated with the ssc_xo and
+ ssc_ahbs clocks)
+
+ reset-names:
+ items:
+ - const: ssc_reset
+ - const: ssc_bcr
+
+ qcom,halt-regs:
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ description: describes how to locate the ssc AXI halt register
+ items:
+ - items:
+ - description: Phandle reference to a syscon representing TCSR
+ - description: offset for the ssc AXI halt register
+
+required:
+ - compatible
+ - reg
+ - reg-names
+ - '#address-cells'
+ - '#size-cells'
+ - ranges
+ - clocks
+ - clock-names
+ - power-domains
+ - power-domain-names
+ - resets
+ - reset-names
+ - qcom,halt-regs
+
+additionalProperties:
+ type: object
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,gcc-msm8998.h>
+ #include <dt-bindings/clock/qcom,rpmcc.h>
+ #include <dt-bindings/power/qcom-rpmpd.h>
+
+ soc {
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ // devices under this node are physically located in the SSC block, connected to an ssc-internal bus;
+ ssc_ahb_slave: bus@10ac008 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ compatible = "qcom,msm8998-ssc-block-bus", "qcom,ssc-block-bus";
+ reg = <0x10ac008 0x4>, <0x10ac010 0x4>;
+ reg-names = "mpm_sscaon_config0", "mpm_sscaon_config1";
+
+ clocks = <&xo>,
+ <&rpmcc RPM_SMD_AGGR2_NOC_CLK>,
+ <&gcc GCC_IM_SLEEP>,
+ <&gcc AGGRE2_SNOC_NORTH_AXI>,
+ <&gcc SSC_XO>,
+ <&gcc SSC_CNOC_AHBS_CLK>;
+ clock-names = "xo", "aggre2", "gcc_im_sleep", "aggre2_north", "ssc_xo", "ssc_ahbs";
+
+ resets = <&gcc GCC_SSC_RESET>, <&gcc GCC_SSC_BCR>;
+ reset-names = "ssc_reset", "ssc_bcr";
+
+ power-domains = <&rpmpd MSM8998_SSCCX>, <&rpmpd MSM8998_SSCMX>;
+ power-domain-names = "ssc_cx", "ssc_mx";
+
+ qcom,halt-regs = <&tcsr_mutex_regs 0x26000>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/bus/renesas,bsc.txt b/Documentation/devicetree/bindings/bus/renesas,bsc.txt
deleted file mode 100644
index 90e947269437..000000000000
--- a/Documentation/devicetree/bindings/bus/renesas,bsc.txt
+++ /dev/null
@@ -1,46 +0,0 @@
-Renesas Bus State Controller (BSC)
-==================================
-
-The Renesas Bus State Controller (BSC, sometimes called "LBSC within Bus
-Bridge", or "External Bus Interface") can be found in several Renesas ARM SoCs.
-It provides an external bus for connecting multiple external devices to the
-SoC, driving several chip select lines, for e.g. NOR FLASH, Ethernet and USB.
-
-While the BSC is a fairly simple memory-mapped bus, it may be part of a PM
-domain, and may have a gateable functional clock.
-Before a device connected to the BSC can be accessed, the PM domain
-containing the BSC must be powered on, and the functional clock
-driving the BSC must be enabled.
-
-The bindings for the BSC extend the bindings for "simple-pm-bus".
-
-
-Required properties
- - compatible: Must contain an SoC-specific value, and "renesas,bsc" and
- "simple-pm-bus" as fallbacks.
- SoC-specific values can be:
- "renesas,bsc-r8a73a4" for R-Mobile APE6 (r8a73a4)
- "renesas,bsc-sh73a0" for SH-Mobile AG5 (sh73a0)
- - #address-cells, #size-cells, ranges: Must describe the mapping between
- parent address and child address spaces.
- - reg: Must contain the base address and length to access the bus controller.
-
-Optional properties:
- - interrupts: Must contain a reference to the BSC interrupt, if available.
- - clocks: Must contain a reference to the functional clock, if available.
- - power-domains: Must contain a reference to the PM domain, if available.
-
-
-Example:
-
- bsc: bus@fec10000 {
- compatible = "renesas,bsc-sh73a0", "renesas,bsc",
- "simple-pm-bus";
- #address-cells = <1>;
- #size-cells = <1>;
- ranges = <0 0 0x20000000>;
- reg = <0xfec10000 0x400>;
- interrupts = <0 39 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&zb_clk>;
- power-domains = <&pd_a4s>;
- };
diff --git a/Documentation/devicetree/bindings/bus/renesas,bsc.yaml b/Documentation/devicetree/bindings/bus/renesas,bsc.yaml
new file mode 100644
index 000000000000..ff3c78317d28
--- /dev/null
+++ b/Documentation/devicetree/bindings/bus/renesas,bsc.yaml
@@ -0,0 +1,74 @@
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/bus/renesas,bsc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas Bus State Controller (BSC)
+
+maintainers:
+ - Geert Uytterhoeven <geert+renesas@glider.be>
+
+description: |
+ The Renesas Bus State Controller (BSC, sometimes called "LBSC within Bus
+ Bridge", or "External Bus Interface") can be found in several Renesas ARM
+ SoCs. It provides an external bus for connecting multiple external
+ devices to the SoC, driving several chip select lines, for e.g. NOR
+ FLASH, Ethernet and USB.
+
+ While the BSC is a fairly simple memory-mapped bus, it may be part of a
+ PM domain, and may have a gateable functional clock. Before a device
+ connected to the BSC can be accessed, the PM domain containing the BSC
+ must be powered on, and the functional clock driving the BSC must be
+ enabled.
+
+ The bindings for the BSC extend the bindings for "simple-pm-bus".
+
+allOf:
+ - $ref: simple-pm-bus.yaml#
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - renesas,bsc-r8a73a4 # R-Mobile APE6 (r8a73a4)
+ - renesas,bsc-sh73a0 # SH-Mobile AG5 (sh73a0)
+ - const: renesas,bsc
+ - {} # simple-pm-bus, but not listed here to avoid false select
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+patternProperties:
+ # All other properties should be child nodes with unit-address and 'reg'
+ "@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+ properties:
+ reg:
+ maxItems: 1
+
+ required:
+ - reg
+
+required:
+ - reg
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ bsc: bus@fec10000 {
+ compatible = "renesas,bsc-sh73a0", "renesas,bsc", "simple-pm-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0 0 0x20000000>;
+ reg = <0xfec10000 0x400>;
+ interrupts = <0 39 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&zb_clk>;
+ power-domains = <&pd_a4s>;
+ };
diff --git a/Documentation/devicetree/bindings/bus/simple-pm-bus.txt b/Documentation/devicetree/bindings/bus/simple-pm-bus.txt
deleted file mode 100644
index 6f15037131ed..000000000000
--- a/Documentation/devicetree/bindings/bus/simple-pm-bus.txt
+++ /dev/null
@@ -1,44 +0,0 @@
-Simple Power-Managed Bus
-========================
-
-A Simple Power-Managed Bus is a transparent bus that doesn't need a real
-driver, as it's typically initialized by the boot loader.
-
-However, its bus controller is part of a PM domain, or under the control of a
-functional clock. Hence, the bus controller's PM domain and/or clock must be
-enabled for child devices connected to the bus (either on-SoC or externally)
-to function.
-
-While "simple-pm-bus" follows the "simple-bus" set of properties, as specified
-in the Devicetree Specification, it is not an extension of "simple-bus".
-
-
-Required properties:
- - compatible: Must contain at least "simple-pm-bus".
- Must not contain "simple-bus".
- It's recommended to let this be preceded by one or more
- vendor-specific compatible values.
- - #address-cells, #size-cells, ranges: Must describe the mapping between
- parent address and child address spaces.
-
-Optional platform-specific properties for clock or PM domain control (at least
-one of them is required):
- - clocks: Must contain a reference to the functional clock(s),
- - power-domains: Must contain a reference to the PM domain.
-Please refer to the binding documentation for the clock and/or PM domain
-providers for more details.
-
-
-Example:
-
- bsc: bus@fec10000 {
- compatible = "renesas,bsc-sh73a0", "renesas,bsc",
- "simple-pm-bus";
- #address-cells = <1>;
- #size-cells = <1>;
- ranges = <0 0 0x20000000>;
- reg = <0xfec10000 0x400>;
- interrupts = <0 39 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&zb_clk>;
- power-domains = <&pd_a4s>;
- };
diff --git a/Documentation/devicetree/bindings/bus/simple-pm-bus.yaml b/Documentation/devicetree/bindings/bus/simple-pm-bus.yaml
new file mode 100644
index 000000000000..182134d7a6a3
--- /dev/null
+++ b/Documentation/devicetree/bindings/bus/simple-pm-bus.yaml
@@ -0,0 +1,77 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/bus/simple-pm-bus.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Simple Power-Managed Bus
+
+maintainers:
+ - Geert Uytterhoeven <geert+renesas@glider.be>
+
+description: |
+ A Simple Power-Managed Bus is a transparent bus that doesn't need a real
+ driver, as it's typically initialized by the boot loader.
+
+ However, its bus controller is part of a PM domain, or under the control
+ of a functional clock. Hence, the bus controller's PM domain and/or
+ clock must be enabled for child devices connected to the bus (either
+ on-SoC or externally) to function.
+
+ While "simple-pm-bus" follows the "simple-bus" set of properties, as
+ specified in the Devicetree Specification, it is not an extension of
+ "simple-bus".
+
+properties:
+ $nodename:
+ pattern: "^bus(@[0-9a-f]+)?$"
+
+ compatible:
+ contains:
+ const: simple-pm-bus
+ description:
+ Shall contain "simple-pm-bus" in addition to a optional bus-specific
+ compatible strings defined in individual pm-bus bindings.
+
+ '#address-cells':
+ enum: [ 1, 2 ]
+
+ '#size-cells':
+ enum: [ 1, 2 ]
+
+ ranges: true
+
+ clocks: true
+ # Functional clocks
+ # Required if power-domains is absent, optional otherwise
+
+ power-domains:
+ # Required if clocks is absent, optional otherwise
+ minItems: 1
+
+required:
+ - compatible
+ - '#address-cells'
+ - '#size-cells'
+ - ranges
+
+anyOf:
+ - required:
+ - clocks
+ - required:
+ - power-domains
+
+additionalProperties: true
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,gcc-msm8996.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ bus {
+ power-domains = <&gcc AGGRE0_NOC_GDSC>;
+ compatible = "simple-pm-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+ };
diff --git a/Documentation/devicetree/bindings/bus/socionext,uniphier-system-bus.yaml b/Documentation/devicetree/bindings/bus/socionext,uniphier-system-bus.yaml
new file mode 100644
index 000000000000..49df13fc2f89
--- /dev/null
+++ b/Documentation/devicetree/bindings/bus/socionext,uniphier-system-bus.yaml
@@ -0,0 +1,103 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/bus/socionext,uniphier-system-bus.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: UniPhier System Bus
+
+description: |
+ The UniPhier System Bus is an external bus that connects on-board devices to
+ the UniPhier SoC. It is a simple (semi-)parallel bus with address, data, and
+ some control signals. It supports up to 8 banks (chip selects).
+
+ Before any access to the bus, the bus controller must be configured; the bus
+ controller registers provide the control for the translation from the offset
+ within each bank to the CPU-viewed address. The needed setup includes the
+ base address, the size of each bank. Optionally, some timing parameters can
+ be optimized for faster bus access.
+
+maintainers:
+ - Masahiro Yamada <yamada.masahiro@socionext.com>
+
+properties:
+ compatible:
+ const: socionext,uniphier-system-bus
+
+ reg:
+ maxItems: 1
+
+ "#address-cells":
+ description: |
+ The first cell is the bank number (chip select).
+ The second cell is the address offset within the bank.
+ const: 2
+
+ "#size-cells":
+ const: 1
+
+ ranges:
+ description: |
+ Provide address translation from the System Bus to the parent bus.
+
+ Note:
+ The address region(s) that can be assigned for the System Bus is
+ implementation defined. Some SoCs can use 0x00000000-0x0fffffff and
+ 0x40000000-0x4fffffff, while other SoCs only 0x40000000-0x4fffffff.
+ There might be additional limitations depending on SoCs and the boot mode.
+ The address translation is arbitrary as long as the banks are assigned in
+ the supported address space with the required alignment and they do not
+ overlap one another.
+
+ For example, it is possible to map:
+ bank 0 to 0x42000000-0x43ffffff, bank 5 to 0x46000000-0x46ffffff
+ It is also possible to map:
+ bank 0 to 0x48000000-0x49ffffff, bank 5 to 0x44000000-0x44ffffff
+ There is no reason to stick to a particular translation mapping, but the
+ "ranges" property should provide a "reasonable" default that is known to
+ work. The software should initialize the bus controller according to it.
+
+patternProperties:
+ "^.*@[1-5],[1-9a-f][0-9a-f]+$":
+ description: Devices attached to chip selects
+ type: object
+
+required:
+ - compatible
+ - reg
+ - "#address-cells"
+ - "#size-cells"
+ - ranges
+
+additionalProperties: false
+
+examples:
+ - |
+ // In this example,
+ // - the Ethernet device is connected at the offset 0x01f00000 of CS1 and
+ // mapped to 0x43f00000 of the parent bus.
+ // - the UART device is connected at the offset 0x00200000 of CS5 and
+ // mapped to 0x46200000 of the parent bus.
+
+ system-bus@58c00000 {
+ compatible = "socionext,uniphier-system-bus";
+ reg = <0x58c00000 0x400>;
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges = <1 0x00000000 0x42000000 0x02000000>,
+ <5 0x00000000 0x46000000 0x01000000>;
+
+ ethernet@1,1f00000 {
+ compatible = "smsc,lan9115";
+ reg = <1 0x01f00000 0x1000>;
+ interrupts = <0 48 4>;
+ phy-mode = "mii";
+ };
+
+ serial@5,200000 {
+ compatible = "ns16550a";
+ reg = <5 0x00200000 0x20>;
+ interrupts = <0 49 4>;
+ clock-frequency = <12288000>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/bus/st,stm32-etzpc.yaml b/Documentation/devicetree/bindings/bus/st,stm32-etzpc.yaml
new file mode 100644
index 000000000000..d12b62a3a5a8
--- /dev/null
+++ b/Documentation/devicetree/bindings/bus/st,stm32-etzpc.yaml
@@ -0,0 +1,96 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/bus/st,stm32-etzpc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STM32 Extended TrustZone protection controller
+
+description: |
+ The ETZPC configures TrustZone security in a SoC having bus masters and
+ devices with programmable-security attributes (securable resources).
+
+maintainers:
+ - Gatien Chevallier <gatien.chevallier@foss.st.com>
+
+select:
+ properties:
+ compatible:
+ contains:
+ const: st,stm32-etzpc
+ required:
+ - compatible
+
+properties:
+ compatible:
+ items:
+ - const: st,stm32-etzpc
+ - const: simple-bus
+
+ reg:
+ maxItems: 1
+
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 1
+
+ ranges: true
+
+ "#access-controller-cells":
+ const: 1
+ description:
+ Contains the firewall ID associated to the peripheral.
+
+patternProperties:
+ "^.*@[0-9a-f]+$":
+ description: Peripherals
+ type: object
+
+ additionalProperties: true
+
+ required:
+ - access-controllers
+
+required:
+ - compatible
+ - reg
+ - "#address-cells"
+ - "#size-cells"
+ - "#access-controller-cells"
+ - ranges
+
+additionalProperties: false
+
+examples:
+ - |
+ // In this example, the usart2 device refers to rifsc as its access
+ // controller.
+ // Access rights are verified before creating devices.
+
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/clock/stm32mp13-clks.h>
+ #include <dt-bindings/reset/stm32mp13-resets.h>
+
+ etzpc: bus@5c007000 {
+ compatible = "st,stm32-etzpc", "simple-bus";
+ reg = <0x5c007000 0x400>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ #access-controller-cells = <1>;
+ ranges;
+
+ usart2: serial@4c001000 {
+ compatible = "st,stm32h7-uart";
+ reg = <0x4c001000 0x400>;
+ interrupts-extended = <&exti 27 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&rcc USART2_K>;
+ resets = <&rcc USART2_R>;
+ wakeup-source;
+ dmas = <&dmamux1 43 0x400 0x5>,
+ <&dmamux1 44 0x400 0x1>;
+ dma-names = "rx", "tx";
+ access-controllers = <&etzpc 17>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/bus/st,stm32mp25-rifsc.yaml b/Documentation/devicetree/bindings/bus/st,stm32mp25-rifsc.yaml
new file mode 100644
index 000000000000..20acd1a6b173
--- /dev/null
+++ b/Documentation/devicetree/bindings/bus/st,stm32mp25-rifsc.yaml
@@ -0,0 +1,105 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/bus/st,stm32mp25-rifsc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STM32 Resource isolation framework security controller
+
+maintainers:
+ - Gatien Chevallier <gatien.chevallier@foss.st.com>
+
+description: |
+ Resource isolation framework (RIF) is a comprehensive set of hardware blocks
+ designed to enforce and manage isolation of STM32 hardware resources like
+ memory and peripherals.
+
+ The RIFSC (RIF security controller) is composed of three sets of registers,
+ each managing a specific set of hardware resources:
+ - RISC registers associated with RISUP logic (resource isolation device unit
+ for peripherals), assign all non-RIF aware peripherals to zero, one or
+ any security domains (secure, privilege, compartment).
+ - RIMC registers: associated with RIMU logic (resource isolation master
+ unit), assign all non RIF-aware bus master to one security domain by
+ setting secure, privileged and compartment information on the system bus.
+ Alternatively, the RISUP logic controlling the device port access to a
+ peripheral can assign target bus attributes to this peripheral master port
+ (supported attribute: CID).
+ - RISC registers associated with RISAL logic (resource isolation device unit
+ for address space - Lite version), assign address space subregions to one
+ security domains (secure, privilege, compartment).
+
+select:
+ properties:
+ compatible:
+ contains:
+ const: st,stm32mp25-rifsc
+ required:
+ - compatible
+
+properties:
+ compatible:
+ items:
+ - const: st,stm32mp25-rifsc
+ - const: simple-bus
+
+ reg:
+ maxItems: 1
+
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 1
+
+ ranges: true
+
+ "#access-controller-cells":
+ const: 1
+ description:
+ Contains the firewall ID associated to the peripheral.
+
+patternProperties:
+ "^.*@[0-9a-f]+$":
+ description: Peripherals
+ type: object
+
+ additionalProperties: true
+
+ required:
+ - access-controllers
+
+required:
+ - compatible
+ - reg
+ - "#address-cells"
+ - "#size-cells"
+ - "#access-controller-cells"
+ - ranges
+
+additionalProperties: false
+
+examples:
+ - |
+ // In this example, the usart2 device refers to rifsc as its domain
+ // controller.
+ // Access rights are verified before creating devices.
+
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ rifsc: bus@42080000 {
+ compatible = "st,stm32mp25-rifsc", "simple-bus";
+ reg = <0x42080000 0x1000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ #access-controller-cells = <1>;
+ ranges;
+
+ usart2: serial@400e0000 {
+ compatible = "st,stm32h7-uart";
+ reg = <0x400e0000 0x400>;
+ interrupts = <GIC_SPI 115 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&ck_flexgen_08>;
+ access-controllers = <&rifsc 32>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/bus/sun50i-de2-bus.txt b/Documentation/devicetree/bindings/bus/sun50i-de2-bus.txt
deleted file mode 100644
index 87dfb33fb3be..000000000000
--- a/Documentation/devicetree/bindings/bus/sun50i-de2-bus.txt
+++ /dev/null
@@ -1,37 +0,0 @@
-Device tree bindings for Allwinner A64 DE2 bus
-
-The Allwinner A64 DE2 is on a special bus, which needs a SRAM region (SRAM C)
-to be claimed for enabling the access.
-
-Required properties:
-
- - compatible: Should contain "allwinner,sun50i-a64-de2"
- - reg: A resource specifier for the register space
- - #address-cells: Must be set to 1
- - #size-cells: Must be set to 1
- - ranges: Must be set up to map the address space inside the
- DE2, for the sub-blocks of DE2.
- - allwinner,sram: the SRAM that needs to be claimed
-
-Example:
-
- de2@1000000 {
- compatible = "allwinner,sun50i-a64-de2";
- reg = <0x1000000 0x400000>;
- allwinner,sram = <&de2_sram 1>;
- #address-cells = <1>;
- #size-cells = <1>;
- ranges = <0 0x1000000 0x400000>;
-
- display_clocks: clock@0 {
- compatible = "allwinner,sun50i-a64-de2-clk";
- reg = <0x0 0x100000>;
- clocks = <&ccu CLK_DE>,
- <&ccu CLK_BUS_DE>;
- clock-names = "mod",
- "bus";
- resets = <&ccu RST_BUS_DE>;
- #clock-cells = <1>;
- #reset-cells = <1>;
- };
- };
diff --git a/Documentation/devicetree/bindings/bus/sunxi-rsb.txt b/Documentation/devicetree/bindings/bus/sunxi-rsb.txt
deleted file mode 100644
index eb3ed628c6f1..000000000000
--- a/Documentation/devicetree/bindings/bus/sunxi-rsb.txt
+++ /dev/null
@@ -1,47 +0,0 @@
-Allwinner Reduced Serial Bus (RSB) controller
-
-The RSB controller found on later Allwinner SoCs is an SMBus like 2 wire
-serial bus with 1 master and up to 15 slaves. It is represented by a node
-for the controller itself, and child nodes representing the slave devices.
-
-Required properties :
-
- - reg : Offset and length of the register set for the controller.
- - compatible : Shall be "allwinner,sun8i-a23-rsb".
- - interrupts : The interrupt line associated to the RSB controller.
- - clocks : The gate clk associated to the RSB controller.
- - resets : The reset line associated to the RSB controller.
- - #address-cells : shall be 1
- - #size-cells : shall be 0
-
-Optional properties :
-
- - clock-frequency : Desired RSB bus clock frequency in Hz. Maximum is 20MHz.
- If not set this defaults to 3MHz.
-
-Child nodes:
-
-An RSB controller node can contain zero or more child nodes representing
-slave devices on the bus. Child 'reg' properties should contain the slave
-device's hardware address. The hardware address is hardwired in the device,
-which can normally be found in the datasheet.
-
-Example:
-
- rsb@1f03400 {
- compatible = "allwinner,sun8i-a23-rsb";
- reg = <0x01f03400 0x400>;
- interrupts = <0 39 4>;
- clocks = <&apb0_gates 3>;
- clock-frequency = <3000000>;
- resets = <&apb0_rst 3>;
- #address-cells = <1>;
- #size-cells = <0>;
-
- pmic@3e3 {
- compatible = "...";
- reg = <0x3e3>;
-
- /* ... */
- };
- };
diff --git a/Documentation/devicetree/bindings/bus/ti-sysc.txt b/Documentation/devicetree/bindings/bus/ti-sysc.txt
deleted file mode 100644
index 91dc2333af01..000000000000
--- a/Documentation/devicetree/bindings/bus/ti-sysc.txt
+++ /dev/null
@@ -1,135 +0,0 @@
-Texas Instruments sysc interconnect target module wrapper binding
-
-Texas Instruments SoCs can have a generic interconnect target module
-hardware for devices connected to various interconnects such as L3
-interconnect (Arteris NoC) and L4 interconnect (Sonics s3220). The sysc
-is mostly used for interaction between module and PRCM. It participates
-in the OCP Disconnect Protocol but other than that is mostly independent
-of the interconnect.
-
-Each interconnect target module can have one or more devices connected to
-it. There is a set of control registers for managing interconnect target
-module clocks, idle modes and interconnect level resets for the module.
-
-These control registers are sprinkled into the unused register address
-space of the first child device IP block managed by the interconnect
-target module and typically are named REVISION, SYSCONFIG and SYSSTATUS.
-
-Required standard properties:
-
-- compatible shall be one of the following generic types:
-
- "ti,sysc"
- "ti,sysc-omap2"
- "ti,sysc-omap4"
- "ti,sysc-omap4-simple"
-
- or one of the following derivative types for hardware
- needing special workarounds:
-
- "ti,sysc-omap2-timer"
- "ti,sysc-omap4-timer"
- "ti,sysc-omap3430-sr"
- "ti,sysc-omap3630-sr"
- "ti,sysc-omap4-sr"
- "ti,sysc-omap3-sham"
- "ti,sysc-omap-aes"
- "ti,sysc-mcasp"
- "ti,sysc-usb-host-fs"
- "ti,sysc-dra7-mcan"
-
-- reg shall have register areas implemented for the interconnect
- target module in question such as revision, sysc and syss
-
-- reg-names shall contain the register names implemented for the
- interconnect target module in question such as
- "rev, "sysc", and "syss"
-
-- ranges shall contain the interconnect target module IO range
- available for one or more child device IP blocks managed
- by the interconnect target module, the ranges may include
- multiple ranges such as device L4 range for control and
- parent L3 range for DMA access
-
-Optional properties:
-
-- ti,sysc-mask shall contain mask of supported register bits for the
- SYSCONFIG register as documented in the Technical Reference
- Manual (TRM) for the interconnect target module
-
-- ti,sysc-midle list of master idle modes supported by the interconnect
- target module as documented in the TRM for SYSCONFIG
- register MIDLEMODE bits
-
-- ti,sysc-sidle list of slave idle modes supported by the interconnect
- target module as documented in the TRM for SYSCONFIG
- register SIDLEMODE bits
-
-- ti,sysc-delay-us delay needed after OCP softreset before accssing
- SYSCONFIG register again
-
-- ti,syss-mask optional mask of reset done status bits as described in the
- TRM for SYSSTATUS registers, typically 1 with some devices
- having separate reset done bits for children like OHCI and
- EHCI
-
-- clocks clock specifier for each name in the clock-names as
- specified in the binding documentation for ti-clkctrl,
- typically available for all interconnect targets on TI SoCs
- based on omap4 except if it's read-only register in hwauto
- mode as for example omap4 L4_CFG_CLKCTRL
-
-- clock-names should contain at least "fck", and optionally also "ick"
- depending on the SoC and the interconnect target module,
- some interconnect target modules also need additional
- optional clocks that can be specified as listed in TRM
- for the related CLKCTRL register bits 8 to 15 such as
- "dbclk" or "clk32k" depending on their role
-
-- ti,hwmods optional TI interconnect module name to use legacy
- hwmod platform data
-
-- ti,no-reset-on-init interconnect target module should not be reset at init
-
-- ti,no-idle-on-init interconnect target module should not be idled at init
-
-Example: Single instance of MUSB controller on omap4 using interconnect ranges
-using offsets from l4_cfg second segment (0x4a000000 + 0x80000 = 0x4a0ab000):
-
- target-module@2b000 { /* 0x4a0ab000, ap 84 12.0 */
- compatible = "ti,sysc-omap2";
- ti,hwmods = "usb_otg_hs";
- reg = <0x2b400 0x4>,
- <0x2b404 0x4>,
- <0x2b408 0x4>;
- reg-names = "rev", "sysc", "syss";
- clocks = <&l3_init_clkctrl OMAP4_USB_OTG_HS_CLKCTRL 0>;
- clock-names = "fck";
- ti,sysc-mask = <(SYSC_OMAP2_ENAWAKEUP |
- SYSC_OMAP2_SOFTRESET |
- SYSC_OMAP2_AUTOIDLE)>;
- ti,sysc-midle = <SYSC_IDLE_FORCE>,
- <SYSC_IDLE_NO>,
- <SYSC_IDLE_SMART>;
- ti,sysc-sidle = <SYSC_IDLE_FORCE>,
- <SYSC_IDLE_NO>,
- <SYSC_IDLE_SMART>,
- <SYSC_IDLE_SMART_WKUP>;
- ti,syss-mask = <1>;
- #address-cells = <1>;
- #size-cells = <1>;
- ranges = <0 0x2b000 0x1000>;
-
- usb_otg_hs: otg@0 {
- compatible = "ti,omap4-musb";
- reg = <0x0 0x7ff>;
- interrupts = <GIC_SPI 92 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 93 IRQ_TYPE_LEVEL_HIGH>;
- usb-phy = <&usb2_phy>;
- ...
- };
- };
-
-Note that other SoCs, such as am335x can have multipe child devices. On am335x
-there are two MUSB instances, two USB PHY instances, and a single CPPI41 DMA
-instance as children of a single interconnet target module.
diff --git a/Documentation/devicetree/bindings/bus/ti-sysc.yaml b/Documentation/devicetree/bindings/bus/ti-sysc.yaml
new file mode 100644
index 000000000000..6d7bca6c138e
--- /dev/null
+++ b/Documentation/devicetree/bindings/bus/ti-sysc.yaml
@@ -0,0 +1,215 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/bus/ti-sysc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Texas Instruments interconnect target module
+
+maintainers:
+ - Tony Lindgren <tony@atomide.com>
+
+description:
+ Texas Instruments SoCs can have a generic interconnect target module
+ for devices connected to various interconnects such as L3 interconnect
+ using Arteris NoC, and L4 interconnect using Sonics s3220. This module
+ is mostly used for interaction between module and Power, Reset and Clock
+ Manager PRCM. It participates in the OCP Disconnect Protocol, but other
+ than that it is mostly independent of the interconnect.
+
+ Each interconnect target module can have one or more devices connected to
+ it. There is a set of control registers for managing the interconnect target
+ module clocks, idle modes and interconnect level resets.
+
+ The interconnect target module control registers are sprinkled into the
+ unused register address space of the first child device IP block managed by
+ the interconnect target module. Typically the register names are REVISION,
+ SYSCONFIG and SYSSTATUS.
+
+properties:
+ $nodename:
+ pattern: "^target-module(@[0-9a-f]+)?$"
+
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - ti,sysc-omap2
+ - ti,sysc-omap4
+ - ti,sysc-omap4-simple
+ - ti,sysc-omap2-timer
+ - ti,sysc-omap4-timer
+ - ti,sysc-omap3430-sr
+ - ti,sysc-omap3630-sr
+ - ti,sysc-omap4-sr
+ - ti,sysc-omap3-sham
+ - ti,sysc-omap-aes
+ - ti,sysc-mcasp
+ - ti,sysc-dra7-mcasp
+ - ti,sysc-usb-host-fs
+ - ti,sysc-dra7-mcan
+ - ti,sysc-pruss
+ - const: ti,sysc
+ - items:
+ - const: ti,sysc
+
+ reg:
+ description:
+ Interconnect target module control registers consisting of
+ REVISION, SYSCONFIG and SYSSTATUS registers as defined in the
+ Technical Reference Manual for the SoC.
+ minItems: 1
+ maxItems: 3
+
+ reg-names:
+ description:
+ Interconnect target module control register names consisting
+ of "rev", "sysc" and "syss".
+ oneOf:
+ - minItems: 1
+ items:
+ - const: rev
+ - const: sysc
+ - const: syss
+ - items:
+ - const: rev
+ - const: syss
+ - enum: [ sysc, syss ]
+
+ power-domains:
+ description: Target module power domain if available.
+ maxItems: 1
+
+ clocks:
+ description:
+ Target module clocks consisting of one functional clock, one
+ interface clock, and up to 8 module specific optional clocks.
+ Some modules may have only the functional clock, and some have
+ no configurable clocks.
+ minItems: 1
+ maxItems: 4
+
+ clock-names:
+ description:
+ Target module clock names like "fck", "ick", "optck1", "optck2"
+ if the clocks are configurable.
+ oneOf:
+ - enum: [ ick, fck, sys_clk ]
+ - items:
+ - const: fck
+ - enum: [ ick, dbclk, osc, sys_clk, dss_clk, ahclkx ]
+ - items:
+ - const: fck
+ - const: phy-clk
+ - const: phy-clk-div
+ - items:
+ - const: fck
+ - const: hdmi_clk
+ - const: sys_clk
+ - const: tv_clk
+ - items:
+ - const: fck
+ - const: ahclkx
+ - const: ahclkr
+
+ resets:
+ description:
+ Target module reset bit in the RSTCTRL register if wired for the module.
+ Note that the other reset bits should be mapped for the child device
+ driver to use.
+ maxItems: 1
+
+ reset-names:
+ description:
+ Target module reset names in the RSTCTRL register, typically named
+ "rstctrl" if only one reset bit is wired for the module.
+ items:
+ - const: rstctrl
+
+ '#address-cells':
+ enum: [ 1, 2 ]
+
+ '#size-cells':
+ enum: [ 1, 2 ]
+
+ ranges: true
+
+ dma-ranges: true
+
+ ti,sysc-mask:
+ description: Mask of supported register bits for the SYSCONFIG register
+ $ref: /schemas/types.yaml#/definitions/uint32
+
+ ti,sysc-midle:
+ description: List of hardware supported idle modes
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+
+ ti,sysc-sidle:
+ description: List of hardware supported idle modes
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+
+ ti,syss-mask:
+ description: Mask of supported register bits for the SYSSTATUS register
+ $ref: /schemas/types.yaml#/definitions/uint32
+
+ ti,sysc-delay-us:
+ description: Delay needed after OCP softreset before accessing SYCONFIG
+ default: 0
+ minimum: 0
+ maximum: 2
+
+ ti,no-reset-on-init:
+ description: Interconnect target module shall not be reset at init
+ type: boolean
+
+ ti,no-idle-on-init:
+ description: Interconnect target module shall not be idled at init
+ type: boolean
+
+ ti,no-idle:
+ description: Interconnect target module shall not be idled
+ type: boolean
+
+ ti,hwmods:
+ description: Interconnect module name to use with legacy hwmod data
+ $ref: /schemas/types.yaml#/definitions/string
+ deprecated: true
+
+required:
+ - compatible
+ - '#address-cells'
+ - '#size-cells'
+ - ranges
+
+additionalProperties:
+ type: object
+
+examples:
+ - |
+ #include <dt-bindings/bus/ti-sysc.h>
+ #include <dt-bindings/clock/omap4.h>
+
+ target-module@2b000 {
+ compatible = "ti,sysc-omap2", "ti,sysc";
+ ti,hwmods = "usb_otg_hs";
+ reg = <0x2b400 0x4>,
+ <0x2b404 0x4>,
+ <0x2b408 0x4>;
+ reg-names = "rev", "sysc", "syss";
+ clocks = <&l3_init_clkctrl OMAP4_USB_OTG_HS_CLKCTRL 0>;
+ clock-names = "fck";
+ ti,sysc-mask = <(SYSC_OMAP2_ENAWAKEUP |
+ SYSC_OMAP2_SOFTRESET |
+ SYSC_OMAP2_AUTOIDLE)>;
+ ti,sysc-midle = <SYSC_IDLE_FORCE>,
+ <SYSC_IDLE_NO>,
+ <SYSC_IDLE_SMART>;
+ ti,sysc-sidle = <SYSC_IDLE_FORCE>,
+ <SYSC_IDLE_NO>,
+ <SYSC_IDLE_SMART>,
+ <SYSC_IDLE_SMART_WKUP>;
+ ti,syss-mask = <1>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0 0x2b000 0x1000>;
+ };
diff --git a/Documentation/devicetree/bindings/bus/uniphier-system-bus.txt b/Documentation/devicetree/bindings/bus/uniphier-system-bus.txt
deleted file mode 100644
index 68ef80afff16..000000000000
--- a/Documentation/devicetree/bindings/bus/uniphier-system-bus.txt
+++ /dev/null
@@ -1,66 +0,0 @@
-UniPhier System Bus
-
-The UniPhier System Bus is an external bus that connects on-board devices to
-the UniPhier SoC. It is a simple (semi-)parallel bus with address, data, and
-some control signals. It supports up to 8 banks (chip selects).
-
-Before any access to the bus, the bus controller must be configured; the bus
-controller registers provide the control for the translation from the offset
-within each bank to the CPU-viewed address. The needed setup includes the base
-address, the size of each bank. Optionally, some timing parameters can be
-optimized for faster bus access.
-
-Required properties:
-- compatible: should be "socionext,uniphier-system-bus".
-- reg: offset and length of the register set for the bus controller device.
-- #address-cells: should be 2. The first cell is the bank number (chip select).
- The second cell is the address offset within the bank.
-- #size-cells: should be 1.
-- ranges: should provide a proper address translation from the System Bus to
- the parent bus.
-
-Note:
-The address region(s) that can be assigned for the System Bus is implementation
-defined. Some SoCs can use 0x00000000-0x0fffffff and 0x40000000-0x4fffffff,
-while other SoCs can only use 0x40000000-0x4fffffff. There might be additional
-limitations depending on SoCs and the boot mode. The address translation is
-arbitrary as long as the banks are assigned in the supported address space with
-the required alignment and they do not overlap one another.
-For example, it is possible to map:
- bank 0 to 0x42000000-0x43ffffff, bank 5 to 0x46000000-0x46ffffff
-It is also possible to map:
- bank 0 to 0x48000000-0x49ffffff, bank 5 to 0x44000000-0x44ffffff
-There is no reason to stick to a particular translation mapping, but the
-"ranges" property should provide a "reasonable" default that is known to work.
-The software should initialize the bus controller according to it.
-
-Example:
-
- system-bus {
- compatible = "socionext,uniphier-system-bus";
- reg = <0x58c00000 0x400>;
- #address-cells = <2>;
- #size-cells = <1>;
- ranges = <1 0x00000000 0x42000000 0x02000000
- 5 0x00000000 0x46000000 0x01000000>;
-
- ethernet@1,01f00000 {
- compatible = "smsc,lan9115";
- reg = <1 0x01f00000 0x1000>;
- interrupts = <0 48 4>
- phy-mode = "mii";
- };
-
- uart@5,00200000 {
- compatible = "ns16550a";
- reg = <5 0x00200000 0x20>;
- interrupts = <0 49 4>
- clock-frequency = <12288000>;
- };
- };
-
-In this example,
- - the Ethernet device is connected at the offset 0x01f00000 of CS1 and
- mapped to 0x43f00000 of the parent bus.
- - the UART device is connected at the offset 0x00200000 of CS5 and
- mapped to 0x46200000 of the parent bus.
diff --git a/Documentation/devicetree/bindings/bus/xlnx,versal-net-cdx.yaml b/Documentation/devicetree/bindings/bus/xlnx,versal-net-cdx.yaml
new file mode 100644
index 000000000000..7f62ffbdc245
--- /dev/null
+++ b/Documentation/devicetree/bindings/bus/xlnx,versal-net-cdx.yaml
@@ -0,0 +1,82 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/bus/xlnx,versal-net-cdx.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: AMD CDX bus controller
+
+description: |
+ CDX bus controller for AMD devices is implemented to dynamically
+ detect CDX bus and devices using the firmware.
+ The CDX bus manages multiple FPGA based hardware devices, which
+ can support network, crypto or any other specialized type of
+ devices. These FPGA based devices can be added/modified dynamically
+ on run-time.
+
+ All devices on the CDX bus will have a unique streamid (for IOMMU)
+ and a unique device ID (for MSI) corresponding to a requestor ID
+ (one to one associated with the device). The streamid and deviceid
+ are used to configure SMMU and GIC-ITS respectively.
+
+ iommu-map property is used to define the set of stream ids
+ corresponding to each device and the associated IOMMU.
+
+ The MSI writes are accompanied by sideband data (Device ID).
+ The msi-map property is used to associate the devices with the
+ device ID as well as the associated ITS controller.
+
+ rproc property (xlnx,rproc) is used to identify the remote processor
+ with which APU (Application Processor Unit) interacts to find out
+ the bus and device configuration.
+
+maintainers:
+ - Nipun Gupta <nipun.gupta@amd.com>
+ - Nikhil Agarwal <nikhil.agarwal@amd.com>
+
+properties:
+ compatible:
+ const: xlnx,versal-net-cdx
+
+ iommu-map: true
+
+ msi-map: true
+
+ xlnx,rproc:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ phandle to the remoteproc_r5 rproc node using which APU interacts
+ with remote processor.
+
+ ranges: true
+
+ "#address-cells":
+ enum: [1, 2]
+
+ "#size-cells":
+ enum: [1, 2]
+
+required:
+ - compatible
+ - iommu-map
+ - msi-map
+ - xlnx,rproc
+ - ranges
+ - "#address-cells"
+ - "#size-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ cdx {
+ compatible = "xlnx,versal-net-cdx";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ /* define map for RIDs 250-259 */
+ iommu-map = <250 &smmu 250 10>;
+ /* define msi map for RIDs 250-259 */
+ msi-map = <250 &its 250 10>;
+ xlnx,rproc = <&remoteproc_r5>;
+ ranges;
+ };
diff --git a/Documentation/devicetree/bindings/c6x/clocks.txt b/Documentation/devicetree/bindings/c6x/clocks.txt
deleted file mode 100644
index a04f5fd30122..000000000000
--- a/Documentation/devicetree/bindings/c6x/clocks.txt
+++ /dev/null
@@ -1,40 +0,0 @@
-C6X PLL Clock Controllers
--------------------------
-
-This is a first-cut support for the SoC clock controllers. This is still
-under development and will probably change as the common device tree
-clock support is added to the kernel.
-
-Required properties:
-
-- compatible: "ti,c64x+pll"
- May also have SoC-specific value to support SoC-specific initialization
- in the driver. One of:
- "ti,c6455-pll"
- "ti,c6457-pll"
- "ti,c6472-pll"
- "ti,c6474-pll"
-
-- reg: base address and size of register area
-- clock-frequency: input clock frequency in hz
-
-
-Optional properties:
-
-- ti,c64x+pll-bypass-delay: CPU cycles to delay when entering bypass mode
-
-- ti,c64x+pll-reset-delay: CPU cycles to delay after PLL reset
-
-- ti,c64x+pll-lock-delay: CPU cycles to delay after PLL frequency change
-
-Example:
-
- clock-controller@29a0000 {
- compatible = "ti,c6472-pll", "ti,c64x+pll";
- reg = <0x029a0000 0x200>;
- clock-frequency = <25000000>;
-
- ti,c64x+pll-bypass-delay = <200>;
- ti,c64x+pll-reset-delay = <12000>;
- ti,c64x+pll-lock-delay = <80000>;
- };
diff --git a/Documentation/devicetree/bindings/c6x/dscr.txt b/Documentation/devicetree/bindings/c6x/dscr.txt
deleted file mode 100644
index 92672235de57..000000000000
--- a/Documentation/devicetree/bindings/c6x/dscr.txt
+++ /dev/null
@@ -1,127 +0,0 @@
-Device State Configuration Registers
-------------------------------------
-
-TI C6X SoCs contain a region of miscellaneous registers which provide various
-function for SoC control or status. Details vary considerably among from SoC
-to SoC with no two being alike.
-
-In general, the Device State Configuration Registers (DSCR) will provide one or
-more configuration registers often protected by a lock register where one or
-more key values must be written to a lock register in order to unlock the
-configuration register for writes. These configuration register may be used to
-enable (and disable in some cases) SoC pin drivers, select peripheral clock
-sources (internal or pin), etc. In some cases, a configuration register is
-write once or the individual bits are write once. In addition to device config,
-the DSCR block may provide registers which are used to reset peripherals,
-provide device ID information, provide ethernet MAC addresses, as well as other
-miscellaneous functions.
-
-For device state control (enable/disable), each device control is assigned an
-id which is used by individual device drivers to control the state as needed.
-
-Required properties:
-
-- compatible: must be "ti,c64x+dscr"
-- reg: register area base and size
-
-Optional properties:
-
- NOTE: These are optional in that not all SoCs will have all properties. For
- SoCs which do support a given property, leaving the property out of the
- device tree will result in reduced functionality or possibly driver
- failure.
-
-- ti,dscr-devstat
- offset of the devstat register
-
-- ti,dscr-silicon-rev
- offset, start bit, and bitsize of silicon revision field
-
-- ti,dscr-rmii-resets
- offset and bitmask of RMII reset field. May have multiple tuples if more
- than one ethernet port is available.
-
-- ti,dscr-locked-regs
- possibly multiple tuples describing registers which are write protected by
- a lock register. Each tuple consists of the register offset, lock register
- offsset, and the key value used to unlock the register.
-
-- ti,dscr-kick-regs
- offset and key values of two "kick" registers used to write protect other
- registers in DSCR. On SoCs using kick registers, the first key must be
- written to the first kick register and the second key must be written to
- the second register before other registers in the area are write-enabled.
-
-- ti,dscr-mac-fuse-regs
- MAC addresses are contained in two registers. Each element of a MAC address
- is contained in a single byte. This property has two tuples. Each tuple has
- a register offset and four cells representing bytes in the register from
- most significant to least. The value of these four cells is the MAC byte
- index (1-6) of the byte within the register. A value of 0 means the byte
- is unused in the MAC address.
-
-- ti,dscr-devstate-ctl-regs
- This property describes the bitfields used to control the state of devices.
- Each tuple describes a range of identical bitfields used to control one or
- more devices (one bitfield per device). The layout of each tuple is:
-
- start_id num_ids reg enable disable start_bit nbits
-
- Where:
- start_id is device id for the first device control in the range
- num_ids is the number of device controls in the range
- reg is the offset of the register holding the control bits
- enable is the value to enable a device
- disable is the value to disable a device (0xffffffff if cannot disable)
- start_bit is the bit number of the first bit in the range
- nbits is the number of bits per device control
-
-- ti,dscr-devstate-stat-regs
- This property describes the bitfields used to provide device state status
- for device states controlled by the DSCR. Each tuple describes a range of
- identical bitfields used to provide status for one or more devices (one
- bitfield per device). The layout of each tuple is:
-
- start_id num_ids reg enable disable start_bit nbits
-
- Where:
- start_id is device id for the first device status in the range
- num_ids is the number of devices covered by the range
- reg is the offset of the register holding the status bits
- enable is the value indicating device is enabled
- disable is the value indicating device is disabled
- start_bit is the bit number of the first bit in the range
- nbits is the number of bits per device status
-
-- ti,dscr-privperm
- Offset and default value for register used to set access privilege for
- some SoC devices.
-
-
-Example:
-
- device-state-config-regs@2a80000 {
- compatible = "ti,c64x+dscr";
- reg = <0x02a80000 0x41000>;
-
- ti,dscr-devstat = <0>;
- ti,dscr-silicon-rev = <8 28 0xf>;
- ti,dscr-rmii-resets = <0x40020 0x00040000>;
-
- ti,dscr-locked-regs = <0x40008 0x40004 0x0f0a0b00>;
- ti,dscr-devstate-ctl-regs =
- <0 12 0x40008 1 0 0 2
- 12 1 0x40008 3 0 30 2
- 13 2 0x4002c 1 0xffffffff 0 1>;
- ti,dscr-devstate-stat-regs =
- <0 10 0x40014 1 0 0 3
- 10 2 0x40018 1 0 0 3>;
-
- ti,dscr-mac-fuse-regs = <0x700 1 2 3 4
- 0x704 5 6 0 0>;
-
- ti,dscr-privperm = <0x41c 0xaaaaaaaa>;
-
- ti,dscr-kick-regs = <0x38 0x83E70B13
- 0x3c 0x95A4F1E0>;
- };
diff --git a/Documentation/devicetree/bindings/c6x/emifa.txt b/Documentation/devicetree/bindings/c6x/emifa.txt
deleted file mode 100644
index 0ff6e9b9a13f..000000000000
--- a/Documentation/devicetree/bindings/c6x/emifa.txt
+++ /dev/null
@@ -1,62 +0,0 @@
-External Memory Interface
--------------------------
-
-The emifa node describes a simple external bus controller found on some C6X
-SoCs. This interface provides external busses with a number of chip selects.
-
-Required properties:
-
-- compatible: must be "ti,c64x+emifa", "simple-bus"
-- reg: register area base and size
-- #address-cells: must be 2 (chip-select + offset)
-- #size-cells: must be 1
-- ranges: mapping from EMIFA space to parent space
-
-
-Optional properties:
-
-- ti,dscr-dev-enable: Device ID if EMIF is enabled/disabled from DSCR
-
-- ti,emifa-burst-priority:
- Number of memory transfers after which the EMIF will elevate the priority
- of the oldest command in the command FIFO. Setting this field to 255
- disables this feature, thereby allowing old commands to stay in the FIFO
- indefinitely.
-
-- ti,emifa-ce-config:
- Configuration values for each of the supported chip selects.
-
-Example:
-
- emifa@70000000 {
- compatible = "ti,c64x+emifa", "simple-bus";
- #address-cells = <2>;
- #size-cells = <1>;
- reg = <0x70000000 0x100>;
- ranges = <0x2 0x0 0xa0000000 0x00000008
- 0x3 0x0 0xb0000000 0x00400000
- 0x4 0x0 0xc0000000 0x10000000
- 0x5 0x0 0xD0000000 0x10000000>;
-
- ti,dscr-dev-enable = <13>;
- ti,emifa-burst-priority = <255>;
- ti,emifa-ce-config = <0x00240120
- 0x00240120
- 0x00240122
- 0x00240122>;
-
- flash@3,0 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "cfi-flash";
- reg = <0x3 0x0 0x400000>;
- bank-width = <1>;
- device-width = <1>;
- partition@0 {
- reg = <0x0 0x400000>;
- label = "NOR";
- };
- };
- };
-
-This shows a flash chip attached to chip select 3.
diff --git a/Documentation/devicetree/bindings/c6x/soc.txt b/Documentation/devicetree/bindings/c6x/soc.txt
deleted file mode 100644
index b1e4973b5769..000000000000
--- a/Documentation/devicetree/bindings/c6x/soc.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-C6X System-on-Chip
-------------------
-
-Required properties:
-
-- compatible: "simple-bus"
-- #address-cells: must be 1
-- #size-cells: must be 1
-- ranges
-
-Optional properties:
-
-- model: specific SoC model
-
-- nodes for IP blocks within SoC
-
-
-Example:
-
- soc {
- compatible = "simple-bus";
- model = "tms320c6455";
- #address-cells = <1>;
- #size-cells = <1>;
- ranges;
-
- ...
- };
diff --git a/Documentation/devicetree/bindings/cache/andestech,ax45mp-cache.yaml b/Documentation/devicetree/bindings/cache/andestech,ax45mp-cache.yaml
new file mode 100644
index 000000000000..b135ffa4ab6b
--- /dev/null
+++ b/Documentation/devicetree/bindings/cache/andestech,ax45mp-cache.yaml
@@ -0,0 +1,103 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright (C) 2023 Renesas Electronics Corp.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/cache/andestech,ax45mp-cache.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Andestech AX45MP L2 Cache Controller
+
+maintainers:
+ - Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
+
+description:
+ A level-2 cache (L2C) is used to improve the system performance by providing
+ a large amount of cache line entries and reasonable access delays. The L2C
+ is shared between cores, and a non-inclusive non-exclusive policy is used.
+
+select:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - andestech,ax45mp-cache
+
+ required:
+ - compatible
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - andestech,qilai-ax45mp-cache
+ - renesas,r9a07g043f-ax45mp-cache
+ - const: andestech,ax45mp-cache
+ - const: cache
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ cache-line-size:
+ const: 64
+
+ cache-level:
+ const: 2
+
+ cache-sets:
+ enum: [1024, 2048]
+
+ cache-size:
+ enum: [131072, 262144, 524288, 1048576, 2097152]
+
+ cache-unified: true
+
+ next-level-cache: true
+
+additionalProperties: false
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - cache-line-size
+ - cache-level
+ - cache-sets
+ - cache-size
+ - cache-unified
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: andestech,qilai-ax45mp-cache
+
+ then:
+ properties:
+ cache-sets:
+ const: 2048
+ cache-size:
+ const: 2097152
+ else:
+ properties:
+ cache-sets:
+ const: 1024
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ cache-controller@13400000 {
+ compatible = "renesas,r9a07g043f-ax45mp-cache", "andestech,ax45mp-cache",
+ "cache";
+ reg = <0x13400000 0x100000>;
+ interrupts = <508 IRQ_TYPE_LEVEL_HIGH>;
+ cache-line-size = <64>;
+ cache-level = <2>;
+ cache-sets = <1024>;
+ cache-size = <262144>;
+ cache-unified;
+ };
diff --git a/Documentation/devicetree/bindings/cache/baikal,bt1-l2-ctl.yaml b/Documentation/devicetree/bindings/cache/baikal,bt1-l2-ctl.yaml
new file mode 100644
index 000000000000..ec4f367bc0b4
--- /dev/null
+++ b/Documentation/devicetree/bindings/cache/baikal,bt1-l2-ctl.yaml
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (C) 2020 BAIKAL ELECTRONICS, JSC
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/cache/baikal,bt1-l2-ctl.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Baikal-T1 L2-cache Control Block
+
+maintainers:
+ - Serge Semin <fancer.lancer@gmail.com>
+
+description: |
+ By means of the System Controller Baikal-T1 SoC exposes a few settings to
+ tune the MIPS P5600 CM2 L2 cache performance up. In particular it's possible
+ to change the Tag, Data and Way-select RAM access latencies. Baikal-T1
+ L2-cache controller block is responsible for the tuning. Its DT node is
+ supposed to be a child of the system controller.
+
+properties:
+ compatible:
+ const: baikal,bt1-l2-ctl
+
+ reg:
+ maxItems: 1
+
+ baikal,l2-ws-latency:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: Cycles of latency for Way-select RAM accesses
+ default: 0
+ minimum: 0
+ maximum: 3
+
+ baikal,l2-tag-latency:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: Cycles of latency for Tag RAM accesses
+ default: 0
+ minimum: 0
+ maximum: 3
+
+ baikal,l2-data-latency:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: Cycles of latency for Data RAM accesses
+ default: 1
+ minimum: 0
+ maximum: 3
+
+additionalProperties: false
+
+required:
+ - compatible
+
+examples:
+ - |
+ l2@1f04d028 {
+ compatible = "baikal,bt1-l2-ctl";
+ reg = <0x1f04d028 0x004>;
+
+ baikal,l2-ws-latency = <1>;
+ baikal,l2-tag-latency = <1>;
+ baikal,l2-data-latency = <2>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/cache/freescale-l2cache.txt b/Documentation/devicetree/bindings/cache/freescale-l2cache.txt
new file mode 100644
index 000000000000..22ad012660e9
--- /dev/null
+++ b/Documentation/devicetree/bindings/cache/freescale-l2cache.txt
@@ -0,0 +1,55 @@
+Freescale L2 Cache Controller
+
+L2 cache is present in Freescale's QorIQ and QorIQ Qonverge platforms.
+The cache bindings explained below are Devicetree Specification compliant
+
+Required Properties:
+
+- compatible : Should include one of the following:
+ "fsl,b4420-l2-cache-controller"
+ "fsl,b4860-l2-cache-controller"
+ "fsl,bsc9131-l2-cache-controller"
+ "fsl,bsc9132-l2-cache-controller"
+ "fsl,c293-l2-cache-controller"
+ "fsl,mpc8536-l2-cache-controller"
+ "fsl,mpc8540-l2-cache-controller"
+ "fsl,mpc8541-l2-cache-controller"
+ "fsl,mpc8544-l2-cache-controller"
+ "fsl,mpc8548-l2-cache-controller"
+ "fsl,mpc8555-l2-cache-controller"
+ "fsl,mpc8560-l2-cache-controller"
+ "fsl,mpc8568-l2-cache-controller"
+ "fsl,mpc8569-l2-cache-controller"
+ "fsl,mpc8572-l2-cache-controller"
+ "fsl,p1010-l2-cache-controller"
+ "fsl,p1011-l2-cache-controller"
+ "fsl,p1012-l2-cache-controller"
+ "fsl,p1013-l2-cache-controller"
+ "fsl,p1014-l2-cache-controller"
+ "fsl,p1015-l2-cache-controller"
+ "fsl,p1016-l2-cache-controller"
+ "fsl,p1020-l2-cache-controller"
+ "fsl,p1021-l2-cache-controller"
+ "fsl,p1022-l2-cache-controller"
+ "fsl,p1023-l2-cache-controller"
+ "fsl,p1024-l2-cache-controller"
+ "fsl,p1025-l2-cache-controller"
+ "fsl,p2010-l2-cache-controller"
+ "fsl,p2020-l2-cache-controller"
+ "fsl,t2080-l2-cache-controller"
+ "fsl,t4240-l2-cache-controller"
+ and "cache".
+- reg : Address and size of L2 cache controller registers
+- cache-size : Size of the entire L2 cache
+- interrupts : Error interrupt of L2 controller
+- cache-line-size : Size of L2 cache lines
+
+Example:
+
+ L2: l2-cache-controller@20000 {
+ compatible = "fsl,bsc9132-l2-cache-controller", "cache";
+ reg = <0x20000 0x1000>;
+ cache-line-size = <32>; // 32 bytes
+ cache-size = <0x40000>; // L2,256K
+ interrupts = <16 2 1 0>;
+ };
diff --git a/Documentation/devicetree/bindings/cache/l2c2x0.yaml b/Documentation/devicetree/bindings/cache/l2c2x0.yaml
new file mode 100644
index 000000000000..10c1a900202f
--- /dev/null
+++ b/Documentation/devicetree/bindings/cache/l2c2x0.yaml
@@ -0,0 +1,241 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/cache/l2c2x0.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM L2 Cache Controller
+
+maintainers:
+ - Rob Herring <robh@kernel.org>
+
+description: |+
+ ARM cores often have a separate L2C210/L2C220/L2C310 (also known as PL210/
+ PL220/PL310 and variants) based level 2 cache controller. All these various
+ implementations of the L2 cache controller have compatible programming
+ models (Note 1). Some of the properties that are just prefixed "cache-*" are
+ taken from section 3.7.3 of the Devicetree Specification which can be found
+ at:
+ https://www.devicetree.org/specifications/
+
+ Note 1: The description in this document doesn't apply to integrated L2
+ cache controllers as found in e.g. Cortex-A15/A7/A57/A53. These
+ integrated L2 controllers are assumed to be all preconfigured by
+ early secure boot code. Thus no need to deal with their configuration
+ in the kernel at all.
+
+allOf:
+ - $ref: /schemas/cache-controller.yaml#
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - arm,pl310-cache
+ - arm,l220-cache
+ - arm,l210-cache
+ # DEPRECATED by "brcm,bcm11351-a2-pl310-cache"
+ - bcm,bcm11351-a2-pl310-cache
+ # For Broadcom bcm11351 chipset where an
+ # offset needs to be added to the address before passing down to the L2
+ # cache controller
+ - brcm,bcm11351-a2-pl310-cache
+ # Marvell Controller designed to be
+ # compatible with the ARM one, with system cache mode (meaning
+ # maintenance operations on L1 are broadcasted to the L2 and L2
+ # performs the same operation).
+ - marvell,aurora-system-cache
+ # Marvell Controller designed to be
+ # compatible with the ARM one with outer cache mode.
+ - marvell,aurora-outer-cache
+ - items:
+ # Marvell Tauros3 cache controller, compatible
+ # with arm,pl310-cache controller.
+ - const: marvell,tauros3-cache
+ - const: arm,pl310-cache
+
+ cache-level:
+ const: 2
+
+ cache-unified: true
+ cache-size: true
+ cache-sets: true
+ cache-block-size: true
+ cache-line-size: true
+
+ reg:
+ maxItems: 1
+
+ arm,data-latency:
+ description: Cycles of latency for Data RAM accesses. Specifies 3 cells of
+ read, write and setup latencies. Minimum valid values are 1. Controllers
+ without setup latency control should use a value of 0.
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ minItems: 2
+ maxItems: 3
+ items:
+ minimum: 0
+ maximum: 8
+
+ arm,tag-latency:
+ description: Cycles of latency for Tag RAM accesses. Specifies 3 cells of
+ read, write and setup latencies. Controllers without setup latency control
+ should use 0. Controllers without separate read and write Tag RAM latency
+ values should only use the first cell.
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ minItems: 1
+ maxItems: 3
+ items:
+ minimum: 0
+ maximum: 8
+
+ arm,dirty-latency:
+ description: Cycles of latency for Dirty RAMs. This is a single cell.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 1
+ maximum: 8
+
+ arm,filter-ranges:
+ description: <start length> Starting address and length of window to
+ filter. Addresses in the filter window are directed to the M1 port. Other
+ addresses will go to the M0 port.
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ minItems: 2
+ maxItems: 2
+
+ arm,io-coherent:
+ description: indicates that the system is operating in an hardware
+ I/O coherent mode. Valid only when the arm,pl310-cache compatible
+ string is used.
+ type: boolean
+
+ interrupts:
+ # Either a single combined interrupt or up to 9 individual interrupts
+ minItems: 1
+ maxItems: 9
+
+ cache-id-part:
+ description: cache id part number to be used if it is not present
+ on hardware
+ $ref: /schemas/types.yaml#/definitions/uint32
+
+ wt-override:
+ description: If present then L2 is forced to Write through mode
+ type: boolean
+
+ arm,double-linefill:
+ description: Override double linefill enable setting. Enable if
+ non-zero, disable if zero.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1]
+
+ arm,double-linefill-incr:
+ description: Override double linefill on INCR read. Enable
+ if non-zero, disable if zero.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1]
+
+ arm,double-linefill-wrap:
+ description: Override double linefill on WRAP read. Enable
+ if non-zero, disable if zero.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1]
+
+ arm,prefetch-drop:
+ description: Override prefetch drop enable setting. Enable if non-zero,
+ disable if zero.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1]
+
+ arm,prefetch-offset:
+ description: Override prefetch offset value.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1, 2, 3, 4, 5, 6, 7, 15, 23, 31]
+
+ arm,shared-override:
+ description: The default behavior of the L220 or PL310 cache
+ controllers with respect to the shareable attribute is to transform "normal
+ memory non-cacheable transactions" into "cacheable no allocate" (for reads)
+ or "write through no write allocate" (for writes).
+ On systems where this may cause DMA buffer corruption, this property must
+ be specified to indicate that such transforms are precluded.
+ type: boolean
+
+ arm,parity-enable:
+ description: enable parity checking on the L2 cache (L220 or PL310).
+ type: boolean
+
+ arm,parity-disable:
+ description: disable parity checking on the L2 cache (L220 or PL310).
+ type: boolean
+
+ marvell,ecc-enable:
+ description: enable ECC protection on the L2 cache
+ type: boolean
+
+ arm,outer-sync-disable:
+ description: disable the outer sync operation on the L2 cache.
+ Some core tiles, especially ARM PB11MPCore have a faulty L220 cache that
+ will randomly hang unless outer sync operations are disabled.
+ type: boolean
+
+ prefetch-data:
+ description: |
+ Data prefetch. Value: <0> (forcibly disable), <1>
+ (forcibly enable), property absent (retain settings set by firmware)
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1]
+
+ prefetch-instr:
+ description: |
+ Instruction prefetch. Value: <0> (forcibly disable),
+ <1> (forcibly enable), property absent (retain settings set by
+ firmware)
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1]
+
+ arm,dynamic-clock-gating:
+ description: |
+ L2 dynamic clock gating. Value: <0> (forcibly
+ disable), <1> (forcibly enable), property absent (OS specific behavior,
+ preferably retain firmware settings)
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1]
+
+ arm,standby-mode:
+ description: L2 standby mode enable. Value <0> (forcibly disable),
+ <1> (forcibly enable), property absent (OS specific behavior,
+ preferably retain firmware settings)
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1]
+
+ arm,early-bresp-disable:
+ description: Disable the CA9 optimization Early BRESP (PL310)
+ type: boolean
+
+ arm,full-line-zero-disable:
+ description: Disable the CA9 optimization Full line of zero
+ write (PL310)
+ type: boolean
+
+required:
+ - compatible
+ - cache-unified
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ cache-controller@fff12000 {
+ compatible = "arm,pl310-cache";
+ reg = <0xfff12000 0x1000>;
+ arm,data-latency = <1 1 1>;
+ arm,tag-latency = <2 2 2>;
+ arm,filter-ranges = <0x80000000 0x8000000>;
+ cache-unified;
+ cache-level = <2>;
+ interrupts = <45>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/cache/marvell,kirkwood-cache.yaml b/Documentation/devicetree/bindings/cache/marvell,kirkwood-cache.yaml
new file mode 100644
index 000000000000..2bfa3c29f6a6
--- /dev/null
+++ b/Documentation/devicetree/bindings/cache/marvell,kirkwood-cache.yaml
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/cache/marvell,kirkwood-cache.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell Feroceon/Kirkwood Cache
+
+maintainers:
+ - Andrew Lunn <andrew@lunn.ch>
+ - Gregory Clement <gregory.clement@bootlin.com>
+
+properties:
+ compatible:
+ enum:
+ - marvell,feroceon-cache
+ - marvell,kirkwood-cache
+
+ reg:
+ maxItems: 1
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: marvell,kirkwood-cache
+ then:
+ required:
+ - reg
+ else:
+ properties:
+ reg: false
+
+required:
+ - compatible
+
+additionalProperties: false
+
+examples:
+ - |
+ l2-cache@20128 {
+ compatible = "marvell,kirkwood-cache";
+ reg = <0x20128 0x4>;
+ };
diff --git a/Documentation/devicetree/bindings/cache/marvell,tauros2-cache.yaml b/Documentation/devicetree/bindings/cache/marvell,tauros2-cache.yaml
new file mode 100644
index 000000000000..9f7f0d031631
--- /dev/null
+++ b/Documentation/devicetree/bindings/cache/marvell,tauros2-cache.yaml
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/cache/marvell,tauros2-cache.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell Tauros2 Cache
+
+maintainers:
+ - Andrew Lunn <andrew@lunn.ch>
+ - Gregory Clement <gregory.clement@bootlin.com>
+
+properties:
+ compatible:
+ const: marvell,tauros2-cache
+
+ marvell,tauros2-cache-features:
+ description: >
+ Specify the features supported for the tauros2 cache. The features include:
+
+ - CACHE_TAUROS2_PREFETCH_ON (1 << 0)
+ - CACHE_TAUROS2_LINEFILL_BURST8 (1 << 1)
+
+ The definition can be found at arch/arm/include/asm/hardware/cache-tauros2.h
+ $ref: /schemas/types.yaml#/definitions/uint32
+ maximum: 0x3
+
+required:
+ - compatible
+ - marvell,tauros2-cache-features
+
+additionalProperties: false
+
+examples:
+ - |
+ l2-cache {
+ compatible = "marvell,tauros2-cache";
+ marvell,tauros2-cache-features = <0x3>;
+ };
diff --git a/Documentation/devicetree/bindings/cache/qcom,llcc.yaml b/Documentation/devicetree/bindings/cache/qcom,llcc.yaml
new file mode 100644
index 000000000000..37e3ebd55487
--- /dev/null
+++ b/Documentation/devicetree/bindings/cache/qcom,llcc.yaml
@@ -0,0 +1,317 @@
+# SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/cache/qcom,llcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Last Level Cache Controller
+
+maintainers:
+ - Bjorn Andersson <andersson@kernel.org>
+
+description: |
+ LLCC (Last Level Cache Controller) provides last level of cache memory in SoC,
+ that can be shared by multiple clients. Clients here are different cores in the
+ SoC, the idea is to minimize the local caches at the clients and migrate to
+ common pool of memory. Cache memory is divided into partitions called slices
+ which are assigned to clients. Clients can query the slice details, activate
+ and deactivate them.
+
+properties:
+ compatible:
+ enum:
+ - qcom,ipq5424-llcc
+ - qcom,qcs615-llcc
+ - qcom,qcs8300-llcc
+ - qcom,qdu1000-llcc
+ - qcom,sa8775p-llcc
+ - qcom,sar1130p-llcc
+ - qcom,sar2130p-llcc
+ - qcom,sc7180-llcc
+ - qcom,sc7280-llcc
+ - qcom,sc8180x-llcc
+ - qcom,sc8280xp-llcc
+ - qcom,sdm845-llcc
+ - qcom,sm6350-llcc
+ - qcom,sm7150-llcc
+ - qcom,sm8150-llcc
+ - qcom,sm8250-llcc
+ - qcom,sm8350-llcc
+ - qcom,sm8450-llcc
+ - qcom,sm8550-llcc
+ - qcom,sm8650-llcc
+ - qcom,sm8750-llcc
+ - qcom,x1e80100-llcc
+
+ reg:
+ minItems: 1
+ maxItems: 10
+
+ reg-names:
+ minItems: 1
+ maxItems: 10
+
+ interrupts:
+ maxItems: 1
+
+ nvmem-cells:
+ items:
+ - description: Reference to an nvmem node for multi channel DDR
+
+ nvmem-cell-names:
+ items:
+ - const: multi-chan-ddr
+
+required:
+ - compatible
+ - reg
+ - reg-names
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,ipq5424-llcc
+ then:
+ properties:
+ reg:
+ items:
+ - description: LLCC0 base register region
+ reg-names:
+ items:
+ - const: llcc0_base
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,sar1130p-llcc
+ - qcom,sar2130p-llcc
+ then:
+ properties:
+ reg:
+ items:
+ - description: LLCC0 base register region
+ - description: LLCC1 base register region
+ - description: LLCC broadcast OR register region
+ - description: LLCC broadcast AND register region
+ - description: LLCC scratchpad broadcast OR register region
+ - description: LLCC scratchpad broadcast AND register region
+ reg-names:
+ items:
+ - const: llcc0_base
+ - const: llcc1_base
+ - const: llcc_broadcast_base
+ - const: llcc_broadcast_and_base
+ - const: llcc_scratchpad_broadcast_base
+ - const: llcc_scratchpad_broadcast_and_base
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,qcs615-llcc
+ - qcom,sc7180-llcc
+ - qcom,sm6350-llcc
+ then:
+ properties:
+ reg:
+ items:
+ - description: LLCC0 base register region
+ - description: LLCC broadcast base register region
+ reg-names:
+ items:
+ - const: llcc0_base
+ - const: llcc_broadcast_base
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,sa8775p-llcc
+ then:
+ properties:
+ reg:
+ items:
+ - description: LLCC0 base register region
+ - description: LLCC1 base register region
+ - description: LLCC2 base register region
+ - description: LLCC3 base register region
+ - description: LLCC4 base register region
+ - description: LLCC5 base register region
+ - description: LLCC broadcast base register region
+ reg-names:
+ items:
+ - const: llcc0_base
+ - const: llcc1_base
+ - const: llcc2_base
+ - const: llcc3_base
+ - const: llcc4_base
+ - const: llcc5_base
+ - const: llcc_broadcast_base
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,sc7280-llcc
+ then:
+ properties:
+ reg:
+ items:
+ - description: LLCC0 base register region
+ - description: LLCC1 base register region
+ - description: LLCC broadcast base register region
+ reg-names:
+ items:
+ - const: llcc0_base
+ - const: llcc1_base
+ - const: llcc_broadcast_base
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,qdu1000-llcc
+ - qcom,sc8180x-llcc
+ - qcom,sc8280xp-llcc
+ then:
+ properties:
+ reg:
+ items:
+ - description: LLCC0 base register region
+ - description: LLCC1 base register region
+ - description: LLCC2 base register region
+ - description: LLCC3 base register region
+ - description: LLCC4 base register region
+ - description: LLCC5 base register region
+ - description: LLCC6 base register region
+ - description: LLCC7 base register region
+ - description: LLCC broadcast base register region
+ reg-names:
+ items:
+ - const: llcc0_base
+ - const: llcc1_base
+ - const: llcc2_base
+ - const: llcc3_base
+ - const: llcc4_base
+ - const: llcc5_base
+ - const: llcc6_base
+ - const: llcc7_base
+ - const: llcc_broadcast_base
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,x1e80100-llcc
+ then:
+ properties:
+ reg:
+ items:
+ - description: LLCC0 base register region
+ - description: LLCC1 base register region
+ - description: LLCC2 base register region
+ - description: LLCC3 base register region
+ - description: LLCC4 base register region
+ - description: LLCC5 base register region
+ - description: LLCC6 base register region
+ - description: LLCC7 base register region
+ - description: LLCC broadcast base register region
+ - description: LLCC broadcast AND register region
+ reg-names:
+ items:
+ - const: llcc0_base
+ - const: llcc1_base
+ - const: llcc2_base
+ - const: llcc3_base
+ - const: llcc4_base
+ - const: llcc5_base
+ - const: llcc6_base
+ - const: llcc7_base
+ - const: llcc_broadcast_base
+ - const: llcc_broadcast_and_base
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,qcs8300-llcc
+ - qcom,sdm845-llcc
+ - qcom,sm8150-llcc
+ - qcom,sm8250-llcc
+ - qcom,sm8350-llcc
+ then:
+ properties:
+ reg:
+ items:
+ - description: LLCC0 base register region
+ - description: LLCC1 base register region
+ - description: LLCC2 base register region
+ - description: LLCC3 base register region
+ - description: LLCC broadcast base register region
+ reg-names:
+ items:
+ - const: llcc0_base
+ - const: llcc1_base
+ - const: llcc2_base
+ - const: llcc3_base
+ - const: llcc_broadcast_base
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,sm8450-llcc
+ - qcom,sm8550-llcc
+ - qcom,sm8650-llcc
+ - qcom,sm8750-llcc
+ then:
+ properties:
+ reg:
+ items:
+ - description: LLCC0 base register region
+ - description: LLCC1 base register region
+ - description: LLCC2 base register region
+ - description: LLCC3 base register region
+ - description: LLCC broadcast OR register region
+ - description: LLCC broadcast AND register region
+ reg-names:
+ items:
+ - const: llcc0_base
+ - const: llcc1_base
+ - const: llcc2_base
+ - const: llcc3_base
+ - const: llcc_broadcast_base
+ - const: llcc_broadcast_and_base
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ system-cache-controller@1100000 {
+ compatible = "qcom,sdm845-llcc";
+ reg = <0 0x01100000 0 0x50000>, <0 0x01180000 0 0x50000>,
+ <0 0x01200000 0 0x50000>, <0 0x01280000 0 0x50000>,
+ <0 0x01300000 0 0x50000>;
+ reg-names = "llcc0_base", "llcc1_base", "llcc2_base",
+ "llcc3_base", "llcc_broadcast_base";
+ interrupts = <GIC_SPI 582 IRQ_TYPE_LEVEL_HIGH>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/cache/sifive,ccache0.yaml b/Documentation/devicetree/bindings/cache/sifive,ccache0.yaml
new file mode 100644
index 000000000000..579bacb66f34
--- /dev/null
+++ b/Documentation/devicetree/bindings/cache/sifive,ccache0.yaml
@@ -0,0 +1,212 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright (C) 2020 SiFive, Inc.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/cache/sifive,ccache0.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: SiFive Composable Cache Controller
+
+maintainers:
+ - Paul Walmsley <paul.walmsley@sifive.com>
+
+description:
+ The SiFive Composable Cache Controller is used to provide access to fast copies
+ of memory for masters in a Core Complex. The Composable Cache Controller also
+ acts as directory-based coherency manager.
+ All the properties in ePAPR/DeviceTree specification applies for this platform.
+
+select:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - sifive,ccache0
+ - sifive,fu540-c000-ccache
+ - sifive,fu740-c000-ccache
+
+ required:
+ - compatible
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - sifive,ccache0
+ - sifive,fu540-c000-ccache
+ - sifive,fu740-c000-ccache
+ - const: cache
+ - items:
+ - enum:
+ - eswin,eic7700-l3-cache
+ - starfive,jh7100-ccache
+ - starfive,jh7110-ccache
+ - const: sifive,ccache0
+ - const: cache
+ - items:
+ - const: microchip,mpfs-ccache
+ - const: sifive,fu540-c000-ccache
+ - const: cache
+
+ cache-block-size:
+ const: 64
+
+ cache-level:
+ enum: [2, 3]
+
+ cache-sets:
+ enum: [1024, 2048, 4096]
+
+ cache-size:
+ enum: [2097152, 4194304]
+
+ cache-unified: true
+
+ interrupts:
+ minItems: 3
+ items:
+ - description: DirError interrupt
+ - description: DataError interrupt
+ - description: DataFail interrupt
+ - description: DirFail interrupt
+
+ reg:
+ maxItems: 1
+
+ next-level-cache: true
+
+ memory-region:
+ maxItems: 1
+ description: |
+ The reference to the reserved-memory for the L2 Loosely Integrated Memory region.
+ The reserved memory node should be defined as per the bindings in reserved-memory.txt.
+
+allOf:
+ - $ref: /schemas/cache-controller.yaml#
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - eswin,eic7700-l3-cache
+ - sifive,fu740-c000-ccache
+ - starfive,jh7100-ccache
+ - starfive,jh7110-ccache
+ - microchip,mpfs-ccache
+
+ then:
+ properties:
+ interrupts:
+ description: |
+ Must contain entries for DirError, DataError, DataFail, DirFail signals.
+ minItems: 4
+
+ else:
+ properties:
+ interrupts:
+ description: |
+ Must contain entries for DirError, DataError and DataFail signals.
+ maxItems: 3
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: eswin,eic7700-l3-cache
+
+ then:
+ properties:
+ cache-size:
+ const: 4194304
+
+ else:
+ properties:
+ cache-size:
+ const: 2097152
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - sifive,fu740-c000-ccache
+ - starfive,jh7100-ccache
+ - starfive,jh7110-ccache
+
+ then:
+ properties:
+ cache-sets:
+ const: 2048
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - microchip,mpfs-ccache
+ - sifive,fu540-c000-ccache
+
+ then:
+ properties:
+ cache-sets:
+ const: 1024
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - eswin,eic7700-l3-cache
+
+ then:
+ properties:
+ cache-sets:
+ const: 4096
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: sifive,ccache0
+
+ then:
+ properties:
+ cache-level:
+ enum: [2, 3]
+
+ else:
+ properties:
+ cache-level:
+ const: 2
+
+additionalProperties: false
+
+required:
+ - compatible
+ - cache-block-size
+ - cache-level
+ - cache-sets
+ - cache-size
+ - cache-unified
+ - interrupts
+ - reg
+
+examples:
+ - |
+ cache-controller@2010000 {
+ compatible = "sifive,fu540-c000-ccache", "cache";
+ cache-block-size = <64>;
+ cache-level = <2>;
+ cache-sets = <1024>;
+ cache-size = <2097152>;
+ cache-unified;
+ reg = <0x2010000 0x1000>;
+ interrupt-parent = <&plic0>;
+ interrupts = <1>,
+ <2>,
+ <3>;
+ next-level-cache = <&L25>;
+ memory-region = <&l2_lim>;
+ };
diff --git a/Documentation/devicetree/bindings/cache/socionext,uniphier-system-cache.yaml b/Documentation/devicetree/bindings/cache/socionext,uniphier-system-cache.yaml
new file mode 100644
index 000000000000..3196263685a3
--- /dev/null
+++ b/Documentation/devicetree/bindings/cache/socionext,uniphier-system-cache.yaml
@@ -0,0 +1,101 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/cache/socionext,uniphier-system-cache.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: UniPhier outer cache controller
+
+description: |
+ UniPhier ARM 32-bit SoCs are integrated with a full-custom outer cache
+ controller system. All of them have a level 2 cache controller, and some
+ have a level 3 cache controller as well.
+
+maintainers:
+ - Masahiro Yamada <yamada.masahiro@socionext.com>
+
+properties:
+ compatible:
+ const: socionext,uniphier-system-cache
+
+ reg:
+ description: |
+ should contain 3 regions: control register, revision register,
+ operation register, in this order.
+ maxItems: 3
+
+ interrupts:
+ description: |
+ Interrupts can be used to notify the completion of cache operations.
+ The number of interrupts should match to the number of CPU cores.
+ The specified interrupts correspond to CPU0, CPU1, ... in this order.
+ minItems: 1
+ maxItems: 4
+
+ cache-unified: true
+
+ cache-size: true
+
+ cache-sets: true
+
+ cache-line-size: true
+
+ cache-level:
+ minimum: 2
+ maximum: 3
+
+ next-level-cache: true
+
+allOf:
+ - $ref: /schemas/cache-controller.yaml#
+
+additionalProperties: false
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - cache-unified
+ - cache-size
+ - cache-sets
+ - cache-line-size
+ - cache-level
+
+examples:
+ - |
+ // System with L2.
+ cache-controller@500c0000 {
+ compatible = "socionext,uniphier-system-cache";
+ reg = <0x500c0000 0x2000>, <0x503c0100 0x4>, <0x506c0000 0x400>;
+ interrupts = <0 174 4>, <0 175 4>, <0 190 4>, <0 191 4>;
+ cache-unified;
+ cache-size = <0x140000>;
+ cache-sets = <512>;
+ cache-line-size = <128>;
+ cache-level = <2>;
+ };
+ - |
+ // System with L2 and L3.
+ // L2 should specify the next level cache by 'next-level-cache'.
+ l2: cache-controller@500c0000 {
+ compatible = "socionext,uniphier-system-cache";
+ reg = <0x500c0000 0x2000>, <0x503c0100 0x8>, <0x506c0000 0x400>;
+ interrupts = <0 190 4>, <0 191 4>;
+ cache-unified;
+ cache-size = <0x200000>;
+ cache-sets = <512>;
+ cache-line-size = <128>;
+ cache-level = <2>;
+ next-level-cache = <&l3>;
+ };
+
+ l3: cache-controller@500c8000 {
+ compatible = "socionext,uniphier-system-cache";
+ reg = <0x500c8000 0x2000>, <0x503c8100 0x8>, <0x506c8000 0x400>;
+ interrupts = <0 174 4>, <0 175 4>;
+ cache-unified;
+ cache-size = <0x200000>;
+ cache-sets = <512>;
+ cache-line-size = <256>;
+ cache-level = <3>;
+ };
diff --git a/Documentation/devicetree/bindings/cache/starfive,jh8100-starlink-cache.yaml b/Documentation/devicetree/bindings/cache/starfive,jh8100-starlink-cache.yaml
new file mode 100644
index 000000000000..6d61098e388b
--- /dev/null
+++ b/Documentation/devicetree/bindings/cache/starfive,jh8100-starlink-cache.yaml
@@ -0,0 +1,66 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/cache/starfive,jh8100-starlink-cache.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: StarFive StarLink Cache Controller
+
+maintainers:
+ - Joshua Yeong <joshua.yeong@starfivetech.com>
+
+description:
+ StarFive's StarLink Cache Controller manages the L3 cache shared between
+ clusters of CPU cores. The cache driver enables RISC-V non-standard cache
+ management as an alternative to instructions in the RISC-V Zicbom extension.
+
+allOf:
+ - $ref: /schemas/cache-controller.yaml#
+
+# We need a select here so we don't match all nodes with 'cache'
+select:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - starfive,jh8100-starlink-cache
+
+ required:
+ - compatible
+
+properties:
+ compatible:
+ items:
+ - const: starfive,jh8100-starlink-cache
+ - const: cache
+
+ reg:
+ maxItems: 1
+
+unevaluatedProperties: false
+
+required:
+ - compatible
+ - reg
+ - cache-block-size
+ - cache-level
+ - cache-sets
+ - cache-size
+ - cache-unified
+
+examples:
+ - |
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ cache-controller@15000000 {
+ compatible = "starfive,jh8100-starlink-cache", "cache";
+ reg = <0x0 0x15000000 0x0 0x278>;
+ cache-block-size = <64>;
+ cache-level = <3>;
+ cache-sets = <8192>;
+ cache-size = <0x400000>;
+ cache-unified;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/chosen.txt b/Documentation/devicetree/bindings/chosen.txt
deleted file mode 100644
index 45e79172a646..000000000000
--- a/Documentation/devicetree/bindings/chosen.txt
+++ /dev/null
@@ -1,137 +0,0 @@
-The chosen node
----------------
-
-The chosen node does not represent a real device, but serves as a place
-for passing data between firmware and the operating system, like boot
-arguments. Data in the chosen node does not represent the hardware.
-
-The following properties are recognized:
-
-
-kaslr-seed
------------
-
-This property is used when booting with CONFIG_RANDOMIZE_BASE as the
-entropy used to randomize the kernel image base address location. Since
-it is used directly, this value is intended only for KASLR, and should
-not be used for other purposes (as it may leak information about KASLR
-offsets). It is parsed as a u64 value, e.g.
-
-/ {
- chosen {
- kaslr-seed = <0xfeedbeef 0xc0def00d>;
- };
-};
-
-Note that if this property is set from UEFI (or a bootloader in EFI
-mode) when EFI_RNG_PROTOCOL is supported, it will be overwritten by
-the Linux EFI stub (which will populate the property itself, using
-EFI_RNG_PROTOCOL).
-
-stdout-path
------------
-
-Device trees may specify the device to be used for boot console output
-with a stdout-path property under /chosen, as described in the Devicetree
-Specification, e.g.
-
-/ {
- chosen {
- stdout-path = "/serial@f00:115200";
- };
-
- serial@f00 {
- compatible = "vendor,some-uart";
- reg = <0xf00 0x10>;
- };
-};
-
-If the character ":" is present in the value, this terminates the path.
-The meaning of any characters following the ":" is device-specific, and
-must be specified in the relevant binding documentation.
-
-For UART devices, the preferred binding is a string in the form:
-
- <baud>{<parity>{<bits>{<flow>}}}
-
-where
-
- baud - baud rate in decimal
- parity - 'n' (none), 'o', (odd) or 'e' (even)
- bits - number of data bits
- flow - 'r' (rts)
-
-For example: 115200n8r
-
-Implementation note: Linux will look for the property "linux,stdout-path" or
-on PowerPC "stdout" if "stdout-path" is not found. However, the
-"linux,stdout-path" and "stdout" properties are deprecated. New platforms
-should only use the "stdout-path" property.
-
-linux,booted-from-kexec
------------------------
-
-This property is set (currently only on PowerPC, and only needed on
-book3e) by some versions of kexec-tools to tell the new kernel that it
-is being booted by kexec, as the booting environment may differ (e.g.
-a different secondary CPU release mechanism)
-
-linux,usable-memory-range
--------------------------
-
-This property (arm64 only) holds a base address and size, describing a
-limited region in which memory may be considered available for use by
-the kernel. Memory outside of this range is not available for use.
-
-This property describes a limitation: memory within this range is only
-valid when also described through another mechanism that the kernel
-would otherwise use to determine available memory (e.g. memory nodes
-or the EFI memory map). Valid memory may be sparse within the range.
-e.g.
-
-/ {
- chosen {
- linux,usable-memory-range = <0x9 0xf0000000 0x0 0x10000000>;
- };
-};
-
-The main usage is for crash dump kernel to identify its own usable
-memory and exclude, at its boot time, any other memory areas that are
-part of the panicked kernel's memory.
-
-While this property does not represent a real hardware, the address
-and the size are expressed in #address-cells and #size-cells,
-respectively, of the root node.
-
-linux,elfcorehdr
-----------------
-
-This property (currently used only on arm64) holds the memory range,
-the address and the size, of the elf core header which mainly describes
-the panicked kernel's memory layout as PT_LOAD segments of elf format.
-e.g.
-
-/ {
- chosen {
- linux,elfcorehdr = <0x9 0xfffff000 0x0 0x800>;
- };
-};
-
-While this property does not represent a real hardware, the address
-and the size are expressed in #address-cells and #size-cells,
-respectively, of the root node.
-
-linux,initrd-start and linux,initrd-end
----------------------------------------
-
-These properties hold the physical start and end address of an initrd that's
-loaded by the bootloader. Note that linux,initrd-start is inclusive, but
-linux,initrd-end is exclusive.
-e.g.
-
-/ {
- chosen {
- linux,initrd-start = <0x82000000>;
- linux,initrd-end = <0x82800000>;
- };
-};
diff --git a/Documentation/devicetree/bindings/chrome/google,cros-ec-typec.yaml b/Documentation/devicetree/bindings/chrome/google,cros-ec-typec.yaml
new file mode 100644
index 000000000000..9f9816fbecbc
--- /dev/null
+++ b/Documentation/devicetree/bindings/chrome/google,cros-ec-typec.yaml
@@ -0,0 +1,66 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/chrome/google,cros-ec-typec.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Google Chrome OS EC(Embedded Controller) Type C port driver.
+
+maintainers:
+ - Benson Leung <bleung@chromium.org>
+ - Prashant Malani <pmalani@chromium.org>
+
+description:
+ Chrome OS devices have an Embedded Controller(EC) which has access to
+ Type C port state. This node is intended to allow the host to read and
+ control the Type C ports. The node for this device should be under a
+ cros-ec node like google,cros-ec-spi.
+
+properties:
+ compatible:
+ const: google,cros-ec-typec
+
+ '#address-cells':
+ const: 1
+
+ '#size-cells':
+ const: 0
+
+patternProperties:
+ '^connector@[0-9a-f]+$':
+ $ref: /schemas/connector/usb-connector.yaml#
+ required:
+ - reg
+
+required:
+ - compatible
+
+additionalProperties: false
+
+examples:
+ - |+
+ spi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cros_ec: ec@0 {
+ compatible = "google,cros-ec-spi";
+ reg = <0>;
+ interrupts = <35 0>;
+
+ typec {
+ compatible = "google,cros-ec-typec";
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ connector@0 {
+ compatible = "usb-c-connector";
+ reg = <0>;
+ power-role = "dual";
+ data-role = "dual";
+ try-power-role = "source";
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/actions,owl-cmu.txt b/Documentation/devicetree/bindings/clock/actions,owl-cmu.txt
deleted file mode 100644
index 2ef86ae96df8..000000000000
--- a/Documentation/devicetree/bindings/clock/actions,owl-cmu.txt
+++ /dev/null
@@ -1,51 +0,0 @@
-* Actions Semi Owl Clock Management Unit (CMU)
-
-The Actions Semi Owl Clock Management Unit generates and supplies clock
-to various controllers within the SoC. The clock binding described here is
-applicable to S900 and S700 SoC's.
-
-Required Properties:
-
-- compatible: should be one of the following,
- "actions,s900-cmu"
- "actions,s700-cmu"
-- reg: physical base address of the controller and length of memory mapped
- region.
-- clocks: Reference to the parent clocks ("hosc", "losc")
-- #clock-cells: should be 1.
-- #reset-cells: should be 1.
-
-Each clock is assigned an identifier, and client nodes can use this identifier
-to specify the clock which they consume.
-
-All available clocks are defined as preprocessor macros in corresponding
-dt-bindings/clock/actions,s900-cmu.h or actions,s700-cmu.h header and can be
-used in device tree sources.
-
-External clocks:
-
-The hosc clock used as input for the plls is generated outside the SoC. It is
-expected that it is defined using standard clock bindings as "hosc".
-
-Actions Semi S900 CMU also requires one more clock:
- - "losc" - internal low frequency oscillator
-
-Example: Clock Management Unit node:
-
- cmu: clock-controller@e0160000 {
- compatible = "actions,s900-cmu";
- reg = <0x0 0xe0160000 0x0 0x1000>;
- clocks = <&hosc>, <&losc>;
- #clock-cells = <1>;
- #reset-cells = <1>;
- };
-
-Example: UART controller node that consumes clock generated by the clock
-management unit:
-
- uart: serial@e012a000 {
- compatible = "actions,s900-uart", "actions,owl-uart";
- reg = <0x0 0xe012a000 0x0 0x2000>;
- interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&cmu CLK_UART5>;
- };
diff --git a/Documentation/devicetree/bindings/clock/actions,owl-cmu.yaml b/Documentation/devicetree/bindings/clock/actions,owl-cmu.yaml
new file mode 100644
index 000000000000..28396441bc98
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/actions,owl-cmu.yaml
@@ -0,0 +1,60 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/actions,owl-cmu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Actions Semi Owl Clock Management Unit (CMU)
+
+maintainers:
+ - Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+
+description: |
+ The Actions Semi Owl Clock Management Unit generates and supplies clock
+ to various controllers within the SoC.
+
+ See also:
+ include/dt-bindings/clock/actions,s500-cmu.h
+ include/dt-bindings/clock/actions,s700-cmu.h
+ include/dt-bindings/clock/actions,s900-cmu.h
+
+properties:
+ compatible:
+ enum:
+ - actions,s500-cmu
+ - actions,s700-cmu
+ - actions,s900-cmu
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: Host oscillator source
+ - description: Internal low frequency oscillator source
+
+ "#clock-cells":
+ const: 1
+
+ "#reset-cells":
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - "#clock-cells"
+ - "#reset-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@e0160000 {
+ compatible = "actions,s900-cmu";
+ reg = <0xe0160000 0x1000>;
+ clocks = <&hosc>, <&losc>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/adi,axi-clkgen.yaml b/Documentation/devicetree/bindings/clock/adi,axi-clkgen.yaml
new file mode 100644
index 000000000000..6eea1a41150a
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/adi,axi-clkgen.yaml
@@ -0,0 +1,72 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/adi,axi-clkgen.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Analog Devices AXI clkgen pcore clock generator
+
+maintainers:
+ - Lars-Peter Clausen <lars@metafoo.de>
+ - Michael Hennerich <michael.hennerich@analog.com>
+
+description: |
+ The axi_clkgen IP core is a software programmable clock generator,
+ that can be synthesized on various FPGA platforms.
+
+ Link: https://wiki.analog.com/resources/fpga/docs/axi_clkgen
+
+properties:
+ compatible:
+ enum:
+ - adi,axi-clkgen-2.00.a
+ - adi,zynqmp-axi-clkgen-2.00.a
+
+ clocks:
+ description:
+ Specifies the reference clock(s) from which the output frequency is
+ derived. This must either reference one clock if only the first clock
+ input is connected or two if both clock inputs are connected. The last
+ clock is the AXI bus clock that needs to be enabled so we can access the
+ core registers.
+ minItems: 2
+ maxItems: 3
+
+ clock-names:
+ oneOf:
+ - items:
+ - const: clkin1
+ - const: s_axi_aclk
+ - items:
+ - const: clkin1
+ - const: clkin2
+ - const: s_axi_aclk
+
+ clock-output-names:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 0
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@ff000000 {
+ compatible = "adi,axi-clkgen-2.00.a";
+ #clock-cells = <0>;
+ reg = <0xff000000 0x1000>;
+ clocks = <&osc 1>, <&clkc 15>;
+ clock-names = "clkin1", "s_axi_aclk";
+ clock-output-names = "spi_sclk";
+ };
diff --git a/Documentation/devicetree/bindings/clock/airoha,en7523-scu.yaml b/Documentation/devicetree/bindings/clock/airoha,en7523-scu.yaml
new file mode 100644
index 000000000000..fe2c5c1baf43
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/airoha,en7523-scu.yaml
@@ -0,0 +1,101 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/airoha,en7523-scu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: EN7523 Clock
+
+maintainers:
+ - Felix Fietkau <nbd@nbd.name>
+ - John Crispin <nbd@nbd.name>
+
+description: |
+ This node defines the System Control Unit of the EN7523 SoC,
+ a collection of registers configuring many different aspects of the SoC.
+
+ The clock driver uses it to read and configure settings of the
+ PLL controller, which provides clocks for the CPU, the bus and
+ other SoC internal peripherals.
+
+ Each clock is assigned an identifier and client nodes use this identifier
+ to specify which clock they consume.
+
+ All these identifiers can be found in:
+ [1]: <include/dt-bindings/clock/en7523-clk.h>.
+
+ The clocks are provided inside a system controller node.
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - airoha,en7523-scu
+ - airoha,en7581-scu
+
+ reg:
+ items:
+ - description: scu base address
+ - description: misc scu base address
+ minItems: 1
+
+ "#clock-cells":
+ description:
+ The first cell indicates the clock number, see [1] for available
+ clocks.
+ const: 1
+
+ '#reset-cells':
+ description: ID of the controller reset line
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ const: airoha,en7523-scu
+ then:
+ properties:
+ reg:
+ minItems: 2
+
+ '#reset-cells': false
+
+ - if:
+ properties:
+ compatible:
+ const: airoha,en7581-scu
+ then:
+ properties:
+ reg:
+ maxItems: 1
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/en7523-clk.h>
+ scu: system-controller@1fa20000 {
+ compatible = "airoha,en7523-scu";
+ reg = <0x1fa20000 0x400>,
+ <0x1fb00000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ scuclk: clock-controller@1fb00000 {
+ compatible = "airoha,en7581-scu";
+ reg = <0x0 0x1fb00000 0x0 0x970>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-ahb-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-ahb-clk.yaml
new file mode 100644
index 000000000000..93587b700476
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-ahb-clk.yaml
@@ -0,0 +1,108 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun4i-a10-ahb-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 AHB Clock
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+ "#clock-cells":
+ const: 0
+
+ compatible:
+ enum:
+ - allwinner,sun4i-a10-ahb-clk
+ - allwinner,sun6i-a31-ahb1-clk
+ - allwinner,sun8i-h3-ahb2-clk
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ minItems: 1
+ maxItems: 4
+ description: >
+ The parent order must match the hardware programming order.
+
+ clock-output-names:
+ maxItems: 1
+
+required:
+ - "#clock-cells"
+ - compatible
+ - reg
+ - clocks
+ - clock-output-names
+
+additionalProperties: false
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: allwinner,sun4i-a10-ahb-clk
+
+ then:
+ properties:
+ clocks:
+ maxItems: 1
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: allwinner,sun6i-a31-ahb1-clk
+
+ then:
+ properties:
+ clocks:
+ maxItems: 4
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: allwinner,sun8i-h3-ahb2-clk
+
+ then:
+ properties:
+ clocks:
+ maxItems: 2
+
+examples:
+ - |
+ ahb@1c20054 {
+ #clock-cells = <0>;
+ compatible = "allwinner,sun4i-a10-ahb-clk";
+ reg = <0x01c20054 0x4>;
+ clocks = <&axi>;
+ clock-output-names = "ahb";
+ };
+
+ - |
+ ahb1@1c20054 {
+ #clock-cells = <0>;
+ compatible = "allwinner,sun6i-a31-ahb1-clk";
+ reg = <0x01c20054 0x4>;
+ clocks = <&osc32k>, <&osc24M>, <&axi>, <&pll6 0>;
+ clock-output-names = "ahb1";
+ };
+
+ - |
+ ahb2_clk@1c2005c {
+ #clock-cells = <0>;
+ compatible = "allwinner,sun8i-h3-ahb2-clk";
+ reg = <0x01c2005c 0x4>;
+ clocks = <&ahb1>, <&pll6d2>;
+ clock-output-names = "ahb2";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-apb0-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-apb0-clk.yaml
new file mode 100644
index 000000000000..e14e1aad9fd6
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-apb0-clk.yaml
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun4i-a10-apb0-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 APB0 Bus Clock
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+ "#clock-cells":
+ const: 0
+
+ compatible:
+ const: allwinner,sun4i-a10-apb0-clk
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-output-names:
+ maxItems: 1
+
+required:
+ - "#clock-cells"
+ - compatible
+ - reg
+ - clocks
+ - clock-output-names
+
+additionalProperties: false
+
+examples:
+ - |
+ apb0@1c20054 {
+ #clock-cells = <0>;
+ compatible = "allwinner,sun4i-a10-apb0-clk";
+ reg = <0x01c20054 0x4>;
+ clocks = <&ahb>;
+ clock-output-names = "apb0";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-apb1-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-apb1-clk.yaml
new file mode 100644
index 000000000000..8a4747ebe0ba
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-apb1-clk.yaml
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun4i-a10-apb1-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 APB1 Bus Clock
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+ "#clock-cells":
+ const: 0
+
+ compatible:
+ const: allwinner,sun4i-a10-apb1-clk
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 3
+ description: >
+ The parent order must match the hardware programming order.
+
+ clock-output-names:
+ maxItems: 1
+
+required:
+ - "#clock-cells"
+ - compatible
+ - reg
+ - clocks
+ - clock-output-names
+
+additionalProperties: false
+
+examples:
+ - |
+ clk@1c20058 {
+ #clock-cells = <0>;
+ compatible = "allwinner,sun4i-a10-apb1-clk";
+ reg = <0x01c20058 0x4>;
+ clocks = <&osc24M>, <&pll6 1>, <&osc32k>;
+ clock-output-names = "apb1";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-axi-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-axi-clk.yaml
new file mode 100644
index 000000000000..aa08dd49dd61
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-axi-clk.yaml
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun4i-a10-axi-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 AXI Clock
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+ "#clock-cells":
+ const: 0
+
+ compatible:
+ enum:
+ - allwinner,sun4i-a10-axi-clk
+ - allwinner,sun8i-a23-axi-clk
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-output-names:
+ maxItems: 1
+
+required:
+ - "#clock-cells"
+ - compatible
+ - reg
+ - clocks
+ - clock-output-names
+
+additionalProperties: false
+
+examples:
+ - |
+ axi@1c20054 {
+ #clock-cells = <0>;
+ compatible = "allwinner,sun4i-a10-axi-clk";
+ reg = <0x01c20054 0x4>;
+ clocks = <&cpu>;
+ clock-output-names = "axi";
+ };
+
+ - |
+ axi_clk@1c20050 {
+ #clock-cells = <0>;
+ compatible = "allwinner,sun8i-a23-axi-clk";
+ reg = <0x01c20050 0x4>;
+ clocks = <&cpu>;
+ clock-output-names = "axi";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-ccu.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-ccu.yaml
new file mode 100644
index 000000000000..1690b9d99c3d
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-ccu.yaml
@@ -0,0 +1,153 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun4i-a10-ccu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner Clock Control Unit
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+properties:
+ "#clock-cells":
+ const: 1
+
+ "#reset-cells":
+ const: 1
+
+ compatible:
+ enum:
+ - allwinner,sun4i-a10-ccu
+ - allwinner,sun5i-a10s-ccu
+ - allwinner,sun5i-a13-ccu
+ - allwinner,sun6i-a31-ccu
+ - allwinner,sun7i-a20-ccu
+ - allwinner,sun8i-a23-ccu
+ - allwinner,sun8i-a33-ccu
+ - allwinner,sun8i-a83t-ccu
+ - allwinner,sun8i-a83t-r-ccu
+ - allwinner,sun8i-h3-ccu
+ - allwinner,sun8i-h3-r-ccu
+ - allwinner,sun8i-r40-ccu
+ - allwinner,sun8i-v3-ccu
+ - allwinner,sun8i-v3s-ccu
+ - allwinner,sun9i-a80-ccu
+ - allwinner,sun20i-d1-ccu
+ - allwinner,sun20i-d1-r-ccu
+ - allwinner,sun50i-a64-ccu
+ - allwinner,sun50i-a64-r-ccu
+ - allwinner,sun50i-a100-ccu
+ - allwinner,sun50i-a100-r-ccu
+ - allwinner,sun50i-h5-ccu
+ - allwinner,sun50i-h6-ccu
+ - allwinner,sun50i-h6-r-ccu
+ - allwinner,sun50i-h616-ccu
+ - allwinner,sun50i-h616-r-ccu
+ - allwinner,suniv-f1c100s-ccu
+ - nextthing,gr8-ccu
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ minItems: 2
+ items:
+ - description: High Frequency Oscillator (usually at 24MHz)
+ - description: Low Frequency Oscillator (usually at 32kHz)
+ - description: Internal Oscillator
+ - description: Peripherals PLL
+
+ clock-names:
+ minItems: 2
+ items:
+ - const: hosc
+ - const: losc
+ - const: iosc
+ - const: pll-periph
+
+required:
+ - "#clock-cells"
+ - "#reset-cells"
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+
+if:
+ properties:
+ compatible:
+ enum:
+ - allwinner,sun8i-a83t-r-ccu
+ - allwinner,sun8i-h3-r-ccu
+ - allwinner,sun20i-d1-r-ccu
+ - allwinner,sun50i-a64-r-ccu
+ - allwinner,sun50i-a100-r-ccu
+ - allwinner,sun50i-h6-r-ccu
+ - allwinner,sun50i-h616-r-ccu
+
+then:
+ properties:
+ clocks:
+ minItems: 4
+ maxItems: 4
+
+ clock-names:
+ minItems: 4
+ maxItems: 4
+
+else:
+ if:
+ properties:
+ compatible:
+ enum:
+ - allwinner,sun20i-d1-ccu
+ - allwinner,sun50i-a100-ccu
+ - allwinner,sun50i-h6-ccu
+ - allwinner,sun50i-h616-ccu
+
+ then:
+ properties:
+ clocks:
+ minItems: 3
+ maxItems: 3
+
+ clock-names:
+ minItems: 3
+ maxItems: 3
+
+ else:
+ properties:
+ clocks:
+ minItems: 2
+ maxItems: 2
+
+ clock-names:
+ minItems: 2
+ maxItems: 2
+
+additionalProperties: false
+
+examples:
+ - |
+ ccu: clock@1c20000 {
+ compatible = "allwinner,sun8i-h3-ccu";
+ reg = <0x01c20000 0x400>;
+ clocks = <&osc24M>, <&osc32k>;
+ clock-names = "hosc", "losc";
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
+
+ - |
+ r_ccu: clock@1f01400 {
+ compatible = "allwinner,sun50i-a64-r-ccu";
+ reg = <0x01f01400 0x100>;
+ clocks = <&osc24M>, <&osc32k>, <&iosc>, <&ccu 11>;
+ clock-names = "hosc", "losc", "iosc", "pll-periph";
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-cpu-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-cpu-clk.yaml
new file mode 100644
index 000000000000..08d073520cfa
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-cpu-clk.yaml
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun4i-a10-cpu-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 CPU Clock
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+ "#clock-cells":
+ const: 0
+
+ compatible:
+ const: allwinner,sun4i-a10-cpu-clk
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 4
+ description: >
+ The parent order must match the hardware programming order.
+
+ clock-output-names:
+ maxItems: 1
+
+required:
+ - "#clock-cells"
+ - compatible
+ - reg
+ - clocks
+ - clock-output-names
+
+additionalProperties: false
+
+examples:
+ - |
+ cpu@1c20054 {
+ #clock-cells = <0>;
+ compatible = "allwinner,sun4i-a10-cpu-clk";
+ reg = <0x01c20054 0x4>;
+ clocks = <&osc32k>, <&osc24M>, <&pll1>, <&dummy>;
+ clock-output-names = "cpu";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-display-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-display-clk.yaml
new file mode 100644
index 000000000000..e665e50c1785
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-display-clk.yaml
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun4i-a10-display-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 Display Clock
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+ "#clock-cells":
+ const: 0
+
+ "#reset-cells":
+ const: 0
+
+ compatible:
+ const: allwinner,sun4i-a10-display-clk
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 3
+ description: >
+ The parent order must match the hardware programming order.
+
+ clock-output-names:
+ maxItems: 1
+
+required:
+ - "#clock-cells"
+ - "#reset-cells"
+ - compatible
+ - reg
+ - clocks
+ - clock-output-names
+
+additionalProperties: false
+
+examples:
+ - |
+ clk@1c20104 {
+ #clock-cells = <0>;
+ #reset-cells = <0>;
+ compatible = "allwinner,sun4i-a10-display-clk";
+ reg = <0x01c20104 0x4>;
+ clocks = <&pll3>, <&pll7>, <&pll5 1>;
+ clock-output-names = "de-be";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-gates-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-gates-clk.yaml
new file mode 100644
index 000000000000..c4714d0fbe07
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-gates-clk.yaml
@@ -0,0 +1,152 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun4i-a10-gates-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 Bus Gates Clock
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+ "#clock-cells":
+ const: 1
+ description: >
+ This additional argument passed to that clock is the offset of
+ the bit controlling this particular gate in the register.
+
+ compatible:
+ oneOf:
+ - const: allwinner,sun4i-a10-gates-clk
+ - const: allwinner,sun4i-a10-axi-gates-clk
+ - const: allwinner,sun4i-a10-ahb-gates-clk
+ - const: allwinner,sun5i-a10s-ahb-gates-clk
+ - const: allwinner,sun5i-a13-ahb-gates-clk
+ - const: allwinner,sun7i-a20-ahb-gates-clk
+ - const: allwinner,sun6i-a31-ahb1-gates-clk
+ - const: allwinner,sun8i-a23-ahb1-gates-clk
+ - const: allwinner,sun9i-a80-ahb0-gates-clk
+ - const: allwinner,sun9i-a80-ahb1-gates-clk
+ - const: allwinner,sun9i-a80-ahb2-gates-clk
+ - const: allwinner,sun4i-a10-apb0-gates-clk
+ - const: allwinner,sun5i-a10s-apb0-gates-clk
+ - const: allwinner,sun5i-a13-apb0-gates-clk
+ - const: allwinner,sun7i-a20-apb0-gates-clk
+ - const: allwinner,sun9i-a80-apb0-gates-clk
+ - const: allwinner,sun8i-a83t-apb0-gates-clk
+ - const: allwinner,sun4i-a10-apb1-gates-clk
+ - const: allwinner,sun5i-a13-apb1-gates-clk
+ - const: allwinner,sun5i-a10s-apb1-gates-clk
+ - const: allwinner,sun6i-a31-apb1-gates-clk
+ - const: allwinner,sun7i-a20-apb1-gates-clk
+ - const: allwinner,sun8i-a23-apb1-gates-clk
+ - const: allwinner,sun9i-a80-apb1-gates-clk
+ - const: allwinner,sun6i-a31-apb2-gates-clk
+ - const: allwinner,sun8i-a23-apb2-gates-clk
+ - const: allwinner,sun8i-a83t-bus-gates-clk
+ - const: allwinner,sun9i-a80-apbs-gates-clk
+ - const: allwinner,sun4i-a10-dram-gates-clk
+
+ - items:
+ - const: allwinner,sun5i-a13-dram-gates-clk
+ - const: allwinner,sun4i-a10-gates-clk
+
+ - items:
+ - const: allwinner,sun8i-h3-apb0-gates-clk
+ - const: allwinner,sun4i-a10-gates-clk
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-indices:
+ minItems: 1
+ maxItems: 64
+
+ clock-output-names:
+ minItems: 1
+ maxItems: 64
+
+required:
+ - "#clock-cells"
+ - compatible
+ - reg
+ - clocks
+ - clock-indices
+ - clock-output-names
+
+additionalProperties: false
+
+examples:
+ - |
+ clk@1c2005c {
+ #clock-cells = <1>;
+ compatible = "allwinner,sun4i-a10-axi-gates-clk";
+ reg = <0x01c2005c 0x4>;
+ clocks = <&axi>;
+ clock-indices = <0>;
+ clock-output-names = "axi_dram";
+ };
+
+ - |
+ clk@1c20060 {
+ #clock-cells = <1>;
+ compatible = "allwinner,sun4i-a10-ahb-gates-clk";
+ reg = <0x01c20060 0x8>;
+ clocks = <&ahb>;
+ clock-indices = <0>, <1>,
+ <2>, <3>,
+ <4>, <5>, <6>,
+ <7>, <8>, <9>,
+ <10>, <11>, <12>,
+ <13>, <14>, <16>,
+ <17>, <18>, <20>,
+ <21>, <22>, <23>,
+ <24>, <25>, <26>,
+ <32>, <33>, <34>,
+ <35>, <36>, <37>,
+ <40>, <41>, <43>,
+ <44>, <45>,
+ <46>, <47>,
+ <50>, <52>;
+ clock-output-names = "ahb_usb0", "ahb_ehci0",
+ "ahb_ohci0", "ahb_ehci1",
+ "ahb_ohci1", "ahb_ss", "ahb_dma",
+ "ahb_bist", "ahb_mmc0", "ahb_mmc1",
+ "ahb_mmc2", "ahb_mmc3", "ahb_ms",
+ "ahb_nand", "ahb_sdram", "ahb_ace",
+ "ahb_emac", "ahb_ts", "ahb_spi0",
+ "ahb_spi1", "ahb_spi2", "ahb_spi3",
+ "ahb_pata", "ahb_sata", "ahb_gps",
+ "ahb_ve", "ahb_tvd", "ahb_tve0",
+ "ahb_tve1", "ahb_lcd0", "ahb_lcd1",
+ "ahb_csi0", "ahb_csi1", "ahb_hdmi",
+ "ahb_de_be0", "ahb_de_be1",
+ "ahb_de_fe0", "ahb_de_fe1",
+ "ahb_mp", "ahb_mali400";
+ };
+
+
+ - |
+ clk@1c20068 {
+ #clock-cells = <1>;
+ compatible = "allwinner,sun4i-a10-apb0-gates-clk";
+ reg = <0x01c20068 0x4>;
+ clocks = <&apb0>;
+ clock-indices = <0>, <1>,
+ <2>, <3>,
+ <5>, <6>,
+ <7>, <10>;
+ clock-output-names = "apb0_codec", "apb0_spdif",
+ "apb0_ac97", "apb0_iis",
+ "apb0_pio", "apb0_ir0",
+ "apb0_ir1", "apb0_keypad";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-mbus-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-mbus-clk.yaml
new file mode 100644
index 000000000000..e824e33489b6
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-mbus-clk.yaml
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun4i-a10-mbus-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 MBUS Clock
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+ "#clock-cells":
+ const: 0
+
+ compatible:
+ enum:
+ - allwinner,sun5i-a13-mbus-clk
+ - allwinner,sun8i-a23-mbus-clk
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 3
+ description: >
+ The parent order must match the hardware programming order.
+
+ clock-output-names:
+ maxItems: 1
+
+required:
+ - "#clock-cells"
+ - compatible
+ - reg
+ - clocks
+ - clock-output-names
+
+additionalProperties: false
+
+examples:
+ - |
+ clk@1c2015c {
+ #clock-cells = <0>;
+ compatible = "allwinner,sun5i-a13-mbus-clk";
+ reg = <0x01c2015c 0x4>;
+ clocks = <&osc24M>, <&pll6 1>, <&pll5 1>;
+ clock-output-names = "mbus";
+ };
+
+ - |
+ clk@1c2015c {
+ #clock-cells = <0>;
+ compatible = "allwinner,sun8i-a23-mbus-clk";
+ reg = <0x01c2015c 0x4>;
+ clocks = <&osc24M>, <&pll6 1>, <&pll5>;
+ clock-output-names = "mbus";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-mmc-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-mmc-clk.yaml
new file mode 100644
index 000000000000..c612f94befb9
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-mmc-clk.yaml
@@ -0,0 +1,87 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun4i-a10-mmc-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 Module 1 Clock
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+ "#clock-cells":
+ const: 1
+ description: >
+ There is three different outputs: the main clock, with the ID 0,
+ and the output and sample clocks, with the IDs 1 and 2,
+ respectively.
+
+ compatible:
+ enum:
+ - allwinner,sun4i-a10-mmc-clk
+ - allwinner,sun9i-a80-mmc-clk
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ minItems: 2
+ maxItems: 3
+ description: >
+ The parent order must match the hardware programming order.
+
+ clock-output-names:
+ maxItems: 3
+
+required:
+ - "#clock-cells"
+ - compatible
+ - reg
+ - clocks
+ - clock-output-names
+
+additionalProperties: false
+
+if:
+ properties:
+ compatible:
+ contains:
+ const: allwinner,sun4i-a10-mmc-clk
+
+then:
+ properties:
+ clocks:
+ maxItems: 3
+
+else:
+ properties:
+ clocks:
+ maxItems: 2
+
+examples:
+ - |
+ clk@1c20088 {
+ #clock-cells = <1>;
+ compatible = "allwinner,sun4i-a10-mmc-clk";
+ reg = <0x01c20088 0x4>;
+ clocks = <&osc24M>, <&pll6 1>, <&pll5 1>;
+ clock-output-names = "mmc0",
+ "mmc0_output",
+ "mmc0_sample";
+ };
+
+ - |
+ clk@6000410 {
+ #clock-cells = <1>;
+ compatible = "allwinner,sun9i-a80-mmc-clk";
+ reg = <0x06000410 0x4>;
+ clocks = <&osc24M>, <&pll4>;
+ clock-output-names = "mmc0", "mmc0_output",
+ "mmc0_sample";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-mod0-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-mod0-clk.yaml
new file mode 100644
index 000000000000..80ae3a7a588c
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-mod0-clk.yaml
@@ -0,0 +1,80 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun4i-a10-mod0-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 Module 0 Clock
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+select:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - allwinner,sun4i-a10-mod0-clk
+ - allwinner,sun9i-a80-mod0-clk
+
+ # The PRCM on the A31 and A23 will have the reg property missing,
+ # since it's set at the upper level node, and will be validated by
+ # PRCM's schema. Make sure we only validate standalone nodes.
+ required:
+ - compatible
+ - reg
+
+properties:
+ "#clock-cells":
+ const: 0
+
+ compatible:
+ enum:
+ - allwinner,sun4i-a10-mod0-clk
+ - allwinner,sun9i-a80-mod0-clk
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ # On the A80, the PRCM mod0 clocks have 2 parents.
+ minItems: 2
+ maxItems: 3
+ description: >
+ The parent order must match the hardware programming order.
+
+ clock-output-names:
+ maxItems: 1
+
+required:
+ - "#clock-cells"
+ - compatible
+ - reg
+ - clocks
+ - clock-output-names
+
+additionalProperties: false
+
+examples:
+ - |
+ clk@1c20080 {
+ #clock-cells = <0>;
+ compatible = "allwinner,sun4i-a10-mod0-clk";
+ reg = <0x01c20080 0x4>;
+ clocks = <&osc24M>, <&pll6 1>, <&pll5 1>;
+ clock-output-names = "nand";
+ };
+
+ - |
+ clk@8001454 {
+ #clock-cells = <0>;
+ compatible = "allwinner,sun4i-a10-mod0-clk";
+ reg = <0x08001454 0x4>;
+ clocks = <&osc32k>, <&osc24M>;
+ clock-output-names = "r_ir";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-mod1-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-mod1-clk.yaml
new file mode 100644
index 000000000000..4f9a8d44d42a
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-mod1-clk.yaml
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun4i-a10-mod1-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 Module 1 Clock
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+ "#clock-cells":
+ const: 0
+
+ compatible:
+ const: allwinner,sun4i-a10-mod1-clk
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 4
+ description: >
+ The parent order must match the hardware programming order.
+
+ clock-output-names:
+ maxItems: 1
+
+required:
+ - "#clock-cells"
+ - compatible
+ - reg
+ - clocks
+ - clock-output-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/sun4i-a10-pll2.h>
+
+ clk@1c200c0 {
+ #clock-cells = <0>;
+ compatible = "allwinner,sun4i-a10-mod1-clk";
+ reg = <0x01c200c0 0x4>;
+ clocks = <&pll2 SUN4I_A10_PLL2_8X>,
+ <&pll2 SUN4I_A10_PLL2_4X>,
+ <&pll2 SUN4I_A10_PLL2_2X>,
+ <&pll2 SUN4I_A10_PLL2_1X>;
+ clock-output-names = "spdif";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-osc-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-osc-clk.yaml
new file mode 100644
index 000000000000..0052bf1e8a6b
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-osc-clk.yaml
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun4i-a10-osc-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 Gateable Oscillator Clock
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+ "#clock-cells":
+ const: 0
+
+ compatible:
+ const: allwinner,sun4i-a10-osc-clk
+
+ reg:
+ maxItems: 1
+
+ clock-frequency:
+ description: >
+ Frequency of the main oscillator.
+
+ clock-output-names:
+ maxItems: 1
+
+required:
+ - "#clock-cells"
+ - compatible
+ - reg
+ - clock-frequency
+ - clock-output-names
+
+additionalProperties: false
+
+examples:
+ - |
+ osc24M: clk@1c20050 {
+ #clock-cells = <0>;
+ compatible = "allwinner,sun4i-a10-osc-clk";
+ reg = <0x01c20050 0x4>;
+ clock-frequency = <24000000>;
+ clock-output-names = "osc24M";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-pll1-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-pll1-clk.yaml
new file mode 100644
index 000000000000..b13a1f21d5da
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-pll1-clk.yaml
@@ -0,0 +1,71 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun4i-a10-pll1-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 CPU PLL
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+ "#clock-cells":
+ const: 0
+
+ compatible:
+ enum:
+ - allwinner,sun4i-a10-pll1-clk
+ - allwinner,sun6i-a31-pll1-clk
+ - allwinner,sun8i-a23-pll1-clk
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-output-names:
+ maxItems: 1
+
+required:
+ - "#clock-cells"
+ - compatible
+ - reg
+ - clocks
+ - clock-output-names
+
+additionalProperties: false
+
+examples:
+ - |
+ clk@1c20000 {
+ #clock-cells = <0>;
+ compatible = "allwinner,sun4i-a10-pll1-clk";
+ reg = <0x01c20000 0x4>;
+ clocks = <&osc24M>;
+ clock-output-names = "osc24M";
+ };
+
+ - |
+ clk@1c20000 {
+ #clock-cells = <0>;
+ compatible = "allwinner,sun6i-a31-pll1-clk";
+ reg = <0x01c20000 0x4>;
+ clocks = <&osc24M>;
+ clock-output-names = "pll1";
+ };
+
+ - |
+ clk@1c20000 {
+ #clock-cells = <0>;
+ compatible = "allwinner,sun8i-a23-pll1-clk";
+ reg = <0x01c20000 0x4>;
+ clocks = <&osc24M>;
+ clock-output-names = "pll1";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-pll3-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-pll3-clk.yaml
new file mode 100644
index 000000000000..418d207d23b8
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-pll3-clk.yaml
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun4i-a10-pll3-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 Video PLL
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+ "#clock-cells":
+ const: 0
+
+ compatible:
+ const: allwinner,sun4i-a10-pll3-clk
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-output-names:
+ maxItems: 1
+
+required:
+ - "#clock-cells"
+ - compatible
+ - reg
+ - clocks
+ - clock-output-names
+
+additionalProperties: false
+
+examples:
+ - |
+ clk@1c20010 {
+ #clock-cells = <0>;
+ compatible = "allwinner,sun4i-a10-pll3-clk";
+ reg = <0x01c20010 0x4>;
+ clocks = <&osc3M>;
+ clock-output-names = "pll3";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-pll5-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-pll5-clk.yaml
new file mode 100644
index 000000000000..76ef3f0c7f2c
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-pll5-clk.yaml
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun4i-a10-pll5-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 DRAM PLL
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+ "#clock-cells":
+ const: 1
+ description: >
+ The first output is the DRAM clock output, the second is meant
+ for peripherals on the SoC.
+
+ compatible:
+ const: allwinner,sun4i-a10-pll5-clk
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-output-names:
+ maxItems: 2
+
+required:
+ - "#clock-cells"
+ - compatible
+ - reg
+ - clocks
+ - clock-output-names
+
+additionalProperties: false
+
+examples:
+ - |
+ clk@1c20020 {
+ #clock-cells = <1>;
+ compatible = "allwinner,sun4i-a10-pll5-clk";
+ reg = <0x01c20020 0x4>;
+ clocks = <&osc24M>;
+ clock-output-names = "pll5_ddr", "pll5_other";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-pll6-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-pll6-clk.yaml
new file mode 100644
index 000000000000..a94c93c90ece
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-pll6-clk.yaml
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun4i-a10-pll6-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 Peripheral PLL
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+ "#clock-cells":
+ const: 1
+ description: >
+ The first output is the SATA clock output, the second is the
+ regular PLL output, the third is a PLL output at twice the rate.
+
+ compatible:
+ const: allwinner,sun4i-a10-pll6-clk
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-output-names:
+ maxItems: 3
+
+required:
+ - "#clock-cells"
+ - compatible
+ - reg
+ - clocks
+ - clock-output-names
+
+additionalProperties: false
+
+examples:
+ - |
+ clk@1c20028 {
+ #clock-cells = <1>;
+ compatible = "allwinner,sun4i-a10-pll6-clk";
+ reg = <0x01c20028 0x4>;
+ clocks = <&osc24M>;
+ clock-output-names = "pll6_sata", "pll6_other", "pll6";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-tcon-ch0-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-tcon-ch0-clk.yaml
new file mode 100644
index 000000000000..6646b2a99fc1
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-tcon-ch0-clk.yaml
@@ -0,0 +1,77 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun4i-a10-tcon-ch0-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 TCON Channel 0 Clock
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+ "#clock-cells":
+ const: 0
+
+ "#reset-cells":
+ const: 1
+
+ compatible:
+ enum:
+ - allwinner,sun4i-a10-tcon-ch0-clk
+ - allwinner,sun4i-a10-tcon-ch1-clk
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 4
+ description: >
+ The parent order must match the hardware programming order.
+
+ clock-output-names:
+ maxItems: 1
+
+required:
+ - "#clock-cells"
+ - compatible
+ - reg
+ - clocks
+ - clock-output-names
+
+if:
+ properties:
+ compatible:
+ contains:
+ const: allwinner,sun4i-a10-tcon-ch0-clk
+
+then:
+ required:
+ - "#reset-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ clk@1c20118 {
+ #clock-cells = <0>;
+ #reset-cells = <1>;
+ compatible = "allwinner,sun4i-a10-tcon-ch0-clk";
+ reg = <0x01c20118 0x4>;
+ clocks = <&pll3>, <&pll7>, <&pll3x2>, <&pll7x2>;
+ clock-output-names = "tcon-ch0-sclk";
+ };
+
+ - |
+ clk@1c2012c {
+ #clock-cells = <0>;
+ compatible = "allwinner,sun4i-a10-tcon-ch1-clk";
+ reg = <0x01c2012c 0x4>;
+ clocks = <&pll3>, <&pll7>, <&pll3x2>, <&pll7x2>;
+ clock-output-names = "tcon-ch1-sclk";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-usb-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-usb-clk.yaml
new file mode 100644
index 000000000000..5103b675e488
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-usb-clk.yaml
@@ -0,0 +1,166 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun4i-a10-usb-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 USB Clock
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+ "#clock-cells":
+ const: 1
+ description: >
+ The additional ID argument passed to the clock shall refer to
+ the index of the output.
+
+ "#reset-cells":
+ const: 1
+
+ compatible:
+ enum:
+ - allwinner,sun4i-a10-usb-clk
+ - allwinner,sun5i-a13-usb-clk
+ - allwinner,sun6i-a31-usb-clk
+ - allwinner,sun8i-a23-usb-clk
+ - allwinner,sun8i-h3-usb-clk
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-output-names:
+ minItems: 2
+ maxItems: 8
+
+required:
+ - "#clock-cells"
+ - "#reset-cells"
+ - compatible
+ - reg
+ - clocks
+ - clock-output-names
+
+additionalProperties: false
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: allwinner,sun4i-a10-usb-clk
+
+ then:
+ properties:
+ clock-output-names:
+ maxItems: 3
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: allwinner,sun5i-a13-usb-clk
+
+ then:
+ properties:
+ clock-output-names:
+ maxItems: 2
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: allwinner,sun6i-a31-usb-clk
+
+ then:
+ properties:
+ clock-output-names:
+ maxItems: 6
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: allwinner,sun8i-a23-usb-clk
+
+ then:
+ properties:
+ clock-output-names:
+ maxItems: 5
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: allwinner,sun8i-h3-usb-clk
+
+ then:
+ properties:
+ clock-output-names:
+ maxItems: 8
+
+examples:
+ - |
+ clk@1c200cc {
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ compatible = "allwinner,sun4i-a10-usb-clk";
+ reg = <0x01c200cc 0x4>;
+ clocks = <&pll6 1>;
+ clock-output-names = "usb_ohci0", "usb_ohci1", "usb_phy";
+ };
+
+ - |
+ clk@1c200cc {
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ compatible = "allwinner,sun5i-a13-usb-clk";
+ reg = <0x01c200cc 0x4>;
+ clocks = <&pll6 1>;
+ clock-output-names = "usb_ohci0", "usb_phy";
+ };
+
+ - |
+ clk@1c200cc {
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ compatible = "allwinner,sun6i-a31-usb-clk";
+ reg = <0x01c200cc 0x4>;
+ clocks = <&osc24M>;
+ clock-output-names = "usb_phy0", "usb_phy1", "usb_phy2",
+ "usb_ohci0", "usb_ohci1",
+ "usb_ohci2";
+ };
+
+ - |
+ clk@1c200cc {
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ compatible = "allwinner,sun8i-a23-usb-clk";
+ reg = <0x01c200cc 0x4>;
+ clocks = <&osc24M>;
+ clock-output-names = "usb_phy0", "usb_phy1", "usb_hsic",
+ "usb_hsic_12M", "usb_ohci0";
+ };
+
+ - |
+ clk@1c200cc {
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ compatible = "allwinner,sun8i-h3-usb-clk";
+ reg = <0x01c200cc 0x4>;
+ clocks = <&osc24M>;
+ clock-output-names = "usb_phy0", "usb_phy1",
+ "usb_phy2", "usb_phy3",
+ "usb_ohci0", "usb_ohci1",
+ "usb_ohci2", "usb_ohci3";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-ve-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-ve-clk.yaml
new file mode 100644
index 000000000000..80337e38d6e5
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-ve-clk.yaml
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun4i-a10-ve-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 Video Engine Clock
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+ "#clock-cells":
+ const: 0
+
+ "#reset-cells":
+ const: 0
+
+ compatible:
+ const: allwinner,sun4i-a10-ve-clk
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-output-names:
+ maxItems: 1
+
+required:
+ - "#clock-cells"
+ - "#reset-cells"
+ - compatible
+ - reg
+ - clocks
+ - clock-output-names
+
+additionalProperties: false
+
+examples:
+ - |
+ clk@1c2013c {
+ #clock-cells = <0>;
+ #reset-cells = <0>;
+ compatible = "allwinner,sun4i-a10-ve-clk";
+ reg = <0x01c2013c 0x4>;
+ clocks = <&pll4>;
+ clock-output-names = "ve";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun55i-a523-ccu.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun55i-a523-ccu.yaml
new file mode 100644
index 000000000000..58be701a720e
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun55i-a523-ccu.yaml
@@ -0,0 +1,136 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun55i-a523-ccu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A523 Clock Control Unit
+
+maintainers:
+ - Andre Przywara <andre.przywara@arm.com>
+
+properties:
+ "#clock-cells":
+ const: 1
+
+ "#reset-cells":
+ const: 1
+
+ compatible:
+ enum:
+ - allwinner,sun55i-a523-ccu
+ - allwinner,sun55i-a523-mcu-ccu
+ - allwinner,sun55i-a523-r-ccu
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ minItems: 4
+ maxItems: 9
+
+ clock-names:
+ minItems: 4
+ maxItems: 9
+
+required:
+ - "#clock-cells"
+ - "#reset-cells"
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ enum:
+ - allwinner,sun55i-a523-ccu
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: High Frequency Oscillator (usually at 24MHz)
+ - description: Low Frequency Oscillator (usually at 32kHz)
+ - description: Internal Oscillator
+ - description: Low Frequency Oscillator fanout
+
+ clock-names:
+ items:
+ - const: hosc
+ - const: losc
+ - const: iosc
+ - const: losc-fanout
+
+ - if:
+ properties:
+ compatible:
+ enum:
+ - allwinner,sun55i-a523-mcu-ccu
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: High Frequency Oscillator (usually at 24MHz)
+ - description: Low Frequency Oscillator (usually at 32kHz)
+ - description: Internal Oscillator
+ - description: Audio PLL (4x)
+ - description: Peripherals PLL 0 (300 MHz output)
+ - description: DSP module clock
+ - description: MBUS clock
+ - description: PRCM AHB clock
+ - description: PRCM APB0 clock
+
+ clock-names:
+ items:
+ - const: hosc
+ - const: losc
+ - const: iosc
+ - const: pll-audio0-4x
+ - const: pll-periph0-300m
+ - const: dsp
+ - const: mbus
+ - const: r-ahb
+ - const: r-apb0
+
+ - if:
+ properties:
+ compatible:
+ enum:
+ - allwinner,sun55i-a523-r-ccu
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: High Frequency Oscillator (usually at 24MHz)
+ - description: Low Frequency Oscillator (usually at 32kHz)
+ - description: Internal Oscillator
+ - description: Peripherals PLL
+ - description: Audio PLL
+
+ clock-names:
+ items:
+ - const: hosc
+ - const: losc
+ - const: iosc
+ - const: pll-periph
+ - const: pll-audio
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@2001000 {
+ compatible = "allwinner,sun55i-a523-ccu";
+ reg = <0x02001000 0x1000>;
+ clocks = <&osc24M>, <&osc32k>, <&iosc>, <&r_ccu 1>;
+ clock-names = "hosc", "losc", "iosc", "losc-fanout";
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun5i-a13-ahb-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun5i-a13-ahb-clk.yaml
new file mode 100644
index 000000000000..c6a6fbb6863b
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun5i-a13-ahb-clk.yaml
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun5i-a13-ahb-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A13 AHB Clock
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+ "#clock-cells":
+ const: 0
+
+ compatible:
+ const: allwinner,sun5i-a13-ahb-clk
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 3
+ description: >
+ The parent order must match the hardware programming order.
+
+ clock-output-names:
+ maxItems: 1
+
+required:
+ - "#clock-cells"
+ - compatible
+ - reg
+ - clocks
+ - clock-output-names
+
+additionalProperties: false
+
+examples:
+ - |
+ ahb@1c20054 {
+ #clock-cells = <0>;
+ compatible = "allwinner,sun5i-a13-ahb-clk";
+ reg = <0x01c20054 0x4>;
+ clocks = <&axi>, <&cpu>, <&pll6 1>;
+ clock-output-names = "ahb";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun6i-a31-pll6-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun6i-a31-pll6-clk.yaml
new file mode 100644
index 000000000000..7d6a6a34d20c
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun6i-a31-pll6-clk.yaml
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun6i-a31-pll6-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A31 Peripheral PLL
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+ "#clock-cells":
+ const: 1
+ description: >
+ The first output is the regular PLL output, the second is a PLL
+ output at twice the rate.
+
+ compatible:
+ const: allwinner,sun6i-a31-pll6-clk
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-output-names:
+ maxItems: 2
+
+required:
+ - "#clock-cells"
+ - compatible
+ - reg
+ - clocks
+ - clock-output-names
+
+additionalProperties: false
+
+examples:
+ - |
+ clk@1c20028 {
+ #clock-cells = <1>;
+ compatible = "allwinner,sun6i-a31-pll6-clk";
+ reg = <0x01c20028 0x4>;
+ clocks = <&osc24M>;
+ clock-output-names = "pll6", "pll6x2";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun7i-a20-gmac-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun7i-a20-gmac-clk.yaml
new file mode 100644
index 000000000000..b6202de35707
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun7i-a20-gmac-clk.yaml
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun7i-a20-gmac-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A20 GMAC TX Clock
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+properties:
+ "#clock-cells":
+ const: 0
+
+ compatible:
+ const: allwinner,sun7i-a20-gmac-clk
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 2
+ description: >
+ The parent clocks shall be fixed rate dummy clocks at 25 MHz and
+ 125 MHz, respectively.
+
+ clock-output-names:
+ maxItems: 1
+
+required:
+ - "#clock-cells"
+ - compatible
+ - reg
+ - clocks
+ - clock-output-names
+
+additionalProperties: false
+
+examples:
+ - |
+ clk@1c20164 {
+ #clock-cells = <0>;
+ compatible = "allwinner,sun7i-a20-gmac-clk";
+ reg = <0x01c20164 0x4>;
+ clocks = <&mii_phy_tx_clk>, <&gmac_int_tx_clk>;
+ clock-output-names = "gmac_tx";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun7i-a20-out-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun7i-a20-out-clk.yaml
new file mode 100644
index 000000000000..fde7f7dc3d34
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun7i-a20-out-clk.yaml
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun7i-a20-out-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A20 Output Clock
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+ "#clock-cells":
+ const: 0
+
+ compatible:
+ const: allwinner,sun7i-a20-out-clk
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 3
+ description: >
+ The parent order must match the hardware programming order.
+
+ clock-output-names:
+ maxItems: 1
+
+required:
+ - "#clock-cells"
+ - compatible
+ - reg
+ - clocks
+ - clock-output-names
+
+additionalProperties: false
+
+examples:
+ - |
+ clk@1c201f0 {
+ #clock-cells = <0>;
+ compatible = "allwinner,sun7i-a20-out-clk";
+ reg = <0x01c201f0 0x4>;
+ clocks = <&osc24M_32k>, <&osc32k>, <&osc24M>;
+ clock-output-names = "clk_out_a";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun8i-a83t-de2-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun8i-a83t-de2-clk.yaml
new file mode 100644
index 000000000000..7fcd55d468d4
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun8i-a83t-de2-clk.yaml
@@ -0,0 +1,80 @@
+# SPDX-License-Identifier: GPL-2.0+
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun8i-a83t-de2-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A83t Display Engine 2/3 Clock Controller
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+properties:
+ "#clock-cells":
+ const: 1
+
+ "#reset-cells":
+ const: 1
+
+ compatible:
+ oneOf:
+ - const: allwinner,sun8i-a83t-de2-clk
+ - const: allwinner,sun8i-h3-de2-clk
+ - const: allwinner,sun8i-v3s-de2-clk
+ - const: allwinner,sun50i-a64-de2-clk
+ - const: allwinner,sun50i-h5-de2-clk
+ - const: allwinner,sun50i-h6-de3-clk
+ - const: allwinner,sun50i-h616-de33-clk
+ - items:
+ - const: allwinner,sun8i-r40-de2-clk
+ - const: allwinner,sun8i-h3-de2-clk
+ - items:
+ - const: allwinner,sun20i-d1-de2-clk
+ - const: allwinner,sun50i-h5-de2-clk
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: Bus Clock
+ - description: Module Clock
+
+ clock-names:
+ items:
+ - const: bus
+ - const: mod
+
+ resets:
+ maxItems: 1
+
+required:
+ - "#clock-cells"
+ - "#reset-cells"
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - resets
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/sun8i-h3-ccu.h>
+ #include <dt-bindings/reset/sun8i-h3-ccu.h>
+
+ de2_clocks: clock@1000000 {
+ compatible = "allwinner,sun8i-h3-de2-clk";
+ reg = <0x01000000 0x100000>;
+ clocks = <&ccu CLK_BUS_DE>,
+ <&ccu CLK_DE>;
+ clock-names = "bus",
+ "mod";
+ resets = <&ccu RST_BUS_DE>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun8i-h3-bus-gates-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun8i-h3-bus-gates-clk.yaml
new file mode 100644
index 000000000000..45b9e2c7c1d1
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun8i-h3-bus-gates-clk.yaml
@@ -0,0 +1,103 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun8i-h3-bus-gates-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 Bus Gates Clock
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+ "#clock-cells":
+ const: 1
+ description: >
+ This additional argument passed to that clock is the offset of
+ the bit controlling this particular gate in the register.
+
+ compatible:
+ const: allwinner,sun8i-h3-bus-gates-clk
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 4
+
+ clock-names:
+ maxItems: 4
+ description: >
+ The parent order must match the hardware programming order.
+
+ clock-indices:
+ minItems: 1
+ maxItems: 64
+
+ clock-output-names:
+ minItems: 1
+ maxItems: 64
+
+required:
+ - "#clock-cells"
+ - compatible
+ - reg
+ - clocks
+ - clock-indices
+ - clock-names
+ - clock-output-names
+
+additionalProperties: false
+
+examples:
+ - |
+ clk@1c20060 {
+ #clock-cells = <1>;
+ compatible = "allwinner,sun8i-h3-bus-gates-clk";
+ reg = <0x01c20060 0x14>;
+ clocks = <&ahb1>, <&ahb2>, <&apb1>, <&apb2>;
+ clock-names = "ahb1", "ahb2", "apb1", "apb2";
+ clock-indices = <5>, <6>, <8>,
+ <9>, <10>, <13>,
+ <14>, <17>, <18>,
+ <19>, <20>,
+ <21>, <23>,
+ <24>, <25>,
+ <26>, <27>,
+ <28>, <29>,
+ <30>, <31>, <32>,
+ <35>, <36>, <37>,
+ <40>, <41>, <43>,
+ <44>, <52>, <53>,
+ <54>, <64>,
+ <65>, <69>, <72>,
+ <76>, <77>, <78>,
+ <96>, <97>, <98>,
+ <112>, <113>,
+ <114>, <115>,
+ <116>, <128>, <135>;
+ clock-output-names = "bus_ce", "bus_dma", "bus_mmc0",
+ "bus_mmc1", "bus_mmc2", "bus_nand",
+ "bus_sdram", "bus_gmac", "bus_ts",
+ "bus_hstimer", "bus_spi0",
+ "bus_spi1", "bus_otg",
+ "bus_otg_ehci0", "bus_ehci1",
+ "bus_ehci2", "bus_ehci3",
+ "bus_otg_ohci0", "bus_ohci1",
+ "bus_ohci2", "bus_ohci3", "bus_ve",
+ "bus_lcd0", "bus_lcd1", "bus_deint",
+ "bus_csi", "bus_tve", "bus_hdmi",
+ "bus_de", "bus_gpu", "bus_msgbox",
+ "bus_spinlock", "bus_codec",
+ "bus_spdif", "bus_pio", "bus_ths",
+ "bus_i2s0", "bus_i2s1", "bus_i2s2",
+ "bus_i2c0", "bus_i2c1", "bus_i2c2",
+ "bus_uart0", "bus_uart1",
+ "bus_uart2", "bus_uart3",
+ "bus_scr", "bus_ephy", "bus_dbg";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-ahb-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-ahb-clk.yaml
new file mode 100644
index 000000000000..f0f65af8ae22
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-ahb-clk.yaml
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun9i-a80-ahb-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A80 AHB Clock
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+ "#clock-cells":
+ const: 0
+
+ compatible:
+ const: allwinner,sun9i-a80-ahb-clk
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 4
+ description: >
+ The parent order must match the hardware programming order.
+
+ clock-output-names:
+ maxItems: 1
+
+required:
+ - "#clock-cells"
+ - compatible
+ - reg
+ - clocks
+ - clock-output-names
+
+additionalProperties: false
+
+examples:
+ - |
+ clk@6000060 {
+ #clock-cells = <0>;
+ compatible = "allwinner,sun9i-a80-ahb-clk";
+ reg = <0x06000060 0x4>;
+ clocks = <&gt_clk>, <&pll4>, <&pll12>, <&pll12>;
+ clock-output-names = "ahb0";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-apb0-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-apb0-clk.yaml
new file mode 100644
index 000000000000..e9f9bc8f5794
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-apb0-clk.yaml
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun9i-a80-apb0-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A80 APB0 Bus Clock
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+ "#clock-cells":
+ const: 0
+
+ compatible:
+ enum:
+ - allwinner,sun9i-a80-apb0-clk
+ - allwinner,sun9i-a80-apb1-clk
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 2
+ description: >
+ The parent order must match the hardware programming order.
+
+ clock-output-names:
+ maxItems: 1
+
+required:
+ - "#clock-cells"
+ - compatible
+ - reg
+ - clocks
+ - clock-output-names
+
+additionalProperties: false
+
+examples:
+ - |
+ clk@6000070 {
+ #clock-cells = <0>;
+ compatible = "allwinner,sun9i-a80-apb0-clk";
+ reg = <0x06000070 0x4>;
+ clocks = <&osc24M>, <&pll4>;
+ clock-output-names = "apb0";
+ };
+
+ - |
+ clk@6000074 {
+ #clock-cells = <0>;
+ compatible = "allwinner,sun9i-a80-apb1-clk";
+ reg = <0x06000074 0x4>;
+ clocks = <&osc24M>, <&pll4>;
+ clock-output-names = "apb1";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-cpus-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-cpus-clk.yaml
new file mode 100644
index 000000000000..c48db2d49340
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-cpus-clk.yaml
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun9i-a80-cpus-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A80 CPUS Clock
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+ "#clock-cells":
+ const: 0
+
+ compatible:
+ const: allwinner,sun9i-a80-cpus-clk
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 4
+ description: >
+ The parent order must match the hardware programming order.
+
+ clock-output-names:
+ maxItems: 1
+
+required:
+ - "#clock-cells"
+ - compatible
+ - reg
+ - clocks
+ - clock-output-names
+
+additionalProperties: false
+
+examples:
+ - |
+ clk@8001410 {
+ compatible = "allwinner,sun9i-a80-cpus-clk";
+ reg = <0x08001410 0x4>;
+ #clock-cells = <0>;
+ clocks = <&osc32k>, <&osc24M>, <&pll4>, <&pll3>;
+ clock-output-names = "cpus";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-de-clks.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-de-clks.yaml
new file mode 100644
index 000000000000..e9f81a343be1
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-de-clks.yaml
@@ -0,0 +1,67 @@
+# SPDX-License-Identifier: GPL-2.0+
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun9i-a80-de-clks.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A80 Display Engine Clock Controller
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+properties:
+ "#clock-cells":
+ const: 1
+
+ "#reset-cells":
+ const: 1
+
+ compatible:
+ const: allwinner,sun9i-a80-de-clks
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: Bus Clock
+ - description: RAM Bus Clock
+ - description: Module Clock
+
+ clock-names:
+ items:
+ - const: mod
+ - const: dram
+ - const: bus
+
+ resets:
+ maxItems: 1
+
+required:
+ - "#clock-cells"
+ - "#reset-cells"
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - resets
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/sun9i-a80-ccu.h>
+ #include <dt-bindings/reset/sun9i-a80-ccu.h>
+
+ de_clocks: clock@3000000 {
+ compatible = "allwinner,sun9i-a80-de-clks";
+ reg = <0x03000000 0x30>;
+ clocks = <&ccu CLK_DE>, <&ccu CLK_SDRAM>, <&ccu CLK_BUS_DE>;
+ clock-names = "mod", "dram", "bus";
+ resets = <&ccu RST_BUS_DE>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-gt-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-gt-clk.yaml
new file mode 100644
index 000000000000..d3ce5eb18d4e
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-gt-clk.yaml
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun9i-a80-gt-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A80 GT Bus Clock
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+ "#clock-cells":
+ const: 0
+
+ compatible:
+ const: allwinner,sun9i-a80-gt-clk
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 4
+ description: >
+ The parent order must match the hardware programming order.
+
+ clock-output-names:
+ maxItems: 1
+
+required:
+ - "#clock-cells"
+ - compatible
+ - reg
+ - clocks
+ - clock-output-names
+
+additionalProperties: false
+
+examples:
+ - |
+ clk@600005c {
+ #clock-cells = <0>;
+ compatible = "allwinner,sun9i-a80-gt-clk";
+ reg = <0x0600005c 0x4>;
+ clocks = <&osc24M>, <&pll4>, <&pll12>, <&pll12>;
+ clock-output-names = "gt";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-mmc-config-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-mmc-config-clk.yaml
new file mode 100644
index 000000000000..65ee5afe83cc
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-mmc-config-clk.yaml
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun9i-a80-mmc-config-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A80 MMC Configuration Clock
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+description: >
+ There is one clock/reset output per mmc controller. The number of
+ outputs is determined by the size of the address block, which is
+ related to the overall mmc block.
+
+properties:
+ "#clock-cells":
+ const: 1
+ description: >
+ The additional ID argument passed to the clock shall refer to
+ the index of the output.
+
+ "#reset-cells":
+ const: 1
+
+ compatible:
+ const: allwinner,sun9i-a80-mmc-config-clk
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ resets:
+ maxItems: 1
+
+ clock-output-names:
+ maxItems: 4
+
+required:
+ - "#clock-cells"
+ - "#reset-cells"
+ - compatible
+ - reg
+ - clocks
+ - clock-output-names
+
+additionalProperties: false
+
+examples:
+ - |
+ clk@1c13000 {
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ compatible = "allwinner,sun9i-a80-mmc-config-clk";
+ reg = <0x01c13000 0x10>;
+ clocks = <&ahb0_gates 8>;
+ resets = <&ahb0_resets 8>;
+ clock-output-names = "mmc0_config", "mmc1_config",
+ "mmc2_config", "mmc3_config";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-pll4-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-pll4-clk.yaml
new file mode 100644
index 000000000000..261264a8aef6
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-pll4-clk.yaml
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun9i-a80-pll4-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A80 Peripheral PLL
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+ "#clock-cells":
+ const: 0
+
+ compatible:
+ const: allwinner,sun9i-a80-pll4-clk
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-output-names:
+ maxItems: 1
+
+required:
+ - "#clock-cells"
+ - compatible
+ - reg
+ - clocks
+ - clock-output-names
+
+additionalProperties: false
+
+examples:
+ - |
+ clk@600000c {
+ #clock-cells = <0>;
+ compatible = "allwinner,sun9i-a80-pll4-clk";
+ reg = <0x0600000c 0x4>;
+ clocks = <&osc24M>;
+ clock-output-names = "pll4";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-usb-clks.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-usb-clks.yaml
new file mode 100644
index 000000000000..515c15d5f661
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-usb-clks.yaml
@@ -0,0 +1,59 @@
+# SPDX-License-Identifier: GPL-2.0+
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun9i-a80-usb-clks.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A80 USB Clock Controller
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+properties:
+ "#clock-cells":
+ const: 1
+
+ "#reset-cells":
+ const: 1
+
+ compatible:
+ const: allwinner,sun9i-a80-usb-clks
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: Bus Clock
+ - description: High Frequency Oscillator
+
+ clock-names:
+ items:
+ - const: bus
+ - const: hosc
+
+required:
+ - "#clock-cells"
+ - "#reset-cells"
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/sun9i-a80-ccu.h>
+
+ usb_clocks: clock@a08000 {
+ compatible = "allwinner,sun9i-a80-usb-clks";
+ reg = <0x00a08000 0x8>;
+ clocks = <&ccu CLK_BUS_USB>, <&osc24M>;
+ clock-names = "bus", "hosc";
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-usb-mod-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-usb-mod-clk.yaml
new file mode 100644
index 000000000000..3f7b8d9511f1
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-usb-mod-clk.yaml
@@ -0,0 +1,60 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun9i-a80-usb-mod-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A80 USB Module Clock
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+ "#clock-cells":
+ const: 1
+ description: >
+ The additional ID argument passed to the clock shall refer to
+ the index of the output.
+
+ "#reset-cells":
+ const: 1
+
+ compatible:
+ const: allwinner,sun9i-a80-usb-mod-clk
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-output-names:
+ maxItems: 6
+
+required:
+ - "#clock-cells"
+ - "#reset-cells"
+ - compatible
+ - reg
+ - clocks
+ - clock-output-names
+
+additionalProperties: false
+
+examples:
+ - |
+ clk@a08000 {
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ compatible = "allwinner,sun9i-a80-usb-mod-clk";
+ reg = <0x00a08000 0x4>;
+ clocks = <&ahb1_gates 1>;
+ clock-output-names = "usb0_ahb", "usb_ohci0",
+ "usb1_ahb", "usb_ohci1",
+ "usb2_ahb", "usb_ohci2";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-usb-phy-clk.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-usb-phy-clk.yaml
new file mode 100644
index 000000000000..0d49072d47ca
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun9i-a80-usb-phy-clk.yaml
@@ -0,0 +1,60 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/allwinner,sun9i-a80-usb-phy-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A80 USB PHY Clock
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+deprecated: true
+
+properties:
+ "#clock-cells":
+ const: 1
+ description: >
+ The additional ID argument passed to the clock shall refer to
+ the index of the output.
+
+ "#reset-cells":
+ const: 1
+
+ compatible:
+ const: allwinner,sun9i-a80-usb-phy-clk
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-output-names:
+ maxItems: 6
+
+required:
+ - "#clock-cells"
+ - "#reset-cells"
+ - compatible
+ - reg
+ - clocks
+ - clock-output-names
+
+additionalProperties: false
+
+examples:
+ - |
+ clk@a08004 {
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ compatible = "allwinner,sun9i-a80-usb-phy-clk";
+ reg = <0x00a08004 0x4>;
+ clocks = <&ahb1_gates 1>;
+ clock-output-names = "usb_phy0", "usb_hsic1_480M",
+ "usb_phy1", "usb_hsic2_480M",
+ "usb_phy2", "usb_hsic_12M";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/alphascale,acc.txt b/Documentation/devicetree/bindings/clock/alphascale,acc.txt
deleted file mode 100644
index b3205b21c9d0..000000000000
--- a/Documentation/devicetree/bindings/clock/alphascale,acc.txt
+++ /dev/null
@@ -1,114 +0,0 @@
-Alphascale Clock Controller
-
-The ACC (Alphascale Clock Controller) is responsible of choising proper
-clock source, setting deviders and clock gates.
-
-Required properties for the ACC node:
- - compatible: must be "alphascale,asm9260-clock-controller"
- - reg: must contain the ACC register base and size
- - #clock-cells : shall be set to 1.
-
-Simple one-cell clock specifier format is used, where the only cell is used
-as an index of the clock inside the provider.
-It is encouraged to use dt-binding for clock index definitions. SoC specific
-dt-binding should be included to the device tree descriptor. For example
-Alphascale ASM9260:
-#include <dt-bindings/clock/alphascale,asm9260.h>
-
-This binding contains two types of clock providers:
- _AHB_ - AHB gate;
- _SYS_ - adjustable clock source. Not all peripheral have _SYS_ clock provider.
-All clock specific details can be found in the SoC documentation.
-CLKID_AHB_ROM 0
-CLKID_AHB_RAM 1
-CLKID_AHB_GPIO 2
-CLKID_AHB_MAC 3
-CLKID_AHB_EMI 4
-CLKID_AHB_USB0 5
-CLKID_AHB_USB1 6
-CLKID_AHB_DMA0 7
-CLKID_AHB_DMA1 8
-CLKID_AHB_UART0 9
-CLKID_AHB_UART1 10
-CLKID_AHB_UART2 11
-CLKID_AHB_UART3 12
-CLKID_AHB_UART4 13
-CLKID_AHB_UART5 14
-CLKID_AHB_UART6 15
-CLKID_AHB_UART7 16
-CLKID_AHB_UART8 17
-CLKID_AHB_UART9 18
-CLKID_AHB_I2S0 19
-CLKID_AHB_I2C0 20
-CLKID_AHB_I2C1 21
-CLKID_AHB_SSP0 22
-CLKID_AHB_IOCONFIG 23
-CLKID_AHB_WDT 24
-CLKID_AHB_CAN0 25
-CLKID_AHB_CAN1 26
-CLKID_AHB_MPWM 27
-CLKID_AHB_SPI0 28
-CLKID_AHB_SPI1 29
-CLKID_AHB_QEI 30
-CLKID_AHB_QUADSPI0 31
-CLKID_AHB_CAMIF 32
-CLKID_AHB_LCDIF 33
-CLKID_AHB_TIMER0 34
-CLKID_AHB_TIMER1 35
-CLKID_AHB_TIMER2 36
-CLKID_AHB_TIMER3 37
-CLKID_AHB_IRQ 38
-CLKID_AHB_RTC 39
-CLKID_AHB_NAND 40
-CLKID_AHB_ADC0 41
-CLKID_AHB_LED 42
-CLKID_AHB_DAC0 43
-CLKID_AHB_LCD 44
-CLKID_AHB_I2S1 45
-CLKID_AHB_MAC1 46
-
-CLKID_SYS_CPU 47
-CLKID_SYS_AHB 48
-CLKID_SYS_I2S0M 49
-CLKID_SYS_I2S0S 50
-CLKID_SYS_I2S1M 51
-CLKID_SYS_I2S1S 52
-CLKID_SYS_UART0 53
-CLKID_SYS_UART1 54
-CLKID_SYS_UART2 55
-CLKID_SYS_UART3 56
-CLKID_SYS_UART4 56
-CLKID_SYS_UART5 57
-CLKID_SYS_UART6 58
-CLKID_SYS_UART7 59
-CLKID_SYS_UART8 60
-CLKID_SYS_UART9 61
-CLKID_SYS_SPI0 62
-CLKID_SYS_SPI1 63
-CLKID_SYS_QUADSPI 64
-CLKID_SYS_SSP0 65
-CLKID_SYS_NAND 66
-CLKID_SYS_TRACE 67
-CLKID_SYS_CAMM 68
-CLKID_SYS_WDT 69
-CLKID_SYS_CLKOUT 70
-CLKID_SYS_MAC 71
-CLKID_SYS_LCD 72
-CLKID_SYS_ADCANA 73
-
-Example of clock consumer with _SYS_ and _AHB_ sinks.
-uart4: serial@80010000 {
- compatible = "alphascale,asm9260-uart";
- reg = <0x80010000 0x4000>;
- clocks = <&acc CLKID_SYS_UART4>, <&acc CLKID_AHB_UART4>;
- interrupts = <19>;
-};
-
-Clock consumer with only one, _AHB_ sink.
-timer0: timer@80088000 {
- compatible = "alphascale,asm9260-timer";
- reg = <0x80088000 0x4000>;
- clocks = <&acc CLKID_AHB_TIMER0>;
- interrupts = <29>;
-};
-
diff --git a/Documentation/devicetree/bindings/clock/alphascale,asm9260-clock-controller.yaml b/Documentation/devicetree/bindings/clock/alphascale,asm9260-clock-controller.yaml
new file mode 100644
index 000000000000..1caad419ce9d
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/alphascale,asm9260-clock-controller.yaml
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/alphascale,asm9260-clock-controller.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Alphascale Clock Controller
+
+maintainers:
+ - Oleksij Rempel <linux@rempel-privat.de>
+
+description: |
+ The ACC (Alphascale Clock Controller) is responsible for choosing proper
+ clock source, setting dividers and clock gates.
+
+ Simple one-cell clock specifier format is used, where the only cell is used
+ as an index of the clock inside the provider.
+ It is encouraged to use dt-binding for clock index definitions. SoC specific
+ dt-binding should be included to the device tree descriptor. For example
+ Alphascale ASM9260:
+
+ #include <dt-bindings/clock/alphascale,asm9260.h>
+
+ This binding contains two types of clock providers:
+
+ _AHB_ - AHB gate;
+ _SYS_ - adjustable clock source. Not all peripheral have _SYS_ clock provider.
+
+ All clock specific details can be found in the SoC documentation.
+
+properties:
+ compatible:
+ const: alphascale,asm9260-clock-controller
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ clocks:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+additionalProperties: false
diff --git a/Documentation/devicetree/bindings/clock/altr_socfpga.txt b/Documentation/devicetree/bindings/clock/altr_socfpga.txt
deleted file mode 100644
index f72e80e0dade..000000000000
--- a/Documentation/devicetree/bindings/clock/altr_socfpga.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-Device Tree Clock bindings for Altera's SoCFPGA platform
-
-This binding uses the common clock binding[1].
-
-[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-
-Required properties:
-- compatible : shall be one of the following:
- "altr,socfpga-pll-clock" - for a PLL clock
- "altr,socfpga-perip-clock" - The peripheral clock divided from the
- PLL clock.
- "altr,socfpga-gate-clk" - Clocks that directly feed peripherals and
- can get gated.
-
-- reg : shall be the control register offset from CLOCK_MANAGER's base for the clock.
-- clocks : shall be the input parent clock phandle for the clock. This is
- either an oscillator or a pll output.
-- #clock-cells : from common clock binding, shall be set to 0.
-
-Optional properties:
-- fixed-divider : If clocks have a fixed divider value, use this property.
-- clk-gate : For "socfpga-gate-clk", clk-gate contains the gating register
- and the bit index.
-- div-reg : For "socfpga-gate-clk" and "socfpga-periph-clock", div-reg contains
- the divider register, bit shift, and width.
-- clk-phase : For the sdmmc_clk, contains the value of the clock phase that controls
- the SDMMC CIU clock. The first value is the clk_sample(smpsel), and the second
- value is the cclk_in_drv(drvsel). The clk-phase is used to enable the correct
- hold/delay times that is needed for the SD/MMC CIU clock. The values of both
- can be 0-315 degrees, in 45 degree increments.
diff --git a/Documentation/devicetree/bindings/clock/amlogic,a1-peripherals-clkc.yaml b/Documentation/devicetree/bindings/clock/amlogic,a1-peripherals-clkc.yaml
new file mode 100644
index 000000000000..2568ad7dd0ac
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/amlogic,a1-peripherals-clkc.yaml
@@ -0,0 +1,78 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/amlogic,a1-peripherals-clkc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Amlogic A1 Peripherals Clock Control Unit
+
+maintainers:
+ - Neil Armstrong <neil.armstrong@linaro.org>
+ - Jerome Brunet <jbrunet@baylibre.com>
+ - Jian Hu <jian.hu@jian.hu.com>
+ - Dmitry Rokosov <ddrokosov@sberdevices.ru>
+
+properties:
+ compatible:
+ const: amlogic,a1-peripherals-clkc
+
+ '#clock-cells':
+ const: 1
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: input fixed pll div2
+ - description: input fixed pll div3
+ - description: input fixed pll div5
+ - description: input fixed pll div7
+ - description: input hifi pll
+ - description: input oscillator (usually at 24MHz)
+ - description: input sys pll
+ minItems: 6 # sys_pll is optional
+
+ clock-names:
+ items:
+ - const: fclk_div2
+ - const: fclk_div3
+ - const: fclk_div5
+ - const: fclk_div7
+ - const: hifi_pll
+ - const: xtal
+ - const: sys_pll
+ minItems: 6 # sys_pll is optional
+
+required:
+ - compatible
+ - '#clock-cells'
+ - reg
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/amlogic,a1-pll-clkc.h>
+ apb {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ clock-controller@800 {
+ compatible = "amlogic,a1-peripherals-clkc";
+ reg = <0 0x800 0 0x104>;
+ #clock-cells = <1>;
+ clocks = <&clkc_pll CLKID_FCLK_DIV2>,
+ <&clkc_pll CLKID_FCLK_DIV3>,
+ <&clkc_pll CLKID_FCLK_DIV5>,
+ <&clkc_pll CLKID_FCLK_DIV7>,
+ <&clkc_pll CLKID_HIFI_PLL>,
+ <&xtal>,
+ <&clkc_pll CLKID_SYS_PLL>;
+ clock-names = "fclk_div2", "fclk_div3",
+ "fclk_div5", "fclk_div7",
+ "hifi_pll", "xtal", "sys_pll";
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/amlogic,a1-pll-clkc.yaml b/Documentation/devicetree/bindings/clock/amlogic,a1-pll-clkc.yaml
new file mode 100644
index 000000000000..c99274d2a9bd
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/amlogic,a1-pll-clkc.yaml
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/amlogic,a1-pll-clkc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Amlogic A1 PLL Clock Control Unit
+
+maintainers:
+ - Neil Armstrong <neil.armstrong@linaro.org>
+ - Jerome Brunet <jbrunet@baylibre.com>
+ - Jian Hu <jian.hu@jian.hu.com>
+ - Dmitry Rokosov <ddrokosov@sberdevices.ru>
+
+properties:
+ compatible:
+ const: amlogic,a1-pll-clkc
+
+ '#clock-cells':
+ const: 1
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: input fixpll_in
+ - description: input hifipll_in
+ - description: input syspll_in
+ minItems: 2 # syspll_in is optional
+
+ clock-names:
+ items:
+ - const: fixpll_in
+ - const: hifipll_in
+ - const: syspll_in
+ minItems: 2 # syspll_in is optional
+
+required:
+ - compatible
+ - '#clock-cells'
+ - reg
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/amlogic,a1-peripherals-clkc.h>
+ apb {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ clock-controller@7c80 {
+ compatible = "amlogic,a1-pll-clkc";
+ reg = <0 0x7c80 0 0x18c>;
+ #clock-cells = <1>;
+ clocks = <&clkc_periphs CLKID_FIXPLL_IN>,
+ <&clkc_periphs CLKID_HIFIPLL_IN>,
+ <&clkc_periphs CLKID_SYSPLL_IN>;
+ clock-names = "fixpll_in", "hifipll_in", "syspll_in";
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/amlogic,axg-audio-clkc.txt b/Documentation/devicetree/bindings/clock/amlogic,axg-audio-clkc.txt
deleted file mode 100644
index 61777ad24f61..000000000000
--- a/Documentation/devicetree/bindings/clock/amlogic,axg-audio-clkc.txt
+++ /dev/null
@@ -1,56 +0,0 @@
-* Amlogic AXG Audio Clock Controllers
-
-The Amlogic AXG audio clock controller generates and supplies clock to the
-other elements of the audio subsystem, such as fifos, i2s, spdif and pdm
-devices.
-
-Required Properties:
-
-- compatible : should be "amlogic,axg-audio-clkc" for the A113X and A113D
-- reg : physical base address of the clock controller and length of
- memory mapped region.
-- clocks : a list of phandle + clock-specifier pairs for the clocks listed
- in clock-names.
-- clock-names : must contain the following:
- * "pclk" - Main peripheral bus clock
- may contain the following:
- * "mst_in[0-7]" - 8 input plls to generate clock signals
- * "slv_sclk[0-9]" - 10 slave bit clocks provided by external
- components.
- * "slv_lrclk[0-9]" - 10 slave sample clocks provided by external
- components.
-- resets : phandle of the internal reset line
-- #clock-cells : should be 1.
-
-Each clock is assigned an identifier and client nodes can use this identifier
-to specify the clock which they consume. All available clocks are defined as
-preprocessor macros in the dt-bindings/clock/axg-audio-clkc.h header and can be
-used in device tree sources.
-
-Example:
-
-clkc_audio: clock-controller@0 {
- compatible = "amlogic,axg-audio-clkc";
- reg = <0x0 0x0 0x0 0xb4>;
- #clock-cells = <1>;
-
- clocks = <&clkc CLKID_AUDIO>,
- <&clkc CLKID_MPLL0>,
- <&clkc CLKID_MPLL1>,
- <&clkc CLKID_MPLL2>,
- <&clkc CLKID_MPLL3>,
- <&clkc CLKID_HIFI_PLL>,
- <&clkc CLKID_FCLK_DIV3>,
- <&clkc CLKID_FCLK_DIV4>,
- <&clkc CLKID_GP0_PLL>;
- clock-names = "pclk",
- "mst_in0",
- "mst_in1",
- "mst_in2",
- "mst_in3",
- "mst_in4",
- "mst_in5",
- "mst_in6",
- "mst_in7";
- resets = <&reset RESET_AUDIO>;
-};
diff --git a/Documentation/devicetree/bindings/clock/amlogic,axg-audio-clkc.yaml b/Documentation/devicetree/bindings/clock/amlogic,axg-audio-clkc.yaml
new file mode 100644
index 000000000000..fd7982dd4cea
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/amlogic,axg-audio-clkc.yaml
@@ -0,0 +1,201 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/amlogic,axg-audio-clkc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Amlogic AXG Audio Clock Controller
+
+maintainers:
+ - Neil Armstrong <neil.armstrong@linaro.org>
+ - Jerome Brunet <jbrunet@baylibre.com>
+
+description:
+ The Amlogic AXG audio clock controller generates and supplies clock to the
+ other elements of the audio subsystem, such as fifos, i2s, spdif and pdm
+ devices.
+
+properties:
+ compatible:
+ enum:
+ - amlogic,axg-audio-clkc
+ - amlogic,g12a-audio-clkc
+ - amlogic,sm1-audio-clkc
+
+ '#clock-cells':
+ const: 1
+
+ '#reset-cells':
+ const: 1
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ minItems: 1
+ items:
+ - description: main peripheral bus clock
+ - description: input plls to generate clock signals N0
+ - description: input plls to generate clock signals N1
+ - description: input plls to generate clock signals N2
+ - description: input plls to generate clock signals N3
+ - description: input plls to generate clock signals N4
+ - description: input plls to generate clock signals N5
+ - description: input plls to generate clock signals N6
+ - description: input plls to generate clock signals N7
+ - description: slave bit clock N0 provided by external components
+ - description: slave bit clock N1 provided by external components
+ - description: slave bit clock N2 provided by external components
+ - description: slave bit clock N3 provided by external components
+ - description: slave bit clock N4 provided by external components
+ - description: slave bit clock N5 provided by external components
+ - description: slave bit clock N6 provided by external components
+ - description: slave bit clock N7 provided by external components
+ - description: slave bit clock N8 provided by external components
+ - description: slave bit clock N9 provided by external components
+ - description: slave sample clock N0 provided by external components
+ - description: slave sample clock N1 provided by external components
+ - description: slave sample clock N2 provided by external components
+ - description: slave sample clock N3 provided by external components
+ - description: slave sample clock N4 provided by external components
+ - description: slave sample clock N5 provided by external components
+ - description: slave sample clock N6 provided by external components
+ - description: slave sample clock N7 provided by external components
+ - description: slave sample clock N8 provided by external components
+ - description: slave sample clock N9 provided by external components
+
+ clock-names:
+ minItems: 1
+ items:
+ - const: pclk
+ - const: mst_in0
+ - const: mst_in1
+ - const: mst_in2
+ - const: mst_in3
+ - const: mst_in4
+ - const: mst_in5
+ - const: mst_in6
+ - const: mst_in7
+ - const: slv_sclk0
+ - const: slv_sclk1
+ - const: slv_sclk2
+ - const: slv_sclk3
+ - const: slv_sclk4
+ - const: slv_sclk5
+ - const: slv_sclk6
+ - const: slv_sclk7
+ - const: slv_sclk8
+ - const: slv_sclk9
+ - const: slv_lrclk0
+ - const: slv_lrclk1
+ - const: slv_lrclk2
+ - const: slv_lrclk3
+ - const: slv_lrclk4
+ - const: slv_lrclk5
+ - const: slv_lrclk6
+ - const: slv_lrclk7
+ - const: slv_lrclk8
+ - const: slv_lrclk9
+
+ resets:
+ description: internal reset line
+
+required:
+ - compatible
+ - '#clock-cells'
+ - reg
+ - clocks
+ - clock-names
+ - resets
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - amlogic,g12a-audio-clkc
+ - amlogic,sm1-audio-clkc
+ then:
+ required:
+ - '#reset-cells'
+ else:
+ properties:
+ '#reset-cells': false
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/axg-clkc.h>
+ #include <dt-bindings/reset/amlogic,meson-axg-reset.h>
+ apb {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ clkc_audio: clock-controller@0 {
+ compatible = "amlogic,axg-audio-clkc";
+ reg = <0x0 0x0 0x0 0xb4>;
+ #clock-cells = <1>;
+
+ clocks = <&clkc CLKID_AUDIO>,
+ <&clkc CLKID_MPLL0>,
+ <&clkc CLKID_MPLL1>,
+ <&clkc CLKID_MPLL2>,
+ <&clkc CLKID_MPLL3>,
+ <&clkc CLKID_HIFI_PLL>,
+ <&clkc CLKID_FCLK_DIV3>,
+ <&clkc CLKID_FCLK_DIV4>,
+ <&clkc CLKID_GP0_PLL>,
+ <&slv_sclk0>,
+ <&slv_sclk1>,
+ <&slv_sclk2>,
+ <&slv_sclk3>,
+ <&slv_sclk4>,
+ <&slv_sclk5>,
+ <&slv_sclk6>,
+ <&slv_sclk7>,
+ <&slv_sclk8>,
+ <&slv_sclk9>,
+ <&slv_lrclk0>,
+ <&slv_lrclk1>,
+ <&slv_lrclk2>,
+ <&slv_lrclk3>,
+ <&slv_lrclk4>,
+ <&slv_lrclk5>,
+ <&slv_lrclk6>,
+ <&slv_lrclk7>,
+ <&slv_lrclk8>,
+ <&slv_lrclk9>;
+ clock-names = "pclk",
+ "mst_in0",
+ "mst_in1",
+ "mst_in2",
+ "mst_in3",
+ "mst_in4",
+ "mst_in5",
+ "mst_in6",
+ "mst_in7",
+ "slv_sclk0",
+ "slv_sclk1",
+ "slv_sclk2",
+ "slv_sclk3",
+ "slv_sclk4",
+ "slv_sclk5",
+ "slv_sclk6",
+ "slv_sclk7",
+ "slv_sclk8",
+ "slv_sclk9",
+ "slv_lrclk0",
+ "slv_lrclk1",
+ "slv_lrclk2",
+ "slv_lrclk3",
+ "slv_lrclk4",
+ "slv_lrclk5",
+ "slv_lrclk6",
+ "slv_lrclk7",
+ "slv_lrclk8",
+ "slv_lrclk9";
+ resets = <&reset RESET_AUDIO>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/amlogic,c3-peripherals-clkc.yaml b/Documentation/devicetree/bindings/clock/amlogic,c3-peripherals-clkc.yaml
new file mode 100644
index 000000000000..98e30b8c0529
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/amlogic,c3-peripherals-clkc.yaml
@@ -0,0 +1,120 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (C) 2022-2023 Amlogic, Inc. All rights reserved
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/amlogic,c3-peripherals-clkc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Amlogic C3 series Peripheral Clock Controller
+
+maintainers:
+ - Neil Armstrong <neil.armstrong@linaro.org>
+ - Jerome Brunet <jbrunet@baylibre.com>
+ - Xianwei Zhao <xianwei.zhao@amlogic.com>
+ - Chuan Liu <chuan.liu@amlogic.com>
+
+properties:
+ compatible:
+ const: amlogic,c3-peripherals-clkc
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ minItems: 16
+ items:
+ - description: input oscillator (usually at 24MHz)
+ - description: input oscillators multiplexer
+ - description: input fix pll
+ - description: input fclk div 2
+ - description: input fclk div 2p5
+ - description: input fclk div 3
+ - description: input fclk div 4
+ - description: input fclk div 5
+ - description: input fclk div 7
+ - description: input gp0 pll
+ - description: input gp1 pll
+ - description: input hifi pll
+ - description: input sys clk
+ - description: input axi clk
+ - description: input sys pll div 16
+ - description: input cpu clk div 16
+ - description: input pad clock for rtc clk (optional)
+
+ clock-names:
+ minItems: 16
+ items:
+ - const: xtal_24m
+ - const: oscin
+ - const: fix
+ - const: fdiv2
+ - const: fdiv2p5
+ - const: fdiv3
+ - const: fdiv4
+ - const: fdiv5
+ - const: fdiv7
+ - const: gp0
+ - const: gp1
+ - const: hifi
+ - const: sysclk
+ - const: axiclk
+ - const: sysplldiv16
+ - const: cpudiv16
+ - const: pad_osc
+
+ "#clock-cells":
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - "#clock-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ apb {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ clock-controller@0 {
+ compatible = "amlogic,c3-peripherals-clkc";
+ reg = <0x0 0x0 0x0 0x49c>;
+ #clock-cells = <1>;
+ clocks = <&xtal_24m>,
+ <&scmi_clk 8>,
+ <&scmi_clk 12>,
+ <&clkc_pll 3>,
+ <&clkc_pll 5>,
+ <&clkc_pll 7>,
+ <&clkc_pll 9>,
+ <&clkc_pll 11>,
+ <&clkc_pll 13>,
+ <&clkc_pll 15>,
+ <&scmi_clk 13>,
+ <&clkc_pll 17>,
+ <&scmi_clk 9>,
+ <&scmi_clk 10>,
+ <&scmi_clk 14>,
+ <&scmi_clk 15>;
+ clock-names = "xtal_24m",
+ "oscin",
+ "fix",
+ "fdiv2",
+ "fdiv2p5",
+ "fdiv3",
+ "fdiv4",
+ "fdiv5",
+ "fdiv7",
+ "gp0",
+ "gp1",
+ "hifi",
+ "sysclk",
+ "axiclk",
+ "sysplldiv16",
+ "cpudiv16";
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/amlogic,c3-pll-clkc.yaml b/Documentation/devicetree/bindings/clock/amlogic,c3-pll-clkc.yaml
new file mode 100644
index 000000000000..700865cc9792
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/amlogic,c3-pll-clkc.yaml
@@ -0,0 +1,62 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (C) 2022-2023 Amlogic, Inc. All rights reserved
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/amlogic,c3-pll-clkc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Amlogic C3 series PLL Clock Controller
+
+maintainers:
+ - Neil Armstrong <neil.armstrong@linaro.org>
+ - Jerome Brunet <jbrunet@baylibre.com>
+ - Chuan Liu <chuan.liu@amlogic.com>
+ - Xianwei Zhao <xianwei.zhao@amlogic.com>
+
+properties:
+ compatible:
+ const: amlogic,c3-pll-clkc
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: input top pll
+ - description: input mclk pll
+ - description: input fix pll
+
+ clock-names:
+ items:
+ - const: top
+ - const: mclk
+ - const: fix
+
+ "#clock-cells":
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - "#clock-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ apb {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ clock-controller@8000 {
+ compatible = "amlogic,c3-pll-clkc";
+ reg = <0x0 0x8000 0x0 0x1a4>;
+ clocks = <&scmi_clk 2>,
+ <&scmi_clk 5>,
+ <&scmi_clk 12>;
+ clock-names = "top", "mclk", "fix";
+ #clock-cells = <1>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/amlogic,gxbb-aoclkc.txt b/Documentation/devicetree/bindings/clock/amlogic,gxbb-aoclkc.txt
deleted file mode 100644
index 3a880528030e..000000000000
--- a/Documentation/devicetree/bindings/clock/amlogic,gxbb-aoclkc.txt
+++ /dev/null
@@ -1,55 +0,0 @@
-* Amlogic GXBB AO Clock and Reset Unit
-
-The Amlogic GXBB AO clock controller generates and supplies clock to various
-controllers within the Always-On part of the SoC.
-
-Required Properties:
-
-- compatible: value should be different for each SoC family as :
- - GXBB (S905) : "amlogic,meson-gxbb-aoclkc"
- - GXL (S905X, S905D) : "amlogic,meson-gxl-aoclkc"
- - GXM (S912) : "amlogic,meson-gxm-aoclkc"
- - AXG (A113D, A113X) : "amlogic,meson-axg-aoclkc"
- followed by the common "amlogic,meson-gx-aoclkc"
-
-- #clock-cells: should be 1.
-
-Each clock is assigned an identifier and client nodes can use this identifier
-to specify the clock which they consume. All available clocks are defined as
-preprocessor macros in the dt-bindings/clock/gxbb-aoclkc.h header and can be
-used in device tree sources.
-
-- #reset-cells: should be 1.
-
-Each reset is assigned an identifier and client nodes can use this identifier
-to specify the reset which they consume. All available resets are defined as
-preprocessor macros in the dt-bindings/reset/gxbb-aoclkc.h header and can be
-used in device tree sources.
-
-Parent node should have the following properties :
-- compatible: "amlogic,meson-gx-ao-sysctrl", "syscon", "simple-mfd"
-- reg: base address and size of the AO system control register space.
-
-Example: AO Clock controller node:
-
-ao_sysctrl: sys-ctrl@0 {
- compatible = "amlogic,meson-gx-ao-sysctrl", "syscon", "simple-mfd";
- reg = <0x0 0x0 0x0 0x100>;
-
- clkc_AO: clock-controller {
- compatible = "amlogic,meson-gxbb-aoclkc", "amlogic,meson-gx-aoclkc";
- #clock-cells = <1>;
- #reset-cells = <1>;
- };
-};
-
-Example: UART controller node that consumes the clock and reset generated
- by the clock controller:
-
- uart_AO: serial@4c0 {
- compatible = "amlogic,meson-uart";
- reg = <0x4c0 0x14>;
- interrupts = <0 90 1>;
- clocks = <&clkc_AO CLKID_AO_UART1>;
- resets = <&clkc_AO RESET_AO_UART1>;
- };
diff --git a/Documentation/devicetree/bindings/clock/amlogic,gxbb-aoclkc.yaml b/Documentation/devicetree/bindings/clock/amlogic,gxbb-aoclkc.yaml
new file mode 100644
index 000000000000..628e5dd33dd4
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/amlogic,gxbb-aoclkc.yaml
@@ -0,0 +1,85 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/amlogic,gxbb-aoclkc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Amlogic Always-On Clock Controller
+
+maintainers:
+ - Neil Armstrong <neil.armstrong@linaro.org>
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - amlogic,meson-gxbb-aoclkc
+ - amlogic,meson-gxl-aoclkc
+ - amlogic,meson-gxm-aoclkc
+ - amlogic,meson-axg-aoclkc
+ - const: amlogic,meson-gx-aoclkc
+ - enum:
+ - amlogic,meson-axg-aoclkc
+ - amlogic,meson-g12a-aoclkc
+
+ clocks:
+ minItems: 2
+ maxItems: 5
+
+ clock-names:
+ minItems: 2
+ items:
+ - const: xtal
+ - const: mpeg-clk
+ - const: ext-32k-0
+ - const: ext-32k-1
+ - const: ext-32k-2
+
+ '#clock-cells':
+ const: 1
+
+ '#reset-cells':
+ const: 1
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - '#clock-cells'
+ - '#reset-cells'
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ enum:
+ - amlogic,meson-g12a-aoclkc
+
+ then:
+ properties:
+ clocks:
+ minItems: 2
+ maxItems: 3
+
+ clock-names:
+ minItems: 2
+ maxItems: 3
+
+ - if:
+ properties:
+ compatible:
+ enum:
+ - amlogic,meson-gxl-aoclkc
+ - amlogic,meson-gxm-aoclkc
+ - amlogic,meson-axg-aoclkc
+
+ then:
+ properties:
+ clocks:
+ maxItems: 2
+
+ clock-names:
+ maxItems: 2
+
+additionalProperties: false
diff --git a/Documentation/devicetree/bindings/clock/amlogic,gxbb-clkc.txt b/Documentation/devicetree/bindings/clock/amlogic,gxbb-clkc.txt
deleted file mode 100644
index e950599566a9..000000000000
--- a/Documentation/devicetree/bindings/clock/amlogic,gxbb-clkc.txt
+++ /dev/null
@@ -1,45 +0,0 @@
-* Amlogic GXBB Clock and Reset Unit
-
-The Amlogic GXBB clock controller generates and supplies clock to various
-controllers within the SoC.
-
-Required Properties:
-
-- compatible: should be:
- "amlogic,gxbb-clkc" for GXBB SoC,
- "amlogic,gxl-clkc" for GXL and GXM SoC,
- "amlogic,axg-clkc" for AXG SoC.
-
-- #clock-cells: should be 1.
-
-Each clock is assigned an identifier and client nodes can use this identifier
-to specify the clock which they consume. All available clocks are defined as
-preprocessor macros in the dt-bindings/clock/gxbb-clkc.h header and can be
-used in device tree sources.
-
-Parent node should have the following properties :
-- compatible: "syscon", "simple-mfd, and "amlogic,meson-gx-hhi-sysctrl" or
- "amlogic,meson-axg-hhi-sysctrl"
-- reg: base address and size of the HHI system control register space.
-
-Example: Clock controller node:
-
-sysctrl: system-controller@0 {
- compatible = "amlogic,meson-gx-hhi-sysctrl", "syscon", "simple-mfd";
- reg = <0 0 0 0x400>;
-
- clkc: clock-controller {
- #clock-cells = <1>;
- compatible = "amlogic,gxbb-clkc";
- };
-};
-
-Example: UART controller node that consumes the clock generated by the clock
- controller:
-
- uart_AO: serial@c81004c0 {
- compatible = "amlogic,meson-uart";
- reg = <0xc81004c0 0x14>;
- interrupts = <0 90 1>;
- clocks = <&clkc CLKID_CLK81>;
- };
diff --git a/Documentation/devicetree/bindings/clock/amlogic,gxbb-clkc.yaml b/Documentation/devicetree/bindings/clock/amlogic,gxbb-clkc.yaml
new file mode 100644
index 000000000000..63246f1cb539
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/amlogic,gxbb-clkc.yaml
@@ -0,0 +1,37 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/amlogic,gxbb-clkc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Amlogic Clock Controller
+
+maintainers:
+ - Neil Armstrong <neil.armstrong@linaro.org>
+
+properties:
+ compatible:
+ enum:
+ - amlogic,gxbb-clkc
+ - amlogic,gxl-clkc
+ - amlogic,axg-clkc
+ - amlogic,g12a-clkc
+ - amlogic,g12b-clkc
+ - amlogic,sm1-clkc
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ const: xtal
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - '#clock-cells'
+
+additionalProperties: false
diff --git a/Documentation/devicetree/bindings/clock/amlogic,meson8-clkc.yaml b/Documentation/devicetree/bindings/clock/amlogic,meson8-clkc.yaml
new file mode 100644
index 000000000000..ab73d4654171
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/amlogic,meson8-clkc.yaml
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/amlogic,meson8-clkc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Amlogic Meson8, Meson8b and Meson8m2 Clock and Reset Controller
+
+maintainers:
+ - Neil Armstrong <neil.armstrong@linaro.org>
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - amlogic,meson8-clkc
+ - amlogic,meson8b-clkc
+ - items:
+ - const: amlogic,meson8m2-clkc
+ - const: amlogic,meson8-clkc
+
+ clocks:
+ minItems: 2
+ maxItems: 3
+
+ clock-names:
+ minItems: 2
+ items:
+ - const: xtal
+ - const: ddr_pll
+ - const: clk_32k
+
+ '#clock-cells':
+ const: 1
+
+ '#reset-cells':
+ const: 1
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - '#reset-cells'
+
+additionalProperties: false
diff --git a/Documentation/devicetree/bindings/clock/amlogic,meson8-ddr-clkc.yaml b/Documentation/devicetree/bindings/clock/amlogic,meson8-ddr-clkc.yaml
new file mode 100644
index 000000000000..d98d95d8e8c9
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/amlogic,meson8-ddr-clkc.yaml
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/amlogic,meson8-ddr-clkc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Amlogic DDR Clock Controller
+
+maintainers:
+ - Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+
+properties:
+ compatible:
+ enum:
+ - amlogic,meson8-ddr-clkc
+ - amlogic,meson8b-ddr-clkc
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ items:
+ - const: xtal
+
+ "#clock-cells":
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - "#clock-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ ddr_clkc: clock-controller@400 {
+ compatible = "amlogic,meson8-ddr-clkc";
+ reg = <0x400 0x20>;
+ clocks = <&xtal>;
+ clock-names = "xtal";
+ #clock-cells = <1>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/amlogic,meson8b-clkc.txt b/Documentation/devicetree/bindings/clock/amlogic,meson8b-clkc.txt
deleted file mode 100644
index b455c5aa9139..000000000000
--- a/Documentation/devicetree/bindings/clock/amlogic,meson8b-clkc.txt
+++ /dev/null
@@ -1,49 +0,0 @@
-* Amlogic Meson8, Meson8b and Meson8m2 Clock and Reset Unit
-
-The Amlogic Meson8 / Meson8b / Meson8m2 clock controller generates and
-supplies clock to various controllers within the SoC.
-
-Required Properties:
-
-- compatible: must be one of:
- - "amlogic,meson8-clkc" for Meson8 (S802) SoCs
- - "amlogic,meson8b-clkc" for Meson8 (S805) SoCs
- - "amlogic,meson8m2-clkc" for Meson8m2 (S812) SoCs
-- reg: it must be composed by two tuples:
- 0) physical base address of the xtal register and length of memory
- mapped region.
- 1) physical base address of the clock controller and length of memory
- mapped region.
-
-- #clock-cells: should be 1.
-- #reset-cells: should be 1.
-
-Each clock is assigned an identifier and client nodes can use this identifier
-to specify the clock which they consume. All available clocks are defined as
-preprocessor macros in the dt-bindings/clock/meson8b-clkc.h header and can be
-used in device tree sources.
-
-Similarly a preprocessor macro for each reset line is defined in
-dt-bindings/reset/amlogic,meson8b-clkc-reset.h (which can be used from the
-device tree sources).
-
-
-Example: Clock controller node:
-
- clkc: clock-controller@c1104000 {
- compatible = "amlogic,meson8b-clkc";
- reg = <0xc1108000 0x4>, <0xc1104000 0x460>;
- #clock-cells = <1>;
- #reset-cells = <1>;
- };
-
-
-Example: UART controller node that consumes the clock generated by the clock
- controller:
-
- uart_AO: serial@c81004c0 {
- compatible = "amlogic,meson-uart";
- reg = <0xc81004c0 0x14>;
- interrupts = <0 90 1>;
- clocks = <&clkc CLKID_CLK81>;
- };
diff --git a/Documentation/devicetree/bindings/clock/amlogic,s4-peripherals-clkc.yaml b/Documentation/devicetree/bindings/clock/amlogic,s4-peripherals-clkc.yaml
new file mode 100644
index 000000000000..c229e4f0c1d9
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/amlogic,s4-peripherals-clkc.yaml
@@ -0,0 +1,96 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (C) 2022-2023 Amlogic, Inc. All rights reserved
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/amlogic,s4-peripherals-clkc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Amlogic S4 Peripherals Clock Controller
+
+maintainers:
+ - Yu Tu <yu.tu@amlogic.com>
+
+properties:
+ compatible:
+ const: amlogic,s4-peripherals-clkc
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ minItems: 14
+ items:
+ - description: input fixed pll div2
+ - description: input fixed pll div2p5
+ - description: input fixed pll div3
+ - description: input fixed pll div4
+ - description: input fixed pll div5
+ - description: input fixed pll div7
+ - description: input hifi pll
+ - description: input gp0 pll
+ - description: input mpll0
+ - description: input mpll1
+ - description: input mpll2
+ - description: input mpll3
+ - description: input hdmi pll
+ - description: input oscillator (usually at 24MHz)
+ - description: input external 32kHz reference (optional)
+
+ clock-names:
+ minItems: 14
+ items:
+ - const: fclk_div2
+ - const: fclk_div2p5
+ - const: fclk_div3
+ - const: fclk_div4
+ - const: fclk_div5
+ - const: fclk_div7
+ - const: hifi_pll
+ - const: gp0_pll
+ - const: mpll0
+ - const: mpll1
+ - const: mpll2
+ - const: mpll3
+ - const: hdmi_pll
+ - const: xtal
+ - const: ext_32k
+
+ "#clock-cells":
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - "#clock-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/amlogic,s4-peripherals-clkc.h>
+
+ clkc_periphs: clock-controller@fe000000 {
+ compatible = "amlogic,s4-peripherals-clkc";
+ reg = <0xfe000000 0x49c>;
+ clocks = <&clkc_pll 3>,
+ <&clkc_pll 13>,
+ <&clkc_pll 5>,
+ <&clkc_pll 7>,
+ <&clkc_pll 9>,
+ <&clkc_pll 11>,
+ <&clkc_pll 17>,
+ <&clkc_pll 15>,
+ <&clkc_pll 25>,
+ <&clkc_pll 27>,
+ <&clkc_pll 29>,
+ <&clkc_pll 31>,
+ <&clkc_pll 20>,
+ <&xtal>;
+ clock-names = "fclk_div2", "fclk_div2p5", "fclk_div3", "fclk_div4",
+ "fclk_div5", "fclk_div7", "hifi_pll", "gp0_pll",
+ "mpll0", "mpll1", "mpll2", "mpll3", "hdmi_pll", "xtal";
+ #clock-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/amlogic,s4-pll-clkc.yaml b/Documentation/devicetree/bindings/clock/amlogic,s4-pll-clkc.yaml
new file mode 100644
index 000000000000..d8932ec26ca8
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/amlogic,s4-pll-clkc.yaml
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (C) 2022-2023 Amlogic, Inc. All rights reserved
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/amlogic,s4-pll-clkc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Amlogic S4 PLL Clock Controller
+
+maintainers:
+ - Yu Tu <yu.tu@amlogic.com>
+
+properties:
+ compatible:
+ const: amlogic,s4-pll-clkc
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ items:
+ - const: xtal
+
+ "#clock-cells":
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - "#clock-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ clkc_pll: clock-controller@fe008000 {
+ compatible = "amlogic,s4-pll-clkc";
+ reg = <0xfe008000 0x1e8>;
+ clocks = <&xtal>;
+ clock-names = "xtal";
+ #clock-cells = <1>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/apm,xgene-device-clock.yaml b/Documentation/devicetree/bindings/clock/apm,xgene-device-clock.yaml
new file mode 100644
index 000000000000..b27bcb2a9ee0
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/apm,xgene-device-clock.yaml
@@ -0,0 +1,80 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/apm,xgene-device-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: APM X-Gene SoC device clocks
+
+maintainers:
+ - Khuong Dinh <khuong@os.amperecomputing.com>
+
+properties:
+ compatible:
+ const: apm,xgene-device-clock
+
+ reg:
+ minItems: 1
+ maxItems: 2
+
+ reg-names:
+ items:
+ - enum: [ csr-reg, div-reg ]
+ - const: div-reg
+ minItems: 1
+
+ clocks:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 1
+
+ clock-output-names:
+ maxItems: 1
+
+ clock-names:
+ maxItems: 1
+
+ csr-offset:
+ description: Offset to the CSR reset register
+ $ref: /schemas/types.yaml#/definitions/uint32
+ default: 0
+
+ csr-mask:
+ description: CSR reset mask bit
+ $ref: /schemas/types.yaml#/definitions/uint32
+ default: 0xf
+
+ enable-offset:
+ description: Offset to the enable register
+ $ref: /schemas/types.yaml#/definitions/uint32
+ default: 8
+
+ enable-mask:
+ description: CSR enable mask bit
+ $ref: /schemas/types.yaml#/definitions/uint32
+ default: 0xf
+
+ divider-offset:
+ description: Offset to the divider register
+ $ref: /schemas/types.yaml#/definitions/uint32
+ default: 0
+
+ divider-width:
+ description: Width of the divider register
+ $ref: /schemas/types.yaml#/definitions/uint32
+ default: 0
+
+ divider-shift:
+ description: Bit shift of the divider register
+ $ref: /schemas/types.yaml#/definitions/uint32
+ default: 0
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - '#clock-cells'
+ - clock-output-names
+
+additionalProperties: false
diff --git a/Documentation/devicetree/bindings/clock/apm,xgene-socpll-clock.yaml b/Documentation/devicetree/bindings/clock/apm,xgene-socpll-clock.yaml
new file mode 100644
index 000000000000..bdd4a6b92bbd
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/apm,xgene-socpll-clock.yaml
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/apm,xgene-socpll-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: APM X-Gene SoC PLL, PCPPLL, and PMD clocks
+
+maintainers:
+ - Khuong Dinh <khuong@os.amperecomputing.com>
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - apm,xgene-pcppll-clock
+ - apm,xgene-pcppll-v2-clock
+ - apm,xgene-pmd-clock
+ - apm,xgene-socpll-clock
+ - apm,xgene-socpll-v2-clock
+
+ reg:
+ maxItems: 1
+
+ reg-names:
+ items:
+ - enum: [ csr-reg, div-reg ]
+ - const: div-reg
+ minItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ enum: [ pcppll, socpll ]
+
+ "#clock-cells":
+ const: 1
+
+ clock-output-names:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - '#clock-cells'
+ - clock-output-names
+
+additionalProperties: false
diff --git a/Documentation/devicetree/bindings/clock/apple,nco.yaml b/Documentation/devicetree/bindings/clock/apple,nco.yaml
new file mode 100644
index 000000000000..080454f56721
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/apple,nco.yaml
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/apple,nco.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Apple SoCs' NCO block
+
+maintainers:
+ - Martin Povišer <povik+lin@cutebit.org>
+
+description: |
+ The NCO (Numerically Controlled Oscillator) block found on Apple SoCs
+ such as the t8103 (M1) is a programmable clock generator performing
+ fractional division of a high frequency input clock.
+
+ It carries a number of independent channels and is typically used for
+ generation of audio bitclocks.
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - const: apple,t6020-nco
+ - const: apple,t8103-nco
+ - items:
+ - enum:
+ # Do not add additional SoC to this list.
+ - apple,t6000-nco
+ - apple,t8103-nco
+ - apple,t8112-nco
+ - const: apple,nco
+
+ clocks:
+ description:
+ Specifies the reference clock from which the output clocks
+ are derived through fractional division.
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - clocks
+ - '#clock-cells'
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ nco_clkref: clock-ref {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <900000000>;
+ clock-output-names = "nco-ref";
+ };
+
+ nco: clock-controller@23b044000 {
+ compatible = "apple,t8103-nco", "apple,nco";
+ reg = <0x3b044000 0x14000>;
+ #clock-cells = <1>;
+ clocks = <&nco_clkref>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/arm,syscon-icst.yaml b/Documentation/devicetree/bindings/clock/arm,syscon-icst.yaml
new file mode 100644
index 000000000000..b5533f81307c
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/arm,syscon-icst.yaml
@@ -0,0 +1,110 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/arm,syscon-icst.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM System Controller ICST Clocks
+
+maintainers:
+ - Linus Walleij <linusw@kernel.org>
+
+description: |
+ The ICS525 and ICS307 oscillators are produced by Integrated
+ Devices Technology (IDT). ARM integrated these oscillators deeply into their
+ reference designs by adding special control registers that manage such
+ oscillators to their system controllers.
+
+ The various ARM system controllers contain logic to serialize and initialize
+ an ICST clock request after a write to the 32 bit register at an offset
+ into the system controller. Furthermore, to even be able to alter one of
+ these frequencies, the system controller must first be unlocked by
+ writing a special token to another offset in the system controller.
+
+ Some ARM hardware contain special versions of the serial interface that only
+ connects the low 8 bits of the VDW (missing one bit), hard-wires RDW to
+ different values and sometimes also hard-wires the output divider. They
+ therefore have special compatible strings as per this table (the OD value is
+ the value on the pins, not the resulting output divider).
+
+ In the core modules and logic tiles, the ICST is a configurable clock fed
+ from a 24 MHz clock on the motherboard (usually the main crystal) used for
+ generating e.g. video clocks. It is located on the core module and there is
+ only one of these. This clock node must be a subnode of the core module.
+
+ Hardware variant RDW OD VDW
+
+ Integrator/AP 22 1 Bit 8 0, rest variable
+ integratorap-cm
+
+ Integrator/AP 46 3 Bit 8 0, rest variable
+ integratorap-sys
+
+ Integrator/AP 22 or 1 17 or (33 or 25 MHz)
+ integratorap-pci 14 1 14
+
+ Integrator/CP 22 variable Bit 8 0, rest variable
+ integratorcp-cm-core
+
+ Integrator/CP 22 variable Bit 8 0, rest variable
+ integratorcp-cm-mem
+
+ The ICST oscillator must be provided inside a system controller node.
+
+properties:
+ "#clock-cells":
+ const: 0
+
+ compatible:
+ enum:
+ - arm,syscon-icst525
+ - arm,syscon-icst307
+ - arm,syscon-icst525-integratorap-cm
+ - arm,syscon-icst525-integratorap-sys
+ - arm,syscon-icst525-integratorap-pci
+ - arm,syscon-icst525-integratorcp-cm-core
+ - arm,syscon-icst525-integratorcp-cm-mem
+ - arm,integrator-cm-auxosc
+ - arm,versatile-cm-auxosc
+ - arm,impd1-vco1
+ - arm,impd1-vco2
+
+ reg:
+ maxItems: 1
+ description: The VCO register
+
+ clocks:
+ description: Parent clock for the ICST VCO
+ maxItems: 1
+
+ clock-output-names:
+ maxItems: 1
+
+ lock-offset:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: Offset to the unlocking register for the oscillator
+
+ vco-offset:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: Offset to the VCO register for the oscillator
+ deprecated: true
+
+required:
+ - "#clock-cells"
+ - compatible
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ vco1: clock {
+ compatible = "arm,impd1-vco1";
+ #clock-cells = <0>;
+ lock-offset = <0x08>;
+ vco-offset = <0x00>;
+ clocks = <&sysclk>;
+ clock-output-names = "IM-PD1-VCO1";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/arm-integrator.txt b/Documentation/devicetree/bindings/clock/arm-integrator.txt
deleted file mode 100644
index 11f5f95f571b..000000000000
--- a/Documentation/devicetree/bindings/clock/arm-integrator.txt
+++ /dev/null
@@ -1,34 +0,0 @@
-Clock bindings for ARM Integrator and Versatile Core Module clocks
-
-Auxiliary Oscillator Clock
-
-This is a configurable clock fed from a 24 MHz chrystal,
-used for generating e.g. video clocks. It is located on the
-core module and there is only one of these.
-
-This clock node *must* be a subnode of the core module, since
-it obtains the base address for it's address range from its
-parent node.
-
-
-Required properties:
-- compatible: must be "arm,integrator-cm-auxosc" or "arm,versatile-cm-auxosc"
-- #clock-cells: must be <0>
-
-Optional properties:
-- clocks: parent clock(s)
-
-Example:
-
-core-module@10000000 {
- xtal24mhz: xtal24mhz@24M {
- #clock-cells = <0>;
- compatible = "fixed-clock";
- clock-frequency = <24000000>;
- };
- auxosc: cm_aux_osc@25M {
- #clock-cells = <0>;
- compatible = "arm,integrator-cm-auxosc";
- clocks = <&xtal24mhz>;
- };
-};
diff --git a/Documentation/devicetree/bindings/clock/arm-syscon-icst.txt b/Documentation/devicetree/bindings/clock/arm-syscon-icst.txt
deleted file mode 100644
index 4cd81742038f..000000000000
--- a/Documentation/devicetree/bindings/clock/arm-syscon-icst.txt
+++ /dev/null
@@ -1,70 +0,0 @@
-ARM System Controller ICST clocks
-
-The ICS525 and ICS307 oscillators are produced by Integrated Devices
-Technology (IDT). ARM integrated these oscillators deeply into their
-reference designs by adding special control registers that manage such
-oscillators to their system controllers.
-
-The various ARM system controllers contain logic to serialize and initialize
-an ICST clock request after a write to the 32 bit register at an offset
-into the system controller. Furthermore, to even be able to alter one of
-these frequencies, the system controller must first be unlocked by
-writing a special token to another offset in the system controller.
-
-Some ARM hardware contain special versions of the serial interface that only
-connects the low 8 bits of the VDW (missing one bit), hardwires RDW to
-different values and sometimes also hardwire the output divider. They
-therefore have special compatible strings as per this table (the OD value is
-the value on the pins, not the resulting output divider):
-
-Hardware variant: RDW OD VDW
-
-Integrator/AP 22 1 Bit 8 0, rest variable
-integratorap-cm
-
-Integrator/AP 46 3 Bit 8 0, rest variable
-integratorap-sys
-
-Integrator/AP 22 or 1 17 or (33 or 25 MHz)
-integratorap-pci 14 1 14
-
-Integrator/CP 22 variable Bit 8 0, rest variable
-integratorcp-cm-core
-
-Integrator/CP 22 variable Bit 8 0, rest variable
-integratorcp-cm-mem
-
-The ICST oscillator must be provided inside a system controller node.
-
-Required properties:
-- compatible: must be one of
- "arm,syscon-icst525"
- "arm,syscon-icst307"
- "arm,syscon-icst525-integratorap-cm"
- "arm,syscon-icst525-integratorap-sys"
- "arm,syscon-icst525-integratorap-pci"
- "arm,syscon-icst525-integratorcp-cm-core"
- "arm,syscon-icst525-integratorcp-cm-mem"
-- lock-offset: the offset address into the system controller where the
- unlocking register is located
-- vco-offset: the offset address into the system controller where the
- ICST control register is located (even 32 bit address)
-- #clock-cells: must be <0>
-- clocks: parent clock, since the ICST needs a parent clock to derive its
- frequency from, this attribute is compulsory.
-
-Example:
-
-syscon: syscon@10000000 {
- compatible = "syscon";
- reg = <0x10000000 0x1000>;
-
- oscclk0: osc0@c {
- compatible = "arm,syscon-icst307";
- #clock-cells = <0>;
- lock-offset = <0x20>;
- vco-offset = <0x0c>;
- clocks = <&xtal24mhz>;
- };
- (...)
-};
diff --git a/Documentation/devicetree/bindings/clock/armada3700-periph-clock.txt b/Documentation/devicetree/bindings/clock/armada3700-periph-clock.txt
deleted file mode 100644
index 1e3370ba189f..000000000000
--- a/Documentation/devicetree/bindings/clock/armada3700-periph-clock.txt
+++ /dev/null
@@ -1,70 +0,0 @@
-* Peripheral Clock bindings for Marvell Armada 37xx SoCs
-
-Marvell Armada 37xx SoCs provide peripheral clocks which are
-used as clock source for the peripheral of the SoC.
-
-There are two different blocks associated to north bridge and south
-bridge.
-
-The peripheral clock consumer should specify the desired clock by
-having the clock ID in its "clocks" phandle cell.
-
-The following is a list of provided IDs for Armada 370 North bridge clocks:
-ID Clock name Description
------------------------------------
-0 mmc MMC controller
-1 sata_host Sata Host
-2 sec_at Security AT
-3 sac_dap Security DAP
-4 tsecm Security Engine
-5 setm_tmx Serial Embedded Trace Module
-6 avs Adaptive Voltage Scaling
-7 sqf SPI
-8 pwm PWM
-9 i2c_2 I2C 2
-10 i2c_1 I2C 1
-11 ddr_phy DDR PHY
-12 ddr_fclk DDR F clock
-13 trace Trace
-14 counter Counter
-15 eip97 EIP 97
-16 cpu CPU
-
-The following is a list of provided IDs for Armada 370 South bridge clocks:
-ID Clock name Description
------------------------------------
-0 gbe-50 50 MHz parent clock for Gigabit Ethernet
-1 gbe-core parent clock for Gigabit Ethernet core
-2 gbe-125 125 MHz parent clock for Gigabit Ethernet
-3 gbe1-50 50 MHz clock for Gigabit Ethernet port 1
-4 gbe0-50 50 MHz clock for Gigabit Ethernet port 0
-5 gbe1-125 125 MHz clock for Gigabit Ethernet port 1
-6 gbe0-125 125 MHz clock for Gigabit Ethernet port 0
-7 gbe1-core Gigabit Ethernet core port 1
-8 gbe0-core Gigabit Ethernet core port 0
-9 gbe-bm Gigabit Ethernet Buffer Manager
-10 sdio SDIO
-11 usb32-sub2-sys USB 2 clock
-12 usb32-ss-sys USB 3 clock
-
-Required properties:
-
-- compatible : shall be "marvell,armada-3700-periph-clock-nb" for the
- north bridge block, or
- "marvell,armada-3700-periph-clock-sb" for the south bridge block
-- reg : must be the register address of North/South Bridge Clock register
-- #clock-cells : from common clock binding; shall be set to 1
-
-- clocks : list of the parent clock phandle in the following order:
- TBG-A P, TBG-B P, TBG-A S, TBG-B S and finally the xtal clock.
-
-
-Example:
-
-nb_perih_clk: nb-periph-clk@13000{
- compatible = "marvell,armada-3700-periph-clock-nb";
- reg = <0x13000 0x1000>;
- clocks = <&tbg 0>, <&tbg 1>, <&tbg 2>,
- <&tbg 3>, <&xtalclk>;
- #clock-cells = <1>;
-};
diff --git a/Documentation/devicetree/bindings/clock/armada3700-tbg-clock.txt b/Documentation/devicetree/bindings/clock/armada3700-tbg-clock.txt
deleted file mode 100644
index 0ba1d83ff363..000000000000
--- a/Documentation/devicetree/bindings/clock/armada3700-tbg-clock.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-* Time Base Generator Clock bindings for Marvell Armada 37xx SoCs
-
-Marvell Armada 37xx SoCs provde Time Base Generator clocks which are
-used as parent clocks for the peripheral clocks.
-
-The TBG clock consumer should specify the desired clock by having the
-clock ID in its "clocks" phandle cell.
-
-The following is a list of provided IDs and clock names on Armada 3700:
- 0 = TBG A P
- 1 = TBG B P
- 2 = TBG A S
- 3 = TBG B S
-
-Required properties:
-- compatible : shall be "marvell,armada-3700-tbg-clock"
-- reg : must be the register address of North Bridge PLL register
-- #clock-cells : from common clock binding; shall be set to 1
-
-Example:
-
-tbg: tbg@13200 {
- compatible = "marvell,armada-3700-tbg-clock";
- reg = <0x13200 0x1000>;
- clocks = <&xtalclk>;
- #clock-cells = <1>;
-};
diff --git a/Documentation/devicetree/bindings/clock/artpec6.txt b/Documentation/devicetree/bindings/clock/artpec6.txt
deleted file mode 100644
index dff9cdf0009c..000000000000
--- a/Documentation/devicetree/bindings/clock/artpec6.txt
+++ /dev/null
@@ -1,41 +0,0 @@
-* Clock bindings for Axis ARTPEC-6 chip
-
-The bindings are based on the clock provider binding in
-Documentation/devicetree/bindings/clock/clock-bindings.txt
-
-External clocks:
-----------------
-
-There are two external inputs to the main clock controller which should be
-provided using the common clock bindings.
-- "sys_refclk": External 50 Mhz oscillator (required)
-- "i2s_refclk": Alternate audio reference clock (optional).
-
-Main clock controller
----------------------
-
-Required properties:
-- #clock-cells: Should be <1>
- See dt-bindings/clock/axis,artpec6-clkctrl.h for the list of valid identifiers.
-- compatible: Should be "axis,artpec6-clkctrl"
-- reg: Must contain the base address and length of the system controller
-- clocks: Must contain a phandle entry for each clock in clock-names
-- clock-names: Must include the external oscillator ("sys_refclk"). Optional
- ones are the audio reference clock ("i2s_refclk") and the audio fractional
- dividers ("frac_clk0" and "frac_clk1").
-
-Examples:
-
-ext_clk: ext_clk {
- #clock-cells = <0>;
- compatible = "fixed-clock";
- clock-frequency = <50000000>;
-};
-
-clkctrl: clkctrl@f8000000 {
- #clock-cells = <1>;
- compatible = "axis,artpec6-clkctrl";
- reg = <0xf8000000 0x48>;
- clocks = <&ext_clk>;
- clock-names = "sys_refclk";
-};
diff --git a/Documentation/devicetree/bindings/clock/at91-clock.txt b/Documentation/devicetree/bindings/clock/at91-clock.txt
deleted file mode 100644
index e9f70fcdfe80..000000000000
--- a/Documentation/devicetree/bindings/clock/at91-clock.txt
+++ /dev/null
@@ -1,62 +0,0 @@
-Device Tree Clock bindings for arch-at91
-
-This binding uses the common clock binding[1].
-
-[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-
-Slow Clock controller:
-
-Required properties:
-- compatible : shall be one of the following:
- "atmel,at91sam9x5-sckc" or
- "atmel,sama5d4-sckc":
- at91 SCKC (Slow Clock Controller)
- This node contains the slow clock definitions.
-
- "atmel,at91sam9x5-clk-slow-osc":
- at91 slow oscillator
-
- "atmel,at91sam9x5-clk-slow-rc-osc":
- at91 internal slow RC oscillator
-- reg : defines the IO memory reserved for the SCKC.
-- #size-cells : shall be 0 (reg is used to encode clk id).
-- #address-cells : shall be 1 (reg is used to encode clk id).
-
-
-For example:
- sckc: sckc@fffffe50 {
- compatible = "atmel,sama5d3-pmc";
- reg = <0xfffffe50 0x4>
- #size-cells = <0>;
- #address-cells = <1>;
-
- /* put at91 slow clocks here */
- };
-
-Power Management Controller (PMC):
-
-Required properties:
-- compatible : shall be "atmel,<chip>-pmc", "syscon":
- <chip> can be: at91rm9200, at91sam9260, at91sam9261,
- at91sam9263, at91sam9g45, at91sam9n12, at91sam9rl, at91sam9g15,
- at91sam9g25, at91sam9g35, at91sam9x25, at91sam9x35, at91sam9x5,
- sama5d2, sama5d3 or sama5d4.
-- #clock-cells : from common clock binding; shall be set to 2. The first entry
- is the type of the clock (core, system, peripheral or generated) and the
- second entry its index as provided by the datasheet
-- clocks : Must contain an entry for each entry in clock-names.
-- clock-names: Must include the following entries: "slow_clk", "main_xtal"
-
-Optional properties:
-- atmel,osc-bypass : boolean property. Set this when a clock signal is directly
- provided on XIN.
-
-For example:
- pmc: pmc@f0018000 {
- compatible = "atmel,sama5d4-pmc", "syscon";
- reg = <0xf0018000 0x120>;
- interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>;
- #clock-cells = <2>;
- clocks = <&clk32k>, <&main_xtal>;
- clock-names = "slow_clk", "main_xtal";
- };
diff --git a/Documentation/devicetree/bindings/clock/atmel,at91rm9200-pmc.yaml b/Documentation/devicetree/bindings/clock/atmel,at91rm9200-pmc.yaml
new file mode 100644
index 000000000000..e803a1fc3681
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/atmel,at91rm9200-pmc.yaml
@@ -0,0 +1,162 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/atmel,at91rm9200-pmc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Atmel Power Management Controller (PMC)
+
+maintainers:
+ - Claudiu Beznea <claudiu.beznea@microchip.com>
+
+description:
+ The power management controller optimizes power consumption by controlling all
+ system and user peripheral clocks. The PMC enables/disables the clock inputs
+ to many of the peripherals and to the processor.
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - const: atmel,at91sam9g20-pmc
+ - const: atmel,at91sam9260-pmc
+ - const: syscon
+ - items:
+ - enum:
+ - atmel,at91sam9g15-pmc
+ - atmel,at91sam9g25-pmc
+ - atmel,at91sam9g35-pmc
+ - atmel,at91sam9x25-pmc
+ - atmel,at91sam9x35-pmc
+ - const: atmel,at91sam9x5-pmc
+ - const: syscon
+ - items:
+ - enum:
+ - atmel,at91rm9200-pmc
+ - atmel,at91sam9260-pmc
+ - atmel,at91sam9261-pmc
+ - atmel,at91sam9263-pmc
+ - atmel,at91sam9g45-pmc
+ - atmel,at91sam9n12-pmc
+ - atmel,at91sam9rl-pmc
+ - atmel,at91sam9x5-pmc
+ - atmel,sama5d2-pmc
+ - atmel,sama5d3-pmc
+ - atmel,sama5d4-pmc
+ - microchip,sam9x60-pmc
+ - microchip,sam9x7-pmc
+ - microchip,sama7d65-pmc
+ - microchip,sama7g5-pmc
+ - const: syscon
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ "#clock-cells":
+ description: |
+ - 1st cell is the clock type, one of PMC_TYPE_CORE, PMC_TYPE_SYSTEM,
+ PMC_TYPE_PERIPHERAL, PMC_TYPE_GCK, PMC_TYPE_PROGRAMMABLE (as defined
+ in <dt-bindings/clock/at91.h>)
+ - 2nd cell is the clock identifier as defined in <dt-bindings/clock/at91.h
+ (for core clocks) or as defined in datasheet (for system, peripheral,
+ gck and programmable clocks).
+ const: 2
+
+ clocks:
+ minItems: 2
+ maxItems: 3
+
+ clock-names:
+ minItems: 2
+ maxItems: 3
+
+ atmel,osc-bypass:
+ description: set when a clock signal is directly provided on XIN
+ type: boolean
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - "#clock-cells"
+ - clocks
+ - clock-names
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - microchip,sam9x60-pmc
+ - microchip,sam9x7-pmc
+ - microchip,sama7d65-pmc
+ - microchip,sama7g5-pmc
+ then:
+ properties:
+ clocks:
+ minItems: 3
+ maxItems: 3
+ clock-names:
+ items:
+ - const: td_slck
+ - const: md_slck
+ - const: main_xtal
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - atmel,at91rm9200-pmc
+ - atmel,at91sam9260-pmc
+ - atmel,at91sam9261-pmc
+ - atmel,at91sam9263-pmc
+ - atmel,at91sam9g20-pmc
+ then:
+ properties:
+ clocks:
+ minItems: 2
+ maxItems: 2
+ clock-names:
+ items:
+ - const: slow_xtal
+ - const: main_xtal
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - atmel,sama5d2-pmc
+ - atmel,sama5d3-pmc
+ - atmel,sama5d4-pmc
+ then:
+ properties:
+ clocks:
+ minItems: 2
+ maxItems: 2
+ clock-names:
+ items:
+ - const: slow_clk
+ - const: main_xtal
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ pmc: clock-controller@f0018000 {
+ compatible = "atmel,sama5d4-pmc", "syscon";
+ reg = <0xf0018000 0x120>;
+ interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>;
+ #clock-cells = <2>;
+ clocks = <&clk32k>, <&main_xtal>;
+ clock-names = "slow_clk", "main_xtal";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/atmel,at91sam9x5-sckc.yaml b/Documentation/devicetree/bindings/clock/atmel,at91sam9x5-sckc.yaml
new file mode 100644
index 000000000000..d4cf8ae2961e
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/atmel,at91sam9x5-sckc.yaml
@@ -0,0 +1,73 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/atmel,at91sam9x5-sckc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Atmel Slow Clock Controller (SCKC)
+
+maintainers:
+ - Claudiu Beznea <claudiu.beznea@microchip.com>
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - atmel,at91sam9x5-sckc
+ - atmel,sama5d3-sckc
+ - atmel,sama5d4-sckc
+ - microchip,sam9x60-sckc
+ - items:
+ - enum:
+ - microchip,sam9x7-sckc
+ - microchip,sama7d65-sckc
+ - microchip,sama7g5-sckc
+ - const: microchip,sam9x60-sckc
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ "#clock-cells":
+ enum: [0, 1]
+
+ atmel,osc-bypass:
+ type: boolean
+ description: set when a clock signal is directly provided on XIN
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - "#clock-cells"
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - microchip,sam9x60-sckc
+ then:
+ properties:
+ "#clock-cells":
+ const: 1
+ else:
+ properties:
+ "#clock-cells":
+ const: 0
+
+additionalProperties: false
+
+examples:
+ - |
+ clk32k: clock-controller@fffffe50 {
+ compatible = "microchip,sam9x60-sckc";
+ reg = <0xfffffe50 0x4>;
+ clocks = <&slow_xtal>;
+ #clock-cells = <1>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/axi-clkgen.txt b/Documentation/devicetree/bindings/clock/axi-clkgen.txt
deleted file mode 100644
index aca94fe9416f..000000000000
--- a/Documentation/devicetree/bindings/clock/axi-clkgen.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-Binding for the axi-clkgen clock generator
-
-This binding uses the common clock binding[1].
-
-[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-
-Required properties:
-- compatible : shall be "adi,axi-clkgen-1.00.a" or "adi,axi-clkgen-2.00.a".
-- #clock-cells : from common clock binding; Should always be set to 0.
-- reg : Address and length of the axi-clkgen register set.
-- clocks : Phandle and clock specifier for the parent clock(s). This must
- either reference one clock if only the first clock input is connected or two
- if both clock inputs are connected. For the later case the clock connected
- to the first input must be specified first.
-
-Optional properties:
-- clock-output-names : From common clock binding.
-
-Example:
- clock@ff000000 {
- compatible = "adi,axi-clkgen";
- #clock-cells = <0>;
- reg = <0xff000000 0x1000>;
- clocks = <&osc 1>;
- };
diff --git a/Documentation/devicetree/bindings/clock/axis,artpec6-clkctrl.yaml b/Documentation/devicetree/bindings/clock/axis,artpec6-clkctrl.yaml
new file mode 100644
index 000000000000..a78269369df8
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/axis,artpec6-clkctrl.yaml
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/axis,artpec6-clkctrl.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Axis ARTPEC-6 clock controller
+
+maintainers:
+ - Lars Persson <lars.persson@axis.com>
+
+properties:
+ compatible:
+ const: axis,artpec6-clkctrl
+
+ reg:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 1
+
+ clocks:
+ minItems: 1
+ items:
+ - description: external 50 MHz oscillator.
+ - description: optional audio reference clock.
+ - description: fractional audio clock divider 0.
+ - description: fractional audio clock divider 1.
+
+ clock-names:
+ minItems: 1
+ items:
+ - const: sys_refclk
+ - const: i2s_refclk
+ - const: frac_clk0
+ - const: frac_clk1
+
+required:
+ - compatible
+ - reg
+ - "#clock-cells"
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@f8000000 {
+ compatible = "axis,artpec6-clkctrl";
+ reg = <0xf8000000 0x48>;
+ #clock-cells = <1>;
+ clocks = <&ext_clk>;
+ clock-names = "sys_refclk";
+ };
diff --git a/Documentation/devicetree/bindings/clock/axis,artpec8-clock.yaml b/Documentation/devicetree/bindings/clock/axis,artpec8-clock.yaml
new file mode 100644
index 000000000000..277af48ac841
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/axis,artpec8-clock.yaml
@@ -0,0 +1,213 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/axis,artpec8-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Axis ARTPEC-8 SoC clock controller
+
+maintainers:
+ - Jesper Nilsson <jesper.nilsson@axis.com>
+
+description: |
+ ARTPEC-8 clock controller is comprised of several CMU (Clock Management Unit)
+ units, generating clocks for different domains. Those CMU units are modeled
+ as separate device tree nodes, and might depend on each other.
+ The root clock in that root tree is an external clock: OSCCLK (25 MHz).
+ This external clock must be defined as a fixed-rate clock in dts.
+
+ CMU_CMU is a top-level CMU, where all base clocks are prepared using PLLs and
+ dividers; all other clocks of function blocks (other CMUs) are usually
+ derived from CMU_CMU.
+
+ Each clock is assigned an identifier and client nodes can use this identifier
+ to specify the clock which they consume. All clocks available for usage
+ in clock consumer nodes are defined as preprocessor macros in
+ 'include/dt-bindings/clock/axis,artpec8-clk.h' header.
+
+properties:
+ compatible:
+ enum:
+ - axis,artpec8-cmu-cmu
+ - axis,artpec8-cmu-bus
+ - axis,artpec8-cmu-core
+ - axis,artpec8-cmu-cpucl
+ - axis,artpec8-cmu-fsys
+ - axis,artpec8-cmu-imem
+ - axis,artpec8-cmu-peri
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ minItems: 1
+ maxItems: 5
+
+ clock-names:
+ minItems: 1
+ maxItems: 5
+
+ "#clock-cells":
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - "#clock-cells"
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ const: axis,artpec8-cmu-cmu
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (25 MHz)
+
+ clock-names:
+ items:
+ - const: fin_pll
+
+ - if:
+ properties:
+ compatible:
+ const: axis,artpec8-cmu-bus
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (25 MHz)
+ - description: CMU_BUS BUS clock (from CMU_CMU)
+ - description: CMU_BUS DLP clock (from CMU_CMU)
+
+ clock-names:
+ items:
+ - const: fin_pll
+ - const: bus
+ - const: dlp
+
+ - if:
+ properties:
+ compatible:
+ const: axis,artpec8-cmu-core
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (25 MHz)
+ - description: CMU_CORE main clock (from CMU_CMU)
+ - description: CMU_CORE DLP clock (from CMU_CMU)
+
+ clock-names:
+ items:
+ - const: fin_pll
+ - const: main
+ - const: dlp
+
+ - if:
+ properties:
+ compatible:
+ const: axis,artpec8-cmu-cpucl
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (25 MHz)
+ - description: CMU_CPUCL switch clock (from CMU_CMU)
+
+ clock-names:
+ items:
+ - const: fin_pll
+ - const: switch
+
+ - if:
+ properties:
+ compatible:
+ const: axis,artpec8-cmu-fsys
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (25 MHz)
+ - description: CMU_FSYS SCAN0 clock (from CMU_CMU)
+ - description: CMU_FSYS SCAN1 clock (from CMU_CMU)
+ - description: CMU_FSYS BUS clock (from CMU_CMU)
+ - description: CMU_FSYS IP clock (from CMU_CMU)
+
+ clock-names:
+ items:
+ - const: fin_pll
+ - const: scan0
+ - const: scan1
+ - const: bus
+ - const: ip
+
+ - if:
+ properties:
+ compatible:
+ const: axis,artpec8-cmu-imem
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (25 MHz)
+ - description: CMU_IMEM ACLK clock (from CMU_CMU)
+ - description: CMU_IMEM JPEG clock (from CMU_CMU)
+
+ clock-names:
+ items:
+ - const: fin_pll
+ - const: aclk
+ - const: jpeg
+
+ - if:
+ properties:
+ compatible:
+ const: axis,artpec8-cmu-peri
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (25 MHz)
+ - description: CMU_PERI IP clock (from CMU_CMU)
+ - description: CMU_PERI AUDIO clock (from CMU_CMU)
+ - description: CMU_PERI DISP clock (from CMU_CMU)
+
+ clock-names:
+ items:
+ - const: fin_pll
+ - const: ip
+ - const: audio
+ - const: disp
+
+additionalProperties: false
+
+examples:
+ # Clock controller node for CMU_FSYS
+ - |
+ #include <dt-bindings/clock/axis,artpec8-clk.h>
+
+ cmu_fsys: clock-controller@16c10000 {
+ compatible = "axis,artpec8-cmu-fsys";
+ reg = <0x16c10000 0x4000>;
+ #clock-cells = <1>;
+ clocks = <&fin_pll>,
+ <&cmu_cmu CLK_DOUT_CMU_FSYS_SCAN0>,
+ <&cmu_cmu CLK_DOUT_CMU_FSYS_SCAN1>,
+ <&cmu_cmu CLK_DOUT_CMU_FSYS_BUS>,
+ <&cmu_cmu CLK_DOUT_CMU_FSYS_IP>;
+ clock-names = "fin_pll", "scan0", "scan1", "bus", "ip";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/baikal,bt1-ccu-div.yaml b/Documentation/devicetree/bindings/clock/baikal,bt1-ccu-div.yaml
new file mode 100644
index 000000000000..30252c95700c
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/baikal,bt1-ccu-div.yaml
@@ -0,0 +1,196 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (C) 2020 BAIKAL ELECTRONICS, JSC
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/baikal,bt1-ccu-div.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Baikal-T1 Clock Control Unit Dividers
+
+maintainers:
+ - Serge Semin <fancer.lancer@gmail.com>
+
+description: |
+ Clocks Control Unit is the core of Baikal-T1 SoC System Controller
+ responsible for the chip subsystems clocking and resetting. The CCU is
+ connected with an external fixed rate oscillator, which signal is transformed
+ into clocks of various frequencies and then propagated to either individual
+ IP-blocks or to groups of blocks (clock domains). The transformation is done
+ by means of an embedded into CCU PLLs and gateable/non-gateable dividers. The
+ later ones are described in this binding. Each clock domain can be also
+ individually reset by using the domain clocks divider configuration
+ registers. Baikal-T1 CCU is logically divided into the next components:
+ 1) External oscillator (normally XTAL's 25 MHz crystal oscillator, but
+ in general can provide any frequency supported by the CCU PLLs).
+ 2) PLLs clocks generators (PLLs).
+ 3) AXI-bus clock dividers (AXI) - described in this binding file.
+ 4) System devices reference clock dividers (SYS) - described in this binding
+ file.
+ which are connected with each other as shown on the next figure:
+
+ +---------------+
+ | Baikal-T1 CCU |
+ | +----+------|- MIPS P5600 cores
+ | +-|PLLs|------|- DDR controller
+ | | +----+ |
+ +----+ | | | | |
+ |XTAL|--|-+ | | +---+-|
+ +----+ | | | +-|AXI|-|- AXI-bus
+ | | | +---+-|
+ | | | |
+ | | +----+---+-|- APB-bus
+ | +-------|SYS|-|- Low-speed Devices
+ | +---+-|- High-speed Devices
+ +---------------+
+
+ Each sub-block is represented as a separate DT node and has an individual
+ driver to be bound with.
+
+ In order to create signals of wide range frequencies the external oscillator
+ output is primarily connected to a set of CCU PLLs. Some of PLLs CLKOUT are
+ then passed over CCU dividers to create signals required for the target clock
+ domain (like AXI-bus or System Device consumers). The dividers have the
+ following structure:
+
+ +--------------+
+ CLKIN --|->+----+ 1|\ |
+ SETCLK--|--|/DIV|->| | |
+ CLKDIV--|--| | | |-|->CLKLOUT
+ LOCK----|--+----+ | | |
+ | |/ |
+ | | |
+ EN------|-----------+ |
+ RST-----|--------------|->RSTOUT
+ +--------------+
+
+ where CLKIN is the reference clock coming either from CCU PLLs or from an
+ external clock oscillator, SETCLK - a command to update the output clock in
+ accordance with a set divider, CLKDIV - clocks divider, LOCK - a signal of
+ the output clock stabilization, EN - enable/disable the divider block,
+ RST/RSTOUT - reset clocks domain signal. Depending on the consumer IP-core
+ peculiarities the dividers may lack of some functionality depicted on the
+ figure above (like EN, CLKDIV/LOCK/SETCLK). In this case the corresponding
+ clock provider just doesn't expose either switching functions, or the rate
+ configuration, or both of them.
+
+ The clock dividers, which output clock is then consumed by the SoC individual
+ devices, are united into a single clocks provider called System Devices CCU.
+ Similarly the dividers with output clocks utilized as AXI-bus reference clocks
+ are called AXI-bus CCU. Both of them use the common clock bindings with no
+ custom properties. The list of exported clocks and reset signals can be found
+ in the files: 'include/dt-bindings/clock/bt1-ccu.h' and
+ 'include/dt-bindings/reset/bt1-ccu.h'. Since System Devices and AXI-bus CCU
+ are a part of the Baikal-T1 SoC System Controller their DT nodes are supposed
+ to be a children of later one.
+
+if:
+ properties:
+ compatible:
+ contains:
+ const: baikal,bt1-ccu-axi
+
+then:
+ properties:
+ clocks:
+ items:
+ - description: CCU SATA PLL output clock
+ - description: CCU PCIe PLL output clock
+ - description: CCU Ethernet PLL output clock
+
+ clock-names:
+ items:
+ - const: sata_clk
+ - const: pcie_clk
+ - const: eth_clk
+
+else:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock
+ - description: CCU SATA PLL output clock
+ - description: CCU PCIe PLL output clock
+ - description: CCU Ethernet PLL output clock
+
+ clock-names:
+ items:
+ - const: ref_clk
+ - const: sata_clk
+ - const: pcie_clk
+ - const: eth_clk
+
+properties:
+ compatible:
+ enum:
+ - baikal,bt1-ccu-axi
+ - baikal,bt1-ccu-sys
+
+ reg:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 1
+
+ "#reset-cells":
+ const: 1
+
+ clocks:
+ minItems: 3
+ maxItems: 4
+
+ clock-names:
+ minItems: 3
+ maxItems: 4
+
+additionalProperties: false
+
+required:
+ - compatible
+ - "#clock-cells"
+ - clocks
+ - clock-names
+
+examples:
+ # AXI-bus Clock Control Unit node:
+ - |
+ #include <dt-bindings/clock/bt1-ccu.h>
+
+ clock-controller@1f04d030 {
+ compatible = "baikal,bt1-ccu-axi";
+ reg = <0x1f04d030 0x030>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+
+ clocks = <&ccu_pll CCU_SATA_PLL>,
+ <&ccu_pll CCU_PCIE_PLL>,
+ <&ccu_pll CCU_ETH_PLL>;
+ clock-names = "sata_clk", "pcie_clk", "eth_clk";
+ };
+ # System Devices Clock Control Unit node:
+ - |
+ #include <dt-bindings/clock/bt1-ccu.h>
+
+ clock-controller@1f04d060 {
+ compatible = "baikal,bt1-ccu-sys";
+ reg = <0x1f04d060 0x0a0>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+
+ clocks = <&clk25m>,
+ <&ccu_pll CCU_SATA_PLL>,
+ <&ccu_pll CCU_PCIE_PLL>,
+ <&ccu_pll CCU_ETH_PLL>;
+ clock-names = "ref_clk", "sata_clk", "pcie_clk",
+ "eth_clk";
+ };
+ # Required Clock Control Unit PLL node:
+ - |
+ ccu_pll: clock-controller@1f04d000 {
+ compatible = "baikal,bt1-ccu-pll";
+ reg = <0x1f04d000 0x028>;
+ #clock-cells = <1>;
+
+ clocks = <&clk25m>;
+ clock-names = "ref_clk";
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/baikal,bt1-ccu-pll.yaml b/Documentation/devicetree/bindings/clock/baikal,bt1-ccu-pll.yaml
new file mode 100644
index 000000000000..7f8d98226437
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/baikal,bt1-ccu-pll.yaml
@@ -0,0 +1,131 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (C) 2020 BAIKAL ELECTRONICS, JSC
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/baikal,bt1-ccu-pll.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Baikal-T1 Clock Control Unit PLL
+
+maintainers:
+ - Serge Semin <fancer.lancer@gmail.com>
+
+description: |
+ Clocks Control Unit is the core of Baikal-T1 SoC System Controller
+ responsible for the chip subsystems clocking and resetting. The CCU is
+ connected with an external fixed rate oscillator, which signal is transformed
+ into clocks of various frequencies and then propagated to either individual
+ IP-blocks or to groups of blocks (clock domains). The transformation is done
+ by means of PLLs and gateable/non-gateable dividers embedded into the CCU.
+ It's logically divided into the next components:
+ 1) External oscillator (normally XTAL's 25 MHz crystal oscillator, but
+ in general can provide any frequency supported by the CCU PLLs).
+ 2) PLLs clocks generators (PLLs) - described in this binding file.
+ 3) AXI-bus clock dividers (AXI).
+ 4) System devices reference clock dividers (SYS).
+ which are connected with each other as shown on the next figure:
+
+ +---------------+
+ | Baikal-T1 CCU |
+ | +----+------|- MIPS P5600 cores
+ | +-|PLLs|------|- DDR controller
+ | | +----+ |
+ +----+ | | | | |
+ |XTAL|--|-+ | | +---+-|
+ +----+ | | | +-|AXI|-|- AXI-bus
+ | | | +---+-|
+ | | | |
+ | | +----+---+-|- APB-bus
+ | +-------|SYS|-|- Low-speed Devices
+ | +---+-|- High-speed Devices
+ +---------------+
+
+ Each CCU sub-block is represented as a separate dts-node and has an
+ individual driver to be bound with.
+
+ In order to create signals of wide range frequencies the external oscillator
+ output is primarily connected to a set of CCU PLLs. There are five PLLs
+ to create a clock for the MIPS P5600 cores, the embedded DDR controller,
+ SATA, Ethernet and PCIe domains. The last three domains though named by the
+ biggest system interfaces in fact include nearly all of the rest SoC
+ peripherals. Each of the PLLs is based on True Circuits TSMC CLN28HPM core
+ with an interface wrapper (so called safe PLL' clocks switcher) to simplify
+ the PLL configuration procedure. The PLLs work as depicted on the next
+ diagram:
+
+ +--------------------------+
+ | |
+ +-->+---+ +---+ +---+ | +---+ 0|\
+ CLKF--->|/NF|--->|PFD|...|VCO|-+->|/OD|--->| |
+ +---+ +->+---+ +---+ /->+---+ | |--->CLKOUT
+ CLKOD---------C----------------+ 1| |
+ +--------C--------------------------->|/
+ | | ^
+ Rclk-+->+---+ | |
+ CLKR--->|/NR|-+ |
+ +---+ |
+ BYPASS--------------------------------------+
+ BWADJ--->
+
+ where Rclk is the reference clock coming from XTAL, NR - reference clock
+ divider, NF - PLL clock multiplier, OD - VCO output clock divider, CLKOUT -
+ output clock, BWADJ is the PLL bandwidth adjustment parameter. At this moment
+ the binding supports the PLL dividers configuration in accordance with a
+ requested rate, while bypassing and bandwidth adjustment settings can be
+ added in future if it gets to be necessary.
+
+ The PLLs CLKOUT is then either directly connected with the corresponding
+ clocks consumer (like P5600 cores or DDR controller) or passed over a CCU
+ divider to create a signal required for the clock domain.
+
+ The CCU PLL dts-node uses the common clock bindings with no custom
+ parameters. The list of exported clocks can be found in
+ 'include/dt-bindings/clock/bt1-ccu.h'. Since CCU PLL is a part of the
+ Baikal-T1 SoC System Controller its DT node is supposed to be a child of
+ later one.
+
+properties:
+ compatible:
+ const: baikal,bt1-ccu-pll
+
+ reg:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 1
+
+ clocks:
+ description: External reference clock
+ maxItems: 1
+
+ clock-names:
+ const: ref_clk
+
+additionalProperties: false
+
+required:
+ - compatible
+ - "#clock-cells"
+ - clocks
+ - clock-names
+
+examples:
+ # Clock Control Unit PLL node:
+ - |
+ clock-controller@1f04d000 {
+ compatible = "baikal,bt1-ccu-pll";
+ reg = <0x1f04d000 0x028>;
+ #clock-cells = <1>;
+
+ clocks = <&clk25m>;
+ clock-names = "ref_clk";
+ };
+ # Required external oscillator:
+ - |
+ clk25m: clock-oscillator-25m {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <25000000>;
+ clock-output-names = "clk25m";
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/bitmain,bm1880-clk.yaml b/Documentation/devicetree/bindings/clock/bitmain,bm1880-clk.yaml
new file mode 100644
index 000000000000..f0f9392470a6
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/bitmain,bm1880-clk.yaml
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/bitmain,bm1880-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Bitmain BM1880 Clock Controller
+
+maintainers:
+ - Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+
+description: |
+ The Bitmain BM1880 clock controller generates and supplies clock to
+ various peripherals within the SoC.
+
+ This binding uses common clock bindings
+ [1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+properties:
+ compatible:
+ const: bitmain,bm1880-clk
+
+ reg:
+ items:
+ - description: pll registers
+ - description: system registers
+
+ reg-names:
+ items:
+ - const: pll
+ - const: sys
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ const: osc
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - reg-names
+ - clocks
+ - clock-names
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ # Clock controller node:
+ - |
+ clk: clock-controller@e8 {
+ compatible = "bitmain,bm1880-clk";
+ reg = <0xe8 0x0c>, <0x800 0xb0>;
+ reg-names = "pll", "sys";
+ clocks = <&osc>;
+ clock-names = "osc";
+ #clock-cells = <1>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/brcm,bcm2711-dvp.yaml b/Documentation/devicetree/bindings/clock/brcm,bcm2711-dvp.yaml
new file mode 100644
index 000000000000..2d40df2d34df
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/brcm,bcm2711-dvp.yaml
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/brcm,bcm2711-dvp.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom BCM2711 HDMI DVP
+
+maintainers:
+ - Maxime Ripard <mripard@kernel.org>
+
+properties:
+ "#clock-cells":
+ const: 1
+
+ "#reset-cells":
+ const: 1
+
+ compatible:
+ const: brcm,brcm2711-dvp
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+required:
+ - "#clock-cells"
+ - "#reset-cells"
+ - compatible
+ - reg
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ dvp: clock@7ef00000 {
+ compatible = "brcm,brcm2711-dvp";
+ reg = <0x7ef00000 0x10>;
+ clocks = <&clk_108MHz>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/brcm,bcm2835-aux-clock.txt b/Documentation/devicetree/bindings/clock/brcm,bcm2835-aux-clock.txt
deleted file mode 100644
index 4acfc8f641b6..000000000000
--- a/Documentation/devicetree/bindings/clock/brcm,bcm2835-aux-clock.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-Broadcom BCM2835 auxiliary peripheral support
-
-This binding uses the common clock binding:
- Documentation/devicetree/bindings/clock/clock-bindings.txt
-
-The auxiliary peripherals (UART, SPI1, and SPI2) have a small register
-area controlling clock gating to the peripherals, and providing an IRQ
-status register.
-
-Required properties:
-- compatible: Should be "brcm,bcm2835-aux"
-- #clock-cells: Should be <1>. The permitted clock-specifier values can be
- found in include/dt-bindings/clock/bcm2835-aux.h
-- reg: Specifies base physical address and size of the registers
-- clocks: The parent clock phandle
-
-Example:
-
- clocks: cprman@7e101000 {
- compatible = "brcm,bcm2835-cprman";
- #clock-cells = <1>;
- reg = <0x7e101000 0x2000>;
- clocks = <&clk_osc>;
- };
-
- aux: aux@7e215004 {
- compatible = "brcm,bcm2835-aux";
- #clock-cells = <1>;
- reg = <0x7e215000 0x8>;
- clocks = <&clocks BCM2835_CLOCK_VPU>;
- };
diff --git a/Documentation/devicetree/bindings/clock/brcm,bcm2835-aux-clock.yaml b/Documentation/devicetree/bindings/clock/brcm,bcm2835-aux-clock.yaml
new file mode 100644
index 000000000000..0f4050ffa41c
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/brcm,bcm2835-aux-clock.yaml
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/brcm,bcm2835-aux-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom BCM2835 auxiliary peripheral clock
+
+maintainers:
+ - Stefan Wahren <wahrenst@gmx.net>
+ - Raspberry Pi Kernel Maintenance <kernel-list@raspberrypi.com>
+
+description:
+ The auxiliary peripherals (UART, SPI1, and SPI2) have a small register
+ area controlling clock gating to the peripherals, and providing an IRQ
+ status register.
+
+properties:
+ compatible:
+ const: brcm,bcm2835-aux
+
+ reg:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 1
+
+ clocks:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - "#clock-cells"
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/bcm2835.h>
+ clock@7e215000 {
+ compatible = "brcm,bcm2835-aux";
+ reg = <0x7e215000 0x8>;
+ #clock-cells = <1>;
+ clocks = <&clocks BCM2835_CLOCK_VPU>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/brcm,bcm2835-cprman.txt b/Documentation/devicetree/bindings/clock/brcm,bcm2835-cprman.txt
deleted file mode 100644
index dd906db34b32..000000000000
--- a/Documentation/devicetree/bindings/clock/brcm,bcm2835-cprman.txt
+++ /dev/null
@@ -1,58 +0,0 @@
-Broadcom BCM2835 CPRMAN clocks
-
-This binding uses the common clock binding:
- Documentation/devicetree/bindings/clock/clock-bindings.txt
-
-The CPRMAN clock controller generates clocks in the audio power domain
-of the BCM2835. There is a level of PLLs deriving from an external
-oscillator, a level of PLL dividers that produce channels off of the
-few PLLs, and a level of mostly-generic clock generators sourcing from
-the PLL channels. Most other hardware components source from the
-clock generators, but a few (like the ARM or HDMI) will source from
-the PLL dividers directly.
-
-Required properties:
-- compatible: Should be "brcm,bcm2835-cprman"
-- #clock-cells: Should be <1>. The permitted clock-specifier values can be
- found in include/dt-bindings/clock/bcm2835.h
-- reg: Specifies base physical address and size of the registers
-- clocks: phandles to the parent clocks used as input to the module, in
- the following order:
-
- - External oscillator
- - DSI0 byte clock
- - DSI0 DDR2 clock
- - DSI0 DDR clock
- - DSI1 byte clock
- - DSI1 DDR2 clock
- - DSI1 DDR clock
-
- Only external oscillator is required. The DSI clocks may
- not be present, in which case their children will be
- unusable.
-
-Example:
-
- clk_osc: clock@3 {
- compatible = "fixed-clock";
- reg = <3>;
- #clock-cells = <0>;
- clock-output-names = "osc";
- clock-frequency = <19200000>;
- };
-
- clocks: cprman@7e101000 {
- compatible = "brcm,bcm2835-cprman";
- #clock-cells = <1>;
- reg = <0x7e101000 0x2000>;
- clocks = <&clk_osc>;
- };
-
- i2c0: i2c@7e205000 {
- compatible = "brcm,bcm2835-i2c";
- reg = <0x7e205000 0x1000>;
- interrupts = <2 21>;
- clocks = <&clocks BCM2835_CLOCK_VPU>;
- #address-cells = <1>;
- #size-cells = <0>;
- };
diff --git a/Documentation/devicetree/bindings/clock/brcm,bcm2835-cprman.yaml b/Documentation/devicetree/bindings/clock/brcm,bcm2835-cprman.yaml
new file mode 100644
index 000000000000..b0cf76c74bc7
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/brcm,bcm2835-cprman.yaml
@@ -0,0 +1,59 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/brcm,bcm2835-cprman.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom BCM2835 CPRMAN clocks
+
+maintainers:
+ - Stefan Wahren <wahrenst@gmx.net>
+ - Raspberry Pi Kernel Maintenance <kernel-list@raspberrypi.com>
+
+description:
+ The CPRMAN clock controller generates clocks in the audio power domain of the
+ BCM2835. There is a level of PLLs deriving from an external oscillator, a
+ level of PLL dividers that produce channels off of the few PLLs, and a level
+ of mostly-generic clock generators sourcing from the PLL channels. Most other
+ hardware components source from the clock generators, but a few (like the ARM
+ or HDMI) will source from the PLL dividers directly.
+
+properties:
+ compatible:
+ enum:
+ - brcm,bcm2711-cprman
+ - brcm,bcm2835-cprman
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ clocks:
+ minItems: 1
+ items:
+ - description: External oscillator clock.
+ - description: DSI0 byte clock.
+ - description: DSI0 DDR2 clock.
+ - description: DSI0 DDR clock.
+ - description: DSI1 byte clock.
+ - description: DSI1 DDR2 clock.
+ - description: DSI1 DDR clock.
+
+additionalProperties: false
+
+required:
+ - compatible
+ - '#clock-cells'
+ - reg
+ - clocks
+
+examples:
+ - |
+ clock-controller@7e101000 {
+ compatible = "brcm,bcm2835-cprman";
+ reg = <0x7e101000 0x2000>;
+ #clock-cells = <1>;
+ clocks = <&clk_osc>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/brcm,bcm53573-ilp.txt b/Documentation/devicetree/bindings/clock/brcm,bcm53573-ilp.txt
deleted file mode 100644
index 2ebb107331dd..000000000000
--- a/Documentation/devicetree/bindings/clock/brcm,bcm53573-ilp.txt
+++ /dev/null
@@ -1,36 +0,0 @@
-Broadcom BCM53573 ILP clock
-===========================
-
-This binding uses the common clock binding:
- Documentation/devicetree/bindings/clock/clock-bindings.txt
-
-This binding is used for ILP clock (sometimes referred as "slow clock")
-on Broadcom BCM53573 devices using Cortex-A7 CPU.
-
-ILP's rate has to be calculated on runtime and it depends on ALP clock
-which has to be referenced.
-
-This clock is part of PMU (Power Management Unit), a Broadcom's device
-handing power-related aspects. Its node must be sub-node of the PMU
-device.
-
-Required properties:
-- compatible: "brcm,bcm53573-ilp"
-- clocks: has to reference an ALP clock
-- #clock-cells: should be <0>
-- clock-output-names: from common clock bindings, should contain clock
- name
-
-Example:
-
-pmu@18012000 {
- compatible = "simple-mfd", "syscon";
- reg = <0x18012000 0x00001000>;
-
- ilp {
- compatible = "brcm,bcm53573-ilp";
- clocks = <&alp>;
- #clock-cells = <0>;
- clock-output-names = "ilp";
- };
-};
diff --git a/Documentation/devicetree/bindings/clock/brcm,bcm53573-ilp.yaml b/Documentation/devicetree/bindings/clock/brcm,bcm53573-ilp.yaml
new file mode 100644
index 000000000000..cd291f428a8d
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/brcm,bcm53573-ilp.yaml
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/brcm,bcm53573-ilp.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom BCM53573 ILP clock
+
+maintainers:
+ - Rafał Miłecki <rafal@milecki.pl>
+
+description: >
+ ILP clock (sometimes referred as "slow clock") on Broadcom BCM53573 devices
+ using Cortex-A7 CPU.
+
+ ILP's rate has to be calculated on runtime and it depends on ALP clock which
+ has to be referenced.
+
+ This clock is part of PMU (Power Management Unit), a Broadcom device handling
+ power-related aspects. Its node must be sub-node of the PMU device.
+
+properties:
+ compatible:
+ items:
+ - const: brcm,bcm53573-ilp
+
+ clocks:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 0
+
+ clock-output-names:
+ items:
+ - const: ilp
+
+additionalProperties: false
+
+examples:
+ - |
+ ilp {
+ compatible = "brcm,bcm53573-ilp";
+ clocks = <&alp>;
+ #clock-cells = <0>;
+ clock-output-names = "ilp";
+ };
diff --git a/Documentation/devicetree/bindings/clock/brcm,bcm63268-timer-clocks.yaml b/Documentation/devicetree/bindings/clock/brcm,bcm63268-timer-clocks.yaml
new file mode 100644
index 000000000000..cd0d763ce2f3
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/brcm,bcm63268-timer-clocks.yaml
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/brcm,bcm63268-timer-clocks.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom BCM63268 Timer Clock and Reset
+
+maintainers:
+ - Álvaro Fernández Rojas <noltari@gmail.com>
+
+properties:
+ compatible:
+ const: brcm,bcm63268-timer-clocks
+
+ reg:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 1
+
+ "#reset-cells":
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - "#clock-cells"
+ - "#reset-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ timer_clk: clock-controller@100000ac {
+ compatible = "brcm,bcm63268-timer-clocks";
+ reg = <0x100000ac 0x4>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/brcm,bcm63xx-clocks.yaml b/Documentation/devicetree/bindings/clock/brcm,bcm63xx-clocks.yaml
new file mode 100644
index 000000000000..56909ea499a2
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/brcm,bcm63xx-clocks.yaml
@@ -0,0 +1,44 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/brcm,bcm63xx-clocks.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MIPS based BCM63XX SoCs Gated Clock Controller
+
+maintainers:
+ - Álvaro Fernández Rojas <noltari@gmail.com>
+ - Jonas Gorski <jonas.gorski@gmail.com>
+
+properties:
+ compatible:
+ enum:
+ - brcm,bcm3368-clocks
+ - brcm,bcm6318-clocks
+ - brcm,bcm6318-ubus-clocks
+ - brcm,bcm6328-clocks
+ - brcm,bcm6358-clocks
+ - brcm,bcm6362-clocks
+ - brcm,bcm6368-clocks
+ - brcm,bcm63268-clocks
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@10000004 {
+ compatible = "brcm,bcm6328-clocks";
+ reg = <0x10000004 0x4>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/brcm,iproc-clocks.txt b/Documentation/devicetree/bindings/clock/brcm,iproc-clocks.txt
deleted file mode 100644
index ab730ea0a560..000000000000
--- a/Documentation/devicetree/bindings/clock/brcm,iproc-clocks.txt
+++ /dev/null
@@ -1,313 +0,0 @@
-Broadcom iProc Family Clocks
-
-This binding uses the common clock binding:
- Documentation/devicetree/bindings/clock/clock-bindings.txt
-
-The iProc clock controller manages clocks that are common to the iProc family.
-An SoC from the iProc family may have several PPLs, e.g., ARMPLL, GENPLL,
-LCPLL0, MIPIPLL, and etc., all derived from an onboard crystal. Each PLL
-comprises of several leaf clocks
-
-Required properties for a PLL and its leaf clocks:
-
-- compatible:
- Should have a value of the form "brcm,<soc>-<pll>". For example, GENPLL on
-Cygnus has a compatible string of "brcm,cygnus-genpll"
-
-- #clock-cells:
- Have a value of <1> since there are more than 1 leaf clock of a given PLL
-
-- reg:
- Define the base and range of the I/O address space that contain the iProc
-clock control registers required for the PLL
-
-- clocks:
- The input parent clock phandle for the PLL. For most iProc PLLs, this is an
-onboard crystal with a fixed rate
-
-- clock-output-names:
- An ordered list of strings defining the names of the clocks
-
-Example:
-
- osc: oscillator {
- #clock-cells = <0>;
- compatible = "fixed-clock";
- clock-frequency = <25000000>;
- };
-
- genpll: genpll {
- #clock-cells = <1>;
- compatible = "brcm,cygnus-genpll";
- reg = <0x0301d000 0x2c>, <0x0301c020 0x4>;
- clocks = <&osc>;
- clock-output-names = "genpll", "axi21", "250mhz", "ihost_sys",
- "enet_sw", "audio_125", "can";
- };
-
-Required properties for ASIU clocks:
-
-ASIU clocks are a special case. These clocks are derived directly from the
-reference clock of the onboard crystal
-
-- compatible:
- Should have a value of the form "brcm,<soc>-asiu-clk". For example, ASIU
-clocks for Cygnus have a compatible string of "brcm,cygnus-asiu-clk"
-
-- #clock-cells:
- Have a value of <1> since there are more than 1 ASIU clocks
-
-- reg:
- Define the base and range of the I/O address space that contain the iProc
-clock control registers required for ASIU clocks
-
-- clocks:
- The input parent clock phandle for the ASIU clock, i.e., the onboard
-crystal
-
-- clock-output-names:
- An ordered list of strings defining the names of the ASIU clocks
-
-Example:
-
- osc: oscillator {
- #clock-cells = <0>;
- compatible = "fixed-clock";
- clock-frequency = <25000000>;
- };
-
- asiu_clks: asiu_clks {
- #clock-cells = <1>;
- compatible = "brcm,cygnus-asiu-clk";
- reg = <0x0301d048 0xc>, <0x180aa024 0x4>;
- clocks = <&osc>;
- clock-output-names = "keypad", "adc/touch", "pwm";
- };
-
-Cygnus
-------
-PLL and leaf clock compatible strings for Cygnus are:
- "brcm,cygnus-armpll"
- "brcm,cygnus-genpll"
- "brcm,cygnus-lcpll0"
- "brcm,cygnus-mipipll"
- "brcm,cygnus-asiu-clk"
- "brcm,cygnus-audiopll"
-
-The following table defines the set of PLL/clock index and ID for Cygnus.
-These clock IDs are defined in:
- "include/dt-bindings/clock/bcm-cygnus.h"
-
- Clock Source (Parent) Index ID
- --- ----- ----- ---------
- crystal N/A N/A N/A
-
- armpll crystal N/A N/A
-
- keypad crystal (ASIU) 0 BCM_CYGNUS_ASIU_KEYPAD_CLK
- adc/tsc crystal (ASIU) 1 BCM_CYGNUS_ASIU_ADC_CLK
- pwm crystal (ASIU) 2 BCM_CYGNUS_ASIU_PWM_CLK
-
- genpll crystal 0 BCM_CYGNUS_GENPLL
- axi21 genpll 1 BCM_CYGNUS_GENPLL_AXI21_CLK
- 250mhz genpll 2 BCM_CYGNUS_GENPLL_250MHZ_CLK
- ihost_sys genpll 3 BCM_CYGNUS_GENPLL_IHOST_SYS_CLK
- enet_sw genpll 4 BCM_CYGNUS_GENPLL_ENET_SW_CLK
- audio_125 genpll 5 BCM_CYGNUS_GENPLL_AUDIO_125_CLK
- can genpll 6 BCM_CYGNUS_GENPLL_CAN_CLK
-
- lcpll0 crystal 0 BCM_CYGNUS_LCPLL0
- pcie_phy lcpll0 1 BCM_CYGNUS_LCPLL0_PCIE_PHY_REF_CLK
- ddr_phy lcpll0 2 BCM_CYGNUS_LCPLL0_DDR_PHY_CLK
- sdio lcpll0 3 BCM_CYGNUS_LCPLL0_SDIO_CLK
- usb_phy lcpll0 4 BCM_CYGNUS_LCPLL0_USB_PHY_REF_CLK
- smart_card lcpll0 5 BCM_CYGNUS_LCPLL0_SMART_CARD_CLK
- ch5_unused lcpll0 6 BCM_CYGNUS_LCPLL0_CH5_UNUSED
-
- mipipll crystal 0 BCM_CYGNUS_MIPIPLL
- ch0_unused mipipll 1 BCM_CYGNUS_MIPIPLL_CH0_UNUSED
- ch1_lcd mipipll 2 BCM_CYGNUS_MIPIPLL_CH1_LCD
- ch2_v3d mipipll 3 BCM_CYGNUS_MIPIPLL_CH2_V3D
- ch3_unused mipipll 4 BCM_CYGNUS_MIPIPLL_CH3_UNUSED
- ch4_unused mipipll 5 BCM_CYGNUS_MIPIPLL_CH4_UNUSED
- ch5_unused mipipll 6 BCM_CYGNUS_MIPIPLL_CH5_UNUSED
-
- audiopll crystal 0 BCM_CYGNUS_AUDIOPLL
- ch0_audio audiopll 1 BCM_CYGNUS_AUDIOPLL_CH0
- ch1_audio audiopll 2 BCM_CYGNUS_AUDIOPLL_CH1
- ch2_audio audiopll 3 BCM_CYGNUS_AUDIOPLL_CH2
-
-Hurricane 2
-------
-PLL and leaf clock compatible strings for Hurricane 2 are:
- "brcm,hr2-armpll"
-
-The following table defines the set of PLL/clock for Hurricane 2:
-
- Clock Source Index ID
- --- ----- ----- ---------
- crystal N/A N/A N/A
-
- armpll crystal N/A N/A
-
-
-Northstar and Northstar Plus
-------
-PLL and leaf clock compatible strings for Northstar and Northstar Plus are:
- "brcm,nsp-armpll"
- "brcm,nsp-genpll"
- "brcm,nsp-lcpll0"
-
-The following table defines the set of PLL/clock index and ID for Northstar and
-Northstar Plus. These clock IDs are defined in:
- "include/dt-bindings/clock/bcm-nsp.h"
-
- Clock Source Index ID
- --- ----- ----- ---------
- crystal N/A N/A N/A
-
- armpll crystal N/A N/A
-
- genpll crystal 0 BCM_NSP_GENPLL
- phy genpll 1 BCM_NSP_GENPLL_PHY_CLK
- ethernetclk genpll 2 BCM_NSP_GENPLL_ENET_SW_CLK
- usbclk genpll 3 BCM_NSP_GENPLL_USB_PHY_REF_CLK
- iprocfast genpll 4 BCM_NSP_GENPLL_IPROCFAST_CLK
- sata1 genpll 5 BCM_NSP_GENPLL_SATA1_CLK
- sata2 genpll 6 BCM_NSP_GENPLL_SATA2_CLK
-
- lcpll0 crystal 0 BCM_NSP_LCPLL0
- pcie_phy lcpll0 1 BCM_NSP_LCPLL0_PCIE_PHY_REF_CLK
- sdio lcpll0 2 BCM_NSP_LCPLL0_SDIO_CLK
- ddr_phy lcpll0 3 BCM_NSP_LCPLL0_DDR_PHY_CLK
-
-Northstar 2
------------
-PLL and leaf clock compatible strings for Northstar 2 are:
- "brcm,ns2-genpll-scr"
- "brcm,ns2-genpll-sw"
- "brcm,ns2-lcpll-ddr"
- "brcm,ns2-lcpll-ports"
-
-The following table defines the set of PLL/clock index and ID for Northstar 2.
-These clock IDs are defined in:
- "include/dt-bindings/clock/bcm-ns2.h"
-
- Clock Source Index ID
- --- ----- ----- ---------
- crystal N/A N/A N/A
-
- genpll_scr crystal 0 BCM_NS2_GENPLL_SCR
- scr genpll_scr 1 BCM_NS2_GENPLL_SCR_SCR_CLK
- fs genpll_scr 2 BCM_NS2_GENPLL_SCR_FS_CLK
- audio_ref genpll_scr 3 BCM_NS2_GENPLL_SCR_AUDIO_CLK
- ch3_unused genpll_scr 4 BCM_NS2_GENPLL_SCR_CH3_UNUSED
- ch4_unused genpll_scr 5 BCM_NS2_GENPLL_SCR_CH4_UNUSED
- ch5_unused genpll_scr 6 BCM_NS2_GENPLL_SCR_CH5_UNUSED
-
- genpll_sw crystal 0 BCM_NS2_GENPLL_SW
- rpe genpll_sw 1 BCM_NS2_GENPLL_SW_RPE_CLK
- 250 genpll_sw 2 BCM_NS2_GENPLL_SW_250_CLK
- nic genpll_sw 3 BCM_NS2_GENPLL_SW_NIC_CLK
- chimp genpll_sw 4 BCM_NS2_GENPLL_SW_CHIMP_CLK
- port genpll_sw 5 BCM_NS2_GENPLL_SW_PORT_CLK
- sdio genpll_sw 6 BCM_NS2_GENPLL_SW_SDIO_CLK
-
- lcpll_ddr crystal 0 BCM_NS2_LCPLL_DDR
- pcie_sata_usb lcpll_ddr 1 BCM_NS2_LCPLL_DDR_PCIE_SATA_USB_CLK
- ddr lcpll_ddr 2 BCM_NS2_LCPLL_DDR_DDR_CLK
- ch2_unused lcpll_ddr 3 BCM_NS2_LCPLL_DDR_CH2_UNUSED
- ch3_unused lcpll_ddr 4 BCM_NS2_LCPLL_DDR_CH3_UNUSED
- ch4_unused lcpll_ddr 5 BCM_NS2_LCPLL_DDR_CH4_UNUSED
- ch5_unused lcpll_ddr 6 BCM_NS2_LCPLL_DDR_CH5_UNUSED
-
- lcpll_ports crystal 0 BCM_NS2_LCPLL_PORTS
- wan lcpll_ports 1 BCM_NS2_LCPLL_PORTS_WAN_CLK
- rgmii lcpll_ports 2 BCM_NS2_LCPLL_PORTS_RGMII_CLK
- ch2_unused lcpll_ports 3 BCM_NS2_LCPLL_PORTS_CH2_UNUSED
- ch3_unused lcpll_ports 4 BCM_NS2_LCPLL_PORTS_CH3_UNUSED
- ch4_unused lcpll_ports 5 BCM_NS2_LCPLL_PORTS_CH4_UNUSED
- ch5_unused lcpll_ports 6 BCM_NS2_LCPLL_PORTS_CH5_UNUSED
-
-BCM63138
---------
-PLL and leaf clock compatible strings for BCM63138 are:
- "brcm,bcm63138-armpll"
-
-Stingray
------------
-PLL and leaf clock compatible strings for Stingray are:
- "brcm,sr-genpll0"
- "brcm,sr-genpll1"
- "brcm,sr-genpll2"
- "brcm,sr-genpll3"
- "brcm,sr-genpll4"
- "brcm,sr-genpll5"
- "brcm,sr-genpll6"
-
- "brcm,sr-lcpll0"
- "brcm,sr-lcpll1"
- "brcm,sr-lcpll-pcie"
-
-
-The following table defines the set of PLL/clock index and ID for Stingray.
-These clock IDs are defined in:
- "include/dt-bindings/clock/bcm-sr.h"
-
- Clock Source Index ID
- --- ----- ----- ---------
- crystal N/A N/A N/A
- crmu_ref25m crystal N/A N/A
-
- genpll0 crystal 0 BCM_SR_GENPLL0
- clk_125m genpll0 1 BCM_SR_GENPLL0_125M_CLK
- clk_scr genpll0 2 BCM_SR_GENPLL0_SCR_CLK
- clk_250 genpll0 3 BCM_SR_GENPLL0_250M_CLK
- clk_pcie_axi genpll0 4 BCM_SR_GENPLL0_PCIE_AXI_CLK
- clk_paxc_axi_x2 genpll0 5 BCM_SR_GENPLL0_PAXC_AXI_X2_CLK
- clk_paxc_axi genpll0 6 BCM_SR_GENPLL0_PAXC_AXI_CLK
-
- genpll1 crystal 0 BCM_SR_GENPLL1
- clk_pcie_tl genpll1 1 BCM_SR_GENPLL1_PCIE_TL_CLK
- clk_mhb_apb genpll1 2 BCM_SR_GENPLL1_MHB_APB_CLK
-
- genpll2 crystal 0 BCM_SR_GENPLL2
- clk_nic genpll2 1 BCM_SR_GENPLL2_NIC_CLK
- clk_ts_500_ref genpll2 2 BCM_SR_GENPLL2_TS_500_REF_CLK
- clk_125_nitro genpll2 3 BCM_SR_GENPLL2_125_NITRO_CLK
- clk_chimp genpll2 4 BCM_SR_GENPLL2_CHIMP_CLK
- clk_nic_flash genpll2 5 BCM_SR_GENPLL2_NIC_FLASH_CLK
- clk_fs genpll2 6 BCM_SR_GENPLL2_FS_CLK
-
- genpll3 crystal 0 BCM_SR_GENPLL3
- clk_hsls genpll3 1 BCM_SR_GENPLL3_HSLS_CLK
- clk_sdio genpll3 2 BCM_SR_GENPLL3_SDIO_CLK
-
- genpll4 crystal 0 BCM_SR_GENPLL4
- clk_ccn genpll4 1 BCM_SR_GENPLL4_CCN_CLK
- clk_tpiu_pll genpll4 2 BCM_SR_GENPLL4_TPIU_PLL_CLK
- clk_noc genpll4 3 BCM_SR_GENPLL4_NOC_CLK
- clk_chclk_fs4 genpll4 4 BCM_SR_GENPLL4_CHCLK_FS4_CLK
- clk_bridge_fscpu genpll4 5 BCM_SR_GENPLL4_BRIDGE_FSCPU_CLK
-
- genpll5 crystal 0 BCM_SR_GENPLL5
- clk_fs4_hf genpll5 1 BCM_SR_GENPLL5_FS4_HF_CLK
- clk_crypto_ae genpll5 2 BCM_SR_GENPLL5_CRYPTO_AE_CLK
- clk_raid_ae genpll5 3 BCM_SR_GENPLL5_RAID_AE_CLK
-
- genpll6 crystal 0 BCM_SR_GENPLL6
- clk_48_usb genpll6 1 BCM_SR_GENPLL6_48_USB_CLK
-
- lcpll0 crystal 0 BCM_SR_LCPLL0
- clk_sata_refp lcpll0 1 BCM_SR_LCPLL0_SATA_REFP_CLK
- clk_sata_refn lcpll0 2 BCM_SR_LCPLL0_SATA_REFN_CLK
- clk_sata_350 lcpll0 3 BCM_SR_LCPLL0_SATA_350_CLK
- clk_sata_500 lcpll0 4 BCM_SR_LCPLL0_SATA_500_CLK
-
- lcpll1 crystal 0 BCM_SR_LCPLL1
- clk_wan lcpll1 1 BCM_SR_LCPLL1_WAN_CLK
- clk_usb_ref lcpll1 2 BCM_SR_LCPLL1_USB_REF_CLK
- clk_crmu_ts lcpll1 3 BCM_SR_LCPLL1_CRMU_TS_CLK
-
- lcpll_pcie crystal 0 BCM_SR_LCPLL_PCIE
- clk_pcie_phy_ref lcpll1 1 BCM_SR_LCPLL_PCIE_PHY_REF_CLK
diff --git a/Documentation/devicetree/bindings/clock/brcm,iproc-clocks.yaml b/Documentation/devicetree/bindings/clock/brcm,iproc-clocks.yaml
new file mode 100644
index 000000000000..5ad147d265e6
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/brcm,iproc-clocks.yaml
@@ -0,0 +1,417 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/brcm,iproc-clocks.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom iProc Family Clocks
+
+maintainers:
+ - Ray Jui <rjui@broadcom.com>
+ - Scott Branden <sbranden@broadcom.com>
+
+description: |
+ The iProc clock controller manages clocks that are common to the iProc family.
+ An SoC from the iProc family may have several PLLs, e.g., ARMPLL, GENPLL,
+ LCPLL0, MIPIPLL, and etc., all derived from an onboard crystal. Each PLL
+ comprises of several leaf clocks
+
+ ASIU clocks are a special case. These clocks are derived directly from the
+ reference clock of the onboard crystal.
+
+properties:
+ compatible:
+ enum:
+ - brcm,bcm63138-armpll
+ - brcm,cygnus-armpll
+ - brcm,cygnus-genpll
+ - brcm,cygnus-lcpll0
+ - brcm,cygnus-mipipll
+ - brcm,cygnus-asiu-clk
+ - brcm,cygnus-audiopll
+ - brcm,hr2-armpll
+ - brcm,nsp-armpll
+ - brcm,nsp-genpll
+ - brcm,nsp-lcpll0
+ - brcm,ns2-genpll-scr
+ - brcm,ns2-genpll-sw
+ - brcm,ns2-lcpll-ddr
+ - brcm,ns2-lcpll-ports
+ - brcm,sr-genpll0
+ - brcm,sr-genpll1
+ - brcm,sr-genpll2
+ - brcm,sr-genpll3
+ - brcm,sr-genpll4
+ - brcm,sr-genpll5
+ - brcm,sr-genpll6
+ - brcm,sr-lcpll0
+ - brcm,sr-lcpll1
+ - brcm,sr-lcpll-pcie
+
+ reg:
+ minItems: 1
+ items:
+ - description: base register
+ - description: power register
+ - description: ASIU or split status register
+
+ clocks:
+ description: The input parent clock phandle for the PLL / ASIU clock. For
+ most iProc PLLs, this is an onboard crystal with a fixed rate.
+ maxItems: 1
+
+ '#clock-cells':
+ true
+
+ clock-output-names:
+ minItems: 1
+ maxItems: 45
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - brcm,cygnus-armpll
+ - brcm,nsp-armpll
+ then:
+ properties:
+ '#clock-cells':
+ const: 0
+ else:
+ properties:
+ '#clock-cells':
+ const: 1
+ required:
+ - clock-output-names
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - brcm,cygnus-armpll
+ - brcm,cygnus-genpll
+ - brcm,cygnus-lcpll0
+ - brcm,cygnus-mipipll
+ - brcm,cygnus-asiu-clk
+ - brcm,cygnus-audiopll
+ then:
+ properties:
+ clock-output-names:
+ description: |
+ The following table defines the set of PLL/clock index and ID for Cygnus.
+ These clock IDs are defined in:
+ "include/dt-bindings/clock/bcm-cygnus.h"
+
+ Clock Source (Parent) Index ID
+ ----- --------------- ----- --
+ crystal N/A N/A N/A
+
+ armpll crystal N/A N/A
+
+ keypad crystal (ASIU) 0 BCM_CYGNUS_ASIU_KEYPAD_CLK
+ adc/tsc crystal (ASIU) 1 BCM_CYGNUS_ASIU_ADC_CLK
+ pwm crystal (ASIU) 2 BCM_CYGNUS_ASIU_PWM_CLK
+
+ genpll crystal 0 BCM_CYGNUS_GENPLL
+ axi21 genpll 1 BCM_CYGNUS_GENPLL_AXI21_CLK
+ 250mhz genpll 2 BCM_CYGNUS_GENPLL_250MHZ_CLK
+ ihost_sys genpll 3 BCM_CYGNUS_GENPLL_IHOST_SYS_CLK
+ enet_sw genpll 4 BCM_CYGNUS_GENPLL_ENET_SW_CLK
+ audio_125 genpll 5 BCM_CYGNUS_GENPLL_AUDIO_125_CLK
+ can genpll 6 BCM_CYGNUS_GENPLL_CAN_CLK
+
+ lcpll0 crystal 0 BCM_CYGNUS_LCPLL0
+ pcie_phy lcpll0 1 BCM_CYGNUS_LCPLL0_PCIE_PHY_REF_CLK
+ ddr_phy lcpll0 2 BCM_CYGNUS_LCPLL0_DDR_PHY_CLK
+ sdio lcpll0 3 BCM_CYGNUS_LCPLL0_SDIO_CLK
+ usb_phy lcpll0 4 BCM_CYGNUS_LCPLL0_USB_PHY_REF_CLK
+ smart_card lcpll0 5 BCM_CYGNUS_LCPLL0_SMART_CARD_CLK
+ ch5_unused lcpll0 6 BCM_CYGNUS_LCPLL0_CH5_UNUSED
+
+ mipipll crystal 0 BCM_CYGNUS_MIPIPLL
+ ch0_unused mipipll 1 BCM_CYGNUS_MIPIPLL_CH0_UNUSED
+ ch1_lcd mipipll 2 BCM_CYGNUS_MIPIPLL_CH1_LCD
+ ch2_v3d mipipll 3 BCM_CYGNUS_MIPIPLL_CH2_V3D
+ ch3_unused mipipll 4 BCM_CYGNUS_MIPIPLL_CH3_UNUSED
+ ch4_unused mipipll 5 BCM_CYGNUS_MIPIPLL_CH4_UNUSED
+ ch5_unused mipipll 6 BCM_CYGNUS_MIPIPLL_CH5_UNUSED
+
+ audiopll crystal 0 BCM_CYGNUS_AUDIOPLL
+ ch0_audio audiopll 1 BCM_CYGNUS_AUDIOPLL_CH0
+ ch1_audio audiopll 2 BCM_CYGNUS_AUDIOPLL_CH1
+ ch2_audio audiopll 3 BCM_CYGNUS_AUDIOPLL_CH2
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - brcm,hr2-armpll
+ then:
+ properties:
+ clock-output-names:
+ description: |
+ The following table defines the set of PLL/clock for Hurricane 2:
+
+ Clock Source Index ID
+ ----- ------ ----- --
+ crystal N/A N/A N/A
+
+ armpll crystal N/A N/A
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - brcm,nsp-armpll
+ - brcm,nsp-genpll
+ - brcm,nsp-lcpll0
+ then:
+ properties:
+ clock-output-names:
+ description: |
+ The following table defines the set of PLL/clock index and ID for Northstar and
+ Northstar Plus. These clock IDs are defined in:
+ "include/dt-bindings/clock/bcm-nsp.h"
+
+ Clock Source Index ID
+ ----- ------ ----- --
+ crystal N/A N/A N/A
+
+ armpll crystal N/A N/A
+
+ genpll crystal 0 BCM_NSP_GENPLL
+ phy genpll 1 BCM_NSP_GENPLL_PHY_CLK
+ ethernetclk genpll 2 BCM_NSP_GENPLL_ENET_SW_CLK
+ usbclk genpll 3 BCM_NSP_GENPLL_USB_PHY_REF_CLK
+ iprocfast genpll 4 BCM_NSP_GENPLL_IPROCFAST_CLK
+ sata1 genpll 5 BCM_NSP_GENPLL_SATA1_CLK
+ sata2 genpll 6 BCM_NSP_GENPLL_SATA2_CLK
+
+ lcpll0 crystal 0 BCM_NSP_LCPLL0
+ pcie_phy lcpll0 1 BCM_NSP_LCPLL0_PCIE_PHY_REF_CLK
+ sdio lcpll0 2 BCM_NSP_LCPLL0_SDIO_CLK
+ ddr_phy lcpll0 3 BCM_NSP_LCPLL0_DDR_PHY_CLK
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - brcm,ns2-genpll-scr
+ - brcm,ns2-genpll-sw
+ - brcm,ns2-lcpll-ddr
+ - brcm,ns2-lcpll-ports
+ then:
+ properties:
+ clock-output-names:
+ description: |
+ The following table defines the set of PLL/clock index and ID for Northstar 2.
+ These clock IDs are defined in:
+ "include/dt-bindings/clock/bcm-ns2.h"
+
+ Clock Source Index ID
+ ----- ------ ----- --
+ crystal N/A N/A N/A
+
+ genpll_scr crystal 0 BCM_NS2_GENPLL_SCR
+ scr genpll_scr 1 BCM_NS2_GENPLL_SCR_SCR_CLK
+ fs genpll_scr 2 BCM_NS2_GENPLL_SCR_FS_CLK
+ audio_ref genpll_scr 3 BCM_NS2_GENPLL_SCR_AUDIO_CLK
+ ch3_unused genpll_scr 4 BCM_NS2_GENPLL_SCR_CH3_UNUSED
+ ch4_unused genpll_scr 5 BCM_NS2_GENPLL_SCR_CH4_UNUSED
+ ch5_unused genpll_scr 6 BCM_NS2_GENPLL_SCR_CH5_UNUSED
+
+ genpll_sw crystal 0 BCM_NS2_GENPLL_SW
+ rpe genpll_sw 1 BCM_NS2_GENPLL_SW_RPE_CLK
+ 250 genpll_sw 2 BCM_NS2_GENPLL_SW_250_CLK
+ nic genpll_sw 3 BCM_NS2_GENPLL_SW_NIC_CLK
+ chimp genpll_sw 4 BCM_NS2_GENPLL_SW_CHIMP_CLK
+ port genpll_sw 5 BCM_NS2_GENPLL_SW_PORT_CLK
+ sdio genpll_sw 6 BCM_NS2_GENPLL_SW_SDIO_CLK
+
+ lcpll_ddr crystal 0 BCM_NS2_LCPLL_DDR
+ pcie_sata_usb lcpll_ddr 1 BCM_NS2_LCPLL_DDR_PCIE_SATA_USB_CLK
+ ddr lcpll_ddr 2 BCM_NS2_LCPLL_DDR_DDR_CLK
+ ch2_unused lcpll_ddr 3 BCM_NS2_LCPLL_DDR_CH2_UNUSED
+ ch3_unused lcpll_ddr 4 BCM_NS2_LCPLL_DDR_CH3_UNUSED
+ ch4_unused lcpll_ddr 5 BCM_NS2_LCPLL_DDR_CH4_UNUSED
+ ch5_unused lcpll_ddr 6 BCM_NS2_LCPLL_DDR_CH5_UNUSED
+
+ lcpll_ports crystal 0 BCM_NS2_LCPLL_PORTS
+ wan lcpll_ports 1 BCM_NS2_LCPLL_PORTS_WAN_CLK
+ rgmii lcpll_ports 2 BCM_NS2_LCPLL_PORTS_RGMII_CLK
+ ch2_unused lcpll_ports 3 BCM_NS2_LCPLL_PORTS_CH2_UNUSED
+ ch3_unused lcpll_ports 4 BCM_NS2_LCPLL_PORTS_CH3_UNUSED
+ ch4_unused lcpll_ports 5 BCM_NS2_LCPLL_PORTS_CH4_UNUSED
+ ch5_unused lcpll_ports 6 BCM_NS2_LCPLL_PORTS_CH5_UNUSED
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - brcm,sr-genpll0
+ - brcm,sr-genpll1
+ - brcm,sr-genpll2
+ - brcm,sr-genpll3
+ - brcm,sr-genpll4
+ - brcm,sr-genpll5
+ - brcm,sr-genpll6
+ - brcm,sr-lcpll0
+ - brcm,sr-lcpll1
+ - brcm,sr-lcpll-pcie
+ then:
+ properties:
+ clock-output-names:
+ description: |
+ The following table defines the set of PLL/clock index and ID for Stingray.
+ These clock IDs are defined in:
+ "include/dt-bindings/clock/bcm-sr.h"
+
+ Clock Source Index ID
+ ----- ------ ----- --
+ crystal N/A N/A N/A
+ crmu_ref25m crystal N/A N/A
+
+ genpll0 crystal 0 BCM_SR_GENPLL0
+ clk_125m genpll0 1 BCM_SR_GENPLL0_125M_CLK
+ clk_scr genpll0 2 BCM_SR_GENPLL0_SCR_CLK
+ clk_250 genpll0 3 BCM_SR_GENPLL0_250M_CLK
+ clk_pcie_axi genpll0 4 BCM_SR_GENPLL0_PCIE_AXI_CLK
+ clk_paxc_axi_x2 genpll0 5 BCM_SR_GENPLL0_PAXC_AXI_X2_CLK
+ clk_paxc_axi genpll0 6 BCM_SR_GENPLL0_PAXC_AXI_CLK
+
+ genpll1 crystal 0 BCM_SR_GENPLL1
+ clk_pcie_tl genpll1 1 BCM_SR_GENPLL1_PCIE_TL_CLK
+ clk_mhb_apb genpll1 2 BCM_SR_GENPLL1_MHB_APB_CLK
+
+ genpll2 crystal 0 BCM_SR_GENPLL2
+ clk_nic genpll2 1 BCM_SR_GENPLL2_NIC_CLK
+ clk_ts_500_ref genpll2 2 BCM_SR_GENPLL2_TS_500_REF_CLK
+ clk_125_nitro genpll2 3 BCM_SR_GENPLL2_125_NITRO_CLK
+ clk_chimp genpll2 4 BCM_SR_GENPLL2_CHIMP_CLK
+ clk_nic_flash genpll2 5 BCM_SR_GENPLL2_NIC_FLASH_CLK
+ clk_fs genpll2 6 BCM_SR_GENPLL2_FS_CLK
+
+ genpll3 crystal 0 BCM_SR_GENPLL3
+ clk_hsls genpll3 1 BCM_SR_GENPLL3_HSLS_CLK
+ clk_sdio genpll3 2 BCM_SR_GENPLL3_SDIO_CLK
+
+ genpll4 crystal 0 BCM_SR_GENPLL4
+ clk_ccn genpll4 1 BCM_SR_GENPLL4_CCN_CLK
+ clk_tpiu_pll genpll4 2 BCM_SR_GENPLL4_TPIU_PLL_CLK
+ clk_noc genpll4 3 BCM_SR_GENPLL4_NOC_CLK
+ clk_chclk_fs4 genpll4 4 BCM_SR_GENPLL4_CHCLK_FS4_CLK
+ clk_bridge_fscpu genpll4 5 BCM_SR_GENPLL4_BRIDGE_FSCPU_CLK
+
+ genpll5 crystal 0 BCM_SR_GENPLL5
+ clk_fs4_hf genpll5 1 BCM_SR_GENPLL5_FS4_HF_CLK
+ clk_crypto_ae genpll5 2 BCM_SR_GENPLL5_CRYPTO_AE_CLK
+ clk_raid_ae genpll5 3 BCM_SR_GENPLL5_RAID_AE_CLK
+
+ genpll6 crystal 0 BCM_SR_GENPLL6
+ clk_48_usb genpll6 1 BCM_SR_GENPLL6_48_USB_CLK
+
+ lcpll0 crystal 0 BCM_SR_LCPLL0
+ clk_sata_refp lcpll0 1 BCM_SR_LCPLL0_SATA_REFP_CLK
+ clk_sata_refn lcpll0 2 BCM_SR_LCPLL0_SATA_REFN_CLK
+ clk_sata_350 lcpll0 3 BCM_SR_LCPLL0_SATA_350_CLK
+ clk_sata_500 lcpll0 4 BCM_SR_LCPLL0_SATA_500_CLK
+
+ lcpll1 crystal 0 BCM_SR_LCPLL1
+ clk_wan lcpll1 1 BCM_SR_LCPLL1_WAN_CLK
+ clk_usb_ref lcpll1 2 BCM_SR_LCPLL1_USB_REF_CLK
+ clk_crmu_ts lcpll1 3 BCM_SR_LCPLL1_CRMU_TS_CLK
+
+ lcpll_pcie crystal 0 BCM_SR_LCPLL_PCIE
+ clk_pcie_phy_ref lcpll1 1 BCM_SR_LCPLL_PCIE_PHY_REF_CLK
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: brcm,cygnus-genpll
+ then:
+ properties:
+ clock-output-names:
+ items:
+ - const: genpll
+ - const: axi21
+ - const: 250mhz
+ - const: ihost_sys
+ - const: enet_sw
+ - const: audio_125
+ - const: can
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: brcm,nsp-lcpll0
+ then:
+ properties:
+ clock-output-names:
+ items:
+ - const: lcpll0
+ - const: pcie_phy
+ - const: sdio
+ - const: ddr_phy
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: brcm,nsp-genpll
+ then:
+ properties:
+ clock-output-names:
+ items:
+ - const: genpll
+ - const: phy
+ - const: ethernetclk
+ - const: usbclk
+ - const: iprocfast
+ - const: sata1
+ - const: sata2
+
+required:
+ - reg
+ - clocks
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ osc1: oscillator {
+ #clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <25000000>;
+ };
+
+ genpll@301d000 {
+ #clock-cells = <1>;
+ compatible = "brcm,cygnus-genpll";
+ reg = <0x301d000 0x2c>, <0x301c020 0x4>;
+ clocks = <&os1c>;
+ clock-output-names = "genpll", "axi21", "250mhz", "ihost_sys",
+ "enet_sw", "audio_125", "can";
+ };
+ - |
+ osc2: oscillator {
+ #clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <25000000>;
+ };
+
+ asiu_clks@301d048 {
+ #clock-cells = <1>;
+ compatible = "brcm,cygnus-asiu-clk";
+ reg = <0x301d048 0xc>, <0x180aa024 0x4>;
+ clocks = <&osc2>;
+ clock-output-names = "keypad", "adc/touch", "pwm";
+ };
+ - |
+ arm_clk@0 {
+ #clock-cells = <0>;
+ compatible = "brcm,nsp-armpll";
+ clocks = <&osc>;
+ reg = <0x0 0x1000>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/brcm,kona-ccu.txt b/Documentation/devicetree/bindings/clock/brcm,kona-ccu.txt
deleted file mode 100644
index 8e5a7d868557..000000000000
--- a/Documentation/devicetree/bindings/clock/brcm,kona-ccu.txt
+++ /dev/null
@@ -1,138 +0,0 @@
-Broadcom Kona Family Clocks
-
-This binding is associated with Broadcom SoCs having "Kona" style
-clock control units (CCUs). A CCU is a clock provider that manages
-a set of clock signals. Each CCU is represented by a node in the
-device tree.
-
-This binding uses the common clock binding:
- Documentation/devicetree/bindings/clock/clock-bindings.txt
-
-Required properties:
-- compatible
- Shall have a value of the form "brcm,<model>-<which>-ccu",
- where <model> is a Broadcom SoC model number and <which> is
- the name of a defined CCU. For example:
- "brcm,bcm11351-root-ccu"
- The compatible strings used for each supported SoC family
- are defined below.
-- reg
- Shall define the base and range of the address space
- containing clock control registers
-- #clock-cells
- Shall have value <1>. The permitted clock-specifier values
- are defined below.
-- clock-output-names
- Shall be an ordered list of strings defining the names of
- the clocks provided by the CCU.
-
-Device tree example:
-
- slave_ccu: slave_ccu {
- compatible = "brcm,bcm11351-slave-ccu";
- reg = <0x3e011000 0x0f00>;
- #clock-cells = <1>;
- clock-output-names = "uartb",
- "uartb2",
- "uartb3",
- "uartb4";
- };
-
- ref_crystal_clk: ref_crystal {
- #clock-cells = <0>;
- compatible = "fixed-clock";
- clock-frequency = <26000000>;
- };
-
- uart@3e002000 {
- compatible = "brcm,bcm11351-dw-apb-uart", "snps,dw-apb-uart";
- reg = <0x3e002000 0x1000>;
- clocks = <&slave_ccu BCM281XX_SLAVE_CCU_UARTB3>;
- interrupts = <GIC_SPI 65 IRQ_TYPE_LEVEL_HIGH>;
- reg-shift = <2>;
- reg-io-width = <4>;
- };
-
-BCM281XX family
----------------
-CCU compatible string values for SoCs in the BCM281XX family are:
- "brcm,bcm11351-root-ccu"
- "brcm,bcm11351-aon-ccu"
- "brcm,bcm11351-hub-ccu"
- "brcm,bcm11351-master-ccu"
- "brcm,bcm11351-slave-ccu"
-
-The following table defines the set of CCUs and clock specifiers for
-BCM281XX family clocks. When a clock consumer references a clocks,
-its symbolic specifier (rather than its numeric index value) should
-be used. These specifiers are defined in:
- "include/dt-bindings/clock/bcm281xx.h"
-
- CCU Clock Type Index Specifier
- --- ----- ---- ----- ---------
- root frac_1m peri 0 BCM281XX_ROOT_CCU_FRAC_1M
-
- aon hub_timer peri 0 BCM281XX_AON_CCU_HUB_TIMER
- aon pmu_bsc peri 1 BCM281XX_AON_CCU_PMU_BSC
- aon pmu_bsc_var peri 2 BCM281XX_AON_CCU_PMU_BSC_VAR
-
- hub tmon_1m peri 0 BCM281XX_HUB_CCU_TMON_1M
-
- master sdio1 peri 0 BCM281XX_MASTER_CCU_SDIO1
- master sdio2 peri 1 BCM281XX_MASTER_CCU_SDIO2
- master sdio3 peri 2 BCM281XX_MASTER_CCU_SDIO3
- master sdio4 peri 3 BCM281XX_MASTER_CCU_SDIO4
- master dmac peri 4 BCM281XX_MASTER_CCU_DMAC
- master usb_ic peri 5 BCM281XX_MASTER_CCU_USB_IC
- master hsic2_48m peri 6 BCM281XX_MASTER_CCU_HSIC_48M
- master hsic2_12m peri 7 BCM281XX_MASTER_CCU_HSIC_12M
-
- slave uartb peri 0 BCM281XX_SLAVE_CCU_UARTB
- slave uartb2 peri 1 BCM281XX_SLAVE_CCU_UARTB2
- slave uartb3 peri 2 BCM281XX_SLAVE_CCU_UARTB3
- slave uartb4 peri 3 BCM281XX_SLAVE_CCU_UARTB4
- slave ssp0 peri 4 BCM281XX_SLAVE_CCU_SSP0
- slave ssp2 peri 5 BCM281XX_SLAVE_CCU_SSP2
- slave bsc1 peri 6 BCM281XX_SLAVE_CCU_BSC1
- slave bsc2 peri 7 BCM281XX_SLAVE_CCU_BSC2
- slave bsc3 peri 8 BCM281XX_SLAVE_CCU_BSC3
- slave pwm peri 9 BCM281XX_SLAVE_CCU_PWM
-
-
-BCM21664 family
----------------
-CCU compatible string values for SoCs in the BCM21664 family are:
- "brcm,bcm21664-root-ccu"
- "brcm,bcm21664-aon-ccu"
- "brcm,bcm21664-master-ccu"
- "brcm,bcm21664-slave-ccu"
-
-The following table defines the set of CCUs and clock specifiers for
-BCM21664 family clocks. When a clock consumer references a clocks,
-its symbolic specifier (rather than its numeric index value) should
-be used. These specifiers are defined in:
- "include/dt-bindings/clock/bcm21664.h"
-
- CCU Clock Type Index Specifier
- --- ----- ---- ----- ---------
- root frac_1m peri 0 BCM21664_ROOT_CCU_FRAC_1M
-
- aon hub_timer peri 0 BCM21664_AON_CCU_HUB_TIMER
-
- master sdio1 peri 0 BCM21664_MASTER_CCU_SDIO1
- master sdio2 peri 1 BCM21664_MASTER_CCU_SDIO2
- master sdio3 peri 2 BCM21664_MASTER_CCU_SDIO3
- master sdio4 peri 3 BCM21664_MASTER_CCU_SDIO4
- master sdio1_sleep peri 4 BCM21664_MASTER_CCU_SDIO1_SLEEP
- master sdio2_sleep peri 5 BCM21664_MASTER_CCU_SDIO2_SLEEP
- master sdio3_sleep peri 6 BCM21664_MASTER_CCU_SDIO3_SLEEP
- master sdio4_sleep peri 7 BCM21664_MASTER_CCU_SDIO4_SLEEP
-
- slave uartb peri 0 BCM21664_SLAVE_CCU_UARTB
- slave uartb2 peri 1 BCM21664_SLAVE_CCU_UARTB2
- slave uartb3 peri 2 BCM21664_SLAVE_CCU_UARTB3
- slave uartb4 peri 3 BCM21664_SLAVE_CCU_UARTB4
- slave bsc1 peri 4 BCM21664_SLAVE_CCU_BSC1
- slave bsc2 peri 5 BCM21664_SLAVE_CCU_BSC2
- slave bsc3 peri 6 BCM21664_SLAVE_CCU_BSC3
- slave bsc4 peri 7 BCM21664_SLAVE_CCU_BSC4
diff --git a/Documentation/devicetree/bindings/clock/brcm,kona-ccu.yaml b/Documentation/devicetree/bindings/clock/brcm,kona-ccu.yaml
new file mode 100644
index 000000000000..e5656950b3bd
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/brcm,kona-ccu.yaml
@@ -0,0 +1,181 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/brcm,kona-ccu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom Kona family clock control units (CCU)
+
+maintainers:
+ - Florian Fainelli <florian.fainelli@broadcom.com>
+ - Ray Jui <rjui@broadcom.com>
+ - Scott Branden <sbranden@broadcom.com>
+
+description: |
+ Broadcom "Kona" style clock control unit (CCU) is a clock provider that
+ manages a set of clock signals.
+
+ All available clock IDs are defined in
+ - include/dt-bindings/clock/bcm281xx.h for BCM281XX family
+ - include/dt-bindings/clock/bcm21664.h for BCM21664 family
+
+properties:
+ compatible:
+ enum:
+ - brcm,bcm11351-aon-ccu
+ - brcm,bcm11351-hub-ccu
+ - brcm,bcm11351-master-ccu
+ - brcm,bcm11351-root-ccu
+ - brcm,bcm11351-slave-ccu
+ - brcm,bcm21664-aon-ccu
+ - brcm,bcm21664-master-ccu
+ - brcm,bcm21664-root-ccu
+ - brcm,bcm21664-slave-ccu
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ clock-output-names:
+ minItems: 1
+ maxItems: 10
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+ - clock-output-names
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: brcm,bcm11351-aon-ccu
+ then:
+ properties:
+ clock-output-names:
+ items:
+ - const: hub_timer
+ - const: pmu_bsc
+ - const: pmu_bsc_var
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: brcm,bcm11351-hub-ccu
+ then:
+ properties:
+ clock-output-names:
+ const: tmon_1m
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: brcm,bcm11351-master-ccu
+ then:
+ properties:
+ clock-output-names:
+ items:
+ - const: sdio1
+ - const: sdio2
+ - const: sdio3
+ - const: sdio4
+ - const: usb_ic
+ - const: hsic2_48m
+ - const: hsic2_12m
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - brcm,bcm11351-root-ccu
+ - brcm,bcm21664-root-ccu
+ then:
+ properties:
+ clock-output-names:
+ const: frac_1m
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: brcm,bcm11351-slave-ccu
+ then:
+ properties:
+ clock-output-names:
+ items:
+ - const: uartb
+ - const: uartb2
+ - const: uartb3
+ - const: uartb4
+ - const: ssp0
+ - const: ssp2
+ - const: bsc1
+ - const: bsc2
+ - const: bsc3
+ - const: pwm
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: brcm,bcm21664-aon-ccu
+ then:
+ properties:
+ clock-output-names:
+ const: hub_timer
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: brcm,bcm21664-master-ccu
+ then:
+ properties:
+ clock-output-names:
+ items:
+ - const: sdio1
+ - const: sdio2
+ - const: sdio3
+ - const: sdio4
+ - const: sdio1_sleep
+ - const: sdio2_sleep
+ - const: sdio3_sleep
+ - const: sdio4_sleep
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: brcm,bcm21664-slave-ccu
+ then:
+ properties:
+ clock-output-names:
+ items:
+ - const: uartb
+ - const: uartb2
+ - const: uartb3
+ - const: bsc1
+ - const: bsc2
+ - const: bsc3
+ - const: bsc4
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@3e011000 {
+ compatible = "brcm,bcm11351-slave-ccu";
+ reg = <0x3e011000 0x0f00>;
+ #clock-cells = <1>;
+ clock-output-names = "uartb",
+ "uartb2",
+ "uartb3",
+ "uartb4",
+ "ssp0",
+ "ssp2",
+ "bsc1",
+ "bsc2",
+ "bsc3",
+ "pwm";
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/calxeda.txt b/Documentation/devicetree/bindings/clock/calxeda.txt
deleted file mode 100644
index 0a6ac1bdcda1..000000000000
--- a/Documentation/devicetree/bindings/clock/calxeda.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-Device Tree Clock bindings for Calxeda highbank platform
-
-This binding uses the common clock binding[1].
-
-[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-
-Required properties:
-- compatible : shall be one of the following:
- "calxeda,hb-pll-clock" - for a PLL clock
- "calxeda,hb-a9periph-clock" - The A9 peripheral clock divided from the
- A9 clock.
- "calxeda,hb-a9bus-clock" - The A9 bus clock divided from the A9 clock.
- "calxeda,hb-emmc-clock" - Divided clock for MMC/SD controller.
-- reg : shall be the control register offset from SYSREGs base for the clock.
-- clocks : shall be the input parent clock phandle for the clock. This is
- either an oscillator or a pll output.
-- #clock-cells : from common clock binding; shall be set to 0.
diff --git a/Documentation/devicetree/bindings/clock/calxeda.yaml b/Documentation/devicetree/bindings/clock/calxeda.yaml
new file mode 100644
index 000000000000..a88fbe20fef1
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/calxeda.yaml
@@ -0,0 +1,82 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/calxeda.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Calxeda highbank platform Clock Controller
+
+description: |
+ This binding covers the Calxeda SoC internal peripheral and bus clocks
+ as used by peripherals. The clocks live inside the "system register"
+ region of the SoC, so are typically presented as children of an
+ "hb-sregs" node.
+
+maintainers:
+ - Andre Przywara <andre.przywara@arm.com>
+
+properties:
+ "#clock-cells":
+ const: 0
+
+ compatible:
+ enum:
+ - calxeda,hb-pll-clock
+ - calxeda,hb-a9periph-clock
+ - calxeda,hb-a9bus-clock
+ - calxeda,hb-emmc-clock
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+required:
+ - "#clock-cells"
+ - compatible
+ - clocks
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ sregs@3fffc000 {
+ compatible = "calxeda,hb-sregs";
+ reg = <0x3fffc000 0x1000>;
+
+ clocks {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ osc: oscillator {
+ #clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <33333000>;
+ };
+
+ ddrpll: ddrpll@108 {
+ #clock-cells = <0>;
+ compatible = "calxeda,hb-pll-clock";
+ clocks = <&osc>;
+ reg = <0x108>;
+ };
+
+ a9pll: a9pll@100 {
+ #clock-cells = <0>;
+ compatible = "calxeda,hb-pll-clock";
+ clocks = <&osc>;
+ reg = <0x100>;
+ };
+
+ a9periphclk: a9periphclk@104 {
+ #clock-cells = <0>;
+ compatible = "calxeda,hb-a9periph-clock";
+ clocks = <&a9pll>;
+ reg = <0x104>;
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/canaan,k210-clk.yaml b/Documentation/devicetree/bindings/clock/canaan,k210-clk.yaml
new file mode 100644
index 000000000000..380cb6d80025
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/canaan,k210-clk.yaml
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/canaan,k210-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Canaan Kendryte K210 Clock
+
+maintainers:
+ - Damien Le Moal <dlemoal@kernel.org>
+
+description: |
+ Canaan Kendryte K210 SoC clocks driver bindings. The clock
+ controller node must be defined as a child node of the K210
+ system controller node.
+
+ See also:
+ - dt-bindings/clock/k210-clk.h
+
+properties:
+ compatible:
+ const: canaan,k210-clk
+
+ clocks:
+ maxItems: 1
+ description:
+ Phandle of the SoC 26MHz fixed-rate oscillator clock.
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - '#clock-cells'
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/k210-clk.h>
+ clocks {
+ in0: oscillator {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <26000000>;
+ };
+ };
+
+ /* ... */
+ sysclk: clock-controller {
+ #clock-cells = <1>;
+ compatible = "canaan,k210-clk";
+ clocks = <&in0>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/cirrus,cs2000-cp.yaml b/Documentation/devicetree/bindings/clock/cirrus,cs2000-cp.yaml
new file mode 100644
index 000000000000..d416c374e853
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/cirrus,cs2000-cp.yaml
@@ -0,0 +1,90 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/cirrus,cs2000-cp.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: CIRRUS LOGIC Fractional-N Clock Synthesizer & Clock Multiplier
+
+maintainers:
+ - Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
+
+description: |
+ The CS2000-CP is an extremely versatile system clocking device that
+ utilizes a programmable phase lock loop.
+
+ Link: https://www.cirrus.com/products/cs2000/
+
+properties:
+ compatible:
+ enum:
+ - cirrus,cs2000-cp
+
+ clocks:
+ description:
+ Common clock binding for CLK_IN, XTI/REF_CLK
+ maxItems: 2
+
+ clock-names:
+ items:
+ - const: clk_in
+ - const: ref_clk
+
+ '#clock-cells':
+ const: 0
+
+ reg:
+ maxItems: 1
+
+ cirrus,aux-output-source:
+ description:
+ Specifies the function of the auxiliary clock output pin
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum:
+ - 0 # CS2000CP_AUX_OUTPUT_REF_CLK: ref_clk input
+ - 1 # CS2000CP_AUX_OUTPUT_CLK_IN: clk_in input
+ - 2 # CS2000CP_AUX_OUTPUT_CLK_OUT: clk_out output
+ - 3 # CS2000CP_AUX_OUTPUT_PLL_LOCK: pll lock status
+ default: 0
+
+ cirrus,clock-skip:
+ description:
+ This mode allows the PLL to maintain lock even when CLK_IN
+ has missing pulses for up to 20 ms.
+ $ref: /schemas/types.yaml#/definitions/flag
+
+ cirrus,dynamic-mode:
+ description:
+ In dynamic mode, the CLK_IN input is used to drive the
+ digital PLL of the silicon.
+ If not given, the static mode shall be used to derive the
+ output signal directly from the REF_CLK input.
+ $ref: /schemas/types.yaml#/definitions/flag
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/cirrus,cs2000-cp.h>
+
+ i2c@0 {
+ reg = <0x0 0x100>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ clock-controller@4f {
+ #clock-cells = <0>;
+ compatible = "cirrus,cs2000-cp";
+ reg = <0x4f>;
+ clocks = <&rcar_sound 0>, <&x12_clk>;
+ clock-names = "clk_in", "ref_clk";
+ cirrus,aux-output-source = <CS2000CP_AUX_OUTPUT_CLK_OUT>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/cirrus,ep7209-clk.yaml b/Documentation/devicetree/bindings/clock/cirrus,ep7209-clk.yaml
new file mode 100644
index 000000000000..fbd0d50d46a8
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/cirrus,ep7209-clk.yaml
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/cirrus,ep7209-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Cirrus Logic CLPS711X Clock Controller
+
+maintainers:
+ - Alexander Shiyan <shc_work@mail.ru>
+
+description:
+ See include/dt-bindings/clock/clps711x-clock.h for the full list of CLPS711X
+ clock IDs.
+
+properties:
+ compatible:
+ items:
+ - const: cirrus,ep7312-clk
+ - const: cirrus,ep7209-clk
+
+ reg:
+ maxItems: 1
+
+ startup-frequency:
+ description: Factory set CPU startup frequency in HZ.
+ $ref: /schemas/types.yaml#/definitions/uint32
+
+ "#clock-cells":
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - startup-frequency
+ - "#clock-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@80000000 {
+ compatible = "cirrus,ep7312-clk", "cirrus,ep7209-clk";
+ reg = <0x80000000 0xc000>;
+ #clock-cells = <1>;
+ startup-frequency = <73728000>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/cirrus,lochnagar.yaml b/Documentation/devicetree/bindings/clock/cirrus,lochnagar.yaml
new file mode 100644
index 000000000000..ccff74eda9fb
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/cirrus,lochnagar.yaml
@@ -0,0 +1,78 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/cirrus,lochnagar.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Cirrus Logic Lochnagar Audio Development Board
+
+maintainers:
+ - patches@opensource.cirrus.com
+
+description: |
+ Lochnagar is an evaluation and development board for Cirrus Logic
+ Smart CODEC and Amp devices. It allows the connection of most Cirrus
+ Logic devices on mini-cards, as well as allowing connection of various
+ application processor systems to provide a full evaluation platform.
+ Audio system topology, clocking and power can all be controlled through
+ the Lochnagar, allowing the device under test to be used in a variety of
+ possible use cases.
+
+ This binding document describes the binding for the clock portion of the
+ driver.
+
+ Also see these documents for generic binding information:
+ [1] Clock : ../clock/clock-bindings.txt
+
+ And these for relevant defines:
+ [2] include/dt-bindings/clock/lochnagar.h
+
+ This binding must be part of the Lochnagar MFD binding:
+ [3] ../mfd/cirrus,lochnagar.yaml
+
+properties:
+ compatible:
+ enum:
+ - cirrus,lochnagar1-clk
+ - cirrus,lochnagar2-clk
+
+ '#clock-cells':
+ description:
+ The first cell indicates the clock number, see [2] for available
+ clocks and [1].
+ const: 1
+
+ clock-names:
+ items:
+ enum:
+ - ln-cdc-clkout # Output clock from CODEC card.
+ - ln-dsp-clkout # Output clock from DSP card.
+ - ln-gf-mclk1 # Optional input clock from host system.
+ - ln-gf-mclk2 # Optional input clock from host system.
+ - ln-gf-mclk3 # Optional input clock from host system.
+ - ln-gf-mclk4 # Optional input clock from host system.
+ - ln-psia1-mclk # Optional input clock from external connector.
+ - ln-psia2-mclk # Optional input clock from external connector.
+ - ln-spdif-mclk # Optional input clock from SPDIF.
+ - ln-spdif-clkout # Optional input clock from SPDIF.
+ - ln-adat-mclk # Optional input clock from ADAT.
+ - ln-pmic-32k # On board fixed clock.
+ - ln-clk-12m # On board fixed clock.
+ - ln-clk-11m # On board fixed clock.
+ - ln-clk-24m # On board fixed clock.
+ - ln-clk-22m # On board fixed clock.
+ - ln-clk-8m # On board fixed clock.
+ - ln-usb-clk-24m # On board fixed clock.
+ - ln-usb-clk-12m # On board fixed clock.
+ minItems: 1
+ maxItems: 19
+
+ clocks:
+ minItems: 1
+ maxItems: 19
+
+additionalProperties: false
+
+required:
+ - compatible
+ - '#clock-cells'
diff --git a/Documentation/devicetree/bindings/clock/clk-exynos-audss.txt b/Documentation/devicetree/bindings/clock/clk-exynos-audss.txt
deleted file mode 100644
index 6030afb10b5c..000000000000
--- a/Documentation/devicetree/bindings/clock/clk-exynos-audss.txt
+++ /dev/null
@@ -1,103 +0,0 @@
-* Samsung Audio Subsystem Clock Controller
-
-The Samsung Audio Subsystem clock controller generates and supplies clocks
-to Audio Subsystem block available in the S5PV210 and Exynos SoCs. The clock
-binding described here is applicable to all SoCs in Exynos family.
-
-Required Properties:
-
-- compatible: should be one of the following:
- - "samsung,exynos4210-audss-clock" - controller compatible with all Exynos4 SoCs.
- - "samsung,exynos5250-audss-clock" - controller compatible with Exynos5250
- SoCs.
- - "samsung,exynos5410-audss-clock" - controller compatible with Exynos5410
- SoCs.
- - "samsung,exynos5420-audss-clock" - controller compatible with Exynos5420
- SoCs.
-- reg: physical base address and length of the controller's register set.
-
-- #clock-cells: should be 1.
-
-- clocks:
- - pll_ref: Fixed rate PLL reference clock, parent of mout_audss. "fin_pll"
- is used if not specified.
- - pll_in: Input PLL to the AudioSS block, parent of mout_audss. "fout_epll"
- is used if not specified.
- - cdclk: External i2s clock, parent of mout_i2s. "cdclk0" is used if not
- specified.
- - sclk_audio: Audio bus clock, parent of mout_i2s. "sclk_audio0" is used if
- not specified.
- - sclk_pcm_in: PCM clock, parent of sclk_pcm. "sclk_pcm0" is used if not
- specified.
-
-- clock-names: Aliases for the above clocks. They should be "pll_ref",
- "pll_in", "cdclk", "sclk_audio", and "sclk_pcm_in" respectively.
-
-Optional Properties:
-
- - power-domains: a phandle to respective power domain node as described by
- generic PM domain bindings (see power/power_domain.txt for more
- information).
-
-The following is the list of clocks generated by the controller. Each clock is
-assigned an identifier and client nodes use this identifier to specify the
-clock which they consume. Some of the clocks are available only on a particular
-Exynos4 SoC and this is specified where applicable.
-
-Provided clocks:
-
-Clock ID SoC (if specific)
------------------------------------------------
-
-mout_audss 0
-mout_i2s 1
-dout_srp 2
-dout_aud_bus 3
-dout_i2s 4
-srp_clk 5
-i2s_bus 6
-sclk_i2s 7
-pcm_bus 8
-sclk_pcm 9
-adma 10 Exynos5420
-
-Example 1: An example of a clock controller node using the default input
- clock names is listed below.
-
-clock_audss: audss-clock-controller@3810000 {
- compatible = "samsung,exynos5250-audss-clock";
- reg = <0x03810000 0x0C>;
- #clock-cells = <1>;
-};
-
-Example 2: An example of a clock controller node with the input clocks
- specified.
-
-clock_audss: audss-clock-controller@3810000 {
- compatible = "samsung,exynos5250-audss-clock";
- reg = <0x03810000 0x0C>;
- #clock-cells = <1>;
- clocks = <&clock 1>, <&clock 7>, <&clock 138>, <&clock 160>,
- <&ext_i2s_clk>;
- clock-names = "pll_ref", "pll_in", "sclk_audio", "sclk_pcm_in", "cdclk";
-};
-
-Example 3: I2S controller node that consumes the clock generated by the clock
- controller. Refer to the standard clock bindings for information
- about 'clocks' and 'clock-names' property.
-
-i2s0: i2s@3830000 {
- compatible = "samsung,i2s-v5";
- reg = <0x03830000 0x100>;
- dmas = <&pdma0 10
- &pdma0 9
- &pdma0 8>;
- dma-names = "tx", "rx", "tx-sec";
- clocks = <&clock_audss EXYNOS_I2S_BUS>,
- <&clock_audss EXYNOS_I2S_BUS>,
- <&clock_audss EXYNOS_SCLK_I2S>,
- <&clock_audss EXYNOS_MOUT_AUDSS>,
- <&clock_audss EXYNOS_MOUT_I2S>;
- clock-names = "iis", "i2s_opclk0", "i2s_opclk1",
- "mout_audss", "mout_i2s";
-};
diff --git a/Documentation/devicetree/bindings/clock/clk-s5pv210-audss.txt b/Documentation/devicetree/bindings/clock/clk-s5pv210-audss.txt
deleted file mode 100644
index f6272dcd96f4..000000000000
--- a/Documentation/devicetree/bindings/clock/clk-s5pv210-audss.txt
+++ /dev/null
@@ -1,53 +0,0 @@
-* Samsung Audio Subsystem Clock Controller
-
-The Samsung Audio Subsystem clock controller generates and supplies clocks
-to Audio Subsystem block available in the S5PV210 and compatible SoCs.
-
-Required Properties:
-
-- compatible: should be "samsung,s5pv210-audss-clock".
-- reg: physical base address and length of the controller's register set.
-
-- #clock-cells: should be 1.
-
-- clocks:
- - hclk: AHB bus clock of the Audio Subsystem.
- - xxti: Optional fixed rate PLL reference clock, parent of mout_audss. If
- not specified (i.e. xusbxti is used for PLL reference), it is fixed to
- a clock named "xxti".
- - fout_epll: Input PLL to the AudioSS block, parent of mout_audss.
- - iiscdclk0: Optional external i2s clock, parent of mout_i2s. If not
- specified, it is fixed to a clock named "iiscdclk0".
- - sclk_audio0: Audio bus clock, parent of mout_i2s.
-
-- clock-names: Aliases for the above clocks. They should be "hclk",
- "xxti", "fout_epll", "iiscdclk0", and "sclk_audio0" respectively.
-
-All available clocks are defined as preprocessor macros in
-dt-bindings/clock/s5pv210-audss-clk.h header and can be used in device
-tree sources.
-
-Example: Clock controller node.
-
- clk_audss: clock-controller@c0900000 {
- compatible = "samsung,s5pv210-audss-clock";
- reg = <0xc0900000 0x1000>;
- #clock-cells = <1>;
- clock-names = "hclk", "xxti",
- "fout_epll", "sclk_audio0";
- clocks = <&clocks DOUT_HCLKP>, <&xxti>,
- <&clocks FOUT_EPLL>, <&clocks SCLK_AUDIO0>;
- };
-
-Example: I2S controller node that consumes the clock generated by the clock
- controller. Refer to the standard clock bindings for information
- about 'clocks' and 'clock-names' property.
-
- i2s0: i2s@3830000 {
- /* ... */
- clock-names = "iis", "i2s_opclk0",
- "i2s_opclk1";
- clocks = <&clk_audss CLK_I2S>, <&clk_audss CLK_I2S>,
- <&clk_audss CLK_DOUT_AUD_BUS>;
- /* ... */
- };
diff --git a/Documentation/devicetree/bindings/clock/clock-bindings.txt b/Documentation/devicetree/bindings/clock/clock-bindings.txt
index 2ec489eebe72..6fe541368889 100644
--- a/Documentation/devicetree/bindings/clock/clock-bindings.txt
+++ b/Documentation/devicetree/bindings/clock/clock-bindings.txt
@@ -1,170 +1,2 @@
-This binding is a work-in-progress, and are based on some experimental
-work by benh[1].
-
-Sources of clock signal can be represented by any node in the device
-tree. Those nodes are designated as clock providers. Clock consumer
-nodes use a phandle and clock specifier pair to connect clock provider
-outputs to clock inputs. Similar to the gpio specifiers, a clock
-specifier is an array of zero, one or more cells identifying the clock
-output on a device. The length of a clock specifier is defined by the
-value of a #clock-cells property in the clock provider node.
-
-[1] http://patchwork.ozlabs.org/patch/31551/
-
-==Clock providers==
-
-Required properties:
-#clock-cells: Number of cells in a clock specifier; Typically 0 for nodes
- with a single clock output and 1 for nodes with multiple
- clock outputs.
-
-Optional properties:
-clock-output-names: Recommended to be a list of strings of clock output signal
- names indexed by the first cell in the clock specifier.
- However, the meaning of clock-output-names is domain
- specific to the clock provider, and is only provided to
- encourage using the same meaning for the majority of clock
- providers. This format may not work for clock providers
- using a complex clock specifier format. In those cases it
- is recommended to omit this property and create a binding
- specific names property.
-
- Clock consumer nodes must never directly reference
- the provider's clock-output-names property.
-
-For example:
-
- oscillator {
- #clock-cells = <1>;
- clock-output-names = "ckil", "ckih";
- };
-
-- this node defines a device with two clock outputs, the first named
- "ckil" and the second named "ckih". Consumer nodes always reference
- clocks by index. The names should reflect the clock output signal
- names for the device.
-
-clock-indices: If the identifying number for the clocks in the node
- is not linear from zero, then this allows the mapping of
- identifiers into the clock-output-names array.
-
-For example, if we have two clocks <&oscillator 1> and <&oscillator 3>:
-
- oscillator {
- compatible = "myclocktype";
- #clock-cells = <1>;
- clock-indices = <1>, <3>;
- clock-output-names = "clka", "clkb";
- }
-
- This ensures we do not have any empty strings in clock-output-names
-
-
-==Clock consumers==
-
-Required properties:
-clocks: List of phandle and clock specifier pairs, one pair
- for each clock input to the device. Note: if the
- clock provider specifies '0' for #clock-cells, then
- only the phandle portion of the pair will appear.
-
-Optional properties:
-clock-names: List of clock input name strings sorted in the same
- order as the clocks property. Consumers drivers
- will use clock-names to match clock input names
- with clocks specifiers.
-clock-ranges: Empty property indicating that child nodes can inherit named
- clocks from this node. Useful for bus nodes to provide a
- clock to their children.
-
-For example:
-
- device {
- clocks = <&osc 1>, <&ref 0>;
- clock-names = "baud", "register";
- };
-
-
-This represents a device with two clock inputs, named "baud" and "register".
-The baud clock is connected to output 1 of the &osc device, and the register
-clock is connected to output 0 of the &ref.
-
-==Example==
-
- /* external oscillator */
- osc: oscillator {
- compatible = "fixed-clock";
- #clock-cells = <1>;
- clock-frequency = <32678>;
- clock-output-names = "osc";
- };
-
- /* phase-locked-loop device, generates a higher frequency clock
- * from the external oscillator reference */
- pll: pll@4c000 {
- compatible = "vendor,some-pll-interface"
- #clock-cells = <1>;
- clocks = <&osc 0>;
- clock-names = "ref";
- reg = <0x4c000 0x1000>;
- clock-output-names = "pll", "pll-switched";
- };
-
- /* UART, using the low frequency oscillator for the baud clock,
- * and the high frequency switched PLL output for register
- * clocking */
- uart@a000 {
- compatible = "fsl,imx-uart";
- reg = <0xa000 0x1000>;
- interrupts = <33>;
- clocks = <&osc 0>, <&pll 1>;
- clock-names = "baud", "register";
- };
-
-This DT fragment defines three devices: an external oscillator to provide a
-low-frequency reference clock, a PLL device to generate a higher frequency
-clock signal, and a UART.
-
-* The oscillator is fixed-frequency, and provides one clock output, named "osc".
-* The PLL is both a clock provider and a clock consumer. It uses the clock
- signal generated by the external oscillator, and provides two output signals
- ("pll" and "pll-switched").
-* The UART has its baud clock connected the external oscillator and its
- register clock connected to the PLL clock (the "pll-switched" signal)
-
-==Assigned clock parents and rates==
-
-Some platforms may require initial configuration of default parent clocks
-and clock frequencies. Such a configuration can be specified in a device tree
-node through assigned-clocks, assigned-clock-parents and assigned-clock-rates
-properties. The assigned-clock-parents property should contain a list of parent
-clocks in the form of a phandle and clock specifier pair and the
-assigned-clock-rates property should contain a list of frequencies in Hz. Both
-these properties should correspond to the clocks listed in the assigned-clocks
-property.
-
-To skip setting parent or rate of a clock its corresponding entry should be
-set to 0, or can be omitted if it is not followed by any non-zero entry.
-
- uart@a000 {
- compatible = "fsl,imx-uart";
- reg = <0xa000 0x1000>;
- ...
- clocks = <&osc 0>, <&pll 1>;
- clock-names = "baud", "register";
-
- assigned-clocks = <&clkcon 0>, <&pll 2>;
- assigned-clock-parents = <&pll 2>;
- assigned-clock-rates = <0>, <460800>;
- };
-
-In this example the <&pll 2> clock is set as parent of clock <&clkcon 0> and
-the <&pll 2> clock is assigned a frequency value of 460800 Hz.
-
-Configuring a clock's parent and rate through the device node that consumes
-the clock can be done only for clocks that have a single user. Specifying
-conflicting parent or rate configuration in multiple consumer nodes for
-a shared clock is forbidden.
-
-Configuration of common clocks, which affect multiple consumer devices can
-be similarly specified in the clock provider node.
+This file has moved to the clock binding schema:
+https://github.com/devicetree-org/dt-schema/blob/main/dtschema/schemas/clock/clock.yaml
diff --git a/Documentation/devicetree/bindings/clock/clps711x-clock.txt b/Documentation/devicetree/bindings/clock/clps711x-clock.txt
deleted file mode 100644
index f1bd53f79d91..000000000000
--- a/Documentation/devicetree/bindings/clock/clps711x-clock.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-* Clock bindings for the Cirrus Logic CLPS711X CPUs
-
-Required properties:
-- compatible : Shall contain "cirrus,ep7209-clk".
-- reg : Address of the internal register set.
-- startup-frequency: Factory set CPU startup frequency in HZ.
-- #clock-cells : Should be <1>.
-
-The clock consumer should specify the desired clock by having the clock
-ID in its "clocks" phandle cell. See include/dt-bindings/clock/clps711x-clock.h
-for the full list of CLPS711X clock IDs.
-
-Example:
- clks: clks@80000000 {
- #clock-cells = <1>;
- compatible = "cirrus,ep7312-clk", "cirrus,ep7209-clk";
- reg = <0x80000000 0xc000>;
- startup-frequency = <73728000>;
- };
diff --git a/Documentation/devicetree/bindings/clock/cs2000-cp.txt b/Documentation/devicetree/bindings/clock/cs2000-cp.txt
deleted file mode 100644
index 54e6df0bee8a..000000000000
--- a/Documentation/devicetree/bindings/clock/cs2000-cp.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-CIRRUS LOGIC Fractional-N Clock Synthesizer & Clock Multiplier
-
-Required properties:
-
-- compatible: "cirrus,cs2000-cp"
-- reg: The chip select number on the I2C bus
-- clocks: common clock binding for CLK_IN, XTI/REF_CLK
-- clock-names: CLK_IN : clk_in, XTI/REF_CLK : ref_clk
-- #clock-cells: must be <0>
-
-Example:
-
-&i2c2 {
- ...
- cs2000: clk_multiplier@4f {
- #clock-cells = <0>;
- compatible = "cirrus,cs2000-cp";
- reg = <0x4f>;
- clocks = <&rcar_sound 0>, <&x12_clk>;
- clock-names = "clk_in", "ref_clk";
- };
-};
diff --git a/Documentation/devicetree/bindings/clock/csr,atlas7-car.txt b/Documentation/devicetree/bindings/clock/csr,atlas7-car.txt
deleted file mode 100644
index 54d6d1358339..000000000000
--- a/Documentation/devicetree/bindings/clock/csr,atlas7-car.txt
+++ /dev/null
@@ -1,55 +0,0 @@
-* Clock and reset bindings for CSR atlas7
-
-Required properties:
-- compatible: Should be "sirf,atlas7-car"
-- reg: Address and length of the register set
-- #clock-cells: Should be <1>
-- #reset-cells: Should be <1>
-
-The clock consumer should specify the desired clock by having the clock
-ID in its "clocks" phandle cell.
-The ID list atlas7_clks defined in drivers/clk/sirf/clk-atlas7.c
-
-The reset consumer should specify the desired reset by having the reset
-ID in its "reset" phandle cell.
-The ID list atlas7_reset_unit defined in drivers/clk/sirf/clk-atlas7.c
-
-Examples: Clock and reset controller node:
-
-car: clock-controller@18620000 {
- compatible = "sirf,atlas7-car";
- reg = <0x18620000 0x1000>;
- #clock-cells = <1>;
- #reset-cells = <1>;
-};
-
-Examples: Consumers using clock or reset:
-
-timer@10dc0000 {
- compatible = "sirf,macro-tick";
- reg = <0x10dc0000 0x1000>;
- clocks = <&car 54>;
- interrupts = <0 0 0>,
- <0 1 0>,
- <0 2 0>,
- <0 49 0>,
- <0 50 0>,
- <0 51 0>;
-};
-
-uart1: uart@18020000 {
- cell-index = <1>;
- compatible = "sirf,macro-uart";
- reg = <0x18020000 0x1000>;
- clocks = <&clks 95>;
- interrupts = <0 18 0>;
- fifosize = <32>;
-};
-
-vpp@13110000 {
- compatible = "sirf,prima2-vpp";
- reg = <0x13110000 0x10000>;
- interrupts = <0 31 0>;
- clocks = <&car 85>;
- resets = <&car 29>;
-};
diff --git a/Documentation/devicetree/bindings/clock/dove-divider-clock.txt b/Documentation/devicetree/bindings/clock/dove-divider-clock.txt
deleted file mode 100644
index 217871f483c0..000000000000
--- a/Documentation/devicetree/bindings/clock/dove-divider-clock.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-PLL divider based Dove clocks
-
-Marvell Dove has a 2GHz PLL, which feeds into a set of dividers to provide
-high speed clocks for a number of peripherals. These dividers are part of
-the PMU, and thus this node should be a child of the PMU node.
-
-The following clocks are provided:
-
-ID Clock
--------------
-0 AXI bus clock
-1 GPU clock
-2 VMeta clock
-3 LCD clock
-
-Required properties:
-- compatible : shall be "marvell,dove-divider-clock"
-- reg : shall be the register address of the Core PLL and Clock Divider
- Control 0 register. This will cover that register, as well as the
- Core PLL and Clock Divider Control 1 register. Thus, it will have
- a size of 8.
-- #clock-cells : from common clock binding; shall be set to 1
-
-divider_clk: core-clock@64 {
- compatible = "marvell,dove-divider-clock";
- reg = <0x0064 0x8>;
- #clock-cells = <1>;
-};
diff --git a/Documentation/devicetree/bindings/clock/efm32-clock.txt b/Documentation/devicetree/bindings/clock/efm32-clock.txt
deleted file mode 100644
index 263d293f6a10..000000000000
--- a/Documentation/devicetree/bindings/clock/efm32-clock.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-* Clock bindings for Energy Micro efm32 Giant Gecko's Clock Management Unit
-
-Required properties:
-- compatible: Should be "efm32gg,cmu"
-- reg: Base address and length of the register set
-- interrupts: Interrupt used by the CMU
-- #clock-cells: Should be <1>
-
-The clock consumer should specify the desired clock by having the clock ID in
-its "clocks" phandle cell. The header efm32-clk.h contains a list of available
-IDs.
diff --git a/Documentation/devicetree/bindings/clock/emev2-clock.txt b/Documentation/devicetree/bindings/clock/emev2-clock.txt
deleted file mode 100644
index 268ca615459e..000000000000
--- a/Documentation/devicetree/bindings/clock/emev2-clock.txt
+++ /dev/null
@@ -1,98 +0,0 @@
-Device tree Clock bindings for Renesas EMMA Mobile EV2
-
-This binding uses the common clock binding.
-
-* SMU
-System Management Unit described in user's manual R19UH0037EJ1000_SMU.
-This is not a clock provider, but clocks under SMU depend on it.
-
-Required properties:
-- compatible: Should be "renesas,emev2-smu"
-- reg: Address and Size of SMU registers
-
-* SMU_CLKDIV
-Function block with an input mux and a divider, which corresponds to
-"Serial clock generator" in fig."Clock System Overview" of the manual,
-and "xxx frequency division setting register" (XXXCLKDIV) registers.
-This makes internal (neither input nor output) clock that is provided
-to input of xxxGCLK block.
-
-Required properties:
-- compatible: Should be "renesas,emev2-smu-clkdiv"
-- reg: Byte offset from SMU base and Bit position in the register
-- clocks: Parent clocks. Input clocks as described in clock-bindings.txt
-- #clock-cells: Should be <0>
-
-* SMU_GCLK
-Clock gating node shown as "Clock stop processing block" in the
-fig."Clock System Overview" of the manual.
-Registers are "xxx clock gate control register" (XXXGCLKCTRL).
-
-Required properties:
-- compatible: Should be "renesas,emev2-smu-gclk"
-- reg: Byte offset from SMU base and Bit position in the register
-- clocks: Input clock as described in clock-bindings.txt
-- #clock-cells: Should be <0>
-
-Example of provider:
-
-usia_u0_sclkdiv: usia_u0_sclkdiv {
- compatible = "renesas,emev2-smu-clkdiv";
- reg = <0x610 0>;
- clocks = <&pll3_fo>, <&pll4_fo>, <&pll1_fo>, <&osc1_fo>;
- #clock-cells = <0>;
-};
-
-usia_u0_sclk: usia_u0_sclk {
- compatible = "renesas,emev2-smu-gclk";
- reg = <0x4a0 1>;
- clocks = <&usia_u0_sclkdiv>;
- #clock-cells = <0>;
-};
-
-Example of consumer:
-
-serial@e1020000 {
- compatible = "renesas,em-uart";
- reg = <0xe1020000 0x38>;
- interrupts = <0 8 0>;
- clocks = <&usia_u0_sclk>;
- clock-names = "sclk";
-};
-
-Example of clock-tree description:
-
- This describes a clock path in the clock tree
- c32ki -> pll3_fo -> usia_u0_sclkdiv -> usia_u0_sclk
-
-smu@e0110000 {
- compatible = "renesas,emev2-smu";
- reg = <0xe0110000 0x10000>;
- #address-cells = <2>;
- #size-cells = <0>;
-
- c32ki: c32ki {
- compatible = "fixed-clock";
- clock-frequency = <32768>;
- #clock-cells = <0>;
- };
- pll3_fo: pll3_fo {
- compatible = "fixed-factor-clock";
- clocks = <&c32ki>;
- clock-div = <1>;
- clock-mult = <7000>;
- #clock-cells = <0>;
- };
- usia_u0_sclkdiv: usia_u0_sclkdiv {
- compatible = "renesas,emev2-smu-clkdiv";
- reg = <0x610 0>;
- clocks = <&pll3_fo>;
- #clock-cells = <0>;
- };
- usia_u0_sclk: usia_u0_sclk {
- compatible = "renesas,emev2-smu-gclk";
- reg = <0x4a0 1>;
- clocks = <&usia_u0_sclkdiv>;
- #clock-cells = <0>;
- };
-};
diff --git a/Documentation/devicetree/bindings/clock/exynos3250-clock.txt b/Documentation/devicetree/bindings/clock/exynos3250-clock.txt
deleted file mode 100644
index 7441ed519f02..000000000000
--- a/Documentation/devicetree/bindings/clock/exynos3250-clock.txt
+++ /dev/null
@@ -1,57 +0,0 @@
-* Samsung Exynos3250 Clock Controller
-
-The Exynos3250 clock controller generates and supplies clock to various
-controllers within the Exynos3250 SoC.
-
-Required Properties:
-
-- compatible: should be one of the following.
- - "samsung,exynos3250-cmu" - controller compatible with Exynos3250 SoC.
- - "samsung,exynos3250-cmu-dmc" - controller compatible with
- Exynos3250 SoC for Dynamic Memory Controller domain.
- - "samsung,exynos3250-cmu-isp" - ISP block clock controller compatible
- with Exynos3250 SOC
-
-- reg: physical base address of the controller and length of memory mapped
- region.
-
-- #clock-cells: should be 1.
-
-Each clock is assigned an identifier and client nodes can use this identifier
-to specify the clock which they consume.
-
-All available clocks are defined as preprocessor macros in
-dt-bindings/clock/exynos3250.h header and can be used in device
-tree sources.
-
-Example 1: Examples of clock controller nodes are listed below.
-
- cmu: clock-controller@10030000 {
- compatible = "samsung,exynos3250-cmu";
- reg = <0x10030000 0x20000>;
- #clock-cells = <1>;
- };
-
- cmu_dmc: clock-controller@105c0000 {
- compatible = "samsung,exynos3250-cmu-dmc";
- reg = <0x105C0000 0x2000>;
- #clock-cells = <1>;
- };
-
- cmu_isp: clock-controller@10048000 {
- compatible = "samsung,exynos3250-cmu-isp";
- reg = <0x10048000 0x1000>;
- #clock-cells = <1>;
- };
-
-Example 2: UART controller node that consumes the clock generated by the clock
- controller. Refer to the standard clock bindings for information
- about 'clocks' and 'clock-names' property.
-
- serial@13800000 {
- compatible = "samsung,exynos4210-uart";
- reg = <0x13800000 0x100>;
- interrupts = <0 109 0>;
- clocks = <&cmu CLK_UART0>, <&cmu CLK_SCLK_UART0>;
- clock-names = "uart", "clk_uart_baud0";
- };
diff --git a/Documentation/devicetree/bindings/clock/exynos4-clock.txt b/Documentation/devicetree/bindings/clock/exynos4-clock.txt
deleted file mode 100644
index 17bb11365354..000000000000
--- a/Documentation/devicetree/bindings/clock/exynos4-clock.txt
+++ /dev/null
@@ -1,86 +0,0 @@
-* Samsung Exynos4 Clock Controller
-
-The Exynos4 clock controller generates and supplies clock to various controllers
-within the Exynos4 SoC. The clock binding described here is applicable to all
-SoC's in the Exynos4 family.
-
-Required Properties:
-
-- compatible: should be one of the following.
- - "samsung,exynos4210-clock" - controller compatible with Exynos4210 SoC.
- - "samsung,exynos4412-clock" - controller compatible with Exynos4412 SoC.
-
-- reg: physical base address of the controller and length of memory mapped
- region.
-
-- #clock-cells: should be 1.
-
-Each clock is assigned an identifier and client nodes can use this identifier
-to specify the clock which they consume.
-
-All available clocks are defined as preprocessor macros in
-dt-bindings/clock/exynos4.h header and can be used in device
-tree sources.
-
-Example 1: An example of a clock controller node is listed below.
-
- clock: clock-controller@10030000 {
- compatible = "samsung,exynos4210-clock";
- reg = <0x10030000 0x20000>;
- #clock-cells = <1>;
- };
-
-Example 2: UART controller node that consumes the clock generated by the clock
- controller. Refer to the standard clock bindings for information
- about 'clocks' and 'clock-names' property.
-
- serial@13820000 {
- compatible = "samsung,exynos4210-uart";
- reg = <0x13820000 0x100>;
- interrupts = <0 54 0>;
- clocks = <&clock CLK_UART2>, <&clock CLK_SCLK_UART2>;
- clock-names = "uart", "clk_uart_baud0";
- };
-
-Exynos4412 SoC contains some additional clocks for FIMC-ISP (Camera ISP)
-subsystem. Registers for those clocks are located in the ISP power domain.
-Because those registers are also located in a different memory region than
-the main clock controller, a separate clock controller has to be defined for
-handling them.
-
-Required Properties:
-
-- compatible: should be "samsung,exynos4412-isp-clock".
-
-- reg: physical base address of the ISP clock controller and length of memory
- mapped region.
-
-- #clock-cells: should be 1.
-
-- clocks: list of the clock controller input clock identifiers,
- from common clock bindings, should point to CLK_ACLK200 and
- CLK_ACLK400_MCUISP clocks from the main clock controller.
-
-- clock-names: list of the clock controller input clock names,
- as described in clock-bindings.txt, should be "aclk200" and
- "aclk400_mcuisp".
-
-- power-domains: a phandle to ISP power domain node as described by
- generic PM domain bindings.
-
-Example 3: The clock controllers bindings for Exynos4412 SoCs.
-
- clock: clock-controller@10030000 {
- compatible = "samsung,exynos4412-clock";
- reg = <0x10030000 0x18000>;
- #clock-cells = <1>;
- };
-
- isp_clock: clock-controller@10048000 {
- compatible = "samsung,exynos4412-isp-clock";
- reg = <0x10048000 0x1000>;
- #clock-cells = <1>;
- power-domains = <&pd_isp>;
- clocks = <&clock CLK_ACLK200>, <&clock CLK_ACLK400_MCUISP>;
- clock-names = "aclk200", "aclk400_mcuisp";
- };
diff --git a/Documentation/devicetree/bindings/clock/exynos5250-clock.txt b/Documentation/devicetree/bindings/clock/exynos5250-clock.txt
deleted file mode 100644
index aff266a12eeb..000000000000
--- a/Documentation/devicetree/bindings/clock/exynos5250-clock.txt
+++ /dev/null
@@ -1,41 +0,0 @@
-* Samsung Exynos5250 Clock Controller
-
-The Exynos5250 clock controller generates and supplies clock to various
-controllers within the Exynos5250 SoC.
-
-Required Properties:
-
-- compatible: should be one of the following.
- - "samsung,exynos5250-clock" - controller compatible with Exynos5250 SoC.
-
-- reg: physical base address of the controller and length of memory mapped
- region.
-
-- #clock-cells: should be 1.
-
-Each clock is assigned an identifier and client nodes can use this identifier
-to specify the clock which they consume.
-
-All available clocks are defined as preprocessor macros in
-dt-bindings/clock/exynos5250.h header and can be used in device
-tree sources.
-
-Example 1: An example of a clock controller node is listed below.
-
- clock: clock-controller@10010000 {
- compatible = "samsung,exynos5250-clock";
- reg = <0x10010000 0x30000>;
- #clock-cells = <1>;
- };
-
-Example 2: UART controller node that consumes the clock generated by the clock
- controller. Refer to the standard clock bindings for information
- about 'clocks' and 'clock-names' property.
-
- serial@13820000 {
- compatible = "samsung,exynos4210-uart";
- reg = <0x13820000 0x100>;
- interrupts = <0 54 0>;
- clocks = <&clock CLK_UART2>, <&clock CLK_SCLK_UART2>;
- clock-names = "uart", "clk_uart_baud0";
- };
diff --git a/Documentation/devicetree/bindings/clock/exynos5260-clock.txt b/Documentation/devicetree/bindings/clock/exynos5260-clock.txt
deleted file mode 100644
index c79d31f7f66e..000000000000
--- a/Documentation/devicetree/bindings/clock/exynos5260-clock.txt
+++ /dev/null
@@ -1,190 +0,0 @@
-* Samsung Exynos5260 Clock Controller
-
-Exynos5260 has 13 clock controllers which are instantiated
-independently from the device-tree. These clock controllers
-generate and supply clocks to various hardware blocks within
-the SoC.
-
-Each clock is assigned an identifier and client nodes can use
-this identifier to specify the clock which they consume. All
-available clocks are defined as preprocessor macros in
-dt-bindings/clock/exynos5260-clk.h header and can be used in
-device tree sources.
-
-External clocks:
-
-There are several clocks that are generated outside the SoC. It
-is expected that they are defined using standard clock bindings
-with following clock-output-names:
-
- - "fin_pll" - PLL input clock from XXTI
- - "xrtcxti" - input clock from XRTCXTI
- - "ioclk_pcm_extclk" - pcm external operation clock
- - "ioclk_spdif_extclk" - spdif external operation clock
- - "ioclk_i2s_cdclk" - i2s0 codec clock
-
-Phy clocks:
-
-There are several clocks which are generated by specific PHYs.
-These clocks are fed into the clock controller and then routed to
-the hardware blocks. These clocks are defined as fixed clocks in the
-driver with following names:
-
- - "phyclk_dptx_phy_ch3_txd_clk" - dp phy clock for channel 3
- - "phyclk_dptx_phy_ch2_txd_clk" - dp phy clock for channel 2
- - "phyclk_dptx_phy_ch1_txd_clk" - dp phy clock for channel 1
- - "phyclk_dptx_phy_ch0_txd_clk" - dp phy clock for channel 0
- - "phyclk_hdmi_phy_tmds_clko" - hdmi phy tmds clock
- - "phyclk_hdmi_phy_pixel_clko" - hdmi phy pixel clock
- - "phyclk_hdmi_link_o_tmds_clkhi" - hdmi phy for hdmi link
- - "phyclk_dptx_phy_o_ref_clk_24m" - dp phy reference clock
- - "phyclk_dptx_phy_clk_div2"
- - "phyclk_mipi_dphy_4l_m_rxclkesc0"
- - "phyclk_usbhost20_phy_phyclock" - usb 2.0 phy clock
- - "phyclk_usbhost20_phy_freeclk"
- - "phyclk_usbhost20_phy_clk48mohci"
- - "phyclk_usbdrd30_udrd30_pipe_pclk"
- - "phyclk_usbdrd30_udrd30_phyclock" - usb 3.0 phy clock
-
-Required Properties for Clock Controller:
-
- - compatible: should be one of the following.
- 1) "samsung,exynos5260-clock-top"
- 2) "samsung,exynos5260-clock-peri"
- 3) "samsung,exynos5260-clock-egl"
- 4) "samsung,exynos5260-clock-kfc"
- 5) "samsung,exynos5260-clock-g2d"
- 6) "samsung,exynos5260-clock-mif"
- 7) "samsung,exynos5260-clock-mfc"
- 8) "samsung,exynos5260-clock-g3d"
- 9) "samsung,exynos5260-clock-fsys"
- 10) "samsung,exynos5260-clock-aud"
- 11) "samsung,exynos5260-clock-isp"
- 12) "samsung,exynos5260-clock-gscl"
- 13) "samsung,exynos5260-clock-disp"
-
- - reg: physical base address of the controller and the length of
- memory mapped region.
-
- - #clock-cells: should be 1.
-
- - clocks: list of clock identifiers which are fed as the input to
- the given clock controller. Please refer the next section to find
- the input clocks for a given controller.
-
- - clock-names: list of names of clocks which are fed as the input
- to the given clock controller.
-
-Input clocks for top clock controller:
- - fin_pll
- - dout_mem_pll
- - dout_bus_pll
- - dout_media_pll
-
-Input clocks for peri clock controller:
- - fin_pll
- - ioclk_pcm_extclk
- - ioclk_i2s_cdclk
- - ioclk_spdif_extclk
- - phyclk_hdmi_phy_ref_cko
- - dout_aclk_peri_66
- - dout_sclk_peri_uart0
- - dout_sclk_peri_uart1
- - dout_sclk_peri_uart2
- - dout_sclk_peri_spi0_b
- - dout_sclk_peri_spi1_b
- - dout_sclk_peri_spi2_b
- - dout_aclk_peri_aud
- - dout_sclk_peri_spi0_b
-
-Input clocks for egl clock controller:
- - fin_pll
- - dout_bus_pll
-
-Input clocks for kfc clock controller:
- - fin_pll
- - dout_media_pll
-
-Input clocks for g2d clock controller:
- - fin_pll
- - dout_aclk_g2d_333
-
-Input clocks for mif clock controller:
- - fin_pll
-
-Input clocks for mfc clock controller:
- - fin_pll
- - dout_aclk_mfc_333
-
-Input clocks for g3d clock controller:
- - fin_pll
-
-Input clocks for fsys clock controller:
- - fin_pll
- - phyclk_usbhost20_phy_phyclock
- - phyclk_usbhost20_phy_freeclk
- - phyclk_usbhost20_phy_clk48mohci
- - phyclk_usbdrd30_udrd30_pipe_pclk
- - phyclk_usbdrd30_udrd30_phyclock
- - dout_aclk_fsys_200
-
-Input clocks for aud clock controller:
- - fin_pll
- - fout_aud_pll
- - ioclk_i2s_cdclk
- - ioclk_pcm_extclk
-
-Input clocks for isp clock controller:
- - fin_pll
- - dout_aclk_isp1_266
- - dout_aclk_isp1_400
- - mout_aclk_isp1_266
-
-Input clocks for gscl clock controller:
- - fin_pll
- - dout_aclk_gscl_400
- - dout_aclk_gscl_333
-
-Input clocks for disp clock controller:
- - fin_pll
- - phyclk_dptx_phy_ch3_txd_clk
- - phyclk_dptx_phy_ch2_txd_clk
- - phyclk_dptx_phy_ch1_txd_clk
- - phyclk_dptx_phy_ch0_txd_clk
- - phyclk_hdmi_phy_tmds_clko
- - phyclk_hdmi_phy_ref_clko
- - phyclk_hdmi_phy_pixel_clko
- - phyclk_hdmi_link_o_tmds_clkhi
- - phyclk_mipi_dphy_4l_m_txbyte_clkhs
- - phyclk_dptx_phy_o_ref_clk_24m
- - phyclk_dptx_phy_clk_div2
- - phyclk_mipi_dphy_4l_m_rxclkesc0
- - phyclk_hdmi_phy_ref_cko
- - ioclk_spdif_extclk
- - dout_aclk_peri_aud
- - dout_aclk_disp_222
- - dout_sclk_disp_pixel
- - dout_aclk_disp_333
-
-Example 1: An example of a clock controller node is listed below.
-
- clock_mfc: clock-controller@11090000 {
- compatible = "samsung,exynos5260-clock-mfc";
- clock = <&fin_pll>, <&clock_top TOP_DOUT_ACLK_MFC_333>;
- clock-names = "fin_pll", "dout_aclk_mfc_333";
- reg = <0x11090000 0x10000>;
- #clock-cells = <1>;
- };
-
-Example 2: UART controller node that consumes the clock generated by the
- peri clock controller. Refer to the standard clock bindings for
- information about 'clocks' and 'clock-names' property.
-
- serial@12c00000 {
- compatible = "samsung,exynos4210-uart";
- reg = <0x12C00000 0x100>;
- interrupts = <0 146 0>;
- clocks = <&clock_peri PERI_PCLK_UART0>, <&clock_peri PERI_SCLK_UART0>;
- clock-names = "uart", "clk_uart_baud0";
- };
-
diff --git a/Documentation/devicetree/bindings/clock/exynos5410-clock.txt b/Documentation/devicetree/bindings/clock/exynos5410-clock.txt
deleted file mode 100644
index 217beb27c30e..000000000000
--- a/Documentation/devicetree/bindings/clock/exynos5410-clock.txt
+++ /dev/null
@@ -1,50 +0,0 @@
-* Samsung Exynos5410 Clock Controller
-
-The Exynos5410 clock controller generates and supplies clock to various
-controllers within the Exynos5410 SoC.
-
-Required Properties:
-
-- compatible: should be "samsung,exynos5410-clock"
-
-- reg: physical base address of the controller and length of memory mapped
- region.
-
-- #clock-cells: should be 1.
-
-- clocks: should contain an entry specifying the root clock from external
- oscillator supplied through XXTI or XusbXTI pin. This clock should be
- defined using standard clock bindings with "fin_pll" clock-output-name.
- That clock is being passed internally to the 9 PLLs.
-
-All available clocks are defined as preprocessor macros in
-dt-bindings/clock/exynos5410.h header and can be used in device
-tree sources.
-
-Example 1: An example of a clock controller node is listed below.
-
- fin_pll: xxti {
- compatible = "fixed-clock";
- clock-frequency = <24000000>;
- clock-output-names = "fin_pll";
- #clock-cells = <0>;
- };
-
- clock: clock-controller@10010000 {
- compatible = "samsung,exynos5410-clock";
- reg = <0x10010000 0x30000>;
- #clock-cells = <1>;
- clocks = <&fin_pll>;
- };
-
-Example 2: UART controller node that consumes the clock generated by the clock
- controller. Refer to the standard clock bindings for information
- about 'clocks' and 'clock-names' property.
-
- serial@12c20000 {
- compatible = "samsung,exynos4210-uart";
- reg = <0x12C00000 0x100>;
- interrupts = <0 51 0>;
- clocks = <&clock CLK_UART0>, <&clock CLK_SCLK_UART0>;
- clock-names = "uart", "clk_uart_baud0";
- };
diff --git a/Documentation/devicetree/bindings/clock/exynos5420-clock.txt b/Documentation/devicetree/bindings/clock/exynos5420-clock.txt
deleted file mode 100644
index 717a7b1531c7..000000000000
--- a/Documentation/devicetree/bindings/clock/exynos5420-clock.txt
+++ /dev/null
@@ -1,42 +0,0 @@
-* Samsung Exynos5420 Clock Controller
-
-The Exynos5420 clock controller generates and supplies clock to various
-controllers within the Exynos5420 SoC and for the Exynos5800 SoC.
-
-Required Properties:
-
-- compatible: should be one of the following.
- - "samsung,exynos5420-clock" - controller compatible with Exynos5420 SoC.
- - "samsung,exynos5800-clock" - controller compatible with Exynos5800 SoC.
-
-- reg: physical base address of the controller and length of memory mapped
- region.
-
-- #clock-cells: should be 1.
-
-Each clock is assigned an identifier and client nodes can use this identifier
-to specify the clock which they consume.
-
-All available clocks are defined as preprocessor macros in
-dt-bindings/clock/exynos5420.h header and can be used in device
-tree sources.
-
-Example 1: An example of a clock controller node is listed below.
-
- clock: clock-controller@10010000 {
- compatible = "samsung,exynos5420-clock";
- reg = <0x10010000 0x30000>;
- #clock-cells = <1>;
- };
-
-Example 2: UART controller node that consumes the clock generated by the clock
- controller. Refer to the standard clock bindings for information
- about 'clocks' and 'clock-names' property.
-
- serial@13820000 {
- compatible = "samsung,exynos4210-uart";
- reg = <0x13820000 0x100>;
- interrupts = <0 54 0>;
- clocks = <&clock CLK_UART2>, <&clock CLK_SCLK_UART2>;
- clock-names = "uart", "clk_uart_baud0";
- };
diff --git a/Documentation/devicetree/bindings/clock/exynos5433-clock.txt b/Documentation/devicetree/bindings/clock/exynos5433-clock.txt
deleted file mode 100644
index 50d5897c9849..000000000000
--- a/Documentation/devicetree/bindings/clock/exynos5433-clock.txt
+++ /dev/null
@@ -1,484 +0,0 @@
-* Samsung Exynos5433 CMU (Clock Management Units)
-
-The Exynos5433 clock controller generates and supplies clock to various
-controllers within the Exynos5433 SoC.
-
-Required Properties:
-
-- compatible: should be one of the following.
- - "samsung,exynos5433-cmu-top" - clock controller compatible for CMU_TOP
- which generates clocks for IMEM/FSYS/G3D/GSCL/HEVC/MSCL/G2D/MFC/PERIC/PERIS
- domains and bus clocks.
- - "samsung,exynos5433-cmu-cpif" - clock controller compatible for CMU_CPIF
- which generates clocks for LLI (Low Latency Interface) IP.
- - "samsung,exynos5433-cmu-mif" - clock controller compatible for CMU_MIF
- which generates clocks for DRAM Memory Controller domain.
- - "samsung,exynos5433-cmu-peric" - clock controller compatible for CMU_PERIC
- which generates clocks for UART/I2C/SPI/I2S/PCM/SPDIF/PWM/SLIMBUS IPs.
- - "samsung,exynos5433-cmu-peris" - clock controller compatible for CMU_PERIS
- which generates clocks for PMU/TMU/MCT/WDT/RTC/SECKEY/TZPC IPs.
- - "samsung,exynos5433-cmu-fsys" - clock controller compatible for CMU_FSYS
- which generates clocks for USB/UFS/SDMMC/TSI/PDMA IPs.
- - "samsung,exynos5433-cmu-g2d" - clock controller compatible for CMU_G2D
- which generates clocks for G2D/MDMA IPs.
- - "samsung,exynos5433-cmu-disp" - clock controller compatible for CMU_DISP
- which generates clocks for Display (DECON/HDMI/DSIM/MIXER) IPs.
- - "samsung,exynos5433-cmu-aud" - clock controller compatible for CMU_AUD
- which generates clocks for Cortex-A5/BUS/AUDIO clocks.
- - "samsung,exynos5433-cmu-bus0", "samsung,exynos5433-cmu-bus1"
- and "samsung,exynos5433-cmu-bus2" - clock controller compatible for CMU_BUS
- which generates global data buses clock and global peripheral buses clock.
- - "samsung,exynos5433-cmu-g3d" - clock controller compatible for CMU_G3D
- which generates clocks for 3D Graphics Engine IP.
- - "samsung,exynos5433-cmu-gscl" - clock controller compatible for CMU_GSCL
- which generates clocks for GSCALER IPs.
- - "samsung,exynos5433-cmu-apollo"- clock controller compatible for CMU_APOLLO
- which generates clocks for Cortex-A53 Quad-core processor.
- - "samsung,exynos5433-cmu-atlas" - clock controller compatible for CMU_ATLAS
- which generates clocks for Cortex-A57 Quad-core processor, CoreSight and
- L2 cache controller.
- - "samsung,exynos5433-cmu-mscl" - clock controller compatible for CMU_MSCL
- which generates clocks for M2M (Memory to Memory) scaler and JPEG IPs.
- - "samsung,exynos5433-cmu-mfc" - clock controller compatible for CMU_MFC
- which generates clocks for MFC(Multi-Format Codec) IP.
- - "samsung,exynos5433-cmu-hevc" - clock controller compatible for CMU_HEVC
- which generates clocks for HEVC(High Efficiency Video Codec) decoder IP.
- - "samsung,exynos5433-cmu-isp" - clock controller compatible for CMU_ISP
- which generates clocks for FIMC-ISP/DRC/SCLC/DIS/3DNR IPs.
- - "samsung,exynos5433-cmu-cam0" - clock controller compatible for CMU_CAM0
- which generates clocks for MIPI_CSIS{0|1}/FIMC_LITE_{A|B|D}/FIMC_3AA{0|1}
- IPs.
- - "samsung,exynos5433-cmu-cam1" - clock controller compatible for CMU_CAM1
- which generates clocks for Cortex-A5/MIPI_CSIS2/FIMC-LITE_C/FIMC-FD IPs.
-
-- reg: physical base address of the controller and length of memory mapped
- region.
-
-- #clock-cells: should be 1.
-
-- clocks: list of the clock controller input clock identifiers,
- from common clock bindings. Please refer the next section
- to find the input clocks for a given controller.
-
-- clock-names: list of the clock controller input clock names,
- as described in clock-bindings.txt.
-
- Input clocks for top clock controller:
- - oscclk
- - sclk_mphy_pll
- - sclk_mfc_pll
- - sclk_bus_pll
-
- Input clocks for cpif clock controller:
- - oscclk
-
- Input clocks for mif clock controller:
- - oscclk
- - sclk_mphy_pll
-
- Input clocks for fsys clock controller:
- - oscclk
- - sclk_ufs_mphy
- - aclk_fsys_200
- - sclk_pcie_100_fsys
- - sclk_ufsunipro_fsys
- - sclk_mmc2_fsys
- - sclk_mmc1_fsys
- - sclk_mmc0_fsys
- - sclk_usbhost30_fsys
- - sclk_usbdrd30_fsys
-
- Input clocks for g2d clock controller:
- - oscclk
- - aclk_g2d_266
- - aclk_g2d_400
-
- Input clocks for disp clock controller:
- - oscclk
- - sclk_dsim1_disp
- - sclk_dsim0_disp
- - sclk_dsd_disp
- - sclk_decon_tv_eclk_disp
- - sclk_decon_vclk_disp
- - sclk_decon_eclk_disp
- - sclk_decon_tv_vclk_disp
- - aclk_disp_333
-
- Input clocks for audio clock controller:
- - oscclk
- - fout_aud_pll
-
- Input clocks for bus0 clock controller:
- - aclk_bus0_400
-
- Input clocks for bus1 clock controller:
- - aclk_bus1_400
-
- Input clocks for bus2 clock controller:
- - oscclk
- - aclk_bus2_400
-
- Input clocks for g3d clock controller:
- - oscclk
- - aclk_g3d_400
-
- Input clocks for gscl clock controller:
- - oscclk
- - aclk_gscl_111
- - aclk_gscl_333
-
- Input clocks for apollo clock controller:
- - oscclk
- - sclk_bus_pll_apollo
-
- Input clocks for atlas clock controller:
- - oscclk
- - sclk_bus_pll_atlas
-
- Input clocks for mscl clock controller:
- - oscclk
- - sclk_jpeg_mscl
- - aclk_mscl_400
-
- Input clocks for mfc clock controller:
- - oscclk
- - aclk_mfc_400
-
- Input clocks for hevc clock controller:
- - oscclk
- - aclk_hevc_400
-
- Input clocks for isp clock controller:
- - oscclk
- - aclk_isp_dis_400
- - aclk_isp_400
-
- Input clocks for cam0 clock controller:
- - oscclk
- - aclk_cam0_333
- - aclk_cam0_400
- - aclk_cam0_552
-
- Input clocks for cam1 clock controller:
- - oscclk
- - sclk_isp_uart_cam1
- - sclk_isp_spi1_cam1
- - sclk_isp_spi0_cam1
- - aclk_cam1_333
- - aclk_cam1_400
- - aclk_cam1_552
-
-Optional properties:
- - power-domains: a phandle to respective power domain node as described by
- generic PM domain bindings (see power/power_domain.txt for more
- information).
-
-Each clock is assigned an identifier and client nodes can use this identifier
-to specify the clock which they consume.
-
-All available clocks are defined as preprocessor macros in
-dt-bindings/clock/exynos5433.h header and can be used in device
-tree sources.
-
-Example 1: Examples of 'oscclk' source clock node are listed below.
-
- xxti: xxti {
- compatible = "fixed-clock";
- clock-output-names = "oscclk";
- #clock-cells = <0>;
- };
-
-Example 2: Examples of clock controller nodes are listed below.
-
- cmu_top: clock-controller@10030000 {
- compatible = "samsung,exynos5433-cmu-top";
- reg = <0x10030000 0x0c04>;
- #clock-cells = <1>;
-
- clock-names = "oscclk",
- "sclk_mphy_pll",
- "sclk_mfc_pll",
- "sclk_bus_pll";
- clocks = <&xxti>,
- <&cmu_cpif CLK_SCLK_MPHY_PLL>,
- <&cmu_mif CLK_SCLK_MFC_PLL>,
- <&cmu_mif CLK_SCLK_BUS_PLL>;
- };
-
- cmu_cpif: clock-controller@10fc0000 {
- compatible = "samsung,exynos5433-cmu-cpif";
- reg = <0x10fc0000 0x0c04>;
- #clock-cells = <1>;
-
- clock-names = "oscclk";
- clocks = <&xxti>;
- };
-
- cmu_mif: clock-controller@105b0000 {
- compatible = "samsung,exynos5433-cmu-mif";
- reg = <0x105b0000 0x100c>;
- #clock-cells = <1>;
-
- clock-names = "oscclk",
- "sclk_mphy_pll";
- clocks = <&xxti>,
- <&cmu_cpif CLK_SCLK_MPHY_PLL>;
- };
-
- cmu_peric: clock-controller@14c80000 {
- compatible = "samsung,exynos5433-cmu-peric";
- reg = <0x14c80000 0x0b08>;
- #clock-cells = <1>;
- };
-
- cmu_peris: clock-controller@10040000 {
- compatible = "samsung,exynos5433-cmu-peris";
- reg = <0x10040000 0x0b20>;
- #clock-cells = <1>;
- };
-
- cmu_fsys: clock-controller@156e0000 {
- compatible = "samsung,exynos5433-cmu-fsys";
- reg = <0x156e0000 0x0b04>;
- #clock-cells = <1>;
-
- clock-names = "oscclk",
- "sclk_ufs_mphy",
- "aclk_fsys_200",
- "sclk_pcie_100_fsys",
- "sclk_ufsunipro_fsys",
- "sclk_mmc2_fsys",
- "sclk_mmc1_fsys",
- "sclk_mmc0_fsys",
- "sclk_usbhost30_fsys",
- "sclk_usbdrd30_fsys";
- clocks = <&xxti>,
- <&cmu_cpif CLK_SCLK_UFS_MPHY>,
- <&cmu_top CLK_ACLK_FSYS_200>,
- <&cmu_top CLK_SCLK_PCIE_100_FSYS>,
- <&cmu_top CLK_SCLK_UFSUNIPRO_FSYS>,
- <&cmu_top CLK_SCLK_MMC2_FSYS>,
- <&cmu_top CLK_SCLK_MMC1_FSYS>,
- <&cmu_top CLK_SCLK_MMC0_FSYS>,
- <&cmu_top CLK_SCLK_USBHOST30_FSYS>,
- <&cmu_top CLK_SCLK_USBDRD30_FSYS>;
- };
-
- cmu_g2d: clock-controller@12460000 {
- compatible = "samsung,exynos5433-cmu-g2d";
- reg = <0x12460000 0x0b08>;
- #clock-cells = <1>;
-
- clock-names = "oscclk",
- "aclk_g2d_266",
- "aclk_g2d_400";
- clocks = <&xxti>,
- <&cmu_top CLK_ACLK_G2D_266>,
- <&cmu_top CLK_ACLK_G2D_400>;
- power-domains = <&pd_g2d>;
- };
-
- cmu_disp: clock-controller@13b90000 {
- compatible = "samsung,exynos5433-cmu-disp";
- reg = <0x13b90000 0x0c04>;
- #clock-cells = <1>;
-
- clock-names = "oscclk",
- "sclk_dsim1_disp",
- "sclk_dsim0_disp",
- "sclk_dsd_disp",
- "sclk_decon_tv_eclk_disp",
- "sclk_decon_vclk_disp",
- "sclk_decon_eclk_disp",
- "sclk_decon_tv_vclk_disp",
- "aclk_disp_333";
- clocks = <&xxti>,
- <&cmu_mif CLK_SCLK_DSIM1_DISP>,
- <&cmu_mif CLK_SCLK_DSIM0_DISP>,
- <&cmu_mif CLK_SCLK_DSD_DISP>,
- <&cmu_mif CLK_SCLK_DECON_TV_ECLK_DISP>,
- <&cmu_mif CLK_SCLK_DECON_VCLK_DISP>,
- <&cmu_mif CLK_SCLK_DECON_ECLK_DISP>,
- <&cmu_mif CLK_SCLK_DECON_TV_VCLK_DISP>,
- <&cmu_mif CLK_ACLK_DISP_333>;
- power-domains = <&pd_disp>;
- };
-
- cmu_aud: clock-controller@114c0000 {
- compatible = "samsung,exynos5433-cmu-aud";
- reg = <0x114c0000 0x0b04>;
- #clock-cells = <1>;
-
- clock-names = "oscclk", "fout_aud_pll";
- clocks = <&xxti>, <&cmu_top CLK_FOUT_AUD_PLL>;
- power-domains = <&pd_aud>;
- };
-
- cmu_bus0: clock-controller@13600000 {
- compatible = "samsung,exynos5433-cmu-bus0";
- reg = <0x13600000 0x0b04>;
- #clock-cells = <1>;
-
- clock-names = "aclk_bus0_400";
- clocks = <&cmu_top CLK_ACLK_BUS0_400>;
- };
-
- cmu_bus1: clock-controller@14800000 {
- compatible = "samsung,exynos5433-cmu-bus1";
- reg = <0x14800000 0x0b04>;
- #clock-cells = <1>;
-
- clock-names = "aclk_bus1_400";
- clocks = <&cmu_top CLK_ACLK_BUS1_400>;
- };
-
- cmu_bus2: clock-controller@13400000 {
- compatible = "samsung,exynos5433-cmu-bus2";
- reg = <0x13400000 0x0b04>;
- #clock-cells = <1>;
-
- clock-names = "oscclk", "aclk_bus2_400";
- clocks = <&xxti>, <&cmu_mif CLK_ACLK_BUS2_400>;
- };
-
- cmu_g3d: clock-controller@14aa0000 {
- compatible = "samsung,exynos5433-cmu-g3d";
- reg = <0x14aa0000 0x1000>;
- #clock-cells = <1>;
-
- clock-names = "oscclk", "aclk_g3d_400";
- clocks = <&xxti>, <&cmu_top CLK_ACLK_G3D_400>;
- power-domains = <&pd_g3d>;
- };
-
- cmu_gscl: clock-controller@13cf0000 {
- compatible = "samsung,exynos5433-cmu-gscl";
- reg = <0x13cf0000 0x0b10>;
- #clock-cells = <1>;
-
- clock-names = "oscclk",
- "aclk_gscl_111",
- "aclk_gscl_333";
- clocks = <&xxti>,
- <&cmu_top CLK_ACLK_GSCL_111>,
- <&cmu_top CLK_ACLK_GSCL_333>;
- power-domains = <&pd_gscl>;
- };
-
- cmu_apollo: clock-controller@11900000 {
- compatible = "samsung,exynos5433-cmu-apollo";
- reg = <0x11900000 0x1088>;
- #clock-cells = <1>;
-
- clock-names = "oscclk", "sclk_bus_pll_apollo";
- clocks = <&xxti>, <&cmu_mif CLK_SCLK_BUS_PLL_APOLLO>;
- };
-
- cmu_atlas: clock-controller@11800000 {
- compatible = "samsung,exynos5433-cmu-atlas";
- reg = <0x11800000 0x1088>;
- #clock-cells = <1>;
-
- clock-names = "oscclk", "sclk_bus_pll_atlas";
- clocks = <&xxti>, <&cmu_mif CLK_SCLK_BUS_PLL_ATLAS>;
- };
-
- cmu_mscl: clock-controller@105d0000 {
- compatible = "samsung,exynos5433-cmu-mscl";
- reg = <0x105d0000 0x0b10>;
- #clock-cells = <1>;
-
- clock-names = "oscclk",
- "sclk_jpeg_mscl",
- "aclk_mscl_400";
- clocks = <&xxti>,
- <&cmu_top CLK_SCLK_JPEG_MSCL>,
- <&cmu_top CLK_ACLK_MSCL_400>;
- power-domains = <&pd_mscl>;
- };
-
- cmu_mfc: clock-controller@15280000 {
- compatible = "samsung,exynos5433-cmu-mfc";
- reg = <0x15280000 0x0b08>;
- #clock-cells = <1>;
-
- clock-names = "oscclk", "aclk_mfc_400";
- clocks = <&xxti>, <&cmu_top CLK_ACLK_MFC_400>;
- power-domains = <&pd_mfc>;
- };
-
- cmu_hevc: clock-controller@14f80000 {
- compatible = "samsung,exynos5433-cmu-hevc";
- reg = <0x14f80000 0x0b08>;
- #clock-cells = <1>;
-
- clock-names = "oscclk", "aclk_hevc_400";
- clocks = <&xxti>, <&cmu_top CLK_ACLK_HEVC_400>;
- power-domains = <&pd_hevc>;
- };
-
- cmu_isp: clock-controller@146d0000 {
- compatible = "samsung,exynos5433-cmu-isp";
- reg = <0x146d0000 0x0b0c>;
- #clock-cells = <1>;
-
- clock-names = "oscclk",
- "aclk_isp_dis_400",
- "aclk_isp_400";
- clocks = <&xxti>,
- <&cmu_top CLK_ACLK_ISP_DIS_400>,
- <&cmu_top CLK_ACLK_ISP_400>;
- power-domains = <&pd_isp>;
- };
-
- cmu_cam0: clock-controller@120d0000 {
- compatible = "samsung,exynos5433-cmu-cam0";
- reg = <0x120d0000 0x0b0c>;
- #clock-cells = <1>;
-
- clock-names = "oscclk",
- "aclk_cam0_333",
- "aclk_cam0_400",
- "aclk_cam0_552";
- clocks = <&xxti>,
- <&cmu_top CLK_ACLK_CAM0_333>,
- <&cmu_top CLK_ACLK_CAM0_400>,
- <&cmu_top CLK_ACLK_CAM0_552>;
- power-domains = <&pd_cam0>;
- };
-
- cmu_cam1: clock-controller@145d0000 {
- compatible = "samsung,exynos5433-cmu-cam1";
- reg = <0x145d0000 0x0b08>;
- #clock-cells = <1>;
-
- clock-names = "oscclk",
- "sclk_isp_uart_cam1",
- "sclk_isp_spi1_cam1",
- "sclk_isp_spi0_cam1",
- "aclk_cam1_333",
- "aclk_cam1_400",
- "aclk_cam1_552";
- clocks = <&xxti>,
- <&cmu_top CLK_SCLK_ISP_UART_CAM1>,
- <&cmu_top CLK_SCLK_ISP_SPI1_CAM1>,
- <&cmu_top CLK_SCLK_ISP_SPI0_CAM1>,
- <&cmu_top CLK_ACLK_CAM1_333>,
- <&cmu_top CLK_ACLK_CAM1_400>,
- <&cmu_top CLK_ACLK_CAM1_552>;
- power-domains = <&pd_cam1>;
- };
-
-Example 3: UART controller node that consumes the clock generated by the clock
- controller.
-
- serial_0: serial@14c10000 {
- compatible = "samsung,exynos5433-uart";
- reg = <0x14C10000 0x100>;
- interrupts = <0 421 0>;
- clocks = <&cmu_peric CLK_PCLK_UART0>,
- <&cmu_peric CLK_SCLK_UART0>;
- clock-names = "uart", "clk_uart_baud0";
- pinctrl-names = "default";
- pinctrl-0 = <&uart0_bus>;
- };
diff --git a/Documentation/devicetree/bindings/clock/exynos7-clock.txt b/Documentation/devicetree/bindings/clock/exynos7-clock.txt
deleted file mode 100644
index 6bf1e7493f61..000000000000
--- a/Documentation/devicetree/bindings/clock/exynos7-clock.txt
+++ /dev/null
@@ -1,108 +0,0 @@
-* Samsung Exynos7 Clock Controller
-
-Exynos7 clock controller has various blocks which are instantiated
-independently from the device-tree. These clock controllers
-generate and supply clocks to various hardware blocks within
-the SoC.
-
-Each clock is assigned an identifier and client nodes can use
-this identifier to specify the clock which they consume. All
-available clocks are defined as preprocessor macros in
-dt-bindings/clock/exynos7-clk.h header and can be used in
-device tree sources.
-
-External clocks:
-
-There are several clocks that are generated outside the SoC. It
-is expected that they are defined using standard clock bindings
-with following clock-output-names:
-
- - "fin_pll" - PLL input clock from XXTI
-
-Required Properties for Clock Controller:
-
- - compatible: clock controllers will use one of the following
- compatible strings to indicate the clock controller
- functionality.
-
- - "samsung,exynos7-clock-topc"
- - "samsung,exynos7-clock-top0"
- - "samsung,exynos7-clock-top1"
- - "samsung,exynos7-clock-ccore"
- - "samsung,exynos7-clock-peric0"
- - "samsung,exynos7-clock-peric1"
- - "samsung,exynos7-clock-peris"
- - "samsung,exynos7-clock-fsys0"
- - "samsung,exynos7-clock-fsys1"
- - "samsung,exynos7-clock-mscl"
- - "samsung,exynos7-clock-aud"
-
- - reg: physical base address of the controller and the length of
- memory mapped region.
-
- - #clock-cells: should be 1.
-
- - clocks: list of clock identifiers which are fed as the input to
- the given clock controller. Please refer the next section to
- find the input clocks for a given controller.
-
-- clock-names: list of names of clocks which are fed as the input
- to the given clock controller.
-
-Input clocks for top0 clock controller:
- - fin_pll
- - dout_sclk_bus0_pll
- - dout_sclk_bus1_pll
- - dout_sclk_cc_pll
- - dout_sclk_mfc_pll
- - dout_sclk_aud_pll
-
-Input clocks for top1 clock controller:
- - fin_pll
- - dout_sclk_bus0_pll
- - dout_sclk_bus1_pll
- - dout_sclk_cc_pll
- - dout_sclk_mfc_pll
-
-Input clocks for ccore clock controller:
- - fin_pll
- - dout_aclk_ccore_133
-
-Input clocks for peric0 clock controller:
- - fin_pll
- - dout_aclk_peric0_66
- - sclk_uart0
-
-Input clocks for peric1 clock controller:
- - fin_pll
- - dout_aclk_peric1_66
- - sclk_uart1
- - sclk_uart2
- - sclk_uart3
- - sclk_spi0
- - sclk_spi1
- - sclk_spi2
- - sclk_spi3
- - sclk_spi4
- - sclk_i2s1
- - sclk_pcm1
- - sclk_spdif
-
-Input clocks for peris clock controller:
- - fin_pll
- - dout_aclk_peris_66
-
-Input clocks for fsys0 clock controller:
- - fin_pll
- - dout_aclk_fsys0_200
- - dout_sclk_mmc2
-
-Input clocks for fsys1 clock controller:
- - fin_pll
- - dout_aclk_fsys1_200
- - dout_sclk_mmc0
- - dout_sclk_mmc1
-
-Input clocks for aud clock controller:
- - fin_pll
- - fout_aud_pll
diff --git a/Documentation/devicetree/bindings/clock/fixed-clock.txt b/Documentation/devicetree/bindings/clock/fixed-clock.txt
deleted file mode 100644
index 0641a663ad69..000000000000
--- a/Documentation/devicetree/bindings/clock/fixed-clock.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-Binding for simple fixed-rate clock sources.
-
-This binding uses the common clock binding[1].
-
-[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-
-Required properties:
-- compatible : shall be "fixed-clock".
-- #clock-cells : from common clock binding; shall be set to 0.
-- clock-frequency : frequency of clock in Hz. Should be a single cell.
-
-Optional properties:
-- clock-accuracy : accuracy of clock in ppb (parts per billion).
- Should be a single cell.
-- clock-output-names : From common clock binding.
-
-Example:
- clock {
- compatible = "fixed-clock";
- #clock-cells = <0>;
- clock-frequency = <1000000000>;
- clock-accuracy = <100>;
- };
diff --git a/Documentation/devicetree/bindings/clock/fixed-clock.yaml b/Documentation/devicetree/bindings/clock/fixed-clock.yaml
new file mode 100644
index 000000000000..90fb10660684
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/fixed-clock.yaml
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/fixed-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Simple fixed-rate clock sources
+
+maintainers:
+ - Michael Turquette <mturquette@baylibre.com>
+ - Stephen Boyd <sboyd@kernel.org>
+
+properties:
+ $nodename:
+ anyOf:
+ - description:
+ Preferred name is 'clock-<freq>' with <freq> being the output
+ frequency as defined in the 'clock-frequency' property.
+ pattern: "^clock-([0-9]+|[a-z0-9-]+)$"
+ - description: Any name allowed
+ deprecated: true
+
+ compatible:
+ const: fixed-clock
+
+ "#clock-cells":
+ const: 0
+
+ clock-frequency: true
+
+ clock-accuracy:
+ description: accuracy of clock in ppb (parts per billion).
+ $ref: /schemas/types.yaml#/definitions/uint32
+
+ clock-output-names:
+ maxItems: 1
+
+required:
+ - compatible
+ - "#clock-cells"
+ - clock-frequency
+
+additionalProperties: false
+
+examples:
+ - |
+ clock {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <1000000000>;
+ clock-accuracy = <100>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/fixed-factor-clock.txt b/Documentation/devicetree/bindings/clock/fixed-factor-clock.txt
deleted file mode 100644
index 189467a7188a..000000000000
--- a/Documentation/devicetree/bindings/clock/fixed-factor-clock.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-Binding for simple fixed factor rate clock sources.
-
-This binding uses the common clock binding[1].
-
-[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-
-Required properties:
-- compatible : shall be "fixed-factor-clock".
-- #clock-cells : from common clock binding; shall be set to 0.
-- clock-div: fixed divider.
-- clock-mult: fixed multiplier.
-- clocks: parent clock.
-
-Optional properties:
-- clock-output-names : From common clock binding.
-
-Some clocks that require special treatments are also handled by that
-driver, with the compatibles:
- - allwinner,sun4i-a10-pll3-2x-clk
-
-Example:
- clock {
- compatible = "fixed-factor-clock";
- clocks = <&parentclk>;
- #clock-cells = <0>;
- clock-div = <2>;
- clock-mult = <1>;
- };
diff --git a/Documentation/devicetree/bindings/clock/fixed-factor-clock.yaml b/Documentation/devicetree/bindings/clock/fixed-factor-clock.yaml
new file mode 100644
index 000000000000..4afdb1c98f5f
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/fixed-factor-clock.yaml
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/fixed-factor-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Simple fixed factor rate clock sources
+
+maintainers:
+ - Michael Turquette <mturquette@baylibre.com>
+ - Stephen Boyd <sboyd@kernel.org>
+
+properties:
+ $nodename:
+ anyOf:
+ - description:
+ If the frequency is fixed, the preferred name is 'clock-<freq>' with
+ <freq> being the output frequency.
+ pattern: "^clock-([0-9]+|[0-9a-z-]+)$"
+ - description: Any name allowed
+ deprecated: true
+
+ compatible:
+ enum:
+ - fixed-factor-clock
+
+ "#clock-cells":
+ const: 0
+
+ clocks:
+ maxItems: 1
+
+ clock-div:
+ description: Fixed divider
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 1
+
+ clock-mult:
+ description: Fixed multiplier
+ $ref: /schemas/types.yaml#/definitions/uint32
+
+ clock-output-names:
+ maxItems: 1
+
+required:
+ - compatible
+ - clocks
+ - "#clock-cells"
+ - clock-div
+ - clock-mult
+
+additionalProperties: false
+
+examples:
+ - |
+ clock {
+ compatible = "fixed-factor-clock";
+ clocks = <&parentclk>;
+ #clock-cells = <0>;
+ clock-div = <2>;
+ clock-mult = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/fixed-mmio-clock.yaml b/Documentation/devicetree/bindings/clock/fixed-mmio-clock.yaml
new file mode 100644
index 000000000000..e22fc272d023
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/fixed-mmio-clock.yaml
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/fixed-mmio-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Simple memory mapped IO fixed-rate clock sources
+
+description:
+ This binding describes a fixed-rate clock for which the frequency can
+ be read from a single 32-bit memory mapped I/O register.
+
+ It was designed for test systems, like FPGA, not for complete,
+ finished SoCs.
+
+maintainers:
+ - Jan Kotas <jank@cadence.com>
+
+properties:
+ compatible:
+ const: fixed-mmio-clock
+
+ reg:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 0
+
+ clock-output-names:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - "#clock-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ sysclock: sysclock@fd020004 {
+ compatible = "fixed-mmio-clock";
+ #clock-cells = <0>;
+ reg = <0xfd020004 0x4>;
+ clock-output-names = "sysclk";
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/fsl,flexspi-clock.yaml b/Documentation/devicetree/bindings/clock/fsl,flexspi-clock.yaml
new file mode 100644
index 000000000000..1fa390ee7b9b
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/fsl,flexspi-clock.yaml
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/fsl,flexspi-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale FlexSPI clock driver for Layerscape SoCs
+
+maintainers:
+ - Michael Walle <michael@walle.cc>
+
+description:
+ The Freescale Layerscape SoCs have a special FlexSPI clock which is
+ derived from the platform PLL.
+
+properties:
+ compatible:
+ enum:
+ - fsl,ls1028a-flexspi-clk
+ - fsl,lx2160a-flexspi-clk
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 0
+
+ clock-output-names:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ dcfg {
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ fspi_clk: clock-controller@900 {
+ compatible = "fsl,ls1028a-flexspi-clk";
+ reg = <0x900 0x4>;
+ #clock-cells = <0>;
+ clocks = <&parentclk>;
+ clock-output-names = "fspi_clk";
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/fsl,imx8-acm.yaml b/Documentation/devicetree/bindings/clock/fsl,imx8-acm.yaml
new file mode 100644
index 000000000000..07b9d21719c4
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/fsl,imx8-acm.yaml
@@ -0,0 +1,282 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/fsl,imx8-acm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NXP i.MX8 Audio Clock Mux
+
+maintainers:
+ - Shengjiu Wang <shengjiu.wang@nxp.com>
+
+description: |
+ NXP i.MX8 Audio Clock Mux is dedicated clock muxing IP
+ used to control Audio related clock on the SoC.
+
+properties:
+ compatible:
+ enum:
+ - fsl,imx8dxl-acm
+ - fsl,imx8qm-acm
+ - fsl,imx8qxp-acm
+
+ reg:
+ maxItems: 1
+
+ power-domains:
+ minItems: 13
+ maxItems: 21
+
+ '#clock-cells':
+ const: 1
+ description:
+ The clock consumer should specify the desired clock by having the clock
+ ID in its "clocks" phandle cell. See include/dt-bindings/clock/imx8-clock.h
+ for the full list of i.MX8 ACM clock IDs.
+
+ clocks:
+ minItems: 13
+ maxItems: 27
+
+ clock-names:
+ minItems: 13
+ maxItems: 27
+
+required:
+ - compatible
+ - reg
+ - power-domains
+ - '#clock-cells'
+ - clocks
+ - clock-names
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - fsl,imx8qxp-acm
+ then:
+ properties:
+ power-domains:
+ items:
+ - description: power domain of IMX_SC_R_AUDIO_CLK_0
+ - description: power domain of IMX_SC_R_AUDIO_CLK_1
+ - description: power domain of IMX_SC_R_MCLK_OUT_0
+ - description: power domain of IMX_SC_R_MCLK_OUT_1
+ - description: power domain of IMX_SC_R_AUDIO_PLL_0
+ - description: power domain of IMX_SC_R_AUDIO_PLL_1
+ - description: power domain of IMX_SC_R_ASRC_0
+ - description: power domain of IMX_SC_R_ASRC_1
+ - description: power domain of IMX_SC_R_ESAI_0
+ - description: power domain of IMX_SC_R_SAI_0
+ - description: power domain of IMX_SC_R_SAI_1
+ - description: power domain of IMX_SC_R_SAI_2
+ - description: power domain of IMX_SC_R_SAI_3
+ - description: power domain of IMX_SC_R_SAI_4
+ - description: power domain of IMX_SC_R_SAI_5
+ - description: power domain of IMX_SC_R_SPDIF_0
+ - description: power domain of IMX_SC_R_MQS_0
+
+ clocks:
+ minItems: 18
+ maxItems: 18
+
+ clock-names:
+ items:
+ - const: aud_rec_clk0_lpcg_clk
+ - const: aud_rec_clk1_lpcg_clk
+ - const: aud_pll_div_clk0_lpcg_clk
+ - const: aud_pll_div_clk1_lpcg_clk
+ - const: ext_aud_mclk0
+ - const: ext_aud_mclk1
+ - const: esai0_rx_clk
+ - const: esai0_rx_hf_clk
+ - const: esai0_tx_clk
+ - const: esai0_tx_hf_clk
+ - const: spdif0_rx
+ - const: sai0_rx_bclk
+ - const: sai0_tx_bclk
+ - const: sai1_rx_bclk
+ - const: sai1_tx_bclk
+ - const: sai2_rx_bclk
+ - const: sai3_rx_bclk
+ - const: sai4_rx_bclk
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - fsl,imx8qm-acm
+ then:
+ properties:
+ power-domains:
+ items:
+ - description: power domain of IMX_SC_R_AUDIO_CLK_0
+ - description: power domain of IMX_SC_R_AUDIO_CLK_1
+ - description: power domain of IMX_SC_R_MCLK_OUT_0
+ - description: power domain of IMX_SC_R_MCLK_OUT_1
+ - description: power domain of IMX_SC_R_AUDIO_PLL_0
+ - description: power domain of IMX_SC_R_AUDIO_PLL_1
+ - description: power domain of IMX_SC_R_ASRC_0
+ - description: power domain of IMX_SC_R_ASRC_1
+ - description: power domain of IMX_SC_R_ESAI_0
+ - description: power domain of IMX_SC_R_ESAI_1
+ - description: power domain of IMX_SC_R_SAI_0
+ - description: power domain of IMX_SC_R_SAI_1
+ - description: power domain of IMX_SC_R_SAI_2
+ - description: power domain of IMX_SC_R_SAI_3
+ - description: power domain of IMX_SC_R_SAI_4
+ - description: power domain of IMX_SC_R_SAI_5
+ - description: power domain of IMX_SC_R_SAI_6
+ - description: power domain of IMX_SC_R_SAI_7
+ - description: power domain of IMX_SC_R_SPDIF_0
+ - description: power domain of IMX_SC_R_SPDIF_1
+ - description: power domain of IMX_SC_R_MQS_0
+
+ clocks:
+ minItems: 27
+ maxItems: 27
+
+ clock-names:
+ items:
+ - const: aud_rec_clk0_lpcg_clk
+ - const: aud_rec_clk1_lpcg_clk
+ - const: aud_pll_div_clk0_lpcg_clk
+ - const: aud_pll_div_clk1_lpcg_clk
+ - const: mlb_clk
+ - const: hdmi_rx_mclk
+ - const: ext_aud_mclk0
+ - const: ext_aud_mclk1
+ - const: esai0_rx_clk
+ - const: esai0_rx_hf_clk
+ - const: esai0_tx_clk
+ - const: esai0_tx_hf_clk
+ - const: esai1_rx_clk
+ - const: esai1_rx_hf_clk
+ - const: esai1_tx_clk
+ - const: esai1_tx_hf_clk
+ - const: spdif0_rx
+ - const: spdif1_rx
+ - const: sai0_rx_bclk
+ - const: sai0_tx_bclk
+ - const: sai1_rx_bclk
+ - const: sai1_tx_bclk
+ - const: sai2_rx_bclk
+ - const: sai3_rx_bclk
+ - const: sai4_rx_bclk
+ - const: sai5_tx_bclk
+ - const: sai6_rx_bclk
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - fsl,imx8dxl-acm
+ then:
+ properties:
+ power-domains:
+ items:
+ - description: power domain of IMX_SC_R_AUDIO_CLK_0
+ - description: power domain of IMX_SC_R_AUDIO_CLK_1
+ - description: power domain of IMX_SC_R_MCLK_OUT_0
+ - description: power domain of IMX_SC_R_MCLK_OUT_1
+ - description: power domain of IMX_SC_R_AUDIO_PLL_0
+ - description: power domain of IMX_SC_R_AUDIO_PLL_1
+ - description: power domain of IMX_SC_R_ASRC_0
+ - description: power domain of IMX_SC_R_SAI_0
+ - description: power domain of IMX_SC_R_SAI_1
+ - description: power domain of IMX_SC_R_SAI_2
+ - description: power domain of IMX_SC_R_SAI_3
+ - description: power domain of IMX_SC_R_SPDIF_0
+ - description: power domain of IMX_SC_R_MQS_0
+
+ clocks:
+ minItems: 13
+ maxItems: 13
+
+ clock-names:
+ items:
+ - const: aud_rec_clk0_lpcg_clk
+ - const: aud_rec_clk1_lpcg_clk
+ - const: aud_pll_div_clk0_lpcg_clk
+ - const: aud_pll_div_clk1_lpcg_clk
+ - const: ext_aud_mclk0
+ - const: ext_aud_mclk1
+ - const: spdif0_rx
+ - const: sai0_rx_bclk
+ - const: sai0_tx_bclk
+ - const: sai1_rx_bclk
+ - const: sai1_tx_bclk
+ - const: sai2_rx_bclk
+ - const: sai3_rx_bclk
+
+additionalProperties: false
+
+examples:
+ # Clock Control Module node:
+ - |
+ #include <dt-bindings/clock/imx8-lpcg.h>
+ #include <dt-bindings/firmware/imx/rsrc.h>
+
+ clock-controller@59e00000 {
+ compatible = "fsl,imx8qxp-acm";
+ reg = <0x59e00000 0x1d0000>;
+ #clock-cells = <1>;
+ power-domains = <&pd IMX_SC_R_AUDIO_CLK_0>,
+ <&pd IMX_SC_R_AUDIO_CLK_1>,
+ <&pd IMX_SC_R_MCLK_OUT_0>,
+ <&pd IMX_SC_R_MCLK_OUT_1>,
+ <&pd IMX_SC_R_AUDIO_PLL_0>,
+ <&pd IMX_SC_R_AUDIO_PLL_1>,
+ <&pd IMX_SC_R_ASRC_0>,
+ <&pd IMX_SC_R_ASRC_1>,
+ <&pd IMX_SC_R_ESAI_0>,
+ <&pd IMX_SC_R_SAI_0>,
+ <&pd IMX_SC_R_SAI_1>,
+ <&pd IMX_SC_R_SAI_2>,
+ <&pd IMX_SC_R_SAI_3>,
+ <&pd IMX_SC_R_SAI_4>,
+ <&pd IMX_SC_R_SAI_5>,
+ <&pd IMX_SC_R_SPDIF_0>,
+ <&pd IMX_SC_R_MQS_0>;
+ clocks = <&aud_rec0_lpcg IMX_LPCG_CLK_0>,
+ <&aud_rec1_lpcg IMX_LPCG_CLK_0>,
+ <&aud_pll_div0_lpcg IMX_LPCG_CLK_0>,
+ <&aud_pll_div1_lpcg IMX_LPCG_CLK_0>,
+ <&clk_ext_aud_mclk0>,
+ <&clk_ext_aud_mclk1>,
+ <&clk_esai0_rx_clk>,
+ <&clk_esai0_rx_hf_clk>,
+ <&clk_esai0_tx_clk>,
+ <&clk_esai0_tx_hf_clk>,
+ <&clk_spdif0_rx>,
+ <&clk_sai0_rx_bclk>,
+ <&clk_sai0_tx_bclk>,
+ <&clk_sai1_rx_bclk>,
+ <&clk_sai1_tx_bclk>,
+ <&clk_sai2_rx_bclk>,
+ <&clk_sai3_rx_bclk>,
+ <&clk_sai4_rx_bclk>;
+ clock-names = "aud_rec_clk0_lpcg_clk",
+ "aud_rec_clk1_lpcg_clk",
+ "aud_pll_div_clk0_lpcg_clk",
+ "aud_pll_div_clk1_lpcg_clk",
+ "ext_aud_mclk0",
+ "ext_aud_mclk1",
+ "esai0_rx_clk",
+ "esai0_rx_hf_clk",
+ "esai0_tx_clk",
+ "esai0_tx_hf_clk",
+ "spdif0_rx",
+ "sai0_rx_bclk",
+ "sai0_tx_bclk",
+ "sai1_rx_bclk",
+ "sai1_tx_bclk",
+ "sai2_rx_bclk",
+ "sai3_rx_bclk",
+ "sai4_rx_bclk";
+ };
diff --git a/Documentation/devicetree/bindings/clock/fsl,imx8m-anatop.yaml b/Documentation/devicetree/bindings/clock/fsl,imx8m-anatop.yaml
new file mode 100644
index 000000000000..bbd22e95b319
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/fsl,imx8m-anatop.yaml
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/fsl,imx8m-anatop.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NXP i.MX8M Family Anatop Module
+
+maintainers:
+ - Peng Fan <peng.fan@nxp.com>
+
+description: |
+ NXP i.MX8M Family anatop PLL module which generates PLL to CCM root.
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - fsl,imx8mm-anatop
+ - fsl,imx8mq-anatop
+ - items:
+ - enum:
+ - fsl,imx8mn-anatop
+ - fsl,imx8mp-anatop
+ - const: fsl,imx8mm-anatop
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ anatop: clock-controller@30360000 {
+ compatible = "fsl,imx8mn-anatop", "fsl,imx8mm-anatop";
+ reg = <0x30360000 0x10000>;
+ #clock-cells = <1>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/fsl,imx93-anatop.yaml b/Documentation/devicetree/bindings/clock/fsl,imx93-anatop.yaml
new file mode 100644
index 000000000000..8a3b2476419a
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/fsl,imx93-anatop.yaml
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/fsl,imx93-anatop.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NXP i.MX93 ANATOP Clock Module
+
+maintainers:
+ - Peng Fan <peng.fan@nxp.com>
+
+description: |
+ NXP i.MX93 ANATOP module which contains PLL and OSC to Clock Controller
+ Module.
+
+properties:
+ compatible:
+ items:
+ - const: fsl,imx93-anatop
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@44480000 {
+ compatible = "fsl,imx93-anatop";
+ reg = <0x44480000 0x2000>;
+ #clock-cells = <1>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/fsl,plldig.yaml b/Documentation/devicetree/bindings/clock/fsl,plldig.yaml
new file mode 100644
index 000000000000..88dd9c18db92
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/fsl,plldig.yaml
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/fsl,plldig.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NXP QorIQ Layerscape LS1028A Display PIXEL Clock
+
+maintainers:
+ - Wen He <wen.he_1@nxp.com>
+
+description: |
+ NXP LS1028A has a clock domain PXLCLK0 used for the Display output
+ interface in the display core, as implemented in TSMC CLN28HPM PLL.
+ which generate and offers pixel clocks to Display.
+
+properties:
+ compatible:
+ const: fsl,ls1028a-plldig
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 0
+
+ fsl,vco-hz:
+ description: Optional for VCO frequency of the PLL in Hertz. The VCO frequency
+ of this PLL cannot be changed during runtime only at startup. Therefore,
+ the output frequencies are very limited and might not even closely match
+ the requested frequency. To work around this restriction the user may specify
+ its own desired VCO frequency for the PLL.
+ minimum: 650000000
+ maximum: 1300000000
+ default: 1188000000
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ # Display PIXEL Clock node:
+ - |
+ dpclk: clock-display@f1f0000 {
+ compatible = "fsl,ls1028a-plldig";
+ reg = <0xf1f0000 0xffff>;
+ #clock-cells = <0>;
+ clocks = <&osc_27m>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/fsl,qoriq-clock-legacy.yaml b/Documentation/devicetree/bindings/clock/fsl,qoriq-clock-legacy.yaml
new file mode 100644
index 000000000000..97b96a1a5825
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/fsl,qoriq-clock-legacy.yaml
@@ -0,0 +1,84 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/fsl,qoriq-clock-legacy.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Legacy Clock Block on Freescale QorIQ Platforms
+
+maintainers:
+ - Frank Li <Frank.Li@nxp.com>
+
+description: |
+ These nodes are deprecated. Kernels should continue to support
+ device trees with these nodes, but new device trees should not use them.
+
+ Most of the bindings are from the common clock binding[1].
+ [1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+properties:
+ compatible:
+ enum:
+ - fsl,qoriq-core-pll-1.0
+ - fsl,qoriq-core-pll-2.0
+ - fsl,qoriq-core-mux-1.0
+ - fsl,qoriq-core-mux-2.0
+ - fsl,qoriq-sysclk-1.0
+ - fsl,qoriq-sysclk-2.0
+ - fsl,qoriq-platform-pll-1.0
+ - fsl,qoriq-platform-pll-2.0
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ minItems: 1
+ maxItems: 4
+
+ clock-names:
+ minItems: 1
+ maxItems: 4
+
+ clock-output-names:
+ minItems: 1
+ maxItems: 8
+
+ '#clock-cells':
+ minimum: 0
+ maximum: 1
+
+required:
+ - compatible
+ - '#clock-cells'
+
+additionalProperties: false
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - fsl,qoriq-sysclk-1.0
+ - fsl,qoriq-sysclk-2.0
+ then:
+ properties:
+ '#clock-cells':
+ const: 0
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - fsl,qoriq-core-pll-1.0
+ - fsl,qoriq-core-pll-2.0
+ then:
+ properties:
+ '#clock-cells':
+ const: 1
+ description: |
+ * 0 - equal to the PLL frequency
+ * 1 - equal to the PLL frequency divided by 2
+ * 2 - equal to the PLL frequency divided by 4
+
diff --git a/Documentation/devicetree/bindings/clock/fsl,qoriq-clock.yaml b/Documentation/devicetree/bindings/clock/fsl,qoriq-clock.yaml
new file mode 100644
index 000000000000..95a3e3b24267
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/fsl,qoriq-clock.yaml
@@ -0,0 +1,207 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/fsl,qoriq-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Clock Block on Freescale QorIQ Platforms
+
+maintainers:
+ - Frank Li <Frank.Li@nxp.com>
+
+description: |
+ Freescale QorIQ chips take primary clocking input from the external
+ SYSCLK signal. The SYSCLK input (frequency) is multiplied using
+ multiple phase locked loops (PLL) to create a variety of frequencies
+ which can then be passed to a variety of internal logic, including
+ cores and peripheral IP blocks.
+ Please refer to the Reference Manual for details.
+
+ All references to "1.0" and "2.0" refer to the QorIQ chassis version to
+ which the chip complies.
+
+ Chassis Version Example Chips
+ --------------- -------------
+ 1.0 p4080, p5020, p5040
+ 2.0 t4240
+
+ Clock Provider
+
+ The clockgen node should act as a clock provider, though in older device
+ trees the children of the clockgen node are the clock providers.
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - fsl,p2041-clockgen
+ - fsl,p3041-clockgen
+ - fsl,p4080-clockgen
+ - fsl,p5020-clockgen
+ - fsl,p5040-clockgen
+ - const: fsl,qoriq-clockgen-1.0
+ - items:
+ - enum:
+ - fsl,t1023-clockgen
+ - fsl,t1024-clockgen
+ - fsl,t1040-clockgen
+ - fsl,t1042-clockgen
+ - fsl,t2080-clockgen
+ - fsl,t2081-clockgen
+ - fsl,t4240-clockgen
+ - const: fsl,qoriq-clockgen-2.0
+ - items:
+ - enum:
+ - fsl,b4420-clockgen
+ - fsl,b4860-clockgen
+ - const: fsl,b4-clockgen
+ - items:
+ - enum:
+ - fsl,ls1012a-clockgen
+ - fsl,ls1021a-clockgen
+ - fsl,ls1028a-clockgen
+ - fsl,ls1043a-clockgen
+ - fsl,ls1046a-clockgen
+ - fsl,ls1088a-clockgen
+ - fsl,ls2080a-clockgen
+ - fsl,lx2160a-clockgen
+
+ reg:
+ maxItems: 1
+
+ ranges: true
+
+ '#address-cells':
+ const: 1
+
+ '#size-cells':
+ const: 1
+
+ '#clock-cells':
+ const: 2
+ description: |
+ The first cell of the clock specifier is the clock type, and the
+ second cell is the clock index for the specified type.
+
+ Type# Name Index Cell
+ 0 sysclk must be 0
+ 1 cmux index (n in CLKCnCSR)
+ 2 hwaccel index (n in CLKCGnHWACSR)
+ 3 fman 0 for fm1, 1 for fm2
+ 4 platform pll n=pll/(n+1). For example, when n=1,
+ that means output_freq=PLL_freq/2.
+ 5 coreclk must be 0
+
+ clock-frequency:
+ description: Input system clock frequency (SYSCLK)
+
+ clocks:
+ items:
+ - description:
+ sysclk may be provided as an input clock. Either clock-frequency
+ or clocks must be provided.
+ - description:
+ A second input clock, called "coreclk", may be provided if
+ core PLLs are based on a different input clock from the
+ platform PLL.
+ minItems: 1
+
+ clock-names:
+ items:
+ - const: sysclk
+ - const: coreclk
+
+patternProperties:
+ '^mux[0-9]@[a-f0-9]+$':
+ deprecated: true
+ $ref: fsl,qoriq-clock-legacy.yaml
+
+ '^sysclk(-[a-z0-9]+)?$':
+ deprecated: true
+ $ref: fsl,qoriq-clock-legacy.yaml
+
+ '^pll[0-9]@[a-f0-9]+$':
+ deprecated: true
+ $ref: fsl,qoriq-clock-legacy.yaml
+
+ '^platform\-pll@[a-f0-9]+$':
+ deprecated: true
+ $ref: fsl,qoriq-clock-legacy.yaml
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ /* clock provider example */
+ global-utilities@e1000 {
+ compatible = "fsl,p5020-clockgen", "fsl,qoriq-clockgen-1.0";
+ reg = <0xe1000 0x1000>;
+ clock-frequency = <133333333>;
+ #clock-cells = <2>;
+ };
+
+ - |
+ /* Legacy example */
+ global-utilities@e1000 {
+ compatible = "fsl,p5020-clockgen", "fsl,qoriq-clockgen-1.0";
+ reg = <0xe1000 0x1000>;
+ ranges = <0x0 0xe1000 0x1000>;
+ clock-frequency = <133333333>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ #clock-cells = <2>;
+
+ sysclk: sysclk {
+ compatible = "fsl,qoriq-sysclk-1.0";
+ clock-output-names = "sysclk";
+ #clock-cells = <0>;
+ };
+
+ pll0: pll0@800 {
+ compatible = "fsl,qoriq-core-pll-1.0";
+ reg = <0x800 0x4>;
+ #clock-cells = <1>;
+ clocks = <&sysclk>;
+ clock-output-names = "pll0", "pll0-div2";
+ };
+
+ pll1: pll1@820 {
+ compatible = "fsl,qoriq-core-pll-1.0";
+ reg = <0x820 0x4>;
+ #clock-cells = <1>;
+ clocks = <&sysclk>;
+ clock-output-names = "pll1", "pll1-div2";
+ };
+
+ mux0: mux0@0 {
+ compatible = "fsl,qoriq-core-mux-1.0";
+ reg = <0x0 0x4>;
+ #clock-cells = <0>;
+ clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>;
+ clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2";
+ clock-output-names = "cmux0";
+ };
+
+ mux1: mux1@20 {
+ compatible = "fsl,qoriq-core-mux-1.0";
+ reg = <0x20 0x4>;
+ #clock-cells = <0>;
+ clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>;
+ clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2";
+ clock-output-names = "cmux1";
+ };
+
+ platform-pll@c00 {
+ #clock-cells = <1>;
+ reg = <0xc00 0x4>;
+ compatible = "fsl,qoriq-platform-pll-1.0";
+ clocks = <&sysclk>;
+ clock-output-names = "platform-pll", "platform-pll-div2";
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/fsl,sai-clock.yaml b/Documentation/devicetree/bindings/clock/fsl,sai-clock.yaml
new file mode 100644
index 000000000000..3bca9d11c148
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/fsl,sai-clock.yaml
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/fsl,sai-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale SAI bitclock-as-a-clock
+
+maintainers:
+ - Michael Walle <michael@walle.cc>
+
+description: |
+ It is possible to use the BCLK pin of a SAI module as a generic clock
+ output. Some SoC are very constrained in their pin multiplexer
+ configuration. Eg. pins can only be changed groups. For example, on the
+ LS1028A SoC you can only enable SAIs in pairs. If you use only one SAI,
+ the second pins are wasted. Using this binding it is possible to use the
+ clock of the second SAI as a MCLK clock for an audio codec, for example.
+
+ This is a composite of a gated clock and a divider clock.
+
+properties:
+ compatible:
+ const: fsl,vf610-sai-clock
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 0
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ mclk: clock-mclk@f130080 {
+ compatible = "fsl,vf610-sai-clock";
+ reg = <0x0 0xf130080 0x0 0x80>;
+ #clock-cells = <0>;
+ clocks = <&parentclk>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/fsl,scu-clk.yaml b/Documentation/devicetree/bindings/clock/fsl,scu-clk.yaml
new file mode 100644
index 000000000000..36d4cfc3c2f8
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/fsl,scu-clk.yaml
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/fsl,scu-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: i.MX SCU Client Device Node - Clock Controller Based on SCU Message Protocol
+
+maintainers:
+ - Abel Vesa <abel.vesa@nxp.com>
+
+description: i.MX SCU Client Device Node
+ Client nodes are maintained as children of the relevant IMX-SCU device node.
+ This binding uses the common clock binding.
+ (Documentation/devicetree/bindings/clock/clock-bindings.txt)
+ The clock consumer should specify the desired clock by having the clock
+ ID in its "clocks" phandle cell. See the full list of clock IDs from
+ include/dt-bindings/clock/imx8qxp-clock.h
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - fsl,imx8dxl-clk
+ - fsl,imx8qm-clk
+ - fsl,imx8qxp-clk
+ - const: fsl,scu-clk
+
+ '#clock-cells':
+ const: 2
+
+required:
+ - compatible
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller {
+ compatible = "fsl,imx8qxp-clk", "fsl,scu-clk";
+ #clock-cells = <2>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/fsl,vf610-ccm.yaml b/Documentation/devicetree/bindings/clock/fsl,vf610-ccm.yaml
new file mode 100644
index 000000000000..29ae5be51acf
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/fsl,vf610-ccm.yaml
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/fsl,vf610-ccm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Clock for Freescale Vybrid VF610 SOC
+
+description:
+ The clock consumer should specify the desired clock by having the clock
+ ID in its "clocks" phandle cell. See include/dt-bindings/clock/vf610-clock.h
+ for the full list of VF610 clock IDs
+
+maintainers:
+ - Frank Li <Frank.Li@nxp.com>
+
+properties:
+ compatible:
+ const: fsl,vf610-ccm
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ clocks:
+ items:
+ - description: external crystal oscillator 32KHz, recommended
+ - description: external crystal oscillator 24MHz, recommended
+ - description: audio
+ - description: enet
+ minItems: 2
+
+ clock-names:
+ items:
+ - const: sxosc
+ - const: fxosc
+ - const: enet_ext
+ - const: audio_ext
+ minItems: 2
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@4006b000 {
+ compatible = "fsl,vf610-ccm";
+ reg = <0x4006b000 0x1000>;
+ #clock-cells = <1>;
+ clocks = <&sxosc>, <&fxosc>;
+ clock-names = "sxosc", "fxosc";
+ };
+
diff --git a/Documentation/devicetree/bindings/clock/fujitsu,mb86s70-crg11.txt b/Documentation/devicetree/bindings/clock/fujitsu,mb86s70-crg11.txt
deleted file mode 100644
index 332396265689..000000000000
--- a/Documentation/devicetree/bindings/clock/fujitsu,mb86s70-crg11.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-Fujitsu CRG11 clock driver bindings
------------------------------------
-
-Required properties :
-- compatible : Shall contain "fujitsu,mb86s70-crg11"
-- #clock-cells : Shall be 3 {cntrlr domain port}
-
-The consumer specifies the desired clock pointing to its phandle.
-
-Example:
-
- clock: crg11 {
- compatible = "fujitsu,mb86s70-crg11";
- #clock-cells = <3>;
- };
-
- mhu: mhu0@2b1f0000 {
- #mbox-cells = <1>;
- compatible = "arm,mhu";
- reg = <0 0x2B1F0000 0x1000>;
- interrupts = <0 36 4>, /* LP Non-Sec */
- <0 35 4>, /* HP Non-Sec */
- <0 37 4>; /* Secure */
- clocks = <&clock 0 2 1>; /* Cntrlr:0 Domain:2 Port:1 */
- clock-names = "clk";
- };
diff --git a/Documentation/devicetree/bindings/clock/gated-fixed-clock.yaml b/Documentation/devicetree/bindings/clock/gated-fixed-clock.yaml
new file mode 100644
index 000000000000..d3e0faf3c64d
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/gated-fixed-clock.yaml
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/gated-fixed-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Gated Fixed clock
+
+maintainers:
+ - Heiko Stuebner <heiko@sntech.de>
+
+properties:
+ compatible:
+ const: gated-fixed-clock
+
+ "#clock-cells":
+ const: 0
+
+ clock-frequency: true
+
+ clock-output-names:
+ maxItems: 1
+
+ enable-gpios:
+ description:
+ Contains a single GPIO specifier for the GPIO that enables and disables
+ the oscillator.
+ maxItems: 1
+
+ vdd-supply:
+ description: handle of the regulator that provides the supply voltage
+
+required:
+ - compatible
+ - "#clock-cells"
+ - clock-frequency
+ - vdd-supply
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-1000000000 {
+ compatible = "gated-fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <1000000000>;
+ vdd-supply = <&reg_vdd>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/google,gs101-clock.yaml b/Documentation/devicetree/bindings/clock/google,gs101-clock.yaml
new file mode 100644
index 000000000000..caf442ead24b
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/google,gs101-clock.yaml
@@ -0,0 +1,181 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/google,gs101-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Google GS101 SoC clock controller
+
+maintainers:
+ - Peter Griffin <peter.griffin@linaro.org>
+
+description: |
+ Google GS101 clock controller is comprised of several CMU units, generating
+ clocks for different domains. Those CMU units are modeled as separate device
+ tree nodes, and might depend on each other. The root clock in that clock tree
+ is OSCCLK (24.576 MHz). That external clock must be defined as a fixed-rate
+ clock in dts.
+
+ CMU_TOP is a top-level CMU, where all base clocks are prepared using PLLs and
+ dividers; all other leaf clocks (other CMUs) are usually derived from CMU_TOP.
+
+ Each clock is assigned an identifier and client nodes can use this identifier
+ to specify the clock which they consume. All clocks available for usage
+ in clock consumer nodes are defined as preprocessor macros in
+ 'dt-bindings/clock/gs101.h' header.
+
+properties:
+ compatible:
+ enum:
+ - google,gs101-cmu-top
+ - google,gs101-cmu-apm
+ - google,gs101-cmu-misc
+ - google,gs101-cmu-hsi0
+ - google,gs101-cmu-hsi2
+ - google,gs101-cmu-peric0
+ - google,gs101-cmu-peric1
+
+ clocks:
+ minItems: 1
+ maxItems: 5
+
+ clock-names:
+ minItems: 1
+ maxItems: 5
+
+ "#clock-cells":
+ const: 1
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - "#clock-cells"
+ - clocks
+ - clock-names
+ - reg
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - google,gs101-cmu-top
+ - google,gs101-cmu-apm
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (24.576 MHz)
+
+ clock-names:
+ items:
+ - const: oscclk
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: google,gs101-cmu-hsi0
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (24.576 MHz)
+ - description: HSI0 bus clock (from CMU_TOP)
+ - description: DPGTC (from CMU_TOP)
+ - description: USB DRD controller clock (from CMU_TOP)
+ - description: USB Display Port debug clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: bus
+ - const: dpgtc
+ - const: usb31drd
+ - const: usbdpdbg
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - google,gs101-cmu-hsi2
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (24.576 MHz)
+ - description: High Speed Interface bus clock (from CMU_TOP)
+ - description: High Speed Interface pcie clock (from CMU_TOP)
+ - description: High Speed Interface ufs clock (from CMU_TOP)
+ - description: High Speed Interface mmc clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: bus
+ - const: pcie
+ - const: ufs
+ - const: mmc
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: google,gs101-cmu-misc
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: Misc bus clock (from CMU_TOP)
+ - description: Misc sss clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: bus
+ - const: sss
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - google,gs101-cmu-peric0
+ - google,gs101-cmu-peric1
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (24.576 MHz)
+ - description: Connectivity Peripheral 0/1 bus clock (from CMU_TOP)
+ - description: Connectivity Peripheral 0/1 IP clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: bus
+ - const: ip
+
+additionalProperties: false
+
+examples:
+ # Clock controller node for CMU_TOP
+ - |
+ #include <dt-bindings/clock/google,gs101.h>
+
+ cmu_top: clock-controller@1e080000 {
+ compatible = "google,gs101-cmu-top";
+ reg = <0x1e080000 0x8000>;
+ #clock-cells = <1>;
+ clocks = <&ext_24_5m>;
+ clock-names = "oscclk";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/gpio-gate-clock.txt b/Documentation/devicetree/bindings/clock/gpio-gate-clock.txt
deleted file mode 100644
index d3379ff9b84b..000000000000
--- a/Documentation/devicetree/bindings/clock/gpio-gate-clock.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-Binding for simple gpio gated clock.
-
-This binding uses the common clock binding[1].
-
-[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-
-Required properties:
-- compatible : shall be "gpio-gate-clock".
-- #clock-cells : from common clock binding; shall be set to 0.
-- enable-gpios : GPIO reference for enabling and disabling the clock.
-
-Optional properties:
-- clocks: Maximum of one parent clock is supported.
-
-Example:
- clock {
- compatible = "gpio-gate-clock";
- clocks = <&parentclk>;
- #clock-cells = <0>;
- enable-gpios = <&gpio 1 GPIO_ACTIVE_HIGH>;
- };
diff --git a/Documentation/devicetree/bindings/clock/gpio-gate-clock.yaml b/Documentation/devicetree/bindings/clock/gpio-gate-clock.yaml
new file mode 100644
index 000000000000..d09d0e3f0c6e
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/gpio-gate-clock.yaml
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/gpio-gate-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Simple GPIO clock gate
+
+maintainers:
+ - Jyri Sarha <jsarha@ti.com>
+
+properties:
+ compatible:
+ const: gpio-gate-clock
+
+ clocks:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 0
+
+ enable-gpios:
+ description: GPIO reference for enabling and disabling the clock.
+ maxItems: 1
+
+required:
+ - compatible
+ - '#clock-cells'
+ - enable-gpios
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ clock {
+ compatible = "gpio-gate-clock";
+ clocks = <&parentclk>;
+ #clock-cells = <0>;
+ enable-gpios = <&gpio 1 GPIO_ACTIVE_HIGH>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/gpio-mux-clock.txt b/Documentation/devicetree/bindings/clock/gpio-mux-clock.txt
deleted file mode 100644
index 2be1e038ca62..000000000000
--- a/Documentation/devicetree/bindings/clock/gpio-mux-clock.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-Binding for simple gpio clock multiplexer.
-
-This binding uses the common clock binding[1].
-
-[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-
-Required properties:
-- compatible : shall be "gpio-mux-clock".
-- clocks: list of two references to parent clocks.
-- #clock-cells : from common clock binding; shall be set to 0.
-- select-gpios : GPIO reference for selecting the parent clock.
-
-Example:
- clock {
- compatible = "gpio-mux-clock";
- clocks = <&parentclk1>, <&parentclk2>;
- #clock-cells = <0>;
- select-gpios = <&gpio 1 GPIO_ACTIVE_HIGH>;
- };
diff --git a/Documentation/devicetree/bindings/clock/gpio-mux-clock.yaml b/Documentation/devicetree/bindings/clock/gpio-mux-clock.yaml
new file mode 100644
index 000000000000..1e21f8b3a4ff
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/gpio-mux-clock.yaml
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/gpio-mux-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Simple GPIO clock multiplexer
+
+maintainers:
+ - Sergej Sawazki <ce3a@gmx.de>
+
+properties:
+ compatible:
+ const: gpio-mux-clock
+
+ clocks:
+ items:
+ - description: First parent clock
+ - description: Second parent clock
+
+ '#clock-cells':
+ const: 0
+
+ select-gpios:
+ description: GPIO reference for selecting the parent clock.
+ maxItems: 1
+
+required:
+ - compatible
+ - clocks
+ - '#clock-cells'
+ - select-gpios
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ clock {
+ compatible = "gpio-mux-clock";
+ clocks = <&parentclk1>, <&parentclk2>;
+ #clock-cells = <0>;
+ select-gpios = <&gpio 1 GPIO_ACTIVE_HIGH>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/hi3620-clock.txt b/Documentation/devicetree/bindings/clock/hi3620-clock.txt
deleted file mode 100644
index dad6269f52c5..000000000000
--- a/Documentation/devicetree/bindings/clock/hi3620-clock.txt
+++ /dev/null
@@ -1,20 +0,0 @@
-* Hisilicon Hi3620 Clock Controller
-
-The Hi3620 clock controller generates and supplies clock to various
-controllers within the Hi3620 SoC.
-
-Required Properties:
-
-- compatible: should be one of the following.
- - "hisilicon,hi3620-clock" - controller compatible with Hi3620 SoC.
- - "hisilicon,hi3620-mmc-clock" - controller specific for Hi3620 mmc.
-
-- reg: physical base address of the controller and length of memory mapped
- region.
-
-- #clock-cells: should be 1.
-
-Each clock is assigned an identifier and client nodes use this identifier
-to specify the clock which they consume.
-
-All these identifier could be found in <dt-bindings/clock/hi3620-clock.h>.
diff --git a/Documentation/devicetree/bindings/clock/hi6220-clock.txt b/Documentation/devicetree/bindings/clock/hi6220-clock.txt
index ef3deb7b86ea..17ac4a3dd26a 100644
--- a/Documentation/devicetree/bindings/clock/hi6220-clock.txt
+++ b/Documentation/devicetree/bindings/clock/hi6220-clock.txt
@@ -4,7 +4,7 @@ Clock control registers reside in different Hi6220 system controllers,
please refer the following document to know more about the binding rules
for these system controllers:
-Documentation/devicetree/bindings/arm/hisilicon/hisilicon.txt
+Documentation/devicetree/bindings/arm/hisilicon/hisilicon.yaml
Required Properties:
diff --git a/Documentation/devicetree/bindings/clock/hisilicon,hi3559av100-clock.yaml b/Documentation/devicetree/bindings/clock/hisilicon,hi3559av100-clock.yaml
new file mode 100644
index 000000000000..3ceb29cec704
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/hisilicon,hi3559av100-clock.yaml
@@ -0,0 +1,59 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/hisilicon,hi3559av100-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Hisilicon SOC Clock for HI3559AV100
+
+maintainers:
+ - Dongjiu Geng <gengdongjiu@huawei.com>
+
+description: |
+ Hisilicon SOC clock control module which supports the clocks, resets and
+ power domains on HI3559AV100.
+
+ See also:
+ dt-bindings/clock/hi3559av100-clock.h
+
+properties:
+ compatible:
+ enum:
+ - hisilicon,hi3559av100-clock
+ - hisilicon,hi3559av100-shub-clock
+
+ reg:
+ minItems: 1
+ maxItems: 2
+
+ '#clock-cells':
+ const: 1
+
+ '#reset-cells':
+ const: 2
+ description: |
+ First cell is reset request register offset.
+ Second cell is bit offset in reset request register.
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+ - '#reset-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ clock-controller@12010000 {
+ compatible = "hisilicon,hi3559av100-clock";
+ #clock-cells = <1>;
+ #reset-cells = <2>;
+ reg = <0x0 0x12010000 0x0 0x10000>;
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/hix5hd2-clock.txt b/Documentation/devicetree/bindings/clock/hix5hd2-clock.txt
deleted file mode 100644
index 4733e58e491b..000000000000
--- a/Documentation/devicetree/bindings/clock/hix5hd2-clock.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-* Hisilicon Hix5hd2 Clock Controller
-
-The hix5hd2 clock controller generates and supplies clock to various
-controllers within the hix5hd2 SoC.
-
-Required Properties:
-
-- compatible: should be "hisilicon,hix5hd2-clock"
-- reg: Address and length of the register set
-- #clock-cells: Should be <1>
-
-Each clock is assigned an identifier and client nodes use this identifier
-to specify the clock which they consume.
-
-All these identifier could be found in <dt-bindings/clock/hix5hd2-clock.h>.
-
-Examples:
- clock: clock@f8a22000 {
- compatible = "hisilicon,hix5hd2-clock";
- reg = <0xf8a22000 0x1000>;
- #clock-cells = <1>;
- };
-
- uart0: uart@f8b00000 {
- compatible = "arm,pl011", "arm,primecell";
- reg = <0xf8b00000 0x1000>;
- interrupts = <0 49 4>;
- clocks = <&clock HIX5HD2_FIXED_83M>;
- clock-names = "apb_pclk";
- };
diff --git a/Documentation/devicetree/bindings/clock/idt,versaclock5.txt b/Documentation/devicetree/bindings/clock/idt,versaclock5.txt
deleted file mode 100644
index 05a245c9df08..000000000000
--- a/Documentation/devicetree/bindings/clock/idt,versaclock5.txt
+++ /dev/null
@@ -1,91 +0,0 @@
-Binding for IDT VersaClock 5,6 programmable i2c clock generators.
-
-The IDT VersaClock 5 and VersaClock 6 are programmable i2c clock
-generators providing from 3 to 12 output clocks.
-
-==I2C device node==
-
-Required properties:
-- compatible: shall be one of
- "idt,5p49v5923"
- "idt,5p49v5925"
- "idt,5p49v5933"
- "idt,5p49v5935"
- "idt,5p49v6901"
-- reg: i2c device address, shall be 0x68 or 0x6a.
-- #clock-cells: from common clock binding; shall be set to 1.
-- clocks: from common clock binding; list of parent clock handles,
- - 5p49v5923 and
- 5p49v5925 and
- 5p49v6901: (required) either or both of XTAL or CLKIN
- reference clock.
- - 5p49v5933 and
- - 5p49v5935: (optional) property not present (internal
- Xtal used) or CLKIN reference
- clock.
-- clock-names: from common clock binding; clock input names, can be
- - 5p49v5923 and
- 5p49v5925 and
- 5p49v6901: (required) either or both of "xin", "clkin".
- - 5p49v5933 and
- - 5p49v5935: (optional) property not present or "clkin".
-
-==Mapping between clock specifier and physical pins==
-
-When referencing the provided clock in the DT using phandle and
-clock specifier, the following mapping applies:
-
-5P49V5923:
- 0 -- OUT0_SEL_I2CB
- 1 -- OUT1
- 2 -- OUT2
-
-5P49V5933:
- 0 -- OUT0_SEL_I2CB
- 1 -- OUT1
- 2 -- OUT4
-
-5P49V5925 and
-5P49V5935:
- 0 -- OUT0_SEL_I2CB
- 1 -- OUT1
- 2 -- OUT2
- 3 -- OUT3
- 4 -- OUT4
-
-5P49V6901:
- 0 -- OUT0_SEL_I2CB
- 1 -- OUT1
- 2 -- OUT2
- 3 -- OUT3
- 4 -- OUT4
-
-==Example==
-
-/* 25MHz reference crystal */
-ref25: ref25m {
- compatible = "fixed-clock";
- #clock-cells = <0>;
- clock-frequency = <25000000>;
-};
-
-i2c-master-node {
-
- /* IDT 5P49V5923 i2c clock generator */
- vc5: clock-generator@6a {
- compatible = "idt,5p49v5923";
- reg = <0x6a>;
- #clock-cells = <1>;
-
- /* Connect XIN input to 25MHz reference */
- clocks = <&ref25m>;
- clock-names = "xin";
- };
-};
-
-/* Consumer referencing the 5P49V5923 pin OUT1 */
-consumer {
- ...
- clocks = <&vc5 1>;
- ...
-}
diff --git a/Documentation/devicetree/bindings/clock/idt,versaclock5.yaml b/Documentation/devicetree/bindings/clock/idt,versaclock5.yaml
new file mode 100644
index 000000000000..8b400da05fbe
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/idt,versaclock5.yaml
@@ -0,0 +1,195 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/idt,versaclock5.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: IDT VersaClock 5 and 6 programmable I2C clock generators
+
+description: |
+ The IDT VersaClock 5 and VersaClock 6 are programmable I2C
+ clock generators providing from 3 to 12 output clocks.
+
+ When referencing the provided clock in the DT using phandle and clock
+ specifier, the following mapping applies:
+
+ - 5P49V5923:
+ 0 -- OUT0_SEL_I2CB
+ 1 -- OUT1
+ 2 -- OUT2
+
+ - 5P49V5933:
+ 0 -- OUT0_SEL_I2CB
+ 1 -- OUT1
+ 2 -- OUT4
+
+ - other parts:
+ 0 -- OUT0_SEL_I2CB
+ 1 -- OUT1
+ 2 -- OUT2
+ 3 -- OUT3
+ 4 -- OUT4
+
+ The idt,shutdown and idt,output-enable-active properties control the
+ SH (en_global_shutdown) and SP bits of the Primary Source and Shutdown
+ Register, respectively. Their behavior is summarized by the following
+ table:
+
+ SH SP Output when the SD/OE pin is Low/High
+ == == =====================================
+ 0 0 Active/Inactive
+ 0 1 Inactive/Active
+ 1 0 Active/Shutdown
+ 1 1 Inactive/Shutdown
+
+ The case where SH and SP are both 1 is likely not very interesting.
+
+maintainers:
+ - Luca Ceresoli <luca.ceresoli@bootlin.com>
+
+properties:
+ compatible:
+ enum:
+ - idt,5p49v5923
+ - idt,5p49v5925
+ - idt,5p49v5933
+ - idt,5p49v5935
+ - idt,5p49v60
+ - idt,5p49v6901
+ - idt,5p49v6965
+ - idt,5p49v6975
+
+ reg:
+ description: I2C device address
+ enum: [ 0x68, 0x6a ]
+
+ '#clock-cells':
+ const: 1
+
+ clock-names:
+ minItems: 1
+ maxItems: 2
+ items:
+ enum: [ xin, clkin ]
+ clocks:
+ minItems: 1
+ maxItems: 2
+
+ idt,xtal-load-femtofarads:
+ minimum: 9000
+ maximum: 22760
+ description: Optional load capacitor for XTAL1 and XTAL2
+
+ idt,shutdown:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1]
+ description: |
+ If 1, this enables the shutdown functionality: the chip will be
+ shut down if the SD/OE pin is driven high. If 0, this disables the
+ shutdown functionality: the chip will never be shut down based on
+ the value of the SD/OE pin. This property corresponds to the SH
+ bit of the Primary Source and Shutdown Register.
+
+ idt,output-enable-active:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1]
+ description: |
+ If 1, this enables output when the SD/OE pin is high, and disables
+ output when the SD/OE pin is low. If 0, this disables output when
+ the SD/OE pin is high, and enables output when the SD/OE pin is
+ low. This corresponds to the SP bit of the Primary Source and
+ Shutdown Register.
+
+patternProperties:
+ "^OUT[1-4]$":
+ type: object
+ description:
+ Description of one of the outputs (OUT1..OUT4). See "Clock1 Output
+ Configuration" in the Versaclock 5/6/6E Family Register Description
+ and Programming Guide.
+ properties:
+ idt,mode:
+ description:
+ The output drive mode. Values defined in dt-bindings/clock/versaclock.h
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 6
+ idt,voltage-microvolt:
+ description: The output drive voltage.
+ enum: [ 1800000, 2500000, 3300000 ]
+ idt,slew-percent:
+ description: The Slew rate control for CMOS single-ended.
+ enum: [ 80, 85, 90, 100 ]
+ additionalProperties: false
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ enum:
+ - idt,5p49v5933
+ - idt,5p49v5935
+ - idt,5p49v6975
+ then:
+ # Devices with builtin crystal + optional external input
+ properties:
+ clock-names:
+ const: clkin
+ clocks:
+ maxItems: 1
+ else:
+ # Devices without builtin crystal
+ required:
+ - clock-names
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/versaclock.h>
+
+ /* 25MHz reference crystal */
+ ref25: ref25m {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <25000000>;
+ };
+
+ i2c@0 {
+ reg = <0x0 0x100>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ /* IDT 5P49V5923 I2C clock generator */
+ vc5: clock-generator@6a {
+ compatible = "idt,5p49v5923";
+ reg = <0x6a>;
+ #clock-cells = <1>;
+
+ /* Connect XIN input to 25MHz reference */
+ clocks = <&ref25m>;
+ clock-names = "xin";
+
+ /* Set the SD/OE pin's settings */
+ idt,shutdown = <0>;
+ idt,output-enable-active = <0>;
+
+ OUT1 {
+ idt,mode = <VC5_CMOSD>;
+ idt,voltage-microvolt = <1800000>;
+ idt,slew-percent = <80>;
+ };
+
+ OUT4 {
+ idt,mode = <VC5_LVDS>;
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/img,pistachio-clk.yaml b/Documentation/devicetree/bindings/clock/img,pistachio-clk.yaml
new file mode 100644
index 000000000000..e70feee8e894
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/img,pistachio-clk.yaml
@@ -0,0 +1,136 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/img,pistachio-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Imagination Technologies Pistachio SoC clock controllers
+
+maintainers:
+ - Andrew Bresticker <abrestic@chromium.org>
+
+description: |
+ Pistachio has four clock controllers (core clock, peripheral clock, peripheral
+ general control, and top general control) which are instantiated individually
+ from the device-tree.
+
+ Core clock controller:
+
+ The core clock controller generates clocks for the CPU, RPU (WiFi + BT
+ co-processor), audio, and several peripherals.
+
+ Peripheral clock controller:
+
+ The peripheral clock controller generates clocks for the DDR, ROM, and other
+ peripherals. The peripheral system clock ("periph_sys") generated by the core
+ clock controller is the input clock to the peripheral clock controller.
+
+ Peripheral general control:
+
+ The peripheral general control block generates system interface clocks and
+ resets for various peripherals. It also contains miscellaneous peripheral
+ control registers.
+
+ Top-level general control:
+
+ The top-level general control block contains miscellaneous control registers
+ and gates for the external clocks "audio_clk_in" and "enet_clk_in".
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - img,pistachio-clk
+ - img,pistachio-clk-periph
+ - img,pistachio-cr-periph
+ - img,pistachio-cr-top
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ clocks:
+ minItems: 1
+ maxItems: 3
+
+ clock-names:
+ minItems: 1
+ maxItems: 3
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+ - clocks
+ - clock-names
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: img,pistachio-clk
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External 52Mhz oscillator
+ - description: Alternate audio reference clock
+ - description: Alternate ethernet PHY clock
+
+ clock-names:
+ items:
+ - const: xtal
+ - const: audio_refclk_ext_gate
+ - const: ext_enet_in_gate
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: img,pistachio-clk-periph
+ then:
+ properties:
+ clocks:
+ items:
+ - description: Peripheral system clock
+
+ clock-names:
+ items:
+ - const: periph_sys_core
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: img,pistachio-cr-periph
+ then:
+ properties:
+ clocks:
+ items:
+ - description: System interface clock
+
+ clock-names:
+ items:
+ - const: sys
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: img,pistachio-cr-top
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External audio reference clock
+ - description: External ethernet PHY clock
+
+ clock-names:
+ items:
+ - const: audio_clk_in
+ - const: enet_clk_in
+
+additionalProperties: false
diff --git a/Documentation/devicetree/bindings/clock/imx1-clock.txt b/Documentation/devicetree/bindings/clock/imx1-clock.txt
deleted file mode 100644
index 9823baf7acb6..000000000000
--- a/Documentation/devicetree/bindings/clock/imx1-clock.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-* Clock bindings for Freescale i.MX1 CPUs
-
-Required properties:
-- compatible: Should be "fsl,imx1-ccm".
-- reg: Address and length of the register set.
-- #clock-cells: Should be <1>.
-
-The clock consumer should specify the desired clock by having the clock
-ID in its "clocks" phandle cell. See include/dt-bindings/clock/imx1-clock.h
-for the full list of i.MX1 clock IDs.
-
-Examples:
- clks: ccm@21b000 {
- #clock-cells = <1>;
- compatible = "fsl,imx1-ccm";
- reg = <0x0021b000 0x1000>;
- };
-
- pwm: pwm@208000 {
- #pwm-cells = <2>;
- compatible = "fsl,imx1-pwm";
- reg = <0x00208000 0x1000>;
- interrupts = <34>;
- clocks = <&clks IMX1_CLK_DUMMY>, <&clks IMX1_CLK_PER1>;
- clock-names = "ipg", "per";
- };
diff --git a/Documentation/devicetree/bindings/clock/imx1-clock.yaml b/Documentation/devicetree/bindings/clock/imx1-clock.yaml
new file mode 100644
index 000000000000..7ade4c32aff3
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/imx1-clock.yaml
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/imx1-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX1 CPUs Clock Controller
+
+maintainers:
+ - Alexander Shiyan <shc_work@mail.ru>
+
+description: |
+ The clock consumer should specify the desired clock by having the clock
+ ID in its "clocks" phandle cell. See include/dt-bindings/clock/imx1-clock.h
+ for the full list of i.MX1 clock IDs.
+
+properties:
+ compatible:
+ const: fsl,imx1-ccm
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/imx1-clock.h>
+
+ clock-controller@21b000 {
+ #clock-cells = <1>;
+ compatible = "fsl,imx1-ccm";
+ reg = <0x0021b000 0x1000>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/imx21-clock.txt b/Documentation/devicetree/bindings/clock/imx21-clock.txt
deleted file mode 100644
index 806f63d628bd..000000000000
--- a/Documentation/devicetree/bindings/clock/imx21-clock.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-* Clock bindings for Freescale i.MX21
-
-Required properties:
-- compatible : Should be "fsl,imx21-ccm".
-- reg : Address and length of the register set.
-- interrupts : Should contain CCM interrupt.
-- #clock-cells: Should be <1>.
-
-The clock consumer should specify the desired clock by having the clock
-ID in its "clocks" phandle cell. See include/dt-bindings/clock/imx21-clock.h
-for the full list of i.MX21 clock IDs.
-
-Examples:
- clks: ccm@10027000{
- compatible = "fsl,imx21-ccm";
- reg = <0x10027000 0x800>;
- #clock-cells = <1>;
- };
-
- uart1: serial@1000a000 {
- compatible = "fsl,imx21-uart";
- reg = <0x1000a000 0x1000>;
- interrupts = <20>;
- clocks = <&clks IMX21_CLK_UART1_IPG_GATE>,
- <&clks IMX21_CLK_PER1>;
- clock-names = "ipg", "per";
- };
diff --git a/Documentation/devicetree/bindings/clock/imx21-clock.yaml b/Documentation/devicetree/bindings/clock/imx21-clock.yaml
new file mode 100644
index 000000000000..79cc843703ec
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/imx21-clock.yaml
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/imx21-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX21 Clock Controller
+
+maintainers:
+ - Alexander Shiyan <shc_work@mail.ru>
+
+description: |
+ The clock consumer should specify the desired clock by having the clock
+ ID in its "clocks" phandle cell. See include/dt-bindings/clock/imx21-clock.h
+ for the full list of i.MX21 clock IDs.
+
+properties:
+ compatible:
+ const: fsl,imx21-ccm
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/imx21-clock.h>
+
+ clock-controller@10027000 {
+ compatible = "fsl,imx21-ccm";
+ reg = <0x10027000 0x800>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/imx23-clock.txt b/Documentation/devicetree/bindings/clock/imx23-clock.txt
deleted file mode 100644
index 8385348d3bd9..000000000000
--- a/Documentation/devicetree/bindings/clock/imx23-clock.txt
+++ /dev/null
@@ -1,70 +0,0 @@
-* Clock bindings for Freescale i.MX23
-
-Required properties:
-- compatible: Should be "fsl,imx23-clkctrl"
-- reg: Address and length of the register set
-- #clock-cells: Should be <1>
-
-The clock consumer should specify the desired clock by having the clock
-ID in its "clocks" phandle cell. The following is a full list of i.MX23
-clocks and IDs.
-
- Clock ID
- ------------------
- ref_xtal 0
- pll 1
- ref_cpu 2
- ref_emi 3
- ref_pix 4
- ref_io 5
- saif_sel 6
- lcdif_sel 7
- gpmi_sel 8
- ssp_sel 9
- emi_sel 10
- cpu 11
- etm_sel 12
- cpu_pll 13
- cpu_xtal 14
- hbus 15
- xbus 16
- lcdif_div 17
- ssp_div 18
- gpmi_div 19
- emi_pll 20
- emi_xtal 21
- etm_div 22
- saif_div 23
- clk32k_div 24
- rtc 25
- adc 26
- spdif_div 27
- clk32k 28
- dri 29
- pwm 30
- filt 31
- uart 32
- ssp 33
- gpmi 34
- spdif 35
- emi 36
- saif 37
- lcdif 38
- etm 39
- usb 40
- usb_phy 41
-
-Examples:
-
-clks: clkctrl@80040000 {
- compatible = "fsl,imx23-clkctrl";
- reg = <0x80040000 0x2000>;
- #clock-cells = <1>;
-};
-
-auart0: serial@8006c000 {
- compatible = "fsl,imx23-auart";
- reg = <0x8006c000 0x2000>;
- interrupts = <24 25 23>;
- clocks = <&clks 32>;
-};
diff --git a/Documentation/devicetree/bindings/clock/imx23-clock.yaml b/Documentation/devicetree/bindings/clock/imx23-clock.yaml
new file mode 100644
index 000000000000..5e71c9219500
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/imx23-clock.yaml
@@ -0,0 +1,85 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/imx23-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX23 Clock Controller
+
+maintainers:
+ - Shawn Guo <shawnguo@kernel.org>
+
+description: |
+ The clock consumer should specify the desired clock by having the clock
+ ID in its "clocks" phandle cell. The following is a full list of i.MX23
+ clocks and IDs.
+
+ Clock ID
+ ------------------
+ ref_xtal 0
+ pll 1
+ ref_cpu 2
+ ref_emi 3
+ ref_pix 4
+ ref_io 5
+ saif_sel 6
+ lcdif_sel 7
+ gpmi_sel 8
+ ssp_sel 9
+ emi_sel 10
+ cpu 11
+ etm_sel 12
+ cpu_pll 13
+ cpu_xtal 14
+ hbus 15
+ xbus 16
+ lcdif_div 17
+ ssp_div 18
+ gpmi_div 19
+ emi_pll 20
+ emi_xtal 21
+ etm_div 22
+ saif_div 23
+ clk32k_div 24
+ rtc 25
+ adc 26
+ spdif_div 27
+ clk32k 28
+ dri 29
+ pwm 30
+ filt 31
+ uart 32
+ ssp 33
+ gpmi 34
+ spdif 35
+ emi 36
+ saif 37
+ lcdif 38
+ etm 39
+ usb 40
+ usb_phy 41
+
+properties:
+ compatible:
+ const: fsl,imx23-clkctrl
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@80040000 {
+ compatible = "fsl,imx23-clkctrl";
+ reg = <0x80040000 0x2000>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/imx25-clock.txt b/Documentation/devicetree/bindings/clock/imx25-clock.txt
deleted file mode 100644
index f8135ea9ca4e..000000000000
--- a/Documentation/devicetree/bindings/clock/imx25-clock.txt
+++ /dev/null
@@ -1,160 +0,0 @@
-* Clock bindings for Freescale i.MX25
-
-Required properties:
-- compatible: Should be "fsl,imx25-ccm"
-- reg: Address and length of the register set
-- interrupts: Should contain CCM interrupt
-- #clock-cells: Should be <1>
-
-The clock consumer should specify the desired clock by having the clock
-ID in its "clocks" phandle cell. The following is a full list of i.MX25
-clocks and IDs.
-
- Clock ID
- ---------------------------
- dummy 0
- osc 1
- mpll 2
- upll 3
- mpll_cpu_3_4 4
- cpu_sel 5
- cpu 6
- ahb 7
- usb_div 8
- ipg 9
- per0_sel 10
- per1_sel 11
- per2_sel 12
- per3_sel 13
- per4_sel 14
- per5_sel 15
- per6_sel 16
- per7_sel 17
- per8_sel 18
- per9_sel 19
- per10_sel 20
- per11_sel 21
- per12_sel 22
- per13_sel 23
- per14_sel 24
- per15_sel 25
- per0 26
- per1 27
- per2 28
- per3 29
- per4 30
- per5 31
- per6 32
- per7 33
- per8 34
- per9 35
- per10 36
- per11 37
- per12 38
- per13 39
- per14 40
- per15 41
- csi_ipg_per 42
- epit_ipg_per 43
- esai_ipg_per 44
- esdhc1_ipg_per 45
- esdhc2_ipg_per 46
- gpt_ipg_per 47
- i2c_ipg_per 48
- lcdc_ipg_per 49
- nfc_ipg_per 50
- owire_ipg_per 51
- pwm_ipg_per 52
- sim1_ipg_per 53
- sim2_ipg_per 54
- ssi1_ipg_per 55
- ssi2_ipg_per 56
- uart_ipg_per 57
- ata_ahb 58
- reserved 59
- csi_ahb 60
- emi_ahb 61
- esai_ahb 62
- esdhc1_ahb 63
- esdhc2_ahb 64
- fec_ahb 65
- lcdc_ahb 66
- rtic_ahb 67
- sdma_ahb 68
- slcdc_ahb 69
- usbotg_ahb 70
- reserved 71
- reserved 72
- reserved 73
- reserved 74
- can1_ipg 75
- can2_ipg 76
- csi_ipg 77
- cspi1_ipg 78
- cspi2_ipg 79
- cspi3_ipg 80
- dryice_ipg 81
- ect_ipg 82
- epit1_ipg 83
- epit2_ipg 84
- reserved 85
- esdhc1_ipg 86
- esdhc2_ipg 87
- fec_ipg 88
- reserved 89
- reserved 90
- reserved 91
- gpt1_ipg 92
- gpt2_ipg 93
- gpt3_ipg 94
- gpt4_ipg 95
- reserved 96
- reserved 97
- reserved 98
- iim_ipg 99
- reserved 100
- reserved 101
- kpp_ipg 102
- lcdc_ipg 103
- reserved 104
- pwm1_ipg 105
- pwm2_ipg 106
- pwm3_ipg 107
- pwm4_ipg 108
- rngb_ipg 109
- reserved 110
- scc_ipg 111
- sdma_ipg 112
- sim1_ipg 113
- sim2_ipg 114
- slcdc_ipg 115
- spba_ipg 116
- ssi1_ipg 117
- ssi2_ipg 118
- tsc_ipg 119
- uart1_ipg 120
- uart2_ipg 121
- uart3_ipg 122
- uart4_ipg 123
- uart5_ipg 124
- reserved 125
- wdt_ipg 126
- cko_div 127
- cko_sel 128
- cko 129
-
-Examples:
-
-clks: ccm@53f80000 {
- compatible = "fsl,imx25-ccm";
- reg = <0x53f80000 0x4000>;
- interrupts = <31>;
-};
-
-uart1: serial@43f90000 {
- compatible = "fsl,imx25-uart", "fsl,imx21-uart";
- reg = <0x43f90000 0x4000>;
- interrupts = <45>;
- clocks = <&clks 79>, <&clks 50>;
- clock-names = "ipg", "per";
-};
diff --git a/Documentation/devicetree/bindings/clock/imx25-clock.yaml b/Documentation/devicetree/bindings/clock/imx25-clock.yaml
new file mode 100644
index 000000000000..c626a158590e
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/imx25-clock.yaml
@@ -0,0 +1,178 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/imx25-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX25 Clock Controller
+
+maintainers:
+ - Sascha Hauer <s.hauer@pengutronix.de>
+
+description: |
+ The clock consumer should specify the desired clock by having the clock
+ ID in its "clocks" phandle cell. The following is a full list of i.MX25
+ clocks and IDs.
+
+ Clock ID
+ --------------------------
+ dummy 0
+ osc 1
+ mpll 2
+ upll 3
+ mpll_cpu_3_4 4
+ cpu_sel 5
+ cpu 6
+ ahb 7
+ usb_div 8
+ ipg 9
+ per0_sel 10
+ per1_sel 11
+ per2_sel 12
+ per3_sel 13
+ per4_sel 14
+ per5_sel 15
+ per6_sel 16
+ per7_sel 17
+ per8_sel 18
+ per9_sel 19
+ per10_sel 20
+ per11_sel 21
+ per12_sel 22
+ per13_sel 23
+ per14_sel 24
+ per15_sel 25
+ per0 26
+ per1 27
+ per2 28
+ per3 29
+ per4 30
+ per5 31
+ per6 32
+ per7 33
+ per8 34
+ per9 35
+ per10 36
+ per11 37
+ per12 38
+ per13 39
+ per14 40
+ per15 41
+ csi_ipg_per 42
+ epit_ipg_per 43
+ esai_ipg_per 44
+ esdhc1_ipg_per 45
+ esdhc2_ipg_per 46
+ gpt_ipg_per 47
+ i2c_ipg_per 48
+ lcdc_ipg_per 49
+ nfc_ipg_per 50
+ owire_ipg_per 51
+ pwm_ipg_per 52
+ sim1_ipg_per 53
+ sim2_ipg_per 54
+ ssi1_ipg_per 55
+ ssi2_ipg_per 56
+ uart_ipg_per 57
+ ata_ahb 58
+ reserved 59
+ csi_ahb 60
+ emi_ahb 61
+ esai_ahb 62
+ esdhc1_ahb 63
+ esdhc2_ahb 64
+ fec_ahb 65
+ lcdc_ahb 66
+ rtic_ahb 67
+ sdma_ahb 68
+ slcdc_ahb 69
+ usbotg_ahb 70
+ reserved 71
+ reserved 72
+ reserved 73
+ reserved 74
+ can1_ipg 75
+ can2_ipg 76
+ csi_ipg 77
+ cspi1_ipg 78
+ cspi2_ipg 79
+ cspi3_ipg 80
+ dryice_ipg 81
+ ect_ipg 82
+ epit1_ipg 83
+ epit2_ipg 84
+ reserved 85
+ esdhc1_ipg 86
+ esdhc2_ipg 87
+ fec_ipg 88
+ reserved 89
+ reserved 90
+ reserved 91
+ gpt1_ipg 92
+ gpt2_ipg 93
+ gpt3_ipg 94
+ gpt4_ipg 95
+ reserved 96
+ reserved 97
+ reserved 98
+ iim_ipg 99
+ reserved 100
+ reserved 101
+ kpp_ipg 102
+ lcdc_ipg 103
+ reserved 104
+ pwm1_ipg 105
+ pwm2_ipg 106
+ pwm3_ipg 107
+ pwm4_ipg 108
+ rngb_ipg 109
+ reserved 110
+ scc_ipg 111
+ sdma_ipg 112
+ sim1_ipg 113
+ sim2_ipg 114
+ slcdc_ipg 115
+ spba_ipg 116
+ ssi1_ipg 117
+ ssi2_ipg 118
+ tsc_ipg 119
+ uart1_ipg 120
+ uart2_ipg 121
+ uart3_ipg 122
+ uart4_ipg 123
+ uart5_ipg 124
+ reserved 125
+ wdt_ipg 126
+ cko_div 127
+ cko_sel 128
+ cko 129
+
+properties:
+ compatible:
+ const: fsl,imx25-ccm
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@53f80000 {
+ compatible = "fsl,imx25-ccm";
+ reg = <0x53f80000 0x4000>;
+ interrupts = <31>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/imx27-clock.txt b/Documentation/devicetree/bindings/clock/imx27-clock.txt
deleted file mode 100644
index 4c95c048d3b2..000000000000
--- a/Documentation/devicetree/bindings/clock/imx27-clock.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-* Clock bindings for Freescale i.MX27
-
-Required properties:
-- compatible: Should be "fsl,imx27-ccm"
-- reg: Address and length of the register set
-- interrupts: Should contain CCM interrupt
-- #clock-cells: Should be <1>
-
-The clock consumer should specify the desired clock by having the clock
-ID in its "clocks" phandle cell. See include/dt-bindings/clock/imx27-clock.h
-for the full list of i.MX27 clock IDs.
-
-Examples:
- clks: ccm@10027000{
- compatible = "fsl,imx27-ccm";
- reg = <0x10027000 0x1000>;
- #clock-cells = <1>;
- };
-
- uart1: serial@1000a000 {
- compatible = "fsl,imx27-uart", "fsl,imx21-uart";
- reg = <0x1000a000 0x1000>;
- interrupts = <20>;
- clocks = <&clks IMX27_CLK_UART1_IPG_GATE>,
- <&clks IMX27_CLK_PER1_GATE>;
- clock-names = "ipg", "per";
- };
diff --git a/Documentation/devicetree/bindings/clock/imx27-clock.yaml b/Documentation/devicetree/bindings/clock/imx27-clock.yaml
new file mode 100644
index 000000000000..71d78a0b551f
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/imx27-clock.yaml
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/imx27-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX27 Clock Controller
+
+maintainers:
+ - Fabio Estevam <festevam@gmail.com>
+
+description: |
+ The clock consumer should specify the desired clock by having the clock
+ ID in its "clocks" phandle cell. See include/dt-bindings/clock/imx27-clock.h
+ for the full list of i.MX27 clock IDs.
+
+properties:
+ compatible:
+ const: fsl,imx27-ccm
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/imx27-clock.h>
+
+ clock-controller@10027000 {
+ compatible = "fsl,imx27-ccm";
+ reg = <0x10027000 0x1000>;
+ interrupts = <31>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/imx28-clock.txt b/Documentation/devicetree/bindings/clock/imx28-clock.txt
deleted file mode 100644
index d84a37d2885f..000000000000
--- a/Documentation/devicetree/bindings/clock/imx28-clock.txt
+++ /dev/null
@@ -1,93 +0,0 @@
-* Clock bindings for Freescale i.MX28
-
-Required properties:
-- compatible: Should be "fsl,imx28-clkctrl"
-- reg: Address and length of the register set
-- #clock-cells: Should be <1>
-
-The clock consumer should specify the desired clock by having the clock
-ID in its "clocks" phandle cell. The following is a full list of i.MX28
-clocks and IDs.
-
- Clock ID
- ------------------
- ref_xtal 0
- pll0 1
- pll1 2
- pll2 3
- ref_cpu 4
- ref_emi 5
- ref_io0 6
- ref_io1 7
- ref_pix 8
- ref_hsadc 9
- ref_gpmi 10
- saif0_sel 11
- saif1_sel 12
- gpmi_sel 13
- ssp0_sel 14
- ssp1_sel 15
- ssp2_sel 16
- ssp3_sel 17
- emi_sel 18
- etm_sel 19
- lcdif_sel 20
- cpu 21
- ptp_sel 22
- cpu_pll 23
- cpu_xtal 24
- hbus 25
- xbus 26
- ssp0_div 27
- ssp1_div 28
- ssp2_div 29
- ssp3_div 30
- gpmi_div 31
- emi_pll 32
- emi_xtal 33
- lcdif_div 34
- etm_div 35
- ptp 36
- saif0_div 37
- saif1_div 38
- clk32k_div 39
- rtc 40
- lradc 41
- spdif_div 42
- clk32k 43
- pwm 44
- uart 45
- ssp0 46
- ssp1 47
- ssp2 48
- ssp3 49
- gpmi 50
- spdif 51
- emi 52
- saif0 53
- saif1 54
- lcdif 55
- etm 56
- fec 57
- can0 58
- can1 59
- usb0 60
- usb1 61
- usb0_phy 62
- usb1_phy 63
- enet_out 64
-
-Examples:
-
-clks: clkctrl@80040000 {
- compatible = "fsl,imx28-clkctrl";
- reg = <0x80040000 0x2000>;
- #clock-cells = <1>;
-};
-
-auart0: serial@8006a000 {
- compatible = "fsl,imx28-auart", "fsl,imx23-auart";
- reg = <0x8006a000 0x2000>;
- interrupts = <112 70 71>;
- clocks = <&clks 45>;
-};
diff --git a/Documentation/devicetree/bindings/clock/imx28-clock.yaml b/Documentation/devicetree/bindings/clock/imx28-clock.yaml
new file mode 100644
index 000000000000..4aaad7b9c66e
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/imx28-clock.yaml
@@ -0,0 +1,108 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/imx28-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX28 Clock Controller
+
+maintainers:
+ - Shawn Guo <shawnguo@kernel.org>
+
+description: |
+ The clock consumer should specify the desired clock by having the clock
+ ID in its "clocks" phandle cell. The following is a full list of i.MX28
+ clocks and IDs.
+
+ Clock ID
+ ------------------
+ ref_xtal 0
+ pll0 1
+ pll1 2
+ pll2 3
+ ref_cpu 4
+ ref_emi 5
+ ref_io0 6
+ ref_io1 7
+ ref_pix 8
+ ref_hsadc 9
+ ref_gpmi 10
+ saif0_sel 11
+ saif1_sel 12
+ gpmi_sel 13
+ ssp0_sel 14
+ ssp1_sel 15
+ ssp2_sel 16
+ ssp3_sel 17
+ emi_sel 18
+ etm_sel 19
+ lcdif_sel 20
+ cpu 21
+ ptp_sel 22
+ cpu_pll 23
+ cpu_xtal 24
+ hbus 25
+ xbus 26
+ ssp0_div 27
+ ssp1_div 28
+ ssp2_div 29
+ ssp3_div 30
+ gpmi_div 31
+ emi_pll 32
+ emi_xtal 33
+ lcdif_div 34
+ etm_div 35
+ ptp 36
+ saif0_div 37
+ saif1_div 38
+ clk32k_div 39
+ rtc 40
+ lradc 41
+ spdif_div 42
+ clk32k 43
+ pwm 44
+ uart 45
+ ssp0 46
+ ssp1 47
+ ssp2 48
+ ssp3 49
+ gpmi 50
+ spdif 51
+ emi 52
+ saif0 53
+ saif1 54
+ lcdif 55
+ etm 56
+ fec 57
+ can0 58
+ can1 59
+ usb0 60
+ usb1 61
+ usb0_phy 62
+ usb1_phy 63
+ enet_out 64
+
+properties:
+ compatible:
+ const: fsl,imx28-clkctrl
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@80040000 {
+ compatible = "fsl,imx28-clkctrl";
+ reg = <0x80040000 0x2000>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/imx31-clock.txt b/Documentation/devicetree/bindings/clock/imx31-clock.txt
deleted file mode 100644
index 0a291090e562..000000000000
--- a/Documentation/devicetree/bindings/clock/imx31-clock.txt
+++ /dev/null
@@ -1,90 +0,0 @@
-* Clock bindings for Freescale i.MX31
-
-Required properties:
-- compatible: Should be "fsl,imx31-ccm"
-- reg: Address and length of the register set
-- interrupts: Should contain CCM interrupt
-- #clock-cells: Should be <1>
-
-The clock consumer should specify the desired clock by having the clock
-ID in its "clocks" phandle cell. The following is a full list of i.MX31
-clocks and IDs.
-
- Clock ID
- -----------------------
- dummy 0
- ckih 1
- ckil 2
- mpll 3
- spll 4
- upll 5
- mcu_main 6
- hsp 7
- ahb 8
- nfc 9
- ipg 10
- per_div 11
- per 12
- csi_sel 13
- fir_sel 14
- csi_div 15
- usb_div_pre 16
- usb_div_post 17
- fir_div_pre 18
- fir_div_post 19
- sdhc1_gate 20
- sdhc2_gate 21
- gpt_gate 22
- epit1_gate 23
- epit2_gate 24
- iim_gate 25
- ata_gate 26
- sdma_gate 27
- cspi3_gate 28
- rng_gate 29
- uart1_gate 30
- uart2_gate 31
- ssi1_gate 32
- i2c1_gate 33
- i2c2_gate 34
- i2c3_gate 35
- hantro_gate 36
- mstick1_gate 37
- mstick2_gate 38
- csi_gate 39
- rtc_gate 40
- wdog_gate 41
- pwm_gate 42
- sim_gate 43
- ect_gate 44
- usb_gate 45
- kpp_gate 46
- ipu_gate 47
- uart3_gate 48
- uart4_gate 49
- uart5_gate 50
- owire_gate 51
- ssi2_gate 52
- cspi1_gate 53
- cspi2_gate 54
- gacc_gate 55
- emi_gate 56
- rtic_gate 57
- firi_gate 58
-
-Examples:
-
-clks: ccm@53f80000{
- compatible = "fsl,imx31-ccm";
- reg = <0x53f80000 0x4000>;
- interrupts = <31>, <53>;
- #clock-cells = <1>;
-};
-
-uart1: serial@43f90000 {
- compatible = "fsl,imx31-uart", "fsl,imx21-uart";
- reg = <0x43f90000 0x4000>;
- interrupts = <45>;
- clocks = <&clks 10>, <&clks 30>;
- clock-names = "ipg", "per";
-};
diff --git a/Documentation/devicetree/bindings/clock/imx31-clock.yaml b/Documentation/devicetree/bindings/clock/imx31-clock.yaml
new file mode 100644
index 000000000000..50a8498eef8a
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/imx31-clock.yaml
@@ -0,0 +1,112 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/imx31-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX31 Clock Controller
+
+maintainers:
+ - Fabio Estevam <festevam@gmail.com>
+
+description: |
+ The clock consumer should specify the desired clock by having the clock
+ ID in its "clocks" phandle cell. The following is a full list of i.MX31
+ clocks and IDs.
+
+ Clock ID
+ -----------------------
+ dummy 0
+ ckih 1
+ ckil 2
+ mpll 3
+ spll 4
+ upll 5
+ mcu_main 6
+ hsp 7
+ ahb 8
+ nfc 9
+ ipg 10
+ per_div 11
+ per 12
+ csi_sel 13
+ fir_sel 14
+ csi_div 15
+ usb_div_pre 16
+ usb_div_post 17
+ fir_div_pre 18
+ fir_div_post 19
+ sdhc1_gate 20
+ sdhc2_gate 21
+ gpt_gate 22
+ epit1_gate 23
+ epit2_gate 24
+ iim_gate 25
+ ata_gate 26
+ sdma_gate 27
+ cspi3_gate 28
+ rng_gate 29
+ uart1_gate 30
+ uart2_gate 31
+ ssi1_gate 32
+ i2c1_gate 33
+ i2c2_gate 34
+ i2c3_gate 35
+ hantro_gate 36
+ mstick1_gate 37
+ mstick2_gate 38
+ csi_gate 39
+ rtc_gate 40
+ wdog_gate 41
+ pwm_gate 42
+ sim_gate 43
+ ect_gate 44
+ usb_gate 45
+ kpp_gate 46
+ ipu_gate 47
+ uart3_gate 48
+ uart4_gate 49
+ uart5_gate 50
+ owire_gate 51
+ ssi2_gate 52
+ cspi1_gate 53
+ cspi2_gate 54
+ gacc_gate 55
+ emi_gate 56
+ rtic_gate 57
+ firi_gate 58
+
+properties:
+ compatible:
+ const: fsl,imx31-ccm
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ description: CCM provides 2 interrupt requests, request 1 is to generate
+ interrupt for DVFS when a frequency change is requested, request 2 is
+ to generate interrupt for DPTC when a voltage change is requested.
+ items:
+ - description: CCM DVFS interrupt request 1
+ - description: CCM DPTC interrupt request 2
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@53f80000 {
+ compatible = "fsl,imx31-ccm";
+ reg = <0x53f80000 0x4000>;
+ interrupts = <31>, <53>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/imx35-clock.txt b/Documentation/devicetree/bindings/clock/imx35-clock.txt
deleted file mode 100644
index f49783213c56..000000000000
--- a/Documentation/devicetree/bindings/clock/imx35-clock.txt
+++ /dev/null
@@ -1,114 +0,0 @@
-* Clock bindings for Freescale i.MX35
-
-Required properties:
-- compatible: Should be "fsl,imx35-ccm"
-- reg: Address and length of the register set
-- interrupts: Should contain CCM interrupt
-- #clock-cells: Should be <1>
-
-The clock consumer should specify the desired clock by having the clock
-ID in its "clocks" phandle cell. The following is a full list of i.MX35
-clocks and IDs.
-
- Clock ID
- ---------------------------
- ckih 0
- mpll 1
- ppll 2
- mpll_075 3
- arm 4
- hsp 5
- hsp_div 6
- hsp_sel 7
- ahb 8
- ipg 9
- arm_per_div 10
- ahb_per_div 11
- ipg_per 12
- uart_sel 13
- uart_div 14
- esdhc_sel 15
- esdhc1_div 16
- esdhc2_div 17
- esdhc3_div 18
- spdif_sel 19
- spdif_div_pre 20
- spdif_div_post 21
- ssi_sel 22
- ssi1_div_pre 23
- ssi1_div_post 24
- ssi2_div_pre 25
- ssi2_div_post 26
- usb_sel 27
- usb_div 28
- nfc_div 29
- asrc_gate 30
- pata_gate 31
- audmux_gate 32
- can1_gate 33
- can2_gate 34
- cspi1_gate 35
- cspi2_gate 36
- ect_gate 37
- edio_gate 38
- emi_gate 39
- epit1_gate 40
- epit2_gate 41
- esai_gate 42
- esdhc1_gate 43
- esdhc2_gate 44
- esdhc3_gate 45
- fec_gate 46
- gpio1_gate 47
- gpio2_gate 48
- gpio3_gate 49
- gpt_gate 50
- i2c1_gate 51
- i2c2_gate 52
- i2c3_gate 53
- iomuxc_gate 54
- ipu_gate 55
- kpp_gate 56
- mlb_gate 57
- mshc_gate 58
- owire_gate 59
- pwm_gate 60
- rngc_gate 61
- rtc_gate 62
- rtic_gate 63
- scc_gate 64
- sdma_gate 65
- spba_gate 66
- spdif_gate 67
- ssi1_gate 68
- ssi2_gate 69
- uart1_gate 70
- uart2_gate 71
- uart3_gate 72
- usbotg_gate 73
- wdog_gate 74
- max_gate 75
- admux_gate 76
- csi_gate 77
- csi_div 78
- csi_sel 79
- iim_gate 80
- gpu2d_gate 81
- ckli_gate 82
-
-Examples:
-
-clks: ccm@53f80000 {
- compatible = "fsl,imx35-ccm";
- reg = <0x53f80000 0x4000>;
- interrupts = <31>;
- #clock-cells = <1>;
-};
-
-esdhc1: esdhc@53fb4000 {
- compatible = "fsl,imx35-esdhc";
- reg = <0x53fb4000 0x4000>;
- interrupts = <7>;
- clocks = <&clks 9>, <&clks 8>, <&clks 43>;
- clock-names = "ipg", "ahb", "per";
-};
diff --git a/Documentation/devicetree/bindings/clock/imx35-clock.yaml b/Documentation/devicetree/bindings/clock/imx35-clock.yaml
new file mode 100644
index 000000000000..c063369de3ec
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/imx35-clock.yaml
@@ -0,0 +1,131 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/imx35-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX35 Clock Controller
+
+maintainers:
+ - Steffen Trumtrar <s.trumtrar@pengutronix.de>
+
+description: |
+ The clock consumer should specify the desired clock by having the clock
+ ID in its "clocks" phandle cell. The following is a full list of i.MX35
+ clocks and IDs.
+
+ Clock ID
+ ---------------------------
+ ckih 0
+ mpll 1
+ ppll 2
+ mpll_075 3
+ arm 4
+ hsp 5
+ hsp_div 6
+ hsp_sel 7
+ ahb 8
+ ipg 9
+ arm_per_div 10
+ ahb_per_div 11
+ ipg_per 12
+ uart_sel 13
+ uart_div 14
+ esdhc_sel 15
+ esdhc1_div 16
+ esdhc2_div 17
+ esdhc3_div 18
+ spdif_sel 19
+ spdif_div_pre 20
+ spdif_div_post 21
+ ssi_sel 22
+ ssi1_div_pre 23
+ ssi1_div_post 24
+ ssi2_div_pre 25
+ ssi2_div_post 26
+ usb_sel 27
+ usb_div 28
+ nfc_div 29
+ asrc_gate 30
+ pata_gate 31
+ audmux_gate 32
+ can1_gate 33
+ can2_gate 34
+ cspi1_gate 35
+ cspi2_gate 36
+ ect_gate 37
+ edio_gate 38
+ emi_gate 39
+ epit1_gate 40
+ epit2_gate 41
+ esai_gate 42
+ esdhc1_gate 43
+ esdhc2_gate 44
+ esdhc3_gate 45
+ fec_gate 46
+ gpio1_gate 47
+ gpio2_gate 48
+ gpio3_gate 49
+ gpt_gate 50
+ i2c1_gate 51
+ i2c2_gate 52
+ i2c3_gate 53
+ iomuxc_gate 54
+ ipu_gate 55
+ kpp_gate 56
+ mlb_gate 57
+ mshc_gate 58
+ owire_gate 59
+ pwm_gate 60
+ rngc_gate 61
+ rtc_gate 62
+ rtic_gate 63
+ scc_gate 64
+ sdma_gate 65
+ spba_gate 66
+ spdif_gate 67
+ ssi1_gate 68
+ ssi2_gate 69
+ uart1_gate 70
+ uart2_gate 71
+ uart3_gate 72
+ usbotg_gate 73
+ wdog_gate 74
+ max_gate 75
+ admux_gate 76
+ csi_gate 77
+ csi_div 78
+ csi_sel 79
+ iim_gate 80
+ gpu2d_gate 81
+ ckli_gate 82
+
+properties:
+ compatible:
+ const: fsl,imx35-ccm
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@53f80000 {
+ compatible = "fsl,imx35-ccm";
+ reg = <0x53f80000 0x4000>;
+ interrupts = <31>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/imx5-clock.txt b/Documentation/devicetree/bindings/clock/imx5-clock.txt
deleted file mode 100644
index a24ca9e582d2..000000000000
--- a/Documentation/devicetree/bindings/clock/imx5-clock.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-* Clock bindings for Freescale i.MX5
-
-Required properties:
-- compatible: Should be "fsl,<soc>-ccm" , where <soc> can be imx51 or imx53
-- reg: Address and length of the register set
-- interrupts: Should contain CCM interrupt
-- #clock-cells: Should be <1>
-
-The clock consumer should specify the desired clock by having the clock
-ID in its "clocks" phandle cell. See include/dt-bindings/clock/imx5-clock.h
-for the full list of i.MX5 clock IDs.
-
-Examples (for mx53):
-
-clks: ccm@53fd4000{
- compatible = "fsl,imx53-ccm";
- reg = <0x53fd4000 0x4000>;
- interrupts = <0 71 0x04 0 72 0x04>;
- #clock-cells = <1>;
-};
-
-can1: can@53fc8000 {
- compatible = "fsl,imx53-flexcan", "fsl,p1010-flexcan";
- reg = <0x53fc8000 0x4000>;
- interrupts = <82>;
- clocks = <&clks IMX5_CLK_CAN1_IPG_GATE>, <&clks IMX5_CLK_CAN1_SERIAL_GATE>;
- clock-names = "ipg", "per";
-};
diff --git a/Documentation/devicetree/bindings/clock/imx5-clock.yaml b/Documentation/devicetree/bindings/clock/imx5-clock.yaml
new file mode 100644
index 000000000000..423c0142c1d3
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/imx5-clock.yaml
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/imx5-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX5 Clock Controller
+
+maintainers:
+ - Fabio Estevam <festevam@gmail.com>
+
+description: |
+ The clock consumer should specify the desired clock by having the clock
+ ID in its "clocks" phandle cell. See include/dt-bindings/clock/imx5-clock.h
+ for the full list of i.MX5 clock IDs.
+
+properties:
+ compatible:
+ enum:
+ - fsl,imx53-ccm
+ - fsl,imx51-ccm
+ - fsl,imx50-ccm
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ description: CCM provides 2 interrupt requests, request 1 is to generate
+ interrupt for frequency or mux change, request 2 is to generate
+ interrupt for oscillator read or PLL lock.
+ items:
+ - description: CCM interrupt request 1
+ - description: CCM interrupt request 2
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/imx5-clock.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ clock-controller@53fd4000{
+ compatible = "fsl,imx53-ccm";
+ reg = <0x53fd4000 0x4000>;
+ interrupts = <0 71 IRQ_TYPE_LEVEL_HIGH>,
+ <0 72 IRQ_TYPE_LEVEL_HIGH>;
+ #clock-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/imx6q-clock.txt b/Documentation/devicetree/bindings/clock/imx6q-clock.txt
deleted file mode 100644
index e1308346e00d..000000000000
--- a/Documentation/devicetree/bindings/clock/imx6q-clock.txt
+++ /dev/null
@@ -1,38 +0,0 @@
-* Clock bindings for Freescale i.MX6 Quad
-
-Required properties:
-- compatible: Should be "fsl,imx6q-ccm"
-- reg: Address and length of the register set
-- interrupts: Should contain CCM interrupt
-- #clock-cells: Should be <1>
-
-Optional properties:
-- fsl,pmic-stby-poweroff: Configure CCM to assert PMIC_STBY_REQ signal
- on power off.
- Use this property if the SoC should be powered off by external power
- management IC (PMIC) triggered via PMIC_STBY_REQ signal.
- Boards that are designed to initiate poweroff on PMIC_ON_REQ signal should
- be using "syscon-poweroff" driver instead.
-
-The clock consumer should specify the desired clock by having the clock
-ID in its "clocks" phandle cell. See include/dt-bindings/clock/imx6qdl-clock.h
-for the full list of i.MX6 Quad and DualLite clock IDs.
-
-Examples:
-
-#include <dt-bindings/clock/imx6qdl-clock.h>
-
-clks: ccm@20c4000 {
- compatible = "fsl,imx6q-ccm";
- reg = <0x020c4000 0x4000>;
- interrupts = <0 87 0x04 0 88 0x04>;
- #clock-cells = <1>;
-};
-
-uart1: serial@2020000 {
- compatible = "fsl,imx6q-uart", "fsl,imx21-uart";
- reg = <0x02020000 0x4000>;
- interrupts = <0 26 0x04>;
- clocks = <&clks IMX6QDL_CLK_UART_IPG>, <&clks IMX6QDL_CLK_UART_SERIAL>;
- clock-names = "ipg", "per";
-};
diff --git a/Documentation/devicetree/bindings/clock/imx6q-clock.yaml b/Documentation/devicetree/bindings/clock/imx6q-clock.yaml
new file mode 100644
index 000000000000..cd3c04c883df
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/imx6q-clock.yaml
@@ -0,0 +1,74 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/imx6q-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX6 Quad Clock Controller
+
+maintainers:
+ - Abel Vesa <abelvesa@kernel.org>
+ - Peng Fan <peng.fan@nxp.com>
+
+properties:
+ compatible:
+ const: fsl,imx6q-ccm
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ description: CCM provides 2 interrupt requests, request 1 is to generate
+ interrupt for frequency or mux change, request 2 is to generate
+ interrupt for oscillator read or PLL lock.
+ items:
+ - description: CCM interrupt request 1
+ - description: CCM interrupt request 2
+
+ '#clock-cells':
+ const: 1
+
+ clocks:
+ items:
+ - description: 24m osc
+ - description: 32k osc
+ - description: ckih1 clock input
+ - description: anaclk1 clock input
+ - description: anaclk2 clock input
+
+ clock-names:
+ items:
+ - const: osc
+ - const: ckil
+ - const: ckih1
+ - const: anaclk1
+ - const: anaclk2
+
+ fsl,pmic-stby-poweroff:
+ $ref: /schemas/types.yaml#/definitions/flag
+ description: |
+ Use this property if the SoC should be powered off by external power
+ management IC (PMIC) triggered via PMIC_STBY_REQ signal.
+ Boards that are designed to initiate poweroff on PMIC_ON_REQ signal should
+ be using "syscon-poweroff" driver instead.
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ # Clock Control Module node:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ clock-controller@20c4000 {
+ compatible = "fsl,imx6q-ccm";
+ reg = <0x020c4000 0x4000>;
+ interrupts = <0 87 IRQ_TYPE_LEVEL_HIGH>,
+ <0 88 IRQ_TYPE_LEVEL_HIGH>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/imx6sl-clock.txt b/Documentation/devicetree/bindings/clock/imx6sl-clock.txt
deleted file mode 100644
index 15e40bdf147d..000000000000
--- a/Documentation/devicetree/bindings/clock/imx6sl-clock.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-* Clock bindings for Freescale i.MX6 SoloLite
-
-Required properties:
-- compatible: Should be "fsl,imx6sl-ccm"
-- reg: Address and length of the register set
-- #clock-cells: Should be <1>
-
-The clock consumer should specify the desired clock by having the clock
-ID in its "clocks" phandle cell. See include/dt-bindings/clock/imx6sl-clock.h
-for the full list of i.MX6 SoloLite clock IDs.
diff --git a/Documentation/devicetree/bindings/clock/imx6sl-clock.yaml b/Documentation/devicetree/bindings/clock/imx6sl-clock.yaml
new file mode 100644
index 000000000000..6713bbb14f30
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/imx6sl-clock.yaml
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/imx6sl-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX6 SoloLite Clock Controller
+
+maintainers:
+ - Abel Vesa <abelvesa@kernel.org>
+ - Peng Fan <peng.fan@nxp.com>
+
+properties:
+ compatible:
+ const: fsl,imx6sl-ccm
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ description: CCM provides 2 interrupt requests, request 1 is to generate
+ interrupt for frequency or mux change, request 2 is to generate
+ interrupt for oscillator read or PLL lock.
+ items:
+ - description: CCM interrupt request 1
+ - description: CCM interrupt request 2
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ # Clock Control Module node:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ clock-controller@20c4000 {
+ compatible = "fsl,imx6sl-ccm";
+ reg = <0x020c4000 0x4000>;
+ interrupts = <0 87 IRQ_TYPE_LEVEL_HIGH>,
+ <0 88 IRQ_TYPE_LEVEL_HIGH>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/imx6sll-clock.txt b/Documentation/devicetree/bindings/clock/imx6sll-clock.txt
deleted file mode 100644
index fee849d5fdd1..000000000000
--- a/Documentation/devicetree/bindings/clock/imx6sll-clock.txt
+++ /dev/null
@@ -1,36 +0,0 @@
-* Clock bindings for Freescale i.MX6 SLL
-
-Required properties:
-- compatible: Should be "fsl,imx6sll-ccm"
-- reg: Address and length of the register set
-- #clock-cells: Should be <1>
-- clocks: list of clock specifiers, must contain an entry for each required
- entry in clock-names
-- clock-names: should include entries "ckil", "osc", "ipp_di0" and "ipp_di1"
-
-The clock consumer should specify the desired clock by having the clock
-ID in its "clocks" phandle cell. See include/dt-bindings/clock/imx6sll-clock.h
-for the full list of i.MX6 SLL clock IDs.
-
-Examples:
-
-#include <dt-bindings/clock/imx6sll-clock.h>
-
-clks: clock-controller@20c4000 {
- compatible = "fsl,imx6sll-ccm";
- reg = <0x020c4000 0x4000>;
- interrupts = <GIC_SPI 87 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 88 IRQ_TYPE_LEVEL_HIGH>;
- #clock-cells = <1>;
- clocks = <&ckil>, <&osc>, <&ipp_di0>, <&ipp_di1>;
- clock-names = "ckil", "osc", "ipp_di0", "ipp_di1";
-};
-
-uart1: serial@2020000 {
- compatible = "fsl,imx6sl-uart", "fsl,imx6q-uart", "fsl,imx21-uart";
- reg = <0x02020000 0x4000>;
- interrupts = <GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&clks IMX6SLL_CLK_UART1_IPG>,
- <&clks IMX6SLL_CLK_UART1_SERIAL>;
- clock-names = "ipg", "per";
-};
diff --git a/Documentation/devicetree/bindings/clock/imx6sll-clock.yaml b/Documentation/devicetree/bindings/clock/imx6sll-clock.yaml
new file mode 100644
index 000000000000..6d64cf9463c9
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/imx6sll-clock.yaml
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/imx6sll-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX6 SLL Clock Controller
+
+maintainers:
+ - Abel Vesa <abelvesa@kernel.org>
+ - Peng Fan <peng.fan@nxp.com>
+
+properties:
+ compatible:
+ const: fsl,imx6sll-ccm
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ description: CCM provides 2 interrupt requests, request 1 is to generate
+ interrupt for frequency or mux change, request 2 is to generate
+ interrupt for oscillator read or PLL lock.
+ items:
+ - description: CCM interrupt request 1
+ - description: CCM interrupt request 2
+
+ '#clock-cells':
+ const: 1
+
+ clocks:
+ items:
+ - description: 32k osc
+ - description: 24m osc
+ - description: ipp_di0 clock input
+ - description: ipp_di1 clock input
+
+ clock-names:
+ items:
+ - const: ckil
+ - const: osc
+ - const: ipp_di0
+ - const: ipp_di1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - '#clock-cells'
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ # Clock Control Module node:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ clock-controller@20c4000 {
+ compatible = "fsl,imx6sll-ccm";
+ reg = <0x020c4000 0x4000>;
+ interrupts = <GIC_SPI 87 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 88 IRQ_TYPE_LEVEL_HIGH>;
+ #clock-cells = <1>;
+ clocks = <&ckil>, <&osc>, <&ipp_di0>, <&ipp_di1>;
+ clock-names = "ckil", "osc", "ipp_di0", "ipp_di1";
+ };
diff --git a/Documentation/devicetree/bindings/clock/imx6sx-clock.txt b/Documentation/devicetree/bindings/clock/imx6sx-clock.txt
deleted file mode 100644
index 22362b9b7ba3..000000000000
--- a/Documentation/devicetree/bindings/clock/imx6sx-clock.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-* Clock bindings for Freescale i.MX6 SoloX
-
-Required properties:
-- compatible: Should be "fsl,imx6sx-ccm"
-- reg: Address and length of the register set
-- #clock-cells: Should be <1>
-- clocks: list of clock specifiers, must contain an entry for each required
- entry in clock-names
-- clock-names: should include entries "ckil", "osc", "ipp_di0" and "ipp_di1"
-
-The clock consumer should specify the desired clock by having the clock
-ID in its "clocks" phandle cell. See include/dt-bindings/clock/imx6sx-clock.h
-for the full list of i.MX6 SoloX clock IDs.
diff --git a/Documentation/devicetree/bindings/clock/imx6sx-clock.yaml b/Documentation/devicetree/bindings/clock/imx6sx-clock.yaml
new file mode 100644
index 000000000000..77afa4b81cf7
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/imx6sx-clock.yaml
@@ -0,0 +1,72 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/imx6sx-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX6 SoloX Clock Controller
+
+maintainers:
+ - Abel Vesa <abelvesa@kernel.org>
+ - Peng Fan <peng.fan@nxp.com>
+
+properties:
+ compatible:
+ const: fsl,imx6sx-ccm
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ description: CCM provides 2 interrupt requests, request 1 is to generate
+ interrupt for frequency or mux change, request 2 is to generate
+ interrupt for oscillator read or PLL lock.
+ items:
+ - description: CCM interrupt request 1
+ - description: CCM interrupt request 2
+
+ '#clock-cells':
+ const: 1
+
+ clocks:
+ items:
+ - description: 32k osc
+ - description: 24m osc
+ - description: ipp_di0 clock input
+ - description: ipp_di1 clock input
+ - description: anaclk1 clock input
+ - description: anaclk2 clock input
+
+ clock-names:
+ items:
+ - const: ckil
+ - const: osc
+ - const: ipp_di0
+ - const: ipp_di1
+ - const: anaclk1
+ - const: anaclk2
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - '#clock-cells'
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ # Clock Control Module node:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ clock-controller@20c4000 {
+ compatible = "fsl,imx6sx-ccm";
+ reg = <0x020c4000 0x4000>;
+ interrupts = <GIC_SPI 87 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 88 IRQ_TYPE_LEVEL_HIGH>;
+ #clock-cells = <1>;
+ clocks = <&ckil>, <&osc>, <&ipp_di0>, <&ipp_di1>, <&anaclk1>, <&anaclk2>;
+ clock-names = "ckil", "osc", "ipp_di0", "ipp_di1", "anaclk1", "anaclk2";
+ };
diff --git a/Documentation/devicetree/bindings/clock/imx6ul-clock.txt b/Documentation/devicetree/bindings/clock/imx6ul-clock.txt
deleted file mode 100644
index 571d5039f663..000000000000
--- a/Documentation/devicetree/bindings/clock/imx6ul-clock.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-* Clock bindings for Freescale i.MX6 UltraLite
-
-Required properties:
-- compatible: Should be "fsl,imx6ul-ccm"
-- reg: Address and length of the register set
-- #clock-cells: Should be <1>
-- clocks: list of clock specifiers, must contain an entry for each required
- entry in clock-names
-- clock-names: should include entries "ckil", "osc", "ipp_di0" and "ipp_di1"
-
-The clock consumer should specify the desired clock by having the clock
-ID in its "clocks" phandle cell. See include/dt-bindings/clock/imx6ul-clock.h
-for the full list of i.MX6 UltraLite clock IDs.
diff --git a/Documentation/devicetree/bindings/clock/imx6ul-clock.yaml b/Documentation/devicetree/bindings/clock/imx6ul-clock.yaml
new file mode 100644
index 000000000000..d57e18a210cc
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/imx6ul-clock.yaml
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/imx6ul-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX6 UltraLite Clock Controller
+
+maintainers:
+ - Abel Vesa <abelvesa@kernel.org>
+ - Peng Fan <peng.fan@nxp.com>
+
+properties:
+ compatible:
+ const: fsl,imx6ul-ccm
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ description: CCM provides 2 interrupt requests, request 1 is to generate
+ interrupt for frequency or mux change, request 2 is to generate
+ interrupt for oscillator read or PLL lock.
+ items:
+ - description: CCM interrupt request 1
+ - description: CCM interrupt request 2
+
+ '#clock-cells':
+ const: 1
+
+ clocks:
+ items:
+ - description: 32k osc
+ - description: 24m osc
+ - description: ipp_di0 clock input
+ - description: ipp_di1 clock input
+
+ clock-names:
+ items:
+ - const: ckil
+ - const: osc
+ - const: ipp_di0
+ - const: ipp_di1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - '#clock-cells'
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ # Clock Control Module node:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ clock-controller@20c4000 {
+ compatible = "fsl,imx6ul-ccm";
+ reg = <0x020c4000 0x4000>;
+ interrupts = <GIC_SPI 87 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 88 IRQ_TYPE_LEVEL_HIGH>;
+ #clock-cells = <1>;
+ clocks = <&ckil>, <&osc>, <&ipp_di0>, <&ipp_di1>;
+ clock-names = "ckil", "osc", "ipp_di0", "ipp_di1";
+ };
diff --git a/Documentation/devicetree/bindings/clock/imx7d-clock.txt b/Documentation/devicetree/bindings/clock/imx7d-clock.txt
deleted file mode 100644
index 9d3026d81a68..000000000000
--- a/Documentation/devicetree/bindings/clock/imx7d-clock.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-* Clock bindings for Freescale i.MX7 Dual
-
-Required properties:
-- compatible: Should be "fsl,imx7d-ccm"
-- reg: Address and length of the register set
-- #clock-cells: Should be <1>
-- clocks: list of clock specifiers, must contain an entry for each required
- entry in clock-names
-- clock-names: should include entries "ckil", "osc"
-
-The clock consumer should specify the desired clock by having the clock
-ID in its "clocks" phandle cell. See include/dt-bindings/clock/imx7d-clock.h
-for the full list of i.MX7 Dual clock IDs.
diff --git a/Documentation/devicetree/bindings/clock/imx7d-clock.yaml b/Documentation/devicetree/bindings/clock/imx7d-clock.yaml
new file mode 100644
index 000000000000..880d602d09f4
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/imx7d-clock.yaml
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/imx7d-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX7 Dual Clock Controller
+
+maintainers:
+ - Frank Li <Frank.Li@nxp.com>
+
+description: |
+ The clock consumer should specify the desired clock by having the clock
+ ID in its "clocks" phandle cell. See include/dt-bindings/clock/imx7d-clock.h
+ for the full list of i.MX7 Dual clock IDs.
+
+properties:
+ compatible:
+ const: fsl,imx7d-ccm
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ items:
+ - description: CCM interrupt request 1
+ - description: CCM interrupt request 2
+
+ '#clock-cells':
+ const: 1
+
+ clocks:
+ items:
+ - description: 32k osc
+ - description: 24m osc
+
+ clock-names:
+ items:
+ - const: ckil
+ - const: osc
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ clock-controller@30380000 {
+ compatible = "fsl,imx7d-ccm";
+ reg = <0x30380000 0x10000>;
+ interrupts = <GIC_SPI 85 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>;
+ #clock-cells = <1>;
+ clocks = <&ckil>, <&osc>;
+ clock-names = "ckil", "osc";
+ };
diff --git a/Documentation/devicetree/bindings/clock/imx7ulp-pcc-clock.yaml b/Documentation/devicetree/bindings/clock/imx7ulp-pcc-clock.yaml
new file mode 100644
index 000000000000..76842038f52e
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/imx7ulp-pcc-clock.yaml
@@ -0,0 +1,110 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/imx7ulp-pcc-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX7ULP Peripheral Clock Control (PCC) modules Clock Controller
+
+maintainers:
+ - A.s. Dong <aisheng.dong@nxp.com>
+
+description: |
+ i.MX7ULP Clock functions are under joint control of the System
+ Clock Generation (SCG) modules, Peripheral Clock Control (PCC)
+ modules, and Core Mode Controller (CMC)1 blocks
+
+ The clocking scheme provides clear separation between M4 domain
+ and A7 domain. Except for a few clock sources shared between two
+ domains, such as the System Oscillator clock, the Slow IRC (SIRC),
+ and and the Fast IRC clock (FIRCLK), clock sources and clock
+ management are separated and contained within each domain.
+
+ M4 clock management consists of SCG0, PCC0, PCC1, and CMC0 modules.
+ A7 clock management consists of SCG1, PCC2, PCC3, and CMC1 modules.
+
+ Note: this binding doc is only for A7 clock domain.
+
+ The Peripheral Clock Control (PCC) is responsible for clock selection,
+ optional division and clock gating mode for peripherals in their
+ respected power domain.
+
+ The clock consumer should specify the desired clock by having the clock
+ ID in its "clocks" phandle cell.
+ See include/dt-bindings/clock/imx7ulp-clock.h for the full list of
+ i.MX7ULP clock IDs of each module.
+
+properties:
+ compatible:
+ enum:
+ - fsl,imx7ulp-pcc2
+ - fsl,imx7ulp-pcc3
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ clocks:
+ items:
+ - description: nic1 bus clock
+ - description: nic1 clock
+ - description: ddr clock
+ - description: apll pfd2
+ - description: apll pfd1
+ - description: apll pfd0
+ - description: usb pll
+ - description: system osc bus clock
+ - description: fast internal reference clock bus
+ - description: rtc osc
+ - description: system pll bus clock
+
+ clock-names:
+ items:
+ - const: nic1_bus_clk
+ - const: nic1_clk
+ - const: ddr_clk
+ - const: apll_pfd2
+ - const: apll_pfd1
+ - const: apll_pfd0
+ - const: upll
+ - const: sosc_bus_clk
+ - const: firc_bus_clk
+ - const: rosc
+ - const: spll_bus_clk
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/imx7ulp-clock.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ clock-controller@403f0000 {
+ compatible = "fsl,imx7ulp-pcc2";
+ reg = <0x403f0000 0x10000>;
+ #clock-cells = <1>;
+ clocks = <&scg1 IMX7ULP_CLK_NIC1_BUS_DIV>,
+ <&scg1 IMX7ULP_CLK_NIC1_DIV>,
+ <&scg1 IMX7ULP_CLK_DDR_DIV>,
+ <&scg1 IMX7ULP_CLK_APLL_PFD2>,
+ <&scg1 IMX7ULP_CLK_APLL_PFD1>,
+ <&scg1 IMX7ULP_CLK_APLL_PFD0>,
+ <&scg1 IMX7ULP_CLK_UPLL>,
+ <&scg1 IMX7ULP_CLK_SOSC_BUS_CLK>,
+ <&scg1 IMX7ULP_CLK_FIRC_BUS_CLK>,
+ <&scg1 IMX7ULP_CLK_ROSC>,
+ <&scg1 IMX7ULP_CLK_SPLL_BUS_CLK>;
+ clock-names = "nic1_bus_clk", "nic1_clk", "ddr_clk",
+ "apll_pfd2", "apll_pfd1", "apll_pfd0",
+ "upll", "sosc_bus_clk", "firc_bus_clk",
+ "rosc", "spll_bus_clk";
+ };
diff --git a/Documentation/devicetree/bindings/clock/imx7ulp-scg-clock.yaml b/Documentation/devicetree/bindings/clock/imx7ulp-scg-clock.yaml
new file mode 100644
index 000000000000..5e25bc6d1372
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/imx7ulp-scg-clock.yaml
@@ -0,0 +1,88 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/imx7ulp-scg-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX7ULP System Clock Generation (SCG) modules Clock Controller
+
+maintainers:
+ - A.s. Dong <aisheng.dong@nxp.com>
+
+description: |
+ i.MX7ULP Clock functions are under joint control of the System
+ Clock Generation (SCG) modules, Peripheral Clock Control (PCC)
+ modules, and Core Mode Controller (CMC)1 blocks
+
+ The clocking scheme provides clear separation between M4 domain
+ and A7 domain. Except for a few clock sources shared between two
+ domains, such as the System Oscillator clock, the Slow IRC (SIRC),
+ and and the Fast IRC clock (FIRCLK), clock sources and clock
+ management are separated and contained within each domain.
+
+ M4 clock management consists of SCG0, PCC0, PCC1, and CMC0 modules.
+ A7 clock management consists of SCG1, PCC2, PCC3, and CMC1 modules.
+
+ Note: this binding doc is only for A7 clock domain.
+
+ The System Clock Generation (SCG) is responsible for clock generation
+ and distribution across this device. Functions performed by the SCG
+ include: clock reference selection, generation of clock used to derive
+ processor, system, peripheral bus and external memory interface clocks,
+ source selection for peripheral clocks and control of power saving
+ clock gating mode.
+
+ The clock consumer should specify the desired clock by having the clock
+ ID in its "clocks" phandle cell.
+ See include/dt-bindings/clock/imx7ulp-clock.h for the full list of
+ i.MX7ULP clock IDs of each module.
+
+properties:
+ compatible:
+ const: fsl,imx7ulp-scg1
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ clocks:
+ items:
+ - description: rtc osc
+ - description: system osc
+ - description: slow internal reference clock
+ - description: fast internal reference clock
+ - description: usb PLL
+
+ clock-names:
+ items:
+ - const: rosc
+ - const: sosc
+ - const: sirc
+ - const: firc
+ - const: upll
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/imx7ulp-clock.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ clock-controller@403e0000 {
+ compatible = "fsl,imx7ulp-scg1";
+ reg = <0x403e0000 0x10000>;
+ clocks = <&rosc>, <&sosc>, <&sirc>,
+ <&firc>, <&upll>;
+ clock-names = "rosc", "sosc", "sirc",
+ "firc", "upll";
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/imx8m-clock.yaml b/Documentation/devicetree/bindings/clock/imx8m-clock.yaml
new file mode 100644
index 000000000000..4fec55832702
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/imx8m-clock.yaml
@@ -0,0 +1,133 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/imx8m-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NXP i.MX8M Family Clock Control Module
+
+maintainers:
+ - Abel Vesa <abelvesa@kernel.org>
+ - Peng Fan <peng.fan@nxp.com>
+
+description: |
+ NXP i.MX8M Mini/Nano/Plus/Quad clock control module is an integrated clock
+ controller, which generates and supplies to all modules.
+
+properties:
+ compatible:
+ enum:
+ - fsl,imx8mm-ccm
+ - fsl,imx8mn-ccm
+ - fsl,imx8mp-ccm
+ - fsl,imx8mq-ccm
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 2
+
+ clocks:
+ minItems: 6
+ maxItems: 7
+
+ clock-names:
+ minItems: 6
+ maxItems: 7
+
+ '#clock-cells':
+ const: 1
+ description:
+ The clock consumer should specify the desired clock by having the clock
+ ID in its "clocks" phandle cell. See include/dt-bindings/clock/imx8m-clock.h
+ for the full list of i.MX8M clock IDs.
+
+ fsl,operating-mode:
+ $ref: /schemas/types.yaml#/definitions/string
+ enum: [nominal, overdrive]
+ description:
+ The operating mode of the SoC. This affects the maximum clock rates that
+ can safely be configured by the clock controller.
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - '#clock-cells'
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: fsl,imx8mq-ccm
+ then:
+ properties:
+ clocks:
+ items:
+ - description: 32k osc
+ - description: 25m osc
+ - description: 27m osc
+ - description: ext1 clock input
+ - description: ext2 clock input
+ - description: ext3 clock input
+ - description: ext4 clock input
+ clock-names:
+ items:
+ - const: ckil
+ - const: osc_25m
+ - const: osc_27m
+ - const: clk_ext1
+ - const: clk_ext2
+ - const: clk_ext3
+ - const: clk_ext4
+ else:
+ properties:
+ clocks:
+ items:
+ - description: 32k osc
+ - description: 24m osc
+ - description: ext1 clock input
+ - description: ext2 clock input
+ - description: ext3 clock input
+ - description: ext4 clock input
+
+ clock-names:
+ items:
+ - const: osc_32k
+ - const: osc_24m
+ - const: clk_ext1
+ - const: clk_ext2
+ - const: clk_ext3
+ - const: clk_ext4
+
+additionalProperties: false
+
+examples:
+ # Clock Control Module node:
+ - |
+ clock-controller@30380000 {
+ compatible = "fsl,imx8mm-ccm";
+ reg = <0x30380000 0x10000>;
+ #clock-cells = <1>;
+ clocks = <&osc_32k>, <&osc_24m>, <&clk_ext1>, <&clk_ext2>,
+ <&clk_ext3>, <&clk_ext4>;
+ clock-names = "osc_32k", "osc_24m", "clk_ext1", "clk_ext2",
+ "clk_ext3", "clk_ext4";
+ fsl,operating-mode = "nominal";
+ };
+
+ - |
+ clock-controller@30380000 {
+ compatible = "fsl,imx8mq-ccm";
+ reg = <0x30380000 0x10000>;
+ #clock-cells = <1>;
+ clocks = <&ckil>, <&osc_25m>, <&osc_27m>, <&clk_ext1>,
+ <&clk_ext2>, <&clk_ext3>, <&clk_ext4>;
+ clock-names = "ckil", "osc_25m", "osc_27m", "clk_ext1",
+ "clk_ext2", "clk_ext3", "clk_ext4";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/imx8mp-audiomix.yaml b/Documentation/devicetree/bindings/clock/imx8mp-audiomix.yaml
new file mode 100644
index 000000000000..0272c9527037
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/imx8mp-audiomix.yaml
@@ -0,0 +1,84 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/imx8mp-audiomix.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NXP i.MX8MP AudioMIX Block Control
+
+maintainers:
+ - Marek Vasut <marex@denx.de>
+
+description: |
+ NXP i.MX8M Plus AudioMIX is dedicated clock muxing and gating IP
+ used to control Audio related clock on the SoC.
+
+properties:
+ compatible:
+ const: fsl,imx8mp-audio-blk-ctrl
+
+ reg:
+ maxItems: 1
+
+ power-domains:
+ maxItems: 1
+
+ clocks:
+ minItems: 8
+ maxItems: 8
+
+ clock-names:
+ items:
+ - const: ahb
+ - const: sai1
+ - const: sai2
+ - const: sai3
+ - const: sai5
+ - const: sai6
+ - const: sai7
+ - const: axi
+
+ '#clock-cells':
+ const: 1
+ description:
+ The clock consumer should specify the desired clock by having the clock
+ ID in its "clocks" phandle cell. See include/dt-bindings/clock/imx8mp-clock.h
+ for the full list of i.MX8MP IMX8MP_CLK_AUDIOMIX_ clock IDs.
+
+ '#reset-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - power-domains
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ # Clock Control Module node:
+ - |
+ #include <dt-bindings/clock/imx8mp-clock.h>
+
+ clock-controller@30e20000 {
+ compatible = "fsl,imx8mp-audio-blk-ctrl";
+ reg = <0x30e20000 0x10000>;
+ #clock-cells = <1>;
+ clocks = <&clk IMX8MP_CLK_AUDIO_ROOT>,
+ <&clk IMX8MP_CLK_SAI1>,
+ <&clk IMX8MP_CLK_SAI2>,
+ <&clk IMX8MP_CLK_SAI3>,
+ <&clk IMX8MP_CLK_SAI5>,
+ <&clk IMX8MP_CLK_SAI6>,
+ <&clk IMX8MP_CLK_SAI7>,
+ <&clk IMX8MP_CLK_AUDIO_AXI_ROOT>;
+ clock-names = "ahb",
+ "sai1", "sai2", "sai3",
+ "sai5", "sai6", "sai7", "axi";
+ power-domains = <&pgc_audio>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/imx8qxp-lpcg.yaml b/Documentation/devicetree/bindings/clock/imx8qxp-lpcg.yaml
new file mode 100644
index 000000000000..b207f95361b2
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/imx8qxp-lpcg.yaml
@@ -0,0 +1,103 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/imx8qxp-lpcg.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NXP i.MX8QXP LPCG (Low-Power Clock Gating) Clock
+
+maintainers:
+ - Aisheng Dong <aisheng.dong@nxp.com>
+
+description: |
+ The Low-Power Clock Gate (LPCG) modules contain a local programming
+ model to control the clock gates for the peripherals. An LPCG module
+ is used to locally gate the clocks for the associated peripheral.
+
+ This level of clock gating is provided after the clocks are generated
+ by the SCU resources and clock controls. Thus even if the clock is
+ enabled by these control bits, it might still not be running based
+ on the base resource.
+
+ The clock consumer should specify the desired clock by having the clock
+ ID in its "clocks" phandle cell. See the full list of clock IDs from:
+ include/dt-bindings/clock/imx8-lpcg.h
+
+properties:
+ compatible:
+ oneOf:
+ - const: fsl,imx8qxp-lpcg
+ - items:
+ - enum:
+ - fsl,imx8qm-lpcg
+ - const: fsl,imx8qxp-lpcg
+ - enum:
+ - fsl,imx8qxp-lpcg-adma
+ - fsl,imx8qxp-lpcg-conn
+ - fsl,imx8qxp-lpcg-dc
+ - fsl,imx8qxp-lpcg-dsp
+ - fsl,imx8qxp-lpcg-gpu
+ - fsl,imx8qxp-lpcg-hsio
+ - fsl,imx8qxp-lpcg-img
+ - fsl,imx8qxp-lpcg-lsio
+ - fsl,imx8qxp-lpcg-vpu
+ deprecated: true
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ clocks:
+ description: |
+ Input parent clocks phandle array for each clock
+ minItems: 1
+ maxItems: 8
+
+ clock-indices:
+ description: |
+ An integer array indicating the bit offset for each clock.
+ Refer to <include/dt-bindings/clock/imx8-lpcg.h> for the
+ supported LPCG clock indices.
+ minItems: 1
+ maxItems: 8
+
+ clock-output-names:
+ description: |
+ Shall be the corresponding names of the outputs.
+ NOTE this property must be specified in the same order
+ as the clock-indices property.
+ minItems: 1
+ maxItems: 8
+
+ power-domains:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/imx8-lpcg.h>
+ #include <dt-bindings/firmware/imx/rsrc.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ sdhc0_lpcg: clock-controller@5b200000 {
+ compatible = "fsl,imx8qxp-lpcg";
+ reg = <0x5b200000 0x10000>;
+ #clock-cells = <1>;
+ clocks = <&sdhc0_clk IMX_SC_PM_CLK_PER>,
+ <&conn_ipg_clk>,
+ <&conn_axi_clk>;
+ clock-indices = <IMX_LPCG_CLK_0>,
+ <IMX_LPCG_CLK_4>,
+ <IMX_LPCG_CLK_5>;
+ clock-output-names = "sdhc0_lpcg_per_clk",
+ "sdhc0_lpcg_ipg_clk",
+ "sdhc0_lpcg_ahb_clk";
+ power-domains = <&pd IMX_SC_R_SDHC_0>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/imx8ulp-cgc-clock.yaml b/Documentation/devicetree/bindings/clock/imx8ulp-cgc-clock.yaml
new file mode 100644
index 000000000000..68a60cdc19af
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/imx8ulp-cgc-clock.yaml
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/imx8ulp-cgc-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NXP i.MX8ULP Clock Generation & Control(CGC) Module
+
+maintainers:
+ - Jacky Bai <ping.bai@nxp.com>
+
+description: |
+ On i.MX8ULP, The clock sources generation, distribution and management is
+ under the control of several CGCs & PCCs modules. The CGC modules generate
+ and distribute clocks on the device.
+
+properties:
+ compatible:
+ enum:
+ - fsl,imx8ulp-cgc1
+ - fsl,imx8ulp-cgc2
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ # Clock Generation & Control Module node:
+ - |
+ clock-controller@292c0000 {
+ compatible = "fsl,imx8ulp-cgc1";
+ reg = <0x292c0000 0x10000>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/imx8ulp-pcc-clock.yaml b/Documentation/devicetree/bindings/clock/imx8ulp-pcc-clock.yaml
new file mode 100644
index 000000000000..d0b0792fe7ba
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/imx8ulp-pcc-clock.yaml
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/imx8ulp-pcc-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NXP i.MX8ULP Peripheral Clock Controller(PCC) Module
+
+maintainers:
+ - Jacky Bai <ping.bai@nxp.com>
+
+description: |
+ On i.MX8ULP, The clock sources generation, distribution and management is
+ under the control of several CGCs & PCCs modules. The PCC modules control
+ software reset, clock selection, optional division and clock gating mode
+ for peripherals.
+
+properties:
+ compatible:
+ enum:
+ - fsl,imx8ulp-pcc3
+ - fsl,imx8ulp-pcc4
+ - fsl,imx8ulp-pcc5
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ '#reset-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+ - '#reset-cells'
+
+additionalProperties: false
+
+examples:
+ # Peripheral Clock Control Module node:
+ - |
+ clock-controller@292d0000 {
+ compatible = "fsl,imx8ulp-pcc3";
+ reg = <0x292d0000 0x10000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/imx93-clock.yaml b/Documentation/devicetree/bindings/clock/imx93-clock.yaml
new file mode 100644
index 000000000000..98c0800732ef
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/imx93-clock.yaml
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/imx93-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NXP i.MX93 Clock Control Module
+
+maintainers:
+ - Peng Fan <peng.fan@nxp.com>
+
+description: |
+ i.MX93 clock control module is an integrated clock controller, which
+ includes clock generator, clock gate and supplies to all modules.
+
+properties:
+ compatible:
+ enum:
+ - fsl,imx91-ccm
+ - fsl,imx93-ccm
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ description:
+ specify the external clocks used by the CCM module.
+ items:
+ - description: 32k osc
+ - description: 24m osc
+ - description: ext1 clock input
+
+ clock-names:
+ description:
+ specify the external clocks names used by the CCM module.
+ items:
+ - const: osc_32k
+ - const: osc_24m
+ - const: clk_ext1
+
+ '#clock-cells':
+ const: 1
+ description:
+ See include/dt-bindings/clock/imx93-clock.h for the full list of
+ i.MX93 clock IDs.
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ # Clock Control Module node:
+ - |
+ clock-controller@44450000 {
+ compatible = "fsl,imx93-ccm";
+ reg = <0x44450000 0x10000>;
+ #clock-cells = <1>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/imxrt1050-clock.yaml b/Documentation/devicetree/bindings/clock/imxrt1050-clock.yaml
new file mode 100644
index 000000000000..777af4aad4b2
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/imxrt1050-clock.yaml
@@ -0,0 +1,59 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/imxrt1050-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MXRT Clock Controller
+
+maintainers:
+ - Giulio Benetti <giulio.benetti@benettiengineering.com>
+ - Jesse Taube <Mr.Bossman075@gmail.com>
+
+description: |
+ The clock consumer should specify the desired clock by having the clock
+ ID in its "clocks" phandle cell. See include/dt-bindings/clock/imxrt*-clock.h
+ for the full list of i.MXRT clock IDs.
+
+properties:
+ compatible:
+ const: fsl,imxrt1050-ccm
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 2
+
+ clocks:
+ description: 24m osc
+ maxItems: 1
+
+ clock-names:
+ const: osc
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/imxrt1050-clock.h>
+
+ clks: clock-controller@400fc000 {
+ compatible = "fsl,imxrt1050-ccm";
+ reg = <0x400fc000 0x4000>;
+ interrupts = <95>, <96>;
+ clocks = <&osc>;
+ clock-names = "osc";
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/ingenic,cgu.txt b/Documentation/devicetree/bindings/clock/ingenic,cgu.txt
deleted file mode 100644
index ba5a442026b7..000000000000
--- a/Documentation/devicetree/bindings/clock/ingenic,cgu.txt
+++ /dev/null
@@ -1,56 +0,0 @@
-Ingenic SoC CGU binding
-
-The CGU in an Ingenic SoC provides all the clocks generated on-chip. It
-typically includes a variety of PLLs, multiplexers, dividers & gates in order
-to provide many different clock signals derived from only 2 external source
-clocks.
-
-Required properties:
-- compatible : Should be one of:
- * ingenic,jz4740-cgu
- * ingenic,jz4725b-cgu
- * ingenic,jz4770-cgu
- * ingenic,jz4780-cgu
-- reg : The address & length of the CGU registers.
-- clocks : List of phandle & clock specifiers for clocks external to the CGU.
- Two such external clocks should be specified - first the external crystal
- "ext" and second the RTC clock source "rtc".
-- clock-names : List of name strings for the external clocks.
-- #clock-cells: Should be 1.
- Clock consumers specify this argument to identify a clock. The valid values
- may be found in <dt-bindings/clock/<soctype>-cgu.h>.
-
-Example SoC include file:
-
-/ {
- cgu: jz4740-cgu {
- compatible = "ingenic,jz4740-cgu";
- reg = <0x10000000 0x100>;
- #clock-cells = <1>;
- };
-
- uart0: serial@10030000 {
- clocks = <&cgu JZ4740_CLK_UART0>;
- };
-};
-
-Example board file:
-
-/ {
- ext: clock@0 {
- compatible = "fixed-clock";
- #clock-cells = <0>;
- clock-frequency = <12000000>;
- };
-
- rtc: clock@1 {
- compatible = "fixed-clock";
- #clock-cells = <0>;
- clock-frequency = <32768>;
- };
-
- &cgu {
- clocks = <&ext> <&rtc>;
- clock-names: "ext", "rtc";
- };
-};
diff --git a/Documentation/devicetree/bindings/clock/ingenic,cgu.yaml b/Documentation/devicetree/bindings/clock/ingenic,cgu.yaml
new file mode 100644
index 000000000000..509df06b9c9d
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/ingenic,cgu.yaml
@@ -0,0 +1,132 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/ingenic,cgu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Ingenic SoCs CGU
+
+description: |
+ The CGU in an Ingenic SoC provides all the clocks generated on-chip. It
+ typically includes a variety of PLLs, multiplexers, dividers & gates in order
+ to provide many different clock signals derived from only 2 external source
+ clocks.
+
+maintainers:
+ - Paul Cercueil <paul@crapouillou.net>
+
+select:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - ingenic,jz4740-cgu
+ - ingenic,jz4725b-cgu
+ - ingenic,jz4755-cgu
+ - ingenic,jz4760-cgu
+ - ingenic,jz4760b-cgu
+ - ingenic,jz4770-cgu
+ - ingenic,jz4780-cgu
+ - ingenic,x1000-cgu
+ - ingenic,x1830-cgu
+ required:
+ - compatible
+
+properties:
+ $nodename:
+ pattern: "^clock-controller@[0-9a-f]+$"
+
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 1
+
+ "#clock-cells":
+ const: 1
+
+ ranges: true
+
+ compatible:
+ items:
+ - enum:
+ - ingenic,jz4740-cgu
+ - ingenic,jz4725b-cgu
+ - ingenic,jz4755-cgu
+ - ingenic,jz4760-cgu
+ - ingenic,jz4760b-cgu
+ - ingenic,jz4770-cgu
+ - ingenic,jz4780-cgu
+ - ingenic,x1000-cgu
+ - ingenic,x1830-cgu
+ - const: simple-mfd
+ minItems: 1
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: External oscillator clock
+ - description: Internal 32 kHz RTC clock
+
+ clock-names:
+ items:
+ - const: ext
+ - enum:
+ - rtc
+ - osc32k # Different name, same clock
+
+ assigned-clocks:
+ minItems: 1
+ maxItems: 64
+
+ assigned-clock-parents:
+ minItems: 1
+ maxItems: 64
+
+ assigned-clock-rates:
+ minItems: 1
+ maxItems: 64
+
+required:
+ - "#clock-cells"
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+
+patternProperties:
+ "^usb-phy@[a-f0-9]+$":
+ $ref: /schemas/phy/ingenic,phy-usb.yaml#
+ "^mac-phy-ctrl@[a-f0-9]+$":
+ $ref: /schemas/net/ingenic,mac.yaml#
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/ingenic,jz4770-cgu.h>
+ cgu: clock-controller@10000000 {
+ compatible = "ingenic,jz4770-cgu", "simple-mfd";
+ reg = <0x10000000 0x100>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0x0 0x10000000 0x100>;
+
+ clocks = <&ext>, <&osc32k>;
+ clock-names = "ext", "osc32k";
+
+ #clock-cells = <1>;
+
+ otg_phy: usb-phy@3c {
+ compatible = "ingenic,jz4770-phy";
+ reg = <0x3c 0x10>;
+
+ clocks = <&cgu JZ4770_CLK_OTG_PHY>;
+
+ vcc-supply = <&ldo5>;
+
+ #phy-cells = <0>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/intc_stratix10.txt b/Documentation/devicetree/bindings/clock/intc_stratix10.txt
deleted file mode 100644
index 9f4ec5cb5c6b..000000000000
--- a/Documentation/devicetree/bindings/clock/intc_stratix10.txt
+++ /dev/null
@@ -1,20 +0,0 @@
-Device Tree Clock bindings for Intel's SoCFPGA Stratix10 platform
-
-This binding uses the common clock binding[1].
-
-[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-
-Required properties:
-- compatible : shall be
- "intel,stratix10-clkmgr"
-
-- reg : shall be the control register offset from CLOCK_MANAGER's base for the clock.
-
-- #clock-cells : from common clock binding, shall be set to 1.
-
-Example:
- clkmgr: clock-controller@ffd10000 {
- compatible = "intel,stratix10-clkmgr";
- reg = <0xffd10000 0x1000>;
- #clock-cells = <1>;
- };
diff --git a/Documentation/devicetree/bindings/clock/intel,agilex.yaml b/Documentation/devicetree/bindings/clock/intel,agilex.yaml
new file mode 100644
index 000000000000..3745ba8dbd76
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/intel,agilex.yaml
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/intel,agilex.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Intel SoCFPGA Agilex platform clock controller
+
+maintainers:
+ - Dinh Nguyen <dinguyen@kernel.org>
+
+description:
+ The Intel Agilex Clock controller is an integrated clock controller, which
+ generates and supplies to all modules.
+
+properties:
+ compatible:
+ const: intel,agilex-clkmgr
+
+ '#clock-cells':
+ const: 1
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ # Clock controller node:
+ - |
+ clkmgr: clock-controller@ffd10000 {
+ compatible = "intel,agilex-clkmgr";
+ reg = <0xffd10000 0x1000>;
+ clocks = <&osc1>;
+ #clock-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/intel,agilex5-clkmgr.yaml b/Documentation/devicetree/bindings/clock/intel,agilex5-clkmgr.yaml
new file mode 100644
index 000000000000..d120b0da7f3d
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/intel,agilex5-clkmgr.yaml
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/intel,agilex5-clkmgr.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Intel SoCFPGA Agilex5 clock manager
+
+maintainers:
+ - Dinh Nguyen <dinguyen@kernel.org>
+
+description:
+ The Intel Agilex5 Clock Manager is an integrated clock controller, which
+ generates and supplies clock to all the modules.
+
+properties:
+ compatible:
+ const: intel,agilex5-clkmgr
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ clkmgr: clock-controller@10d10000 {
+ compatible = "intel,agilex5-clkmgr";
+ reg = <0x10d10000 0x1000>;
+ #clock-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/intel,cgu-lgm.yaml b/Documentation/devicetree/bindings/clock/intel,cgu-lgm.yaml
new file mode 100644
index 000000000000..76609a390429
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/intel,cgu-lgm.yaml
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/intel,cgu-lgm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Intel Lightning Mountain SoC's Clock Controller(CGU)
+
+maintainers:
+ - Rahul Tanwar <rahul.tanwar@linux.intel.com>
+
+description: |
+ Lightning Mountain(LGM) SoC's Clock Generation Unit(CGU) driver provides
+ all means to access the CGU hardware module in order to generate a series
+ of clocks for the whole system and individual peripherals.
+
+ Please refer to include/dt-bindings/clock/intel,lgm-clk.h header file, it
+ defines all available clocks as macros. These macros can be used in device
+ tree sources.
+
+properties:
+ compatible:
+ const: intel,cgu-lgm
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ cgu: clock-controller@e0200000 {
+ compatible = "intel,cgu-lgm";
+ reg = <0xe0200000 0x33c>;
+ #clock-cells = <1>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/intel,easic-n5x.yaml b/Documentation/devicetree/bindings/clock/intel,easic-n5x.yaml
new file mode 100644
index 000000000000..e000116a51a4
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/intel,easic-n5x.yaml
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/intel,easic-n5x.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Intel SoCFPGA eASIC N5X platform clock controller
+
+maintainers:
+ - Dinh Nguyen <dinguyen@kernel.org>
+
+description:
+ The Intel eASIC N5X Clock controller is an integrated clock controller, which
+ generates and supplies to all modules.
+
+properties:
+ compatible:
+ const: intel,easic-n5x-clkmgr
+
+ '#clock-cells':
+ const: 1
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ # Clock controller node:
+ - |
+ clkmgr: clock-controller@ffd10000 {
+ compatible = "intel,easic-n5x-clkmgr";
+ reg = <0xffd10000 0x1000>;
+ clocks = <&osc1>;
+ #clock-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/intel,stratix10.yaml b/Documentation/devicetree/bindings/clock/intel,stratix10.yaml
new file mode 100644
index 000000000000..b4a8be213400
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/intel,stratix10.yaml
@@ -0,0 +1,35 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/intel,stratix10.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Intel SoCFPGA Stratix10 platform clock controller
+
+maintainers:
+ - Dinh Nguyen <dinguyen@kernel.org>
+
+properties:
+ compatible:
+ const: intel,stratix10-clkmgr
+
+ '#clock-cells':
+ const: 1
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@ffd10000 {
+ compatible = "intel,stratix10-clkmgr";
+ reg = <0xffd10000 0x1000>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/keystone-gate.txt b/Documentation/devicetree/bindings/clock/keystone-gate.txt
index c5aa187026e3..43f6fb6c9392 100644
--- a/Documentation/devicetree/bindings/clock/keystone-gate.txt
+++ b/Documentation/devicetree/bindings/clock/keystone-gate.txt
@@ -1,5 +1,3 @@
-Status: Unstable - ABI compatibility may be broken in the future
-
Binding for Keystone gate control driver which uses PSC controller IP.
This binding uses the common clock binding[1].
diff --git a/Documentation/devicetree/bindings/clock/keystone-pll.txt b/Documentation/devicetree/bindings/clock/keystone-pll.txt
index 47570d207215..69b0eb7c03c9 100644
--- a/Documentation/devicetree/bindings/clock/keystone-pll.txt
+++ b/Documentation/devicetree/bindings/clock/keystone-pll.txt
@@ -1,5 +1,3 @@
-Status: Unstable - ABI compatibility may be broken in the future
-
Binding for keystone PLLs. The main PLL IP typically has a multiplier,
a divider and a post divider. The additional PLL IPs like ARMPLL, DDRPLL
and PAPLL are controlled by the memory mapped register where as the Main
@@ -14,7 +12,7 @@ Required properties:
- #clock-cells : from common clock binding; shall be set to 0.
- compatible : shall be "ti,keystone,main-pll-clock" or "ti,keystone,pll-clock"
- clocks : parent clock phandle
-- reg - pll control0 and pll multipler registers
+- reg - pll control0 and pll multiplier registers
- reg-names : control, multiplier and post-divider. The multiplier and
post-divider registers are applicable only for main pll clock
- fixed-postdiv : fixed post divider value. If absent, use clkod register bits
diff --git a/Documentation/devicetree/bindings/clock/loongson,ls1x-clk.yaml b/Documentation/devicetree/bindings/clock/loongson,ls1x-clk.yaml
new file mode 100644
index 000000000000..01561a0f35d5
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/loongson,ls1x-clk.yaml
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/loongson,ls1x-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Loongson-1 Clock Controller
+
+maintainers:
+ - Keguang Zhang <keguang.zhang@gmail.com>
+
+properties:
+ compatible:
+ enum:
+ - loongson,ls1b-clk
+ - loongson,ls1c-clk
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - "#clock-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ clkc: clock-controller@1fe78030 {
+ compatible = "loongson,ls1b-clk";
+ reg = <0x1fe78030 0x8>;
+
+ clocks = <&xtal>;
+ #clock-cells = <1>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/loongson,ls2k-clk.yaml b/Documentation/devicetree/bindings/clock/loongson,ls2k-clk.yaml
new file mode 100644
index 000000000000..c07ad1f85857
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/loongson,ls2k-clk.yaml
@@ -0,0 +1,77 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/loongson,ls2k-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Loongson-2 SoC Clock Control Module
+
+maintainers:
+ - Yinbo Zhu <zhuyinbo@loongson.cn>
+
+description: |
+ Loongson-2 SoC clock control module is an integrated clock controller, which
+ generates and supplies to all modules.
+
+properties:
+ compatible:
+ enum:
+ - loongson,ls2k0300-clk
+ - loongson,ls2k0500-clk
+ - loongson,ls2k-clk # This is for Loongson-2K1000
+ - loongson,ls2k2000-clk
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ items:
+ - const: ref_100m
+
+ '#clock-cells':
+ const: 1
+ description:
+ The clock consumer should specify the desired clock by having the clock
+ ID in its "clocks" phandle cell. See include/dt-bindings/clock/loongson,ls2k-clk.h
+ for the full list of Loongson-2 SoC clock IDs.
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: loongson,ls2k0300-clk
+ then:
+ properties:
+ clock-names: false
+ else:
+ required:
+ - clock-names
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ ref_100m: clock-ref-100m {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <100000000>;
+ clock-output-names = "ref_100m";
+ };
+
+ clk: clock-controller@1fe00480 {
+ compatible = "loongson,ls2k-clk";
+ reg = <0x1fe00480 0x58>;
+ #clock-cells = <1>;
+ clocks = <&ref_100m>;
+ clock-names = "ref_100m";
+ };
diff --git a/Documentation/devicetree/bindings/clock/lpc1850-ccu.txt b/Documentation/devicetree/bindings/clock/lpc1850-ccu.txt
deleted file mode 100644
index fa97c12014ac..000000000000
--- a/Documentation/devicetree/bindings/clock/lpc1850-ccu.txt
+++ /dev/null
@@ -1,77 +0,0 @@
-* NXP LPC1850 Clock Control Unit (CCU)
-
-Each CGU base clock has several clock branches which can be turned on
-or off independently by the Clock Control Units CCU1 or CCU2. The
-branch clocks are distributed between CCU1 and CCU2.
-
- - Above text taken from NXP LPC1850 User Manual.
-
-This binding uses the common clock binding:
- Documentation/devicetree/bindings/clock/clock-bindings.txt
-
-Required properties:
-- compatible:
- Should be "nxp,lpc1850-ccu"
-- reg:
- Shall define the base and range of the address space
- containing clock control registers
-- #clock-cells:
- Shall have value <1>. The permitted clock-specifier values
- are the branch clock names defined in table below.
-- clocks:
- Shall contain a list of phandles for the base clocks routed
- from the CGU to the specific CCU. See mapping of base clocks
- and CCU in table below.
-- clock-names:
- Shall contain a list of names for the base clock routed
- from the CGU to the specific CCU. Valid CCU clock names:
- "base_usb0_clk", "base_periph_clk", "base_usb1_clk",
- "base_cpu_clk", "base_spifi_clk", "base_spi_clk",
- "base_apb1_clk", "base_apb3_clk", "base_adchs_clk",
- "base_sdio_clk", "base_ssp0_clk", "base_ssp1_clk",
- "base_uart0_clk", "base_uart1_clk", "base_uart2_clk",
- "base_uart3_clk", "base_audio_clk"
-
-Which branch clocks that are available on the CCU depends on the
-specific LPC part. Check the user manual for your specific part.
-
-A list of CCU clocks can be found in dt-bindings/clock/lpc18xx-ccu.h.
-
-Example board file:
-
-soc {
- ccu1: clock-controller@40051000 {
- compatible = "nxp,lpc1850-ccu";
- reg = <0x40051000 0x1000>;
- #clock-cells = <1>;
- clocks = <&cgu BASE_APB3_CLK>, <&cgu BASE_APB1_CLK>,
- <&cgu BASE_SPIFI_CLK>, <&cgu BASE_CPU_CLK>,
- <&cgu BASE_PERIPH_CLK>, <&cgu BASE_USB0_CLK>,
- <&cgu BASE_USB1_CLK>, <&cgu BASE_SPI_CLK>;
- clock-names = "base_apb3_clk", "base_apb1_clk",
- "base_spifi_clk", "base_cpu_clk",
- "base_periph_clk", "base_usb0_clk",
- "base_usb1_clk", "base_spi_clk";
- };
-
- ccu2: clock-controller@40052000 {
- compatible = "nxp,lpc1850-ccu";
- reg = <0x40052000 0x1000>;
- #clock-cells = <1>;
- clocks = <&cgu BASE_AUDIO_CLK>, <&cgu BASE_UART3_CLK>,
- <&cgu BASE_UART2_CLK>, <&cgu BASE_UART1_CLK>,
- <&cgu BASE_UART0_CLK>, <&cgu BASE_SSP1_CLK>,
- <&cgu BASE_SSP0_CLK>, <&cgu BASE_SDIO_CLK>;
- clock-names = "base_audio_clk", "base_uart3_clk",
- "base_uart2_clk", "base_uart1_clk",
- "base_uart0_clk", "base_ssp1_clk",
- "base_ssp0_clk", "base_sdio_clk";
- };
-
- /* A user of CCU brach clocks */
- uart1: serial@40082000 {
- ...
- clocks = <&ccu2 CLK_APB0_UART1>, <&ccu1 CLK_CPU_UART1>;
- ...
- };
-};
diff --git a/Documentation/devicetree/bindings/clock/lpc1850-cgu.txt b/Documentation/devicetree/bindings/clock/lpc1850-cgu.txt
deleted file mode 100644
index 2cc32a9a945a..000000000000
--- a/Documentation/devicetree/bindings/clock/lpc1850-cgu.txt
+++ /dev/null
@@ -1,131 +0,0 @@
-* NXP LPC1850 Clock Generation Unit (CGU)
-
-The CGU generates multiple independent clocks for the core and the
-peripheral blocks of the LPC18xx. Each independent clock is called
-a base clock and itself is one of the inputs to the two Clock
-Control Units (CCUs) which control the branch clocks to the
-individual peripherals.
-
-The CGU selects the inputs to the clock generators from multiple
-clock sources, controls the clock generation, and routes the outputs
-of the clock generators through the clock source bus to the output
-stages. Each output stage provides an independent clock source and
-corresponds to one of the base clocks for the LPC18xx.
-
- - Above text taken from NXP LPC1850 User Manual.
-
-
-This binding uses the common clock binding:
- Documentation/devicetree/bindings/clock/clock-bindings.txt
-
-Required properties:
-- compatible:
- Should be "nxp,lpc1850-cgu"
-- reg:
- Shall define the base and range of the address space
- containing clock control registers
-- #clock-cells:
- Shall have value <1>. The permitted clock-specifier values
- are the base clock numbers defined below.
-- clocks:
- Shall contain a list of phandles for the external input
- sources to the CGU. The list shall be in the following
- order: xtal, 32khz, enet_rx_clk, enet_tx_clk, gp_clkin.
-- clock-indices:
- Shall be an ordered list of numbers defining the base clock
- number provided by the CGU.
-- clock-output-names:
- Shall be an ordered list of strings defining the names of
- the clocks provided by the CGU.
-
-Which base clocks that are available on the CGU depends on the
-specific LPC part. Base clocks are numbered from 0 to 27.
-
-Number: Name: Description:
- 0 BASE_SAFE_CLK Base safe clock (always on) for WWDT
- 1 BASE_USB0_CLK Base clock for USB0
- 2 BASE_PERIPH_CLK Base clock for Cortex-M0SUB subsystem,
- SPI, and SGPIO
- 3 BASE_USB1_CLK Base clock for USB1
- 4 BASE_CPU_CLK System base clock for ARM Cortex-M core
- and APB peripheral blocks #0 and #2
- 5 BASE_SPIFI_CLK Base clock for SPIFI
- 6 BASE_SPI_CLK Base clock for SPI
- 7 BASE_PHY_RX_CLK Base clock for Ethernet PHY Receive clock
- 8 BASE_PHY_TX_CLK Base clock for Ethernet PHY Transmit clock
- 9 BASE_APB1_CLK Base clock for APB peripheral block # 1
-10 BASE_APB3_CLK Base clock for APB peripheral block # 3
-11 BASE_LCD_CLK Base clock for LCD
-12 BASE_ADCHS_CLK Base clock for ADCHS
-13 BASE_SDIO_CLK Base clock for SD/MMC
-14 BASE_SSP0_CLK Base clock for SSP0
-15 BASE_SSP1_CLK Base clock for SSP1
-16 BASE_UART0_CLK Base clock for UART0
-17 BASE_UART1_CLK Base clock for UART1
-18 BASE_UART2_CLK Base clock for UART2
-19 BASE_UART3_CLK Base clock for UART3
-20 BASE_OUT_CLK Base clock for CLKOUT pin
-24-21 - Reserved
-25 BASE_AUDIO_CLK Base clock for audio system (I2S)
-26 BASE_CGU_OUT0_CLK Base clock for CGU_OUT0 clock output
-27 BASE_CGU_OUT1_CLK Base clock for CGU_OUT1 clock output
-
-BASE_PERIPH_CLK and BASE_SPI_CLK is only available on LPC43xx.
-BASE_ADCHS_CLK is only available on LPC4370.
-
-
-Example board file:
-
-/ {
- clocks {
- xtal: xtal {
- compatible = "fixed-clock";
- #clock-cells = <0>;
- clock-frequency = <12000000>;
- };
-
- xtal32: xtal32 {
- compatible = "fixed-clock";
- #clock-cells = <0>;
- clock-frequency = <32768>;
- };
-
- enet_rx_clk: enet_rx_clk {
- compatible = "fixed-clock";
- #clock-cells = <0>;
- clock-frequency = <0>;
- clock-output-names = "enet_rx_clk";
- };
-
- enet_tx_clk: enet_tx_clk {
- compatible = "fixed-clock";
- #clock-cells = <0>;
- clock-frequency = <0>;
- clock-output-names = "enet_tx_clk";
- };
-
- gp_clkin: gp_clkin {
- compatible = "fixed-clock";
- #clock-cells = <0>;
- clock-frequency = <0>;
- clock-output-names = "gp_clkin";
- };
- };
-
- soc {
- cgu: clock-controller@40050000 {
- compatible = "nxp,lpc1850-cgu";
- reg = <0x40050000 0x1000>;
- #clock-cells = <1>;
- clocks = <&xtal>, <&creg_clk 1>, <&enet_rx_clk>, <&enet_tx_clk>, <&gp_clkin>;
- };
-
- /* A CGU and CCU clock consumer */
- lcdc: lcdc@40008000 {
- ...
- clocks = <&cgu BASE_LCD_CLK>, <&ccu1 CLK_CPU_LCD>;
- clock-names = "clcdclk", "apb_pclk";
- ...
- };
- };
-};
diff --git a/Documentation/devicetree/bindings/clock/lpc1850-creg-clk.txt b/Documentation/devicetree/bindings/clock/lpc1850-creg-clk.txt
deleted file mode 100644
index 6f1c7b4e4d2c..000000000000
--- a/Documentation/devicetree/bindings/clock/lpc1850-creg-clk.txt
+++ /dev/null
@@ -1,52 +0,0 @@
-* NXP LPC1850 CREG clocks
-
-The NXP LPC18xx/43xx CREG (Configuration Registers) block contains
-control registers for two low speed clocks. One of the clocks is a
-32 kHz oscillator driver with power up/down and clock gating. Next
-is a fixed divider that creates a 1 kHz clock from the 32 kHz osc.
-
-These clocks are used by the RTC and the Event Router peripherials.
-The 32 kHz can also be routed to other peripherials to enable low
-power modes.
-
-This binding uses the common clock binding:
- Documentation/devicetree/bindings/clock/clock-bindings.txt
-
-Required properties:
-- compatible:
- Should be "nxp,lpc1850-creg-clk"
-- #clock-cells:
- Shall have value <1>.
-- clocks:
- Shall contain a phandle to the fixed 32 kHz crystal.
-
-The creg-clk node must be a child of the creg syscon node.
-
-The following clocks are available from the clock node.
-
-Clock ID Name
- 0 1 kHz clock
- 1 32 kHz Oscillator
-
-Example:
-soc {
- creg: syscon@40043000 {
- compatible = "nxp,lpc1850-creg", "syscon", "simple-mfd";
- reg = <0x40043000 0x1000>;
-
- creg_clk: clock-controller {
- compatible = "nxp,lpc1850-creg-clk";
- clocks = <&xtal32>;
- #clock-cells = <1>;
- };
-
- ...
- };
-
- rtc: rtc@40046000 {
- ...
- clocks = <&creg_clk 0>, <&ccu1 CLK_CPU_BUS>;
- clock-names = "rtc", "reg";
- ...
- };
-};
diff --git a/Documentation/devicetree/bindings/clock/lsi,axm5516-clks.txt b/Documentation/devicetree/bindings/clock/lsi,axm5516-clks.txt
deleted file mode 100644
index 3ce97cfe999b..000000000000
--- a/Documentation/devicetree/bindings/clock/lsi,axm5516-clks.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-AXM5516 clock driver bindings
------------------------------
-
-Required properties :
-- compatible : shall contain "lsi,axm5516-clks"
-- reg : shall contain base register location and length
-- #clock-cells : shall contain 1
-
-The consumer specifies the desired clock by having the clock ID in its "clocks"
-phandle cell. See <dt-bindings/clock/lsi,axxia-clock.h> for the list of
-supported clock IDs.
-
-Example:
-
- clks: clock-controller@2010020000 {
- compatible = "lsi,axm5516-clks";
- #clock-cells = <1>;
- reg = <0x20 0x10020000 0 0x20000>;
- };
-
- serial0: uart@2010080000 {
- compatible = "arm,pl011", "arm,primecell";
- reg = <0x20 0x10080000 0 0x1000>;
- interrupts = <GIC_SPI 56 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&clks AXXIA_CLK_PER>;
- clock-names = "apb_pclk";
- };
- };
-
diff --git a/Documentation/devicetree/bindings/clock/lsi,axm5516-clks.yaml b/Documentation/devicetree/bindings/clock/lsi,axm5516-clks.yaml
new file mode 100644
index 000000000000..7a792dbeffb3
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/lsi,axm5516-clks.yaml
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright 2025 LSI
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/lsi,axm5516-clks.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: LSI AXM5516 Clock Controller
+
+maintainers:
+ - Anders Berg <anders.berg@lsi.com>
+
+description:
+ See <dt-bindings/clock/lsi,axxia-clock.h> for the list of supported clock IDs.
+
+properties:
+ compatible:
+ const: lsi,axm5516-clks
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ bus {
+ #address-cells = <2>;
+ #size-cells = <1>;
+ clock-controller@2010020000 {
+ compatible = "lsi,axm5516-clks";
+ #clock-cells = <1>;
+ reg = <0x20 0x10020000 0x20000>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/lsi,nspire-cx-clock.yaml b/Documentation/devicetree/bindings/clock/lsi,nspire-cx-clock.yaml
new file mode 100644
index 000000000000..52c217d210d0
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/lsi,nspire-cx-clock.yaml
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/lsi,nspire-cx-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: TI-NSPIRE Clocks
+
+maintainers:
+ - Daniel Tang <dt.tangr@gmail.com>
+
+properties:
+ compatible:
+ enum:
+ - lsi,nspire-cx-ahb-divider
+ - lsi,nspire-classic-ahb-divider
+ - lsi,nspire-cx-clock
+ - lsi,nspire-classic-clock
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 0
+
+additionalProperties: false
+
+required:
+ - compatible
+ - reg
diff --git a/Documentation/devicetree/bindings/clock/marvell,armada-370-corediv-clock.yaml b/Documentation/devicetree/bindings/clock/marvell,armada-370-corediv-clock.yaml
new file mode 100644
index 000000000000..9d766558cdb9
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/marvell,armada-370-corediv-clock.yaml
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/marvell,armada-370-corediv-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell MVEBU Core Divider Clock
+
+maintainers:
+ - Andrew Lunn <andrew@lunn.ch>
+ - Gregory Clement <gregory.clement@bootlin.com>
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - marvell,armada-370-corediv-clock
+ - marvell,armada-375-corediv-clock
+ - marvell,armada-380-corediv-clock
+ - marvell,mv98dx3236-corediv-clock
+ - items:
+ - const: marvell,armada-390-corediv-clock
+ - const: marvell,armada-380-corediv-clock
+
+ reg:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-output-names:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - "#clock-cells"
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@18740 {
+ compatible = "marvell,armada-370-corediv-clock";
+ reg = <0x18740 0xc>;
+ #clock-cells = <1>;
+ clocks = <&pll>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/marvell,armada-3700-periph-clock.yaml b/Documentation/devicetree/bindings/clock/marvell,armada-3700-periph-clock.yaml
new file mode 100644
index 000000000000..87e8e4ca111a
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/marvell,armada-3700-periph-clock.yaml
@@ -0,0 +1,96 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/marvell,armada-3700-periph-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell Armada 37xx SoCs Peripheral Clocks
+
+maintainers:
+ - Andrew Lunn <andrew@lunn.ch>
+ - Gregory Clement <gregory.clement@bootlin.com>
+
+description: >
+ Marvell Armada 37xx SoCs provide peripheral clocks which are used as clock
+ source for the peripheral of the SoC.
+
+ There are two different blocks associated to north bridge and south bridge.
+
+ The following is a list of provided IDs for Armada 3700 North bridge clocks:
+
+ ID Clock name Description
+ -----------------------------------
+ 0 mmc MMC controller
+ 1 sata_host Sata Host
+ 2 sec_at Security AT
+ 3 sac_dap Security DAP
+ 4 tsecm Security Engine
+ 5 setm_tmx Serial Embedded Trace Module
+ 6 avs Adaptive Voltage Scaling
+ 7 sqf SPI
+ 8 pwm PWM
+ 9 i2c_2 I2C 2
+ 10 i2c_1 I2C 1
+ 11 ddr_phy DDR PHY
+ 12 ddr_fclk DDR F clock
+ 13 trace Trace
+ 14 counter Counter
+ 15 eip97 EIP 97
+ 16 cpu CPU
+
+ The following is a list of provided IDs for Armada 3700 South bridge clocks:
+
+ ID Clock name Description
+ -----------------------------------
+ 0 gbe-50 50 MHz parent clock for Gigabit Ethernet
+ 1 gbe-core parent clock for Gigabit Ethernet core
+ 2 gbe-125 125 MHz parent clock for Gigabit Ethernet
+ 3 gbe1-50 50 MHz clock for Gigabit Ethernet port 1
+ 4 gbe0-50 50 MHz clock for Gigabit Ethernet port 0
+ 5 gbe1-125 125 MHz clock for Gigabit Ethernet port 1
+ 6 gbe0-125 125 MHz clock for Gigabit Ethernet port 0
+ 7 gbe1-core Gigabit Ethernet core port 1
+ 8 gbe0-core Gigabit Ethernet core port 0
+ 9 gbe-bm Gigabit Ethernet Buffer Manager
+ 10 sdio SDIO
+ 11 usb32-sub2-sys USB 2 clock
+ 12 usb32-ss-sys USB 3 clock
+ 13 pcie PCIe controller
+
+properties:
+ compatible:
+ oneOf:
+ - const: marvell,armada-3700-periph-clock-sb
+ - items:
+ - const: marvell,armada-3700-periph-clock-nb
+ - const: syscon
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: TBG-A P clock and specifier
+ - description: TBG-B P clock and specifier
+ - description: TBG-A S clock and specifier
+ - description: TBG-B S clock and specifier
+ - description: Xtal clock and specifier
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@13000{
+ compatible = "marvell,armada-3700-periph-clock-sb";
+ reg = <0x13000 0x1000>;
+ clocks = <&tbg 0>, <&tbg 1>, <&tbg 2>, <&tbg 3>, <&xtalclk>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/marvell,armada-3700-tbg-clock.yaml b/Documentation/devicetree/bindings/clock/marvell,armada-3700-tbg-clock.yaml
new file mode 100644
index 000000000000..7fd1d758f794
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/marvell,armada-3700-tbg-clock.yaml
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/marvell,armada-3700-tbg-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell Armada 3700 Time Base Generator Clock
+
+maintainers:
+ - Andrew Lunn <andrew@lunn.ch>
+ - Gregory Clement <gregory.clement@bootlin.com>
+
+description: >
+ Marvell Armada 37xx SoCs provide Time Base Generator clocks which are used as
+ parent clocks for the peripheral clocks.
+
+ The TBG clock consumer should specify the desired clock by having the clock ID
+ in its "clocks" phandle cell.
+
+ The following is a list of provided IDs and clock names on Armada 3700:
+
+ 0 = TBG A P
+ 1 = TBG B P
+ 2 = TBG A S
+ 3 = TBG B S
+
+properties:
+ compatible:
+ const: marvell,armada-3700-tbg-clock
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@13200 {
+ compatible = "marvell,armada-3700-tbg-clock";
+ reg = <0x13200 0x1000>;
+ clocks = <&xtalclk>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/marvell,armada-3700-uart-clock.yaml b/Documentation/devicetree/bindings/clock/marvell,armada-3700-uart-clock.yaml
new file mode 100644
index 000000000000..175f5c8f2bc5
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/marvell,armada-3700-uart-clock.yaml
@@ -0,0 +1,59 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/marvell,armada-3700-uart-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+title: Marvell Armada 3720 UART clocks
+
+maintainers:
+ - Pali Rohár <pali@kernel.org>
+
+properties:
+ compatible:
+ const: marvell,armada-3700-uart-clock
+
+ reg:
+ items:
+ - description: UART Clock Control Register
+ - description: UART 2 Baud Rate Divisor Register
+
+ clocks:
+ description: |
+ List of parent clocks suitable for UART from following set:
+ "TBG-A-P", "TBG-B-P", "TBG-A-S", "TBG-B-S", "xtal"
+ UART clock can use one from this set and when more are provided
+ then kernel would choose and configure the most suitable one.
+ It is suggest to specify at least one TBG clock to achieve
+ baudrates above 230400 and also to specify clock which bootloader
+ used for UART (most probably xtal) for smooth boot log on UART.
+
+ clock-names:
+ items:
+ - const: TBG-A-P
+ - const: TBG-B-P
+ - const: TBG-A-S
+ - const: TBG-B-S
+ - const: xtal
+ minItems: 1
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ uartclk: clock-controller@12010 {
+ compatible = "marvell,armada-3700-uart-clock";
+ reg = <0x12010 0x4>, <0x12210 0x4>;
+ clocks = <&tbg 0>, <&tbg 1>, <&tbg 2>, <&tbg 3>, <&xtalclk>;
+ clock-names = "TBG-A-P", "TBG-B-P", "TBG-A-S", "TBG-B-S", "xtal";
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/marvell,armada-xp-cpu-clock.yaml b/Documentation/devicetree/bindings/clock/marvell,armada-xp-cpu-clock.yaml
new file mode 100644
index 000000000000..f2ac6741da9a
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/marvell,armada-xp-cpu-clock.yaml
@@ -0,0 +1,44 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+---
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+$id: http://devicetree.org/schemas/clock/marvell,armada-xp-cpu-clock.yaml#
+
+title: Marvell EBU CPU Clock
+
+maintainers:
+ - Andrew Lunn <andrew@lunn.ch>
+ - Gregory Clement <gregory.clement@bootlin.com>
+
+properties:
+ compatible:
+ enum:
+ - marvell,armada-xp-cpu-clock
+ - marvell,mv98dx3236-cpu-clock
+
+ reg:
+ items:
+ - description: Clock complex registers
+ - description: PMU DFS registers
+
+ '#clock-cells':
+ const: 1
+
+ clocks:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@d0018700 {
+ #clock-cells = <1>;
+ compatible = "marvell,armada-xp-cpu-clock";
+ reg = <0xd0018700 0xa0>, <0x1c054 0x10>;
+ clocks = <&coreclk 1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/marvell,berlin.txt b/Documentation/devicetree/bindings/clock/marvell,berlin.txt
deleted file mode 100644
index c611c495f3ff..000000000000
--- a/Documentation/devicetree/bindings/clock/marvell,berlin.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-Device Tree Clock bindings for Marvell Berlin
-
-This binding uses the common clock binding[1].
-
-[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-
-Clock related registers are spread among the chip control registers. Berlin
-clock node should be a sub-node of the chip controller node. Marvell Berlin2
-(BG2, BG2CD, BG2Q) SoCs share the same IP for PLLs and clocks, with some
-minor differences in features and register layout.
-
-Required properties:
-- compatible: must be "marvell,berlin2-clk" or "marvell,berlin2q-clk"
-- #clock-cells: must be 1
-- clocks: must be the input parent clock phandle
-- clock-names: name of the input parent clock
- Allowed clock-names for the reference clocks are
- "refclk" for the SoCs oscillator input on all SoCs,
- and SoC-specific input clocks for
- BG2/BG2CD: "video_ext0" for the external video clock input
-
-
-Example:
-
-chip_clk: clock {
- compatible = "marvell,berlin2q-clk";
-
- #clock-cells = <1>;
- clocks = <&refclk>;
- clock-names = "refclk";
-};
diff --git a/Documentation/devicetree/bindings/clock/marvell,berlin2-clk.yaml b/Documentation/devicetree/bindings/clock/marvell,berlin2-clk.yaml
new file mode 100644
index 000000000000..8d48a2c7e381
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/marvell,berlin2-clk.yaml
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/marvell,berlin2-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell Berlin Clock Controller
+
+maintainers:
+ - Jisheng Zhang <jszhang@kernel.org>
+
+description:
+ Clock related registers are spread among the chip control registers. Berlin
+ clock node should be a sub-node of the chip controller node. Marvell Berlin2
+ (BG2, BG2CD, BG2Q) SoCs share the same IP for PLLs and clocks, with some minor
+ differences in features and register layout.
+
+properties:
+ compatible:
+ enum:
+ - marvell,berlin2-clk
+ - marvell,berlin2q-clk
+
+ '#clock-cells':
+ const: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ items:
+ - enum:
+ - refclk
+ - video_ext0
+
+required:
+ - compatible
+ - '#clock-cells'
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller {
+ compatible = "marvell,berlin2q-clk";
+ #clock-cells = <1>;
+ clocks = <&refclk>;
+ clock-names = "refclk";
+ };
diff --git a/Documentation/devicetree/bindings/clock/marvell,dove-divider-clock.yaml b/Documentation/devicetree/bindings/clock/marvell,dove-divider-clock.yaml
new file mode 100644
index 000000000000..7a8e0e281b63
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/marvell,dove-divider-clock.yaml
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/marvell,dove-divider-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell Dove PLL Divider Clock
+
+maintainers:
+ - Andrew Lunn <andrew@lunn.ch>
+ - Gregory Clement <gregory.clement@bootlin.com>
+
+description: >
+ Marvell Dove has a 2GHz PLL, which feeds into a set of dividers to provide
+ high speed clocks for a number of peripherals. These dividers are part of the
+ PMU, and thus this node should be a child of the PMU node.
+
+ The following clocks are provided:
+
+ ID Clock
+ -------------
+ 0 AXI bus clock
+ 1 GPU clock
+ 2 VMeta clock
+ 3 LCD clock
+
+properties:
+ compatible:
+ const: marvell,dove-divider-clock
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@64 {
+ compatible = "marvell,dove-divider-clock";
+ reg = <0x0064 0x8>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/marvell,mmp2-audio-clock.yaml b/Documentation/devicetree/bindings/clock/marvell,mmp2-audio-clock.yaml
new file mode 100644
index 000000000000..dffa73402da9
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/marvell,mmp2-audio-clock.yaml
@@ -0,0 +1,75 @@
+# SPDX-License-Identifier: (GPL-2.0+ OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/marvell,mmp2-audio-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell MMP2 Audio Clock Controller
+
+maintainers:
+ - Lubomir Rintel <lkundrak@v3.sk>
+
+description: |
+ The audio clock controller generates and supplies the clocks to the audio
+ codec.
+
+ Each clock is assigned an identifier and client nodes use this identifier
+ to specify the clock which they consume.
+
+ All these identifiers could be found in
+ <dt-bindings/clock/marvell,mmp2-audio.h>.
+
+properties:
+ compatible:
+ enum:
+ - marvell,mmp2-audio-clock
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: Audio subsystem clock
+ - description: The crystal oscillator clock
+ - description: First I2S clock
+ - description: Second I2S clock
+
+ clock-names:
+ items:
+ - const: audio
+ - const: vctcxo
+ - const: i2s0
+ - const: i2s1
+
+ '#clock-cells':
+ const: 1
+
+ power-domains:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/marvell,mmp2-audio.h>
+ #include <dt-bindings/clock/marvell,mmp2.h>
+ #include <dt-bindings/power/marvell,mmp2.h>
+
+ clock-controller@d42a0c30 {
+ compatible = "marvell,mmp2-audio-clock";
+ reg = <0xd42a0c30 0x10>;
+ clock-names = "audio", "vctcxo", "i2s0", "i2s1";
+ clocks = <&soc_clocks MMP2_CLK_AUDIO>,
+ <&soc_clocks MMP2_CLK_VCTCXO>,
+ <&soc_clocks MMP2_CLK_I2S0>,
+ <&soc_clocks MMP2_CLK_I2S1>;
+ power-domains = <&soc_clocks MMP2_POWER_DOMAIN_AUDIO>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/marvell,mmp2-clock.yaml b/Documentation/devicetree/bindings/clock/marvell,mmp2-clock.yaml
new file mode 100644
index 000000000000..d68f0d196e7d
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/marvell,mmp2-clock.yaml
@@ -0,0 +1,69 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/marvell,mmp2-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell MMP2 and MMP3 Clock Controller
+
+maintainers:
+ - Lubomir Rintel <lkundrak@v3.sk>
+
+description: |
+ The clock subsystem on MMP2 or MMP3 generates and supplies clock to various
+ controllers within the SoC.
+
+ Each clock is assigned an identifier and client nodes use this identifier
+ to specify the clock which they consume.
+
+ All these identifiers could be found in <dt-bindings/clock/marvell,mmp2.h>.
+
+properties:
+ compatible:
+ enum:
+ - marvell,mmp2-clock # controller compatible with MMP2 SoC
+ - marvell,mmp3-clock # controller compatible with MMP3 SoC
+
+ reg:
+ items:
+ - description: MPMU register region
+ - description: APMU register region
+ - description: APBC register region
+
+ reg-names:
+ items:
+ - const: mpmu
+ - const: apmu
+ - const: apbc
+
+ '#clock-cells':
+ const: 1
+
+ '#reset-cells':
+ const: 1
+
+ '#power-domain-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - reg-names
+ - '#clock-cells'
+ - '#reset-cells'
+ - '#power-domain-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@d4050000 {
+ compatible = "marvell,mmp2-clock";
+ reg = <0xd4050000 0x1000>,
+ <0xd4282800 0x400>,
+ <0xd4015000 0x1000>;
+ reg-names = "mpmu", "apmu", "apbc";
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/marvell,mmp2.txt b/Documentation/devicetree/bindings/clock/marvell,mmp2.txt
deleted file mode 100644
index af376a01f2b7..000000000000
--- a/Documentation/devicetree/bindings/clock/marvell,mmp2.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-* Marvell MMP2 Clock Controller
-
-The MMP2 clock subsystem generates and supplies clock to various
-controllers within the MMP2 SoC.
-
-Required Properties:
-
-- compatible: should be one of the following.
- - "marvell,mmp2-clock" - controller compatible with MMP2 SoC.
-
-- reg: physical base address of the clock subsystem and length of memory mapped
- region. There are 3 places in SOC has clock control logic:
- "mpmu", "apmu", "apbc". So three reg spaces need to be defined.
-
-- #clock-cells: should be 1.
-- #reset-cells: should be 1.
-
-Each clock is assigned an identifier and client nodes use this identifier
-to specify the clock which they consume.
-
-All these identifier could be found in <dt-bindings/clock/marvell-mmp2.h>.
diff --git a/Documentation/devicetree/bindings/clock/marvell,mvebu-core-clock.yaml b/Documentation/devicetree/bindings/clock/marvell,mvebu-core-clock.yaml
new file mode 100644
index 000000000000..215bcd9080c3
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/marvell,mvebu-core-clock.yaml
@@ -0,0 +1,94 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/marvell,mvebu-core-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell MVEBU SoC core clock
+
+maintainers:
+ - Andrew Lunn <andrew@lunn.ch>
+ - Gregory Clement <gregory.clement@bootlin.com>
+
+description: >
+ Marvell MVEBU SoCs usually allow to determine core clock frequencies by
+ reading the Sample-At-Reset (SAR) register. The core clock consumer should
+ specify the desired clock by having the clock ID in its "clocks" phandle cell.
+
+ The following is a list of provided IDs and clock names on Armada 370/XP:
+ 0 = tclk (Internal Bus clock)
+ 1 = cpuclk (CPU clock)
+ 2 = nbclk (L2 Cache clock)
+ 3 = hclk (DRAM control clock)
+ 4 = dramclk (DDR clock)
+
+ The following is a list of provided IDs and clock names on Armada 375:
+ 0 = tclk (Internal Bus clock)
+ 1 = cpuclk (CPU clock)
+ 2 = l2clk (L2 Cache clock)
+ 3 = ddrclk (DDR clock)
+
+ The following is a list of provided IDs and clock names on Armada 380/385:
+ 0 = tclk (Internal Bus clock)
+ 1 = cpuclk (CPU clock)
+ 2 = l2clk (L2 Cache clock)
+ 3 = ddrclk (DDR clock)
+
+ The following is a list of provided IDs and clock names on Armada 39x:
+ 0 = tclk (Internal Bus clock)
+ 1 = cpuclk (CPU clock)
+ 2 = nbclk (Coherent Fabric clock)
+ 3 = hclk (SDRAM Controller Internal Clock)
+ 4 = dclk (SDRAM Interface Clock)
+ 5 = refclk (Reference Clock)
+
+ The following is a list of provided IDs and clock names on 98dx3236:
+ 0 = tclk (Internal Bus clock)
+ 1 = cpuclk (CPU clock)
+ 2 = ddrclk (DDR clock)
+ 3 = mpll (MPLL Clock)
+
+ The following is a list of provided IDs and clock names on Kirkwood and Dove:
+ 0 = tclk (Internal Bus clock)
+ 1 = cpuclk (CPU0 clock)
+ 2 = l2clk (L2 Cache clock derived from CPU0 clock)
+ 3 = ddrclk (DDR controller clock derived from CPU0 clock)
+
+ The following is a list of provided IDs and clock names on Orion5x:
+ 0 = tclk (Internal Bus clock)
+ 1 = cpuclk (CPU0 clock)
+ 2 = ddrclk (DDR controller clock derived from CPU0 clock)
+
+properties:
+ compatible:
+ enum:
+ - marvell,armada-370-core-clock
+ - marvell,armada-375-core-clock
+ - marvell,armada-380-core-clock
+ - marvell,armada-390-core-clock
+ - marvell,armada-xp-core-clock
+ - marvell,dove-core-clock
+ - marvell,kirkwood-core-clock
+ - marvell,mv88f5181-core-clock
+ - marvell,mv88f5182-core-clock
+ - marvell,mv88f5281-core-clock
+ - marvell,mv88f6180-core-clock
+ - marvell,mv88f6183-core-clock
+ - marvell,mv98dx1135-core-clock
+ - marvell,mv98dx3236-core-clock
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ clock-output-names:
+ description: Overwrite default clock output names.
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+additionalProperties: false
diff --git a/Documentation/devicetree/bindings/clock/marvell,pxa1908.yaml b/Documentation/devicetree/bindings/clock/marvell,pxa1908.yaml
new file mode 100644
index 000000000000..6f3a8578fe2a
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/marvell,pxa1908.yaml
@@ -0,0 +1,66 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/marvell,pxa1908.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell PXA1908 Clock Controllers
+
+maintainers:
+ - Duje Mihanović <duje.mihanovic@skole.hr>
+
+description: |
+ The PXA1908 clock subsystem generates and supplies clock to various
+ controllers within the PXA1908 SoC. The PXA1908 contains numerous clock
+ controller blocks, with the ones currently supported being APBC, APBCP, MPMU
+ and APMU roughly corresponding to internal buses.
+
+ All these clock identifiers could be found in <include/dt-bindings/marvell,pxa1908.h>.
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - marvell,pxa1908-apbc
+ - marvell,pxa1908-apbcp
+ - marvell,pxa1908-mpmu
+ - items:
+ - const: marvell,pxa1908-apmu
+ - const: syscon
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ '#power-domain-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+additionalProperties: false
+
+if:
+ not:
+ properties:
+ compatible:
+ contains:
+ const: marvell,pxa1908-apmu
+
+then:
+ properties:
+ '#power-domain-cells': false
+
+examples:
+ # APMU block:
+ - |
+ clock-controller@d4282800 {
+ compatible = "marvell,pxa1908-apmu", "syscon";
+ reg = <0xd4282800 0x400>;
+ #clock-cells = <1>;
+ #power-domain-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/marvell-armada-370-gating-clock.yaml b/Documentation/devicetree/bindings/clock/marvell-armada-370-gating-clock.yaml
new file mode 100644
index 000000000000..0475360d2b6a
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/marvell-armada-370-gating-clock.yaml
@@ -0,0 +1,227 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+---
+$id: http://devicetree.org/schemas/clock/marvell-armada-370-gating-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell EBU SoC gating-clock
+
+maintainers:
+ - Andrew Lunn <andrew@lunn.ch>
+ - Gregory Clement <gregory.clement@bootlin.com>
+
+description: >
+ Marvell Armada 370/375/380/385/39x/XP, Dove and Kirkwood allow some peripheral
+ clocks to be gated to save some power. The clock ID is directly mapped to the
+ corresponding clock gating control bit in HW to ease manual clock lookup in
+ datasheet.
+
+ The following is a list of provided IDs for Armada 370:
+
+ ID Clock Peripheral
+ -----------------------------------
+ 0 Audio AC97 Cntrl
+ 1 pex0_en PCIe 0 Clock out
+ 2 pex1_en PCIe 1 Clock out
+ 3 ge1 Gigabit Ethernet 1
+ 4 ge0 Gigabit Ethernet 0
+ 5 pex0 PCIe Cntrl 0
+ 9 pex1 PCIe Cntrl 1
+ 15 sata0 SATA Host 0
+ 17 sdio SDHCI Host
+ 23 crypto CESA (crypto engine)
+ 25 tdm Time Division Mplx
+ 28 ddr DDR Cntrl
+ 30 sata1 SATA Host 0
+
+ The following is a list of provided IDs for Armada 375:
+
+ ID Clock Peripheral
+ -----------------------------------
+ 2 mu Management Unit
+ 3 pp Packet Processor
+ 4 ptp PTP
+ 5 pex0 PCIe 0 Clock out
+ 6 pex1 PCIe 1 Clock out
+ 8 audio Audio Cntrl
+ 11 nd_clk Nand Flash Cntrl
+ 14 sata0_link SATA 0 Link
+ 15 sata0_core SATA 0 Core
+ 16 usb3 USB3 Host
+ 17 sdio SDHCI Host
+ 18 usb USB Host
+ 19 gop Gigabit Ethernet MAC
+ 20 sata1_link SATA 1 Link
+ 21 sata1_core SATA 1 Core
+ 22 xor0 XOR DMA 0
+ 23 xor1 XOR DMA 0
+ 24 copro Coprocessor
+ 25 tdm Time Division Mplx
+ 28 crypto0_enc Cryptographic Unit Port 0 Encryption
+ 29 crypto0_core Cryptographic Unit Port 0 Core
+ 30 crypto1_enc Cryptographic Unit Port 1 Encryption
+ 31 crypto1_core Cryptographic Unit Port 1 Core
+
+ The following is a list of provided IDs for Armada 380/385:
+
+ ID Clock Peripheral
+ -----------------------------------
+ 0 audio Audio
+ 2 ge2 Gigabit Ethernet 2
+ 3 ge1 Gigabit Ethernet 1
+ 4 ge0 Gigabit Ethernet 0
+ 5 pex1 PCIe 1
+ 6 pex2 PCIe 2
+ 7 pex3 PCIe 3
+ 8 pex0 PCIe 0
+ 9 usb3h0 USB3 Host 0
+ 10 usb3h1 USB3 Host 1
+ 11 usb3d USB3 Device
+ 13 bm Buffer Management
+ 14 crypto0z Cryptographic 0 Z
+ 15 sata0 SATA 0
+ 16 crypto1z Cryptographic 1 Z
+ 17 sdio SDIO
+ 18 usb2 USB 2
+ 21 crypto1 Cryptographic 1
+ 22 xor0 XOR 0
+ 23 crypto0 Cryptographic 0
+ 25 tdm Time Division Multiplexing
+ 28 xor1 XOR 1
+ 30 sata1 SATA 1
+
+ The following is a list of provided IDs for Armada 39x:
+
+ ID Clock Peripheral
+ -----------------------------------
+ 5 pex1 PCIe 1
+ 6 pex2 PCIe 2
+ 7 pex3 PCIe 3
+ 8 pex0 PCIe 0
+ 9 usb3h0 USB3 Host 0
+ 10 usb3h1 USB3 Host 1
+ 15 sata0 SATA 0
+ 17 sdio SDIO
+ 22 xor0 XOR 0
+ 28 xor1 XOR 1
+
+ The following is a list of provided IDs for Armada XP:
+
+ ID Clock Peripheral
+ -----------------------------------
+ 0 audio Audio Cntrl
+ 1 ge3 Gigabit Ethernet 3
+ 2 ge2 Gigabit Ethernet 2
+ 3 ge1 Gigabit Ethernet 1
+ 4 ge0 Gigabit Ethernet 0
+ 5 pex0 PCIe Cntrl 0
+ 6 pex1 PCIe Cntrl 1
+ 7 pex2 PCIe Cntrl 2
+ 8 pex3 PCIe Cntrl 3
+ 13 bp
+ 14 sata0lnk
+ 15 sata0 SATA Host 0
+ 16 lcd LCD Cntrl
+ 17 sdio SDHCI Host
+ 18 usb0 USB Host 0
+ 19 usb1 USB Host 1
+ 20 usb2 USB Host 2
+ 22 xor0 XOR DMA 0
+ 23 crypto CESA engine
+ 25 tdm Time Division Mplx
+ 28 xor1 XOR DMA 1
+ 29 sata1lnk
+ 30 sata1 SATA Host 1
+
+ The following is a list of provided IDs for 98dx3236:
+
+ ID Clock Peripheral
+ -----------------------------------
+ 3 ge1 Gigabit Ethernet 1
+ 4 ge0 Gigabit Ethernet 0
+ 5 pex0 PCIe Cntrl 0
+ 17 sdio SDHCI Host
+ 18 usb0 USB Host 0
+ 22 xor0 XOR DMA 0
+
+ The following is a list of provided IDs for Dove:
+
+ ID Clock Peripheral
+ -----------------------------------
+ 0 usb0 USB Host 0
+ 1 usb1 USB Host 1
+ 2 ge Gigabit Ethernet
+ 3 sata SATA Host
+ 4 pex0 PCIe Cntrl 0
+ 5 pex1 PCIe Cntrl 1
+ 8 sdio0 SDHCI Host 0
+ 9 sdio1 SDHCI Host 1
+ 10 nand NAND Cntrl
+ 11 camera Camera Cntrl
+ 12 i2s0 I2S Cntrl 0
+ 13 i2s1 I2S Cntrl 1
+ 15 crypto CESA engine
+ 21 ac97 AC97 Cntrl
+ 22 pdma Peripheral DMA
+ 23 xor0 XOR DMA 0
+ 24 xor1 XOR DMA 1
+ 30 gephy Gigabit Ethernet PHY
+ Note: gephy(30) is implemented as a parent clock of ge(2)
+
+ The following is a list of provided IDs for Kirkwood:
+
+ ID Clock Peripheral
+ -----------------------------------
+ 0 ge0 Gigabit Ethernet 0
+ 2 pex0 PCIe Cntrl 0
+ 3 usb0 USB Host 0
+ 4 sdio SDIO Cntrl
+ 5 tsu Transp. Stream Unit
+ 6 dunit SDRAM Cntrl
+ 7 runit Runit
+ 8 xor0 XOR DMA 0
+ 9 audio I2S Cntrl 0
+ 14 sata0 SATA Host 0
+ 15 sata1 SATA Host 1
+ 16 xor1 XOR DMA 1
+ 17 crypto CESA engine
+ 18 pex1 PCIe Cntrl 1
+ 19 ge1 Gigabit Ethernet 1
+ 20 tdm Time Division Mplx
+
+properties:
+ compatible:
+ enum:
+ - marvell,armada-370-gating-clock
+ - marvell,armada-375-gating-clock
+ - marvell,armada-380-gating-clock
+ - marvell,armada-390-gating-clock
+ - marvell,armada-xp-gating-clock
+ - marvell,mv98dx3236-gating-clock
+ - marvell,dove-gating-clock
+ - marvell,kirkwood-gating-clock
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@d0038 {
+ compatible = "marvell,dove-gating-clock";
+ reg = <0xd0038 0x4>;
+ /* default parent clock is tclk */
+ clocks = <&core_clk 0>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/maxim,max77686.txt b/Documentation/devicetree/bindings/clock/maxim,max77686.txt
deleted file mode 100644
index 3472b461ca93..000000000000
--- a/Documentation/devicetree/bindings/clock/maxim,max77686.txt
+++ /dev/null
@@ -1,114 +0,0 @@
-Binding for Maxim MAX77686/MAX77802/MAX77620 32k clock generator block
-
-This is a part of device tree bindings of MAX77686/MAX77802/MAX77620
-multi-function device. More information can be found in MFD DT binding
-doc as follows:
- bindings/mfd/max77686.txt for MAX77686 and
- bindings/mfd/max77802.txt for MAX77802 and
- bindings/mfd/max77620.txt for MAX77620.
-
-The MAX77686 contains three 32.768khz clock outputs that can be controlled
-(gated/ungated) over I2C. Clocks are defined as preprocessor macros in
-dt-bindings/clock/maxim,max77686.h.
-
-
-The MAX77802 contains two 32.768khz clock outputs that can be controlled
-(gated/ungated) over I2C. Clocks are defined as preprocessor macros in
-dt-bindings/clock/maxim,max77802.h.
-
-The MAX77686 contains one 32.768khz clock outputs that can be controlled
-(gated/ungated) over I2C. Clocks are defined as preprocessor macros in
-dt-bindings/clock/maxim,max77620.h.
-
-Following properties should be presend in main device node of the MFD chip.
-
-Required properties:
-
-- #clock-cells: from common clock binding; shall be set to 1.
-
-Optional properties:
-- clock-output-names: From common clock binding.
-
-Each clock is assigned an identifier and client nodes can use this identifier
-to specify the clock which they consume. Following indices are allowed:
- - 0: 32khz_ap clock (max77686, max77802), 32khz_out0 (max77620)
- - 1: 32khz_cp clock (max77686, max77802),
- - 2: 32khz_pmic clock (max77686).
-
-Clocks are defined as preprocessor macros in above dt-binding header for
-respective chips.
-
-Example:
-
-1. With MAX77686:
-
-#include <dt-bindings/clock/maxim,max77686.h>
-/* ... */
-
- Node of the MFD chip
- max77686: max77686@9 {
- compatible = "maxim,max77686";
- interrupt-parent = <&wakeup_eint>;
- interrupts = <26 0>;
- reg = <0x09>;
- #clock-cells = <1>;
-
- /* ... */
- };
-
- Clock consumer node
-
- foo@0 {
- compatible = "bar,foo";
- /* ... */
- clock-names = "my-clock";
- clocks = <&max77686 MAX77686_CLK_PMIC>;
- };
-
-2. With MAX77802:
-
-#include <dt-bindings/clock/maxim,max77802.h>
-/* ... */
-
- Node of the MFD chip
- max77802: max77802@9 {
- compatible = "maxim,max77802";
- interrupt-parent = <&wakeup_eint>;
- interrupts = <26 0>;
- reg = <0x09>;
- #clock-cells = <1>;
-
- /* ... */
- };
-
- Clock consumer node
-
- foo@0 {
- compatible = "bar,foo";
- /* ... */
- clock-names = "my-clock";
- clocks = <&max77802 MAX77802_CLK_32K_AP>;
- };
-
-
-3. With MAX77620:
-
-#include <dt-bindings/clock/maxim,max77620.h>
-/* ... */
-
- Node of the MFD chip
- max77620: max77620@3c {
- compatible = "maxim,max77620";
- reg = <0x3c>;
- #clock-cells = <1>;
- /* ... */
- };
-
- Clock consumer node
-
- foo@0 {
- compatible = "bar,foo";
- /* ... */
- clock-names = "my-clock";
- clocks = <&max77620 MAX77620_CLK_32K_OUT0>;
- };
diff --git a/Documentation/devicetree/bindings/clock/maxim,max9485.txt b/Documentation/devicetree/bindings/clock/maxim,max9485.txt
deleted file mode 100644
index 61bec1100a94..000000000000
--- a/Documentation/devicetree/bindings/clock/maxim,max9485.txt
+++ /dev/null
@@ -1,59 +0,0 @@
-Devicetree bindings for Maxim MAX9485 Programmable Audio Clock Generator
-
-This device exposes 4 clocks in total:
-
-- MAX9485_MCLKOUT: A gated, buffered output of the input clock of 27 MHz
-- MAX9485_CLKOUT: A PLL that can be configured to 16 different discrete
- frequencies
-- MAX9485_CLKOUT[1,2]: Two gated outputs for MAX9485_CLKOUT
-
-MAX9485_CLKOUT[1,2] are children of MAX9485_CLKOUT which upchain all rate set
-requests.
-
-Required properties:
-- compatible: "maxim,max9485"
-- clocks: Input clock, must provice 27.000 MHz
-- clock-names: Must be set to "xclk"
-- #clock-cells: From common clock binding; shall be set to 1
-
-Optional properties:
-- reset-gpios: GPIO descriptor connected to the #RESET input pin
-- vdd-supply: A regulator node for Vdd
-- clock-output-names: Name of output clocks, as defined in common clock
- bindings
-
-If not explicitly set, the output names are "mclkout", "clkout", "clkout1"
-and "clkout2".
-
-Clocks are defined as preprocessor macros in the dt-binding header.
-
-Example:
-
- #include <dt-bindings/clock/maxim,max9485.h>
-
- xo-27mhz: xo-27mhz {
- compatible = "fixed-clock";
- #clock-cells = <0>;
- clock-frequency = <27000000>;
- };
-
- &i2c0 {
- max9485: audio-clock@63 {
- reg = <0x63>;
- compatible = "maxim,max9485";
- clock-names = "xclk";
- clocks = <&xo-27mhz>;
- reset-gpios = <&gpio 1 GPIO_ACTIVE_HIGH>;
- vdd-supply = <&3v3-reg>;
- #clock-cells = <1>;
- };
- };
-
- // Clock consumer node
-
- foo@0 {
- compatible = "bar,foo";
- /* ... */
- clock-names = "foo-input-clk";
- clocks = <&max9485 MAX9485_CLKOUT1>;
- };
diff --git a/Documentation/devicetree/bindings/clock/maxim,max9485.yaml b/Documentation/devicetree/bindings/clock/maxim,max9485.yaml
new file mode 100644
index 000000000000..f9d8941c7235
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/maxim,max9485.yaml
@@ -0,0 +1,82 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/maxim,max9485.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Maxim MAX9485 Programmable Audio Clock Generator
+
+maintainers:
+ - Daniel Mack <daniel@zonque.org>
+
+description: >
+ Maxim MAX9485 Programmable Audio Clock Generator exposes 4 clocks in total:
+
+ - MAX9485_MCLKOUT: A gated, buffered output of the input clock of 27 MHz
+ - MAX9485_CLKOUT: A PLL that can be configured to 16 different discrete
+ frequencies
+ - MAX9485_CLKOUT[1,2]: Two gated outputs for MAX9485_CLKOUT
+
+ MAX9485_CLKOUT[1,2] are children of MAX9485_CLKOUT which upchain all rate set
+ requests.
+
+properties:
+ compatible:
+ const: maxim,max9485
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ description: Input clock. Must provide 27 MHz
+ maxItems: 1
+
+ clock-names:
+ items:
+ - const: xclk
+
+ '#clock-cells':
+ const: 1
+
+ reset-gpios:
+ description: >
+ GPIO descriptor connected to the #RESET input pin
+
+ vdd-supply:
+ description: A regulator node for Vdd
+
+ clock-output-names:
+ description: Name of output clocks, as defined in common clock bindings
+ items:
+ - const: mclkout
+ - const: clkout
+ - const: clkout1
+ - const: clkout2
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ clock-controller@63 {
+ compatible = "maxim,max9485";
+ reg = <0x63>;
+ #clock-cells = <1>;
+ clock-names = "xclk";
+ clocks = <&xo_27mhz>;
+ reset-gpios = <&gpio 1 GPIO_ACTIVE_HIGH>;
+ vdd-supply = <&reg_3v3>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/mediatek,apmixedsys.yaml b/Documentation/devicetree/bindings/clock/mediatek,apmixedsys.yaml
new file mode 100644
index 000000000000..591a9e862c7d
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/mediatek,apmixedsys.yaml
@@ -0,0 +1,66 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/mediatek,apmixedsys.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek AP Mixedsys Controller
+
+maintainers:
+ - Michael Turquette <mturquette@baylibre.com>
+ - Stephen Boyd <sboyd@kernel.org>
+
+description:
+ The Mediatek apmixedsys controller provides PLLs to the system.
+ The clock values can be found in <dt-bindings/clock/mt*-clk.h>
+ and <dt-bindings/clock/mediatek,mt*-apmixedsys.h>.
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - mediatek,mt6797-apmixedsys
+ - mediatek,mt7622-apmixedsys
+ - mediatek,mt7981-apmixedsys
+ - mediatek,mt7986-apmixedsys
+ - mediatek,mt7988-apmixedsys
+ - mediatek,mt8135-apmixedsys
+ - mediatek,mt8173-apmixedsys
+ - mediatek,mt8516-apmixedsys
+ - items:
+ - const: mediatek,mt7623-apmixedsys
+ - const: mediatek,mt2701-apmixedsys
+ - const: syscon
+ - items:
+ - enum:
+ - mediatek,mt2701-apmixedsys
+ - mediatek,mt2712-apmixedsys
+ - mediatek,mt6735-apmixedsys
+ - mediatek,mt6765-apmixedsys
+ - mediatek,mt6779-apmixed
+ - mediatek,mt6795-apmixedsys
+ - mediatek,mt7629-apmixedsys
+ - mediatek,mt8167-apmixedsys
+ - mediatek,mt8183-apmixedsys
+ - const: syscon
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ apmixedsys: clock-controller@10209000 {
+ compatible = "mediatek,mt8173-apmixedsys";
+ reg = <0x10209000 0x1000>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/mediatek,ethsys.yaml b/Documentation/devicetree/bindings/clock/mediatek,ethsys.yaml
new file mode 100644
index 000000000000..f9cddacc2eae
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/mediatek,ethsys.yaml
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/mediatek,ethsys.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Mediatek ethsys controller
+
+description:
+ The available clocks are defined in dt-bindings/clock/mt*-clk.h.
+
+maintainers:
+ - James Liao <jamesjj.liao@mediatek.com>
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - mediatek,mt2701-ethsys
+ - mediatek,mt7622-ethsys
+ - mediatek,mt7629-ethsys
+ - mediatek,mt7981-ethsys
+ - mediatek,mt7986-ethsys
+ - mediatek,mt7988-ethsys
+ - const: syscon
+ - items:
+ - const: mediatek,mt7623-ethsys
+ - const: mediatek,mt2701-ethsys
+ - const: syscon
+
+ reg:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 1
+
+ "#reset-cells":
+ const: 1
+
+required:
+ - reg
+ - "#clock-cells"
+ - "#reset-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@1b000000 {
+ compatible = "mediatek,mt2701-ethsys", "syscon";
+ reg = <0x1b000000 0x1000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/mediatek,infracfg.yaml b/Documentation/devicetree/bindings/clock/mediatek,infracfg.yaml
new file mode 100644
index 000000000000..d1d30700d9b0
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/mediatek,infracfg.yaml
@@ -0,0 +1,87 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/mediatek,infracfg.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek Infrastructure System Configuration Controller
+
+maintainers:
+ - Matthias Brugger <matthias.bgg@gmail.com>
+
+description:
+ The Mediatek infracfg controller provides various clocks and reset outputs
+ to the system. The clock values can be found in <dt-bindings/clock/mt*-clk.h>
+ and <dt-bindings/clock/mediatek,mt*-infracfg.h>, and reset values in
+ <dt-bindings/reset/mt*-reset.h>, <dt-bindings/reset/mt*-resets.h> and
+ <dt-bindings/reset/mediatek,mt*-infracfg.h>.
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - mediatek,mt2701-infracfg
+ - mediatek,mt2712-infracfg
+ - mediatek,mt6735-infracfg
+ - mediatek,mt6765-infracfg
+ - mediatek,mt6795-infracfg
+ - mediatek,mt6779-infracfg_ao
+ - mediatek,mt6797-infracfg
+ - mediatek,mt7622-infracfg
+ - mediatek,mt7629-infracfg
+ - mediatek,mt7981-infracfg
+ - mediatek,mt7986-infracfg
+ - mediatek,mt7988-infracfg
+ - mediatek,mt8135-infracfg
+ - mediatek,mt8167-infracfg
+ - mediatek,mt8173-infracfg
+ - mediatek,mt8183-infracfg
+ - mediatek,mt8516-infracfg
+ - const: syscon
+ - items:
+ - const: mediatek,mt7623-infracfg
+ - const: mediatek,mt2701-infracfg
+ - const: syscon
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ '#reset-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - mediatek,mt2701-infracfg
+ - mediatek,mt2712-infracfg
+ - mediatek,mt6795-infracfg
+ - mediatek,mt7622-infracfg
+ - mediatek,mt7986-infracfg
+ - mediatek,mt8135-infracfg
+ - mediatek,mt8173-infracfg
+ - mediatek,mt8183-infracfg
+then:
+ required:
+ - '#reset-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ infracfg: clock-controller@10001000 {
+ compatible = "mediatek,mt8173-infracfg", "syscon";
+ reg = <0x10001000 0x1000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/mediatek,mt2701-hifsys.yaml b/Documentation/devicetree/bindings/clock/mediatek,mt2701-hifsys.yaml
new file mode 100644
index 000000000000..9e7c725093aa
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/mediatek,mt2701-hifsys.yaml
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/mediatek,mt2701-hifsys.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek HIFSYS clock and reset controller
+
+description:
+ The MediaTek HIFSYS controller provides various clocks and reset outputs to
+ the system.
+
+maintainers:
+ - Matthias Brugger <matthias.bgg@gmail.com>
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - mediatek,mt2701-hifsys
+ - mediatek,mt7622-hifsys
+ - items:
+ - enum:
+ - mediatek,mt7623-hifsys
+ - const: mediatek,mt2701-hifsys
+
+ reg:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 1
+ description: The available clocks are defined in dt-bindings/clock/mt*-clk.h
+
+ "#reset-cells":
+ const: 1
+
+required:
+ - reg
+ - "#clock-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@1a000000 {
+ compatible = "mediatek,mt2701-hifsys";
+ reg = <0x1a000000 0x1000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/mediatek,mt6795-clock.yaml b/Documentation/devicetree/bindings/clock/mediatek,mt6795-clock.yaml
new file mode 100644
index 000000000000..04469eabc8fa
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/mediatek,mt6795-clock.yaml
@@ -0,0 +1,66 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/mediatek,mt6795-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek Functional Clock Controller for MT6795
+
+maintainers:
+ - AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
+ - Chun-Jie Chen <chun-jie.chen@mediatek.com>
+
+description: |
+ The clock architecture in MediaTek like below
+ PLLs -->
+ dividers -->
+ muxes
+ -->
+ clock gate
+
+ The devices provide clock gate control in different IP blocks.
+
+properties:
+ compatible:
+ enum:
+ - mediatek,mt6795-mfgcfg
+ - mediatek,mt6795-vdecsys
+ - mediatek,mt6795-vencsys
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ mfgcfg: clock-controller@13000000 {
+ compatible = "mediatek,mt6795-mfgcfg";
+ reg = <0 0x13000000 0 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ vdecsys: clock-controller@16000000 {
+ compatible = "mediatek,mt6795-vdecsys";
+ reg = <0 0x16000000 0 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ vencsys: clock-controller@18000000 {
+ compatible = "mediatek,mt6795-vencsys";
+ reg = <0 0x18000000 0 0x1000>;
+ #clock-cells = <1>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/mediatek,mt7621-sysc.yaml b/Documentation/devicetree/bindings/clock/mediatek,mt7621-sysc.yaml
new file mode 100644
index 000000000000..b42f0f5c11b7
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/mediatek,mt7621-sysc.yaml
@@ -0,0 +1,80 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/mediatek,mt7621-sysc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MT7621 Clock
+
+maintainers:
+ - Sergio Paracuellos <sergio.paracuellos@gmail.com>
+
+description: |
+ The MT7621 has a PLL controller from where the cpu clock is provided
+ as well as derived clocks for the bus and the peripherals. It also
+ can gate SoC device clocks.
+
+ Each clock is assigned an identifier and client nodes use this identifier
+ to specify the clock which they consume.
+
+ All these identifiers could be found in:
+ [1]: <include/dt-bindings/clock/mt7621-clk.h>.
+
+ The clocks are provided inside a system controller node.
+
+ This node is also a reset provider for all the peripherals.
+
+ Reset related bits are defined in:
+ [2]: <include/dt-bindings/reset/mt7621-reset.h>.
+
+properties:
+ compatible:
+ items:
+ - const: mediatek,mt7621-sysc
+ - const: syscon
+
+ reg:
+ maxItems: 1
+
+ "#clock-cells":
+ description:
+ The first cell indicates the clock number, see [1] for available
+ clocks.
+ const: 1
+
+ "#reset-cells":
+ description:
+ The first cell indicates the reset bit within the register, see
+ [2] for available resets.
+ const: 1
+
+ ralink,memctl:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ phandle of syscon used to control memory registers
+
+ clock-output-names:
+ maxItems: 8
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+ - ralink,memctl
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/mt7621-clk.h>
+
+ sysc: sysc@0 {
+ compatible = "mediatek,mt7621-sysc", "syscon";
+ reg = <0x0 0x100>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ ralink,memctl = <&memc>;
+ clock-output-names = "xtal", "cpu", "bus",
+ "50m", "125m", "150m",
+ "250m", "270m";
+ };
diff --git a/Documentation/devicetree/bindings/clock/mediatek,mt7622-pciesys.yaml b/Documentation/devicetree/bindings/clock/mediatek,mt7622-pciesys.yaml
new file mode 100644
index 000000000000..9c3913f9092c
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/mediatek,mt7622-pciesys.yaml
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/mediatek,mt7622-pciesys.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek PCIESYS clock and reset controller
+
+description:
+ The MediaTek PCIESYS controller provides various clocks to the system.
+
+maintainers:
+ - Matthias Brugger <matthias.bgg@gmail.com>
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - const: mediatek,mt7622-pciesys
+ - const: syscon
+ - const: mediatek,mt7629-pciesys
+
+ reg:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 1
+ description: The available clocks are defined in dt-bindings/clock/mt*-clk.h
+
+ "#reset-cells":
+ const: 1
+
+required:
+ - reg
+ - "#clock-cells"
+ - "#reset-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@1a100800 {
+ compatible = "mediatek,mt7622-pciesys", "syscon";
+ reg = <0x1a100800 0x1000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/mediatek,mt7622-ssusbsys.yaml b/Documentation/devicetree/bindings/clock/mediatek,mt7622-ssusbsys.yaml
new file mode 100644
index 000000000000..da93eccdcfc1
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/mediatek,mt7622-ssusbsys.yaml
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/mediatek,mt7622-ssusbsys.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek SSUSBSYS clock and reset controller
+
+description:
+ The MediaTek SSUSBSYS controller provides various clocks to the system.
+
+maintainers:
+ - Matthias Brugger <matthias.bgg@gmail.com>
+
+properties:
+ compatible:
+ enum:
+ - mediatek,mt7622-ssusbsys
+ - mediatek,mt7629-ssusbsys
+
+ reg:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 1
+ description: The available clocks are defined in dt-bindings/clock/mt*-clk.h
+
+ "#reset-cells":
+ const: 1
+
+required:
+ - reg
+ - "#clock-cells"
+ - "#reset-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@1a000000 {
+ compatible = "mediatek,mt7622-ssusbsys";
+ reg = <0x1a000000 0x1000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/mediatek,mt7988-ethwarp.yaml b/Documentation/devicetree/bindings/clock/mediatek,mt7988-ethwarp.yaml
new file mode 100644
index 000000000000..e32a0251ff6a
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/mediatek,mt7988-ethwarp.yaml
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/mediatek,mt7988-ethwarp.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek MT7988 ethwarp Controller
+
+maintainers:
+ - Daniel Golle <daniel@makrotopia.org>
+
+description:
+ The Mediatek MT7988 ethwarp controller provides clocks and resets for the
+ Ethernet related subsystems found the MT7988 SoC.
+ The clock values can be found in <dt-bindings/clock/mt*-clk.h>.
+
+properties:
+ compatible:
+ items:
+ - const: mediatek,mt7988-ethwarp
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ '#reset-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+ - '#reset-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/reset/ti-syscon.h>
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ clock-controller@15031000 {
+ compatible = "mediatek,mt7988-ethwarp";
+ reg = <0 0x15031000 0 0x1000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/mediatek,mt7988-xfi-pll.yaml b/Documentation/devicetree/bindings/clock/mediatek,mt7988-xfi-pll.yaml
new file mode 100644
index 000000000000..192f1451f0af
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/mediatek,mt7988-xfi-pll.yaml
@@ -0,0 +1,48 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/mediatek,mt7988-xfi-pll.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek MT7988 XFI PLL Clock Controller
+
+maintainers:
+ - Daniel Golle <daniel@makrotopia.org>
+
+description:
+ The MediaTek XFI PLL controller provides the 156.25MHz clock for the
+ Ethernet SerDes PHY from the 40MHz top_xtal clock.
+
+properties:
+ compatible:
+ const: mediatek,mt7988-xfi-pll
+
+ reg:
+ maxItems: 1
+
+ resets:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - resets
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ clock-controller@11f40000 {
+ compatible = "mediatek,mt7988-xfi-pll";
+ reg = <0 0x11f40000 0 0x1000>;
+ resets = <&watchdog 16>;
+ #clock-cells = <1>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/mediatek,mt8186-clock.yaml b/Documentation/devicetree/bindings/clock/mediatek,mt8186-clock.yaml
new file mode 100644
index 000000000000..f4e58bfa504f
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/mediatek,mt8186-clock.yaml
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/mediatek,mt8186-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek Functional Clock Controller for MT8186
+
+maintainers:
+ - Chun-Jie Chen <chun-jie.chen@mediatek.com>
+
+description: |
+ The clock architecture in MediaTek like below
+ PLLs -->
+ dividers -->
+ muxes
+ -->
+ clock gate
+
+ The devices provide clock gate control in different IP blocks.
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - mediatek,mt8186-imp_iic_wrap
+ - mediatek,mt8186-mfgsys
+ - mediatek,mt8186-wpesys
+ - mediatek,mt8186-imgsys1
+ - mediatek,mt8186-imgsys2
+ - mediatek,mt8186-vdecsys
+ - mediatek,mt8186-vencsys
+ - mediatek,mt8186-camsys
+ - mediatek,mt8186-camsys_rawa
+ - mediatek,mt8186-camsys_rawb
+ - mediatek,mt8186-mdpsys
+ - mediatek,mt8186-ipesys
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ imp_iic_wrap: clock-controller@11017000 {
+ compatible = "mediatek,mt8186-imp_iic_wrap";
+ reg = <0x11017000 0x1000>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/mediatek,mt8186-fhctl.yaml b/Documentation/devicetree/bindings/clock/mediatek,mt8186-fhctl.yaml
new file mode 100644
index 000000000000..d00327d12e1e
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/mediatek,mt8186-fhctl.yaml
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/mediatek,mt8186-fhctl.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek frequency hopping and spread spectrum clocking control
+
+maintainers:
+ - Edward-JW Yang <edward-jw.yang@mediatek.com>
+
+description: |
+ Frequency hopping control (FHCTL) is a piece of hardware that control
+ some PLLs to adopt "hopping" mechanism to adjust their frequency.
+ Spread spectrum clocking (SSC) is another function provided by this hardware.
+
+properties:
+ compatible:
+ enum:
+ - mediatek,mt6795-fhctl
+ - mediatek,mt8173-fhctl
+ - mediatek,mt8186-fhctl
+ - mediatek,mt8192-fhctl
+ - mediatek,mt8195-fhctl
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ description: Phandles of the PLL with FHCTL hardware capability.
+ minItems: 1
+ maxItems: 30
+
+ mediatek,hopping-ssc-percent:
+ description: The percentage of spread spectrum clocking for one PLL.
+ minItems: 1
+ maxItems: 30
+ items:
+ default: 0
+ minimum: 0
+ maximum: 8
+
+required:
+ - compatible
+ - reg
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/mt8186-clk.h>
+ fhctl: fhctl@1000ce00 {
+ compatible = "mediatek,mt8186-fhctl";
+ reg = <0x1000ce00 0x200>;
+ clocks = <&apmixedsys CLK_APMIXED_MSDCPLL>;
+ mediatek,hopping-ssc-percent = <3>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/mediatek,mt8186-sys-clock.yaml b/Documentation/devicetree/bindings/clock/mediatek,mt8186-sys-clock.yaml
new file mode 100644
index 000000000000..1c446fbc5108
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/mediatek,mt8186-sys-clock.yaml
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/mediatek,mt8186-sys-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek System Clock Controller for MT8186
+
+maintainers:
+ - Chun-Jie Chen <chun-jie.chen@mediatek.com>
+
+description: |
+ The clock architecture in MediaTek like below
+ PLLs -->
+ dividers -->
+ muxes
+ -->
+ clock gate
+
+ The apmixedsys provides most of PLLs which generated from SoC 26m.
+ The topckgen provides dividers and muxes which provide the clock source to other IP blocks.
+ The infracfg_ao provides clock gate in peripheral and infrastructure IP blocks.
+ The mcusys provides mux control to select the clock source in AP MCU.
+ The device nodes also provide the system control capacity for configuration.
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - mediatek,mt8186-mcusys
+ - mediatek,mt8186-topckgen
+ - mediatek,mt8186-infracfg_ao
+ - mediatek,mt8186-apmixedsys
+ - const: syscon
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ '#reset-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ topckgen: syscon@10000000 {
+ compatible = "mediatek,mt8186-topckgen", "syscon";
+ reg = <0x10000000 0x1000>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/mediatek,mt8188-clock.yaml b/Documentation/devicetree/bindings/clock/mediatek,mt8188-clock.yaml
new file mode 100644
index 000000000000..5403242545ab
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/mediatek,mt8188-clock.yaml
@@ -0,0 +1,93 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/mediatek,mt8188-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek Functional Clock Controller for MT8188
+
+maintainers:
+ - Garmin Chang <garmin.chang@mediatek.com>
+
+description: |
+ The clock architecture in MediaTek like below
+ PLLs -->
+ dividers -->
+ muxes
+ -->
+ clock gate
+
+ The devices provide clock gate control in different IP blocks.
+
+properties:
+ compatible:
+ enum:
+ - mediatek,mt8188-adsp-audio26m
+ - mediatek,mt8188-camsys
+ - mediatek,mt8188-camsys-rawa
+ - mediatek,mt8188-camsys-rawb
+ - mediatek,mt8188-camsys-yuva
+ - mediatek,mt8188-camsys-yuvb
+ - mediatek,mt8188-ccusys
+ - mediatek,mt8188-imgsys
+ - mediatek,mt8188-imgsys-wpe1
+ - mediatek,mt8188-imgsys-wpe2
+ - mediatek,mt8188-imgsys-wpe3
+ - mediatek,mt8188-imgsys1-dip-nr
+ - mediatek,mt8188-imgsys1-dip-top
+ - mediatek,mt8188-imp-iic-wrap-c
+ - mediatek,mt8188-imp-iic-wrap-en
+ - mediatek,mt8188-imp-iic-wrap-w
+ - mediatek,mt8188-ipesys
+ - mediatek,mt8188-mfgcfg
+ - mediatek,mt8188-vdecsys
+ - mediatek,mt8188-vdecsys-soc
+ - mediatek,mt8188-vencsys
+ - mediatek,mt8188-wpesys
+ - mediatek,mt8188-wpesys-vpp0
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ '#reset-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - mediatek,mt8188-camsys-rawa
+ - mediatek,mt8188-camsys-rawb
+ - mediatek,mt8188-camsys-yuva
+ - mediatek,mt8188-camsys-yuvb
+ - mediatek,mt8188-imgsys-wpe1
+ - mediatek,mt8188-imgsys-wpe2
+ - mediatek,mt8188-imgsys-wpe3
+ - mediatek,mt8188-imgsys1-dip-nr
+ - mediatek,mt8188-imgsys1-dip-top
+ - mediatek,mt8188-ipesys
+
+ then:
+ required:
+ - '#reset-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@11283000 {
+ compatible = "mediatek,mt8188-imp-iic-wrap-c";
+ reg = <0x11283000 0x1000>;
+ #clock-cells = <1>;
+ };
+
diff --git a/Documentation/devicetree/bindings/clock/mediatek,mt8188-sys-clock.yaml b/Documentation/devicetree/bindings/clock/mediatek,mt8188-sys-clock.yaml
new file mode 100644
index 000000000000..db13d51a4903
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/mediatek,mt8188-sys-clock.yaml
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/mediatek,mt8188-sys-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek System Clock Controller for MT8188
+
+maintainers:
+ - Garmin Chang <garmin.chang@mediatek.com>
+
+description: |
+ The clock architecture in MediaTek like below
+ PLLs -->
+ dividers -->
+ muxes
+ -->
+ clock gate
+
+ The apmixedsys provides most of PLLs which generated from SoC 26m.
+ The topckgen provides dividers and muxes which provide the clock source to other IP blocks.
+ The infracfg_ao provides clock gate in peripheral and infrastructure IP blocks.
+ The mcusys provides mux control to select the clock source in AP MCU.
+ The device nodes also provide the system control capacity for configuration.
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - mediatek,mt8188-apmixedsys
+ - mediatek,mt8188-infracfg-ao
+ - mediatek,mt8188-pericfg-ao
+ - mediatek,mt8188-topckgen
+ - const: syscon
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ '#reset-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@10000000 {
+ compatible = "mediatek,mt8188-topckgen", "syscon";
+ reg = <0x10000000 0x1000>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/mediatek,mt8192-clock.yaml b/Documentation/devicetree/bindings/clock/mediatek,mt8192-clock.yaml
new file mode 100644
index 000000000000..b8d690e28bdc
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/mediatek,mt8192-clock.yaml
@@ -0,0 +1,191 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/mediatek,mt8192-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek Functional Clock Controller for MT8192
+
+maintainers:
+ - Chun-Jie Chen <chun-jie.chen@mediatek.com>
+
+description:
+ The Mediatek functional clock controller provides various clocks on MT8192.
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - mediatek,mt8192-scp_adsp
+ - mediatek,mt8192-imp_iic_wrap_c
+ - mediatek,mt8192-imp_iic_wrap_e
+ - mediatek,mt8192-imp_iic_wrap_s
+ - mediatek,mt8192-imp_iic_wrap_ws
+ - mediatek,mt8192-imp_iic_wrap_w
+ - mediatek,mt8192-imp_iic_wrap_n
+ - mediatek,mt8192-msdc_top
+ - mediatek,mt8192-mfgcfg
+ - mediatek,mt8192-imgsys
+ - mediatek,mt8192-imgsys2
+ - mediatek,mt8192-vdecsys_soc
+ - mediatek,mt8192-vdecsys
+ - mediatek,mt8192-vencsys
+ - mediatek,mt8192-camsys
+ - mediatek,mt8192-camsys_rawa
+ - mediatek,mt8192-camsys_rawb
+ - mediatek,mt8192-camsys_rawc
+ - mediatek,mt8192-ipesys
+ - mediatek,mt8192-mdpsys
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ scp_adsp: clock-controller@10720000 {
+ compatible = "mediatek,mt8192-scp_adsp";
+ reg = <0x10720000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ imp_iic_wrap_c: clock-controller@11007000 {
+ compatible = "mediatek,mt8192-imp_iic_wrap_c";
+ reg = <0x11007000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ imp_iic_wrap_e: clock-controller@11cb1000 {
+ compatible = "mediatek,mt8192-imp_iic_wrap_e";
+ reg = <0x11cb1000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ imp_iic_wrap_s: clock-controller@11d03000 {
+ compatible = "mediatek,mt8192-imp_iic_wrap_s";
+ reg = <0x11d03000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ imp_iic_wrap_ws: clock-controller@11d23000 {
+ compatible = "mediatek,mt8192-imp_iic_wrap_ws";
+ reg = <0x11d23000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ imp_iic_wrap_w: clock-controller@11e01000 {
+ compatible = "mediatek,mt8192-imp_iic_wrap_w";
+ reg = <0x11e01000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ imp_iic_wrap_n: clock-controller@11f02000 {
+ compatible = "mediatek,mt8192-imp_iic_wrap_n";
+ reg = <0x11f02000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ msdc_top: clock-controller@11f10000 {
+ compatible = "mediatek,mt8192-msdc_top";
+ reg = <0x11f10000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ mfgcfg: clock-controller@13fbf000 {
+ compatible = "mediatek,mt8192-mfgcfg";
+ reg = <0x13fbf000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ imgsys: clock-controller@15020000 {
+ compatible = "mediatek,mt8192-imgsys";
+ reg = <0x15020000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ imgsys2: clock-controller@15820000 {
+ compatible = "mediatek,mt8192-imgsys2";
+ reg = <0x15820000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ vdecsys_soc: clock-controller@1600f000 {
+ compatible = "mediatek,mt8192-vdecsys_soc";
+ reg = <0x1600f000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ vdecsys: clock-controller@1602f000 {
+ compatible = "mediatek,mt8192-vdecsys";
+ reg = <0x1602f000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ vencsys: clock-controller@17000000 {
+ compatible = "mediatek,mt8192-vencsys";
+ reg = <0x17000000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ camsys: clock-controller@1a000000 {
+ compatible = "mediatek,mt8192-camsys";
+ reg = <0x1a000000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ camsys_rawa: clock-controller@1a04f000 {
+ compatible = "mediatek,mt8192-camsys_rawa";
+ reg = <0x1a04f000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ camsys_rawb: clock-controller@1a06f000 {
+ compatible = "mediatek,mt8192-camsys_rawb";
+ reg = <0x1a06f000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ camsys_rawc: clock-controller@1a08f000 {
+ compatible = "mediatek,mt8192-camsys_rawc";
+ reg = <0x1a08f000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ ipesys: clock-controller@1b000000 {
+ compatible = "mediatek,mt8192-ipesys";
+ reg = <0x1b000000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ mdpsys: clock-controller@1f000000 {
+ compatible = "mediatek,mt8192-mdpsys";
+ reg = <0x1f000000 0x1000>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/mediatek,mt8192-sys-clock.yaml b/Documentation/devicetree/bindings/clock/mediatek,mt8192-sys-clock.yaml
new file mode 100644
index 000000000000..bf8c9aacdf1e
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/mediatek,mt8192-sys-clock.yaml
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/mediatek,mt8192-sys-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek System Clock Controller for MT8192
+
+maintainers:
+ - Chun-Jie Chen <chun-jie.chen@mediatek.com>
+
+description:
+ The Mediatek system clock controller provides various clocks and system configuration
+ like reset and bus protection on MT8192.
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - mediatek,mt8192-topckgen
+ - mediatek,mt8192-infracfg
+ - mediatek,mt8192-pericfg
+ - mediatek,mt8192-apmixedsys
+ - const: syscon
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ '#reset-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ topckgen: syscon@10000000 {
+ compatible = "mediatek,mt8192-topckgen", "syscon";
+ reg = <0x10000000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ infracfg: syscon@10001000 {
+ compatible = "mediatek,mt8192-infracfg", "syscon";
+ reg = <0x10001000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ pericfg: syscon@10003000 {
+ compatible = "mediatek,mt8192-pericfg", "syscon";
+ reg = <0x10003000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ apmixedsys: syscon@1000c000 {
+ compatible = "mediatek,mt8192-apmixedsys", "syscon";
+ reg = <0x1000c000 0x1000>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/mediatek,mt8195-clock.yaml b/Documentation/devicetree/bindings/clock/mediatek,mt8195-clock.yaml
new file mode 100644
index 000000000000..fcc963aff087
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/mediatek,mt8195-clock.yaml
@@ -0,0 +1,238 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/mediatek,mt8195-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek Functional Clock Controller for MT8195
+
+maintainers:
+ - Chun-Jie Chen <chun-jie.chen@mediatek.com>
+
+description:
+ The clock architecture in Mediatek like below
+ PLLs -->
+ dividers -->
+ muxes
+ -->
+ clock gate
+
+ The devices except apusys_pll provide clock gate control in different IP blocks.
+ The apusys_pll provides Plls which generated from SoC 26m for AI Processing Unit.
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - mediatek,mt8195-scp_adsp
+ - mediatek,mt8195-imp_iic_wrap_s
+ - mediatek,mt8195-imp_iic_wrap_w
+ - mediatek,mt8195-mfgcfg
+ - mediatek,mt8195-wpesys
+ - mediatek,mt8195-wpesys_vpp0
+ - mediatek,mt8195-wpesys_vpp1
+ - mediatek,mt8195-imgsys
+ - mediatek,mt8195-imgsys1_dip_top
+ - mediatek,mt8195-imgsys1_dip_nr
+ - mediatek,mt8195-imgsys1_wpe
+ - mediatek,mt8195-ipesys
+ - mediatek,mt8195-camsys
+ - mediatek,mt8195-camsys_rawa
+ - mediatek,mt8195-camsys_yuva
+ - mediatek,mt8195-camsys_rawb
+ - mediatek,mt8195-camsys_yuvb
+ - mediatek,mt8195-camsys_mraw
+ - mediatek,mt8195-ccusys
+ - mediatek,mt8195-vdecsys_soc
+ - mediatek,mt8195-vdecsys
+ - mediatek,mt8195-vdecsys_core1
+ - mediatek,mt8195-vencsys
+ - mediatek,mt8195-vencsys_core1
+ - mediatek,mt8195-apusys_pll
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ scp_adsp: clock-controller@10720000 {
+ compatible = "mediatek,mt8195-scp_adsp";
+ reg = <0x10720000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ imp_iic_wrap_s: clock-controller@11d03000 {
+ compatible = "mediatek,mt8195-imp_iic_wrap_s";
+ reg = <0x11d03000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ imp_iic_wrap_w: clock-controller@11e05000 {
+ compatible = "mediatek,mt8195-imp_iic_wrap_w";
+ reg = <0x11e05000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ mfgcfg: clock-controller@13fbf000 {
+ compatible = "mediatek,mt8195-mfgcfg";
+ reg = <0x13fbf000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ wpesys: clock-controller@14e00000 {
+ compatible = "mediatek,mt8195-wpesys";
+ reg = <0x14e00000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ wpesys_vpp0: clock-controller@14e02000 {
+ compatible = "mediatek,mt8195-wpesys_vpp0";
+ reg = <0x14e02000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ wpesys_vpp1: clock-controller@14e03000 {
+ compatible = "mediatek,mt8195-wpesys_vpp1";
+ reg = <0x14e03000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ imgsys: clock-controller@15000000 {
+ compatible = "mediatek,mt8195-imgsys";
+ reg = <0x15000000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ imgsys1_dip_top: clock-controller@15110000 {
+ compatible = "mediatek,mt8195-imgsys1_dip_top";
+ reg = <0x15110000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ imgsys1_dip_nr: clock-controller@15130000 {
+ compatible = "mediatek,mt8195-imgsys1_dip_nr";
+ reg = <0x15130000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ imgsys1_wpe: clock-controller@15220000 {
+ compatible = "mediatek,mt8195-imgsys1_wpe";
+ reg = <0x15220000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ ipesys: clock-controller@15330000 {
+ compatible = "mediatek,mt8195-ipesys";
+ reg = <0x15330000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ camsys: clock-controller@16000000 {
+ compatible = "mediatek,mt8195-camsys";
+ reg = <0x16000000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ camsys_rawa: clock-controller@1604f000 {
+ compatible = "mediatek,mt8195-camsys_rawa";
+ reg = <0x1604f000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ camsys_yuva: clock-controller@1606f000 {
+ compatible = "mediatek,mt8195-camsys_yuva";
+ reg = <0x1606f000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ camsys_rawb: clock-controller@1608f000 {
+ compatible = "mediatek,mt8195-camsys_rawb";
+ reg = <0x1608f000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ camsys_yuvb: clock-controller@160af000 {
+ compatible = "mediatek,mt8195-camsys_yuvb";
+ reg = <0x160af000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ camsys_mraw: clock-controller@16140000 {
+ compatible = "mediatek,mt8195-camsys_mraw";
+ reg = <0x16140000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ ccusys: clock-controller@17200000 {
+ compatible = "mediatek,mt8195-ccusys";
+ reg = <0x17200000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ vdecsys_soc: clock-controller@1800f000 {
+ compatible = "mediatek,mt8195-vdecsys_soc";
+ reg = <0x1800f000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ vdecsys: clock-controller@1802f000 {
+ compatible = "mediatek,mt8195-vdecsys";
+ reg = <0x1802f000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ vdecsys_core1: clock-controller@1803f000 {
+ compatible = "mediatek,mt8195-vdecsys_core1";
+ reg = <0x1803f000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ vencsys: clock-controller@1a000000 {
+ compatible = "mediatek,mt8195-vencsys";
+ reg = <0x1a000000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ vencsys_core1: clock-controller@1b000000 {
+ compatible = "mediatek,mt8195-vencsys_core1";
+ reg = <0x1b000000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ apusys_pll: clock-controller@190f3000 {
+ compatible = "mediatek,mt8195-apusys_pll";
+ reg = <0x190f3000 0x1000>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/mediatek,mt8195-sys-clock.yaml b/Documentation/devicetree/bindings/clock/mediatek,mt8195-sys-clock.yaml
new file mode 100644
index 000000000000..69f096eb168d
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/mediatek,mt8195-sys-clock.yaml
@@ -0,0 +1,76 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/mediatek,mt8195-sys-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek System Clock Controller for MT8195
+
+maintainers:
+ - Chun-Jie Chen <chun-jie.chen@mediatek.com>
+
+description:
+ The clock architecture in Mediatek like below
+ PLLs -->
+ dividers -->
+ muxes
+ -->
+ clock gate
+
+ The apmixedsys provides most of PLLs which generated from SoC 26m.
+ The topckgen provides dividers and muxes which provide the clock source to other IP blocks.
+ The infracfg_ao and pericfg_ao provides clock gate in peripheral and infrastructure IP blocks.
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - mediatek,mt8195-topckgen
+ - mediatek,mt8195-infracfg_ao
+ - mediatek,mt8195-apmixedsys
+ - mediatek,mt8195-pericfg_ao
+ - const: syscon
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ '#reset-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ topckgen: syscon@10000000 {
+ compatible = "mediatek,mt8195-topckgen", "syscon";
+ reg = <0x10000000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ infracfg_ao: syscon@10001000 {
+ compatible = "mediatek,mt8195-infracfg_ao", "syscon";
+ reg = <0x10001000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ apmixedsys: syscon@1000c000 {
+ compatible = "mediatek,mt8195-apmixedsys", "syscon";
+ reg = <0x1000c000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ - |
+ pericfg_ao: syscon@11003000 {
+ compatible = "mediatek,mt8195-pericfg_ao", "syscon";
+ reg = <0x11003000 0x1000>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/mediatek,mt8196-clock.yaml b/Documentation/devicetree/bindings/clock/mediatek,mt8196-clock.yaml
new file mode 100644
index 000000000000..bfdbd2e4a167
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/mediatek,mt8196-clock.yaml
@@ -0,0 +1,112 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/mediatek,mt8196-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek Functional Clock Controller for MT8196
+
+maintainers:
+ - Guangjie Song <guangjie.song@mediatek.com>
+ - Laura Nao <laura.nao@collabora.com>
+
+description: |
+ The clock architecture in MediaTek SoCs is structured like below:
+ PLLs -->
+ dividers -->
+ muxes
+ -->
+ clock gate
+
+ The device nodes provide clock gate control in different IP blocks.
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - mediatek,mt8196-imp-iic-wrap-c
+ - mediatek,mt8196-imp-iic-wrap-e
+ - mediatek,mt8196-imp-iic-wrap-n
+ - mediatek,mt8196-imp-iic-wrap-w
+ - mediatek,mt8196-mdpsys0
+ - mediatek,mt8196-mdpsys1
+ - mediatek,mt8196-pericfg-ao
+ - mediatek,mt8196-pextp0cfg-ao
+ - mediatek,mt8196-pextp1cfg-ao
+ - mediatek,mt8196-ufscfg-ao
+ - mediatek,mt8196-vencsys
+ - mediatek,mt8196-vencsys-c1
+ - mediatek,mt8196-vencsys-c2
+ - mediatek,mt8196-vdecsys
+ - mediatek,mt8196-vdecsys-soc
+ - mediatek,mt8196-vdisp-ao
+ - const: syscon
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ '#reset-cells':
+ const: 1
+ description:
+ Reset lines for PEXTP0/1 and UFS blocks.
+
+ mediatek,hardware-voter:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description: |
+ Phandle to the "Hardware Voter" (HWV), as named in the vendor
+ documentation for MT8196/MT6991.
+
+ The HWV is a SoC-internal fixed-function MCU used to collect votes from
+ both the Application Processor and other remote processors within the SoC.
+ It is intended to transparently enable or disable hardware resources (such
+ as power domains or clocks) based on internal vote aggregation handled by
+ the MCU's internal state machine.
+
+ However, in practice, this design is incomplete. While the HWV performs
+ some internal vote aggregation,software is still required to
+ - Manually enable power supplies externally, if present and if required
+ - Manually enable parent clocks via direct MMIO writes to clock controllers
+ - Enable the FENC after the clock has been ungated via direct MMIO
+ writes to clock controllers
+
+ As such, the HWV behaves more like a hardware-managed clock reference
+ counter than a true voter. Furthermore, it is not a separate
+ controller. It merely serves as an alternative interface to the same
+ underlying clock or power controller. Actual control still requires
+ direct access to the controller's own MMIO register space, in
+ addition to writing to the HWV's MMIO region.
+
+ For this reason, a custom phandle is used here - drivers need to directly
+ access the HWV MMIO region in a syscon-like fashion, due to how the
+ hardware is wired. This differs from true hardware voting systems, which
+ typically do not require custom phandles and rely instead on generic APIs
+ (clocks, power domains, interconnects).
+
+ The name "hardware-voter" is retained to match vendor documentation, but
+ this should not be reused or misunderstood as a proper voting mechanism.
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ pericfg_ao: clock-controller@16640000 {
+ compatible = "mediatek,mt8196-pericfg-ao", "syscon";
+ reg = <0x16640000 0x1000>;
+ mediatek,hardware-voter = <&scp_hwv>;
+ #clock-cells = <1>;
+ };
+ - |
+ pextp0cfg_ao: clock-controller@169b0000 {
+ compatible = "mediatek,mt8196-pextp0cfg-ao", "syscon";
+ reg = <0x169b0000 0x1000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/mediatek,mt8196-sys-clock.yaml b/Documentation/devicetree/bindings/clock/mediatek,mt8196-sys-clock.yaml
new file mode 100644
index 000000000000..660ab64f390d
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/mediatek,mt8196-sys-clock.yaml
@@ -0,0 +1,107 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/mediatek,mt8196-sys-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek System Clock Controller for MT8196
+
+maintainers:
+ - Guangjie Song <guangjie.song@mediatek.com>
+ - Laura Nao <laura.nao@collabora.com>
+
+description: |
+ The clock architecture in MediaTek SoCs is structured like below:
+ PLLs -->
+ dividers -->
+ muxes
+ -->
+ clock gate
+
+ The apmixedsys, apmixedsys_gp2, vlpckgen, armpll, ccipll, mfgpll and ptppll
+ provide most of the PLLs which are generated from the SoC's 26MHZ crystal oscillator.
+ The topckgen, topckgen_gp2 and vlpckgen provide dividers and muxes which
+ provide the clock source to other IP blocks.
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - mediatek,mt8196-apmixedsys
+ - mediatek,mt8196-armpll-b-pll-ctrl
+ - mediatek,mt8196-armpll-bl-pll-ctrl
+ - mediatek,mt8196-armpll-ll-pll-ctrl
+ - mediatek,mt8196-apmixedsys-gp2
+ - mediatek,mt8196-ccipll-pll-ctrl
+ - mediatek,mt8196-mfgpll-pll-ctrl
+ - mediatek,mt8196-mfgpll-sc0-pll-ctrl
+ - mediatek,mt8196-mfgpll-sc1-pll-ctrl
+ - mediatek,mt8196-ptppll-pll-ctrl
+ - mediatek,mt8196-topckgen
+ - mediatek,mt8196-topckgen-gp2
+ - mediatek,mt8196-vlpckgen
+ - const: syscon
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ mediatek,hardware-voter:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description: |
+ Phandle to the "Hardware Voter" (HWV), as named in the vendor
+ documentation for MT8196/MT6991.
+
+ The HWV is a SoC-internal fixed-function MCU used to collect votes from
+ both the Application Processor and other remote processors within the SoC.
+ It is intended to transparently enable or disable hardware resources (such
+ as power domains or clocks) based on internal vote aggregation handled by
+ the MCU's internal state machine.
+
+ However, in practice, this design is incomplete. While the HWV performs
+ some internal vote aggregation,software is still required to
+ - Manually enable power supplies externally, if present and if required
+ - Manually enable parent clocks via direct MMIO writes to clock controllers
+ - Enable the FENC after the clock has been ungated via direct MMIO
+ writes to clock controllers
+
+ As such, the HWV behaves more like a hardware-managed clock reference
+ counter than a true voter. Furthermore, it is not a separate
+ controller. It merely serves as an alternative interface to the same
+ underlying clock or power controller. Actual control still requires
+ direct access to the controller's own MMIO register space, in
+ addition to writing to the HWV's MMIO region.
+
+ For this reason, a custom phandle is used here - drivers need to directly
+ access the HWV MMIO region in a syscon-like fashion, due to how the
+ hardware is wired. This differs from true hardware voting systems, which
+ typically do not require custom phandles and rely instead on generic APIs
+ (clocks, power domains, interconnects).
+
+ The name "hardware-voter" is retained to match vendor documentation, but
+ this should not be reused or misunderstood as a proper voting mechanism.
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ apmixedsys_clk: syscon@10000800 {
+ compatible = "mediatek,mt8196-apmixedsys", "syscon";
+ reg = <0x10000800 0x1000>;
+ #clock-cells = <1>;
+ };
+ - |
+ topckgen: syscon@10000000 {
+ compatible = "mediatek,mt8196-topckgen", "syscon";
+ reg = <0x10000000 0x800>;
+ mediatek,hardware-voter = <&scp_hwv>;
+ #clock-cells = <1>;
+ };
+
diff --git a/Documentation/devicetree/bindings/clock/mediatek,mt8365-clock.yaml b/Documentation/devicetree/bindings/clock/mediatek,mt8365-clock.yaml
new file mode 100644
index 000000000000..b327ecb4e524
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/mediatek,mt8365-clock.yaml
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/mediatek,mt8365-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek Functional Clock Controller for MT8365
+
+maintainers:
+ - Markus Schneider-Pargmann <msp@baylibre.com>
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - mediatek,mt8365-apu
+ - mediatek,mt8365-imgsys
+ - mediatek,mt8365-mfgcfg
+ - mediatek,mt8365-vdecsys
+ - mediatek,mt8365-vencsys
+ - const: syscon
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ apu: clock-controller@19020000 {
+ compatible = "mediatek,mt8365-apu", "syscon";
+ reg = <0x19020000 0x1000>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/mediatek,mt8365-sys-clock.yaml b/Documentation/devicetree/bindings/clock/mediatek,mt8365-sys-clock.yaml
new file mode 100644
index 000000000000..643f84660c8e
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/mediatek,mt8365-sys-clock.yaml
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/mediatek,mt8365-sys-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek System Clock Controller for MT8365
+
+maintainers:
+ - Markus Schneider-Pargmann <msp@baylibre.com>
+
+description:
+ The apmixedsys module provides most of PLLs which generated from SoC 26m.
+ The topckgen provides dividers and muxes which provides the clock source to other IP blocks.
+ The infracfg_ao and pericfg_ao provides clock gate in peripheral and infrastructure IP blocks.
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - mediatek,mt8365-topckgen
+ - mediatek,mt8365-infracfg
+ - mediatek,mt8365-apmixedsys
+ - mediatek,mt8365-pericfg
+ - mediatek,mt8365-mcucfg
+ - const: syscon
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ topckgen: clock-controller@10000000 {
+ compatible = "mediatek,mt8365-topckgen", "syscon";
+ reg = <0x10000000 0x1000>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/mediatek,mtmips-sysc.yaml b/Documentation/devicetree/bindings/clock/mediatek,mtmips-sysc.yaml
new file mode 100644
index 000000000000..56bbd69b16d9
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/mediatek,mtmips-sysc.yaml
@@ -0,0 +1,77 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/mediatek,mtmips-sysc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MTMIPS SoCs System Controller
+
+maintainers:
+ - Sergio Paracuellos <sergio.paracuellos@gmail.com>
+
+description: |
+ MediaTek MIPS and Ralink SoCs provides a system controller to allow
+ to access to system control registers. These registers include clock
+ and reset related ones so this node is both clock and reset provider
+ for the rest of the world.
+
+ These SoCs have an XTAL from where the cpu clock is
+ provided as well as derived clocks for the bus and the peripherals.
+
+ Each clock is assigned an identifier and client nodes use this identifier
+ to specify the clock which they consume.
+
+ All these identifiers could be found in:
+ [1]: <include/dt-bindings/clock/mediatek,mtmips-sysc.h>.
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - ralink,mt7620-sysc
+ - ralink,mt7688-sysc
+ - ralink,rt2880-sysc
+ - ralink,rt3050-sysc
+ - ralink,rt3052-sysc
+ - ralink,rt3352-sysc
+ - ralink,rt3883-sysc
+ - ralink,rt5350-sysc
+ - const: syscon
+ - items:
+ - const: ralink,mt7628-sysc
+ - const: ralink,mt7688-sysc
+ - const: syscon
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ description:
+ The first cell indicates the clock number, see [1] for available
+ clocks.
+ const: 1
+
+ '#reset-cells':
+ description:
+ The first cell indicates the reset bit within the register.
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+ - '#reset-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/mediatek,mtmips-sysc.h>
+
+ syscon@0 {
+ compatible = "ralink,rt5350-sysc", "syscon";
+ reg = <0x0 0x100>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/mediatek,pericfg.yaml b/Documentation/devicetree/bindings/clock/mediatek,pericfg.yaml
new file mode 100644
index 000000000000..b98cf45efe2f
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/mediatek,pericfg.yaml
@@ -0,0 +1,72 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/mediatek,pericfg.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek Peripheral Configuration Controller
+
+maintainers:
+ - Bartosz Golaszewski <bgolaszewski@baylibre.com>
+
+description:
+ The Mediatek pericfg controller provides various clocks and reset outputs
+ to the system.
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - mediatek,mt2701-pericfg
+ - mediatek,mt2712-pericfg
+ - mediatek,mt6735-pericfg
+ - mediatek,mt6765-pericfg
+ - mediatek,mt6795-pericfg
+ - mediatek,mt7622-pericfg
+ - mediatek,mt7629-pericfg
+ - mediatek,mt8135-pericfg
+ - mediatek,mt8173-pericfg
+ - mediatek,mt8183-pericfg
+ - mediatek,mt8186-pericfg
+ - mediatek,mt8188-pericfg
+ - mediatek,mt8195-pericfg
+ - mediatek,mt8516-pericfg
+ - const: syscon
+ - items:
+ # Special case for mt7623 for backward compatibility
+ - const: mediatek,mt7623-pericfg
+ - const: mediatek,mt2701-pericfg
+ - const: syscon
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ '#reset-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ pericfg@10003000 {
+ compatible = "mediatek,mt8173-pericfg", "syscon";
+ reg = <0x10003000 0x1000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
+
+ - |
+ pericfg@10003000 {
+ compatible = "mediatek,mt7623-pericfg", "mediatek,mt2701-pericfg", "syscon";
+ reg = <0x10003000 0x1000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/mediatek,syscon.yaml b/Documentation/devicetree/bindings/clock/mediatek,syscon.yaml
new file mode 100644
index 000000000000..a52f90bfc9f9
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/mediatek,syscon.yaml
@@ -0,0 +1,112 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/mediatek,syscon.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek Clock controller syscon's
+
+maintainers:
+ - Matthias Brugger <matthias.bgg@gmail.com>
+ - AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
+
+description:
+ The MediaTek clock controller syscon's provide various clocks to the system.
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - mediatek,mt2701-bdpsys
+ - mediatek,mt2701-imgsys
+ - mediatek,mt2701-vdecsys
+ - mediatek,mt2712-bdpsys
+ - mediatek,mt2712-imgsys
+ - mediatek,mt2712-jpgdecsys
+ - mediatek,mt2712-mcucfg
+ - mediatek,mt2712-mfgcfg
+ - mediatek,mt2712-vdecsys
+ - mediatek,mt2712-vencsys
+ - mediatek,mt6735-imgsys
+ - mediatek,mt6735-mfgcfg
+ - mediatek,mt6735-vdecsys
+ - mediatek,mt6735-vencsys
+ - mediatek,mt6765-camsys
+ - mediatek,mt6765-imgsys
+ - mediatek,mt6765-mipi0a
+ - mediatek,mt6765-vcodecsys
+ - mediatek,mt6779-camsys
+ - mediatek,mt6779-imgsys
+ - mediatek,mt6779-ipesys
+ - mediatek,mt6779-mfgcfg
+ - mediatek,mt6779-vdecsys
+ - mediatek,mt6779-vencsys
+ - mediatek,mt6797-imgsys
+ - mediatek,mt6797-vdecsys
+ - mediatek,mt6797-vencsys
+ - mediatek,mt8167-imgsys
+ - mediatek,mt8167-mfgcfg
+ - mediatek,mt8167-vdecsys
+ - mediatek,mt8173-imgsys
+ - mediatek,mt8173-vdecsys
+ - mediatek,mt8173-vencltsys
+ - mediatek,mt8173-vencsys
+ - mediatek,mt8183-camsys
+ - mediatek,mt8183-imgsys
+ - mediatek,mt8183-ipu_conn
+ - mediatek,mt8183-ipu_adl
+ - mediatek,mt8183-ipu_core0
+ - mediatek,mt8183-ipu_core1
+ - mediatek,mt8183-mcucfg
+ - mediatek,mt8183-mfgcfg
+ - mediatek,mt8183-vdecsys
+ - mediatek,mt8183-vencsys
+ - const: syscon
+ - items:
+ - const: mediatek,mt7623-bdpsys
+ - const: mediatek,mt2701-bdpsys
+ - const: syscon
+ - items:
+ - const: mediatek,mt7623-imgsys
+ - const: mediatek,mt2701-imgsys
+ - const: syscon
+ - items:
+ - const: mediatek,mt7623-vdecsys
+ - const: mediatek,mt2701-vdecsys
+ - const: syscon
+
+ power-domains:
+ maxItems: 1
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - '#clock-cells'
+
+if:
+ properties:
+ compatible:
+ contains:
+ const: mediatek,mt8183-mfgcfg
+then:
+ properties:
+ power-domains: true
+else:
+ properties:
+ power-domains: false
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@11220000 {
+ compatible = "mediatek,mt2701-bdpsys", "syscon";
+ reg = <0x11220000 0x2000>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/mediatek,topckgen.yaml b/Documentation/devicetree/bindings/clock/mediatek,topckgen.yaml
new file mode 100644
index 000000000000..c080fb0a1618
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/mediatek,topckgen.yaml
@@ -0,0 +1,67 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/mediatek,topckgen.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek Top Clock Generator Controller
+
+maintainers:
+ - Michael Turquette <mturquette@baylibre.com>
+ - Stephen Boyd <sboyd@kernel.org>
+
+description:
+ The Mediatek topckgen controller provides various clocks to the system.
+ The clock values can be found in <dt-bindings/clock/mt*-clk.h> and
+ <dt-bindings/clock/mediatek,mt*-topckgen.h>.
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - mediatek,mt6797-topckgen
+ - mediatek,mt7622-topckgen
+ - mediatek,mt8135-topckgen
+ - mediatek,mt8173-topckgen
+ - mediatek,mt8516-topckgen
+ - items:
+ - const: mediatek,mt7623-topckgen
+ - const: mediatek,mt2701-topckgen
+ - const: syscon
+ - items:
+ - enum:
+ - mediatek,mt2701-topckgen
+ - mediatek,mt2712-topckgen
+ - mediatek,mt6735-topckgen
+ - mediatek,mt6765-topckgen
+ - mediatek,mt6779-topckgen
+ - mediatek,mt6795-topckgen
+ - mediatek,mt7629-topckgen
+ - mediatek,mt7981-topckgen
+ - mediatek,mt7986-topckgen
+ - mediatek,mt7988-mcusys
+ - mediatek,mt7988-topckgen
+ - mediatek,mt8167-topckgen
+ - mediatek,mt8183-topckgen
+ - const: syscon
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ topckgen: clock-controller@10000000 {
+ compatible = "mediatek,mt8173-topckgen";
+ reg = <0x10000000 0x1000>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/microchip,lan966x-gck.yaml b/Documentation/devicetree/bindings/clock/microchip,lan966x-gck.yaml
new file mode 100644
index 000000000000..16106e8b637f
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/microchip,lan966x-gck.yaml
@@ -0,0 +1,71 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/microchip,lan966x-gck.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Microchip LAN966X Generic Clock Controller
+
+maintainers:
+ - Kavyasree Kotagiri <kavyasree.kotagiri@microchip.com>
+
+description: |
+ The LAN966X Generic clock controller contains 3 PLLs - cpu_clk,
+ ddr_clk and sys_clk. This clock controller generates and supplies
+ clock to various peripherals within the SoC.
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - microchip,lan966x-gck
+ - microchip,lan9691-gck
+ - items:
+ - enum:
+ - microchip,lan9698-gck
+ - microchip,lan9696-gck
+ - microchip,lan9694-gck
+ - microchip,lan9693-gck
+ - microchip,lan9692-gck
+ - const: microchip,lan9691-gck
+
+ reg:
+ minItems: 1
+ items:
+ - description: Generic clock registers
+ - description: Optional gate clock registers
+
+ clocks:
+ items:
+ - description: CPU clock source
+ - description: DDR clock source
+ - description: System clock source
+
+ clock-names:
+ items:
+ - const: cpu
+ - const: ddr
+ - const: sys
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ clks: clock-controller@e00c00a8 {
+ compatible = "microchip,lan966x-gck";
+ #clock-cells = <1>;
+ clocks = <&cpu_clk>, <&ddr_clk>, <&sys_clk>;
+ clock-names = "cpu", "ddr", "sys";
+ reg = <0xe00c00a8 0x38>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/microchip,mpfs-ccc.yaml b/Documentation/devicetree/bindings/clock/microchip,mpfs-ccc.yaml
new file mode 100644
index 000000000000..f1770360798f
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/microchip,mpfs-ccc.yaml
@@ -0,0 +1,80 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/microchip,mpfs-ccc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Microchip PolarFire SoC Fabric Clock Conditioning Circuitry
+
+maintainers:
+ - Conor Dooley <conor.dooley@microchip.com>
+
+description: |
+ Microchip PolarFire SoC has 4 Clock Conditioning Circuitry blocks. Each of
+ these blocks contains two PLLs and 2 DLLs & are located in the four corners of
+ the FPGA. For more information see "PolarFire SoC FPGA Clocking Resources" at:
+ https://onlinedocs.microchip.com/pr/GUID-8F0CC4C0-0317-4262-89CA-CE7773ED1931-en-US-1/index.html
+
+properties:
+ compatible:
+ const: microchip,mpfs-ccc
+
+ reg:
+ items:
+ - description: PLL0's control registers
+ - description: PLL1's control registers
+ - description: DLL0's control registers
+ - description: DLL1's control registers
+
+ clocks:
+ description:
+ The CCC PLL's have two input clocks. It is required that even if the input
+ clocks are identical that both are provided.
+ minItems: 2
+ items:
+ - description: PLL0's refclk0
+ - description: PLL0's refclk1
+ - description: PLL1's refclk0
+ - description: PLL1's refclk1
+ - description: DLL0's refclk
+ - description: DLL1's refclk
+
+ clock-names:
+ minItems: 2
+ items:
+ - const: pll0_ref0
+ - const: pll0_ref1
+ - const: pll1_ref0
+ - const: pll1_ref1
+ - const: dll0_ref
+ - const: dll1_ref
+
+ '#clock-cells':
+ const: 1
+ description: |
+ The clock consumer should specify the desired clock by having the clock
+ ID in its "clocks" phandle cell.
+ See include/dt-bindings/clock/microchip,mpfs-clock.h for the full list of
+ PolarFire clock IDs.
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@38100000 {
+ compatible = "microchip,mpfs-ccc";
+ reg = <0x38010000 0x1000>, <0x38020000 0x1000>,
+ <0x39010000 0x1000>, <0x39020000 0x1000>;
+ #clock-cells = <1>;
+ clocks = <&refclk_ccc>, <&refclk_ccc>, <&refclk_ccc>, <&refclk_ccc>,
+ <&refclk_ccc>, <&refclk_ccc>;
+ clock-names = "pll0_ref0", "pll0_ref1", "pll1_ref0", "pll1_ref1",
+ "dll0_ref", "dll1_ref";
+ };
diff --git a/Documentation/devicetree/bindings/clock/microchip,mpfs-clkcfg.yaml b/Documentation/devicetree/bindings/clock/microchip,mpfs-clkcfg.yaml
new file mode 100644
index 000000000000..e4e1c31267d2
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/microchip,mpfs-clkcfg.yaml
@@ -0,0 +1,80 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/microchip,mpfs-clkcfg.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Microchip PolarFire Clock Control Module
+
+maintainers:
+ - Daire McNamara <daire.mcnamara@microchip.com>
+
+description: |
+ Microchip PolarFire clock control (CLKCFG) is an integrated clock controller,
+ which gates and enables all peripheral clocks.
+
+ This device tree binding describes 33 gate clocks. Clocks are referenced by
+ user nodes by the CLKCFG node phandle and the clock index in the group, from
+ 0 to 32.
+
+properties:
+ compatible:
+ const: microchip,mpfs-clkcfg
+
+ reg:
+ items:
+ - description: |
+ clock config registers:
+ These registers contain enable, reset & divider tables for the, cpu,
+ axi, ahb and rtc/mtimer reference clocks as well as enable and reset
+ for the peripheral clocks.
+ - description: |
+ mss pll dri registers:
+ Block of registers responsible for dynamic reconfiguration of the mss
+ pll
+
+ clocks:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+ description: |
+ The clock consumer should specify the desired clock by having the clock
+ ID in its "clocks" phandle cell.
+ See include/dt-bindings/clock/microchip,mpfs-clock.h for the full list of
+ PolarFire clock IDs.
+
+ resets:
+ maxItems: 1
+
+ '#reset-cells':
+ description:
+ The AHB/AXI peripherals on the PolarFire SoC have reset support, so from
+ CLK_ENVM to CLK_CFM. The reset consumer should specify the desired
+ peripheral via the clock ID in its "resets" phandle cell.
+ See include/dt-bindings/clock/microchip,mpfs-clock.h for the full list of
+ PolarFire clock IDs.
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ # Clock Config node:
+ - |
+ #include <dt-bindings/clock/microchip,mpfs-clock.h>
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ clkcfg: clock-controller@20002000 {
+ compatible = "microchip,mpfs-clkcfg";
+ reg = <0x0 0x20002000 0x0 0x1000>, <0x0 0x3E001000 0x0 0x1000>;
+ clocks = <&ref>;
+ #clock-cells = <1>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/microchip,pic32.txt b/Documentation/devicetree/bindings/clock/microchip,pic32.txt
deleted file mode 100644
index c93d88fdd858..000000000000
--- a/Documentation/devicetree/bindings/clock/microchip,pic32.txt
+++ /dev/null
@@ -1,39 +0,0 @@
-Microchip PIC32 Clock Controller Binding
-----------------------------------------
-Microchip clock controller is consists of few oscillators, PLL, multiplexer
-and few divider modules.
-
-This binding uses common clock bindings.
-[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-
-Required properties:
-- compatible: shall be "microchip,pic32mzda-clk".
-- reg: shall contain base address and length of clock registers.
-- #clock-cells: shall be 1.
-
-Optional properties:
-- microchip,pic32mzda-sosc: shall be added only if platform has
- secondary oscillator connected.
-
-Example:
- rootclk: clock-controller@1f801200 {
- compatible = "microchip,pic32mzda-clk";
- reg = <0x1f801200 0x200>;
- #clock-cells = <1>;
- /* optional */
- microchip,pic32mzda-sosc;
- };
-
-
-The clock consumer shall specify the desired clock-output of the clock
-controller (as defined in [2]) by specifying output-id in its "clock"
-phandle cell.
-[2] include/dt-bindings/clock/microchip,pic32-clock.h
-
-For example for UART2:
-uart2: serial@2 {
- compatible = "microchip,pic32mzda-uart";
- reg = <>;
- interrupts = <>;
- clocks = <&rootclk PB2CLK>;
-};
diff --git a/Documentation/devicetree/bindings/clock/microchip,pic32mzda-clk.yaml b/Documentation/devicetree/bindings/clock/microchip,pic32mzda-clk.yaml
new file mode 100644
index 000000000000..a14a838140f1
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/microchip,pic32mzda-clk.yaml
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/microchip,pic32mzda-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Microchip PIC32MZDA Clock Controller
+
+maintainers:
+ - Purna Chandra Mandal <purna.mandal@microchip.com>
+
+description:
+ Microchip clock controller consists of a few oscillators, PLL, multiplexer
+ and divider modules.
+
+properties:
+ compatible:
+ const: microchip,pic32mzda-clk
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ microchip,pic32mzda-sosc:
+ description: Presence of secondary oscillator.
+ type: boolean
+
+required:
+ - compatible
+ - reg
+ - "#clock-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@1f801200 {
+ compatible = "microchip,pic32mzda-clk";
+ reg = <0x1f801200 0x200>;
+ #clock-cells = <1>;
+ /* optional */
+ microchip,pic32mzda-sosc;
+ };
diff --git a/Documentation/devicetree/bindings/clock/microchip,sparx5-dpll.yaml b/Documentation/devicetree/bindings/clock/microchip,sparx5-dpll.yaml
new file mode 100644
index 000000000000..39559a0a598a
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/microchip,sparx5-dpll.yaml
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/microchip,sparx5-dpll.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Microchip Sparx5 DPLL Clock
+
+maintainers:
+ - Lars Povlsen <lars.povlsen@microchip.com>
+
+description: |
+ The Sparx5 DPLL clock controller generates and supplies clock to
+ various peripherals within the SoC.
+
+properties:
+ compatible:
+ const: microchip,sparx5-dpll
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ # Clock provider for eMMC:
+ - |
+ lcpll_clk: lcpll-clk {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <2500000000>;
+ };
+ clks: clock-controller@61110000c {
+ compatible = "microchip,sparx5-dpll";
+ #clock-cells = <1>;
+ clocks = <&lcpll_clk>;
+ reg = <0x1110000c 0x24>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/milbeaut-clock.yaml b/Documentation/devicetree/bindings/clock/milbeaut-clock.yaml
new file mode 100644
index 000000000000..d786f1e2d007
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/milbeaut-clock.yaml
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/milbeaut-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Milbeaut SoCs Clock Controller
+
+maintainers:
+ - Taichi Sugaya <sugaya.taichi@socionext.com>
+
+description: |
+ Milbeaut SoCs Clock controller is an integrated clock controller, which
+ generates and supplies to all modules.
+
+ This binding uses common clock bindings
+ [1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+properties:
+ compatible:
+ enum:
+ - socionext,milbeaut-m10v-ccu
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+ description: external clock
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@1d021000 {
+ compatible = "socionext,milbeaut-m10v-ccu";
+ reg = <0x1d021000 0x4000>;
+ #clock-cells = <1>;
+ clocks = <&clki40mhz>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/moxa,moxart-clock.txt b/Documentation/devicetree/bindings/clock/moxa,moxart-clock.txt
deleted file mode 100644
index fedea84314a1..000000000000
--- a/Documentation/devicetree/bindings/clock/moxa,moxart-clock.txt
+++ /dev/null
@@ -1,48 +0,0 @@
-Device Tree Clock bindings for arch-moxart
-
-This binding uses the common clock binding[1].
-
-[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-
-MOXA ART SoCs allow to determine PLL output and APB frequencies
-by reading registers holding multiplier and divisor information.
-
-
-PLL:
-
-Required properties:
-- compatible : Must be "moxa,moxart-pll-clock"
-- #clock-cells : Should be 0
-- reg : Should contain registers location and length
-- clocks : Should contain phandle + clock-specifier for the parent clock
-
-Optional properties:
-- clock-output-names : Should contain clock name
-
-
-APB:
-
-Required properties:
-- compatible : Must be "moxa,moxart-apb-clock"
-- #clock-cells : Should be 0
-- reg : Should contain registers location and length
-- clocks : Should contain phandle + clock-specifier for the parent clock
-
-Optional properties:
-- clock-output-names : Should contain clock name
-
-
-For example:
-
- clk_pll: clk_pll@98100000 {
- compatible = "moxa,moxart-pll-clock";
- #clock-cells = <0>;
- reg = <0x98100000 0x34>;
- };
-
- clk_apb: clk_apb@98100000 {
- compatible = "moxa,moxart-apb-clock";
- #clock-cells = <0>;
- reg = <0x98100000 0x34>;
- clocks = <&clk_pll>;
- };
diff --git a/Documentation/devicetree/bindings/clock/moxa,moxart-clock.yaml b/Documentation/devicetree/bindings/clock/moxa,moxart-clock.yaml
new file mode 100644
index 000000000000..bcf7cc240eba
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/moxa,moxart-clock.yaml
@@ -0,0 +1,38 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/moxa,moxart-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MOXA ART Clock Controllers
+
+maintainers:
+ - Krzysztof Kozlowski <krzk@kernel.org>
+
+description:
+ MOXA ART SoCs allow to determine PLL output and APB frequencies by reading
+ registers holding multiplier and divisor information.
+
+properties:
+ compatible:
+ enum:
+ - moxa,moxart-apb-clock
+ - moxa,moxart-pll-clock
+
+ "#clock-cells":
+ const: 0
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-output-names: true
+
+additionalProperties: false
+
+required:
+ - compatible
+ - "#clock-cells"
+ - reg
diff --git a/Documentation/devicetree/bindings/clock/mstar,msc313-cpupll.yaml b/Documentation/devicetree/bindings/clock/mstar,msc313-cpupll.yaml
new file mode 100644
index 000000000000..a9ad7ab5230c
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/mstar,msc313-cpupll.yaml
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/mstar,msc313-cpupll.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MStar/Sigmastar MSC313 CPU PLL
+
+maintainers:
+ - Daniel Palmer <daniel@thingy.jp>
+
+description: |
+ The MStar/SigmaStar MSC313 and later ARMv7 chips have a scalable
+ PLL that can be used as the clock source for the CPU(s).
+
+properties:
+ compatible:
+ const: mstar,msc313-cpupll
+
+ "#clock-cells":
+ const: 1
+
+ clocks:
+ maxItems: 1
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - "#clock-cells"
+ - clocks
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/mstar-msc313-mpll.h>
+ cpupll: cpupll@206400 {
+ compatible = "mstar,msc313-cpupll";
+ reg = <0x206400 0x200>;
+ #clock-cells = <1>;
+ clocks = <&mpll MSTAR_MSC313_MPLL_DIV2>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/mstar,msc313-mpll.yaml b/Documentation/devicetree/bindings/clock/mstar,msc313-mpll.yaml
new file mode 100644
index 000000000000..0df5d75d4ebc
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/mstar,msc313-mpll.yaml
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/mstar,msc313-mpll.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MStar/Sigmastar MSC313 MPLL
+
+maintainers:
+ - Daniel Palmer <daniel@thingy.jp>
+
+description: |
+ The MStar/SigmaStar MSC313 and later ARMv7 chips have an MPLL block that
+ takes the external xtal input and multiplies it to create a high
+ frequency clock and divides that down into a number of clocks that
+ peripherals use.
+
+properties:
+ compatible:
+ const: mstar,msc313-mpll
+
+ "#clock-cells":
+ const: 1
+
+ clocks:
+ maxItems: 1
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - "#clock-cells"
+ - clocks
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ mpll@206000 {
+ compatible = "mstar,msc313-mpll";
+ reg = <0x206000 0x200>;
+ #clock-cells = <1>;
+ clocks = <&xtal>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/mvebu-core-clock.txt b/Documentation/devicetree/bindings/clock/mvebu-core-clock.txt
deleted file mode 100644
index 796c260c183d..000000000000
--- a/Documentation/devicetree/bindings/clock/mvebu-core-clock.txt
+++ /dev/null
@@ -1,86 +0,0 @@
-* Core Clock bindings for Marvell MVEBU SoCs
-
-Marvell MVEBU SoCs usually allow to determine core clock frequencies by
-reading the Sample-At-Reset (SAR) register. The core clock consumer should
-specify the desired clock by having the clock ID in its "clocks" phandle cell.
-
-The following is a list of provided IDs and clock names on Armada 370/XP:
- 0 = tclk (Internal Bus clock)
- 1 = cpuclk (CPU clock)
- 2 = nbclk (L2 Cache clock)
- 3 = hclk (DRAM control clock)
- 4 = dramclk (DDR clock)
-
-The following is a list of provided IDs and clock names on Armada 375:
- 0 = tclk (Internal Bus clock)
- 1 = cpuclk (CPU clock)
- 2 = l2clk (L2 Cache clock)
- 3 = ddrclk (DDR clock)
-
-The following is a list of provided IDs and clock names on Armada 380/385:
- 0 = tclk (Internal Bus clock)
- 1 = cpuclk (CPU clock)
- 2 = l2clk (L2 Cache clock)
- 3 = ddrclk (DDR clock)
-
-The following is a list of provided IDs and clock names on Armada 39x:
- 0 = tclk (Internal Bus clock)
- 1 = cpuclk (CPU clock)
- 2 = nbclk (Coherent Fabric clock)
- 3 = hclk (SDRAM Controller Internal Clock)
- 4 = dclk (SDRAM Interface Clock)
- 5 = refclk (Reference Clock)
-
-The following is a list of provided IDs and clock names on 98dx3236:
- 0 = tclk (Internal Bus clock)
- 1 = cpuclk (CPU clock)
- 2 = ddrclk (DDR clock)
- 3 = mpll (MPLL Clock)
-
-The following is a list of provided IDs and clock names on Kirkwood and Dove:
- 0 = tclk (Internal Bus clock)
- 1 = cpuclk (CPU0 clock)
- 2 = l2clk (L2 Cache clock derived from CPU0 clock)
- 3 = ddrclk (DDR controller clock derived from CPU0 clock)
-
-The following is a list of provided IDs and clock names on Orion5x:
- 0 = tclk (Internal Bus clock)
- 1 = cpuclk (CPU0 clock)
- 2 = ddrclk (DDR controller clock derived from CPU0 clock)
-
-Required properties:
-- compatible : shall be one of the following:
- "marvell,armada-370-core-clock" - For Armada 370 SoC core clocks
- "marvell,armada-375-core-clock" - For Armada 375 SoC core clocks
- "marvell,armada-380-core-clock" - For Armada 380/385 SoC core clocks
- "marvell,armada-390-core-clock" - For Armada 39x SoC core clocks
- "marvell,armada-xp-core-clock" - For Armada XP SoC core clocks
- "marvell,mv98dx3236-core-clock" - For 98dx3236 family SoC core clocks
- "marvell,dove-core-clock" - for Dove SoC core clocks
- "marvell,kirkwood-core-clock" - for Kirkwood SoC (except mv88f6180)
- "marvell,mv88f6180-core-clock" - for Kirkwood MV88f6180 SoC
- "marvell,mv88f5181-core-clock" - for Orion MV88F5181 SoC
- "marvell,mv88f5182-core-clock" - for Orion MV88F5182 SoC
- "marvell,mv88f5281-core-clock" - for Orion MV88F5281 SoC
- "marvell,mv88f6183-core-clock" - for Orion MV88F6183 SoC
-- reg : shall be the register address of the Sample-At-Reset (SAR) register
-- #clock-cells : from common clock binding; shall be set to 1
-
-Optional properties:
-- clock-output-names : from common clock binding; allows overwrite default clock
- output names ("tclk", "cpuclk", "l2clk", "ddrclk")
-
-Example:
-
-core_clk: core-clocks@d0214 {
- compatible = "marvell,dove-core-clock";
- reg = <0xd0214 0x4>;
- #clock-cells = <1>;
-};
-
-spi0: spi@10600 {
- compatible = "marvell,orion-spi";
- /* ... */
- /* get tclk from core clock provider */
- clocks = <&core_clk 0>;
-};
diff --git a/Documentation/devicetree/bindings/clock/mvebu-corediv-clock.txt b/Documentation/devicetree/bindings/clock/mvebu-corediv-clock.txt
deleted file mode 100644
index c7b4e3a6b2c6..000000000000
--- a/Documentation/devicetree/bindings/clock/mvebu-corediv-clock.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-* Core Divider Clock bindings for Marvell MVEBU SoCs
-
-The following is a list of provided IDs and clock names on Armada 370/XP:
- 0 = nand (NAND clock)
-
-Required properties:
-- compatible : must be "marvell,armada-370-corediv-clock",
- "marvell,armada-375-corediv-clock",
- "marvell,armada-380-corediv-clock",
- "marvell,mv98dx3236-corediv-clock",
-
-- reg : must be the register address of Core Divider control register
-- #clock-cells : from common clock binding; shall be set to 1
-- clocks : must be set to the parent's phandle
-
-Example:
-
-corediv_clk: corediv-clocks@18740 {
- compatible = "marvell,armada-370-corediv-clock";
- reg = <0x18740 0xc>;
- #clock-cells = <1>;
- clocks = <&pll>;
-};
diff --git a/Documentation/devicetree/bindings/clock/mvebu-cpu-clock.txt b/Documentation/devicetree/bindings/clock/mvebu-cpu-clock.txt
deleted file mode 100644
index 7f28506eaee7..000000000000
--- a/Documentation/devicetree/bindings/clock/mvebu-cpu-clock.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-Device Tree Clock bindings for cpu clock of Marvell EBU platforms
-
-Required properties:
-- compatible : shall be one of the following:
- "marvell,armada-xp-cpu-clock" - cpu clocks for Armada XP
- "marvell,mv98dx3236-cpu-clock" - cpu clocks for 98DX3236 SoC
-- reg : Address and length of the clock complex register set, followed
- by address and length of the PMU DFS registers
-- #clock-cells : should be set to 1.
-- clocks : shall be the input parent clock phandle for the clock.
-
-cpuclk: clock-complex@d0018700 {
- #clock-cells = <1>;
- compatible = "marvell,armada-xp-cpu-clock";
- reg = <0xd0018700 0xA0>, <0x1c054 0x10>;
- clocks = <&coreclk 1>;
-}
-
-cpu@0 {
- compatible = "marvell,sheeva-v7";
- reg = <0>;
- clocks = <&cpuclk 0>;
-};
diff --git a/Documentation/devicetree/bindings/clock/mvebu-gated-clock.txt b/Documentation/devicetree/bindings/clock/mvebu-gated-clock.txt
deleted file mode 100644
index de562da2ae77..000000000000
--- a/Documentation/devicetree/bindings/clock/mvebu-gated-clock.txt
+++ /dev/null
@@ -1,205 +0,0 @@
-* Gated Clock bindings for Marvell EBU SoCs
-
-Marvell Armada 370/375/380/385/39x/XP, Dove and Kirkwood allow some
-peripheral clocks to be gated to save some power. The clock consumer
-should specify the desired clock by having the clock ID in its
-"clocks" phandle cell. The clock ID is directly mapped to the
-corresponding clock gating control bit in HW to ease manual clock
-lookup in datasheet.
-
-The following is a list of provided IDs for Armada 370:
-ID Clock Peripheral
------------------------------------
-0 Audio AC97 Cntrl
-1 pex0_en PCIe 0 Clock out
-2 pex1_en PCIe 1 Clock out
-3 ge1 Gigabit Ethernet 1
-4 ge0 Gigabit Ethernet 0
-5 pex0 PCIe Cntrl 0
-9 pex1 PCIe Cntrl 1
-15 sata0 SATA Host 0
-17 sdio SDHCI Host
-23 crypto CESA (crypto engine)
-25 tdm Time Division Mplx
-28 ddr DDR Cntrl
-30 sata1 SATA Host 0
-
-The following is a list of provided IDs for Armada 375:
-ID Clock Peripheral
------------------------------------
-2 mu Management Unit
-3 pp Packet Processor
-4 ptp PTP
-5 pex0 PCIe 0 Clock out
-6 pex1 PCIe 1 Clock out
-8 audio Audio Cntrl
-11 nd_clk Nand Flash Cntrl
-14 sata0_link SATA 0 Link
-15 sata0_core SATA 0 Core
-16 usb3 USB3 Host
-17 sdio SDHCI Host
-18 usb USB Host
-19 gop Gigabit Ethernet MAC
-20 sata1_link SATA 1 Link
-21 sata1_core SATA 1 Core
-22 xor0 XOR DMA 0
-23 xor1 XOR DMA 0
-24 copro Coprocessor
-25 tdm Time Division Mplx
-28 crypto0_enc Cryptographic Unit Port 0 Encryption
-29 crypto0_core Cryptographic Unit Port 0 Core
-30 crypto1_enc Cryptographic Unit Port 1 Encryption
-31 crypto1_core Cryptographic Unit Port 1 Core
-
-The following is a list of provided IDs for Armada 380/385:
-ID Clock Peripheral
------------------------------------
-0 audio Audio
-2 ge2 Gigabit Ethernet 2
-3 ge1 Gigabit Ethernet 1
-4 ge0 Gigabit Ethernet 0
-5 pex1 PCIe 1
-6 pex2 PCIe 2
-7 pex3 PCIe 3
-8 pex0 PCIe 0
-9 usb3h0 USB3 Host 0
-10 usb3h1 USB3 Host 1
-11 usb3d USB3 Device
-13 bm Buffer Management
-14 crypto0z Cryptographic 0 Z
-15 sata0 SATA 0
-16 crypto1z Cryptographic 1 Z
-17 sdio SDIO
-18 usb2 USB 2
-21 crypto1 Cryptographic 1
-22 xor0 XOR 0
-23 crypto0 Cryptographic 0
-25 tdm Time Division Multiplexing
-28 xor1 XOR 1
-30 sata1 SATA 1
-
-The following is a list of provided IDs for Armada 39x:
-ID Clock Peripheral
------------------------------------
-5 pex1 PCIe 1
-6 pex2 PCIe 2
-7 pex3 PCIe 3
-8 pex0 PCIe 0
-9 usb3h0 USB3 Host 0
-10 usb3h1 USB3 Host 1
-15 sata0 SATA 0
-17 sdio SDIO
-22 xor0 XOR 0
-28 xor1 XOR 1
-
-The following is a list of provided IDs for Armada XP:
-ID Clock Peripheral
------------------------------------
-0 audio Audio Cntrl
-1 ge3 Gigabit Ethernet 3
-2 ge2 Gigabit Ethernet 2
-3 ge1 Gigabit Ethernet 1
-4 ge0 Gigabit Ethernet 0
-5 pex0 PCIe Cntrl 0
-6 pex1 PCIe Cntrl 1
-7 pex2 PCIe Cntrl 2
-8 pex3 PCIe Cntrl 3
-13 bp
-14 sata0lnk
-15 sata0 SATA Host 0
-16 lcd LCD Cntrl
-17 sdio SDHCI Host
-18 usb0 USB Host 0
-19 usb1 USB Host 1
-20 usb2 USB Host 2
-22 xor0 XOR DMA 0
-23 crypto CESA engine
-25 tdm Time Division Mplx
-28 xor1 XOR DMA 1
-29 sata1lnk
-30 sata1 SATA Host 1
-
-The following is a list of provided IDs for 98dx3236:
-ID Clock Peripheral
------------------------------------
-3 ge1 Gigabit Ethernet 1
-4 ge0 Gigabit Ethernet 0
-5 pex0 PCIe Cntrl 0
-17 sdio SDHCI Host
-18 usb0 USB Host 0
-22 xor0 XOR DMA 0
-
-The following is a list of provided IDs for Dove:
-ID Clock Peripheral
------------------------------------
-0 usb0 USB Host 0
-1 usb1 USB Host 1
-2 ge Gigabit Ethernet
-3 sata SATA Host
-4 pex0 PCIe Cntrl 0
-5 pex1 PCIe Cntrl 1
-8 sdio0 SDHCI Host 0
-9 sdio1 SDHCI Host 1
-10 nand NAND Cntrl
-11 camera Camera Cntrl
-12 i2s0 I2S Cntrl 0
-13 i2s1 I2S Cntrl 1
-15 crypto CESA engine
-21 ac97 AC97 Cntrl
-22 pdma Peripheral DMA
-23 xor0 XOR DMA 0
-24 xor1 XOR DMA 1
-30 gephy Gigabit Ethernel PHY
-Note: gephy(30) is implemented as a parent clock of ge(2)
-
-The following is a list of provided IDs for Kirkwood:
-ID Clock Peripheral
------------------------------------
-0 ge0 Gigabit Ethernet 0
-2 pex0 PCIe Cntrl 0
-3 usb0 USB Host 0
-4 sdio SDIO Cntrl
-5 tsu Transp. Stream Unit
-6 dunit SDRAM Cntrl
-7 runit Runit
-8 xor0 XOR DMA 0
-9 audio I2S Cntrl 0
-14 sata0 SATA Host 0
-15 sata1 SATA Host 1
-16 xor1 XOR DMA 1
-17 crypto CESA engine
-18 pex1 PCIe Cntrl 1
-19 ge1 Gigabit Ethernet 1
-20 tdm Time Division Mplx
-
-Required properties:
-- compatible : shall be one of the following:
- "marvell,armada-370-gating-clock" - for Armada 370 SoC clock gating
- "marvell,armada-375-gating-clock" - for Armada 375 SoC clock gating
- "marvell,armada-380-gating-clock" - for Armada 380/385 SoC clock gating
- "marvell,armada-390-gating-clock" - for Armada 39x SoC clock gating
- "marvell,armada-xp-gating-clock" - for Armada XP SoC clock gating
- "marvell,mv98dx3236-gating-clock" - for 98dx3236 SoC clock gating
- "marvell,dove-gating-clock" - for Dove SoC clock gating
- "marvell,kirkwood-gating-clock" - for Kirkwood SoC clock gating
-- reg : shall be the register address of the Clock Gating Control register
-- #clock-cells : from common clock binding; shall be set to 1
-
-Optional properties:
-- clocks : default parent clock phandle (e.g. tclk)
-
-Example:
-
-gate_clk: clock-gating-control@d0038 {
- compatible = "marvell,dove-gating-clock";
- reg = <0xd0038 0x4>;
- /* default parent clock is tclk */
- clocks = <&core_clk 0>;
- #clock-cells = <1>;
-};
-
-sdio0: sdio@92000 {
- compatible = "marvell,dove-sdhci";
- /* get clk gate bit 8 (sdio0) */
- clocks = <&gate_clk 8>;
-};
diff --git a/Documentation/devicetree/bindings/clock/nspire-clock.txt b/Documentation/devicetree/bindings/clock/nspire-clock.txt
deleted file mode 100644
index 7c3bc8bb5b9f..000000000000
--- a/Documentation/devicetree/bindings/clock/nspire-clock.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-TI-NSPIRE Clocks
-
-Required properties:
-- compatible: Valid compatible properties include:
- "lsi,nspire-cx-ahb-divider" for the AHB divider in the CX model
- "lsi,nspire-classic-ahb-divider" for the AHB divider in the older model
- "lsi,nspire-cx-clock" for the base clock in the CX model
- "lsi,nspire-classic-clock" for the base clock in the older model
-
-- reg: Physical base address of the controller and length of memory mapped
- region.
-
-Optional:
-- clocks: For the "nspire-*-ahb-divider" compatible clocks, this is the parent
- clock where it divides the rate from.
-
-Example:
-
-ahb_clk {
- #clock-cells = <0>;
- compatible = "lsi,nspire-cx-clock";
- reg = <0x900B0000 0x4>;
- clocks = <&base_clk>;
-};
diff --git a/Documentation/devicetree/bindings/clock/nuvoton,ma35d1-clk.yaml b/Documentation/devicetree/bindings/clock/nuvoton,ma35d1-clk.yaml
new file mode 100644
index 000000000000..8f0c43683729
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/nuvoton,ma35d1-clk.yaml
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/nuvoton,ma35d1-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Nuvoton MA35D1 Clock Controller Module
+
+maintainers:
+ - Chi-Fang Li <cfli0@nuvoton.com>
+ - Jacky Huang <ychuang3@nuvoton.com>
+
+description: |
+ The MA35D1 clock controller generates clocks for the whole chip,
+ including system clocks and all peripheral clocks.
+
+ See also:
+ include/dt-bindings/clock/ma35d1-clk.h
+
+properties:
+ compatible:
+ items:
+ - const: nuvoton,ma35d1-clk
+
+ reg:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 1
+
+ clocks:
+ maxItems: 1
+
+ nuvoton,pll-mode:
+ description:
+ A list of PLL operation mode corresponding to CAPLL, DDRPLL, APLL,
+ EPLL, and VPLL in sequential.
+ maxItems: 5
+ items:
+ enum:
+ - integer
+ - fractional
+ - spread-spectrum
+ $ref: /schemas/types.yaml#/definitions/non-unique-string-array
+
+required:
+ - compatible
+ - reg
+ - "#clock-cells"
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+
+ clock-controller@40460200 {
+ compatible = "nuvoton,ma35d1-clk";
+ reg = <0x40460200 0x100>;
+ #clock-cells = <1>;
+ clocks = <&clk_hxt>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/nuvoton,npcm750-clk.txt b/Documentation/devicetree/bindings/clock/nuvoton,npcm750-clk.txt
deleted file mode 100644
index f82064546d11..000000000000
--- a/Documentation/devicetree/bindings/clock/nuvoton,npcm750-clk.txt
+++ /dev/null
@@ -1,100 +0,0 @@
-* Nuvoton NPCM7XX Clock Controller
-
-Nuvoton Poleg BMC NPCM7XX contains an integrated clock controller, which
-generates and supplies clocks to all modules within the BMC.
-
-External clocks:
-
-There are six fixed clocks that are generated outside the BMC. All clocks are of
-a known fixed value that cannot be changed. clk_refclk, clk_mcbypck and
-clk_sysbypck are inputs to the clock controller.
-clk_rg1refck, clk_rg2refck and clk_xin are external clocks suppling the
-network. They are set on the device tree, but not used by the clock module. The
-network devices use them directly.
-Example can be found below.
-
-All available clocks are defined as preprocessor macros in:
-dt-bindings/clock/nuvoton,npcm7xx-clock.h
-and can be reused as DT sources.
-
-Required Properties of clock controller:
-
- - compatible: "nuvoton,npcm750-clk" : for clock controller of Nuvoton
- Poleg BMC NPCM750
-
- - reg: physical base address of the clock controller and length of
- memory mapped region.
-
- - #clock-cells: should be 1.
-
-Example: Clock controller node:
-
- clk: clock-controller@f0801000 {
- compatible = "nuvoton,npcm750-clk";
- #clock-cells = <1>;
- reg = <0xf0801000 0x1000>;
- clock-names = "refclk", "sysbypck", "mcbypck";
- clocks = <&clk_refclk>, <&clk_sysbypck>, <&clk_mcbypck>;
- };
-
-Example: Required external clocks for network:
-
- /* external reference clock */
- clk_refclk: clk-refclk {
- compatible = "fixed-clock";
- #clock-cells = <0>;
- clock-frequency = <25000000>;
- clock-output-names = "refclk";
- };
-
- /* external reference clock for cpu. float in normal operation */
- clk_sysbypck: clk-sysbypck {
- compatible = "fixed-clock";
- #clock-cells = <0>;
- clock-frequency = <800000000>;
- clock-output-names = "sysbypck";
- };
-
- /* external reference clock for MC. float in normal operation */
- clk_mcbypck: clk-mcbypck {
- compatible = "fixed-clock";
- #clock-cells = <0>;
- clock-frequency = <800000000>;
- clock-output-names = "mcbypck";
- };
-
- /* external clock signal rg1refck, supplied by the phy */
- clk_rg1refck: clk-rg1refck {
- compatible = "fixed-clock";
- #clock-cells = <0>;
- clock-frequency = <125000000>;
- clock-output-names = "clk_rg1refck";
- };
-
- /* external clock signal rg2refck, supplied by the phy */
- clk_rg2refck: clk-rg2refck {
- compatible = "fixed-clock";
- #clock-cells = <0>;
- clock-frequency = <125000000>;
- clock-output-names = "clk_rg2refck";
- };
-
- clk_xin: clk-xin {
- compatible = "fixed-clock";
- #clock-cells = <0>;
- clock-frequency = <50000000>;
- clock-output-names = "clk_xin";
- };
-
-
-Example: GMAC controller node that consumes two clocks: a generated clk by the
-clock controller and a fixed clock from DT (clk_rg1refck).
-
- ethernet0: ethernet@f0802000 {
- compatible = "snps,dwmac";
- reg = <0xf0802000 0x2000>;
- interrupts = <0 14 4>;
- interrupt-names = "macirq";
- clocks = <&clk_rg1refck>, <&clk NPCM7XX_CLK_AHB>;
- clock-names = "stmmaceth", "clk_gmac";
- };
diff --git a/Documentation/devicetree/bindings/clock/nuvoton,npcm750-clk.yaml b/Documentation/devicetree/bindings/clock/nuvoton,npcm750-clk.yaml
new file mode 100644
index 000000000000..694dac68619c
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/nuvoton,npcm750-clk.yaml
@@ -0,0 +1,66 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/nuvoton,npcm750-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Nuvoton NPCM7XX Clock Controller
+
+maintainers:
+ - Tali Perry <tali.perry1@gmail.com>
+
+description: >
+ Nuvoton Poleg BMC NPCM7XX contains an integrated clock controller, which
+ generates and supplies clocks to all modules within the BMC.
+
+ External clocks:
+
+ There are six fixed clocks that are generated outside the BMC. All clocks are of
+ a known fixed value that cannot be changed. clk_refclk, clk_mcbypck and
+ clk_sysbypck are inputs to the clock controller.
+ clk_rg1refck, clk_rg2refck and clk_xin are external clocks suppling the
+ network. They are set on the device tree, but not used by the clock module. The
+ network devices use them directly.
+
+ All available clocks are defined as preprocessor macros in:
+ dt-bindings/clock/nuvoton,npcm7xx-clock.h
+ and can be reused as DT sources.
+
+properties:
+ compatible:
+ const: nuvoton,npcm750-clk
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ clock-names:
+ items:
+ - const: refclk
+ - const: sysbypck
+ - const: mcbypck
+
+ clocks:
+ items:
+ - description: refclk
+ - description: sysbypck
+ - description: mcbypck
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@f0801000 {
+ compatible = "nuvoton,npcm750-clk";
+ #clock-cells = <1>;
+ reg = <0xf0801000 0x1000>;
+ clock-names = "refclk", "sysbypck", "mcbypck";
+ clocks = <&clk_refclk>, <&clk_sysbypck>, <&clk_mcbypck>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/nuvoton,npcm845-clk.yaml b/Documentation/devicetree/bindings/clock/nuvoton,npcm845-clk.yaml
new file mode 100644
index 000000000000..b901ca13cd25
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/nuvoton,npcm845-clk.yaml
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/nuvoton,npcm845-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Nuvoton NPCM8XX Clock Controller
+
+maintainers:
+ - Tomer Maimon <tmaimon77@gmail.com>
+
+description: |
+ Nuvoton Arbel BMC NPCM8XX contains an integrated clock controller, which
+ generates and supplies clocks to all modules within the BMC.
+
+properties:
+ compatible:
+ enum:
+ - nuvoton,npcm845-clk
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+ description:
+ See include/dt-bindings/clock/nuvoton,npcm8xx-clock.h for the full
+ list of NPCM8XX clock IDs.
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ ahb {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ clock-controller@f0801000 {
+ compatible = "nuvoton,npcm845-clk";
+ reg = <0x0 0xf0801000 0x0 0x1000>;
+ #clock-cells = <1>;
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/nvidia,tegra114-car.txt b/Documentation/devicetree/bindings/clock/nvidia,tegra114-car.txt
deleted file mode 100644
index 9acea9d93160..000000000000
--- a/Documentation/devicetree/bindings/clock/nvidia,tegra114-car.txt
+++ /dev/null
@@ -1,63 +0,0 @@
-NVIDIA Tegra114 Clock And Reset Controller
-
-This binding uses the common clock binding:
-Documentation/devicetree/bindings/clock/clock-bindings.txt
-
-The CAR (Clock And Reset) Controller on Tegra is the HW module responsible
-for muxing and gating Tegra's clocks, and setting their rates.
-
-Required properties :
-- compatible : Should be "nvidia,tegra114-car"
-- reg : Should contain CAR registers location and length
-- clocks : Should contain phandle and clock specifiers for two clocks:
- the 32 KHz "32k_in", and the board-specific oscillator "osc".
-- #clock-cells : Should be 1.
- In clock consumers, this cell represents the clock ID exposed by the
- CAR. The assignments may be found in header file
- <dt-bindings/clock/tegra114-car.h>.
-- #reset-cells : Should be 1.
- In clock consumers, this cell represents the bit number in the CAR's
- array of CLK_RST_CONTROLLER_RST_DEVICES_* registers.
-
-Example SoC include file:
-
-/ {
- tegra_car: clock {
- compatible = "nvidia,tegra114-car";
- reg = <0x60006000 0x1000>;
- #clock-cells = <1>;
- #reset-cells = <1>;
- };
-
- usb@c5004000 {
- clocks = <&tegra_car TEGRA114_CLK_USB2>;
- };
-};
-
-Example board file:
-
-/ {
- clocks {
- compatible = "simple-bus";
- #address-cells = <1>;
- #size-cells = <0>;
-
- osc: clock@0 {
- compatible = "fixed-clock";
- reg = <0>;
- #clock-cells = <0>;
- clock-frequency = <12000000>;
- };
-
- clk_32k: clock@1 {
- compatible = "fixed-clock";
- reg = <1>;
- #clock-cells = <0>;
- clock-frequency = <32768>;
- };
- };
-
- &tegra_car {
- clocks = <&clk_32k> <&osc>;
- };
-};
diff --git a/Documentation/devicetree/bindings/clock/nvidia,tegra124-car.txt b/Documentation/devicetree/bindings/clock/nvidia,tegra124-car.txt
deleted file mode 100644
index 7f02fb4ca4ad..000000000000
--- a/Documentation/devicetree/bindings/clock/nvidia,tegra124-car.txt
+++ /dev/null
@@ -1,107 +0,0 @@
-NVIDIA Tegra124 and Tegra132 Clock And Reset Controller
-
-This binding uses the common clock binding:
-Documentation/devicetree/bindings/clock/clock-bindings.txt
-
-The CAR (Clock And Reset) Controller on Tegra is the HW module responsible
-for muxing and gating Tegra's clocks, and setting their rates.
-
-Required properties :
-- compatible : Should be "nvidia,tegra124-car" or "nvidia,tegra132-car"
-- reg : Should contain CAR registers location and length
-- clocks : Should contain phandle and clock specifiers for two clocks:
- the 32 KHz "32k_in", and the board-specific oscillator "osc".
-- #clock-cells : Should be 1.
- In clock consumers, this cell represents the clock ID exposed by the
- CAR. The assignments may be found in the header files
- <dt-bindings/clock/tegra124-car-common.h> (which covers IDs common
- to Tegra124 and Tegra132) and <dt-bindings/clock/tegra124-car.h>
- (for Tegra124-specific clocks).
-- #reset-cells : Should be 1.
- In clock consumers, this cell represents the bit number in the CAR's
- array of CLK_RST_CONTROLLER_RST_DEVICES_* registers.
-- nvidia,external-memory-controller : phandle of the EMC driver.
-
-The node should contain a "emc-timings" subnode for each supported RAM type (see
-field RAM_CODE in register PMC_STRAPPING_OPT_A).
-
-Required properties for "emc-timings" nodes :
-- nvidia,ram-code : Should contain the value of RAM_CODE this timing set
- is used for.
-
-Each "emc-timings" node should contain a "timing" subnode for every supported
-EMC clock rate.
-
-Required properties for "timing" nodes :
-- clock-frequency : Should contain the memory clock rate to which this timing
-relates.
-- nvidia,parent-clock-frequency : Should contain the rate at which the current
-parent of the EMC clock should be running at this timing.
-- clocks : Must contain an entry for each entry in clock-names.
- See ../clocks/clock-bindings.txt for details.
-- clock-names : Must include the following entries:
- - emc-parent : the clock that should be the parent of the EMC clock at this
-timing.
-
-Example SoC include file:
-
-/ {
- tegra_car: clock@60006000 {
- compatible = "nvidia,tegra124-car";
- reg = <0x60006000 0x1000>;
- #clock-cells = <1>;
- #reset-cells = <1>;
- nvidia,external-memory-controller = <&emc>;
- };
-
- usb@c5004000 {
- clocks = <&tegra_car TEGRA124_CLK_USB2>;
- };
-};
-
-Example board file:
-
-/ {
- clocks {
- compatible = "simple-bus";
- #address-cells = <1>;
- #size-cells = <0>;
-
- osc: clock@0 {
- compatible = "fixed-clock";
- reg = <0>;
- #clock-cells = <0>;
- clock-frequency = <112400000>;
- };
-
- clk_32k: clock@1 {
- compatible = "fixed-clock";
- reg = <1>;
- #clock-cells = <0>;
- clock-frequency = <32768>;
- };
- };
-
- &tegra_car {
- clocks = <&clk_32k> <&osc>;
- };
-
- clock@60006000 {
- emc-timings-3 {
- nvidia,ram-code = <3>;
-
- timing-12750000 {
- clock-frequency = <12750000>;
- nvidia,parent-clock-frequency = <408000000>;
- clocks = <&tegra_car TEGRA124_CLK_PLL_P>;
- clock-names = "emc-parent";
- };
- timing-20400000 {
- clock-frequency = <20400000>;
- nvidia,parent-clock-frequency = <408000000>;
- clocks = <&tegra_car TEGRA124_CLK_PLL_P>;
- clock-names = "emc-parent";
- };
- };
- };
-};
diff --git a/Documentation/devicetree/bindings/clock/nvidia,tegra124-car.yaml b/Documentation/devicetree/bindings/clock/nvidia,tegra124-car.yaml
new file mode 100644
index 000000000000..a9ba21144a56
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/nvidia,tegra124-car.yaml
@@ -0,0 +1,110 @@
+# SPDX-License-Identifier: (GPL-2.0+ OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/nvidia,tegra124-car.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NVIDIA Tegra Clock and Reset Controller
+
+maintainers:
+ - Jon Hunter <jonathanh@nvidia.com>
+ - Thierry Reding <thierry.reding@gmail.com>
+
+description: |
+ The Clock and Reset (CAR) is the HW module responsible for muxing and gating
+ Tegra's clocks, and setting their rates. It comprises CLKGEN and RSTGEN units.
+
+ CLKGEN provides the registers to program the PLLs. It controls most of
+ the clock source programming and most of the clock dividers.
+
+ CLKGEN input signals include the external clock for the reference frequency
+ (12 MHz, 26 MHz) and the external clock for the Real Time Clock (32.768 KHz).
+
+ Outputs from CLKGEN are inputs clock of the h/w blocks in the Tegra system.
+
+ RSTGEN provides the registers needed to control resetting of each block in
+ the Tegra system.
+
+properties:
+ compatible:
+ enum:
+ - nvidia,tegra124-car
+ - nvidia,tegra132-car
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ "#reset-cells":
+ const: 1
+
+ nvidia,external-memory-controller:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ phandle of the external memory controller node
+
+patternProperties:
+ "^emc-timings-[0-9]+$":
+ type: object
+ properties:
+ nvidia,ram-code:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ value of the RAM_CODE field in the PMC_STRAPPING_OPT_A register that
+ this timing set is used for
+
+ patternProperties:
+ "^timing-[0-9]+$":
+ type: object
+ properties:
+ clock-frequency:
+ description:
+ external memory clock rate in Hz
+ minimum: 1000000
+ maximum: 1000000000
+
+ nvidia,parent-clock-frequency:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ rate of parent clock in Hz
+ minimum: 1000000
+ maximum: 1000000000
+
+ clocks:
+ items:
+ - description: parent clock of EMC
+
+ clock-names:
+ items:
+ - const: emc-parent
+
+ required:
+ - clock-frequency
+ - nvidia,parent-clock-frequency
+ - clocks
+ - clock-names
+
+ additionalProperties: false
+
+ additionalProperties: false
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+ - "#reset-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/tegra124-car.h>
+
+ car: clock-controller@60006000 {
+ compatible = "nvidia,tegra124-car";
+ reg = <0x60006000 0x1000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/nvidia,tegra124-dfll.txt b/Documentation/devicetree/bindings/clock/nvidia,tegra124-dfll.txt
index dff236f524a7..f7d347385b57 100644
--- a/Documentation/devicetree/bindings/clock/nvidia,tegra124-dfll.txt
+++ b/Documentation/devicetree/bindings/clock/nvidia,tegra124-dfll.txt
@@ -8,10 +8,11 @@ the fast CPU cluster. It consists of a free-running voltage controlled
oscillator connected to the CPU voltage rail (VDD_CPU), and a closed loop
control module that will automatically adjust the VDD_CPU voltage by
communicating with an off-chip PMIC either via an I2C bus or via PWM signals.
-Currently only the I2C mode is supported by these bindings.
Required properties:
-- compatible : should be "nvidia,tegra124-dfll"
+- compatible : should be one of:
+ - "nvidia,tegra124-dfll": for Tegra124
+ - "nvidia,tegra210-dfll": for Tegra210
- reg : Defines the following set of registers, in the order listed:
- registers for the DFLL control logic.
- registers for the I2C output logic.
@@ -45,10 +46,31 @@ Required properties for the control loop parameters:
Optional properties for the control loop parameters:
- nvidia,cg-scale: Boolean value, see the field DFLL_PARAMS_CG_SCALE in the TRM.
+Optional properties for mode selection:
+- nvidia,pwm-to-pmic: Use PWM to control regulator rather then I2C.
+
Required properties for I2C mode:
- nvidia,i2c-fs-rate: I2C transfer rate, if using full speed mode.
-Example:
+Required properties for PWM mode:
+- nvidia,pwm-period-nanoseconds: period of PWM square wave in nanoseconds.
+- nvidia,pwm-tristate-microvolts: Regulator voltage in micro volts when PWM
+ control is disabled and the PWM output is tristated. Note that this voltage is
+ configured in hardware, typically via a resistor divider.
+- nvidia,pwm-min-microvolts: Regulator voltage in micro volts when PWM control
+ is enabled and PWM output is low. Hence, this is the minimum output voltage
+ that the regulator supports when PWM control is enabled.
+- nvidia,pwm-voltage-step-microvolts: Voltage increase in micro volts
+ corresponding to a 1/33th increase in duty cycle. Eg the voltage for 2/33th
+ duty cycle would be: nvidia,pwm-min-microvolts +
+ nvidia,pwm-voltage-step-microvolts * 2.
+- pinctrl-0: I/O pad configuration when PWM control is enabled.
+- pinctrl-1: I/O pad configuration when PWM control is disabled.
+- pinctrl-names: must include the following entries:
+ - dvfs_pwm_enable: I/O pad configuration when PWM control is enabled.
+ - dvfs_pwm_disable: I/O pad configuration when PWM control is disabled.
+
+Example for I2C:
clock@70110000 {
compatible = "nvidia,tegra124-dfll";
@@ -76,3 +98,58 @@ clock@70110000 {
nvidia,i2c-fs-rate = <400000>;
};
+
+Example for PWM:
+
+clock@70110000 {
+ compatible = "nvidia,tegra124-dfll";
+ reg = <0 0x70110000 0 0x100>, /* DFLL control */
+ <0 0x70110000 0 0x100>, /* I2C output control */
+ <0 0x70110100 0 0x100>, /* Integrated I2C controller */
+ <0 0x70110200 0 0x100>; /* Look-up table RAM */
+ interrupts = <GIC_SPI 62 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA210_CLK_DFLL_SOC>,
+ <&tegra_car TEGRA210_CLK_DFLL_REF>,
+ <&tegra_car TEGRA124_CLK_I2C5>;;
+ clock-names = "soc", "ref", "i2c";
+ resets = <&tegra_car TEGRA124_RST_DFLL_DVCO>;
+ reset-names = "dvco";
+ #clock-cells = <0>;
+ clock-output-names = "dfllCPU_out";
+
+ nvidia,sample-rate = <25000>;
+ nvidia,droop-ctrl = <0x00000f00>;
+ nvidia,force-mode = <1>;
+ nvidia,cf = <6>;
+ nvidia,ci = <0>;
+ nvidia,cg = <2>;
+
+ nvidia,pwm-min-microvolts = <708000>; /* 708mV */
+ nvidia,pwm-period-nanoseconds = <2500>; /* 2.5us */
+ nvidia,pwm-to-pmic;
+ nvidia,pwm-tristate-microvolts = <1000000>;
+ nvidia,pwm-voltage-step-microvolts = <19200>; /* 19.2mV */
+
+ pinctrl-names = "dvfs_pwm_enable", "dvfs_pwm_disable";
+ pinctrl-0 = <&dvfs_pwm_active_state>;
+ pinctrl-1 = <&dvfs_pwm_inactive_state>;
+};
+
+/* pinmux nodes added for completeness. Binding doc can be found in:
+ * Documentation/devicetree/bindings/pinctrl/nvidia,tegra210-pinmux.yaml
+ */
+
+pinmux: pinmux@700008d4 {
+ dvfs_pwm_active_state: dvfs_pwm_active {
+ dvfs_pwm_pbb1 {
+ nvidia,pins = "dvfs_pwm_pbb1";
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ };
+ };
+ dvfs_pwm_inactive_state: dvfs_pwm_inactive {
+ dvfs_pwm_pbb1 {
+ nvidia,pins = "dvfs_pwm_pbb1";
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/clock/nvidia,tegra20-car.txt b/Documentation/devicetree/bindings/clock/nvidia,tegra20-car.txt
deleted file mode 100644
index 6c5901b503d0..000000000000
--- a/Documentation/devicetree/bindings/clock/nvidia,tegra20-car.txt
+++ /dev/null
@@ -1,63 +0,0 @@
-NVIDIA Tegra20 Clock And Reset Controller
-
-This binding uses the common clock binding:
-Documentation/devicetree/bindings/clock/clock-bindings.txt
-
-The CAR (Clock And Reset) Controller on Tegra is the HW module responsible
-for muxing and gating Tegra's clocks, and setting their rates.
-
-Required properties :
-- compatible : Should be "nvidia,tegra20-car"
-- reg : Should contain CAR registers location and length
-- clocks : Should contain phandle and clock specifiers for two clocks:
- the 32 KHz "32k_in", and the board-specific oscillator "osc".
-- #clock-cells : Should be 1.
- In clock consumers, this cell represents the clock ID exposed by the
- CAR. The assignments may be found in header file
- <dt-bindings/clock/tegra20-car.h>.
-- #reset-cells : Should be 1.
- In clock consumers, this cell represents the bit number in the CAR's
- array of CLK_RST_CONTROLLER_RST_DEVICES_* registers.
-
-Example SoC include file:
-
-/ {
- tegra_car: clock {
- compatible = "nvidia,tegra20-car";
- reg = <0x60006000 0x1000>;
- #clock-cells = <1>;
- #reset-cells = <1>;
- };
-
- usb@c5004000 {
- clocks = <&tegra_car TEGRA20_CLK_USB2>;
- };
-};
-
-Example board file:
-
-/ {
- clocks {
- compatible = "simple-bus";
- #address-cells = <1>;
- #size-cells = <0>;
-
- osc: clock@0 {
- compatible = "fixed-clock";
- reg = <0>;
- #clock-cells = <0>;
- clock-frequency = <12000000>;
- };
-
- clk_32k: clock@1 {
- compatible = "fixed-clock";
- reg = <1>;
- #clock-cells = <0>;
- clock-frequency = <32768>;
- };
- };
-
- &tegra_car {
- clocks = <&clk_32k> <&osc>;
- };
-};
diff --git a/Documentation/devicetree/bindings/clock/nvidia,tegra20-car.yaml b/Documentation/devicetree/bindings/clock/nvidia,tegra20-car.yaml
new file mode 100644
index 000000000000..bee2dd4b29bf
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/nvidia,tegra20-car.yaml
@@ -0,0 +1,99 @@
+# SPDX-License-Identifier: (GPL-2.0+ OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/nvidia,tegra20-car.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NVIDIA Tegra Clock and Reset Controller
+
+maintainers:
+ - Jon Hunter <jonathanh@nvidia.com>
+ - Thierry Reding <thierry.reding@gmail.com>
+
+description: |
+ The Clock and Reset (CAR) is the HW module responsible for muxing and gating
+ Tegra's clocks, and setting their rates. It comprises CLKGEN and RSTGEN units.
+
+ CLKGEN provides the registers to program the PLLs. It controls most of
+ the clock source programming and most of the clock dividers.
+
+ CLKGEN input signals include the external clock for the reference frequency
+ (12 MHz, 26 MHz) and the external clock for the Real Time Clock (32.768 KHz).
+
+ Outputs from CLKGEN are inputs clock of the h/w blocks in the Tegra system.
+
+ RSTGEN provides the registers needed to control resetting of each block in
+ the Tegra system.
+
+properties:
+ compatible:
+ enum:
+ - nvidia,tegra20-car
+ - nvidia,tegra30-car
+ - nvidia,tegra114-car
+ - nvidia,tegra210-car
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ "#reset-cells":
+ const: 1
+
+patternProperties:
+ "^(sclk)|(pll-[cem])$":
+ type: object
+ properties:
+ compatible:
+ enum:
+ - nvidia,tegra20-sclk
+ - nvidia,tegra30-sclk
+ - nvidia,tegra30-pllc
+ - nvidia,tegra30-plle
+ - nvidia,tegra30-pllm
+
+ operating-points-v2: true
+
+ clocks:
+ items:
+ - description: node's clock
+
+ power-domains:
+ maxItems: 1
+ description: phandle to the core SoC power domain
+
+ required:
+ - compatible
+ - operating-points-v2
+ - clocks
+ - power-domains
+
+ additionalProperties: false
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+ - "#reset-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/tegra20-car.h>
+
+ car: clock-controller@60006000 {
+ compatible = "nvidia,tegra20-car";
+ reg = <0x60006000 0x1000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+
+ sclk {
+ compatible = "nvidia,tegra20-sclk";
+ operating-points-v2 = <&opp_table>;
+ clocks = <&tegra_car TEGRA20_CLK_SCLK>;
+ power-domains = <&domain>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/nvidia,tegra210-car.txt b/Documentation/devicetree/bindings/clock/nvidia,tegra210-car.txt
deleted file mode 100644
index 26f237f641b7..000000000000
--- a/Documentation/devicetree/bindings/clock/nvidia,tegra210-car.txt
+++ /dev/null
@@ -1,56 +0,0 @@
-NVIDIA Tegra210 Clock And Reset Controller
-
-This binding uses the common clock binding:
-Documentation/devicetree/bindings/clock/clock-bindings.txt
-
-The CAR (Clock And Reset) Controller on Tegra is the HW module responsible
-for muxing and gating Tegra's clocks, and setting their rates.
-
-Required properties :
-- compatible : Should be "nvidia,tegra210-car"
-- reg : Should contain CAR registers location and length
-- clocks : Should contain phandle and clock specifiers for two clocks:
- the 32 KHz "32k_in".
-- #clock-cells : Should be 1.
- In clock consumers, this cell represents the clock ID exposed by the
- CAR. The assignments may be found in header file
- <dt-bindings/clock/tegra210-car.h>.
-- #reset-cells : Should be 1.
- In clock consumers, this cell represents the bit number in the CAR's
- array of CLK_RST_CONTROLLER_RST_DEVICES_* registers.
-
-Example SoC include file:
-
-/ {
- tegra_car: clock {
- compatible = "nvidia,tegra210-car";
- reg = <0x60006000 0x1000>;
- #clock-cells = <1>;
- #reset-cells = <1>;
- };
-
- usb@c5004000 {
- clocks = <&tegra_car TEGRA210_CLK_USB2>;
- };
-};
-
-Example board file:
-
-/ {
- clocks {
- compatible = "simple-bus";
- #address-cells = <1>;
- #size-cells = <0>;
-
- clk_32k: clock@1 {
- compatible = "fixed-clock";
- reg = <1>;
- #clock-cells = <0>;
- clock-frequency = <32768>;
- };
- };
-
- &tegra_car {
- clocks = <&clk_32k>;
- };
-};
diff --git a/Documentation/devicetree/bindings/clock/nvidia,tegra30-car.txt b/Documentation/devicetree/bindings/clock/nvidia,tegra30-car.txt
deleted file mode 100644
index 63618cde12df..000000000000
--- a/Documentation/devicetree/bindings/clock/nvidia,tegra30-car.txt
+++ /dev/null
@@ -1,63 +0,0 @@
-NVIDIA Tegra30 Clock And Reset Controller
-
-This binding uses the common clock binding:
-Documentation/devicetree/bindings/clock/clock-bindings.txt
-
-The CAR (Clock And Reset) Controller on Tegra is the HW module responsible
-for muxing and gating Tegra's clocks, and setting their rates.
-
-Required properties :
-- compatible : Should be "nvidia,tegra30-car"
-- reg : Should contain CAR registers location and length
-- clocks : Should contain phandle and clock specifiers for two clocks:
- the 32 KHz "32k_in", and the board-specific oscillator "osc".
-- #clock-cells : Should be 1.
- In clock consumers, this cell represents the clock ID exposed by the
- CAR. The assignments may be found in header file
- <dt-bindings/clock/tegra30-car.h>.
-- #reset-cells : Should be 1.
- In clock consumers, this cell represents the bit number in the CAR's
- array of CLK_RST_CONTROLLER_RST_DEVICES_* registers.
-
-Example SoC include file:
-
-/ {
- tegra_car: clock {
- compatible = "nvidia,tegra30-car";
- reg = <0x60006000 0x1000>;
- #clock-cells = <1>;
- #reset-cells = <1>;
- };
-
- usb@c5004000 {
- clocks = <&tegra_car TEGRA30_CLK_USB2>;
- };
-};
-
-Example board file:
-
-/ {
- clocks {
- compatible = "simple-bus";
- #address-cells = <1>;
- #size-cells = <0>;
-
- osc: clock@0 {
- compatible = "fixed-clock";
- reg = <0>;
- #clock-cells = <0>;
- clock-frequency = <12000000>;
- };
-
- clk_32k: clock@1 {
- compatible = "fixed-clock";
- reg = <1>;
- #clock-cells = <0>;
- clock-frequency = <32768>;
- };
- };
-
- &tegra_car {
- clocks = <&clk_32k> <&osc>;
- };
-};
diff --git a/Documentation/devicetree/bindings/clock/nxp,imx95-blk-ctl.yaml b/Documentation/devicetree/bindings/clock/nxp,imx95-blk-ctl.yaml
new file mode 100644
index 000000000000..27403b4c52d6
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/nxp,imx95-blk-ctl.yaml
@@ -0,0 +1,60 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/nxp,imx95-blk-ctl.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NXP i.MX95 Block Control
+
+maintainers:
+ - Peng Fan <peng.fan@nxp.com>
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - nxp,imx94-display-csr
+ - nxp,imx94-lvds-csr
+ - nxp,imx95-camera-csr
+ - nxp,imx95-display-csr
+ - nxp,imx95-hsio-blk-ctl
+ - nxp,imx95-lvds-csr
+ - nxp,imx95-netcmix-blk-ctrl
+ - nxp,imx95-vpu-csr
+ - const: syscon
+
+ reg:
+ maxItems: 1
+
+ power-domains:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+ description:
+ The clock consumer should specify the desired clock by having the clock
+ ID in its "clocks" phandle cell. See
+ include/dt-bindings/clock/nxp,imx95-clock.h
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+ - power-domains
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ syscon@4c410000 {
+ compatible = "nxp,imx95-vpu-csr", "syscon";
+ reg = <0x4c410000 0x10000>;
+ #clock-cells = <1>;
+ clocks = <&scmi_clk 114>;
+ power-domains = <&scmi_devpd 21>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/nxp,imx95-display-master-csr.yaml b/Documentation/devicetree/bindings/clock/nxp,imx95-display-master-csr.yaml
new file mode 100644
index 000000000000..07f7412e7658
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/nxp,imx95-display-master-csr.yaml
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/nxp,imx95-display-master-csr.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NXP i.MX95 Display Master Block Control
+
+maintainers:
+ - Peng Fan <peng.fan@nxp.com>
+
+properties:
+ compatible:
+ items:
+ - const: nxp,imx95-display-master-csr
+ - const: syscon
+
+ reg:
+ maxItems: 1
+
+ power-domains:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+ description:
+ The clock consumer should specify the desired clock by having the clock
+ ID in its "clocks" phandle cell. See
+ include/dt-bindings/clock/nxp,imx95-clock.h
+
+ mux-controller:
+ type: object
+ $ref: /schemas/mux/reg-mux.yaml
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+ - mux-controller
+ - power-domains
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ syscon@4c410000 {
+ compatible = "nxp,imx95-display-master-csr", "syscon";
+ reg = <0x4c410000 0x10000>;
+ #clock-cells = <1>;
+ clocks = <&scmi_clk 62>;
+ power-domains = <&scmi_devpd 3>;
+
+ mux: mux-controller {
+ compatible = "mmio-mux";
+ #mux-control-cells = <1>;
+ mux-reg-masks = <0x4 0x00000001>; /* Pixel_link_sel */
+ idle-states = <0>;
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/nxp,lpc1850-ccu.yaml b/Documentation/devicetree/bindings/clock/nxp,lpc1850-ccu.yaml
new file mode 100644
index 000000000000..5459038cc954
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/nxp,lpc1850-ccu.yaml
@@ -0,0 +1,104 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/nxp,lpc1850-ccu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NXP LPC1850 Clock Control Unit (CCU)
+
+description:
+ Each CGU base clock has several clock branches which can be turned on
+ or off independently by the Clock Control Units CCU1 or CCU2. The
+ branch clocks are distributed between CCU1 and CCU2.
+
+ Above text taken from NXP LPC1850 User Manual
+
+maintainers:
+ - Frank Li <Frank.Li@nxp.com>
+
+properties:
+ compatible:
+ const: nxp,lpc1850-ccu
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ clocks:
+ minItems: 1
+ maxItems: 8
+
+ clock-names:
+ minItems: 1
+ maxItems: 8
+ items:
+ enum:
+ - base_usb0_clk
+ - base_periph_clk
+ - base_usb1_clk
+ - base_cpu_clk
+ - base_spifi_clk
+ - base_spi_clk
+ - base_apb1_clk
+ - base_apb3_clk
+ - base_adchs_clk
+ - base_sdio_clk
+ - base_ssp0_clk
+ - base_ssp1_clk
+ - base_uart0_clk
+ - base_uart1_clk
+ - base_uart2_clk
+ - base_uart3_clk
+ - base_audio_clk
+ description:
+ Which branch clocks that are available on the CCU depends on the
+ specific LPC part. Check the user manual for your specific part.
+
+ A list of CCU clocks can be found in dt-bindings/clock/lpc18xx-ccu.h.
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/lpc18xx-cgu.h>
+
+ clock-controller@40051000 {
+ compatible = "nxp,lpc1850-ccu";
+ reg = <0x40051000 0x1000>;
+ #clock-cells = <1>;
+ clocks = <&cgu BASE_APB3_CLK>, <&cgu BASE_APB1_CLK>,
+ <&cgu BASE_SPIFI_CLK>, <&cgu BASE_CPU_CLK>,
+ <&cgu BASE_PERIPH_CLK>, <&cgu BASE_USB0_CLK>,
+ <&cgu BASE_USB1_CLK>, <&cgu BASE_SPI_CLK>;
+ clock-names = "base_apb3_clk", "base_apb1_clk",
+ "base_spifi_clk", "base_cpu_clk",
+ "base_periph_clk", "base_usb0_clk",
+ "base_usb1_clk", "base_spi_clk";
+ };
+
+ - |
+ #include <dt-bindings/clock/lpc18xx-cgu.h>
+
+ clock-controller@40052000 {
+ compatible = "nxp,lpc1850-ccu";
+ reg = <0x40052000 0x1000>;
+ #clock-cells = <1>;
+ clocks = <&cgu BASE_AUDIO_CLK>, <&cgu BASE_UART3_CLK>,
+ <&cgu BASE_UART2_CLK>, <&cgu BASE_UART1_CLK>,
+ <&cgu BASE_UART0_CLK>, <&cgu BASE_SSP1_CLK>,
+ <&cgu BASE_SSP0_CLK>, <&cgu BASE_SDIO_CLK>;
+ clock-names = "base_audio_clk", "base_uart3_clk",
+ "base_uart2_clk", "base_uart1_clk",
+ "base_uart0_clk", "base_ssp1_clk",
+ "base_ssp0_clk", "base_sdio_clk";
+ };
+
diff --git a/Documentation/devicetree/bindings/clock/nxp,lpc1850-cgu.yaml b/Documentation/devicetree/bindings/clock/nxp,lpc1850-cgu.yaml
new file mode 100644
index 000000000000..ed178c7df00c
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/nxp,lpc1850-cgu.yaml
@@ -0,0 +1,99 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/nxp,lpc1850-cgu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NXP LPC1850 Clock Generation Unit (CGU)
+
+description: >
+ The CGU generates multiple independent clocks for the core and the
+ peripheral blocks of the LPC18xx. Each independent clock is called
+ a base clock and itself is one of the inputs to the two Clock
+ Control Units (CCUs) which control the branch clocks to the
+ individual peripherals.
+
+ The CGU selects the inputs to the clock generators from multiple
+ clock sources, controls the clock generation, and routes the outputs
+ of the clock generators through the clock source bus to the output
+ stages. Each output stage provides an independent clock source and
+ corresponds to one of the base clocks for the LPC18xx.
+
+ Above text taken from NXP LPC1850 User Manual.
+
+maintainers:
+ - Frank Li <Frank.Li@nxp.com>
+
+properties:
+ compatible:
+ const: nxp,lpc1850-cgu
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+ description: |
+ Which base clocks that are available on the CGU depends on the
+ specific LPC part. Base clocks are numbered from 0 to 27.
+
+ Number: Name: Description:
+ 0 BASE_SAFE_CLK Base safe clock (always on) for WWDT
+ 1 BASE_USB0_CLK Base clock for USB0
+ 2 BASE_PERIPH_CLK Base clock for Cortex-M0SUB subsystem,
+ SPI, and SGPIO
+ 3 BASE_USB1_CLK Base clock for USB1
+ 4 BASE_CPU_CLK System base clock for ARM Cortex-M core
+ and APB peripheral blocks #0 and #2
+ 5 BASE_SPIFI_CLK Base clock for SPIFI
+ 6 BASE_SPI_CLK Base clock for SPI
+ 7 BASE_PHY_RX_CLK Base clock for Ethernet PHY Receive clock
+ 8 BASE_PHY_TX_CLK Base clock for Ethernet PHY Transmit clock
+ 9 BASE_APB1_CLK Base clock for APB peripheral block # 1
+ 10 BASE_APB3_CLK Base clock for APB peripheral block # 3
+ 11 BASE_LCD_CLK Base clock for LCD
+ 12 BASE_ADCHS_CLK Base clock for ADCHS
+ 13 BASE_SDIO_CLK Base clock for SD/MMC
+ 14 BASE_SSP0_CLK Base clock for SSP0
+ 15 BASE_SSP1_CLK Base clock for SSP1
+ 16 BASE_UART0_CLK Base clock for UART0
+ 17 BASE_UART1_CLK Base clock for UART1
+ 18 BASE_UART2_CLK Base clock for UART2
+ 19 BASE_UART3_CLK Base clock for UART3
+ 20 BASE_OUT_CLK Base clock for CLKOUT pin
+ 24-21 - Reserved
+ 25 BASE_AUDIO_CLK Base clock for audio system (I2S)
+ 26 BASE_CGU_OUT0_CLK Base clock for CGU_OUT0 clock output
+ 27 BASE_CGU_OUT1_CLK Base clock for CGU_OUT1 clock output
+
+ BASE_PERIPH_CLK and BASE_SPI_CLK is only available on LPC43xx.
+ BASE_ADCHS_CLK is only available on LPC4370.
+
+ clocks:
+ maxItems: 5
+
+ clock-indices:
+ minItems: 1
+ maxItems: 28
+
+ clock-output-names:
+ minItems: 1
+ maxItems: 28
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@40050000 {
+ compatible = "nxp,lpc1850-cgu";
+ reg = <0x40050000 0x1000>;
+ #clock-cells = <1>;
+ clocks = <&xtal>, <&creg_clk 1>, <&enet_rx_clk>, <&enet_tx_clk>, <&gp_clkin>;
+ };
+
diff --git a/Documentation/devicetree/bindings/clock/nxp,lpc3220-clk.txt b/Documentation/devicetree/bindings/clock/nxp,lpc3220-clk.txt
deleted file mode 100644
index 20cbca3f41d8..000000000000
--- a/Documentation/devicetree/bindings/clock/nxp,lpc3220-clk.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-NXP LPC32xx Clock Controller
-
-Required properties:
-- compatible: should be "nxp,lpc3220-clk"
-- reg: should contain clock controller registers location and length
-- #clock-cells: must be 1, the cell holds id of a clock provided by the
- clock controller
-- clocks: phandles of external oscillators, the list must contain one
- 32768 Hz oscillator and may have one optional high frequency oscillator
-- clock-names: list of external oscillator clock names, must contain
- "xtal_32k" and may have optional "xtal"
-
-Examples:
-
- /* System Control Block */
- scb {
- compatible = "simple-bus";
- ranges = <0x0 0x040004000 0x00001000>;
- #address-cells = <1>;
- #size-cells = <1>;
-
- clk: clock-controller@0 {
- compatible = "nxp,lpc3220-clk";
- reg = <0x00 0x114>;
- #clock-cells = <1>;
-
- clocks = <&xtal_32k>, <&xtal>;
- clock-names = "xtal_32k", "xtal";
- };
- };
diff --git a/Documentation/devicetree/bindings/clock/nxp,lpc3220-clk.yaml b/Documentation/devicetree/bindings/clock/nxp,lpc3220-clk.yaml
new file mode 100644
index 000000000000..16f79616d18a
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/nxp,lpc3220-clk.yaml
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/nxp,lpc3220-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NXP LPC32xx Clock Controller
+
+maintainers:
+ - Animesh Agarwal <animeshagarwal28@gmail.com>
+
+properties:
+ compatible:
+ const: nxp,lpc3220-clk
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ clocks:
+ minItems: 1
+ items:
+ - description: External 32768 Hz oscillator.
+ - description: Optional high frequency oscillator.
+
+ clock-names:
+ minItems: 1
+ items:
+ - const: xtal_32k
+ - const: xtal
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@0 {
+ compatible = "nxp,lpc3220-clk";
+ reg = <0x00 0x114>;
+ #clock-cells = <1>;
+ clocks = <&xtal_32k>, <&xtal>;
+ clock-names = "xtal_32k", "xtal";
+ };
diff --git a/Documentation/devicetree/bindings/clock/nxp,lpc3220-usb-clk.txt b/Documentation/devicetree/bindings/clock/nxp,lpc3220-usb-clk.txt
deleted file mode 100644
index 0aa249409b51..000000000000
--- a/Documentation/devicetree/bindings/clock/nxp,lpc3220-usb-clk.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-NXP LPC32xx USB Clock Controller
-
-Required properties:
-- compatible: should be "nxp,lpc3220-usb-clk"
-- reg: should contain clock controller registers location and length
-- #clock-cells: must be 1, the cell holds id of a clock provided by the
- USB clock controller
-
-Examples:
-
- usb {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "simple-bus";
- ranges = <0x0 0x31020000 0x00001000>;
-
- usbclk: clock-controller@f00 {
- compatible = "nxp,lpc3220-usb-clk";
- reg = <0xf00 0x100>;
- #clock-cells = <1>;
- };
- };
diff --git a/Documentation/devicetree/bindings/clock/nxp,lpc3220-usb-clk.yaml b/Documentation/devicetree/bindings/clock/nxp,lpc3220-usb-clk.yaml
new file mode 100644
index 000000000000..10361d2292fb
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/nxp,lpc3220-usb-clk.yaml
@@ -0,0 +1,35 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/nxp,lpc3220-usb-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NXP LPC32xx USB Clock Controller
+
+maintainers:
+ - Animesh Agarwal <animeshagarwal28@gmail.com>
+
+properties:
+ compatible:
+ const: nxp,lpc3220-usb-clk
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@f00 {
+ compatible = "nxp,lpc3220-usb-clk";
+ reg = <0xf00 0x100>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/oxnas,stdclk.txt b/Documentation/devicetree/bindings/clock/oxnas,stdclk.txt
deleted file mode 100644
index b652f3fb7796..000000000000
--- a/Documentation/devicetree/bindings/clock/oxnas,stdclk.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-Oxford Semiconductor OXNAS SoC Family Standard Clocks
-================================================
-
-Please also refer to clock-bindings.txt in this directory for common clock
-bindings usage.
-
-Required properties:
-- compatible: For OX810SE, should be "oxsemi,ox810se-stdclk"
- For OX820, should be "oxsemi,ox820-stdclk"
-- #clock-cells: 1, see below
-
-Parent node should have the following properties :
-- compatible: For OX810SE, should be
- "oxsemi,ox810se-sys-ctrl", "syscon", "simple-mfd"
- For OX820, should be
- "oxsemi,ox820-sys-ctrl", "syscon", "simple-mfd"
-
-example:
-
-sys: sys-ctrl@000000 {
- compatible = "oxsemi,ox810se-sys-ctrl", "syscon", "simple-mfd";
- reg = <0x000000 0x100000>;
-
- stdclk: stdclk {
- compatible = "oxsemi,ox810se-stdclk";
- #clock-cells = <1>;
- };
-};
diff --git a/Documentation/devicetree/bindings/clock/pistachio-clock.txt b/Documentation/devicetree/bindings/clock/pistachio-clock.txt
deleted file mode 100644
index 868db499eed2..000000000000
--- a/Documentation/devicetree/bindings/clock/pistachio-clock.txt
+++ /dev/null
@@ -1,123 +0,0 @@
-Imagination Technologies Pistachio SoC clock controllers
-========================================================
-
-Pistachio has four clock controllers (core clock, peripheral clock, peripheral
-general control, and top general control) which are instantiated individually
-from the device-tree.
-
-External clocks:
-----------------
-
-There are three external inputs to the clock controllers which should be
-defined with the following clock-output-names:
-- "xtal": External 52Mhz oscillator (required)
-- "audio_clk_in": Alternate audio reference clock (optional)
-- "enet_clk_in": Alternate ethernet PHY clock (optional)
-
-Core clock controller:
-----------------------
-
-The core clock controller generates clocks for the CPU, RPU (WiFi + BT
-co-processor), audio, and several peripherals.
-
-Required properties:
-- compatible: Must be "img,pistachio-clk".
-- reg: Must contain the base address and length of the core clock controller.
-- #clock-cells: Must be 1. The single cell is the clock identifier.
- See dt-bindings/clock/pistachio-clk.h for the list of valid identifiers.
-- clocks: Must contain an entry for each clock in clock-names.
-- clock-names: Must include "xtal" (see "External clocks") and
- "audio_clk_in_gate", "enet_clk_in_gate" which are generated by the
- top-level general control.
-
-Example:
- clk_core: clock-controller@18144000 {
- compatible = "img,pistachio-clk";
- reg = <0x18144000 0x800>;
- clocks = <&xtal>, <&cr_top EXT_CLK_AUDIO_IN>,
- <&cr_top EXT_CLK_ENET_IN>;
- clock-names = "xtal", "audio_clk_in_gate", "enet_clk_in_gate";
-
- #clock-cells = <1>;
- };
-
-Peripheral clock controller:
-----------------------------
-
-The peripheral clock controller generates clocks for the DDR, ROM, and other
-peripherals. The peripheral system clock ("periph_sys") generated by the core
-clock controller is the input clock to the peripheral clock controller.
-
-Required properties:
-- compatible: Must be "img,pistachio-periph-clk".
-- reg: Must contain the base address and length of the peripheral clock
- controller.
-- #clock-cells: Must be 1. The single cell is the clock identifier.
- See dt-bindings/clock/pistachio-clk.h for the list of valid identifiers.
-- clocks: Must contain an entry for each clock in clock-names.
-- clock-names: Must include "periph_sys", the peripheral system clock generated
- by the core clock controller.
-
-Example:
- clk_periph: clock-controller@18144800 {
- compatible = "img,pistachio-clk-periph";
- reg = <0x18144800 0x800>;
- clocks = <&clk_core CLK_PERIPH_SYS>;
- clock-names = "periph_sys";
-
- #clock-cells = <1>;
- };
-
-Peripheral general control:
----------------------------
-
-The peripheral general control block generates system interface clocks and
-resets for various peripherals. It also contains miscellaneous peripheral
-control registers. The system clock ("sys") generated by the peripheral clock
-controller is the input clock to the system clock controller.
-
-Required properties:
-- compatible: Must include "img,pistachio-periph-cr" and "syscon".
-- reg: Must contain the base address and length of the peripheral general
- control registers.
-- #clock-cells: Must be 1. The single cell is the clock identifier.
- See dt-bindings/clock/pistachio-clk.h for the list of valid identifiers.
-- clocks: Must contain an entry for each clock in clock-names.
-- clock-names: Must include "sys", the system clock generated by the peripheral
- clock controller.
-
-Example:
- cr_periph: syscon@18144800 {
- compatible = "img,pistachio-cr-periph", "syscon";
- reg = <0x18148000 0x1000>;
- clocks = <&clock_periph PERIPH_CLK_PERIPH_SYS>;
- clock-names = "sys";
-
- #clock-cells = <1>;
- };
-
-Top-level general control:
---------------------------
-
-The top-level general control block contains miscellaneous control registers and
-gates for the external clocks "audio_clk_in" and "enet_clk_in".
-
-Required properties:
-- compatible: Must include "img,pistachio-cr-top" and "syscon".
-- reg: Must contain the base address and length of the top-level
- control registers.
-- clocks: Must contain an entry for each clock in clock-names.
-- clock-names: Two optional clocks, "audio_clk_in" and "enet_clk_in" (see
- "External clocks").
-- #clock-cells: Must be 1. The single cell is the clock identifier.
- See dt-bindings/clock/pistachio-clk.h for the list of valid identifiers.
-
-Example:
- cr_top: syscon@18144800 {
- compatible = "img,pistachio-cr-top", "syscon";
- reg = <0x18149000 0x200>;
- clocks = <&audio_refclk>, <&ext_enet_in>;
- clock-names = "audio_clk_in", "enet_clk_in";
-
- #clock-cells = <1>;
- };
diff --git a/Documentation/devicetree/bindings/clock/prima2-clock.txt b/Documentation/devicetree/bindings/clock/prima2-clock.txt
deleted file mode 100644
index 5016979c0f78..000000000000
--- a/Documentation/devicetree/bindings/clock/prima2-clock.txt
+++ /dev/null
@@ -1,73 +0,0 @@
-* Clock bindings for CSR SiRFprimaII
-
-Required properties:
-- compatible: Should be "sirf,prima2-clkc"
-- reg: Address and length of the register set
-- interrupts: Should contain clock controller interrupt
-- #clock-cells: Should be <1>
-
-The clock consumer should specify the desired clock by having the clock
-ID in its "clocks" phandle cell. The following is a full list of prima2
-clocks and IDs.
-
- Clock ID
- ---------------------------
- rtc 0
- osc 1
- pll1 2
- pll2 3
- pll3 4
- mem 5
- sys 6
- security 7
- dsp 8
- gps 9
- mf 10
- io 11
- cpu 12
- uart0 13
- uart1 14
- uart2 15
- tsc 16
- i2c0 17
- i2c1 18
- spi0 19
- spi1 20
- pwmc 21
- efuse 22
- pulse 23
- dmac0 24
- dmac1 25
- nand 26
- audio 27
- usp0 28
- usp1 29
- usp2 30
- vip 31
- gfx 32
- mm 33
- lcd 34
- vpp 35
- mmc01 36
- mmc23 37
- mmc45 38
- usbpll 39
- usb0 40
- usb1 41
-
-Examples:
-
-clks: clock-controller@88000000 {
- compatible = "sirf,prima2-clkc";
- reg = <0x88000000 0x1000>;
- interrupts = <3>;
- #clock-cells = <1>;
-};
-
-i2c0: i2c@b00e0000 {
- cell-index = <0>;
- compatible = "sirf,prima2-i2c";
- reg = <0xb00e0000 0x10000>;
- interrupts = <24>;
- clocks = <&clks 17>;
-};
diff --git a/Documentation/devicetree/bindings/clock/pwm-clock.txt b/Documentation/devicetree/bindings/clock/pwm-clock.txt
deleted file mode 100644
index 83db876b3b90..000000000000
--- a/Documentation/devicetree/bindings/clock/pwm-clock.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-Binding for an external clock signal driven by a PWM pin.
-
-This binding uses the common clock binding[1] and the common PWM binding[2].
-
-[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-[2] Documentation/devicetree/bindings/pwm/pwm.txt
-
-Required properties:
-- compatible : shall be "pwm-clock".
-- #clock-cells : from common clock binding; shall be set to 0.
-- pwms : from common PWM binding; this determines the clock frequency
- via the period given in the PWM specifier.
-
-Optional properties:
-- clock-output-names : From common clock binding.
-- clock-frequency : Exact output frequency, in case the PWM period
- is not exact but was rounded to nanoseconds.
-
-Example:
- clock {
- compatible = "pwm-clock";
- #clock-cells = <0>;
- clock-frequency = <25000000>;
- clock-output-names = "mipi_mclk";
- pwms = <&pwm2 0 40>; /* 1 / 40 ns = 25 MHz */
- };
diff --git a/Documentation/devicetree/bindings/clock/pwm-clock.yaml b/Documentation/devicetree/bindings/clock/pwm-clock.yaml
new file mode 100644
index 000000000000..f88ecb2995e0
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/pwm-clock.yaml
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/pwm-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: An external clock signal driven by a PWM pin.
+
+maintainers:
+ - Philipp Zabel <p.zabel@pengutronix.de>
+
+properties:
+ compatible:
+ const: pwm-clock
+
+ '#clock-cells':
+ const: 0
+
+ clock-frequency:
+ description: Exact output frequency, in case the PWM period is not exact
+ but was rounded to nanoseconds.
+
+ clock-output-names:
+ maxItems: 1
+
+ pwms:
+ maxItems: 1
+
+required:
+ - compatible
+ - '#clock-cells'
+ - pwms
+
+additionalProperties: false
+
+examples:
+ - |
+ clock {
+ compatible = "pwm-clock";
+ #clock-cells = <0>;
+ clock-frequency = <25000000>;
+ clock-output-names = "mipi_mclk";
+ pwms = <&pwm2 0 40>; /* 1 / 40 ns = 25 MHz */
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qca,ath79-pll.txt b/Documentation/devicetree/bindings/clock/qca,ath79-pll.txt
deleted file mode 100644
index 241fb0545b9e..000000000000
--- a/Documentation/devicetree/bindings/clock/qca,ath79-pll.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-Binding for Qualcomm Atheros AR7xxx/AR9XXX PLL controller
-
-The PPL controller provides the 3 main clocks of the SoC: CPU, DDR and AHB.
-
-Required Properties:
-- compatible: has to be "qca,<soctype>-pll" and one of the following
- fallbacks:
- - "qca,ar7100-pll"
- - "qca,ar7240-pll"
- - "qca,ar9130-pll"
- - "qca,ar9330-pll"
- - "qca,ar9340-pll"
- - "qca,qca9550-pll"
-- reg: Base address and size of the controllers memory area
-- clock-names: Name of the input clock, has to be "ref"
-- clocks: phandle of the external reference clock
-- #clock-cells: has to be one
-
-Optional properties:
-- clock-output-names: should be "cpu", "ddr", "ahb"
-
-Example:
-
- pll-controller@18050000 {
- compatible = "qca,ar9132-pll", "qca,ar9130-pll";
- reg = <0x18050000 0x20>;
-
- clock-names = "ref";
- clocks = <&extosc>;
-
- #clock-cells = <1>;
- clock-output-names = "cpu", "ddr", "ahb";
- };
diff --git a/Documentation/devicetree/bindings/clock/qca,ath79-pll.yaml b/Documentation/devicetree/bindings/clock/qca,ath79-pll.yaml
new file mode 100644
index 000000000000..69863e8a4648
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qca,ath79-pll.yaml
@@ -0,0 +1,70 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qca,ath79-pll.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Atheros ATH79 PLL controller
+
+maintainers:
+ - Alban Bedel <albeu@free.fr>
+ - Antony Pavlov <antonynpavlov@gmail.com>
+
+description: >
+ The PLL controller provides the 3 main clocks of the SoC: CPU, DDR and AHB.
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - const: qca,ar9132-pll
+ - const: qca,ar9130-pll
+ - items:
+ - enum:
+ - qca,ar7100-pll
+ - qca,ar7240-pll
+ - qca,ar9130-pll
+ - qca,ar9330-pll
+ - qca,ar9340-pll
+ - qca,qca9530-pll
+ - qca,qca9550-pll
+ - qca,qca9560-pll
+
+ reg:
+ maxItems: 1
+
+ clock-names:
+ items:
+ - const: ref
+
+ clocks:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ clock-output-names:
+ items:
+ - const: cpu
+ - const: ddr
+ - const: ahb
+
+required:
+ - compatible
+ - reg
+ - clock-names
+ - clocks
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@18050000 {
+ compatible = "qca,ar9132-pll", "qca,ar9130-pll";
+ reg = <0x18050000 0x20>;
+ clock-names = "ref";
+ clocks = <&extosc>;
+ #clock-cells = <1>;
+ clock-output-names = "cpu", "ddr", "ahb";
+ };
diff --git a/Documentation/devicetree/bindings/clock/qcom,a53pll.txt b/Documentation/devicetree/bindings/clock/qcom,a53pll.txt
deleted file mode 100644
index e3fa8118eaee..000000000000
--- a/Documentation/devicetree/bindings/clock/qcom,a53pll.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-Qualcomm MSM8916 A53 PLL Binding
---------------------------------
-The A53 PLL on MSM8916 platforms is the main CPU PLL used used for frequencies
-above 1GHz.
-
-Required properties :
-- compatible : Shall contain only one of the following:
-
- "qcom,msm8916-a53pll"
-
-- reg : shall contain base register location and length
-
-- #clock-cells : must be set to <0>
-
-Example:
-
- a53pll: clock@b016000 {
- compatible = "qcom,msm8916-a53pll";
- reg = <0xb016000 0x40>;
- #clock-cells = <0>;
- };
-
diff --git a/Documentation/devicetree/bindings/clock/qcom,a53pll.yaml b/Documentation/devicetree/bindings/clock/qcom,a53pll.yaml
new file mode 100644
index 000000000000..47ceab641a4c
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,a53pll.yaml
@@ -0,0 +1,70 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,a53pll.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm A53 PLL clock
+
+maintainers:
+ - Bjorn Andersson <andersson@kernel.org>
+
+description:
+ The A53 PLL on few Qualcomm platforms is the main CPU PLL used used for
+ frequencies above 1GHz.
+
+properties:
+ compatible:
+ enum:
+ - qcom,ipq5018-a53pll
+ - qcom,ipq5332-a53pll
+ - qcom,ipq6018-a53pll
+ - qcom,ipq8074-a53pll
+ - qcom,ipq9574-a73pll
+ - qcom,msm8226-a7pll
+ - qcom,msm8916-a53pll
+ - qcom,msm8939-a53pll
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 0
+
+ clocks:
+ items:
+ - description: board XO clock
+
+ clock-names:
+ items:
+ - const: xo
+
+ operating-points-v2: true
+
+ opp-table:
+ type: object
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ # Example 1 - A53 PLL found on MSM8916 devices
+ - |
+ a53pll: clock@b016000 {
+ compatible = "qcom,msm8916-a53pll";
+ reg = <0xb016000 0x40>;
+ #clock-cells = <0>;
+ };
+ # Example 2 - A53 PLL found on IPQ6018 devices
+ - |
+ a53pll_ipq: clock-controller@b116000 {
+ compatible = "qcom,ipq6018-a53pll";
+ reg = <0x0b116000 0x40>;
+ #clock-cells = <0>;
+ clocks = <&xo>;
+ clock-names = "xo";
+ };
diff --git a/Documentation/devicetree/bindings/clock/qcom,a7pll.yaml b/Documentation/devicetree/bindings/clock/qcom,a7pll.yaml
new file mode 100644
index 000000000000..809c34eb7d5a
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,a7pll.yaml
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,a7pll.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm A7 PLL clock
+
+maintainers:
+ - Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+
+description:
+ The A7 PLL on the Qualcomm platforms like SDX55, SDX65 is used to provide high
+ frequency clock to the CPU.
+
+properties:
+ compatible:
+ enum:
+ - qcom,sdx55-a7pll
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 0
+
+ clocks:
+ items:
+ - description: board XO clock
+
+ clock-names:
+ items:
+ - const: bi_tcxo
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ a7pll: clock@17808000 {
+ compatible = "qcom,sdx55-a7pll";
+ reg = <0x17808000 0x1000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>;
+ clock-names = "bi_tcxo";
+ #clock-cells = <0>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/qcom,aoncc-sm8250.yaml b/Documentation/devicetree/bindings/clock/qcom,aoncc-sm8250.yaml
new file mode 100644
index 000000000000..8b8932bd5a92
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,aoncc-sm8250.yaml
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,aoncc-sm8250.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: LPASS Always ON Clock Controller on SM8250 SoCs
+
+maintainers:
+ - Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
+
+description: |
+ The clock consumer should specify the desired clock by having the clock
+ ID in its "clocks" phandle cell.
+ See include/dt-bindings/clock/qcom,sm8250-lpass-aoncc.h for the full list
+ of Audio Clock controller clock IDs.
+
+properties:
+ compatible:
+ const: qcom,sm8250-lpass-aoncc
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ clocks:
+ items:
+ - description: LPASS Core voting clock
+ - description: LPASS Audio codec voting clock
+ - description: Glitch Free Mux register clock
+
+ clock-names:
+ items:
+ - const: core
+ - const: audio
+ - const: bus
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,sm8250-lpass-aoncc.h>
+ #include <dt-bindings/sound/qcom,q6afe.h>
+ clock-controller@3800000 {
+ #clock-cells = <1>;
+ compatible = "qcom,sm8250-lpass-aoncc";
+ reg = <0x03380000 0x40000>;
+ clocks = <&q6afecc LPASS_HW_MACRO_VOTE LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
+ <&q6afecc LPASS_HW_DCODEC_VOTE LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
+ <&q6afecc LPASS_CLK_ID_TX_CORE_MCLK LPASS_CLK_ATTRIBUTE_COUPLE_NO>;
+ clock-names = "core", "audio", "bus";
+ };
diff --git a/Documentation/devicetree/bindings/clock/qcom,audiocc-sm8250.yaml b/Documentation/devicetree/bindings/clock/qcom,audiocc-sm8250.yaml
new file mode 100644
index 000000000000..cfca888f6014
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,audiocc-sm8250.yaml
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,audiocc-sm8250.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: LPASS Audio Clock Controller on SM8250 SoCs
+
+maintainers:
+ - Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
+
+description: |
+ The clock consumer should specify the desired clock by having the clock
+ ID in its "clocks" phandle cell.
+ See include/dt-bindings/clock/qcom,sm8250-lpass-audiocc.h for the full list
+ of Audio Clock controller clock IDs.
+
+properties:
+ compatible:
+ const: qcom,sm8250-lpass-audiocc
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ clocks:
+ items:
+ - description: LPASS Core voting clock
+ - description: LPASS Audio codec voting clock
+ - description: Glitch Free Mux register clock
+
+ clock-names:
+ items:
+ - const: core
+ - const: audio
+ - const: bus
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,sm8250-lpass-audiocc.h>
+ #include <dt-bindings/sound/qcom,q6afe.h>
+ clock-controller@3300000 {
+ #clock-cells = <1>;
+ compatible = "qcom,sm8250-lpass-audiocc";
+ reg = <0x03300000 0x30000>;
+ clocks = <&q6afecc LPASS_HW_MACRO_VOTE LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
+ <&q6afecc LPASS_HW_DCODEC_VOTE LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
+ <&q6afecc LPASS_CLK_ID_TX_CORE_MCLK LPASS_CLK_ATTRIBUTE_COUPLE_NO>;
+ clock-names = "core", "audio", "bus";
+ };
diff --git a/Documentation/devicetree/bindings/clock/qcom,camcc-sm8250.yaml b/Documentation/devicetree/bindings/clock/qcom,camcc-sm8250.yaml
new file mode 100644
index 000000000000..5c3ff37ec0d7
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,camcc-sm8250.yaml
@@ -0,0 +1,74 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,camcc-sm8250.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Camera Clock & Reset Controller on SM8250
+
+maintainers:
+ - Jonathan Marek <jonathan@marek.ca>
+
+description: |
+ Qualcomm camera clock control module provides the clocks, resets and
+ power domains on SM8250.
+
+ See also: include/dt-bindings/clock/qcom,camcc-sm8250.h
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+properties:
+ compatible:
+ const: qcom,sm8250-camcc
+
+ clocks:
+ items:
+ - description: AHB
+ - description: Board XO source
+ - description: Board active XO source
+ - description: Sleep clock source
+
+ clock-names:
+ items:
+ - const: iface
+ - const: bi_tcxo
+ - const: bi_tcxo_ao
+ - const: sleep_clk
+
+ power-domains:
+ items:
+ - description: MMCX power domain
+
+ reg:
+ maxItems: 1
+
+ required-opps:
+ maxItems: 1
+ description:
+ OPP node describing required MMCX performance point.
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,gcc-sm8250.h>
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ clock-controller@ad00000 {
+ compatible = "qcom,sm8250-camcc";
+ reg = <0x0ad00000 0x10000>;
+ clocks = <&gcc GCC_CAMERA_AHB_CLK>,
+ <&rpmhcc RPMH_CXO_CLK>,
+ <&rpmhcc RPMH_CXO_CLK_A>,
+ <&sleep_clk>;
+ clock-names = "iface", "bi_tcxo", "bi_tcxo_ao", "sleep_clk";
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,camcc.txt b/Documentation/devicetree/bindings/clock/qcom,camcc.txt
deleted file mode 100644
index c5eb6694fda9..000000000000
--- a/Documentation/devicetree/bindings/clock/qcom,camcc.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-Qualcomm Camera Clock & Reset Controller Binding
-------------------------------------------------
-
-Required properties :
-- compatible : shall contain "qcom,sdm845-camcc".
-- reg : shall contain base register location and length.
-- #clock-cells : from common clock binding, shall contain 1.
-- #reset-cells : from common reset binding, shall contain 1.
-- #power-domain-cells : from generic power domain binding, shall contain 1.
-
-Example:
- camcc: clock-controller@ad00000 {
- compatible = "qcom,sdm845-camcc";
- reg = <0xad00000 0x10000>;
- #clock-cells = <1>;
- #reset-cells = <1>;
- #power-domain-cells = <1>;
- };
diff --git a/Documentation/devicetree/bindings/clock/qcom,dispcc-sc8280xp.yaml b/Documentation/devicetree/bindings/clock/qcom,dispcc-sc8280xp.yaml
new file mode 100644
index 000000000000..ffae037779a1
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,dispcc-sc8280xp.yaml
@@ -0,0 +1,85 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,dispcc-sc8280xp.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Display Clock & Reset Controller on SC8280XP
+
+maintainers:
+ - Bjorn Andersson <bjorn.andersson@linaro.org>
+
+description: |
+ Qualcomm display clock control module which supports the clocks, resets and
+ power domains for the two MDSS instances on SC8280XP.
+
+ See also:
+ include/dt-bindings/clock/qcom,dispcc-sc8280xp.h
+
+properties:
+ compatible:
+ enum:
+ - qcom,sc8280xp-dispcc0
+ - qcom,sc8280xp-dispcc1
+
+ clocks:
+ items:
+ - description: AHB interface clock,
+ - description: SoC CXO clock
+ - description: SoC sleep clock
+ - description: DisplayPort 0 link clock
+ - description: DisplayPort 0 VCO div clock
+ - description: DisplayPort 1 link clock
+ - description: DisplayPort 1 VCO div clock
+ - description: DisplayPort 2 link clock
+ - description: DisplayPort 2 VCO div clock
+ - description: DisplayPort 3 link clock
+ - description: DisplayPort 3 VCO div clock
+ - description: DSI 0 PLL byte clock
+ - description: DSI 0 PLL DSI clock
+ - description: DSI 1 PLL byte clock
+ - description: DSI 1 PLL DSI clock
+
+ power-domains:
+ items:
+ - description: MMCX power domain
+
+required:
+ - compatible
+ - clocks
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,gcc-sc8280xp.h>
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ #include <dt-bindings/power/qcom-rpmpd.h>
+ clock-controller@af00000 {
+ compatible = "qcom,sc8280xp-dispcc0";
+ reg = <0x0af00000 0x20000>;
+ clocks = <&gcc GCC_DISP_AHB_CLK>,
+ <&rpmhcc RPMH_CXO_CLK>,
+ <&sleep_clk>,
+ <&mdss0_dp_phy0 0>,
+ <&mdss0_dp_phy0 1>,
+ <&mdss0_dp_phy1 0>,
+ <&mdss0_dp_phy1 1>,
+ <&mdss0_dp_phy2 0>,
+ <&mdss0_dp_phy2 1>,
+ <&mdss0_dp_phy3 0>,
+ <&mdss0_dp_phy3 1>,
+ <&mdss0_dsi0_phy 0>,
+ <&mdss0_dsi0_phy 1>,
+ <&mdss0_dsi1_phy 0>,
+ <&mdss0_dsi1_phy 1>;
+ power-domains = <&rpmhpd SC8280XP_MMCX>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,dispcc-sm6125.yaml b/Documentation/devicetree/bindings/clock/qcom,dispcc-sm6125.yaml
new file mode 100644
index 000000000000..ef2b1e204430
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,dispcc-sm6125.yaml
@@ -0,0 +1,106 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,dispcc-sm6125.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Display Clock Controller on SM6125
+
+maintainers:
+ - Martin Botka <martin.botka@somainline.org>
+
+description: |
+ Qualcomm display clock control module provides the clocks and power domains
+ on SM6125.
+
+ See also: include/dt-bindings/clock/qcom,dispcc-sm6125.h
+
+properties:
+ compatible:
+ enum:
+ - qcom,sm6125-dispcc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Byte clock from DSI PHY0
+ - description: Pixel clock from DSI PHY0
+ - description: Pixel clock from DSI PHY1
+ - description: Link clock from DP PHY
+ - description: VCO DIV clock from DP PHY
+ - description: AHB config clock from GCC
+ - description: GPLL0 div source from GCC
+
+ clock-names:
+ items:
+ - const: bi_tcxo
+ - const: dsi0_phy_pll_out_byteclk
+ - const: dsi0_phy_pll_out_dsiclk
+ - const: dsi1_phy_pll_out_dsiclk
+ - const: dp_phy_pll_link_clk
+ - const: dp_phy_pll_vco_div_clk
+ - const: cfg_ahb_clk
+ - const: gcc_disp_gpll0_div_clk_src
+
+ '#clock-cells':
+ const: 1
+
+ '#power-domain-cells':
+ const: 1
+
+ power-domains:
+ description:
+ A phandle and PM domain specifier for the CX power domain.
+ maxItems: 1
+
+ required-opps:
+ description:
+ A phandle to an OPP node describing the power domain's performance point.
+ maxItems: 1
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - '#clock-cells'
+ - '#power-domain-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmcc.h>
+ #include <dt-bindings/clock/qcom,gcc-sm6125.h>
+ #include <dt-bindings/power/qcom-rpmpd.h>
+ clock-controller@5f00000 {
+ compatible = "qcom,sm6125-dispcc";
+ reg = <0x5f00000 0x20000>;
+
+ clocks = <&rpmcc RPM_SMD_XO_CLK_SRC>,
+ <&dsi0_phy 0>,
+ <&dsi0_phy 1>,
+ <&dsi1_phy 1>,
+ <&dp_phy 0>,
+ <&dp_phy 1>,
+ <&gcc GCC_DISP_AHB_CLK>,
+ <&gcc GCC_DISP_GPLL0_DIV_CLK_SRC>;
+ clock-names = "bi_tcxo",
+ "dsi0_phy_pll_out_byteclk",
+ "dsi0_phy_pll_out_dsiclk",
+ "dsi1_phy_pll_out_dsiclk",
+ "dp_phy_pll_link_clk",
+ "dp_phy_pll_vco_div_clk",
+ "cfg_ahb_clk",
+ "gcc_disp_gpll0_div_clk_src";
+
+ required-opps = <&rpmhpd_opp_ret>;
+ power-domains = <&rpmpd SM6125_VDDCX>;
+
+ #clock-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,dispcc-sm6350.yaml b/Documentation/devicetree/bindings/clock/qcom,dispcc-sm6350.yaml
new file mode 100644
index 000000000000..a602e882e964
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,dispcc-sm6350.yaml
@@ -0,0 +1,74 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,dispcc-sm6350.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Display Clock & Reset Controller on SM6350
+
+maintainers:
+ - Konrad Dybcio <konradybcio@kernel.org>
+
+description: |
+ Qualcomm display clock control module provides the clocks, resets and power
+ domains on SM6350.
+
+ See also: include/dt-bindings/clock/qcom,dispcc-sm6350.h
+
+properties:
+ compatible:
+ const: qcom,sm6350-dispcc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: GPLL0 source from GCC
+ - description: Byte clock from DSI PHY
+ - description: Pixel clock from DSI PHY
+ - description: Link clock from DP PHY
+ - description: VCO DIV clock from DP PHY
+
+ clock-names:
+ items:
+ - const: bi_tcxo
+ - const: gcc_disp_gpll0_clk
+ - const: dsi0_phy_pll_out_byteclk
+ - const: dsi0_phy_pll_out_dsiclk
+ - const: dp_phy_pll_link_clk
+ - const: dp_phy_pll_vco_div_clk
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,gcc-sm6350.h>
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ clock-controller@af00000 {
+ compatible = "qcom,sm6350-dispcc";
+ reg = <0x0af00000 0x20000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&gcc GCC_DISP_GPLL0_CLK>,
+ <&dsi_phy 0>,
+ <&dsi_phy 1>,
+ <&dp_phy 0>,
+ <&dp_phy 1>;
+ clock-names = "bi_tcxo",
+ "gcc_disp_gpll0_clk",
+ "dsi0_phy_pll_out_byteclk",
+ "dsi0_phy_pll_out_dsiclk",
+ "dp_phy_pll_link_clk",
+ "dp_phy_pll_vco_div_clk";
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,dispcc-sm8x50.yaml b/Documentation/devicetree/bindings/clock/qcom,dispcc-sm8x50.yaml
new file mode 100644
index 000000000000..53a5ab319159
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,dispcc-sm8x50.yaml
@@ -0,0 +1,123 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,dispcc-sm8x50.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Display Clock & Reset Controller on SM8150/SM8250/SM8350
+
+maintainers:
+ - Jonathan Marek <jonathan@marek.ca>
+
+description: |
+ Qualcomm display clock control module provides the clocks, resets and power
+ domains on SM8150/SM8250/SM8350.
+
+ See also::
+ include/dt-bindings/clock/qcom,dispcc-sm8150.h
+ include/dt-bindings/clock/qcom,dispcc-sm8250.h
+ include/dt-bindings/clock/qcom,dispcc-sm8350.h
+
+properties:
+ compatible:
+ enum:
+ - qcom,sc8180x-dispcc
+ - qcom,sm8150-dispcc
+ - qcom,sm8250-dispcc
+ - qcom,sm8350-dispcc
+
+ clocks:
+ minItems: 7
+ items:
+ - description: Board XO source
+ - description: Byte clock from DSI PHY0
+ - description: Pixel clock from DSI PHY0
+ - description: Byte clock from DSI PHY1
+ - description: Pixel clock from DSI PHY1
+ - description: Link clock from DP PHY
+ - description: VCO DIV clock from DP PHY
+ - description: Link clock from eDP PHY
+ - description: VCO DIV clock from eDP PHY
+ - description: Link clock from DP1 PHY
+ - description: VCO DIV clock from DP1 PHY
+ - description: Link clock from DP2 PHY
+ - description: VCO DIV clock from DP2 PHY
+
+ clock-names:
+ minItems: 7
+ items:
+ - const: bi_tcxo
+ - const: dsi0_phy_pll_out_byteclk
+ - const: dsi0_phy_pll_out_dsiclk
+ - const: dsi1_phy_pll_out_byteclk
+ - const: dsi1_phy_pll_out_dsiclk
+ - const: dp_phy_pll_link_clk
+ - const: dp_phy_pll_vco_div_clk
+ - const: edp_phy_pll_link_clk
+ - const: edp_phy_pll_vco_div_clk
+ - const: dptx1_phy_pll_link_clk
+ - const: dptx1_phy_pll_vco_div_clk
+ - const: dptx2_phy_pll_link_clk
+ - const: dptx2_phy_pll_vco_div_clk
+
+ power-domains:
+ description:
+ A phandle and PM domain specifier for the MMCX power domain.
+ maxItems: 1
+
+ required-opps:
+ description:
+ A phandle to an OPP node describing required MMCX performance point.
+ maxItems: 1
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+ - if:
+ not:
+ properties:
+ compatible:
+ contains:
+ const: qcom,sc8180x-dispcc
+ then:
+ properties:
+ clocks:
+ maxItems: 7
+ clock-names:
+ maxItems: 7
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ #include <dt-bindings/power/qcom,rpmhpd.h>
+ clock-controller@af00000 {
+ compatible = "qcom,sm8250-dispcc";
+ reg = <0x0af00000 0x10000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&dsi0_phy 0>,
+ <&dsi0_phy 1>,
+ <&dsi1_phy 0>,
+ <&dsi1_phy 1>,
+ <&dp_phy 0>,
+ <&dp_phy 1>;
+ clock-names = "bi_tcxo",
+ "dsi0_phy_pll_out_byteclk",
+ "dsi0_phy_pll_out_dsiclk",
+ "dsi1_phy_pll_out_byteclk",
+ "dsi1_phy_pll_out_dsiclk",
+ "dp_phy_pll_link_clk",
+ "dp_phy_pll_vco_div_clk";
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ power-domains = <&rpmhpd RPMHPD_MMCX>;
+ required-opps = <&rpmhpd_opp_low_svs>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,dispcc.txt b/Documentation/devicetree/bindings/clock/qcom,dispcc.txt
deleted file mode 100644
index d639e18d0b85..000000000000
--- a/Documentation/devicetree/bindings/clock/qcom,dispcc.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-Qualcomm Technologies, Inc. Display Clock Controller Binding
-------------------------------------------------------------
-
-Required properties :
-
-- compatible : shall contain "qcom,sdm845-dispcc"
-- reg : shall contain base register location and length.
-- #clock-cells : from common clock binding, shall contain 1.
-- #reset-cells : from common reset binding, shall contain 1.
-- #power-domain-cells : from generic power domain binding, shall contain 1.
-
-Example:
- dispcc: clock-controller@af00000 {
- compatible = "qcom,sdm845-dispcc";
- reg = <0xaf00000 0x100000>;
- #clock-cells = <1>;
- #reset-cells = <1>;
- #power-domain-cells = <1>;
- };
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-apq8064.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-apq8064.yaml
new file mode 100644
index 000000000000..27df7e3e5bf3
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc-apq8064.yaml
@@ -0,0 +1,99 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,gcc-apq8064.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on APQ8064/MSM8960
+
+maintainers:
+ - Stephen Boyd <sboyd@kernel.org>
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on APQ8064.
+
+ See also::
+ include/dt-bindings/clock/qcom,gcc-msm8960.h
+ include/dt-bindings/reset/qcom,gcc-msm8960.h
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - qcom,gcc-apq8064
+ - qcom,gcc-msm8960
+ - const: syscon
+ - enum:
+ - qcom,gcc-apq8064
+ - qcom,gcc-msm8960
+ deprecated: true
+
+ thermal-sensor:
+ description: child tsens device
+ $ref: /schemas/thermal/qcom-tsens.yaml#
+
+ clocks:
+ maxItems: 3
+
+ clock-names:
+ items:
+ - const: cxo
+ - const: pxo
+ - const: pll4
+
+ nvmem-cells:
+ minItems: 1
+ maxItems: 2
+ deprecated: true
+ description:
+ Qualcomm TSENS (thermal sensor device) on some devices can
+ be part of GCC and hence the TSENS properties can also be part
+ of the GCC/clock-controller node.
+ For more details on the TSENS properties please refer
+ Documentation/devicetree/bindings/thermal/qcom-tsens.yaml
+
+ nvmem-cell-names:
+ minItems: 1
+ deprecated: true
+ items:
+ - const: calib
+ - const: calib_backup
+
+ '#thermal-sensor-cells':
+ const: 1
+ deprecated: true
+
+ '#power-domain-cells': false
+
+required:
+ - compatible
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ clock-controller@900000 {
+ compatible = "qcom,gcc-apq8064", "syscon";
+ reg = <0x00900000 0x4000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+
+ thermal-sensor {
+ compatible = "qcom,msm8960-tsens";
+
+ nvmem-cells = <&tsens_calib>, <&tsens_backup>;
+ nvmem-cell-names = "calib", "calib_backup";
+ interrupts = <0 178 4>;
+ interrupt-names = "uplow";
+
+ #qcom,sensors = <11>;
+ #thermal-sensor-cells = <1>;
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-apq8084.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-apq8084.yaml
new file mode 100644
index 000000000000..0a0a26d9beab
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc-apq8084.yaml
@@ -0,0 +1,87 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,gcc-apq8084.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on APQ8084
+
+maintainers:
+ - Stephen Boyd <sboyd@kernel.org>
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on APQ8084.
+
+ See also::
+ include/dt-bindings/clock/qcom,gcc-apq8084.h
+ include/dt-bindings/reset/qcom,gcc-apq8084.h
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+properties:
+ compatible:
+ const: qcom,gcc-apq8084
+
+ clocks:
+ items:
+ - description: XO source
+ - description: Sleep clock source
+ - description: UFS RX symbol 0 clock
+ - description: UFS RX symbol 1 clock
+ - description: UFS TX symbol 0 clock
+ - description: UFS TX symbol 1 clock
+ - description: SATA ASIC0 clock
+ - description: SATA RX clock
+ - description: PCIe PIPE clock
+
+ clock-names:
+ items:
+ - const: xo
+ - const: sleep_clk
+ - const: ufs_rx_symbol_0_clk_src
+ - const: ufs_rx_symbol_1_clk_src
+ - const: ufs_tx_symbol_0_clk_src
+ - const: ufs_tx_symbol_1_clk_src
+ - const: sata_asic0_clk
+ - const: sata_rx_clk
+ - const: pcie_pipe
+
+required:
+ - compatible
+ - '#power-domain-cells'
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ /* UFS PHY on APQ8084 is not supported (yet), so these bindings just serve an example */
+ clock-controller@fc400000 {
+ compatible = "qcom,gcc-apq8084";
+ reg = <0xfc400000 0x4000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+
+ clocks = <&xo_board>,
+ <&sleep_clk>,
+ <&ufsphy 0>,
+ <&ufsphy 1>,
+ <&ufsphy 2>,
+ <&ufsphy 3>,
+ <&sata 0>,
+ <&sata 1>,
+ <&pcie_phy>;
+ clock-names = "xo",
+ "sleep_clk",
+ "ufs_rx_symbol_0_clk_src",
+ "ufs_rx_symbol_1_clk_src",
+ "ufs_tx_symbol_0_clk_src",
+ "ufs_tx_symbol_1_clk_src",
+ "sata_asic0_clk",
+ "sata_rx_clk",
+ "pcie_pipe";
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-ipq4019.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-ipq4019.yaml
new file mode 100644
index 000000000000..c91039dc100e
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc-ipq4019.yaml
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,gcc-ipq4019.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on IPQ4019
+
+maintainers:
+ - Stephen Boyd <sboyd@kernel.org>
+ - Taniya Das <quic_tdas@quicinc.com>
+ - Robert Marko <robert.markoo@sartura.hr>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on IPQ4019.
+
+ See also: include/dt-bindings/clock/qcom,gcc-ipq4019.h
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+properties:
+ compatible:
+ const: qcom,gcc-ipq4019
+
+ clocks:
+ items:
+ - description: board XO clock
+ - description: sleep clock
+
+ clock-names:
+ items:
+ - const: xo
+ - const: sleep_clk
+
+ '#power-domain-cells': false
+
+required:
+ - compatible
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ clock-controller@1800000 {
+ compatible = "qcom,gcc-ipq4019";
+ reg = <0x1800000 0x60000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ clocks = <&xo>, <&sleep_clk>;
+ clock-names = "xo", "sleep_clk";
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-ipq6018.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-ipq6018.yaml
new file mode 100644
index 000000000000..4d2614d4f368
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc-ipq6018.yaml
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,gcc-ipq6018.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on IPQ6018
+
+maintainers:
+ - Stephen Boyd <sboyd@kernel.org>
+ - Taniya Das <quic_tdas@quicinc.com>
+ - Robert Marko <robimarko@gmail.com>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on IPQ6018.
+
+ See also::
+ include/dt-bindings/clock/qcom,gcc-ipq6018.h
+ include/dt-bindings/reset/qcom,gcc-ipq6018.h
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+properties:
+ compatible:
+ const: qcom,gcc-ipq6018
+
+ clocks:
+ items:
+ - description: board XO clock
+ - description: sleep clock
+
+ clock-names:
+ items:
+ - const: xo
+ - const: sleep_clk
+
+ '#power-domain-cells': false
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ clock-controller@1800000 {
+ compatible = "qcom,gcc-ipq6018";
+ reg = <0x01800000 0x80000>;
+ clocks = <&xo>, <&sleep_clk>;
+ clock-names = "xo", "sleep_clk";
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-ipq8064.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-ipq8064.yaml
new file mode 100644
index 000000000000..a71557395c01
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc-ipq8064.yaml
@@ -0,0 +1,82 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,gcc-ipq8064.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on IPQ8064
+
+maintainers:
+ - Ansuel Smith <ansuelsmth@gmail.com>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on IPQ8064.
+
+ See also::
+ include/dt-bindings/clock/qcom,gcc-ipq806x.h (qcom,gcc-ipq8064)
+ include/dt-bindings/reset/qcom,gcc-ipq806x.h (qcom,gcc-ipq8064)
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+properties:
+ compatible:
+ items:
+ - const: qcom,gcc-ipq8064
+ - const: syscon
+
+ clocks:
+ minItems: 2
+ items:
+ - description: PXO source
+ - description: CXO source
+ - description: PLL4 from LCC
+
+ clock-names:
+ minItems: 2
+ items:
+ - const: pxo
+ - const: cxo
+ - const: pll4
+
+ thermal-sensor:
+ type: object
+
+ allOf:
+ - $ref: /schemas/thermal/qcom-tsens.yaml#
+
+ '#power-domain-cells': false
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,lcc-ipq806x.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ gcc: clock-controller@900000 {
+ compatible = "qcom,gcc-ipq8064", "syscon";
+ reg = <0x00900000 0x4000>;
+ clocks = <&pxo_board>, <&cxo_board>, <&lcc PLL4>;
+ clock-names = "pxo", "cxo", "pll4";
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+
+ tsens: thermal-sensor {
+ compatible = "qcom,ipq8064-tsens";
+
+ nvmem-cells = <&tsens_calib>, <&tsens_calib_backup>;
+ nvmem-cell-names = "calib", "calib_backup";
+ interrupts = <GIC_SPI 178 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "uplow";
+
+ #qcom,sensors = <11>;
+ #thermal-sensor-cells = <1>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-ipq8074.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-ipq8074.yaml
new file mode 100644
index 000000000000..00d7df75b3d6
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc-ipq8074.yaml
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,gcc-ipq8074.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on IPQ8074
+
+maintainers:
+ - Stephen Boyd <sboyd@kernel.org>
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on IPQ8074.
+
+ See also: include/dt-bindings/clock/qcom,gcc-ipq8074.h
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+properties:
+ compatible:
+ const: qcom,gcc-ipq8074
+
+ clocks:
+ items:
+ - description: board XO clock
+ - description: sleep clock
+ - description: Gen3 QMP PCIe PHY PIPE clock
+ - description: Gen2 QMP PCIe PHY PIPE clock
+
+ clock-names:
+ items:
+ - const: xo
+ - const: sleep_clk
+ - const: pcie0_pipe
+ - const: pcie1_pipe
+
+required:
+ - compatible
+ - '#power-domain-cells'
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ clock-controller@1800000 {
+ compatible = "qcom,gcc-ipq8074";
+ reg = <0x01800000 0x80000>;
+ #clock-cells = <1>;
+ #power-domain-cells = <1>;
+ #reset-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-mdm9607.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-mdm9607.yaml
new file mode 100644
index 000000000000..d7da30b0e7ee
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc-mdm9607.yaml
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,gcc-mdm9607.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller
+
+maintainers:
+ - Stephen Boyd <sboyd@kernel.org>
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains.
+
+ See also::
+ include/dt-bindings/clock/qcom,gcc-mdm9607.h
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+properties:
+ compatible:
+ enum:
+ - qcom,gcc-mdm9607
+
+required:
+ - compatible
+ - '#power-domain-cells'
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ clock-controller@900000 {
+ compatible = "qcom,gcc-mdm9607";
+ reg = <0x900000 0x4000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-mdm9615.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-mdm9615.yaml
new file mode 100644
index 000000000000..418dea31eb62
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc-mdm9615.yaml
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,gcc-mdm9615.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller
+
+maintainers:
+ - Stephen Boyd <sboyd@kernel.org>
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains.
+
+ See also::
+ include/dt-bindings/clock/qcom,gcc-mdm9615.h
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+properties:
+ compatible:
+ enum:
+ - qcom,gcc-mdm9615
+
+ clocks:
+ items:
+ - description: CXO clock
+ - description: PLL4 from LLC
+
+ '#power-domain-cells': false
+
+required:
+ - compatible
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ clock-controller@900000 {
+ compatible = "qcom,gcc-mdm9615";
+ reg = <0x900000 0x4000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ clocks = <&cxo_board>,
+ <&lcc_pll4>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-msm8660.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-msm8660.yaml
new file mode 100644
index 000000000000..e03b6d0acdb6
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc-msm8660.yaml
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,gcc-msm8660.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on MSM8660
+
+maintainers:
+ - Stephen Boyd <sboyd@kernel.org>
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Qualcomm global clock control module provides the clocks and resets on
+ MSM8660
+
+ See also::
+ include/dt-bindings/clock/qcom,gcc-msm8660.h
+ include/dt-bindings/reset/qcom,gcc-msm8660.h
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+properties:
+ compatible:
+ enum:
+ - qcom,gcc-msm8660
+
+ clocks:
+ maxItems: 2
+
+ clock-names:
+ items:
+ - const: pxo
+ - const: cxo
+
+ '#power-domain-cells': false
+
+required:
+ - compatible
+
+unevaluatedProperties: false
+
+examples:
+ # Example for GCC for MSM8974:
+ - |
+ clock-controller@900000 {
+ compatible = "qcom,gcc-msm8660";
+ reg = <0x900000 0x4000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ clocks = <&pxo_board>, <&cxo_board>;
+ clock-names = "pxo", "cxo";
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-msm8909.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-msm8909.yaml
new file mode 100644
index 000000000000..ce1f5a60bd8c
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc-msm8909.yaml
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,gcc-msm8909.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on MSM8909, MSM8917 and QM215
+
+maintainers:
+ - Stephan Gerhold <stephan@gerhold.net>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on MSM8909, MSM8917 or QM215.
+
+ See also::
+ include/dt-bindings/clock/qcom,gcc-msm8909.h
+ include/dt-bindings/clock/qcom,gcc-msm8917.h
+
+properties:
+ compatible:
+ enum:
+ - qcom,gcc-msm8909
+ - qcom,gcc-msm8917
+ - qcom,gcc-qm215
+
+ clocks:
+ items:
+ - description: XO source
+ - description: Sleep clock source
+ - description: DSI phy instance 0 dsi clock
+ - description: DSI phy instance 0 byte clock
+
+ clock-names:
+ items:
+ - const: xo
+ - const: sleep_clk
+ - const: dsi0pll
+ - const: dsi0pllbyte
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ gcc: clock-controller@1800000 {
+ compatible = "qcom,gcc-msm8909";
+ reg = <0x01800000 0x80000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ clocks = <&xo_board>, <&sleep_clk>, <&dsi0_phy 1>, <&dsi0_phy 0>;
+ clock-names = "xo", "sleep_clk", "dsi0pll", "dsi0pllbyte";
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-msm8916.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-msm8916.yaml
new file mode 100644
index 000000000000..258b6b93deca
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc-msm8916.yaml
@@ -0,0 +1,67 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,gcc-msm8916.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on MSM8916 and MSM8939
+
+maintainers:
+ - Stephen Boyd <sboyd@kernel.org>
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on MSM8916 or MSM8939.
+
+ See also::
+ include/dt-bindings/clock/qcom,gcc-msm8916.h
+ include/dt-bindings/clock/qcom,gcc-msm8939.h
+ include/dt-bindings/reset/qcom,gcc-msm8916.h
+ include/dt-bindings/reset/qcom,gcc-msm8939.h
+
+properties:
+ compatible:
+ enum:
+ - qcom,gcc-msm8916
+ - qcom,gcc-msm8939
+
+ clocks:
+ items:
+ - description: XO source
+ - description: Sleep clock source
+ - description: DSI phy instance 0 dsi clock
+ - description: DSI phy instance 0 byte clock
+ - description: External MCLK clock
+ - description: External Primary I2S clock
+ - description: External Secondary I2S clock
+
+ clock-names:
+ items:
+ - const: xo
+ - const: sleep_clk
+ - const: dsi0pll
+ - const: dsi0pllbyte
+ - const: ext_mclk
+ - const: ext_pri_i2s
+ - const: ext_sec_i2s
+
+required:
+ - compatible
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ clock-controller@300000 {
+ compatible = "qcom,gcc-msm8916";
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ reg = <0x300000 0x90000>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-msm8953.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-msm8953.yaml
new file mode 100644
index 000000000000..f2e37f439d28
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc-msm8953.yaml
@@ -0,0 +1,79 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,gcc-msm8953.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on MSM8953
+
+maintainers:
+ - Adam Skladowski <a_skl39@protonmail.com>
+ - Sireesh Kodali <sireeshkodali@protonmail.com>
+ - Barnabas Czeman <barnabas.czeman@mainlining.org>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on MSM8937 or MSM8953.
+
+ See also::
+ include/dt-bindings/clock/qcom,gcc-msm8917.h
+ include/dt-bindings/clock/qcom,gcc-msm8953.h
+
+properties:
+ compatible:
+ enum:
+ - qcom,gcc-msm8937
+ - qcom,gcc-msm8953
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Sleep clock source
+ - description: Byte clock from DSI PHY0
+ - description: Pixel clock from DSI PHY0
+ - description: Byte clock from DSI PHY1
+ - description: Pixel clock from DSI PHY1
+
+ clock-names:
+ items:
+ - const: xo
+ - const: sleep
+ - const: dsi0pll
+ - const: dsi0pllbyte
+ - const: dsi1pll
+ - const: dsi1pllbyte
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmcc.h>
+
+ clock-controller@1800000 {
+ compatible = "qcom,gcc-msm8953";
+ reg = <0x01800000 0x80000>;
+ clocks = <&rpmcc RPM_SMD_XO_CLK_SRC>,
+ <&sleep_clk>,
+ <&dsi0_phy 1>,
+ <&dsi0_phy 0>,
+ <&dsi1_phy 1>,
+ <&dsi1_phy 0>;
+ clock-names = "xo",
+ "sleep",
+ "dsi0pll",
+ "dsi0pllbyte",
+ "dsi1pll",
+ "dsi1pllbyte";
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-msm8974.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-msm8974.yaml
new file mode 100644
index 000000000000..929fafc84c19
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc-msm8974.yaml
@@ -0,0 +1,62 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,gcc-msm8974.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on MSM8974 (including Pro) and MSM8226
+ Controller
+
+maintainers:
+ - Stephen Boyd <sboyd@kernel.org>
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on MSM8974 (all variants) and MSM8226.
+
+ See also::
+ include/dt-bindings/clock/qcom,gcc-msm8974.h (qcom,gcc-msm8226 and qcom,gcc-msm8974)
+ include/dt-bindings/reset/qcom,gcc-msm8974.h (qcom,gcc-msm8226 and qcom,gcc-msm8974)
+
+$ref: qcom,gcc.yaml#
+
+properties:
+ compatible:
+ enum:
+ - qcom,gcc-msm8226
+ - qcom,gcc-msm8974
+ - qcom,gcc-msm8974pro
+ - qcom,gcc-msm8974pro-ac
+
+ clocks:
+ items:
+ - description: XO source
+ - description: Sleep clock source
+
+ clock-names:
+ items:
+ - const: xo
+ - const: sleep_clk
+
+required:
+ - compatible
+ - '#power-domain-cells'
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ clock-controller@fc400000 {
+ compatible = "qcom,gcc-msm8974";
+ reg = <0x00100000 0x94000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+
+ clock-names = "xo", "sleep_clk";
+ clocks = <&xo_board>,
+ <&sleep_clk>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-msm8976.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-msm8976.yaml
new file mode 100644
index 000000000000..92195091a919
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc-msm8976.yaml
@@ -0,0 +1,84 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,gcc-msm8976.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on MSM8976
+
+maintainers:
+ - Stephen Boyd <sboyd@kernel.org>
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on MSM8976.
+
+ See also: include/dt-bindings/clock/qcom,gcc-msm8976.h
+
+properties:
+ compatible:
+ enum:
+ - qcom,gcc-msm8976
+ - qcom,gcc-msm8976-v1.1
+
+ clocks:
+ items:
+ - description: XO source
+ - description: Always-on XO source
+ - description: Pixel clock from DSI PHY0
+ - description: Byte clock from DSI PHY0
+ - description: Pixel clock from DSI PHY1
+ - description: Byte clock from DSI PHY1
+
+ clock-names:
+ items:
+ - const: xo
+ - const: xo_a
+ - const: dsi0pll
+ - const: dsi0pllbyte
+ - const: dsi1pll
+ - const: dsi1pllbyte
+
+ vdd_gfx-supply:
+ description:
+ Phandle to voltage regulator providing power to the GX domain.
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - vdd_gfx-supply
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ clock-controller@1800000 {
+ compatible = "qcom,gcc-msm8976";
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ reg = <0x1800000 0x80000>;
+
+ clocks = <&xo_board>,
+ <&xo_board>,
+ <&dsi0_phy 1>,
+ <&dsi0_phy 0>,
+ <&dsi1_phy 1>,
+ <&dsi1_phy 0>;
+
+ clock-names = "xo",
+ "xo_a",
+ "dsi0pll",
+ "dsi0pllbyte",
+ "dsi1pll",
+ "dsi1pllbyte";
+
+ vdd_gfx-supply = <&pm8004_s5>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-msm8994.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-msm8994.yaml
new file mode 100644
index 000000000000..93bcd61461e7
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc-msm8994.yaml
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,gcc-msm8994.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on MSM8994
+
+maintainers:
+ - Konrad Dybcio <konradybcio@kernel.org>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on MSM8994 and MSM8992.
+
+ See also: include/dt-bindings/clock/qcom,gcc-msm8994.h
+
+properties:
+ compatible:
+ enum:
+ - qcom,gcc-msm8992
+ - qcom,gcc-msm8994
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Sleep clock source
+
+ clock-names:
+ items:
+ - const: xo
+ - const: sleep
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ clock-controller@300000 {
+ compatible = "qcom,gcc-msm8994";
+ reg = <0x00300000 0x90000>;
+ clocks = <&xo_board>, <&sleep_clk>;
+ clock-names = "xo", "sleep";
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-msm8996.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-msm8996.yaml
new file mode 100644
index 000000000000..64796f45f294
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc-msm8996.yaml
@@ -0,0 +1,69 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,gcc-msm8996.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on MSM8996
+
+maintainers:
+ - Stephen Boyd <sboyd@kernel.org>
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Qualcomm global clock control module which provides the clocks, resets and
+ power domains on MSM8996.
+
+ See also: include/dt-bindings/clock/qcom,gcc-msm8996.h
+
+properties:
+ compatible:
+ const: qcom,gcc-msm8996
+
+ clocks:
+ minItems: 3
+ items:
+ - description: XO source
+ - description: Second XO source
+ - description: Sleep clock source
+ - description: PCIe 0 PIPE clock (optional)
+ - description: PCIe 1 PIPE clock (optional)
+ - description: PCIe 2 PIPE clock (optional)
+ - description: USB3 PIPE clock (optional)
+ - description: UFS RX symbol 0 clock (optional)
+ - description: UFS RX symbol 1 clock (optional)
+ - description: UFS TX symbol 0 clock (optional)
+
+ clock-names:
+ minItems: 3
+ items:
+ - const: cxo
+ - const: cxo2
+ - const: sleep_clk
+ - const: pcie_0_pipe_clk_src
+ - const: pcie_1_pipe_clk_src
+ - const: pcie_2_pipe_clk_src
+ - const: usb3_phy_pipe_clk_src
+ - const: ufs_rx_symbol_0_clk_src
+ - const: ufs_rx_symbol_1_clk_src
+ - const: ufs_tx_symbol_0_clk_src
+
+required:
+ - compatible
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ clock-controller@300000 {
+ compatible = "qcom,gcc-msm8996";
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ reg = <0x300000 0x90000>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-msm8998.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-msm8998.yaml
new file mode 100644
index 000000000000..d882f2b6620e
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc-msm8998.yaml
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,gcc-msm8998.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on MSM8998
+
+maintainers:
+ - Stephen Boyd <sboyd@kernel.org>
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on MSM8998.
+
+ See also: include/dt-bindings/clock/qcom,gcc-msm8998.h
+
+properties:
+ compatible:
+ const: qcom,gcc-msm8998
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Sleep clock source
+ - description: Audio reference clock (Optional clock)
+ minItems: 2
+
+ clock-names:
+ items:
+ - const: xo
+ - const: sleep_clk
+ - const: aud_ref_clk # Optional clock
+ minItems: 2
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmcc.h>
+ clock-controller@100000 {
+ compatible = "qcom,gcc-msm8998";
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ reg = <0x00100000 0xb0000>;
+ clocks = <&rpmcc RPM_SMD_XO_CLK_SRC>,
+ <&sleep>,
+ <0>;
+ clock-names = "xo",
+ "sleep_clk",
+ "aud_ref_clk";
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-qcm2290.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-qcm2290.yaml
new file mode 100644
index 000000000000..b9194fa11e47
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc-qcm2290.yaml
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,gcc-qcm2290.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on QCM2290
+
+maintainers:
+ - Shawn Guo <shawn.guo@linaro.org>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on QCM2290.
+
+ See also: include/dt-bindings/clock/qcom,gcc-qcm2290.h
+
+properties:
+ compatible:
+ const: qcom,gcc-qcm2290
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Sleep clock source
+
+ clock-names:
+ items:
+ - const: bi_tcxo
+ - const: sleep_clk
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmcc.h>
+ clock-controller@1400000 {
+ compatible = "qcom,gcc-qcm2290";
+ reg = <0x01400000 0x1f0000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ clock-names = "bi_tcxo", "sleep_clk";
+ clocks = <&rpmcc RPM_SMD_XO_CLK_SRC>, <&sleep_clk>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-qcs404.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-qcs404.yaml
new file mode 100644
index 000000000000..6b35a3c080a2
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc-qcs404.yaml
@@ -0,0 +1,59 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,gcc-qcs404.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on QCS404
+
+maintainers:
+ - Stephen Boyd <sboyd@kernel.org>
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on QCS404.
+
+ See also: include/dt-bindings/clock/qcom,gcc-qcs404.h
+
+properties:
+ compatible:
+ const: qcom,gcc-qcs404
+
+ clocks:
+ items:
+ - description: XO source
+ - description: Sleep clock source
+ - description: PCIe 0 PIPE clock (optional)
+ - description: DSI phy instance 0 dsi clock
+ - description: DSI phy instance 0 byte clock
+ - description: HDMI phy PLL clock
+
+ clock-names:
+ items:
+ - const: cxo
+ - const: sleep_clk
+ - const: pcie_0_pipe_clk_src
+ - const: dsi0pll
+ - const: dsi0pllbyte
+ - const: hdmi_pll
+
+required:
+ - compatible
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ clock-controller@1800000 {
+ compatible = "qcom,gcc-qcs404";
+ reg = <0x01800000 0x80000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-sc7180.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-sc7180.yaml
new file mode 100644
index 000000000000..e30d1df3eeb5
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc-sc7180.yaml
@@ -0,0 +1,67 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,gcc-sc7180.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on SC7180
+
+maintainers:
+ - Stephen Boyd <sboyd@kernel.org>
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on SC7180.
+
+ See also: include/dt-bindings/clock/qcom,gcc-sc7180.h
+
+properties:
+ compatible:
+ const: qcom,gcc-sc7180
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Board active XO source
+ - description: Sleep clock source
+
+ clock-names:
+ items:
+ - const: bi_tcxo
+ - const: bi_tcxo_ao
+ - const: sleep_clk
+
+ power-domains:
+ items:
+ - description: CX domain
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ #include <dt-bindings/power/qcom-rpmpd.h>
+
+ clock-controller@100000 {
+ compatible = "qcom,gcc-sc7180";
+ reg = <0x00100000 0x1f0000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&rpmhcc RPMH_CXO_CLK_A>,
+ <&sleep_clk>;
+ clock-names = "bi_tcxo", "bi_tcxo_ao", "sleep_clk";
+ power-domains = <&rpmhpd SC7180_CX>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-sc7280.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-sc7280.yaml
new file mode 100644
index 000000000000..5ddaf27bb1f4
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc-sc7280.yaml
@@ -0,0 +1,86 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,gcc-sc7280.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on SC7280
+
+maintainers:
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on SC7280.
+
+ See also: include/dt-bindings/clock/qcom,gcc-sc7280.h
+
+properties:
+ compatible:
+ const: qcom,gcc-sc7280
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Board active XO source
+ - description: Sleep clock source
+ - description: PCIE-0 pipe clock source
+ - description: PCIE-1 pipe clock source
+ - description: USF phy rx symbol 0 clock source
+ - description: USF phy rx symbol 1 clock source
+ - description: USF phy tx symbol 0 clock source
+ - description: USB30 phy wrapper pipe clock source
+
+ clock-names:
+ items:
+ - const: bi_tcxo
+ - const: bi_tcxo_ao
+ - const: sleep_clk
+ - const: pcie_0_pipe_clk
+ - const: pcie_1_pipe_clk
+ - const: ufs_phy_rx_symbol_0_clk
+ - const: ufs_phy_rx_symbol_1_clk
+ - const: ufs_phy_tx_symbol_0_clk
+ - const: usb3_phy_wrapper_gcc_usb30_pipe_clk
+
+ power-domains:
+ items:
+ - description: CX domain
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ #include <dt-bindings/power/qcom-rpmpd.h>
+
+ clock-controller@100000 {
+ compatible = "qcom,gcc-sc7280";
+ reg = <0x00100000 0x1f0000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&rpmhcc RPMH_CXO_CLK_A>,
+ <&sleep_clk>,
+ <&pcie_0_pipe_clk>, <&pcie_1_pipe_clk>,
+ <&ufs_phy_rx_symbol_0_clk>, <&ufs_phy_rx_symbol_1_clk>,
+ <&ufs_phy_tx_symbol_0_clk>,
+ <&usb3_phy_wrapper_gcc_usb30_pipe_clk>;
+
+ clock-names = "bi_tcxo", "bi_tcxo_ao", "sleep_clk", "pcie_0_pipe_clk",
+ "pcie_1_pipe_clk", "ufs_phy_rx_symbol_0_clk",
+ "ufs_phy_rx_symbol_1_clk", "ufs_phy_tx_symbol_0_clk",
+ "usb3_phy_wrapper_gcc_usb30_pipe_clk";
+ power-domains = <&rpmhpd SC7280_CX>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-sc8180x.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-sc8180x.yaml
new file mode 100644
index 000000000000..82c2ef39934d
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc-sc8180x.yaml
@@ -0,0 +1,66 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,gcc-sc8180x.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on SC8180x
+
+maintainers:
+ - Bjorn Andersson <bjorn.andersson@linaro.org>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on SC8180x.
+
+ See also: include/dt-bindings/clock/qcom,gcc-sc8180x.h
+
+properties:
+ compatible:
+ const: qcom,gcc-sc8180x
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Board active XO source
+ - description: Sleep clock source
+
+ clock-names:
+ items:
+ - const: bi_tcxo
+ - const: bi_tcxo_ao
+ - const: sleep_clk
+
+ power-domains:
+ items:
+ - description: CX domain
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - power-domains
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ #include <dt-bindings/power/qcom-rpmpd.h>
+ clock-controller@100000 {
+ compatible = "qcom,gcc-sc8180x";
+ reg = <0x00100000 0x1f0000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&rpmhcc RPMH_CXO_CLK_A>,
+ <&sleep_clk>;
+ clock-names = "bi_tcxo", "bi_tcxo_ao", "sleep_clk";
+ power-domains = <&rpmhpd SC8180X_CX>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-sc8280xp.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-sc8280xp.yaml
new file mode 100644
index 000000000000..c1eeccef66b4
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc-sc8280xp.yaml
@@ -0,0 +1,122 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,gcc-sc8280xp.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on SC8280xp
+
+maintainers:
+ - Bjorn Andersson <bjorn.andersson@linaro.org>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and
+ power domains on SC8280xp.
+
+ See also: include/dt-bindings/clock/qcom,gcc-sc8280xp.h
+
+properties:
+ compatible:
+ const: qcom,gcc-sc8280xp
+
+ clocks:
+ items:
+ - description: XO reference clock
+ - description: Sleep clock
+ - description: UFS memory first RX symbol clock
+ - description: UFS memory second RX symbol clock
+ - description: UFS memory first TX symbol clock
+ - description: UFS card first RX symbol clock
+ - description: UFS card second RX symbol clock
+ - description: UFS card first TX symbol clock
+ - description: Primary USB SuperSpeed pipe clock
+ - description: USB4 PHY pipegmux clock source
+ - description: USB4 PHY DP gmux clock source
+ - description: USB4 PHY sys pipegmux clock source
+ - description: USB4 PHY PCIe pipe clock
+ - description: USB4 PHY router max pipe clock
+ - description: Primary USB4 RX0 clock
+ - description: Primary USB4 RX1 clock
+ - description: Secondary USB SuperSpeed pipe clock
+ - description: Second USB4 PHY pipegmux clock source
+ - description: Second USB4 PHY DP gmux clock source
+ - description: Second USB4 PHY sys pipegmux clock source
+ - description: Second USB4 PHY PCIe pipe clock
+ - description: Second USB4 PHY router max pipe clock
+ - description: Secondary USB4 RX0 clock
+ - description: Secondary USB4 RX1 clock
+ - description: Multiport USB first SuperSpeed pipe clock
+ - description: Multiport USB second SuperSpeed pipe clock
+ - description: PCIe 2a pipe clock
+ - description: PCIe 2b pipe clock
+ - description: PCIe 3a pipe clock
+ - description: PCIe 3b pipe clock
+ - description: PCIe 4 pipe clock
+ - description: First EMAC controller reference clock
+ - description: Second EMAC controller reference clock
+
+ power-domains:
+ items:
+ - description: CX domain
+
+ protected-clocks:
+ maxItems: 389
+
+required:
+ - compatible
+ - clocks
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ #include <dt-bindings/power/qcom-rpmpd.h>
+
+ clock-controller@100000 {
+ compatible = "qcom,gcc-sc8280xp";
+ reg = <0x00100000 0x1f0000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&sleep_clk>,
+ <&ufs_phy_rx_symbol_0_clk>,
+ <&ufs_phy_rx_symbol_1_clk>,
+ <&ufs_phy_tx_symbol_0_clk>,
+ <&ufs_card_rx_symbol_0_clk>,
+ <&ufs_card_rx_symbol_1_clk>,
+ <&ufs_card_tx_symbol_0_clk>,
+ <&usb_0_ssphy>,
+ <&gcc_usb4_phy_pipegmux_clk_src>,
+ <&gcc_usb4_phy_dp_gmux_clk_src>,
+ <&gcc_usb4_phy_sys_pipegmux_clk_src>,
+ <&usb4_phy_gcc_usb4_pcie_pipe_clk>,
+ <&usb4_phy_gcc_usb4rtr_max_pipe_clk>,
+ <&qusb4phy_gcc_usb4_rx0_clk>,
+ <&qusb4phy_gcc_usb4_rx1_clk>,
+ <&usb_1_ssphy>,
+ <&gcc_usb4_1_phy_pipegmux_clk_src>,
+ <&gcc_usb4_1_phy_dp_gmux_clk_src>,
+ <&gcc_usb4_1_phy_sys_pipegmux_clk_src>,
+ <&usb4_1_phy_gcc_usb4_pcie_pipe_clk>,
+ <&usb4_1_phy_gcc_usb4rtr_max_pipe_clk>,
+ <&qusb4phy_1_gcc_usb4_rx0_clk>,
+ <&qusb4phy_1_gcc_usb4_rx1_clk>,
+ <&usb_2_ssphy>,
+ <&usb_3_ssphy>,
+ <&pcie2a_lane>,
+ <&pcie2b_lane>,
+ <&pcie3a_lane>,
+ <&pcie3b_lane>,
+ <&pcie4_lane>,
+ <&rxc0_ref_clk>,
+ <&rxc1_ref_clk>;
+ power-domains = <&rpmhpd SC8280XP_CX>;
+
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-sdm660.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-sdm660.yaml
new file mode 100644
index 000000000000..724ce0491118
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc-sdm660.yaml
@@ -0,0 +1,62 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,gcc-sdm660.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm SDM660/SDM630/SDM636 Global Clock & Reset Controller
+
+maintainers:
+ - Stephen Boyd <sboyd@kernel.org>
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on SDM630, SDM636 and SDM660
+
+ See also::
+ include/dt-bindings/clock/qcom,gcc-sdm660.h (qcom,gcc-sdm630 and qcom,gcc-sdm660)
+
+$ref: qcom,gcc.yaml#
+
+properties:
+ compatible:
+ enum:
+ - qcom,gcc-sdm630
+ - qcom,gcc-sdm660
+
+ clocks:
+ items:
+ - description: XO source
+ - description: Sleep clock source
+
+ clock-names:
+ items:
+ - const: xo
+ - const: sleep_clk
+
+ power-domains:
+ maxItems: 1
+
+required:
+ - compatible
+ - '#power-domain-cells'
+
+unevaluatedProperties: false
+
+examples:
+ # Example for GCC for SDM660:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ clock-controller@100000 {
+ compatible = "qcom,gcc-sdm660";
+ reg = <0x00100000 0x94000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+
+ clock-names = "xo", "sleep_clk";
+ clocks = <&xo_board>,
+ <&sleep_clk>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-sdm845.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-sdm845.yaml
new file mode 100644
index 000000000000..a7523a414341
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc-sdm845.yaml
@@ -0,0 +1,101 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,gcc-sdm845.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on SDM670 and SDM845
+
+maintainers:
+ - Stephen Boyd <sboyd@kernel.org>
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on SDM670 and SDM845
+
+ See also: include/dt-bindings/clock/qcom,gcc-sdm845.h
+
+properties:
+ compatible:
+ enum:
+ - qcom,gcc-sdm670
+ - qcom,gcc-sdm845
+
+ clocks:
+ minItems: 3
+ maxItems: 5
+
+ clock-names:
+ minItems: 3
+ maxItems: 5
+
+ power-domains:
+ maxItems: 1
+
+required:
+ - compatible
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: qcom,gcc-sdm670
+ then:
+ properties:
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Board active XO source
+ - description: Sleep clock source
+ clock-names:
+ items:
+ - const: bi_tcxo
+ - const: bi_tcxo_ao
+ - const: sleep_clk
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: qcom,gcc-sdm845
+ then:
+ properties:
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Board active XO source
+ - description: Sleep clock source
+ - description: PCIE 0 Pipe clock source
+ - description: PCIE 1 Pipe clock source
+ clock-names:
+ items:
+ - const: bi_tcxo
+ - const: bi_tcxo_ao
+ - const: sleep_clk
+ - const: pcie_0_pipe_clk
+ - const: pcie_1_pipe_clk
+
+unevaluatedProperties: false
+
+examples:
+ # Example for GCC for SDM845:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ clock-controller@100000 {
+ compatible = "qcom,gcc-sdm845";
+ reg = <0x100000 0x1f0000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&rpmhcc RPMH_CXO_CLK_A>,
+ <&sleep_clk>,
+ <&pcie0_lane>,
+ <&pcie1_lane>;
+ clock-names = "bi_tcxo", "bi_tcxo_ao", "sleep_clk", "pcie_0_pipe_clk", "pcie_1_pipe_clk";
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-sdx55.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-sdx55.yaml
new file mode 100644
index 000000000000..320e4f5b2b18
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc-sdx55.yaml
@@ -0,0 +1,59 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,gcc-sdx55.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on SDX55
+
+maintainers:
+ - Vinod Koul <vkoul@kernel.org>
+ - Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and
+ power domains on SDX55
+
+ See also: include/dt-bindings/clock/qcom,gcc-sdx55.h
+
+properties:
+ compatible:
+ const: qcom,gcc-sdx55
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Sleep clock source
+
+ clock-names:
+ items:
+ - const: bi_tcxo
+ - const: sleep_clk
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ clock-controller@100000 {
+ compatible = "qcom,gcc-sdx55";
+ reg = <0x00100000 0x1f0000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&sleep_clk>;
+ clock-names = "bi_tcxo",
+ "sleep_clk";
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-sdx65.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-sdx65.yaml
new file mode 100644
index 000000000000..9242e6e19139
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc-sdx65.yaml
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,gcc-sdx65.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on SDX65
+
+maintainers:
+ - Vamsi krishna Lanka <quic_vamslank@quicinc.com>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on SDX65
+
+ See also: include/dt-bindings/clock/qcom,gcc-sdx65.h
+
+properties:
+ compatible:
+ const: qcom,gcc-sdx65
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Board active XO source
+ - description: Sleep clock source
+ - description: PCIE Pipe clock source
+ - description: USB3 phy wrapper pipe clock source
+
+ clock-names:
+ items:
+ - const: bi_tcxo
+ - const: bi_tcxo_ao
+ - const: sleep_clk
+ - const: pcie_pipe_clk
+ - const: usb3_phy_wrapper_gcc_usb30_pipe_clk
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ clock-controller@100000 {
+ compatible = "qcom,gcc-sdx65";
+ reg = <0x100000 0x1f7400>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>, <&rpmhcc RPMH_CXO_CLK_A>, <&sleep_clk>,
+ <&pcie_pipe_clk>, <&usb3_phy_wrapper_gcc_usb30_pipe_clk>;
+ clock-names = "bi_tcxo", "bi_tcxo_ao", "sleep_clk",
+ "pcie_pipe_clk", "usb3_phy_wrapper_gcc_usb30_pipe_clk";
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-sm6115.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-sm6115.yaml
new file mode 100644
index 000000000000..c926630907c5
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc-sm6115.yaml
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,gcc-sm6115.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on SM6115 and SM4250
+
+maintainers:
+ - Iskren Chernev <iskren.chernev@gmail.com>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on SM4250/6115.
+
+ See also: include/dt-bindings/clock/qcom,gcc-sm6115.h
+
+properties:
+ compatible:
+ const: qcom,gcc-sm6115
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Sleep clock source
+
+ clock-names:
+ items:
+ - const: bi_tcxo
+ - const: sleep_clk
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmcc.h>
+ clock-controller@1400000 {
+ compatible = "qcom,gcc-sm6115";
+ reg = <0x01400000 0x1f0000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ clock-names = "bi_tcxo", "sleep_clk";
+ clocks = <&rpmcc RPM_SMD_XO_CLK_SRC>, <&sleep_clk>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-sm6125.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-sm6125.yaml
new file mode 100644
index 000000000000..5bd422e94a38
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc-sm6125.yaml
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,gcc-sm6125.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on SM6125
+
+maintainers:
+ - Konrad Dybcio <konradybcio@kernel.org>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on SM6125.
+
+ See also: include/dt-bindings/clock/qcom,gcc-sm6125.h
+
+properties:
+ compatible:
+ const: qcom,gcc-sm6125
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Sleep clock source
+
+ clock-names:
+ items:
+ - const: bi_tcxo
+ - const: sleep_clk
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmcc.h>
+ clock-controller@1400000 {
+ compatible = "qcom,gcc-sm6125";
+ reg = <0x01400000 0x1f0000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ clock-names = "bi_tcxo", "sleep_clk";
+ clocks = <&rpmcc RPM_SMD_XO_CLK_SRC>, <&sleep_clk>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-sm6350.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-sm6350.yaml
new file mode 100644
index 000000000000..819e855eaf9a
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc-sm6350.yaml
@@ -0,0 +1,59 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,gcc-sm6350.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on SM6350
+
+maintainers:
+ - Konrad Dybcio <konradybcio@kernel.org>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on SM6350.
+
+ See also: include/dt-bindings/clock/qcom,gcc-sm6350.h
+
+properties:
+ compatible:
+ const: qcom,gcc-sm6350
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Board active XO source
+ - description: Sleep clock source
+
+ clock-names:
+ items:
+ - const: bi_tcxo
+ - const: bi_tcxo_ao
+ - const: sleep_clk
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ clock-controller@100000 {
+ compatible = "qcom,gcc-sm6350";
+ reg = <0x00100000 0x1f0000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&rpmhcc RPMH_CXO_CLK_A>,
+ <&sleep_clk>;
+ clock-names = "bi_tcxo", "bi_tcxo_ao", "sleep_clk";
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-sm8150.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-sm8150.yaml
new file mode 100644
index 000000000000..5f3f69fe9ddb
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc-sm8150.yaml
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,gcc-sm8150.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on SM8150
+
+maintainers:
+ - Stephen Boyd <sboyd@kernel.org>
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on SM8150.
+
+ See also: include/dt-bindings/clock/qcom,gcc-sm8150.h
+
+properties:
+ compatible:
+ const: qcom,gcc-sm8150
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Sleep clock source
+
+ clock-names:
+ items:
+ - const: bi_tcxo
+ - const: sleep_clk
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ clock-controller@100000 {
+ compatible = "qcom,gcc-sm8150";
+ reg = <0x00100000 0x1f0000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&sleep_clk>;
+ clock-names = "bi_tcxo", "sleep_clk";
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-sm8250.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-sm8250.yaml
new file mode 100644
index 000000000000..f4cd5a509c60
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc-sm8250.yaml
@@ -0,0 +1,60 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,gcc-sm8250.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on SM8250
+
+maintainers:
+ - Stephen Boyd <sboyd@kernel.org>
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on SM8250.
+
+ See also: include/dt-bindings/clock/qcom,gcc-sm8250.h
+
+properties:
+ compatible:
+ const: qcom,gcc-sm8250
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Board active XO source
+ - description: Sleep clock source
+
+ clock-names:
+ items:
+ - const: bi_tcxo
+ - const: bi_tcxo_ao
+ - const: sleep_clk
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ clock-controller@100000 {
+ compatible = "qcom,gcc-sm8250";
+ reg = <0x00100000 0x1f0000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&rpmhcc RPMH_CXO_CLK_A>,
+ <&sleep_clk>;
+ clock-names = "bi_tcxo", "bi_tcxo_ao", "sleep_clk";
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-sm8350.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-sm8350.yaml
new file mode 100644
index 000000000000..97ffae3b5522
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc-sm8350.yaml
@@ -0,0 +1,79 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,gcc-sm8350.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on SM8350
+
+maintainers:
+ - Vinod Koul <vkoul@kernel.org>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on SM8350.
+
+ See also: include/dt-bindings/clock/qcom,gcc-sm8350.h
+
+properties:
+ compatible:
+ const: qcom,gcc-sm8350
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Sleep clock source
+ - description: PCIE 0 Pipe clock source (Optional clock)
+ - description: PCIE 1 Pipe clock source (Optional clock)
+ - description: UFS card Rx symbol 0 clock source (Optional clock)
+ - description: UFS card Rx symbol 1 clock source (Optional clock)
+ - description: UFS card Tx symbol 0 clock source (Optional clock)
+ - description: UFS phy Rx symbol 0 clock source (Optional clock)
+ - description: UFS phy Rx symbol 1 clock source (Optional clock)
+ - description: UFS phy Tx symbol 0 clock source (Optional clock)
+ - description: USB3 phy wrapper pipe clock source (Optional clock)
+ - description: USB3 phy sec pipe clock source (Optional clock)
+ minItems: 2
+
+ clock-names:
+ items:
+ - const: bi_tcxo
+ - const: sleep_clk
+ - const: pcie_0_pipe_clk # Optional clock
+ - const: pcie_1_pipe_clk # Optional clock
+ - const: ufs_card_rx_symbol_0_clk # Optional clock
+ - const: ufs_card_rx_symbol_1_clk # Optional clock
+ - const: ufs_card_tx_symbol_0_clk # Optional clock
+ - const: ufs_phy_rx_symbol_0_clk # Optional clock
+ - const: ufs_phy_rx_symbol_1_clk # Optional clock
+ - const: ufs_phy_tx_symbol_0_clk # Optional clock
+ - const: usb3_phy_wrapper_gcc_usb30_pipe_clk # Optional clock
+ - const: usb3_uni_phy_sec_gcc_usb30_pipe_clk # Optional clock
+ minItems: 2
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ clock-controller@100000 {
+ compatible = "qcom,gcc-sm8350";
+ reg = <0x00100000 0x1f0000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&sleep_clk>;
+ clock-names = "bi_tcxo", "sleep_clk";
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-sm8450.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-sm8450.yaml
new file mode 100644
index 000000000000..3169ac05e1d8
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc-sm8450.yaml
@@ -0,0 +1,74 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,gcc-sm8450.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on SM8450
+
+maintainers:
+ - Vinod Koul <vkoul@kernel.org>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on SM8450
+
+ See also: include/dt-bindings/clock/qcom,gcc-sm8450.h
+
+properties:
+ compatible:
+ enum:
+ - qcom,gcc-sm8450
+ - qcom,sm8475-gcc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Sleep clock source
+ - description: PCIE 0 Pipe clock source (Optional clock)
+ - description: PCIE 1 Pipe clock source (Optional clock)
+ - description: PCIE 1 Phy Auxiliary clock source (Optional clock)
+ - description: UFS Phy Rx symbol 0 clock source (Optional clock)
+ - description: UFS Phy Rx symbol 1 clock source (Optional clock)
+ - description: UFS Phy Tx symbol 0 clock source (Optional clock)
+ - description: USB3 Phy wrapper pipe clock source (Optional clock)
+ minItems: 2
+
+ clock-names:
+ items:
+ - const: bi_tcxo
+ - const: sleep_clk
+ - const: pcie_0_pipe_clk # Optional clock
+ - const: pcie_1_pipe_clk # Optional clock
+ - const: pcie_1_phy_aux_clk # Optional clock
+ - const: ufs_phy_rx_symbol_0_clk # Optional clock
+ - const: ufs_phy_rx_symbol_1_clk # Optional clock
+ - const: ufs_phy_tx_symbol_0_clk # Optional clock
+ - const: usb3_phy_wrapper_gcc_usb30_pipe_clk # Optional clock
+ minItems: 2
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ clock-controller@100000 {
+ compatible = "qcom,gcc-sm8450";
+ reg = <0x00100000 0x001f4200>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>, <&sleep_clk>;
+ clock-names = "bi_tcxo", "sleep_clk";
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc.txt b/Documentation/devicetree/bindings/clock/qcom,gcc.txt
deleted file mode 100644
index 52d9345c9927..000000000000
--- a/Documentation/devicetree/bindings/clock/qcom,gcc.txt
+++ /dev/null
@@ -1,57 +0,0 @@
-Qualcomm Global Clock & Reset Controller Binding
-------------------------------------------------
-
-Required properties :
-- compatible : shall contain only one of the following:
-
- "qcom,gcc-apq8064"
- "qcom,gcc-apq8084"
- "qcom,gcc-ipq8064"
- "qcom,gcc-ipq4019"
- "qcom,gcc-ipq8074"
- "qcom,gcc-msm8660"
- "qcom,gcc-msm8916"
- "qcom,gcc-msm8960"
- "qcom,gcc-msm8974"
- "qcom,gcc-msm8974pro"
- "qcom,gcc-msm8974pro-ac"
- "qcom,gcc-msm8994"
- "qcom,gcc-msm8996"
- "qcom,gcc-msm8998"
- "qcom,gcc-mdm9615"
- "qcom,gcc-qcs404"
- "qcom,gcc-sdm630"
- "qcom,gcc-sdm660"
- "qcom,gcc-sdm845"
-
-- reg : shall contain base register location and length
-- #clock-cells : shall contain 1
-- #reset-cells : shall contain 1
-
-Optional properties :
-- #power-domain-cells : shall contain 1
-- Qualcomm TSENS (thermal sensor device) on some devices can
-be part of GCC and hence the TSENS properties can also be
-part of the GCC/clock-controller node.
-For more details on the TSENS properties please refer
-Documentation/devicetree/bindings/thermal/qcom-tsens.txt
-
-Example:
- clock-controller@900000 {
- compatible = "qcom,gcc-msm8960";
- reg = <0x900000 0x4000>;
- #clock-cells = <1>;
- #reset-cells = <1>;
- #power-domain-cells = <1>;
- };
-
-Example of GCC with TSENS properties:
- clock-controller@900000 {
- compatible = "qcom,gcc-apq8064";
- reg = <0x00900000 0x4000>;
- nvmem-cells = <&tsens_calib>, <&tsens_backup>;
- nvmem-cell-names = "calib", "calib_backup";
- #clock-cells = <1>;
- #reset-cells = <1>;
- #thermal-sensor-cells = <1>;
- };
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc.yaml
new file mode 100644
index 000000000000..513d6fd89249
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc.yaml
@@ -0,0 +1,41 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,gcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller Common Properties
+
+maintainers:
+ - Stephen Boyd <sboyd@kernel.org>
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Common bindings for Qualcomm global clock control module providing the
+ clocks, resets and power domains.
+
+properties:
+ '#clock-cells':
+ const: 1
+
+ '#reset-cells':
+ const: 1
+
+ '#power-domain-cells':
+ const: 1
+
+ reg:
+ maxItems: 1
+
+ protected-clocks:
+ description:
+ Protected clock specifier list as per common clock binding.
+
+required:
+ - reg
+ - '#clock-cells'
+ - '#reset-cells'
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,glymur-dispcc.yaml b/Documentation/devicetree/bindings/clock/qcom,glymur-dispcc.yaml
new file mode 100644
index 000000000000..45f027c70e03
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,glymur-dispcc.yaml
@@ -0,0 +1,98 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,glymur-dispcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Display Clock & Reset Controller on GLYMUR
+
+maintainers:
+ - Taniya Das <taniya.das@oss.qualcomm.com>
+
+description: |
+ Qualcomm display clock control module which supports the clocks, resets and
+ power domains for the MDSS instances on GLYMUR SoC.
+
+ See also:
+ include/dt-bindings/clock/qcom,dispcc-glymur.h
+
+properties:
+ compatible:
+ enum:
+ - qcom,glymur-dispcc
+
+ clocks:
+ items:
+ - description: Board CXO clock
+ - description: Board sleep clock
+ - description: DisplayPort 0 link clock
+ - description: DisplayPort 0 VCO div clock
+ - description: DisplayPort 1 link clock
+ - description: DisplayPort 1 VCO div clock
+ - description: DisplayPort 2 link clock
+ - description: DisplayPort 2 VCO div clock
+ - description: DisplayPort 3 link clock
+ - description: DisplayPort 3 VCO div clock
+ - description: DSI 0 PLL byte clock
+ - description: DSI 0 PLL DSI clock
+ - description: DSI 1 PLL byte clock
+ - description: DSI 1 PLL DSI clock
+ - description: Standalone PHY 0 PLL link clock
+ - description: Standalone PHY 0 VCO div clock
+ - description: Standalone PHY 1 PLL link clock
+ - description: Standalone PHY 1 VCO div clock
+
+ power-domains:
+ description:
+ A phandle and PM domain specifier for the MMCX power domain.
+ maxItems: 1
+
+ required-opps:
+ description:
+ A phandle to an OPP node describing required MMCX performance point.
+ maxItems: 1
+
+required:
+ - compatible
+ - clocks
+ - power-domains
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ #include <dt-bindings/power/qcom,rpmhpd.h>
+
+ clock-controller@af00000 {
+ compatible = "qcom,glymur-dispcc";
+ reg = <0x0af00000 0x20000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&sleep_clk>,
+ <&mdss_dp_phy0 0>,
+ <&mdss_dp_phy0 1>,
+ <&mdss_dp_phy1 0>,
+ <&mdss_dp_phy1 1>,
+ <&mdss_dp_phy2 0>,
+ <&mdss_dp_phy2 1>,
+ <&mdss_dp_phy3 0>,
+ <&mdss_dp_phy3 1>,
+ <&mdss_dsi0_phy 0>,
+ <&mdss_dsi0_phy 1>,
+ <&mdss_dsi1_phy 0>,
+ <&mdss_dsi1_phy 1>,
+ <&mdss_phy0_link 0>,
+ <&mdss_phy0_vco_div 0>,
+ <&mdss_phy1_link 1>,
+ <&mdss_phy1_vco_div 1>;
+ power-domains = <&rpmhpd RPMHPD_MMCX>;
+ required-opps = <&rpmhpd_opp_low_svs>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,glymur-gcc.yaml b/Documentation/devicetree/bindings/clock/qcom,glymur-gcc.yaml
new file mode 100644
index 000000000000..b05b0e6c4483
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,glymur-gcc.yaml
@@ -0,0 +1,121 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,glymur-gcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on Glymur SoC
+
+maintainers:
+ - Taniya Das <taniya.das@oss.qualcomm.com>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on Glymur SoC.
+
+ See also: include/dt-bindings/clock/qcom,glymur-gcc.h
+
+properties:
+ compatible:
+ const: qcom,glymur-gcc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Board XO_A source
+ - description: Sleep clock source
+ - description: USB 0 Phy DP0 GMUX clock source
+ - description: USB 0 Phy DP1 GMUX clock source
+ - description: USB 0 Phy PCIE PIPEGMUX clock source
+ - description: USB 0 Phy PIPEGMUX clock source
+ - description: USB 0 Phy SYS PCIE PIPEGMUX clock source
+ - description: USB 1 Phy DP0 GMUX 2 clock source
+ - description: USB 1 Phy DP1 GMUX 2 clock source
+ - description: USB 1 Phy PCIE PIPEGMUX clock source
+ - description: USB 1 Phy PIPEGMUX clock source
+ - description: USB 1 Phy SYS PCIE PIPEGMUX clock source
+ - description: USB 2 Phy DP0 GMUX 2 clock source
+ - description: USB 2 Phy DP1 GMUX 2 clock source
+ - description: USB 2 Phy PCIE PIPEGMUX clock source
+ - description: USB 2 Phy PIPEGMUX clock source
+ - description: USB 2 Phy SYS PCIE PIPEGMUX clock source
+ - description: PCIe 3a pipe clock
+ - description: PCIe 3b pipe clock
+ - description: PCIe 4 pipe clock
+ - description: PCIe 5 pipe clock
+ - description: PCIe 6 pipe clock
+ - description: QUSB4 0 PHY RX 0 clock source
+ - description: QUSB4 0 PHY RX 1 clock source
+ - description: QUSB4 1 PHY RX 0 clock source
+ - description: QUSB4 1 PHY RX 1 clock source
+ - description: QUSB4 2 PHY RX 0 clock source
+ - description: QUSB4 2 PHY RX 1 clock source
+ - description: UFS PHY RX Symbol 0 clock source
+ - description: UFS PHY RX Symbol 1 clock source
+ - description: UFS PHY TX Symbol 0 clock source
+ - description: USB3 PHY 0 pipe clock source
+ - description: USB3 PHY 1 pipe clock source
+ - description: USB3 PHY 2 pipe clock source
+ - description: USB3 UNI PHY pipe 0 clock source
+ - description: USB3 UNI PHY pipe 1 clock source
+ - description: USB4 PHY 0 pcie pipe clock source
+ - description: USB4 PHY 0 Max pipe clock source
+ - description: USB4 PHY 1 pcie pipe clock source
+ - description: USB4 PHY 1 Max pipe clock source
+ - description: USB4 PHY 2 pcie pipe clock source
+ - description: USB4 PHY 2 Max pipe clock source
+
+required:
+ - compatible
+ - clocks
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ clock-controller@100000 {
+ compatible = "qcom,glymur-gcc";
+ reg = <0x100000 0x1f9000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&rpmhcc RPMH_CXO_CLK_A>,
+ <&sleep_clk>,
+ <&usb_0_phy_dp0_gmux>,
+ <&usb_0_phy_dp1_gmux>,
+ <&usb_0_phy_pcie_pipegmux>,
+ <&usb_0_phy_pipegmux>,
+ <&usb_0_phy_sys_pcie_pipegmux>,
+ <&usb_1_phy_dp0_gmux_2>,
+ <&usb_1_phy_dp1_gmux_2>,
+ <&usb_1_phy_pcie_pipegmux>,
+ <&usb_1_phy_pipegmux>,
+ <&usb_1_phy_sys_pcie_pipegmux>,
+ <&usb_2_phy_dp0_gmux 2>,
+ <&usb_2_phy_dp1_gmux 2>,
+ <&usb_2_phy_pcie_pipegmux>,
+ <&usb_2_phy_pipegmux>,
+ <&usb_2_phy_sys_pcie_pipegmux>,
+ <&pcie_3a_pipe>, <&pcie_3b_pipe>,
+ <&pcie_4_pipe>, <&pcie_5_pipe>,
+ <&pcie_6_pipe>,
+ <&qusb4_0_phy_rx_0>, <&qusb4_0_phy_rx_1>,
+ <&qusb4_1_phy_rx_0>, <&qusb4_1_phy_rx_1>,
+ <&qusb4_2_phy_rx_0>, <&qusb4_2_phy_rx_1>,
+ <&ufs_phy_rx_symbol_0>, <&ufs_phy_rx_symbol_1>,
+ <&ufs_phy_tx_symbol_0>,
+ <&usb3_phy_0_pipe>, <&usb3_phy_1_pipe>,
+ <&usb3_phy_2_pipe>,
+ <&usb3_uni_phy_pipe_0>, <&usb3_uni_phy_pipe_1>,
+ <&usb4_phy_0_pcie_pipe>, <&usb4_phy_0_max_pipe>,
+ <&usb4_phy_1_pcie_pipe>, <&usb4_phy_1_max_pipe>,
+ <&usb4_phy_2_pcie_pipe>, <&usb4_phy_2_max_pipe>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,gpucc-sdm660.yaml b/Documentation/devicetree/bindings/clock/qcom,gpucc-sdm660.yaml
new file mode 100644
index 000000000000..79bb90dbe4c1
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gpucc-sdm660.yaml
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,gpucc-sdm660.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Graphics Clock & Reset Controller on SDM630 and SDM660
+
+maintainers:
+ - AngeloGioacchino Del Regno <angelogioacchino.delregno@somainline.org>
+
+description: |
+ Qualcomm graphics clock control module provides the clocks, resets and
+ power domains on SDM630 and SDM660.
+
+ See also dt-bindings/clock/qcom,gpucc-sdm660.h.
+
+properties:
+ compatible:
+ enum:
+ - qcom,gpucc-sdm630
+ - qcom,gpucc-sdm660
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: GPLL0 main gpu branch
+ - description: GPLL0 divider gpu branch
+
+ clock-names:
+ items:
+ - const: xo
+ - const: gcc_gpu_gpll0_clk
+ - const: gcc_gpu_gpll0_div_clk
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,gcc-sdm660.h>
+ #include <dt-bindings/clock/qcom,rpmcc.h>
+
+ clock-controller@5065000 {
+ compatible = "qcom,gpucc-sdm660";
+ reg = <0x05065000 0x9038>;
+ clocks = <&rpmcc RPM_SMD_XO_CLK_SRC>,
+ <&gcc GCC_GPU_GPLL0_CLK>,
+ <&gcc GCC_GPU_GPLL0_DIV_CLK>;
+ clock-names = "xo", "gcc_gpu_gpll0_clk",
+ "gcc_gpu_gpll0_div_clk";
+ #clock-cells = <1>;
+ #power-domain-cells = <1>;
+ #reset-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,gpucc.yaml
new file mode 100644
index 000000000000..4cdff6161bf0
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gpucc.yaml
@@ -0,0 +1,96 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,gpucc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Graphics Clock & Reset Controller
+
+maintainers:
+ - Taniya Das <quic_tdas@quicinc.com>
+ - Imran Shaik <quic_imrashai@quicinc.com>
+
+description: |
+ Qualcomm graphics clock control module provides the clocks, resets and power
+ domains on Qualcomm SoCs.
+
+ See also::
+ include/dt-bindings/clock/qcom,gpucc-sdm845.h
+ include/dt-bindings/clock/qcom,gpucc-sa8775p.h
+ include/dt-bindings/clock/qcom,gpucc-sc7180.h
+ include/dt-bindings/clock/qcom,gpucc-sc7280.h
+ include/dt-bindings/clock/qcom,gpucc-sc8280xp.h
+ include/dt-bindings/clock/qcom,gpucc-sm6350.h
+ include/dt-bindings/clock/qcom,gpucc-sm8150.h
+ include/dt-bindings/clock/qcom,gpucc-sm8250.h
+ include/dt-bindings/clock/qcom,gpucc-sm8350.h
+ include/dt-bindings/clock/qcom,qcs8300-gpucc.h
+
+properties:
+ compatible:
+ enum:
+ - qcom,qcs8300-gpucc
+ - qcom,sdm845-gpucc
+ - qcom,sa8775p-gpucc
+ - qcom,sc7180-gpucc
+ - qcom,sc7280-gpucc
+ - qcom,sc8180x-gpucc
+ - qcom,sc8280xp-gpucc
+ - qcom,sm6350-gpucc
+ - qcom,sm8150-gpucc
+ - qcom,sm8250-gpucc
+ - qcom,sm8350-gpucc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: GPLL0 main branch source
+ - description: GPLL0 div branch source
+
+ clock-names:
+ items:
+ - const: bi_tcxo
+ - const: gcc_gpu_gpll0_clk_src
+ - const: gcc_gpu_gpll0_div_clk_src
+
+ power-domains:
+ maxItems: 1
+
+ vdd-gfx-supply:
+ description: Regulator supply for the VDD_GFX pads
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - '#power-domain-cells'
+
+# Require that power-domains and vdd-gfx-supply are not both present
+not:
+ required:
+ - power-domains
+ - vdd-gfx-supply
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,gcc-sdm845.h>
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ clock-controller@5090000 {
+ compatible = "qcom,sdm845-gpucc";
+ reg = <0x05090000 0x9000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&gcc GCC_GPU_GPLL0_CLK_SRC>,
+ <&gcc GCC_GPU_GPLL0_DIV_CLK_SRC>;
+ clock-names = "bi_tcxo",
+ "gcc_gpu_gpll0_clk_src",
+ "gcc_gpu_gpll0_div_clk_src";
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,hfpll.txt b/Documentation/devicetree/bindings/clock/qcom,hfpll.txt
deleted file mode 100644
index ec02a024424c..000000000000
--- a/Documentation/devicetree/bindings/clock/qcom,hfpll.txt
+++ /dev/null
@@ -1,60 +0,0 @@
-High-Frequency PLL (HFPLL)
-
-PROPERTIES
-
-- compatible:
- Usage: required
- Value type: <string>:
- shall contain only one of the following. The generic
- compatible "qcom,hfpll" should be also included.
-
- "qcom,hfpll-ipq8064", "qcom,hfpll"
- "qcom,hfpll-apq8064", "qcom,hfpll"
- "qcom,hfpll-msm8974", "qcom,hfpll"
- "qcom,hfpll-msm8960", "qcom,hfpll"
-
-- reg:
- Usage: required
- Value type: <prop-encoded-array>
- Definition: address and size of HPLL registers. An optional second
- element specifies the address and size of the alias
- register region.
-
-- clocks:
- Usage: required
- Value type: <prop-encoded-array>
- Definition: reference to the xo clock.
-
-- clock-names:
- Usage: required
- Value type: <stringlist>
- Definition: must be "xo".
-
-- clock-output-names:
- Usage: required
- Value type: <string>
- Definition: Name of the PLL. Typically hfpllX where X is a CPU number
- starting at 0. Otherwise hfpll_Y where Y is more specific
- such as "l2".
-
-Example:
-
-1) An HFPLL for the L2 cache.
-
- clock-controller@f9016000 {
- compatible = "qcom,hfpll-ipq8064", "qcom,hfpll";
- reg = <0xf9016000 0x30>;
- clocks = <&xo_board>;
- clock-names = "xo";
- clock-output-names = "hfpll_l2";
- };
-
-2) An HFPLL for CPU0. This HFPLL has the alias register region.
-
- clock-controller@f908a000 {
- compatible = "qcom,hfpll-ipq8064", "qcom,hfpll";
- reg = <0xf908a000 0x30>, <0xf900a000 0x30>;
- clocks = <&xo_board>;
- clock-names = "xo";
- clock-output-names = "hfpll0";
- };
diff --git a/Documentation/devicetree/bindings/clock/qcom,hfpll.yaml b/Documentation/devicetree/bindings/clock/qcom,hfpll.yaml
new file mode 100644
index 000000000000..8cb1c164f760
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,hfpll.yaml
@@ -0,0 +1,69 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,hfpll.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm High-Frequency PLL
+
+maintainers:
+ - Bjorn Andersson <andersson@kernel.org>
+
+description:
+ The HFPLL is used as CPU PLL on various Qualcomm SoCs.
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - qcom,msm8974-hfpll
+ - qcom,msm8976-hfpll-a53
+ - qcom,msm8976-hfpll-a72
+ - qcom,msm8976-hfpll-cci
+ - qcom,qcs404-hfpll
+ - const: qcom,hfpll
+ deprecated: true
+
+ reg:
+ items:
+ - description: HFPLL registers
+ - description: Alias register region
+ minItems: 1
+
+ '#clock-cells':
+ const: 0
+
+ clocks:
+ items:
+ - description: board XO clock
+
+ clock-names:
+ items:
+ - const: xo
+
+ clock-output-names:
+ description:
+ Name of the PLL. Typically hfpllX where X is a CPU number starting at 0.
+ Otherwise hfpll_Y where Y is more specific such as "l2".
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+ - clocks
+ - clock-names
+ - clock-output-names
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@f908a000 {
+ compatible = "qcom,msm8974-hfpll";
+ reg = <0xf908a000 0x30>, <0xf900a000 0x30>;
+ #clock-cells = <0>;
+ clock-output-names = "hfpll0";
+ clocks = <&xo_board>;
+ clock-names = "xo";
+ };
diff --git a/Documentation/devicetree/bindings/clock/qcom,ipq5018-gcc.yaml b/Documentation/devicetree/bindings/clock/qcom,ipq5018-gcc.yaml
new file mode 100644
index 000000000000..489d0fc5607c
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,ipq5018-gcc.yaml
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,ipq5018-gcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on IPQ5018
+
+maintainers:
+ - Sricharan Ramabadhran <quic_srichara@quicinc.com>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on IPQ5018
+
+ See also::
+ include/dt-bindings/clock/qcom,ipq5018-gcc.h
+ include/dt-bindings/reset/qcom,ipq5018-gcc.h
+
+properties:
+ compatible:
+ const: qcom,gcc-ipq5018
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Sleep clock source
+ - description: PCIE20 PHY0 pipe clock source
+ - description: PCIE20 PHY1 pipe clock source
+ - description: USB3 PHY pipe clock source
+ - description: GEPHY RX clock source
+ - description: GEPHY TX clock source
+ - description: UNIPHY RX clock source
+ - description: UNIPHY TX clk source
+
+ '#power-domain-cells': false
+
+required:
+ - compatible
+ - clocks
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ clock-controller@1800000 {
+ compatible = "qcom,gcc-ipq5018";
+ reg = <0x01800000 0x80000>;
+ clocks = <&xo_board_clk>,
+ <&sleep_clk>,
+ <&pcie20_phy0_pipe_clk>,
+ <&pcie20_phy1_pipe_clk>,
+ <&usb3_phy0_pipe_clk>,
+ <&gephy_rx_clk>,
+ <&gephy_tx_clk>,
+ <&uniphy_rx_clk>,
+ <&uniphy_tx_clk>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,ipq5332-gcc.yaml b/Documentation/devicetree/bindings/clock/qcom,ipq5332-gcc.yaml
new file mode 100644
index 000000000000..1230183fc0a9
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,ipq5332-gcc.yaml
@@ -0,0 +1,83 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,ipq5332-gcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on IPQ5332 and IPQ5424
+
+maintainers:
+ - Bjorn Andersson <andersson@kernel.org>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on IPQ5332 and IPQ5424.
+
+ See also:
+ include/dt-bindings/clock/qcom,gcc-ipq5332.h
+ include/dt-bindings/clock/qcom,gcc-ipq5424.h
+
+properties:
+ compatible:
+ enum:
+ - qcom,ipq5332-gcc
+ - qcom,ipq5424-gcc
+
+ clocks:
+ minItems: 5
+ items:
+ - description: Board XO clock source
+ - description: Sleep clock source
+ - description: PCIE 2lane PHY pipe clock source
+ - description: PCIE 2lane x1 PHY pipe clock source (For second lane)
+ - description: USB PCIE wrapper pipe clock source
+ - description: PCIE 2-lane PHY2 pipe clock source
+ - description: PCIE 2-lane PHY3 pipe clock source
+
+ '#power-domain-cells': false
+ '#interconnect-cells':
+ const: 1
+
+required:
+ - compatible
+ - clocks
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: qcom,ipq5332-gcc
+ then:
+ properties:
+ clocks:
+ maxItems: 5
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: qcom,ipq5424-gcc
+ then:
+ properties:
+ clocks:
+ minItems: 7
+ maxItems: 7
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ clock-controller@1800000 {
+ compatible = "qcom,ipq5332-gcc";
+ reg = <0x01800000 0x80000>;
+ clocks = <&xo_board>,
+ <&sleep_clk>,
+ <&pcie_2lane_phy_pipe_clk>,
+ <&pcie_2lane_phy_pipe_clk_x1>,
+ <&usb_pcie_wrapper_pipe_clk>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,ipq5424-apss-clk.yaml b/Documentation/devicetree/bindings/clock/qcom,ipq5424-apss-clk.yaml
new file mode 100644
index 000000000000..def739fa0a8c
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,ipq5424-apss-clk.yaml
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,ipq5424-apss-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm APSS IPQ5424 Clock Controller
+
+maintainers:
+ - Varadarajan Narayanan <quic_varada@quicinc.com>
+
+description:
+ The CPU core in ipq5424 is clocked by a huayra PLL with RCG support.
+ The RCG and PLL have a separate register space from the GCC.
+
+properties:
+ compatible:
+ enum:
+ - qcom,ipq5424-apss-clk
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: Reference to the XO clock.
+ - description: Reference to the GPLL0 clock.
+
+ '#clock-cells':
+ const: 1
+
+ '#interconnect-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - '#clock-cells'
+ - '#interconnect-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,ipq5424-gcc.h>
+
+ apss_clk: clock-controller@fa80000 {
+ compatible = "qcom,ipq5424-apss-clk";
+ reg = <0x0fa80000 0x20000>;
+ clocks = <&xo_board>,
+ <&gcc GPLL0>;
+ #clock-cells = <1>;
+ #interconnect-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/qcom,ipq9574-cmn-pll.yaml b/Documentation/devicetree/bindings/clock/qcom,ipq9574-cmn-pll.yaml
new file mode 100644
index 000000000000..817d51135fbf
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,ipq9574-cmn-pll.yaml
@@ -0,0 +1,79 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,ipq9574-cmn-pll.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm CMN PLL Clock Controller on IPQ SoC
+
+maintainers:
+ - Bjorn Andersson <andersson@kernel.org>
+ - Luo Jie <quic_luoj@quicinc.com>
+
+description:
+ The CMN (or common) PLL clock controller expects a reference
+ input clock. This reference clock is from the on-board Wi-Fi.
+ The CMN PLL supplies a number of fixed rate output clocks to
+ the devices providing networking functions and to GCC. These
+ networking hardware include PPE (packet process engine), PCS
+ and the externally connected switch or PHY devices. The CMN
+ PLL block also outputs fixed rate clocks to GCC. The PLL's
+ primary function is to enable fixed rate output clocks for
+ networking hardware functions used with the IPQ SoC.
+
+properties:
+ compatible:
+ enum:
+ - qcom,ipq5018-cmn-pll
+ - qcom,ipq5424-cmn-pll
+ - qcom,ipq9574-cmn-pll
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: The reference clock. The supported clock rates include
+ 25000000, 31250000, 40000000, 48000000, 50000000 and 96000000 HZ.
+ - description: The AHB clock
+ - description: The SYS clock
+ description:
+ The reference clock is the source clock of CMN PLL, which is from the
+ Wi-Fi. The AHB and SYS clocks must be enabled to access CMN PLL
+ clock registers.
+
+ clock-names:
+ items:
+ - const: ref
+ - const: ahb
+ - const: sys
+
+ "#clock-cells":
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - "#clock-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,ipq-cmn-pll.h>
+ #include <dt-bindings/clock/qcom,ipq9574-gcc.h>
+
+ cmn_pll: clock-controller@9b000 {
+ compatible = "qcom,ipq9574-cmn-pll";
+ reg = <0x0009b000 0x800>;
+ clocks = <&cmn_pll_ref_clk>,
+ <&gcc GCC_CMN_12GPLL_AHB_CLK>,
+ <&gcc GCC_CMN_12GPLL_SYS_CLK>;
+ clock-names = "ref", "ahb", "sys";
+ #clock-cells = <1>;
+ assigned-clocks = <&cmn_pll CMN_PLL_CLK>;
+ assigned-clock-rates-u64 = /bits/ 64 <12000000000>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,ipq9574-gcc.yaml b/Documentation/devicetree/bindings/clock/qcom,ipq9574-gcc.yaml
new file mode 100644
index 000000000000..27ae9938febc
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,ipq9574-gcc.yaml
@@ -0,0 +1,66 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,ipq9574-gcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on IPQ9574
+
+maintainers:
+ - Bjorn Andersson <andersson@kernel.org>
+ - Anusha Rao <quic_anusha@quicinc.com>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on IPQ9574
+
+ See also::
+ include/dt-bindings/clock/qcom,ipq9574-gcc.h
+ include/dt-bindings/reset/qcom,ipq9574-gcc.h
+
+properties:
+ compatible:
+ const: qcom,ipq9574-gcc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Sleep clock source
+ - description: Bias PLL ubi clock source
+ - description: PCIE30 PHY0 pipe clock source
+ - description: PCIE30 PHY1 pipe clock source
+ - description: PCIE30 PHY2 pipe clock source
+ - description: PCIE30 PHY3 pipe clock source
+ - description: USB3 PHY pipe clock source
+
+ '#power-domain-cells': false
+
+ '#interconnect-cells':
+ const: 1
+
+required:
+ - compatible
+ - clocks
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ clock-controller@1800000 {
+ compatible = "qcom,ipq9574-gcc";
+ reg = <0x01800000 0x80000>;
+ clocks = <&xo_board_clk>,
+ <&sleep_clk>,
+ <&bias_pll_ubi_nc_clk>,
+ <&pcie30_phy0_pipe_clk>,
+ <&pcie30_phy1_pipe_clk>,
+ <&pcie30_phy2_pipe_clk>,
+ <&pcie30_phy3_pipe_clk>,
+ <&usb3phy_0_cc_pipe_clk>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,ipq9574-nsscc.yaml b/Documentation/devicetree/bindings/clock/qcom,ipq9574-nsscc.yaml
new file mode 100644
index 000000000000..17252b6ea3be
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,ipq9574-nsscc.yaml
@@ -0,0 +1,98 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,ipq9574-nsscc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Networking Sub System Clock & Reset Controller on IPQ9574
+
+maintainers:
+ - Bjorn Andersson <andersson@kernel.org>
+ - Anusha Rao <quic_anusha@quicinc.com>
+
+description: |
+ Qualcomm networking sub system clock control module provides the clocks,
+ resets on IPQ9574
+
+ See also::
+ include/dt-bindings/clock/qcom,ipq9574-nsscc.h
+ include/dt-bindings/reset/qcom,ipq9574-nsscc.h
+
+properties:
+ compatible:
+ const: qcom,ipq9574-nsscc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: CMN_PLL NSS 1200MHz (Bias PLL cc) clock source
+ - description: CMN_PLL PPE 353MHz (Bias PLL ubi nc) clock source
+ - description: GCC GPLL0 OUT AUX clock source
+ - description: Uniphy0 NSS Rx clock source
+ - description: Uniphy0 NSS Tx clock source
+ - description: Uniphy1 NSS Rx clock source
+ - description: Uniphy1 NSS Tx clock source
+ - description: Uniphy2 NSS Rx clock source
+ - description: Uniphy2 NSS Tx clock source
+ - description: GCC NSSCC clock source
+
+ '#interconnect-cells':
+ const: 1
+
+ clock-names:
+ items:
+ - const: xo
+ - const: nss_1200
+ - const: ppe_353
+ - const: gpll0_out
+ - const: uniphy0_rx
+ - const: uniphy0_tx
+ - const: uniphy1_rx
+ - const: uniphy1_tx
+ - const: uniphy2_rx
+ - const: uniphy2_tx
+ - const: bus
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,ipq9574-gcc.h>
+ #include <dt-bindings/clock/qcom,ipq-cmn-pll.h>
+ clock-controller@39b00000 {
+ compatible = "qcom,ipq9574-nsscc";
+ reg = <0x39b00000 0x80000>;
+ clocks = <&xo_board_clk>,
+ <&cmn_pll NSS_1200MHZ_CLK>,
+ <&cmn_pll PPE_353MHZ_CLK>,
+ <&gcc GPLL0_OUT_AUX>,
+ <&uniphy 0>,
+ <&uniphy 1>,
+ <&uniphy 2>,
+ <&uniphy 3>,
+ <&uniphy 4>,
+ <&uniphy 5>,
+ <&gcc GCC_NSSCC_CLK>;
+ clock-names = "xo",
+ "nss_1200",
+ "ppe_353",
+ "gpll0_out",
+ "uniphy0_rx",
+ "uniphy0_tx",
+ "uniphy1_rx",
+ "uniphy1_tx",
+ "uniphy2_rx",
+ "uniphy2_tx",
+ "bus";
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,kpss-acc-v1.yaml b/Documentation/devicetree/bindings/clock/qcom,kpss-acc-v1.yaml
new file mode 100644
index 000000000000..57632757d4e6
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,kpss-acc-v1.yaml
@@ -0,0 +1,72 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,kpss-acc-v1.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Krait Processor Sub-system (KPSS) Application Clock Controller (ACC) v1
+
+maintainers:
+ - Christian Marangi <ansuelsmth@gmail.com>
+
+description:
+ The KPSS ACC provides clock, power domain, and reset control to a Krait CPU.
+ There is one ACC register region per CPU within the KPSS remapped region as
+ well as an alias register region that remaps accesses to the ACC associated
+ with the CPU accessing the region. ACC v1 is currently used as a
+ clock-controller for enabling the cpu and handling the aux clocks.
+
+properties:
+ compatible:
+ const: qcom,kpss-acc-v1
+
+ reg:
+ items:
+ - description: Base address and size of the register region
+ - description: Optional base address and size of the alias register region
+ minItems: 1
+
+ clocks:
+ minItems: 2
+ maxItems: 2
+
+ clock-names:
+ items:
+ - const: pll8_vote
+ - const: pxo
+
+ clock-output-names:
+ description: Name of the aux clock. Krait can have at most 4 cpu.
+ enum:
+ - acpu0_aux
+ - acpu1_aux
+ - acpu2_aux
+ - acpu3_aux
+
+ '#clock-cells':
+ const: 0
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - clock-output-names
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,gcc-ipq806x.h>
+
+ clock-controller@2088000 {
+ compatible = "qcom,kpss-acc-v1";
+ reg = <0x02088000 0x1000>, <0x02008000 0x1000>;
+ clocks = <&gcc PLL8_VOTE>, <&pxo_board>;
+ clock-names = "pll8_vote", "pxo";
+ clock-output-names = "acpu0_aux";
+ #clock-cells = <0>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,kpss-gcc.yaml b/Documentation/devicetree/bindings/clock/qcom,kpss-gcc.yaml
new file mode 100644
index 000000000000..88b7672123a0
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,kpss-gcc.yaml
@@ -0,0 +1,88 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,kpss-gcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Krait Processor Sub-system (KPSS) Global Clock Controller (GCC)
+
+maintainers:
+ - Christian Marangi <ansuelsmth@gmail.com>
+
+description:
+ Krait Processor Sub-system (KPSS) Global Clock Controller (GCC). Used
+ to control L2 mux (in the current implementation) and provide access
+ to the kpss-gcc registers.
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - qcom,kpss-gcc-ipq8064
+ - qcom,kpss-gcc-apq8064
+ - qcom,kpss-gcc-msm8974
+ - qcom,kpss-gcc-msm8960
+ - qcom,kpss-gcc-msm8660
+ - qcom,kpss-gcc-mdm9615
+ - const: qcom,kpss-gcc
+ - const: syscon
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ minItems: 2
+ maxItems: 2
+
+ clock-names:
+ items:
+ - const: pll8_vote
+ - const: pxo
+
+ '#clock-cells':
+ const: 0
+
+required:
+ - compatible
+ - reg
+
+if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,kpss-gcc-ipq8064
+ - qcom,kpss-gcc-apq8064
+ - qcom,kpss-gcc-msm8974
+ - qcom,kpss-gcc-msm8960
+then:
+ required:
+ - clocks
+ - clock-names
+ - '#clock-cells'
+else:
+ properties:
+ clock: false
+ clock-names: false
+ '#clock-cells': false
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,gcc-ipq806x.h>
+
+ clock-controller@2011000 {
+ compatible = "qcom,kpss-gcc-ipq8064", "qcom,kpss-gcc", "syscon";
+ reg = <0x2011000 0x1000>;
+ clocks = <&gcc PLL8_VOTE>, <&pxo_board>;
+ clock-names = "pll8_vote", "pxo";
+ #clock-cells = <0>;
+ };
+
+ - |
+ clock-controller@2011000 {
+ compatible = "qcom,kpss-gcc-mdm9615", "qcom,kpss-gcc", "syscon";
+ reg = <0x02011000 0x1000>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,krait-cc.txt b/Documentation/devicetree/bindings/clock/qcom,krait-cc.txt
deleted file mode 100644
index 030ba60dab08..000000000000
--- a/Documentation/devicetree/bindings/clock/qcom,krait-cc.txt
+++ /dev/null
@@ -1,34 +0,0 @@
-Krait Clock Controller
-
-PROPERTIES
-
-- compatible:
- Usage: required
- Value type: <string>
- Definition: must be one of:
- "qcom,krait-cc-v1"
- "qcom,krait-cc-v2"
-
-- #clock-cells:
- Usage: required
- Value type: <u32>
- Definition: must be 1
-
-- clocks:
- Usage: required
- Value type: <prop-encoded-array>
- Definition: reference to the clock parents of hfpll, secondary muxes.
-
-- clock-names:
- Usage: required
- Value type: <stringlist>
- Definition: must be "hfpll0", "hfpll1", "acpu0_aux", "acpu1_aux", "qsb".
-
-Example:
-
- kraitcc: clock-controller {
- compatible = "qcom,krait-cc-v1";
- clocks = <&hfpll0>, <&hfpll1>, <&acpu0_aux>, <&acpu1_aux>, <qsb>;
- clock-names = "hfpll0", "hfpll1", "acpu0_aux", "acpu1_aux", "qsb";
- #clock-cells = <1>;
- };
diff --git a/Documentation/devicetree/bindings/clock/qcom,krait-cc.yaml b/Documentation/devicetree/bindings/clock/qcom,krait-cc.yaml
new file mode 100644
index 000000000000..d6a019371fcf
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,krait-cc.yaml
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,krait-cc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Krait Clock Controller
+
+maintainers:
+ - Stephen Boyd <sboyd@kernel.org>
+
+properties:
+ compatible:
+ enum:
+ - qcom,krait-cc-v1
+ - qcom,krait-cc-v2
+
+ '#clock-cells':
+ const: 1
+
+ clocks:
+ items:
+ - description: Parent clock phandle for hfpll0
+ - description: Parent clock phandle for hfpll1
+ - description: Parent clock phandle for acpu0_aux
+ - description: Parent clock phandle for acpu1_aux
+ - description: Parent clock phandle for qsb
+
+ clock-names:
+ items:
+ - const: hfpll0
+ - const: hfpll1
+ - const: acpu0_aux
+ - const: acpu1_aux
+ - const: qsb
+
+required:
+ - compatible
+ - '#clock-cells'
+ - clocks
+ - clock-names
+
+additionalProperties: false
diff --git a/Documentation/devicetree/bindings/clock/qcom,lcc.txt b/Documentation/devicetree/bindings/clock/qcom,lcc.txt
deleted file mode 100644
index a3c78aa88038..000000000000
--- a/Documentation/devicetree/bindings/clock/qcom,lcc.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-Qualcomm LPASS Clock & Reset Controller Binding
-------------------------------------------------
-
-Required properties :
-- compatible : shall contain only one of the following:
-
- "qcom,lcc-msm8960"
- "qcom,lcc-apq8064"
- "qcom,lcc-ipq8064"
- "qcom,lcc-mdm9615"
-
-- reg : shall contain base register location and length
-- #clock-cells : shall contain 1
-- #reset-cells : shall contain 1
-
-Example:
- clock-controller@28000000 {
- compatible = "qcom,lcc-ipq8064";
- reg = <0x28000000 0x1000>;
- #clock-cells = <1>;
- #reset-cells = <1>;
- };
diff --git a/Documentation/devicetree/bindings/clock/qcom,lcc.yaml b/Documentation/devicetree/bindings/clock/qcom,lcc.yaml
new file mode 100644
index 000000000000..55985e562a34
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,lcc.yaml
@@ -0,0 +1,120 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,lcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm LPASS Clock & Reset Controller
+
+maintainers:
+ - Bjorn Andersson <andersson@kernel.org>
+
+properties:
+ compatible:
+ enum:
+ - qcom,lcc-apq8064
+ - qcom,lcc-ipq8064
+ - qcom,lcc-mdm9615
+ - qcom,lcc-msm8960
+
+ clocks:
+ maxItems: 8
+
+ clock-names:
+ maxItems: 8
+
+ '#clock-cells':
+ const: 1
+
+ '#reset-cells':
+ const: 1
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+ - '#reset-cells'
+
+additionalProperties: false
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,lcc-apq8064
+ - qcom,lcc-msm8960
+ then:
+ properties:
+ clocks:
+ items:
+ - description: Board PXO source
+ - description: PLL 4 Vote clock
+ - description: MI2S codec clock
+ - description: Mic I2S codec clock
+ - description: Mic I2S spare clock
+ - description: Speaker I2S codec clock
+ - description: Speaker I2S spare clock
+ - description: PCM codec clock
+
+ clock-names:
+ items:
+ - const: pxo
+ - const: pll4_vote
+ - const: mi2s_codec_clk
+ - const: codec_i2s_mic_codec_clk
+ - const: spare_i2s_mic_codec_clk
+ - const: codec_i2s_spkr_codec_clk
+ - const: spare_i2s_spkr_codec_clk
+ - const: pcm_codec_clk
+
+ required:
+ - clocks
+ - clock-names
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,lcc-mdm9615
+ then:
+ properties:
+ clocks:
+ items:
+ - description: Board CXO source
+ - description: PLL 4 Vote clock
+ - description: MI2S codec clock
+ - description: Mic I2S codec clock
+ - description: Mic I2S spare clock
+ - description: Speaker I2S codec clock
+ - description: Speaker I2S spare clock
+ - description: PCM codec clock
+
+ clock-names:
+ items:
+ - const: cxo
+ - const: pll4_vote
+ - const: mi2s_codec_clk
+ - const: codec_i2s_mic_codec_clk
+ - const: spare_i2s_mic_codec_clk
+ - const: codec_i2s_spkr_codec_clk
+ - const: spare_i2s_spkr_codec_clk
+ - const: pcm_codec_clk
+
+ required:
+ - clocks
+ - clock-names
+
+examples:
+ - |
+ clock-controller@28000000 {
+ compatible = "qcom,lcc-ipq8064";
+ reg = <0x28000000 0x1000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/qcom,milos-camcc.yaml b/Documentation/devicetree/bindings/clock/qcom,milos-camcc.yaml
new file mode 100644
index 000000000000..f63149ecf3e1
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,milos-camcc.yaml
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,milos-camcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Camera Clock & Reset Controller on Milos
+
+maintainers:
+ - Luca Weiss <luca.weiss@fairphone.com>
+
+description: |
+ Qualcomm camera clock control module provides the clocks, resets and power
+ domains on Milos.
+
+ See also: include/dt-bindings/clock/qcom,milos-camcc.h
+
+properties:
+ compatible:
+ const: qcom,milos-camcc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Sleep clock source
+ - description: Camera AHB clock from GCC
+
+required:
+ - compatible
+ - clocks
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,milos-gcc.h>
+ clock-controller@adb0000 {
+ compatible = "qcom,milos-camcc";
+ reg = <0x0adb0000 0x40000>;
+ clocks = <&bi_tcxo_div2>,
+ <&sleep_clk>,
+ <&gcc GCC_CAMERA_AHB_CLK>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,milos-dispcc.yaml b/Documentation/devicetree/bindings/clock/qcom,milos-dispcc.yaml
new file mode 100644
index 000000000000..94908804756b
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,milos-dispcc.yaml
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,milos-dispcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Display Clock & Reset Controller on Milos
+
+maintainers:
+ - Luca Weiss <luca.weiss@fairphone.com>
+
+description: |
+ Qualcomm display clock control module provides the clocks, resets and power
+ domains on Milos.
+
+ See also: include/dt-bindings/clock/qcom,milos-dispcc.h
+
+properties:
+ compatible:
+ const: qcom,milos-dispcc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Sleep clock source
+ - description: Display's AHB clock
+ - description: GPLL0 source from GCC
+ - description: Byte clock from DSI PHY0
+ - description: Pixel clock from DSI PHY0
+ - description: Link clock from DP PHY0
+ - description: VCO DIV clock from DP PHY0
+
+required:
+ - compatible
+ - clocks
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,milos-gcc.h>
+ #include <dt-bindings/phy/phy-qcom-qmp.h>
+ clock-controller@af00000 {
+ compatible = "qcom,milos-dispcc";
+ reg = <0x0af00000 0x20000>;
+ clocks = <&bi_tcxo_div2>,
+ <&sleep_clk>,
+ <&gcc GCC_DISP_AHB_CLK>,
+ <&gcc GCC_DISP_GPLL0_DIV_CLK_SRC>,
+ <&mdss_dsi0_phy 0>,
+ <&mdss_dsi0_phy 1>,
+ <&usb_dp_qmpphy QMP_USB43DP_DP_LINK_CLK>,
+ <&usb_dp_qmpphy QMP_USB43DP_DP_VCO_DIV_CLK>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,milos-gcc.yaml b/Documentation/devicetree/bindings/clock/qcom,milos-gcc.yaml
new file mode 100644
index 000000000000..cf244c155f9a
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,milos-gcc.yaml
@@ -0,0 +1,62 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,milos-gcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on Milos
+
+maintainers:
+ - Luca Weiss <luca.weiss@fairphone.com>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on Milos.
+
+ See also: include/dt-bindings/clock/qcom,milos-gcc.h
+
+properties:
+ compatible:
+ const: qcom,milos-gcc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Sleep clock source
+ - description: PCIE 0 Pipe clock source
+ - description: PCIE 1 Pipe clock source
+ - description: UFS Phy Rx symbol 0 clock source
+ - description: UFS Phy Rx symbol 1 clock source
+ - description: UFS Phy Tx symbol 0 clock source
+ - description: USB3 Phy wrapper pipe clock source
+
+required:
+ - compatible
+ - clocks
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ clock-controller@100000 {
+ compatible = "qcom,milos-gcc";
+ reg = <0x00100000 0x1f4200>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&sleep_clk>,
+ <&pcie0_phy>,
+ <&pcie1_phy>,
+ <&ufs_mem_phy 0>,
+ <&ufs_mem_phy 1>,
+ <&ufs_mem_phy 2>,
+ <&usb_1_qmpphy>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,milos-videocc.yaml b/Documentation/devicetree/bindings/clock/qcom,milos-videocc.yaml
new file mode 100644
index 000000000000..14c31efe1308
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,milos-videocc.yaml
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,milos-videocc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Video Clock & Reset Controller on Milos
+
+maintainers:
+ - Luca Weiss <luca.weiss@fairphone.com>
+
+description: |
+ Qualcomm video clock control module provides the clocks, resets and power
+ domains on Milos.
+
+ See also: include/dt-bindings/clock/qcom,milos-videocc.h
+
+properties:
+ compatible:
+ const: qcom,milos-videocc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Board active XO source
+ - description: Sleep clock source
+ - description: Video AHB clock from GCC
+
+required:
+ - compatible
+ - clocks
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,milos-gcc.h>
+ clock-controller@aaf0000 {
+ compatible = "qcom,milos-videocc";
+ reg = <0x0aaf0000 0x10000>;
+ clocks = <&bi_tcxo_div2>,
+ <&bi_tcxo_ao_div2>,
+ <&sleep_clk>,
+ <&gcc GCC_VIDEO_AHB_CLK>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,mmcc.txt b/Documentation/devicetree/bindings/clock/qcom,mmcc.txt
deleted file mode 100644
index 8b0f7841af8d..000000000000
--- a/Documentation/devicetree/bindings/clock/qcom,mmcc.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-Qualcomm Multimedia Clock & Reset Controller Binding
-----------------------------------------------------
-
-Required properties :
-- compatible : shall contain only one of the following:
-
- "qcom,mmcc-apq8064"
- "qcom,mmcc-apq8084"
- "qcom,mmcc-msm8660"
- "qcom,mmcc-msm8960"
- "qcom,mmcc-msm8974"
- "qcom,mmcc-msm8996"
-
-- reg : shall contain base register location and length
-- #clock-cells : shall contain 1
-- #reset-cells : shall contain 1
-
-Optional properties :
-- #power-domain-cells : shall contain 1
-
-Example:
- clock-controller@4000000 {
- compatible = "qcom,mmcc-msm8960";
- reg = <0x4000000 0x1000>;
- #clock-cells = <1>;
- #reset-cells = <1>;
- #power-domain-cells = <1>;
- };
diff --git a/Documentation/devicetree/bindings/clock/qcom,mmcc.yaml b/Documentation/devicetree/bindings/clock/qcom,mmcc.yaml
new file mode 100644
index 000000000000..53ceec9673a8
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,mmcc.yaml
@@ -0,0 +1,346 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,mmcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Multimedia Clock & Reset Controller
+
+maintainers:
+ - Jeffrey Hugo <quic_jhugo@quicinc.com>
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Qualcomm multimedia clock control module provides the clocks, resets and
+ power domains.
+
+properties:
+ compatible:
+ enum:
+ - qcom,mmcc-apq8064
+ - qcom,mmcc-apq8084
+ - qcom,mmcc-msm8226
+ - qcom,mmcc-msm8660
+ - qcom,mmcc-msm8960
+ - qcom,mmcc-msm8974
+ - qcom,mmcc-msm8992
+ - qcom,mmcc-msm8994
+ - qcom,mmcc-msm8996
+ - qcom,mmcc-msm8998
+ - qcom,mmcc-sdm630
+ - qcom,mmcc-sdm660
+
+ clocks:
+ minItems: 7
+ maxItems: 13
+
+ clock-names:
+ minItems: 7
+ maxItems: 13
+
+ vdd-gfx-supply:
+ description:
+ Regulator supply for the GPU_GX GDSC
+
+required:
+ - compatible
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,mmcc-apq8064
+ - qcom,mmcc-msm8960
+ then:
+ properties:
+ clocks:
+ minItems: 8
+ items:
+ - description: Board PXO source
+ - description: PLL 3 clock
+ - description: PLL 3 Vote clock
+ - description: DSI phy instance 1 dsi clock
+ - description: DSI phy instance 1 byte clock
+ - description: DSI phy instance 2 dsi clock
+ - description: DSI phy instance 2 byte clock
+ - description: HDMI phy PLL clock
+ - description: LVDS PLL clock
+
+ clock-names:
+ minItems: 8
+ items:
+ - const: pxo
+ - const: pll3
+ - const: pll8_vote
+ - const: dsi1pll
+ - const: dsi1pllbyte
+ - const: dsi2pll
+ - const: dsi2pllbyte
+ - const: hdmipll
+ - const: lvdspll
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,mmcc-msm8226
+ then:
+ properties:
+ clocks:
+ items:
+ - description: Board XO source
+ - description: MMSS GPLL0 voted clock
+ - description: GPLL0 voted clock
+ - description: GPLL1 voted clock
+ - description: GFX3D clock source
+ - description: DSI phy instance 0 dsi clock
+ - description: DSI phy instance 0 byte clock
+
+ clock-names:
+ items:
+ - const: xo
+ - const: mmss_gpll0_vote
+ - const: gpll0_vote
+ - const: gpll1_vote
+ - const: gfx3d_clk_src
+ - const: dsi0pll
+ - const: dsi0pllbyte
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,mmcc-msm8974
+ then:
+ properties:
+ clocks:
+ items:
+ - description: Board XO source
+ - description: MMSS GPLL0 voted clock
+ - description: GPLL0 voted clock
+ - description: GPLL1 voted clock
+ - description: GFX3D clock source
+ - description: DSI phy instance 0 dsi clock
+ - description: DSI phy instance 0 byte clock
+ - description: DSI phy instance 1 dsi clock
+ - description: DSI phy instance 1 byte clock
+ - description: HDMI phy PLL clock
+ - description: eDP phy PLL link clock
+ - description: eDP phy PLL vco clock
+
+ clock-names:
+ items:
+ - const: xo
+ - const: mmss_gpll0_vote
+ - const: gpll0_vote
+ - const: gpll1_vote
+ - const: gfx3d_clk_src
+ - const: dsi0pll
+ - const: dsi0pllbyte
+ - const: dsi1pll
+ - const: dsi1pllbyte
+ - const: hdmipll
+ - const: edp_link_clk
+ - const: edp_vco_div
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,mmcc-apq8084
+ then:
+ properties:
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Board sleep source
+ - description: MMSS GPLL0 voted clock
+ - description: GPLL0 clock
+ - description: GPLL0 voted clock
+ - description: GPLL1 clock
+ - description: DSI phy instance 0 dsi clock
+ - description: DSI phy instance 0 byte clock
+ - description: DSI phy instance 1 dsi clock
+ - description: DSI phy instance 1 byte clock
+ - description: HDMI phy PLL clock
+ - description: eDP phy PLL link clock
+ - description: eDP phy PLL vco clock
+
+ clock-names:
+ items:
+ - const: xo
+ - const: sleep_clk
+ - const: mmss_gpll0_vote
+ - const: gpll0
+ - const: gpll0_vote
+ - const: gpll1
+ - const: dsi0pll
+ - const: dsi0pllbyte
+ - const: dsi1pll
+ - const: dsi1pllbyte
+ - const: hdmipll
+ - const: edp_link_clk
+ - const: edp_vco_div
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,mmcc-msm8994
+ - qcom,mmcc-msm8998
+ - qcom,mmcc-sdm630
+ - qcom,mmcc-sdm660
+ then:
+ required:
+ - clocks
+ - clock-names
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: qcom,mmcc-msm8994
+ then:
+ properties:
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Global PLL 0 clock
+ - description: MMSS NoC AHB clock
+ - description: GFX3D clock
+ - description: DSI phy instance 0 dsi clock
+ - description: DSI phy instance 0 byte clock
+ - description: DSI phy instance 1 dsi clock
+ - description: DSI phy instance 1 byte clock
+ - description: HDMI phy PLL clock
+
+ clock-names:
+ items:
+ - const: xo
+ - const: gpll0
+ - const: mmssnoc_ahb
+ - const: oxili_gfx3d_clk_src
+ - const: dsi0pll
+ - const: dsi0pllbyte
+ - const: dsi1pll
+ - const: dsi1pllbyte
+ - const: hdmipll
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: qcom,mmcc-msm8996
+ then:
+ properties:
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Global PLL 0 clock
+ - description: MMSS NoC AHB clock
+ - description: DSI phy instance 0 dsi clock
+ - description: DSI phy instance 0 byte clock
+ - description: DSI phy instance 1 dsi clock
+ - description: DSI phy instance 1 byte clock
+ - description: HDMI phy PLL clock
+
+ clock-names:
+ items:
+ - const: xo
+ - const: gpll0
+ - const: gcc_mmss_noc_cfg_ahb_clk
+ - const: dsi0pll
+ - const: dsi0pllbyte
+ - const: dsi1pll
+ - const: dsi1pllbyte
+ - const: hdmipll
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: qcom,mmcc-msm8998
+ then:
+ properties:
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Global PLL 0 clock
+ - description: DSI phy instance 0 dsi clock
+ - description: DSI phy instance 0 byte clock
+ - description: DSI phy instance 1 dsi clock
+ - description: DSI phy instance 1 byte clock
+ - description: HDMI phy PLL clock
+ - description: DisplayPort phy PLL link clock
+ - description: DisplayPort phy PLL vco clock
+ - description: Global PLL 0 DIV clock
+
+ clock-names:
+ items:
+ - const: xo
+ - const: gpll0
+ - const: dsi0dsi
+ - const: dsi0byte
+ - const: dsi1dsi
+ - const: dsi1byte
+ - const: hdmipll
+ - const: dplink
+ - const: dpvco
+ - const: gpll0_div
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,mmcc-sdm630
+ - qcom,mmcc-sdm660
+ then:
+ properties:
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Board sleep source
+ - description: Global PLL 0 clock
+ - description: Global PLL 0 DIV clock
+ - description: DSI phy instance 0 dsi clock
+ - description: DSI phy instance 0 byte clock
+ - description: DSI phy instance 1 dsi clock
+ - description: DSI phy instance 1 byte clock
+ - description: DisplayPort phy PLL link clock
+ - description: DisplayPort phy PLL vco clock
+
+ clock-names:
+ items:
+ - const: xo
+ - const: sleep_clk
+ - const: gpll0
+ - const: gpll0_div
+ - const: dsi0pll
+ - const: dsi0pllbyte
+ - const: dsi1pll
+ - const: dsi1pllbyte
+ - const: dp_link_2x_clk_divsel_five
+ - const: dp_vco_divided_clk_src_mux
+
+unevaluatedProperties: false
+
+examples:
+ # Example for MMCC for MSM8960:
+ - |
+ clock-controller@4000000 {
+ compatible = "qcom,mmcc-msm8960";
+ reg = <0x4000000 0x1000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,msm8996-apcc.yaml b/Documentation/devicetree/bindings/clock/qcom,msm8996-apcc.yaml
new file mode 100644
index 000000000000..fcace96c72eb
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,msm8996-apcc.yaml
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,msm8996-apcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm clock controller for MSM8996 CPUs
+
+maintainers:
+ - Loic Poulain <loic.poulain@linaro.org>
+
+description: |
+ Qualcomm CPU clock controller for MSM8996 CPUs, clock 0 is for Power cluster
+ and clock 1 is for Perf cluster.
+
+properties:
+ compatible:
+ enum:
+ - qcom,msm8996-apcc
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ clocks:
+ items:
+ - description: XO source
+ - description: SYS APCS AUX clock
+
+ clock-names:
+ items:
+ - const: xo
+ - const: sys_apcs_aux
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ - |
+ kryocc: clock-controller@6400000 {
+ compatible = "qcom,msm8996-apcc";
+ reg = <0x6400000 0x90000>;
+ #clock-cells = <1>;
+
+ clocks = <&xo_board>, <&apcs_glb>;
+ clock-names = "xo", "sys_apcs_aux";
+ };
diff --git a/Documentation/devicetree/bindings/clock/qcom,msm8996-cbf.yaml b/Documentation/devicetree/bindings/clock/qcom,msm8996-cbf.yaml
new file mode 100644
index 000000000000..0dfbd8c4d465
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,msm8996-cbf.yaml
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,msm8996-cbf.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm MSM8996 Core Bus Fabric (CBF) clock controller
+
+maintainers:
+ - Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+
+description: >
+ The clock controller for the Qualcomm MSM8996 CBF clock, which drives the
+ interconnect between two CPU clusters.
+
+properties:
+ compatible:
+ enum:
+ - qcom,msm8996-cbf
+ - qcom,msm8996pro-cbf
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: XO source
+ - description: SYS APCS AUX clock
+
+ '#clock-cells':
+ const: 0
+
+ '#interconnect-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - '#clock-cells'
+ - '#interconnect-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmcc.h>
+ clock-controller@9a11000 {
+ compatible = "qcom,msm8996-cbf";
+ reg = <0x09a11000 0x10000>;
+ clocks = <&rpmcc RPM_SMD_BB_CLK1>, <&apcs_glb>;
+ #clock-cells = <0>;
+ #interconnect-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,msm8998-gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,msm8998-gpucc.yaml
new file mode 100644
index 000000000000..374de7a6f8d9
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,msm8998-gpucc.yaml
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,msm8998-gpucc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Graphics Clock & Reset Controller on MSM8998
+
+maintainers:
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Qualcomm graphics clock control module provides the clocks, resets and power
+ domains on MSM8998.
+
+ See also: include/dt-bindings/clock/qcom,gpucc-msm8998.h
+
+properties:
+ compatible:
+ const: qcom,msm8998-gpucc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: GPLL0 main branch source (gcc_gpu_gpll0_clk_src)
+
+ clock-names:
+ items:
+ - const: xo
+ - const: gpll0
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,gcc-msm8998.h>
+ #include <dt-bindings/clock/qcom,rpmcc.h>
+ clock-controller@5065000 {
+ compatible = "qcom,msm8998-gpucc";
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ reg = <0x05065000 0x9000>;
+ clocks = <&rpmcc RPM_SMD_XO_CLK_SRC>, <&gcc GPLL0_OUT_MAIN>;
+ clock-names = "xo", "gpll0";
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,q6sstopcc.yaml b/Documentation/devicetree/bindings/clock/qcom,q6sstopcc.yaml
new file mode 100644
index 000000000000..e0f4d692728c
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,q6sstopcc.yaml
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,q6sstopcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Q6SSTOP clock Controller
+
+maintainers:
+ - Bjorn Andersson <andersson@kernel.org>
+
+properties:
+ compatible:
+ const: qcom,qcs404-q6sstopcc
+
+ reg:
+ items:
+ - description: Q6SSTOP clocks register region
+ - description: Q6SSTOP_TCSR register region
+
+ clocks:
+ items:
+ - description: ahb clock for the q6sstopCC
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ q6sstopcc: clock-controller@7500000 {
+ compatible = "qcom,qcs404-q6sstopcc";
+ reg = <0x07500000 0x4e000>, <0x07550000 0x10000>;
+ clocks = <&gcc 141>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/qcom,qca8k-nsscc.yaml b/Documentation/devicetree/bindings/clock/qcom,qca8k-nsscc.yaml
new file mode 100644
index 000000000000..61473385da2d
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,qca8k-nsscc.yaml
@@ -0,0 +1,86 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,qca8k-nsscc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm NSS Clock & Reset Controller on QCA8386/QCA8084
+
+maintainers:
+ - Bjorn Andersson <andersson@kernel.org>
+ - Luo Jie <quic_luoj@quicinc.com>
+
+description: |
+ Qualcomm NSS clock control module provides the clocks and resets
+ on QCA8386(switch mode)/QCA8084(PHY mode)
+
+ See also::
+ include/dt-bindings/clock/qcom,qca8k-nsscc.h
+ include/dt-bindings/reset/qcom,qca8k-nsscc.h
+
+properties:
+ compatible:
+ oneOf:
+ - const: qcom,qca8084-nsscc
+ - items:
+ - enum:
+ - qcom,qca8082-nsscc
+ - qcom,qca8085-nsscc
+ - qcom,qca8384-nsscc
+ - qcom,qca8385-nsscc
+ - qcom,qca8386-nsscc
+ - const: qcom,qca8084-nsscc
+
+ clocks:
+ items:
+ - description: Chip reference clock source
+ - description: UNIPHY0 RX 312P5M/125M clock source
+ - description: UNIPHY0 TX 312P5M/125M clock source
+ - description: UNIPHY1 RX 312P5M/125M clock source
+ - description: UNIPHY1 TX 312P5M/125M clock source
+ - description: UNIPHY1 RX 312P5M clock source
+ - description: UNIPHY1 TX 312P5M clock source
+
+ reg:
+ items:
+ - description: MDIO bus address for Clock & Reset Controller register
+
+ reset-gpios:
+ description: GPIO connected to the chip
+ maxItems: 1
+
+required:
+ - compatible
+ - clocks
+ - reg
+ - reset-gpios
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ clock-controller@18 {
+ compatible = "qcom,qca8084-nsscc";
+ reg = <0x18>;
+ reset-gpios = <&tlmm 51 GPIO_ACTIVE_LOW>;
+ clocks = <&pcs0_pll>,
+ <&qca8k_uniphy0_rx>,
+ <&qca8k_uniphy0_tx>,
+ <&qca8k_uniphy1_rx>,
+ <&qca8k_uniphy1_tx>,
+ <&qca8k_uniphy1_rx312p5m>,
+ <&qca8k_uniphy1_tx312p5m>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,qcm2290-dispcc.yaml b/Documentation/devicetree/bindings/clock/qcom,qcm2290-dispcc.yaml
new file mode 100644
index 000000000000..4a533b45eec2
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,qcm2290-dispcc.yaml
@@ -0,0 +1,75 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,qcm2290-dispcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Display Clock & Reset Controller on QCM2290
+
+maintainers:
+ - Loic Poulain <loic.poulain@linaro.org>
+
+description: |
+ Qualcomm display clock control module provides the clocks, resets and power
+ domains on qcm2290.
+
+ See also: include/dt-bindings/clock/qcom,dispcc-qcm2290.h
+
+properties:
+ compatible:
+ const: qcom,qcm2290-dispcc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Board active-only XO source
+ - description: GPLL0 source from GCC
+ - description: GPLL0 div source from GCC
+ - description: Byte clock from DSI PHY
+ - description: Pixel clock from DSI PHY
+
+ clock-names:
+ items:
+ - const: bi_tcxo
+ - const: bi_tcxo_ao
+ - const: gcc_disp_gpll0_clk_src
+ - const: gcc_disp_gpll0_div_clk_src
+ - const: dsi0_phy_pll_out_byteclk
+ - const: dsi0_phy_pll_out_dsiclk
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,dispcc-qcm2290.h>
+ #include <dt-bindings/clock/qcom,gcc-qcm2290.h>
+ #include <dt-bindings/clock/qcom,rpmcc.h>
+ clock-controller@5f00000 {
+ compatible = "qcom,qcm2290-dispcc";
+ reg = <0x5f00000 0x20000>;
+ clocks = <&rpmcc RPM_SMD_XO_CLK_SRC>,
+ <&rpmcc RPM_SMD_XO_A_CLK_SRC>,
+ <&gcc GCC_DISP_GPLL0_CLK_SRC>,
+ <&gcc GCC_DISP_GPLL0_DIV_CLK_SRC>,
+ <&dsi0_phy 0>,
+ <&dsi0_phy 1>;
+ clock-names = "bi_tcxo",
+ "bi_tcxo_ao",
+ "gcc_disp_gpll0_clk_src",
+ "gcc_disp_gpll0_div_clk_src",
+ "dsi0_phy_pll_out_byteclk",
+ "dsi0_phy_pll_out_dsiclk";
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,qcm2290-gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,qcm2290-gpucc.yaml
new file mode 100644
index 000000000000..734880805c1b
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,qcm2290-gpucc.yaml
@@ -0,0 +1,77 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,qcm2290-gpucc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Graphics Clock & Reset Controller on QCM2290
+
+maintainers:
+ - Konrad Dybcio <konradybcio@kernel.org>
+
+description: |
+ Qualcomm graphics clock control module provides the clocks, resets and power
+ domains on Qualcomm SoCs.
+
+ See also::
+ include/dt-bindings/clock/qcom,qcm2290-gpucc.h
+
+properties:
+ compatible:
+ const: qcom,qcm2290-gpucc
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: AHB interface clock,
+ - description: SoC CXO clock
+ - description: GPLL0 main branch source
+ - description: GPLL0 div branch source
+
+ power-domains:
+ description:
+ A phandle and PM domain specifier for the CX power domain.
+ maxItems: 1
+
+ required-opps:
+ description:
+ A phandle to an OPP node describing required CX performance point.
+ maxItems: 1
+
+required:
+ - compatible
+ - clocks
+ - power-domains
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,gcc-qcm2290.h>
+ #include <dt-bindings/clock/qcom,rpmcc.h>
+ #include <dt-bindings/power/qcom-rpmpd.h>
+
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ clock-controller@5990000 {
+ compatible = "qcom,qcm2290-gpucc";
+ reg = <0x0 0x05990000 0x0 0x9000>;
+ clocks = <&gcc GCC_GPU_CFG_AHB_CLK>,
+ <&rpmcc RPM_SMD_XO_CLK_SRC>,
+ <&gcc GCC_GPU_GPLL0_CLK_SRC>,
+ <&gcc GCC_GPU_GPLL0_DIV_CLK_SRC>;
+ power-domains = <&rpmpd QCM2290_VDDCX>;
+ required-opps = <&rpmpd_opp_low_svs>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,qcs404-turingcc.yaml b/Documentation/devicetree/bindings/clock/qcom,qcs404-turingcc.yaml
new file mode 100644
index 000000000000..033e010754a2
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,qcs404-turingcc.yaml
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,qcs404-turingcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Turing Clock & Reset Controller on QCS404
+
+maintainers:
+ - Bjorn Andersson <andersson@kernel.org>
+
+properties:
+ compatible:
+ const: qcom,qcs404-turingcc
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ '#reset-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - '#clock-cells'
+ - '#reset-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,gcc-qcs404.h>
+ clock-controller@800000 {
+ compatible = "qcom,qcs404-turingcc";
+ reg = <0x00800000 0x30000>;
+ clocks = <&gcc GCC_CDSP_CFG_AHB_CLK>;
+
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/qcom,qcs615-dispcc.yaml b/Documentation/devicetree/bindings/clock/qcom,qcs615-dispcc.yaml
new file mode 100644
index 000000000000..d566f19beb0d
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,qcs615-dispcc.yaml
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,qcs615-dispcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Display Clock & Reset Controller on QCS615
+
+maintainers:
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Qualcomm display clock control module provides the clocks, resets and power
+ domains on QCS615.
+
+ See also: include/dt-bindings/clock/qcom,qcs615-dispcc.h
+
+properties:
+ compatible:
+ const: qcom,qcs615-dispcc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: GPLL0 clock source from GCC
+ - description: Byte clock from DSI PHY0
+ - description: Pixel clock from DSI PHY0
+ - description: Pixel clock from DSI PHY1
+ - description: Display port PLL link clock
+ - description: Display port PLL VCO DIV clock
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ #include <dt-bindings/clock/qcom,qcs615-gcc.h>
+ clock-controller@af00000 {
+ compatible = "qcom,qcs615-dispcc";
+ reg = <0x0af00000 0x20000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&gcc GCC_DISP_GPLL0_DIV_CLK_SRC>,
+ <&mdss_dsi0_phy 0>,
+ <&mdss_dsi0_phy 1>,
+ <&mdss_dsi1_phy 0>,
+ <&mdss_dp_phy 0>,
+ <&mdss_dp_vco 0>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,qcs615-gcc.yaml b/Documentation/devicetree/bindings/clock/qcom,qcs615-gcc.yaml
new file mode 100644
index 000000000000..4a828e102d25
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,qcs615-gcc.yaml
@@ -0,0 +1,59 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,qcs615-gcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on QCS615
+
+maintainers:
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on QCS615.
+
+ See also: include/dt-bindings/clock/qcom,qcs615-gcc.h
+
+properties:
+ compatible:
+ const: qcom,qcs615-gcc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Board active XO source
+ - description: Sleep clock source
+
+ clock-names:
+ items:
+ - const: bi_tcxo
+ - const: bi_tcxo_ao
+ - const: sleep_clk
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ clock-controller@100000 {
+ compatible = "qcom,qcs615-gcc";
+ reg = <0x00100000 0x1f0000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&rpmhcc RPMH_CXO_CLK_A>,
+ <&sleep_clk>;
+ clock-names = "bi_tcxo", "bi_tcxo_ao", "sleep_clk";
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,qcs615-gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,qcs615-gpucc.yaml
new file mode 100644
index 000000000000..5f7d83d1a7be
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,qcs615-gpucc.yaml
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,qcs615-gpucc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Graphics Clock & Reset Controller on QCS615
+
+maintainers:
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Qualcomm graphics clock control module provides clocks, resets and power
+ domains on QCS615 Qualcomm SoCs.
+
+ See also: include/dt-bindings/clock/qcom,qcs615-gpucc.h
+
+properties:
+ compatible:
+ const: qcom,qcs615-gpucc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: GPLL0 main branch source
+ - description: GPLL0 GPUCC div branch source
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ #include <dt-bindings/clock/qcom,qcs615-gcc.h>
+
+ clock-controller@5090000 {
+ compatible = "qcom,qcs615-gpucc";
+ reg = <0x5090000 0x9000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&gcc GPLL0>,
+ <&gcc GCC_GPU_GPLL0_DIV_CLK_SRC>;
+
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,qcs615-videocc.yaml b/Documentation/devicetree/bindings/clock/qcom,qcs615-videocc.yaml
new file mode 100644
index 000000000000..f51b69de1047
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,qcs615-videocc.yaml
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,qcs615-videocc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Video Clock & Reset Controller on QCS615
+
+maintainers:
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Qualcomm video clock control module provides clocks, resets and power
+ domains on QCS615 Qualcomm SoCs.
+
+ See also: include/dt-bindings/clock/qcom,qcs615-videocc.h
+
+properties:
+ compatible:
+ const: qcom,qcs615-videocc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Sleep clock source
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ #include <dt-bindings/clock/qcom,qcs615-gcc.h>
+
+ clock-controller@ab00000 {
+ compatible = "qcom,qcs615-videocc";
+ reg = <0xab00000 0x10000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&sleep_clk>;
+
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,qcs8300-gcc.yaml b/Documentation/devicetree/bindings/clock/qcom,qcs8300-gcc.yaml
new file mode 100644
index 000000000000..081bc452081f
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,qcs8300-gcc.yaml
@@ -0,0 +1,66 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,qcs8300-gcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Technologies, Inc. Global Clock & Reset Controller on QCS8300
+
+maintainers:
+ - Taniya Das <quic_tdas@quicinc.com>
+ - Imran Shaik <quic_imrashai@quicinc.com>
+
+description: |
+ Qualcomm Technologies, Inc. Global clock control module provides the clocks, resets and
+ power domains on QCS8300
+
+ See also: include/dt-bindings/clock/qcom,qcs8300-gcc.h
+
+properties:
+ compatible:
+ const: qcom,qcs8300-gcc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Sleep clock source
+ - description: PCIE 0 Pipe clock source
+ - description: PCIE 1 Pipe clock source
+ - description: PCIE Phy Auxiliary clock source
+ - description: First EMAC controller reference clock
+ - description: UFS Phy Rx symbol 0 clock source
+ - description: UFS Phy Rx symbol 1 clock source
+ - description: UFS Phy Tx symbol 0 clock source
+ - description: USB3 Phy wrapper pipe clock source
+
+required:
+ - compatible
+ - clocks
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ clock-controller@100000 {
+ compatible = "qcom,qcs8300-gcc";
+ reg = <0x00100000 0xc7018>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&sleep_clk>,
+ <&pcie_0_pipe_clk>,
+ <&pcie_1_pipe_clk>,
+ <&pcie_phy_aux_clk>,
+ <&rxc0_ref_clk>,
+ <&ufs_phy_rx_symbol_0_clk>,
+ <&ufs_phy_rx_symbol_1_clk>,
+ <&ufs_phy_tx_symbol_0_clk>,
+ <&usb3_phy_wrapper_gcc_usb30_prim_pipe_clk>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,qdu1000-ecpricc.yaml b/Documentation/devicetree/bindings/clock/qcom,qdu1000-ecpricc.yaml
new file mode 100644
index 000000000000..3038307ff2c5
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,qdu1000-ecpricc.yaml
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,qdu1000-ecpricc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm ECPRI Clock & Reset Controller for QDU1000 and QRU1000
+
+maintainers:
+ - Taniya Das <quic_tdas@quicinc.com>
+ - Imran Shaik <quic_imrashai@quicinc.com>
+
+description: |
+ Qualcomm ECPRI Specification V2.0 Common Public Radio Interface clock control
+ module which supports the clocks, resets on QDU1000 and QRU1000
+
+ See also: include/dt-bindings/clock/qcom,qdu1000-ecpricc.h
+
+properties:
+ compatible:
+ enum:
+ - qcom,qdu1000-ecpricc
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: GPLL0 source from GCC
+ - description: GPLL1 source from GCC
+ - description: GPLL2 source from GCC
+ - description: GPLL3 source from GCC
+ - description: GPLL4 source from GCC
+ - description: GPLL5 source from GCC
+
+ '#clock-cells':
+ const: 1
+
+ '#reset-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - '#clock-cells'
+ - '#reset-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,qdu1000-gcc.h>
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ clock-controller@280000 {
+ compatible = "qcom,qdu1000-ecpricc";
+ reg = <0x00280000 0x31c00>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&gcc GCC_ECPRI_CC_GPLL0_CLK_SRC>,
+ <&gcc GCC_ECPRI_CC_GPLL1_EVEN_CLK_SRC>,
+ <&gcc GCC_ECPRI_CC_GPLL2_EVEN_CLK_SRC>,
+ <&gcc GCC_ECPRI_CC_GPLL3_CLK_SRC>,
+ <&gcc GCC_ECPRI_CC_GPLL4_CLK_SRC>,
+ <&gcc GCC_ECPRI_CC_GPLL5_EVEN_CLK_SRC>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/qcom,qdu1000-gcc.yaml b/Documentation/devicetree/bindings/clock/qcom,qdu1000-gcc.yaml
new file mode 100644
index 000000000000..2c5a9ef4fe4d
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,qdu1000-gcc.yaml
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,qdu1000-gcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller for QDU1000 and QRU1000
+
+maintainers:
+ - Taniya Das <quic_tdas@quicinc.com>
+ - Imran Shaik <quic_imrashai@quicinc.com>
+
+description: |
+ Qualcomm global clock control module which supports the clocks, resets and
+ power domains on QDU1000 and QRU1000
+
+ See also: include/dt-bindings/clock/qcom,qdu1000-gcc.h
+
+properties:
+ compatible:
+ const: qcom,qdu1000-gcc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Sleep clock source
+ - description: PCIE 0 Pipe clock source
+ - description: PCIE 0 Phy Auxiliary clock source
+ - description: USB3 Phy wrapper pipe clock source
+
+required:
+ - compatible
+ - clocks
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ clock-controller@100000 {
+ compatible = "qcom,qdu1000-gcc";
+ reg = <0x00100000 0x001f4200>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>, <&sleep_clk>,
+ <&pcie_0_pipe_clk>, <&pcie_0_phy_aux_clk>,
+ <&usb3_phy_wrapper_pipe_clk>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/qcom,rpmcc.txt b/Documentation/devicetree/bindings/clock/qcom,rpmcc.txt
deleted file mode 100644
index 4491d1c104aa..000000000000
--- a/Documentation/devicetree/bindings/clock/qcom,rpmcc.txt
+++ /dev/null
@@ -1,49 +0,0 @@
-Qualcomm RPM Clock Controller Binding
-------------------------------------------------
-The RPM is a dedicated hardware engine for managing the shared
-SoC resources in order to keep the lowest power profile. It
-communicates with other hardware subsystems via shared memory
-and accepts clock requests, aggregates the requests and turns
-the clocks on/off or scales them on demand.
-
-Required properties :
-- compatible : shall contain only one of the following. The generic
- compatible "qcom,rpmcc" should be also included.
-
- "qcom,rpmcc-msm8660", "qcom,rpmcc"
- "qcom,rpmcc-apq8060", "qcom,rpmcc"
- "qcom,rpmcc-msm8916", "qcom,rpmcc"
- "qcom,rpmcc-msm8974", "qcom,rpmcc"
- "qcom,rpmcc-apq8064", "qcom,rpmcc"
- "qcom,rpmcc-msm8996", "qcom,rpmcc"
-
-- #clock-cells : shall contain 1
-
-The clock enumerators are defined in <dt-bindings/clock/qcom,rpmcc.h>
-and come in pairs: FOO_CLK followed by FOO_A_CLK. The latter clock
-is an "active" clock, which means that the consumer only care that the
-clock is available when the apps CPU subsystem is active, i.e. not
-suspended or in deep idle. If it is important that the clock keeps running
-during system suspend, you need to specify the non-active clock, the one
-not containing *_A_* in the enumerator name.
-
-Example:
- smd {
- compatible = "qcom,smd";
-
- rpm {
- interrupts = <0 168 1>;
- qcom,ipc = <&apcs 8 0>;
- qcom,smd-edge = <15>;
-
- rpm_requests {
- compatible = "qcom,rpm-msm8916";
- qcom,smd-channels = "rpm_requests";
-
- rpmcc: clock-controller {
- compatible = "qcom,rpmcc-msm8916", "qcom,rpmcc";
- #clock-cells = <1>;
- };
- };
- };
- };
diff --git a/Documentation/devicetree/bindings/clock/qcom,rpmcc.yaml b/Documentation/devicetree/bindings/clock/qcom,rpmcc.yaml
new file mode 100644
index 000000000000..90cd3feab5fa
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,rpmcc.yaml
@@ -0,0 +1,166 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,rpmcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm RPM Clock Controller
+
+maintainers:
+ - Bjorn Andersson <bjorn.andersson@linaro.org>
+ - Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+
+description: |
+ The clock enumerators are defined in <dt-bindings/clock/qcom,rpmcc.h> and
+ come in pairs:: FOO_CLK followed by FOO_A_CLK. The latter clock is
+ an "active" clock, which means that the consumer only care that the clock is
+ available when the apps CPU subsystem is active, i.e. not suspended or in
+ deep idle. If it is important that the clock keeps running during system
+ suspend, you need to specify the non-active clock, the one not containing
+ *_A_* in the enumerator name.
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - qcom,rpmcc-apq8060
+ - qcom,rpmcc-apq8064
+ - qcom,rpmcc-ipq806x
+ - qcom,rpmcc-mdm9607
+ - qcom,rpmcc-msm8226
+ - qcom,rpmcc-msm8660
+ - qcom,rpmcc-msm8909
+ - qcom,rpmcc-msm8916
+ - qcom,rpmcc-msm8917
+ - qcom,rpmcc-msm8936
+ - qcom,rpmcc-msm8937
+ - qcom,rpmcc-msm8940
+ - qcom,rpmcc-msm8953
+ - qcom,rpmcc-msm8974
+ - qcom,rpmcc-msm8976
+ - qcom,rpmcc-msm8992
+ - qcom,rpmcc-msm8994
+ - qcom,rpmcc-msm8996
+ - qcom,rpmcc-msm8998
+ - qcom,rpmcc-qcm2290
+ - qcom,rpmcc-qcs404
+ - qcom,rpmcc-sdm429
+ - qcom,rpmcc-sdm660
+ - qcom,rpmcc-sm6115
+ - qcom,rpmcc-sm6125
+ - qcom,rpmcc-sm6375
+ - const: qcom,rpmcc
+
+ '#clock-cells':
+ const: 1
+
+ clocks:
+ minItems: 1
+ maxItems: 2
+
+ clock-names:
+ minItems: 1
+ maxItems: 2
+
+required:
+ - compatible
+ - '#clock-cells'
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,rpmcc-apq8060
+ - qcom,rpmcc-ipq806x
+ - qcom,rpmcc-msm8660
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: pxo clock
+
+ clock-names:
+ items:
+ - const: pxo
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: qcom,rpmcc-apq8064
+ then:
+ properties:
+ clocks:
+ items:
+ - description: pxo clock
+ - description: cxo clock
+
+ clock-names:
+ items:
+ - const: pxo
+ - const: cxo
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,rpmcc-mdm9607
+ - qcom,rpmcc-msm8226
+ - qcom,rpmcc-msm8916
+ - qcom,rpmcc-msm8917
+ - qcom,rpmcc-msm8936
+ - qcom,rpmcc-msm8937
+ - qcom,rpmcc-msm8940
+ - qcom,rpmcc-msm8953
+ - qcom,rpmcc-msm8974
+ - qcom,rpmcc-msm8976
+ - qcom,rpmcc-msm8992
+ - qcom,rpmcc-msm8994
+ - qcom,rpmcc-msm8996
+ - qcom,rpmcc-msm8998
+ - qcom,rpmcc-qcm2290
+ - qcom,rpmcc-qcs404
+ - qcom,rpmcc-sdm429
+ - qcom,rpmcc-sdm660
+ - qcom,rpmcc-sm6115
+ - qcom,rpmcc-sm6125
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: xo clock
+
+ clock-names:
+ items:
+ - const: xo
+
+additionalProperties: false
+
+examples:
+ - |
+ rpm {
+ rpm-requests {
+ compatible = "qcom,rpm-msm8916", "qcom,smd-rpm";
+ qcom,smd-channels = "rpm_requests";
+
+ clock-controller {
+ compatible = "qcom,rpmcc-msm8916", "qcom,rpmcc";
+ #clock-cells = <1>;
+ };
+ };
+ };
+
+ - |
+ rpm {
+ clock-controller {
+ compatible = "qcom,rpmcc-ipq806x", "qcom,rpmcc";
+ #clock-cells = <1>;
+ clocks = <&pxo_board>;
+ clock-names = "pxo";
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/qcom,rpmh-clk.txt b/Documentation/devicetree/bindings/clock/qcom,rpmh-clk.txt
deleted file mode 100644
index 3c007653da31..000000000000
--- a/Documentation/devicetree/bindings/clock/qcom,rpmh-clk.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-Qualcomm Technologies, Inc. RPMh Clocks
--------------------------------------------------------
-
-Resource Power Manager Hardened (RPMh) manages shared resources on
-some Qualcomm Technologies Inc. SoCs. It accepts clock requests from
-other hardware subsystems via RSC to control clocks.
-
-Required properties :
-- compatible : shall contain "qcom,sdm845-rpmh-clk"
-
-- #clock-cells : must contain 1
-
-Example :
-
-#include <dt-bindings/clock/qcom,rpmh.h>
-
- &apps_rsc {
- rpmhcc: clock-controller {
- compatible = "qcom,sdm845-rpmh-clk";
- #clock-cells = <1>;
- };
- };
diff --git a/Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml b/Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml
new file mode 100644
index 000000000000..78fa05726685
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml
@@ -0,0 +1,73 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,rpmhcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Technologies, Inc. RPMh Clocks
+
+maintainers:
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Resource Power Manager Hardened (RPMh) manages shared resources on
+ some Qualcomm Technologies Inc. SoCs. It accepts clock requests from
+ other hardware subsystems via RSC to control clocks.
+
+properties:
+ compatible:
+ enum:
+ - qcom,glymur-rpmh-clk
+ - qcom,milos-rpmh-clk
+ - qcom,qcs615-rpmh-clk
+ - qcom,qdu1000-rpmh-clk
+ - qcom,sa8775p-rpmh-clk
+ - qcom,sar2130p-rpmh-clk
+ - qcom,sc7180-rpmh-clk
+ - qcom,sc7280-rpmh-clk
+ - qcom,sc8180x-rpmh-clk
+ - qcom,sc8280xp-rpmh-clk
+ - qcom,sdm670-rpmh-clk
+ - qcom,sdm845-rpmh-clk
+ - qcom,sdx55-rpmh-clk
+ - qcom,sdx65-rpmh-clk
+ - qcom,sdx75-rpmh-clk
+ - qcom,sm4450-rpmh-clk
+ - qcom,sm6350-rpmh-clk
+ - qcom,sm8150-rpmh-clk
+ - qcom,sm8250-rpmh-clk
+ - qcom,sm8350-rpmh-clk
+ - qcom,sm8450-rpmh-clk
+ - qcom,sm8550-rpmh-clk
+ - qcom,sm8650-rpmh-clk
+ - qcom,sm8750-rpmh-clk
+ - qcom,x1e80100-rpmh-clk
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ items:
+ - const: xo
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ # Example for GCC for SDM845: The below node should be defined inside
+ # &apps_rsc node.
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ rpmhcc: clock-controller {
+ compatible = "qcom,sdm845-rpmh-clk";
+ clocks = <&xo_board>;
+ clock-names = "xo";
+ #clock-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sa8775p-camcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sa8775p-camcc.yaml
new file mode 100644
index 000000000000..f42ccb6627a3
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sa8775p-camcc.yaml
@@ -0,0 +1,81 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sa8775p-camcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Camera Clock & Reset Controller on SA8775P
+
+maintainers:
+ - Taniya Das <quic_tdas@quicinc.com>
+ - Imran Shaik <quic_imrashai@quicinc.com>
+
+description: |
+ Qualcomm camera clock control module provides the clocks, resets and power
+ domains on SA8775p.
+
+ See also:
+ include/dt-bindings/clock/qcom,qcs8300-camcc.h
+ include/dt-bindings/clock/qcom,sa8775p-camcc.h
+ include/dt-bindings/clock/qcom,sc8280xp-camcc.h
+
+properties:
+ compatible:
+ enum:
+ - qcom,qcs8300-camcc
+ - qcom,sa8775p-camcc
+ - qcom,sc8280xp-camcc
+
+ clocks:
+ items:
+ - description: Camera AHB clock from GCC
+ - description: Board XO source
+ - description: Board active XO source
+ - description: Sleep clock source
+
+ power-domains:
+ maxItems: 1
+ description: MMCX power domain
+
+ required-opps:
+ description:
+ OPP node describing required MMCX performance point.
+ maxItems: 1
+
+required:
+ - compatible
+ - clocks
+ - power-domains
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: qcom,sc8280xp-camcc
+ then:
+ required:
+ - required-opps
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ #include <dt-bindings/power/qcom-rpmpd.h>
+ #include <dt-bindings/clock/qcom,sa8775p-gcc.h>
+ clock-controller@ade0000 {
+ compatible = "qcom,sa8775p-camcc";
+ reg = <0x0ade0000 0x20000>;
+ clocks = <&gcc GCC_CAMERA_AHB_CLK>,
+ <&rpmhcc RPMH_CXO_CLK>,
+ <&rpmhcc RPMH_CXO_CLK_A>,
+ <&sleep_clk>;
+ power-domains = <&rpmhpd SA8775P_MMCX>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sa8775p-dispcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sa8775p-dispcc.yaml
new file mode 100644
index 000000000000..ce61755e62d4
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sa8775p-dispcc.yaml
@@ -0,0 +1,79 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sa8775p-dispcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Display Clock & Reset Controller on SA8775P
+
+maintainers:
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Qualcomm display clock control module provides the clocks, resets and power
+ domains on SA8775P.
+
+ See also: include/dt-bindings/clock/qcom,sa8775p-dispcc.h
+
+properties:
+ compatible:
+ enum:
+ - qcom,sa8775p-dispcc0
+ - qcom,sa8775p-dispcc1
+
+ clocks:
+ items:
+ - description: GCC AHB clock source
+ - description: Board XO source
+ - description: Board XO_AO source
+ - description: Sleep clock source
+ - description: Link clock from DP0 PHY
+ - description: VCO DIV clock from DP0 PHY
+ - description: Link clock from DP1 PHY
+ - description: VCO DIV clock from DP1 PHY
+ - description: Byte clock from DSI0 PHY
+ - description: Pixel clock from DSI0 PHY
+ - description: Byte clock from DSI1 PHY
+ - description: Pixel clock from DSI1 PHY
+
+ power-domains:
+ maxItems: 1
+ description: MMCX power domain
+
+required:
+ - compatible
+ - clocks
+ - power-domains
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ #include <dt-bindings/power/qcom-rpmpd.h>
+ #include <dt-bindings/clock/qcom,sa8775p-gcc.h>
+ clock-controller@af00000 {
+ compatible = "qcom,sa8775p-dispcc0";
+ reg = <0x0af00000 0x20000>;
+ clocks = <&gcc GCC_DISP_AHB_CLK>,
+ <&rpmhcc RPMH_CXO_CLK>,
+ <&rpmhcc RPMH_CXO_CLK_A>,
+ <&sleep_clk>,
+ <&dp_phy0 0>,
+ <&dp_phy0 1>,
+ <&dp_phy1 2>,
+ <&dp_phy1 3>,
+ <&dsi_phy0 0>,
+ <&dsi_phy0 1>,
+ <&dsi_phy1 2>,
+ <&dsi_phy1 3>;
+ power-domains = <&rpmhpd SA8775P_MMCX>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sa8775p-gcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sa8775p-gcc.yaml
new file mode 100644
index 000000000000..c641aac8c451
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sa8775p-gcc.yaml
@@ -0,0 +1,85 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sa8775p-gcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on sa8775p
+
+maintainers:
+ - Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and
+ power domains on sa8775p.
+
+ See also: include/dt-bindings/clock/qcom,sa8775p-gcc.h
+
+properties:
+ compatible:
+ const: qcom,sa8775p-gcc
+
+ clocks:
+ items:
+ - description: XO reference clock
+ - description: Sleep clock
+ - description: UFS memory first RX symbol clock
+ - description: UFS memory second RX symbol clock
+ - description: UFS memory first TX symbol clock
+ - description: UFS card first RX symbol clock
+ - description: UFS card second RX symbol clock
+ - description: UFS card first TX symbol clock
+ - description: Primary USB3 PHY wrapper pipe clock
+ - description: Secondary USB3 PHY wrapper pipe clock
+ - description: PCIe 0 pipe clock
+ - description: PCIe 1 pipe clock
+ - description: PCIe PHY clock
+ - description: First EMAC controller reference clock
+ - description: Second EMAC controller reference clock
+
+ protected-clocks:
+ maxItems: 240
+
+ power-domains:
+ maxItems: 1
+
+required:
+ - compatible
+ - clocks
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ #include <dt-bindings/power/qcom-rpmpd.h>
+
+ gcc: clock-controller@100000 {
+ compatible = "qcom,sa8775p-gcc";
+ reg = <0x100000 0xc7018>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&sleep_clk>,
+ <&ufs_phy_rx_symbol_0_clk>,
+ <&ufs_phy_rx_symbol_1_clk>,
+ <&ufs_phy_tx_symbol_0_clk>,
+ <&ufs_card_rx_symbol_0_clk>,
+ <&ufs_card_rx_symbol_1_clk>,
+ <&ufs_card_tx_symbol_0_clk>,
+ <&usb_0_ssphy>,
+ <&usb_1_ssphy>,
+ <&pcie_0_pipe_clk>,
+ <&pcie_1_pipe_clk>,
+ <&pcie_phy_pipe_clk>,
+ <&rxc0_ref_clk>,
+ <&rxc1_ref_clk>;
+ power-domains = <&rpmhpd SA8775P_CX>;
+
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sa8775p-videocc.yaml b/Documentation/devicetree/bindings/clock/qcom,sa8775p-videocc.yaml
new file mode 100644
index 000000000000..07e5d811d816
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sa8775p-videocc.yaml
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sa8775p-videocc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Video Clock & Reset Controller on SA8775P
+
+maintainers:
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Qualcomm video clock control module provides the clocks, resets and power
+ domains on SA8775P.
+
+ See also: include/dt-bindings/clock/qcom,sa8775p-videocc.h
+
+properties:
+ compatible:
+ enum:
+ - qcom,qcs8300-videocc
+ - qcom,sa8775p-videocc
+
+ clocks:
+ items:
+ - description: Video AHB clock from GCC
+ - description: Board XO source
+ - description: Board active XO source
+ - description: Sleep Clock source
+
+ power-domains:
+ maxItems: 1
+ description: MMCX power domain
+
+required:
+ - compatible
+ - clocks
+ - power-domains
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ #include <dt-bindings/power/qcom-rpmpd.h>
+ #include <dt-bindings/clock/qcom,sa8775p-gcc.h>
+ videocc: clock-controller@abf0000 {
+ compatible = "qcom,sa8775p-videocc";
+ reg = <0x0abf0000 0x10000>;
+ clocks = <&gcc GCC_VIDEO_AHB_CLK>,
+ <&rpmhcc RPMH_CXO_CLK>,
+ <&rpmhcc RPMH_CXO_CLK_A>,
+ <&sleep_clk>;
+ power-domains = <&rpmhpd SA8775P_MMCX>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sar2130p-gcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sar2130p-gcc.yaml
new file mode 100644
index 000000000000..9a430bbd872a
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sar2130p-gcc.yaml
@@ -0,0 +1,65 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sar2130p-gcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on sar2130p
+
+maintainers:
+ - Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and
+ power domains on sar2130p.
+
+ See also: include/dt-bindings/clock/qcom,sar2130p-gcc.h
+
+properties:
+ compatible:
+ const: qcom,sar2130p-gcc
+
+ clocks:
+ items:
+ - description: XO reference clock
+ - description: Sleep clock
+ - description: PCIe 0 pipe clock
+ - description: PCIe 1 pipe clock
+ - description: Primary USB3 PHY wrapper pipe clock
+
+ protected-clocks:
+ maxItems: 240
+
+ power-domains:
+ maxItems: 1
+
+required:
+ - compatible
+ - clocks
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ #include <dt-bindings/power/qcom,rpmhpd.h>
+
+ gcc: clock-controller@100000 {
+ compatible = "qcom,sar2130p-gcc";
+ reg = <0x100000 0x1f4200>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&sleep_clk>,
+ <&pcie_0_pipe_clk>,
+ <&pcie_1_pipe_clk>,
+ <&usb_0_ssphy>;
+ power-domains = <&rpmhpd RPMHPD_CX>;
+
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sc7180-camcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sc7180-camcc.yaml
new file mode 100644
index 000000000000..98ee9be84794
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sc7180-camcc.yaml
@@ -0,0 +1,62 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sc7180-camcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Camera Clock & Reset Controller on SC7180
+
+maintainers:
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Qualcomm camera clock control module provides the clocks, resets and power
+ domains on SC7180.
+
+ See also: include/dt-bindings/clock/qcom,camcc-sc7180.h
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+properties:
+ compatible:
+ const: qcom,sc7180-camcc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Camera_ahb clock from GCC
+ - description: Camera XO clock from GCC
+
+ clock-names:
+ items:
+ - const: bi_tcxo
+ - const: iface
+ - const: xo
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,gcc-sc7180.h>
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ clock-controller@ad00000 {
+ compatible = "qcom,sc7180-camcc";
+ reg = <0x0ad00000 0x10000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&gcc GCC_CAMERA_AHB_CLK>,
+ <&gcc GCC_CAMERA_XO_CLK>;
+ clock-names = "bi_tcxo", "iface", "xo";
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sc7180-dispcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sc7180-dispcc.yaml
new file mode 100644
index 000000000000..f147d06ad2ef
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sc7180-dispcc.yaml
@@ -0,0 +1,74 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sc7180-dispcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Display Clock & Reset Controller on SC7180
+
+maintainers:
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Qualcomm display clock control module provides the clocks, resets and power
+ domains on SC7180.
+
+ See also: include/dt-bindings/clock/qcom,dispcc-sc7180.h
+
+properties:
+ compatible:
+ const: qcom,sc7180-dispcc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: GPLL0 source from GCC
+ - description: Byte clock from DSI PHY
+ - description: Pixel clock from DSI PHY
+ - description: Link clock from DP PHY
+ - description: VCO DIV clock from DP PHY
+
+ clock-names:
+ items:
+ - const: bi_tcxo
+ - const: gcc_disp_gpll0_clk_src
+ - const: dsi0_phy_pll_out_byteclk
+ - const: dsi0_phy_pll_out_dsiclk
+ - const: dp_phy_pll_link_clk
+ - const: dp_phy_pll_vco_div_clk
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,gcc-sc7180.h>
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ clock-controller@af00000 {
+ compatible = "qcom,sc7180-dispcc";
+ reg = <0x0af00000 0x200000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&gcc GCC_DISP_GPLL0_CLK_SRC>,
+ <&dsi_phy 0>,
+ <&dsi_phy 1>,
+ <&dp_phy 0>,
+ <&dp_phy 1>;
+ clock-names = "bi_tcxo",
+ "gcc_disp_gpll0_clk_src",
+ "dsi0_phy_pll_out_byteclk",
+ "dsi0_phy_pll_out_dsiclk",
+ "dp_phy_pll_link_clk",
+ "dp_phy_pll_vco_div_clk";
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sc7180-lpasscorecc.yaml b/Documentation/devicetree/bindings/clock/qcom,sc7180-lpasscorecc.yaml
new file mode 100644
index 000000000000..ad360debef7c
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sc7180-lpasscorecc.yaml
@@ -0,0 +1,107 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sc7180-lpasscorecc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm LPASS Core Clock Controller on SC7180
+
+maintainers:
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Qualcomm LPASS core clock control module provides the clocks and power
+ domains on SC7180.
+
+ See also: include/dt-bindings/clock/qcom,lpasscorecc-sc7180.h
+
+properties:
+ compatible:
+ enum:
+ - qcom,sc7180-lpasshm
+ - qcom,sc7180-lpasscorecc
+
+ clocks:
+ items:
+ - description: gcc_lpass_sway clock from GCC
+ - description: Board XO source
+
+ clock-names:
+ items:
+ - const: iface
+ - const: bi_tcxo
+
+ power-domains:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ '#power-domain-cells':
+ const: 1
+
+ reg:
+ minItems: 1
+ items:
+ - description: lpass core cc register
+ - description: lpass audio cc register
+
+ reg-names:
+ items:
+ - const: lpass_core_cc
+ - const: lpass_audio_cc
+
+if:
+ properties:
+ compatible:
+ contains:
+ const: qcom,sc7180-lpasshm
+then:
+ properties:
+ reg:
+ maxItems: 1
+
+else:
+ properties:
+ reg:
+ minItems: 2
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - '#clock-cells'
+ - '#power-domain-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ #include <dt-bindings/clock/qcom,gcc-sc7180.h>
+ #include <dt-bindings/clock/qcom,lpasscorecc-sc7180.h>
+ clock-controller@63000000 {
+ compatible = "qcom,sc7180-lpasshm";
+ reg = <0x63000000 0x28>;
+ clocks = <&gcc GCC_LPASS_CFG_NOC_SWAY_CLK>, <&rpmhcc RPMH_CXO_CLK>;
+ clock-names = "iface", "bi_tcxo";
+ #clock-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ #include <dt-bindings/clock/qcom,gcc-sc7180.h>
+ #include <dt-bindings/clock/qcom,lpasscorecc-sc7180.h>
+ clock-controller@62d00000 {
+ compatible = "qcom,sc7180-lpasscorecc";
+ reg = <0x62d00000 0x50000>, <0x62780000 0x30000>;
+ reg-names = "lpass_core_cc", "lpass_audio_cc";
+ clocks = <&gcc GCC_LPASS_CFG_NOC_SWAY_CLK>, <&rpmhcc RPMH_CXO_CLK>;
+ clock-names = "iface", "bi_tcxo";
+ power-domains = <&lpass_hm LPASS_CORE_HM_GDSCR>;
+ #clock-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sc7280-camcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sc7280-camcc.yaml
new file mode 100644
index 000000000000..2f28be58e82e
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sc7280-camcc.yaml
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sc7280-camcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Camera Clock & Reset Controller on SC7280
+
+maintainers:
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Qualcomm camera clock control module provides the clocks, resets and
+ power domains on SC7280.
+
+ See also: include/dt-bindings/clock/qcom,camcc-sc7280.h
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+properties:
+ compatible:
+ const: qcom,sc7280-camcc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Board XO active source
+ - description: Sleep clock source
+
+ clock-names:
+ items:
+ - const: bi_tcxo
+ - const: bi_tcxo_ao
+ - const: sleep_clk
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ clock-controller@ad00000 {
+ compatible = "qcom,sc7280-camcc";
+ reg = <0x0ad00000 0x10000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&rpmhcc RPMH_CXO_CLK_A>,
+ <&sleep_clk>;
+ clock-names = "bi_tcxo", "bi_tcxo_ao", "sleep_clk";
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sc7280-dispcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sc7280-dispcc.yaml
new file mode 100644
index 000000000000..95b1e4f48c4f
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sc7280-dispcc.yaml
@@ -0,0 +1,82 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sc7280-dispcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Display Clock & Reset Controller on SC7280
+
+maintainers:
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Qualcomm display clock control module provides the clocks, resets and power
+ domains on SC7280.
+
+ See also: include/dt-bindings/clock/qcom,dispcc-sc7280.h
+
+properties:
+ compatible:
+ const: qcom,sc7280-dispcc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: GPLL0 source from GCC
+ - description: Byte clock from DSI PHY
+ - description: Pixel clock from DSI PHY
+ - description: Link clock from DP PHY
+ - description: VCO DIV clock from DP PHY
+ - description: Link clock from EDP PHY
+ - description: VCO DIV clock from EDP PHY
+
+ clock-names:
+ items:
+ - const: bi_tcxo
+ - const: gcc_disp_gpll0_clk
+ - const: dsi0_phy_pll_out_byteclk
+ - const: dsi0_phy_pll_out_dsiclk
+ - const: dp_phy_pll_link_clk
+ - const: dp_phy_pll_vco_div_clk
+ - const: edp_phy_pll_link_clk
+ - const: edp_phy_pll_vco_div_clk
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,gcc-sc7280.h>
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ clock-controller@af00000 {
+ compatible = "qcom,sc7280-dispcc";
+ reg = <0x0af00000 0x200000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&gcc GCC_DISP_GPLL0_CLK_SRC>,
+ <&dsi_phy 0>,
+ <&dsi_phy 1>,
+ <&dp_phy 0>,
+ <&dp_phy 1>,
+ <&edp_phy 0>,
+ <&edp_phy 1>;
+ clock-names = "bi_tcxo",
+ "gcc_disp_gpll0_clk",
+ "dsi0_phy_pll_out_byteclk",
+ "dsi0_phy_pll_out_dsiclk",
+ "dp_phy_pll_link_clk",
+ "dp_phy_pll_vco_div_clk",
+ "edp_phy_pll_link_clk",
+ "edp_phy_pll_vco_div_clk";
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sc7280-lpasscc.yaml b/Documentation/devicetree/bindings/clock/qcom,sc7280-lpasscc.yaml
new file mode 100644
index 000000000000..a90961d8656c
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sc7280-lpasscc.yaml
@@ -0,0 +1,72 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sc7280-lpasscc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm LPASS Core Clock Controller on SC7280
+
+maintainers:
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Qualcomm LPASS core clock control module provides the clocks and power
+ domains on SC7280.
+
+ See also: include/dt-bindings/clock/qcom,lpass-sc7280.h
+
+properties:
+ compatible:
+ enum:
+ - qcom,sc7280-lpasscc
+
+ clocks:
+ items:
+ - description: gcc_cfg_noc_lpass_clk from GCC
+
+ clock-names:
+ items:
+ - const: iface
+
+ '#clock-cells':
+ const: 1
+
+ reg:
+ items:
+ - description: LPASS qdsp6ss register
+ - description: LPASS top-cc register
+
+ reg-names:
+ items:
+ - const: qdsp6ss
+ - const: top_cc
+
+ qcom,adsp-pil-mode:
+ description:
+ Indicates if the LPASS would be brought out of reset using
+ remoteproc peripheral loader.
+ type: boolean
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,gcc-sc7280.h>
+ #include <dt-bindings/clock/qcom,lpass-sc7280.h>
+ clock-controller@3000000 {
+ compatible = "qcom,sc7280-lpasscc";
+ reg = <0x03000000 0x40>, <0x03c04000 0x4>;
+ reg-names = "qdsp6ss", "top_cc";
+ clocks = <&gcc GCC_CFG_NOC_LPASS_CLK>;
+ clock-names = "iface";
+ qcom,adsp-pil-mode;
+ #clock-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sc7280-lpasscorecc.yaml b/Documentation/devicetree/bindings/clock/qcom,sc7280-lpasscorecc.yaml
new file mode 100644
index 000000000000..99ab9106009f
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sc7280-lpasscorecc.yaml
@@ -0,0 +1,202 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sc7280-lpasscorecc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm LPASS Core & Audio Clock Controller on SC7280
+
+maintainers:
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Qualcomm LPASS core and audio clock control module provides the clocks and
+ power domains on SC7280.
+
+ See also::
+ include/dt-bindings/clock/qcom,lpasscorecc-sc7280.h
+ include/dt-bindings/clock/qcom,lpassaudiocc-sc7280.h
+
+properties:
+ compatible:
+ enum:
+ - qcom,qcm6490-lpassaudiocc
+ - qcom,sc7280-lpassaoncc
+ - qcom,sc7280-lpassaudiocc
+ - qcom,sc7280-lpasscorecc
+ - qcom,sc7280-lpasshm
+
+ reg:
+ minItems: 1
+ maxItems: 2
+
+ clocks:
+ minItems: 1
+ maxItems: 3
+
+ clock-names:
+ minItems: 1
+ maxItems: 3
+
+ '#clock-cells':
+ const: 1
+
+ power-domains:
+ maxItems: 1
+
+ '#power-domain-cells':
+ const: 1
+
+ '#reset-cells':
+ const: 1
+
+ qcom,adsp-pil-mode:
+ description:
+ Indicates if the LPASS would be brought out of reset using
+ peripheral loader.
+ type: boolean
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - '#clock-cells'
+ - '#power-domain-cells'
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,qcm6490-lpassaudiocc
+ - qcom,sc7280-lpassaudiocc
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: Board XO source
+ - description: LPASS_AON_CC_MAIN_RCG_CLK_SRC
+
+ clock-names:
+ items:
+ - const: bi_tcxo
+ - const: lpass_aon_cc_main_rcg_clk_src
+
+ reg:
+ items:
+ - description: lpass core cc register
+ - description: lpass audio csr register
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,sc7280-lpassaoncc
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Board XO active only source
+ - description: LPASS_AON_CC_MAIN_RCG_CLK_SRC
+
+ clock-names:
+ items:
+ - const: bi_tcxo
+ - const: bi_tcxo_ao
+ - const: iface
+
+ reg:
+ maxItems: 1
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,sc7280-lpasshm
+ - qcom,sc7280-lpasscorecc
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: Board XO source
+
+ clock-names:
+ items:
+ - const: bi_tcxo
+
+ reg:
+ maxItems: 1
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ #include <dt-bindings/clock/qcom,gcc-sc7280.h>
+ #include <dt-bindings/clock/qcom,lpassaudiocc-sc7280.h>
+ #include <dt-bindings/clock/qcom,lpasscorecc-sc7280.h>
+ lpass_audiocc: clock-controller@3300000 {
+ compatible = "qcom,sc7280-lpassaudiocc";
+ reg = <0x3300000 0x30000>,
+ <0x32a9000 0x1000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&lpass_aon LPASS_AON_CC_MAIN_RCG_CLK_SRC>;
+ clock-names = "bi_tcxo", "lpass_aon_cc_main_rcg_clk_src";
+ power-domains = <&lpass_aon LPASS_AON_CC_LPASS_AUDIO_HM_GDSC>;
+ #clock-cells = <1>;
+ #power-domain-cells = <1>;
+ #reset-cells = <1>;
+ };
+
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ #include <dt-bindings/clock/qcom,gcc-sc7280.h>
+ #include <dt-bindings/clock/qcom,lpassaudiocc-sc7280.h>
+ #include <dt-bindings/clock/qcom,lpasscorecc-sc7280.h>
+ lpass_hm: clock-controller@3c00000 {
+ compatible = "qcom,sc7280-lpasshm";
+ reg = <0x3c00000 0x28>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>;
+ clock-names = "bi_tcxo";
+ #clock-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ #include <dt-bindings/clock/qcom,gcc-sc7280.h>
+ #include <dt-bindings/clock/qcom,lpassaudiocc-sc7280.h>
+ #include <dt-bindings/clock/qcom,lpasscorecc-sc7280.h>
+ lpasscore: clock-controller@3900000 {
+ compatible = "qcom,sc7280-lpasscorecc";
+ reg = <0x3900000 0x50000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>;
+ clock-names = "bi_tcxo";
+ power-domains = <&lpass_hm LPASS_CORE_CC_LPASS_CORE_HM_GDSC>;
+ #clock-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ #include <dt-bindings/clock/qcom,gcc-sc7280.h>
+ #include <dt-bindings/clock/qcom,lpassaudiocc-sc7280.h>
+ #include <dt-bindings/clock/qcom,lpasscorecc-sc7280.h>
+ lpass_aon: clock-controller@3380000 {
+ compatible = "qcom,sc7280-lpassaoncc";
+ reg = <0x3380000 0x30000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>, <&rpmhcc RPMH_CXO_CLK_A>,
+ <&lpasscore LPASS_CORE_CC_CORE_CLK>;
+ clock-names = "bi_tcxo", "bi_tcxo_ao","iface";
+ qcom,adsp-pil-mode;
+ #clock-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sc8180x-camcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sc8180x-camcc.yaml
new file mode 100644
index 000000000000..477ee687520e
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sc8180x-camcc.yaml
@@ -0,0 +1,67 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sc8180x-camcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Camera Clock & Reset Controller on SC8180X
+
+maintainers:
+ - Satya Priya Kakitapalli <quic_skakitap@quicinc.com>
+
+description: |
+ Qualcomm camera clock control module provides the clocks, resets and
+ power domains on SC8180X.
+
+ See also: include/dt-bindings/clock/qcom,sc8180x-camcc.h
+
+properties:
+ compatible:
+ const: qcom,sc8180x-camcc
+
+ clocks:
+ items:
+ - description: Camera AHB clock from GCC
+ - description: Board XO source
+ - description: Sleep clock source
+
+ power-domains:
+ maxItems: 1
+ description:
+ A phandle and PM domain specifier for the MMCX power domain.
+
+ required-opps:
+ maxItems: 1
+ description:
+ A phandle to an OPP node describing required MMCX performance point.
+
+required:
+ - compatible
+ - clocks
+ - power-domains
+ - required-opps
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,gcc-sc8180x.h>
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ #include <dt-bindings/power/qcom-rpmpd.h>
+ clock-controller@ad00000 {
+ compatible = "qcom,sc8180x-camcc";
+ reg = <0x0ad00000 0x20000>;
+ clocks = <&gcc GCC_CAMERA_AHB_CLK>,
+ <&rpmhcc RPMH_CXO_CLK>,
+ <&sleep_clk>;
+ power-domains = <&rpmhpd SC8180X_MMCX>;
+ required-opps = <&rpmhpd_opp_low_svs>;
+
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sc8280xp-lpasscc.yaml b/Documentation/devicetree/bindings/clock/qcom,sc8280xp-lpasscc.yaml
new file mode 100644
index 000000000000..273d66e245c5
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sc8280xp-lpasscc.yaml
@@ -0,0 +1,67 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sc8280xp-lpasscc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm LPASS Core & Audio Clock Controller on SC8280XP
+
+maintainers:
+ - Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
+
+description: |
+ Qualcomm LPASS core and audio clock control module provides the clocks,
+ and reset on SC8280XP.
+
+ See also::
+ include/dt-bindings/clock/qcom,lpasscc-sc8280xp.h
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - qcom,sc8280xp-lpassaudiocc
+ - qcom,sc8280xp-lpasscc
+ - items:
+ - const: qcom,x1e80100-lpassaudiocc
+ - const: qcom,sc8280xp-lpassaudiocc
+ - items:
+ - const: qcom,x1e80100-lpasscc
+ - const: qcom,sc8280xp-lpasscc
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ '#reset-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+ - '#reset-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,sc8280xp-lpasscc.h>
+ lpass_audiocc: clock-controller@32a9000 {
+ compatible = "qcom,sc8280xp-lpassaudiocc";
+ reg = <0x032a9000 0x1000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
+
+ - |
+ #include <dt-bindings/clock/qcom,sc8280xp-lpasscc.h>
+ lpasscc: clock-controller@33e0000 {
+ compatible = "qcom,sc8280xp-lpasscc";
+ reg = <0x033e0000 0x12000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sdm845-camcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sdm845-camcc.yaml
new file mode 100644
index 000000000000..6214e41eec1f
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sdm845-camcc.yaml
@@ -0,0 +1,59 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sdm845-camcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Camera Clock & Reset Controller on SDM845
+
+maintainers:
+ - Bjorn Andersson <bjorn.andersson@linaro.org>
+
+description: |
+ Qualcomm camera clock control module provides the clocks, resets and power
+ domains on SDM845.
+
+ See also: include/dt-bindings/clock/qcom,camcc-sm845.h
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - const: qcom,sdm670-camcc
+ - const: qcom,sdm845-camcc
+ - const: qcom,sdm845-camcc
+
+ clocks:
+ items:
+ - description: Board XO source
+
+ clock-names:
+ items:
+ - const: bi_tcxo
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ clock-controller@ad00000 {
+ compatible = "qcom,sdm845-camcc";
+ reg = <0x0ad00000 0x10000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>;
+ clock-names = "bi_tcxo";
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sdm845-dispcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sdm845-dispcc.yaml
new file mode 100644
index 000000000000..854c391c8307
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sdm845-dispcc.yaml
@@ -0,0 +1,89 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sdm845-dispcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Display Clock & Reset Controller on SDM845
+
+maintainers:
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Qualcomm display clock control module provides the clocks, resets and power
+ domains on SDM845.
+
+ See also: include/dt-bindings/clock/qcom,dispcc-sdm845.h
+
+properties:
+ compatible:
+ const: qcom,sdm845-dispcc
+
+ # NOTE: sdm845.dtsi existed for quite some time and specified no clocks.
+ # The code had to use hardcoded mechanisms to find the input clocks.
+ # New dts files should have these clocks.
+ clocks:
+ items:
+ - description: Board XO source
+ - description: GPLL0 source from GCC
+ - description: GPLL0 div source from GCC
+ - description: Byte clock from DSI PHY0
+ - description: Pixel clock from DSI PHY0
+ - description: Byte clock from DSI PHY1
+ - description: Pixel clock from DSI PHY1
+ - description: Link clock from DP PHY
+ - description: VCO DIV clock from DP PHY
+
+ clock-names:
+ items:
+ - const: bi_tcxo
+ - const: gcc_disp_gpll0_clk_src
+ - const: gcc_disp_gpll0_div_clk_src
+ - const: dsi0_phy_pll_out_byteclk
+ - const: dsi0_phy_pll_out_dsiclk
+ - const: dsi1_phy_pll_out_byteclk
+ - const: dsi1_phy_pll_out_dsiclk
+ - const: dp_link_clk_divsel_ten
+ - const: dp_vco_divided_clk_src_mux
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,gcc-sdm845.h>
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ clock-controller@af00000 {
+ compatible = "qcom,sdm845-dispcc";
+ reg = <0x0af00000 0x10000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&gcc GCC_DISP_GPLL0_CLK_SRC>,
+ <&gcc GCC_DISP_GPLL0_DIV_CLK_SRC>,
+ <&dsi0_phy 0>,
+ <&dsi0_phy 1>,
+ <&dsi1_phy 0>,
+ <&dsi1_phy 1>,
+ <&dp_phy 0>,
+ <&dp_phy 1>;
+ clock-names = "bi_tcxo",
+ "gcc_disp_gpll0_clk_src",
+ "gcc_disp_gpll0_div_clk_src",
+ "dsi0_phy_pll_out_byteclk",
+ "dsi0_phy_pll_out_dsiclk",
+ "dsi1_phy_pll_out_byteclk",
+ "dsi1_phy_pll_out_dsiclk",
+ "dp_link_clk_divsel_ten",
+ "dp_vco_divided_clk_src_mux";
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sdm845-lpasscc.yaml b/Documentation/devicetree/bindings/clock/qcom,sdm845-lpasscc.yaml
new file mode 100644
index 000000000000..f9feb7049b21
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sdm845-lpasscc.yaml
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sdm845-lpasscc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm SDM845 LPASS Clock Controller
+
+maintainers:
+ - Bjorn Andersson <andersson@kernel.org>
+
+description: |
+ Qualcomm SDM845 LPASS (Low Power Audio SubSystem) Clock Controller.
+
+ See also: include/dt-bindings/clock/qcom,lpass-sdm845.h
+
+properties:
+ compatible:
+ const: qcom,sdm845-lpasscc
+
+ '#clock-cells':
+ const: 1
+
+ reg:
+ maxItems: 2
+
+ reg-names:
+ items:
+ - const: cc
+ - const: qdsp6ss
+
+required:
+ - compatible
+ - '#clock-cells'
+ - reg
+ - reg-names
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@17014000 {
+ compatible = "qcom,sdm845-lpasscc";
+ reg = <0x17014000 0x1f004>, <0x17300000 0x200>;
+ reg-names = "cc", "qdsp6ss";
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/qcom,sdx75-gcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sdx75-gcc.yaml
new file mode 100644
index 000000000000..29a0b29bcb81
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sdx75-gcc.yaml
@@ -0,0 +1,66 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sdx75-gcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on SDX75
+
+maintainers:
+ - Imran Shaik <quic_imrashai@quicinc.com>
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on SDX75
+
+ See also: include/dt-bindings/clock/qcom,sdx75-gcc.h
+
+properties:
+ compatible:
+ const: qcom,sdx75-gcc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Sleep clock source
+ - description: EMAC0 sgmiiphy mac rclk source
+ - description: EMAC0 sgmiiphy mac tclk source
+ - description: EMAC0 sgmiiphy rclk source
+ - description: EMAC0 sgmiiphy tclk source
+ - description: EMAC1 sgmiiphy mac rclk source
+ - description: EMAC1 sgmiiphy mac tclk source
+ - description: EMAC1 sgmiiphy rclk source
+ - description: EMAC1 sgmiiphy tclk source
+ - description: PCIE20 phy aux clock source
+ - description: PCIE_1 Pipe clock source
+ - description: PCIE_2 Pipe clock source
+ - description: PCIE Pipe clock source
+ - description: USB3 phy wrapper pipe clock source
+
+required:
+ - compatible
+ - clocks
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ clock-controller@80000 {
+ compatible = "qcom,sdx75-gcc";
+ reg = <0x80000 0x1f7400>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>, <&sleep_clk>, <&emac0_sgmiiphy_mac_rclk>,
+ <&emac0_sgmiiphy_mac_tclk>, <&emac0_sgmiiphy_rclk>, <&emac0_sgmiiphy_tclk>,
+ <&emac1_sgmiiphy_mac_rclk>, <&emac1_sgmiiphy_mac_tclk>, <&emac1_sgmiiphy_rclk>,
+ <&emac1_sgmiiphy_tclk>, <&pcie20_phy_aux_clk>, <&pcie_1_pipe_clk>,
+ <&pcie_2_pipe_clk>, <&pcie_pipe_clk>, <&usb3_phy_wrapper_gcc_usb30_pipe_clk>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sm4450-camcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm4450-camcc.yaml
new file mode 100644
index 000000000000..70f025b26736
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sm4450-camcc.yaml
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sm4450-camcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Camera Clock & Reset Controller on SM4450
+
+maintainers:
+ - Ajit Pandey <quic_ajipan@quicinc.com>
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Qualcomm camera clock control module provides the clocks, resets and power
+ domains on SM4450
+
+ See also: include/dt-bindings/clock/qcom,sm4450-camcc.h
+
+properties:
+ compatible:
+ const: qcom,sm4450-camcc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Camera AHB clock source from GCC
+
+required:
+ - compatible
+ - clocks
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ #include <dt-bindings/clock/qcom,sm4450-gcc.h>
+ clock-controller@ade0000 {
+ compatible = "qcom,sm4450-camcc";
+ reg = <0x0ade0000 0x20000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&gcc GCC_CAMERA_AHB_CLK>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sm4450-dispcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm4450-dispcc.yaml
new file mode 100644
index 000000000000..d977788bdc8a
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sm4450-dispcc.yaml
@@ -0,0 +1,59 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sm4450-dispcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Display Clock & Reset Controller on SM4450
+
+maintainers:
+ - Ajit Pandey <quic_ajipan@quicinc.com>
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Qualcomm display clock control module provides the clocks, resets and power
+ domains on SM4450
+
+ See also: include/dt-bindings/clock/qcom,sm4450-dispcc.h
+
+properties:
+ compatible:
+ const: qcom,sm4450-dispcc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Board active XO source
+ - description: Display AHB clock source from GCC
+ - description: sleep clock source
+ - description: Byte clock from DSI PHY0
+ - description: Pixel clock from DSI PHY0
+
+required:
+ - compatible
+ - clocks
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ #include <dt-bindings/clock/qcom,sm4450-gcc.h>
+ clock-controller@af00000 {
+ compatible = "qcom,sm4450-dispcc";
+ reg = <0x0af00000 0x20000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&rpmhcc RPMH_CXO_CLK_A>,
+ <&gcc GCC_DISP_AHB_CLK>,
+ <&sleep_clk>,
+ <&dsi0_phy_pll_out_byteclk>,
+ <&dsi0_phy_pll_out_dsiclk>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sm4450-gcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm4450-gcc.yaml
new file mode 100644
index 000000000000..9cfe859bacc9
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sm4450-gcc.yaml
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sm4450-gcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on SM4450
+
+maintainers:
+ - Ajit Pandey <quic_ajipan@quicinc.com>
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on SM4450
+
+ See also: include/dt-bindings/clock/qcom,sm4450-gcc.h
+
+properties:
+ compatible:
+ const: qcom,sm4450-gcc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Sleep clock source
+ - description: UFS Phy Rx symbol 0 clock source
+ - description: UFS Phy Rx symbol 1 clock source
+ - description: UFS Phy Tx symbol 0 clock source
+ - description: USB3 Phy wrapper pipe clock source
+
+required:
+ - compatible
+ - clocks
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ clock-controller@100000 {
+ compatible = "qcom,sm4450-gcc";
+ reg = <0x00100000 0x001f4200>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>, <&sleep_clk>,
+ <&ufs_mem_phy 0>, <&ufs_mem_phy 1>,
+ <&ufs_mem_phy 2>, <&usb_1_qmpphy>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sm6115-dispcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm6115-dispcc.yaml
new file mode 100644
index 000000000000..b31424306f49
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sm6115-dispcc.yaml
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sm6115-dispcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Display Clock Controller for SM6115
+
+maintainers:
+ - Bjorn Andersson <andersson@kernel.org>
+
+description: |
+ Qualcomm display clock control module provides the clocks and power domains
+ on SM6115.
+
+ See also: include/dt-bindings/clock/qcom,sm6115-dispcc.h
+
+properties:
+ compatible:
+ enum:
+ - qcom,sm6115-dispcc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Board sleep clock
+ - description: Byte clock from DSI PHY0
+ - description: Pixel clock from DSI PHY0
+ - description: GPLL0 DISP DIV clock from GCC
+
+required:
+ - compatible
+ - clocks
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmcc.h>
+ #include <dt-bindings/clock/qcom,gcc-sm6115.h>
+ clock-controller@5f00000 {
+ compatible = "qcom,sm6115-dispcc";
+ reg = <0x5f00000 0x20000>;
+ clocks = <&rpmcc RPM_SMD_XO_CLK_SRC>,
+ <&sleep_clk>,
+ <&dsi0_phy 0>,
+ <&dsi0_phy 1>,
+ <&gcc GCC_DISP_GPLL0_DIV_CLK_SRC>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sm6115-gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm6115-gpucc.yaml
new file mode 100644
index 000000000000..104ba10ca573
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sm6115-gpucc.yaml
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sm6115-gpucc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Graphics Clock & Reset Controller on SM6115
+
+maintainers:
+ - Konrad Dybcio <konradybcio@kernel.org>
+
+description: |
+ Qualcomm graphics clock control module provides clocks, resets and power
+ domains on Qualcomm SoCs.
+
+ See also: include/dt-bindings/clock/qcom,sm6115-gpucc.h
+
+properties:
+ compatible:
+ enum:
+ - qcom,sm6115-gpucc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: GPLL0 main branch source
+ - description: GPLL0 main div source
+
+required:
+ - compatible
+ - clocks
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,gcc-sm6115.h>
+ #include <dt-bindings/clock/qcom,rpmcc.h>
+
+ soc {
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ clock-controller@5990000 {
+ compatible = "qcom,sm6115-gpucc";
+ reg = <0x05990000 0x9000>;
+ clocks = <&rpmcc RPM_SMD_XO_CLK_SRC>,
+ <&gcc GCC_GPU_GPLL0_CLK_SRC>,
+ <&gcc GCC_GPU_GPLL0_DIV_CLK_SRC>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sm6115-lpasscc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm6115-lpasscc.yaml
new file mode 100644
index 000000000000..8cbab3fbb660
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sm6115-lpasscc.yaml
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sm6115-lpasscc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm LPASS Core & Audio Clock Controller on SM6115
+
+maintainers:
+ - Konrad Dybcio <konradybcio@kernel.org>
+ - Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
+
+description: |
+ Qualcomm LPASS core and audio clock controllers provide audio-related resets
+ on SM6115 and its derivatives.
+
+ See also::
+ include/dt-bindings/clock/qcom,sm6115-lpasscc.h
+
+properties:
+ compatible:
+ enum:
+ - qcom,sm6115-lpassaudiocc
+ - qcom,sm6115-lpasscc
+
+ reg:
+ maxItems: 1
+
+ '#reset-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - '#reset-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ lpass_audiocc: clock-controller@a6a9000 {
+ compatible = "qcom,sm6115-lpassaudiocc";
+ reg = <0x0a6a9000 0x1000>;
+ #reset-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sm6125-gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm6125-gpucc.yaml
new file mode 100644
index 000000000000..12d6f0cdbcd8
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sm6125-gpucc.yaml
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sm6125-gpucc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Graphics Clock & Reset Controller on SM6125
+
+maintainers:
+ - Konrad Dybcio <konradybcio@kernel.org>
+
+description: |
+ Qualcomm graphics clock control module provides clocks and power domains on
+ Qualcomm SoCs.
+
+ See also: include/dt-bindings/clock/qcom,sm6125-gpucc.h
+
+properties:
+ compatible:
+ enum:
+ - qcom,sm6125-gpucc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: GPLL0 main branch source
+
+ '#clock-cells':
+ const: 1
+
+ '#power-domain-cells':
+ const: 1
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - '#clock-cells'
+ - '#power-domain-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,gcc-sm6125.h>
+ #include <dt-bindings/clock/qcom,rpmcc.h>
+
+ soc {
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ clock-controller@5990000 {
+ compatible = "qcom,sm6125-gpucc";
+ reg = <0x05990000 0x9000>;
+ clocks = <&rpmcc RPM_SMD_XO_CLK_SRC>,
+ <&gcc GCC_GPU_GPLL0_CLK_SRC>;
+ #clock-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sm6350-camcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm6350-camcc.yaml
new file mode 100644
index 000000000000..e31cd4300f7d
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sm6350-camcc.yaml
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sm6350-camcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Camera Clock & Reset Controller on SM6350
+
+maintainers:
+ - Konrad Dybcio <konradybcio@kernel.org>
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Qualcomm camera clock control module provides the clocks, resets and power
+ domains on SM6350 and QCS615 SoC.
+
+ See also:
+ include/dt-bindings/clock/qcom,qcs615-camcc.h
+ include/dt-bindings/clock/qcom,sm6350-camcc.h
+
+properties:
+ compatible:
+ enum:
+ - qcom,qcs615-camcc
+ - qcom,sm6350-camcc
+
+ clocks:
+ items:
+ - description: Board XO source
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - clocks
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ clock-controller@ad00000 {
+ compatible = "qcom,sm6350-camcc";
+ reg = <0x0ad00000 0x16000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sm6375-dispcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm6375-dispcc.yaml
new file mode 100644
index 000000000000..519ea76cb052
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sm6375-dispcc.yaml
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sm6375-dispcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Display Clock & Reset Controller on SM6375
+
+maintainers:
+ - Konrad Dybcio <konradybcio@kernel.org>
+
+description: |
+ Qualcomm display clock control module provides the clocks, resets and power
+ domains on SM6375.
+
+ See also: include/dt-bindings/clock/qcom,dispcc-sm6375.h
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+properties:
+ compatible:
+ const: qcom,sm6375-dispcc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: GPLL0 source from GCC
+ - description: Byte clock from DSI PHY
+ - description: Pixel clock from DSI PHY
+
+required:
+ - compatible
+ - clocks
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,sm6375-gcc.h>
+ #include <dt-bindings/clock/qcom,rpmh.h>
+
+ clock-controller@5f00000 {
+ compatible = "qcom,sm6375-dispcc";
+ reg = <0x05f00000 0x20000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&gcc GCC_DISP_GPLL0_CLK_SRC>,
+ <&dsi_phy 0>,
+ <&dsi_phy 1>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sm6375-gcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm6375-gcc.yaml
new file mode 100644
index 000000000000..66dfa72fa975
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sm6375-gcc.yaml
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sm6375-gcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on SM6375
+
+maintainers:
+ - Konrad Dybcio <konradybcio@kernel.org>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on SM6375
+
+ See also: include/dt-bindings/clock/qcom,sm6375-gcc.h
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+properties:
+ compatible:
+ const: qcom,sm6375-gcc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Board XO Active-Only source
+ - description: Sleep clock source
+
+required:
+ - compatible
+ - clocks
+ - '#power-domain-cells'
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmcc.h>
+ clock-controller@1400000 {
+ compatible = "qcom,sm6375-gcc";
+ reg = <0x01400000 0x1f0000>;
+ clocks = <&rpmcc RPM_SMD_XO_CLK_SRC>,
+ <&rpmcc RPM_SMD_XO_A_CLK_SRC>,
+ <&sleep_clk>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sm6375-gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm6375-gpucc.yaml
new file mode 100644
index 000000000000..3aad6b5bb1c5
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sm6375-gpucc.yaml
@@ -0,0 +1,75 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sm6375-gpucc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Graphics Clock & Reset Controller on SM6375
+
+maintainers:
+ - Konrad Dybcio <konradybcio@kernel.org>
+
+description: |
+ Qualcomm graphics clock control module provides clocks, resets and power
+ domains on Qualcomm SoCs.
+
+ See also: include/dt-bindings/clock/qcom,sm6375-gpucc.h
+
+properties:
+ compatible:
+ enum:
+ - qcom,sm6375-gpucc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: GPLL0 main branch source
+ - description: GPLL0 div branch source
+ - description: SNoC DVM GFX source
+
+ power-domains:
+ description:
+ A phandle and PM domain specifier for the VDD_GX power rail
+ maxItems: 1
+
+ required-opps:
+ description:
+ A phandle to an OPP node describing required VDD_GX performance point.
+ maxItems: 1
+
+required:
+ - compatible
+ - clocks
+ - power-domains
+ - required-opps
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,sm6375-gcc.h>
+ #include <dt-bindings/clock/qcom,rpmcc.h>
+ #include <dt-bindings/power/qcom-rpmpd.h>
+
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ clock-controller@5990000 {
+ compatible = "qcom,sm6375-gpucc";
+ reg = <0 0x05990000 0 0x9000>;
+ clocks = <&rpmcc RPM_SMD_XO_CLK_SRC>,
+ <&gcc GCC_GPU_GPLL0_CLK_SRC>,
+ <&gcc GCC_GPU_GPLL0_DIV_CLK_SRC>,
+ <&gcc GCC_GPU_SNOC_DVM_GFX_CLK>;
+ power-domains = <&rpmpd SM6375_VDDGX>;
+ required-opps = <&rpmpd_opp_low_svs>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sm7150-camcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm7150-camcc.yaml
new file mode 100644
index 000000000000..b96091c28c5a
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sm7150-camcc.yaml
@@ -0,0 +1,60 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sm7150-camcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Camera Clock & Reset Controller on SM7150
+
+maintainers:
+ - Danila Tikhonov <danila@jiaxyga.com>
+ - David Wronek <david@mainlining.org>
+ - Jens Reidel <adrian@travitia.xyz>
+
+description: |
+ Qualcomm camera clock control module provides the clocks, resets and power
+ domains on SM7150.
+
+ See also: include/dt-bindings/clock/qcom,sm7150-camcc.h
+
+properties:
+ compatible:
+ const: qcom,sm7150-camcc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Board XO Active-Only source
+ - description: Sleep clock source
+
+ power-domains:
+ maxItems: 1
+ description:
+ CX power domain.
+
+required:
+ - compatible
+ - clocks
+ - power-domains
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ #include <dt-bindings/power/qcom,rpmhpd.h>
+ clock-controller@ad00000 {
+ compatible = "qcom,sm7150-camcc";
+ reg = <0xad00000 0x10000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&rpmhcc RPMH_CXO_CLK_A>,
+ <&sleep_clk>;
+ power-domains = <&rpmhpd RPMHPD_CX>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sm7150-dispcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm7150-dispcc.yaml
new file mode 100644
index 000000000000..13ab3359b592
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sm7150-dispcc.yaml
@@ -0,0 +1,75 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sm7150-dispcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Display Clock & Reset Controller for SM7150
+
+maintainers:
+ - Danila Tikhonov <danila@jiaxyga.com>
+ - David Wronek <david@mainlining.org>
+ - Jens Reidel <adrian@travitia.xyz>
+
+description: |
+ Qualcomm display clock control module provides the clocks, resets and power
+ domains on SM7150.
+
+ See also: include/dt-bindings/clock/qcom,sm7150-dispcc.h
+
+properties:
+ compatible:
+ const: qcom,sm7150-dispcc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Board Always On XO source
+ - description: GPLL0 source from GCC
+ - description: Sleep clock source
+ - description: Byte clock from MDSS DSI PHY0
+ - description: Pixel clock from MDSS DSI PHY0
+ - description: Byte clock from MDSS DSI PHY1
+ - description: Pixel clock from MDSS DSI PHY1
+ - description: Link clock from DP PHY
+ - description: VCO DIV clock from DP PHY
+
+ power-domains:
+ maxItems: 1
+ description:
+ CX power domain.
+
+required:
+ - compatible
+ - clocks
+ - power-domains
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,sm7150-gcc.h>
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ #include <dt-bindings/power/qcom,rpmhpd.h>
+ clock-controller@af00000 {
+ compatible = "qcom,sm7150-dispcc";
+ reg = <0x0af00000 0x200000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&rpmhcc RPMH_CXO_CLK_A>,
+ <&gcc GCC_DISP_GPLL0_CLK_SRC>,
+ <&sleep_clk>,
+ <&mdss_dsi0_phy 0>,
+ <&mdss_dsi0_phy 1>,
+ <&mdss_dsi1_phy 0>,
+ <&mdss_dsi1_phy 1>,
+ <&dp_phy 0>,
+ <&dp_phy 1>;
+ power-domains = <&rpmhpd RPMHPD_CX>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sm7150-gcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm7150-gcc.yaml
new file mode 100644
index 000000000000..3878808f811e
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sm7150-gcc.yaml
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sm7150-gcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on SM7150
+
+maintainers:
+ - Bjorn Andersson <andersson@kernel.org>
+ - Danila Tikhonov <danila@jiaxyga.com>
+ - David Wronek <davidwronek@gmail.com>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on SM7150
+
+ See also: include/dt-bindings/clock/qcom,sm7150-gcc.h
+
+properties:
+ compatible:
+ const: qcom,sm7150-gcc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Board XO Active-Only source
+ - description: Sleep clock source
+
+required:
+ - compatible
+ - clocks
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ clock-controller@100000 {
+ compatible = "qcom,sm7150-gcc";
+ reg = <0x00100000 0x001f0000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&rpmhcc RPMH_CXO_CLK_A>,
+ <&sleep_clk>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sm7150-videocc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm7150-videocc.yaml
new file mode 100644
index 000000000000..9f7928730386
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sm7150-videocc.yaml
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sm7150-videocc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Video Clock & Reset Controller on SM7150
+
+maintainers:
+ - Danila Tikhonov <danila@jiaxyga.com>
+ - David Wronek <david@mainlining.org>
+ - Jens Reidel <adrian@travitia.xyz>
+
+description: |
+ Qualcomm video clock control module provides the clocks, resets and power
+ domains on SM7150.
+
+ See also: include/dt-bindings/clock/qcom,videocc-sm7150.h
+
+properties:
+ compatible:
+ const: qcom,sm7150-videocc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Board Always On XO source
+
+ power-domains:
+ maxItems: 1
+ description:
+ CX power domain.
+
+required:
+ - compatible
+ - clocks
+ - power-domains
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ #include <dt-bindings/power/qcom,rpmhpd.h>
+ videocc: clock-controller@ab00000 {
+ compatible = "qcom,sm7150-videocc";
+ reg = <0x0ab00000 0x10000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&rpmhcc RPMH_CXO_CLK_A>;
+ power-domains = <&rpmhpd RPMHPD_CX>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sm8150-camcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm8150-camcc.yaml
new file mode 100644
index 000000000000..a55e30a4975e
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sm8150-camcc.yaml
@@ -0,0 +1,65 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sm8150-camcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Camera Clock & Reset Controller on SM8150
+
+maintainers:
+ - Satya Priya Kakitapalli <quic_skakitap@quicinc.com>
+
+description: |
+ Qualcomm camera clock control module provides the clocks, resets and
+ power domains on SM8150.
+
+ See also: include/dt-bindings/clock/qcom,sm8150-camcc.h
+
+properties:
+ compatible:
+ const: qcom,sm8150-camcc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Camera AHB clock from GCC
+
+ power-domains:
+ maxItems: 1
+ description:
+ A phandle and PM domain specifier for the MMCX power domain.
+
+ required-opps:
+ maxItems: 1
+ description:
+ A phandle to an OPP node describing required MMCX performance point.
+
+required:
+ - compatible
+ - clocks
+ - power-domains
+ - required-opps
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,gcc-sm8150.h>
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ #include <dt-bindings/power/qcom-rpmpd.h>
+ clock-controller@ad00000 {
+ compatible = "qcom,sm8150-camcc";
+ reg = <0x0ad00000 0x10000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&gcc GCC_CAMERA_AHB_CLK>;
+ power-domains = <&rpmhpd SM8150_MMCX>;
+ required-opps = <&rpmhpd_opp_low_svs>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sm8350-videocc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm8350-videocc.yaml
new file mode 100644
index 000000000000..5c2ecec0624e
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sm8350-videocc.yaml
@@ -0,0 +1,70 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sm8350-videocc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm SM8350 Video Clock & Reset Controller
+
+maintainers:
+ - Konrad Dybcio <konradybcio@kernel.org>
+
+description: |
+ Qualcomm video clock control module provides the clocks, resets and power
+ domains on Qualcomm SoCs.
+
+ See also::
+ include/dt-bindings/clock/qcom,videocc-sm8350.h
+ include/dt-bindings/reset/qcom,videocc-sm8350.h
+
+properties:
+ compatible:
+ enum:
+ - qcom,sc8280xp-videocc
+ - qcom,sm8350-videocc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Board active XO source
+ - description: Board sleep clock
+
+ power-domains:
+ description:
+ A phandle and PM domain specifier for the MMCX power domain.
+ maxItems: 1
+
+ required-opps:
+ description:
+ A phandle to an OPP node describing required MMCX performance point.
+ maxItems: 1
+
+required:
+ - compatible
+ - clocks
+ - power-domains
+ - required-opps
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ #include <dt-bindings/power/qcom,rpmhpd.h>
+
+ clock-controller@abf0000 {
+ compatible = "qcom,sm8350-videocc";
+ reg = <0x0abf0000 0x10000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&rpmhcc RPMH_CXO_CLK_A>,
+ <&sleep_clk>;
+ power-domains = <&rpmhpd RPMHPD_MMCX>;
+ required-opps = <&rpmhpd_opp_low_svs>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sm8450-camcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm8450-camcc.yaml
new file mode 100644
index 000000000000..c1e06f39431e
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sm8450-camcc.yaml
@@ -0,0 +1,95 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sm8450-camcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Camera Clock & Reset Controller on SM8450
+
+maintainers:
+ - Vladimir Zapolskiy <vladimir.zapolskiy@linaro.org>
+ - Jagadeesh Kona <quic_jkona@quicinc.com>
+
+description: |
+ Qualcomm camera clock control module provides the clocks, resets and power
+ domains on SM8450.
+
+ See also:
+ include/dt-bindings/clock/qcom,sm8450-camcc.h
+ include/dt-bindings/clock/qcom,sm8550-camcc.h
+ include/dt-bindings/clock/qcom,sm8650-camcc.h
+
+properties:
+ compatible:
+ enum:
+ - qcom,sm8450-camcc
+ - qcom,sm8475-camcc
+ - qcom,sm8550-camcc
+ - qcom,sm8650-camcc
+
+ clocks:
+ items:
+ - description: Camera AHB clock from GCC
+ - description: Board XO source
+ - description: Board active XO source
+ - description: Sleep clock source
+
+ power-domains:
+ description:
+ Power domains required for the clock controller to operate
+ items:
+ - description: MMCX power domain
+ - description: MXC power domain
+
+ required-opps:
+ description:
+ OPP nodes that describe required performance points on power domains
+ items:
+ - description: MMCX performance point
+ - description: MXC performance point
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - clocks
+ - power-domains
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,sc8280xp-camcc
+ - qcom,sm8450-camcc
+ - qcom,sm8550-camcc
+ then:
+ required:
+ - required-opps
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,gcc-sm8450.h>
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ #include <dt-bindings/power/qcom,rpmhpd.h>
+ clock-controller@ade0000 {
+ compatible = "qcom,sm8450-camcc";
+ reg = <0xade0000 0x20000>;
+ clocks = <&gcc GCC_CAMERA_AHB_CLK>,
+ <&rpmhcc RPMH_CXO_CLK>,
+ <&rpmhcc RPMH_CXO_CLK_A>,
+ <&sleep_clk>;
+ power-domains = <&rpmhpd RPMHPD_MMCX>,
+ <&rpmhpd RPMHPD_MXC>;
+ required-opps = <&rpmhpd_opp_low_svs>,
+ <&rpmhpd_opp_low_svs>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sm8450-dispcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm8450-dispcc.yaml
new file mode 100644
index 000000000000..bd131a1ff165
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sm8450-dispcc.yaml
@@ -0,0 +1,86 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sm8450-dispcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Display Clock & Reset Controller for SM8450
+
+maintainers:
+ - Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+
+description: |
+ Qualcomm display clock control module provides the clocks, resets and power
+ domains on SM8450.
+
+ See also: include/dt-bindings/clock/qcom,sm8450-dispcc.h
+
+properties:
+ compatible:
+ enum:
+ - qcom,sm8450-dispcc
+ - qcom,sm8475-dispcc
+
+ clocks:
+ minItems: 3
+ items:
+ - description: Board XO source
+ - description: Board Always On XO source
+ - description: Display's AHB clock
+ - description: sleep clock
+ - description: Byte clock from DSI PHY0
+ - description: Pixel clock from DSI PHY0
+ - description: Byte clock from DSI PHY1
+ - description: Pixel clock from DSI PHY1
+ - description: Link clock from DP PHY0
+ - description: VCO DIV clock from DP PHY0
+ - description: Link clock from DP PHY1
+ - description: VCO DIV clock from DP PHY1
+ - description: Link clock from DP PHY2
+ - description: VCO DIV clock from DP PHY2
+ - description: Link clock from DP PHY3
+ - description: VCO DIV clock from DP PHY3
+
+ power-domains:
+ description:
+ A phandle and PM domain specifier for the MMCX power domain.
+ maxItems: 1
+
+ required-opps:
+ description:
+ A phandle to an OPP node describing required MMCX performance point.
+ maxItems: 1
+
+required:
+ - compatible
+ - clocks
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,gcc-sm8450.h>
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ #include <dt-bindings/power/qcom,rpmhpd.h>
+ clock-controller@af00000 {
+ compatible = "qcom,sm8450-dispcc";
+ reg = <0x0af00000 0x10000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&rpmhcc RPMH_CXO_CLK_A>,
+ <&gcc GCC_DISP_AHB_CLK>,
+ <&sleep_clk>,
+ <&dsi0_phy 0>,
+ <&dsi0_phy 1>,
+ <&dsi1_phy 0>,
+ <&dsi1_phy 1>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ power-domains = <&rpmhpd RPMHPD_MMCX>;
+ required-opps = <&rpmhpd_opp_low_svs>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sm8450-gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm8450-gpucc.yaml
new file mode 100644
index 000000000000..44380f6f8136
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sm8450-gpucc.yaml
@@ -0,0 +1,75 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sm8450-gpucc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Graphics Clock & Reset Controller on SM8450
+
+maintainers:
+ - Konrad Dybcio <konradybcio@kernel.org>
+
+description: |
+ Qualcomm graphics clock control module provides the clocks, resets and power
+ domains on Qualcomm SoCs.
+
+ See also::
+ include/dt-bindings/clock/qcom,milos-gpucc.h
+ include/dt-bindings/clock/qcom,sar2130p-gpucc.h
+ include/dt-bindings/clock/qcom,sm4450-gpucc.h
+ include/dt-bindings/clock/qcom,sm8450-gpucc.h
+ include/dt-bindings/clock/qcom,sm8550-gpucc.h
+ include/dt-bindings/reset/qcom,sm8450-gpucc.h
+ include/dt-bindings/reset/qcom,sm8650-gpucc.h
+ include/dt-bindings/reset/qcom,x1e80100-gpucc.h
+
+properties:
+ compatible:
+ enum:
+ - qcom,milos-gpucc
+ - qcom,sar2130p-gpucc
+ - qcom,sm4450-gpucc
+ - qcom,sm8450-gpucc
+ - qcom,sm8475-gpucc
+ - qcom,sm8550-gpucc
+ - qcom,sm8650-gpucc
+ - qcom,x1e80100-gpucc
+ - qcom,x1p42100-gpucc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: GPLL0 main branch source
+ - description: GPLL0 div branch source
+
+required:
+ - compatible
+ - clocks
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,gcc-sm8450.h>
+ #include <dt-bindings/clock/qcom,rpmh.h>
+
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ clock-controller@3d90000 {
+ compatible = "qcom,sm8450-gpucc";
+ reg = <0 0x03d90000 0 0xa000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&gcc GCC_GPU_GPLL0_CLK_SRC>,
+ <&gcc GCC_GPU_GPLL0_DIV_CLK_SRC>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sm8450-videocc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm8450-videocc.yaml
new file mode 100644
index 000000000000..fcd2727dae46
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sm8450-videocc.yaml
@@ -0,0 +1,88 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sm8450-videocc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Video Clock & Reset Controller on SM8450
+
+maintainers:
+ - Taniya Das <quic_tdas@quicinc.com>
+ - Jagadeesh Kona <quic_jkona@quicinc.com>
+
+description: |
+ Qualcomm video clock control module provides the clocks, resets and power
+ domains on SM8450.
+
+ See also:
+ include/dt-bindings/clock/qcom,sm8450-videocc.h
+ include/dt-bindings/clock/qcom,sm8650-videocc.h
+
+properties:
+ compatible:
+ enum:
+ - qcom,sm8450-videocc
+ - qcom,sm8475-videocc
+ - qcom,sm8550-videocc
+ - qcom,sm8650-videocc
+ - qcom,x1e80100-videocc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Video AHB clock from GCC
+
+ power-domains:
+ description:
+ Power domains required for the clock controller to operate
+ items:
+ - description: MMCX power domain
+ - description: MXC power domain
+
+ required-opps:
+ description:
+ OPP nodes that describe required performance points on power domains
+ items:
+ - description: MMCX performance point
+ - description: MXC performance point
+
+required:
+ - compatible
+ - clocks
+ - power-domains
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,sm8450-videocc
+ - qcom,sm8550-videocc
+ then:
+ required:
+ - required-opps
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,gcc-sm8450.h>
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ #include <dt-bindings/power/qcom,rpmhpd.h>
+ videocc: clock-controller@aaf0000 {
+ compatible = "qcom,sm8450-videocc";
+ reg = <0x0aaf0000 0x10000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&gcc GCC_VIDEO_AHB_CLK>;
+ power-domains = <&rpmhpd RPMHPD_MMCX>,
+ <&rpmhpd RPMHPD_MXC>;
+ required-opps = <&rpmhpd_opp_low_svs>,
+ <&rpmhpd_opp_low_svs>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sm8550-dispcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm8550-dispcc.yaml
new file mode 100644
index 000000000000..30e4b4631575
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sm8550-dispcc.yaml
@@ -0,0 +1,101 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sm8550-dispcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Display Clock & Reset Controller for SM8550
+
+maintainers:
+ - Bjorn Andersson <andersson@kernel.org>
+ - Neil Armstrong <neil.armstrong@linaro.org>
+
+description: |
+ Qualcomm display clock control module provides the clocks, resets and power
+ domains on SM8550, SM8650, SM8750 and few other platforms.
+
+ See also:
+ - include/dt-bindings/clock/qcom,sm8550-dispcc.h
+ - include/dt-bindings/clock/qcom,sm8650-dispcc.h
+ - include/dt-bindings/clock/qcom,sm8750-dispcc.h
+ - include/dt-bindings/clock/qcom,x1e80100-dispcc.h
+
+properties:
+ compatible:
+ enum:
+ - qcom,sar2130p-dispcc
+ - qcom,sm8550-dispcc
+ - qcom,sm8650-dispcc
+ - qcom,sm8750-dispcc
+ - qcom,x1e80100-dispcc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Board Always On XO source
+ - description: Display's AHB clock
+ - description: sleep clock
+ - description: Byte clock from DSI PHY0
+ - description: Pixel clock from DSI PHY0
+ - description: Byte clock from DSI PHY1
+ - description: Pixel clock from DSI PHY1
+ - description: Link clock from DP PHY0
+ - description: VCO DIV clock from DP PHY0
+ - description: Link clock from DP PHY1
+ - description: VCO DIV clock from DP PHY1
+ - description: Link clock from DP PHY2
+ - description: VCO DIV clock from DP PHY2
+ - description: Link clock from DP PHY3
+ - description: VCO DIV clock from DP PHY3
+
+ power-domains:
+ description:
+ A phandle and PM domain specifier for the MMCX power domain.
+ maxItems: 1
+
+ required-opps:
+ description:
+ A phandle to an OPP node describing required MMCX performance point.
+ maxItems: 1
+
+required:
+ - compatible
+ - clocks
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,sm8550-gcc.h>
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ #include <dt-bindings/power/qcom,rpmhpd.h>
+ clock-controller@af00000 {
+ compatible = "qcom,sm8550-dispcc";
+ reg = <0x0af00000 0x10000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&rpmhcc RPMH_CXO_CLK_A>,
+ <&gcc GCC_DISP_AHB_CLK>,
+ <&sleep_clk>,
+ <&dsi0_phy 0>,
+ <&dsi0_phy 1>,
+ <&dsi1_phy 0>,
+ <&dsi1_phy 1>,
+ <&dp0_phy 0>,
+ <&dp0_phy 1>,
+ <&dp1_phy 0>,
+ <&dp1_phy 1>,
+ <&dp2_phy 0>,
+ <&dp2_phy 1>,
+ <&dp3_phy 0>,
+ <&dp3_phy 1>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ power-domains = <&rpmhpd RPMHPD_MMCX>;
+ required-opps = <&rpmhpd_opp_low_svs>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sm8550-gcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm8550-gcc.yaml
new file mode 100644
index 000000000000..c4e9b9bb63f5
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sm8550-gcc.yaml
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sm8550-gcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on SM8550
+
+maintainers:
+ - Bjorn Andersson <andersson@kernel.org>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on SM8550
+
+ See also: include/dt-bindings/clock/qcom,sm8550-gcc.h
+
+properties:
+ compatible:
+ const: qcom,sm8550-gcc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Sleep clock source
+ - description: PCIE 0 Pipe clock source
+ - description: PCIE 1 Pipe clock source
+ - description: PCIE 1 Phy Auxiliary clock source
+ - description: UFS Phy Rx symbol 0 clock source
+ - description: UFS Phy Rx symbol 1 clock source
+ - description: UFS Phy Tx symbol 0 clock source
+ - description: USB3 Phy wrapper pipe clock source
+
+required:
+ - compatible
+ - clocks
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ clock-controller@100000 {
+ compatible = "qcom,sm8550-gcc";
+ reg = <0x00100000 0x001f4200>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>, <&sleep_clk>,
+ <&pcie0_phy>,
+ <&pcie1_phy>,
+ <&pcie_1_phy_aux_clk>,
+ <&ufs_mem_phy 0>,
+ <&ufs_mem_phy 1>,
+ <&ufs_mem_phy 2>,
+ <&usb_1_qmpphy>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sm8550-tcsr.yaml b/Documentation/devicetree/bindings/clock/qcom,sm8550-tcsr.yaml
new file mode 100644
index 000000000000..2c992b3437f2
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sm8550-tcsr.yaml
@@ -0,0 +1,67 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sm8550-tcsr.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm TCSR Clock Controller on SM8550
+
+maintainers:
+ - Bjorn Andersson <andersson@kernel.org>
+ - Taniya Das <taniya.das@oss.qualcomm.com>
+
+description: |
+ Qualcomm TCSR clock control module provides the clocks, resets and
+ power domains on SM8550
+
+ See also:
+ - include/dt-bindings/clock/qcom,glymur-tcsr.h
+ - include/dt-bindings/clock/qcom,sm8550-tcsr.h
+ - include/dt-bindings/clock/qcom,sm8650-tcsr.h
+ - include/dt-bindings/clock/qcom,sm8750-tcsr.h
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - qcom,glymur-tcsr
+ - qcom,milos-tcsr
+ - qcom,sar2130p-tcsr
+ - qcom,sm8550-tcsr
+ - qcom,sm8650-tcsr
+ - qcom,sm8750-tcsr
+ - qcom,x1e80100-tcsr
+ - const: syscon
+
+ clocks:
+ items:
+ - description: TCXO pad clock
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ '#reset-cells':
+ const: 1
+
+required:
+ - compatible
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+
+ clock-controller@1fc0000 {
+ compatible = "qcom,sm8550-tcsr", "syscon";
+ reg = <0x1fc0000 0x30000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sm8650-gcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm8650-gcc.yaml
new file mode 100644
index 000000000000..c7143e2abc80
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sm8650-gcc.yaml
@@ -0,0 +1,66 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sm8650-gcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on SM8650
+
+maintainers:
+ - Bjorn Andersson <andersson@kernel.org>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on SM8650
+
+ See also: include/dt-bindings/clock/qcom,sm8650-gcc.h
+
+properties:
+ compatible:
+ const: qcom,sm8650-gcc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Board Always On XO source
+ - description: Sleep clock source
+ - description: PCIE 0 Pipe clock source
+ - description: PCIE 1 Pipe clock source
+ - description: PCIE 1 Phy Auxiliary clock source
+ - description: UFS Phy Rx symbol 0 clock source
+ - description: UFS Phy Rx symbol 1 clock source
+ - description: UFS Phy Tx symbol 0 clock source
+ - description: USB3 Phy wrapper pipe clock source
+
+required:
+ - compatible
+ - clocks
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ clock-controller@100000 {
+ compatible = "qcom,sm8650-gcc";
+ reg = <0x00100000 0x001f4200>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&rpmhcc RPMH_CXO_CLK_A>,
+ <&sleep_clk>,
+ <&pcie0_phy>,
+ <&pcie1_phy>,
+ <&pcie_1_phy_aux_clk>,
+ <&ufs_mem_phy 0>,
+ <&ufs_mem_phy 1>,
+ <&ufs_mem_phy 2>,
+ <&usb_1_qmpphy>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sm8750-gcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm8750-gcc.yaml
new file mode 100644
index 000000000000..aab7039fd28d
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sm8750-gcc.yaml
@@ -0,0 +1,62 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sm8750-gcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on SM8750
+
+maintainers:
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on SM8750
+
+ See also: include/dt-bindings/clock/qcom,sm8750-gcc.h
+
+properties:
+ compatible:
+ const: qcom,sm8750-gcc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Board Always On XO source
+ - description: Sleep clock source
+ - description: PCIE 0 Pipe clock source
+ - description: UFS Phy Rx symbol 0 clock source
+ - description: UFS Phy Rx symbol 1 clock source
+ - description: UFS Phy Tx symbol 0 clock source
+ - description: USB3 Phy wrapper pipe clock source
+
+required:
+ - compatible
+ - clocks
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ clock-controller@100000 {
+ compatible = "qcom,sm8750-gcc";
+ reg = <0x00100000 0x001f4200>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&rpmhcc RPMH_CXO_CLK_A>,
+ <&sleep_clk>,
+ <&pcie0_phy>,
+ <&ufs_mem_phy 0>,
+ <&ufs_mem_phy 1>,
+ <&ufs_mem_phy 2>,
+ <&usb_1_qmpphy>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,spmi-clkdiv.txt b/Documentation/devicetree/bindings/clock/qcom,spmi-clkdiv.txt
deleted file mode 100644
index 7474aba36607..000000000000
--- a/Documentation/devicetree/bindings/clock/qcom,spmi-clkdiv.txt
+++ /dev/null
@@ -1,59 +0,0 @@
-Qualcomm Technologies, Inc. SPMI PMIC clock divider (clkdiv)
-
-clkdiv configures the clock frequency of a set of outputs on the PMIC.
-These clocks are typically wired through alternate functions on
-gpio pins.
-
-=======================
-Properties
-=======================
-
-- compatible
- Usage: required
- Value type: <string>
- Definition: must be "qcom,spmi-clkdiv".
-
-- reg
- Usage: required
- Value type: <prop-encoded-array>
- Definition: base address of CLKDIV peripherals.
-
-- qcom,num-clkdivs
- Usage: required
- Value type: <u32>
- Definition: number of CLKDIV peripherals.
-
-- clocks:
- Usage: required
- Value type: <prop-encoded-array>
- Definition: reference to the xo clock.
-
-- clock-names:
- Usage: required
- Value type: <stringlist>
- Definition: must be "xo".
-
-- #clock-cells:
- Usage: required
- Value type: <u32>
- Definition: shall contain 1.
-
-=======
-Example
-=======
-
-pm8998_clk_divs: clock-controller@5b00 {
- compatible = "qcom,spmi-clkdiv";
- reg = <0x5b00>;
- #clock-cells = <1>;
- qcom,num-clkdivs = <3>;
- clocks = <&xo_board>;
- clock-names = "xo";
-
- assigned-clocks = <&pm8998_clk_divs 1>,
- <&pm8998_clk_divs 2>,
- <&pm8998_clk_divs 3>;
- assigned-clock-rates = <9600000>,
- <9600000>,
- <9600000>;
-};
diff --git a/Documentation/devicetree/bindings/clock/qcom,spmi-clkdiv.yaml b/Documentation/devicetree/bindings/clock/qcom,spmi-clkdiv.yaml
new file mode 100644
index 000000000000..16c95ad6c9d1
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,spmi-clkdiv.yaml
@@ -0,0 +1,71 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,spmi-clkdiv.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm SPMI PMIC clock divider
+
+maintainers:
+ - Bjorn Andersson <andersson@kernel.org>
+ - Stephen Boyd <sboyd@kernel.org>
+
+description: |
+ Qualcomm SPMI PMIC clock divider configures the clock frequency of a set of
+ outputs on the PMIC. These clocks are typically wired through alternate
+ functions on GPIO pins.
+
+properties:
+ compatible:
+ const: qcom,spmi-clkdiv
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: Board XO source
+
+ clock-names:
+ items:
+ - const: xo
+
+ "#clock-cells":
+ const: 1
+
+ qcom,num-clkdivs:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: Number of CLKDIV peripherals.
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - "#clock-cells"
+ - qcom,num-clkdivs
+
+additionalProperties: false
+
+examples:
+ - |
+ pmic {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ clock-controller@5b00 {
+ compatible = "qcom,spmi-clkdiv";
+ reg = <0x5b00>;
+ clocks = <&xo_board>;
+ clock-names = "xo";
+ #clock-cells = <1>;
+ qcom,num-clkdivs = <3>;
+
+ assigned-clocks = <&pm8998_clk_divs 1>,
+ <&pm8998_clk_divs 2>,
+ <&pm8998_clk_divs 3>;
+ assigned-clock-rates = <9600000>,
+ <9600000>,
+ <9600000>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/qcom,videocc.txt b/Documentation/devicetree/bindings/clock/qcom,videocc.txt
deleted file mode 100644
index e7c035afa778..000000000000
--- a/Documentation/devicetree/bindings/clock/qcom,videocc.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-Qualcomm Video Clock & Reset Controller Binding
------------------------------------------------
-
-Required properties :
-- compatible : shall contain "qcom,sdm845-videocc"
-- reg : shall contain base register location and length
-- #clock-cells : from common clock binding, shall contain 1.
-- #power-domain-cells : from generic power domain binding, shall contain 1.
-
-Optional properties :
-- #reset-cells : from common reset binding, shall contain 1.
-
-Example:
- videocc: clock-controller@ab00000 {
- compatible = "qcom,sdm845-videocc";
- reg = <0xab00000 0x10000>;
- #clock-cells = <1>;
- #power-domain-cells = <1>;
- };
diff --git a/Documentation/devicetree/bindings/clock/qcom,videocc.yaml b/Documentation/devicetree/bindings/clock/qcom,videocc.yaml
new file mode 100644
index 000000000000..f4ff9acef9d5
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,videocc.yaml
@@ -0,0 +1,166 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,videocc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Video Clock & Reset Controller
+
+maintainers:
+ - Taniya Das <quic_tdas@quicinc.com>
+
+description: |
+ Qualcomm video clock control module provides the clocks, resets and power
+ domains on Qualcomm SoCs.
+
+ See also::
+ include/dt-bindings/clock/qcom,sm6350-videocc.h
+ include/dt-bindings/clock/qcom,videocc-sc7180.h
+ include/dt-bindings/clock/qcom,videocc-sc7280.h
+ include/dt-bindings/clock/qcom,videocc-sdm845.h
+ include/dt-bindings/clock/qcom,videocc-sm8150.h
+ include/dt-bindings/clock/qcom,videocc-sm8250.h
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - qcom,sc7180-videocc
+ - qcom,sc7280-videocc
+ - qcom,sdm845-videocc
+ - qcom,sm6350-videocc
+ - qcom,sm8150-videocc
+ - qcom,sm8250-videocc
+ - items:
+ - const: qcom,sc8180x-videocc
+ - const: qcom,sm8150-videocc
+
+ clocks:
+ minItems: 1
+ maxItems: 3
+
+ clock-names:
+ minItems: 1
+ maxItems: 3
+
+ power-domains:
+ description:
+ A phandle and PM domain specifier for the MMCX power domain.
+ maxItems: 1
+
+ required-opps:
+ description:
+ A phandle to an OPP node describing required MMCX performance point.
+ maxItems: 1
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+ - if:
+ properties:
+ compatible:
+ enum:
+ - qcom,sc7180-videocc
+ - qcom,sdm845-videocc
+ then:
+ properties:
+ clocks:
+ items:
+ - description: Board XO source
+ clock-names:
+ items:
+ - const: bi_tcxo
+
+ - if:
+ properties:
+ compatible:
+ enum:
+ - qcom,sc7280-videocc
+ then:
+ properties:
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Board active XO source
+ clock-names:
+ items:
+ - const: bi_tcxo
+ - const: bi_tcxo_ao
+
+ - if:
+ properties:
+ compatible:
+ enum:
+ - qcom,sm6350-videocc
+ then:
+ properties:
+ clocks:
+ items:
+ - description: Video AHB clock from GCC
+ - description: Board XO source
+ - description: Sleep Clock source
+ clock-names:
+ items:
+ - const: iface
+ - const: bi_tcxo
+ - const: sleep_clk
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,sm8150-videocc
+ then:
+ properties:
+ clocks:
+ items:
+ - description: AHB
+ - description: Board XO source
+ clock-names:
+ items:
+ - const: iface
+ - const: bi_tcxo
+
+ - if:
+ properties:
+ compatible:
+ enum:
+ - qcom,sm8250-videocc
+ then:
+ properties:
+ clocks:
+ items:
+ - description: AHB
+ - description: Board XO source
+ - description: Board active XO source
+ clock-names:
+ items:
+ - const: iface
+ - const: bi_tcxo
+ - const: bi_tcxo_ao
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ #include <dt-bindings/power/qcom,rpmhpd.h>
+ clock-controller@ab00000 {
+ compatible = "qcom,sdm845-videocc";
+ reg = <0x0ab00000 0x10000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>;
+ clock-names = "bi_tcxo";
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ power-domains = <&rpmhpd RPMHPD_MMCX>;
+ required-opps = <&rpmhpd_opp_low_svs>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,x1e80100-camcc.yaml b/Documentation/devicetree/bindings/clock/qcom,x1e80100-camcc.yaml
new file mode 100644
index 000000000000..938a2f1ff3fc
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,x1e80100-camcc.yaml
@@ -0,0 +1,75 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,x1e80100-camcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Camera Clock & Reset Controller on x1e80100
+
+maintainers:
+ - Bryan O'Donoghue <bryan.odonoghue@linaro.org>
+
+description: |
+ Qualcomm camera clock control module provides the clocks, resets and power
+ domains on x1e80100.
+
+ See also:
+ include/dt-bindings/clock/qcom,x1e80100-camcc.h
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+properties:
+ compatible:
+ enum:
+ - qcom,x1e80100-camcc
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: Camera AHB clock from GCC
+ - description: Board XO source
+ - description: Board active XO source
+ - description: Sleep clock source
+
+ power-domains:
+ items:
+ - description: A phandle to the MXC power-domain
+ - description: A phandle to the MMCX power-domain
+
+ required-opps:
+ items:
+ - description: A phandle to an OPP node describing MXC performance points
+ - description: A phandle to an OPP node describing MMCX performance points
+
+required:
+ - compatible
+ - clocks
+ - power-domains
+ - required-opps
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,x1e80100-gcc.h>
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ #include <dt-bindings/power/qcom,rpmhpd.h>
+ clock-controller@ade0000 {
+ compatible = "qcom,x1e80100-camcc";
+ reg = <0xade0000 0x20000>;
+ clocks = <&gcc GCC_CAMERA_AHB_CLK>,
+ <&rpmhcc RPMH_CXO_CLK>,
+ <&rpmhcc RPMH_CXO_CLK_A>,
+ <&sleep_clk>;
+ power-domains = <&rpmhpd RPMHPD_MXC>,
+ <&rpmhpd RPMHPD_MMCX>;
+ required-opps = <&rpmhpd_opp_low_svs>,
+ <&rpmhpd_opp_low_svs>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,x1e80100-gcc.yaml b/Documentation/devicetree/bindings/clock/qcom,x1e80100-gcc.yaml
new file mode 100644
index 000000000000..68dde0720c71
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,x1e80100-gcc.yaml
@@ -0,0 +1,77 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,x1e80100-gcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on X1E80100
+
+maintainers:
+ - Rajendra Nayak <quic_rjendra@quicinc.com>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on X1E80100
+
+ See also: include/dt-bindings/clock/qcom,x1e80100-gcc.h
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - const: qcom,x1p42100-gcc
+ - const: qcom,x1e80100-gcc
+ - const: qcom,x1e80100-gcc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Sleep clock source
+ - description: PCIe 3 pipe clock
+ - description: PCIe 4 pipe clock
+ - description: PCIe 5 pipe clock
+ - description: PCIe 6a pipe clock
+ - description: PCIe 6b pipe clock
+ - description: USB QMP Phy 0 clock source
+ - description: USB QMP Phy 1 clock source
+ - description: USB QMP Phy 2 clock source
+
+ power-domains:
+ description:
+ A phandle and PM domain specifier for the CX power domain.
+ maxItems: 1
+
+required:
+ - compatible
+ - clocks
+ - power-domains
+ - '#power-domain-cells'
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/power/qcom,rpmhpd.h>
+ clock-controller@100000 {
+ compatible = "qcom,x1e80100-gcc";
+ reg = <0x00100000 0x200000>;
+ clocks = <&bi_tcxo_div2>,
+ <&sleep_clk>,
+ <&pcie3_phy>,
+ <&pcie4_phy>,
+ <&pcie5_phy>,
+ <&pcie6a_phy>,
+ <&pcie6b_phy>,
+ <&usb_1_ss0_qmpphy 0>,
+ <&usb_1_ss1_qmpphy 1>,
+ <&usb_1_ss2_qmpphy 2>;
+ power-domains = <&rpmhpd RPMHPD_CX>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/qoriq-clock.txt b/Documentation/devicetree/bindings/clock/qoriq-clock.txt
deleted file mode 100644
index 97f46adac85f..000000000000
--- a/Documentation/devicetree/bindings/clock/qoriq-clock.txt
+++ /dev/null
@@ -1,204 +0,0 @@
-* Clock Block on Freescale QorIQ Platforms
-
-Freescale QorIQ chips take primary clocking input from the external
-SYSCLK signal. The SYSCLK input (frequency) is multiplied using
-multiple phase locked loops (PLL) to create a variety of frequencies
-which can then be passed to a variety of internal logic, including
-cores and peripheral IP blocks.
-Please refer to the Reference Manual for details.
-
-All references to "1.0" and "2.0" refer to the QorIQ chassis version to
-which the chip complies.
-
-Chassis Version Example Chips
---------------- -------------
-1.0 p4080, p5020, p5040
-2.0 t4240, b4860
-
-1. Clock Block Binding
-
-Required properties:
-- compatible: Should contain a chip-specific clock block compatible
- string and (if applicable) may contain a chassis-version clock
- compatible string.
-
- Chip-specific strings are of the form "fsl,<chip>-clockgen", such as:
- * "fsl,p2041-clockgen"
- * "fsl,p3041-clockgen"
- * "fsl,p4080-clockgen"
- * "fsl,p5020-clockgen"
- * "fsl,p5040-clockgen"
- * "fsl,t4240-clockgen"
- * "fsl,b4420-clockgen"
- * "fsl,b4860-clockgen"
- * "fsl,ls1012a-clockgen"
- * "fsl,ls1021a-clockgen"
- * "fsl,ls1043a-clockgen"
- * "fsl,ls1046a-clockgen"
- * "fsl,ls1088a-clockgen"
- * "fsl,ls2080a-clockgen"
- Chassis-version clock strings include:
- * "fsl,qoriq-clockgen-1.0": for chassis 1.0 clocks
- * "fsl,qoriq-clockgen-2.0": for chassis 2.0 clocks
-- reg: Describes the address of the device's resources within the
- address space defined by its parent bus, and resource zero
- represents the clock register set
-
-Optional properties:
-- ranges: Allows valid translation between child's address space and
- parent's. Must be present if the device has sub-nodes.
-- #address-cells: Specifies the number of cells used to represent
- physical base addresses. Must be present if the device has
- sub-nodes and set to 1 if present
-- #size-cells: Specifies the number of cells used to represent
- the size of an address. Must be present if the device has
- sub-nodes and set to 1 if present
-- clock-frequency: Input system clock frequency (SYSCLK)
-- clocks: If clock-frequency is not specified, sysclk may be provided
- as an input clock. Either clock-frequency or clocks must be
- provided.
- A second input clock, called "coreclk", may be provided if
- core PLLs are based on a different input clock from the
- platform PLL.
-- clock-names: Required if a coreclk is present. Valid names are
- "sysclk" and "coreclk".
-
-2. Clock Provider
-
-The clockgen node should act as a clock provider, though in older device
-trees the children of the clockgen node are the clock providers.
-
-When the clockgen node is a clock provider, #clock-cells = <2>.
-The first cell of the clock specifier is the clock type, and the
-second cell is the clock index for the specified type.
-
- Type# Name Index Cell
- 0 sysclk must be 0
- 1 cmux index (n in CLKCnCSR)
- 2 hwaccel index (n in CLKCGnHWACSR)
- 3 fman 0 for fm1, 1 for fm2
- 4 platform pll 0=pll, 1=pll/2, 2=pll/3, 3=pll/4
- 4=pll/5, 5=pll/6, 6=pll/7, 7=pll/8
- 5 coreclk must be 0
-
-3. Example
-
- clockgen: global-utilities@e1000 {
- compatible = "fsl,p5020-clockgen", "fsl,qoriq-clockgen-1.0";
- clock-frequency = <133333333>;
- reg = <0xe1000 0x1000>;
- #clock-cells = <2>;
- };
-
- fman@400000 {
- ...
- clocks = <&clockgen 3 0>;
- ...
- };
-}
-4. Legacy Child Nodes
-
-NOTE: These nodes are deprecated. Kernels should continue to support
-device trees with these nodes, but new device trees should not use them.
-
-Most of the bindings are from the common clock binding[1].
- [1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-
-Required properties:
-- compatible : Should include one of the following:
- * "fsl,qoriq-core-pll-1.0" for core PLL clocks (v1.0)
- * "fsl,qoriq-core-pll-2.0" for core PLL clocks (v2.0)
- * "fsl,qoriq-core-mux-1.0" for core mux clocks (v1.0)
- * "fsl,qoriq-core-mux-2.0" for core mux clocks (v2.0)
- * "fsl,qoriq-sysclk-1.0": for input system clock (v1.0).
- It takes parent's clock-frequency as its clock.
- * "fsl,qoriq-sysclk-2.0": for input system clock (v2.0).
- It takes parent's clock-frequency as its clock.
- * "fsl,qoriq-platform-pll-1.0" for the platform PLL clock (v1.0)
- * "fsl,qoriq-platform-pll-2.0" for the platform PLL clock (v2.0)
-- #clock-cells: From common clock binding. The number of cells in a
- clock-specifier. Should be <0> for "fsl,qoriq-sysclk-[1,2].0"
- clocks, or <1> for "fsl,qoriq-core-pll-[1,2].0" clocks.
- For "fsl,qoriq-core-pll-[1,2].0" clocks, the single
- clock-specifier cell may take the following values:
- * 0 - equal to the PLL frequency
- * 1 - equal to the PLL frequency divided by 2
- * 2 - equal to the PLL frequency divided by 4
-
-Recommended properties:
-- clocks: Should be the phandle of input parent clock
-- clock-names: From common clock binding, indicates the clock name
-- clock-output-names: From common clock binding, indicates the names of
- output clocks
-- reg: Should be the offset and length of clock block base address.
- The length should be 4.
-
-Legacy Example:
-/ {
- clockgen: global-utilities@e1000 {
- compatible = "fsl,p5020-clockgen", "fsl,qoriq-clockgen-1.0";
- ranges = <0x0 0xe1000 0x1000>;
- clock-frequency = <133333333>;
- reg = <0xe1000 0x1000>;
- #address-cells = <1>;
- #size-cells = <1>;
-
- sysclk: sysclk {
- #clock-cells = <0>;
- compatible = "fsl,qoriq-sysclk-1.0";
- clock-output-names = "sysclk";
- };
-
- pll0: pll0@800 {
- #clock-cells = <1>;
- reg = <0x800 0x4>;
- compatible = "fsl,qoriq-core-pll-1.0";
- clocks = <&sysclk>;
- clock-output-names = "pll0", "pll0-div2";
- };
-
- pll1: pll1@820 {
- #clock-cells = <1>;
- reg = <0x820 0x4>;
- compatible = "fsl,qoriq-core-pll-1.0";
- clocks = <&sysclk>;
- clock-output-names = "pll1", "pll1-div2";
- };
-
- mux0: mux0@0 {
- #clock-cells = <0>;
- reg = <0x0 0x4>;
- compatible = "fsl,qoriq-core-mux-1.0";
- clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>;
- clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2";
- clock-output-names = "cmux0";
- };
-
- mux1: mux1@20 {
- #clock-cells = <0>;
- reg = <0x20 0x4>;
- compatible = "fsl,qoriq-core-mux-1.0";
- clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>;
- clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2";
- clock-output-names = "cmux1";
- };
-
- platform-pll: platform-pll@c00 {
- #clock-cells = <1>;
- reg = <0xc00 0x4>;
- compatible = "fsl,qoriq-platform-pll-1.0";
- clocks = <&sysclk>;
- clock-output-names = "platform-pll", "platform-pll-div2";
- };
- };
-};
-
-Example for legacy clock consumer:
-
-/ {
- cpu0: PowerPC,e5500@0 {
- ...
- clocks = <&mux0>;
- ...
- };
-};
diff --git a/Documentation/devicetree/bindings/clock/raspberrypi,rp1-clocks.yaml b/Documentation/devicetree/bindings/clock/raspberrypi,rp1-clocks.yaml
new file mode 100644
index 000000000000..cc4491f7ee5f
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/raspberrypi,rp1-clocks.yaml
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/raspberrypi,rp1-clocks.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: RaspberryPi RP1 clock generator
+
+maintainers:
+ - A. della Porta <andrea.porta@suse.com>
+
+description: |
+ The RP1 contains a clock generator designed as three PLLs (CORE, AUDIO,
+ VIDEO), and each PLL output can be programmed through dividers to generate
+ the clocks to drive the sub-peripherals embedded inside the chipset.
+
+ Link to datasheet:
+ https://datasheets.raspberrypi.com/rp1/rp1-peripherals.pdf
+
+properties:
+ compatible:
+ const: raspberrypi,rp1-clocks
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+ description:
+ The available clocks are defined in
+ include/dt-bindings/clock/raspberrypi,rp1-clocks.h.
+
+ clocks:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/raspberrypi,rp1-clocks.h>
+
+ rp1 {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ clocks@c040018000 {
+ compatible = "raspberrypi,rp1-clocks";
+ reg = <0xc0 0x40018000 0x0 0x10038>;
+ #clock-cells = <1>;
+ clocks = <&clk_rp1_xosc>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/renesas,5p35023.yaml b/Documentation/devicetree/bindings/clock/renesas,5p35023.yaml
new file mode 100644
index 000000000000..162d38035188
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/renesas,5p35023.yaml
@@ -0,0 +1,90 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/renesas,5p35023.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas 5p35023 VersaClock 3 programmable I2C clock generator
+
+maintainers:
+ - Biju Das <biju.das.jz@bp.renesas.com>
+
+description: |
+ The 5P35023 is a VersaClock programmable clock generator and
+ is designed for low-power, consumer, and high-performance PCI
+ express applications. The 5P35023 device is a three PLL
+ architecture design, and each PLL is individually programmable
+ and allowing for up to 6 unique frequency outputs.
+
+ An internal OTP memory allows the user to store the configuration
+ in the device. After power up, the user can change the device register
+ settings through the I2C interface when I2C mode is selected.
+
+ The driver can read a full register map from the DT, and will use that
+ register map to initialize the attached part (via I2C) when the system
+ boots. Any configuration not supported by the common clock framework
+ must be done via the full register map, including optimized settings.
+
+ Link to datasheet:
+ https://www.renesas.com/us/en/products/clocks-timing/clock-generation/programmable-clocks/5p35023-versaclock-3s-programmable-clock-generator
+
+properties:
+ compatible:
+ enum:
+ - renesas,5l35023
+ - renesas,5p35023
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ description:
+ The index in the assigned-clocks is mapped to the output clock as below
+ 0 - REF, 1 - SE1, 2 - SE2, 3 - SE3, 4 - DIFF1, 5 - DIFF2.
+ const: 1
+
+ clocks:
+ maxItems: 1
+
+ renesas,settings:
+ description: Optional, complete register map of the device.
+ Optimized settings for the device must be provided in full
+ and are written during initialization.
+ $ref: /schemas/types.yaml#/definitions/uint8-array
+ maxItems: 37
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ versa3: clock-generator@68 {
+ compatible = "renesas,5p35023";
+ reg = <0x68>;
+ #clock-cells = <1>;
+
+ clocks = <&x1>;
+
+ renesas,settings = [
+ 80 00 11 19 4c 02 23 7f 83 19 08 a9 5f 25 24 bf
+ 00 14 7a e1 00 00 00 00 01 55 59 bb 3f 30 90 b6
+ 80 b0 45 c4 95
+ ];
+
+ assigned-clocks = <&versa3 0>, <&versa3 1>,
+ <&versa3 2>, <&versa3 3>,
+ <&versa3 4>, <&versa3 5>;
+ assigned-clock-rates = <24000000>, <11289600>,
+ <11289600>, <12000000>,
+ <25000000>, <12288000>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/renesas,9series.yaml b/Documentation/devicetree/bindings/clock/renesas,9series.yaml
new file mode 100644
index 000000000000..af6319697b1c
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/renesas,9series.yaml
@@ -0,0 +1,113 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/renesas,9series.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas 9-series I2C PCIe clock generators
+
+description: |
+ The Renesas 9-series are I2C PCIe clock generators providing
+ from 1 to 20 output clocks.
+
+ When referencing the provided clock in the DT using phandle
+ and clock specifier, the following mapping applies:
+
+ - 9FGV0241:
+ 0 -- DIF0
+ 1 -- DIF1
+ - 9FGV0441:
+ 0 -- DIF0
+ 1 -- DIF1
+ 2 -- DIF2
+ 3 -- DIF3
+ - 9FGV0841:
+ 0 -- DIF0
+ 1 -- DIF1
+ 2 -- DIF2
+ 3 -- DIF3
+ 4 -- DIF4
+ 5 -- DIF5
+ 6 -- DIF6
+ 7 -- DIF7
+
+maintainers:
+ - Marek Vasut <marex@denx.de>
+
+properties:
+ compatible:
+ enum:
+ - renesas,9fgv0241
+ - renesas,9fgv0441
+ - renesas,9fgv0841
+
+ reg:
+ description: I2C device address
+ enum: [ 0x68, 0x6a ]
+
+ '#clock-cells':
+ const: 1
+
+ clocks:
+ items:
+ - description: XTal input clock
+
+ renesas,out-amplitude-microvolt:
+ enum: [ 600000, 700000, 800000, 900000 ]
+ description: Output clock signal amplitude
+
+ renesas,out-spread-spectrum:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [ 100000, 99750, 99500 ]
+ description: Output clock down spread in pcm (1/1000 of percent)
+
+patternProperties:
+ "^DIF[0-19]$":
+ type: object
+ description:
+ Description of one of the outputs (DIF0..DIF19).
+
+ properties:
+ renesas,slew-rate:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [ 2000000, 3000000 ]
+ description: Output clock slew rate select in V/ns
+
+ additionalProperties: false
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ /* 25MHz reference crystal */
+ ref25: ref25m {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <25000000>;
+ };
+
+ i2c@0 {
+ reg = <0x0 0x100>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ rs9: clock-generator@6a {
+ compatible = "renesas,9fgv0241";
+ reg = <0x6a>;
+ #clock-cells = <1>;
+
+ clocks = <&ref25m>;
+
+ DIF0 {
+ renesas,slew-rate = <3000000>;
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/renesas,cpg-clocks.yaml b/Documentation/devicetree/bindings/clock/renesas,cpg-clocks.yaml
new file mode 100644
index 000000000000..a0e09b7002f0
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/renesas,cpg-clocks.yaml
@@ -0,0 +1,245 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/renesas,cpg-clocks.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas Clock Pulse Generator (CPG)
+
+maintainers:
+ - Geert Uytterhoeven <geert+renesas@glider.be>
+
+description:
+ The Clock Pulse Generator (CPG) generates core clocks for the SoC. It
+ includes PLLs, and fixed and variable ratio dividers.
+
+ The CPG may also provide a Clock Domain for SoC devices, in combination with
+ the CPG Module Stop (MSTP) Clocks.
+
+properties:
+ compatible:
+ oneOf:
+ - const: renesas,r8a73a4-cpg-clocks # R-Mobile APE6
+ - const: renesas,r8a7740-cpg-clocks # R-Mobile A1
+ - const: renesas,r8a7778-cpg-clocks # R-Car M1
+ - const: renesas,r8a7779-cpg-clocks # R-Car H1
+ - items:
+ - enum:
+ - renesas,r7s72100-cpg-clocks # RZ/A1H
+ - const: renesas,rz-cpg-clocks # RZ/A1
+ - const: renesas,sh73a0-cpg-clocks # SH-Mobile AG5
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ minItems: 1
+ maxItems: 3
+
+ '#clock-cells':
+ const: 1
+
+ clock-output-names:
+ minItems: 3
+ maxItems: 17
+
+ renesas,mode:
+ description: Board-specific settings of the MD_CK* bits on R-Mobile A1
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 7
+
+ '#power-domain-cells':
+ const: 0
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - '#clock-cells'
+ - clock-output-names
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: renesas,r8a73a4-cpg-clocks
+ then:
+ properties:
+ clocks:
+ items:
+ - description: extal1
+ - description: extal2
+
+ clock-output-names:
+ items:
+ - const: main
+ - const: pll0
+ - const: pll1
+ - const: pll2
+ - const: pll2s
+ - const: pll2h
+ - const: z
+ - const: z2
+ - const: i
+ - const: m3
+ - const: b
+ - const: m1
+ - const: m2
+ - const: zx
+ - const: zs
+ - const: hp
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: renesas,r8a7740-cpg-clocks
+ then:
+ properties:
+ clocks:
+ items:
+ - description: extal1
+ - description: extal2
+ - description: extalr
+
+ clock-output-names:
+ items:
+ - const: system
+ - const: pllc0
+ - const: pllc1
+ - const: pllc2
+ - const: r
+ - const: usb24s
+ - const: i
+ - const: zg
+ - const: b
+ - const: m1
+ - const: hp
+ - const: hpp
+ - const: usbp
+ - const: s
+ - const: zb
+ - const: m3
+ - const: cp
+
+ required:
+ - renesas,mode
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: renesas,r8a7778-cpg-clocks
+ then:
+ properties:
+ clocks:
+ maxItems: 1
+
+ clock-output-names:
+ items:
+ - const: plla
+ - const: pllb
+ - const: b
+ - const: out
+ - const: p
+ - const: s
+ - const: s1
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: renesas,r8a7779-cpg-clocks
+ then:
+ properties:
+ clocks:
+ maxItems: 1
+
+ clock-output-names:
+ items:
+ - const: plla
+ - const: z
+ - const: zs
+ - const: s
+ - const: s1
+ - const: p
+ - const: b
+ - const: out
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: renesas,r7s72100-cpg-clocks
+ then:
+ properties:
+ clocks:
+ items:
+ - description: extal1
+ - description: usb_x1
+
+ clock-output-names:
+ items:
+ - const: pll
+ - const: i
+ - const: g
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: renesas,sh73a0-cpg-clocks
+ then:
+ properties:
+ clocks:
+ items:
+ - description: extal1
+ - description: extal2
+
+ clock-output-names:
+ items:
+ - const: main
+ - const: pll0
+ - const: pll1
+ - const: pll2
+ - const: pll3
+ - const: dsi0phy
+ - const: dsi1phy
+ - const: zg
+ - const: m3
+ - const: b
+ - const: m1
+ - const: m2
+ - const: z
+ - const: zx
+ - const: hp
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - renesas,r8a7778-cpg-clocks
+ - renesas,r8a7779-cpg-clocks
+ - renesas,rz-cpg-clocks
+ then:
+ required:
+ - '#power-domain-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/r8a7740-clock.h>
+ cpg_clocks: cpg_clocks@e6150000 {
+ compatible = "renesas,r8a7740-cpg-clocks";
+ reg = <0xe6150000 0x10000>;
+ clocks = <&extal1_clk>, <&extal2_clk>, <&extalr_clk>;
+ #clock-cells = <1>;
+ clock-output-names = "system", "pllc0", "pllc1", "pllc2", "r",
+ "usb24s", "i", "zg", "b", "m1", "hp", "hpp",
+ "usbp", "s", "zb", "m3", "cp";
+ renesas,mode = <0x05>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/renesas,cpg-div6-clock.yaml b/Documentation/devicetree/bindings/clock/renesas,cpg-div6-clock.yaml
new file mode 100644
index 000000000000..2197c952e21d
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/renesas,cpg-div6-clock.yaml
@@ -0,0 +1,72 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/renesas,cpg-div6-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas CPG DIV6 Clock
+
+maintainers:
+ - Geert Uytterhoeven <geert+renesas@glider.be>
+
+description:
+ The CPG DIV6 clocks are variable factor clocks provided by the Clock Pulse
+ Generator (CPG). Their clock input is divided by a configurable factor from 1
+ to 64.
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - renesas,r8a73a4-div6-clock # R-Mobile APE6
+ - renesas,r8a7740-div6-clock # R-Mobile A1
+ - renesas,sh73a0-div6-clock # SH-Mobile AG5
+ - const: renesas,cpg-div6-clock
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ oneOf:
+ - maxItems: 1
+ - maxItems: 4
+ - maxItems: 8
+ description:
+ For clocks with multiple parents, invalid settings must be specified as
+ "<0>".
+
+ '#clock-cells':
+ const: 0
+
+ clock-output-names: true
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/r8a73a4-clock.h>
+
+ cpg_clocks: cpg_clocks@e6150000 {
+ compatible = "renesas,r8a73a4-cpg-clocks";
+ reg = <0xe6150000 0x10000>;
+ clocks = <&extal1_clk>, <&extal2_clk>;
+ #clock-cells = <1>;
+ clock-output-names = "main", "pll0", "pll1", "pll2",
+ "pll2s", "pll2h", "z", "z2",
+ "i", "m3", "b", "m1", "m2",
+ "zx", "zs", "hp";
+ };
+
+ sdhi2_clk: sdhi2_clk@e615007c {
+ compatible = "renesas,r8a73a4-div6-clock", "renesas,cpg-div6-clock";
+ reg = <0xe615007c 4>;
+ clocks = <&pll1_div2_clk>, <&cpg_clocks R8A73A4_CLK_PLL2S>, <0>,
+ <&extal2_clk>;
+ #clock-cells = <0>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/renesas,cpg-div6-clocks.txt b/Documentation/devicetree/bindings/clock/renesas,cpg-div6-clocks.txt
deleted file mode 100644
index ae36ab842919..000000000000
--- a/Documentation/devicetree/bindings/clock/renesas,cpg-div6-clocks.txt
+++ /dev/null
@@ -1,40 +0,0 @@
-* Renesas CPG DIV6 Clock
-
-The CPG DIV6 clocks are variable factor clocks provided by the Clock Pulse
-Generator (CPG). Their clock input is divided by a configurable factor from 1
-to 64.
-
-Required Properties:
-
- - compatible: Must be one of the following
- - "renesas,r8a73a4-div6-clock" for R8A73A4 (R-Mobile APE6) DIV6 clocks
- - "renesas,r8a7740-div6-clock" for R8A7740 (R-Mobile A1) DIV6 clocks
- - "renesas,r8a7790-div6-clock" for R8A7790 (R-Car H2) DIV6 clocks
- - "renesas,r8a7791-div6-clock" for R8A7791 (R-Car M2-W) DIV6 clocks
- - "renesas,r8a7793-div6-clock" for R8A7793 (R-Car M2-N) DIV6 clocks
- - "renesas,r8a7794-div6-clock" for R8A7794 (R-Car E2) DIV6 clocks
- - "renesas,sh73a0-div6-clock" for SH73A0 (SH-Mobile AG5) DIV6 clocks
- and "renesas,cpg-div6-clock" as a fallback.
- - reg: Base address and length of the memory resource used by the DIV6 clock
- - clocks: Reference to the parent clock(s); either one, four, or eight
- clocks must be specified. For clocks with multiple parents, invalid
- settings must be specified as "<0>".
- - #clock-cells: Must be 0
-
-
-Optional Properties:
-
- - clock-output-names: The name of the clock as a free-form string
-
-
-Example
--------
-
- sdhi2_clk: sdhi2_clk@e615007c {
- compatible = "renesas,r8a73a4-div6-clock", "renesas,cpg-div6-clock";
- reg = <0 0xe615007c 0 4>;
- clocks = <&pll1_div2_clk>, <&cpg_clocks R8A73A4_CLK_PLL2S>,
- <0>, <&extal2_clk>;
- #clock-cells = <0>;
- clock-output-names = "sdhi2ck";
- };
diff --git a/Documentation/devicetree/bindings/clock/renesas,cpg-mssr.txt b/Documentation/devicetree/bindings/clock/renesas,cpg-mssr.txt
deleted file mode 100644
index 916a601b76a7..000000000000
--- a/Documentation/devicetree/bindings/clock/renesas,cpg-mssr.txt
+++ /dev/null
@@ -1,97 +0,0 @@
-* Renesas Clock Pulse Generator / Module Standby and Software Reset
-
-On Renesas ARM SoCs (SH/R-Mobile, R-Car, RZ), the CPG (Clock Pulse Generator)
-and MSSR (Module Standby and Software Reset) blocks are intimately connected,
-and share the same register block.
-
-They provide the following functionalities:
- - The CPG block generates various core clocks,
- - The MSSR block provides two functions:
- 1. Module Standby, providing a Clock Domain to control the clock supply
- to individual SoC devices,
- 2. Reset Control, to perform a software reset of individual SoC devices.
-
-Required Properties:
- - compatible: Must be one of:
- - "renesas,r7s9210-cpg-mssr" for the r7s9210 SoC (RZ/A2)
- - "renesas,r8a7743-cpg-mssr" for the r8a7743 SoC (RZ/G1M)
- - "renesas,r8a7744-cpg-mssr" for the r8a7744 SoC (RZ/G1N)
- - "renesas,r8a7745-cpg-mssr" for the r8a7745 SoC (RZ/G1E)
- - "renesas,r8a77470-cpg-mssr" for the r8a77470 SoC (RZ/G1C)
- - "renesas,r8a774a1-cpg-mssr" for the r8a774a1 SoC (RZ/G2M)
- - "renesas,r8a774c0-cpg-mssr" for the r8a774c0 SoC (RZ/G2E)
- - "renesas,r8a7790-cpg-mssr" for the r8a7790 SoC (R-Car H2)
- - "renesas,r8a7791-cpg-mssr" for the r8a7791 SoC (R-Car M2-W)
- - "renesas,r8a7792-cpg-mssr" for the r8a7792 SoC (R-Car V2H)
- - "renesas,r8a7793-cpg-mssr" for the r8a7793 SoC (R-Car M2-N)
- - "renesas,r8a7794-cpg-mssr" for the r8a7794 SoC (R-Car E2)
- - "renesas,r8a7795-cpg-mssr" for the r8a7795 SoC (R-Car H3)
- - "renesas,r8a7796-cpg-mssr" for the r8a7796 SoC (R-Car M3-W)
- - "renesas,r8a77965-cpg-mssr" for the r8a77965 SoC (R-Car M3-N)
- - "renesas,r8a77970-cpg-mssr" for the r8a77970 SoC (R-Car V3M)
- - "renesas,r8a77980-cpg-mssr" for the r8a77980 SoC (R-Car V3H)
- - "renesas,r8a77990-cpg-mssr" for the r8a77990 SoC (R-Car E3)
- - "renesas,r8a77995-cpg-mssr" for the r8a77995 SoC (R-Car D3)
-
- - reg: Base address and length of the memory resource used by the CPG/MSSR
- block
-
- - clocks: References to external parent clocks, one entry for each entry in
- clock-names
- - clock-names: List of external parent clock names. Valid names are:
- - "extal" (r7s9210, r8a7743, r8a7744, r8a7745, r8a77470, r8a774a1,
- r8a774c0, r8a7790, r8a7791, r8a7792, r8a7793, r8a7794,
- r8a7795, r8a7796, r8a77965, r8a77970, r8a77980, r8a77990,
- r8a77995)
- - "extalr" (r8a774a1, r8a7795, r8a7796, r8a77965, r8a77970, r8a77980)
- - "usb_extal" (r8a7743, r8a7744, r8a7745, r8a77470, r8a7790, r8a7791,
- r8a7793, r8a7794)
-
- - #clock-cells: Must be 2
- - For CPG core clocks, the two clock specifier cells must be "CPG_CORE"
- and a core clock reference, as defined in
- <dt-bindings/clock/*-cpg-mssr.h>.
- - For module clocks, the two clock specifier cells must be "CPG_MOD" and
- a module number, as defined in the datasheet.
-
- - #power-domain-cells: Must be 0
- - SoC devices that are part of the CPG/MSSR Clock Domain and can be
- power-managed through Module Standby should refer to the CPG device
- node in their "power-domains" property, as documented by the generic PM
- Domain bindings in
- Documentation/devicetree/bindings/power/power_domain.txt.
-
- - #reset-cells: Must be 1
- - The single reset specifier cell must be the module number, as defined
- in the datasheet.
-
-
-Examples
---------
-
- - CPG device node:
-
- cpg: clock-controller@e6150000 {
- compatible = "renesas,r8a7795-cpg-mssr";
- reg = <0 0xe6150000 0 0x1000>;
- clocks = <&extal_clk>, <&extalr_clk>;
- clock-names = "extal", "extalr";
- #clock-cells = <2>;
- #power-domain-cells = <0>;
- #reset-cells = <1>;
- };
-
-
- - CPG/MSSR Clock Domain member device node:
-
- scif2: serial@e6e88000 {
- compatible = "renesas,scif-r8a7795", "renesas,scif";
- reg = <0 0xe6e88000 0 64>;
- interrupts = <GIC_SPI 164 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&cpg CPG_MOD 310>;
- clock-names = "fck";
- dmas = <&dmac1 0x13>, <&dmac1 0x12>;
- dma-names = "tx", "rx";
- power-domains = <&cpg>;
- resets = <&cpg 310>;
- };
diff --git a/Documentation/devicetree/bindings/clock/renesas,cpg-mssr.yaml b/Documentation/devicetree/bindings/clock/renesas,cpg-mssr.yaml
new file mode 100644
index 000000000000..bc2fd3761328
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/renesas,cpg-mssr.yaml
@@ -0,0 +1,153 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/renesas,cpg-mssr.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas Clock Pulse Generator / Module Standby and Software Reset
+
+maintainers:
+ - Geert Uytterhoeven <geert+renesas@glider.be>
+
+description: |
+ On Renesas ARM SoCs (SH/R-Mobile, R-Car, RZ), the CPG (Clock Pulse Generator)
+ and MSSR (Module Standby and Software Reset) blocks are intimately connected,
+ and share the same register block.
+
+ They provide the following functionalities:
+ - The CPG block generates various core clocks,
+ - The MSSR block provides two functions:
+ 1. Module Standby, providing a Clock Domain to control the clock supply
+ to individual SoC devices,
+ 2. Reset Control, to perform a software reset of individual SoC devices.
+
+properties:
+ compatible:
+ enum:
+ - renesas,r7s9210-cpg-mssr # RZ/A2
+ - renesas,r8a7742-cpg-mssr # RZ/G1H
+ - renesas,r8a7743-cpg-mssr # RZ/G1M
+ - renesas,r8a7744-cpg-mssr # RZ/G1N
+ - renesas,r8a7745-cpg-mssr # RZ/G1E
+ - renesas,r8a77470-cpg-mssr # RZ/G1C
+ - renesas,r8a774a1-cpg-mssr # RZ/G2M
+ - renesas,r8a774a3-cpg-mssr # RZ/G2M v3.0
+ - renesas,r8a774b1-cpg-mssr # RZ/G2N
+ - renesas,r8a774c0-cpg-mssr # RZ/G2E
+ - renesas,r8a774e1-cpg-mssr # RZ/G2H
+ - renesas,r8a7790-cpg-mssr # R-Car H2
+ - renesas,r8a7791-cpg-mssr # R-Car M2-W
+ - renesas,r8a7792-cpg-mssr # R-Car V2H
+ - renesas,r8a7793-cpg-mssr # R-Car M2-N
+ - renesas,r8a7794-cpg-mssr # R-Car E2
+ - renesas,r8a7795-cpg-mssr # R-Car H3
+ - renesas,r8a7796-cpg-mssr # R-Car M3-W
+ - renesas,r8a77961-cpg-mssr # R-Car M3-W+
+ - renesas,r8a77965-cpg-mssr # R-Car M3-N
+ - renesas,r8a77970-cpg-mssr # R-Car V3M
+ - renesas,r8a77980-cpg-mssr # R-Car V3H
+ - renesas,r8a77990-cpg-mssr # R-Car E3
+ - renesas,r8a77995-cpg-mssr # R-Car D3
+ - renesas,r8a779a0-cpg-mssr # R-Car V3U
+ - renesas,r8a779f0-cpg-mssr # R-Car S4-8
+ - renesas,r8a779g0-cpg-mssr # R-Car V4H
+ - renesas,r8a779h0-cpg-mssr # R-Car V4M
+ - renesas,r9a09g077-cpg-mssr # RZ/T2H
+ - renesas,r9a09g087-cpg-mssr # RZ/N2H
+
+ reg:
+ minItems: 1
+ items:
+ - description: base address of register block 0
+ - description: base address of register block 1
+ description: base addresses of clock controller. Some controllers
+ (like r9a09g077) use two blocks instead of a single one.
+
+ clocks:
+ minItems: 1
+ maxItems: 2
+
+ clock-names:
+ minItems: 1
+ maxItems: 2
+ items:
+ enum:
+ - extal # All
+ - extalr # Most R-Car Gen3 and RZ/G2
+ - usb_extal # Most R-Car Gen2 and RZ/G1
+
+ '#clock-cells':
+ description: |
+ - For CPG core clocks, the two clock specifier cells must be "CPG_CORE"
+ and a core clock reference, as defined in
+ <dt-bindings/clock/*-cpg-mssr.h>
+ - For module clocks, the two clock specifier cells must be "CPG_MOD" and
+ a module number, as defined in the datasheet.
+ const: 2
+
+ '#power-domain-cells':
+ description:
+ SoC devices that are part of the CPG/MSSR Clock Domain and can be
+ power-managed through Module Standby should refer to the CPG device node
+ in their "power-domains" property, as documented by the generic PM Domain
+ bindings in Documentation/devicetree/bindings/power/power-domain.yaml.
+ const: 0
+
+ '#reset-cells':
+ description:
+ The single reset specifier cell must be the module number, as defined in
+ the datasheet.
+ const: 1
+
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - '#clock-cells'
+ - '#power-domain-cells'
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - renesas,r9a09g077-cpg-mssr
+ - renesas,r9a09g087-cpg-mssr
+ then:
+ properties:
+ reg:
+ minItems: 2
+ clock-names:
+ items:
+ - const: extal
+ else:
+ properties:
+ reg:
+ maxItems: 1
+ - if:
+ not:
+ properties:
+ compatible:
+ items:
+ enum:
+ - renesas,r7s9210-cpg-mssr
+ then:
+ required:
+ - '#reset-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ cpg: clock-controller@e6150000 {
+ compatible = "renesas,r8a7795-cpg-mssr";
+ reg = <0xe6150000 0x1000>;
+ clocks = <&extal_clk>, <&extalr_clk>;
+ clock-names = "extal", "extalr";
+ #clock-cells = <2>;
+ #power-domain-cells = <0>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/renesas,cpg-mstp-clocks.txt b/Documentation/devicetree/bindings/clock/renesas,cpg-mstp-clocks.txt
deleted file mode 100644
index da578ebdda28..000000000000
--- a/Documentation/devicetree/bindings/clock/renesas,cpg-mstp-clocks.txt
+++ /dev/null
@@ -1,60 +0,0 @@
-* Renesas CPG Module Stop (MSTP) Clocks
-
-The CPG can gate SoC device clocks. The gates are organized in groups of up to
-32 gates.
-
-This device tree binding describes a single 32 gate clocks group per node.
-Clocks are referenced by user nodes by the MSTP node phandle and the clock
-index in the group, from 0 to 31.
-
-Required Properties:
-
- - compatible: Must be one of the following
- - "renesas,r7s72100-mstp-clocks" for R7S72100 (RZ) MSTP gate clocks
- - "renesas,r8a73a4-mstp-clocks" for R8A73A4 (R-Mobile APE6) MSTP gate clocks
- - "renesas,r8a7740-mstp-clocks" for R8A7740 (R-Mobile A1) MSTP gate clocks
- - "renesas,r8a7778-mstp-clocks" for R8A7778 (R-Car M1) MSTP gate clocks
- - "renesas,r8a7779-mstp-clocks" for R8A7779 (R-Car H1) MSTP gate clocks
- - "renesas,r8a7790-mstp-clocks" for R8A7790 (R-Car H2) MSTP gate clocks
- - "renesas,r8a7791-mstp-clocks" for R8A7791 (R-Car M2-W) MSTP gate clocks
- - "renesas,r8a7792-mstp-clocks" for R8A7792 (R-Car V2H) MSTP gate clocks
- - "renesas,r8a7793-mstp-clocks" for R8A7793 (R-Car M2-N) MSTP gate clocks
- - "renesas,r8a7794-mstp-clocks" for R8A7794 (R-Car E2) MSTP gate clocks
- - "renesas,sh73a0-mstp-clocks" for SH73A0 (SH-MobileAG5) MSTP gate clocks
- and "renesas,cpg-mstp-clocks" as a fallback.
- - reg: Base address and length of the I/O mapped registers used by the MSTP
- clocks. The first register is the clock control register and is mandatory.
- The second register is the clock status register and is optional when not
- implemented in hardware.
- - clocks: Reference to the parent clocks, one per output clock. The parents
- must appear in the same order as the output clocks.
- - #clock-cells: Must be 1
- - clock-output-names: The name of the clocks as free-form strings
- - clock-indices: Indices of the gate clocks into the group (0 to 31)
-
-The clocks, clock-output-names and clock-indices properties contain one entry
-per gate clock. The MSTP groups are sparsely populated. Unimplemented gate
-clocks must not be declared.
-
-
-Example
--------
-
- #include <dt-bindings/clock/r8a7790-clock.h>
-
- mstp3_clks: mstp3_clks@e615013c {
- compatible = "renesas,r8a7790-mstp-clocks", "renesas,cpg-mstp-clocks";
- reg = <0 0xe615013c 0 4>, <0 0xe6150048 0 4>;
- clocks = <&cp_clk>, <&mmc1_clk>, <&sd3_clk>, <&sd2_clk>,
- <&cpg_clocks R8A7790_CLK_SD1>, <&cpg_clocks R8A7790_CLK_SD0>,
- <&mmc0_clk>;
- #clock-cells = <1>;
- clock-output-names =
- "tpu0", "mmcif1", "sdhi3", "sdhi2",
- "sdhi1", "sdhi0", "mmcif0";
- clock-indices = <
- R8A7790_CLK_TPU0 R8A7790_CLK_MMCIF1 R8A7790_CLK_SDHI3
- R8A7790_CLK_SDHI2 R8A7790_CLK_SDHI1 R8A7790_CLK_SDHI0
- R8A7790_CLK_MMCIF0
- >;
- };
diff --git a/Documentation/devicetree/bindings/clock/renesas,cpg-mstp-clocks.yaml b/Documentation/devicetree/bindings/clock/renesas,cpg-mstp-clocks.yaml
new file mode 100644
index 000000000000..9752ac63288b
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/renesas,cpg-mstp-clocks.yaml
@@ -0,0 +1,82 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/renesas,cpg-mstp-clocks.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas Clock Pulse Generator (CPG) Module Stop (MSTP) Clocks
+
+maintainers:
+ - Geert Uytterhoeven <geert+renesas@glider.be>
+
+description:
+ The Clock Pulse Generator (CPG) can gate SoC device clocks. The gates are
+ organized in groups of up to 32 gates.
+
+ This device tree binding describes a single 32 gate clocks group per node.
+ Clocks are referenced by user nodes by the Module Stop (MSTP) node phandle
+ and the clock index in the group, from 0 to 31.
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - renesas,r7s72100-mstp-clocks # RZ/A1
+ - renesas,r8a73a4-mstp-clocks # R-Mobile APE6
+ - renesas,r8a7740-mstp-clocks # R-Mobile A1
+ - renesas,r8a7778-mstp-clocks # R-Car M1
+ - renesas,r8a7779-mstp-clocks # R-Car H1
+ - renesas,sh73a0-mstp-clocks # SH-Mobile AG5
+ - const: renesas,cpg-mstp-clocks
+
+ reg:
+ minItems: 1
+ items:
+ - description: Module Stop Control Register (MSTPCR)
+ - description: Module Stop Status Register (MSTPSR)
+
+ clocks:
+ minItems: 1
+ maxItems: 32
+
+ '#clock-cells':
+ const: 1
+
+ clock-indices:
+ minItems: 1
+ maxItems: 32
+
+ clock-output-names:
+ minItems: 1
+ maxItems: 32
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - '#clock-cells'
+ - clock-indices
+ - clock-output-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/r8a73a4-clock.h>
+ mstp2_clks: mstp2_clks@e6150138 {
+ compatible = "renesas,r8a73a4-mstp-clocks",
+ "renesas,cpg-mstp-clocks";
+ reg = <0xe6150138 4>, <0xe6150040 4>;
+ clocks = <&mp_clk>, <&mp_clk>, <&mp_clk>, <&mp_clk>, <&mp_clk>,
+ <&mp_clk>, <&cpg_clocks R8A73A4_CLK_HP>;
+ #clock-cells = <1>;
+ clock-indices = <
+ R8A73A4_CLK_SCIFA0 R8A73A4_CLK_SCIFA1
+ R8A73A4_CLK_SCIFB0 R8A73A4_CLK_SCIFB1
+ R8A73A4_CLK_SCIFB2 R8A73A4_CLK_SCIFB3
+ R8A73A4_CLK_DMAC
+ >;
+ clock-output-names =
+ "scifa0", "scifa1", "scifb0", "scifb1", "scifb2", "scifb3",
+ "dmac";
+ };
diff --git a/Documentation/devicetree/bindings/clock/renesas,emev2-smu.yaml b/Documentation/devicetree/bindings/clock/renesas,emev2-smu.yaml
new file mode 100644
index 000000000000..4d9a64800481
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/renesas,emev2-smu.yaml
@@ -0,0 +1,140 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/renesas,emev2-smu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas EMMA Mobile EV2 System Management Unit
+
+maintainers:
+ - Geert Uytterhoeven <geert+renesas@glider.be>
+ - Magnus Damm <magnus.damm@gmail.com>
+
+description: |
+ The System Management Unit is described in user's manual R19UH0037EJ1000_SMU.
+ This is not a clock provider, but clocks under SMU depend on it.
+
+properties:
+ compatible:
+ const: renesas,emev2-smu
+
+ reg:
+ maxItems: 1
+
+ '#address-cells':
+ const: 2
+
+ '#size-cells':
+ const: 0
+
+required:
+ - compatible
+ - reg
+ - '#address-cells'
+ - '#size-cells'
+
+patternProperties:
+ ".*sclkdiv@.*":
+ type: object
+
+ description: |
+ Function block with an input mux and a divider, which corresponds to
+ "Serial clock generator" in fig. "Clock System Overview" of the manual,
+ and "xxx frequency division setting register" (XXXCLKDIV) registers.
+ This makes internal (neither input nor output) clock that is provided
+ to input of xxxGCLK block.
+
+ properties:
+ compatible:
+ const: renesas,emev2-smu-clkdiv
+
+ reg:
+ maxItems: 1
+ description:
+ Byte offset from SMU base and Bit position in the register.
+
+ clocks:
+ minItems: 1
+ maxItems: 4
+
+ '#clock-cells':
+ const: 0
+
+ required:
+ - compatible
+ - reg
+ - clocks
+ - '#clock-cells'
+
+ additionalProperties: false
+
+ ".*sclk@.*":
+ type: object
+
+ description: |
+ Clock gating node shown as "Clock stop processing block" in the
+ fig. "Clock System Overview" of the manual.
+ Registers are "xxx clock gate control register" (XXXGCLKCTRL).
+
+ properties:
+ compatible:
+ const: renesas,emev2-smu-gclk
+
+ reg:
+ maxItems: 1
+ description:
+ Byte offset from SMU base and Bit position in the register.
+
+ clocks:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 0
+
+ required:
+ - compatible
+ - reg
+ - clocks
+ - '#clock-cells'
+
+ additionalProperties: false
+
+additionalProperties: true
+
+examples:
+ - |
+ // Example of clock-tree description:
+ //
+ // This describes a clock path in the clock tree
+ // c32ki -> pll3_fo -> usia_u0_sclkdiv -> usia_u0_sclk
+ clocks@e0110000 {
+ compatible = "renesas,emev2-smu";
+ reg = <0xe0110000 0x10000>;
+ #address-cells = <2>;
+ #size-cells = <0>;
+
+ c32ki: c32ki {
+ compatible = "fixed-clock";
+ clock-frequency = <32768>;
+ #clock-cells = <0>;
+ };
+ pll3_fo: pll3_fo {
+ compatible = "fixed-factor-clock";
+ clocks = <&c32ki>;
+ clock-div = <1>;
+ clock-mult = <7000>;
+ #clock-cells = <0>;
+ };
+ usia_u0_sclkdiv: usia_u0_sclkdiv@610,0 {
+ compatible = "renesas,emev2-smu-clkdiv";
+ reg = <0x610 0>;
+ clocks = <&pll3_fo>;
+ #clock-cells = <0>;
+ };
+ usia_u0_sclk: usia_u0_sclk@4a0,1 {
+ compatible = "renesas,emev2-smu-gclk";
+ reg = <0x4a0 1>;
+ clocks = <&usia_u0_sclkdiv>;
+ #clock-cells = <0>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/renesas,h8300-div-clock.txt b/Documentation/devicetree/bindings/clock/renesas,h8300-div-clock.txt
deleted file mode 100644
index 399e0da22348..000000000000
--- a/Documentation/devicetree/bindings/clock/renesas,h8300-div-clock.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-* Renesas H8/300 divider clock
-
-Required Properties:
-
- - compatible: Must be "renesas,h8300-div-clock"
-
- - clocks: Reference to the parent clocks ("extal1" and "extal2")
-
- - #clock-cells: Must be 1
-
- - reg: Base address and length of the divide rate selector
-
- - renesas,width: bit width of selector
-
-Example
--------
-
- cclk: cclk {
- compatible = "renesas,h8300-div-clock";
- clocks = <&xclk>;
- #clock-cells = <0>;
- reg = <0xfee01b 2>;
- renesas,width = <2>;
- };
diff --git a/Documentation/devicetree/bindings/clock/renesas,h8s2678-pll-clock.txt b/Documentation/devicetree/bindings/clock/renesas,h8s2678-pll-clock.txt
deleted file mode 100644
index 500cdadbceb7..000000000000
--- a/Documentation/devicetree/bindings/clock/renesas,h8s2678-pll-clock.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-Renesas H8S2678 PLL clock
-
-This device is Clock multiplyer
-
-Required Properties:
-
- - compatible: Must be "renesas,h8s2678-pll-clock"
-
- - clocks: Reference to the parent clocks
-
- - #clock-cells: Must be 0
-
- - reg: Two rate selector (Multiply / Divide) register address
-
-Example
--------
-
- pllclk: pllclk {
- compatible = "renesas,h8s2678-pll-clock";
- clocks = <&xclk>;
- #clock-cells = <0>;
- reg = <0xfee03b 2>, <0xfee045 2>;
- };
diff --git a/Documentation/devicetree/bindings/clock/renesas,r8a73a4-cpg-clocks.txt b/Documentation/devicetree/bindings/clock/renesas,r8a73a4-cpg-clocks.txt
deleted file mode 100644
index ece92393e80d..000000000000
--- a/Documentation/devicetree/bindings/clock/renesas,r8a73a4-cpg-clocks.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-* Renesas R8A73A4 Clock Pulse Generator (CPG)
-
-The CPG generates core clocks for the R8A73A4 SoC. It includes five PLLs
-and several fixed ratio dividers.
-
-Required Properties:
-
- - compatible: Must be "renesas,r8a73a4-cpg-clocks"
-
- - reg: Base address and length of the memory resource used by the CPG
-
- - clocks: Reference to the parent clocks ("extal1" and "extal2")
-
- - #clock-cells: Must be 1
-
- - clock-output-names: The names of the clocks. Supported clocks are "main",
- "pll0", "pll1", "pll2", "pll2s", "pll2h", "z", "z2", "i", "m3", "b",
- "m1", "m2", "zx", "zs", and "hp".
-
-
-Example
--------
-
- cpg_clocks: cpg_clocks@e6150000 {
- compatible = "renesas,r8a73a4-cpg-clocks";
- reg = <0 0xe6150000 0 0x10000>;
- clocks = <&extal1_clk>, <&extal2_clk>;
- #clock-cells = <1>;
- clock-output-names = "main", "pll0", "pll1", "pll2",
- "pll2s", "pll2h", "z", "z2",
- "i", "m3", "b", "m1", "m2",
- "zx", "zs", "hp";
- };
diff --git a/Documentation/devicetree/bindings/clock/renesas,r8a7740-cpg-clocks.txt b/Documentation/devicetree/bindings/clock/renesas,r8a7740-cpg-clocks.txt
deleted file mode 100644
index 2c03302f86ed..000000000000
--- a/Documentation/devicetree/bindings/clock/renesas,r8a7740-cpg-clocks.txt
+++ /dev/null
@@ -1,41 +0,0 @@
-These bindings should be considered EXPERIMENTAL for now.
-
-* Renesas R8A7740 Clock Pulse Generator (CPG)
-
-The CPG generates core clocks for the R8A7740 SoC. It includes three PLLs
-and several fixed ratio and variable ratio dividers.
-
-Required Properties:
-
- - compatible: Must be "renesas,r8a7740-cpg-clocks"
-
- - reg: Base address and length of the memory resource used by the CPG
-
- - clocks: Reference to the three parent clocks
- - #clock-cells: Must be 1
- - clock-output-names: The names of the clocks. Supported clocks are
- "system", "pllc0", "pllc1", "pllc2", "r", "usb24s", "i", "zg", "b",
- "m1", "hp", "hpp", "usbp", "s", "zb", "m3", and "cp".
-
- - renesas,mode: board-specific settings of the MD_CK* bits
-
-
-Example
--------
-
-cpg_clocks: cpg_clocks@e6150000 {
- compatible = "renesas,r8a7740-cpg-clocks";
- reg = <0xe6150000 0x10000>;
- clocks = <&extal1_clk>, <&extal2_clk>, <&extalr_clk>;
- #clock-cells = <1>;
- clock-output-names = "system", "pllc0", "pllc1",
- "pllc2", "r",
- "usb24s",
- "i", "zg", "b", "m1", "hp",
- "hpp", "usbp", "s", "zb", "m3",
- "cp";
-};
-
-&cpg_clocks {
- renesas,mode = <0x05>;
-};
diff --git a/Documentation/devicetree/bindings/clock/renesas,r8a7778-cpg-clocks.txt b/Documentation/devicetree/bindings/clock/renesas,r8a7778-cpg-clocks.txt
deleted file mode 100644
index 7cc4c0330b53..000000000000
--- a/Documentation/devicetree/bindings/clock/renesas,r8a7778-cpg-clocks.txt
+++ /dev/null
@@ -1,47 +0,0 @@
-* Renesas R8A7778 Clock Pulse Generator (CPG)
-
-The CPG generates core clocks for the R8A7778. It includes two PLLs and
-several fixed ratio dividers.
-The CPG also provides a Clock Domain for SoC devices, in combination with the
-CPG Module Stop (MSTP) Clocks.
-
-Required Properties:
-
- - compatible: Must be "renesas,r8a7778-cpg-clocks"
- - reg: Base address and length of the memory resource used by the CPG
- - #clock-cells: Must be 1
- - clock-output-names: The names of the clocks. Supported clocks are
- "plla", "pllb", "b", "out", "p", "s", and "s1".
- - #power-domain-cells: Must be 0
-
-SoC devices that are part of the CPG/MSTP Clock Domain and can be power-managed
-through an MSTP clock should refer to the CPG device node in their
-"power-domains" property, as documented by the generic PM domain bindings in
-Documentation/devicetree/bindings/power/power_domain.txt.
-
-
-Examples
---------
-
- - CPG device node:
-
- cpg_clocks: cpg_clocks@ffc80000 {
- compatible = "renesas,r8a7778-cpg-clocks";
- reg = <0xffc80000 0x80>;
- #clock-cells = <1>;
- clocks = <&extal_clk>;
- clock-output-names = "plla", "pllb", "b",
- "out", "p", "s", "s1";
- #power-domain-cells = <0>;
- };
-
-
- - CPG/MSTP Clock Domain member device node:
-
- sdhi0: sd@ffe4c000 {
- compatible = "renesas,sdhi-r8a7778";
- reg = <0xffe4c000 0x100>;
- interrupts = <0 87 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&mstp3_clks R8A7778_CLK_SDHI0>;
- power-domains = <&cpg_clocks>;
- };
diff --git a/Documentation/devicetree/bindings/clock/renesas,r8a7779-cpg-clocks.txt b/Documentation/devicetree/bindings/clock/renesas,r8a7779-cpg-clocks.txt
deleted file mode 100644
index 8c81547c29f5..000000000000
--- a/Documentation/devicetree/bindings/clock/renesas,r8a7779-cpg-clocks.txt
+++ /dev/null
@@ -1,49 +0,0 @@
-* Renesas R8A7779 Clock Pulse Generator (CPG)
-
-The CPG generates core clocks for the R8A7779. It includes one PLL and
-several fixed ratio dividers.
-The CPG also provides a Clock Domain for SoC devices, in combination with the
-CPG Module Stop (MSTP) Clocks.
-
-Required Properties:
-
- - compatible: Must be "renesas,r8a7779-cpg-clocks"
- - reg: Base address and length of the memory resource used by the CPG
-
- - clocks: Reference to the parent clock
- - #clock-cells: Must be 1
- - clock-output-names: The names of the clocks. Supported clocks are "plla",
- "z", "zs", "s", "s1", "p", "b", "out".
- - #power-domain-cells: Must be 0
-
-SoC devices that are part of the CPG/MSTP Clock Domain and can be power-managed
-through an MSTP clock should refer to the CPG device node in their
-"power-domains" property, as documented by the generic PM domain bindings in
-Documentation/devicetree/bindings/power/power_domain.txt.
-
-
-Examples
---------
-
- - CPG device node:
-
- cpg_clocks: cpg_clocks@ffc80000 {
- compatible = "renesas,r8a7779-cpg-clocks";
- reg = <0xffc80000 0x30>;
- clocks = <&extal_clk>;
- #clock-cells = <1>;
- clock-output-names = "plla", "z", "zs", "s", "s1", "p",
- "b", "out";
- #power-domain-cells = <0>;
- };
-
-
- - CPG/MSTP Clock Domain member device node:
-
- sata: sata@fc600000 {
- compatible = "renesas,sata-r8a7779", "renesas,rcar-sata";
- reg = <0xfc600000 0x2000>;
- interrupts = <0 100 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&mstp1_clks R8A7779_CLK_SATA>;
- power-domains = <&cpg_clocks>;
- };
diff --git a/Documentation/devicetree/bindings/clock/renesas,r9a06g032-sysctrl.txt b/Documentation/devicetree/bindings/clock/renesas,r9a06g032-sysctrl.txt
deleted file mode 100644
index d60b99756bb9..000000000000
--- a/Documentation/devicetree/bindings/clock/renesas,r9a06g032-sysctrl.txt
+++ /dev/null
@@ -1,43 +0,0 @@
-* Renesas R9A06G032 SYSCTRL
-
-Required Properties:
-
- - compatible: Must be:
- - "renesas,r9a06g032-sysctrl"
- - reg: Base address and length of the SYSCTRL IO block.
- - #clock-cells: Must be 1
- - clocks: References to the parent clocks:
- - external 40mhz crystal.
- - external (optional) 32.768khz
- - external (optional) jtag input
- - external (optional) RGMII_REFCLK
- - clock-names: Must be:
- clock-names = "mclk", "rtc", "jtag", "rgmii_ref_ext";
-
-Examples
---------
-
- - SYSCTRL node:
-
- sysctrl: system-controller@4000c000 {
- compatible = "renesas,r9a06g032-sysctrl";
- reg = <0x4000c000 0x1000>;
- #clock-cells = <1>;
-
- clocks = <&ext_mclk>, <&ext_rtc_clk>,
- <&ext_jtag_clk>, <&ext_rgmii_ref>;
- clock-names = "mclk", "rtc", "jtag", "rgmii_ref_ext";
- };
-
- - Other nodes can use the clocks provided by SYSCTRL as in:
-
- #include <dt-bindings/clock/r9a06g032-sysctrl.h>
- uart0: serial@40060000 {
- compatible = "snps,dw-apb-uart";
- reg = <0x40060000 0x400>;
- interrupts = <GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>;
- reg-shift = <2>;
- reg-io-width = <4>;
- clocks = <&sysctrl R9A06G032_CLK_UART0>;
- clock-names = "baudclk";
- };
diff --git a/Documentation/devicetree/bindings/clock/renesas,r9a06g032-sysctrl.yaml b/Documentation/devicetree/bindings/clock/renesas,r9a06g032-sysctrl.yaml
new file mode 100644
index 000000000000..26d94cedc871
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/renesas,r9a06g032-sysctrl.yaml
@@ -0,0 +1,73 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/renesas,r9a06g032-sysctrl.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas RZ/N1D (R9A06G032) System Controller
+
+maintainers:
+ - Fabrizio Castro <fabrizio.castro.jz@renesas.com>
+ - Geert Uytterhoeven <geert+renesas@glider.be>
+
+properties:
+ compatible:
+ const: renesas,r9a06g032-sysctrl
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ minItems: 1
+ items:
+ - description: External 40 MHz crystal
+ - description: Optional external 32.768 kHz crystal
+ - description: Optional external JTAG input
+ - description: Optional external RGMII_REFCLK
+
+ clock-names:
+ minItems: 1
+ items:
+ - const: mclk
+ - const: rtc
+ - const: jtag
+ - const: rgmii_ref_ext
+
+ '#clock-cells':
+ const: 1
+
+ '#power-domain-cells':
+ const: 0
+
+ '#address-cells':
+ const: 1
+
+ '#size-cells':
+ const: 1
+
+patternProperties:
+ "^dma-router@[a-f0-9]+$":
+ type: object
+ $ref: /schemas/dma/renesas,rzn1-dmamux.yaml#
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - '#clock-cells'
+ - '#power-domain-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ sysctrl: system-controller@4000c000 {
+ compatible = "renesas,r9a06g032-sysctrl";
+ reg = <0x4000c000 0x1000>;
+ clocks = <&ext_mclk>, <&ext_rtc_clk>, <&ext_jtag_clk>,
+ <&ext_rgmii_ref>;
+ clock-names = "mclk", "rtc", "jtag", "rgmii_ref_ext";
+ #clock-cells = <1>;
+ #power-domain-cells = <0>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/renesas,r9a08g045-vbattb.yaml b/Documentation/devicetree/bindings/clock/renesas,r9a08g045-vbattb.yaml
new file mode 100644
index 000000000000..3707e4118949
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/renesas,r9a08g045-vbattb.yaml
@@ -0,0 +1,84 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/renesas,r9a08g045-vbattb.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas Battery Backup Function (VBATTB)
+
+description:
+ Renesas VBATTB is an always on powered module (backed by battery) which
+ controls the RTC clock (VBATTCLK), tamper detection logic and a small
+ general usage memory (128B).
+
+maintainers:
+ - Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
+
+properties:
+ compatible:
+ const: renesas,r9a08g045-vbattb
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ items:
+ - description: tamper detector interrupt
+
+ clocks:
+ items:
+ - description: VBATTB module clock
+ - description: RTC input clock (crystal or external clock device)
+
+ clock-names:
+ items:
+ - const: bclk
+ - const: rtx
+
+ '#clock-cells':
+ const: 1
+
+ power-domains:
+ maxItems: 1
+
+ resets:
+ items:
+ - description: VBATTB module reset
+
+ quartz-load-femtofarads:
+ description: load capacitance of the on board crystal
+ enum: [ 4000, 7000, 9000, 12500 ]
+ default: 4000
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+ - '#clock-cells'
+ - power-domains
+ - resets
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/r9a08g045-cpg.h>
+ #include <dt-bindings/clock/renesas,r9a08g045-vbattb.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ clock-controller@1005c000 {
+ compatible = "renesas,r9a08g045-vbattb";
+ reg = <0x1005c000 0x1000>;
+ interrupts = <GIC_SPI 43 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&cpg CPG_MOD R9A08G045_VBAT_BCLK>, <&vbattb_xtal>;
+ clock-names = "bclk", "rtx";
+ assigned-clocks = <&vbattb VBATTB_MUX>;
+ assigned-clock-parents = <&vbattb VBATTB_XC>;
+ #clock-cells = <1>;
+ power-domains = <&cpg>;
+ resets = <&cpg R9A08G045_VBAT_BRESETN>;
+ quartz-load-femtofarads = <12500>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/renesas,rcar-gen2-cpg-clocks.txt b/Documentation/devicetree/bindings/clock/renesas,rcar-gen2-cpg-clocks.txt
deleted file mode 100644
index f8c05bb4116e..000000000000
--- a/Documentation/devicetree/bindings/clock/renesas,rcar-gen2-cpg-clocks.txt
+++ /dev/null
@@ -1,60 +0,0 @@
-* Renesas R-Car Gen2 Clock Pulse Generator (CPG)
-
-The CPG generates core clocks for the R-Car Gen2 SoCs. It includes three PLLs
-and several fixed ratio dividers.
-The CPG also provides a Clock Domain for SoC devices, in combination with the
-CPG Module Stop (MSTP) Clocks.
-
-Required Properties:
-
- - compatible: Must be one of
- - "renesas,r8a7790-cpg-clocks" for the r8a7790 CPG
- - "renesas,r8a7791-cpg-clocks" for the r8a7791 CPG
- - "renesas,r8a7792-cpg-clocks" for the r8a7792 CPG
- - "renesas,r8a7793-cpg-clocks" for the r8a7793 CPG
- - "renesas,r8a7794-cpg-clocks" for the r8a7794 CPG
- and "renesas,rcar-gen2-cpg-clocks" as a fallback.
-
- - reg: Base address and length of the memory resource used by the CPG
-
- - clocks: References to the parent clocks: first to the EXTAL clock, second
- to the USB_EXTAL clock
- - #clock-cells: Must be 1
- - clock-output-names: The names of the clocks. Supported clocks are "main",
- "pll0", "pll1", "pll3", "lb", "qspi", "sdh", "sd0", "sd1", "z", "rcan", and
- "adsp"
- - #power-domain-cells: Must be 0
-
-SoC devices that are part of the CPG/MSTP Clock Domain and can be power-managed
-through an MSTP clock should refer to the CPG device node in their
-"power-domains" property, as documented by the generic PM domain bindings in
-Documentation/devicetree/bindings/power/power_domain.txt.
-
-
-Examples
---------
-
- - CPG device node:
-
- cpg_clocks: cpg_clocks@e6150000 {
- compatible = "renesas,r8a7790-cpg-clocks",
- "renesas,rcar-gen2-cpg-clocks";
- reg = <0 0xe6150000 0 0x1000>;
- clocks = <&extal_clk &usb_extal_clk>;
- #clock-cells = <1>;
- clock-output-names = "main", "pll0, "pll1", "pll3",
- "lb", "qspi", "sdh", "sd0", "sd1", "z",
- "rcan", "adsp";
- #power-domain-cells = <0>;
- };
-
-
- - CPG/MSTP Clock Domain member device node:
-
- thermal@e61f0000 {
- compatible = "renesas,thermal-r8a7790", "renesas,rcar-thermal";
- reg = <0 0xe61f0000 0 0x14>, <0 0xe61f0100 0 0x38>;
- interrupts = <0 69 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&mstp5_clks R8A7790_CLK_THERMAL>;
- power-domains = <&cpg_clocks>;
- };
diff --git a/Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.txt b/Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.txt
deleted file mode 100644
index e96e085271c1..000000000000
--- a/Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.txt
+++ /dev/null
@@ -1,55 +0,0 @@
-* Renesas R-Car USB 2.0 clock selector
-
-This file provides information on what the device node for the R-Car USB 2.0
-clock selector.
-
-If you connect an external clock to the USB_EXTAL pin only, you should set
-the clock rate to "usb_extal" node only.
-If you connect an oscillator to both the USB_XTAL and USB_EXTAL, this module
-is not needed because this is default setting. (Of course, you can set the
-clock rates to both "usb_extal" and "usb_xtal" nodes.
-
-Case 1: An external clock connects to R-Car SoC
- +----------+ +--- R-Car ---------------------+
- |External |---|USB_EXTAL ---> all usb channels|
- |clock | |USB_XTAL |
- +----------+ +-------------------------------+
-In this case, we need this driver with "usb_extal" clock.
-
-Case 2: An oscillator connects to R-Car SoC
- +----------+ +--- R-Car ---------------------+
- |Oscillator|---|USB_EXTAL -+-> all usb channels|
- | |---|USB_XTAL --+ |
- +----------+ +-------------------------------+
-In this case, we don't need this selector.
-
-Required properties:
-- compatible: "renesas,r8a7795-rcar-usb2-clock-sel" if the device is a part of
- an R8A7795 SoC.
- "renesas,r8a7796-rcar-usb2-clock-sel" if the device if a part of
- an R8A7796 SoC.
- "renesas,rcar-gen3-usb2-clock-sel" for a generic R-Car Gen3
- compatible device.
-
- When compatible with the generic version, nodes must list the
- SoC-specific version corresponding to the platform first
- followed by the generic version.
-
-- reg: offset and length of the USB 2.0 clock selector register block.
-- clocks: A list of phandles and specifier pairs.
-- clock-names: Name of the clocks.
- - The functional clock must be "ehci_ohci"
- - The USB_EXTAL clock pin must be "usb_extal"
- - The USB_XTAL clock pin must be "usb_xtal"
-- #clock-cells: Must be 0
-
-Example (R-Car H3):
-
- usb2_clksel: clock-controller@e6590630 {
- compatible = "renesas,r8a77950-rcar-usb2-clock-sel",
- "renesas,rcar-gen3-usb2-clock-sel";
- reg = <0 0xe6590630 0 0x02>;
- clocks = <&cpg CPG_MOD 703>, <&usb_extal>, <&usb_xtal>;
- clock-names = "ehci_ohci", "usb_extal", "usb_xtal";
- #clock-cells = <0>;
- };
diff --git a/Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.yaml b/Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.yaml
new file mode 100644
index 000000000000..c84f29f1810f
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.yaml
@@ -0,0 +1,101 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/renesas,rcar-usb2-clock-sel.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas R-Car USB 2.0 clock selector
+
+maintainers:
+ - Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
+
+description: |
+ If you connect an external clock to the USB_EXTAL pin only, you should set
+ the clock rate to "usb_extal" node only.
+ If you connect an oscillator to both the USB_XTAL and USB_EXTAL, this module
+ is not needed because this is default setting. (Of course, you can set the
+ clock rates to both "usb_extal" and "usb_xtal" nodes.
+
+ Case 1: An external clock connects to R-Car SoC
+ +----------+ +--- R-Car ---------------------+
+ |External |---|USB_EXTAL ---> all usb channels|
+ |clock | |USB_XTAL |
+ +----------+ +-------------------------------+
+
+ In this case, we need this driver with "usb_extal" clock.
+
+ Case 2: An oscillator connects to R-Car SoC
+ +----------+ +--- R-Car ---------------------+
+ |Oscillator|---|USB_EXTAL -+-> all usb channels|
+ | |---|USB_XTAL --+ |
+ +----------+ +-------------------------------+
+ In this case, we don't need this selector.
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - renesas,r8a774a1-rcar-usb2-clock-sel # RZ/G2M
+ - renesas,r8a774b1-rcar-usb2-clock-sel # RZ/G2N
+ - renesas,r8a774e1-rcar-usb2-clock-sel # RZ/G2H
+ - renesas,r8a7795-rcar-usb2-clock-sel # R-Car H3
+ - renesas,r8a7796-rcar-usb2-clock-sel # R-Car M3-W
+ - renesas,r8a77961-rcar-usb2-clock-sel # R-Car M3-W+
+ - const: renesas,rcar-gen3-usb2-clock-sel
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 4
+
+ clock-names:
+ items:
+ - const: ehci_ohci
+ - const: hs-usb-if
+ - const: usb_extal
+ - const: usb_xtal
+
+ '#clock-cells':
+ const: 0
+
+ power-domains:
+ maxItems: 1
+
+ resets:
+ maxItems: 2
+
+ reset-names:
+ items:
+ - const: ehci_ohci
+ - const: hs-usb-if
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - '#clock-cells'
+ - power-domains
+ - resets
+ - reset-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/r8a7795-cpg-mssr.h>
+ #include <dt-bindings/power/r8a7795-sysc.h>
+
+ usb2_clksel: clock-controller@e6590630 {
+ compatible = "renesas,r8a7795-rcar-usb2-clock-sel",
+ "renesas,rcar-gen3-usb2-clock-sel";
+ reg = <0xe6590630 0x02>;
+ clocks = <&cpg CPG_MOD 703>, <&cpg CPG_MOD 704>,
+ <&usb_extal>, <&usb_xtal>;
+ clock-names = "ehci_ohci", "hs-usb-if", "usb_extal", "usb_xtal";
+ #clock-cells = <0>;
+ power-domains = <&sysc R8A7795_PD_ALWAYS_ON>;
+ resets = <&cpg 703>, <&cpg 704>;
+ reset-names = "ehci_ohci", "hs-usb-if";
+ };
diff --git a/Documentation/devicetree/bindings/clock/renesas,rz-cpg-clocks.txt b/Documentation/devicetree/bindings/clock/renesas,rz-cpg-clocks.txt
deleted file mode 100644
index 8ff3e2774ed8..000000000000
--- a/Documentation/devicetree/bindings/clock/renesas,rz-cpg-clocks.txt
+++ /dev/null
@@ -1,53 +0,0 @@
-* Renesas RZ/A1 Clock Pulse Generator (CPG)
-
-The CPG generates core clocks for the RZ/A1 SoCs. It includes the PLL, variable
-CPU and GPU clocks, and several fixed ratio dividers.
-The CPG also provides a Clock Domain for SoC devices, in combination with the
-CPG Module Stop (MSTP) Clocks.
-
-Required Properties:
-
- - compatible: Must be one of
- - "renesas,r7s72100-cpg-clocks" for the r7s72100 CPG
- and "renesas,rz-cpg-clocks" as a fallback.
- - reg: Base address and length of the memory resource used by the CPG
- - clocks: References to possible parent clocks. Order must match clock modes
- in the datasheet. For the r7s72100, this is extal, usb_x1.
- - #clock-cells: Must be 1
- - clock-output-names: The names of the clocks. Supported clocks are "pll",
- "i", and "g"
- - #power-domain-cells: Must be 0
-
-SoC devices that are part of the CPG/MSTP Clock Domain and can be power-managed
-through an MSTP clock should refer to the CPG device node in their
-"power-domains" property, as documented by the generic PM domain bindings in
-Documentation/devicetree/bindings/power/power_domain.txt.
-
-
-Examples
---------
-
- - CPG device node:
-
- cpg_clocks: cpg_clocks@fcfe0000 {
- #clock-cells = <1>;
- compatible = "renesas,r7s72100-cpg-clocks",
- "renesas,rz-cpg-clocks";
- reg = <0xfcfe0000 0x18>;
- clocks = <&extal_clk>, <&usb_x1_clk>;
- clock-output-names = "pll", "i", "g";
- #power-domain-cells = <0>;
- };
-
-
- - CPG/MSTP Clock Domain member device node:
-
- mtu2: timer@fcff0000 {
- compatible = "renesas,mtu2-r7s72100", "renesas,mtu2";
- reg = <0xfcff0000 0x400>;
- interrupts = <0 107 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "tgi0a";
- clocks = <&mstp3_clks R7S72100_CLK_MTU2>;
- clock-names = "fck";
- power-domains = <&cpg_clocks>;
- };
diff --git a/Documentation/devicetree/bindings/clock/renesas,rzg2l-cpg.yaml b/Documentation/devicetree/bindings/clock/renesas,rzg2l-cpg.yaml
new file mode 100644
index 000000000000..8c18616e5c4d
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/renesas,rzg2l-cpg.yaml
@@ -0,0 +1,89 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/renesas,rzg2l-cpg.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas RZ/{G2L,V2L,V2M} Clock Pulse Generator / Module Standby Mode
+
+maintainers:
+ - Geert Uytterhoeven <geert+renesas@glider.be>
+
+description: |
+ On Renesas RZ/{G2L,V2L}-alike SoC's, the CPG (Clock Pulse Generator) and Module
+ Standby Mode share the same register block. On RZ/V2M, the functionality is
+ similar, but does not have Clock Monitor Registers.
+
+ They provide the following functionalities:
+ - The CPG block generates various core clocks,
+ - The Module Standby Mode block provides two functions:
+ 1. Module Standby, providing a Clock Domain to control the clock supply
+ to individual SoC devices,
+ 2. Reset Control, to perform a software reset of individual SoC devices.
+
+properties:
+ compatible:
+ enum:
+ - renesas,r9a07g043-cpg # RZ/G2UL{Type-1,Type-2} and RZ/Five
+ - renesas,r9a07g044-cpg # RZ/G2{L,LC}
+ - renesas,r9a07g054-cpg # RZ/V2L
+ - renesas,r9a08g045-cpg # RZ/G3S
+ - renesas,r9a09g011-cpg # RZ/V2M
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ description:
+ Clock source to CPG can be either from external clock input (EXCLK) or
+ crystal oscillator (XIN/XOUT).
+ const: extal
+
+ '#clock-cells':
+ description: |
+ - For CPG core clocks, the two clock specifier cells must be "CPG_CORE"
+ and a core clock reference, as defined in
+ <dt-bindings/clock/r9a0*-cpg.h>,
+ - For module clocks, the two clock specifier cells must be "CPG_MOD" and
+ a module number, as defined in <dt-bindings/clock/r9a0*-cpg.h>.
+ const: 2
+
+ '#power-domain-cells':
+ description:
+ SoC devices that are part of the CPG/Module Standby Mode Clock Domain and
+ can be power-managed through Module Standby should refer to the CPG device
+ node in their "power-domains" property, as documented by the generic PM
+ Domain bindings in Documentation/devicetree/bindings/power/power-domain.yaml.
+ const: 0
+
+ '#reset-cells':
+ description:
+ The single reset specifier cell must be the reset number, as defined in
+ <dt-bindings/clock/r9a0*-cpg.h>.
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - '#clock-cells'
+ - '#power-domain-cells'
+ - '#reset-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ cpg: clock-controller@11010000 {
+ compatible = "renesas,r9a07g044-cpg";
+ reg = <0x11010000 0x10000>;
+ clocks = <&extal_clk>;
+ clock-names = "extal";
+ #clock-cells = <2>;
+ #power-domain-cells = <0>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/renesas,rzv2h-cpg.yaml b/Documentation/devicetree/bindings/clock/renesas,rzv2h-cpg.yaml
new file mode 100644
index 000000000000..f261445bf341
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/renesas,rzv2h-cpg.yaml
@@ -0,0 +1,84 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/renesas,rzv2h-cpg.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas RZ/{G3E,V2H(P),V2N} Clock Pulse Generator (CPG)
+
+maintainers:
+ - Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
+
+description:
+ On Renesas RZ/{G3E,V2H(P),V2N} SoCs, the CPG (Clock Pulse Generator) handles
+ generation and control of clock signals for the IP modules, generation and
+ control of resets, and control over booting, low power consumption and power
+ supply domains.
+
+properties:
+ compatible:
+ enum:
+ - renesas,r9a09g047-cpg # RZ/G3E
+ - renesas,r9a09g056-cpg # RZ/V2N
+ - renesas,r9a09g057-cpg # RZ/V2H
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: AUDIO_EXTAL clock input
+ - description: RTXIN clock input
+ - description: QEXTAL clock input
+
+ clock-names:
+ items:
+ - const: audio_extal
+ - const: rtxin
+ - const: qextal
+
+ '#clock-cells':
+ description: |
+ - For CPG core clocks, the two clock specifier cells must be "CPG_CORE"
+ and a core clock reference, as defined in
+ <dt-bindings/clock/renesas,r9a09g0*-cpg.h>,
+ - For module clocks, the two clock specifier cells must be "CPG_MOD" and
+ a module number. The module number is calculated as the CLKON register
+ offset index multiplied by 16, plus the actual bit in the register
+ used to turn the CLK ON. For example, for CGC_GIC_0_GICCLK, the
+ calculation is (1 * 16 + 3) = 0x13.
+ const: 2
+
+ '#power-domain-cells':
+ const: 0
+
+ '#reset-cells':
+ description:
+ The single reset specifier cell must be the reset number. The reset number
+ is calculated as the reset register offset index multiplied by 16, plus the
+ actual bit in the register used to reset the specific IP block. For example,
+ for SYS_0_PRESETN, the calculation is (3 * 16 + 0) = 0x30.
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - '#clock-cells'
+ - '#power-domain-cells'
+ - '#reset-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@10420000 {
+ compatible = "renesas,r9a09g057-cpg";
+ reg = <0x10420000 0x10000>;
+ clocks = <&audio_extal_clk>, <&rtxin_clk>, <&qextal_clk>;
+ clock-names = "audio_extal", "rtxin", "qextal";
+ #clock-cells = <2>;
+ #power-domain-cells = <0>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/renesas,sh73a0-cpg-clocks.txt b/Documentation/devicetree/bindings/clock/renesas,sh73a0-cpg-clocks.txt
deleted file mode 100644
index a8978ec94831..000000000000
--- a/Documentation/devicetree/bindings/clock/renesas,sh73a0-cpg-clocks.txt
+++ /dev/null
@@ -1,35 +0,0 @@
-These bindings should be considered EXPERIMENTAL for now.
-
-* Renesas SH73A0 Clock Pulse Generator (CPG)
-
-The CPG generates core clocks for the SH73A0 SoC. It includes four PLLs
-and several fixed ratio dividers.
-
-Required Properties:
-
- - compatible: Must be "renesas,sh73a0-cpg-clocks"
-
- - reg: Base address and length of the memory resource used by the CPG
-
- - clocks: Reference to the parent clocks ("extal1" and "extal2")
-
- - #clock-cells: Must be 1
-
- - clock-output-names: The names of the clocks. Supported clocks are "main",
- "pll0", "pll1", "pll2", "pll3", "dsi0phy", "dsi1phy", "zg", "m3", "b",
- "m1", "m2", "z", "zx", and "hp".
-
-
-Example
--------
-
- cpg_clocks: cpg_clocks@e6150000 {
- compatible = "renesas,sh73a0-cpg-clocks";
- reg = <0 0xe6150000 0 0x10000>;
- clocks = <&extal1_clk>, <&extal2_clk>;
- #clock-cells = <1>;
- clock-output-names = "main", "pll0", "pll1", "pll2",
- "pll3", "dsi0phy", "dsi1phy",
- "zg", "m3", "b", "m1", "m2",
- "z", "zx", "hp";
- };
diff --git a/Documentation/devicetree/bindings/clock/renesas,versaclock7.yaml b/Documentation/devicetree/bindings/clock/renesas,versaclock7.yaml
new file mode 100644
index 000000000000..b339f1f9f072
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/renesas,versaclock7.yaml
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/renesas,versaclock7.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas Versaclock7 Programmable Clock
+
+maintainers:
+ - Alex Helms <alexander.helms.jy@renesas.com>
+
+description: |
+ Renesas Versaclock7 is a family of configurable clock generator and
+ jitter attenuator ICs with fractional and integer dividers.
+
+properties:
+ '#clock-cells':
+ const: 1
+
+ compatible:
+ enum:
+ - renesas,rc21008a
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: External crystal or oscillator
+
+ clock-names:
+ items:
+ - const: xin
+
+required:
+ - '#clock-cells'
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ - |
+ vc7_xin: clock {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <49152000>;
+ };
+
+ i2c@0 {
+ reg = <0x0 0x100>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ vc7: clock-controller@9 {
+ compatible = "renesas,rc21008a";
+ reg = <0x9>;
+ #clock-cells = <1>;
+ clocks = <&vc7_xin>;
+ clock-names = "xin";
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/riscv,rpmi-clock.yaml b/Documentation/devicetree/bindings/clock/riscv,rpmi-clock.yaml
new file mode 100644
index 000000000000..5d62bf8215c8
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/riscv,rpmi-clock.yaml
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/riscv,rpmi-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: RISC-V RPMI clock service group based clock controller
+
+maintainers:
+ - Anup Patel <anup@brainfault.org>
+
+description: |
+ The RISC-V Platform Management Interface (RPMI) [1] defines a
+ messaging protocol which is modular and extensible. The supervisor
+ software can send/receive RPMI messages via SBI MPXY extension [2]
+ or some dedicated supervisor-mode RPMI transport.
+
+ The RPMI specification [1] defines clock service group for accessing
+ system clocks managed by a platform microcontroller. The supervisor
+ software can access RPMI clock service group via SBI MPXY channel or
+ some dedicated supervisor-mode RPMI transport.
+
+ ===========================================
+ References
+ ===========================================
+
+ [1] RISC-V Platform Management Interface (RPMI) v1.0 (or higher)
+ https://github.com/riscv-non-isa/riscv-rpmi/releases
+
+ [2] RISC-V Supervisor Binary Interface (SBI) v3.0 (or higher)
+ https://github.com/riscv-non-isa/riscv-sbi-doc/releases
+
+properties:
+ compatible:
+ description:
+ Intended for use by the supervisor software.
+ const: riscv,rpmi-clock
+
+ mboxes:
+ maxItems: 1
+ description:
+ Mailbox channel of the underlying RPMI transport or SBI message proxy channel.
+
+ "#clock-cells":
+ const: 1
+ description:
+ Platform specific CLOCK_ID as defined by the RISC-V Platform Management
+ Interface (RPMI) specification.
+
+required:
+ - compatible
+ - mboxes
+ - "#clock-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller {
+ compatible = "riscv,rpmi-clock";
+ mboxes = <&mpxy_mbox 0x1000 0x0>;
+ #clock-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/riscv,rpmi-mpxy-clock.yaml b/Documentation/devicetree/bindings/clock/riscv,rpmi-mpxy-clock.yaml
new file mode 100644
index 000000000000..76f2a1b3d30d
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/riscv,rpmi-mpxy-clock.yaml
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/riscv,rpmi-mpxy-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: RISC-V RPMI clock service group based message proxy
+
+maintainers:
+ - Anup Patel <anup@brainfault.org>
+
+description: |
+ The RISC-V Platform Management Interface (RPMI) [1] defines a
+ messaging protocol which is modular and extensible. The supervisor
+ software can send/receive RPMI messages via SBI MPXY extension [2]
+ or some dedicated supervisor-mode RPMI transport.
+
+ The RPMI specification [1] defines clock service group for accessing
+ system clocks managed by a platform microcontroller. The SBI implementation
+ (machine mode firmware or hypervisor) can implement an SBI MPXY channel
+ to allow RPMI clock service group access to the supervisor software.
+
+ ===========================================
+ References
+ ===========================================
+
+ [1] RISC-V Platform Management Interface (RPMI) v1.0 (or higher)
+ https://github.com/riscv-non-isa/riscv-rpmi/releases
+
+ [2] RISC-V Supervisor Binary Interface (SBI) v3.0 (or higher)
+ https://github.com/riscv-non-isa/riscv-sbi-doc/releases
+
+properties:
+ compatible:
+ description:
+ Intended for use by the SBI implementation.
+ const: riscv,rpmi-mpxy-clock
+
+ mboxes:
+ maxItems: 1
+ description:
+ Mailbox channel of the underlying RPMI transport.
+
+ riscv,sbi-mpxy-channel-id:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ The SBI MPXY channel id to be used for providing RPMI access to
+ the supervisor software.
+
+required:
+ - compatible
+ - mboxes
+ - riscv,sbi-mpxy-channel-id
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-service {
+ compatible = "riscv,rpmi-mpxy-clock";
+ mboxes = <&rpmi_shmem_mbox 0x8>;
+ riscv,sbi-mpxy-channel-id = <0x1000>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/rockchip,px30-cru.txt b/Documentation/devicetree/bindings/clock/rockchip,px30-cru.txt
deleted file mode 100644
index 39f0c1ac84ee..000000000000
--- a/Documentation/devicetree/bindings/clock/rockchip,px30-cru.txt
+++ /dev/null
@@ -1,65 +0,0 @@
-* Rockchip PX30 Clock and Reset Unit
-
-The PX30 clock controller generates and supplies clock to various
-controllers within the SoC and also implements a reset controller for SoC
-peripherals.
-
-Required Properties:
-
-- compatible: PMU for CRU should be "rockchip,px30-pmu-cru"
-- compatible: CRU should be "rockchip,px30-cru"
-- reg: physical base address of the controller and length of memory mapped
- region.
-- #clock-cells: should be 1.
-- #reset-cells: should be 1.
-
-Optional Properties:
-
-- rockchip,grf: phandle to the syscon managing the "general register files"
- If missing, pll rates are not changeable, due to the missing pll lock status.
-
-Each clock is assigned an identifier and client nodes can use this identifier
-to specify the clock which they consume. All available clocks are defined as
-preprocessor macros in the dt-bindings/clock/px30-cru.h headers and can be
-used in device tree sources. Similar macros exist for the reset sources in
-these files.
-
-External clocks:
-
-There are several clocks that are generated outside the SoC. It is expected
-that they are defined using standard clock bindings with following
-clock-output-names:
- - "xin24m" - crystal input - required,
- - "xin32k" - rtc clock - optional,
- - "i2sx_clkin" - external I2S clock - optional,
- - "gmac_clkin" - external GMAC clock - optional
-
-Example: Clock controller node:
-
- pmucru: clock-controller@ff2bc000 {
- compatible = "rockchip,px30-pmucru";
- reg = <0x0 0xff2bc000 0x0 0x1000>;
- #clock-cells = <1>;
- #reset-cells = <1>;
- };
-
- cru: clock-controller@ff2b0000 {
- compatible = "rockchip,px30-cru";
- reg = <0x0 0xff2b0000 0x0 0x1000>;
- rockchip,grf = <&grf>;
- #clock-cells = <1>;
- #reset-cells = <1>;
- };
-
-Example: UART controller node that consumes the clock generated by the clock
- controller:
-
- uart0: serial@ff030000 {
- compatible = "rockchip,px30-uart", "snps,dw-apb-uart";
- reg = <0x0 0xff030000 0x0 0x100>;
- interrupts = <GIC_SPI 15 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&pmucru SCLK_UART0_PMU>, <&pmucru PCLK_UART0_PMU>;
- clock-names = "baudclk", "apb_pclk";
- reg-shift = <2>;
- reg-io-width = <4>;
- };
diff --git a/Documentation/devicetree/bindings/clock/rockchip,px30-cru.yaml b/Documentation/devicetree/bindings/clock/rockchip,px30-cru.yaml
new file mode 100644
index 000000000000..0f0f64b6f8cb
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/rockchip,px30-cru.yaml
@@ -0,0 +1,119 @@
+# SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/rockchip,px30-cru.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip PX30 Clock and Reset Unit (CRU)
+
+maintainers:
+ - Elaine Zhang <zhangqing@rock-chips.com>
+ - Heiko Stuebner <heiko@sntech.de>
+
+description: |
+ The PX30 clock controller generates and supplies clocks to various
+ controllers within the SoC and also implements a reset controller for SoC
+ peripherals.
+ Each clock is assigned an identifier and client nodes can use this identifier
+ to specify the clock which they consume. All available clocks are defined as
+ preprocessor macros in the dt-bindings/clock/px30-cru.h headers and can be
+ used in device tree sources. Similar macros exist for the reset sources in
+ these files.
+ There are several clocks that are generated outside the SoC. It is expected
+ that they are defined using standard clock bindings with following
+ clock-output-names:
+ - "xin24m" - crystal input - required
+ - "xin32k" - rtc clock - optional
+ - "i2sx_clkin" - external I2S clock - optional
+ - "gmac_clkin" - external GMAC clock - optional
+
+properties:
+ compatible:
+ enum:
+ - rockchip,px30-cru
+ - rockchip,px30-pmucru
+
+ reg:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 1
+
+ "#reset-cells":
+ const: 1
+
+ clocks:
+ minItems: 1
+ items:
+ - description: Clock for both PMUCRU and CRU
+ - description: Clock for CRU (sourced from PMUCRU)
+
+ clock-names:
+ minItems: 1
+ items:
+ - const: xin24m
+ - const: gpll
+
+ rockchip,grf:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ Phandle to the syscon managing the "general register files" (GRF),
+ if missing pll rates are not changeable, due to the missing pll
+ lock status.
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - "#clock-cells"
+ - "#reset-cells"
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: rockchip,px30-cru
+
+ then:
+ properties:
+ clocks:
+ minItems: 2
+
+ clock-names:
+ minItems: 2
+
+ else:
+ properties:
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ maxItems: 1
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/px30-cru.h>
+
+ pmucru: clock-controller@ff2bc000 {
+ compatible = "rockchip,px30-pmucru";
+ reg = <0xff2bc000 0x1000>;
+ clocks = <&xin24m>;
+ clock-names = "xin24m";
+ rockchip,grf = <&grf>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
+
+ cru: clock-controller@ff2b0000 {
+ compatible = "rockchip,px30-cru";
+ reg = <0xff2b0000 0x1000>;
+ clocks = <&xin24m>, <&pmucru PLL_GPLL>;
+ clock-names = "xin24m", "gpll";
+ rockchip,grf = <&grf>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/rockchip,rk3036-cru.txt b/Documentation/devicetree/bindings/clock/rockchip,rk3036-cru.txt
deleted file mode 100644
index 20df350b9ef3..000000000000
--- a/Documentation/devicetree/bindings/clock/rockchip,rk3036-cru.txt
+++ /dev/null
@@ -1,56 +0,0 @@
-* Rockchip RK3036 Clock and Reset Unit
-
-The RK3036 clock controller generates and supplies clock to various
-controllers within the SoC and also implements a reset controller for SoC
-peripherals.
-
-Required Properties:
-
-- compatible: should be "rockchip,rk3036-cru"
-- reg: physical base address of the controller and length of memory mapped
- region.
-- #clock-cells: should be 1.
-- #reset-cells: should be 1.
-
-Optional Properties:
-
-- rockchip,grf: phandle to the syscon managing the "general register files"
- If missing pll rates are not changeable, due to the missing pll lock status.
-
-Each clock is assigned an identifier and client nodes can use this identifier
-to specify the clock which they consume. All available clocks are defined as
-preprocessor macros in the dt-bindings/clock/rk3036-cru.h headers and can be
-used in device tree sources. Similar macros exist for the reset sources in
-these files.
-
-External clocks:
-
-There are several clocks that are generated outside the SoC. It is expected
-that they are defined using standard clock bindings with following
-clock-output-names:
- - "xin24m" - crystal input - required,
- - "ext_i2s" - external I2S clock - optional,
- - "rmii_clkin" - external EMAC clock - optional
-
-Example: Clock controller node:
-
- cru: cru@20000000 {
- compatible = "rockchip,rk3036-cru";
- reg = <0x20000000 0x1000>;
- rockchip,grf = <&grf>;
-
- #clock-cells = <1>;
- #reset-cells = <1>;
- };
-
-Example: UART controller node that consumes the clock generated by the clock
- controller:
-
- uart0: serial@20060000 {
- compatible = "snps,dw-apb-uart";
- reg = <0x20060000 0x100>;
- interrupts = <GIC_SPI 20 IRQ_TYPE_LEVEL_HIGH>;
- reg-shift = <2>;
- reg-io-width = <4>;
- clocks = <&cru SCLK_UART0>;
- };
diff --git a/Documentation/devicetree/bindings/clock/rockchip,rk3036-cru.yaml b/Documentation/devicetree/bindings/clock/rockchip,rk3036-cru.yaml
new file mode 100644
index 000000000000..ba5b45464315
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/rockchip,rk3036-cru.yaml
@@ -0,0 +1,72 @@
+# SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/rockchip,rk3036-cru.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip RK3036 Clock and Reset Unit (CRU)
+
+maintainers:
+ - Elaine Zhang <zhangqing@rock-chips.com>
+ - Heiko Stuebner <heiko@sntech.de>
+
+description: |
+ The RK3036 clock controller generates and supplies clocks to various
+ controllers within the SoC and also implements a reset controller for SoC
+ peripherals.
+ Each clock is assigned an identifier and client nodes can use this identifier
+ to specify the clock which they consume. All available clocks are defined as
+ preprocessor macros in the dt-bindings/clock/rk3036-cru.h headers and can be
+ used in device tree sources. Similar macros exist for the reset sources in
+ these files.
+ There are several clocks that are generated outside the SoC. It is expected
+ that they are defined using standard clock bindings with following
+ clock-output-names:
+ - "xin24m" - crystal input - required
+ - "ext_i2s" - external I2S clock - optional
+ - "rmii_clkin" - external EMAC clock - optional
+
+properties:
+ compatible:
+ enum:
+ - rockchip,rk3036-cru
+
+ reg:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 1
+
+ "#reset-cells":
+ const: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ const: xin24m
+
+ rockchip,grf:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ Phandle to the syscon managing the "general register files" (GRF),
+ if missing pll rates are not changeable, due to the missing pll
+ lock status.
+
+required:
+ - compatible
+ - reg
+ - "#clock-cells"
+ - "#reset-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ cru: clock-controller@20000000 {
+ compatible = "rockchip,rk3036-cru";
+ reg = <0x20000000 0x1000>;
+ rockchip,grf = <&grf>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/rockchip,rk3128-cru.txt b/Documentation/devicetree/bindings/clock/rockchip,rk3128-cru.txt
deleted file mode 100644
index 6f8744fd301b..000000000000
--- a/Documentation/devicetree/bindings/clock/rockchip,rk3128-cru.txt
+++ /dev/null
@@ -1,58 +0,0 @@
-* Rockchip RK3126/RK3128 Clock and Reset Unit
-
-The RK3126/RK3128 clock controller generates and supplies clock to various
-controllers within the SoC and also implements a reset controller for SoC
-peripherals.
-
-Required Properties:
-
-- compatible: should be "rockchip,rk3126-cru" or "rockchip,rk3128-cru"
- "rockchip,rk3126-cru" - controller compatible with RK3126 SoC.
- "rockchip,rk3128-cru" - controller compatible with RK3128 SoC.
-- reg: physical base address of the controller and length of memory mapped
- region.
-- #clock-cells: should be 1.
-- #reset-cells: should be 1.
-
-Optional Properties:
-
-- rockchip,grf: phandle to the syscon managing the "general register files"
- If missing pll rates are not changeable, due to the missing pll lock status.
-
-Each clock is assigned an identifier and client nodes can use this identifier
-to specify the clock which they consume. All available clocks are defined as
-preprocessor macros in the dt-bindings/clock/rk3128-cru.h headers and can be
-used in device tree sources. Similar macros exist for the reset sources in
-these files.
-
-External clocks:
-
-There are several clocks that are generated outside the SoC. It is expected
-that they are defined using standard clock bindings with following
-clock-output-names:
- - "xin24m" - crystal input - required,
- - "ext_i2s" - external I2S clock - optional,
- - "gmac_clkin" - external GMAC clock - optional
-
-Example: Clock controller node:
-
- cru: cru@20000000 {
- compatible = "rockchip,rk3128-cru";
- reg = <0x20000000 0x1000>;
- rockchip,grf = <&grf>;
-
- #clock-cells = <1>;
- #reset-cells = <1>;
- };
-
-Example: UART controller node that consumes the clock generated by the clock
- controller:
-
- uart2: serial@20068000 {
- compatible = "rockchip,serial";
- reg = <0x20068000 0x100>;
- interrupts = <GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>;
- clock-frequency = <24000000>;
- clocks = <&cru SCLK_UART2>, <&cru PCLK_UART2>;
- clock-names = "sclk_uart", "pclk_uart";
- };
diff --git a/Documentation/devicetree/bindings/clock/rockchip,rk3128-cru.yaml b/Documentation/devicetree/bindings/clock/rockchip,rk3128-cru.yaml
new file mode 100644
index 000000000000..b3d9c8eca989
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/rockchip,rk3128-cru.yaml
@@ -0,0 +1,76 @@
+# SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/rockchip,rk3128-cru.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip RK3126/RK3128 Clock and Reset Unit (CRU)
+
+maintainers:
+ - Elaine Zhang <zhangqing@rock-chips.com>
+ - Heiko Stuebner <heiko@sntech.de>
+
+description: |
+ The RK3126/RK3128 clock controller generates and supplies clock to various
+ controllers within the SoC and also implements a reset controller for SoC
+ peripherals.
+ Each clock is assigned an identifier and client nodes can use this identifier
+ to specify the clock which they consume. All available clocks are defined as
+ preprocessor macros in the dt-bindings/clock/rk3128-cru.h headers and can be
+ used in device tree sources. Similar macros exist for the reset sources in
+ these files.
+
+properties:
+ compatible:
+ enum:
+ - rockchip,rk3126-cru
+ - rockchip,rk3128-cru
+
+ reg:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 1
+
+ "#reset-cells":
+ const: 1
+
+ clocks:
+ minItems: 1
+ maxItems: 3
+
+ clock-names:
+ minItems: 1
+ items:
+ - const: xin24m
+ - enum:
+ - ext_i2s
+ - gmac_clkin
+ - enum:
+ - ext_i2s
+ - gmac_clkin
+
+ rockchip,grf:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ Phandle to the syscon managing the "general register files" (GRF),
+ if missing pll rates are not changeable, due to the missing pll
+ lock status.
+
+required:
+ - compatible
+ - reg
+ - "#clock-cells"
+ - "#reset-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ cru: clock-controller@20000000 {
+ compatible = "rockchip,rk3128-cru";
+ reg = <0x20000000 0x1000>;
+ rockchip,grf = <&grf>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/rockchip,rk3188-cru.txt b/Documentation/devicetree/bindings/clock/rockchip,rk3188-cru.txt
deleted file mode 100644
index 7f368530a2e4..000000000000
--- a/Documentation/devicetree/bindings/clock/rockchip,rk3188-cru.txt
+++ /dev/null
@@ -1,61 +0,0 @@
-* Rockchip RK3188/RK3066 Clock and Reset Unit
-
-The RK3188/RK3066 clock controller generates and supplies clock to various
-controllers within the SoC and also implements a reset controller for SoC
-peripherals.
-
-Required Properties:
-
-- compatible: should be "rockchip,rk3188-cru", "rockchip,rk3188a-cru" or
- "rockchip,rk3066a-cru"
-- reg: physical base address of the controller and length of memory mapped
- region.
-- #clock-cells: should be 1.
-- #reset-cells: should be 1.
-
-Optional Properties:
-
-- rockchip,grf: phandle to the syscon managing the "general register files"
- If missing pll rates are not changeable, due to the missing pll lock status.
-
-Each clock is assigned an identifier and client nodes can use this identifier
-to specify the clock which they consume. All available clocks are defined as
-preprocessor macros in the dt-bindings/clock/rk3188-cru.h and
-dt-bindings/clock/rk3066-cru.h headers and can be used in device tree sources.
-Similar macros exist for the reset sources in these files.
-
-External clocks:
-
-There are several clocks that are generated outside the SoC. It is expected
-that they are defined using standard clock bindings with following
-clock-output-names:
- - "xin24m" - crystal input - required,
- - "xin32k" - rtc clock - optional,
- - "xin27m" - 27mhz crystal input on rk3066 - optional,
- - "ext_hsadc" - external HSADC clock - optional,
- - "ext_cif0" - external camera clock - optional,
- - "ext_rmii" - external RMII clock - optional,
- - "ext_jtag" - externalJTAG clock - optional
-
-Example: Clock controller node:
-
- cru: cru@20000000 {
- compatible = "rockchip,rk3188-cru";
- reg = <0x20000000 0x1000>;
- rockchip,grf = <&grf>;
-
- #clock-cells = <1>;
- #reset-cells = <1>;
- };
-
-Example: UART controller node that consumes the clock generated by the clock
- controller:
-
- uart0: serial@10124000 {
- compatible = "snps,dw-apb-uart";
- reg = <0x10124000 0x400>;
- interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>;
- reg-shift = <2>;
- reg-io-width = <1>;
- clocks = <&cru SCLK_UART0>;
- };
diff --git a/Documentation/devicetree/bindings/clock/rockchip,rk3188-cru.yaml b/Documentation/devicetree/bindings/clock/rockchip,rk3188-cru.yaml
new file mode 100644
index 000000000000..ddd7e46af0f2
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/rockchip,rk3188-cru.yaml
@@ -0,0 +1,78 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/rockchip,rk3188-cru.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip RK3188/RK3066 Clock and Reset Unit (CRU)
+
+maintainers:
+ - Elaine Zhang <zhangqing@rock-chips.com>
+ - Heiko Stuebner <heiko@sntech.de>
+
+description: |
+ The RK3188/RK3066 clock controller generates and supplies clocks to various
+ controllers within the SoC and also implements a reset controller for SoC
+ peripherals.
+ Each clock is assigned an identifier and client nodes can use this identifier
+ to specify the clock which they consume. All available clocks are defined as
+ preprocessor macros in the dt-bindings/clock/rk3188-cru.h and
+ dt-bindings/clock/rk3066-cru.h headers and can be used in device tree sources.
+ Similar macros exist for the reset sources in these files.
+ There are several clocks that are generated outside the SoC. It is expected
+ that they are defined using standard clock bindings with following
+ clock-output-names:
+ - "xin24m" - crystal input - required
+ - "xin32k" - RTC clock - optional
+ - "xin27m" - 27mhz crystal input on RK3066 - optional
+ - "ext_hsadc" - external HSADC clock - optional
+ - "ext_cif0" - external camera clock - optional
+ - "ext_rmii" - external RMII clock - optional
+ - "ext_jtag" - external JTAG clock - optional
+
+properties:
+ compatible:
+ enum:
+ - rockchip,rk3066a-cru
+ - rockchip,rk3188-cru
+ - rockchip,rk3188a-cru
+
+ reg:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 1
+
+ "#reset-cells":
+ const: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ const: xin24m
+
+ rockchip,grf:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ Phandle to the syscon managing the "general register files" (GRF),
+ if missing pll rates are not changeable, due to the missing pll
+ lock status.
+
+required:
+ - compatible
+ - reg
+ - "#clock-cells"
+ - "#reset-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ cru: clock-controller@20000000 {
+ compatible = "rockchip,rk3188-cru";
+ reg = <0x20000000 0x1000>;
+ rockchip,grf = <&grf>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/rockchip,rk3228-cru.txt b/Documentation/devicetree/bindings/clock/rockchip,rk3228-cru.txt
deleted file mode 100644
index f323048127eb..000000000000
--- a/Documentation/devicetree/bindings/clock/rockchip,rk3228-cru.txt
+++ /dev/null
@@ -1,58 +0,0 @@
-* Rockchip RK3228 Clock and Reset Unit
-
-The RK3228 clock controller generates and supplies clock to various
-controllers within the SoC and also implements a reset controller for SoC
-peripherals.
-
-Required Properties:
-
-- compatible: should be "rockchip,rk3228-cru"
-- reg: physical base address of the controller and length of memory mapped
- region.
-- #clock-cells: should be 1.
-- #reset-cells: should be 1.
-
-Optional Properties:
-
-- rockchip,grf: phandle to the syscon managing the "general register files"
- If missing pll rates are not changeable, due to the missing pll lock status.
-
-Each clock is assigned an identifier and client nodes can use this identifier
-to specify the clock which they consume. All available clocks are defined as
-preprocessor macros in the dt-bindings/clock/rk3228-cru.h headers and can be
-used in device tree sources. Similar macros exist for the reset sources in
-these files.
-
-External clocks:
-
-There are several clocks that are generated outside the SoC. It is expected
-that they are defined using standard clock bindings with following
-clock-output-names:
- - "xin24m" - crystal input - required,
- - "ext_i2s" - external I2S clock - optional,
- - "ext_gmac" - external GMAC clock - optional
- - "ext_hsadc" - external HSADC clock - optional
- - "phy_50m_out" - output clock of the pll in the mac phy
-
-Example: Clock controller node:
-
- cru: cru@20000000 {
- compatible = "rockchip,rk3228-cru";
- reg = <0x20000000 0x1000>;
- rockchip,grf = <&grf>;
-
- #clock-cells = <1>;
- #reset-cells = <1>;
- };
-
-Example: UART controller node that consumes the clock generated by the clock
- controller:
-
- uart0: serial@10110000 {
- compatible = "snps,dw-apb-uart";
- reg = <0x10110000 0x100>;
- interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>;
- reg-shift = <2>;
- reg-io-width = <4>;
- clocks = <&cru SCLK_UART0>;
- };
diff --git a/Documentation/devicetree/bindings/clock/rockchip,rk3228-cru.yaml b/Documentation/devicetree/bindings/clock/rockchip,rk3228-cru.yaml
new file mode 100644
index 000000000000..1050fff72ade
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/rockchip,rk3228-cru.yaml
@@ -0,0 +1,74 @@
+# SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/rockchip,rk3228-cru.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip RK3228 Clock and Reset Unit (CRU)
+
+maintainers:
+ - Elaine Zhang <zhangqing@rock-chips.com>
+ - Heiko Stuebner <heiko@sntech.de>
+
+description: |
+ The RK3228 clock controller generates and supplies clocks to various
+ controllers within the SoC and also implements a reset controller for SoC
+ peripherals.
+ Each clock is assigned an identifier and client nodes can use this identifier
+ to specify the clock which they consume. All available clocks are defined as
+ preprocessor macros in the dt-bindings/clock/rk3228-cru.h headers and can be
+ used in device tree sources. Similar macros exist for the reset sources in
+ these files.
+ There are several clocks that are generated outside the SoC. It is expected
+ that they are defined using standard clock bindings with following
+ clock-output-names:
+ - "xin24m" - crystal input - required
+ - "ext_i2s" - external I2S clock - optional
+ - "ext_gmac" - external GMAC clock - optional
+ - "ext_hsadc" - external HSADC clock - optional
+ - "phy_50m_out" - output clock of the pll in the mac phy
+
+properties:
+ compatible:
+ enum:
+ - rockchip,rk3228-cru
+
+ reg:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 1
+
+ "#reset-cells":
+ const: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ const: xin24m
+
+ rockchip,grf:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ Phandle to the syscon managing the "general register files" (GRF),
+ if missing pll rates are not changeable, due to the missing pll
+ lock status.
+
+required:
+ - compatible
+ - reg
+ - "#clock-cells"
+ - "#reset-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ cru: clock-controller@20000000 {
+ compatible = "rockchip,rk3228-cru";
+ reg = <0x20000000 0x1000>;
+ rockchip,grf = <&grf>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/rockchip,rk3288-cru.txt b/Documentation/devicetree/bindings/clock/rockchip,rk3288-cru.txt
deleted file mode 100644
index 8cb47c39ba53..000000000000
--- a/Documentation/devicetree/bindings/clock/rockchip,rk3288-cru.txt
+++ /dev/null
@@ -1,61 +0,0 @@
-* Rockchip RK3288 Clock and Reset Unit
-
-The RK3288 clock controller generates and supplies clock to various
-controllers within the SoC and also implements a reset controller for SoC
-peripherals.
-
-Required Properties:
-
-- compatible: should be "rockchip,rk3288-cru"
-- reg: physical base address of the controller and length of memory mapped
- region.
-- #clock-cells: should be 1.
-- #reset-cells: should be 1.
-
-Optional Properties:
-
-- rockchip,grf: phandle to the syscon managing the "general register files"
- If missing pll rates are not changeable, due to the missing pll lock status.
-
-Each clock is assigned an identifier and client nodes can use this identifier
-to specify the clock which they consume. All available clocks are defined as
-preprocessor macros in the dt-bindings/clock/rk3288-cru.h headers and can be
-used in device tree sources. Similar macros exist for the reset sources in
-these files.
-
-External clocks:
-
-There are several clocks that are generated outside the SoC. It is expected
-that they are defined using standard clock bindings with following
-clock-output-names:
- - "xin24m" - crystal input - required,
- - "xin32k" - rtc clock - optional,
- - "ext_i2s" - external I2S clock - optional,
- - "ext_hsadc" - external HSADC clock - optional,
- - "ext_edp_24m" - external display port clock - optional,
- - "ext_vip" - external VIP clock - optional,
- - "ext_isp" - external ISP clock - optional,
- - "ext_jtag" - external JTAG clock - optional
-
-Example: Clock controller node:
-
- cru: cru@20000000 {
- compatible = "rockchip,rk3188-cru";
- reg = <0x20000000 0x1000>;
- rockchip,grf = <&grf>;
-
- #clock-cells = <1>;
- #reset-cells = <1>;
- };
-
-Example: UART controller node that consumes the clock generated by the clock
- controller:
-
- uart0: serial@10124000 {
- compatible = "snps,dw-apb-uart";
- reg = <0x10124000 0x400>;
- interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>;
- reg-shift = <2>;
- reg-io-width = <1>;
- clocks = <&cru SCLK_UART0>;
- };
diff --git a/Documentation/devicetree/bindings/clock/rockchip,rk3288-cru.yaml b/Documentation/devicetree/bindings/clock/rockchip,rk3288-cru.yaml
new file mode 100644
index 000000000000..6655e97d52e4
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/rockchip,rk3288-cru.yaml
@@ -0,0 +1,85 @@
+# SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/rockchip,rk3288-cru.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip RK3288 Clock and Reset Unit (CRU)
+
+maintainers:
+ - Elaine Zhang <zhangqing@rock-chips.com>
+ - Heiko Stuebner <heiko@sntech.de>
+
+description: |
+ The RK3288 clock controller generates and supplies clocks to various
+ controllers within the SoC and also implements a reset controller for SoC
+ peripherals.
+
+ A revision of this SoC is available: rk3288w. The clock tree is a bit
+ different so another dt-compatible is available. Noticed that it is only
+ setting the difference but there is no automatic revision detection. This
+ should be performed by boot loaders.
+
+ Each clock is assigned an identifier and client nodes can use this identifier
+ to specify the clock which they consume. All available clocks are defined as
+ preprocessor macros in the dt-bindings/clock/rk3288-cru.h headers and can be
+ used in device tree sources. Similar macros exist for the reset sources in
+ these files.
+
+ There are several clocks that are generated outside the SoC. It is expected
+ that they are defined using standard clock bindings with following
+ clock-output-names:
+ - "xin24m" - crystal input - required,
+ - "xin32k" - rtc clock - optional,
+ - "ext_i2s" - external I2S clock - optional,
+ - "ext_hsadc" - external HSADC clock - optional,
+ - "ext_edp_24m" - external display port clock - optional,
+ - "ext_vip" - external VIP clock - optional,
+ - "ext_isp" - external ISP clock - optional,
+ - "ext_jtag" - external JTAG clock - optional
+
+properties:
+ compatible:
+ enum:
+ - rockchip,rk3288-cru
+ - rockchip,rk3288w-cru
+
+ reg:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 1
+
+ "#reset-cells":
+ const: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ const: xin24m
+
+ rockchip,grf:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ Phandle to the syscon managing the "general register files" (GRF),
+ if missing pll rates are not changeable, due to the missing pll
+ lock status.
+
+required:
+ - compatible
+ - reg
+ - "#clock-cells"
+ - "#reset-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ cru: clock-controller@ff760000 {
+ compatible = "rockchip,rk3288-cru";
+ reg = <0xff760000 0x1000>;
+ rockchip,grf = <&grf>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/rockchip,rk3308-cru.yaml b/Documentation/devicetree/bindings/clock/rockchip,rk3308-cru.yaml
new file mode 100644
index 000000000000..fec37f5b80f6
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/rockchip,rk3308-cru.yaml
@@ -0,0 +1,76 @@
+# SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/rockchip,rk3308-cru.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip RK3308 Clock and Reset Unit (CRU)
+
+maintainers:
+ - Elaine Zhang <zhangqing@rock-chips.com>
+ - Heiko Stuebner <heiko@sntech.de>
+
+description: |
+ The RK3308 clock controller generates and supplies clocks to various
+ controllers within the SoC and also implements a reset controller for SoC
+ peripherals.
+ Each clock is assigned an identifier and client nodes can use this identifier
+ to specify the clock which they consume. All available clocks are defined as
+ preprocessor macros in the dt-bindings/clock/rk3308-cru.h headers and can be
+ used in device tree sources. Similar macros exist for the reset sources in
+ these files.
+ There are several clocks that are generated outside the SoC. It is expected
+ that they are defined using standard clock bindings with following
+ clock-output-names:
+ - "xin24m" - crystal input - required
+ - "xin32k" - rtc clock - optional
+ - "mclk_i2s0_8ch_in", "mclk_i2s1_8ch_in",
+ "mclk_i2s2_8ch_in", "mclk_i2s3_8ch_in",
+ "mclk_i2s0_2ch_in", "mclk_i2s1_2ch_in" - external I2S or
+ SPDIF clock - optional
+ - "mac_clkin" - external MAC clock - optional
+
+properties:
+ compatible:
+ enum:
+ - rockchip,rk3308-cru
+
+ reg:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 1
+
+ "#reset-cells":
+ const: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ const: xin24m
+
+ rockchip,grf:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ Phandle to the syscon managing the "general register files" (GRF),
+ if missing pll rates are not changeable, due to the missing pll
+ lock status.
+
+required:
+ - compatible
+ - reg
+ - "#clock-cells"
+ - "#reset-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ cru: clock-controller@ff500000 {
+ compatible = "rockchip,rk3308-cru";
+ reg = <0xff500000 0x1000>;
+ rockchip,grf = <&grf>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/rockchip,rk3328-cru.txt b/Documentation/devicetree/bindings/clock/rockchip,rk3328-cru.txt
deleted file mode 100644
index 904ae682ea90..000000000000
--- a/Documentation/devicetree/bindings/clock/rockchip,rk3328-cru.txt
+++ /dev/null
@@ -1,58 +0,0 @@
-* Rockchip RK3328 Clock and Reset Unit
-
-The RK3328 clock controller generates and supplies clock to various
-controllers within the SoC and also implements a reset controller for SoC
-peripherals.
-
-Required Properties:
-
-- compatible: should be "rockchip,rk3328-cru"
-- reg: physical base address of the controller and length of memory mapped
- region.
-- #clock-cells: should be 1.
-- #reset-cells: should be 1.
-
-Optional Properties:
-
-- rockchip,grf: phandle to the syscon managing the "general register files"
- If missing pll rates are not changeable, due to the missing pll lock status.
-
-Each clock is assigned an identifier and client nodes can use this identifier
-to specify the clock which they consume. All available clocks are defined as
-preprocessor macros in the dt-bindings/clock/rk3328-cru.h headers and can be
-used in device tree sources. Similar macros exist for the reset sources in
-these files.
-
-External clocks:
-
-There are several clocks that are generated outside the SoC. It is expected
-that they are defined using standard clock bindings with following
-clock-output-names:
- - "xin24m" - crystal input - required,
- - "clkin_i2s" - external I2S clock - optional,
- - "gmac_clkin" - external GMAC clock - optional
- - "phy_50m_out" - output clock of the pll in the mac phy
- - "hdmi_phy" - output clock of the hdmi phy pll - optional
-
-Example: Clock controller node:
-
- cru: clock-controller@ff440000 {
- compatible = "rockchip,rk3328-cru";
- reg = <0x0 0xff440000 0x0 0x1000>;
- rockchip,grf = <&grf>;
-
- #clock-cells = <1>;
- #reset-cells = <1>;
- };
-
-Example: UART controller node that consumes the clock generated by the clock
- controller:
-
- uart0: serial@ff120000 {
- compatible = "snps,dw-apb-uart";
- reg = <0xff120000 0x100>;
- interrupts = <GIC_SPI 56 IRQ_TYPE_LEVEL_HIGH>;
- reg-shift = <2>;
- reg-io-width = <4>;
- clocks = <&cru SCLK_UART0>;
- };
diff --git a/Documentation/devicetree/bindings/clock/rockchip,rk3328-cru.yaml b/Documentation/devicetree/bindings/clock/rockchip,rk3328-cru.yaml
new file mode 100644
index 000000000000..f079c7a2559b
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/rockchip,rk3328-cru.yaml
@@ -0,0 +1,74 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/rockchip,rk3328-cru.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip RK3328 Clock and Reset Unit (CRU)
+
+maintainers:
+ - Elaine Zhang <zhangqing@rock-chips.com>
+ - Heiko Stuebner <heiko@sntech.de>
+
+description: |
+ The RK3328 clock controller generates and supplies clocks to various
+ controllers within the SoC and also implements a reset controller for SoC
+ peripherals.
+ Each clock is assigned an identifier and client nodes can use this identifier
+ to specify the clock which they consume. All available clocks are defined as
+ preprocessor macros in the dt-bindings/clock/rk3328-cru.h headers and can be
+ used in device tree sources. Similar macros exist for the reset sources in
+ these files.
+ There are several clocks that are generated outside the SoC. It is expected
+ that they are defined using standard clock bindings with following
+ clock-output-names:
+ - "xin24m" - crystal input - required,
+ - "clkin_i2s" - external I2S clock - optional,
+ - "gmac_clkin" - external GMAC clock - optional
+ - "phy_50m_out" - output clock of the pll in the mac phy
+ - "hdmi_phy" - output clock of the hdmi phy pll - optional
+
+properties:
+ compatible:
+ enum:
+ - rockchip,rk3328-cru
+
+ reg:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 1
+
+ "#reset-cells":
+ const: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ const: xin24m
+
+ rockchip,grf:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ Phandle to the syscon managing the "general register files" (GRF),
+ if missing pll rates are not changeable, due to the missing pll
+ lock status.
+
+required:
+ - compatible
+ - reg
+ - "#clock-cells"
+ - "#reset-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ cru: clock-controller@ff440000 {
+ compatible = "rockchip,rk3328-cru";
+ reg = <0xff440000 0x1000>;
+ rockchip,grf = <&grf>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/rockchip,rk3368-cru.txt b/Documentation/devicetree/bindings/clock/rockchip,rk3368-cru.txt
deleted file mode 100644
index 7c8bbcfed8d2..000000000000
--- a/Documentation/devicetree/bindings/clock/rockchip,rk3368-cru.txt
+++ /dev/null
@@ -1,61 +0,0 @@
-* Rockchip RK3368 Clock and Reset Unit
-
-The RK3368 clock controller generates and supplies clock to various
-controllers within the SoC and also implements a reset controller for SoC
-peripherals.
-
-Required Properties:
-
-- compatible: should be "rockchip,rk3368-cru"
-- reg: physical base address of the controller and length of memory mapped
- region.
-- #clock-cells: should be 1.
-- #reset-cells: should be 1.
-
-Optional Properties:
-
-- rockchip,grf: phandle to the syscon managing the "general register files"
- If missing, pll rates are not changeable, due to the missing pll lock status.
-
-Each clock is assigned an identifier and client nodes can use this identifier
-to specify the clock which they consume. All available clocks are defined as
-preprocessor macros in the dt-bindings/clock/rk3368-cru.h headers and can be
-used in device tree sources. Similar macros exist for the reset sources in
-these files.
-
-External clocks:
-
-There are several clocks that are generated outside the SoC. It is expected
-that they are defined using standard clock bindings with following
-clock-output-names:
- - "xin24m" - crystal input - required,
- - "xin32k" - rtc clock - optional,
- - "ext_i2s" - external I2S clock - optional,
- - "ext_gmac" - external GMAC clock - optional
- - "ext_hsadc" - external HSADC clock - optional,
- - "ext_isp" - external ISP clock - optional,
- - "ext_jtag" - external JTAG clock - optional
- - "ext_vip" - external VIP clock - optional,
- - "usbotg_out" - output clock of the pll in the otg phy
-
-Example: Clock controller node:
-
- cru: clock-controller@ff760000 {
- compatible = "rockchip,rk3368-cru";
- reg = <0x0 0xff760000 0x0 0x1000>;
- rockchip,grf = <&grf>;
- #clock-cells = <1>;
- #reset-cells = <1>;
- };
-
-Example: UART controller node that consumes the clock generated by the clock
- controller:
-
- uart0: serial@10124000 {
- compatible = "snps,dw-apb-uart";
- reg = <0x10124000 0x400>;
- interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>;
- reg-shift = <2>;
- reg-io-width = <1>;
- clocks = <&cru SCLK_UART0>;
- };
diff --git a/Documentation/devicetree/bindings/clock/rockchip,rk3368-cru.yaml b/Documentation/devicetree/bindings/clock/rockchip,rk3368-cru.yaml
new file mode 100644
index 000000000000..90af242b41c1
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/rockchip,rk3368-cru.yaml
@@ -0,0 +1,78 @@
+# SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/rockchip,rk3368-cru.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip RK3368 Clock and Reset Unit (CRU)
+
+maintainers:
+ - Elaine Zhang <zhangqing@rock-chips.com>
+ - Heiko Stuebner <heiko@sntech.de>
+
+description: |
+ The RK3368 clock controller generates and supplies clocks to various
+ controllers within the SoC and also implements a reset controller for SoC
+ peripherals.
+ Each clock is assigned an identifier and client nodes can use this identifier
+ to specify the clock which they consume. All available clocks are defined as
+ preprocessor macros in the dt-bindings/clock/rk3368-cru.h headers and can be
+ used in device tree sources. Similar macros exist for the reset sources in
+ these files.
+ There are several clocks that are generated outside the SoC. It is expected
+ that they are defined using standard clock bindings with following
+ clock-output-names:
+ - "xin24m" - crystal input - required
+ - "xin32k" - rtc clock - optional
+ - "ext_i2s" - external I2S clock - optional
+ - "ext_gmac" - external GMAC clock - optional
+ - "ext_hsadc" - external HSADC clock - optional
+ - "ext_isp" - external ISP clock - optional
+ - "ext_jtag" - external JTAG clock - optional
+ - "ext_vip" - external VIP clock - optional
+ - "usbotg_out" - output clock of the pll in the otg phy
+
+properties:
+ compatible:
+ enum:
+ - rockchip,rk3368-cru
+
+ reg:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 1
+
+ "#reset-cells":
+ const: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ const: xin24m
+
+ rockchip,grf:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ Phandle to the syscon managing the "general register files" (GRF),
+ if missing pll rates are not changeable, due to the missing pll
+ lock status.
+
+required:
+ - compatible
+ - reg
+ - "#clock-cells"
+ - "#reset-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ cru: clock-controller@ff760000 {
+ compatible = "rockchip,rk3368-cru";
+ reg = <0xff760000 0x1000>;
+ rockchip,grf = <&grf>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/rockchip,rk3399-cru.txt b/Documentation/devicetree/bindings/clock/rockchip,rk3399-cru.txt
deleted file mode 100644
index 3bc56fae90ac..000000000000
--- a/Documentation/devicetree/bindings/clock/rockchip,rk3399-cru.txt
+++ /dev/null
@@ -1,68 +0,0 @@
-* Rockchip RK3399 Clock and Reset Unit
-
-The RK3399 clock controller generates and supplies clock to various
-controllers within the SoC and also implements a reset controller for SoC
-peripherals.
-
-Required Properties:
-
-- compatible: PMU for CRU should be "rockchip,rk3399-pmucru"
-- compatible: CRU should be "rockchip,rk3399-cru"
-- reg: physical base address of the controller and length of memory mapped
- region.
-- #clock-cells: should be 1.
-- #reset-cells: should be 1.
-
-Optional Properties:
-
-- rockchip,grf: phandle to the syscon managing the "general register files".
- It is used for GRF muxes, if missing any muxes present in the GRF will not
- be available.
-
-Each clock is assigned an identifier and client nodes can use this identifier
-to specify the clock which they consume. All available clocks are defined as
-preprocessor macros in the dt-bindings/clock/rk3399-cru.h headers and can be
-used in device tree sources. Similar macros exist for the reset sources in
-these files.
-
-External clocks:
-
-There are several clocks that are generated outside the SoC. It is expected
-that they are defined using standard clock bindings with following
-clock-output-names:
- - "xin24m" - crystal input - required,
- - "xin32k" - rtc clock - optional,
- - "clkin_gmac" - external GMAC clock - optional,
- - "clkin_i2s" - external I2S clock - optional,
- - "pclkin_cif" - external ISP clock - optional,
- - "clk_usbphy0_480m" - output clock of the pll in the usbphy0
- - "clk_usbphy1_480m" - output clock of the pll in the usbphy1
-
-Example: Clock controller node:
-
- pmucru: pmu-clock-controller@ff750000 {
- compatible = "rockchip,rk3399-pmucru";
- reg = <0x0 0xff750000 0x0 0x1000>;
- #clock-cells = <1>;
- #reset-cells = <1>;
- };
-
- cru: clock-controller@ff760000 {
- compatible = "rockchip,rk3399-cru";
- reg = <0x0 0xff760000 0x0 0x1000>;
- #clock-cells = <1>;
- #reset-cells = <1>;
- };
-
-Example: UART controller node that consumes the clock generated by the clock
- controller:
-
- uart0: serial@ff1a0000 {
- compatible = "rockchip,rk3399-uart", "snps,dw-apb-uart";
- reg = <0x0 0xff180000 0x0 0x100>;
- clocks = <&cru SCLK_UART0>, <&cru PCLK_UART0>;
- clock-names = "baudclk", "apb_pclk";
- interrupts = <GIC_SPI 99 IRQ_TYPE_LEVEL_HIGH>;
- reg-shift = <2>;
- reg-io-width = <4>;
- };
diff --git a/Documentation/devicetree/bindings/clock/rockchip,rk3399-cru.yaml b/Documentation/devicetree/bindings/clock/rockchip,rk3399-cru.yaml
new file mode 100644
index 000000000000..0b758e015ee3
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/rockchip,rk3399-cru.yaml
@@ -0,0 +1,83 @@
+# SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/rockchip,rk3399-cru.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip RK3399 Clock and Reset Unit
+
+maintainers:
+ - Elaine Zhang <zhangqing@rock-chips.com>
+ - Heiko Stuebner <heiko@sntech.de>
+
+description: |
+ The RK3399 clock controller generates and supplies clock to various
+ controllers within the SoC and also implements a reset controller for SoC
+ peripherals.
+ Each clock is assigned an identifier and client nodes can use this identifier
+ to specify the clock which they consume. All available clocks are defined as
+ preprocessor macros in the dt-bindings/clock/rk3399-cru.h headers and can be
+ used in device tree sources. Similar macros exist for the reset sources in
+ these files.
+ There are several clocks that are generated outside the SoC. It is expected
+ that they are defined using standard clock bindings with following
+ clock-output-names:
+ - "xin24m" - crystal input - required,
+ - "xin32k" - rtc clock - optional,
+ - "clkin_gmac" - external GMAC clock - optional,
+ - "clkin_i2s" - external I2S clock - optional,
+ - "pclkin_cif" - external ISP clock - optional,
+ - "clk_usbphy0_480m" - output clock of the pll in the usbphy0
+ - "clk_usbphy1_480m" - output clock of the pll in the usbphy1
+
+properties:
+ compatible:
+ enum:
+ - rockchip,rk3399-pmucru
+ - rockchip,rk3399-cru
+
+ reg:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 1
+
+ "#reset-cells":
+ const: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ const: xin24m
+
+ rockchip,grf:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ Phandle to the syscon managing the "general register files". It is used
+ for GRF muxes, if missing any muxes present in the GRF will not be
+ available.
+
+required:
+ - compatible
+ - reg
+ - "#clock-cells"
+ - "#reset-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ pmucru: clock-controller@ff750000 {
+ compatible = "rockchip,rk3399-pmucru";
+ reg = <0xff750000 0x1000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
+ - |
+ cru: clock-controller@ff760000 {
+ compatible = "rockchip,rk3399-cru";
+ reg = <0xff760000 0x1000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/rockchip,rk3528-cru.yaml b/Documentation/devicetree/bindings/clock/rockchip,rk3528-cru.yaml
new file mode 100644
index 000000000000..5a3ec902351c
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/rockchip,rk3528-cru.yaml
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/rockchip,rk3528-cru.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip RK3528 Clock and Reset Controller
+
+maintainers:
+ - Yao Zi <ziyao@disroot.org>
+
+description: |
+ The RK3528 clock controller generates the clock and also implements a reset
+ controller for SoC peripherals. For example, it provides SCLK_UART0 and
+ PCLK_UART0 as well as SRST_P_UART0 and SRST_S_UART0 for the first UART
+ module.
+ Each clock is assigned an identifier, consumer nodes can use it to specify
+ the clock. All available clock and reset IDs are defined in dt-binding
+ headers.
+
+properties:
+ compatible:
+ const: rockchip,rk3528-cru
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: External 24MHz oscillator clock
+ - description: >
+ 50MHz clock generated by PHY module, for generating GMAC0 clocks only.
+
+ clock-names:
+ items:
+ - const: xin24m
+ - const: gmac0
+
+ "#clock-cells":
+ const: 1
+
+ "#reset-cells":
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - "#clock-cells"
+ - "#reset-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@ff4a0000 {
+ compatible = "rockchip,rk3528-cru";
+ reg = <0xff4a0000 0x30000>;
+ clocks = <&xin24m>, <&gmac0_clk>;
+ clock-names = "xin24m", "gmac0";
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/rockchip,rk3562-cru.yaml b/Documentation/devicetree/bindings/clock/rockchip,rk3562-cru.yaml
new file mode 100644
index 000000000000..36a353f5c42a
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/rockchip,rk3562-cru.yaml
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/rockchip,rk3562-cru.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip rk3562 Clock and Reset Control Module
+
+maintainers:
+ - Elaine Zhang <zhangqing@rock-chips.com>
+ - Heiko Stuebner <heiko@sntech.de>
+
+description:
+ The RK3562 clock controller generates the clock and also implements a reset
+ controller for SoC peripherals. For example it provides SCLK_UART2 and
+ PCLK_UART2, as well as SRST_P_UART2 and SRST_S_UART2 for the second UART
+ module.
+
+properties:
+ compatible:
+ const: rockchip,rk3562-cru
+
+ reg:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 1
+
+ "#reset-cells":
+ const: 1
+
+ clocks:
+ maxItems: 2
+
+ clock-names:
+ items:
+ - const: xin24m
+ - const: xin32k
+
+required:
+ - compatible
+ - reg
+ - "#clock-cells"
+ - "#reset-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@ff100000 {
+ compatible = "rockchip,rk3562-cru";
+ reg = <0xff100000 0x40000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/rockchip,rk3568-cru.yaml b/Documentation/devicetree/bindings/clock/rockchip,rk3568-cru.yaml
new file mode 100644
index 000000000000..f809c289445e
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/rockchip,rk3568-cru.yaml
@@ -0,0 +1,73 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/rockchip,rk3568-cru.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ROCKCHIP rk3568 Family Clock Control Module
+
+maintainers:
+ - Elaine Zhang <zhangqing@rock-chips.com>
+ - Heiko Stuebner <heiko@sntech.de>
+
+description: |
+ The RK3568 clock controller generates the clock and also implements a
+ reset controller for SoC peripherals.
+ (examples: provide SCLK_UART1\PCLK_UART1 and SRST_P_UART1\SRST_S_UART1 for UART module)
+ Each clock is assigned an identifier and client nodes can use this identifier
+ to specify the clock which they consume. All available clocks are defined as
+ preprocessor macros in the dt-bindings/clock/rk3568-cru.h headers and can be
+ used in device tree sources.
+
+properties:
+ compatible:
+ enum:
+ - rockchip,rk3568-cru
+ - rockchip,rk3568-pmucru
+
+ reg:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 1
+
+ "#reset-cells":
+ const: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ const: xin24m
+
+ rockchip,grf:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ Phandle to the syscon managing the "general register files" (GRF),
+ if missing pll rates are not changeable, due to the missing pll
+ lock status.
+
+required:
+ - compatible
+ - reg
+ - "#clock-cells"
+ - "#reset-cells"
+
+additionalProperties: false
+
+examples:
+ # Clock Control Module node:
+ - |
+ pmucru: clock-controller@fdd00000 {
+ compatible = "rockchip,rk3568-pmucru";
+ reg = <0xfdd00000 0x1000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
+ - |
+ cru: clock-controller@fdd20000 {
+ compatible = "rockchip,rk3568-cru";
+ reg = <0xfdd20000 0x1000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/rockchip,rk3576-cru.yaml b/Documentation/devicetree/bindings/clock/rockchip,rk3576-cru.yaml
new file mode 100644
index 000000000000..9c9b36049c71
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/rockchip,rk3576-cru.yaml
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/rockchip,rk3576-cru.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip rk3576 Family Clock and Reset Control Module
+
+maintainers:
+ - Elaine Zhang <zhangqing@rock-chips.com>
+ - Heiko Stuebner <heiko@sntech.de>
+ - Detlev Casanova <detlev.casanova@collabora.com>
+
+description:
+ The RK3576 clock controller generates the clock and also implements a reset
+ controller for SoC peripherals. For example it provides SCLK_UART2 and
+ PCLK_UART2, as well as SRST_P_UART2 and SRST_S_UART2 for the second UART
+ module.
+
+properties:
+ compatible:
+ const: rockchip,rk3576-cru
+
+ reg:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 1
+
+ "#reset-cells":
+ const: 1
+
+ clocks:
+ maxItems: 2
+
+ clock-names:
+ items:
+ - const: xin24m
+ - const: xin32k
+
+required:
+ - compatible
+ - reg
+ - "#clock-cells"
+ - "#reset-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@27200000 {
+ compatible = "rockchip,rk3576-cru";
+ reg = <0xfd7c0000 0x5c000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/rockchip,rk3588-cru.yaml b/Documentation/devicetree/bindings/clock/rockchip,rk3588-cru.yaml
new file mode 100644
index 000000000000..4ff175c4992b
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/rockchip,rk3588-cru.yaml
@@ -0,0 +1,67 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/rockchip,rk3588-cru.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip rk3588 Family Clock and Reset Control Module
+
+maintainers:
+ - Elaine Zhang <zhangqing@rock-chips.com>
+ - Heiko Stuebner <heiko@sntech.de>
+
+description: |
+ The RK3588 clock controller generates the clock and also implements a reset
+ controller for SoC peripherals. For example it provides SCLK_UART2 and
+ PCLK_UART2, as well as SRST_P_UART2 and SRST_S_UART2 for the second UART
+ module.
+ Each clock is assigned an identifier and client nodes can use this identifier
+ to specify the clock which they consume. All available clock and reset IDs
+ are defined as preprocessor macros in dt-binding headers.
+
+properties:
+ compatible:
+ enum:
+ - rockchip,rk3588-cru
+
+ reg:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 1
+
+ "#reset-cells":
+ const: 1
+
+ clocks:
+ minItems: 2
+ maxItems: 2
+
+ clock-names:
+ items:
+ - const: xin24m
+ - const: xin32k
+
+ rockchip,grf:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description: >
+ phandle to the syscon managing the "general register files". It is used
+ for GRF muxes, if missing any muxes present in the GRF will not be
+ available.
+
+required:
+ - compatible
+ - reg
+ - "#clock-cells"
+ - "#reset-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ cru: clock-controller@fd7c0000 {
+ compatible = "rockchip,rk3588-cru";
+ reg = <0xfd7c0000 0x5c000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/rockchip,rv1108-cru.txt b/Documentation/devicetree/bindings/clock/rockchip,rv1108-cru.txt
deleted file mode 100644
index 161326a4f9c1..000000000000
--- a/Documentation/devicetree/bindings/clock/rockchip,rv1108-cru.txt
+++ /dev/null
@@ -1,59 +0,0 @@
-* Rockchip RV1108 Clock and Reset Unit
-
-The RV1108 clock controller generates and supplies clock to various
-controllers within the SoC and also implements a reset controller for SoC
-peripherals.
-
-Required Properties:
-
-- compatible: should be "rockchip,rv1108-cru"
-- reg: physical base address of the controller and length of memory mapped
- region.
-- #clock-cells: should be 1.
-- #reset-cells: should be 1.
-
-Optional Properties:
-
-- rockchip,grf: phandle to the syscon managing the "general register files"
- If missing pll rates are not changeable, due to the missing pll lock status.
-
-Each clock is assigned an identifier and client nodes can use this identifier
-to specify the clock which they consume. All available clocks are defined as
-preprocessor macros in the dt-bindings/clock/rv1108-cru.h headers and can be
-used in device tree sources. Similar macros exist for the reset sources in
-these files.
-
-External clocks:
-
-There are several clocks that are generated outside the SoC. It is expected
-that they are defined using standard clock bindings with following
-clock-output-names:
- - "xin24m" - crystal input - required,
- - "ext_vip" - external VIP clock - optional
- - "ext_i2s" - external I2S clock - optional
- - "ext_gmac" - external GMAC clock - optional
- - "hdmiphy" - external clock input derived from HDMI PHY - optional
- - "usbphy" - external clock input derived from USB PHY - optional
-
-Example: Clock controller node:
-
- cru: cru@20200000 {
- compatible = "rockchip,rv1108-cru";
- reg = <0x20200000 0x1000>;
- rockchip,grf = <&grf>;
-
- #clock-cells = <1>;
- #reset-cells = <1>;
- };
-
-Example: UART controller node that consumes the clock generated by the clock
- controller:
-
- uart0: serial@10230000 {
- compatible = "rockchip,rv1108-uart", "snps,dw-apb-uart";
- reg = <0x10230000 0x100>;
- interrupts = <GIC_SPI 44 IRQ_TYPE_LEVEL_HIGH>;
- reg-shift = <2>;
- reg-io-width = <4>;
- clocks = <&cru SCLK_UART0>;
- };
diff --git a/Documentation/devicetree/bindings/clock/rockchip,rv1108-cru.yaml b/Documentation/devicetree/bindings/clock/rockchip,rv1108-cru.yaml
new file mode 100644
index 000000000000..4611d920b8df
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/rockchip,rv1108-cru.yaml
@@ -0,0 +1,75 @@
+# SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/rockchip,rv1108-cru.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip RV1108 Clock and Reset Unit (CRU)
+
+maintainers:
+ - Elaine Zhang <zhangqing@rock-chips.com>
+ - Heiko Stuebner <heiko@sntech.de>
+
+description: |
+ The RV1108 clock controller generates and supplies clocks to various
+ controllers within the SoC and also implements a reset controller for SoC
+ peripherals.
+ Each clock is assigned an identifier and client nodes can use this identifier
+ to specify the clock which they consume. All available clocks are defined as
+ preprocessor macros in the dt-bindings/clock/rv1108-cru.h headers and can be
+ used in device tree sources. Similar macros exist for the reset sources in
+ these files.
+ There are several clocks that are generated outside the SoC. It is expected
+ that they are defined using standard clock bindings with following
+ clock-output-names:
+ - "xin24m" - crystal input - required
+ - "ext_vip" - external VIP clock - optional
+ - "ext_i2s" - external I2S clock - optional
+ - "ext_gmac" - external GMAC clock - optional
+ - "hdmiphy" - external clock input derived from HDMI PHY - optional
+ - "usbphy" - external clock input derived from USB PHY - optional
+
+properties:
+ compatible:
+ enum:
+ - rockchip,rv1108-cru
+
+ reg:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 1
+
+ "#reset-cells":
+ const: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ const: xin24m
+
+ rockchip,grf:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ Phandle to the syscon managing the "general register files" (GRF),
+ if missing pll rates are not changeable, due to the missing pll
+ lock status.
+
+required:
+ - compatible
+ - reg
+ - "#clock-cells"
+ - "#reset-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ cru: clock-controller@20200000 {
+ compatible = "rockchip,rv1108-cru";
+ reg = <0x20200000 0x1000>;
+ rockchip,grf = <&grf>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/rockchip,rv1126-cru.yaml b/Documentation/devicetree/bindings/clock/rockchip,rv1126-cru.yaml
new file mode 100644
index 000000000000..0998f8b922bd
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/rockchip,rv1126-cru.yaml
@@ -0,0 +1,62 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/rockchip,rv1126-cru.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip RV1126 Clock and Reset Unit
+
+maintainers:
+ - Jagan Teki <jagan@edgeble.ai>
+ - Finley Xiao <finley.xiao@rock-chips.com>
+ - Heiko Stuebner <heiko@sntech.de>
+
+description:
+ The RV1126 clock controller generates the clock and also implements a
+ reset controller for SoC peripherals.
+
+properties:
+ compatible:
+ enum:
+ - rockchip,rv1126-cru
+ - rockchip,rv1126-pmucru
+
+ reg:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 1
+
+ "#reset-cells":
+ const: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ const: xin24m
+
+ rockchip,grf:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ Phandle to the syscon managing the "general register files" (GRF),
+ if missing pll rates are not changeable, due to the missing pll
+ lock status.
+
+required:
+ - compatible
+ - reg
+ - "#clock-cells"
+ - "#reset-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ cru: clock-controller@ff490000 {
+ compatible = "rockchip,rv1126-cru";
+ reg = <0xff490000 0x1000>;
+ rockchip,grf = <&grf>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/samsung,exynos-audss-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,exynos-audss-clock.yaml
new file mode 100644
index 000000000000..d819dfaafff9
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/samsung,exynos-audss-clock.yaml
@@ -0,0 +1,80 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/samsung,exynos-audss-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos SoC Audio SubSystem clock controller
+
+maintainers:
+ - Chanwoo Choi <cw00.choi@samsung.com>
+ - Krzysztof Kozlowski <krzk@kernel.org>
+ - Sylwester Nawrocki <s.nawrocki@samsung.com>
+ - Tomasz Figa <tomasz.figa@gmail.com>
+
+description: |
+ All available clocks are defined as preprocessor macros in
+ include/dt-bindings/clock/exynos-audss-clk.h header.
+
+properties:
+ compatible:
+ enum:
+ - samsung,exynos4210-audss-clock
+ - samsung,exynos5250-audss-clock
+ - samsung,exynos5410-audss-clock
+ - samsung,exynos5420-audss-clock
+
+ clocks:
+ minItems: 2
+ items:
+ - description:
+ Fixed rate PLL reference clock, parent of mout_audss. "fin_pll" is
+ used if not specified.
+ - description:
+ Input PLL to the AudioSS block, parent of mout_audss. "fout_epll" is
+ used if not specified.
+ - description:
+ Audio bus clock, parent of mout_i2s. "sclk_audio0" is used if not
+ specified.
+ - description:
+ PCM clock, parent of sclk_pcm. "sclk_pcm0" is used if not specified.
+ - description:
+ External i2s clock, parent of mout_i2s. "cdclk0" is used if not
+ specified.
+
+ clock-names:
+ minItems: 2
+ items:
+ - const: pll_ref
+ - const: pll_in
+ - const: sclk_audio
+ - const: sclk_pcm_in
+ - const: cdclk
+
+ "#clock-cells":
+ const: 1
+
+ power-domains:
+ maxItems: 1
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - "#clock-cells"
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@3810000 {
+ compatible = "samsung,exynos5250-audss-clock";
+ reg = <0x03810000 0x0c>;
+ #clock-cells = <1>;
+ clocks = <&clock 1>, <&clock 7>, <&clock 138>, <&clock 160>, <&ext_i2s_clk>;
+ clock-names = "pll_ref", "pll_in", "sclk_audio", "sclk_pcm_in", "cdclk";
+ };
diff --git a/Documentation/devicetree/bindings/clock/samsung,exynos-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,exynos-clock.yaml
new file mode 100644
index 000000000000..a36781a455b6
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/samsung,exynos-clock.yaml
@@ -0,0 +1,60 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/samsung,exynos-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos SoC clock controller
+
+maintainers:
+ - Chanwoo Choi <cw00.choi@samsung.com>
+ - Krzysztof Kozlowski <krzk@kernel.org>
+ - Sylwester Nawrocki <s.nawrocki@samsung.com>
+ - Tomasz Figa <tomasz.figa@gmail.com>
+
+description: |
+ All available clocks are defined as preprocessor macros in
+ dt-bindings/clock/ headers.
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - samsung,exynos3250-cmu
+ - samsung,exynos3250-cmu-dmc
+ - samsung,exynos3250-cmu-isp
+ - samsung,exynos4210-clock
+ - samsung,exynos4212-clock
+ - samsung,exynos4412-clock
+ - samsung,exynos5250-clock
+ - items:
+ - enum:
+ - samsung,exynos5420-clock
+ - samsung,exynos5800-clock
+ - const: syscon
+
+ clocks:
+ minItems: 1
+ maxItems: 4
+
+ "#clock-cells":
+ const: 1
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - "#clock-cells"
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/exynos5250.h>
+ clock: clock-controller@10010000 {
+ compatible = "samsung,exynos5250-clock";
+ reg = <0x10010000 0x30000>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/samsung,exynos-ext-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,exynos-ext-clock.yaml
new file mode 100644
index 000000000000..c98eff64f2b5
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/samsung,exynos-ext-clock.yaml
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/samsung,exynos-ext-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung SoC external/osc/XXTI/XusbXTI clock
+
+maintainers:
+ - Chanwoo Choi <cw00.choi@samsung.com>
+ - Krzysztof Kozlowski <krzk@kernel.org>
+ - Sylwester Nawrocki <s.nawrocki@samsung.com>
+ - Tomasz Figa <tomasz.figa@gmail.com>
+
+description: |
+ Samsung SoCs require an external clock supplied through XXTI or XusbXTI pins.
+
+properties:
+ compatible:
+ enum:
+ - samsung,clock-xxti
+ - samsung,clock-xusbxti
+ - samsung,exynos5420-oscclk
+
+ "#clock-cells":
+ const: 0
+
+ clock-frequency: true
+
+ clock-output-names:
+ maxItems: 1
+
+required:
+ - compatible
+ - clock-frequency
+
+additionalProperties: false
+
+examples:
+ - |
+ fixed-rate-clocks {
+ clock {
+ compatible = "samsung,clock-xxti";
+ clock-frequency = <24000000>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/samsung,exynos2200-cmu.yaml b/Documentation/devicetree/bindings/clock/samsung,exynos2200-cmu.yaml
new file mode 100644
index 000000000000..89433e6d3518
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/samsung,exynos2200-cmu.yaml
@@ -0,0 +1,247 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/samsung,exynos2200-cmu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos2200 SoC clock controller
+
+maintainers:
+ - Ivaylo Ivanov <ivo.ivanov.ivanov1@gmail.com>
+ - Chanwoo Choi <cw00.choi@samsung.com>
+ - Krzysztof Kozlowski <krzk@kernel.org>
+
+description: |
+ Exynos2200 clock controller is comprised of several CMU units, generating
+ clocks for different domains. Those CMU units are modeled as separate device
+ tree nodes, and might depend on each other. The root clocks in that root tree
+ are two external clocks: XTCXO (76.8 MHz) and RTCCLK (32768 Hz). XTCXO must be
+ defined as a fixed-rate clock in dts, whereas RTCCLK originates from PMIC.
+
+ CMU_TOP is a top-level CMU, where all base clocks are prepared using PLLs and
+ dividers; all other clocks of function blocks (other CMUs) are usually
+ derived from CMU_TOP.
+
+ Each clock is assigned an identifier and client nodes can use this identifier
+ to specify the clock which they consume. All clocks available for usage
+ in clock consumer nodes are defined as preprocessor macros in
+ 'include/dt-bindings/clock/samsung,exynos2200-cmu.h' header.
+
+properties:
+ compatible:
+ enum:
+ - samsung,exynos2200-cmu-alive
+ - samsung,exynos2200-cmu-cmgp
+ - samsung,exynos2200-cmu-hsi0
+ - samsung,exynos2200-cmu-peric0
+ - samsung,exynos2200-cmu-peric1
+ - samsung,exynos2200-cmu-peric2
+ - samsung,exynos2200-cmu-peris
+ - samsung,exynos2200-cmu-top
+ - samsung,exynos2200-cmu-ufs
+ - samsung,exynos2200-cmu-vts
+
+ clocks:
+ minItems: 1
+ maxItems: 6
+
+ clock-names:
+ minItems: 1
+ maxItems: 6
+
+ "#clock-cells":
+ const: 1
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - reg
+ - "#clock-cells"
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos2200-cmu-alive
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (76.8 MHz)
+ - description: CMU_ALIVE NOC clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: noc
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos2200-cmu-cmgp
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (76.8 MHz)
+ - description: CMU_CMGP NOC clock (from CMU_TOP)
+ - description: CMU_CMGP PERI clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: noc
+ - const: peri
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos2200-cmu-hsi0
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (76.8 MHz)
+ - description: External RTC clock (32768 Hz)
+ - description: CMU_HSI0 NOC clock (from CMU_TOP)
+ - description: CMU_HSI0 DPGTC clock (from CMU_TOP)
+ - description: CMU_HSI0 DPOSC clock (from CMU_TOP)
+ - description: CMU_HSI0 USB32DRD clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: rtcclk
+ - const: noc
+ - const: dpgtc
+ - const: dposc
+ - const: usb
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - samsung,exynos2200-cmu-peric0
+ - samsung,exynos2200-cmu-peric1
+ - samsung,exynos2200-cmu-peric2
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (76.8 MHz)
+ - description: CMU_PERICn NOC clock (from CMU_TOP)
+ - description: CMU_PERICn IP0 clock (from CMU_TOP)
+ - description: CMU_PERICn IP1 clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: noc
+ - const: ip0
+ - const: ip1
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos2200-cmu-peris
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (25.6 MHz)
+ - description: CMU_PERIS NOC clock (from CMU_TOP)
+ - description: CMU_PERIS GIC clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: tcxo_div3
+ - const: noc
+ - const: gic
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos2200-cmu-top
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (76.8 MHz)
+
+ clock-names:
+ items:
+ - const: oscclk
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos2200-cmu-ufs
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (76.8 MHz)
+ - description: CMU_UFS NOC clock (from CMU_TOP)
+ - description: CMU_UFS MMC clock (from CMU_TOP)
+ - description: CMU_UFS UFS clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: noc
+ - const: mmc
+ - const: ufs
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos2200-cmu-vts
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (76.8 MHz)
+ - description: CMU_VTS DMIC clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: dmic
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/samsung,exynos2200-cmu.h>
+
+ cmu_vts: clock-controller@15300000 {
+ compatible = "samsung,exynos2200-cmu-vts";
+ reg = <0x15300000 0x8000>;
+ #clock-cells = <1>;
+
+ clocks = <&oscclk>,
+ <&cmu_top CLK_DOUT_CMU_VTS_DMIC>;
+ clock-names = "oscclk", "dmic";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/samsung,exynos4412-isp-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,exynos4412-isp-clock.yaml
new file mode 100644
index 000000000000..bee13436d1ea
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/samsung,exynos4412-isp-clock.yaml
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/samsung,exynos4412-isp-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos4412 SoC ISP clock controller
+
+maintainers:
+ - Chanwoo Choi <cw00.choi@samsung.com>
+ - Krzysztof Kozlowski <krzk@kernel.org>
+ - Sylwester Nawrocki <s.nawrocki@samsung.com>
+ - Tomasz Figa <tomasz.figa@gmail.com>
+
+description: |
+ Clock controller for Samsung Exynos4412 SoC FIMC-ISP (Camera ISP)
+ All available clocks are defined as preprocessor macros in
+ dt-bindings/clock/ headers.
+
+properties:
+ compatible:
+ const: samsung,exynos4412-isp-clock
+
+ clocks:
+ items:
+ - description: CLK_ACLK200 from the main clock controller
+ - description: CLK_ACLK400_MCUISP from the main clock controller
+
+ clock-names:
+ items:
+ - const: aclk200
+ - const: aclk400_mcuisp
+
+ "#clock-cells":
+ const: 1
+
+ power-domains:
+ maxItems: 1
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - "#clock-cells"
+ - clocks
+ - clock-names
+ - power-domains
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/exynos4.h>
+ clock-controller@10048000 {
+ compatible = "samsung,exynos4412-isp-clock";
+ reg = <0x10048000 0x1000>;
+ #clock-cells = <1>;
+ power-domains = <&pd_isp>;
+ clocks = <&clock CLK_ACLK200>, <&clock CLK_ACLK400_MCUISP>;
+ clock-names = "aclk200", "aclk400_mcuisp";
+ };
diff --git a/Documentation/devicetree/bindings/clock/samsung,exynos5260-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,exynos5260-clock.yaml
new file mode 100644
index 000000000000..b05f83533e3d
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/samsung,exynos5260-clock.yaml
@@ -0,0 +1,382 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/samsung,exynos5260-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos5260 SoC clock controller
+
+maintainers:
+ - Chanwoo Choi <cw00.choi@samsung.com>
+ - Krzysztof Kozlowski <krzk@kernel.org>
+ - Sylwester Nawrocki <s.nawrocki@samsung.com>
+ - Tomasz Figa <tomasz.figa@gmail.com>
+
+description: |
+ Expected external clocks, defined in DTS as fixed-rate clocks with a matching
+ name::
+ - "fin_pll" - PLL input clock from XXTI
+ - "xrtcxti" - input clock from XRTCXTI
+ - "ioclk_pcm_extclk" - pcm external operation clock
+ - "ioclk_spdif_extclk" - spdif external operation clock
+ - "ioclk_i2s_cdclk" - i2s0 codec clock
+
+ Phy clocks::
+ There are several clocks which are generated by specific PHYs. These clocks
+ are fed into the clock controller and then routed to the hardware blocks.
+ These clocks are defined as fixed clocks in the driver with following names::
+ - "phyclk_dptx_phy_ch3_txd_clk" - dp phy clock for channel 3
+ - "phyclk_dptx_phy_ch2_txd_clk" - dp phy clock for channel 2
+ - "phyclk_dptx_phy_ch1_txd_clk" - dp phy clock for channel 1
+ - "phyclk_dptx_phy_ch0_txd_clk" - dp phy clock for channel 0
+ - "phyclk_hdmi_phy_tmds_clko" - hdmi phy tmds clock
+ - "phyclk_hdmi_phy_pixel_clko" - hdmi phy pixel clock
+ - "phyclk_hdmi_link_o_tmds_clkhi" - hdmi phy for hdmi link
+ - "phyclk_dptx_phy_o_ref_clk_24m" - dp phy reference clock
+ - "phyclk_dptx_phy_clk_div2"
+ - "phyclk_mipi_dphy_4l_m_rxclkesc0"
+ - "phyclk_usbhost20_phy_phyclock" - usb 2.0 phy clock
+ - "phyclk_usbhost20_phy_freeclk"
+ - "phyclk_usbhost20_phy_clk48mohci"
+ - "phyclk_usbdrd30_udrd30_pipe_pclk"
+ - "phyclk_usbdrd30_udrd30_phyclock" - usb 3.0 phy clock
+
+ All available clocks are defined as preprocessor macros in
+ include/dt-bindings/clock/exynos5260-clk.h header.
+
+properties:
+ compatible:
+ enum:
+ - samsung,exynos5260-clock-top
+ - samsung,exynos5260-clock-peri
+ - samsung,exynos5260-clock-egl
+ - samsung,exynos5260-clock-kfc
+ - samsung,exynos5260-clock-g2d
+ - samsung,exynos5260-clock-mif
+ - samsung,exynos5260-clock-mfc
+ - samsung,exynos5260-clock-g3d
+ - samsung,exynos5260-clock-fsys
+ - samsung,exynos5260-clock-aud
+ - samsung,exynos5260-clock-isp
+ - samsung,exynos5260-clock-gscl
+ - samsung,exynos5260-clock-disp
+
+ clocks:
+ minItems: 1
+ maxItems: 19
+
+ clock-names:
+ minItems: 1
+ maxItems: 19
+
+ "#clock-cells":
+ const: 1
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - "#clock-cells"
+ - reg
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos5260-clock-top
+ then:
+ properties:
+ clocks:
+ minItems: 4
+ maxItems: 4
+ clock-names:
+ items:
+ - const: fin_pll
+ - const: dout_mem_pll
+ - const: dout_bus_pll
+ - const: dout_media_pll
+ required:
+ - clock-names
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos5260-clock-peri
+ then:
+ properties:
+ clocks:
+ minItems: 13
+ maxItems: 13
+ clock-names:
+ items:
+ - const: fin_pll
+ - const: ioclk_pcm_extclk
+ - const: ioclk_i2s_cdclk
+ - const: ioclk_spdif_extclk
+ - const: phyclk_hdmi_phy_ref_cko
+ - const: dout_aclk_peri_66
+ - const: dout_sclk_peri_uart0
+ - const: dout_sclk_peri_uart1
+ - const: dout_sclk_peri_uart2
+ - const: dout_sclk_peri_spi0_b
+ - const: dout_sclk_peri_spi1_b
+ - const: dout_sclk_peri_spi2_b
+ - const: dout_aclk_peri_aud
+ required:
+ - clock-names
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos5260-clock-egl
+ then:
+ properties:
+ clocks:
+ minItems: 2
+ maxItems: 2
+ clock-names:
+ items:
+ - const: fin_pll
+ - const: dout_bus_pll
+ required:
+ - clock-names
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos5260-clock-kfc
+ then:
+ properties:
+ clocks:
+ minItems: 2
+ maxItems: 2
+ clock-names:
+ items:
+ - const: fin_pll
+ - const: dout_media_pll
+ required:
+ - clock-names
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos5260-clock-g2d
+ then:
+ properties:
+ clocks:
+ minItems: 2
+ maxItems: 2
+ clock-names:
+ items:
+ - const: fin_pll
+ - const: dout_aclk_g2d_333
+ required:
+ - clock-names
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos5260-clock-mif
+ then:
+ properties:
+ clocks:
+ minItems: 1
+ maxItems: 1
+ clock-names:
+ items:
+ - const: fin_pll
+ required:
+ - clock-names
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos5260-clock-mfc
+ then:
+ properties:
+ clocks:
+ minItems: 2
+ maxItems: 2
+ clock-names:
+ items:
+ - const: fin_pll
+ - const: dout_aclk_mfc_333
+ required:
+ - clock-names
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos5260-clock-g3d
+ then:
+ properties:
+ clocks:
+ minItems: 1
+ maxItems: 1
+ clock-names:
+ items:
+ - const: fin_pll
+ required:
+ - clock-names
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos5260-clock-fsys
+ then:
+ properties:
+ clocks:
+ minItems: 7
+ maxItems: 7
+ clock-names:
+ items:
+ - const: fin_pll
+ - const: phyclk_usbhost20_phy_phyclock
+ - const: phyclk_usbhost20_phy_freeclk
+ - const: phyclk_usbhost20_phy_clk48mohci
+ - const: phyclk_usbdrd30_udrd30_pipe_pclk
+ - const: phyclk_usbdrd30_udrd30_phyclock
+ - const: dout_aclk_fsys_200
+ required:
+ - clock-names
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos5260-clock-aud
+ then:
+ properties:
+ clocks:
+ minItems: 4
+ maxItems: 4
+ clock-names:
+ items:
+ - const: fin_pll
+ - const: fout_aud_pll
+ - const: ioclk_i2s_cdclk
+ - const: ioclk_pcm_extclk
+ required:
+ - clock-names
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos5260-clock-isp
+ then:
+ properties:
+ clocks:
+ minItems: 4
+ maxItems: 4
+ clock-names:
+ items:
+ - const: fin_pll
+ - const: dout_aclk_isp1_266
+ - const: dout_aclk_isp1_400
+ - const: mout_aclk_isp1_266
+
+ required:
+ - clock-names
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos5260-clock-gscl
+ then:
+ properties:
+ clocks:
+ minItems: 3
+ maxItems: 3
+ clock-names:
+ items:
+ - const: fin_pll
+ - const: dout_aclk_gscl_400
+ - const: dout_aclk_gscl_333
+ required:
+ - clock-names
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos5260-clock-disp
+ then:
+ properties:
+ clocks:
+ minItems: 19
+ maxItems: 19
+ clock-names:
+ items:
+ - const: fin_pll
+ - const: phyclk_dptx_phy_ch3_txd_clk
+ - const: phyclk_dptx_phy_ch2_txd_clk
+ - const: phyclk_dptx_phy_ch1_txd_clk
+ - const: phyclk_dptx_phy_ch0_txd_clk
+ - const: phyclk_hdmi_phy_tmds_clko
+ - const: phyclk_hdmi_phy_ref_clko
+ - const: phyclk_hdmi_phy_pixel_clko
+ - const: phyclk_hdmi_link_o_tmds_clkhi
+ - const: phyclk_mipi_dphy_4l_m_txbyte_clkhs
+ - const: phyclk_dptx_phy_o_ref_clk_24m
+ - const: phyclk_dptx_phy_clk_div2
+ - const: phyclk_mipi_dphy_4l_m_rxclkesc0
+ - const: phyclk_hdmi_phy_ref_cko
+ - const: ioclk_spdif_extclk
+ - const: dout_aclk_peri_aud
+ - const: dout_aclk_disp_222
+ - const: dout_sclk_disp_pixel
+ - const: dout_aclk_disp_333
+ required:
+ - clock-names
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/exynos5260-clk.h>
+
+ fin_pll: clock {
+ compatible = "fixed-clock";
+ clock-output-names = "fin_pll";
+ #clock-cells = <0>;
+ clock-frequency = <24000000>;
+ };
+
+ clock-controller@10010000 {
+ compatible = "samsung,exynos5260-clock-top";
+ reg = <0x10010000 0x10000>;
+ #clock-cells = <1>;
+ clocks = <&fin_pll>,
+ <&clock_mif MIF_DOUT_MEM_PLL>,
+ <&clock_mif MIF_DOUT_BUS_PLL>,
+ <&clock_mif MIF_DOUT_MEDIA_PLL>;
+ clock-names = "fin_pll",
+ "dout_mem_pll",
+ "dout_bus_pll",
+ "dout_media_pll";
+ };
diff --git a/Documentation/devicetree/bindings/clock/samsung,exynos5410-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,exynos5410-clock.yaml
new file mode 100644
index 000000000000..b737c9d35a1c
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/samsung,exynos5410-clock.yaml
@@ -0,0 +1,66 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/samsung,exynos5410-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos5410 SoC clock controller
+
+maintainers:
+ - Chanwoo Choi <cw00.choi@samsung.com>
+ - Krzysztof Kozlowski <krzk@kernel.org>
+ - Sylwester Nawrocki <s.nawrocki@samsung.com>
+ - Tomasz Figa <tomasz.figa@gmail.com>
+
+description: |
+ Expected external clocks, defined in DTS as fixed-rate clocks with a matching
+ name::
+ - "fin_pll" - PLL input clock from XXTI
+
+ All available clocks are defined as preprocessor macros in
+ include/dt-bindings/clock/exynos5410.h header.
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - samsung,exynos5410-clock
+
+ clocks:
+ description:
+ Should contain an entry specifying the root clock from external
+ oscillator supplied through XXTI or XusbXTI pin. This clock should be
+ defined using standard clock bindings with "fin_pll" clock-output-name.
+ That clock is being passed internally to the 9 PLLs.
+ maxItems: 1
+
+ "#clock-cells":
+ const: 1
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - "#clock-cells"
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/exynos5410.h>
+
+ fin_pll: osc-clock {
+ compatible = "fixed-clock";
+ clock-frequency = <24000000>;
+ clock-output-names = "fin_pll";
+ #clock-cells = <0>;
+ };
+
+ clock-controller@10010000 {
+ compatible = "samsung,exynos5410-clock";
+ reg = <0x10010000 0x30000>;
+ #clock-cells = <1>;
+ clocks = <&fin_pll>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/samsung,exynos5433-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,exynos5433-clock.yaml
new file mode 100644
index 000000000000..3f9326e09f79
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/samsung,exynos5433-clock.yaml
@@ -0,0 +1,524 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/samsung,exynos5433-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos5433 SoC clock controller
+
+maintainers:
+ - Chanwoo Choi <cw00.choi@samsung.com>
+ - Krzysztof Kozlowski <krzk@kernel.org>
+ - Sylwester Nawrocki <s.nawrocki@samsung.com>
+ - Tomasz Figa <tomasz.figa@gmail.com>
+
+description: |
+ Expected external clocks, defined in DTS as fixed-rate clocks with a matching
+ name::
+ - "oscclk" - PLL input clock from XXTI
+
+ All available clocks are defined as preprocessor macros in
+ include/dt-bindings/clock/exynos5433.h header.
+
+properties:
+ compatible:
+ enum:
+ # CMU_TOP which generates clocks for
+ # IMEM/FSYS/G3D/GSCL/HEVC/MSCL/G2D/MFC/PERIC/PERIS domains and bus
+ # clocks
+ - samsung,exynos5433-cmu-top
+ # CMU_CPIF which generates clocks for LLI (Low Latency Interface) IP
+ - samsung,exynos5433-cmu-cpif
+ # CMU_MIF which generates clocks for DRAM Memory Controller domain
+ - samsung,exynos5433-cmu-mif
+ # CMU_PERIC which generates clocks for
+ # UART/I2C/SPI/I2S/PCM/SPDIF/PWM/SLIMBUS IPs
+ - samsung,exynos5433-cmu-peric
+ # CMU_PERIS which generates clocks for PMU/TMU/MCT/WDT/RTC/SECKEY/TZPC IPs
+ - samsung,exynos5433-cmu-peris
+ # CMU_FSYS which generates clocks for USB/UFS/SDMMC/TSI/PDMA IPs
+ - samsung,exynos5433-cmu-fsys
+ - samsung,exynos5433-cmu-g2d
+ # CMU_DISP which generates clocks for Display (DECON/HDMI/DSIM/MIXER) IPs
+ - samsung,exynos5433-cmu-disp
+ - samsung,exynos5433-cmu-aud
+ - samsung,exynos5433-cmu-bus0
+ - samsung,exynos5433-cmu-bus1
+ - samsung,exynos5433-cmu-bus2
+ - samsung,exynos5433-cmu-g3d
+ - samsung,exynos5433-cmu-gscl
+ - samsung,exynos5433-cmu-apollo
+ # CMU_ATLAS which generates clocks for Cortex-A57 Quad-core processor,
+ # CoreSight and L2 cache controller
+ - samsung,exynos5433-cmu-atlas
+ # CMU_MSCL which generates clocks for M2M (Memory to Memory) scaler and
+ # JPEG IPs
+ - samsung,exynos5433-cmu-mscl
+ - samsung,exynos5433-cmu-mfc
+ - samsung,exynos5433-cmu-hevc
+ # CMU_ISP which generates clocks for FIMC-ISP/DRC/SCLC/DIS/3DNR IPs
+ - samsung,exynos5433-cmu-isp
+ # CMU_CAM0 which generates clocks for
+ # MIPI_CSIS{0|1}/FIMC_LITE_{A|B|D}/FIMC_3AA{0|1} IPs
+ - samsung,exynos5433-cmu-cam0
+ # CMU_CAM1 which generates clocks for
+ # Cortex-A5/MIPI_CSIS2/FIMC-LITE_C/FIMC-FD IPs
+ - samsung,exynos5433-cmu-cam1
+ # CMU_IMEM which generates clocks for SSS (Security SubSystem) and
+ # SlimSSS IPs
+ - samsung,exynos5433-cmu-imem
+
+ clocks:
+ minItems: 1
+ maxItems: 10
+
+ clock-names:
+ minItems: 1
+ maxItems: 10
+
+ "#clock-cells":
+ const: 1
+
+ power-domains:
+ maxItems: 1
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - "#clock-cells"
+ - reg
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos5433-cmu-top
+ then:
+ properties:
+ clocks:
+ minItems: 4
+ maxItems: 4
+ clock-names:
+ items:
+ - const: oscclk
+ - const: sclk_mphy_pll
+ - const: sclk_mfc_pll
+ - const: sclk_bus_pll
+ required:
+ - clock-names
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos5433-cmu-cpif
+ then:
+ properties:
+ clocks:
+ minItems: 1
+ maxItems: 1
+ clock-names:
+ items:
+ - const: oscclk
+ required:
+ - clock-names
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos5433-cmu-mif
+ then:
+ properties:
+ clocks:
+ minItems: 2
+ maxItems: 2
+ clock-names:
+ items:
+ - const: oscclk
+ - const: sclk_mphy_pll
+ required:
+ - clock-names
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos5433-cmu-fsys
+ then:
+ properties:
+ clocks:
+ minItems: 10
+ maxItems: 10
+ clock-names:
+ items:
+ - const: oscclk
+ - const: sclk_ufs_mphy
+ - const: aclk_fsys_200
+ - const: sclk_pcie_100_fsys
+ - const: sclk_ufsunipro_fsys
+ - const: sclk_mmc2_fsys
+ - const: sclk_mmc1_fsys
+ - const: sclk_mmc0_fsys
+ - const: sclk_usbhost30_fsys
+ - const: sclk_usbdrd30_fsys
+ required:
+ - clock-names
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos5433-cmu-g2d
+ then:
+ properties:
+ clocks:
+ minItems: 3
+ maxItems: 3
+ clock-names:
+ items:
+ - const: oscclk
+ - const: aclk_g2d_266
+ - const: aclk_g2d_400
+ required:
+ - clock-names
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos5433-cmu-disp
+ then:
+ properties:
+ clocks:
+ minItems: 9
+ maxItems: 9
+ clock-names:
+ items:
+ - const: oscclk
+ - const: sclk_dsim1_disp
+ - const: sclk_dsim0_disp
+ - const: sclk_dsd_disp
+ - const: sclk_decon_tv_eclk_disp
+ - const: sclk_decon_vclk_disp
+ - const: sclk_decon_eclk_disp
+ - const: sclk_decon_tv_vclk_disp
+ - const: aclk_disp_333
+ required:
+ - clock-names
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos5433-cmu-aud
+ then:
+ properties:
+ clocks:
+ minItems: 2
+ maxItems: 2
+ clock-names:
+ items:
+ - const: oscclk
+ - const: fout_aud_pll
+ required:
+ - clock-names
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos5433-cmu-bus0
+ then:
+ properties:
+ clocks:
+ minItems: 1
+ maxItems: 1
+ clock-names:
+ items:
+ - const: aclk_bus0_400
+ required:
+ - clock-names
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos5433-cmu-bus1
+ then:
+ properties:
+ clocks:
+ minItems: 1
+ maxItems: 1
+ clock-names:
+ items:
+ - const: aclk_bus1_400
+ required:
+ - clock-names
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos5433-cmu-bus2
+ then:
+ properties:
+ clocks:
+ minItems: 2
+ maxItems: 2
+ clock-names:
+ items:
+ - const: oscclk
+ - const: aclk_bus2_400
+ required:
+ - clock-names
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos5433-cmu-g3d
+ then:
+ properties:
+ clocks:
+ minItems: 2
+ maxItems: 2
+ clock-names:
+ items:
+ - const: oscclk
+ - const: aclk_g3d_400
+ required:
+ - clock-names
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos5433-cmu-gscl
+ then:
+ properties:
+ clocks:
+ minItems: 3
+ maxItems: 3
+ clock-names:
+ items:
+ - const: oscclk
+ - const: aclk_gscl_111
+ - const: aclk_gscl_333
+ required:
+ - clock-names
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos5433-cmu-apollo
+ then:
+ properties:
+ clocks:
+ minItems: 2
+ maxItems: 2
+ clock-names:
+ items:
+ - const: oscclk
+ - const: sclk_bus_pll_apollo
+ required:
+ - clock-names
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos5433-cmu-atlas
+ then:
+ properties:
+ clocks:
+ minItems: 2
+ maxItems: 2
+ clock-names:
+ items:
+ - const: oscclk
+ - const: sclk_bus_pll_atlas
+ required:
+ - clock-names
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos5433-cmu-mscl
+ then:
+ properties:
+ clocks:
+ minItems: 3
+ maxItems: 3
+ clock-names:
+ items:
+ - const: oscclk
+ - const: sclk_jpeg_mscl
+ - const: aclk_mscl_400
+ required:
+ - clock-names
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos5433-cmu-mfc
+ then:
+ properties:
+ clocks:
+ minItems: 2
+ maxItems: 2
+ clock-names:
+ items:
+ - const: oscclk
+ - const: aclk_mfc_400
+ required:
+ - clock-names
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos5433-cmu-hevc
+ then:
+ properties:
+ clocks:
+ minItems: 2
+ maxItems: 2
+ clock-names:
+ items:
+ - const: oscclk
+ - const: aclk_hevc_400
+ required:
+ - clock-names
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos5433-cmu-isp
+ then:
+ properties:
+ clocks:
+ minItems: 3
+ maxItems: 3
+ clock-names:
+ items:
+ - const: oscclk
+ - const: aclk_isp_dis_400
+ - const: aclk_isp_400
+ required:
+ - clock-names
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos5433-cmu-cam0
+ then:
+ properties:
+ clocks:
+ minItems: 4
+ maxItems: 4
+ clock-names:
+ items:
+ - const: oscclk
+ - const: aclk_cam0_333
+ - const: aclk_cam0_400
+ - const: aclk_cam0_552
+ required:
+ - clock-names
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos5433-cmu-cam1
+ then:
+ properties:
+ clocks:
+ minItems: 7
+ maxItems: 7
+ clock-names:
+ items:
+ - const: oscclk
+ - const: sclk_isp_uart_cam1
+ - const: sclk_isp_spi1_cam1
+ - const: sclk_isp_spi0_cam1
+ - const: aclk_cam1_333
+ - const: aclk_cam1_400
+ - const: aclk_cam1_552
+ required:
+ - clock-names
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos5433-cmu-imem
+ then:
+ properties:
+ clocks:
+ minItems: 4
+ maxItems: 4
+ clock-names:
+ items:
+ - const: oscclk
+ - const: aclk_imem_sssx_266
+ - const: aclk_imem_266
+ - const: aclk_imem_200
+ required:
+ - clock-names
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/exynos5433.h>
+ xxti: clock {
+ compatible = "fixed-clock";
+ clock-output-names = "oscclk";
+ #clock-cells = <0>;
+ clock-frequency = <24000000>;
+ };
+
+ clock-controller@10030000 {
+ compatible = "samsung,exynos5433-cmu-top";
+ reg = <0x10030000 0x1000>;
+ #clock-cells = <1>;
+
+ clock-names = "oscclk",
+ "sclk_mphy_pll",
+ "sclk_mfc_pll",
+ "sclk_bus_pll";
+ clocks = <&xxti>,
+ <&cmu_cpif CLK_SCLK_MPHY_PLL>,
+ <&cmu_mif CLK_SCLK_MFC_PLL>,
+ <&cmu_mif CLK_SCLK_BUS_PLL>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/samsung,exynos7-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,exynos7-clock.yaml
new file mode 100644
index 000000000000..c137c6744ef9
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/samsung,exynos7-clock.yaml
@@ -0,0 +1,272 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/samsung,exynos7-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos7 SoC clock controller
+
+maintainers:
+ - Chanwoo Choi <cw00.choi@samsung.com>
+ - Krzysztof Kozlowski <krzk@kernel.org>
+ - Sylwester Nawrocki <s.nawrocki@samsung.com>
+ - Tomasz Figa <tomasz.figa@gmail.com>
+
+description: |
+ Expected external clocks, defined in DTS as fixed-rate clocks with a matching
+ name::
+ - "fin_pll" - PLL input clock from XXTI
+
+ All available clocks are defined as preprocessor macros in
+ include/dt-bindings/clock/exynos7-clk.h header.
+
+properties:
+ compatible:
+ enum:
+ - samsung,exynos7-clock-topc
+ - samsung,exynos7-clock-top0
+ - samsung,exynos7-clock-top1
+ - samsung,exynos7-clock-ccore
+ - samsung,exynos7-clock-peric0
+ - samsung,exynos7-clock-peric1
+ - samsung,exynos7-clock-peris
+ - samsung,exynos7-clock-fsys0
+ - samsung,exynos7-clock-fsys1
+ - samsung,exynos7-clock-mscl
+ - samsung,exynos7-clock-aud
+
+ clocks:
+ minItems: 1
+ maxItems: 13
+
+ clock-names:
+ minItems: 1
+ maxItems: 13
+
+ "#clock-cells":
+ const: 1
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - "#clock-cells"
+ - reg
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos7-clock-top0
+ then:
+ properties:
+ clocks:
+ minItems: 6
+ maxItems: 6
+ clock-names:
+ items:
+ - const: fin_pll
+ - const: dout_sclk_bus0_pll
+ - const: dout_sclk_bus1_pll
+ - const: dout_sclk_cc_pll
+ - const: dout_sclk_mfc_pll
+ - const: dout_sclk_aud_pll
+ required:
+ - clock-names
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos7-clock-top1
+ then:
+ properties:
+ clocks:
+ minItems: 5
+ maxItems: 5
+ clock-names:
+ items:
+ - const: fin_pll
+ - const: dout_sclk_bus0_pll
+ - const: dout_sclk_bus1_pll
+ - const: dout_sclk_cc_pll
+ - const: dout_sclk_mfc_pll
+ required:
+ - clock-names
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos7-clock-ccore
+ then:
+ properties:
+ clocks:
+ minItems: 2
+ maxItems: 2
+ clock-names:
+ items:
+ - const: fin_pll
+ - const: dout_aclk_ccore_133
+ required:
+ - clock-names
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos7-clock-peric0
+ then:
+ properties:
+ clocks:
+ minItems: 3
+ maxItems: 3
+ clock-names:
+ items:
+ - const: fin_pll
+ - const: dout_aclk_peric0_66
+ - const: sclk_uart0
+ required:
+ - clock-names
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos7-clock-peric1
+ then:
+ properties:
+ clocks:
+ minItems: 13
+ maxItems: 13
+ clock-names:
+ items:
+ - const: fin_pll
+ - const: dout_aclk_peric1_66
+ - const: sclk_uart1
+ - const: sclk_uart2
+ - const: sclk_uart3
+ - const: sclk_spi0
+ - const: sclk_spi1
+ - const: sclk_spi2
+ - const: sclk_spi3
+ - const: sclk_spi4
+ - const: sclk_i2s1
+ - const: sclk_pcm1
+ - const: sclk_spdif
+ required:
+ - clock-names
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos7-clock-peris
+ then:
+ properties:
+ clocks:
+ minItems: 2
+ maxItems: 2
+ clock-names:
+ items:
+ - const: fin_pll
+ - const: dout_aclk_peris_66
+ required:
+ - clock-names
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos7-clock-fsys0
+ then:
+ properties:
+ clocks:
+ minItems: 3
+ maxItems: 3
+ clock-names:
+ items:
+ - const: fin_pll
+ - const: dout_aclk_fsys0_200
+ - const: dout_sclk_mmc2
+ required:
+ - clock-names
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos7-clock-fsys1
+ then:
+ properties:
+ clocks:
+ minItems: 7
+ maxItems: 7
+ clock-names:
+ items:
+ - const: fin_pll
+ - const: dout_aclk_fsys1_200
+ - const: dout_sclk_mmc0
+ - const: dout_sclk_mmc1
+ - const: dout_sclk_ufsunipro20
+ - const: dout_sclk_phy_fsys1
+ - const: dout_sclk_phy_fsys1_26m
+ required:
+ - clock-names
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos7-clock-aud
+ then:
+ properties:
+ clocks:
+ minItems: 2
+ maxItems: 2
+ clock-names:
+ items:
+ - const: fin_pll
+ - const: fout_aud_pll
+ required:
+ - clock-names
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/exynos7-clk.h>
+
+ fin_pll: clock {
+ compatible = "fixed-clock";
+ clock-output-names = "fin_pll";
+ #clock-cells = <0>;
+ clock-frequency = <24000000>;
+ };
+
+ clock-controller@105e0000 {
+ compatible = "samsung,exynos7-clock-top1";
+ reg = <0x105e0000 0xb000>;
+ #clock-cells = <1>;
+ clocks = <&fin_pll>,
+ <&clock_topc DOUT_SCLK_BUS0_PLL>,
+ <&clock_topc DOUT_SCLK_BUS1_PLL>,
+ <&clock_topc DOUT_SCLK_CC_PLL>,
+ <&clock_topc DOUT_SCLK_MFC_PLL>;
+ clock-names = "fin_pll",
+ "dout_sclk_bus0_pll",
+ "dout_sclk_bus1_pll",
+ "dout_sclk_cc_pll",
+ "dout_sclk_mfc_pll";
+ };
diff --git a/Documentation/devicetree/bindings/clock/samsung,exynos7870-cmu.yaml b/Documentation/devicetree/bindings/clock/samsung,exynos7870-cmu.yaml
new file mode 100644
index 000000000000..3c58712f12b9
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/samsung,exynos7870-cmu.yaml
@@ -0,0 +1,227 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/samsung,exynos7870-cmu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos7870 SoC clock controller
+
+maintainers:
+ - Kaustabh Chakraborty <kauschluss@disroot.org>
+
+description: |
+ Exynos7870 clock controller is comprised of several CMU units, generating
+ clocks for different domains. Those CMU units are modeled as separate device
+ tree nodes, and might depend on each other. The root clock in that root tree
+ is an external clock: OSCCLK (26 MHz). This external clock must be defined
+ as a fixed-rate clock in dts.
+
+ Each clock is assigned an identifier and client nodes can use this identifier
+ to specify the clock which they consume. All clocks available for usage
+ in clock consumer nodes are defined as preprocessor macros in
+ include/dt-bindings/clock/samsung,exynos7870-cmu.h header.
+
+properties:
+ compatible:
+ enum:
+ - samsung,exynos7870-cmu-mif
+ - samsung,exynos7870-cmu-dispaud
+ - samsung,exynos7870-cmu-fsys
+ - samsung,exynos7870-cmu-g3d
+ - samsung,exynos7870-cmu-isp
+ - samsung,exynos7870-cmu-mfcmscl
+ - samsung,exynos7870-cmu-peri
+
+ clocks:
+ minItems: 1
+ maxItems: 10
+
+ clock-names:
+ minItems: 1
+ maxItems: 10
+
+ "#clock-cells":
+ const: 1
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - "#clock-cells"
+ - reg
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos7870-cmu-mif
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+
+ clock-names:
+ items:
+ - const: oscclk
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos7870-cmu-dispaud
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+ - description: CMU_DISPAUD bus clock (from CMU_MIF)
+ - description: DECON external clock (from CMU_MIF)
+ - description: DECON vertical clock (from CMU_MIF)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: bus
+ - const: decon_eclk
+ - const: decon_vclk
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos7870-cmu-fsys
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+ - description: CMU_FSYS bus clock (from CMU_MIF)
+ - description: USB20DRD clock (from CMU_MIF)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: bus
+ - const: usb20drd
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos7870-cmu-g3d
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+ - description: G3D switch clock (from CMU_MIF)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: switch
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos7870-cmu-isp
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+ - description: ISP camera clock (from CMU_MIF)
+ - description: ISP clock (from CMU_MIF)
+ - description: ISP VRA clock (from CMU_MIF)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: cam
+ - const: isp
+ - const: vra
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos7870-cmu-mfcmscl
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+ - description: MSCL clock (from CMU_MIF)
+ - description: MFC clock (from CMU_MIF)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: mfc
+ - const: mscl
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos7870-cmu-peri
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+ - description: CMU_PERI bus clock (from CMU_MIF)
+ - description: SPI0 clock (from CMU_MIF)
+ - description: SPI1 clock (from CMU_MIF)
+ - description: SPI2 clock (from CMU_MIF)
+ - description: SPI3 clock (from CMU_MIF)
+ - description: SPI4 clock (from CMU_MIF)
+ - description: UART0 clock (from CMU_MIF)
+ - description: UART1 clock (from CMU_MIF)
+ - description: UART2 clock (from CMU_MIF)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: bus
+ - const: spi0
+ - const: spi1
+ - const: spi2
+ - const: spi3
+ - const: spi4
+ - const: uart0
+ - const: uart1
+ - const: uart2
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/samsung,exynos7870-cmu.h>
+
+ cmu_peri: clock-controller@101f0000 {
+ compatible = "samsung,exynos7870-cmu-peri";
+ reg = <0x101f0000 0x1000>;
+ #clock-cells = <1>;
+
+ clock-names = "oscclk", "bus", "spi0", "spi1", "spi2",
+ "spi3", "spi4", "uart0", "uart1", "uart2";
+ clocks = <&oscclk>,
+ <&cmu_mif CLK_GOUT_MIF_CMU_PERI_BUS>,
+ <&cmu_mif CLK_GOUT_MIF_CMU_PERI_SPI0>,
+ <&cmu_mif CLK_GOUT_MIF_CMU_PERI_SPI1>,
+ <&cmu_mif CLK_GOUT_MIF_CMU_PERI_SPI2>,
+ <&cmu_mif CLK_GOUT_MIF_CMU_PERI_SPI3>,
+ <&cmu_mif CLK_GOUT_MIF_CMU_PERI_SPI4>,
+ <&cmu_mif CLK_GOUT_MIF_CMU_PERI_UART0>,
+ <&cmu_mif CLK_GOUT_MIF_CMU_PERI_UART1>,
+ <&cmu_mif CLK_GOUT_MIF_CMU_PERI_UART2>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/samsung,exynos7885-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,exynos7885-clock.yaml
new file mode 100644
index 000000000000..006d33a9e0f1
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/samsung,exynos7885-clock.yaml
@@ -0,0 +1,193 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/samsung,exynos7885-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos7885 SoC clock controller
+
+maintainers:
+ - Dávid Virág <virag.david003@gmail.com>
+ - Chanwoo Choi <cw00.choi@samsung.com>
+ - Krzysztof Kozlowski <krzk@kernel.org>
+ - Sylwester Nawrocki <s.nawrocki@samsung.com>
+ - Tomasz Figa <tomasz.figa@gmail.com>
+
+description: |
+ Exynos7885 clock controller is comprised of several CMU units, generating
+ clocks for different domains. Those CMU units are modeled as separate device
+ tree nodes, and might depend on each other. The root clock in that root tree
+ is an external clock: OSCCLK (26 MHz). This external clock must be defined
+ as a fixed-rate clock in dts.
+
+ CMU_TOP is a top-level CMU, where all base clocks are prepared using PLLs and
+ dividers; all other leaf clocks (other CMUs) are usually derived from CMU_TOP.
+
+ Each clock is assigned an identifier and client nodes can use this identifier
+ to specify the clock which they consume. All clocks available for usage
+ in clock consumer nodes are defined as preprocessor macros in
+ 'dt-bindings/clock/exynos7885.h' header.
+
+properties:
+ compatible:
+ enum:
+ - samsung,exynos7885-cmu-top
+ - samsung,exynos7885-cmu-core
+ - samsung,exynos7885-cmu-fsys
+ - samsung,exynos7885-cmu-peri
+
+ clocks:
+ minItems: 1
+ maxItems: 10
+
+ clock-names:
+ minItems: 1
+ maxItems: 10
+
+ "#clock-cells":
+ const: 1
+
+ reg:
+ maxItems: 1
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos7885-cmu-top
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+
+ clock-names:
+ items:
+ - const: oscclk
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos7885-cmu-core
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+ - description: CMU_CORE bus clock (from CMU_TOP)
+ - description: CCI clock (from CMU_TOP)
+ - description: G3D clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: dout_core_bus
+ - const: dout_core_cci
+ - const: dout_core_g3d
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos7885-cmu-fsys
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+ - description: CMU_FSYS bus clock (from CMU_TOP)
+ - description: MMC_CARD clock (from CMU_TOP)
+ - description: MMC_EMBD clock (from CMU_TOP)
+ - description: MMC_SDIO clock (from CMU_TOP)
+ - description: USB30DRD clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: dout_fsys_bus
+ - const: dout_fsys_mmc_card
+ - const: dout_fsys_mmc_embd
+ - const: dout_fsys_mmc_sdio
+ - const: dout_fsys_usb30drd
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos7885-cmu-peri
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+ - description: CMU_PERI bus clock (from CMU_TOP)
+ - description: SPI0 clock (from CMU_TOP)
+ - description: SPI1 clock (from CMU_TOP)
+ - description: UART0 clock (from CMU_TOP)
+ - description: UART1 clock (from CMU_TOP)
+ - description: UART2 clock (from CMU_TOP)
+ - description: USI0 clock (from CMU_TOP)
+ - description: USI1 clock (from CMU_TOP)
+ - description: USI2 clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: dout_peri_bus
+ - const: dout_peri_spi0
+ - const: dout_peri_spi1
+ - const: dout_peri_uart0
+ - const: dout_peri_uart1
+ - const: dout_peri_uart2
+ - const: dout_peri_usi0
+ - const: dout_peri_usi1
+ - const: dout_peri_usi2
+
+required:
+ - compatible
+ - "#clock-cells"
+ - clocks
+ - clock-names
+ - reg
+
+additionalProperties: false
+
+examples:
+ # Clock controller node for CMU_PERI
+ - |
+ #include <dt-bindings/clock/exynos7885.h>
+
+ cmu_peri: clock-controller@10010000 {
+ compatible = "samsung,exynos7885-cmu-peri";
+ reg = <0x10010000 0x8000>;
+ #clock-cells = <1>;
+
+ clocks = <&oscclk>,
+ <&cmu_top CLK_DOUT_PERI_BUS>,
+ <&cmu_top CLK_DOUT_PERI_SPI0>,
+ <&cmu_top CLK_DOUT_PERI_SPI1>,
+ <&cmu_top CLK_DOUT_PERI_UART0>,
+ <&cmu_top CLK_DOUT_PERI_UART1>,
+ <&cmu_top CLK_DOUT_PERI_UART2>,
+ <&cmu_top CLK_DOUT_PERI_USI0>,
+ <&cmu_top CLK_DOUT_PERI_USI1>,
+ <&cmu_top CLK_DOUT_PERI_USI2>;
+ clock-names = "oscclk",
+ "dout_peri_bus",
+ "dout_peri_spi0",
+ "dout_peri_spi1",
+ "dout_peri_uart0",
+ "dout_peri_uart1",
+ "dout_peri_uart2",
+ "dout_peri_usi0",
+ "dout_peri_usi1",
+ "dout_peri_usi2";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/samsung,exynos850-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,exynos850-clock.yaml
new file mode 100644
index 000000000000..cdc5ded59fe5
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/samsung,exynos850-clock.yaml
@@ -0,0 +1,353 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/samsung,exynos850-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos850 SoC clock controller
+
+maintainers:
+ - Sam Protsenko <semen.protsenko@linaro.org>
+ - Chanwoo Choi <cw00.choi@samsung.com>
+ - Krzysztof Kozlowski <krzk@kernel.org>
+ - Sylwester Nawrocki <s.nawrocki@samsung.com>
+ - Tomasz Figa <tomasz.figa@gmail.com>
+
+description: |
+ Exynos850 clock controller is comprised of several CMU units, generating
+ clocks for different domains. Those CMU units are modeled as separate device
+ tree nodes, and might depend on each other. Root clocks in that clock tree are
+ two external clocks:: OSCCLK (26 MHz) and RTCCLK (32768 Hz). Those external
+ clocks must be defined as fixed-rate clocks in dts.
+
+ CMU_TOP is a top-level CMU, where all base clocks are prepared using PLLs and
+ dividers; all other leaf clocks (other CMUs) are usually derived from CMU_TOP.
+
+ Each clock is assigned an identifier and client nodes can use this identifier
+ to specify the clock which they consume. All clocks available for usage
+ in clock consumer nodes are defined as preprocessor macros in
+ 'dt-bindings/clock/exynos850.h' header.
+
+properties:
+ compatible:
+ enum:
+ - samsung,exynos850-cmu-top
+ - samsung,exynos850-cmu-apm
+ - samsung,exynos850-cmu-aud
+ - samsung,exynos850-cmu-cmgp
+ - samsung,exynos850-cmu-core
+ - samsung,exynos850-cmu-cpucl0
+ - samsung,exynos850-cmu-cpucl1
+ - samsung,exynos850-cmu-dpu
+ - samsung,exynos850-cmu-g3d
+ - samsung,exynos850-cmu-hsi
+ - samsung,exynos850-cmu-is
+ - samsung,exynos850-cmu-mfcmscl
+ - samsung,exynos850-cmu-peri
+
+ clocks:
+ minItems: 1
+ maxItems: 5
+
+ clock-names:
+ minItems: 1
+ maxItems: 5
+
+ "#clock-cells":
+ const: 1
+
+ reg:
+ maxItems: 1
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos850-cmu-top
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+
+ clock-names:
+ items:
+ - const: oscclk
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos850-cmu-apm
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+ - description: CMU_APM bus clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: dout_clkcmu_apm_bus
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos850-cmu-aud
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+ - description: AUD clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: dout_aud
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos850-cmu-cmgp
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+ - description: CMU_CMGP bus clock (from CMU_APM)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: gout_clkcmu_cmgp_bus
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos850-cmu-core
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+ - description: CMU_CORE bus clock (from CMU_TOP)
+ - description: CCI clock (from CMU_TOP)
+ - description: eMMC clock (from CMU_TOP)
+ - description: SSS clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: dout_core_bus
+ - const: dout_core_cci
+ - const: dout_core_mmc_embd
+ - const: dout_core_sss
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos850-cmu-cpucl0
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+ - description: CPUCL0 switch clock (from CMU_TOP)
+ - description: CPUCL0 debug clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: dout_cpucl0_switch
+ - const: dout_cpucl0_dbg
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos850-cmu-cpucl1
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+ - description: CPUCL1 switch clock (from CMU_TOP)
+ - description: CPUCL1 debug clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: dout_cpucl1_switch
+ - const: dout_cpucl1_dbg
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos850-cmu-dpu
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+ - description: DPU clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: dout_dpu
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos850-cmu-g3d
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+ - description: G3D clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: dout_g3d_switch
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos850-cmu-hsi
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+ - description: External RTC clock (32768 Hz)
+ - description: CMU_HSI bus clock (from CMU_TOP)
+ - description: SD card clock (from CMU_TOP)
+ - description: USB 2.0 DRD clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: rtcclk
+ - const: dout_hsi_bus
+ - const: dout_hsi_mmc_card
+ - const: dout_hsi_usb20drd
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos850-cmu-is
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+ - description: CMU_IS bus clock (from CMU_TOP)
+ - description: Image Texture Processing core clock (from CMU_TOP)
+ - description: Visual Recognition Accelerator clock (from CMU_TOP)
+ - description: Geometric Distortion Correction clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: dout_is_bus
+ - const: dout_is_itp
+ - const: dout_is_vra
+ - const: dout_is_gdc
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos850-cmu-mfcmscl
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+ - description: Multi-Format Codec clock (from CMU_TOP)
+ - description: Memory to Memory Scaler clock (from CMU_TOP)
+ - description: Multi-Channel Scaler clock (from CMU_TOP)
+ - description: JPEG codec clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: dout_mfcmscl_mfc
+ - const: dout_mfcmscl_m2m
+ - const: dout_mfcmscl_mcsc
+ - const: dout_mfcmscl_jpeg
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos850-cmu-peri
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+ - description: CMU_PERI bus clock (from CMU_TOP)
+ - description: UART clock (from CMU_TOP)
+ - description: Parent clock for HSI2C and SPI (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: dout_peri_bus
+ - const: dout_peri_uart
+ - const: dout_peri_ip
+
+required:
+ - compatible
+ - "#clock-cells"
+ - clocks
+ - clock-names
+ - reg
+
+additionalProperties: false
+
+examples:
+ # Clock controller node for CMU_PERI
+ - |
+ #include <dt-bindings/clock/exynos850.h>
+
+ cmu_peri: clock-controller@10030000 {
+ compatible = "samsung,exynos850-cmu-peri";
+ reg = <0x10030000 0x8000>;
+ #clock-cells = <1>;
+
+ clocks = <&oscclk>, <&cmu_top CLK_DOUT_PERI_BUS>,
+ <&cmu_top CLK_DOUT_PERI_UART>,
+ <&cmu_top CLK_DOUT_PERI_IP>;
+ clock-names = "oscclk", "dout_peri_bus",
+ "dout_peri_uart", "dout_peri_ip";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/samsung,exynos8895-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,exynos8895-clock.yaml
new file mode 100644
index 000000000000..111de33ce00b
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/samsung,exynos8895-clock.yaml
@@ -0,0 +1,239 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/samsung,exynos8895-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos8895 SoC clock controller
+
+maintainers:
+ - Ivaylo Ivanov <ivo.ivanov.ivanov1@gmail.com>
+ - Chanwoo Choi <cw00.choi@samsung.com>
+ - Krzysztof Kozlowski <krzk@kernel.org>
+
+description: |
+ Exynos8895 clock controller is comprised of several CMU units, generating
+ clocks for different domains. Those CMU units are modeled as separate device
+ tree nodes, and might depend on each other. The root clock in that root tree
+ is an external clock: OSCCLK (26 MHz). This external clock must be defined
+ as a fixed-rate clock in dts.
+
+ CMU_TOP is a top-level CMU, where all base clocks are prepared using PLLs and
+ dividers; all other clocks of function blocks (other CMUs) are usually
+ derived from CMU_TOP.
+
+ Each clock is assigned an identifier and client nodes can use this identifier
+ to specify the clock which they consume. All clocks available for usage
+ in clock consumer nodes are defined as preprocessor macros in
+ 'include/dt-bindings/clock/samsung,exynos8895.h' header.
+
+properties:
+ compatible:
+ enum:
+ - samsung,exynos8895-cmu-fsys0
+ - samsung,exynos8895-cmu-fsys1
+ - samsung,exynos8895-cmu-peric0
+ - samsung,exynos8895-cmu-peric1
+ - samsung,exynos8895-cmu-peris
+ - samsung,exynos8895-cmu-top
+
+ clocks:
+ minItems: 1
+ maxItems: 16
+
+ clock-names:
+ minItems: 1
+ maxItems: 16
+
+ "#clock-cells":
+ const: 1
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - reg
+ - "#clock-cells"
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos8895-cmu-fsys0
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+ - description: CMU_FSYS0 BUS clock (from CMU_TOP)
+ - description: CMU_FSYS0 DPGTC clock (from CMU_TOP)
+ - description: CMU_FSYS0 MMC_EMBD clock (from CMU_TOP)
+ - description: CMU_FSYS0 UFS_EMBD clock (from CMU_TOP)
+ - description: CMU_FSYS0 USBDRD30 clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: bus
+ - const: dpgtc
+ - const: mmc
+ - const: ufs
+ - const: usbdrd30
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos8895-cmu-fsys1
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+ - description: CMU_FSYS1 BUS clock (from CMU_TOP)
+ - description: CMU_FSYS1 PCIE clock (from CMU_TOP)
+ - description: CMU_FSYS1 UFS_CARD clock (from CMU_TOP)
+ - description: CMU_FSYS1 MMC_CARD clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: bus
+ - const: pcie
+ - const: ufs
+ - const: mmc
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos8895-cmu-peric0
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+ - description: CMU_PERIC0 BUS clock (from CMU_TOP)
+ - description: CMU_PERIC0 UART_DBG clock (from CMU_TOP)
+ - description: CMU_PERIC0 USI00 clock (from CMU_TOP)
+ - description: CMU_PERIC0 USI01 clock (from CMU_TOP)
+ - description: CMU_PERIC0 USI02 clock (from CMU_TOP)
+ - description: CMU_PERIC0 USI03 clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: bus
+ - const: uart
+ - const: usi0
+ - const: usi1
+ - const: usi2
+ - const: usi3
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos8895-cmu-peric1
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+ - description: CMU_PERIC1 BUS clock (from CMU_TOP)
+ - description: CMU_PERIC1 SPEEDY2 clock (from CMU_TOP)
+ - description: CMU_PERIC1 SPI_CAM0 clock (from CMU_TOP)
+ - description: CMU_PERIC1 SPI_CAM1 clock (from CMU_TOP)
+ - description: CMU_PERIC1 UART_BT clock (from CMU_TOP)
+ - description: CMU_PERIC1 USI04 clock (from CMU_TOP)
+ - description: CMU_PERIC1 USI05 clock (from CMU_TOP)
+ - description: CMU_PERIC1 USI06 clock (from CMU_TOP)
+ - description: CMU_PERIC1 USI07 clock (from CMU_TOP)
+ - description: CMU_PERIC1 USI08 clock (from CMU_TOP)
+ - description: CMU_PERIC1 USI09 clock (from CMU_TOP)
+ - description: CMU_PERIC1 USI10 clock (from CMU_TOP)
+ - description: CMU_PERIC1 USI11 clock (from CMU_TOP)
+ - description: CMU_PERIC1 USI12 clock (from CMU_TOP)
+ - description: CMU_PERIC1 USI13 clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: bus
+ - const: speedy
+ - const: cam0
+ - const: cam1
+ - const: uart
+ - const: usi4
+ - const: usi5
+ - const: usi6
+ - const: usi7
+ - const: usi8
+ - const: usi9
+ - const: usi10
+ - const: usi11
+ - const: usi12
+ - const: usi13
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos8895-cmu-peris
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+ - description: CMU_PERIS BUS clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: bus
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos8895-cmu-top
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+
+ clock-names:
+ items:
+ - const: oscclk
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/samsung,exynos8895.h>
+
+ cmu_fsys1: clock-controller@11400000 {
+ compatible = "samsung,exynos8895-cmu-fsys1";
+ reg = <0x11400000 0x8000>;
+ #clock-cells = <1>;
+
+ clocks = <&oscclk>,
+ <&cmu_top CLK_DOUT_CMU_FSYS1_BUS>,
+ <&cmu_top CLK_DOUT_CMU_FSYS1_PCIE>,
+ <&cmu_top CLK_DOUT_CMU_FSYS1_UFS_CARD>,
+ <&cmu_top CLK_DOUT_CMU_FSYS1_MMC_CARD>;
+ clock-names = "oscclk", "bus", "pcie", "ufs", "mmc";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/samsung,exynos990-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,exynos990-clock.yaml
new file mode 100644
index 000000000000..5cd2d80b8ed6
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/samsung,exynos990-clock.yaml
@@ -0,0 +1,164 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/samsung,exynos990-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos990 SoC clock controller
+
+maintainers:
+ - Igor Belwon <igor.belwon@mentallysanemainliners.org>
+ - Chanwoo Choi <cw00.choi@samsung.com>
+ - Krzysztof Kozlowski <krzk@kernel.org>
+
+description: |
+ Exynos990 clock controller is comprised of several CMU units, generating
+ clocks for different domains. Those CMU units are modeled as separate device
+ tree nodes, and might depend on each other. The root clock in that root tree
+ is an external clock: OSCCLK (26 MHz). This external clock must be defined
+ as a fixed-rate clock in dts.
+
+ CMU_TOP is a top-level CMU, where all base clocks are prepared using PLLs and
+ dividers; all other clocks of function blocks (other CMUs) are usually
+ derived from CMU_TOP.
+
+ Each clock is assigned an identifier and client nodes can use this identifier
+ to specify the clock which they consume. All clocks available for usage
+ in clock consumer nodes are defined as preprocessor macros in
+ 'include/dt-bindings/clock/samsung,exynos990.h' header.
+
+properties:
+ compatible:
+ enum:
+ - samsung,exynos990-cmu-peric1
+ - samsung,exynos990-cmu-peric0
+ - samsung,exynos990-cmu-hsi0
+ - samsung,exynos990-cmu-peris
+ - samsung,exynos990-cmu-top
+
+ clocks:
+ minItems: 1
+ maxItems: 5
+
+ clock-names:
+ minItems: 1
+ maxItems: 5
+
+ "#clock-cells":
+ const: 1
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - "#clock-cells"
+ - reg
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - samsung,exynos990-cmu-peric1
+ - samsung,exynos990-cmu-peric0
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+ - description: Connectivity Peripheral 0/1 bus clock (from CMU_TOP)
+ - description: Connectivity Peripheral 0/1 IP clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: bus
+ - const: ip
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos990-cmu-hsi0
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+ - description: CMU_HSI0 BUS clock (from CMU_TOP)
+ - description: CMU_HSI0 USB31DRD clock (from CMU_TOP)
+ - description: CMU_HSI0 USBDP_DEBUG clock (from CMU_TOP)
+ - description: CMU_HSI0 DPGTC clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: bus
+ - const: usb31drd
+ - const: usbdp_debug
+ - const: dpgtc
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos990-cmu-peris
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+ - description: CMU_PERIS BUS clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: bus
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos990-cmu-top
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+
+ clock-names:
+ items:
+ - const: oscclk
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/samsung,exynos990.h>
+
+ cmu_hsi0: clock-controller@10a00000 {
+ compatible = "samsung,exynos990-cmu-hsi0";
+ reg = <0x10a00000 0x8000>;
+ #clock-cells = <1>;
+
+ clocks = <&oscclk>,
+ <&cmu_top CLK_DOUT_CMU_HSI0_BUS>,
+ <&cmu_top CLK_DOUT_CMU_HSI0_USB31DRD>,
+ <&cmu_top CLK_DOUT_CMU_HSI0_USBDP_DEBUG>,
+ <&cmu_top CLK_DOUT_CMU_HSI0_DPGTC>;
+ clock-names = "oscclk",
+ "bus",
+ "usb31drd",
+ "usbdp_debug",
+ "dpgtc";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/samsung,exynosautov9-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,exynosautov9-clock.yaml
new file mode 100644
index 000000000000..32f39e543b36
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/samsung,exynosautov9-clock.yaml
@@ -0,0 +1,282 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/samsung,exynosautov9-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos Auto v9 SoC clock controller
+
+maintainers:
+ - Chanho Park <chanho61.park@samsung.com>
+ - Chanwoo Choi <cw00.choi@samsung.com>
+ - Krzysztof Kozlowski <krzk@kernel.org>
+ - Sylwester Nawrocki <s.nawrocki@samsung.com>
+ - Tomasz Figa <tomasz.figa@gmail.com>
+
+description: |
+ Exynos Auto v9 clock controller is comprised of several CMU units, generating
+ clocks for different domains. Those CMU units are modeled as separate device
+ tree nodes, and might depend on each other. Root clocks in that clock tree are
+ two external clocks:: OSCCLK/XTCXO (26 MHz) and RTCCLK/XrtcXTI (32768 Hz).
+ The external OSCCLK must be defined as fixed-rate clock in dts.
+
+ CMU_TOP is a top-level CMU, where all base clocks are prepared using PLLs and
+ dividers; all other clocks of function blocks (other CMUs) are usually
+ derived from CMU_TOP.
+
+ Each clock is assigned an identifier and client nodes can use this identifier
+ to specify the clock which they consume. All clocks available for usage
+ in clock consumer nodes are defined as preprocessor macros in
+ 'include/dt-bindings/clock/samsung,exynosautov9.h' header.
+
+properties:
+ compatible:
+ enum:
+ - samsung,exynosautov9-cmu-top
+ - samsung,exynosautov9-cmu-busmc
+ - samsung,exynosautov9-cmu-core
+ - samsung,exynosautov9-cmu-dpum
+ - samsung,exynosautov9-cmu-fsys0
+ - samsung,exynosautov9-cmu-fsys1
+ - samsung,exynosautov9-cmu-fsys2
+ - samsung,exynosautov9-cmu-peric0
+ - samsung,exynosautov9-cmu-peric1
+ - samsung,exynosautov9-cmu-peris
+
+ clocks:
+ minItems: 1
+ maxItems: 5
+
+ clock-names:
+ minItems: 1
+ maxItems: 5
+
+ "#clock-cells":
+ const: 1
+
+ reg:
+ maxItems: 1
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynosautov9-cmu-top
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+
+ clock-names:
+ items:
+ - const: oscclk
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynosautov9-cmu-busmc
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+ - description: CMU_BUSMC bus clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: dout_clkcmu_busmc_bus
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynosautov9-cmu-core
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+ - description: CMU_CORE bus clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: dout_clkcmu_core_bus
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynosautov9-cmu-dpum
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+ - description: DPU Main bus clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: bus
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynosautov9-cmu-fsys0
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+ - description: CMU_FSYS0 bus clock (from CMU_TOP)
+ - description: CMU_FSYS0 pcie clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: dout_clkcmu_fsys0_bus
+ - const: dout_clkcmu_fsys0_pcie
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynosautov9-cmu-fsys1
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+ - description: CMU_FSYS1 bus clock (from CMU_TOP)
+ - description: CMU_FSYS1 mmc card clock (from CMU_TOP)
+ - description: CMU_FSYS1 usb clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: dout_clkcmu_fsys1_bus
+ - const: gout_clkcmu_fsys1_mmc_card
+ - const: dout_clkcmu_fsys1_usbdrd
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynosautov9-cmu-fsys2
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+ - description: CMU_FSYS2 bus clock (from CMU_TOP)
+ - description: UFS clock (from CMU_TOP)
+ - description: Ethernet clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: dout_clkcmu_fsys2_bus
+ - const: dout_fsys2_clkcmu_ufs_embd
+ - const: dout_fsys2_clkcmu_ethernet
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynosautov9-cmu-peric0
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+ - description: CMU_PERIC0 bus clock (from CMU_TOP)
+ - description: PERIC0 IP clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: dout_clkcmu_peric0_bus
+ - const: dout_clkcmu_peric0_ip
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynosautov9-cmu-peric1
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+ - description: CMU_PERIC1 bus clock (from CMU_TOP)
+ - description: PERIC1 IP clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: dout_clkcmu_peric1_bus
+ - const: dout_clkcmu_peric1_ip
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynosautov9-cmu-peris
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (26 MHz)
+ - description: CMU_PERIS bus clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: dout_clkcmu_peris_bus
+
+required:
+ - compatible
+ - "#clock-cells"
+ - clocks
+ - clock-names
+ - reg
+
+additionalProperties: false
+
+examples:
+ # Clock controller node for CMU_FSYS2
+ - |
+ #include <dt-bindings/clock/samsung,exynosautov9.h>
+
+ cmu_fsys2: clock-controller@17c00000 {
+ compatible = "samsung,exynosautov9-cmu-fsys2";
+ reg = <0x17c00000 0x8000>;
+ #clock-cells = <1>;
+
+ clocks = <&xtcxo>,
+ <&cmu_top DOUT_CLKCMU_FSYS2_BUS>,
+ <&cmu_top DOUT_CLKCMU_FSYS2_UFS_EMBD>,
+ <&cmu_top DOUT_CLKCMU_FSYS2_ETHERNET>;
+ clock-names = "oscclk",
+ "dout_clkcmu_fsys2_bus",
+ "dout_fsys2_clkcmu_ufs_embd",
+ "dout_fsys2_clkcmu_ethernet";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/samsung,exynosautov920-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,exynosautov920-clock.yaml
new file mode 100644
index 000000000000..72f59db73f76
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/samsung,exynosautov920-clock.yaml
@@ -0,0 +1,256 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/samsung,exynosautov920-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung ExynosAuto v920 SoC clock controller
+
+maintainers:
+ - Sunyeal Hong <sunyeal.hong@samsung.com>
+ - Shin Son <shin.son@samsung.com>
+ - Chanwoo Choi <cw00.choi@samsung.com>
+ - Krzysztof Kozlowski <krzk@kernel.org>
+ - Sylwester Nawrocki <s.nawrocki@samsung.com>
+
+description: |
+ ExynosAuto v920 clock controller is comprised of several CMU units, generating
+ clocks for different domains. Those CMU units are modeled as separate device
+ tree nodes, and might depend on each other. Root clocks in that clock tree are
+ two external clocks:: OSCCLK/XTCXO (38.4 MHz) and RTCCLK/XrtcXTI (32768 Hz).
+ The external OSCCLK must be defined as fixed-rate clock in dts.
+
+ CMU_TOP is a top-level CMU, where all base clocks are prepared using PLLs and
+ dividers; all other clocks of function blocks (other CMUs) are usually
+ derived from CMU_TOP.
+
+ Each clock is assigned an identifier and client nodes can use this identifier
+ to specify the clock which they consume. All clocks available for usage
+ in clock consumer nodes are defined as preprocessor macros in
+ 'include/dt-bindings/clock/samsung,exynosautov920.h' header.
+
+properties:
+ compatible:
+ enum:
+ - samsung,exynosautov920-cmu-cpucl0
+ - samsung,exynosautov920-cmu-cpucl1
+ - samsung,exynosautov920-cmu-cpucl2
+ - samsung,exynosautov920-cmu-hsi0
+ - samsung,exynosautov920-cmu-hsi1
+ - samsung,exynosautov920-cmu-hsi2
+ - samsung,exynosautov920-cmu-misc
+ - samsung,exynosautov920-cmu-peric0
+ - samsung,exynosautov920-cmu-peric1
+ - samsung,exynosautov920-cmu-top
+
+ clocks:
+ minItems: 1
+ maxItems: 5
+
+ clock-names:
+ minItems: 1
+ maxItems: 5
+
+ "#clock-cells":
+ const: 1
+
+ reg:
+ maxItems: 1
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynosautov920-cmu-top
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (38.4 MHz)
+
+ clock-names:
+ items:
+ - const: oscclk
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - samsung,exynosautov920-cmu-cpucl0
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (38.4 MHz)
+ - description: CMU_CPUCL0 SWITCH clock (from CMU_TOP)
+ - description: CMU_CPUCL0 CLUSTER clock (from CMU_TOP)
+ - description: CMU_CPUCL0 DBG clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: switch
+ - const: cluster
+ - const: dbg
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - samsung,exynosautov920-cmu-cpucl1
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (38.4 MHz)
+ - description: CMU_CPUCL1 SWITCH clock (from CMU_TOP)
+ - description: CMU_CPUCL1 CLUSTER clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: switch
+ - const: cluster
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - samsung,exynosautov920-cmu-cpucl2
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (38.4 MHz)
+ - description: CMU_CPUCL2 SWITCH clock (from CMU_TOP)
+ - description: CMU_CPUCL2 CLUSTER clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: switch
+ - const: cluster
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - samsung,exynosautov920-cmu-peric0
+ - samsung,exynosautov920-cmu-peric1
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (38.4 MHz)
+ - description: CMU_PERICn NOC clock (from CMU_TOP)
+ - description: CMU_PERICn IP clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: noc
+ - const: ip
+
+ - if:
+ properties:
+ compatible:
+ enum:
+ - samsung,exynosautov920-cmu-misc
+ - samsung,exynosautov920-cmu-hsi0
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (38.4 MHz)
+ - description: CMU_MISC/CMU_HSI0 NOC clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: noc
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynosautov920-cmu-hsi1
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (38.4 MHz)
+ - description: CMU_HSI1 NOC clock (from CMU_TOP)
+ - description: CMU_HSI1 USBDRD clock (from CMU_TOP)
+ - description: CMU_HSI1 MMC_CARD clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: noc
+ - const: usbdrd
+ - const: mmc_card
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynosautov920-cmu-hsi2
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (38.4 MHz)
+ - description: CMU_HSI2 NOC clock (from CMU_TOP)
+ - description: CMU_HSI2 NOC UFS clock (from CMU_TOP)
+ - description: CMU_HSI2 UFS EMBD clock (from CMU_TOP)
+ - description: CMU_HSI2 ETHERNET clock (from CMU_TOP)
+
+ clock-names:
+ items:
+ - const: oscclk
+ - const: noc
+ - const: ufs
+ - const: embd
+ - const: ethernet
+
+required:
+ - compatible
+ - "#clock-cells"
+ - clocks
+ - clock-names
+ - reg
+
+additionalProperties: false
+
+examples:
+ # Clock controller node for CMU_PERIC0
+ - |
+ #include <dt-bindings/clock/samsung,exynosautov920.h>
+
+ cmu_peric0: clock-controller@10800000 {
+ compatible = "samsung,exynosautov920-cmu-peric0";
+ reg = <0x10800000 0x8000>;
+ #clock-cells = <1>;
+
+ clocks = <&xtcxo>,
+ <&cmu_top DOUT_CLKCMU_PERIC0_NOC>,
+ <&cmu_top DOUT_CLKCMU_PERIC0_IP>;
+ clock-names = "oscclk",
+ "noc",
+ "ip";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/samsung,s2mps11.txt b/Documentation/devicetree/bindings/clock/samsung,s2mps11.txt
deleted file mode 100644
index 2726c1d58a79..000000000000
--- a/Documentation/devicetree/bindings/clock/samsung,s2mps11.txt
+++ /dev/null
@@ -1,49 +0,0 @@
-Binding for Samsung S2M and S5M family clock generator block
-============================================================
-
-This is a part of device tree bindings for S2M and S5M family multi-function
-devices.
-More information can be found in bindings/mfd/sec-core.txt file.
-
-The S2MPS11/13/15 and S5M8767 provide three(AP/CP/BT) buffered 32.768 kHz
-outputs. The S2MPS14 provides two (AP/BT) buffered 32.768 KHz outputs.
-
-To register these as clocks with common clock framework instantiate under
-main device node a sub-node named "clocks".
-
-It uses the common clock binding documented in:
- - Documentation/devicetree/bindings/clock/clock-bindings.txt
-
-
-Required properties of the "clocks" sub-node:
- - #clock-cells: should be 1.
- - compatible: Should be one of: "samsung,s2mps11-clk", "samsung,s2mps13-clk",
- "samsung,s2mps14-clk", "samsung,s5m8767-clk"
- The S2MPS15 uses the same compatible as S2MPS13, as both provides similar
- clocks.
-
-
-Each clock is assigned an identifier and client nodes use this identifier
-to specify the clock which they consume.
- Clock ID Devices
- ----------------------------------------------------------
- 32KhzAP 0 S2MPS11/13/14/15, S5M8767
- 32KhzCP 1 S2MPS11/13/15, S5M8767
- 32KhzBT 2 S2MPS11/13/14/15, S5M8767
-
-Include dt-bindings/clock/samsung,s2mps11.h file to use preprocessor defines
-in device tree sources.
-
-
-Example:
-
- s2mps11_pmic@66 {
- compatible = "samsung,s2mps11-pmic";
- reg = <0x66>;
-
- s2m_osc: clocks {
- compatible = "samsung,s2mps11-clk";
- #clock-cells = <1>;
- clock-output-names = "xx", "yy", "zz";
- };
- };
diff --git a/Documentation/devicetree/bindings/clock/samsung,s2mps11.yaml b/Documentation/devicetree/bindings/clock/samsung,s2mps11.yaml
new file mode 100644
index 000000000000..91d455155a60
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/samsung,s2mps11.yaml
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/samsung,s2mps11.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung S2M and S5M family clock generator block
+
+maintainers:
+ - Krzysztof Kozlowski <krzk@kernel.org>
+
+description: |
+ This is a part of device tree bindings for S2M and S5M family of Power
+ Management IC (PMIC).
+
+ The S2MPS11/13/15 and S5M8767 provide three(AP/CP/BT) buffered 32.768 kHz
+ outputs. The S2MPS14 provides two (AP/BT) buffered 32.768 KHz outputs.
+
+ All available clocks are defined as preprocessor macros in
+ dt-bindings/clock/samsung,s2mps11.h header.
+
+ See also Documentation/devicetree/bindings/mfd/samsung,s2mps11.yaml for
+ additional information and example.
+
+properties:
+ compatible:
+ enum:
+ - samsung,s2mpg10-clk
+ - samsung,s2mps11-clk
+ - samsung,s2mps13-clk # S2MPS13 and S2MPS15
+ - samsung,s2mps14-clk
+ - samsung,s5m8767-clk
+
+ "#clock-cells":
+ const: 1
+
+ clock-output-names:
+ maxItems: 3
+ description: Names for AP, CP and BT clocks.
+
+required:
+ - compatible
+ - "#clock-cells"
+
+additionalProperties: false
diff --git a/Documentation/devicetree/bindings/clock/samsung,s3c2410-clock.txt b/Documentation/devicetree/bindings/clock/samsung,s3c2410-clock.txt
deleted file mode 100644
index 2632d3f13004..000000000000
--- a/Documentation/devicetree/bindings/clock/samsung,s3c2410-clock.txt
+++ /dev/null
@@ -1,49 +0,0 @@
-* Samsung S3C2410 Clock Controller
-
-The S3C2410 clock controller generates and supplies clock to various controllers
-within the SoC. The clock binding described here is applicable to the s3c2410,
-s3c2440 and s3c2442 SoCs in the s3c24x family.
-
-Required Properties:
-
-- compatible: should be one of the following.
- - "samsung,s3c2410-clock" - controller compatible with S3C2410 SoC.
- - "samsung,s3c2440-clock" - controller compatible with S3C2440 SoC.
- - "samsung,s3c2442-clock" - controller compatible with S3C2442 SoC.
-- reg: physical base address of the controller and length of memory mapped
- region.
-- #clock-cells: should be 1.
-
-Each clock is assigned an identifier and client nodes can use this identifier
-to specify the clock which they consume. Some of the clocks are available only
-on a particular SoC.
-
-All available clocks are defined as preprocessor macros in
-dt-bindings/clock/s3c2410.h header and can be used in device
-tree sources.
-
-External clocks:
-
-The xti clock used as input for the plls is generated outside the SoC. It is
-expected that is are defined using standard clock bindings with a
-clock-output-names value of "xti".
-
-Example: Clock controller node:
-
- clocks: clock-controller@4c000000 {
- compatible = "samsung,s3c2410-clock";
- reg = <0x4c000000 0x20>;
- #clock-cells = <1>;
- };
-
-Example: UART controller node that consumes the clock generated by the clock
- controller (refer to the standard clock bindings for information about
- "clocks" and "clock-names" properties):
-
- serial@50004000 {
- compatible = "samsung,s3c2440-uart";
- reg = <0x50004000 0x4000>;
- interrupts = <1 23 3 4>, <1 23 4 4>;
- clock-names = "uart", "clk_uart_baud2";
- clocks = <&clocks PCLK_UART0>, <&clocks PCLK_UART0>;
- };
diff --git a/Documentation/devicetree/bindings/clock/samsung,s3c2412-clock.txt b/Documentation/devicetree/bindings/clock/samsung,s3c2412-clock.txt
deleted file mode 100644
index 21a8c23e658f..000000000000
--- a/Documentation/devicetree/bindings/clock/samsung,s3c2412-clock.txt
+++ /dev/null
@@ -1,49 +0,0 @@
-* Samsung S3C2412 Clock Controller
-
-The S3C2412 clock controller generates and supplies clock to various controllers
-within the SoC. The clock binding described here is applicable to the s3c2412
-and s3c2413 SoCs in the s3c24x family.
-
-Required Properties:
-
-- compatible: should be "samsung,s3c2412-clock"
-- reg: physical base address of the controller and length of memory mapped
- region.
-- #clock-cells: should be 1.
-
-Each clock is assigned an identifier and client nodes can use this identifier
-to specify the clock which they consume. Some of the clocks are available only
-on a particular SoC.
-
-All available clocks are defined as preprocessor macros in
-dt-bindings/clock/s3c2412.h header and can be used in device
-tree sources.
-
-External clocks:
-
-There are several clocks that are generated outside the SoC. It is expected
-that they are defined using standard clock bindings with following
-clock-output-names:
- - "xti" - crystal input - required,
- - "ext" - external clock source - optional,
-
-Example: Clock controller node:
-
- clocks: clock-controller@4c000000 {
- compatible = "samsung,s3c2412-clock";
- reg = <0x4c000000 0x20>;
- #clock-cells = <1>;
- };
-
-Example: UART controller node that consumes the clock generated by the clock
- controller (refer to the standard clock bindings for information about
- "clocks" and "clock-names" properties):
-
- serial@50004000 {
- compatible = "samsung,s3c2412-uart";
- reg = <0x50004000 0x4000>;
- interrupts = <1 23 3 4>, <1 23 4 4>;
- clock-names = "uart", "clk_uart_baud2", "clk_uart_baud3";
- clocks = <&clocks PCLK_UART0>, <&clocks PCLK_UART0>,
- <&clocks SCLK_UART>;
- };
diff --git a/Documentation/devicetree/bindings/clock/samsung,s3c2443-clock.txt b/Documentation/devicetree/bindings/clock/samsung,s3c2443-clock.txt
deleted file mode 100644
index 985c0f574e9a..000000000000
--- a/Documentation/devicetree/bindings/clock/samsung,s3c2443-clock.txt
+++ /dev/null
@@ -1,55 +0,0 @@
-* Samsung S3C2443 Clock Controller
-
-The S3C2443 clock controller generates and supplies clock to various controllers
-within the SoC. The clock binding described here is applicable to all SoCs in
-the s3c24x family starting with the s3c2443.
-
-Required Properties:
-
-- compatible: should be one of the following.
- - "samsung,s3c2416-clock" - controller compatible with S3C2416 SoC.
- - "samsung,s3c2443-clock" - controller compatible with S3C2443 SoC.
- - "samsung,s3c2450-clock" - controller compatible with S3C2450 SoC.
-- reg: physical base address of the controller and length of memory mapped
- region.
-- #clock-cells: should be 1.
-
-Each clock is assigned an identifier and client nodes can use this identifier
-to specify the clock which they consume. Some of the clocks are available only
-on a particular SoC.
-
-All available clocks are defined as preprocessor macros in
-dt-bindings/clock/s3c2443.h header and can be used in device
-tree sources.
-
-External clocks:
-
-There are several clocks that are generated outside the SoC. It is expected
-that they are defined using standard clock bindings with following
-clock-output-names:
- - "xti" - crystal input - required,
- - "ext" - external clock source - optional,
- - "ext_i2s" - external I2S clock - optional,
- - "ext_uart" - external uart clock - optional,
-
-Example: Clock controller node:
-
- clocks: clock-controller@4c000000 {
- compatible = "samsung,s3c2416-clock";
- reg = <0x4c000000 0x40>;
- #clock-cells = <1>;
- };
-
-Example: UART controller node that consumes the clock generated by the clock
- controller (refer to the standard clock bindings for information about
- "clocks" and "clock-names" properties):
-
- serial@50004000 {
- compatible = "samsung,s3c2440-uart";
- reg = <0x50004000 0x4000>;
- interrupts = <1 23 3 4>, <1 23 4 4>;
- clock-names = "uart", "clk_uart_baud2",
- "clk_uart_baud3";
- clocks = <&clocks PCLK_UART0>, <&clocks PCLK_UART0>,
- <&clocks SCLK_UART>;
- };
diff --git a/Documentation/devicetree/bindings/clock/samsung,s3c6400-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,s3c6400-clock.yaml
new file mode 100644
index 000000000000..0fcc0c963f8f
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/samsung,s3c6400-clock.yaml
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/samsung,s3c6400-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung S3C6400 SoC clock controller
+
+maintainers:
+ - Krzysztof Kozlowski <krzk@kernel.org>
+
+description: |
+ There are several clocks that are generated outside the SoC. It is expected
+ that they are defined using standard clock bindings with following
+ clock-output-names and/or provided as clock inputs to this clock controller:
+ - "fin_pll" - PLL input clock (xtal/extclk) - required,
+ - "xusbxti" - USB xtal - required,
+ - "iiscdclk0" - I2S0 codec clock - optional,
+ - "iiscdclk1" - I2S1 codec clock - optional,
+ - "iiscdclk2" - I2S2 codec clock - optional,
+ - "pcmcdclk0" - PCM0 codec clock - optional,
+ - "pcmcdclk1" - PCM1 codec clock - optional, only S3C6410.
+
+ All available clocks are defined as preprocessor macros in
+ include/dt-bindings/clock/samsung,s3c64xx-clock.h header.
+
+properties:
+ compatible:
+ enum:
+ - samsung,s3c6400-clock
+ - samsung,s3c6410-clock
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - "#clock-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@7e00f000 {
+ compatible = "samsung,s3c6410-clock";
+ reg = <0x7e00f000 0x1000>;
+ #clock-cells = <1>;
+ clocks = <&fin_pll>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/samsung,s3c64xx-clock.txt b/Documentation/devicetree/bindings/clock/samsung,s3c64xx-clock.txt
deleted file mode 100644
index 872ee8e0f041..000000000000
--- a/Documentation/devicetree/bindings/clock/samsung,s3c64xx-clock.txt
+++ /dev/null
@@ -1,76 +0,0 @@
-* Samsung S3C64xx Clock Controller
-
-The S3C64xx clock controller generates and supplies clock to various controllers
-within the SoC. The clock binding described here is applicable to all SoCs in
-the S3C64xx family.
-
-Required Properties:
-
-- compatible: should be one of the following.
- - "samsung,s3c6400-clock" - controller compatible with S3C6400 SoC.
- - "samsung,s3c6410-clock" - controller compatible with S3C6410 SoC.
-
-- reg: physical base address of the controller and length of memory mapped
- region.
-
-- #clock-cells: should be 1.
-
-Each clock is assigned an identifier and client nodes can use this identifier
-to specify the clock which they consume. Some of the clocks are available only
-on a particular S3C64xx SoC and this is specified where applicable.
-
-All available clocks are defined as preprocessor macros in
-dt-bindings/clock/samsung,s3c64xx-clock.h header and can be used in device
-tree sources.
-
-External clocks:
-
-There are several clocks that are generated outside the SoC. It is expected
-that they are defined using standard clock bindings with following
-clock-output-names:
- - "fin_pll" - PLL input clock (xtal/extclk) - required,
- - "xusbxti" - USB xtal - required,
- - "iiscdclk0" - I2S0 codec clock - optional,
- - "iiscdclk1" - I2S1 codec clock - optional,
- - "iiscdclk2" - I2S2 codec clock - optional,
- - "pcmcdclk0" - PCM0 codec clock - optional,
- - "pcmcdclk1" - PCM1 codec clock - optional, only S3C6410.
-
-Example: Clock controller node:
-
- clock: clock-controller@7e00f000 {
- compatible = "samsung,s3c6410-clock";
- reg = <0x7e00f000 0x1000>;
- #clock-cells = <1>;
- };
-
-Example: Required external clocks:
-
- fin_pll: clock-fin-pll {
- compatible = "fixed-clock";
- clock-output-names = "fin_pll";
- clock-frequency = <12000000>;
- #clock-cells = <0>;
- };
-
- xusbxti: clock-xusbxti {
- compatible = "fixed-clock";
- clock-output-names = "xusbxti";
- clock-frequency = <48000000>;
- #clock-cells = <0>;
- };
-
-Example: UART controller node that consumes the clock generated by the clock
- controller (refer to the standard clock bindings for information about
- "clocks" and "clock-names" properties):
-
- uart0: serial@7f005000 {
- compatible = "samsung,s3c6400-uart";
- reg = <0x7f005000 0x100>;
- interrupt-parent = <&vic1>;
- interrupts = <5>;
- clock-names = "uart", "clk_uart_baud2",
- "clk_uart_baud3";
- clocks = <&clock PCLK_UART0>, <&clocks PCLK_UART0>,
- <&clock SCLK_UART>;
- };
diff --git a/Documentation/devicetree/bindings/clock/samsung,s5pv210-audss-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,s5pv210-audss-clock.yaml
new file mode 100644
index 000000000000..2659854ea1c0
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/samsung,s5pv210-audss-clock.yaml
@@ -0,0 +1,78 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/samsung,s5pv210-audss-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung S5Pv210 SoC Audio SubSystem clock controller
+
+maintainers:
+ - Chanwoo Choi <cw00.choi@samsung.com>
+ - Krzysztof Kozlowski <krzk@kernel.org>
+ - Sylwester Nawrocki <s.nawrocki@samsung.com>
+ - Tomasz Figa <tomasz.figa@gmail.com>
+
+description: |
+ All available clocks are defined as preprocessor macros in
+ include/dt-bindings/clock/s5pv210-audss.h header.
+
+properties:
+ compatible:
+ const: samsung,s5pv210-audss-clock
+
+ clocks:
+ minItems: 4
+ items:
+ - description:
+ AHB bus clock of the Audio Subsystem.
+ - description:
+ Optional fixed rate PLL reference clock, parent of mout_audss. If not
+ specified (i.e. xusbxti is used for PLL reference), it is fixed to a
+ clock named "xxti".
+ - description:
+ Input PLL to the AudioSS block, parent of mout_audss.
+ - description:
+ Audio bus clock, parent of mout_i2s.
+ - description:
+ Optional external i2s clock, parent of mout_i2s. If not specified, it
+ is fixed to a clock named "iiscdclk0".
+
+ clock-names:
+ minItems: 4
+ items:
+ - const: hclk
+ - const: xxti
+ - const: fout_epll
+ - const: sclk_audio0
+ - const: iiscdclk0
+
+ "#clock-cells":
+ const: 1
+
+ power-domains:
+ maxItems: 1
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - "#clock-cells"
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/s5pv210.h>
+
+ clock-controller@c0900000 {
+ compatible = "samsung,s5pv210-audss-clock";
+ reg = <0xc0900000 0x1000>;
+ #clock-cells = <1>;
+ clock-names = "hclk", "xxti", "fout_epll", "sclk_audio0";
+ clocks = <&clocks DOUT_HCLKP>, <&xxti>, <&clocks FOUT_EPLL>,
+ <&clocks SCLK_AUDIO0>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/samsung,s5pv210-clock.txt b/Documentation/devicetree/bindings/clock/samsung,s5pv210-clock.txt
deleted file mode 100644
index 15b48e20a061..000000000000
--- a/Documentation/devicetree/bindings/clock/samsung,s5pv210-clock.txt
+++ /dev/null
@@ -1,77 +0,0 @@
-* Samsung S5P6442/S5PC110/S5PV210 Clock Controller
-
-Samsung S5P6442, S5PC110 and S5PV210 SoCs contain integrated clock
-controller, which generates and supplies clock to various controllers
-within the SoC.
-
-Required Properties:
-
-- compatible: should be one of following:
- - "samsung,s5pv210-clock" : for clock controller of Samsung
- S5PC110/S5PV210 SoCs,
- - "samsung,s5p6442-clock" : for clock controller of Samsung
- S5P6442 SoC.
-
-- reg: physical base address of the controller and length of memory mapped
- region.
-
-- #clock-cells: should be 1.
-
-All available clocks are defined as preprocessor macros in
-dt-bindings/clock/s5pv210.h header and can be used in device tree sources.
-
-External clocks:
-
-There are several clocks that are generated outside the SoC. It is expected
-that they are defined using standard clock bindings with following
-clock-output-names:
- - "xxti": external crystal oscillator connected to XXTI and XXTO pins of
-the SoC,
- - "xusbxti": external crystal oscillator connected to XUSBXTI and XUSBXTO
-pins of the SoC,
-
-A subset of above clocks available on given board shall be specified in
-board device tree, including the system base clock, as selected by XOM[0]
-pin of the SoC. Refer to generic fixed rate clock bindings
-documentation[1] for more information how to specify these clocks.
-
-[1] Documentation/devicetree/bindings/clock/fixed-clock.txt
-
-Example: Clock controller node:
-
- clock: clock-controller@7e00f000 {
- compatible = "samsung,s5pv210-clock";
- reg = <0x7e00f000 0x1000>;
- #clock-cells = <1>;
- };
-
-Example: Required external clocks:
-
- xxti: clock-xxti {
- compatible = "fixed-clock";
- clock-output-names = "xxti";
- clock-frequency = <24000000>;
- #clock-cells = <0>;
- };
-
- xusbxti: clock-xusbxti {
- compatible = "fixed-clock";
- clock-output-names = "xusbxti";
- clock-frequency = <24000000>;
- #clock-cells = <0>;
- };
-
-Example: UART controller node that consumes the clock generated by the clock
- controller (refer to the standard clock bindings for information about
- "clocks" and "clock-names" properties):
-
- uart0: serial@e2900000 {
- compatible = "samsung,s5pv210-uart";
- reg = <0xe2900000 0x400>;
- interrupt-parent = <&vic1>;
- interrupts = <10>;
- clock-names = "uart", "clk_uart_baud0",
- "clk_uart_baud1";
- clocks = <&clocks UART0>, <&clocks UART0>,
- <&clocks SCLK_UART0>;
- };
diff --git a/Documentation/devicetree/bindings/clock/samsung,s5pv210-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,s5pv210-clock.yaml
new file mode 100644
index 000000000000..67a33665cf00
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/samsung,s5pv210-clock.yaml
@@ -0,0 +1,79 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/samsung,s5pv210-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung S5P6442/S5PC110/S5PV210 SoC clock controller
+
+maintainers:
+ - Chanwoo Choi <cw00.choi@samsung.com>
+ - Krzysztof Kozlowski <krzk@kernel.org>
+ - Sylwester Nawrocki <s.nawrocki@samsung.com>
+ - Tomasz Figa <tomasz.figa@gmail.com>
+
+description: |
+ Expected external clocks, defined in DTS as fixed-rate clocks with a matching
+ name::
+ - "xxti" - external crystal oscillator connected to XXTI and XXTO pins of
+ the SoC,
+ - "xusbxti" - external crystal oscillator connected to XUSBXTI and XUSBXTO
+ pins of the SoC,
+
+ All available clocks are defined as preprocessor macros in
+ include/dt-bindings/clock/s5pv210.h header.
+
+properties:
+ compatible:
+ enum:
+ - samsung,s5pv210-clock
+ - samsung,s5p6442-clock
+
+ clocks:
+ items:
+ - description: xxti clock
+ - description: xusbxti clock
+
+ clock-names:
+ items:
+ - const: xxti
+ - const: xusbxti
+
+ "#clock-cells":
+ const: 1
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - "#clock-cells"
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/s5pv210.h>
+
+ xxti: clock-0 {
+ compatible = "fixed-clock";
+ clock-frequency = <0>;
+ clock-output-names = "xxti";
+ #clock-cells = <0>;
+ };
+
+ xusbxti: clock-1 {
+ compatible = "fixed-clock";
+ clock-frequency = <0>;
+ clock-output-names = "xusbxti";
+ #clock-cells = <0>;
+ };
+
+ clock-controller@e0100000 {
+ compatible = "samsung,s5pv210-clock";
+ reg = <0xe0100000 0x10000>;
+ clock-names = "xxti", "xusbxti";
+ clocks = <&xxti>, <&xusbxti>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/sifive/fu540-prci.yaml b/Documentation/devicetree/bindings/clock/sifive/fu540-prci.yaml
new file mode 100644
index 000000000000..c79e752283aa
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/sifive/fu540-prci.yaml
@@ -0,0 +1,59 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (C) 2020 SiFive, Inc.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/sifive/fu540-prci.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: SiFive FU540 Power Reset Clock Interrupt Controller (PRCI)
+
+maintainers:
+ - Paul Walmsley <paul.walmsley@sifive.com>
+
+description:
+ On the FU540 family of SoCs, most system-wide clock and reset integration
+ is via the PRCI IP block.
+ The clock consumer should specify the desired clock via the clock ID
+ macros defined in include/dt-bindings/clock/sifive-fu540-prci.h.
+ These macros begin with PRCI_CLK_.
+
+ The hfclk and rtcclk nodes are required, and represent physical
+ crystals or resonators located on the PCB. These nodes should be present
+ underneath /, rather than /soc.
+
+properties:
+ compatible:
+ const: sifive,fu540-c000-prci
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: high frequency clock.
+ - description: RTL clock.
+
+ clock-names:
+ items:
+ - const: hfclk
+ - const: rtcclk
+
+ "#clock-cells":
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - "#clock-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ prci: clock-controller@10000000 {
+ compatible = "sifive,fu540-c000-prci";
+ reg = <0x10000000 0x1000>;
+ clocks = <&hfclk>, <&rtcclk>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/sifive/fu740-prci.yaml b/Documentation/devicetree/bindings/clock/sifive/fu740-prci.yaml
new file mode 100644
index 000000000000..252085a0cf65
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/sifive/fu740-prci.yaml
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (C) 2020 SiFive, Inc.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/sifive/fu740-prci.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: SiFive FU740 Power Reset Clock Interrupt Controller (PRCI)
+
+maintainers:
+ - Zong Li <zong.li@sifive.com>
+ - Paul Walmsley <paul.walmsley@sifive.com>
+
+description:
+ On the FU740 family of SoCs, most system-wide clock and reset integration
+ is via the PRCI IP block.
+ The clock consumer should specify the desired clock via the clock ID
+ macros defined in include/dt-bindings/clock/sifive-fu740-prci.h.
+ These macros begin with PRCI_CLK_.
+
+ The hfclk and rtcclk nodes are required, and represent physical
+ crystals or resonators located on the PCB. These nodes should be present
+ underneath /, rather than /soc.
+
+properties:
+ compatible:
+ const: sifive,fu740-c000-prci
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: high frequency clock.
+ - description: RTL clock.
+
+ clock-names:
+ items:
+ - const: hfclk
+ - const: rtcclk
+
+ "#clock-cells":
+ const: 1
+
+ "#reset-cells":
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - "#clock-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ prci: clock-controller@10000000 {
+ compatible = "sifive,fu740-c000-prci";
+ reg = <0x10000000 0x1000>;
+ clocks = <&hfclk>, <&rtcclk>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/silabs,si514.txt b/Documentation/devicetree/bindings/clock/silabs,si514.txt
deleted file mode 100644
index ea1a9dbc63b6..000000000000
--- a/Documentation/devicetree/bindings/clock/silabs,si514.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-Binding for Silicon Labs 514 programmable I2C clock generator.
-
-Reference
-This binding uses the common clock binding[1]. Details about the device can be
-found in the datasheet[2].
-
-[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-[2] Si514 datasheet
- http://www.silabs.com/Support%20Documents/TechnicalDocs/si514.pdf
-
-Required properties:
- - compatible: Shall be "silabs,si514"
- - reg: I2C device address.
- - #clock-cells: From common clock bindings: Shall be 0.
-
-Optional properties:
- - clock-output-names: From common clock bindings. Recommended to be "si514".
-
-Example:
- si514: clock-generator@55 {
- reg = <0x55>;
- #clock-cells = <0>;
- compatible = "silabs,si514";
- };
diff --git a/Documentation/devicetree/bindings/clock/silabs,si5341.yaml b/Documentation/devicetree/bindings/clock/silabs,si5341.yaml
new file mode 100644
index 000000000000..d6416bded3d5
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/silabs,si5341.yaml
@@ -0,0 +1,223 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/silabs,si5341.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Silicon Labs Si5340/1/2/4/5 programmable i2c clock generator
+
+maintainers:
+ - Mike Looijmans <mike.looijmans@topic.nl>
+
+description: >
+ Silicon Labs Si5340, Si5341 Si5342, Si5344 and Si5345 programmable i2c clock
+ generator.
+
+ Reference
+ [1] Si5341 Data Sheet
+ https://www.silabs.com/documents/public/data-sheets/Si5341-40-D-DataSheet.pdf
+ [2] Si5341 Reference Manual
+ https://www.silabs.com/documents/public/reference-manuals/Si5341-40-D-RM.pdf
+ [3] Si5345 Reference Manual
+ https://www.silabs.com/documents/public/reference-manuals/Si5345-44-42-D-RM.pdf
+
+ The Si5341 and Si5340 are programmable i2c clock generators with up to 10 output
+ clocks. The chip contains a PLL that sources 5 (or 4) multisynth clocks, which
+ in turn can be directed to any of the 10 (or 4) outputs through a divider.
+ The internal structure of the clock generators can be found in [2].
+ The Si5345 is similar to the Si5341 with the addition of fractional input
+ dividers and automatic input selection, as described in [3].
+ The Si5342 and Si5344 are smaller versions of the Si5345, with 2 or 4 outputs.
+
+ The driver can be used in "as is" mode, reading the current settings from the
+ chip at boot, in case you have a (pre-)programmed device. If the PLL is not
+ configured when the driver probes, it assumes the driver must fully initialize
+ it.
+
+ The device type, speed grade and revision are determined runtime by probing.
+
+properties:
+ compatible:
+ enum:
+ - silabs,si5340
+ - silabs,si5341
+ - silabs,si5342
+ - silabs,si5344
+ - silabs,si5345
+
+ reg:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 2
+ description: >
+ The first value is "0" for outputs, "1" for synthesizers.
+
+ The second value is the output or synthesizer index.
+
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 0
+
+ clocks:
+ minItems: 1
+ maxItems: 4
+
+ clock-names:
+ minItems: 1
+ items:
+ - const: xtal
+ - const: in0
+ - const: in1
+ - const: in2
+
+ clock-output-names: true
+
+ interrupts:
+ maxItems: 1
+ description: Interrupt for INTRb pin
+
+ vdd-supply:
+ description: Regulator node for VDD
+
+ vdda-supply:
+ description: Regulator node for VDDA
+
+ vdds-supply:
+ description: Regulator node for VDDS
+
+ silabs,pll-m-num:
+ description:
+ Numerator for PLL feedback divider. Must be such that the PLL output is in
+ the valid range. For example, to create 14GHz from a 48MHz xtal, use
+ m-num=14000 and m-den=48. Only the fraction matters, using 3500 and 12
+ will deliver the exact same result. If these are not specified, and the
+ PLL is not yet programmed when the driver probes, the PLL will be set to
+ 14GHz.
+ $ref: /schemas/types.yaml#/definitions/uint32
+
+ silabs,pll-m-den:
+ description: Denominator for PLL feedback divider
+ $ref: /schemas/types.yaml#/definitions/uint32
+
+ silabs,reprogram:
+ description: Always perform soft-reset and reinitialize PLL
+ type: boolean
+
+ silabs,xaxb-ext-clk:
+ description: Use XA/XB pins as external reference clock
+ type: boolean
+
+ silabs,iovdd-33:
+ description: I2C lines use 3.3V thresholds
+ type: boolean
+
+patternProperties:
+ "^vddo[0-9]-supply$": true
+
+ "^out@[0-9]$":
+ description: >
+ Output-specific override nodes
+
+ Each of the clock outputs can be overwritten individually by using a child
+ node. If a child node for a clock output is not set, the configuration
+ remains unchanged.
+ type: object
+ additionalProperties: false
+
+ properties:
+ reg:
+ description: Number of clock output
+ maximum: 9
+
+ always-on:
+ description: Set to keep the clock output always running
+ type: boolean
+
+ silabs,format:
+ description: Output format
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [1, 2, 4]
+
+ silabs,common-mode:
+ description: Override output common mode
+ $ref: /schemas/types.yaml#/definitions/uint32
+
+ silabs,amplitude:
+ description: Override output amplitude
+ $ref: /schemas/types.yaml#/definitions/uint32
+
+ silabs,synth-master:
+ description: Allow dynamic multisynth rate control
+ type: boolean
+
+ silabs,disable-high:
+ description: Drive output HIGH when disabled
+ type: boolean
+
+ required:
+ - reg
+
+required:
+ - compatible
+ - reg
+ - "#clock-cells"
+ - "#address-cells"
+ - "#size-cells"
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ - |
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ clock-generator@74 {
+ reg = <0x74>;
+ compatible = "silabs,si5341";
+ #clock-cells = <2>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clocks = <&ref48>;
+ clock-names = "xtal";
+
+ silabs,pll-m-num = <14000>; /* PLL at 14.0 GHz */
+ silabs,pll-m-den = <48>;
+ silabs,reprogram; /* Chips are not programmed, always reset */
+
+ out@0 {
+ reg = <0>;
+ silabs,format = <1>; /* LVDS 3v3 */
+ silabs,common-mode = <3>;
+ silabs,amplitude = <3>;
+ silabs,synth-master;
+ };
+
+ /*
+ * Output 6 configuration:
+ * LVDS 1v8
+ */
+ out@6 {
+ reg = <6>;
+ silabs,format = <1>; /* LVDS 1v8 */
+ silabs,common-mode = <13>;
+ silabs,amplitude = <3>;
+ };
+
+ /*
+ * Output 8 configuration:
+ * HCSL 3v3
+ */
+ out@8 {
+ reg = <8>;
+ silabs,format = <2>;
+ silabs,common-mode = <11>;
+ silabs,amplitude = <3>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/silabs,si5351.txt b/Documentation/devicetree/bindings/clock/silabs,si5351.txt
deleted file mode 100644
index f00191cad8cd..000000000000
--- a/Documentation/devicetree/bindings/clock/silabs,si5351.txt
+++ /dev/null
@@ -1,126 +0,0 @@
-Binding for Silicon Labs Si5351a/b/c programmable i2c clock generator.
-
-Reference
-[1] Si5351A/B/C Data Sheet
- http://www.silabs.com/Support%20Documents/TechnicalDocs/Si5351.pdf
-
-The Si5351a/b/c are programmable i2c clock generators with up to 8 output
-clocks. Si5351a also has a reduced pin-count package (MSOP10) where only
-3 output clocks are accessible. The internal structure of the clock
-generators can be found in [1].
-
-==I2C device node==
-
-Required properties:
-- compatible: shall be one of the following:
- "silabs,si5351a" - Si5351a, QFN20 package
- "silabs,si5351a-msop" - Si5351a, MSOP10 package
- "silabs,si5351b" - Si5351b, QFN20 package
- "silabs,si5351c" - Si5351c, QFN20 package
-- reg: i2c device address, shall be 0x60 or 0x61.
-- #clock-cells: from common clock binding; shall be set to 1.
-- clocks: from common clock binding; list of parent clock
- handles, shall be xtal reference clock or xtal and clkin for
- si5351c only. Corresponding clock input names are "xtal" and
- "clkin" respectively.
-- #address-cells: shall be set to 1.
-- #size-cells: shall be set to 0.
-
-Optional properties:
-- silabs,pll-source: pair of (number, source) for each pll. Allows
- to overwrite clock source of pll A (number=0) or B (number=1).
-
-==Child nodes==
-
-Each of the clock outputs can be overwritten individually by
-using a child node to the I2C device node. If a child node for a clock
-output is not set, the eeprom configuration is not overwritten.
-
-Required child node properties:
-- reg: number of clock output.
-
-Optional child node properties:
-- silabs,clock-source: source clock of the output divider stage N, shall be
- 0 = multisynth N
- 1 = multisynth 0 for output clocks 0-3, else multisynth4
- 2 = xtal
- 3 = clkin (si5351c only)
-- silabs,drive-strength: output drive strength in mA, shall be one of {2,4,6,8}.
-- silabs,multisynth-source: source pll A(0) or B(1) of corresponding multisynth
- divider.
-- silabs,pll-master: boolean, multisynth can change pll frequency.
-- silabs,pll-reset: boolean, clock output can reset its pll.
-- silabs,disable-state : clock output disable state, shall be
- 0 = clock output is driven LOW when disabled
- 1 = clock output is driven HIGH when disabled
- 2 = clock output is FLOATING (HIGH-Z) when disabled
- 3 = clock output is NEVER disabled
-
-==Example==
-
-/* 25MHz reference crystal */
-ref25: ref25M {
- compatible = "fixed-clock";
- #clock-cells = <0>;
- clock-frequency = <25000000>;
-};
-
-i2c-master-node {
-
- /* Si5351a msop10 i2c clock generator */
- si5351a: clock-generator@60 {
- compatible = "silabs,si5351a-msop";
- reg = <0x60>;
- #address-cells = <1>;
- #size-cells = <0>;
- #clock-cells = <1>;
-
- /* connect xtal input to 25MHz reference */
- clocks = <&ref25>;
- clock-names = "xtal";
-
- /* connect xtal input as source of pll0 and pll1 */
- silabs,pll-source = <0 0>, <1 0>;
-
- /*
- * overwrite clkout0 configuration with:
- * - 8mA output drive strength
- * - pll0 as clock source of multisynth0
- * - multisynth0 as clock source of output divider
- * - multisynth0 can change pll0
- * - set initial clock frequency of 74.25MHz
- */
- clkout0 {
- reg = <0>;
- silabs,drive-strength = <8>;
- silabs,multisynth-source = <0>;
- silabs,clock-source = <0>;
- silabs,pll-master;
- clock-frequency = <74250000>;
- };
-
- /*
- * overwrite clkout1 configuration with:
- * - 4mA output drive strength
- * - pll1 as clock source of multisynth1
- * - multisynth1 as clock source of output divider
- * - multisynth1 can change pll1
- */
- clkout1 {
- reg = <1>;
- silabs,drive-strength = <4>;
- silabs,multisynth-source = <1>;
- silabs,clock-source = <0>;
- pll-master;
- };
-
- /*
- * overwrite clkout2 configuration with:
- * - xtal as clock source of output divider
- */
- clkout2 {
- reg = <2>;
- silabs,clock-source = <2>;
- };
- };
-};
diff --git a/Documentation/devicetree/bindings/clock/silabs,si5351.yaml b/Documentation/devicetree/bindings/clock/silabs,si5351.yaml
new file mode 100644
index 000000000000..d3e0ec29993b
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/silabs,si5351.yaml
@@ -0,0 +1,265 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/silabs,si5351.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Silicon Labs Si5351A/B/C programmable I2C clock generators
+
+description: |
+ The Silicon Labs Si5351A/B/C are programmable I2C clock generators with up to
+ 8 outputs. Si5351A also has a reduced pin-count package (10-MSOP) where only 3
+ output clocks are accessible. The internal structure of the clock generators
+ can be found in [1].
+
+ [1] Si5351A/B/C Data Sheet
+ https://www.skyworksinc.com/-/media/Skyworks/SL/documents/public/data-sheets/Si5351-B.pdf
+
+maintainers:
+ - Alvin Šipraga <alsi@bang-olufsen.dk>
+
+properties:
+ compatible:
+ enum:
+ - silabs,si5351a # Si5351A, 20-QFN package
+ - silabs,si5351a-msop # Si5351A, 10-MSOP package
+ - silabs,si5351b # Si5351B, 20-QFN package
+ - silabs,si5351c # Si5351C, 20-QFN package
+
+ reg:
+ enum:
+ - 0x60
+ - 0x61
+
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 0
+
+ "#clock-cells":
+ const: 1
+
+ clocks:
+ minItems: 1
+ maxItems: 2
+
+ clock-names:
+ minItems: 1
+ items:
+ - const: xtal
+ - const: clkin
+
+ silabs,pll-source:
+ $ref: /schemas/types.yaml#/definitions/uint32-matrix
+ description: |
+ A list of cell pairs containing a PLL index and its source. Allows to
+ overwrite clock source of the internal PLLs.
+ items:
+ items:
+ - description: PLL A (0) or PLL B (1)
+ enum: [ 0, 1 ]
+ - description: PLL source, XTAL (0) or CLKIN (1, Si5351C only).
+ enum: [ 0, 1 ]
+
+ silabs,pll-reset-mode:
+ $ref: /schemas/types.yaml#/definitions/uint32-matrix
+ minItems: 1
+ maxItems: 2
+ description: A list of cell pairs containing a PLL index and its reset mode.
+ items:
+ items:
+ - description: PLL A (0) or PLL B (1)
+ enum: [ 0, 1 ]
+ - description: |
+ Reset mode for the PLL. Mode can be one of:
+
+ 0 - reset whenever PLL rate is adjusted (default mode)
+ 1 - do not reset when PLL rate is adjusted
+
+ In mode 1, the PLL is only reset if the silabs,pll-reset is
+ specified in one of the clock output child nodes that also sources
+ the PLL. This mode may be preferable if output clocks are expected
+ to be adjusted without glitches.
+ enum: [ 0, 1 ]
+
+patternProperties:
+ "^clkout@[0-7]$":
+ type: object
+
+ additionalProperties: false
+
+ properties:
+ reg:
+ description: Clock output number.
+
+ clock-frequency: true
+
+ silabs,clock-source:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: |
+ Source clock of the this output's divider stage.
+
+ 0 - use multisynth N for this output, where N is the output number
+ 1 - use either multisynth 0 (if output number is 0-3) or multisynth 4
+ (otherwise) for this output
+ 2 - use XTAL for this output
+ 3 - use CLKIN for this output (Si5351C only)
+
+ silabs,drive-strength:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [ 2, 4, 6, 8 ]
+ description: Output drive strength in mA.
+
+ silabs,multisynth-source:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [ 0, 1 ]
+ description:
+ Source PLL A (0) or B (1) for the corresponding multisynth divider.
+
+ silabs,pll-master:
+ type: boolean
+ description: |
+ The frequency of the source PLL is allowed to be changed by the
+ multisynth when setting the rate of this clock output.
+
+ silabs,pll-reset:
+ type: boolean
+ description: Reset the source PLL when enabling this clock output.
+
+ silabs,disable-state:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [ 0, 1, 2, 3 ]
+ description: |
+ Clock output disable state. The state can be one of:
+
+ 0 - clock output is driven LOW when disabled
+ 1 - clock output is driven HIGH when disabled
+ 2 - clock output is FLOATING (HIGH-Z) when disabled
+ 3 - clock output is never disabled
+
+ allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: silabs,si5351a-msop
+ then:
+ properties:
+ reg:
+ maximum: 2
+ else:
+ properties:
+ reg:
+ maximum: 7
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: silabs,si5351c
+ then:
+ properties:
+ silabs,clock-source:
+ enum: [ 0, 1, 2, 3 ]
+ else:
+ properties:
+ silabs,clock-source:
+ enum: [ 0, 1, 2 ]
+
+ required:
+ - reg
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - silabs,si5351a
+ - silabs,si5351a-msop
+ - silabs,si5351b
+ then:
+ properties:
+ clocks:
+ maxItems: 1
+ clock-names:
+ maxItems: 1
+
+required:
+ - reg
+ - "#address-cells"
+ - "#size-cells"
+ - "#clock-cells"
+ - clocks
+ - clock-names
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ clock-generator@60 {
+ compatible = "silabs,si5351a-msop";
+ reg = <0x60>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ #clock-cells = <1>;
+
+ /* Connect XTAL input to 25MHz reference */
+ clocks = <&ref25>;
+ clock-names = "xtal";
+
+ /* Use XTAL input as source of PLL0 and PLL1 */
+ silabs,pll-source = <0 0>, <1 0>;
+
+ /* Don't reset PLL1 on rate adjustment */
+ silabs,pll-reset-mode = <1 1>;
+
+ /*
+ * Overwrite CLK0 configuration with:
+ * - 8 mA output drive strength
+ * - PLL0 as clock source of multisynth 0
+ * - Multisynth 0 as clock source of output divider
+ * - Multisynth 0 can change PLL0
+ * - Set initial clock frequency of 74.25MHz
+ */
+ clkout@0 {
+ reg = <0>;
+ silabs,drive-strength = <8>;
+ silabs,multisynth-source = <0>;
+ silabs,clock-source = <0>;
+ silabs,pll-master;
+ clock-frequency = <74250000>;
+ };
+
+ /*
+ * Overwrite CLK1 configuration with:
+ * - 4 mA output drive strength
+ * - PLL1 as clock source of multisynth 1
+ * - Multisynth 1 as clock source of output divider
+ * - Multisynth 1 can change PLL1
+ * - Reset PLL1 when enabling this clock output
+ */
+ clkout@1 {
+ reg = <1>;
+ silabs,drive-strength = <4>;
+ silabs,multisynth-source = <1>;
+ silabs,clock-source = <0>;
+ silabs,pll-master;
+ silabs,pll-reset;
+ };
+
+ /*
+ * Overwrite CLK2 configuration with:
+ * - XTAL as clock source of output divider
+ */
+ clkout@2 {
+ reg = <2>;
+ silabs,clock-source = <2>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/silabs,si544.txt b/Documentation/devicetree/bindings/clock/silabs,si544.txt
deleted file mode 100644
index b86535b80920..000000000000
--- a/Documentation/devicetree/bindings/clock/silabs,si544.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-Binding for Silicon Labs 544 programmable I2C clock generator.
-
-Reference
-This binding uses the common clock binding[1]. Details about the device can be
-found in the datasheet[2].
-
-[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-[2] Si544 datasheet
- https://www.silabs.com/documents/public/data-sheets/si544-datasheet.pdf
-
-Required properties:
- - compatible: One of "silabs,si514a", "silabs,si514b" "silabs,si514c" according
- to the speed grade of the chip.
- - reg: I2C device address.
- - #clock-cells: From common clock bindings: Shall be 0.
-
-Optional properties:
- - clock-output-names: From common clock bindings. Recommended to be "si544".
-
-Example:
- si544: clock-controller@55 {
- reg = <0x55>;
- #clock-cells = <0>;
- compatible = "silabs,si544b";
- };
diff --git a/Documentation/devicetree/bindings/clock/silabs,si544.yaml b/Documentation/devicetree/bindings/clock/silabs,si544.yaml
new file mode 100644
index 000000000000..f87e71867108
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/silabs,si544.yaml
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/silabs,si544.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Silicon Labs SI514/SI544 clock generator
+
+maintainers:
+ - Mike Looijmans <mike.looijmans@topic.nl>
+
+description: >
+ Silicon Labs 514/544 programmable I2C clock generator. Details about the device
+ can be found in the datasheet:
+
+ https://www.silabs.com/Support%20Documents/TechnicalDocs/si514.pdf
+ https://www.silabs.com/documents/public/data-sheets/si544-datasheet.pdf
+
+properties:
+ compatible:
+ enum:
+ - silabs,si514
+ - silabs,si544a
+ - silabs,si544b
+ - silabs,si544c
+
+ reg:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 0
+
+ clock-output-names:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - "#clock-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ clock-controller@55 {
+ reg = <0x55>;
+ #clock-cells = <0>;
+ compatible = "silabs,si544b";
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/silabs,si570.txt b/Documentation/devicetree/bindings/clock/silabs,si570.txt
deleted file mode 100644
index c09f21e1d98f..000000000000
--- a/Documentation/devicetree/bindings/clock/silabs,si570.txt
+++ /dev/null
@@ -1,39 +0,0 @@
-Binding for Silicon Labs 570, 571, 598 and 599 programmable
-I2C clock generators.
-
-Reference
-This binding uses the common clock binding[1]. Details about the devices can be
-found in the data sheets[2][3].
-
-[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-[2] Si570/571 Data Sheet
- http://www.silabs.com/Support%20Documents/TechnicalDocs/si570.pdf
-[3] Si598/599 Data Sheet
- http://www.silabs.com/Support%20Documents/TechnicalDocs/si598-99.pdf
-
-Required properties:
- - compatible: Shall be one of "silabs,si570", "silabs,si571",
- "silabs,si598", "silabs,si599"
- - reg: I2C device address.
- - #clock-cells: From common clock bindings: Shall be 0.
- - factory-fout: Factory set default frequency. This frequency is part specific.
- The correct frequency for the part used has to be provided in
- order to generate the correct output frequencies. For more
- details, please refer to the data sheet.
- - temperature-stability: Temperature stability of the device in PPM. Should be
- one of: 7, 20, 50 or 100.
-
-Optional properties:
- - clock-output-names: From common clock bindings. Recommended to be "si570".
- - clock-frequency: Output frequency to generate. This defines the output
- frequency set during boot. It can be reprogrammed during
- runtime through the common clock framework.
-
-Example:
- si570: clock-generator@5d {
- #clock-cells = <0>;
- compatible = "silabs,si570";
- temperature-stability = <50>;
- reg = <0x5d>;
- factory-fout = <156250000>;
- };
diff --git a/Documentation/devicetree/bindings/clock/silabs,si570.yaml b/Documentation/devicetree/bindings/clock/silabs,si570.yaml
new file mode 100644
index 000000000000..90e2f79e2b2a
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/silabs,si570.yaml
@@ -0,0 +1,80 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/silabs,si570.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Silicon Labs Si570/Si571/Si598/Si599 programmable I2C clock generator
+
+maintainers:
+ - Soren Brinkmann <soren.brinkmann@xilinx.com>
+
+description: >
+ Silicon Labs 570, 571, 598 and 599 programmable I2C clock generators. Details
+ about the devices can be found in the data sheets[1][2].
+
+ [1] Si570/571 Data Sheet
+ https://www.silabs.com/Support%20Documents/TechnicalDocs/si570.pdf
+ [2] Si598/599 Data Sheet
+ https://www.silabs.com/Support%20Documents/TechnicalDocs/si598-99.pdf
+
+properties:
+ compatible:
+ enum:
+ - silabs,si570
+ - silabs,si571
+ - silabs,si598
+ - silabs,si599
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 0
+
+ factory-fout:
+ description: Factory-set default frequency in Hz.
+ $ref: /schemas/types.yaml#/definitions/uint32
+
+ temperature-stability:
+ description: Temperature stability of the device in PPM.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum:
+ - 7
+ - 20
+ - 50
+ - 100
+
+ clock-output-names:
+ maxItems: 1
+
+ clock-frequency:
+ description: Output frequency to generate at boot; can be reprogrammed at runtime.
+
+ silabs,skip-recall:
+ description: Skip the NVM-to-RAM recall operation during boot.
+ type: boolean
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+ - factory-fout
+ - temperature-stability
+
+additionalProperties: false
+
+examples:
+ - |
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ clock-generator@5d {
+ compatible = "silabs,si570";
+ reg = <0x5d>;
+ #clock-cells = <0>;
+ temperature-stability = <50>;
+ factory-fout = <156250000>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/skyworks,si521xx.yaml b/Documentation/devicetree/bindings/clock/skyworks,si521xx.yaml
new file mode 100644
index 000000000000..9e35e0e51ce8
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/skyworks,si521xx.yaml
@@ -0,0 +1,59 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/skyworks,si521xx.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Skyworks Si521xx I2C PCIe clock generators
+
+description: |
+ The Skyworks Si521xx are I2C PCIe clock generators providing
+ from 4 to 9 output clocks.
+
+maintainers:
+ - Marek Vasut <marex@denx.de>
+
+properties:
+ compatible:
+ enum:
+ - skyworks,si52144
+ - skyworks,si52146
+ - skyworks,si52147
+
+ reg:
+ const: 0x6b
+
+ '#clock-cells':
+ const: 1
+
+ clocks:
+ items:
+ - description: XTal input clock
+
+ skyworks,out-amplitude-microvolt:
+ enum: [ 300000, 400000, 500000, 600000, 700000, 800000, 900000, 1000000 ]
+ description: Output clock signal amplitude
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ clock-generator@6b {
+ compatible = "skyworks,si52144";
+ reg = <0x6b>;
+ #clock-cells = <1>;
+ clocks = <&ref25m>;
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/socionext,uniphier-clock.yaml b/Documentation/devicetree/bindings/clock/socionext,uniphier-clock.yaml
new file mode 100644
index 000000000000..4e82582fb2f3
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/socionext,uniphier-clock.yaml
@@ -0,0 +1,67 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/socionext,uniphier-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: UniPhier clock controller
+
+maintainers:
+ - Masahiro Yamada <yamada.masahiro@socionext.com>
+
+properties:
+ compatible:
+ oneOf:
+ - description: System clock
+ enum:
+ - socionext,uniphier-ld4-clock
+ - socionext,uniphier-pro4-clock
+ - socionext,uniphier-sld8-clock
+ - socionext,uniphier-pro5-clock
+ - socionext,uniphier-pxs2-clock
+ - socionext,uniphier-ld6b-clock
+ - socionext,uniphier-ld11-clock
+ - socionext,uniphier-ld20-clock
+ - socionext,uniphier-pxs3-clock
+ - socionext,uniphier-nx1-clock
+ - description: Media I/O (MIO) clock, SD clock
+ enum:
+ - socionext,uniphier-ld4-mio-clock
+ - socionext,uniphier-pro4-mio-clock
+ - socionext,uniphier-sld8-mio-clock
+ - socionext,uniphier-pro5-sd-clock
+ - socionext,uniphier-pxs2-sd-clock
+ - socionext,uniphier-ld11-mio-clock
+ - socionext,uniphier-ld20-sd-clock
+ - socionext,uniphier-pxs3-sd-clock
+ - socionext,uniphier-nx1-sd-clock
+ - description: Peripheral clock
+ enum:
+ - socionext,uniphier-ld4-peri-clock
+ - socionext,uniphier-pro4-peri-clock
+ - socionext,uniphier-sld8-peri-clock
+ - socionext,uniphier-pro5-peri-clock
+ - socionext,uniphier-pxs2-peri-clock
+ - socionext,uniphier-ld11-peri-clock
+ - socionext,uniphier-ld20-peri-clock
+ - socionext,uniphier-pxs3-peri-clock
+ - socionext,uniphier-nx1-peri-clock
+ - description: SoC-glue clock
+ enum:
+ - socionext,uniphier-pro4-sg-clock
+
+ "#clock-cells":
+ const: 1
+
+additionalProperties: false
+
+required:
+ - compatible
+ - "#clock-cells"
+
+examples:
+ - |
+ clock-controller {
+ compatible = "socionext,uniphier-ld11-clock";
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/sophgo,cv1800-clk.yaml b/Documentation/devicetree/bindings/clock/sophgo,cv1800-clk.yaml
new file mode 100644
index 000000000000..379ce3e9e391
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/sophgo,cv1800-clk.yaml
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/sophgo,cv1800-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Sophgo CV1800/SG2000 Series Clock Controller
+
+maintainers:
+ - Inochi Amaoto <inochiama@outlook.com>
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - sophgo,cv1800b-clk
+ - sophgo,cv1812h-clk
+ - sophgo,sg2000-clk
+ - items:
+ - const: sophgo,sg2002-clk
+ - const: sophgo,sg2000-clk
+ - const: sophgo,cv1800-clk
+ deprecated: true
+ - const: sophgo,cv1810-clk
+ deprecated: true
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 1
+ description:
+ See <dt-bindings/clock/sophgo,cv1800.h> for valid indices.
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - "#clock-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@3002000 {
+ compatible = "sophgo,cv1800-clk";
+ reg = <0x03002000 0x1000>;
+ clocks = <&osc>;
+ #clock-cells = <1>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/sophgo,sg2042-clkgen.yaml b/Documentation/devicetree/bindings/clock/sophgo,sg2042-clkgen.yaml
new file mode 100644
index 000000000000..e7a9255bcb58
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/sophgo,sg2042-clkgen.yaml
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/sophgo,sg2042-clkgen.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Sophgo SG2042 Clock Generator for divider/mux/gate
+
+maintainers:
+ - Chen Wang <unicorn_wang@outlook.com>
+
+properties:
+ compatible:
+ const: sophgo,sg2042-clkgen
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: Main PLL
+ - description: Fixed PLL
+ - description: DDR PLL 0
+ - description: DDR PLL 1
+
+ clock-names:
+ items:
+ - const: mpll
+ - const: fpll
+ - const: dpll0
+ - const: dpll1
+
+ '#clock-cells':
+ const: 1
+ description:
+ See <dt-bindings/clock/sophgo,sg2042-clkgen.h> for valid indices.
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@30012000 {
+ compatible = "sophgo,sg2042-clkgen";
+ reg = <0x30012000 0x1000>;
+ clocks = <&pllclk 0>,
+ <&pllclk 1>,
+ <&pllclk 2>,
+ <&pllclk 3>;
+ clock-names = "mpll",
+ "fpll",
+ "dpll0",
+ "dpll1";
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/sophgo,sg2042-pll.yaml b/Documentation/devicetree/bindings/clock/sophgo,sg2042-pll.yaml
new file mode 100644
index 000000000000..1a417a627dd2
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/sophgo,sg2042-pll.yaml
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/sophgo,sg2042-pll.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Sophgo SG2042 PLL Clock Generator
+
+maintainers:
+ - Chen Wang <unicorn_wang@outlook.com>
+
+properties:
+ compatible:
+ const: sophgo,sg2042-pll
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: Oscillator(Clock Generation IC) for Main/Fixed PLL (25 MHz)
+ - description: Oscillator(Clock Generation IC) for DDR PLL 0 (25 MHz)
+ - description: Oscillator(Clock Generation IC) for DDR PLL 1 (25 MHz)
+
+ clock-names:
+ items:
+ - const: cgi_main
+ - const: cgi_dpll0
+ - const: cgi_dpll1
+
+ '#clock-cells':
+ const: 1
+ description:
+ See <dt-bindings/clock/sophgo,sg2042-pll.h> for valid indices.
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@10000000 {
+ compatible = "sophgo,sg2042-pll";
+ reg = <0x10000000 0x10000>;
+ clocks = <&cgi_main>, <&cgi_dpll0>, <&cgi_dpll1>;
+ clock-names = "cgi_main", "cgi_dpll0", "cgi_dpll1";
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/sophgo,sg2042-rpgate.yaml b/Documentation/devicetree/bindings/clock/sophgo,sg2042-rpgate.yaml
new file mode 100644
index 000000000000..1491fb8ef6a3
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/sophgo,sg2042-rpgate.yaml
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/sophgo,sg2042-rpgate.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Sophgo SG2042 Gate Clock Generator for RP(riscv processors) subsystem
+
+maintainers:
+ - Chen Wang <unicorn_wang@outlook.com>
+
+properties:
+ compatible:
+ const: sophgo,sg2042-rpgate
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: Gate clock for RP subsystem
+
+ clock-names:
+ items:
+ - const: rpgate
+
+ '#clock-cells':
+ const: 1
+ description:
+ See <dt-bindings/clock/sophgo,sg2042-rpgate.h> for valid indices.
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@20000000 {
+ compatible = "sophgo,sg2042-rpgate";
+ reg = <0x20000000 0x10000>;
+ clocks = <&clkgen 85>;
+ clock-names = "rpgate";
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/sophgo,sg2044-clk.yaml b/Documentation/devicetree/bindings/clock/sophgo,sg2044-clk.yaml
new file mode 100644
index 000000000000..272e58bdb62c
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/sophgo,sg2044-clk.yaml
@@ -0,0 +1,99 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/sophgo,sg2044-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Sophgo SG2044 Clock Controller
+
+maintainers:
+ - Inochi Amaoto <inochiama@gmail.com>
+
+description: |
+ The Sophgo SG2044 clock controller requires an external oscillator
+ as input clock.
+
+ All available clocks are defined as preprocessor macros in
+ include/dt-bindings/clock/sophgo,sg2044-clk.h
+
+properties:
+ compatible:
+ const: sophgo,sg2044-clk
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: fpll0
+ - description: fpll1
+ - description: fpll2
+ - description: dpll0
+ - description: dpll1
+ - description: dpll2
+ - description: dpll3
+ - description: dpll4
+ - description: dpll5
+ - description: dpll6
+ - description: dpll7
+ - description: mpll0
+ - description: mpll1
+ - description: mpll2
+ - description: mpll3
+ - description: mpll4
+ - description: mpll5
+
+ clock-names:
+ items:
+ - const: fpll0
+ - const: fpll1
+ - const: fpll2
+ - const: dpll0
+ - const: dpll1
+ - const: dpll2
+ - const: dpll3
+ - const: dpll4
+ - const: dpll5
+ - const: dpll6
+ - const: dpll7
+ - const: mpll0
+ - const: mpll1
+ - const: mpll2
+ - const: mpll3
+ - const: mpll4
+ - const: mpll5
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/sophgo,sg2044-pll.h>
+
+ clock-controller@50002000 {
+ compatible = "sophgo,sg2044-clk";
+ reg = <0x50002000 0x1000>;
+ #clock-cells = <1>;
+ clocks = <&syscon CLK_FPLL0>, <&syscon CLK_FPLL1>,
+ <&syscon CLK_FPLL2>, <&syscon CLK_DPLL0>,
+ <&syscon CLK_DPLL1>, <&syscon CLK_DPLL2>,
+ <&syscon CLK_DPLL3>, <&syscon CLK_DPLL4>,
+ <&syscon CLK_DPLL5>, <&syscon CLK_DPLL6>,
+ <&syscon CLK_DPLL7>, <&syscon CLK_MPLL0>,
+ <&syscon CLK_MPLL1>, <&syscon CLK_MPLL2>,
+ <&syscon CLK_MPLL3>, <&syscon CLK_MPLL4>,
+ <&syscon CLK_MPLL5>;
+ clock-names = "fpll0", "fpll1", "fpll2", "dpll0",
+ "dpll1", "dpll2", "dpll3", "dpll4",
+ "dpll5", "dpll6", "dpll7", "mpll0",
+ "mpll1", "mpll2", "mpll3", "mpll4",
+ "mpll5";
+ };
diff --git a/Documentation/devicetree/bindings/clock/spacemit,k1-pll.yaml b/Documentation/devicetree/bindings/clock/spacemit,k1-pll.yaml
new file mode 100644
index 000000000000..06bafd68c00a
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/spacemit,k1-pll.yaml
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/spacemit,k1-pll.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: SpacemiT K1 PLL
+
+maintainers:
+ - Haylen Chu <heylenay@4d2.org>
+
+properties:
+ compatible:
+ const: spacemit,k1-pll
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ description: External 24MHz oscillator
+
+ spacemit,mpmu:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ Phandle to the "Main PMU (MPMU)" syscon. It is used to check PLL
+ lock status.
+
+ "#clock-cells":
+ const: 1
+ description:
+ See <dt-bindings/clock/spacemit,k1-syscon.h> for valid indices.
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - spacemit,mpmu
+ - "#clock-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@d4090000 {
+ compatible = "spacemit,k1-pll";
+ reg = <0xd4090000 0x1000>;
+ clocks = <&vctcxo_24m>;
+ spacemit,mpmu = <&sysctl_mpmu>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/sprd,sc9860-clk.yaml b/Documentation/devicetree/bindings/clock/sprd,sc9860-clk.yaml
new file mode 100644
index 000000000000..502cd723511f
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/sprd,sc9860-clk.yaml
@@ -0,0 +1,159 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/sprd,sc9860-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Spreadtrum SC9860 clock
+
+maintainers:
+ - Orson Zhai <orsonzhai@gmail.com>
+ - Baolin Wang <baolin.wang7@gmail.com>
+ - Chunyan Zhang <zhang.lyra@gmail.com>
+
+properties:
+ compatible:
+ enum:
+ - sprd,sc9860-agcp-gate
+ - sprd,sc9860-aonsecure-clk
+ - sprd,sc9860-aon-gate
+ - sprd,sc9860-aon-prediv
+ - sprd,sc9860-apahb-gate
+ - sprd,sc9860-apapb-gate
+ - sprd,sc9860-ap-clk
+ - sprd,sc9860-cam-clk
+ - sprd,sc9860-cam-gate
+ - sprd,sc9860-disp-clk
+ - sprd,sc9860-disp-gate
+ - sprd,sc9860-gpu-clk
+ - sprd,sc9860-pll
+ - sprd,sc9860-pmu-gate
+ - sprd,sc9860-vsp-clk
+ - sprd,sc9860-vsp-gate
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ minItems: 1
+ maxItems: 3
+
+ '#clock-cells':
+ const: 1
+
+ sprd,syscon:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ phandle to the syscon which is in the same address area with the
+ clock, and so we can get regmap for the clocks from the syscon device
+
+required:
+ - compatible
+ - clocks
+ - '#clock-cells'
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - sprd,sc9860-agcp-gate
+ - sprd,sc9860-aon-gate
+ - sprd,sc9860-apahb-gate
+ - sprd,sc9860-apapb-gate
+ - sprd,sc9860-cam-gate
+ - sprd,sc9860-disp-gate
+ - sprd,sc9860-gpu-clk
+ - sprd,sc9860-pll
+ - sprd,sc9860-pmu-gate
+ - sprd,sc9860-vsp-gate
+ then:
+ properties:
+ clocks:
+ maxItems: 1
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - sprd,sc9860-aonsecure-clk
+ - sprd,sc9860-cam-clk
+ - sprd,sc9860-disp-clk
+ - sprd,sc9860-vsp-clk
+ then:
+ properties:
+ clocks:
+ minItems: 2
+ maxItems: 2
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - sprd,sc9860-aon-prediv
+ - sprd,sc9860-ap-clk
+ then:
+ properties:
+ clocks:
+ minItems: 3
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - sprd,sc9860-aonsecure-clk
+ - sprd,sc9860-aon-prediv
+ - sprd,sc9860-ap-clk
+ - sprd,sc9860-cam-clk
+ - sprd,sc9860-disp-clk
+ - sprd,sc9860-gpu-clk
+ - sprd,sc9860-vsp-clk
+ then:
+ required:
+ - reg
+ properties:
+ sprd,syscon: false
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - sprd,sc9860-agcp-gate
+ - sprd,sc9860-aon-gate
+ - sprd,sc9860-apahb-gate
+ - sprd,sc9860-apapb-gate
+ - sprd,sc9860-cam-gate
+ - sprd,sc9860-disp-gate
+ - sprd,sc9860-pll
+ - sprd,sc9860-pmu-gate
+ - sprd,sc9860-vsp-gate
+ then:
+ required:
+ - sprd,syscon
+ properties:
+ reg: false
+
+additionalProperties: false
+
+examples:
+ - |
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ pmu-gate {
+ compatible = "sprd,sc9860-pmu-gate";
+ clocks = <&ext_26m>;
+ #clock-cells = <1>;
+ sprd,syscon = <&pmu_regs>;
+ };
+
+ clock-controller@20000000 {
+ compatible = "sprd,sc9860-ap-clk";
+ reg = <0 0x20000000 0 0x400>;
+ clocks = <&ext_26m>, <&pll 0>, <&pmu_gate 0>;
+ #clock-cells = <1>;
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/sprd,sc9863a-clk.yaml b/Documentation/devicetree/bindings/clock/sprd,sc9863a-clk.yaml
new file mode 100644
index 000000000000..a0658056c330
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/sprd,sc9863a-clk.yaml
@@ -0,0 +1,102 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright 2019 Unisoc Inc.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/sprd,sc9863a-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: SC9863A Clock Control Unit
+
+maintainers:
+ - Orson Zhai <orsonzhai@gmail.com>
+ - Baolin Wang <baolin.wang7@gmail.com>
+ - Chunyan Zhang <zhang.lyra@gmail.com>
+
+properties:
+ "#clock-cells":
+ const: 1
+
+ compatible:
+ enum:
+ - sprd,sc9863a-ap-clk
+ - sprd,sc9863a-aon-clk
+ - sprd,sc9863a-apahb-gate
+ - sprd,sc9863a-pmu-gate
+ - sprd,sc9863a-aonapb-gate
+ - sprd,sc9863a-pll
+ - sprd,sc9863a-mpll
+ - sprd,sc9863a-rpll
+ - sprd,sc9863a-dpll
+ - sprd,sc9863a-mm-gate
+ - sprd,sc9863a-mm-clk
+ - sprd,sc9863a-apapb-gate
+
+ clocks:
+ minItems: 1
+ maxItems: 4
+ description: |
+ The input parent clock(s) phandle for this clock, only list fixed
+ clocks which are declared in devicetree.
+
+ clock-names:
+ minItems: 1
+ items:
+ - const: ext-26m
+ - const: ext-32k
+ - const: ext-4m
+ - const: rco-100m
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - '#clock-cells'
+
+if:
+ properties:
+ compatible:
+ enum:
+ - sprd,sc9863a-ap-clk
+ - sprd,sc9863a-aon-clk
+then:
+ required:
+ - reg
+
+else:
+ description: |
+ Other SC9863a clock nodes should be the child of a syscon node in
+ which compatible string should be:
+ "sprd,sc9863a-glbregs", "syscon", "simple-mfd"
+
+ The 'reg' property for the clock node is also required if there is a sub
+ range of registers for the clocks.
+
+additionalProperties: false
+
+examples:
+ - |
+ ap_clk: clock-controller@21500000 {
+ compatible = "sprd,sc9863a-ap-clk";
+ reg = <0x21500000 0x1000>;
+ clocks = <&ext_26m>, <&ext_32k>;
+ clock-names = "ext-26m", "ext-32k";
+ #clock-cells = <1>;
+ };
+
+ - |
+ syscon@20e00000 {
+ compatible = "sprd,sc9863a-glbregs", "syscon", "simple-mfd";
+ reg = <0x20e00000 0x4000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0 0x20e00000 0x4000>;
+
+ apahb_gate: apahb-gate@0 {
+ compatible = "sprd,sc9863a-apahb-gate";
+ reg = <0x0 0x1020>;
+ #clock-cells = <1>;
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/sprd,ums512-clk.yaml b/Documentation/devicetree/bindings/clock/sprd,ums512-clk.yaml
new file mode 100644
index 000000000000..43d2b6c31357
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/sprd,ums512-clk.yaml
@@ -0,0 +1,71 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright 2022 Unisoc Inc.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/sprd,ums512-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: UMS512 Soc clock controller
+
+maintainers:
+ - Orson Zhai <orsonzhai@gmail.com>
+ - Baolin Wang <baolin.wang7@gmail.com>
+ - Chunyan Zhang <zhang.lyra@gmail.com>
+
+properties:
+ compatible:
+ enum:
+ - sprd,ums512-apahb-gate
+ - sprd,ums512-ap-clk
+ - sprd,ums512-aonapb-clk
+ - sprd,ums512-pmu-gate
+ - sprd,ums512-g0-pll
+ - sprd,ums512-g2-pll
+ - sprd,ums512-g3-pll
+ - sprd,ums512-gc-pll
+ - sprd,ums512-aon-gate
+ - sprd,ums512-audcpapb-gate
+ - sprd,ums512-audcpahb-gate
+ - sprd,ums512-gpu-clk
+ - sprd,ums512-mm-clk
+ - sprd,ums512-mm-gate-clk
+ - sprd,ums512-apapb-gate
+
+ "#clock-cells":
+ const: 1
+
+ clocks:
+ minItems: 1
+ maxItems: 4
+ description: |
+ The input parent clock(s) phandle for the clock, only list
+ fixed clocks which are declared in devicetree.
+
+ clock-names:
+ minItems: 1
+ items:
+ - const: ext-26m
+ - const: ext-32k
+ - const: ext-4m
+ - const: rco-100m
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - '#clock-cells'
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ ap_clk: clock-controller@20200000 {
+ compatible = "sprd,ums512-ap-clk";
+ reg = <0x20200000 0x1000>;
+ clocks = <&ext_26m>;
+ clock-names = "ext-26m";
+ #clock-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/sprd.txt b/Documentation/devicetree/bindings/clock/sprd.txt
deleted file mode 100644
index e9d179e882d9..000000000000
--- a/Documentation/devicetree/bindings/clock/sprd.txt
+++ /dev/null
@@ -1,63 +0,0 @@
-Spreadtrum Clock Binding
-------------------------
-
-Required properties:
-- compatible: should contain the following compatible strings:
- - "sprd,sc9860-pmu-gate"
- - "sprd,sc9860-pll"
- - "sprd,sc9860-ap-clk"
- - "sprd,sc9860-aon-prediv"
- - "sprd,sc9860-apahb-gate"
- - "sprd,sc9860-aon-gate"
- - "sprd,sc9860-aonsecure-clk"
- - "sprd,sc9860-agcp-gate"
- - "sprd,sc9860-gpu-clk"
- - "sprd,sc9860-vsp-clk"
- - "sprd,sc9860-vsp-gate"
- - "sprd,sc9860-cam-clk"
- - "sprd,sc9860-cam-gate"
- - "sprd,sc9860-disp-clk"
- - "sprd,sc9860-disp-gate"
- - "sprd,sc9860-apapb-gate"
-
-- #clock-cells: must be 1
-
-- clocks : Should be the input parent clock(s) phandle for the clock, this
- property here just simply shows which clock group the clocks'
- parents are in, since each clk node would represent many clocks
- which are defined in the driver. The detailed dependency
- relationship (i.e. how many parents and which are the parents)
- are implemented in driver code.
-
-Optional properties:
-
-- reg: Contain the registers base address and length. It must be configured
- only if no 'sprd,syscon' under the node.
-
-- sprd,syscon: phandle to the syscon which is in the same address area with
- the clock, and so we can get regmap for the clocks from the
- syscon device.
-
-Example:
-
- pmu_gate: pmu-gate {
- compatible = "sprd,sc9860-pmu-gate";
- sprd,syscon = <&pmu_regs>;
- clocks = <&ext_26m>;
- #clock-cells = <1>;
- };
-
- pll: pll {
- compatible = "sprd,sc9860-pll";
- sprd,syscon = <&ana_regs>;
- clocks = <&pmu_gate 0>;
- #clock-cells = <1>;
- };
-
- ap_clk: clock-controller@20000000 {
- compatible = "sprd,sc9860-ap-clk";
- reg = <0 0x20000000 0 0x400>;
- clocks = <&ext_26m>, <&pll 0>,
- <&pmu_gate 0>;
- #clock-cells = <1>;
- };
diff --git a/Documentation/devicetree/bindings/clock/st,stm32-rcc.txt b/Documentation/devicetree/bindings/clock/st,stm32-rcc.txt
deleted file mode 100644
index b240121d2ac9..000000000000
--- a/Documentation/devicetree/bindings/clock/st,stm32-rcc.txt
+++ /dev/null
@@ -1,132 +0,0 @@
-STMicroelectronics STM32 Reset and Clock Controller
-===================================================
-
-The RCC IP is both a reset and a clock controller.
-
-Please refer to clock-bindings.txt for common clock controller binding usage.
-Please also refer to reset.txt for common reset controller binding usage.
-
-Required properties:
-- compatible: Should be:
- "st,stm32f42xx-rcc"
- "st,stm32f469-rcc"
- "st,stm32f746-rcc"
-- reg: should be register base and length as documented in the
- datasheet
-- #reset-cells: 1, see below
-- #clock-cells: 2, device nodes should specify the clock in their "clocks"
- property, containing a phandle to the clock device node, an index selecting
- between gated clocks and other clocks and an index specifying the clock to
- use.
-- clocks: External oscillator clock phandle
- - high speed external clock signal (HSE)
- - external I2S clock (I2S_CKIN)
-
-Example:
-
- rcc: rcc@40023800 {
- #reset-cells = <1>;
- #clock-cells = <2>
- compatible = "st,stm32f42xx-rcc", "st,stm32-rcc";
- reg = <0x40023800 0x400>;
- clocks = <&clk_hse>, <&clk_i2s_ckin>;
- };
-
-Specifying gated clocks
-=======================
-
-The primary index must be set to 0.
-
-The secondary index is the bit number within the RCC register bank, starting
-from the first RCC clock enable register (RCC_AHB1ENR, address offset 0x30).
-
-It is calculated as: index = register_offset / 4 * 32 + bit_offset.
-Where bit_offset is the bit offset within the register (LSB is 0, MSB is 31).
-
-To simplify the usage and to share bit definition with the reset and clock
-drivers of the RCC IP, macros are available to generate the index in
-human-readble format.
-
-For STM32F4 series, the macro are available here:
- - include/dt-bindings/mfd/stm32f4-rcc.h
-
-Example:
-
- /* Gated clock, AHB1 bit 0 (GPIOA) */
- ... {
- clocks = <&rcc 0 STM32F4_AHB1_CLOCK(GPIOA)>
- };
-
- /* Gated clock, AHB2 bit 4 (CRYP) */
- ... {
- clocks = <&rcc 0 STM32F4_AHB2_CLOCK(CRYP)>
- };
-
-Specifying other clocks
-=======================
-
-The primary index must be set to 1.
-
-The secondary index is bound with the following magic numbers:
-
- 0 SYSTICK
- 1 FCLK
- 2 CLK_LSI (low-power clock source)
- 3 CLK_LSE (generated from a 32.768 kHz low-speed external
- crystal or ceramic resonator)
- 4 CLK_HSE_RTC (HSE division factor for RTC clock)
- 5 CLK_RTC (real-time clock)
- 6 PLL_VCO_I2S (vco frequency of I2S pll)
- 7 PLL_VCO_SAI (vco frequency of SAI pll)
- 8 CLK_LCD (LCD-TFT)
- 9 CLK_I2S (I2S clocks)
- 10 CLK_SAI1 (audio clocks)
- 11 CLK_SAI2
- 12 CLK_I2SQ_PDIV (post divisor of pll i2s q divisor)
- 13 CLK_SAIQ_PDIV (post divisor of pll sai q divisor)
-
- 14 CLK_HSI (Internal ocscillator clock)
- 15 CLK_SYSCLK (System Clock)
- 16 CLK_HDMI_CEC (HDMI-CEC clock)
- 17 CLK_SPDIF (SPDIF-Rx clock)
- 18 CLK_USART1 (U(s)arts clocks)
- 19 CLK_USART2
- 20 CLK_USART3
- 21 CLK_UART4
- 22 CLK_UART5
- 23 CLK_USART6
- 24 CLK_UART7
- 25 CLK_UART8
- 26 CLK_I2C1 (I2S clocks)
- 27 CLK_I2C2
- 28 CLK_I2C3
- 29 CLK_I2C4
- 30 CLK_LPTIMER (LPTimer1 clock)
-)
-
-Example:
-
- /* Misc clock, FCLK */
- ... {
- clocks = <&rcc 1 STM32F4_APB1_CLOCK(TIM2)>
- };
-
-
-Specifying softreset control of devices
-=======================================
-
-Device nodes should specify the reset channel required in their "resets"
-property, containing a phandle to the reset device node and an index specifying
-which channel to use.
-The index is the bit number within the RCC registers bank, starting from RCC
-base address.
-It is calculated as: index = register_offset / 4 * 32 + bit_offset.
-Where bit_offset is the bit offset within the register.
-For example, for CRC reset:
- crc = AHB1RSTR_offset / 4 * 32 + CRCRST_bit_offset = 0x10 / 4 * 32 + 12 = 140
-
-example:
-
- timer2 {
- resets = <&rcc STM32F4_APB1_RESET(TIM2)>;
- };
diff --git a/Documentation/devicetree/bindings/clock/st,stm32-rcc.yaml b/Documentation/devicetree/bindings/clock/st,stm32-rcc.yaml
new file mode 100644
index 000000000000..f83a6120d65a
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/st,stm32-rcc.yaml
@@ -0,0 +1,144 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/st,stm32-rcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STMicroelectronics STM32 Reset Clock Controller
+
+maintainers:
+ - Dario Binacchi <dario.binacchi@amarulasolutions.com>
+
+description: |
+ The RCC IP is both a reset and a clock controller.
+ The reset phandle argument is the bit number within the RCC registers bank,
+ starting from RCC base address.
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - st,stm32f42xx-rcc
+ - st,stm32f746-rcc
+ - st,stm32h743-rcc
+ - const: st,stm32-rcc
+ - items:
+ - enum:
+ - st,stm32f469-rcc
+ - const: st,stm32f42xx-rcc
+ - const: st,stm32-rcc
+ - items:
+ - enum:
+ - st,stm32f769-rcc
+ - const: st,stm32f746-rcc
+ - const: st,stm32-rcc
+
+ reg:
+ maxItems: 1
+
+ '#reset-cells':
+ const: 1
+
+ '#clock-cells':
+ enum: [1, 2]
+
+ clocks:
+ minItems: 2
+ maxItems: 3
+
+ st,syscfg:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ Phandle to system configuration controller. It can be used to control the
+ power domain circuitry.
+
+ st,ssc-modfreq-hz:
+ description:
+ The modulation frequency for main PLL (in Hz)
+
+ st,ssc-moddepth-permyriad:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ The modulation rate for main PLL (in permyriad, i.e. 0.01%)
+ minimum: 25
+ maximum: 200
+
+ st,ssc-modmethod:
+ $ref: /schemas/types.yaml#/definitions/string
+ description:
+ The modulation techniques for main PLL.
+ items:
+ enum:
+ - center-spread
+ - down-spread
+
+required:
+ - compatible
+ - reg
+ - '#reset-cells'
+ - '#clock-cells'
+ - clocks
+ - st,syscfg
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: st,stm32h743-rcc
+ then:
+ properties:
+ '#clock-cells':
+ const: 1
+ description: |
+ The clock index for the specified type.
+ clocks:
+ items:
+ - description: high speed external (HSE) clock input
+ - description: low speed external (LSE) clock input
+ - description: Inter-IC sound (I2S) clock input
+ st,ssc-modfreq-hz: false
+ st,ssc-moddepth-permyriad: false
+ st,ssc-modmethod: false
+
+ else:
+ properties:
+ '#clock-cells':
+ const: 2
+ description: |
+ - The first cell is the clock type, possible values are 0 for
+ gated clocks and 1 otherwise.
+ - The second cell is the clock index for the specified type.
+ clocks:
+ items:
+ - description: high speed external (HSE) clock input
+ - description: Inter-IC sound (I2S) clock input
+
+additionalProperties: false
+
+examples:
+ # Reset and Clock Control Module node:
+ - |
+ clock-controller@40023800 {
+ compatible = "st,stm32f42xx-rcc", "st,stm32-rcc";
+ reg = <0x40023800 0x400>;
+ #clock-cells = <2>;
+ #reset-cells = <1>;
+ clocks = <&clk_hse>, <&clk_i2s_ckin>;
+ st,syscfg = <&pwrcfg>;
+ st,ssc-modfreq-hz = <10000>;
+ st,ssc-moddepth-permyriad = <200>;
+ st,ssc-modmethod = "center-spread";
+ };
+ - |
+ clock-controller@58024400 {
+ compatible = "st,stm32h743-rcc", "st,stm32-rcc";
+ reg = <0x58024400 0x400>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ clocks = <&clk_hse>, <&clk_lse>, <&clk_i2s>;
+ st,syscfg = <&pwrcfg>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/st,stm32h7-rcc.txt b/Documentation/devicetree/bindings/clock/st,stm32h7-rcc.txt
deleted file mode 100644
index cac24ee10b72..000000000000
--- a/Documentation/devicetree/bindings/clock/st,stm32h7-rcc.txt
+++ /dev/null
@@ -1,71 +0,0 @@
-STMicroelectronics STM32H7 Reset and Clock Controller
-=====================================================
-
-The RCC IP is both a reset and a clock controller.
-
-Please refer to clock-bindings.txt for common clock controller binding usage.
-Please also refer to reset.txt for common reset controller binding usage.
-
-Required properties:
-- compatible: Should be:
- "st,stm32h743-rcc"
-
-- reg: should be register base and length as documented in the
- datasheet
-
-- #reset-cells: 1, see below
-
-- #clock-cells : from common clock binding; shall be set to 1
-
-- clocks: External oscillator clock phandle
- - high speed external clock signal (HSE)
- - low speed external clock signal (LSE)
- - external I2S clock (I2S_CKIN)
-
-Optional properties:
-- st,syscfg: phandle for pwrcfg, mandatory to disable/enable backup domain
- write protection (RTC clock).
-
-Example:
-
- rcc: reset-clock-controller@58024400 {
- compatible = "st,stm32h743-rcc", "st,stm32-rcc";
- reg = <0x58024400 0x400>;
- #reset-cells = <1>;
- #clock-cells = <1>;
- clocks = <&clk_hse>, <&clk_lse>, <&clk_i2s_ckin>;
-
- st,syscfg = <&pwrcfg>;
-};
-
-The peripheral clock consumer should specify the desired clock by
-having the clock ID in its "clocks" phandle cell.
-
-Example:
-
- timer5: timer@40000c00 {
- compatible = "st,stm32-timer";
- reg = <0x40000c00 0x400>;
- interrupts = <50>;
- clocks = <&rcc TIM5_CK>;
- };
-
-Specifying softreset control of devices
-=======================================
-
-Device nodes should specify the reset channel required in their "resets"
-property, containing a phandle to the reset device node and an index specifying
-which channel to use.
-The index is the bit number within the RCC registers bank, starting from RCC
-base address.
-It is calculated as: index = register_offset / 4 * 32 + bit_offset.
-Where bit_offset is the bit offset within the register.
-
-For example, for CRC reset:
- crc = AHB4RSTR_offset / 4 * 32 + CRCRST_bit_offset = 0x88 / 4 * 32 + 19 = 1107
-
-Example:
-
- timer2 {
- resets = <&rcc STM32H7_APB1L_RESET(TIM2)>;
- };
diff --git a/Documentation/devicetree/bindings/clock/st,stm32mp1-rcc.txt b/Documentation/devicetree/bindings/clock/st,stm32mp1-rcc.txt
deleted file mode 100644
index fb9495ea582c..000000000000
--- a/Documentation/devicetree/bindings/clock/st,stm32mp1-rcc.txt
+++ /dev/null
@@ -1,60 +0,0 @@
-STMicroelectronics STM32 Peripheral Reset Clock Controller
-==========================================================
-
-The RCC IP is both a reset and a clock controller.
-
-RCC makes also power management (resume/supend and wakeup interrupt).
-
-Please also refer to reset.txt for common reset controller binding usage.
-
-Please also refer to clock-bindings.txt for common clock controller
-binding usage.
-
-
-Required properties:
-- compatible: "st,stm32mp1-rcc", "syscon"
-- reg: should be register base and length as documented in the datasheet
-- #clock-cells: 1, device nodes should specify the clock in their
- "clocks" property, containing a phandle to the clock device node,
- an index specifying the clock to use.
-- #reset-cells: Shall be 1
-- interrupts: Should contain a general interrupt line and a interrupt line
- to the wake-up of processor (CSTOP).
-
-Example:
- rcc: rcc@50000000 {
- compatible = "st,stm32mp1-rcc", "syscon";
- reg = <0x50000000 0x1000>;
- #clock-cells = <1>;
- #reset-cells = <1>;
- interrupts = <GIC_SPI 5 IRQ_TYPE_NONE>,
- <GIC_SPI 145 IRQ_TYPE_NONE>;
- };
-
-Specifying clocks
-=================
-
-All available clocks are defined as preprocessor macros in
-dt-bindings/clock/stm32mp1-clks.h header and can be used in device
-tree sources.
-
-Specifying softreset control of devices
-=======================================
-
-Device nodes should specify the reset channel required in their "resets"
-property, containing a phandle to the reset device node and an index specifying
-which channel to use.
-The index is the bit number within the RCC registers bank, starting from RCC
-base address.
-It is calculated as: index = register_offset / 4 * 32 + bit_offset.
-Where bit_offset is the bit offset within the register.
-
-For example on STM32MP1, for LTDC reset:
- ltdc = APB4_RSTSETR_offset / 4 * 32 + LTDC_bit_offset
- = 0x180 / 4 * 32 + 0 = 3072
-
-The list of valid indices for STM32MP1 is available in:
-include/dt-bindings/reset-controller/stm32mp1-resets.h
-
-This file implements defines like:
-#define LTDC_R 3072
diff --git a/Documentation/devicetree/bindings/clock/st,stm32mp1-rcc.yaml b/Documentation/devicetree/bindings/clock/st,stm32mp1-rcc.yaml
new file mode 100644
index 000000000000..e72f46e79b90
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/st,stm32mp1-rcc.yaml
@@ -0,0 +1,131 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/st,stm32mp1-rcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STMicroelectronics STM32MP1 Reset Clock Controller
+
+maintainers:
+ - Gabriel Fernandez <gabriel.fernandez@foss.st.com>
+
+description: |
+ The RCC IP is both a reset and a clock controller.
+ RCC makes also power management (resume/supend and wakeup interrupt).
+ Please also refer to reset.txt for common reset controller binding usage.
+
+ This binding uses common clock bindings
+ Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+ Specifying clocks
+ =================
+
+ All available clocks are defined as preprocessor macros in
+ include/dt-bindings/clock/stm32mp1-clks.h header and can be used in device
+ tree sources.
+
+ Specifying softreset control of devices
+ =======================================
+
+ Device nodes should specify the reset channel required in their "resets"
+ property, containing a phandle to the reset device node and an index specifying
+ which channel to use.
+ The index is the bit number within the RCC registers bank, starting from RCC
+ base address.
+ It is calculated as: index = register_offset / 4 * 32 + bit_offset.
+ Where bit_offset is the bit offset within the register.
+
+ For example on STM32MP1, for LTDC reset:
+ ltdc = APB4_RSTSETR_offset / 4 * 32 + LTDC_bit_offset
+ = 0x180 / 4 * 32 + 0 = 3072
+
+ The list of valid indices for STM32MP1 is available in:
+ include/dt-bindings/reset/stm32mp1-resets.h
+ include/dt-bindings/reset/stm32mp13-resets.h
+
+ This file implements defines like:
+ #define LTDC_R 3072
+
+properties:
+ "#clock-cells":
+ const: 1
+
+ "#reset-cells":
+ const: 1
+
+ compatible:
+ items:
+ - enum:
+ - st,stm32mp1-rcc-secure
+ - st,stm32mp1-rcc
+ - st,stm32mp13-rcc
+ - const: syscon
+
+ clocks:
+ minItems: 1
+ maxItems: 5
+
+ clock-names:
+ minItems: 1
+ maxItems: 5
+
+ reg:
+ maxItems: 1
+
+required:
+ - "#clock-cells"
+ - "#reset-cells"
+ - compatible
+ - reg
+
+if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - st,stm32mp1-rcc-secure
+ - st,stm32mp13-rcc
+then:
+ properties:
+ clocks:
+ description: Specifies oscillators.
+ maxItems: 5
+
+ clock-names:
+ items:
+ - const: hse
+ - const: hsi
+ - const: csi
+ - const: lse
+ - const: lsi
+ required:
+ - clocks
+ - clock-names
+else:
+ properties:
+ clocks:
+ description:
+ Specifies the external RX clock for ethernet MAC.
+ maxItems: 1
+
+ clock-names:
+ const: ETH_RX_CLK/ETH_REF_CLK
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/stm32mp1-clks.h>
+ rcc: rcc@50000000 {
+ compatible = "st,stm32mp1-rcc-secure", "syscon";
+ reg = <0x50000000 0x1000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ clock-names = "hse", "hsi", "csi", "lse", "lsi";
+ clocks = <&scmi_clk CK_SCMI_HSE>,
+ <&scmi_clk CK_SCMI_HSI>,
+ <&scmi_clk CK_SCMI_CSI>,
+ <&scmi_clk CK_SCMI_LSE>,
+ <&scmi_clk CK_SCMI_LSI>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/st,stm32mp21-rcc.yaml b/Documentation/devicetree/bindings/clock/st,stm32mp21-rcc.yaml
new file mode 100644
index 000000000000..4368063c6709
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/st,stm32mp21-rcc.yaml
@@ -0,0 +1,199 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/st,stm32mp21-rcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STM32MP21 Reset Clock Controller
+
+maintainers:
+ - Gabriel Fernandez <gabriel.fernandez@foss.st.com>
+
+description: |
+ The RCC hardware block is both a reset and a clock controller.
+ RCC makes also power management (resume/suspend).
+
+ See also:
+ include/dt-bindings/clock/st,stm32mp21-rcc.h
+ include/dt-bindings/reset/st,stm32mp21-rcc.h
+
+properties:
+ compatible:
+ enum:
+ - st,stm32mp21-rcc
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ '#reset-cells':
+ const: 1
+
+ clocks:
+ items:
+ - description: CK_SCMI_HSE High Speed External oscillator (8 to 48 MHz)
+ - description: CK_SCMI_HSI High Speed Internal oscillator (~ 64 MHz)
+ - description: CK_SCMI_MSI Low Power Internal oscillator (~ 4 MHz or ~ 16 MHz)
+ - description: CK_SCMI_LSE Low Speed External oscillator (32 KHz)
+ - description: CK_SCMI_LSI Low Speed Internal oscillator (~ 32 KHz)
+ - description: CK_SCMI_HSE_DIV2 CK_SCMI_HSE divided by 2 (could be gated)
+ - description: CK_SCMI_ICN_HS_MCU High Speed interconnect bus clock
+ - description: CK_SCMI_ICN_LS_MCU Low Speed interconnect bus clock
+ - description: CK_SCMI_ICN_SDMMC SDMMC interconnect bus clock
+ - description: CK_SCMI_ICN_DDR DDR interconnect bus clock
+ - description: CK_SCMI_ICN_DISPLAY Display interconnect bus clock
+ - description: CK_SCMI_ICN_HSL HSL interconnect bus clock
+ - description: CK_SCMI_ICN_NIC NIC interconnect bus clock
+ - description: CK_SCMI_FLEXGEN_07 flexgen clock 7
+ - description: CK_SCMI_FLEXGEN_08 flexgen clock 8
+ - description: CK_SCMI_FLEXGEN_09 flexgen clock 9
+ - description: CK_SCMI_FLEXGEN_10 flexgen clock 10
+ - description: CK_SCMI_FLEXGEN_11 flexgen clock 11
+ - description: CK_SCMI_FLEXGEN_12 flexgen clock 12
+ - description: CK_SCMI_FLEXGEN_13 flexgen clock 13
+ - description: CK_SCMI_FLEXGEN_14 flexgen clock 14
+ - description: CK_SCMI_FLEXGEN_16 flexgen clock 16
+ - description: CK_SCMI_FLEXGEN_17 flexgen clock 17
+ - description: CK_SCMI_FLEXGEN_18 flexgen clock 18
+ - description: CK_SCMI_FLEXGEN_19 flexgen clock 19
+ - description: CK_SCMI_FLEXGEN_20 flexgen clock 20
+ - description: CK_SCMI_FLEXGEN_21 flexgen clock 21
+ - description: CK_SCMI_FLEXGEN_22 flexgen clock 22
+ - description: CK_SCMI_FLEXGEN_23 flexgen clock 23
+ - description: CK_SCMI_FLEXGEN_24 flexgen clock 24
+ - description: CK_SCMI_FLEXGEN_25 flexgen clock 25
+ - description: CK_SCMI_FLEXGEN_26 flexgen clock 26
+ - description: CK_SCMI_FLEXGEN_27 flexgen clock 27
+ - description: CK_SCMI_FLEXGEN_29 flexgen clock 29
+ - description: CK_SCMI_FLEXGEN_30 flexgen clock 30
+ - description: CK_SCMI_FLEXGEN_31 flexgen clock 31
+ - description: CK_SCMI_FLEXGEN_33 flexgen clock 33
+ - description: CK_SCMI_FLEXGEN_36 flexgen clock 36
+ - description: CK_SCMI_FLEXGEN_37 flexgen clock 37
+ - description: CK_SCMI_FLEXGEN_38 flexgen clock 38
+ - description: CK_SCMI_FLEXGEN_39 flexgen clock 39
+ - description: CK_SCMI_FLEXGEN_40 flexgen clock 40
+ - description: CK_SCMI_FLEXGEN_41 flexgen clock 41
+ - description: CK_SCMI_FLEXGEN_42 flexgen clock 42
+ - description: CK_SCMI_FLEXGEN_43 flexgen clock 43
+ - description: CK_SCMI_FLEXGEN_44 flexgen clock 44
+ - description: CK_SCMI_FLEXGEN_45 flexgen clock 45
+ - description: CK_SCMI_FLEXGEN_46 flexgen clock 46
+ - description: CK_SCMI_FLEXGEN_47 flexgen clock 47
+ - description: CK_SCMI_FLEXGEN_48 flexgen clock 48
+ - description: CK_SCMI_FLEXGEN_50 flexgen clock 50
+ - description: CK_SCMI_FLEXGEN_51 flexgen clock 51
+ - description: CK_SCMI_FLEXGEN_52 flexgen clock 52
+ - description: CK_SCMI_FLEXGEN_53 flexgen clock 53
+ - description: CK_SCMI_FLEXGEN_54 flexgen clock 54
+ - description: CK_SCMI_FLEXGEN_55 flexgen clock 55
+ - description: CK_SCMI_FLEXGEN_56 flexgen clock 56
+ - description: CK_SCMI_FLEXGEN_57 flexgen clock 57
+ - description: CK_SCMI_FLEXGEN_58 flexgen clock 58
+ - description: CK_SCMI_FLEXGEN_61 flexgen clock 61
+ - description: CK_SCMI_FLEXGEN_62 flexgen clock 62
+ - description: CK_SCMI_FLEXGEN_63 flexgen clock 63
+ - description: CK_SCMI_ICN_APB1 Peripheral bridge 1
+ - description: CK_SCMI_ICN_APB2 Peripheral bridge 2
+ - description: CK_SCMI_ICN_APB3 Peripheral bridge 3
+ - description: CK_SCMI_ICN_APB4 Peripheral bridge 4
+ - description: CK_SCMI_ICN_APB5 Peripheral bridge 5
+ - description: CK_SCMI_ICN_APBDBG Peripheral bridge for debug
+ - description: CK_SCMI_TIMG1 Peripheral bridge for timer1
+ - description: CK_SCMI_TIMG2 Peripheral bridge for timer2
+
+ access-controllers:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+ - '#reset-cells'
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/st,stm32mp21-rcc.h>
+
+ clock-controller@44200000 {
+ compatible = "st,stm32mp21-rcc";
+ reg = <0x44200000 0x10000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ clocks = <&scmi_clk CK_SCMI_HSE>,
+ <&scmi_clk CK_SCMI_HSI>,
+ <&scmi_clk CK_SCMI_MSI>,
+ <&scmi_clk CK_SCMI_LSE>,
+ <&scmi_clk CK_SCMI_LSI>,
+ <&scmi_clk CK_SCMI_HSE_DIV2>,
+ <&scmi_clk CK_SCMI_ICN_HS_MCU>,
+ <&scmi_clk CK_SCMI_ICN_LS_MCU>,
+ <&scmi_clk CK_SCMI_ICN_SDMMC>,
+ <&scmi_clk CK_SCMI_ICN_DDR>,
+ <&scmi_clk CK_SCMI_ICN_DISPLAY>,
+ <&scmi_clk CK_SCMI_ICN_HSL>,
+ <&scmi_clk CK_SCMI_ICN_NIC>,
+ <&scmi_clk CK_SCMI_FLEXGEN_07>,
+ <&scmi_clk CK_SCMI_FLEXGEN_08>,
+ <&scmi_clk CK_SCMI_FLEXGEN_09>,
+ <&scmi_clk CK_SCMI_FLEXGEN_10>,
+ <&scmi_clk CK_SCMI_FLEXGEN_11>,
+ <&scmi_clk CK_SCMI_FLEXGEN_12>,
+ <&scmi_clk CK_SCMI_FLEXGEN_13>,
+ <&scmi_clk CK_SCMI_FLEXGEN_14>,
+ <&scmi_clk CK_SCMI_FLEXGEN_16>,
+ <&scmi_clk CK_SCMI_FLEXGEN_17>,
+ <&scmi_clk CK_SCMI_FLEXGEN_18>,
+ <&scmi_clk CK_SCMI_FLEXGEN_19>,
+ <&scmi_clk CK_SCMI_FLEXGEN_20>,
+ <&scmi_clk CK_SCMI_FLEXGEN_21>,
+ <&scmi_clk CK_SCMI_FLEXGEN_22>,
+ <&scmi_clk CK_SCMI_FLEXGEN_23>,
+ <&scmi_clk CK_SCMI_FLEXGEN_24>,
+ <&scmi_clk CK_SCMI_FLEXGEN_25>,
+ <&scmi_clk CK_SCMI_FLEXGEN_26>,
+ <&scmi_clk CK_SCMI_FLEXGEN_27>,
+ <&scmi_clk CK_SCMI_FLEXGEN_29>,
+ <&scmi_clk CK_SCMI_FLEXGEN_30>,
+ <&scmi_clk CK_SCMI_FLEXGEN_31>,
+ <&scmi_clk CK_SCMI_FLEXGEN_33>,
+ <&scmi_clk CK_SCMI_FLEXGEN_36>,
+ <&scmi_clk CK_SCMI_FLEXGEN_37>,
+ <&scmi_clk CK_SCMI_FLEXGEN_38>,
+ <&scmi_clk CK_SCMI_FLEXGEN_39>,
+ <&scmi_clk CK_SCMI_FLEXGEN_40>,
+ <&scmi_clk CK_SCMI_FLEXGEN_41>,
+ <&scmi_clk CK_SCMI_FLEXGEN_42>,
+ <&scmi_clk CK_SCMI_FLEXGEN_43>,
+ <&scmi_clk CK_SCMI_FLEXGEN_44>,
+ <&scmi_clk CK_SCMI_FLEXGEN_45>,
+ <&scmi_clk CK_SCMI_FLEXGEN_46>,
+ <&scmi_clk CK_SCMI_FLEXGEN_47>,
+ <&scmi_clk CK_SCMI_FLEXGEN_48>,
+ <&scmi_clk CK_SCMI_FLEXGEN_50>,
+ <&scmi_clk CK_SCMI_FLEXGEN_51>,
+ <&scmi_clk CK_SCMI_FLEXGEN_52>,
+ <&scmi_clk CK_SCMI_FLEXGEN_53>,
+ <&scmi_clk CK_SCMI_FLEXGEN_54>,
+ <&scmi_clk CK_SCMI_FLEXGEN_55>,
+ <&scmi_clk CK_SCMI_FLEXGEN_56>,
+ <&scmi_clk CK_SCMI_FLEXGEN_57>,
+ <&scmi_clk CK_SCMI_FLEXGEN_58>,
+ <&scmi_clk CK_SCMI_FLEXGEN_61>,
+ <&scmi_clk CK_SCMI_FLEXGEN_62>,
+ <&scmi_clk CK_SCMI_FLEXGEN_63>,
+ <&scmi_clk CK_SCMI_ICN_APB1>,
+ <&scmi_clk CK_SCMI_ICN_APB2>,
+ <&scmi_clk CK_SCMI_ICN_APB3>,
+ <&scmi_clk CK_SCMI_ICN_APB4>,
+ <&scmi_clk CK_SCMI_ICN_APB5>,
+ <&scmi_clk CK_SCMI_ICN_APBDBG>,
+ <&scmi_clk CK_SCMI_TIMG1>,
+ <&scmi_clk CK_SCMI_TIMG2>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/st,stm32mp25-rcc.yaml b/Documentation/devicetree/bindings/clock/st,stm32mp25-rcc.yaml
new file mode 100644
index 000000000000..1e3b5d218bb0
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/st,stm32mp25-rcc.yaml
@@ -0,0 +1,219 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/st,stm32mp25-rcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STM32MP25 Reset Clock Controller
+
+maintainers:
+ - Gabriel Fernandez <gabriel.fernandez@foss.st.com>
+
+description: |
+ The RCC hardware block is both a reset and a clock controller.
+ RCC makes also power management (resume/suspend).
+
+ See also:
+ include/dt-bindings/clock/st,stm32mp25-rcc.h
+ include/dt-bindings/reset/st,stm32mp25-rcc.h
+
+properties:
+ compatible:
+ enum:
+ - st,stm32mp25-rcc
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ '#reset-cells':
+ const: 1
+
+ clocks:
+ items:
+ - description: CK_SCMI_HSE High Speed External oscillator (8 to 48 MHz)
+ - description: CK_SCMI_HSI High Speed Internal oscillator (~ 64 MHz)
+ - description: CK_SCMI_MSI Low Power Internal oscillator (~ 4 MHz or ~ 16 MHz)
+ - description: CK_SCMI_LSE Low Speed External oscillator (32 KHz)
+ - description: CK_SCMI_LSI Low Speed Internal oscillator (~ 32 KHz)
+ - description: CK_SCMI_HSE_DIV2 CK_SCMI_HSE divided by 2 (could be gated)
+ - description: CK_SCMI_ICN_HS_MCU High Speed interconnect bus clock
+ - description: CK_SCMI_ICN_LS_MCU Low Speed interconnect bus clock
+ - description: CK_SCMI_ICN_SDMMC SDMMC interconnect bus clock
+ - description: CK_SCMI_ICN_DDR DDR interconnect bus clock
+ - description: CK_SCMI_ICN_DISPLAY Display interconnect bus clock
+ - description: CK_SCMI_ICN_HSL HSL interconnect bus clock
+ - description: CK_SCMI_ICN_NIC NIC interconnect bus clock
+ - description: CK_SCMI_ICN_VID Video interconnect bus clock
+ - description: CK_SCMI_FLEXGEN_07 flexgen clock 7
+ - description: CK_SCMI_FLEXGEN_08 flexgen clock 8
+ - description: CK_SCMI_FLEXGEN_09 flexgen clock 9
+ - description: CK_SCMI_FLEXGEN_10 flexgen clock 10
+ - description: CK_SCMI_FLEXGEN_11 flexgen clock 11
+ - description: CK_SCMI_FLEXGEN_12 flexgen clock 12
+ - description: CK_SCMI_FLEXGEN_13 flexgen clock 13
+ - description: CK_SCMI_FLEXGEN_14 flexgen clock 14
+ - description: CK_SCMI_FLEXGEN_15 flexgen clock 15
+ - description: CK_SCMI_FLEXGEN_16 flexgen clock 16
+ - description: CK_SCMI_FLEXGEN_17 flexgen clock 17
+ - description: CK_SCMI_FLEXGEN_18 flexgen clock 18
+ - description: CK_SCMI_FLEXGEN_19 flexgen clock 19
+ - description: CK_SCMI_FLEXGEN_20 flexgen clock 20
+ - description: CK_SCMI_FLEXGEN_21 flexgen clock 21
+ - description: CK_SCMI_FLEXGEN_22 flexgen clock 22
+ - description: CK_SCMI_FLEXGEN_23 flexgen clock 23
+ - description: CK_SCMI_FLEXGEN_24 flexgen clock 24
+ - description: CK_SCMI_FLEXGEN_25 flexgen clock 25
+ - description: CK_SCMI_FLEXGEN_26 flexgen clock 26
+ - description: CK_SCMI_FLEXGEN_27 flexgen clock 27
+ - description: CK_SCMI_FLEXGEN_28 flexgen clock 28
+ - description: CK_SCMI_FLEXGEN_29 flexgen clock 29
+ - description: CK_SCMI_FLEXGEN_30 flexgen clock 30
+ - description: CK_SCMI_FLEXGEN_31 flexgen clock 31
+ - description: CK_SCMI_FLEXGEN_32 flexgen clock 32
+ - description: CK_SCMI_FLEXGEN_33 flexgen clock 33
+ - description: CK_SCMI_FLEXGEN_34 flexgen clock 34
+ - description: CK_SCMI_FLEXGEN_35 flexgen clock 35
+ - description: CK_SCMI_FLEXGEN_36 flexgen clock 36
+ - description: CK_SCMI_FLEXGEN_37 flexgen clock 37
+ - description: CK_SCMI_FLEXGEN_38 flexgen clock 38
+ - description: CK_SCMI_FLEXGEN_39 flexgen clock 39
+ - description: CK_SCMI_FLEXGEN_40 flexgen clock 40
+ - description: CK_SCMI_FLEXGEN_41 flexgen clock 41
+ - description: CK_SCMI_FLEXGEN_42 flexgen clock 42
+ - description: CK_SCMI_FLEXGEN_43 flexgen clock 43
+ - description: CK_SCMI_FLEXGEN_44 flexgen clock 44
+ - description: CK_SCMI_FLEXGEN_45 flexgen clock 45
+ - description: CK_SCMI_FLEXGEN_46 flexgen clock 46
+ - description: CK_SCMI_FLEXGEN_47 flexgen clock 47
+ - description: CK_SCMI_FLEXGEN_48 flexgen clock 48
+ - description: CK_SCMI_FLEXGEN_49 flexgen clock 49
+ - description: CK_SCMI_FLEXGEN_50 flexgen clock 50
+ - description: CK_SCMI_FLEXGEN_51 flexgen clock 51
+ - description: CK_SCMI_FLEXGEN_52 flexgen clock 52
+ - description: CK_SCMI_FLEXGEN_53 flexgen clock 53
+ - description: CK_SCMI_FLEXGEN_54 flexgen clock 54
+ - description: CK_SCMI_FLEXGEN_55 flexgen clock 55
+ - description: CK_SCMI_FLEXGEN_56 flexgen clock 56
+ - description: CK_SCMI_FLEXGEN_57 flexgen clock 57
+ - description: CK_SCMI_FLEXGEN_58 flexgen clock 58
+ - description: CK_SCMI_FLEXGEN_59 flexgen clock 59
+ - description: CK_SCMI_FLEXGEN_60 flexgen clock 60
+ - description: CK_SCMI_FLEXGEN_61 flexgen clock 61
+ - description: CK_SCMI_FLEXGEN_62 flexgen clock 62
+ - description: CK_SCMI_FLEXGEN_63 flexgen clock 63
+ - description: CK_SCMI_ICN_APB1 Peripheral bridge 1
+ - description: CK_SCMI_ICN_APB2 Peripheral bridge 2
+ - description: CK_SCMI_ICN_APB3 Peripheral bridge 3
+ - description: CK_SCMI_ICN_APB4 Peripheral bridge 4
+ - description: CK_SCMI_ICN_APBDBG Peripheral bridge for debug
+ - description: CK_SCMI_TIMG1 Peripheral bridge for timer1
+ - description: CK_SCMI_TIMG2 Peripheral bridge for timer2
+ - description: CK_SCMI_PLL3 PLL3 clock
+ - description: clk_dsi_txbyte DSI byte clock
+
+ access-controllers:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+ - '#reset-cells'
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/st,stm32mp25-rcc.h>
+
+ clock-controller@44200000 {
+ compatible = "st,stm32mp25-rcc";
+ reg = <0x44200000 0x10000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ clocks = <&scmi_clk CK_SCMI_HSE>,
+ <&scmi_clk CK_SCMI_HSI>,
+ <&scmi_clk CK_SCMI_MSI>,
+ <&scmi_clk CK_SCMI_LSE>,
+ <&scmi_clk CK_SCMI_LSI>,
+ <&scmi_clk CK_SCMI_HSE_DIV2>,
+ <&scmi_clk CK_SCMI_ICN_HS_MCU>,
+ <&scmi_clk CK_SCMI_ICN_LS_MCU>,
+ <&scmi_clk CK_SCMI_ICN_SDMMC>,
+ <&scmi_clk CK_SCMI_ICN_DDR>,
+ <&scmi_clk CK_SCMI_ICN_DISPLAY>,
+ <&scmi_clk CK_SCMI_ICN_HSL>,
+ <&scmi_clk CK_SCMI_ICN_NIC>,
+ <&scmi_clk CK_SCMI_ICN_VID>,
+ <&scmi_clk CK_SCMI_FLEXGEN_07>,
+ <&scmi_clk CK_SCMI_FLEXGEN_08>,
+ <&scmi_clk CK_SCMI_FLEXGEN_09>,
+ <&scmi_clk CK_SCMI_FLEXGEN_10>,
+ <&scmi_clk CK_SCMI_FLEXGEN_11>,
+ <&scmi_clk CK_SCMI_FLEXGEN_12>,
+ <&scmi_clk CK_SCMI_FLEXGEN_13>,
+ <&scmi_clk CK_SCMI_FLEXGEN_14>,
+ <&scmi_clk CK_SCMI_FLEXGEN_15>,
+ <&scmi_clk CK_SCMI_FLEXGEN_16>,
+ <&scmi_clk CK_SCMI_FLEXGEN_17>,
+ <&scmi_clk CK_SCMI_FLEXGEN_18>,
+ <&scmi_clk CK_SCMI_FLEXGEN_19>,
+ <&scmi_clk CK_SCMI_FLEXGEN_20>,
+ <&scmi_clk CK_SCMI_FLEXGEN_21>,
+ <&scmi_clk CK_SCMI_FLEXGEN_22>,
+ <&scmi_clk CK_SCMI_FLEXGEN_23>,
+ <&scmi_clk CK_SCMI_FLEXGEN_24>,
+ <&scmi_clk CK_SCMI_FLEXGEN_25>,
+ <&scmi_clk CK_SCMI_FLEXGEN_26>,
+ <&scmi_clk CK_SCMI_FLEXGEN_27>,
+ <&scmi_clk CK_SCMI_FLEXGEN_28>,
+ <&scmi_clk CK_SCMI_FLEXGEN_29>,
+ <&scmi_clk CK_SCMI_FLEXGEN_30>,
+ <&scmi_clk CK_SCMI_FLEXGEN_31>,
+ <&scmi_clk CK_SCMI_FLEXGEN_32>,
+ <&scmi_clk CK_SCMI_FLEXGEN_33>,
+ <&scmi_clk CK_SCMI_FLEXGEN_34>,
+ <&scmi_clk CK_SCMI_FLEXGEN_35>,
+ <&scmi_clk CK_SCMI_FLEXGEN_36>,
+ <&scmi_clk CK_SCMI_FLEXGEN_37>,
+ <&scmi_clk CK_SCMI_FLEXGEN_38>,
+ <&scmi_clk CK_SCMI_FLEXGEN_39>,
+ <&scmi_clk CK_SCMI_FLEXGEN_40>,
+ <&scmi_clk CK_SCMI_FLEXGEN_41>,
+ <&scmi_clk CK_SCMI_FLEXGEN_42>,
+ <&scmi_clk CK_SCMI_FLEXGEN_43>,
+ <&scmi_clk CK_SCMI_FLEXGEN_44>,
+ <&scmi_clk CK_SCMI_FLEXGEN_45>,
+ <&scmi_clk CK_SCMI_FLEXGEN_46>,
+ <&scmi_clk CK_SCMI_FLEXGEN_47>,
+ <&scmi_clk CK_SCMI_FLEXGEN_48>,
+ <&scmi_clk CK_SCMI_FLEXGEN_49>,
+ <&scmi_clk CK_SCMI_FLEXGEN_50>,
+ <&scmi_clk CK_SCMI_FLEXGEN_51>,
+ <&scmi_clk CK_SCMI_FLEXGEN_52>,
+ <&scmi_clk CK_SCMI_FLEXGEN_53>,
+ <&scmi_clk CK_SCMI_FLEXGEN_54>,
+ <&scmi_clk CK_SCMI_FLEXGEN_55>,
+ <&scmi_clk CK_SCMI_FLEXGEN_56>,
+ <&scmi_clk CK_SCMI_FLEXGEN_57>,
+ <&scmi_clk CK_SCMI_FLEXGEN_58>,
+ <&scmi_clk CK_SCMI_FLEXGEN_59>,
+ <&scmi_clk CK_SCMI_FLEXGEN_60>,
+ <&scmi_clk CK_SCMI_FLEXGEN_61>,
+ <&scmi_clk CK_SCMI_FLEXGEN_62>,
+ <&scmi_clk CK_SCMI_FLEXGEN_63>,
+ <&scmi_clk CK_SCMI_ICN_APB1>,
+ <&scmi_clk CK_SCMI_ICN_APB2>,
+ <&scmi_clk CK_SCMI_ICN_APB3>,
+ <&scmi_clk CK_SCMI_ICN_APB4>,
+ <&scmi_clk CK_SCMI_ICN_APBDBG>,
+ <&scmi_clk CK_SCMI_TIMG1>,
+ <&scmi_clk CK_SCMI_TIMG2>,
+ <&scmi_clk CK_SCMI_PLL3>,
+ <&clk_dsi_txbyte>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/st/st,clkgen-pll.txt b/Documentation/devicetree/bindings/clock/st/st,clkgen-pll.txt
index f207053e0550..d0fa1e02d06d 100644
--- a/Documentation/devicetree/bindings/clock/st/st,clkgen-pll.txt
+++ b/Documentation/devicetree/bindings/clock/st/st,clkgen-pll.txt
@@ -10,7 +10,10 @@ Required properties:
- compatible : shall be:
"st,clkgen-pll0"
+ "st,clkgen-pll0-a0"
+ "st,clkgen-pll0-c0"
"st,clkgen-pll1"
+ "st,clkgen-pll1-c0"
"st,stih407-clkgen-plla9"
"st,stih418-clkgen-plla9"
diff --git a/Documentation/devicetree/bindings/clock/st/st,flexgen.txt b/Documentation/devicetree/bindings/clock/st/st,flexgen.txt
index 7ff77fc57dff..a9d1c19f30a3 100644
--- a/Documentation/devicetree/bindings/clock/st/st,flexgen.txt
+++ b/Documentation/devicetree/bindings/clock/st/st,flexgen.txt
@@ -64,11 +64,18 @@ Required properties:
audio use case)
"st,flexgen-video", "st,flexgen" (enable clock propagation on parent
and activate synchronous mode)
+ "st,flexgen-stih410-a0"
+ "st,flexgen-stih410-c0"
+ "st,flexgen-stih418-c0"
+ "st,flexgen-stih410-d0"
+ "st,flexgen-stih407-d2"
+ "st,flexgen-stih418-d2"
+ "st,flexgen-stih407-d3"
- #clock-cells : from common clock binding; shall be set to 1 (multiple clock
outputs).
-- clocks : must be set to the parent's phandle. it's could be output clocks of
+- clocks : must be set to the parent's phandle. it could be output clocks of
a quadsfs or/and a pll or/and clk_sysin (up to 7 clocks)
- clock-output-names : List of strings used to name the clock outputs.
diff --git a/Documentation/devicetree/bindings/clock/st/st,quadfs.txt b/Documentation/devicetree/bindings/clock/st/st,quadfs.txt
index d93d49342e60..c4ba2adb0b4f 100644
--- a/Documentation/devicetree/bindings/clock/st/st,quadfs.txt
+++ b/Documentation/devicetree/bindings/clock/st/st,quadfs.txt
@@ -12,6 +12,9 @@ This binding uses the common clock binding[1].
Required properties:
- compatible : shall be:
"st,quadfs"
+ "st,quadfs-d0"
+ "st,quadfs-d2"
+ "st,quadfs-d3"
"st,quadfs-pll"
diff --git a/Documentation/devicetree/bindings/clock/starfive,jh7100-audclk.yaml b/Documentation/devicetree/bindings/clock/starfive,jh7100-audclk.yaml
new file mode 100644
index 000000000000..8f49a1ae03f1
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/starfive,jh7100-audclk.yaml
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/starfive,jh7100-audclk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: StarFive JH7100 Audio Clock Generator
+
+maintainers:
+ - Emil Renner Berthing <kernel@esmil.dk>
+
+properties:
+ compatible:
+ const: starfive,jh7100-audclk
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: Audio source clock
+ - description: External 12.288MHz clock
+ - description: Domain 7 AHB bus clock
+
+ clock-names:
+ items:
+ - const: audio_src
+ - const: audio_12288
+ - const: dom7ahb_bus
+
+ '#clock-cells':
+ const: 1
+ description:
+ See <dt-bindings/clock/starfive-jh7100-audio.h> for valid indices.
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/starfive-jh7100.h>
+
+ clock-controller@10480000 {
+ compatible = "starfive,jh7100-audclk";
+ reg = <0x10480000 0x10000>;
+ clocks = <&clkgen JH7100_CLK_AUDIO_SRC>,
+ <&clkgen JH7100_CLK_AUDIO_12288>,
+ <&clkgen JH7100_CLK_DOM7AHB_BUS>;
+ clock-names = "audio_src", "audio_12288", "dom7ahb_bus";
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/starfive,jh7100-clkgen.yaml b/Documentation/devicetree/bindings/clock/starfive,jh7100-clkgen.yaml
new file mode 100644
index 000000000000..12f17b60ecbe
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/starfive,jh7100-clkgen.yaml
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/starfive,jh7100-clkgen.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: StarFive JH7100 Clock Generator
+
+maintainers:
+ - Geert Uytterhoeven <geert@linux-m68k.org>
+ - Emil Renner Berthing <kernel@esmil.dk>
+
+properties:
+ compatible:
+ const: starfive,jh7100-clkgen
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: Main clock source (25 MHz)
+ - description: Application-specific clock source (12-27 MHz)
+ - description: RMII reference clock (50 MHz)
+ - description: RGMII RX clock (125 MHz)
+
+ clock-names:
+ items:
+ - const: osc_sys
+ - const: osc_aud
+ - const: gmac_rmii_ref
+ - const: gmac_gr_mii_rxclk
+
+ '#clock-cells':
+ const: 1
+ description:
+ See <dt-bindings/clock/starfive-jh7100.h> for valid indices.
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@11800000 {
+ compatible = "starfive,jh7100-clkgen";
+ reg = <0x11800000 0x10000>;
+ clocks = <&osc_sys>, <&osc_aud>, <&gmac_rmii_ref>, <&gmac_gr_mii_rxclk>;
+ clock-names = "osc_sys", "osc_aud", "gmac_rmii_ref", "gmac_gr_mii_rxclk";
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/starfive,jh7110-aoncrg.yaml b/Documentation/devicetree/bindings/clock/starfive,jh7110-aoncrg.yaml
new file mode 100644
index 000000000000..923680a44aef
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/starfive,jh7110-aoncrg.yaml
@@ -0,0 +1,107 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/starfive,jh7110-aoncrg.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: StarFive JH7110 Always-On Clock and Reset Generator
+
+maintainers:
+ - Emil Renner Berthing <kernel@esmil.dk>
+
+properties:
+ compatible:
+ const: starfive,jh7110-aoncrg
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ oneOf:
+ - items:
+ - description: Main Oscillator (24 MHz)
+ - description: GMAC0 RMII reference or GMAC0 RGMII RX
+ - description: STG AXI/AHB
+ - description: APB Bus
+ - description: GMAC0 GTX
+
+ - items:
+ - description: Main Oscillator (24 MHz)
+ - description: GMAC0 RMII reference or GMAC0 RGMII RX
+ - description: STG AXI/AHB or GMAC0 RGMII RX
+ - description: APB Bus or STG AXI/AHB
+ - description: GMAC0 GTX or APB Bus
+ - description: RTC Oscillator (32.768 kHz) or GMAC0 GTX
+
+ - items:
+ - description: Main Oscillator (24 MHz)
+ - description: GMAC0 RMII reference
+ - description: GMAC0 RGMII RX
+ - description: STG AXI/AHB
+ - description: APB Bus
+ - description: GMAC0 GTX
+ - description: RTC Oscillator (32.768 kHz)
+
+ clock-names:
+ oneOf:
+ - minItems: 5
+ items:
+ - const: osc
+ - enum:
+ - gmac0_rmii_refin
+ - gmac0_rgmii_rxin
+ - const: stg_axiahb
+ - const: apb_bus
+ - const: gmac0_gtxclk
+ - const: rtc_osc
+
+ - minItems: 6
+ items:
+ - const: osc
+ - const: gmac0_rmii_refin
+ - const: gmac0_rgmii_rxin
+ - const: stg_axiahb
+ - const: apb_bus
+ - const: gmac0_gtxclk
+ - const: rtc_osc
+
+ '#clock-cells':
+ const: 1
+ description:
+ See <dt-bindings/clock/starfive,jh7110-crg.h> for valid indices.
+
+ '#reset-cells':
+ const: 1
+ description:
+ See <dt-bindings/reset/starfive,jh7110-crg.h> for valid indices.
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - '#clock-cells'
+ - '#reset-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/starfive,jh7110-crg.h>
+
+ clock-controller@17000000 {
+ compatible = "starfive,jh7110-aoncrg";
+ reg = <0x17000000 0x10000>;
+ clocks = <&osc>, <&gmac0_rmii_refin>,
+ <&gmac0_rgmii_rxin>,
+ <&syscrg JH7110_SYSCLK_STG_AXIAHB>,
+ <&syscrg JH7110_SYSCLK_APB_BUS>,
+ <&syscrg JH7110_SYSCLK_GMAC0_GTXCLK>,
+ <&rtc_osc>;
+ clock-names = "osc", "gmac0_rmii_refin",
+ "gmac0_rgmii_rxin", "stg_axiahb",
+ "apb_bus", "gmac0_gtxclk",
+ "rtc_osc";
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/starfive,jh7110-ispcrg.yaml b/Documentation/devicetree/bindings/clock/starfive,jh7110-ispcrg.yaml
new file mode 100644
index 000000000000..3b8b85be5cd0
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/starfive,jh7110-ispcrg.yaml
@@ -0,0 +1,87 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/starfive,jh7110-ispcrg.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: StarFive JH7110 Image-Signal-Process Clock and Reset Generator
+
+maintainers:
+ - Xingyu Wu <xingyu.wu@starfivetech.com>
+
+properties:
+ compatible:
+ const: starfive,jh7110-ispcrg
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: ISP Top core
+ - description: ISP Top Axi
+ - description: NOC ISP Bus
+ - description: external DVP
+
+ clock-names:
+ items:
+ - const: isp_top_core
+ - const: isp_top_axi
+ - const: noc_bus_isp_axi
+ - const: dvp_clk
+
+ resets:
+ items:
+ - description: ISP Top core
+ - description: ISP Top Axi
+ - description: NOC ISP Bus
+
+ '#clock-cells':
+ const: 1
+ description:
+ See <dt-bindings/clock/starfive,jh7110-crg.h> for valid indices.
+
+ '#reset-cells':
+ const: 1
+ description:
+ See <dt-bindings/reset/starfive,jh7110-crg.h> for valid indices.
+
+ power-domains:
+ maxItems: 1
+ description:
+ ISP domain power
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - resets
+ - '#clock-cells'
+ - '#reset-cells'
+ - power-domains
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/starfive,jh7110-crg.h>
+ #include <dt-bindings/power/starfive,jh7110-pmu.h>
+ #include <dt-bindings/reset/starfive,jh7110-crg.h>
+
+ ispcrg: clock-controller@19810000 {
+ compatible = "starfive,jh7110-ispcrg";
+ reg = <0x19810000 0x10000>;
+ clocks = <&syscrg JH7110_SYSCLK_ISP_TOP_CORE>,
+ <&syscrg JH7110_SYSCLK_ISP_TOP_AXI>,
+ <&syscrg JH7110_SYSCLK_NOC_BUS_ISP_AXI>,
+ <&dvp_clk>;
+ clock-names = "isp_top_core", "isp_top_axi",
+ "noc_bus_isp_axi", "dvp_clk";
+ resets = <&syscrg JH7110_SYSRST_ISP_TOP>,
+ <&syscrg JH7110_SYSRST_ISP_TOP_AXI>,
+ <&syscrg JH7110_SYSRST_NOC_BUS_ISP_AXI>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ power-domains = <&pwrc JH7110_PD_ISP>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/starfive,jh7110-pll.yaml b/Documentation/devicetree/bindings/clock/starfive,jh7110-pll.yaml
new file mode 100644
index 000000000000..be8300ce86d0
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/starfive,jh7110-pll.yaml
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/starfive,jh7110-pll.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: StarFive JH7110 PLL Clock Generator
+
+description:
+ These PLLs are high speed, low jitter frequency synthesizers in the JH7110.
+ Each PLL works in integer mode or fraction mode, with configuration
+ registers in the sys syscon. So the PLLs node should be a child of
+ SYS-SYSCON node.
+ The formula for calculating frequency is
+ Fvco = Fref * (NI + NF) / M / Q1
+
+maintainers:
+ - Xingyu Wu <xingyu.wu@starfivetech.com>
+
+properties:
+ compatible:
+ const: starfive,jh7110-pll
+
+ clocks:
+ maxItems: 1
+ description: Main Oscillator (24 MHz)
+
+ '#clock-cells':
+ const: 1
+ description:
+ See <dt-bindings/clock/starfive,jh7110-crg.h> for valid indices.
+
+required:
+ - compatible
+ - clocks
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller {
+ compatible = "starfive,jh7110-pll";
+ clocks = <&osc>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/starfive,jh7110-stgcrg.yaml b/Documentation/devicetree/bindings/clock/starfive,jh7110-stgcrg.yaml
new file mode 100644
index 000000000000..b64ccd84200a
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/starfive,jh7110-stgcrg.yaml
@@ -0,0 +1,82 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/starfive,jh7110-stgcrg.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: StarFive JH7110 System-Top-Group Clock and Reset Generator
+
+maintainers:
+ - Xingyu Wu <xingyu.wu@starfivetech.com>
+
+properties:
+ compatible:
+ const: starfive,jh7110-stgcrg
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: Main Oscillator (24 MHz)
+ - description: HIFI4 core
+ - description: STG AXI/AHB
+ - description: USB (125 MHz)
+ - description: CPU Bus
+ - description: HIFI4 Axi
+ - description: NOC STG Bus
+ - description: APB Bus
+
+ clock-names:
+ items:
+ - const: osc
+ - const: hifi4_core
+ - const: stg_axiahb
+ - const: usb_125m
+ - const: cpu_bus
+ - const: hifi4_axi
+ - const: nocstg_bus
+ - const: apb_bus
+
+ '#clock-cells':
+ const: 1
+ description:
+ See <dt-bindings/clock/starfive,jh7110-crg.h> for valid indices.
+
+ '#reset-cells':
+ const: 1
+ description:
+ See <dt-bindings/reset/starfive,jh7110-crg.h> for valid indices.
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - '#clock-cells'
+ - '#reset-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/starfive,jh7110-crg.h>
+
+ stgcrg: clock-controller@10230000 {
+ compatible = "starfive,jh7110-stgcrg";
+ reg = <0x10230000 0x10000>;
+ clocks = <&osc>,
+ <&syscrg JH7110_SYSCLK_HIFI4_CORE>,
+ <&syscrg JH7110_SYSCLK_STG_AXIAHB>,
+ <&syscrg JH7110_SYSCLK_USB_125M>,
+ <&syscrg JH7110_SYSCLK_CPU_BUS>,
+ <&syscrg JH7110_SYSCLK_HIFI4_AXI>,
+ <&syscrg JH7110_SYSCLK_NOCSTG_BUS>,
+ <&syscrg JH7110_SYSCLK_APB_BUS>;
+ clock-names = "osc", "hifi4_core",
+ "stg_axiahb", "usb_125m",
+ "cpu_bus", "hifi4_axi",
+ "nocstg_bus", "apb_bus";
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/starfive,jh7110-syscrg.yaml b/Documentation/devicetree/bindings/clock/starfive,jh7110-syscrg.yaml
new file mode 100644
index 000000000000..5ba0a885aa80
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/starfive,jh7110-syscrg.yaml
@@ -0,0 +1,118 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/starfive,jh7110-syscrg.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: StarFive JH7110 System Clock and Reset Generator
+
+maintainers:
+ - Emil Renner Berthing <kernel@esmil.dk>
+
+properties:
+ compatible:
+ const: starfive,jh7110-syscrg
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ oneOf:
+ - items:
+ - description: Main Oscillator (24 MHz)
+ - description: GMAC1 RMII reference or GMAC1 RGMII RX
+ - description: External I2S TX bit clock
+ - description: External I2S TX left/right channel clock
+ - description: External I2S RX bit clock
+ - description: External I2S RX left/right channel clock
+ - description: External TDM clock
+ - description: External audio master clock
+ - description: PLL0
+ - description: PLL1
+ - description: PLL2
+
+ - items:
+ - description: Main Oscillator (24 MHz)
+ - description: GMAC1 RMII reference
+ - description: GMAC1 RGMII RX
+ - description: External I2S TX bit clock
+ - description: External I2S TX left/right channel clock
+ - description: External I2S RX bit clock
+ - description: External I2S RX left/right channel clock
+ - description: External TDM clock
+ - description: External audio master clock
+ - description: PLL0
+ - description: PLL1
+ - description: PLL2
+
+ clock-names:
+ oneOf:
+ - items:
+ - const: osc
+ - enum:
+ - gmac1_rmii_refin
+ - gmac1_rgmii_rxin
+ - const: i2stx_bclk_ext
+ - const: i2stx_lrck_ext
+ - const: i2srx_bclk_ext
+ - const: i2srx_lrck_ext
+ - const: tdm_ext
+ - const: mclk_ext
+ - const: pll0_out
+ - const: pll1_out
+ - const: pll2_out
+
+ - items:
+ - const: osc
+ - const: gmac1_rmii_refin
+ - const: gmac1_rgmii_rxin
+ - const: i2stx_bclk_ext
+ - const: i2stx_lrck_ext
+ - const: i2srx_bclk_ext
+ - const: i2srx_lrck_ext
+ - const: tdm_ext
+ - const: mclk_ext
+ - const: pll0_out
+ - const: pll1_out
+ - const: pll2_out
+
+ '#clock-cells':
+ const: 1
+ description:
+ See <dt-bindings/clock/starfive,jh7110-crg.h> for valid indices.
+
+ '#reset-cells':
+ const: 1
+ description:
+ See <dt-bindings/reset/starfive,jh7110-crg.h> for valid indices.
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - '#clock-cells'
+ - '#reset-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@13020000 {
+ compatible = "starfive,jh7110-syscrg";
+ reg = <0x13020000 0x10000>;
+ clocks = <&osc>, <&gmac1_rmii_refin>,
+ <&gmac1_rgmii_rxin>,
+ <&i2stx_bclk_ext>, <&i2stx_lrck_ext>,
+ <&i2srx_bclk_ext>, <&i2srx_lrck_ext>,
+ <&tdm_ext>, <&mclk_ext>,
+ <&pllclk 0>, <&pllclk 1>, <&pllclk 2>;
+ clock-names = "osc", "gmac1_rmii_refin",
+ "gmac1_rgmii_rxin",
+ "i2stx_bclk_ext", "i2stx_lrck_ext",
+ "i2srx_bclk_ext", "i2srx_lrck_ext",
+ "tdm_ext", "mclk_ext",
+ "pll0_out", "pll1_out", "pll2_out";
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/starfive,jh7110-voutcrg.yaml b/Documentation/devicetree/bindings/clock/starfive,jh7110-voutcrg.yaml
new file mode 100644
index 000000000000..af77bd8c86b1
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/starfive,jh7110-voutcrg.yaml
@@ -0,0 +1,90 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/starfive,jh7110-voutcrg.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: StarFive JH7110 Video-Output Clock and Reset Generator
+
+maintainers:
+ - Xingyu Wu <xingyu.wu@starfivetech.com>
+
+properties:
+ compatible:
+ const: starfive,jh7110-voutcrg
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: Vout Top core
+ - description: Vout Top Ahb
+ - description: Vout Top Axi
+ - description: Vout Top HDMI MCLK
+ - description: I2STX0 BCLK
+ - description: external HDMI pixel
+
+ clock-names:
+ items:
+ - const: vout_src
+ - const: vout_top_ahb
+ - const: vout_top_axi
+ - const: vout_top_hdmitx0_mclk
+ - const: i2stx0_bclk
+ - const: hdmitx0_pixelclk
+
+ resets:
+ maxItems: 1
+ description: Vout Top core
+
+ '#clock-cells':
+ const: 1
+ description:
+ See <dt-bindings/clock/starfive,jh7110-crg.h> for valid indices.
+
+ '#reset-cells':
+ const: 1
+ description:
+ See <dt-bindings/reset/starfive,jh7110-crg.h> for valid indices.
+
+ power-domains:
+ maxItems: 1
+ description:
+ Vout domain power
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - resets
+ - '#clock-cells'
+ - '#reset-cells'
+ - power-domains
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/starfive,jh7110-crg.h>
+ #include <dt-bindings/power/starfive,jh7110-pmu.h>
+ #include <dt-bindings/reset/starfive,jh7110-crg.h>
+
+ voutcrg: clock-controller@295C0000 {
+ compatible = "starfive,jh7110-voutcrg";
+ reg = <0x295C0000 0x10000>;
+ clocks = <&syscrg JH7110_SYSCLK_VOUT_SRC>,
+ <&syscrg JH7110_SYSCLK_VOUT_TOP_AHB>,
+ <&syscrg JH7110_SYSCLK_VOUT_TOP_AXI>,
+ <&syscrg JH7110_SYSCLK_VOUT_TOP_HDMITX0_MCLK>,
+ <&syscrg JH7110_SYSCLK_I2STX0_BCLK>,
+ <&hdmitx0_pixelclk>;
+ clock-names = "vout_src", "vout_top_ahb",
+ "vout_top_axi", "vout_top_hdmitx0_mclk",
+ "i2stx0_bclk", "hdmitx0_pixelclk";
+ resets = <&syscrg JH7110_SYSRST_VOUT_TOP_SRC>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ power-domains = <&pwrc JH7110_PD_VOUT>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/ste-u300-syscon-clock.txt b/Documentation/devicetree/bindings/clock/ste-u300-syscon-clock.txt
deleted file mode 100644
index 7cafcb98ead7..000000000000
--- a/Documentation/devicetree/bindings/clock/ste-u300-syscon-clock.txt
+++ /dev/null
@@ -1,80 +0,0 @@
-Clock bindings for ST-Ericsson U300 System Controller Clocks
-
-Bindings for the gated system controller clocks:
-
-Required properties:
-- compatible: must be "stericsson,u300-syscon-clk"
-- #clock-cells: must be <0>
-- clock-type: specifies the type of clock:
- 0 = slow clock
- 1 = fast clock
- 2 = rest/remaining clock
-- clock-id: specifies the clock in the type range
-
-Optional properties:
-- clocks: parent clock(s)
-
-The available clocks per type are as follows:
-
-Type: ID: Clock:
--------------------
-0 0 Slow peripheral bridge clock
-0 1 UART0 clock
-0 4 GPIO clock
-0 6 RTC clock
-0 7 Application timer clock
-0 8 Access timer clock
-
-1 0 Fast peripheral bridge clock
-1 1 I2C bus 0 clock
-1 2 I2C bus 1 clock
-1 5 MMC interface peripheral (silicon) clock
-1 6 SPI clock
-
-2 3 CPU clock
-2 4 DMA controller clock
-2 5 External Memory Interface (EMIF) clock
-2 6 NAND flask interface clock
-2 8 XGAM graphics engine clock
-2 9 Shared External Memory Interface (SEMI) clock
-2 10 AHB Subsystem Bridge clock
-2 12 Interrupt controller clock
-
-Example:
-
-gpio_clk: gpio_clk@13M {
- #clock-cells = <0>;
- compatible = "stericsson,u300-syscon-clk";
- clock-type = <0>; /* Slow */
- clock-id = <4>;
- clocks = <&slow_clk>;
-};
-
-gpio: gpio@c0016000 {
- compatible = "stericsson,gpio-coh901";
- (...)
- clocks = <&gpio_clk>;
-};
-
-
-Bindings for the MMC/SD card clock:
-
-Required properties:
-- compatible: must be "stericsson,u300-syscon-mclk"
-- #clock-cells: must be <0>
-
-Optional properties:
-- clocks: parent clock(s)
-
-mmc_mclk: mmc_mclk {
- #clock-cells = <0>;
- compatible = "stericsson,u300-syscon-mclk";
- clocks = <&mmc_pclk>;
-};
-
-mmcsd: mmcsd@c0001000 {
- compatible = "arm,pl18x", "arm,primecell";
- clocks = <&mmc_pclk>, <&mmc_mclk>;
- clock-names = "apb_pclk", "mclk";
- (...)
-};
diff --git a/Documentation/devicetree/bindings/clock/stericsson,abx500.txt b/Documentation/devicetree/bindings/clock/stericsson,abx500.txt
deleted file mode 100644
index dbaa886b223e..000000000000
--- a/Documentation/devicetree/bindings/clock/stericsson,abx500.txt
+++ /dev/null
@@ -1,20 +0,0 @@
-Clock bindings for ST-Ericsson ABx500 clocks
-
-Required properties :
-- compatible : shall contain the following:
- "stericsson,ab8500-clk"
-- #clock-cells should be <1>
-
-The ABx500 clocks need to be placed as a subnode of an AB8500
-device node, see mfd/ab8500.txt
-
-All available clocks are defined as preprocessor macros in
-dt-bindings/clock/ste-ab8500.h header and can be used in device
-tree sources.
-
-Example:
-
-clock-controller {
- compatible = "stericsson,ab8500-clk";
- #clock-cells = <1>;
-};
diff --git a/Documentation/devicetree/bindings/clock/stericsson,u8500-clks.yaml b/Documentation/devicetree/bindings/clock/stericsson,u8500-clks.yaml
new file mode 100644
index 000000000000..2150307219a0
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/stericsson,u8500-clks.yaml
@@ -0,0 +1,178 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/stericsson,u8500-clks.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ST-Ericsson DB8500 (U8500) clocks
+
+maintainers:
+ - Ulf Hansson <ulf.hansson@linaro.org>
+ - Linus Walleij <linus.walleij@linaro.org>
+
+description: While named "U8500 clocks" these clocks are inside the
+ DB8500 digital baseband system-on-chip and its siblings such as
+ DB8520. These bindings consider the clocks present in the SoC
+ itself, not off-chip clocks. There are four different on-chip
+ clocks - RTC (32 kHz), CPU clock (SMP TWD), PRCMU (power reset and
+ control management unit) clocks and PRCC (peripheral reset and
+ clock controller) clocks. For some reason PRCC 4 does not exist so
+ the itemization can be a bit unintuitive.
+
+properties:
+ compatible:
+ enum:
+ - stericsson,u8500-clks
+ - stericsson,u8540-clks
+ - stericsson,u9540-clks
+
+ reg:
+ items:
+ - description: PRCC 1 register area
+ - description: PRCC 2 register area
+ - description: PRCC 3 register area
+ - description: PRCC 5 register area
+ - description: PRCC 6 register area
+
+ prcmu-clock:
+ description: A subnode with one clock cell for PRCMU (power, reset, control
+ management unit) clocks. The cell indicates which PRCMU clock in the
+ prcmu-clock node the consumer wants to use.
+ type: object
+
+ properties:
+ '#clock-cells':
+ const: 1
+
+ additionalProperties: false
+
+ prcc-periph-clock:
+ description: A subnode with two clock cells for PRCC (peripheral
+ reset and clock controller) peripheral clocks. The first cell indicates
+ which PRCC block the consumer wants to use, possible values are 1, 2, 3,
+ 5, 6. The second cell indicates which clock inside the PRCC block it
+ wants, possible values are 0 thru 31.
+ type: object
+
+ properties:
+ '#clock-cells':
+ const: 2
+
+ additionalProperties: false
+
+ prcc-kernel-clock:
+ description: A subnode with two clock cells for PRCC (peripheral reset
+ and clock controller) kernel clocks. The first cell indicates which PRCC
+ block the consumer wants to use, possible values are 1, 2, 3, 5, 6. The
+ second cell indicates which clock inside the PRCC block it wants, possible
+ values are 0 thru 31.
+ type: object
+
+ properties:
+ '#clock-cells':
+ const: 2
+
+ additionalProperties: false
+
+ prcc-reset-controller:
+ description: A subnode with two reset cells for the reset portions of the
+ PRCC (peripheral reset and clock controller). The first cell indicates
+ which PRCC block the consumer wants to use, possible values are 1, 2, 3
+ 5 and 6. The second cell indicates which reset line inside the PRCC block
+ it wants to control, possible values are 0 thru 31.
+ type: object
+
+ properties:
+ '#reset-cells':
+ const: 2
+
+ additionalProperties: false
+
+ rtc32k-clock:
+ description: A subnode with zero clock cells for the 32kHz RTC clock.
+ type: object
+
+ properties:
+ '#clock-cells':
+ const: 0
+
+ additionalProperties: false
+
+ smp-twd-clock:
+ description: A subnode for the ARM SMP Timer Watchdog cluster with zero
+ clock cells.
+ type: object
+
+ properties:
+ '#clock-cells':
+ const: 0
+
+ additionalProperties: false
+
+ clkout-clock:
+ description: A subnode with three clock cells for externally routed clocks,
+ output clocks. These are two PRCMU-internal clocks that can be divided and
+ muxed out on the pads of the DB8500 SoC.
+ type: object
+
+ properties:
+ '#clock-cells':
+ description:
+ The first cell indicates which output clock we are using,
+ possible values are 0 (CLKOUT1) and 1 (CLKOUT2).
+ The second cell indicates which clock we want to use as source,
+ possible values are 0 thru 7, see the defines for the different
+ source clocks.
+ The third cell is a divider, legal values are 1 thru 63.
+ const: 3
+
+ additionalProperties: false
+
+required:
+ - compatible
+ - reg
+ - prcmu-clock
+ - prcc-periph-clock
+ - prcc-kernel-clock
+ - rtc32k-clock
+ - smp-twd-clock
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/ste-db8500-clkout.h>
+ clocks@8012 {
+ compatible = "stericsson,u8500-clks";
+ reg = <0x8012f000 0x1000>, <0x8011f000 0x1000>,
+ <0x8000f000 0x1000>, <0xa03ff000 0x1000>,
+ <0xa03cf000 0x1000>;
+
+ prcmu_clk: prcmu-clock {
+ #clock-cells = <1>;
+ };
+
+ prcc_pclk: prcc-periph-clock {
+ #clock-cells = <2>;
+ };
+
+ prcc_kclk: prcc-kernel-clock {
+ #clock-cells = <2>;
+ };
+
+ prcc_reset: prcc-reset-controller {
+ #reset-cells = <2>;
+ };
+
+ rtc_clk: rtc32k-clock {
+ #clock-cells = <0>;
+ };
+
+ smp_twd_clk: smp-twd-clock {
+ #clock-cells = <0>;
+ };
+
+ clkout_clk: clkout-clock {
+ #clock-cells = <3>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/sun8i-de2.txt b/Documentation/devicetree/bindings/clock/sun8i-de2.txt
deleted file mode 100644
index e94582e8b8a9..000000000000
--- a/Documentation/devicetree/bindings/clock/sun8i-de2.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-Allwinner Display Engine 2.0 Clock Control Binding
---------------------------------------------------
-
-Required properties :
-- compatible: must contain one of the following compatibles:
- - "allwinner,sun8i-a83t-de2-clk"
- - "allwinner,sun8i-h3-de2-clk"
- - "allwinner,sun8i-v3s-de2-clk"
- - "allwinner,sun50i-a64-de2-clk"
- - "allwinner,sun50i-h5-de2-clk"
-
-- reg: Must contain the registers base address and length
-- clocks: phandle to the clocks feeding the display engine subsystem.
- Three are needed:
- - "mod": the display engine module clock (on A83T it's the DE PLL)
- - "bus": the bus clock for the whole display engine subsystem
-- clock-names: Must contain the clock names described just above
-- resets: phandle to the reset control for the display engine subsystem.
-- #clock-cells : must contain 1
-- #reset-cells : must contain 1
-
-Example:
-de2_clocks: clock@1000000 {
- compatible = "allwinner,sun8i-h3-de2-clk";
- reg = <0x01000000 0x100000>;
- clocks = <&ccu CLK_BUS_DE>,
- <&ccu CLK_DE>;
- clock-names = "bus",
- "mod";
- resets = <&ccu RST_BUS_DE>;
- #clock-cells = <1>;
- #reset-cells = <1>;
-};
diff --git a/Documentation/devicetree/bindings/clock/sun9i-de.txt b/Documentation/devicetree/bindings/clock/sun9i-de.txt
deleted file mode 100644
index fb18f327b97a..000000000000
--- a/Documentation/devicetree/bindings/clock/sun9i-de.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-Allwinner A80 Display Engine Clock Control Binding
---------------------------------------------------
-
-Required properties :
-- compatible: must contain one of the following compatibles:
- - "allwinner,sun9i-a80-de-clks"
-
-- reg: Must contain the registers base address and length
-- clocks: phandle to the clocks feeding the display engine subsystem.
- Three are needed:
- - "mod": the display engine module clock
- - "dram": the DRAM bus clock for the system
- - "bus": the bus clock for the whole display engine subsystem
-- clock-names: Must contain the clock names described just above
-- resets: phandle to the reset control for the display engine subsystem.
-- #clock-cells : must contain 1
-- #reset-cells : must contain 1
-
-Example:
-de_clocks: clock@3000000 {
- compatible = "allwinner,sun9i-a80-de-clks";
- reg = <0x03000000 0x30>;
- clocks = <&ccu CLK_DE>, <&ccu CLK_SDRAM>, <&ccu CLK_BUS_DE>;
- clock-names = "mod", "dram", "bus";
- resets = <&ccu RST_BUS_DE>;
- #clock-cells = <1>;
- #reset-cells = <1>;
-};
diff --git a/Documentation/devicetree/bindings/clock/sun9i-usb.txt b/Documentation/devicetree/bindings/clock/sun9i-usb.txt
deleted file mode 100644
index 3564bd4f2a20..000000000000
--- a/Documentation/devicetree/bindings/clock/sun9i-usb.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-Allwinner A80 USB Clock Control Binding
----------------------------------------
-
-Required properties :
-- compatible: must contain one of the following compatibles:
- - "allwinner,sun9i-a80-usb-clocks"
-
-- reg: Must contain the registers base address and length
-- clocks: phandle to the clocks feeding the USB subsystem. Two are needed:
- - "bus": the bus clock for the whole USB subsystem
- - "hosc": the high frequency oscillator (usually at 24MHz)
-- clock-names: Must contain the clock names described just above
-- #clock-cells : must contain 1
-- #reset-cells : must contain 1
-
-Example:
-usb_clocks: clock@a08000 {
- compatible = "allwinner,sun9i-a80-usb-clks";
- reg = <0x00a08000 0x8>;
- clocks = <&ccu CLK_BUS_USB>, <&osc24M>;
- clock-names = "bus", "hosc";
- #clock-cells = <1>;
- #reset-cells = <1>;
-};
diff --git a/Documentation/devicetree/bindings/clock/sunplus,sp7021-clkc.yaml b/Documentation/devicetree/bindings/clock/sunplus,sp7021-clkc.yaml
new file mode 100644
index 000000000000..bcc14088220a
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/sunplus,sp7021-clkc.yaml
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (C) Sunplus Co., Ltd. 2021
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/sunplus,sp7021-clkc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Sunplus SP7021 SoC Clock Controller
+
+maintainers:
+ - Qin Jian <qinjian@cqplus1.com>
+
+properties:
+ compatible:
+ const: sunplus,sp7021-clkc
+
+ reg:
+ maxItems: 3
+
+ clocks:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - "#clock-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ extclk: osc0 {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <27000000>;
+ clock-output-names = "extclk";
+ };
+
+ clkc: clock-controller@9c000004 {
+ compatible = "sunplus,sp7021-clkc";
+ reg = <0x9c000004 0x28>,
+ <0x9c000200 0x44>,
+ <0x9c000268 0x08>;
+ clocks = <&extclk>;
+ #clock-cells = <1>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/sunxi-ccu.txt b/Documentation/devicetree/bindings/clock/sunxi-ccu.txt
deleted file mode 100644
index 47d2e902ced4..000000000000
--- a/Documentation/devicetree/bindings/clock/sunxi-ccu.txt
+++ /dev/null
@@ -1,61 +0,0 @@
-Allwinner Clock Control Unit Binding
-------------------------------------
-
-Required properties :
-- compatible: must contain one of the following compatibles:
- - "allwinner,sun4i-a10-ccu"
- - "allwinner,sun5i-a10s-ccu"
- - "allwinner,sun5i-a13-ccu"
- - "allwinner,sun6i-a31-ccu"
- - "allwinner,sun7i-a20-ccu"
- - "allwinner,sun8i-a23-ccu"
- - "allwinner,sun8i-a33-ccu"
- - "allwinner,sun8i-a83t-ccu"
- - "allwinner,sun8i-a83t-r-ccu"
- - "allwinner,sun8i-h3-ccu"
- - "allwinner,sun8i-h3-r-ccu"
-+ - "allwinner,sun8i-r40-ccu"
- - "allwinner,sun8i-v3s-ccu"
- - "allwinner,sun9i-a80-ccu"
- - "allwinner,sun50i-a64-ccu"
- - "allwinner,sun50i-a64-r-ccu"
- - "allwinner,sun50i-h5-ccu"
- - "allwinner,sun50i-h6-ccu"
- - "allwinner,sun50i-h6-r-ccu"
- - "nextthing,gr8-ccu"
-
-- reg: Must contain the registers base address and length
-- clocks: phandle to the oscillators feeding the CCU. Two are needed:
- - "hosc": the high frequency oscillator (usually at 24MHz)
- - "losc": the low frequency oscillator (usually at 32kHz)
- On the A83T, this is the internal 16MHz oscillator divided by 512
-- clock-names: Must contain the clock names described just above
-- #clock-cells : must contain 1
-- #reset-cells : must contain 1
-
-For the main CCU on H6, one more clock is needed:
-- "iosc": the SoC's internal frequency oscillator
-
-For the PRCM CCUs on A83T/H3/A64/H6, two more clocks are needed:
-- "pll-periph": the SoC's peripheral PLL from the main CCU
-- "iosc": the SoC's internal frequency oscillator
-
-Example for generic CCU:
-ccu: clock@1c20000 {
- compatible = "allwinner,sun8i-h3-ccu";
- reg = <0x01c20000 0x400>;
- clocks = <&osc24M>, <&osc32k>;
- clock-names = "hosc", "losc";
- #clock-cells = <1>;
- #reset-cells = <1>;
-};
-
-Example for PRCM CCU:
-r_ccu: clock@1f01400 {
- compatible = "allwinner,sun50i-a64-r-ccu";
- reg = <0x01f01400 0x100>;
- clocks = <&osc24M>, <&osc32k>, <&iosc>, <&ccu CLK_PLL_PERIPH0>;
- clock-names = "hosc", "losc", "iosc", "pll-periph";
- #clock-cells = <1>;
- #reset-cells = <1>;
-};
diff --git a/Documentation/devicetree/bindings/clock/sunxi.txt b/Documentation/devicetree/bindings/clock/sunxi.txt
deleted file mode 100644
index 1a042e20b115..000000000000
--- a/Documentation/devicetree/bindings/clock/sunxi.txt
+++ /dev/null
@@ -1,225 +0,0 @@
-Device Tree Clock bindings for arch-sunxi
-
-This binding uses the common clock binding[1].
-
-[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-
-Required properties:
-- compatible : shall be one of the following:
- "allwinner,sun4i-a10-osc-clk" - for a gatable oscillator
- "allwinner,sun4i-a10-pll1-clk" - for the main PLL clock and PLL4
- "allwinner,sun6i-a31-pll1-clk" - for the main PLL clock on A31
- "allwinner,sun8i-a23-pll1-clk" - for the main PLL clock on A23
- "allwinner,sun4i-a10-pll3-clk" - for the video PLL clock on A10
- "allwinner,sun9i-a80-pll4-clk" - for the peripheral PLLs on A80
- "allwinner,sun4i-a10-pll5-clk" - for the PLL5 clock
- "allwinner,sun4i-a10-pll6-clk" - for the PLL6 clock
- "allwinner,sun6i-a31-pll6-clk" - for the PLL6 clock on A31
- "allwinner,sun9i-a80-gt-clk" - for the GT bus clock on A80
- "allwinner,sun4i-a10-cpu-clk" - for the CPU multiplexer clock
- "allwinner,sun4i-a10-axi-clk" - for the AXI clock
- "allwinner,sun8i-a23-axi-clk" - for the AXI clock on A23
- "allwinner,sun4i-a10-gates-clk" - for generic gates on all compatible SoCs
- "allwinner,sun4i-a10-axi-gates-clk" - for the AXI gates
- "allwinner,sun4i-a10-ahb-clk" - for the AHB clock
- "allwinner,sun5i-a13-ahb-clk" - for the AHB clock on A13
- "allwinner,sun9i-a80-ahb-clk" - for the AHB bus clocks on A80
- "allwinner,sun4i-a10-ahb-gates-clk" - for the AHB gates on A10
- "allwinner,sun5i-a13-ahb-gates-clk" - for the AHB gates on A13
- "allwinner,sun5i-a10s-ahb-gates-clk" - for the AHB gates on A10s
- "allwinner,sun7i-a20-ahb-gates-clk" - for the AHB gates on A20
- "allwinner,sun6i-a31-ar100-clk" - for the AR100 on A31
- "allwinner,sun9i-a80-cpus-clk" - for the CPUS on A80
- "allwinner,sun6i-a31-ahb1-clk" - for the AHB1 clock on A31
- "allwinner,sun8i-h3-ahb2-clk" - for the AHB2 clock on H3
- "allwinner,sun6i-a31-ahb1-gates-clk" - for the AHB1 gates on A31
- "allwinner,sun8i-a23-ahb1-gates-clk" - for the AHB1 gates on A23
- "allwinner,sun9i-a80-ahb0-gates-clk" - for the AHB0 gates on A80
- "allwinner,sun9i-a80-ahb1-gates-clk" - for the AHB1 gates on A80
- "allwinner,sun9i-a80-ahb2-gates-clk" - for the AHB2 gates on A80
- "allwinner,sun4i-a10-apb0-clk" - for the APB0 clock
- "allwinner,sun6i-a31-apb0-clk" - for the APB0 clock on A31
- "allwinner,sun8i-a23-apb0-clk" - for the APB0 clock on A23
- "allwinner,sun9i-a80-apb0-clk" - for the APB0 bus clock on A80
- "allwinner,sun8i-a83t-apb0-gates-clk" - for the APB0 gates on A83T
- "allwinner,sun4i-a10-apb0-gates-clk" - for the APB0 gates on A10
- "allwinner,sun5i-a13-apb0-gates-clk" - for the APB0 gates on A13
- "allwinner,sun5i-a10s-apb0-gates-clk" - for the APB0 gates on A10s
- "allwinner,sun6i-a31-apb0-gates-clk" - for the APB0 gates on A31
- "allwinner,sun7i-a20-apb0-gates-clk" - for the APB0 gates on A20
- "allwinner,sun8i-a23-apb0-gates-clk" - for the APB0 gates on A23
- "allwinner,sun8i-h3-apb0-gates-clk" - for the APB0 gates on H3
- "allwinner,sun9i-a80-apb0-gates-clk" - for the APB0 gates on A80
- "allwinner,sun4i-a10-apb1-clk" - for the APB1 clock
- "allwinner,sun9i-a80-apb1-clk" - for the APB1 bus clock on A80
- "allwinner,sun4i-a10-apb1-gates-clk" - for the APB1 gates on A10
- "allwinner,sun5i-a13-apb1-gates-clk" - for the APB1 gates on A13
- "allwinner,sun5i-a10s-apb1-gates-clk" - for the APB1 gates on A10s
- "allwinner,sun6i-a31-apb1-gates-clk" - for the APB1 gates on A31
- "allwinner,sun7i-a20-apb1-gates-clk" - for the APB1 gates on A20
- "allwinner,sun8i-a23-apb1-gates-clk" - for the APB1 gates on A23
- "allwinner,sun9i-a80-apb1-gates-clk" - for the APB1 gates on A80
- "allwinner,sun6i-a31-apb2-gates-clk" - for the APB2 gates on A31
- "allwinner,sun8i-a23-apb2-gates-clk" - for the APB2 gates on A23
- "allwinner,sun8i-a83t-bus-gates-clk" - for the bus gates on A83T
- "allwinner,sun8i-h3-bus-gates-clk" - for the bus gates on H3
- "allwinner,sun9i-a80-apbs-gates-clk" - for the APBS gates on A80
- "allwinner,sun4i-a10-display-clk" - for the display clocks on the A10
- "allwinner,sun4i-a10-dram-gates-clk" - for the DRAM gates on A10
- "allwinner,sun5i-a13-dram-gates-clk" - for the DRAM gates on A13
- "allwinner,sun5i-a13-mbus-clk" - for the MBUS clock on A13
- "allwinner,sun4i-a10-mmc-clk" - for the MMC clock
- "allwinner,sun9i-a80-mmc-clk" - for mmc module clocks on A80
- "allwinner,sun9i-a80-mmc-config-clk" - for mmc gates + resets on A80
- "allwinner,sun4i-a10-mod0-clk" - for the module 0 family of clocks
- "allwinner,sun9i-a80-mod0-clk" - for module 0 (storage) clocks on A80
- "allwinner,sun8i-a23-mbus-clk" - for the MBUS clock on A23
- "allwinner,sun7i-a20-out-clk" - for the external output clocks
- "allwinner,sun7i-a20-gmac-clk" - for the GMAC clock module on A20/A31
- "allwinner,sun4i-a10-tcon-ch0-clk" - for the TCON channel 0 clock on the A10
- "allwinner,sun4i-a10-tcon-ch1-clk" - for the TCON channel 1 clock on the A10
- "allwinner,sun4i-a10-usb-clk" - for usb gates + resets on A10 / A20
- "allwinner,sun5i-a13-usb-clk" - for usb gates + resets on A13
- "allwinner,sun6i-a31-usb-clk" - for usb gates + resets on A31
- "allwinner,sun8i-a23-usb-clk" - for usb gates + resets on A23
- "allwinner,sun8i-h3-usb-clk" - for usb gates + resets on H3
- "allwinner,sun9i-a80-usb-mod-clk" - for usb gates + resets on A80
- "allwinner,sun9i-a80-usb-phy-clk" - for usb phy gates + resets on A80
- "allwinner,sun4i-a10-ve-clk" - for the Video Engine clock
- "allwinner,sun6i-a31-display-clk" - for the display clocks
-
-Required properties for all clocks:
-- reg : shall be the control register address for the clock.
-- clocks : shall be the input parent clock(s) phandle for the clock. For
- multiplexed clocks, the list order must match the hardware
- programming order.
-- #clock-cells : from common clock binding; shall be set to 0 except for
- the following compatibles where it shall be set to 1:
- "allwinner,*-gates-clk", "allwinner,sun4i-pll5-clk",
- "allwinner,sun4i-pll6-clk", "allwinner,sun6i-a31-pll6-clk",
- "allwinner,*-usb-clk", "allwinner,*-mmc-clk",
- "allwinner,*-mmc-config-clk"
-- clock-output-names : shall be the corresponding names of the outputs.
- If the clock module only has one output, the name shall be the
- module name.
-
-And "allwinner,*-usb-clk" clocks also require:
-- reset-cells : shall be set to 1
-
-The "allwinner,sun4i-a10-ve-clk" clock also requires:
-- reset-cells : shall be set to 0
-
-The "allwinner,sun9i-a80-mmc-config-clk" clock also requires:
-- #reset-cells : shall be set to 1
-- resets : shall be the reset control phandle for the mmc block.
-
-For "allwinner,sun7i-a20-gmac-clk", the parent clocks shall be fixed rate
-dummy clocks at 25 MHz and 125 MHz, respectively. See example.
-
-Clock consumers should specify the desired clocks they use with a
-"clocks" phandle cell. Consumers that are using a gated clock should
-provide an additional ID in their clock property. This ID is the
-offset of the bit controlling this particular gate in the register.
-For the other clocks with "#clock-cells" = 1, the additional ID shall
-refer to the index of the output.
-
-For "allwinner,sun6i-a31-pll6-clk", there are 2 outputs. The first output
-is the normal PLL6 output, or "pll6". The second output is rate doubled
-PLL6, or "pll6x2".
-
-The "allwinner,*-mmc-clk" clocks have three different outputs: the
-main clock, with the ID 0, and the output and sample clocks, with the
-IDs 1 and 2, respectively.
-
-The "allwinner,sun9i-a80-mmc-config-clk" clock has one clock/reset output
-per mmc controller. The number of outputs is determined by the size of
-the address block, which is related to the overall mmc block.
-
-For example:
-
-osc24M: clk@1c20050 {
- #clock-cells = <0>;
- compatible = "allwinner,sun4i-a10-osc-clk";
- reg = <0x01c20050 0x4>;
- clocks = <&osc24M_fixed>;
- clock-output-names = "osc24M";
-};
-
-pll1: clk@1c20000 {
- #clock-cells = <0>;
- compatible = "allwinner,sun4i-a10-pll1-clk";
- reg = <0x01c20000 0x4>;
- clocks = <&osc24M>;
- clock-output-names = "pll1";
-};
-
-pll5: clk@1c20020 {
- #clock-cells = <1>;
- compatible = "allwinner,sun4i-pll5-clk";
- reg = <0x01c20020 0x4>;
- clocks = <&osc24M>;
- clock-output-names = "pll5_ddr", "pll5_other";
-};
-
-pll6: clk@1c20028 {
- #clock-cells = <1>;
- compatible = "allwinner,sun6i-a31-pll6-clk";
- reg = <0x01c20028 0x4>;
- clocks = <&osc24M>;
- clock-output-names = "pll6", "pll6x2";
-};
-
-cpu: cpu@1c20054 {
- #clock-cells = <0>;
- compatible = "allwinner,sun4i-a10-cpu-clk";
- reg = <0x01c20054 0x4>;
- clocks = <&osc32k>, <&osc24M>, <&pll1>;
- clock-output-names = "cpu";
-};
-
-mmc0_clk: clk@1c20088 {
- #clock-cells = <1>;
- compatible = "allwinner,sun4i-a10-mmc-clk";
- reg = <0x01c20088 0x4>;
- clocks = <&osc24M>, <&pll6 1>, <&pll5 1>;
- clock-output-names = "mmc0", "mmc0_output", "mmc0_sample";
-};
-
-mii_phy_tx_clk: clk@2 {
- #clock-cells = <0>;
- compatible = "fixed-clock";
- clock-frequency = <25000000>;
- clock-output-names = "mii_phy_tx";
-};
-
-gmac_int_tx_clk: clk@3 {
- #clock-cells = <0>;
- compatible = "fixed-clock";
- clock-frequency = <125000000>;
- clock-output-names = "gmac_int_tx";
-};
-
-gmac_clk: clk@1c20164 {
- #clock-cells = <0>;
- compatible = "allwinner,sun7i-a20-gmac-clk";
- reg = <0x01c20164 0x4>;
- /*
- * The first clock must be fixed at 25MHz;
- * the second clock must be fixed at 125MHz
- */
- clocks = <&mii_phy_tx_clk>, <&gmac_int_tx_clk>;
- clock-output-names = "gmac";
-};
-
-mmc_config_clk: clk@1c13000 {
- compatible = "allwinner,sun9i-a80-mmc-config-clk";
- reg = <0x01c13000 0x10>;
- clocks = <&ahb0_gates 8>;
- clock-names = "ahb";
- resets = <&ahb0_resets 8>;
- reset-names = "ahb";
- #clock-cells = <1>;
- #reset-cells = <1>;
- clock-output-names = "mmc0_config", "mmc1_config",
- "mmc2_config", "mmc3_config";
-};
diff --git a/Documentation/devicetree/bindings/clock/tango4-clock.txt b/Documentation/devicetree/bindings/clock/tango4-clock.txt
deleted file mode 100644
index 19c580a7bda2..000000000000
--- a/Documentation/devicetree/bindings/clock/tango4-clock.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-* Sigma Designs Tango4 Clock Generator
-
-The Tango4 clock generator outputs cpu_clk and sys_clk (the latter is used
-for RAM and various peripheral devices). The clock binding described here
-is applicable to all Tango4 SoCs.
-
-Required Properties:
-
-- compatible: should be "sigma,tango4-clkgen".
-- reg: physical base address of the device and length of memory mapped region.
-- clocks: phandle of the input clock (crystal oscillator).
-- clock-output-names: should be "cpuclk" and "sysclk".
-- #clock-cells: should be set to 1.
-
-Example:
-
- clkgen: clkgen@10000 {
- compatible = "sigma,tango4-clkgen";
- reg = <0x10000 0x40>;
- clocks = <&xtal>;
- clock-output-names = "cpuclk", "sysclk";
- #clock-cells = <1>;
- };
diff --git a/Documentation/devicetree/bindings/clock/tesla,fsd-clock.yaml b/Documentation/devicetree/bindings/clock/tesla,fsd-clock.yaml
new file mode 100644
index 000000000000..b370a10a23a6
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/tesla,fsd-clock.yaml
@@ -0,0 +1,198 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/tesla,fsd-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Tesla FSD (Full Self-Driving) SoC clock controller
+
+maintainers:
+ - Alim Akhtar <alim.akhtar@samsung.com>
+ - linux-fsd@tesla.com
+
+description: |
+ FSD clock controller consist of several clock management unit
+ (CMU), which generates clocks for various internal SoC blocks.
+ The root clock comes from external OSC clock (24 MHz).
+
+ All available clocks are defined as preprocessor macros in
+ 'dt-bindings/clock/fsd-clk.h' header.
+
+properties:
+ compatible:
+ enum:
+ - tesla,fsd-clock-cmu
+ - tesla,fsd-clock-imem
+ - tesla,fsd-clock-peric
+ - tesla,fsd-clock-fsys0
+ - tesla,fsd-clock-fsys1
+ - tesla,fsd-clock-mfc
+ - tesla,fsd-clock-cam_csi
+
+ clocks:
+ minItems: 1
+ maxItems: 6
+
+ clock-names:
+ minItems: 1
+ maxItems: 6
+
+ "#clock-cells":
+ const: 1
+
+ reg:
+ maxItems: 1
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: tesla,fsd-clock-cmu
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (24 MHz)
+ clock-names:
+ items:
+ - const: fin_pll
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: tesla,fsd-clock-imem
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (24 MHz)
+ - description: IMEM TCU clock (from CMU_CMU)
+ - description: IMEM bus clock (from CMU_CMU)
+ - description: IMEM DMA clock (from CMU_CMU)
+ clock-names:
+ items:
+ - const: fin_pll
+ - const: dout_cmu_imem_tcuclk
+ - const: dout_cmu_imem_aclk
+ - const: dout_cmu_imem_dmaclk
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: tesla,fsd-clock-peric
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (24 MHz)
+ - description: Shared0 PLL div4 clock (from CMU_CMU)
+ - description: PERIC shared1 div36 clock (from CMU_CMU)
+ - description: PERIC shared0 div3 TBU clock (from CMU_CMU)
+ - description: PERIC shared0 div20 clock (from CMU_CMU)
+ - description: PERIC shared1 div4 DMAclock (from CMU_CMU)
+ clock-names:
+ items:
+ - const: fin_pll
+ - const: dout_cmu_pll_shared0_div4
+ - const: dout_cmu_peric_shared1div36
+ - const: dout_cmu_peric_shared0div3_tbuclk
+ - const: dout_cmu_peric_shared0div20
+ - const: dout_cmu_peric_shared1div4_dmaclk
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: tesla,fsd-clock-fsys0
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (24 MHz)
+ - description: Shared0 PLL div6 clock (from CMU_CMU)
+ - description: FSYS0 shared1 div4 clock (from CMU_CMU)
+ - description: FSYS0 shared0 div4 clock (from CMU_CMU)
+ clock-names:
+ items:
+ - const: fin_pll
+ - const: dout_cmu_pll_shared0_div6
+ - const: dout_cmu_fsys0_shared1div4
+ - const: dout_cmu_fsys0_shared0div4
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: tesla,fsd-clock-fsys1
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (24 MHz)
+ - description: FSYS1 shared0 div8 clock (from CMU_CMU)
+ - description: FSYS1 shared0 div4 clock (from CMU_CMU)
+ clock-names:
+ items:
+ - const: fin_pll
+ - const: dout_cmu_fsys1_shared0div8
+ - const: dout_cmu_fsys1_shared0div4
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: tesla,fsd-clock-mfc
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (24 MHz)
+ clock-names:
+ items:
+ - const: fin_pll
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: tesla,fsd-clock-cam_csi
+ then:
+ properties:
+ clocks:
+ items:
+ - description: External reference clock (24 MHz)
+ clock-names:
+ items:
+ - const: fin_pll
+
+required:
+ - compatible
+ - "#clock-cells"
+ - clocks
+ - clock-names
+ - reg
+
+additionalProperties: false
+
+examples:
+ # Clock controller node for CMU_FSYS1
+ - |
+ #include <dt-bindings/clock/fsd-clk.h>
+
+ clock_fsys1: clock-controller@16810000 {
+ compatible = "tesla,fsd-clock-fsys1";
+ reg = <0x16810000 0x3000>;
+ #clock-cells = <1>;
+
+ clocks = <&fin_pll>,
+ <&clock_cmu DOUT_CMU_FSYS1_SHARED0DIV8>,
+ <&clock_cmu DOUT_CMU_FSYS1_SHARED0DIV4>;
+ clock-names = "fin_pll",
+ "dout_cmu_fsys1_shared0div8",
+ "dout_cmu_fsys1_shared0div4";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/thead,th1520-clk-ap.yaml b/Documentation/devicetree/bindings/clock/thead,th1520-clk-ap.yaml
new file mode 100644
index 000000000000..9d058c00ab3d
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/thead,th1520-clk-ap.yaml
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/thead,th1520-clk-ap.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: T-HEAD TH1520 AP sub-system clock controller
+
+description: |
+ The T-HEAD TH1520 AP sub-system clock controller configures the
+ CPU, DPU, GMAC and TEE PLLs. Additionally the VO subsystem configures
+ the clock gates for the HDMI, MIPI and the GPU.
+
+ SoC reference manual
+ https://openbeagle.org/beaglev-ahead/beaglev-ahead/-/blob/main/docs/TH1520%20System%20User%20Manual.pdf
+
+maintainers:
+ - Jisheng Zhang <jszhang@kernel.org>
+ - Wei Fu <wefu@redhat.com>
+ - Drew Fustini <dfustini@tenstorrent.com>
+
+properties:
+ compatible:
+ enum:
+ - thead,th1520-clk-ap
+ - thead,th1520-clk-vo
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: |
+ One input clock:
+ - For "thead,th1520-clk-ap": the clock input must be the 24 MHz
+ main oscillator.
+ - For "thead,th1520-clk-vo": the clock input must be the VIDEO_PLL,
+ which is configured by the AP clock controller. According to the
+ TH1520 manual, VIDEO_PLL is a Silicon Creations Sigma-Delta PLL
+ (integer PLL) typically running at 792 MHz (FOUTPOSTDIV), with
+ a maximum FOUTVCO of 2376 MHz.
+
+ "#clock-cells":
+ const: 1
+ description:
+ See <dt-bindings/clock/thead,th1520-clk-ap.h> for valid indices.
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - "#clock-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/thead,th1520-clk-ap.h>
+ clock-controller@ef010000 {
+ compatible = "thead,th1520-clk-ap";
+ reg = <0xef010000 0x1000>;
+ clocks = <&osc>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/ti,am62-audio-refclk.yaml b/Documentation/devicetree/bindings/clock/ti,am62-audio-refclk.yaml
new file mode 100644
index 000000000000..b2e40bd39a3a
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/ti,am62-audio-refclk.yaml
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/ti,am62-audio-refclk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: TI Audio Reference Clock
+
+maintainers:
+ - Jai Luthra <j-luthra@ti.com>
+
+properties:
+ compatible:
+ items:
+ - const: ti,am62-audio-refclk
+
+ reg:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 0
+
+ clocks:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - "#clock-cells"
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ audio_refclk0: clock@82e0 {
+ compatible = "ti,am62-audio-refclk";
+ reg = <0x82e0 0x4>;
+ clocks = <&k3_clks 157 0>;
+ assigned-clocks = <&k3_clks 157 0>;
+ assigned-clock-parents = <&k3_clks 157 8>;
+ #clock-cells = <0>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/ti,am654-ehrpwm-tbclk.yaml b/Documentation/devicetree/bindings/clock/ti,am654-ehrpwm-tbclk.yaml
new file mode 100644
index 000000000000..64b8bce5962c
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/ti,am654-ehrpwm-tbclk.yaml
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/ti,am654-ehrpwm-tbclk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: TI EHRPWM Time Base Clock
+
+maintainers:
+ - Vignesh Raghavendra <vigneshr@ti.com>
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - ti,am654-ehrpwm-tbclk
+ - ti,am64-epwm-tbclk
+ - ti,am62-epwm-tbclk
+
+ "#clock-cells":
+ const: 1
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - "#clock-cells"
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ ehrpwm_tbclk: clock@4140 {
+ compatible = "ti,am654-ehrpwm-tbclk";
+ reg = <0x4140 0x18>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/ti,cdce706.txt b/Documentation/devicetree/bindings/clock/ti,cdce706.txt
index 959d96632f5d..21c3ff764788 100644
--- a/Documentation/devicetree/bindings/clock/ti,cdce706.txt
+++ b/Documentation/devicetree/bindings/clock/ti,cdce706.txt
@@ -1,7 +1,7 @@
Bindings for Texas Instruments CDCE706 programmable 3-PLL clock
synthesizer/multiplier/divider.
-Reference: http://www.ti.com/lit/ds/symlink/cdce706.pdf
+Reference: https://www.ti.com/lit/ds/symlink/cdce706.pdf
I2C device node required properties:
- compatible: shall be "ti,cdce706".
diff --git a/Documentation/devicetree/bindings/clock/ti,cdce925.txt b/Documentation/devicetree/bindings/clock/ti,cdce925.txt
deleted file mode 100644
index 0d01f2d5cc36..000000000000
--- a/Documentation/devicetree/bindings/clock/ti,cdce925.txt
+++ /dev/null
@@ -1,49 +0,0 @@
-Binding for TI CDCE913/925/937/949 programmable I2C clock synthesizers.
-
-Reference
-This binding uses the common clock binding[1].
-
-[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-[2] http://www.ti.com/product/cdce913
-[3] http://www.ti.com/product/cdce925
-[4] http://www.ti.com/product/cdce937
-[5] http://www.ti.com/product/cdce949
-
-The driver provides clock sources for each output Y1 through Y5.
-
-Required properties:
- - compatible: Shall be one of the following:
- - "ti,cdce913": 1-PLL, 3 Outputs
- - "ti,cdce925": 2-PLL, 5 Outputs
- - "ti,cdce937": 3-PLL, 7 Outputs
- - "ti,cdce949": 4-PLL, 9 Outputs
- - reg: I2C device address.
- - clocks: Points to a fixed parent clock that provides the input frequency.
- - #clock-cells: From common clock bindings: Shall be 1.
-
-Optional properties:
- - xtal-load-pf: Crystal load-capacitor value to fine-tune performance on a
- board, or to compensate for external influences.
-
-For all PLL1, PLL2, ... an optional child node can be used to specify spread
-spectrum clocking parameters for a board.
- - spread-spectrum: SSC mode as defined in the data sheet.
- - spread-spectrum-center: Use "centered" mode instead of "max" mode. When
- present, the clock runs at the requested frequency on average. Otherwise
- the requested frequency is the maximum value of the SCC range.
-
-
-Example:
-
- clockgen: cdce925pw@64 {
- compatible = "cdce925";
- reg = <0x64>;
- clocks = <&xtal_27Mhz>;
- #clock-cells = <1>;
- xtal-load-pf = <5>;
- /* PLL options to get SSC 1% centered */
- PLL2 {
- spread-spectrum = <4>;
- spread-spectrum-center;
- };
- };
diff --git a/Documentation/devicetree/bindings/clock/ti,cdce925.yaml b/Documentation/devicetree/bindings/clock/ti,cdce925.yaml
new file mode 100644
index 000000000000..95c1c6f8b755
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/ti,cdce925.yaml
@@ -0,0 +1,103 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/ti,cdce925.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: TI CDCE913/925/937/949 programmable I2C clock synthesizers
+
+maintainers:
+ - Alexander Stein <alexander.stein@ew.tq-group.com>
+
+description: |
+ Flexible Low Power LVCMOS Clock Generator with SSC Support for EMI Reduction
+
+ - CDCE(L)913: 1-PLL, 3 Outputs https://www.ti.com/product/cdce913
+ - CDCE(L)925: 2-PLL, 5 Outputs https://www.ti.com/product/cdce925
+ - CDCE(L)937: 3-PLL, 7 Outputs https://www.ti.com/product/cdce937
+ - CDCE(L)949: 4-PLL, 9 Outputs https://www.ti.com/product/cdce949
+
+properties:
+ compatible:
+ enum:
+ - ti,cdce913
+ - ti,cdce925
+ - ti,cdce937
+ - ti,cdce949
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: fixed parent clock
+
+ "#clock-cells":
+ const: 1
+
+ vdd-supply:
+ description: Regulator that provides 1.8V Vdd power supply
+
+ vddout-supply:
+ description: |
+ Regulator that provides Vddout power supply.
+ non-L variant: 2.5V or 3.3V for
+ L variant: 1.8V for
+
+ xtal-load-pf:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: |
+ Crystal load-capacitor value to fine-tune performance on a
+ board, or to compensate for external influences.
+
+patternProperties:
+ "^PLL[1-4]$":
+ type: object
+ description: |
+ optional child node can be used to specify spread
+ spectrum clocking parameters for a board
+
+ additionalProperties: false
+
+ properties:
+ spread-spectrum:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: SSC mode as defined in the data sheet
+
+ spread-spectrum-center:
+ type: boolean
+ description: |
+ Use "centered" mode instead of "max" mode. When
+ present, the clock runs at the requested frequency on average.
+ Otherwise the requested frequency is the maximum value of the
+ SCC range.
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - "#clock-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cdce925: clock-controller@64 {
+ compatible = "ti,cdce925";
+ reg = <0x64>;
+ clocks = <&xtal_27Mhz>;
+ #clock-cells = <1>;
+ xtal-load-pf = <5>;
+ vdd-supply = <&reg_1v8>;
+ vddout-supply = <&reg_3v3>;
+ /* PLL options to get SSC 1% centered */
+ PLL2 {
+ spread-spectrum = <4>;
+ spread-spectrum-center;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/ti,clkctrl.yaml b/Documentation/devicetree/bindings/clock/ti,clkctrl.yaml
new file mode 100644
index 000000000000..49787550ce45
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/ti,clkctrl.yaml
@@ -0,0 +1,65 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/ti,clkctrl.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Texas Instruments clkctrl clock
+
+maintainers:
+ - Tony Lindgren <tony@atomide.com>
+ - Andreas Kemnade <andreas@kemnade.info>
+
+description: |
+ Texas Instruments SoCs can have a clkctrl clock controller for each
+ interconnect target module. The clkctrl clock controller manages functional
+ and interface clocks for each module. Each clkctrl controller can also
+ gate one or more optional functional clocks for a module, and can have one
+ or more clock muxes. There is a clkctrl clock controller typically for each
+ interconnect target module on omap4 and later variants.
+
+ The clock consumers can specify the index of the clkctrl clock using
+ the hardware offset from the clkctrl instance register space. The optional
+ clocks can be specified by clkctrl hardware offset and the index of the
+ optional clock.
+
+properties:
+ compatible:
+ enum:
+ - ti,clkctrl
+ - ti,clkctrl-l4-cfg
+ - ti,clkctrl-l4-per
+ - ti,clkctrl-l4-secure
+ - ti,clkctrl-l4-wkup
+
+ "#clock-cells":
+ const: 2
+
+ clock-output-names:
+ maxItems: 1
+
+ reg:
+ minItems: 1
+ maxItems: 8 # arbitrary, should be enough
+
+required:
+ - compatible
+ - "#clock-cells"
+ - clock-output-names
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ bus {
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ clock@20 {
+ compatible = "ti,clkctrl";
+ clock-output-names = "l4_per";
+ reg = <0x20 0x1b0>;
+ #clock-cells = <2>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/ti,lmk04832.yaml b/Documentation/devicetree/bindings/clock/ti,lmk04832.yaml
new file mode 100644
index 000000000000..13d7b3d03d84
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/ti,lmk04832.yaml
@@ -0,0 +1,209 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/ti,lmk04832.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Texas Instruments LMK04832 Clock Controller
+
+maintainers:
+ - Liam Beguin <liambeguin@gmail.com>
+
+description: |
+ Devicetree binding for the LMK04832, a clock conditioner with JEDEC JESD204B
+ support. The LMK04832 is pin compatible with the LMK0482x family.
+
+ Link to datasheet, https://www.ti.com/lit/ds/symlink/lmk04832.pdf
+
+properties:
+ compatible:
+ enum:
+ - ti,lmk04832
+
+ reg:
+ maxItems: 1
+
+ '#address-cells':
+ const: 1
+
+ '#size-cells':
+ const: 0
+
+ '#clock-cells':
+ const: 1
+
+ spi-max-frequency:
+ maximum: 5000000
+
+ clocks:
+ items:
+ - description: PLL2 reference clock.
+
+ clock-names:
+ items:
+ - const: oscin
+
+ reset-gpios:
+ maxItems: 1
+
+ ti,spi-4wire-rdbk:
+ description: |
+ Select SPI 4wire readback pin configuration.
+ Available readback pins are,
+ CLKin_SEL0 0
+ CLKin_SEL1 1
+ RESET 2
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1, 2]
+ default: 1
+
+ ti,vco-hz:
+ description: Optional to set VCO frequency of the PLL in Hertz.
+
+ ti,sysref-ddly:
+ description: SYSREF digital delay value.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 8
+ maximum: 8191
+ default: 8
+
+ ti,sysref-mux:
+ description: |
+ SYSREF Mux configuration.
+ Available options are,
+ Normal SYNC 0
+ Re-clocked 1
+ SYSREF Pulser 2
+ SYSREF Continuous 3
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1, 2, 3]
+ default: 3
+
+ ti,sync-mode:
+ description: SYNC pin configuration.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1, 2]
+ default: 1
+
+ ti,sysref-pulse-count:
+ description:
+ Number of SYSREF pulses to send when SYSREF is not in continuous mode.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [1, 2, 4, 8]
+ default: 4
+
+patternProperties:
+ "@[0-9a-d]+$":
+ type: object
+ description:
+ Child nodes used to configure output clocks.
+
+ properties:
+ reg:
+ description:
+ clock output identifier.
+ minimum: 0
+ maximum: 13
+
+ ti,clkout-fmt:
+ description:
+ Clock output format.
+ Available options are,
+ Powerdown 0x00
+ LVDS 0x01
+ HSDS 6 mA 0x02
+ HSDS 8 mA 0x03
+ LVPECL 1600 mV 0x04
+ LVPECL 2000 mV 0x05
+ LCPECL 0x06
+ CML 16 mA 0x07
+ CML 24 mA 0x08
+ CML 32 mA 0x09
+ CMOS (Off/Inverted) 0x0a
+ CMOS (Normal/Off) 0x0b
+ CMOS (Inverted/Inverted) 0x0c
+ CMOS (Inverted/Normal) 0x0d
+ CMOS (Normal/Inverted) 0x0e
+ CMOS (Normal/Normal) 0x0f
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 15
+
+ ti,clkout-sysref:
+ description:
+ Select SYSREF clock path for output clock.
+ type: boolean
+
+ required:
+ - reg
+
+ additionalProperties: false
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ - |
+ clocks {
+ lmk04832_oscin: oscin {
+ compatible = "fixed-clock";
+
+ #clock-cells = <0>;
+ clock-frequency = <122880000>;
+ clock-output-names = "lmk04832-oscin";
+ };
+ };
+
+ spi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ lmk04832: clock-controller@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ reg = <0>;
+
+ compatible = "ti,lmk04832";
+ spi-max-frequency = <781250>;
+
+ reset-gpios = <&gpio_lmk 0 0 0>;
+
+ #clock-cells = <1>;
+ clocks = <&lmk04832_oscin>;
+ clock-names = "oscin";
+
+ ti,spi-4wire-rdbk = <0>;
+ ti,vco-hz = <2457600000>;
+
+ assigned-clocks =
+ <&lmk04832 0>, <&lmk04832 1>,
+ <&lmk04832 2>, <&lmk04832 3>,
+ <&lmk04832 4>,
+ <&lmk04832 6>, <&lmk04832 7>,
+ <&lmk04832 10>, <&lmk04832 11>;
+ assigned-clock-rates =
+ <122880000>, <384000>,
+ <122880000>, <384000>,
+ <122880000>,
+ <153600000>, <384000>,
+ <614400000>, <384000>;
+
+ clkout0@0 {
+ reg = <0>;
+ ti,clkout-fmt = <0x01>; // LVDS
+ };
+
+ clkout1@1 {
+ reg = <1>;
+ ti,clkout-fmt = <0x01>; // LVDS
+ ti,clkout-sysref;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/ti,sci-clk.txt b/Documentation/devicetree/bindings/clock/ti,sci-clk.txt
deleted file mode 100644
index 4e59dc6b1778..000000000000
--- a/Documentation/devicetree/bindings/clock/ti,sci-clk.txt
+++ /dev/null
@@ -1,36 +0,0 @@
-Texas Instruments TI-SCI Clocks
-===============================
-
-All clocks on Texas Instruments' SoCs that contain a System Controller,
-are only controlled by this entity. Communication between a host processor
-running an OS and the System Controller happens through a protocol known
-as TI-SCI[1]. This clock implementation plugs into the common clock
-framework and makes use of the TI-SCI protocol on clock API requests.
-
-[1] Documentation/devicetree/bindings/arm/keystone/ti,sci.txt
-
-Required properties:
--------------------
-- compatible: Must be "ti,k2g-sci-clk"
-- #clock-cells: Shall be 2.
- In clock consumers, this cell represents the device ID and clock ID
- exposed by the PM firmware. The list of valid values for the device IDs
- and clocks IDs for 66AK2G SoC are documented at
- http://processors.wiki.ti.com/index.php/TISCI#66AK2G02_Data
-
-Examples:
---------
-
-pmmc: pmmc {
- compatible = "ti,k2g-sci";
-
- k2g_clks: clocks {
- compatible = "ti,k2g-sci-clk";
- #clock-cells = <2>;
- };
-};
-
-uart0: serial@2530c00 {
- compatible = "ns16550a";
- clocks = <&k2g_clks 0x2c 0>;
-};
diff --git a/Documentation/devicetree/bindings/clock/ti,sci-clk.yaml b/Documentation/devicetree/bindings/clock/ti,sci-clk.yaml
new file mode 100644
index 000000000000..66e8e66ca175
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/ti,sci-clk.yaml
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/ti,sci-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: TI-SCI clock controller
+
+maintainers:
+ - Nishanth Menon <nm@ti.com>
+
+description: |
+ Some TI SoCs contain a system controller (like the Power Management Micro
+ Controller (PMMC) on Keystone 66AK2G SoC) that are responsible for controlling
+ the state of the various hardware modules present on the SoC. Communication
+ between the host processor running an OS and the system controller happens
+ through a protocol called TI System Control Interface (TI-SCI protocol).
+
+ This clock controller node uses the TI SCI protocol to perform various clock
+ management of various hardware modules (devices) present on the SoC. This
+ node must be a child node of the associated TI-SCI system controller node.
+
+properties:
+ $nodename:
+ pattern: "^clock-controller$"
+
+ compatible:
+ const: ti,k2g-sci-clk
+
+ "#clock-cells":
+ const: 2
+ description:
+ The two cells represent values that the TI-SCI controller defines.
+
+ The first cell should contain the device ID.
+
+ The second cell should contain the clock ID.
+
+ Please see https://software-dl.ti.com/tisci/esd/latest/index.html for
+ protocol documentation for the values to be used for different devices.
+
+additionalProperties: false
+
+examples:
+ - |
+ k3_clks: clock-controller {
+ compatible = "ti,k2g-sci-clk";
+ #clock-cells = <2>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/ti-clkctrl.txt b/Documentation/devicetree/bindings/clock/ti-clkctrl.txt
deleted file mode 100644
index 48ee6991f2cc..000000000000
--- a/Documentation/devicetree/bindings/clock/ti-clkctrl.txt
+++ /dev/null
@@ -1,56 +0,0 @@
-Texas Instruments clkctrl clock binding
-
-Texas Instruments SoCs can have a clkctrl clock controller for each
-interconnect target module. The clkctrl clock controller manages functional
-and interface clocks for each module. Each clkctrl controller can also
-gate one or more optional functional clocks for a module, and can have one
-or more clock muxes. There is a clkctrl clock controller typically for each
-interconnect target module on omap4 and later variants.
-
-The clock consumers can specify the index of the clkctrl clock using
-the hardware offset from the clkctrl instance register space. The optional
-clocks can be specified by clkctrl hardware offset and the index of the
-optional clock.
-
-For more information, please see the Linux clock framework binding at
-Documentation/devicetree/bindings/clock/clock-bindings.txt.
-
-Required properties :
-- compatible : shall be "ti,clkctrl"
-- #clock-cells : shall contain 2 with the first entry being the instance
- offset from the clock domain base and the second being the
- clock index
-
-Example: Clock controller node on omap 4430:
-
-&cm2 {
- l4per: cm@1400 {
- cm_l4per@0 {
- cm_l4per_clkctrl: clk@20 {
- compatible = "ti,clkctrl";
- reg = <0x20 0x1b0>;
- #clock-cells = <2>;
- };
- };
- };
-};
-
-Example: Preprocessor helper macros in dt-bindings/clock/ti-clkctrl.h
-
-#define OMAP4_CLKCTRL_OFFSET 0x20
-#define OMAP4_CLKCTRL_INDEX(offset) ((offset) - OMAP4_CLKCTRL_OFFSET)
-#define MODULEMODE_HWCTRL 1
-#define MODULEMODE_SWCTRL 2
-
-#define OMAP4_GPTIMER10_CLKTRL OMAP4_CLKCTRL_INDEX(0x28)
-#define OMAP4_GPTIMER11_CLKTRL OMAP4_CLKCTRL_INDEX(0x30)
-#define OMAP4_GPTIMER2_CLKTRL OMAP4_CLKCTRL_INDEX(0x38)
-...
-#define OMAP4_GPIO2_CLKCTRL OMAP_CLKCTRL_INDEX(0x60)
-
-Example: Clock consumer node for GPIO2:
-
-&gpio2 {
- clocks = <&cm_l4per_clkctrl OMAP4_GPIO2_CLKCTRL 0
- &cm_l4per_clkctrl OMAP4_GPIO2_CLKCTRL 8>;
-};
diff --git a/Documentation/devicetree/bindings/clock/ti-keystone-pllctrl.txt b/Documentation/devicetree/bindings/clock/ti-keystone-pllctrl.txt
deleted file mode 100644
index c35cb6c4af4d..000000000000
--- a/Documentation/devicetree/bindings/clock/ti-keystone-pllctrl.txt
+++ /dev/null
@@ -1,20 +0,0 @@
-* Device tree bindings for Texas Instruments keystone pll controller
-
-The main pll controller used to drive theC66x CorePacs, the switch fabric,
-and a majority of the peripheral clocks (all but the ARM CorePacs, DDR3 and
-the NETCP modules) requires a PLL Controller to manage the various clock
-divisions, gating, and synchronization.
-
-Required properties:
-
-- compatible: "ti,keystone-pllctrl", "syscon"
-
-- reg: contains offset/length value for pll controller
- registers space.
-
-Example:
-
-pllctrl: pll-controller@02310000 {
- compatible = "ti,keystone-pllctrl", "syscon";
- reg = <0x02310000 0x200>;
-};
diff --git a/Documentation/devicetree/bindings/clock/ti/adpll.txt b/Documentation/devicetree/bindings/clock/ti/adpll.txt
index 4c8a2ce2cd70..3122360adcf3 100644
--- a/Documentation/devicetree/bindings/clock/ti/adpll.txt
+++ b/Documentation/devicetree/bindings/clock/ti/adpll.txt
@@ -1,7 +1,5 @@
Binding for Texas Instruments ADPLL clock.
-Binding status: Unstable - ABI compatibility may be broken in the future
-
This binding uses the common clock binding[1]. It assumes a
register-mapped ADPLL with two to three selectable input clocks
and three to four children.
diff --git a/Documentation/devicetree/bindings/clock/ti/apll.txt b/Documentation/devicetree/bindings/clock/ti/apll.txt
index ade4dd4c30f0..bbd505c1199d 100644
--- a/Documentation/devicetree/bindings/clock/ti/apll.txt
+++ b/Documentation/devicetree/bindings/clock/ti/apll.txt
@@ -1,7 +1,5 @@
Binding for Texas Instruments APLL clock.
-Binding status: Unstable - ABI compatibility may be broken in the future
-
This binding uses the common clock binding[1]. It assumes a
register-mapped APLL with usually two selectable input clocks
(reference clock and bypass clock), with analog phase locked
diff --git a/Documentation/devicetree/bindings/clock/ti/autoidle.txt b/Documentation/devicetree/bindings/clock/ti/autoidle.txt
deleted file mode 100644
index 7c735dde9fe9..000000000000
--- a/Documentation/devicetree/bindings/clock/ti/autoidle.txt
+++ /dev/null
@@ -1,39 +0,0 @@
-Binding for Texas Instruments autoidle clock.
-
-Binding status: Unstable - ABI compatibility may be broken in the future
-
-This binding uses the common clock binding[1]. It assumes a register mapped
-clock which can be put to idle automatically by hardware based on the usage
-and a configuration bit setting. Autoidle clock is never an individual
-clock, it is always a derivative of some basic clock like a gate, divider,
-or fixed-factor.
-
-[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-
-Required properties:
-- reg : offset for the register controlling the autoidle
-- ti,autoidle-shift : bit shift of the autoidle enable bit
-- ti,invert-autoidle-bit : autoidle is enabled by setting the bit to 0
-
-Examples:
- dpll_core_m4_ck: dpll_core_m4_ck {
- #clock-cells = <0>;
- compatible = "ti,divider-clock";
- clocks = <&dpll_core_x2_ck>;
- ti,max-div = <31>;
- ti,autoidle-shift = <8>;
- reg = <0x2d38>;
- ti,index-starts-at-one;
- ti,invert-autoidle-bit;
- };
-
- dpll_usb_clkdcoldo_ck: dpll_usb_clkdcoldo_ck {
- #clock-cells = <0>;
- compatible = "ti,fixed-factor-clock";
- clocks = <&dpll_usb_ck>;
- ti,clock-div = <1>;
- ti,autoidle-shift = <8>;
- reg = <0x01b4>;
- ti,clock-mult = <1>;
- ti,invert-autoidle-bit;
- };
diff --git a/Documentation/devicetree/bindings/clock/ti/clockdomain.txt b/Documentation/devicetree/bindings/clock/ti/clockdomain.txt
index cb76b3f2b341..edf0b5d42768 100644
--- a/Documentation/devicetree/bindings/clock/ti/clockdomain.txt
+++ b/Documentation/devicetree/bindings/clock/ti/clockdomain.txt
@@ -1,7 +1,5 @@
Binding for Texas Instruments clockdomain.
-Binding status: Unstable - ABI compatibility may be broken in the future
-
This binding uses the common clock binding[1] in consumer role.
Every clock on TI SoC belongs to one clockdomain, but software
only needs this information for specific clocks which require
@@ -17,6 +15,9 @@ Required properties:
- #clock-cells : from common clock binding; shall be set to 0.
- clocks : link phandles of clocks within this domain
+Optional properties:
+- clock-output-names : from common clock binding.
+
Examples:
dss_clkdm: dss_clkdm {
compatible = "ti,clockdomain";
diff --git a/Documentation/devicetree/bindings/clock/ti/composite.txt b/Documentation/devicetree/bindings/clock/ti/composite.txt
deleted file mode 100644
index 5f43c4706b09..000000000000
--- a/Documentation/devicetree/bindings/clock/ti/composite.txt
+++ /dev/null
@@ -1,54 +0,0 @@
-Binding for TI composite clock.
-
-Binding status: Unstable - ABI compatibility may be broken in the future
-
-This binding uses the common clock binding[1]. It assumes a
-register-mapped composite clock with multiple different sub-types;
-
-a multiplexer clock with multiple input clock signals or parents, one
-of which can be selected as output, this behaves exactly as [2]
-
-an adjustable clock rate divider, this behaves exactly as [3]
-
-a gating function which can be used to enable and disable the output
-clock, this behaves exactly as [4]
-
-The binding must provide a list of the component clocks that shall be
-merged to this clock. The component clocks shall be of one of the
-"ti,*composite*-clock" types.
-
-[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-[2] Documentation/devicetree/bindings/clock/ti/mux.txt
-[3] Documentation/devicetree/bindings/clock/ti/divider.txt
-[4] Documentation/devicetree/bindings/clock/ti/gate.txt
-
-Required properties:
-- compatible : shall be: "ti,composite-clock"
-- clocks : link phandles of component clocks
-- #clock-cells : from common clock binding; shall be set to 0.
-
-Examples:
-
-usb_l4_gate_ick: usb_l4_gate_ick {
- #clock-cells = <0>;
- compatible = "ti,composite-interface-clock";
- clocks = <&l4_ick>;
- ti,bit-shift = <5>;
- reg = <0x0a10>;
-};
-
-usb_l4_div_ick: usb_l4_div_ick {
- #clock-cells = <0>;
- compatible = "ti,composite-divider-clock";
- clocks = <&l4_ick>;
- ti,bit-shift = <4>;
- ti,max-div = <1>;
- reg = <0x0a40>;
- ti,index-starts-at-one;
-};
-
-usb_l4_ick: usb_l4_ick {
- #clock-cells = <0>;
- compatible = "ti,composite-clock";
- clocks = <&usb_l4_gate_ick>, <&usb_l4_div_ick>;
-};
diff --git a/Documentation/devicetree/bindings/clock/ti/davinci/pll.txt b/Documentation/devicetree/bindings/clock/ti/davinci/pll.txt
index 36998e184821..c9894538315b 100644
--- a/Documentation/devicetree/bindings/clock/ti/davinci/pll.txt
+++ b/Documentation/devicetree/bindings/clock/ti/davinci/pll.txt
@@ -15,7 +15,7 @@ Required properties:
- for "ti,da850-pll1", shall be "clksrc"
Optional properties:
-- ti,clkmode-square-wave: Indicates that the the board is supplying a square
+- ti,clkmode-square-wave: Indicates that the board is supplying a square
wave input on the OSCIN pin instead of using a crystal oscillator.
This property is only valid when compatible = "ti,da850-pll0".
diff --git a/Documentation/devicetree/bindings/clock/ti/davinci/psc.txt b/Documentation/devicetree/bindings/clock/ti/davinci/psc.txt
index dae4ad8e198c..5f746ebf7a2c 100644
--- a/Documentation/devicetree/bindings/clock/ti/davinci/psc.txt
+++ b/Documentation/devicetree/bindings/clock/ti/davinci/psc.txt
@@ -67,5 +67,5 @@ Examples:
Also see:
- Documentation/devicetree/bindings/clock/clock-bindings.txt
-- Documentation/devicetree/bindings/power/power_domain.txt
+- Documentation/devicetree/bindings/power/power-domain.yaml
- Documentation/devicetree/bindings/reset/reset.txt
diff --git a/Documentation/devicetree/bindings/clock/ti/divider.txt b/Documentation/devicetree/bindings/clock/ti/divider.txt
deleted file mode 100644
index 9b13b32974f9..000000000000
--- a/Documentation/devicetree/bindings/clock/ti/divider.txt
+++ /dev/null
@@ -1,117 +0,0 @@
-Binding for TI divider clock
-
-Binding status: Unstable - ABI compatibility may be broken in the future
-
-This binding uses the common clock binding[1]. It assumes a
-register-mapped adjustable clock rate divider that does not gate and has
-only one input clock or parent. By default the value programmed into
-the register is one less than the actual divisor value. E.g:
-
-register value actual divisor value
-0 1
-1 2
-2 3
-
-This assumption may be modified by the following optional properties:
-
-ti,index-starts-at-one - valid divisor values start at 1, not the default
-of 0. E.g:
-register value actual divisor value
-1 1
-2 2
-3 3
-
-ti,index-power-of-two - valid divisor values are powers of two. E.g:
-register value actual divisor value
-0 1
-1 2
-2 4
-
-Additionally an array of valid dividers may be supplied like so:
-
- ti,dividers = <4>, <8>, <0>, <16>;
-
-Which will map the resulting values to a divisor table by their index:
-register value actual divisor value
-0 4
-1 8
-2 <invalid divisor, skipped>
-3 16
-
-Any zero value in this array means the corresponding bit-value is invalid
-and must not be used.
-
-The binding must also provide the register to control the divider and
-unless the divider array is provided, min and max dividers. Optionally
-the number of bits to shift that mask, if necessary. If the shift value
-is missing it is the same as supplying a zero shift.
-
-This binding can also optionally provide support to the hardware autoidle
-feature, see [2].
-
-[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-[2] Documentation/devicetree/bindings/clock/ti/autoidle.txt
-
-Required properties:
-- compatible : shall be "ti,divider-clock" or "ti,composite-divider-clock".
-- #clock-cells : from common clock binding; shall be set to 0.
-- clocks : link to phandle of parent clock
-- reg : offset for register controlling adjustable divider
-
-Optional properties:
-- clock-output-names : from common clock binding.
-- ti,dividers : array of integers defining divisors
-- ti,bit-shift : number of bits to shift the divider value, defaults to 0
-- ti,min-div : min divisor for dividing the input clock rate, only
- needed if the first divisor is offset from the default value (1)
-- ti,max-div : max divisor for dividing the input clock rate, only needed
- if ti,dividers is not defined.
-- ti,index-starts-at-one : valid divisor programming starts at 1, not zero,
- only valid if ti,dividers is not defined.
-- ti,index-power-of-two : valid divisor programming must be a power of two,
- only valid if ti,dividers is not defined.
-- ti,autoidle-shift : bit shift of the autoidle enable bit for the clock,
- see [2]
-- ti,invert-autoidle-bit : autoidle is enabled by setting the bit to 0,
- see [2]
-- ti,set-rate-parent : clk_set_rate is propagated to parent
-- ti,latch-bit : latch the divider value to HW, only needed if the register
- access requires this. As an example dra76x DPLL_GMAC H14 divider implements
- such behavior.
-
-Examples:
-dpll_usb_m2_ck: dpll_usb_m2_ck@4a008190 {
- #clock-cells = <0>;
- compatible = "ti,divider-clock";
- clocks = <&dpll_usb_ck>;
- ti,max-div = <127>;
- reg = <0x190>;
- ti,index-starts-at-one;
-};
-
-aess_fclk: aess_fclk@4a004528 {
- #clock-cells = <0>;
- compatible = "ti,divider-clock";
- clocks = <&abe_clk>;
- ti,bit-shift = <24>;
- reg = <0x528>;
- ti,max-div = <2>;
-};
-
-dpll_core_m3x2_div_ck: dpll_core_m3x2_div_ck {
- #clock-cells = <0>;
- compatible = "ti,composite-divider-clock";
- clocks = <&dpll_core_x2_ck>;
- ti,max-div = <31>;
- reg = <0x0134>;
- ti,index-starts-at-one;
-};
-
-ssi_ssr_div_fck_3430es2: ssi_ssr_div_fck_3430es2 {
- #clock-cells = <0>;
- compatible = "ti,composite-divider-clock";
- clocks = <&corex2_fck>;
- ti,bit-shift = <8>;
- reg = <0x0a40>;
- ti,dividers = <0>, <1>, <2>, <3>, <4>, <0>, <6>, <0>, <8>;
-};
diff --git a/Documentation/devicetree/bindings/clock/ti/dpll.txt b/Documentation/devicetree/bindings/clock/ti/dpll.txt
index df57009ff8e7..14a1b72c2e71 100644
--- a/Documentation/devicetree/bindings/clock/ti/dpll.txt
+++ b/Documentation/devicetree/bindings/clock/ti/dpll.txt
@@ -1,7 +1,5 @@
Binding for Texas Instruments DPLL clock.
-Binding status: Unstable - ABI compatibility may be broken in the future
-
This binding uses the common clock binding[1]. It assumes a
register-mapped DPLL with usually two selectable input clocks
(reference clock and bypass clock), with digital phase locked
@@ -42,6 +40,11 @@ Required properties:
"idlest" - contains the idle status register base address
"mult-div1" - contains the multiplier / divider register base address
"autoidle" - contains the autoidle register base address (optional)
+ "ssc-deltam" - DPLL supports spread spectrum clocking (SSC), contains
+ the frequency spreading register base address (optional)
+ "ssc-modfreq" - DPLL supports spread spectrum clocking (SSC), contains
+ the modulation frequency register base address
+ (optional)
ti,am3-* dpll types do not have autoidle register
ti,omap2-* dpll type does not support idlest / autoidle registers
@@ -51,6 +54,14 @@ Optional properties:
- ti,low-power-stop : DPLL supports low power stop mode, gating output
- ti,low-power-bypass : DPLL output matches rate of parent bypass clock
- ti,lock : DPLL locks in programmed rate
+ - ti,min-div : the minimum divisor to start from to round the DPLL
+ target rate
+ - ti,ssc-deltam : DPLL supports spread spectrum clocking, frequency
+ spreading in permille (10th of a percent)
+ - ti,ssc-modfreq-hz : DPLL supports spread spectrum clocking, spread
+ spectrum modulation frequency
+ - ti,ssc-downspread : DPLL supports spread spectrum clocking, boolean
+ to enable the downspread feature
Examples:
dpll_core_ck: dpll_core_ck@44e00490 {
@@ -83,3 +94,10 @@ Examples:
clocks = <&sys_ck>, <&sys_ck>;
reg = <0x0500>, <0x0540>;
};
+
+ dpll_disp_ck: dpll_disp_ck {
+ #clock-cells = <0>;
+ compatible = "ti,am3-dpll-no-gate-clock";
+ clocks = <&sys_clkin_ck>, <&sys_clkin_ck>;
+ reg = <0x0498>, <0x0448>, <0x0454>, <0x044c>, <0x0450>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/ti/dra7-atl.txt b/Documentation/devicetree/bindings/clock/ti/dra7-atl.txt
index 10f7047755f3..68504079f99f 100644
--- a/Documentation/devicetree/bindings/clock/ti/dra7-atl.txt
+++ b/Documentation/devicetree/bindings/clock/ti/dra7-atl.txt
@@ -6,7 +6,7 @@ functional clock but can be configured to provide different clocks.
ATL can maintain a clock averages to some desired frequency based on the bws/aws
signals - can compensate the drift between the two ws signal.
-In order to provide the support for ATL and it's output clocks (which can be used
+In order to provide the support for ATL and its output clocks (which can be used
internally within the SoC or external components) two sets of bindings is needed:
Clock tree binding:
@@ -43,7 +43,7 @@ Configuration of ATL instances:
- aws : Audio word select signal selection
};
-For valid word select signals, see the dt-bindings/clk/ti-dra7-atl.h include
+For valid word select signals, see the dt-bindings/clock/ti-dra7-atl.h include
file.
Examples:
@@ -83,7 +83,7 @@ atl: atl@4843c000 {
clock-names = "fck";
};
-#include <dt-bindings/clk/ti-dra7-atl.h>
+#include <dt-bindings/clock/ti-dra7-atl.h>
&atl {
diff --git a/Documentation/devicetree/bindings/clock/ti/fapll.txt b/Documentation/devicetree/bindings/clock/ti/fapll.txt
index c19b3f253b8c..88986ef39ddd 100644
--- a/Documentation/devicetree/bindings/clock/ti/fapll.txt
+++ b/Documentation/devicetree/bindings/clock/ti/fapll.txt
@@ -1,7 +1,5 @@
Binding for Texas Instruments FAPLL clock.
-Binding status: Unstable - ABI compatibility may be broken in the future
-
This binding uses the common clock binding[1]. It assumes a
register-mapped FAPLL with usually two selectable input clocks
(reference clock and bypass clock), and one or more child
diff --git a/Documentation/devicetree/bindings/clock/ti/fixed-factor-clock.txt b/Documentation/devicetree/bindings/clock/ti/fixed-factor-clock.txt
deleted file mode 100644
index 662b36d53bf0..000000000000
--- a/Documentation/devicetree/bindings/clock/ti/fixed-factor-clock.txt
+++ /dev/null
@@ -1,43 +0,0 @@
-Binding for TI fixed factor rate clock sources.
-
-Binding status: Unstable - ABI compatibility may be broken in the future
-
-This binding uses the common clock binding[1], and also uses the autoidle
-support from TI autoidle clock [2].
-
-[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-[2] Documentation/devicetree/bindings/clock/ti/autoidle.txt
-
-Required properties:
-- compatible : shall be "ti,fixed-factor-clock".
-- #clock-cells : from common clock binding; shall be set to 0.
-- ti,clock-div: fixed divider.
-- ti,clock-mult: fixed multiplier.
-- clocks: parent clock.
-
-Optional properties:
-- ti,autoidle-shift: bit shift of the autoidle enable bit for the clock,
- see [2]
-- reg: offset for the autoidle register of this clock, see [2]
-- ti,invert-autoidle-bit: autoidle is enabled by setting the bit to 0, see [2]
-- ti,set-rate-parent: clk_set_rate is propagated to parent
-
-Example:
- clock {
- compatible = "ti,fixed-factor-clock";
- clocks = <&parentclk>;
- #clock-cells = <0>;
- ti,clock-div = <2>;
- ti,clock-mult = <1>;
- };
-
- dpll_usb_clkdcoldo_ck: dpll_usb_clkdcoldo_ck {
- #clock-cells = <0>;
- compatible = "ti,fixed-factor-clock";
- clocks = <&dpll_usb_ck>;
- ti,clock-div = <1>;
- ti,autoidle-shift = <8>;
- reg = <0x01b4>;
- ti,clock-mult = <1>;
- ti,invert-autoidle-bit;
- };
diff --git a/Documentation/devicetree/bindings/clock/ti/gate.txt b/Documentation/devicetree/bindings/clock/ti/gate.txt
deleted file mode 100644
index 56d603c1f716..000000000000
--- a/Documentation/devicetree/bindings/clock/ti/gate.txt
+++ /dev/null
@@ -1,106 +0,0 @@
-Binding for Texas Instruments gate clock.
-
-Binding status: Unstable - ABI compatibility may be broken in the future
-
-This binding uses the common clock binding[1]. This clock is
-quite much similar to the basic gate-clock [2], however,
-it supports a number of additional features. If no register
-is provided for this clock, the code assumes that a clockdomain
-will be controlled instead and the corresponding hw-ops for
-that is used.
-
-[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-[2] Documentation/devicetree/bindings/clock/gpio-gate-clock.txt
-[3] Documentation/devicetree/bindings/clock/ti/clockdomain.txt
-
-Required properties:
-- compatible : shall be one of:
- "ti,gate-clock" - basic gate clock
- "ti,wait-gate-clock" - gate clock which waits until clock is active before
- returning from clk_enable()
- "ti,dss-gate-clock" - gate clock with DSS specific hardware handling
- "ti,am35xx-gate-clock" - gate clock with AM35xx specific hardware handling
- "ti,clkdm-gate-clock" - clockdomain gate clock, which derives its functional
- clock directly from a clockdomain, see [3] how
- to map clockdomains properly
- "ti,hsdiv-gate-clock" - gate clock with OMAP36xx specific hardware handling,
- required for a hardware errata
- "ti,composite-gate-clock" - composite gate clock, to be part of composite
- clock
- "ti,composite-no-wait-gate-clock" - composite gate clock that does not wait
- for clock to be active before returning
- from clk_enable()
-- #clock-cells : from common clock binding; shall be set to 0
-- clocks : link to phandle of parent clock
-- reg : offset for register controlling adjustable gate, not needed for
- ti,clkdm-gate-clock type
-
-Optional properties:
-- ti,bit-shift : bit shift for programming the clock gate, invalid for
- ti,clkdm-gate-clock type
-- ti,set-bit-to-disable : inverts default gate programming. Setting the bit
- gates the clock and clearing the bit ungates the clock.
-
-Examples:
- mmchs2_fck: mmchs2_fck@48004a00 {
- #clock-cells = <0>;
- compatible = "ti,gate-clock";
- clocks = <&core_96m_fck>;
- reg = <0x0a00>;
- ti,bit-shift = <25>;
- };
-
- uart4_fck_am35xx: uart4_fck_am35xx {
- #clock-cells = <0>;
- compatible = "ti,wait-gate-clock";
- clocks = <&core_48m_fck>;
- reg = <0x0a00>;
- ti,bit-shift = <23>;
- };
-
- dss1_alwon_fck_3430es2: dss1_alwon_fck_3430es2@48004e00 {
- #clock-cells = <0>;
- compatible = "ti,dss-gate-clock";
- clocks = <&dpll4_m4x2_ck>;
- reg = <0x0e00>;
- ti,bit-shift = <0>;
- };
-
- emac_ick: emac_ick@4800259c {
- #clock-cells = <0>;
- compatible = "ti,am35xx-gate-clock";
- clocks = <&ipss_ick>;
- reg = <0x059c>;
- ti,bit-shift = <1>;
- };
-
- emu_src_ck: emu_src_ck {
- #clock-cells = <0>;
- compatible = "ti,clkdm-gate-clock";
- clocks = <&emu_src_mux_ck>;
- };
-
- dpll4_m2x2_ck: dpll4_m2x2_ck@48004d00 {
- #clock-cells = <0>;
- compatible = "ti,hsdiv-gate-clock";
- clocks = <&dpll4_m2x2_mul_ck>;
- ti,bit-shift = <0x1b>;
- reg = <0x0d00>;
- ti,set-bit-to-disable;
- };
-
- vlynq_gate_fck: vlynq_gate_fck {
- #clock-cells = <0>;
- compatible = "ti,composite-gate-clock";
- clocks = <&core_ck>;
- ti,bit-shift = <3>;
- reg = <0x0200>;
- };
-
- sys_clkout2_src_gate: sys_clkout2_src_gate {
- #clock-cells = <0>;
- compatible = "ti,composite-no-wait-gate-clock";
- clocks = <&core_ck>;
- ti,bit-shift = <15>;
- reg = <0x0070>;
- };
diff --git a/Documentation/devicetree/bindings/clock/ti/interface.txt b/Documentation/devicetree/bindings/clock/ti/interface.txt
deleted file mode 100644
index 3f4704040140..000000000000
--- a/Documentation/devicetree/bindings/clock/ti/interface.txt
+++ /dev/null
@@ -1,56 +0,0 @@
-Binding for Texas Instruments interface clock.
-
-Binding status: Unstable - ABI compatibility may be broken in the future
-
-This binding uses the common clock binding[1]. This clock is
-quite much similar to the basic gate-clock [2], however,
-it supports a number of additional features, including
-companion clock finding (match corresponding functional gate
-clock) and hardware autoidle enable / disable.
-
-[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-[2] Documentation/devicetree/bindings/clock/gpio-gate-clock.txt
-
-Required properties:
-- compatible : shall be one of:
- "ti,omap3-interface-clock" - basic OMAP3 interface clock
- "ti,omap3-no-wait-interface-clock" - interface clock which has no hardware
- capability for waiting clock to be ready
- "ti,omap3-hsotgusb-interface-clock" - interface clock with USB specific HW
- handling
- "ti,omap3-dss-interface-clock" - interface clock with DSS specific HW handling
- "ti,omap3-ssi-interface-clock" - interface clock with SSI specific HW handling
- "ti,am35xx-interface-clock" - interface clock with AM35xx specific HW handling
- "ti,omap2430-interface-clock" - interface clock with OMAP2430 specific HW
- handling
-- #clock-cells : from common clock binding; shall be set to 0
-- clocks : link to phandle of parent clock
-- reg : base address for the control register
-
-Optional properties:
-- ti,bit-shift : bit shift for the bit enabling/disabling the clock (default 0)
-
-Examples:
- aes1_ick: aes1_ick@48004a14 {
- #clock-cells = <0>;
- compatible = "ti,omap3-interface-clock";
- clocks = <&security_l4_ick2>;
- reg = <0x48004a14 0x4>;
- ti,bit-shift = <3>;
- };
-
- cam_ick: cam_ick@48004f10 {
- #clock-cells = <0>;
- compatible = "ti,omap3-no-wait-interface-clock";
- clocks = <&l4_ick>;
- reg = <0x48004f10 0x4>;
- ti,bit-shift = <0>;
- };
-
- ssi_ick_3430es2: ssi_ick_3430es2@48004a10 {
- #clock-cells = <0>;
- compatible = "ti,omap3-ssi-interface-clock";
- clocks = <&ssi_l4_ick>;
- reg = <0x48004a10 0x4>;
- ti,bit-shift = <0>;
- };
diff --git a/Documentation/devicetree/bindings/clock/ti/mux.txt b/Documentation/devicetree/bindings/clock/ti/mux.txt
deleted file mode 100644
index eec8994b9be8..000000000000
--- a/Documentation/devicetree/bindings/clock/ti/mux.txt
+++ /dev/null
@@ -1,79 +0,0 @@
-Binding for TI mux clock.
-
-Binding status: Unstable - ABI compatibility may be broken in the future
-
-This binding uses the common clock binding[1]. It assumes a
-register-mapped multiplexer with multiple input clock signals or
-parents, one of which can be selected as output. This clock does not
-gate or adjust the parent rate via a divider or multiplier.
-
-By default the "clocks" property lists the parents in the same order
-as they are programmed into the regster. E.g:
-
- clocks = <&foo_clock>, <&bar_clock>, <&baz_clock>;
-
-results in programming the register as follows:
-
-register value selected parent clock
-0 foo_clock
-1 bar_clock
-2 baz_clock
-
-Some clock controller IPs do not allow a value of zero to be programmed
-into the register, instead indexing begins at 1. The optional property
-"index-starts-at-one" modified the scheme as follows:
-
-register value selected clock parent
-1 foo_clock
-2 bar_clock
-3 baz_clock
-
-The binding must provide the register to control the mux. Optionally
-the number of bits to shift the control field in the register can be
-supplied. If the shift value is missing it is the same as supplying
-a zero shift.
-
-[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-
-Required properties:
-- compatible : shall be "ti,mux-clock" or "ti,composite-mux-clock".
-- #clock-cells : from common clock binding; shall be set to 0.
-- clocks : link phandles of parent clocks
-- reg : register offset for register controlling adjustable mux
-
-Optional properties:
-- ti,bit-shift : number of bits to shift the bit-mask, defaults to
- 0 if not present
-- ti,index-starts-at-one : valid input select programming starts at 1, not
- zero
-- ti,set-rate-parent : clk_set_rate is propagated to parent clock,
- not supported by the composite-mux-clock subtype
-- ti,latch-bit : latch the mux value to HW, only needed if the register
- access requires this. As an example, dra7x DPLL_GMAC H14 muxing
- implements such behavior.
-
-Examples:
-
-sys_clkin_ck: sys_clkin_ck@4a306110 {
- #clock-cells = <0>;
- compatible = "ti,mux-clock";
- clocks = <&virt_12000000_ck>, <&virt_13000000_ck>, <&virt_16800000_ck>, <&virt_19200000_ck>, <&virt_26000000_ck>, <&virt_27000000_ck>, <&virt_38400000_ck>;
- reg = <0x0110>;
- ti,index-starts-at-one;
-};
-
-abe_dpll_bypass_clk_mux_ck: abe_dpll_bypass_clk_mux_ck@4a306108 {
- #clock-cells = <0>;
- compatible = "ti,mux-clock";
- clocks = <&sys_clkin_ck>, <&sys_32k_ck>;
- ti,bit-shift = <24>;
- reg = <0x0108>;
-};
-
-mcbsp5_mux_fck: mcbsp5_mux_fck {
- #clock-cells = <0>;
- compatible = "ti,composite-mux-clock";
- clocks = <&core_96m_fck>, <&mcbsp_clks>;
- ti,bit-shift = <4>;
- reg = <0x02d8>;
-};
diff --git a/Documentation/devicetree/bindings/clock/ti/ti,autoidle.yaml b/Documentation/devicetree/bindings/clock/ti/ti,autoidle.yaml
new file mode 100644
index 000000000000..ed1bf182b64d
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/ti/ti,autoidle.yaml
@@ -0,0 +1,34 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/ti/ti,autoidle.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: TI autoidle clock
+
+maintainers:
+ - Tero Kristo <kristo@kernel.org>
+ - Sukrut Bellary <sbellary@baylibre.com>
+
+description:
+ Some clocks in TI SoC support the autoidle feature. These properties are
+ applicable only if the clock supports autoidle feature. It assumes a register
+ mapped clock which can be put to idle automatically by hardware based on
+ usage and configuration bit setting. Autoidle clock is never an individual
+ clock, it is always a derivative of some basic clock like a gate, divider, or
+ fixed-factor.
+
+properties:
+ ti,autoidle-shift:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ bit shift of the autoidle enable bit for the clock
+ maximum: 31
+ default: 0
+
+ ti,invert-autoidle-bit:
+ type: boolean
+ description:
+ autoidle is enabled by setting the bit to 0
+
+additionalProperties: true
diff --git a/Documentation/devicetree/bindings/clock/ti/ti,clksel.yaml b/Documentation/devicetree/bindings/clock/ti/ti,clksel.yaml
new file mode 100644
index 000000000000..d525f96cf244
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/ti/ti,clksel.yaml
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/ti/ti,clksel.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: TI clksel clock
+
+maintainers:
+ - Tony Lindgren <tony@atomide.com>
+
+description: |
+ The TI CLKSEL clocks consist of consist of input clock mux bits, and in some
+ cases also has divider, multiplier and gate bits.
+
+properties:
+ compatible:
+ const: ti,clksel
+
+ reg:
+ maxItems: 1
+ description: The CLKSEL register range
+
+ '#address-cells':
+ enum: [ 0, 1, 2 ]
+
+ '#size-cells':
+ enum: [ 0, 1, 2 ]
+
+ ranges: true
+
+ "#clock-cells":
+ const: 2
+ description: The CLKSEL register and bit offset
+
+required:
+ - compatible
+ - reg
+ - "#clock-cells"
+
+additionalProperties:
+ type: object
+
+examples:
+ - |
+ clksel_gfx_fclk: clock@52c {
+ compatible = "ti,clksel";
+ reg = <0x25c 0x4>;
+ #clock-cells = <2>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/ti/ti,composite-clock.yaml b/Documentation/devicetree/bindings/clock/ti/ti,composite-clock.yaml
new file mode 100644
index 000000000000..31a6794852c7
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/ti/ti,composite-clock.yaml
@@ -0,0 +1,82 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/ti/ti,composite-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Texas Instruments composite clock
+
+maintainers:
+ - Tero Kristo <kristo@kernel.org>
+
+description: |
+ *Deprecated design pattern: one node per clock*
+
+ This binding assumes a register-mapped composite clock with multiple
+ different sub-types:
+
+ a multiplexer clock with multiple input clock signals or parents, one
+ of which can be selected as output, this behaves exactly as [1].
+
+ an adjustable clock rate divider, this behaves exactly as [2].
+
+ a gating function which can be used to enable and disable the output
+ clock, this behaves exactly as [3].
+
+ The binding must provide a list of the component clocks that shall be
+ merged to this clock. The component clocks shall be of one of the
+ "ti,*composite*-clock" types.
+
+ [1] Documentation/devicetree/bindings/clock/ti/ti,mux-clock.yaml
+ [2] Documentation/devicetree/bindings/clock/ti/ti,divider-clock.yaml
+ [3] Documentation/devicetree/bindings/clock/ti/ti,gate-clock.yaml
+
+properties:
+ compatible:
+ const: ti,composite-clock
+
+ "#clock-cells":
+ const: 0
+
+ clocks: true
+
+ clock-output-names:
+ maxItems: 1
+
+required:
+ - compatible
+ - "#clock-cells"
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ bus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ usb_l4_gate_ick: clock-controller@a10 {
+ #clock-cells = <0>;
+ compatible = "ti,composite-gate-clock";
+ clocks = <&l4_ick>;
+ ti,bit-shift = <5>;
+ reg = <0x0a10>;
+ };
+
+ usb_l4_div_ick: clock-controller@a40 {
+ #clock-cells = <0>;
+ compatible = "ti,composite-divider-clock";
+ clocks = <&l4_ick>;
+ ti,bit-shift = <4>;
+ ti,max-div = <1>;
+ reg = <0x0a40>;
+ ti,index-starts-at-one;
+ };
+ };
+
+ clock-controller {
+ #clock-cells = <0>;
+ compatible = "ti,composite-clock";
+ clocks = <&usb_l4_gate_ick>, <&usb_l4_div_ick>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/ti/ti,divider-clock.yaml b/Documentation/devicetree/bindings/clock/ti/ti,divider-clock.yaml
new file mode 100644
index 000000000000..6729fcb839d2
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/ti/ti,divider-clock.yaml
@@ -0,0 +1,179 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/ti/ti,divider-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Texas Instruments divider clock
+
+maintainers:
+ - Tero Kristo <kristo@kernel.org>
+
+description: |
+ This clock It assumes a register-mapped adjustable clock rate divider
+ that does not gate and has only one input clock or parent. By default the
+ value programmed into the register is one less than the actual divisor value.
+ E.g:
+
+ register value actual divisor value
+ 0 1
+ 1 2
+ 2 3
+
+ This assumption may be modified by the following optional properties:
+
+ ti,index-starts-at-one - valid divisor values start at 1, not the default
+ of 0. E.g:
+ register value actual divisor value
+ 1 1
+ 2 2
+ 3 3
+
+ ti,index-power-of-two - valid divisor values are powers of two. E.g:
+ register value actual divisor value
+ 0 1
+ 1 2
+ 2 4
+
+ Additionally an array of valid dividers may be supplied like so:
+
+ ti,dividers = <4>, <8>, <0>, <16>;
+
+ Which will map the resulting values to a divisor table by their index:
+ register value actual divisor value
+ 0 4
+ 1 8
+ 2 <invalid divisor, skipped>
+ 3 16
+
+ Any zero value in this array means the corresponding bit-value is invalid
+ and must not be used.
+
+ The binding must also provide the register to control the divider and
+ unless the divider array is provided, min and max dividers. Optionally
+ the number of bits to shift that mask, if necessary. If the shift value
+ is missing it is the same as supplying a zero shift.
+
+ This binding can also optionally provide support to the hardware autoidle
+ feature.
+
+allOf:
+ - $ref: ti,autoidle.yaml#
+
+properties:
+ compatible:
+ enum:
+ - ti,divider-clock
+ - ti,composite-divider-clock
+
+ "#clock-cells":
+ const: 0
+
+ clocks:
+ maxItems: 1
+
+ clock-output-names:
+ maxItems: 1
+
+ reg:
+ maxItems: 1
+
+ ti,dividers:
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ description:
+ array of integers defining divisors
+
+ ti,bit-shift:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ number of bits to shift the divider value
+ maximum: 31
+ default: 0
+
+ ti,min-div:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ min divisor for dividing the input clock rate, only
+ needed if the first divisor is offset from the default value (1)
+ minimum: 1
+ default: 1
+
+ ti,max-div:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ max divisor for dividing the input clock rate, only needed
+ if ti,dividers is not defined.
+
+ ti,index-starts-at-one:
+ type: boolean
+ description:
+ valid divisor programming starts at 1, not zero,
+ only valid if ti,dividers is not defined
+
+ ti,index-power-of-two:
+ type: boolean
+ description:
+ valid divisor programming must be a power of two,
+ only valid if ti,dividers is not defined.
+
+ ti,set-rate-parent:
+ type: boolean
+ description:
+ clk_set_rate is propagated to parent |
+
+ ti,latch-bit:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ latch the divider value to HW, only needed if the register
+ compatible access requires this. As an example dra76x DPLL_GMAC
+ H14 divider implements such behavior.
+
+dependentSchemas:
+ ti,dividers:
+ properties:
+ ti,min-div: false
+ ti,max-div: false
+ ti,index-power-of-two: false
+ ti,index-starts-at-one: false
+
+required:
+ - compatible
+ - "#clock-cells"
+ - clocks
+ - reg
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ bus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ clock-controller@190 {
+ #clock-cells = <0>;
+ compatible = "ti,divider-clock";
+ clocks = <&dpll_usb_ck>;
+ ti,max-div = <127>;
+ reg = <0x190>;
+ ti,index-starts-at-one;
+ };
+
+ clock-controller@528 {
+ #clock-cells = <0>;
+ compatible = "ti,divider-clock";
+ clocks = <&abe_clk>;
+ ti,bit-shift = <24>;
+ reg = <0x528>;
+ ti,max-div = <2>;
+ };
+
+ clock-controller@a40 {
+ #clock-cells = <0>;
+ compatible = "ti,composite-divider-clock";
+ clocks = <&corex2_fck>;
+ ti,bit-shift = <8>;
+ reg = <0x0a40>;
+ ti,dividers = <0>, <1>, <2>, <3>, <4>, <0>, <6>, <0>, <8>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/ti/ti,fixed-factor-clock.yaml b/Documentation/devicetree/bindings/clock/ti/ti,fixed-factor-clock.yaml
new file mode 100644
index 000000000000..7a63b0992976
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/ti/ti,fixed-factor-clock.yaml
@@ -0,0 +1,76 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/ti/ti,fixed-factor-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: TI fixed factor rate clock sources
+
+maintainers:
+ - Tero Kristo <kristo@kernel.org>
+ - Sukrut Bellary <sbellary@baylibre.com>
+
+description:
+ This consists of a divider and a multiplier used to generate a fixed rate
+ clock. This also uses the autoidle support from TI autoidle clock.
+
+allOf:
+ - $ref: ti,autoidle.yaml#
+
+properties:
+ compatible:
+ const: ti,fixed-factor-clock
+
+ "#clock-cells":
+ const: 0
+
+ reg:
+ maxItems: 1
+
+ ti,clock-div:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: Fixed divider
+ minimum: 1
+
+ ti,clock-mult:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: Fixed multiplier
+ minimum: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-output-names:
+ maxItems: 1
+
+ ti,set-rate-parent:
+ description:
+ Propagate to parent clock
+ type: boolean
+
+required:
+ - compatible
+ - clocks
+ - "#clock-cells"
+ - ti,clock-mult
+ - ti,clock-div
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ bus{
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ clock@1b4 {
+ compatible = "ti,fixed-factor-clock";
+ reg = <0x1b4>;
+ clocks = <&dpll_usb_ck>;
+ #clock-cells = <0>;
+ ti,clock-mult = <1>;
+ ti,clock-div = <1>;
+ ti,autoidle-shift = <8>;
+ ti,invert-autoidle-bit;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/ti/ti,gate-clock.yaml b/Documentation/devicetree/bindings/clock/ti/ti,gate-clock.yaml
new file mode 100644
index 000000000000..eaa727ab0d7f
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/ti/ti,gate-clock.yaml
@@ -0,0 +1,125 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/ti/ti,gate-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Texas Instruments gate clock
+
+maintainers:
+ - Tero Kristo <kristo@kernel.org>
+
+description: |
+ *Deprecated design pattern: one node per clock*
+
+ This clock is quite much similar to the basic gate-clock [1], however,
+ it supports a number of additional features. If no register
+ is provided for this clock, the code assumes that a clockdomain
+ will be controlled instead and the corresponding hw-ops for
+ that is used.
+
+ [1] Documentation/devicetree/bindings/clock/gpio-gate-clock.yaml
+ [2] Documentation/devicetree/bindings/clock/ti/clockdomain.txt
+
+properties:
+ compatible:
+ enum:
+ - ti,gate-clock # basic gate clock
+ - ti,wait-gate-clock # gate clock which waits until clock is
+ # active before returning from clk_enable()
+ - ti,dss-gate-clock # gate clock with DSS specific hardware
+ # handling
+ - ti,am35xx-gate-clock # gate clock with AM35xx specific hardware
+ # handling
+ - ti,clkdm-gate-clock # clockdomain gate clock, which derives its
+ # functional clock directly from a
+ # clockdomain, see [2] how to map
+ # clockdomains properly
+ - ti,hsdiv-gate-clock # gate clock with OMAP36xx specific hardware
+ # handling, required for a hardware errata
+ - ti,composite-gate-clock # composite gate clock, to be part of
+ # composite clock
+ - ti,composite-no-wait-gate-clock # composite gate clock that does not
+ # wait for clock to be active before
+ # returning from clk_enable()
+ "#clock-cells":
+ const: 0
+
+ clocks: true
+
+ clock-output-names:
+ maxItems: 1
+
+ reg:
+ maxItems: 1
+
+ ti,bit-shift:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ Number of bits to shift the bit-mask
+ maximum: 31
+ default: 0
+
+ ti,set-bit-to-disable:
+ type: boolean
+ description:
+ Inverts default gate programming. Setting the bit
+ gates the clock and clearing the bit ungates the clock.
+
+ ti,set-rate-parent:
+ type: boolean
+ description:
+ clk_set_rate is propagated to parent clock,
+
+if:
+ properties:
+ compatible:
+ contains:
+ const: ti,clkdm-gate-clock
+then:
+ properties:
+ reg: false
+ required:
+ - compatible
+ - "#clock-cells"
+ - clocks
+else:
+ required:
+ - compatible
+ - "#clock-cells"
+ - clocks
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ bus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ clock-controller@a00 {
+ #clock-cells = <0>;
+ compatible = "ti,gate-clock";
+ clocks = <&core_96m_fck>;
+ reg = <0x0a00>;
+ ti,bit-shift = <25>;
+ };
+
+ clock-controller@d00 {
+ compatible = "ti,hsdiv-gate-clock";
+ reg = <0x0d00>;
+ #clock-cells = <0>;
+ clocks = <&dpll4_m2x2_mul_ck>;
+ ti,bit-shift = <0x1b>;
+ ti,set-bit-to-disable;
+ };
+ };
+
+ - |
+ clock-controller {
+ #clock-cells = <0>;
+ compatible = "ti,clkdm-gate-clock";
+ clocks = <&emu_src_mux_ck>;
+ };
+
diff --git a/Documentation/devicetree/bindings/clock/ti/ti,interface-clock.yaml b/Documentation/devicetree/bindings/clock/ti/ti,interface-clock.yaml
new file mode 100644
index 000000000000..1eaf95d88e0b
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/ti/ti,interface-clock.yaml
@@ -0,0 +1,71 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/ti/ti,interface-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Texas Instruments interface clock.
+
+maintainers:
+ - Tero Kristo <kristo@kernel.org>
+
+description: |
+ This clock is quite much similar to the basic gate-clock[1], however,
+ it supports a number of additional features, including
+ companion clock finding (match corresponding functional gate
+ clock) and hardware autoidle enable / disable.
+
+ [1] Documentation/devicetree/bindings/clock/gpio-gate-clock.yaml
+
+properties:
+ compatible:
+ enum:
+ - ti,omap3-interface-clock # basic OMAP3 interface clock
+ - ti,omap3-no-wait-interface-clock # interface clock which has no hardware
+ # capability for waiting clock to be ready
+ - ti,omap3-hsotgusb-interface-clock # interface clock with USB specific HW handling
+ - ti,omap3-dss-interface-clock # interface clock with DSS specific HW handling
+ - ti,omap3-ssi-interface-clock # interface clock with SSI specific HW handling
+ - ti,am35xx-interface-clock # interface clock with AM35xx specific HW handling
+ - ti,omap2430-interface-clock # interface clock with OMAP2430 specific HW handling
+
+ "#clock-cells":
+ const: 0
+
+ clocks:
+ maxItems: 1
+
+ clock-output-names:
+ maxItems: 1
+
+ reg:
+ maxItems: 1
+
+ ti,bit-shift:
+ description:
+ bit shift for the bit enabling/disabling the clock
+ $ref: /schemas/types.yaml#/definitions/uint32
+ default: 0
+ maximum: 31
+
+required:
+ - compatible
+ - clocks
+ - '#clock-cells'
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ bus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ aes1_ick: clock-controller@3 {
+ #clock-cells = <0>;
+ compatible = "ti,omap3-interface-clock";
+ clocks = <&security_l4_ick2>;
+ reg = <3>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/ti/ti,mux-clock.yaml b/Documentation/devicetree/bindings/clock/ti/ti,mux-clock.yaml
new file mode 100644
index 000000000000..485b6aae85d4
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/ti/ti,mux-clock.yaml
@@ -0,0 +1,125 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/ti/ti,mux-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Texas Instruments mux clock
+
+maintainers:
+ - Tero Kristo <kristo@kernel.org>
+
+description: |
+ This clock assumes a register-mapped multiplexer with multiple inpt clock
+ signals or parents, one of which can be selected as output. This clock does
+ not gate or adjust the parent rate via a divider or multiplier.
+
+ By default the "clocks" property lists the parents in the same order
+ as they are programmed into the register. E.g:
+
+ clocks = <&foo_clock>, <&bar_clock>, <&baz_clock>;
+
+ Results in programming the register as follows:
+
+ register value selected parent clock
+ 0 foo_clock
+ 1 bar_clock
+ 2 baz_clock
+
+ Some clock controller IPs do not allow a value of zero to be programmed
+ into the register, instead indexing begins at 1. The optional property
+ "index-starts-at-one" modified the scheme as follows:
+
+ register value selected clock parent
+ 1 foo_clock
+ 2 bar_clock
+ 3 baz_clock
+
+ The binding must provide the register to control the mux. Optionally
+ the number of bits to shift the control field in the register can be
+ supplied. If the shift value is missing it is the same as supplying
+ a zero shift.
+
+properties:
+ compatible:
+ enum:
+ - ti,mux-clock
+ - ti,composite-mux-clock
+
+ "#clock-cells":
+ const: 0
+
+ clocks: true
+
+ clock-output-names:
+ maxItems: 1
+
+ reg:
+ maxItems: 1
+
+ ti,bit-shift:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ Number of bits to shift the bit-mask
+ maximum: 31
+ default: 0
+
+ ti,index-starts-at-one:
+ type: boolean
+ description:
+ Valid input select programming starts at 1, not zero
+
+ ti,set-rate-parent:
+ type: boolean
+ description:
+ clk_set_rate is propagated to parent clock,
+ not supported by the composite-mux-clock subtype.
+
+ ti,latch-bit:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ Latch the mux value to HW, only needed if the register
+ access requires this. As an example, dra7x DPLL_GMAC H14 muxing
+ implements such behavior.
+ maximum: 31
+
+if:
+ properties:
+ compatible:
+ contains:
+ const: ti,composite-mux-clock
+then:
+ properties:
+ ti,set-rate-parent: false
+
+required:
+ - compatible
+ - "#clock-cells"
+ - clocks
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ bus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ clock-controller@110 {
+ compatible = "ti,mux-clock";
+ reg = <0x0110>;
+ #clock-cells = <0>;
+ clocks = <&virt_12000000_ck>, <&virt_13000000_ck>, <&virt_16800000_ck>;
+ ti,index-starts-at-one;
+ ti,set-rate-parent;
+ };
+
+ clock-controller@120 {
+ compatible = "ti,composite-mux-clock";
+ reg = <0x0120>;
+ #clock-cells = <0>;
+ clocks = <&core_96m_fck>, <&mcbsp_clks>;
+ ti,bit-shift = <4>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/toshiba,tmpv770x-pipllct.yaml b/Documentation/devicetree/bindings/clock/toshiba,tmpv770x-pipllct.yaml
new file mode 100644
index 000000000000..d36558aa39f3
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/toshiba,tmpv770x-pipllct.yaml
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/toshiba,tmpv770x-pipllct.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Toshiba Visconti5 TMPV770X PLL Controller
+
+maintainers:
+ - Nobuhiro Iwamatsu <nobuhiro1.iwamatsu@toshiba.co.jp>
+
+description:
+ Toshia Visconti5 PLL controller which supports the PLLs on TMPV770X.
+
+properties:
+ compatible:
+ const: toshiba,tmpv7708-pipllct
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ clocks:
+ description: External reference clock (OSC2)
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - "#clock-cells"
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+
+ osc2_clk: osc2-clk {
+ compatible = "fixed-clock";
+ clock-frequency = <20000000>;
+ #clock-cells = <0>;
+ };
+
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ pipllct: clock-controller@24220000 {
+ compatible = "toshiba,tmpv7708-pipllct";
+ reg = <0 0x24220000 0 0x820>;
+ #clock-cells = <1>;
+ clocks = <&osc2_clk>;
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/toshiba,tmpv770x-pismu.yaml b/Documentation/devicetree/bindings/clock/toshiba,tmpv770x-pismu.yaml
new file mode 100644
index 000000000000..081f85b1eb88
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/toshiba,tmpv770x-pismu.yaml
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/toshiba,tmpv770x-pismu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Toshiba Visconti5 TMPV770x SMU controller
+
+maintainers:
+ - Nobuhiro Iwamatsu <nobuhiro1.iwamatsu@toshiba.co.jp>
+
+description:
+ Toshia Visconti5 SMU (System Management Unit) which supports the clock
+ and resets on TMPV770x.
+
+properties:
+ compatible:
+ items:
+ - const: toshiba,tmpv7708-pismu
+ - const: syscon
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+ '#reset-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - "#clock-cells"
+ - "#reset-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ pismu: syscon@24200000 {
+ compatible = "toshiba,tmpv7708-pismu", "syscon";
+ reg = <0 0x24200000 0 0x2140>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/uniphier-clock.txt b/Documentation/devicetree/bindings/clock/uniphier-clock.txt
deleted file mode 100644
index 7b5f602765fe..000000000000
--- a/Documentation/devicetree/bindings/clock/uniphier-clock.txt
+++ /dev/null
@@ -1,132 +0,0 @@
-UniPhier clock controller
-
-
-System clock
-------------
-
-Required properties:
-- compatible: should be one of the following:
- "socionext,uniphier-ld4-clock" - for LD4 SoC.
- "socionext,uniphier-pro4-clock" - for Pro4 SoC.
- "socionext,uniphier-sld8-clock" - for sLD8 SoC.
- "socionext,uniphier-pro5-clock" - for Pro5 SoC.
- "socionext,uniphier-pxs2-clock" - for PXs2/LD6b SoC.
- "socionext,uniphier-ld11-clock" - for LD11 SoC.
- "socionext,uniphier-ld20-clock" - for LD20 SoC.
- "socionext,uniphier-pxs3-clock" - for PXs3 SoC
-- #clock-cells: should be 1.
-
-Example:
-
- sysctrl@61840000 {
- compatible = "socionext,uniphier-sysctrl",
- "simple-mfd", "syscon";
- reg = <0x61840000 0x4000>;
-
- clock {
- compatible = "socionext,uniphier-ld11-clock";
- #clock-cells = <1>;
- };
-
- other nodes ...
- };
-
-Provided clocks:
-
- 8: ST DMAC
-12: GIO (Giga bit stream I/O)
-14: USB3 ch0 host
-15: USB3 ch1 host
-16: USB3 ch0 PHY0
-17: USB3 ch0 PHY1
-20: USB3 ch1 PHY0
-21: USB3 ch1 PHY1
-
-
-Media I/O (MIO) clock, SD clock
--------------------------------
-
-Required properties:
-- compatible: should be one of the following:
- "socionext,uniphier-ld4-mio-clock" - for LD4 SoC.
- "socionext,uniphier-pro4-mio-clock" - for Pro4 SoC.
- "socionext,uniphier-sld8-mio-clock" - for sLD8 SoC.
- "socionext,uniphier-pro5-sd-clock" - for Pro5 SoC.
- "socionext,uniphier-pxs2-sd-clock" - for PXs2/LD6b SoC.
- "socionext,uniphier-ld11-mio-clock" - for LD11 SoC.
- "socionext,uniphier-ld20-sd-clock" - for LD20 SoC.
- "socionext,uniphier-pxs3-sd-clock" - for PXs3 SoC
-- #clock-cells: should be 1.
-
-Example:
-
- mioctrl@59810000 {
- compatible = "socionext,uniphier-mioctrl",
- "simple-mfd", "syscon";
- reg = <0x59810000 0x800>;
-
- clock {
- compatible = "socionext,uniphier-ld11-mio-clock";
- #clock-cells = <1>;
- };
-
- other nodes ...
- };
-
-Provided clocks:
-
- 0: SD ch0 host
- 1: eMMC host
- 2: SD ch1 host
- 7: MIO DMAC
- 8: USB2 ch0 host
- 9: USB2 ch1 host
-10: USB2 ch2 host
-12: USB2 ch0 PHY
-13: USB2 ch1 PHY
-14: USB2 ch2 PHY
-
-
-Peripheral clock
-----------------
-
-Required properties:
-- compatible: should be one of the following:
- "socionext,uniphier-ld4-peri-clock" - for LD4 SoC.
- "socionext,uniphier-pro4-peri-clock" - for Pro4 SoC.
- "socionext,uniphier-sld8-peri-clock" - for sLD8 SoC.
- "socionext,uniphier-pro5-peri-clock" - for Pro5 SoC.
- "socionext,uniphier-pxs2-peri-clock" - for PXs2/LD6b SoC.
- "socionext,uniphier-ld11-peri-clock" - for LD11 SoC.
- "socionext,uniphier-ld20-peri-clock" - for LD20 SoC.
- "socionext,uniphier-pxs3-peri-clock" - for PXs3 SoC
-- #clock-cells: should be 1.
-
-Example:
-
- perictrl@59820000 {
- compatible = "socionext,uniphier-perictrl",
- "simple-mfd", "syscon";
- reg = <0x59820000 0x200>;
-
- clock {
- compatible = "socionext,uniphier-ld11-peri-clock";
- #clock-cells = <1>;
- };
-
- other nodes ...
- };
-
-Provided clocks:
-
- 0: UART ch0
- 1: UART ch1
- 2: UART ch2
- 3: UART ch3
- 4: I2C ch0
- 5: I2C ch1
- 6: I2C ch2
- 7: I2C ch3
- 8: I2C ch4
- 9: I2C ch5
-10: I2C ch6
diff --git a/Documentation/devicetree/bindings/clock/ux500.txt b/Documentation/devicetree/bindings/clock/ux500.txt
deleted file mode 100644
index e52bd4b72348..000000000000
--- a/Documentation/devicetree/bindings/clock/ux500.txt
+++ /dev/null
@@ -1,64 +0,0 @@
-Clock bindings for ST-Ericsson Ux500 clocks
-
-Required properties :
-- compatible : shall contain only one of the following:
- "stericsson,u8500-clks"
- "stericsson,u8540-clks"
- "stericsson,u9540-clks"
-- reg : shall contain base register location and length for
- CLKRST1, 2, 3, 5, and 6 in an array. Note the absence of
- CLKRST4, which does not exist.
-
-Required subnodes:
-- prcmu-clock: a subnode with one clock cell for PRCMU (power,
- reset, control unit) clocks. The cell indicates which PRCMU
- clock in the prcmu-clock node the consumer wants to use.
-- prcc-periph-clock: a subnode with two clock cells for
- PRCC (programmable reset- and clock controller) peripheral clocks.
- The first cell indicates which PRCC block the consumer
- wants to use, possible values are 1, 2, 3, 5, 6. The second
- cell indicates which clock inside the PRCC block it wants,
- possible values are 0 thru 31.
-- prcc-kernel-clock: a subnode with two clock cells for
- PRCC (programmable reset- and clock controller) kernel clocks
- The first cell indicates which PRCC block the consumer
- wants to use, possible values are 1, 2, 3, 5, 6. The second
- cell indicates which clock inside the PRCC block it wants,
- possible values are 0 thru 31.
-- rtc32k-clock: a subnode with zero clock cells for the 32kHz
- RTC clock.
-- smp-twd-clock: a subnode for the ARM SMP Timer Watchdog cluster
- with zero clock cells.
-
-Example:
-
-clocks {
- compatible = "stericsson,u8500-clks";
- /*
- * Registers for the CLKRST block on peripheral
- * groups 1, 2, 3, 5, 6,
- */
- reg = <0x8012f000 0x1000>, <0x8011f000 0x1000>,
- <0x8000f000 0x1000>, <0xa03ff000 0x1000>,
- <0xa03cf000 0x1000>;
-
- prcmu_clk: prcmu-clock {
- #clock-cells = <1>;
- };
-
- prcc_pclk: prcc-periph-clock {
- #clock-cells = <2>;
- };
-
- prcc_kclk: prcc-kernel-clock {
- #clock-cells = <2>;
- };
-
- rtc_clk: rtc32k-clock {
- #clock-cells = <0>;
- };
-
- smp_twd_clk: smp-twd-clock {
- #clock-cells = <0>;
- };
-};
diff --git a/Documentation/devicetree/bindings/clock/vf610-clock.txt b/Documentation/devicetree/bindings/clock/vf610-clock.txt
deleted file mode 100644
index 63f9f1ac3439..000000000000
--- a/Documentation/devicetree/bindings/clock/vf610-clock.txt
+++ /dev/null
@@ -1,41 +0,0 @@
-* Clock bindings for Freescale Vybrid VF610 SOC
-
-Required properties:
-- compatible: Should be "fsl,vf610-ccm"
-- reg: Address and length of the register set
-- #clock-cells: Should be <1>
-
-Optional properties:
-- clocks: list of clock identifiers which are external input clocks to the
- given clock controller. Please refer the next section to find
- the input clocks for a given controller.
-- clock-names: list of names of clocks which are exteral input clocks to the
- given clock controller.
-
-Input clocks for top clock controller:
- - sxosc (external crystal oscillator 32KHz, recommended)
- - fxosc (external crystal oscillator 24MHz, recommended)
- - audio_ext
- - enet_ext
-
-The clock consumer should specify the desired clock by having the clock
-ID in its "clocks" phandle cell. See include/dt-bindings/clock/vf610-clock.h
-for the full list of VF610 clock IDs.
-
-Examples:
-
-clks: ccm@4006b000 {
- compatible = "fsl,vf610-ccm";
- reg = <0x4006b000 0x1000>;
- #clock-cells = <1>;
- clocks = <&sxosc>, <&fxosc>;
- clock-names = "sxosc", "fxosc";
-};
-
-uart1: serial@40028000 {
- compatible = "fsl,vf610-uart";
- reg = <0x40028000 0x1000>;
- interrupts = <0 62 0x04>;
- clocks = <&clks VF610_CLK_UART1>;
- clock-names = "ipg";
-};
diff --git a/Documentation/devicetree/bindings/clock/xgene.txt b/Documentation/devicetree/bindings/clock/xgene.txt
deleted file mode 100644
index 8233e771711b..000000000000
--- a/Documentation/devicetree/bindings/clock/xgene.txt
+++ /dev/null
@@ -1,131 +0,0 @@
-Device Tree Clock bindings for APM X-Gene
-
-This binding uses the common clock binding[1].
-
-[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-
-Required properties:
-- compatible : shall be one of the following:
- "apm,xgene-socpll-clock" - for a X-Gene SoC PLL clock
- "apm,xgene-pcppll-clock" - for a X-Gene PCP PLL clock
- "apm,xgene-pmd-clock" - for a X-Gene PMD clock
- "apm,xgene-device-clock" - for a X-Gene device clock
- "apm,xgene-socpll-v2-clock" - for a X-Gene SoC PLL v2 clock
- "apm,xgene-pcppll-v2-clock" - for a X-Gene PCP PLL v2 clock
-
-Required properties for SoC or PCP PLL clocks:
-- reg : shall be the physical PLL register address for the pll clock.
-- clocks : shall be the input parent clock phandle for the clock. This should
- be the reference clock.
-- #clock-cells : shall be set to 1.
-- clock-output-names : shall be the name of the PLL referenced by derive
- clock.
-Optional properties for PLL clocks:
-- clock-names : shall be the name of the PLL. If missing, use the device name.
-
-Required properties for PMD clocks:
-- reg : shall be the physical register address for the pmd clock.
-- clocks : shall be the input parent clock phandle for the clock.
-- #clock-cells : shall be set to 1.
-- clock-output-names : shall be the name of the clock referenced by derive
- clock.
-Optional properties for PLL clocks:
-- clock-names : shall be the name of the clock. If missing, use the device name.
-
-Required properties for device clocks:
-- reg : shall be a list of address and length pairs describing the CSR
- reset and/or the divider. Either may be omitted, but at least
- one must be present.
- - reg-names : shall be a string list describing the reg resource. This
- may include "csr-reg" and/or "div-reg". If this property
- is not present, the reg property is assumed to describe
- only "csr-reg".
-- clocks : shall be the input parent clock phandle for the clock.
-- #clock-cells : shall be set to 1.
-- clock-output-names : shall be the name of the device referenced.
-Optional properties for device clocks:
-- clock-names : shall be the name of the device clock. If missing, use the
- device name.
-- csr-offset : Offset to the CSR reset register from the reset address base.
- Default is 0.
-- csr-mask : CSR reset mask bit. Default is 0xF.
-- enable-offset : Offset to the enable register from the reset address base.
- Default is 0x8.
-- enable-mask : CSR enable mask bit. Default is 0xF.
-- divider-offset : Offset to the divider CSR register from the divider base.
- Default is 0x0.
-- divider-width : Width of the divider register. Default is 0.
-- divider-shift : Bit shift of the divider register. Default is 0.
-
-For example:
-
- pcppll: pcppll@17000100 {
- compatible = "apm,xgene-pcppll-clock";
- #clock-cells = <1>;
- clocks = <&refclk 0>;
- clock-names = "pcppll";
- reg = <0x0 0x17000100 0x0 0x1000>;
- clock-output-names = "pcppll";
- type = <0>;
- };
-
- pmd0clk: pmd0clk@7e200200 {
- compatible = "apm,xgene-pmd-clock";
- #clock-cells = <1>;
- clocks = <&pmdpll 0>;
- reg = <0x0 0x7e200200 0x0 0x10>;
- clock-output-names = "pmd0clk";
- };
-
- socpll: socpll@17000120 {
- compatible = "apm,xgene-socpll-clock";
- #clock-cells = <1>;
- clocks = <&refclk 0>;
- clock-names = "socpll";
- reg = <0x0 0x17000120 0x0 0x1000>;
- clock-output-names = "socpll";
- type = <1>;
- };
-
- qmlclk: qmlclk {
- compatible = "apm,xgene-device-clock";
- #clock-cells = <1>;
- clocks = <&socplldiv2 0>;
- clock-names = "qmlclk";
- reg = <0x0 0x1703C000 0x0 0x1000>;
- reg-name = "csr-reg";
- clock-output-names = "qmlclk";
- };
-
- ethclk: ethclk {
- compatible = "apm,xgene-device-clock";
- #clock-cells = <1>;
- clocks = <&socplldiv2 0>;
- clock-names = "ethclk";
- reg = <0x0 0x17000000 0x0 0x1000>;
- reg-names = "div-reg";
- divider-offset = <0x238>;
- divider-width = <0x9>;
- divider-shift = <0x0>;
- clock-output-names = "ethclk";
- };
-
- apbclk: apbclk {
- compatible = "apm,xgene-device-clock";
- #clock-cells = <1>;
- clocks = <&ahbclk 0>;
- clock-names = "apbclk";
- reg = <0x0 0x1F2AC000 0x0 0x1000
- 0x0 0x1F2AC000 0x0 0x1000>;
- reg-names = "csr-reg", "div-reg";
- csr-offset = <0x0>;
- csr-mask = <0x200>;
- enable-offset = <0x8>;
- enable-mask = <0x200>;
- divider-offset = <0x10>;
- divider-width = <0x2>;
- divider-shift = <0x0>;
- flags = <0x8>;
- clock-output-names = "apbclk";
- };
-
diff --git a/Documentation/devicetree/bindings/clock/xlnx,clocking-wizard.yaml b/Documentation/devicetree/bindings/clock/xlnx,clocking-wizard.yaml
new file mode 100644
index 000000000000..b44a76a958f4
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/xlnx,clocking-wizard.yaml
@@ -0,0 +1,84 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/xlnx,clocking-wizard.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Xilinx clocking wizard
+
+maintainers:
+ - Shubhrajyoti Datta <shubhrajyoti.datta@amd.com>
+
+description:
+ The clocking wizard is a soft ip clocking block of Xilinx versal. It
+ reads required input clock frequencies from the devicetree and acts as clock
+ clock output.
+
+properties:
+ compatible:
+ enum:
+ - xlnx,clocking-wizard
+ - xlnx,clocking-wizard-v5.2
+ - xlnx,clocking-wizard-v6.0
+ - xlnx,versal-clk-wizard
+
+
+ reg:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 1
+
+ clocks:
+ items:
+ - description: clock input
+ - description: axi clock
+
+ clock-names:
+ items:
+ - const: clk_in1
+ - const: s_axi_aclk
+
+ xlnx,static-config:
+ $ref: /schemas/types.yaml#/definitions/flag
+ description:
+ Indicate whether the core has been configured without support for dynamic
+ runtime reconfguration of the clocking primitive MMCM/PLL.
+
+ xlnx,speed-grade:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [1, 2, 3]
+ description:
+ Speed grade of the device. Higher the speed grade faster is the FPGA device.
+
+ xlnx,nr-outputs:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 1
+ maximum: 8
+ description:
+ Number of outputs.
+
+required:
+ - compatible
+ - reg
+ - "#clock-cells"
+ - clocks
+ - clock-names
+ - xlnx,speed-grade
+ - xlnx,nr-outputs
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@b0000000 {
+ compatible = "xlnx,clocking-wizard";
+ reg = <0xb0000000 0x10000>;
+ #clock-cells = <1>;
+ xlnx,static-config;
+ xlnx,speed-grade = <1>;
+ xlnx,nr-outputs = <6>;
+ clock-names = "clk_in1", "s_axi_aclk";
+ clocks = <&clkc 15>, <&clkc 15>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/xlnx,vcu.yaml b/Documentation/devicetree/bindings/clock/xlnx,vcu.yaml
new file mode 100644
index 000000000000..19dc923e2ee9
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/xlnx,vcu.yaml
@@ -0,0 +1,59 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/xlnx,vcu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+title: LogicoreIP designed compatible with Xilinx ZYNQ family.
+
+maintainers:
+ - Rohit Visavalia <rohit.visavalia@amd.com>
+
+description:
+ LogicoreIP design to provide the isolation between processing system
+ and programmable logic. Also provides the list of register set to configure
+ the frequency.
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - xlnx,vcu
+ - xlnx,vcu-logicoreip-1.0
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: pll ref clocksource
+ - description: aclk
+
+ clock-names:
+ items:
+ - const: pll_ref
+ - const: aclk
+
+ reset-gpios:
+ maxItems: 1
+
+required:
+ - reg
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+ fpga {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ xlnx_vcu: vcu@a0040000 {
+ compatible = "xlnx,vcu-logicoreip-1.0";
+ reg = <0x0 0xa0040000 0x0 0x1000>;
+ reset-gpios = <&gpio 78 GPIO_ACTIVE_HIGH>;
+ clocks = <&si570_1>, <&clkc 71>;
+ clock-names = "pll_ref", "aclk";
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/xlnx,versal-clk.yaml b/Documentation/devicetree/bindings/clock/xlnx,versal-clk.yaml
new file mode 100644
index 000000000000..bef109d163a8
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/xlnx,versal-clk.yaml
@@ -0,0 +1,146 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/xlnx,versal-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Xilinx Versal clock controller
+
+maintainers:
+ - Michal Simek <michal.simek@amd.com>
+
+description: |
+ The clock controller is a hardware block of Xilinx versal clock tree. It
+ reads required input clock frequencies from the devicetree and acts as clock
+ provider for all clock consumers of PS clocks.
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - xlnx,versal-clk
+ - xlnx,zynqmp-clk
+ - items:
+ - enum:
+ - xlnx,versal-net-clk
+ - const: xlnx,versal-clk
+
+ "#clock-cells":
+ const: 1
+
+ clocks:
+ description: List of clock specifiers which are external input
+ clocks to the given clock controller.
+ minItems: 2
+ maxItems: 8
+
+ clock-names:
+ minItems: 2
+ maxItems: 8
+
+required:
+ - compatible
+ - "#clock-cells"
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - xlnx,versal-clk
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: reference clock
+ - description: alternate reference clock for programmable logic
+
+ clock-names:
+ items:
+ - const: ref
+ - const: pl_alt_ref
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - xlnx,versal-net-clk
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: reference clock
+ - description: alternate reference clock for programmable logic
+ - description: alternate reference clock
+
+ clock-names:
+ items:
+ - const: ref
+ - const: pl_alt_ref
+ - const: alt_ref
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - xlnx,zynqmp-clk
+
+ then:
+ properties:
+ clocks:
+ minItems: 5
+ items:
+ - description: PS reference clock
+ - description: reference clock for video system
+ - description: alternative PS reference clock
+ - description: auxiliary reference clock
+ - description: transceiver reference clock
+ - description: (E)MIO clock source (Optional clock)
+ - description: GEM emio clock (Optional clock)
+ - description: Watchdog external clock (Optional clock)
+
+ clock-names:
+ minItems: 5
+ items:
+ - const: pss_ref_clk
+ - const: video_clk
+ - const: pss_alt_ref_clk
+ - const: aux_ref_clk
+ - const: gt_crx_ref_clk
+ - pattern: "^mio_clk[00-77]+.*$"
+ - pattern: "gem[0-3]+_emio_clk.*$"
+ - pattern: "swdt[0-1]+_ext_clk.*$"
+
+examples:
+ - |
+ firmware {
+ zynqmp_firmware: zynqmp-firmware {
+ compatible = "xlnx,zynqmp-firmware";
+ method = "smc";
+ versal_clk: clock-controller {
+ #clock-cells = <1>;
+ compatible = "xlnx,versal-clk";
+ clocks = <&ref>, <&pl_alt_ref>;
+ clock-names = "ref", "pl_alt_ref";
+ };
+ };
+ };
+
+ clock-controller {
+ #clock-cells = <1>;
+ compatible = "xlnx,zynqmp-clk";
+ clocks = <&pss_ref_clk>, <&video_clk>, <&pss_alt_ref_clk>,
+ <&aux_ref_clk>, <&gt_crx_ref_clk>;
+ clock-names = "pss_ref_clk", "video_clk", "pss_alt_ref_clk",
+ "aux_ref_clk", "gt_crx_ref_clk";
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/zx296702-clk.txt b/Documentation/devicetree/bindings/clock/zx296702-clk.txt
deleted file mode 100644
index 5c91c9e4f1be..000000000000
--- a/Documentation/devicetree/bindings/clock/zx296702-clk.txt
+++ /dev/null
@@ -1,34 +0,0 @@
-Device Tree Clock bindings for ZTE zx296702
-
-This binding uses the common clock binding[1].
-
-[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-
-Required properties:
-- compatible : shall be one of the following:
- "zte,zx296702-topcrm-clk":
- zx296702 top clock selection, divider and gating
-
- "zte,zx296702-lsp0crpm-clk" and
- "zte,zx296702-lsp1crpm-clk":
- zx296702 device level clock selection and gating
-
-- reg: Address and length of the register set
-
-The clock consumer should specify the desired clock by having the clock
-ID in its "clocks" phandle cell. See include/dt-bindings/clock/zx296702-clock.h
-for the full list of zx296702 clock IDs.
-
-
-topclk: topcrm@09800000 {
- compatible = "zte,zx296702-topcrm-clk";
- reg = <0x09800000 0x1000>;
- #clock-cells = <1>;
-};
-
-uart0: serial@09405000 {
- compatible = "zte,zx296702-uart";
- reg = <0x09405000 0x1000>;
- interrupts = <GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&lsp1clk ZX296702_UART0_PCLK>;
-};
diff --git a/Documentation/devicetree/bindings/clock/zx296718-clk.txt b/Documentation/devicetree/bindings/clock/zx296718-clk.txt
deleted file mode 100644
index 3a46bf0b2540..000000000000
--- a/Documentation/devicetree/bindings/clock/zx296718-clk.txt
+++ /dev/null
@@ -1,37 +0,0 @@
-Device Tree Clock bindings for ZTE zx296718
-
-This binding uses the common clock binding[1].
-
-[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-
-Required properties:
-- compatible : shall be one of the following:
- "zte,zx296718-topcrm":
- zx296718 top clock selection, divider and gating
-
- "zte,zx296718-lsp0crm" and
- "zte,zx296718-lsp1crm":
- zx296718 device level clock selection and gating
-
- "zte,zx296718-audiocrm":
- zx296718 audio clock selection, divider and gating
-
-- reg: Address and length of the register set
-
-The clock consumer should specify the desired clock by having the clock
-ID in its "clocks" phandle cell. See include/dt-bindings/clock/zx296718-clock.h
-for the full list of zx296718 clock IDs.
-
-
-topclk: topcrm@1461000 {
- compatible = "zte,zx296718-topcrm-clk";
- reg = <0x01461000 0x1000>;
- #clock-cells = <1>;
-};
-
-usbphy0:usb-phy0 {
- compatible = "zte,zx296718-usb-phy";
- #phy-cells = <0>;
- clocks = <&topclk USB20_PHY_CLK>;
- clock-names = "phyclk";
-};
diff --git a/Documentation/devicetree/bindings/common-properties.txt b/Documentation/devicetree/bindings/common-properties.txt
index a3448bfa1c82..98a28130e100 100644
--- a/Documentation/devicetree/bindings/common-properties.txt
+++ b/Documentation/devicetree/bindings/common-properties.txt
@@ -5,30 +5,29 @@ Endianness
----------
The Devicetree Specification does not define any properties related to hardware
-byteswapping, but endianness issues show up frequently in porting Linux to
+byte swapping, but endianness issues show up frequently in porting drivers to
different machine types. This document attempts to provide a consistent
-way of handling byteswapping across drivers.
+way of handling byte swapping across drivers.
Optional properties:
- big-endian: Boolean; force big endian register accesses
unconditionally (e.g. ioread32be/iowrite32be). Use this if you
- know the peripheral always needs to be accessed in BE mode.
+ know the peripheral always needs to be accessed in big endian (BE) mode.
- little-endian: Boolean; force little endian register accesses
unconditionally (e.g. readl/writel). Use this if you know the
- peripheral always needs to be accessed in LE mode.
+ peripheral always needs to be accessed in little endian (LE) mode.
- native-endian: Boolean; always use register accesses matched to the
endianness of the kernel binary (e.g. LE vmlinux -> readl/writel,
- BE vmlinux -> ioread32be/iowrite32be). In this case no byteswaps
+ BE vmlinux -> ioread32be/iowrite32be). In this case no byte swaps
will ever be performed. Use this if the hardware "self-adjusts"
register endianness based on the CPU's configured endianness.
If a binding supports these properties, then the binding should also
specify the default behavior if none of these properties are present.
In such cases, little-endian is the preferred default, but it is not
-a requirement. The of_device_is_big_endian() and of_fdt_is_big_endian()
-helper functions do assume that little-endian is the default, because
-most existing (PCI-based) drivers implicitly default to LE by using
-readl/writel for MMIO accesses.
+a requirement. Some implementations assume that little-endian is
+the default, because most existing (PCI-based) drivers implicitly
+default to LE for their MMIO accesses.
Examples:
Scenario 1 : CPU in LE mode & device in LE mode.
diff --git a/Documentation/devicetree/bindings/connector/gocontroll,moduline-module-slot.yaml b/Documentation/devicetree/bindings/connector/gocontroll,moduline-module-slot.yaml
new file mode 100644
index 000000000000..a16ae2762d16
--- /dev/null
+++ b/Documentation/devicetree/bindings/connector/gocontroll,moduline-module-slot.yaml
@@ -0,0 +1,88 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/connector/gocontroll,moduline-module-slot.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: GOcontroll Moduline Module slot
+
+maintainers:
+ - Maud Spierings <maudspierings@gocontroll.com>
+
+description:
+ The GOcontroll Moduline module slot represents a connector that fullfills the
+ Moduline slot specification, and can thus house any IO module that is also
+ built to this spec.
+
+properties:
+ compatible:
+ const: gocontroll,moduline-module-slot
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ description: indicates readiness, high means busy.
+ maxItems: 1
+ reset-gpios:
+ description: resets the module, active low.
+ maxItems: 1
+ sync-gpios:
+ description: sync line between all module slots.
+ maxItems: 1
+
+ vdd-supply:
+ description: low power 3v3 supply generally for the microcontroller.
+ vddp-supply:
+ description: medium power 5v0 supply for on module low power peripherals.
+ vddhpp-supply:
+ description: high power 6v-8v supply for on module high power peripherals.
+ power-supply:
+ description: high power 6v-30v supply for high power module circuits.
+
+ i2c-bus:
+ description: i2c bus shared between module slots and the SoC
+ $ref: /schemas/types.yaml#/definitions/phandle
+
+ slot-number:
+ description:
+ The number of the module slot representing the location of on the pcb.
+ This enables access to the modules based on slot location.
+ $ref: /schemas/types.yaml#/definitions/uint32
+
+ spi-max-frequency: true
+
+required:
+ - compatible
+ - reg
+ - reset-gpios
+ - interrupts
+ - sync-gpios
+ - i2c-bus
+ - slot-number
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ spi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ connector@0 {
+ reg = <0>;
+ compatible = "gocontroll,moduline-module-slot";
+ reset-gpios = <&gpio5 10 GPIO_ACTIVE_LOW>;
+ sync-gpios = <&gpio4 16 GPIO_ACTIVE_HIGH>;
+ interrupt-parent = <&gpio4>;
+ interrupts = <5 IRQ_TYPE_EDGE_FALLING>;
+ vdd-supply = <&reg_3v3_per>;
+ vddp-supply = <&reg_5v0>;
+ vddhpp-supply = <&reg_6v4>;
+ i2c-bus = <&i2c2>;
+ slot-number = <1>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/connector/samsung,usb-connector-11pin.txt b/Documentation/devicetree/bindings/connector/samsung,usb-connector-11pin.txt
deleted file mode 100644
index 22256e295a7a..000000000000
--- a/Documentation/devicetree/bindings/connector/samsung,usb-connector-11pin.txt
+++ /dev/null
@@ -1,49 +0,0 @@
-Samsung micro-USB 11-pin connector
-==================================
-
-Samsung micro-USB 11-pin connector is an extension of micro-USB connector.
-It is present in multiple Samsung mobile devices.
-It has additional pins to route MHL traffic simultanously with USB.
-
-The bindings are superset of usb-connector bindings for micro-USB connector[1].
-
-Required properties:
-- compatible: must be: "samsung,usb-connector-11pin", "usb-b-connector",
-- type: must be "micro".
-
-Required nodes:
-- any data bus to the connector should be modeled using the OF graph bindings
- specified in bindings/graph.txt, unless the bus is between parent node and
- the connector. Since single connector can have multpile data buses every bus
- has assigned OF graph port number as follows:
- 0: High Speed (HS),
- 3: Mobile High-Definition Link (MHL), specific to 11-pin Samsung micro-USB.
-
-[1]: bindings/connector/usb-connector.txt
-
-Example
--------
-
-Micro-USB connector with HS lines routed via controller (MUIC) and MHL lines
-connected to HDMI-MHL bridge (sii8620):
-
-muic-max77843@66 {
- ...
- usb_con: connector {
- compatible = "samsung,usb-connector-11pin", "usb-b-connector";
- label = "micro-USB";
- type = "micro";
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@3 {
- reg = <3>;
- usb_con_mhl: endpoint {
- remote-endpoint = <&sii8620_mhl>;
- };
- };
- };
- };
-};
diff --git a/Documentation/devicetree/bindings/connector/usb-connector.txt b/Documentation/devicetree/bindings/connector/usb-connector.txt
deleted file mode 100644
index d90e17e2428b..000000000000
--- a/Documentation/devicetree/bindings/connector/usb-connector.txt
+++ /dev/null
@@ -1,119 +0,0 @@
-USB Connector
-=============
-
-USB connector node represents physical USB connector. It should be
-a child of USB interface controller.
-
-Required properties:
-- compatible: describes type of the connector, must be one of:
- "usb-a-connector",
- "usb-b-connector",
- "usb-c-connector".
-
-Optional properties:
-- label: symbolic name for the connector,
-- type: size of the connector, should be specified in case of USB-A, USB-B
- non-fullsize connectors: "mini", "micro".
-
-Optional properties for usb-c-connector:
-- power-role: should be one of "source", "sink" or "dual"(DRP) if typec
- connector has power support.
-- try-power-role: preferred power role if "dual"(DRP) can support Try.SNK
- or Try.SRC, should be "sink" for Try.SNK or "source" for Try.SRC.
-- data-role: should be one of "host", "device", "dual"(DRD) if typec
- connector supports USB data.
-
-Required properties for usb-c-connector with power delivery support:
-- source-pdos: An array of u32 with each entry providing supported power
- source data object(PDO), the detailed bit definitions of PDO can be found
- in "Universal Serial Bus Power Delivery Specification" chapter 6.4.1.2
- Source_Capabilities Message, the order of each entry(PDO) should follow
- the PD spec chapter 6.4.1. Required for power source and power dual role.
- User can specify the source PDO array via PDO_FIXED/BATT/VAR/PPS_APDO()
- defined in dt-bindings/usb/pd.h.
-- sink-pdos: An array of u32 with each entry providing supported power
- sink data object(PDO), the detailed bit definitions of PDO can be found
- in "Universal Serial Bus Power Delivery Specification" chapter 6.4.1.3
- Sink Capabilities Message, the order of each entry(PDO) should follow
- the PD spec chapter 6.4.1. Required for power sink and power dual role.
- User can specify the sink PDO array via PDO_FIXED/BATT/VAR/PPS_APDO() defined
- in dt-bindings/usb/pd.h.
-- op-sink-microwatt: Sink required operating power in microwatt, if source
- can't offer the power, Capability Mismatch is set. Required for power
- sink and power dual role.
-
-Required nodes:
-- any data bus to the connector should be modeled using the OF graph bindings
- specified in bindings/graph.txt, unless the bus is between parent node and
- the connector. Since single connector can have multpile data buses every bus
- has assigned OF graph port number as follows:
- 0: High Speed (HS), present in all connectors,
- 1: Super Speed (SS), present in SS capable connectors,
- 2: Sideband use (SBU), present in USB-C.
-
-Examples
---------
-
-1. Micro-USB connector with HS lines routed via controller (MUIC):
-
-muic-max77843@66 {
- ...
- usb_con: connector {
- compatible = "usb-b-connector";
- label = "micro-USB";
- type = "micro";
- };
-};
-
-2. USB-C connector attached to CC controller (s2mm005), HS lines routed
-to companion PMIC (max77865), SS lines to USB3 PHY and SBU to DisplayPort.
-DisplayPort video lines are routed to the connector via SS mux in USB3 PHY.
-
-ccic: s2mm005@33 {
- ...
- usb_con: connector {
- compatible = "usb-c-connector";
- label = "USB-C";
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
- usb_con_hs: endpoint {
- remote-endpoint = <&max77865_usbc_hs>;
- };
- };
- port@1 {
- reg = <1>;
- usb_con_ss: endpoint {
- remote-endpoint = <&usbdrd_phy_ss>;
- };
- };
- port@2 {
- reg = <2>;
- usb_con_sbu: endpoint {
- remote-endpoint = <&dp_aux>;
- };
- };
- };
- };
-};
-
-3. USB-C connector attached to a typec port controller(ptn5110), which has
-power delivery support and enables drp.
-
-typec: ptn5110@50 {
- ...
- usb_con: connector {
- compatible = "usb-c-connector";
- label = "USB-C";
- power-role = "dual";
- try-power-role = "sink";
- source-pdos = <PDO_FIXED(5000, 2000, PDO_FIXED_USB_COMM)>;
- sink-pdos = <PDO_FIXED(5000, 2000, PDO_FIXED_USB_COMM)
- PDO_VAR(5000, 12000, 2000)>;
- op-sink-microwatt = <10000000>;
- };
-};
diff --git a/Documentation/devicetree/bindings/connector/usb-connector.yaml b/Documentation/devicetree/bindings/connector/usb-connector.yaml
new file mode 100644
index 000000000000..11e40d225b9f
--- /dev/null
+++ b/Documentation/devicetree/bindings/connector/usb-connector.yaml
@@ -0,0 +1,531 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/connector/usb-connector.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: USB Connector
+
+maintainers:
+ - Rob Herring <robh@kernel.org>
+
+description:
+ A USB connector node represents a physical USB connector. It should be a child
+ of a USB interface controller or a separate node when it is attached to both
+ MUX and USB interface controller.
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - usb-a-connector
+ - usb-b-connector
+ - usb-c-connector
+
+ - items:
+ - const: gpio-usb-b-connector
+ - const: usb-b-connector
+
+ - items:
+ - const: samsung,usb-connector-11pin
+ - const: usb-b-connector
+
+ reg:
+ maxItems: 1
+
+ label:
+ description: Symbolic name for the connector.
+
+ type:
+ description: Size of the connector, should be specified in case of
+ non-fullsize 'usb-a-connector' or 'usb-b-connector' compatible
+ connectors.
+ $ref: /schemas/types.yaml#/definitions/string
+
+ enum:
+ - mini
+ - micro
+
+ self-powered:
+ description: Set this property if the USB device has its own power source.
+ type: boolean
+
+ # The following are optional properties for "usb-b-connector".
+ id-gpios:
+ description: An input gpio for USB ID pin.
+ maxItems: 1
+
+ vbus-gpios:
+ description: An input gpio for USB VBus pin, used to detect presence of
+ VBUS 5V.
+ maxItems: 1
+
+ vbus-supply:
+ description: A phandle to the regulator for USB VBUS if needed when host
+ mode or dual role mode is supported.
+ Particularly, if use an output GPIO to control a VBUS regulator, should
+ model it as a regulator. See bindings/regulator/fixed-regulator.yaml
+
+ power-role:
+ description: Determines the power role that the Type C connector will
+ support. "dual" refers to Dual Role Port (DRP).
+ $ref: /schemas/types.yaml#/definitions/string
+
+ enum:
+ - source
+ - sink
+ - dual
+
+ try-power-role:
+ description: Preferred power role.
+ $ref: /schemas/types.yaml#/definitions/string
+
+ enum:
+ - source
+ - sink
+ - dual
+
+ data-role:
+ description: Data role if Type C connector supports USB data. "dual" refers
+ Dual Role Device (DRD).
+ $ref: /schemas/types.yaml#/definitions/string
+
+ enum:
+ - host
+ - device
+ - dual
+
+ typec-power-opmode:
+ description: Determines the power operation mode that the Type C connector
+ will support and will advertise through CC pins when it has no power
+ delivery support.
+ - "default" corresponds to default USB voltage and current defined by the
+ USB 2.0 and USB 3.2 specifications, 5V 500mA for USB 2.0 ports and
+ 5V 900mA or 1500mA for USB 3.2 ports in single-lane or dual-lane
+ operation respectively.
+ - "1.5A" and "3.0A", 5V 1.5A and 5V 3.0A respectively, as defined in USB
+ Type-C Cable and Connector specification, when Power Delivery is not
+ supported.
+ $ref: /schemas/types.yaml#/definitions/string
+ enum:
+ - default
+ - 1.5A
+ - 3.0A
+
+ pd-disable:
+ description: Set this property if the Type-C connector has no power delivery support.
+ type: boolean
+
+ # The following are optional properties for "usb-c-connector" with power
+ # delivery support.
+ sink-vdos:
+ description: An array of u32 with each entry, a Vendor Defined Message Object (VDO),
+ providing additional information corresponding to the product, the detailed bit
+ definitions and the order of each VDO can be found in
+ "USB Power Delivery Specification Revision 3.0, Version 2.0 + ECNs 2020-12-10"
+ chapter 6.4.4.3.1 Discover Identity. User can specify the VDO array via
+ VDO_IDH/_CERT/_PRODUCT/_UFP/_DFP/_PCABLE/_ACABLE(1/2)/_VPD() defined in
+ dt-bindings/usb/pd.h.
+ minItems: 3
+ maxItems: 6
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+
+ sink-vdos-v1:
+ description: An array of u32 with each entry, a Vendor Defined Message Object (VDO),
+ providing additional information corresponding to the product, the detailed bit
+ definitions and the order of each VDO can be found in
+ "USB Power Delivery Specification Revision 2.0, Version 1.3" chapter 6.4.4.3.1 Discover
+ Identity. User can specify the VDO array via VDO_IDH/_CERT/_PRODUCT/_CABLE/_AMA defined in
+ dt-bindings/usb/pd.h.
+ minItems: 3
+ maxItems: 6
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+
+ accessory-mode-audio:
+ type: boolean
+ description: Whether the device supports Audio Adapter Accessory Mode. This
+ is only necessary if there are no other means to discover supported
+ alternative modes (e.g. through the UCSI firmware interface).
+
+ accessory-mode-debug:
+ type: boolean
+ description: Whether the device supports Debug Accessory Mode. This
+ is only necessary if there are no other means to discover supported
+ alternative modes (e.g. through the UCSI firmware interface).
+
+ altmodes:
+ type: object
+ description: List of Alternative Modes supported by the schematics on the
+ particular device. This is only necessary if there are no other means to
+ discover supported alternative modes (e.g. through the UCSI firmware
+ interface).
+
+ additionalProperties: false
+
+ patternProperties:
+ "^(displayport)$":
+ type: object
+ description:
+ A single USB-C Alternative Mode as supported by the USB-C connector logic.
+
+ additionalProperties: false
+
+ properties:
+ svid:
+ $ref: /schemas/types.yaml#/definitions/uint16
+ description: Unique value assigned by USB-IF to the Vendor / AltMode.
+ enum: [ 0xff01 ]
+ vdo:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: VDO returned by Discover Modes USB PD command.
+
+ port:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: OF graph bindings modeling a data bus to the connector, e.g.
+ there is a single High Speed (HS) port present in this connector. If there
+ is more than one bus (several port, with 'reg' property), they can be grouped
+ under 'ports'.
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+ description: OF graph bindings modeling any data bus to the connector
+ unless the bus is between parent node and the connector. Since a single
+ connector can have multiple data buses every bus has an assigned OF graph
+ port number as described below.
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: High Speed (HS), present in all connectors.
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Super Speed (SS), present in SS capable connectors.
+
+ port@2:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Sideband Use (SBU), present in USB-C. This describes the
+ alternate mode connection of which SBU is a part.
+
+ required:
+ - port@0
+
+ new-source-frs-typec-current:
+ description: Initial current capability of the new source when vSafe5V
+ is applied during PD3.0 Fast Role Swap. "Table 6-14 Fixed Supply PDO - Sink"
+ of "USB Power Delivery Specification Revision 3.0, Version 1.2" provides the
+ different power levels and "6.4.1.3.1.6 Fast Role Swap USB Type-C Current"
+ provides a detailed description of the field. The sink PDO from current source
+ reflects the current source's(i.e. transmitter of the FRS signal) power
+ requirement during fr swap. The current sink (i.e. receiver of the FRS signal),
+ a.k.a new source, should check if it will be able to satisfy the current source's,
+ new sink's, requirement during frswap before enabling the frs signal reception.
+ This property refers to maximum current capability that the current sink can
+ satisfy. During FRS, VBUS voltage is at 5V, as the partners are in implicit
+ contract, hence, the power level is only a function of the current capability.
+ "1" refers to default USB power level as described by "Table 6-14 Fixed Supply PDO - Sink".
+ "2" refers to 1.5A@5V.
+ "3" refers to 3.0A@5V.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [1, 2, 3]
+
+ slow-charger-loop:
+ description: Allows PMIC charger loops which are slow(i.e. cannot meet the 15ms deadline) to
+ still comply to pSnkStby i.e Maximum power that can be consumed by sink while in Sink Standby
+ state as defined in 7.4.2 Sink Electrical Parameters of USB Power Delivery Specification
+ Revision 3.0, Version 1.2. When the property is set, the port requests pSnkStby(2.5W -
+ 5V@500mA) upon entering SNK_DISCOVERY(instead of 3A or the 1.5A, Rp current advertised, during
+ SNK_DISCOVERY) and the actual current limit after reception of PS_Ready for PD link or during
+ SNK_READY for non-pd link.
+ type: boolean
+
+ capabilities:
+ description: A child node to contain all the selectable USB Power Delivery capabilities.
+ type: object
+
+ patternProperties:
+ "^caps-[0-9]+$":
+ description: Child nodes under "capabilities" node. Each node contains a selectable USB
+ Power Delivery capability.
+ type: object
+ $ref: "#/$defs/capabilities"
+ unevaluatedProperties: false
+
+ additionalProperties: false
+
+ sink-wait-cap-time-ms:
+ description: Represents the max time in ms that USB Type-C port (in sink
+ role) should wait for the port partner (source role) to send source caps.
+ SinkWaitCap timer starts when port in sink role attaches to the source.
+ This timer will stop when sink receives PD source cap advertisement before
+ timeout in which case it'll move to capability negotiation stage. A
+ timeout leads to a hard reset message by the port.
+ minimum: 310
+ maximum: 620
+ default: 310
+
+ ps-source-off-time-ms:
+ description: Represents the max time in ms that a DRP in source role should
+ take to turn off power after the PsSourceOff timer starts. PsSourceOff
+ timer starts when a sink's PHY layer receives EOP of the GoodCRC message
+ (corresponding to an Accept message sent in response to a PR_Swap or a
+ FR_Swap request). This timer stops when last bit of GoodCRC EOP
+ corresponding to the received PS_RDY message is transmitted by the PHY
+ layer. A timeout shall lead to error recovery in the type-c port.
+ minimum: 750
+ maximum: 920
+ default: 920
+
+ cc-debounce-time-ms:
+ description: Represents the max time in ms that a port shall wait to
+ determine if it's attached to a partner.
+ minimum: 100
+ maximum: 200
+ default: 200
+
+ sink-bc12-completion-time-ms:
+ description: Represents the max time in ms that a port in sink role takes
+ to complete Battery Charger (BC1.2) Detection. BC1.2 detection is a
+ hardware mechanism, which in some TCPC implementations, can run in
+ parallel once the Type-C connection state machine reaches the "potential
+ connect as sink" state. In TCPCs where this causes delays to respond to
+ the incoming PD messages, sink-bc12-completion-time-ms is used to delay
+ PD negotiation till BC1.2 detection completes.
+ default: 0
+
+ pd-revision:
+ description: Specifies the maximum USB PD revision and version supported by
+ the connector. This property is specified in the following order;
+ <revision_major, revision_minor, version_major, version_minor>.
+ $ref: /schemas/types.yaml#/definitions/uint8-array
+ maxItems: 4
+
+dependencies:
+ sink-vdos-v1: [ sink-vdos ]
+ sink-vdos: [ sink-vdos-v1 ]
+
+required:
+ - compatible
+
+$defs:
+ capabilities:
+ type: object
+
+ properties:
+ source-pdos:
+ description: An array of u32 with each entry providing supported power
+ source data object(PDO), the detailed bit definitions of PDO can be found
+ in "Universal Serial Bus Power Delivery Specification" chapter 6.4.1.2
+ Source_Capabilities Message, the order of each entry(PDO) should follow
+ the PD spec chapter 6.4.1. Required for power source and power dual role.
+ User can specify the source PDO array via PDO_FIXED/BATT/VAR/PPS_APDO()
+ defined in dt-bindings/usb/pd.h.
+ minItems: 1
+ maxItems: 7
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+
+ sink-pdos:
+ description: An array of u32 with each entry providing supported power sink
+ data object(PDO), the detailed bit definitions of PDO can be found in
+ "Universal Serial Bus Power Delivery Specification" chapter 6.4.1.3
+ Sink Capabilities Message, the order of each entry(PDO) should follow the
+ PD spec chapter 6.4.1. Required for power sink and power dual role. User
+ can specify the sink PDO array via PDO_FIXED/BATT/VAR/PPS_APDO() defined
+ in dt-bindings/usb/pd.h.
+ minItems: 1
+ maxItems: 7
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+
+ op-sink-microwatt:
+ description: Sink required operating power in microwatt, if source can't
+ offer the power, Capability Mismatch is set. Required for power sink and
+ power dual role.
+
+allOf:
+ - $ref: "#/$defs/capabilities"
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: gpio-usb-b-connector
+ then:
+ anyOf:
+ - required:
+ - vbus-gpios
+ - required:
+ - id-gpios
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,usb-connector-11pin
+ then:
+ properties:
+ type:
+ const: micro
+
+anyOf:
+ - not:
+ required:
+ - typec-power-opmode
+ - new-source-frs-typec-current
+
+unevaluatedProperties: false
+
+examples:
+ # Micro-USB connector with HS lines routed via controller (MUIC).
+ - |
+ muic-max77843 {
+ usb_con1: connector {
+ compatible = "usb-b-connector";
+ label = "micro-USB";
+ type = "micro";
+ };
+ };
+
+ # USB-C connector attached to CC controller (s2mm005), HS lines routed
+ # to companion PMIC (max77865), SS lines to USB3 PHY and SBU to DisplayPort.
+ # DisplayPort video lines are routed to the connector via SS mux in USB3 PHY.
+ - |
+ ccic: s2mm005 {
+ usb_con2: connector {
+ compatible = "usb-c-connector";
+ label = "USB-C";
+
+ altmodes {
+ displayport {
+ svid = /bits/ 16 <0xff01>;
+ vdo = <0x00001c46>;
+ };
+ };
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ usb_con_hs: endpoint {
+ remote-endpoint = <&max77865_usbc_hs>;
+ };
+ };
+ port@1 {
+ reg = <1>;
+ usb_con_ss: endpoint {
+ remote-endpoint = <&usbdrd_phy_ss>;
+ };
+ };
+ port@2 {
+ reg = <2>;
+ usb_con_sbu: endpoint {
+ remote-endpoint = <&dp_aux>;
+ };
+ };
+ };
+ };
+ };
+
+ # USB-C connector attached to a typec port controller(ptn5110), which has
+ # power delivery support, explicitly defines time properties and enables drp.
+ - |
+ #include <dt-bindings/usb/pd.h>
+ typec: ptn5110 {
+ usb_con3: connector {
+ compatible = "usb-c-connector";
+ label = "USB-C";
+ power-role = "dual";
+ try-power-role = "sink";
+ source-pdos = <PDO_FIXED(5000, 2000, PDO_FIXED_USB_COMM)>;
+ sink-pdos = <PDO_FIXED(5000, 2000, PDO_FIXED_USB_COMM)
+ PDO_VAR(5000, 12000, 2000)>;
+ op-sink-microwatt = <10000000>;
+ sink-wait-cap-time-ms = <465>;
+ ps-source-off-time-ms = <835>;
+ cc-debounce-time-ms = <101>;
+ sink-bc12-completion-time-ms = <500>;
+ };
+ };
+
+ # USB-C connector attached to SoC with a single High-Speed controller
+ - |
+ connector {
+ compatible = "usb-c-connector";
+ label = "USB-C";
+
+ port {
+ high_speed_ep: endpoint {
+ remote-endpoint = <&usb_hs_ep>;
+ };
+ };
+ };
+
+ # USB-C connector attached to SoC and USB3 typec port controller(hd3ss3220)
+ # with SS 2:1 MUX. HS lines routed to SoC, SS lines routed to the MUX and
+ # the output of MUX is connected to the SoC.
+ - |
+ connector {
+ compatible = "usb-c-connector";
+ label = "USB-C";
+ data-role = "dual";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ hs_ep: endpoint {
+ remote-endpoint = <&usb3_hs_ep>;
+ };
+ };
+ port@1 {
+ reg = <1>;
+ ss_ep: endpoint {
+ remote-endpoint = <&hd3ss3220_in_ep>;
+ };
+ };
+ };
+ };
+
+ # USB connector with GPIO control lines
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ usb {
+ connector {
+ compatible = "gpio-usb-b-connector", "usb-b-connector";
+ type = "micro";
+ id-gpios = <&pio 12 GPIO_ACTIVE_HIGH>;
+ vbus-supply = <&usb_p0_vbus>;
+ };
+ };
+
+ # Micro-USB connector with HS lines routed via controller (MUIC) and MHL
+ # lines connected to HDMI-MHL bridge (sii8620) on Samsung Exynos5433-based
+ # mobile phone
+ - |
+ muic-max77843 {
+ usb_con4: connector {
+ compatible = "samsung,usb-connector-11pin", "usb-b-connector";
+ label = "micro-USB";
+ type = "micro";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ muic_to_usb: endpoint {
+ remote-endpoint = <&usb_to_muic>;
+ };
+ };
+ port@3 {
+ reg = <3>;
+ usb_con_mhl: endpoint {
+ remote-endpoint = <&sii8620_mhl>;
+ };
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/counter/fsl,ftm-quaddec.yaml b/Documentation/devicetree/bindings/counter/fsl,ftm-quaddec.yaml
new file mode 100644
index 000000000000..384ca63b64d5
--- /dev/null
+++ b/Documentation/devicetree/bindings/counter/fsl,ftm-quaddec.yaml
@@ -0,0 +1,36 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/counter/fsl,ftm-quaddec.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: FlexTimer Quadrature decoder counter
+
+description:
+ Exposes a simple counter for the quadrature decoder mode.
+
+maintainers:
+ - Frank Li <Frank.li@nxp.com>
+
+properties:
+ compatible:
+ const: fsl,ftm-quaddec
+
+ reg:
+ maxItems: 1
+
+ big-endian: true
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ counter@29d0000 {
+ compatible = "fsl,ftm-quaddec";
+ reg = <0x29d0000 0x10000>;
+ big-endian;
+ };
diff --git a/Documentation/devicetree/bindings/counter/interrupt-counter.yaml b/Documentation/devicetree/bindings/counter/interrupt-counter.yaml
new file mode 100644
index 000000000000..fd075d104631
--- /dev/null
+++ b/Documentation/devicetree/bindings/counter/interrupt-counter.yaml
@@ -0,0 +1,62 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/counter/interrupt-counter.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Interrupt counter
+
+maintainers:
+ - Oleksij Rempel <o.rempel@pengutronix.de>
+
+description: |
+ A generic interrupt counter to measure interrupt frequency. It was developed
+ and used for agricultural devices to measure rotation speed of wheels or
+ other tools. Since the direction of rotation is not important, only one
+ signal line is needed.
+ Interrupts or gpios are required. If both are defined, the interrupt will
+ take precedence for counting interrupts.
+
+properties:
+ compatible:
+ const: interrupt-counter
+
+ interrupts:
+ maxItems: 1
+
+ gpios:
+ maxItems: 1
+
+required:
+ - compatible
+
+anyOf:
+ - required: [ interrupts-extended ]
+ - required: [ interrupts ]
+ - required: [ gpios ]
+
+additionalProperties: false
+
+examples:
+ - |
+
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/gpio/gpio.h>
+
+ counter-0 {
+ compatible = "interrupt-counter";
+ interrupts-extended = <&gpio 0 IRQ_TYPE_EDGE_RISING>;
+ };
+
+ counter-1 {
+ compatible = "interrupt-counter";
+ gpios = <&gpio 2 GPIO_ACTIVE_HIGH>;
+ };
+
+ counter-2 {
+ compatible = "interrupt-counter";
+ interrupts-extended = <&gpio 2 IRQ_TYPE_EDGE_RISING>;
+ gpios = <&gpio 2 GPIO_ACTIVE_HIGH>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/counter/ti,am62-ecap-capture.yaml b/Documentation/devicetree/bindings/counter/ti,am62-ecap-capture.yaml
new file mode 100644
index 000000000000..4e0b2d2b303e
--- /dev/null
+++ b/Documentation/devicetree/bindings/counter/ti,am62-ecap-capture.yaml
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/counter/ti,am62-ecap-capture.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Texas Instruments Enhanced Capture (eCAP) Module
+
+maintainers:
+ - Julien Panis <jpanis@baylibre.com>
+
+description: |
+ The eCAP module resources can be used to capture timestamps
+ on input signal events (falling/rising edges).
+
+properties:
+ compatible:
+ const: ti,am62-ecap-capture
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ const: fck
+
+ power-domains:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/soc/ti,sci_pm_domain.h>
+
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ capture@23100000 { /* eCAP in capture mode on am62x */
+ compatible = "ti,am62-ecap-capture";
+ reg = <0x00 0x23100000 0x00 0x100>;
+ interrupts = <GIC_SPI 113 IRQ_TYPE_EDGE_RISING>;
+ power-domains = <&k3_pds 51 TI_SCI_PD_EXCLUSIVE>;
+ clocks = <&k3_clks 51 0>;
+ clock-names = "fck";
+ };
+ };
diff --git a/Documentation/devicetree/bindings/counter/ti-eqep.yaml b/Documentation/devicetree/bindings/counter/ti-eqep.yaml
new file mode 100644
index 000000000000..c882ab5fcf1f
--- /dev/null
+++ b/Documentation/devicetree/bindings/counter/ti-eqep.yaml
@@ -0,0 +1,67 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/counter/ti-eqep.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Texas Instruments Enhanced Quadrature Encoder Pulse (eQEP) Module
+
+maintainers:
+ - David Lechner <david@lechnology.com>
+
+properties:
+ compatible:
+ enum:
+ - ti,am3352-eqep
+ - ti,am62-eqep
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ description: The eQEP event interrupt
+ maxItems: 1
+
+ clocks:
+ description: The functional and interface clock that determines the clock
+ rate for the eQEP peripheral.
+ maxItems: 1
+
+ clock-names:
+ const: sysclkout
+
+ power-domains:
+ maxItems: 1
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - ti,am62-eqep
+ then:
+ properties:
+ clock-names: false
+
+ required:
+ - power-domains
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ eqep0: counter@180 {
+ compatible = "ti,am3352-eqep";
+ reg = <0x180 0x80>;
+ clocks = <&l4ls_gclk>;
+ interrupts = <79>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/arm/cpu-capacity.txt b/Documentation/devicetree/bindings/cpu/cpu-capacity.txt
index 84262cdb8d29..f28e1adad428 100644
--- a/Documentation/devicetree/bindings/arm/cpu-capacity.txt
+++ b/Documentation/devicetree/bindings/cpu/cpu-capacity.txt
@@ -1,12 +1,12 @@
==========================================
-ARM CPUs capacity bindings
+CPU capacity bindings
==========================================
==========================================
1 - Introduction
==========================================
-ARM systems may be configured to have cpus with different power/performance
+Some systems may be configured to have cpus with different power/performance
characteristics within the same chip. In this case, additional information has
to be made available to the kernel for it to be aware of such differences and
take decisions accordingly.
@@ -62,8 +62,8 @@ Example 1 (ARM 64-bit, 6-cpu system, two clusters):
The capacities-dmips-mhz or DMIPS/MHz values (scaled to 1024)
are 1024 and 578 for cluster0 and cluster1. Further normalization
is done by the operating system based on cluster0@max-freq=1100 and
-custer1@max-freq=850, final capacities are 1024 for cluster0 and
-446 for cluster1 (576*850/1100).
+cluster1@max-freq=850, final capacities are 1024 for cluster0 and
+446 for cluster1 (578*850/1100).
cpus {
#address-cells = <2>;
@@ -118,7 +118,7 @@ cpus {
};
A57_0: cpu@0 {
- compatible = "arm,cortex-a57","arm,armv8";
+ compatible = "arm,cortex-a57";
reg = <0x0 0x0>;
device_type = "cpu";
enable-method = "psci";
@@ -129,7 +129,7 @@ cpus {
};
A57_1: cpu@1 {
- compatible = "arm,cortex-a57","arm,armv8";
+ compatible = "arm,cortex-a57";
reg = <0x0 0x1>;
device_type = "cpu";
enable-method = "psci";
@@ -140,7 +140,7 @@ cpus {
};
A53_0: cpu@100 {
- compatible = "arm,cortex-a53","arm,armv8";
+ compatible = "arm,cortex-a53";
reg = <0x0 0x100>;
device_type = "cpu";
enable-method = "psci";
@@ -151,7 +151,7 @@ cpus {
};
A53_1: cpu@101 {
- compatible = "arm,cortex-a53","arm,armv8";
+ compatible = "arm,cortex-a53";
reg = <0x0 0x101>;
device_type = "cpu";
enable-method = "psci";
@@ -162,7 +162,7 @@ cpus {
};
A53_2: cpu@102 {
- compatible = "arm,cortex-a53","arm,armv8";
+ compatible = "arm,cortex-a53";
reg = <0x0 0x102>;
device_type = "cpu";
enable-method = "psci";
@@ -173,7 +173,7 @@ cpus {
};
A53_3: cpu@103 {
- compatible = "arm,cortex-a53","arm,armv8";
+ compatible = "arm,cortex-a53";
reg = <0x0 0x103>;
device_type = "cpu";
enable-method = "psci";
@@ -235,4 +235,4 @@ cpus {
===========================================
[1] ARM Linux Kernel documentation - CPUs bindings
- Documentation/devicetree/bindings/arm/cpus.txt
+ Documentation/devicetree/bindings/arm/cpus.yaml
diff --git a/Documentation/devicetree/bindings/cpu/idle-states.yaml b/Documentation/devicetree/bindings/cpu/idle-states.yaml
new file mode 100644
index 000000000000..385b0a511652
--- /dev/null
+++ b/Documentation/devicetree/bindings/cpu/idle-states.yaml
@@ -0,0 +1,928 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/cpu/idle-states.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Idle states
+
+maintainers:
+ - Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+ - Anup Patel <anup@brainfault.org>
+
+description: |+
+ ==========================================
+ 1 - Introduction
+ ==========================================
+
+ ARM and RISC-V systems contain HW capable of managing power consumption
+ dynamically, where cores can be put in different low-power states (ranging
+ from simple wfi to power gating) according to OS PM policies. The CPU states
+ representing the range of dynamic idle states that a processor can enter at
+ run-time, can be specified through device tree bindings representing the
+ parameters required to enter/exit specific idle states on a given processor.
+
+ ==========================================
+ 2 - ARM idle states
+ ==========================================
+
+ According to the Server Base System Architecture document (SBSA, [3]), the
+ power states an ARM CPU can be put into are identified by the following list:
+
+ - Running
+ - Idle_standby
+ - Idle_retention
+ - Sleep
+ - Off
+
+ The power states described in the SBSA document define the basic CPU states on
+ top of which ARM platforms implement power management schemes that allow an OS
+ PM implementation to put the processor in different idle states (which include
+ states listed above; "off" state is not an idle state since it does not have
+ wake-up capabilities, hence it is not considered in this document).
+
+ Idle state parameters (e.g. entry latency) are platform specific and need to
+ be characterized with bindings that provide the required information to OS PM
+ code so that it can build the required tables and use them at runtime.
+
+ The device tree binding definition for ARM idle states is the subject of this
+ document.
+
+ ==========================================
+ 3 - RISC-V idle states
+ ==========================================
+
+ On RISC-V systems, the HARTs (or CPUs) [6] can be put in platform specific
+ suspend (or idle) states (ranging from simple WFI, power gating, etc). The
+ RISC-V SBI v0.3 (or higher) [7] hart state management extension provides a
+ standard mechanism for OS to request HART state transitions.
+
+ The platform specific suspend (or idle) states of a hart can be either
+ retentive or non-rententive in nature. A retentive suspend state will
+ preserve HART registers and CSR values for all privilege modes whereas
+ a non-retentive suspend state will not preserve HART registers and CSR
+ values.
+
+ ===========================================
+ 4 - idle-states definitions
+ ===========================================
+
+ Idle states are characterized for a specific system through a set of
+ timing and energy related properties, that underline the HW behaviour
+ triggered upon idle states entry and exit.
+
+ The following diagram depicts the CPU execution phases and related timing
+ properties required to enter and exit an idle state:
+
+ ..__[EXEC]__|__[PREP]__|__[ENTRY]__|__[IDLE]__|__[EXIT]__|__[EXEC]__..
+ | | | | |
+
+ |<------ entry ------->|
+ | latency |
+ |<- exit ->|
+ | latency |
+ |<-------- min-residency -------->|
+ |<------- wakeup-latency ------->|
+
+ Diagram 1: CPU idle state execution phases
+
+ EXEC: Normal CPU execution.
+
+ PREP: Preparation phase before committing the hardware to idle mode
+ like cache flushing. This is abortable on pending wake-up
+ event conditions. The abort latency is assumed to be negligible
+ (i.e. less than the ENTRY + EXIT duration). If aborted, CPU
+ goes back to EXEC. This phase is optional. If not abortable,
+ this should be included in the ENTRY phase instead.
+
+ ENTRY: The hardware is committed to idle mode. This period must run
+ to completion up to IDLE before anything else can happen.
+
+ IDLE: This is the actual energy-saving idle period. This may last
+ between 0 and infinite time, until a wake-up event occurs.
+
+ EXIT: Period during which the CPU is brought back to operational
+ mode (EXEC).
+
+ entry-latency: Worst case latency required to enter the idle state. The
+ exit-latency may be guaranteed only after entry-latency has passed.
+
+ min-residency: Minimum period, including preparation and entry, for a given
+ idle state to be worthwhile energywise.
+
+ wakeup-latency: Maximum delay between the signaling of a wake-up event and the
+ CPU being able to execute normal code again. If not specified, this is assumed
+ to be entry-latency + exit-latency.
+
+ These timing parameters can be used by an OS in different circumstances.
+
+ An idle CPU requires the expected min-residency time to select the most
+ appropriate idle state based on the expected expiry time of the next IRQ
+ (i.e. wake-up) that causes the CPU to return to the EXEC phase.
+
+ An operating system scheduler may need to compute the shortest wake-up delay
+ for CPUs in the system by detecting how long will it take to get a CPU out
+ of an idle state, e.g.:
+
+ wakeup-delay = exit-latency + max(entry-latency - (now - entry-timestamp), 0)
+
+ In other words, the scheduler can make its scheduling decision by selecting
+ (e.g. waking-up) the CPU with the shortest wake-up delay.
+ The wake-up delay must take into account the entry latency if that period
+ has not expired. The abortable nature of the PREP period can be ignored
+ if it cannot be relied upon (e.g. the PREP deadline may occur much sooner than
+ the worst case since it depends on the CPU operating conditions, i.e. caches
+ state).
+
+ An OS has to reliably probe the wakeup-latency since some devices can enforce
+ latency constraint guarantees to work properly, so the OS has to detect the
+ worst case wake-up latency it can incur if a CPU is allowed to enter an
+ idle state, and possibly to prevent that to guarantee reliable device
+ functioning.
+
+ The min-residency time parameter deserves further explanation since it is
+ expressed in time units but must factor in energy consumption coefficients.
+
+ The energy consumption of a cpu when it enters a power state can be roughly
+ characterised by the following graph:
+
+ |
+ |
+ |
+ e |
+ n | /---
+ e | /------
+ r | /------
+ g | /-----
+ y | /------
+ | ----
+ | /|
+ | / |
+ | / |
+ | / |
+ | / |
+ | / |
+ |/ |
+ -----|-------+----------------------------------
+ 0| 1 time(ms)
+
+ Graph 1: Energy vs time example
+
+ The graph is split in two parts delimited by time 1ms on the X-axis.
+ The graph curve with X-axis values = { x | 0 < x < 1ms } has a steep slope
+ and denotes the energy costs incurred while entering and leaving the idle
+ state.
+ The graph curve in the area delimited by X-axis values = {x | x > 1ms } has
+ shallower slope and essentially represents the energy consumption of the idle
+ state.
+
+ min-residency is defined for a given idle state as the minimum expected
+ residency time for a state (inclusive of preparation and entry) after
+ which choosing that state become the most energy efficient option. A good
+ way to visualise this, is by taking the same graph above and comparing some
+ states energy consumptions plots.
+
+ For sake of simplicity, let's consider a system with two idle states IDLE1,
+ and IDLE2:
+
+ |
+ |
+ |
+ | /-- IDLE1
+ e | /---
+ n | /----
+ e | /---
+ r | /-----/--------- IDLE2
+ g | /-------/---------
+ y | ------------ /---|
+ | / /---- |
+ | / /--- |
+ | / /---- |
+ | / /--- |
+ | --- |
+ | / |
+ | / |
+ |/ | time
+ ---/----------------------------+------------------------
+ |IDLE1-energy < IDLE2-energy | IDLE2-energy < IDLE1-energy
+ |
+ IDLE2-min-residency
+
+ Graph 2: idle states min-residency example
+
+ In graph 2 above, that takes into account idle states entry/exit energy
+ costs, it is clear that if the idle state residency time (i.e. time till next
+ wake-up IRQ) is less than IDLE2-min-residency, IDLE1 is the better idle state
+ choice energywise.
+
+ This is mainly down to the fact that IDLE1 entry/exit energy costs are lower
+ than IDLE2.
+
+ However, the lower power consumption (i.e. shallower energy curve slope) of
+ idle state IDLE2 implies that after a suitable time, IDLE2 becomes more energy
+ efficient.
+
+ The time at which IDLE2 becomes more energy efficient than IDLE1 (and other
+ shallower states in a system with multiple idle states) is defined
+ IDLE2-min-residency and corresponds to the time when energy consumption of
+ IDLE1 and IDLE2 states breaks even.
+
+ The definitions provided in this section underpin the idle states
+ properties specification that is the subject of the following sections.
+
+ ===========================================
+ 5 - idle-states node
+ ===========================================
+
+ The processor idle states are defined within the idle-states node, which is
+ a direct child of the cpus node [1] and provides a container where the
+ processor idle states, defined as device tree nodes, are listed.
+
+ On ARM systems, it is a container of processor idle states nodes. If the
+ system does not provide CPU power management capabilities, or the processor
+ just supports idle_standby, an idle-states node is not required.
+
+ ===========================================
+ 6 - Qualcomm specific STATES
+ ===========================================
+
+ Idle states have different enter/exit latency and residency values.
+ The idle states supported by the QCOM SoC are defined as -
+
+ * Standby
+ * Retention
+ * Standalone Power Collapse (Standalone PC or SPC)
+ * Power Collapse (PC)
+
+ Standby: Standby does a little more in addition to architectural clock gating.
+ When the WFI instruction is executed the ARM core would gate its internal
+ clocks. In addition to gating the clocks, QCOM cpus use this instruction as a
+ trigger to execute the SPM state machine. The SPM state machine waits for the
+ interrupt to trigger the core back in to active. This triggers the cache
+ hierarchy to enter standby states, when all cpus are idle. An interrupt brings
+ the SPM state machine out of its wait, the next step is to ensure that the
+ cache hierarchy is also out of standby, and then the cpu is allowed to resume
+ execution. This state is defined as a generic ARM WFI state by the ARM cpuidle
+ driver and is not defined in the DT. The SPM state machine should be
+ configured to execute this state by default and after executing every other
+ state below.
+
+ Retention: Retention is a low power state where the core is clock gated and
+ the memory and the registers associated with the core are retained. The
+ voltage may be reduced to the minimum value needed to keep the processor
+ registers active. The SPM should be configured to execute the retention
+ sequence and would wait for interrupt, before restoring the cpu to execution
+ state. Retention may have a slightly higher latency than Standby.
+
+ Standalone PC: A cpu can power down and warmboot if there is a sufficient time
+ between the time it enters idle and the next known wake up. SPC mode is used
+ to indicate a core entering a power down state without consulting any other
+ cpu or the system resources. This helps save power only on that core. The SPM
+ sequence for this idle state is programmed to power down the supply to the
+ core, wait for the interrupt, restore power to the core, and ensure the
+ system state including cache hierarchy is ready before allowing core to
+ resume. Applying power and resetting the core causes the core to warmboot
+ back into Elevation Level (EL) which trampolines the control back to the
+ kernel. Entering a power down state for the cpu, needs to be done by trapping
+ into a EL. Failing to do so, would result in a crash enforced by the warm boot
+ code in the EL for the SoC. On SoCs with write-back L1 cache, the cache has to
+ be flushed in s/w, before powering down the core.
+
+ Power Collapse: This state is similar to the SPC mode, but distinguishes
+ itself in that the cpu acknowledges and permits the SoC to enter deeper sleep
+ modes. In a hierarchical power domain SoC, this means L2 and other caches can
+ be flushed, system bus, clocks - lowered, and SoC main XO clock gated and
+ voltages reduced, provided all cpus enter this state. Since the span of low
+ power modes possible at this state is vast, the exit latency and the residency
+ of this low power mode would be considered high even though at a cpu level,
+ this essentially is cpu power down. The SPM in this state also may handshake
+ with the Resource power manager (RPM) processor in the SoC to indicate a
+ complete application processor subsystem shut down.
+
+ ===========================================
+ 7 - References
+ ===========================================
+
+ [1] ARM Linux Kernel documentation - CPUs bindings
+ Documentation/devicetree/bindings/arm/cpus.yaml
+
+ [2] ARM Linux Kernel documentation - PSCI bindings
+ Documentation/devicetree/bindings/arm/psci.yaml
+
+ [3] ARM Server Base System Architecture (SBSA)
+ http://infocenter.arm.com/help/index.jsp
+
+ [4] ARM Architecture Reference Manuals
+ http://infocenter.arm.com/help/index.jsp
+
+ [5] ARM Linux Kernel documentation - Booting AArch64 Linux
+ Documentation/arch/arm64/booting.rst
+
+ [6] RISC-V Linux Kernel documentation - CPUs bindings
+ Documentation/devicetree/bindings/riscv/cpus.yaml
+
+ [7] RISC-V Supervisor Binary Interface (SBI)
+ http://github.com/riscv/riscv-sbi-doc/riscv-sbi.adoc
+
+properties:
+ $nodename:
+ const: idle-states
+
+ entry-method:
+ description: |
+ Usage and definition depend on ARM architecture version.
+
+ On ARM v8 64-bit this property is required.
+ On ARM 32-bit systems this property is optional
+
+ This assumes that the "enable-method" property is set to "psci" in the cpu
+ node[5] that is responsible for setting up CPU idle management in the OS
+ implementation.
+ const: psci
+
+patternProperties:
+ "^(cpu|cluster)-":
+ type: object
+ description: |
+ Each state node represents an idle state description and must be defined
+ as follows.
+
+ The idle state entered by executing the wfi instruction (idle_standby
+ SBSA,[3][4]) is considered standard on all ARM and RISC-V platforms and
+ therefore must not be listed.
+
+ In addition to the properties listed above, a state node may require
+ additional properties specific to the entry-method defined in the
+ idle-states node. Please refer to the entry-method bindings
+ documentation for properties definitions.
+
+ properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - qcom,idle-state-ret
+ - qcom,idle-state-spc
+ - qcom,idle-state-pc
+ - const: arm,idle-state
+ - enum:
+ - arm,idle-state
+ - riscv,idle-state
+
+ arm,psci-suspend-param:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: |
+ power_state parameter to pass to the ARM PSCI suspend call.
+
+ Device tree nodes that require usage of PSCI CPU_SUSPEND function
+ (i.e. idle states node with entry-method property is set to "psci")
+ must specify this property.
+
+ riscv,sbi-suspend-param:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: |
+ suspend_type parameter to pass to the RISC-V SBI HSM suspend call.
+
+ This property is required in idle state nodes of device tree meant
+ for RISC-V systems. For more details on the suspend_type parameter
+ refer the SBI specification v0.3 (or higher) [7].
+
+ local-timer-stop:
+ description:
+ If present the CPU local timer control logic is
+ lost on state entry, otherwise it is retained.
+ type: boolean
+
+ entry-latency-us:
+ description:
+ Worst case latency in microseconds required to enter the idle state.
+
+ exit-latency-us:
+ description:
+ Worst case latency in microseconds required to exit the idle state.
+ The exit-latency-us duration may be guaranteed only after
+ entry-latency-us has passed.
+
+ min-residency-us:
+ description:
+ Minimum residency duration in microseconds, inclusive of preparation
+ and entry, for this idle state to be considered worthwhile energy wise
+ (refer to section 2 of this document for a complete description).
+
+ wakeup-latency-us:
+ description: |
+ Maximum delay between the signaling of a wake-up event and the CPU
+ being able to execute normal code again. If omitted, this is assumed
+ to be equal to:
+
+ entry-latency-us + exit-latency-us
+
+ It is important to supply this value on systems where the duration of
+ PREP phase (see diagram 1, section 2) is non-neglibigle. In such
+ systems entry-latency-us + exit-latency-us will exceed
+ wakeup-latency-us by this duration.
+
+ idle-state-name:
+ $ref: /schemas/types.yaml#/definitions/string
+ description:
+ A string used as a descriptive name for the idle state.
+
+ additionalProperties: false
+
+ required:
+ - compatible
+ - entry-latency-us
+ - exit-latency-us
+ - min-residency-us
+
+additionalProperties: false
+
+examples:
+ - |
+
+ cpus {
+ #size-cells = <0>;
+ #address-cells = <2>;
+
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x0 0x0>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_RETENTION_0_0>, <&CPU_SLEEP_0_0>,
+ <&CLUSTER_RETENTION_0>, <&CLUSTER_SLEEP_0>;
+ };
+
+ cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x0 0x1>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_RETENTION_0_0>, <&CPU_SLEEP_0_0>,
+ <&CLUSTER_RETENTION_0>, <&CLUSTER_SLEEP_0>;
+ };
+
+ cpu@100 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x0 0x100>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_RETENTION_0_0>, <&CPU_SLEEP_0_0>,
+ <&CLUSTER_RETENTION_0>, <&CLUSTER_SLEEP_0>;
+ };
+
+ cpu@101 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x0 0x101>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_RETENTION_0_0>, <&CPU_SLEEP_0_0>,
+ <&CLUSTER_RETENTION_0>, <&CLUSTER_SLEEP_0>;
+ };
+
+ cpu@10000 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x0 0x10000>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_RETENTION_0_0>, <&CPU_SLEEP_0_0>,
+ <&CLUSTER_RETENTION_0>, <&CLUSTER_SLEEP_0>;
+ };
+
+ cpu@10001 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x0 0x10001>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_RETENTION_0_0>, <&CPU_SLEEP_0_0>,
+ <&CLUSTER_RETENTION_0>, <&CLUSTER_SLEEP_0>;
+ };
+
+ cpu@10100 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x0 0x10100>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_RETENTION_0_0>, <&CPU_SLEEP_0_0>,
+ <&CLUSTER_RETENTION_0>, <&CLUSTER_SLEEP_0>;
+ };
+
+ cpu@10101 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x0 0x10101>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_RETENTION_0_0>, <&CPU_SLEEP_0_0>,
+ <&CLUSTER_RETENTION_0>, <&CLUSTER_SLEEP_0>;
+ };
+
+ cpu@100000000 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53";
+ reg = <0x1 0x0>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_RETENTION_1_0>, <&CPU_SLEEP_1_0>,
+ <&CLUSTER_RETENTION_1>, <&CLUSTER_SLEEP_1>;
+ };
+
+ cpu@100000001 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53";
+ reg = <0x1 0x1>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_RETENTION_1_0>, <&CPU_SLEEP_1_0>,
+ <&CLUSTER_RETENTION_1>, <&CLUSTER_SLEEP_1>;
+ };
+
+ cpu@100000100 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53";
+ reg = <0x1 0x100>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_RETENTION_1_0>, <&CPU_SLEEP_1_0>,
+ <&CLUSTER_RETENTION_1>, <&CLUSTER_SLEEP_1>;
+ };
+
+ cpu@100000101 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53";
+ reg = <0x1 0x101>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_RETENTION_1_0>, <&CPU_SLEEP_1_0>,
+ <&CLUSTER_RETENTION_1>, <&CLUSTER_SLEEP_1>;
+ };
+
+ cpu@100010000 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53";
+ reg = <0x1 0x10000>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_RETENTION_1_0>, <&CPU_SLEEP_1_0>,
+ <&CLUSTER_RETENTION_1>, <&CLUSTER_SLEEP_1>;
+ };
+
+ cpu@100010001 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53";
+ reg = <0x1 0x10001>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_RETENTION_1_0>, <&CPU_SLEEP_1_0>,
+ <&CLUSTER_RETENTION_1>, <&CLUSTER_SLEEP_1>;
+ };
+
+ cpu@100010100 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53";
+ reg = <0x1 0x10100>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_RETENTION_1_0>, <&CPU_SLEEP_1_0>,
+ <&CLUSTER_RETENTION_1>, <&CLUSTER_SLEEP_1>;
+ };
+
+ cpu@100010101 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53";
+ reg = <0x1 0x10101>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_RETENTION_1_0>, <&CPU_SLEEP_1_0>,
+ <&CLUSTER_RETENTION_1>, <&CLUSTER_SLEEP_1>;
+ };
+
+ idle-states {
+ entry-method = "psci";
+
+ CPU_RETENTION_0_0: cpu-retention-0-0 {
+ compatible = "arm,idle-state";
+ arm,psci-suspend-param = <0x0010000>;
+ entry-latency-us = <20>;
+ exit-latency-us = <40>;
+ min-residency-us = <80>;
+ };
+
+ CLUSTER_RETENTION_0: cluster-retention-0 {
+ compatible = "arm,idle-state";
+ local-timer-stop;
+ arm,psci-suspend-param = <0x1010000>;
+ entry-latency-us = <50>;
+ exit-latency-us = <100>;
+ min-residency-us = <250>;
+ wakeup-latency-us = <130>;
+ };
+
+ CPU_SLEEP_0_0: cpu-sleep-0-0 {
+ compatible = "arm,idle-state";
+ local-timer-stop;
+ arm,psci-suspend-param = <0x0010000>;
+ entry-latency-us = <250>;
+ exit-latency-us = <500>;
+ min-residency-us = <950>;
+ };
+
+ CLUSTER_SLEEP_0: cluster-sleep-0 {
+ compatible = "arm,idle-state";
+ local-timer-stop;
+ arm,psci-suspend-param = <0x1010000>;
+ entry-latency-us = <600>;
+ exit-latency-us = <1100>;
+ min-residency-us = <2700>;
+ wakeup-latency-us = <1500>;
+ };
+
+ CPU_RETENTION_1_0: cpu-retention-1-0 {
+ compatible = "arm,idle-state";
+ arm,psci-suspend-param = <0x0010000>;
+ entry-latency-us = <20>;
+ exit-latency-us = <40>;
+ min-residency-us = <90>;
+ };
+
+ CLUSTER_RETENTION_1: cluster-retention-1 {
+ compatible = "arm,idle-state";
+ local-timer-stop;
+ arm,psci-suspend-param = <0x1010000>;
+ entry-latency-us = <50>;
+ exit-latency-us = <100>;
+ min-residency-us = <270>;
+ wakeup-latency-us = <100>;
+ };
+
+ CPU_SLEEP_1_0: cpu-sleep-1-0 {
+ compatible = "arm,idle-state";
+ local-timer-stop;
+ arm,psci-suspend-param = <0x0010000>;
+ entry-latency-us = <70>;
+ exit-latency-us = <100>;
+ min-residency-us = <300>;
+ wakeup-latency-us = <150>;
+ };
+
+ CLUSTER_SLEEP_1: cluster-sleep-1 {
+ compatible = "arm,idle-state";
+ local-timer-stop;
+ arm,psci-suspend-param = <0x1010000>;
+ entry-latency-us = <500>;
+ exit-latency-us = <1200>;
+ min-residency-us = <3500>;
+ wakeup-latency-us = <1300>;
+ };
+ };
+ };
+
+ - |
+ // Example 2 (ARM 32-bit, 8-cpu system, two clusters):
+
+ cpus {
+ #size-cells = <0>;
+ #address-cells = <1>;
+
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <0x0>;
+ cpu-idle-states = <&cpu_sleep_0_0>, <&cluster_sleep_0>;
+ };
+
+ cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <0x1>;
+ cpu-idle-states = <&cpu_sleep_0_0>, <&cluster_sleep_0>;
+ };
+
+ cpu@2 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <0x2>;
+ cpu-idle-states = <&cpu_sleep_0_0>, <&cluster_sleep_0>;
+ };
+
+ cpu@3 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <0x3>;
+ cpu-idle-states = <&cpu_sleep_0_0>, <&cluster_sleep_0>;
+ };
+
+ cpu@100 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a7";
+ reg = <0x100>;
+ cpu-idle-states = <&cpu_sleep_1_0>, <&cluster_sleep_1>;
+ };
+
+ cpu@101 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a7";
+ reg = <0x101>;
+ cpu-idle-states = <&cpu_sleep_1_0>, <&cluster_sleep_1>;
+ };
+
+ cpu@102 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a7";
+ reg = <0x102>;
+ cpu-idle-states = <&cpu_sleep_1_0>, <&cluster_sleep_1>;
+ };
+
+ cpu@103 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a7";
+ reg = <0x103>;
+ cpu-idle-states = <&cpu_sleep_1_0>, <&cluster_sleep_1>;
+ };
+
+ idle-states {
+ cpu_sleep_0_0: cpu-sleep-0-0 {
+ compatible = "arm,idle-state";
+ local-timer-stop;
+ entry-latency-us = <200>;
+ exit-latency-us = <100>;
+ min-residency-us = <400>;
+ wakeup-latency-us = <250>;
+ };
+
+ cluster_sleep_0: cluster-sleep-0 {
+ compatible = "arm,idle-state";
+ local-timer-stop;
+ entry-latency-us = <500>;
+ exit-latency-us = <1500>;
+ min-residency-us = <2500>;
+ wakeup-latency-us = <1700>;
+ };
+
+ cpu_sleep_1_0: cpu-sleep-1-0 {
+ compatible = "arm,idle-state";
+ local-timer-stop;
+ entry-latency-us = <300>;
+ exit-latency-us = <500>;
+ min-residency-us = <900>;
+ wakeup-latency-us = <600>;
+ };
+
+ cluster_sleep_1: cluster-sleep-1 {
+ compatible = "arm,idle-state";
+ local-timer-stop;
+ entry-latency-us = <800>;
+ exit-latency-us = <2000>;
+ min-residency-us = <6500>;
+ wakeup-latency-us = <2300>;
+ };
+ };
+ };
+
+ - |
+ // Example 3 (RISC-V 64-bit, 4-cpu systems, two clusters):
+
+ cpus {
+ #size-cells = <0>;
+ #address-cells = <1>;
+
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "riscv";
+ reg = <0x0>;
+ riscv,isa = "rv64imafdc";
+ mmu-type = "riscv,sv48";
+ cpu-idle-states = <&CPU_RET_0_0>, <&CPU_NONRET_0_0>,
+ <&CLUSTER_RET_0>, <&CLUSTER_NONRET_0>;
+
+ cpu_intc0: interrupt-controller {
+ #interrupt-cells = <1>;
+ compatible = "riscv,cpu-intc";
+ interrupt-controller;
+ };
+ };
+
+ cpu@1 {
+ device_type = "cpu";
+ compatible = "riscv";
+ reg = <0x1>;
+ riscv,isa = "rv64imafdc";
+ mmu-type = "riscv,sv48";
+ cpu-idle-states = <&CPU_RET_0_0>, <&CPU_NONRET_0_0>,
+ <&CLUSTER_RET_0>, <&CLUSTER_NONRET_0>;
+
+ cpu_intc1: interrupt-controller {
+ #interrupt-cells = <1>;
+ compatible = "riscv,cpu-intc";
+ interrupt-controller;
+ };
+ };
+
+ cpu@10 {
+ device_type = "cpu";
+ compatible = "riscv";
+ reg = <0x10>;
+ riscv,isa = "rv64imafdc";
+ mmu-type = "riscv,sv48";
+ cpu-idle-states = <&CPU_RET_1_0>, <&CPU_NONRET_1_0>,
+ <&CLUSTER_RET_1>, <&CLUSTER_NONRET_1>;
+
+ cpu_intc10: interrupt-controller {
+ #interrupt-cells = <1>;
+ compatible = "riscv,cpu-intc";
+ interrupt-controller;
+ };
+ };
+
+ cpu@11 {
+ device_type = "cpu";
+ compatible = "riscv";
+ reg = <0x11>;
+ riscv,isa = "rv64imafdc";
+ mmu-type = "riscv,sv48";
+ cpu-idle-states = <&CPU_RET_1_0>, <&CPU_NONRET_1_0>,
+ <&CLUSTER_RET_1>, <&CLUSTER_NONRET_1>;
+
+ cpu_intc11: interrupt-controller {
+ #interrupt-cells = <1>;
+ compatible = "riscv,cpu-intc";
+ interrupt-controller;
+ };
+ };
+
+ idle-states {
+ CPU_RET_0_0: cpu-retentive-0-0 {
+ compatible = "riscv,idle-state";
+ riscv,sbi-suspend-param = <0x10000000>;
+ entry-latency-us = <20>;
+ exit-latency-us = <40>;
+ min-residency-us = <80>;
+ };
+
+ CPU_NONRET_0_0: cpu-nonretentive-0-0 {
+ compatible = "riscv,idle-state";
+ riscv,sbi-suspend-param = <0x90000000>;
+ entry-latency-us = <250>;
+ exit-latency-us = <500>;
+ min-residency-us = <950>;
+ };
+
+ CLUSTER_RET_0: cluster-retentive-0 {
+ compatible = "riscv,idle-state";
+ riscv,sbi-suspend-param = <0x11000000>;
+ local-timer-stop;
+ entry-latency-us = <50>;
+ exit-latency-us = <100>;
+ min-residency-us = <250>;
+ wakeup-latency-us = <130>;
+ };
+
+ CLUSTER_NONRET_0: cluster-nonretentive-0 {
+ compatible = "riscv,idle-state";
+ riscv,sbi-suspend-param = <0x91000000>;
+ local-timer-stop;
+ entry-latency-us = <600>;
+ exit-latency-us = <1100>;
+ min-residency-us = <2700>;
+ wakeup-latency-us = <1500>;
+ };
+
+ CPU_RET_1_0: cpu-retentive-1-0 {
+ compatible = "riscv,idle-state";
+ riscv,sbi-suspend-param = <0x10000010>;
+ entry-latency-us = <20>;
+ exit-latency-us = <40>;
+ min-residency-us = <80>;
+ };
+
+ CPU_NONRET_1_0: cpu-nonretentive-1-0 {
+ compatible = "riscv,idle-state";
+ riscv,sbi-suspend-param = <0x90000010>;
+ entry-latency-us = <250>;
+ exit-latency-us = <500>;
+ min-residency-us = <950>;
+ };
+
+ CLUSTER_RET_1: cluster-retentive-1 {
+ compatible = "riscv,idle-state";
+ riscv,sbi-suspend-param = <0x11000010>;
+ local-timer-stop;
+ entry-latency-us = <50>;
+ exit-latency-us = <100>;
+ min-residency-us = <250>;
+ wakeup-latency-us = <130>;
+ };
+
+ CLUSTER_NONRET_1: cluster-nonretentive-1 {
+ compatible = "riscv,idle-state";
+ riscv,sbi-suspend-param = <0x91000010>;
+ local-timer-stop;
+ entry-latency-us = <600>;
+ exit-latency-us = <1100>;
+ min-residency-us = <2700>;
+ wakeup-latency-us = <1500>;
+ };
+ };
+ };
+
+ // Example 4 - Qualcomm SPC
+ idle-states {
+ cpu_spc: cpu-spc {
+ compatible = "qcom,idle-state-spc", "arm,idle-state";
+ entry-latency-us = <150>;
+ exit-latency-us = <200>;
+ min-residency-us = <2000>;
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/cpufreq/airoha,en7581-cpufreq.yaml b/Documentation/devicetree/bindings/cpufreq/airoha,en7581-cpufreq.yaml
new file mode 100644
index 000000000000..7d4510b3219c
--- /dev/null
+++ b/Documentation/devicetree/bindings/cpufreq/airoha,en7581-cpufreq.yaml
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/cpufreq/airoha,en7581-cpufreq.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Airoha EN7581 CPUFreq
+
+maintainers:
+ - Christian Marangi <ansuelsmth@gmail.com>
+
+description: |
+ On newer Airoha SoC, CPU Frequency is scaled indirectly with SMC commands
+ to ATF.
+
+ A virtual clock is exposed. This virtual clock is a get-only clock and
+ is used to expose the current global CPU clock. The frequency info comes
+ by the output of the SMC command that reports the clock in MHz.
+
+ The SMC sets the CPU clock by providing an index, this is modelled as
+ performance states in a power domain.
+
+ CPUs can't be individually scaled as the CPU frequency is shared across
+ all CPUs and is global.
+
+properties:
+ compatible:
+ const: airoha,en7581-cpufreq
+
+ '#clock-cells':
+ const: 0
+
+ '#power-domain-cells':
+ const: 0
+
+ operating-points-v2: true
+
+required:
+ - compatible
+ - '#clock-cells'
+ - '#power-domain-cells'
+ - operating-points-v2
+
+additionalProperties: false
+
+examples:
+ - |
+ performance-domain {
+ compatible = "airoha,en7581-cpufreq";
+
+ operating-points-v2 = <&cpu_smcc_opp_table>;
+
+ #power-domain-cells = <0>;
+ #clock-cells = <0>;
+ };
diff --git a/Documentation/devicetree/bindings/cpufreq/apple,cluster-cpufreq.yaml b/Documentation/devicetree/bindings/cpufreq/apple,cluster-cpufreq.yaml
new file mode 100644
index 000000000000..b51913a81791
--- /dev/null
+++ b/Documentation/devicetree/bindings/cpufreq/apple,cluster-cpufreq.yaml
@@ -0,0 +1,128 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/cpufreq/apple,cluster-cpufreq.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Apple SoC cluster cpufreq device
+
+maintainers:
+ - Hector Martin <marcan@marcan.st>
+
+description: |
+ Apple SoCs (e.g. M1) have a per-cpu-cluster DVFS controller that is part of
+ the cluster management register block. This binding uses the standard
+ operating-points-v2 table to define the CPU performance states, with the
+ opp-level property specifying the hardware p-state index for that level.
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - apple,t8103-cluster-cpufreq
+ - apple,t8112-cluster-cpufreq
+ - const: apple,cluster-cpufreq
+ - items:
+ - enum:
+ - apple,s8000-cluster-cpufreq
+ - apple,t8010-cluster-cpufreq
+ - apple,t8015-cluster-cpufreq
+ - apple,t6000-cluster-cpufreq
+ - const: apple,t8103-cluster-cpufreq
+ - const: apple,cluster-cpufreq
+ - items:
+ - const: apple,t7000-cluster-cpufreq
+ - const: apple,s5l8960x-cluster-cpufreq
+ - const: apple,s5l8960x-cluster-cpufreq
+ - items:
+ - const: apple,t6020-cluster-cpufreq
+ - const: apple,t8112-cluster-cpufreq
+
+ reg:
+ maxItems: 1
+
+ '#performance-domain-cells':
+ const: 0
+
+required:
+ - compatible
+ - reg
+ - '#performance-domain-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ // This example shows a single CPU per domain and 2 domains,
+ // with two p-states per domain.
+ // Shipping hardware has 2-4 CPUs per domain and 2-6 domains.
+ cpus {
+ #address-cells = <2>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ compatible = "apple,icestorm";
+ device_type = "cpu";
+ reg = <0x0 0x0>;
+ operating-points-v2 = <&ecluster_opp>;
+ performance-domains = <&cpufreq_e>;
+ };
+
+ cpu@10100 {
+ compatible = "apple,firestorm";
+ device_type = "cpu";
+ reg = <0x0 0x10100>;
+ operating-points-v2 = <&pcluster_opp>;
+ performance-domains = <&cpufreq_p>;
+ };
+ };
+
+ ecluster_opp: opp-table-0 {
+ compatible = "operating-points-v2";
+ opp-shared;
+
+ opp01 {
+ opp-hz = /bits/ 64 <600000000>;
+ opp-level = <1>;
+ clock-latency-ns = <7500>;
+ };
+ opp02 {
+ opp-hz = /bits/ 64 <972000000>;
+ opp-level = <2>;
+ clock-latency-ns = <22000>;
+ };
+ };
+
+ pcluster_opp: opp-table-1 {
+ compatible = "operating-points-v2";
+ opp-shared;
+
+ opp01 {
+ opp-hz = /bits/ 64 <600000000>;
+ opp-level = <1>;
+ clock-latency-ns = <8000>;
+ };
+ opp02 {
+ opp-hz = /bits/ 64 <828000000>;
+ opp-level = <2>;
+ clock-latency-ns = <19000>;
+ };
+ };
+
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ cpufreq_e: performance-controller@210e20000 {
+ compatible = "apple,t8103-cluster-cpufreq", "apple,cluster-cpufreq";
+ reg = <0x2 0x10e20000 0 0x1000>;
+ #performance-domain-cells = <0>;
+ };
+
+ cpufreq_p: performance-controller@211e20000 {
+ compatible = "apple,t8103-cluster-cpufreq", "apple,cluster-cpufreq";
+ reg = <0x2 0x11e20000 0 0x1000>;
+ #performance-domain-cells = <0>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/cpufreq/brcm,stb-avs-cpu-freq.txt b/Documentation/devicetree/bindings/cpufreq/brcm,stb-avs-cpu-freq.txt
index 73470ecd1f12..ce91a9197697 100644
--- a/Documentation/devicetree/bindings/cpufreq/brcm,stb-avs-cpu-freq.txt
+++ b/Documentation/devicetree/bindings/cpufreq/brcm,stb-avs-cpu-freq.txt
@@ -16,7 +16,7 @@ has been processed. See [2] for more information on the brcm,l2-intc node.
firmware. On some SoCs, this firmware supports DFS and DVFS in addition to
Adaptive Voltage Scaling.
-[2] Documentation/devicetree/bindings/interrupt-controller/brcm,l2-intc.txt
+[2] Documentation/devicetree/bindings/interrupt-controller/brcm,l2-intc.yaml
Node brcm,avs-cpu-data-mem
diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-dt.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-dt.txt
deleted file mode 100644
index 332aed8f4597..000000000000
--- a/Documentation/devicetree/bindings/cpufreq/cpufreq-dt.txt
+++ /dev/null
@@ -1,60 +0,0 @@
-Generic cpufreq driver
-
-It is a generic DT based cpufreq driver for frequency management. It supports
-both uniprocessor (UP) and symmetric multiprocessor (SMP) systems which share
-clock and voltage across all CPUs.
-
-Both required and optional properties listed below must be defined
-under node /cpus/cpu@0.
-
-Required properties:
-- None
-
-Optional properties:
-- operating-points: Refer to Documentation/devicetree/bindings/opp/opp.txt for
- details. OPPs *must* be supplied either via DT, i.e. this property, or
- populated at runtime.
-- clock-latency: Specify the possible maximum transition latency for clock,
- in unit of nanoseconds.
-- voltage-tolerance: Specify the CPU voltage tolerance in percentage.
-- #cooling-cells:
- Please refer to Documentation/devicetree/bindings/thermal/thermal.txt.
-
-Examples:
-
-cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- cpu@0 {
- compatible = "arm,cortex-a9";
- reg = <0>;
- next-level-cache = <&L2>;
- operating-points = <
- /* kHz uV */
- 792000 1100000
- 396000 950000
- 198000 850000
- >;
- clock-latency = <61036>; /* two CLK32 periods */
- #cooling-cells = <2>;
- };
-
- cpu@1 {
- compatible = "arm,cortex-a9";
- reg = <1>;
- next-level-cache = <&L2>;
- };
-
- cpu@2 {
- compatible = "arm,cortex-a9";
- reg = <2>;
- next-level-cache = <&L2>;
- };
-
- cpu@3 {
- compatible = "arm,cortex-a9";
- reg = <3>;
- next-level-cache = <&L2>;
- };
-};
diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-mediatek-hw.yaml b/Documentation/devicetree/bindings/cpufreq/cpufreq-mediatek-hw.yaml
new file mode 100644
index 000000000000..d0aecde2b89b
--- /dev/null
+++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-mediatek-hw.yaml
@@ -0,0 +1,70 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/cpufreq/cpufreq-mediatek-hw.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek's CPUFREQ
+
+maintainers:
+ - Hector Yuan <hector.yuan@mediatek.com>
+
+description:
+ CPUFREQ HW is a hardware engine used by MediaTek SoCs to
+ manage frequency in hardware. It is capable of controlling
+ frequency for multiple clusters.
+
+properties:
+ compatible:
+ const: mediatek,cpufreq-hw
+
+ reg:
+ minItems: 1
+ maxItems: 2
+ description:
+ Addresses and sizes for the memory of the HW bases in
+ each frequency domain. Each entry corresponds to
+ a register bank for each frequency domain present.
+
+ "#performance-domain-cells":
+ description:
+ Number of cells in a performance domain specifier.
+ Set const to 1 here for nodes providing multiple
+ performance domains.
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - "#performance-domain-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu0: cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a55";
+ enable-method = "psci";
+ performance-domains = <&performance 0>;
+ reg = <0x000>;
+ };
+ };
+
+ /* ... */
+
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ performance: performance-controller@11bc00 {
+ compatible = "mediatek,cpufreq-hw";
+ reg = <0 0x0011bc10 0 0x120>, <0 0x0011bd30 0 0x120>;
+
+ #performance-domain-cells = <1>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-mediatek.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-mediatek.txt
deleted file mode 100644
index 0551c78619de..000000000000
--- a/Documentation/devicetree/bindings/cpufreq/cpufreq-mediatek.txt
+++ /dev/null
@@ -1,243 +0,0 @@
-Binding for MediaTek's CPUFreq driver
-=====================================
-
-Required properties:
-- clocks: A list of phandle + clock-specifier pairs for the clocks listed in clock names.
-- clock-names: Should contain the following:
- "cpu" - The multiplexer for clock input of CPU cluster.
- "intermediate" - A parent of "cpu" clock which is used as "intermediate" clock
- source (usually MAINPLL) when the original CPU PLL is under
- transition and not stable yet.
- Please refer to Documentation/devicetree/bindings/clock/clock-bindings.txt for
- generic clock consumer properties.
-- operating-points-v2: Please refer to Documentation/devicetree/bindings/opp/opp.txt
- for detail.
-- proc-supply: Regulator for Vproc of CPU cluster.
-
-Optional properties:
-- sram-supply: Regulator for Vsram of CPU cluster. When present, the cpufreq driver
- needs to do "voltage tracking" to step by step scale up/down Vproc and
- Vsram to fit SoC specific needs. When absent, the voltage scaling
- flow is handled by hardware, hence no software "voltage tracking" is
- needed.
-- #cooling-cells:
- Please refer to Documentation/devicetree/bindings/thermal/thermal.txt
- for detail.
-
-Example 1 (MT7623 SoC):
-
- cpu_opp_table: opp_table {
- compatible = "operating-points-v2";
- opp-shared;
-
- opp-598000000 {
- opp-hz = /bits/ 64 <598000000>;
- opp-microvolt = <1050000>;
- };
-
- opp-747500000 {
- opp-hz = /bits/ 64 <747500000>;
- opp-microvolt = <1050000>;
- };
-
- opp-1040000000 {
- opp-hz = /bits/ 64 <1040000000>;
- opp-microvolt = <1150000>;
- };
-
- opp-1196000000 {
- opp-hz = /bits/ 64 <1196000000>;
- opp-microvolt = <1200000>;
- };
-
- opp-1300000000 {
- opp-hz = /bits/ 64 <1300000000>;
- opp-microvolt = <1300000>;
- };
- };
-
- cpu0: cpu@0 {
- device_type = "cpu";
- compatible = "arm,cortex-a7";
- reg = <0x0>;
- clocks = <&infracfg CLK_INFRA_CPUSEL>,
- <&apmixedsys CLK_APMIXED_MAINPLL>;
- clock-names = "cpu", "intermediate";
- operating-points-v2 = <&cpu_opp_table>;
- #cooling-cells = <2>;
- };
- cpu@1 {
- device_type = "cpu";
- compatible = "arm,cortex-a7";
- reg = <0x1>;
- operating-points-v2 = <&cpu_opp_table>;
- };
- cpu@2 {
- device_type = "cpu";
- compatible = "arm,cortex-a7";
- reg = <0x2>;
- operating-points-v2 = <&cpu_opp_table>;
- };
- cpu@3 {
- device_type = "cpu";
- compatible = "arm,cortex-a7";
- reg = <0x3>;
- operating-points-v2 = <&cpu_opp_table>;
- };
-
-Example 2 (MT8173 SoC):
- cpu_opp_table_a: opp_table_a {
- compatible = "operating-points-v2";
- opp-shared;
-
- opp-507000000 {
- opp-hz = /bits/ 64 <507000000>;
- opp-microvolt = <859000>;
- };
-
- opp-702000000 {
- opp-hz = /bits/ 64 <702000000>;
- opp-microvolt = <908000>;
- };
-
- opp-1001000000 {
- opp-hz = /bits/ 64 <1001000000>;
- opp-microvolt = <983000>;
- };
-
- opp-1105000000 {
- opp-hz = /bits/ 64 <1105000000>;
- opp-microvolt = <1009000>;
- };
-
- opp-1183000000 {
- opp-hz = /bits/ 64 <1183000000>;
- opp-microvolt = <1028000>;
- };
-
- opp-1404000000 {
- opp-hz = /bits/ 64 <1404000000>;
- opp-microvolt = <1083000>;
- };
-
- opp-1508000000 {
- opp-hz = /bits/ 64 <1508000000>;
- opp-microvolt = <1109000>;
- };
-
- opp-1573000000 {
- opp-hz = /bits/ 64 <1573000000>;
- opp-microvolt = <1125000>;
- };
- };
-
- cpu_opp_table_b: opp_table_b {
- compatible = "operating-points-v2";
- opp-shared;
-
- opp-507000000 {
- opp-hz = /bits/ 64 <507000000>;
- opp-microvolt = <828000>;
- };
-
- opp-702000000 {
- opp-hz = /bits/ 64 <702000000>;
- opp-microvolt = <867000>;
- };
-
- opp-1001000000 {
- opp-hz = /bits/ 64 <1001000000>;
- opp-microvolt = <927000>;
- };
-
- opp-1209000000 {
- opp-hz = /bits/ 64 <1209000000>;
- opp-microvolt = <968000>;
- };
-
- opp-1404000000 {
- opp-hz = /bits/ 64 <1007000000>;
- opp-microvolt = <1028000>;
- };
-
- opp-1612000000 {
- opp-hz = /bits/ 64 <1612000000>;
- opp-microvolt = <1049000>;
- };
-
- opp-1807000000 {
- opp-hz = /bits/ 64 <1807000000>;
- opp-microvolt = <1089000>;
- };
-
- opp-1989000000 {
- opp-hz = /bits/ 64 <1989000000>;
- opp-microvolt = <1125000>;
- };
- };
-
- cpu0: cpu@0 {
- device_type = "cpu";
- compatible = "arm,cortex-a53";
- reg = <0x000>;
- enable-method = "psci";
- cpu-idle-states = <&CPU_SLEEP_0>;
- clocks = <&infracfg CLK_INFRA_CA53SEL>,
- <&apmixedsys CLK_APMIXED_MAINPLL>;
- clock-names = "cpu", "intermediate";
- operating-points-v2 = <&cpu_opp_table_a>;
- };
-
- cpu1: cpu@1 {
- device_type = "cpu";
- compatible = "arm,cortex-a53";
- reg = <0x001>;
- enable-method = "psci";
- cpu-idle-states = <&CPU_SLEEP_0>;
- clocks = <&infracfg CLK_INFRA_CA53SEL>,
- <&apmixedsys CLK_APMIXED_MAINPLL>;
- clock-names = "cpu", "intermediate";
- operating-points-v2 = <&cpu_opp_table_a>;
- };
-
- cpu2: cpu@100 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x100>;
- enable-method = "psci";
- cpu-idle-states = <&CPU_SLEEP_0>;
- clocks = <&infracfg CLK_INFRA_CA57SEL>,
- <&apmixedsys CLK_APMIXED_MAINPLL>;
- clock-names = "cpu", "intermediate";
- operating-points-v2 = <&cpu_opp_table_b>;
- };
-
- cpu3: cpu@101 {
- device_type = "cpu";
- compatible = "arm,cortex-a57";
- reg = <0x101>;
- enable-method = "psci";
- cpu-idle-states = <&CPU_SLEEP_0>;
- clocks = <&infracfg CLK_INFRA_CA57SEL>,
- <&apmixedsys CLK_APMIXED_MAINPLL>;
- clock-names = "cpu", "intermediate";
- operating-points-v2 = <&cpu_opp_table_b>;
- };
-
- &cpu0 {
- proc-supply = <&mt6397_vpca15_reg>;
- };
-
- &cpu1 {
- proc-supply = <&mt6397_vpca15_reg>;
- };
-
- &cpu2 {
- proc-supply = <&da9211_vcpu_reg>;
- sram-supply = <&mt6397_vsramca7_reg>;
- };
-
- &cpu3 {
- proc-supply = <&da9211_vcpu_reg>;
- sram-supply = <&mt6397_vsramca7_reg>;
- };
diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml
new file mode 100644
index 000000000000..2d42fc3d8ef8
--- /dev/null
+++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml
@@ -0,0 +1,406 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/cpufreq/cpufreq-qcom-hw.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Technologies, Inc. CPUFREQ
+
+maintainers:
+ - Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+
+description: |
+
+ CPUFREQ HW is a hardware engine used by some Qualcomm Technologies, Inc. (QTI)
+ SoCs to manage frequency in hardware. It is capable of controlling frequency
+ for multiple clusters.
+
+properties:
+ compatible:
+ oneOf:
+ - description: v1 of CPUFREQ HW
+ items:
+ - enum:
+ - qcom,qcm2290-cpufreq-hw
+ - qcom,qcs615-cpufreq-hw
+ - qcom,sc7180-cpufreq-hw
+ - qcom,sc8180x-cpufreq-hw
+ - qcom,sdm670-cpufreq-hw
+ - qcom,sdm845-cpufreq-hw
+ - qcom,sm6115-cpufreq-hw
+ - qcom,sm6350-cpufreq-hw
+ - qcom,sm8150-cpufreq-hw
+ - const: qcom,cpufreq-hw
+
+ - description: v2 of CPUFREQ HW (EPSS)
+ items:
+ - enum:
+ - qcom,qcs8300-cpufreq-epss
+ - qcom,qdu1000-cpufreq-epss
+ - qcom,sa8255p-cpufreq-epss
+ - qcom,sa8775p-cpufreq-epss
+ - qcom,sar2130p-cpufreq-epss
+ - qcom,sc7280-cpufreq-epss
+ - qcom,sc8280xp-cpufreq-epss
+ - qcom,sdx75-cpufreq-epss
+ - qcom,sm4450-cpufreq-epss
+ - qcom,sm6375-cpufreq-epss
+ - qcom,sm8250-cpufreq-epss
+ - qcom,sm8350-cpufreq-epss
+ - qcom,sm8450-cpufreq-epss
+ - qcom,sm8550-cpufreq-epss
+ - qcom,sm8650-cpufreq-epss
+ - const: qcom,cpufreq-epss
+
+ reg:
+ minItems: 1
+ items:
+ - description: Frequency domain 0 register region
+ - description: Frequency domain 1 register region
+ - description: Frequency domain 2 register region
+ - description: Frequency domain 3 register region
+
+ reg-names:
+ minItems: 1
+ items:
+ - const: freq-domain0
+ - const: freq-domain1
+ - const: freq-domain2
+ - const: freq-domain3
+
+ clocks:
+ items:
+ - description: XO Clock
+ - description: GPLL0 Clock
+
+ clock-names:
+ items:
+ - const: xo
+ - const: alternate
+
+ interrupts:
+ minItems: 1
+ maxItems: 4
+
+ interrupt-names:
+ minItems: 1
+ items:
+ - const: dcvsh-irq-0
+ - const: dcvsh-irq-1
+ - const: dcvsh-irq-2
+ - const: dcvsh-irq-3
+
+ '#freq-domain-cells':
+ const: 1
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - '#freq-domain-cells'
+
+additionalProperties: false
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,qcm2290-cpufreq-hw
+ - qcom,sar2130p-cpufreq-epss
+ - qcom,sdx75-cpufreq-epss
+ then:
+ properties:
+ reg:
+ maxItems: 1
+
+ reg-names:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ interrupt-names:
+ maxItems: 1
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,qcs615-cpufreq-hw
+ - qcom,qdu1000-cpufreq-epss
+ - qcom,sa8255p-cpufreq-epss
+ - qcom,sa8775p-cpufreq-epss
+ - qcom,sc7180-cpufreq-hw
+ - qcom,sc8180x-cpufreq-hw
+ - qcom,sc8280xp-cpufreq-epss
+ - qcom,sdm670-cpufreq-hw
+ - qcom,sdm845-cpufreq-hw
+ - qcom,sm4450-cpufreq-epss
+ - qcom,sm6115-cpufreq-hw
+ - qcom,sm6350-cpufreq-hw
+ - qcom,sm6375-cpufreq-epss
+ then:
+ properties:
+ reg:
+ minItems: 2
+ maxItems: 2
+
+ reg-names:
+ minItems: 2
+ maxItems: 2
+
+ interrupts:
+ minItems: 2
+ maxItems: 2
+
+ interrupt-names:
+ minItems: 2
+ maxItems: 2
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,qcs8300-cpufreq-epss
+ - qcom,sc7280-cpufreq-epss
+ - qcom,sm8250-cpufreq-epss
+ - qcom,sm8350-cpufreq-epss
+ - qcom,sm8450-cpufreq-epss
+ - qcom,sm8550-cpufreq-epss
+ then:
+ properties:
+ reg:
+ minItems: 3
+ maxItems: 3
+
+ reg-names:
+ minItems: 3
+ maxItems: 3
+
+ interrupts:
+ minItems: 3
+ maxItems: 3
+
+ interrupt-names:
+ minItems: 3
+ maxItems: 3
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,sm8150-cpufreq-hw
+ then:
+ properties:
+ reg:
+ minItems: 3
+ maxItems: 3
+
+ reg-names:
+ minItems: 3
+ maxItems: 3
+
+ # On some SoCs the Prime core shares the LMH irq with Big cores
+ interrupts:
+ minItems: 2
+ maxItems: 2
+
+ interrupt-names:
+ minItems: 2
+ maxItems: 2
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,sm8650-cpufreq-epss
+ then:
+ properties:
+ reg:
+ minItems: 4
+ maxItems: 4
+
+ reg-names:
+ minItems: 4
+ maxItems: 4
+
+ interrupts:
+ minItems: 4
+ maxItems: 4
+
+ interrupt-names:
+ minItems: 4
+ maxItems: 4
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,gcc-sdm845.h>
+ #include <dt-bindings/clock/qcom,rpmh.h>
+
+ // Example 1: Dual-cluster, Quad-core per cluster. CPUs within a cluster
+ // switch DCVS state together.
+ cpus {
+ #address-cells = <2>;
+ #size-cells = <0>;
+
+ CPU0: cpu@0 {
+ device_type = "cpu";
+ compatible = "qcom,kryo385";
+ reg = <0x0 0x0>;
+ enable-method = "psci";
+ next-level-cache = <&L2_0>;
+ qcom,freq-domain = <&cpufreq_hw 0>;
+ clocks = <&cpufreq_hw 0>;
+ L2_0: l2-cache {
+ compatible = "cache";
+ cache-unified;
+ cache-level = <2>;
+ next-level-cache = <&L3_0>;
+ L3_0: l3-cache {
+ compatible = "cache";
+ cache-unified;
+ cache-level = <3>;
+ };
+ };
+ };
+
+ CPU1: cpu@100 {
+ device_type = "cpu";
+ compatible = "qcom,kryo385";
+ reg = <0x0 0x100>;
+ enable-method = "psci";
+ next-level-cache = <&L2_100>;
+ qcom,freq-domain = <&cpufreq_hw 0>;
+ clocks = <&cpufreq_hw 0>;
+ L2_100: l2-cache {
+ compatible = "cache";
+ cache-unified;
+ cache-level = <2>;
+ next-level-cache = <&L3_0>;
+ };
+ };
+
+ CPU2: cpu@200 {
+ device_type = "cpu";
+ compatible = "qcom,kryo385";
+ reg = <0x0 0x200>;
+ enable-method = "psci";
+ next-level-cache = <&L2_200>;
+ qcom,freq-domain = <&cpufreq_hw 0>;
+ clocks = <&cpufreq_hw 0>;
+ L2_200: l2-cache {
+ compatible = "cache";
+ cache-unified;
+ cache-level = <2>;
+ next-level-cache = <&L3_0>;
+ };
+ };
+
+ CPU3: cpu@300 {
+ device_type = "cpu";
+ compatible = "qcom,kryo385";
+ reg = <0x0 0x300>;
+ enable-method = "psci";
+ next-level-cache = <&L2_300>;
+ qcom,freq-domain = <&cpufreq_hw 0>;
+ clocks = <&cpufreq_hw 0>;
+ L2_300: l2-cache {
+ compatible = "cache";
+ cache-unified;
+ cache-level = <2>;
+ next-level-cache = <&L3_0>;
+ };
+ };
+
+ CPU4: cpu@400 {
+ device_type = "cpu";
+ compatible = "qcom,kryo385";
+ reg = <0x0 0x400>;
+ enable-method = "psci";
+ next-level-cache = <&L2_400>;
+ qcom,freq-domain = <&cpufreq_hw 1>;
+ clocks = <&cpufreq_hw 1>;
+ L2_400: l2-cache {
+ compatible = "cache";
+ cache-unified;
+ cache-level = <2>;
+ next-level-cache = <&L3_0>;
+ };
+ };
+
+ CPU5: cpu@500 {
+ device_type = "cpu";
+ compatible = "qcom,kryo385";
+ reg = <0x0 0x500>;
+ enable-method = "psci";
+ next-level-cache = <&L2_500>;
+ qcom,freq-domain = <&cpufreq_hw 1>;
+ clocks = <&cpufreq_hw 1>;
+ L2_500: l2-cache {
+ compatible = "cache";
+ cache-unified;
+ cache-level = <2>;
+ next-level-cache = <&L3_0>;
+ };
+ };
+
+ CPU6: cpu@600 {
+ device_type = "cpu";
+ compatible = "qcom,kryo385";
+ reg = <0x0 0x600>;
+ enable-method = "psci";
+ next-level-cache = <&L2_600>;
+ qcom,freq-domain = <&cpufreq_hw 1>;
+ clocks = <&cpufreq_hw 1>;
+ L2_600: l2-cache {
+ compatible = "cache";
+ cache-unified;
+ cache-level = <2>;
+ next-level-cache = <&L3_0>;
+ };
+ };
+
+ CPU7: cpu@700 {
+ device_type = "cpu";
+ compatible = "qcom,kryo385";
+ reg = <0x0 0x700>;
+ enable-method = "psci";
+ next-level-cache = <&L2_700>;
+ qcom,freq-domain = <&cpufreq_hw 1>;
+ clocks = <&cpufreq_hw 1>;
+ L2_700: l2-cache {
+ compatible = "cache";
+ cache-unified;
+ cache-level = <2>;
+ next-level-cache = <&L3_0>;
+ };
+ };
+ };
+
+ soc {
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ cpufreq@17d43000 {
+ compatible = "qcom,sdm845-cpufreq-hw", "qcom,cpufreq-hw";
+ reg = <0x17d43000 0x1400>, <0x17d45800 0x1400>;
+ reg-names = "freq-domain0", "freq-domain1";
+
+ clocks = <&rpmhcc RPMH_CXO_CLK>, <&gcc GPLL0>;
+ clock-names = "xo", "alternate";
+
+ #freq-domain-cells = <1>;
+ #clock-cells = <1>;
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-st.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-st.txt
index d91a02a3b6b0..6b0b452acef0 100644
--- a/Documentation/devicetree/bindings/cpufreq/cpufreq-st.txt
+++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-st.txt
@@ -6,8 +6,6 @@ from the SoC, then supplies the OPP framework with 'prop' and 'supported
hardware' information respectively. The framework is then able to read
the DT and operate in the usual way.
-For more information about the expected DT format [See: ../opp/opp.txt].
-
Frequency Scaling only
----------------------
@@ -15,7 +13,7 @@ No vendor specific driver required for this.
Located in CPU's node:
-- operating-points : [See: ../power/opp.txt]
+- operating-points : [See: ../power/opp-v1.yaml]
Example [safe]
--------------
@@ -37,7 +35,7 @@ This requires the ST CPUFreq driver to supply 'process' and 'version' info.
Located in CPU's node:
-- operating-points-v2 : [See ../power/opp.txt]
+- operating-points-v2 : [See ../power/opp-v2.yaml]
Example [unsafe]
----------------
diff --git a/Documentation/devicetree/bindings/cpufreq/imx-cpufreq-dt.txt b/Documentation/devicetree/bindings/cpufreq/imx-cpufreq-dt.txt
new file mode 100644
index 000000000000..87bff5add3f9
--- /dev/null
+++ b/Documentation/devicetree/bindings/cpufreq/imx-cpufreq-dt.txt
@@ -0,0 +1,37 @@
+i.MX CPUFreq-DT OPP bindings
+================================
+
+Certain i.MX SoCs support different OPPs depending on the "market segment" and
+"speed grading" value which are written in fuses. These bits are combined with
+the opp-supported-hw values for each OPP to check if the OPP is allowed.
+
+Required properties:
+--------------------
+
+For each opp entry in 'operating-points-v2' table:
+- opp-supported-hw: Two bitmaps indicating:
+ - Supported speed grade mask
+ - Supported market segment mask
+ 0: Consumer
+ 1: Extended Consumer
+ 2: Industrial
+ 3: Automotive
+
+Example:
+--------
+
+opp_table {
+ compatible = "operating-points-v2";
+ opp-1000000000 {
+ opp-hz = /bits/ 64 <1000000000>;
+ /* grade >= 0, consumer only */
+ opp-supported-hw = <0xf>, <0x3>;
+ };
+
+ opp-1300000000 {
+ opp-hz = /bits/ 64 <1300000000>;
+ opp-microvolt = <1000000>;
+ /* grade >= 1, all segments */
+ opp-supported-hw = <0xe>, <0x7>;
+ };
+}
diff --git a/Documentation/devicetree/bindings/cpufreq/mediatek,mt8196-cpufreq-hw.yaml b/Documentation/devicetree/bindings/cpufreq/mediatek,mt8196-cpufreq-hw.yaml
new file mode 100644
index 000000000000..5f3c7db3f3aa
--- /dev/null
+++ b/Documentation/devicetree/bindings/cpufreq/mediatek,mt8196-cpufreq-hw.yaml
@@ -0,0 +1,82 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/cpufreq/mediatek,mt8196-cpufreq-hw.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek Hybrid CPUFreq for MT8196/MT6991 series SoCs
+
+maintainers:
+ - Nicolas Frattaroli <nicolas.frattaroli@collabora.com>
+
+description:
+ MT8196 uses CPUFreq management hardware that supports dynamic voltage
+ frequency scaling (dvfs), and can support several performance domains.
+
+properties:
+ compatible:
+ const: mediatek,mt8196-cpufreq-hw
+
+ reg:
+ items:
+ - description: FDVFS control register region
+ - description: OPP tables and control for performance domain 0
+ - description: OPP tables and control for performance domain 1
+ - description: OPP tables and control for performance domain 2
+
+ "#performance-domain-cells":
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - "#performance-domain-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu0: cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a720";
+ enable-method = "psci";
+ performance-domains = <&performance 0>;
+ reg = <0x000>;
+ };
+
+ /* ... */
+
+ cpu6: cpu@600 {
+ device_type = "cpu";
+ compatible = "arm,cortex-x4";
+ enable-method = "psci";
+ performance-domains = <&performance 1>;
+ reg = <0x600>;
+ };
+
+ cpu7: cpu@700 {
+ device_type = "cpu";
+ compatible = "arm,cortex-x925";
+ enable-method = "psci";
+ performance-domains = <&performance 2>;
+ reg = <0x700>;
+ };
+ };
+
+ /* ... */
+
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ performance: performance-controller@c2c2034 {
+ compatible = "mediatek,mt8196-cpufreq-hw";
+ reg = <0 0xc220400 0 0x20>, <0 0xc2c0f20 0 0x120>,
+ <0 0xc2c1040 0 0x120>, <0 0xc2c1160 0 0x120>;
+ #performance-domain-cells = <1>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/cpufreq/nvidia,tegra124-cpufreq.txt b/Documentation/devicetree/bindings/cpufreq/nvidia,tegra124-cpufreq.txt
index b1669fbfb740..03196d5ea515 100644
--- a/Documentation/devicetree/bindings/cpufreq/nvidia,tegra124-cpufreq.txt
+++ b/Documentation/devicetree/bindings/cpufreq/nvidia,tegra124-cpufreq.txt
@@ -9,11 +9,9 @@ Required properties:
See ../clocks/clock-bindings.txt for details.
- clock-names: Must include the following entries:
- cpu_g: Clock mux for the fast CPU cluster.
- - cpu_lp: Clock mux for the low-power CPU cluster.
- pll_x: Fast PLL clocksource.
- pll_p: Auxiliary PLL used during fast PLL rate changes.
- dfll: Fast DFLL clocksource that also automatically scales CPU voltage.
-- vdd-cpu-supply: Regulator for CPU voltage
Optional properties:
- clock-latency: Specify the possible maximum transition latency for clock,
@@ -31,13 +29,11 @@ cpus {
reg = <0>;
clocks = <&tegra_car TEGRA124_CLK_CCLK_G>,
- <&tegra_car TEGRA124_CLK_CCLK_LP>,
<&tegra_car TEGRA124_CLK_PLL_X>,
<&tegra_car TEGRA124_CLK_PLL_P>,
<&dfll>;
- clock-names = "cpu_g", "cpu_lp", "pll_x", "pll_p", "dfll";
+ clock-names = "cpu_g", "pll_x", "pll_p", "dfll";
clock-latency = <300000>;
- vdd-cpu-supply: <&vdd_cpu>;
};
<...>
diff --git a/Documentation/devicetree/bindings/cpufreq/nvidia,tegra20-cpufreq.txt b/Documentation/devicetree/bindings/cpufreq/nvidia,tegra20-cpufreq.txt
new file mode 100644
index 000000000000..bdbfd7c36101
--- /dev/null
+++ b/Documentation/devicetree/bindings/cpufreq/nvidia,tegra20-cpufreq.txt
@@ -0,0 +1,56 @@
+Binding for NVIDIA Tegra20 CPUFreq
+==================================
+
+Required properties:
+- clocks: Must contain an entry for the CPU clock.
+ See ../clocks/clock-bindings.txt for details.
+- operating-points-v2: See ../bindings/opp/opp-v2.yaml for details.
+- #cooling-cells: Should be 2. See ../thermal/thermal-cooling-devices.yaml for details.
+
+For each opp entry in 'operating-points-v2' table:
+- opp-supported-hw: Two bitfields indicating:
+ On Tegra20:
+ 1. CPU process ID mask
+ 2. SoC speedo ID mask
+
+ On Tegra30:
+ 1. CPU process ID mask
+ 2. CPU speedo ID mask
+
+ A bitwise AND is performed against these values and if any bit
+ matches, the OPP gets enabled.
+
+- opp-microvolt: CPU voltage triplet.
+
+Optional properties:
+- cpu-supply: Phandle to the CPU power supply.
+
+Example:
+ regulators {
+ cpu_reg: regulator0 {
+ regulator-name = "vdd_cpu";
+ };
+ };
+
+ cpu0_opp_table: opp_table0 {
+ compatible = "operating-points-v2";
+
+ opp@456000000 {
+ clock-latency-ns = <125000>;
+ opp-microvolt = <825000 825000 1125000>;
+ opp-supported-hw = <0x03 0x0001>;
+ opp-hz = /bits/ 64 <456000000>;
+ };
+
+ ...
+ };
+
+ cpus {
+ cpu@0 {
+ compatible = "arm,cortex-a9";
+ clocks = <&tegra_car TEGRA20_CLK_CCLK>;
+ operating-points-v2 = <&cpu0_opp_table>;
+ cpu-supply = <&cpu_reg>;
+ #cooling-cells = <2>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/cpufreq/qcom-cpufreq-nvmem.yaml b/Documentation/devicetree/bindings/cpufreq/qcom-cpufreq-nvmem.yaml
new file mode 100644
index 000000000000..547265b8b118
--- /dev/null
+++ b/Documentation/devicetree/bindings/cpufreq/qcom-cpufreq-nvmem.yaml
@@ -0,0 +1,211 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/cpufreq/qcom-cpufreq-nvmem.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Technologies, Inc. NVMEM CPUFreq
+
+maintainers:
+ - Ilia Lin <ilia.lin@kernel.org>
+
+description: |
+ In certain Qualcomm Technologies, Inc. SoCs such as QCS404, The CPU supply
+ voltage is dynamically configured by Core Power Reduction (CPR) depending on
+ current CPU frequency and efuse values.
+ CPR provides a power domain with multiple levels that are selected depending
+ on the CPU OPP in use. The CPUFreq driver sets the CPR power domain level
+ according to the required OPPs defined in the CPU OPP tables.
+
+ For old implementation efuses are parsed to select the correct opp table and
+ voltage and CPR is not supported/used.
+
+select:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,apq8064
+ - qcom,apq8096
+ - qcom,ipq5332
+ - qcom,ipq6018
+ - qcom,ipq8064
+ - qcom,ipq8074
+ - qcom,ipq9574
+ - qcom,msm8909
+ - qcom,msm8939
+ - qcom,msm8960
+ - qcom,msm8974
+ - qcom,msm8996
+ - qcom,qcs404
+ required:
+ - compatible
+
+patternProperties:
+ '^opp-table(-[a-z0-9]+)?$':
+ allOf:
+ - if:
+ properties:
+ compatible:
+ enum:
+ - operating-points-v2-krait-cpu
+ - operating-points-v2-kryo-cpu
+ then:
+ $ref: /schemas/opp/opp-v2-kryo-cpu.yaml#
+
+ - if:
+ properties:
+ compatible:
+ const: operating-points-v2-qcom-level
+ then:
+ $ref: /schemas/opp/opp-v2-qcom-level.yaml#
+
+ unevaluatedProperties: false
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,qcs404
+
+ then:
+ properties:
+ cpus:
+ type: object
+
+ patternProperties:
+ '^cpu@[0-9a-f]+$':
+ type: object
+
+ properties:
+ power-domains:
+ maxItems: 1
+
+ power-domain-names:
+ items:
+ - const: cpr
+
+ required:
+ - power-domains
+ - power-domain-names
+
+ patternProperties:
+ '^opp-table(-[a-z0-9]+)?$':
+ if:
+ properties:
+ compatible:
+ const: operating-points-v2-kryo-cpu
+ then:
+ patternProperties:
+ '^opp-?[0-9]+$':
+ required:
+ - required-opps
+
+additionalProperties: true
+
+examples:
+ - |
+ / {
+ model = "Qualcomm Technologies, Inc. QCS404 EVB 1000";
+ compatible = "qcom,qcs404-evb-1000", "qcom,qcs404-evb", "qcom,qcs404";
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ CPU0: cpu@100 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53";
+ reg = <0x100>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_SLEEP_0>;
+ next-level-cache = <&L2_0>;
+ #cooling-cells = <2>;
+ clocks = <&apcs_glb>;
+ operating-points-v2 = <&cpu_opp_table>;
+ power-domains = <&cpr>;
+ power-domain-names = "cpr";
+ };
+
+ CPU1: cpu@101 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53";
+ reg = <0x101>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_SLEEP_0>;
+ next-level-cache = <&L2_0>;
+ #cooling-cells = <2>;
+ clocks = <&apcs_glb>;
+ operating-points-v2 = <&cpu_opp_table>;
+ power-domains = <&cpr>;
+ power-domain-names = "cpr";
+ };
+
+ CPU2: cpu@102 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53";
+ reg = <0x102>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_SLEEP_0>;
+ next-level-cache = <&L2_0>;
+ #cooling-cells = <2>;
+ clocks = <&apcs_glb>;
+ operating-points-v2 = <&cpu_opp_table>;
+ power-domains = <&cpr>;
+ power-domain-names = "cpr";
+ };
+
+ CPU3: cpu@103 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53";
+ reg = <0x103>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_SLEEP_0>;
+ next-level-cache = <&L2_0>;
+ #cooling-cells = <2>;
+ clocks = <&apcs_glb>;
+ operating-points-v2 = <&cpu_opp_table>;
+ power-domains = <&cpr>;
+ power-domain-names = "cpr";
+ };
+ };
+
+ cpu_opp_table: opp-table-cpu {
+ compatible = "operating-points-v2-kryo-cpu";
+ opp-shared;
+
+ opp-1094400000 {
+ opp-hz = /bits/ 64 <1094400000>;
+ required-opps = <&cpr_opp1>;
+ };
+ opp-1248000000 {
+ opp-hz = /bits/ 64 <1248000000>;
+ required-opps = <&cpr_opp2>;
+ };
+ opp-1401600000 {
+ opp-hz = /bits/ 64 <1401600000>;
+ required-opps = <&cpr_opp3>;
+ };
+ };
+
+ cpr_opp_table: opp-table-cpr {
+ compatible = "operating-points-v2-qcom-level";
+
+ cpr_opp1: opp1 {
+ opp-level = <1>;
+ qcom,opp-fuse-level = <1>;
+ };
+ cpr_opp2: opp2 {
+ opp-level = <2>;
+ qcom,opp-fuse-level = <2>;
+ };
+ cpr_opp3: opp3 {
+ opp-level = <3>;
+ qcom,opp-fuse-level = <3>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/cpufreq/qemu,virtual-cpufreq.yaml b/Documentation/devicetree/bindings/cpufreq/qemu,virtual-cpufreq.yaml
new file mode 100644
index 000000000000..018d98bcdc82
--- /dev/null
+++ b/Documentation/devicetree/bindings/cpufreq/qemu,virtual-cpufreq.yaml
@@ -0,0 +1,48 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/cpufreq/qemu,virtual-cpufreq.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Virtual CPUFreq
+
+maintainers:
+ - David Dai <davidai@google.com>
+ - Saravana Kannan <saravanak@google.com>
+
+description:
+ Virtual CPUFreq is a virtualized driver in guest kernels that sends performance
+ selection of its vCPUs as a hint to the host through MMIO regions. Each vCPU
+ is associated with a performance domain which can be shared with other vCPUs.
+ Each performance domain has its own set of registers for performance controls.
+
+properties:
+ compatible:
+ const: qemu,virtual-cpufreq
+
+ reg:
+ maxItems: 1
+ description:
+ Address and size of region containing performance controls for each of the
+ performance domains. Regions for each performance domain is placed
+ contiguously and contain registers for controlling DVFS(Dynamic Frequency
+ and Voltage) characteristics. The size of the region is proportional to
+ total number of performance domains.
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ soc {
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ cpufreq@1040000 {
+ compatible = "qemu,virtual-cpufreq";
+ reg = <0x1040000 0x2000>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/cpufreq/ti-cpufreq.txt b/Documentation/devicetree/bindings/cpufreq/ti-cpufreq.txt
deleted file mode 100644
index 0c38e4b8fc51..000000000000
--- a/Documentation/devicetree/bindings/cpufreq/ti-cpufreq.txt
+++ /dev/null
@@ -1,128 +0,0 @@
-TI CPUFreq and OPP bindings
-================================
-
-Certain TI SoCs, like those in the am335x, am437x, am57xx, and dra7xx
-families support different OPPs depending on the silicon variant in use.
-The ti-cpufreq driver can use revision and an efuse value from the SoC to
-provide the OPP framework with supported hardware information. This is
-used to determine which OPPs from the operating-points-v2 table get enabled
-when it is parsed by the OPP framework.
-
-Required properties:
---------------------
-In 'cpus' nodes:
-- operating-points-v2: Phandle to the operating-points-v2 table to use.
-
-In 'operating-points-v2' table:
-- compatible: Should be
- - 'operating-points-v2-ti-cpu' for am335x, am43xx, and dra7xx/am57xx SoCs
-- syscon: A phandle pointing to a syscon node representing the control module
- register space of the SoC.
-
-Optional properties:
---------------------
-For each opp entry in 'operating-points-v2' table:
-- opp-supported-hw: Two bitfields indicating:
- 1. Which revision of the SoC the OPP is supported by
- 2. Which eFuse bits indicate this OPP is available
-
- A bitwise AND is performed against these values and if any bit
- matches, the OPP gets enabled.
-
-Example:
---------
-
-/* From arch/arm/boot/dts/am33xx.dtsi */
-cpus {
- #address-cells = <1>;
- #size-cells = <0>;
- cpu@0 {
- compatible = "arm,cortex-a8";
- device_type = "cpu";
- reg = <0>;
-
- operating-points-v2 = <&cpu0_opp_table>;
-
- clocks = <&dpll_mpu_ck>;
- clock-names = "cpu";
-
- clock-latency = <300000>; /* From omap-cpufreq driver */
- };
-};
-
-/*
- * cpu0 has different OPPs depending on SoC revision and some on revisions
- * 0x2 and 0x4 have eFuse bits that indicate if they are available or not
- */
-cpu0_opp_table: opp-table {
- compatible = "operating-points-v2-ti-cpu";
- syscon = <&scm_conf>;
-
- /*
- * The three following nodes are marked with opp-suspend
- * because they can not be enabled simultaneously on a
- * single SoC.
- */
- opp50-300000000 {
- opp-hz = /bits/ 64 <300000000>;
- opp-microvolt = <950000 931000 969000>;
- opp-supported-hw = <0x06 0x0010>;
- opp-suspend;
- };
-
- opp100-275000000 {
- opp-hz = /bits/ 64 <275000000>;
- opp-microvolt = <1100000 1078000 1122000>;
- opp-supported-hw = <0x01 0x00FF>;
- opp-suspend;
- };
-
- opp100-300000000 {
- opp-hz = /bits/ 64 <300000000>;
- opp-microvolt = <1100000 1078000 1122000>;
- opp-supported-hw = <0x06 0x0020>;
- opp-suspend;
- };
-
- opp100-500000000 {
- opp-hz = /bits/ 64 <500000000>;
- opp-microvolt = <1100000 1078000 1122000>;
- opp-supported-hw = <0x01 0xFFFF>;
- };
-
- opp100-600000000 {
- opp-hz = /bits/ 64 <600000000>;
- opp-microvolt = <1100000 1078000 1122000>;
- opp-supported-hw = <0x06 0x0040>;
- };
-
- opp120-600000000 {
- opp-hz = /bits/ 64 <600000000>;
- opp-microvolt = <1200000 1176000 1224000>;
- opp-supported-hw = <0x01 0xFFFF>;
- };
-
- opp120-720000000 {
- opp-hz = /bits/ 64 <720000000>;
- opp-microvolt = <1200000 1176000 1224000>;
- opp-supported-hw = <0x06 0x0080>;
- };
-
- oppturbo-720000000 {
- opp-hz = /bits/ 64 <720000000>;
- opp-microvolt = <1260000 1234800 1285200>;
- opp-supported-hw = <0x01 0xFFFF>;
- };
-
- oppturbo-800000000 {
- opp-hz = /bits/ 64 <800000000>;
- opp-microvolt = <1260000 1234800 1285200>;
- opp-supported-hw = <0x06 0x0100>;
- };
-
- oppnitro-1000000000 {
- opp-hz = /bits/ 64 <1000000000>;
- opp-microvolt = <1325000 1298500 1351500>;
- opp-supported-hw = <0x04 0x0200>;
- };
-};
diff --git a/Documentation/devicetree/bindings/crypto/allwinner,sun4i-a10-crypto.yaml b/Documentation/devicetree/bindings/crypto/allwinner,sun4i-a10-crypto.yaml
new file mode 100644
index 000000000000..0401c11da8d9
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/allwinner,sun4i-a10-crypto.yaml
@@ -0,0 +1,95 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/allwinner,sun4i-a10-crypto.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 Security System
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+properties:
+ compatible:
+ oneOf:
+ - const: allwinner,sun4i-a10-crypto
+ - items:
+ - const: allwinner,sun5i-a13-crypto
+ - const: allwinner,sun4i-a10-crypto
+ - items:
+ - const: allwinner,sun6i-a31-crypto
+ - const: allwinner,sun4i-a10-crypto
+ - items:
+ - const: allwinner,sun7i-a20-crypto
+ - const: allwinner,sun4i-a10-crypto
+ - const: allwinner,sun8i-a33-crypto
+ - items:
+ - const: allwinner,sun8i-v3s-crypto
+ - const: allwinner,sun8i-a33-crypto
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: Bus Clock
+ - description: Module Clock
+
+ clock-names:
+ items:
+ - const: ahb
+ - const: mod
+
+ dmas:
+ items:
+ - description: RX DMA Channel
+ - description: TX DMA Channel
+
+ dma-names:
+ items:
+ - const: rx
+ - const: tx
+
+ resets:
+ maxItems: 1
+
+ reset-names:
+ const: ahb
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+
+if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - allwinner,sun6i-a31-crypto
+ - allwinner,sun8i-a33-crypto
+
+then:
+ required:
+ - resets
+ - reset-names
+
+additionalProperties: false
+
+examples:
+ - |
+ crypto: crypto-engine@1c15000 {
+ compatible = "allwinner,sun4i-a10-crypto";
+ reg = <0x01c15000 0x1000>;
+ interrupts = <86>;
+ clocks = <&ahb_gates 5>, <&ss_clk>;
+ clock-names = "ahb", "mod";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/crypto/allwinner,sun8i-ce.yaml b/Documentation/devicetree/bindings/crypto/allwinner,sun8i-ce.yaml
new file mode 100644
index 000000000000..da47b601c165
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/allwinner,sun8i-ce.yaml
@@ -0,0 +1,103 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/allwinner,sun8i-ce.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner Crypto Engine driver
+
+maintainers:
+ - Corentin Labbe <clabbe.montjoie@gmail.com>
+
+properties:
+ compatible:
+ enum:
+ - allwinner,sun8i-h3-crypto
+ - allwinner,sun8i-r40-crypto
+ - allwinner,sun20i-d1-crypto
+ - allwinner,sun50i-a64-crypto
+ - allwinner,sun50i-h5-crypto
+ - allwinner,sun50i-h6-crypto
+ - allwinner,sun50i-h616-crypto
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: Bus clock
+ - description: Module clock
+ - description: MBus clock
+ - description: TRNG clock (RC oscillator)
+ minItems: 2
+
+ clock-names:
+ items:
+ - const: bus
+ - const: mod
+ - const: ram
+ - const: trng
+ minItems: 2
+
+ resets:
+ maxItems: 1
+
+if:
+ properties:
+ compatible:
+ enum:
+ - allwinner,sun20i-d1-crypto
+ - allwinner,sun50i-h616-crypto
+then:
+ properties:
+ clocks:
+ minItems: 4
+ clock-names:
+ minItems: 4
+else:
+ if:
+ properties:
+ compatible:
+ const: allwinner,sun50i-h6-crypto
+ then:
+ properties:
+ clocks:
+ minItems: 3
+ maxItems: 3
+ clock-names:
+ minItems: 3
+ maxItems: 3
+ else:
+ properties:
+ clocks:
+ maxItems: 2
+ clock-names:
+ maxItems: 2
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+ - resets
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/clock/sun50i-a64-ccu.h>
+ #include <dt-bindings/reset/sun50i-a64-ccu.h>
+
+ crypto: crypto@1c15000 {
+ compatible = "allwinner,sun8i-h3-crypto";
+ reg = <0x01c15000 0x1000>;
+ interrupts = <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&ccu CLK_BUS_CE>, <&ccu CLK_CE>;
+ clock-names = "bus", "mod";
+ resets = <&ccu RST_BUS_CE>;
+ };
diff --git a/Documentation/devicetree/bindings/crypto/allwinner,sun8i-ss.yaml b/Documentation/devicetree/bindings/crypto/allwinner,sun8i-ss.yaml
new file mode 100644
index 000000000000..8a29d36edf26
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/allwinner,sun8i-ss.yaml
@@ -0,0 +1,60 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/allwinner,sun8i-ss.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner Security System v2 driver
+
+maintainers:
+ - Corentin Labbe <corentin.labbe@gmail.com>
+
+properties:
+ compatible:
+ enum:
+ - allwinner,sun8i-a83t-crypto
+ - allwinner,sun9i-a80-crypto
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: Bus clock
+ - description: Module clock
+
+ clock-names:
+ items:
+ - const: bus
+ - const: mod
+
+ resets:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+ - resets
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/clock/sun8i-a83t-ccu.h>
+ #include <dt-bindings/reset/sun8i-a83t-ccu.h>
+
+ crypto: crypto@1c15000 {
+ compatible = "allwinner,sun8i-a83t-crypto";
+ reg = <0x01c15000 0x1000>;
+ interrupts = <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH>;
+ resets = <&ccu RST_BUS_SS>;
+ clocks = <&ccu CLK_BUS_SS>, <&ccu CLK_SS>;
+ clock-names = "bus", "mod";
+ };
diff --git a/Documentation/devicetree/bindings/crypto/amd,ccp-seattle-v1a.yaml b/Documentation/devicetree/bindings/crypto/amd,ccp-seattle-v1a.yaml
new file mode 100644
index 000000000000..32bf3a1c3b42
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/amd,ccp-seattle-v1a.yaml
@@ -0,0 +1,38 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/amd,ccp-seattle-v1a.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: AMD Cryptographic Coprocessor (ccp)
+
+maintainers:
+ - Tom Lendacky <thomas.lendacky@amd.com>
+
+properties:
+ compatible:
+ const: amd,ccp-seattle-v1a
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ dma-coherent: true
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ crypto@e0100000 {
+ compatible = "amd,ccp-seattle-v1a";
+ reg = <0xe0100000 0x10000>;
+ interrupts = <0 3 4>;
+ dma-coherent;
+ };
diff --git a/Documentation/devicetree/bindings/crypto/amd-ccp.txt b/Documentation/devicetree/bindings/crypto/amd-ccp.txt
deleted file mode 100644
index d87579d63da6..000000000000
--- a/Documentation/devicetree/bindings/crypto/amd-ccp.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-* AMD Cryptographic Coprocessor driver (ccp)
-
-Required properties:
-- compatible: Should be "amd,ccp-seattle-v1a"
-- reg: Address and length of the register set for the device
-- interrupts: Should contain the CCP interrupt
-
-Optional properties:
-- dma-coherent: Present if dma operations are coherent
-
-Example:
- ccp@e0100000 {
- compatible = "amd,ccp-seattle-v1a";
- reg = <0 0xe0100000 0 0x10000>;
- interrupt-parent = <&gic>;
- interrupts = <0 3 4>;
- };
diff --git a/Documentation/devicetree/bindings/crypto/amlogic,gxl-crypto.yaml b/Documentation/devicetree/bindings/crypto/amlogic,gxl-crypto.yaml
new file mode 100644
index 000000000000..948e11ebe4ee
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/amlogic,gxl-crypto.yaml
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/amlogic,gxl-crypto.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Amlogic GXL Cryptographic Offloader
+
+maintainers:
+ - Corentin Labbe <clabbe@baylibre.com>
+
+properties:
+ compatible:
+ items:
+ - const: amlogic,gxl-crypto
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ items:
+ - description: Interrupt for flow 0
+ - description: Interrupt for flow 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ const: blkmv
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/clock/gxbb-clkc.h>
+
+ crypto: crypto-engine@c883e000 {
+ compatible = "amlogic,gxl-crypto";
+ reg = <0xc883e000 0x36>;
+ interrupts = <GIC_SPI 188 IRQ_TYPE_EDGE_RISING>, <GIC_SPI 189 IRQ_TYPE_EDGE_RISING>;
+ clocks = <&clkc CLKID_BLKMV>;
+ clock-names = "blkmv";
+ };
diff --git a/Documentation/devicetree/bindings/crypto/arm,cryptocell.yaml b/Documentation/devicetree/bindings/crypto/arm,cryptocell.yaml
new file mode 100644
index 000000000000..9c97874a6dbd
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/arm,cryptocell.yaml
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/arm,cryptocell.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Arm TrustZone CryptoCell cryptographic engine
+
+maintainers:
+ - Gilad Ben-Yossef <gilad@benyossef.com>
+
+properties:
+ compatible:
+ enum:
+ - arm,cryptocell-713-ree
+ - arm,cryptocell-703-ree
+ - arm,cryptocell-712-ree
+ - arm,cryptocell-710-ree
+ - arm,cryptocell-630p-ree
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ power-domains:
+ maxItems: 1
+
+ resets:
+ maxItems: 1
+
+ dma-coherent: true
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ arm_cc712: crypto@80000000 {
+ compatible = "arm,cryptocell-712-ree";
+ reg = <0x80000000 0x10000>;
+ interrupts = <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>;
+ };
diff --git a/Documentation/devicetree/bindings/crypto/arm-cryptocell.txt b/Documentation/devicetree/bindings/crypto/arm-cryptocell.txt
deleted file mode 100644
index 999fb2a810f6..000000000000
--- a/Documentation/devicetree/bindings/crypto/arm-cryptocell.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-Arm TrustZone CryptoCell cryptographic engine
-
-Required properties:
-- compatible: Should be one of: "arm,cryptocell-712-ree",
- "arm,cryptocell-710-ree" or "arm,cryptocell-630p-ree".
-- reg: Base physical address of the engine and length of memory mapped region.
-- interrupts: Interrupt number for the device.
-
-Optional properties:
-- clocks: Reference to the crypto engine clock.
-- dma-coherent: Present if dma operations are coherent.
-
-Examples:
-
- arm_cc712: crypto@80000000 {
- compatible = "arm,cryptocell-712-ree";
- interrupt-parent = <&intc>;
- interrupts = < 0 30 4 >;
- reg = < 0x80000000 0x10000 >;
-
- };
diff --git a/Documentation/devicetree/bindings/crypto/artpec6-crypto.txt b/Documentation/devicetree/bindings/crypto/artpec6-crypto.txt
deleted file mode 100644
index d9cca4875bd6..000000000000
--- a/Documentation/devicetree/bindings/crypto/artpec6-crypto.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-Axis crypto engine with PDMA interface.
-
-Required properties:
-- compatible : Should be one of the following strings:
- "axis,artpec6-crypto" for the version in the Axis ARTPEC-6 SoC
- "axis,artpec7-crypto" for the version in the Axis ARTPEC-7 SoC.
-- reg: Base address and size for the PDMA register area.
-- interrupts: Interrupt handle for the PDMA interrupt line.
-
-Example:
-
-crypto@f4264000 {
- compatible = "axis,artpec6-crypto";
- reg = <0xf4264000 0x1000>;
- interrupts = <GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>;
-};
diff --git a/Documentation/devicetree/bindings/crypto/aspeed,ast2500-hace.yaml b/Documentation/devicetree/bindings/crypto/aspeed,ast2500-hace.yaml
new file mode 100644
index 000000000000..a772d232de09
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/aspeed,ast2500-hace.yaml
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/aspeed,ast2500-hace.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ASPEED HACE hash and crypto Hardware Accelerator Engines
+
+maintainers:
+ - Neal Liu <neal_liu@aspeedtech.com>
+
+description: |
+ The Hash and Crypto Engine (HACE) is designed to accelerate the throughput
+ of hash data digest, encryption, and decryption. Basically, HACE can be
+ divided into two independently engines - Hash Engine and Crypto Engine.
+
+properties:
+ compatible:
+ enum:
+ - aspeed,ast2500-hace
+ - aspeed,ast2600-hace
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ resets:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - interrupts
+ - resets
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/ast2600-clock.h>
+ hace: crypto@1e6d0000 {
+ compatible = "aspeed,ast2600-hace";
+ reg = <0x1e6d0000 0x200>;
+ interrupts = <4>;
+ clocks = <&syscon ASPEED_CLK_GATE_YCLK>;
+ resets = <&syscon ASPEED_RESET_HACE>;
+ };
diff --git a/Documentation/devicetree/bindings/crypto/aspeed,ast2600-acry.yaml b/Documentation/devicetree/bindings/crypto/aspeed,ast2600-acry.yaml
new file mode 100644
index 000000000000..b18f178aac06
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/aspeed,ast2600-acry.yaml
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/aspeed,ast2600-acry.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ASPEED ACRY ECDSA/RSA Hardware Accelerator Engines
+
+maintainers:
+ - Neal Liu <neal_liu@aspeedtech.com>
+
+description:
+ The ACRY ECDSA/RSA engines is designed to accelerate the throughput
+ of ECDSA/RSA signature and verification. Basically, ACRY can be
+ divided into two independent engines - ECC Engine and RSA Engine.
+
+properties:
+ compatible:
+ enum:
+ - aspeed,ast2600-acry
+
+ reg:
+ items:
+ - description: acry base address & size
+ - description: acry sram base address & size
+
+ clocks:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/ast2600-clock.h>
+ acry: crypto@1e6fa000 {
+ compatible = "aspeed,ast2600-acry";
+ reg = <0x1e6fa000 0x400>, <0x1e710000 0x1800>;
+ interrupts = <160>;
+ clocks = <&syscon ASPEED_CLK_GATE_RSACLK>;
+ };
diff --git a/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-aes.yaml b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-aes.yaml
new file mode 100644
index 000000000000..19010f90198a
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-aes.yaml
@@ -0,0 +1,72 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (C) 2022 Microchip Technology, Inc. and its subsidiaries
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/atmel,at91sam9g46-aes.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Atmel Advanced Encryption Standard (AES) HW cryptographic accelerator
+
+maintainers:
+ - Tudor Ambarus <tudor.ambarus@linaro.org>
+
+properties:
+ compatible:
+ oneOf:
+ - const: atmel,at91sam9g46-aes
+ - items:
+ - enum:
+ - microchip,sam9x7-aes
+ - microchip,sama7d65-aes
+ - const: atmel,at91sam9g46-aes
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ const: aes_clk
+
+ dmas:
+ items:
+ - description: TX DMA Channel
+ - description: RX DMA Channel
+
+ dma-names:
+ items:
+ - const: tx
+ - const: rx
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+ - dmas
+ - dma-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/clock/at91.h>
+ #include <dt-bindings/dma/at91.h>
+
+ aes: crypto@e1810000 {
+ compatible = "atmel,at91sam9g46-aes";
+ reg = <0xe1810000 0x100>;
+ interrupts = <GIC_SPI 27 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&pmc PMC_TYPE_PERIPHERAL 27>;
+ clock-names = "aes_clk";
+ dmas = <&dma0 AT91_XDMAC_DT_PERID(1)>,
+ <&dma0 AT91_XDMAC_DT_PERID(2)>;
+ dma-names = "tx", "rx";
+ };
diff --git a/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-sha.yaml b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-sha.yaml
new file mode 100644
index 000000000000..39e076b275b3
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-sha.yaml
@@ -0,0 +1,66 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (C) 2022 Microchip Technology, Inc. and its subsidiaries
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/atmel,at91sam9g46-sha.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Atmel Secure Hash Algorithm (SHA) HW cryptographic accelerator
+
+maintainers:
+ - Tudor Ambarus <tudor.ambarus@linaro.org>
+
+properties:
+ compatible:
+ oneOf:
+ - const: atmel,at91sam9g46-sha
+ - items:
+ - enum:
+ - microchip,sam9x7-sha
+ - microchip,sama7d65-sha
+ - const: atmel,at91sam9g46-sha
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ const: sha_clk
+
+ dmas:
+ maxItems: 1
+ description: TX DMA Channel
+
+ dma-names:
+ const: tx
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/clock/at91.h>
+ #include <dt-bindings/dma/at91.h>
+
+ sha: crypto@e1814000 {
+ compatible = "atmel,at91sam9g46-sha";
+ reg = <0xe1814000 0x100>;
+ interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&pmc PMC_TYPE_PERIPHERAL 83>;
+ clock-names = "sha_clk";
+ dmas = <&dma0 AT91_XDMAC_DT_PERID(48)>;
+ dma-names = "tx";
+ };
diff --git a/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-tdes.yaml b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-tdes.yaml
new file mode 100644
index 000000000000..6f16008c4251
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-tdes.yaml
@@ -0,0 +1,70 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (C) 2022 Microchip Technology, Inc. and its subsidiaries
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/atmel,at91sam9g46-tdes.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Atmel Triple Data Encryption Standard (TDES) HW cryptographic accelerator
+
+maintainers:
+ - Tudor Ambarus <tudor.ambarus@linaro.org>
+
+properties:
+ compatible:
+ oneOf:
+ - const: atmel,at91sam9g46-tdes
+ - items:
+ - enum:
+ - microchip,sam9x7-tdes
+ - microchip,sama7d65-tdes
+ - const: atmel,at91sam9g46-tdes
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ const: tdes_clk
+
+ dmas:
+ items:
+ - description: TX DMA Channel
+ - description: RX DMA Channel
+
+ dma-names:
+ items:
+ - const: tx
+ - const: rx
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/clock/at91.h>
+ #include <dt-bindings/dma/at91.h>
+
+ tdes: crypto@e2014000 {
+ compatible = "atmel,at91sam9g46-tdes";
+ reg = <0xe2014000 0x100>;
+ interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&pmc PMC_TYPE_PERIPHERAL 96>;
+ clock-names = "tdes_clk";
+ dmas = <&dma0 AT91_XDMAC_DT_PERID(54)>,
+ <&dma0 AT91_XDMAC_DT_PERID(53)>;
+ dma-names = "tx", "rx";
+ };
diff --git a/Documentation/devicetree/bindings/crypto/atmel-crypto.txt b/Documentation/devicetree/bindings/crypto/atmel-crypto.txt
deleted file mode 100644
index 6b458bb2440d..000000000000
--- a/Documentation/devicetree/bindings/crypto/atmel-crypto.txt
+++ /dev/null
@@ -1,81 +0,0 @@
-* Atmel HW cryptographic accelerators
-
-These are the HW cryptographic accelerators found on some Atmel products.
-
-* Advanced Encryption Standard (AES)
-
-Required properties:
-- compatible : Should be "atmel,at91sam9g46-aes".
-- reg: Should contain AES registers location and length.
-- interrupts: Should contain the IRQ line for the AES.
-- dmas: List of two DMA specifiers as described in
- atmel-dma.txt and dma.txt files.
-- dma-names: Contains one identifier string for each DMA specifier
- in the dmas property.
-
-Example:
-aes@f8038000 {
- compatible = "atmel,at91sam9g46-aes";
- reg = <0xf8038000 0x100>;
- interrupts = <43 4 0>;
- dmas = <&dma1 2 18>,
- <&dma1 2 19>;
- dma-names = "tx", "rx";
-
-* Triple Data Encryption Standard (Triple DES)
-
-Required properties:
-- compatible : Should be "atmel,at91sam9g46-tdes".
-- reg: Should contain TDES registers location and length.
-- interrupts: Should contain the IRQ line for the TDES.
-
-Optional properties:
-- dmas: List of two DMA specifiers as described in
- atmel-dma.txt and dma.txt files.
-- dma-names: Contains one identifier string for each DMA specifier
- in the dmas property.
-
-Example:
-tdes@f803c000 {
- compatible = "atmel,at91sam9g46-tdes";
- reg = <0xf803c000 0x100>;
- interrupts = <44 4 0>;
- dmas = <&dma1 2 20>,
- <&dma1 2 21>;
- dma-names = "tx", "rx";
-};
-
-* Secure Hash Algorithm (SHA)
-
-Required properties:
-- compatible : Should be "atmel,at91sam9g46-sha".
-- reg: Should contain SHA registers location and length.
-- interrupts: Should contain the IRQ line for the SHA.
-
-Optional properties:
-- dmas: One DMA specifiers as described in
- atmel-dma.txt and dma.txt files.
-- dma-names: Contains one identifier string for each DMA specifier
- in the dmas property. Only one "tx" string needed.
-
-Example:
-sha@f8034000 {
- compatible = "atmel,at91sam9g46-sha";
- reg = <0xf8034000 0x100>;
- interrupts = <42 4 0>;
- dmas = <&dma1 2 17>;
- dma-names = "tx";
-};
-
-* Eliptic Curve Cryptography (I2C)
-
-Required properties:
-- compatible : must be "atmel,atecc508a".
-- reg: I2C bus address of the device.
-- clock-frequency: must be present in the i2c controller node.
-
-Example:
-atecc508a@c0 {
- compatible = "atmel,atecc508a";
- reg = <0xC0>;
-};
diff --git a/Documentation/devicetree/bindings/crypto/axis,artpec6-crypto.yaml b/Documentation/devicetree/bindings/crypto/axis,artpec6-crypto.yaml
new file mode 100644
index 000000000000..c91f81e3c39e
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/axis,artpec6-crypto.yaml
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/axis,artpec6-crypto.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Axis ARTPEC6 crypto engine with PDMA interface
+
+maintainers:
+ - Lars Persson <lars.persson@axis.com>
+
+properties:
+ compatible:
+ enum:
+ - axis,artpec6-crypto
+ - axis,artpec7-crypto
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ crypto@f4264000 {
+ compatible = "axis,artpec6-crypto";
+ reg = <0xf4264000 0x1000>;
+ interrupts = <GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>;
+ };
diff --git a/Documentation/devicetree/bindings/crypto/brcm,spu-crypto.txt b/Documentation/devicetree/bindings/crypto/brcm,spu-crypto.txt
deleted file mode 100644
index 29b6007568eb..000000000000
--- a/Documentation/devicetree/bindings/crypto/brcm,spu-crypto.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-The Broadcom Secure Processing Unit (SPU) hardware supports symmetric
-cryptographic offload for Broadcom SoCs. A SoC may have multiple SPU hardware
-blocks.
-
-Required properties:
-- compatible: Should be one of the following:
- brcm,spum-crypto - for devices with SPU-M hardware
- brcm,spu2-crypto - for devices with SPU2 hardware
- brcm,spu2-v2-crypto - for devices with enhanced SPU2 hardware features like SHA3
- and Rabin Fingerprint support
- brcm,spum-nsp-crypto - for the Northstar Plus variant of the SPU-M hardware
-
-- reg: Should contain SPU registers location and length.
-- mboxes: The mailbox channel to be used to communicate with the SPU.
- Mailbox channels correspond to DMA rings on the device.
-
-Example:
- crypto@612d0000 {
- compatible = "brcm,spum-crypto";
- reg = <0 0x612d0000 0 0x900>;
- mboxes = <&pdc0 0>;
- };
diff --git a/Documentation/devicetree/bindings/crypto/brcm,spum-crypto.yaml b/Documentation/devicetree/bindings/crypto/brcm,spum-crypto.yaml
new file mode 100644
index 000000000000..9a5fb61727fa
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/brcm,spum-crypto.yaml
@@ -0,0 +1,44 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/brcm,spum-crypto.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom SPU Crypto Offload
+
+maintainers:
+ - Rob Rice <rob.rice@broadcom.com>
+
+description:
+ The Broadcom Secure Processing Unit (SPU) hardware supports symmetric
+ cryptographic offload for Broadcom SoCs. A SoC may have multiple SPU hardware
+ blocks.
+
+properties:
+ compatible:
+ enum:
+ - brcm,spum-crypto
+ - brcm,spu2-crypto
+ - brcm,spu2-v2-crypto # enhanced SPU2 hardware features like SHA3 and Rabin Fingerprint support
+ - brcm,spum-nsp-crypto # Northstar Plus variant of the SPU-M hardware
+
+ reg:
+ maxItems: 1
+
+ mboxes:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - mboxes
+
+additionalProperties: false
+
+examples:
+ - |
+ crypto@612d0000 {
+ compatible = "brcm,spum-crypto";
+ reg = <0x612d0000 0x900>;
+ mboxes = <&pdc0 0>;
+ };
diff --git a/Documentation/devicetree/bindings/crypto/cortina,sl3516-crypto.yaml b/Documentation/devicetree/bindings/crypto/cortina,sl3516-crypto.yaml
new file mode 100644
index 000000000000..b633b8d0e6f0
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/cortina,sl3516-crypto.yaml
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/cortina,sl3516-crypto.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: SL3516 cryptographic offloader driver
+
+maintainers:
+ - Corentin Labbe <clabbe@baylibre.com>
+
+properties:
+ compatible:
+ enum:
+ - cortina,sl3516-crypto
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ resets:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - resets
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/clock/cortina,gemini-clock.h>
+ #include <dt-bindings/reset/cortina,gemini-reset.h>
+
+ crypto@62000000 {
+ compatible = "cortina,sl3516-crypto";
+ reg = <0x62000000 0x10000>;
+ interrupts = <7 IRQ_TYPE_EDGE_RISING>;
+ resets = <&syscon GEMINI_RESET_SECURITY>;
+ clocks = <&syscon GEMINI_CLK_GATE_SECURITY>;
+ };
diff --git a/Documentation/devicetree/bindings/crypto/fsl,sec-v4.0-mon.yaml b/Documentation/devicetree/bindings/crypto/fsl,sec-v4.0-mon.yaml
new file mode 100644
index 000000000000..9f8e6689cd94
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/fsl,sec-v4.0-mon.yaml
@@ -0,0 +1,167 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2008-2011 Freescale Semiconductor Inc.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/fsl,sec-v4.0-mon.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale Secure Non-Volatile Storage (SNVS)
+
+maintainers:
+ - '"Horia Geantă" <horia.geanta@nxp.com>'
+ - Pankaj Gupta <pankaj.gupta@nxp.com>
+ - Gaurav Jain <gaurav.jain@nxp.com>
+
+description:
+ Node defines address range and the associated interrupt for the SNVS function.
+ This function monitors security state information & reports security
+ violations. This also included rtc, system power off and ON/OFF key.
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - const: fsl,sec-v4.0-mon
+ - const: syscon
+ - const: simple-mfd
+ - items:
+ - const: fsl,sec-v5.0-mon
+ - const: fsl,sec-v4.0-mon
+ - items:
+ - enum:
+ - fsl,sec-v5.3-mon
+ - fsl,sec-v5.4-mon
+ - const: fsl,sec-v5.0-mon
+ - const: fsl,sec-v4.0-mon
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 2
+
+ snvs-rtc-lp:
+ type: object
+ additionalProperties: false
+ description:
+ Secure Non-Volatile Storage (SNVS) Low Power (LP) RTC Node
+
+ properties:
+ compatible:
+ const: fsl,sec-v4.0-mon-rtc-lp
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ const: snvs-rtc
+
+ interrupts:
+ # VFxxx has only one. What is the 2nd one?
+ minItems: 1
+ maxItems: 2
+
+ regmap:
+ description: Parent node containing registers
+ $ref: /schemas/types.yaml#/definitions/phandle
+
+ offset:
+ description: LP register offset
+ $ref: /schemas/types.yaml#/definitions/uint32
+ default: 0x34
+
+ required:
+ - compatible
+ - interrupts
+ - regmap
+
+ snvs-powerkey:
+ type: object
+ additionalProperties: false
+ description:
+ The snvs-pwrkey is designed to enable POWER key function which controlled
+ by SNVS ONOFF, the driver can report the status of POWER key and wakeup
+ system if pressed after system suspend.
+
+ $ref: /schemas/input/input.yaml
+
+ properties:
+ compatible:
+ const: fsl,sec-v4.0-pwrkey
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ const: snvs-pwrkey
+
+ interrupts:
+ maxItems: 1
+
+ regmap:
+ description: Parent node containing registers
+ $ref: /schemas/types.yaml#/definitions/phandle
+
+ wakeup-source: true
+
+ linux,keycode:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ default: 116
+ deprecated: true
+
+ linux,keycodes:
+ maxItems: 1
+ default: 116
+
+ power-off-time-sec:
+ enum: [0, 5, 10, 15]
+
+ required:
+ - compatible
+ - interrupts
+ - regmap
+
+ snvs-lpgpr:
+ $ref: /schemas/nvmem/snvs-lpgpr.yaml#
+
+ snvs-poweroff:
+ description:
+ The SNVS could drive signal to PMIC to turn off system power by setting
+ SNVS_LP LPCR register.
+ $ref: /schemas/power/reset/syscon-poweroff.yaml#
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/clock/imx7d-clock.h>
+
+ sec_mon: sec-mon@314000 {
+ compatible = "fsl,sec-v4.0-mon", "syscon", "simple-mfd";
+ reg = <0x314000 0x1000>;
+
+ snvs-rtc-lp {
+ compatible = "fsl,sec-v4.0-mon-rtc-lp";
+ regmap = <&sec_mon>;
+ offset = <0x34>;
+ clocks = <&clks IMX7D_SNVS_CLK>;
+ clock-names = "snvs-rtc";
+ interrupts = <GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 20 IRQ_TYPE_LEVEL_HIGH>;
+ };
+
+ snvs-powerkey {
+ compatible = "fsl,sec-v4.0-pwrkey";
+ regmap = <&sec_mon>;
+ clocks = <&clks IMX7D_SNVS_CLK>;
+ clock-names = "snvs-pwrkey";
+ interrupts = <GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>;
+ linux,keycode = <116>; /* KEY_POWER */
+ wakeup-source;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/crypto/fsl,sec-v4.0.yaml b/Documentation/devicetree/bindings/crypto/fsl,sec-v4.0.yaml
new file mode 100644
index 000000000000..dcc755d2709a
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/fsl,sec-v4.0.yaml
@@ -0,0 +1,318 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2008-2011 Freescale Semiconductor Inc.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/fsl,sec-v4.0.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale SEC 4
+
+maintainers:
+ - '"Horia Geantă" <horia.geanta@nxp.com>'
+ - Pankaj Gupta <pankaj.gupta@nxp.com>
+ - Gaurav Jain <gaurav.jain@nxp.com>
+
+description: |
+ NOTE: the SEC 4 is also known as Freescale's Cryptographic Accelerator
+ Accelerator and Assurance Module (CAAM).
+
+ SEC 4 h/w can process requests from 2 types of sources.
+ 1. DPAA Queue Interface (HW interface between Queue Manager & SEC 4).
+ 2. Job Rings (HW interface between cores & SEC 4 registers).
+
+ High Speed Data Path Configuration:
+
+ HW interface between QM & SEC 4 and also BM & SEC 4, on DPAA-enabled parts
+ such as the P4080. The number of simultaneous dequeues the QI can make is
+ equal to the number of Descriptor Controller (DECO) engines in a particular
+ SEC version. E.g., the SEC 4.0 in the P4080 has 5 DECOs and can thus
+ dequeue from 5 subportals simultaneously.
+
+ Job Ring Data Path Configuration:
+
+ Each JR is located on a separate 4k page, they may (or may not) be made visible
+ in the memory partition devoted to a particular core. The P4080 has 4 JRs, so
+ up to 4 JRs can be configured; and all 4 JRs process requests in parallel.
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - fsl,sec-v5.4
+ - fsl,sec-v6.0
+ - const: fsl,sec-v5.0
+ - const: fsl,sec-v4.0
+ - items:
+ - enum:
+ - fsl,imx6ul-caam
+ - fsl,imx8qm-caam
+ - fsl,imx8qxp-caam
+ - fsl,sec-v5.0
+ - const: fsl,sec-v4.0
+ - const: fsl,sec-v4.0
+
+ reg:
+ maxItems: 1
+
+ ranges:
+ maxItems: 1
+
+ '#address-cells':
+ enum: [1, 2]
+
+ '#size-cells':
+ enum: [1, 2]
+
+ clocks:
+ minItems: 1
+ maxItems: 4
+
+ clock-names:
+ minItems: 1
+ maxItems: 4
+ items:
+ enum: [mem, aclk, ipg, emi_slow]
+
+ dma-coherent: true
+
+ interrupts:
+ maxItems: 1
+
+ power-domains:
+ maxItems: 1
+
+ fsl,sec-era:
+ description: Defines the 'ERA' of the SEC device.
+ $ref: /schemas/types.yaml#/definitions/uint32
+
+patternProperties:
+ '^jr@[0-9a-f]+$':
+ type: object
+ additionalProperties: false
+ description:
+ Job Ring (JR) Node. Defines data processing interface to SEC 4 across the
+ peripheral bus for purposes of processing cryptographic descriptors. The
+ specified address range can be made visible to one (or more) cores. The
+ interrupt defined for this node is controlled within the address range of
+ this node.
+
+ properties:
+ compatible:
+ oneOf:
+ - items:
+ - const: fsl,sec-v6.0-job-ring
+ - const: fsl,sec-v5.2-job-ring
+ - const: fsl,sec-v5.0-job-ring
+ - const: fsl,sec-v4.4-job-ring
+ - const: fsl,sec-v4.0-job-ring
+ - items:
+ - const: fsl,sec-v5.4-job-ring
+ - const: fsl,sec-v5.0-job-ring
+ - const: fsl,sec-v4.0-job-ring
+ - items:
+ - enum:
+ - fsl,imx8qm-job-ring
+ - fsl,imx8qxp-job-ring
+ - fsl,sec-v5.0-job-ring
+ - const: fsl,sec-v4.0-job-ring
+ - const: fsl,sec-v4.0-job-ring
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ power-domains:
+ maxItems: 1
+
+ fsl,liodn:
+ description:
+ Specifies the LIODN to be used in conjunction with the ppid-to-liodn
+ table that specifies the PPID to LIODN mapping. Needed if the PAMU is
+ used. Value is a 12 bit value where value is a LIODN ID for this JR.
+ This property is normally set by boot firmware.
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ items:
+ - maximum: 0xfff
+ allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - fsl,imx8qm-job-ring
+ - fsl,imx8qxp-job-ring
+ then:
+ required:
+ - power-domains
+ else:
+ properties:
+ power-domains: false
+
+ '^rtic@[0-9a-f]+$':
+ type: object
+ additionalProperties: false
+ description:
+ Run Time Integrity Check (RTIC) Node. Defines a register space that
+ contains up to 5 sets of addresses and their lengths (sizes) that will be
+ checked at run time. After an initial hash result is calculated, these
+ addresses are checked by HW to monitor any change. If any memory is
+ modified, a Security Violation is triggered (see SNVS definition).
+
+ properties:
+ compatible:
+ oneOf:
+ - items:
+ - const: fsl,sec-v5.4-rtic
+ - const: fsl,sec-v5.0-rtic
+ - const: fsl,sec-v4.0-rtic
+ - const: fsl,sec-v4.0-rtic
+
+ reg:
+ items:
+ - description: RTIC control and status register space.
+ - description: RTIC recoverable error indication register space.
+ minItems: 1
+
+ ranges:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ '#address-cells':
+ const: 1
+
+ '#size-cells':
+ const: 1
+
+ patternProperties:
+ '^rtic-[a-z]@[0-9a-f]+$':
+ type: object
+ additionalProperties: false
+ description:
+ Run Time Integrity Check (RTIC) Memory Node defines individual RTIC
+ memory regions that are used to perform run-time integrity check of
+ memory areas that should not modified. The node defines a register
+ that contains the memory address & length (combined) and a second
+ register that contains the hash result in big endian format.
+
+ properties:
+ compatible:
+ oneOf:
+ - items:
+ - const: fsl,sec-v5.4-rtic-memory
+ - const: fsl,sec-v5.0-rtic-memory
+ - const: fsl,sec-v4.0-rtic-memory
+ - const: fsl,sec-v4.0-rtic-memory
+
+ reg:
+ items:
+ - description: RTIC memory address
+ - description: RTIC hash result
+
+ fsl,liodn:
+ description:
+ Specifies the LIODN to be used in conjunction with the
+ ppid-to-liodn table that specifies the PPID to LIODN mapping.
+ Needed if the PAMU is used. Value is a 12 bit value where value
+ is a LIODN ID for this JR. This property is normally set by boot
+ firmware.
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ items:
+ - maximum: 0xfff
+
+ fsl,rtic-region:
+ description:
+ Specifies the HW address (36 bit address) for this region
+ followed by the length of the HW partition to be checked;
+ the address is represented as a 64 bit quantity followed
+ by a 32 bit length.
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+
+required:
+ - compatible
+ - reg
+ - ranges
+
+if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - fsl,imx8qm-caam
+ - fsl,imx8qxp-caam
+then:
+ required:
+ - power-domains
+else:
+ properties:
+ power-domains: false
+
+additionalProperties: false
+
+examples:
+ - |
+ crypto@300000 {
+ compatible = "fsl,sec-v4.0";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0x300000 0x10000>;
+ ranges = <0 0x300000 0x10000>;
+ interrupts = <92 2>;
+
+ jr@1000 {
+ compatible = "fsl,sec-v4.0-job-ring";
+ reg = <0x1000 0x1000>;
+ interrupts = <88 2>;
+ };
+
+ jr@2000 {
+ compatible = "fsl,sec-v4.0-job-ring";
+ reg = <0x2000 0x1000>;
+ interrupts = <89 2>;
+ };
+
+ jr@3000 {
+ compatible = "fsl,sec-v4.0-job-ring";
+ reg = <0x3000 0x1000>;
+ interrupts = <90 2>;
+ };
+
+ jr@4000 {
+ compatible = "fsl,sec-v4.0-job-ring";
+ reg = <0x4000 0x1000>;
+ interrupts = <91 2>;
+ };
+
+ rtic@6000 {
+ compatible = "fsl,sec-v4.0-rtic";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0x6000 0x100>;
+ ranges = <0x0 0x6100 0xe00>;
+
+ rtic-a@0 {
+ compatible = "fsl,sec-v4.0-rtic-memory";
+ reg = <0x00 0x20>, <0x100 0x80>;
+ };
+
+ rtic-b@20 {
+ compatible = "fsl,sec-v4.0-rtic-memory";
+ reg = <0x20 0x20>, <0x200 0x80>;
+ };
+
+ rtic-c@40 {
+ compatible = "fsl,sec-v4.0-rtic-memory";
+ reg = <0x40 0x20>, <0x300 0x80>;
+ };
+
+ rtic-d@60 {
+ compatible = "fsl,sec-v4.0-rtic-memory";
+ reg = <0x60 0x20>, <0x500 0x80>;
+ };
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/crypto/fsl,sec2.0.yaml b/Documentation/devicetree/bindings/crypto/fsl,sec2.0.yaml
new file mode 100644
index 000000000000..2091b89bb726
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/fsl,sec2.0.yaml
@@ -0,0 +1,144 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/fsl,sec2.0.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale SoC SEC Security Engines versions 1.x-2.x-3.x
+
+maintainers:
+ - J. Neuschäfer <j.ne@posteo.net>
+
+properties:
+ compatible:
+ description:
+ Should contain entries for this and backward compatible SEC versions,
+ high to low. Warning - SEC1 and SEC2 are mutually exclusive.
+ oneOf:
+ - items:
+ - const: fsl,sec3.3
+ - const: fsl,sec3.1
+ - const: fsl,sec3.0
+ - const: fsl,sec2.4
+ - const: fsl,sec2.2
+ - const: fsl,sec2.1
+ - const: fsl,sec2.0
+ - items:
+ - const: fsl,sec3.1
+ - const: fsl,sec3.0
+ - const: fsl,sec2.4
+ - const: fsl,sec2.2
+ - const: fsl,sec2.1
+ - const: fsl,sec2.0
+ - items:
+ - const: fsl,sec3.0
+ - const: fsl,sec2.4
+ - const: fsl,sec2.2
+ - const: fsl,sec2.1
+ - const: fsl,sec2.0
+ - items:
+ - const: fsl,sec2.4
+ - const: fsl,sec2.2
+ - const: fsl,sec2.1
+ - const: fsl,sec2.0
+ - items:
+ - const: fsl,sec2.2
+ - const: fsl,sec2.1
+ - const: fsl,sec2.0
+ - items:
+ - const: fsl,sec2.1
+ - const: fsl,sec2.0
+ - items:
+ - const: fsl,sec2.0
+ - items:
+ - const: fsl,sec1.2
+ - const: fsl,sec1.0
+ - items:
+ - const: fsl,sec1.0
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ fsl,num-channels:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [ 1, 4 ]
+ description: An integer representing the number of channels available.
+
+ fsl,channel-fifo-len:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ maximum: 100
+ description:
+ An integer representing the number of descriptor pointers each channel
+ fetch fifo can hold.
+
+ fsl,exec-units-mask:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ maximum: 0xfff
+ description: |
+ The bitmask representing what execution units (EUs) are available.
+ EU information should be encoded following the SEC's Descriptor Header
+ Dword EU_SEL0 field documentation, i.e. as follows:
+
+ bit 0 = reserved - should be 0
+ bit 1 = set if SEC has the ARC4 EU (AFEU)
+ bit 2 = set if SEC has the DES/3DES EU (DEU)
+ bit 3 = set if SEC has the message digest EU (MDEU/MDEU-A)
+ bit 4 = set if SEC has the random number generator EU (RNG)
+ bit 5 = set if SEC has the public key EU (PKEU)
+ bit 6 = set if SEC has the AES EU (AESU)
+ bit 7 = set if SEC has the Kasumi EU (KEU)
+ bit 8 = set if SEC has the CRC EU (CRCU)
+ bit 11 = set if SEC has the message digest EU extended alg set (MDEU-B)
+
+ remaining bits are reserved for future SEC EUs.
+
+ fsl,descriptor-types-mask:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: |
+ The bitmask representing what descriptors are available. Descriptor type
+ information should be encoded following the SEC's Descriptor Header Dword
+ DESC_TYPE field documentation, i.e. as follows:
+
+ bit 0 = SEC supports descriptor type aesu_ctr_nonsnoop
+ bit 1 = SEC supports descriptor type ipsec_esp
+ bit 2 = SEC supports descriptor type common_nonsnoop
+ bit 3 = SEC supports descriptor type 802.11i AES ccmp
+ bit 4 = SEC supports descriptor type hmac_snoop_no_afeu
+ bit 5 = SEC supports descriptor type srtp
+ bit 6 = SEC supports descriptor type non_hmac_snoop_no_afeu
+ bit 7 = SEC supports descriptor type pkeu_assemble
+ bit 8 = SEC supports descriptor type aesu_key_expand_output
+ bit 9 = SEC supports descriptor type pkeu_ptmul
+ bit 10 = SEC supports descriptor type common_nonsnoop_afeu
+ bit 11 = SEC supports descriptor type pkeu_ptadd_dbl
+
+ ..and so on and so forth.
+
+required:
+ - compatible
+ - reg
+ - fsl,num-channels
+ - fsl,channel-fifo-len
+ - fsl,exec-units-mask
+ - fsl,descriptor-types-mask
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ /* MPC8548E */
+ crypto@30000 {
+ compatible = "fsl,sec2.1", "fsl,sec2.0";
+ reg = <0x30000 0x10000>;
+ interrupts = <29 2>;
+ interrupt-parent = <&mpic>;
+ fsl,num-channels = <4>;
+ fsl,channel-fifo-len = <24>;
+ fsl,exec-units-mask = <0xfe>;
+ fsl,descriptor-types-mask = <0x12b0ebf>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/crypto/fsl-dcp.txt b/Documentation/devicetree/bindings/crypto/fsl-dcp.txt
deleted file mode 100644
index 76a0b4e80e83..000000000000
--- a/Documentation/devicetree/bindings/crypto/fsl-dcp.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-Freescale DCP (Data Co-Processor) found on i.MX23/i.MX28 .
-
-Required properties:
-- compatible : Should be "fsl,<soc>-dcp"
-- reg : Should contain MXS DCP registers location and length
-- interrupts : Should contain MXS DCP interrupt numbers, VMI IRQ and DCP IRQ
- must be supplied, optionally Secure IRQ can be present, but
- is currently not implemented and not used.
-
-Example:
-
-dcp@80028000 {
- compatible = "fsl,imx28-dcp", "fsl,imx23-dcp";
- reg = <0x80028000 0x2000>;
- interrupts = <52 53>;
-};
diff --git a/Documentation/devicetree/bindings/crypto/fsl-dcp.yaml b/Documentation/devicetree/bindings/crypto/fsl-dcp.yaml
new file mode 100644
index 000000000000..8dd36c2f76fd
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/fsl-dcp.yaml
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/fsl-dcp.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale DCP (Data Co-Processor) found on i.MX23/i.MX28
+
+maintainers:
+ - Marek Vasut <marex@denx.de>
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - fsl,imx23-dcp
+ - fsl,imx28-dcp
+ - items:
+ - enum:
+ - fsl,imx6sl-dcp
+ - fsl,imx6ull-dcp
+ - const: fsl,imx28-dcp
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ description: Should contain MXS DCP interrupt numbers, VMI IRQ and DCP IRQ
+ must be supplied, optionally Secure IRQ can be present, but is currently
+ not implemented and not used.
+ items:
+ - description: MXS DCP VMI interrupt
+ - description: MXS DCP DCP interrupt
+ - description: MXS DCP secure interrupt
+ minItems: 2
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ const: dcp
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ crypto@80028000 {
+ compatible = "fsl,imx23-dcp";
+ reg = <0x80028000 0x2000>;
+ interrupts = <53>, <54>;
+ };
diff --git a/Documentation/devicetree/bindings/crypto/fsl-imx-sahara.txt b/Documentation/devicetree/bindings/crypto/fsl-imx-sahara.txt
deleted file mode 100644
index e8a35c71e947..000000000000
--- a/Documentation/devicetree/bindings/crypto/fsl-imx-sahara.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-Freescale SAHARA Cryptographic Accelerator included in some i.MX chips.
-Currently only i.MX27 and i.MX53 are supported.
-
-Required properties:
-- compatible : Should be "fsl,<soc>-sahara"
-- reg : Should contain SAHARA registers location and length
-- interrupts : Should contain SAHARA interrupt number
-
-Example:
-
-sah@10025000 {
- compatible = "fsl,imx27-sahara";
- reg = < 0x10025000 0x800>;
- interrupts = <75>;
-};
diff --git a/Documentation/devicetree/bindings/crypto/fsl-imx-sahara.yaml b/Documentation/devicetree/bindings/crypto/fsl-imx-sahara.yaml
new file mode 100644
index 000000000000..41df80bcdcd9
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/fsl-imx-sahara.yaml
@@ -0,0 +1,74 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/fsl-imx-sahara.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale SAHARA Cryptographic Accelerator
+
+maintainers:
+ - Steffen Trumtrar <s.trumtrar@pengutronix.de>
+
+properties:
+ compatible:
+ enum:
+ - fsl,imx27-sahara
+ - fsl,imx53-sahara
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ items:
+ - description: SAHARA Interrupt for Host 0
+ - description: SAHARA Interrupt for Host 1
+ minItems: 1
+
+ clocks:
+ items:
+ - description: Sahara IPG clock
+ - description: Sahara AHB clock
+
+ clock-names:
+ items:
+ - const: ipg
+ - const: ahb
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - fsl,imx53-sahara
+ then:
+ properties:
+ interrupts:
+ minItems: 2
+ maxItems: 2
+ else:
+ properties:
+ interrupts:
+ maxItems: 1
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/imx27-clock.h>
+
+ crypto@10025000 {
+ compatible = "fsl,imx27-sahara";
+ reg = <0x10025000 0x800>;
+ interrupts = <75>;
+ clocks = <&clks IMX27_CLK_SAHARA_IPG_GATE>,
+ <&clks IMX27_CLK_SAHARA_AHB_GATE>;
+ clock-names = "ipg", "ahb";
+ };
diff --git a/Documentation/devicetree/bindings/crypto/fsl-imx-scc.txt b/Documentation/devicetree/bindings/crypto/fsl-imx-scc.txt
deleted file mode 100644
index 7aad448e8a36..000000000000
--- a/Documentation/devicetree/bindings/crypto/fsl-imx-scc.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-Freescale Security Controller (SCC)
-
-Required properties:
-- compatible : Should be "fsl,imx25-scc".
-- reg : Should contain register location and length.
-- interrupts : Should contain interrupt numbers for SCM IRQ and SMN IRQ.
-- interrupt-names : Should specify the names "scm" and "smn" for the
- SCM IRQ and SMN IRQ.
-- clocks: Should contain the clock driving the SCC core.
-- clock-names: Should be set to "ipg".
-
-Example:
-
- scc: crypto@53fac000 {
- compatible = "fsl,imx25-scc";
- reg = <0x53fac000 0x4000>;
- clocks = <&clks 111>;
- clock-names = "ipg";
- interrupts = <49>, <50>;
- interrupt-names = "scm", "smn";
- };
diff --git a/Documentation/devicetree/bindings/crypto/fsl-imx-scc.yaml b/Documentation/devicetree/bindings/crypto/fsl-imx-scc.yaml
new file mode 100644
index 000000000000..563a31605d2b
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/fsl-imx-scc.yaml
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/fsl-imx-scc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale Security Controller (SCC)
+
+maintainers:
+ - Steffen Trumtrar <s.trumtrar@pengutronix.de>
+
+properties:
+ compatible:
+ const: fsl,imx25-scc
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ items:
+ - description: SCC SCM interrupt
+ - description: SCC SMN interrupt
+
+ interrupt-names:
+ items:
+ - const: scm
+ - const: smn
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ const: ipg
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - interrupt-names
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ - |
+ crypto@53fac000 {
+ compatible = "fsl,imx25-scc";
+ reg = <0x53fac000 0x4000>;
+ clocks = <&clks 111>;
+ clock-names = "ipg";
+ interrupts = <49>, <50>;
+ interrupt-names = "scm", "smn";
+ };
diff --git a/Documentation/devicetree/bindings/crypto/fsl-sec2.txt b/Documentation/devicetree/bindings/crypto/fsl-sec2.txt
deleted file mode 100644
index 125f155d00d0..000000000000
--- a/Documentation/devicetree/bindings/crypto/fsl-sec2.txt
+++ /dev/null
@@ -1,65 +0,0 @@
-Freescale SoC SEC Security Engines versions 1.x-2.x-3.x
-
-Required properties:
-
-- compatible : Should contain entries for this and backward compatible
- SEC versions, high to low, e.g., "fsl,sec2.1", "fsl,sec2.0" (SEC2/3)
- e.g., "fsl,sec1.2", "fsl,sec1.0" (SEC1)
- warning: SEC1 and SEC2 are mutually exclusive
-- reg : Offset and length of the register set for the device
-- interrupts : the SEC's interrupt number
-- fsl,num-channels : An integer representing the number of channels
- available.
-- fsl,channel-fifo-len : An integer representing the number of
- descriptor pointers each channel fetch fifo can hold.
-- fsl,exec-units-mask : The bitmask representing what execution units
- (EUs) are available. It's a single 32-bit cell. EU information
- should be encoded following the SEC's Descriptor Header Dword
- EU_SEL0 field documentation, i.e. as follows:
-
- bit 0 = reserved - should be 0
- bit 1 = set if SEC has the ARC4 EU (AFEU)
- bit 2 = set if SEC has the DES/3DES EU (DEU)
- bit 3 = set if SEC has the message digest EU (MDEU/MDEU-A)
- bit 4 = set if SEC has the random number generator EU (RNG)
- bit 5 = set if SEC has the public key EU (PKEU)
- bit 6 = set if SEC has the AES EU (AESU)
- bit 7 = set if SEC has the Kasumi EU (KEU)
- bit 8 = set if SEC has the CRC EU (CRCU)
- bit 11 = set if SEC has the message digest EU extended alg set (MDEU-B)
-
-remaining bits are reserved for future SEC EUs.
-
-- fsl,descriptor-types-mask : The bitmask representing what descriptors
- are available. It's a single 32-bit cell. Descriptor type information
- should be encoded following the SEC's Descriptor Header Dword DESC_TYPE
- field documentation, i.e. as follows:
-
- bit 0 = set if SEC supports the aesu_ctr_nonsnoop desc. type
- bit 1 = set if SEC supports the ipsec_esp descriptor type
- bit 2 = set if SEC supports the common_nonsnoop desc. type
- bit 3 = set if SEC supports the 802.11i AES ccmp desc. type
- bit 4 = set if SEC supports the hmac_snoop_no_afeu desc. type
- bit 5 = set if SEC supports the srtp descriptor type
- bit 6 = set if SEC supports the non_hmac_snoop_no_afeu desc.type
- bit 7 = set if SEC supports the pkeu_assemble descriptor type
- bit 8 = set if SEC supports the aesu_key_expand_output desc.type
- bit 9 = set if SEC supports the pkeu_ptmul descriptor type
- bit 10 = set if SEC supports the common_nonsnoop_afeu desc. type
- bit 11 = set if SEC supports the pkeu_ptadd_dbl descriptor type
-
- ..and so on and so forth.
-
-Example:
-
- /* MPC8548E */
- crypto@30000 {
- compatible = "fsl,sec2.1", "fsl,sec2.0";
- reg = <0x30000 0x10000>;
- interrupts = <29 2>;
- interrupt-parent = <&mpic>;
- fsl,num-channels = <4>;
- fsl,channel-fifo-len = <24>;
- fsl,exec-units-mask = <0xfe>;
- fsl,descriptor-types-mask = <0x12b0ebf>;
- };
diff --git a/Documentation/devicetree/bindings/crypto/fsl-sec4.txt b/Documentation/devicetree/bindings/crypto/fsl-sec4.txt
deleted file mode 100644
index 2fe245ca816a..000000000000
--- a/Documentation/devicetree/bindings/crypto/fsl-sec4.txt
+++ /dev/null
@@ -1,553 +0,0 @@
-=====================================================================
-SEC 4 Device Tree Binding
-Copyright (C) 2008-2011 Freescale Semiconductor Inc.
-
- CONTENTS
- -Overview
- -SEC 4 Node
- -Job Ring Node
- -Run Time Integrity Check (RTIC) Node
- -Run Time Integrity Check (RTIC) Memory Node
- -Secure Non-Volatile Storage (SNVS) Node
- -Secure Non-Volatile Storage (SNVS) Low Power (LP) RTC Node
- -Full Example
-
-NOTE: the SEC 4 is also known as Freescale's Cryptographic Accelerator
-Accelerator and Assurance Module (CAAM).
-
-=====================================================================
-Overview
-
-DESCRIPTION
-
-SEC 4 h/w can process requests from 2 types of sources.
-1. DPAA Queue Interface (HW interface between Queue Manager & SEC 4).
-2. Job Rings (HW interface between cores & SEC 4 registers).
-
-High Speed Data Path Configuration:
-
-HW interface between QM & SEC 4 and also BM & SEC 4, on DPAA-enabled parts
-such as the P4080. The number of simultaneous dequeues the QI can make is
-equal to the number of Descriptor Controller (DECO) engines in a particular
-SEC version. E.g., the SEC 4.0 in the P4080 has 5 DECOs and can thus
-dequeue from 5 subportals simultaneously.
-
-Job Ring Data Path Configuration:
-
-Each JR is located on a separate 4k page, they may (or may not) be made visible
-in the memory partition devoted to a particular core. The P4080 has 4 JRs, so
-up to 4 JRs can be configured; and all 4 JRs process requests in parallel.
-
-=====================================================================
-SEC 4 Node
-
-Description
-
- Node defines the base address of the SEC 4 block.
- This block specifies the address range of all global
- configuration registers for the SEC 4 block. It
- also receives interrupts from the Run Time Integrity Check
- (RTIC) function within the SEC 4 block.
-
-PROPERTIES
-
- - compatible
- Usage: required
- Value type: <string>
- Definition: Must include "fsl,sec-v4.0"
-
- - fsl,sec-era
- Usage: optional
- Value type: <u32>
- Definition: A standard property. Define the 'ERA' of the SEC
- device.
-
- - #address-cells
- Usage: required
- Value type: <u32>
- Definition: A standard property. Defines the number of cells
- for representing physical addresses in child nodes.
-
- - #size-cells
- Usage: required
- Value type: <u32>
- Definition: A standard property. Defines the number of cells
- for representing the size of physical addresses in
- child nodes.
-
- - reg
- Usage: required
- Value type: <prop-encoded-array>
- Definition: A standard property. Specifies the physical
- address and length of the SEC4 configuration registers.
- registers
-
- - ranges
- Usage: required
- Value type: <prop-encoded-array>
- Definition: A standard property. Specifies the physical address
- range of the SEC 4.0 register space (-SNVS not included). A
- triplet that includes the child address, parent address, &
- length.
-
- - interrupts
- Usage: required
- Value type: <prop_encoded-array>
- Definition: Specifies the interrupts generated by this
- device. The value of the interrupts property
- consists of one interrupt specifier. The format
- of the specifier is defined by the binding document
- describing the node's interrupt parent.
-
- - clocks
- Usage: required if SEC 4.0 requires explicit enablement of clocks
- Value type: <prop_encoded-array>
- Definition: A list of phandle and clock specifier pairs describing
- the clocks required for enabling and disabling SEC 4.0.
-
- - clock-names
- Usage: required if SEC 4.0 requires explicit enablement of clocks
- Value type: <string>
- Definition: A list of clock name strings in the same order as the
- clocks property.
-
- Note: All other standard properties (see the Devicetree Specification)
- are allowed but are optional.
-
-
-EXAMPLE
-
-iMX6QDL/SX requires four clocks
-
- crypto@300000 {
- compatible = "fsl,sec-v4.0";
- fsl,sec-era = <2>;
- #address-cells = <1>;
- #size-cells = <1>;
- reg = <0x300000 0x10000>;
- ranges = <0 0x300000 0x10000>;
- interrupt-parent = <&mpic>;
- interrupts = <92 2>;
- clocks = <&clks IMX6QDL_CLK_CAAM_MEM>,
- <&clks IMX6QDL_CLK_CAAM_ACLK>,
- <&clks IMX6QDL_CLK_CAAM_IPG>,
- <&clks IMX6QDL_CLK_EIM_SLOW>;
- clock-names = "mem", "aclk", "ipg", "emi_slow";
- };
-
-
-iMX6UL does only require three clocks
-
- crypto: caam@2140000 {
- compatible = "fsl,sec-v4.0";
- #address-cells = <1>;
- #size-cells = <1>;
- reg = <0x2140000 0x3c000>;
- ranges = <0 0x2140000 0x3c000>;
- interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>;
-
- clocks = <&clks IMX6UL_CLK_CAAM_MEM>,
- <&clks IMX6UL_CLK_CAAM_ACLK>,
- <&clks IMX6UL_CLK_CAAM_IPG>;
- clock-names = "mem", "aclk", "ipg";
- };
-
-=====================================================================
-Job Ring (JR) Node
-
- Child of the crypto node defines data processing interface to SEC 4
- across the peripheral bus for purposes of processing
- cryptographic descriptors. The specified address
- range can be made visible to one (or more) cores.
- The interrupt defined for this node is controlled within
- the address range of this node.
-
- - compatible
- Usage: required
- Value type: <string>
- Definition: Must include "fsl,sec-v4.0-job-ring"
-
- - reg
- Usage: required
- Value type: <prop-encoded-array>
- Definition: Specifies a two JR parameters: an offset from
- the parent physical address and the length the JR registers.
-
- - fsl,liodn
- Usage: optional-but-recommended
- Value type: <prop-encoded-array>
- Definition:
- Specifies the LIODN to be used in conjunction with
- the ppid-to-liodn table that specifies the PPID to LIODN mapping.
- Needed if the PAMU is used. Value is a 12 bit value
- where value is a LIODN ID for this JR. This property is
- normally set by boot firmware.
-
- - interrupts
- Usage: required
- Value type: <prop_encoded-array>
- Definition: Specifies the interrupts generated by this
- device. The value of the interrupts property
- consists of one interrupt specifier. The format
- of the specifier is defined by the binding document
- describing the node's interrupt parent.
-
-EXAMPLE
- jr@1000 {
- compatible = "fsl,sec-v4.0-job-ring";
- reg = <0x1000 0x1000>;
- fsl,liodn = <0x081>;
- interrupt-parent = <&mpic>;
- interrupts = <88 2>;
- };
-
-
-=====================================================================
-Run Time Integrity Check (RTIC) Node
-
- Child node of the crypto node. Defines a register space that
- contains up to 5 sets of addresses and their lengths (sizes) that
- will be checked at run time. After an initial hash result is
- calculated, these addresses are checked by HW to monitor any
- change. If any memory is modified, a Security Violation is
- triggered (see SNVS definition).
-
-
- - compatible
- Usage: required
- Value type: <string>
- Definition: Must include "fsl,sec-v4.0-rtic".
-
- - #address-cells
- Usage: required
- Value type: <u32>
- Definition: A standard property. Defines the number of cells
- for representing physical addresses in child nodes. Must
- have a value of 1.
-
- - #size-cells
- Usage: required
- Value type: <u32>
- Definition: A standard property. Defines the number of cells
- for representing the size of physical addresses in
- child nodes. Must have a value of 1.
-
- - reg
- Usage: required
- Value type: <prop-encoded-array>
- Definition: A standard property. Specifies a two parameters:
- an offset from the parent physical address and the length
- the SEC4 registers.
-
- - ranges
- Usage: required
- Value type: <prop-encoded-array>
- Definition: A standard property. Specifies the physical address
- range of the SEC 4 register space (-SNVS not included). A
- triplet that includes the child address, parent address, &
- length.
-
-EXAMPLE
- rtic@6000 {
- compatible = "fsl,sec-v4.0-rtic";
- #address-cells = <1>;
- #size-cells = <1>;
- reg = <0x6000 0x100>;
- ranges = <0x0 0x6100 0xe00>;
- };
-
-=====================================================================
-Run Time Integrity Check (RTIC) Memory Node
- A child node that defines individual RTIC memory regions that are used to
- perform run-time integrity check of memory areas that should not modified.
- The node defines a register that contains the memory address &
- length (combined) and a second register that contains the hash result
- in big endian format.
-
- - compatible
- Usage: required
- Value type: <string>
- Definition: Must include "fsl,sec-v4.0-rtic-memory".
-
- - reg
- Usage: required
- Value type: <prop-encoded-array>
- Definition: A standard property. Specifies two parameters:
- an offset from the parent physical address and the length:
-
- 1. The location of the RTIC memory address & length registers.
- 2. The location RTIC hash result.
-
- - fsl,rtic-region
- Usage: optional-but-recommended
- Value type: <prop-encoded-array>
- Definition:
- Specifies the HW address (36 bit address) for this region
- followed by the length of the HW partition to be checked;
- the address is represented as a 64 bit quantity followed
- by a 32 bit length.
-
- - fsl,liodn
- Usage: optional-but-recommended
- Value type: <prop-encoded-array>
- Definition:
- Specifies the LIODN to be used in conjunction with
- the ppid-to-liodn table that specifies the PPID to LIODN
- mapping. Needed if the PAMU is used. Value is a 12 bit value
- where value is a LIODN ID for this RTIC memory region. This
- property is normally set by boot firmware.
-
-EXAMPLE
- rtic-a@0 {
- compatible = "fsl,sec-v4.0-rtic-memory";
- reg = <0x00 0x20 0x100 0x80>;
- fsl,liodn = <0x03c>;
- fsl,rtic-region = <0x12345678 0x12345678 0x12345678>;
- };
-
-=====================================================================
-Secure Non-Volatile Storage (SNVS) Node
-
- Node defines address range and the associated
- interrupt for the SNVS function. This function
- monitors security state information & reports
- security violations. This also included rtc,
- system power off and ON/OFF key.
-
- - compatible
- Usage: required
- Value type: <string>
- Definition: Must include "fsl,sec-v4.0-mon" and "syscon".
-
- - reg
- Usage: required
- Value type: <prop-encoded-array>
- Definition: A standard property. Specifies the physical
- address and length of the SEC4 configuration
- registers.
-
- - #address-cells
- Usage: required
- Value type: <u32>
- Definition: A standard property. Defines the number of cells
- for representing physical addresses in child nodes. Must
- have a value of 1.
-
- - #size-cells
- Usage: required
- Value type: <u32>
- Definition: A standard property. Defines the number of cells
- for representing the size of physical addresses in
- child nodes. Must have a value of 1.
-
- - ranges
- Usage: required
- Value type: <prop-encoded-array>
- Definition: A standard property. Specifies the physical address
- range of the SNVS register space. A triplet that includes
- the child address, parent address, & length.
-
- - interrupts
- Usage: optional
- Value type: <prop_encoded-array>
- Definition: Specifies the interrupts generated by this
- device. The value of the interrupts property
- consists of one interrupt specifier. The format
- of the specifier is defined by the binding document
- describing the node's interrupt parent.
-
-EXAMPLE
- sec_mon@314000 {
- compatible = "fsl,sec-v4.0-mon", "syscon";
- reg = <0x314000 0x1000>;
- ranges = <0 0x314000 0x1000>;
- interrupt-parent = <&mpic>;
- interrupts = <93 2>;
- };
-
-=====================================================================
-Secure Non-Volatile Storage (SNVS) Low Power (LP) RTC Node
-
- A SNVS child node that defines SNVS LP RTC.
-
- - compatible
- Usage: required
- Value type: <string>
- Definition: Must include "fsl,sec-v4.0-mon-rtc-lp".
-
- - interrupts
- Usage: required
- Value type: <prop_encoded-array>
- Definition: Specifies the interrupts generated by this
- device. The value of the interrupts property
- consists of one interrupt specifier. The format
- of the specifier is defined by the binding document
- describing the node's interrupt parent.
-
- - regmap
- Usage: required
- Value type: <phandle>
- Definition: this is phandle to the register map node.
-
- - offset
- Usage: option
- value type: <u32>
- Definition: LP register offset. default it is 0x34.
-
- - clocks
- Usage: optional, required if SNVS LP RTC requires explicit
- enablement of clocks
- Value type: <prop_encoded-array>
- Definition: a clock specifier describing the clock required for
- enabling and disabling SNVS LP RTC.
-
- - clock-names
- Usage: optional, required if SNVS LP RTC requires explicit
- enablement of clocks
- Value type: <string>
- Definition: clock name string should be "snvs-rtc".
-
-EXAMPLE
- sec_mon_rtc_lp@1 {
- compatible = "fsl,sec-v4.0-mon-rtc-lp";
- interrupts = <93 2>;
- regmap = <&snvs>;
- offset = <0x34>;
- clocks = <&clks IMX7D_SNVS_CLK>;
- clock-names = "snvs-rtc";
- };
-
-=====================================================================
-System ON/OFF key driver
-
- The snvs-pwrkey is designed to enable POWER key function which controlled
- by SNVS ONOFF, the driver can report the status of POWER key and wakeup
- system if pressed after system suspend.
-
- - compatible:
- Usage: required
- Value type: <string>
- Definition: Mush include "fsl,sec-v4.0-pwrkey".
-
- - interrupts:
- Usage: required
- Value type: <prop_encoded-array>
- Definition: The SNVS ON/OFF interrupt number to the CPU(s).
-
- - linux,keycode:
- Usage: option
- Value type: <int>
- Definition: Keycode to emit, KEY_POWER by default.
-
- - wakeup-source:
- Usage: option
- Value type: <boo>
- Definition: Button can wake-up the system.
-
- - regmap:
- Usage: required:
- Value type: <phandle>
- Definition: this is phandle to the register map node.
-
-EXAMPLE:
- snvs-pwrkey@020cc000 {
- compatible = "fsl,sec-v4.0-pwrkey";
- regmap = <&snvs>;
- interrupts = <0 4 0x4>
- linux,keycode = <116>; /* KEY_POWER */
- wakeup-source;
- };
-
-=====================================================================
-FULL EXAMPLE
-
- crypto: crypto@300000 {
- compatible = "fsl,sec-v4.0";
- #address-cells = <1>;
- #size-cells = <1>;
- reg = <0x300000 0x10000>;
- ranges = <0 0x300000 0x10000>;
- interrupt-parent = <&mpic>;
- interrupts = <92 2>;
-
- sec_jr0: jr@1000 {
- compatible = "fsl,sec-v4.0-job-ring";
- reg = <0x1000 0x1000>;
- interrupt-parent = <&mpic>;
- interrupts = <88 2>;
- };
-
- sec_jr1: jr@2000 {
- compatible = "fsl,sec-v4.0-job-ring";
- reg = <0x2000 0x1000>;
- interrupt-parent = <&mpic>;
- interrupts = <89 2>;
- };
-
- sec_jr2: jr@3000 {
- compatible = "fsl,sec-v4.0-job-ring";
- reg = <0x3000 0x1000>;
- interrupt-parent = <&mpic>;
- interrupts = <90 2>;
- };
-
- sec_jr3: jr@4000 {
- compatible = "fsl,sec-v4.0-job-ring";
- reg = <0x4000 0x1000>;
- interrupt-parent = <&mpic>;
- interrupts = <91 2>;
- };
-
- rtic@6000 {
- compatible = "fsl,sec-v4.0-rtic";
- #address-cells = <1>;
- #size-cells = <1>;
- reg = <0x6000 0x100>;
- ranges = <0x0 0x6100 0xe00>;
-
- rtic_a: rtic-a@0 {
- compatible = "fsl,sec-v4.0-rtic-memory";
- reg = <0x00 0x20 0x100 0x80>;
- };
-
- rtic_b: rtic-b@20 {
- compatible = "fsl,sec-v4.0-rtic-memory";
- reg = <0x20 0x20 0x200 0x80>;
- };
-
- rtic_c: rtic-c@40 {
- compatible = "fsl,sec-v4.0-rtic-memory";
- reg = <0x40 0x20 0x300 0x80>;
- };
-
- rtic_d: rtic-d@60 {
- compatible = "fsl,sec-v4.0-rtic-memory";
- reg = <0x60 0x20 0x500 0x80>;
- };
- };
- };
-
- sec_mon: sec_mon@314000 {
- compatible = "fsl,sec-v4.0-mon";
- reg = <0x314000 0x1000>;
- ranges = <0 0x314000 0x1000>;
-
- sec_mon_rtc_lp@34 {
- compatible = "fsl,sec-v4.0-mon-rtc-lp";
- regmap = <&sec_mon>;
- offset = <0x34>;
- interrupts = <93 2>;
- clocks = <&clks IMX7D_SNVS_CLK>;
- clock-names = "snvs-rtc";
- };
-
- snvs-pwrkey@020cc000 {
- compatible = "fsl,sec-v4.0-pwrkey";
- regmap = <&sec_mon>;
- interrupts = <0 4 0x4>;
- linux,keycode = <116>; /* KEY_POWER */
- wakeup-source;
- };
- };
-
-=====================================================================
diff --git a/Documentation/devicetree/bindings/crypto/fsl-sec6.txt b/Documentation/devicetree/bindings/crypto/fsl-sec6.txt
deleted file mode 100644
index 73b0eb950bb3..000000000000
--- a/Documentation/devicetree/bindings/crypto/fsl-sec6.txt
+++ /dev/null
@@ -1,157 +0,0 @@
-SEC 6 is as Freescale's Cryptographic Accelerator and Assurance Module (CAAM).
-Currently Freescale powerpc chip C29X is embedded with SEC 6.
-SEC 6 device tree binding include:
- -SEC 6 Node
- -Job Ring Node
- -Full Example
-
-=====================================================================
-SEC 6 Node
-
-Description
-
- Node defines the base address of the SEC 6 block.
- This block specifies the address range of all global
- configuration registers for the SEC 6 block.
- For example, In C293, we could see three SEC 6 node.
-
-PROPERTIES
-
- - compatible
- Usage: required
- Value type: <string>
- Definition: Must include "fsl,sec-v6.0".
-
- - fsl,sec-era
- Usage: optional
- Value type: <u32>
- Definition: A standard property. Define the 'ERA' of the SEC
- device.
-
- - #address-cells
- Usage: required
- Value type: <u32>
- Definition: A standard property. Defines the number of cells
- for representing physical addresses in child nodes.
-
- - #size-cells
- Usage: required
- Value type: <u32>
- Definition: A standard property. Defines the number of cells
- for representing the size of physical addresses in
- child nodes.
-
- - reg
- Usage: required
- Value type: <prop-encoded-array>
- Definition: A standard property. Specifies the physical
- address and length of the SEC 6 configuration registers.
-
- - ranges
- Usage: required
- Value type: <prop-encoded-array>
- Definition: A standard property. Specifies the physical address
- range of the SEC 6.0 register space (-SNVS not included). A
- triplet that includes the child address, parent address, &
- length.
-
- Note: All other standard properties (see the Devicetree Specification)
- are allowed but are optional.
-
-EXAMPLE
- crypto@a0000 {
- compatible = "fsl,sec-v6.0";
- fsl,sec-era = <6>;
- #address-cells = <1>;
- #size-cells = <1>;
- reg = <0xa0000 0x20000>;
- ranges = <0 0xa0000 0x20000>;
- };
-
-=====================================================================
-Job Ring (JR) Node
-
- Child of the crypto node defines data processing interface to SEC 6
- across the peripheral bus for purposes of processing
- cryptographic descriptors. The specified address
- range can be made visible to one (or more) cores.
- The interrupt defined for this node is controlled within
- the address range of this node.
-
- - compatible
- Usage: required
- Value type: <string>
- Definition: Must include "fsl,sec-v6.0-job-ring".
-
- - reg
- Usage: required
- Value type: <prop-encoded-array>
- Definition: Specifies a two JR parameters: an offset from
- the parent physical address and the length the JR registers.
-
- - interrupts
- Usage: required
- Value type: <prop_encoded-array>
- Definition: Specifies the interrupts generated by this
- device. The value of the interrupts property
- consists of one interrupt specifier. The format
- of the specifier is defined by the binding document
- describing the node's interrupt parent.
-
-EXAMPLE
- jr@1000 {
- compatible = "fsl,sec-v6.0-job-ring";
- reg = <0x1000 0x1000>;
- interrupts = <49 2 0 0>;
- };
-
-===================================================================
-Full Example
-
-Since some chips may contain more than one SEC, the dtsi contains
-only the node contents, not the node itself. A chip using the SEC
-should include the dtsi inside each SEC node. Example:
-
-In qoriq-sec6.0.dtsi:
-
- compatible = "fsl,sec-v6.0";
- fsl,sec-era = <6>;
- #address-cells = <1>;
- #size-cells = <1>;
-
- jr@1000 {
- compatible = "fsl,sec-v6.0-job-ring",
- "fsl,sec-v5.2-job-ring",
- "fsl,sec-v5.0-job-ring",
- "fsl,sec-v4.4-job-ring",
- "fsl,sec-v4.0-job-ring";
- reg = <0x1000 0x1000>;
- };
-
- jr@2000 {
- compatible = "fsl,sec-v6.0-job-ring",
- "fsl,sec-v5.2-job-ring",
- "fsl,sec-v5.0-job-ring",
- "fsl,sec-v4.4-job-ring",
- "fsl,sec-v4.0-job-ring";
- reg = <0x2000 0x1000>;
- };
-
-In the C293 device tree, we add the include of public property:
-
- crypto@a0000 {
- /include/ "qoriq-sec6.0.dtsi"
- }
-
- crypto@a0000 {
- reg = <0xa0000 0x20000>;
- ranges = <0 0xa0000 0x20000>;
-
- jr@1000 {
- interrupts = <49 2 0 0>;
- };
-
- jr@2000 {
- interrupts = <50 2 0 0>;
- };
- };
diff --git a/Documentation/devicetree/bindings/crypto/hisilicon,hip06-sec.yaml b/Documentation/devicetree/bindings/crypto/hisilicon,hip06-sec.yaml
new file mode 100644
index 000000000000..2bfac9d1c020
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/hisilicon,hip06-sec.yaml
@@ -0,0 +1,134 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/hisilicon,hip06-sec.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Hisilicon hip06/hip07 Security Accelerator
+
+maintainers:
+ - Jonathan Cameron <Jonathan.Cameron@huawei.com>
+
+properties:
+ compatible:
+ enum:
+ - hisilicon,hip06-sec
+ - hisilicon,hip07-sec
+
+ reg:
+ items:
+ - description: Registers for backend processing engines
+ - description: Registers for common functionality
+ - description: Registers for queue 0
+ - description: Registers for queue 1
+ - description: Registers for queue 2
+ - description: Registers for queue 3
+ - description: Registers for queue 4
+ - description: Registers for queue 5
+ - description: Registers for queue 6
+ - description: Registers for queue 7
+ - description: Registers for queue 8
+ - description: Registers for queue 9
+ - description: Registers for queue 10
+ - description: Registers for queue 11
+ - description: Registers for queue 12
+ - description: Registers for queue 13
+ - description: Registers for queue 14
+ - description: Registers for queue 15
+
+ interrupts:
+ items:
+ - description: SEC unit error queue interrupt
+ - description: Completion interrupt for queue 0
+ - description: Error interrupt for queue 0
+ - description: Completion interrupt for queue 1
+ - description: Error interrupt for queue 1
+ - description: Completion interrupt for queue 2
+ - description: Error interrupt for queue 2
+ - description: Completion interrupt for queue 3
+ - description: Error interrupt for queue 3
+ - description: Completion interrupt for queue 4
+ - description: Error interrupt for queue 4
+ - description: Completion interrupt for queue 5
+ - description: Error interrupt for queue 5
+ - description: Completion interrupt for queue 6
+ - description: Error interrupt for queue 6
+ - description: Completion interrupt for queue 7
+ - description: Error interrupt for queue 7
+ - description: Completion interrupt for queue 8
+ - description: Error interrupt for queue 8
+ - description: Completion interrupt for queue 9
+ - description: Error interrupt for queue 9
+ - description: Completion interrupt for queue 10
+ - description: Error interrupt for queue 10
+ - description: Completion interrupt for queue 11
+ - description: Error interrupt for queue 11
+ - description: Completion interrupt for queue 12
+ - description: Error interrupt for queue 12
+ - description: Completion interrupt for queue 13
+ - description: Error interrupt for queue 13
+ - description: Completion interrupt for queue 14
+ - description: Error interrupt for queue 14
+ - description: Completion interrupt for queue 15
+ - description: Error interrupt for queue 15
+
+ dma-coherent: true
+
+ iommus:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - dma-coherent
+
+additionalProperties: false
+
+examples:
+ - |
+ bus {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ crypto@400d2000000 {
+ compatible = "hisilicon,hip07-sec";
+ reg = <0x400 0xd0000000 0x0 0x10000
+ 0x400 0xd2000000 0x0 0x10000
+ 0x400 0xd2010000 0x0 0x10000
+ 0x400 0xd2020000 0x0 0x10000
+ 0x400 0xd2030000 0x0 0x10000
+ 0x400 0xd2040000 0x0 0x10000
+ 0x400 0xd2050000 0x0 0x10000
+ 0x400 0xd2060000 0x0 0x10000
+ 0x400 0xd2070000 0x0 0x10000
+ 0x400 0xd2080000 0x0 0x10000
+ 0x400 0xd2090000 0x0 0x10000
+ 0x400 0xd20a0000 0x0 0x10000
+ 0x400 0xd20b0000 0x0 0x10000
+ 0x400 0xd20c0000 0x0 0x10000
+ 0x400 0xd20d0000 0x0 0x10000
+ 0x400 0xd20e0000 0x0 0x10000
+ 0x400 0xd20f0000 0x0 0x10000
+ 0x400 0xd2100000 0x0 0x10000>;
+ interrupts = <576 4>,
+ <577 1>, <578 4>,
+ <579 1>, <580 4>,
+ <581 1>, <582 4>,
+ <583 1>, <584 4>,
+ <585 1>, <586 4>,
+ <587 1>, <588 4>,
+ <589 1>, <590 4>,
+ <591 1>, <592 4>,
+ <593 1>, <594 4>,
+ <595 1>, <596 4>,
+ <597 1>, <598 4>,
+ <599 1>, <600 4>,
+ <601 1>, <602 4>,
+ <603 1>, <604 4>,
+ <605 1>, <606 4>,
+ <607 1>, <608 4>;
+ dma-coherent;
+ iommus = <&p1_smmu_alg_a 0x600>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/crypto/hisilicon,hip07-sec.txt b/Documentation/devicetree/bindings/crypto/hisilicon,hip07-sec.txt
deleted file mode 100644
index d28fd1af01b4..000000000000
--- a/Documentation/devicetree/bindings/crypto/hisilicon,hip07-sec.txt
+++ /dev/null
@@ -1,67 +0,0 @@
-* Hisilicon hip07 Security Accelerator (SEC)
-
-Required properties:
-- compatible: Must contain one of
- - "hisilicon,hip06-sec"
- - "hisilicon,hip07-sec"
-- reg: Memory addresses and lengths of the memory regions through which
- this device is controlled.
- Region 0 has registers to control the backend processing engines.
- Region 1 has registers for functionality common to all queues.
- Regions 2-18 have registers for the 16 individual queues which are isolated
- both in hardware and within the driver.
-- interrupts: Interrupt specifiers.
- Refer to interrupt-controller/interrupts.txt for generic interrupt client node
- bindings.
- Interrupt 0 is for the SEC unit error queue.
- Interrupt 2N + 1 is the completion interrupt for queue N.
- Interrupt 2N + 2 is the error interrupt for queue N.
-- dma-coherent: The driver assumes coherent dma is possible.
-
-Optional properties:
-- iommus: The SEC units are behind smmu-v3 iommus.
- Refer to iommu/arm,smmu-v3.txt for more information.
-
-Example:
-
-p1_sec_a: crypto@400d2000000 {
- compatible = "hisilicon,hip07-sec";
- reg = <0x400 0xd0000000 0x0 0x10000
- 0x400 0xd2000000 0x0 0x10000
- 0x400 0xd2010000 0x0 0x10000
- 0x400 0xd2020000 0x0 0x10000
- 0x400 0xd2030000 0x0 0x10000
- 0x400 0xd2040000 0x0 0x10000
- 0x400 0xd2050000 0x0 0x10000
- 0x400 0xd2060000 0x0 0x10000
- 0x400 0xd2070000 0x0 0x10000
- 0x400 0xd2080000 0x0 0x10000
- 0x400 0xd2090000 0x0 0x10000
- 0x400 0xd20a0000 0x0 0x10000
- 0x400 0xd20b0000 0x0 0x10000
- 0x400 0xd20c0000 0x0 0x10000
- 0x400 0xd20d0000 0x0 0x10000
- 0x400 0xd20e0000 0x0 0x10000
- 0x400 0xd20f0000 0x0 0x10000
- 0x400 0xd2100000 0x0 0x10000>;
- interrupt-parent = <&p1_mbigen_sec_a>;
- iommus = <&p1_smmu_alg_a 0x600>;
- dma-coherent;
- interrupts = <576 4>,
- <577 1>, <578 4>,
- <579 1>, <580 4>,
- <581 1>, <582 4>,
- <583 1>, <584 4>,
- <585 1>, <586 4>,
- <587 1>, <588 4>,
- <589 1>, <590 4>,
- <591 1>, <592 4>,
- <593 1>, <594 4>,
- <595 1>, <596 4>,
- <597 1>, <598 4>,
- <599 1>, <600 4>,
- <601 1>, <602 4>,
- <603 1>, <604 4>,
- <605 1>, <606 4>,
- <607 1>, <608 4>;
-};
diff --git a/Documentation/devicetree/bindings/crypto/img,hash-accelerator.yaml b/Documentation/devicetree/bindings/crypto/img,hash-accelerator.yaml
new file mode 100644
index 000000000000..46617561ef94
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/img,hash-accelerator.yaml
@@ -0,0 +1,69 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/img,hash-accelerator.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Imagination Technologies hardware hash accelerator
+
+maintainers:
+ - James Hartley <james.hartley@imgtec.com>
+
+description:
+ The hash accelerator provides hardware hashing acceleration for
+ SHA1, SHA224, SHA256 and MD5 hashes.
+
+properties:
+ compatible:
+ const: img,hash-accelerator
+
+ reg:
+ items:
+ - description: Register base address and size
+ - description: DMA port specifier
+
+ interrupts:
+ maxItems: 1
+
+ dmas:
+ maxItems: 1
+
+ dma-names:
+ items:
+ - const: tx
+
+ clocks:
+ items:
+ - description: System clock for hash block registers
+ - description: Hash clock for data path
+
+ clock-names:
+ items:
+ - const: sys
+ - const: hash
+
+additionalProperties: false
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - dmas
+ - dma-names
+ - clocks
+ - clock-names
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/mips-gic.h>
+ #include <dt-bindings/clock/pistachio-clk.h>
+
+ hash@18149600 {
+ compatible = "img,hash-accelerator";
+ reg = <0x18149600 0x100>, <0x18101100 0x4>;
+ interrupts = <GIC_SHARED 59 IRQ_TYPE_LEVEL_HIGH>;
+ dmas = <&dma 8 0xffffffff 0>;
+ dma-names = "tx";
+ clocks = <&cr_periph SYS_CLK_HASH>, <&clk_periph PERIPH_CLK_ROM>;
+ clock-names = "sys", "hash";
+ };
diff --git a/Documentation/devicetree/bindings/crypto/img-hash.txt b/Documentation/devicetree/bindings/crypto/img-hash.txt
deleted file mode 100644
index 91a3d757d641..000000000000
--- a/Documentation/devicetree/bindings/crypto/img-hash.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-Imagination Technologies hardware hash accelerator
-
-The hash accelerator provides hardware hashing acceleration for
-SHA1, SHA224, SHA256 and MD5 hashes
-
-Required properties:
-
-- compatible : "img,hash-accelerator"
-- reg : Offset and length of the register set for the module, and the DMA port
-- interrupts : The designated IRQ line for the hashing module.
-- dmas : DMA specifier as per Documentation/devicetree/bindings/dma/dma.txt
-- dma-names : Should be "tx"
-- clocks : Clock specifiers
-- clock-names : "sys" Used to clock the hash block registers
- "hash" Used to clock data through the accelerator
-
-Example:
-
- hash: hash@18149600 {
- compatible = "img,hash-accelerator";
- reg = <0x18149600 0x100>, <0x18101100 0x4>;
- interrupts = <GIC_SHARED 59 IRQ_TYPE_LEVEL_HIGH>;
- dmas = <&dma 8 0xffffffff 0>;
- dma-names = "tx";
- clocks = <&cr_periph SYS_CLK_HASH>, <&clk_periph PERIPH_CLK_ROM>;
- clock-names = "sys", "hash";
- };
diff --git a/Documentation/devicetree/bindings/crypto/inside-secure,safexcel-eip93.yaml b/Documentation/devicetree/bindings/crypto/inside-secure,safexcel-eip93.yaml
new file mode 100644
index 000000000000..997bf9717f9e
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/inside-secure,safexcel-eip93.yaml
@@ -0,0 +1,67 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/inside-secure,safexcel-eip93.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Inside Secure SafeXcel EIP-93 cryptographic engine
+
+maintainers:
+ - Christian Marangi <ansuelsmth@gmail.com>
+
+description: |
+ The Inside Secure SafeXcel EIP-93 is a cryptographic engine IP block
+ integrated in varios devices with very different and generic name from
+ PKTE to simply vendor+EIP93. The real IP under the hood is actually
+ developed by Inside Secure and given to license to vendors.
+
+ The IP block is sold with different model based on what feature are
+ needed and are identified with the final letter. Each letter correspond
+ to a specific set of feature and multiple letter reflect the sum of the
+ feature set.
+
+ EIP-93 models:
+ - EIP-93i: (basic) DES/Triple DES, AES, PRNG, IPsec ESP, SRTP, SHA1
+ - EIP-93ie: i + SHA224/256, AES-192/256
+ - EIP-93is: i + SSL/DTLS/DTLS, MD5, ARC4
+ - EIP-93ies: i + e + s
+ - EIP-93iw: i + AES-XCB-MAC, AES-CCM
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - const: airoha,en7581-eip93
+ - const: inside-secure,safexcel-eip93ies
+ - items:
+ - not: {}
+ description: Need a SoC specific compatible
+ - enum:
+ - inside-secure,safexcel-eip93i
+ - inside-secure,safexcel-eip93ie
+ - inside-secure,safexcel-eip93is
+ - inside-secure,safexcel-eip93iw
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ crypto@1e004000 {
+ compatible = "airoha,en7581-eip93", "inside-secure,safexcel-eip93ies";
+ reg = <0x1fb70000 0x1000>;
+
+ interrupts = <GIC_SPI 44 IRQ_TYPE_LEVEL_HIGH>;
+ };
diff --git a/Documentation/devicetree/bindings/crypto/inside-secure,safexcel.yaml b/Documentation/devicetree/bindings/crypto/inside-secure,safexcel.yaml
new file mode 100644
index 000000000000..343e2d04c797
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/inside-secure,safexcel.yaml
@@ -0,0 +1,88 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/inside-secure,safexcel.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Inside Secure SafeXcel cryptographic engine
+
+maintainers:
+ - Antoine Tenart <atenart@kernel.org>
+
+properties:
+ compatible:
+ oneOf:
+ - const: inside-secure,safexcel-eip197b
+ - const: inside-secure,safexcel-eip197d
+ - const: inside-secure,safexcel-eip97ies
+ - const: inside-secure,safexcel-eip197
+ description: Equivalent of inside-secure,safexcel-eip197b
+ deprecated: true
+ - const: inside-secure,safexcel-eip97
+ description: Equivalent of inside-secure,safexcel-eip97ies
+ deprecated: true
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 6
+
+ interrupt-names:
+ items:
+ - const: ring0
+ - const: ring1
+ - const: ring2
+ - const: ring3
+ - const: eip
+ - const: mem
+
+ clocks:
+ minItems: 1
+ maxItems: 2
+
+ clock-names:
+ minItems: 1
+ items:
+ - const: core
+ - const: reg
+
+ dma-coherent: true
+
+required:
+ - reg
+ - interrupts
+ - interrupt-names
+
+allOf:
+ - if:
+ properties:
+ clocks:
+ minItems: 2
+ then:
+ properties:
+ clock-names:
+ minItems: 2
+ required:
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ crypto@800000 {
+ compatible = "inside-secure,safexcel-eip197b";
+ reg = <0x800000 0x200000>;
+ interrupts = <GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 56 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 57 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "ring0", "ring1", "ring2", "ring3", "eip", "mem";
+ clocks = <&cpm_syscon0 1 26>;
+ clock-names = "core";
+ };
diff --git a/Documentation/devicetree/bindings/crypto/inside-secure-safexcel.txt b/Documentation/devicetree/bindings/crypto/inside-secure-safexcel.txt
deleted file mode 100644
index 3bbf144c9988..000000000000
--- a/Documentation/devicetree/bindings/crypto/inside-secure-safexcel.txt
+++ /dev/null
@@ -1,40 +0,0 @@
-Inside Secure SafeXcel cryptographic engine
-
-Required properties:
-- compatible: Should be "inside-secure,safexcel-eip197b",
- "inside-secure,safexcel-eip197d" or
- "inside-secure,safexcel-eip97ies".
-- reg: Base physical address of the engine and length of memory mapped region.
-- interrupts: Interrupt numbers for the rings and engine.
-- interrupt-names: Should be "ring0", "ring1", "ring2", "ring3", "eip", "mem".
-
-Optional properties:
-- clocks: Reference to the crypto engine clocks, the second clock is
- needed for the Armada 7K/8K SoCs.
-- clock-names: mandatory if there is a second clock, in this case the
- name must be "core" for the first clock and "reg" for
- the second one.
-
-Backward compatibility:
-Two compatibles are kept for backward compatibility, but shouldn't be used for
-new submissions:
-- "inside-secure,safexcel-eip197" is equivalent to
- "inside-secure,safexcel-eip197b".
-- "inside-secure,safexcel-eip97" is equivalent to
- "inside-secure,safexcel-eip97ies".
-
-Example:
-
- crypto: crypto@800000 {
- compatible = "inside-secure,safexcel-eip197b";
- reg = <0x800000 0x200000>;
- interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 56 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 57 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "mem", "ring0", "ring1", "ring2", "ring3",
- "eip";
- clocks = <&cpm_syscon0 1 26>;
- };
diff --git a/Documentation/devicetree/bindings/crypto/intel,ixp4xx-crypto.yaml b/Documentation/devicetree/bindings/crypto/intel,ixp4xx-crypto.yaml
new file mode 100644
index 000000000000..a4006237aa89
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/intel,ixp4xx-crypto.yaml
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright 2018 Linaro Ltd.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/intel,ixp4xx-crypto.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Intel IXP4xx cryptographic engine
+
+maintainers:
+ - Linus Walleij <linus.walleij@linaro.org>
+
+description: |
+ The Intel IXP4xx cryptographic engine makes use of the IXP4xx NPE
+ (Network Processing Engine). Since it is not a device on its own
+ it is defined as a subnode of the NPE, if crypto support is
+ available on the platform.
+
+properties:
+ compatible:
+ const: intel,ixp4xx-crypto
+
+ intel,npe-handle:
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ items:
+ - items:
+ - description: phandle to the NPE this crypto engine
+ - description: the NPE instance number
+ description: phandle to the NPE this crypto engine is using, the cell
+ describing the NPE instance to be used.
+
+ queue-rx:
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ items:
+ - items:
+ - description: phandle to the RX queue on the NPE
+ - description: the queue instance number
+ description: phandle to the RX queue on the NPE, the cell describing
+ the queue instance to be used.
+
+ queue-txready:
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ items:
+ - items:
+ - description: phandle to the TX READY queue on the NPE
+ - description: the queue instance number
+ description: phandle to the TX READY queue on the NPE, the cell describing
+ the queue instance to be used.
+
+required:
+ - compatible
+ - intel,npe-handle
+ - queue-rx
+ - queue-txready
+
+additionalProperties: false
diff --git a/Documentation/devicetree/bindings/crypto/intel,keembay-ocs-aes.yaml b/Documentation/devicetree/bindings/crypto/intel,keembay-ocs-aes.yaml
new file mode 100644
index 000000000000..fedd8be56ad6
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/intel,keembay-ocs-aes.yaml
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/intel,keembay-ocs-aes.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Intel Keem Bay OCS AES
+
+maintainers:
+ - Daniele Alessandrelli <daniele.alessandrelli@intel.com>
+
+description:
+ The Intel Keem Bay Offload and Crypto Subsystem (OCS) AES engine provides
+ hardware-accelerated AES/SM4 encryption/decryption.
+
+properties:
+ compatible:
+ const: intel,keembay-ocs-aes
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ crypto@30008000 {
+ compatible = "intel,keembay-ocs-aes";
+ reg = <0x30008000 0x1000>;
+ interrupts = <GIC_SPI 114 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&scmi_clk 95>;
+ };
diff --git a/Documentation/devicetree/bindings/crypto/intel,keembay-ocs-ecc.yaml b/Documentation/devicetree/bindings/crypto/intel,keembay-ocs-ecc.yaml
new file mode 100644
index 000000000000..2bb95247b64f
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/intel,keembay-ocs-ecc.yaml
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/intel,keembay-ocs-ecc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Intel Keem Bay OCS ECC
+
+maintainers:
+ - Daniele Alessandrelli <daniele.alessandrelli@intel.com>
+ - Prabhjot Khurana <prabhjot.khurana@intel.com>
+
+description:
+ The Intel Keem Bay Offload and Crypto Subsystem (OCS) Elliptic Curve
+ Cryptography (ECC) device provides hardware acceleration for elliptic curve
+ cryptography using the NIST P-256 and NIST P-384 elliptic curves.
+
+properties:
+ compatible:
+ const: intel,keembay-ocs-ecc
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ crypto@30001000 {
+ compatible = "intel,keembay-ocs-ecc";
+ reg = <0x30001000 0x1000>;
+ interrupts = <GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&scmi_clk 95>;
+ };
diff --git a/Documentation/devicetree/bindings/crypto/intel,keembay-ocs-hcu.yaml b/Documentation/devicetree/bindings/crypto/intel,keembay-ocs-hcu.yaml
new file mode 100644
index 000000000000..46e2853ab8f4
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/intel,keembay-ocs-hcu.yaml
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/intel,keembay-ocs-hcu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Intel Keem Bay OCS HCU
+
+maintainers:
+ - Declan Murphy <declan.murphy@intel.com>
+ - Daniele Alessandrelli <daniele.alessandrelli@intel.com>
+
+description:
+ The Intel Keem Bay Offload and Crypto Subsystem (OCS) Hash Control Unit (HCU)
+ provides hardware-accelerated hashing and HMAC.
+
+properties:
+ compatible:
+ const: intel,keembay-ocs-hcu
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ crypto@3000b000 {
+ compatible = "intel,keembay-ocs-hcu";
+ reg = <0x3000b000 0x1000>;
+ interrupts = <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&scmi_clk 94>;
+ };
diff --git a/Documentation/devicetree/bindings/crypto/marvell,orion-crypto.yaml b/Documentation/devicetree/bindings/crypto/marvell,orion-crypto.yaml
new file mode 100644
index 000000000000..b44d36c50ec4
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/marvell,orion-crypto.yaml
@@ -0,0 +1,133 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/marvell,orion-crypto.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell Cryptographic Engines And Security Accelerator
+
+maintainers:
+ - Andrew Lunn <andrew@lunn.ch>
+ - Boris Brezillon <bbrezillon@kernel.org>
+
+description: |
+ Marvell Cryptographic Engines And Security Accelerator
+
+properties:
+ compatible:
+ enum:
+ - marvell,armada-370-crypto
+ - marvell,armada-xp-crypto
+ - marvell,armada-375-crypto
+ - marvell,armada-38x-crypto
+ - marvell,dove-crypto
+ - marvell,kirkwood-crypto
+ - marvell,orion-crypto
+
+ reg:
+ minItems: 1
+ items:
+ - description: Registers region
+ - description: SRAM region
+ deprecated: true
+
+ reg-names:
+ minItems: 1
+ items:
+ - const: regs
+ - const: sram
+ deprecated: true
+
+ interrupts:
+ description: One interrupt for each CESA engine
+ minItems: 1
+ maxItems: 2
+
+ clocks:
+ description: One or two clocks for each CESA engine
+ minItems: 1
+ maxItems: 4
+
+ clock-names:
+ minItems: 1
+ items:
+ - const: cesa0
+ - const: cesa1
+ - const: cesaz0
+ - const: cesaz1
+
+ marvell,crypto-srams:
+ description: Phandle(s) to crypto SRAM.
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ minItems: 1
+ maxItems: 2
+ items:
+ maxItems: 1
+
+ marvell,crypto-sram-size:
+ description: SRAM size reserved for crypto operations.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ default: 0x800
+
+required:
+ - compatible
+ - reg
+ - reg-names
+ - interrupts
+ - marvell,crypto-srams
+
+allOf:
+ - if:
+ not:
+ properties:
+ compatible:
+ enum:
+ - marvell,kirkwood-crypto
+ - marvell,orion-crypto
+ then:
+ required:
+ - clocks
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - marvell,armada-370-crypto
+ - marvell,armada-375-crypto
+ - marvell,armada-38x-crypto
+ - marvell,armada-xp-crypto
+ then:
+ required:
+ - clock-names
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - marvell,armada-375-crypto
+ - marvell,armada-38x-crypto
+ then:
+ properties:
+ clocks:
+ minItems: 4
+ clock-names:
+ minItems: 4
+ else:
+ properties:
+ clocks:
+ maxItems: 2
+ clock-names:
+ maxItems: 2
+
+additionalProperties: false
+
+examples:
+ - |
+ crypto@30000 {
+ compatible = "marvell,orion-crypto";
+ reg = <0x30000 0x10000>;
+ reg-names = "regs";
+ interrupts = <22>;
+ marvell,crypto-srams = <&crypto_sram>;
+ marvell,crypto-sram-size = <0x600>;
+ };
diff --git a/Documentation/devicetree/bindings/crypto/marvell-cesa.txt b/Documentation/devicetree/bindings/crypto/marvell-cesa.txt
deleted file mode 100644
index 28d3f2496b89..000000000000
--- a/Documentation/devicetree/bindings/crypto/marvell-cesa.txt
+++ /dev/null
@@ -1,44 +0,0 @@
-Marvell Cryptographic Engines And Security Accelerator
-
-Required properties:
-- compatible: should be one of the following string
- "marvell,orion-crypto"
- "marvell,kirkwood-crypto"
- "marvell,dove-crypto"
- "marvell,armada-370-crypto"
- "marvell,armada-xp-crypto"
- "marvell,armada-375-crypto"
- "marvell,armada-38x-crypto"
-- reg: base physical address of the engine and length of memory mapped
- region. Can also contain an entry for the SRAM attached to the CESA,
- but this representation is deprecated and marvell,crypto-srams should
- be used instead
-- reg-names: "regs". Can contain an "sram" entry, but this representation
- is deprecated and marvell,crypto-srams should be used instead
-- interrupts: interrupt number
-- clocks: reference to the crypto engines clocks. This property is not
- required for orion and kirkwood platforms
-- clock-names: "cesaX" and "cesazX", X should be replaced by the crypto engine
- id.
- This property is not required for the orion and kirkwoord
- platforms.
- "cesazX" clocks are not required on armada-370 platforms
-- marvell,crypto-srams: phandle to crypto SRAM definitions
-
-Optional properties:
-- marvell,crypto-sram-size: SRAM size reserved for crypto operations, if not
- specified the whole SRAM is used (2KB)
-
-
-Examples:
-
- crypto@90000 {
- compatible = "marvell,armada-xp-crypto";
- reg = <0x90000 0x10000>;
- reg-names = "regs";
- interrupts = <48>, <49>;
- clocks = <&gateclk 23>, <&gateclk 23>;
- clock-names = "cesa0", "cesa1";
- marvell,crypto-srams = <&crypto_sram0>, <&crypto_sram1>;
- marvell,crypto-sram-size = <0x600>;
- };
diff --git a/Documentation/devicetree/bindings/crypto/mediatek-crypto.txt b/Documentation/devicetree/bindings/crypto/mediatek-crypto.txt
deleted file mode 100644
index 450da3661cad..000000000000
--- a/Documentation/devicetree/bindings/crypto/mediatek-crypto.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-MediaTek cryptographic accelerators
-
-Required properties:
-- compatible: Should be "mediatek,eip97-crypto"
-- reg: Address and length of the register set for the device
-- interrupts: Should contain the five crypto engines interrupts in numeric
- order. These are global system and four descriptor rings.
-- clocks: the clock used by the core
-- clock-names: Must contain "cryp".
-- power-domains: Must contain a reference to the PM domain.
-
-
-Example:
- crypto: crypto@1b240000 {
- compatible = "mediatek,eip97-crypto";
- reg = <0 0x1b240000 0 0x20000>;
- interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_LOW>,
- <GIC_SPI 83 IRQ_TYPE_LEVEL_LOW>,
- <GIC_SPI 84 IRQ_TYPE_LEVEL_LOW>,
- <GIC_SPI 91 IRQ_TYPE_LEVEL_LOW>,
- <GIC_SPI 97 IRQ_TYPE_LEVEL_LOW>;
- clocks = <&ethsys CLK_ETHSYS_CRYPTO>;
- clock-names = "cryp";
- power-domains = <&scpsys MT2701_POWER_DOMAIN_ETH>;
- };
diff --git a/Documentation/devicetree/bindings/crypto/mv_cesa.txt b/Documentation/devicetree/bindings/crypto/mv_cesa.txt
deleted file mode 100644
index d9b92e2f3138..000000000000
--- a/Documentation/devicetree/bindings/crypto/mv_cesa.txt
+++ /dev/null
@@ -1,32 +0,0 @@
-Marvell Cryptographic Engines And Security Accelerator
-
-Required properties:
-- compatible: should be one of the following string
- "marvell,orion-crypto"
- "marvell,kirkwood-crypto"
- "marvell,dove-crypto"
-- reg: base physical address of the engine and length of memory mapped
- region. Can also contain an entry for the SRAM attached to the CESA,
- but this representation is deprecated and marvell,crypto-srams should
- be used instead
-- reg-names: "regs". Can contain an "sram" entry, but this representation
- is deprecated and marvell,crypto-srams should be used instead
-- interrupts: interrupt number
-- clocks: reference to the crypto engines clocks. This property is only
- required for Dove platforms
-- marvell,crypto-srams: phandle to crypto SRAM definitions
-
-Optional properties:
-- marvell,crypto-sram-size: SRAM size reserved for crypto operations, if not
- specified the whole SRAM is used (2KB)
-
-Examples:
-
- crypto@30000 {
- compatible = "marvell,orion-crypto";
- reg = <0x30000 0x10000>;
- reg-names = "regs";
- interrupts = <22>;
- marvell,crypto-srams = <&crypto_sram>;
- marvell,crypto-sram-size = <0x600>;
- };
diff --git a/Documentation/devicetree/bindings/crypto/nvidia,tegra234-se-aes.yaml b/Documentation/devicetree/bindings/crypto/nvidia,tegra234-se-aes.yaml
new file mode 100644
index 000000000000..cb47ae2889b6
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/nvidia,tegra234-se-aes.yaml
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/nvidia,tegra234-se-aes.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NVIDIA Tegra Security Engine for AES algorithms
+
+description:
+ The Tegra Security Engine accelerates the following AES encryption/decryption
+ algorithms - AES-ECB, AES-CBC, AES-OFB, AES-XTS, AES-CTR, AES-GCM, AES-CCM,
+ AES-CMAC
+
+maintainers:
+ - Akhil R <akhilrajeev@nvidia.com>
+
+properties:
+ compatible:
+ const: nvidia,tegra234-se-aes
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ iommus:
+ maxItems: 1
+
+ dma-coherent: true
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - iommus
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/memory/tegra234-mc.h>
+ #include <dt-bindings/clock/tegra234-clock.h>
+
+ crypto@15820000 {
+ compatible = "nvidia,tegra234-se-aes";
+ reg = <0x15820000 0x10000>;
+ clocks = <&bpmp TEGRA234_CLK_SE>;
+ iommus = <&smmu TEGRA234_SID_SES_SE1>;
+ dma-coherent;
+ };
+...
diff --git a/Documentation/devicetree/bindings/crypto/nvidia,tegra234-se-hash.yaml b/Documentation/devicetree/bindings/crypto/nvidia,tegra234-se-hash.yaml
new file mode 100644
index 000000000000..f57ef10645e2
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/nvidia,tegra234-se-hash.yaml
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/nvidia,tegra234-se-hash.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NVIDIA Tegra Security Engine for HASH algorithms
+
+description:
+ The Tegra Security HASH Engine accelerates the following HASH functions -
+ SHA1, SHA224, SHA256, SHA384, SHA512, SHA3-224, SHA3-256, SHA3-384, SHA3-512
+ HMAC(SHA224), HMAC(SHA256), HMAC(SHA384), HMAC(SHA512)
+
+maintainers:
+ - Akhil R <akhilrajeev@nvidia.com>
+
+properties:
+ compatible:
+ const: nvidia,tegra234-se-hash
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ iommus:
+ maxItems: 1
+
+ dma-coherent: true
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - iommus
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/memory/tegra234-mc.h>
+ #include <dt-bindings/clock/tegra234-clock.h>
+
+ crypto@15840000 {
+ compatible = "nvidia,tegra234-se-hash";
+ reg = <0x15840000 0x10000>;
+ clocks = <&bpmp TEGRA234_CLK_SE>;
+ iommus = <&smmu TEGRA234_SID_SES_SE2>;
+ dma-coherent;
+ };
+...
diff --git a/Documentation/devicetree/bindings/crypto/omap-aes.txt b/Documentation/devicetree/bindings/crypto/omap-aes.txt
deleted file mode 100644
index fd9717653cbb..000000000000
--- a/Documentation/devicetree/bindings/crypto/omap-aes.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-OMAP SoC AES crypto Module
-
-Required properties:
-
-- compatible : Should contain entries for this and backward compatible
- AES versions:
- - "ti,omap2-aes" for OMAP2.
- - "ti,omap3-aes" for OMAP3.
- - "ti,omap4-aes" for OMAP4 and AM33XX.
- Note that the OMAP2 and 3 versions are compatible (OMAP3 supports
- more algorithms) but they are incompatible with OMAP4.
-- ti,hwmods: Name of the hwmod associated with the AES module
-- reg : Offset and length of the register set for the module
-- interrupts : the interrupt-specifier for the AES module.
-
-Optional properties:
-- dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
- Documentation/devicetree/bindings/dma/dma.txt
-- dma-names: DMA request names should include "tx" and "rx" if present.
-
-Example:
- /* AM335x */
- aes: aes@53500000 {
- compatible = "ti,omap4-aes";
- ti,hwmods = "aes";
- reg = <0x53500000 0xa0>;
- interrupts = <102>;
- dmas = <&edma 6>,
- <&edma 5>;
- dma-names = "tx", "rx";
- };
diff --git a/Documentation/devicetree/bindings/crypto/omap-des.txt b/Documentation/devicetree/bindings/crypto/omap-des.txt
deleted file mode 100644
index e8c63bf2e16d..000000000000
--- a/Documentation/devicetree/bindings/crypto/omap-des.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-OMAP SoC DES crypto Module
-
-Required properties:
-
-- compatible : Should contain "ti,omap4-des"
-- ti,hwmods: Name of the hwmod associated with the DES module
-- reg : Offset and length of the register set for the module
-- interrupts : the interrupt-specifier for the DES module
-- clocks : A phandle to the functional clock node of the DES module
- corresponding to each entry in clock-names
-- clock-names : Name of the functional clock, should be "fck"
-
-Optional properties:
-- dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
- Documentation/devicetree/bindings/dma/dma.txt
- Each entry corresponds to an entry in dma-names
-- dma-names: DMA request names should include "tx" and "rx" if present
-
-Example:
- /* DRA7xx SoC */
- des: des@480a5000 {
- compatible = "ti,omap4-des";
- ti,hwmods = "des";
- reg = <0x480a5000 0xa0>;
- interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>;
- dmas = <&sdma 117>, <&sdma 116>;
- dma-names = "tx", "rx";
- clocks = <&l3_iclk_div>;
- clock-names = "fck";
- };
diff --git a/Documentation/devicetree/bindings/crypto/omap-sham.txt b/Documentation/devicetree/bindings/crypto/omap-sham.txt
deleted file mode 100644
index ad9115569611..000000000000
--- a/Documentation/devicetree/bindings/crypto/omap-sham.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-OMAP SoC SHA crypto Module
-
-Required properties:
-
-- compatible : Should contain entries for this and backward compatible
- SHAM versions:
- - "ti,omap2-sham" for OMAP2 & OMAP3.
- - "ti,omap4-sham" for OMAP4 and AM33XX.
- - "ti,omap5-sham" for OMAP5, DRA7 and AM43XX.
-- ti,hwmods: Name of the hwmod associated with the SHAM module
-- reg : Offset and length of the register set for the module
-- interrupts : the interrupt-specifier for the SHAM module.
-
-Optional properties:
-- dmas: DMA specifiers for the rx dma. See the DMA client binding,
- Documentation/devicetree/bindings/dma/dma.txt
-- dma-names: DMA request name. Should be "rx" if a dma is present.
-
-Example:
- /* AM335x */
- sham: sham@53100000 {
- compatible = "ti,omap4-sham";
- ti,hwmods = "sham";
- reg = <0x53100000 0x200>;
- interrupts = <109>;
- dmas = <&edma 36>;
- dma-names = "rx";
- };
diff --git a/Documentation/devicetree/bindings/crypto/picochip-spacc.txt b/Documentation/devicetree/bindings/crypto/picochip-spacc.txt
deleted file mode 100644
index df1151f87745..000000000000
--- a/Documentation/devicetree/bindings/crypto/picochip-spacc.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-Picochip picoXcell SPAcc (Security Protocol Accelerator) bindings
-
-Picochip picoXcell devices contain crypto offload engines that may be used for
-IPSEC and femtocell layer 2 ciphering.
-
-Required properties:
- - compatible : "picochip,spacc-ipsec" for the IPSEC offload engine
- "picochip,spacc-l2" for the femtocell layer 2 ciphering engine.
- - reg : Offset and length of the register set for this device
- - interrupts : The interrupt line from the SPAcc.
- - ref-clock : The input clock that drives the SPAcc.
-
-Example SPAcc node:
-
-spacc@10000 {
- compatible = "picochip,spacc-ipsec";
- reg = <0x100000 0x10000>;
- interrupt-parent = <&vic0>;
- interrupts = <24>;
- ref-clock = <&ipsec_clk>, "ref";
-};
diff --git a/Documentation/devicetree/bindings/crypto/qcom,inline-crypto-engine.yaml b/Documentation/devicetree/bindings/crypto/qcom,inline-crypto-engine.yaml
new file mode 100644
index 000000000000..08fe6a707a37
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/qcom,inline-crypto-engine.yaml
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/qcom,inline-crypto-engine.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Technologies, Inc. (QTI) Inline Crypto Engine
+
+maintainers:
+ - Bjorn Andersson <andersson@kernel.org>
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - qcom,qcs8300-inline-crypto-engine
+ - qcom,sa8775p-inline-crypto-engine
+ - qcom,sc7180-inline-crypto-engine
+ - qcom,sc7280-inline-crypto-engine
+ - qcom,sm8450-inline-crypto-engine
+ - qcom,sm8550-inline-crypto-engine
+ - qcom,sm8650-inline-crypto-engine
+ - qcom,sm8750-inline-crypto-engine
+ - const: qcom,inline-crypto-engine
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,sm8550-gcc.h>
+
+ crypto@1d88000 {
+ compatible = "qcom,sm8550-inline-crypto-engine",
+ "qcom,inline-crypto-engine";
+ reg = <0x01d88000 0x8000>;
+ clocks = <&gcc GCC_UFS_PHY_ICE_CORE_CLK>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/crypto/qcom,prng.txt b/Documentation/devicetree/bindings/crypto/qcom,prng.txt
deleted file mode 100644
index 7ee0e9eac973..000000000000
--- a/Documentation/devicetree/bindings/crypto/qcom,prng.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-Qualcomm MSM pseudo random number generator.
-
-Required properties:
-
-- compatible : should be "qcom,prng" for 8916 etc
- : should be "qcom,prng-ee" for 8996 and later using EE
- (Execution Environment) slice of prng
-- reg : specifies base physical address and size of the registers map
-- clocks : phandle to clock-controller plus clock-specifier pair
-- clock-names : "core" clocks all registers, FIFO and circuits in PRNG IP block
-
-Example:
-
- rng@f9bff000 {
- compatible = "qcom,prng";
- reg = <0xf9bff000 0x200>;
- clocks = <&clock GCC_PRNG_AHB_CLK>;
- clock-names = "core";
- };
diff --git a/Documentation/devicetree/bindings/crypto/qcom,prng.yaml b/Documentation/devicetree/bindings/crypto/qcom,prng.yaml
new file mode 100644
index 000000000000..ed7e16bd11d3
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/qcom,prng.yaml
@@ -0,0 +1,69 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/qcom,prng.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Pseudo Random Number Generator
+
+maintainers:
+ - Vinod Koul <vkoul@kernel.org>
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - qcom,prng # 8916 etc.
+ - qcom,prng-ee # 8996 and later using EE
+ - items:
+ - enum:
+ - qcom,ipq5332-trng
+ - qcom,ipq5424-trng
+ - qcom,ipq9574-trng
+ - qcom,qcs615-trng
+ - qcom,qcs8300-trng
+ - qcom,sa8255p-trng
+ - qcom,sa8775p-trng
+ - qcom,sc7280-trng
+ - qcom,sm8450-trng
+ - qcom,sm8550-trng
+ - qcom,sm8650-trng
+ - qcom,sm8750-trng
+ - const: qcom,trng
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ items:
+ - const: core
+
+required:
+ - compatible
+ - reg
+
+allOf:
+ - if:
+ not:
+ properties:
+ compatible:
+ contains:
+ const: qcom,trng
+ then:
+ required:
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ - |
+ rng@f9bff000 {
+ compatible = "qcom,prng";
+ reg = <0xf9bff000 0x200>;
+ clocks = <&clk 125>;
+ clock-names = "core";
+ };
diff --git a/Documentation/devicetree/bindings/crypto/qcom-qce.txt b/Documentation/devicetree/bindings/crypto/qcom-qce.txt
deleted file mode 100644
index fdd53b184ba8..000000000000
--- a/Documentation/devicetree/bindings/crypto/qcom-qce.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-Qualcomm crypto engine driver
-
-Required properties:
-
-- compatible : should be "qcom,crypto-v5.1"
-- reg : specifies base physical address and size of the registers map
-- clocks : phandle to clock-controller plus clock-specifier pair
-- clock-names : "iface" clocks register interface
- "bus" clocks data transfer interface
- "core" clocks rest of the crypto block
-- dmas : DMA specifiers for tx and rx dma channels. For more see
- Documentation/devicetree/bindings/dma/dma.txt
-- dma-names : DMA request names should be "rx" and "tx"
-
-Example:
- crypto@fd45a000 {
- compatible = "qcom,crypto-v5.1";
- reg = <0xfd45a000 0x6000>;
- clocks = <&gcc GCC_CE2_AHB_CLK>,
- <&gcc GCC_CE2_AXI_CLK>,
- <&gcc GCC_CE2_CLK>;
- clock-names = "iface", "bus", "core";
- dmas = <&cryptobam 2>, <&cryptobam 3>;
- dma-names = "rx", "tx";
- };
diff --git a/Documentation/devicetree/bindings/crypto/qcom-qce.yaml b/Documentation/devicetree/bindings/crypto/qcom-qce.yaml
new file mode 100644
index 000000000000..e009cb712fb8
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/qcom-qce.yaml
@@ -0,0 +1,177 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/qcom-qce.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm crypto engine driver
+
+maintainers:
+ - Bjorn Andersson <andersson@kernel.org>
+ - Konrad Dybcio <konradybcio@kernel.org>
+
+description:
+ This document defines the binding for the QCE crypto
+ controller found on Qualcomm parts.
+
+properties:
+ compatible:
+ oneOf:
+ - const: qcom,crypto-v5.1
+ deprecated: true
+ description: Kept only for ABI backward compatibility
+
+ - const: qcom,crypto-v5.4
+ deprecated: true
+ description: Kept only for ABI backward compatibility
+
+ - items:
+ - enum:
+ - qcom,ipq4019-qce
+ - qcom,sm8150-qce
+ - const: qcom,qce
+
+ - items:
+ - enum:
+ - qcom,ipq6018-qce
+ - qcom,ipq8074-qce
+ - qcom,ipq9574-qce
+ - qcom,msm8996-qce
+ - qcom,qcm2290-qce
+ - qcom,sdm845-qce
+ - qcom,sm6115-qce
+ - const: qcom,ipq4019-qce
+ - const: qcom,qce
+
+ - items:
+ - enum:
+ - qcom,qcs615-qce
+ - qcom,qcs8300-qce
+ - qcom,sa8775p-qce
+ - qcom,sc7280-qce
+ - qcom,sm6350-qce
+ - qcom,sm8250-qce
+ - qcom,sm8350-qce
+ - qcom,sm8450-qce
+ - qcom,sm8550-qce
+ - qcom,sm8650-qce
+ - qcom,sm8750-qce
+ - qcom,x1e80100-qce
+ - const: qcom,sm8150-qce
+ - const: qcom,qce
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ minItems: 1
+ maxItems: 3
+
+ clock-names:
+ minItems: 1
+ maxItems: 3
+
+ iommus:
+ minItems: 1
+ maxItems: 8
+ description:
+ phandle to apps_smmu node with sid mask.
+
+ interconnects:
+ maxItems: 1
+ description:
+ Interconnect path between qce crypto and main memory.
+
+ interconnect-names:
+ const: memory
+
+ dmas:
+ items:
+ - description: DMA specifiers for rx dma channel.
+ - description: DMA specifiers for tx dma channel.
+
+ dma-names:
+ items:
+ - const: rx
+ - const: tx
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,crypto-v5.1
+ - qcom,crypto-v5.4
+ - qcom,ipq6018-qce
+ - qcom,ipq8074-qce
+ - qcom,ipq9574-qce
+ - qcom,msm8996-qce
+ - qcom,sdm845-qce
+ then:
+ properties:
+ clocks:
+ maxItems: 3
+ clock-names:
+ items:
+ - const: iface
+ - const: bus
+ - const: core
+ required:
+ - clocks
+ - clock-names
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,qcm2290-qce
+ - qcom,sm6115-qce
+ then:
+ properties:
+ clocks:
+ maxItems: 1
+ clock-names:
+ items:
+ - const: core
+ required:
+ - clocks
+ - clock-names
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,sm8150-qce
+ then:
+ properties:
+ clocks: false
+ clock-names: false
+
+required:
+ - compatible
+ - reg
+ - dmas
+ - dma-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,gcc-apq8084.h>
+ crypto-engine@fd45a000 {
+ compatible = "qcom,ipq6018-qce", "qcom,ipq4019-qce", "qcom,qce";
+ reg = <0xfd45a000 0x6000>;
+ clocks = <&gcc GCC_CE2_AHB_CLK>,
+ <&gcc GCC_CE2_AXI_CLK>,
+ <&gcc GCC_CE2_CLK>;
+ clock-names = "iface", "bus", "core";
+ dmas = <&cryptobam 2>, <&cryptobam 3>;
+ dma-names = "rx", "tx";
+ iommus = <&apps_smmu 0x584 0x0011>,
+ <&apps_smmu 0x586 0x0011>,
+ <&apps_smmu 0x594 0x0011>,
+ <&apps_smmu 0x596 0x0011>;
+ };
diff --git a/Documentation/devicetree/bindings/crypto/rockchip,rk3288-crypto.yaml b/Documentation/devicetree/bindings/crypto/rockchip,rk3288-crypto.yaml
new file mode 100644
index 000000000000..f1a9da8bff7a
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/rockchip,rk3288-crypto.yaml
@@ -0,0 +1,127 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/rockchip,rk3288-crypto.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip Electronics Security Accelerator
+
+maintainers:
+ - Heiko Stuebner <heiko@sntech.de>
+
+properties:
+ compatible:
+ enum:
+ - rockchip,rk3288-crypto
+ - rockchip,rk3328-crypto
+ - rockchip,rk3399-crypto
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ minItems: 3
+ maxItems: 4
+
+ clock-names:
+ minItems: 3
+ maxItems: 4
+
+ resets:
+ minItems: 1
+ maxItems: 3
+
+ reset-names:
+ minItems: 1
+ maxItems: 3
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: rockchip,rk3288-crypto
+ then:
+ properties:
+ clocks:
+ minItems: 4
+ clock-names:
+ items:
+ - const: aclk
+ - const: hclk
+ - const: sclk
+ - const: apb_pclk
+ resets:
+ maxItems: 1
+ reset-names:
+ items:
+ - const: crypto-rst
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: rockchip,rk3328-crypto
+ then:
+ properties:
+ clocks:
+ maxItems: 3
+ clock-names:
+ items:
+ - const: hclk_master
+ - const: hclk_slave
+ - const: sclk
+ resets:
+ maxItems: 1
+ reset-names:
+ items:
+ - const: crypto-rst
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: rockchip,rk3399-crypto
+ then:
+ properties:
+ clocks:
+ maxItems: 3
+ clock-names:
+ items:
+ - const: hclk_master
+ - const: hclk_slave
+ - const: sclk
+ resets:
+ minItems: 3
+ reset-names:
+ items:
+ - const: master
+ - const: slave
+ - const: crypto-rst
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+ - resets
+ - reset-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/clock/rk3288-cru.h>
+ crypto@ff8a0000 {
+ compatible = "rockchip,rk3288-crypto";
+ reg = <0xff8a0000 0x4000>;
+ interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&cru ACLK_CRYPTO>, <&cru HCLK_CRYPTO>,
+ <&cru SCLK_CRYPTO>, <&cru ACLK_DMAC1>;
+ clock-names = "aclk", "hclk", "sclk", "apb_pclk";
+ resets = <&cru SRST_CRYPTO>;
+ reset-names = "crypto-rst";
+ };
diff --git a/Documentation/devicetree/bindings/crypto/rockchip-crypto.txt b/Documentation/devicetree/bindings/crypto/rockchip-crypto.txt
deleted file mode 100644
index 5e2ba385b8c9..000000000000
--- a/Documentation/devicetree/bindings/crypto/rockchip-crypto.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-Rockchip Electronics And Security Accelerator
-
-Required properties:
-- compatible: Should be "rockchip,rk3288-crypto"
-- reg: Base physical address of the engine and length of memory mapped
- region
-- interrupts: Interrupt number
-- clocks: Reference to the clocks about crypto
-- clock-names: "aclk" used to clock data
- "hclk" used to clock data
- "sclk" used to clock crypto accelerator
- "apb_pclk" used to clock dma
-- resets: Must contain an entry for each entry in reset-names.
- See ../reset/reset.txt for details.
-- reset-names: Must include the name "crypto-rst".
-
-Examples:
-
- crypto: cypto-controller@ff8a0000 {
- compatible = "rockchip,rk3288-crypto";
- reg = <0xff8a0000 0x4000>;
- interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&cru ACLK_CRYPTO>, <&cru HCLK_CRYPTO>,
- <&cru SCLK_CRYPTO>, <&cru ACLK_DMAC1>;
- clock-names = "aclk", "hclk", "sclk", "apb_pclk";
- resets = <&cru SRST_CRYPTO>;
- reset-names = "crypto-rst";
- };
diff --git a/Documentation/devicetree/bindings/crypto/samsung-slimsss.yaml b/Documentation/devicetree/bindings/crypto/samsung-slimsss.yaml
new file mode 100644
index 000000000000..5b31891c97fe
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/samsung-slimsss.yaml
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/samsung-slimsss.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos SoC SlimSSS (Slim Security SubSystem) module
+
+maintainers:
+ - Krzysztof Kozlowski <krzk@kernel.org>
+
+description: |+
+ The SlimSSS module in Exynos5433 SoC supports the following:
+ -- Feeder (FeedCtrl)
+ -- Advanced Encryption Standard (AES) with ECB,CBC,CTR,XTS and (CBC/XTS)/CTS
+ -- SHA-1/SHA-256 and (SHA-1/SHA-256)/HMAC
+
+properties:
+ compatible:
+ items:
+ - const: samsung,exynos5433-slim-sss
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 2
+
+ clock-names:
+ items:
+ - const: pclk
+ - const: aclk
+
+ interrupts:
+ description: One feed control interrupt.
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - clock-names
+ - clocks
+ - interrupts
+
+additionalProperties: false
diff --git a/Documentation/devicetree/bindings/crypto/samsung-sss.txt b/Documentation/devicetree/bindings/crypto/samsung-sss.txt
deleted file mode 100644
index 7a5ca56683cc..000000000000
--- a/Documentation/devicetree/bindings/crypto/samsung-sss.txt
+++ /dev/null
@@ -1,32 +0,0 @@
-Samsung SoC SSS (Security SubSystem) module
-
-The SSS module in S5PV210 SoC supports the following:
--- Feeder (FeedCtrl)
--- Advanced Encryption Standard (AES)
--- Data Encryption Standard (DES)/3DES
--- Public Key Accelerator (PKA)
--- SHA-1/SHA-256/MD5/HMAC (SHA-1/SHA-256/MD5)/PRNG
--- PRNG: Pseudo Random Number Generator
-
-The SSS module in Exynos4 (Exynos4210) and
-Exynos5 (Exynos5420 and Exynos5250) SoCs
-supports the following also:
--- ARCFOUR (ARC4)
--- True Random Number Generator (TRNG)
--- Secure Key Manager
-
-Required properties:
-
-- compatible : Should contain entries for this and backward compatible
- SSS versions:
- - "samsung,s5pv210-secss" for S5PV210 SoC.
- - "samsung,exynos4210-secss" for Exynos4210, Exynos4212, Exynos4412, Exynos5250,
- Exynos5260 and Exynos5420 SoCs.
-- reg : Offset and length of the register set for the module
-- interrupts : interrupt specifiers of SSS module interrupts (one feed
- control interrupt).
-
-- clocks : list of clock phandle and specifier pairs for all clocks listed in
- clock-names property.
-- clock-names : list of device clock input names; should contain one entry
- "secss".
diff --git a/Documentation/devicetree/bindings/crypto/samsung-sss.yaml b/Documentation/devicetree/bindings/crypto/samsung-sss.yaml
new file mode 100644
index 000000000000..6d62b0e42fc9
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/samsung-sss.yaml
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/samsung-sss.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos SoC SSS (Security SubSystem) module
+
+maintainers:
+ - Krzysztof Kozlowski <krzk@kernel.org>
+
+description: |+
+ The SSS module in S5PV210 SoC supports the following:
+ -- Feeder (FeedCtrl)
+ -- Advanced Encryption Standard (AES)
+ -- Data Encryption Standard (DES)/3DES
+ -- Public Key Accelerator (PKA)
+ -- SHA-1/SHA-256/MD5/HMAC (SHA-1/SHA-256/MD5)/PRNG
+ -- PRNG: Pseudo Random Number Generator
+
+ The SSS module in Exynos4 (Exynos4210) and Exynos5 (Exynos5420 and Exynos5250)
+ SoCs supports the following also:
+ -- ARCFOUR (ARC4)
+ -- True Random Number Generator (TRNG)
+ -- Secure Key Manager
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - samsung,s5pv210-secss # for S5PV210
+ - samsung,exynos4210-secss # for Exynos4210, Exynos4212,
+ # Exynos4412, Exynos5250,
+ # Exynos5260 and Exynos5420
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ items:
+ - const: secss
+
+ interrupts:
+ description: One feed control interrupt.
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - clock-names
+ - clocks
+ - interrupts
+
+additionalProperties: false
diff --git a/Documentation/devicetree/bindings/crypto/st,stm32-crc.txt b/Documentation/devicetree/bindings/crypto/st,stm32-crc.txt
deleted file mode 100644
index 3ba92a5e9b36..000000000000
--- a/Documentation/devicetree/bindings/crypto/st,stm32-crc.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-* STMicroelectronics STM32 CRC
-
-Required properties:
-- compatible: Should be "st,stm32f7-crc".
-- reg: The address and length of the peripheral registers space
-- clocks: The input clock of the CRC instance
-
-Optional properties: none
-
-Example:
-
-crc: crc@40023000 {
- compatible = "st,stm32f7-crc";
- reg = <0x40023000 0x400>;
- clocks = <&rcc 0 12>;
-};
diff --git a/Documentation/devicetree/bindings/crypto/st,stm32-crc.yaml b/Documentation/devicetree/bindings/crypto/st,stm32-crc.yaml
new file mode 100644
index 000000000000..50b2c2e0c3cd
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/st,stm32-crc.yaml
@@ -0,0 +1,38 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/st,stm32-crc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STMicroelectronics STM32 CRC
+
+maintainers:
+ - Lionel Debieve <lionel.debieve@foss.st.com>
+
+properties:
+ compatible:
+ const: st,stm32f7-crc
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/stm32mp1-clks.h>
+ crc@40023000 {
+ compatible = "st,stm32f7-crc";
+ reg = <0x40023000 0x400>;
+ clocks = <&rcc 0 12>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/crypto/st,stm32-cryp.txt b/Documentation/devicetree/bindings/crypto/st,stm32-cryp.txt
deleted file mode 100644
index 970487fa40b8..000000000000
--- a/Documentation/devicetree/bindings/crypto/st,stm32-cryp.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-* STMicroelectronics STM32 CRYP
-
-Required properties:
-- compatible: Should be "st,stm32f756-cryp".
-- reg: The address and length of the peripheral registers space
-- clocks: The input clock of the CRYP instance
-- interrupts: The CRYP interrupt
-
-Optional properties:
-- resets: The input reset of the CRYP instance
-
-Example:
-crypto@50060000 {
- compatible = "st,stm32f756-cryp";
- reg = <0x50060000 0x400>;
- interrupts = <79>;
- clocks = <&rcc 0 STM32F7_AHB2_CLOCK(CRYP)>;
- resets = <&rcc STM32F7_AHB2_RESET(CRYP)>;
-};
diff --git a/Documentation/devicetree/bindings/crypto/st,stm32-cryp.yaml b/Documentation/devicetree/bindings/crypto/st,stm32-cryp.yaml
new file mode 100644
index 000000000000..27354658d054
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/st,stm32-cryp.yaml
@@ -0,0 +1,74 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/st,stm32-cryp.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STMicroelectronics STM32 CRYP
+
+description: The STM32 CRYP block is built on the CRYP block found in
+ the STn8820 SoC introduced in 2007, and subsequently used in the U8500
+ SoC in 2010.
+
+maintainers:
+ - Lionel Debieve <lionel.debieve@foss.st.com>
+
+properties:
+ compatible:
+ enum:
+ - st,stn8820-cryp
+ - stericsson,ux500-cryp
+ - st,stm32f756-cryp
+ - st,stm32mp1-cryp
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ resets:
+ maxItems: 1
+
+ dmas:
+ items:
+ - description: mem2cryp DMA channel
+ - description: cryp2mem DMA channel
+
+ dma-names:
+ items:
+ - const: mem2cryp
+ - const: cryp2mem
+
+ power-domains:
+ maxItems: 1
+
+ access-controllers:
+ minItems: 1
+ maxItems: 2
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/clock/stm32mp1-clks.h>
+ #include <dt-bindings/reset/stm32mp1-resets.h>
+ cryp@54001000 {
+ compatible = "st,stm32mp1-cryp";
+ reg = <0x54001000 0x400>;
+ interrupts = <GIC_SPI 79 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&rcc CRYP1>;
+ resets = <&rcc CRYP1_R>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/crypto/st,stm32-hash.txt b/Documentation/devicetree/bindings/crypto/st,stm32-hash.txt
deleted file mode 100644
index 04fc246f02f7..000000000000
--- a/Documentation/devicetree/bindings/crypto/st,stm32-hash.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-* STMicroelectronics STM32 HASH
-
-Required properties:
-- compatible: Should contain entries for this and backward compatible
- HASH versions:
- - "st,stm32f456-hash" for stm32 F456.
- - "st,stm32f756-hash" for stm32 F756.
-- reg: The address and length of the peripheral registers space
-- interrupts: the interrupt specifier for the HASH
-- clocks: The input clock of the HASH instance
-
-Optional properties:
-- resets: The input reset of the HASH instance
-- dmas: DMA specifiers for the HASH. See the DMA client binding,
- Documentation/devicetree/bindings/dma/dma.txt
-- dma-names: DMA request name. Should be "in" if a dma is present.
-- dma-maxburst: Set number of maximum dma burst supported
-
-Example:
-
-hash1: hash@50060400 {
- compatible = "st,stm32f756-hash";
- reg = <0x50060400 0x400>;
- interrupts = <80>;
- clocks = <&rcc 0 STM32F7_AHB2_CLOCK(HASH)>;
- resets = <&rcc STM32F7_AHB2_RESET(HASH)>;
- dmas = <&dma2 7 2 0x400 0x0>;
- dma-names = "in";
- dma-maxburst = <0>;
-};
diff --git a/Documentation/devicetree/bindings/crypto/st,stm32-hash.yaml b/Documentation/devicetree/bindings/crypto/st,stm32-hash.yaml
new file mode 100644
index 000000000000..822318414095
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/st,stm32-hash.yaml
@@ -0,0 +1,94 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/st,stm32-hash.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STMicroelectronics STM32 HASH
+
+description: The STM32 HASH block is built on the HASH block found in
+ the STn8820 SoC introduced in 2007, and subsequently used in the U8500
+ SoC in 2010.
+
+maintainers:
+ - Lionel Debieve <lionel.debieve@foss.st.com>
+
+properties:
+ compatible:
+ enum:
+ - st,stn8820-hash
+ - stericsson,ux500-hash
+ - st,stm32f456-hash
+ - st,stm32f756-hash
+ - st,stm32mp13-hash
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ resets:
+ maxItems: 1
+
+ dmas:
+ maxItems: 1
+
+ dma-names:
+ items:
+ - const: in
+
+ dma-maxburst:
+ description: Set number of maximum dma burst supported
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 2
+ default: 0
+
+ power-domains:
+ maxItems: 1
+
+ access-controllers:
+ minItems: 1
+ maxItems: 2
+
+required:
+ - compatible
+ - reg
+ - clocks
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ items:
+ const: stericsson,ux500-hash
+ then:
+ properties:
+ interrupts: false
+ else:
+ required:
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/clock/stm32mp1-clks.h>
+ #include <dt-bindings/reset/stm32mp1-resets.h>
+ hash@54002000 {
+ compatible = "st,stm32f756-hash";
+ reg = <0x54002000 0x400>;
+ interrupts = <GIC_SPI 80 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&rcc HASH1>;
+ resets = <&rcc HASH1_R>;
+ dmas = <&mdma1 31 0x10 0x1000A02 0x0 0x0>;
+ dma-names = "in";
+ dma-maxburst = <2>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/crypto/starfive,jh7110-crypto.yaml b/Documentation/devicetree/bindings/crypto/starfive,jh7110-crypto.yaml
new file mode 100644
index 000000000000..7ccb6e1641d0
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/starfive,jh7110-crypto.yaml
@@ -0,0 +1,96 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/starfive,jh7110-crypto.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: StarFive Cryptographic Module
+
+maintainers:
+ - Jia Jie Ho <jiajie.ho@starfivetech.com>
+ - William Qiu <william.qiu@starfivetech.com>
+
+properties:
+ compatible:
+ enum:
+ - starfive,jh7110-crypto
+ - starfive,jh8100-crypto
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: Hardware reference clock
+ - description: AHB reference clock
+
+ clock-names:
+ items:
+ - const: hclk
+ - const: ahb
+
+ interrupts:
+ minItems: 1
+ items:
+ - description: SHA2 module irq
+ - description: SM3 module irq
+
+ resets:
+ maxItems: 1
+
+ dmas:
+ items:
+ - description: TX DMA channel
+ - description: RX DMA channel
+
+ dma-names:
+ items:
+ - const: tx
+ - const: rx
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - resets
+ - dmas
+ - dma-names
+
+additionalProperties: false
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ const: starfive,jh7110-crypto
+
+ then:
+ properties:
+ interrupts:
+ maxItems: 1
+
+ - if:
+ properties:
+ compatible:
+ const: starfive,jh8100-crypto
+
+ then:
+ properties:
+ interrupts:
+ minItems: 2
+
+examples:
+ - |
+ crypto: crypto@16000000 {
+ compatible = "starfive,jh7110-crypto";
+ reg = <0x16000000 0x4000>;
+ clocks = <&clk 15>, <&clk 16>;
+ clock-names = "hclk", "ahb";
+ interrupts = <28>;
+ resets = <&reset 3>;
+ dmas = <&dma 1 2>,
+ <&dma 0 2>;
+ dma-names = "tx", "rx";
+ };
+...
diff --git a/Documentation/devicetree/bindings/crypto/sun4i-ss.txt b/Documentation/devicetree/bindings/crypto/sun4i-ss.txt
deleted file mode 100644
index f2dc3d9bca92..000000000000
--- a/Documentation/devicetree/bindings/crypto/sun4i-ss.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-* Allwinner Security System found on A20 SoC
-
-Required properties:
-- compatible : Should be "allwinner,sun4i-a10-crypto".
-- reg: Should contain the Security System register location and length.
-- interrupts: Should contain the IRQ line for the Security System.
-- clocks : List of clock specifiers, corresponding to ahb and ss.
-- clock-names : Name of the functional clock, should be
- * "ahb" : AHB gating clock
- * "mod" : SS controller clock
-
-Optional properties:
- - resets : phandle + reset specifier pair
- - reset-names : must contain "ahb"
-
-Example:
- crypto: crypto-engine@1c15000 {
- compatible = "allwinner,sun4i-a10-crypto";
- reg = <0x01c15000 0x1000>;
- interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&ahb_gates 5>, <&ss_clk>;
- clock-names = "ahb", "mod";
- };
diff --git a/Documentation/devicetree/bindings/crypto/ti,am62l-dthev2.yaml b/Documentation/devicetree/bindings/crypto/ti,am62l-dthev2.yaml
new file mode 100644
index 000000000000..5486bfeb2fe8
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/ti,am62l-dthev2.yaml
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/ti,am62l-dthev2.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: K3 SoC DTHE V2 crypto module
+
+maintainers:
+ - T Pratham <t-pratham@ti.com>
+
+properties:
+ compatible:
+ enum:
+ - ti,am62l-dthev2
+
+ reg:
+ maxItems: 1
+
+ dmas:
+ items:
+ - description: AES Engine RX DMA Channel
+ - description: AES Engine TX DMA Channel
+ - description: SHA Engine TX DMA Channel
+
+ dma-names:
+ items:
+ - const: rx
+ - const: tx1
+ - const: tx2
+
+required:
+ - compatible
+ - reg
+ - dmas
+ - dma-names
+
+additionalProperties: false
+
+examples:
+ - |
+ crypto@40800000 {
+ compatible = "ti,am62l-dthev2";
+ reg = <0x40800000 0x10000>;
+
+ dmas = <&main_bcdma 0 0 0x4700 0>,
+ <&main_bcdma 0 0 0xc701 0>,
+ <&main_bcdma 0 0 0xc700 0>;
+ dma-names = "rx", "tx1", "tx2";
+ };
diff --git a/Documentation/devicetree/bindings/crypto/ti,omap-sham.yaml b/Documentation/devicetree/bindings/crypto/ti,omap-sham.yaml
new file mode 100644
index 000000000000..d69b50228009
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/ti,omap-sham.yaml
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/ti,omap-sham.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: OMAP SoC SHA crypto Module
+
+maintainers:
+ - Animesh Agarwal <animeshagarwal28@gmail.com>
+
+properties:
+ compatible:
+ enum:
+ - ti,omap2-sham
+ - ti,omap4-sham
+ - ti,omap5-sham
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ dmas:
+ maxItems: 1
+
+ dma-names:
+ const: rx
+
+ ti,hwmods:
+ description: Name of the hwmod associated with the SHAM module
+ $ref: /schemas/types.yaml#/definitions/string
+ enum: [sham]
+
+dependencies:
+ dmas: [dma-names]
+
+additionalProperties: false
+
+required:
+ - compatible
+ - ti,hwmods
+ - reg
+ - interrupts
+
+examples:
+ - |
+ sham@53100000 {
+ compatible = "ti,omap4-sham";
+ ti,hwmods = "sham";
+ reg = <0x53100000 0x200>;
+ interrupts = <109>;
+ dmas = <&edma 36>;
+ dma-names = "rx";
+ };
diff --git a/Documentation/devicetree/bindings/crypto/ti,omap2-aes.yaml b/Documentation/devicetree/bindings/crypto/ti,omap2-aes.yaml
new file mode 100644
index 000000000000..90e92050ad2e
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/ti,omap2-aes.yaml
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ti,omap2-aes.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: OMAP SoC AES crypto Module
+
+maintainers:
+ - Aaro Koskinen <aaro.koskinen@iki.fi>
+ - Andreas Kemnade <andreas@kemnade.info>
+ - Kevin Hilman <khilman@baylibre.com>
+ - Roger Quadros <rogerq@kernel.org>
+ - Tony Lindgren <tony@atomide.com>
+
+properties:
+ compatible:
+ enum:
+ - ti,omap2-aes
+ - ti,omap3-aes
+ - ti,omap4-aes
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ dmas:
+ maxItems: 2
+
+ dma-names:
+ items:
+ - const: tx
+ - const: rx
+
+ ti,hwmods:
+ description: Name of the hwmod associated with the AES module
+ const: aes
+ deprecated: true
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ aes@53500000 {
+ compatible = "ti,omap4-aes";
+ reg = <0x53500000 0xa0>;
+ interrupts = <102>;
+ dmas = <&edma 6>,
+ <&edma 5>;
+ dma-names = "tx", "rx";
+ };
diff --git a/Documentation/devicetree/bindings/crypto/ti,omap4-des.yaml b/Documentation/devicetree/bindings/crypto/ti,omap4-des.yaml
new file mode 100644
index 000000000000..f02f1e141218
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/ti,omap4-des.yaml
@@ -0,0 +1,65 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ti,omap4-des.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: OMAP4 DES crypto Module
+
+maintainers:
+ - Aaro Koskinen <aaro.koskinen@iki.fi>
+ - Andreas Kemnade <andreas@kemnade.info>
+ - Kevin Hilman <khilman@baylibre.com>
+ - Roger Quadros <rogerq@kernel.org>
+ - Tony Lindgren <tony@atomide.com>
+
+properties:
+ compatible:
+ const: ti,omap4-des
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ dmas:
+ maxItems: 2
+
+ dma-names:
+ items:
+ - const: tx
+ - const: rx
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ items:
+ - const: fck
+
+dependencies:
+ dmas: [ dma-names ]
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ des@480a5000 {
+ compatible = "ti,omap4-des";
+ reg = <0x480a5000 0xa0>;
+ interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&l3_iclk_div>;
+ clock-names = "fck";
+ dmas = <&sdma 117>, <&sdma 116>;
+ dma-names = "tx", "rx";
+ };
diff --git a/Documentation/devicetree/bindings/crypto/ti,sa2ul.yaml b/Documentation/devicetree/bindings/crypto/ti,sa2ul.yaml
new file mode 100644
index 000000000000..ff10a0838ad6
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/ti,sa2ul.yaml
@@ -0,0 +1,98 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/ti,sa2ul.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: K3 SoC SA2UL crypto module
+
+maintainers:
+ - Tero Kristo <t-kristo@ti.com>
+
+properties:
+ compatible:
+ enum:
+ - ti,j721e-sa2ul
+ - ti,am654-sa2ul
+ - ti,am64-sa2ul
+ - ti,am62-sa3ul
+
+ reg:
+ maxItems: 1
+
+ power-domains:
+ maxItems: 1
+
+ dmas:
+ items:
+ - description: TX DMA Channel
+ - description: 'RX DMA Channel #1'
+ - description: 'RX DMA Channel #2'
+
+ dma-names:
+ items:
+ - const: tx
+ - const: rx1
+ - const: rx2
+
+ "#address-cells":
+ const: 2
+
+ "#size-cells":
+ const: 2
+
+ ranges:
+ description:
+ Address translation for the possible RNG child node for SA2UL
+
+ clocks:
+ items:
+ - description: Clock used by PKA
+ - description: Main Input Clock
+ - description: Clock used by rng
+
+ clock-names:
+ items:
+ - const: pka_in_clk
+ - const: x1_clk
+ - const: x2_clk
+
+patternProperties:
+ "^rng@[a-f0-9]+$":
+ type: object
+ description:
+ Child RNG node for SA2UL
+
+required:
+ - compatible
+ - reg
+ - dmas
+ - dma-names
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: ti,am62-sa3ul
+ then:
+ properties:
+ power-domains: false
+ else:
+ required:
+ - power-domains
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/soc/ti,sci_pm_domain.h>
+
+ main_crypto: crypto@4e00000 {
+ compatible = "ti,j721e-sa2ul";
+ reg = <0x4e00000 0x1200>;
+ power-domains = <&k3_pds 264 TI_SCI_PD_EXCLUSIVE>;
+ dmas = <&main_udmap 0xc000>, <&main_udmap 0x4000>,
+ <&main_udmap 0x4001>;
+ dma-names = "tx", "rx1", "rx2";
+ };
diff --git a/Documentation/devicetree/bindings/crypto/xlnx,versal-trng.yaml b/Documentation/devicetree/bindings/crypto/xlnx,versal-trng.yaml
new file mode 100644
index 000000000000..9dfb0b0ab5c8
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/xlnx,versal-trng.yaml
@@ -0,0 +1,35 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/xlnx,versal-trng.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Xilinx Versal True Random Number Generator Hardware Accelerator
+
+maintainers:
+ - Harsh Jain <h.jain@amd.com>
+ - Mounika Botcha <mounika.botcha@amd.com>
+
+description:
+ The Versal True Random Number Generator consists of Ring Oscillators as
+ entropy source and a deterministic CTR_DRBG random bit generator (DRBG).
+
+properties:
+ compatible:
+ const: xlnx,versal-trng
+
+ reg:
+ maxItems: 1
+
+required:
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ rng@f1230000 {
+ compatible = "xlnx,versal-trng";
+ reg = <0xf1230000 0x1000>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/crypto/xlnx,zynqmp-aes.yaml b/Documentation/devicetree/bindings/crypto/xlnx,zynqmp-aes.yaml
new file mode 100644
index 000000000000..8aead97a585b
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/xlnx,zynqmp-aes.yaml
@@ -0,0 +1,37 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/xlnx,zynqmp-aes.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Xilinx ZynqMP AES-GCM Hardware Accelerator
+
+maintainers:
+ - Kalyani Akula <kalyani.akula@amd.com>
+ - Michal Simek <michal.simek@amd.com>
+
+description: |
+ The ZynqMP AES-GCM hardened cryptographic accelerator is used to
+ encrypt or decrypt the data with provided key and initialization vector.
+
+properties:
+ compatible:
+ const: xlnx,zynqmp-aes
+
+required:
+ - compatible
+
+additionalProperties: false
+
+examples:
+ - |
+ firmware {
+ zynqmp_firmware: zynqmp-firmware {
+ compatible = "xlnx,zynqmp-firmware";
+ method = "smc";
+ xlnx_aes: zynqmp-aes {
+ compatible = "xlnx,zynqmp-aes";
+ };
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/csky/pmu.txt b/Documentation/devicetree/bindings/csky/pmu.txt
new file mode 100644
index 000000000000..728d05ca6a1c
--- /dev/null
+++ b/Documentation/devicetree/bindings/csky/pmu.txt
@@ -0,0 +1,38 @@
+===============================
+C-SKY Performance Monitor Units
+===============================
+
+C-SKY Performance Monitor is designed for ck807/ck810/ck860 SMP soc and
+it could count cpu's events for helping analysis performance issues.
+
+============================
+PMU node bindings definition
+============================
+
+ Description: Describes PMU
+
+ PROPERTIES
+
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: must be "csky,csky-pmu"
+ - interrupts
+ Usage: required
+ Value type: <u32 IRQ_TYPE_XXX>
+ Definition: must be pmu irq num defined by soc
+ - count-width
+ Usage: optional
+ Value type: <u32>
+ Definition: the width of pmu counter
+
+Examples:
+---------
+#include <dt-bindings/interrupt-controller/irq.h>
+
+ pmu: performace-monitor {
+ compatible = "csky,csky-pmu";
+ interrupts = <23 IRQ_TYPE_EDGE_RISING>;
+ interrupt-parent = <&intc>;
+ count-width = <48>;
+ };
diff --git a/Documentation/devicetree/bindings/devfreq/event/exynos-nocp.txt b/Documentation/devicetree/bindings/devfreq/event/exynos-nocp.txt
deleted file mode 100644
index aeaebd425d1f..000000000000
--- a/Documentation/devicetree/bindings/devfreq/event/exynos-nocp.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-
-* Samsung Exynos NoC (Network on Chip) Probe device
-
-The Samsung Exynos542x SoC has NoC (Network on Chip) Probe for NoC bus.
-NoC provides the primitive values to get the performance data. The packets
-that the Network on Chip (NoC) probes detects are transported over
-the network infrastructure to observer units. You can configure probes to
-capture packets with header or data on the data request response network,
-or as traffic debug or statistic collectors. Exynos542x bus has multiple
-NoC probes to provide bandwidth information about behavior of the SoC
-that you can use while analyzing system performance.
-
-Required properties:
-- compatible: Should be "samsung,exynos5420-nocp"
-- reg: physical base address of each NoC Probe and length of memory mapped region.
-
-Optional properties:
-- clock-names : the name of clock used by the NoC Probe, "nocp"
-- clocks : phandles for clock specified in "clock-names" property
-
-Example : NoC Probe nodes in Device Tree are listed below.
-
- nocp_mem0_0: nocp@10ca1000 {
- compatible = "samsung,exynos5420-nocp";
- reg = <0x10CA1000 0x200>;
- };
diff --git a/Documentation/devicetree/bindings/devfreq/event/exynos-ppmu.txt b/Documentation/devicetree/bindings/devfreq/event/exynos-ppmu.txt
deleted file mode 100644
index 3e36c1d11386..000000000000
--- a/Documentation/devicetree/bindings/devfreq/event/exynos-ppmu.txt
+++ /dev/null
@@ -1,147 +0,0 @@
-
-* Samsung Exynos PPMU (Platform Performance Monitoring Unit) device
-
-The Samsung Exynos SoC has PPMU (Platform Performance Monitoring Unit) for
-each IP. PPMU provides the primitive values to get performance data. These
-PPMU events provide information of the SoC's behaviors so that you may
-use to analyze system performance, to make behaviors visible and to count
-usages of each IP (DMC, CPU, RIGHTBUS, LEFTBUS, CAM interface, LCD, G3D, MFC).
-The Exynos PPMU driver uses the devfreq-event class to provide event data
-to various devfreq devices. The devfreq devices would use the event data when
-derterming the current state of each IP.
-
-Required properties:
-- compatible: Should be "samsung,exynos-ppmu" or "samsung,exynos-ppmu-v2.
-- reg: physical base address of each PPMU and length of memory mapped region.
-
-Optional properties:
-- clock-names : the name of clock used by the PPMU, "ppmu"
-- clocks : phandles for clock specified in "clock-names" property
-
-Example1 : PPMUv1 nodes in exynos3250.dtsi are listed below.
-
- ppmu_dmc0: ppmu_dmc0@106a0000 {
- compatible = "samsung,exynos-ppmu";
- reg = <0x106a0000 0x2000>;
- status = "disabled";
- };
-
- ppmu_dmc1: ppmu_dmc1@106b0000 {
- compatible = "samsung,exynos-ppmu";
- reg = <0x106b0000 0x2000>;
- status = "disabled";
- };
-
- ppmu_cpu: ppmu_cpu@106c0000 {
- compatible = "samsung,exynos-ppmu";
- reg = <0x106c0000 0x2000>;
- status = "disabled";
- };
-
- ppmu_rightbus: ppmu_rightbus@112a0000 {
- compatible = "samsung,exynos-ppmu";
- reg = <0x112a0000 0x2000>;
- clocks = <&cmu CLK_PPMURIGHT>;
- clock-names = "ppmu";
- status = "disabled";
- };
-
- ppmu_leftbus: ppmu_leftbus0@116a0000 {
- compatible = "samsung,exynos-ppmu";
- reg = <0x116a0000 0x2000>;
- clocks = <&cmu CLK_PPMULEFT>;
- clock-names = "ppmu";
- status = "disabled";
- };
-
-Example2 : Events of each PPMU node in exynos3250-rinato.dts are listed below.
-
- &ppmu_dmc0 {
- status = "okay";
-
- events {
- ppmu_dmc0_3: ppmu-event3-dmc0 {
- event-name = "ppmu-event3-dmc0";
- };
-
- ppmu_dmc0_2: ppmu-event2-dmc0 {
- event-name = "ppmu-event2-dmc0";
- };
-
- ppmu_dmc0_1: ppmu-event1-dmc0 {
- event-name = "ppmu-event1-dmc0";
- };
-
- ppmu_dmc0_0: ppmu-event0-dmc0 {
- event-name = "ppmu-event0-dmc0";
- };
- };
- };
-
- &ppmu_dmc1 {
- status = "okay";
-
- events {
- ppmu_dmc1_3: ppmu-event3-dmc1 {
- event-name = "ppmu-event3-dmc1";
- };
- };
- };
-
- &ppmu_leftbus {
- status = "okay";
-
- events {
- ppmu_leftbus_3: ppmu-event3-leftbus {
- event-name = "ppmu-event3-leftbus";
- };
- };
- };
-
- &ppmu_rightbus {
- status = "okay";
-
- events {
- ppmu_rightbus_3: ppmu-event3-rightbus {
- event-name = "ppmu-event3-rightbus";
- };
- };
- };
-
-Example3 : PPMUv2 nodes in exynos5433.dtsi are listed below.
-
- ppmu_d0_cpu: ppmu_d0_cpu@10480000 {
- compatible = "samsung,exynos-ppmu-v2";
- reg = <0x10480000 0x2000>;
- status = "disabled";
- };
-
- ppmu_d0_general: ppmu_d0_general@10490000 {
- compatible = "samsung,exynos-ppmu-v2";
- reg = <0x10490000 0x2000>;
- status = "disabled";
- };
-
- ppmu_d0_rt: ppmu_d0_rt@104a0000 {
- compatible = "samsung,exynos-ppmu-v2";
- reg = <0x104a0000 0x2000>;
- status = "disabled";
- };
-
- ppmu_d1_cpu: ppmu_d1_cpu@104b0000 {
- compatible = "samsung,exynos-ppmu-v2";
- reg = <0x104b0000 0x2000>;
- status = "disabled";
- };
-
- ppmu_d1_general: ppmu_d1_general@104c0000 {
- compatible = "samsung,exynos-ppmu-v2";
- reg = <0x104c0000 0x2000>;
- status = "disabled";
- };
-
- ppmu_d1_rt: ppmu_d1_rt@104d0000 {
- compatible = "samsung,exynos-ppmu-v2";
- reg = <0x104d0000 0x2000>;
- status = "disabled";
- };
diff --git a/Documentation/devicetree/bindings/devfreq/event/rockchip,dfi.yaml b/Documentation/devicetree/bindings/devfreq/event/rockchip,dfi.yaml
new file mode 100644
index 000000000000..50d3fabe958d
--- /dev/null
+++ b/Documentation/devicetree/bindings/devfreq/event/rockchip,dfi.yaml
@@ -0,0 +1,74 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/devfreq/event/rockchip,dfi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip DFI
+
+maintainers:
+ - Sascha Hauer <s.hauer@pengutronix.de>
+
+properties:
+ compatible:
+ enum:
+ - rockchip,rk3399-dfi
+ - rockchip,rk3568-dfi
+ - rockchip,rk3588-dfi
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ items:
+ - const: pclk_ddr_mon
+
+ interrupts:
+ minItems: 1
+ maxItems: 4
+
+ reg:
+ maxItems: 1
+
+ rockchip,pmu:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ Phandle to the syscon managing the "PMU general register files".
+
+required:
+ - compatible
+ - interrupts
+ - reg
+
+if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - rockchip,rk3399-dfi
+
+then:
+ required:
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/clock/rk3308-cru.h>
+
+ bus {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ dfi: dfi@ff630000 {
+ compatible = "rockchip,rk3399-dfi";
+ reg = <0x00 0xff630000 0x00 0x4000>;
+ interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH 0>;
+ rockchip,pmu = <&pmugrf>;
+ clocks = <&cru PCLK_DDR_MON>;
+ clock-names = "pclk_ddr_mon";
+ };
+ };
diff --git a/Documentation/devicetree/bindings/devfreq/event/rockchip-dfi.txt b/Documentation/devicetree/bindings/devfreq/event/rockchip-dfi.txt
deleted file mode 100644
index 148191b0fc15..000000000000
--- a/Documentation/devicetree/bindings/devfreq/event/rockchip-dfi.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-
-* Rockchip rk3399 DFI device
-
-Required properties:
-- compatible: Must be "rockchip,rk3399-dfi".
-- reg: physical base address of each DFI and length of memory mapped region
-- rockchip,pmu: phandle to the syscon managing the "pmu general register files"
-- clocks: phandles for clock specified in "clock-names" property
-- clock-names : the name of clock used by the DFI, must be "pclk_ddr_mon";
-
-Example:
- dfi: dfi@ff630000 {
- compatible = "rockchip,rk3399-dfi";
- reg = <0x00 0xff630000 0x00 0x4000>;
- rockchip,pmu = <&pmugrf>;
- clocks = <&cru PCLK_DDR_MON>;
- clock-names = "pclk_ddr_mon";
- };
diff --git a/Documentation/devicetree/bindings/devfreq/event/samsung,exynos-nocp.yaml b/Documentation/devicetree/bindings/devfreq/event/samsung,exynos-nocp.yaml
new file mode 100644
index 000000000000..2bdd05af6079
--- /dev/null
+++ b/Documentation/devicetree/bindings/devfreq/event/samsung,exynos-nocp.yaml
@@ -0,0 +1,48 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/devfreq/event/samsung,exynos-nocp.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos NoC (Network on Chip) Probe
+
+maintainers:
+ - Chanwoo Choi <cw00.choi@samsung.com>
+ - Krzysztof Kozlowski <krzk@kernel.org>
+
+description: |
+ The Samsung Exynos542x SoC has a NoC (Network on Chip) Probe for NoC bus.
+ NoC provides the primitive values to get the performance data. The packets
+ that the Network on Chip (NoC) probes detects are transported over the
+ network infrastructure to observer units. You can configure probes to capture
+ packets with header or data on the data request response network, or as
+ traffic debug or statistic collectors. Exynos542x bus has multiple NoC probes
+ to provide bandwidth information about behavior of the SoC that you can use
+ while analyzing system performance.
+
+properties:
+ compatible:
+ const: samsung,exynos5420-nocp
+
+ clock-names:
+ items:
+ - const: nocp
+
+ clocks:
+ maxItems: 1
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ nocp_mem0_0: nocp@10ca1000 {
+ compatible = "samsung,exynos5420-nocp";
+ reg = <0x10ca1000 0x200>;
+ };
diff --git a/Documentation/devicetree/bindings/devfreq/event/samsung,exynos-ppmu.yaml b/Documentation/devicetree/bindings/devfreq/event/samsung,exynos-ppmu.yaml
new file mode 100644
index 000000000000..d27dcb2fef12
--- /dev/null
+++ b/Documentation/devicetree/bindings/devfreq/event/samsung,exynos-ppmu.yaml
@@ -0,0 +1,169 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/devfreq/event/samsung,exynos-ppmu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos SoC PPMU (Platform Performance Monitoring Unit)
+
+maintainers:
+ - Chanwoo Choi <cw00.choi@samsung.com>
+ - Krzysztof Kozlowski <krzk@kernel.org>
+
+description: |
+ The Samsung Exynos SoC has PPMU (Platform Performance Monitoring Unit) for
+ each IP. PPMU provides the primitive values to get performance data. These
+ PPMU events provide information of the SoC's behaviors so that you may use to
+ analyze system performance, to make behaviors visible and to count usages of
+ each IP (DMC, CPU, RIGHTBUS, LEFTBUS, CAM interface, LCD, G3D, MFC). The
+ Exynos PPMU driver uses the devfreq-event class to provide event data to
+ various devfreq devices. The devfreq devices would use the event data when
+ determining the current state of each IP.
+
+properties:
+ compatible:
+ enum:
+ - samsung,exynos-ppmu
+ - samsung,exynos-ppmu-v2
+
+ clock-names:
+ items:
+ - const: ppmu
+
+ clocks:
+ maxItems: 1
+
+ reg:
+ maxItems: 1
+
+ events:
+ type: object
+
+ patternProperties:
+ '^ppmu-event[0-9]+(-[a-z0-9]+){,2}$':
+ type: object
+ properties:
+ event-name:
+ description: |
+ The unique event name among PPMU device
+ $ref: /schemas/types.yaml#/definitions/string
+
+ event-data-type:
+ description: |
+ Define the type of data which shell be counted by the counter.
+ You can check include/dt-bindings/pmu/exynos_ppmu.h for all
+ possible type, i.e. count read requests, count write data in
+ bytes, etc. This field is optional and when it is missing, the
+ driver code will use default data type.
+ $ref: /schemas/types.yaml#/definitions/uint32
+
+ required:
+ - event-name
+
+ additionalProperties: false
+
+ additionalProperties: false
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ // PPMUv1 nodes for Exynos3250 (although the board DTS defines events)
+ #include <dt-bindings/clock/exynos3250.h>
+
+ ppmu_dmc0: ppmu@106a0000 {
+ compatible = "samsung,exynos-ppmu";
+ reg = <0x106a0000 0x2000>;
+
+ events {
+ ppmu_dmc0_3: ppmu-event3-dmc0 {
+ event-name = "ppmu-event3-dmc0";
+ };
+
+ ppmu_dmc0_2: ppmu-event2-dmc0 {
+ event-name = "ppmu-event2-dmc0";
+ };
+
+ ppmu_dmc0_1: ppmu-event1-dmc0 {
+ event-name = "ppmu-event1-dmc0";
+ };
+
+ ppmu_dmc0_0: ppmu-event0-dmc0 {
+ event-name = "ppmu-event0-dmc0";
+ };
+ };
+ };
+
+ ppmu_rightbus: ppmu@112a0000 {
+ compatible = "samsung,exynos-ppmu";
+ reg = <0x112a0000 0x2000>;
+ clocks = <&cmu CLK_PPMURIGHT>;
+ clock-names = "ppmu";
+
+ events {
+ ppmu_rightbus_3: ppmu-event3-rightbus {
+ event-name = "ppmu-event3-rightbus";
+ };
+ };
+ };
+
+ - |
+ // PPMUv2 nodes in Exynos5433
+ ppmu_d0_cpu: ppmu@10480000 {
+ compatible = "samsung,exynos-ppmu-v2";
+ reg = <0x10480000 0x2000>;
+ };
+
+ ppmu_d0_general: ppmu@10490000 {
+ compatible = "samsung,exynos-ppmu-v2";
+ reg = <0x10490000 0x2000>;
+
+ events {
+ ppmu_event0_d0_general: ppmu-event0-d0-general {
+ event-name = "ppmu-event0-d0-general";
+ };
+ };
+ };
+
+ ppmu_d0_rt: ppmu@104a0000 {
+ compatible = "samsung,exynos-ppmu-v2";
+ reg = <0x104a0000 0x2000>;
+ };
+
+ ppmu_d1_cpu: ppmu@104b0000 {
+ compatible = "samsung,exynos-ppmu-v2";
+ reg = <0x104b0000 0x2000>;
+ };
+
+ ppmu_d1_general: ppmu@104c0000 {
+ compatible = "samsung,exynos-ppmu-v2";
+ reg = <0x104c0000 0x2000>;
+ };
+
+ ppmu_d1_rt: ppmu@104d0000 {
+ compatible = "samsung,exynos-ppmu-v2";
+ reg = <0x104d0000 0x2000>;
+ };
+
+ - |
+ // PPMUv1 nodes with event-data-type for Exynos4412
+ #include <dt-bindings/pmu/exynos_ppmu.h>
+
+ ppmu@106a0000 {
+ compatible = "samsung,exynos-ppmu";
+ reg = <0x106a0000 0x2000>;
+ clocks = <&clock 400>;
+ clock-names = "ppmu";
+
+ events {
+ ppmu-event3-dmc0 {
+ event-name = "ppmu-event3-dmc0";
+ event-data-type = <(PPMU_RO_DATA_CNT |
+ PPMU_WO_DATA_CNT)>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/devfreq/exynos-bus.txt b/Documentation/devicetree/bindings/devfreq/exynos-bus.txt
deleted file mode 100644
index f8e946471a58..000000000000
--- a/Documentation/devicetree/bindings/devfreq/exynos-bus.txt
+++ /dev/null
@@ -1,423 +0,0 @@
-* Generic Exynos Bus frequency device
-
-The Samsung Exynos SoC has many buses for data transfer between DRAM
-and sub-blocks in SoC. Most Exynos SoCs share the common architecture
-for buses. Generally, each bus of Exynos SoC includes a source clock
-and a power line, which are able to change the clock frequency
-of the bus in runtime. To monitor the usage of each bus in runtime,
-the driver uses the PPMU (Platform Performance Monitoring Unit), which
-is able to measure the current load of sub-blocks.
-
-The Exynos SoC includes the various sub-blocks which have the each AXI bus.
-The each AXI bus has the owned source clock but, has not the only owned
-power line. The power line might be shared among one more sub-blocks.
-So, we can divide into two type of device as the role of each sub-block.
-There are two type of bus devices as following:
-- parent bus device
-- passive bus device
-
-Basically, parent and passive bus device share the same power line.
-The parent bus device can only change the voltage of shared power line
-and the rest bus devices (passive bus device) depend on the decision of
-the parent bus device. If there are three blocks which share the VDD_xxx
-power line, Only one block should be parent device and then the rest blocks
-should depend on the parent device as passive device.
-
- VDD_xxx |--- A block (parent)
- |--- B block (passive)
- |--- C block (passive)
-
-There are a little different composition among Exynos SoC because each Exynos
-SoC has different sub-blocks. Therefore, such difference should be specified
-in devicetree file instead of each device driver. In result, this driver
-is able to support the bus frequency for all Exynos SoCs.
-
-Required properties for all bus devices:
-- compatible: Should be "samsung,exynos-bus".
-- clock-names : the name of clock used by the bus, "bus".
-- clocks : phandles for clock specified in "clock-names" property.
-- operating-points-v2: the OPP table including frequency/voltage information
- to support DVFS (Dynamic Voltage/Frequency Scaling) feature.
-
-Required properties only for parent bus device:
-- vdd-supply: the regulator to provide the buses with the voltage.
-- devfreq-events: the devfreq-event device to monitor the current utilization
- of buses.
-
-Required properties only for passive bus device:
-- devfreq: the parent bus device.
-
-Optional properties only for parent bus device:
-- exynos,saturation-ratio: the percentage value which is used to calibrate
- the performance count against total cycle count.
-- exynos,voltage-tolerance: the percentage value for bus voltage tolerance
- which is used to calculate the max voltage.
-
-Detailed correlation between sub-blocks and power line according to Exynos SoC:
-- In case of Exynos3250, there are two power line as following:
- VDD_MIF |--- DMC
-
- VDD_INT |--- LEFTBUS (parent device)
- |--- PERIL
- |--- MFC
- |--- G3D
- |--- RIGHTBUS
- |--- PERIR
- |--- FSYS
- |--- LCD0
- |--- PERIR
- |--- ISP
- |--- CAM
-
-- In case of Exynos4210, there is one power line as following:
- VDD_INT |--- DMC (parent device)
- |--- LEFTBUS
- |--- PERIL
- |--- MFC(L)
- |--- G3D
- |--- TV
- |--- LCD0
- |--- RIGHTBUS
- |--- PERIR
- |--- MFC(R)
- |--- CAM
- |--- FSYS
- |--- GPS
- |--- LCD0
- |--- LCD1
-
-- In case of Exynos4x12, there are two power line as following:
- VDD_MIF |--- DMC
-
- VDD_INT |--- LEFTBUS (parent device)
- |--- PERIL
- |--- MFC(L)
- |--- G3D
- |--- TV
- |--- IMAGE
- |--- RIGHTBUS
- |--- PERIR
- |--- MFC(R)
- |--- CAM
- |--- FSYS
- |--- GPS
- |--- LCD0
- |--- ISP
-
-- In case of Exynos5422, there are two power line as following:
- VDD_MIF |--- DREX 0 (parent device, DRAM EXpress controller)
- |--- DREX 1
-
- VDD_INT |--- NoC_Core (parent device)
- |--- G2D
- |--- G3D
- |--- DISP1
- |--- NoC_WCORE
- |--- GSCL
- |--- MSCL
- |--- ISP
- |--- MFC
- |--- GEN
- |--- PERIS
- |--- PERIC
- |--- FSYS
- |--- FSYS2
-
-- In case of Exynos5433, there is VDD_INT power line as following:
- VDD_INT |--- G2D (parent device)
- |--- MSCL
- |--- GSCL
- |--- JPEG
- |--- MFC
- |--- HEVC
- |--- BUS0
- |--- BUS1
- |--- BUS2
- |--- PERIS (Fixed clock rate)
- |--- PERIC (Fixed clock rate)
- |--- FSYS (Fixed clock rate)
-
-Example1:
- Show the AXI buses of Exynos3250 SoC. Exynos3250 divides the buses to
- power line (regulator). The MIF (Memory Interface) AXI bus is used to
- transfer data between DRAM and CPU and uses the VDD_MIF regulator.
-
- - MIF (Memory Interface) block
- : VDD_MIF |--- DMC (Dynamic Memory Controller)
-
- - INT (Internal) block
- : VDD_INT |--- LEFTBUS (parent device)
- |--- PERIL
- |--- MFC
- |--- G3D
- |--- RIGHTBUS
- |--- FSYS
- |--- LCD0
- |--- PERIR
- |--- ISP
- |--- CAM
-
- - MIF bus's frequency/voltage table
- -----------------------
- |Lv| Freq | Voltage |
- -----------------------
- |L1| 50000 |800000 |
- |L2| 100000 |800000 |
- |L3| 134000 |800000 |
- |L4| 200000 |825000 |
- |L5| 400000 |875000 |
- -----------------------
-
- - INT bus's frequency/voltage table
- ----------------------------------------------------------
- |Block|LEFTBUS|RIGHTBUS|MCUISP |ISP |PERIL ||VDD_INT |
- | name| |LCD0 | | | || |
- | | |FSYS | | | || |
- | | |MFC | | | || |
- ----------------------------------------------------------
- |Mode |*parent|passive |passive|passive|passive|| |
- ----------------------------------------------------------
- |Lv |Frequency ||Voltage |
- ----------------------------------------------------------
- |L1 |50000 |50000 |50000 |50000 |50000 ||900000 |
- |L2 |80000 |80000 |80000 |80000 |80000 ||900000 |
- |L3 |100000 |100000 |100000 |100000 |100000 ||1000000 |
- |L4 |134000 |134000 |200000 |200000 | ||1000000 |
- |L5 |200000 |200000 |400000 |300000 | ||1000000 |
- ----------------------------------------------------------
-
-Example2 :
- The bus of DMC (Dynamic Memory Controller) block in exynos3250.dtsi
- is listed below:
-
- bus_dmc: bus_dmc {
- compatible = "samsung,exynos-bus";
- clocks = <&cmu_dmc CLK_DIV_DMC>;
- clock-names = "bus";
- operating-points-v2 = <&bus_dmc_opp_table>;
- status = "disabled";
- };
-
- bus_dmc_opp_table: opp_table1 {
- compatible = "operating-points-v2";
- opp-shared;
-
- opp-50000000 {
- opp-hz = /bits/ 64 <50000000>;
- opp-microvolt = <800000>;
- };
- opp-100000000 {
- opp-hz = /bits/ 64 <100000000>;
- opp-microvolt = <800000>;
- };
- opp-134000000 {
- opp-hz = /bits/ 64 <134000000>;
- opp-microvolt = <800000>;
- };
- opp-200000000 {
- opp-hz = /bits/ 64 <200000000>;
- opp-microvolt = <825000>;
- };
- opp-400000000 {
- opp-hz = /bits/ 64 <400000000>;
- opp-microvolt = <875000>;
- };
- };
-
- bus_leftbus: bus_leftbus {
- compatible = "samsung,exynos-bus";
- clocks = <&cmu CLK_DIV_GDL>;
- clock-names = "bus";
- operating-points-v2 = <&bus_leftbus_opp_table>;
- status = "disabled";
- };
-
- bus_rightbus: bus_rightbus {
- compatible = "samsung,exynos-bus";
- clocks = <&cmu CLK_DIV_GDR>;
- clock-names = "bus";
- operating-points-v2 = <&bus_leftbus_opp_table>;
- status = "disabled";
- };
-
- bus_lcd0: bus_lcd0 {
- compatible = "samsung,exynos-bus";
- clocks = <&cmu CLK_DIV_ACLK_160>;
- clock-names = "bus";
- operating-points-v2 = <&bus_leftbus_opp_table>;
- status = "disabled";
- };
-
- bus_fsys: bus_fsys {
- compatible = "samsung,exynos-bus";
- clocks = <&cmu CLK_DIV_ACLK_200>;
- clock-names = "bus";
- operating-points-v2 = <&bus_leftbus_opp_table>;
- status = "disabled";
- };
-
- bus_mcuisp: bus_mcuisp {
- compatible = "samsung,exynos-bus";
- clocks = <&cmu CLK_DIV_ACLK_400_MCUISP>;
- clock-names = "bus";
- operating-points-v2 = <&bus_mcuisp_opp_table>;
- status = "disabled";
- };
-
- bus_isp: bus_isp {
- compatible = "samsung,exynos-bus";
- clocks = <&cmu CLK_DIV_ACLK_266>;
- clock-names = "bus";
- operating-points-v2 = <&bus_isp_opp_table>;
- status = "disabled";
- };
-
- bus_peril: bus_peril {
- compatible = "samsung,exynos-bus";
- clocks = <&cmu CLK_DIV_ACLK_100>;
- clock-names = "bus";
- operating-points-v2 = <&bus_peril_opp_table>;
- status = "disabled";
- };
-
- bus_mfc: bus_mfc {
- compatible = "samsung,exynos-bus";
- clocks = <&cmu CLK_SCLK_MFC>;
- clock-names = "bus";
- operating-points-v2 = <&bus_leftbus_opp_table>;
- status = "disabled";
- };
-
- bus_leftbus_opp_table: opp_table1 {
- compatible = "operating-points-v2";
- opp-shared;
-
- opp-50000000 {
- opp-hz = /bits/ 64 <50000000>;
- opp-microvolt = <900000>;
- };
- opp-80000000 {
- opp-hz = /bits/ 64 <80000000>;
- opp-microvolt = <900000>;
- };
- opp-100000000 {
- opp-hz = /bits/ 64 <100000000>;
- opp-microvolt = <1000000>;
- };
- opp-134000000 {
- opp-hz = /bits/ 64 <134000000>;
- opp-microvolt = <1000000>;
- };
- opp-200000000 {
- opp-hz = /bits/ 64 <200000000>;
- opp-microvolt = <1000000>;
- };
- };
-
- bus_mcuisp_opp_table: opp_table2 {
- compatible = "operating-points-v2";
- opp-shared;
-
- opp-50000000 {
- opp-hz = /bits/ 64 <50000000>;
- };
- opp-80000000 {
- opp-hz = /bits/ 64 <80000000>;
- };
- opp-100000000 {
- opp-hz = /bits/ 64 <100000000>;
- };
- opp-200000000 {
- opp-hz = /bits/ 64 <200000000>;
- };
- opp-400000000 {
- opp-hz = /bits/ 64 <400000000>;
- };
- };
-
- bus_isp_opp_table: opp_table3 {
- compatible = "operating-points-v2";
- opp-shared;
-
- opp-50000000 {
- opp-hz = /bits/ 64 <50000000>;
- };
- opp-80000000 {
- opp-hz = /bits/ 64 <80000000>;
- };
- opp-100000000 {
- opp-hz = /bits/ 64 <100000000>;
- };
- opp-200000000 {
- opp-hz = /bits/ 64 <200000000>;
- };
- opp-300000000 {
- opp-hz = /bits/ 64 <300000000>;
- };
- };
-
- bus_peril_opp_table: opp_table4 {
- compatible = "operating-points-v2";
- opp-shared;
-
- opp-50000000 {
- opp-hz = /bits/ 64 <50000000>;
- };
- opp-80000000 {
- opp-hz = /bits/ 64 <80000000>;
- };
- opp-100000000 {
- opp-hz = /bits/ 64 <100000000>;
- };
- };
-
-
- Usage case to handle the frequency and voltage of bus on runtime
- in exynos3250-rinato.dts is listed below:
-
- &bus_dmc {
- devfreq-events = <&ppmu_dmc0_3>, <&ppmu_dmc1_3>;
- vdd-supply = <&buck1_reg>; /* VDD_MIF */
- status = "okay";
- };
-
- &bus_leftbus {
- devfreq-events = <&ppmu_leftbus_3>, <&ppmu_rightbus_3>;
- vdd-supply = <&buck3_reg>;
- status = "okay";
- };
-
- &bus_rightbus {
- devfreq = <&bus_leftbus>;
- status = "okay";
- };
-
- &bus_lcd0 {
- devfreq = <&bus_leftbus>;
- status = "okay";
- };
-
- &bus_fsys {
- devfreq = <&bus_leftbus>;
- status = "okay";
- };
-
- &bus_mcuisp {
- devfreq = <&bus_leftbus>;
- status = "okay";
- };
-
- &bus_isp {
- devfreq = <&bus_leftbus>;
- status = "okay";
- };
-
- &bus_peril {
- devfreq = <&bus_leftbus>;
- status = "okay";
- };
-
- &bus_mfc {
- devfreq = <&bus_leftbus>;
- status = "okay";
- };
diff --git a/Documentation/devicetree/bindings/devfreq/nvidia,tegra30-actmon.yaml b/Documentation/devicetree/bindings/devfreq/nvidia,tegra30-actmon.yaml
new file mode 100644
index 000000000000..e3379d106728
--- /dev/null
+++ b/Documentation/devicetree/bindings/devfreq/nvidia,tegra30-actmon.yaml
@@ -0,0 +1,126 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/devfreq/nvidia,tegra30-actmon.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NVIDIA Tegra30 Activity Monitor
+
+maintainers:
+ - Dmitry Osipenko <digetx@gmail.com>
+ - Jon Hunter <jonathanh@nvidia.com>
+ - Thierry Reding <thierry.reding@gmail.com>
+
+description: |
+ The activity monitor block collects statistics about the behaviour of other
+ components in the system. This information can be used to derive the rate at
+ which the external memory needs to be clocked in order to serve all requests
+ from the monitored clients.
+
+properties:
+ compatible:
+ enum:
+ - nvidia,tegra30-actmon
+ - nvidia,tegra114-actmon
+ - nvidia,tegra124-actmon
+ - nvidia,tegra210-actmon
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 2
+
+ clock-names:
+ items:
+ - const: actmon
+ - const: emc
+
+ resets:
+ maxItems: 1
+
+ reset-names:
+ items:
+ - const: actmon
+
+ interrupts:
+ maxItems: 1
+
+ interconnects:
+ minItems: 1
+ maxItems: 12
+
+ interconnect-names:
+ minItems: 1
+ maxItems: 12
+ description:
+ Should include name of the interconnect path for each interconnect
+ entry. Consult TRM documentation for information about available
+ memory clients, see MEMORY CONTROLLER and ACTIVITY MONITOR sections.
+
+ operating-points-v2:
+ description:
+ Should contain freqs and voltages and opp-supported-hw property, which
+ is a bitfield indicating SoC speedo ID mask.
+
+ "#cooling-cells":
+ const: 2
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - resets
+ - reset-names
+ - interrupts
+ - interconnects
+ - interconnect-names
+ - operating-points-v2
+ - "#cooling-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/memory/tegra30-mc.h>
+
+ mc: memory-controller@7000f000 {
+ compatible = "nvidia,tegra30-mc";
+ reg = <0x7000f000 0x400>;
+ clocks = <&clk 32>;
+ clock-names = "mc";
+
+ interrupts = <0 77 4>;
+
+ #iommu-cells = <1>;
+ #reset-cells = <1>;
+ #interconnect-cells = <1>;
+ };
+
+ emc: external-memory-controller@7000f400 {
+ compatible = "nvidia,tegra30-emc";
+ reg = <0x7000f400 0x400>;
+ interrupts = <0 78 4>;
+ clocks = <&clk 57>;
+
+ nvidia,memory-controller = <&mc>;
+ operating-points-v2 = <&dvfs_opp_table>;
+ power-domains = <&domain>;
+
+ #interconnect-cells = <0>;
+ };
+
+ actmon@6000c800 {
+ compatible = "nvidia,tegra30-actmon";
+ reg = <0x6000c800 0x400>;
+ interrupts = <0 45 4>;
+ clocks = <&clk 119>, <&clk 57>;
+ clock-names = "actmon", "emc";
+ resets = <&rst 119>;
+ reset-names = "actmon";
+ operating-points-v2 = <&dvfs_opp_table>;
+ interconnects = <&mc TEGRA30_MC_MPCORER &emc>;
+ interconnect-names = "cpu-read";
+ #cooling-cells = <2>;
+ };
diff --git a/Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt b/Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt
deleted file mode 100644
index 0ec68141f85a..000000000000
--- a/Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt
+++ /dev/null
@@ -1,213 +0,0 @@
-* Rockchip rk3399 DMC (Dynamic Memory Controller) device
-
-Required properties:
-- compatible: Must be "rockchip,rk3399-dmc".
-- devfreq-events: Node to get DDR loading, Refer to
- Documentation/devicetree/bindings/devfreq/event/
- rockchip-dfi.txt
-- clocks: Phandles for clock specified in "clock-names" property
-- clock-names : The name of clock used by the DFI, must be
- "pclk_ddr_mon";
-- operating-points-v2: Refer to Documentation/devicetree/bindings/opp/opp.txt
- for details.
-- center-supply: DMC supply node.
-- status: Marks the node enabled/disabled.
-
-Optional properties:
-- interrupts: The CPU interrupt number. The interrupt specifier
- format depends on the interrupt controller.
- It should be a DCF interrupt. When DDR DVFS finishes
- a DCF interrupt is triggered.
-
-Following properties relate to DDR timing:
-
-- rockchip,dram_speed_bin : Value reference include/dt-bindings/clock/rk3399-ddr.h,
- it selects the DDR3 cl-trp-trcd type. It must be
- set according to "Speed Bin" in DDR3 datasheet,
- DO NOT use a smaller "Speed Bin" than specified
- for the DDR3 being used.
-
-- rockchip,pd_idle : Configure the PD_IDLE value. Defines the
- power-down idle period in which memories are
- placed into power-down mode if bus is idle
- for PD_IDLE DFI clock cycles.
-
-- rockchip,sr_idle : Configure the SR_IDLE value. Defines the
- self-refresh idle period in which memories are
- placed into self-refresh mode if bus is idle
- for SR_IDLE * 1024 DFI clock cycles (DFI
- clocks freq is half of DRAM clock), default
- value is "0".
-
-- rockchip,sr_mc_gate_idle : Defines the memory self-refresh and controller
- clock gating idle period. Memories are placed
- into self-refresh mode and memory controller
- clock arg gating started if bus is idle for
- sr_mc_gate_idle*1024 DFI clock cycles.
-
-- rockchip,srpd_lite_idle : Defines the self-refresh power down idle
- period in which memories are placed into
- self-refresh power down mode if bus is idle
- for srpd_lite_idle * 1024 DFI clock cycles.
- This parameter is for LPDDR4 only.
-
-- rockchip,standby_idle : Defines the standby idle period in which
- memories are placed into self-refresh mode.
- The controller, pi, PHY and DRAM clock will
- be gated if bus is idle for standby_idle * DFI
- clock cycles.
-
-- rockchip,dram_dll_dis_freq : Defines the DDR3 DLL bypass frequency in MHz.
- When DDR frequency is less than DRAM_DLL_DISB_FREQ,
- DDR3 DLL will be bypassed. Note: if DLL was bypassed,
- the odt will also stop working.
-
-- rockchip,phy_dll_dis_freq : Defines the PHY dll bypass frequency in
- MHz (Mega Hz). When DDR frequency is less than
- DRAM_DLL_DISB_FREQ, PHY DLL will be bypassed.
- Note: PHY DLL and PHY ODT are independent.
-
-- rockchip,ddr3_odt_dis_freq : When the DRAM type is DDR3, this parameter defines
- the ODT disable frequency in MHz (Mega Hz).
- when the DDR frequency is less then ddr3_odt_dis_freq,
- the ODT on the DRAM side and controller side are
- both disabled.
-
-- rockchip,ddr3_drv : When the DRAM type is DDR3, this parameter defines
- the DRAM side driver strength in ohms. Default
- value is DDR3_DS_40ohm.
-
-- rockchip,ddr3_odt : When the DRAM type is DDR3, this parameter defines
- the DRAM side ODT strength in ohms. Default value
- is DDR3_ODT_120ohm.
-
-- rockchip,phy_ddr3_ca_drv : When the DRAM type is DDR3, this parameter defines
- the phy side CA line (incluing command line,
- address line and clock line) driver strength.
- Default value is PHY_DRV_ODT_40.
-
-- rockchip,phy_ddr3_dq_drv : When the DRAM type is DDR3, this parameter defines
- the PHY side DQ line (including DQS/DQ/DM line)
- driver strength. Default value is PHY_DRV_ODT_40.
-
-- rockchip,phy_ddr3_odt : When the DRAM type is DDR3, this parameter defines
- the PHY side ODT strength. Default value is
- PHY_DRV_ODT_240.
-
-- rockchip,lpddr3_odt_dis_freq : When the DRAM type is LPDDR3, this parameter defines
- then ODT disable frequency in MHz (Mega Hz).
- When DDR frequency is less then ddr3_odt_dis_freq,
- the ODT on the DRAM side and controller side are
- both disabled.
-
-- rockchip,lpddr3_drv : When the DRAM type is LPDDR3, this parameter defines
- the DRAM side driver strength in ohms. Default
- value is LP3_DS_34ohm.
-
-- rockchip,lpddr3_odt : When the DRAM type is LPDDR3, this parameter defines
- the DRAM side ODT strength in ohms. Default value
- is LP3_ODT_240ohm.
-
-- rockchip,phy_lpddr3_ca_drv : When the DRAM type is LPDDR3, this parameter defines
- the PHY side CA line (including command line,
- address line and clock line) driver strength.
- Default value is PHY_DRV_ODT_40.
-
-- rockchip,phy_lpddr3_dq_drv : When the DRAM type is LPDDR3, this parameter defines
- the PHY side DQ line (including DQS/DQ/DM line)
- driver strength. Default value is
- PHY_DRV_ODT_40.
-
-- rockchip,phy_lpddr3_odt : When dram type is LPDDR3, this parameter define
- the phy side odt strength, default value is
- PHY_DRV_ODT_240.
-
-- rockchip,lpddr4_odt_dis_freq : When the DRAM type is LPDDR4, this parameter
- defines the ODT disable frequency in
- MHz (Mega Hz). When the DDR frequency is less then
- ddr3_odt_dis_freq, the ODT on the DRAM side and
- controller side are both disabled.
-
-- rockchip,lpddr4_drv : When the DRAM type is LPDDR4, this parameter defines
- the DRAM side driver strength in ohms. Default
- value is LP4_PDDS_60ohm.
-
-- rockchip,lpddr4_dq_odt : When the DRAM type is LPDDR4, this parameter defines
- the DRAM side ODT on DQS/DQ line strength in ohms.
- Default value is LP4_DQ_ODT_40ohm.
-
-- rockchip,lpddr4_ca_odt : When the DRAM type is LPDDR4, this parameter defines
- the DRAM side ODT on CA line strength in ohms.
- Default value is LP4_CA_ODT_40ohm.
-
-- rockchip,phy_lpddr4_ca_drv : When the DRAM type is LPDDR4, this parameter defines
- the PHY side CA line (including command address
- line) driver strength. Default value is
- PHY_DRV_ODT_40.
-
-- rockchip,phy_lpddr4_ck_cs_drv : When the DRAM type is LPDDR4, this parameter defines
- the PHY side clock line and CS line driver
- strength. Default value is PHY_DRV_ODT_80.
-
-- rockchip,phy_lpddr4_dq_drv : When the DRAM type is LPDDR4, this parameter defines
- the PHY side DQ line (including DQS/DQ/DM line)
- driver strength. Default value is PHY_DRV_ODT_80.
-
-- rockchip,phy_lpddr4_odt : When the DRAM type is LPDDR4, this parameter defines
- the PHY side ODT strength. Default value is
- PHY_DRV_ODT_60.
-
-Example:
- dmc_opp_table: dmc_opp_table {
- compatible = "operating-points-v2";
-
- opp00 {
- opp-hz = /bits/ 64 <300000000>;
- opp-microvolt = <900000>;
- };
- opp01 {
- opp-hz = /bits/ 64 <666000000>;
- opp-microvolt = <900000>;
- };
- };
-
- dmc: dmc {
- compatible = "rockchip,rk3399-dmc";
- devfreq-events = <&dfi>;
- interrupts = <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&cru SCLK_DDRCLK>;
- clock-names = "dmc_clk";
- operating-points-v2 = <&dmc_opp_table>;
- center-supply = <&ppvar_centerlogic>;
- upthreshold = <15>;
- downdifferential = <10>;
- rockchip,ddr3_speed_bin = <21>;
- rockchip,pd_idle = <0x40>;
- rockchip,sr_idle = <0x2>;
- rockchip,sr_mc_gate_idle = <0x3>;
- rockchip,srpd_lite_idle = <0x4>;
- rockchip,standby_idle = <0x2000>;
- rockchip,dram_dll_dis_freq = <300>;
- rockchip,phy_dll_dis_freq = <125>;
- rockchip,auto_pd_dis_freq = <666>;
- rockchip,ddr3_odt_dis_freq = <333>;
- rockchip,ddr3_drv = <DDR3_DS_40ohm>;
- rockchip,ddr3_odt = <DDR3_ODT_120ohm>;
- rockchip,phy_ddr3_ca_drv = <PHY_DRV_ODT_40>;
- rockchip,phy_ddr3_dq_drv = <PHY_DRV_ODT_40>;
- rockchip,phy_ddr3_odt = <PHY_DRV_ODT_240>;
- rockchip,lpddr3_odt_dis_freq = <333>;
- rockchip,lpddr3_drv = <LP3_DS_34ohm>;
- rockchip,lpddr3_odt = <LP3_ODT_240ohm>;
- rockchip,phy_lpddr3_ca_drv = <PHY_DRV_ODT_40>;
- rockchip,phy_lpddr3_dq_drv = <PHY_DRV_ODT_40>;
- rockchip,phy_lpddr3_odt = <PHY_DRV_ODT_240>;
- rockchip,lpddr4_odt_dis_freq = <333>;
- rockchip,lpddr4_drv = <LP4_PDDS_60ohm>;
- rockchip,lpddr4_dq_odt = <LP4_DQ_ODT_40ohm>;
- rockchip,lpddr4_ca_odt = <LP4_CA_ODT_40ohm>;
- rockchip,phy_lpddr4_ca_drv = <PHY_DRV_ODT_40>;
- rockchip,phy_lpddr4_ck_cs_drv = <PHY_DRV_ODT_80>;
- rockchip,phy_lpddr4_dq_drv = <PHY_DRV_ODT_80>;
- rockchip,phy_lpddr4_odt = <PHY_DRV_ODT_60>;
- };
diff --git a/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-display-backend.yaml b/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-display-backend.yaml
new file mode 100644
index 000000000000..ba06d1857b7d
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-display-backend.yaml
@@ -0,0 +1,272 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/allwinner,sun4i-a10-display-backend.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 Display Engine Backend
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+description: |
+ The display engine backend exposes layers and sprites to the system.
+
+properties:
+ compatible:
+ enum:
+ - allwinner,sun4i-a10-display-backend
+ - allwinner,sun5i-a13-display-backend
+ - allwinner,sun6i-a31-display-backend
+ - allwinner,sun7i-a20-display-backend
+ - allwinner,sun8i-a23-display-backend
+ - allwinner,sun8i-a33-display-backend
+ - allwinner,sun9i-a80-display-backend
+
+ reg:
+ minItems: 1
+ items:
+ - description: Display Backend registers
+ - description: SAT registers
+
+ reg-names:
+ minItems: 1
+ items:
+ - const: be
+ - const: sat
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ minItems: 3
+ items:
+ - description: The backend interface clock
+ - description: The backend module clock
+ - description: The backend DRAM clock
+ - description: The SAT clock
+
+ clock-names:
+ minItems: 3
+ items:
+ - const: ahb
+ - const: mod
+ - const: ram
+ - const: sat
+
+ resets:
+ minItems: 1
+ items:
+ - description: The Backend reset line
+ - description: The SAT reset line
+
+ reset-names:
+ minItems: 1
+ items:
+ - const: be
+ - const: sat
+
+ # FIXME: This should be made required eventually once every SoC will
+ # have the MBUS declared.
+ interconnects:
+ maxItems: 1
+
+ # FIXME: This should be made required eventually once every SoC will
+ # have the MBUS declared.
+ interconnect-names:
+ const: dma-mem
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Input endpoints of the controller.
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Output endpoints of the controller.
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+ - resets
+ - ports
+
+additionalProperties: false
+
+if:
+ properties:
+ compatible:
+ contains:
+ const: allwinner,sun8i-a33-display-backend
+
+then:
+ properties:
+ reg:
+ minItems: 2
+
+ reg-names:
+ minItems: 2
+
+ clocks:
+ minItems: 4
+
+ clock-names:
+ minItems: 4
+
+ resets:
+ minItems: 2
+
+ reset-names:
+ minItems: 2
+
+ required:
+ - reg-names
+ - reset-names
+
+else:
+ properties:
+ reg:
+ maxItems: 1
+
+ reg-names:
+ maxItems: 1
+
+ clocks:
+ maxItems: 3
+
+ clock-names:
+ maxItems: 3
+
+ resets:
+ maxItems: 1
+
+ reset-names:
+ maxItems: 1
+
+examples:
+ - |
+ /*
+ * This comes from the clock/sun4i-a10-ccu.h and
+ * reset/sun4i-a10-ccu.h headers, but we can't include them since
+ * it would trigger a bunch of warnings for redefinitions of
+ * symbols with the other example.
+ */
+
+ #define CLK_AHB_DE_BE0 42
+ #define CLK_DRAM_DE_BE0 140
+ #define CLK_DE_BE0 144
+ #define RST_DE_BE0 5
+
+ display-backend@1e60000 {
+ compatible = "allwinner,sun4i-a10-display-backend";
+ reg = <0x01e60000 0x10000>;
+ interrupts = <47>;
+ clocks = <&ccu CLK_AHB_DE_BE0>, <&ccu CLK_DE_BE0>,
+ <&ccu CLK_DRAM_DE_BE0>;
+ clock-names = "ahb", "mod",
+ "ram";
+ resets = <&ccu RST_DE_BE0>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+
+ endpoint@0 {
+ reg = <0>;
+ remote-endpoint = <&fe0_out_be0>;
+ };
+
+ endpoint@1 {
+ reg = <1>;
+ remote-endpoint = <&fe1_out_be0>;
+ };
+ };
+
+ port@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+
+ endpoint@0 {
+ reg = <0>;
+ remote-endpoint = <&tcon0_in_be0>;
+ };
+
+ endpoint@1 {
+ reg = <1>;
+ remote-endpoint = <&tcon1_in_be0>;
+ };
+ };
+ };
+ };
+
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ /*
+ * This comes from the clock/sun8i-a23-a33-ccu.h and
+ * reset/sun8i-a23-a33-ccu.h headers, but we can't include them
+ * since it would trigger a bunch of warnings for redefinitions of
+ * symbols with the other example.
+ */
+
+ #define CLK_BUS_DE_BE 40
+ #define CLK_BUS_SAT 46
+ #define CLK_DRAM_DE_BE 84
+ #define CLK_DE_BE 85
+ #define RST_BUS_DE_BE 21
+ #define RST_BUS_SAT 27
+
+ display-backend@1e60000 {
+ compatible = "allwinner,sun8i-a33-display-backend";
+ reg = <0x01e60000 0x10000>, <0x01e80000 0x1000>;
+ reg-names = "be", "sat";
+ interrupts = <GIC_SPI 95 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&ccu CLK_BUS_DE_BE>, <&ccu CLK_DE_BE>,
+ <&ccu CLK_DRAM_DE_BE>, <&ccu CLK_BUS_SAT>;
+ clock-names = "ahb", "mod",
+ "ram", "sat";
+ resets = <&ccu RST_BUS_DE_BE>, <&ccu RST_BUS_SAT>;
+ reset-names = "be", "sat";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ endpoint {
+ remote-endpoint = <&fe0_out_be0>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ endpoint {
+ remote-endpoint = <&drc0_in_be0>;
+ };
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-display-engine.yaml b/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-display-engine.yaml
new file mode 100644
index 000000000000..e6088f379f70
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-display-engine.yaml
@@ -0,0 +1,117 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/allwinner,sun4i-a10-display-engine.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 Display Engine Pipeline
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+description: |
+ The display engine pipeline (and its entry point, since it can be
+ either directly the backend or the frontend) is represented as an
+ extra node.
+
+ The Allwinner A10 Display pipeline is composed of several components
+ that are going to be documented below:
+
+ For all connections between components up to the TCONs in the
+ display pipeline, when there are multiple components of the same
+ type at the same depth, the local endpoint ID must be the same as
+ the remote component's index. For example, if the remote endpoint is
+ Frontend 1, then the local endpoint ID must be 1.
+
+ Frontend 0 [0] ------- [0] Backend 0 [0] ------- [0] TCON 0
+ [1] -- -- [1] [1] -- -- [1]
+ \ / \ /
+ X X
+ / \ / \
+ [0] -- -- [0] [0] -- -- [0]
+ Frontend 1 [1] ------- [1] Backend 1 [1] ------- [1] TCON 1
+
+ For a two pipeline system such as the one depicted above, the lines
+ represent the connections between the components, while the numbers
+ within the square brackets corresponds to the ID of the local endpoint.
+
+ The same rule also applies to DE 2.0 mixer-TCON connections:
+
+ Mixer 0 [0] ----------- [0] TCON 0
+ [1] ---- ---- [1]
+ \ /
+ X
+ / \
+ [0] ---- ---- [0]
+ Mixer 1 [1] ----------- [1] TCON 1
+
+properties:
+ compatible:
+ enum:
+ - allwinner,sun4i-a10-display-engine
+ - allwinner,sun5i-a10s-display-engine
+ - allwinner,sun5i-a13-display-engine
+ - allwinner,sun6i-a31-display-engine
+ - allwinner,sun6i-a31s-display-engine
+ - allwinner,sun7i-a20-display-engine
+ - allwinner,sun8i-a23-display-engine
+ - allwinner,sun8i-a33-display-engine
+ - allwinner,sun8i-a83t-display-engine
+ - allwinner,sun8i-h3-display-engine
+ - allwinner,sun8i-r40-display-engine
+ - allwinner,sun8i-v3s-display-engine
+ - allwinner,sun9i-a80-display-engine
+ - allwinner,sun20i-d1-display-engine
+ - allwinner,sun50i-a64-display-engine
+ - allwinner,sun50i-h6-display-engine
+
+ allwinner,pipelines:
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ minItems: 1
+ maxItems: 2
+ items:
+ maxItems: 1
+ description: |
+ Available display engine frontends (DE 1.0) or mixers (DE
+ 2.0/3.0) available.
+
+required:
+ - compatible
+ - allwinner,pipelines
+
+additionalProperties: false
+
+if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - allwinner,sun4i-a10-display-engine
+ - allwinner,sun6i-a31-display-engine
+ - allwinner,sun6i-a31s-display-engine
+ - allwinner,sun7i-a20-display-engine
+ - allwinner,sun8i-a83t-display-engine
+ - allwinner,sun8i-r40-display-engine
+ - allwinner,sun9i-a80-display-engine
+ - allwinner,sun20i-d1-display-engine
+ - allwinner,sun50i-a64-display-engine
+
+then:
+ properties:
+ allwinner,pipelines:
+ minItems: 2
+
+else:
+ properties:
+ allwinner,pipelines:
+ maxItems: 1
+
+examples:
+ - |
+ de: display-engine {
+ compatible = "allwinner,sun4i-a10-display-engine";
+ allwinner,pipelines = <&fe0>, <&fe1>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-display-frontend.yaml b/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-display-frontend.yaml
new file mode 100644
index 000000000000..98e8240a05bd
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-display-frontend.yaml
@@ -0,0 +1,125 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/allwinner,sun4i-a10-display-frontend.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 Display Engine Frontend
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+description: |
+ The display engine frontend does formats conversion, scaling,
+ deinterlacing and color space conversion.
+
+properties:
+ compatible:
+ enum:
+ - allwinner,sun4i-a10-display-frontend
+ - allwinner,sun5i-a13-display-frontend
+ - allwinner,sun6i-a31-display-frontend
+ - allwinner,sun7i-a20-display-frontend
+ - allwinner,sun8i-a23-display-frontend
+ - allwinner,sun8i-a33-display-frontend
+ - allwinner,sun9i-a80-display-frontend
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: The frontend interface clock
+ - description: The frontend module clock
+ - description: The frontend DRAM clock
+
+ clock-names:
+ items:
+ - const: ahb
+ - const: mod
+ - const: ram
+
+ # FIXME: This should be made required eventually once every SoC will
+ # have the MBUS declared.
+ interconnects:
+ maxItems: 1
+
+ # FIXME: This should be made required eventually once every SoC will
+ # have the MBUS declared.
+ interconnect-names:
+ const: dma-mem
+
+ resets:
+ maxItems: 1
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: |
+ Input endpoints of the controller.
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: |
+ Output endpoints of the controller.
+
+ required:
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+ - resets
+ - ports
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/sun4i-a10-ccu.h>
+ #include <dt-bindings/reset/sun4i-a10-ccu.h>
+
+ fe0: display-frontend@1e00000 {
+ compatible = "allwinner,sun4i-a10-display-frontend";
+ reg = <0x01e00000 0x20000>;
+ interrupts = <47>;
+ clocks = <&ccu CLK_AHB_DE_FE0>, <&ccu CLK_DE_FE0>,
+ <&ccu CLK_DRAM_DE_FE0>;
+ clock-names = "ahb", "mod",
+ "ram";
+ resets = <&ccu RST_DE_FE0>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ fe0_out: port@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+
+ fe0_out_be0: endpoint@0 {
+ reg = <0>;
+ remote-endpoint = <&be0_in_fe0>;
+ };
+
+ fe0_out_be1: endpoint@1 {
+ reg = <1>;
+ remote-endpoint = <&be1_in_fe0>;
+ };
+ };
+ };
+ };
+
+
+...
diff --git a/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-hdmi.yaml b/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-hdmi.yaml
new file mode 100644
index 000000000000..55703caacb9c
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-hdmi.yaml
@@ -0,0 +1,170 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/allwinner,sun4i-a10-hdmi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 HDMI Controller
+
+description: |
+ The HDMI Encoder supports the HDMI video and audio outputs, and does
+ CEC. It is one end of the pipeline.
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+properties:
+ compatible:
+ oneOf:
+ - const: allwinner,sun4i-a10-hdmi
+ - const: allwinner,sun5i-a10s-hdmi
+ - const: allwinner,sun6i-a31-hdmi
+ - items:
+ - const: allwinner,sun7i-a20-hdmi
+ - const: allwinner,sun5i-a10s-hdmi
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ oneOf:
+ - items:
+ - description: The HDMI interface clock
+ - description: The HDMI module clock
+ - description: The first video PLL
+ - description: The second video PLL
+
+ - items:
+ - description: The HDMI interface clock
+ - description: The HDMI module clock
+ - description: The HDMI DDC clock
+ - description: The first video PLL
+ - description: The second video PLL
+
+ clock-names:
+ oneOf:
+ - items:
+ - const: ahb
+ - const: mod
+ - const: pll-0
+ - const: pll-1
+
+ - items:
+ - const: ahb
+ - const: mod
+ - const: ddc
+ - const: pll-0
+ - const: pll-1
+
+ resets:
+ maxItems: 1
+
+ dmas:
+ items:
+ - description: DDC Transmission DMA Channel
+ - description: DDC Reception DMA Channel
+ - description: Audio Transmission DMA Channel
+
+ dma-names:
+ items:
+ - const: ddc-tx
+ - const: ddc-rx
+ - const: audio-tx
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: |
+ Input endpoints of the controller.
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: |
+ Output endpoints of the controller. Usually an HDMI
+ connector.
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+ - dmas
+ - dma-names
+
+if:
+ properties:
+ compatible:
+ contains:
+ const: allwinner,sun6i-a31-hdmi
+
+then:
+ properties:
+ clocks:
+ minItems: 5
+
+ clock-names:
+ minItems: 5
+
+ required:
+ - resets
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/sun4i-a10-ccu.h>
+ #include <dt-bindings/dma/sun4i-a10.h>
+ #include <dt-bindings/reset/sun4i-a10-ccu.h>
+
+ hdmi: hdmi@1c16000 {
+ compatible = "allwinner,sun4i-a10-hdmi";
+ reg = <0x01c16000 0x1000>;
+ interrupts = <58>;
+ clocks = <&ccu CLK_AHB_HDMI0>, <&ccu CLK_HDMI>,
+ <&ccu CLK_PLL_VIDEO0_2X>,
+ <&ccu CLK_PLL_VIDEO1_2X>;
+ clock-names = "ahb", "mod", "pll-0", "pll-1";
+ dmas = <&dma SUN4I_DMA_NORMAL 16>,
+ <&dma SUN4I_DMA_NORMAL 16>,
+ <&dma SUN4I_DMA_DEDICATED 24>;
+ dma-names = "ddc-tx", "ddc-rx", "audio-tx";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ hdmi_in: port@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+
+ hdmi_in_tcon0: endpoint@0 {
+ reg = <0>;
+ remote-endpoint = <&tcon0_out_hdmi>;
+ };
+
+ hdmi_in_tcon1: endpoint@1 {
+ reg = <1>;
+ remote-endpoint = <&tcon1_out_hdmi>;
+ };
+ };
+
+ hdmi_out: port@1 {
+ reg = <1>;
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-tcon.yaml b/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-tcon.yaml
new file mode 100644
index 000000000000..724d93b9193b
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-tcon.yaml
@@ -0,0 +1,677 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/allwinner,sun4i-a10-tcon.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 Timings Controller (TCON)
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+description: |
+ The TCON acts as a timing controller for RGB, LVDS and TV
+ interfaces.
+
+properties:
+ "#clock-cells":
+ const: 0
+
+ compatible:
+ oneOf:
+ - const: allwinner,sun4i-a10-tcon
+ - const: allwinner,sun5i-a13-tcon
+ - const: allwinner,sun6i-a31-tcon
+ - const: allwinner,sun6i-a31s-tcon
+ - const: allwinner,sun7i-a20-tcon
+ - const: allwinner,sun8i-a23-tcon
+ - const: allwinner,sun8i-a33-tcon
+ - const: allwinner,sun8i-a83t-tcon-lcd
+ - const: allwinner,sun8i-a83t-tcon-tv
+ - const: allwinner,sun8i-r40-tcon-tv
+ - const: allwinner,sun8i-v3s-tcon
+ - const: allwinner,sun9i-a80-tcon-lcd
+ - const: allwinner,sun9i-a80-tcon-tv
+ - const: allwinner,sun20i-d1-tcon-lcd
+ - const: allwinner,sun20i-d1-tcon-tv
+
+ - items:
+ - enum:
+ - allwinner,sun7i-a20-tcon0
+ - allwinner,sun7i-a20-tcon1
+ - const: allwinner,sun7i-a20-tcon
+
+ - items:
+ - enum:
+ - allwinner,sun50i-a64-tcon-lcd
+ - const: allwinner,sun8i-a83t-tcon-lcd
+
+ - items:
+ - enum:
+ - allwinner,sun8i-h3-tcon-tv
+ - allwinner,sun50i-a64-tcon-tv
+ - const: allwinner,sun8i-a83t-tcon-tv
+
+ - items:
+ - enum:
+ - allwinner,sun50i-h6-tcon-tv
+ - const: allwinner,sun8i-r40-tcon-tv
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ minItems: 1
+ maxItems: 4
+
+ clock-names:
+ minItems: 1
+ maxItems: 4
+
+ clock-output-names:
+ description:
+ Name of the LCD pixel clock created.
+ maxItems: 1
+
+ dmas:
+ maxItems: 1
+
+ resets:
+ anyOf:
+ - items:
+ - description: TCON Reset Line
+
+ - items:
+ - description: TCON Reset Line
+ - description: TCON LVDS Reset Line
+
+ - items:
+ - description: TCON Reset Line
+ - description: TCON eDP Reset Line
+
+ - items:
+ - description: TCON Reset Line
+ - description: TCON eDP Reset Line
+ - description: TCON LVDS Reset Line
+
+ reset-names:
+ oneOf:
+ - const: lcd
+
+ - items:
+ - const: lcd
+ - const: lvds
+
+ - items:
+ - const: lcd
+ - const: edp
+
+ - items:
+ - const: lcd
+ - const: edp
+ - const: lvds
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: |
+ Input endpoints of the controller.
+
+ port@1:
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ unevaluatedProperties: false
+ description: |
+ Output endpoints of the controller.
+
+ patternProperties:
+ "^endpoint(@[0-9])$":
+ $ref: /schemas/graph.yaml#/$defs/endpoint-base
+ unevaluatedProperties: false
+
+ properties:
+ allwinner,tcon-channel:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: |
+ TCON can have 1 or 2 channels, usually with the
+ first channel being used for the panels interfaces
+ (RGB, LVDS, etc.), and the second being used for the
+ outputs that require another controller (TV Encoder,
+ HDMI, etc.).
+
+ If that property is present, specifies the TCON
+ channel the endpoint is associated to. If that
+ property is not present, the endpoint number will be
+ used as the channel number.
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+ - resets
+ - ports
+
+additionalProperties: false
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - allwinner,sun4i-a10-tcon
+ - allwinner,sun5i-a13-tcon
+ - allwinner,sun7i-a20-tcon
+
+ then:
+ properties:
+ clocks:
+ minItems: 3
+
+ clock-names:
+ items:
+ - const: ahb
+ - const: tcon-ch0
+ - const: tcon-ch1
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - allwinner,sun6i-a31-tcon
+ - allwinner,sun6i-a31s-tcon
+
+ then:
+ properties:
+ clocks:
+ minItems: 4
+
+ clock-names:
+ items:
+ - const: ahb
+ - const: tcon-ch0
+ - const: tcon-ch1
+ - const: lvds-alt
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - allwinner,sun8i-a23-tcon
+ - allwinner,sun8i-a33-tcon
+
+ then:
+ properties:
+ clocks:
+ minItems: 3
+
+ clock-names:
+ items:
+ - const: ahb
+ - const: tcon-ch0
+ - const: lvds-alt
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - allwinner,sun8i-a83t-tcon-lcd
+ - allwinner,sun8i-v3s-tcon
+ - allwinner,sun9i-a80-tcon-lcd
+ - allwinner,sun20i-d1-tcon-lcd
+
+ then:
+ properties:
+ clocks:
+ minItems: 2
+
+ clock-names:
+ items:
+ - const: ahb
+ - const: tcon-ch0
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - allwinner,sun8i-a83t-tcon-tv
+ - allwinner,sun8i-r40-tcon-tv
+ - allwinner,sun9i-a80-tcon-tv
+ - allwinner,sun20i-d1-tcon-tv
+
+ then:
+ properties:
+ clocks:
+ minItems: 2
+
+ clock-names:
+ items:
+ - const: ahb
+ - const: tcon-ch1
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - allwinner,sun5i-a13-tcon
+ - allwinner,sun6i-a31-tcon
+ - allwinner,sun6i-a31s-tcon
+ - allwinner,sun7i-a20-tcon
+ - allwinner,sun8i-a23-tcon
+ - allwinner,sun8i-a33-tcon
+ - allwinner,sun8i-v3s-tcon
+ - allwinner,sun9i-a80-tcon-lcd
+ - allwinner,sun4i-a10-tcon
+ - allwinner,sun8i-a83t-tcon-lcd
+ - allwinner,sun20i-d1-tcon-lcd
+
+ then:
+ required:
+ - "#clock-cells"
+ - clock-output-names
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - allwinner,sun6i-a31-tcon
+ - allwinner,sun6i-a31s-tcon
+ - allwinner,sun8i-a23-tcon
+ - allwinner,sun8i-a33-tcon
+ - allwinner,sun8i-a83t-tcon-lcd
+ - allwinner,sun20i-d1-tcon-lcd
+
+ then:
+ properties:
+ resets:
+ minItems: 2
+
+ reset-names:
+ items:
+ - const: lcd
+ - const: lvds
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - allwinner,sun9i-a80-tcon-lcd
+
+ then:
+ properties:
+ resets:
+ minItems: 3
+
+ reset-names:
+ items:
+ - const: lcd
+ - const: edp
+ - const: lvds
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - allwinner,sun9i-a80-tcon-tv
+
+ then:
+ properties:
+ resets:
+ minItems: 2
+
+ reset-names:
+ items:
+ - const: lcd
+ - const: edp
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - allwinner,sun4i-a10-tcon
+ - allwinner,sun5i-a13-tcon
+ - allwinner,sun6i-a31-tcon
+ - allwinner,sun6i-a31s-tcon
+ - allwinner,sun7i-a20-tcon
+ - allwinner,sun8i-a23-tcon
+ - allwinner,sun8i-a33-tcon
+
+ then:
+ required:
+ - dmas
+
+examples:
+ - |
+ #include <dt-bindings/dma/sun4i-a10.h>
+
+ /*
+ * This comes from the clock/sun4i-a10-ccu.h and
+ * reset/sun4i-a10-ccu.h headers, but we can't include them since
+ * it would trigger a bunch of warnings for redefinitions of
+ * symbols with the other example.
+ */
+
+ #define CLK_AHB_LCD0 56
+ #define CLK_TCON0_CH0 149
+ #define CLK_TCON0_CH1 155
+ #define RST_TCON0 11
+
+ lcd-controller@1c0c000 {
+ compatible = "allwinner,sun4i-a10-tcon";
+ reg = <0x01c0c000 0x1000>;
+ interrupts = <44>;
+ resets = <&ccu RST_TCON0>;
+ reset-names = "lcd";
+ clocks = <&ccu CLK_AHB_LCD0>,
+ <&ccu CLK_TCON0_CH0>,
+ <&ccu CLK_TCON0_CH1>;
+ clock-names = "ahb",
+ "tcon-ch0",
+ "tcon-ch1";
+ clock-output-names = "tcon0-pixel-clock";
+ #clock-cells = <0>;
+ dmas = <&dma SUN4I_DMA_DEDICATED 14>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+
+ endpoint@0 {
+ reg = <0>;
+ remote-endpoint = <&be0_out_tcon0>;
+ };
+
+ endpoint@1 {
+ reg = <1>;
+ remote-endpoint = <&be1_out_tcon0>;
+ };
+ };
+
+ port@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+
+ endpoint@1 {
+ reg = <1>;
+ remote-endpoint = <&hdmi_in_tcon0>;
+ allwinner,tcon-channel = <1>;
+ };
+ };
+ };
+ };
+
+ #undef CLK_AHB_LCD0
+ #undef CLK_TCON0_CH0
+ #undef CLK_TCON0_CH1
+ #undef RST_TCON0
+
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ /*
+ * This comes from the clock/sun6i-a31-ccu.h and
+ * reset/sun6i-a31-ccu.h headers, but we can't include them since
+ * it would trigger a bunch of warnings for redefinitions of
+ * symbols with the other example.
+ */
+
+ #define CLK_PLL_MIPI 15
+ #define CLK_AHB1_LCD0 47
+ #define CLK_LCD0_CH0 127
+ #define CLK_LCD0_CH1 129
+ #define RST_AHB1_LCD0 27
+ #define RST_AHB1_LVDS 41
+
+ lcd-controller@1c0c000 {
+ compatible = "allwinner,sun6i-a31-tcon";
+ reg = <0x01c0c000 0x1000>;
+ interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>;
+ dmas = <&dma 11>;
+ resets = <&ccu RST_AHB1_LCD0>, <&ccu RST_AHB1_LVDS>;
+ reset-names = "lcd", "lvds";
+ clocks = <&ccu CLK_AHB1_LCD0>,
+ <&ccu CLK_LCD0_CH0>,
+ <&ccu CLK_LCD0_CH1>,
+ <&ccu CLK_PLL_MIPI>;
+ clock-names = "ahb",
+ "tcon-ch0",
+ "tcon-ch1",
+ "lvds-alt";
+ clock-output-names = "tcon0-pixel-clock";
+ #clock-cells = <0>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+
+ endpoint@0 {
+ reg = <0>;
+ remote-endpoint = <&drc0_out_tcon0>;
+ };
+
+ endpoint@1 {
+ reg = <1>;
+ remote-endpoint = <&drc1_out_tcon0>;
+ };
+ };
+
+ port@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+
+ endpoint@1 {
+ reg = <1>;
+ remote-endpoint = <&hdmi_in_tcon0>;
+ allwinner,tcon-channel = <1>;
+ };
+ };
+ };
+ };
+
+ #undef CLK_PLL_MIPI
+ #undef CLK_AHB1_LCD0
+ #undef CLK_LCD0_CH0
+ #undef CLK_LCD0_CH1
+ #undef RST_AHB1_LCD0
+ #undef RST_AHB1_LVDS
+
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ /*
+ * This comes from the clock/sun9i-a80-ccu.h and
+ * reset/sun9i-a80-ccu.h headers, but we can't include them since
+ * it would trigger a bunch of warnings for redefinitions of
+ * symbols with the other example.
+ */
+
+ #define CLK_BUS_LCD0 102
+ #define CLK_LCD0 58
+ #define RST_BUS_LCD0 22
+ #define RST_BUS_EDP 24
+ #define RST_BUS_LVDS 25
+
+ lcd-controller@3c00000 {
+ compatible = "allwinner,sun9i-a80-tcon-lcd";
+ reg = <0x03c00000 0x10000>;
+ interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&ccu CLK_BUS_LCD0>, <&ccu CLK_LCD0>;
+ clock-names = "ahb", "tcon-ch0";
+ resets = <&ccu RST_BUS_LCD0>, <&ccu RST_BUS_EDP>, <&ccu RST_BUS_LVDS>;
+ reset-names = "lcd", "edp", "lvds";
+ clock-output-names = "tcon0-pixel-clock";
+ #clock-cells = <0>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ endpoint {
+ remote-endpoint = <&drc0_out_tcon0>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ };
+ };
+ };
+
+ #undef CLK_BUS_TCON0
+ #undef CLK_TCON0
+ #undef RST_BUS_TCON0
+ #undef RST_BUS_EDP
+ #undef RST_BUS_LVDS
+
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ /*
+ * This comes from the clock/sun8i-a83t-ccu.h and
+ * reset/sun8i-a83t-ccu.h headers, but we can't include them since
+ * it would trigger a bunch of warnings for redefinitions of
+ * symbols with the other example.
+ */
+
+ #define CLK_BUS_TCON0 36
+ #define CLK_TCON0 85
+ #define RST_BUS_TCON0 22
+ #define RST_BUS_LVDS 31
+
+ lcd-controller@1c0c000 {
+ compatible = "allwinner,sun8i-a83t-tcon-lcd";
+ reg = <0x01c0c000 0x1000>;
+ interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&ccu CLK_BUS_TCON0>, <&ccu CLK_TCON0>;
+ clock-names = "ahb", "tcon-ch0";
+ clock-output-names = "tcon-pixel-clock";
+ #clock-cells = <0>;
+ resets = <&ccu RST_BUS_TCON0>, <&ccu RST_BUS_LVDS>;
+ reset-names = "lcd", "lvds";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+
+ endpoint@0 {
+ reg = <0>;
+ remote-endpoint = <&mixer0_out_tcon0>;
+ };
+
+ endpoint@1 {
+ reg = <1>;
+ remote-endpoint = <&mixer1_out_tcon0>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ };
+ };
+ };
+
+ #undef CLK_BUS_TCON0
+ #undef CLK_TCON0
+ #undef RST_BUS_TCON0
+ #undef RST_BUS_LVDS
+
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ /*
+ * This comes from the clock/sun8i-r40-ccu.h and
+ * reset/sun8i-r40-ccu.h headers, but we can't include them since
+ * it would trigger a bunch of warnings for redefinitions of
+ * symbols with the other example.
+ */
+
+ #define CLK_BUS_TCON_TV0 73
+ #define RST_BUS_TCON_TV0 49
+
+ tcon_tv0: lcd-controller@1c73000 {
+ compatible = "allwinner,sun8i-r40-tcon-tv";
+ reg = <0x01c73000 0x1000>;
+ interrupts = <GIC_SPI 51 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&ccu CLK_BUS_TCON_TV0>, <&tcon_top 0>;
+ clock-names = "ahb", "tcon-ch1";
+ resets = <&ccu RST_BUS_TCON_TV0>;
+ reset-names = "lcd";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+
+ endpoint@0 {
+ reg = <0>;
+ remote-endpoint = <&tcon_top_mixer0_out_tcon_tv0>;
+ };
+
+ endpoint@1 {
+ reg = <1>;
+ remote-endpoint = <&tcon_top_mixer1_out_tcon_tv0>;
+ };
+ };
+
+ tcon_tv0_out: port@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+
+ endpoint@1 {
+ reg = <1>;
+ remote-endpoint = <&tcon_top_hdmi_in_tcon_tv0>;
+ };
+ };
+ };
+ };
+
+ #undef CLK_BUS_TCON_TV0
+ #undef RST_BUS_TCON_TV0
+
+...
diff --git a/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-tv-encoder.yaml b/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-tv-encoder.yaml
new file mode 100644
index 000000000000..c39e90a5945f
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-tv-encoder.yaml
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/allwinner,sun4i-a10-tv-encoder.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 TV Encoder
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+properties:
+ compatible:
+ const: allwinner,sun4i-a10-tv-encoder
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ resets:
+ maxItems: 1
+
+ port:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ The first port should be the input endpoint, usually coming from the
+ associated TCON.
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - resets
+ - port
+
+additionalProperties: false
+
+examples:
+ - |
+ tve0: tv-encoder@1c0a000 {
+ compatible = "allwinner,sun4i-a10-tv-encoder";
+ reg = <0x01c0a000 0x1000>;
+ clocks = <&ahb_gates 34>;
+ resets = <&tcon_ch0_clk 0>;
+
+ port {
+ tve0_in_tcon0: endpoint {
+ remote-endpoint = <&tcon0_out_tve0>;
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/allwinner,sun6i-a31-drc.yaml b/Documentation/devicetree/bindings/display/allwinner,sun6i-a31-drc.yaml
new file mode 100644
index 000000000000..895506d93f4c
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/allwinner,sun6i-a31-drc.yaml
@@ -0,0 +1,125 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/allwinner,sun6i-a31-drc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A31 Dynamic Range Controller
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+description: |
+ The DRC (Dynamic Range Controller) allows to dynamically adjust
+ pixel brightness/contrast based on histogram measurements for LCD
+ content adaptive backlight control.
+
+properties:
+ compatible:
+ enum:
+ - allwinner,sun6i-a31-drc
+ - allwinner,sun6i-a31s-drc
+ - allwinner,sun8i-a23-drc
+ - allwinner,sun8i-a33-drc
+ - allwinner,sun9i-a80-drc
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: The DRC interface clock
+ - description: The DRC module clock
+ - description: The DRC DRAM clock
+
+ clock-names:
+ items:
+ - const: ahb
+ - const: mod
+ - const: ram
+
+ resets:
+ maxItems: 1
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: |
+ Input endpoints of the controller.
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: |
+ Output endpoints of the controller.
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+ - resets
+ - ports
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ #include <dt-bindings/clock/sun6i-a31-ccu.h>
+ #include <dt-bindings/reset/sun6i-a31-ccu.h>
+
+ drc0: drc@1e70000 {
+ compatible = "allwinner,sun6i-a31-drc";
+ reg = <0x01e70000 0x10000>;
+ interrupts = <GIC_SPI 91 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&ccu CLK_AHB1_DRC0>, <&ccu CLK_IEP_DRC0>,
+ <&ccu CLK_DRAM_DRC0>;
+ clock-names = "ahb", "mod",
+ "ram";
+ resets = <&ccu RST_AHB1_DRC0>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ drc0_in: port@0 {
+ reg = <0>;
+
+ drc0_in_be0: endpoint {
+ remote-endpoint = <&be0_out_drc0>;
+ };
+ };
+
+ drc0_out: port@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+
+ drc0_out_tcon0: endpoint@0 {
+ reg = <0>;
+ remote-endpoint = <&tcon0_in_drc0>;
+ };
+
+ drc0_out_tcon1: endpoint@1 {
+ reg = <1>;
+ remote-endpoint = <&tcon1_in_drc0>;
+ };
+ };
+ };
+ };
+
+
+...
diff --git a/Documentation/devicetree/bindings/display/allwinner,sun6i-a31-mipi-dsi.yaml b/Documentation/devicetree/bindings/display/allwinner,sun6i-a31-mipi-dsi.yaml
new file mode 100644
index 000000000000..c731fbdc2fe0
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/allwinner,sun6i-a31-mipi-dsi.yaml
@@ -0,0 +1,136 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/allwinner,sun6i-a31-mipi-dsi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A31 MIPI-DSI Controller
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - allwinner,sun6i-a31-mipi-dsi
+ - allwinner,sun50i-a64-mipi-dsi
+ - allwinner,sun50i-a100-mipi-dsi
+ - items:
+ - const: allwinner,sun20i-d1-mipi-dsi
+ - const: allwinner,sun50i-a100-mipi-dsi
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ minItems: 1
+ items:
+ - description: Bus Clock
+ - description: Module Clock
+
+ clock-names:
+ items:
+ - const: bus
+ - const: mod
+
+ resets:
+ maxItems: 1
+
+ vcc-dsi-supply:
+ description: VCC-DSI power supply of the DSI encoder
+
+ phys:
+ maxItems: 1
+
+ phy-names:
+ const: dphy
+
+ port:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ The port should be the input endpoint, usually coming from the
+ associated TCON.
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - phys
+ - phy-names
+ - resets
+ - port
+
+allOf:
+ - $ref: dsi-controller.yaml#
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - allwinner,sun6i-a31-mipi-dsi
+ - allwinner,sun50i-a100-mipi-dsi
+
+ then:
+ properties:
+ clocks:
+ minItems: 2
+
+ required:
+ - clock-names
+
+ else:
+ properties:
+ clocks:
+ maxItems: 1
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - allwinner,sun6i-a31-mipi-dsi
+ - allwinner,sun50i-a64-mipi-dsi
+
+ then:
+ required:
+ - vcc-dsi-supply
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ dsi0: dsi@1ca0000 {
+ compatible = "allwinner,sun6i-a31-mipi-dsi";
+ reg = <0x01ca0000 0x1000>;
+ interrupts = <0 89 4>;
+ clocks = <&ccu 23>, <&ccu 96>;
+ clock-names = "bus", "mod";
+ resets = <&ccu 4>;
+ phys = <&dphy0>;
+ phy-names = "dphy";
+ vcc-dsi-supply = <&reg_dcdc1>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ panel@0 {
+ compatible = "bananapi,lhr050h41", "ilitek,ili9881c";
+ reg = <0>;
+ power-supply = <&reg_display>;
+ reset-gpios = <&r_pio 0 5 1>; /* PL05 */
+ backlight = <&pwm_bl>;
+ };
+
+ port {
+ dsi0_in_tcon0: endpoint {
+ remote-endpoint = <&tcon0_out_dsi0>;
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/allwinner,sun8i-a83t-de2-mixer.yaml b/Documentation/devicetree/bindings/display/allwinner,sun8i-a83t-de2-mixer.yaml
new file mode 100644
index 000000000000..cbd18fd83e52
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/allwinner,sun8i-a83t-de2-mixer.yaml
@@ -0,0 +1,140 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/allwinner,sun8i-a83t-de2-mixer.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner Display Engine 2.0 Mixer
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+properties:
+ compatible:
+ enum:
+ - allwinner,sun8i-a83t-de2-mixer-0
+ - allwinner,sun8i-a83t-de2-mixer-1
+ - allwinner,sun8i-h3-de2-mixer-0
+ - allwinner,sun8i-r40-de2-mixer-0
+ - allwinner,sun8i-r40-de2-mixer-1
+ - allwinner,sun8i-v3s-de2-mixer
+ - allwinner,sun20i-d1-de2-mixer-0
+ - allwinner,sun20i-d1-de2-mixer-1
+ - allwinner,sun50i-a64-de2-mixer-0
+ - allwinner,sun50i-a64-de2-mixer-1
+ - allwinner,sun50i-h6-de3-mixer-0
+ - allwinner,sun50i-h616-de33-mixer-0
+
+ reg: true
+
+ reg-names: true
+
+ clocks:
+ items:
+ - description: The mixer interface clock
+ - description: The mixer module clock
+
+ clock-names:
+ items:
+ - const: bus
+ - const: mod
+
+ iommus:
+ maxItems: 1
+
+ resets:
+ maxItems: 1
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: |
+ Input endpoints of the controller.
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: |
+ Output endpoints of the controller.
+
+ required:
+ - port@1
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - allwinner,sun50i-h616-de33-mixer-0
+ then:
+ properties:
+ reg:
+ description: |
+ Registers for controlling individual layers of the display
+ engine (layers), global control (top), and display blending
+ control (display). Names are from Allwinner BSP kernel.
+ maxItems: 3
+ reg-names:
+ items:
+ - const: layers
+ - const: top
+ - const: display
+ required:
+ - reg-names
+
+ else:
+ properties:
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - resets
+ - ports
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/sun8i-de2.h>
+ #include <dt-bindings/reset/sun8i-de2.h>
+
+ mixer0: mixer@1100000 {
+ compatible = "allwinner,sun8i-a83t-de2-mixer-0";
+ reg = <0x01100000 0x100000>;
+ clocks = <&display_clocks CLK_BUS_MIXER0>,
+ <&display_clocks CLK_MIXER0>;
+ clock-names = "bus",
+ "mod";
+ resets = <&display_clocks RST_MIXER0>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ mixer0_out: port@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+
+ mixer0_out_tcon0: endpoint@0 {
+ reg = <0>;
+ remote-endpoint = <&tcon0_in_mixer0>;
+ };
+
+ mixer0_out_tcon1: endpoint@1 {
+ reg = <1>;
+ remote-endpoint = <&tcon1_in_mixer0>;
+ };
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/allwinner,sun8i-a83t-dw-hdmi.yaml b/Documentation/devicetree/bindings/display/allwinner,sun8i-a83t-dw-hdmi.yaml
new file mode 100644
index 000000000000..60fd927b5a06
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/allwinner,sun8i-a83t-dw-hdmi.yaml
@@ -0,0 +1,254 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/allwinner,sun8i-a83t-dw-hdmi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A83t DWC HDMI TX Encoder
+
+description: |
+ The HDMI transmitter is a Synopsys DesignWare HDMI 1.4 TX controller
+ IP with Allwinner\'s own PHY IP. It supports audio and video outputs
+ and CEC.
+
+ These DT bindings follow the Synopsys DWC HDMI TX bindings defined
+ in bridge/synopsys,dw-hdmi.yaml with the following device-specific
+ properties.
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+properties:
+ "#phy-cells":
+ const: 0
+
+ compatible:
+ oneOf:
+ - const: allwinner,sun8i-a83t-dw-hdmi
+ - const: allwinner,sun50i-h6-dw-hdmi
+
+ - items:
+ - enum:
+ - allwinner,sun8i-h3-dw-hdmi
+ - allwinner,sun8i-r40-dw-hdmi
+ - allwinner,sun50i-a64-dw-hdmi
+ - const: allwinner,sun8i-a83t-dw-hdmi
+
+ reg:
+ maxItems: 1
+
+ reg-io-width:
+ const: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ minItems: 3
+ items:
+ - description: Bus Clock
+ - description: Register Clock
+ - description: TMDS Clock
+ - description: HDMI CEC Clock
+ - description: HDCP Clock
+ - description: HDCP Bus Clock
+
+ clock-names:
+ minItems: 3
+ items:
+ - const: iahb
+ - const: isfr
+ - const: tmds
+ - const: cec
+ - const: hdcp
+ - const: hdcp-bus
+
+ resets:
+ minItems: 1
+ items:
+ - description: HDMI Controller Reset
+ - description: HDCP Reset
+
+ reset-names:
+ minItems: 1
+ items:
+ - const: ctrl
+ - const: hdcp
+
+ phys:
+ maxItems: 1
+ description:
+ Phandle to the DWC HDMI PHY.
+
+ phy-names:
+ const: phy
+
+ hvcc-supply:
+ description:
+ The VCC power supply of the controller
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: |
+ Input endpoints of the controller. Usually the associated
+ TCON.
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: |
+ Output endpoints of the controller. Usually an HDMI
+ connector.
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - reg-io-width
+ - interrupts
+ - clocks
+ - clock-names
+ - resets
+ - reset-names
+ - phys
+ - phy-names
+ - ports
+
+if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - allwinner,sun50i-h6-dw-hdmi
+
+then:
+ properties:
+ clocks:
+ minItems: 6
+
+ clock-names:
+ minItems: 6
+
+ resets:
+ minItems: 2
+
+ reset-names:
+ minItems: 2
+
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ /*
+ * This comes from the clock/sun8i-a83t-ccu.h and
+ * reset/sun8i-a83t-ccu.h headers, but we can't include them since
+ * it would trigger a bunch of warnings for redefinitions of
+ * symbols with the other example.
+ */
+ #define CLK_BUS_HDMI 39
+ #define CLK_HDMI 93
+ #define CLK_HDMI_SLOW 94
+ #define RST_BUS_HDMI1 26
+
+ hdmi@1ee0000 {
+ compatible = "allwinner,sun8i-a83t-dw-hdmi";
+ reg = <0x01ee0000 0x10000>;
+ reg-io-width = <1>;
+ interrupts = <GIC_SPI 88 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&ccu CLK_BUS_HDMI>, <&ccu CLK_HDMI_SLOW>,
+ <&ccu CLK_HDMI>;
+ clock-names = "iahb", "isfr", "tmds";
+ resets = <&ccu RST_BUS_HDMI1>;
+ reset-names = "ctrl";
+ phys = <&hdmi_phy>;
+ phy-names = "phy";
+ pinctrl-names = "default";
+ pinctrl-0 = <&hdmi_pins>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ endpoint {
+ remote-endpoint = <&tcon1_out_hdmi>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ };
+ };
+ };
+
+ /* Cleanup after ourselves */
+ #undef CLK_BUS_HDMI
+ #undef CLK_HDMI
+ #undef CLK_HDMI_SLOW
+
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ /*
+ * This comes from the clock/sun50i-h6-ccu.h and
+ * reset/sun50i-h6-ccu.h headers, but we can't include them since
+ * it would trigger a bunch of warnings for redefinitions of
+ * symbols with the other example.
+ */
+ #define CLK_BUS_HDMI 126
+ #define CLK_BUS_HDCP 137
+ #define CLK_HDMI 123
+ #define CLK_HDMI_SLOW 124
+ #define CLK_HDMI_CEC 125
+ #define CLK_HDCP 136
+ #define RST_BUS_HDMI_SUB 57
+ #define RST_BUS_HDCP 62
+
+ hdmi@6000000 {
+ compatible = "allwinner,sun50i-h6-dw-hdmi";
+ reg = <0x06000000 0x10000>;
+ reg-io-width = <1>;
+ interrupts = <GIC_SPI 64 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&ccu CLK_BUS_HDMI>, <&ccu CLK_HDMI_SLOW>,
+ <&ccu CLK_HDMI>, <&ccu CLK_HDMI_CEC>,
+ <&ccu CLK_HDCP>, <&ccu CLK_BUS_HDCP>;
+ clock-names = "iahb", "isfr", "tmds", "cec", "hdcp",
+ "hdcp-bus";
+ resets = <&ccu RST_BUS_HDMI_SUB>, <&ccu RST_BUS_HDCP>;
+ reset-names = "ctrl", "hdcp";
+ phys = <&hdmi_phy>;
+ phy-names = "phy";
+ pinctrl-names = "default";
+ pinctrl-0 = <&hdmi_pins>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ endpoint {
+ remote-endpoint = <&tcon_top_hdmi_out_hdmi>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/allwinner,sun8i-a83t-hdmi-phy.yaml b/Documentation/devicetree/bindings/display/allwinner,sun8i-a83t-hdmi-phy.yaml
new file mode 100644
index 000000000000..1b47f3d99a78
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/allwinner,sun8i-a83t-hdmi-phy.yaml
@@ -0,0 +1,115 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/allwinner,sun8i-a83t-hdmi-phy.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A83t HDMI PHY
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+properties:
+ "#phy-cells":
+ const: 0
+
+ compatible:
+ enum:
+ - allwinner,sun8i-a83t-hdmi-phy
+ - allwinner,sun8i-h3-hdmi-phy
+ - allwinner,sun8i-r40-hdmi-phy
+ - allwinner,sun50i-a64-hdmi-phy
+ - allwinner,sun50i-h6-hdmi-phy
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ minItems: 2
+ items:
+ - description: Bus Clock
+ - description: Module Clock
+ - description: Parent of the PHY clock
+ - description: Second possible parent of the PHY clock
+
+ clock-names:
+ minItems: 2
+ items:
+ - const: bus
+ - const: mod
+ - const: pll-0
+ - const: pll-1
+
+ resets:
+ maxItems: 1
+
+ reset-names:
+ const: phy
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - resets
+ - reset-names
+
+if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - allwinner,sun8i-r40-hdmi-phy
+
+then:
+ properties:
+ clocks:
+ minItems: 4
+
+ clock-names:
+ minItems: 4
+
+else:
+ if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - allwinner,sun8i-h3-hdmi-phy
+ - allwinner,sun50i-a64-hdmi-phy
+
+ then:
+ properties:
+ clocks:
+ minItems: 3
+
+ clock-names:
+ minItems: 3
+
+ else:
+ properties:
+ clocks:
+ maxItems: 2
+
+ clock-names:
+ maxItems: 2
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/sun8i-a83t-ccu.h>
+ #include <dt-bindings/reset/sun8i-a83t-ccu.h>
+
+ hdmi_phy: hdmi-phy@1ef0000 {
+ compatible = "allwinner,sun8i-a83t-hdmi-phy";
+ reg = <0x01ef0000 0x10000>;
+ clocks = <&ccu CLK_BUS_HDMI>, <&ccu CLK_HDMI_SLOW>;
+ clock-names = "bus", "mod";
+ resets = <&ccu RST_BUS_HDMI0>;
+ reset-names = "phy";
+ #phy-cells = <0>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/allwinner,sun8i-r40-tcon-top.yaml b/Documentation/devicetree/bindings/display/allwinner,sun8i-r40-tcon-top.yaml
new file mode 100644
index 000000000000..7d849c4095a3
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/allwinner,sun8i-r40-tcon-top.yaml
@@ -0,0 +1,329 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/allwinner,sun8i-r40-tcon-top.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner R40 TCON TOP
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+description: |
+ TCON TOPs main purpose is to configure whole display pipeline. It
+ determines relationships between mixers and TCONs, selects source
+ TCON for HDMI, muxes LCD and TV encoder GPIO output, selects TV
+ encoder clock source and contains additional TV TCON and DSI gates.
+
+ It allows display pipeline to be configured in very different ways:
+
+ / LCD0/LVDS0
+ / [0] TCON-LCD0
+ | \ MIPI DSI
+ mixer0 |
+ \ / [1] TCON-LCD1 - LCD1/LVDS1
+ TCON-TOP
+ / \ [2] TCON-TV0 [0] - TVE0/RGB
+ mixer1 | \
+ | TCON-TOP - HDMI
+ | /
+ \ [3] TCON-TV1 [1] - TVE1/RGB
+
+ Note that both TCON TOP references same physical unit. Both mixers
+ can be connected to any TCON. Not all TCON TOP variants support all
+ features.
+
+properties:
+ "#clock-cells":
+ const: 1
+
+ compatible:
+ enum:
+ - allwinner,sun8i-r40-tcon-top
+ - allwinner,sun20i-d1-tcon-top
+ - allwinner,sun50i-h6-tcon-top
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ minItems: 2
+ maxItems: 6
+
+ clock-names:
+ minItems: 2
+ maxItems: 6
+
+ clock-output-names:
+ minItems: 1
+ maxItems: 3
+
+ resets:
+ maxItems: 1
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: |
+ Input endpoint for Mixer 0 mux.
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: |
+ Output endpoint for Mixer 0 mux
+
+ port@2:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: |
+ Input endpoint for Mixer 1 mux.
+
+ port@3:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: |
+ Output endpoint for Mixer 1 mux
+
+ port@4:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: |
+ Input endpoint for HDMI mux.
+
+ port@5:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: |
+ Output endpoint for HDMI mux
+
+ required:
+ - port@0
+ - port@1
+ - port@4
+ - port@5
+
+required:
+ - "#clock-cells"
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - clock-output-names
+ - resets
+ - ports
+
+additionalProperties: false
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: allwinner,sun8i-r40-tcon-top
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: The TCON TOP interface clock
+ - description: The TCON TOP TV0 clock
+ - description: The TCON TOP TVE0 clock
+ - description: The TCON TOP TV1 clock
+ - description: The TCON TOP TVE1 clock
+ - description: The TCON TOP MIPI DSI clock
+
+ clock-names:
+ items:
+ - const: bus
+ - const: tcon-tv0
+ - const: tve0
+ - const: tcon-tv1
+ - const: tve1
+ - const: dsi
+
+ clock-output-names:
+ items:
+ - description: TCON TV0 output clock name
+ - description: TCON TV1 output clock name
+ - description: DSI output clock name
+
+ ports:
+ required:
+ - port@2
+ - port@3
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: allwinner,sun20i-d1-tcon-top
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: The TCON TOP interface clock
+ - description: The TCON TOP TV0 clock
+ - description: The TCON TOP TVE0 clock
+ - description: The TCON TOP MIPI DSI clock
+
+ clock-names:
+ items:
+ - const: bus
+ - const: tcon-tv0
+ - const: tve0
+ - const: dsi
+
+ clock-output-names:
+ items:
+ - description: TCON TV0 output clock name
+ - description: DSI output clock name
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: allwinner,sun50i-h6-tcon-top
+
+ then:
+ properties:
+ clocks:
+ items:
+ - description: The TCON TOP interface clock
+ - description: The TCON TOP TV0 clock
+
+ clock-names:
+ items:
+ - const: bus
+ - const: tcon-tv0
+
+ clock-output-names:
+ items:
+ - description: TCON TV0 output clock name
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ #include <dt-bindings/clock/sun8i-r40-ccu.h>
+ #include <dt-bindings/reset/sun8i-r40-ccu.h>
+
+ tcon_top: tcon-top@1c70000 {
+ compatible = "allwinner,sun8i-r40-tcon-top";
+ reg = <0x01c70000 0x1000>;
+ clocks = <&ccu CLK_BUS_TCON_TOP>,
+ <&ccu CLK_TCON_TV0>,
+ <&ccu CLK_TVE0>,
+ <&ccu CLK_TCON_TV1>,
+ <&ccu CLK_TVE1>,
+ <&ccu CLK_DSI_DPHY>;
+ clock-names = "bus",
+ "tcon-tv0",
+ "tve0",
+ "tcon-tv1",
+ "tve1",
+ "dsi";
+ clock-output-names = "tcon-top-tv0",
+ "tcon-top-tv1",
+ "tcon-top-dsi";
+ resets = <&ccu RST_BUS_TCON_TOP>;
+ #clock-cells = <1>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ tcon_top_mixer0_in: port@0 {
+ reg = <0>;
+
+ tcon_top_mixer0_in_mixer0: endpoint {
+ remote-endpoint = <&mixer0_out_tcon_top>;
+ };
+ };
+
+ tcon_top_mixer0_out: port@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+
+ tcon_top_mixer0_out_tcon_lcd0: endpoint@0 {
+ reg = <0>;
+ };
+
+ tcon_top_mixer0_out_tcon_lcd1: endpoint@1 {
+ reg = <1>;
+ };
+
+ tcon_top_mixer0_out_tcon_tv0: endpoint@2 {
+ reg = <2>;
+ remote-endpoint = <&tcon_tv0_in_tcon_top_mixer0>;
+ };
+
+ tcon_top_mixer0_out_tcon_tv1: endpoint@3 {
+ reg = <3>;
+ remote-endpoint = <&tcon_tv1_in_tcon_top_mixer0>;
+ };
+ };
+
+ tcon_top_mixer1_in: port@2 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <2>;
+
+ tcon_top_mixer1_in_mixer1: endpoint@1 {
+ reg = <1>;
+ remote-endpoint = <&mixer1_out_tcon_top>;
+ };
+ };
+
+ tcon_top_mixer1_out: port@3 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <3>;
+
+ tcon_top_mixer1_out_tcon_lcd0: endpoint@0 {
+ reg = <0>;
+ };
+
+ tcon_top_mixer1_out_tcon_lcd1: endpoint@1 {
+ reg = <1>;
+ };
+
+ tcon_top_mixer1_out_tcon_tv0: endpoint@2 {
+ reg = <2>;
+ remote-endpoint = <&tcon_tv0_in_tcon_top_mixer1>;
+ };
+
+ tcon_top_mixer1_out_tcon_tv1: endpoint@3 {
+ reg = <3>;
+ remote-endpoint = <&tcon_tv1_in_tcon_top_mixer1>;
+ };
+ };
+
+ tcon_top_hdmi_in: port@4 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <4>;
+
+ tcon_top_hdmi_in_tcon_tv0: endpoint@0 {
+ reg = <0>;
+ remote-endpoint = <&tcon_tv0_out_tcon_top>;
+ };
+
+ tcon_top_hdmi_in_tcon_tv1: endpoint@1 {
+ reg = <1>;
+ remote-endpoint = <&tcon_tv1_out_tcon_top>;
+ };
+ };
+
+ tcon_top_hdmi_out: port@5 {
+ reg = <5>;
+
+ tcon_top_hdmi_out_hdmi: endpoint {
+ remote-endpoint = <&hdmi_in_tcon_top>;
+ };
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/allwinner,sun9i-a80-deu.yaml b/Documentation/devicetree/bindings/display/allwinner,sun9i-a80-deu.yaml
new file mode 100644
index 000000000000..193afee2c3c1
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/allwinner,sun9i-a80-deu.yaml
@@ -0,0 +1,120 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/allwinner,sun9i-a80-deu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A80 Detail Enhancement Unit
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+description: |
+ The DEU (Detail Enhancement Unit), found in the Allwinner A80 SoC,
+ can sharpen the display content in both luma and chroma channels.
+
+properties:
+ compatible:
+ const: allwinner,sun9i-a80-deu
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: The DEU interface clock
+ - description: The DEU module clock
+ - description: The DEU DRAM clock
+
+ clock-names:
+ items:
+ - const: ahb
+ - const: mod
+ - const: ram
+
+ resets:
+ maxItems: 1
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: |
+ Input endpoints of the controller.
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: |
+ Output endpoints of the controller.
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+ - resets
+ - ports
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ #include <dt-bindings/clock/sun9i-a80-de.h>
+ #include <dt-bindings/reset/sun9i-a80-de.h>
+
+ deu0: deu@3300000 {
+ compatible = "allwinner,sun9i-a80-deu";
+ reg = <0x03300000 0x40000>;
+ interrupts = <GIC_SPI 92 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&de_clocks CLK_BUS_DEU0>,
+ <&de_clocks CLK_IEP_DEU0>,
+ <&de_clocks CLK_DRAM_DEU0>;
+ clock-names = "ahb",
+ "mod",
+ "ram";
+ resets = <&de_clocks RST_DEU0>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ deu0_in: port@0 {
+ reg = <0>;
+
+ deu0_in_fe0: endpoint {
+ remote-endpoint = <&fe0_out_deu0>;
+ };
+ };
+
+ deu0_out: port@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+
+ deu0_out_be0: endpoint@0 {
+ reg = <0>;
+ remote-endpoint = <&be0_in_deu0>;
+ };
+
+ deu0_out_be1: endpoint@1 {
+ reg = <1>;
+ remote-endpoint = <&be1_in_deu0>;
+ };
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.txt b/Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.txt
deleted file mode 100644
index bf4a18047309..000000000000
--- a/Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.txt
+++ /dev/null
@@ -1,115 +0,0 @@
-Amlogic specific extensions to the Synopsys Designware HDMI Controller
-======================================================================
-
-The Amlogic Meson Synopsys Designware Integration is composed of :
-- A Synopsys DesignWare HDMI Controller IP
-- A TOP control block controlling the Clocks and PHY
-- A custom HDMI PHY in order to convert video to TMDS signal
- ___________________________________
-| HDMI TOP |<= HPD
-|___________________________________|
-| | |
-| Synopsys HDMI | HDMI PHY |=> TMDS
-| Controller |________________|
-|___________________________________|<=> DDC
-
-The HDMI TOP block only supports HPD sensing.
-The Synopsys HDMI Controller interrupt is routed through the
-TOP Block interrupt.
-Communication to the TOP Block and the Synopsys HDMI Controller is done
-via a pair of dedicated addr+read/write registers.
-The HDMI PHY is configured by registers in the HHI register block.
-
-Pixel data arrives in 4:4:4 format from the VENC block and the VPU HDMI mux
-selects either the ENCI encoder for the 576i or 480i formats or the ENCP
-encoder for all the other formats including interlaced HD formats.
-
-The VENC uses a DVI encoder on top of the ENCI or ENCP encoders to generate
-DVI timings for the HDMI controller.
-
-Amlogic Meson GXBB, GXL and GXM SoCs families embeds the Synopsys DesignWare
-HDMI TX IP version 2.01a with HDCP and I2C & S/PDIF
-audio source interfaces.
-
-Required properties:
-- compatible: value should be different for each SoC family as :
- - GXBB (S905) : "amlogic,meson-gxbb-dw-hdmi"
- - GXL (S905X, S905D) : "amlogic,meson-gxl-dw-hdmi"
- - GXM (S912) : "amlogic,meson-gxm-dw-hdmi"
- followed by the common "amlogic,meson-gx-dw-hdmi"
-- reg: Physical base address and length of the controller's registers.
-- interrupts: The HDMI interrupt number
-- clocks, clock-names : must have the phandles to the HDMI iahb and isfr clocks,
- and the Amlogic Meson venci clocks as described in
- Documentation/devicetree/bindings/clock/clock-bindings.txt,
- the clocks are soc specific, the clock-names should be "iahb", "isfr", "venci"
-- resets, resets-names: must have the phandles to the HDMI apb, glue and phy
- resets as described in :
- Documentation/devicetree/bindings/reset/reset.txt,
- the reset-names should be "hdmitx_apb", "hdmitx", "hdmitx_phy"
-
-Optional properties:
-- hdmi-supply: Optional phandle to an external 5V regulator to power the HDMI
- logic, as described in the file ../regulator/regulator.txt
-
-Required nodes:
-
-The connections to the HDMI ports are modeled using the OF graph
-bindings specified in Documentation/devicetree/bindings/graph.txt.
-
-The following table lists for each supported model the port number
-corresponding to each HDMI output and input.
-
- Port 0 Port 1
------------------------------------------
- S905 (GXBB) VENC Input TMDS Output
- S905X (GXL) VENC Input TMDS Output
- S905D (GXL) VENC Input TMDS Output
- S912 (GXM) VENC Input TMDS Output
-
-Example:
-
-hdmi-connector {
- compatible = "hdmi-connector";
- type = "a";
-
- port {
- hdmi_connector_in: endpoint {
- remote-endpoint = <&hdmi_tx_tmds_out>;
- };
- };
-};
-
-hdmi_tx: hdmi-tx@c883a000 {
- compatible = "amlogic,meson-gxbb-dw-hdmi", "amlogic,meson-gx-dw-hdmi";
- reg = <0x0 0xc883a000 0x0 0x1c>;
- interrupts = <GIC_SPI 57 IRQ_TYPE_EDGE_RISING>;
- resets = <&reset RESET_HDMITX_CAPB3>,
- <&reset RESET_HDMI_SYSTEM_RESET>,
- <&reset RESET_HDMI_TX>;
- reset-names = "hdmitx_apb", "hdmitx", "hdmitx_phy";
- clocks = <&clkc CLKID_HDMI_PCLK>,
- <&clkc CLKID_CLK81>,
- <&clkc CLKID_GCLK_VENCI_INT0>;
- clock-names = "isfr", "iahb", "venci";
- #address-cells = <1>;
- #size-cells = <0>;
-
- /* VPU VENC Input */
- hdmi_tx_venc_port: port@0 {
- reg = <0>;
-
- hdmi_tx_in: endpoint {
- remote-endpoint = <&hdmi_tx_out>;
- };
- };
-
- /* TMDS Output */
- hdmi_tx_tmds_port: port@1 {
- reg = <1>;
-
- hdmi_tx_tmds_out: endpoint {
- remote-endpoint = <&hdmi_connector_in>;
- };
- };
-};
diff --git a/Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.yaml b/Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.yaml
new file mode 100644
index 000000000000..416fe263ac92
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.yaml
@@ -0,0 +1,157 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright 2019 BayLibre, SAS
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/amlogic,meson-dw-hdmi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Amlogic specific extensions to the Synopsys Designware HDMI Controller
+
+maintainers:
+ - Neil Armstrong <neil.armstrong@linaro.org>
+
+allOf:
+ - $ref: /schemas/sound/dai-common.yaml#
+
+description: |
+ The Amlogic Meson Synopsys Designware Integration is composed of
+ - A Synopsys DesignWare HDMI Controller IP
+ - A TOP control block controlling the Clocks and PHY
+ - A custom HDMI PHY in order to convert video to TMDS signal
+ ___________________________________
+ | HDMI TOP |<= HPD
+ |___________________________________|
+ | | |
+ | Synopsys HDMI | HDMI PHY |=> TMDS
+ | Controller |________________|
+ |___________________________________|<=> DDC
+
+ The HDMI TOP block only supports HPD sensing.
+ The Synopsys HDMI Controller interrupt is routed through the
+ TOP Block interrupt.
+ Communication to the TOP Block and the Synopsys HDMI Controller is done
+ via a pair of dedicated addr+read/write registers.
+ The HDMI PHY is configured by registers in the HHI register block.
+
+ Pixel data arrives in "4:4:4" format from the VENC block and the VPU HDMI mux
+ selects either the ENCI encoder for the 576i or 480i formats or the ENCP
+ encoder for all the other formats including interlaced HD formats.
+
+ The VENC uses a DVI encoder on top of the ENCI or ENCP encoders to generate
+ DVI timings for the HDMI controller.
+
+ Amlogic Meson GXBB, GXL and GXM SoCs families embeds the Synopsys DesignWare
+ HDMI TX IP version 2.01a with HDCP and I2C & S/PDIF
+ audio source interfaces.
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - amlogic,meson-gxbb-dw-hdmi # GXBB (S905)
+ - amlogic,meson-gxl-dw-hdmi # GXL (S905X, S905D)
+ - amlogic,meson-gxm-dw-hdmi # GXM (S912)
+ - const: amlogic,meson-gx-dw-hdmi
+ - enum:
+ - amlogic,meson-g12a-dw-hdmi # G12A (S905X2, S905Y2, S905D2)
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ minItems: 3
+
+ clock-names:
+ items:
+ - const: isfr
+ - const: iahb
+ - const: venci
+
+ power-domains:
+ maxItems: 1
+ description: phandle to the associated power domain
+
+ resets:
+ minItems: 3
+
+ reset-names:
+ items:
+ - const: hdmitx_apb
+ - const: hdmitx
+ - const: hdmitx_phy
+
+ hdmi-supply:
+ description: phandle to an external 5V regulator to power the HDMI logic
+
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ A port node pointing to the VENC Input port node.
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ A port node pointing to the TMDS Output port node.
+
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 0
+
+ "#sound-dai-cells":
+ const: 0
+
+ sound-name-prefix: true
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+ - resets
+ - reset-names
+ - port@0
+ - port@1
+ - "#address-cells"
+ - "#size-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ hdmi_tx: hdmi-tx@c883a000 {
+ compatible = "amlogic,meson-gxbb-dw-hdmi", "amlogic,meson-gx-dw-hdmi";
+ reg = <0xc883a000 0x1c>;
+ interrupts = <57>;
+ resets = <&reset_apb>, <&reset_hdmitx>, <&reset_hdmitx_phy>;
+ reset-names = "hdmitx_apb", "hdmitx", "hdmitx_phy";
+ clocks = <&clk_isfr>, <&clk_iahb>, <&clk_venci>;
+ clock-names = "isfr", "iahb", "venci";
+ power-domains = <&pd_vpu>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ /* VPU VENC Input */
+ hdmi_tx_venc_port: port@0 {
+ reg = <0>;
+
+ hdmi_tx_in: endpoint {
+ remote-endpoint = <&hdmi_tx_out>;
+ };
+ };
+
+ /* TMDS Output */
+ hdmi_tx_tmds_port: port@1 {
+ reg = <1>;
+
+ hdmi_tx_tmds_out: endpoint {
+ remote-endpoint = <&hdmi_connector_in>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/amlogic,meson-g12a-dw-mipi-dsi.yaml b/Documentation/devicetree/bindings/display/amlogic,meson-g12a-dw-mipi-dsi.yaml
new file mode 100644
index 000000000000..a3428f012005
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/amlogic,meson-g12a-dw-mipi-dsi.yaml
@@ -0,0 +1,118 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright 2020 BayLibre, SAS
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/amlogic,meson-g12a-dw-mipi-dsi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Amlogic specific extensions to the Synopsys Designware MIPI DSI Host Controller
+
+maintainers:
+ - Neil Armstrong <neil.armstrong@linaro.org>
+
+description: |
+ The Amlogic Meson Synopsys Designware Integration is composed of
+ - A Synopsys DesignWare MIPI DSI Host Controller IP
+ - A TOP control block controlling the Clocks & Resets of the IP
+
+allOf:
+ - $ref: dsi-controller.yaml#
+
+properties:
+ compatible:
+ enum:
+ - amlogic,meson-g12a-dw-mipi-dsi
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ minItems: 3
+ maxItems: 4
+
+ clock-names:
+ minItems: 3
+ items:
+ - const: pclk
+ - const: bit
+ - const: px
+ - const: meas
+
+ resets:
+ maxItems: 1
+
+ reset-names:
+ items:
+ - const: top
+
+ phys:
+ maxItems: 1
+
+ phy-names:
+ items:
+ - const: dphy
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Input node to receive pixel data.
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: DSI output node to panel.
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - resets
+ - reset-names
+ - phys
+ - phy-names
+ - ports
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ dsi@6000 {
+ compatible = "amlogic,meson-g12a-dw-mipi-dsi";
+ reg = <0x6000 0x400>;
+ resets = <&reset_top>;
+ reset-names = "top";
+ clocks = <&clk_pclk>, <&bit_clk>, <&clk_px>;
+ clock-names = "pclk", "bit", "px";
+ phys = <&mipi_dphy>;
+ phy-names = "dphy";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ /* VPU VENC Input */
+ mipi_dsi_venc_port: port@0 {
+ reg = <0>;
+
+ mipi_dsi_in: endpoint {
+ remote-endpoint = <&dpi_out>;
+ };
+ };
+
+ /* DSI Output */
+ mipi_dsi_panel_port: port@1 {
+ reg = <1>;
+
+ mipi_out_panel: endpoint {
+ remote-endpoint = <&mipi_in_panel>;
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/amlogic,meson-vpu.txt b/Documentation/devicetree/bindings/display/amlogic,meson-vpu.txt
deleted file mode 100644
index 057b81335775..000000000000
--- a/Documentation/devicetree/bindings/display/amlogic,meson-vpu.txt
+++ /dev/null
@@ -1,116 +0,0 @@
-Amlogic Meson Display Controller
-================================
-
-The Amlogic Meson Display controller is composed of several components
-that are going to be documented below:
-
-DMC|---------------VPU (Video Processing Unit)----------------|------HHI------|
- | vd1 _______ _____________ _________________ | |
-D |-------| |----| | | | | HDMI PLL |
-D | vd2 | VIU | | Video Post | | Video Encoders |<---|-----VCLK |
-R |-------| |----| Processing | | | | |
- | osd2 | | | |---| Enci ----------|----|-----VDAC------|
-R |-------| CSC |----| Scalers | | Encp ----------|----|----HDMI-TX----|
-A | osd1 | | | Blenders | | Encl ----------|----|---------------|
-M |-------|______|----|____________| |________________| | |
-___|__________________________________________________________|_______________|
-
-
-VIU: Video Input Unit
----------------------
-
-The Video Input Unit is in charge of the pixel scanout from the DDR memory.
-It fetches the frames addresses, stride and parameters from the "Canvas" memory.
-This part is also in charge of the CSC (Colorspace Conversion).
-It can handle 2 OSD Planes and 2 Video Planes.
-
-VPP: Video Post Processing
---------------------------
-
-The Video Post Processing is in charge of the scaling and blending of the
-various planes into a single pixel stream.
-There is a special "pre-blending" used by the video planes with a dedicated
-scaler and a "post-blending" to merge with the OSD Planes.
-The OSD planes also have a dedicated scaler for one of the OSD.
-
-VENC: Video Encoders
---------------------
-
-The VENC is composed of the multiple pixel encoders :
- - ENCI : Interlace Video encoder for CVBS and Interlace HDMI
- - ENCP : Progressive Video Encoder for HDMI
- - ENCL : LCD LVDS Encoder
-The VENC Unit gets a Pixel Clocks (VCLK) from a dedicated HDMI PLL and clock
-tree and provides the scanout clock to the VPP and VIU.
-The ENCI is connected to a single VDAC for Composite Output.
-The ENCI and ENCP are connected to an on-chip HDMI Transceiver.
-
-Device Tree Bindings:
----------------------
-
-VPU: Video Processing Unit
---------------------------
-
-Required properties:
-- compatible: value should be different for each SoC family as :
- - GXBB (S905) : "amlogic,meson-gxbb-vpu"
- - GXL (S905X, S905D) : "amlogic,meson-gxl-vpu"
- - GXM (S912) : "amlogic,meson-gxm-vpu"
- followed by the common "amlogic,meson-gx-vpu"
-- reg: base address and size of he following memory-mapped regions :
- - vpu
- - hhi
- - dmc
-- reg-names: should contain the names of the previous memory regions
-- interrupts: should contain the VENC Vsync interrupt number
-
-Optional properties:
-- power-domains: Optional phandle to associated power domain as described in
- the file ../power/power_domain.txt
-
-Required nodes:
-
-The connections to the VPU output video ports are modeled using the OF graph
-bindings specified in Documentation/devicetree/bindings/graph.txt.
-
-The following table lists for each supported model the port number
-corresponding to each VPU output.
-
- Port 0 Port 1
------------------------------------------
- S905 (GXBB) CVBS VDAC HDMI-TX
- S905X (GXL) CVBS VDAC HDMI-TX
- S905D (GXL) CVBS VDAC HDMI-TX
- S912 (GXM) CVBS VDAC HDMI-TX
-
-Example:
-
-tv-connector {
- compatible = "composite-video-connector";
-
- port {
- tv_connector_in: endpoint {
- remote-endpoint = <&cvbs_vdac_out>;
- };
- };
-};
-
-vpu: vpu@d0100000 {
- compatible = "amlogic,meson-gxbb-vpu";
- reg = <0x0 0xd0100000 0x0 0x100000>,
- <0x0 0xc883c000 0x0 0x1000>,
- <0x0 0xc8838000 0x0 0x1000>;
- reg-names = "vpu", "hhi", "dmc";
- interrupts = <GIC_SPI 3 IRQ_TYPE_EDGE_RISING>;
- #address-cells = <1>;
- #size-cells = <0>;
-
- /* CVBS VDAC output port */
- port@0 {
- reg = <0>;
-
- cvbs_vdac_out: endpoint {
- remote-endpoint = <&tv_connector_in>;
- };
- };
-};
diff --git a/Documentation/devicetree/bindings/display/amlogic,meson-vpu.yaml b/Documentation/devicetree/bindings/display/amlogic,meson-vpu.yaml
new file mode 100644
index 000000000000..cb0a90f02321
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/amlogic,meson-vpu.yaml
@@ -0,0 +1,150 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright 2019 BayLibre, SAS
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/amlogic,meson-vpu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Amlogic Meson Display Controller
+
+maintainers:
+ - Neil Armstrong <neil.armstrong@linaro.org>
+
+description: |
+ The Amlogic Meson Display controller is composed of several components
+ that are going to be documented below
+
+ DMC|---------------VPU (Video Processing Unit)----------------|------HHI------|
+ | vd1 _______ _____________ _________________ | |
+ D |-------| |----| | | | | HDMI PLL |
+ D | vd2 | VIU | | Video Post | | Video Encoders |<---|-----VCLK |
+ R |-------| |----| Processing | | | | |
+ | osd2 | | | |---| Enci ----------|----|-----VDAC------|
+ R |-------| CSC |----| Scalers | | Encp ----------|----|----HDMI-TX----|
+ A | osd1 | | | Blenders | | Encl ----------|----|---------------|
+ M |-------|______|----|____________| |________________| | |
+ ___|__________________________________________________________|_______________|
+
+
+ VIU: Video Input Unit
+ ---------------------
+
+ The Video Input Unit is in charge of the pixel scanout from the DDR memory.
+ It fetches the frames addresses, stride and parameters from the "Canvas" memory.
+ This part is also in charge of the CSC (Colorspace Conversion).
+ It can handle 2 OSD Planes and 2 Video Planes.
+
+ VPP: Video Post Processing
+ --------------------------
+
+ The Video Post Processing is in charge of the scaling and blending of the
+ various planes into a single pixel stream.
+ There is a special "pre-blending" used by the video planes with a dedicated
+ scaler and a "post-blending" to merge with the OSD Planes.
+ The OSD planes also have a dedicated scaler for one of the OSD.
+
+ VENC: Video Encoders
+ --------------------
+
+ The VENC is composed of the multiple pixel encoders
+ - ENCI : Interlace Video encoder for CVBS and Interlace HDMI
+ - ENCP : Progressive Video Encoder for HDMI
+ - ENCL : LCD LVDS Encoder
+ The VENC Unit gets a Pixel Clocks (VCLK) from a dedicated HDMI PLL and clock
+ tree and provides the scanout clock to the VPP and VIU.
+ The ENCI is connected to a single VDAC for Composite Output.
+ The ENCI and ENCP are connected to an on-chip HDMI Transceiver.
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - amlogic,meson-gxbb-vpu # GXBB (S905)
+ - amlogic,meson-gxl-vpu # GXL (S905X, S905D)
+ - amlogic,meson-gxm-vpu # GXM (S912)
+ - const: amlogic,meson-gx-vpu
+ - enum:
+ - amlogic,meson-g12a-vpu # G12A (S905X2, S905Y2, S905D2)
+
+ reg:
+ maxItems: 2
+
+ reg-names:
+ items:
+ - const: vpu
+ - const: hhi
+
+ interrupts:
+ maxItems: 1
+
+ amlogic,canvas:
+ description: should point to a canvas provider node
+ $ref: /schemas/types.yaml#/definitions/phandle
+
+ power-domains:
+ maxItems: 1
+ description: phandle to the associated power domain
+
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ A port node pointing to the CVBS VDAC port node.
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ A port node pointing to the HDMI-TX port node.
+
+ port@2:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ A port node pointing to the DPI port node (e.g. DSI or LVDS transceiver).
+
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 0
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - port@0
+ - port@1
+ - "#address-cells"
+ - "#size-cells"
+ - amlogic,canvas
+
+additionalProperties: false
+
+examples:
+ - |
+ vpu: vpu@d0100000 {
+ compatible = "amlogic,meson-gxbb-vpu", "amlogic,meson-gx-vpu";
+ reg = <0xd0100000 0x100000>, <0xc883c000 0x1000>;
+ reg-names = "vpu", "hhi";
+ interrupts = <3>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ amlogic,canvas = <&canvas>;
+
+ /* CVBS VDAC output port */
+ port@0 {
+ reg = <0>;
+
+ cvbs_vdac_out: endpoint {
+ remote-endpoint = <&tv_connector_in>;
+ };
+ };
+
+ /* HDMI TX output port */
+ port@1 {
+ reg = <1>;
+
+ hdmi_tx_out: endpoint {
+ remote-endpoint = <&hdmi_tx_in>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/apple,h7-display-pipe-mipi.yaml b/Documentation/devicetree/bindings/display/apple,h7-display-pipe-mipi.yaml
new file mode 100644
index 000000000000..5e6da66499a5
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/apple,h7-display-pipe-mipi.yaml
@@ -0,0 +1,83 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/apple,h7-display-pipe-mipi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Apple pre-DCP display controller MIPI interface
+
+maintainers:
+ - Sasha Finkelstein <fnkl.kernel@gmail.com>
+
+description:
+ The MIPI controller part of the pre-DCP Apple display controller
+
+allOf:
+ - $ref: dsi-controller.yaml#
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - apple,t8112-display-pipe-mipi
+ - apple,t8103-display-pipe-mipi
+ - const: apple,h7-display-pipe-mipi
+
+ reg:
+ maxItems: 1
+
+ power-domains:
+ maxItems: 1
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Input port. Always connected to the primary controller
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Output MIPI DSI port to the panel
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - ports
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ dsi@28200000 {
+ compatible = "apple,t8103-display-pipe-mipi", "apple,h7-display-pipe-mipi";
+ reg = <0x28200000 0xc000>;
+ power-domains = <&ps_dispdfr_mipi>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ dfr_adp_out_mipi: endpoint {
+ remote-endpoint = <&dfr_adp_out_mipi>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ dfr_panel_in: endpoint {
+ remote-endpoint = <&dfr_mipi_out_panel>;
+ };
+ };
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/display/apple,h7-display-pipe.yaml b/Documentation/devicetree/bindings/display/apple,h7-display-pipe.yaml
new file mode 100644
index 000000000000..102fb1804c0c
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/apple,h7-display-pipe.yaml
@@ -0,0 +1,88 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/apple,h7-display-pipe.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Apple pre-DCP display controller
+
+maintainers:
+ - Sasha Finkelstein <fnkl.kernel@gmail.com>
+
+description:
+ A secondary display controller used to drive the "touchbar" on
+ certain Apple laptops.
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - apple,t8112-display-pipe
+ - apple,t8103-display-pipe
+ - const: apple,h7-display-pipe
+
+ reg:
+ items:
+ - description: Primary register block, controls planes and blending
+ - description:
+ Contains other configuration registers like interrupt
+ and FIFO control
+
+ reg-names:
+ items:
+ - const: be
+ - const: fe
+
+ power-domains:
+ description:
+ Phandles to pmgr entries that are needed for this controller to turn on.
+ Aside from that, their specific functions are unknown
+ maxItems: 2
+
+ interrupts:
+ items:
+ - description: Unknown function
+ - description: Primary interrupt. Vsync events are reported via it
+
+ interrupt-names:
+ items:
+ - const: be
+ - const: fe
+
+ iommus:
+ maxItems: 1
+
+ port:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Output port. Always connected to apple,h7-display-pipe-mipi
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - port
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/apple-aic.h>
+ display-pipe@28200000 {
+ compatible = "apple,t8103-display-pipe", "apple,h7-display-pipe";
+ reg = <0x28200000 0xc000>,
+ <0x28400000 0x4000>;
+ reg-names = "be", "fe";
+ power-domains = <&ps_dispdfr_fe>, <&ps_dispdfr_be>;
+ interrupt-parent = <&aic>;
+ interrupts = <AIC_IRQ 502 IRQ_TYPE_LEVEL_HIGH>,
+ <AIC_IRQ 506 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "be", "fe";
+ iommus = <&displaydfr_dart 0>;
+
+ port {
+ dfr_adp_out_mipi: endpoint {
+ remote-endpoint = <&dfr_mipi_in_adp>;
+ };
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/display/arm,hdlcd.txt b/Documentation/devicetree/bindings/display/arm,hdlcd.txt
deleted file mode 100644
index 78bc24296f3e..000000000000
--- a/Documentation/devicetree/bindings/display/arm,hdlcd.txt
+++ /dev/null
@@ -1,79 +0,0 @@
-ARM HDLCD
-
-This is a display controller found on several development platforms produced
-by ARM Ltd and in more modern of its' Fast Models. The HDLCD is an RGB
-streamer that reads the data from a framebuffer and sends it to a single
-digital encoder (DVI or HDMI).
-
-Required properties:
- - compatible: "arm,hdlcd"
- - reg: Physical base address and length of the controller's registers.
- - interrupts: One interrupt used by the display controller to notify the
- interrupt controller when any of the interrupt sources programmed in
- the interrupt mask register have activated.
- - clocks: A list of phandle + clock-specifier pairs, one for each
- entry in 'clock-names'.
- - clock-names: A list of clock names. For HDLCD it should contain:
- - "pxlclk" for the clock feeding the output PLL of the controller.
-
-Required sub-nodes:
- - port: The HDLCD connection to an encoder chip. The connection is modeled
- using the OF graph bindings specified in
- Documentation/devicetree/bindings/graph.txt.
-
-Optional properties:
- - memory-region: phandle to a node describing memory (see
- Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt) to be
- used for the framebuffer; if not present, the framebuffer may be located
- anywhere in memory.
-
-
-Example:
-
-/ {
- ...
-
- hdlcd@2b000000 {
- compatible = "arm,hdlcd";
- reg = <0 0x2b000000 0 0x1000>;
- interrupts = <GIC_SPI 85 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&oscclk5>;
- clock-names = "pxlclk";
- port {
- hdlcd_output: endpoint@0 {
- remote-endpoint = <&hdmi_enc_input>;
- };
- };
- };
-
- /* HDMI encoder on I2C bus */
- i2c@7ffa0000 {
- ....
- hdmi-transmitter@70 {
- compatible = ".....";
- reg = <0x70>;
- port@0 {
- hdmi_enc_input: endpoint {
- remote-endpoint = <&hdlcd_output>;
- };
-
- hdmi_enc_output: endpoint {
- remote-endpoint = <&hdmi_1_port>;
- };
- };
- };
-
- };
-
- hdmi1: connector@1 {
- compatible = "hdmi-connector";
- type = "a";
- port {
- hdmi_1_port: endpoint {
- remote-endpoint = <&hdmi_enc_output>;
- };
- };
- };
-
- ...
-};
diff --git a/Documentation/devicetree/bindings/display/arm,hdlcd.yaml b/Documentation/devicetree/bindings/display/arm,hdlcd.yaml
new file mode 100644
index 000000000000..9a30e9005e8a
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/arm,hdlcd.yaml
@@ -0,0 +1,89 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/arm,hdlcd.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Arm HDLCD display controller
+
+maintainers:
+ - Liviu Dudau <Liviu.Dudau@arm.com>
+ - Andre Przywara <andre.przywara@arm.com>
+
+description:
+ The Arm HDLCD is a display controller found on several development platforms
+ produced by ARM Ltd and in more modern of its Fast Models. The HDLCD is an
+ RGB streamer that reads the data from a framebuffer and sends it to a single
+ digital encoder (DVI or HDMI).
+
+properties:
+ compatible:
+ const: arm,hdlcd
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clock-names:
+ const: pxlclk
+
+ clocks:
+ maxItems: 1
+ description: The input reference for the pixel clock.
+
+ memory-region:
+ maxItems: 1
+ description:
+ Phandle to a node describing memory to be used for the framebuffer.
+ If not present, the framebuffer may be located anywhere in memory.
+
+ iommus:
+ maxItems: 1
+
+ port:
+ $ref: /schemas/graph.yaml#/properties/port
+ unevaluatedProperties: false
+ description:
+ Output endpoint of the controller, connecting the LCD panel signals.
+
+additionalProperties: false
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - port
+
+examples:
+ - |
+ hdlcd@2b000000 {
+ compatible = "arm,hdlcd";
+ reg = <0x2b000000 0x1000>;
+ interrupts = <0 85 4>;
+ clocks = <&oscclk5>;
+ clock-names = "pxlclk";
+ port {
+ hdlcd_output: endpoint {
+ remote-endpoint = <&hdmi_enc_input>;
+ };
+ };
+ };
+
+ /* HDMI encoder on I2C bus */
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ hdmi-transmitter@70 {
+ compatible = "nxp,tda998x";
+ reg = <0x70>;
+ port {
+ hdmi_enc_input: endpoint {
+ remote-endpoint = <&hdlcd_output>;
+ };
+ };
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/display/arm,komeda.yaml b/Documentation/devicetree/bindings/display/arm,komeda.yaml
new file mode 100644
index 000000000000..3ad3eef89ca8
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/arm,komeda.yaml
@@ -0,0 +1,131 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/arm,komeda.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Arm Komeda display processor
+
+maintainers:
+ - Liviu Dudau <Liviu.Dudau@arm.com>
+ - Andre Przywara <andre.przywara@arm.com>
+
+description:
+ The Arm Mali D71 display processor supports up to two displays with up
+ to a 4K resolution each. Each pipeline can be composed of up to four
+ layers. It is typically connected to a digital display connector like HDMI.
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - const: arm,mali-d32
+ - const: arm,mali-d71
+ - const: arm,mali-d71
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clock-names:
+ const: aclk
+
+ clocks:
+ maxItems: 1
+ description: The main DPU processor clock
+
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 0
+
+ memory-region:
+ maxItems: 1
+ description:
+ Phandle to a node describing memory to be used for the framebuffer.
+ If not present, the framebuffer may be located anywhere in memory.
+
+ iommus:
+ description:
+ The stream IDs for each of the used pipelines, each four IDs for the
+ four layers, plus one for the write-back stream.
+ minItems: 5
+ maxItems: 10
+
+patternProperties:
+ '^pipeline@[01]$':
+ type: object
+ additionalProperties: false
+ description:
+ clocks
+
+ properties:
+ reg:
+ enum: [ 0, 1 ]
+
+ clock-names:
+ const: pxclk
+
+ clocks:
+ maxItems: 1
+ description: The input reference for the pixel clock.
+
+ port:
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ unevaluatedProperties: false
+
+additionalProperties: false
+
+required:
+ - "#address-cells"
+ - "#size-cells"
+ - compatible
+ - reg
+ - interrupts
+ - clock-names
+ - clocks
+ - pipeline@0
+
+examples:
+ - |
+ display@c00000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "arm,mali-d71";
+ reg = <0xc00000 0x20000>;
+ interrupts = <168>;
+ clocks = <&dpu_aclk>;
+ clock-names = "aclk";
+ iommus = <&smmu 0>, <&smmu 1>, <&smmu 2>, <&smmu 3>,
+ <&smmu 8>,
+ <&smmu 4>, <&smmu 5>, <&smmu 6>, <&smmu 7>,
+ <&smmu 9>;
+
+ dp0_pipe0: pipeline@0 {
+ clocks = <&fpgaosc2>;
+ clock-names = "pxclk";
+ reg = <0>;
+
+ port {
+ dp0_pipe0_out: endpoint {
+ remote-endpoint = <&db_dvi0_in>;
+ };
+ };
+ };
+
+ dp0_pipe1: pipeline@1 {
+ clocks = <&fpgaosc2>;
+ clock-names = "pxclk";
+ reg = <1>;
+
+ port {
+ dp0_pipe1_out: endpoint {
+ remote-endpoint = <&db_dvi1_in>;
+ };
+ };
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/display/arm,malidp.txt b/Documentation/devicetree/bindings/display/arm,malidp.txt
deleted file mode 100644
index 2f7870983ef1..000000000000
--- a/Documentation/devicetree/bindings/display/arm,malidp.txt
+++ /dev/null
@@ -1,65 +0,0 @@
-ARM Mali-DP
-
-The following bindings apply to a family of Display Processors sold as
-licensable IP by ARM Ltd. The bindings describe the Mali DP500, DP550 and
-DP650 processors that offer multiple composition layers, support for
-rotation and scaling output.
-
-Required properties:
- - compatible: should be one of
- "arm,mali-dp500"
- "arm,mali-dp550"
- "arm,mali-dp650"
- depending on the particular implementation present in the hardware
- - reg: Physical base address and size of the block of registers used by
- the processor.
- - interrupts: Interrupt list, as defined in ../interrupt-controller/interrupts.txt,
- interrupt client nodes.
- - interrupt-names: name of the engine inside the processor that will
- use the corresponding interrupt. Should be one of "DE" or "SE".
- - clocks: A list of phandle + clock-specifier pairs, one for each entry
- in 'clock-names'
- - clock-names: A list of clock names. It should contain:
- - "pclk": for the APB interface clock
- - "aclk": for the AXI interface clock
- - "mclk": for the main processor clock
- - "pxlclk": for the pixel clock feeding the output PLL of the processor.
- - arm,malidp-output-port-lines: Array of u8 values describing the number
- of output lines per channel (R, G and B).
-
-Required sub-nodes:
- - port: The Mali DP connection to an encoder input port. The connection
- is modelled using the OF graph bindings specified in
- Documentation/devicetree/bindings/graph.txt
-
-Optional properties:
- - memory-region: phandle to a node describing memory (see
- Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt)
- to be used for the framebuffer; if not present, the framebuffer may
- be located anywhere in memory.
-
-
-Example:
-
-/ {
- ...
-
- dp0: malidp@6f200000 {
- compatible = "arm,mali-dp650";
- reg = <0 0x6f200000 0 0x20000>;
- memory-region = <&display_reserved>;
- interrupts = <0 168 IRQ_TYPE_LEVEL_HIGH>,
- <0 168 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "DE", "SE";
- clocks = <&oscclk2>, <&fpgaosc0>, <&fpgaosc1>, <&fpgaosc1>;
- clock-names = "pxlclk", "mclk", "aclk", "pclk";
- arm,malidp-output-port-lines = /bits/ 8 <8 8 8>;
- port {
- dp0_output: endpoint {
- remote-endpoint = <&tda998x_2_input>;
- };
- };
- };
-
- ...
-};
diff --git a/Documentation/devicetree/bindings/display/arm,malidp.yaml b/Documentation/devicetree/bindings/display/arm,malidp.yaml
new file mode 100644
index 000000000000..91812573fd08
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/arm,malidp.yaml
@@ -0,0 +1,119 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/arm,malidp.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Arm Mali Display Processor (Mali-DP)
+
+maintainers:
+ - Liviu Dudau <Liviu.Dudau@arm.com>
+ - Andre Przywara <andre.przywara@arm.com>
+
+description:
+ The following bindings apply to a family of Display Processors sold as
+ licensable IP by ARM Ltd. The bindings describe the Mali DP500, DP550 and
+ DP650 processors that offer multiple composition layers, support for
+ rotation and scaling output.
+
+properties:
+ compatible:
+ enum:
+ - arm,mali-dp500
+ - arm,mali-dp550
+ - arm,mali-dp650
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ items:
+ - description:
+ The interrupt used by the Display Engine (DE). Can be shared with
+ the interrupt for the Scaling Engine (SE), but it will have to be
+ listed individually.
+ - description:
+ The interrupt used by the Scaling Engine (SE). Can be shared with
+ the interrupt for the Display Engine (DE), but it will have to be
+ listed individually.
+
+ interrupt-names:
+ items:
+ - const: DE
+ - const: SE
+
+ clock-names:
+ items:
+ - const: pxlclk
+ - const: mclk
+ - const: aclk
+ - const: pclk
+
+ clocks:
+ items:
+ - description: the pixel clock feeding the output PLL of the processor
+ - description: the main processor clock
+ - description: the AXI interface clock
+ - description: the APB interface clock
+
+ memory-region:
+ maxItems: 1
+ description:
+ Phandle to a node describing memory to be used for the framebuffer.
+ If not present, the framebuffer may be located anywhere in memory.
+
+ arm,malidp-output-port-lines:
+ $ref: /schemas/types.yaml#/definitions/uint8-array
+ description:
+ Number of output lines/bits for each colour channel.
+ items:
+ - description: number of output lines for the red channel (R)
+ - description: number of output lines for the green channel (G)
+ - description: number of output lines for the blue channel (B)
+
+ arm,malidp-arqos-value:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ Quality-of-Service value for the display engine FIFOs, to write
+ into the RQOS register of the DP500.
+ See the ARM Mali-DP500 TRM for details on the encoding.
+ If omitted, the RQOS register will not be changed.
+
+ port:
+ $ref: /schemas/graph.yaml#/properties/port
+ unevaluatedProperties: false
+ description:
+ Output endpoint of the controller, connecting the LCD panel signals.
+
+additionalProperties: false
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - interrupt-names
+ - clocks
+ - clock-names
+ - port
+ - arm,malidp-output-port-lines
+
+examples:
+ - |
+ dp0: malidp@6f200000 {
+ compatible = "arm,mali-dp650";
+ reg = <0x6f200000 0x20000>;
+ memory-region = <&display_reserved>;
+ interrupts = <168>, <168>;
+ interrupt-names = "DE", "SE";
+ clocks = <&oscclk2>, <&fpgaosc0>, <&fpgaosc1>, <&fpgaosc1>;
+ clock-names = "pxlclk", "mclk", "aclk", "pclk";
+ arm,malidp-output-port-lines = /bits/ 8 <8 8 8>;
+ arm,malidp-arqos-value = <0xd000d000>;
+
+ port {
+ dp0_output: endpoint {
+ remote-endpoint = <&tda998x_2_input>;
+ };
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/display/arm,pl11x.txt b/Documentation/devicetree/bindings/display/arm,pl11x.txt
deleted file mode 100644
index ef89ab46b2c9..000000000000
--- a/Documentation/devicetree/bindings/display/arm,pl11x.txt
+++ /dev/null
@@ -1,109 +0,0 @@
-* ARM PrimeCell Color LCD Controller PL110/PL111
-
-See also Documentation/devicetree/bindings/arm/primecell.txt
-
-Required properties:
-
-- compatible: must be one of:
- "arm,pl110", "arm,primecell"
- "arm,pl111", "arm,primecell"
-
-- reg: base address and size of the control registers block
-
-- interrupt-names: either the single entry "combined" representing a
- combined interrupt output (CLCDINTR), or the four entries
- "mbe", "vcomp", "lnbu", "fuf" representing the individual
- CLCDMBEINTR, CLCDVCOMPINTR, CLCDLNBUINTR, CLCDFUFINTR interrupts
-
-- interrupts: contains an interrupt specifier for each entry in
- interrupt-names
-
-- clock-names: should contain "clcdclk" and "apb_pclk"
-
-- clocks: contains phandle and clock specifier pairs for the entries
- in the clock-names property. See
- Documentation/devicetree/bindings/clock/clock-bindings.txt
-
-Optional properties:
-
-- memory-region: phandle to a node describing memory (see
- Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt)
- to be used for the framebuffer; if not present, the framebuffer
- may be located anywhere in the memory
-
-- max-memory-bandwidth: maximum bandwidth in bytes per second that the
- cell's memory interface can handle; if not present, the memory
- interface is fast enough to handle all possible video modes
-
-Required sub-nodes:
-
-- port: describes LCD panel signals, following the common binding
- for video transmitter interfaces; see
- Documentation/devicetree/bindings/media/video-interfaces.txt;
- when it is a TFT panel, the port's endpoint must define the
- following property:
-
- - arm,pl11x,tft-r0g0b0-pads: an array of three 32-bit values,
- defining the way CLD pads are wired up; first value
- contains index of the "CLD" external pin (pad) used
- as R0 (first bit of the red component), second value
- index of the pad used as G0, third value index of the
- pad used as B0, see also "LCD panel signal multiplexing
- details" paragraphs in the PL110/PL111 Technical
- Reference Manuals; this implicitly defines available
- color modes, for example:
- - PL111 TFT 4:4:4 panel:
- arm,pl11x,tft-r0g0b0-pads = <4 15 20>;
- - PL110 TFT (1:)5:5:5 panel:
- arm,pl11x,tft-r0g0b0-pads = <1 7 13>;
- - PL111 TFT (1:)5:5:5 panel:
- arm,pl11x,tft-r0g0b0-pads = <3 11 19>;
- - PL111 TFT 5:6:5 panel:
- arm,pl11x,tft-r0g0b0-pads = <3 10 19>;
- - PL110 and PL111 TFT 8:8:8 panel:
- arm,pl11x,tft-r0g0b0-pads = <0 8 16>;
- - PL110 and PL111 TFT 8:8:8 panel, R & B components swapped:
- arm,pl11x,tft-r0g0b0-pads = <16 8 0>;
-
-
-Example:
-
- clcd@10020000 {
- compatible = "arm,pl111", "arm,primecell";
- reg = <0x10020000 0x1000>;
- interrupt-names = "combined";
- interrupts = <0 44 4>;
- clocks = <&oscclk1>, <&oscclk2>;
- clock-names = "clcdclk", "apb_pclk";
- max-memory-bandwidth = <94371840>; /* Bps, 1024x768@60 16bpp */
-
- port {
- clcd_pads: endpoint {
- remote-endpoint = <&clcd_panel>;
- arm,pl11x,tft-r0g0b0-pads = <0 8 16>;
- };
- };
-
- };
-
- panel {
- compatible = "panel-dpi";
-
- port {
- clcd_panel: endpoint {
- remote-endpoint = <&clcd_pads>;
- };
- };
-
- panel-timing {
- clock-frequency = <25175000>;
- hactive = <640>;
- hback-porch = <40>;
- hfront-porch = <24>;
- hsync-len = <96>;
- vactive = <480>;
- vback-porch = <32>;
- vfront-porch = <11>;
- vsync-len = <2>;
- };
- };
diff --git a/Documentation/devicetree/bindings/display/arm,pl11x.yaml b/Documentation/devicetree/bindings/display/arm,pl11x.yaml
new file mode 100644
index 000000000000..a43c1c9d9113
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/arm,pl11x.yaml
@@ -0,0 +1,173 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/arm,pl11x.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Arm PrimeCell Color LCD Controller PL110/PL111
+
+maintainers:
+ - Liviu Dudau <Liviu.Dudau@arm.com>
+ - Andre Przywara <andre.przywara@arm.com>
+
+description:
+ The Arm Primcell PL010/PL111 is an LCD controller IP, than scans out
+ a framebuffer region in system memory, and creates timed signals for
+ a variety of LCD panels.
+
+# We need a select here so we don't match all nodes with 'arm,primecell'
+select:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - arm,pl110
+ - arm,pl111
+ required:
+ - compatible
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - arm,pl110
+ - arm,pl111
+ - const: arm,primecell
+
+ reg:
+ maxItems: 1
+
+ interrupt-names:
+ oneOf:
+ - const: combined
+ description:
+ The IP provides four individual interrupt lines, but also one
+ combined line. If the integration only connects this line to the
+ interrupt controller, this single interrupt is noted here.
+ - items:
+ - const: mbe # CLCDMBEINTR
+ - const: vcomp # CLCDVCOMPINTR
+ - const: lnbu # CLCDLNBUINTR
+ - const: fuf # CLCDFUFINTR
+
+ interrupts:
+ minItems: 1
+ maxItems: 4
+
+ clock-names:
+ items:
+ - const: clcdclk
+ - const: apb_pclk
+
+ clocks:
+ items:
+ - description: The CLCDCLK reference clock for the controller.
+ - description: The HCLK AHB slave clock for the register access.
+
+ memory-region:
+ maxItems: 1
+ description:
+ Phandle to a node describing memory to be used for the framebuffer.
+ If not present, the framebuffer may be located anywhere in memory.
+
+ max-memory-bandwidth:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ Maximum bandwidth in bytes per second that the cell's memory interface
+ can handle.
+ If not present, the memory interface is fast enough to handle all
+ possible video modes.
+
+ resets:
+ maxItems: 1
+
+ port:
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ additionalProperties: false
+
+ description:
+ Output endpoint of the controller, connecting the LCD panel signals.
+
+ properties:
+ endpoint:
+ $ref: /schemas/graph.yaml#/$defs/endpoint-base
+ unevaluatedProperties: false
+
+ properties:
+ arm,pl11x,tft-r0g0b0-pads:
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ items:
+ - description: index of CLD pad used for first red bit (R0)
+ - description: index of CLD pad used for first green bit (G0)
+ - description: index of CLD pad used for first blue bit (G0)
+ deprecated: true
+ description: |
+ DEPRECATED. An array of three 32-bit values, defining the way
+ CLD[23:0] pads are wired up.
+ The first value contains the index of the "CLD" external pin (pad)
+ used as R0 (first bit of the red component), the second value for
+ green, the third value for blue.
+ See also "LCD panel signal multiplexing details" paragraphs in the
+ PL110/PL111 Technical Reference Manuals.
+ This implicitly defines available color modes, for example:
+ - PL111 TFT 4:4:4 panel:
+ arm,pl11x,tft-r0g0b0-pads = <4 15 20>;
+ - PL110 TFT (1:)5:5:5 panel:
+ arm,pl11x,tft-r0g0b0-pads = <1 7 13>;
+ - PL111 TFT (1:)5:5:5 panel:
+ arm,pl11x,tft-r0g0b0-pads = <3 11 19>;
+ - PL111 TFT 5:6:5 panel:
+ arm,pl11x,tft-r0g0b0-pads = <3 10 19>;
+ - PL110 and PL111 TFT 8:8:8 panel:
+ arm,pl11x,tft-r0g0b0-pads = <0 8 16>;
+ - PL110 and PL111 TFT 8:8:8 panel, R & B components swapped:
+ arm,pl11x,tft-r0g0b0-pads = <16 8 0>;
+
+additionalProperties: false
+
+required:
+ - compatible
+ - reg
+ - clock-names
+ - clocks
+ - port
+
+allOf:
+ - if:
+ properties:
+ interrupts:
+ minItems: 2
+ required:
+ - interrupts
+ then:
+ required:
+ - interrupt-names
+
+examples:
+ - |
+ clcd@10020000 {
+ compatible = "arm,pl111", "arm,primecell";
+ reg = <0x10020000 0x1000>;
+ interrupt-names = "combined";
+ interrupts = <44>;
+ clocks = <&oscclk1>, <&oscclk2>;
+ clock-names = "clcdclk", "apb_pclk";
+ max-memory-bandwidth = <94371840>; /* Bps, 1024x768@60 16bpp */
+
+ port {
+ clcd_pads: endpoint {
+ remote-endpoint = <&clcd_panel>;
+ };
+ };
+ };
+
+ panel {
+ compatible = "arm,rtsm-display";
+
+ port {
+ clcd_panel: endpoint {
+ remote-endpoint = <&clcd_pads>;
+ };
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/display/atmel,lcdc-display.yaml b/Documentation/devicetree/bindings/display/atmel,lcdc-display.yaml
new file mode 100644
index 000000000000..a5cf040ab4ea
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/atmel,lcdc-display.yaml
@@ -0,0 +1,103 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/atmel,lcdc-display.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Microchip's LCDC Display
+
+maintainers:
+ - Nicolas Ferre <nicolas.ferre@microchip.com>
+ - Dharma Balasubiramani <dharma.b@microchip.com>
+
+description:
+ The LCD Controller (LCDC) consists of logic for transferring LCD image data
+ from an external display buffer to a TFT LCD panel. The LCDC has one display
+ input buffer per layer that fetches pixels through the single bus host
+ interface and a look-up table to allow palletized display configurations. The
+ LCDC is programmable on a per layer basis, and supports different LCD
+ resolutions, window sizes, image formats and pixel depths.
+
+# We need a select here since this schema is applicable only for nodes with the
+# following properties
+
+select:
+ anyOf:
+ - required: [ 'atmel,dmacon' ]
+ - required: [ 'atmel,lcdcon2' ]
+ - required: [ 'atmel,guard-time' ]
+
+properties:
+ atmel,dmacon:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: dma controller configuration
+
+ atmel,lcdcon2:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: lcd controller configuration
+
+ atmel,guard-time:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: lcd guard time (Delay in frame periods)
+ maximum: 127
+
+ bits-per-pixel:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: lcd panel bit-depth.
+ enum: [1, 2, 4, 8, 16, 24, 32]
+
+ atmel,lcdcon-backlight:
+ $ref: /schemas/types.yaml#/definitions/flag
+ description: enable backlight
+
+ atmel,lcdcon-backlight-inverted:
+ $ref: /schemas/types.yaml#/definitions/flag
+ description: invert backlight PWM polarity
+
+ atmel,lcd-wiring-mode:
+ $ref: /schemas/types.yaml#/definitions/string
+ description: lcd wiring mode "RGB" or "BRG"
+ enum:
+ - RGB
+ - BRG
+
+ atmel,power-control-gpio:
+ description: gpio to power on or off the LCD (as many as needed)
+ maxItems: 1
+
+ display-timings:
+ $ref: panel/display-timings.yaml#
+
+required:
+ - atmel,dmacon
+ - atmel,lcdcon2
+ - atmel,guard-time
+ - bits-per-pixel
+
+additionalProperties: false
+
+examples:
+ - |
+ display: panel {
+ bits-per-pixel = <32>;
+ atmel,lcdcon-backlight;
+ atmel,dmacon = <0x1>;
+ atmel,lcdcon2 = <0x80008002>;
+ atmel,guard-time = <9>;
+ atmel,lcd-wiring-mode = "RGB";
+
+ display-timings {
+ native-mode = <&timing0>;
+ timing0: timing0 {
+ clock-frequency = <9000000>;
+ hactive = <480>;
+ vactive = <272>;
+ hback-porch = <1>;
+ hfront-porch = <1>;
+ vback-porch = <40>;
+ vfront-porch = <1>;
+ hsync-len = <45>;
+ vsync-len = <1>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/atmel,lcdc.txt b/Documentation/devicetree/bindings/display/atmel,lcdc.txt
deleted file mode 100644
index acb5a0132127..000000000000
--- a/Documentation/devicetree/bindings/display/atmel,lcdc.txt
+++ /dev/null
@@ -1,88 +0,0 @@
-Atmel LCDC Framebuffer
------------------------------------------------------
-
-Required properties:
-- compatible :
- "atmel,at91sam9261-lcdc" ,
- "atmel,at91sam9263-lcdc" ,
- "atmel,at91sam9g10-lcdc" ,
- "atmel,at91sam9g45-lcdc" ,
- "atmel,at91sam9g45es-lcdc" ,
- "atmel,at91sam9rl-lcdc" ,
- "atmel,at32ap-lcdc"
-- reg : Should contain 1 register ranges(address and length).
- Can contain an additional register range(address and length)
- for fixed framebuffer memory. Useful for dedicated memories.
-- interrupts : framebuffer controller interrupt
-- display: a phandle pointing to the display node
-
-Required nodes:
-- display: a display node is required to initialize the lcd panel
- This should be in the board dts.
-- default-mode: a videomode within the display with timing parameters
- as specified below.
-
-Optional properties:
-- lcd-supply: Regulator for LCD supply voltage.
-
-Example:
-
- fb0: fb@00500000 {
- compatible = "atmel,at91sam9g45-lcdc";
- reg = <0x00500000 0x1000>;
- interrupts = <23 3 0>;
- pinctrl-names = "default";
- pinctrl-0 = <&pinctrl_fb>;
- display = <&display0>;
- #address-cells = <1>;
- #size-cells = <1>;
-
- };
-
-Example for fixed framebuffer memory:
-
- fb0: fb@00500000 {
- compatible = "atmel,at91sam9263-lcdc";
- reg = <0x00700000 0x1000 0x70000000 0x200000>;
- [...]
- };
-
-Atmel LCDC Display
------------------------------------------------------
-Required properties (as per of_videomode_helper):
-
- - atmel,dmacon: dma controller configuration
- - atmel,lcdcon2: lcd controller configuration
- - atmel,guard-time: lcd guard time (Delay in frame periods)
- - bits-per-pixel: lcd panel bit-depth.
-
-Optional properties (as per of_videomode_helper):
- - atmel,lcdcon-backlight: enable backlight
- - atmel,lcdcon-backlight-inverted: invert backlight PWM polarity
- - atmel,lcd-wiring-mode: lcd wiring mode "RGB" or "BRG"
- - atmel,power-control-gpio: gpio to power on or off the LCD (as many as needed)
-
-Example:
- display0: display {
- bits-per-pixel = <32>;
- atmel,lcdcon-backlight;
- atmel,dmacon = <0x1>;
- atmel,lcdcon2 = <0x80008002>;
- atmel,guard-time = <9>;
- atmel,lcd-wiring-mode = <1>;
-
- display-timings {
- native-mode = <&timing0>;
- timing0: timing0 {
- clock-frequency = <9000000>;
- hactive = <480>;
- vactive = <272>;
- hback-porch = <1>;
- hfront-porch = <1>;
- vback-porch = <40>;
- vfront-porch = <1>;
- hsync-len = <45>;
- vsync-len = <1>;
- };
- };
- };
diff --git a/Documentation/devicetree/bindings/display/atmel,lcdc.yaml b/Documentation/devicetree/bindings/display/atmel,lcdc.yaml
new file mode 100644
index 000000000000..1b6f7e395006
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/atmel,lcdc.yaml
@@ -0,0 +1,70 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/atmel,lcdc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Microchip's LCDC Framebuffer
+
+maintainers:
+ - Nicolas Ferre <nicolas.ferre@microchip.com>
+ - Dharma Balasubiramani <dharma.b@microchip.com>
+
+description:
+ The LCDC works with a framebuffer, which is a section of memory that contains
+ a complete frame of data representing pixel values for the display. The LCDC
+ reads the pixel data from the framebuffer and sends it to the LCD panel to
+ render the image.
+
+properties:
+ compatible:
+ enum:
+ - atmel,at91sam9261-lcdc
+ - atmel,at91sam9263-lcdc
+ - atmel,at91sam9g10-lcdc
+ - atmel,at91sam9g45-lcdc
+ - atmel,at91sam9g45es-lcdc
+ - atmel,at91sam9rl-lcdc
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ maxItems: 2
+
+ clock-names:
+ items:
+ - const: hclk
+ - const: lcdc_clk
+
+ display:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description: A phandle pointing to the display node.
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+ - display
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/at91.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+ fb@500000 {
+ compatible = "atmel,at91sam9g45-lcdc";
+ reg = <0x00500000 0x1000>;
+ interrupts = <23 IRQ_TYPE_LEVEL_HIGH 0>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_fb>;
+ clocks = <&pmc PMC_TYPE_PERIPHERAL 23>, <&pmc PMC_TYPE_PERIPHERAL 23>;
+ clock-names = "hclk", "lcdc_clk";
+ display = <&display>;
+ };
diff --git a/Documentation/devicetree/bindings/display/atmel/atmel,hlcdc-display-controller.yaml b/Documentation/devicetree/bindings/display/atmel/atmel,hlcdc-display-controller.yaml
new file mode 100644
index 000000000000..29ed42485de3
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/atmel/atmel,hlcdc-display-controller.yaml
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/atmel/atmel,hlcdc-display-controller.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Atmel's High LCD Controller (HLCDC)
+
+maintainers:
+ - Nicolas Ferre <nicolas.ferre@microchip.com>
+ - Alexandre Belloni <alexandre.belloni@bootlin.com>
+ - Claudiu Beznea <claudiu.beznea@tuxon.dev>
+
+description:
+ The LCD Controller (LCDC) consists of logic for transferring LCD image
+ data from an external display buffer to a TFT LCD panel. The LCDC has one
+ display input buffer per layer that fetches pixels through the single bus
+ host interface and a look-up table to allow palletized display
+ configurations.
+
+properties:
+ compatible:
+ const: atmel,hlcdc-display-controller
+
+ '#address-cells':
+ const: 1
+
+ '#size-cells':
+ const: 0
+
+ port@0:
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ unevaluatedProperties: false
+ description:
+ Output endpoint of the controller, connecting the LCD panel signals.
+
+ properties:
+ '#address-cells':
+ const: 1
+
+ '#size-cells':
+ const: 0
+
+ reg:
+ maxItems: 1
+
+ endpoint:
+ $ref: /schemas/media/video-interfaces.yaml#
+ unevaluatedProperties: false
+ description:
+ Endpoint connecting the LCD panel signals.
+
+ properties:
+ bus-width:
+ enum: [ 12, 16, 18, 24 ]
+
+required:
+ - '#address-cells'
+ - '#size-cells'
+ - compatible
+ - port@0
+
+additionalProperties: false
diff --git a/Documentation/devicetree/bindings/display/atmel/hlcdc-dc.txt b/Documentation/devicetree/bindings/display/atmel/hlcdc-dc.txt
deleted file mode 100644
index 0398aec488ac..000000000000
--- a/Documentation/devicetree/bindings/display/atmel/hlcdc-dc.txt
+++ /dev/null
@@ -1,75 +0,0 @@
-Device-Tree bindings for Atmel's HLCDC (High LCD Controller) DRM driver
-
-The Atmel HLCDC Display Controller is subdevice of the HLCDC MFD device.
-See ../../mfd/atmel-hlcdc.txt for more details.
-
-Required properties:
- - compatible: value should be "atmel,hlcdc-display-controller"
- - pinctrl-names: the pin control state names. Should contain "default".
- - pinctrl-0: should contain the default pinctrl states.
- - #address-cells: should be set to 1.
- - #size-cells: should be set to 0.
-
-Required children nodes:
- Children nodes are encoding available output ports and their connections
- to external devices using the OF graph reprensentation (see ../graph.txt).
- At least one port node is required.
-
-Optional properties in grandchild nodes:
- Any endpoint grandchild node may specify a desired video interface
- according to ../../media/video-interfaces.txt, specifically
- - bus-width: recognized values are <12>, <16>, <18> and <24>, and
- override any output mode selection heuristic, forcing "rgb444",
- "rgb565", "rgb666" and "rgb888" respectively.
-
-Example:
-
- hlcdc: hlcdc@f0030000 {
- compatible = "atmel,sama5d3-hlcdc";
- reg = <0xf0030000 0x2000>;
- interrupts = <36 IRQ_TYPE_LEVEL_HIGH 0>;
- clocks = <&lcdc_clk>, <&lcdck>, <&clk32k>;
- clock-names = "periph_clk","sys_clk", "slow_clk";
-
- hlcdc-display-controller {
- compatible = "atmel,hlcdc-display-controller";
- pinctrl-names = "default";
- pinctrl-0 = <&pinctrl_lcd_base &pinctrl_lcd_rgb888>;
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- #address-cells = <1>;
- #size-cells = <0>;
- reg = <0>;
-
- hlcdc_panel_output: endpoint@0 {
- reg = <0>;
- remote-endpoint = <&panel_input>;
- };
- };
- };
-
- hlcdc_pwm: hlcdc-pwm {
- compatible = "atmel,hlcdc-pwm";
- pinctrl-names = "default";
- pinctrl-0 = <&pinctrl_lcd_pwm>;
- #pwm-cells = <3>;
- };
- };
-
-Example 2: With a video interface override to force rgb565; as above
-but with these changes/additions:
-
- &hlcdc {
- hlcdc-display-controller {
- pinctrl-names = "default";
- pinctrl-0 = <&pinctrl_lcd_base &pinctrl_lcd_rgb565>;
-
- port@0 {
- hlcdc_panel_output: endpoint@0 {
- bus-width = <16>;
- };
- };
- };
- };
diff --git a/Documentation/devicetree/bindings/display/brcm,bcm-vc4.txt b/Documentation/devicetree/bindings/display/brcm,bcm-vc4.txt
deleted file mode 100644
index 26649b4c4dd8..000000000000
--- a/Documentation/devicetree/bindings/display/brcm,bcm-vc4.txt
+++ /dev/null
@@ -1,174 +0,0 @@
-Broadcom VC4 (VideoCore4) GPU
-
-The VC4 device present on the Raspberry Pi includes a display system
-with HDMI output and the HVS (Hardware Video Scaler) for compositing
-display planes.
-
-Required properties for VC4:
-- compatible: Should be "brcm,bcm2835-vc4" or "brcm,cygnus-vc4"
-
-Required properties for Pixel Valve:
-- compatible: Should be one of "brcm,bcm2835-pixelvalve0",
- "brcm,bcm2835-pixelvalve1", or "brcm,bcm2835-pixelvalve2"
-- reg: Physical base address and length of the PV's registers
-- interrupts: The interrupt number
- See bindings/interrupt-controller/brcm,bcm2835-armctrl-ic.txt
-
-Required properties for HVS:
-- compatible: Should be "brcm,bcm2835-hvs"
-- reg: Physical base address and length of the HVS's registers
-- interrupts: The interrupt number
- See bindings/interrupt-controller/brcm,bcm2835-armctrl-ic.txt
-
-Required properties for HDMI
-- compatible: Should be "brcm,bcm2835-hdmi"
-- reg: Physical base address and length of the two register ranges
- ("HDMI" and "HD", in that order)
-- interrupts: The interrupt numbers
- See bindings/interrupt-controller/brcm,bcm2835-armctrl-ic.txt
-- ddc: phandle of the I2C controller used for DDC EDID probing
-- clocks: a) hdmi: The HDMI state machine clock
- b) pixel: The pixel clock.
-
-Optional properties for HDMI:
-- hpd-gpios: The GPIO pin for HDMI hotplug detect (if it doesn't appear
- as an interrupt/status bit in the HDMI controller
- itself). See bindings/pinctrl/brcm,bcm2835-gpio.txt
-- dmas: Should contain one entry pointing to the DMA channel used to
- transfer audio data
-- dma-names: Should contain "audio-rx"
-
-Required properties for DPI:
-- compatible: Should be "brcm,bcm2835-dpi"
-- reg: Physical base address and length of the registers
-- clocks: a) core: The core clock the unit runs on
- b) pixel: The pixel clock that feeds the pixelvalve
-- port: Port node with a single endpoint connecting to the panel
- device, as defined in [1]
-
-Required properties for VEC:
-- compatible: Should be "brcm,bcm2835-vec"
-- reg: Physical base address and length of the registers
-- clocks: The core clock the unit runs on
-- interrupts: The interrupt number
- See bindings/interrupt-controller/brcm,bcm2835-armctrl-ic.txt
-
-Required properties for V3D:
-- compatible: Should be "brcm,bcm2835-v3d" or "brcm,cygnus-v3d"
-- reg: Physical base address and length of the V3D's registers
-- interrupts: The interrupt number
- See bindings/interrupt-controller/brcm,bcm2835-armctrl-ic.txt
-
-Optional properties for V3D:
-- clocks: The clock the unit runs on
-
-Required properties for DSI:
-- compatible: Should be "brcm,bcm2835-dsi0" or "brcm,bcm2835-dsi1"
-- reg: Physical base address and length of the DSI block's registers
-- interrupts: The interrupt number
- See bindings/interrupt-controller/brcm,bcm2835-armctrl-ic.txt
-- clocks: a) phy: The DSI PLL clock feeding the DSI analog PHY
- b) escape: The DSI ESC clock from CPRMAN
- c) pixel: The DSI pixel clock from CPRMAN
-- clock-output-names:
- The 3 clocks output from the DSI analog PHY: dsi[01]_byte,
- dsi[01]_ddr2, and dsi[01]_ddr
-
-Required properties for the TXP (writeback) block:
-- compatible: Should be "brcm,bcm2835-txp"
-- reg: Physical base address and length of the TXP block's registers
-- interrupts: The interrupt number
- See bindings/interrupt-controller/brcm,bcm2835-armctrl-ic.txt
-
-[1] Documentation/devicetree/bindings/media/video-interfaces.txt
-
-Example:
-pixelvalve@7e807000 {
- compatible = "brcm,bcm2835-pixelvalve2";
- reg = <0x7e807000 0x100>;
- interrupts = <2 10>; /* pixelvalve */
-};
-
-hvs@7e400000 {
- compatible = "brcm,bcm2835-hvs";
- reg = <0x7e400000 0x6000>;
- interrupts = <2 1>;
-};
-
-hdmi: hdmi@7e902000 {
- compatible = "brcm,bcm2835-hdmi";
- reg = <0x7e902000 0x600>,
- <0x7e808000 0x100>;
- interrupts = <2 8>, <2 9>;
- ddc = <&i2c2>;
- hpd-gpios = <&gpio 46 GPIO_ACTIVE_HIGH>;
- clocks = <&clocks BCM2835_PLLH_PIX>,
- <&clocks BCM2835_CLOCK_HSM>;
- clock-names = "pixel", "hdmi";
-};
-
-dpi: dpi@7e208000 {
- compatible = "brcm,bcm2835-dpi";
- reg = <0x7e208000 0x8c>;
- clocks = <&clocks BCM2835_CLOCK_VPU>,
- <&clocks BCM2835_CLOCK_DPI>;
- clock-names = "core", "pixel";
- #address-cells = <1>;
- #size-cells = <0>;
-
- port {
- dpi_out: endpoint@0 {
- remote-endpoint = <&panel_in>;
- };
- };
-};
-
-dsi1: dsi@7e700000 {
- compatible = "brcm,bcm2835-dsi1";
- reg = <0x7e700000 0x8c>;
- interrupts = <2 12>;
- #address-cells = <1>;
- #size-cells = <0>;
- #clock-cells = <1>;
-
- clocks = <&clocks BCM2835_PLLD_DSI1>,
- <&clocks BCM2835_CLOCK_DSI1E>,
- <&clocks BCM2835_CLOCK_DSI1P>;
- clock-names = "phy", "escape", "pixel";
-
- clock-output-names = "dsi1_byte", "dsi1_ddr2", "dsi1_ddr";
-
- pitouchscreen: panel@0 {
- compatible = "raspberrypi,touchscreen";
- reg = <0>;
-
- <...>
- };
-};
-
-vec: vec@7e806000 {
- compatible = "brcm,bcm2835-vec";
- reg = <0x7e806000 0x1000>;
- clocks = <&clocks BCM2835_CLOCK_VEC>;
- interrupts = <2 27>;
-};
-
-v3d: v3d@7ec00000 {
- compatible = "brcm,bcm2835-v3d";
- reg = <0x7ec00000 0x1000>;
- interrupts = <1 10>;
-};
-
-vc4: gpu {
- compatible = "brcm,bcm2835-vc4";
-};
-
-panel: panel {
- compatible = "ontat,yx700wv03", "simple-panel";
-
- port {
- panel_in: endpoint {
- remote-endpoint = <&dpi_out>;
- };
- };
-};
diff --git a/Documentation/devicetree/bindings/display/brcm,bcm2711-hdmi.yaml b/Documentation/devicetree/bindings/display/brcm,bcm2711-hdmi.yaml
new file mode 100644
index 000000000000..6d11f5955b51
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/brcm,bcm2711-hdmi.yaml
@@ -0,0 +1,143 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/brcm,bcm2711-hdmi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom BCM2711 HDMI Controller
+
+maintainers:
+ - Eric Anholt <eric@anholt.net>
+
+properties:
+ compatible:
+ enum:
+ - brcm,bcm2711-hdmi0
+ - brcm,bcm2711-hdmi1
+ - brcm,bcm2712-hdmi0
+ - brcm,bcm2712-hdmi1
+
+ reg:
+ items:
+ - description: HDMI controller register range
+ - description: DVP register range
+ - description: HDMI PHY register range
+ - description: Rate Manager register range
+ - description: Packet RAM register range
+ - description: Metadata RAM register range
+ - description: CSC register range
+ - description: CEC register range
+ - description: HD register range
+
+ reg-names:
+ items:
+ - const: hdmi
+ - const: dvp
+ - const: phy
+ - const: rm
+ - const: packet
+ - const: metadata
+ - const: csc
+ - const: cec
+ - const: hd
+
+ clocks:
+ items:
+ - description: The HDMI state machine clock
+ - description: The Pixel BVB clock
+ - description: The HDMI Audio parent clock
+ - description: The HDMI CEC parent clock
+
+ clock-names:
+ items:
+ - const: hdmi
+ - const: bvb
+ - const: audio
+ - const: cec
+
+ interrupts:
+ items:
+ - description: CEC TX interrupt
+ - description: CEC RX interrupt
+ - description: CEC stuck at low interrupt
+ - description: Wake-up interrupt
+ - description: Hotplug connected interrupt
+ - description: Hotplug removed interrupt
+
+ interrupt-names:
+ items:
+ - const: cec-tx
+ - const: cec-rx
+ - const: cec-low
+ - const: wakeup
+ - const: hpd-connected
+ - const: hpd-removed
+
+ ddc:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description: >
+ Phandle of the I2C controller used for DDC EDID probing
+
+ hpd-gpios:
+ maxItems: 1
+ description: >
+ The GPIO pin for the HDMI hotplug detect (if it doesn't appear
+ as an interrupt/status bit in the HDMI controller itself)
+
+ dmas:
+ maxItems: 1
+ description: >
+ Should contain one entry pointing to the DMA channel used to
+ transfer audio data.
+
+ dma-names:
+ const: audio-rx
+
+ resets:
+ maxItems: 1
+
+ wifi-2.4ghz-coexistence:
+ type: boolean
+ description: >
+ Should the pixel frequencies in the WiFi frequencies range be
+ avoided?
+
+required:
+ - compatible
+ - reg
+ - reg-names
+ - clocks
+ - resets
+ - ddc
+
+additionalProperties: false
+
+examples:
+ - |
+ hdmi0: hdmi@7ef00700 {
+ compatible = "brcm,bcm2711-hdmi0";
+ reg = <0x7ef00700 0x300>,
+ <0x7ef00300 0x200>,
+ <0x7ef00f00 0x80>,
+ <0x7ef00f80 0x80>,
+ <0x7ef01b00 0x200>,
+ <0x7ef01f00 0x400>,
+ <0x7ef00200 0x80>,
+ <0x7ef04300 0x100>,
+ <0x7ef20000 0x100>;
+ reg-names = "hdmi",
+ "dvp",
+ "phy",
+ "rm",
+ "packet",
+ "metadata",
+ "csc",
+ "cec",
+ "hd";
+ clocks = <&firmware_clocks 13>, <&firmware_clocks 14>, <&dvp 1>, <&clk_27MHz>;
+ clock-names = "hdmi", "bvb", "audio", "cec";
+ resets = <&dvp 0>;
+ ddc = <&ddc0>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/brcm,bcm2835-dpi.yaml b/Documentation/devicetree/bindings/display/brcm,bcm2835-dpi.yaml
new file mode 100644
index 000000000000..c9ad0ecc9b6d
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/brcm,bcm2835-dpi.yaml
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/brcm,bcm2835-dpi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom VC4 (VideoCore4) DPI Controller
+
+maintainers:
+ - Eric Anholt <eric@anholt.net>
+
+properties:
+ compatible:
+ const: brcm,bcm2835-dpi
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: The core clock the unit runs on
+ - description: The pixel clock that feeds the pixelvalve
+
+ clock-names:
+ items:
+ - const: core
+ - const: pixel
+
+ port:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Port node with a single endpoint connecting to the panel.
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - port
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/bcm2835.h>
+
+ dpi: dpi@7e208000 {
+ compatible = "brcm,bcm2835-dpi";
+ reg = <0x7e208000 0x8c>;
+ clocks = <&clocks BCM2835_CLOCK_VPU>,
+ <&clocks BCM2835_CLOCK_DPI>;
+ clock-names = "core", "pixel";
+
+ port {
+ dpi_out: endpoint {
+ remote-endpoint = <&panel_in>;
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/brcm,bcm2835-dsi0.yaml b/Documentation/devicetree/bindings/display/brcm,bcm2835-dsi0.yaml
new file mode 100644
index 000000000000..c8b2459d64f6
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/brcm,bcm2835-dsi0.yaml
@@ -0,0 +1,85 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/brcm,bcm2835-dsi0.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom VC4 (VideoCore4) DSI Controller
+
+maintainers:
+ - Eric Anholt <eric@anholt.net>
+
+allOf:
+ - $ref: dsi-controller.yaml#
+
+properties:
+ "#clock-cells":
+ const: 1
+
+ compatible:
+ enum:
+ - brcm,bcm2711-dsi1
+ - brcm,bcm2835-dsi0
+ - brcm,bcm2835-dsi1
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: The DSI PLL clock feeding the DSI analog PHY
+ - description: The DSI ESC clock
+ - description: The DSI pixel clock
+
+ clock-names:
+ items:
+ - const: phy
+ - const: escape
+ - const: pixel
+
+ clock-output-names: true
+ # FIXME: The meta-schemas don't seem to allow it for now
+ # items:
+ # - description: The DSI byte clock for the PHY
+ # - description: The DSI DDR2 clock
+ # - description: The DSI DDR clock
+
+ interrupts:
+ maxItems: 1
+
+ power-domains:
+ maxItems: 1
+
+required:
+ - "#clock-cells"
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - clock-output-names
+ - interrupts
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/bcm2835.h>
+
+ dsi1: dsi@7e700000 {
+ compatible = "brcm,bcm2835-dsi1";
+ reg = <0x7e700000 0x8c>;
+ interrupts = <2 12>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ #clock-cells = <1>;
+
+ clocks = <&clocks BCM2835_PLLD_DSI1>,
+ <&clocks BCM2835_CLOCK_DSI1E>,
+ <&clocks BCM2835_CLOCK_DSI1P>;
+ clock-names = "phy", "escape", "pixel";
+
+ clock-output-names = "dsi1_byte", "dsi1_ddr2", "dsi1_ddr";
+
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/brcm,bcm2835-hdmi.yaml b/Documentation/devicetree/bindings/display/brcm,bcm2835-hdmi.yaml
new file mode 100644
index 000000000000..48c8cad0d96d
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/brcm,bcm2835-hdmi.yaml
@@ -0,0 +1,83 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/brcm,bcm2835-hdmi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom VC4 (VideoCore4) HDMI Controller
+
+maintainers:
+ - Eric Anholt <eric@anholt.net>
+
+properties:
+ compatible:
+ const: brcm,bcm2835-hdmi
+
+ reg:
+ items:
+ - description: HDMI register range
+ - description: HD register range
+
+ interrupts:
+ minItems: 2
+
+ clocks:
+ items:
+ - description: The pixel clock
+ - description: The HDMI state machine clock
+
+ clock-names:
+ items:
+ - const: pixel
+ - const: hdmi
+
+ ddc:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description: >
+ Phandle of the I2C controller used for DDC EDID probing
+
+ hpd-gpios:
+ maxItems: 1
+ description: >
+ The GPIO pin for the HDMI hotplug detect (if it doesn't appear
+ as an interrupt/status bit in the HDMI controller itself)
+
+ dmas:
+ maxItems: 1
+ description: >
+ Should contain one entry pointing to the DMA channel used to
+ transfer audio data.
+
+ dma-names:
+ const: audio-rx
+
+ power-domains:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - ddc
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/bcm2835.h>
+ #include <dt-bindings/gpio/gpio.h>
+
+ hdmi: hdmi@7e902000 {
+ compatible = "brcm,bcm2835-hdmi";
+ reg = <0x7e902000 0x600>,
+ <0x7e808000 0x100>;
+ interrupts = <2 8>, <2 9>;
+ ddc = <&i2c2>;
+ hpd-gpios = <&gpio 46 GPIO_ACTIVE_HIGH>;
+ clocks = <&clocks BCM2835_PLLH_PIX>,
+ <&clocks BCM2835_CLOCK_HSM>;
+ clock-names = "pixel", "hdmi";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/brcm,bcm2835-hvs.yaml b/Documentation/devicetree/bindings/display/brcm,bcm2835-hvs.yaml
new file mode 100644
index 000000000000..f91c9dce2a44
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/brcm,bcm2835-hvs.yaml
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/brcm,bcm2835-hvs.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom VC4 (VideoCore4) Hardware Video Scaler
+
+maintainers:
+ - Eric Anholt <eric@anholt.net>
+
+properties:
+ compatible:
+ enum:
+ - brcm,bcm2711-hvs
+ - brcm,bcm2712-hvs
+ - brcm,bcm2835-hvs
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+ description: Core Clock
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+additionalProperties: false
+
+if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - brcm,bcm2711-hvs
+ - brcm,bcm2712-hvs
+
+then:
+ required:
+ - clocks
+
+examples:
+ - |
+ hvs@7e400000 {
+ compatible = "brcm,bcm2835-hvs";
+ reg = <0x7e400000 0x6000>;
+ interrupts = <2 1>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/brcm,bcm2835-pixelvalve0.yaml b/Documentation/devicetree/bindings/display/brcm,bcm2835-pixelvalve0.yaml
new file mode 100644
index 000000000000..6b5b1d3fbc0b
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/brcm,bcm2835-pixelvalve0.yaml
@@ -0,0 +1,48 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/brcm,bcm2835-pixelvalve0.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom VC4 (VideoCore4) PixelValve
+
+maintainers:
+ - Eric Anholt <eric@anholt.net>
+
+properties:
+ compatible:
+ enum:
+ - brcm,bcm2835-pixelvalve0
+ - brcm,bcm2835-pixelvalve1
+ - brcm,bcm2835-pixelvalve2
+ - brcm,bcm2711-pixelvalve0
+ - brcm,bcm2711-pixelvalve1
+ - brcm,bcm2711-pixelvalve2
+ - brcm,bcm2711-pixelvalve3
+ - brcm,bcm2711-pixelvalve4
+ - brcm,bcm2712-pixelvalve0
+ - brcm,bcm2712-pixelvalve1
+ - brcm,bcm2712-pixelvalve2
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ pixelvalve@7e807000 {
+ compatible = "brcm,bcm2835-pixelvalve2";
+ reg = <0x7e807000 0x100>;
+ interrupts = <2 10>; /* pixelvalve */
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/brcm,bcm2835-txp.yaml b/Documentation/devicetree/bindings/display/brcm,bcm2835-txp.yaml
new file mode 100644
index 000000000000..16f45afd2bad
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/brcm,bcm2835-txp.yaml
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/brcm,bcm2835-txp.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom VC4 (VideoCore4) TXP (writeback) Controller
+
+maintainers:
+ - Eric Anholt <eric@anholt.net>
+
+properties:
+ compatible:
+ enum:
+ - brcm,bcm2712-mop
+ - brcm,bcm2712-moplet
+ - brcm,bcm2835-txp
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ txp: txp@7e004000 {
+ compatible = "brcm,bcm2835-txp";
+ reg = <0x7e004000 0x20>;
+ interrupts = <1 11>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/brcm,bcm2835-v3d.yaml b/Documentation/devicetree/bindings/display/brcm,bcm2835-v3d.yaml
new file mode 100644
index 000000000000..c55a8217de25
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/brcm,bcm2835-v3d.yaml
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/brcm,bcm2835-v3d.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom VC4 (VideoCore4) V3D GPU
+
+maintainers:
+ - Eric Anholt <eric@anholt.net>
+
+properties:
+ compatible:
+ enum:
+ - brcm,bcm2835-v3d
+ - brcm,cygnus-v3d
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ power-domains:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ v3d: v3d@7ec00000 {
+ compatible = "brcm,bcm2835-v3d";
+ reg = <0x7ec00000 0x1000>;
+ interrupts = <1 10>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/brcm,bcm2835-vc4.yaml b/Documentation/devicetree/bindings/display/brcm,bcm2835-vc4.yaml
new file mode 100644
index 000000000000..2aa9d5d2afff
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/brcm,bcm2835-vc4.yaml
@@ -0,0 +1,36 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/brcm,bcm2835-vc4.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom VC4 (VideoCore4) GPU
+
+maintainers:
+ - Eric Anholt <eric@anholt.net>
+
+description: >
+ The VC4 device present on the Raspberry Pi includes a display system
+ with HDMI output and the HVS (Hardware Video Scaler) for compositing
+ display planes.
+
+properties:
+ compatible:
+ enum:
+ - brcm,bcm2711-vc5
+ - brcm,bcm2712-vc6
+ - brcm,bcm2835-vc4
+ - brcm,cygnus-vc4
+
+required:
+ - compatible
+
+additionalProperties: false
+
+examples:
+ - |
+ vc4: gpu {
+ compatible = "brcm,bcm2835-vc4";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/brcm,bcm2835-vec.yaml b/Documentation/devicetree/bindings/display/brcm,bcm2835-vec.yaml
new file mode 100644
index 000000000000..5d921e30394e
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/brcm,bcm2835-vec.yaml
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/brcm,bcm2835-vec.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom VC4 (VideoCore4) VEC
+
+maintainers:
+ - Eric Anholt <eric@anholt.net>
+
+properties:
+ compatible:
+ enum:
+ - brcm,bcm2711-vec
+ - brcm,bcm2835-vec
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ power-domains:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/bcm2835.h>
+
+ vec: vec@7e806000 {
+ compatible = "brcm,bcm2835-vec";
+ reg = <0x7e806000 0x1000>;
+ clocks = <&clocks BCM2835_CLOCK_VEC>;
+ interrupts = <2 27>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/bridge/adi,adv7123.txt b/Documentation/devicetree/bindings/display/bridge/adi,adv7123.txt
deleted file mode 100644
index a6b2b2b8f3d9..000000000000
--- a/Documentation/devicetree/bindings/display/bridge/adi,adv7123.txt
+++ /dev/null
@@ -1,50 +0,0 @@
-Analog Device ADV7123 Video DAC
--------------------------------
-
-The ADV7123 is a digital-to-analog converter that outputs VGA signals from a
-parallel video input.
-
-Required properties:
-
-- compatible: Should be "adi,adv7123"
-
-Optional properties:
-
-- psave-gpios: Power save control GPIO
-
-Required nodes:
-
-The ADV7123 has two video ports. Their connections are modeled using the OF
-graph bindings specified in Documentation/devicetree/bindings/graph.txt.
-
-- Video port 0 for DPI input
-- Video port 1 for VGA output
-
-
-Example
--------
-
- adv7123: encoder@0 {
- compatible = "adi,adv7123";
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
-
- adv7123_in: endpoint@0 {
- remote-endpoint = <&dpi_out>;
- };
- };
-
- port@1 {
- reg = <1>;
-
- adv7123_out: endpoint@0 {
- remote-endpoint = <&vga_connector_in>;
- };
- };
- };
- };
diff --git a/Documentation/devicetree/bindings/display/bridge/adi,adv7511.txt b/Documentation/devicetree/bindings/display/bridge/adi,adv7511.txt
deleted file mode 100644
index 2c887536258c..000000000000
--- a/Documentation/devicetree/bindings/display/bridge/adi,adv7511.txt
+++ /dev/null
@@ -1,142 +0,0 @@
-Analog Device ADV7511(W)/13/33 HDMI Encoders
------------------------------------------
-
-The ADV7511, ADV7511W, ADV7513 and ADV7533 are HDMI audio and video transmitters
-compatible with HDMI 1.4 and DVI 1.0. They support color space conversion,
-S/PDIF, CEC and HDCP. ADV7533 supports the DSI interface for input pixels, while
-the others support RGB interface.
-
-Required properties:
-
-- compatible: Should be one of:
- "adi,adv7511"
- "adi,adv7511w"
- "adi,adv7513"
- "adi,adv7533"
-
-- reg: I2C slave addresses
- The ADV7511 internal registers are split into four pages exposed through
- different I2C addresses, creating four register maps. Each map has it own
- I2C address and acts as a standard slave device on the I2C bus. The main
- address is mandatory, others are optional and revert to defaults if not
- specified.
-
-
-The ADV7511 supports a large number of input data formats that differ by their
-color depth, color format, clock mode, bit justification and random
-arrangement of components on the data bus. The combination of the following
-properties describe the input and map directly to the video input tables of the
-ADV7511 datasheet that document all the supported combinations.
-
-- adi,input-depth: Number of bits per color component at the input (8, 10 or
- 12).
-- adi,input-colorspace: The input color space, one of "rgb", "yuv422" or
- "yuv444".
-- adi,input-clock: The input clock type, one of "1x" (one clock cycle per
- pixel), "2x" (two clock cycles per pixel), "ddr" (one clock cycle per pixel,
- data driven on both edges).
-
-The following input format properties are required except in "rgb 1x" and
-"yuv444 1x" modes, in which case they must not be specified.
-
-- adi,input-style: The input components arrangement variant (1, 2 or 3), as
- listed in the input format tables in the datasheet.
-- adi,input-justification: The input bit justification ("left", "evenly",
- "right").
-
-- avdd-supply: A 1.8V supply that powers up the AVDD pin on the chip.
-- dvdd-supply: A 1.8V supply that powers up the DVDD pin on the chip.
-- pvdd-supply: A 1.8V supply that powers up the PVDD pin on the chip.
-- dvdd-3v-supply: A 3.3V supply that powers up the pin called DVDD_3V
- on the chip.
-- bgvdd-supply: A 1.8V supply that powers up the BGVDD pin. This is
- needed only for ADV7511.
-
-The following properties are required for ADV7533:
-
-- adi,dsi-lanes: Number of DSI data lanes connected to the DSI host. It should
- be one of 1, 2, 3 or 4.
-- a2vdd-supply: 1.8V supply that powers up the A2VDD pin on the chip.
-- v3p3-supply: A 3.3V supply that powers up the V3P3 pin on the chip.
-- v1p2-supply: A supply that powers up the V1P2 pin on the chip. It can be
- either 1.2V or 1.8V.
-
-Optional properties:
-
-- interrupts: Specifier for the ADV7511 interrupt
-- pd-gpios: Specifier for the GPIO connected to the power down signal
-
-- adi,clock-delay: Video data clock delay relative to the pixel clock, in ps
- (-1200 ps .. 1600 ps). Defaults to no delay.
-- adi,embedded-sync: The input uses synchronization signals embedded in the
- data stream (similar to BT.656). Defaults to separate H/V synchronization
- signals.
-- adi,disable-timing-generator: Only for ADV7533. Disables the internal timing
- generator. The chip will rely on the sync signals in the DSI data lanes,
- rather than generate its own timings for HDMI output.
-- clocks: from common clock binding: reference to the CEC clock.
-- clock-names: from common clock binding: must be "cec".
-- reg-names : Names of maps with programmable addresses.
- It can contain any map needing a non-default address.
- Possible maps names are : "main", "edid", "cec", "packet"
-
-Required nodes:
-
-The ADV7511 has two video ports. Their connections are modelled using the OF
-graph bindings specified in Documentation/devicetree/bindings/graph.txt.
-
-- Video port 0 for the RGB, YUV or DSI input. In the case of ADV7533, the
- remote endpoint phandle should be a reference to a valid mipi_dsi_host device
- node.
-- Video port 1 for the HDMI output
-- Audio port 2 for the HDMI audio input
-
-
-Example
--------
-
- adv7511w: hdmi@39 {
- compatible = "adi,adv7511w";
- /*
- * The EDID page will be accessible on address 0x66 on the I2C
- * bus. All other maps continue to use their default addresses.
- */
- reg = <0x39>, <0x66>;
- reg-names = "main", "edid";
- interrupt-parent = <&gpio3>;
- interrupts = <29 IRQ_TYPE_EDGE_FALLING>;
- clocks = <&cec_clock>;
- clock-names = "cec";
-
- adi,input-depth = <8>;
- adi,input-colorspace = "rgb";
- adi,input-clock = "1x";
- adi,input-style = <1>;
- adi,input-justification = "evenly";
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
- adv7511w_in: endpoint {
- remote-endpoint = <&dpi_out>;
- };
- };
-
- port@1 {
- reg = <1>;
- adv7511_out: endpoint {
- remote-endpoint = <&hdmi_connector_in>;
- };
- };
-
- port@2 {
- reg = <2>;
- codec_endpoint: endpoint {
- remote-endpoint = <&i2s0_cpu_endpoint>;
- };
- };
- };
- };
diff --git a/Documentation/devicetree/bindings/display/bridge/adi,adv7511.yaml b/Documentation/devicetree/bindings/display/bridge/adi,adv7511.yaml
new file mode 100644
index 000000000000..5bbe81862c8f
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/adi,adv7511.yaml
@@ -0,0 +1,237 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/adi,adv7511.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Analog Devices ADV7511/11W/13 HDMI Encoders
+
+maintainers:
+ - Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+
+description: |
+ The ADV7511, ADV7511W and ADV7513 are HDMI audio and video
+ transmitters compatible with HDMI 1.4 and DVI 1.0. They support color
+ space conversion, S/PDIF, CEC and HDCP. The transmitter input is
+ parallel RGB or YUV data.
+
+properties:
+ compatible:
+ enum:
+ - adi,adv7511
+ - adi,adv7511w
+ - adi,adv7513
+
+ reg:
+ description: |
+ I2C slave addresses.
+
+ The ADV7511/11W/13 internal registers are split into four pages
+ exposed through different I2C addresses, creating four register
+ maps. Each map has it own I2C address and acts as a standard slave
+ device on the I2C bus. The main address is mandatory, others are
+ optional and revert to defaults if not specified.
+ minItems: 1
+ maxItems: 4
+
+ reg-names:
+ description:
+ Names of maps with programmable addresses. It can contain any map
+ needing a non-default address.
+ minItems: 1
+ items:
+ - const: main
+ - const: edid
+ - const: cec
+ - const: packet
+
+ clocks:
+ description: Reference to the CEC clock.
+ maxItems: 1
+
+ clock-names:
+ const: cec
+
+ interrupts:
+ maxItems: 1
+
+ pd-gpios:
+ description: GPIO connected to the power down signal.
+ maxItems: 1
+
+ avdd-supply:
+ description: A 1.8V supply that powers up the AVDD pin.
+
+ dvdd-supply:
+ description: A 1.8V supply that powers up the DVDD pin.
+
+ pvdd-supply:
+ description: A 1.8V supply that powers up the PVDD pin.
+
+ dvdd-3v-supply:
+ description: A 3.3V supply that powers up the DVDD_3V pin.
+
+ bgvdd-supply:
+ description: A 1.8V supply that powers up the BGVDD pin.
+
+ adi,input-depth:
+ description: Number of bits per color component at the input.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [ 8, 10, 12 ]
+
+ adi,input-colorspace:
+ description: Input color space.
+ enum: [ rgb, yuv422, yuv444 ]
+
+ adi,input-clock:
+ description: |
+ Input clock type.
+ "1x": one clock cycle per pixel
+ "2x": two clock cycles per pixel
+ "dd": one clock cycle per pixel, data driven on both edges
+ enum: [ 1x, 2x, dd ]
+
+ adi,clock-delay:
+ description:
+ Video data clock delay relative to the pixel clock, in ps
+ (-1200ps .. 1600 ps).
+ $ref: /schemas/types.yaml#/definitions/uint32
+ default: 0
+
+ adi,embedded-sync:
+ description:
+ If defined, the input uses synchronization signals embedded in the
+ data stream (similar to BT.656).
+ type: boolean
+
+ adi,input-style:
+ description:
+ Input components arrangement variant as listed in the input
+ format tables in the datasheet.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [ 1, 2, 3 ]
+
+ adi,input-justification:
+ description: Input bit justification.
+ enum: [ left, evenly, right ]
+
+ ports:
+ description:
+ The ADV7511(W)/13 has two video ports and one audio port.
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ description: Video port for the RGB or YUV input.
+ $ref: /schemas/graph.yaml#/properties/port
+
+ port@1:
+ description: Video port for the HDMI output.
+ $ref: /schemas/graph.yaml#/properties/port
+
+ port@2:
+ description: Audio port for the HDMI output.
+ $ref: /schemas/graph.yaml#/properties/port
+
+# adi,input-colorspace and adi,input-clock are required except in
+# "rgb 1x" and "yuv444 1x" modes, in which case they must not be
+# specified.
+if:
+ not:
+ properties:
+ adi,input-colorspace:
+ contains:
+ enum: [ rgb, yuv444 ]
+ adi,input-clock:
+ contains:
+ const: 1x
+
+then:
+ required:
+ - adi,input-style
+ - adi,input-justification
+
+else:
+ properties:
+ adi,input-style: false
+ adi,input-justification: false
+
+
+required:
+ - compatible
+ - reg
+ - ports
+ - adi,input-depth
+ - adi,input-colorspace
+ - adi,input-clock
+ - avdd-supply
+ - dvdd-supply
+ - pvdd-supply
+ - dvdd-3v-supply
+ - bgvdd-supply
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ i2c@e6500000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ reg = <0 0xe6500000>;
+
+ adv7511w: hdmi@39 {
+ compatible = "adi,adv7511w";
+ /*
+ * The EDID page will be accessible on address 0x66 on the I2C
+ * bus. All other maps continue to use their default addresses.
+ */
+ reg = <0x39>, <0x66>;
+ reg-names = "main", "edid";
+ interrupt-parent = <&gpio3>;
+ interrupts = <29 IRQ_TYPE_EDGE_FALLING>;
+ clocks = <&cec_clock>;
+ clock-names = "cec";
+ avdd-supply = <&v1v8>;
+ dvdd-supply = <&v1v8>;
+ pvdd-supply = <&v1v8>;
+ dvdd-3v-supply = <&v3v3>;
+ bgvdd-supply = <&v1v8>;
+
+ adi,input-depth = <8>;
+ adi,input-colorspace = "yuv422";
+ adi,input-clock = "1x";
+
+ adi,input-style = <3>;
+ adi,input-justification = "right";
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ adv7511w_in: endpoint {
+ remote-endpoint = <&dpi_out>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ adv7511_out: endpoint {
+ remote-endpoint = <&hdmi_connector_in>;
+ };
+ };
+
+ port@2 {
+ reg = <2>;
+ codec_endpoint: endpoint {
+ remote-endpoint = <&i2s0_cpu_endpoint>;
+ };
+ };
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/bridge/adi,adv7533.yaml b/Documentation/devicetree/bindings/display/bridge/adi,adv7533.yaml
new file mode 100644
index 000000000000..ec89115c74e4
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/adi,adv7533.yaml
@@ -0,0 +1,188 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/adi,adv7533.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Analog Devices ADV7533/35 HDMI Encoders
+
+maintainers:
+ - Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+
+allOf:
+ - $ref: /schemas/sound/dai-common.yaml#
+
+description: |
+ The ADV7533 and ADV7535 are HDMI audio and video transmitters
+ compatible with HDMI 1.4 and DVI 1.0. They support color space
+ conversion, S/PDIF, CEC and HDCP. The transmitter input is MIPI DSI.
+
+properties:
+ compatible:
+ enum:
+ - adi,adv7533
+ - adi,adv7535
+
+ reg:
+ description: |
+ I2C slave addresses.
+
+ The ADV7533/35 internal registers are split into four pages
+ exposed through different I2C addresses, creating four register
+ maps. Each map has it own I2C address and acts as a standard slave
+ device on the I2C bus. The main address is mandatory, others are
+ optional and revert to defaults if not specified.
+ minItems: 1
+ maxItems: 4
+
+ reg-names:
+ description:
+ Names of maps with programmable addresses. It can contain any map
+ needing a non-default address.
+ minItems: 1
+ items:
+ - const: main
+ - const: edid
+ - const: cec
+ - const: packet
+
+ clocks:
+ description: Reference to the CEC clock.
+ maxItems: 1
+
+ clock-names:
+ const: cec
+
+ interrupts:
+ maxItems: 1
+
+ pd-gpios:
+ description: GPIO connected to the power down signal.
+ maxItems: 1
+
+ avdd-supply:
+ description: A 1.8V supply that powers up the AVDD pin.
+
+ dvdd-supply:
+ description: A 1.8V supply that powers up the DVDD pin.
+
+ pvdd-supply:
+ description: A 1.8V supply that powers up the PVDD pin.
+
+ a2vdd-supply:
+ description: A 1.8V supply that powers up the A2VDD pin.
+
+ v3p3-supply:
+ description: A 3.3V supply that powers up the V3P3 pin.
+
+ v1p2-supply:
+ description:
+ A supply that powers up the V1P2 pin. It can be either 1.2V
+ or 1.8V for ADV7533 but only 1.8V for ADV7535.
+
+ adi,disable-timing-generator:
+ description:
+ Disables the internal timing generator. The chip will rely on the
+ sync signals in the DSI data lanes, rather than generating its own
+ timings for HDMI output.
+ type: boolean
+
+ adi,dsi-lanes:
+ description: Number of DSI data lanes connected to the DSI host.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [ 2, 3, 4 ]
+
+ "#sound-dai-cells":
+ const: 0
+
+ ports:
+ description:
+ The ADV7533/35 has two video ports and one audio port.
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ description:
+ Video port for the DSI input. The remote endpoint phandle
+ should be a reference to a valid mipi_dsi_host_device.
+ $ref: /schemas/graph.yaml#/properties/port
+
+ port@1:
+ description: Video port for the HDMI output.
+ $ref: /schemas/graph.yaml#/properties/port
+
+ port@2:
+ description: Audio port for the HDMI output.
+ $ref: /schemas/graph.yaml#/properties/port
+
+required:
+ - compatible
+ - reg
+ - ports
+ - adi,dsi-lanes
+ - avdd-supply
+ - dvdd-supply
+ - pvdd-supply
+ - a2vdd-supply
+ - v3p3-supply
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ i2c@e6500000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ reg = <0 0xe6500000>;
+
+ adv7533: hdmi@39 {
+ compatible = "adi,adv7533";
+ /*
+ * The EDID page will be accessible on address 0x66 on the I2C
+ * bus. All other maps continue to use their default addresses.
+ */
+ reg = <0x39>, <0x66>;
+ reg-names = "main", "edid";
+ interrupt-parent = <&gpio3>;
+ interrupts = <29 IRQ_TYPE_EDGE_FALLING>;
+ clocks = <&cec_clock>;
+ clock-names = "cec";
+ adi,dsi-lanes = <4>;
+ avdd-supply = <&v1v8>;
+ dvdd-supply = <&v1v8>;
+ pvdd-supply = <&v1v8>;
+ a2vdd-supply = <&v1v8>;
+ v3p3-supply = <&v3v3>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ adv7533_in: endpoint {
+ remote-endpoint = <&dsi_out>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ adv7533_out: endpoint {
+ remote-endpoint = <&hdmi_connector_in>;
+ };
+ };
+
+ port@2 {
+ reg = <2>;
+ codec_endpoint: endpoint {
+ remote-endpoint = <&i2s0_cpu_endpoint>;
+ };
+ };
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml b/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml
new file mode 100644
index 000000000000..a1ed1004651b
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml
@@ -0,0 +1,187 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright 2019 Analogix Semiconductor, Inc.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/analogix,anx7625.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Analogix ANX7625 SlimPort (4K Mobile HD Transmitter)
+
+maintainers:
+ - Xin Ji <xji@analogixsemi.com>
+
+description: |
+ The ANX7625 is an ultra-low power 4K Mobile HD Transmitter
+ designed for portable devices.
+
+properties:
+ compatible:
+ const: analogix,anx7625
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ description: used for interrupt pin B8.
+ maxItems: 1
+
+ enable-gpios:
+ description: used for power on chip control, POWER_EN pin D2.
+ maxItems: 1
+
+ reset-gpios:
+ description: used for reset chip control, RESET_N pin B7.
+ maxItems: 1
+
+ vdd10-supply:
+ description: Regulator that provides the supply 1.0V power.
+
+ vdd18-supply:
+ description: Regulator that provides the supply 1.8V power.
+
+ vdd33-supply:
+ description: Regulator that provides the supply 3.3V power.
+
+ analogix,lane0-swing:
+ $ref: /schemas/types.yaml#/definitions/uint8-array
+ minItems: 1
+ maxItems: 20
+ description:
+ an array of swing register setting for DP tx lane0 PHY.
+ Registers 0~9 are Swing0_Pre0, Swing1_Pre0, Swing2_Pre0,
+ Swing3_Pre0, Swing0_Pre1, Swing1_Pre1, Swing2_Pre1, Swing0_Pre2,
+ Swing1_Pre2, Swing0_Pre3, they are for [Boost control] and
+ [Swing control] setting.
+ Registers 0~9, bit 3:0 is [Boost control], these bits control
+ post cursor manual, increase the [Boost control] to increase
+ Pre-emphasis value.
+ Registers 0~9, bit 6:4 is [Swing control], these bits control
+ swing manual, increase [Swing control] setting to add Vp-p value
+ for each Swing, Pre.
+ Registers 10~19 are Swing0_Pre0, Swing1_Pre0, Swing2_Pre0,
+ Swing3_Pre0, Swing0_Pre1, Swing1_Pre1, Swing2_Pre1, Swing0_Pre2,
+ Swing1_Pre2, Swing0_Pre3, they are for [R select control] and
+ [R Termination control] setting.
+ Registers 10~19, bit 4:0 is [R select control], these bits are
+ compensation manual, increase it can enhance IO driven strength
+ and Vp-p.
+ Registers 10~19, bit 5:6 is [R termination control], these bits
+ adjust 50ohm impedance of DP tx termination. 00:55 ohm,
+ 01:50 ohm(default), 10:45 ohm, 11:40 ohm.
+
+ analogix,lane1-swing:
+ $ref: /schemas/types.yaml#/definitions/uint8-array
+ minItems: 1
+ maxItems: 20
+ description:
+ an array of swing register setting for DP tx lane1 PHY.
+ DP TX lane1 swing register setting same with lane0
+ swing, please refer lane0-swing property description.
+
+ analogix,audio-enable:
+ type: boolean
+ description: let the driver enable audio HDMI codec function or not.
+
+ aux-bus:
+ $ref: /schemas/display/dp-aux-bus.yaml#
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ unevaluatedProperties: false
+ description:
+ MIPI DSI/DPI input.
+
+ properties:
+ endpoint:
+ $ref: /schemas/media/video-interfaces.yaml#
+ type: object
+ additionalProperties: false
+
+ properties:
+ remote-endpoint: true
+
+ bus-type:
+ enum: [7]
+ default: 1
+
+ data-lanes: true
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Video port for panel or connector.
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - vdd10-supply
+ - vdd18-supply
+ - vdd33-supply
+ - ports
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ encoder@58 {
+ compatible = "analogix,anx7625";
+ reg = <0x58>;
+ enable-gpios = <&pio 45 GPIO_ACTIVE_HIGH>;
+ reset-gpios = <&pio 73 GPIO_ACTIVE_HIGH>;
+ vdd10-supply = <&pp1000_mipibrdg>;
+ vdd18-supply = <&pp1800_mipibrdg>;
+ vdd33-supply = <&pp3300_mipibrdg>;
+ analogix,audio-enable;
+ analogix,lane0-swing = /bits/ 8 <0x14 0x54 0x64 0x74>;
+ analogix,lane1-swing = /bits/ 8 <0x14 0x54 0x64 0x74>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ mipi2dp_bridge_in: port@0 {
+ reg = <0>;
+ anx7625_in: endpoint {
+ remote-endpoint = <&mipi_dsi>;
+ bus-type = <7>;
+ data-lanes = <0 1 2 3>;
+ };
+ };
+
+ mipi2dp_bridge_out: port@1 {
+ reg = <1>;
+ anx7625_out: endpoint {
+ remote-endpoint = <&panel_in>;
+ };
+ };
+ };
+
+ aux-bus {
+ panel {
+ compatible = "innolux,n125hce-gn1";
+ power-supply = <&pp3300_disp_x>;
+ backlight = <&backlight_lcd0>;
+
+ port {
+ panel_in: endpoint {
+ remote-endpoint = <&anx7625_out>;
+ };
+ };
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/bridge/analogix,anx7814.yaml b/Documentation/devicetree/bindings/display/bridge/analogix,anx7814.yaml
new file mode 100644
index 000000000000..4509c496731b
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/analogix,anx7814.yaml
@@ -0,0 +1,109 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/analogix,anx7814.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Analogix ANX7814 SlimPort (Full-HD Transmitter)
+
+maintainers:
+ - Andrzej Hajda <andrzej.hajda@intel.com>
+ - Neil Armstrong <neil.armstrong@linaro.org>
+ - Robert Foss <robert.foss@linaro.org>
+
+properties:
+ compatible:
+ enum:
+ - analogix,anx7808
+ - analogix,anx7812
+ - analogix,anx7814
+ - analogix,anx7816
+ - analogix,anx7818
+
+ reg:
+ maxItems: 1
+ description: I2C address of the device.
+
+ interrupts:
+ maxItems: 1
+ description: Should contain the INTP interrupt.
+
+ hpd-gpios:
+ deprecated: true
+ maxItems: 1
+ description: Which GPIO to use for hpd.
+
+ pd-gpios:
+ maxItems: 1
+ description: Which GPIO to use for power down.
+
+ reset-gpios:
+ maxItems: 1
+ description: Which GPIO to use for reset.
+
+ dvdd10-supply:
+ description: Regulator for 1.0V digital core power.
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Video port for HDMI input.
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Video port for SlimPort, DisplayPort, eDP or MyDP output.
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - ports
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/gpio/gpio.h>
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ anx7814: bridge@38 {
+ compatible = "analogix,anx7814";
+ reg = <0x38>;
+ interrupt-parent = <&gpio0>;
+ interrupts = <99 IRQ_TYPE_LEVEL_LOW>; /* INTP */
+ pd-gpios = <&pio 33 GPIO_ACTIVE_HIGH>;
+ reset-gpios = <&pio 98 GPIO_ACTIVE_HIGH>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ anx7814_in: endpoint {
+ remote-endpoint = <&hdmi0_out>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ anx7814_out: endpoint {
+ remote-endpoint = <&edp_out>;
+ };
+ };
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/bridge/analogix,dp.yaml b/Documentation/devicetree/bindings/display/bridge/analogix,dp.yaml
new file mode 100644
index 000000000000..62f0521b0924
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/analogix,dp.yaml
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/analogix,dp.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Analogix Display Port bridge
+
+maintainers:
+ - Rob Herring <robh@kernel.org>
+
+properties:
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks: true
+
+ clock-names: true
+
+ phys: true
+
+ phy-names:
+ const: dp
+
+ force-hpd:
+ type: boolean
+ description:
+ Indicate driver need force hpd when hpd detect failed, this
+ is used for some eDP screen which don not have a hpd signal.
+
+ hpd-gpios:
+ description:
+ Hotplug detect GPIO.
+ Indicates which GPIO should be used for hotplug detection
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Input node to receive pixel data.
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Port node with one endpoint connected to a dp-connector node.
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - reg
+ - interrupts
+ - clock-names
+ - clocks
+ - ports
+
+additionalProperties: true
diff --git a/Documentation/devicetree/bindings/display/bridge/analogix_dp.txt b/Documentation/devicetree/bindings/display/bridge/analogix_dp.txt
deleted file mode 100644
index 027d76c27a41..000000000000
--- a/Documentation/devicetree/bindings/display/bridge/analogix_dp.txt
+++ /dev/null
@@ -1,51 +0,0 @@
-Analogix Display Port bridge bindings
-
-Required properties for dp-controller:
- -compatible:
- platform specific such as:
- * "samsung,exynos5-dp"
- * "rockchip,rk3288-dp"
- * "rockchip,rk3399-edp"
- -reg:
- physical base address of the controller and length
- of memory mapped region.
- -interrupts:
- interrupt combiner values.
- -clocks:
- from common clock binding: handle to dp clock.
- -clock-names:
- from common clock binding: Shall be "dp".
- -phys:
- from general PHY binding: the phandle for the PHY device.
- -phy-names:
- from general PHY binding: Should be "dp".
-
-Optional properties for dp-controller:
- -force-hpd:
- Indicate driver need force hpd when hpd detect failed, this
- is used for some eDP screen which don't have hpd signal.
- -hpd-gpios:
- Hotplug detect GPIO.
- Indicates which GPIO should be used for hotplug detection
- -port@[X]: SoC specific port nodes with endpoint definitions as defined
- in Documentation/devicetree/bindings/media/video-interfaces.txt,
- please refer to the SoC specific binding document:
- * Documentation/devicetree/bindings/display/exynos/exynos_dp.txt
- * Documentation/devicetree/bindings/display/rockchip/analogix_dp-rockchip.txt
-
-[1]: Documentation/devicetree/bindings/media/video-interfaces.txt
--------------------------------------------------------------------------------
-
-Example:
-
- dp-controller {
- compatible = "samsung,exynos5-dp";
- reg = <0x145b0000 0x10000>;
- interrupts = <10 3>;
- interrupt-parent = <&combiner>;
- clocks = <&clock 342>;
- clock-names = "dp";
-
- phys = <&dp_phy>;
- phy-names = "dp";
- };
diff --git a/Documentation/devicetree/bindings/display/bridge/anx6345.yaml b/Documentation/devicetree/bindings/display/bridge/anx6345.yaml
new file mode 100644
index 000000000000..514f58852990
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/anx6345.yaml
@@ -0,0 +1,94 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/anx6345.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Analogix ANX6345 eDP Transmitter
+
+maintainers:
+ - Torsten Duwe <duwe@lst.de>
+
+description: |
+ The ANX6345 is an ultra-low power Full-HD eDP transmitter designed for
+ portable devices.
+
+properties:
+ compatible:
+ const: analogix,anx6345
+
+ reg:
+ maxItems: 1
+ description: base I2C address of the device
+
+ reset-gpios:
+ maxItems: 1
+ description: GPIO connected to active low reset
+
+ dvdd12-supply:
+ description: Regulator for 1.2V digital core power.
+
+ dvdd25-supply:
+ description: Regulator for 2.5V digital core power.
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Video port for LVTTL input
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Video port for eDP output (panel or connector).
+ May be omitted if EDID works reliably.
+
+ required:
+ - port@0
+
+required:
+ - compatible
+ - reg
+ - reset-gpios
+ - dvdd12-supply
+ - dvdd25-supply
+ - ports
+
+additionalProperties: false
+
+examples:
+ - |
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ anx6345: anx6345@38 {
+ compatible = "analogix,anx6345";
+ reg = <0x38>;
+ reset-gpios = <&pio42 1 /* GPIO_ACTIVE_LOW */>;
+ dvdd25-supply = <&reg_dldo2>;
+ dvdd12-supply = <&reg_fldo1>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ anx6345_in: port@0 {
+ reg = <0>;
+ anx6345_in_tcon0: endpoint {
+ remote-endpoint = <&tcon0_out_anx6345>;
+ };
+ };
+
+ anx6345_out: port@1 {
+ reg = <1>;
+ anx6345_out_panel: endpoint {
+ remote-endpoint = <&panel_in_edp>;
+ };
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/bridge/anx7814.txt b/Documentation/devicetree/bindings/display/bridge/anx7814.txt
deleted file mode 100644
index dbd7c84ee584..000000000000
--- a/Documentation/devicetree/bindings/display/bridge/anx7814.txt
+++ /dev/null
@@ -1,38 +0,0 @@
-Analogix ANX7814 SlimPort (Full-HD Transmitter)
------------------------------------------------
-
-The ANX7814 is an ultra-low power Full-HD (1080p60) SlimPort transmitter
-designed for portable devices.
-
-Required properties:
-
- - compatible : "analogix,anx7814"
- - reg : I2C address of the device
- - interrupts : Should contain the INTP interrupt
- - hpd-gpios : Which GPIO to use for hpd
- - pd-gpios : Which GPIO to use for power down
- - reset-gpios : Which GPIO to use for reset
-
-Optional properties:
-
- - dvdd10-supply : Regulator for 1.0V digital core power.
- - Video port for HDMI input, using the DT bindings defined in [1].
-
-[1]: Documentation/devicetree/bindings/media/video-interfaces.txt
-
-Example:
-
- anx7814: anx7814@38 {
- compatible = "analogix,anx7814";
- reg = <0x38>;
- interrupt-parent = <&gpio0>;
- interrupts = <99 IRQ_TYPE_LEVEL_LOW>; /* INTP */
- hpd-gpios = <&pio 36 GPIO_ACTIVE_HIGH>;
- pd-gpios = <&pio 33 GPIO_ACTIVE_HIGH>;
- reset-gpios = <&pio 98 GPIO_ACTIVE_HIGH>;
- port {
- anx7814_in: endpoint {
- remote-endpoint = <&hdmi0_out>;
- };
- };
- };
diff --git a/Documentation/devicetree/bindings/display/bridge/cdns,dsi.txt b/Documentation/devicetree/bindings/display/bridge/cdns,dsi.txt
deleted file mode 100644
index f5725bb6c61c..000000000000
--- a/Documentation/devicetree/bindings/display/bridge/cdns,dsi.txt
+++ /dev/null
@@ -1,133 +0,0 @@
-Cadence DSI bridge
-==================
-
-The Cadence DSI bridge is a DPI to DSI bridge supporting up to 4 DSI lanes.
-
-Required properties:
-- compatible: should be set to "cdns,dsi".
-- reg: physical base address and length of the controller's registers.
-- interrupts: interrupt line connected to the DSI bridge.
-- clocks: DSI bridge clocks.
-- clock-names: must contain "dsi_p_clk" and "dsi_sys_clk".
-- phys: phandle link to the MIPI D-PHY controller.
-- phy-names: must contain "dphy".
-- #address-cells: must be set to 1.
-- #size-cells: must be set to 0.
-
-Optional properties:
-- resets: DSI reset lines.
-- reset-names: can contain "dsi_p_rst".
-
-Required subnodes:
-- ports: Ports as described in Documentation/devicetree/bindings/graph.txt.
- 2 ports are available:
- * port 0: this port is only needed if some of your DSI devices are
- controlled through an external bus like I2C or SPI. Can have at
- most 4 endpoints. The endpoint number is directly encoding the
- DSI virtual channel used by this device.
- * port 1: represents the DPI input.
- Other ports will be added later to support the new kind of inputs.
-
-- one subnode per DSI device connected on the DSI bus. Each DSI device should
- contain a reg property encoding its virtual channel.
-
-Cadence DPHY
-============
-
-Cadence DPHY block.
-
-Required properties:
-- compatible: should be set to "cdns,dphy".
-- reg: physical base address and length of the DPHY registers.
-- clocks: DPHY reference clocks.
-- clock-names: must contain "psm" and "pll_ref".
-- #phy-cells: must be set to 0.
-
-
-Example:
- dphy0: dphy@fd0e0000{
- compatible = "cdns,dphy";
- reg = <0x0 0xfd0e0000 0x0 0x1000>;
- clocks = <&psm_clk>, <&pll_ref_clk>;
- clock-names = "psm", "pll_ref";
- #phy-cells = <0>;
- };
-
- dsi0: dsi@fd0c0000 {
- compatible = "cdns,dsi";
- reg = <0x0 0xfd0c0000 0x0 0x1000>;
- clocks = <&pclk>, <&sysclk>;
- clock-names = "dsi_p_clk", "dsi_sys_clk";
- interrupts = <1>;
- phys = <&dphy0>;
- phy-names = "dphy";
- #address-cells = <1>;
- #size-cells = <0>;
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@1 {
- reg = <1>;
- dsi0_dpi_input: endpoint {
- remote-endpoint = <&xxx_dpi_output>;
- };
- };
- };
-
- panel: dsi-dev@0 {
- compatible = "<vendor,panel>";
- reg = <0>;
- };
- };
-
-or
-
- dsi0: dsi@fd0c0000 {
- compatible = "cdns,dsi";
- reg = <0x0 0xfd0c0000 0x0 0x1000>;
- clocks = <&pclk>, <&sysclk>;
- clock-names = "dsi_p_clk", "dsi_sys_clk";
- interrupts = <1>;
- phys = <&dphy1>;
- phy-names = "dphy";
- #address-cells = <1>;
- #size-cells = <0>;
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
- #address-cells = <1>;
- #size-cells = <0>;
-
- dsi0_output: endpoint@0 {
- reg = <0>;
- remote-endpoint = <&dsi_panel_input>;
- };
- };
-
- port@1 {
- reg = <1>;
- dsi0_dpi_input: endpoint {
- remote-endpoint = <&xxx_dpi_output>;
- };
- };
- };
- };
-
- i2c@xxx {
- panel: panel@59 {
- compatible = "<vendor,panel>";
- reg = <0x59>;
-
- port {
- dsi_panel_input: endpoint {
- remote-endpoint = <&dsi0_output>;
- };
- };
- };
- };
diff --git a/Documentation/devicetree/bindings/display/bridge/cdns,dsi.yaml b/Documentation/devicetree/bindings/display/bridge/cdns,dsi.yaml
new file mode 100644
index 000000000000..23060324d16e
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/cdns,dsi.yaml
@@ -0,0 +1,180 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/cdns,dsi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Cadence DSI bridge
+
+maintainers:
+ - Boris Brezillon <boris.brezillon@bootlin.com>
+
+description: |
+ CDNS DSI is a bridge device which converts DPI to DSI
+
+properties:
+ compatible:
+ enum:
+ - cdns,dsi
+ - ti,j721e-dsi
+
+ reg:
+ minItems: 1
+ items:
+ - description:
+ Register block for controller's registers.
+ - description:
+ Register block for wrapper settings registers in case of TI J7 SoCs.
+
+ clocks:
+ items:
+ - description: PSM clock, used by the IP
+ - description: sys clock, used by the IP
+
+ clock-names:
+ items:
+ - const: dsi_p_clk
+ - const: dsi_sys_clk
+
+ phys:
+ maxItems: 1
+
+ phy-names:
+ const: dphy
+
+ interrupts:
+ maxItems: 1
+
+ resets:
+ maxItems: 1
+
+ reset-names:
+ const: dsi_p_rst
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Output port representing the DSI output. It can have
+ at most 4 endpoints. The endpoint number is directly encoding
+ the DSI virtual channel used by this device.
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Input port representing the DPI input.
+
+ required:
+ - port@1
+
+allOf:
+ - $ref: ../dsi-controller.yaml#
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: ti,j721e-dsi
+ then:
+ properties:
+ reg:
+ minItems: 2
+ maxItems: 2
+ power-domains:
+ maxItems: 1
+ else:
+ properties:
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+ - phys
+ - phy-names
+ - ports
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ bus {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ dsi@fd0c0000 {
+ compatible = "cdns,dsi";
+ reg = <0x0 0xfd0c0000 0x0 0x1000>;
+ clocks = <&pclk>, <&sysclk>;
+ clock-names = "dsi_p_clk", "dsi_sys_clk";
+ interrupts = <1>;
+ phys = <&dphy0>;
+ phy-names = "dphy";
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@1 {
+ reg = <1>;
+ endpoint {
+ remote-endpoint = <&xxx_dpi_output>;
+ };
+ };
+ };
+
+ panel@0 {
+ compatible = "panasonic,vvx10f034n00";
+ reg = <0>;
+ power-supply = <&vcc_lcd_reg>;
+ };
+ };
+ };
+
+ - |
+ bus {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ dsi@fd0c0000 {
+ compatible = "cdns,dsi";
+ reg = <0x0 0xfd0c0000 0x0 0x1000>;
+ clocks = <&pclk>, <&sysclk>;
+ clock-names = "dsi_p_clk", "dsi_sys_clk";
+ interrupts = <1>;
+ phys = <&dphy1>;
+ phy-names = "dphy";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ endpoint@0 {
+ reg = <0>;
+ remote-endpoint = <&dsi_panel_input>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ endpoint {
+ remote-endpoint = <&xxx_dpi_output>;
+ };
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/bridge/cdns,mhdp8546.yaml b/Documentation/devicetree/bindings/display/bridge/cdns,mhdp8546.yaml
new file mode 100644
index 000000000000..c2b369456e4e
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/cdns,mhdp8546.yaml
@@ -0,0 +1,164 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/cdns,mhdp8546.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Cadence MHDP8546 bridge
+
+maintainers:
+ - Swapnil Jakhade <sjakhade@cadence.com>
+ - Yuti Amonkar <yamonkar@cadence.com>
+
+properties:
+ compatible:
+ enum:
+ - cdns,mhdp8546
+ - ti,j721e-mhdp8546
+
+ reg:
+ minItems: 1
+ items:
+ - description:
+ Register block of mhdptx apb registers up to PHY mapped area (AUX_CONFIG_P).
+ The AUX and PMA registers are not part of this range, they are instead
+ included in the associated PHY.
+ - description:
+ Register block for DSS_EDP0_INTG_CFG_VP registers in case of TI J7 SoCs.
+ - description:
+ Register block of mhdptx sapb registers.
+
+ reg-names:
+ minItems: 1
+ items:
+ - const: mhdptx
+ - const: j721e-intg
+ - const: mhdptx-sapb
+
+ clocks:
+ maxItems: 1
+ description:
+ DP bridge clock, used by the IP to know how to translate a number of
+ clock cycles into a time (which is used to comply with DP standard timings
+ and delays).
+
+ phys:
+ maxItems: 1
+ description:
+ phandle to the DisplayPort PHY.
+
+ phy-names:
+ items:
+ - const: dpphy
+
+ power-domains:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ First input port representing the DP bridge input.
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Second input port representing the DP bridge input.
+
+ port@2:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Third input port representing the DP bridge input.
+
+ port@3:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Fourth input port representing the DP bridge input.
+
+ port@4:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Output port representing the DP bridge output.
+
+ required:
+ - port@0
+ - port@4
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: ti,j721e-mhdp8546
+ then:
+ properties:
+ reg:
+ minItems: 2
+ maxItems: 3
+ reg-names:
+ minItems: 2
+ maxItems: 3
+ else:
+ properties:
+ reg:
+ minItems: 1
+ maxItems: 2
+ reg-names:
+ minItems: 1
+ maxItems: 2
+
+required:
+ - compatible
+ - clocks
+ - reg
+ - reg-names
+ - phys
+ - phy-names
+ - interrupts
+ - ports
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ bus {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ mhdp: dp-bridge@f0fb000000 {
+ compatible = "cdns,mhdp8546";
+ reg = <0xf0 0xfb000000 0x0 0x1000000>;
+ reg-names = "mhdptx";
+ clocks = <&mhdp_clock>;
+ phys = <&dp_phy>;
+ phy-names = "dpphy";
+ interrupts = <GIC_SPI 614 IRQ_TYPE_LEVEL_HIGH>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ dp_bridge_input: endpoint {
+ remote-endpoint = <&xxx_dpi_output>;
+ };
+ };
+
+ port@4 {
+ reg = <4>;
+ dp_bridge_output: endpoint {
+ remote-endpoint = <&xxx_dp_connector_input>;
+ };
+ };
+ };
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/display/bridge/chipone,icn6211.yaml b/Documentation/devicetree/bindings/display/bridge/chipone,icn6211.yaml
new file mode 100644
index 000000000000..5fb54375aeb6
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/chipone,icn6211.yaml
@@ -0,0 +1,123 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/chipone,icn6211.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Chipone ICN6211 MIPI-DSI to RGB Converter bridge
+
+maintainers:
+ - Jagan Teki <jagan@amarulasolutions.com>
+
+description: |
+ ICN6211 is MIPI-DSI to RGB Converter bridge from chipone.
+
+ It has a flexible configuration of MIPI DSI signal input and
+ produce RGB565, RGB666, RGB888 output format.
+
+properties:
+ compatible:
+ enum:
+ - chipone,icn6211
+
+ reg:
+ maxItems: 1
+ description: virtual channel number of a DSI peripheral
+
+ clock-names:
+ const: refclk
+
+ clocks:
+ maxItems: 1
+ description: |
+ Optional external clock connected to REF_CLK input.
+ The clock rate must be in 10..154 MHz range.
+
+ enable-gpios:
+ description: Bridge EN pin, chip is reset when EN is low.
+
+ vdd1-supply:
+ description: A 1.8V/2.5V/3.3V supply that power the MIPI RX.
+
+ vdd2-supply:
+ description: A 1.8V/2.5V/3.3V supply that power the PLL.
+
+ vdd3-supply:
+ description: A 1.8V/2.5V/3.3V supply that power the RGB output.
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ unevaluatedProperties: false
+ description:
+ Video port for MIPI DSI input
+
+ properties:
+ endpoint:
+ $ref: /schemas/media/video-interfaces.yaml#
+ unevaluatedProperties: false
+
+ properties:
+ data-lanes:
+ description: array of physical DSI data lane indexes.
+ minItems: 1
+ items:
+ - const: 1
+ - const: 2
+ - const: 3
+ - const: 4
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Video port for MIPI DPI output (panel or connector).
+
+ required:
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - enable-gpios
+ - ports
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ dsi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ bridge@0 {
+ compatible = "chipone,icn6211";
+ reg = <0>;
+ enable-gpios = <&r_pio 0 5 GPIO_ACTIVE_HIGH>; /* LCD-RST: PL5 */
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ bridge_in_dsi: endpoint {
+ remote-endpoint = <&dsi_out_bridge>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ bridge_out_panel: endpoint {
+ remote-endpoint = <&panel_out_bridge>;
+ };
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/bridge/chrontel,ch7033.yaml b/Documentation/devicetree/bindings/display/bridge/chrontel,ch7033.yaml
new file mode 100644
index 000000000000..b0589fa16736
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/chrontel,ch7033.yaml
@@ -0,0 +1,77 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (C) 2019,2020 Lubomir Rintel <lkundrak@v3.sk>
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/chrontel,ch7033.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Chrontel CH7033 Video Encoder
+
+maintainers:
+ - Lubomir Rintel <lkundrak@v3.sk>
+
+properties:
+ compatible:
+ const: chrontel,ch7033
+
+ reg:
+ maxItems: 1
+ description: I2C address of the device
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: |
+ Video port for RGB input.
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: |
+ DVI port, should be connected to a node compatible with the
+ dvi-connector binding.
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - ports
+
+additionalProperties: false
+
+examples:
+ - |
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ vga-dvi-encoder@76 {
+ compatible = "chrontel,ch7033";
+ reg = <0x76>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ endpoint {
+ remote-endpoint = <&lcd0_rgb_out>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ endpoint {
+ remote-endpoint = <&dvi_in>;
+ };
+ };
+
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/bridge/dumb-vga-dac.txt b/Documentation/devicetree/bindings/display/bridge/dumb-vga-dac.txt
deleted file mode 100644
index 164cbb15f04c..000000000000
--- a/Documentation/devicetree/bindings/display/bridge/dumb-vga-dac.txt
+++ /dev/null
@@ -1,50 +0,0 @@
-Dumb RGB to VGA DAC bridge
----------------------------
-
-This binding is aimed for dumb RGB to VGA DAC based bridges that do not require
-any configuration.
-
-Required properties:
-
-- compatible: Must be "dumb-vga-dac"
-
-Required nodes:
-
-This device has two video ports. Their connections are modelled using the OF
-graph bindings specified in Documentation/devicetree/bindings/graph.txt.
-
-- Video port 0 for RGB input
-- Video port 1 for VGA output
-
-Optional properties:
-- vdd-supply: Power supply for DAC
-
-Example
--------
-
-bridge {
- compatible = "dumb-vga-dac";
- #address-cells = <1>;
- #size-cells = <0>;
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
-
- vga_bridge_in: endpoint {
- remote-endpoint = <&tcon0_out_vga>;
- };
- };
-
- port@1 {
- reg = <1>;
-
- vga_bridge_out: endpoint {
- remote-endpoint = <&vga_con_in>;
- };
- };
- };
-};
diff --git a/Documentation/devicetree/bindings/display/bridge/dw_hdmi.txt b/Documentation/devicetree/bindings/display/bridge/dw_hdmi.txt
deleted file mode 100644
index 33bf981fbe33..000000000000
--- a/Documentation/devicetree/bindings/display/bridge/dw_hdmi.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-Synopsys DesignWare HDMI TX Encoder
-===================================
-
-This document defines device tree properties for the Synopsys DesignWare HDMI
-TX Encoder (DWC HDMI TX). It doesn't constitue a device tree binding
-specification by itself but is meant to be referenced by platform-specific
-device tree bindings.
-
-When referenced from platform device tree bindings the properties defined in
-this document are defined as follows. The platform device tree bindings are
-responsible for defining whether each property is required or optional.
-
-- reg: Memory mapped base address and length of the DWC HDMI TX registers.
-
-- reg-io-width: Width of the registers specified by the reg property. The
- value is expressed in bytes and must be equal to 1 or 4 if specified. The
- register width defaults to 1 if the property is not present.
-
-- interrupts: Reference to the DWC HDMI TX interrupt.
-
-- clocks: References to all the clocks specified in the clock-names property
- as specified in Documentation/devicetree/bindings/clock/clock-bindings.txt.
-
-- clock-names: The DWC HDMI TX uses the following clocks.
-
- - "iahb" is the bus clock for either AHB and APB (mandatory).
- - "isfr" is the internal register configuration clock (mandatory).
- - "cec" is the HDMI CEC controller main clock (optional).
-
-- ports: The connectivity of the DWC HDMI TX with the rest of the system is
- expressed in using ports as specified in the device graph bindings defined
- in Documentation/devicetree/bindings/graph.txt. The numbering of the ports
- is platform-specific.
diff --git a/Documentation/devicetree/bindings/display/bridge/dw_mipi_dsi.txt b/Documentation/devicetree/bindings/display/bridge/dw_mipi_dsi.txt
deleted file mode 100644
index b13adf30b8d3..000000000000
--- a/Documentation/devicetree/bindings/display/bridge/dw_mipi_dsi.txt
+++ /dev/null
@@ -1,32 +0,0 @@
-Synopsys DesignWare MIPI DSI host controller
-============================================
-
-This document defines device tree properties for the Synopsys DesignWare MIPI
-DSI host controller. It doesn't constitue a device tree binding specification
-by itself but is meant to be referenced by platform-specific device tree
-bindings.
-
-When referenced from platform device tree bindings the properties defined in
-this document are defined as follows. The platform device tree bindings are
-responsible for defining whether each optional property is used or not.
-
-- reg: Memory mapped base address and length of the DesignWare MIPI DSI
- host controller registers. (mandatory)
-
-- clocks: References to all the clocks specified in the clock-names property
- as specified in [1]. (mandatory)
-
-- clock-names:
- - "pclk" is the peripheral clock for either AHB and APB. (mandatory)
- - "px_clk" is the pixel clock for the DPI/RGB input. (optional)
-
-- resets: References to all the resets specified in the reset-names property
- as specified in [2]. (optional)
-
-- reset-names: string reset name, must be "apb" if used. (optional)
-
-- panel or bridge node: see [3]. (mandatory)
-
-[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-[2] Documentation/devicetree/bindings/reset/reset.txt
-[3] Documentation/devicetree/bindings/display/mipi-dsi-bus.txt
diff --git a/Documentation/devicetree/bindings/display/bridge/fsl,imx8mp-hdmi-tx.yaml b/Documentation/devicetree/bindings/display/bridge/fsl,imx8mp-hdmi-tx.yaml
new file mode 100644
index 000000000000..05442d437755
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/fsl,imx8mp-hdmi-tx.yaml
@@ -0,0 +1,102 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/fsl,imx8mp-hdmi-tx.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX8MP DWC HDMI TX Encoder
+
+maintainers:
+ - Lucas Stach <l.stach@pengutronix.de>
+
+description:
+ The i.MX8MP HDMI transmitter is a Synopsys DesignWare
+ HDMI 2.0a TX controller IP.
+
+allOf:
+ - $ref: /schemas/display/bridge/synopsys,dw-hdmi.yaml#
+
+properties:
+ compatible:
+ enum:
+ - fsl,imx8mp-hdmi-tx
+
+ reg-io-width:
+ const: 1
+
+ clocks:
+ maxItems: 4
+
+ clock-names:
+ items:
+ - const: iahb
+ - const: isfr
+ - const: cec
+ - const: pix
+
+ power-domains:
+ maxItems: 1
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Parallel RGB input port
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: HDMI output port
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - interrupts
+ - power-domains
+ - ports
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/imx8mp-clock.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/power/imx8mp-power.h>
+
+ hdmi@32fd8000 {
+ compatible = "fsl,imx8mp-hdmi-tx";
+ reg = <0x32fd8000 0x7eff>;
+ interrupts = <0 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk IMX8MP_CLK_HDMI_APB>,
+ <&clk IMX8MP_CLK_HDMI_REF_266M>,
+ <&clk IMX8MP_CLK_32K>,
+ <&hdmi_tx_phy>;
+ clock-names = "iahb", "isfr", "cec", "pix";
+ power-domains = <&hdmi_blk_ctrl IMX8MP_HDMIBLK_PD_HDMI_TX>;
+ reg-io-width = <1>;
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ port@0 {
+ reg = <0>;
+
+ endpoint {
+ remote-endpoint = <&pvi_to_hdmi_tx>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ endpoint {
+ remote-endpoint = <&hdmi0_con>;
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/bridge/fsl,imx8qxp-ldb.yaml b/Documentation/devicetree/bindings/display/bridge/fsl,imx8qxp-ldb.yaml
new file mode 100644
index 000000000000..94543006f5de
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/fsl,imx8qxp-ldb.yaml
@@ -0,0 +1,173 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/fsl,imx8qxp-ldb.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX8qm/qxp LVDS Display Bridge
+
+maintainers:
+ - Liu Ying <victor.liu@nxp.com>
+
+description: |
+ The Freescale i.MX8qm/qxp LVDS Display Bridge(LDB) has two channels.
+
+ The i.MX8qm/qxp LDB is controlled by Control and Status Registers(CSR) module.
+ The CSR module, as a system controller, contains the LDB's configuration
+ registers.
+
+ For i.MX8qxp LDB, each channel supports up to 24bpp parallel input color
+ format and can map the input to VESA or JEIDA standards. The two channels
+ cannot be used simultaneously, that is to say, the user should pick one of
+ them to use. Two LDB channels from two LDB instances can work together in
+ LDB split mode to support a dual link LVDS display. The channel indexes
+ have to be different. Channel0 outputs odd pixels and channel1 outputs
+ even pixels.
+
+ For i.MX8qm LDB, each channel additionally supports up to 30bpp parallel
+ input color format. The two channels can be used simultaneously, either
+ in dual mode or split mode. In dual mode, the two channels output identical
+ data. In split mode, channel0 outputs odd pixels and channel1 outputs even
+ pixels.
+
+ A side note is that i.MX8qm/qxp LDB is officially called pixel mapper in
+ the SoC reference manuals. The pixel mapper uses logic of LDBs embedded in
+ i.MX6qdl/sx SoCs, i.e., it is essentially based on them. To keep the naming
+ consistency, this binding calls it LDB.
+
+properties:
+ compatible:
+ enum:
+ - fsl,imx8qm-ldb
+ - fsl,imx8qxp-ldb
+
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 0
+
+ clocks:
+ items:
+ - description: pixel clock
+ - description: bypass clock
+
+ clock-names:
+ items:
+ - const: pixel
+ - const: bypass
+
+ power-domains:
+ maxItems: 1
+
+ fsl,companion-ldb:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description: |
+ A phandle which points to companion LDB which is used in LDB split mode.
+
+patternProperties:
+ "^channel@[0-1]$":
+ type: object
+ description: Represents a channel of LDB.
+
+ properties:
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 0
+
+ reg:
+ description: The channel index.
+ enum: [ 0, 1 ]
+
+ phys:
+ description: A phandle to the phy module representing the LVDS PHY.
+ maxItems: 1
+
+ phy-names:
+ const: lvds_phy
+
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Input port of the channel.
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Output port of the channel.
+
+ required:
+ - "#address-cells"
+ - "#size-cells"
+ - reg
+ - phys
+ - phy-names
+
+ additionalProperties: false
+
+required:
+ - compatible
+ - "#address-cells"
+ - "#size-cells"
+ - clocks
+ - clock-names
+ - power-domains
+ - channel@0
+ - channel@1
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: fsl,imx8qm-ldb
+ then:
+ properties:
+ fsl,companion-ldb: false
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/firmware/imx/rsrc.h>
+ ldb {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,imx8qxp-ldb";
+ clocks = <&clk IMX_SC_R_LVDS_0 IMX_SC_PM_CLK_MISC2>,
+ <&clk IMX_SC_R_LVDS_0 IMX_SC_PM_CLK_BYPASS>;
+ clock-names = "pixel", "bypass";
+ power-domains = <&pd IMX_SC_R_LVDS_0>;
+
+ channel@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+ phys = <&mipi_lvds_0_phy>;
+ phy-names = "lvds_phy";
+
+ port@0 {
+ reg = <0>;
+
+ mipi_lvds_0_ldb_ch0_mipi_lvds_0_pxl2dpi: endpoint {
+ remote-endpoint = <&mipi_lvds_0_pxl2dpi_mipi_lvds_0_ldb_ch0>;
+ };
+ };
+ };
+
+ channel@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+ phys = <&mipi_lvds_0_phy>;
+ phy-names = "lvds_phy";
+
+ port@0 {
+ reg = <0>;
+
+ mipi_lvds_0_ldb_ch1_mipi_lvds_0_pxl2dpi: endpoint {
+ remote-endpoint = <&mipi_lvds_0_pxl2dpi_mipi_lvds_0_ldb_ch1>;
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/bridge/fsl,imx8qxp-pixel-combiner.yaml b/Documentation/devicetree/bindings/display/bridge/fsl,imx8qxp-pixel-combiner.yaml
new file mode 100644
index 000000000000..50bae2122183
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/fsl,imx8qxp-pixel-combiner.yaml
@@ -0,0 +1,144 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/fsl,imx8qxp-pixel-combiner.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX8qm/qxp Pixel Combiner
+
+maintainers:
+ - Liu Ying <victor.liu@nxp.com>
+
+description: |
+ The Freescale i.MX8qm/qxp Pixel Combiner takes two output streams from a
+ single display controller and manipulates the two streams to support a number
+ of modes(bypass, pixel combine, YUV444 to YUV422, split_RGB) configured as
+ either one screen, two screens, or virtual screens. The pixel combiner is
+ also responsible for generating some of the control signals for the pixel link
+ output channel.
+
+properties:
+ compatible:
+ enum:
+ - fsl,imx8qm-pixel-combiner
+ - fsl,imx8qxp-pixel-combiner
+
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 0
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ const: apb
+
+ power-domains:
+ maxItems: 1
+
+patternProperties:
+ "^channel@[0-1]$":
+ type: object
+ description: Represents a display stream of pixel combiner.
+
+ properties:
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 0
+
+ reg:
+ description: The display stream index.
+ enum: [ 0, 1 ]
+
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Input endpoint of the display stream.
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Output endpoint of the display stream.
+
+ required:
+ - "#address-cells"
+ - "#size-cells"
+ - reg
+ - port@0
+ - port@1
+
+ additionalProperties: false
+
+required:
+ - compatible
+ - "#address-cells"
+ - "#size-cells"
+ - reg
+ - clocks
+ - clock-names
+ - power-domains
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/imx8-lpcg.h>
+ #include <dt-bindings/firmware/imx/rsrc.h>
+ pixel-combiner@56020000 {
+ compatible = "fsl,imx8qxp-pixel-combiner";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x56020000 0x10000>;
+ clocks = <&dc0_pixel_combiner_lpcg IMX_LPCG_CLK_4>;
+ clock-names = "apb";
+ power-domains = <&pd IMX_SC_R_DC_0>;
+
+ channel@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ dc0_pixel_combiner_ch0_dc0_dpu_disp0: endpoint {
+ remote-endpoint = <&dc0_dpu_disp0_dc0_pixel_combiner_ch0>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ dc0_pixel_combiner_ch0_dc0_pixel_link0: endpoint {
+ remote-endpoint = <&dc0_pixel_link0_dc0_pixel_combiner_ch0>;
+ };
+ };
+ };
+
+ channel@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+
+ port@0 {
+ reg = <0>;
+
+ dc0_pixel_combiner_ch1_dc0_dpu_disp1: endpoint {
+ remote-endpoint = <&dc0_dpu_disp1_dc0_pixel_combiner_ch1>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ dc0_pixel_combiner_ch1_dc0_pixel_link1: endpoint {
+ remote-endpoint = <&dc0_pixel_link1_dc0_pixel_combiner_ch1>;
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/bridge/fsl,imx8qxp-pixel-link.yaml b/Documentation/devicetree/bindings/display/bridge/fsl,imx8qxp-pixel-link.yaml
new file mode 100644
index 000000000000..38ecc7926fad
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/fsl,imx8qxp-pixel-link.yaml
@@ -0,0 +1,144 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/fsl,imx8qxp-pixel-link.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX8qm/qxp Display Pixel Link
+
+maintainers:
+ - Liu Ying <victor.liu@nxp.com>
+
+description: |
+ The Freescale i.MX8qm/qxp Display Pixel Link(DPL) forms a standard
+ asynchronous linkage between pixel sources(display controller or
+ camera module) and pixel consumers(imaging or displays).
+ It consists of two distinct functions, a pixel transfer function and a
+ control interface. Multiple pixel channels can exist per one control channel.
+ This binding documentation is only for pixel links whose pixel sources are
+ display controllers.
+
+ The i.MX8qm/qxp Display Pixel Link is accessed via System Controller Unit(SCU)
+ firmware.
+
+properties:
+ compatible:
+ enum:
+ - fsl,imx8qm-dc-pixel-link
+ - fsl,imx8qxp-dc-pixel-link
+
+ fsl,dc-id:
+ $ref: /schemas/types.yaml#/definitions/uint8
+ description: |
+ u8 value representing the display controller index that the pixel link
+ connects to.
+
+ fsl,dc-stream-id:
+ $ref: /schemas/types.yaml#/definitions/uint8
+ description: |
+ u8 value representing the display controller stream index that the pixel
+ link connects to.
+ enum: [0, 1]
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: The pixel link input port node from upstream video source.
+
+ patternProperties:
+ "^port@[1-4]$":
+ $ref: /schemas/graph.yaml#/properties/port
+ description: The pixel link output port node to downstream bridge.
+
+ required:
+ - port@0
+ - port@1
+ - port@2
+ - port@3
+ - port@4
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: fsl,imx8qxp-dc-pixel-link
+ then:
+ properties:
+ fsl,dc-id:
+ const: 0
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: fsl,imx8qm-dc-pixel-link
+ then:
+ properties:
+ fsl,dc-id:
+ enum: [0, 1]
+
+required:
+ - compatible
+ - fsl,dc-id
+ - fsl,dc-stream-id
+ - ports
+
+additionalProperties: false
+
+examples:
+ - |
+ dc0-pixel-link0 {
+ compatible = "fsl,imx8qxp-dc-pixel-link";
+ fsl,dc-id = /bits/ 8 <0>;
+ fsl,dc-stream-id = /bits/ 8 <0>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ /* from dc0 pixel combiner channel0 */
+ port@0 {
+ reg = <0>;
+
+ dc0_pixel_link0_dc0_pixel_combiner_ch0: endpoint {
+ remote-endpoint = <&dc0_pixel_combiner_ch0_dc0_pixel_link0>;
+ };
+ };
+
+ /* to PXL2DPIs in MIPI/LVDS combo subsystems */
+ port@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+
+ dc0_pixel_link0_mipi_lvds_0_pxl2dpi: endpoint@0 {
+ reg = <0>;
+ remote-endpoint = <&mipi_lvds_0_pxl2dpi_dc0_pixel_link0>;
+ };
+
+ dc0_pixel_link0_mipi_lvds_1_pxl2dpi: endpoint@1 {
+ reg = <1>;
+ remote-endpoint = <&mipi_lvds_1_pxl2dpi_dc0_pixel_link0>;
+ };
+ };
+
+ /* unused */
+ port@2 {
+ reg = <2>;
+ };
+
+ /* unused */
+ port@3 {
+ reg = <3>;
+ };
+
+ /* to imaging subsystem */
+ port@4 {
+ reg = <4>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/bridge/fsl,imx8qxp-pxl2dpi.yaml b/Documentation/devicetree/bindings/display/bridge/fsl,imx8qxp-pxl2dpi.yaml
new file mode 100644
index 000000000000..e4e77fad05f1
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/fsl,imx8qxp-pxl2dpi.yaml
@@ -0,0 +1,108 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/fsl,imx8qxp-pxl2dpi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX8qxp Pixel Link to Display Pixel Interface
+
+maintainers:
+ - Liu Ying <victor.liu@nxp.com>
+
+description: |
+ The Freescale i.MX8qxp Pixel Link to Display Pixel Interface(PXL2DPI)
+ interfaces the pixel link 36-bit data output and the DSI controller’s
+ MIPI-DPI 24-bit data input, and inputs of LVDS Display Bridge(LDB) module
+ used in LVDS mode, to remap the pixel color codings between those modules.
+ This module is purely combinatorial.
+
+ The i.MX8qxp PXL2DPI is controlled by Control and Status Registers(CSR) module.
+ The CSR module, as a system controller, contains the PXL2DPI's configuration
+ register.
+
+properties:
+ compatible:
+ const: fsl,imx8qxp-pxl2dpi
+
+ fsl,sc-resource:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: The SCU resource ID associated with this PXL2DPI instance.
+
+ power-domains:
+ maxItems: 1
+
+ fsl,companion-pxl2dpi:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description: |
+ A phandle which points to companion PXL2DPI which is used by downstream
+ LVDS Display Bridge(LDB) in split mode.
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: The PXL2DPI input port node from pixel link.
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: The PXL2DPI output port node to downstream bridge.
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - fsl,sc-resource
+ - power-domains
+ - ports
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/firmware/imx/rsrc.h>
+ pxl2dpi {
+ compatible = "fsl,imx8qxp-pxl2dpi";
+ fsl,sc-resource = <IMX_SC_R_MIPI_0>;
+ power-domains = <&pd IMX_SC_R_MIPI_0>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+
+ mipi_lvds_0_pxl2dpi_dc_pixel_link0: endpoint@0 {
+ reg = <0>;
+ remote-endpoint = <&dc_pixel_link0_mipi_lvds_0_pxl2dpi>;
+ };
+
+ mipi_lvds_0_pxl2dpi_dc_pixel_link1: endpoint@1 {
+ reg = <1>;
+ remote-endpoint = <&dc_pixel_link1_mipi_lvds_0_pxl2dpi>;
+ };
+ };
+
+ port@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+
+ mipi_lvds_0_pxl2dpi_mipi_lvds_0_ldb_ch0: endpoint@0 {
+ reg = <0>;
+ remote-endpoint = <&mipi_lvds_0_ldb_ch0_mipi_lvds_0_pxl2dpi>;
+ };
+
+ mipi_lvds_0_pxl2dpi_mipi_lvds_0_ldb_ch1: endpoint@1 {
+ reg = <1>;
+ remote-endpoint = <&mipi_lvds_0_ldb_ch1_mipi_lvds_0_pxl2dpi>;
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/bridge/fsl,imx93-mipi-dsi.yaml b/Documentation/devicetree/bindings/display/bridge/fsl,imx93-mipi-dsi.yaml
new file mode 100644
index 000000000000..d6e51d0cf546
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/fsl,imx93-mipi-dsi.yaml
@@ -0,0 +1,115 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/fsl,imx93-mipi-dsi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX93 specific extensions to Synopsys Designware MIPI DSI
+
+maintainers:
+ - Liu Ying <victor.liu@nxp.com>
+
+description: |
+ There is a Synopsys Designware MIPI DSI Host Controller and a Synopsys
+ Designware MIPI DPHY embedded in Freescale i.MX93 SoC. Some configurations
+ and extensions to them are controlled by i.MX93 media blk-ctrl.
+
+allOf:
+ - $ref: snps,dw-mipi-dsi.yaml#
+
+properties:
+ compatible:
+ const: fsl,imx93-mipi-dsi
+
+ clocks:
+ items:
+ - description: apb clock
+ - description: pixel clock
+ - description: PHY configuration clock
+ - description: PHY reference clock
+
+ clock-names:
+ items:
+ - const: pclk
+ - const: pix
+ - const: phy_cfg
+ - const: phy_ref
+
+ interrupts:
+ maxItems: 1
+
+ fsl,media-blk-ctrl:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ i.MX93 media blk-ctrl, as a syscon, controls pixel component bit map
+ configurations from LCDIF display controller to the MIPI DSI host
+ controller and MIPI DPHY PLL related configurations through PLL SoC
+ interface.
+
+ power-domains:
+ maxItems: 1
+
+required:
+ - compatible
+ - interrupts
+ - fsl,media-blk-ctrl
+ - power-domains
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/imx93-clock.h>
+ #include <dt-bindings/gpio/gpio.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/power/fsl,imx93-power.h>
+
+ dsi@4ae10000 {
+ compatible = "fsl,imx93-mipi-dsi";
+ reg = <0x4ae10000 0x10000>;
+ interrupts = <GIC_SPI 177 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk IMX93_CLK_MIPI_DSI_GATE>,
+ <&clk IMX93_CLK_MEDIA_DISP_PIX>,
+ <&clk IMX93_CLK_MIPI_PHY_CFG>,
+ <&clk IMX93_CLK_24M>;
+ clock-names = "pclk", "pix", "phy_cfg", "phy_ref";
+ fsl,media-blk-ctrl = <&media_blk_ctrl>;
+ power-domains = <&media_blk_ctrl IMX93_MEDIABLK_PD_MIPI_DSI>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ panel@0 {
+ compatible = "raydium,rm67191";
+ reg = <0>;
+ reset-gpios = <&adp5585gpio 6 GPIO_ACTIVE_LOW>;
+ dsi-lanes = <4>;
+ video-mode = <2>;
+
+ port {
+ panel_in: endpoint {
+ remote-endpoint = <&dsi_out>;
+ };
+ };
+ };
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ dsi_to_lcdif: endpoint {
+ remote-endpoint = <&lcdif_to_dsi>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ dsi_out: endpoint {
+ remote-endpoint = <&panel_in>;
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/bridge/fsl,ldb.yaml b/Documentation/devicetree/bindings/display/bridge/fsl,ldb.yaml
new file mode 100644
index 000000000000..07388bf2b90d
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/fsl,ldb.yaml
@@ -0,0 +1,122 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/fsl,ldb.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX8MP DPI to LVDS bridge chip
+
+maintainers:
+ - Marek Vasut <marex@denx.de>
+
+description: |
+ The i.MX8MP mediamix contains two registers which are responsible
+ for configuring the on-SoC DPI-to-LVDS serializer. This describes
+ those registers as bridge within the DT.
+
+properties:
+ compatible:
+ enum:
+ - fsl,imx6sx-ldb
+ - fsl,imx8mp-ldb
+ - fsl,imx93-ldb
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ const: ldb
+
+ reg:
+ maxItems: 2
+
+ reg-names:
+ items:
+ - const: ldb
+ - const: lvds
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Video port for DPI input.
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Video port for LVDS Channel-A output (panel or bridge).
+
+ port@2:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Video port for LVDS Channel-B output (panel or bridge).
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - clocks
+ - ports
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - fsl,imx6sx-ldb
+ - fsl,imx93-ldb
+ then:
+ properties:
+ ports:
+ properties:
+ port@2: false
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/imx8mp-clock.h>
+
+ blk-ctrl {
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ bridge@5c {
+ compatible = "fsl,imx8mp-ldb";
+ clocks = <&clk IMX8MP_CLK_MEDIA_LDB>;
+ clock-names = "ldb";
+ reg = <0x5c 0x4>, <0x128 0x4>;
+ reg-names = "ldb", "lvds";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ ldb_from_lcdif2: endpoint {
+ remote-endpoint = <&lcdif2_to_ldb>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ ldb_lvds_ch0: endpoint {
+ remote-endpoint = <&ldb_to_lvdsx4panel>;
+ };
+ };
+
+ port@2 {
+ reg = <2>;
+
+ ldb_lvds_ch1: endpoint {
+ };
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/bridge/google,cros-ec-anx7688.yaml b/Documentation/devicetree/bindings/display/bridge/google,cros-ec-anx7688.yaml
new file mode 100644
index 000000000000..a44d025d33bd
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/google,cros-ec-anx7688.yaml
@@ -0,0 +1,80 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/google,cros-ec-anx7688.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ChromeOS EC ANX7688 HDMI to DP Converter through Type-C Port
+
+maintainers:
+ - Nicolas Boichat <drinkcat@chromium.org>
+
+description: |
+ ChromeOS EC ANX7688 is a display bridge that converts HDMI 2.0 to
+ DisplayPort 1.3 Ultra-HDi (4096x2160p60). It is an Analogix ANX7688 chip
+ which is connected to and operated by the ChromeOS Embedded Controller
+ (See google,cros-ec.yaml). It is accessed using I2C tunneling through
+ the EC and therefore its node should be a child of an EC I2C tunnel node
+ (See google,cros-ec-i2c-tunnel.yaml).
+
+properties:
+ compatible:
+ const: google,cros-ec-anx7688
+
+ reg:
+ maxItems: 1
+ description: I2C address of the device.
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Video port for HDMI input.
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: USB Type-c connector.
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - ports
+
+additionalProperties: false
+
+examples:
+ - |
+ i2c_tunnel_b: i2c-tunnel1 {
+ compatible = "google,cros-ec-i2c-tunnel";
+ google,remote-bus = <1>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ anx7688: anx7688@2c {
+ compatible = "google,cros-ec-anx7688";
+ reg = <0x2c>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ anx7688_in: endpoint {
+ remote-endpoint = <&hdmi0_out>;
+ };
+ };
+ port@1 {
+ reg = <1>;
+ anx7688_out: endpoint {
+ remote-endpoint = <&typec_connector>;
+ };
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/bridge/ingenic,jz4780-hdmi.yaml b/Documentation/devicetree/bindings/display/bridge/ingenic,jz4780-hdmi.yaml
new file mode 100644
index 000000000000..84df3cf239d5
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/ingenic,jz4780-hdmi.yaml
@@ -0,0 +1,84 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/ingenic,jz4780-hdmi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Ingenic JZ4780 HDMI Transmitter
+
+maintainers:
+ - H. Nikolaus Schaller <hns@goldelico.com>
+
+description: |
+ The HDMI Transmitter in the Ingenic JZ4780 is a Synopsys DesignWare HDMI 1.4
+ TX controller IP with accompanying PHY IP.
+
+allOf:
+ - $ref: synopsys,dw-hdmi.yaml#
+
+properties:
+ compatible:
+ const: ingenic,jz4780-dw-hdmi
+
+ reg-io-width:
+ const: 4
+
+ clocks:
+ maxItems: 2
+
+ clock-names:
+ maxItems: 2
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Input from LCD controller output.
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Link to the HDMI connector.
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - ports
+ - reg-io-width
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/ingenic,jz4780-cgu.h>
+
+ hdmi: hdmi@10180000 {
+ compatible = "ingenic,jz4780-dw-hdmi";
+ reg = <0x10180000 0x8000>;
+ reg-io-width = <4>;
+ interrupt-parent = <&intc>;
+ interrupts = <3>;
+ clocks = <&cgu JZ4780_CLK_AHB0>, <&cgu JZ4780_CLK_HDMI>;
+ clock-names = "iahb", "isfr";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ hdmi_in: port@0 {
+ reg = <0>;
+ dw_hdmi_in: endpoint {
+ remote-endpoint = <&jz4780_lcd_out>;
+ };
+ };
+ hdmi_out: port@1 {
+ reg = <1>;
+ dw_hdmi_out: endpoint {
+ remote-endpoint = <&hdmi_con>;
+ };
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/bridge/intel,keembay-dsi.yaml b/Documentation/devicetree/bindings/display/bridge/intel,keembay-dsi.yaml
new file mode 100644
index 000000000000..958a073f4ff7
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/intel,keembay-dsi.yaml
@@ -0,0 +1,93 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/intel,keembay-dsi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Intel Keem Bay mipi dsi controller
+
+maintainers:
+ - Anitha Chrisanthus <anitha.chrisanthus@intel.com>
+ - Edmond J Dea <edmund.j.dea@intel.com>
+
+properties:
+ compatible:
+ const: intel,keembay-dsi
+
+ reg:
+ items:
+ - description: MIPI registers range
+
+ reg-names:
+ items:
+ - const: mipi
+
+ clocks:
+ items:
+ - description: MIPI DSI clock
+ - description: MIPI DSI econfig clock
+ - description: MIPI DSI config clock
+
+ clock-names:
+ items:
+ - const: clk_mipi
+ - const: clk_mipi_ecfg
+ - const: clk_mipi_cfg
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: MIPI DSI input port.
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: DSI output port.
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - reg-names
+ - clocks
+ - clock-names
+ - ports
+
+additionalProperties: false
+
+examples:
+ - |
+ mipi-dsi@20900000 {
+ compatible = "intel,keembay-dsi";
+ reg = <0x20900000 0x4000>;
+ reg-names = "mipi";
+ clocks = <&scmi_clk 0x86>,
+ <&scmi_clk 0x88>,
+ <&scmi_clk 0x89>;
+ clock-names = "clk_mipi", "clk_mipi_ecfg",
+ "clk_mipi_cfg";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ dsi_in: endpoint {
+ remote-endpoint = <&disp_out>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ dsi_out: endpoint {
+ remote-endpoint = <&adv7535_input>;
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/bridge/ite,it6263.yaml b/Documentation/devicetree/bindings/display/bridge/ite,it6263.yaml
new file mode 100644
index 000000000000..b98d942bbe19
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/ite,it6263.yaml
@@ -0,0 +1,251 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/ite,it6263.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ITE IT6263 LVDS to HDMI converter
+
+maintainers:
+ - Liu Ying <victor.liu@nxp.com>
+
+description: |
+ The IT6263 is a high-performance single-chip De-SSC(De-Spread Spectrum) LVDS
+ to HDMI converter. Combined with LVDS receiver and HDMI 1.4a transmitter,
+ the IT6263 supports LVDS input and HDMI 1.4 output by conversion function.
+ The built-in LVDS receiver can support single-link and dual-link LVDS inputs,
+ and the built-in HDMI transmitter is fully compliant with HDMI 1.4a/3D, HDCP
+ 1.2 and backward compatible with DVI 1.0 specification.
+
+ The IT6263 also encodes and transmits up to 8 channels of I2S digital audio,
+ with sampling rate up to 192KHz and sample size up to 24 bits. In addition,
+ an S/PDIF input port takes in compressed audio of up to 192KHz frame rate.
+
+ The newly supported High-Bit Rate(HBR) audio by HDMI specifications v1.3 is
+ provided by the IT6263 in two interfaces: the four I2S input ports or the
+ S/PDIF input port. With both interfaces the highest possible HBR frame rate
+ is supported at up to 768KHz.
+
+allOf:
+ - $ref: /schemas/display/lvds-dual-ports.yaml#
+ - $ref: /schemas/sound/dai-common.yaml#
+
+properties:
+ compatible:
+ const: ite,it6263
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+ description: audio master clock
+
+ clock-names:
+ const: mclk
+
+ data-mapping:
+ enum:
+ - jeida-18
+ - jeida-24
+ - jeida-30
+ - vesa-24
+ - vesa-30
+
+ reset-gpios:
+ maxItems: 1
+
+ ivdd-supply:
+ description: 1.8V digital logic power
+
+ ovdd-supply:
+ description: 3.3V I/O pin power
+
+ txavcc18-supply:
+ description: 1.8V HDMI analog frontend power
+
+ txavcc33-supply:
+ description: 3.3V HDMI analog frontend power
+
+ pvcc1-supply:
+ description: 1.8V HDMI frontend core PLL power
+
+ pvcc2-supply:
+ description: 1.8V HDMI frontend filter PLL power
+
+ avcc-supply:
+ description: 3.3V LVDS frontend power
+
+ anvdd-supply:
+ description: 1.8V LVDS frontend analog power
+
+ apvdd-supply:
+ description: 1.8V LVDS frontend PLL power
+
+ "#sound-dai-cells":
+ const: 0
+
+ ite,i2s-audio-fifo-sources:
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ minItems: 1
+ maxItems: 4
+ items:
+ enum: [0, 1, 2, 3]
+ description:
+ Each array element indicates the pin number of an I2S serial data input
+ line which is connected to an audio FIFO, from audio FIFO0 to FIFO3.
+
+ ite,rl-channel-swap-audio-sources:
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ minItems: 1
+ maxItems: 4
+ uniqueItems: true
+ items:
+ enum: [0, 1, 2, 3]
+ description:
+ Each array element indicates an audio source whose right channel and left
+ channel are swapped by this converter. For I2S, the element is the pin
+ number of an I2S serial data input line. For S/PDIF, the element is always
+ 0.
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0: true
+
+ port@1:
+ oneOf:
+ - required: [dual-lvds-odd-pixels]
+ - required: [dual-lvds-even-pixels]
+
+ port@2:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: video port for the HDMI output
+
+ port@3:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: sound input port
+
+ required:
+ - port@0
+ - port@2
+
+required:
+ - compatible
+ - reg
+ - data-mapping
+ - ivdd-supply
+ - ovdd-supply
+ - txavcc18-supply
+ - txavcc33-supply
+ - pvcc1-supply
+ - pvcc2-supply
+ - avcc-supply
+ - anvdd-supply
+ - apvdd-supply
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ /* single-link LVDS input */
+ #include <dt-bindings/gpio/gpio.h>
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ hdmi@4c {
+ compatible = "ite,it6263";
+ reg = <0x4c>;
+ data-mapping = "jeida-24";
+ reset-gpios = <&gpio1 10 GPIO_ACTIVE_LOW>;
+ ivdd-supply = <&reg_buck5>;
+ ovdd-supply = <&reg_vext_3v3>;
+ txavcc18-supply = <&reg_buck5>;
+ txavcc33-supply = <&reg_vext_3v3>;
+ pvcc1-supply = <&reg_buck5>;
+ pvcc2-supply = <&reg_buck5>;
+ avcc-supply = <&reg_vext_3v3>;
+ anvdd-supply = <&reg_buck5>;
+ apvdd-supply = <&reg_buck5>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ it6263_lvds_link1: endpoint {
+ remote-endpoint = <&ldb_lvds_ch0>;
+ };
+ };
+
+ port@2 {
+ reg = <2>;
+
+ it6263_out: endpoint {
+ remote-endpoint = <&hdmi_in>;
+ };
+ };
+ };
+ };
+ };
+
+ - |
+ /* dual-link LVDS input */
+ #include <dt-bindings/gpio/gpio.h>
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ hdmi@4c {
+ compatible = "ite,it6263";
+ reg = <0x4c>;
+ data-mapping = "jeida-24";
+ reset-gpios = <&gpio1 10 GPIO_ACTIVE_LOW>;
+ ivdd-supply = <&reg_buck5>;
+ ovdd-supply = <&reg_vext_3v3>;
+ txavcc18-supply = <&reg_buck5>;
+ txavcc33-supply = <&reg_vext_3v3>;
+ pvcc1-supply = <&reg_buck5>;
+ pvcc2-supply = <&reg_buck5>;
+ avcc-supply = <&reg_vext_3v3>;
+ anvdd-supply = <&reg_buck5>;
+ apvdd-supply = <&reg_buck5>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ dual-lvds-odd-pixels;
+
+ it6263_lvds_link1_dual: endpoint {
+ remote-endpoint = <&ldb_lvds_ch0>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ dual-lvds-even-pixels;
+
+ it6263_lvds_link2_dual: endpoint {
+ remote-endpoint = <&ldb_lvds_ch1>;
+ };
+ };
+
+ port@2 {
+ reg = <2>;
+
+ it6263_out_dual: endpoint {
+ remote-endpoint = <&hdmi_in>;
+ };
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/bridge/ite,it6505.yaml b/Documentation/devicetree/bindings/display/bridge/ite,it6505.yaml
new file mode 100644
index 000000000000..c4469f463978
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/ite,it6505.yaml
@@ -0,0 +1,156 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/ite,it6505.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ITE it6505
+
+maintainers:
+ - Allen Chen <allen.chen@ite.com.tw>
+
+allOf:
+ - $ref: /schemas/sound/dai-common.yaml#
+
+description: |
+ The IT6505 is a high-performance DisplayPort 1.1a transmitter,
+ fully compliant with DisplayPort 1.1a, HDCP 1.3 specifications.
+ The IT6505 supports color depth of up to 36 bits (12 bits/color)
+ and ensures robust transmission of high-quality uncompressed video
+ content, along with uncompressed and compressed digital audio content.
+
+ Aside from the various video output formats supported, the IT6505
+ also encodes and transmits up to 8 channels of I2S digital audio,
+ with sampling rate up to 192kHz and sample size up to 24 bits.
+ In addition, an S/PDIF input port takes in compressed audio of up to
+ 192kHz frame rate.
+
+ Each IT6505 chip comes preprogrammed with an unique HDCP key,
+ in compliance with the HDCP 1.3 standard so as to provide secure
+ transmission of high-definition content. Users of the IT6505 need not
+ purchase any HDCP keys or ROMs.
+
+properties:
+ compatible:
+ const: ite,it6505
+
+ reg:
+ maxItems: 1
+
+ ovdd-supply:
+ description: I/O voltage
+
+ pwr18-supply:
+ description: core voltage
+
+ interrupts:
+ maxItems: 1
+ description: interrupt specifier of INT pin
+
+ reset-gpios:
+ maxItems: 1
+ description: gpio specifier of RESET pin
+
+ extcon:
+ maxItems: 1
+ description: extcon specifier for the Power Delivery
+
+ "#sound-dai-cells":
+ const: 0
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ unevaluatedProperties: false
+ description: A port node pointing to DPI host port node
+
+ properties:
+ endpoint:
+ $ref: /schemas/graph.yaml#/$defs/endpoint-base
+ unevaluatedProperties: false
+
+ properties:
+ link-frequencies:
+ minItems: 1
+ maxItems: 1
+ description: Allowed max link frequencies in Hz
+
+ port@1:
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ unevaluatedProperties: false
+ description: Video port for DP output
+
+ properties:
+ endpoint:
+ $ref: /schemas/graph.yaml#/$defs/endpoint-base
+ unevaluatedProperties: false
+
+ properties:
+ data-lanes:
+ minItems: 1
+ uniqueItems: true
+ items:
+ - enum: [ 0, 1 ]
+ - const: 1
+ - const: 2
+ - const: 3
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - ovdd-supply
+ - pwr18-supply
+ - interrupts
+ - reset-gpios
+ - extcon
+ - ports
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ dp-bridge@5c {
+ compatible = "ite,it6505";
+ interrupts = <152 IRQ_TYPE_EDGE_FALLING 152 0>;
+ reg = <0x5c>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&it6505_pins>;
+ ovdd-supply = <&mt6358_vsim1_reg>;
+ pwr18-supply = <&it6505_pp18_reg>;
+ reset-gpios = <&pio 179 1>;
+ extcon = <&usbc_extcon>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ it6505_in: endpoint {
+ remote-endpoint = <&dpi_out>;
+ link-frequencies = /bits/ 64 <150000000>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ it6505_out: endpoint {
+ remote-endpoint = <&dp_in>;
+ data-lanes = <0 1>;
+ };
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/bridge/ite,it66121.yaml b/Documentation/devicetree/bindings/display/bridge/ite,it66121.yaml
new file mode 100644
index 000000000000..ba644c30dcf4
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/ite,it66121.yaml
@@ -0,0 +1,132 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/ite,it66121.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ITE it66121 HDMI bridge
+
+maintainers:
+ - Phong LE <ple@baylibre.com>
+ - Neil Armstrong <neil.armstrong@linaro.org>
+
+description: |
+ The IT66121 is a high-performance and low-power single channel HDMI
+ transmitter, fully compliant with HDMI 1.3a, HDCP 1.2 and backward compatible
+ to DVI 1.0 specifications.
+
+properties:
+ compatible:
+ enum:
+ - ite,it66121
+ - ite,it6610
+
+ reg:
+ maxItems: 1
+
+ reset-gpios:
+ maxItems: 1
+ description: GPIO connected to active low reset
+
+ vrf12-supply:
+ description: Regulator for 1.2V analog core power.
+
+ vcn33-supply:
+ description: Regulator for 3.3V digital core power.
+
+ vcn18-supply:
+ description: Regulator for 1.8V IO core power.
+
+ interrupts:
+ maxItems: 1
+
+ "#sound-dai-cells":
+ const: 0
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ unevaluatedProperties: false
+ description: DPI input port.
+
+ properties:
+ endpoint:
+ $ref: /schemas/graph.yaml#/$defs/endpoint-base
+ unevaluatedProperties: false
+
+ properties:
+ bus-width:
+ description:
+ Endpoint bus width.
+ enum:
+ - 12 # 12 data lines connected and dual-edge mode
+ - 24 # 24 data lines connected and single-edge mode
+ default: 24
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: HDMI Connector port.
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - reset-gpios
+ - vrf12-supply
+ - vcn33-supply
+ - vcn18-supply
+ - interrupts
+ - ports
+
+allOf:
+ - $ref: /schemas/sound/dai-common.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/gpio/gpio.h>
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ it66121hdmitx: hdmitx@4c {
+ compatible = "ite,it66121";
+ pinctrl-names = "default";
+ pinctrl-0 = <&ite_pins_default>;
+ vcn33-supply = <&mt6358_vcn33_wifi_reg>;
+ vcn18-supply = <&mt6358_vcn18_reg>;
+ vrf12-supply = <&mt6358_vrf12_reg>;
+ reset-gpios = <&pio 160 GPIO_ACTIVE_LOW>;
+ interrupt-parent = <&pio>;
+ interrupts = <4 IRQ_TYPE_LEVEL_LOW>;
+ reg = <0x4c>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ it66121_in: endpoint {
+ bus-width = <12>;
+ remote-endpoint = <&display_out>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ hdmi_conn_out: endpoint {
+ remote-endpoint = <&hdmi_conn_in>;
+ };
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/bridge/lontium,lt8912b.yaml b/Documentation/devicetree/bindings/display/bridge/lontium,lt8912b.yaml
new file mode 100644
index 000000000000..2cef25215798
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/lontium,lt8912b.yaml
@@ -0,0 +1,124 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/lontium,lt8912b.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Lontium LT8912B MIPI to HDMI Bridge
+
+maintainers:
+ - Adrien Grassein <adrien.grassein@gmail.com>
+
+description: |
+ The LT8912B is a bridge device which convert DSI to HDMI
+
+properties:
+ compatible:
+ enum:
+ - lontium,lt8912b
+
+ reg:
+ maxItems: 1
+
+ reset-gpios:
+ maxItems: 1
+ description: GPIO connected to active high RESET pin.
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ unevaluatedProperties: false
+ description:
+ Primary MIPI port for MIPI input
+
+ properties:
+ endpoint:
+ $ref: /schemas/media/video-interfaces.yaml#
+ unevaluatedProperties: false
+
+ properties:
+ data-lanes: true
+
+ required:
+ - data-lanes
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: |
+ HDMI port, should be connected to a node compatible with the
+ hdmi-connector binding.
+
+ required:
+ - port@0
+ - port@1
+
+ vcchdmipll-supply:
+ description: A 1.8V supply that powers the HDMI PLL.
+
+ vcchdmitx-supply:
+ description: A 1.8V supply that powers the HDMI TX part.
+
+ vcclvdspll-supply:
+ description: A 1.8V supply that powers the LVDS PLL.
+
+ vcclvdstx-supply:
+ description: A 1.8V supply that powers the LVDS TX part.
+
+ vccmipirx-supply:
+ description: A 1.8V supply that powers the MIPI RX part.
+
+ vccsysclk-supply:
+ description: A 1.8V supply that powers the SYSCLK.
+
+ vdd-supply:
+ description: A 1.8V supply that powers the digital part.
+
+required:
+ - compatible
+ - reg
+ - reset-gpios
+ - ports
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ hdmi-bridge@48 {
+ compatible = "lontium,lt8912b";
+ reg = <0x48>;
+ reset-gpios = <&max7323 0 GPIO_ACTIVE_LOW>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ hdmi_out_in: endpoint {
+ data-lanes = <0 1 2 3>;
+ remote-endpoint = <&mipi_dsi_out>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ endpoint {
+ remote-endpoint = <&hdmi_in>;
+ };
+ };
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/bridge/lontium,lt9211.yaml b/Documentation/devicetree/bindings/display/bridge/lontium,lt9211.yaml
new file mode 100644
index 000000000000..9a6e9b25d14a
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/lontium,lt9211.yaml
@@ -0,0 +1,117 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/lontium,lt9211.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Lontium LT9211 DSI/LVDS/DPI to DSI/LVDS/DPI bridge.
+
+maintainers:
+ - Marek Vasut <marex@denx.de>
+
+description: |
+ The LT9211 are bridge devices which convert Single/Dual-Link DSI/LVDS
+ or Single DPI to Single/Dual-Link DSI/LVDS or Single DPI.
+
+properties:
+ compatible:
+ enum:
+ - lontium,lt9211
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ reset-gpios:
+ maxItems: 1
+ description: GPIO connected to active high RESET pin.
+
+ vccio-supply:
+ description: Regulator for 1.8V IO power.
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Primary MIPI DSI port-1 for MIPI input or
+ LVDS port-1 for LVDS input or DPI input.
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Additional MIPI port-2 for MIPI input or LVDS port-2
+ for LVDS input. Used in combination with primary
+ port-1 to drive higher resolution displays
+
+ port@2:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Primary MIPI DSI port-1 for MIPI output or
+ LVDS port-1 for LVDS output or DPI output.
+
+ port@3:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Additional MIPI port-2 for MIPI output or LVDS port-2
+ for LVDS output. Used in combination with primary
+ port-1 to drive higher resolution displays.
+
+ required:
+ - port@0
+ - port@2
+
+required:
+ - compatible
+ - reg
+ - vccio-supply
+ - ports
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ hdmi-bridge@3b {
+ compatible = "lontium,lt9211";
+ reg = <0x3b>;
+
+ reset-gpios = <&tlmm 128 GPIO_ACTIVE_HIGH>;
+ interrupts-extended = <&tlmm 84 IRQ_TYPE_EDGE_FALLING>;
+
+ vccio-supply = <&lt9211_1v8>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ endpoint {
+ remote-endpoint = <&dsi0_out>;
+ };
+ };
+
+ port@2 {
+ reg = <2>;
+
+ endpoint {
+ remote-endpoint = <&panel_in_lvds>;
+ };
+ };
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/bridge/lontium,lt9611.yaml b/Documentation/devicetree/bindings/display/bridge/lontium,lt9611.yaml
new file mode 100644
index 000000000000..655db8cfdc25
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/lontium,lt9611.yaml
@@ -0,0 +1,124 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/lontium,lt9611.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Lontium LT9611(UXC) 2 Port MIPI to HDMI Bridge
+
+maintainers:
+ - Vinod Koul <vkoul@kernel.org>
+
+description: |
+ The LT9611 and LT9611UXC are bridge devices which convert DSI to HDMI
+
+properties:
+ compatible:
+ enum:
+ - lontium,lt9611
+ - lontium,lt9611uxc
+
+ reg:
+ maxItems: 1
+
+ "#sound-dai-cells":
+ const: 1
+
+ interrupts:
+ maxItems: 1
+
+ reset-gpios:
+ maxItems: 1
+ description: GPIO connected to active high RESET pin.
+
+ vdd-supply:
+ description: Regulator for 1.8V MIPI phy power.
+
+ vcc-supply:
+ description: Regulator for 3.3V IO power.
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Primary MIPI port-1 for MIPI input
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Additional MIPI port-2 for MIPI input, used in combination
+ with primary MIPI port-1 to drive higher resolution displays
+
+ port@2:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ HDMI port for HDMI output
+
+ required:
+ - port@0
+ - port@2
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - vdd-supply
+ - vcc-supply
+ - ports
+
+allOf:
+ - $ref: /schemas/sound/dai-common.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ i2c10 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ hdmi-bridge@3b {
+ compatible = "lontium,lt9611";
+ reg = <0x3b>;
+
+ reset-gpios = <&tlmm 128 GPIO_ACTIVE_HIGH>;
+ interrupts-extended = <&tlmm 84 IRQ_TYPE_EDGE_FALLING>;
+
+ vdd-supply = <&lt9611_1v8>;
+ vcc-supply = <&lt9611_3v3>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ lt9611_a: endpoint {
+ remote-endpoint = <&dsi0_out>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ lt9611_b: endpoint {
+ remote-endpoint = <&dsi1_out>;
+ };
+ };
+
+ port@2 {
+ reg = <2>;
+ lt9611_out: endpoint {
+ remote-endpoint = <&hdmi_con>;
+ };
+ };
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/bridge/lvds-codec.yaml b/Documentation/devicetree/bindings/display/bridge/lvds-codec.yaml
new file mode 100644
index 000000000000..0487bbffd7f7
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/lvds-codec.yaml
@@ -0,0 +1,188 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/lvds-codec.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Transparent LVDS encoders and decoders
+
+maintainers:
+ - Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
+
+description: |
+ This binding supports transparent LVDS encoders and decoders that don't
+ require any configuration.
+
+ LVDS is a physical layer specification defined in ANSI/TIA/EIA-644-A. Multiple
+ incompatible data link layers have been used over time to transmit image data
+ to LVDS panels. This binding targets devices compatible with the following
+ specifications only.
+
+ [JEIDA] "Digital Interface Standards for Monitor", JEIDA-59-1999, February
+ 1999 (Version 1.0), Japan Electronic Industry Development Association (JEIDA)
+ [LDI] "Open LVDS Display Interface", May 1999 (Version 0.95), National
+ Semiconductor
+ [VESA] "VESA Notebook Panel Standard", October 2007 (Version 1.0), Video
+ Electronics Standards Association (VESA)
+
+ Those devices have been marketed under the FPD-Link and FlatLink brand names
+ among others.
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - ti,ds90c185 # For the TI DS90C185 FPD-Link Serializer
+ - ti,ds90c187 # For the TI DS90C187 FPD-Link Serializer
+ - ti,sn75lvds83 # For the TI SN75LVDS83 FlatLink transmitter
+ - const: lvds-encoder # Generic LVDS encoder compatible fallback
+ - items:
+ - enum:
+ - ti,ds90cf364a # For the DS90CF364A FPD-Link LVDS Receiver
+ - ti,ds90cf384a # For the DS90CF384A FPD-Link LVDS Receiver
+ - ti,sn65lvds822 # For the SN65LVDS822 FlatLink LVDS Receiver
+ - ti,sn65lvds94 # For the SN65DS94 LVDS serdes
+ - const: lvds-decoder # Generic LVDS decoders compatible fallback
+ - enum:
+ - thine,thc63lvdm83d # For the THC63LVDM83D LVDS serializer
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ unevaluatedProperties: false
+ description: |
+ For LVDS encoders, port 0 is the parallel input
+ For LVDS decoders, port 0 is the LVDS input
+
+ properties:
+ endpoint:
+ $ref: /schemas/media/video-interfaces.yaml#
+ unevaluatedProperties: false
+
+ properties:
+ data-mapping:
+ enum:
+ - jeida-18
+ - jeida-24
+ - vesa-24
+ description: |
+ The color signals mapping order. See details in
+ Documentation/devicetree/bindings/display/lvds.yaml
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: |
+ For LVDS encoders, port 1 is the LVDS output
+ For LVDS decoders, port 1 is the parallel output
+
+ required:
+ - port@0
+ - port@1
+
+ pclk-sample:
+ description:
+ Data sampling on rising or falling edge.
+ enum:
+ - 0 # Falling edge
+ - 1 # Rising edge
+ default: 0
+
+ powerdown-gpios:
+ description:
+ The GPIO used to control the power down line of this device.
+ maxItems: 1
+
+ power-supply: true
+
+allOf:
+ - if:
+ not:
+ properties:
+ compatible:
+ contains:
+ const: lvds-decoder
+ then:
+ properties:
+ ports:
+ properties:
+ port@0:
+ properties:
+ endpoint:
+ properties:
+ data-mapping: false
+
+ - if:
+ not:
+ properties:
+ compatible:
+ contains:
+ const: lvds-encoder
+ then:
+ properties:
+ pclk-sample: false
+
+required:
+ - compatible
+ - ports
+
+additionalProperties: false
+
+
+examples:
+ - |
+ lvds-encoder {
+ compatible = "ti,ds90c185", "lvds-encoder";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ lvds_enc_in: endpoint {
+ remote-endpoint = <&display_out_rgb>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ lvds_enc_out: endpoint {
+ remote-endpoint = <&lvds_panel_in>;
+ };
+ };
+ };
+ };
+
+ - |
+ lvds-decoder {
+ compatible = "ti,ds90cf384a", "lvds-decoder";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ lvds_dec_in: endpoint {
+ remote-endpoint = <&display_out_lvds>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ lvds_dec_out: endpoint {
+ remote-endpoint = <&rgb_panel_in>;
+ };
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/bridge/lvds-transmitter.txt b/Documentation/devicetree/bindings/display/bridge/lvds-transmitter.txt
deleted file mode 100644
index 50220190c203..000000000000
--- a/Documentation/devicetree/bindings/display/bridge/lvds-transmitter.txt
+++ /dev/null
@@ -1,70 +0,0 @@
-Parallel to LVDS Encoder
-------------------------
-
-This binding supports the parallel to LVDS encoders that don't require any
-configuration.
-
-LVDS is a physical layer specification defined in ANSI/TIA/EIA-644-A. Multiple
-incompatible data link layers have been used over time to transmit image data
-to LVDS panels. This binding targets devices compatible with the following
-specifications only.
-
-[JEIDA] "Digital Interface Standards for Monitor", JEIDA-59-1999, February
-1999 (Version 1.0), Japan Electronic Industry Development Association (JEIDA)
-[LDI] "Open LVDS Display Interface", May 1999 (Version 0.95), National
-Semiconductor
-[VESA] "VESA Notebook Panel Standard", October 2007 (Version 1.0), Video
-Electronics Standards Association (VESA)
-
-Those devices have been marketed under the FPD-Link and FlatLink brand names
-among others.
-
-
-Required properties:
-
-- compatible: Must be one or more of the following
- - "ti,ds90c185" for the TI DS90C185 FPD-Link Serializer
- - "lvds-encoder" for a generic LVDS encoder device
-
- When compatible with the generic version, nodes must list the
- device-specific version corresponding to the device first
- followed by the generic version.
-
-Required nodes:
-
-This device has two video ports. Their connections are modeled using the OF
-graph bindings specified in Documentation/devicetree/bindings/graph.txt.
-
-- Video port 0 for parallel input
-- Video port 1 for LVDS output
-
-
-Example
--------
-
-lvds-encoder {
- compatible = "lvds-encoder";
- #address-cells = <1>;
- #size-cells = <0>;
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
-
- lvds_enc_in: endpoint {
- remote-endpoint = <&display_out_rgb>;
- };
- };
-
- port@1 {
- reg = <1>;
-
- lvds_enc_out: endpoint {
- remote-endpoint = <&lvds_panel_in>;
- };
- };
- };
-};
diff --git a/Documentation/devicetree/bindings/display/bridge/megachips,stdp2690-ge-b850v3-fw.yaml b/Documentation/devicetree/bindings/display/bridge/megachips,stdp2690-ge-b850v3-fw.yaml
new file mode 100644
index 000000000000..dfa6ff6f115e
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/megachips,stdp2690-ge-b850v3-fw.yaml
@@ -0,0 +1,111 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/megachips,stdp2690-ge-b850v3-fw.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: GE B850v3 video bridge
+
+maintainers:
+ - Frank Li <Frank.Li@nxp.com>
+
+description: |
+ STDP4028-ge-b850v3-fw bridges (LVDS-DP)
+ STDP2690-ge-b850v3-fw bridges (DP-DP++)
+
+ The video processing pipeline on the second output on the GE B850v3:
+
+ Host -> LVDS|--(STDP4028)--|DP -> DP|--(STDP2690)--|DP++ -> Video output
+
+ Each bridge has a dedicated flash containing firmware for supporting the custom
+ design. The result is that, in this design, neither the STDP4028 nor the
+ STDP2690 behave as the stock bridges would. The compatible strings include the
+ suffix "-ge-b850v3-fw" to make it clear that the driver is for the bridges with
+ the firmware specific for the GE B850v3.
+
+ The hardware do not provide control over the video processing pipeline, as the
+ two bridges behaves as a single one. The only interfaces exposed by the
+ hardware are EDID, HPD, and interrupts.
+
+properties:
+ compatible:
+ enum:
+ - megachips,stdp4028-ge-b850v3-fw
+ - megachips,stdp2690-ge-b850v3-fw
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+ properties:
+ port@0:
+ description: sink port
+ $ref: /schemas/graph.yaml#/properties/port
+
+ port@1:
+ description: source port
+ $ref: /schemas/graph.yaml#/properties/port
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - ports
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: megachips,stdp4028-ge-b850v3-fw
+ then:
+ required:
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ bridge@73 {
+ compatible = "megachips,stdp4028-ge-b850v3-fw";
+ reg = <0x73>;
+ interrupt-parent = <&gpio2>;
+ interrupts = <0 IRQ_TYPE_LEVEL_HIGH>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ endpoint {
+ remote-endpoint = <&lvds0_out>;
+ };
+
+ };
+
+ port@1 {
+ reg = <1>;
+
+ endpoint {
+ remote-endpoint = <&stdp2690_in>;
+ };
+ };
+ };
+ };
+ };
+
diff --git a/Documentation/devicetree/bindings/display/bridge/megachips-stdpxxxx-ge-b850v3-fw.txt b/Documentation/devicetree/bindings/display/bridge/megachips-stdpxxxx-ge-b850v3-fw.txt
deleted file mode 100644
index 09e0a21f705e..000000000000
--- a/Documentation/devicetree/bindings/display/bridge/megachips-stdpxxxx-ge-b850v3-fw.txt
+++ /dev/null
@@ -1,91 +0,0 @@
-Drivers for the second video output of the GE B850v3:
- STDP4028-ge-b850v3-fw bridges (LVDS-DP)
- STDP2690-ge-b850v3-fw bridges (DP-DP++)
-
-The video processing pipeline on the second output on the GE B850v3:
-
- Host -> LVDS|--(STDP4028)--|DP -> DP|--(STDP2690)--|DP++ -> Video output
-
-Each bridge has a dedicated flash containing firmware for supporting the custom
-design. The result is that, in this design, neither the STDP4028 nor the
-STDP2690 behave as the stock bridges would. The compatible strings include the
-suffix "-ge-b850v3-fw" to make it clear that the driver is for the bridges with
-the firmware specific for the GE B850v3.
-
-The hardware do not provide control over the video processing pipeline, as the
-two bridges behaves as a single one. The only interfaces exposed by the
-hardware are EDID, HPD, and interrupts.
-
-stdp4028-ge-b850v3-fw required properties:
- - compatible : "megachips,stdp4028-ge-b850v3-fw"
- - reg : I2C bus address
- - interrupts : one interrupt should be described here, as in
- <0 IRQ_TYPE_LEVEL_HIGH>
- - ports : One input port(reg = <0>) and one output port(reg = <1>)
-
-stdp2690-ge-b850v3-fw required properties:
- compatible : "megachips,stdp2690-ge-b850v3-fw"
- - reg : I2C bus address
- - ports : One input port(reg = <0>) and one output port(reg = <1>)
-
-Example:
-
-&mux2_i2c2 {
- clock-frequency = <100000>;
-
- stdp4028@73 {
- compatible = "megachips,stdp4028-ge-b850v3-fw";
- #address-cells = <1>;
- #size-cells = <0>;
-
- reg = <0x73>;
-
- interrupt-parent = <&gpio2>;
- interrupts = <0 IRQ_TYPE_LEVEL_HIGH>;
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
- stdp4028_in: endpoint {
- remote-endpoint = <&lvds0_out>;
- };
- };
- port@1 {
- reg = <1>;
- stdp4028_out: endpoint {
- remote-endpoint = <&stdp2690_in>;
- };
- };
- };
- };
-
- stdp2690@72 {
- compatible = "megachips,stdp2690-ge-b850v3-fw";
- #address-cells = <1>;
- #size-cells = <0>;
-
- reg = <0x72>;
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
- stdp2690_in: endpoint {
- remote-endpoint = <&stdp4028_out>;
- };
- };
-
- port@1 {
- reg = <1>;
- stdp2690_out: endpoint {
- /* Connector for external display */
- };
- };
- };
- };
-};
diff --git a/Documentation/devicetree/bindings/display/bridge/microchip,sam9x75-lvds.yaml b/Documentation/devicetree/bindings/display/bridge/microchip,sam9x75-lvds.yaml
new file mode 100644
index 000000000000..862ef441ac9f
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/microchip,sam9x75-lvds.yaml
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/microchip,sam9x75-lvds.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Microchip SAM9X75 LVDS Controller
+
+maintainers:
+ - Dharma Balasubiramani <dharma.b@microchip.com>
+
+description:
+ The Low Voltage Differential Signaling Controller (LVDSC) manages data
+ format conversion from the LCD Controller internal DPI bus to OpenLDI
+ LVDS output signals. LVDSC functions include bit mapping, balanced mode
+ management, and serializer.
+
+properties:
+ compatible:
+ const: microchip,sam9x75-lvds
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: Peripheral Bus Clock
+
+ clock-names:
+ items:
+ - const: pclk
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/clock/at91.h>
+ lvds-controller@f8060000 {
+ compatible = "microchip,sam9x75-lvds";
+ reg = <0xf8060000 0x100>;
+ interrupts = <56 IRQ_TYPE_LEVEL_HIGH 0>;
+ clocks = <&pmc PMC_TYPE_PERIPHERAL 56>;
+ clock-names = "pclk";
+ };
diff --git a/Documentation/devicetree/bindings/display/bridge/nwl-dsi.yaml b/Documentation/devicetree/bindings/display/bridge/nwl-dsi.yaml
new file mode 100644
index 000000000000..5952e6448ed4
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/nwl-dsi.yaml
@@ -0,0 +1,218 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/nwl-dsi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Northwest Logic MIPI-DSI controller on i.MX SoCs
+
+maintainers:
+ - Guido Gúnther <agx@sigxcpu.org>
+ - Robert Chiras <robert.chiras@nxp.com>
+
+description: |
+ NWL MIPI-DSI host controller found on i.MX8 platforms. This is a dsi bridge for
+ the SOCs NWL MIPI-DSI host controller.
+
+allOf:
+ - $ref: ../dsi-controller.yaml#
+
+properties:
+ compatible:
+ const: fsl,imx8mq-nwl-dsi
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ '#address-cells':
+ const: 1
+
+ '#size-cells':
+ const: 0
+
+ assigned-clock-parents: true
+ assigned-clock-rates: true
+ assigned-clocks: true
+
+ clocks:
+ items:
+ - description: DSI core clock
+ - description: RX_ESC clock (used in escape mode)
+ - description: TX_ESC clock (used in escape mode)
+ - description: PHY_REF clock
+ - description: LCDIF clock
+
+ clock-names:
+ items:
+ - const: core
+ - const: rx_esc
+ - const: tx_esc
+ - const: phy_ref
+ - const: lcdif
+
+ mux-controls:
+ description:
+ mux controller node to use for operating the input mux
+
+ phys:
+ maxItems: 1
+ description:
+ A phandle to the phy module representing the DPHY
+
+ phy-names:
+ items:
+ - const: dphy
+
+ power-domains:
+ maxItems: 1
+
+ resets:
+ items:
+ - description: dsi byte reset line
+ - description: dsi dpi reset line
+ - description: dsi esc reset line
+ - description: dsi pclk reset line
+
+ reset-names:
+ items:
+ - const: byte
+ - const: dpi
+ - const: esc
+ - const: pclk
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ description:
+ Input port node to receive pixel data from the
+ display controller. Exactly one endpoint must be
+ specified.
+ properties:
+ endpoint@0:
+ $ref: /schemas/graph.yaml#/properties/endpoint
+ description: sub-node describing the input from LCDIF
+
+ endpoint@1:
+ $ref: /schemas/graph.yaml#/properties/endpoint
+ description: sub-node describing the input from DCSS
+
+ oneOf:
+ - required:
+ - endpoint@0
+ - required:
+ - endpoint@1
+
+ unevaluatedProperties: false
+
+ port@1:
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ unevaluatedProperties: false
+ description:
+ DSI output port node to the panel or the next bridge
+ in the chain
+
+ properties:
+ endpoint:
+ $ref: /schemas/media/video-interfaces.yaml#
+ unevaluatedProperties: false
+
+ properties:
+ data-lanes:
+ description: array of physical DSI data lane indexes.
+ minItems: 1
+ items:
+ - const: 1
+ - const: 2
+ - const: 3
+ - const: 4
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - '#address-cells'
+ - '#size-cells'
+ - clock-names
+ - clocks
+ - compatible
+ - interrupts
+ - mux-controls
+ - phy-names
+ - phys
+ - ports
+ - reg
+ - reset-names
+ - resets
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/imx8mq-clock.h>
+ #include <dt-bindings/gpio/gpio.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/reset/imx8mq-reset.h>
+
+ dsi@30a00000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,imx8mq-nwl-dsi";
+ reg = <0x30A00000 0x300>;
+ clocks = <&clk IMX8MQ_CLK_DSI_CORE>,
+ <&clk IMX8MQ_CLK_DSI_AHB>,
+ <&clk IMX8MQ_CLK_DSI_IPG_DIV>,
+ <&clk IMX8MQ_CLK_DSI_PHY_REF>,
+ <&clk IMX8MQ_CLK_LCDIF_PIXEL>;
+ clock-names = "core", "rx_esc", "tx_esc", "phy_ref", "lcdif";
+ interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>;
+ mux-controls = <&mux 0>;
+ power-domains = <&pgc_mipi>;
+ resets = <&src IMX8MQ_RESET_MIPI_DSI_RESET_BYTE_N>,
+ <&src IMX8MQ_RESET_MIPI_DSI_DPI_RESET_N>,
+ <&src IMX8MQ_RESET_MIPI_DSI_ESC_RESET_N>,
+ <&src IMX8MQ_RESET_MIPI_DSI_PCLK_RESET_N>;
+ reset-names = "byte", "dpi", "esc", "pclk";
+ phys = <&dphy>;
+ phy-names = "dphy";
+
+ panel@0 {
+ compatible = "rocktech,jh057n00900";
+ reg = <0>;
+ vcc-supply = <&reg_2v8_p>;
+ iovcc-supply = <&reg_1v8_p>;
+ reset-gpios = <&gpio3 13 GPIO_ACTIVE_LOW>;
+ port {
+ panel_in: endpoint {
+ remote-endpoint = <&mipi_dsi_out>;
+ };
+ };
+ };
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ #size-cells = <0>;
+ #address-cells = <1>;
+ reg = <0>;
+ mipi_dsi_in: endpoint@0 {
+ reg = <0>;
+ remote-endpoint = <&lcdif_mipi_dsi>;
+ };
+ };
+ port@1 {
+ reg = <1>;
+ mipi_dsi_out: endpoint {
+ remote-endpoint = <&panel_in>;
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/bridge/nxp,ptn3460.yaml b/Documentation/devicetree/bindings/display/bridge/nxp,ptn3460.yaml
new file mode 100644
index 000000000000..70ec70922c13
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/nxp,ptn3460.yaml
@@ -0,0 +1,106 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/nxp,ptn3460.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NXP PTN3460 eDP to LVDS bridge
+
+maintainers:
+ - Sean Paul <seanpaul@chromium.org>
+
+properties:
+ compatible:
+ const: nxp,ptn3460
+
+ reg:
+ description: I2C address of the bridge
+ maxItems: 1
+
+ edid-emulation:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ The EDID emulation entry to use
+ Value Resolution Description
+ 0 1024x768 NXP Generic
+ 1 1920x1080 NXP Generic
+ 2 1920x1080 NXP Generic
+ 3 1600x900 Samsung LTM200KT
+ 4 1920x1080 Samsung LTM230HT
+ 5 1366x768 NXP Generic
+ 6 1600x900 ChiMei M215HGE
+ enum: [0, 1, 2, 3, 4, 5, 6]
+
+ powerdown-gpios:
+ description: GPIO connected to the PD_N signal.
+ maxItems: 1
+
+ reset-gpios:
+ description: GPIO connected to the RST_N signal.
+ maxItems: 1
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Video port for LVDS output
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Video port for eDP input
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - edid-emulation
+ - powerdown-gpios
+ - reset-gpios
+ - ports
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ bridge@20 {
+ compatible = "nxp,ptn3460";
+ reg = <0x20>;
+ edid-emulation = <5>;
+ powerdown-gpios = <&gpy2 5 GPIO_ACTIVE_HIGH>;
+ reset-gpios = <&gpx1 5 GPIO_ACTIVE_LOW>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ bridge_out: endpoint {
+ remote-endpoint = <&panel_in>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ bridge_in: endpoint {
+ remote-endpoint = <&dp_out>;
+ };
+ };
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/bridge/nxp,tda998x.yaml b/Documentation/devicetree/bindings/display/bridge/nxp,tda998x.yaml
new file mode 100644
index 000000000000..3fce9e698ea1
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/nxp,tda998x.yaml
@@ -0,0 +1,116 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/nxp,tda998x.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NXP TDA998x HDMI transmitter
+
+maintainers:
+ - Russell King <linux@armlinux.org.uk>
+
+properties:
+ compatible:
+ const: nxp,tda998x
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ video-ports:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ default: 0x230145
+ maximum: 0xffffff
+ description:
+ 24 bits value which defines how the video controller output is wired to
+ the TDA998x input.
+
+ audio-ports:
+ description:
+ Array of 2 values per DAI (Documentation/sound/soc/dai.rst).
+ The implementation allows one or two DAIs.
+ If two DAIs are defined, they must be of different type.
+ $ref: /schemas/types.yaml#/definitions/uint32-matrix
+ minItems: 1
+ maxItems: 2
+ items:
+ items:
+ - description: |
+ The first value defines the DAI type: TDA998x_SPDIF or TDA998x_I2S
+ (see include/dt-bindings/display/tda998x.h).
+ enum: [ 1, 2 ]
+ - description:
+ The second value defines the tda998x AP_ENA reg content when the
+ DAI in question is used.
+ maximum: 0xff
+
+ '#sound-dai-cells':
+ enum: [ 0, 1 ]
+
+ nxp,calib-gpios:
+ maxItems: 1
+ description:
+ Calibration GPIO, which must correspond with the gpio used for the
+ TDA998x interrupt pin.
+
+ port:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Parallel input port
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ type: object
+ description: Parallel input port
+
+ port@1:
+ type: object
+ description: HDMI output port
+
+required:
+ - compatible
+ - reg
+
+oneOf:
+ - required:
+ - port
+ - required:
+ - ports
+
+allOf:
+ - $ref: /schemas/sound/dai-common.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/display/tda998x.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ tda998x: hdmi-encoder@70 {
+ compatible = "nxp,tda998x";
+ reg = <0x70>;
+ interrupt-parent = <&gpio0>;
+ interrupts = <27 IRQ_TYPE_EDGE_FALLING>;
+ video-ports = <0x230145>;
+
+ #sound-dai-cells = <1>;
+ /* DAI-format / AP_ENA reg value */
+ audio-ports = <TDA998x_SPDIF 0x04>,
+ <TDA998x_I2S 0x03>;
+
+ port {
+ tda998x_in: endpoint {
+ remote-endpoint = <&lcdc_0>;
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/bridge/parade,ps8622.yaml b/Documentation/devicetree/bindings/display/bridge/parade,ps8622.yaml
new file mode 100644
index 000000000000..e6397ac2048b
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/parade,ps8622.yaml
@@ -0,0 +1,115 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/parade,ps8622.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Parade PS8622/PS8625 DisplayPort to LVDS Converter
+
+maintainers:
+ - Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+
+properties:
+ compatible:
+ enum:
+ - parade,ps8622
+ - parade,ps8625
+
+ reg:
+ maxItems: 1
+
+ lane-count:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [1, 2]
+ description: Number of DP lanes to use.
+
+ use-external-pwm:
+ type: boolean
+ description: Backlight will be controlled by an external PWM.
+
+ reset-gpios:
+ maxItems: 1
+ description: GPIO connected to RST_ pin.
+
+ sleep-gpios:
+ maxItems: 1
+ description: GPIO connected to PD_ pin.
+
+ vdd12-supply: true
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Video port for LVDS output.
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Video port for DisplayPort input.
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - reset-gpios
+ - sleep-gpios
+ - ports
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ const: parade,ps8622
+ then:
+ properties:
+ lane-count:
+ const: 1
+ else:
+ properties:
+ lane-count:
+ const: 2
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ lvds-bridge@48 {
+ compatible = "parade,ps8625";
+ reg = <0x48>;
+ sleep-gpios = <&gpx3 5 GPIO_ACTIVE_HIGH>;
+ reset-gpios = <&gpy7 7 GPIO_ACTIVE_HIGH>;
+ lane-count = <2>;
+ use-external-pwm;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ bridge_out: endpoint {
+ remote-endpoint = <&panel_in>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ bridge_in: endpoint {
+ remote-endpoint = <&dp_out>;
+ };
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/bridge/ps8622.txt b/Documentation/devicetree/bindings/display/bridge/ps8622.txt
deleted file mode 100644
index c989c3807f2b..000000000000
--- a/Documentation/devicetree/bindings/display/bridge/ps8622.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-ps8622-bridge bindings
-
-Required properties:
- - compatible: "parade,ps8622" or "parade,ps8625"
- - reg: first i2c address of the bridge
- - sleep-gpios: OF device-tree gpio specification for PD_ pin.
- - reset-gpios: OF device-tree gpio specification for RST_ pin.
-
-Optional properties:
- - lane-count: number of DP lanes to use
- - use-external-pwm: backlight will be controlled by an external PWM
- - video interfaces: Device node can contain video interface port
- nodes for panel according to [1].
-
-[1]: Documentation/devicetree/bindings/media/video-interfaces.txt
-
-Example:
- lvds-bridge@48 {
- compatible = "parade,ps8622";
- reg = <0x48>;
- sleep-gpios = <&gpc3 6 1 0 0>;
- reset-gpios = <&gpc3 1 1 0 0>;
- lane-count = <1>;
- ports {
- port@0 {
- bridge_out: endpoint {
- remote-endpoint = <&panel_in>;
- };
- };
- };
- };
diff --git a/Documentation/devicetree/bindings/display/bridge/ps8640.yaml b/Documentation/devicetree/bindings/display/bridge/ps8640.yaml
new file mode 100644
index 000000000000..5856450c5da7
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/ps8640.yaml
@@ -0,0 +1,121 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/ps8640.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MIPI DSI to eDP Video Format Converter
+
+maintainers:
+ - Nicolas Boichat <drinkcat@chromium.org>
+
+description: |
+ The PS8640 is a low power MIPI-to-eDP video format converter supporting
+ mobile devices with embedded panel resolutions up to 2048 x 1536. The
+ device accepts a single channel of MIPI DSI v1.1, with up to four lanes
+ plus clock, at a transmission rate up to 1.5Gbit/sec per lane. The
+ device outputs eDP v1.4, one or two lanes, at a link rate of up to
+ 3.24Gbit/sec per lane.
+
+properties:
+ compatible:
+ const: parade,ps8640
+
+ reg:
+ maxItems: 1
+ description: Base I2C address of the device.
+
+ powerdown-gpios:
+ maxItems: 1
+ description: GPIO connected to active low powerdown.
+
+ reset-gpios:
+ maxItems: 1
+ description: GPIO connected to active low reset.
+
+ vdd12-supply:
+ description: Regulator for 1.2V digital core power.
+
+ vdd33-supply:
+ description: Regulator for 3.3V digital core power.
+
+ aux-bus:
+ $ref: /schemas/display/dp-aux-bus.yaml#
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Video port for DSI input
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Video port for eDP output (panel or connector).
+
+ required:
+ - port@0
+
+required:
+ - compatible
+ - reg
+ - powerdown-gpios
+ - reset-gpios
+ - vdd12-supply
+ - vdd33-supply
+ - ports
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ ps8640: edp-bridge@18 {
+ compatible = "parade,ps8640";
+ reg = <0x18>;
+ powerdown-gpios = <&pio 116 GPIO_ACTIVE_LOW>;
+ reset-gpios = <&pio 115 GPIO_ACTIVE_LOW>;
+ vdd12-supply = <&ps8640_fixed_1v2>;
+ vdd33-supply = <&mt6397_vgp2_reg>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ ps8640_in: endpoint {
+ remote-endpoint = <&dsi0_out>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ ps8640_out: endpoint {
+ remote-endpoint = <&panel_in>;
+ };
+ };
+ };
+
+ aux-bus {
+ panel {
+ compatible = "boe,nv133fhm-n62";
+ power-supply = <&pp3300_dx_edp>;
+ backlight = <&backlight>;
+
+ port {
+ panel_in: endpoint {
+ remote-endpoint = <&ps8640_out>;
+ };
+ };
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/bridge/ptn3460.txt b/Documentation/devicetree/bindings/display/bridge/ptn3460.txt
deleted file mode 100644
index 361971ba104d..000000000000
--- a/Documentation/devicetree/bindings/display/bridge/ptn3460.txt
+++ /dev/null
@@ -1,39 +0,0 @@
-ptn3460 bridge bindings
-
-Required properties:
- - compatible: "nxp,ptn3460"
- - reg: i2c address of the bridge
- - powerdown-gpio: OF device-tree gpio specification for PD_N pin.
- - reset-gpio: OF device-tree gpio specification for RST_N pin.
- - edid-emulation: The EDID emulation entry to use
- +-------+------------+------------------+
- | Value | Resolution | Description |
- | 0 | 1024x768 | NXP Generic |
- | 1 | 1920x1080 | NXP Generic |
- | 2 | 1920x1080 | NXP Generic |
- | 3 | 1600x900 | Samsung LTM200KT |
- | 4 | 1920x1080 | Samsung LTM230HT |
- | 5 | 1366x768 | NXP Generic |
- | 6 | 1600x900 | ChiMei M215HGE |
- +-------+------------+------------------+
-
- - video interfaces: Device node can contain video interface port
- nodes for panel according to [1].
-
-[1]: Documentation/devicetree/bindings/media/video-interfaces.txt
-
-Example:
- lvds-bridge@20 {
- compatible = "nxp,ptn3460";
- reg = <0x20>;
- powerdown-gpio = <&gpy2 5 1 0 0>;
- reset-gpio = <&gpx1 5 1 0 0>;
- edid-emulation = <5>;
- ports {
- port@0 {
- bridge_out: endpoint {
- remote-endpoint = <&panel_in>;
- };
- };
- };
- };
diff --git a/Documentation/devicetree/bindings/display/bridge/renesas,dsi-csi2-tx.yaml b/Documentation/devicetree/bindings/display/bridge/renesas,dsi-csi2-tx.yaml
new file mode 100644
index 000000000000..c167795c63f6
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/renesas,dsi-csi2-tx.yaml
@@ -0,0 +1,120 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/renesas,dsi-csi2-tx.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas R-Car MIPI DSI/CSI-2 Encoder
+
+maintainers:
+ - Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
+
+description: |
+ This binding describes the MIPI DSI/CSI-2 encoder embedded in the Renesas
+ R-Car Gen4 SoCs. The encoder can operate in either DSI or CSI-2 mode, with up
+ to four data lanes.
+
+properties:
+ compatible:
+ enum:
+ - renesas,r8a779a0-dsi-csi2-tx # for V3U
+ - renesas,r8a779g0-dsi-csi2-tx # for V4H
+ - renesas,r8a779h0-dsi-csi2-tx # for V4M
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: Functional clock
+ - description: DSI (and CSI-2) functional clock
+ - description: PLL reference clock
+
+ clock-names:
+ items:
+ - const: fck
+ - const: dsi
+ - const: pll
+
+ power-domains:
+ maxItems: 1
+
+ resets:
+ maxItems: 1
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Parallel input port
+
+ port@1:
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ unevaluatedProperties: false
+ description: DSI/CSI-2 output port
+
+ properties:
+ endpoint:
+ $ref: /schemas/media/video-interfaces.yaml#
+ unevaluatedProperties: false
+
+ properties:
+ data-lanes:
+ minItems: 1
+ maxItems: 4
+
+ required:
+ - data-lanes
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - power-domains
+ - resets
+ - ports
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/r8a779a0-cpg-mssr.h>
+ #include <dt-bindings/power/r8a779a0-sysc.h>
+
+ dsi0: dsi-encoder@fed80000 {
+ compatible = "renesas,r8a779a0-dsi-csi2-tx";
+ reg = <0xfed80000 0x10000>;
+ power-domains = <&sysc R8A779A0_PD_ALWAYS_ON>;
+ clocks = <&cpg CPG_MOD 415>,
+ <&cpg CPG_CORE R8A779A0_CLK_DSI>,
+ <&cpg CPG_CORE R8A779A0_CLK_CP>;
+ clock-names = "fck", "dsi", "pll";
+ resets = <&cpg 415>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ dsi0_in: endpoint {
+ remote-endpoint = <&du_out_dsi0>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ dsi0_out: endpoint {
+ data-lanes = <1 2>;
+ remote-endpoint = <&sn65dsi86_in>;
+ };
+ };
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/display/bridge/renesas,dsi.yaml b/Documentation/devicetree/bindings/display/bridge/renesas,dsi.yaml
new file mode 100644
index 000000000000..5a99d9b9635e
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/renesas,dsi.yaml
@@ -0,0 +1,248 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/renesas,dsi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas RZ/G2L MIPI DSI Encoder
+
+maintainers:
+ - Biju Das <biju.das.jz@bp.renesas.com>
+
+description: |
+ This binding describes the MIPI DSI encoder embedded in the Renesas
+ RZ/G2L alike family of SoC's. The encoder can operate in DSI mode, with
+ up to four data lanes.
+
+allOf:
+ - $ref: /schemas/display/dsi-controller.yaml#
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - renesas,r9a07g044-mipi-dsi # RZ/G2{L,LC}
+ - renesas,r9a07g054-mipi-dsi # RZ/V2L
+ - const: renesas,rzg2l-mipi-dsi
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ items:
+ - description: Sequence operation channel 0 interrupt
+ - description: Sequence operation channel 1 interrupt
+ - description: Video-Input operation channel 1 interrupt
+ - description: DSI Packet Receive interrupt
+ - description: DSI Fatal Error interrupt
+ - description: DSI D-PHY PPI interrupt
+ - description: Debug interrupt
+
+ interrupt-names:
+ items:
+ - const: seq0
+ - const: seq1
+ - const: vin1
+ - const: rcv
+ - const: ferr
+ - const: ppi
+ - const: debug
+
+ clocks:
+ items:
+ - description: DSI D-PHY PLL multiplied clock
+ - description: DSI D-PHY system clock
+ - description: DSI AXI bus clock
+ - description: DSI Register access clock
+ - description: DSI Video clock
+ - description: DSI D-PHY Escape mode transmit clock
+
+ clock-names:
+ items:
+ - const: pllclk
+ - const: sysclk
+ - const: aclk
+ - const: pclk
+ - const: vclk
+ - const: lpclk
+
+ resets:
+ items:
+ - description: MIPI_DSI_CMN_RSTB
+ - description: MIPI_DSI_ARESET_N
+ - description: MIPI_DSI_PRESET_N
+
+ reset-names:
+ items:
+ - const: rst
+ - const: arst
+ - const: prst
+
+ power-domains:
+ maxItems: 1
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Parallel input port
+
+ port@1:
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ unevaluatedProperties: false
+ description: DSI output port
+
+ properties:
+ endpoint:
+ $ref: /schemas/media/video-interfaces.yaml#
+ unevaluatedProperties: false
+
+ properties:
+ data-lanes:
+ description: array of physical DSI data lane indexes.
+ minItems: 1
+ items:
+ - const: 1
+ - const: 2
+ - const: 3
+ - const: 4
+
+ required:
+ - data-lanes
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - interrupt-names
+ - clocks
+ - clock-names
+ - resets
+ - reset-names
+ - power-domains
+ - ports
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/r9a07g044-cpg.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ dsi0: dsi@10850000 {
+ compatible = "renesas,r9a07g044-mipi-dsi", "renesas,rzg2l-mipi-dsi";
+ reg = <0x10850000 0x20000>;
+ interrupts = <GIC_SPI 142 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 143 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 145 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 148 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "seq0", "seq1", "vin1", "rcv",
+ "ferr", "ppi", "debug";
+ clocks = <&cpg CPG_MOD R9A07G044_MIPI_DSI_PLLCLK>,
+ <&cpg CPG_MOD R9A07G044_MIPI_DSI_SYSCLK>,
+ <&cpg CPG_MOD R9A07G044_MIPI_DSI_ACLK>,
+ <&cpg CPG_MOD R9A07G044_MIPI_DSI_PCLK>,
+ <&cpg CPG_MOD R9A07G044_MIPI_DSI_VCLK>,
+ <&cpg CPG_MOD R9A07G044_MIPI_DSI_LPCLK>;
+ clock-names = "pllclk", "sysclk", "aclk", "pclk", "vclk", "lpclk";
+ resets = <&cpg R9A07G044_MIPI_DSI_CMN_RSTB>,
+ <&cpg R9A07G044_MIPI_DSI_ARESET_N>,
+ <&cpg R9A07G044_MIPI_DSI_PRESET_N>;
+ reset-names = "rst", "arst", "prst";
+ power-domains = <&cpg>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ dsi0_in: endpoint {
+ remote-endpoint = <&du_out_dsi0>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ dsi0_out: endpoint {
+ data-lanes = <1 2 3 4>;
+ remote-endpoint = <&adv7535_in>;
+ };
+ };
+ };
+ };
+
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ dsi1: dsi@10860000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "renesas,r9a07g044-mipi-dsi", "renesas,rzg2l-mipi-dsi";
+ reg = <0x10860000 0x20000>;
+ interrupts = <GIC_SPI 142 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 143 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 145 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 148 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "seq0", "seq1", "vin1", "rcv",
+ "ferr", "ppi", "debug";
+ clocks = <&cpg CPG_MOD R9A07G044_MIPI_DSI_PLLCLK>,
+ <&cpg CPG_MOD R9A07G044_MIPI_DSI_SYSCLK>,
+ <&cpg CPG_MOD R9A07G044_MIPI_DSI_ACLK>,
+ <&cpg CPG_MOD R9A07G044_MIPI_DSI_PCLK>,
+ <&cpg CPG_MOD R9A07G044_MIPI_DSI_VCLK>,
+ <&cpg CPG_MOD R9A07G044_MIPI_DSI_LPCLK>;
+ clock-names = "pllclk", "sysclk", "aclk", "pclk", "vclk", "lpclk";
+ resets = <&cpg R9A07G044_MIPI_DSI_CMN_RSTB>,
+ <&cpg R9A07G044_MIPI_DSI_ARESET_N>,
+ <&cpg R9A07G044_MIPI_DSI_PRESET_N>;
+ reset-names = "rst", "arst", "prst";
+ power-domains = <&cpg>;
+
+ panel@0 {
+ compatible = "rocktech,jh057n00900";
+ reg = <0>;
+ vcc-supply = <&reg_2v8_p>;
+ iovcc-supply = <&reg_1v8_p>;
+ reset-gpios = <&gpio3 13 GPIO_ACTIVE_LOW>;
+
+ port {
+ panel_in: endpoint {
+ remote-endpoint = <&dsi1_out>;
+ };
+ };
+ };
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ dsi1_in: endpoint {
+ remote-endpoint = <&du_out_dsi1>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ dsi1_out: endpoint {
+ data-lanes = <1 2 3 4>;
+ remote-endpoint = <&panel_in>;
+ };
+ };
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.txt b/Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.txt
deleted file mode 100644
index a41d280c3f9f..000000000000
--- a/Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.txt
+++ /dev/null
@@ -1,83 +0,0 @@
-Renesas Gen3 DWC HDMI TX Encoder
-================================
-
-The HDMI transmitter is a Synopsys DesignWare HDMI 1.4 TX controller IP
-with a companion PHY IP.
-
-These DT bindings follow the Synopsys DWC HDMI TX bindings defined in
-Documentation/devicetree/bindings/display/bridge/dw_hdmi.txt with the
-following device-specific properties.
-
-
-Required properties:
-
-- compatible : Shall contain one or more of
- - "renesas,r8a7795-hdmi" for R8A7795 (R-Car H3) compatible HDMI TX
- - "renesas,r8a7796-hdmi" for R8A7796 (R-Car M3-W) compatible HDMI TX
- - "renesas,r8a77965-hdmi" for R8A77965 (R-Car M3-N) compatible HDMI TX
- - "renesas,rcar-gen3-hdmi" for the generic R-Car Gen3 compatible HDMI TX
-
- When compatible with generic versions, nodes must list the SoC-specific
- version corresponding to the platform first, followed by the
- family-specific version.
-
-- reg: See dw_hdmi.txt.
-- interrupts: HDMI interrupt number
-- clocks: See dw_hdmi.txt.
-- clock-names: Shall contain "iahb" and "isfr" as defined in dw_hdmi.txt.
-- ports: See dw_hdmi.txt. The DWC HDMI shall have one port numbered 0
- corresponding to the video input of the controller and one port numbered 1
- corresponding to its HDMI output, and one port numbered 2 corresponding to
- sound input of the controller. Each port shall have a single endpoint.
-
-Optional properties:
-
-- power-domains: Shall reference the power domain that contains the DWC HDMI,
- if any.
-
-
-Example:
-
- hdmi0: hdmi@fead0000 {
- compatible = "renesas,r8a7795-dw-hdmi";
- reg = <0 0xfead0000 0 0x10000>;
- interrupts = <0 389 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&cpg CPG_CORE R8A7795_CLK_S0D4>, <&cpg CPG_MOD 729>;
- clock-names = "iahb", "isfr";
- power-domains = <&sysc R8A7795_PD_ALWAYS_ON>;
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
- port@0 {
- reg = <0>;
- dw_hdmi0_in: endpoint {
- remote-endpoint = <&du_out_hdmi0>;
- };
- };
- port@1 {
- reg = <1>;
- rcar_dw_hdmi0_out: endpoint {
- remote-endpoint = <&hdmi0_con>;
- };
- };
- port@2 {
- reg = <2>;
- rcar_dw_hdmi0_sound_in: endpoint {
- remote-endpoint = <&hdmi_sound_out>;
- };
- };
- };
- };
-
- hdmi0-out {
- compatible = "hdmi-connector";
- label = "HDMI0 OUT";
- type = "a";
-
- port {
- hdmi0_con: endpoint {
- remote-endpoint = <&rcar_dw_hdmi0_out>;
- };
- };
- };
diff --git a/Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.yaml b/Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.yaml
new file mode 100644
index 000000000000..e3ec697f89e7
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.yaml
@@ -0,0 +1,130 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/renesas,dw-hdmi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas R-Car DWC HDMI TX Encoder
+
+maintainers:
+ - Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
+
+description: |
+ The HDMI transmitter is a Synopsys DesignWare HDMI 1.4 TX controller IP
+ with a companion PHY IP.
+
+allOf:
+ - $ref: synopsys,dw-hdmi.yaml#
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - renesas,r8a774a1-hdmi # for RZ/G2M compatible HDMI TX
+ - renesas,r8a774b1-hdmi # for RZ/G2N compatible HDMI TX
+ - renesas,r8a774e1-hdmi # for RZ/G2H compatible HDMI TX
+ - renesas,r8a7795-hdmi # for R-Car H3 compatible HDMI TX
+ - renesas,r8a7796-hdmi # for R-Car M3-W compatible HDMI TX
+ - renesas,r8a77961-hdmi # for R-Car M3-W+ compatible HDMI TX
+ - renesas,r8a77965-hdmi # for R-Car M3-N compatible HDMI TX
+ - const: renesas,rcar-gen3-hdmi
+
+ reg-io-width:
+ const: 1
+
+ clocks:
+ maxItems: 2
+
+ clock-names:
+ maxItems: 2
+
+ resets:
+ maxItems: 1
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Parallel RGB input port
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: HDMI output port
+
+ port@2:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Sound input port
+
+ required:
+ - port@0
+ - port@1
+ - port@2
+
+ power-domains:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - resets
+ - interrupts
+ - ports
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/r8a7795-cpg-mssr.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/power/r8a7795-sysc.h>
+
+ hdmi@fead0000 {
+ compatible = "renesas,r8a7795-hdmi", "renesas,rcar-gen3-hdmi";
+ reg = <0xfead0000 0x10000>;
+ interrupts = <0 389 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&cpg CPG_CORE R8A7795_CLK_S0D4>, <&cpg CPG_MOD 729>;
+ clock-names = "iahb", "isfr";
+ power-domains = <&sysc R8A7795_PD_ALWAYS_ON>;
+ resets = <&cpg 729>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ dw_hdmi0_in: endpoint {
+ remote-endpoint = <&du_out_hdmi0>;
+ };
+ };
+ port@1 {
+ reg = <1>;
+ rcar_dw_hdmi0_out: endpoint {
+ remote-endpoint = <&hdmi0_con>;
+ };
+ };
+ port@2 {
+ reg = <2>;
+ rcar_dw_hdmi0_sound_in: endpoint {
+ remote-endpoint = <&hdmi_sound_out>;
+ };
+ };
+ };
+ };
+
+ hdmi0-out {
+ compatible = "hdmi-connector";
+ label = "HDMI0 OUT";
+ type = "a";
+
+ port {
+ hdmi0_con: endpoint {
+ remote-endpoint = <&rcar_dw_hdmi0_out>;
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/bridge/renesas,lvds.txt b/Documentation/devicetree/bindings/display/bridge/renesas,lvds.txt
deleted file mode 100644
index 3aeb0ec06fd0..000000000000
--- a/Documentation/devicetree/bindings/display/bridge/renesas,lvds.txt
+++ /dev/null
@@ -1,70 +0,0 @@
-Renesas R-Car LVDS Encoder
-==========================
-
-These DT bindings describe the LVDS encoder embedded in the Renesas R-Car
-Gen2, R-Car Gen3 and RZ/G SoCs.
-
-Required properties:
-
-- compatible : Shall contain one of
- - "renesas,r8a7743-lvds" for R8A7743 (RZ/G1M) compatible LVDS encoders
- - "renesas,r8a7790-lvds" for R8A7790 (R-Car H2) compatible LVDS encoders
- - "renesas,r8a7791-lvds" for R8A7791 (R-Car M2-W) compatible LVDS encoders
- - "renesas,r8a7793-lvds" for R8A7793 (R-Car M2-N) compatible LVDS encoders
- - "renesas,r8a7795-lvds" for R8A7795 (R-Car H3) compatible LVDS encoders
- - "renesas,r8a7796-lvds" for R8A7796 (R-Car M3-W) compatible LVDS encoders
- - "renesas,r8a77970-lvds" for R8A77970 (R-Car V3M) compatible LVDS encoders
- - "renesas,r8a77980-lvds" for R8A77980 (R-Car V3H) compatible LVDS encoders
- - "renesas,r8a77990-lvds" for R8A77990 (R-Car E3) compatible LVDS encoders
- - "renesas,r8a77995-lvds" for R8A77995 (R-Car D3) compatible LVDS encoders
-
-- reg: Base address and length for the memory-mapped registers
-- clocks: A list of phandles + clock-specifier pairs, one for each entry in
- the clock-names property.
-- clock-names: Name of the clocks. This property is model-dependent.
- - The functional clock, which mandatory for all models, shall be listed
- first, and shall be named "fck".
- - On R8A77990 and R8A77995, the LVDS encoder can use the EXTAL or
- DU_DOTCLKINx clocks. Those clocks are optional. When supplied they must be
- named "extal" and "dclkin.x" respectively, with "x" being the DU_DOTCLKIN
- numerical index.
- - When the clocks property only contains the functional clock, the
- clock-names property may be omitted.
-- resets: A phandle + reset specifier for the module reset
-
-Required nodes:
-
-The LVDS encoder has two video ports. Their connections are modelled using the
-OF graph bindings specified in Documentation/devicetree/bindings/graph.txt.
-
-- Video port 0 corresponds to the parallel RGB input
-- Video port 1 corresponds to the LVDS output
-
-Each port shall have a single endpoint.
-
-
-Example:
-
- lvds0: lvds@feb90000 {
- compatible = "renesas,r8a7790-lvds";
- reg = <0 0xfeb90000 0 0x1c>;
- clocks = <&cpg CPG_MOD 726>;
- resets = <&cpg 726>;
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
- lvds0_in: endpoint {
- remote-endpoint = <&du_out_lvds0>;
- };
- };
- port@1 {
- reg = <1>;
- lvds0_out: endpoint {
- };
- };
- };
- };
diff --git a/Documentation/devicetree/bindings/display/bridge/renesas,lvds.yaml b/Documentation/devicetree/bindings/display/bridge/renesas,lvds.yaml
new file mode 100644
index 000000000000..bb9dbfb9beaf
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/renesas,lvds.yaml
@@ -0,0 +1,235 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/renesas,lvds.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas R-Car LVDS Encoder
+
+maintainers:
+ - Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
+
+description: |
+ These DT bindings describe the LVDS encoder embedded in the Renesas R-Car
+ Gen2, R-Car Gen3, RZ/G1 and RZ/G2 SoCs.
+
+properties:
+ compatible:
+ enum:
+ - renesas,r8a7742-lvds # for RZ/G1H compatible LVDS encoders
+ - renesas,r8a7743-lvds # for RZ/G1M compatible LVDS encoders
+ - renesas,r8a7744-lvds # for RZ/G1N compatible LVDS encoders
+ - renesas,r8a774a1-lvds # for RZ/G2M compatible LVDS encoders
+ - renesas,r8a774b1-lvds # for RZ/G2N compatible LVDS encoders
+ - renesas,r8a774c0-lvds # for RZ/G2E compatible LVDS encoders
+ - renesas,r8a774e1-lvds # for RZ/G2H compatible LVDS encoders
+ - renesas,r8a7790-lvds # for R-Car H2 compatible LVDS encoders
+ - renesas,r8a7791-lvds # for R-Car M2-W compatible LVDS encoders
+ - renesas,r8a7793-lvds # for R-Car M2-N compatible LVDS encoders
+ - renesas,r8a7795-lvds # for R-Car H3 compatible LVDS encoders
+ - renesas,r8a7796-lvds # for R-Car M3-W compatible LVDS encoders
+ - renesas,r8a77961-lvds # for R-Car M3-W+ compatible LVDS encoders
+ - renesas,r8a77965-lvds # for R-Car M3-N compatible LVDS encoders
+ - renesas,r8a77970-lvds # for R-Car V3M compatible LVDS encoders
+ - renesas,r8a77980-lvds # for R-Car V3H compatible LVDS encoders
+ - renesas,r8a77990-lvds # for R-Car E3 compatible LVDS encoders
+ - renesas,r8a77995-lvds # for R-Car D3 compatible LVDS encoders
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ minItems: 1
+ maxItems: 4
+
+ clock-names:
+ minItems: 1
+ maxItems: 4
+
+ resets:
+ maxItems: 1
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Parallel RGB input port
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: LVDS output port
+
+ required:
+ - port@0
+ - port@1
+
+ power-domains:
+ maxItems: 1
+
+ renesas,companion:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ phandle to the companion LVDS encoder. This property is mandatory
+ for the first LVDS encoder on R-Car D3 and E3, and RZ/G2E SoCs, and shall
+ point to the second encoder to be used as a companion in dual-link mode.
+ It shall not be set for any other LVDS encoder.
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - power-domains
+ - resets
+ - ports
+
+if:
+ properties:
+ compatible:
+ enum:
+ - renesas,r8a774c0-lvds
+ - renesas,r8a77990-lvds
+ - renesas,r8a77995-lvds
+then:
+ properties:
+ clocks:
+ minItems: 1
+ items:
+ - description: Functional clock
+ - description: EXTAL input clock
+ - description: DU_DOTCLKIN0 input clock
+ - description: DU_DOTCLKIN1 input clock
+
+ clock-names:
+ minItems: 1
+ items:
+ - const: fck
+ # The LVDS encoder can use the EXTAL or DU_DOTCLKINx clocks.
+ # These clocks are optional.
+ - enum:
+ - extal
+ - dclkin.0
+ - dclkin.1
+ - enum:
+ - extal
+ - dclkin.0
+ - dclkin.1
+ - enum:
+ - extal
+ - dclkin.0
+ - dclkin.1
+
+ required:
+ - clock-names
+
+else:
+ properties:
+ clocks:
+ items:
+ - description: Functional clock
+
+ clock-names:
+ items:
+ - const: fck
+
+ renesas,companion: false
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/renesas-cpg-mssr.h>
+ #include <dt-bindings/power/r8a7795-sysc.h>
+
+ lvds@feb90000 {
+ compatible = "renesas,r8a7795-lvds";
+ reg = <0xfeb90000 0x14>;
+ clocks = <&cpg CPG_MOD 727>;
+ power-domains = <&sysc R8A7795_PD_ALWAYS_ON>;
+ resets = <&cpg 727>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ lvds_in: endpoint {
+ remote-endpoint = <&du_out_lvds0>;
+ };
+ };
+ port@1 {
+ reg = <1>;
+ lvds_out: endpoint {
+ remote-endpoint = <&panel_in>;
+ };
+ };
+ };
+ };
+
+ - |
+ #include <dt-bindings/clock/renesas-cpg-mssr.h>
+ #include <dt-bindings/power/r8a77990-sysc.h>
+
+ lvds0: lvds@feb90000 {
+ compatible = "renesas,r8a77990-lvds";
+ reg = <0xfeb90000 0x20>;
+ clocks = <&cpg CPG_MOD 727>,
+ <&x13_clk>,
+ <&extal_clk>;
+ clock-names = "fck", "dclkin.0", "extal";
+ power-domains = <&sysc R8A77990_PD_ALWAYS_ON>;
+ resets = <&cpg 727>;
+
+ renesas,companion = <&lvds1>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ lvds0_in: endpoint {
+ remote-endpoint = <&du_out_lvds0>;
+ };
+ };
+ port@1 {
+ reg = <1>;
+ lvds0_out: endpoint {
+ remote-endpoint = <&panel_in1>;
+ };
+ };
+ };
+ };
+
+ lvds1: lvds@feb90100 {
+ compatible = "renesas,r8a77990-lvds";
+ reg = <0xfeb90100 0x20>;
+ clocks = <&cpg CPG_MOD 727>,
+ <&x13_clk>,
+ <&extal_clk>;
+ clock-names = "fck", "dclkin.0", "extal";
+ power-domains = <&sysc R8A77990_PD_ALWAYS_ON>;
+ resets = <&cpg 726>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ lvds1_in: endpoint {
+ remote-endpoint = <&du_out_lvds1>;
+ };
+ };
+ port@1 {
+ reg = <1>;
+ lvds1_out: endpoint {
+ remote-endpoint = <&panel_in2>;
+ };
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/bridge/samsung,mipi-dsim.yaml b/Documentation/devicetree/bindings/display/bridge/samsung,mipi-dsim.yaml
new file mode 100644
index 000000000000..ad279f0993fa
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/samsung,mipi-dsim.yaml
@@ -0,0 +1,309 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/samsung,mipi-dsim.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung MIPI DSIM bridge controller
+
+maintainers:
+ - Inki Dae <inki.dae@samsung.com>
+ - Jagan Teki <jagan@amarulasolutions.com>
+ - Marek Szyprowski <m.szyprowski@samsung.com>
+
+description: |
+ Samsung MIPI DSIM bridge controller can be found it on Exynos
+ and i.MX8M Mini/Nano/Plus SoC's.
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - samsung,exynos3250-mipi-dsi
+ - samsung,exynos4210-mipi-dsi
+ - samsung,exynos5410-mipi-dsi
+ - samsung,exynos5422-mipi-dsi
+ - samsung,exynos5433-mipi-dsi
+ - samsung,exynos7870-mipi-dsi
+ - fsl,imx8mm-mipi-dsim
+ - fsl,imx8mp-mipi-dsim
+ - items:
+ - enum:
+ - fsl,imx7d-mipi-dsim
+ - fsl,imx8mn-mipi-dsim
+ - const: fsl,imx8mm-mipi-dsim
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ '#address-cells':
+ const: 1
+
+ '#size-cells':
+ const: 0
+
+ clocks:
+ minItems: 2
+ maxItems: 5
+
+ clock-names:
+ minItems: 2
+ maxItems: 5
+
+ samsung,phy-type:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: phandle to the samsung phy-type
+
+ power-domains:
+ maxItems: 1
+
+ samsung,power-domain:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description: phandle to the associated samsung power domain
+
+ vddcore-supply:
+ description: MIPI DSIM Core voltage supply (e.g. 1.1V)
+
+ vddio-supply:
+ description: MIPI DSIM I/O and PLL voltage supply (e.g. 1.8V)
+
+ samsung,burst-clock-frequency:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ DSIM high speed burst mode frequency. If absent,
+ the pixel clock from the attached device or bridge
+ will be used instead.
+
+ samsung,esc-clock-frequency:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ DSIM escape mode frequency.
+
+ samsung,pll-clock-frequency:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ DSIM oscillator clock frequency. If absent, the clock frequency
+ of sclk_mipi will be used instead.
+
+ phys:
+ maxItems: 1
+
+ phy-names:
+ const: dsim
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Input port node to receive pixel data from the
+ display controller. Exactly one endpoint must be
+ specified.
+
+ port@1:
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ unevaluatedProperties: false
+ description:
+ DSI output port node to the panel or the next bridge
+ in the chain.
+
+ properties:
+ endpoint:
+ $ref: /schemas/media/video-interfaces.yaml#
+ unevaluatedProperties: false
+
+ properties:
+ data-lanes:
+ minItems: 1
+ maxItems: 4
+ uniqueItems: true
+ items:
+ enum: [ 1, 2, 3, 4 ]
+
+ lane-polarities:
+ minItems: 1
+ maxItems: 5
+ description:
+ The Samsung MIPI DSI IP requires that all the data lanes have
+ the same polarity.
+
+ dependencies:
+ lane-polarities: [data-lanes]
+
+required:
+ - clock-names
+ - clocks
+ - compatible
+ - interrupts
+ - reg
+ - samsung,esc-clock-frequency
+
+allOf:
+ - $ref: ../dsi-controller.yaml#
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos7870-mipi-dsi
+
+ then:
+ properties:
+ clocks:
+ minItems: 4
+ maxItems: 4
+
+ clock-names:
+ items:
+ - const: bus
+ - const: pll
+ - const: byte
+ - const: esc
+
+ ports:
+ required:
+ - port@0
+
+ required:
+ - ports
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos5433-mipi-dsi
+
+ then:
+ properties:
+ clocks:
+ minItems: 5
+
+ clock-names:
+ items:
+ - const: bus_clk
+ - const: phyclk_mipidphy0_bitclkdiv8
+ - const: phyclk_mipidphy0_rxclkesc0
+ - const: sclk_rgb_vclk_to_dsim0
+ - const: sclk_mipi
+
+ ports:
+ required:
+ - port@0
+
+ required:
+ - ports
+ - vddcore-supply
+ - vddio-supply
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos5410-mipi-dsi
+
+ then:
+ properties:
+ clocks:
+ minItems: 2
+
+ clock-names:
+ items:
+ - const: bus_clk
+ - const: pll_clk
+
+ required:
+ - vddcore-supply
+ - vddio-supply
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos4210-mipi-dsi
+
+ then:
+ properties:
+ clocks:
+ minItems: 2
+
+ clock-names:
+ items:
+ - const: bus_clk
+ - const: sclk_mipi
+
+ required:
+ - vddcore-supply
+ - vddio-supply
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos3250-mipi-dsi
+
+ then:
+ properties:
+ clocks:
+ minItems: 2
+
+ clock-names:
+ items:
+ - const: bus_clk
+ - const: pll_clk
+
+ required:
+ - vddcore-supply
+ - vddio-supply
+ - samsung,phy-type
+
+additionalProperties:
+ type: object
+
+examples:
+ - |
+ #include <dt-bindings/clock/exynos5433.h>
+ #include <dt-bindings/gpio/gpio.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ dsi@13900000 {
+ compatible = "samsung,exynos5433-mipi-dsi";
+ reg = <0x13900000 0xC0>;
+ interrupts = <GIC_SPI 205 IRQ_TYPE_LEVEL_HIGH>;
+ phys = <&mipi_phy 1>;
+ phy-names = "dsim";
+ clocks = <&cmu_disp CLK_PCLK_DSIM0>,
+ <&cmu_disp CLK_PHYCLK_MIPIDPHY0_BITCLKDIV8>,
+ <&cmu_disp CLK_PHYCLK_MIPIDPHY0_RXCLKESC0>,
+ <&cmu_disp CLK_SCLK_RGB_VCLK_TO_DSIM0>,
+ <&cmu_disp CLK_SCLK_DSIM0>;
+ clock-names = "bus_clk",
+ "phyclk_mipidphy0_bitclkdiv8",
+ "phyclk_mipidphy0_rxclkesc0",
+ "sclk_rgb_vclk_to_dsim0",
+ "sclk_mipi";
+ power-domains = <&pd_disp>;
+ vddcore-supply = <&ldo6_reg>;
+ vddio-supply = <&ldo7_reg>;
+ samsung,burst-clock-frequency = <512000000>;
+ samsung,esc-clock-frequency = <16000000>;
+ samsung,pll-clock-frequency = <24000000>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&te_irq>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ dsi_to_mic: endpoint {
+ remote-endpoint = <&mic_to_dsi>;
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/bridge/sii902x.txt b/Documentation/devicetree/bindings/display/bridge/sii902x.txt
deleted file mode 100644
index 72d2dc6c3e6b..000000000000
--- a/Documentation/devicetree/bindings/display/bridge/sii902x.txt
+++ /dev/null
@@ -1,35 +0,0 @@
-sii902x HDMI bridge bindings
-
-Required properties:
- - compatible: "sil,sii9022"
- - reg: i2c address of the bridge
-
-Optional properties:
- - interrupts: describe the interrupt line used to inform the host
- about hotplug events.
- - reset-gpios: OF device-tree gpio specification for RST_N pin.
-
-Optional subnodes:
- - video input: this subnode can contain a video input port node
- to connect the bridge to a display controller output (See this
- documentation [1]).
-
-[1]: Documentation/devicetree/bindings/media/video-interfaces.txt
-
-Example:
- hdmi-bridge@39 {
- compatible = "sil,sii9022";
- reg = <0x39>;
- reset-gpios = <&pioA 1 0>;
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
- bridge_in: endpoint {
- remote-endpoint = <&dc_out>;
- };
- };
- };
- };
diff --git a/Documentation/devicetree/bindings/display/bridge/sii9234.txt b/Documentation/devicetree/bindings/display/bridge/sii9234.txt
deleted file mode 100644
index a55bf77bd960..000000000000
--- a/Documentation/devicetree/bindings/display/bridge/sii9234.txt
+++ /dev/null
@@ -1,49 +0,0 @@
-Silicon Image SiI9234 HDMI/MHL bridge bindings
-
-Required properties:
- - compatible : "sil,sii9234".
- - reg : I2C address for TPI interface, use 0x39
- - avcc33-supply : MHL/USB Switch Supply Voltage (3.3V)
- - iovcc18-supply : I/O Supply Voltage (1.8V)
- - avcc12-supply : TMDS Analog Supply Voltage (1.2V)
- - cvcc12-supply : Digital Core Supply Voltage (1.2V)
- - interrupts: interrupt specifier of INT pin
- - reset-gpios: gpio specifier of RESET pin (active low)
- - video interfaces: Device node can contain two video interface port
- nodes for HDMI encoder and connector according to [1].
- - port@0 - MHL to HDMI
- - port@1 - MHL to connector
-
-[1]: Documentation/devicetree/bindings/media/video-interfaces.txt
-
-
-Example:
- sii9234@39 {
- compatible = "sil,sii9234";
- reg = <0x39>;
- avcc33-supply = <&vcc33mhl>;
- iovcc18-supply = <&vcc18mhl>;
- avcc12-supply = <&vsil12>;
- cvcc12-supply = <&vsil12>;
- reset-gpios = <&gpf3 4 GPIO_ACTIVE_LOW>;
- interrupt-parent = <&gpf3>;
- interrupts = <5 IRQ_TYPE_LEVEL_HIGH>;
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
- mhl_to_hdmi: endpoint {
- remote-endpoint = <&hdmi_to_mhl>;
- };
- };
- port@1 {
- reg = <1>;
- mhl_to_connector: endpoint {
- remote-endpoint = <&connector_to_mhl>;
- };
- };
- };
- };
diff --git a/Documentation/devicetree/bindings/display/bridge/sil,sii8620.yaml b/Documentation/devicetree/bindings/display/bridge/sil,sii8620.yaml
new file mode 100644
index 000000000000..6d1a36b76fcb
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/sil,sii8620.yaml
@@ -0,0 +1,108 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/sil,sii8620.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Silicon Image SiI8620 HDMI/MHL bridge
+
+maintainers:
+ - Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+
+properties:
+ compatible:
+ const: sil,sii8620
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ items:
+ - const: xtal
+
+ cvcc10-supply:
+ description: Digital Core Supply Voltage (1.0V)
+
+ interrupts:
+ maxItems: 1
+
+ iovcc18-supply:
+ description: I/O Supply Voltage (1.8V)
+
+ reset-gpios:
+ maxItems: 1
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+ unevaluatedProperties: false
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Video port for HDMI (encoder) input
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ MHL to connector port
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - cvcc10-supply
+ - interrupts
+ - iovcc18-supply
+ - reset-gpios
+ - ports
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ bridge@39 {
+ reg = <0x39>;
+ compatible = "sil,sii8620";
+ cvcc10-supply = <&ldo36_reg>;
+ iovcc18-supply = <&ldo34_reg>;
+ interrupt-parent = <&gpf0>;
+ interrupts = <2 IRQ_TYPE_LEVEL_HIGH>;
+ reset-gpios = <&gpv7 0 GPIO_ACTIVE_LOW>;
+ clocks = <&pmu_system_controller 0>;
+ clock-names = "xtal";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ mhl_to_hdmi: endpoint {
+ remote-endpoint = <&hdmi_to_mhl>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ mhl_to_musb_con: endpoint {
+ remote-endpoint = <&musb_con_to_mhl>;
+ };
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/bridge/sil,sii9022.yaml b/Documentation/devicetree/bindings/display/bridge/sil,sii9022.yaml
new file mode 100644
index 000000000000..17ea06719b56
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/sil,sii9022.yaml
@@ -0,0 +1,147 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/sil,sii9022.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Silicon Image sii902x HDMI bridge
+
+maintainers:
+ - Boris Brezillon <bbrezillon@kernel.org>
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - sil,sii9022-cpi # CEC Programming Interface
+ - sil,sii9022-tpi # Transmitter Programming Interface
+ - const: sil,sii9022
+ - const: sil,sii9022
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+ description: Interrupt line used to inform the host about hotplug events.
+
+ reset-gpios:
+ maxItems: 1
+
+ iovcc-supply:
+ description: I/O Supply Voltage (1.8V or 3.3V)
+
+ cvcc12-supply:
+ description: Digital Core Supply Voltage (1.2V)
+
+ '#sound-dai-cells':
+ enum: [ 0, 1 ]
+ description: |
+ <0> if only I2S or S/PDIF pin is wired,
+ <1> if both are wired.
+ HDMI audio is configured only if this property is found.
+ If HDMI audio is configured, the sii902x device becomes an I2S and/or
+ S/PDIF audio codec component (e.g. a digital audio sink), that can be
+ used in configuring full audio devices with simple-card or
+ audio-graph-card bindings. See their binding documents on how to describe
+ the way the
+ sii902x device is connected to the rest of the audio system:
+ Documentation/devicetree/bindings/sound/simple-card.yaml
+ Documentation/devicetree/bindings/sound/audio-graph-card.yaml
+ Note: In case of the audio-graph-card binding the used port index should
+ be 3.
+
+ sil,i2s-data-lanes:
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ minItems: 1
+ maxItems: 4
+ uniqueItems: true
+ items:
+ enum: [ 0, 1, 2, 3 ]
+ description:
+ Each integer indicates which I2S pin is connected to which audio FIFO.
+ The first integer selects the I2S audio pin for the first audio FIFO#0
+ (HDMI channels 1&2), the second for FIFO#1 (HDMI channels 3&4), and so
+ on. There are 4 FIFOs and 4 I2S pins (SD0 - SD3). Any I2S pin can be
+ connected to any FIFO, but there can be no gaps. E.g. an I2S pin must be
+ mapped to FIFO#0 and FIFO#1 before mapping a channel to FIFO#2. The
+ default value is <0>, describing SD0 pin being routed to HDMI audio
+ FIFO#0.
+
+ clocks:
+ maxItems: 1
+ description: MCLK input. MCLK can be used to produce HDMI audio CTS values.
+
+ clock-names:
+ const: mclk
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ unevaluatedProperties: false
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ description: Parallel RGB input port
+
+ properties:
+ endpoint:
+ $ref: /schemas/graph.yaml#/$defs/endpoint-base
+ unevaluatedProperties: false
+
+ properties:
+ bus-width:
+ description:
+ Endpoint bus width.
+ enum: [ 16, 18, 24 ]
+ default: 24
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: HDMI output port
+
+ port@3:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Sound input port
+
+required:
+ - compatible
+ - reg
+
+allOf:
+ - $ref: /schemas/sound/dai-common.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ hdmi-bridge@39 {
+ compatible = "sil,sii9022";
+ reg = <0x39>;
+ reset-gpios = <&pioA 1 0>;
+ iovcc-supply = <&v3v3_hdmi>;
+ cvcc12-supply = <&v1v2_hdmi>;
+
+ #sound-dai-cells = <0>;
+ sil,i2s-data-lanes = < 0 1 2 >;
+ clocks = <&mclk>;
+ clock-names = "mclk";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ bridge_in: endpoint {
+ remote-endpoint = <&dc_out>;
+ };
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/bridge/sil,sii9234.yaml b/Documentation/devicetree/bindings/display/bridge/sil,sii9234.yaml
new file mode 100644
index 000000000000..176181d25530
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/sil,sii9234.yaml
@@ -0,0 +1,110 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/sil,sii9234.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Silicon Image SiI9234 HDMI/MHL bridge
+
+maintainers:
+ - Maciej Purski <m.purski@samsung.com>
+
+properties:
+ compatible:
+ const: sil,sii9234
+
+ reg:
+ description: I2C address for TPI interface
+ maxItems: 1
+
+ avcc12-supply:
+ description: TMDS Analog Supply Voltage, 1.2V
+
+ avcc33-supply:
+ description: MHL/USB Switch Supply Voltage, 3.3V
+
+ cvcc12-supply:
+ description: Digital Core Supply Voltage, 1.2V
+
+ iovcc18-supply:
+ description: I/O voltage supply, 1.8V
+
+ interrupts:
+ maxItems: 1
+
+ reset-gpios:
+ description: GPIO connected to the reset pin.
+ maxItems: 1
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Video port for HDMI (encoder) input
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ MHL to connector port
+
+ required:
+ - port@0
+
+required:
+ - compatible
+ - reg
+ - avcc12-supply
+ - avcc33-supply
+ - cvcc12-supply
+ - iovcc18-supply
+ - interrupts
+ - reset-gpios
+ - ports
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ bridge@39 {
+ compatible = "sil,sii9234";
+ reg = <0x39>;
+ avcc12-supply = <&vsil12>;
+ avcc33-supply = <&vcc33mhl>;
+ cvcc12-supply = <&vsil12>;
+ iovcc18-supply = <&vcc18mhl>;
+ interrupt-parent = <&gpf3>;
+ interrupts = <5 IRQ_TYPE_LEVEL_HIGH>;
+ reset-gpios = <&gpf3 4 GPIO_ACTIVE_LOW>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ mhl_to_hdmi: endpoint {
+ remote-endpoint = <&hdmi_to_mhl>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ mhl_to_connector: endpoint {
+ remote-endpoint = <&connector_to_mhl>;
+ };
+ };
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/bridge/sil-sii8620.txt b/Documentation/devicetree/bindings/display/bridge/sil-sii8620.txt
deleted file mode 100644
index b05052f7d62f..000000000000
--- a/Documentation/devicetree/bindings/display/bridge/sil-sii8620.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-Silicon Image SiI8620 HDMI/MHL bridge bindings
-
-Required properties:
- - compatible: "sil,sii8620"
- - reg: i2c address of the bridge
- - cvcc10-supply: Digital Core Supply Voltage (1.0V)
- - iovcc18-supply: I/O Supply Voltage (1.8V)
- - interrupts: interrupt specifier of INT pin
- - reset-gpios: gpio specifier of RESET pin
- - clocks, clock-names: specification and name of "xtal" clock
- - video interfaces: Device node can contain video interface port
- node for HDMI encoder according to [1].
-
-[1]: Documentation/devicetree/bindings/media/video-interfaces.txt
-
-Example:
- sii8620@39 {
- reg = <0x39>;
- compatible = "sil,sii8620";
- cvcc10-supply = <&ldo36_reg>;
- iovcc18-supply = <&ldo34_reg>;
- interrupt-parent = <&gpf0>;
- interrupts = <2 0>;
- reset-gpio = <&gpv7 0 0>;
- clocks = <&pmu_system_controller 0>;
- clock-names = "xtal";
-
- port {
- mhl_to_hdmi: endpoint {
- remote-endpoint = <&hdmi_to_mhl>;
- };
- };
- };
diff --git a/Documentation/devicetree/bindings/display/bridge/simple-bridge.yaml b/Documentation/devicetree/bindings/display/bridge/simple-bridge.yaml
new file mode 100644
index 000000000000..9ef587d46506
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/simple-bridge.yaml
@@ -0,0 +1,93 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/simple-bridge.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Transparent non-programmable DRM bridges
+
+maintainers:
+ - Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
+ - Maxime Ripard <mripard@kernel.org>
+
+description: |
+ This binding supports transparent non-programmable bridges that don't require
+ any configuration, with a single input and a single output.
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - ti,ths8134a
+ - ti,ths8134b
+ - const: ti,ths8134
+ - items:
+ - const: corpro,gm7123
+ - const: adi,adv7123
+ - enum:
+ - adi,adv7123
+ - dumb-vga-dac
+ - radxa,ra620
+ - realtek,rtd2171
+ - ti,opa362
+ - ti,ths8134
+ - ti,ths8135
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: The bridge input
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: The bridge output
+
+ required:
+ - port@0
+ - port@1
+
+ enable-gpios:
+ maxItems: 1
+ description: GPIO controlling bridge enable
+
+ vdd-supply:
+ description: Power supply for the bridge
+
+required:
+ - compatible
+ - ports
+
+additionalProperties: false
+
+examples:
+ - |
+ bridge {
+ compatible = "ti,ths8134a", "ti,ths8134";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ vga_bridge_in: endpoint {
+ remote-endpoint = <&tcon0_out_vga>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ vga_bridge_out: endpoint {
+ remote-endpoint = <&vga_con_in>;
+ };
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/bridge/snps,dw-mipi-dsi.yaml b/Documentation/devicetree/bindings/display/bridge/snps,dw-mipi-dsi.yaml
new file mode 100644
index 000000000000..8747b95ec20d
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/snps,dw-mipi-dsi.yaml
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/snps,dw-mipi-dsi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Synopsys DesignWare MIPI DSI host controller
+
+maintainers:
+ - Philippe CORNU <philippe.cornu@foss.st.com>
+
+description: |
+ This document defines device tree properties for the Synopsys DesignWare MIPI
+ DSI host controller. It doesn't constitute a device tree binding specification
+ by itself but is meant to be referenced by platform-specific device tree
+ bindings.
+
+ When referenced from platform device tree bindings the properties defined in
+ this document are defined as follows. The platform device tree bindings are
+ responsible for defining whether each property is required or optional.
+
+allOf:
+ - $ref: ../dsi-controller.yaml#
+
+properties:
+ reg:
+ maxItems: 1
+
+ clocks: true
+
+ clock-names: true
+
+ resets:
+ maxItems: 1
+
+ reset-names:
+ const: apb
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Input node to receive pixel data.
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: DSI output node to panel.
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - clock-names
+ - clocks
+ - ports
+ - reg
+
+additionalProperties: true
diff --git a/Documentation/devicetree/bindings/display/bridge/solomon,ssd2825.yaml b/Documentation/devicetree/bindings/display/bridge/solomon,ssd2825.yaml
new file mode 100644
index 000000000000..e2d293d623b8
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/solomon,ssd2825.yaml
@@ -0,0 +1,141 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/solomon,ssd2825.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Solomon SSD2825 RGB to MIPI-DSI bridge
+
+maintainers:
+ - Svyatoslav Ryhel <clamor95@gmail.com>
+
+allOf:
+ - $ref: /schemas/spi/spi-peripheral-props.yaml#
+
+properties:
+ compatible:
+ const: solomon,ssd2825
+
+ reg:
+ maxItems: 1
+
+ reset-gpios: true
+
+ dvdd-supply:
+ description: Regulator for 1.2V digital power supply.
+
+ avdd-supply:
+ description: Regulator for 1.2V analog power supply.
+
+ vddio-supply:
+ description: Regulator for 1.8V IO power supply.
+
+ spi-max-frequency:
+ maximum: 1000000
+
+ spi-cpha: true
+ spi-cpol: true
+
+ clocks:
+ maxItems: 1
+ description: Reference TX_CLK used before PLL is locked.
+
+ solomon,hs-zero-delay-ns:
+ description:
+ HS zero delay period
+ minimum: 0
+ maximum: 1700
+ default: 133
+
+ solomon,hs-prep-delay-ns:
+ description:
+ HS prep delay period
+ minimum: 0
+ maximum: 1728
+ default: 40
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ unevaluatedProperties: false
+ description:
+ Video port for RGB input
+
+ properties:
+ endpoint:
+ $ref: /schemas/graph.yaml#/$defs/endpoint-base
+ unevaluatedProperties: false
+
+ properties:
+ bus-width:
+ enum: [ 16, 18, 24 ]
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Video port for DSI output (panel or connector)
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - ports
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ spi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ dsi@2 {
+ compatible = "solomon,ssd2825";
+ reg = <2>;
+
+ spi-max-frequency = <1000000>;
+
+ spi-cpha;
+ spi-cpol;
+
+ reset-gpios = <&gpio 114 GPIO_ACTIVE_LOW>;
+
+ dvdd-supply = <&vdd_1v2>;
+ avdd-supply = <&vdd_1v2>;
+ vddio-supply = <&vdd_1v8_io>;
+
+ solomon,hs-zero-delay-ns = <300>;
+ solomon,hs-prep-delay-ns = <65>;
+
+ clocks = <&ssd2825_tx_clk>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ bridge_input: endpoint {
+ remote-endpoint = <&dpi_output>;
+ bus-width = <24>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ bridge_output: endpoint {
+ remote-endpoint = <&panel_input>;
+ };
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/bridge/synopsys,dw-hdmi.yaml b/Documentation/devicetree/bindings/display/bridge/synopsys,dw-hdmi.yaml
new file mode 100644
index 000000000000..33481381cccc
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/synopsys,dw-hdmi.yaml
@@ -0,0 +1,66 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/synopsys,dw-hdmi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Common Properties for Synopsys DesignWare HDMI TX Controller
+
+maintainers:
+ - Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
+
+description: |
+ This document defines device tree properties for the Synopsys DesignWare HDMI
+ TX controller (DWC HDMI TX) IP core. It doesn't constitute a full device tree
+ binding specification by itself but is meant to be referenced by device tree
+ bindings for the platform-specific integrations of the DWC HDMI TX.
+
+ When referenced from platform device tree bindings the properties defined in
+ this document are defined as follows. The platform device tree bindings are
+ responsible for defining whether each property is required or optional.
+
+properties:
+ reg:
+ maxItems: 1
+
+ reg-io-width:
+ description:
+ Width (in bytes) of the registers specified by the reg property.
+ enum: [1, 4]
+ default: 1
+
+ clocks:
+ minItems: 2
+ maxItems: 5
+ items:
+ - description: The bus clock for either AHB and APB
+ - description: The internal register configuration clock
+ additionalItems: true
+
+ clock-names:
+ minItems: 2
+ maxItems: 5
+ items:
+ - const: iahb
+ - const: isfr
+ additionalItems: true
+
+ ddc-i2c-bus:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ deprecated: true
+ description:
+ The HDMI DDC bus can be connected to either a system I2C master or the
+ functionally-reduced I2C master contained in the DWC HDMI. When connected
+ to a system I2C master this property contains a phandle to that I2C
+ master controller.
+
+ This property is deprecated, the system I2C master controller should
+ be referenced through the ddc-i2c-bus property of the HDMI connector
+ node.
+
+ interrupts:
+ maxItems: 1
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/display/bridge/tda998x.txt b/Documentation/devicetree/bindings/display/bridge/tda998x.txt
deleted file mode 100644
index f5a02f61dd36..000000000000
--- a/Documentation/devicetree/bindings/display/bridge/tda998x.txt
+++ /dev/null
@@ -1,54 +0,0 @@
-Device-Tree bindings for the NXP TDA998x HDMI transmitter
-
-Required properties;
- - compatible: must be "nxp,tda998x"
-
- - reg: I2C address
-
-Required node:
- - port: Input port node with endpoint definition, as described
- in Documentation/devicetree/bindings/graph.txt
-
-Optional properties:
- - interrupts: interrupt number and trigger type
- default: polling
-
- - pinctrl-0: pin control group to be used for
- screen plug/unplug interrupt.
-
- - pinctrl-names: must contain a "default" entry.
-
- - video-ports: 24 bits value which defines how the video controller
- output is wired to the TDA998x input - default: <0x230145>
-
- - audio-ports: array of 8-bit values, 2 values per one DAI[1].
- The first value defines the DAI type: TDA998x_SPDIF or TDA998x_I2S[2].
- The second value defines the tda998x AP_ENA reg content when the DAI
- in question is used. The implementation allows one or two DAIs. If two
- DAIs are defined, they must be of different type.
-
- - nxp,calib-gpios: calibration GPIO, which must correspond with the
- gpio used for the TDA998x interrupt pin.
-
-[1] Documentation/sound/soc/dai.rst
-[2] include/dt-bindings/display/tda998x.h
-
-Example:
-
-#include <dt-bindings/display/tda998x.h>
-
- tda998x: hdmi-encoder {
- compatible = "nxp,tda998x";
- reg = <0x70>;
- interrupt-parent = <&gpio0>;
- interrupts = <27 2>; /* falling edge */
- pinctrl-0 = <&pmx_camera>;
- pinctrl-names = "default";
- video-ports = <0x230145>;
-
- #sound-dai-cells = <2>;
- /* DAI-format AP_ENA reg value */
- audio-ports = < TDA998x_SPDIF 0x04
- TDA998x_I2S 0x03>;
-
- };
diff --git a/Documentation/devicetree/bindings/display/bridge/thine,thc63lvd1024.txt b/Documentation/devicetree/bindings/display/bridge/thine,thc63lvd1024.txt
deleted file mode 100644
index 37f0c04d5a28..000000000000
--- a/Documentation/devicetree/bindings/display/bridge/thine,thc63lvd1024.txt
+++ /dev/null
@@ -1,60 +0,0 @@
-Thine Electronics THC63LVD1024 LVDS decoder
--------------------------------------------
-
-The THC63LVD1024 is a dual link LVDS receiver designed to convert LVDS streams
-to parallel data outputs. The chip supports single/dual input/output modes,
-handling up to two LVDS input streams and up to two digital CMOS/TTL outputs.
-
-Single or dual operation mode, output data mapping and DDR output modes are
-configured through input signals and the chip does not expose any control bus.
-
-Required properties:
-- compatible: Shall be "thine,thc63lvd1024"
-- vcc-supply: Power supply for TTL output, TTL CLOCKOUT signal, LVDS input,
- PPL and digital circuitry
-
-Optional properties:
-- powerdown-gpios: Power down GPIO signal, pin name "/PDWN". Active low
-- oe-gpios: Output enable GPIO signal, pin name "OE". Active high
-
-The THC63LVD1024 video port connections are modeled according
-to OF graph bindings specified by Documentation/devicetree/bindings/graph.txt
-
-Required video port nodes:
-- port@0: First LVDS input port
-- port@2: First digital CMOS/TTL parallel output
-
-Optional video port nodes:
-- port@1: Second LVDS input port
-- port@3: Second digital CMOS/TTL parallel output
-
-Example:
---------
-
- thc63lvd1024: lvds-decoder {
- compatible = "thine,thc63lvd1024";
-
- vcc-supply = <&reg_lvds_vcc>;
- powerdown-gpios = <&gpio4 15 GPIO_ACTIVE_LOW>;
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
-
- lvds_dec_in_0: endpoint {
- remote-endpoint = <&lvds_out>;
- };
- };
-
- port@2{
- reg = <2>;
-
- lvds_dec_out_2: endpoint {
- remote-endpoint = <&adv7511_in>;
- };
- };
- };
- };
diff --git a/Documentation/devicetree/bindings/display/bridge/thine,thc63lvd1024.yaml b/Documentation/devicetree/bindings/display/bridge/thine,thc63lvd1024.yaml
new file mode 100644
index 000000000000..8ae382429d2b
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/thine,thc63lvd1024.yaml
@@ -0,0 +1,115 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/thine,thc63lvd1024.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Thine Electronics THC63LVD1024 LVDS Decoder
+
+maintainers:
+ - Jacopo Mondi <jacopo+renesas@jmondi.org>
+ - Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
+
+description: |
+ The THC63LVD1024 is a dual link LVDS receiver designed to convert LVDS
+ streams to parallel data outputs. The chip supports single/dual input/output
+ modes, handling up to two LVDS input streams and up to two digital CMOS/TTL
+ outputs.
+
+ Single or dual operation mode, output data mapping and DDR output modes are
+ configured through input signals and the chip does not expose any control
+ bus.
+
+properties:
+ compatible:
+ const: thine,thc63lvd1024
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+ description: |
+ The device can operate in single or dual input and output modes.
+
+ When operating in single input mode, all pixels are received on port@0,
+ and port@1 shall not contain any endpoint. In dual input mode,
+ even-numbered pixels are received on port@0 and odd-numbered pixels on
+ port@1, and both port@0 and port@1 shall contain endpoints.
+
+ When operating in single output mode all pixels are output from the first
+ CMOS/TTL port and port@3 shall not contain any endpoint. In dual output
+ mode pixels are output from both CMOS/TTL ports and both port@2 and
+ port@3 shall contain endpoints.
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: First LVDS input port
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Second LVDS input port
+
+ port@2:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: First digital CMOS/TTL parallel output
+
+ port@3:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Second digital CMOS/TTL parallel output
+
+ required:
+ - port@0
+ - port@2
+
+ oe-gpios:
+ maxItems: 1
+ description: Output enable GPIO signal, pin name "OE", active high.
+
+ powerdown-gpios:
+ maxItems: 1
+ description: Power down GPIO signal, pin name "/PDWN", active low.
+
+ vcc-supply:
+ description:
+ Power supply for the TTL output, TTL CLOCKOUT signal, LVDS input, PLL and
+ digital circuitry.
+
+required:
+ - compatible
+ - ports
+ - vcc-supply
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ lvds-decoder {
+ compatible = "thine,thc63lvd1024";
+
+ vcc-supply = <&reg_lvds_vcc>;
+ powerdown-gpios = <&gpio4 15 GPIO_ACTIVE_LOW>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ lvds_dec_in_0: endpoint {
+ remote-endpoint = <&lvds_out>;
+ };
+ };
+
+ port@2 {
+ reg = <2>;
+
+ lvds_dec_out_2: endpoint {
+ remote-endpoint = <&adv7511_in>;
+ };
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/bridge/thine,thc63lvdm83d.txt b/Documentation/devicetree/bindings/display/bridge/thine,thc63lvdm83d.txt
deleted file mode 100644
index 527e236e9a2a..000000000000
--- a/Documentation/devicetree/bindings/display/bridge/thine,thc63lvdm83d.txt
+++ /dev/null
@@ -1,50 +0,0 @@
-THine Electronics THC63LVDM83D LVDS serializer
-----------------------------------------------
-
-The THC63LVDM83D is an LVDS serializer designed to support pixel data
-transmission between a host and a flat panel.
-
-Required properties:
-
-- compatible: Should be "thine,thc63lvdm83d"
-
-Optional properties:
-
-- pwdn-gpios: Power down control GPIO
-
-Required nodes:
-
-The THC63LVDM83D has two video ports. Their connections are modeled using the
-OFgraph bindings specified in Documentation/devicetree/bindings/graph.txt.
-
-- Video port 0 for CMOS/TTL input
-- Video port 1 for LVDS output
-
-
-Example
--------
-
- lvds_enc: encoder@0 {
- compatible = "thine,thc63lvdm83d";
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
-
- lvds_enc_in: endpoint@0 {
- remote-endpoint = <&rgb_out>;
- };
- };
-
- port@1 {
- reg = <1>;
-
- lvds_enc_out: endpoint@0 {
- remote-endpoint = <&panel_in>;
- };
- };
- };
- };
diff --git a/Documentation/devicetree/bindings/display/bridge/ti,dlpc3433.yaml b/Documentation/devicetree/bindings/display/bridge/ti,dlpc3433.yaml
new file mode 100644
index 000000000000..d3f84d220723
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/ti,dlpc3433.yaml
@@ -0,0 +1,117 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/ti,dlpc3433.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: TI DLPC3433 MIPI DSI to DMD bridge
+
+maintainers:
+ - Jagan Teki <jagan@amarulasolutions.com>
+ - Christopher Vollo <chris@renewoutreach.org>
+
+description: |
+ TI DLPC3433 is a MIPI DSI based display controller bridge
+ for processing high resolution DMD based projectors.
+
+ It has a flexible configuration of MIPI DSI and DPI signal
+ input that produces a DMD output in RGB565, RGB666, RGB888
+ formats.
+
+ It supports upto 720p resolution with 60 and 120 Hz refresh
+ rates.
+
+properties:
+ compatible:
+ const: ti,dlpc3433
+
+ reg:
+ enum:
+ - 0x1b
+ - 0x1d
+
+ enable-gpios:
+ description: PROJ_ON pin, chip powers up PROJ_ON is high.
+
+ vcc_intf-supply:
+ description: A 1.8V/3.3V supply that power the Host I/O.
+
+ vcc_flsh-supply:
+ description: A 1.8V/3.3V supply that power the Flash I/O.
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ unevaluatedProperties: false
+ description: Video port for MIPI DSI input.
+
+ properties:
+ endpoint:
+ $ref: /schemas/media/video-interfaces.yaml#
+ unevaluatedProperties: false
+
+ properties:
+ data-lanes:
+ description: array of physical DSI data lane indexes.
+ minItems: 1
+ items:
+ - const: 1
+ - const: 2
+ - const: 3
+ - const: 4
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Video port for DMD output.
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - enable-gpios
+ - ports
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ bridge@1b {
+ compatible = "ti,dlpc3433";
+ reg = <0x1b>;
+ enable-gpios = <&gpio2 1 GPIO_ACTIVE_HIGH>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ bridge_in_dsi: endpoint {
+ remote-endpoint = <&dsi_out_bridge>;
+ data-lanes = <1 2 3 4>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ bridge_out_panel: endpoint {
+ remote-endpoint = <&panel_out_bridge>;
+ };
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/bridge/ti,sn65dsi83.yaml b/Documentation/devicetree/bindings/display/bridge/ti,sn65dsi83.yaml
new file mode 100644
index 000000000000..e69b6343a8eb
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/ti,sn65dsi83.yaml
@@ -0,0 +1,191 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/ti,sn65dsi83.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: SN65DSI83 and SN65DSI84 DSI to LVDS bridge chip
+
+maintainers:
+ - Marek Vasut <marex@denx.de>
+
+description: |
+ Texas Instruments SN65DSI83 1x Single-link MIPI DSI
+ to 1x Single-link LVDS
+ https://www.ti.com/lit/gpn/sn65dsi83
+ Texas Instruments SN65DSI84 1x Single-link MIPI DSI
+ to 1x Dual-link or 2x Single-link LVDS
+ https://www.ti.com/lit/gpn/sn65dsi84
+
+properties:
+ compatible:
+ enum:
+ - ti,sn65dsi83
+ - ti,sn65dsi84
+
+ reg:
+ enum:
+ - 0x2c
+ - 0x2d
+
+ enable-gpios:
+ maxItems: 1
+ description: GPIO specifier for bridge_en pin (active high).
+
+ vcc-supply:
+ description: A 1.8V power supply (see regulator/regulator.yaml).
+
+ interrupts:
+ maxItems: 1
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ unevaluatedProperties: false
+ description: Video port for MIPI DSI Channel-A input
+
+ properties:
+ endpoint:
+ $ref: /schemas/media/video-interfaces.yaml#
+ unevaluatedProperties: false
+
+ properties:
+ data-lanes:
+ description: array of physical DSI data lane indexes.
+ minItems: 1
+ items:
+ - const: 1
+ - const: 2
+ - const: 3
+ - const: 4
+
+ port@1:
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ unevaluatedProperties: false
+ description: Video port for MIPI DSI Channel-B input
+
+ properties:
+ endpoint:
+ $ref: /schemas/media/video-interfaces.yaml#
+ unevaluatedProperties: false
+
+ properties:
+ data-lanes:
+ description: array of physical DSI data lane indexes.
+ minItems: 1
+ items:
+ - const: 1
+ - const: 2
+ - const: 3
+ - const: 4
+
+ port@2:
+ description: Video port for LVDS Channel-A output (panel or bridge).
+ $ref: '#/$defs/lvds-port'
+
+ port@3:
+ description: Video port for LVDS Channel-B output (panel or bridge).
+ $ref: '#/$defs/lvds-port'
+
+ required:
+ - port@0
+ - port@2
+
+required:
+ - compatible
+ - reg
+ - ports
+
+$defs:
+ lvds-port:
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ unevaluatedProperties: false
+
+ properties:
+ endpoint:
+ $ref: /schemas/media/video-interfaces.yaml#
+ unevaluatedProperties: false
+
+ properties:
+ ti,lvds-termination-ohms:
+ description: The value of near end differential termination in ohms.
+ enum: [100, 200]
+ default: 200
+
+ ti,lvds-vod-swing-clock-microvolt:
+ description: LVDS diferential output voltage <min max> for clock
+ lanes in microvolts.
+ maxItems: 2
+
+ ti,lvds-vod-swing-data-microvolt:
+ description: LVDS diferential output voltage <min max> for data
+ lanes in microvolts.
+ maxItems: 2
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: ti,sn65dsi83
+ then:
+ properties:
+ ports:
+ properties:
+ port@1: false
+ port@3: false
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: ti,sn65dsi84
+ then:
+ properties:
+ ports:
+ properties:
+ port@1: false
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ bridge@2d {
+ compatible = "ti,sn65dsi83";
+ reg = <0x2d>;
+
+ enable-gpios = <&gpio2 1 GPIO_ACTIVE_HIGH>;
+ vcc-supply = <&reg_sn65dsi83_1v8>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ endpoint {
+ remote-endpoint = <&dsi0_out>;
+ data-lanes = <1 2 3 4>;
+ };
+ };
+
+ port@2 {
+ reg = <2>;
+
+ endpoint {
+ remote-endpoint = <&panel_in_lvds>;
+ };
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/bridge/ti,sn65dsi86.txt b/Documentation/devicetree/bindings/display/bridge/ti,sn65dsi86.txt
deleted file mode 100644
index 0a3fbb53a16e..000000000000
--- a/Documentation/devicetree/bindings/display/bridge/ti,sn65dsi86.txt
+++ /dev/null
@@ -1,87 +0,0 @@
-SN65DSI86 DSI to eDP bridge chip
---------------------------------
-
-This is the binding for Texas Instruments SN65DSI86 bridge.
-http://www.ti.com/general/docs/lit/getliterature.tsp?genericPartNumber=sn65dsi86&fileType=pdf
-
-Required properties:
-- compatible: Must be "ti,sn65dsi86"
-- reg: i2c address of the chip, 0x2d as per datasheet
-- enable-gpios: gpio specification for bridge_en pin (active high)
-
-- vccio-supply: A 1.8V supply that powers up the digital IOs.
-- vpll-supply: A 1.8V supply that powers up the displayport PLL.
-- vcca-supply: A 1.2V supply that powers up the analog circuits.
-- vcc-supply: A 1.2V supply that powers up the digital core.
-
-Optional properties:
-- interrupts-extended: Specifier for the SN65DSI86 interrupt line.
-
-- gpio-controller: Marks the device has a GPIO controller.
-- #gpio-cells : Should be two. The first cell is the pin number and
- the second cell is used to specify flags.
- See ../../gpio/gpio.txt for more information.
-- #pwm-cells : Should be one. See ../../pwm/pwm.txt for description of
- the cell formats.
-
-- clock-names: should be "refclk"
-- clocks: Specification for input reference clock. The reference
- clock rate must be 12 MHz, 19.2 MHz, 26 MHz, 27 MHz or 38.4 MHz.
-
-- data-lanes: See ../../media/video-interface.txt
-- lane-polarities: See ../../media/video-interface.txt
-
-- suspend-gpios: specification for GPIO1 pin on bridge (active low)
-
-Required nodes:
-This device has two video ports. Their connections are modelled using the
-OF graph bindings specified in Documentation/devicetree/bindings/graph.txt.
-
-- Video port 0 for DSI input
-- Video port 1 for eDP output
-
-Example
--------
-
-edp-bridge@2d {
- compatible = "ti,sn65dsi86";
- #address-cells = <1>;
- #size-cells = <0>;
- reg = <0x2d>;
-
- enable-gpios = <&msmgpio 33 GPIO_ACTIVE_HIGH>;
- suspend-gpios = <&msmgpio 34 GPIO_ACTIVE_LOW>;
-
- interrupts-extended = <&gpio3 4 IRQ_TYPE_EDGE_FALLING>;
-
- vccio-supply = <&pm8916_l17>;
- vcca-supply = <&pm8916_l6>;
- vpll-supply = <&pm8916_l17>;
- vcc-supply = <&pm8916_l6>;
-
- clock-names = "refclk";
- clocks = <&input_refclk>;
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
-
- edp_bridge_in: endpoint {
- remote-endpoint = <&dsi_out>;
- };
- };
-
- port@1 {
- reg = <1>;
-
- edp_bridge_out: endpoint {
- data-lanes = <2 1 3 0>;
- lane-polarities = <0 1 0 1>;
- remote-endpoint = <&edp_panel_in>;
- };
- };
- };
-}
diff --git a/Documentation/devicetree/bindings/display/bridge/ti,sn65dsi86.yaml b/Documentation/devicetree/bindings/display/bridge/ti,sn65dsi86.yaml
new file mode 100644
index 000000000000..c93878b6d718
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/ti,sn65dsi86.yaml
@@ -0,0 +1,273 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/ti,sn65dsi86.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: SN65DSI86 DSI to eDP bridge chip
+
+maintainers:
+ - Douglas Anderson <dianders@chromium.org>
+
+description: |
+ The Texas Instruments SN65DSI86 bridge takes MIPI DSI in and outputs eDP.
+ https://www.ti.com/general/docs/lit/getliterature.tsp?genericPartNumber=sn65dsi86&fileType=pdf
+
+properties:
+ compatible:
+ const: ti,sn65dsi86
+
+ reg:
+ enum: [ 0x2c, 0x2d ]
+
+ enable-gpios:
+ maxItems: 1
+ description: GPIO specifier for bridge_en pin (active high).
+
+ suspend-gpios:
+ maxItems: 1
+ description: GPIO specifier for GPIO1 pin on bridge (active low).
+
+ no-hpd:
+ type: boolean
+ description:
+ Set if the HPD line on the bridge isn't hooked up to anything or is
+ otherwise unusable.
+
+ vccio-supply:
+ description: A 1.8V supply that powers the digital IOs.
+
+ vpll-supply:
+ description: A 1.8V supply that powers the DisplayPort PLL.
+
+ vcca-supply:
+ description: A 1.2V supply that powers the analog circuits.
+
+ vcc-supply:
+ description: A 1.2V supply that powers the digital core.
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+ description:
+ Clock specifier for input reference clock. The reference clock rate must
+ be 12 MHz, 19.2 MHz, 26 MHz, 27 MHz or 38.4 MHz.
+
+ clock-names:
+ const: refclk
+
+ gpio-controller: true
+ '#gpio-cells':
+ const: 2
+ description:
+ First cell is pin number, second cell is flags. GPIO pin numbers are
+ 1-based to match the datasheet. See ../../gpio/gpio.txt for more
+ information.
+
+ '#pwm-cells':
+ const: 1
+ description: See ../../pwm/pwm.yaml for description of the cell formats.
+
+ aux-bus:
+ $ref: /schemas/display/dp-aux-bus.yaml#
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Video port for MIPI DSI input
+
+ port@1:
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ unevaluatedProperties: false
+ description:
+ Video port for eDP output (panel or connector).
+
+ properties:
+ endpoint:
+ $ref: /schemas/media/video-interfaces.yaml#
+ unevaluatedProperties: false
+
+ properties:
+ data-lanes:
+ oneOf:
+ - minItems: 1
+ maxItems: 1
+ uniqueItems: true
+ items:
+ enum:
+ - 0
+ - 1
+ description:
+ If you have 1 logical lane the bridge supports routing
+ to either port 0 or port 1. Port 0 is suggested.
+
+ - minItems: 2
+ maxItems: 2
+ uniqueItems: true
+ items:
+ enum:
+ - 0
+ - 1
+ description:
+ If you have 2 logical lanes the bridge supports
+ reordering but only on physical ports 0 and 1.
+
+ - minItems: 4
+ maxItems: 4
+ uniqueItems: true
+ items:
+ enum:
+ - 0
+ - 1
+ - 2
+ - 3
+ description:
+ If you have 4 logical lanes the bridge supports
+ reordering in any way.
+
+ lane-polarities:
+ minItems: 1
+ maxItems: 4
+ items:
+ enum:
+ - 0
+ - 1
+
+ dependencies:
+ lane-polarities: [data-lanes]
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - vccio-supply
+ - vpll-supply
+ - vcca-supply
+ - vcc-supply
+ - ports
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ #include <dt-bindings/gpio/gpio.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ bridge@2d {
+ compatible = "ti,sn65dsi86";
+ reg = <0x2d>;
+
+ interrupt-parent = <&tlmm>;
+ interrupts = <10 IRQ_TYPE_LEVEL_HIGH>;
+
+ enable-gpios = <&tlmm 102 GPIO_ACTIVE_HIGH>;
+
+ vpll-supply = <&src_pp1800_s4a>;
+ vccio-supply = <&src_pp1800_s4a>;
+ vcca-supply = <&src_pp1200_l2a>;
+ vcc-supply = <&src_pp1200_l2a>;
+
+ clocks = <&rpmhcc RPMH_LN_BB_CLK2>;
+ clock-names = "refclk";
+
+ no-hpd;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ endpoint {
+ remote-endpoint = <&dsi0_out>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ sn65dsi86_out: endpoint {
+ remote-endpoint = <&panel_in_edp>;
+ };
+ };
+ };
+
+ aux-bus {
+ panel {
+ compatible = "boe,nv133fhm-n62";
+ power-supply = <&pp3300_dx_edp>;
+ backlight = <&backlight>;
+ hpd-gpios = <&sn65dsi86_bridge 2 GPIO_ACTIVE_HIGH>;
+
+ port {
+ panel_in_edp: endpoint {
+ remote-endpoint = <&sn65dsi86_out>;
+ };
+ };
+ };
+ };
+ };
+ };
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ #include <dt-bindings/gpio/gpio.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ bridge@2d {
+ compatible = "ti,sn65dsi86";
+ reg = <0x2d>;
+
+ enable-gpios = <&msmgpio 33 GPIO_ACTIVE_HIGH>;
+ suspend-gpios = <&msmgpio 34 GPIO_ACTIVE_LOW>;
+
+ interrupts-extended = <&gpio3 4 IRQ_TYPE_EDGE_FALLING>;
+
+ vccio-supply = <&pm8916_l17>;
+ vcca-supply = <&pm8916_l6>;
+ vpll-supply = <&pm8916_l17>;
+ vcc-supply = <&pm8916_l6>;
+
+ clock-names = "refclk";
+ clocks = <&input_refclk>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ edp_bridge_in: endpoint {
+ remote-endpoint = <&dsi_out>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ edp_bridge_out: endpoint {
+ data-lanes = <2 1 3 0>;
+ lane-polarities = <0 1 0 1>;
+ remote-endpoint = <&edp_panel_in>;
+ };
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/bridge/ti,tdp158.yaml b/Documentation/devicetree/bindings/display/bridge/ti,tdp158.yaml
new file mode 100644
index 000000000000..721da44054e1
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/ti,tdp158.yaml
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/ti,tdp158.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: TI TDP158 HDMI to TMDS Redriver
+
+maintainers:
+ - Arnaud Vrac <avrac@freebox.fr>
+ - Pierre-Hugues Husson <phhusson@freebox.fr>
+
+properties:
+ compatible:
+ const: ti,tdp158
+
+# The reg property is required if and only if the device is connected
+# to an I2C bus. In pin strap mode, reg must not be specified.
+ reg:
+ maxItems: 1
+ description: I2C address of the device
+
+# Pin 36 = Operation Enable / Reset Pin
+# OE = L: Power Down Mode
+# OE = H: Normal Operation
+# Internal weak pullup - device resets on H to L transitions
+ enable-gpios:
+ description: GPIO controlling bridge enable
+
+ vcc-supply:
+ description: Power supply 3.3V
+
+ vdd-supply:
+ description: Power supply 1.1V
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Bridge input
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Bridge output
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - vcc-supply
+ - vdd-supply
+ - ports
+
+additionalProperties: false
diff --git a/Documentation/devicetree/bindings/display/bridge/ti,tfp410.txt b/Documentation/devicetree/bindings/display/bridge/ti,tfp410.txt
deleted file mode 100644
index 54d7e31525ec..000000000000
--- a/Documentation/devicetree/bindings/display/bridge/ti,tfp410.txt
+++ /dev/null
@@ -1,46 +0,0 @@
-TFP410 DPI to DVI encoder
-=========================
-
-Required properties:
-- compatible: "ti,tfp410"
-
-Optional properties:
-- powerdown-gpios: power-down gpio
-- reg: I2C address. If and only if present the device node
- should be placed into the i2c controller node where the
- tfp410 i2c is connected to.
-
-Required nodes:
-- Video port 0 for DPI input [1].
-- Video port 1 for DVI output [1].
-
-[1]: Documentation/devicetree/bindings/media/video-interfaces.txt
-
-Example
--------
-
-tfp410: encoder@0 {
- compatible = "ti,tfp410";
- powerdown-gpios = <&twl_gpio 2 GPIO_ACTIVE_LOW>;
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
-
- tfp410_in: endpoint@0 {
- remote-endpoint = <&dpi_out>;
- };
- };
-
- port@1 {
- reg = <1>;
-
- tfp410_out: endpoint@0 {
- remote-endpoint = <&dvi_connector_in>;
- };
- };
- };
-};
diff --git a/Documentation/devicetree/bindings/display/bridge/ti,tfp410.yaml b/Documentation/devicetree/bindings/display/bridge/ti,tfp410.yaml
new file mode 100644
index 000000000000..4c5dd8ec2951
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/ti,tfp410.yaml
@@ -0,0 +1,119 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/ti,tfp410.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: TFP410 DPI to DVI encoder
+
+maintainers:
+ - Tomi Valkeinen <tomi.valkeinen@ti.com>
+ - Jyri Sarha <jsarha@ti.com>
+
+properties:
+ compatible:
+ const: ti,tfp410
+
+ reg:
+ description: I2C address of the device.
+ maxItems: 1
+
+ powerdown-gpios:
+ maxItems: 1
+
+ ti,deskew:
+ description:
+ Data de-skew value in 350ps increments, from 0 to 7, as configured
+ through the DK[3:1] pins. The de-skew multiplier is computed as
+ (DK[3:1] - 4), so it ranges from -4 to 3.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 7
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ unevaluatedProperties: false
+ description: DPI input port.
+
+ properties:
+ endpoint:
+ $ref: /schemas/graph.yaml#/$defs/endpoint-base
+ unevaluatedProperties: false
+
+ properties:
+ pclk-sample:
+ description:
+ Endpoint sampling edge.
+ enum:
+ - 0 # Falling edge
+ - 1 # Rising edge
+ default: 0
+
+ bus-width:
+ description:
+ Endpoint bus width.
+ enum:
+ - 12 # 12 data lines connected and dual-edge mode
+ - 24 # 24 data lines connected and single-edge mode
+ default: 24
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: DVI output port.
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - ports
+
+if:
+ required:
+ - reg
+then:
+ properties:
+ ti,deskew: false
+else:
+ required:
+ - ti,deskew
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ tfp410: encoder {
+ compatible = "ti,tfp410";
+ powerdown-gpios = <&twl_gpio 2 GPIO_ACTIVE_LOW>;
+ ti,deskew = <3>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ tfp410_in: endpoint {
+ pclk-sample = <1>;
+ bus-width = <24>;
+ remote-endpoint = <&dpi_out>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ tfp410_out: endpoint {
+ remote-endpoint = <&dvi_connector_in>;
+ };
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/bridge/ti,ths813x.txt b/Documentation/devicetree/bindings/display/bridge/ti,ths813x.txt
deleted file mode 100644
index df3d7c1ac09e..000000000000
--- a/Documentation/devicetree/bindings/display/bridge/ti,ths813x.txt
+++ /dev/null
@@ -1,51 +0,0 @@
-THS8134 and THS8135 Video DAC
------------------------------
-
-This is the binding for Texas Instruments THS8134, THS8134A, THS8134B and
-THS8135 Video DAC bridges.
-
-Required properties:
-
-- compatible: Must be one of
- "ti,ths8134"
- "ti,ths8134a," "ti,ths8134"
- "ti,ths8134b", "ti,ths8134"
- "ti,ths8135"
-
-Required nodes:
-
-This device has two video ports. Their connections are modelled using the OF
-graph bindings specified in Documentation/devicetree/bindings/graph.txt.
-
-- Video port 0 for RGB input
-- Video port 1 for VGA output
-
-Example
--------
-
-vga-bridge {
- compatible = "ti,ths8135";
- #address-cells = <1>;
- #size-cells = <0>;
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
-
- vga_bridge_in: endpoint {
- remote-endpoint = <&lcdc_out_vga>;
- };
- };
-
- port@1 {
- reg = <1>;
-
- vga_bridge_out: endpoint {
- remote-endpoint = <&vga_con_in>;
- };
- };
- };
-};
diff --git a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358762.yaml b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358762.yaml
new file mode 100644
index 000000000000..6c1de0b21722
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358762.yaml
@@ -0,0 +1,87 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/toshiba,tc358762.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Toshiba TC358762 MIPI DSI to MIPI DPI bridge
+
+maintainers:
+ - Marek Vasut <marex@denx.de>
+
+description: |
+ The TC358762 is bridge device which converts MIPI DSI to MIPI DPI.
+
+properties:
+ compatible:
+ enum:
+ - toshiba,tc358762
+
+ reg:
+ maxItems: 1
+ description: virtual channel number of a DSI peripheral
+
+ reset-gpios:
+ maxItems: 1
+
+ vddc-supply:
+ description: Regulator for 1.2V internal core power.
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Video port for MIPI DSI input
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Video port for MIPI DPI output (panel or connector).
+
+ required:
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - vddc-supply
+ - ports
+
+additionalProperties: false
+
+examples:
+ - |
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ bridge@0 {
+ reg = <0>;
+ compatible = "toshiba,tc358762";
+ vddc-supply = <&vcc_1v2_reg>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ bridge_in: endpoint {
+ remote-endpoint = <&dsi_out>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ bridge_out: endpoint {
+ remote-endpoint = <&panel_in>;
+ };
+ };
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358764.txt b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358764.txt
deleted file mode 100644
index 8f9abf28a8fa..000000000000
--- a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358764.txt
+++ /dev/null
@@ -1,35 +0,0 @@
-TC358764 MIPI-DSI to LVDS panel bridge
-
-Required properties:
- - compatible: "toshiba,tc358764"
- - reg: the virtual channel number of a DSI peripheral
- - vddc-supply: core voltage supply, 1.2V
- - vddio-supply: I/O voltage supply, 1.8V or 3.3V
- - vddlvds-supply: LVDS1/2 voltage supply, 3.3V
- - reset-gpios: a GPIO spec for the reset pin
-
-The device node can contain following 'port' child nodes,
-according to the OF graph bindings defined in [1]:
- 0: DSI Input, not required, if the bridge is DSI controlled
- 1: LVDS Output, mandatory
-
-[1]: Documentation/devicetree/bindings/media/video-interfaces.txt
-
-Example:
-
- bridge@0 {
- reg = <0>;
- compatible = "toshiba,tc358764";
- vddc-supply = <&vcc_1v2_reg>;
- vddio-supply = <&vcc_1v8_reg>;
- vddlvds-supply = <&vcc_3v3_reg>;
- reset-gpios = <&gpd1 6 GPIO_ACTIVE_LOW>;
- #address-cells = <1>;
- #size-cells = <0>;
- port@1 {
- reg = <1>;
- lvds_ep: endpoint {
- remote-endpoint = <&panel_ep>;
- };
- };
- };
diff --git a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358764.yaml b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358764.yaml
new file mode 100644
index 000000000000..866607400514
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358764.yaml
@@ -0,0 +1,89 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/toshiba,tc358764.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Toshiba TC358764 MIPI-DSI to LVDS bridge
+
+maintainers:
+ - Andrzej Hajda <andrzej.hajda@intel.com>
+
+properties:
+ compatible:
+ const: toshiba,tc358764
+
+ reg:
+ description: Virtual channel number of a DSI peripheral
+ maxItems: 1
+
+ reset-gpios:
+ maxItems: 1
+
+ vddc-supply:
+ description: Core voltage supply, 1.2V
+
+ vddio-supply:
+ description: I/O voltage supply, 1.8V or 3.3V
+
+ vddlvds-supply:
+ description: LVDS1/2 voltage supply, 3.3V
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Video port for MIPI DSI input, if the bridge DSI controlled
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Video port for LVDS output (panel or connector).
+
+ required:
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - reset-gpios
+ - vddc-supply
+ - vddio-supply
+ - vddlvds-supply
+ - ports
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ bridge@0 {
+ compatible = "toshiba,tc358764";
+ reg = <0>;
+
+ reset-gpios = <&gpd1 6 GPIO_ACTIVE_LOW>;
+ vddc-supply = <&vcc_1v2_reg>;
+ vddio-supply = <&vcc_1v8_reg>;
+ vddlvds-supply = <&vcc_3v3_reg>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@1 {
+ reg = <1>;
+ lvds_ep: endpoint {
+ remote-endpoint = <&panel_ep>;
+ };
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358767.txt b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358767.txt
deleted file mode 100644
index e3f6aa6a214d..000000000000
--- a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358767.txt
+++ /dev/null
@@ -1,53 +0,0 @@
-Toshiba TC358767 eDP bridge bindings
-
-Required properties:
- - compatible: "toshiba,tc358767"
- - reg: i2c address of the bridge, 0x68 or 0x0f, depending on bootstrap pins
- - clock-names: should be "ref"
- - clocks: OF device-tree clock specification for refclk input. The reference
- clock rate must be 13 MHz, 19.2 MHz, 26 MHz, or 38.4 MHz.
-
-Optional properties:
- - shutdown-gpios: OF device-tree gpio specification for SD pin
- (active high shutdown input)
- - reset-gpios: OF device-tree gpio specification for RSTX pin
- (active low system reset)
- - ports: the ports node can contain video interface port nodes to connect
- to a DPI/DSI source and to an eDP/DP sink according to [1][2]:
- - port@0: DSI input port
- - port@1: DPI input port
- - port@2: eDP/DP output port
-
-[1]: Documentation/devicetree/bindings/graph.txt
-[2]: Documentation/devicetree/bindings/media/video-interfaces.txt
-
-Example:
- edp-bridge@68 {
- compatible = "toshiba,tc358767";
- reg = <0x68>;
- shutdown-gpios = <&gpio3 23 GPIO_ACTIVE_HIGH>;
- reset-gpios = <&gpio3 24 GPIO_ACTIVE_LOW>;
- clock-names = "ref";
- clocks = <&edp_refclk>;
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@1 {
- reg = <1>;
-
- bridge_in: endpoint {
- remote-endpoint = <&dpi_out>;
- };
- };
-
- port@2 {
- reg = <2>;
-
- bridge_out: endpoint {
- remote-endpoint = <&panel_in>;
- };
- };
- };
- };
diff --git a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358767.yaml b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358767.yaml
new file mode 100644
index 000000000000..b78f64c9c5f4
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358767.yaml
@@ -0,0 +1,204 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/toshiba,tc358767.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Toshiba TC358767/TC358867/TC9595 DSI/DPI/eDP bridge
+
+maintainers:
+ - Andrey Gusakov <andrey.gusakov@cogentembedded.com>
+
+description: |
+ The TC358767/TC358867/TC9595 is bridge device which
+ converts DSI/DPI to eDP/DP .
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - toshiba,tc358867
+ - toshiba,tc9595
+ - const: toshiba,tc358767
+ - const: toshiba,tc358767
+
+ reg:
+ enum:
+ - 0x0f
+ - 0x68
+ description: |
+ i2c address of the bridge, 0x68 or 0x0f, depending on bootstrap pins
+
+ clock-names:
+ const: ref
+
+ clocks:
+ maxItems: 1
+ description: |
+ OF device-tree clock specification for refclk input. The reference.
+ clock rate must be 13 MHz, 19.2 MHz, 26 MHz, or 38.4 MHz.
+
+ shutdown-gpios:
+ maxItems: 1
+ description: |
+ OF device-tree gpio specification for SD pin(active high shutdown input)
+
+ reset-gpios:
+ maxItems: 1
+ description: |
+ OF device-tree gpio specification for RSTX pin(active low system reset)
+
+ interrupts:
+ maxItems: 1
+
+ toshiba,hpd-pin:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum:
+ - 0
+ - 1
+ description: TC358767 GPIO pin number to which HPD is connected to (0 or 1)
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ unevaluatedProperties: false
+ description: |
+ DSI input port. The remote endpoint phandle should be a
+ reference to a valid DSI output endpoint node
+
+ properties:
+ endpoint:
+ $ref: /schemas/media/video-interfaces.yaml#
+ unevaluatedProperties: false
+
+ properties:
+ data-lanes:
+ description: array of physical DSI data lane indexes.
+ minItems: 1
+ items:
+ - const: 1
+ - const: 2
+ - const: 3
+ - const: 4
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: |
+ DPI input/output port. The remote endpoint phandle should be a
+ reference to a valid DPI output or input endpoint node.
+
+ port@2:
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ unevaluatedProperties: false
+ description: |
+ eDP/DP output port. The remote endpoint phandle should be a
+ reference to a valid eDP panel input endpoint node. This port is
+ optional, treated as DP panel if not defined
+
+ properties:
+ endpoint:
+ $ref: /schemas/media/video-interfaces.yaml#
+ unevaluatedProperties: false
+
+ properties:
+ toshiba,pre-emphasis:
+ description:
+ Display port output Pre-Emphasis settings for both DP lanes.
+ $ref: /schemas/types.yaml#/definitions/uint8-array
+ minItems: 2
+ maxItems: 2
+ items:
+ enum:
+ - 0 # No pre-emphasis
+ - 1 # 3.5dB pre-emphasis
+ - 2 # 6dB pre-emphasis
+
+ oneOf:
+ - required:
+ - port@0
+ - required:
+ - port@1
+
+
+required:
+ - compatible
+ - reg
+ - clock-names
+ - clocks
+ - ports
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ /* DPI input and eDP output */
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ edp-bridge@68 {
+ compatible = "toshiba,tc358767";
+ reg = <0x68>;
+ shutdown-gpios = <&gpio3 23 GPIO_ACTIVE_HIGH>;
+ reset-gpios = <&gpio3 24 GPIO_ACTIVE_LOW>;
+ clock-names = "ref";
+ clocks = <&edp_refclk>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@1 {
+ reg = <1>;
+
+ bridge_in_0: endpoint {
+ remote-endpoint = <&dpi_out>;
+ };
+ };
+
+ port@2 {
+ reg = <2>;
+
+ bridge_out: endpoint {
+ remote-endpoint = <&panel_in>;
+ };
+ };
+ };
+ };
+ };
+ - |
+ /* DPI input and DP output */
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ edp-bridge@68 {
+ compatible = "toshiba,tc358767";
+ reg = <0x68>;
+ shutdown-gpios = <&gpio3 23 GPIO_ACTIVE_HIGH>;
+ reset-gpios = <&gpio3 24 GPIO_ACTIVE_LOW>;
+ clock-names = "ref";
+ clocks = <&edp_refclk>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@1 {
+ reg = <1>;
+
+ bridge_in_1: endpoint {
+ remote-endpoint = <&dpi_out>;
+ };
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358768.yaml b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358768.yaml
new file mode 100644
index 000000000000..bb5d3b543800
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358768.yaml
@@ -0,0 +1,131 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/toshiba,tc358768.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Toschiba TC358768/TC358778 Parallel RGB to MIPI DSI bridge
+
+maintainers:
+ - Peter Ujfalusi <peter.ujfalusi@ti.com>
+
+description: |
+ The TC358768/TC358778 is bridge device which converts RGB to DSI.
+
+properties:
+ compatible:
+ enum:
+ - toshiba,tc358768
+ - toshiba,tc358778
+
+ reg:
+ maxItems: 1
+ description: base I2C address of the device
+
+ reset-gpios:
+ maxItems: 1
+ description: GPIO connected to active low RESX pin
+
+ vddc-supply:
+ description: Regulator for 1.2V internal core power.
+
+ vddmipi-supply:
+ description: Regulator for 1.2V for the MIPI.
+
+ vddio-supply:
+ description: Regulator for 1.8V - 3.3V IO power.
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ const: refclk
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ unevaluatedProperties: false
+ description: |
+ Video port for RGB input
+
+ properties:
+ endpoint:
+ $ref: /schemas/graph.yaml#/$defs/endpoint-base
+ unevaluatedProperties: false
+
+ properties:
+ data-lines:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [ 16, 18, 24 ]
+ deprecated: true
+
+ bus-width:
+ enum: [ 16, 18, 24 ]
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: |
+ Video port for DSI output (panel or connector).
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - vddc-supply
+ - vddmipi-supply
+ - vddio-supply
+ - ports
+
+allOf:
+ - $ref: ../dsi-controller.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ dsi_bridge: dsi@e {
+ compatible = "toshiba,tc358768";
+ reg = <0xe>;
+
+ clocks = <&tc358768_refclk>;
+ clock-names = "refclk";
+
+ reset-gpios = <&pcf_display_board 0 GPIO_ACTIVE_LOW>;
+
+ vddc-supply = <&v1_2d>;
+ vddmipi-supply = <&v1_2d>;
+ vddio-supply = <&v3_3d>;
+
+ dsi_bridge_ports: ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ rgb_in: endpoint {
+ remote-endpoint = <&dpi_out>;
+ data-lines = <24>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ dsi_out: endpoint {
+ remote-endpoint = <&lcd_in>;
+ };
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358775.yaml b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358775.yaml
new file mode 100644
index 000000000000..258dd9cfd770
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358775.yaml
@@ -0,0 +1,233 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/toshiba,tc358775.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Toshiba TC358775 DSI to LVDS bridge
+
+maintainers:
+ - Vinay Simha BN <simhavcs@gmail.com>
+
+description: |
+ This binding supports DSI to LVDS bridges TC358765 and TC358775
+
+ MIPI DSI-RX Data 4-lane, CLK 1-lane with data rates up to 800 Mbps/lane.
+ Video frame size:
+ Up to 1600x1200 24-bit/pixel resolution for single-link LVDS display panel
+ limited by 135 MHz LVDS speed
+ Up to WUXGA (1920x1200 24-bit pixels) resolution for dual-link LVDS display
+ panel, limited by 270 MHz LVDS speed.
+
+properties:
+ compatible:
+ enum:
+ - toshiba,tc358765
+ - toshiba,tc358775
+
+ reg:
+ maxItems: 1
+ description: i2c address of the bridge, 0x0f
+
+ vdd-supply:
+ description: 1.2V LVDS Power Supply
+
+ vddio-supply:
+ description: 1.8V IO Power Supply
+
+ stby-gpios:
+ maxItems: 1
+ description: Standby pin, Low active
+
+ reset-gpios:
+ maxItems: 1
+ description: Hardware reset, Low active
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ unevaluatedProperties: false
+ description: |
+ DSI Input. The remote endpoint phandle should be a
+ reference to a valid mipi_dsi_host device node.
+
+ properties:
+ endpoint:
+ $ref: /schemas/media/video-interfaces.yaml#
+ unevaluatedProperties: false
+
+ properties:
+ data-lanes:
+ description: array of physical DSI data lane indexes.
+ minItems: 1
+ items:
+ - const: 1
+ - const: 2
+ - const: 3
+ - const: 4
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: |
+ Video port for LVDS output (panel or connector).
+
+ port@2:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: |
+ Video port for Dual link LVDS output (panel or connector).
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - vdd-supply
+ - vddio-supply
+ - reset-gpios
+ - ports
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: toshiba,tc358765
+ then:
+ properties:
+ stby-gpios: false
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ /* For single-link LVDS display panel */
+
+ i2c@78b8000 {
+ /* On High speed expansion */
+ label = "HS-I2C2";
+ reg = <0x078b8000 0x500>;
+ clock-frequency = <400000>; /* fastmode operation */
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ tc_bridge: bridge@f {
+ compatible = "toshiba,tc358775";
+ reg = <0x0f>;
+
+ vdd-supply = <&pm8916_l2>;
+ vddio-supply = <&pm8916_l6>;
+
+ stby-gpios = <&msmgpio 99 GPIO_ACTIVE_LOW>;
+ reset-gpios = <&msmgpio 72 GPIO_ACTIVE_LOW>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ d2l_in_test: endpoint {
+ remote-endpoint = <&dsi0_out>;
+ data-lanes = <1 2 3 4>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ lvds_out: endpoint {
+ remote-endpoint = <&panel_in>;
+ };
+ };
+ };
+ };
+ };
+
+ dsi@1a98000 {
+ reg = <0x1a98000 0x25c>;
+ reg-names = "dsi_ctrl";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ port@1 {
+ reg = <1>;
+ dsi0_out: endpoint {
+ remote-endpoint = <&d2l_in_test>;
+ };
+ };
+ };
+ };
+
+ - |
+ /* For dual-link LVDS display panel */
+
+ i2c@78b8000 {
+ /* On High speed expansion */
+ label = "HS-I2C2";
+ reg = <0x078b8000 0x500>;
+ clock-frequency = <400000>; /* fastmode operation */
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ tc_bridge_dual: bridge@f {
+ compatible = "toshiba,tc358775";
+ reg = <0x0f>;
+
+ vdd-supply = <&pm8916_l2>;
+ vddio-supply = <&pm8916_l6>;
+
+ stby-gpios = <&msmgpio 99 GPIO_ACTIVE_LOW>;
+ reset-gpios = <&msmgpio 72 GPIO_ACTIVE_LOW>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ d2l_in_dual: endpoint {
+ remote-endpoint = <&dsi0_out_dual>;
+ data-lanes = <1 2 3 4>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ lvds0_out: endpoint {
+ remote-endpoint = <&panel_in0>;
+ };
+ };
+
+ port@2 {
+ reg = <2>;
+ lvds1_out: endpoint {
+ remote-endpoint = <&panel_in1>;
+ };
+ };
+ };
+ };
+ };
+
+ dsi@1a98000 {
+ reg = <0x1a98000 0x25c>;
+ reg-names = "dsi_ctrl";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ port@1 {
+ reg = <1>;
+ dsi0_out_dual: endpoint {
+ remote-endpoint = <&d2l_in_dual>;
+ };
+ };
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/display/bridge/waveshare,dsi2dpi.yaml b/Documentation/devicetree/bindings/display/bridge/waveshare,dsi2dpi.yaml
new file mode 100644
index 000000000000..5e8498c8303d
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/waveshare,dsi2dpi.yaml
@@ -0,0 +1,103 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/waveshare,dsi2dpi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Waveshare MIPI-DSI to DPI Converter bridge
+
+maintainers:
+ - Joseph Guo <qijian.guo@nxp.com>
+
+description:
+ Waveshare bridge board is part of Waveshare panel which converts DSI to DPI.
+
+properties:
+ compatible:
+ const: waveshare,dsi2dpi
+
+ reg:
+ maxItems: 1
+ description: base I2C address of the device
+
+ power-supply: true
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ unevaluatedProperties: false
+ description:
+ Video port for MIPI DSI input
+
+ properties:
+ endpoint:
+ $ref: /schemas/media/video-interfaces.yaml#
+ unevaluatedProperties: false
+
+ properties:
+ data-lanes:
+ description: array of physical DSI data lane indexes.
+ items:
+ - const: 1
+ - const: 2
+
+ required:
+ - data-lanes
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Video port for MIPI DPI output panel.
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - ports
+ - power-supply
+
+additionalProperties: false
+
+examples:
+ - |
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ bridge@45 {
+ compatible = "waveshare,dsi2dpi";
+ reg = <0x45>;
+ power-supply = <&reg_3p3v>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ waveshare_from_dsim: endpoint {
+ data-lanes = <1 2>;
+ remote-endpoint = <&dsim_to_waveshare>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ waveshare_to_panel: endpoint {
+ remote-endpoint = <&panel_to_waveshare>;
+ };
+ };
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/cirrus,clps711x-fb.txt b/Documentation/devicetree/bindings/display/cirrus,clps711x-fb.txt
index b0e506610400..84c75f849891 100644
--- a/Documentation/devicetree/bindings/display/cirrus,clps711x-fb.txt
+++ b/Documentation/devicetree/bindings/display/cirrus,clps711x-fb.txt
@@ -1,4 +1,4 @@
-* Currus Logic CLPS711X Framebuffer
+* Cirrus Logic CLPS711X Framebuffer
Required properties:
- compatible: Shall contain "cirrus,ep7209-fb".
@@ -27,11 +27,11 @@ Example:
display: display {
model = "320x240x4";
- native-mode = <&timing0>;
bits-per-pixel = <4>;
ac-prescale = <17>;
display-timings {
+ native-mode = <&timing0>;
timing0: 320x240 {
hactive = <320>;
hback-porch = <0>;
diff --git a/Documentation/devicetree/bindings/display/connector/analog-tv-connector.txt b/Documentation/devicetree/bindings/display/connector/analog-tv-connector.txt
deleted file mode 100644
index 0c0970c210ab..000000000000
--- a/Documentation/devicetree/bindings/display/connector/analog-tv-connector.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-Analog TV Connector
-===================
-
-Required properties:
-- compatible: "composite-video-connector" or "svideo-connector"
-
-Optional properties:
-- label: a symbolic name for the connector
-
-Required nodes:
-- Video port for TV input
-
-Example
--------
-
-tv: connector {
- compatible = "composite-video-connector";
- label = "tv";
-
- port {
- tv_connector_in: endpoint {
- remote-endpoint = <&venc_out>;
- };
- };
-};
diff --git a/Documentation/devicetree/bindings/display/connector/analog-tv-connector.yaml b/Documentation/devicetree/bindings/display/connector/analog-tv-connector.yaml
new file mode 100644
index 000000000000..a31ca2d52b86
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/connector/analog-tv-connector.yaml
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/connector/analog-tv-connector.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Analog TV Connector
+
+maintainers:
+ - Laurent Pinchart <Laurent.pinchart@ideasonboard.com>
+
+properties:
+ compatible:
+ enum:
+ - composite-video-connector
+ - svideo-connector
+
+ label: true
+
+ sdtv-standards:
+ description:
+ Limit the supported TV standards on a connector to the given ones. If
+ not specified all TV standards are allowed. Possible TV standards are
+ defined in include/dt-bindings/display/sdtv-standards.h.
+ $ref: /schemas/types.yaml#/definitions/uint32
+
+ port:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Connection to controller providing analog TV signals
+
+required:
+ - compatible
+ - port
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/display/sdtv-standards.h>
+
+ connector {
+ compatible = "composite-video-connector";
+ label = "tv";
+ sdtv-standards = <(SDTV_STD_PAL | SDTV_STD_NTSC)>;
+
+ port {
+ tv_connector_in: endpoint {
+ remote-endpoint = <&venc_out>;
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/connector/dp-connector.yaml b/Documentation/devicetree/bindings/display/connector/dp-connector.yaml
new file mode 100644
index 000000000000..1f2b449dc910
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/connector/dp-connector.yaml
@@ -0,0 +1,105 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/connector/dp-connector.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: DisplayPort Connector
+
+maintainers:
+ - Tomi Valkeinen <tomi.valkeinen@ti.com>
+
+properties:
+ compatible:
+ const: dp-connector
+
+ label: true
+
+ type:
+ enum:
+ - full-size
+ - mini
+
+ hpd-gpios:
+ description: A GPIO line connected to HPD
+ maxItems: 1
+
+ dp-pwr-supply:
+ description: Power supply for the DP_PWR pin
+
+ port:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Connection to controller providing DP signals
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+ description: OF graph representation of signales routed to DP connector
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Connection to controller providing DP signals
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Connection to controller providing AUX signals
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - type
+
+oneOf:
+ - required:
+ - port
+ - required:
+ - ports
+
+additionalProperties: false
+
+examples:
+ - |
+ connector {
+ compatible = "dp-connector";
+ label = "dp0";
+ type = "full-size";
+
+ port {
+ dp_connector_in: endpoint {
+ remote-endpoint = <&dp_out>;
+ };
+ };
+ };
+
+ - |
+ /* DP connecttor being driven by the USB+DP combo PHY */
+ connector {
+ compatible = "dp-connector";
+ label = "dp0";
+ type = "full-size";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ endpoint {
+ remote-endpoint = <&phy_ss_out>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ endpoint {
+ remote-endpoint = <&phy_sbu_out>;
+ };
+ };
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/display/connector/dvi-connector.txt b/Documentation/devicetree/bindings/display/connector/dvi-connector.txt
deleted file mode 100644
index 207e42e9eba0..000000000000
--- a/Documentation/devicetree/bindings/display/connector/dvi-connector.txt
+++ /dev/null
@@ -1,36 +0,0 @@
-DVI Connector
-==============
-
-Required properties:
-- compatible: "dvi-connector"
-
-Optional properties:
-- label: a symbolic name for the connector
-- ddc-i2c-bus: phandle to the i2c bus that is connected to DVI DDC
-- analog: the connector has DVI analog pins
-- digital: the connector has DVI digital pins
-- dual-link: the connector has pins for DVI dual-link
-- hpd-gpios: HPD GPIO number
-
-Required nodes:
-- Video port for DVI input
-
-Note: One (or both) of 'analog' or 'digital' must be set.
-
-Example
--------
-
-dvi0: connector@0 {
- compatible = "dvi-connector";
- label = "dvi";
-
- digital;
-
- ddc-i2c-bus = <&i2c3>;
-
- port {
- dvi_connector_in: endpoint {
- remote-endpoint = <&tfp410_out>;
- };
- };
-};
diff --git a/Documentation/devicetree/bindings/display/connector/dvi-connector.yaml b/Documentation/devicetree/bindings/display/connector/dvi-connector.yaml
new file mode 100644
index 000000000000..93eb14294e68
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/connector/dvi-connector.yaml
@@ -0,0 +1,71 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/connector/dvi-connector.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: DVI Connector
+
+maintainers:
+ - Laurent Pinchart <Laurent.pinchart@ideasonboard.com>
+
+properties:
+ compatible:
+ const: dvi-connector
+
+ label: true
+
+ hpd-gpios:
+ description: A GPIO line connected to HPD
+ maxItems: 1
+
+ ddc-i2c-bus:
+ description: phandle link to the I2C controller used for DDC EDID probing
+ $ref: /schemas/types.yaml#/definitions/phandle
+
+ analog:
+ type: boolean
+ description: the connector has DVI analog pins
+
+ digital:
+ type: boolean
+ description: the connector has DVI digital pins
+
+ dual-link:
+ type: boolean
+ description: the connector has pins for DVI dual-link
+
+ port:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Connection to controller providing DVI signals
+
+required:
+ - compatible
+ - port
+
+anyOf:
+ - required:
+ - analog
+ - required:
+ - digital
+
+additionalProperties: false
+
+examples:
+ - |
+ connector {
+ compatible = "dvi-connector";
+ label = "dvi";
+
+ digital;
+
+ ddc-i2c-bus = <&i2c3>;
+
+ port {
+ dvi_connector_in: endpoint {
+ remote-endpoint = <&tfp410_out>;
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/connector/hdmi-connector.txt b/Documentation/devicetree/bindings/display/connector/hdmi-connector.txt
deleted file mode 100644
index 508aee461e0d..000000000000
--- a/Documentation/devicetree/bindings/display/connector/hdmi-connector.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-HDMI Connector
-==============
-
-Required properties:
-- compatible: "hdmi-connector"
-- type: the HDMI connector type: "a", "b", "c", "d" or "e"
-
-Optional properties:
-- label: a symbolic name for the connector
-- hpd-gpios: HPD GPIO number
-- ddc-i2c-bus: phandle link to the I2C controller used for DDC EDID probing
-
-Required nodes:
-- Video port for HDMI input
-
-Example
--------
-
-hdmi0: connector@1 {
- compatible = "hdmi-connector";
- label = "hdmi";
-
- type = "a";
-
- port {
- hdmi_connector_in: endpoint {
- remote-endpoint = <&tpd12s015_out>;
- };
- };
-};
diff --git a/Documentation/devicetree/bindings/display/connector/hdmi-connector.yaml b/Documentation/devicetree/bindings/display/connector/hdmi-connector.yaml
new file mode 100644
index 000000000000..3ee8f9225984
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/connector/hdmi-connector.yaml
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/connector/hdmi-connector.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: HDMI Connector
+
+maintainers:
+ - Laurent Pinchart <Laurent.pinchart@ideasonboard.com>
+
+properties:
+ compatible:
+ const: hdmi-connector
+
+ type:
+ description: The HDMI connector type
+ enum:
+ - a # Standard full size
+ - b # Never deployed?
+ - c # Mini
+ - d # Micro
+ - e # automotive
+
+ label: true
+
+ hpd-gpios:
+ description: A GPIO line connected to HPD
+ maxItems: 1
+
+ ddc-i2c-bus:
+ description: phandle link to the I2C controller used for DDC EDID probing
+ $ref: /schemas/types.yaml#/definitions/phandle
+
+ ddc-en-gpios:
+ description: GPIO signal to enable DDC bus
+ maxItems: 1
+
+ hdmi-pwr-supply:
+ description: Power supply for the HDMI +5V Power pin
+
+ port:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Connection to controller providing HDMI signals
+
+required:
+ - compatible
+ - port
+ - type
+
+additionalProperties: false
+
+examples:
+ - |
+ connector {
+ compatible = "hdmi-connector";
+ label = "hdmi";
+
+ type = "a";
+
+ port {
+ hdmi_connector_in: endpoint {
+ remote-endpoint = <&tpd12s015_out>;
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/connector/vga-connector.txt b/Documentation/devicetree/bindings/display/connector/vga-connector.txt
deleted file mode 100644
index c727f298e7ad..000000000000
--- a/Documentation/devicetree/bindings/display/connector/vga-connector.txt
+++ /dev/null
@@ -1,36 +0,0 @@
-VGA Connector
-=============
-
-Required properties:
-
-- compatible: "vga-connector"
-
-Optional properties:
-
-- label: a symbolic name for the connector corresponding to a hardware label
-- ddc-i2c-bus: phandle to the I2C bus that is connected to VGA DDC
-
-Required nodes:
-
-The VGA connector internal connections are modeled using the OF graph bindings
-specified in Documentation/devicetree/bindings/graph.txt.
-
-The VGA connector has a single port that must be connected to a video source
-port.
-
-
-Example
--------
-
-vga0: connector@0 {
- compatible = "vga-connector";
- label = "vga";
-
- ddc-i2c-bus = <&i2c3>;
-
- port {
- vga_connector_in: endpoint {
- remote-endpoint = <&adv7123_out>;
- };
- };
-};
diff --git a/Documentation/devicetree/bindings/display/connector/vga-connector.yaml b/Documentation/devicetree/bindings/display/connector/vga-connector.yaml
new file mode 100644
index 000000000000..25f868002000
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/connector/vga-connector.yaml
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/connector/vga-connector.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: VGA Connector
+
+maintainers:
+ - Laurent Pinchart <Laurent.pinchart@ideasonboard.com>
+
+properties:
+ compatible:
+ const: vga-connector
+
+ label: true
+
+ ddc-i2c-bus:
+ description: phandle link to the I2C controller used for DDC EDID probing
+ $ref: /schemas/types.yaml#/definitions/phandle
+
+ port:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Connection to controller providing VGA signals
+
+required:
+ - compatible
+ - port
+
+additionalProperties: false
+
+examples:
+ - |
+ connector {
+ compatible = "vga-connector";
+ label = "vga";
+
+ ddc-i2c-bus = <&i2c3>;
+
+ port {
+ vga_connector_in: endpoint {
+ remote-endpoint = <&adv7123_out>;
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/dp-aux-bus.yaml b/Documentation/devicetree/bindings/display/dp-aux-bus.yaml
new file mode 100644
index 000000000000..0ece7b01790b
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/dp-aux-bus.yaml
@@ -0,0 +1,37 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/dp-aux-bus.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: DisplayPort AUX bus
+
+maintainers:
+ - Douglas Anderson <dianders@chromium.org>
+
+description:
+ DisplayPort controllers provide a control channel to the sinks that
+ are hooked up to them. This is the DP AUX bus. Over the DP AUX bus
+ we can query properties about a sink and also configure it. In
+ particular, DP sinks support DDC over DP AUX which allows tunneling
+ a standard I2C DDC connection over the AUX channel.
+
+ To model this relationship, DP sinks should be placed as children
+ of the DP controller under the "aux-bus" node.
+
+ At the moment, this binding only handles the eDP case. It is
+ possible it will be extended in the future to handle the DP case.
+ For DP, presumably a connector would be listed under the DP AUX
+ bus instead of a panel.
+
+properties:
+ $nodename:
+ const: aux-bus
+
+ panel:
+ $ref: panel/panel-common.yaml#
+
+additionalProperties: false
+
+required:
+ - panel
diff --git a/Documentation/devicetree/bindings/display/dsi-controller.yaml b/Documentation/devicetree/bindings/display/dsi-controller.yaml
new file mode 100644
index 000000000000..bb4d6e9e7d0c
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/dsi-controller.yaml
@@ -0,0 +1,93 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/dsi-controller.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Common Properties for DSI Display Panels
+
+maintainers:
+ - Linus Walleij <linus.walleij@linaro.org>
+
+description: |
+ This document defines device tree properties common to DSI, Display
+ Serial Interface controllers and attached panels. It doesn't constitute
+ a device tree binding specification by itself but is meant to be referenced
+ by device tree bindings.
+
+ When referenced from panel device tree bindings the properties defined in
+ this document are defined as follows. The panel device tree bindings are
+ responsible for defining whether each property is required or optional.
+
+ Notice: this binding concerns DSI panels connected directly to a master
+ without any intermediate port graph to the panel. Each DSI master
+ can control one to four virtual channels to one panel. Each virtual
+ channel should have a node "panel" for their virtual channel with their
+ reg-property set to the virtual channel number, usually there is just
+ one virtual channel, number 0.
+
+properties:
+ $nodename:
+ pattern: "^dsi(@.*)?$"
+
+ clock-master:
+ type: boolean
+ description:
+ Should be enabled if the host is being used in conjunction with
+ another DSI host to drive the same peripheral. Hardware supporting
+ such a configuration generally requires the data on both the busses
+ to be driven by the same clock. Only the DSI host instance
+ controlling this clock should contain this property.
+
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 0
+
+patternProperties:
+ "^(panel|bridge)@[0-3]$":
+ description: Panels connected to the DSI link
+ type: object
+
+ properties:
+ reg:
+ minimum: 0
+ maximum: 3
+ description:
+ The virtual channel number of a DSI peripheral. Must be in the range
+ from 0 to 3, as DSI uses a 2-bit addressing scheme. Some DSI
+ peripherals respond to more than a single virtual channel. In that
+ case the reg property can take multiple entries, one for each virtual
+ channel that the peripheral responds to.
+
+ enforce-video-mode:
+ type: boolean
+ description:
+ The best option is usually to run a panel in command mode, as this
+ gives better control over the panel hardware. However for different
+ reasons like broken hardware, missing features or testing, it may be
+ useful to be able to force a command mode-capable panel into video
+ mode.
+
+ required:
+ - reg
+
+additionalProperties: true
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+ dsi@a0351000 {
+ reg = <0xa0351000 0x1000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ panel@0 {
+ compatible = "sony,acx424akp";
+ reg = <0>;
+ vddi-supply = <&ab8500_ldo_aux1_reg>;
+ reset-gpios = <&gpio2 1 GPIO_ACTIVE_LOW>;
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/elgin,jg10309-01.yaml b/Documentation/devicetree/bindings/display/elgin,jg10309-01.yaml
new file mode 100644
index 000000000000..faca0cb3f154
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/elgin,jg10309-01.yaml
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/elgin,jg10309-01.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Elgin JG10309-01 SPI-controlled display
+
+maintainers:
+ - Fabio Estevam <festevam@gmail.com>
+
+description: |
+ The Elgin JG10309-01 SPI-controlled display is used on the RV1108-Elgin-r1
+ board and is a custom display.
+
+allOf:
+ - $ref: /schemas/spi/spi-peripheral-props.yaml#
+
+properties:
+ compatible:
+ const: elgin,jg10309-01
+
+ reg:
+ maxItems: 1
+
+ spi-max-frequency:
+ maximum: 24000000
+
+ spi-cpha: true
+
+ spi-cpol: true
+
+required:
+ - compatible
+ - reg
+ - spi-cpha
+ - spi-cpol
+
+additionalProperties: false
+
+examples:
+ - |
+ spi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ display@0 {
+ compatible = "elgin,jg10309-01";
+ reg = <0>;
+ spi-max-frequency = <24000000>;
+ spi-cpha;
+ spi-cpol;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/etnaviv/etnaviv-drm.txt b/Documentation/devicetree/bindings/display/etnaviv/etnaviv-drm.txt
deleted file mode 100644
index 8def11b16a24..000000000000
--- a/Documentation/devicetree/bindings/display/etnaviv/etnaviv-drm.txt
+++ /dev/null
@@ -1,36 +0,0 @@
-Vivante GPU core devices
-========================
-
-Required properties:
-- compatible: Should be "vivante,gc"
- A more specific compatible is not needed, as the cores contain chip
- identification registers at fixed locations, which provide all the
- necessary information to the driver.
-- reg: should be register base and length as documented in the
- datasheet
-- interrupts: Should contain the cores interrupt line
-- clocks: should contain one clock for entry in clock-names
- see Documentation/devicetree/bindings/clock/clock-bindings.txt
-- clock-names:
- - "bus": AXI/master interface clock
- - "reg": AHB/slave interface clock
- (only required if GPU can gate slave interface independently)
- - "core": GPU core clock
- - "shader": Shader clock (only required if GPU has feature PIPE_3D)
-
-Optional properties:
-- power-domains: a power domain consumer specifier according to
- Documentation/devicetree/bindings/power/power_domain.txt
-
-example:
-
-gpu_3d: gpu@130000 {
- compatible = "vivante,gc";
- reg = <0x00130000 0x4000>;
- interrupts = <0 9 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&clks IMX6QDL_CLK_GPU3D_AXI>,
- <&clks IMX6QDL_CLK_GPU3D_CORE>,
- <&clks IMX6QDL_CLK_GPU3D_SHADER>;
- clock-names = "bus", "core", "shader";
- power-domains = <&gpc 1>;
-};
diff --git a/Documentation/devicetree/bindings/display/exynos/exynos-mic.txt b/Documentation/devicetree/bindings/display/exynos/exynos-mic.txt
deleted file mode 100644
index 0fba2ee6440a..000000000000
--- a/Documentation/devicetree/bindings/display/exynos/exynos-mic.txt
+++ /dev/null
@@ -1,51 +0,0 @@
-Device-Tree bindings for Samsung Exynos SoC mobile image compressor (MIC)
-
-MIC (mobile image compressor) resides between decon and mipi dsi. Mipi dsi is
-not capable to transfer high resoltuion frame data as decon can send. MIC
-solves this problem by compressing the frame data by 1/2 before it is
-transferred through mipi dsi. The compressed frame data must be uncompressed in
-the panel PCB.
-
-Required properties:
-- compatible: value should be "samsung,exynos5433-mic".
-- reg: physical base address and length of the MIC registers set and system
- register of mic.
-- clocks: must include clock specifiers corresponding to entries in the
- clock-names property.
-- clock-names: list of clock names sorted in the same order as the clocks
- property. Must contain "pclk_mic0", "sclk_rgb_vclk_to_mic0".
-- samsung,disp-syscon: the reference node for syscon for DISP block.
-- ports: contains a port which is connected to decon node and dsi node.
- address-cells and size-cells must 1 and 0, respectively.
-- port: contains an endpoint node which is connected to the endpoint in the
- decon node or dsi node. The reg value must be 0 and 1 respectively.
-
-Example:
-SoC specific DT entry:
-mic: mic@13930000 {
- compatible = "samsung,exynos5433-mic";
- reg = <0x13930000 0x48>;
- clocks = <&cmu_disp CLK_PCLK_MIC0>,
- <&cmu_disp CLK_SCLK_RGB_VCLK_TO_MIC0>;
- clock-names = "pclk_mic0", "sclk_rgb_vclk_to_mic0";
- samsung,disp-syscon = <&syscon_disp>;
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
- mic_to_decon: endpoint {
- remote-endpoint = <&decon_to_mic>;
- };
- };
-
- port@1 {
- reg = <1>;
- mic_to_dsi: endpoint {
- remote-endpoint = <&dsi_to_mic>;
- };
- };
- };
-};
diff --git a/Documentation/devicetree/bindings/display/exynos/exynos5433-decon.txt b/Documentation/devicetree/bindings/display/exynos/exynos5433-decon.txt
deleted file mode 100644
index 775193e1c641..000000000000
--- a/Documentation/devicetree/bindings/display/exynos/exynos5433-decon.txt
+++ /dev/null
@@ -1,60 +0,0 @@
-Device-Tree bindings for Samsung Exynos SoC display controller (DECON)
-
-DECON (Display and Enhancement Controller) is the Display Controller for the
-Exynos series of SoCs which transfers the image data from a video memory
-buffer to an external LCD interface.
-
-Required properties:
-- compatible: value should be one of:
- "samsung,exynos5433-decon", "samsung,exynos5433-decon-tv";
-- reg: physical base address and length of the DECON registers set.
-- interrupt-names: should contain the interrupt names depending on mode of work:
- video mode: "vsync",
- command mode: "lcd_sys",
- command mode with software trigger: "lcd_sys", "te".
-- interrupts or interrupts-extended: list of interrupt specifiers corresponding
- to names privided in interrupt-names, as described in
- interrupt-controller/interrupts.txt
-- clocks: must include clock specifiers corresponding to entries in the
- clock-names property.
-- clock-names: list of clock names sorted in the same order as the clocks
- property. Must contain "pclk", "aclk_decon", "aclk_smmu_decon0x",
- "aclk_xiu_decon0x", "pclk_smmu_decon0x", "aclk_smmu_decon1x",
- "aclk_xiu_decon1x", "pclk_smmu_decon1x", clk_decon_vclk",
- "sclk_decon_eclk"
-- ports: contains a port which is connected to mic node. address-cells and
- size-cells must 1 and 0, respectively.
-- port: contains an endpoint node which is connected to the endpoint in the mic
- node. The reg value muset be 0.
-
-Example:
-SoC specific DT entry:
-decon: decon@13800000 {
- compatible = "samsung,exynos5433-decon";
- reg = <0x13800000 0x2104>;
- clocks = <&cmu_disp CLK_ACLK_DECON>, <&cmu_disp CLK_ACLK_SMMU_DECON0X>,
- <&cmu_disp CLK_ACLK_XIU_DECON0X>,
- <&cmu_disp CLK_PCLK_SMMU_DECON0X>,
- <&cmu_disp CLK_ACLK_SMMU_DECON1X>,
- <&cmu_disp CLK_ACLK_XIU_DECON1X>,
- <&cmu_disp CLK_PCLK_SMMU_DECON1X>,
- <&cmu_disp CLK_SCLK_DECON_VCLK>,
- <&cmu_disp CLK_SCLK_DECON_ECLK>;
- clock-names = "aclk_decon", "aclk_smmu_decon0x", "aclk_xiu_decon0x",
- "pclk_smmu_decon0x", "aclk_smmu_decon1x", "aclk_xiu_decon1x",
- "pclk_smmu_decon1x", "sclk_decon_vclk", "sclk_decon_eclk";
- interrupt-names = "vsync", "lcd_sys";
- interrupts = <0 202 0>, <0 203 0>;
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
- decon_to_mic: endpoint {
- remote-endpoint = <&mic_to_decon>;
- };
- };
- };
-};
diff --git a/Documentation/devicetree/bindings/display/exynos/exynos7-decon.txt b/Documentation/devicetree/bindings/display/exynos/exynos7-decon.txt
deleted file mode 100644
index 53912c99ec38..000000000000
--- a/Documentation/devicetree/bindings/display/exynos/exynos7-decon.txt
+++ /dev/null
@@ -1,65 +0,0 @@
-Device-Tree bindings for Samsung Exynos7 SoC display controller (DECON)
-
-DECON (Display and Enhancement Controller) is the Display Controller for the
-Exynos7 series of SoCs which transfers the image data from a video memory
-buffer to an external LCD interface.
-
-Required properties:
-- compatible: value should be "samsung,exynos7-decon";
-
-- reg: physical base address and length of the DECON registers set.
-
-- interrupts: should contain a list of all DECON IP block interrupts in the
- order: FIFO Level, VSYNC, LCD_SYSTEM. The interrupt specifier
- format depends on the interrupt controller used.
-
-- interrupt-names: should contain the interrupt names: "fifo", "vsync",
- "lcd_sys", in the same order as they were listed in the interrupts
- property.
-
-- pinctrl-0: pin control group to be used for this controller.
-
-- pinctrl-names: must contain a "default" entry.
-
-- clocks: must include clock specifiers corresponding to entries in the
- clock-names property.
-
-- clock-names: list of clock names sorted in the same order as the clocks
- property. Must contain "pclk_decon0", "aclk_decon0",
- "decon0_eclk", "decon0_vclk".
-- i80-if-timings: timing configuration for lcd i80 interface support.
-
-Optional Properties:
-- power-domains: a phandle to DECON power domain node.
-- display-timings: timing settings for DECON, as described in document [1].
- Can be used in case timings cannot be provided otherwise
- or to override timings provided by the panel.
-
-[1]: Documentation/devicetree/bindings/display/panel/display-timing.txt
-
-Example:
-
-SoC specific DT entry:
-
- decon@13930000 {
- compatible = "samsung,exynos7-decon";
- interrupt-parent = <&combiner>;
- reg = <0x13930000 0x1000>;
- interrupt-names = "lcd_sys", "vsync", "fifo";
- interrupts = <0 188 0>, <0 189 0>, <0 190 0>;
- clocks = <&clock_disp PCLK_DECON_INT>,
- <&clock_disp ACLK_DECON_INT>,
- <&clock_disp SCLK_DECON_INT_ECLK>,
- <&clock_disp SCLK_DECON_INT_EXTCLKPLL>;
- clock-names = "pclk_decon0", "aclk_decon0", "decon0_eclk",
- "decon0_vclk";
- status = "disabled";
- };
-
-Board specific DT entry:
-
- decon@13930000 {
- pinctrl-0 = <&lcd_clk &pwm1_out>;
- pinctrl-names = "default";
- status = "okay";
- };
diff --git a/Documentation/devicetree/bindings/display/exynos/exynos_dp.txt b/Documentation/devicetree/bindings/display/exynos/exynos_dp.txt
deleted file mode 100644
index 9b6cba3f82af..000000000000
--- a/Documentation/devicetree/bindings/display/exynos/exynos_dp.txt
+++ /dev/null
@@ -1,112 +0,0 @@
-The Exynos display port interface should be configured based on
-the type of panel connected to it.
-
-We use two nodes:
- -dp-controller node
- -dptx-phy node(defined inside dp-controller node)
-
-For the DP-PHY initialization, we use the dptx-phy node.
-Required properties for dptx-phy: deprecated, use phys and phy-names
- -reg: deprecated
- Base address of DP PHY register.
- -samsung,enable-mask: deprecated
- The bit-mask used to enable/disable DP PHY.
-
-For the Panel initialization, we read data from dp-controller node.
-Required properties for dp-controller:
- -compatible:
- should be "samsung,exynos5-dp".
- -reg:
- physical base address of the controller and length
- of memory mapped region.
- -interrupts:
- interrupt combiner values.
- -clocks:
- from common clock binding: handle to dp clock.
- -clock-names:
- from common clock binding: Shall be "dp".
- -phys:
- from general PHY binding: the phandle for the PHY device.
- -phy-names:
- from general PHY binding: Should be "dp".
-
-Optional properties for dp-controller:
- -interlaced:
- interlace scan mode.
- Progressive if defined, Interlaced if not defined
- -vsync-active-high:
- VSYNC polarity configuration.
- High if defined, Low if not defined
- -hsync-active-high:
- HSYNC polarity configuration.
- High if defined, Low if not defined
- -samsung,hpd-gpio:
- Hotplug detect GPIO.
- Indicates which GPIO should be used for hotplug
- detection
- -video interfaces: Device node can contain video interface port
- nodes according to [1].
- - display-timings: timings for the connected panel as described by
- Documentation/devicetree/bindings/display/panel/display-timing.txt
-
-For the below properties, please refer to Analogix DP binding document:
- * Documentation/devicetree/bindings/display/bridge/analogix_dp.txt
- -phys (required)
- -phy-names (required)
- -hpd-gpios (optional)
- force-hpd (optional)
-
-Deprecated properties for DisplayPort:
--interlaced: deprecated prop that can parsed from drm_display_mode.
--vsync-active-high: deprecated prop that can parsed from drm_display_mode.
--hsync-active-high: deprecated prop that can parsed from drm_display_mode.
--samsung,ycbcr-coeff: deprecated prop that can parsed from drm_display_mode.
--samsung,dynamic-range: deprecated prop that can parsed from drm_display_mode.
--samsung,color-space: deprecated prop that can parsed from drm_display_info.
--samsung,color-depth: deprecated prop that can parsed from drm_display_info.
--samsung,link-rate: deprecated prop that can reading from monitor by dpcd method.
--samsung,lane-count: deprecated prop that can reading from monitor by dpcd method.
--samsung,hpd-gpio: deprecated name for hpd-gpios.
-
--------------------------------------------------------------------------------
-
-Example:
-
-SOC specific portion:
- dp-controller {
- compatible = "samsung,exynos5-dp";
- reg = <0x145b0000 0x10000>;
- interrupts = <10 3>;
- interrupt-parent = <&combiner>;
- clocks = <&clock 342>;
- clock-names = "dp";
-
- phys = <&dp_phy>;
- phy-names = "dp";
- };
-
-Board Specific portion:
- dp-controller {
- display-timings {
- native-mode = <&lcd_timing>;
- lcd_timing: 1366x768 {
- clock-frequency = <70589280>;
- hactive = <1366>;
- vactive = <768>;
- hfront-porch = <40>;
- hback-porch = <40>;
- hsync-len = <32>;
- vback-porch = <10>;
- vfront-porch = <12>;
- vsync-len = <6>;
- };
- };
-
- ports {
- port@0 {
- dp_out: endpoint {
- remote-endpoint = <&bridge_in>;
- };
- };
- };
- };
diff --git a/Documentation/devicetree/bindings/display/exynos/exynos_dsim.txt b/Documentation/devicetree/bindings/display/exynos/exynos_dsim.txt
deleted file mode 100644
index be377786e8cd..000000000000
--- a/Documentation/devicetree/bindings/display/exynos/exynos_dsim.txt
+++ /dev/null
@@ -1,90 +0,0 @@
-Exynos MIPI DSI Master
-
-Required properties:
- - compatible: value should be one of the following
- "samsung,exynos3250-mipi-dsi" /* for Exynos3250/3472 SoCs */
- "samsung,exynos4210-mipi-dsi" /* for Exynos4 SoCs */
- "samsung,exynos5410-mipi-dsi" /* for Exynos5410/5420/5440 SoCs */
- "samsung,exynos5422-mipi-dsi" /* for Exynos5422/5800 SoCs */
- "samsung,exynos5433-mipi-dsi" /* for Exynos5433 SoCs */
- - reg: physical base address and length of the registers set for the device
- - interrupts: should contain DSI interrupt
- - clocks: list of clock specifiers, must contain an entry for each required
- entry in clock-names
- - clock-names: should include "bus_clk"and "sclk_mipi" entries
- the use of "pll_clk" is deprecated
- - phys: list of phy specifiers, must contain an entry for each required
- entry in phy-names
- - phy-names: should include "dsim" entry
- - vddcore-supply: MIPI DSIM Core voltage supply (e.g. 1.1V)
- - vddio-supply: MIPI DSIM I/O and PLL voltage supply (e.g. 1.8V)
- - samsung,pll-clock-frequency: specifies frequency of the oscillator clock
- - #address-cells, #size-cells: should be set respectively to <1> and <0>
- according to DSI host bindings (see MIPI DSI bindings [1])
- - samsung,burst-clock-frequency: specifies DSI frequency in high-speed burst
- mode
- - samsung,esc-clock-frequency: specifies DSI frequency in escape mode
-
-Optional properties:
- - power-domains: a phandle to DSIM power domain node
-
-Child nodes:
- Should contain DSI peripheral nodes (see MIPI DSI bindings [1]).
-
-Video interfaces:
- Device node can contain following video interface port nodes according to [2]:
- 0: RGB input,
- 1: DSI output
-
-[1]: Documentation/devicetree/bindings/display/mipi-dsi-bus.txt
-[2]: Documentation/devicetree/bindings/media/video-interfaces.txt
-
-Example:
-
- dsi@11c80000 {
- compatible = "samsung,exynos4210-mipi-dsi";
- reg = <0x11C80000 0x10000>;
- interrupts = <0 79 0>;
- clocks = <&clock 286>, <&clock 143>;
- clock-names = "bus_clk", "sclk_mipi";
- phys = <&mipi_phy 1>;
- phy-names = "dsim";
- vddcore-supply = <&vusb_reg>;
- vddio-supply = <&vmipi_reg>;
- power-domains = <&pd_lcd0>;
- #address-cells = <1>;
- #size-cells = <0>;
- samsung,pll-clock-frequency = <24000000>;
-
- panel@1 {
- reg = <0>;
- ...
- port {
- panel_ep: endpoint {
- remote-endpoint = <&dsi_ep>;
- };
- };
- };
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
- decon_to_mic: endpoint {
- remote-endpoint = <&mic_to_decon>;
- };
- };
-
- port@1 {
- reg = <1>;
- dsi_ep: endpoint {
- reg = <0>;
- samsung,burst-clock-frequency = <500000000>;
- samsung,esc-clock-frequency = <20000000>;
- remote-endpoint = <&panel_ep>;
- };
- };
- };
- };
diff --git a/Documentation/devicetree/bindings/display/exynos/exynos_hdmi.txt b/Documentation/devicetree/bindings/display/exynos/exynos_hdmi.txt
deleted file mode 100644
index 58b12e25bbb1..000000000000
--- a/Documentation/devicetree/bindings/display/exynos/exynos_hdmi.txt
+++ /dev/null
@@ -1,64 +0,0 @@
-Device-Tree bindings for drm hdmi driver
-
-Required properties:
-- compatible: value should be one among the following:
- 1) "samsung,exynos4210-hdmi"
- 2) "samsung,exynos4212-hdmi"
- 3) "samsung,exynos5420-hdmi"
- 4) "samsung,exynos5433-hdmi"
-- reg: physical base address of the hdmi and length of memory mapped
- region.
-- interrupts: interrupt number to the cpu.
-- hpd-gpios: following information about the hotplug gpio pin.
- a) phandle of the gpio controller node.
- b) pin number within the gpio controller.
- c) optional flags and pull up/down.
-- ddc: phandle to the hdmi ddc node
-- phy: phandle to the hdmi phy node
-- samsung,syscon-phandle: phandle for system controller node for PMU.
-- #sound-dai-cells: should be 0.
-
-Required properties for Exynos 4210, 4212, 5420 and 5433:
-- clocks: list of clock IDs from SoC clock driver.
- a) hdmi: Gate of HDMI IP bus clock.
- b) sclk_hdmi: Gate of HDMI special clock.
- c) sclk_pixel: Pixel special clock, one of the two possible inputs of
- HDMI clock mux.
- d) sclk_hdmiphy: HDMI PHY clock output, one of two possible inputs of
- HDMI clock mux.
- e) mout_hdmi: It is required by the driver to switch between the 2
- parents i.e. sclk_pixel and sclk_hdmiphy. If hdmiphy is stable
- after configuration, parent is set to sclk_hdmiphy else
- sclk_pixel.
-- clock-names: aliases as per driver requirements for above clock IDs:
- "hdmi", "sclk_hdmi", "sclk_pixel", "sclk_hdmiphy" and "mout_hdmi".
-
-Required properties for Exynos 5433:
-- clocks: list of clock specifiers according to common clock bindings.
- a) hdmi_pclk: Gate of HDMI IP APB bus.
- b) hdmi_i_pclk: Gate of HDMI-PHY IP APB bus.
- d) i_tmds_clk: Gate of HDMI TMDS clock.
- e) i_pixel_clk: Gate of HDMI pixel clock.
- f) i_spdif_clk: Gate of HDMI SPDIF clock.
- g) oscclk: Oscillator clock, used as parent of following *_user clocks
- in case HDMI-PHY is not operational.
- h) tmds_clko: TMDS clock generated by HDMI-PHY.
- i) tmds_clko_user: MUX used to switch between oscclk and tmds_clko,
- respectively if HDMI-PHY is off and operational.
- j) pixel_clko: Pixel clock generated by HDMI-PHY.
- k) pixel_clko_user: MUX used to switch between oscclk and pixel_clko,
- respectively if HDMI-PHY is off and operational.
-- clock-names: aliases for above clock specfiers.
-- samsung,sysreg: handle to syscon used to control the system registers.
-
-Example:
-
- hdmi {
- compatible = "samsung,exynos4212-hdmi";
- reg = <0x14530000 0x100000>;
- interrupts = <0 95 0>;
- hpd-gpios = <&gpx3 7 1>;
- ddc = <&hdmi_ddc_node>;
- phy = <&hdmi_phy_node>;
- samsung,syscon-phandle = <&pmu_system_controller>;
- };
diff --git a/Documentation/devicetree/bindings/display/exynos/exynos_hdmiddc.txt b/Documentation/devicetree/bindings/display/exynos/exynos_hdmiddc.txt
deleted file mode 100644
index 41eee971562b..000000000000
--- a/Documentation/devicetree/bindings/display/exynos/exynos_hdmiddc.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-Device-Tree bindings for hdmiddc driver
-
-Required properties:
-- compatible: value should be one of the following
- 1) "samsung,exynos5-hdmiddc" <DEPRECATED>
- 2) "samsung,exynos4210-hdmiddc"
-
-- reg: I2C address of the hdmiddc device.
-
-Example:
-
- hdmiddc {
- compatible = "samsung,exynos4210-hdmiddc";
- reg = <0x50>;
- };
diff --git a/Documentation/devicetree/bindings/display/exynos/exynos_hdmiphy.txt b/Documentation/devicetree/bindings/display/exynos/exynos_hdmiphy.txt
deleted file mode 100644
index 162f641f7639..000000000000
--- a/Documentation/devicetree/bindings/display/exynos/exynos_hdmiphy.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-Device-Tree bindings for hdmiphy driver
-
-Required properties:
-- compatible: value should be one of the following:
- 1) "samsung,exynos5-hdmiphy" <DEPRECATED>
- 2) "samsung,exynos4210-hdmiphy".
- 3) "samsung,exynos4212-hdmiphy".
-- reg: I2C address of the hdmiphy device.
-
-Example:
-
- hdmiphy {
- compatible = "samsung,exynos4210-hdmiphy";
- reg = <0x38>;
- };
diff --git a/Documentation/devicetree/bindings/display/exynos/exynos_mixer.txt b/Documentation/devicetree/bindings/display/exynos/exynos_mixer.txt
deleted file mode 100644
index 3e38128f866b..000000000000
--- a/Documentation/devicetree/bindings/display/exynos/exynos_mixer.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-Device-Tree bindings for mixer driver
-
-Required properties:
-- compatible: value should be one of the following:
- 1) "samsung,exynos5-mixer" <DEPRECATED>
- 2) "samsung,exynos4210-mixer"
- 3) "samsung,exynos4212-mixer"
- 4) "samsung,exynos5250-mixer"
- 5) "samsung,exynos5420-mixer"
-
-- reg: physical base address of the mixer and length of memory mapped
- region.
-- interrupts: interrupt number to the cpu.
-- clocks: list of clock IDs from SoC clock driver.
- a) mixer: Gate of Mixer IP bus clock.
- b) sclk_hdmi: HDMI Special clock, one of the two possible inputs of
- mixer mux.
- c) hdmi: Gate of HDMI IP bus clock, needed together with sclk_hdmi.
-
-Example:
-
- mixer {
- compatible = "samsung,exynos5250-mixer";
- reg = <0x14450000 0x10000>;
- interrupts = <0 94 0>;
- };
diff --git a/Documentation/devicetree/bindings/display/exynos/samsung-fimd.txt b/Documentation/devicetree/bindings/display/exynos/samsung-fimd.txt
deleted file mode 100644
index b3096421d42b..000000000000
--- a/Documentation/devicetree/bindings/display/exynos/samsung-fimd.txt
+++ /dev/null
@@ -1,107 +0,0 @@
-Device-Tree bindings for Samsung SoC display controller (FIMD)
-
-FIMD (Fully Interactive Mobile Display) is the Display Controller for the
-Samsung series of SoCs which transfers the image data from a video memory
-buffer to an external LCD interface.
-
-Required properties:
-- compatible: value should be one of the following
- "samsung,s3c2443-fimd"; /* for S3C24XX SoCs */
- "samsung,s3c6400-fimd"; /* for S3C64XX SoCs */
- "samsung,s5pv210-fimd"; /* for S5PV210 SoC */
- "samsung,exynos3250-fimd"; /* for Exynos3250/3472 SoCs */
- "samsung,exynos4210-fimd"; /* for Exynos4 SoCs */
- "samsung,exynos5250-fimd"; /* for Exynos5250 SoCs */
- "samsung,exynos5420-fimd"; /* for Exynos5420/5422/5800 SoCs */
-
-- reg: physical base address and length of the FIMD registers set.
-
-- interrupts: should contain a list of all FIMD IP block interrupts in the
- order: FIFO Level, VSYNC, LCD_SYSTEM. The interrupt specifier
- format depends on the interrupt controller used.
-
-- interrupt-names: should contain the interrupt names: "fifo", "vsync",
- "lcd_sys", in the same order as they were listed in the interrupts
- property.
-
-- pinctrl-0: pin control group to be used for this controller.
-
-- pinctrl-names: must contain a "default" entry.
-
-- clocks: must include clock specifiers corresponding to entries in the
- clock-names property.
-
-- clock-names: list of clock names sorted in the same order as the clocks
- property. Must contain "sclk_fimd" and "fimd".
-
-Optional Properties:
-- power-domains: a phandle to FIMD power domain node.
-- samsung,invert-vden: video enable signal is inverted
-- samsung,invert-vclk: video clock signal is inverted
-- display-timings: timing settings for FIMD, as described in document [1].
- Can be used in case timings cannot be provided otherwise
- or to override timings provided by the panel.
-- samsung,sysreg: handle to syscon used to control the system registers
-- i80-if-timings: timing configuration for lcd i80 interface support.
- - cs-setup: clock cycles for the active period of address signal is enabled
- until chip select is enabled.
- If not specified, the default value(0) will be used.
- - wr-setup: clock cycles for the active period of CS signal is enabled until
- write signal is enabled.
- If not specified, the default value(0) will be used.
- - wr-active: clock cycles for the active period of CS is enabled.
- If not specified, the default value(1) will be used.
- - wr-hold: clock cycles for the active period of CS is disabled until write
- signal is disabled.
- If not specified, the default value(0) will be used.
-
- The parameters are defined as:
-
- VCLK(internal) __|??????|_____|??????|_____|??????|_____|??????|_____|??
- : : : : :
- Address Output --:<XXXXXXXXXXX:XXXXXXXXXXXX:XXXXXXXXXXXX:XXXXXXXXXXXX:XX
- | cs-setup+1 | : : :
- |<---------->| : : :
- Chip Select ???????????????|____________:____________:____________|??
- | wr-setup+1 | | wr-hold+1 |
- |<---------->| |<---------->|
- Write Enable ????????????????????????????|____________|???????????????
- | wr-active+1|
- |<---------->|
- Video Data ----------------------------<XXXXXXXXXXXXXXXXXXXXXXXXX>--
-
-The device node can contain 'port' child nodes according to the bindings defined
-in [2]. The following are properties specific to those nodes:
-- reg: (required) port index, can be:
- 0 - for CAMIF0 input,
- 1 - for CAMIF1 input,
- 2 - for CAMIF2 input,
- 3 - for parallel output,
- 4 - for write-back interface
-
-[1]: Documentation/devicetree/bindings/display/panel/display-timing.txt
-[2]: Documentation/devicetree/bindings/media/video-interfaces.txt
-
-Example:
-
-SoC specific DT entry:
-
- fimd@11c00000 {
- compatible = "samsung,exynos4210-fimd";
- interrupt-parent = <&combiner>;
- reg = <0x11c00000 0x20000>;
- interrupt-names = "fifo", "vsync", "lcd_sys";
- interrupts = <11 0>, <11 1>, <11 2>;
- clocks = <&clock 140>, <&clock 283>;
- clock-names = "sclk_fimd", "fimd";
- power-domains = <&pd_lcd0>;
- status = "disabled";
- };
-
-Board specific DT entry:
-
- fimd@11c00000 {
- pinctrl-0 = <&lcd_clk &lcd_data24 &pwm1_out>;
- pinctrl-names = "default";
- status = "okay";
- };
diff --git a/Documentation/devicetree/bindings/display/faraday,tve200.txt b/Documentation/devicetree/bindings/display/faraday,tve200.txt
deleted file mode 100644
index 82e3bc0b7485..000000000000
--- a/Documentation/devicetree/bindings/display/faraday,tve200.txt
+++ /dev/null
@@ -1,54 +0,0 @@
-* Faraday TV Encoder TVE200
-
-Required properties:
-
-- compatible: must be one of:
- "faraday,tve200"
- "cortina,gemini-tvc", "faraday,tve200"
-
-- reg: base address and size of the control registers block
-
-- interrupts: contains an interrupt specifier for the interrupt
- line from the TVE200
-
-- clock-names: should contain "PCLK" for the clock line clocking the
- silicon and "TVE" for the 27MHz clock to the video driver
-
-- clocks: contains phandle and clock specifier pairs for the entries
- in the clock-names property. See
- Documentation/devicetree/bindings/clock/clock-bindings.txt
-
-Optional properties:
-
-- resets: contains the reset line phandle for the block
-
-Required sub-nodes:
-
-- port: describes LCD panel signals, following the common binding
- for video transmitter interfaces; see
- Documentation/devicetree/bindings/media/video-interfaces.txt
- This port should have the properties:
- reg = <0>;
- It should have one endpoint connected to a remote endpoint where
- the display is connected.
-
-Example:
-
-display-controller@6a000000 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "faraday,tve200";
- reg = <0x6a000000 0x1000>;
- interrupts = <13 IRQ_TYPE_EDGE_RISING>;
- resets = <&syscon GEMINI_RESET_TVC>;
- clocks = <&syscon GEMINI_CLK_GATE_TVC>,
- <&syscon GEMINI_CLK_TVC>;
- clock-names = "PCLK", "TVE";
-
- port@0 {
- reg = <0>;
- display_out: endpoint {
- remote-endpoint = <&panel_in>;
- };
- };
-};
diff --git a/Documentation/devicetree/bindings/display/faraday,tve200.yaml b/Documentation/devicetree/bindings/display/faraday,tve200.yaml
new file mode 100644
index 000000000000..e2ee77767321
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/faraday,tve200.yaml
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/faraday,tve200.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Faraday TV Encoder TVE200
+
+maintainers:
+ - Linus Walleij <linus.walleij@linaro.org>
+
+properties:
+ compatible:
+ oneOf:
+ - const: faraday,tve200
+ - items:
+ - const: cortina,gemini-tvc
+ - const: faraday,tve200
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ minItems: 1
+
+ clock-names:
+ items:
+ - const: PCLK
+ - const: TVE
+
+ clocks:
+ minItems: 2
+
+ resets:
+ minItems: 1
+
+ port:
+ $ref: /schemas/graph.yaml#/properties/port
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clock-names
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/cortina,gemini-clock.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/reset/cortina,gemini-reset.h>
+ display-controller@6a000000 {
+ compatible = "faraday,tve200";
+ reg = <0x6a000000 0x1000>;
+ interrupts = <13 IRQ_TYPE_EDGE_RISING>;
+ resets = <&syscon GEMINI_RESET_TVC>;
+ clocks = <&syscon GEMINI_CLK_GATE_TVC>,
+ <&syscon GEMINI_CLK_TVC>;
+ clock-names = "PCLK", "TVE";
+
+ port {
+ display_out: endpoint {
+ remote-endpoint = <&panel_in>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/fsl,dcu.txt b/Documentation/devicetree/bindings/display/fsl,dcu.txt
deleted file mode 100644
index 63ec2a624aa9..000000000000
--- a/Documentation/devicetree/bindings/display/fsl,dcu.txt
+++ /dev/null
@@ -1,34 +0,0 @@
-Device Tree bindings for Freescale DCU DRM Driver
-
-Required properties:
-- compatible: Should be one of
- * "fsl,ls1021a-dcu".
- * "fsl,vf610-dcu".
-
-- reg: Address and length of the register set for dcu.
-- clocks: Handle to "dcu" and "pix" clock (in the order below)
- This can be the same clock (e.g. LS1021a)
- See ../clocks/clock-bindings.txt for details.
-- clock-names: Should be "dcu" and "pix"
- See ../clocks/clock-bindings.txt for details.
-- big-endian Boolean property, LS1021A DCU registers are big-endian.
-- port Video port for the panel output
-
-Optional properties:
-- fsl,tcon: The phandle to the timing controller node.
-
-Examples:
-dcu: dcu@2ce0000 {
- compatible = "fsl,ls1021a-dcu";
- reg = <0x0 0x2ce0000 0x0 0x10000>;
- clocks = <&platform_clk 0>, <&platform_clk 0>;
- clock-names = "dcu", "pix";
- big-endian;
- fsl,tcon = <&tcon>;
-
- port {
- dcu_out: endpoint {
- remote-endpoint = <&panel_out>;
- };
- };
-};
diff --git a/Documentation/devicetree/bindings/display/fsl,lcdif.yaml b/Documentation/devicetree/bindings/display/fsl,lcdif.yaml
new file mode 100644
index 000000000000..2dd0411ec651
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/fsl,lcdif.yaml
@@ -0,0 +1,218 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/fsl,lcdif.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale/NXP i.MX LCD Interface (LCDIF)
+
+maintainers:
+ - Marek Vasut <marex@denx.de>
+ - Stefan Agner <stefan@agner.ch>
+
+description: |
+ (e)LCDIF display controller found in the Freescale/NXP i.MX SoCs.
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - fsl,imx23-lcdif
+ - fsl,imx28-lcdif
+ - fsl,imx6sx-lcdif
+ - fsl,imx8mp-lcdif
+ - fsl,imx93-lcdif
+ - items:
+ - enum:
+ - fsl,imx6sl-lcdif
+ - fsl,imx6sll-lcdif
+ - fsl,imx6ul-lcdif
+ - fsl,imx7d-lcdif
+ - fsl,imx8mm-lcdif
+ - fsl,imx8mn-lcdif
+ - fsl,imx8mq-lcdif
+ - const: fsl,imx6sx-lcdif
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: Pixel clock
+ - description: Bus clock
+ - description: Display AXI clock
+ minItems: 1
+
+ clock-names:
+ items:
+ - const: pix
+ - const: axi
+ - const: disp_axi
+ minItems: 1
+
+ dmas:
+ items:
+ - description: DMA specifier for the RX DMA channel.
+
+ dma-names:
+ items:
+ - const: rx
+
+ interrupts:
+ items:
+ - description: LCDIF DMA interrupt
+ - description: LCDIF Error interrupt
+ minItems: 1
+
+ power-domains:
+ maxItems: 1
+
+ port:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: The LCDIF output port
+
+ display:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description: phandle to display panel
+ deprecated: true
+
+ display0:
+ $ref: panel/panel-common.yaml#
+ deprecated: true
+
+ lcd-supply:
+ deprecated: true
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - interrupts
+
+additionalProperties: false
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: fsl,imx6sx-lcdif
+ then:
+ properties:
+ clocks:
+ minItems: 2
+ maxItems: 3
+ clock-names:
+ minItems: 2
+ maxItems: 3
+ required:
+ - clock-names
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - fsl,imx8mp-lcdif
+ - fsl,imx93-lcdif
+ then:
+ properties:
+ clocks:
+ minItems: 3
+ maxItems: 3
+ clock-names:
+ minItems: 3
+ maxItems: 3
+ required:
+ - clock-names
+ - if:
+ not:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - fsl,imx6sx-lcdif
+ - fsl,imx8mp-lcdif
+ - fsl,imx93-lcdif
+ then:
+ properties:
+ clocks:
+ maxItems: 1
+ clock-names:
+ maxItems: 1
+ - if:
+ properties:
+ compatible:
+ const: fsl,imx6sx-lcdif
+ then:
+ required:
+ - power-domains
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - fsl,imx6sl-lcdif
+ - fsl,imx8mm-lcdif
+ - fsl,imx8mn-lcdif
+ - fsl,imx8mp-lcdif
+ - fsl,imx93-lcdif
+ then:
+ required:
+ - power-domains
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - fsl,imx23-lcdif
+ then:
+ properties:
+ interrupts:
+ minItems: 2
+ maxItems: 2
+ else:
+ properties:
+ interrupts:
+ maxItems: 1
+
+ - if:
+ not:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - fsl,imx28-lcdif
+ then:
+ properties:
+ dmas: false
+ dma-names: false
+ display: false
+ display0: false
+ lcd-supply: false
+
+ required:
+ - port
+
+examples:
+ - |
+ #include <dt-bindings/clock/imx6sx-clock.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ display-controller@2220000 {
+ compatible = "fsl,imx6sx-lcdif";
+ reg = <0x02220000 0x4000>;
+ interrupts = <GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clks IMX6SX_CLK_LCDIF1_PIX>,
+ <&clks IMX6SX_CLK_LCDIF_APB>,
+ <&clks IMX6SX_CLK_DISPLAY_AXI>;
+ clock-names = "pix", "axi", "disp_axi";
+ power-domains = <&pd_disp>;
+
+ port {
+ endpoint {
+ remote-endpoint = <&panel_in>;
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/fsl,ls1021a-dcu.yaml b/Documentation/devicetree/bindings/display/fsl,ls1021a-dcu.yaml
new file mode 100644
index 000000000000..72d14babe993
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/fsl,ls1021a-dcu.yaml
@@ -0,0 +1,71 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/fsl,ls1021a-dcu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale DCU DRM Driver
+
+maintainers:
+ - Frank Li <Frank.Li@nxp.com>
+
+properties:
+ compatible:
+ enum:
+ - fsl,ls1021a-dcu
+ - fsl,vf610-dcu
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ maxItems: 2
+
+ clock-names:
+ items:
+ - const: dcu
+ - const: pix
+
+ big-endian: true
+
+ port:
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ unevaluatedProperties: false
+ description: Video port for the panel output
+
+ properties:
+ endpoint:
+ $ref: /schemas/media/video-interfaces.yaml#
+ unevaluatedProperties: false
+
+ fsl,tcon:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description: The phandle to the timing controller node.
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ - |
+ display-controller@2ce0000 {
+ compatible = "fsl,ls1021a-dcu";
+ reg = <0x2ce0000 0x10000>;
+ clocks = <&platform_clk 0>, <&platform_clk 0>;
+ clock-names = "dcu", "pix";
+ big-endian;
+ fsl,tcon = <&tcon>;
+
+ port {
+ endpoint {
+ remote-endpoint = <&panel_out>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/fsl,tcon.txt b/Documentation/devicetree/bindings/display/fsl,tcon.txt
deleted file mode 100644
index 475008747801..000000000000
--- a/Documentation/devicetree/bindings/display/fsl,tcon.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-Device Tree bindings for Freescale TCON Driver
-
-Required properties:
-- compatible: Should be one of
- * "fsl,vf610-tcon".
-
-- reg: Address and length of the register set for tcon.
-- clocks: From common clock binding: handle to tcon ipg clock.
-- clock-names: From common clock binding: Shall be "ipg".
-
-Examples:
-timing-controller@4003d000 {
- compatible = "fsl,vf610-tcon";
- reg = <0x4003d000 0x1000>;
- clocks = <&clks VF610_CLK_TCON0>;
- clock-names = "ipg";
-};
diff --git a/Documentation/devicetree/bindings/display/fsl,vf610-tcon.yaml b/Documentation/devicetree/bindings/display/fsl,vf610-tcon.yaml
new file mode 100644
index 000000000000..06bd680524a5
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/fsl,vf610-tcon.yaml
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/fsl,vf610-tcon.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale TCON
+
+maintainers:
+ - Frank Li <Frank.Li@nxp.com>
+
+properties:
+ compatible:
+ const: fsl,vf610-tcon
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ items:
+ - const: ipg
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/vf610-clock.h>
+
+ timing-controller@4003d000 {
+ compatible = "fsl,vf610-tcon";
+ reg = <0x4003d000 0x1000>;
+ clocks = <&clks VF610_CLK_TCON0>;
+ clock-names = "ipg";
+ };
diff --git a/Documentation/devicetree/bindings/display/himax,hx8357.yaml b/Documentation/devicetree/bindings/display/himax,hx8357.yaml
new file mode 100644
index 000000000000..34c3b89bf003
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/himax,hx8357.yaml
@@ -0,0 +1,78 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/himax,hx8357.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Himax HX8357D display panel
+
+description:
+ Display panels using a Himax HX8357D controller in SPI
+ mode, such as the Adafruit 3.5" TFT for Raspberry Pi.
+
+maintainers:
+ - Frank Li <Frank.Li@nxp.com>
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - adafruit,yx350hv15
+ - himax,hx8357b
+ - const: himax,hx8357
+ - items:
+ - enum:
+ - himax,hx8369a
+ - const: himax,hx8369
+
+ reg:
+ maxItems: 1
+
+ dc-gpios:
+ maxItems: 1
+ description: D/C pin
+
+ rotation:
+ enum: [0, 90, 180, 270]
+
+ backlight:
+ description:
+ phandle of the backlight device attached to the panel
+
+ im-gpios:
+ maxItems: 3
+
+ reset-gpios:
+ maxItems: 1
+
+ spi-cpha: true
+
+ spi-cpol: true
+
+required:
+ - compatible
+ - reg
+
+allOf:
+ - $ref: /schemas/spi/spi-peripheral-props.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ spi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ display@0 {
+ compatible = "adafruit,yx350hv15", "himax,hx8357";
+ reg = <0>;
+ spi-max-frequency = <32000000>;
+ dc-gpios = <&gpio0 25 GPIO_ACTIVE_HIGH>;
+ rotation = <90>;
+ backlight = <&backlight>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/ht16k33.txt b/Documentation/devicetree/bindings/display/ht16k33.txt
deleted file mode 100644
index d5a8b070b467..000000000000
--- a/Documentation/devicetree/bindings/display/ht16k33.txt
+++ /dev/null
@@ -1,40 +0,0 @@
-Holtek ht16k33 RAM mapping 16*8 LED controller driver with keyscan
--------------------------------------------------------------------------------
-
-Required properties:
-- compatible: "holtek,ht16k33"
-- reg: I2C slave address of the chip.
-- interrupts: Interrupt specification for the key pressed interrupt.
-- refresh-rate-hz: Display update interval in HZ.
-- debounce-delay-ms: Debouncing interval time in milliseconds.
-- linux,keymap: The keymap for keys as described in the binding
- document (devicetree/bindings/input/matrix-keymap.txt).
-
-Optional properties:
-- linux,no-autorepeat: Disable keyrepeat.
-- default-brightness-level: Initial brightness level [0-15] (default: 15).
-
-Example:
-
-&i2c1 {
- ht16k33: ht16k33@70 {
- compatible = "holtek,ht16k33";
- reg = <0x70>;
- refresh-rate-hz = <20>;
- debounce-delay-ms = <50>;
- interrupt-parent = <&gpio4>;
- interrupts = <5 (IRQ_TYPE_LEVEL_HIGH | IRQ_TYPE_EDGE_RISING)>;
- linux,keymap = <
- MATRIX_KEY(2, 0, KEY_F6)
- MATRIX_KEY(3, 0, KEY_F8)
- MATRIX_KEY(4, 0, KEY_F10)
- MATRIX_KEY(5, 0, KEY_F4)
- MATRIX_KEY(6, 0, KEY_F2)
- MATRIX_KEY(2, 1, KEY_F5)
- MATRIX_KEY(3, 1, KEY_F7)
- MATRIX_KEY(4, 1, KEY_F9)
- MATRIX_KEY(5, 1, KEY_F3)
- MATRIX_KEY(6, 1, KEY_F1)
- >;
- };
-};
diff --git a/Documentation/devicetree/bindings/display/ilitek,ili9341.txt b/Documentation/devicetree/bindings/display/ilitek,ili9341.txt
deleted file mode 100644
index 169b32e4ee4e..000000000000
--- a/Documentation/devicetree/bindings/display/ilitek,ili9341.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-Ilitek ILI9341 display panels
-
-This binding is for display panels using an Ilitek ILI9341 controller in SPI
-mode.
-
-Required properties:
-- compatible: "adafruit,yx240qv29", "ilitek,ili9341"
-- dc-gpios: D/C pin
-- reset-gpios: Reset pin
-
-The node for this driver must be a child node of a SPI controller, hence
-all mandatory properties described in ../spi/spi-bus.txt must be specified.
-
-Optional properties:
-- rotation: panel rotation in degrees counter clockwise (0,90,180,270)
-- backlight: phandle of the backlight device attached to the panel
-
-Example:
- display@0{
- compatible = "adafruit,yx240qv29", "ilitek,ili9341";
- reg = <0>;
- spi-max-frequency = <32000000>;
- dc-gpios = <&gpio0 9 GPIO_ACTIVE_HIGH>;
- reset-gpios = <&gpio0 8 GPIO_ACTIVE_HIGH>;
- rotation = <270>;
- backlight = <&backlight>;
- };
diff --git a/Documentation/devicetree/bindings/display/ilitek,ili9486.yaml b/Documentation/devicetree/bindings/display/ilitek,ili9486.yaml
new file mode 100644
index 000000000000..9cc1fd0751cd
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/ilitek,ili9486.yaml
@@ -0,0 +1,69 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/ilitek,ili9486.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Ilitek ILI9486 display panels
+
+maintainers:
+ - Kamlesh Gurudasani <kamlesh.gurudasani@gmail.com>
+
+description:
+ This binding is for display panels using an Ilitek ILI9486 controller in SPI
+ mode.
+
+allOf:
+ - $ref: panel/panel-common.yaml#
+
+properties:
+ compatible:
+ items:
+ - enum:
+ # Waveshare 3.5" 320x480 Color TFT LCD
+ - waveshare,rpi-lcd-35
+ # Ozzmaker 3.5" 320x480 Color TFT LCD
+ - ozzmaker,piscreen
+ - const: ilitek,ili9486
+
+ spi-max-frequency:
+ maximum: 32000000
+
+ dc-gpios:
+ maxItems: 1
+ description: Display data/command selection (D/CX)
+
+ backlight: true
+ reg: true
+ reset-gpios: true
+ rotation: true
+
+required:
+ - compatible
+ - reg
+ - dc-gpios
+ - reset-gpios
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ spi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+
+ display@0{
+ compatible = "waveshare,rpi-lcd-35", "ilitek,ili9486";
+ reg = <0>;
+ spi-max-frequency = <32000000>;
+ dc-gpios = <&gpio0 24 GPIO_ACTIVE_HIGH>;
+ reset-gpios = <&gpio0 25 GPIO_ACTIVE_HIGH>;
+ rotation = <180>;
+ backlight = <&backlight>;
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/imx/fsl,imx-display-subsystem.yaml b/Documentation/devicetree/bindings/display/imx/fsl,imx-display-subsystem.yaml
new file mode 100644
index 000000000000..92a0a797d099
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/imx/fsl,imx-display-subsystem.yaml
@@ -0,0 +1,36 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/imx/fsl,imx-display-subsystem.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX DRM master device
+
+maintainers:
+ - Frank Li <Frank.Li@nxp.com>
+
+description:
+ The freescale i.MX DRM master device is a virtual device needed to list all
+ IPU or other display interface nodes that comprise the graphics subsystem.
+
+properties:
+ compatible:
+ const: fsl,imx-display-subsystem
+
+ ports:
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ description:
+ Should contain a list of phandles pointing to camera
+ sensor interface ports of IPU devices.
+
+required:
+ - compatible
+
+additionalProperties: false
+
+examples:
+ - |
+ display-subsystem {
+ compatible = "fsl,imx-display-subsystem";
+ ports = <&ipu_di0>;
+ };
diff --git a/Documentation/devicetree/bindings/display/imx/fsl,imx-fb.txt b/Documentation/devicetree/bindings/display/imx/fsl,imx-fb.txt
deleted file mode 100644
index e5a8b363d829..000000000000
--- a/Documentation/devicetree/bindings/display/imx/fsl,imx-fb.txt
+++ /dev/null
@@ -1,57 +0,0 @@
-Freescale imx21 Framebuffer
-
-This framebuffer driver supports devices imx1, imx21, imx25, and imx27.
-
-Required properties:
-- compatible : "fsl,<chip>-fb", chip should be imx1 or imx21
-- reg : Should contain 1 register ranges(address and length)
-- interrupts : One interrupt of the fb dev
-
-Required nodes:
-- display: Phandle to a display node as described in
- Documentation/devicetree/bindings/display/panel/display-timing.txt
- Additional, the display node has to define properties:
- - bits-per-pixel: Bits per pixel
- - fsl,pcr: LCDC PCR value
- A display node may optionally define
- - fsl,aus-mode: boolean to enable AUS mode (only for imx21)
-
-Optional properties:
-- lcd-supply: Regulator for LCD supply voltage.
-- fsl,dmacr: DMA Control Register value. This is optional. By default, the
- register is not modified as recommended by the datasheet.
-- fsl,lpccr: Contrast Control Register value. This property provides the
- default value for the contrast control register.
- If that property is omitted, the register is zeroed.
-- fsl,lscr1: LCDC Sharp Configuration Register value.
-
-Example:
-
- imxfb: fb@10021000 {
- compatible = "fsl,imx21-fb";
- interrupts = <61>;
- reg = <0x10021000 0x1000>;
- display = <&display0>;
- };
-
- ...
-
- display0: display0 {
- model = "Primeview-PD050VL1";
- native-mode = <&timing_disp0>;
- bits-per-pixel = <16>;
- fsl,pcr = <0xf0c88080>; /* non-standard but required */
- display-timings {
- timing_disp0: 640x480 {
- hactive = <640>;
- vactive = <480>;
- hback-porch = <112>;
- hfront-porch = <36>;
- hsync-len = <32>;
- vback-porch = <33>;
- vfront-porch = <33>;
- vsync-len = <2>;
- clock-frequency = <25000000>;
- };
- };
- };
diff --git a/Documentation/devicetree/bindings/display/imx/fsl,imx-lcdc.yaml b/Documentation/devicetree/bindings/display/imx/fsl,imx-lcdc.yaml
new file mode 100644
index 000000000000..c2b29622bceb
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/imx/fsl,imx-lcdc.yaml
@@ -0,0 +1,146 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/imx/fsl,imx-lcdc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX LCD Controller, found on i.MX1, i.MX21, i.MX25 and i.MX27
+
+maintainers:
+ - Sascha Hauer <s.hauer@pengutronix.de>
+ - Pengutronix Kernel Team <kernel@pengutronix.de>
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - fsl,imx1-fb
+ - fsl,imx21-fb
+ - items:
+ - enum:
+ - fsl,imx25-fb
+ - fsl,imx27-fb
+ - const: fsl,imx21-fb
+ - items:
+ - const: fsl,imx25-lcdc
+ - const: fsl,imx21-lcdc
+
+ clocks:
+ maxItems: 3
+
+ clock-names:
+ items:
+ - const: ipg
+ - const: ahb
+ - const: per
+
+ port:
+ $ref: /schemas/graph.yaml#/properties/port
+
+ display:
+ $ref: /schemas/types.yaml#/definitions/phandle
+
+ interrupts:
+ maxItems: 1
+
+ reg:
+ maxItems: 1
+
+ lcd-supply:
+ description:
+ Regulator for LCD supply voltage.
+
+ fsl,dmacr:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ Override value for DMA Control Register
+
+ fsl,lpccr:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ Contrast Control Register value.
+
+ fsl,lscr1:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ LCDC Sharp Configuration Register value.
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - fsl,imx1-lcdc
+ - fsl,imx21-lcdc
+ then:
+ properties:
+ display: false
+ fsl,dmacr: false
+ fsl,lpccr: false
+ fsl,lscr1: false
+
+ required:
+ - port
+
+ else:
+ properties:
+ port: false
+
+ required:
+ - display
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - interrupts
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ lcdc@53fbc000 {
+ compatible = "fsl,imx25-lcdc", "fsl,imx21-lcdc";
+ reg = <0x53fbc000 0x4000>;
+ interrupts = <39>;
+ clocks = <&clks 103>, <&clks 66>, <&clks 49>;
+ clock-names = "ipg", "ahb", "per";
+
+ port {
+ parallel_out: endpoint {
+ remote-endpoint = <&panel_in>;
+ };
+ };
+ };
+ - |
+ imxfb: fb@10021000 {
+ compatible = "fsl,imx21-fb";
+ interrupts = <61>;
+ reg = <0x10021000 0x1000>;
+ display = <&display0>;
+ clocks = <&clks 103>, <&clks 49>, <&clks 66>;
+ clock-names = "ipg", "ahb", "per";
+ };
+
+ display0: display0 {
+ model = "Primeview-PD050VL1";
+ bits-per-pixel = <16>;
+ fsl,pcr = <0xf0c88080>; /* non-standard but required */
+
+ display-timings {
+ native-mode = <&timing_disp0>;
+ timing_disp0: timing0 {
+ hactive = <640>;
+ vactive = <480>;
+ hback-porch = <112>;
+ hfront-porch = <36>;
+ hsync-len = <32>;
+ vback-porch = <33>;
+ vfront-porch = <33>;
+ vsync-len = <2>;
+ clock-frequency = <25000000>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/imx/fsl,imx-parallel-display.yaml b/Documentation/devicetree/bindings/display/imx/fsl,imx-parallel-display.yaml
new file mode 100644
index 000000000000..bbcfe7e2958b
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/imx/fsl,imx-parallel-display.yaml
@@ -0,0 +1,74 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/imx/fsl,imx-parallel-display.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Parallel display support
+
+maintainers:
+ - Frank Li <Frank.Li@nxp.com>
+
+properties:
+ compatible:
+ const: fsl,imx-parallel-display
+
+ interface-pix-fmt:
+ $ref: /schemas/types.yaml#/definitions/string
+ enum:
+ - rgb24
+ - rgb565
+ - bgr666
+ - lvds666
+
+ ddc:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ phandle describing the i2c bus handling the display data channel
+
+ '#address-cells':
+ const: 1
+
+ '#size-cells':
+ const: 0
+
+ port@0:
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ unevaluatedProperties: false
+ description: input port connected to the IPU display interface
+
+ port@1:
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ unevaluatedProperties: false
+ description: output port connected to a panel
+
+required:
+ - compatible
+
+additionalProperties: false
+
+examples:
+ - |
+ display {
+ compatible = "fsl,imx-parallel-display";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ interface-pix-fmt = "rgb24";
+
+ port@0 {
+ reg = <0>;
+
+ endpoint {
+ remote-endpoint = <&ipu_di0_disp0>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ endpoint {
+ remote-endpoint = <&panel_in>;
+ };
+ };
+ };
+
diff --git a/Documentation/devicetree/bindings/display/imx/fsl,imx6-hdmi.yaml b/Documentation/devicetree/bindings/display/imx/fsl,imx6-hdmi.yaml
new file mode 100644
index 000000000000..180c4b510fb1
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/imx/fsl,imx6-hdmi.yaml
@@ -0,0 +1,118 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/imx/fsl,imx6-hdmi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX6 DWC HDMI TX Encoder
+
+maintainers:
+ - Philipp Zabel <p.zabel@pengutronix.de>
+
+description: |
+ The HDMI transmitter is a Synopsys DesignWare HDMI 1.4 TX controller IP
+ with a companion PHY IP.
+
+allOf:
+ - $ref: ../bridge/synopsys,dw-hdmi.yaml#
+
+properties:
+ compatible:
+ enum:
+ - fsl,imx6dl-hdmi
+ - fsl,imx6q-hdmi
+
+ reg-io-width:
+ const: 1
+
+ clocks:
+ maxItems: 2
+
+ clock-names:
+ maxItems: 2
+
+ gpr:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ phandle to the iomuxc-gpr region containing the HDMI multiplexer control
+ register.
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+ description: |
+ This device has four video ports, corresponding to the four inputs of the
+ HDMI multiplexer. Each port shall have a single endpoint.
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: First input of the HDMI multiplexer
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Second input of the HDMI multiplexer
+
+ port@2:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Third input of the HDMI multiplexer
+
+ port@3:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Fourth input of the HDMI multiplexer
+
+ anyOf:
+ - required:
+ - port@0
+ - required:
+ - port@1
+ - required:
+ - port@2
+ - required:
+ - port@3
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - gpr
+ - interrupts
+ - ports
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/imx6qdl-clock.h>
+
+ hdmi: hdmi@120000 {
+ reg = <0x00120000 0x9000>;
+ interrupts = <0 115 0x04>;
+ gpr = <&gpr>;
+ clocks = <&clks IMX6QDL_CLK_HDMI_IAHB>,
+ <&clks IMX6QDL_CLK_HDMI_ISFR>;
+ clock-names = "iahb", "isfr";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ hdmi_mux_0: endpoint {
+ remote-endpoint = <&ipu1_di0_hdmi>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ hdmi_mux_1: endpoint {
+ remote-endpoint = <&ipu1_di1_hdmi>;
+ };
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/imx/fsl,imx6q-ipu.yaml b/Documentation/devicetree/bindings/display/imx/fsl,imx6q-ipu.yaml
new file mode 100644
index 000000000000..ec78645d4de0
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/imx/fsl,imx6q-ipu.yaml
@@ -0,0 +1,97 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/imx/fsl,imx6q-ipu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX IPUv3
+
+maintainers:
+ - Frank Li <Frank.Li@nxp.com>
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - fsl,imx51-ipu
+ - fsl,imx53-ipu
+ - fsl,imx6q-ipu
+ - items:
+ - const: fsl,imx6qp-ipu
+ - const: fsl,imx6q-ipu
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ minItems: 1
+ maxItems: 2
+
+ clocks:
+ maxItems: 3
+
+ clock-names:
+ items:
+ - const: bus
+ - const: di0
+ - const: di1
+
+ resets:
+ maxItems: 1
+
+ '#address-cells':
+ const: 1
+
+ '#size-cells':
+ const: 0
+
+ fsl,prg:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description: phandle to prg node associated with this IPU instance
+
+ port@0:
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ unevaluatedProperties: false
+ description: CSI0
+
+ port@1:
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ unevaluatedProperties: false
+ description: CSI1
+
+ port@2:
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ unevaluatedProperties: false
+ description: DI0
+
+ port@3:
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ unevaluatedProperties: false
+ description: DI1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - resets
+
+additionalProperties: false
+
+examples:
+ - |
+ display-controller@18000000 {
+ compatible = "fsl,imx53-ipu";
+ reg = <0x18000000 0x080000000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ interrupts = <11 10>;
+ resets = <&src 2>;
+
+ port@2 {
+ reg = <2>;
+
+ endpoint {
+ remote-endpoint = <&display_in>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/imx/fsl,imx6q-ldb.yaml b/Documentation/devicetree/bindings/display/imx/fsl,imx6q-ldb.yaml
new file mode 100644
index 000000000000..1646f41d8f72
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/imx/fsl,imx6q-ldb.yaml
@@ -0,0 +1,193 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/imx/fsl,imx6q-ldb.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale LVDS Display Bridge (ldb)
+
+description:
+ The LVDS Display Bridge device tree node contains up to two lvds-channel
+ nodes describing each of the two LVDS encoder channels of the bridge.
+
+maintainers:
+ - Frank Li <Frank.Li@nxp.com>
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - fsl,imx53-ldb
+ - items:
+ - enum:
+ - fsl,imx6q-ldb
+ - const: fsl,imx53-ldb
+
+ reg:
+ maxItems: 1
+
+ '#address-cells':
+ const: 1
+
+ '#size-cells':
+ const: 0
+
+ gpr:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ The phandle points to the iomuxc-gpr region containing the LVDS
+ control register.
+
+ clocks:
+ minItems: 6
+ maxItems: 8
+
+ clock-names:
+ oneOf:
+ - items:
+ - const: di0_pll
+ - const: di1_pll
+ - const: di0_sel
+ - const: di1_sel
+ - const: di0
+ - const: di1
+ - items:
+ - const: di0_pll
+ - const: di1_pll
+ - const: di0_sel
+ - const: di1_sel
+ - const: di2_sel
+ - const: di3_sel
+ - const: di0
+ - const: di1
+
+ fsl,dual-channel:
+ $ref: /schemas/types.yaml#/definitions/flag
+ description:
+ if it exists, only LVDS channel 0 should
+ be configured - one input will be distributed on both outputs in dual
+ channel mode
+
+patternProperties:
+ '^lvds-channel@[0-1]$':
+ type: object
+ description:
+ Each LVDS Channel has to contain either an of graph link to a panel device node
+ or a display-timings node that describes the video timings for the connected
+ LVDS display as well as the fsl,data-mapping and fsl,data-width properties.
+
+ properties:
+ reg:
+ maxItems: 1
+
+ '#address-cells':
+ const: 1
+
+ '#size-cells':
+ const: 0
+
+ display-timings:
+ $ref: /schemas/display/panel/display-timings.yaml#
+
+ fsl,data-mapping:
+ enum:
+ - spwg
+ - jeida
+
+ fsl,data-width:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: should be <18> or <24>
+ enum:
+ - 18
+ - 24
+
+ fsl,panel:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description: phandle to lcd panel
+
+ patternProperties:
+ '^port@[0-4]$':
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ On i.MX5, the internal two-input-multiplexer is used. Due to hardware
+ limitations, only one input port (port@[0,1]) can be used for each channel
+ (lvds-channel@[0,1], respectively).
+ On i.MX6, there should be four input ports (port@[0-3]) that correspond
+ to the four LVDS multiplexer inputs.
+ A single output port (port@2 on i.MX5, port@4 on i.MX6) must be connected
+ to a panel input port. Optionally, the output port can be left out if
+ display-timings are used instead.
+
+ additionalProperties: false
+
+required:
+ - compatible
+ - gpr
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/imx5-clock.h>
+
+ ldb@53fa8008 {
+ compatible = "fsl,imx53-ldb";
+ reg = <0x53fa8008 0x4>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ gpr = <&gpr>;
+ clocks = <&clks IMX5_CLK_LDB_DI0_SEL>,
+ <&clks IMX5_CLK_LDB_DI1_SEL>,
+ <&clks IMX5_CLK_IPU_DI0_SEL>,
+ <&clks IMX5_CLK_IPU_DI1_SEL>,
+ <&clks IMX5_CLK_LDB_DI0_GATE>,
+ <&clks IMX5_CLK_LDB_DI1_GATE>;
+ clock-names = "di0_pll", "di1_pll",
+ "di0_sel", "di1_sel",
+ "di0", "di1";
+
+ /* Using an of-graph endpoint link to connect the panel */
+ lvds-channel@0 {
+ reg = <0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ endpoint {
+ remote-endpoint = <&ipu_di0_lvds0>;
+ };
+ };
+
+ port@2 {
+ reg = <2>;
+
+ endpoint {
+ remote-endpoint = <&panel_in>;
+ };
+ };
+ };
+
+ /* Using display-timings and fsl,data-mapping/width instead */
+ lvds-channel@1 {
+ reg = <1>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ fsl,data-mapping = "spwg";
+ fsl,data-width = <24>;
+
+ display-timings {/* ... */
+ };
+
+ port@1 {
+ reg = <1>;
+
+ endpoint {
+ remote-endpoint = <&ipu_di1_lvds1>;
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/imx/fsl,imx6qp-pre.yaml b/Documentation/devicetree/bindings/display/imx/fsl,imx6qp-pre.yaml
new file mode 100644
index 000000000000..73bc73ff6e69
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/imx/fsl,imx6qp-pre.yaml
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/imx/fsl,imx6qp-pre.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX PRE (Prefetch Resolve Engine)
+
+maintainers:
+ - Frank Li <Frank.Li@nxp.com>
+
+properties:
+ compatible:
+ const: fsl,imx6qp-pre
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ items:
+ - const: axi
+ fsl,iram:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ phandle pointing to the mmio-sram device node, that should be
+ used for the PRE SRAM double buffer.
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/imx6qdl-clock.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ pre@21c8000 {
+ compatible = "fsl,imx6qp-pre";
+ reg = <0x021c8000 0x1000>;
+ interrupts = <GIC_SPI 90 IRQ_TYPE_EDGE_RISING>;
+ clocks = <&clks IMX6QDL_CLK_PRE0>;
+ clock-names = "axi";
+ fsl,iram = <&ocram2>;
+ };
diff --git a/Documentation/devicetree/bindings/display/imx/fsl,imx6qp-prg.yaml b/Documentation/devicetree/bindings/display/imx/fsl,imx6qp-prg.yaml
new file mode 100644
index 000000000000..582da8c489f4
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/imx/fsl,imx6qp-prg.yaml
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/imx/fsl,imx6qp-prg.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX PRG (Prefetch Resolve Gasket)
+
+maintainers:
+ - Frank Li <Frank.Li@nxp.com>
+
+properties:
+ compatible:
+ const: fsl,imx6qp-prg
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 2
+
+ clock-names:
+ items:
+ - const: ipg
+ - const: axi
+
+ fsl,pres:
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ items:
+ maxItems: 1
+ description:
+ phandles to the PRE units attached to this PRG, with the fixed
+ PRE as the first entry and the muxable PREs following.
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/imx6qdl-clock.h>
+
+ prg@21cc000 {
+ compatible = "fsl,imx6qp-prg";
+ reg = <0x021cc000 0x1000>;
+ clocks = <&clks IMX6QDL_CLK_PRG0_APB>, <&clks IMX6QDL_CLK_PRG0_AXI>;
+ clock-names = "ipg", "axi";
+ fsl,pres = <&pre1>, <&pre2>, <&pre3>;
+ };
+
diff --git a/Documentation/devicetree/bindings/display/imx/fsl,imx8mp-hdmi-pvi.yaml b/Documentation/devicetree/bindings/display/imx/fsl,imx8mp-hdmi-pvi.yaml
new file mode 100644
index 000000000000..56da1636014c
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/imx/fsl,imx8mp-hdmi-pvi.yaml
@@ -0,0 +1,84 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/imx/fsl,imx8mp-hdmi-pvi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX8MP HDMI Parallel Video Interface
+
+maintainers:
+ - Lucas Stach <l.stach@pengutronix.de>
+
+description:
+ The HDMI parallel video interface is a timing and sync generator block in the
+ i.MX8MP SoC, that sits between the video source and the HDMI TX controller.
+
+properties:
+ compatible:
+ const: fsl,imx8mp-hdmi-pvi
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ power-domains:
+ maxItems: 1
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Input from the LCDIF controller.
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Output to the HDMI TX controller.
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - power-domains
+ - ports
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/power/imx8mp-power.h>
+
+ display-bridge@32fc4000 {
+ compatible = "fsl,imx8mp-hdmi-pvi";
+ reg = <0x32fc4000 0x44>;
+ interrupt-parent = <&irqsteer_hdmi>;
+ interrupts = <12 IRQ_TYPE_LEVEL_HIGH>;
+ power-domains = <&hdmi_blk_ctrl IMX8MP_HDMIBLK_PD_PVI>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ pvi_from_lcdif3: endpoint {
+ remote-endpoint = <&lcdif3_to_pvi>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ pvi_to_hdmi_tx: endpoint {
+ remote-endpoint = <&hdmi_tx_from_pvi>;
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-axi-performance-counter.yaml b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-axi-performance-counter.yaml
new file mode 100644
index 000000000000..1d6501afc7f2
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-axi-performance-counter.yaml
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/imx/fsl,imx8qxp-dc-axi-performance-counter.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX8qxp Display Controller AXI Performance Counter
+
+description: |
+ Performance counters are provided to allow measurement of average bandwidth
+ and latency during operation. The following features are supported:
+
+ * Manual and timer controlled measurement mode.
+
+ * Measurement counters:
+ - GLOBAL_COUNTER for overall measurement time
+ - BUSY_COUNTER for number of data bus busy cycles
+ - DATA_COUNTER for number of data transfer cycles
+ - TRANSFER_COUNTER for number of transfers
+ - ADDRBUSY_COUNTER for number of address bus busy cycles
+ - LATENCY_COUNTER for average latency
+
+ * Counter overflow detection.
+
+ * Outstanding Transfer Counters (OTC) which are used for latency measurement
+ have to run immediately after reset, but can be disabled by software when
+ there is no need for latency measurement.
+
+maintainers:
+ - Liu Ying <victor.liu@nxp.com>
+
+properties:
+ compatible:
+ const: fsl,imx8qxp-dc-axi-performance-counter
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/imx8-lpcg.h>
+
+ pmu@5618f000 {
+ compatible = "fsl,imx8qxp-dc-axi-performance-counter";
+ reg = <0x5618f000 0x90>;
+ clocks = <&dc0_lpcg IMX_LPCG_CLK_5>;
+ };
diff --git a/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-blit-engine.yaml b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-blit-engine.yaml
new file mode 100644
index 000000000000..45db6da39e20
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-blit-engine.yaml
@@ -0,0 +1,204 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/imx/fsl,imx8qxp-dc-blit-engine.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX8qxp Display Controller Blit Engine
+
+description: |
+ A blit operation (block based image transfer) reads up to 3 source images
+ from memory and computes one destination image from it, which is written
+ back to memory. The following basic operations are supported:
+
+ * Buffer Fill
+ Fills a buffer with constant color
+
+ * Buffer Copy
+ Copies one source to a destination buffer.
+
+ * Image Blend
+ Combines two source images by a blending equation and writes result to
+ destination (which can be one of the sources).
+
+ * Image Rop2/3
+ Combines up to three source images by a logical equation (raster operation)
+ and writes result to destination (which can be one of the sources).
+
+ * Image Flip
+ Mirrors the source image in horizontal and/or vertical direction.
+
+ * Format Convert
+ Convert between the supported color and buffer formats.
+
+ * Color Transform
+ Modify colors by linear or non-linear transformations.
+
+ * Image Scale
+ Changes size of the source image.
+
+ * Image Rotate
+ Rotates the source image by any angle.
+
+ * Image Filter
+ Performs an FIR filter operation on the source image.
+
+ * Image Warp
+ Performs a re-sampling of the source image with any pattern. The sample
+ point positions are read from a compressed coordinate buffer.
+
+ * Buffer Pack
+ Writes an image with color components stored in up to three different
+ buffers (planar formats) into a single buffer (packed format).
+
+ * Chroma Resample
+ Converts between different YUV formats that differ in chroma sampling rate
+ (4:4:4, 4:2:2, 4:2:0).
+
+maintainers:
+ - Liu Ying <victor.liu@nxp.com>
+
+properties:
+ compatible:
+ const: fsl,imx8qxp-dc-blit-engine
+
+ reg:
+ maxItems: 2
+
+ reg-names:
+ items:
+ - const: pec
+ - const: cfg
+
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 1
+
+ ranges: true
+
+patternProperties:
+ "^blitblend@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+
+ properties:
+ compatible:
+ const: fsl,imx8qxp-dc-blitblend
+
+ "^clut@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+
+ properties:
+ compatible:
+ const: fsl,imx8qxp-dc-clut
+
+ "^fetchdecode@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+
+ properties:
+ compatible:
+ const: fsl,imx8qxp-dc-fetchdecode
+
+ "^fetcheco@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+
+ properties:
+ compatible:
+ const: fsl,imx8qxp-dc-fetcheco
+
+ "^fetchwarp@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+
+ properties:
+ compatible:
+ const: fsl,imx8qxp-dc-fetchwarp
+
+ "^filter@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+
+ properties:
+ compatible:
+ const: fsl,imx8qxp-dc-filter
+
+ "^hscaler@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+
+ properties:
+ compatible:
+ const: fsl,imx8qxp-dc-hscaler
+
+ "^matrix@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+
+ properties:
+ compatible:
+ const: fsl,imx8qxp-dc-matrix
+
+ "^rop@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+
+ properties:
+ compatible:
+ const: fsl,imx8qxp-dc-rop
+
+ "^store@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+
+ properties:
+ compatible:
+ const: fsl,imx8qxp-dc-store
+
+ "^vscaler@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+
+ properties:
+ compatible:
+ const: fsl,imx8qxp-dc-vscaler
+
+required:
+ - compatible
+ - reg
+ - reg-names
+ - "#address-cells"
+ - "#size-cells"
+ - ranges
+
+additionalProperties: false
+
+examples:
+ - |
+ blit-engine@56180820 {
+ compatible = "fsl,imx8qxp-dc-blit-engine";
+ reg = <0x56180820 0x13c>, <0x56181000 0x3400>;
+ reg-names = "pec", "cfg";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ fetchdecode@56180820 {
+ compatible = "fsl,imx8qxp-dc-fetchdecode";
+ reg = <0x56180820 0x10>, <0x56181000 0x404>;
+ reg-names = "pec", "cfg";
+ };
+
+ store@56180940 {
+ compatible = "fsl,imx8qxp-dc-store";
+ reg = <0x56180940 0x1c>, <0x56184000 0x5c>;
+ reg-names = "pec", "cfg";
+ interrupt-parent = <&dc0_intc>;
+ interrupts = <0>, <1>, <2>;
+ interrupt-names = "shdload", "framecomplete", "seqcomplete";
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-blitblend.yaml b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-blitblend.yaml
new file mode 100644
index 000000000000..095e65939fba
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-blitblend.yaml
@@ -0,0 +1,41 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/imx/fsl,imx8qxp-dc-blitblend.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX8qxp Display Controller Blit Blend Unit
+
+description:
+ Combines two input frames to a single output frame, all frames having the
+ same dimension.
+
+maintainers:
+ - Liu Ying <victor.liu@nxp.com>
+
+properties:
+ compatible:
+ const: fsl,imx8qxp-dc-blitblend
+
+ reg:
+ maxItems: 2
+
+ reg-names:
+ items:
+ - const: pec
+ - const: cfg
+
+required:
+ - compatible
+ - reg
+ - reg-names
+
+additionalProperties: false
+
+examples:
+ - |
+ blitblend@56180920 {
+ compatible = "fsl,imx8qxp-dc-blitblend";
+ reg = <0x56180920 0x10>, <0x56183c00 0x3c>;
+ reg-names = "pec", "cfg";
+ };
diff --git a/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-clut.yaml b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-clut.yaml
new file mode 100644
index 000000000000..21d42aa11b52
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-clut.yaml
@@ -0,0 +1,44 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/imx/fsl,imx8qxp-dc-clut.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX8qxp Display Controller Color Lookup Table
+
+description: |
+ The unit implements 3 look-up tables with 256 x 10 bit entries each. These
+ can be used for different kinds of applications. From 10-bit input values
+ only upper 8 bits are used.
+
+ The unit supports color lookup, index lookup, dithering and alpha masking.
+
+maintainers:
+ - Liu Ying <victor.liu@nxp.com>
+
+properties:
+ compatible:
+ const: fsl,imx8qxp-dc-clut
+
+ reg:
+ maxItems: 2
+
+ reg-names:
+ items:
+ - const: pec
+ - const: cfg
+
+required:
+ - compatible
+ - reg
+ - reg-names
+
+additionalProperties: false
+
+examples:
+ - |
+ clut@56180880 {
+ compatible = "fsl,imx8qxp-dc-clut";
+ reg = <0x56180880 0x10>, <0x56182400 0x404>;
+ reg-names = "pec", "cfg";
+ };
diff --git a/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-command-sequencer.yaml b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-command-sequencer.yaml
new file mode 100644
index 000000000000..27118f4c0d28
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-command-sequencer.yaml
@@ -0,0 +1,67 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/imx/fsl,imx8qxp-dc-command-sequencer.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX8qxp Display Controller Command Sequencer
+
+description: |
+ The Command Sequencer is designed to autonomously process command lists.
+ By that it can load setups into the DC configuration and synchronize to
+ hardware events. This releases a system's CPU from workload, because it
+ does not need to wait for certain events. Also it simplifies SW architecture,
+ because no interrupt handlers are required. Setups are read via AXI bus,
+ while write access to configuration registers occurs directly via an internal
+ bus. This saves bandwidth for the AXI interconnect and improves the system
+ architecture in terms of safety aspects.
+
+maintainers:
+ - Liu Ying <victor.liu@nxp.com>
+
+properties:
+ compatible:
+ const: fsl,imx8qxp-dc-command-sequencer
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 5
+
+ interrupt-names:
+ items:
+ - const: error
+ - const: sw0
+ - const: sw1
+ - const: sw2
+ - const: sw3
+
+ sram:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description: phandle pointing to the mmio-sram device node
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - interrupts
+ - interrupt-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/imx8-lpcg.h>
+
+ command-sequencer@56180400 {
+ compatible = "fsl,imx8qxp-dc-command-sequencer";
+ reg = <0x56180400 0x1a4>;
+ clocks = <&dc0_lpcg IMX_LPCG_CLK_5>;
+ interrupt-parent = <&dc0_intc>;
+ interrupts = <36>, <37>, <38>, <39>, <40>;
+ interrupt-names = "error", "sw0", "sw1", "sw2", "sw3";
+ };
diff --git a/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-constframe.yaml b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-constframe.yaml
new file mode 100644
index 000000000000..94f678563608
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-constframe.yaml
@@ -0,0 +1,44 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/imx/fsl,imx8qxp-dc-constframe.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX8qxp Display Controller Constant Frame
+
+description: |
+ The Constant Frame unit is used instead of a Fetch unit where generation of
+ constant color frames only is sufficient. This is the case for the background
+ planes of content and safety streams in a Display Controller.
+
+ The color can be setup to any RGBA value.
+
+maintainers:
+ - Liu Ying <victor.liu@nxp.com>
+
+properties:
+ compatible:
+ const: fsl,imx8qxp-dc-constframe
+
+ reg:
+ maxItems: 2
+
+ reg-names:
+ items:
+ - const: pec
+ - const: cfg
+
+required:
+ - compatible
+ - reg
+ - reg-names
+
+additionalProperties: false
+
+examples:
+ - |
+ constframe@56180960 {
+ compatible = "fsl,imx8qxp-dc-constframe";
+ reg = <0x56180960 0xc>, <0x56184400 0x20>;
+ reg-names = "pec", "cfg";
+ };
diff --git a/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-display-engine.yaml b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-display-engine.yaml
new file mode 100644
index 000000000000..91f3bb77d8d0
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-display-engine.yaml
@@ -0,0 +1,152 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/imx/fsl,imx8qxp-dc-display-engine.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX8qxp Display Controller Display Engine
+
+description:
+ All Processing Units that operate in a display clock domain. Pixel pipeline
+ is driven by a video timing and cannot be stalled. Implements all display
+ specific processing.
+
+maintainers:
+ - Liu Ying <victor.liu@nxp.com>
+
+properties:
+ compatible:
+ const: fsl,imx8qxp-dc-display-engine
+
+ reg:
+ maxItems: 2
+
+ reg-names:
+ items:
+ - const: top
+ - const: cfg
+
+ resets:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 3
+
+ interrupt-names:
+ items:
+ - const: shdload
+ - const: framecomplete
+ - const: seqcomplete
+
+ power-domains:
+ maxItems: 1
+
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 1
+
+ ranges: true
+
+patternProperties:
+ "^dither@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+
+ properties:
+ compatible:
+ const: fsl,imx8qxp-dc-dither
+
+ "^framegen@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+
+ properties:
+ compatible:
+ const: fsl,imx8qxp-dc-framegen
+
+ "^gammacor@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+
+ properties:
+ compatible:
+ const: fsl,imx8qxp-dc-gammacor
+
+ "^matrix@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+
+ properties:
+ compatible:
+ const: fsl,imx8qxp-dc-matrix
+
+ "^signature@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+
+ properties:
+ compatible:
+ const: fsl,imx8qxp-dc-signature
+
+ "^tcon@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+
+ properties:
+ compatible:
+ const: fsl,imx8qxp-dc-tcon
+
+required:
+ - compatible
+ - reg
+ - reg-names
+ - interrupts
+ - interrupt-names
+ - power-domains
+ - "#address-cells"
+ - "#size-cells"
+ - ranges
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/imx8-lpcg.h>
+ #include <dt-bindings/firmware/imx/rsrc.h>
+
+ display-engine@5618b400 {
+ compatible = "fsl,imx8qxp-dc-display-engine";
+ reg = <0x5618b400 0x14>, <0x5618b800 0x1c00>;
+ reg-names = "top", "cfg";
+ interrupt-parent = <&dc0_intc>;
+ interrupts = <15>, <16>, <17>;
+ interrupt-names = "shdload", "framecomplete", "seqcomplete";
+ power-domains = <&pd IMX_SC_R_DC_0_PLL_0>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ framegen@5618b800 {
+ compatible = "fsl,imx8qxp-dc-framegen";
+ reg = <0x5618b800 0x98>;
+ clocks = <&dc0_disp_lpcg IMX_LPCG_CLK_0>;
+ interrupt-parent = <&dc0_intc>;
+ interrupts = <18>, <19>, <20>, <21>, <41>, <42>, <43>, <44>;
+ interrupt-names = "int0", "int1", "int2", "int3",
+ "primsync_on", "primsync_off",
+ "secsync_on", "secsync_off";
+ };
+
+ tcon@5618c800 {
+ compatible = "fsl,imx8qxp-dc-tcon";
+ reg = <0x5618c800 0x588>;
+
+ port {
+ dc0_disp0_dc0_pixel_combiner_ch0: endpoint {
+ remote-endpoint = <&dc0_pixel_combiner_ch0_dc0_disp0>;
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-dither.yaml b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-dither.yaml
new file mode 100644
index 000000000000..8e4468d91836
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-dither.yaml
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/imx/fsl,imx8qxp-dc-dither.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX8qxp Display Controller Dither Unit
+
+description: |
+ The unit can increase the physical color resolution of a display from 5, 6, 7
+ or 8 bits per RGB channel to a virtual resolution of 10 bits. The physical
+ resolution can be set individually for each channel.
+
+ The resolution is increased by mixing the two physical colors that are nearest
+ to the virtual color code in a variable ratio either by time (temporal
+ dithering) or by position (spatial dithering).
+
+ An optimized algorithm for temporal dithering minimizes noise artifacts on the
+ output image.
+
+ The dither operation can be individually enabled or disabled for each pixel
+ using the alpha input bit.
+
+maintainers:
+ - Liu Ying <victor.liu@nxp.com>
+
+properties:
+ compatible:
+ const: fsl,imx8qxp-dc-dither
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ dither@5618c400 {
+ compatible = "fsl,imx8qxp-dc-dither";
+ reg = <0x5618c400 0x14>;
+ };
diff --git a/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-extdst.yaml b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-extdst.yaml
new file mode 100644
index 000000000000..dfc2d4f94f8e
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-extdst.yaml
@@ -0,0 +1,72 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/imx/fsl,imx8qxp-dc-extdst.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX8qxp Display Controller External Destination Interface
+
+description: |
+ The External Destination unit is the interface between the internal pixel
+ processing pipeline of the Pixel Engine, which is 30-bit RGB plus 8-bit Alpha,
+ and a Display Engine.
+
+ It comprises the following built-in Gamma apply function.
+
+ +------X-----------------------+
+ | | ExtDst Unit |
+ | V |
+ | +-------+ |
+ | | Gamma | |
+ | +-------+ |
+ | | |
+ | V +
+ +------X-----------------------+
+
+ The output format is 24-bit RGB plus 1-bit Alpha. Conversion from 10 to 8
+ bits is done by LSBit truncation. Alpha output bit is 1 for input 255, 0
+ otherwise.
+
+maintainers:
+ - Liu Ying <victor.liu@nxp.com>
+
+properties:
+ compatible:
+ const: fsl,imx8qxp-dc-extdst
+
+ reg:
+ maxItems: 2
+
+ reg-names:
+ items:
+ - const: pec
+ - const: cfg
+
+ interrupts:
+ maxItems: 3
+
+ interrupt-names:
+ items:
+ - const: shdload
+ - const: framecomplete
+ - const: seqcomplete
+
+required:
+ - compatible
+ - reg
+ - reg-names
+ - interrupts
+ - interrupt-names
+
+additionalProperties: false
+
+examples:
+ - |
+ extdst@56180980 {
+ compatible = "fsl,imx8qxp-dc-extdst";
+ reg = <0x56180980 0x1c>, <0x56184800 0x28>;
+ reg-names = "pec", "cfg";
+ interrupt-parent = <&dc0_intc>;
+ interrupts = <3>, <4>, <5>;
+ interrupt-names = "shdload", "framecomplete", "seqcomplete";
+ };
diff --git a/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-fetchunit.yaml b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-fetchunit.yaml
new file mode 100644
index 000000000000..97fb6a4598d9
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-fetchunit.yaml
@@ -0,0 +1,141 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/imx/fsl,imx8qxp-dc-fetchunit.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX8qxp Display Controller Fetch Unit
+
+description: |
+ The Fetch Unit is the interface between the AXI bus for source buffer access
+ and the internal pixel processing pipeline, which is 30-bit RGB plus 8-bit
+ Alpha.
+
+ It is used to generate foreground planes in Display Controllers and source
+ planes in Blit Engines, and comprises the following built-in functions to
+ convert a wide range of frame buffer types.
+
+ +---------X-----------------------------------------+
+ | | Fetch Unit |
+ | V |
+ | +---------+ |
+ | | | |
+ | | Decode | Decompression [Decode] |
+ | | | |
+ | +---------+ |
+ | | |
+ | V |
+ | +---------+ |
+ | | Clip & | Clip Window [All] |
+ | | Overlay | Plane composition [Layer, Warp] |
+ | | | |
+ | +---------+ |
+ | | |
+ | V |
+ | +---------+ |
+ | | Re- | Flip/Rotate/Repl./Drop [All] |
+ X--> | sample | Perspective/Affine warping [Persp] |
+ | | | | Arbitrary warping [Warp, Persp] |
+ | | +---------+ |
+ | | | |
+ | | V |
+ | | +---------+ |
+ | | | | |
+ | | | Palette | Color Palette [Layer, Decode] |
+ | | | | |
+ | | +---------+ |
+ | | | |
+ | | V |
+ | | +---------+ |
+ | | | Extract | Raw to RGBA/YUV [All] |
+ | | | & | Bit width expansion [All] |
+ | | | Expand | |
+ | | +---------+ |
+ | | | |
+ | | V |
+ | | +---------+ |
+ | | | | Planar to packed |
+ | |->| Combine | [Decode, Warp, Persp] |
+ | | | | |
+ | | +---------+ |
+ | | | |
+ | | V |
+ | | +---------+ |
+ | | | | YUV422 to YUV444 |
+ | | | Chroma | [Decode, Persp] |
+ | | | | |
+ | | +---------+ |
+ | | | |
+ | | V |
+ | | +---------+ |
+ | | | | YUV to RGB |
+ | | | Color | [Warp, Persp, Decode, Layer] |
+ | | | | |
+ | | +---------+ |
+ | | | |
+ | | V |
+ | | +---------+ |
+ | | | | Gamma removal |
+ | | | Gamma | [Warp, Persp, Decode, Layer] |
+ | | | | |
+ | | +---------+ |
+ | | | |
+ | | V |
+ | | +---------+ |
+ | | | | Alpla multiply, RGB pre-multiply |
+ | ->| Multiply| [Warp, Persp, Decode, Layer] |
+ | | | |
+ | --------- |
+ | | |
+ | V |
+ | +---------+ |
+ | | | Bilinear filter |
+ | | Filter | [Warp, Persp] |
+ | | | |
+ | +---------+ |
+ | | |
+ | V |
+ +---------X-----------------------------------------+
+
+ Note that different derivatives of the Fetch Unit exist. Each implements a
+ specific subset only of the pipeline stages shown above. Restrictions for the
+ units are specified in [square brackets].
+
+maintainers:
+ - Liu Ying <victor.liu@nxp.com>
+
+properties:
+ compatible:
+ enum:
+ - fsl,imx8qxp-dc-fetchdecode
+ - fsl,imx8qxp-dc-fetcheco
+ - fsl,imx8qxp-dc-fetchlayer
+ - fsl,imx8qxp-dc-fetchwarp
+
+ reg:
+ maxItems: 2
+
+ reg-names:
+ items:
+ - const: pec
+ - const: cfg
+
+ fsl,prg:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ Optional Prefetch Resolve Gasket associated with the Fetch Unit.
+
+required:
+ - compatible
+ - reg
+ - reg-names
+
+additionalProperties: false
+
+examples:
+ - |
+ fetchlayer@56180ac0 {
+ compatible = "fsl,imx8qxp-dc-fetchlayer";
+ reg = <0x56180ac0 0xc>, <0x56188400 0x404>;
+ reg-names = "pec", "cfg";
+ };
diff --git a/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-filter.yaml b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-filter.yaml
new file mode 100644
index 000000000000..5c54d5179ee3
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-filter.yaml
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/imx/fsl,imx8qxp-dc-filter.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX8qxp Display Controller Filter Unit
+
+description: |
+ 5x5 FIR filter with 25 programmable coefficients.
+
+ Typical applications are image blurring, sharpening or support for edge
+ detection algorithms.
+
+maintainers:
+ - Liu Ying <victor.liu@nxp.com>
+
+properties:
+ compatible:
+ const: fsl,imx8qxp-dc-filter
+
+ reg:
+ maxItems: 2
+
+ reg-names:
+ items:
+ - const: pec
+ - const: cfg
+
+required:
+ - compatible
+ - reg
+ - reg-names
+
+additionalProperties: false
+
+examples:
+ - |
+ filter@56180900 {
+ compatible = "fsl,imx8qxp-dc-filter";
+ reg = <0x56180900 0x10>, <0x56183800 0x30>;
+ reg-names = "pec", "cfg";
+ };
diff --git a/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-framegen.yaml b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-framegen.yaml
new file mode 100644
index 000000000000..9d1dc3a9de90
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-framegen.yaml
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/imx/fsl,imx8qxp-dc-framegen.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX8qxp Display Controller Frame Generator
+
+description:
+ The Frame Generator (FrameGen) module generates a programmable video timing
+ and optionally allows to synchronize the generated video timing to external
+ synchronization signals.
+
+maintainers:
+ - Liu Ying <victor.liu@nxp.com>
+
+properties:
+ compatible:
+ const: fsl,imx8qxp-dc-framegen
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 8
+
+ interrupt-names:
+ items:
+ - const: int0
+ - const: int1
+ - const: int2
+ - const: int3
+ - const: primsync_on
+ - const: primsync_off
+ - const: secsync_on
+ - const: secsync_off
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - interrupts
+ - interrupt-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/imx8-lpcg.h>
+ #include <dt-bindings/firmware/imx/rsrc.h>
+
+ framegen@5618b800 {
+ compatible = "fsl,imx8qxp-dc-framegen";
+ reg = <0x5618b800 0x98>;
+ clocks = <&dc0_disp_lpcg IMX_LPCG_CLK_0>;
+ interrupt-parent = <&dc0_intc>;
+ interrupts = <18>, <19>, <20>, <21>, <41>, <42>, <43>, <44>;
+ interrupt-names = "int0", "int1", "int2", "int3",
+ "primsync_on", "primsync_off",
+ "secsync_on", "secsync_off";
+ };
diff --git a/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-gammacor.yaml b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-gammacor.yaml
new file mode 100644
index 000000000000..25ad85742912
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-gammacor.yaml
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/imx/fsl,imx8qxp-dc-gammacor.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX8qxp Display Controller Gamma Correction Unit
+
+description: The unit supports non-linear color transformation.
+
+maintainers:
+ - Liu Ying <victor.liu@nxp.com>
+
+properties:
+ compatible:
+ const: fsl,imx8qxp-dc-gammacor
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ gammacor@5618c000 {
+ compatible = "fsl,imx8qxp-dc-gammacor";
+ reg = <0x5618c000 0x20>;
+ };
diff --git a/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-layerblend.yaml b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-layerblend.yaml
new file mode 100644
index 000000000000..2a6ab8a0ed7f
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-layerblend.yaml
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/imx/fsl,imx8qxp-dc-layerblend.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX8qxp Display Controller Layer Blend Unit
+
+description: Combines two input frames to a single output frame.
+
+maintainers:
+ - Liu Ying <victor.liu@nxp.com>
+
+properties:
+ compatible:
+ const: fsl,imx8qxp-dc-layerblend
+
+ reg:
+ maxItems: 2
+
+ reg-names:
+ items:
+ - const: pec
+ - const: cfg
+
+required:
+ - compatible
+ - reg
+ - reg-names
+
+additionalProperties: false
+
+examples:
+ - |
+ layerblend@56180ba0 {
+ compatible = "fsl,imx8qxp-dc-layerblend";
+ reg = <0x56180ba0 0x10>, <0x5618a400 0x20>;
+ reg-names = "pec", "cfg";
+ };
diff --git a/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-matrix.yaml b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-matrix.yaml
new file mode 100644
index 000000000000..d773389dd0dc
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-matrix.yaml
@@ -0,0 +1,44 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/imx/fsl,imx8qxp-dc-matrix.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX8qxp Display Controller Color Matrix
+
+description:
+ The unit supports linear color transformation, alpha pre-multiply and
+ alpha masking.
+
+maintainers:
+ - Liu Ying <victor.liu@nxp.com>
+
+properties:
+ compatible:
+ const: fsl,imx8qxp-dc-matrix
+
+ reg:
+ minItems: 1
+ maxItems: 2
+
+ reg-names:
+ oneOf:
+ - const: cfg # matrix in display engine
+ - items: # matrix in pixel engine
+ - const: pec
+ - const: cfg
+
+required:
+ - compatible
+ - reg
+ - reg-names
+
+additionalProperties: false
+
+examples:
+ - |
+ matrix@5618bc00 {
+ compatible = "fsl,imx8qxp-dc-matrix";
+ reg = <0x5618bc00 0x3c>;
+ reg-names = "cfg";
+ };
diff --git a/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-pixel-engine.yaml b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-pixel-engine.yaml
new file mode 100644
index 000000000000..633443a6cc38
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-pixel-engine.yaml
@@ -0,0 +1,250 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/imx/fsl,imx8qxp-dc-pixel-engine.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX8qxp Display Controller Pixel Engine
+
+description:
+ All Processing Units that operate in the AXI bus clock domain. Pixel
+ pipelines have the ability to stall when a destination is busy. Implements
+ all communication to memory resources and most of the image processing
+ functions. Interconnection of Processing Units is re-configurable.
+
+maintainers:
+ - Liu Ying <victor.liu@nxp.com>
+
+properties:
+ compatible:
+ const: fsl,imx8qxp-dc-pixel-engine
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 1
+
+ ranges: true
+
+patternProperties:
+ "^blit-engine@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+
+ properties:
+ compatible:
+ const: fsl,imx8qxp-dc-blit-engine
+
+ "^constframe@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+
+ properties:
+ compatible:
+ const: fsl,imx8qxp-dc-constframe
+
+ "^extdst@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+
+ properties:
+ compatible:
+ const: fsl,imx8qxp-dc-extdst
+
+ "^fetchdecode@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+
+ properties:
+ compatible:
+ const: fsl,imx8qxp-dc-fetchdecode
+
+ "^fetcheco@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+
+ properties:
+ compatible:
+ const: fsl,imx8qxp-dc-fetcheco
+
+ "^fetchlayer@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+
+ properties:
+ compatible:
+ const: fsl,imx8qxp-dc-fetchlayer
+
+ "^fetchwarp@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+
+ properties:
+ compatible:
+ const: fsl,imx8qxp-dc-fetchwarp
+
+ "^hscaler@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+
+ properties:
+ compatible:
+ const: fsl,imx8qxp-dc-hscaler
+
+ "^layerblend@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+
+ properties:
+ compatible:
+ const: fsl,imx8qxp-dc-layerblend
+
+ "^matrix@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+
+ properties:
+ compatible:
+ const: fsl,imx8qxp-dc-matrix
+
+ "^safety@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+
+ properties:
+ compatible:
+ const: fsl,imx8qxp-dc-safety
+
+ "^vscaler@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+
+ properties:
+ compatible:
+ const: fsl,imx8qxp-dc-vscaler
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - "#address-cells"
+ - "#size-cells"
+ - ranges
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/imx8-lpcg.h>
+
+ pixel-engine@56180800 {
+ compatible = "fsl,imx8qxp-dc-pixel-engine";
+ reg = <0x56180800 0xac00>;
+ clocks = <&dc0_lpcg IMX_LPCG_CLK_5>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ constframe@56180960 {
+ compatible = "fsl,imx8qxp-dc-constframe";
+ reg = <0x56180960 0xc>, <0x56184400 0x20>;
+ reg-names = "pec", "cfg";
+ };
+
+ extdst@56180980 {
+ compatible = "fsl,imx8qxp-dc-extdst";
+ reg = <0x56180980 0x1c>, <0x56184800 0x28>;
+ reg-names = "pec", "cfg";
+ interrupt-parent = <&dc0_intc>;
+ interrupts = <3>, <4>, <5>;
+ interrupt-names = "shdload", "framecomplete", "seqcomplete";
+ };
+
+ constframe@561809a0 {
+ compatible = "fsl,imx8qxp-dc-constframe";
+ reg = <0x561809a0 0xc>, <0x56184c00 0x20>;
+ reg-names = "pec", "cfg";
+ };
+
+ extdst@561809c0 {
+ compatible = "fsl,imx8qxp-dc-extdst";
+ reg = <0x561809c0 0x1c>, <0x56185000 0x28>;
+ reg-names = "pec", "cfg";
+ interrupt-parent = <&dc0_intc>;
+ interrupts = <6>, <7>, <8>;
+ interrupt-names = "shdload", "framecomplete", "seqcomplete";
+ };
+
+ constframe@561809e0 {
+ compatible = "fsl,imx8qxp-dc-constframe";
+ reg = <0x561809e0 0xc>, <0x56185400 0x20>;
+ reg-names = "pec", "cfg";
+ };
+
+ extdst@56180a00 {
+ compatible = "fsl,imx8qxp-dc-extdst";
+ reg = <0x56180a00 0x1c>, <0x56185800 0x28>;
+ reg-names = "pec", "cfg";
+ interrupt-parent = <&dc0_intc>;
+ interrupts = <9>, <10>, <11>;
+ interrupt-names = "shdload", "framecomplete", "seqcomplete";
+ };
+
+ constframe@56180a20 {
+ compatible = "fsl,imx8qxp-dc-constframe";
+ reg = <0x56180a20 0xc>, <0x56185c00 0x20>;
+ reg-names = "pec", "cfg";
+ };
+
+ extdst@56180a40 {
+ compatible = "fsl,imx8qxp-dc-extdst";
+ reg = <0x56180a40 0x1c>, <0x56186000 0x28>;
+ reg-names = "pec", "cfg";
+ interrupt-parent = <&dc0_intc>;
+ interrupts = <12>, <13>, <14>;
+ interrupt-names = "shdload", "framecomplete", "seqcomplete";
+ };
+
+ fetchwarp@56180a60 {
+ compatible = "fsl,imx8qxp-dc-fetchwarp";
+ reg = <0x56180a60 0x10>, <0x56186400 0x190>;
+ reg-names = "pec", "cfg";
+ };
+
+ fetchlayer@56180ac0 {
+ compatible = "fsl,imx8qxp-dc-fetchlayer";
+ reg = <0x56180ac0 0xc>, <0x56188400 0x404>;
+ reg-names = "pec", "cfg";
+ };
+
+ layerblend@56180ba0 {
+ compatible = "fsl,imx8qxp-dc-layerblend";
+ reg = <0x56180ba0 0x10>, <0x5618a400 0x20>;
+ reg-names = "pec", "cfg";
+ };
+
+ layerblend@56180bc0 {
+ compatible = "fsl,imx8qxp-dc-layerblend";
+ reg = <0x56180bc0 0x10>, <0x5618a800 0x20>;
+ reg-names = "pec", "cfg";
+ };
+
+ layerblend@56180be0 {
+ compatible = "fsl,imx8qxp-dc-layerblend";
+ reg = <0x56180be0 0x10>, <0x5618ac00 0x20>;
+ reg-names = "pec", "cfg";
+ };
+
+ layerblend@56180c00 {
+ compatible = "fsl,imx8qxp-dc-layerblend";
+ reg = <0x56180c00 0x10>, <0x5618b000 0x20>;
+ reg-names = "pec", "cfg";
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-rop.yaml b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-rop.yaml
new file mode 100644
index 000000000000..7115950ecae0
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-rop.yaml
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/imx/fsl,imx8qxp-dc-rop.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX8qxp Display Controller Raster Operation Unit
+
+description: |
+ The unit can combine up to three input frames to a single output frame, all
+ having the same dimension.
+
+ The unit supports logic operations, arithmetic operations and packing.
+
+maintainers:
+ - Liu Ying <victor.liu@nxp.com>
+
+properties:
+ compatible:
+ const: fsl,imx8qxp-dc-rop
+
+ reg:
+ maxItems: 2
+
+ reg-names:
+ items:
+ - const: pec
+ - const: cfg
+
+required:
+ - compatible
+ - reg
+ - reg-names
+
+additionalProperties: false
+
+examples:
+ - |
+ rop@56180860 {
+ compatible = "fsl,imx8qxp-dc-rop";
+ reg = <0x56180860 0x10>, <0x56182000 0x20>;
+ reg-names = "pec", "cfg";
+ };
diff --git a/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-safety.yaml b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-safety.yaml
new file mode 100644
index 000000000000..66c12948ab09
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-safety.yaml
@@ -0,0 +1,34 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/imx/fsl,imx8qxp-dc-safety.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX8qxp Display Controller Safety Unit
+
+description:
+ The unit allows corresponding processing units to be configured in a path
+ leading to multiple endpoints.
+
+maintainers:
+ - Liu Ying <victor.liu@nxp.com>
+
+properties:
+ compatible:
+ const: fsl,imx8qxp-dc-safety
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ safety@56180800 {
+ compatible = "fsl,imx8qxp-dc-safety";
+ reg = <0x56180800 0x1c>;
+ };
diff --git a/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-scaling-engine.yaml b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-scaling-engine.yaml
new file mode 100644
index 000000000000..76cbe11a6364
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-scaling-engine.yaml
@@ -0,0 +1,83 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/imx/fsl,imx8qxp-dc-scaling-engine.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX8qxp Display Controller Scaling Engine
+
+description: |
+ The unit can change the dimension of the input frame by nearest or linear
+ re-sampling with 1/32 sub pixel precision.
+
+ Internally it consist of two independent blocks for horizontal and vertical
+ scaling. The sequence of both operations is arbitrary.
+
+ Any frame dimensions between 1 and 16384 pixels in width and height are
+ supported, except that the vertical scaler has a frame width maximum
+ depending of the system's functional limitations.
+
+ In general all scale factors are supported inside the supported frame
+ dimensions. In range of scale factors 1/16..16 the filtered output colors
+ are LSBit precise (e.g. DC ripple free).
+
+ +-----------+
+ | Line |
+ | Buffer |
+ +-----------+
+ ^
+ |
+ V
+ |\ +-----------+
+ ------+ | | |
+ | | +-->| Vertical |----
+ | ----+ | | Scaler | |
+ | | |/ +-----------+ |
+ | | |
+ | | |
+ | | | |\
+ | ------------- -------------+-----+ |
+ Input --+ X | +--> Output
+ | ------------- -------------+-----+ |
+ | | | |/
+ | | |
+ | | |\ +-----------+ |
+ | ----+ | | | |
+ | | +-->| Horizontal|----
+ ------+ | | Scaler |
+ |/ +-----------+
+
+ The unit supports downscaling, upscaling, sub pixel translation and bob
+ de-interlacing.
+
+maintainers:
+ - Liu Ying <victor.liu@nxp.com>
+
+properties:
+ compatible:
+ enum:
+ - fsl,imx8qxp-dc-hscaler
+ - fsl,imx8qxp-dc-vscaler
+
+ reg:
+ maxItems: 2
+
+ reg-names:
+ items:
+ - const: pec
+ - const: cfg
+
+required:
+ - compatible
+ - reg
+ - reg-names
+
+additionalProperties: false
+
+examples:
+ - |
+ hscaler@561808c0 {
+ compatible = "fsl,imx8qxp-dc-hscaler";
+ reg = <0x561808c0 0x10>, <0x56183000 0x18>;
+ reg-names = "pec", "cfg";
+ };
diff --git a/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-signature.yaml b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-signature.yaml
new file mode 100644
index 000000000000..c495822fdc80
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-signature.yaml
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/imx/fsl,imx8qxp-dc-signature.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX8qxp Display Controller Signature Unit
+
+description: |
+ In order to control the correctness of display output, signature values can
+ be computed for each frame and compared against reference values. In case of
+ a mismatch (signature violation) a HW event can be triggered, for example a
+ SW interrupt.
+
+ This unit supports signature computation, reference check, evaluation windows,
+ alpha masking and panic modes.
+
+maintainers:
+ - Liu Ying <victor.liu@nxp.com>
+
+properties:
+ compatible:
+ const: fsl,imx8qxp-dc-signature
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 3
+
+ interrupt-names:
+ items:
+ - const: shdload
+ - const: valid
+ - const: error
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - interrupt-names
+
+additionalProperties: false
+
+examples:
+ - |
+ signature@5618d000 {
+ compatible = "fsl,imx8qxp-dc-signature";
+ reg = <0x5618d000 0x140>;
+ interrupt-parent = <&dc0_intc>;
+ interrupts = <22>, <23>, <24>;
+ interrupt-names = "shdload", "valid", "error";
+ };
diff --git a/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-store.yaml b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-store.yaml
new file mode 100644
index 000000000000..42d1b10906be
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-store.yaml
@@ -0,0 +1,96 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/imx/fsl,imx8qxp-dc-store.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX8qxp Display Controller Store Unit
+
+description: |
+ The Store unit is the interface between the internal pixel processing
+ pipeline, which is 30-bit RGB plus 8-bit Alpha, and the AXI bus for
+ destination buffer access. It is used for the destination of Blit Engines.
+ It comprises a set of built-in functions to generate a wide range of buffer
+ formats. Note, that these are exactly inverse to corresponding functions in
+ the Fetch Unit.
+
+ +------X-------------------------+
+ | | Store Unit |
+ | V |
+ | +-------+ |
+ | | Gamma | Gamma apply |
+ | +-------+ |
+ | | |
+ | V |
+ | +-------+ |
+ | | Color | RGB to YUV |
+ | +-------+ |
+ | | |
+ | V |
+ | +-------+ |
+ | | Chroma| YUV444 to 422 |
+ | +-------+ |
+ | | |
+ | V |
+ | +-------+ |
+ | | Reduce| Bit width reduction |
+ | | | dithering |
+ | +-------+ |
+ | | |
+ | V |
+ | +-------+ |
+ | | Pack | RGBA/YUV to RAW |
+ | | Encode| or Compression |
+ | +-------+ |
+ | | |
+ | V |
+ +------X-------------------------+
+
+maintainers:
+ - Liu Ying <victor.liu@nxp.com>
+
+properties:
+ compatible:
+ const: fsl,imx8qxp-dc-store
+
+ reg:
+ maxItems: 2
+
+ reg-names:
+ items:
+ - const: pec
+ - const: cfg
+
+ interrupts:
+ maxItems: 3
+
+ interrupt-names:
+ items:
+ - const: shdload
+ - const: framecomplete
+ - const: seqcomplete
+
+ fsl,lts:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ Optional Linear Tile Store associated with the Store Unit.
+
+required:
+ - compatible
+ - reg
+ - reg-names
+ - interrupts
+ - interrupt-names
+
+additionalProperties: false
+
+examples:
+ - |
+ store@56180940 {
+ compatible = "fsl,imx8qxp-dc-store";
+ reg = <0x56180940 0x1c>, <0x56184000 0x5c>;
+ reg-names = "pec", "cfg";
+ interrupt-parent = <&dc0_intc>;
+ interrupts = <0>, <1>, <2>;
+ interrupt-names = "shdload", "framecomplete", "seqcomplete";
+ };
diff --git a/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-tcon.yaml b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-tcon.yaml
new file mode 100644
index 000000000000..7a3b77ea92c7
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-tcon.yaml
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/imx/fsl,imx8qxp-dc-tcon.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX8qxp Display Controller Timing Controller
+
+description:
+ The TCon can generate a wide range of customized synchronization signals and
+ does the mapping of the color bits to the output.
+
+maintainers:
+ - Liu Ying <victor.liu@nxp.com>
+
+properties:
+ compatible:
+ const: fsl,imx8qxp-dc-tcon
+
+ reg:
+ maxItems: 1
+
+ port:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: video output
+
+required:
+ - compatible
+ - reg
+ - port
+
+additionalProperties: false
+
+examples:
+ - |
+ tcon@5618c800 {
+ compatible = "fsl,imx8qxp-dc-tcon";
+ reg = <0x5618c800 0x588>;
+
+ port {
+ dc0_disp0_dc0_pixel_combiner_ch0: endpoint {
+ remote-endpoint = <&dc0_pixel_combiner_ch0_dc0_disp0>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc.yaml b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc.yaml
new file mode 100644
index 000000000000..0a72f9f0b5fd
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc.yaml
@@ -0,0 +1,236 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/imx/fsl,imx8qxp-dc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX8qxp Display Controller
+
+description: |
+ The Freescale i.MX8qxp Display Controller(DC) is comprised of three main
+ components that include a blit engine for 2D graphics accelerations, display
+ controller for display output processing, as well as a command sequencer.
+
+ Display buffers Source buffers
+ (AXI read master) (AXI read master)
+ | .......... | | | |
+ +---------------------------+------------+------------------+-+-+------+
+ | Display Controller (DC) | .......... | | | | |
+ | | | | | | |
+ | @@@@@@@@@@@ +----------+------------+------------+ | | | |
+ A | | Command | | V V | | | | |
+ X <-+->| Sequencer | | @@@@@@@@@@@@@@@@@@@@@@@@@@@@ | V V V |
+ I | | (AXI CLK) | | | | | @@@@@@@@@@ |
+ | @@@@@@@@@@@ | | Pixel Engine | | | | |
+ | | | | (AXI CLK) | | | | |
+ | V | @@@@@@@@@@@@@@@@@@@@@@@@@@@@ | | | |
+ A | *********** | | | | | | | Blit | |
+ H <-+->| Configure | | V V V V | | Engine | |
+ B | | (CFG CLK) | | 00000000000 11111111111 | | (AXI CLK)| |
+ | *********** | | Display | | Display | | | | |
+ | | | Engine | | Engine | | | | |
+ | | | (Disp CLK)| | (Disp CLK)| | | | |
+ | @@@@@@@@@@@ | 00000000000 11111111111 | @@@@@@@@@@ |
+ I | | Common | | | | | | |
+ R <-+--| Control | | | Display | | | |
+ Q | | (AXI CLK) | | | Controller | | | |
+ | @@@@@@@@@@@ +------------------------------------+ | |
+ | | | ^ | |
+ +--------------------------+----------------+-------+---------+--------+
+ ^ | | | |
+ | V V | V
+ Clocks & Resets Display Display Panic Destination
+ Output0 Output1 Control buffer
+ (AXI write master)
+
+maintainers:
+ - Liu Ying <victor.liu@nxp.com>
+
+properties:
+ compatible:
+ const: fsl,imx8qxp-dc
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ resets:
+ maxItems: 2
+
+ reset-names:
+ items:
+ - const: axi
+ - const: cfg
+
+ power-domains:
+ maxItems: 1
+
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 1
+
+ ranges: true
+
+patternProperties:
+ "^command-sequencer@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+
+ properties:
+ compatible:
+ const: fsl,imx8qxp-dc-command-sequencer
+
+ "^display-engine@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+
+ properties:
+ compatible:
+ const: fsl,imx8qxp-dc-display-engine
+
+ "^interrupt-controller@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+
+ properties:
+ compatible:
+ const: fsl,imx8qxp-dc-intc
+
+ "^pixel-engine@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+
+ properties:
+ compatible:
+ const: fsl,imx8qxp-dc-pixel-engine
+
+ "^pmu@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+
+ properties:
+ compatible:
+ const: fsl,imx8qxp-dc-axi-performance-counter
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - power-domains
+ - "#address-cells"
+ - "#size-cells"
+ - ranges
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/imx8-lpcg.h>
+ #include <dt-bindings/firmware/imx/rsrc.h>
+
+ display-controller@56180000 {
+ compatible = "fsl,imx8qxp-dc";
+ reg = <0x56180000 0x40000>;
+ clocks = <&dc0_lpcg IMX_LPCG_CLK_4>;
+ power-domains = <&pd IMX_SC_R_DC_0>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ interrupt-controller@56180040 {
+ compatible = "fsl,imx8qxp-dc-intc";
+ reg = <0x56180040 0x60>;
+ clocks = <&dc0_lpcg IMX_LPCG_CLK_5>;
+ interrupt-controller;
+ interrupt-parent = <&dc0_irqsteer>;
+ #interrupt-cells = <1>;
+ interrupts = <448>, <449>, <450>, <64>,
+ <65>, <66>, <67>, <68>,
+ <69>, <70>, <193>, <194>,
+ <195>, <196>, <197>, <72>,
+ <73>, <74>, <75>, <76>,
+ <77>, <78>, <79>, <80>,
+ <81>, <199>, <200>, <201>,
+ <202>, <203>, <204>, <205>,
+ <206>, <207>, <208>, <5>,
+ <0>, <1>, <2>, <3>,
+ <4>, <82>, <83>, <84>,
+ <85>, <209>, <210>, <211>,
+ <212>;
+ interrupt-names = "store9_shdload",
+ "store9_framecomplete",
+ "store9_seqcomplete",
+ "extdst0_shdload",
+ "extdst0_framecomplete",
+ "extdst0_seqcomplete",
+ "extdst4_shdload",
+ "extdst4_framecomplete",
+ "extdst4_seqcomplete",
+ "extdst1_shdload",
+ "extdst1_framecomplete",
+ "extdst1_seqcomplete",
+ "extdst5_shdload",
+ "extdst5_framecomplete",
+ "extdst5_seqcomplete",
+ "disengcfg_shdload0",
+ "disengcfg_framecomplete0",
+ "disengcfg_seqcomplete0",
+ "framegen0_int0",
+ "framegen0_int1",
+ "framegen0_int2",
+ "framegen0_int3",
+ "sig0_shdload",
+ "sig0_valid",
+ "sig0_error",
+ "disengcfg_shdload1",
+ "disengcfg_framecomplete1",
+ "disengcfg_seqcomplete1",
+ "framegen1_int0",
+ "framegen1_int1",
+ "framegen1_int2",
+ "framegen1_int3",
+ "sig1_shdload",
+ "sig1_valid",
+ "sig1_error",
+ "reserved",
+ "cmdseq_error",
+ "comctrl_sw0",
+ "comctrl_sw1",
+ "comctrl_sw2",
+ "comctrl_sw3",
+ "framegen0_primsync_on",
+ "framegen0_primsync_off",
+ "framegen0_secsync_on",
+ "framegen0_secsync_off",
+ "framegen1_primsync_on",
+ "framegen1_primsync_off",
+ "framegen1_secsync_on",
+ "framegen1_secsync_off";
+ };
+
+ pixel-engine@56180800 {
+ compatible = "fsl,imx8qxp-dc-pixel-engine";
+ reg = <0x56180800 0xac00>;
+ clocks = <&dc0_lpcg IMX_LPCG_CLK_5>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+ };
+
+ display-engine@5618b400 {
+ compatible = "fsl,imx8qxp-dc-display-engine";
+ reg = <0x5618b400 0x14>, <0x5618b800 0x1c00>;
+ reg-names = "top", "cfg";
+ interrupt-parent = <&dc0_intc>;
+ interrupts = <15>, <16>, <17>;
+ interrupt-names = "shdload", "framecomplete", "seqcomplete";
+ power-domains = <&pd IMX_SC_R_DC_0_PLL_0>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/imx/fsl-imx-drm.txt b/Documentation/devicetree/bindings/display/imx/fsl-imx-drm.txt
deleted file mode 100644
index 5bf77f6dd19d..000000000000
--- a/Documentation/devicetree/bindings/display/imx/fsl-imx-drm.txt
+++ /dev/null
@@ -1,162 +0,0 @@
-Freescale i.MX DRM master device
-================================
-
-The freescale i.MX DRM master device is a virtual device needed to list all
-IPU or other display interface nodes that comprise the graphics subsystem.
-
-Required properties:
-- compatible: Should be "fsl,imx-display-subsystem"
-- ports: Should contain a list of phandles pointing to display interface ports
- of IPU devices
-
-example:
-
-display-subsystem {
- compatible = "fsl,display-subsystem";
- ports = <&ipu_di0>;
-};
-
-
-Freescale i.MX IPUv3
-====================
-
-Required properties:
-- compatible: Should be "fsl,<chip>-ipu" where <chip> is one of
- - imx51
- - imx53
- - imx6q
- - imx6qp
-- reg: should be register base and length as documented in the
- datasheet
-- interrupts: Should contain sync interrupt and error interrupt,
- in this order.
-- resets: phandle pointing to the system reset controller and
- reset line index, see reset/fsl,imx-src.txt for details
-Additional required properties for fsl,imx6qp-ipu:
-- fsl,prg: phandle to prg node associated with this IPU instance
-Optional properties:
-- port@[0-3]: Port nodes with endpoint definitions as defined in
- Documentation/devicetree/bindings/media/video-interfaces.txt.
- Ports 0 and 1 should correspond to CSI0 and CSI1,
- ports 2 and 3 should correspond to DI0 and DI1, respectively.
-
-example:
-
-ipu: ipu@18000000 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,imx53-ipu";
- reg = <0x18000000 0x080000000>;
- interrupts = <11 10>;
- resets = <&src 2>;
-
- ipu_di0: port@2 {
- reg = <2>;
-
- ipu_di0_disp0: endpoint {
- remote-endpoint = <&display_in>;
- };
- };
-};
-
-Freescale i.MX PRE (Prefetch Resolve Engine)
-============================================
-
-Required properties:
-- compatible: should be "fsl,imx6qp-pre"
-- reg: should be register base and length as documented in the
- datasheet
-- clocks : phandle to the PRE axi clock input, as described
- in Documentation/devicetree/bindings/clock/clock-bindings.txt and
- Documentation/devicetree/bindings/clock/imx6q-clock.txt.
-- clock-names: should be "axi"
-- interrupts: should contain the PRE interrupt
-- fsl,iram: phandle pointing to the mmio-sram device node, that should be
- used for the PRE SRAM double buffer.
-
-example:
-
-pre@21c8000 {
- compatible = "fsl,imx6qp-pre";
- reg = <0x021c8000 0x1000>;
- interrupts = <GIC_SPI 90 IRQ_TYPE_EDGE_RISING>;
- clocks = <&clks IMX6QDL_CLK_PRE0>;
- clock-names = "axi";
- fsl,iram = <&ocram2>;
-};
-
-Freescale i.MX PRG (Prefetch Resolve Gasket)
-============================================
-
-Required properties:
-- compatible: should be "fsl,imx6qp-prg"
-- reg: should be register base and length as documented in the
- datasheet
-- clocks : phandles to the PRG ipg and axi clock inputs, as described
- in Documentation/devicetree/bindings/clock/clock-bindings.txt and
- Documentation/devicetree/bindings/clock/imx6q-clock.txt.
-- clock-names: should be "ipg" and "axi"
-- fsl,pres: phandles to the PRE units attached to this PRG, with the fixed
- PRE as the first entry and the muxable PREs following.
-
-example:
-
-prg@21cc000 {
- compatible = "fsl,imx6qp-prg";
- reg = <0x021cc000 0x1000>;
- clocks = <&clks IMX6QDL_CLK_PRG0_APB>,
- <&clks IMX6QDL_CLK_PRG0_AXI>;
- clock-names = "ipg", "axi";
- fsl,pres = <&pre1>, <&pre2>, <&pre3>;
-};
-
-Parallel display support
-========================
-
-Required properties:
-- compatible: Should be "fsl,imx-parallel-display"
-Optional properties:
-- interface-pix-fmt: How this display is connected to the
- display interface. Currently supported types: "rgb24", "rgb565", "bgr666"
- and "lvds666".
-- edid: verbatim EDID data block describing attached display.
-- ddc: phandle describing the i2c bus handling the display data
- channel
-- port@[0-1]: Port nodes with endpoint definitions as defined in
- Documentation/devicetree/bindings/media/video-interfaces.txt.
- Port 0 is the input port connected to the IPU display interface,
- port 1 is the output port connected to a panel.
-
-example:
-
-disp0 {
- compatible = "fsl,imx-parallel-display";
- edid = [edid-data];
- interface-pix-fmt = "rgb24";
-
- port@0 {
- reg = <0>;
-
- display_in: endpoint {
- remote-endpoint = <&ipu_di0_disp0>;
- };
- };
-
- port@1 {
- reg = <1>;
-
- display_out: endpoint {
- remote-endpoint = <&panel_in>;
- };
- };
-};
-
-panel {
- ...
-
- port {
- panel_in: endpoint {
- remote-endpoint = <&display_out>;
- };
- };
-};
diff --git a/Documentation/devicetree/bindings/display/imx/hdmi.txt b/Documentation/devicetree/bindings/display/imx/hdmi.txt
deleted file mode 100644
index 6d021e71c9cf..000000000000
--- a/Documentation/devicetree/bindings/display/imx/hdmi.txt
+++ /dev/null
@@ -1,65 +0,0 @@
-Freescale i.MX6 DWC HDMI TX Encoder
-===================================
-
-The HDMI transmitter is a Synopsys DesignWare HDMI 1.4 TX controller IP
-with a companion PHY IP.
-
-These DT bindings follow the Synopsys DWC HDMI TX bindings defined in
-Documentation/devicetree/bindings/display/bridge/dw_hdmi.txt with the
-following device-specific properties.
-
-
-Required properties:
-
-- compatible : Shall be one of "fsl,imx6q-hdmi" or "fsl,imx6dl-hdmi".
-- reg: See dw_hdmi.txt.
-- interrupts: HDMI interrupt number
-- clocks: See dw_hdmi.txt.
-- clock-names: Shall contain "iahb" and "isfr" as defined in dw_hdmi.txt.
-- ports: See dw_hdmi.txt. The DWC HDMI shall have between one and four ports,
- numbered 0 to 3, corresponding to the four inputs of the HDMI multiplexer.
- Each port shall have a single endpoint.
-- gpr : Shall contain a phandle to the iomuxc-gpr region containing the HDMI
- multiplexer control register.
-
-Optional properties
-
-- ddc-i2c-bus: The HDMI DDC bus can be connected to either a system I2C master
- or the functionally-reduced I2C master contained in the DWC HDMI. When
- connected to a system I2C master this property contains a phandle to that
- I2C master controller.
-
-
-Example:
-
- gpr: iomuxc-gpr@20e0000 {
- /* ... */
- };
-
- hdmi: hdmi@120000 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,imx6q-hdmi";
- reg = <0x00120000 0x9000>;
- interrupts = <0 115 0x04>;
- gpr = <&gpr>;
- clocks = <&clks 123>, <&clks 124>;
- clock-names = "iahb", "isfr";
- ddc-i2c-bus = <&i2c2>;
-
- port@0 {
- reg = <0>;
-
- hdmi_mux_0: endpoint {
- remote-endpoint = <&ipu1_di0_hdmi>;
- };
- };
-
- port@1 {
- reg = <1>;
-
- hdmi_mux_1: endpoint {
- remote-endpoint = <&ipu1_di1_hdmi>;
- };
- };
- };
diff --git a/Documentation/devicetree/bindings/display/imx/ldb.txt b/Documentation/devicetree/bindings/display/imx/ldb.txt
deleted file mode 100644
index 38c637fa39dd..000000000000
--- a/Documentation/devicetree/bindings/display/imx/ldb.txt
+++ /dev/null
@@ -1,147 +0,0 @@
-Device-Tree bindings for LVDS Display Bridge (ldb)
-
-LVDS Display Bridge
-===================
-
-The LVDS Display Bridge device tree node contains up to two lvds-channel
-nodes describing each of the two LVDS encoder channels of the bridge.
-
-Required properties:
- - #address-cells : should be <1>
- - #size-cells : should be <0>
- - compatible : should be "fsl,imx53-ldb" or "fsl,imx6q-ldb".
- Both LDB versions are similar, but i.MX6 has an additional
- multiplexer in the front to select any of the four IPU display
- interfaces as input for each LVDS channel.
- - gpr : should be <&gpr> on i.MX53 and i.MX6q.
- The phandle points to the iomuxc-gpr region containing the LVDS
- control register.
-- clocks, clock-names : phandles to the LDB divider and selector clocks and to
- the display interface selector clocks, as described in
- Documentation/devicetree/bindings/clock/clock-bindings.txt
- The following clocks are expected on i.MX53:
- "di0_pll" - LDB LVDS channel 0 mux
- "di1_pll" - LDB LVDS channel 1 mux
- "di0" - LDB LVDS channel 0 gate
- "di1" - LDB LVDS channel 1 gate
- "di0_sel" - IPU1 DI0 mux
- "di1_sel" - IPU1 DI1 mux
- On i.MX6q the following additional clocks are needed:
- "di2_sel" - IPU2 DI0 mux
- "di3_sel" - IPU2 DI1 mux
- The needed clock numbers for each are documented in
- Documentation/devicetree/bindings/clock/imx5-clock.txt, and in
- Documentation/devicetree/bindings/clock/imx6q-clock.txt.
-
-Optional properties:
- - pinctrl-names : should be "default" on i.MX53, not used on i.MX6q
- - pinctrl-0 : a phandle pointing to LVDS pin settings on i.MX53,
- not used on i.MX6q
- - fsl,dual-channel : boolean. if it exists, only LVDS channel 0 should
- be configured - one input will be distributed on both outputs in dual
- channel mode
-
-LVDS Channel
-============
-
-Each LVDS Channel has to contain either an of graph link to a panel device node
-or a display-timings node that describes the video timings for the connected
-LVDS display as well as the fsl,data-mapping and fsl,data-width properties.
-
-Required properties:
- - reg : should be <0> or <1>
- - port: Input and output port nodes with endpoint definitions as defined in
- Documentation/devicetree/bindings/graph.txt.
- On i.MX5, the internal two-input-multiplexer is used. Due to hardware
- limitations, only one input port (port@[0,1]) can be used for each channel
- (lvds-channel@[0,1], respectively).
- On i.MX6, there should be four input ports (port@[0-3]) that correspond
- to the four LVDS multiplexer inputs.
- A single output port (port@2 on i.MX5, port@4 on i.MX6) must be connected
- to a panel input port. Optionally, the output port can be left out if
- display-timings are used instead.
-
-Optional properties (required if display-timings are used):
- - ddc-i2c-bus: phandle of an I2C controller used for DDC EDID probing
- - display-timings : A node that describes the display timings as defined in
- Documentation/devicetree/bindings/display/panel/display-timing.txt.
- - fsl,data-mapping : should be "spwg" or "jeida"
- This describes how the color bits are laid out in the
- serialized LVDS signal.
- - fsl,data-width : should be <18> or <24>
-
-example:
-
-gpr: iomuxc-gpr@53fa8000 {
- /* ... */
-};
-
-ldb: ldb@53fa8008 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,imx53-ldb";
- gpr = <&gpr>;
- clocks = <&clks IMX5_CLK_LDB_DI0_SEL>,
- <&clks IMX5_CLK_LDB_DI1_SEL>,
- <&clks IMX5_CLK_IPU_DI0_SEL>,
- <&clks IMX5_CLK_IPU_DI1_SEL>,
- <&clks IMX5_CLK_LDB_DI0_GATE>,
- <&clks IMX5_CLK_LDB_DI1_GATE>;
- clock-names = "di0_pll", "di1_pll",
- "di0_sel", "di1_sel",
- "di0", "di1";
-
- /* Using an of-graph endpoint link to connect the panel */
- lvds-channel@0 {
- #address-cells = <1>;
- #size-cells = <0>;
- reg = <0>;
-
- port@0 {
- reg = <0>;
-
- lvds0_in: endpoint {
- remote-endpoint = <&ipu_di0_lvds0>;
- };
- };
-
- port@2 {
- reg = <2>;
-
- lvds0_out: endpoint {
- remote-endpoint = <&panel_in>;
- };
- };
- };
-
- /* Using display-timings and fsl,data-mapping/width instead */
- lvds-channel@1 {
- #address-cells = <1>;
- #size-cells = <0>;
- reg = <1>;
- fsl,data-mapping = "spwg";
- fsl,data-width = <24>;
-
- display-timings {
- /* ... */
- };
-
- port@1 {
- reg = <1>;
-
- lvds1_in: endpoint {
- remote-endpoint = <&ipu_di1_lvds1>;
- };
- };
- };
-};
-
-panel: lvds-panel {
- /* ... */
-
- port {
- panel_in: endpoint {
- remote-endpoint = <&lvds0_out>;
- };
- };
-};
diff --git a/Documentation/devicetree/bindings/display/imx/nxp,imx8mq-dcss.yaml b/Documentation/devicetree/bindings/display/imx/nxp,imx8mq-dcss.yaml
new file mode 100644
index 000000000000..4ae6328cde64
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/imx/nxp,imx8mq-dcss.yaml
@@ -0,0 +1,107 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright 2019 NXP
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/imx/nxp,imx8mq-dcss.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: iMX8MQ Display Controller Subsystem (DCSS)
+
+maintainers:
+ - Laurentiu Palcu <laurentiu.palcu@nxp.com>
+
+description:
+
+ The DCSS (display controller sub system) is used to source up to three
+ display buffers, compose them, and drive a display using HDMI 2.0a(with HDCP
+ 2.2) or MIPI-DSI. The DCSS is intended to support up to 4kp60 displays. HDR10
+ image processing capabilities are included to provide a solution capable of
+ driving next generation high dynamic range displays.
+
+properties:
+ compatible:
+ const: nxp,imx8mq-dcss
+
+ reg:
+ items:
+ - description: DCSS base address and size, up to IRQ steer start
+ - description: DCSS BLKCTL base address and size
+
+ interrupts:
+ items:
+ - description: Context loader completion and error interrupt
+ - description: DTG interrupt used to signal context loader trigger time
+ - description: DTG interrupt for Vblank
+
+ interrupt-names:
+ items:
+ - const: ctxld
+ - const: ctxld_kick
+ - const: vblank
+
+ clocks:
+ items:
+ - description: Display APB clock for all peripheral PIO access interfaces
+ - description: Display AXI clock needed by DPR, Scaler, RTRAM_CTRL
+ - description: RTRAM clock
+ - description: Pixel clock, can be driven either by HDMI phy clock or MIPI
+ - description: DTRC clock, needed by video decompressor
+
+ clock-names:
+ items:
+ - const: apb
+ - const: axi
+ - const: rtrm
+ - const: pix
+ - const: dtrc
+
+ assigned-clocks:
+ items:
+ - description: Phandle and clock specifier of IMX8MQ_CLK_DISP_AXI_ROOT
+ - description: Phandle and clock specifier of IMX8MQ_CLK_DISP_RTRM
+ - description: Phandle and clock specifier of either IMX8MQ_VIDEO2_PLL1_REF_SEL or
+ IMX8MQ_VIDEO_PLL1_REF_SEL
+
+ assigned-clock-parents:
+ items:
+ - description: Phandle and clock specifier of IMX8MQ_SYS1_PLL_800M
+ - description: Phandle and clock specifier of IMX8MQ_SYS1_PLL_800M
+ - description: Phandle and clock specifier of IMX8MQ_CLK_27M
+
+ assigned-clock-rates:
+ items:
+ - description: Must be 800 MHz
+ - description: Must be 400 MHz
+
+ port:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ A port node pointing to the input port of a HDMI/DP or MIPI display bridge.
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/imx8mq-clock.h>
+ dcss: display-controller@32e00000 {
+ compatible = "nxp,imx8mq-dcss";
+ reg = <0x32e00000 0x2d000>, <0x32e2f000 0x1000>;
+ interrupts = <6>, <8>, <9>;
+ interrupt-names = "ctxld", "ctxld_kick", "vblank";
+ interrupt-parent = <&irqsteer>;
+ clocks = <&clk IMX8MQ_CLK_DISP_APB_ROOT>, <&clk IMX8MQ_CLK_DISP_AXI_ROOT>,
+ <&clk IMX8MQ_CLK_DISP_RTRM_ROOT>, <&clk IMX8MQ_VIDEO2_PLL_OUT>,
+ <&clk IMX8MQ_CLK_DISP_DTRC>;
+ clock-names = "apb", "axi", "rtrm", "pix", "dtrc";
+ assigned-clocks = <&clk IMX8MQ_CLK_DISP_AXI>, <&clk IMX8MQ_CLK_DISP_RTRM>,
+ <&clk IMX8MQ_VIDEO2_PLL1_REF_SEL>;
+ assigned-clock-parents = <&clk IMX8MQ_SYS1_PLL_800M>, <&clk IMX8MQ_SYS1_PLL_800M>,
+ <&clk IMX8MQ_CLK_27M>;
+ assigned-clock-rates = <800000000>,
+ <400000000>;
+ port {
+ dcss_out: endpoint {
+ remote-endpoint = <&hdmi_in>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/ingenic,ipu.yaml b/Documentation/devicetree/bindings/display/ingenic,ipu.yaml
new file mode 100644
index 000000000000..319bd7c88fe3
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/ingenic,ipu.yaml
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/ingenic,ipu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Ingenic SoCs Image Processing Unit (IPU)
+
+maintainers:
+ - Paul Cercueil <paul@crapouillou.net>
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - ingenic,jz4725b-ipu
+ - ingenic,jz4760-ipu
+ - items:
+ - const: ingenic,jz4770-ipu
+ - const: ingenic,jz4760-ipu
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ const: ipu
+
+ port:
+ $ref: /schemas/graph.yaml#/properties/port
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/ingenic,jz4770-cgu.h>
+ ipu@13080000 {
+ compatible = "ingenic,jz4770-ipu", "ingenic,jz4760-ipu";
+ reg = <0x13080000 0x800>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <29>;
+
+ clocks = <&cgu JZ4770_CLK_IPU>;
+ clock-names = "ipu";
+
+ port {
+ ipu_ep: endpoint {
+ remote-endpoint = <&lcdc_ep>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/ingenic,lcd.yaml b/Documentation/devicetree/bindings/display/ingenic,lcd.yaml
new file mode 100644
index 000000000000..6d4c00f3fcc8
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/ingenic,lcd.yaml
@@ -0,0 +1,128 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/ingenic,lcd.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Ingenic SoCs LCD controller
+
+maintainers:
+ - Paul Cercueil <paul@crapouillou.net>
+
+properties:
+ $nodename:
+ pattern: "^lcd-controller@[0-9a-f]+$"
+
+ compatible:
+ enum:
+ - ingenic,jz4740-lcd
+ - ingenic,jz4725b-lcd
+ - ingenic,jz4760-lcd
+ - ingenic,jz4760b-lcd
+ - ingenic,jz4770-lcd
+ - ingenic,jz4780-lcd
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: Pixel clock
+ - description: Module clock
+ minItems: 1
+
+ clock-names:
+ items:
+ - const: lcd_pclk
+ - const: lcd
+ minItems: 1
+
+ port:
+ $ref: /schemas/graph.yaml#/properties/port
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: DPI output, to interface with TFT panels.
+
+ port@8:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Link to the Image Processing Unit (IPU).
+ (See ingenic,ipu.yaml).
+
+ required:
+ - port@0
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+
+if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - ingenic,jz4740-lcd
+ - ingenic,jz4780-lcd
+then:
+ properties:
+ clocks:
+ minItems: 2
+ clock-names:
+ minItems: 2
+else:
+ properties:
+ clocks:
+ maxItems: 1
+ clock-names:
+ maxItems: 1
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/ingenic,jz4740-cgu.h>
+ lcd-controller@13050000 {
+ compatible = "ingenic,jz4740-lcd";
+ reg = <0x13050000 0x1000>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <30>;
+
+ clocks = <&cgu JZ4740_CLK_LCD_PCLK>, <&cgu JZ4740_CLK_LCD>;
+ clock-names = "lcd_pclk", "lcd";
+
+ port {
+ endpoint {
+ remote-endpoint = <&panel_input>;
+ };
+ };
+ };
+
+ - |
+ #include <dt-bindings/clock/ingenic,jz4725b-cgu.h>
+ lcd-controller@13050000 {
+ compatible = "ingenic,jz4725b-lcd";
+ reg = <0x13050000 0x1000>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <31>;
+
+ clocks = <&cgu JZ4725B_CLK_LCD>;
+ clock-names = "lcd_pclk";
+
+ port {
+ endpoint {
+ remote-endpoint = <&panel_input>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/intel,keembay-display.yaml b/Documentation/devicetree/bindings/display/intel,keembay-display.yaml
new file mode 100644
index 000000000000..2cf54ecc707a
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/intel,keembay-display.yaml
@@ -0,0 +1,72 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/intel,keembay-display.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Intel Keem Bay display controller
+
+maintainers:
+ - Anitha Chrisanthus <anitha.chrisanthus@intel.com>
+ - Edmond J Dea <edmund.j.dea@intel.com>
+
+properties:
+ compatible:
+ const: intel,keembay-display
+
+ reg:
+ items:
+ - description: LCD registers range
+
+ reg-names:
+ items:
+ - const: lcd
+
+ clocks:
+ items:
+ - description: LCD controller clock
+ - description: pll0 clock
+
+ clock-names:
+ items:
+ - const: clk_lcd
+ - const: clk_pll0
+
+ interrupts:
+ maxItems: 1
+
+ port:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Display output node to DSI.
+
+required:
+ - compatible
+ - reg
+ - reg-names
+ - clocks
+ - clock-names
+ - interrupts
+ - port
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ display@20930000 {
+ compatible = "intel,keembay-display";
+ reg = <0x20930000 0x3000>;
+ reg-names = "lcd";
+ interrupts = <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&scmi_clk 0x83>,
+ <&scmi_clk 0x0>;
+ clock-names = "clk_lcd", "clk_pll0";
+
+ port {
+ disp_out: endpoint {
+ remote-endpoint = <&dsi_in>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/intel,keembay-msscam.yaml b/Documentation/devicetree/bindings/display/intel,keembay-msscam.yaml
new file mode 100644
index 000000000000..cc7e1f318fe4
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/intel,keembay-msscam.yaml
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/intel,keembay-msscam.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Intel Keem Bay MSSCAM
+
+maintainers:
+ - Anitha Chrisanthus <anitha.chrisanthus@intel.com>
+ - Edmond J Dea <edmund.j.dea@intel.com>
+
+description: |
+ MSSCAM controls local clocks in the display subsystem namely LCD clocks and
+ MIPI DSI clocks. It also configures the interconnect between LCD and
+ MIPI DSI.
+
+properties:
+ compatible:
+ items:
+ - const: intel,keembay-msscam
+ - const: syscon
+
+ reg:
+ maxItems: 1
+
+ reg-io-width:
+ const: 4
+
+required:
+ - compatible
+ - reg
+ - reg-io-width
+
+additionalProperties: false
+
+examples:
+ - |
+ msscam:msscam@20910000 {
+ compatible = "intel,keembay-msscam", "syscon";
+ reg = <0x20910000 0x30>;
+ reg-io-width = <4>;
+ };
diff --git a/Documentation/devicetree/bindings/display/lvds-data-mapping.yaml b/Documentation/devicetree/bindings/display/lvds-data-mapping.yaml
new file mode 100644
index 000000000000..ab842594feb9
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/lvds-data-mapping.yaml
@@ -0,0 +1,115 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/lvds-data-mapping.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: LVDS Data Mapping
+
+maintainers:
+ - Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
+ - Thierry Reding <thierry.reding@gmail.com>
+
+description: |
+ LVDS is a physical layer specification defined in ANSI/TIA/EIA-644-A. Multiple
+ incompatible data link layers have been used over time to transmit image data
+ to LVDS devices. This bindings supports devices compatible with the following
+ specifications.
+
+ [JEIDA] "Digital Interface Standards for Monitor", JEIDA-59-1999, February
+ 1999 (Version 1.0), Japan Electronic Industry Development Association (JEIDA)
+ [LDI] "Open LVDS Display Interface", May 1999 (Version 0.95), National
+ Semiconductor
+ [VESA] "VESA Notebook Panel Standard", October 2007 (Version 1.0), Video
+ Electronics Standards Association (VESA)
+
+ Device compatible with those specifications have been marketed under the
+ FPD-Link and FlatLink brands.
+
+ This bindings also supports 30-bit data mapping compatible with JEIDA and
+ VESA.
+
+properties:
+ data-mapping:
+ enum:
+ - jeida-18
+ - jeida-24
+ - jeida-30
+ - vesa-24
+ - vesa-30
+ description: |
+ The color signals mapping order.
+
+ LVDS data mappings are defined as follows.
+
+ - "jeida-18" - 18-bit data mapping compatible with the [JEIDA], [LDI] and
+ [VESA] specifications. Data are transferred as follows on 3 LVDS lanes.
+
+ Slot 0 1 2 3 4 5 6
+ ________________ _________________
+ Clock \_______________________/
+ ______ ______ ______ ______ ______ ______ ______
+ DATA0 ><__G0__><__R5__><__R4__><__R3__><__R2__><__R1__><__R0__><
+ DATA1 ><__B1__><__B0__><__G5__><__G4__><__G3__><__G2__><__G1__><
+ DATA2 ><_CTL2_><_CTL1_><_CTL0_><__B5__><__B4__><__B3__><__B2__><
+
+ - "jeida-24" - 24-bit data mapping compatible with the [DSIM] and [LDI]
+ specifications. Data are transferred as follows on 4 LVDS lanes.
+
+ Slot 0 1 2 3 4 5 6
+ ________________ _________________
+ Clock \_______________________/
+ ______ ______ ______ ______ ______ ______ ______
+ DATA0 ><__G2__><__R7__><__R6__><__R5__><__R4__><__R3__><__R2__><
+ DATA1 ><__B3__><__B2__><__G7__><__G6__><__G5__><__G4__><__G3__><
+ DATA2 ><_CTL2_><_CTL1_><_CTL0_><__B7__><__B6__><__B5__><__B4__><
+ DATA3 ><_CTL3_><__B1__><__B0__><__G1__><__G0__><__R1__><__R0__><
+
+ - "jeida-30" - 30-bit data mapping compatible with JEIDA and VESA. Data
+ are transferred as follows on 5 LVDS lanes.
+
+ Slot 0 1 2 3 4 5 6
+ ________________ _________________
+ Clock \_______________________/
+ ______ ______ ______ ______ ______ ______ ______
+ DATA0 ><__G4__><__R9__><__R8__><__R7__><__R6__><__R5__><__R4__><
+ DATA1 ><__B5__><__B4__><__G9__><__G8__><__G7__><__G6__><__G5__><
+ DATA2 ><_CTL2_><_CTL1_><_CTL0_><__B9__><__B8__><__B7__><__B6__><
+ DATA3 ><_CTL3_><__B3__><__B2__><__G3__><__G2__><__R3__><__R2__><
+ DATA4 ><_CTL3_><__B1__><__B0__><__G1__><__G0__><__R1__><__R0__><
+
+ - "vesa-24" - 24-bit data mapping compatible with the [VESA] specification.
+ Data are transferred as follows on 4 LVDS lanes.
+
+ Slot 0 1 2 3 4 5 6
+ ________________ _________________
+ Clock \_______________________/
+ ______ ______ ______ ______ ______ ______ ______
+ DATA0 ><__G0__><__R5__><__R4__><__R3__><__R2__><__R1__><__R0__><
+ DATA1 ><__B1__><__B0__><__G5__><__G4__><__G3__><__G2__><__G1__><
+ DATA2 ><_CTL2_><_CTL1_><_CTL0_><__B5__><__B4__><__B3__><__B2__><
+ DATA3 ><_CTL3_><__B7__><__B6__><__G7__><__G6__><__R7__><__R6__><
+
+ - "vesa-30" - 30-bit data mapping compatible with VESA. Data are
+ transferred as follows on 5 LVDS lanes.
+
+ Slot 0 1 2 3 4 5 6
+ ________________ _________________
+ Clock \_______________________/
+ ______ ______ ______ ______ ______ ______ ______
+ DATA0 ><__G0__><__R5__><__R4__><__R3__><__R2__><__R1__><__R0__><
+ DATA1 ><__B1__><__B0__><__G5__><__G4__><__G3__><__G2__><__G1__><
+ DATA2 ><_CTL2_><_CTL1_><_CTL0_><__B5__><__B4__><__B3__><__B2__><
+ DATA3 ><_CTL3_><__B7__><__B6__><__G7__><__G6__><__R7__><__R6__><
+ DATA4 ><_CTL3_><__B9__><__B8__><__G9__><__G8__><__R9__><__R8__><
+
+ Control signals are mapped as follows.
+
+ CTL0: HSync
+ CTL1: VSync
+ CTL2: Data Enable
+ CTL3: 0
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/display/lvds-dual-ports.yaml b/Documentation/devicetree/bindings/display/lvds-dual-ports.yaml
new file mode 100644
index 000000000000..785701fe1590
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/lvds-dual-ports.yaml
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/lvds-dual-ports.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Dual-link LVDS Display Common Properties
+
+maintainers:
+ - Liu Ying <victor.liu@nxp.com>
+
+description: |
+ Common properties for LVDS displays with dual LVDS links. Extend LVDS display
+ common properties defined in lvds.yaml.
+
+ Dual-link LVDS displays receive odd pixels and even pixels separately from
+ the dual LVDS links. One link receives odd pixels and the other receives
+ even pixels. Some of those displays may also use only one LVDS link to
+ receive all pixels, being odd and even agnostic.
+
+allOf:
+ - $ref: lvds.yaml#
+
+properties:
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ patternProperties:
+ '^port@[01]$':
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ unevaluatedProperties: false
+ description: |
+ port@0 represents the first LVDS input link.
+ port@1 represents the second LVDS input link.
+
+ properties:
+ dual-lvds-odd-pixels:
+ type: boolean
+ description: LVDS input link for odd pixels
+
+ dual-lvds-even-pixels:
+ type: boolean
+ description: LVDS input link for even pixels
+
+ oneOf:
+ - required: [dual-lvds-odd-pixels]
+ - required: [dual-lvds-even-pixels]
+ - properties:
+ dual-lvds-odd-pixels: false
+ dual-lvds-even-pixels: false
+
+ anyOf:
+ - required:
+ - port@0
+ - required:
+ - port@1
+
+required:
+ - ports
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/display/lvds.yaml b/Documentation/devicetree/bindings/display/lvds.yaml
new file mode 100644
index 000000000000..b74efbea3be2
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/lvds.yaml
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/lvds.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: LVDS Display Common Properties
+
+allOf:
+ - $ref: lvds-data-mapping.yaml#
+
+maintainers:
+ - Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
+ - Thierry Reding <thierry.reding@gmail.com>
+
+description:
+ This binding extends the data mapping defined in lvds-data-mapping.yaml.
+ It supports reversing the bit order on the formats defined there in order
+ to accommodate for even more specialized data formats, since a variety of
+ data formats and layouts is used to drive LVDS displays.
+
+properties:
+ data-mirror:
+ type: boolean
+ description:
+ If set, reverse the bit order described in the data mappings on all
+ data lanes, transmitting bits for slots 6 to 0 instead of 0 to 6.
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/display/mayqueen,pixpaper.yaml b/Documentation/devicetree/bindings/display/mayqueen,pixpaper.yaml
new file mode 100644
index 000000000000..cd27f8ba5ae1
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/mayqueen,pixpaper.yaml
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/mayqueen,pixpaper.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Mayqueen Pixpaper e-ink display panel
+
+maintainers:
+ - LiangCheng Wang <zaq14760@gmail.com>
+
+description:
+ The Pixpaper is an e-ink display panel controlled via an SPI interface.
+ The panel has a resolution of 122x250 pixels and requires GPIO pins for
+ reset, busy, and data/command control.
+
+allOf:
+ - $ref: /schemas/spi/spi-peripheral-props.yaml#
+
+properties:
+ compatible:
+ const: mayqueen,pixpaper
+
+ reg:
+ maxItems: 1
+
+ spi-max-frequency:
+ maximum: 1000000
+ default: 1000000
+
+ reset-gpios:
+ maxItems: 1
+
+ busy-gpios:
+ maxItems: 1
+
+ dc-gpios:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - reset-gpios
+ - busy-gpios
+ - dc-gpios
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+ spi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ display@0 {
+ compatible = "mayqueen,pixpaper";
+ reg = <0>;
+ spi-max-frequency = <1000000>;
+ reset-gpios = <&gpio1 17 GPIO_ACTIVE_HIGH>;
+ busy-gpios = <&gpio1 18 GPIO_ACTIVE_HIGH>;
+ dc-gpios = <&gpio1 19 GPIO_ACTIVE_HIGH>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,aal.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,aal.yaml
new file mode 100644
index 000000000000..daf90ebb39bf
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,aal.yaml
@@ -0,0 +1,137 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/mediatek/mediatek,aal.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Mediatek display adaptive ambient light processor
+
+maintainers:
+ - Chun-Kuang Hu <chunkuang.hu@kernel.org>
+ - Philipp Zabel <p.zabel@pengutronix.de>
+
+description: |
+ Mediatek display adaptive ambient light processor, namely AAL,
+ is responsible for backlight power saving and sunlight visibility improving.
+ AAL device node must be siblings to the central MMSYS_CONFIG node.
+ For a description of the MMSYS_CONFIG binding, see
+ Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.yaml
+ for details.
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - mediatek,mt8173-disp-aal
+ - mediatek,mt8183-disp-aal
+ - mediatek,mt8195-mdp3-aal
+ - items:
+ - enum:
+ - mediatek,mt8188-mdp3-aal
+ - const: mediatek,mt8195-mdp3-aal
+ - items:
+ - enum:
+ - mediatek,mt2712-disp-aal
+ - mediatek,mt6795-disp-aal
+ - const: mediatek,mt8173-disp-aal
+ - items:
+ - enum:
+ - mediatek,mt8186-disp-aal
+ - mediatek,mt8188-disp-aal
+ - mediatek,mt8192-disp-aal
+ - mediatek,mt8195-disp-aal
+ - mediatek,mt8365-disp-aal
+ - const: mediatek,mt8183-disp-aal
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ power-domains:
+ description: A phandle and PM domain specifier as defined by bindings of
+ the power controller specified by phandle. See
+ Documentation/devicetree/bindings/power/power-domain.yaml for details.
+
+ clocks:
+ items:
+ - description: AAL Clock
+
+ mediatek,gce-client-reg:
+ description: The register of client driver can be configured by gce with
+ 4 arguments defined in this property, such as phandle of gce, subsys id,
+ register offset and size. Each GCE subsys id is mapping to a client
+ defined in the header include/dt-bindings/gce/<chip>-gce.h.
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ maxItems: 1
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+ description:
+ Input and output ports can have multiple endpoints, each of those
+ connects to either the primary, secondary, etc, display pipeline.
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: AAL input port
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ AAL output to the next component's input, for example could be one
+ of many gamma, overdrive or other blocks.
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - power-domains
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/clock/mt8173-clk.h>
+ #include <dt-bindings/power/mt8173-power.h>
+ #include <dt-bindings/gce/mt8173-gce.h>
+
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ aal@14015000 {
+ compatible = "mediatek,mt8173-disp-aal";
+ reg = <0 0x14015000 0 0x1000>;
+ interrupts = <GIC_SPI 189 IRQ_TYPE_LEVEL_LOW>;
+ power-domains = <&scpsys MT8173_POWER_DOMAIN_MM>;
+ clocks = <&mmsys CLK_MM_DISP_AAL>;
+ mediatek,gce-client-reg = <&gce SUBSYS_1401XXXX 0x5000 0x1000>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ endpoint {
+ remote-endpoint = <&ccorr0_out>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ endpoint {
+ remote-endpoint = <&gamma0_in>;
+ };
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,ccorr.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,ccorr.yaml
new file mode 100644
index 000000000000..fca8e7bb0cbc
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,ccorr.yaml
@@ -0,0 +1,109 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/mediatek/mediatek,ccorr.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Mediatek display color correction
+
+maintainers:
+ - Chun-Kuang Hu <chunkuang.hu@kernel.org>
+ - Philipp Zabel <p.zabel@pengutronix.de>
+
+description: |
+ Mediatek display color correction, namely CCORR, reproduces correct color
+ on panels with different color gamut.
+ CCORR device node must be siblings to the central MMSYS_CONFIG node.
+ For a description of the MMSYS_CONFIG binding, see
+ Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.yaml
+ for details.
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - mediatek,mt8183-disp-ccorr
+ - mediatek,mt8192-disp-ccorr
+ - items:
+ - const: mediatek,mt8365-disp-ccorr
+ - const: mediatek,mt8183-disp-ccorr
+ - items:
+ - enum:
+ - mediatek,mt8186-disp-ccorr
+ - mediatek,mt8188-disp-ccorr
+ - mediatek,mt8195-disp-ccorr
+ - const: mediatek,mt8192-disp-ccorr
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ power-domains:
+ description: A phandle and PM domain specifier as defined by bindings of
+ the power controller specified by phandle. See
+ Documentation/devicetree/bindings/power/power-domain.yaml for details.
+
+ clocks:
+ items:
+ - description: CCORR Clock
+
+ mediatek,gce-client-reg:
+ description: The register of client driver can be configured by gce with
+ 4 arguments defined in this property, such as phandle of gce, subsys id,
+ register offset and size. Each GCE subsys id is mapping to a client
+ defined in the header include/dt-bindings/gce/<chip>-gce.h.
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ maxItems: 1
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+ description:
+ Input and output ports can have multiple endpoints, each of those
+ connects to either the primary, secondary, etc, display pipeline.
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: CCORR input port
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ CCORR output to the input of the next desired component in the
+ display pipeline, usually only one of the available AAL blocks.
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - power-domains
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/clock/mt8183-clk.h>
+ #include <dt-bindings/power/mt8183-power.h>
+ #include <dt-bindings/gce/mt8183-gce.h>
+
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ ccorr0: ccorr@1400f000 {
+ compatible = "mediatek,mt8183-disp-ccorr";
+ reg = <0 0x1400f000 0 0x1000>;
+ interrupts = <GIC_SPI 232 IRQ_TYPE_LEVEL_LOW>;
+ power-domains = <&spm MT8183_POWER_DOMAIN_DISP>;
+ clocks = <&mmsys CLK_MM_DISP_CCORR0>;
+ mediatek,gce-client-reg = <&gce SUBSYS_1400XXXX 0xf000 0x1000>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,cec.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,cec.yaml
new file mode 100644
index 000000000000..080cf321209e
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,cec.yaml
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/mediatek/mediatek,cec.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Mediatek HDMI CEC Controller
+
+maintainers:
+ - CK Hu <ck.hu@mediatek.com>
+ - Jitao shi <jitao.shi@mediatek.com>
+
+description: |
+ The HDMI CEC controller handles hotplug detection and CEC communication.
+
+properties:
+ compatible:
+ enum:
+ - mediatek,mt7623-cec
+ - mediatek,mt8167-cec
+ - mediatek,mt8173-cec
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/mt8173-clk.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+ cec: cec@10013000 {
+ compatible = "mediatek,mt8173-cec";
+ reg = <0x10013000 0xbc>;
+ interrupts = <GIC_SPI 167 IRQ_TYPE_LEVEL_LOW>;
+ clocks = <&infracfg CLK_INFRA_CEC>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,color.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,color.yaml
new file mode 100644
index 000000000000..5564f4063317
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,color.yaml
@@ -0,0 +1,122 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/mediatek/mediatek,color.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Mediatek display color processor
+
+maintainers:
+ - Chun-Kuang Hu <chunkuang.hu@kernel.org>
+ - Philipp Zabel <p.zabel@pengutronix.de>
+
+description: |
+ Mediatek display color processor, namely COLOR, provides hue, luma and
+ saturation adjustments to get better picture quality and to have one panel
+ resemble the other in their output characteristics.
+ COLOR device node must be siblings to the central MMSYS_CONFIG node.
+ For a description of the MMSYS_CONFIG binding, see
+ Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.yaml
+ for details.
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - mediatek,mt2701-disp-color
+ - mediatek,mt8167-disp-color
+ - mediatek,mt8173-disp-color
+ - mediatek,mt8195-mdp3-color
+ - items:
+ - enum:
+ - mediatek,mt8188-mdp3-color
+ - const: mediatek,mt8195-mdp3-color
+ - items:
+ - enum:
+ - mediatek,mt7623-disp-color
+ - mediatek,mt2712-disp-color
+ - const: mediatek,mt2701-disp-color
+ - items:
+ - enum:
+ - mediatek,mt6795-disp-color
+ - mediatek,mt8183-disp-color
+ - mediatek,mt8186-disp-color
+ - mediatek,mt8188-disp-color
+ - mediatek,mt8192-disp-color
+ - mediatek,mt8195-disp-color
+ - mediatek,mt8365-disp-color
+ - const: mediatek,mt8173-disp-color
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ power-domains:
+ description: A phandle and PM domain specifier as defined by bindings of
+ the power controller specified by phandle. See
+ Documentation/devicetree/bindings/power/power-domain.yaml for details.
+
+ clocks:
+ items:
+ - description: COLOR Clock
+
+ mediatek,gce-client-reg:
+ description: The register of client driver can be configured by gce with
+ 4 arguments defined in this property, such as phandle of gce, subsys id,
+ register offset and size. Each GCE subsys id is mapping to a client
+ defined in the header include/dt-bindings/gce/<chip>-gce.h.
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ maxItems: 1
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+ description:
+ Input and output ports can have multiple endpoints, each of those
+ connects to either the primary, secondary, etc, display pipeline.
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: COLOR input port
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ COLOR output to the input of the next desired component in the
+ display pipeline, for example one of the available CCORR or AAL
+ blocks.
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - power-domains
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/clock/mt8173-clk.h>
+ #include <dt-bindings/power/mt8173-power.h>
+ #include <dt-bindings/gce/mt8173-gce.h>
+
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ color0: color@14013000 {
+ compatible = "mediatek,mt8173-disp-color";
+ reg = <0 0x14013000 0 0x1000>;
+ interrupts = <GIC_SPI 187 IRQ_TYPE_LEVEL_LOW>;
+ power-domains = <&scpsys MT8173_POWER_DOMAIN_MM>;
+ clocks = <&mmsys CLK_MM_DISP_COLOR0>;
+ mediatek,gce-client-reg = <&gce SUBSYS_1401XXXX 0x3000 0x1000>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,disp.txt b/Documentation/devicetree/bindings/display/mediatek/mediatek,disp.txt
deleted file mode 100644
index 8469de510001..000000000000
--- a/Documentation/devicetree/bindings/display/mediatek/mediatek,disp.txt
+++ /dev/null
@@ -1,205 +0,0 @@
-Mediatek display subsystem
-==========================
-
-The Mediatek display subsystem consists of various DISP function blocks in the
-MMSYS register space. The connections between them can be configured by output
-and input selectors in the MMSYS_CONFIG register space. Pixel clock and start
-of frame signal are distributed to the other function blocks by a DISP_MUTEX
-function block.
-
-All DISP device tree nodes must be siblings to the central MMSYS_CONFIG node.
-For a description of the MMSYS_CONFIG binding, see
-Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.txt.
-
-DISP function blocks
-====================
-
-A display stream starts at a source function block that reads pixel data from
-memory and ends with a sink function block that drives pixels on a display
-interface, or writes pixels back to memory. All DISP function blocks have
-their own register space, interrupt, and clock gate. The blocks that can
-access memory additionally have to list the IOMMU and local arbiter they are
-connected to.
-
-For a description of the display interface sink function blocks, see
-Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.txt and
-Documentation/devicetree/bindings/display/mediatek/mediatek,dpi.txt.
-
-Required properties (all function blocks):
-- compatible: "mediatek,<chip>-disp-<function>", one of
- "mediatek,<chip>-disp-ovl" - overlay (4 layers, blending, csc)
- "mediatek,<chip>-disp-rdma" - read DMA / line buffer
- "mediatek,<chip>-disp-wdma" - write DMA
- "mediatek,<chip>-disp-color" - color processor
- "mediatek,<chip>-disp-aal" - adaptive ambient light controller
- "mediatek,<chip>-disp-gamma" - gamma correction
- "mediatek,<chip>-disp-merge" - merge streams from two RDMA sources
- "mediatek,<chip>-disp-split" - split stream to two encoders
- "mediatek,<chip>-disp-ufoe" - data compression engine
- "mediatek,<chip>-dsi" - DSI controller, see mediatek,dsi.txt
- "mediatek,<chip>-dpi" - DPI controller, see mediatek,dpi.txt
- "mediatek,<chip>-disp-mutex" - display mutex
- "mediatek,<chip>-disp-od" - overdrive
- the supported chips are mt2701, mt2712 and mt8173.
-- reg: Physical base address and length of the function block register space
-- interrupts: The interrupt signal from the function block (required, except for
- merge and split function blocks).
-- clocks: device clocks
- See Documentation/devicetree/bindings/clock/clock-bindings.txt for details.
- For most function blocks this is just a single clock input. Only the DSI and
- DPI controller nodes have multiple clock inputs. These are documented in
- mediatek,dsi.txt and mediatek,dpi.txt, respectively.
-
-Required properties (DMA function blocks):
-- compatible: Should be one of
- "mediatek,<chip>-disp-ovl"
- "mediatek,<chip>-disp-rdma"
- "mediatek,<chip>-disp-wdma"
- the supported chips are mt2701 and mt8173.
-- larb: Should contain a phandle pointing to the local arbiter device as defined
- in Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.txt
-- iommus: Should point to the respective IOMMU block with master port as
- argument, see Documentation/devicetree/bindings/iommu/mediatek,iommu.txt
- for details.
-
-Examples:
-
-mmsys: clock-controller@14000000 {
- compatible = "mediatek,mt8173-mmsys", "syscon";
- reg = <0 0x14000000 0 0x1000>;
- power-domains = <&scpsys MT8173_POWER_DOMAIN_MM>;
- #clock-cells = <1>;
-};
-
-ovl0: ovl@1400c000 {
- compatible = "mediatek,mt8173-disp-ovl";
- reg = <0 0x1400c000 0 0x1000>;
- interrupts = <GIC_SPI 180 IRQ_TYPE_LEVEL_LOW>;
- power-domains = <&scpsys MT8173_POWER_DOMAIN_MM>;
- clocks = <&mmsys CLK_MM_DISP_OVL0>;
- iommus = <&iommu M4U_PORT_DISP_OVL0>;
- mediatek,larb = <&larb0>;
-};
-
-ovl1: ovl@1400d000 {
- compatible = "mediatek,mt8173-disp-ovl";
- reg = <0 0x1400d000 0 0x1000>;
- interrupts = <GIC_SPI 181 IRQ_TYPE_LEVEL_LOW>;
- power-domains = <&scpsys MT8173_POWER_DOMAIN_MM>;
- clocks = <&mmsys CLK_MM_DISP_OVL1>;
- iommus = <&iommu M4U_PORT_DISP_OVL1>;
- mediatek,larb = <&larb4>;
-};
-
-rdma0: rdma@1400e000 {
- compatible = "mediatek,mt8173-disp-rdma";
- reg = <0 0x1400e000 0 0x1000>;
- interrupts = <GIC_SPI 182 IRQ_TYPE_LEVEL_LOW>;
- power-domains = <&scpsys MT8173_POWER_DOMAIN_MM>;
- clocks = <&mmsys CLK_MM_DISP_RDMA0>;
- iommus = <&iommu M4U_PORT_DISP_RDMA0>;
- mediatek,larb = <&larb0>;
-};
-
-rdma1: rdma@1400f000 {
- compatible = "mediatek,mt8173-disp-rdma";
- reg = <0 0x1400f000 0 0x1000>;
- interrupts = <GIC_SPI 183 IRQ_TYPE_LEVEL_LOW>;
- power-domains = <&scpsys MT8173_POWER_DOMAIN_MM>;
- clocks = <&mmsys CLK_MM_DISP_RDMA1>;
- iommus = <&iommu M4U_PORT_DISP_RDMA1>;
- mediatek,larb = <&larb4>;
-};
-
-rdma2: rdma@14010000 {
- compatible = "mediatek,mt8173-disp-rdma";
- reg = <0 0x14010000 0 0x1000>;
- interrupts = <GIC_SPI 184 IRQ_TYPE_LEVEL_LOW>;
- power-domains = <&scpsys MT8173_POWER_DOMAIN_MM>;
- clocks = <&mmsys CLK_MM_DISP_RDMA2>;
- iommus = <&iommu M4U_PORT_DISP_RDMA2>;
- mediatek,larb = <&larb4>;
-};
-
-wdma0: wdma@14011000 {
- compatible = "mediatek,mt8173-disp-wdma";
- reg = <0 0x14011000 0 0x1000>;
- interrupts = <GIC_SPI 185 IRQ_TYPE_LEVEL_LOW>;
- power-domains = <&scpsys MT8173_POWER_DOMAIN_MM>;
- clocks = <&mmsys CLK_MM_DISP_WDMA0>;
- iommus = <&iommu M4U_PORT_DISP_WDMA0>;
- mediatek,larb = <&larb0>;
-};
-
-wdma1: wdma@14012000 {
- compatible = "mediatek,mt8173-disp-wdma";
- reg = <0 0x14012000 0 0x1000>;
- interrupts = <GIC_SPI 186 IRQ_TYPE_LEVEL_LOW>;
- power-domains = <&scpsys MT8173_POWER_DOMAIN_MM>;
- clocks = <&mmsys CLK_MM_DISP_WDMA1>;
- iommus = <&iommu M4U_PORT_DISP_WDMA1>;
- mediatek,larb = <&larb4>;
-};
-
-color0: color@14013000 {
- compatible = "mediatek,mt8173-disp-color";
- reg = <0 0x14013000 0 0x1000>;
- interrupts = <GIC_SPI 187 IRQ_TYPE_LEVEL_LOW>;
- power-domains = <&scpsys MT8173_POWER_DOMAIN_MM>;
- clocks = <&mmsys CLK_MM_DISP_COLOR0>;
-};
-
-color1: color@14014000 {
- compatible = "mediatek,mt8173-disp-color";
- reg = <0 0x14014000 0 0x1000>;
- interrupts = <GIC_SPI 188 IRQ_TYPE_LEVEL_LOW>;
- power-domains = <&scpsys MT8173_POWER_DOMAIN_MM>;
- clocks = <&mmsys CLK_MM_DISP_COLOR1>;
-};
-
-aal@14015000 {
- compatible = "mediatek,mt8173-disp-aal";
- reg = <0 0x14015000 0 0x1000>;
- interrupts = <GIC_SPI 189 IRQ_TYPE_LEVEL_LOW>;
- power-domains = <&scpsys MT8173_POWER_DOMAIN_MM>;
- clocks = <&mmsys CLK_MM_DISP_AAL>;
-};
-
-gamma@14016000 {
- compatible = "mediatek,mt8173-disp-gamma";
- reg = <0 0x14016000 0 0x1000>;
- interrupts = <GIC_SPI 190 IRQ_TYPE_LEVEL_LOW>;
- power-domains = <&scpsys MT8173_POWER_DOMAIN_MM>;
- clocks = <&mmsys CLK_MM_DISP_GAMMA>;
-};
-
-ufoe@1401a000 {
- compatible = "mediatek,mt8173-disp-ufoe";
- reg = <0 0x1401a000 0 0x1000>;
- interrupts = <GIC_SPI 191 IRQ_TYPE_LEVEL_LOW>;
- power-domains = <&scpsys MT8173_POWER_DOMAIN_MM>;
- clocks = <&mmsys CLK_MM_DISP_UFOE>;
-};
-
-dsi0: dsi@1401b000 {
- /* See mediatek,dsi.txt for details */
-};
-
-dpi0: dpi@1401d000 {
- /* See mediatek,dpi.txt for details */
-};
-
-mutex: mutex@14020000 {
- compatible = "mediatek,mt8173-disp-mutex";
- reg = <0 0x14020000 0 0x1000>;
- interrupts = <GIC_SPI 169 IRQ_TYPE_LEVEL_LOW>;
- power-domains = <&scpsys MT8173_POWER_DOMAIN_MM>;
- clocks = <&mmsys CLK_MM_MUTEX_32K>;
-};
-
-od@14023000 {
- compatible = "mediatek,mt8173-disp-od";
- reg = <0 0x14023000 0 0x1000>;
- power-domains = <&scpsys MT8173_POWER_DOMAIN_MM>;
- clocks = <&mmsys CLK_MM_DISP_OD>;
-};
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,dither.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,dither.yaml
new file mode 100644
index 000000000000..abaf27916d13
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,dither.yaml
@@ -0,0 +1,109 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/mediatek/mediatek,dither.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Mediatek display dither processor
+
+maintainers:
+ - Chun-Kuang Hu <chunkuang.hu@kernel.org>
+ - Philipp Zabel <p.zabel@pengutronix.de>
+
+description: |
+ Mediatek display dither processor, namely DITHER, works by approximating
+ unavailable colors with available colors and by mixing and matching available
+ colors to mimic unavailable ones.
+ DITHER device node must be siblings to the central MMSYS_CONFIG node.
+ For a description of the MMSYS_CONFIG binding, see
+ Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.yaml
+ for details.
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - mediatek,mt8183-disp-dither
+ - items:
+ - enum:
+ - mediatek,mt8186-disp-dither
+ - mediatek,mt8188-disp-dither
+ - mediatek,mt8192-disp-dither
+ - mediatek,mt8195-disp-dither
+ - mediatek,mt8365-disp-dither
+ - const: mediatek,mt8183-disp-dither
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ power-domains:
+ description: A phandle and PM domain specifier as defined by bindings of
+ the power controller specified by phandle. See
+ Documentation/devicetree/bindings/power/power-domain.yaml for details.
+
+ clocks:
+ items:
+ - description: DITHER Clock
+
+ mediatek,gce-client-reg:
+ description: The register of client driver can be configured by gce with
+ 4 arguments defined in this property, such as phandle of gce, subsys id,
+ register offset and size. Each GCE subsys id is mapping to a client
+ defined in the header include/dt-bindings/gce/<chip>-gce.h.
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ maxItems: 1
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+ description:
+ Input and output ports can have multiple endpoints, each of those
+ connects to either the primary, secondary, etc, display pipeline.
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: DITHER input, usually from a POSTMASK or GAMMA block.
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ DITHER output to the input of the next desired component in the
+ display pipeline, for example one of the available DSC compressors,
+ DP_INTF, DSI, LVDS or others.
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - power-domains
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/clock/mt8183-clk.h>
+ #include <dt-bindings/power/mt8183-power.h>
+ #include <dt-bindings/gce/mt8183-gce.h>
+
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ dither0: dither@14012000 {
+ compatible = "mediatek,mt8183-disp-dither";
+ reg = <0 0x14012000 0 0x1000>;
+ interrupts = <GIC_SPI 235 IRQ_TYPE_LEVEL_LOW>;
+ power-domains = <&spm MT8183_POWER_DOMAIN_DISP>;
+ clocks = <&mmsys CLK_MM_DISP_DITHER0>;
+ mediatek,gce-client-reg = <&gce SUBSYS_1401XXXX 0x2000 0x1000>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,dp.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,dp.yaml
new file mode 100644
index 000000000000..274f590807ca
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,dp.yaml
@@ -0,0 +1,138 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/mediatek/mediatek,dp.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek Display Port Controller
+
+maintainers:
+ - Chun-Kuang Hu <chunkuang.hu@kernel.org>
+ - Jitao shi <jitao.shi@mediatek.com>
+
+description: |
+ MediaTek DP and eDP are different hardwares and there are some features
+ which are not supported for eDP. For example, audio is not supported for
+ eDP. Therefore, we need to use two different compatibles to describe them.
+ In addition, We just need to enable the power domain of DP, so the clock
+ of DP is generated by itself and we are not using other PLL to generate
+ clocks.
+
+properties:
+ compatible:
+ enum:
+ - mediatek,mt8188-dp-tx
+ - mediatek,mt8188-edp-tx
+ - mediatek,mt8195-dp-tx
+ - mediatek,mt8195-edp-tx
+
+ reg:
+ maxItems: 1
+
+ nvmem-cells:
+ maxItems: 1
+ description: efuse data for display port calibration
+
+ nvmem-cell-names:
+ const: dp_calibration_data
+
+ power-domains:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ '#sound-dai-cells':
+ const: 0
+
+ aux-bus:
+ $ref: /schemas/display/dp-aux-bus.yaml#
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Input endpoint of the controller, usually dp_intf
+
+ port@1:
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ unevaluatedProperties: false
+ description: Output endpoint of the controller
+ properties:
+ endpoint:
+ $ref: /schemas/media/video-interfaces.yaml#
+ unevaluatedProperties: false
+ properties:
+ data-lanes:
+ description: |
+ number of lanes supported by the hardware.
+ The possible values:
+ 0 - For 1 lane enabled in IP.
+ 0 1 - For 2 lanes enabled in IP.
+ 0 1 2 3 - For 4 lanes enabled in IP.
+ minItems: 1
+ maxItems: 4
+ required:
+ - data-lanes
+
+ required:
+ - port@0
+ - port@1
+
+ max-linkrate-mhz:
+ enum: [ 1620, 2700, 5400, 8100 ]
+ description: maximum link rate supported by the hardware.
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - ports
+ - max-linkrate-mhz
+
+allOf:
+ - $ref: /schemas/sound/dai-common.yaml#
+ - if:
+ not:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - mediatek,mt8188-dp-tx
+ - mediatek,mt8195-dp-tx
+ then:
+ properties:
+ '#sound-dai-cells': false
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/power/mt8195-power.h>
+ dptx@1c600000 {
+ compatible = "mediatek,mt8195-dp-tx";
+ reg = <0x1c600000 0x8000>;
+ power-domains = <&spm MT8195_POWER_DOMAIN_DP_TX>;
+ interrupts = <GIC_SPI 458 IRQ_TYPE_LEVEL_HIGH 0>;
+ max-linkrate-mhz = <8100>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ dptx_in: endpoint {
+ remote-endpoint = <&dp_intf0_out>;
+ };
+ };
+ port@1 {
+ reg = <1>;
+ dptx_out: endpoint {
+ data-lanes = <0 1 2 3>;
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,dpi.txt b/Documentation/devicetree/bindings/display/mediatek/mediatek,dpi.txt
deleted file mode 100644
index b6a7e7397b8b..000000000000
--- a/Documentation/devicetree/bindings/display/mediatek/mediatek,dpi.txt
+++ /dev/null
@@ -1,35 +0,0 @@
-Mediatek DPI Device
-===================
-
-The Mediatek DPI function block is a sink of the display subsystem and
-provides 8-bit RGB/YUV444 or 8/10/10-bit YUV422 pixel data on a parallel
-output bus.
-
-Required properties:
-- compatible: "mediatek,<chip>-dpi"
-- reg: Physical base address and length of the controller's registers
-- interrupts: The interrupt signal from the function block.
-- clocks: device clocks
- See Documentation/devicetree/bindings/clock/clock-bindings.txt for details.
-- clock-names: must contain "pixel", "engine", and "pll"
-- port: Output port node with endpoint definitions as described in
- Documentation/devicetree/bindings/graph.txt. This port should be connected
- to the input port of an attached HDMI or LVDS encoder chip.
-
-Example:
-
-dpi0: dpi@1401d000 {
- compatible = "mediatek,mt8173-dpi";
- reg = <0 0x1401d000 0 0x1000>;
- interrupts = <GIC_SPI 194 IRQ_TYPE_LEVEL_LOW>;
- clocks = <&mmsys CLK_MM_DPI_PIXEL>,
- <&mmsys CLK_MM_DPI_ENGINE>,
- <&apmixedsys CLK_APMIXED_TVDPLL>;
- clock-names = "pixel", "engine", "pll";
-
- port {
- dpi0_out: endpoint {
- remote-endpoint = <&hdmi0_in>;
- };
- };
-};
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,dpi.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,dpi.yaml
new file mode 100644
index 000000000000..eb4f276e8dc4
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,dpi.yaml
@@ -0,0 +1,153 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/mediatek/mediatek,dpi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek DPI and DP_INTF Controller
+
+maintainers:
+ - CK Hu <ck.hu@mediatek.com>
+ - Jitao shi <jitao.shi@mediatek.com>
+
+description: |
+ The MediaTek DPI and DP_INTF function blocks are a sink of the display
+ subsystem and provides 8-bit RGB/YUV444 or 8/10/10-bit YUV422 pixel data on a
+ parallel output bus.
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - mediatek,mt2701-dpi
+ - mediatek,mt7623-dpi
+ - mediatek,mt8173-dpi
+ - mediatek,mt8183-dpi
+ - mediatek,mt8186-dpi
+ - mediatek,mt8188-dp-intf
+ - mediatek,mt8192-dpi
+ - mediatek,mt8195-dp-intf
+ - mediatek,mt8195-dpi
+ - items:
+ - enum:
+ - mediatek,mt6795-dpi
+ - const: mediatek,mt8183-dpi
+ - items:
+ - enum:
+ - mediatek,mt8365-dpi
+ - const: mediatek,mt8192-dpi
+ - items:
+ - enum:
+ - mediatek,mt8188-dpi
+ - const: mediatek,mt8195-dpi
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: Pixel Clock
+ - description: Engine Clock
+ - description: DPI PLL
+
+ clock-names:
+ items:
+ - const: pixel
+ - const: engine
+ - const: pll
+
+ pinctrl-0: true
+ pinctrl-1: true
+
+ pinctrl-names:
+ items:
+ - const: default
+ - const: sleep
+
+ power-domains:
+ description: |
+ The MediaTek DPI module is typically associated with one of the
+ following multimedia power domains:
+ POWER_DOMAIN_DISPLAY
+ POWER_DOMAIN_VDOSYS
+ POWER_DOMAIN_MM
+ The specific power domain used varies depending on the SoC design.
+
+ It is recommended to explicitly add the appropriate power domain
+ property to the DPI node in the device tree.
+ maxItems: 1
+
+ port:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Output port node. This port should be connected to the input port of an
+ attached HDMI, LVDS or DisplayPort encoder chip.
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: DPI input port
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: DPI output to an HDMI, LVDS or DisplayPort encoder input
+
+ required:
+ - port@0
+ - port@1
+
+ resets:
+ maxItems: 1
+
+ reset-names:
+ items:
+ - const: dpi
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+
+oneOf:
+ - required:
+ - port
+ - required:
+ - ports
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/clock/mt8173-clk.h>
+ #include <dt-bindings/power/mt8173-power.h>
+
+ dpi: dpi@1401d000 {
+ compatible = "mediatek,mt8173-dpi";
+ reg = <0x1401d000 0x1000>;
+ interrupts = <GIC_SPI 194 IRQ_TYPE_LEVEL_LOW>;
+ power-domains = <&spm MT8173_POWER_DOMAIN_MM>;
+ clocks = <&mmsys CLK_MM_DPI_PIXEL>,
+ <&mmsys CLK_MM_DPI_ENGINE>,
+ <&apmixedsys CLK_APMIXED_TVDPLL>;
+ clock-names = "pixel", "engine", "pll";
+ pinctrl-names = "default", "sleep";
+ pinctrl-0 = <&dpi_pin_func>;
+ pinctrl-1 = <&dpi_pin_idle>;
+
+ port {
+ dpi0_out: endpoint {
+ remote-endpoint = <&hdmi0_in>;
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,dsc.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,dsc.yaml
new file mode 100644
index 000000000000..a5b88eb97e3b
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,dsc.yaml
@@ -0,0 +1,107 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/mediatek/mediatek,dsc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: mediatek display DSC controller
+
+maintainers:
+ - Chun-Kuang Hu <chunkuang.hu@kernel.org>
+ - Philipp Zabel <p.zabel@pengutronix.de>
+
+description: |
+ The DSC standard is a specification of the algorithms used for
+ compressing and decompressing image display streams, including
+ the specification of the syntax and semantics of the compressed
+ video bit stream. DSC is designed for real-time systems with
+ real-time compression, transmission, decompression and Display.
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - mediatek,mt8195-disp-dsc
+ - items:
+ - const: mediatek,mt8188-disp-dsc
+ - const: mediatek,mt8195-disp-dsc
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: DSC Wrapper Clock
+
+ power-domains:
+ description: A phandle and PM domain specifier as defined by bindings of
+ the power controller specified by phandle. See
+ Documentation/devicetree/bindings/power/power-domain.yaml for details.
+
+ mediatek,gce-client-reg:
+ description:
+ The register of client driver can be configured by gce with 4 arguments
+ defined in this property, such as phandle of gce, subsys id,
+ register offset and size.
+ Each subsys id is mapping to a base address of display function blocks
+ register which is defined in the gce header
+ include/dt-bindings/gce/<chip>-gce.h.
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ maxItems: 1
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+ description:
+ Input and output ports can have multiple endpoints, each of those
+ connects to either the primary, secondary, etc, display pipeline.
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Display Stream Compression input, usually from one of the DITHER
+ or MERGE blocks.
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Display Stream Compression output to the input of the next desired
+ component in the display pipeline, for example to MERGE, DP_INTF,
+ DPI or DSI.
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - power-domains
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/clock/mt8195-clk.h>
+ #include <dt-bindings/power/mt8195-power.h>
+ #include <dt-bindings/gce/mt8195-gce.h>
+
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ dsc0: disp_dsc_wrap@1c009000 {
+ compatible = "mediatek,mt8195-disp-dsc";
+ reg = <0 0x1c009000 0 0x1000>;
+ interrupts = <GIC_SPI 645 IRQ_TYPE_LEVEL_HIGH 0>;
+ power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS0>;
+ clocks = <&vdosys0 CLK_VDO0_DSC_WRAP0>;
+ mediatek,gce-client-reg = <&gce1 SUBSYS_1c00XXXX 0x9000 0x1000>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.txt b/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.txt
deleted file mode 100644
index fadf327c7cdf..000000000000
--- a/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.txt
+++ /dev/null
@@ -1,62 +0,0 @@
-Mediatek DSI Device
-===================
-
-The Mediatek DSI function block is a sink of the display subsystem and can
-drive up to 4-lane MIPI DSI output. Two DSIs can be synchronized for dual-
-channel output.
-
-Required properties:
-- compatible: "mediatek,<chip>-dsi"
- the supported chips are mt2701 and mt8173.
-- reg: Physical base address and length of the controller's registers
-- interrupts: The interrupt signal from the function block.
-- clocks: device clocks
- See Documentation/devicetree/bindings/clock/clock-bindings.txt for details.
-- clock-names: must contain "engine", "digital", and "hs"
-- phys: phandle link to the MIPI D-PHY controller.
-- phy-names: must contain "dphy"
-- port: Output port node with endpoint definitions as described in
- Documentation/devicetree/bindings/graph.txt. This port should be connected
- to the input port of an attached DSI panel or DSI-to-eDP encoder chip.
-
-MIPI TX Configuration Module
-============================
-
-The MIPI TX configuration module controls the MIPI D-PHY.
-
-Required properties:
-- compatible: "mediatek,<chip>-mipi-tx"
- the supported chips are mt2701 and mt8173.
-- reg: Physical base address and length of the controller's registers
-- clocks: PLL reference clock
-- clock-output-names: name of the output clock line to the DSI encoder
-- #clock-cells: must be <0>;
-- #phy-cells: must be <0>.
-
-Example:
-
-mipi_tx0: mipi-dphy@10215000 {
- compatible = "mediatek,mt8173-mipi-tx";
- reg = <0 0x10215000 0 0x1000>;
- clocks = <&clk26m>;
- clock-output-names = "mipi_tx0_pll";
- #clock-cells = <0>;
- #phy-cells = <0>;
-};
-
-dsi0: dsi@1401b000 {
- compatible = "mediatek,mt8173-dsi";
- reg = <0 0x1401b000 0 0x1000>;
- interrupts = <GIC_SPI 192 IRQ_TYPE_LEVEL_LOW>;
- clocks = <&mmsys MM_DSI0_ENGINE>, <&mmsys MM_DSI0_DIGITAL>,
- <&mipi_tx0>;
- clock-names = "engine", "digital", "hs";
- phys = <&mipi_tx0>;
- phy-names = "dphy";
-
- port {
- dsi0_out: endpoint {
- remote-endpoint = <&panel_in>;
- };
- };
-};
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.yaml
new file mode 100644
index 000000000000..27ffbccc2a08
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.yaml
@@ -0,0 +1,151 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/mediatek/mediatek,dsi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek DSI Controller
+
+maintainers:
+ - Chun-Kuang Hu <chunkuang.hu@kernel.org>
+ - Philipp Zabel <p.zabel@pengutronix.de>
+ - Jitao Shi <jitao.shi@mediatek.com>
+
+description: |
+ The MediaTek DSI function block is a sink of the display subsystem and can
+ drive up to 4-lane MIPI DSI output. Two DSIs can be synchronized for dual-
+ channel output.
+
+allOf:
+ - $ref: /schemas/display/dsi-controller.yaml#
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - mediatek,mt2701-dsi
+ - mediatek,mt7623-dsi
+ - mediatek,mt8167-dsi
+ - mediatek,mt8173-dsi
+ - mediatek,mt8183-dsi
+ - mediatek,mt8186-dsi
+ - mediatek,mt8188-dsi
+ - items:
+ - enum:
+ - mediatek,mt6795-dsi
+ - const: mediatek,mt8173-dsi
+ - items:
+ - enum:
+ - mediatek,mt8195-dsi
+ - mediatek,mt8365-dsi
+ - const: mediatek,mt8183-dsi
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ power-domains:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: Engine Clock
+ - description: Digital Clock
+ - description: HS Clock
+
+ clock-names:
+ items:
+ - const: engine
+ - const: digital
+ - const: hs
+
+ resets:
+ maxItems: 1
+
+ phys:
+ maxItems: 1
+
+ phy-names:
+ items:
+ - const: dphy
+
+ port:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Output port node. This port should be connected to the input
+ port of an attached DSI panel or DSI-to-eDP encoder chip.
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+ description:
+ Input ports can have multiple endpoints, each of those connects
+ to either the primary, secondary, etc, display pipeline.
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: DSI input port, usually from DITHER, DSC or MERGE
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ DSI output to an attached DSI panel, or a DSI-to-X encoder chip
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - power-domains
+ - clocks
+ - clock-names
+ - phys
+ - phy-names
+
+oneOf:
+ - required:
+ - port
+ - required:
+ - ports
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/mt8183-clk.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/power/mt8183-power.h>
+ #include <dt-bindings/phy/phy.h>
+ #include <dt-bindings/reset/mt8183-resets.h>
+
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ dsi0: dsi@14014000 {
+ compatible = "mediatek,mt8183-dsi";
+ reg = <0 0x14014000 0 0x1000>;
+ interrupts = <GIC_SPI 236 IRQ_TYPE_LEVEL_LOW>;
+ power-domains = <&spm MT8183_POWER_DOMAIN_DISP>;
+ clocks = <&mmsys CLK_MM_DSI0_MM>,
+ <&mmsys CLK_MM_DSI0_IF>,
+ <&mipi_tx0>;
+ clock-names = "engine", "digital", "hs";
+ resets = <&mmsys MT8183_MMSYS_SW0_RST_B_DISP_DSI0>;
+ phys = <&mipi_tx0>;
+ phy-names = "dphy";
+ port {
+ dsi0_out: endpoint {
+ remote-endpoint = <&panel_in>;
+ };
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,ethdr.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,ethdr.yaml
new file mode 100644
index 000000000000..98db47894eeb
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,ethdr.yaml
@@ -0,0 +1,208 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/mediatek/mediatek,ethdr.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek Ethdr Device
+
+maintainers:
+ - Chun-Kuang Hu <chunkuang.hu@kernel.org>
+ - Philipp Zabel <p.zabel@pengutronix.de>
+
+description:
+ ETHDR (ET High Dynamic Range) is a MediaTek internal HDR engine and is
+ designed for HDR video and graphics conversion in the external display path.
+ It handles multiple HDR input types and performs tone mapping, color
+ space/color format conversion, and then combine different layers,
+ output the required HDR or SDR signal to the subsequent display path.
+ This engine is composed of two video frontends, two graphic frontends,
+ one video backend and a mixer. ETHDR has two DMA function blocks, DS and ADL.
+ These two function blocks read the pre-programmed registers from DRAM and
+ set them to HW in the v-blanking period.
+
+properties:
+ compatible:
+ oneOf:
+ - const: mediatek,mt8195-disp-ethdr
+ - items:
+ - const: mediatek,mt8188-disp-ethdr
+ - const: mediatek,mt8195-disp-ethdr
+
+ reg:
+ maxItems: 7
+
+ reg-names:
+ items:
+ - const: mixer
+ - const: vdo_fe0
+ - const: vdo_fe1
+ - const: gfx_fe0
+ - const: gfx_fe1
+ - const: vdo_be
+ - const: adl_ds
+
+ interrupts:
+ maxItems: 1
+
+ iommus:
+ minItems: 1
+ maxItems: 2
+
+ clocks:
+ items:
+ - description: mixer clock
+ - description: video frontend 0 clock
+ - description: video frontend 1 clock
+ - description: graphic frontend 0 clock
+ - description: graphic frontend 1 clock
+ - description: video backend clock
+ - description: autodownload and menuload clock
+ - description: video frontend 0 async clock
+ - description: video frontend 1 async clock
+ - description: graphic frontend 0 async clock
+ - description: graphic frontend 1 async clock
+ - description: video backend async clock
+ - description: ethdr top clock
+
+ clock-names:
+ items:
+ - const: mixer
+ - const: vdo_fe0
+ - const: vdo_fe1
+ - const: gfx_fe0
+ - const: gfx_fe1
+ - const: vdo_be
+ - const: adl_ds
+ - const: vdo_fe0_async
+ - const: vdo_fe1_async
+ - const: gfx_fe0_async
+ - const: gfx_fe1_async
+ - const: vdo_be_async
+ - const: ethdr_top
+
+ power-domains:
+ maxItems: 1
+
+ resets:
+ items:
+ - description: video frontend 0 async reset
+ - description: video frontend 1 async reset
+ - description: graphic frontend 0 async reset
+ - description: graphic frontend 1 async reset
+ - description: video backend async reset
+
+ reset-names:
+ items:
+ - const: vdo_fe0_async
+ - const: vdo_fe1_async
+ - const: gfx_fe0_async
+ - const: gfx_fe1_async
+ - const: vdo_be_async
+
+ mediatek,gce-client-reg:
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ minItems: 1
+ maxItems: 7
+ description: The register of display function block to be set by gce.
+ There are 4 arguments in this property, gce node, subsys id, offset and
+ register size. The subsys id is defined in the gce header of each chips
+ include/dt-bindings/gce/<chip>-gce.h, mapping to the register of display
+ function block.
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+ description:
+ Input and output ports can have multiple endpoints, each of those
+ connects to either the primary, secondary, etc, display pipeline.
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: ETHDR input, usually from one of the MERGE blocks.
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ ETHDR output to the input of the next desired component in the
+ display pipeline, for example one of the available MERGE blocks,
+ or others.
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - interrupts
+ - power-domains
+ - resets
+ - mediatek,gce-client-reg
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/clock/mt8195-clk.h>
+ #include <dt-bindings/gce/mt8195-gce.h>
+ #include <dt-bindings/memory/mt8195-memory-port.h>
+ #include <dt-bindings/power/mt8195-power.h>
+ #include <dt-bindings/reset/mt8195-resets.h>
+
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ hdr-engine@1c114000 {
+ compatible = "mediatek,mt8195-disp-ethdr";
+ reg = <0 0x1c114000 0 0x1000>,
+ <0 0x1c115000 0 0x1000>,
+ <0 0x1c117000 0 0x1000>,
+ <0 0x1c119000 0 0x1000>,
+ <0 0x1c11a000 0 0x1000>,
+ <0 0x1c11b000 0 0x1000>,
+ <0 0x1c11c000 0 0x1000>;
+ reg-names = "mixer", "vdo_fe0", "vdo_fe1", "gfx_fe0", "gfx_fe1",
+ "vdo_be", "adl_ds";
+ mediatek,gce-client-reg = <&gce0 SUBSYS_1c11XXXX 0x4000 0x1000>,
+ <&gce0 SUBSYS_1c11XXXX 0x5000 0x1000>,
+ <&gce0 SUBSYS_1c11XXXX 0x7000 0x1000>,
+ <&gce0 SUBSYS_1c11XXXX 0x9000 0x1000>,
+ <&gce0 SUBSYS_1c11XXXX 0xa000 0x1000>,
+ <&gce0 SUBSYS_1c11XXXX 0xb000 0x1000>,
+ <&gce0 SUBSYS_1c11XXXX 0xc000 0x1000>;
+ clocks = <&vdosys1 CLK_VDO1_DISP_MIXER>,
+ <&vdosys1 CLK_VDO1_HDR_VDO_FE0>,
+ <&vdosys1 CLK_VDO1_HDR_VDO_FE1>,
+ <&vdosys1 CLK_VDO1_HDR_GFX_FE0>,
+ <&vdosys1 CLK_VDO1_HDR_GFX_FE1>,
+ <&vdosys1 CLK_VDO1_HDR_VDO_BE>,
+ <&vdosys1 CLK_VDO1_26M_SLOW>,
+ <&vdosys1 CLK_VDO1_HDR_VDO_FE0_DL_ASYNC>,
+ <&vdosys1 CLK_VDO1_HDR_VDO_FE1_DL_ASYNC>,
+ <&vdosys1 CLK_VDO1_HDR_GFX_FE0_DL_ASYNC>,
+ <&vdosys1 CLK_VDO1_HDR_GFX_FE1_DL_ASYNC>,
+ <&vdosys1 CLK_VDO1_HDR_VDO_BE_DL_ASYNC>,
+ <&topckgen CLK_TOP_ETHDR>;
+ clock-names = "mixer", "vdo_fe0", "vdo_fe1", "gfx_fe0", "gfx_fe1",
+ "vdo_be", "adl_ds", "vdo_fe0_async", "vdo_fe1_async",
+ "gfx_fe0_async", "gfx_fe1_async","vdo_be_async",
+ "ethdr_top";
+ power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>;
+ iommus = <&iommu_vpp M4U_PORT_L3_HDR_DS>,
+ <&iommu_vpp M4U_PORT_L3_HDR_ADL>;
+ interrupts = <GIC_SPI 517 IRQ_TYPE_LEVEL_HIGH 0>; /* disp mixer */
+ resets = <&vdosys1 MT8195_VDOSYS1_SW1_RST_B_HDR_VDO_FE0_DL_ASYNC>,
+ <&vdosys1 MT8195_VDOSYS1_SW1_RST_B_HDR_VDO_FE1_DL_ASYNC>,
+ <&vdosys1 MT8195_VDOSYS1_SW1_RST_B_HDR_GFX_FE0_DL_ASYNC>,
+ <&vdosys1 MT8195_VDOSYS1_SW1_RST_B_HDR_GFX_FE1_DL_ASYNC>,
+ <&vdosys1 MT8195_VDOSYS1_SW1_RST_B_HDR_VDO_BE_DL_ASYNC>;
+ reset-names = "vdo_fe0_async", "vdo_fe1_async", "gfx_fe0_async",
+ "gfx_fe1_async", "vdo_be_async";
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,gamma.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,gamma.yaml
new file mode 100644
index 000000000000..48542dc7e784
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,gamma.yaml
@@ -0,0 +1,115 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/mediatek/mediatek,gamma.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Mediatek display gamma correction
+
+maintainers:
+ - Chun-Kuang Hu <chunkuang.hu@kernel.org>
+ - Philipp Zabel <p.zabel@pengutronix.de>
+
+description: |
+ Mediatek display gamma correction, namely GAMMA, provides a nonlinear
+ operation used to adjust luminance in display system.
+ GAMMA device node must be siblings to the central MMSYS_CONFIG node.
+ For a description of the MMSYS_CONFIG binding, see
+ Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.yaml
+ for details.
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - mediatek,mt8173-disp-gamma
+ - mediatek,mt8183-disp-gamma
+ - mediatek,mt8195-disp-gamma
+ - items:
+ - enum:
+ - mediatek,mt6795-disp-gamma
+ - const: mediatek,mt8173-disp-gamma
+ - items:
+ - enum:
+ - mediatek,mt8186-disp-gamma
+ - mediatek,mt8188-disp-gamma
+ - mediatek,mt8192-disp-gamma
+ - mediatek,mt8195-disp-gamma
+ - mediatek,mt8365-disp-gamma
+ - const: mediatek,mt8183-disp-gamma
+ - items:
+ - enum:
+ - mediatek,mt8188-disp-gamma
+ - const: mediatek,mt8195-disp-gamma
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ power-domains:
+ description: A phandle and PM domain specifier as defined by bindings of
+ the power controller specified by phandle. See
+ Documentation/devicetree/bindings/power/power-domain.yaml for details.
+
+ clocks:
+ items:
+ - description: GAMMA Clock
+
+ mediatek,gce-client-reg:
+ description: The register of client driver can be configured by gce with
+ 4 arguments defined in this property, such as phandle of gce, subsys id,
+ register offset and size. Each GCE subsys id is mapping to a client
+ defined in the header include/dt-bindings/gce/<chip>-gce.h.
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ maxItems: 1
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: GAMMA input, usually from one of the AAL blocks.
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ GAMMA output to the input of the next desired component in the
+ display pipeline, for example one of the available DITHER or
+ POSTMASK blocks.
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - power-domains
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/clock/mt8173-clk.h>
+ #include <dt-bindings/power/mt8173-power.h>
+ #include <dt-bindings/gce/mt8173-gce.h>
+
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ gamma@14016000 {
+ compatible = "mediatek,mt8173-disp-gamma";
+ reg = <0 0x14016000 0 0x1000>;
+ interrupts = <GIC_SPI 190 IRQ_TYPE_LEVEL_LOW>;
+ power-domains = <&scpsys MT8173_POWER_DOMAIN_MM>;
+ clocks = <&mmsys CLK_MM_DISP_GAMMA>;
+ mediatek,gce-client-reg = <&gce SUBSYS_1401XXXX 0x6000 0x1000>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,hdmi-ddc.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,hdmi-ddc.yaml
new file mode 100644
index 000000000000..bd8f7b8ae0ff
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,hdmi-ddc.yaml
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/mediatek/mediatek,hdmi-ddc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Mediatek HDMI DDC
+
+maintainers:
+ - CK Hu <ck.hu@mediatek.com>
+ - Jitao shi <jitao.shi@mediatek.com>
+
+description: |
+ The HDMI DDC i2c controller is used to interface with the HDMI DDC pins.
+
+properties:
+ compatible:
+ enum:
+ - mediatek,mt7623-hdmi-ddc
+ - mediatek,mt8167-hdmi-ddc
+ - mediatek,mt8173-hdmi-ddc
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ items:
+ - const: ddc-i2c
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/mt8173-clk.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+ hdmi_ddc0: i2c@11012000 {
+ compatible = "mediatek,mt8173-hdmi-ddc";
+ reg = <0x11012000 0x1c>;
+ interrupts = <GIC_SPI 81 IRQ_TYPE_LEVEL_LOW>;
+ clocks = <&pericfg CLK_PERI_I2C5>;
+ clock-names = "ddc-i2c";
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,hdmi.txt b/Documentation/devicetree/bindings/display/mediatek/mediatek,hdmi.txt
deleted file mode 100644
index 7b124242b0c5..000000000000
--- a/Documentation/devicetree/bindings/display/mediatek/mediatek,hdmi.txt
+++ /dev/null
@@ -1,148 +0,0 @@
-Mediatek HDMI Encoder
-=====================
-
-The Mediatek HDMI encoder can generate HDMI 1.4a or MHL 2.0 signals from
-its parallel input.
-
-Required properties:
-- compatible: Should be "mediatek,<chip>-hdmi".
-- reg: Physical base address and length of the controller's registers
-- interrupts: The interrupt signal from the function block.
-- clocks: device clocks
- See Documentation/devicetree/bindings/clock/clock-bindings.txt for details.
-- clock-names: must contain "pixel", "pll", "bclk", and "spdif".
-- phys: phandle link to the HDMI PHY node.
- See Documentation/devicetree/bindings/phy/phy-bindings.txt for details.
-- phy-names: must contain "hdmi"
-- mediatek,syscon-hdmi: phandle link and register offset to the system
- configuration registers. For mt8173 this must be offset 0x900 into the
- MMSYS_CONFIG region: <&mmsys 0x900>.
-- ports: A node containing input and output port nodes with endpoint
- definitions as documented in Documentation/devicetree/bindings/graph.txt.
-- port@0: The input port in the ports node should be connected to a DPI output
- port.
-- port@1: The output port in the ports node should be connected to the input
- port of a connector node that contains a ddc-i2c-bus property, or to the
- input port of an attached bridge chip, such as a SlimPort transmitter.
-
-HDMI CEC
-========
-
-The HDMI CEC controller handles hotplug detection and CEC communication.
-
-Required properties:
-- compatible: Should be "mediatek,<chip>-cec"
-- reg: Physical base address and length of the controller's registers
-- interrupts: The interrupt signal from the function block.
-- clocks: device clock
-
-HDMI DDC
-========
-
-The HDMI DDC i2c controller is used to interface with the HDMI DDC pins.
-The Mediatek's I2C controller is used to interface with I2C devices.
-
-Required properties:
-- compatible: Should be "mediatek,<chip>-hdmi-ddc"
-- reg: Physical base address and length of the controller's registers
-- clocks: device clock
-- clock-names: Should be "ddc-i2c".
-
-HDMI PHY
-========
-
-The HDMI PHY serializes the HDMI encoder's three channel 10-bit parallel
-output and drives the HDMI pads.
-
-Required properties:
-- compatible: "mediatek,<chip>-hdmi-phy"
-- reg: Physical base address and length of the module's registers
-- clocks: PLL reference clock
-- clock-names: must contain "pll_ref"
-- clock-output-names: must be "hdmitx_dig_cts" on mt8173
-- #phy-cells: must be <0>
-- #clock-cells: must be <0>
-
-Optional properties:
-- mediatek,ibias: TX DRV bias current for <1.65Gbps, defaults to 0xa
-- mediatek,ibias_up: TX DRV bias current for >1.65Gbps, defaults to 0x1c
-
-Example:
-
-cec: cec@10013000 {
- compatible = "mediatek,mt8173-cec";
- reg = <0 0x10013000 0 0xbc>;
- interrupts = <GIC_SPI 167 IRQ_TYPE_LEVEL_LOW>;
- clocks = <&infracfg CLK_INFRA_CEC>;
-};
-
-hdmi_phy: hdmi-phy@10209100 {
- compatible = "mediatek,mt8173-hdmi-phy";
- reg = <0 0x10209100 0 0x24>;
- clocks = <&apmixedsys CLK_APMIXED_HDMI_REF>;
- clock-names = "pll_ref";
- clock-output-names = "hdmitx_dig_cts";
- mediatek,ibias = <0xa>;
- mediatek,ibias_up = <0x1c>;
- #clock-cells = <0>;
- #phy-cells = <0>;
-};
-
-hdmi_ddc0: i2c@11012000 {
- compatible = "mediatek,mt8173-hdmi-ddc";
- reg = <0 0x11012000 0 0x1c>;
- interrupts = <GIC_SPI 81 IRQ_TYPE_LEVEL_LOW>;
- clocks = <&pericfg CLK_PERI_I2C5>;
- clock-names = "ddc-i2c";
-};
-
-hdmi0: hdmi@14025000 {
- compatible = "mediatek,mt8173-hdmi";
- reg = <0 0x14025000 0 0x400>;
- interrupts = <GIC_SPI 206 IRQ_TYPE_LEVEL_LOW>;
- clocks = <&mmsys CLK_MM_HDMI_PIXEL>,
- <&mmsys CLK_MM_HDMI_PLLCK>,
- <&mmsys CLK_MM_HDMI_AUDIO>,
- <&mmsys CLK_MM_HDMI_SPDIF>;
- clock-names = "pixel", "pll", "bclk", "spdif";
- pinctrl-names = "default";
- pinctrl-0 = <&hdmi_pin>;
- phys = <&hdmi_phy>;
- phy-names = "hdmi";
- mediatek,syscon-hdmi = <&mmsys 0x900>;
- assigned-clocks = <&topckgen CLK_TOP_HDMI_SEL>;
- assigned-clock-parents = <&hdmi_phy>;
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
-
- hdmi0_in: endpoint {
- remote-endpoint = <&dpi0_out>;
- };
- };
-
- port@1 {
- reg = <1>;
-
- hdmi0_out: endpoint {
- remote-endpoint = <&hdmi_con_in>;
- };
- };
- };
-};
-
-connector {
- compatible = "hdmi-connector";
- type = "a";
- ddc-i2c-bus = <&hdmiddc0>;
-
- port {
- hdmi_con_in: endpoint {
- remote-endpoint = <&hdmi0_out>;
- };
- };
-};
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,hdmi.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,hdmi.yaml
new file mode 100644
index 000000000000..b90b6d18a828
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,hdmi.yaml
@@ -0,0 +1,136 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/mediatek/mediatek,hdmi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Mediatek HDMI Encoder
+
+maintainers:
+ - CK Hu <ck.hu@mediatek.com>
+ - Jitao shi <jitao.shi@mediatek.com>
+
+description: |
+ The Mediatek HDMI encoder can generate HDMI 1.4a or MHL 2.0 signals from
+ its parallel input.
+
+properties:
+ compatible:
+ enum:
+ - mediatek,mt2701-hdmi
+ - mediatek,mt7623-hdmi
+ - mediatek,mt8167-hdmi
+ - mediatek,mt8173-hdmi
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: Pixel Clock
+ - description: HDMI PLL
+ - description: Bit Clock
+ - description: S/PDIF Clock
+
+ clock-names:
+ items:
+ - const: pixel
+ - const: pll
+ - const: bclk
+ - const: spdif
+
+ phys:
+ maxItems: 1
+
+ phy-names:
+ items:
+ - const: hdmi
+
+ mediatek,syscon-hdmi:
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ items:
+ - items:
+ - description: phandle to system configuration registers
+ - description: register offset in the system configuration registers
+ description: |
+ phandle link and register offset to the system configuration registers.
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: |
+ Input port node. This port should be connected to a DPI output port.
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: |
+ Output port node. This port should be connected to the input port of a connector
+ node that contains a ddc-i2c-bus property, or to the input port of an attached
+ bridge chip, such as a SlimPort transmitter.
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+ - phys
+ - phy-names
+ - mediatek,syscon-hdmi
+ - ports
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/mt8173-clk.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+ hdmi0: hdmi@14025000 {
+ compatible = "mediatek,mt8173-hdmi";
+ reg = <0x14025000 0x400>;
+ interrupts = <GIC_SPI 206 IRQ_TYPE_LEVEL_LOW>;
+ clocks = <&mmsys CLK_MM_HDMI_PIXEL>,
+ <&mmsys CLK_MM_HDMI_PLLCK>,
+ <&mmsys CLK_MM_HDMI_AUDIO>,
+ <&mmsys CLK_MM_HDMI_SPDIF>;
+ clock-names = "pixel", "pll", "bclk", "spdif";
+ pinctrl-names = "default";
+ pinctrl-0 = <&hdmi_pin>;
+ phys = <&hdmi_phy>;
+ phy-names = "hdmi";
+ mediatek,syscon-hdmi = <&mmsys 0x900>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ hdmi0_in: endpoint {
+ remote-endpoint = <&dpi0_out>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ hdmi0_out: endpoint {
+ remote-endpoint = <&hdmi_con_in>;
+ };
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,merge.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,merge.yaml
new file mode 100644
index 000000000000..3798a25402d3
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,merge.yaml
@@ -0,0 +1,137 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/mediatek/mediatek,merge.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Mediatek display merge
+
+maintainers:
+ - Chun-Kuang Hu <chunkuang.hu@kernel.org>
+ - Philipp Zabel <p.zabel@pengutronix.de>
+
+description: |
+ Mediatek display merge, namely MERGE, is used to merge two slice-per-line
+ inputs into one side-by-side output.
+ MERGE device node must be siblings to the central MMSYS_CONFIG node.
+ For a description of the MMSYS_CONFIG binding, see
+ Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.yaml
+ for details.
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - mediatek,mt8173-disp-merge
+ - mediatek,mt8195-disp-merge
+ - mediatek,mt8195-mdp3-merge
+ - items:
+ - enum:
+ - mediatek,mt8188-mdp3-merge
+ - const: mediatek,mt8195-mdp3-merge
+ - items:
+ - const: mediatek,mt6795-disp-merge
+ - const: mediatek,mt8173-disp-merge
+ - items:
+ - const: mediatek,mt8188-disp-merge
+ - const: mediatek,mt8195-disp-merge
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ power-domains:
+ description: A phandle and PM domain specifier as defined by bindings of
+ the power controller specified by phandle. See
+ Documentation/devicetree/bindings/power/power-domain.yaml for details.
+
+ clocks:
+ minItems: 1
+ maxItems: 2
+
+ clock-names:
+ oneOf:
+ - items:
+ - const: merge
+ - items:
+ - const: merge
+ - const: merge_async
+
+ mediatek,merge-fifo-en:
+ description:
+ The setting of merge fifo is mainly provided for the display latency
+ buffer to ensure that the back-end panel display data will not be
+ underrun, a little more data is needed in the fifo.
+ According to the merge fifo settings, when the water level is detected
+ to be insufficient, it will trigger RDMA sending ultra and preulra
+ command to SMI to speed up the data rate.
+ type: boolean
+
+ mediatek,merge-mute:
+ description: Support mute function. Mute the content of merge output.
+ type: boolean
+
+ mediatek,gce-client-reg:
+ description: The register of client driver can be configured by gce with
+ 4 arguments defined in this property, such as phandle of gce, subsys id,
+ register offset and size. Each GCE subsys id is mapping to a client
+ defined in the header include/dt-bindings/gce/<chip>-gce.h.
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ maxItems: 1
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+ description:
+ Input and output ports can have multiple endpoints, each of those
+ connects to either the primary, secondary, etc, display pipeline.
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ MERGE input port, usually from DITHER, DPI, DSC, DSI, MDP_RDMA,
+ ETHDR or even from a different MERGE block
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ MERGE output to a DSC, DPI, DP_INTF, DSI, ETHDR, Write DMA, or
+ a different MERGE block, or others.
+
+ required:
+ - port@0
+ - port@1
+
+ resets:
+ description: reset controller
+ See Documentation/devicetree/bindings/reset/reset.txt for details.
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - power-domains
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/clock/mt8173-clk.h>
+ #include <dt-bindings/power/mt8173-power.h>
+
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ merge@14017000 {
+ compatible = "mediatek,mt8173-disp-merge";
+ reg = <0 0x14017000 0 0x1000>;
+ power-domains = <&spm MT8173_POWER_DOMAIN_MM>;
+ clocks = <&mmsys CLK_MM_DISP_MERGE>;
+ clock-names = "merge";
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,mt8195-hdmi-ddc.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,mt8195-hdmi-ddc.yaml
new file mode 100644
index 000000000000..bde4dc556d4f
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,mt8195-hdmi-ddc.yaml
@@ -0,0 +1,41 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/mediatek/mediatek,mt8195-hdmi-ddc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek HDMI MT8195 series HDMI Display Data Channel (DDC)
+
+maintainers:
+ - AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
+ - CK Hu <ck.hu@mediatek.com>
+
+properties:
+ compatible:
+ oneOf:
+ - const: mediatek,mt8195-hdmi-ddc
+ - items:
+ - const: mediatek,mt8188-hdmi-ddc
+ - const: mediatek,mt8195-hdmi-ddc
+
+ clocks:
+ maxItems: 1
+
+ power-domains:
+ maxItems: 1
+
+required:
+ - compatible
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ hdmi {
+ hdmi_ddc: i2c {
+ compatible = "mediatek,mt8195-hdmi-ddc";
+ clocks = <&clk26m>;
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,mt8195-hdmi.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,mt8195-hdmi.yaml
new file mode 100644
index 000000000000..1b382f99d3ce
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,mt8195-hdmi.yaml
@@ -0,0 +1,151 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/mediatek/mediatek,mt8195-hdmi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek MT8195 series HDMI-TX Encoder
+
+maintainers:
+ - AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
+ - CK Hu <ck.hu@mediatek.com>
+
+description:
+ The MediaTek HDMI-TX v2 encoder can generate HDMI format data based on
+ the HDMI Specification 2.0b.
+
+properties:
+ compatible:
+ enum:
+ - mediatek,mt8188-hdmi-tx
+ - mediatek,mt8195-hdmi-tx
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: HDMI Peripheral Bus (APB) clock
+ - description: HDCP and HDMI_TOP clock
+ - description: HDCP, HDMI_TOP and HDMI Audio reference clock
+ - description: VPP HDMI Split clock
+
+ clock-names:
+ items:
+ - const: bus
+ - const: hdcp
+ - const: hdcp24m
+ - const: hdmi-split
+
+ i2c:
+ type: object
+ $ref: /schemas/display/mediatek/mediatek,mt8195-hdmi-ddc.yaml
+ unevaluatedProperties: false
+ description: HDMI DDC I2C controller
+
+ phys:
+ maxItems: 1
+ description: PHY providing clocking TMDS and pixel to controller
+
+ phy-names:
+ items:
+ - const: hdmi
+
+ power-domains:
+ maxItems: 1
+
+ '#sound-dai-cells':
+ const: 1
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Input port, usually connected to the output port of a DPI
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Output port that must be connected either to the input port of
+ a HDMI connector node containing a ddc-i2c-bus, or to the input
+ port of an attached bridge chip, such as a SlimPort transmitter.
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+ - power-domains
+ - phys
+ - phy-names
+ - ports
+
+allOf:
+ - $ref: /schemas/sound/dai-common.yaml#
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/mt8195-clk.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/power/mt8195-power.h>
+
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ hdmi@1c300000 {
+ compatible = "mediatek,mt8195-hdmi-tx";
+ reg = <0 0x1c300000 0 0x1000>;
+ clocks = <&topckgen CLK_TOP_HDMI_APB>,
+ <&topckgen CLK_TOP_HDCP>,
+ <&topckgen CLK_TOP_HDCP_24M>,
+ <&vppsys1 CLK_VPP1_VPP_SPLIT_HDMI>;
+ clock-names = "bus", "hdcp", "hdcp24m", "hdmi-split";
+ interrupts = <GIC_SPI 677 IRQ_TYPE_LEVEL_HIGH 0>;
+ phys = <&hdmi_phy>;
+ phy-names = "hdmi";
+ power-domains = <&spm MT8195_POWER_DOMAIN_HDMI_TX>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&hdmi_pins>;
+ #sound-dai-cells = <1>;
+
+ hdmitx_ddc: i2c {
+ compatible = "mediatek,mt8195-hdmi-ddc";
+ clocks = <&clk26m>;
+ };
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ hdmi_in: endpoint {
+ remote-endpoint = <&dpi1_out>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ hdmi_out: endpoint {
+ remote-endpoint = <&hdmi_connector_in>;
+ };
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,od.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,od.yaml
new file mode 100644
index 000000000000..930c088a722a
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,od.yaml
@@ -0,0 +1,97 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/mediatek/mediatek,od.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Mediatek display overdirve
+
+maintainers:
+ - Chun-Kuang Hu <chunkuang.hu@kernel.org>
+ - Philipp Zabel <p.zabel@pengutronix.de>
+
+description: |
+ Mediatek display overdrive, namely OD, increases the transition values
+ of pixels between consecutive frames to make LCD rotate faster.
+ OD device node must be siblings to the central MMSYS_CONFIG node.
+ For a description of the MMSYS_CONFIG binding, see
+ Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.yaml
+ for details.
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - mediatek,mt2712-disp-od
+ - mediatek,mt8173-disp-od
+ - items:
+ - const: mediatek,mt6795-disp-od
+ - const: mediatek,mt8173-disp-od
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: OD Clock
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+ description:
+ Input and output ports can have multiple endpoints, each of those
+ connects to either the primary, secondary, etc, display pipeline.
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: OD input port, usually from an AAL block
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ OD output to the input of the next desired component in the
+ display pipeline, for example one of the available RDMA or
+ other blocks.
+
+ required:
+ - port@0
+ - port@1
+
+ mediatek,gce-client-reg:
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ description: describes how to locate the GCE client register
+ items:
+ - items:
+ - description: Phandle reference to a Mediatek GCE Mailbox
+ - description:
+ GCE subsys id mapping to a client defined in header
+ include/dt-bindings/gce/<chip>-gce.h.
+ - description: offset for the GCE register offset
+ - description: size of the GCE register offset
+
+required:
+ - compatible
+ - reg
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/mt8173-clk.h>
+ #include <dt-bindings/gce/mt8173-gce.h>
+
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ od@14023000 {
+ compatible = "mediatek,mt8173-disp-od";
+ reg = <0 0x14023000 0 0x1000>;
+ clocks = <&mmsys CLK_MM_DISP_OD>;
+ mediatek,gce-client-reg = <&gce SUBSYS_1402XXXX 0x3000 0x1000>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,ovl-2l.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,ovl-2l.yaml
new file mode 100644
index 000000000000..bacdfe7d08a6
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,ovl-2l.yaml
@@ -0,0 +1,113 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/mediatek/mediatek,ovl-2l.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Mediatek display overlay 2 layer
+
+maintainers:
+ - Chun-Kuang Hu <chunkuang.hu@kernel.org>
+ - Philipp Zabel <p.zabel@pengutronix.de>
+
+description: |
+ Mediatek display overlay 2 layer, namely OVL-2L, provides 2 more layer
+ for OVL.
+ OVL-2L device node must be siblings to the central MMSYS_CONFIG node.
+ For a description of the MMSYS_CONFIG binding, see
+ Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.yaml
+ for details.
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - mediatek,mt8183-disp-ovl-2l
+ - mediatek,mt8192-disp-ovl-2l
+ - items:
+ - enum:
+ - mediatek,mt8186-disp-ovl-2l
+ - const: mediatek,mt8192-disp-ovl-2l
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ power-domains:
+ description: A phandle and PM domain specifier as defined by bindings of
+ the power controller specified by phandle. See
+ Documentation/devicetree/bindings/power/power-domain.yaml for details.
+
+ clocks:
+ items:
+ - description: OVL-2L Clock
+
+ iommus:
+ description:
+ This property should point to the respective IOMMU block with master port as argument,
+ see Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml for details.
+
+ mediatek,gce-client-reg:
+ description: The register of client driver can be configured by gce with
+ 4 arguments defined in this property, such as phandle of gce, subsys id,
+ register offset and size. Each GCE subsys id is mapping to a client
+ defined in the header include/dt-bindings/gce/<chip>-gce.h.
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ maxItems: 1
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+ description:
+ Input and output ports can have multiple endpoints, each of those
+ connects to either the primary, secondary, etc, display pipeline.
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: OVL input port from MMSYS, VDOSYS or other OVLs
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ OVL output to the input of the next desired component in the
+ display pipeline, for example one of the available COLOR, RDMA
+ or WDMA blocks.
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - power-domains
+ - clocks
+ - iommus
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/clock/mt8183-clk.h>
+ #include <dt-bindings/power/mt8183-power.h>
+ #include <dt-bindings/gce/mt8183-gce.h>
+ #include <dt-bindings/memory/mt8183-larb-port.h>
+
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ ovl_2l0: ovl@14009000 {
+ compatible = "mediatek,mt8183-disp-ovl-2l";
+ reg = <0 0x14009000 0 0x1000>;
+ interrupts = <GIC_SPI 226 IRQ_TYPE_LEVEL_LOW>;
+ power-domains = <&spm MT8183_POWER_DOMAIN_DISP>;
+ clocks = <&mmsys CLK_MM_DISP_OVL0_2L>;
+ iommus = <&iommu M4U_PORT_DISP_2L_OVL0_LARB0>;
+ mediatek,gce-client-reg = <&gce SUBSYS_1400XXXX 0x9000 0x1000>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,ovl.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,ovl.yaml
new file mode 100644
index 000000000000..4f110635afb6
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,ovl.yaml
@@ -0,0 +1,133 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/mediatek/mediatek,ovl.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Mediatek display overlay
+
+maintainers:
+ - Chun-Kuang Hu <chunkuang.hu@kernel.org>
+ - Philipp Zabel <p.zabel@pengutronix.de>
+
+description: |
+ Mediatek display overlay, namely OVL, can do alpha blending from
+ the memory.
+ OVL device node must be siblings to the central MMSYS_CONFIG node.
+ For a description of the MMSYS_CONFIG binding, see
+ Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.yaml
+ for details.
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - mediatek,mt2701-disp-ovl
+ - mediatek,mt8173-disp-ovl
+ - mediatek,mt8183-disp-ovl
+ - mediatek,mt8192-disp-ovl
+ - mediatek,mt8195-disp-ovl
+ - mediatek,mt8195-mdp3-ovl
+ - items:
+ - enum:
+ - mediatek,mt7623-disp-ovl
+ - mediatek,mt2712-disp-ovl
+ - const: mediatek,mt2701-disp-ovl
+ - items:
+ - enum:
+ - mediatek,mt6795-disp-ovl
+ - const: mediatek,mt8173-disp-ovl
+ - items:
+ - enum:
+ - mediatek,mt8186-disp-ovl
+ - mediatek,mt8365-disp-ovl
+ - const: mediatek,mt8192-disp-ovl
+ - items:
+ - const: mediatek,mt8188-disp-ovl
+ - const: mediatek,mt8195-disp-ovl
+ - items:
+ - const: mediatek,mt8188-mdp3-ovl
+ - const: mediatek,mt8195-mdp3-ovl
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ power-domains:
+ description: A phandle and PM domain specifier as defined by bindings of
+ the power controller specified by phandle. See
+ Documentation/devicetree/bindings/power/power-domain.yaml for details.
+
+ clocks:
+ items:
+ - description: OVL Clock
+
+ iommus:
+ description:
+ This property should point to the respective IOMMU block with master port as argument,
+ see Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml for details.
+
+ mediatek,gce-client-reg:
+ description: The register of client driver can be configured by gce with
+ 4 arguments defined in this property, such as phandle of gce, subsys id,
+ register offset and size. Each GCE subsys id is mapping to a client
+ defined in the header include/dt-bindings/gce/<chip>-gce.h.
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ maxItems: 1
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+ description:
+ Input and output ports can have multiple endpoints, each of those
+ connects to either the primary, secondary, etc, display pipeline.
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: OVL input port from MMSYS or one of multiple VDOSYS
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ OVL output to the input of the next desired component in the
+ display pipeline, for example one of the available COLOR, RDMA
+ or WDMA blocks.
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - power-domains
+ - clocks
+ - iommus
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/clock/mt8173-clk.h>
+ #include <dt-bindings/power/mt8173-power.h>
+ #include <dt-bindings/gce/mt8173-gce.h>
+ #include <dt-bindings/memory/mt8173-larb-port.h>
+
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ ovl0: ovl@1400c000 {
+ compatible = "mediatek,mt8173-disp-ovl";
+ reg = <0 0x1400c000 0 0x1000>;
+ interrupts = <GIC_SPI 180 IRQ_TYPE_LEVEL_LOW>;
+ power-domains = <&scpsys MT8173_POWER_DOMAIN_MM>;
+ clocks = <&mmsys CLK_MM_DISP_OVL0>;
+ iommus = <&iommu M4U_PORT_DISP_OVL0>;
+ mediatek,gce-client-reg = <&gce SUBSYS_1400XXXX 0xc000 0x1000>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,padding.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,padding.yaml
new file mode 100644
index 000000000000..86787866ced0
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,padding.yaml
@@ -0,0 +1,87 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/mediatek/mediatek,padding.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek Display Padding
+
+maintainers:
+ - Chun-Kuang Hu <chunkuang.hu@kernel.org>
+ - Philipp Zabel <p.zabel@pengutronix.de>
+
+description:
+ Padding provides ability to add pixels to width and height of a layer with
+ specified colors. Due to hardware design, Mixer in VDOSYS1 requires
+ width of a layer to be 2-pixel-align, or 4-pixel-align when ETHDR is enabled,
+ we need Padding to deal with odd width.
+ Please notice that even if the Padding is in bypass mode, settings in
+ register must be cleared to 0, or undefined behaviors could happen.
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - mediatek,mt8188-disp-padding
+ - mediatek,mt8195-mdp3-padding
+ - items:
+ - const: mediatek,mt8188-mdp3-padding
+ - const: mediatek,mt8195-mdp3-padding
+
+ reg:
+ maxItems: 1
+
+ power-domains:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: Padding's clocks
+
+ mediatek,gce-client-reg:
+ description:
+ GCE (Global Command Engine) is a multi-core micro processor that helps
+ its clients to execute commands without interrupting CPU. This property
+ describes GCE client's information that is composed by 4 fields.
+ 1. Phandle of the GCE (there may be several GCE processors)
+ 2. Sub-system ID defined in the dt-binding like a user ID
+ (Please refer to include/dt-bindings/gce/<chip>-gce.h)
+ 3. Offset from base address of the subsys you are at
+ 4. Size of the register the client needs
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ items:
+ items:
+ - description: Phandle of the GCE
+ - description: Subsys ID defined in the dt-binding
+ - description: Offset from base address of the subsys
+ - description: Size of register
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - power-domains
+ - clocks
+ - mediatek,gce-client-reg
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/clock/mediatek,mt8188-clk.h>
+ #include <dt-bindings/power/mediatek,mt8188-power.h>
+ #include <dt-bindings/gce/mt8195-gce.h>
+
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ padding0: padding@1c11d000 {
+ compatible = "mediatek,mt8188-disp-padding";
+ reg = <0 0x1c11d000 0 0x1000>;
+ clocks = <&vdosys1 CLK_VDO1_PADDING0>;
+ power-domains = <&spm MT8188_POWER_DOMAIN_VDOSYS1>;
+ mediatek,gce-client-reg = <&gce0 SUBSYS_1c11XXXX 0xd000 0x1000>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,postmask.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,postmask.yaml
new file mode 100644
index 000000000000..fb6fe4742624
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,postmask.yaml
@@ -0,0 +1,104 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/mediatek/mediatek,postmask.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Mediatek display postmask
+
+maintainers:
+ - Chun-Kuang Hu <chunkuang.hu@kernel.org>
+ - Philipp Zabel <p.zabel@pengutronix.de>
+
+description: |
+ Mediatek display postmask, namely POSTMASK, provides round corner pattern
+ generation.
+ POSTMASK device node must be siblings to the central MMSYS_CONFIG node.
+ For a description of the MMSYS_CONFIG binding, see
+ Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.yaml
+ for details.
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - mediatek,mt8192-disp-postmask
+ - items:
+ - enum:
+ - mediatek,mt8186-disp-postmask
+ - mediatek,mt8188-disp-postmask
+ - const: mediatek,mt8192-disp-postmask
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ power-domains:
+ description: A phandle and PM domain specifier as defined by bindings of
+ the power controller specified by phandle. See
+ Documentation/devicetree/bindings/power/power-domain.yaml for details.
+
+ clocks:
+ items:
+ - description: POSTMASK Clock
+
+ mediatek,gce-client-reg:
+ description: The register of client driver can be configured by gce with
+ 4 arguments defined in this property, such as phandle of gce, subsys id,
+ register offset and size. Each GCE subsys id is mapping to a client
+ defined in the header include/dt-bindings/gce/<chip>-gce.h.
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ maxItems: 1
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+ description:
+ Input and output ports can have multiple endpoints, each of those
+ connects to either the primary, secondary, etc, display pipeline.
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: POSTMASK input port, usually from GAMMA
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ POSTMASK output to the input of the next desired component in the
+ display pipeline, for example one of the available DITHER blocks.
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - power-domains
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/clock/mt8192-clk.h>
+ #include <dt-bindings/power/mt8192-power.h>
+ #include <dt-bindings/gce/mt8192-gce.h>
+
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ postmask0: postmask@1400d000 {
+ compatible = "mediatek,mt8192-disp-postmask";
+ reg = <0 0x1400d000 0 0x1000>;
+ interrupts = <GIC_SPI 262 IRQ_TYPE_LEVEL_HIGH 0>;
+ power-domains = <&scpsys MT8192_POWER_DOMAIN_DISP>;
+ clocks = <&mmsys CLK_MM_DISP_POSTMASK0>;
+ mediatek,gce-client-reg = <&gce SUBSYS_1400XXXX 0xd000 0x1000>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,rdma.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,rdma.yaml
new file mode 100644
index 000000000000..878f676b581f
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,rdma.yaml
@@ -0,0 +1,144 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/mediatek/mediatek,rdma.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Mediatek Read Direct Memory Access
+
+maintainers:
+ - Chun-Kuang Hu <chunkuang.hu@kernel.org>
+ - Philipp Zabel <p.zabel@pengutronix.de>
+
+description: |
+ Mediatek Read Direct Memory Access(RDMA) component used to read the
+ data into DMA. It provides real time data to the back-end panel
+ driver, such as DSI, DPI and DP_INTF.
+ It contains one line buffer to store the sufficient pixel data.
+ RDMA device node must be siblings to the central MMSYS_CONFIG node.
+ For a description of the MMSYS_CONFIG binding, see
+ Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.yaml
+ for details.
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - mediatek,mt2701-disp-rdma
+ - mediatek,mt8173-disp-rdma
+ - mediatek,mt8183-disp-rdma
+ - mediatek,mt8195-disp-rdma
+ - items:
+ - enum:
+ - mediatek,mt8188-disp-rdma
+ - const: mediatek,mt8195-disp-rdma
+ - items:
+ - enum:
+ - mediatek,mt7623-disp-rdma
+ - mediatek,mt2712-disp-rdma
+ - const: mediatek,mt2701-disp-rdma
+ - items:
+ - enum:
+ - mediatek,mt6795-disp-rdma
+ - const: mediatek,mt8173-disp-rdma
+ - items:
+ - enum:
+ - mediatek,mt8186-disp-rdma
+ - mediatek,mt8192-disp-rdma
+ - mediatek,mt8365-disp-rdma
+ - const: mediatek,mt8183-disp-rdma
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ power-domains:
+ description: A phandle and PM domain specifier as defined by bindings of
+ the power controller specified by phandle. See
+ Documentation/devicetree/bindings/power/power-domain.yaml for details.
+
+ clocks:
+ items:
+ - description: RDMA Clock
+
+ iommus:
+ description:
+ This property should point to the respective IOMMU block with master port as argument,
+ see Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml for details.
+
+ mediatek,rdma-fifo-size:
+ description:
+ rdma fifo size may be different even in same SOC, add this property to the
+ corresponding rdma.
+ The value below is the Max value which defined in hardware data sheet
+ mediatek,rdma-fifo-size of mt8173-rdma0 is 8K
+ mediatek,rdma-fifo-size of mt8183-rdma0 is 5K
+ mediatek,rdma-fifo-size of mt8183-rdma1 is 2K
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [8192, 5120, 2048]
+
+ mediatek,gce-client-reg:
+ description: The register of client driver can be configured by gce with
+ 4 arguments defined in this property, such as phandle of gce, subsys id,
+ register offset and size. Each GCE subsys id is mapping to a client
+ defined in the header include/dt-bindings/gce/<chip>-gce.h.
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ maxItems: 1
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+ description:
+ Input and output ports can have multiple endpoints, each of those
+ connects to either the primary, secondary, etc, display pipeline.
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: RDMA input port, usually from MMSYS, OD or OVL
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ RDMA output to the input of the next desired component in the
+ display pipeline, for example one of the available COLOR, DPI,
+ DSI, MERGE or UFOE blocks.
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - power-domains
+ - clocks
+ - iommus
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/clock/mt8173-clk.h>
+ #include <dt-bindings/power/mt8173-power.h>
+ #include <dt-bindings/gce/mt8173-gce.h>
+ #include <dt-bindings/memory/mt8173-larb-port.h>
+
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ rdma0: rdma@1400e000 {
+ compatible = "mediatek,mt8173-disp-rdma";
+ reg = <0 0x1400e000 0 0x1000>;
+ interrupts = <GIC_SPI 182 IRQ_TYPE_LEVEL_LOW>;
+ power-domains = <&scpsys MT8173_POWER_DOMAIN_MM>;
+ clocks = <&mmsys CLK_MM_DISP_RDMA0>;
+ iommus = <&iommu M4U_PORT_DISP_RDMA0>;
+ mediatek,rdma-fifo-size = <8192>;
+ mediatek,gce-client-reg = <&gce SUBSYS_1400XXXX 0xe000 0x1000>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,split.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,split.yaml
new file mode 100644
index 000000000000..4b6ff546757e
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,split.yaml
@@ -0,0 +1,114 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/mediatek/mediatek,split.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Mediatek display split
+
+maintainers:
+ - Chun-Kuang Hu <chunkuang.hu@kernel.org>
+ - Philipp Zabel <p.zabel@pengutronix.de>
+
+description: |
+ Mediatek display split, namely SPLIT, is used to split stream to two
+ encoders.
+ SPLIT device node must be siblings to the central MMSYS_CONFIG node.
+ For a description of the MMSYS_CONFIG binding, see
+ Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.yaml
+ for details.
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - mediatek,mt8173-disp-split
+ - mediatek,mt8195-mdp3-split
+ - items:
+ - const: mediatek,mt6795-disp-split
+ - const: mediatek,mt8173-disp-split
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ power-domains:
+ description: A phandle and PM domain specifier as defined by bindings of
+ the power controller specified by phandle. See
+ Documentation/devicetree/bindings/power/power-domain.yaml for details.
+ maxItems: 1
+
+ mediatek,gce-client-reg:
+ description:
+ The register of display function block to be set by gce. There are 4 arguments,
+ such as gce node, subsys id, offset and register size. The subsys id that is
+ mapping to the register of display function blocks is defined in the gce header
+ include/dt-bindings/gce/<chip>-gce.h of each chips.
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ items:
+ items:
+ - description: phandle of GCE
+ - description: GCE subsys id
+ - description: register offset
+ - description: register size
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: SPLIT Clock
+ - description: Used for interfacing with the HDMI RX signal source.
+ - description: Paired with receiving HDMI RX metadata.
+ minItems: 1
+
+required:
+ - compatible
+ - reg
+ - power-domains
+ - clocks
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: mediatek,mt8195-mdp3-split
+
+ then:
+ properties:
+ clocks:
+ minItems: 3
+
+ required:
+ - mediatek,gce-client-reg
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: mediatek,mt8173-disp-split
+
+ then:
+ properties:
+ clocks:
+ maxItems: 1
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/mt8173-clk.h>
+ #include <dt-bindings/power/mt8173-power.h>
+
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ split0: split@14018000 {
+ compatible = "mediatek,mt8173-disp-split";
+ reg = <0 0x14018000 0 0x1000>;
+ power-domains = <&spm MT8173_POWER_DOMAIN_MM>;
+ clocks = <&mmsys CLK_MM_DISP_SPLIT0>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,ufoe.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,ufoe.yaml
new file mode 100644
index 000000000000..036a66ed42e7
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,ufoe.yaml
@@ -0,0 +1,107 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/mediatek/mediatek,ufoe.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Mediatek display UFOe
+
+maintainers:
+ - Chun-Kuang Hu <chunkuang.hu@kernel.org>
+ - Philipp Zabel <p.zabel@pengutronix.de>
+
+description: |
+ Mediatek display UFOe stands for Unified Frame Optimization engine.
+ UFOe can cut the data rate for DSI port which may lead to reduce power
+ consumption.
+ UFOe device node must be siblings to the central MMSYS_CONFIG node.
+ For a description of the MMSYS_CONFIG binding, see
+ Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.yaml
+ for details.
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - mediatek,mt8173-disp-ufoe
+ - items:
+ - const: mediatek,mt6795-disp-ufoe
+ - const: mediatek,mt8173-disp-ufoe
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ power-domains:
+ description: A phandle and PM domain specifier as defined by bindings of
+ the power controller specified by phandle. See
+ Documentation/devicetree/bindings/power/power-domain.yaml for details.
+
+ clocks:
+ items:
+ - description: UFOe Clock
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+ description:
+ Input and output ports can have multiple endpoints, each of those
+ connects to either the primary, secondary, etc, display pipeline.
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: UFOE input, usually from one of the RDMA blocks.
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ UFOE output to the input of the next desired component in the
+ display pipeline, usually one of the available DSI blocks.
+
+ required:
+ - port@0
+ - port@1
+
+ mediatek,gce-client-reg:
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ description: describes how to locate the GCE client register
+ items:
+ - items:
+ - description: Phandle reference to a Mediatek GCE Mailbox
+ - description:
+ GCE subsys id mapping to a client defined in header
+ include/dt-bindings/gce/<chip>-gce.h.
+ - description: offset for the GCE register offset
+ - description: size of the GCE register offset
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - power-domains
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/clock/mt8173-clk.h>
+ #include <dt-bindings/gce/mt8173-gce.h>
+ #include <dt-bindings/power/mt8173-power.h>
+
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ ufoe@1401a000 {
+ compatible = "mediatek,mt8173-disp-ufoe";
+ reg = <0 0x1401a000 0 0x1000>;
+ interrupts = <GIC_SPI 191 IRQ_TYPE_LEVEL_LOW>;
+ power-domains = <&scpsys MT8173_POWER_DOMAIN_MM>;
+ clocks = <&mmsys CLK_MM_DISP_UFOE>;
+ mediatek,gce-client-reg = <&gce SUBSYS_1401XXXX 0xa000 0x1000>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,wdma.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,wdma.yaml
new file mode 100644
index 000000000000..a3a2b71a4523
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,wdma.yaml
@@ -0,0 +1,89 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/mediatek/mediatek,wdma.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Mediatek Write Direct Memory Access
+
+maintainers:
+ - Chun-Kuang Hu <chunkuang.hu@kernel.org>
+ - Philipp Zabel <p.zabel@pengutronix.de>
+
+description: |
+ Mediatek Write Direct Memory Access(WDMA) component used to write
+ the data into DMA.
+ WDMA device node must be siblings to the central MMSYS_CONFIG node.
+ For a description of the MMSYS_CONFIG binding, see
+ Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.yaml
+ for details.
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - mediatek,mt8173-disp-wdma
+ - items:
+ - const: mediatek,mt6795-disp-wdma
+ - const: mediatek,mt8173-disp-wdma
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ power-domains:
+ description: A phandle and PM domain specifier as defined by bindings of
+ the power controller specified by phandle. See
+ Documentation/devicetree/bindings/power/power-domain.yaml for details.
+
+ clocks:
+ items:
+ - description: WDMA Clock
+
+ iommus:
+ description:
+ This property should point to the respective IOMMU block with master port as argument,
+ see Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml for details.
+
+ mediatek,gce-client-reg:
+ description: The register of client driver can be configured by gce with
+ 4 arguments defined in this property, such as phandle of gce, subsys id,
+ register offset and size. Each GCE subsys id is mapping to a client
+ defined in the header include/dt-bindings/gce/<chip>-gce.h.
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - power-domains
+ - clocks
+ - iommus
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/clock/mt8173-clk.h>
+ #include <dt-bindings/power/mt8173-power.h>
+ #include <dt-bindings/gce/mt8173-gce.h>
+ #include <dt-bindings/memory/mt8173-larb-port.h>
+
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ wdma0: wdma@14011000 {
+ compatible = "mediatek,mt8173-disp-wdma";
+ reg = <0 0x14011000 0 0x1000>;
+ interrupts = <GIC_SPI 185 IRQ_TYPE_LEVEL_LOW>;
+ power-domains = <&scpsys MT8173_POWER_DOMAIN_MM>;
+ clocks = <&mmsys CLK_MM_DISP_WDMA0>;
+ iommus = <&iommu M4U_PORT_DISP_WDMA0>;
+ mediatek,gce-client-reg = <&gce SUBSYS_1401XXXX 0x1000 0x1000>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/msm/dp-controller.yaml b/Documentation/devicetree/bindings/display/msm/dp-controller.yaml
new file mode 100644
index 000000000000..aeb4e4f36044
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/msm/dp-controller.yaml
@@ -0,0 +1,357 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/msm/dp-controller.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MSM Display Port Controller
+
+maintainers:
+ - Kuogee Hsieh <quic_khsieh@quicinc.com>
+ - Abhinav Kumar <quic_abhinavk@quicinc.com>
+
+description: |
+ Device tree bindings for DisplayPort host controller for MSM targets
+ that are compatible with VESA DisplayPort interface specification.
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - qcom,sa8775p-dp
+ - qcom,sc7180-dp
+ - qcom,sc7280-dp
+ - qcom,sc7280-edp
+ - qcom,sc8180x-dp
+ - qcom,sc8180x-edp
+ - qcom,sc8280xp-dp
+ - qcom,sc8280xp-edp
+ - qcom,sdm845-dp
+ - qcom,sm8350-dp
+ - qcom,sm8650-dp
+ - qcom,x1e80100-dp
+
+ - items:
+ - enum:
+ - qcom,sm6350-dp
+ - const: qcom,sc7180-dp
+
+ # deprecated entry for compatibility with old DT
+ - items:
+ - enum:
+ - qcom,sm6350-dp
+ - const: qcom,sm8350-dp
+ deprecated: true
+
+ - items:
+ - enum:
+ - qcom,sar2130p-dp
+ - qcom,sm7150-dp
+ - qcom,sm8150-dp
+ - qcom,sm8250-dp
+ - qcom,sm8450-dp
+ - qcom,sm8550-dp
+ - const: qcom,sm8350-dp
+
+ - items:
+ - enum:
+ - qcom,sm8750-dp
+ - const: qcom,sm8650-dp
+
+ reg:
+ minItems: 4
+ items:
+ - description: ahb register block
+ - description: aux register block
+ - description: link register block
+ - description: p0 register block
+ - description: p1 register block
+ - description: p2 register block
+ - description: p3 register block
+ - description: mst2link register block
+ - description: mst3link register block
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ minItems: 5
+ items:
+ - description: AHB clock to enable register access
+ - description: Display Port AUX clock
+ - description: Display Port Link clock
+ - description: Link interface clock between DP and PHY
+ - description: Display Port stream 0 Pixel clock
+ - description: Display Port stream 1 Pixel clock
+ - description: Display Port stream 2 Pixel clock
+ - description: Display Port stream 3 Pixel clock
+
+ clock-names:
+ minItems: 5
+ items:
+ - const: core_iface
+ - const: core_aux
+ - const: ctrl_link
+ - const: ctrl_link_iface
+ - const: stream_pixel
+ - const: stream_1_pixel
+ - const: stream_2_pixel
+ - const: stream_3_pixel
+
+ phys:
+ maxItems: 1
+
+ phy-names:
+ items:
+ - const: dp
+
+ operating-points-v2: true
+
+ opp-table:
+ type: object
+
+ power-domains:
+ maxItems: 1
+
+ aux-bus:
+ $ref: /schemas/display/dp-aux-bus.yaml#
+
+ data-lanes:
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ deprecated: true
+ minItems: 1
+ maxItems: 4
+ items:
+ maximum: 3
+
+ "#sound-dai-cells":
+ const: 0
+
+ vdda-0p9-supply:
+ deprecated: true
+ vdda-1p2-supply:
+ deprecated: true
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: Input endpoint of the controller
+
+ port@1:
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ unevaluatedProperties: false
+ description: Output endpoint of the controller
+ properties:
+ endpoint:
+ $ref: /schemas/media/video-interfaces.yaml#
+ unevaluatedProperties: false
+ properties:
+ data-lanes:
+ minItems: 1
+ maxItems: 4
+ items:
+ enum: [ 0, 1, 2, 3 ]
+
+ link-frequencies:
+ minItems: 1
+ maxItems: 4
+ items:
+ enum: [ 1620000000, 2700000000, 5400000000, 8100000000 ]
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+ - phys
+ - phy-names
+ - power-domains
+ - ports
+
+allOf:
+ # AUX BUS does not exist on DP controllers
+ # Audio output also is present only on DP output
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,sc7280-edp
+ - qcom,sc8180x-edp
+ - qcom,sc8280xp-edp
+ then:
+ properties:
+ "#sound-dai-cells": false
+ else:
+ if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,sa8775p-dp
+ - qcom,x1e80100-dp
+ then:
+ oneOf:
+ - required:
+ - aux-bus
+ - required:
+ - "#sound-dai-cells"
+ else:
+ properties:
+ aux-bus: false
+ required:
+ - "#sound-dai-cells"
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ # these platforms support SST only
+ - qcom,sc7180-dp
+ - qcom,sc7280-dp
+ - qcom,sc7280-edp
+ - qcom,sc8180x-edp
+ - qcom,sc8280xp-edp
+ then:
+ properties:
+ reg:
+ minItems: 5
+ maxItems: 5
+ clocks:
+ minItems: 5
+ maxItems: 5
+ clocks-names:
+ minItems: 5
+ maxItems: 5
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ # these platforms support 2 streams MST on some interfaces,
+ # others are SST only
+ - qcom,sc8280xp-dp
+ - qcom,x1e80100-dp
+ then:
+ properties:
+ reg:
+ minItems: 5
+ maxItems: 5
+ clocks:
+ minItems: 5
+ maxItems: 6
+ clocks-names:
+ minItems: 5
+ maxItems: 6
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ # 2 streams MST
+ enum:
+ - qcom,sc8180x-dp
+ - qcom,sdm845-dp
+ - qcom,sm8350-dp
+ - qcom,sm8650-dp
+ then:
+ properties:
+ reg:
+ minItems: 5
+ maxItems: 5
+ clocks:
+ minItems: 6
+ maxItems: 6
+ clocks-names:
+ minItems: 6
+ maxItems: 6
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ # these platforms support 4 stream MST on first DP,
+ # 2 streams MST on the second one.
+ - qcom,sa8775p-dp
+ then:
+ properties:
+ reg:
+ minItems: 9
+ maxItems: 9
+ clocks:
+ minItems: 6
+ maxItems: 8
+ clocks-names:
+ minItems: 6
+ maxItems: 8
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/clock/qcom,dispcc-sc7180.h>
+ #include <dt-bindings/power/qcom-rpmpd.h>
+
+ displayport-controller@ae90000 {
+ compatible = "qcom,sc7180-dp";
+ reg = <0xae90000 0x200>,
+ <0xae90200 0x200>,
+ <0xae90400 0xc00>,
+ <0xae91000 0x400>,
+ <0xae91400 0x400>;
+ interrupt-parent = <&mdss>;
+ interrupts = <12>;
+ clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>,
+ <&dispcc DISP_CC_MDSS_DP_AUX_CLK>,
+ <&dispcc DISP_CC_MDSS_DP_LINK_CLK>,
+ <&dispcc DISP_CC_MDSS_DP_LINK_INTF_CLK>,
+ <&dispcc DISP_CC_MDSS_DP_PIXEL_CLK>;
+ clock-names = "core_iface", "core_aux",
+ "ctrl_link",
+ "ctrl_link_iface", "stream_pixel";
+
+ assigned-clocks = <&dispcc DISP_CC_MDSS_DP_LINK_CLK_SRC>,
+ <&dispcc DISP_CC_MDSS_DP_PIXEL_CLK_SRC>;
+
+ assigned-clock-parents = <&dp_phy 0>, <&dp_phy 1>;
+
+ phys = <&dp_phy>;
+ phy-names = "dp";
+
+ #sound-dai-cells = <0>;
+
+ power-domains = <&rpmhpd SC7180_CX>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ endpoint {
+ remote-endpoint = <&dpu_intf0_out>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ endpoint {
+ remote-endpoint = <&typec>;
+ data-lanes = <0 1>;
+ link-frequencies = /bits/ 64 <1620000000 2700000000 5400000000 8100000000>;
+ };
+ };
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/display/msm/dpu-common.yaml b/Documentation/devicetree/bindings/display/msm/dpu-common.yaml
new file mode 100644
index 000000000000..3f953aa5e694
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/msm/dpu-common.yaml
@@ -0,0 +1,56 @@
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/msm/dpu-common.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Display DPU common properties
+
+maintainers:
+ - Krishna Manikandan <quic_mkrishn@quicinc.com>
+ - Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+ - Rob Clark <robdclark@gmail.com>
+
+description: |
+ Common properties for QCom DPU display controller.
+
+# Do not select this by default, otherwise it is also selected for all
+# display-controller@ nodes
+select:
+ false
+
+properties:
+ $nodename:
+ pattern: '^display-controller@[0-9a-f]+$'
+
+ interrupts:
+ maxItems: 1
+
+ power-domains:
+ maxItems: 1
+
+ operating-points-v2: true
+ opp-table:
+ type: object
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+ description: |
+ Contains the list of output ports from DPU device. These ports
+ connect to interfaces that are external to the DPU hardware,
+ such as DSI, DP etc.
+
+ patternProperties:
+ "^port@[0-9a-f]+$":
+ $ref: /schemas/graph.yaml#/properties/port
+
+ # at least one port is required
+ required:
+ - port@0
+
+required:
+ - interrupts
+ - power-domains
+ - operating-points-v2
+ - ports
+
+additionalProperties: true
diff --git a/Documentation/devicetree/bindings/display/msm/dpu.txt b/Documentation/devicetree/bindings/display/msm/dpu.txt
deleted file mode 100644
index ad2e8830324e..000000000000
--- a/Documentation/devicetree/bindings/display/msm/dpu.txt
+++ /dev/null
@@ -1,131 +0,0 @@
-Qualcomm Technologies, Inc. DPU KMS
-
-Description:
-
-Device tree bindings for MSM Mobile Display Subsytem(MDSS) that encapsulates
-sub-blocks like DPU display controller, DSI and DP interfaces etc.
-The DPU display controller is found in SDM845 SoC.
-
-MDSS:
-Required properties:
-- compatible: "qcom,sdm845-mdss"
-- reg: physical base address and length of contoller's registers.
-- reg-names: register region names. The following region is required:
- * "mdss"
-- power-domains: a power domain consumer specifier according to
- Documentation/devicetree/bindings/power/power_domain.txt
-- clocks: list of clock specifiers for clocks needed by the device.
-- clock-names: device clock names, must be in same order as clocks property.
- The following clocks are required:
- * "iface"
- * "bus"
- * "core"
-- interrupts: interrupt signal from MDSS.
-- interrupt-controller: identifies the node as an interrupt controller.
-- #interrupt-cells: specifies the number of cells needed to encode an interrupt
- source, should be 1.
-- iommus: phandle of iommu device node.
-- #address-cells: number of address cells for the MDSS children. Should be 1.
-- #size-cells: Should be 1.
-- ranges: parent bus address space is the same as the child bus address space.
-
-Optional properties:
-- assigned-clocks: list of clock specifiers for clocks needing rate assignment
-- assigned-clock-rates: list of clock frequencies sorted in the same order as
- the assigned-clocks property.
-
-MDP:
-Required properties:
-- compatible: "qcom,sdm845-dpu"
-- reg: physical base address and length of controller's registers.
-- reg-names : register region names. The following region is required:
- * "mdp"
- * "vbif"
-- clocks: list of clock specifiers for clocks needed by the device.
-- clock-names: device clock names, must be in same order as clocks property.
- The following clocks are required.
- * "bus"
- * "iface"
- * "core"
- * "vsync"
-- interrupts: interrupt line from DPU to MDSS.
-- ports: contains the list of output ports from DPU device. These ports connect
- to interfaces that are external to the DPU hardware, such as DSI, DP etc.
-
- Each output port contains an endpoint that describes how it is connected to an
- external interface. These are described by the standard properties documented
- here:
- Documentation/devicetree/bindings/graph.txt
- Documentation/devicetree/bindings/media/video-interfaces.txt
-
- Port 0 -> DPU_INTF1 (DSI1)
- Port 1 -> DPU_INTF2 (DSI2)
-
-Optional properties:
-- assigned-clocks: list of clock specifiers for clocks needing rate assignment
-- assigned-clock-rates: list of clock frequencies sorted in the same order as
- the assigned-clocks property.
-
-Example:
-
- mdss: mdss@ae00000 {
- compatible = "qcom,sdm845-mdss";
- reg = <0xae00000 0x1000>;
- reg-names = "mdss";
-
- power-domains = <&clock_dispcc 0>;
-
- clocks = <&gcc GCC_DISP_AHB_CLK>, <&gcc GCC_DISP_AXI_CLK>,
- <&clock_dispcc DISP_CC_MDSS_MDP_CLK>;
- clock-names = "iface", "bus", "core";
-
- assigned-clocks = <&clock_dispcc DISP_CC_MDSS_MDP_CLK>;
- assigned-clock-rates = <300000000>;
-
- interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-controller;
- #interrupt-cells = <1>;
-
- iommus = <&apps_iommu 0>;
-
- #address-cells = <2>;
- #size-cells = <1>;
- ranges = <0 0 0xae00000 0xb2008>;
-
- mdss_mdp: mdp@ae01000 {
- compatible = "qcom,sdm845-dpu";
- reg = <0 0x1000 0x8f000>, <0 0xb0000 0x2008>;
- reg-names = "mdp", "vbif";
-
- clocks = <&clock_dispcc DISP_CC_MDSS_AHB_CLK>,
- <&clock_dispcc DISP_CC_MDSS_AXI_CLK>,
- <&clock_dispcc DISP_CC_MDSS_MDP_CLK>,
- <&clock_dispcc DISP_CC_MDSS_VSYNC_CLK>;
- clock-names = "iface", "bus", "core", "vsync";
-
- assigned-clocks = <&clock_dispcc DISP_CC_MDSS_MDP_CLK>,
- <&clock_dispcc DISP_CC_MDSS_VSYNC_CLK>;
- assigned-clock-rates = <0 0 300000000 19200000>;
-
- interrupts = <0 IRQ_TYPE_LEVEL_HIGH>;
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
- dpu_intf1_out: endpoint {
- remote-endpoint = <&dsi0_in>;
- };
- };
-
- port@1 {
- reg = <1>;
- dpu_intf2_out: endpoint {
- remote-endpoint = <&dsi1_in>;
- };
- };
- };
- };
- };
diff --git a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml
new file mode 100644
index 000000000000..4400d4cce072
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml
@@ -0,0 +1,482 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/msm/dsi-controller-main.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Display DSI controller
+
+maintainers:
+ - Krishna Manikandan <quic_mkrishn@quicinc.com>
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - qcom,apq8064-dsi-ctrl
+ - qcom,msm8226-dsi-ctrl
+ - qcom,msm8916-dsi-ctrl
+ - qcom,msm8953-dsi-ctrl
+ - qcom,msm8974-dsi-ctrl
+ - qcom,msm8976-dsi-ctrl
+ - qcom,msm8996-dsi-ctrl
+ - qcom,msm8998-dsi-ctrl
+ - qcom,qcm2290-dsi-ctrl
+ - qcom,sa8775p-dsi-ctrl
+ - qcom,sar2130p-dsi-ctrl
+ - qcom,sc7180-dsi-ctrl
+ - qcom,sc7280-dsi-ctrl
+ - qcom,sc8180x-dsi-ctrl
+ - qcom,sdm660-dsi-ctrl
+ - qcom,sdm670-dsi-ctrl
+ - qcom,sdm845-dsi-ctrl
+ - qcom,sm6115-dsi-ctrl
+ - qcom,sm6125-dsi-ctrl
+ - qcom,sm6150-dsi-ctrl
+ - qcom,sm6350-dsi-ctrl
+ - qcom,sm6375-dsi-ctrl
+ - qcom,sm7150-dsi-ctrl
+ - qcom,sm8150-dsi-ctrl
+ - qcom,sm8250-dsi-ctrl
+ - qcom,sm8350-dsi-ctrl
+ - qcom,sm8450-dsi-ctrl
+ - qcom,sm8550-dsi-ctrl
+ - qcom,sm8650-dsi-ctrl
+ - qcom,sm8750-dsi-ctrl
+ - const: qcom,mdss-dsi-ctrl
+ - enum:
+ - qcom,dsi-ctrl-6g-qcm2290
+ - qcom,mdss-dsi-ctrl # This should always come with an SoC-specific compatible
+ deprecated: true
+
+ reg:
+ maxItems: 1
+
+ reg-names:
+ const: dsi_ctrl
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ description: |
+ Several clocks are used, depending on the variant. Typical ones are::
+ - bus:: Display AHB clock.
+ - byte:: Display byte clock.
+ - byte_intf:: Display byte interface clock.
+ - core:: Display core clock.
+ - core_mss:: Core MultiMedia SubSystem clock.
+ - iface:: Display AXI clock.
+ - mdp_core:: MDP Core clock.
+ - mnoc:: MNOC clock
+ - pixel:: Display pixel clock.
+ minItems: 3
+ maxItems: 12
+
+ clock-names:
+ minItems: 3
+ maxItems: 12
+
+ phys:
+ maxItems: 1
+
+ phy-names:
+ deprecated: true
+ const: dsi
+
+ syscon-sfpb:
+ description: A phandle to mmss_sfpb syscon node (only for DSIv2).
+ $ref: /schemas/types.yaml#/definitions/phandle
+
+ qcom,dual-dsi-mode:
+ type: boolean
+ description: |
+ Indicates if the DSI controller is driving a panel which needs
+ 2 DSI links.
+
+ qcom,master-dsi:
+ type: boolean
+ description: |
+ Indicates if the DSI controller is the master DSI controller when
+ qcom,dual-dsi-mode enabled.
+
+ qcom,sync-dual-dsi:
+ type: boolean
+ description: |
+ Indicates if the DSI controller needs to sync the other DSI controller
+ with MIPI DCS commands when qcom,dual-dsi-mode enabled.
+
+ assigned-clocks:
+ minItems: 2
+ maxItems: 4
+ description: |
+ For DSI on SM8650 and older: parents of "byte" and "pixel" for the given
+ platform.
+ For DSIv2 platforms this should contain "byte", "esc", "src" and
+ "pixel_src" clocks.
+
+ assigned-clock-parents:
+ minItems: 2
+ maxItems: 4
+ description: |
+ The Byte clock and Pixel clock PLL outputs provided by a DSI PHY block.
+
+ power-domains:
+ maxItems: 1
+
+ operating-points-v2: true
+
+ opp-table:
+ type: object
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/ports
+ description: |
+ Contains DSI controller input and output ports as children, each
+ containing one endpoint subnode.
+
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ unevaluatedProperties: false
+ description: |
+ Input endpoints of the controller.
+ properties:
+ endpoint:
+ $ref: /schemas/media/video-interfaces.yaml#
+ unevaluatedProperties: false
+ properties:
+ data-lanes:
+ maxItems: 4
+ minItems: 1
+ items:
+ enum: [ 0, 1, 2, 3 ]
+
+ port@1:
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ unevaluatedProperties: false
+ description: |
+ Output endpoints of the controller.
+ properties:
+ endpoint:
+ $ref: /schemas/media/video-interfaces.yaml#
+ unevaluatedProperties: false
+ properties:
+ data-lanes:
+ maxItems: 4
+ minItems: 1
+ items:
+ enum: [ 0, 1, 2, 3 ]
+
+ qcom,te-source:
+ $ref: /schemas/types.yaml#/definitions/string
+ description:
+ Specifies the source of vsync signal from the panel used for
+ tearing elimination.
+ default: mdp_vsync_p
+ enum:
+ - mdp_vsync_p
+ - mdp_vsync_s
+ - mdp_vsync_e
+ - timer0
+ - timer1
+ - timer2
+ - timer3
+ - timer4
+
+ required:
+ - port@0
+ - port@1
+
+ avdd-supply:
+ description:
+ Phandle to vdd regulator device node
+
+ refgen-supply:
+ description:
+ Phandle to REFGEN regulator device node
+
+ vcca-supply:
+ description:
+ Phandle to vdd regulator device node
+
+ vdd-supply:
+ description:
+ VDD regulator
+
+ vddio-supply:
+ description:
+ VDD-IO regulator
+
+ vdda-supply:
+ description:
+ VDDA regulator
+
+required:
+ - compatible
+ - reg
+ - reg-names
+ - interrupts
+ - clocks
+ - clock-names
+ - phys
+ - ports
+
+allOf:
+ - $ref: ../dsi-controller.yaml#
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,apq8064-dsi-ctrl
+ then:
+ properties:
+ clocks:
+ minItems: 7
+ maxItems: 7
+ clock-names:
+ items:
+ - const: iface
+ - const: bus
+ - const: core_mmss
+ - const: src
+ - const: byte
+ - const: pixel
+ - const: core
+ required:
+ - assigned-clocks
+ - assigned-clock-parents
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,msm8916-dsi-ctrl
+ - qcom,msm8953-dsi-ctrl
+ - qcom,msm8976-dsi-ctrl
+ then:
+ properties:
+ clocks:
+ minItems: 6
+ maxItems: 6
+ clock-names:
+ items:
+ - const: mdp_core
+ - const: iface
+ - const: bus
+ - const: byte
+ - const: pixel
+ - const: core
+ required:
+ - assigned-clocks
+ - assigned-clock-parents
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,msm8226-dsi-ctrl
+ - qcom,msm8974-dsi-ctrl
+ then:
+ properties:
+ clocks:
+ minItems: 7
+ maxItems: 7
+ clock-names:
+ items:
+ - const: mdp_core
+ - const: iface
+ - const: bus
+ - const: byte
+ - const: pixel
+ - const: core
+ - const: core_mmss
+ required:
+ - assigned-clocks
+ - assigned-clock-parents
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,msm8996-dsi-ctrl
+ then:
+ properties:
+ clocks:
+ minItems: 7
+ maxItems: 7
+ clock-names:
+ items:
+ - const: mdp_core
+ - const: byte
+ - const: iface
+ - const: bus
+ - const: core_mmss
+ - const: pixel
+ - const: core
+ required:
+ - assigned-clocks
+ - assigned-clock-parents
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,msm8998-dsi-ctrl
+ - qcom,sa8775p-dsi-ctrl
+ - qcom,sar2130p-dsi-ctrl
+ - qcom,sc7180-dsi-ctrl
+ - qcom,sc7280-dsi-ctrl
+ - qcom,sc8180x-dsi-ctrl
+ - qcom,sdm845-dsi-ctrl
+ - qcom,sm6115-dsi-ctrl
+ - qcom,sm6125-dsi-ctrl
+ - qcom,sm6350-dsi-ctrl
+ - qcom,sm6375-dsi-ctrl
+ - qcom,sm6150-dsi-ctrl
+ - qcom,sm7150-dsi-ctrl
+ - qcom,sm8150-dsi-ctrl
+ - qcom,sm8250-dsi-ctrl
+ - qcom,sm8350-dsi-ctrl
+ - qcom,sm8450-dsi-ctrl
+ - qcom,sm8550-dsi-ctrl
+ - qcom,sm8650-dsi-ctrl
+ then:
+ properties:
+ clocks:
+ minItems: 6
+ maxItems: 6
+ clock-names:
+ items:
+ - const: byte
+ - const: byte_intf
+ - const: pixel
+ - const: core
+ - const: iface
+ - const: bus
+ required:
+ - assigned-clocks
+ - assigned-clock-parents
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,sm8750-dsi-ctrl
+ then:
+ properties:
+ clocks:
+ minItems: 12
+ maxItems: 12
+ clock-names:
+ items:
+ - const: byte
+ - const: byte_intf
+ - const: pixel
+ - const: core
+ - const: iface
+ - const: bus
+ - const: dsi_pll_pixel
+ - const: dsi_pll_byte
+ - const: esync
+ - const: osc
+ - const: byte_src
+ - const: pixel_src
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,sdm660-dsi-ctrl
+ then:
+ properties:
+ clocks:
+ minItems: 9
+ maxItems: 9
+ clock-names:
+ items:
+ - const: mdp_core
+ - const: byte
+ - const: byte_intf
+ - const: mnoc
+ - const: iface
+ - const: bus
+ - const: core_mmss
+ - const: pixel
+ - const: core
+ required:
+ - assigned-clocks
+ - assigned-clock-parents
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/clock/qcom,dispcc-sdm845.h>
+ #include <dt-bindings/clock/qcom,gcc-sdm845.h>
+ #include <dt-bindings/power/qcom-rpmpd.h>
+
+ dsi@ae94000 {
+ compatible = "qcom,sc7180-dsi-ctrl", "qcom,mdss-dsi-ctrl";
+ reg = <0x0ae94000 0x400>;
+ reg-names = "dsi_ctrl";
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ interrupt-parent = <&mdss>;
+ interrupts = <4>;
+
+ clocks = <&dispcc DISP_CC_MDSS_BYTE0_CLK>,
+ <&dispcc DISP_CC_MDSS_BYTE0_INTF_CLK>,
+ <&dispcc DISP_CC_MDSS_PCLK0_CLK>,
+ <&dispcc DISP_CC_MDSS_ESC0_CLK>,
+ <&dispcc DISP_CC_MDSS_AHB_CLK>,
+ <&dispcc DISP_CC_MDSS_AXI_CLK>;
+ clock-names = "byte",
+ "byte_intf",
+ "pixel",
+ "core",
+ "iface",
+ "bus";
+
+ phys = <&dsi0_phy>;
+ phy-names = "dsi";
+
+ assigned-clocks = <&dispcc DISP_CC_MDSS_BYTE0_CLK_SRC>, <&dispcc DISP_CC_MDSS_PCLK0_CLK_SRC>;
+ assigned-clock-parents = <&dsi_phy 0>, <&dsi_phy 1>;
+
+ power-domains = <&rpmhpd SC7180_CX>;
+ operating-points-v2 = <&dsi_opp_table>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ endpoint {
+ remote-endpoint = <&dpu_intf1_out>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ endpoint {
+ remote-endpoint = <&sn65dsi86_in>;
+ data-lanes = <0 1 2 3>;
+ qcom,te-source = "mdp_vsync_e";
+ };
+ };
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml b/Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml
new file mode 100644
index 000000000000..fc9abf090f0d
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml
@@ -0,0 +1,101 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/msm/dsi-phy-10nm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Display DSI 10nm PHY
+
+maintainers:
+ - Krishna Manikandan <quic_mkrishn@quicinc.com>
+
+allOf:
+ - $ref: dsi-phy-common.yaml#
+
+properties:
+ compatible:
+ enum:
+ - qcom,dsi-phy-10nm
+ - qcom,dsi-phy-10nm-8998
+
+ reg:
+ items:
+ - description: dsi phy register set
+ - description: dsi phy lane register set
+ - description: dsi pll register set
+
+ reg-names:
+ items:
+ - const: dsi_phy
+ - const: dsi_phy_lane
+ - const: dsi_pll
+
+ vdds-supply:
+ description: |
+ Connected to DSI0_MIPI_DSI_PLL_VDDA0P9 pin for sc7180 target and
+ connected to VDDA_MIPI_DSI_0_PLL_0P9 pin for sdm845 target
+
+ qcom,phy-rescode-offset-top:
+ $ref: /schemas/types.yaml#/definitions/int8-array
+ maxItems: 5
+ description:
+ Integer array of offset for pull-up legs rescode for all five lanes.
+ To offset the drive strength from the calibrated value in an increasing
+ manner, -32 is the weakest and +31 is the strongest.
+ items:
+ minimum: -32
+ maximum: 31
+
+ qcom,phy-rescode-offset-bot:
+ $ref: /schemas/types.yaml#/definitions/int8-array
+ maxItems: 5
+ description:
+ Integer array of offset for pull-down legs rescode for all five lanes.
+ To offset the drive strength from the calibrated value in a decreasing
+ manner, -32 is the weakest and +31 is the strongest.
+ items:
+ minimum: -32
+ maximum: 31
+
+ qcom,phy-drive-ldo-level:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ The PHY LDO has an amplitude tuning feature to adjust the LDO output
+ for the HSTX drive. Use supported levels (mV) to offset the drive level
+ from the default value.
+ enum: [ 375, 400, 425, 450, 475, 500 ]
+
+required:
+ - compatible
+ - reg
+ - reg-names
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,dispcc-sdm845.h>
+ #include <dt-bindings/clock/qcom,rpmh.h>
+
+ dsi-phy@ae94400 {
+ compatible = "qcom,dsi-phy-10nm";
+ reg = <0x0ae94400 0x200>,
+ <0x0ae94600 0x280>,
+ <0x0ae94a00 0x1e0>;
+ reg-names = "dsi_phy",
+ "dsi_phy_lane",
+ "dsi_pll";
+
+ #clock-cells = <1>;
+ #phy-cells = <0>;
+
+ vdds-supply = <&vdda_mipi_dsi0_pll>;
+ clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>,
+ <&rpmhcc RPMH_CXO_CLK>;
+ clock-names = "iface", "ref";
+
+ qcom,phy-rescode-offset-top = /bits/ 8 <0 0 0 0 0>;
+ qcom,phy-rescode-offset-bot = /bits/ 8 <0 0 0 0 0>;
+ qcom,phy-drive-ldo-level = <400>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/display/msm/dsi-phy-14nm.yaml b/Documentation/devicetree/bindings/display/msm/dsi-phy-14nm.yaml
new file mode 100644
index 000000000000..206a9a4b3845
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/msm/dsi-phy-14nm.yaml
@@ -0,0 +1,79 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/msm/dsi-phy-14nm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Display DSI 14nm PHY
+
+maintainers:
+ - Krishna Manikandan <quic_mkrishn@quicinc.com>
+
+allOf:
+ - $ref: dsi-phy-common.yaml#
+
+properties:
+ compatible:
+ enum:
+ - qcom,dsi-phy-14nm
+ - qcom,dsi-phy-14nm-2290
+ - qcom,dsi-phy-14nm-660
+ - qcom,dsi-phy-14nm-8953
+ - qcom,sm6125-dsi-phy-14nm
+ - qcom,sm6150-dsi-phy-14nm
+
+ reg:
+ items:
+ - description: dsi phy register set
+ - description: dsi phy lane register set
+ - description: dsi pll register set
+
+ reg-names:
+ items:
+ - const: dsi_phy
+ - const: dsi_phy_lane
+ - const: dsi_pll
+
+ vcca-supply:
+ description: Phandle to vcca regulator device node.
+
+ power-domains:
+ description:
+ A phandle and PM domain specifier for an optional power domain.
+ maxItems: 1
+
+ required-opps:
+ description:
+ A phandle to an OPP node describing the power domain's performance point.
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - reg-names
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,dispcc-sdm845.h>
+ #include <dt-bindings/clock/qcom,rpmh.h>
+
+ dsi-phy@ae94400 {
+ compatible = "qcom,dsi-phy-14nm";
+ reg = <0x0ae94400 0x200>,
+ <0x0ae94600 0x280>,
+ <0x0ae94a00 0x1e0>;
+ reg-names = "dsi_phy",
+ "dsi_phy_lane",
+ "dsi_pll";
+
+ #clock-cells = <1>;
+ #phy-cells = <0>;
+
+ vcca-supply = <&vcca_reg>;
+ clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>,
+ <&rpmhcc RPMH_CXO_CLK>;
+ clock-names = "iface", "ref";
+ };
+...
diff --git a/Documentation/devicetree/bindings/display/msm/dsi-phy-20nm.yaml b/Documentation/devicetree/bindings/display/msm/dsi-phy-20nm.yaml
new file mode 100644
index 000000000000..93570052992a
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/msm/dsi-phy-20nm.yaml
@@ -0,0 +1,70 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/msm/dsi-phy-20nm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Display DSI 20nm PHY
+
+maintainers:
+ - Krishna Manikandan <quic_mkrishn@quicinc.com>
+
+allOf:
+ - $ref: dsi-phy-common.yaml#
+
+properties:
+ compatible:
+ const: qcom,dsi-phy-20nm
+
+ reg:
+ items:
+ - description: dsi pll register set
+ - description: dsi phy register set
+ - description: dsi phy regulator register set
+
+ reg-names:
+ items:
+ - const: dsi_pll
+ - const: dsi_phy
+ - const: dsi_phy_regulator
+
+ vcca-supply:
+ description: Phandle to vcca regulator device node.
+
+ vddio-supply:
+ description: Phandle to vdd-io regulator device node.
+
+required:
+ - compatible
+ - reg
+ - reg-names
+ - vddio-supply
+ - vcca-supply
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,dispcc-sdm845.h>
+ #include <dt-bindings/clock/qcom,rpmh.h>
+
+ dsi-phy@fd922a00 {
+ compatible = "qcom,dsi-phy-20nm";
+ reg = <0xfd922a00 0xd4>,
+ <0xfd922b00 0x2b0>,
+ <0xfd922d80 0x7b>;
+ reg-names = "dsi_pll",
+ "dsi_phy",
+ "dsi_phy_regulator";
+
+ #clock-cells = <1>;
+ #phy-cells = <0>;
+
+ vcca-supply = <&vcca_reg>;
+ vddio-supply = <&vddio_reg>;
+
+ clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>,
+ <&rpmhcc RPMH_CXO_CLK>;
+ clock-names = "iface", "ref";
+ };
+...
diff --git a/Documentation/devicetree/bindings/display/msm/dsi-phy-28nm.yaml b/Documentation/devicetree/bindings/display/msm/dsi-phy-28nm.yaml
new file mode 100644
index 000000000000..371befa9f9d2
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/msm/dsi-phy-28nm.yaml
@@ -0,0 +1,75 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/msm/dsi-phy-28nm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Display DSI 28nm PHY
+
+maintainers:
+ - Krishna Manikandan <quic_mkrishn@quicinc.com>
+
+allOf:
+ - $ref: dsi-phy-common.yaml#
+
+properties:
+ compatible:
+ enum:
+ - qcom,dsi-phy-28nm-8226
+ - qcom,dsi-phy-28nm-8937
+ - qcom,dsi-phy-28nm-8960
+ - qcom,dsi-phy-28nm-hpm
+ - qcom,dsi-phy-28nm-hpm-fam-b
+ - qcom,dsi-phy-28nm-lp
+
+ reg:
+ items:
+ - description: dsi pll register set
+ - description: dsi phy register set
+ - description: dsi phy regulator register set
+
+ reg-names:
+ items:
+ - const: dsi_pll
+ - const: dsi_phy
+ - const: dsi_phy_regulator
+
+ vddio-supply:
+ description: Phandle to vdd-io regulator device node.
+
+ qcom,dsi-phy-regulator-ldo-mode:
+ type: boolean
+ description: Indicates if the LDO mode PHY regulator is wanted.
+
+required:
+ - compatible
+ - reg
+ - reg-names
+ - vddio-supply
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,dispcc-sdm845.h>
+ #include <dt-bindings/clock/qcom,rpmh.h>
+
+ dsi-phy@fd922a00 {
+ compatible = "qcom,dsi-phy-28nm-lp";
+ reg = <0xfd922a00 0xd4>,
+ <0xfd922b00 0x2b0>,
+ <0xfd922d80 0x7b>;
+ reg-names = "dsi_pll",
+ "dsi_phy",
+ "dsi_phy_regulator";
+
+ #clock-cells = <1>;
+ #phy-cells = <0>;
+
+ vddio-supply = <&vddio_reg>;
+
+ clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>,
+ <&rpmhcc RPMH_CXO_CLK>;
+ clock-names = "iface", "ref";
+ };
+...
diff --git a/Documentation/devicetree/bindings/display/msm/dsi-phy-7nm.yaml b/Documentation/devicetree/bindings/display/msm/dsi-phy-7nm.yaml
new file mode 100644
index 000000000000..1ca820a500b7
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/msm/dsi-phy-7nm.yaml
@@ -0,0 +1,79 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/msm/dsi-phy-7nm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Display DSI 7nm PHY
+
+maintainers:
+ - Jonathan Marek <jonathan@marek.ca>
+
+allOf:
+ - $ref: dsi-phy-common.yaml#
+
+properties:
+ compatible:
+ enum:
+ - qcom,dsi-phy-7nm
+ - qcom,dsi-phy-7nm-8150
+ - qcom,sa8775p-dsi-phy-5nm
+ - qcom,sar2130p-dsi-phy-5nm
+ - qcom,sc7280-dsi-phy-7nm
+ - qcom,sm6375-dsi-phy-7nm
+ - qcom,sm8350-dsi-phy-5nm
+ - qcom,sm8450-dsi-phy-5nm
+ - qcom,sm8550-dsi-phy-4nm
+ - qcom,sm8650-dsi-phy-4nm
+ - qcom,sm8750-dsi-phy-3nm
+
+ reg:
+ items:
+ - description: dsi phy register set
+ - description: dsi phy lane register set
+ - description: dsi pll register set
+
+ reg-names:
+ items:
+ - const: dsi_phy
+ - const: dsi_phy_lane
+ - const: dsi_pll
+
+ vdds-supply:
+ description: |
+ Connected to VDD_A_DSI_PLL_0P9 pin (or VDDA_DSI{0,1}_PLL_0P9 for sm8150)
+
+ phy-type:
+ description: D-PHY (default) or C-PHY mode
+ enum: [ 10, 11 ]
+ default: 10
+
+required:
+ - compatible
+ - reg
+ - reg-names
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,dispcc-sm8250.h>
+ #include <dt-bindings/clock/qcom,rpmh.h>
+
+ dsi-phy@ae94400 {
+ compatible = "qcom,dsi-phy-7nm";
+ reg = <0x0ae94400 0x200>,
+ <0x0ae94600 0x280>,
+ <0x0ae94900 0x260>;
+ reg-names = "dsi_phy",
+ "dsi_phy_lane",
+ "dsi_pll";
+
+ #clock-cells = <1>;
+ #phy-cells = <0>;
+
+ vdds-supply = <&vreg_l5a_0p88>;
+ clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>,
+ <&rpmhcc RPMH_CXO_CLK>;
+ clock-names = "iface", "ref";
+ };
diff --git a/Documentation/devicetree/bindings/display/msm/dsi-phy-common.yaml b/Documentation/devicetree/bindings/display/msm/dsi-phy-common.yaml
new file mode 100644
index 000000000000..d0ce85a08b6d
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/msm/dsi-phy-common.yaml
@@ -0,0 +1,41 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/msm/dsi-phy-common.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Display DSI PHY Common Properties
+
+maintainers:
+ - Krishna Manikandan <quic_mkrishn@quicinc.com>
+
+description:
+ Common properties for Qualcomm Display DSI PHY.
+
+properties:
+ "#clock-cells":
+ const: 1
+ description:
+ See include/dt-bindings/clock/qcom,dsi-phy-28nm.h for clock IDs.
+
+ "#phy-cells":
+ const: 0
+
+ clocks:
+ items:
+ - description: Display AHB clock
+ - description: Board XO source
+
+ clock-names:
+ items:
+ - const: iface
+ - const: ref
+
+required:
+ - clocks
+ - clock-names
+ - "#clock-cells"
+ - "#phy-cells"
+
+additionalProperties: true
+...
diff --git a/Documentation/devicetree/bindings/display/msm/dsi.txt b/Documentation/devicetree/bindings/display/msm/dsi.txt
deleted file mode 100644
index dfc743219bd8..000000000000
--- a/Documentation/devicetree/bindings/display/msm/dsi.txt
+++ /dev/null
@@ -1,244 +0,0 @@
-Qualcomm Technologies Inc. adreno/snapdragon DSI output
-
-DSI Controller:
-Required properties:
-- compatible:
- * "qcom,mdss-dsi-ctrl"
-- reg: Physical base address and length of the registers of controller
-- reg-names: The names of register regions. The following regions are required:
- * "dsi_ctrl"
-- interrupts: The interrupt signal from the DSI block.
-- power-domains: Should be <&mmcc MDSS_GDSC>.
-- clocks: Phandles to device clocks.
-- clock-names: the following clocks are required:
- * "mdp_core"
- * "iface"
- * "bus"
- * "core_mmss"
- * "byte"
- * "pixel"
- * "core"
- For DSIv2, we need an additional clock:
- * "src"
- For DSI6G v2.0 onwards, we need also need the clock:
- * "byte_intf"
-- assigned-clocks: Parents of "byte" and "pixel" for the given platform.
-- assigned-clock-parents: The Byte clock and Pixel clock PLL outputs provided
- by a DSI PHY block. See [1] for details on clock bindings.
-- vdd-supply: phandle to vdd regulator device node
-- vddio-supply: phandle to vdd-io regulator device node
-- vdda-supply: phandle to vdda regulator device node
-- phys: phandle to DSI PHY device node
-- phy-names: the name of the corresponding PHY device
-- syscon-sfpb: A phandle to mmss_sfpb syscon node (only for DSIv2)
-- ports: Contains 2 DSI controller ports as child nodes. Each port contains
- an endpoint subnode as defined in [2] and [3].
-
-Optional properties:
-- panel@0: Node of panel connected to this DSI controller.
- See files in [4] for each supported panel.
-- qcom,dual-dsi-mode: Boolean value indicating if the DSI controller is
- driving a panel which needs 2 DSI links.
-- qcom,master-dsi: Boolean value indicating if the DSI controller is driving
- the master link of the 2-DSI panel.
-- qcom,sync-dual-dsi: Boolean value indicating if the DSI controller is
- driving a 2-DSI panel whose 2 links need receive command simultaneously.
-- pinctrl-names: the pin control state names; should contain "default"
-- pinctrl-0: the default pinctrl state (active)
-- pinctrl-n: the "sleep" pinctrl state
-- ports: contains DSI controller input and output ports as children, each
- containing one endpoint subnode.
-
- DSI Endpoint properties:
- - remote-endpoint: For port@0, set to phandle of the connected panel/bridge's
- input endpoint. For port@1, set to the MDP interface output. See [2] for
- device graph info.
-
- - data-lanes: this describes how the physical DSI data lanes are mapped
- to the logical lanes on the given platform. The value contained in
- index n describes what physical lane is mapped to the logical lane n
- (DATAn, where n lies between 0 and 3). The clock lane position is fixed
- and can't be changed. Hence, they aren't a part of the DT bindings. See
- [3] for more info on the data-lanes property.
-
- For example:
-
- data-lanes = <3 0 1 2>;
-
- The above mapping describes that the logical data lane DATA0 is mapped to
- the physical data lane DATA3, logical DATA1 to physical DATA0, logic DATA2
- to phys DATA1 and logic DATA3 to phys DATA2.
-
- There are only a limited number of physical to logical mappings possible:
- <0 1 2 3>
- <1 2 3 0>
- <2 3 0 1>
- <3 0 1 2>
- <0 3 2 1>
- <1 0 3 2>
- <2 1 0 3>
- <3 2 1 0>
-
-DSI PHY:
-Required properties:
-- compatible: Could be the following
- * "qcom,dsi-phy-28nm-hpm"
- * "qcom,dsi-phy-28nm-lp"
- * "qcom,dsi-phy-20nm"
- * "qcom,dsi-phy-28nm-8960"
- * "qcom,dsi-phy-14nm"
- * "qcom,dsi-phy-10nm"
-- reg: Physical base address and length of the registers of PLL, PHY. Some
- revisions require the PHY regulator base address, whereas others require the
- PHY lane base address. See below for each PHY revision.
-- reg-names: The names of register regions. The following regions are required:
- For DSI 28nm HPM/LP/8960 PHYs and 20nm PHY:
- * "dsi_pll"
- * "dsi_phy"
- * "dsi_phy_regulator"
- For DSI 14nm and 10nm PHYs:
- * "dsi_pll"
- * "dsi_phy"
- * "dsi_phy_lane"
-- clock-cells: Must be 1. The DSI PHY block acts as a clock provider, creating
- 2 clocks: A byte clock (index 0), and a pixel clock (index 1).
-- power-domains: Should be <&mmcc MDSS_GDSC>.
-- clocks: Phandles to device clocks. See [1] for details on clock bindings.
-- clock-names: the following clocks are required:
- * "iface"
- For 28nm HPM/LP, 28nm 8960 PHYs:
-- vddio-supply: phandle to vdd-io regulator device node
- For 20nm PHY:
-- vddio-supply: phandle to vdd-io regulator device node
-- vcca-supply: phandle to vcca regulator device node
- For 14nm PHY:
-- vcca-supply: phandle to vcca regulator device node
- For 10nm PHY:
-- vdds-supply: phandle to vdds regulator device node
-
-Optional properties:
-- qcom,dsi-phy-regulator-ldo-mode: Boolean value indicating if the LDO mode PHY
- regulator is wanted.
-- qcom,mdss-mdp-transfer-time-us: Specifies the dsi transfer time for command mode
- panels in microseconds. Driver uses this number to adjust
- the clock rate according to the expected transfer time.
- Increasing this value would slow down the mdp processing
- and can result in slower performance.
- Decreasing this value can speed up the mdp processing,
- but this can also impact power consumption.
- As a rule this time should not be higher than the time
- that would be expected with the processing at the
- dsi link rate since anyways this would be the maximum
- transfer time that could be achieved.
- If ping pong split is enabled, this time should not be higher
- than two times the dsi link rate time.
- If the property is not specified, then the default value is 14000 us.
-
-[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-[2] Documentation/devicetree/bindings/graph.txt
-[3] Documentation/devicetree/bindings/media/video-interfaces.txt
-[4] Documentation/devicetree/bindings/display/panel/
-
-Example:
- dsi0: dsi@fd922800 {
- compatible = "qcom,mdss-dsi-ctrl";
- qcom,dsi-host-index = <0>;
- interrupt-parent = <&mdp>;
- interrupts = <4 0>;
- reg-names = "dsi_ctrl";
- reg = <0xfd922800 0x200>;
- power-domains = <&mmcc MDSS_GDSC>;
- clock-names =
- "bus",
- "byte",
- "core",
- "core_mmss",
- "iface",
- "mdp_core",
- "pixel";
- clocks =
- <&mmcc MDSS_AXI_CLK>,
- <&mmcc MDSS_BYTE0_CLK>,
- <&mmcc MDSS_ESC0_CLK>,
- <&mmcc MMSS_MISC_AHB_CLK>,
- <&mmcc MDSS_AHB_CLK>,
- <&mmcc MDSS_MDP_CLK>,
- <&mmcc MDSS_PCLK0_CLK>;
-
- assigned-clocks =
- <&mmcc BYTE0_CLK_SRC>,
- <&mmcc PCLK0_CLK_SRC>;
- assigned-clock-parents =
- <&dsi_phy0 0>,
- <&dsi_phy0 1>;
-
- vdda-supply = <&pma8084_l2>;
- vdd-supply = <&pma8084_l22>;
- vddio-supply = <&pma8084_l12>;
-
- phys = <&dsi_phy0>;
- phy-names ="dsi-phy";
-
- qcom,dual-dsi-mode;
- qcom,master-dsi;
- qcom,sync-dual-dsi;
-
- qcom,mdss-mdp-transfer-time-us = <12000>;
-
- pinctrl-names = "default", "sleep";
- pinctrl-0 = <&dsi_active>;
- pinctrl-1 = <&dsi_suspend>;
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
- dsi0_in: endpoint {
- remote-endpoint = <&mdp_intf1_out>;
- };
- };
-
- port@1 {
- reg = <1>;
- dsi0_out: endpoint {
- remote-endpoint = <&panel_in>;
- data-lanes = <0 1 2 3>;
- };
- };
- };
-
- panel: panel@0 {
- compatible = "sharp,lq101r1sx01";
- reg = <0>;
- link2 = <&secondary>;
-
- power-supply = <...>;
- backlight = <...>;
-
- port {
- panel_in: endpoint {
- remote-endpoint = <&dsi0_out>;
- };
- };
- };
- };
-
- dsi_phy0: dsi-phy@fd922a00 {
- compatible = "qcom,dsi-phy-28nm-hpm";
- qcom,dsi-phy-index = <0>;
- reg-names =
- "dsi_pll",
- "dsi_phy",
- "dsi_phy_regulator";
- reg = <0xfd922a00 0xd4>,
- <0xfd922b00 0x2b0>,
- <0xfd922d80 0x7b>;
- clock-names = "iface";
- clocks = <&mmcc MDSS_AHB_CLK>;
- #clock-cells = <1>;
- vddio-supply = <&pma8084_l12>;
-
- qcom,dsi-phy-regulator-ldo-mode;
- };
diff --git a/Documentation/devicetree/bindings/display/msm/edp.txt b/Documentation/devicetree/bindings/display/msm/edp.txt
deleted file mode 100644
index eff9daff418c..000000000000
--- a/Documentation/devicetree/bindings/display/msm/edp.txt
+++ /dev/null
@@ -1,56 +0,0 @@
-Qualcomm Technologies Inc. adreno/snapdragon eDP output
-
-Required properties:
-- compatible:
- * "qcom,mdss-edp"
-- reg: Physical base address and length of the registers of controller and PLL
-- reg-names: The names of register regions. The following regions are required:
- * "edp"
- * "pll_base"
-- interrupts: The interrupt signal from the eDP block.
-- power-domains: Should be <&mmcc MDSS_GDSC>.
-- clocks: device clocks
- See Documentation/devicetree/bindings/clock/clock-bindings.txt for details.
-- clock-names: the following clocks are required:
- * "core"
- * "iface"
- * "mdp_core"
- * "pixel"
- * "link"
-- #clock-cells: The value should be 1.
-- vdda-supply: phandle to vdda regulator device node
-- lvl-vdd-supply: phandle to regulator device node which is used to supply power
- to HPD receiving chip
-- panel-en-gpios: GPIO pin to supply power to panel.
-- panel-hpd-gpios: GPIO pin used for eDP hpd.
-
-
-Example:
- mdss_edp: qcom,mdss_edp@fd923400 {
- compatible = "qcom,mdss-edp";
- reg-names =
- "edp",
- "pll_base";
- reg = <0xfd923400 0x700>,
- <0xfd923a00 0xd4>;
- interrupt-parent = <&mdss_mdp>;
- interrupts = <12 0>;
- power-domains = <&mmcc MDSS_GDSC>;
- clock-names =
- "core",
- "pixel",
- "iface",
- "link",
- "mdp_core";
- clocks =
- <&mmcc MDSS_EDPAUX_CLK>,
- <&mmcc MDSS_EDPPIXEL_CLK>,
- <&mmcc MDSS_AHB_CLK>,
- <&mmcc MDSS_EDPLINK_CLK>,
- <&mmcc MDSS_MDP_CLK>;
- #clock-cells = <1>;
- vdda-supply = <&pma8084_l12>;
- lvl-vdd-supply = <&lvl_vreg>;
- panel-en-gpios = <&tlmm 137 0>;
- panel-hpd-gpios = <&tlmm 103 0>;
- };
diff --git a/Documentation/devicetree/bindings/display/msm/gmu.yaml b/Documentation/devicetree/bindings/display/msm/gmu.yaml
new file mode 100644
index 000000000000..afc187935744
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/msm/gmu.yaml
@@ -0,0 +1,364 @@
+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright 2019-2020, The Linux Foundation, All Rights Reserved
+%YAML 1.2
+---
+
+$id: http://devicetree.org/schemas/display/msm/gmu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: GMU attached to certain Adreno GPUs
+
+maintainers:
+ - Rob Clark <robdclark@gmail.com>
+
+description: |
+ These bindings describe the Graphics Management Unit (GMU) that is attached
+ to members of the Adreno A6xx GPU family. The GMU provides on-device power
+ management and support to improve power efficiency and reduce the load on
+ the CPU.
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - pattern: '^qcom,adreno-gmu-[67][0-9][0-9]\.[0-9]$'
+ - const: qcom,adreno-gmu
+ - items:
+ - pattern: '^qcom,adreno-gmu-x[1-9][0-9][0-9]\.[0-9]$'
+ - const: qcom,adreno-gmu
+ - const: qcom,adreno-gmu-wrapper
+
+ reg:
+ minItems: 1
+ maxItems: 4
+
+ reg-names:
+ minItems: 1
+ maxItems: 4
+
+ clocks:
+ minItems: 4
+ maxItems: 7
+
+ clock-names:
+ minItems: 4
+ maxItems: 7
+
+ interrupts:
+ items:
+ - description: GMU HFI interrupt
+ - description: GMU interrupt
+
+ interrupt-names:
+ items:
+ - const: hfi
+ - const: gmu
+
+ power-domains:
+ items:
+ - description: CX power domain
+ - description: GX power domain
+
+ power-domain-names:
+ items:
+ - const: cx
+ - const: gx
+
+ iommus:
+ maxItems: 1
+
+ qcom,qmp:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description: Reference to the AOSS side-channel message RAM
+
+ operating-points-v2: true
+
+ opp-table:
+ type: object
+
+required:
+ - compatible
+ - reg
+ - reg-names
+ - power-domains
+ - power-domain-names
+
+additionalProperties: false
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,adreno-gmu-618.0
+ - qcom,adreno-gmu-630.2
+ then:
+ properties:
+ reg:
+ items:
+ - description: Core GMU registers
+ - description: GMU PDC registers
+ - description: GMU PDC sequence registers
+ reg-names:
+ items:
+ - const: gmu
+ - const: gmu_pdc
+ - const: gmu_pdc_seq
+ clocks:
+ items:
+ - description: GMU clock
+ - description: GPU CX clock
+ - description: GPU AXI clock
+ - description: GPU MEMNOC clock
+ clock-names:
+ items:
+ - const: gmu
+ - const: cxo
+ - const: axi
+ - const: memnoc
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,adreno-gmu-623.0
+ then:
+ properties:
+ reg:
+ items:
+ - description: Core GMU registers
+ - description: Resource controller registers
+ - description: GMU PDC registers
+ reg-names:
+ items:
+ - const: gmu
+ - const: rscc
+ - const: gmu_pdc
+ clocks:
+ items:
+ - description: GMU clock
+ - description: GPU CX clock
+ - description: GPU AXI clock
+ - description: GPU MEMNOC clock
+ - description: GPU AHB clock
+ - description: GPU HUB CX clock
+ clock-names:
+ items:
+ - const: gmu
+ - const: cxo
+ - const: axi
+ - const: memnoc
+ - const: ahb
+ - const: hub
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,adreno-gmu-635.0
+ - qcom,adreno-gmu-660.1
+ - qcom,adreno-gmu-663.0
+ then:
+ properties:
+ reg:
+ items:
+ - description: Core GMU registers
+ - description: Resource controller registers
+ - description: GMU PDC registers
+ reg-names:
+ items:
+ - const: gmu
+ - const: rscc
+ - const: gmu_pdc
+ clocks:
+ items:
+ - description: GMU clock
+ - description: GPU CX clock
+ - description: GPU AXI clock
+ - description: GPU MEMNOC clock
+ - description: GPU AHB clock
+ - description: GPU HUB CX clock
+ - description: GPU SMMU vote clock
+ clock-names:
+ items:
+ - const: gmu
+ - const: cxo
+ - const: axi
+ - const: memnoc
+ - const: ahb
+ - const: hub
+ - const: smmu_vote
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,adreno-gmu-640.1
+ then:
+ properties:
+ reg:
+ items:
+ - description: Core GMU registers
+ - description: GMU PDC registers
+ - description: GMU PDC sequence registers
+ reg-names:
+ items:
+ - const: gmu
+ - const: gmu_pdc
+ - const: gmu_pdc_seq
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,adreno-gmu-650.2
+ then:
+ properties:
+ reg:
+ items:
+ - description: Core GMU registers
+ - description: Resource controller registers
+ - description: GMU PDC registers
+ - description: GMU PDC sequence registers
+ reg-names:
+ items:
+ - const: gmu
+ - const: rscc
+ - const: gmu_pdc
+ - const: gmu_pdc_seq
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,adreno-gmu-640.1
+ - qcom,adreno-gmu-650.2
+ then:
+ properties:
+ clocks:
+ items:
+ - description: GPU AHB clock
+ - description: GMU clock
+ - description: GPU CX clock
+ - description: GPU AXI clock
+ - description: GPU MEMNOC clock
+ clock-names:
+ items:
+ - const: ahb
+ - const: gmu
+ - const: cxo
+ - const: axi
+ - const: memnoc
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,adreno-gmu-730.1
+ - qcom,adreno-gmu-740.1
+ - qcom,adreno-gmu-750.1
+ - qcom,adreno-gmu-x185.1
+ then:
+ properties:
+ reg:
+ items:
+ - description: Core GMU registers
+ - description: Resource controller registers
+ - description: GMU PDC registers
+ reg-names:
+ items:
+ - const: gmu
+ - const: rscc
+ - const: gmu_pdc
+ clocks:
+ items:
+ - description: GPU AHB clock
+ - description: GMU clock
+ - description: GPU CX clock
+ - description: GPU AXI clock
+ - description: GPU MEMNOC clock
+ - description: GMU HUB clock
+ - description: GPUSS DEMET clock
+ clock-names:
+ items:
+ - const: ahb
+ - const: gmu
+ - const: cxo
+ - const: axi
+ - const: memnoc
+ - const: hub
+ - const: demet
+
+ required:
+ - qcom,qmp
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: qcom,adreno-gmu-wrapper
+ then:
+ properties:
+ reg:
+ items:
+ - description: GMU wrapper register space
+ reg-names:
+ items:
+ - const: gmu
+ else:
+ required:
+ - clocks
+ - clock-names
+ - interrupts
+ - interrupt-names
+ - iommus
+ - operating-points-v2
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,gpucc-sdm845.h>
+ #include <dt-bindings/clock/qcom,gcc-sdm845.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ gmu: gmu@506a000 {
+ compatible = "qcom,adreno-gmu-630.2", "qcom,adreno-gmu";
+
+ reg = <0x506a000 0x30000>,
+ <0xb280000 0x10000>,
+ <0xb480000 0x10000>;
+ reg-names = "gmu", "gmu_pdc", "gmu_pdc_seq";
+
+ clocks = <&gpucc GPU_CC_CX_GMU_CLK>,
+ <&gpucc GPU_CC_CXO_CLK>,
+ <&gcc GCC_DDRSS_GPU_AXI_CLK>,
+ <&gcc GCC_GPU_MEMNOC_GFX_CLK>;
+ clock-names = "gmu", "cxo", "axi", "memnoc";
+
+ interrupts = <GIC_SPI 304 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 305 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "hfi", "gmu";
+
+ power-domains = <&gpucc GPU_CX_GDSC>,
+ <&gpucc GPU_GX_GDSC>;
+ power-domain-names = "cx", "gx";
+
+ iommus = <&adreno_smmu 5>;
+ operating-points-v2 = <&gmu_opp_table>;
+ };
+
+ gmu_wrapper: gmu@596a000 {
+ compatible = "qcom,adreno-gmu-wrapper";
+ reg = <0x0596a000 0x30000>;
+ reg-names = "gmu";
+ power-domains = <&gpucc GPU_CX_GDSC>,
+ <&gpucc GPU_GX_GDSC>;
+ power-domain-names = "cx", "gx";
+ };
diff --git a/Documentation/devicetree/bindings/display/msm/gpu.txt b/Documentation/devicetree/bindings/display/msm/gpu.txt
deleted file mode 100644
index 43fac0fe09bb..000000000000
--- a/Documentation/devicetree/bindings/display/msm/gpu.txt
+++ /dev/null
@@ -1,38 +0,0 @@
-Qualcomm adreno/snapdragon GPU
-
-Required properties:
-- compatible: "qcom,adreno-XYZ.W", "qcom,adreno"
- for example: "qcom,adreno-306.0", "qcom,adreno"
- Note that you need to list the less specific "qcom,adreno" (since this
- is what the device is matched on), in addition to the more specific
- with the chip-id.
-- reg: Physical base address and length of the controller's registers.
-- interrupts: The interrupt signal from the gpu.
-- clocks: device clocks
- See ../clocks/clock-bindings.txt for details.
-- clock-names: the following clocks are required:
- * "core"
- * "iface"
- * "mem_iface"
-
-Example:
-
-/ {
- ...
-
- gpu: qcom,kgsl-3d0@4300000 {
- compatible = "qcom,adreno-320.2", "qcom,adreno";
- reg = <0x04300000 0x20000>;
- reg-names = "kgsl_3d0_reg_memory";
- interrupts = <GIC_SPI 80 0>;
- interrupt-names = "kgsl_3d0_irq";
- clock-names =
- "core",
- "iface",
- "mem_iface";
- clocks =
- <&mmcc GFX3D_CLK>,
- <&mmcc GFX3D_AHB_CLK>,
- <&mmcc MMSS_IMEM_AHB_CLK>;
- };
-};
diff --git a/Documentation/devicetree/bindings/display/msm/gpu.yaml b/Documentation/devicetree/bindings/display/msm/gpu.yaml
new file mode 100644
index 000000000000..3696b083e353
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/msm/gpu.yaml
@@ -0,0 +1,535 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+
+$id: http://devicetree.org/schemas/display/msm/gpu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Adreno or Snapdragon GPUs
+
+maintainers:
+ - Rob Clark <robdclark@gmail.com>
+
+# dtschema does not select nodes based on pattern+const, so add custom select
+# as a work-around:
+select:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,adreno
+ - amd,imageon
+ required:
+ - compatible
+
+properties:
+ compatible:
+ oneOf:
+ - description: |
+ The driver is parsing the compat string for Adreno to
+ figure out the chip-id.
+ items:
+ - pattern: '^qcom,adreno-[0-9a-f]{8}$'
+ - const: qcom,adreno
+ - description: |
+ The driver is parsing the compat string for Adreno to
+ figure out the gpu-id and patch level.
+ items:
+ - pattern: '^qcom,adreno-[3-7][0-9][0-9]\.[0-9]+$'
+ - const: qcom,adreno
+ - description: |
+ The driver is parsing the compat string for Imageon to
+ figure out the gpu-id and patch level.
+ items:
+ - pattern: '^amd,imageon-200\.[0-1]$'
+ - const: amd,imageon
+
+ clocks:
+ minItems: 2
+ maxItems: 7
+
+ clock-names:
+ minItems: 2
+ maxItems: 7
+
+ reg:
+ minItems: 1
+ maxItems: 3
+
+ reg-names:
+ minItems: 1
+ items:
+ - const: kgsl_3d0_reg_memory
+ - const: cx_mem
+ - const: cx_dbgc
+
+ interrupts:
+ maxItems: 1
+
+ interrupt-names:
+ maxItems: 1
+
+ interconnects:
+ minItems: 1
+ maxItems: 2
+
+ interconnect-names:
+ minItems: 1
+ items:
+ - const: gfx-mem
+ - const: ocmem
+
+ iommus:
+ minItems: 1
+ maxItems: 64
+
+ sram:
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ minItems: 1
+ maxItems: 4
+ items:
+ maxItems: 1
+ description: |
+ phandles to one or more reserved on-chip SRAM regions.
+ phandle to the On Chip Memory (OCMEM) that's present on some a3xx and
+ a4xx Snapdragon SoCs. See
+ Documentation/devicetree/bindings/sram/qcom,ocmem.yaml
+
+ operating-points-v2: true
+ opp-table:
+ type: object
+
+ power-domains:
+ maxItems: 1
+
+ zap-shader:
+ type: object
+ additionalProperties: false
+ description: |
+ For a5xx and a6xx devices this node contains a memory-region that
+ points to reserved memory to store the zap shader that can be used to
+ help bring the GPU out of secure mode.
+ properties:
+ memory-region:
+ maxItems: 1
+
+ firmware-name:
+ description: |
+ Default name of the firmware to load to the remote processor.
+
+ "#cooling-cells":
+ const: 2
+
+ nvmem-cell-names:
+ maxItems: 1
+
+ nvmem-cells:
+ description: efuse registers
+ maxItems: 1
+
+ qcom,gmu:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description: |
+ For GMU attached devices a phandle to the GMU device that will
+ control the power for the GPU.
+
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+additionalProperties: false
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ oneOf:
+ - pattern: '^qcom,adreno-305\.[0-9]+$'
+ - pattern: '^qcom,adreno-330\.[0-9]+$'
+ then:
+ properties:
+ clocks:
+ minItems: 3
+ maxItems: 3
+ clock-names:
+ items:
+ - const: core
+ description: GPU Core clock
+ - const: iface
+ description: GPU Interface clock
+ - const: mem_iface
+ description: GPU Memory Interface clock
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ pattern: '^qcom,adreno-306\.[0-9]+$'
+ then:
+ properties:
+ clocks:
+ minItems: 5
+ maxItems: 6
+ clock-names:
+ oneOf:
+ - items:
+ - const: core
+ description: GPU Core clock
+ - const: iface
+ description: GPU Interface clock
+ - const: mem_iface
+ description: GPU Memory Interface clock
+ - const: alt_mem_iface
+ description: GPU Alternative Memory Interface clock
+ - const: gfx3d
+ description: GPU 3D engine clock
+ - items:
+ - const: core
+ description: GPU Core clock
+ - const: iface
+ description: GPU Interface clock
+ - const: mem
+ description: GPU Memory clock
+ - const: mem_iface
+ description: GPU Memory Interface clock
+ - const: alt_mem_iface
+ description: GPU Alternative Memory Interface clock
+ - const: gfx3d
+ description: GPU 3D engine clock
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ pattern: '^qcom,adreno-320\.[0-9]+$'
+ then:
+ properties:
+ clocks:
+ minItems: 4
+ maxItems: 4
+ clock-names:
+ items:
+ - const: core
+ description: GPU Core clock
+ - const: iface
+ description: GPU Interface clock
+ - const: mem
+ description: GPU Memory clock
+ - const: mem_iface
+ description: GPU Memory Interface clock
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ pattern: '^qcom,adreno-405\.[0-9]+$'
+ then:
+ properties:
+ clocks:
+ minItems: 7
+ maxItems: 7
+ clock-names:
+ items:
+ - const: core
+ description: GPU Core clock
+ - const: iface
+ description: GPU Interface clock
+ - const: mem
+ description: GPU Memory clock
+ - const: mem_iface
+ description: GPU Memory Interface clock
+ - const: alt_mem_iface
+ description: GPU Alternative Memory Interface clock
+ - const: gfx3d
+ description: GPU 3D engine clock
+ - const: rbbmtimer
+ description: GPU RBBM Timer for Adreno 5xx series
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ pattern: '^qcom,adreno-50[56]\.[0-9]+$'
+ then:
+ properties:
+ clocks:
+ minItems: 6
+ maxItems: 6
+ clock-names:
+ items:
+ - const: core
+ description: GPU Core clock
+ - const: iface
+ description: GPU Interface clock
+ - const: mem_iface
+ description: GPU Memory Interface clock
+ - const: alt_mem_iface
+ description: GPU Alternative Memory Interface clock
+ - const: rbbmtimer
+ description: GPU RBBM Timer for Adreno 5xx series
+ - const: alwayson
+ description: GPU AON clock
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ oneOf:
+ - pattern: '^qcom,adreno-508\.[0-9]+$'
+ - pattern: '^qcom,adreno-509\.[0-9]+$'
+ - pattern: '^qcom,adreno-512\.[0-9]+$'
+ - pattern: '^qcom,adreno-540\.[0-9]+$'
+ then:
+ properties:
+ clocks:
+ minItems: 6
+ maxItems: 6
+ clock-names:
+ items:
+ - const: iface
+ description: GPU Interface clock
+ - const: rbbmtimer
+ description: GPU RBBM Timer for Adreno 5xx series
+ - const: mem
+ description: GPU Memory clock
+ - const: mem_iface
+ description: GPU Memory Interface clock
+ - const: rbcpr
+ description: GPU RB Core Power Reduction clock
+ - const: core
+ description: GPU Core clock
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ pattern: '^qcom,adreno-510\.[0-9]+$'
+ then:
+ properties:
+ clocks:
+ minItems: 6
+ maxItems: 6
+ clock-names:
+ items:
+ - const: core
+ description: GPU Core clock
+ - const: iface
+ description: GPU Interface clock
+ - const: mem
+ description: GPU Memory clock
+ - const: mem_iface
+ description: GPU Memory Interface clock
+ - const: rbbmtimer
+ description: GPU RBBM Timer for Adreno 5xx series
+ - const: alwayson
+ description: GPU AON clock
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ pattern: '^qcom,adreno-530\.[0-9]+$'
+ then:
+ properties:
+ clocks:
+ minItems: 5
+ maxItems: 5
+ clock-names:
+ items:
+ - const: core
+ description: GPU Core clock
+ - const: iface
+ description: GPU Interface clock
+ - const: rbbmtimer
+ description: GPU RBBM Timer for Adreno 5xx series
+ - const: mem
+ description: GPU Memory clock
+ - const: mem_iface
+ description: GPU Memory Interface clock
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,adreno-610.0
+ - qcom,adreno-619.1
+ - qcom,adreno-07000200
+ then:
+ properties:
+ clocks:
+ minItems: 6
+ maxItems: 6
+
+ clock-names:
+ items:
+ - const: core
+ description: GPU Core clock
+ - const: iface
+ description: GPU Interface clock
+ - const: mem_iface
+ description: GPU Memory Interface clock
+ - const: alt_mem_iface
+ description: GPU Alternative Memory Interface clock
+ - const: gmu
+ description: CX GMU clock
+ - const: xo
+ description: GPUCC clocksource clock
+
+ reg-names:
+ minItems: 1
+ items:
+ - const: kgsl_3d0_reg_memory
+ - const: cx_dbgc
+
+ required:
+ - clocks
+ - clock-names
+ else:
+ if:
+ properties:
+ compatible:
+ contains:
+ oneOf:
+ - pattern: '^qcom,adreno-[67][0-9][0-9]\.[0-9]+$'
+ - pattern: '^qcom,adreno-[0-9a-f]{8}$'
+
+ then: # Starting with A6xx, the clocks are usually defined in the GMU node
+ properties:
+ clocks: false
+ clock-names: false
+
+ reg-names:
+ minItems: 1
+ items:
+ - const: kgsl_3d0_reg_memory
+ - const: cx_mem
+ - const: cx_dbgc
+
+examples:
+ - |
+
+ // Example a3xx/4xx:
+
+ #include <dt-bindings/clock/qcom,mmcc-msm8974.h>
+ #include <dt-bindings/clock/qcom,rpmcc.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ gpu: gpu@fdb00000 {
+ compatible = "qcom,adreno-330.2", "qcom,adreno";
+
+ reg = <0xfdb00000 0x10000>;
+ reg-names = "kgsl_3d0_reg_memory";
+
+ clock-names = "core", "iface", "mem_iface";
+ clocks = <&mmcc OXILI_GFX3D_CLK>,
+ <&mmcc OXILICX_AHB_CLK>,
+ <&mmcc OXILICX_AXI_CLK>;
+
+ interrupts = <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "kgsl_3d0_irq";
+
+ sram = <&gpu_sram>;
+ power-domains = <&mmcc OXILICX_GDSC>;
+ operating-points-v2 = <&gpu_opp_table>;
+ iommus = <&gpu_iommu 0>;
+ #cooling-cells = <2>;
+ };
+
+ ocmem@fdd00000 {
+ compatible = "qcom,msm8974-ocmem";
+
+ reg = <0xfdd00000 0x2000>,
+ <0xfec00000 0x180000>;
+ reg-names = "ctrl", "mem";
+
+ clocks = <&rpmcc RPM_SMD_OCMEMGX_CLK>,
+ <&mmcc OCMEMCX_OCMEMNOC_CLK>;
+ clock-names = "core", "iface";
+
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0 0xfec00000 0x100000>;
+
+ gpu_sram: gpu-sram@0 {
+ reg = <0x0 0x100000>;
+ };
+ };
+ - |
+
+ // Example a6xx (with GMU):
+
+ #include <dt-bindings/clock/qcom,gpucc-sdm845.h>
+ #include <dt-bindings/clock/qcom,gcc-sdm845.h>
+ #include <dt-bindings/power/qcom-rpmpd.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/interconnect/qcom,sdm845.h>
+
+ reserved-memory {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ zap_shader_region: gpu@8f200000 {
+ compatible = "shared-dma-pool";
+ reg = <0x0 0x90b00000 0x0 0xa00000>;
+ no-map;
+ };
+ };
+
+ gpu@5000000 {
+ compatible = "qcom,adreno-630.2", "qcom,adreno";
+
+ reg = <0x5000000 0x40000>, <0x509e000 0x10>;
+ reg-names = "kgsl_3d0_reg_memory", "cx_mem";
+
+ #cooling-cells = <2>;
+
+ interrupts = <GIC_SPI 300 IRQ_TYPE_LEVEL_HIGH>;
+
+ iommus = <&adreno_smmu 0>;
+
+ operating-points-v2 = <&gpu_opp_table>;
+
+ interconnects = <&rsc_hlos MASTER_GFX3D &rsc_hlos SLAVE_EBI1>;
+ interconnect-names = "gfx-mem";
+
+ qcom,gmu = <&gmu>;
+
+ gpu_opp_table: opp-table {
+ compatible = "operating-points-v2";
+
+ opp-430000000 {
+ opp-hz = /bits/ 64 <430000000>;
+ opp-level = <RPMH_REGULATOR_LEVEL_SVS_L1>;
+ opp-peak-kBps = <5412000>;
+ };
+
+ opp-355000000 {
+ opp-hz = /bits/ 64 <355000000>;
+ opp-level = <RPMH_REGULATOR_LEVEL_SVS>;
+ opp-peak-kBps = <3072000>;
+ };
+
+ opp-267000000 {
+ opp-hz = /bits/ 64 <267000000>;
+ opp-level = <RPMH_REGULATOR_LEVEL_LOW_SVS>;
+ opp-peak-kBps = <3072000>;
+ };
+
+ opp-180000000 {
+ opp-hz = /bits/ 64 <180000000>;
+ opp-level = <RPMH_REGULATOR_LEVEL_MIN_SVS>;
+ opp-peak-kBps = <1804000>;
+ };
+ };
+
+ zap-shader {
+ memory-region = <&zap_shader_region>;
+ firmware-name = "qcom/LENOVO/81JL/qcdxkmsuc850.mbn";
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/msm/hdmi.txt b/Documentation/devicetree/bindings/display/msm/hdmi.txt
deleted file mode 100644
index 5f90a40da51b..000000000000
--- a/Documentation/devicetree/bindings/display/msm/hdmi.txt
+++ /dev/null
@@ -1,99 +0,0 @@
-Qualcomm adreno/snapdragon hdmi output
-
-Required properties:
-- compatible: one of the following
- * "qcom,hdmi-tx-8996"
- * "qcom,hdmi-tx-8994"
- * "qcom,hdmi-tx-8084"
- * "qcom,hdmi-tx-8974"
- * "qcom,hdmi-tx-8660"
- * "qcom,hdmi-tx-8960"
-- reg: Physical base address and length of the controller's registers
-- reg-names: "core_physical"
-- interrupts: The interrupt signal from the hdmi block.
-- power-domains: Should be <&mmcc MDSS_GDSC>.
-- clocks: device clocks
- See ../clocks/clock-bindings.txt for details.
-- core-vdda-supply: phandle to supply regulator
-- hdmi-mux-supply: phandle to mux regulator
-- phys: the phandle for the HDMI PHY device
-- phy-names: the name of the corresponding PHY device
-
-Optional properties:
-- hpd-gpios: hpd pin
-- qcom,hdmi-tx-mux-en-gpios: hdmi mux enable pin
-- qcom,hdmi-tx-mux-sel-gpios: hdmi mux select pin
-- qcom,hdmi-tx-mux-lpm-gpios: hdmi mux lpm pin
-- power-domains: reference to the power domain(s), if available.
-- pinctrl-names: the pin control state names; should contain "default"
-- pinctrl-0: the default pinctrl state (active)
-- pinctrl-1: the "sleep" pinctrl state
-
-HDMI PHY:
-Required properties:
-- compatible: Could be the following
- * "qcom,hdmi-phy-8660"
- * "qcom,hdmi-phy-8960"
- * "qcom,hdmi-phy-8974"
- * "qcom,hdmi-phy-8084"
- * "qcom,hdmi-phy-8996"
-- #phy-cells: Number of cells in a PHY specifier; Should be 0.
-- reg: Physical base address and length of the registers of the PHY sub blocks.
-- reg-names: The names of register regions. The following regions are required:
- * "hdmi_phy"
- * "hdmi_pll"
- For HDMI PHY on msm8996, these additional register regions are required:
- * "hdmi_tx_l0"
- * "hdmi_tx_l1"
- * "hdmi_tx_l3"
- * "hdmi_tx_l4"
-- power-domains: Should be <&mmcc MDSS_GDSC>.
-- clocks: device clocks
- See Documentation/devicetree/bindings/clock/clock-bindings.txt for details.
-- core-vdda-supply: phandle to vdda regulator device node
-
-Example:
-
-/ {
- ...
-
- hdmi: hdmi@4a00000 {
- compatible = "qcom,hdmi-tx-8960";
- reg-names = "core_physical";
- reg = <0x04a00000 0x2f0>;
- interrupts = <GIC_SPI 79 0>;
- power-domains = <&mmcc MDSS_GDSC>;
- clock-names =
- "core",
- "master_iface",
- "slave_iface";
- clocks =
- <&mmcc HDMI_APP_CLK>,
- <&mmcc HDMI_M_AHB_CLK>,
- <&mmcc HDMI_S_AHB_CLK>;
- qcom,hdmi-tx-ddc-clk = <&msmgpio 70 GPIO_ACTIVE_HIGH>;
- qcom,hdmi-tx-ddc-data = <&msmgpio 71 GPIO_ACTIVE_HIGH>;
- qcom,hdmi-tx-hpd = <&msmgpio 72 GPIO_ACTIVE_HIGH>;
- core-vdda-supply = <&pm8921_hdmi_mvs>;
- hdmi-mux-supply = <&ext_3p3v>;
- pinctrl-names = "default", "sleep";
- pinctrl-0 = <&hpd_active &ddc_active &cec_active>;
- pinctrl-1 = <&hpd_suspend &ddc_suspend &cec_suspend>;
-
- phys = <&hdmi_phy>;
- phy-names = "hdmi_phy";
- };
-
- hdmi_phy: phy@4a00400 {
- compatible = "qcom,hdmi-phy-8960";
- reg-names = "hdmi_phy",
- "hdmi_pll";
- reg = <0x4a00400 0x60>,
- <0x4a00500 0x100>;
- #phy-cells = <0>;
- power-domains = <&mmcc MDSS_GDSC>;
- clock-names = "slave_iface";
- clocks = <&mmcc HDMI_S_AHB_CLK>;
- core-vdda-supply = <&pm8921_hdmi_mvs>;
- };
-};
diff --git a/Documentation/devicetree/bindings/display/msm/hdmi.yaml b/Documentation/devicetree/bindings/display/msm/hdmi.yaml
new file mode 100644
index 000000000000..dfec6c3480f3
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/msm/hdmi.yaml
@@ -0,0 +1,241 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+
+$id: http://devicetree.org/schemas/display/msm/hdmi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Adreno/Snapdragon HDMI output
+
+maintainers:
+ - Rob Clark <robdclark@gmail.com>
+
+properties:
+ compatible:
+ enum:
+ - qcom,hdmi-tx-8084
+ - qcom,hdmi-tx-8660
+ - qcom,hdmi-tx-8960
+ - qcom,hdmi-tx-8974
+ - qcom,hdmi-tx-8994
+ - qcom,hdmi-tx-8996
+ - qcom,hdmi-tx-8998
+
+ clocks:
+ minItems: 1
+ maxItems: 8
+
+ clock-names:
+ minItems: 1
+ maxItems: 8
+
+ reg:
+ minItems: 1
+ maxItems: 3
+
+ reg-names:
+ minItems: 1
+ items:
+ - const: core_physical
+ - const: qfprom_physical
+ - const: hdcp_physical
+
+ interrupts:
+ maxItems: 1
+
+ phys:
+ maxItems: 1
+
+ phy-names:
+ enum:
+ - hdmi_phy
+ - hdmi-phy
+ deprecated: true
+
+ core-vdda-supply:
+ description: phandle to VDDA supply regulator
+
+ hdmi-mux-supply:
+ description: phandle to mux regulator
+ deprecated: true
+
+ core-vcc-supply:
+ description: phandle to VCC supply regulator
+
+ hpd-gpios:
+ maxItems: 1
+ description: hpd pin
+
+ '#sound-dai-cells':
+ const: 1
+
+ ports:
+ type: object
+ $ref: /schemas/graph.yaml#/properties/ports
+ properties:
+ port@0:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: |
+ Input endpoints of the controller.
+
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description: |
+ Output endpoints of the controller.
+
+ required:
+ - port@0
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - reg
+ - reg-names
+ - interrupts
+ - phys
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,hdmi-tx-8960
+ - qcom,hdmi-tx-8660
+ then:
+ properties:
+ clocks:
+ minItems: 3
+ maxItems: 3
+ clock-names:
+ items:
+ - const: core
+ - const: master_iface
+ - const: slave_iface
+ core-vcc-supplies: false
+
+ - if:
+ properties:
+ compatible: